// Calls A.MakeReal(i,j) on every entry of the `offset` diagonal of A.
//
// A non-negative offset starts the walk in row 0 at column `offset`; a
// negative offset starts it in column 0 at row `-offset`. The number of
// entries visited is whatever A.DiagonalLength(offset) reports.
void MakeDiagonalReal( Matrix<T>& A, Int offset )
{
    EL_DEBUG_CSE
    Int row = Max(-offset,0);
    Int col = Max( offset,0);
    const Int numEntries = A.DiagonalLength(offset);

    // Step down the diagonal: each move advances one row and one column.
    for( Int remaining=numEntries; remaining>0; --remaining, ++row, ++col )
        A.MakeReal( row, col );
}
// Calls A.Conjugate(i,j) on every entry of the `offset` diagonal of A.
//
// The diagonal begins at (Max(-offset,0), Max(offset,0)) and contains
// A.DiagonalLength(offset) entries.
void ConjugateDiagonal( Matrix<T>& A, Int offset )
{
    EL_DEBUG_CSE
    const Int firstRow = Max(-offset,0);
    const Int firstCol = Max( offset,0);
    const Int numEntries = A.DiagonalLength(offset);

    // EL_PARALLEL_FOR is defined elsewhere (presumably a parallel-for
    // pragma); the loop is kept in plain single-index form right after it.
    EL_PARALLEL_FOR
    for( Int step=0; step<numEntries; ++step )
        A.Conjugate( firstRow+step, firstCol+step );
}
// LUVBUnblocked: only the opening of this routine is visible in this part of
// the file; the remainder of the body is cut off, so the line is left as-is.
// What is shown: it reads the length of H's `offset` diagonal and, inside
// DEBUG_ONLY, raises a LogicError when householderScalars.Height() differs
// from that length.
// NOTE(review): the name suggests an unblocked left/upper/vertical/backward
// reflector application -- confirm against the complete definition.
void LUVBUnblocked ( Conjugation conjugation, Int offset, const Matrix<F>& H, const Matrix<F>& householderScalars, Matrix<F>& A ) { DEBUG_CSE const Int diagLength = H.DiagonalLength(offset); DEBUG_ONLY( if( householderScalars.Height() != diagLength ) LogicError ("householderScalars must be the same length as H's offset diag."); )
void GetMappedDiagonal ( const Matrix<T>& A, Matrix<S>& d, function<S(const T&)> func, Int offset ) { EL_DEBUG_CSE const Int diagLength = A.DiagonalLength(offset); d.Resize( diagLength, 1 ); const Int iStart = Max(-offset,0); const Int jStart = Max( offset,0); S* dBuf = d.Buffer(); const T* ABuf = A.LockedBuffer(); const Int ldim = A.LDim(); EL_PARALLEL_FOR for( Int k=0; k<diagLength; ++k ) { const Int i = iStart + k; const Int j = jStart + k; dBuf[k] = func(ABuf[i+j*ldim]); } }
inline void ApplyPackedReflectorsLUVF ( Conjugation conjugation, int offset, const Matrix<Complex<R> >& H, const Matrix<Complex<R> >& t, Matrix<Complex<R> >& A ) { #ifndef RELEASE PushCallStack("internal::ApplyPackedReflectorsLUVF"); if( offset < 0 || offset > H.Height() ) throw std::logic_error("Transforms out of bounds"); if( H.Width() != A.Height() ) throw std::logic_error ("Width of transforms must equal height of target matrix"); if( t.Height() != H.DiagonalLength( offset ) ) throw std::logic_error("t must be the same length as H's offset diag"); #endif typedef Complex<R> C; Matrix<C> HTL, HTR, H00, H01, H02, HPan, HBL, HBR, H10, H11, H12, H20, H21, H22; Matrix<C> AT, A0, ATop, AB, A1, A2; Matrix<C> tT, t0, tB, t1, t2; Matrix<C> HPanCopy; Matrix<C> SInv, Z; LockedPartitionDownDiagonal ( H, HTL, HTR, HBL, HBR, 0 ); LockedPartitionDown ( t, tT, tB, 0 ); PartitionDown ( A, AT, AB, 0 ); while( HTL.Height() < H.Height() && HTL.Width() < H.Width() ) { LockedRepartitionDownDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 ); const int HPanHeight = H01.Height() + H11.Height(); const int HPanOffset = std::min( H11.Width(), std::max(offset-H00.Width(),0) ); const int HPanWidth = H11.Width()-HPanOffset; HPan.LockedView( H, 0, H00.Width()+HPanOffset, HPanHeight, HPanWidth ); LockedRepartitionDown ( tT, t0, /**/ /**/ t1, tB, t2, HPanWidth ); RepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 ); ATop.View2x1( A0, A1 ); Zeros( HPan.Width(), ATop.Width(), Z ); Zeros( HPan.Width(), HPan.Width(), SInv ); //--------------------------------------------------------------------// HPanCopy = HPan; MakeTrapezoidal( RIGHT, UPPER, offset, HPanCopy ); SetDiagonalToOne( RIGHT, offset, HPanCopy ); Herk( LOWER, ADJOINT, C(1), HPanCopy, C(0), SInv ); FixDiagonal( conjugation, t1, SInv ); Gemm( ADJOINT, NORMAL, C(1), HPanCopy, ATop, C(0), Z ); Trsm( LEFT, LOWER, NORMAL, NON_UNIT, C(1), SInv, Z ); Gemm( 
NORMAL, NORMAL, C(-1), HPanCopy, Z, C(1), ATop ); //--------------------------------------------------------------------// SlideLockedPartitionDownDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 ); SlideLockedPartitionDown ( tT, t0, t1, /**/ /**/ tB, t2 ); SlidePartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void RUVF ( Conjugation conjugation, Int offset, const Matrix<F>& H, const Matrix<F>& t, Matrix<F>& A ) { #ifndef RELEASE CallStackEntry cse("apply_packed_reflectors::RUVF"); // TODO: Proper dimension checks if( t.Height() != H.DiagonalLength(offset) ) LogicError("t must be the same length as H's offset diag"); #endif Matrix<F> HTL, HTR, H00, H01, H02, HPan, HPanCopy, HBL, HBR, H10, H11, H12, H20, H21, H22; Matrix<F> ALeft; Matrix<F> tT, t0, tB, t1, t2; Matrix<F> SInv, Z; LockedPartitionDownOffsetDiagonal ( offset, H, HTL, HTR, HBL, HBR, 0 ); LockedPartitionDown ( t, tT, tB, 0 ); while( HTL.Height() < H.Height() && HTL.Width() < H.Width() ) { LockedRepartitionDownDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 ); LockedRepartitionDown ( tT, t0, /**/ /**/ t1, tB, t2 ); LockedView2x1( HPan, H01, H11 ); View( ALeft, A, 0, 0, A.Height(), HPan.Height() ); //--------------------------------------------------------------------// HPanCopy = HPan; MakeTrapezoidal( UPPER, HPanCopy, 0, RIGHT ); SetDiagonal( HPanCopy, F(1), 0, RIGHT ); Herk( UPPER, ADJOINT, F(1), HPanCopy, SInv ); FixDiagonal( conjugation, t1, SInv ); Gemm( NORMAL, NORMAL, F(1), ALeft, HPanCopy, Z ); Trsm( RIGHT, UPPER, NORMAL, NON_UNIT, F(1), SInv, Z ); Gemm( NORMAL, ADJOINT, F(-1), Z, HPanCopy, F(1), ALeft ); //--------------------------------------------------------------------// SlideLockedPartitionDownDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 ); SlideLockedPartitionDown ( tT, t0, t1, /**/ /**/ tB, t2 ); } }
inline void RLHF ( Conjugation conjugation, int offset, const Matrix<Complex<R> >& H, const Matrix<Complex<R> >& t, Matrix<Complex<R> >& A ) { #ifndef RELEASE PushCallStack("apply_packed_reflectors::RLHF"); if( offset > 0 || offset < -H.Width() ) throw std::logic_error("Transforms out of bounds"); if( H.Width() != A.Width() ) throw std::logic_error ("Width of transforms must equal width of target matrix"); if( t.Height() != H.DiagonalLength( offset ) ) throw std::logic_error("t must be the same length as H's offset diag"); #endif typedef Complex<R> C; Matrix<C> HTL, HTR, H00, H01, H02, HPan, HPanCopy, HBL, HBR, H10, H11, H12, H20, H21, H22; Matrix<C> ALeft; Matrix<C> tT, t0, tB, t1, t2; Matrix<C> SInv, Z; LockedPartitionDownDiagonal ( H, HTL, HTR, HBL, HBR, 0 ); LockedPartitionDown ( t, tT, tB, 0 ); while( HTL.Height() < H.Height() && HTL.Width() < H.Width() ) { LockedRepartitionDownDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 ); const int HPanWidth = H10.Width() + H11.Width(); const int HPanOffset = std::min( H11.Height(), std::max(-offset-H00.Height(),0) ); const int HPanHeight = H11.Height()-HPanOffset; LockedView ( HPan, H, H00.Height()+HPanOffset, 0, HPanHeight, HPanWidth ); LockedRepartitionDown ( tT, t0, /**/ /**/ t1, tB, t2, HPanHeight ); View( ALeft, A, 0, 0, A.Height(), HPanWidth ); Zeros( ALeft.Height(), HPan.Height(), Z ); Zeros( HPan.Height(), HPan.Height(), SInv ); //--------------------------------------------------------------------// HPanCopy = HPan; MakeTrapezoidal( RIGHT, LOWER, offset, HPanCopy ); SetDiagonal( RIGHT, offset, HPanCopy, C(1) ); Herk( UPPER, NORMAL, C(1), HPanCopy, C(0), SInv ); FixDiagonal( conjugation, t1, SInv ); Gemm( NORMAL, ADJOINT, C(1), ALeft, HPanCopy, C(0), Z ); Trsm( RIGHT, UPPER, NORMAL, NON_UNIT, C(1), SInv, Z ); Gemm( NORMAL, NORMAL, C(-1), Z, HPanCopy, C(1), ALeft ); 
//--------------------------------------------------------------------// SlideLockedPartitionDownDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 ); SlideLockedPartitionDown ( tT, t0, t1, /**/ /**/ tB, t2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void LUHF ( Conjugation conjugation, Int offset, const Matrix<F>& H, const Matrix<F>& t, Matrix<F>& A ) { #ifndef RELEASE CallStackEntry cse("apply_packed_reflectors::LUHF"); // TODO: Proper dimension checks if( t.Height() != H.DiagonalLength(offset) ) LogicError("t must be the same length as H's offset diag"); #endif Matrix<F> HTL, HTR, H00, H01, H02, HPan, HPanCopy, HBL, HBR, H10, H11, H12, H20, H21, H22; Matrix<F> AT, A0, AB, A1, A2; Matrix<F> tT, t0, tB, t1, t2; Matrix<F> SInv, Z; LockedPartitionDownOffsetDiagonal ( offset, H, HTL, HTR, HBL, HBR, 0 ); LockedPartitionDown ( t, tT, tB, 0 ); PartitionDown ( A, AT, AB, 0 ); while( HTL.Height() < H.Height() && HTL.Width() < H.Width() ) { LockedRepartitionDownDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 ); LockedRepartitionDown ( tT, t0, /**/ /**/ t1, tB, t2 ); RepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2, H11.Height() ); LockedView1x2( HPan, H11, H12 ); //--------------------------------------------------------------------// HPanCopy = HPan; MakeTriangular( UPPER, HPanCopy ); SetDiagonal( HPanCopy, F(1) ); Herk( LOWER, NORMAL, F(1), HPanCopy, SInv ); FixDiagonal( conjugation, t1, SInv ); Gemm( NORMAL, NORMAL, F(1), HPanCopy, AB, Z ); Trsm( LEFT, LOWER, NORMAL, NON_UNIT, F(1), SInv, Z ); Gemm( ADJOINT, NORMAL, F(-1), HPanCopy, Z, F(1), AB ); //--------------------------------------------------------------------// SlideLockedPartitionDownDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 ); SlideLockedPartitionDown ( tT, t0, t1, /**/ /**/ tB, t2 ); SlidePartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); } }
inline void ApplyPackedReflectorsLUHB ( Conjugation conjugation, int offset, const Matrix<Complex<R> >& H, const Matrix<Complex<R> >& t, Matrix<Complex<R> >& A ) { #ifndef RELEASE PushCallStack("internal::ApplyPackedReflectorsLUHB"); if( offset < 0 || offset > H.Width() ) throw std::logic_error("Transforms out of bounds"); if( H.Width() != A.Height() ) throw std::logic_error ("Width of transforms must equal height of target matrix"); if( t.Height() != H.DiagonalLength( offset ) ) throw std::logic_error("t must be the same length as H's offset diag"); #endif typedef Complex<R> C; Matrix<C> HTL, HTR, H00, H01, H02, HPan, HPanCopy, HBL, HBR, H10, H11, H12, H20, H21, H22; Matrix<C> ABottom; Matrix<C> tT, t0, tB, t1, t2; Matrix<C> SInv, Z; LockedPartitionUpDiagonal ( H, HTL, HTR, HBL, HBR, 0 ); LockedPartitionUp ( t, tT, tB, 0 ); while( HBR.Height() < H.Height() && HBR.Width() < H.Width() ) { LockedRepartitionUpDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 ); const int HPanWidth = H11.Width() + H12.Width(); const int HPanHeight = std::min( H11.Height(), std::max(HPanWidth-offset,0) ); const int leftover = A.Height()-HPanWidth; HPan.LockedView( H, H00.Height(), H00.Width(), HPanHeight, HPanWidth ); LockedRepartitionUp ( tT, t0, t1, /**/ /**/ tB, t2, HPanHeight ); ABottom.View( A, leftover, 0, HPanWidth, A.Width() ); Zeros( HPanHeight, ABottom.Width(), Z ); Zeros( HPanHeight, HPanHeight, SInv ); //--------------------------------------------------------------------// HPanCopy = HPan; MakeTrapezoidal( LEFT, UPPER, offset, HPanCopy ); SetDiagonalToOne( LEFT, offset, HPanCopy ); Herk( UPPER, NORMAL, C(1), HPanCopy, C(0), SInv ); FixDiagonal( conjugation, t1, SInv ); Gemm( NORMAL, NORMAL, C(1), HPanCopy, ABottom, C(0), Z ); Trsm( LEFT, UPPER, NORMAL, NON_UNIT, C(1), SInv, Z ); Gemm( ADJOINT, NORMAL, C(-1), HPanCopy, Z, C(1), ABottom ); 
//--------------------------------------------------------------------// SlideLockedPartitionUpDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 ); SlideLockedPartitionUp ( tT, t0, /**/ /**/ t1, tB, t2 ); } #ifndef RELEASE PopCallStack(); #endif }