void Herk ( UpperOrLower uplo, Orientation orientation, Base<T> alpha, const Matrix<T>& A, Base<T> beta, Matrix<T>& C ) { DEBUG_ONLY(CSE cse("Herk")) Syrk( uplo, orientation, T(alpha), A, T(beta), C, true ); }
inline void RLHF( int offset, const Matrix<R>& H, Matrix<R>& A ) { #ifndef RELEASE CallStackEntry entry("apply_packed_reflectors::RLHF"); if( offset > 0 || offset < -H.Width() ) throw std::logic_error("Transforms out of bounds"); if( H.Width() != A.Width() ) throw std::logic_error ("Width of transforms must equal width of target matrix"); #endif Matrix<R> HTL, HTR, H00, H01, H02, HPan, HPanCopy, HBL, HBR, H10, H11, H12, H20, H21, H22; Matrix<R> ALeft; Matrix<R> SInv, Z; LockedPartitionDownDiagonal ( H, HTL, HTR, HBL, HBR, 0 ); while( HTL.Height() < H.Height() && HTL.Width() < H.Width() ) { LockedRepartitionDownDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 ); const int HPanWidth = H10.Width() + H11.Width(); const int HPanOffset = std::min( H11.Height(), std::max(-offset-H00.Height(),0) ); const int HPanHeight = H11.Height()-HPanOffset; LockedView ( HPan, H, H00.Height()+HPanOffset, 0, HPanHeight, HPanWidth ); View( ALeft, A, 0, 0, A.Height(), HPanWidth ); //--------------------------------------------------------------------// HPanCopy = HPan; MakeTrapezoidal( RIGHT, LOWER, offset, HPanCopy ); SetDiagonal( RIGHT, offset, HPanCopy, R(1) ); Syrk( UPPER, NORMAL, R(1), HPanCopy, SInv ); HalveMainDiagonal( SInv ); Gemm( NORMAL, TRANSPOSE, R(1), ALeft, HPanCopy, Z ); Trsm( RIGHT, UPPER, NORMAL, NON_UNIT, R(1), SInv, Z ); Gemm( NORMAL, NORMAL, R(-1), Z, HPanCopy, R(1), ALeft ); //--------------------------------------------------------------------// SlideLockedPartitionDownDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 ); } }
inline void Herk ( UpperOrLower uplo, Orientation orientation, T alpha, const DistMatrix<T>& A, T beta, DistMatrix<T>& C ) { #ifndef RELEASE CallStackEntry entry("Herk"); #endif Syrk( uplo, orientation, alpha, A, beta, C, true ); }
inline void Herk ( UpperOrLower uplo, Orientation orientation, T alpha, const DistMatrix<T>& A, DistMatrix<T>& C ) { #ifndef RELEASE CallStackEntry entry("Herk"); #endif const Int n = ( orientation==NORMAL ? A.Height() : A.Width() ); Zeros( C, n, n ); Syrk( uplo, orientation, alpha, A, T(0), C, true ); }
void LT_Dot ( T alpha, const AbstractDistMatrix<T>& APre, AbstractDistMatrix<T>& CPre, const bool conjugate, Int blockSize=2000 ) { EL_DEBUG_CSE const Int n = CPre.Height(); const Grid& g = APre.Grid(); const Orientation orient = ( conjugate ? ADJOINT : TRANSPOSE ); DistMatrixReadProxy<T,T,VC,STAR> AProx( APre ); auto& A = AProx.GetLocked(); DistMatrixReadWriteProxy<T,T,MC,MR> CProx( CPre ); auto& C = CProx.Get(); DistMatrix<T,STAR,STAR> Z( blockSize, blockSize, g ); Zero( Z ); for( Int kOuter=0; kOuter<n; kOuter+=blockSize ) { const Int nbOuter = Min(blockSize,n-kOuter); const Range<Int> indOuter( kOuter, kOuter+nbOuter ); auto A1 = A( ALL, indOuter ); auto C11 = C( indOuter, indOuter ); Z.Resize( nbOuter, nbOuter ); Syrk( LOWER, TRANSPOSE, alpha, A1.Matrix(), Z.Matrix(), conjugate ); AxpyContract( T(1), Z, C11 ); for( Int kInner=kOuter+nbOuter; kInner<n; kInner+=blockSize ) { const Int nbInner = Min(blockSize,n-kInner); const Range<Int> indInner( kInner, kInner+nbInner ); auto A2 = A( ALL, indInner ); auto C21 = C( indInner, indOuter ); LocalGemm( orient, NORMAL, alpha, A1, A2, Z ); AxpyContract( T(1), Z, C21 ); } } }
inline void internal::ApplyPackedReflectorsLLVF ( int offset, const DistMatrix<R,MC,MR>& H, DistMatrix<R,MC,MR>& A ) { #ifndef RELEASE PushCallStack("internal::ApplyPackedReflectorsLLVF"); if( H.Grid() != A.Grid() ) throw std::logic_error("{H,A} must be distributed over the same grid"); if( offset > 0 ) throw std::logic_error("Transforms cannot extend above matrix"); if( offset < -H.Height() ) throw std::logic_error("Transforms cannot extend below matrix"); if( H.Height() != A.Height() ) throw std::logic_error ("Height of transforms must equal height of target matrix"); #endif const Grid& g = H.Grid(); // Matrix views DistMatrix<R,MC,MR> HTL(g), HTR(g), H00(g), H01(g), H02(g), HPan(g), HPanCopy(g), HBL(g), HBR(g), H10(g), H11(g), H12(g), H20(g), H21(g), H22(g); DistMatrix<R,MC,MR> AT(g), A0(g), AB(g), A1(g), A2(g); DistMatrix<R,VC, STAR> HPan_VC_STAR(g); DistMatrix<R,MC, STAR> HPan_MC_STAR(g); DistMatrix<R,STAR,STAR> SInv_STAR_STAR(g); DistMatrix<R,STAR,MR > Z_STAR_MR(g); DistMatrix<R,STAR,VR > Z_STAR_VR(g); LockedPartitionDownDiagonal ( H, HTL, HTR, HBL, HBR, 0 ); PartitionDown ( A, AT, AB, 0 ); while( HTL.Height() < H.Height() && HTL.Width() < H.Width() ) { LockedRepartitionDownDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 ); int HPanHeight = H11.Height() + H21.Height(); int HPanWidth = std::min( H11.Width(), std::max(HPanHeight+offset,0) ); HPan.LockedView( H, H00.Height(), H00.Width(), HPanHeight, HPanWidth ); RepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 ); HPan_MC_STAR.AlignWith( AB ); Z_STAR_MR.AlignWith( AB ); Z_STAR_VR.AlignWith( AB ); Z_STAR_MR.ResizeTo( HPanWidth, AB.Width() ); SInv_STAR_STAR.ResizeTo( HPanWidth, HPanWidth ); Zero( SInv_STAR_STAR ); //--------------------------------------------------------------------// HPanCopy = HPan; MakeTrapezoidal( LEFT, LOWER, offset, HPanCopy ); SetDiagonalToOne( LEFT, offset, HPanCopy ); HPan_VC_STAR = HPanCopy; Syrk ( UPPER, TRANSPOSE, (R)1, HPan_VC_STAR.LockedLocalMatrix(), (R)0, SInv_STAR_STAR.LocalMatrix() ); SInv_STAR_STAR.SumOverGrid(); HalveMainDiagonal( SInv_STAR_STAR ); HPan_MC_STAR = HPanCopy; internal::LocalGemm ( TRANSPOSE, NORMAL, (R)1, HPan_MC_STAR, AB, (R)0, Z_STAR_MR ); Z_STAR_VR.SumScatterFrom( Z_STAR_MR ); internal::LocalTrsm ( LEFT, UPPER, TRANSPOSE, NON_UNIT, (R)1, SInv_STAR_STAR, Z_STAR_VR ); Z_STAR_MR = Z_STAR_VR; internal::LocalGemm ( NORMAL, NORMAL, (R)-1, HPan_MC_STAR, Z_STAR_MR, (R)1, AB ); //--------------------------------------------------------------------// HPan_MC_STAR.FreeAlignments(); Z_STAR_MR.FreeAlignments(); Z_STAR_VR.FreeAlignments(); SlideLockedPartitionDownDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 ); SlidePartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void ApplyPackedReflectorsLUVF ( int offset, const Matrix<R>& H, Matrix<R>& A ) { #ifndef RELEASE PushCallStack("internal::ApplyPackedReflectorsLUVF"); if( offset < 0 || offset > H.Height() ) throw std::logic_error("Transforms out of bounds"); if( H.Width() != A.Height() ) throw std::logic_error ("Width of transforms must equal height of target matrix"); #endif Matrix<R> HTL, HTR, H00, H01, H02, HPan, HPanCopy, HBL, HBR, H10, H11, H12, H20, H21, H22; Matrix<R> AT, A0, ATop, AB, A1, A2; Matrix<R> SInv, Z; LockedPartitionDownDiagonal ( H, HTL, HTR, HBL, HBR, 0 ); PartitionDown ( A, AT, AB, 0 ); while( HTL.Height() < H.Height() && HTL.Width() < H.Width() ) { LockedRepartitionDownDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 ); const int HPanHeight = H01.Height() + H11.Height(); const int HPanOffset = std::min( H11.Width(), std::max(offset-H00.Width(),0) ); const int HPanWidth = H11.Width()-HPanOffset; HPan.LockedView( H, 0, H00.Width()+HPanOffset, HPanHeight, HPanWidth ); RepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 ); ATop.View2x1( A0, A1 ); Zeros( HPan.Width(), ATop.Width(), Z ); Zeros( HPan.Width(), HPan.Width(), SInv ); //--------------------------------------------------------------------// HPanCopy = HPan; MakeTrapezoidal( RIGHT, UPPER, offset, HPanCopy ); SetDiagonalToOne( RIGHT, offset, HPanCopy ); Syrk( LOWER, TRANSPOSE, R(1), HPanCopy, R(0), SInv ); HalveMainDiagonal( SInv ); Gemm( TRANSPOSE, NORMAL, R(1), HPanCopy, ATop, R(0), Z ); Trsm( LEFT, LOWER, NORMAL, NON_UNIT, R(1), SInv, Z ); Gemm( NORMAL, NORMAL, R(-1), HPanCopy, Z, R(1), ATop ); //--------------------------------------------------------------------// SlideLockedPartitionDownDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 ); SlidePartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void ApplyPackedReflectorsLUVF ( int offset, const DistMatrix<R>& H, DistMatrix<R>& A ) { #ifndef RELEASE PushCallStack("internal::ApplyPackedReflectorsLUVF"); if( H.Grid() != A.Grid() ) throw std::logic_error("{H,A} must be distributed over the same grid"); if( offset < 0 || offset > H.Height() ) throw std::logic_error("Transforms out of bounds"); if( H.Width() != A.Height() ) throw std::logic_error ("Width of transforms must equal height of target matrix"); #endif const Grid& g = H.Grid(); DistMatrix<R> HTL(g), HTR(g), H00(g), H01(g), H02(g), HPan(g), HBL(g), HBR(g), H10(g), H11(g), H12(g), H20(g), H21(g), H22(g); DistMatrix<R> AT(g), A0(g), ATop(g), AB(g), A1(g), A2(g); DistMatrix<R> HPanCopy(g); DistMatrix<R,VC, STAR> HPan_VC_STAR(g); DistMatrix<R,MC, STAR> HPan_MC_STAR(g); DistMatrix<R,STAR,STAR> SInv_STAR_STAR(g); DistMatrix<R,STAR,MR > Z_STAR_MR(g); DistMatrix<R,STAR,VR > Z_STAR_VR(g); LockedPartitionDownDiagonal ( H, HTL, HTR, HBL, HBR, 0 ); PartitionDown ( A, AT, AB, 0 ); while( HTL.Height() < H.Height() && HTL.Width() < H.Width() ) { LockedRepartitionDownDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 ); const int HPanHeight = H01.Height() + H11.Height(); const int HPanOffset = std::min( H11.Width(), std::max(offset-H00.Width(),0) ); const int HPanWidth = H11.Width()-HPanOffset; HPan.LockedView( H, 0, H00.Width()+HPanOffset, HPanHeight, HPanWidth ); RepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 ); ATop.View2x1( A0, A1 ); HPan_MC_STAR.AlignWith( ATop ); Z_STAR_MR.AlignWith( ATop ); Z_STAR_VR.AlignWith( ATop ); Zeros( HPan.Width(), ATop.Width(), Z_STAR_MR ); Zeros( HPan.Width(), HPan.Width(), SInv_STAR_STAR ); //--------------------------------------------------------------------// HPanCopy = HPan; MakeTrapezoidal( RIGHT, UPPER, offset, HPanCopy ); SetDiagonalToOne( RIGHT, offset, HPanCopy ); HPan_VC_STAR = HPanCopy; Syrk ( LOWER, TRANSPOSE, R(1), HPan_VC_STAR.LockedLocalMatrix(), R(0), SInv_STAR_STAR.LocalMatrix() ); SInv_STAR_STAR.SumOverGrid(); HalveMainDiagonal( SInv_STAR_STAR ); HPan_MC_STAR = HPanCopy; LocalGemm ( TRANSPOSE, NORMAL, R(1), HPan_MC_STAR, ATop, R(0), Z_STAR_MR ); Z_STAR_VR.SumScatterFrom( Z_STAR_MR ); LocalTrsm ( LEFT, LOWER, NORMAL, NON_UNIT, R(1), SInv_STAR_STAR, Z_STAR_VR ); Z_STAR_MR = Z_STAR_VR; LocalGemm( NORMAL, NORMAL, R(-1), HPan_MC_STAR, Z_STAR_MR, R(1), ATop ); //--------------------------------------------------------------------// HPan_MC_STAR.FreeAlignments(); Z_STAR_MR.FreeAlignments(); Z_STAR_VR.FreeAlignments(); SlideLockedPartitionDownDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 ); SlidePartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void RLHF ( int offset, const DistMatrix<R>& H, DistMatrix<R>& A ) { #ifndef RELEASE PushCallStack("apply_packed_reflectors::RLHF"); if( H.Grid() != A.Grid() ) throw std::logic_error("{H,A} must be distributed over the same grid"); if( offset > 0 || offset < -H.Width() ) throw std::logic_error("Transforms out of bounds"); if( H.Width() != A.Width() ) throw std::logic_error ("Width of transforms must equal width of target matrix"); #endif const Grid& g = H.Grid(); DistMatrix<R> HTL(g), HTR(g), H00(g), H01(g), H02(g), HPan(g), HPanCopy(g), HBL(g), HBR(g), H10(g), H11(g), H12(g), H20(g), H21(g), H22(g); DistMatrix<R> ALeft(g); DistMatrix<R,STAR,VR > HPan_STAR_VR(g); DistMatrix<R,STAR,MR > HPan_STAR_MR(g); DistMatrix<R,STAR,STAR> SInv_STAR_STAR(g); DistMatrix<R,STAR,MC > ZTrans_STAR_MC(g); DistMatrix<R,STAR,VC > ZTrans_STAR_VC(g); LockedPartitionDownDiagonal ( H, HTL, HTR, HBL, HBR, 0 ); while( HTL.Height() < H.Height() && HTL.Width() < H.Width() ) { LockedRepartitionDownDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 ); const int HPanWidth = H10.Width() + H11.Width(); const int HPanOffset = std::min( H11.Height(), std::max(-offset-H00.Height(),0) ); const int HPanHeight = H11.Height()-HPanOffset; LockedView ( HPan, H, H00.Height()+HPanOffset, 0, HPanHeight, HPanWidth ); View( ALeft, A, 0, 0, A.Height(), HPanWidth ); HPan_STAR_MR.AlignWith( ALeft ); ZTrans_STAR_MC.AlignWith( ALeft ); ZTrans_STAR_VC.AlignWith( ALeft ); Zeros( HPan.Height(), ALeft.Height(), ZTrans_STAR_MC ); Zeros( HPan.Height(), HPan.Height(), SInv_STAR_STAR ); //--------------------------------------------------------------------// HPanCopy = HPan; MakeTrapezoidal( RIGHT, LOWER, offset, HPanCopy ); SetDiagonal( RIGHT, offset, HPanCopy, R(1) ); HPan_STAR_VR = HPanCopy; Syrk ( UPPER, NORMAL, R(1), HPan_STAR_VR.LockedMatrix(), R(0), SInv_STAR_STAR.Matrix() ); SInv_STAR_STAR.SumOverGrid(); HalveMainDiagonal( SInv_STAR_STAR ); HPan_STAR_MR = HPan_STAR_VR; LocalGemm ( NORMAL, TRANSPOSE, R(1), HPan_STAR_MR, ALeft, R(0), ZTrans_STAR_MC ); ZTrans_STAR_VC.SumScatterFrom( ZTrans_STAR_MC ); LocalTrsm ( LEFT, UPPER, TRANSPOSE, NON_UNIT, R(1), SInv_STAR_STAR, ZTrans_STAR_VC ); ZTrans_STAR_MC = ZTrans_STAR_VC; LocalGemm ( TRANSPOSE, NORMAL, R(-1), ZTrans_STAR_MC, HPan_STAR_MR, R(1), ALeft ); //--------------------------------------------------------------------// HPan_STAR_MR.FreeAlignments(); ZTrans_STAR_MC.FreeAlignments(); ZTrans_STAR_VC.FreeAlignments(); SlideLockedPartitionDownDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void LLVF( int offset, const Matrix<R>& H, Matrix<R>& A ) { #ifndef RELEASE PushCallStack("apply_packed_reflectors::LLVF"); if( offset > 0 || offset < -H.Height() ) throw std::logic_error("Transforms out of bounds"); if( H.Height() != A.Height() ) throw std::logic_error ("Height of transforms must equal height of target matrix"); #endif Matrix<R> HTL, HTR, H00, H01, H02, HPan, HPanCopy, HBL, HBR, H10, H11, H12, H20, H21, H22; Matrix<R> AT, A0, AB, A1, A2; Matrix<R> SInv, Z; LockedPartitionDownDiagonal ( H, HTL, HTR, HBL, HBR, 0 ); PartitionDown ( A, AT, AB, 0 ); while( HTL.Height() < H.Height() && HTL.Width() < H.Width() ) { LockedRepartitionDownDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 ); int HPanHeight = H11.Height() + H21.Height(); int HPanWidth = std::min( H11.Width(), std::max(HPanHeight+offset,0) ); LockedView( HPan, H, H00.Height(), H00.Width(), HPanHeight, HPanWidth ); RepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 ); Zeros( HPanWidth, AB.Width(), Z ); Zeros( HPanWidth, HPanWidth, SInv ); //--------------------------------------------------------------------// HPanCopy = HPan; MakeTrapezoidal( LEFT, LOWER, offset, HPanCopy ); SetDiagonal( LEFT, offset, HPanCopy, R(1) ); Syrk( LOWER, TRANSPOSE, R(1), HPanCopy, R(0), SInv ); HalveMainDiagonal( SInv ); Gemm( TRANSPOSE, NORMAL, R(1), HPanCopy, AB, R(0), Z ); Trsm( LEFT, LOWER, NORMAL, NON_UNIT, R(1), SInv, Z ); Gemm( NORMAL, NORMAL, R(-1), HPanCopy, Z, R(1), AB ); //--------------------------------------------------------------------// SlideLockedPartitionDownDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 ); SlidePartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void ApplyPackedReflectorsLUHB ( int offset, const DistMatrix<R>& H, DistMatrix<R>& A ) { #ifndef RELEASE PushCallStack("internal::ApplyPackedReflectorsLUHB"); if( H.Grid() != A.Grid() ) throw std::logic_error("{H,A} must be distributed over the same grid"); if( offset < 0 || offset > H.Width() ) throw std::logic_error("Transforms out of bounds"); if( H.Width() != A.Height() ) throw std::logic_error ("Width of transforms must equal height of target matrix"); #endif const Grid& g = H.Grid(); DistMatrix<R> HTL(g), HTR(g), H00(g), H01(g), H02(g), HPan(g), HPanCopy(g), HBL(g), HBR(g), H10(g), H11(g), H12(g), H20(g), H21(g), H22(g); DistMatrix<R> ABottom(g); DistMatrix<R,STAR,VR > HPan_STAR_VR(g); DistMatrix<R,STAR,MC > HPan_STAR_MC(g); DistMatrix<R,STAR,STAR> SInv_STAR_STAR(g); DistMatrix<R,STAR,MR > Z_STAR_MR(g); DistMatrix<R,STAR,VR > Z_STAR_VR(g); LockedPartitionUpDiagonal ( H, HTL, HTR, HBL, HBR, 0 ); while( HBR.Height() < H.Height() && HBR.Width() < H.Width() ) { LockedRepartitionUpDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 ); const int HPanWidth = H11.Width() + H12.Width(); const int HPanHeight = std::min( H11.Height(), std::max(HPanWidth-offset,0) ); const int leftover = A.Height()-HPanWidth; HPan.LockedView( H, H00.Height(), H00.Width(), HPanHeight, HPanWidth ); ABottom.View( A, leftover, 0, HPanWidth, A.Width() ); HPan_STAR_MC.AlignWith( ABottom ); Z_STAR_MR.AlignWith( ABottom ); Z_STAR_VR.AlignWith( ABottom ); Zeros( HPanHeight, ABottom.Width(), Z_STAR_MR ); Zeros( HPanHeight, HPanHeight, SInv_STAR_STAR ); //--------------------------------------------------------------------// HPanCopy = HPan; MakeTrapezoidal( LEFT, UPPER, offset, HPanCopy ); SetDiagonalToOne( LEFT, offset, HPanCopy ); HPan_STAR_VR = HPanCopy; Syrk ( UPPER, NORMAL, R(1), HPan_STAR_VR.LockedLocalMatrix(), R(0), SInv_STAR_STAR.LocalMatrix() ); SInv_STAR_STAR.SumOverGrid(); HalveMainDiagonal( SInv_STAR_STAR ); HPan_STAR_MC = HPan_STAR_VR; LocalGemm ( NORMAL, NORMAL, R(1), HPan_STAR_MC, ABottom, R(0), Z_STAR_MR ); Z_STAR_VR.SumScatterFrom( Z_STAR_MR ); LocalTrsm ( LEFT, UPPER, NORMAL, NON_UNIT, R(1), SInv_STAR_STAR, Z_STAR_VR ); Z_STAR_MR = Z_STAR_VR; LocalGemm ( TRANSPOSE, NORMAL, R(-1), HPan_STAR_MC, Z_STAR_MR, R(1), ABottom ); //--------------------------------------------------------------------// HPan_STAR_MC.FreeAlignments(); Z_STAR_MR.FreeAlignments(); Z_STAR_VR.FreeAlignments(); SlideLockedPartitionUpDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void ApplyPackedReflectorsLUHB ( int offset, const Matrix<R>& H, Matrix<R>& A ) { #ifndef RELEASE PushCallStack("internal::ApplyPackedReflectorsLUHB"); if( offset < 0 || offset > H.Width() ) throw std::logic_error("Transforms out of bounds"); if( H.Width() != A.Height() ) throw std::logic_error ("Width of transforms must equal height of target matrix"); #endif Matrix<R> HTL, HTR, H00, H01, H02, HPan, HPanCopy, HBL, HBR, H10, H11, H12, H20, H21, H22; Matrix<R> ABottom; Matrix<R> SInv, Z; LockedPartitionUpDiagonal ( H, HTL, HTR, HBL, HBR, 0 ); while( HBR.Height() < H.Height() && HBR.Width() < H.Width() ) { LockedRepartitionUpDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 ); const int HPanWidth = H11.Width() + H12.Width(); const int HPanHeight = std::min( H11.Height(), std::max(HPanWidth-offset,0) ); const int leftover = A.Height()-HPanWidth; HPan.LockedView( H, H00.Height(), H00.Width(), HPanHeight, HPanWidth ); ABottom.View( A, leftover, 0, HPanWidth, A.Width() ); Zeros( HPanHeight, ABottom.Width(), Z ); Zeros( HPanHeight, HPanHeight, SInv ); //--------------------------------------------------------------------// HPanCopy = HPan; MakeTrapezoidal( LEFT, UPPER, offset, HPanCopy ); SetDiagonalToOne( LEFT, offset, HPanCopy ); Syrk( UPPER, NORMAL, R(1), HPanCopy, R(0), SInv ); HalveMainDiagonal( SInv ); Gemm( NORMAL, NORMAL, R(1), HPanCopy, ABottom, R(0), Z ); Trsm( LEFT, UPPER, NORMAL, NON_UNIT, R(1), SInv, Z ); Gemm( TRANSPOSE, NORMAL, R(-1), HPanCopy, Z, R(1), ABottom ); //--------------------------------------------------------------------// SlideLockedPartitionUpDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void RLVB( int offset, const Matrix<R>& H, Matrix<R>& A ) { #ifndef RELEASE PushCallStack("apply_packed_reflectors::RLVB"); if( offset > 0 || offset < -H.Height() ) throw std::logic_error("Transforms out of bounds"); if( H.Height() != A.Width() ) throw std::logic_error ("Height of transforms must equal width of target matrix"); #endif Matrix<R> HTL, HTR, H00, H01, H02, HPan, HPanCopy, HBL, HBR, H10, H11, H12, H20, H21, H22; Matrix<R> ARight; Matrix<R> SInv, Z; LockedPartitionUpDiagonal ( H, HTL, HTR, HBL, HBR, 0 ); while( HBR.Height() < H.Height() && HBR.Width() < H.Width() ) { LockedRepartitionUpDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 ); const int HPanHeight = H11.Height() + H21.Height(); const int HPanWidth = std::min( H11.Width(), std::max(HPanHeight+offset,0) ); const int leftover = A.Width()-HPanHeight; LockedView( HPan, H, H00.Height(), H00.Width(), HPanHeight, HPanWidth ); View( ARight, A, 0, leftover, A.Height(), HPanHeight ); Zeros( ARight.Height(), HPanWidth, Z ); Zeros( HPanWidth, HPanWidth, SInv ); //--------------------------------------------------------------------// HPanCopy = HPan; MakeTrapezoidal( LEFT, LOWER, offset, HPanCopy ); SetDiagonal( LEFT, offset, HPanCopy, R(1) ); Syrk( LOWER, TRANSPOSE, R(1), HPanCopy, R(0), SInv ); HalveMainDiagonal( SInv ); Gemm( NORMAL, NORMAL, R(1), ARight, HPanCopy, R(0), Z ); Trsm( RIGHT, LOWER, NORMAL, NON_UNIT, R(1), SInv, Z ); Gemm( NORMAL, TRANSPOSE, R(-1), Z, HPanCopy, R(1), ARight ); //--------------------------------------------------------------------// SlideLockedPartitionUpDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 ); } #ifndef RELEASE PopCallStack(); #endif }