inline void RUVB( int offset, const Matrix<R>& H, Matrix<R>& A ) { #ifndef RELEASE CallStackEntry entry("apply_packed_reflectors::RUVB"); if( offset < 0 || offset > H.Height() ) throw std::logic_error("Transforms out of bounds"); if( H.Height() != A.Width() ) throw std::logic_error ("Height of transforms must equal width of target matrix"); #endif Matrix<R> HTL, HTR, H00, H01, H02, HPan, HPanCopy, HBL, HBR, H10, H11, H12, H20, H21, H22; Matrix<R> ALeft; Matrix<R> SInv, Z; LockedPartitionUpDiagonal ( H, HTL, HTR, HBL, HBR, 0 ); while( HBR.Height() < H.Height() && HBR.Width() < H.Width() ) { LockedRepartitionUpDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 ); const int HPanHeight = H01.Height() + H11.Height(); const int HPanOffset = std::min( H11.Width(), std::max(offset-H00.Width(),0) ); const int HPanWidth = H11.Width()-HPanOffset; LockedView( HPan, H, 0, H00.Width()+HPanOffset, HPanHeight, HPanWidth ); View( ALeft, A, 0, 0, A.Height(), HPanHeight ); //--------------------------------------------------------------------// HPanCopy = HPan; MakeTrapezoidal( RIGHT, UPPER, offset, HPanCopy ); SetDiagonal( RIGHT, offset, HPanCopy, R(1) ); Syrk( LOWER, TRANSPOSE, R(1), HPanCopy, SInv ); HalveMainDiagonal( SInv ); Gemm( NORMAL, NORMAL, R(1), ALeft, HPanCopy, Z ); Trsm( RIGHT, LOWER, NORMAL, NON_UNIT, R(1), SInv, Z ); Gemm( NORMAL, TRANSPOSE, R(-1), Z, HPanCopy, R(1), ALeft ); //--------------------------------------------------------------------// SlideLockedPartitionUpDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 ); } }
inline void TrsmLUNLarge ( UnitOrNonUnit diag, F alpha, const DistMatrix<F>& U, DistMatrix<F>& X, bool checkIfSingular ) { #ifndef RELEASE PushCallStack("internal::TrsmLUNLarge"); #endif const Grid& g = U.Grid(); // Matrix views DistMatrix<F> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); DistMatrix<F> XT(g), X0(g), XB(g), X1(g), X2(g); // Temporary distributions DistMatrix<F,MC, STAR> U01_MC_STAR(g); DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); DistMatrix<F,STAR,MR > X1_STAR_MR(g); DistMatrix<F,STAR,VR > X1_STAR_VR(g); // Start the algorithm Scale( alpha, X ); LockedPartitionUpDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); PartitionUp ( X, XT, XB, 0 ); while( XT.Height() > 0 ) { LockedRepartitionUpDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); RepartitionUp ( XT, X0, X1, /**/ /**/ XB, X2 ); U01_MC_STAR.AlignWith( X0 ); X1_STAR_MR.AlignWith( X0 ); //--------------------------------------------------------------------// U11_STAR_STAR = U11; // U11[* ,* ] <- U11[MC,MR] X1_STAR_VR = X1; // X1[* ,VR] <- X1[MC,MR] // X1[* ,VR] := U11^-1[* ,* ] X1[* ,VR] LocalTrsm ( LEFT, UPPER, NORMAL, diag, F(1), U11_STAR_STAR, X1_STAR_VR, checkIfSingular ); X1_STAR_MR = X1_STAR_VR; // X1[* ,MR] <- X1[* ,VR] X1 = X1_STAR_MR; // X1[MC,MR] <- X1[* ,MR] U01_MC_STAR = U01; // U01[MC,* ] <- U01[MC,MR] // X0[MC,MR] -= U01[MC,* ] X1[* ,MR] LocalGemm( NORMAL, NORMAL, F(-1), U01_MC_STAR, X1_STAR_MR, F(1), X0 ); //--------------------------------------------------------------------// U01_MC_STAR.FreeAlignments(); X1_STAR_MR.FreeAlignments(); SlideLockedPartitionUpDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); SlidePartitionUp ( XT, X0, /**/ /**/ X1, XB, X2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void TrsmLUNMedium ( UnitOrNonUnit diag, F alpha, const DistMatrix<F>& U, DistMatrix<F>& X, bool checkIfSingular ) { #ifndef RELEASE CallStackEntry entry("internal::TrsmLUNMedium"); #endif const Grid& g = U.Grid(); // Matrix views DistMatrix<F> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); DistMatrix<F> XT(g), X0(g), XB(g), X1(g), X2(g); // Temporary distributions DistMatrix<F,MC, STAR> U01_MC_STAR(g); DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); DistMatrix<F,MR, STAR> X1Trans_MR_STAR(g); // Start the algorithm Scale( alpha, X ); LockedPartitionUpDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); PartitionUp ( X, XT, XB, 0 ); while( XT.Height() > 0 ) { LockedRepartitionUpDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); RepartitionUp ( XT, X0, X1, /**/ /**/ XB, X2 ); U01_MC_STAR.AlignWith( X0 ); X1Trans_MR_STAR.AlignWith( X0 ); //--------------------------------------------------------------------// U11_STAR_STAR = U11; // U11[* ,* ] <- U11[MC,MR] X1Trans_MR_STAR.TransposeFrom( X1 ); // X1[* ,MR] <- X1[MC,MR] // X1[* ,MR] := U11^-1[* ,* ] X1[* ,MR] // // X1^T[MR,* ] := X1^T[MR,* ] U11^-T[* ,* ] LocalTrsm ( RIGHT, UPPER, TRANSPOSE, diag, F(1), U11_STAR_STAR, X1Trans_MR_STAR, checkIfSingular ); X1.TransposeFrom( X1Trans_MR_STAR ); U01_MC_STAR = U01; // U01[MC,* ] <- U01[MC,MR] // X0[MC,MR] -= U01[MC,* ] X1[* ,MR] LocalGemm ( NORMAL, TRANSPOSE, F(-1), U01_MC_STAR, X1Trans_MR_STAR, F(1), X0 ); //--------------------------------------------------------------------// U01_MC_STAR.FreeAlignments(); X1Trans_MR_STAR.FreeAlignments(); SlideLockedPartitionUpDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); SlidePartitionUp ( XT, X0, /**/ /**/ X1, XB, X2 ); } }
inline void internal::TrsvUN ( UnitOrNonUnit diag, const DistMatrix<F,MC,MR>& U, DistMatrix<F,MC,MR>& x ) { #ifndef RELEASE PushCallStack("internal::TrsvUN"); if( U.Grid() != x.Grid() ) throw std::logic_error("{U,x} must be distributed over the same grid"); if( U.Height() != U.Width() ) throw std::logic_error("U must be square"); if( x.Width() != 1 && x.Height() != 1 ) throw std::logic_error("x must be a vector"); const int xLength = ( x.Width() == 1 ? x.Height() : x.Width() ); if( U.Width() != xLength ) throw std::logic_error("Nonconformal TrsvUN"); #endif const Grid& g = U.Grid(); if( x.Width() == 1 ) { // Matrix views DistMatrix<F,MC,MR> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); DistMatrix<F,MC,MR> xT(g), x0(g), xB(g), x1(g), x2(g); // Temporary distributions DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); DistMatrix<F,STAR,STAR> x1_STAR_STAR(g); DistMatrix<F,MR, STAR> x1_MR_STAR(g); DistMatrix<F,MC, STAR> z0_MC_STAR(g); // Start the algorithm LockedPartitionUpDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); PartitionUp ( x, xT, xB, 0 ); while( xT.Height() > 0 ) { LockedRepartitionUpDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); RepartitionUp ( xT, x0, x1, /**/ /**/ xB, x2 ); x1_MR_STAR.AlignWith( U01 ); z0_MC_STAR.AlignWith( U01 ); z0_MC_STAR.ResizeTo( x0.Height(), 1 ); //----------------------------------------------------------------// x1_STAR_STAR = x1; U11_STAR_STAR = U11; Trsv ( UPPER, NORMAL, diag, U11_STAR_STAR.LockedLocalMatrix(), x1_STAR_STAR.LocalMatrix() ); x1 = x1_STAR_STAR; x1_MR_STAR = x1_STAR_STAR; Gemv ( NORMAL, (F)-1, U01.LockedLocalMatrix(), x1_MR_STAR.LockedLocalMatrix(), (F)0, z0_MC_STAR.LocalMatrix() ); x0.SumScatterUpdate( (F)1, z0_MC_STAR ); //----------------------------------------------------------------// x1_MR_STAR.FreeAlignments(); z0_MC_STAR.FreeAlignments(); SlideLockedPartitionUpDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); SlidePartitionUp ( xT, x0, /**/ /**/ x1, xB, x2 ); } } else { // Matrix views DistMatrix<F,MC,MR> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); DistMatrix<F,MC,MR> xL(g), xR(g), x0(g), x1(g), x2(g); // Temporary distributions DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); DistMatrix<F,STAR,STAR> x1_STAR_STAR(g); DistMatrix<F,STAR,MR > x1_STAR_MR(g); DistMatrix<F,STAR,MC > z0_STAR_MC(g); DistMatrix<F,MR, MC > z0_MR_MC(g); DistMatrix<F,MC, MR > z0(g); // Start the algorithm LockedPartitionUpDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); PartitionLeft( x, xL, xR, 0 ); while( xL.Width() > 0 ) { LockedRepartitionUpDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); RepartitionLeft ( xL, /**/ xR, x0, x1, /**/ x2 ); x1_STAR_MR.AlignWith( U01 ); z0_STAR_MC.AlignWith( U01 ); z0.AlignWith( x0 ); z0_STAR_MC.ResizeTo( 1, x0.Width() ); //----------------------------------------------------------------// x1_STAR_STAR = x1; U11_STAR_STAR = U11; Trsv ( UPPER, NORMAL, diag, U11_STAR_STAR.LockedLocalMatrix(), x1_STAR_STAR.LocalMatrix() ); x1 = x1_STAR_STAR; x1_STAR_MR = x1_STAR_STAR; Gemv ( NORMAL, (F)-1, U01.LockedLocalMatrix(), x1_STAR_MR.LockedLocalMatrix(), (F)0, z0_STAR_MC.LocalMatrix() ); z0_MR_MC.SumScatterFrom( z0_STAR_MC ); z0 = z0_MR_MC; Axpy( (F)1, z0, x0 ); //----------------------------------------------------------------// x1_STAR_MR.FreeAlignments(); z0_STAR_MC.FreeAlignments(); z0.FreeAlignments(); SlideLockedPartitionUpDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); SlidePartitionLeft ( xL, /**/ xR, x0, /**/ x1, x2 ); } } #ifndef RELEASE PopCallStack(); #endif }
inline void ApplyPackedReflectorsLUHB ( int offset, const DistMatrix<R>& H, DistMatrix<R>& A ) { #ifndef RELEASE PushCallStack("internal::ApplyPackedReflectorsLUHB"); if( H.Grid() != A.Grid() ) throw std::logic_error("{H,A} must be distributed over the same grid"); if( offset < 0 || offset > H.Width() ) throw std::logic_error("Transforms out of bounds"); if( H.Width() != A.Height() ) throw std::logic_error ("Width of transforms must equal height of target matrix"); #endif const Grid& g = H.Grid(); DistMatrix<R> HTL(g), HTR(g), H00(g), H01(g), H02(g), HPan(g), HPanCopy(g), HBL(g), HBR(g), H10(g), H11(g), H12(g), H20(g), H21(g), H22(g); DistMatrix<R> ABottom(g); DistMatrix<R,STAR,VR > HPan_STAR_VR(g); DistMatrix<R,STAR,MC > HPan_STAR_MC(g); DistMatrix<R,STAR,STAR> SInv_STAR_STAR(g); DistMatrix<R,STAR,MR > Z_STAR_MR(g); DistMatrix<R,STAR,VR > Z_STAR_VR(g); LockedPartitionUpDiagonal ( H, HTL, HTR, HBL, HBR, 0 ); while( HBR.Height() < H.Height() && HBR.Width() < H.Width() ) { LockedRepartitionUpDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 ); const int HPanWidth = H11.Width() + H12.Width(); const int HPanHeight = std::min( H11.Height(), std::max(HPanWidth-offset,0) ); const int leftover = A.Height()-HPanWidth; HPan.LockedView( H, H00.Height(), H00.Width(), HPanHeight, HPanWidth ); ABottom.View( A, leftover, 0, HPanWidth, A.Width() ); HPan_STAR_MC.AlignWith( ABottom ); Z_STAR_MR.AlignWith( ABottom ); Z_STAR_VR.AlignWith( ABottom ); Zeros( HPanHeight, ABottom.Width(), Z_STAR_MR ); Zeros( HPanHeight, HPanHeight, SInv_STAR_STAR ); //--------------------------------------------------------------------// HPanCopy = HPan; MakeTrapezoidal( LEFT, UPPER, offset, HPanCopy ); SetDiagonalToOne( LEFT, offset, HPanCopy ); HPan_STAR_VR = HPanCopy; Syrk ( UPPER, NORMAL, R(1), HPan_STAR_VR.LockedLocalMatrix(), R(0), SInv_STAR_STAR.LocalMatrix() ); SInv_STAR_STAR.SumOverGrid(); HalveMainDiagonal( SInv_STAR_STAR ); HPan_STAR_MC = HPan_STAR_VR; LocalGemm ( NORMAL, NORMAL, R(1), HPan_STAR_MC, ABottom, R(0), Z_STAR_MR ); Z_STAR_VR.SumScatterFrom( Z_STAR_MR ); LocalTrsm ( LEFT, UPPER, NORMAL, NON_UNIT, R(1), SInv_STAR_STAR, Z_STAR_VR ); Z_STAR_MR = Z_STAR_VR; LocalGemm ( TRANSPOSE, NORMAL, R(-1), HPan_STAR_MC, Z_STAR_MR, R(1), ABottom ); //--------------------------------------------------------------------// HPan_STAR_MC.FreeAlignments(); Z_STAR_MR.FreeAlignments(); Z_STAR_VR.FreeAlignments(); SlideLockedPartitionUpDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void ApplyPackedReflectorsLUHB ( int offset, const Matrix<R>& H, Matrix<R>& A ) { #ifndef RELEASE PushCallStack("internal::ApplyPackedReflectorsLUHB"); if( offset < 0 || offset > H.Width() ) throw std::logic_error("Transforms out of bounds"); if( H.Width() != A.Height() ) throw std::logic_error ("Width of transforms must equal height of target matrix"); #endif Matrix<R> HTL, HTR, H00, H01, H02, HPan, HPanCopy, HBL, HBR, H10, H11, H12, H20, H21, H22; Matrix<R> ABottom; Matrix<R> SInv, Z; LockedPartitionUpDiagonal ( H, HTL, HTR, HBL, HBR, 0 ); while( HBR.Height() < H.Height() && HBR.Width() < H.Width() ) { LockedRepartitionUpDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 ); const int HPanWidth = H11.Width() + H12.Width(); const int HPanHeight = std::min( H11.Height(), std::max(HPanWidth-offset,0) ); const int leftover = A.Height()-HPanWidth; HPan.LockedView( H, H00.Height(), H00.Width(), HPanHeight, HPanWidth ); ABottom.View( A, leftover, 0, HPanWidth, A.Width() ); Zeros( HPanHeight, ABottom.Width(), Z ); Zeros( HPanHeight, HPanHeight, SInv ); //--------------------------------------------------------------------// HPanCopy = HPan; MakeTrapezoidal( LEFT, UPPER, offset, HPanCopy ); SetDiagonalToOne( LEFT, offset, HPanCopy ); Syrk( UPPER, NORMAL, R(1), HPanCopy, R(0), SInv ); HalveMainDiagonal( SInv ); Gemm( NORMAL, NORMAL, R(1), HPanCopy, ABottom, R(0), Z ); Trsm( LEFT, UPPER, NORMAL, NON_UNIT, R(1), SInv, Z ); Gemm( TRANSPOSE, NORMAL, R(-1), HPanCopy, Z, R(1), ABottom ); //--------------------------------------------------------------------// SlideLockedPartitionUpDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void TrsmRLN ( UnitOrNonUnit diag, F alpha, const DistMatrix<F>& L, DistMatrix<F>& X, bool checkIfSingular ) { #ifndef RELEASE PushCallStack("internal::TrsmRLN"); #endif const Grid& g = L.Grid(); // Matrix views DistMatrix<F> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); DistMatrix<F> XL(g), XR(g), X0(g), X1(g), X2(g); // Temporary distributions DistMatrix<F,MR, STAR> L10Trans_MR_STAR(g); DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,STAR,MC > X1Trans_STAR_MC(g); DistMatrix<F,VC, STAR> X1_VC_STAR(g); // Start the algorithm Scale( alpha, X ); LockedPartitionUpDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); PartitionLeft( X, XL, XR, 0 ); while( XL.Width() > 0 ) { LockedRepartitionUpDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); RepartitionLeft ( XL, /**/ XR, X0, X1, /**/ X2 ); X1Trans_STAR_MC.AlignWith( X0 ); L10Trans_MR_STAR.AlignWith( X0 ); //--------------------------------------------------------------------// L11_STAR_STAR = L11; X1_VC_STAR = X1; LocalTrsm ( RIGHT, LOWER, NORMAL, diag, F(1), L11_STAR_STAR, X1_VC_STAR, checkIfSingular ); // X0[MC,MR] -= X1[MC,* ] L10[*,MR] // = X1^T[* ,MC] L10^T[MR,* ] X1Trans_STAR_MC.TransposeFrom( X1_VC_STAR ); X1.TransposeFrom( X1Trans_STAR_MC ); L10Trans_MR_STAR.TransposeFrom( L10 ); LocalGemm ( TRANSPOSE, TRANSPOSE, F(-1), X1Trans_STAR_MC, L10Trans_MR_STAR, F(1), X0 ); //--------------------------------------------------------------------// X1Trans_STAR_MC.FreeAlignments(); L10Trans_MR_STAR.FreeAlignments(); SlideLockedPartitionUpDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); SlidePartitionLeft ( XL, /**/ XR, X0, /**/ X1, X2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void TrsmLLTLarge ( Orientation orientation, UnitOrNonUnit diag, F alpha, const DistMatrix<F>& L, DistMatrix<F>& X, bool checkIfSingular ) { #ifndef RELEASE PushCallStack("internal::TrsmLLTLarge"); if( orientation == NORMAL ) throw std::logic_error("TrsmLLT expects a (Conjugate)Transpose option"); #endif const Grid& g = L.Grid(); // Matrix views DistMatrix<F> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); DistMatrix<F> XT(g), X0(g), XB(g), X1(g), X2(g); // Temporary distributions DistMatrix<F,STAR,MC > L10_STAR_MC(g); DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,STAR,MR > X1_STAR_MR(g); DistMatrix<F,STAR,VR > X1_STAR_VR(g); // Start the algorithm Scale( alpha, X ); LockedPartitionUpDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); PartitionUp ( X, XT, XB, 0 ); while( XT.Height() > 0 ) { LockedRepartitionUpDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); RepartitionUp ( XT, X0, X1, /**/ /**/ XB, X2 ); L10_STAR_MC.AlignWith( X0 ); X1_STAR_MR.AlignWith( X0 ); //--------------------------------------------------------------------// L11_STAR_STAR = L11; // L11[* ,* ] <- L11[MC,MR] X1_STAR_VR = X1; // X1[* ,VR] <- X1[MC,MR] // X1[* ,VR] := L11^-[T/H][* ,* ] X1[* ,VR] LocalTrsm ( LEFT, LOWER, orientation, diag, F(1), L11_STAR_STAR, X1_STAR_VR, checkIfSingular ); X1_STAR_MR = X1_STAR_VR; // X1[* ,MR] <- X1[* ,VR] X1 = X1_STAR_MR; // X1[MC,MR] <- X1[* ,MR] L10_STAR_MC = L10; // L10[* ,MC] <- L10[MC,MR] // X0[MC,MR] -= (L10[* ,MC])^(T/H) X1[* ,MR] // = L10^[T/H][MC,* ] X1[* ,MR] LocalGemm ( orientation, NORMAL, F(-1), L10_STAR_MC, X1_STAR_MR, F(1), X0 ); //--------------------------------------------------------------------// L10_STAR_MC.FreeAlignments(); X1_STAR_MR.FreeAlignments(); SlideLockedPartitionUpDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); SlidePartitionUp ( XT, X0, /**/ /**/ X1, XB, X2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void TrmmRUNC ( UnitOrNonUnit diag, T alpha, const DistMatrix<T>& U, DistMatrix<T>& X ) { #ifndef RELEASE CallStackEntry entry("internal::TrmmRUNC"); if( U.Grid() != X.Grid() ) throw std::logic_error ("U and X must be distributed over the same grid"); if( U.Height() != U.Width() || X.Width() != U.Height() ) { std::ostringstream msg; msg << "Nonconformal TrmmRUNC: \n" << " U ~ " << U.Height() << " x " << U.Width() << "\n" << " X ~ " << X.Height() << " x " << X.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = U.Grid(); // Matrix views DistMatrix<T> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); DistMatrix<T> XL(g), XR(g), X0(g), X1(g), X2(g); // Temporary distributions DistMatrix<T,MR, STAR> U12Trans_MR_STAR(g); DistMatrix<T,STAR,STAR> U11_STAR_STAR(g); DistMatrix<T,VC, STAR> X1_VC_STAR(g); DistMatrix<T,MC, STAR> X1_MC_STAR(g); // Start the algorithm Scale( alpha, X ); LockedPartitionUpDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); PartitionLeft( X, XL, XR, 0 ); while( XL.Width() > 0 ) { LockedRepartitionUpDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); RepartitionLeft ( XL, /**/ XR, X0, X1, /**/ X2 ); X1_MC_STAR.AlignWith( X2 ); U12Trans_MR_STAR.AlignWith( X2 ); X1_VC_STAR.AlignWith( X1 ); //--------------------------------------------------------------------// X1_MC_STAR = X1; U12Trans_MR_STAR.TransposeFrom( U12 ); LocalGemm ( NORMAL, TRANSPOSE, T(1), X1_MC_STAR, U12Trans_MR_STAR, T(1), X2 ); U11_STAR_STAR = U11; X1_VC_STAR = X1_MC_STAR; LocalTrmm ( RIGHT, UPPER, NORMAL, diag, T(1), U11_STAR_STAR, X1_VC_STAR ); X1 = X1_VC_STAR; //--------------------------------------------------------------------// X1_MC_STAR.FreeAlignments(); U12Trans_MR_STAR.FreeAlignments(); X1_VC_STAR.FreeAlignments(); SlideLockedPartitionUpDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); SlidePartitionLeft ( XL, /**/ XR, X0, /**/ X1, X2 ); } }
inline void internal::TrsvLT ( Orientation orientation, UnitOrNonUnit diag, const DistMatrix<F,MC,MR>& L, DistMatrix<F,MC,MR>& x ) { #ifndef RELEASE PushCallStack("internal::TrsvLT"); if( L.Grid() != x.Grid() ) throw std::logic_error("{L,x} must be distributed over the same grid"); if( orientation == NORMAL ) throw std::logic_error("TrsvLT expects a (conjugate-)transpose option"); if( L.Height() != L.Width() ) throw std::logic_error("L must be square"); if( x.Width() != 1 && x.Height() != 1 ) throw std::logic_error("x must be a vector"); const int xLength = ( x.Width() == 1 ? x.Height() : x.Width() ); if( L.Width() != xLength ) throw std::logic_error("Nonconformal TrsvLT"); #endif const Grid& g = L.Grid(); if( x.Width() == 1 ) { // Matrix views DistMatrix<F,MC,MR> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); DistMatrix<F,MC,MR> xT(g), x0(g), xB(g), x1(g), x2(g); // Temporary distributions DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,STAR,STAR> x1_STAR_STAR(g); DistMatrix<F,MC, STAR> x1_MC_STAR(g); DistMatrix<F,MR, STAR> z0_MR_STAR(g); DistMatrix<F,MR, MC > z0_MR_MC(g); DistMatrix<F,MC, MR > z0(g); // Start the algorithm LockedPartitionUpDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); PartitionUp ( x, xT, xB, 0 ); while( xT.Height() > 0 ) { LockedRepartitionUpDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); RepartitionUp ( xT, x0, x1, /**/ /**/ xB, x2 ); x1_MC_STAR.AlignWith( L10 ); z0_MR_STAR.AlignWith( L10 ); z0_MR_STAR.ResizeTo( x0.Height(), 1 ); z0.AlignWith( x0 ); //----------------------------------------------------------------// x1_STAR_STAR = x1; L11_STAR_STAR = L11; Trsv ( LOWER, orientation, diag, L11_STAR_STAR.LockedLocalMatrix(), x1_STAR_STAR.LocalMatrix() ); x1 = x1_STAR_STAR; x1_MC_STAR = x1_STAR_STAR; Gemv ( orientation, (F)-1, L10.LockedLocalMatrix(), x1_MC_STAR.LockedLocalMatrix(), (F)0, z0_MR_STAR.LocalMatrix() ); z0_MR_MC.SumScatterFrom( z0_MR_STAR ); z0 = z0_MR_MC; Axpy( (F)1, z0, x0 ); //----------------------------------------------------------------// x1_MC_STAR.FreeAlignments(); z0_MR_STAR.FreeAlignments(); z0.FreeAlignments(); SlideLockedPartitionUpDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); SlidePartitionUp ( xT, x0, /**/ /**/ x1, xB, x2 ); } } else { // Matrix views DistMatrix<F,MC,MR> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); DistMatrix<F,MC,MR> xL(g), xR(g), x0(g), x1(g), x2(g); // Temporary distributions DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,STAR,STAR> x1_STAR_STAR(g); DistMatrix<F,STAR,MC > x1_STAR_MC(g); DistMatrix<F,STAR,MR > z0_STAR_MR(g); // Start the algorithm LockedPartitionUpDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); PartitionLeft( x, xL, xR, 0 ); while( xL.Width() > 0 ) { LockedRepartitionUpDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); RepartitionLeft ( xL, /**/ xR, x0, x1, /**/ x2 ); x1_STAR_MC.AlignWith( L10 ); z0_STAR_MR.AlignWith( L10 ); //----------------------------------------------------------------// x1_STAR_STAR = x1; L11_STAR_STAR = L11; Trsv ( LOWER, orientation, diag, L11_STAR_STAR.LockedLocalMatrix(), x1_STAR_STAR.LocalMatrix() ); x1 = x1_STAR_STAR; x1_STAR_MC = x1_STAR_STAR; Gemv ( orientation, (F)-1, L10.LockedLocalMatrix(), x1_STAR_MC.LockedLocalMatrix(), (F)0, z0_STAR_MR.LocalMatrix() ); x0.SumScatterUpdate( (F)1, z0_STAR_MR ); //----------------------------------------------------------------// x1_STAR_MC.FreeAlignments(); z0_STAR_MR.FreeAlignments(); SlideLockedPartitionUpDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); SlidePartitionLeft ( xL, /**/ xR, x0, /**/ x1, x2 ); } } #ifndef RELEASE PopCallStack(); #endif }
inline void TrsmRUT ( Orientation orientation, UnitOrNonUnit diag, F alpha, const DistMatrix<F>& U, DistMatrix<F>& X, bool checkIfSingular ) { #ifndef RELEASE PushCallStack("internal::TrsmRUT"); if( orientation == NORMAL ) throw std::logic_error("TrsmRUT expects a (Conjugate)Transpose option"); #endif const Grid& g = U.Grid(); // Matrix views DistMatrix<F> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); DistMatrix<F> XL(g), XR(g), X0(g), X1(g), X2(g); // Temporary distributions DistMatrix<F,VR, STAR> U01_VR_STAR(g); DistMatrix<F,STAR,MR > U01AdjOrTrans_STAR_MR(g); DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); DistMatrix<F,VC, STAR> X1_VC_STAR(g); DistMatrix<F,STAR,MC > X1Trans_STAR_MC(g); // Start the algorithm Scale( alpha, X ); LockedPartitionUpDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); PartitionLeft( X, XL, XR, 0 ); while( XL.Width() > 0 ) { LockedRepartitionUpDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); RepartitionLeft ( XL, /**/ XR, X0, X1, /**/ X2 ); X1_VC_STAR.AlignWith( X0 ); X1Trans_STAR_MC.AlignWith( X0 ); U01_VR_STAR.AlignWith( X0 ); U01AdjOrTrans_STAR_MR.AlignWith( X0 ); //--------------------------------------------------------------------// U11_STAR_STAR = U11; X1_VC_STAR = X1; LocalTrsm ( RIGHT, UPPER, orientation, diag, F(1), U11_STAR_STAR, X1_VC_STAR, checkIfSingular ); X1Trans_STAR_MC.TransposeFrom( X1_VC_STAR ); X1.TransposeFrom( X1Trans_STAR_MC ); U01_VR_STAR = U01; if( orientation == ADJOINT ) U01AdjOrTrans_STAR_MR.AdjointFrom( U01_VR_STAR ); else U01AdjOrTrans_STAR_MR.TransposeFrom( U01_VR_STAR ); // X0[MC,MR] -= X1[MC,* ] (U01[MR,* ])^(T/H) // = X1^T[* ,MC] (U01^(T/H))[* ,MR] LocalGemm ( TRANSPOSE, NORMAL, F(-1), X1Trans_STAR_MC, U01AdjOrTrans_STAR_MR, F(1), X0 ); //--------------------------------------------------------------------// X1_VC_STAR.FreeAlignments(); X1Trans_STAR_MC.FreeAlignments(); U01_VR_STAR.FreeAlignments(); U01AdjOrTrans_STAR_MR.FreeAlignments(); SlideLockedPartitionUpDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); SlidePartitionLeft ( XL, /**/ XR, X0, /**/ X1, X2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void RLVB ( Conjugation conjugation, int offset, const DistMatrix<Complex<R> >& H, const DistMatrix<Complex<R>,MD,STAR>& t, DistMatrix<Complex<R> >& A ) { #ifndef RELEASE PushCallStack("apply_packed_reflectors::RLVB"); if( H.Grid() != t.Grid() || t.Grid() != A.Grid() ) throw std::logic_error ("{H,t,A} must be distributed over the same grid"); if( offset > 0 || offset < -H.Height() ) throw std::logic_error("Transforms out of bounds"); if( H.Height() != A.Width() ) throw std::logic_error ("Height of transforms must equal width of target matrix"); if( t.Height() != H.DiagonalLength( offset ) ) throw std::logic_error("t must be the same length as H's offset diag"); if( !t.AlignedWithDiagonal( H, offset ) ) throw std::logic_error("t must be aligned with H's 'offset' diagonal"); #endif typedef Complex<R> C; const Grid& g = H.Grid(); DistMatrix<C> HTL(g), HTR(g), H00(g), H01(g), H02(g), HPan(g), HPanCopy(g), HBL(g), HBR(g), H10(g), H11(g), H12(g), H20(g), H21(g), H22(g); DistMatrix<C> ARight(g); DistMatrix<C,MD,STAR> tT(g), t0(g), tB(g), t1(g), t2(g); DistMatrix<C,VC, STAR> HPan_VC_STAR(g); DistMatrix<C,MR, STAR> HPan_MR_STAR(g); DistMatrix<C,STAR,STAR> t1_STAR_STAR(g); DistMatrix<C,STAR,STAR> SInv_STAR_STAR(g); DistMatrix<C,STAR,MC > ZAdj_STAR_MC(g); DistMatrix<C,STAR,VC > ZAdj_STAR_VC(g); LockedPartitionUpDiagonal ( H, HTL, HTR, HBL, HBR, 0 ); LockedPartitionUp ( t, tT, tB, 0 ); while( HBR.Height() < H.Height() && HBR.Width() < H.Width() ) { LockedRepartitionUpDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 ); const int HPanHeight = H11.Height() + H21.Height(); const int HPanWidth = std::min( H11.Width(), std::max(HPanHeight+offset,0) ); const int leftover = A.Width()-HPanHeight; LockedView( HPan, H, H00.Height(), H00.Width(), HPanHeight, HPanWidth ); LockedRepartitionUp ( tT, t0, t1, /**/ /**/ tB, t2, HPanWidth ); View( ARight, A, 0, leftover, A.Height(), HPanHeight ); HPan_MR_STAR.AlignWith( ARight ); ZAdj_STAR_MC.AlignWith( ARight ); ZAdj_STAR_VC.AlignWith( ARight ); Zeros( HPan.Width(), ARight.Height(), ZAdj_STAR_MC ); Zeros( HPan.Width(), HPan.Width(), SInv_STAR_STAR ); //--------------------------------------------------------------------// HPanCopy = HPan; MakeTrapezoidal( LEFT, LOWER, offset, HPanCopy ); SetDiagonal( LEFT, offset, HPanCopy, C(1) ); HPan_VC_STAR = HPanCopy; Herk ( LOWER, ADJOINT, C(1), HPan_VC_STAR.LockedMatrix(), C(0), SInv_STAR_STAR.Matrix() ); SInv_STAR_STAR.SumOverGrid(); t1_STAR_STAR = t1; FixDiagonal( conjugation, t1_STAR_STAR, SInv_STAR_STAR ); HPan_MR_STAR = HPan_VC_STAR; LocalGemm ( ADJOINT, ADJOINT, C(1), HPan_MR_STAR, ARight, C(0), ZAdj_STAR_MC ); ZAdj_STAR_VC.SumScatterFrom( ZAdj_STAR_MC ); LocalTrsm ( LEFT, LOWER, ADJOINT, NON_UNIT, C(1), SInv_STAR_STAR, ZAdj_STAR_VC ); ZAdj_STAR_MC = ZAdj_STAR_VC; LocalGemm ( ADJOINT, ADJOINT, C(-1), ZAdj_STAR_MC, HPan_MR_STAR, C(1), ARight ); //--------------------------------------------------------------------// HPan_MR_STAR.FreeAlignments(); ZAdj_STAR_MC.FreeAlignments(); ZAdj_STAR_VC.FreeAlignments(); SlideLockedPartitionUpDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 ); SlideLockedPartitionUp ( tT, t0, /**/ /**/ t1, tB, t2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void RLVB ( Conjugation conjugation, int offset, const Matrix<Complex<R> >& H, const Matrix<Complex<R> >& t, Matrix<Complex<R> >& A ) { #ifndef RELEASE PushCallStack("apply_packed_reflectors::RLVB"); if( offset > 0 || offset < -H.Height() ) throw std::logic_error("Transforms out of bounds"); if( H.Height() != A.Width() ) throw std::logic_error ("Height of transforms must equal width of target matrix"); if( t.Height() != H.DiagonalLength( offset ) ) throw std::logic_error("t must be the same length as H's offset diag"); #endif typedef Complex<R> C; Matrix<C> HTL, HTR, H00, H01, H02, HPan, HPanCopy, HBL, HBR, H10, H11, H12, H20, H21, H22; Matrix<C> ARight; Matrix<C> tT, t0, tB, t1, t2; Matrix<C> SInv, Z; LockedPartitionUpDiagonal ( H, HTL, HTR, HBL, HBR, 0 ); LockedPartitionUp ( t, tT, tB, 0 ); while( HBR.Height() < H.Height() && HBR.Width() < H.Width() ) { LockedRepartitionUpDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 ); const int HPanHeight = H11.Height() + H21.Height(); const int HPanWidth = std::min( H11.Width(), std::max(HPanHeight+offset,0) ); const int leftover = A.Width()-HPanHeight; LockedView( HPan, H, H00.Height(), H00.Width(), HPanHeight, HPanWidth ); LockedRepartitionUp ( tT, t0, t1, /**/ /**/ tB, t2, HPanWidth ); View( ARight, A, 0, leftover, A.Height(), HPanHeight ); Zeros( ARight.Height(), HPan.Width(), Z ); Zeros( HPan.Width(), HPan.Width(), SInv ); //--------------------------------------------------------------------// HPanCopy = HPan; MakeTrapezoidal( LEFT, LOWER, offset, HPanCopy ); SetDiagonal( LEFT, offset, HPanCopy, C(1) ); Herk( LOWER, ADJOINT, C(1), HPanCopy, C(0), SInv ); FixDiagonal( conjugation, t1, SInv ); Gemm( NORMAL, NORMAL, C(1), ARight, HPanCopy, C(0), Z ); Trsm( RIGHT, LOWER, NORMAL, NON_UNIT, C(1), SInv, Z ); Gemm( NORMAL, ADJOINT, C(-1), Z, HPanCopy, C(1), ARight ); //--------------------------------------------------------------------// SlideLockedPartitionUpDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 ); SlideLockedPartitionUp ( tT, t0, /**/ /**/ t1, tB, t2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void RLVB ( int offset, const DistMatrix<R>& H, DistMatrix<R>& A ) { #ifndef RELEASE PushCallStack("apply_packed_reflectors::RLVB"); if( H.Grid() != A.Grid() ) throw std::logic_error("{H,A} must be distributed over the same grid"); if( offset > 0 || offset < -H.Height() ) throw std::logic_error("Transforms out of bounds"); if( H.Height() != A.Width() ) throw std::logic_error ("Height of transforms must equal width of target matrix"); #endif const Grid& g = H.Grid(); DistMatrix<R> HTL(g), HTR(g), H00(g), H01(g), H02(g), HPan(g), HPanCopy(g), HBL(g), HBR(g), H10(g), H11(g), H12(g), H20(g), H21(g), H22(g); DistMatrix<R> ARight(g); DistMatrix<R,VC, STAR> HPan_VC_STAR(g); DistMatrix<R,MR, STAR> HPan_MR_STAR(g); DistMatrix<R,STAR,STAR> SInv_STAR_STAR(g); DistMatrix<R,STAR,MC > ZTrans_STAR_MC(g); DistMatrix<R,STAR,VC > ZTrans_STAR_VC(g); LockedPartitionUpDiagonal ( H, HTL, HTR, HBL, HBR, 0 ); while( HBR.Height() < H.Height() && HBR.Width() < H.Width() ) { LockedRepartitionUpDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 ); const int HPanHeight = H11.Height() + H21.Height(); const int HPanWidth = std::min( H11.Width(), std::max(HPanHeight+offset,0) ); const int leftover = A.Width()-HPanHeight; LockedView( HPan, H, H00.Height(), H00.Width(), HPanHeight, HPanWidth ); View( ARight, A, 0, leftover, A.Height(), HPanHeight ); HPan_MR_STAR.AlignWith( ARight ); ZTrans_STAR_MC.AlignWith( ARight ); ZTrans_STAR_VC.AlignWith( ARight ); Zeros( HPanWidth, ARight.Height(), ZTrans_STAR_MC ); Zeros( HPanWidth, HPanWidth, SInv_STAR_STAR ); //--------------------------------------------------------------------// HPanCopy = HPan; MakeTrapezoidal( LEFT, LOWER, offset, HPanCopy ); SetDiagonal( LEFT, offset, HPanCopy, R(1) ); HPan_VC_STAR = HPanCopy; Syrk ( LOWER, TRANSPOSE, R(1), HPan_VC_STAR.LockedMatrix(), R(0), SInv_STAR_STAR.Matrix() ); SInv_STAR_STAR.SumOverGrid(); HalveMainDiagonal( SInv_STAR_STAR ); HPan_MR_STAR = HPan_VC_STAR; LocalGemm ( TRANSPOSE, TRANSPOSE, R(1), HPan_MR_STAR, ARight, R(0), ZTrans_STAR_MC ); ZTrans_STAR_VC.SumScatterFrom( ZTrans_STAR_MC ); LocalTrsm ( LEFT, LOWER, TRANSPOSE, NON_UNIT, R(1), SInv_STAR_STAR, ZTrans_STAR_VC ); ZTrans_STAR_MC = ZTrans_STAR_VC; LocalGemm ( TRANSPOSE, TRANSPOSE, R(-1), ZTrans_STAR_MC, HPan_MR_STAR, R(1), ARight ); //--------------------------------------------------------------------// HPan_MR_STAR.FreeAlignments(); ZTrans_STAR_MC.FreeAlignments(); ZTrans_STAR_VC.FreeAlignments(); SlideLockedPartitionUpDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void TrsmLUNSmall ( UnitOrNonUnit diag, F alpha, const DistMatrix<F,VC,STAR>& U, DistMatrix<F,VC,STAR>& X, bool checkIfSingular ) { #ifndef RELEASE PushCallStack("internal::TrsmLUNSmall"); if( U.Grid() != X.Grid() ) throw std::logic_error ("U and X must be distributed over the same grid"); if( U.Height() != U.Width() || U.Width() != X.Height() ) { std::ostringstream msg; msg << "Nonconformal TrsmLUN: \n" << " U ~ " << U.Height() << " x " << U.Width() << "\n" << " X ~ " << X.Height() << " x " << X.Width() << "\n"; throw std::logic_error( msg.str() ); } if( U.ColAlignment() != X.ColAlignment() ) throw std::logic_error("U and X are assumed to be aligned"); #endif const Grid& g = U.Grid(); // Matrix views DistMatrix<F,VC,STAR> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); DistMatrix<F,VC,STAR> XT(g), X0(g), XB(g), X1(g), X2(g); // Temporary distributions DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); DistMatrix<F,STAR,STAR> X1_STAR_STAR(g); // Start the algorithm Scale( alpha, X ); LockedPartitionUpDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); PartitionUp ( X, XT, XB, 0 ); while( XT.Height() > 0 ) { LockedRepartitionUpDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); RepartitionUp ( XT, X0, X1, /**/ /**/ XB, X2 ); //--------------------------------------------------------------------// U11_STAR_STAR = U11; // U11[* ,* ] <- U11[VC,* ] X1_STAR_STAR = X1; // X1[* ,* ] <- X1[VC,* ] // X1[* ,* ] := U11^-1[* ,* ] X1[* ,* ] LocalTrsm ( LEFT, UPPER, NORMAL, diag, F(1), U11_STAR_STAR, X1_STAR_STAR, checkIfSingular ); X1 = X1_STAR_STAR; // X0[VC,* ] -= U01[VC,* ] X1[* ,* ] LocalGemm( NORMAL, NORMAL, F(-1), U01, X1_STAR_STAR, F(1), X0 ); //--------------------------------------------------------------------// SlideLockedPartitionUpDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); SlidePartitionUp ( XT, X0, /**/ /**/ X1, XB, X2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void TrmmLLNC ( UnitOrNonUnit diag, T alpha, const DistMatrix<T>& L, DistMatrix<T>& X ) { #ifndef RELEASE CallStackEntry entry("internal::TrmmLLNC"); if( L.Grid() != X.Grid() ) throw std::logic_error ("L and X must be distributed over the same grid"); if( L.Height() != L.Width() || L.Width() != X.Height() ) { std::ostringstream msg; msg << "Nonconformal TrmmLLNC: \n" << " L ~ " << L.Height() << " x " << L.Width() << "\n" << " X ~ " << X.Height() << " x " << X.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = L.Grid(); // Matrix views DistMatrix<T> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); DistMatrix<T> XT(g), X0(g), XB(g), X1(g), X2(g); // Temporary distributions DistMatrix<T,MC, STAR> L21_MC_STAR(g); DistMatrix<T,STAR,STAR> L11_STAR_STAR(g); DistMatrix<T,STAR,VR > X1_STAR_VR(g); DistMatrix<T,MR, STAR> X1Trans_MR_STAR(g); // Start the algorithm Scale( alpha, X ); LockedPartitionUpDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); PartitionUp ( X, XT, XB, 0 ); while( XT.Height() > 0 ) { LockedRepartitionUpDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); RepartitionUp ( XT, X0, X1, /**/ /**/ XB, X2 ); L21_MC_STAR.AlignWith( X2 ); X1Trans_MR_STAR.AlignWith( X2 ); X1_STAR_VR.AlignWith( X1 ); //--------------------------------------------------------------------// L21_MC_STAR = L21; X1Trans_MR_STAR.TransposeFrom( X1 ); LocalGemm ( NORMAL, TRANSPOSE, T(1), L21_MC_STAR, X1Trans_MR_STAR, T(1), X2 ); L11_STAR_STAR = L11; X1_STAR_VR.TransposeFrom( X1Trans_MR_STAR ); LocalTrmm( LEFT, LOWER, NORMAL, diag, T(1), L11_STAR_STAR, X1_STAR_VR ); X1 = X1_STAR_VR; //--------------------------------------------------------------------// L21_MC_STAR.FreeAlignments(); X1Trans_MR_STAR.FreeAlignments(); X1_STAR_VR.FreeAlignments(); SlideLockedPartitionUpDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); SlidePartitionUp ( XT, X0, /**/ /**/ X1, XB, X2 ); } }
inline void ApplyPackedReflectorsLUHB ( Conjugation conjugation, int offset, const Matrix<Complex<R> >& H, const Matrix<Complex<R> >& t, Matrix<Complex<R> >& A ) { #ifndef RELEASE PushCallStack("internal::ApplyPackedReflectorsLUHB"); if( offset < 0 || offset > H.Width() ) throw std::logic_error("Transforms out of bounds"); if( H.Width() != A.Height() ) throw std::logic_error ("Width of transforms must equal height of target matrix"); if( t.Height() != H.DiagonalLength( offset ) ) throw std::logic_error("t must be the same length as H's offset diag"); #endif typedef Complex<R> C; Matrix<C> HTL, HTR, H00, H01, H02, HPan, HPanCopy, HBL, HBR, H10, H11, H12, H20, H21, H22; Matrix<C> ABottom; Matrix<C> tT, t0, tB, t1, t2; Matrix<C> SInv, Z; LockedPartitionUpDiagonal ( H, HTL, HTR, HBL, HBR, 0 ); LockedPartitionUp ( t, tT, tB, 0 ); while( HBR.Height() < H.Height() && HBR.Width() < H.Width() ) { LockedRepartitionUpDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 ); const int HPanWidth = H11.Width() + H12.Width(); const int HPanHeight = std::min( H11.Height(), std::max(HPanWidth-offset,0) ); const int leftover = A.Height()-HPanWidth; HPan.LockedView( H, H00.Height(), H00.Width(), HPanHeight, HPanWidth ); LockedRepartitionUp ( tT, t0, t1, /**/ /**/ tB, t2, HPanHeight ); ABottom.View( A, leftover, 0, HPanWidth, A.Width() ); Zeros( HPanHeight, ABottom.Width(), Z ); Zeros( HPanHeight, HPanHeight, SInv ); //--------------------------------------------------------------------// HPanCopy = HPan; MakeTrapezoidal( LEFT, UPPER, offset, HPanCopy ); SetDiagonalToOne( LEFT, offset, HPanCopy ); Herk( UPPER, NORMAL, C(1), HPanCopy, C(0), SInv ); FixDiagonal( conjugation, t1, SInv ); Gemm( NORMAL, NORMAL, C(1), HPanCopy, ABottom, C(0), Z ); Trsm( LEFT, UPPER, NORMAL, NON_UNIT, C(1), SInv, Z ); Gemm( ADJOINT, NORMAL, C(-1), HPanCopy, Z, C(1), ABottom ); //--------------------------------------------------------------------// SlideLockedPartitionUpDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 ); SlideLockedPartitionUp ( tT, t0, /**/ /**/ t1, tB, t2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void TrsmLLTSmall ( Orientation orientation, UnitOrNonUnit diag, F alpha, const DistMatrix<F,STAR,VR>& L, DistMatrix<F,VR,STAR>& X, bool checkIfSingular ) { #ifndef RELEASE PushCallStack("internal::TrsmLLTSmall"); if( L.Grid() != X.Grid() ) throw std::logic_error ("L and X must be distributed over the same grid"); if( orientation == NORMAL ) throw std::logic_error("TrsmLLT expects a (Conjugate)Transpose option"); if( L.Height() != L.Width() || L.Height() != X.Height() ) { std::ostringstream msg; msg << "Nonconformal TrsmLLT: \n" << " L ~ " << L.Height() << " x " << L.Width() << "\n" << " X ~ " << X.Height() << " x " << X.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } if( L.RowAlignment() != X.ColAlignment() ) throw std::logic_error("L and X must be aligned"); #endif const Grid& g = L.Grid(); // Matrix views DistMatrix<F,STAR,VR> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); DistMatrix<F,VR,STAR> XT(g), X0(g), XB(g), X1(g), X2(g); // Temporary distributions DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,STAR,STAR> X1_STAR_STAR(g); // Start the algorithm Scale( alpha, X ); LockedPartitionUpDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); PartitionUp ( X, XT, XB, 0 ); while( XT.Height() > 0 ) { LockedRepartitionUpDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); RepartitionUp ( XT, X0, X1, /**/ /**/ XB, X2 ); //--------------------------------------------------------------------// L11_STAR_STAR = L11; // L11[* ,* ] <- L11[* ,VR] X1_STAR_STAR = X1; // X1[* ,* ] <- X1[VR,* ] // X1[* ,* ] := L11^-[T/H][* ,* ] X1[* ,* ] LocalTrsm ( LEFT, LOWER, orientation, diag, F(1), L11_STAR_STAR, X1_STAR_STAR, checkIfSingular ); X1 = X1_STAR_STAR; // X0[VR,* ] -= L10[* ,VR]^(T/H) X1[* ,* ] LocalGemm( orientation, NORMAL, F(-1), L10, X1_STAR_STAR, F(1), X0 ); //--------------------------------------------------------------------// SlideLockedPartitionUpDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); SlidePartitionUp ( XT, X0, /**/ /**/ X1, XB, X2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void ApplyPackedReflectorsLUHB ( Conjugation conjugation, int offset, const DistMatrix<Complex<R> >& H, const DistMatrix<Complex<R>,MD,STAR>& t, DistMatrix<Complex<R> >& A ) { #ifndef RELEASE PushCallStack("internal::ApplyPackedReflectorsLUHB"); if( H.Grid() != t.Grid() || t.Grid() != A.Grid() ) throw std::logic_error ("{H,t,A} must be distributed over the same grid"); if( offset < 0 || offset > H.Width() ) throw std::logic_error("Transforms out of bounds"); if( H.Width() != A.Height() ) throw std::logic_error ("Width of transforms must equal height of target matrix"); if( t.Height() != H.DiagonalLength( offset ) ) throw std::logic_error("t must be the same length as H's offset diag"); if( !t.AlignedWithDiagonal( H, offset ) ) throw std::logic_error("t must be aligned with H's offset diagonal"); #endif typedef Complex<R> C; const Grid& g = H.Grid(); DistMatrix<C> HTL(g), HTR(g), H00(g), H01(g), H02(g), HPan(g), HPanCopy(g), HBL(g), HBR(g), H10(g), H11(g), H12(g), H20(g), H21(g), H22(g); DistMatrix<C> ABottom(g); DistMatrix<C,MD,STAR> tT(g), t0(g), tB(g), t1(g), t2(g); DistMatrix<C,STAR,VR > HPan_STAR_VR(g); DistMatrix<C,STAR,MC > HPan_STAR_MC(g); DistMatrix<C,STAR,STAR> t1_STAR_STAR(g); DistMatrix<C,STAR,STAR> SInv_STAR_STAR(g); DistMatrix<C,STAR,MR > Z_STAR_MR(g); DistMatrix<C,STAR,VR > Z_STAR_VR(g); LockedPartitionUpDiagonal ( H, HTL, HTR, HBL, HBR, 0 ); LockedPartitionUp ( t, tT, tB, 0 ); while( HBR.Height() < H.Height() && HBR.Width() < H.Width() ) { LockedRepartitionUpDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 ); const int HPanWidth = H11.Width() + H12.Width(); const int HPanHeight = std::min( H11.Height(), std::max(HPanWidth-offset,0) ); const int leftover = A.Height()-HPanWidth; HPan.LockedView( H, H00.Height(), H00.Width(), HPanHeight, HPanWidth ); LockedRepartitionUp ( tT, t0, t1, /**/ /**/ tB, t2, HPanHeight ); ABottom.View( A, leftover, 0, HPanWidth, A.Width() ); HPan_STAR_MC.AlignWith( ABottom ); Z_STAR_MR.AlignWith( ABottom ); Z_STAR_VR.AlignWith( ABottom ); Zeros( HPanHeight, ABottom.Width(), Z_STAR_MR ); Zeros( HPanHeight, HPanHeight, SInv_STAR_STAR ); //--------------------------------------------------------------------// HPanCopy = HPan; MakeTrapezoidal( LEFT, UPPER, offset, HPanCopy ); SetDiagonalToOne( LEFT, offset, HPanCopy ); HPan_STAR_VR = HPanCopy; Herk ( UPPER, NORMAL, C(1), HPan_STAR_VR.LockedLocalMatrix(), C(0), SInv_STAR_STAR.LocalMatrix() ); SInv_STAR_STAR.SumOverGrid(); t1_STAR_STAR = t1; FixDiagonal( conjugation, t1_STAR_STAR, SInv_STAR_STAR ); HPan_STAR_MC = HPan_STAR_VR; LocalGemm ( NORMAL, NORMAL, C(1), HPan_STAR_MC, ABottom, C(0), Z_STAR_MR ); Z_STAR_VR.SumScatterFrom( Z_STAR_MR ); LocalTrsm ( LEFT, UPPER, NORMAL, NON_UNIT, C(1), SInv_STAR_STAR, Z_STAR_VR ); Z_STAR_MR = Z_STAR_VR; LocalGemm ( ADJOINT, NORMAL, C(-1), HPan_STAR_MC, Z_STAR_MR, C(1), ABottom ); //--------------------------------------------------------------------// HPan_STAR_MC.FreeAlignments(); Z_STAR_MR.FreeAlignments(); Z_STAR_VR.FreeAlignments(); SlideLockedPartitionUpDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 ); SlideLockedPartitionUp ( tT, t0, /**/ /**/ t1, tB, t2 ); } #ifndef RELEASE PopCallStack(); #endif }