void LLNMedium ( const AbstractDistMatrix<F>& LPre, AbstractDistMatrix<F>& XPre, bool checkIfSingular ) { DEBUG_CSE const Int m = XPre.Height(); const Int bsize = Blocksize(); const Grid& g = LPre.Grid(); DistMatrixReadProxy<F,F,MC,MR> LProx( LPre ); DistMatrixReadWriteProxy<F,F,MC,MR> XProx( XPre ); auto& L = LProx.GetLocked(); auto& X = XProx.Get(); DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,MC, STAR> L21_MC_STAR(g); DistMatrix<F,MR, STAR> X1Trans_MR_STAR(g); for( Int k=0; k<m; k+=bsize ) { const Int nbProp = Min(bsize,m-k); const bool in2x2 = ( k+nbProp<m && L.Get(k+nbProp-1,k+nbProp) != F(0) ); const Int nb = ( in2x2 ? nbProp+1 : nbProp ); const Range<Int> ind1( k, k+nb ), ind2( k+nb, m ); auto L11 = L( ind1, ind1 ); auto L21 = L( ind2, ind1 ); auto X1 = X( ind1, ALL ); auto X2 = X( ind2, ALL ); L11_STAR_STAR = L11; // L11[* ,* ] <- L11[MC,MR] X1Trans_MR_STAR.AlignWith( X2 ); Transpose( X1, X1Trans_MR_STAR ); // X1^T[MR,* ] := X1^T[MR,* ] L11^-T[* ,* ] // = (L11^-1[* ,* ] X1[* ,MR])^T LocalQuasiTrsm ( RIGHT, LOWER, TRANSPOSE, F(1), L11_STAR_STAR, X1Trans_MR_STAR, checkIfSingular ); Transpose( X1Trans_MR_STAR, X1 ); L21_MC_STAR.AlignWith( X2 ); L21_MC_STAR = L21; // L21[MC,* ] <- L21[MC,MR] // X2[MC,MR] -= L21[MC,* ] X1[* ,MR] LocalGemm ( NORMAL, TRANSPOSE, F(-1), L21_MC_STAR, X1Trans_MR_STAR, F(1), X2 ); } }
void LLNLarge ( const AbstractDistMatrix<F>& LPre, AbstractDistMatrix<F>& XPre, bool checkIfSingular ) { DEBUG_CSE const Int m = XPre.Height(); const Int bsize = Blocksize(); const Grid& g = LPre.Grid(); DistMatrixReadProxy<F,F,MC,MR> LProx( LPre ); DistMatrixReadWriteProxy<F,F,MC,MR> XProx( XPre ); auto& L = LProx.GetLocked(); auto& X = XProx.Get(); DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,MC, STAR> L21_MC_STAR(g); DistMatrix<F,STAR,MR > X1_STAR_MR(g); DistMatrix<F,STAR,VR > X1_STAR_VR(g); for( Int k=0; k<m; k+=bsize ) { const Int nbProp = Min(bsize,m-k); const bool in2x2 = ( k+nbProp<m && L.Get(k+nbProp-1,k+nbProp) != F(0) ); const Int nb = ( in2x2 ? nbProp+1 : nbProp ); const Range<Int> ind1( k, k+nb ), ind2( k+nb, m ); auto L11 = L( ind1, ind1 ); auto L21 = L( ind2, ind1 ); auto X1 = X( ind1, ALL ); auto X2 = X( ind2, ALL ); // X1[* ,VR] := L11^-1[* ,* ] X1[* ,VR] L11_STAR_STAR = L11; X1_STAR_VR = X1; LocalQuasiTrsm ( LEFT, LOWER, NORMAL, F(1), L11_STAR_STAR, X1_STAR_VR, checkIfSingular ); X1_STAR_MR.AlignWith( X2 ); X1_STAR_MR = X1_STAR_VR; // X1[* ,MR] <- X1[* ,VR] X1 = X1_STAR_MR; // X1[MC,MR] <- X1[* ,MR] L21_MC_STAR.AlignWith( X2 ); L21_MC_STAR = L21; // L21[MC,* ] <- L21[MC,MR] // X2[MC,MR] -= L21[MC,* ] X1[* ,MR] LocalGemm( NORMAL, NORMAL, F(-1), L21_MC_STAR, X1_STAR_MR, F(1), X2 ); } }
inline void TrmmLLTCOld ( Orientation orientation, UnitOrNonUnit diag, T alpha, const DistMatrix<T>& L, DistMatrix<T>& X ) { #ifndef RELEASE PushCallStack("internal::TrmmLLTCOld"); if( L.Grid() != X.Grid() ) throw std::logic_error ("L and X must be distributed over the same grid"); if( orientation == NORMAL ) throw std::logic_error("TrmmLLT expects a (Conjugate)Transpose option"); if( L.Height() != L.Width() || L.Height() != X.Height() ) { std::ostringstream msg; msg << "Nonconformal TrmmLLTC: \n" << " L ~ " << L.Height() << " x " << L.Width() << "\n" << " X ~ " << X.Height() << " x " << X.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = L.Grid(); // Matrix views DistMatrix<T> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); DistMatrix<T> XT(g), X0(g), XB(g), X1(g), X2(g); // Temporary distributions DistMatrix<T,STAR,STAR> L11_STAR_STAR(g); DistMatrix<T,MC, STAR> L21_MC_STAR(g); DistMatrix<T,STAR,VR > X1_STAR_VR(g); DistMatrix<T,MR, STAR> D1AdjOrTrans_MR_STAR(g); DistMatrix<T,MR, MC > D1AdjOrTrans_MR_MC(g); DistMatrix<T,MC, MR > D1(g); // Start the algorithm Scale( alpha, X ); LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); PartitionDown ( X, XT, XB, 0 ); while( XB.Height() > 0 ) { LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); RepartitionDown ( XT, X0, /**/ /**/ X1, XB, X2 ); L21_MC_STAR.AlignWith( X2 ); D1AdjOrTrans_MR_STAR.AlignWith( X1 ); D1AdjOrTrans_MR_MC.AlignWith( X1 ); D1.AlignWith( X1 ); Zeros( X1.Width(), X1.Height(), D1AdjOrTrans_MR_STAR ); Zeros( X1.Height(), X1.Width(), D1 ); //--------------------------------------------------------------------// X1_STAR_VR = X1; L11_STAR_STAR = L11; LocalTrmm ( LEFT, LOWER, orientation, diag, T(1), L11_STAR_STAR, X1_STAR_VR ); X1 = X1_STAR_VR; L21_MC_STAR = L21; LocalGemm ( orientation, NORMAL, T(1), X2, L21_MC_STAR, T(0), D1AdjOrTrans_MR_STAR ); D1AdjOrTrans_MR_MC.SumScatterFrom( D1AdjOrTrans_MR_STAR ); if( orientation == TRANSPOSE ) Transpose( D1AdjOrTrans_MR_MC.LocalMatrix(), D1.LocalMatrix() ); else Adjoint( D1AdjOrTrans_MR_MC.LocalMatrix(), D1.LocalMatrix() ); Axpy( T(1), D1, X1 ); //--------------------------------------------------------------------// D1.FreeAlignments(); D1AdjOrTrans_MR_MC.FreeAlignments(); D1AdjOrTrans_MR_STAR.FreeAlignments(); L21_MC_STAR.FreeAlignments(); SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); SlidePartitionDown ( XT, X0, X1, /**/ /**/ XB, X2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void TrmmLLNC ( UnitOrNonUnit diag, T alpha, const DistMatrix<T>& L, DistMatrix<T>& X ) { #ifndef RELEASE CallStackEntry entry("internal::TrmmLLNC"); if( L.Grid() != X.Grid() ) throw std::logic_error ("L and X must be distributed over the same grid"); if( L.Height() != L.Width() || L.Width() != X.Height() ) { std::ostringstream msg; msg << "Nonconformal TrmmLLNC: \n" << " L ~ " << L.Height() << " x " << L.Width() << "\n" << " X ~ " << X.Height() << " x " << X.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = L.Grid(); // Matrix views DistMatrix<T> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); DistMatrix<T> XT(g), X0(g), XB(g), X1(g), X2(g); // Temporary distributions DistMatrix<T,MC, STAR> L21_MC_STAR(g); DistMatrix<T,STAR,STAR> L11_STAR_STAR(g); DistMatrix<T,STAR,VR > X1_STAR_VR(g); DistMatrix<T,MR, STAR> X1Trans_MR_STAR(g); // Start the algorithm Scale( alpha, X ); LockedPartitionUpDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); PartitionUp ( X, XT, XB, 0 ); while( XT.Height() > 0 ) { LockedRepartitionUpDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); RepartitionUp ( XT, X0, X1, /**/ /**/ XB, X2 ); L21_MC_STAR.AlignWith( X2 ); X1Trans_MR_STAR.AlignWith( X2 ); X1_STAR_VR.AlignWith( X1 ); //--------------------------------------------------------------------// L21_MC_STAR = L21; X1Trans_MR_STAR.TransposeFrom( X1 ); LocalGemm ( NORMAL, TRANSPOSE, T(1), L21_MC_STAR, X1Trans_MR_STAR, T(1), X2 ); L11_STAR_STAR = L11; X1_STAR_VR.TransposeFrom( X1Trans_MR_STAR ); LocalTrmm( LEFT, LOWER, NORMAL, diag, T(1), L11_STAR_STAR, X1_STAR_VR ); X1 = X1_STAR_VR; //--------------------------------------------------------------------// L21_MC_STAR.FreeAlignments(); X1Trans_MR_STAR.FreeAlignments(); X1_STAR_VR.FreeAlignments(); SlideLockedPartitionUpDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); SlidePartitionUp ( XT, X0, /**/ /**/ X1, XB, X2 ); } }
void LLN ( UnitOrNonUnit diag, F alpha, const AbstractDistMatrix<F>& LPre, AbstractDistMatrix<F>& XPre, bool checkIfSingular ) { DEBUG_CSE const Int n = LPre.Height(); const Int bsize = Blocksize(); const Grid& g = LPre.Grid(); DistMatrixReadProxy<F,F,MC,MR> LProx( LPre ); DistMatrixReadWriteProxy<F,F,MC,MR> XProx( XPre ); auto& L = LProx.GetLocked(); auto& X = XProx.Get(); // Temporary distributions DistMatrix<F,STAR,STAR> L11_STAR_STAR(g), X11_STAR_STAR(g); DistMatrix<F,MC, STAR> L21_MC_STAR(g); DistMatrix<F,STAR,MR > X10_STAR_MR(g), X11_STAR_MR(g); DistMatrix<F,STAR,VR > X10_STAR_VR(g); ScaleTrapezoid( alpha, LOWER, X ); for( Int k=0; k<n; k+=bsize ) { const Int nb = Min(bsize,n-k); const Range<Int> ind0( 0, k ), ind1( k, k+nb ), ind2( k+nb, n ); auto L11 = L( ind1, ind1 ); auto L21 = L( ind2, ind1 ); auto X10 = X( ind1, ind0 ); auto X11 = X( ind1, ind1 ); auto X20 = X( ind2, ind0 ); auto X21 = X( ind2, ind1 ); L11_STAR_STAR = L11; X11_STAR_STAR = X11; X10_STAR_VR = X10; LocalTrsm ( LEFT, LOWER, NORMAL, diag, F(1), L11_STAR_STAR, X10_STAR_VR, checkIfSingular ); Trstrm ( LEFT, LOWER, NORMAL, diag, F(1), L11_STAR_STAR, X11_STAR_STAR, checkIfSingular ); X11 = X11_STAR_STAR; X11_STAR_MR.AlignWith( X21 ); X11_STAR_MR = X11_STAR_STAR; MakeTrapezoidal( LOWER, X11_STAR_MR ); X10_STAR_MR.AlignWith( X20 ); X10_STAR_MR = X10_STAR_VR; X10 = X10_STAR_MR; L21_MC_STAR.AlignWith( X20 ); L21_MC_STAR = L21; LocalGemm ( NORMAL, NORMAL, F(-1), L21_MC_STAR, X10_STAR_MR, F(1), X20 ); LocalGemm ( NORMAL, NORMAL, F(-1), L21_MC_STAR, X11_STAR_MR, F(1), X21 ); } }
inline void TwoSidedTrsmLVar5 ( UnitOrNonUnit diag, DistMatrix<F>& A, const DistMatrix<F>& L ) { #ifndef RELEASE CallStackEntry entry("internal::TwoSidedTrsmLVar5"); if( A.Height() != A.Width() ) LogicError("A must be square"); if( L.Height() != L.Width() ) LogicError("Triangular matrices must be square"); if( A.Height() != L.Height() ) LogicError("A and L must be the same size"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); // Temporary distributions DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,MC, STAR> A21_MC_STAR(g); DistMatrix<F,VC, STAR> A21_VC_STAR(g); DistMatrix<F,VR, STAR> A21_VR_STAR(g); DistMatrix<F,STAR,MR > A21Adj_STAR_MR(g); DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,MC, STAR> L21_MC_STAR(g); DistMatrix<F,VC, STAR> L21_VC_STAR(g); DistMatrix<F,VR, STAR> L21_VR_STAR(g); DistMatrix<F,STAR,MR > L21Adj_STAR_MR(g); DistMatrix<F,VC, STAR> Y21_VC_STAR(g); DistMatrix<F> Y21(g); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); A21_MC_STAR.AlignWith( A22 ); A21_VC_STAR.AlignWith( A22 ); A21_VR_STAR.AlignWith( A22 ); A21Adj_STAR_MR.AlignWith( A22 ); L21_MC_STAR.AlignWith( A22 ); L21_VC_STAR.AlignWith( A22 ); L21_VR_STAR.AlignWith( A22 ); L21Adj_STAR_MR.AlignWith( A22 ); Y21.AlignWith( A21 ); Y21_VC_STAR.AlignWith( A22 ); //--------------------------------------------------------------------// // A11 := inv(L11) A11 inv(L11)' L11_STAR_STAR = L11; A11_STAR_STAR = A11; LocalTwoSidedTrsm( LOWER, diag, A11_STAR_STAR, L11_STAR_STAR ); A11 = A11_STAR_STAR; // Y21 := L21 A11 L21_VC_STAR = L21; Zeros( Y21_VC_STAR, A21.Height(), A21.Width() ); Hemm ( RIGHT, LOWER, F(1), A11_STAR_STAR.Matrix(), L21_VC_STAR.Matrix(), F(0), Y21_VC_STAR.Matrix() ); Y21 = Y21_VC_STAR; // A21 := A21 inv(L11)' A21_VC_STAR = A21; LocalTrsm ( RIGHT, LOWER, ADJOINT, diag, F(1), L11_STAR_STAR, A21_VC_STAR ); A21 = A21_VC_STAR; // A21 := A21 - 1/2 Y21 Axpy( F(-1)/F(2), Y21, A21 ); // A22 := A22 - (L21 A21' + A21 L21') A21_MC_STAR = A21; L21_MC_STAR = L21; A21_VC_STAR = A21_MC_STAR; A21_VR_STAR = A21_VC_STAR; L21_VR_STAR = L21_VC_STAR; A21Adj_STAR_MR.AdjointFrom( A21_VR_STAR ); L21Adj_STAR_MR.AdjointFrom( L21_VR_STAR ); LocalTrr2k ( LOWER, F(-1), L21_MC_STAR, A21Adj_STAR_MR, A21_MC_STAR, L21Adj_STAR_MR, F(1), A22 ); // A21 := A21 - 1/2 Y21 Axpy( F(-1)/F(2), Y21, A21 ); // A21 := inv(L22) A21 // // This is the bottleneck because A21 only has blocksize columns Trsm( LEFT, LOWER, NORMAL, diag, F(1), L22, A21 ); //--------------------------------------------------------------------// SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /**********************************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); } }
inline void internal::HegstRLVar3( DistMatrix<F,MC,MR>& A, const DistMatrix<F,MC,MR>& L ) { #ifndef RELEASE PushCallStack("internal::HegstRLVar4"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( L.Height() != L.Width() ) throw std::logic_error("Triangular matrices must be square"); if( A.Height() != L.Height() ) throw std::logic_error("A and L must be the same size"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F,MC,MR> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F,MC,MR> YTL(g), YTR(g), Y00(g), Y01(g), Y02(g), YBL(g), YBR(g), Y10(g), Y11(g), Y12(g), Y20(g), Y21(g), Y22(g); DistMatrix<F,MC,MR> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); // Temporary distributions DistMatrix<F,STAR,MR > A11_STAR_MR(g); DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,VC, STAR> A21_VC_STAR(g); DistMatrix<F,STAR,VR > A10_STAR_VR(g); DistMatrix<F,STAR,MR > A10_STAR_MR(g); DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,STAR,VR > L10_STAR_VR(g); DistMatrix<F,STAR,MR > L10_STAR_MR(g); DistMatrix<F,MC, STAR> L21_MC_STAR(g); DistMatrix<F,STAR,STAR> X11_STAR_STAR(g); DistMatrix<F,MC, STAR> X21_MC_STAR(g); DistMatrix<F,MC, STAR> Z21_MC_STAR(g); // We will use an entire extra matrix as temporary storage. If this is not // acceptable, use HegstRLVar4 instead. DistMatrix<F,MC,MR> Y(g); Y.AlignWith( A ); Y.ResizeTo( A.Height(), A.Width() ); Zero( Y ); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); PartitionDownDiagonal ( Y, YTL, YTR, YBL, YBR, 0 ); LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); RepartitionDownDiagonal ( YTL, /**/ YTR, Y00, /**/ Y01, Y02, /*************/ /******************/ /**/ Y10, /**/ Y11, Y12, YBL, /**/ YBR, Y20, /**/ Y21, Y22 ); LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); A11_STAR_MR.AlignWith( Y21 ); A21_VC_STAR.AlignWith( A21 ); A10_STAR_VR.AlignWith( A10 ); A10_STAR_MR.AlignWith( A10 ); L10_STAR_VR.AlignWith( A10 ); L10_STAR_MR.AlignWith( A10 ); L21_MC_STAR.AlignWith( Y21 ); X21_MC_STAR.AlignWith( A20 ); Z21_MC_STAR.AlignWith( L20 ); //--------------------------------------------------------------------// // A10 := A10 - 1/2 Y10 Axpy( (F)-0.5, Y10, A10 ); // A11 := A11 - (A10 L10' + L10 A10') A10_STAR_VR = A10; L10_STAR_VR = L10; X11_STAR_STAR.ResizeTo( A11.Height(), A11.Width() ); Her2k ( LOWER, NORMAL, (F)1, A10_STAR_VR.LocalMatrix(), L10_STAR_VR.LocalMatrix(), (F)0, X11_STAR_STAR.LocalMatrix() ); MakeTrapezoidal( LEFT, LOWER, 0, X11_STAR_STAR ); A11.SumScatterUpdate( (F)-1, X11_STAR_STAR ); // A11 := inv(L11) A11 inv(L11)' A11_STAR_STAR = A11; L11_STAR_STAR = L11; internal::LocalHegst( RIGHT, LOWER, A11_STAR_STAR, L11_STAR_STAR ); A11 = A11_STAR_STAR; // A21 := A21 - A20 L10' L10_STAR_MR = L10_STAR_VR; X21_MC_STAR.ResizeTo( A21.Height(), A21.Width() ); internal::LocalGemm ( NORMAL, ADJOINT, (F)1, A20, L10_STAR_MR, (F)0, X21_MC_STAR ); A21.SumScatterUpdate( (F)-1, X21_MC_STAR ); // A21 := A21 inv(L11)' A21_VC_STAR = A21; internal::LocalTrsm ( RIGHT, LOWER, ADJOINT, NON_UNIT, (F)1, L11_STAR_STAR, A21_VC_STAR ); A21 = A21_VC_STAR; // A10 := A10 - 1/2 Y10 Axpy( (F)-0.5, Y10, A10 ); // A10 := inv(L11) A10 A10_STAR_VR = A10; internal::LocalTrsm ( LEFT, LOWER, NORMAL, NON_UNIT, (F)1, L11_STAR_STAR, A10_STAR_VR ); // Y20 := Y20 + L21 A10 A10_STAR_MR = A10_STAR_VR; A10 = A10_STAR_MR; L21_MC_STAR = L21; internal::LocalGemm ( NORMAL, NORMAL, (F)1, L21_MC_STAR, A10_STAR_MR, (F)1, Y20 ); // Y21 := L21 A11 // // Symmetrize A11[* ,* ] by copying the lower triangle into the upper // so that we can call a local gemm instead of worrying about // reproducing a hemm with nonsymmetric local matrices. { const int height = A11_STAR_STAR.LocalHeight(); const int ldim = A11_STAR_STAR.LocalLDim(); F* A11Buffer = A11_STAR_STAR.LocalBuffer(); for( int i=1; i<height; ++i ) for( int j=0; j<i; ++j ) A11Buffer[j+i*ldim] = Conj(A11Buffer[i+j*ldim]); } A11_STAR_MR = A11_STAR_STAR; internal::LocalGemm ( NORMAL, NORMAL, (F)1, L21_MC_STAR, A11_STAR_MR, (F)0, Y21 ); // Y21 := Y21 + L20 A10' Z21_MC_STAR.ResizeTo( A21.Height(), A21.Width() ); internal::LocalGemm ( NORMAL, ADJOINT, (F)1, L20, A10_STAR_MR, (F)0, Z21_MC_STAR ); Y21.SumScatterUpdate( (F)1, Z21_MC_STAR ); //--------------------------------------------------------------------// A11_STAR_MR.FreeAlignments(); A21_VC_STAR.FreeAlignments(); A10_STAR_VR.FreeAlignments(); A10_STAR_MR.FreeAlignments(); L10_STAR_VR.FreeAlignments(); L10_STAR_MR.FreeAlignments(); L21_MC_STAR.FreeAlignments(); X21_MC_STAR.FreeAlignments(); Z21_MC_STAR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlidePartitionDownDiagonal ( YTL, /**/ YTR, Y00, Y01, /**/ Y02, /**/ Y10, Y11, /**/ Y12, /*************/ /******************/ YBL, /**/ YBR, Y20, Y21, /**/ Y22 ); SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /**********************************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void TrtrsmLLN ( UnitOrNonUnit diag, F alpha, const DistMatrix<F>& L, DistMatrix<F>& X, bool checkIfSingular ) { #ifndef RELEASE CallStackEntry entry("internal::TrtrsmLLN"); #endif const Grid& g = L.Grid(); // Matrix views DistMatrix<F> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); DistMatrix<F> XTL(g), XTR(g), X00(g), X01(g), X02(g), XBL(g), XBR(g), X10(g), X11(g), X12(g), X20(g), X21(g), X22(g); // Temporary distributions DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,MC, STAR> L21_MC_STAR(g); DistMatrix<F,STAR,MR > X10_STAR_MR(g); DistMatrix<F,STAR,VR > X10_STAR_VR(g); DistMatrix<F,STAR,MR > X11_STAR_MR(g); DistMatrix<F,STAR,STAR> X11_STAR_STAR(g); // Start the algorithm ScaleTrapezoid( alpha, LOWER, X ); LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); PartitionDownDiagonal ( X, XTL, XTR, XBL, XBR, 0 ); while( XBR.Height() > 0 ) { LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); RepartitionDownDiagonal ( XTL, /**/ XTR, X00, /**/ X01, X02, /*************/ /******************/ /**/ X10, /**/ X11, X12, XBL, /**/ XBR, X20, /**/ X21, X22 ); L21_MC_STAR.AlignWith( X20 ); X10_STAR_MR.AlignWith( X20 ); X11_STAR_MR.AlignWith( X21 ); //--------------------------------------------------------------------// L11_STAR_STAR = L11; X11_STAR_STAR = X11; X10_STAR_VR = X10; LocalTrsm ( LEFT, LOWER, NORMAL, diag, F(1), L11_STAR_STAR, X10_STAR_VR, checkIfSingular ); LocalTrtrsm ( LEFT, LOWER, NORMAL, diag, F(1), L11_STAR_STAR, X11_STAR_STAR, checkIfSingular ); X11 = X11_STAR_STAR; X11_STAR_MR = X11_STAR_STAR; MakeTriangular( LOWER, X11_STAR_MR ); X10_STAR_MR = X10_STAR_VR; X10 = X10_STAR_MR; L21_MC_STAR = L21; LocalGemm ( NORMAL, NORMAL, F(-1), L21_MC_STAR, X10_STAR_MR, F(1), X20 ); LocalGemm ( NORMAL, NORMAL, F(-1), L21_MC_STAR, X11_STAR_MR, F(1), X21 ); //--------------------------------------------------------------------// SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); SlidePartitionDownDiagonal ( XTL, /**/ XTR, X00, X01, /**/ X02, /**/ X10, X11, /**/ X12, /*************/ /******************/ XBL, /**/ XBR, X20, X21, /**/ X22 ); } }
inline void TwoSidedTrmmLVar2 ( UnitOrNonUnit diag, DistMatrix<F>& A, const DistMatrix<F>& L ) { #ifndef RELEASE PushCallStack("internal::TwoSidedTrmmLVar2"); if( A.Height() != A.Width() ) throw std::logic_error( "A must be square." ); if( L.Height() != L.Width() ) throw std::logic_error( "Triangular matrices must be square." ); if( A.Height() != L.Height() ) throw std::logic_error( "A and L must be the same size." ); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); // Temporary distributions DistMatrix<F,STAR,VR > A10_STAR_VR(g); DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,VC, STAR> A21_VC_STAR(g); DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,MC, STAR> L21_MC_STAR(g); DistMatrix<F,STAR,MR > L21Adj_STAR_MR(g); DistMatrix<F,VC, STAR> L21_VC_STAR(g); DistMatrix<F,VR, STAR> L21_VR_STAR(g); DistMatrix<F,STAR,MR > X10_STAR_MR(g); DistMatrix<F,STAR,STAR> X11_STAR_STAR(g); DistMatrix<F,MC, STAR> Z21_MC_STAR(g); DistMatrix<F,MR, STAR> Z21_MR_STAR(g); DistMatrix<F,MR, MC > Z21_MR_MC(g); DistMatrix<F> Y21(g); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); A21_VC_STAR.AlignWith( A22 ); L21_MC_STAR.AlignWith( A20 ); L21_VC_STAR.AlignWith( A22 ); L21_VR_STAR.AlignWith( A22 ); L21Adj_STAR_MR.AlignWith( A22 ); X10_STAR_MR.AlignWith( A10 ); Y21.AlignWith( A21 ); Z21_MC_STAR.AlignWith( A22 ); Z21_MR_STAR.AlignWith( A22 ); //--------------------------------------------------------------------// // A10 := L11' A10 L11_STAR_STAR = L11; A10_STAR_VR = A10; LocalTrmm ( LEFT, LOWER, ADJOINT, diag, F(1), L11_STAR_STAR, A10_STAR_VR ); A10 = A10_STAR_VR; // A10 := A10 + L21' A20 L21_MC_STAR = L21; X10_STAR_MR.ResizeTo( A10.Height(), A10.Width() ); LocalGemm( ADJOINT, NORMAL, F(1), L21_MC_STAR, A20, F(0), X10_STAR_MR ); A10.SumScatterUpdate( F(1), X10_STAR_MR ); // Y21 := A22 L21 L21_VC_STAR = L21_MC_STAR; L21_VR_STAR = L21_VC_STAR; L21Adj_STAR_MR.AdjointFrom( L21_VR_STAR ); Z21_MC_STAR.ResizeTo( A21.Height(), A21.Width() ); Z21_MR_STAR.ResizeTo( A21.Height(), A21.Width() ); Zero( Z21_MC_STAR ); Zero( Z21_MR_STAR ); LocalSymmetricAccumulateLL ( ADJOINT, F(1), A22, L21_MC_STAR, L21Adj_STAR_MR, Z21_MC_STAR, Z21_MR_STAR ); Z21_MR_MC.SumScatterFrom( Z21_MR_STAR ); Y21 = Z21_MR_MC; Y21.SumScatterUpdate( F(1), Z21_MC_STAR ); // A21 := A21 L11 A21_VC_STAR = A21; LocalTrmm ( RIGHT, LOWER, NORMAL, diag, F(1), L11_STAR_STAR, A21_VC_STAR ); A21 = A21_VC_STAR; // A21 := A21 + 1/2 Y21 Axpy( F(1)/F(2), Y21, A21 ); // A11 := L11' A11 L11 A11_STAR_STAR = A11; LocalTwoSidedTrmm( LOWER, diag, A11_STAR_STAR, L11_STAR_STAR ); A11 = A11_STAR_STAR; // A11 := A11 + (A21' L21 + L21' A21) A21_VC_STAR = A21; X11_STAR_STAR.ResizeTo( A11.Height(), A11.Width() ); Her2k ( LOWER, ADJOINT, F(1), A21_VC_STAR.LocalMatrix(), L21_VC_STAR.LocalMatrix(), F(0), X11_STAR_STAR.LocalMatrix() ); A11.SumScatterUpdate( F(1), X11_STAR_STAR ); // A21 := A21 + 1/2 Y21 Axpy( F(1)/F(2), Y21, A21 ); //--------------------------------------------------------------------// A21_VC_STAR.FreeAlignments(); L21_MC_STAR.FreeAlignments(); L21_VC_STAR.FreeAlignments(); L21_VR_STAR.FreeAlignments(); L21Adj_STAR_MR.FreeAlignments(); X10_STAR_MR.FreeAlignments(); Y21.FreeAlignments(); Z21_MC_STAR.FreeAlignments(); Z21_MR_STAR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void TriangularInverseLVar3( UnitOrNonUnit diag, DistMatrix<F>& L ) { #ifndef RELEASE PushCallStack("internal::TriangularInverseLVar3"); if( L.Height() != L.Width() ) throw std::logic_error("Nonsquare matrices cannot be triangular"); #endif const Grid& g = L.Grid(); // Matrix views DistMatrix<F> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); // Temporary distributions DistMatrix<F,STAR,MR > L10_STAR_MR(g); DistMatrix<F,STAR,VR > L10_STAR_VR(g); DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,MC, STAR> L21_MC_STAR(g); DistMatrix<F,VC, STAR> L21_VC_STAR(g); // Start the algorithm PartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); while( LTL.Height() < L.Height() ) { RepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); L10_STAR_MR.AlignWith( L20 ); L21_MC_STAR.AlignWith( L20 ); //--------------------------------------------------------------------// L10_STAR_VR = L10; L11_STAR_STAR = L11; LocalTrsm ( LEFT, LOWER, NORMAL, diag, F(-1), L11_STAR_STAR, L10_STAR_VR ); L21_MC_STAR = L21; L10_STAR_MR = L10_STAR_VR; LocalGemm ( NORMAL, NORMAL, F(1), L21_MC_STAR, L10_STAR_MR, F(1), L20 ); L10 = L10_STAR_MR; L21_VC_STAR = L21_MC_STAR; LocalTrsm ( RIGHT, LOWER, NORMAL, diag, F(1), L11_STAR_STAR, L21_VC_STAR ); LocalTriangularInverse( LOWER, diag, L11_STAR_STAR ); L11 = L11_STAR_STAR; L21 = L21_VC_STAR; //--------------------------------------------------------------------// L10_STAR_MR.FreeAlignments(); L21_MC_STAR.FreeAlignments(); SlidePartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); } #ifndef RELEASE PopCallStack(); #endif }