inline void TrsmLLTLarge ( Orientation orientation, UnitOrNonUnit diag, F alpha, const DistMatrix<F>& L, DistMatrix<F>& X, bool checkIfSingular ) { #ifndef RELEASE PushCallStack("internal::TrsmLLTLarge"); if( orientation == NORMAL ) throw std::logic_error("TrsmLLT expects a (Conjugate)Transpose option"); #endif const Grid& g = L.Grid(); // Matrix views DistMatrix<F> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); DistMatrix<F> XT(g), X0(g), XB(g), X1(g), X2(g); // Temporary distributions DistMatrix<F,STAR,MC > L10_STAR_MC(g); DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,STAR,MR > X1_STAR_MR(g); DistMatrix<F,STAR,VR > X1_STAR_VR(g); // Start the algorithm Scale( alpha, X ); LockedPartitionUpDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); PartitionUp ( X, XT, XB, 0 ); while( XT.Height() > 0 ) { LockedRepartitionUpDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); RepartitionUp ( XT, X0, X1, /**/ /**/ XB, X2 ); L10_STAR_MC.AlignWith( X0 ); X1_STAR_MR.AlignWith( X0 ); //--------------------------------------------------------------------// L11_STAR_STAR = L11; // L11[* ,* ] <- L11[MC,MR] X1_STAR_VR = X1; // X1[* ,VR] <- X1[MC,MR] // X1[* ,VR] := L11^-[T/H][* ,* ] X1[* ,VR] LocalTrsm ( LEFT, LOWER, orientation, diag, F(1), L11_STAR_STAR, X1_STAR_VR, checkIfSingular ); X1_STAR_MR = X1_STAR_VR; // X1[* ,MR] <- X1[* ,VR] X1 = X1_STAR_MR; // X1[MC,MR] <- X1[* ,MR] L10_STAR_MC = L10; // L10[* ,MC] <- L10[MC,MR] // X0[MC,MR] -= (L10[* ,MC])^(T/H) X1[* ,MR] // = L10^[T/H][MC,* ] X1[* ,MR] LocalGemm ( orientation, NORMAL, F(-1), L10_STAR_MC, X1_STAR_MR, F(1), X0 ); //--------------------------------------------------------------------// L10_STAR_MC.FreeAlignments(); X1_STAR_MR.FreeAlignments(); SlideLockedPartitionUpDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); SlidePartitionUp ( XT, X0, /**/ /**/ X1, XB, X2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void TwoSidedTrmmLVar4 ( UnitOrNonUnit diag, DistMatrix<F>& A, const DistMatrix<F>& L ) { #ifndef RELEASE CallStackEntry entry("internal::TwoSidedTrmmLVar4"); if( A.Height() != A.Width() ) LogicError("A must be square"); if( L.Height() != L.Width() ) LogicError("Triangular matrices must be square"); if( A.Height() != L.Height() ) LogicError("A and L must be the same size"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); // Temporary distributions DistMatrix<F,STAR,VR > A10_STAR_VR(g); DistMatrix<F,STAR,MR > A10_STAR_MR(g); DistMatrix<F,STAR,MC > A10_STAR_MC(g); DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,VC, STAR> A21_VC_STAR(g); DistMatrix<F,MC, STAR> A21_MC_STAR(g); DistMatrix<F,STAR,VR > L10_STAR_VR(g); DistMatrix<F,MR, STAR> L10Adj_MR_STAR(g); DistMatrix<F,STAR,MC > L10_STAR_MC(g); DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,STAR,VR > Y10_STAR_VR(g); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); A10_STAR_VR.AlignWith( A00 ); A10_STAR_MR.AlignWith( A00 ); A10_STAR_MC.AlignWith( A00 ); A21_MC_STAR.AlignWith( A20 ); L10_STAR_VR.AlignWith( A00 ); L10Adj_MR_STAR.AlignWith( A00 ); L10_STAR_MC.AlignWith( A00 ); Y10_STAR_VR.AlignWith( A10 ); //--------------------------------------------------------------------// // Y10 := A11 L10 A11_STAR_STAR = A11; L10Adj_MR_STAR.AdjointFrom( L10 ); L10_STAR_VR.AdjointFrom( L10Adj_MR_STAR ); Zeros( Y10_STAR_VR, A10.Height(), A10.Width() ); Hemm ( LEFT, LOWER, F(1), A11_STAR_STAR.LockedMatrix(), L10_STAR_VR.LockedMatrix(), F(0), Y10_STAR_VR.Matrix() ); // A10 := A10 + 1/2 Y10 A10_STAR_VR = A10; Axpy( F(1)/F(2), Y10_STAR_VR, A10_STAR_VR ); // A00 := A00 + (A10' L10 + L10' A10) A10_STAR_MR = A10_STAR_VR; A10_STAR_MC = A10_STAR_VR; L10_STAR_MC = L10_STAR_VR; LocalTrr2k ( LOWER, ADJOINT, ADJOINT, ADJOINT, F(1), A10_STAR_MC, L10Adj_MR_STAR, L10_STAR_MC, A10_STAR_MR, F(1), A00 ); // A10 := A10 + 1/2 Y10 Axpy( F(1)/F(2), Y10_STAR_VR, A10_STAR_VR ); // A10 := L11' A10 L11_STAR_STAR = L11; LocalTrmm ( LEFT, LOWER, ADJOINT, diag, F(1), L11_STAR_STAR, A10_STAR_VR ); A10 = A10_STAR_VR; // A20 := A20 + A21 L10 A21_MC_STAR = A21; LocalGemm ( NORMAL, ADJOINT, F(1), A21_MC_STAR, L10Adj_MR_STAR, F(1), A20 ); // A11 := L11' A11 L11 LocalTwoSidedTrmm( LOWER, diag, A11_STAR_STAR, L11_STAR_STAR ); A11 = A11_STAR_STAR; // A21 := A21 L11 A21_VC_STAR = A21_MC_STAR; LocalTrmm ( RIGHT, LOWER, NORMAL, diag, F(1), L11_STAR_STAR, A21_VC_STAR ); A21 = A21_VC_STAR; //--------------------------------------------------------------------// SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); } }
inline void TrmmLLNC ( UnitOrNonUnit diag, T alpha, const DistMatrix<T>& L, DistMatrix<T>& X ) { #ifndef RELEASE CallStackEntry entry("internal::TrmmLLNC"); if( L.Grid() != X.Grid() ) throw std::logic_error ("L and X must be distributed over the same grid"); if( L.Height() != L.Width() || L.Width() != X.Height() ) { std::ostringstream msg; msg << "Nonconformal TrmmLLNC: \n" << " L ~ " << L.Height() << " x " << L.Width() << "\n" << " X ~ " << X.Height() << " x " << X.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = L.Grid(); // Matrix views DistMatrix<T> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); DistMatrix<T> XT(g), X0(g), XB(g), X1(g), X2(g); // Temporary distributions DistMatrix<T,MC, STAR> L21_MC_STAR(g); DistMatrix<T,STAR,STAR> L11_STAR_STAR(g); DistMatrix<T,STAR,VR > X1_STAR_VR(g); DistMatrix<T,MR, STAR> X1Trans_MR_STAR(g); // Start the algorithm Scale( alpha, X ); LockedPartitionUpDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); PartitionUp ( X, XT, XB, 0 ); while( XT.Height() > 0 ) { LockedRepartitionUpDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); RepartitionUp ( XT, X0, X1, /**/ /**/ XB, X2 ); L21_MC_STAR.AlignWith( X2 ); X1Trans_MR_STAR.AlignWith( X2 ); X1_STAR_VR.AlignWith( X1 ); //--------------------------------------------------------------------// L21_MC_STAR = L21; X1Trans_MR_STAR.TransposeFrom( X1 ); LocalGemm ( NORMAL, TRANSPOSE, T(1), L21_MC_STAR, X1Trans_MR_STAR, T(1), X2 ); L11_STAR_STAR = L11; X1_STAR_VR.TransposeFrom( X1Trans_MR_STAR ); LocalTrmm( LEFT, LOWER, NORMAL, diag, T(1), L11_STAR_STAR, X1_STAR_VR ); X1 = X1_STAR_VR; //--------------------------------------------------------------------// L21_MC_STAR.FreeAlignments(); X1Trans_MR_STAR.FreeAlignments(); X1_STAR_VR.FreeAlignments(); SlideLockedPartitionUpDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); SlidePartitionUp ( XT, X0, /**/ /**/ X1, XB, X2 ); } }
inline void TrsmRLT ( Orientation orientation, UnitOrNonUnit diag, F alpha, const DistMatrix<F>& L, DistMatrix<F>& X, bool checkIfSingular ) { #ifndef RELEASE CallStackEntry entry("internal::TrsmRLT"); if( orientation == NORMAL ) LogicError("TrsmRLT expects a (Conjugate)Transpose option"); #endif const Grid& g = L.Grid(); // Matrix views DistMatrix<F> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); DistMatrix<F> XL(g), XR(g), X0(g), X1(g), X2(g); // Temporary distributions DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,VR, STAR> L21_VR_STAR(g); DistMatrix<F,STAR,MR > L21AdjOrTrans_STAR_MR(g); DistMatrix<F,VC, STAR> X1_VC_STAR(g); DistMatrix<F,STAR,MC > X1Trans_STAR_MC(g); // Start the algorithm Scale( alpha, X ); LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); PartitionRight( X, XL, XR, 0 ); while( XR.Width() > 0 ) { LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); RepartitionRight ( XL, /**/ XR, X0, /**/ X1, X2 ); X1_VC_STAR.AlignWith( X2 ); X1Trans_STAR_MC.AlignWith( X2 ); L21_VR_STAR.AlignWith( X2 ); L21AdjOrTrans_STAR_MR.AlignWith( X2 ); //--------------------------------------------------------------------// L11_STAR_STAR = L11; X1_VC_STAR = X1; LocalTrsm ( RIGHT, LOWER, orientation, diag, F(1), L11_STAR_STAR, X1_VC_STAR, checkIfSingular ); X1Trans_STAR_MC.TransposeFrom( X1_VC_STAR ); X1.TransposeFrom( X1Trans_STAR_MC ); L21_VR_STAR = L21; if( orientation == ADJOINT ) L21AdjOrTrans_STAR_MR.AdjointFrom( L21_VR_STAR ); else L21AdjOrTrans_STAR_MR.TransposeFrom( L21_VR_STAR ); // X2[MC,MR] -= X1[MC,*] (L21[MR,*])^(T/H) // = X1^T[* ,MC] (L21^(T/H))[*,MR] LocalGemm ( TRANSPOSE, NORMAL, F(-1), X1Trans_STAR_MC, L21AdjOrTrans_STAR_MR, F(1), X2 ); //--------------------------------------------------------------------// SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); SlidePartitionRight ( XL, /**/ XR, X0, X1, /**/ X2 ); } }
inline void internal::HegstLLVar4( DistMatrix<F,MC,MR>& A, const DistMatrix<F,MC,MR>& L ) { #ifndef RELEASE PushCallStack("internal::HegstLLVar4"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( L.Height() != L.Width() ) throw std::logic_error("Triangular matrices must be square"); if( A.Height() != L.Height() ) throw std::logic_error("A and L must be the same size"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F,MC,MR> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F,MC,MR> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); // Temporary distributions DistMatrix<F,STAR,VR > A10_STAR_VR(g); DistMatrix<F,STAR,MR > A10_STAR_MR(g); DistMatrix<F,STAR,MC > A10_STAR_MC(g); DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,VC, STAR> A21_VC_STAR(g); DistMatrix<F,MC, STAR> A21_MC_STAR(g); DistMatrix<F,STAR,VR > L10_STAR_VR(g); DistMatrix<F,STAR,MR > L10_STAR_MR(g); DistMatrix<F,STAR,MC > L10_STAR_MC(g); DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,STAR,VR > Y10_STAR_VR(g); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); A10_STAR_VR.AlignWith( A00 ); A10_STAR_MR.AlignWith( A00 ); A10_STAR_MC.AlignWith( A00 ); A21_MC_STAR.AlignWith( A20 ); L10_STAR_VR.AlignWith( A00 ); L10_STAR_MR.AlignWith( A00 ); L10_STAR_MC.AlignWith( A00 ); Y10_STAR_VR.AlignWith( A10 ); //--------------------------------------------------------------------// // Y10 := A11 L10 A11_STAR_STAR = A11; L10_STAR_VR = L10; Y10_STAR_VR.ResizeTo( A10.Height(), A10.Width() ); Zero( Y10_STAR_VR ); Hemm ( LEFT, LOWER, (F)0.5, A11_STAR_STAR.LockedLocalMatrix(), L10_STAR_VR.LockedLocalMatrix(), (F)0, Y10_STAR_VR.LocalMatrix() ); // A10 := A10 + 1/2 Y10 A10_STAR_VR = A10; Axpy( (F)1, Y10_STAR_VR, A10_STAR_VR ); // A00 := A00 + (A10' L10 + L10' A10) A10_STAR_MR = A10_STAR_VR; A10_STAR_MC = A10_STAR_VR; L10_STAR_MR = L10_STAR_VR; L10_STAR_MC = L10_STAR_VR; internal::LocalTrr2k ( LOWER, ADJOINT, ADJOINT, (F)1, A10_STAR_MC, L10_STAR_MR, L10_STAR_MC, A10_STAR_MR, (F)1, A00 ); // A10 := A10 + 1/2 Y10 Axpy( (F)1, Y10_STAR_VR, A10_STAR_VR ); // A10 := L11' A10 L11_STAR_STAR = L11; internal::LocalTrmm ( LEFT, LOWER, ADJOINT, NON_UNIT, (F)1, L11_STAR_STAR, A10_STAR_VR ); A10 = A10_STAR_VR; // A20 := A20 + A21 L10 A21_MC_STAR = A21; internal::LocalGemm ( NORMAL, NORMAL, (F)1, A21_MC_STAR, L10_STAR_MR, (F)1, A20 ); // A11 := L11' A11 L11 internal::LocalHegst ( LEFT, LOWER, A11_STAR_STAR, L11_STAR_STAR ); A11 = A11_STAR_STAR; // A21 := A21 L11 A21_VC_STAR = A21_MC_STAR; internal::LocalTrmm ( RIGHT, LOWER, NORMAL, NON_UNIT, (F)1, L11_STAR_STAR, A21_VC_STAR ); A21 = A21_VC_STAR; //--------------------------------------------------------------------// A10_STAR_VR.FreeAlignments(); A10_STAR_MR.FreeAlignments(); A10_STAR_MC.FreeAlignments(); A21_MC_STAR.FreeAlignments(); L10_STAR_VR.FreeAlignments(); L10_STAR_MR.FreeAlignments(); L10_STAR_MC.FreeAlignments(); Y10_STAR_VR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void LocalTrmmAccumulateLLT ( Orientation orientation, UnitOrNonUnit diag, T alpha, const DistMatrix<T>& L, const DistMatrix<T,MC,STAR>& X_MC_STAR, DistMatrix<T,MR,STAR>& Z_MR_STAR ) { #ifndef RELEASE PushCallStack("internal::LocalTrmmAccumulateLLT"); if( L.Grid() != X_MC_STAR.Grid() || X_MC_STAR.Grid() != Z_MR_STAR.Grid() ) throw std::logic_error ("{L,X,Z} must be distributed over the same grid"); if( L.Height() != L.Width() || L.Height() != X_MC_STAR.Height() || L.Height() != Z_MR_STAR.Height() ) { std::ostringstream msg; msg << "Nonconformal LocalTrmmAccumulateLLT: " << "\n" << " L ~ " << L.Height() << " x " << L.Width() << "\n" << " X[MC,* ] ~ " << X_MC_STAR.Height() << " x " << X_MC_STAR.Width() << "\n" << " Z[MR,* ] ` " << Z_MR_STAR.Height() << " x " << Z_MR_STAR.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } if( X_MC_STAR.ColAlignment() != L.ColAlignment() || Z_MR_STAR.ColAlignment() != L.RowAlignment() ) throw std::logic_error("Partial matrix distributions are misaligned"); #endif const Grid& g = L.Grid(); // Matrix views DistMatrix<T> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); DistMatrix<T> D11(g); DistMatrix<T,MC,STAR> XT_MC_STAR(g), X0_MC_STAR(g), XB_MC_STAR(g), X1_MC_STAR(g), X2_MC_STAR(g); DistMatrix<T,MR,STAR> ZT_MR_STAR(g), Z0_MR_STAR(g), ZB_MR_STAR(g), Z1_MR_STAR(g), Z2_MR_STAR(g); const int ratio = std::max( g.Height(), g.Width() ); PushBlocksizeStack( ratio*Blocksize() ); LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); LockedPartitionDown ( X_MC_STAR, XT_MC_STAR, XB_MC_STAR, 0 ); PartitionDown ( Z_MR_STAR, ZT_MR_STAR, ZB_MR_STAR, 0 ); while( LTL.Height() < L.Height() ) { LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); LockedRepartitionDown ( XT_MC_STAR, X0_MC_STAR, /**********/ /**********/ X1_MC_STAR, XB_MC_STAR, X2_MC_STAR ); RepartitionDown ( ZT_MR_STAR, Z0_MR_STAR, /**********/ /**********/ Z1_MR_STAR, ZB_MR_STAR, Z2_MR_STAR ); D11.AlignWith( L11 ); //--------------------------------------------------------------------// D11 = L11; MakeTrapezoidal( LEFT, LOWER, 0, D11 ); if( diag == UNIT ) SetDiagonalToOne( D11 ); LocalGemm ( orientation, NORMAL, alpha, D11, X1_MC_STAR, T(1), Z1_MR_STAR ); LocalGemm ( orientation, NORMAL, alpha, L21, X2_MC_STAR, T(1), Z1_MR_STAR ); //--------------------------------------------------------------------// D11.FreeAlignments(); SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); SlideLockedPartitionDown ( XT_MC_STAR, X0_MC_STAR, X1_MC_STAR, /**********/ /**********/ XB_MC_STAR, X2_MC_STAR ); SlidePartitionDown ( ZT_MR_STAR, Z0_MR_STAR, Z1_MR_STAR, /**********/ /**********/ ZB_MR_STAR, Z2_MR_STAR ); } PopBlocksizeStack(); #ifndef RELEASE PopCallStack(); #endif }
inline void TriangularInverseLVar3( UnitOrNonUnit diag, DistMatrix<F>& L ) { #ifndef RELEASE PushCallStack("internal::TriangularInverseLVar3"); if( L.Height() != L.Width() ) throw std::logic_error("Nonsquare matrices cannot be triangular"); #endif const Grid& g = L.Grid(); // Matrix views DistMatrix<F> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); // Temporary distributions DistMatrix<F,STAR,MR > L10_STAR_MR(g); DistMatrix<F,STAR,VR > L10_STAR_VR(g); DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,MC, STAR> L21_MC_STAR(g); DistMatrix<F,VC, STAR> L21_VC_STAR(g); // Start the algorithm PartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); while( LTL.Height() < L.Height() ) { RepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); L10_STAR_MR.AlignWith( L20 ); L21_MC_STAR.AlignWith( L20 ); //--------------------------------------------------------------------// L10_STAR_VR = L10; L11_STAR_STAR = L11; LocalTrsm ( LEFT, LOWER, NORMAL, diag, F(-1), L11_STAR_STAR, L10_STAR_VR ); L21_MC_STAR = L21; L10_STAR_MR = L10_STAR_VR; LocalGemm ( NORMAL, NORMAL, F(1), L21_MC_STAR, L10_STAR_MR, F(1), L20 ); L10 = L10_STAR_MR; L21_VC_STAR = L21_MC_STAR; LocalTrsm ( RIGHT, LOWER, NORMAL, diag, F(1), L11_STAR_STAR, L21_VC_STAR ); LocalTriangularInverse( LOWER, diag, L11_STAR_STAR ); L11 = L11_STAR_STAR; L21 = L21_VC_STAR; //--------------------------------------------------------------------// L10_STAR_MR.FreeAlignments(); L21_MC_STAR.FreeAlignments(); SlidePartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void internal::HegstRLVar3( DistMatrix<F,MC,MR>& A, const DistMatrix<F,MC,MR>& L ) { #ifndef RELEASE PushCallStack("internal::HegstRLVar4"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( L.Height() != L.Width() ) throw std::logic_error("Triangular matrices must be square"); if( A.Height() != L.Height() ) throw std::logic_error("A and L must be the same size"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F,MC,MR> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F,MC,MR> YTL(g), YTR(g), Y00(g), Y01(g), Y02(g), YBL(g), YBR(g), Y10(g), Y11(g), Y12(g), Y20(g), Y21(g), Y22(g); DistMatrix<F,MC,MR> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); // Temporary distributions DistMatrix<F,STAR,MR > A11_STAR_MR(g); DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,VC, STAR> A21_VC_STAR(g); DistMatrix<F,STAR,VR > A10_STAR_VR(g); DistMatrix<F,STAR,MR > A10_STAR_MR(g); DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,STAR,VR > L10_STAR_VR(g); DistMatrix<F,STAR,MR > L10_STAR_MR(g); DistMatrix<F,MC, STAR> L21_MC_STAR(g); DistMatrix<F,STAR,STAR> X11_STAR_STAR(g); DistMatrix<F,MC, STAR> X21_MC_STAR(g); DistMatrix<F,MC, STAR> Z21_MC_STAR(g); // We will use an entire extra matrix as temporary storage. If this is not // acceptable, use HegstRLVar4 instead. DistMatrix<F,MC,MR> Y(g); Y.AlignWith( A ); Y.ResizeTo( A.Height(), A.Width() ); Zero( Y ); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); PartitionDownDiagonal ( Y, YTL, YTR, YBL, YBR, 0 ); LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); RepartitionDownDiagonal ( YTL, /**/ YTR, Y00, /**/ Y01, Y02, /*************/ /******************/ /**/ Y10, /**/ Y11, Y12, YBL, /**/ YBR, Y20, /**/ Y21, Y22 ); LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); A11_STAR_MR.AlignWith( Y21 ); A21_VC_STAR.AlignWith( A21 ); A10_STAR_VR.AlignWith( A10 ); A10_STAR_MR.AlignWith( A10 ); L10_STAR_VR.AlignWith( A10 ); L10_STAR_MR.AlignWith( A10 ); L21_MC_STAR.AlignWith( Y21 ); X21_MC_STAR.AlignWith( A20 ); Z21_MC_STAR.AlignWith( L20 ); //--------------------------------------------------------------------// // A10 := A10 - 1/2 Y10 Axpy( (F)-0.5, Y10, A10 ); // A11 := A11 - (A10 L10' + L10 A10') A10_STAR_VR = A10; L10_STAR_VR = L10; X11_STAR_STAR.ResizeTo( A11.Height(), A11.Width() ); Her2k ( LOWER, NORMAL, (F)1, A10_STAR_VR.LocalMatrix(), L10_STAR_VR.LocalMatrix(), (F)0, X11_STAR_STAR.LocalMatrix() ); MakeTrapezoidal( LEFT, LOWER, 0, X11_STAR_STAR ); A11.SumScatterUpdate( (F)-1, X11_STAR_STAR ); // A11 := inv(L11) A11 inv(L11)' A11_STAR_STAR = A11; L11_STAR_STAR = L11; internal::LocalHegst( RIGHT, LOWER, A11_STAR_STAR, L11_STAR_STAR ); A11 = A11_STAR_STAR; // A21 := A21 - A20 L10' L10_STAR_MR = L10_STAR_VR; X21_MC_STAR.ResizeTo( A21.Height(), A21.Width() ); internal::LocalGemm ( NORMAL, ADJOINT, (F)1, A20, L10_STAR_MR, (F)0, X21_MC_STAR ); A21.SumScatterUpdate( (F)-1, X21_MC_STAR ); // A21 := A21 inv(L11)' A21_VC_STAR = A21; internal::LocalTrsm ( RIGHT, LOWER, ADJOINT, NON_UNIT, (F)1, L11_STAR_STAR, A21_VC_STAR ); A21 = A21_VC_STAR; // A10 := A10 - 1/2 Y10 Axpy( (F)-0.5, Y10, A10 ); // A10 := inv(L11) A10 A10_STAR_VR = A10; internal::LocalTrsm ( LEFT, LOWER, NORMAL, NON_UNIT, (F)1, L11_STAR_STAR, A10_STAR_VR ); // Y20 := Y20 + L21 A10 A10_STAR_MR = A10_STAR_VR; A10 = A10_STAR_MR; L21_MC_STAR = L21; internal::LocalGemm ( NORMAL, NORMAL, (F)1, L21_MC_STAR, A10_STAR_MR, (F)1, Y20 ); // Y21 := L21 A11 // // Symmetrize A11[* ,* ] by copying the lower triangle into the upper // so that we can call a local gemm instead of worrying about // reproducing a hemm with nonsymmetric local matrices. { const int height = A11_STAR_STAR.LocalHeight(); const int ldim = A11_STAR_STAR.LocalLDim(); F* A11Buffer = A11_STAR_STAR.LocalBuffer(); for( int i=1; i<height; ++i ) for( int j=0; j<i; ++j ) A11Buffer[j+i*ldim] = Conj(A11Buffer[i+j*ldim]); } A11_STAR_MR = A11_STAR_STAR; internal::LocalGemm ( NORMAL, NORMAL, (F)1, L21_MC_STAR, A11_STAR_MR, (F)0, Y21 ); // Y21 := Y21 + L20 A10' Z21_MC_STAR.ResizeTo( A21.Height(), A21.Width() ); internal::LocalGemm ( NORMAL, ADJOINT, (F)1, L20, A10_STAR_MR, (F)0, Z21_MC_STAR ); Y21.SumScatterUpdate( (F)1, Z21_MC_STAR ); //--------------------------------------------------------------------// A11_STAR_MR.FreeAlignments(); A21_VC_STAR.FreeAlignments(); A10_STAR_VR.FreeAlignments(); A10_STAR_MR.FreeAlignments(); L10_STAR_VR.FreeAlignments(); L10_STAR_MR.FreeAlignments(); L21_MC_STAR.FreeAlignments(); X21_MC_STAR.FreeAlignments(); Z21_MC_STAR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlidePartitionDownDiagonal ( YTL, /**/ YTR, Y00, Y01, /**/ Y02, /**/ Y10, Y11, /**/ Y12, /*************/ /******************/ YBL, /**/ YBR, Y20, Y21, /**/ Y22 ); SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /**********************************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void TwoSidedTrsmLVar5 ( UnitOrNonUnit diag, DistMatrix<F>& A, const DistMatrix<F>& L ) { #ifndef RELEASE CallStackEntry entry("internal::TwoSidedTrsmLVar5"); if( A.Height() != A.Width() ) LogicError("A must be square"); if( L.Height() != L.Width() ) LogicError("Triangular matrices must be square"); if( A.Height() != L.Height() ) LogicError("A and L must be the same size"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); // Temporary distributions DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,MC, STAR> A21_MC_STAR(g); DistMatrix<F,VC, STAR> A21_VC_STAR(g); DistMatrix<F,VR, STAR> A21_VR_STAR(g); DistMatrix<F,STAR,MR > A21Adj_STAR_MR(g); DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,MC, STAR> L21_MC_STAR(g); DistMatrix<F,VC, STAR> L21_VC_STAR(g); DistMatrix<F,VR, STAR> L21_VR_STAR(g); DistMatrix<F,STAR,MR > L21Adj_STAR_MR(g); DistMatrix<F,VC, STAR> Y21_VC_STAR(g); DistMatrix<F> Y21(g); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); A21_MC_STAR.AlignWith( A22 ); A21_VC_STAR.AlignWith( A22 ); A21_VR_STAR.AlignWith( A22 ); A21Adj_STAR_MR.AlignWith( A22 ); L21_MC_STAR.AlignWith( A22 ); L21_VC_STAR.AlignWith( A22 ); L21_VR_STAR.AlignWith( A22 ); L21Adj_STAR_MR.AlignWith( A22 ); Y21.AlignWith( A21 ); Y21_VC_STAR.AlignWith( A22 ); //--------------------------------------------------------------------// // A11 := inv(L11) A11 inv(L11)' L11_STAR_STAR = L11; A11_STAR_STAR = A11; LocalTwoSidedTrsm( LOWER, diag, A11_STAR_STAR, L11_STAR_STAR ); A11 = A11_STAR_STAR; // Y21 := L21 A11 L21_VC_STAR = L21; Zeros( Y21_VC_STAR, A21.Height(), A21.Width() ); Hemm ( RIGHT, LOWER, F(1), A11_STAR_STAR.Matrix(), L21_VC_STAR.Matrix(), F(0), Y21_VC_STAR.Matrix() ); Y21 = Y21_VC_STAR; // A21 := A21 inv(L11)' A21_VC_STAR = A21; LocalTrsm ( RIGHT, LOWER, ADJOINT, diag, F(1), L11_STAR_STAR, A21_VC_STAR ); A21 = A21_VC_STAR; // A21 := A21 - 1/2 Y21 Axpy( F(-1)/F(2), Y21, A21 ); // A22 := A22 - (L21 A21' + A21 L21') A21_MC_STAR = A21; L21_MC_STAR = L21; A21_VC_STAR = A21_MC_STAR; A21_VR_STAR = A21_VC_STAR; L21_VR_STAR = L21_VC_STAR; A21Adj_STAR_MR.AdjointFrom( A21_VR_STAR ); L21Adj_STAR_MR.AdjointFrom( L21_VR_STAR ); LocalTrr2k ( LOWER, F(-1), L21_MC_STAR, A21Adj_STAR_MR, A21_MC_STAR, L21Adj_STAR_MR, F(1), A22 ); // A21 := A21 - 1/2 Y21 Axpy( F(-1)/F(2), Y21, A21 ); // A21 := inv(L22) A21 // // This is the bottleneck because A21 only has blocksize columns Trsm( LEFT, LOWER, NORMAL, diag, F(1), L22, A21 ); //--------------------------------------------------------------------// SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /**********************************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); } }
inline void TrsmRLN ( UnitOrNonUnit diag, F alpha, const DistMatrix<F>& L, DistMatrix<F>& X, bool checkIfSingular ) { #ifndef RELEASE PushCallStack("internal::TrsmRLN"); #endif const Grid& g = L.Grid(); // Matrix views DistMatrix<F> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); DistMatrix<F> XL(g), XR(g), X0(g), X1(g), X2(g); // Temporary distributions DistMatrix<F,MR, STAR> L10Trans_MR_STAR(g); DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,STAR,MC > X1Trans_STAR_MC(g); DistMatrix<F,VC, STAR> X1_VC_STAR(g); // Start the algorithm Scale( alpha, X ); LockedPartitionUpDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); PartitionLeft( X, XL, XR, 0 ); while( XL.Width() > 0 ) { LockedRepartitionUpDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); RepartitionLeft ( XL, /**/ XR, X0, X1, /**/ X2 ); X1Trans_STAR_MC.AlignWith( X0 ); L10Trans_MR_STAR.AlignWith( X0 ); //--------------------------------------------------------------------// L11_STAR_STAR = L11; X1_VC_STAR = X1; LocalTrsm ( RIGHT, LOWER, NORMAL, diag, F(1), L11_STAR_STAR, X1_VC_STAR, checkIfSingular ); // X0[MC,MR] -= X1[MC,* ] L10[*,MR] // = X1^T[* ,MC] L10^T[MR,* ] X1Trans_STAR_MC.TransposeFrom( X1_VC_STAR ); X1.TransposeFrom( X1Trans_STAR_MC ); L10Trans_MR_STAR.TransposeFrom( L10 ); LocalGemm ( TRANSPOSE, TRANSPOSE, F(-1), X1Trans_STAR_MC, L10Trans_MR_STAR, F(1), X0 ); //--------------------------------------------------------------------// X1Trans_STAR_MC.FreeAlignments(); L10Trans_MR_STAR.FreeAlignments(); SlideLockedPartitionUpDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); SlidePartitionLeft ( XL, /**/ XR, X0, /**/ X1, X2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void internal::TrsvLN ( UnitOrNonUnit diag, const DistMatrix<F,MC,MR>& L, DistMatrix<F,MC,MR>& x ) { #ifndef RELEASE PushCallStack("internal::TrsvLN"); if( L.Grid() != x.Grid() ) throw std::logic_error("{L,x} must be distributed over the same grid"); if( L.Height() != L.Width() ) throw std::logic_error("L must be square"); if( x.Width() != 1 && x.Height() != 1 ) throw std::logic_error("x must be a vector"); const int xLength = ( x.Width() == 1 ? x.Height() : x.Width() ); if( L.Width() != xLength ) throw std::logic_error("Nonconformal TrsvLN"); #endif const Grid& g = L.Grid(); if( x.Width() == 1 ) { // Matrix views DistMatrix<F,MC,MR> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); DistMatrix<F,MC,MR> xT(g), x0(g), xB(g), x1(g), x2(g); // Temporary distributions DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,STAR,STAR> x1_STAR_STAR(g); DistMatrix<F,MR, STAR> x1_MR_STAR(g); DistMatrix<F,MC, STAR> z2_MC_STAR(g); // Start the algorithm LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); PartitionDown ( x, xT, xB, 0 ); while( xB.Height() > 0 ) { LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); RepartitionDown ( xT, x0, /**/ /**/ x1, xB, x2 ); x1_MR_STAR.AlignWith( L21 ); z2_MC_STAR.AlignWith( L21 ); z2_MC_STAR.ResizeTo( x2.Height(), 1 ); //----------------------------------------------------------------// x1_STAR_STAR = x1; L11_STAR_STAR = L11; Trsv ( LOWER, NORMAL, diag, L11_STAR_STAR.LockedLocalMatrix(), x1_STAR_STAR.LocalMatrix() ); x1 = x1_STAR_STAR; x1_MR_STAR = x1_STAR_STAR; Gemv ( NORMAL, (F)-1, L21.LockedLocalMatrix(), x1_MR_STAR.LockedLocalMatrix(), (F)0, z2_MC_STAR.LocalMatrix() ); x2.SumScatterUpdate( (F)1, z2_MC_STAR ); //----------------------------------------------------------------// x1_MR_STAR.FreeAlignments(); z2_MC_STAR.FreeAlignments(); SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); SlidePartitionDown ( xT, x0, x1, /**/ /**/ xB, x2 ); } } else { // Matrix views DistMatrix<F,MC,MR> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); DistMatrix<F,MC,MR> xL(g), xR(g), x0(g), x1(g), x2(g); // Temporary distributions DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,STAR,STAR> x1_STAR_STAR(g); DistMatrix<F,STAR,MR > x1_STAR_MR(g); DistMatrix<F,STAR,MC > z2_STAR_MC(g); DistMatrix<F,MR, MC > z2_MR_MC(g); DistMatrix<F,MC, MR > z2(g); // Start the algorithm LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); PartitionRight( x, xL, xR, 0 ); while( xR.Width() > 0 ) { LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); RepartitionRight ( xL, /**/ xR, x0, /**/ x1, x2 ); x1_STAR_MR.AlignWith( L21 ); z2_STAR_MC.AlignWith( L21 ); z2.AlignWith( x2 ); z2_STAR_MC.ResizeTo( 1, x2.Width() ); //----------------------------------------------------------------// x1_STAR_STAR = x1; L11_STAR_STAR = L11; Trsv ( LOWER, NORMAL, diag, L11_STAR_STAR.LockedLocalMatrix(), x1_STAR_STAR.LocalMatrix() ); x1 = x1_STAR_STAR; x1_STAR_MR = x1_STAR_STAR; Gemv ( NORMAL, (F)-1, L21.LockedLocalMatrix(), x1_STAR_MR.LockedLocalMatrix(), (F)0, z2_STAR_MC.LocalMatrix() ); z2_MR_MC.SumScatterFrom( z2_STAR_MC ); z2 = z2_MR_MC; Axpy( (F)1, z2, x2 ); //----------------------------------------------------------------// x1_STAR_MR.FreeAlignments(); z2_STAR_MC.FreeAlignments(); z2.FreeAlignments(); SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); SlidePartitionRight ( xL, /**/ xR, x0, x1, /**/ x2 ); } } #ifndef RELEASE PopCallStack(); #endif }
Int ADMM ( const AbstractDistMatrix<Real>& APre, const AbstractDistMatrix<Real>& bPre, const AbstractDistMatrix<Real>& cPre, AbstractDistMatrix<Real>& zPre, const ADMMCtrl<Real>& ctrl ) { EL_DEBUG_CSE DistMatrixReadProxy<Real,Real,MC,MR> AProx( APre ), bProx( bPre ), cProx( cPre ); DistMatrixWriteProxy<Real,Real,MC,MR> zProx( zPre ); auto& A = AProx.GetLocked(); auto& b = bProx.GetLocked(); auto& c = cProx.GetLocked(); auto& z = zProx.Get(); // Cache a custom partially-pivoted LU factorization of // | rho*I A^H | = | B11 B12 | // | A 0 | | B21 B22 | // by (justifiably) avoiding pivoting in the first n steps of // the factorization, so that // [I,rho*I] = lu(rho*I). // The factorization would then proceed with // B21 := B21 U11^{-1} = A (rho*I)^{-1} = A/rho // B12 := L11^{-1} B12 = I A^H = A^H. // The Schur complement would then be // B22 := B22 - B21 B12 = 0 - (A*A^H)/rho. // We then factor said matrix with LU with partial pivoting and // swap the necessary rows of B21 in order to implicitly commute // the row pivots with the Gauss transforms in the manner standard // for GEPP. Unless A A' is singular, pivoting should not be needed, // as Cholesky factorization of the negative matrix should be valid. // // The result is the factorization // | I 0 | | rho*I A^H | = | I 0 | | rho*I U12 |, // | 0 P22 | | A 0 | | L21 L22 | | 0 U22 | // where [L22,U22] are stored within B22. const Int m = A.Height(); const Int n = A.Width(); const Grid& grid = A.Grid(); DistMatrix<Real> U12(grid), L21(grid), B22(grid), bPiv(grid); U12.Align( 0, n%U12.RowStride() ); L21.Align( n%L21.ColStride(), 0 ); B22.Align( n%B22.ColStride(), n%B22.RowStride() ); Adjoint( A, U12 ); L21 = A; L21 *= 1/ctrl.rho; Herk( LOWER, NORMAL, -1/ctrl.rho, A, B22 ); MakeHermitian( LOWER, B22 ); DistPermutation P2(grid); LU( B22, P2 ); P2.PermuteRows( L21 ); bPiv = b; P2.PermuteRows( bPiv ); // Possibly form the inverse of L22 U22 DistMatrix<Real> X22(grid); if( ctrl.inv ) { X22 = B22; MakeTrapezoidal( LOWER, X22 ); FillDiagonal( X22, Real(1) ); TriangularInverse( LOWER, UNIT, X22 ); Trsm( LEFT, UPPER, NORMAL, NON_UNIT, Real(1), B22, X22 ); } Int numIter=0; DistMatrix<Real> g(grid), xTmp(grid), y(grid), t(grid); Zeros( g, m+n, 1 ); PartitionDown( g, xTmp, y, n ); DistMatrix<Real> x(grid), u(grid), zOld(grid), xHat(grid); Zeros( z, n, 1 ); Zeros( u, n, 1 ); Zeros( t, n, 1 ); while( numIter < ctrl.maxIter ) { zOld = z; // Find x from // | rho*I A^H | | x | = | rho*(z-u)-c | // | A 0 | | y | | b | // via our cached custom factorization: // // |x| = inv(U) inv(L) P' |rho*(z-u)-c| // |y| |b | // = |rho*I U12|^{-1} |I 0 | |I 0 | |rho*(z-u)-c| // = |0 U22| |L21 L22| |0 P22'| |b | // = " " |rho*(z-u)-c| // | P22' b | xTmp = z; xTmp -= u; xTmp *= ctrl.rho; xTmp -= c; y = bPiv; Gemv( NORMAL, Real(-1), L21, xTmp, Real(1), y ); if( ctrl.inv ) { Gemv( NORMAL, Real(1), X22, y, t ); y = t; } else { Trsv( LOWER, NORMAL, UNIT, B22, y ); Trsv( UPPER, NORMAL, NON_UNIT, B22, y ); } Gemv( NORMAL, Real(-1), U12, y, Real(1), xTmp ); xTmp *= 1/ctrl.rho; // xHat := alpha*x + (1-alpha)*zOld xHat = xTmp; xHat *= ctrl.alpha; Axpy( 1-ctrl.alpha, zOld, xHat ); // z := pos(xHat+u) z = xHat; z += u; LowerClip( z, Real(0) ); // u := u + (xHat-z) u += xHat; u -= z; const Real objective = Dot( c, xTmp ); // rNorm := || x - z ||_2 t = xTmp; t -= z; const Real rNorm = FrobeniusNorm( t ); // sNorm := |rho| || z - zOld ||_2 t = z; t -= zOld; const Real sNorm = Abs(ctrl.rho)*FrobeniusNorm( t ); const Real epsPri = Sqrt(Real(n))*ctrl.absTol + ctrl.relTol*Max(FrobeniusNorm(xTmp),FrobeniusNorm(z)); const Real epsDual = Sqrt(Real(n))*ctrl.absTol + ctrl.relTol*Abs(ctrl.rho)*FrobeniusNorm(u); if( ctrl.print ) { t = xTmp; LowerClip( t, Real(0) ); t -= xTmp; const Real clipDist = FrobeniusNorm( t ); if( grid.Rank() == 0 ) cout << numIter << ": " << "||x-z||_2=" << rNorm << ", " << "epsPri=" << epsPri << ", " << "|rho| ||z-zOld||_2=" << sNorm << ", " << "epsDual=" << epsDual << ", " << "||x-Pos(x)||_2=" << clipDist << ", " << "c'x=" << objective << endl; } if( rNorm < epsPri && sNorm < epsDual ) break; ++numIter; } if( ctrl.maxIter == numIter && grid.Rank() == 0 ) cout << "ADMM failed to converge" << endl; x = xTmp; return numIter; }
inline void TrtrsmLLN ( UnitOrNonUnit diag, F alpha, const DistMatrix<F>& L, DistMatrix<F>& X, bool checkIfSingular ) { #ifndef RELEASE CallStackEntry entry("internal::TrtrsmLLN"); #endif const Grid& g = L.Grid(); // Matrix views DistMatrix<F> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); DistMatrix<F> XTL(g), XTR(g), X00(g), X01(g), X02(g), XBL(g), XBR(g), X10(g), X11(g), X12(g), X20(g), X21(g), X22(g); // Temporary distributions DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,MC, STAR> L21_MC_STAR(g); DistMatrix<F,STAR,MR > X10_STAR_MR(g); DistMatrix<F,STAR,VR > X10_STAR_VR(g); DistMatrix<F,STAR,MR > X11_STAR_MR(g); DistMatrix<F,STAR,STAR> X11_STAR_STAR(g); // Start the algorithm ScaleTrapezoid( alpha, LOWER, X ); LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); PartitionDownDiagonal ( X, XTL, XTR, XBL, XBR, 0 ); while( XBR.Height() > 0 ) { LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); RepartitionDownDiagonal ( XTL, /**/ XTR, X00, /**/ X01, X02, /*************/ /******************/ /**/ X10, /**/ X11, X12, XBL, /**/ XBR, X20, /**/ X21, X22 ); L21_MC_STAR.AlignWith( X20 ); X10_STAR_MR.AlignWith( X20 ); X11_STAR_MR.AlignWith( X21 ); //--------------------------------------------------------------------// L11_STAR_STAR = L11; X11_STAR_STAR = X11; X10_STAR_VR = X10; LocalTrsm ( LEFT, LOWER, NORMAL, diag, F(1), L11_STAR_STAR, X10_STAR_VR, checkIfSingular ); LocalTrtrsm ( LEFT, LOWER, NORMAL, diag, F(1), L11_STAR_STAR, X11_STAR_STAR, checkIfSingular ); X11 = X11_STAR_STAR; X11_STAR_MR = X11_STAR_STAR; MakeTriangular( LOWER, X11_STAR_MR ); X10_STAR_MR = X10_STAR_VR; X10 = X10_STAR_MR; L21_MC_STAR = L21; LocalGemm ( NORMAL, NORMAL, F(-1), L21_MC_STAR, X10_STAR_MR, F(1), X20 ); LocalGemm ( NORMAL, NORMAL, F(-1), L21_MC_STAR, X11_STAR_MR, F(1), X21 ); //--------------------------------------------------------------------// SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); SlidePartitionDownDiagonal ( XTL, /**/ XTR, X00, X01, /**/ X02, /**/ X10, X11, /**/ X12, /*************/ /******************/ XBL, /**/ XBR, X20, X21, /**/ X22 ); } }
inline void TwoSidedTrmmLVar2 ( UnitOrNonUnit diag, DistMatrix<F>& A, const DistMatrix<F>& L ) { #ifndef RELEASE PushCallStack("internal::TwoSidedTrmmLVar2"); if( A.Height() != A.Width() ) throw std::logic_error( "A must be square." ); if( L.Height() != L.Width() ) throw std::logic_error( "Triangular matrices must be square." ); if( A.Height() != L.Height() ) throw std::logic_error( "A and L must be the same size." ); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); // Temporary distributions DistMatrix<F,STAR,VR > A10_STAR_VR(g); DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,VC, STAR> A21_VC_STAR(g); DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,MC, STAR> L21_MC_STAR(g); DistMatrix<F,STAR,MR > L21Adj_STAR_MR(g); DistMatrix<F,VC, STAR> L21_VC_STAR(g); DistMatrix<F,VR, STAR> L21_VR_STAR(g); DistMatrix<F,STAR,MR > X10_STAR_MR(g); DistMatrix<F,STAR,STAR> X11_STAR_STAR(g); DistMatrix<F,MC, STAR> Z21_MC_STAR(g); DistMatrix<F,MR, STAR> Z21_MR_STAR(g); DistMatrix<F,MR, MC > Z21_MR_MC(g); DistMatrix<F> Y21(g); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); A21_VC_STAR.AlignWith( A22 ); L21_MC_STAR.AlignWith( A20 ); L21_VC_STAR.AlignWith( A22 ); L21_VR_STAR.AlignWith( A22 ); L21Adj_STAR_MR.AlignWith( A22 ); X10_STAR_MR.AlignWith( A10 ); Y21.AlignWith( A21 ); Z21_MC_STAR.AlignWith( A22 ); Z21_MR_STAR.AlignWith( A22 ); //--------------------------------------------------------------------// // A10 := L11' A10 L11_STAR_STAR = L11; A10_STAR_VR = A10; LocalTrmm ( LEFT, LOWER, ADJOINT, diag, F(1), L11_STAR_STAR, A10_STAR_VR ); A10 = A10_STAR_VR; // A10 := A10 + L21' A20 L21_MC_STAR = L21; X10_STAR_MR.ResizeTo( A10.Height(), A10.Width() ); LocalGemm( ADJOINT, NORMAL, F(1), L21_MC_STAR, A20, F(0), X10_STAR_MR ); A10.SumScatterUpdate( F(1), X10_STAR_MR ); // Y21 := A22 L21 L21_VC_STAR = L21_MC_STAR; L21_VR_STAR = L21_VC_STAR; L21Adj_STAR_MR.AdjointFrom( L21_VR_STAR ); Z21_MC_STAR.ResizeTo( A21.Height(), A21.Width() ); Z21_MR_STAR.ResizeTo( A21.Height(), A21.Width() ); Zero( Z21_MC_STAR ); Zero( Z21_MR_STAR ); LocalSymmetricAccumulateLL ( ADJOINT, F(1), A22, L21_MC_STAR, L21Adj_STAR_MR, Z21_MC_STAR, Z21_MR_STAR ); Z21_MR_MC.SumScatterFrom( Z21_MR_STAR ); Y21 = Z21_MR_MC; Y21.SumScatterUpdate( F(1), Z21_MC_STAR ); // A21 := A21 L11 A21_VC_STAR = A21; LocalTrmm ( RIGHT, LOWER, NORMAL, diag, F(1), L11_STAR_STAR, A21_VC_STAR ); A21 = A21_VC_STAR; // A21 := A21 + 1/2 Y21 Axpy( F(1)/F(2), Y21, A21 ); // A11 := L11' A11 L11 A11_STAR_STAR = A11; LocalTwoSidedTrmm( LOWER, diag, A11_STAR_STAR, L11_STAR_STAR ); A11 = A11_STAR_STAR; // A11 := A11 + (A21' L21 + L21' A21) A21_VC_STAR = A21; X11_STAR_STAR.ResizeTo( A11.Height(), A11.Width() ); Her2k ( LOWER, ADJOINT, F(1), A21_VC_STAR.LocalMatrix(), L21_VC_STAR.LocalMatrix(), F(0), X11_STAR_STAR.LocalMatrix() ); A11.SumScatterUpdate( F(1), X11_STAR_STAR ); // A21 := A21 + 1/2 Y21 Axpy( F(1)/F(2), Y21, A21 ); //--------------------------------------------------------------------// A21_VC_STAR.FreeAlignments(); L21_MC_STAR.FreeAlignments(); L21_VC_STAR.FreeAlignments(); L21_VR_STAR.FreeAlignments(); L21Adj_STAR_MR.FreeAlignments(); X10_STAR_MR.FreeAlignments(); Y21.FreeAlignments(); Z21_MC_STAR.FreeAlignments(); Z21_MR_STAR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void TrsmLLTSmall ( Orientation orientation, UnitOrNonUnit diag, F alpha, const DistMatrix<F,STAR,VR>& L, DistMatrix<F,VR,STAR>& X, bool checkIfSingular ) { #ifndef RELEASE PushCallStack("internal::TrsmLLTSmall"); if( L.Grid() != X.Grid() ) throw std::logic_error ("L and X must be distributed over the same grid"); if( orientation == NORMAL ) throw std::logic_error("TrsmLLT expects a (Conjugate)Transpose option"); if( L.Height() != L.Width() || L.Height() != X.Height() ) { std::ostringstream msg; msg << "Nonconformal TrsmLLT: \n" << " L ~ " << L.Height() << " x " << L.Width() << "\n" << " X ~ " << X.Height() << " x " << X.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } if( L.RowAlignment() != X.ColAlignment() ) throw std::logic_error("L and X must be aligned"); #endif const Grid& g = L.Grid(); // Matrix views DistMatrix<F,STAR,VR> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); DistMatrix<F,VR,STAR> XT(g), X0(g), XB(g), X1(g), X2(g); // Temporary distributions DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,STAR,STAR> X1_STAR_STAR(g); // Start the algorithm Scale( alpha, X ); LockedPartitionUpDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); PartitionUp ( X, XT, XB, 0 ); while( XT.Height() > 0 ) { LockedRepartitionUpDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); RepartitionUp ( XT, X0, X1, /**/ /**/ XB, X2 ); //--------------------------------------------------------------------// L11_STAR_STAR = L11; // L11[* ,* ] <- L11[* ,VR] X1_STAR_STAR = X1; // X1[* ,* ] <- X1[VR,* ] // X1[* ,* ] := L11^-[T/H][* ,* ] X1[* ,* ] LocalTrsm ( LEFT, LOWER, orientation, diag, F(1), L11_STAR_STAR, X1_STAR_STAR, checkIfSingular ); X1 = X1_STAR_STAR; // X0[VR,* ] -= L10[* ,VR]^(T/H) X1[* ,* ] LocalGemm( orientation, NORMAL, F(-1), L10, X1_STAR_STAR, F(1), X0 ); //--------------------------------------------------------------------// SlideLockedPartitionUpDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); SlidePartitionUp ( XT, X0, /**/ /**/ X1, XB, X2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void TrmmRLNCOld ( UnitOrNonUnit diag, T alpha, const DistMatrix<T>& L, DistMatrix<T>& X ) { #ifndef RELEASE PushCallStack("internal::TrmmRLNCOld"); if( L.Grid() != X.Grid() ) throw std::logic_error ("L and X must be distributed over the same grid"); if( L.Height() != L.Width() || X.Width() != L.Height() ) { std::ostringstream msg; msg << "Nonconformal TrmmRLNC: \n" << " L ~ " << L.Height() << " x " << L.Width() << "\n" << " X ~ " << X.Height() << " x " << X.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = L.Grid(); // Matrix views DistMatrix<T> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); DistMatrix<T> XL(g), XR(g), X0(g), X1(g), X2(g); // Temporary distributions DistMatrix<T,STAR,STAR> L11_STAR_STAR(g); DistMatrix<T,MR, STAR> L21_MR_STAR(g); DistMatrix<T,VC, STAR> X1_VC_STAR(g); DistMatrix<T,MC, STAR> D1_MC_STAR(g); // Start the algorithm Scale( alpha, X ); LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); PartitionRight( X, XL, XR, 0 ); while( XR.Width() > 0 ) { LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); RepartitionRight ( XL, /**/ XR, X0, /**/ X1, X2 ); L21_MR_STAR.AlignWith( X2 ); D1_MC_STAR.AlignWith( X1 ); Zeros( X1.Height(), X1.Width(), D1_MC_STAR ); //--------------------------------------------------------------------// X1_VC_STAR = X1; L11_STAR_STAR = L11; LocalTrmm ( RIGHT, LOWER, NORMAL, diag, T(1), L11_STAR_STAR, X1_VC_STAR ); X1 = X1_VC_STAR; L21_MR_STAR = L21; LocalGemm( NORMAL, NORMAL, T(1), X2, L21_MR_STAR, T(0), D1_MC_STAR ); X1.SumScatterUpdate( T(1), D1_MC_STAR ); //--------------------------------------------------------------------// L21_MR_STAR.FreeAlignments(); D1_MC_STAR.FreeAlignments(); SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); SlidePartitionRight ( XL, /**/ XR, X0, X1, /**/ X2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void TrmmLLTA ( Orientation orientation, UnitOrNonUnit diag, T alpha, const DistMatrix<T>& L, DistMatrix<T>& X ) { #ifndef RELEASE PushCallStack("internal::TrmmLLTA"); if( L.Grid() != X.Grid() ) throw std::logic_error ("L and X must be distributed over the same grid"); if( orientation == NORMAL ) throw std::logic_error ("TrmmLLTA expects a (Conjugate)Transpose option"); if( L.Height() != L.Width() || L.Height() != X.Height() ) { std::ostringstream msg; msg << "Nonconformal TrmmLLTA: \n" << " L ~ " << L.Height() << " x " << L.Width() << "\n" << " X ~ " << X.Height() << " x " << X.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = L.Grid(); // Matrix views DistMatrix<T> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); DistMatrix<T> XL(g), XR(g), X0(g), X1(g), X2(g); DistMatrix<T,MC,STAR> X1_MC_STAR(g); DistMatrix<T,MR,STAR> Z1_MR_STAR(g); DistMatrix<T,MR,MC > Z1_MR_MC(g); X1_MC_STAR.AlignWith( L ); Z1_MR_STAR.AlignWith( L ); PartitionRight( X, XL, XR, 0 ); while( XL.Width() < X.Width() ) { RepartitionRight ( XL, /**/ XR, X0, /**/ X1, X2 ); Zeros( X1.Height(), X1.Width(), Z1_MR_STAR ); //--------------------------------------------------------------------// X1_MC_STAR = X1; LocalTrmmAccumulateLLT ( orientation, diag, alpha, L, X1_MC_STAR, Z1_MR_STAR ); Z1_MR_MC.SumScatterFrom( Z1_MR_STAR ); X1 = Z1_MR_MC; //--------------------------------------------------------------------// SlidePartitionRight ( XL, /**/ XR, X0, X1, /**/ X2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void TwoSidedTrsmLVar2 ( UnitOrNonUnit diag, DistMatrix<F>& A, const DistMatrix<F>& L ) { #ifndef RELEASE PushCallStack("internal::TwoSidedTrsmLVar2"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( L.Height() != L.Width() ) throw std::logic_error("Triangular matrices must be square"); if( A.Height() != L.Height() ) throw std::logic_error("A and L must be the same size"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); // Temporary distributions DistMatrix<F,MR, STAR> A10Adj_MR_STAR(g); DistMatrix<F,STAR,VR > A10_STAR_VR(g); DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,VC, STAR> A21_VC_STAR(g); DistMatrix<F,MR, STAR> F10Adj_MR_STAR(g); DistMatrix<F,MR, STAR> L10Adj_MR_STAR(g); DistMatrix<F,VC, STAR> L10Adj_VC_STAR(g); DistMatrix<F,STAR,MC > L10_STAR_MC(g); DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,MC, STAR> X11_MC_STAR(g); DistMatrix<F,MC, STAR> X21_MC_STAR(g); DistMatrix<F,MC, STAR> Y10Adj_MC_STAR(g); DistMatrix<F,MR, MC > Y10Adj_MR_MC(g); DistMatrix<F> X11(g); DistMatrix<F> Y10Adj(g); Matrix<F> Y10Local; PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); A10Adj_MR_STAR.AlignWith( L10 ); F10Adj_MR_STAR.AlignWith( A00 ); L10Adj_MR_STAR.AlignWith( A00 ); L10Adj_VC_STAR.AlignWith( A00 ); L10_STAR_MC.AlignWith( A00 ); X11.AlignWith( A11 ); X11_MC_STAR.AlignWith( L10 ); X21_MC_STAR.AlignWith( A20 ); Y10Adj_MC_STAR.AlignWith( A00 ); Y10Adj_MR_MC.AlignWith( A10 ); //--------------------------------------------------------------------// // Y10 := L10 A00 L10Adj_MR_STAR.AdjointFrom( L10 ); L10Adj_VC_STAR = L10Adj_MR_STAR; L10_STAR_MC.AdjointFrom( L10Adj_VC_STAR ); Y10Adj_MC_STAR.ResizeTo( A10.Width(), A10.Height() ); F10Adj_MR_STAR.ResizeTo( A10.Width(), A10.Height() ); Zero( Y10Adj_MC_STAR ); Zero( F10Adj_MR_STAR ); LocalSymmetricAccumulateRL ( ADJOINT, F(1), A00, L10_STAR_MC, L10Adj_MR_STAR, Y10Adj_MC_STAR, F10Adj_MR_STAR ); Y10Adj.SumScatterFrom( Y10Adj_MC_STAR ); Y10Adj_MR_MC = Y10Adj; Y10Adj_MR_MC.SumScatterUpdate( F(1), F10Adj_MR_STAR ); Adjoint( Y10Adj_MR_MC.LockedLocalMatrix(), Y10Local ); // X11 := A10 L10' X11_MC_STAR.ResizeTo( A11.Height(), A11.Width() ); LocalGemm ( NORMAL, NORMAL, F(1), A10, L10Adj_MR_STAR, F(0), X11_MC_STAR ); // A10 := A10 - Y10 Axpy( F(-1), Y10Local, A10.LocalMatrix() ); A10Adj_MR_STAR.AdjointFrom( A10 ); // A11 := A11 - (X11 + L10 A10') = A11 - (A10 L10' + L10 A10') LocalGemm ( NORMAL, NORMAL, F(1), L10, A10Adj_MR_STAR, F(1), X11_MC_STAR ); X11.SumScatterFrom( X11_MC_STAR ); MakeTrapezoidal( LEFT, LOWER, 0, X11 ); Axpy( F(-1), X11, A11 ); // A10 := inv(L11) A10 L11_STAR_STAR = L11; A10_STAR_VR.AdjointFrom( A10Adj_MR_STAR ); LocalTrsm ( LEFT, LOWER, NORMAL, diag, F(1), L11_STAR_STAR, A10_STAR_VR ); A10 = A10_STAR_VR; // A11 := inv(L11) A11 inv(L11)' A11_STAR_STAR = A11; LocalTwoSidedTrsm( LOWER, diag, A11_STAR_STAR, L11_STAR_STAR ); A11 = A11_STAR_STAR; // A21 := A21 - A20 L10' X21_MC_STAR.ResizeTo( A21.Height(), A21.Width() ); LocalGemm ( NORMAL, NORMAL, F(1), A20, L10Adj_MR_STAR, F(0), X21_MC_STAR ); A21.SumScatterUpdate( F(-1), X21_MC_STAR ); // A21 := A21 inv(L11)' A21_VC_STAR = A21; LocalTrsm ( RIGHT, LOWER, ADJOINT, diag, F(1), L11_STAR_STAR, A21_VC_STAR ); A21 = A21_VC_STAR; //--------------------------------------------------------------------// A10Adj_MR_STAR.FreeAlignments(); F10Adj_MR_STAR.FreeAlignments(); L10Adj_MR_STAR.FreeAlignments(); L10Adj_VC_STAR.FreeAlignments(); L10_STAR_MC.FreeAlignments(); X11.FreeAlignments(); X11_MC_STAR.FreeAlignments(); X21_MC_STAR.FreeAlignments(); Y10Adj_MC_STAR.FreeAlignments(); Y10Adj_MR_MC.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /**********************************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void TrmmLLTCOld ( Orientation orientation, UnitOrNonUnit diag, T alpha, const DistMatrix<T>& L, DistMatrix<T>& X ) { #ifndef RELEASE PushCallStack("internal::TrmmLLTCOld"); if( L.Grid() != X.Grid() ) throw std::logic_error ("L and X must be distributed over the same grid"); if( orientation == NORMAL ) throw std::logic_error("TrmmLLT expects a (Conjugate)Transpose option"); if( L.Height() != L.Width() || L.Height() != X.Height() ) { std::ostringstream msg; msg << "Nonconformal TrmmLLTC: \n" << " L ~ " << L.Height() << " x " << L.Width() << "\n" << " X ~ " << X.Height() << " x " << X.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = L.Grid(); // Matrix views DistMatrix<T> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); DistMatrix<T> XT(g), X0(g), XB(g), X1(g), X2(g); // Temporary distributions DistMatrix<T,STAR,STAR> L11_STAR_STAR(g); DistMatrix<T,MC, STAR> L21_MC_STAR(g); DistMatrix<T,STAR,VR > X1_STAR_VR(g); DistMatrix<T,MR, STAR> D1AdjOrTrans_MR_STAR(g); DistMatrix<T,MR, MC > D1AdjOrTrans_MR_MC(g); DistMatrix<T,MC, MR > D1(g); // Start the algorithm Scale( alpha, X ); LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); PartitionDown ( X, XT, XB, 0 ); while( XB.Height() > 0 ) { LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); RepartitionDown ( XT, X0, /**/ /**/ X1, XB, X2 ); L21_MC_STAR.AlignWith( X2 ); D1AdjOrTrans_MR_STAR.AlignWith( X1 ); D1AdjOrTrans_MR_MC.AlignWith( X1 ); D1.AlignWith( X1 ); Zeros( X1.Width(), X1.Height(), D1AdjOrTrans_MR_STAR ); Zeros( X1.Height(), X1.Width(), D1 ); //--------------------------------------------------------------------// X1_STAR_VR = X1; L11_STAR_STAR = L11; LocalTrmm ( LEFT, LOWER, orientation, diag, T(1), L11_STAR_STAR, X1_STAR_VR ); X1 = X1_STAR_VR; L21_MC_STAR = L21; LocalGemm ( orientation, NORMAL, T(1), X2, L21_MC_STAR, T(0), D1AdjOrTrans_MR_STAR ); D1AdjOrTrans_MR_MC.SumScatterFrom( D1AdjOrTrans_MR_STAR ); if( orientation == TRANSPOSE ) Transpose( D1AdjOrTrans_MR_MC.LocalMatrix(), D1.LocalMatrix() ); else Adjoint( D1AdjOrTrans_MR_MC.LocalMatrix(), D1.LocalMatrix() ); Axpy( T(1), D1, X1 ); //--------------------------------------------------------------------// D1.FreeAlignments(); D1AdjOrTrans_MR_MC.FreeAlignments(); D1AdjOrTrans_MR_STAR.FreeAlignments(); L21_MC_STAR.FreeAlignments(); SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); SlidePartitionDown ( XT, X0, X1, /**/ /**/ XB, X2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void TrdtrmmLVar1( Orientation orientation, DistMatrix<F>& L ) { #ifndef RELEASE CallStackEntry entry("internal::TrdtrmmLVar1"); if( L.Height() != L.Width() ) LogicError("L must be square"); if( orientation == NORMAL ) LogicError("Orientation must be (conjugate-)transpose"); #endif const Grid& g = L.Grid(); // Matrix views DistMatrix<F> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); DistMatrix<F,MD,STAR> d1(g); // Temporary distributions DistMatrix<F,STAR,VR > L10_STAR_VR(g); DistMatrix<F,STAR,VC > S10_STAR_VC(g); DistMatrix<F,STAR,MC > S10_STAR_MC(g); DistMatrix<F,STAR,MR > L10_STAR_MR(g); DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); L10_STAR_VR.AlignWith( L ); S10_STAR_VC.AlignWith( L ); S10_STAR_MC.AlignWith( L ); L10_STAR_MR.AlignWith( L ); PartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); while( LTL.Height() < L.Height() && LTL.Width() < L.Height() ) { RepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); //--------------------------------------------------------------------// L11.GetDiagonal( d1 ); L10_STAR_VR = L10; S10_STAR_VC = L10_STAR_VR; S10_STAR_MC = S10_STAR_VC; DiagonalSolve( LEFT, NORMAL, d1, L10_STAR_VR, true ); L10_STAR_MR = L10_STAR_VR; LocalTrrk ( LOWER, orientation, F(1), S10_STAR_MC, L10_STAR_MR, F(1), L00 ); L11_STAR_STAR = L11; LocalTrmm ( LEFT, LOWER, orientation, UNIT, F(1), L11_STAR_STAR, L10_STAR_VR ); L10 = L10_STAR_VR; LocalTrdtrmm( orientation, LOWER, L11_STAR_STAR ); L11 = L11_STAR_STAR; //--------------------------------------------------------------------// SlidePartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); } }