inline void internal::CholeskyLVar3( DistMatrix<F,MC,MR>& A ) { #ifndef RELEASE PushCallStack("internal::CholeskyLVar3"); if( A.Height() != A.Width() ) throw std::logic_error ("Can only compute Cholesky factor of square matrices"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F,MC,MR> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); // Temporary matrices DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,VC, STAR> A21_VC_STAR(g); DistMatrix<F,VR, STAR> A21_VR_STAR(g); DistMatrix<F,STAR,MC > A21Trans_STAR_MC(g); DistMatrix<F,STAR,MR > A21Adj_STAR_MR(g); // Start the algorithm PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ABR.Height() > 0 ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); A21_VR_STAR.AlignWith( A22 ); A21_VC_STAR.AlignWith( A22 ); A21Trans_STAR_MC.AlignWith( A22 ); A21Adj_STAR_MR.AlignWith( A22 ); //--------------------------------------------------------------------// A11_STAR_STAR = A11; internal::LocalCholesky( LOWER, A11_STAR_STAR ); A11 = A11_STAR_STAR; A21_VC_STAR = A21; internal::LocalTrsm ( RIGHT, LOWER, ADJOINT, NON_UNIT, (F)1, A11_STAR_STAR, A21_VC_STAR ); A21_VR_STAR = A21_VC_STAR; A21Trans_STAR_MC.TransposeFrom( A21_VC_STAR ); A21Adj_STAR_MR.AdjointFrom( A21_VR_STAR ); // (A21^T[* ,MC])^T A21^H[* ,MR] = A21[MC,* ] A21^H[* ,MR] // = (A21 A21^H)[MC,MR] internal::LocalTrrk ( LOWER, TRANSPOSE, (F)-1, A21Trans_STAR_MC, A21Adj_STAR_MR, (F)1, A22 ); A21.TransposeFrom( A21Trans_STAR_MC ); //--------------------------------------------------------------------// A21_VR_STAR.FreeAlignments(); A21_VC_STAR.FreeAlignments(); A21Trans_STAR_MC.FreeAlignments(); A21Adj_STAR_MR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void internal::HegstLLVar4( DistMatrix<F,MC,MR>& A, const DistMatrix<F,MC,MR>& L ) { #ifndef RELEASE PushCallStack("internal::HegstLLVar4"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( L.Height() != L.Width() ) throw std::logic_error("Triangular matrices must be square"); if( A.Height() != L.Height() ) throw std::logic_error("A and L must be the same size"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F,MC,MR> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F,MC,MR> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); // Temporary distributions DistMatrix<F,STAR,VR > A10_STAR_VR(g); DistMatrix<F,STAR,MR > A10_STAR_MR(g); DistMatrix<F,STAR,MC > A10_STAR_MC(g); DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,VC, STAR> A21_VC_STAR(g); DistMatrix<F,MC, STAR> A21_MC_STAR(g); DistMatrix<F,STAR,VR > L10_STAR_VR(g); DistMatrix<F,STAR,MR > L10_STAR_MR(g); DistMatrix<F,STAR,MC > L10_STAR_MC(g); DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,STAR,VR > Y10_STAR_VR(g); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); A10_STAR_VR.AlignWith( A00 ); A10_STAR_MR.AlignWith( A00 ); A10_STAR_MC.AlignWith( A00 ); A21_MC_STAR.AlignWith( A20 ); L10_STAR_VR.AlignWith( A00 ); L10_STAR_MR.AlignWith( A00 ); L10_STAR_MC.AlignWith( A00 ); Y10_STAR_VR.AlignWith( A10 ); //--------------------------------------------------------------------// // Y10 := A11 L10 A11_STAR_STAR = A11; L10_STAR_VR = L10; Y10_STAR_VR.ResizeTo( A10.Height(), A10.Width() ); Zero( Y10_STAR_VR ); Hemm ( LEFT, LOWER, (F)0.5, A11_STAR_STAR.LockedLocalMatrix(), L10_STAR_VR.LockedLocalMatrix(), (F)0, Y10_STAR_VR.LocalMatrix() ); // A10 := A10 + 1/2 Y10 A10_STAR_VR = A10; Axpy( (F)1, Y10_STAR_VR, A10_STAR_VR ); // A00 := A00 + (A10' L10 + L10' A10) A10_STAR_MR = A10_STAR_VR; A10_STAR_MC = A10_STAR_VR; L10_STAR_MR = L10_STAR_VR; L10_STAR_MC = L10_STAR_VR; internal::LocalTrr2k ( LOWER, ADJOINT, ADJOINT, (F)1, A10_STAR_MC, L10_STAR_MR, L10_STAR_MC, A10_STAR_MR, (F)1, A00 ); // A10 := A10 + 1/2 Y10 Axpy( (F)1, Y10_STAR_VR, A10_STAR_VR ); // A10 := L11' A10 L11_STAR_STAR = L11; internal::LocalTrmm ( LEFT, LOWER, ADJOINT, NON_UNIT, (F)1, L11_STAR_STAR, A10_STAR_VR ); A10 = A10_STAR_VR; // A20 := A20 + A21 L10 A21_MC_STAR = A21; internal::LocalGemm ( NORMAL, NORMAL, (F)1, A21_MC_STAR, L10_STAR_MR, (F)1, A20 ); // A11 := L11' A11 L11 internal::LocalHegst ( LEFT, LOWER, A11_STAR_STAR, L11_STAR_STAR ); A11 = A11_STAR_STAR; // A21 := A21 L11 A21_VC_STAR = A21_MC_STAR; internal::LocalTrmm ( RIGHT, LOWER, NORMAL, NON_UNIT, (F)1, L11_STAR_STAR, A21_VC_STAR ); A21 = A21_VC_STAR; //--------------------------------------------------------------------// A10_STAR_VR.FreeAlignments(); A10_STAR_MR.FreeAlignments(); A10_STAR_MC.FreeAlignments(); A21_MC_STAR.FreeAlignments(); L10_STAR_VR.FreeAlignments(); L10_STAR_MR.FreeAlignments(); L10_STAR_MC.FreeAlignments(); Y10_STAR_VR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void CholeskyUVar3( DistMatrix<F>& A ) { #ifndef RELEASE PushCallStack("internal::CholeskyUVar3"); if( A.Height() != A.Width() ) throw std::logic_error ("Can only compute Cholesky factor of square matrices"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); // Temporary matrix distributions DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,STAR,VR > A12_STAR_VR(g); DistMatrix<F,STAR,MC > A12_STAR_MC(g); DistMatrix<F,STAR,MR > A12_STAR_MR(g); // Start the algorithm PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ABR.Height() > 0 ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); A12_STAR_MC.AlignWith( A22 ); A12_STAR_MR.AlignWith( A22 ); A12_STAR_VR.AlignWith( A22 ); //--------------------------------------------------------------------// A11_STAR_STAR = A11; LocalCholesky( UPPER, A11_STAR_STAR ); A11 = A11_STAR_STAR; A12_STAR_VR = A12; LocalTrsm ( LEFT, UPPER, ADJOINT, NON_UNIT, F(1), A11_STAR_STAR, A12_STAR_VR ); A12_STAR_MC = A12_STAR_VR; A12_STAR_MR = A12_STAR_VR; LocalTrrk ( UPPER, ADJOINT, F(-1), A12_STAR_MC, A12_STAR_MR, F(1), A22 ); A12 = A12_STAR_MR; //--------------------------------------------------------------------// A12_STAR_MC.FreeAlignments(); A12_STAR_MR.FreeAlignments(); A12_STAR_VR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void HPDInverseLVar2( DistMatrix<F>& A ) { #ifndef RELEASE PushCallStack("internal::HPDInverseLVar2"); if( A.Height() != A.Width() ) throw std::logic_error("Nonsquare matrices cannot be triangular"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); // Temporary distributions DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,STAR,VR > A10_STAR_VR(g); DistMatrix<F,VC, STAR> A21_VC_STAR(g); DistMatrix<F,STAR,MC > A10_STAR_MC(g); DistMatrix<F,STAR,MR > A10_STAR_MR(g); DistMatrix<F,STAR,MC > A21Trans_STAR_MC(g); DistMatrix<F,VR, STAR> A21_VR_STAR(g); DistMatrix<F,STAR,MR > A21Adj_STAR_MR(g); // Start the algorithm PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); A10_STAR_VR.AlignWith( A00 ); A21_VC_STAR.AlignWith( A20 ); A10_STAR_MC.AlignWith( A00 ); A10_STAR_MR.AlignWith( A00 ); A21Trans_STAR_MC.AlignWith( A20 ); A21_VR_STAR.AlignWith( A22 ); A21Adj_STAR_MR.AlignWith( A22 ); //--------------------------------------------------------------------// A11_STAR_STAR = A11; LocalCholesky( LOWER, A11_STAR_STAR ); A10_STAR_VR = A10; LocalTrsm ( LEFT, LOWER, NORMAL, NON_UNIT, F(1), A11_STAR_STAR, A10_STAR_VR ); A21_VC_STAR = A21; LocalTrsm ( RIGHT, LOWER, ADJOINT, NON_UNIT, F(1), A11_STAR_STAR, A21_VC_STAR ); A10_STAR_MC = A10_STAR_VR; A10_STAR_MR = A10_STAR_VR; LocalTrrk ( LOWER, ADJOINT, F(1), A10_STAR_MC, A10_STAR_MR, F(1), A00 ); A21Trans_STAR_MC.TransposeFrom( A21_VC_STAR ); LocalGemm ( TRANSPOSE, NORMAL, F(-1), A21Trans_STAR_MC, A10_STAR_MR, F(1), A20 ); A21_VR_STAR = A21_VC_STAR; A21Adj_STAR_MR.AdjointFrom( A21_VR_STAR ); LocalTrrk ( LOWER, TRANSPOSE, F(-1), A21Trans_STAR_MC, A21Adj_STAR_MR, F(1), A22 ); LocalTrsm ( LEFT, LOWER, ADJOINT, NON_UNIT, F(1), A11_STAR_STAR, A10_STAR_VR ); LocalTrsm ( RIGHT, LOWER, NORMAL, NON_UNIT, F(-1), A11_STAR_STAR, A21_VC_STAR ); LocalTriangularInverse( LOWER, NON_UNIT, A11_STAR_STAR ); LocalTrtrmm( ADJOINT, LOWER, A11_STAR_STAR ); A11 = A11_STAR_STAR; A10 = A10_STAR_VR; A21 = A21_VC_STAR; //--------------------------------------------------------------------// A10_STAR_VR.FreeAlignments(); A21_VC_STAR.FreeAlignments(); A10_STAR_MC.FreeAlignments(); A10_STAR_MR.FreeAlignments(); A21Trans_STAR_MC.FreeAlignments(); A21_VR_STAR.FreeAlignments(); A21Adj_STAR_MR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void TwoSidedTrsmUVar5 ( UnitOrNonUnit diag, DistMatrix<F>& A, const DistMatrix<F>& U ) { #ifndef RELEASE PushCallStack("internal::TwoSidedTrsmUVar5"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( U.Height() != U.Width() ) throw std::logic_error("Triangular matrices must be square"); if( A.Height() != U.Height() ) throw std::logic_error("A and U must be the same size"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); // Temporary distributions DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,STAR,MC > A12_STAR_MC(g); DistMatrix<F,STAR,MR > A12_STAR_MR(g); DistMatrix<F,STAR,VC > A12_STAR_VC(g); DistMatrix<F,STAR,VR > A12_STAR_VR(g); DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); DistMatrix<F,STAR,MC > U12_STAR_MC(g); DistMatrix<F,STAR,MR > U12_STAR_MR(g); DistMatrix<F,STAR,VC > U12_STAR_VC(g); DistMatrix<F,STAR,VR > U12_STAR_VR(g); DistMatrix<F,STAR,VR > Y12_STAR_VR(g); DistMatrix<F> Y12(g); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDownDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDownDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); A12_STAR_MC.AlignWith( A22 ); A12_STAR_MR.AlignWith( A22 ); A12_STAR_VC.AlignWith( A22 ); A12_STAR_VR.AlignWith( A22 ); U12_STAR_MC.AlignWith( A22 ); U12_STAR_MR.AlignWith( A22 ); U12_STAR_VC.AlignWith( A22 ); U12_STAR_VR.AlignWith( A22 ); Y12.AlignWith( A12 ); Y12_STAR_VR.AlignWith( A12 ); //--------------------------------------------------------------------// // A11 := inv(U11)' A11 inv(U11) U11_STAR_STAR = U11; A11_STAR_STAR = A11; LocalTwoSidedTrsm( UPPER, diag, A11_STAR_STAR, U11_STAR_STAR ); A11 = A11_STAR_STAR; // Y12 := A11 U12 U12_STAR_VR = U12; Y12_STAR_VR.ResizeTo( A12.Height(), A12.Width() ); Hemm ( LEFT, UPPER, F(1), A11_STAR_STAR.LocalMatrix(), U12_STAR_VR.LocalMatrix(), F(0), Y12_STAR_VR.LocalMatrix() ); Y12 = Y12_STAR_VR; // A12 := inv(U11)' A12 A12_STAR_VR = A12; LocalTrsm ( LEFT, UPPER, ADJOINT, diag, F(1), U11_STAR_STAR, A12_STAR_VR ); A12 = A12_STAR_VR; // A12 := A12 - 1/2 Y12 Axpy( F(-1)/F(2), Y12, A12 ); // A22 := A22 - (A12' U12 + U12' A12) A12_STAR_VR = A12; A12_STAR_VC = A12_STAR_VR; U12_STAR_VC = U12_STAR_VR; A12_STAR_MC = A12_STAR_VC; U12_STAR_MC = U12_STAR_VC; A12_STAR_MR = A12_STAR_VR; U12_STAR_MR = U12_STAR_VR; LocalTrr2k ( UPPER, ADJOINT, ADJOINT, F(-1), U12_STAR_MC, A12_STAR_MR, A12_STAR_MC, U12_STAR_MR, F(1), A22 ); // A12 := A12 - 1/2 Y12 Axpy( F(-1)/F(2), Y12, A12 ); // A12 := A12 inv(U22) // // This is the bottleneck because A12 only has blocksize rows Trsm( RIGHT, UPPER, NORMAL, diag, F(1), U22, A12 ); //--------------------------------------------------------------------// A12_STAR_MC.FreeAlignments(); A12_STAR_MR.FreeAlignments(); A12_STAR_VC.FreeAlignments(); A12_STAR_VR.FreeAlignments(); U12_STAR_MC.FreeAlignments(); U12_STAR_MR.FreeAlignments(); U12_STAR_VC.FreeAlignments(); U12_STAR_VR.FreeAlignments(); Y12.FreeAlignments(); Y12_STAR_VR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDownDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); } #ifndef RELEASE PopCallStack(); #endif }
void LSquare( DistMatrix<R>& A ) { #ifndef RELEASE CallStackEntry entry("hermitian_tridiag::LSquare"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( A.Grid().Height() != A.Grid().Width() ) throw std::logic_error("The process grid must be square"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<R> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); // Temporary distributions DistMatrix<R> WPan(g); DistMatrix<R,STAR,STAR> A11_STAR_STAR(g); DistMatrix<R,MC, STAR> APan_MC_STAR(g), A11_MC_STAR(g), A21_MC_STAR(g); DistMatrix<R,MR, STAR> APan_MR_STAR(g), A11_MR_STAR(g), A21_MR_STAR(g); DistMatrix<R,MC, STAR> WPan_MC_STAR(g), W11_MC_STAR(g), W21_MC_STAR(g); DistMatrix<R,MR, STAR> WPan_MR_STAR(g), W11_MR_STAR(g), W21_MR_STAR(g); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); if( A22.Height() > 0 ) { WPan.AlignWith( A11 ); APan_MC_STAR.AlignWith( A11 ); WPan_MC_STAR.AlignWith( A11 ); APan_MR_STAR.AlignWith( A11 ); WPan_MR_STAR.AlignWith( A11 ); //----------------------------------------------------------------// WPan.ResizeTo( ABR.Height(), A11.Width() ); APan_MC_STAR.ResizeTo( ABR.Height(), A11.Width() ); WPan_MC_STAR.ResizeTo( ABR.Height(), A11.Width() ); APan_MR_STAR.ResizeTo( ABR.Height(), A11.Width() ); WPan_MR_STAR.ResizeTo( ABR.Height(), A11.Width() ); hermitian_tridiag::PanelLSquare ( ABR, WPan, APan_MC_STAR, APan_MR_STAR, WPan_MC_STAR, WPan_MR_STAR ); PartitionDown ( APan_MC_STAR, A11_MC_STAR, A21_MC_STAR, A11.Height() ); PartitionDown ( APan_MR_STAR, A11_MR_STAR, A21_MR_STAR, A11.Height() ); PartitionDown ( WPan_MC_STAR, W11_MC_STAR, W21_MC_STAR, A11.Height() ); PartitionDown ( WPan_MR_STAR, W11_MR_STAR, W21_MR_STAR, A11.Height() ); LocalTrr2k ( LOWER, TRANSPOSE, TRANSPOSE, R(-1), A21_MC_STAR, W21_MR_STAR, W21_MC_STAR, A21_MR_STAR, R(1), A22 ); //----------------------------------------------------------------// WPan_MR_STAR.FreeAlignments(); APan_MR_STAR.FreeAlignments(); WPan_MC_STAR.FreeAlignments(); APan_MC_STAR.FreeAlignments(); WPan.FreeAlignments(); } else { A11_STAR_STAR = A11; HermitianTridiag( LOWER, A11_STAR_STAR.Matrix() ); A11 = A11_STAR_STAR; } SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); } }
inline void internal::HegstLUVar2( DistMatrix<F,MC,MR>& A, const DistMatrix<F,MC,MR>& U ) { #ifndef RELEASE PushCallStack("internal::HegstLUVar2"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( U.Height() != U.Width() ) throw std::logic_error("Triangular matrices must be square"); if( A.Height() != U.Height() ) throw std::logic_error("A and U must be the same size"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F,MC,MR> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F,MC,MR> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); // Temporary distributions DistMatrix<F,VC, STAR> A01_VC_STAR(g); DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,STAR,VR > A12_STAR_VR(g); DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); DistMatrix<F,STAR,MC > U12_STAR_MC(g); DistMatrix<F,STAR,VR > U12_STAR_VR(g); DistMatrix<F,MR, STAR> U12Adj_MR_STAR(g); DistMatrix<F,VC, STAR> U12Adj_VC_STAR(g); DistMatrix<F,MC, STAR> X01_MC_STAR(g); DistMatrix<F,STAR,STAR> X11_STAR_STAR(g); DistMatrix<F,MC, MR > Y12(g); DistMatrix<F,MC, MR > Z12Adj(g); DistMatrix<F,MR, MC > Z12Adj_MR_MC(g); DistMatrix<F,MC, STAR> Z12Adj_MC_STAR(g); DistMatrix<F,MR, STAR> Z12Adj_MR_STAR(g); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDownDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDownDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); A12_STAR_VR.AlignWith( A12 ); U12_STAR_MC.AlignWith( A22 ); U12_STAR_VR.AlignWith( A12 ); U12Adj_MR_STAR.AlignWith( A22 ); U12Adj_VC_STAR.AlignWith( A22 ); X01_MC_STAR.AlignWith( A01 ); Y12.AlignWith( A12 ); Z12Adj.AlignWith( A12 ); Z12Adj_MR_MC.AlignWith( A12 ); Z12Adj_MC_STAR.AlignWith( A22 ); Z12Adj_MR_STAR.AlignWith( A22 ); //--------------------------------------------------------------------// // A01 := A01 U11' U11_STAR_STAR = U11; A01_VC_STAR = A01; internal::LocalTrmm ( RIGHT, UPPER, ADJOINT, NON_UNIT, (F)1, U11_STAR_STAR, A01_VC_STAR ); A01 = A01_VC_STAR; // A01 := A01 + A02 U12' U12Adj_MR_STAR.AdjointFrom( U12 ); X01_MC_STAR.ResizeTo( A01.Height(), A01.Width() ); internal::LocalGemm ( NORMAL, NORMAL, (F)1, A02, U12Adj_MR_STAR, (F)0, X01_MC_STAR ); A01.SumScatterUpdate( (F)1, X01_MC_STAR ); // Y12 := U12 A22 U12Adj_VC_STAR = U12Adj_MR_STAR; U12_STAR_MC.AdjointFrom( U12Adj_VC_STAR ); Z12Adj_MC_STAR.ResizeTo( A12.Width(), A12.Height() ); Z12Adj_MR_STAR.ResizeTo( A12.Width(), A12.Height() ); Zero( Z12Adj_MC_STAR ); Zero( Z12Adj_MR_STAR ); internal::LocalSymmetricAccumulateRU ( ADJOINT, (F)1, A22, U12_STAR_MC, U12Adj_MR_STAR, Z12Adj_MC_STAR, Z12Adj_MR_STAR ); Z12Adj.SumScatterFrom( Z12Adj_MC_STAR ); Z12Adj_MR_MC = Z12Adj; Z12Adj_MR_MC.SumScatterUpdate( (F)1, Z12Adj_MR_STAR ); Y12.ResizeTo( A12.Height(), A12.Width() ); Adjoint( Z12Adj_MR_MC.LockedLocalMatrix(), Y12.LocalMatrix() ); // A12 := U11 A12 A12_STAR_VR = A12; U11_STAR_STAR = U11; internal::LocalTrmm ( LEFT, UPPER, NORMAL, NON_UNIT, (F)1, U11_STAR_STAR, A12_STAR_VR ); A12 = A12_STAR_VR; // A12 := A12 + 1/2 Y12 Axpy( (F)0.5, Y12, A12 ); // A11 := U11 A11 U11' A11_STAR_STAR = A11; internal::LocalHegst( LEFT, UPPER, A11_STAR_STAR, U11_STAR_STAR ); A11 = A11_STAR_STAR; // A11 := A11 + (A12 U12' + U12 A12') A12_STAR_VR = A12; U12_STAR_VR = U12; X11_STAR_STAR.ResizeTo( A11.Height(), A11.Width() ); Her2k ( UPPER, NORMAL, (F)1, A12_STAR_VR.LocalMatrix(), U12_STAR_VR.LocalMatrix(), (F)0, X11_STAR_STAR.LocalMatrix() ); A11.SumScatterUpdate( (F)1, X11_STAR_STAR ); // A12 := A12 + 1/2 Y12 Axpy( (F)0.5, Y12, A12 ); //--------------------------------------------------------------------// A12_STAR_VR.FreeAlignments(); U12_STAR_MC.FreeAlignments(); U12_STAR_VR.FreeAlignments(); U12Adj_MR_STAR.FreeAlignments(); U12Adj_VC_STAR.FreeAlignments(); X01_MC_STAR.FreeAlignments(); Y12.FreeAlignments(); Z12Adj.FreeAlignments(); Z12Adj_MR_MC.FreeAlignments(); Z12Adj_MC_STAR.FreeAlignments(); Z12Adj_MR_STAR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDownDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void LU( DistMatrix<F>& A ) { #ifndef RELEASE CallStackEntry entry("LU"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); // Temporary distributions DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,MC, STAR> A21_MC_STAR(g); DistMatrix<F,STAR,VR > A12_STAR_VR(g); DistMatrix<F,STAR,MR > A12_STAR_MR(g); // Start the algorithm PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ATL.Height() < A.Height() && ATL.Width() < A.Width() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); A12_STAR_VR.AlignWith( A22 ); A12_STAR_MR.AlignWith( A22 ); A21_MC_STAR.AlignWith( A22 ); A11_STAR_STAR.ResizeTo( A11.Height(), A11.Width() ); //--------------------------------------------------------------------// A11_STAR_STAR = A11; LocalLU( A11_STAR_STAR ); A11 = A11_STAR_STAR; A21_MC_STAR = A21; LocalTrsm ( RIGHT, UPPER, NORMAL, NON_UNIT, F(1), A11_STAR_STAR, A21_MC_STAR ); A21 = A21_MC_STAR; // Perhaps we should give up perfectly distributing this operation since // it's total contribution is only O(n^2) A12_STAR_VR = A12; LocalTrsm ( LEFT, LOWER, NORMAL, UNIT, F(1), A11_STAR_STAR, A12_STAR_VR ); A12_STAR_MR = A12_STAR_VR; LocalGemm( NORMAL, NORMAL, F(-1), A21_MC_STAR, A12_STAR_MR, F(1), A22 ); A12 = A12_STAR_MR; //--------------------------------------------------------------------// A12_STAR_VR.FreeAlignments(); A12_STAR_MR.FreeAlignments(); A21_MC_STAR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); } }
inline void TwoSidedTrmmLVar2 ( UnitOrNonUnit diag, DistMatrix<F>& A, const DistMatrix<F>& L ) { #ifndef RELEASE PushCallStack("internal::TwoSidedTrmmLVar2"); if( A.Height() != A.Width() ) throw std::logic_error( "A must be square." ); if( L.Height() != L.Width() ) throw std::logic_error( "Triangular matrices must be square." ); if( A.Height() != L.Height() ) throw std::logic_error( "A and L must be the same size." ); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); // Temporary distributions DistMatrix<F,STAR,VR > A10_STAR_VR(g); DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,VC, STAR> A21_VC_STAR(g); DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,MC, STAR> L21_MC_STAR(g); DistMatrix<F,STAR,MR > L21Adj_STAR_MR(g); DistMatrix<F,VC, STAR> L21_VC_STAR(g); DistMatrix<F,VR, STAR> L21_VR_STAR(g); DistMatrix<F,STAR,MR > X10_STAR_MR(g); DistMatrix<F,STAR,STAR> X11_STAR_STAR(g); DistMatrix<F,MC, STAR> Z21_MC_STAR(g); DistMatrix<F,MR, STAR> Z21_MR_STAR(g); DistMatrix<F,MR, MC > Z21_MR_MC(g); DistMatrix<F> Y21(g); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); A21_VC_STAR.AlignWith( A22 ); L21_MC_STAR.AlignWith( A20 ); L21_VC_STAR.AlignWith( A22 ); L21_VR_STAR.AlignWith( A22 ); L21Adj_STAR_MR.AlignWith( A22 ); X10_STAR_MR.AlignWith( A10 ); Y21.AlignWith( A21 ); Z21_MC_STAR.AlignWith( A22 ); Z21_MR_STAR.AlignWith( A22 ); //--------------------------------------------------------------------// // A10 := L11' A10 L11_STAR_STAR = L11; A10_STAR_VR = A10; LocalTrmm ( LEFT, LOWER, ADJOINT, diag, F(1), L11_STAR_STAR, A10_STAR_VR ); A10 = A10_STAR_VR; // A10 := A10 + L21' A20 L21_MC_STAR = L21; X10_STAR_MR.ResizeTo( A10.Height(), A10.Width() ); LocalGemm( ADJOINT, NORMAL, F(1), L21_MC_STAR, A20, F(0), X10_STAR_MR ); A10.SumScatterUpdate( F(1), X10_STAR_MR ); // Y21 := A22 L21 L21_VC_STAR = L21_MC_STAR; L21_VR_STAR = L21_VC_STAR; L21Adj_STAR_MR.AdjointFrom( L21_VR_STAR ); Z21_MC_STAR.ResizeTo( A21.Height(), A21.Width() ); Z21_MR_STAR.ResizeTo( A21.Height(), A21.Width() ); Zero( Z21_MC_STAR ); Zero( Z21_MR_STAR ); LocalSymmetricAccumulateLL ( ADJOINT, F(1), A22, L21_MC_STAR, L21Adj_STAR_MR, Z21_MC_STAR, Z21_MR_STAR ); Z21_MR_MC.SumScatterFrom( Z21_MR_STAR ); Y21 = Z21_MR_MC; Y21.SumScatterUpdate( F(1), Z21_MC_STAR ); // A21 := A21 L11 A21_VC_STAR = A21; LocalTrmm ( RIGHT, LOWER, NORMAL, diag, F(1), L11_STAR_STAR, A21_VC_STAR ); A21 = A21_VC_STAR; // A21 := A21 + 1/2 Y21 Axpy( F(1)/F(2), Y21, A21 ); // A11 := L11' A11 L11 A11_STAR_STAR = A11; LocalTwoSidedTrmm( LOWER, diag, A11_STAR_STAR, L11_STAR_STAR ); A11 = A11_STAR_STAR; // A11 := A11 + (A21' L21 + L21' A21) A21_VC_STAR = A21; X11_STAR_STAR.ResizeTo( A11.Height(), A11.Width() ); Her2k ( LOWER, ADJOINT, F(1), A21_VC_STAR.LocalMatrix(), L21_VC_STAR.LocalMatrix(), F(0), X11_STAR_STAR.LocalMatrix() ); A11.SumScatterUpdate( F(1), X11_STAR_STAR ); // A21 := A21 + 1/2 Y21 Axpy( F(1)/F(2), Y21, A21 ); //--------------------------------------------------------------------// A21_VC_STAR.FreeAlignments(); L21_MC_STAR.FreeAlignments(); L21_VC_STAR.FreeAlignments(); L21_VR_STAR.FreeAlignments(); L21Adj_STAR_MR.FreeAlignments(); X10_STAR_MR.FreeAlignments(); Y21.FreeAlignments(); Z21_MC_STAR.FreeAlignments(); Z21_MR_STAR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void TwoSidedTrsmUVar2 ( UnitOrNonUnit diag, DistMatrix<F>& A, const DistMatrix<F>& U ) { #ifndef RELEASE CallStackEntry entry("internal::TwoSidedTrsmUVar2"); if( A.Height() != A.Width() ) LogicError("A must be square"); if( U.Height() != U.Width() ) LogicError("Triangular matrices must be square"); if( A.Height() != U.Height() ) LogicError("A and U must be the same size"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); // Temporary distributions DistMatrix<F,MC, STAR> A01_MC_STAR(g); DistMatrix<F,VC, STAR> A01_VC_STAR(g); DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,STAR,VR > A12_STAR_VR(g); DistMatrix<F,MC, STAR> F01_MC_STAR(g); DistMatrix<F,MC, STAR> U01_MC_STAR(g); DistMatrix<F,VR, STAR> U01_VR_STAR(g); DistMatrix<F,STAR,MR > U01Adj_STAR_MR(g); DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); DistMatrix<F,STAR,MR > X11_STAR_MR(g); DistMatrix<F,MR, STAR> X12Adj_MR_STAR(g); DistMatrix<F,MR, MC > X12Adj_MR_MC(g); DistMatrix<F,MR, MC > Y01_MR_MC(g); DistMatrix<F,MR, STAR> Y01_MR_STAR(g); DistMatrix<F> X11(g); DistMatrix<F> Y01(g); Matrix<F> X12Local; PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDownDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDownDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); A01_MC_STAR.AlignWith( U01 ); Y01.AlignWith( A01 ); Y01_MR_STAR.AlignWith( A00 ); U01_MC_STAR.AlignWith( A00 ); U01_VR_STAR.AlignWith( A00 ); U01Adj_STAR_MR.AlignWith( A00 ); X11_STAR_MR.AlignWith( U01 ); X11.AlignWith( A11 ); X12Adj_MR_STAR.AlignWith( A02 ); X12Adj_MR_MC.AlignWith( A12 ); F01_MC_STAR.AlignWith( A00 ); //--------------------------------------------------------------------// // Y01 := A00 U01 U01_MC_STAR = U01; U01_VR_STAR = U01_MC_STAR; U01Adj_STAR_MR.AdjointFrom( U01_VR_STAR ); Zeros( Y01_MR_STAR, A01.Height(), A01.Width() ); Zeros( F01_MC_STAR, A01.Height(), A01.Width() ); LocalSymmetricAccumulateLU ( ADJOINT, F(1), A00, U01_MC_STAR, U01Adj_STAR_MR, F01_MC_STAR, Y01_MR_STAR ); Y01_MR_MC.SumScatterFrom( Y01_MR_STAR ); Y01 = Y01_MR_MC; Y01.SumScatterUpdate( F(1), F01_MC_STAR ); // X11 := U01' A01 LocalGemm( ADJOINT, NORMAL, F(1), U01_MC_STAR, A01, X11_STAR_MR ); // A01 := A01 - Y01 Axpy( F(-1), Y01, A01 ); A01_MC_STAR = A01; // A11 := A11 - triu(X11 + A01' U01) = A11 - (U01 A01 + A01' U01) LocalGemm( ADJOINT, NORMAL, F(1), A01_MC_STAR, U01, F(1), X11_STAR_MR ); X11.SumScatterFrom( X11_STAR_MR ); MakeTriangular( UPPER, X11 ); Axpy( F(-1), X11, A11 ); // A01 := A01 inv(U11) U11_STAR_STAR = U11; A01_VC_STAR = A01_MC_STAR; LocalTrsm ( RIGHT, UPPER, NORMAL, diag, F(1), U11_STAR_STAR, A01_VC_STAR ); A01 = A01_VC_STAR; // A11 := inv(U11)' A11 inv(U11) A11_STAR_STAR = A11; LocalTwoSidedTrsm( UPPER, diag, A11_STAR_STAR, U11_STAR_STAR ); A11 = A11_STAR_STAR; // A12 := A12 - A02' U01 LocalGemm( ADJOINT, NORMAL, F(1), A02, U01_MC_STAR, X12Adj_MR_STAR ); X12Adj_MR_MC.SumScatterFrom( X12Adj_MR_STAR ); Adjoint( X12Adj_MR_MC.LockedMatrix(), X12Local ); Axpy( F(-1), X12Local, A12.Matrix() ); // A12 := inv(U11)' A12 A12_STAR_VR = A12; LocalTrsm ( LEFT, UPPER, ADJOINT, diag, F(1), U11_STAR_STAR, A12_STAR_VR ); A12 = A12_STAR_VR; //--------------------------------------------------------------------// SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDownDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); } }
inline void CholeskyLVar2Naive( DistMatrix<F>& A ) { #ifndef RELEASE PushCallStack("internal::CholeskyLVar2Naive"); if( A.Height() != A.Width() ) throw std::logic_error ("Can only compute Cholesky factor of square matrices"); if( A.Grid().VCRank() == 0 ) { std::cout << "CholeskyLVar2Naive exists solely for academic purposes. Please " "use CholeskyLVar2 in real applications." << std::endl; } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); // Temporary distributions DistMatrix<F,STAR,MR > A10_STAR_MR(g); DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,VC, STAR> A21_VC_STAR(g); DistMatrix<F,MC, STAR> X11_MC_STAR(g); DistMatrix<F,MC, STAR> X21_MC_STAR(g); // Start the algorithm PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); A10_STAR_MR.AlignWith( A10 ); X11_MC_STAR.AlignWith( A10 ); X21_MC_STAR.AlignWith( A20 ); X11_MC_STAR.ResizeTo( A11.Height(), A11.Width() ); X21_MC_STAR.ResizeTo( A21.Height(), A21.Width() ); //--------------------------------------------------------------------// A10_STAR_MR = A10; LocalGemm( NORMAL, ADJOINT, F(1), A10, A10_STAR_MR, F(0), X11_MC_STAR ); A11.SumScatterUpdate( F(-1), X11_MC_STAR ); A11_STAR_STAR = A11; LocalCholesky( LOWER, A11_STAR_STAR ); A11 = A11_STAR_STAR; LocalGemm( NORMAL, ADJOINT, F(1), A20, A10_STAR_MR, F(0), X21_MC_STAR ); A21.SumScatterUpdate( F(-1), X21_MC_STAR ); A21_VC_STAR = A21; LocalTrsm ( RIGHT, LOWER, ADJOINT, NON_UNIT, F(1), A11_STAR_STAR, A21_VC_STAR ); A21 = A21_VC_STAR; //--------------------------------------------------------------------// A10_STAR_MR.FreeAlignments(); X11_MC_STAR.FreeAlignments(); X21_MC_STAR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void Var3( Orientation orientation, DistMatrix<F>& A, DistMatrix<F,MC,STAR>& d ) { #ifndef RELEASE PushCallStack("ldl::Var3"); if( orientation == NORMAL ) throw std::logic_error("Can only perform LDL^T and LDL^H"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( A.Grid() != d.Grid() ) throw std::logic_error("A and d must use the same grid"); if( d.Viewing() && (d.Height() != A.Height() || d.Width() != 1) ) throw std::logic_error ("d must be a column vector of the same height as A"); if( d.Viewing() && d.ColAlignment() != A.ColAlignment() ) throw std::logic_error("d must be aligned with A"); #endif const Grid& g = A.Grid(); if( !d.Viewing() ) { d.AlignWith( A ); d.ResizeTo( A.Height(), 1 ); } // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F,MC,STAR> dT(g), d0(g), dB(g), d1(g), d2(g); // Temporary matrices DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,STAR,STAR> d1_STAR_STAR(g); DistMatrix<F,VC, STAR> A21_VC_STAR(g); DistMatrix<F,VR, STAR> A21_VR_STAR(g); DistMatrix<F,STAR,MC > S21Trans_STAR_MC(g); DistMatrix<F,STAR,MR > A21AdjOrTrans_STAR_MR(g); const bool conjugate = ( orientation == ADJOINT ); // Start the algorithm PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); PartitionDown ( d, dT, dB, 0 ); while( ABR.Height() > 0 ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); RepartitionDown ( dT, d0, /**/ /**/ d1, dB, d2 ); A21_VC_STAR.AlignWith( A22 ); A21_VR_STAR.AlignWith( A22 ); S21Trans_STAR_MC.AlignWith( A22 ); A21AdjOrTrans_STAR_MR.AlignWith( A22 ); //--------------------------------------------------------------------// A11_STAR_STAR = A11; LocalLDL( orientation, A11_STAR_STAR, d1_STAR_STAR ); A11 = A11_STAR_STAR; d1 = d1_STAR_STAR; A21_VC_STAR = A21; LocalTrsm ( RIGHT, LOWER, orientation, UNIT, F(1), A11_STAR_STAR, A21_VC_STAR ); S21Trans_STAR_MC.TransposeFrom( A21_VC_STAR ); DiagonalSolve( RIGHT, NORMAL, d1_STAR_STAR, A21_VC_STAR ); A21_VR_STAR = A21_VC_STAR; A21AdjOrTrans_STAR_MR.TransposeFrom( A21_VR_STAR, conjugate ); LocalTrrk ( LOWER, TRANSPOSE, F(-1), S21Trans_STAR_MC, A21AdjOrTrans_STAR_MR, F(1), A22 ); A21 = A21_VC_STAR; //--------------------------------------------------------------------// A21_VC_STAR.FreeAlignments(); A21_VR_STAR.FreeAlignments(); S21Trans_STAR_MC.FreeAlignments(); A21AdjOrTrans_STAR_MR.FreeAlignments(); SlidePartitionDown ( dT, d0, d1, /**/ /**/ dB, d2 ); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void RowEchelon( DistMatrix<F>& A, DistMatrix<F>& B ) { #ifndef RELEASE CallStackEntry entry("RowEchelon"); if( A.Grid() != B.Grid() ) LogicError("{A,B} must be distributed over the same grid"); if( A.Height() != B.Height() ) LogicError("A and B must be the same height"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), APan(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F> BT(g), B0(g), BB(g), B1(g), B2(g); // Temporary distributions DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,STAR,VR > A12_STAR_VR(g); DistMatrix<F,STAR,MR > A12_STAR_MR(g); DistMatrix<F,MC, STAR> A21_MC_STAR(g); DistMatrix<F,STAR,VR > B1_STAR_VR(g); DistMatrix<F,STAR,MR > B1_STAR_MR(g); DistMatrix<Int,STAR,STAR> p1_STAR_STAR(g); // In case B's columns are not aligned with A's const bool BAligned = ( B.ColShift() == A.ColShift() ); DistMatrix<F,MC,STAR> A21_MC_STAR_B(g); // Pivot composition std::vector<Int> image, preimage; // Start the algorithm PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); PartitionDown ( B, BT, BB, 0 ); while( ATL.Height() < A.Height() && ATL.Width() < A.Width() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); RepartitionDown ( BT, B0, /**/ /**/ B1, BB, B2 ); View2x1 ( APan, A12, A22 ); A12_STAR_VR.AlignWith( A22 ); A12_STAR_MR.AlignWith( A22 ); A21_MC_STAR.AlignWith( A22 ); B1_STAR_VR.AlignWith( B1 ); B1_STAR_MR.AlignWith( B1 ); if( ! BAligned ) A21_MC_STAR_B.AlignWith( B2 ); p1_STAR_STAR.ResizeTo( A11.Height(), 1 ); //--------------------------------------------------------------------// A11_STAR_STAR = A11; A21_MC_STAR = A21; lu::Panel( A11_STAR_STAR, A21_MC_STAR, p1_STAR_STAR, A00.Height() ); ComposePivots( p1_STAR_STAR, A00.Height(), image, preimage ); ApplyRowPivots( APan, image, preimage ); ApplyRowPivots( BB, image, preimage ); A12_STAR_VR = A12; B1_STAR_VR = B1; LocalTrsm ( LEFT, LOWER, NORMAL, UNIT, F(1), A11_STAR_STAR, A12_STAR_VR ); LocalTrsm( LEFT, LOWER, NORMAL, UNIT, F(1), A11_STAR_STAR, B1_STAR_VR ); A12_STAR_MR = A12_STAR_VR; B1_STAR_MR = B1_STAR_VR; LocalGemm( NORMAL, NORMAL, F(-1), A21_MC_STAR, A12_STAR_MR, F(1), A22 ); if( BAligned ) { LocalGemm ( NORMAL, NORMAL, F(-1), A21_MC_STAR, B1_STAR_MR, F(1), B2 ); } else { A21_MC_STAR_B = A21_MC_STAR; LocalGemm ( NORMAL, NORMAL, F(-1), A21_MC_STAR_B, B1_STAR_MR, F(1), B2 ); } A11 = A11_STAR_STAR; A12 = A12_STAR_MR; B1 = B1_STAR_MR; //--------------------------------------------------------------------// SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlidePartitionDown ( BT, B0, B1, /**/ /**/ BB, B2 ); } }
inline void TwoSidedTrsmUVar4 ( UnitOrNonUnit diag, DistMatrix<F>& A, const DistMatrix<F>& U ) { #ifndef RELEASE CallStackEntry entry("internal::TwoSidedTrsmUVar4"); if( A.Height() != A.Width() ) LogicError("A must be square"); if( U.Height() != U.Width() ) LogicError("Triangular matrices must be square"); if( A.Height() != U.Height() ) LogicError("A and U must be the same size"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); // Temporary distributions DistMatrix<F,VC, STAR> A01_VC_STAR(g); DistMatrix<F,STAR,MC > A01Trans_STAR_MC(g); DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,STAR,VR > A12_STAR_VR(g); DistMatrix<F,STAR,VC > A12_STAR_VC(g); DistMatrix<F,STAR,MC > A12_STAR_MC(g); DistMatrix<F,STAR,MR > A12_STAR_MR(g); DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); DistMatrix<F,MR, STAR> U12Trans_MR_STAR(g); DistMatrix<F,VR, STAR> U12Trans_VR_STAR(g); DistMatrix<F,STAR,VR > U12_STAR_VR(g); DistMatrix<F,STAR,VC > U12_STAR_VC(g); DistMatrix<F,STAR,MC > U12_STAR_MC(g); DistMatrix<F,STAR,VR > Y12_STAR_VR(g); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDownDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDownDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); A01_VC_STAR.AlignWith( A02 ); A01Trans_STAR_MC.AlignWith( A02 ); A12_STAR_VR.AlignWith( A22 ); A12_STAR_VC.AlignWith( A22 ); A12_STAR_MC.AlignWith( A22 ); A12_STAR_MR.AlignWith( A22 ); U12Trans_MR_STAR.AlignWith( A02 ); U12Trans_VR_STAR.AlignWith( A02 ); U12_STAR_VR.AlignWith( A02 ); U12_STAR_VC.AlignWith( A22 ); U12_STAR_MC.AlignWith( A22 ); Y12_STAR_VR.AlignWith( A12 ); //--------------------------------------------------------------------// // A01 := A01 inv(U11) A01_VC_STAR = A01; U11_STAR_STAR = U11; LocalTrsm ( RIGHT, UPPER, NORMAL, diag, F(1), U11_STAR_STAR, A01_VC_STAR ); A01 = A01_VC_STAR; // A11 := inv(U11)' A11 inv(U11) A11_STAR_STAR = A11; LocalTwoSidedTrsm( UPPER, diag, A11_STAR_STAR, U11_STAR_STAR ); A11 = A11_STAR_STAR; // A02 := A02 - A01 U12 A01Trans_STAR_MC.TransposeFrom( A01_VC_STAR ); U12Trans_MR_STAR.TransposeFrom( U12 ); LocalGemm ( TRANSPOSE, TRANSPOSE, F(-1), A01Trans_STAR_MC, U12Trans_MR_STAR, F(1), A02 ); // Y12 := A11 U12 U12Trans_VR_STAR = U12Trans_MR_STAR; Zeros( U12_STAR_VR, A12.Height(), A12.Width() ); Transpose( U12Trans_VR_STAR.Matrix(), U12_STAR_VR.Matrix() ); Zeros( Y12_STAR_VR, A12.Height(), A12.Width() ); Hemm ( LEFT, UPPER, F(1), A11_STAR_STAR.Matrix(), U12_STAR_VR.Matrix(), F(0), Y12_STAR_VR.Matrix() ); // A12 := inv(U11)' A12 A12_STAR_VR = A12; LocalTrsm ( LEFT, UPPER, ADJOINT, diag, F(1), U11_STAR_STAR, A12_STAR_VR ); // A12 := A12 - 1/2 Y12 Axpy( F(-1)/F(2), Y12_STAR_VR, A12_STAR_VR ); // A22 := A22 - (A12' U12 + U12' A12) A12_STAR_MR = A12_STAR_VR; A12_STAR_VC = A12_STAR_VR; U12_STAR_VC = U12_STAR_VR; A12_STAR_MC = A12_STAR_VC; U12_STAR_MC = U12_STAR_VC; LocalTrr2k ( UPPER, ADJOINT, TRANSPOSE, ADJOINT, F(-1), A12_STAR_MC, U12Trans_MR_STAR, U12_STAR_MC, A12_STAR_MR, F(1), A22 ); // A12 := A12 - 1/2 Y12 Axpy( F(-1)/F(2), Y12_STAR_VR, A12_STAR_VR ); A12 = A12_STAR_VR; //--------------------------------------------------------------------// SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDownDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /**********************************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); } }
inline void internal::HegstRLVar3( DistMatrix<F,MC,MR>& A, const DistMatrix<F,MC,MR>& L ) { #ifndef RELEASE PushCallStack("internal::HegstRLVar4"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( L.Height() != L.Width() ) throw std::logic_error("Triangular matrices must be square"); if( A.Height() != L.Height() ) throw std::logic_error("A and L must be the same size"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F,MC,MR> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F,MC,MR> YTL(g), YTR(g), Y00(g), Y01(g), Y02(g), YBL(g), YBR(g), Y10(g), Y11(g), Y12(g), Y20(g), Y21(g), Y22(g); DistMatrix<F,MC,MR> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); // Temporary distributions DistMatrix<F,STAR,MR > A11_STAR_MR(g); DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,VC, STAR> A21_VC_STAR(g); DistMatrix<F,STAR,VR > A10_STAR_VR(g); DistMatrix<F,STAR,MR > A10_STAR_MR(g); DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,STAR,VR > L10_STAR_VR(g); DistMatrix<F,STAR,MR > L10_STAR_MR(g); DistMatrix<F,MC, STAR> L21_MC_STAR(g); DistMatrix<F,STAR,STAR> X11_STAR_STAR(g); DistMatrix<F,MC, STAR> X21_MC_STAR(g); DistMatrix<F,MC, STAR> Z21_MC_STAR(g); // We will use an entire extra matrix as temporary storage. If this is not // acceptable, use HegstRLVar4 instead. DistMatrix<F,MC,MR> Y(g); Y.AlignWith( A ); Y.ResizeTo( A.Height(), A.Width() ); Zero( Y ); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); PartitionDownDiagonal ( Y, YTL, YTR, YBL, YBR, 0 ); LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); RepartitionDownDiagonal ( YTL, /**/ YTR, Y00, /**/ Y01, Y02, /*************/ /******************/ /**/ Y10, /**/ Y11, Y12, YBL, /**/ YBR, Y20, /**/ Y21, Y22 ); LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); A11_STAR_MR.AlignWith( Y21 ); A21_VC_STAR.AlignWith( A21 ); A10_STAR_VR.AlignWith( A10 ); A10_STAR_MR.AlignWith( A10 ); L10_STAR_VR.AlignWith( A10 ); L10_STAR_MR.AlignWith( A10 ); L21_MC_STAR.AlignWith( Y21 ); X21_MC_STAR.AlignWith( A20 ); Z21_MC_STAR.AlignWith( L20 ); //--------------------------------------------------------------------// // A10 := A10 - 1/2 Y10 Axpy( (F)-0.5, Y10, A10 ); // A11 := A11 - (A10 L10' + L10 A10') A10_STAR_VR = A10; L10_STAR_VR = L10; X11_STAR_STAR.ResizeTo( A11.Height(), A11.Width() ); Her2k ( LOWER, NORMAL, (F)1, A10_STAR_VR.LocalMatrix(), L10_STAR_VR.LocalMatrix(), (F)0, X11_STAR_STAR.LocalMatrix() ); MakeTrapezoidal( LEFT, LOWER, 0, X11_STAR_STAR ); A11.SumScatterUpdate( (F)-1, X11_STAR_STAR ); // A11 := inv(L11) A11 inv(L11)' A11_STAR_STAR = A11; L11_STAR_STAR = L11; internal::LocalHegst( RIGHT, LOWER, A11_STAR_STAR, L11_STAR_STAR ); A11 = A11_STAR_STAR; // A21 := A21 - A20 L10' L10_STAR_MR = L10_STAR_VR; X21_MC_STAR.ResizeTo( A21.Height(), A21.Width() ); internal::LocalGemm ( NORMAL, ADJOINT, (F)1, A20, L10_STAR_MR, (F)0, X21_MC_STAR ); A21.SumScatterUpdate( (F)-1, X21_MC_STAR ); // A21 := A21 inv(L11)' A21_VC_STAR = A21; internal::LocalTrsm ( RIGHT, LOWER, ADJOINT, NON_UNIT, (F)1, L11_STAR_STAR, A21_VC_STAR ); A21 = A21_VC_STAR; // A10 := A10 - 1/2 Y10 Axpy( (F)-0.5, Y10, A10 ); // A10 := inv(L11) A10 A10_STAR_VR = A10; internal::LocalTrsm ( LEFT, LOWER, NORMAL, NON_UNIT, (F)1, L11_STAR_STAR, A10_STAR_VR ); // Y20 := Y20 + L21 A10 A10_STAR_MR = A10_STAR_VR; A10 = A10_STAR_MR; L21_MC_STAR = L21; internal::LocalGemm ( NORMAL, NORMAL, (F)1, L21_MC_STAR, A10_STAR_MR, (F)1, Y20 ); // Y21 := L21 A11 // // Symmetrize A11[* ,* ] by copying the lower triangle into the upper // so that we can call a local gemm instead of worrying about // reproducing a hemm with nonsymmetric local matrices. { const int height = A11_STAR_STAR.LocalHeight(); const int ldim = A11_STAR_STAR.LocalLDim(); F* A11Buffer = A11_STAR_STAR.LocalBuffer(); for( int i=1; i<height; ++i ) for( int j=0; j<i; ++j ) A11Buffer[j+i*ldim] = Conj(A11Buffer[i+j*ldim]); } A11_STAR_MR = A11_STAR_STAR; internal::LocalGemm ( NORMAL, NORMAL, (F)1, L21_MC_STAR, A11_STAR_MR, (F)0, Y21 ); // Y21 := Y21 + L20 A10' Z21_MC_STAR.ResizeTo( A21.Height(), A21.Width() ); internal::LocalGemm ( NORMAL, ADJOINT, (F)1, L20, A10_STAR_MR, (F)0, Z21_MC_STAR ); Y21.SumScatterUpdate( (F)1, Z21_MC_STAR ); //--------------------------------------------------------------------// A11_STAR_MR.FreeAlignments(); A21_VC_STAR.FreeAlignments(); A10_STAR_VR.FreeAlignments(); A10_STAR_MR.FreeAlignments(); L10_STAR_VR.FreeAlignments(); L10_STAR_MR.FreeAlignments(); L21_MC_STAR.FreeAlignments(); X21_MC_STAR.FreeAlignments(); Z21_MC_STAR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlidePartitionDownDiagonal ( YTL, /**/ YTR, Y00, Y01, /**/ Y02, /**/ Y10, Y11, /**/ Y12, /*************/ /******************/ YBL, /**/ YBR, Y20, Y21, /**/ Y22 ); SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /**********************************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void LU( DistMatrix<F>& A, DistMatrix<int,VC,STAR>& p ) { #ifndef RELEASE CallStackEntry entry("LU"); if( A.Grid() != p.Grid() ) throw std::logic_error("{A,p} must be distributed over the same grid"); if( p.Viewing() && (std::min(A.Height(),A.Width()) != p.Height() || p.Width() != 1) ) throw std::logic_error ("p must be a vector of the same height as the min dimension of A."); #endif const Grid& g = A.Grid(); if( !p.Viewing() ) p.ResizeTo( std::min(A.Height(),A.Width()), 1 ); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), AB(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<int,VC,STAR> pT(g), p0(g), pB(g), p1(g), p2(g); // Temporary distributions DistMatrix<F, STAR,STAR> A11_STAR_STAR(g); DistMatrix<F, MC, STAR> A21_MC_STAR(g); DistMatrix<F, STAR,VR > A12_STAR_VR(g); DistMatrix<F, STAR,MR > A12_STAR_MR(g); DistMatrix<int,STAR,STAR> p1_STAR_STAR(g); // Pivot composition std::vector<int> image, preimage; // Start the algorithm PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); PartitionDown ( p, pT, pB, 0 ); while( ATL.Height() < A.Height() && ATL.Width() < A.Width() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); RepartitionDown ( pT, p0, /**/ /**/ p1, pB, p2 ); View1x2( AB, ABL, ABR ); const int pivotOffset = A01.Height(); A12_STAR_VR.AlignWith( A22 ); A12_STAR_MR.AlignWith( A22 ); A21_MC_STAR.AlignWith( A22 ); A11_STAR_STAR.ResizeTo( A11.Height(), A11.Width() ); p1_STAR_STAR.ResizeTo( p1.Height(), 1 ); //--------------------------------------------------------------------// A21_MC_STAR = A21; A11_STAR_STAR = A11; lu::Panel( A11_STAR_STAR, A21_MC_STAR, p1_STAR_STAR, pivotOffset ); ComposePivots( p1_STAR_STAR, pivotOffset, image, preimage ); ApplyRowPivots( AB, image, preimage ); // Perhaps we should give up perfectly distributing this operation since // it's total contribution is only O(n^2) A12_STAR_VR = A12; LocalTrsm ( LEFT, LOWER, NORMAL, UNIT, F(1), A11_STAR_STAR, A12_STAR_VR ); A12_STAR_MR = A12_STAR_VR; LocalGemm( NORMAL, NORMAL, F(-1), A21_MC_STAR, A12_STAR_MR, F(1), A22 ); A11 = A11_STAR_STAR; A12 = A12_STAR_MR; A21 = A21_MC_STAR; p1 = p1_STAR_STAR; //--------------------------------------------------------------------// A12_STAR_VR.FreeAlignments(); A12_STAR_MR.FreeAlignments(); A21_MC_STAR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlidePartitionDown ( pT, p0, p1, /**/ /**/ pB, p2 ); } }
inline void TwoSidedTrsmLVar5 ( UnitOrNonUnit diag, DistMatrix<F>& A, const DistMatrix<F>& L ) { #ifndef RELEASE CallStackEntry entry("internal::TwoSidedTrsmLVar5"); if( A.Height() != A.Width() ) LogicError("A must be square"); if( L.Height() != L.Width() ) LogicError("Triangular matrices must be square"); if( A.Height() != L.Height() ) LogicError("A and L must be the same size"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); // Temporary distributions DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,MC, STAR> A21_MC_STAR(g); DistMatrix<F,VC, STAR> A21_VC_STAR(g); DistMatrix<F,VR, STAR> A21_VR_STAR(g); DistMatrix<F,STAR,MR > A21Adj_STAR_MR(g); DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,MC, STAR> L21_MC_STAR(g); DistMatrix<F,VC, STAR> L21_VC_STAR(g); DistMatrix<F,VR, STAR> L21_VR_STAR(g); DistMatrix<F,STAR,MR > L21Adj_STAR_MR(g); DistMatrix<F,VC, STAR> Y21_VC_STAR(g); DistMatrix<F> Y21(g); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); A21_MC_STAR.AlignWith( A22 ); A21_VC_STAR.AlignWith( A22 ); A21_VR_STAR.AlignWith( A22 ); A21Adj_STAR_MR.AlignWith( A22 ); L21_MC_STAR.AlignWith( A22 ); L21_VC_STAR.AlignWith( A22 ); L21_VR_STAR.AlignWith( A22 ); L21Adj_STAR_MR.AlignWith( A22 ); Y21.AlignWith( A21 ); Y21_VC_STAR.AlignWith( A22 ); //--------------------------------------------------------------------// // A11 := inv(L11) A11 inv(L11)' L11_STAR_STAR = L11; A11_STAR_STAR = A11; LocalTwoSidedTrsm( LOWER, diag, A11_STAR_STAR, L11_STAR_STAR ); A11 = A11_STAR_STAR; // Y21 := L21 A11 L21_VC_STAR = L21; Zeros( Y21_VC_STAR, A21.Height(), A21.Width() ); Hemm ( RIGHT, LOWER, F(1), A11_STAR_STAR.Matrix(), L21_VC_STAR.Matrix(), F(0), Y21_VC_STAR.Matrix() ); Y21 = Y21_VC_STAR; // A21 := A21 inv(L11)' A21_VC_STAR = A21; LocalTrsm ( RIGHT, LOWER, ADJOINT, diag, F(1), L11_STAR_STAR, A21_VC_STAR ); A21 = A21_VC_STAR; // A21 := A21 - 1/2 Y21 Axpy( F(-1)/F(2), Y21, A21 ); // A22 := A22 - (L21 A21' + A21 L21') A21_MC_STAR = A21; L21_MC_STAR = L21; A21_VC_STAR = A21_MC_STAR; A21_VR_STAR = A21_VC_STAR; L21_VR_STAR = L21_VC_STAR; A21Adj_STAR_MR.AdjointFrom( A21_VR_STAR ); L21Adj_STAR_MR.AdjointFrom( L21_VR_STAR ); LocalTrr2k ( LOWER, F(-1), L21_MC_STAR, A21Adj_STAR_MR, A21_MC_STAR, L21Adj_STAR_MR, F(1), A22 ); // A21 := A21 - 1/2 Y21 Axpy( F(-1)/F(2), Y21, A21 ); // A21 := inv(L22) A21 // // This is the bottleneck because A21 only has blocksize columns Trsm( LEFT, LOWER, NORMAL, diag, F(1), L22, A21 ); //--------------------------------------------------------------------// SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /**********************************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); } }
inline void TwoSidedTrmmLVar4 ( UnitOrNonUnit diag, DistMatrix<F>& A, const DistMatrix<F>& L ) { #ifndef RELEASE CallStackEntry entry("internal::TwoSidedTrmmLVar4"); if( A.Height() != A.Width() ) LogicError("A must be square"); if( L.Height() != L.Width() ) LogicError("Triangular matrices must be square"); if( A.Height() != L.Height() ) LogicError("A and L must be the same size"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); // Temporary distributions DistMatrix<F,STAR,VR > A10_STAR_VR(g); DistMatrix<F,STAR,MR > A10_STAR_MR(g); DistMatrix<F,STAR,MC > A10_STAR_MC(g); DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,VC, STAR> A21_VC_STAR(g); DistMatrix<F,MC, STAR> A21_MC_STAR(g); DistMatrix<F,STAR,VR > L10_STAR_VR(g); DistMatrix<F,MR, STAR> L10Adj_MR_STAR(g); DistMatrix<F,STAR,MC > L10_STAR_MC(g); DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,STAR,VR > Y10_STAR_VR(g); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); A10_STAR_VR.AlignWith( A00 ); A10_STAR_MR.AlignWith( A00 ); A10_STAR_MC.AlignWith( A00 ); A21_MC_STAR.AlignWith( A20 ); L10_STAR_VR.AlignWith( A00 ); L10Adj_MR_STAR.AlignWith( A00 ); L10_STAR_MC.AlignWith( A00 ); Y10_STAR_VR.AlignWith( A10 ); //--------------------------------------------------------------------// // Y10 := A11 L10 A11_STAR_STAR = A11; L10Adj_MR_STAR.AdjointFrom( L10 ); L10_STAR_VR.AdjointFrom( L10Adj_MR_STAR ); Zeros( Y10_STAR_VR, A10.Height(), A10.Width() ); Hemm ( LEFT, LOWER, F(1), A11_STAR_STAR.LockedMatrix(), L10_STAR_VR.LockedMatrix(), F(0), Y10_STAR_VR.Matrix() ); // A10 := A10 + 1/2 Y10 A10_STAR_VR = A10; Axpy( F(1)/F(2), Y10_STAR_VR, A10_STAR_VR ); // A00 := A00 + (A10' L10 + L10' A10) A10_STAR_MR = A10_STAR_VR; A10_STAR_MC = A10_STAR_VR; L10_STAR_MC = L10_STAR_VR; LocalTrr2k ( LOWER, ADJOINT, ADJOINT, ADJOINT, F(1), A10_STAR_MC, L10Adj_MR_STAR, L10_STAR_MC, A10_STAR_MR, F(1), A00 ); // A10 := A10 + 1/2 Y10 Axpy( F(1)/F(2), Y10_STAR_VR, A10_STAR_VR ); // A10 := L11' A10 L11_STAR_STAR = L11; LocalTrmm ( LEFT, LOWER, ADJOINT, diag, F(1), L11_STAR_STAR, A10_STAR_VR ); A10 = A10_STAR_VR; // A20 := A20 + A21 L10 A21_MC_STAR = A21; LocalGemm ( NORMAL, ADJOINT, F(1), A21_MC_STAR, L10Adj_MR_STAR, F(1), A20 ); // A11 := L11' A11 L11 LocalTwoSidedTrmm( LOWER, diag, A11_STAR_STAR, L11_STAR_STAR ); A11 = A11_STAR_STAR; // A21 := A21 L11 A21_VC_STAR = A21_MC_STAR; LocalTrmm ( RIGHT, LOWER, NORMAL, diag, F(1), L11_STAR_STAR, A21_VC_STAR ); A21 = A21_VC_STAR; //--------------------------------------------------------------------// SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); } }
inline void TwoSidedTrsmLVar2 ( UnitOrNonUnit diag, DistMatrix<F>& A, const DistMatrix<F>& L ) { #ifndef RELEASE PushCallStack("internal::TwoSidedTrsmLVar2"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( L.Height() != L.Width() ) throw std::logic_error("Triangular matrices must be square"); if( A.Height() != L.Height() ) throw std::logic_error("A and L must be the same size"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); // Temporary distributions DistMatrix<F,MR, STAR> A10Adj_MR_STAR(g); DistMatrix<F,STAR,VR > A10_STAR_VR(g); DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,VC, STAR> A21_VC_STAR(g); DistMatrix<F,MR, STAR> F10Adj_MR_STAR(g); DistMatrix<F,MR, STAR> L10Adj_MR_STAR(g); DistMatrix<F,VC, STAR> L10Adj_VC_STAR(g); DistMatrix<F,STAR,MC > L10_STAR_MC(g); DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,MC, STAR> X11_MC_STAR(g); DistMatrix<F,MC, STAR> X21_MC_STAR(g); DistMatrix<F,MC, STAR> Y10Adj_MC_STAR(g); DistMatrix<F,MR, MC > Y10Adj_MR_MC(g); DistMatrix<F> X11(g); DistMatrix<F> Y10Adj(g); Matrix<F> Y10Local; PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); A10Adj_MR_STAR.AlignWith( L10 ); F10Adj_MR_STAR.AlignWith( A00 ); L10Adj_MR_STAR.AlignWith( A00 ); L10Adj_VC_STAR.AlignWith( A00 ); L10_STAR_MC.AlignWith( A00 ); X11.AlignWith( A11 ); X11_MC_STAR.AlignWith( L10 ); X21_MC_STAR.AlignWith( A20 ); Y10Adj_MC_STAR.AlignWith( A00 ); Y10Adj_MR_MC.AlignWith( A10 ); //--------------------------------------------------------------------// // Y10 := L10 A00 L10Adj_MR_STAR.AdjointFrom( L10 ); L10Adj_VC_STAR = L10Adj_MR_STAR; L10_STAR_MC.AdjointFrom( L10Adj_VC_STAR ); Y10Adj_MC_STAR.ResizeTo( A10.Width(), A10.Height() ); F10Adj_MR_STAR.ResizeTo( A10.Width(), A10.Height() ); Zero( Y10Adj_MC_STAR ); Zero( F10Adj_MR_STAR ); LocalSymmetricAccumulateRL ( ADJOINT, F(1), A00, L10_STAR_MC, L10Adj_MR_STAR, Y10Adj_MC_STAR, F10Adj_MR_STAR ); Y10Adj.SumScatterFrom( Y10Adj_MC_STAR ); Y10Adj_MR_MC = Y10Adj; Y10Adj_MR_MC.SumScatterUpdate( F(1), F10Adj_MR_STAR ); Adjoint( Y10Adj_MR_MC.LockedLocalMatrix(), Y10Local ); // X11 := A10 L10' X11_MC_STAR.ResizeTo( A11.Height(), A11.Width() ); LocalGemm ( NORMAL, NORMAL, F(1), A10, L10Adj_MR_STAR, F(0), X11_MC_STAR ); // A10 := A10 - Y10 Axpy( F(-1), Y10Local, A10.LocalMatrix() ); A10Adj_MR_STAR.AdjointFrom( A10 ); // A11 := A11 - (X11 + L10 A10') = A11 - (A10 L10' + L10 A10') LocalGemm ( NORMAL, NORMAL, F(1), L10, A10Adj_MR_STAR, F(1), X11_MC_STAR ); X11.SumScatterFrom( X11_MC_STAR ); MakeTrapezoidal( LEFT, LOWER, 0, X11 ); Axpy( F(-1), X11, A11 ); // A10 := inv(L11) A10 L11_STAR_STAR = L11; A10_STAR_VR.AdjointFrom( A10Adj_MR_STAR ); LocalTrsm ( LEFT, LOWER, NORMAL, diag, F(1), L11_STAR_STAR, A10_STAR_VR ); A10 = A10_STAR_VR; // A11 := inv(L11) A11 inv(L11)' A11_STAR_STAR = A11; LocalTwoSidedTrsm( LOWER, diag, A11_STAR_STAR, L11_STAR_STAR ); A11 = A11_STAR_STAR; // A21 := A21 - A20 L10' X21_MC_STAR.ResizeTo( A21.Height(), A21.Width() ); LocalGemm ( NORMAL, NORMAL, F(1), A20, L10Adj_MR_STAR, F(0), X21_MC_STAR ); A21.SumScatterUpdate( F(-1), X21_MC_STAR ); // A21 := A21 inv(L11)' A21_VC_STAR = A21; LocalTrsm ( RIGHT, LOWER, ADJOINT, diag, F(1), L11_STAR_STAR, A21_VC_STAR ); A21 = A21_VC_STAR; //--------------------------------------------------------------------// A10Adj_MR_STAR.FreeAlignments(); F10Adj_MR_STAR.FreeAlignments(); L10Adj_MR_STAR.FreeAlignments(); L10Adj_VC_STAR.FreeAlignments(); L10_STAR_MC.FreeAlignments(); X11.FreeAlignments(); X11_MC_STAR.FreeAlignments(); X21_MC_STAR.FreeAlignments(); Y10Adj_MC_STAR.FreeAlignments(); Y10Adj_MR_MC.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /**********************************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); } #ifndef RELEASE PopCallStack(); #endif }
void LSquare ( DistMatrix<Complex<R> >& A, DistMatrix<Complex<R>,STAR,STAR>& t ) { #ifndef RELEASE CallStackEntry entry("hermitian_tridiag::LSquare"); if( A.Grid() != t.Grid() ) throw std::logic_error("{A,t} must be distributed over the same grid"); #endif const Grid& g = A.Grid(); #ifndef RELEASE if( g.Height() != g.Width() ) throw std::logic_error("The process grid must be square"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( t.Viewing() ) throw std::logic_error("t must not be a view"); #endif typedef Complex<R> C; DistMatrix<C,MD,STAR> tDiag(g); tDiag.AlignWithDiagonal( A, -1 ); tDiag.ResizeTo( A.Height()-1, 1 ); // Matrix views DistMatrix<C> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<C,MD,STAR> tT(g), t0(g), tB(g), t1(g), t2(g); // Temporary distributions DistMatrix<C> WPan(g); DistMatrix<C,STAR,STAR> t1_STAR_STAR(g); DistMatrix<C,STAR,STAR> A11_STAR_STAR(g); DistMatrix<C,MC, STAR> APan_MC_STAR(g), A11_MC_STAR(g), A21_MC_STAR(g); DistMatrix<C,MR, STAR> APan_MR_STAR(g), A11_MR_STAR(g), A21_MR_STAR(g); DistMatrix<C,MC, STAR> WPan_MC_STAR(g), W11_MC_STAR(g), W21_MC_STAR(g); DistMatrix<C,MR, STAR> WPan_MR_STAR(g), W11_MR_STAR(g), W21_MR_STAR(g); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); PartitionDown ( tDiag, tT, tB, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); RepartitionDown ( tT, t0, /**/ /**/ t1, tB, t2 ); if( A22.Height() > 0 ) { WPan.AlignWith( A11 ); APan_MC_STAR.AlignWith( A11 ); WPan_MC_STAR.AlignWith( A11 ); APan_MR_STAR.AlignWith( A11 ); WPan_MR_STAR.AlignWith( A11 ); //----------------------------------------------------------------// WPan.ResizeTo( ABR.Height(), A11.Width() ); APan_MC_STAR.ResizeTo( ABR.Height(), A11.Width() ); WPan_MC_STAR.ResizeTo( ABR.Height(), A11.Width() ); APan_MR_STAR.ResizeTo( ABR.Height(), A11.Width() ); WPan_MR_STAR.ResizeTo( ABR.Height(), A11.Width() ); hermitian_tridiag::PanelLSquare ( ABR, WPan, t1, APan_MC_STAR, APan_MR_STAR, WPan_MC_STAR, WPan_MR_STAR ); PartitionDown ( APan_MC_STAR, A11_MC_STAR, A21_MC_STAR, A11.Height() ); PartitionDown ( APan_MR_STAR, A11_MR_STAR, A21_MR_STAR, A11.Height() ); PartitionDown ( WPan_MC_STAR, W11_MC_STAR, W21_MC_STAR, A11.Height() ); PartitionDown ( WPan_MR_STAR, W11_MR_STAR, W21_MR_STAR, A11.Height() ); LocalTrr2k ( LOWER, ADJOINT, ADJOINT, C(-1), A21_MC_STAR, W21_MR_STAR, W21_MC_STAR, A21_MR_STAR, C(1), A22 ); //----------------------------------------------------------------// WPan_MR_STAR.FreeAlignments(); APan_MR_STAR.FreeAlignments(); WPan_MC_STAR.FreeAlignments(); APan_MC_STAR.FreeAlignments(); WPan.FreeAlignments(); } else { A11_STAR_STAR = A11; t1_STAR_STAR.ResizeTo( t1.Height(), 1 ); HermitianTridiag ( LOWER, A11_STAR_STAR.Matrix(), t1_STAR_STAR.Matrix() ); A11 = A11_STAR_STAR; t1 = t1_STAR_STAR; } SlidePartitionDown ( tT, t0, t1, /**/ /**/ tB, t2 ); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); } // Redistribute from matrix-diagonal form to fully replicated t = tDiag; }
inline void internal::HermitianTridiagU ( DistMatrix<Complex<R>,MC, MR >& A, DistMatrix<Complex<R>,STAR,STAR>& t ) { #ifndef RELEASE PushCallStack("internal::HermitianTridiagU"); if( A.Grid() != t.Grid() ) throw std::logic_error("{A,t} must be distributed over the same grid"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( t.Viewing() ) throw std::logic_error("t must not be a view"); #endif typedef Complex<R> C; const Grid& g = A.Grid(); DistMatrix<C,MD,STAR> tDiag(g); tDiag.AlignWithDiagonal( A, 1 ); tDiag.ResizeTo( A.Height()-1, 1 ); if( g.InGrid() ) { // Matrix views DistMatrix<C,MC,MR> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<C,MD,STAR> tT(g), t0(g), tB(g), t1(g), t2(g); // Temporary distributions DistMatrix<C,MC, MR > WPan(g); DistMatrix<C,STAR,STAR> t1_STAR_STAR(g); DistMatrix<C,STAR,STAR> A11_STAR_STAR(g); DistMatrix<C,MC, STAR> APan_MC_STAR(g), A01_MC_STAR(g), A11_MC_STAR(g); DistMatrix<C,MR, STAR> APan_MR_STAR(g), A01_MR_STAR(g), A11_MR_STAR(g); DistMatrix<C,MC, STAR> WPan_MC_STAR(g), W01_MC_STAR(g), W11_MC_STAR(g); DistMatrix<C,MR, STAR> WPan_MR_STAR(g), W01_MR_STAR(g), W11_MR_STAR(g); PartitionUpDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); PartitionUp ( tDiag, tT, tB, 0 ); while( ABR.Height() < A.Height() ) { RepartitionUpDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); RepartitionUp ( tT, t0, t1, /**/ /**/ tB, t2 ); if( A00.Height() > 0 ) { WPan.AlignWith( A01 ); APan_MC_STAR.AlignWith( A00 ); WPan_MC_STAR.AlignWith( A00 ); APan_MR_STAR.AlignWith( A00 ); WPan_MR_STAR.AlignWith( A00 ); //------------------------------------------------------------// WPan.ResizeTo( ATL.Height(), A11.Width() ); APan_MC_STAR.ResizeTo( ATL.Height(), A11.Width() ); WPan_MC_STAR.ResizeTo( ATL.Height(), A11.Width() ); APan_MR_STAR.ResizeTo( ATL.Height(), A11.Width() ); WPan_MR_STAR.ResizeTo( ATL.Height(), A11.Width() ); internal::HermitianPanelTridiagU ( ATL, WPan, t1, APan_MC_STAR, APan_MR_STAR, WPan_MC_STAR, WPan_MR_STAR ); PartitionUp ( APan_MC_STAR, A01_MC_STAR, A11_MC_STAR, A11.Height() ); PartitionUp ( APan_MR_STAR, A01_MR_STAR, A11_MR_STAR, A11.Height() ); PartitionUp ( WPan_MC_STAR, W01_MC_STAR, W11_MC_STAR, A11.Height() ); PartitionUp ( WPan_MR_STAR, W01_MR_STAR, W11_MR_STAR, A11.Height() ); internal::LocalTrr2k ( UPPER, ADJOINT, ADJOINT, (C)-1, A01_MC_STAR, W01_MR_STAR, W01_MC_STAR, A01_MR_STAR, (C)1, A00 ); //------------------------------------------------------------// WPan_MR_STAR.FreeAlignments(); APan_MR_STAR.FreeAlignments(); WPan_MC_STAR.FreeAlignments(); APan_MC_STAR.FreeAlignments(); WPan.FreeAlignments(); } else { A11_STAR_STAR = A11; t1_STAR_STAR.ResizeTo( t1.Height(), 1 ); HermitianTridiag ( UPPER, A11_STAR_STAR.LocalMatrix(), t1_STAR_STAR.LocalMatrix() ); A11 = A11_STAR_STAR; t1 = t1_STAR_STAR; } SlidePartitionUp ( tT, t0, /**/ /**/ t1, tB, t2 ); SlidePartitionUpDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); } } // Redistribute from matrix-diagonal form to fully replicated t = tDiag; #ifndef RELEASE PopCallStack(); #endif }
inline void TwoSidedTrmmUVar5 ( UnitOrNonUnit diag, DistMatrix<F>& A, const DistMatrix<F>& U ) { #ifndef RELEASE PushCallStack("internal::TwoSidedTrmmUVar5"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( U.Height() != U.Width() ) throw std::logic_error("Triangular matrices must be square"); if( A.Height() != U.Height() ) throw std::logic_error("A and U must be the same size"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); // Temporary distributions DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,MC, STAR> A01_MC_STAR(g); DistMatrix<F,MR, STAR> A01_MR_STAR(g); DistMatrix<F,VC, STAR> A01_VC_STAR(g); DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); DistMatrix<F,MC, STAR> U01_MC_STAR(g); DistMatrix<F,MR, STAR> U01_MR_STAR(g); DistMatrix<F,VC, STAR> U01_VC_STAR(g); DistMatrix<F,VC, STAR> Y01_VC_STAR(g); DistMatrix<F> Y01(g); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDownDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDownDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); A01_MC_STAR.AlignWith( A00 ); A01_MR_STAR.AlignWith( A00 ); A01_VC_STAR.AlignWith( A00 ); U01_MC_STAR.AlignWith( A00 ); U01_MR_STAR.AlignWith( A00 ); U01_VC_STAR.AlignWith( A00 ); Y01.AlignWith( A01 ); Y01_VC_STAR.AlignWith( A01 ); //--------------------------------------------------------------------// // Y01 := U01 A11 A11_STAR_STAR = A11; U01_VC_STAR = U01; Y01_VC_STAR.ResizeTo( A01.Height(), A01.Width() ); Hemm ( RIGHT, UPPER, F(1), A11_STAR_STAR.LocalMatrix(), U01_VC_STAR.LocalMatrix(), F(0), Y01_VC_STAR.LocalMatrix() ); Y01 = Y01_VC_STAR; // A01 := U00 A01 Trmm( LEFT, UPPER, NORMAL, diag, F(1), U00, A01 ); // A01 := A01 + 1/2 Y01 Axpy( F(1)/F(2), Y01, A01 ); // A00 := A00 + (U01 A01' + A01 U01') A01_MC_STAR = A01; U01_MC_STAR = U01; A01_VC_STAR = A01_MC_STAR; A01_MR_STAR = A01_VC_STAR; U01_MR_STAR = U01_MC_STAR; LocalTrr2k ( UPPER, ADJOINT, ADJOINT, F(1), U01_MC_STAR, A01_MR_STAR, A01_MC_STAR, U01_MR_STAR, F(1), A00 ); // A01 := A01 + 1/2 Y01 Axpy( F(1)/F(2), Y01_VC_STAR, A01_VC_STAR ); // A01 := A01 U11' U11_STAR_STAR = U11; LocalTrmm ( RIGHT, UPPER, ADJOINT, diag, F(1), U11_STAR_STAR, A01_VC_STAR ); A01 = A01_VC_STAR; // A11 := U11 A11 U11' LocalTwoSidedTrmm( UPPER, diag, A11_STAR_STAR, U11_STAR_STAR ); A11 = A11_STAR_STAR; //--------------------------------------------------------------------// A01_MC_STAR.FreeAlignments(); A01_MR_STAR.FreeAlignments(); A01_VC_STAR.FreeAlignments(); U01_MC_STAR.FreeAlignments(); U01_MR_STAR.FreeAlignments(); U01_VC_STAR.FreeAlignments(); Y01.FreeAlignments(); Y01_VC_STAR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDownDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void internal::HermitianTridiagU( DistMatrix<R,MC,MR>& A ) { #ifndef RELEASE PushCallStack("internal::HermitianTridiagU"); if( A.Height() != A.Width() ) throw std::logic_error( "A must be square." ); #endif const Grid& g = A.Grid(); if( g.InGrid() ) { // Matrix views DistMatrix<R,MC,MR> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); // Temporary distributions DistMatrix<R,MC, MR > WPan(g); DistMatrix<R,STAR,STAR> A11_STAR_STAR(g); DistMatrix<R,MC, STAR> APan_MC_STAR(g), A01_MC_STAR(g), A11_MC_STAR(g); DistMatrix<R,MR, STAR> APan_MR_STAR(g), A01_MR_STAR(g), A11_MR_STAR(g); DistMatrix<R,MC, STAR> WPan_MC_STAR(g), W01_MC_STAR(g), W11_MC_STAR(g); DistMatrix<R,MR, STAR> WPan_MR_STAR(g), W01_MR_STAR(g), W11_MR_STAR(g); PartitionUpDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ABR.Height() < A.Height() ) { RepartitionUpDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); if( A00.Height() > 0 ) { WPan.AlignWith( A01 ); APan_MC_STAR.AlignWith( A00 ); WPan_MC_STAR.AlignWith( A00 ); APan_MR_STAR.AlignWith( A00 ); WPan_MR_STAR.AlignWith( A00 ); //------------------------------------------------------------// WPan.ResizeTo( ATL.Height(), A11.Width() ); APan_MC_STAR.ResizeTo( ATL.Height(), A11.Width() ); WPan_MC_STAR.ResizeTo( ATL.Height(), A11.Width() ); APan_MR_STAR.ResizeTo( ATL.Height(), A11.Width() ); WPan_MR_STAR.ResizeTo( ATL.Height(), A11.Width() ); internal::HermitianPanelTridiagU ( ATL, WPan, APan_MC_STAR, APan_MR_STAR, WPan_MC_STAR, WPan_MR_STAR ); PartitionUp ( APan_MC_STAR, A01_MC_STAR, A11_MC_STAR, A11.Height() ); PartitionUp ( APan_MR_STAR, A01_MR_STAR, A11_MR_STAR, A11.Height() ); PartitionUp ( WPan_MC_STAR, W01_MC_STAR, W11_MC_STAR, A11.Height() ); PartitionUp ( WPan_MR_STAR, W01_MR_STAR, W11_MR_STAR, A11.Height() ); internal::LocalTrr2k ( UPPER, TRANSPOSE, TRANSPOSE, (R)-1, A01_MC_STAR, W01_MR_STAR, W01_MC_STAR, A01_MR_STAR, (R)1, A00 ); //------------------------------------------------------------// WPan_MR_STAR.FreeAlignments(); APan_MR_STAR.FreeAlignments(); WPan_MC_STAR.FreeAlignments(); APan_MC_STAR.FreeAlignments(); WPan.FreeAlignments(); } else { A11_STAR_STAR = A11; HermitianTridiag( UPPER, A11_STAR_STAR.LocalMatrix() ); A11 = A11_STAR_STAR; } SlidePartitionUpDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); } } #ifndef RELEASE PopCallStack(); #endif }
inline void TwoSidedTrsmUVar1 ( UnitOrNonUnit diag, DistMatrix<F>& A, const DistMatrix<F>& U ) { #ifndef RELEASE CallStackEntry entry("internal::TwoSidedTrsmUVar1"); if( A.Height() != A.Width() ) LogicError("A must be square"); if( U.Height() != U.Width() ) LogicError("Triangular matrices must be square"); if( A.Height() != U.Height() ) LogicError("A and U must be the same size"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); // Temporary distributions DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,VC, STAR> A01_VC_STAR(g); DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); DistMatrix<F,MC, STAR> U01_MC_STAR(g); DistMatrix<F,VC, STAR> U01_VC_STAR(g); DistMatrix<F,VR, STAR> U01_VR_STAR(g); DistMatrix<F,STAR,MR > U01Adj_STAR_MR(g); DistMatrix<F,STAR,STAR> X11_STAR_STAR(g); DistMatrix<F,MR, MC > Z01_MR_MC(g); DistMatrix<F,MC, STAR> Z01_MC_STAR(g); DistMatrix<F,MR, STAR> Z01_MR_STAR(g); DistMatrix<F> Y01(g); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDownDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDownDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); A01_VC_STAR.AlignWith( A01 ); U01_MC_STAR.AlignWith( A00 ); U01_VR_STAR.AlignWith( A00 ); U01_VC_STAR.AlignWith( A00 ); U01Adj_STAR_MR.AlignWith( A00 ); Y01.AlignWith( A01 ); Z01_MR_MC.AlignWith( A01 ); Z01_MC_STAR.AlignWith( A00 ); Z01_MR_STAR.AlignWith( A00 ); //--------------------------------------------------------------------// // Y01 := A00 U01 U01_MC_STAR = U01; U01_VR_STAR = U01_MC_STAR; U01Adj_STAR_MR.AdjointFrom( U01_VR_STAR ); Zeros( Z01_MC_STAR, A01.Height(), A01.Width() ); Zeros( Z01_MR_STAR, A01.Height(), A01.Width() ); LocalSymmetricAccumulateLU ( ADJOINT, F(1), A00, U01_MC_STAR, U01Adj_STAR_MR, Z01_MC_STAR, Z01_MR_STAR ); Z01_MR_MC.SumScatterFrom( Z01_MR_STAR ); Y01 = Z01_MR_MC; Y01.SumScatterUpdate( F(1), Z01_MC_STAR ); // A01 := inv(U00)' A01 // // This is the bottleneck because A01 only has blocksize columns Trsm( LEFT, UPPER, ADJOINT, diag, F(1), U00, A01 ); // A01 := A01 - 1/2 Y01 Axpy( F(-1)/F(2), Y01, A01 ); // A11 := A11 - (U01' A01 + A01' U01) A01_VC_STAR = A01; U01_VC_STAR = U01_MC_STAR; Zeros( X11_STAR_STAR, A11.Height(), A11.Width() ); Her2k ( UPPER, ADJOINT, F(-1), A01_VC_STAR.Matrix(), U01_VC_STAR.Matrix(), F(0), X11_STAR_STAR.Matrix() ); A11.SumScatterUpdate( F(1), X11_STAR_STAR ); // A11 := inv(U11)' A11 inv(U11) A11_STAR_STAR = A11; U11_STAR_STAR = U11; LocalTwoSidedTrsm( UPPER, diag, A11_STAR_STAR, U11_STAR_STAR ); A11 = A11_STAR_STAR; // A01 := A01 - 1/2 Y01 Axpy( F(-1)/F(2), Y01, A01 ); // A01 := A01 inv(U11) A01_VC_STAR = A01; LocalTrsm ( RIGHT, UPPER, NORMAL, diag, F(1), U11_STAR_STAR, A01_VC_STAR ); A01 = A01_VC_STAR; //--------------------------------------------------------------------// SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDownDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); } }
inline void internal::CholeskyUVar3Square( DistMatrix<F,MC,MR>& A ) { #ifndef RELEASE PushCallStack("internal::CholeskyUVar3Square"); if( A.Height() != A.Width() ) throw std::logic_error ("Can only compute Cholesky factor of square matrices."); if( A.Grid().Height() != A.Grid().Width() ) throw std::logic_error ("CholeskyUVar3Square assumes a square process grid."); #endif const Grid& g = A.Grid(); // Find the process holding our transposed data const int r = g.Height(); int transposeRank; { const int colAlignment = A.ColAlignment(); const int rowAlignment = A.RowAlignment(); const int colShift = A.ColShift(); const int rowShift = A.RowShift(); const int transposeRow = (colAlignment+rowShift) % r; const int transposeCol = (rowAlignment+colShift) % r; transposeRank = transposeRow + r*transposeCol; } const bool onDiagonal = ( transposeRank == g.VCRank() ); // Matrix views DistMatrix<F,MC,MR> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); // Temporary matrix distributions DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,STAR,VR > A12_STAR_VR(g); DistMatrix<F,STAR,MC > A12_STAR_MC(g); DistMatrix<F,STAR,MR > A12_STAR_MR(g); // Start the algorithm PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ABR.Height() > 0 ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); A12_STAR_MC.AlignWith( A22 ); A12_STAR_MR.AlignWith( A22 ); A12_STAR_VR.AlignWith( A22 ); //--------------------------------------------------------------------// A11_STAR_STAR = A11; internal::LocalCholesky( UPPER, A11_STAR_STAR ); A11 = A11_STAR_STAR; A12_STAR_VR = A12; internal::LocalTrsm ( LEFT, UPPER, ADJOINT, NON_UNIT, (F)1, A11_STAR_STAR, A12_STAR_VR ); A12_STAR_MR = A12_STAR_VR; // SendRecv to form A12[* ,MC] from A12[* ,MR] A12_STAR_MC.ResizeTo( A12.Height(), A12.Width() ); { if( onDiagonal ) { const int size = A11.Height()*A22.LocalWidth(); MemCopy ( A12_STAR_MC.LocalBuffer(), A12_STAR_MR.LocalBuffer(), size ); } else { const int sendSize = A11.Height()*A22.LocalWidth(); const int recvSize = A11.Width()*A22.LocalHeight(); // We know that the ldim is the height since we have manually // created both temporary matrices. mpi::SendRecv ( A12_STAR_MR.LocalBuffer(), sendSize, transposeRank, 0, A12_STAR_MC.LocalBuffer(), recvSize, transposeRank, 0, g.VCComm() ); } } internal::LocalTrrk ( UPPER, ADJOINT, (F)-1, A12_STAR_MC, A12_STAR_MR, (F)1, A22 ); A12 = A12_STAR_MR; //--------------------------------------------------------------------// A12_STAR_MC.FreeAlignments(); A12_STAR_MR.FreeAlignments(); A12_STAR_VR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void LVar2( DistMatrix<F>& A ) { #ifndef RELEASE CallStackEntry entry("cholesky::LVar2"); if( A.Height() != A.Width() ) LogicError("Can only compute Cholesky factor of square matrices"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); // Temporary distributions DistMatrix<F,MR, STAR> A10Adj_MR_STAR(g); DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,VC, STAR> A21_VC_STAR(g); DistMatrix<F,MC, STAR> X11_MC_STAR(g); DistMatrix<F,MC, STAR> X21_MC_STAR(g); // Start the algorithm PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); A10Adj_MR_STAR.AlignWith( A10 ); X11_MC_STAR.AlignWith( A10 ); X21_MC_STAR.AlignWith( A20 ); //--------------------------------------------------------------------// A10Adj_MR_STAR.AdjointFrom( A10 ); LocalGemm( NORMAL, NORMAL, F(1), A10, A10Adj_MR_STAR, X11_MC_STAR ); A11.SumScatterUpdate( F(-1), X11_MC_STAR ); A11_STAR_STAR = A11; LocalCholesky( LOWER, A11_STAR_STAR ); A11 = A11_STAR_STAR; LocalGemm( NORMAL, NORMAL, F(1), A20, A10Adj_MR_STAR, X21_MC_STAR ); A21.SumScatterUpdate( F(-1), X21_MC_STAR ); A21_VC_STAR = A21; LocalTrsm ( RIGHT, LOWER, ADJOINT, NON_UNIT, F(1), A11_STAR_STAR, A21_VC_STAR ); A21 = A21_VC_STAR; //--------------------------------------------------------------------// SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); } }
inline void internal::CholeskyLVar3Naive( DistMatrix<F,MC,MR>& A ) { #ifndef RELEASE PushCallStack("internal::CholeskyLVar3Naive"); if( A.Height() != A.Width() ) throw std::logic_error ("Can only compute Cholesky factor of square matrices"); if( A.Grid().VCRank() == 0 ) { std::cout << "CholeskyLVar3Naive exists solely for academic purposes. Please " "use CholeskyLVar3 in real applications." << std::endl; } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F,MC,MR> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); // Temporary matrices DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,VC, STAR> A21_VC_STAR(g); DistMatrix<F,MC, STAR> A21_MC_STAR(g); DistMatrix<F,MR, STAR> A21_MR_STAR(g); // Start the algorithm PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ABR.Height() > 0 ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); A21_VC_STAR.AlignWith( A22 ); A21_MC_STAR.AlignWith( A22 ); A21_MR_STAR.AlignWith( A22 ); //--------------------------------------------------------------------// A11_STAR_STAR = A11; internal::LocalCholesky( LOWER, A11_STAR_STAR ); A11 = A11_STAR_STAR; A21_VC_STAR = A21; internal::LocalTrsm ( RIGHT, LOWER, ADJOINT, NON_UNIT, (F)1, A11_STAR_STAR, A21_VC_STAR ); A21_MC_STAR = A21_VC_STAR; A21_MR_STAR = A21_VC_STAR; // (A21^T[* ,MC])^T A21^H[* ,MR] = A21[MC,* ] A21^H[* ,MR] // = (A21 A21^H)[MC,MR] internal::LocalTrrk ( LOWER, ADJOINT, (F)-1, A21_MC_STAR, A21_MR_STAR, (F)1, A22 ); A21 = A21_MC_STAR; //--------------------------------------------------------------------// A21_VC_STAR.FreeAlignments(); A21_MC_STAR.FreeAlignments(); A21_MR_STAR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); } #ifndef RELEASE PopCallStack(); #endif }