inline void CholeskyUVar2( DistMatrix<F>& A ) { #ifndef RELEASE CallStackEntry entry("hpd_inverse::CholeskyUVar2"); if( A.Height() != A.Width() ) throw std::logic_error("Nonsquare matrices cannot be triangular"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); // Temporary distributions DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,VC, STAR> A01_VC_STAR(g); DistMatrix<F,VR, STAR> A01_VR_STAR(g); DistMatrix<F,STAR,VR > A12_STAR_VR(g); DistMatrix<F,STAR,MC > A01Trans_STAR_MC(g); DistMatrix<F,MR, STAR> A01_MR_STAR(g); DistMatrix<F,STAR,MR > A01Adj_STAR_MR(g); DistMatrix<F,STAR,MR > A12_STAR_MR(g); DistMatrix<F,STAR,MC > A12_STAR_MC(g); // Start the algorithm PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); A01_VC_STAR.AlignWith( A00 ); A12_STAR_VR.AlignWith( A02 ); A01Trans_STAR_MC.AlignWith( A00 ); A01_VR_STAR.AlignWith( A00 ); A01Adj_STAR_MR.AlignWith( A00 ); A12_STAR_MR.AlignWith( A02 ); A12_STAR_MC.AlignWith( A22 ); //--------------------------------------------------------------------// A11_STAR_STAR = A11; LocalCholesky( UPPER, A11_STAR_STAR ); A01_VC_STAR = A01; LocalTrsm ( RIGHT, UPPER, NORMAL, NON_UNIT, F(1), A11_STAR_STAR, A01_VC_STAR ); A12_STAR_VR = A12; LocalTrsm ( LEFT, UPPER, ADJOINT, NON_UNIT, F(1), A11_STAR_STAR, A12_STAR_VR ); A01Trans_STAR_MC.TransposeFrom( A01_VC_STAR ); A01_VR_STAR = A01_VC_STAR; A01Adj_STAR_MR.AdjointFrom( A01_VR_STAR ); LocalTrrk ( UPPER, TRANSPOSE, F(1), A01Trans_STAR_MC, A01Adj_STAR_MR, F(1), A00 ); A12_STAR_MR = A12_STAR_VR; LocalGemm ( TRANSPOSE, NORMAL, F(-1), A01Trans_STAR_MC, A12_STAR_MR, F(1), A02 ); A12_STAR_MC = A12_STAR_VR; LocalTrrk ( UPPER, ADJOINT, F(-1), A12_STAR_MC, A12_STAR_MR, F(1), A22 ); LocalTrsm ( RIGHT, UPPER, ADJOINT, NON_UNIT, F(1), A11_STAR_STAR, A01_VC_STAR ); LocalTrsm ( LEFT, UPPER, NORMAL, NON_UNIT, F(-1), A11_STAR_STAR, A12_STAR_VR ); LocalTriangularInverse( UPPER, NON_UNIT, A11_STAR_STAR ); LocalTrtrmm( ADJOINT, UPPER, A11_STAR_STAR ); A11 = A11_STAR_STAR; A01 = A01_VC_STAR; A12 = A12_STAR_VR; //--------------------------------------------------------------------// A01_VC_STAR.FreeAlignments(); A12_STAR_VR.FreeAlignments(); A01Trans_STAR_MC.FreeAlignments(); A01_VR_STAR.FreeAlignments(); A01Adj_STAR_MR.FreeAlignments(); A12_STAR_MR.FreeAlignments(); A12_STAR_MC.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); } }
inline void TwoSidedTrmmUVar4 ( UnitOrNonUnit diag, DistMatrix<F>& A, const DistMatrix<F>& U ) { #ifndef RELEASE PushCallStack("internal::TwoSidedTrmmUVar4"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( U.Height() != U.Width() ) throw std::logic_error("Triangular matrices must be square"); if( A.Height() != U.Height() ) throw std::logic_error("A and U must be the same size"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); // Temporary distributions DistMatrix<F,VC, STAR> A01_VC_STAR(g); DistMatrix<F,VR, STAR> A01_VR_STAR(g); DistMatrix<F,STAR,MC > A01Adj_STAR_MC(g); DistMatrix<F,STAR,MR > A01Adj_STAR_MR(g); DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,STAR,VR > A12_STAR_VR(g); DistMatrix<F,MR, STAR> A12Adj_MR_STAR(g); DistMatrix<F,VC, STAR> U01_VC_STAR(g); DistMatrix<F,VR, STAR> U01_VR_STAR(g); DistMatrix<F,STAR,MC > U01Adj_STAR_MC(g); DistMatrix<F,STAR,MR > U01Adj_STAR_MR(g); DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); DistMatrix<F,VC, STAR> Y01_VC_STAR(g); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDownDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDownDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); A01_VC_STAR.AlignWith( A00 ); A01_VR_STAR.AlignWith( A00 ); A01Adj_STAR_MC.AlignWith( A00 ); A01Adj_STAR_MR.AlignWith( A00 ); A12Adj_MR_STAR.AlignWith( A02 ); U01_VC_STAR.AlignWith( A00 ); U01_VR_STAR.AlignWith( A00 ); U01Adj_STAR_MC.AlignWith( A00 ); U01Adj_STAR_MR.AlignWith( A00 ); Y01_VC_STAR.AlignWith( A01 ); //--------------------------------------------------------------------// // Y01 := U01 A11 A11_STAR_STAR = A11; U01_VC_STAR = U01; Zeros( A01.Height(), A01.Width(), Y01_VC_STAR ); Hemm ( RIGHT, UPPER, F(1), A11_STAR_STAR.LockedMatrix(), U01_VC_STAR.LockedMatrix(), F(0), Y01_VC_STAR.Matrix() ); // A01 := A01 + 1/2 Y01 A01_VC_STAR = A01; Axpy( F(1)/F(2), Y01_VC_STAR, A01_VC_STAR ); // A00 := A00 + (U01 A01' + A01 U01') A01Adj_STAR_MC.AdjointFrom( A01_VC_STAR ); U01Adj_STAR_MC.AdjointFrom( U01_VC_STAR ); A01_VR_STAR = A01_VC_STAR; U01_VR_STAR = U01_VC_STAR; A01Adj_STAR_MR.AdjointFrom( A01_VR_STAR ); U01Adj_STAR_MR.AdjointFrom( U01_VR_STAR ); LocalTrr2k ( UPPER, ADJOINT, ADJOINT, F(1), U01Adj_STAR_MC, A01Adj_STAR_MR, A01Adj_STAR_MC, U01Adj_STAR_MR, F(1), A00 ); // A01 := A01 + 1/2 Y01 Axpy( F(1)/F(2), Y01_VC_STAR, A01_VC_STAR ); // A01 := A01 U11' U11_STAR_STAR = U11; LocalTrmm ( RIGHT, UPPER, ADJOINT, diag, F(1), U11_STAR_STAR, A01_VC_STAR ); A01 = A01_VC_STAR; // A02 := A02 + U01 A12 A12Adj_MR_STAR.AdjointFrom( A12 ); LocalGemm ( ADJOINT, ADJOINT, F(1), U01Adj_STAR_MC, A12Adj_MR_STAR, F(1), A02 ); // A11 := U11 A11 U11' LocalTwoSidedTrmm( UPPER, diag, A11_STAR_STAR, U11_STAR_STAR ); A11 = A11_STAR_STAR; // A12 := U11 A12 A12_STAR_VR.AdjointFrom( A12Adj_MR_STAR ); LocalTrmm ( LEFT, UPPER, NORMAL, diag, F(1), U11_STAR_STAR, A12_STAR_VR ); A12 = A12_STAR_VR; //--------------------------------------------------------------------// A01_VC_STAR.FreeAlignments(); A01_VR_STAR.FreeAlignments(); A01Adj_STAR_MC.FreeAlignments(); A01Adj_STAR_MR.FreeAlignments(); A12Adj_MR_STAR.FreeAlignments(); U01_VC_STAR.FreeAlignments(); U01_VR_STAR.FreeAlignments(); U01Adj_STAR_MC.FreeAlignments(); U01Adj_STAR_MR.FreeAlignments(); Y01_VC_STAR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDownDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); } #ifndef RELEASE PopCallStack(); #endif }