inline void TwoSidedTrsmUVar1 ( UnitOrNonUnit diag, DistMatrix<F>& A, const DistMatrix<F>& U ) { #ifndef RELEASE CallStackEntry entry("internal::TwoSidedTrsmUVar1"); if( A.Height() != A.Width() ) LogicError("A must be square"); if( U.Height() != U.Width() ) LogicError("Triangular matrices must be square"); if( A.Height() != U.Height() ) LogicError("A and U must be the same size"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); // Temporary distributions DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,VC, STAR> A01_VC_STAR(g); DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); DistMatrix<F,MC, STAR> U01_MC_STAR(g); DistMatrix<F,VC, STAR> U01_VC_STAR(g); DistMatrix<F,VR, STAR> U01_VR_STAR(g); DistMatrix<F,STAR,MR > U01Adj_STAR_MR(g); DistMatrix<F,STAR,STAR> X11_STAR_STAR(g); DistMatrix<F,MR, MC > Z01_MR_MC(g); DistMatrix<F,MC, STAR> Z01_MC_STAR(g); DistMatrix<F,MR, STAR> Z01_MR_STAR(g); DistMatrix<F> Y01(g); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDownDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDownDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); A01_VC_STAR.AlignWith( A01 ); U01_MC_STAR.AlignWith( A00 ); U01_VR_STAR.AlignWith( A00 ); U01_VC_STAR.AlignWith( A00 ); U01Adj_STAR_MR.AlignWith( A00 ); Y01.AlignWith( A01 ); Z01_MR_MC.AlignWith( A01 ); Z01_MC_STAR.AlignWith( A00 ); Z01_MR_STAR.AlignWith( A00 ); //--------------------------------------------------------------------// // Y01 := A00 U01 U01_MC_STAR = U01; U01_VR_STAR = U01_MC_STAR; U01Adj_STAR_MR.AdjointFrom( U01_VR_STAR ); Zeros( Z01_MC_STAR, A01.Height(), A01.Width() ); Zeros( Z01_MR_STAR, A01.Height(), A01.Width() ); LocalSymmetricAccumulateLU ( ADJOINT, F(1), A00, U01_MC_STAR, U01Adj_STAR_MR, Z01_MC_STAR, Z01_MR_STAR ); Z01_MR_MC.SumScatterFrom( Z01_MR_STAR ); Y01 = Z01_MR_MC; Y01.SumScatterUpdate( F(1), Z01_MC_STAR ); // A01 := inv(U00)' A01 // // This is the bottleneck because A01 only has blocksize columns Trsm( LEFT, UPPER, ADJOINT, diag, F(1), U00, A01 ); // A01 := A01 - 1/2 Y01 Axpy( F(-1)/F(2), Y01, A01 ); // A11 := A11 - (U01' A01 + A01' U01) A01_VC_STAR = A01; U01_VC_STAR = U01_MC_STAR; Zeros( X11_STAR_STAR, A11.Height(), A11.Width() ); Her2k ( UPPER, ADJOINT, F(-1), A01_VC_STAR.Matrix(), U01_VC_STAR.Matrix(), F(0), X11_STAR_STAR.Matrix() ); A11.SumScatterUpdate( F(1), X11_STAR_STAR ); // A11 := inv(U11)' A11 inv(U11) A11_STAR_STAR = A11; U11_STAR_STAR = U11; LocalTwoSidedTrsm( UPPER, diag, A11_STAR_STAR, U11_STAR_STAR ); A11 = A11_STAR_STAR; // A01 := A01 - 1/2 Y01 Axpy( F(-1)/F(2), Y01, A01 ); // A01 := A01 inv(U11) A01_VC_STAR = A01; LocalTrsm ( RIGHT, UPPER, NORMAL, diag, F(1), U11_STAR_STAR, A01_VC_STAR ); A01 = A01_VC_STAR; //--------------------------------------------------------------------// SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDownDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); } }
inline void TrdtrmmUVar1( Orientation orientation, DistMatrix<F>& U ) { #ifndef RELEASE PushCallStack("internal::TrdtrmmUVar1"); if( U.Height() != U.Width() ) throw std::logic_error("U must be square"); if( orientation == NORMAL ) throw std::logic_error("Orientation must be (conjugate-)transpose"); #endif const Grid& g = U.Grid(); // Matrix views DistMatrix<F> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); DistMatrix<F,MD,STAR> d1(g); // Temporary distributions DistMatrix<F,MC, STAR> S01_MC_STAR(g); DistMatrix<F,VC, STAR> S01_VC_STAR(g); DistMatrix<F,VR, STAR> U01_VR_STAR(g); DistMatrix<F,STAR,MR > U01AdjOrTrans_STAR_MR(g); DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); S01_MC_STAR.AlignWith( U ); S01_VC_STAR.AlignWith( U ); U01_VR_STAR.AlignWith( U ); U01AdjOrTrans_STAR_MR.AlignWith( U ); PartitionDownDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); while( UTL.Height() < U.Height() && UTL.Width() < U.Height() ) { RepartitionDownDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); //--------------------------------------------------------------------// U11.GetDiagonal( d1 ); S01_MC_STAR = U01; S01_VC_STAR = S01_MC_STAR; U01_VR_STAR = S01_VC_STAR; if( orientation == TRANSPOSE ) { DiagonalSolve( RIGHT, NORMAL, d1, U01_VR_STAR ); U01AdjOrTrans_STAR_MR.TransposeFrom( U01_VR_STAR ); } else { DiagonalSolve( RIGHT, ADJOINT, d1, U01_VR_STAR ); U01AdjOrTrans_STAR_MR.AdjointFrom( U01_VR_STAR ); } LocalTrrk( UPPER, F(1), S01_MC_STAR, U01AdjOrTrans_STAR_MR, F(1), U00 ); U11_STAR_STAR = U11; LocalTrmm ( RIGHT, UPPER, ADJOINT, UNIT, F(1), U11_STAR_STAR, U01_VR_STAR ); U01 = U01_VR_STAR; LocalTrdtrmm( orientation, UPPER, U11_STAR_STAR ); U11 = U11_STAR_STAR; //--------------------------------------------------------------------// d1.FreeAlignments(); SlidePartitionDownDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void TrsmRUT ( Orientation orientation, UnitOrNonUnit diag, F alpha, const DistMatrix<F>& U, DistMatrix<F>& X, bool checkIfSingular ) { #ifndef RELEASE PushCallStack("internal::TrsmRUT"); if( orientation == NORMAL ) throw std::logic_error("TrsmRUT expects a (Conjugate)Transpose option"); #endif const Grid& g = U.Grid(); // Matrix views DistMatrix<F> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); DistMatrix<F> XL(g), XR(g), X0(g), X1(g), X2(g); // Temporary distributions DistMatrix<F,VR, STAR> U01_VR_STAR(g); DistMatrix<F,STAR,MR > U01AdjOrTrans_STAR_MR(g); DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); DistMatrix<F,VC, STAR> X1_VC_STAR(g); DistMatrix<F,STAR,MC > X1Trans_STAR_MC(g); // Start the algorithm Scale( alpha, X ); LockedPartitionUpDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); PartitionLeft( X, XL, XR, 0 ); while( XL.Width() > 0 ) { LockedRepartitionUpDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); RepartitionLeft ( XL, /**/ XR, X0, X1, /**/ X2 ); X1_VC_STAR.AlignWith( X0 ); X1Trans_STAR_MC.AlignWith( X0 ); U01_VR_STAR.AlignWith( X0 ); U01AdjOrTrans_STAR_MR.AlignWith( X0 ); //--------------------------------------------------------------------// U11_STAR_STAR = U11; X1_VC_STAR = X1; LocalTrsm ( RIGHT, UPPER, orientation, diag, F(1), U11_STAR_STAR, X1_VC_STAR, checkIfSingular ); X1Trans_STAR_MC.TransposeFrom( X1_VC_STAR ); X1.TransposeFrom( X1Trans_STAR_MC ); U01_VR_STAR = U01; if( orientation == ADJOINT ) U01AdjOrTrans_STAR_MR.AdjointFrom( U01_VR_STAR ); else U01AdjOrTrans_STAR_MR.TransposeFrom( U01_VR_STAR ); // X0[MC,MR] -= X1[MC,* ] (U01[MR,* ])^(T/H) // = X1^T[* ,MC] (U01^(T/H))[* ,MR] LocalGemm ( TRANSPOSE, NORMAL, F(-1), X1Trans_STAR_MC, U01AdjOrTrans_STAR_MR, F(1), X0 ); //--------------------------------------------------------------------// X1_VC_STAR.FreeAlignments(); X1Trans_STAR_MC.FreeAlignments(); U01_VR_STAR.FreeAlignments(); U01AdjOrTrans_STAR_MR.FreeAlignments(); SlideLockedPartitionUpDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); SlidePartitionLeft ( XL, /**/ XR, X0, /**/ X1, X2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void TwoSidedTrmmUVar4 ( UnitOrNonUnit diag, DistMatrix<F>& A, const DistMatrix<F>& U ) { #ifndef RELEASE PushCallStack("internal::TwoSidedTrmmUVar4"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( U.Height() != U.Width() ) throw std::logic_error("Triangular matrices must be square"); if( A.Height() != U.Height() ) throw std::logic_error("A and U must be the same size"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); // Temporary distributions DistMatrix<F,VC, STAR> A01_VC_STAR(g); DistMatrix<F,VR, STAR> A01_VR_STAR(g); DistMatrix<F,STAR,MC > A01Adj_STAR_MC(g); DistMatrix<F,STAR,MR > A01Adj_STAR_MR(g); DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,STAR,VR > A12_STAR_VR(g); DistMatrix<F,MR, STAR> A12Adj_MR_STAR(g); DistMatrix<F,VC, STAR> U01_VC_STAR(g); DistMatrix<F,VR, STAR> U01_VR_STAR(g); DistMatrix<F,STAR,MC > U01Adj_STAR_MC(g); DistMatrix<F,STAR,MR > U01Adj_STAR_MR(g); DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); DistMatrix<F,VC, STAR> Y01_VC_STAR(g); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDownDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDownDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); A01_VC_STAR.AlignWith( A00 ); A01_VR_STAR.AlignWith( A00 ); A01Adj_STAR_MC.AlignWith( A00 ); A01Adj_STAR_MR.AlignWith( A00 ); A12Adj_MR_STAR.AlignWith( A02 ); U01_VC_STAR.AlignWith( A00 ); U01_VR_STAR.AlignWith( A00 ); U01Adj_STAR_MC.AlignWith( A00 ); U01Adj_STAR_MR.AlignWith( A00 ); Y01_VC_STAR.AlignWith( A01 ); //--------------------------------------------------------------------// // Y01 := U01 A11 A11_STAR_STAR = A11; U01_VC_STAR = U01; Zeros( A01.Height(), A01.Width(), Y01_VC_STAR ); Hemm ( RIGHT, UPPER, F(1), A11_STAR_STAR.LockedMatrix(), U01_VC_STAR.LockedMatrix(), F(0), Y01_VC_STAR.Matrix() ); // A01 := A01 + 1/2 Y01 A01_VC_STAR = A01; Axpy( F(1)/F(2), Y01_VC_STAR, A01_VC_STAR ); // A00 := A00 + (U01 A01' + A01 U01') A01Adj_STAR_MC.AdjointFrom( A01_VC_STAR ); U01Adj_STAR_MC.AdjointFrom( U01_VC_STAR ); A01_VR_STAR = A01_VC_STAR; U01_VR_STAR = U01_VC_STAR; A01Adj_STAR_MR.AdjointFrom( A01_VR_STAR ); U01Adj_STAR_MR.AdjointFrom( U01_VR_STAR ); LocalTrr2k ( UPPER, ADJOINT, ADJOINT, F(1), U01Adj_STAR_MC, A01Adj_STAR_MR, A01Adj_STAR_MC, U01Adj_STAR_MR, F(1), A00 ); // A01 := A01 + 1/2 Y01 Axpy( F(1)/F(2), Y01_VC_STAR, A01_VC_STAR ); // A01 := A01 U11' U11_STAR_STAR = U11; LocalTrmm ( RIGHT, UPPER, ADJOINT, diag, F(1), U11_STAR_STAR, A01_VC_STAR ); A01 = A01_VC_STAR; // A02 := A02 + U01 A12 A12Adj_MR_STAR.AdjointFrom( A12 ); LocalGemm ( ADJOINT, ADJOINT, F(1), U01Adj_STAR_MC, A12Adj_MR_STAR, F(1), A02 ); // A11 := U11 A11 U11' LocalTwoSidedTrmm( UPPER, diag, A11_STAR_STAR, U11_STAR_STAR ); A11 = A11_STAR_STAR; // A12 := U11 A12 A12_STAR_VR.AdjointFrom( A12Adj_MR_STAR ); LocalTrmm ( LEFT, UPPER, NORMAL, diag, F(1), U11_STAR_STAR, A12_STAR_VR ); A12 = A12_STAR_VR; //--------------------------------------------------------------------// A01_VC_STAR.FreeAlignments(); A01_VR_STAR.FreeAlignments(); A01Adj_STAR_MC.FreeAlignments(); A01Adj_STAR_MR.FreeAlignments(); A12Adj_MR_STAR.FreeAlignments(); U01_VC_STAR.FreeAlignments(); U01_VR_STAR.FreeAlignments(); U01Adj_STAR_MC.FreeAlignments(); U01Adj_STAR_MR.FreeAlignments(); Y01_VC_STAR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDownDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void TwoSidedTrsmUVar2 ( UnitOrNonUnit diag, DistMatrix<F>& A, const DistMatrix<F>& U ) { #ifndef RELEASE CallStackEntry entry("internal::TwoSidedTrsmUVar2"); if( A.Height() != A.Width() ) LogicError("A must be square"); if( U.Height() != U.Width() ) LogicError("Triangular matrices must be square"); if( A.Height() != U.Height() ) LogicError("A and U must be the same size"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); // Temporary distributions DistMatrix<F,MC, STAR> A01_MC_STAR(g); DistMatrix<F,VC, STAR> A01_VC_STAR(g); DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,STAR,VR > A12_STAR_VR(g); DistMatrix<F,MC, STAR> F01_MC_STAR(g); DistMatrix<F,MC, STAR> U01_MC_STAR(g); DistMatrix<F,VR, STAR> U01_VR_STAR(g); DistMatrix<F,STAR,MR > U01Adj_STAR_MR(g); DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); DistMatrix<F,STAR,MR > X11_STAR_MR(g); DistMatrix<F,MR, STAR> X12Adj_MR_STAR(g); DistMatrix<F,MR, MC > X12Adj_MR_MC(g); DistMatrix<F,MR, MC > Y01_MR_MC(g); DistMatrix<F,MR, STAR> Y01_MR_STAR(g); DistMatrix<F> X11(g); DistMatrix<F> Y01(g); Matrix<F> X12Local; PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDownDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDownDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); A01_MC_STAR.AlignWith( U01 ); Y01.AlignWith( A01 ); Y01_MR_STAR.AlignWith( A00 ); U01_MC_STAR.AlignWith( A00 ); U01_VR_STAR.AlignWith( A00 ); U01Adj_STAR_MR.AlignWith( A00 ); X11_STAR_MR.AlignWith( U01 ); X11.AlignWith( A11 ); X12Adj_MR_STAR.AlignWith( A02 ); X12Adj_MR_MC.AlignWith( A12 ); F01_MC_STAR.AlignWith( A00 ); //--------------------------------------------------------------------// // Y01 := A00 U01 U01_MC_STAR = U01; U01_VR_STAR = U01_MC_STAR; U01Adj_STAR_MR.AdjointFrom( U01_VR_STAR ); Zeros( Y01_MR_STAR, A01.Height(), A01.Width() ); Zeros( F01_MC_STAR, A01.Height(), A01.Width() ); LocalSymmetricAccumulateLU ( ADJOINT, F(1), A00, U01_MC_STAR, U01Adj_STAR_MR, F01_MC_STAR, Y01_MR_STAR ); Y01_MR_MC.SumScatterFrom( Y01_MR_STAR ); Y01 = Y01_MR_MC; Y01.SumScatterUpdate( F(1), F01_MC_STAR ); // X11 := U01' A01 LocalGemm( ADJOINT, NORMAL, F(1), U01_MC_STAR, A01, X11_STAR_MR ); // A01 := A01 - Y01 Axpy( F(-1), Y01, A01 ); A01_MC_STAR = A01; // A11 := A11 - triu(X11 + A01' U01) = A11 - (U01 A01 + A01' U01) LocalGemm( ADJOINT, NORMAL, F(1), A01_MC_STAR, U01, F(1), X11_STAR_MR ); X11.SumScatterFrom( X11_STAR_MR ); MakeTriangular( UPPER, X11 ); Axpy( F(-1), X11, A11 ); // A01 := A01 inv(U11) U11_STAR_STAR = U11; A01_VC_STAR = A01_MC_STAR; LocalTrsm ( RIGHT, UPPER, NORMAL, diag, F(1), U11_STAR_STAR, A01_VC_STAR ); A01 = A01_VC_STAR; // A11 := inv(U11)' A11 inv(U11) A11_STAR_STAR = A11; LocalTwoSidedTrsm( UPPER, diag, A11_STAR_STAR, U11_STAR_STAR ); A11 = A11_STAR_STAR; // A12 := A12 - A02' U01 LocalGemm( ADJOINT, NORMAL, F(1), A02, U01_MC_STAR, X12Adj_MR_STAR ); X12Adj_MR_MC.SumScatterFrom( X12Adj_MR_STAR ); Adjoint( X12Adj_MR_MC.LockedMatrix(), X12Local ); Axpy( F(-1), X12Local, A12.Matrix() ); // A12 := inv(U11)' A12 A12_STAR_VR = A12; LocalTrsm ( LEFT, UPPER, ADJOINT, diag, F(1), U11_STAR_STAR, A12_STAR_VR ); A12 = A12_STAR_VR; //--------------------------------------------------------------------// SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDownDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); } }
inline void TrtrmmUVar1( Orientation orientation, DistMatrix<T>& U ) { #ifndef RELEASE PushCallStack("internal::TrtrmmUVar1"); if( U.Height() != U.Width() ) throw std::logic_error("U must be square"); #endif const Grid& g = U.Grid(); // Matrix views DistMatrix<T> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); // Temporary distributions DistMatrix<T,MC, STAR> U01_MC_STAR(g); DistMatrix<T,VC, STAR> U01_VC_STAR(g); DistMatrix<T,VR, STAR> U01_VR_STAR(g); DistMatrix<T,STAR,MR > U01AdjOrTrans_STAR_MR(g); DistMatrix<T,STAR,STAR> U11_STAR_STAR(g); U01_MC_STAR.AlignWith( U ); U01_VC_STAR.AlignWith( U ); U01_VR_STAR.AlignWith( U ); U01AdjOrTrans_STAR_MR.AlignWith( U ); PartitionDownDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); while( UTL.Height() < U.Height() && UTL.Width() < U.Height() ) { RepartitionDownDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); //--------------------------------------------------------------------// U01_MC_STAR = U01; U01_VC_STAR = U01_MC_STAR; U01_VR_STAR = U01_VC_STAR; if( orientation == ADJOINT ) U01AdjOrTrans_STAR_MR.AdjointFrom( U01_VR_STAR ); else U01AdjOrTrans_STAR_MR.TransposeFrom( U01_VR_STAR ); LocalTrrk( UPPER, T(1), U01_MC_STAR, U01AdjOrTrans_STAR_MR, T(1), U00 ); U11_STAR_STAR = U11; LocalTrmm ( RIGHT, UPPER, orientation, NON_UNIT, T(1), U11_STAR_STAR, U01_VC_STAR ); U01 = U01_VC_STAR; LocalTrtrmm( orientation, UPPER, U11_STAR_STAR ); U11 = U11_STAR_STAR; //--------------------------------------------------------------------// SlidePartitionDownDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); } #ifndef RELEASE PopCallStack(); #endif }