inline void LocalSymmetricAccumulateLU ( Orientation orientation, T alpha, const DistMatrix<T>& A, const DistMatrix<T,MC, STAR>& B_MC_STAR, const DistMatrix<T,STAR,MR >& BAdjOrTrans_STAR_MR, DistMatrix<T,MC, STAR>& Z_MC_STAR, DistMatrix<T,MR, STAR>& Z_MR_STAR ) { #ifndef RELEASE PushCallStack("internal::LocalSymmetricAccumulateLU"); if( A.Grid() != B_MC_STAR.Grid() || B_MC_STAR.Grid() != BAdjOrTrans_STAR_MR.Grid() || BAdjOrTrans_STAR_MR.Grid() != Z_MC_STAR.Grid() || Z_MC_STAR.Grid() != Z_MR_STAR.Grid() ) throw std::logic_error ("{A,B,Z} must be distributed over the same grid"); if( A.Height() != A.Width() || A.Height() != B_MC_STAR.Height() || A.Height() != BAdjOrTrans_STAR_MR.Width() || A.Height() != Z_MC_STAR.Height() || A.Height() != Z_MR_STAR.Height() || B_MC_STAR.Width() != BAdjOrTrans_STAR_MR.Height() || BAdjOrTrans_STAR_MR.Height() != Z_MC_STAR.Width() || Z_MC_STAR.Width() != Z_MR_STAR.Width() ) { std::ostringstream msg; msg << "Nonconformal LocalSymmetricAccumulateLU: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B[MC,* ] ~ " << B_MC_STAR.Height() << " x " << B_MC_STAR.Width() << "\n" << " B^H/T[* ,MR] ~ " << BAdjOrTrans_STAR_MR.Height() << " x " << BAdjOrTrans_STAR_MR.Width() << "\n" << " Z[MC,* ] ~ " << Z_MC_STAR.Height() << " x " << Z_MC_STAR.Width() << "\n" << " Z[MR,* ] ` " << Z_MR_STAR.Height() << " x " << Z_MR_STAR.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } if( B_MC_STAR.ColAlignment() != A.ColAlignment() || BAdjOrTrans_STAR_MR.RowAlignment() != A.RowAlignment() || Z_MC_STAR.ColAlignment() != A.ColAlignment() || Z_MR_STAR.ColAlignment() != A.RowAlignment() ) throw std::logic_error("Partial matrix distributions are misaligned"); #endif const Grid& g = A.Grid(); DistMatrix<T> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<T> D11(g); DistMatrix<T,MC,STAR> BT_MC_STAR(g), B0_MC_STAR(g), BB_MC_STAR(g), B1_MC_STAR(g), B2_MC_STAR(g); DistMatrix<T,STAR,MR> BLAdjOrTrans_STAR_MR(g), BRAdjOrTrans_STAR_MR(g), B0AdjOrTrans_STAR_MR(g), B1AdjOrTrans_STAR_MR(g), B2AdjOrTrans_STAR_MR(g); DistMatrix<T,MC,STAR> ZT_MC_STAR(g), Z0_MC_STAR(g), ZB_MC_STAR(g), Z1_MC_STAR(g), Z2_MC_STAR(g); DistMatrix<T,MR,STAR> ZT_MR_STAR(g), Z0_MR_STAR(g), ZB_MR_STAR(g), Z1_MR_STAR(g), Z2_MR_STAR(g); const int ratio = std::max( g.Height(), g.Width() ); PushBlocksizeStack( ratio*Blocksize() ); LockedPartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDown ( B_MC_STAR, BT_MC_STAR, BB_MC_STAR, 0 ); LockedPartitionRight ( BAdjOrTrans_STAR_MR, BLAdjOrTrans_STAR_MR, BRAdjOrTrans_STAR_MR, 0 ); PartitionDown ( Z_MC_STAR, ZT_MC_STAR, ZB_MC_STAR, 0 ); PartitionDown ( Z_MR_STAR, ZT_MR_STAR, ZB_MR_STAR, 0 ); while( ATL.Height() < A.Height() ) { LockedRepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDown ( BT_MC_STAR, B0_MC_STAR, /**********/ /**********/ B1_MC_STAR, BB_MC_STAR, B2_MC_STAR ); LockedRepartitionRight ( BLAdjOrTrans_STAR_MR, /**/ BRAdjOrTrans_STAR_MR, B0AdjOrTrans_STAR_MR, /**/ B1AdjOrTrans_STAR_MR, B2AdjOrTrans_STAR_MR ); RepartitionDown ( ZT_MC_STAR, Z0_MC_STAR, /**********/ /**********/ Z1_MC_STAR, ZB_MC_STAR, Z2_MC_STAR ); RepartitionDown ( ZT_MR_STAR, Z0_MR_STAR, /**********/ /**********/ Z1_MR_STAR, ZB_MR_STAR, Z2_MR_STAR ); D11.AlignWith( A11 ); //--------------------------------------------------------------------// D11 = A11; MakeTrapezoidal( LEFT, UPPER, 0, D11 ); LocalGemm ( NORMAL, orientation, alpha, D11, B1AdjOrTrans_STAR_MR, T(1), Z1_MC_STAR ); MakeTrapezoidal( LEFT, UPPER, 1, D11 ); LocalGemm ( orientation, NORMAL, alpha, D11, B1_MC_STAR, T(1), Z1_MR_STAR ); LocalGemm ( NORMAL, orientation, alpha, A12, B2AdjOrTrans_STAR_MR, T(1), Z1_MC_STAR ); LocalGemm ( orientation, NORMAL, alpha, A12, B1_MC_STAR, T(1), Z2_MR_STAR ); //--------------------------------------------------------------------// D11.FreeAlignments(); SlideLockedPartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDown ( BT_MC_STAR, B0_MC_STAR, B1_MC_STAR, /**********/ /**********/ BB_MC_STAR, B2_MC_STAR ); SlideLockedPartitionRight ( BLAdjOrTrans_STAR_MR, /**/ BRAdjOrTrans_STAR_MR, B0AdjOrTrans_STAR_MR, B1AdjOrTrans_STAR_MR, /**/ B2AdjOrTrans_STAR_MR ); SlidePartitionDown ( ZT_MC_STAR, Z0_MC_STAR, Z1_MC_STAR, /**********/ /**********/ ZB_MC_STAR, Z2_MC_STAR ); SlidePartitionDown ( ZT_MR_STAR, Z0_MR_STAR, Z1_MR_STAR, /**********/ /**********/ ZB_MR_STAR, Z2_MR_STAR ); } PopBlocksizeStack(); #ifndef RELEASE PopCallStack(); #endif }
inline void LocalTrmmAccumulateLLN ( Orientation orientation, UnitOrNonUnit diag, T alpha, const DistMatrix<T,MC, MR >& L, const DistMatrix<T,STAR,MR >& XTrans_STAR_MR, DistMatrix<T,MC, STAR>& Z_MC_STAR ) { #ifndef RELEASE CallStackEntry entry("internal::LocalTrmmAccumulateLLN"); if( L.Grid() != XTrans_STAR_MR.Grid() || XTrans_STAR_MR.Grid() != Z_MC_STAR.Grid() ) throw std::logic_error ("{L,X,Z} must be distributed over the same grid"); if( L.Height() != L.Width() || L.Height() != XTrans_STAR_MR.Width() || L.Height() != Z_MC_STAR.Height() || XTrans_STAR_MR.Height() != Z_MC_STAR.Width() ) { std::ostringstream msg; msg << "Nonconformal LocalTrmmAccumulateLLN: \n" << " L ~ " << L.Height() << " x " << L.Width() << "\n" << " X^H/T[* ,MR] ~ " << XTrans_STAR_MR.Height() << " x " << XTrans_STAR_MR.Width() << "\n" << " Z[MC,* ] ~ " << Z_MC_STAR.Height() << " x " << Z_MC_STAR.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } if( XTrans_STAR_MR.RowAlignment() != L.RowAlignment() || Z_MC_STAR.ColAlignment() != L.ColAlignment() ) throw std::logic_error("Partial matrix distributions are misaligned"); #endif const Grid& g = L.Grid(); // Matrix views DistMatrix<T> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); DistMatrix<T> D11(g); DistMatrix<T,STAR,MR> XLTrans_STAR_MR(g), XRTrans_STAR_MR(g), X0Trans_STAR_MR(g), X1Trans_STAR_MR(g), X2Trans_STAR_MR(g); DistMatrix<T,MC,STAR> ZT_MC_STAR(g), Z0_MC_STAR(g), ZB_MC_STAR(g), Z1_MC_STAR(g), Z2_MC_STAR(g); const int ratio = std::max( g.Height(), g.Width() ); PushBlocksizeStack( ratio*Blocksize() ); LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); LockedPartitionRight ( XTrans_STAR_MR, XLTrans_STAR_MR, XRTrans_STAR_MR, 0 ); PartitionDown ( Z_MC_STAR, ZT_MC_STAR, ZB_MC_STAR, 0 ); while( LTL.Height() < L.Height() ) { LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); LockedRepartitionRight ( XLTrans_STAR_MR, /**/ XRTrans_STAR_MR, X0Trans_STAR_MR, /**/ X1Trans_STAR_MR, X2Trans_STAR_MR ); RepartitionDown ( ZT_MC_STAR, Z0_MC_STAR, /**********/ /**********/ Z1_MC_STAR, ZB_MC_STAR, Z2_MC_STAR ); D11.AlignWith( L11 ); //--------------------------------------------------------------------// D11 = L11; MakeTriangular( LOWER, D11 ); if( diag == UNIT ) SetDiagonal( D11, T(1) ); LocalGemm ( NORMAL, orientation, alpha, D11, X1Trans_STAR_MR, T(1), Z1_MC_STAR ); LocalGemm ( NORMAL, orientation, alpha, L21, X1Trans_STAR_MR, T(1), Z2_MC_STAR ); //--------------------------------------------------------------------// D11.FreeAlignments(); SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); SlideLockedPartitionRight ( XLTrans_STAR_MR, /**/ XRTrans_STAR_MR, X0Trans_STAR_MR, X1Trans_STAR_MR, /**/ X2Trans_STAR_MR ); SlidePartitionDown ( ZT_MC_STAR, Z0_MC_STAR, Z1_MC_STAR, /**********/ /**********/ ZB_MC_STAR, Z2_MC_STAR ); } PopBlocksizeStack(); }
inline void internal::LocalTrmmAccumulateLUN ( Orientation orientation, UnitOrNonUnit diag, T alpha, const DistMatrix<T,MC, MR >& U, const DistMatrix<T,STAR,MR >& XAdjOrTrans_STAR_MR, DistMatrix<T,MC, STAR>& Z_MC_STAR ) { #ifndef RELEASE PushCallStack("internal::LocalTrmmAccumulateLUN"); if( U.Grid() != XAdjOrTrans_STAR_MR.Grid() || XAdjOrTrans_STAR_MR.Grid() != Z_MC_STAR.Grid() ) throw std::logic_error ("{U,X,Z} must be distributed over the same grid"); if( U.Height() != U.Width() || U.Height() != XAdjOrTrans_STAR_MR.Width() || U.Height() != Z_MC_STAR.Height() || XAdjOrTrans_STAR_MR.Height() != Z_MC_STAR.Width() ) { std::ostringstream msg; msg << "Nonconformal LocalTrmmAccumulateLUN: \n" << " U ~ " << U.Height() << " x " << U.Width() << "\n" << " X^H/T[* ,MR] ~ " << XAdjOrTrans_STAR_MR.Height() << " x " << XAdjOrTrans_STAR_MR.Width() << "\n" << " Z[MC,* ] ~ " << Z_MC_STAR.Height() << " x " << Z_MC_STAR.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } if( XAdjOrTrans_STAR_MR.RowAlignment() != U.RowAlignment() || Z_MC_STAR.ColAlignment() != U.ColAlignment() ) throw std::logic_error("Partial matrix distributions are misaligned"); #endif const Grid& g = U.Grid(); // Matrix views DistMatrix<T,MC,MR> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); DistMatrix<T,MC,MR> D11(g); DistMatrix<T,STAR,MR> XLAdjOrTrans_STAR_MR(g), XRAdjOrTrans_STAR_MR(g), X0AdjOrTrans_STAR_MR(g), X1AdjOrTrans_STAR_MR(g), X2AdjOrTrans_STAR_MR(g); DistMatrix<T,MC,STAR> ZT_MC_STAR(g), Z0_MC_STAR(g), ZB_MC_STAR(g), Z1_MC_STAR(g), Z2_MC_STAR(g); const int ratio = std::max( g.Height(), g.Width() ); PushBlocksizeStack( ratio*Blocksize() ); LockedPartitionDownDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); LockedPartitionRight ( XAdjOrTrans_STAR_MR, XLAdjOrTrans_STAR_MR, XRAdjOrTrans_STAR_MR, 0 ); PartitionDown ( Z_MC_STAR, ZT_MC_STAR, ZB_MC_STAR, 0 ); while( UTL.Height() < U.Height() ) { LockedRepartitionDownDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); LockedRepartitionRight ( XLAdjOrTrans_STAR_MR, /**/ XRAdjOrTrans_STAR_MR, X0AdjOrTrans_STAR_MR, /**/ X1AdjOrTrans_STAR_MR, X2AdjOrTrans_STAR_MR ); RepartitionDown ( ZT_MC_STAR, Z0_MC_STAR, /**********/ /**********/ Z1_MC_STAR, ZB_MC_STAR, Z2_MC_STAR ); D11.AlignWith( U11 ); //--------------------------------------------------------------------// D11 = U11; MakeTrapezoidal( LEFT, UPPER, 0, D11 ); if( diag == UNIT ) SetDiagonalToOne( D11 ); internal::LocalGemm ( NORMAL, orientation, alpha, D11, X1AdjOrTrans_STAR_MR, (T)1, Z1_MC_STAR ); internal::LocalGemm ( NORMAL, orientation, alpha, U01, X1AdjOrTrans_STAR_MR, (T)1, Z0_MC_STAR ); //--------------------------------------------------------------------// D11.FreeAlignments(); SlideLockedPartitionDownDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); SlideLockedPartitionRight ( XLAdjOrTrans_STAR_MR, /**/ XRAdjOrTrans_STAR_MR, X0AdjOrTrans_STAR_MR, X1AdjOrTrans_STAR_MR, /**/ X2AdjOrTrans_STAR_MR ); SlidePartitionDown ( ZT_MC_STAR, Z0_MC_STAR, Z1_MC_STAR, /**********/ /**********/ ZB_MC_STAR, Z2_MC_STAR ); } PopBlocksizeStack(); #ifndef RELEASE PopCallStack(); #endif }