inline void TrmmLLNA ( UnitOrNonUnit diag, T alpha, const DistMatrix<T>& L, DistMatrix<T>& X ) { #ifndef RELEASE PushCallStack("internal::TrmmLLNA"); if( L.Grid() != X.Grid() ) throw std::logic_error ("L and X must be distributed over the same grid"); if( L.Height() != L.Width() || L.Width() != X.Height() ) { std::ostringstream msg; msg << "Nonconformal TrmmLLNA: \n" << " L ~ " << L.Height() << " x " << L.Width() << "\n" << " X ~ " << X.Height() << " x " << X.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = L.Grid(); DistMatrix<T> XL(g), XR(g), X0(g), X1(g), X2(g); DistMatrix<T,VR, STAR> X1_VR_STAR(g); DistMatrix<T,STAR,MR > X1Trans_STAR_MR(g); DistMatrix<T,MC, STAR> Z1_MC_STAR(g); X1_VR_STAR.AlignWith( L ); X1Trans_STAR_MR.AlignWith( L ); Z1_MC_STAR.AlignWith( L ); PartitionRight( X, XL, XR, 0 ); while( XL.Width() < X.Width() ) { RepartitionRight ( XL, /**/ XR, X0, /**/ X1, X2 ); Zeros( X1.Height(), X1.Width(), Z1_MC_STAR ); //--------------------------------------------------------------------// X1_VR_STAR = X1; X1Trans_STAR_MR.TransposeFrom( X1_VR_STAR ); LocalTrmmAccumulateLLN ( TRANSPOSE, diag, alpha, L, X1Trans_STAR_MR, Z1_MC_STAR ); X1.SumScatterFrom( Z1_MC_STAR ); //--------------------------------------------------------------------// SlidePartitionRight ( XL, /**/ XR, X0, X1, /**/ X2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void LocalTrmmAccumulateLLN ( Orientation orientation, UnitOrNonUnit diag, T alpha, const DistMatrix<T,MC, MR >& L, const DistMatrix<T,STAR,MR >& XTrans_STAR_MR, DistMatrix<T,MC, STAR>& Z_MC_STAR ) { #ifndef RELEASE CallStackEntry entry("internal::LocalTrmmAccumulateLLN"); if( L.Grid() != XTrans_STAR_MR.Grid() || XTrans_STAR_MR.Grid() != Z_MC_STAR.Grid() ) throw std::logic_error ("{L,X,Z} must be distributed over the same grid"); if( L.Height() != L.Width() || L.Height() != XTrans_STAR_MR.Width() || L.Height() != Z_MC_STAR.Height() || XTrans_STAR_MR.Height() != Z_MC_STAR.Width() ) { std::ostringstream msg; msg << "Nonconformal LocalTrmmAccumulateLLN: \n" << " L ~ " << L.Height() << " x " << L.Width() << "\n" << " X^H/T[* ,MR] ~ " << XTrans_STAR_MR.Height() << " x " << XTrans_STAR_MR.Width() << "\n" << " Z[MC,* ] ~ " << Z_MC_STAR.Height() << " x " << Z_MC_STAR.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } if( XTrans_STAR_MR.RowAlignment() != L.RowAlignment() || Z_MC_STAR.ColAlignment() != L.ColAlignment() ) throw std::logic_error("Partial matrix distributions are misaligned"); #endif const Grid& g = L.Grid(); // Matrix views DistMatrix<T> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); DistMatrix<T> D11(g); DistMatrix<T,STAR,MR> XLTrans_STAR_MR(g), XRTrans_STAR_MR(g), X0Trans_STAR_MR(g), X1Trans_STAR_MR(g), X2Trans_STAR_MR(g); DistMatrix<T,MC,STAR> ZT_MC_STAR(g), Z0_MC_STAR(g), ZB_MC_STAR(g), Z1_MC_STAR(g), Z2_MC_STAR(g); const int ratio = std::max( g.Height(), g.Width() ); PushBlocksizeStack( ratio*Blocksize() ); LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); LockedPartitionRight ( XTrans_STAR_MR, XLTrans_STAR_MR, XRTrans_STAR_MR, 0 ); PartitionDown ( Z_MC_STAR, ZT_MC_STAR, ZB_MC_STAR, 0 ); while( LTL.Height() < L.Height() ) { LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); LockedRepartitionRight ( XLTrans_STAR_MR, /**/ XRTrans_STAR_MR, X0Trans_STAR_MR, /**/ X1Trans_STAR_MR, X2Trans_STAR_MR ); RepartitionDown ( ZT_MC_STAR, Z0_MC_STAR, /**********/ /**********/ Z1_MC_STAR, ZB_MC_STAR, Z2_MC_STAR ); D11.AlignWith( L11 ); //--------------------------------------------------------------------// D11 = L11; MakeTriangular( LOWER, D11 ); if( diag == UNIT ) SetDiagonal( D11, T(1) ); LocalGemm ( NORMAL, orientation, alpha, D11, X1Trans_STAR_MR, T(1), Z1_MC_STAR ); LocalGemm ( NORMAL, orientation, alpha, L21, X1Trans_STAR_MR, T(1), Z2_MC_STAR ); //--------------------------------------------------------------------// D11.FreeAlignments(); SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); SlideLockedPartitionRight ( XLTrans_STAR_MR, /**/ XRTrans_STAR_MR, X0Trans_STAR_MR, X1Trans_STAR_MR, /**/ X2Trans_STAR_MR ); SlidePartitionDown ( ZT_MC_STAR, Z0_MC_STAR, Z1_MC_STAR, /**********/ /**********/ ZB_MC_STAR, Z2_MC_STAR ); } PopBlocksizeStack(); }