inline void TrmmRLNC ( UnitOrNonUnit diag, T alpha, const DistMatrix<T>& L, DistMatrix<T>& X ) { #ifndef RELEASE PushCallStack("internal::TrmmRLNC"); if( L.Grid() != X.Grid() ) throw std::logic_error ("L and X must be distributed over the same grid"); if( L.Height() != L.Width() || X.Width() != L.Height() ) { std::ostringstream msg; msg << "Nonconformal TrmmRLNC: \n" << " L ~ " << L.Height() << " x " << L.Width() << "\n" << " X ~ " << X.Height() << " x " << X.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = L.Grid(); // Matrix views DistMatrix<T> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); DistMatrix<T> XL(g), XR(g), X0(g), X1(g), X2(g); // Temporary distributions DistMatrix<T,STAR,STAR> L11_STAR_STAR(g); DistMatrix<T,MR, STAR> L10Trans_MR_STAR(g); DistMatrix<T,VC, STAR> X1_VC_STAR(g); DistMatrix<T,MC, STAR> X1_MC_STAR(g); // Start the algorithm Scale( alpha, X ); LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); PartitionRight( X, XL, XR, 0 ); while( XR.Width() > 0 ) { LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); RepartitionRight ( XL, /**/ XR, X0, /**/ X1, X2 ); X1_MC_STAR.AlignWith( X0 ); L10Trans_MR_STAR.AlignWith( X0 ); X1_VC_STAR.AlignWith( X1 ); //--------------------------------------------------------------------// X1_MC_STAR = X1; L10Trans_MR_STAR.TransposeFrom( L10 ); LocalGemm ( NORMAL, TRANSPOSE, T(1), X1_MC_STAR, L10Trans_MR_STAR, T(1), X0 ); L11_STAR_STAR = L11; X1_VC_STAR = X1_MC_STAR; LocalTrmm ( RIGHT, LOWER, NORMAL, diag, T(1), L11_STAR_STAR, X1_VC_STAR ); X1 = X1_VC_STAR; //--------------------------------------------------------------------// X1_MC_STAR.FreeAlignments(); L10Trans_MR_STAR.FreeAlignments(); X1_VC_STAR.FreeAlignments(); SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); SlidePartitionRight ( XL, /**/ XR, X0, X1, /**/ X2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void TrsmRLN ( UnitOrNonUnit diag, F alpha, const DistMatrix<F>& L, DistMatrix<F>& X, bool checkIfSingular ) { #ifndef RELEASE PushCallStack("internal::TrsmRLN"); #endif const Grid& g = L.Grid(); // Matrix views DistMatrix<F> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); DistMatrix<F> XL(g), XR(g), X0(g), X1(g), X2(g); // Temporary distributions DistMatrix<F,MR, STAR> L10Trans_MR_STAR(g); DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,STAR,MC > X1Trans_STAR_MC(g); DistMatrix<F,VC, STAR> X1_VC_STAR(g); // Start the algorithm Scale( alpha, X ); LockedPartitionUpDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); PartitionLeft( X, XL, XR, 0 ); while( XL.Width() > 0 ) { LockedRepartitionUpDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); RepartitionLeft ( XL, /**/ XR, X0, X1, /**/ X2 ); X1Trans_STAR_MC.AlignWith( X0 ); L10Trans_MR_STAR.AlignWith( X0 ); //--------------------------------------------------------------------// L11_STAR_STAR = L11; X1_VC_STAR = X1; LocalTrsm ( RIGHT, LOWER, NORMAL, diag, F(1), L11_STAR_STAR, X1_VC_STAR, checkIfSingular ); // X0[MC,MR] -= X1[MC,* ] L10[*,MR] // = X1^T[* ,MC] L10^T[MR,* ] X1Trans_STAR_MC.TransposeFrom( X1_VC_STAR ); X1.TransposeFrom( X1Trans_STAR_MC ); L10Trans_MR_STAR.TransposeFrom( L10 ); LocalGemm ( TRANSPOSE, TRANSPOSE, F(-1), X1Trans_STAR_MC, L10Trans_MR_STAR, F(1), X0 ); //--------------------------------------------------------------------// X1Trans_STAR_MC.FreeAlignments(); L10Trans_MR_STAR.FreeAlignments(); SlideLockedPartitionUpDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); SlidePartitionLeft ( XL, /**/ XR, X0, /**/ X1, X2 ); } #ifndef RELEASE PopCallStack(); #endif }