void BackwardSingle ( const DistMatrix<F,VC,STAR>& L, DistMatrix<F,VC,STAR>& X, bool conjugate=false ) { const Grid& g = L.Grid(); const Orientation orientation = ( conjugate ? ADJOINT : TRANSPOSE ); DistMatrix<F,STAR,STAR> D(g), L11_STAR_STAR(g), Z1_STAR_STAR(g); FormDiagonalBlocks( L, D, conjugate ); const Int m = L.Height(); const Int n = L.Width(); const Int numRHS = X.Width(); const Int bsize = Blocksize(); const Int kLast = LastOffset( n, bsize ); for( Int k=kLast; k>=0; k-=bsize ) { const Int nb = Min(bsize,n-k); const Range<Int> ind1(k,k+nb), ind2(k+nb,m); auto L11Trans_STAR_STAR = D( IR(0,nb), ind1 ); auto L21 = L( ind2, ind1 ); auto X1 = X( ind1, IR(0,numRHS) ); auto X2 = X( ind2, IR(0,numRHS) ); // X1 -= L21' X2 LocalGemm( orientation, NORMAL, F(-1), L21, X2, Z1_STAR_STAR ); axpy::util::UpdateWithLocalData( F(1), X1, Z1_STAR_STAR ); El::AllReduce( Z1_STAR_STAR, X1.DistComm() ); // X1 := L11^-1 X1 LocalTrsm ( LEFT, UPPER, NORMAL, UNIT, F(1), L11Trans_STAR_STAR, Z1_STAR_STAR ); X1 = Z1_STAR_STAR; } }
inline void TrsmLLTSmall ( Orientation orientation, UnitOrNonUnit diag, F alpha, const DistMatrix<F,VC,STAR>& L, DistMatrix<F,VC,STAR>& X, bool checkIfSingular ) { #ifndef RELEASE PushCallStack("internal::TrsmLLTSmall"); if( L.Grid() != X.Grid() ) throw std::logic_error ("L and X must be distributed over the same grid"); if( orientation == NORMAL ) throw std::logic_error("TrsmLLT expects a (Conjugate)Transpose option"); if( L.Height() != L.Width() || L.Height() != X.Height() ) { std::ostringstream msg; msg << "Nonconformal TrsmLLT: \n" << " L ~ " << L.Height() << " x " << L.Width() << "\n" << " X ~ " << X.Height() << " x " << X.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } if( L.ColAlignment() != X.ColAlignment() ) throw std::logic_error("L and X must be aligned"); #endif const Grid& g = L.Grid(); // Matrix views DistMatrix<F,VC,STAR> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); DistMatrix<F,VC,STAR> XT(g), X0(g), XB(g), X1(g), X2(g); // Temporary distributions DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,STAR,STAR> Z1_STAR_STAR(g); // Start the algorithm Scale( alpha, X ); LockedPartitionUpDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); PartitionUp ( X, XT, XB, 0 ); while( XT.Height() > 0 ) { LockedRepartitionUpDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); RepartitionUp ( XT, X0, X1, /**/ /**/ XB, X2 ); //--------------------------------------------------------------------// // X1 -= L21' X2 Zeros( X1.Height(), X1.Width(), Z1_STAR_STAR ); LocalGemm( orientation, NORMAL, F(-1), L21, X2, F(0), Z1_STAR_STAR ); AddInLocalData( X1, Z1_STAR_STAR ); Z1_STAR_STAR.SumOverGrid(); // X1 := L11^-1 X1 L11_STAR_STAR = L11; LocalTrsm ( LEFT, LOWER, orientation, diag, F(1), L11_STAR_STAR, Z1_STAR_STAR, checkIfSingular ); X1 = Z1_STAR_STAR; //--------------------------------------------------------------------// SlideLockedPartitionUpDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); SlidePartitionUp ( XT, X0, /**/ /**/ X1, XB, X2 ); } #ifndef RELEASE PopCallStack(); #endif }