void LUN( const Matrix<F>& U, Matrix<F>& X, bool checkIfSingular ) { DEBUG_CSE const Int m = X.Height(); const Int bsize = Blocksize(); const Int kLast = LastOffset( m, bsize ); Int k=kLast, kOld=m; while( true ) { const bool in2x2 = ( k>0 && U.Get(k,k-1) != F(0) ); if( in2x2 ) --k; const Int nb = kOld-k; const Range<Int> ind0( 0, k ), ind1( k, k+nb ); auto U01 = U( ind0, ind1 ); auto U11 = U( ind1, ind1 ); auto X0 = X( ind0, ALL ); auto X1 = X( ind1, ALL ); LUNUnb( U11, X1, checkIfSingular ); Gemm( NORMAL, NORMAL, F(-1), U01, X1, F(1), X0 ); if( k == 0 ) break; kOld = k; k -= Min(bsize,k); } }
void BatchTransposedSparseToCoordinates ( const Matrix<Field>& NTrans, const Matrix<Field>& Y, Matrix<Field>& V, Int blocksize ) { EL_DEBUG_CSE const Int n = NTrans.Height(); const Int numRHS = Y.Width(); // TODO(poulson): Test the relative performance of this branch if( numRHS == 1 ) { TransposedSparseToCoordinates( NTrans, Y, V ); return; } Zeros( V, n, numRHS ); const Int iLast = LastOffset( n, blocksize ); for( Int i=iLast; i>=0; i-=blocksize ) { const Int nb = Min(n-i,blocksize); const Range<Int> ind0(0,i), ind1(i,i+nb); auto NTrans10 = NTrans( ind1, ind0 ); auto NTrans11 = NTrans( ind1, ind1 ); auto Y1 = Y( ind1, ALL ); auto V0 = V( ind0, ALL ); auto V1 = V( ind1, ALL ); BatchTransposedSparseToCoordinatesUnblocked( NTrans11, Y1, V1 ); Gemm( TRANSPOSE, NORMAL, Field(-1), NTrans10, V1, Field(1), V0 ); } }
void LUNMedium ( const AbstractDistMatrix<F>& UPre, AbstractDistMatrix<F>& XPre, bool checkIfSingular ) { DEBUG_CSE const Int m = XPre.Height(); const Int bsize = Blocksize(); const Grid& g = UPre.Grid(); DistMatrixReadProxy<F,F,MC,MR> UProx( UPre ); DistMatrixReadWriteProxy<F,F,MC,MR> XProx( XPre ); auto& U = UProx.GetLocked(); auto& X = XProx.Get(); DistMatrix<F,MC, STAR> U01_MC_STAR(g); DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); DistMatrix<F,MR, STAR> X1Trans_MR_STAR(g); const Int kLast = LastOffset( m, bsize ); Int k=kLast, kOld=m; while( true ) { const bool in2x2 = ( k>0 && U.Get(k,k-1) != F(0) ); if( in2x2 ) --k; const Int nb = kOld-k; const Range<Int> ind0( 0, k ), ind1( k, k+nb ); auto U01 = U( ind0, ind1 ); auto U11 = U( ind1, ind1 ); auto X0 = X( ind0, ALL ); auto X1 = X( ind1, ALL ); U11_STAR_STAR = U11; // U11[* ,* ] <- U11[MC,MR] X1Trans_MR_STAR.AlignWith( X0 ); Transpose( X1, X1Trans_MR_STAR ); // X1^T[MR,* ] := X1^T[MR,* ] U11^-T[* ,* ] // = (U11^-1[* ,* ] X1[* ,MR])^T LocalQuasiTrsm ( RIGHT, UPPER, TRANSPOSE, F(1), U11_STAR_STAR, X1Trans_MR_STAR, checkIfSingular ); Transpose( X1Trans_MR_STAR, X1 ); U01_MC_STAR.AlignWith( X0 ); U01_MC_STAR = U01; // U01[MC,* ] <- U01[MC,MR] // X0[MC,MR] -= U01[MC,* ] X1[* ,MR] LocalGemm ( NORMAL, TRANSPOSE, F(-1), U01_MC_STAR, X1Trans_MR_STAR, F(1), X0 ); if( k == 0 ) break; kOld = k; k -= Min(bsize,k); } }
void LUNMedium ( UnitOrNonUnit diag, const AbstractDistMatrix<F>& UPre, AbstractDistMatrix<F>& XPre, bool checkIfSingular ) { EL_DEBUG_CSE const Int m = XPre.Height(); const Int bsize = Blocksize(); const Grid& g = UPre.Grid(); DistMatrixReadProxy<F,F,MC,MR> UProx( UPre ); DistMatrixReadWriteProxy<F,F,MC,MR> XProx( XPre ); auto& U = UProx.GetLocked(); auto& X = XProx.Get(); DistMatrix<F,MC, STAR> U01_MC_STAR(g); DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); DistMatrix<F,MR, STAR> X1Trans_MR_STAR(g); const Int kLast = LastOffset( m, bsize ); for( Int k=kLast; k>=0; k-=bsize ) { const Int nb = Min(bsize,m-k); const Range<Int> ind0( 0, k ), ind1( k, k+nb ); auto U01 = U( ind0, ind1 ); auto U11 = U( ind1, ind1 ); auto X0 = X( ind0, ALL ); auto X1 = X( ind1, ALL ); U11_STAR_STAR = U11; // U11[* ,* ] <- U11[MC,MR] X1Trans_MR_STAR.AlignWith( X0 ); Transpose( X1, X1Trans_MR_STAR ); // X1^T[MR,* ] := X1^T[MR,* ] U11^-T[* ,* ] // = (U11^-1[* ,* ] X1[* ,MR])^T LocalTrsm ( RIGHT, UPPER, TRANSPOSE, diag, F(1), U11_STAR_STAR, X1Trans_MR_STAR, checkIfSingular ); Transpose( X1Trans_MR_STAR, X1 ); U01_MC_STAR.AlignWith( X0 ); U01_MC_STAR = U01; // U01[MC,* ] <- U01[MC,MR] // X0[MC,MR] -= U01[MC,* ] X1[* ,MR] LocalGemm ( NORMAL, TRANSPOSE, F(-1), U01_MC_STAR, X1Trans_MR_STAR, F(1), X0 ); } }
void LUNLarge ( UnitOrNonUnit diag, const AbstractDistMatrix<F>& UPre, AbstractDistMatrix<F>& XPre, bool checkIfSingular ) { EL_DEBUG_CSE const Int m = XPre.Height(); const Int bsize = Blocksize(); const Grid& g = UPre.Grid(); DistMatrixReadProxy<F,F,MC,MR> UProx( UPre ); DistMatrixReadWriteProxy<F,F,MC,MR> XProx( XPre ); auto& U = UProx.GetLocked(); auto& X = XProx.Get(); DistMatrix<F,MC, STAR> U01_MC_STAR(g); DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); DistMatrix<F,STAR,MR > X1_STAR_MR(g); DistMatrix<F,STAR,VR > X1_STAR_VR(g); const Int kLast = LastOffset( m, bsize ); for( Int k=kLast; k>=0; k-=bsize ) { const Int nb = Min(bsize,m-k); const Range<Int> ind0( 0, k ), ind1( k, k+nb ); auto U01 = U( ind0, ind1 ); auto U11 = U( ind1, ind1 ); auto X0 = X( ind0, ALL ); auto X1 = X( ind1, ALL ); U11_STAR_STAR = U11; // U11[* ,* ] <- U11[MC,MR] X1_STAR_VR = X1; // X1[* ,VR] <- X1[MC,MR] // X1[* ,VR] := U11^-1[* ,* ] X1[* ,VR] LocalTrsm ( LEFT, UPPER, NORMAL, diag, F(1), U11_STAR_STAR, X1_STAR_VR, checkIfSingular ); X1_STAR_MR.AlignWith( X0 ); X1_STAR_MR = X1_STAR_VR; // X1[* ,MR] <- X1[* ,VR] X1 = X1_STAR_MR; // X1[MC,MR] <- X1[* ,MR] U01_MC_STAR.AlignWith( X0 ); U01_MC_STAR = U01; // U01[MC,* ] <- U01[MC,MR] // X0[MC,MR] -= U01[MC,* ] X1[* ,MR] LocalGemm( NORMAL, NORMAL, F(-1), U01_MC_STAR, X1_STAR_MR, F(1), X0 ); } }
void Householder ( AbstractDistMatrix<F>& APre, AbstractDistMatrix<F>& householderScalarsPre, AbstractDistMatrix<Base<F>>& signaturePre ) { DEBUG_CSE DEBUG_ONLY(AssertSameGrids( APre, householderScalarsPre, signaturePre )) const Int m = APre.Height(); const Int n = APre.Width(); const Int minDim = Min(m,n); const Int iOff = m-minDim; const Int jOff = n-minDim; DistMatrixReadWriteProxy<F,F,MC,MR> AProx( APre ); DistMatrixWriteProxy<F,F,MD,STAR> householderScalarsProx( householderScalarsPre ); DistMatrixWriteProxy<Base<F>,Base<F>,MD,STAR> signatureProx( signaturePre ); auto& A = AProx.Get(); auto& householderScalars = householderScalarsProx.Get(); auto& signature = signatureProx.Get(); householderScalars.Resize( minDim, 1 ); signature.Resize( minDim, 1 ); const Int bsize = Blocksize(); const Int kLast = LastOffset( minDim, bsize ); for( Int k=kLast; k>=0; k-=bsize ) { const Int nb = Min(bsize,minDim-k); const Int ki = k + iOff; const Int kj = k + jOff; const Range<Int> ind0Vert( 0, ki ), ind1( k, k+nb ), ind1Vert( ki, ki+nb ), indL( 0, kj+nb ); auto A0L = A( ind0Vert, indL ); auto A1L = A( ind1Vert, indL ); auto householderScalars1 = householderScalars( ind1, ALL ); auto sig1 = signature( ind1, ALL ); PanelHouseholder( A1L, householderScalars1, sig1 ); ApplyQ( RIGHT, ADJOINT, A1L, householderScalars1, sig1, A0L ); } }
void Householder ( Matrix<F>& A, Matrix<F>& householderScalars, Matrix<Base<F>>& signature ) { DEBUG_CSE const Int m = A.Height(); const Int n = A.Width(); const Int minDim = Min(m,n); const Int iOff = m-minDim; const Int jOff = n-minDim; householderScalars.Resize( minDim, 1 ); signature.Resize( minDim, 1 ); const Int bsize = Blocksize(); const Int kLast = LastOffset( minDim, bsize ); for( Int k=kLast; k>=0; k-=bsize ) { const Int nb = Min(bsize,minDim-k); const Int ki = k + iOff; const Int kj = k + jOff; const Range<Int> ind0Vert( 0, ki ), ind1( k, k+nb ), ind1Vert( ki, ki+nb ), indL( 0, kj+nb ); auto A0L = A( ind0Vert, indL ); auto A1L = A( ind1Vert, indL ); auto householderScalars1 = householderScalars( ind1, ALL ); auto sig1 = signature( ind1, ALL ); PanelHouseholder( A1L, householderScalars1, sig1 ); ApplyQ( RIGHT, ADJOINT, A1L, householderScalars1, sig1, A0L ); } }
void BackwardSingle ( const DistMatrix<F,VC,STAR>& L, DistMatrix<F,VC,STAR>& X, bool conjugate=false ) { const Grid& g = L.Grid(); const Orientation orientation = ( conjugate ? ADJOINT : TRANSPOSE ); DistMatrix<F,STAR,STAR> D(g), L11_STAR_STAR(g), Z1_STAR_STAR(g); FormDiagonalBlocks( L, D, conjugate ); const Int m = L.Height(); const Int n = L.Width(); const Int numRHS = X.Width(); const Int bsize = Blocksize(); const Int kLast = LastOffset( n, bsize ); for( Int k=kLast; k>=0; k-=bsize ) { const Int nb = Min(bsize,n-k); const Range<Int> ind1(k,k+nb), ind2(k+nb,m); auto L11Trans_STAR_STAR = D( IR(0,nb), ind1 ); auto L21 = L( ind2, ind1 ); auto X1 = X( ind1, IR(0,numRHS) ); auto X2 = X( ind2, IR(0,numRHS) ); // X1 -= L21' X2 LocalGemm( orientation, NORMAL, F(-1), L21, X2, Z1_STAR_STAR ); axpy::util::UpdateWithLocalData( F(1), X1, Z1_STAR_STAR ); El::AllReduce( Z1_STAR_STAR, X1.DistComm() ); // X1 := L11^-1 X1 LocalTrsm ( LEFT, UPPER, NORMAL, UNIT, F(1), L11Trans_STAR_STAR, Z1_STAR_STAR ); X1 = Z1_STAR_STAR; } }
DEBUG_ONLY(CSE cse("trsm::LUNLarge")) const Int m = XPre.Height(); const Int bsize = Blocksize(); const Grid& g = UPre.Grid(); DistMatrixReadProxy<F,F,MC,MR> UProx( UPre ); DistMatrixReadWriteProxy<F,F,MC,MR> XProx( XPre ); auto& U = UProx.GetLocked(); auto& X = XProx.Get(); DistMatrix<F,MC, STAR> U01_MC_STAR(g); DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); DistMatrix<F,STAR,MR > X1_STAR_MR(g); DistMatrix<F,STAR,VR > X1_STAR_VR(g); const Int kLast = LastOffset( m, bsize ); for( Int k=kLast; k>=0; k-=bsize ) { const Int nb = Min(bsize,m-k); const Range<Int> ind0( 0, k ), ind1( k, k+nb ); auto U01 = U( ind0, ind1 ); auto U11 = U( ind1, ind1 ); auto X0 = X( ind0, ALL ); auto X1 = X( ind1, ALL ); U11_STAR_STAR = U11; // U11[* ,* ] <- U11[MC,MR] X1_STAR_VR = X1; // X1[* ,VR] <- X1[MC,MR]
void LUNLarge ( const ElementalMatrix<F>& UPre, ElementalMatrix<F>& XPre, bool checkIfSingular ) { DEBUG_CSE const Int m = XPre.Height(); const Int bsize = Blocksize(); const Grid& g = UPre.Grid(); DistMatrixReadProxy<F,F,MC,MR> UProx( UPre ); DistMatrixReadWriteProxy<F,F,MC,MR> XProx( XPre ); auto& U = UProx.GetLocked(); auto& X = XProx.Get(); DistMatrix<F,MC, STAR> U01_MC_STAR(g); DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); DistMatrix<F,STAR,MR > X1_STAR_MR(g); DistMatrix<F,STAR,VR > X1_STAR_VR(g); const Int kLast = LastOffset( m, bsize ); Int k=kLast, kOld=m; while( true ) { const bool in2x2 = ( k>0 && U.Get(k,k-1) != F(0) ); if( in2x2 ) --k; const Int nb = kOld-k; const Range<Int> ind0( 0, k ), ind1( k, k+nb ); auto U01 = U( ind0, ind1 ); auto U11 = U( ind1, ind1 ); auto X0 = X( ind0, ALL ); auto X1 = X( ind1, ALL ); U11_STAR_STAR = U11; // U11[* ,* ] <- U11[MC,MR] X1_STAR_VR = X1; // X1[* ,VR] <- X1[MC,MR] // X1[* ,VR] := U11^-1[* ,* ] X1[* ,VR] LocalQuasiTrsm ( LEFT, UPPER, NORMAL, F(1), U11_STAR_STAR, X1_STAR_VR, checkIfSingular ); X1_STAR_MR.AlignWith( X0 ); X1_STAR_MR = X1_STAR_VR; // X1[* ,MR] <- X1[* ,VR] X1 = X1_STAR_MR; // X1[MC,MR] <- X1[* ,MR] U01_MC_STAR.AlignWith( X0 ); U01_MC_STAR = U01; // U01[MC,* ] <- U01[MC,MR] // X0[MC,MR] -= U01[MC,* ] X1[* ,MR] LocalGemm( NORMAL, NORMAL, F(-1), U01_MC_STAR, X1_STAR_MR, F(1), X0 ); if( k == 0 ) break; kOld = k; k -= Min(bsize,k); } }