void LLNMedium ( const AbstractDistMatrix<F>& LPre, AbstractDistMatrix<F>& XPre, bool checkIfSingular ) { DEBUG_CSE const Int m = XPre.Height(); const Int bsize = Blocksize(); const Grid& g = LPre.Grid(); DistMatrixReadProxy<F,F,MC,MR> LProx( LPre ); DistMatrixReadWriteProxy<F,F,MC,MR> XProx( XPre ); auto& L = LProx.GetLocked(); auto& X = XProx.Get(); DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,MC, STAR> L21_MC_STAR(g); DistMatrix<F,MR, STAR> X1Trans_MR_STAR(g); for( Int k=0; k<m; k+=bsize ) { const Int nbProp = Min(bsize,m-k); const bool in2x2 = ( k+nbProp<m && L.Get(k+nbProp-1,k+nbProp) != F(0) ); const Int nb = ( in2x2 ? nbProp+1 : nbProp ); const Range<Int> ind1( k, k+nb ), ind2( k+nb, m ); auto L11 = L( ind1, ind1 ); auto L21 = L( ind2, ind1 ); auto X1 = X( ind1, ALL ); auto X2 = X( ind2, ALL ); L11_STAR_STAR = L11; // L11[* ,* ] <- L11[MC,MR] X1Trans_MR_STAR.AlignWith( X2 ); Transpose( X1, X1Trans_MR_STAR ); // X1^T[MR,* ] := X1^T[MR,* ] L11^-T[* ,* ] // = (L11^-1[* ,* ] X1[* ,MR])^T LocalQuasiTrsm ( RIGHT, LOWER, TRANSPOSE, F(1), L11_STAR_STAR, X1Trans_MR_STAR, checkIfSingular ); Transpose( X1Trans_MR_STAR, X1 ); L21_MC_STAR.AlignWith( X2 ); L21_MC_STAR = L21; // L21[MC,* ] <- L21[MC,MR] // X2[MC,MR] -= L21[MC,* ] X1[* ,MR] LocalGemm ( NORMAL, TRANSPOSE, F(-1), L21_MC_STAR, X1Trans_MR_STAR, F(1), X2 ); } }
void LLNLarge ( const AbstractDistMatrix<F>& LPre, AbstractDistMatrix<F>& XPre, bool checkIfSingular ) { DEBUG_CSE const Int m = XPre.Height(); const Int bsize = Blocksize(); const Grid& g = LPre.Grid(); DistMatrixReadProxy<F,F,MC,MR> LProx( LPre ); DistMatrixReadWriteProxy<F,F,MC,MR> XProx( XPre ); auto& L = LProx.GetLocked(); auto& X = XProx.Get(); DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,MC, STAR> L21_MC_STAR(g); DistMatrix<F,STAR,MR > X1_STAR_MR(g); DistMatrix<F,STAR,VR > X1_STAR_VR(g); for( Int k=0; k<m; k+=bsize ) { const Int nbProp = Min(bsize,m-k); const bool in2x2 = ( k+nbProp<m && L.Get(k+nbProp-1,k+nbProp) != F(0) ); const Int nb = ( in2x2 ? nbProp+1 : nbProp ); const Range<Int> ind1( k, k+nb ), ind2( k+nb, m ); auto L11 = L( ind1, ind1 ); auto L21 = L( ind2, ind1 ); auto X1 = X( ind1, ALL ); auto X2 = X( ind2, ALL ); // X1[* ,VR] := L11^-1[* ,* ] X1[* ,VR] L11_STAR_STAR = L11; X1_STAR_VR = X1; LocalQuasiTrsm ( LEFT, LOWER, NORMAL, F(1), L11_STAR_STAR, X1_STAR_VR, checkIfSingular ); X1_STAR_MR.AlignWith( X2 ); X1_STAR_MR = X1_STAR_VR; // X1[* ,MR] <- X1[* ,VR] X1 = X1_STAR_MR; // X1[MC,MR] <- X1[* ,MR] L21_MC_STAR.AlignWith( X2 ); L21_MC_STAR = L21; // L21[MC,* ] <- L21[MC,MR] // X2[MC,MR] -= L21[MC,* ] X1[* ,MR] LocalGemm( NORMAL, NORMAL, F(-1), L21_MC_STAR, X1_STAR_MR, F(1), X2 ); } }
void LLN ( UnitOrNonUnit diag, F alpha, const AbstractDistMatrix<F>& LPre, AbstractDistMatrix<F>& XPre, bool checkIfSingular ) { DEBUG_CSE const Int n = LPre.Height(); const Int bsize = Blocksize(); const Grid& g = LPre.Grid(); DistMatrixReadProxy<F,F,MC,MR> LProx( LPre ); DistMatrixReadWriteProxy<F,F,MC,MR> XProx( XPre ); auto& L = LProx.GetLocked(); auto& X = XProx.Get(); // Temporary distributions DistMatrix<F,STAR,STAR> L11_STAR_STAR(g), X11_STAR_STAR(g); DistMatrix<F,MC, STAR> L21_MC_STAR(g); DistMatrix<F,STAR,MR > X10_STAR_MR(g), X11_STAR_MR(g); DistMatrix<F,STAR,VR > X10_STAR_VR(g); ScaleTrapezoid( alpha, LOWER, X ); for( Int k=0; k<n; k+=bsize ) { const Int nb = Min(bsize,n-k); const Range<Int> ind0( 0, k ), ind1( k, k+nb ), ind2( k+nb, n ); auto L11 = L( ind1, ind1 ); auto L21 = L( ind2, ind1 ); auto X10 = X( ind1, ind0 ); auto X11 = X( ind1, ind1 ); auto X20 = X( ind2, ind0 ); auto X21 = X( ind2, ind1 ); L11_STAR_STAR = L11; X11_STAR_STAR = X11; X10_STAR_VR = X10; LocalTrsm ( LEFT, LOWER, NORMAL, diag, F(1), L11_STAR_STAR, X10_STAR_VR, checkIfSingular ); Trstrm ( LEFT, LOWER, NORMAL, diag, F(1), L11_STAR_STAR, X11_STAR_STAR, checkIfSingular ); X11 = X11_STAR_STAR; X11_STAR_MR.AlignWith( X21 ); X11_STAR_MR = X11_STAR_STAR; MakeTrapezoidal( LOWER, X11_STAR_MR ); X10_STAR_MR.AlignWith( X20 ); X10_STAR_MR = X10_STAR_VR; X10 = X10_STAR_MR; L21_MC_STAR.AlignWith( X20 ); L21_MC_STAR = L21; LocalGemm ( NORMAL, NORMAL, F(-1), L21_MC_STAR, X10_STAR_MR, F(1), X20 ); LocalGemm ( NORMAL, NORMAL, F(-1), L21_MC_STAR, X11_STAR_MR, F(1), X21 ); } }
inline void ALM ( const ElementalMatrix<F>& MPre, ElementalMatrix<F>& LPre, ElementalMatrix<F>& SPre, const RPCACtrl<Base<F>>& ctrl ) { DistMatrixReadProxy<F,F,MC,MR> MProx( MPre ); DistMatrixWriteProxy<F,F,MC,MR> LProx( LPre ), SProx( SPre ); auto& M = MProx.GetLocked(); auto& L = LProx.Get(); auto& S = SProx.Get(); typedef Base<F> Real; const Int m = M.Height(); const Int n = M.Width(); const int commRank = mpi::Rank( M.Grid().Comm() ); // If tau is unspecified, set it to 1/sqrt(max(m,n)) const Base<F> tau = ( ctrl.tau <= Real(0) ? Real(1) / sqrt(Real(Max(m,n))) : ctrl.tau ); if( ctrl.tol <= Real(0) ) LogicError("tol cannot be non-positive"); const Base<F> tol = ctrl.tol; const double startTime = mpi::Time(); DistMatrix<F> Y( M ); NormalizeEntries( Y ); const Real twoNorm = TwoNorm( Y ); const Real maxNorm = MaxNorm( Y ); const Real infNorm = maxNorm / tau; const Real dualNorm = Max( twoNorm, infNorm ); Y *= F(1)/dualNorm; // If beta is unspecified, set it to 1 / 2 || sign(M) ||_2 Base<F> beta = ( ctrl.beta <= Real(0) ? Real(1) / (2*twoNorm) : ctrl.beta ); const Real frobM = FrobeniusNorm( M ); const Real maxM = MaxNorm( M ); if( ctrl.progress && commRank == 0 ) cout << "|| M ||_F = " << frobM << "\n" << "|| M ||_max = " << maxM << endl; Zeros( L, m, n ); Zeros( S, m, n ); Int numIts=0, numPrimalIts=0; DistMatrix<F> LLast( M.Grid() ), SLast( M.Grid() ), E( M.Grid() ); while( true ) { ++numIts; Int rank, numNonzeros; while( true ) { ++numPrimalIts; LLast = L; SLast = S; // ST_{tau/beta}(M - L + Y/beta) S = M; S -= L; Axpy( F(1)/beta, Y, S ); SoftThreshold( S, tau/beta ); numNonzeros = ZeroNorm( S ); // SVT_{1/beta}(M - S + Y/beta) L = M; L -= S; Axpy( F(1)/beta, Y, L ); if( ctrl.usePivQR ) rank = SVT( L, Real(1)/beta, ctrl.numPivSteps ); else rank = SVT( L, Real(1)/beta ); LLast -= L; SLast -= S; const Real frobLDiff = FrobeniusNorm( LLast ); const Real frobSDiff = FrobeniusNorm( SLast ); if( frobLDiff/frobM < tol && frobSDiff/frobM < tol ) { if( ctrl.progress && commRank == 0 ) cout << "Primal loop converged: " << mpi::Time()-startTime << " total secs" << endl; break; } else { if( ctrl.progress && commRank == 0 ) cout << " " << numPrimalIts << ": \n" << " || Delta L ||_F / || M ||_F = " << frobLDiff/frobM << "\n" << " || Delta S ||_F / || M ||_F = " << frobSDiff/frobM << "\n" << " rank=" << rank << ", numNonzeros=" << numNonzeros << ", " << mpi::Time()-startTime << " total secs" << endl; } } // E := M - (L + S) E = M; E -= L; E -= S; const Real frobE = FrobeniusNorm( E ); if( frobE/frobM <= tol ) { if( ctrl.progress && commRank == 0 ) cout << "Converged after " << numIts << " iterations and " << numPrimalIts << " primal iterations with rank=" << rank << ", numNonzeros=" << numNonzeros << " and " << "|| E ||_F / || M ||_F = " << frobE/frobM << ", " << mpi::Time()-startTime << " total secs" << endl; break; } else if( numIts >= ctrl.maxIts ) { if( ctrl.progress && commRank == 0 ) cout << "Aborting after " << numIts << " iterations and " << mpi::Time()-startTime << " total secs" << endl; break; } else { if( ctrl.progress && commRank == 0 ) cout << numPrimalIts << ": || E ||_F / || M ||_F = " << frobE/frobM << ", rank=" << rank << ", numNonzeros=" << numNonzeros << ", " << mpi::Time()-startTime << " total secs" << endl; } // Y := Y + beta E Axpy( beta, E, Y ); beta *= ctrl.rho; } }
inline void ADMM ( const ElementalMatrix<F>& MPre, ElementalMatrix<F>& LPre, ElementalMatrix<F>& SPre, const RPCACtrl<Base<F>>& ctrl ) { DistMatrixReadProxy<F,F,MC,MR> MProx( MPre ); DistMatrixWriteProxy<F,F,MC,MR> LProx( LPre ), SProx( SPre ); auto& M = MProx.GetLocked(); auto& L = LProx.Get(); auto& S = SProx.Get(); typedef Base<F> Real; const Int m = M.Height(); const Int n = M.Width(); const int commRank = mpi::Rank( M.Grid().Comm() ); // If tau is not specified, then set it to 1/sqrt(max(m,n)) const Base<F> tau = ( ctrl.tau <= Real(0) ? Real(1)/sqrt(Real(Max(m,n))) : ctrl.tau ); if( ctrl.beta <= Real(0) ) LogicError("beta cannot be non-positive"); if( ctrl.tol <= Real(0) ) LogicError("tol cannot be non-positive"); const Base<F> beta = ctrl.beta; const Base<F> tol = ctrl.tol; const double startTime = mpi::Time(); DistMatrix<F> E( M.Grid() ), Y( M.Grid() ); Zeros( Y, m, n ); const Real frobM = FrobeniusNorm( M ); const Real maxM = MaxNorm( M ); if( ctrl.progress && commRank == 0 ) cout << "|| M ||_F = " << frobM << "\n" << "|| M ||_max = " << maxM << endl; Zeros( L, m, n ); Zeros( S, m, n ); Int numIts = 0; while( true ) { ++numIts; // ST_{tau/beta}(M - L + Y/beta) S = M; S -= L; Axpy( F(1)/beta, Y, S ); SoftThreshold( S, tau/beta ); const Int numNonzeros = ZeroNorm( S ); // SVT_{1/beta}(M - S + Y/beta) L = M; L -= S; Axpy( F(1)/beta, Y, L ); Int rank; if( ctrl.usePivQR ) rank = SVT( L, Real(1)/beta, ctrl.numPivSteps ); else rank = SVT( L, Real(1)/beta ); // E := M - (L + S) E = M; E -= L; E -= S; const Real frobE = FrobeniusNorm( E ); if( frobE/frobM <= tol ) { if( ctrl.progress && commRank == 0 ) cout << "Converged after " << numIts << " iterations " << " with rank=" << rank << ", numNonzeros=" << numNonzeros << " and " << "|| E ||_F / || M ||_F = " << frobE/frobM << ", and " << mpi::Time()-startTime << " total secs" << endl; break; } else if( numIts >= ctrl.maxIts ) { if( ctrl.progress && commRank == 0 ) cout << "Aborting after " << numIts << " iterations and " << mpi::Time()-startTime << " total secs" << endl; break; } else { if( ctrl.progress && commRank == 0 ) cout << numIts << ": || E ||_F / || M ||_F = " << frobE/frobM << ", rank=" << rank << ", numNonzeros=" << numNonzeros << ", " << mpi::Time()-startTime << " total secs" << endl; } // Y := Y + beta E Axpy( beta, E, Y ); } }