inline void TrsvLT ( Orientation orientation, UnitOrNonUnit diag, const DistMatrix<F>& L, DistMatrix<F>& x ) { #ifndef RELEASE PushCallStack("internal::TrsvLT"); if( L.Grid() != x.Grid() ) throw std::logic_error("{L,x} must be distributed over the same grid"); if( orientation == NORMAL ) throw std::logic_error("TrsvLT expects a (conjugate-)transpose option"); if( L.Height() != L.Width() ) throw std::logic_error("L must be square"); if( x.Width() != 1 && x.Height() != 1 ) throw std::logic_error("x must be a vector"); const int xLength = ( x.Width() == 1 ? x.Height() : x.Width() ); if( L.Width() != xLength ) throw std::logic_error("Nonconformal TrsvLT"); #endif const Grid& g = L.Grid(); if( x.Width() == 1 ) { // Matrix views DistMatrix<F> L10(g), L11(g); DistMatrix<F> xT(g), x0(g), xB(g), x1(g), x2(g); // Temporary distributions DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,STAR,STAR> x1_STAR_STAR(g); DistMatrix<F,MC, STAR> x1_MC_STAR(g); DistMatrix<F,MC, MR > z1(g); DistMatrix<F,MR, MC > z1_MR_MC(g); DistMatrix<F,MR, STAR> z_MR_STAR(g); // Views of z[MR,* ] DistMatrix<F,MR,STAR> z0_MR_STAR(g), z1_MR_STAR(g); z_MR_STAR.AlignWith( L ); Zeros( x.Height(), 1, z_MR_STAR ); // Start the algorithm PartitionUp ( x, xT, xB, 0 ); while( xT.Height() > 0 ) { RepartitionUp ( xT, x0, x1, /**/ /**/ xB, x2 ); const int n0 = x0.Height(); const int n1 = x1.Height(); L10.LockedView( L, n0, 0, n1, n0 ); L11.LockedView( L, n0, n0, n1, n1 ); z0_MR_STAR.View( z_MR_STAR, 0, 0, n0, 1 ); z1_MR_STAR.View( z_MR_STAR, n0, 0, n1, 1 ); x1_MC_STAR.AlignWith( L10 ); z1.AlignWith( x1 ); //----------------------------------------------------------------// if( x2.Height() != 0 ) { z1_MR_MC.SumScatterFrom( z1_MR_STAR ); z1 = z1_MR_MC; Axpy( F(1), z1, x1 ); } x1_STAR_STAR = x1; L11_STAR_STAR = L11; Trsv ( LOWER, orientation, diag, L11_STAR_STAR.LockedLocalMatrix(), x1_STAR_STAR.LocalMatrix() ); x1 = x1_STAR_STAR; x1_MC_STAR = x1_STAR_STAR; Gemv ( orientation, F(-1), L10.LockedLocalMatrix(), x1_MC_STAR.LockedLocalMatrix(), F(1), z0_MR_STAR.LocalMatrix() ); //----------------------------------------------------------------// x1_MC_STAR.FreeAlignments(); z1.FreeAlignments(); SlidePartitionUp ( xT, x0, /**/ /**/ x1, xB, x2 ); } } else { // Matrix views DistMatrix<F> L10(g), L11(g); DistMatrix<F> xL(g), xR(g), x0(g), x1(g), x2(g); // Temporary distributions DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,STAR,STAR> x1_STAR_STAR(g); DistMatrix<F,STAR,MC > x1_STAR_MC(g); DistMatrix<F,STAR,MR > z_STAR_MR(g); // Views of z[* ,MR], which will store updates to x DistMatrix<F,STAR,MR> z0_STAR_MR(g), z1_STAR_MR(g); z_STAR_MR.AlignWith( L ); Zeros( 1, x.Width(), z_STAR_MR ); // Start the algorithm PartitionLeft( x, xL, xR, 0 ); while( xL.Width() > 0 ) { RepartitionLeft ( xL, /**/ xR, x0, x1, /**/ x2 ); const int n0 = x0.Width(); const int n1 = x1.Width(); L10.LockedView( L, n0, 0, n1, n0 ); L11.LockedView( L, n0, n0, n1, n1 ); z0_STAR_MR.View( z_STAR_MR, 0, 0, 1, n0 ); z1_STAR_MR.View( z_STAR_MR, 0, n0, 1, n1 ); x1_STAR_MC.AlignWith( L10 ); //----------------------------------------------------------------// if( x2.Width() != 0 ) x1.SumScatterUpdate( F(1), z1_STAR_MR ); x1_STAR_STAR = x1; L11_STAR_STAR = L11; Trsv ( LOWER, orientation, diag, L11_STAR_STAR.LockedLocalMatrix(), x1_STAR_STAR.LocalMatrix() ); x1 = x1_STAR_STAR; x1_STAR_MC = x1_STAR_STAR; Gemv ( orientation, F(-1), L10.LockedLocalMatrix(), x1_STAR_MC.LockedLocalMatrix(), F(1), z0_STAR_MR.LocalMatrix() ); //----------------------------------------------------------------// x1_STAR_MC.FreeAlignments(); SlidePartitionLeft ( xL, /**/ xR, x0, /**/ x1, x2 ); } } #ifndef RELEASE PopCallStack(); #endif }
Int ADMM ( const Matrix<Real>& A, const Matrix<Real>& b, const Matrix<Real>& c, Matrix<Real>& z, const ADMMCtrl<Real>& ctrl ) { DEBUG_ONLY(CSE cse("lp::direct::ADMM")) // Cache a custom partially-pivoted LU factorization of // | rho*I A^H | = | B11 B12 | // | A 0 | | B21 B22 | // by (justifiably) avoiding pivoting in the first n steps of // the factorization, so that // [I,rho*I] = lu(rho*I). // The factorization would then proceed with // B21 := B21 U11^{-1} = A (rho*I)^{-1} = A/rho // B12 := L11^{-1} B12 = I A^H = A^H. // The Schur complement would then be // B22 := B22 - B21 B12 = 0 - (A*A^H)/rho. // We then factor said matrix with LU with partial pivoting and // swap the necessary rows of B21 in order to implicitly commute // the row pivots with the Gauss transforms in the manner standard // for GEPP. Unless A A' is singular, pivoting should not be needed, // as Cholesky factorization of the negative matrix should be valid. // // The result is the factorization // | I 0 | | rho*I A^H | = | I 0 | | rho*I U12 |, // | 0 P22 | | A 0 | | L21 L22 | | 0 U22 | // where [L22,U22] are stored within B22. Matrix<Real> U12, L21, B22, bPiv; Adjoint( A, U12 ); L21 = A; L21 *= 1/ctrl.rho; Herk( LOWER, NORMAL, -1/ctrl.rho, A, B22 ); MakeHermitian( LOWER, B22 ); // TODO: Replace with sparse-direct Cholesky version? Matrix<Int> rowPiv2; LU( B22, rowPiv2 ); ApplyRowPivots( L21, rowPiv2 ); bPiv = b; ApplyRowPivots( bPiv, rowPiv2 ); // Possibly form the inverse of L22 U22 Matrix<Real> X22; if( ctrl.inv ) { X22 = B22; MakeTrapezoidal( LOWER, X22 ); FillDiagonal( X22, Real(1) ); TriangularInverse( LOWER, UNIT, X22 ); Trsm( LEFT, UPPER, NORMAL, NON_UNIT, Real(1), B22, X22 ); } Int numIter=0; const Int m = A.Height(); const Int n = A.Width(); Matrix<Real> g, xTmp, y, t; Zeros( g, m+n, 1 ); PartitionDown( g, xTmp, y, n ); Matrix<Real> x, u, zOld, xHat; Zeros( z, n, 1 ); Zeros( u, n, 1 ); Zeros( t, n, 1 ); while( numIter < ctrl.maxIter ) { zOld = z; // Find x from // | rho*I A^H | | x | = | rho*(z-u)-c | // | A 0 | | y | | b | // via our cached custom factorization: // // |x| = inv(U) inv(L) P' |rho*(z-u)-c| // |y| |b | // = |rho*I U12|^{-1} |I 0 | |I 0 | |rho*(z-u)-c| // = |0 U22| |L21 L22| |0 P22'| |b | // = " " |rho*(z-u)-c| // | P22' b | xTmp = z; xTmp -= u; xTmp *= ctrl.rho; xTmp -= c; y = bPiv; Gemv( NORMAL, Real(-1), L21, xTmp, Real(1), y ); if( ctrl.inv ) { Gemv( NORMAL, Real(1), X22, y, t ); y = t; } else { Trsv( LOWER, NORMAL, UNIT, B22, y ); Trsv( UPPER, NORMAL, NON_UNIT, B22, y ); } Gemv( NORMAL, Real(-1), U12, y, Real(1), xTmp ); xTmp *= 1/ctrl.rho; // xHat := alpha*x + (1-alpha)*zOld xHat = xTmp; xHat *= ctrl.alpha; Axpy( 1-ctrl.alpha, zOld, xHat ); // z := pos(xHat+u) z = xHat; z += u; LowerClip( z, Real(0) ); // u := u + (xHat-z) u += xHat; u -= z; const Real objective = Dot( c, xTmp ); // rNorm := || x - z ||_2 t = xTmp; t -= z; const Real rNorm = FrobeniusNorm( t ); // sNorm := |rho| || z - zOld ||_2 t = z; t -= zOld; const Real sNorm = Abs(ctrl.rho)*FrobeniusNorm( t ); const Real epsPri = Sqrt(Real(n))*ctrl.absTol + ctrl.relTol*Max(FrobeniusNorm(xTmp),FrobeniusNorm(z)); const Real epsDual = Sqrt(Real(n))*ctrl.absTol + ctrl.relTol*Abs(ctrl.rho)*FrobeniusNorm(u); if( ctrl.print ) { t = xTmp; LowerClip( t, Real(0) ); t -= xTmp; const Real clipDist = FrobeniusNorm( t ); cout << numIter << ": " << "||x-z||_2=" << rNorm << ", " << "epsPri=" << epsPri << ", " << "|rho| ||z-zOld||_2=" << sNorm << ", " << "epsDual=" << epsDual << ", " << "||x-Pos(x)||_2=" << clipDist << ", " << "c'x=" << objective << endl; } if( rNorm < epsPri && sNorm < epsDual ) break; ++numIter; } if( ctrl.maxIter == numIter ) cout << "ADMM failed to converge" << endl; x = xTmp; return numIter; }