void Tikhonov ( Orientation orientation, const Matrix<F>& A, const Matrix<F>& B, const Matrix<F>& G, Matrix<F>& X, TikhonovAlg alg ) { DEBUG_CSE const bool normal = ( orientation==NORMAL ); const Int m = ( normal ? A.Height() : A.Width() ); const Int n = ( normal ? A.Width() : A.Height() ); if( G.Width() != n ) LogicError("Tikhonov matrix was the wrong width"); if( orientation == TRANSPOSE && IsComplex<F>::value ) LogicError("Transpose version of complex Tikhonov not yet supported"); if( m >= n ) { Matrix<F> Z; if( alg == TIKHONOV_CHOLESKY ) { if( orientation == NORMAL ) Herk( LOWER, ADJOINT, Base<F>(1), A, Z ); else Herk( LOWER, NORMAL, Base<F>(1), A, Z ); Herk( LOWER, ADJOINT, Base<F>(1), G, Base<F>(1), Z ); Cholesky( LOWER, Z ); } else { const Int mG = G.Height(); Zeros( Z, m+mG, n ); auto ZT = Z( IR(0,m), IR(0,n) ); auto ZB = Z( IR(m,m+mG), IR(0,n) ); if( orientation == NORMAL ) ZT = A; else Adjoint( A, ZT ); ZB = G; qr::ExplicitTriang( Z ); } if( orientation == NORMAL ) Gemm( ADJOINT, NORMAL, F(1), A, B, X ); else Gemm( NORMAL, NORMAL, F(1), A, B, X ); cholesky::SolveAfter( LOWER, NORMAL, Z, X ); } else { LogicError("This case not yet supported"); } }
// Blocked in-place sweep down the diagonal of the square matrix A. Only the
// blocks on or above the diagonal (A00, A01, A02, A11, A12, A22) are read or
// written.
// NOTE(review): the call-stack label ("hpd_inverse") suggests this overwrites
// the upper triangle of an HPD matrix with that of its inverse -- confirm
// against the caller.
inline void CholeskyUVar2( Matrix<F>& A )
{
#ifndef RELEASE
    PushCallStack("hpd_inverse::CholeskyUVar2");
    if( A.Height() != A.Width() )
        throw std::logic_error("Nonsquare matrices cannot be triangular");
#endif
    // Quadrant views
    Matrix<F> ATL, ATR,
              ABL, ABR;
    // Repartitioned 3x3 views
    Matrix<F> A00, A01, A02,
              A10, A11, A12,
              A20, A21, A22;

    PartitionDownDiagonal
    ( A, ATL, ATR,
         ABL, ABR, 0 );
    while( A.Height() > ATL.Height() )
    {
        RepartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, /**/ A01, A02,
         /*************/ /******************/
               /**/       A10, /**/ A11, A12,
          ABL, /**/ ABR,  A20, /**/ A21, A22 );

        // Factor the diagonal block in place (upper storage)
        Cholesky( UPPER, A11 );

        // A01 := A01 inv(A11),  A12 := inv(A11)' A12
        Trsm( RIGHT, UPPER, NORMAL,  NON_UNIT, F(1), A11, A01 );
        Trsm( LEFT,  UPPER, ADJOINT, NON_UNIT, F(1), A11, A12 );

        // Rank-k updates of the surrounding blocks
        Herk( UPPER, NORMAL, F(1), A01, F(1), A00 );
        Gemm( NORMAL, NORMAL, F(-1), A01, A12, F(1), A02 );
        Herk( UPPER, ADJOINT, F(-1), A12, F(1), A22 );

        // A01 := A01 inv(A11)',  A12 := -inv(A11) A12
        Trsm( RIGHT, UPPER, ADJOINT, NON_UNIT, F(1),  A11, A01 );
        Trsm( LEFT,  UPPER, NORMAL,  NON_UNIT, F(-1), A11, A12 );

        // Finally invert the triangular diagonal block and form its
        // triangular-times-adjoint product in place
        TriangularInverse( UPPER, NON_UNIT, A11 );
        Trtrmm( ADJOINT, UPPER, A11 );

        SlidePartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, A01, /**/ A02,
               /**/       A10, A11, /**/ A12,
         /*************/ /******************/
          ABL, /**/ ABR,  A20, A21, /**/ A22 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
void QP ( const Matrix<Real>& A, const Matrix<Real>& B, Matrix<Real>& X, const qp::direct::Ctrl<Real>& ctrl ) { DEBUG_CSE const Int n = A.Width(); const Int k = B.Width(); Matrix<Real> Q, AHat, bHat, c; Herk( LOWER, ADJOINT, Real(1), A, Q ); Zeros( AHat, 0, n ); Zeros( bHat, 0, 1 ); Zeros( X, n, k ); Matrix<Real> y, z; for( Int j=0; j<k; ++j ) { auto x = X( ALL, IR(j) ); auto b = B( ALL, IR(j) ); Zeros( c, n, 1 ); Gemv( ADJOINT, Real(-1), A, b, Real(0), c ); El::QP( Q, AHat, bHat, c, x, y, z, ctrl ); } }
// Returns (n/r) * max_j || (U U')(:,j) ||_2^2 for the n x r distributed
// matrix U, i.e. the largest squared column two-norm of Z = U U' scaled by
// n/r.
//
// Each process computes the squared norms of its local column pieces of Z;
// these are summed over the grid's column communicator and the maximum is
// then taken over the row communicator.
//
// A process that owns no local columns of Z contributes a zero-length
// buffer and a local maximum of 0, which is neutral because every squared
// norm is non-negative.
UnitaryCoherence( DistMatrix<F>& U )
{
#ifndef RELEASE
    CallStackEntry entry("UnitaryCoherence");
#endif
    typedef BASE(F) R;
    const Grid& grid = U.Grid();
    const Int n = U.Height();
    const Int r = U.Width();

    // Z := U U' in n^2 r work
    DistMatrix<F> Z( grid );
    Herk( UPPER, NORMAL, F(1), U, Z );

    // Now make Z explicitly Hermitian so that our job is easier
    MakeHermitian( UPPER, Z );

    // Compute the maximum column two-norm squared
    const Int localWidth = Z.LocalWidth();
    const Int localHeight = Z.LocalHeight();
    std::vector<R> normsSquared( localWidth );
    for( Int jLocal=0; jLocal<localWidth; ++jLocal )
    {
        const R localNorm =
            blas::Nrm2( localHeight, Z.LockedBuffer(0,jLocal), 1 );
        normsSquared[jLocal] = localNorm*localNorm;
    }

    // Indexing element 0 of an empty vector is undefined, so only take the
    // address when there is at least one local column. The reduction is
    // still entered so that the collective stays matched across processes.
    R* normsBuf = ( localWidth > 0 ? &normsSquared[0] : 0 );
    mpi::AllReduce( normsBuf, localWidth, grid.ColComm() );

    // std::max_element returns end() on an empty range, which must not be
    // dereferenced.
    R maxLocalNormSquared = 0;
    if( !normsSquared.empty() )
        maxLocalNormSquared =
            *std::max_element( normsSquared.begin(), normsSquared.end() );
    const R maxNormSquared =
        mpi::AllReduce( maxLocalNormSquared, mpi::MAX, grid.RowComm() );
    return (n*maxNormSquared)/r;
}
// Sequential version: returns (n/r) * max_j || (U U')(:,j) ||_2^2 for the
// n x r matrix U. The Gram-like matrix Z = U U' is formed explicitly
// (n^2 r work) and then symmetrized so that whole columns can be normed.
UnitaryCoherence( Matrix<F>& U )
{
#ifndef RELEASE
    CallStackEntry entry("UnitaryCoherence");
#endif
    typedef BASE(F) R;
    const Int n = U.Height();
    const Int r = U.Width();

    // Z := U U', upper triangle first, then filled out to all of Z
    Matrix<F> Z;
    Herk( UPPER, NORMAL, F(1), U, Z );
    MakeHermitian( UPPER, Z );

    // Track the largest column two-norm of Z
    R maxColNorm = 0;
    for( Int col=0; col<n; ++col )
    {
        const R colNorm = blas::Nrm2( n, Z.LockedBuffer(0,col), 1 );
        maxColNorm = ( colNorm < maxColNorm ? maxColNorm : colNorm );
    }
    return (n*maxColNorm*maxColNorm)/r;
}
// Distributed version. Copies A, scales each column of the copy by the
// inverse of its two-norm, forms the upper triangle of I - B^H B for the
// scaled copy B, and returns the Hermitian max norm of that matrix.
// The last argument to DiagonalSolve is passed through as `true` unchanged.
Base<F> Coherence( const ElementalMatrix<F>& A )
{
    DEBUG_ONLY(CSE cse("Coherence"))
    // Column-normalized copy of A
    DistMatrix<F> scaled( A );
    DistMatrix<Base<F>,MR,STAR> colNorms(scaled.Grid());
    ColumnTwoNorms( scaled, colNorms );
    DiagonalSolve( RIGHT, NORMAL, colNorms, scaled, true );

    // gramDefect := I - scaled^H scaled (upper triangle)
    const auto numCols = A.Width();
    DistMatrix<F> gramDefect(scaled.Grid());
    Identity( gramDefect, numCols, numCols );
    Herk( UPPER, ADJOINT, Base<F>(-1), scaled, Base<F>(1), gramDefect );

    return HermitianMaxNorm( UPPER, gramDefect );
}
// Sequential version. Copies A, divides each column of the copy by the
// corresponding entry produced by ColumnNorms, forms the upper triangle of
// I - B^H B for the scaled copy B, and returns its Hermitian max norm.
// The last argument to DiagonalSolve is passed through as `true` unchanged.
Base<F> Coherence( const Matrix<F>& A )
{
    DEBUG_ONLY(CallStackEntry cse("Coherence"))
    // Column-normalized copy of A
    Matrix<F> scaled( A );
    Matrix<Base<F>> colNorms;
    ColumnNorms( scaled, colNorms );
    DiagonalSolve( RIGHT, NORMAL, colNorms, scaled, true );

    // gramDefect := I - scaled^H scaled (upper triangle)
    const auto numCols = A.Width();
    Matrix<F> gramDefect;
    Identity( gramDefect, numCols, numCols );
    Herk( UPPER, ADJOINT, Base<F>(-1), scaled, Base<F>(1), gramDefect );

    return HermitianMaxNorm( UPPER, gramDefect );
}
// Sequential dense front-end that reduces the problem to a box-constrained
// QP: it builds Q := A^H A (lower triangle) and C := -A^H B, then delegates
// to qp::box::ADMM with lower bound 0 and upper bound limits::Max<Real>().
// Returns whatever qp::box::ADMM returns.
Int ADMM
( const Matrix<Real>& A,
  const Matrix<Real>& B,
        Matrix<Real>& X,
  const ADMMCtrl<Real>& ctrl )
{
    DEBUG_CSE
    const Real lowerBound = Real(0);
    const Real upperBound = limits::Max<Real>();

    // Quadratic term
    Matrix<Real> Q;
    Herk( LOWER, ADJOINT, Real(1), A, Q );

    // Linear term
    Matrix<Real> C;
    Gemm( ADJOINT, NORMAL, Real(-1), A, B, C );

    return qp::box::ADMM( Q, C, lowerBound, upperBound, X, ctrl );
}
// Blocked lower Cholesky sweep (call-stack label "internal::CholeskyLVar2").
// Per iteration the diagonal block is first updated with the already
// processed row panel A10 and factored by the unblocked kernel; the panel
// below it (A21) is then updated and solved against the new factor.
// Only A10, A11, A20 and A21 are touched.
inline void CholeskyLVar2( Matrix<F>& A )
{
#ifndef RELEASE
    PushCallStack("internal::CholeskyLVar2");
    if( A.Height() != A.Width() )
        throw std::logic_error
        ("Can only compute Cholesky factor of square matrices");
#endif
    // Quadrant views
    Matrix<F> ATL, ATR,
              ABL, ABR;
    // Repartitioned 3x3 views
    Matrix<F> A00, A01, A02,
              A10, A11, A12,
              A20, A21, A22;

    PartitionDownDiagonal
    ( A, ATL, ATR,
         ABL, ABR, 0 );
    while( A.Height() > ATL.Height() )
    {
        RepartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, /**/ A01, A02,
         /*************/ /******************/
               /**/       A10, /**/ A11, A12,
          ABL, /**/ ABR,  A20, /**/ A21, A22 );

        // A11 := A11 - A10 A10', then factor it in place
        Herk( LOWER, NORMAL, F(-1), A10, F(1), A11 );
        CholeskyLVar3Unb( A11 );

        // A21 := (A21 - A20 A10') inv(A11)'
        Gemm( NORMAL, ADJOINT, F(-1), A20, A10, F(1), A21 );
        Trsm( RIGHT, LOWER, ADJOINT, NON_UNIT, F(1), A11, A21 );

        SlidePartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, A01, /**/ A02,
               /**/       A10, A11, /**/ A12,
         /*************/ /******************/
          ABL, /**/ ABR,  A20, A21, /**/ A22 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
// Distributed front-end that reduces the problem to a box-constrained QP.
// APre is read through an [MC,MR] proxy; Q := A^H A (lower triangle) and
// C := -A^H B are formed on A's grid and passed to qp::box::ADMM with lower
// bound 0 and upper bound limits::Max<Real>(). Returns that call's result.
Int ADMM
( const ElementalMatrix<Real>& APre,
  const ElementalMatrix<Real>& B,
        ElementalMatrix<Real>& X,
  const ADMMCtrl<Real>& ctrl )
{
    DEBUG_CSE
    const Real lowerBound = Real(0);
    const Real upperBound = limits::Max<Real>();

    DistMatrixReadProxy<Real,Real,MC,MR> AProx( APre );
    auto& A = AProx.GetLocked();

    // Quadratic term
    DistMatrix<Real> Q(A.Grid());
    // Linear term
    DistMatrix<Real> C(A.Grid());

    Herk( LOWER, ADJOINT, Real(1), A, Q );
    Gemm( ADJOINT, NORMAL, Real(-1), A, B, C );

    return qp::box::ADMM( Q, C, lowerBound, upperBound, X, ctrl );
}
void QP ( const DistSparseMatrix<Real>& A, const DistMultiVec<Real>& B, DistMultiVec<Real>& X, const qp::direct::Ctrl<Real>& ctrl ) { DEBUG_CSE const Int m = A.Height(); const Int n = A.Width(); const Int k = B.Width(); mpi::Comm comm = A.Comm(); DistSparseMatrix<Real> Q(comm), AHat(comm); DistMultiVec<Real> bHat(comm), c(comm); Herk( LOWER, ADJOINT, Real(1), A, Q ); MakeHermitian( LOWER, Q ); Zeros( AHat, 0, n ); Zeros( bHat, 0, 1 ); Zeros( X, n, k ); DistMultiVec<Real> q(comm), y(comm), z(comm); auto& qLoc = q.Matrix(); auto& XLoc = X.Matrix(); auto& BLoc = B.LockedMatrix(); for( Int j=0; j<k; ++j ) { auto xLoc = XLoc( ALL, IR(j) ); auto bLoc = BLoc( ALL, IR(j) ); Zeros( c, n, 1 ); Zeros( q, m, 1 ); qLoc = bLoc; Multiply( ADJOINT, Real(-1), A, q, Real(0), c ); Zeros( q, n, 1 ); qLoc = xLoc; El::QP( Q, AHat, bHat, c, q, y, z, ctrl ); xLoc = qLoc; } }
// Blocked upper Cholesky sweep (call-stack label "cholesky::UVar3").
// Per iteration the diagonal block is factored by the unblocked kernel, the
// row panel to its right is solved against that factor, and the trailing
// block A22 receives the corresponding Hermitian rank-k downdate.
// Only A11, A12 and A22 are touched.
inline void UVar3( Matrix<F>& A )
{
#ifndef RELEASE
    CallStackEntry entry("cholesky::UVar3");
    if( A.Height() != A.Width() )
        throw std::logic_error
        ("Can only compute Cholesky factor of square matrices");
#endif
    // Quadrant views
    Matrix<F> ATL, ATR,
              ABL, ABR;
    // Repartitioned 3x3 views
    Matrix<F> A00, A01, A02,
              A10, A11, A12,
              A20, A21, A22;

    PartitionDownDiagonal
    ( A, ATL, ATR,
         ABL, ABR, 0 );
    while( 0 < ABR.Height() )
    {
        RepartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, /**/ A01, A02,
         /*************/ /******************/
               /**/       A10, /**/ A11, A12,
          ABL, /**/ ABR,  A20, /**/ A21, A22 );

        // Factor the diagonal block in place
        cholesky::UVar3Unb( A11 );

        // A12 := inv(A11)' A12
        Trsm( LEFT, UPPER, ADJOINT, NON_UNIT, F(1), A11, A12 );

        // A22 := A22 - A12' A12
        Herk( UPPER, ADJOINT, F(-1), A12, F(1), A22 );

        SlidePartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, A01, /**/ A02,
               /**/       A10, A11, /**/ A12,
         /*************/ /******************/
          ABL, /**/ ABR,  A20, A21, /**/ A22 );
    }
}
void Covariance( const Matrix<F>& D, Matrix<F>& S ) { DEBUG_CSE const Int numObs = D.Height(); const Int n = D.Width(); // Compute the average column Matrix<F> ones, xMean; Ones( ones, numObs, 1 ); Gemv( TRANSPOSE, F(1)/F(numObs), D, ones, xMean ); // Subtract the mean from each column of D Matrix<F> DDev( D ); for( Int i=0; i<numObs; ++i ) blas::Axpy ( n, F(-1), xMean.LockedBuffer(), 1, DDev.Buffer(i,0), DDev.LDim() ); // Form S := 1/(numObs-1) DDev DDev' Herk( LOWER, ADJOINT, Base<F>(1)/Base<F>(numObs-1), DDev, S ); Conjugate( S ); MakeHermitian( LOWER, S ); }
void QP ( const ElementalMatrix<Real>& APre, const ElementalMatrix<Real>& BPre, ElementalMatrix<Real>& XPre, const qp::direct::Ctrl<Real>& ctrl ) { DEBUG_CSE DistMatrixReadProxy<Real,Real,MC,MR> AProx( APre ), BProx( BPre ); DistMatrixWriteProxy<Real,Real,MC,MR> XProx( XPre ); auto& A = AProx.GetLocked(); auto& B = BProx.GetLocked(); auto& X = XProx.Get(); const Int n = A.Width(); const Int k = B.Width(); const Grid& g = A.Grid(); DistMatrix<Real> Q(g), AHat(g), bHat(g), c(g); Herk( LOWER, ADJOINT, Real(1), A, Q ); Zeros( AHat, 0, n ); Zeros( bHat, 0, 1 ); Zeros( X, n, k ); DistMatrix<Real> y(g), z(g); for( Int j=0; j<k; ++j ) { auto x = X( ALL, IR(j) ); auto b = B( ALL, IR(j) ); Zeros( c, n, 1 ); Gemv( ADJOINT, Real(-1), A, b, Real(0), c ); El::QP( Q, AHat, bHat, c, x, y, z, ctrl ); } }
void Covariance ( const ElementalMatrix<F>& DPre, ElementalMatrix<F>& SPre ) { DEBUG_CSE DistMatrixReadProxy<F,F,MC,MR> DProx( DPre ); DistMatrixWriteProxy<F,F,MC,MR> SProx( SPre ); auto& D = DProx.GetLocked(); auto& S = SProx.Get(); const Grid& g = D.Grid(); const Int numObs = D.Height(); // Compute the average column DistMatrix<F> ones(g), xMean(g); Ones( ones, numObs, 1 ); Gemv( TRANSPOSE, F(1)/F(numObs), D, ones, xMean ); DistMatrix<F,MR,STAR> xMean_MR(g); xMean_MR.AlignWith( D ); xMean_MR = xMean; // Subtract the mean from each column of D DistMatrix<F> DDev( D ); for( Int iLoc=0; iLoc<DDev.LocalHeight(); ++iLoc ) blas::Axpy ( DDev.LocalWidth(), F(-1), xMean_MR.LockedBuffer(), 1, DDev.Buffer(iLoc,0), DDev.LDim() ); // Form S := 1/(numObs-1) DDev DDev' Herk( LOWER, ADJOINT, Base<F>(1)/Base<F>(numObs-1), DDev, S ); Conjugate( S ); MakeHermitian( LOWER, S ); }
// Iteration loop of a QR/Cholesky-based dynamically weighted iteration that
// updates A in place (sequential version).
//
// Parameters:
//   A          m x n with m >= n (LogicError otherwise); overwritten.
//   sMinUpper  used only to seed L := sMinUpper / sqrt(n).
//   ctrl       supplies maxIts and the colPiv flag forwarded to the QR.
//
// Each pass derives the weights (a, b, c) from the current value of L and
// then either
//   * (c > 100) takes a QR-based step on the stacked matrix [sqrt(c) A; I],
//   * or takes a Cholesky-based step on I + c A^H A.
// The loop stops after maxIts passes, or once both ||A_prev - A||_F is at
// most cbrt(5 eps) and |1 - L| is at most 5 eps.
//
// Returns the iteration counters (total, QR steps, Cholesky steps).
QDWHInfo QDWHInner( Matrix<F>& A, Base<F> sMinUpper, const QDWHCtrl& ctrl )
{
    EL_DEBUG_CSE
    typedef Base<F> Real;
    typedef Complex<Real> Cpx;
    const Int m = A.Height();
    const Int n = A.Width();
    const Real oneThird = Real(1)/Real(3);
    if( m < n )
        LogicError("Height cannot be less than width");

    QDWHInfo info;

    QRCtrl<Base<F>> qrCtrl;
    qrCtrl.colPiv = ctrl.colPiv;

    const Real eps = limits::Epsilon<Real>();
    const Real tol = 5*eps;
    const Real cubeRootTol = Pow(tol,oneThird);
    Real L = sMinUpper / Sqrt(Real(n));

    Real frobNormADiff;
    Matrix<F> ALast, ATemp, C;
    // Workspace for the QR step: QT is the top m rows, QB the bottom n
    Matrix<F> Q( m+n, n );
    auto QT = Q( IR(0,m  ), ALL );
    auto QB = Q( IR(m,END), ALL );

    while( info.numIts < ctrl.maxIts )
    {
        ALast = A;

        // Weights for this pass
        Real L2;
        Cpx dd, sqd;
        if( Abs(1-L) < tol )
        {
            L2 = 1;
            dd = 0;
            sqd = 1;
        }
        else
        {
            L2 = L*L;
            dd = Pow( 4*(1-L2)/(L2*L2), oneThird );
            sqd = Sqrt( Real(1)+dd );
        }
        const Cpx arg = Real(8) - Real(4)*dd + Real(8)*(2-L2)/(L2*sqd);
        const Real a = (sqd + Sqrt(arg)/Real(2)).real();
        const Real b = (a-1)*(a-1)/4;
        const Real c = a+b-1;
        const Real alpha = a-b/c;
        const Real beta = b/c;

        L = L*(a+b*L2)/(1+c*L2);

        if( c > 100 )
        {
            // QR-based step:
            // factor [sqrt(c) A; I] and set A := (alpha/sqrt(c)) QT QB^H
            //                                    + beta A
            const Real sqrtC = Sqrt(c);
            QT = A;
            QT *= sqrtC;
            MakeIdentity( QB );
            qr::ExplicitUnitary( Q, true, qrCtrl );
            Gemm( NORMAL, ADJOINT, F(alpha/sqrtC), QT, QB, F(beta), A );
            ++info.numQRIts;
        }
        else
        {
            // Cholesky-based step:
            // C := I + c A^H A = L L^H, ATemp := A inv(L^H) inv(L),
            // A := beta A + alpha ATemp
            Identity( C, n, n );
            Herk( LOWER, ADJOINT, c, A, Real(1), C );
            Cholesky( LOWER, C );
            ATemp = A;
            Trsm( RIGHT, LOWER, ADJOINT, NON_UNIT, F(1), C, ATemp );
            Trsm( RIGHT, LOWER, NORMAL,  NON_UNIT, F(1), C, ATemp );
            A *= beta;
            Axpy( alpha, ATemp, A );
            ++info.numCholIts;
        }
        ++info.numIts;

        // Convergence test on the change in A and on L
        ALast -= A;
        frobNormADiff = FrobeniusNorm( ALast );
        if( frobNormADiff <= cubeRootTol && Abs(1-L) <= tol )
            break;
    }
    return info;
}
inline void RLHF ( Conjugation conjugation, int offset, const Matrix<Complex<R> >& H, const Matrix<Complex<R> >& t, Matrix<Complex<R> >& A ) { #ifndef RELEASE PushCallStack("apply_packed_reflectors::RLHF"); if( offset > 0 || offset < -H.Width() ) throw std::logic_error("Transforms out of bounds"); if( H.Width() != A.Width() ) throw std::logic_error ("Width of transforms must equal width of target matrix"); if( t.Height() != H.DiagonalLength( offset ) ) throw std::logic_error("t must be the same length as H's offset diag"); #endif typedef Complex<R> C; Matrix<C> HTL, HTR, H00, H01, H02, HPan, HPanCopy, HBL, HBR, H10, H11, H12, H20, H21, H22; Matrix<C> ALeft; Matrix<C> tT, t0, tB, t1, t2; Matrix<C> SInv, Z; LockedPartitionDownDiagonal ( H, HTL, HTR, HBL, HBR, 0 ); LockedPartitionDown ( t, tT, tB, 0 ); while( HTL.Height() < H.Height() && HTL.Width() < H.Width() ) { LockedRepartitionDownDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 ); const int HPanWidth = H10.Width() + H11.Width(); const int HPanOffset = std::min( H11.Height(), std::max(-offset-H00.Height(),0) ); const int HPanHeight = H11.Height()-HPanOffset; LockedView ( HPan, H, H00.Height()+HPanOffset, 0, HPanHeight, HPanWidth ); LockedRepartitionDown ( tT, t0, /**/ /**/ t1, tB, t2, HPanHeight ); View( ALeft, A, 0, 0, A.Height(), HPanWidth ); Zeros( ALeft.Height(), HPan.Height(), Z ); Zeros( HPan.Height(), HPan.Height(), SInv ); //--------------------------------------------------------------------// HPanCopy = HPan; MakeTrapezoidal( RIGHT, LOWER, offset, HPanCopy ); SetDiagonal( RIGHT, offset, HPanCopy, C(1) ); Herk( UPPER, NORMAL, C(1), HPanCopy, C(0), SInv ); FixDiagonal( conjugation, t1, SInv ); Gemm( NORMAL, ADJOINT, C(1), ALeft, HPanCopy, C(0), Z ); Trsm( RIGHT, UPPER, NORMAL, NON_UNIT, C(1), SInv, Z ); Gemm( NORMAL, NORMAL, C(-1), Z, HPanCopy, C(1), ALeft ); 
//--------------------------------------------------------------------// SlideLockedPartitionDownDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 ); SlideLockedPartitionDown ( tT, t0, t1, /**/ /**/ tB, t2 ); } #ifndef RELEASE PopCallStack(); #endif }
// ADMM iteration for a linear program with data (A, b, c); the iterate z is
// kept non-negative by clipping (sequential dense version).
//
// Parameters:
//   A, b, c  problem data (A is m x n).
//   z        output; resized to n x 1 and zeroed before iterating.
//   ctrl     rho, alpha, maxIter, absTol, relTol, plus the `inv` flag
//            (apply an explicit inverse of the factored block instead of two
//            triangular solves) and the `print` flag (per-iteration log).
//
// Returns the number of completed iterations. It equals ctrl.maxIter when
// the stopping test never passed, in which case a message is printed.
Int ADMM
( const Matrix<Real>& A,
  const Matrix<Real>& b,
  const Matrix<Real>& c,
        Matrix<Real>& z,
  const ADMMCtrl<Real>& ctrl )
{
    EL_DEBUG_CSE

    // Cache a custom partially-pivoted LU factorization of
    //    |  rho*I   A^H | = | B11  B12 |
    //    |  A       0   |   | B21  B22 |
    // by (justifiably) avoiding pivoting in the first n steps of
    // the factorization, so that
    //    [I,rho*I] = lu(rho*I).
    // The factorization would then proceed with
    //    B21 := B21 U11^{-1} = A (rho*I)^{-1} = A/rho
    //    B12 := L11^{-1} B12 = I A^H = A^H.
    // The Schur complement would then be
    //    B22 := B22 - B21 B12 = 0 - (A*A^H)/rho.
    // We then factor said matrix with LU with partial pivoting and
    // swap the necessary rows of B21 in order to implicitly commute
    // the row pivots with the Gauss transforms in the manner standard
    // for GEPP. Unless A A' is singular, pivoting should not be needed,
    // as Cholesky factorization of the negative matrix should be valid.
    //
    // The result is the factorization
    //   | I 0   | | rho*I A^H | = | I   0   | | rho*I U12 |,
    //   | 0 P22 | | A     0   |   | L21 L22 | | 0     U22 |
    // where [L22,U22] are stored within B22.
    Matrix<Real> U12, L21, B22, bPiv;
    Adjoint( A, U12 );
    L21 = A;
    L21 *= 1/ctrl.rho;
    Herk( LOWER, NORMAL, -1/ctrl.rho, A, B22 );
    MakeHermitian( LOWER, B22 );
    // TODO: Replace with sparse-direct Cholesky version?
    Permutation P2;
    LU( B22, P2 );
    P2.PermuteRows( L21 );
    bPiv = b;
    P2.PermuteRows( bPiv );

    // Possibly form the inverse of L22 U22
    Matrix<Real> X22;
    if( ctrl.inv )
    {
        X22 = B22;
        MakeTrapezoidal( LOWER, X22 );
        FillDiagonal( X22, Real(1) );
        TriangularInverse( LOWER, UNIT, X22 );
        Trsm( LEFT, UPPER, NORMAL, NON_UNIT, Real(1), B22, X22 );
    }

    Int numIter=0;
    const Int m = A.Height();
    const Int n = A.Width();
    // xTmp and y are the top n and bottom m entries of the stacked vector g
    Matrix<Real> g, xTmp, y, t;
    Zeros( g, m+n, 1 );
    PartitionDown( g, xTmp, y, n );
    Matrix<Real> u, zOld, xHat;
    Zeros( z, n, 1 );
    Zeros( u, n, 1 );
    Zeros( t, n, 1 );
    while( numIter < ctrl.maxIter )
    {
        zOld = z;

        // Find x from
        //  | rho*I  A^H | | x | = | rho*(z-u)-c |
        //  | A      0   | | y |   | b           |
        // via our cached custom factorization:
        //
        // |x| = inv(U) inv(L) P' |rho*(z-u)-c|
        // |y|                    |b          |
        //     = |rho*I U12|^{-1} |I   0  | |I 0   | |rho*(z-u)-c|
        //     = |0     U22|      |L21 L22| |0 P22'| |b          |
        //     = "                "         |rho*(z-u)-c|
        //                                  | P22' b    |
        xTmp = z;
        xTmp -= u;
        xTmp *= ctrl.rho;
        xTmp -= c;
        y = bPiv;
        Gemv( NORMAL, Real(-1), L21, xTmp, Real(1), y );
        if( ctrl.inv )
        {
            Gemv( NORMAL, Real(1), X22, y, t );
            y = t;
        }
        else
        {
            Trsv( LOWER, NORMAL, UNIT, B22, y );
            Trsv( UPPER, NORMAL, NON_UNIT, B22, y );
        }
        Gemv( NORMAL, Real(-1), U12, y, Real(1), xTmp );
        xTmp *= 1/ctrl.rho;

        // xHat := alpha*x + (1-alpha)*zOld
        xHat = xTmp;
        xHat *= ctrl.alpha;
        Axpy( 1-ctrl.alpha, zOld, xHat );

        // z := pos(xHat+u)
        z = xHat;
        z += u;
        LowerClip( z, Real(0) );

        // u := u + (xHat-z)
        u += xHat;
        u -= z;

        // rNorm := || x - z ||_2
        t = xTmp;
        t -= z;
        const Real rNorm = FrobeniusNorm( t );
        // sNorm := |rho| || z - zOld ||_2
        t = z;
        t -= zOld;
        const Real sNorm = Abs(ctrl.rho)*FrobeniusNorm( t );

        const Real epsPri = Sqrt(Real(n))*ctrl.absTol +
            ctrl.relTol*Max(FrobeniusNorm(xTmp),FrobeniusNorm(z));
        const Real epsDual = Sqrt(Real(n))*ctrl.absTol +
            ctrl.relTol*Abs(ctrl.rho)*FrobeniusNorm(u);

        if( ctrl.print )
        {
            // The objective value and the distance of x from its positive
            // part are only reported, so compute them only when printing.
            const Real objective = Dot( c, xTmp );
            t = xTmp;
            LowerClip( t, Real(0) );
            t -= xTmp;
            const Real clipDist = FrobeniusNorm( t );
            cout << numIter << ": "
              << "||x-z||_2=" << rNorm << ", "
              << "epsPri=" << epsPri << ", "
              << "|rho| ||z-zOld||_2=" << sNorm << ", "
              << "epsDual=" << epsDual << ", "
              << "||x-Pos(x)||_2=" << clipDist << ", "
              << "c'x=" << objective << endl;
        }
        if( rNorm < epsPri && sNorm < epsDual )
            break;
        ++numIter;
    }
    if( ctrl.maxIter == numIter )
        cout << "ADMM failed to converge" << endl;
    return numIter;
}
inline void RLHF ( Conjugation conjugation, int offset, const DistMatrix<Complex<R> >& H, const DistMatrix<Complex<R>,MD,STAR>& t, DistMatrix<Complex<R> >& A ) { #ifndef RELEASE PushCallStack("apply_packed_reflectors::RLHF"); if( H.Grid() != t.Grid() || t.Grid() != A.Grid() ) throw std::logic_error ("{H,t,A} must be distributed over the same grid"); if( offset > 0 || offset < -H.Width() ) throw std::logic_error("Transforms out of bounds"); if( H.Width() != A.Width() ) throw std::logic_error ("Width of transforms must equal width of target matrix"); if( t.Height() != H.DiagonalLength( offset ) ) throw std::logic_error("t must be the same length as H's offset diag"); if( !t.AlignedWithDiagonal( H, offset ) ) throw std::logic_error("t must be aligned with H's 'offset' diagonal"); #endif typedef Complex<R> C; const Grid& g = H.Grid(); DistMatrix<C> HTL(g), HTR(g), H00(g), H01(g), H02(g), HPan(g), HPanCopy(g), HBL(g), HBR(g), H10(g), H11(g), H12(g), H20(g), H21(g), H22(g); DistMatrix<C> ALeft(g); DistMatrix<C,MD,STAR> tT(g), t0(g), tB(g), t1(g), t2(g); DistMatrix<C,STAR,VR > HPan_STAR_VR(g); DistMatrix<C,STAR,MR > HPan_STAR_MR(g); DistMatrix<C,STAR,STAR> t1_STAR_STAR(g); DistMatrix<C,STAR,STAR> SInv_STAR_STAR(g); DistMatrix<C,STAR,MC > ZAdj_STAR_MC(g); DistMatrix<C,STAR,VC > ZAdj_STAR_VC(g); LockedPartitionDownDiagonal ( H, HTL, HTR, HBL, HBR, 0 ); LockedPartitionDown ( t, tT, tB, 0 ); while( HTL.Height() < H.Height() && HTL.Width() < H.Width() ) { LockedRepartitionDownDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 ); const int HPanWidth = H10.Width() + H11.Width(); const int HPanOffset = std::min( H11.Height(), std::max(-offset-H00.Height(),0) ); const int HPanHeight = H11.Height()-HPanOffset; LockedView ( HPan, H, H00.Height()+HPanOffset, 0, HPanHeight, HPanWidth ); LockedRepartitionDown ( tT, t0, /**/ /**/ t1, tB, t2, HPanHeight ); View( ALeft, A, 0, 0, A.Height(), 
HPanWidth ); HPan_STAR_MR.AlignWith( ALeft ); ZAdj_STAR_MC.AlignWith( ALeft ); ZAdj_STAR_VC.AlignWith( ALeft ); Zeros( HPan.Height(), ALeft.Height(), ZAdj_STAR_MC ); Zeros( HPan.Height(), HPan.Height(), SInv_STAR_STAR ); //--------------------------------------------------------------------// HPanCopy = HPan; MakeTrapezoidal( RIGHT, LOWER, offset, HPanCopy ); SetDiagonal( RIGHT, offset, HPanCopy, C(1) ); HPan_STAR_VR = HPanCopy; Herk ( UPPER, NORMAL, C(1), HPan_STAR_VR.LockedMatrix(), C(0), SInv_STAR_STAR.Matrix() ); SInv_STAR_STAR.SumOverGrid(); t1_STAR_STAR = t1; FixDiagonal( conjugation, t1_STAR_STAR, SInv_STAR_STAR ); HPan_STAR_MR = HPan_STAR_VR; LocalGemm ( NORMAL, ADJOINT, C(1), HPan_STAR_MR, ALeft, C(0), ZAdj_STAR_MC ); ZAdj_STAR_VC.SumScatterFrom( ZAdj_STAR_MC ); LocalTrsm ( LEFT, UPPER, ADJOINT, NON_UNIT, C(1), SInv_STAR_STAR, ZAdj_STAR_VC ); ZAdj_STAR_MC = ZAdj_STAR_VC; LocalGemm ( ADJOINT, NORMAL, C(-1), ZAdj_STAR_MC, HPan_STAR_MR, C(1), ALeft ); //--------------------------------------------------------------------// HPan_STAR_MR.FreeAlignments(); ZAdj_STAR_MC.FreeAlignments(); ZAdj_STAR_VC.FreeAlignments(); SlideLockedPartitionDownDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 ); SlideLockedPartitionDown ( tT, t0, t1, /**/ /**/ tB, t2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void RUVF ( Conjugation conjugation, Int offset, const Matrix<F>& H, const Matrix<F>& t, Matrix<F>& A ) { #ifndef RELEASE CallStackEntry cse("apply_packed_reflectors::RUVF"); // TODO: Proper dimension checks if( t.Height() != H.DiagonalLength(offset) ) LogicError("t must be the same length as H's offset diag"); #endif Matrix<F> HTL, HTR, H00, H01, H02, HPan, HPanCopy, HBL, HBR, H10, H11, H12, H20, H21, H22; Matrix<F> ALeft; Matrix<F> tT, t0, tB, t1, t2; Matrix<F> SInv, Z; LockedPartitionDownOffsetDiagonal ( offset, H, HTL, HTR, HBL, HBR, 0 ); LockedPartitionDown ( t, tT, tB, 0 ); while( HTL.Height() < H.Height() && HTL.Width() < H.Width() ) { LockedRepartitionDownDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 ); LockedRepartitionDown ( tT, t0, /**/ /**/ t1, tB, t2 ); LockedView2x1( HPan, H01, H11 ); View( ALeft, A, 0, 0, A.Height(), HPan.Height() ); //--------------------------------------------------------------------// HPanCopy = HPan; MakeTrapezoidal( UPPER, HPanCopy, 0, RIGHT ); SetDiagonal( HPanCopy, F(1), 0, RIGHT ); Herk( UPPER, ADJOINT, F(1), HPanCopy, SInv ); FixDiagonal( conjugation, t1, SInv ); Gemm( NORMAL, NORMAL, F(1), ALeft, HPanCopy, Z ); Trsm( RIGHT, UPPER, NORMAL, NON_UNIT, F(1), SInv, Z ); Gemm( NORMAL, ADJOINT, F(-1), Z, HPanCopy, F(1), ALeft ); //--------------------------------------------------------------------// SlideLockedPartitionDownDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 ); SlideLockedPartitionDown ( tT, t0, t1, /**/ /**/ tB, t2 ); } }
inline void ApplyPackedReflectorsLUVF ( Conjugation conjugation, int offset, const Matrix<Complex<R> >& H, const Matrix<Complex<R> >& t, Matrix<Complex<R> >& A ) { #ifndef RELEASE PushCallStack("internal::ApplyPackedReflectorsLUVF"); if( offset < 0 || offset > H.Height() ) throw std::logic_error("Transforms out of bounds"); if( H.Width() != A.Height() ) throw std::logic_error ("Width of transforms must equal height of target matrix"); if( t.Height() != H.DiagonalLength( offset ) ) throw std::logic_error("t must be the same length as H's offset diag"); #endif typedef Complex<R> C; Matrix<C> HTL, HTR, H00, H01, H02, HPan, HBL, HBR, H10, H11, H12, H20, H21, H22; Matrix<C> AT, A0, ATop, AB, A1, A2; Matrix<C> tT, t0, tB, t1, t2; Matrix<C> HPanCopy; Matrix<C> SInv, Z; LockedPartitionDownDiagonal ( H, HTL, HTR, HBL, HBR, 0 ); LockedPartitionDown ( t, tT, tB, 0 ); PartitionDown ( A, AT, AB, 0 ); while( HTL.Height() < H.Height() && HTL.Width() < H.Width() ) { LockedRepartitionDownDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 ); const int HPanHeight = H01.Height() + H11.Height(); const int HPanOffset = std::min( H11.Width(), std::max(offset-H00.Width(),0) ); const int HPanWidth = H11.Width()-HPanOffset; HPan.LockedView( H, 0, H00.Width()+HPanOffset, HPanHeight, HPanWidth ); LockedRepartitionDown ( tT, t0, /**/ /**/ t1, tB, t2, HPanWidth ); RepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 ); ATop.View2x1( A0, A1 ); Zeros( HPan.Width(), ATop.Width(), Z ); Zeros( HPan.Width(), HPan.Width(), SInv ); //--------------------------------------------------------------------// HPanCopy = HPan; MakeTrapezoidal( RIGHT, UPPER, offset, HPanCopy ); SetDiagonalToOne( RIGHT, offset, HPanCopy ); Herk( LOWER, ADJOINT, C(1), HPanCopy, C(0), SInv ); FixDiagonal( conjugation, t1, SInv ); Gemm( ADJOINT, NORMAL, C(1), HPanCopy, ATop, C(0), Z ); Trsm( LEFT, LOWER, NORMAL, NON_UNIT, C(1), SInv, Z ); Gemm( 
NORMAL, NORMAL, C(-1), HPanCopy, Z, C(1), ATop ); //--------------------------------------------------------------------// SlideLockedPartitionDownDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 ); SlideLockedPartitionDown ( tT, t0, t1, /**/ /**/ tB, t2 ); SlidePartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); } #ifndef RELEASE PopCallStack(); #endif }
int QDWH ( Matrix<F>& A, typename Base<F>::type lowerBound, typename Base<F>::type upperBound ) { #ifndef RELEASE PushCallStack("QDWH"); #endif typedef typename Base<F>::type R; const int height = A.Height(); const int width = A.Width(); const R oneHalf = R(1)/R(2); const R oneThird = R(1)/R(3); if( height < width ) throw std::logic_error("Height cannot be less than width"); const R epsilon = lapack::MachineEpsilon<R>(); const R tol = 5*epsilon; const R cubeRootTol = Pow(tol,oneThird); // Form the first iterate Scale( 1/upperBound, A ); int numIts=0; R frobNormADiff; Matrix<F> ALast; Matrix<F> Q( height+width, width ); Matrix<F> QT, QB; PartitionDown( Q, QT, QB, height ); Matrix<F> C; Matrix<F> ATemp; do { ++numIts; ALast = A; R L2; Complex<R> dd, sqd; if( Abs(1-lowerBound) < tol ) { L2 = 1; dd = 0; sqd = 1; } else { L2 = lowerBound*lowerBound; dd = Pow( 4*(1-L2)/(L2*L2), oneThird ); sqd = Sqrt( 1+dd ); } const Complex<R> arg = 8 - 4*dd + 8*(2-L2)/(L2*sqd); const R a = (sqd + Sqrt( arg )/2).real; const R b = (a-1)*(a-1)/4; const R c = a+b-1; const Complex<R> alpha = a-b/c; const Complex<R> beta = b/c; lowerBound = lowerBound*(a+b*L2)/(1+c*L2); if( c > 100 ) { // // The standard QR-based algorithm // QT = A; Scale( Sqrt(c), QT ); MakeIdentity( QB ); ExplicitQR( Q ); Gemm( NORMAL, ADJOINT, alpha/Sqrt(c), QT, QB, beta, A ); } else { // // Use faster Cholesky-based algorithm since A is well-conditioned // Identity( width, width, C ); Herk( LOWER, ADJOINT, F(c), A, F(1), C ); Cholesky( LOWER, C ); ATemp = A; Trsm( RIGHT, LOWER, ADJOINT, NON_UNIT, F(1), C, ATemp ); Trsm( RIGHT, LOWER, NORMAL, NON_UNIT, F(1), C, ATemp ); Scale( beta, A ); Axpy( alpha, ATemp, A ); } Axpy( F(-1), A, ALast ); frobNormADiff = Norm( ALast, FROBENIUS_NORM ); } while( frobNormADiff > cubeRootTol || Abs(1-lowerBound) > tol ); #ifndef RELEASE PopCallStack(); #endif return numIts; }
inline void RUVF ( Conjugation conjugation, Int offset, const DistMatrix<F>& H, const DistMatrix<F,MD,STAR>& t, DistMatrix<F>& A ) { #ifndef RELEASE CallStackEntry cse("apply_packed_reflectors::RUVF"); if( H.Grid() != t.Grid() || t.Grid() != A.Grid() ) LogicError("{H,t,A} must be distributed over the same grid"); // TODO: Proper dimension checks if( t.Height() != H.DiagonalLength(offset) ) LogicError("t must be the same length as H's offset diag"); if( !t.AlignedWithDiagonal( H, offset ) ) LogicError("t must be aligned with H's 'offset' diagonal"); #endif const Grid& g = H.Grid(); DistMatrix<F> HTL(g), HTR(g), H00(g), H01(g), H02(g), HPan(g), HPanCopy(g), HBL(g), HBR(g), H10(g), H11(g), H12(g), H20(g), H21(g), H22(g); DistMatrix<F> ALeft(g); DistMatrix<F,MD,STAR> tT(g), t0(g), tB(g), t1(g), t2(g); DistMatrix<F,VC, STAR> HPan_VC_STAR(g); DistMatrix<F,MR, STAR> HPan_MR_STAR(g); DistMatrix<F,STAR,STAR> t1_STAR_STAR(g); DistMatrix<F,STAR,STAR> SInv_STAR_STAR(g); DistMatrix<F,STAR,MC > ZAdj_STAR_MC(g); DistMatrix<F,STAR,VC > ZAdj_STAR_VC(g); LockedPartitionDownOffsetDiagonal ( offset, H, HTL, HTR, HBL, HBR, 0 ); LockedPartitionDown ( t, tT, tB, 0 ); while( HTL.Height() < H.Height() && HTL.Width() < H.Width() ) { LockedRepartitionDownDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 ); LockedRepartitionDown ( tT, t0, /**/ /**/ t1, tB, t2 ); LockedView2x1( HPan, H01, H11 ); View( ALeft, A, 0, 0, A.Height(), HPan.Height() ); HPan_MR_STAR.AlignWith( ALeft ); ZAdj_STAR_MC.AlignWith( ALeft ); ZAdj_STAR_VC.AlignWith( ALeft ); //--------------------------------------------------------------------// HPanCopy = HPan; MakeTrapezoidal( UPPER, HPanCopy, 0, RIGHT ); SetDiagonal( HPanCopy, F(1), 0, RIGHT ); HPan_VC_STAR = HPanCopy; Zeros( SInv_STAR_STAR, HPan.Width(), HPan.Width() ); Herk ( UPPER, ADJOINT, F(1), HPan_VC_STAR.LockedMatrix(), F(0), SInv_STAR_STAR.Matrix() ); 
SInv_STAR_STAR.SumOverGrid(); t1_STAR_STAR = t1; FixDiagonal( conjugation, t1_STAR_STAR, SInv_STAR_STAR ); HPan_MR_STAR = HPan_VC_STAR; LocalGemm( ADJOINT, ADJOINT, F(1), HPan_MR_STAR, ALeft, ZAdj_STAR_MC ); ZAdj_STAR_VC.SumScatterFrom( ZAdj_STAR_MC ); LocalTrsm ( LEFT, UPPER, ADJOINT, NON_UNIT, F(1), SInv_STAR_STAR, ZAdj_STAR_VC ); ZAdj_STAR_MC = ZAdj_STAR_VC; LocalGemm ( ADJOINT, ADJOINT, F(-1), ZAdj_STAR_MC, HPan_MR_STAR, F(1), ALeft ); //--------------------------------------------------------------------// SlideLockedPartitionDownDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 ); SlideLockedPartitionDown ( tT, t0, t1, /**/ /**/ tB, t2 ); } }
inline void ApplyPackedReflectorsLUHB ( Conjugation conjugation, int offset, const Matrix<Complex<R> >& H, const Matrix<Complex<R> >& t, Matrix<Complex<R> >& A ) { #ifndef RELEASE PushCallStack("internal::ApplyPackedReflectorsLUHB"); if( offset < 0 || offset > H.Width() ) throw std::logic_error("Transforms out of bounds"); if( H.Width() != A.Height() ) throw std::logic_error ("Width of transforms must equal height of target matrix"); if( t.Height() != H.DiagonalLength( offset ) ) throw std::logic_error("t must be the same length as H's offset diag"); #endif typedef Complex<R> C; Matrix<C> HTL, HTR, H00, H01, H02, HPan, HPanCopy, HBL, HBR, H10, H11, H12, H20, H21, H22; Matrix<C> ABottom; Matrix<C> tT, t0, tB, t1, t2; Matrix<C> SInv, Z; LockedPartitionUpDiagonal ( H, HTL, HTR, HBL, HBR, 0 ); LockedPartitionUp ( t, tT, tB, 0 ); while( HBR.Height() < H.Height() && HBR.Width() < H.Width() ) { LockedRepartitionUpDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 ); const int HPanWidth = H11.Width() + H12.Width(); const int HPanHeight = std::min( H11.Height(), std::max(HPanWidth-offset,0) ); const int leftover = A.Height()-HPanWidth; HPan.LockedView( H, H00.Height(), H00.Width(), HPanHeight, HPanWidth ); LockedRepartitionUp ( tT, t0, t1, /**/ /**/ tB, t2, HPanHeight ); ABottom.View( A, leftover, 0, HPanWidth, A.Width() ); Zeros( HPanHeight, ABottom.Width(), Z ); Zeros( HPanHeight, HPanHeight, SInv ); //--------------------------------------------------------------------// HPanCopy = HPan; MakeTrapezoidal( LEFT, UPPER, offset, HPanCopy ); SetDiagonalToOne( LEFT, offset, HPanCopy ); Herk( UPPER, NORMAL, C(1), HPanCopy, C(0), SInv ); FixDiagonal( conjugation, t1, SInv ); Gemm( NORMAL, NORMAL, C(1), HPanCopy, ABottom, C(0), Z ); Trsm( LEFT, UPPER, NORMAL, NON_UNIT, C(1), SInv, Z ); Gemm( ADJOINT, NORMAL, C(-1), HPanCopy, Z, C(1), ABottom ); 
//--------------------------------------------------------------------// SlideLockedPartitionUpDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 ); SlideLockedPartitionUp ( tT, t0, /**/ /**/ t1, tB, t2 ); } #ifndef RELEASE PopCallStack(); #endif }
void Ridge ( Orientation orientation, const Matrix<Field>& A, const Matrix<Field>& B, Base<Field> gamma, Matrix<Field>& X, RidgeAlg alg ) { EL_DEBUG_CSE const bool normal = ( orientation==NORMAL ); const Int m = ( normal ? A.Height() : A.Width() ); const Int n = ( normal ? A.Width() : A.Height() ); if( orientation == TRANSPOSE && IsComplex<Field>::value ) LogicError("Transpose version of complex Ridge not yet supported"); if( m >= n ) { Matrix<Field> Z; if( alg == RIDGE_CHOLESKY ) { if( orientation == NORMAL ) Herk( LOWER, ADJOINT, Base<Field>(1), A, Z ); else Herk( LOWER, NORMAL, Base<Field>(1), A, Z ); ShiftDiagonal( Z, Field(gamma*gamma) ); Cholesky( LOWER, Z ); if( orientation == NORMAL ) Gemm( ADJOINT, NORMAL, Field(1), A, B, X ); else Gemm( NORMAL, NORMAL, Field(1), A, B, X ); cholesky::SolveAfter( LOWER, NORMAL, Z, X ); } else if( alg == RIDGE_QR ) { Zeros( Z, m+n, n ); auto ZT = Z( IR(0,m), IR(0,n) ); auto ZB = Z( IR(m,m+n), IR(0,n) ); if( orientation == NORMAL ) ZT = A; else Adjoint( A, ZT ); FillDiagonal( ZB, Field(gamma) ); // NOTE: This QR factorization could exploit the upper-triangular // structure of the diagonal matrix ZB qr::ExplicitTriang( Z ); if( orientation == NORMAL ) Gemm( ADJOINT, NORMAL, Field(1), A, B, X ); else Gemm( NORMAL, NORMAL, Field(1), A, B, X ); cholesky::SolveAfter( LOWER, NORMAL, Z, X ); } else { Matrix<Field> U, V; Matrix<Base<Field>> s; if( orientation == NORMAL ) { SVDCtrl<Base<Field>> ctrl; ctrl.overwrite = false; SVD( A, U, s, V, ctrl ); } else { Matrix<Field> AAdj; Adjoint( A, AAdj ); SVDCtrl<Base<Field>> ctrl; ctrl.overwrite = true; SVD( AAdj, U, s, V, ctrl ); } auto sigmaMap = [=]( const Base<Field>& sigma ) { return sigma / (sigma*sigma + gamma*gamma); }; EntrywiseMap( s, MakeFunction(sigmaMap) ); Gemm( ADJOINT, NORMAL, Field(1), U, B, X ); DiagonalScale( LEFT, NORMAL, s, X ); U = X; Gemm( NORMAL, NORMAL, Field(1), V, U, X ); } } else { LogicError("This case not yet supported"); } }
inline void ApplyPackedReflectorsLUHB ( Conjugation conjugation, int offset, const DistMatrix<Complex<R> >& H, const DistMatrix<Complex<R>,MD,STAR>& t, DistMatrix<Complex<R> >& A ) { #ifndef RELEASE PushCallStack("internal::ApplyPackedReflectorsLUHB"); if( H.Grid() != t.Grid() || t.Grid() != A.Grid() ) throw std::logic_error ("{H,t,A} must be distributed over the same grid"); if( offset < 0 || offset > H.Width() ) throw std::logic_error("Transforms out of bounds"); if( H.Width() != A.Height() ) throw std::logic_error ("Width of transforms must equal height of target matrix"); if( t.Height() != H.DiagonalLength( offset ) ) throw std::logic_error("t must be the same length as H's offset diag"); if( !t.AlignedWithDiagonal( H, offset ) ) throw std::logic_error("t must be aligned with H's offset diagonal"); #endif typedef Complex<R> C; const Grid& g = H.Grid(); DistMatrix<C> HTL(g), HTR(g), H00(g), H01(g), H02(g), HPan(g), HPanCopy(g), HBL(g), HBR(g), H10(g), H11(g), H12(g), H20(g), H21(g), H22(g); DistMatrix<C> ABottom(g); DistMatrix<C,MD,STAR> tT(g), t0(g), tB(g), t1(g), t2(g); DistMatrix<C,STAR,VR > HPan_STAR_VR(g); DistMatrix<C,STAR,MC > HPan_STAR_MC(g); DistMatrix<C,STAR,STAR> t1_STAR_STAR(g); DistMatrix<C,STAR,STAR> SInv_STAR_STAR(g); DistMatrix<C,STAR,MR > Z_STAR_MR(g); DistMatrix<C,STAR,VR > Z_STAR_VR(g); LockedPartitionUpDiagonal ( H, HTL, HTR, HBL, HBR, 0 ); LockedPartitionUp ( t, tT, tB, 0 ); while( HBR.Height() < H.Height() && HBR.Width() < H.Width() ) { LockedRepartitionUpDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 ); const int HPanWidth = H11.Width() + H12.Width(); const int HPanHeight = std::min( H11.Height(), std::max(HPanWidth-offset,0) ); const int leftover = A.Height()-HPanWidth; HPan.LockedView( H, H00.Height(), H00.Width(), HPanHeight, HPanWidth ); LockedRepartitionUp ( tT, t0, t1, /**/ /**/ tB, t2, HPanHeight ); ABottom.View( A, leftover, 0, HPanWidth, 
A.Width() ); HPan_STAR_MC.AlignWith( ABottom ); Z_STAR_MR.AlignWith( ABottom ); Z_STAR_VR.AlignWith( ABottom ); Zeros( HPanHeight, ABottom.Width(), Z_STAR_MR ); Zeros( HPanHeight, HPanHeight, SInv_STAR_STAR ); //--------------------------------------------------------------------// HPanCopy = HPan; MakeTrapezoidal( LEFT, UPPER, offset, HPanCopy ); SetDiagonalToOne( LEFT, offset, HPanCopy ); HPan_STAR_VR = HPanCopy; Herk ( UPPER, NORMAL, C(1), HPan_STAR_VR.LockedLocalMatrix(), C(0), SInv_STAR_STAR.LocalMatrix() ); SInv_STAR_STAR.SumOverGrid(); t1_STAR_STAR = t1; FixDiagonal( conjugation, t1_STAR_STAR, SInv_STAR_STAR ); HPan_STAR_MC = HPan_STAR_VR; LocalGemm ( NORMAL, NORMAL, C(1), HPan_STAR_MC, ABottom, C(0), Z_STAR_MR ); Z_STAR_VR.SumScatterFrom( Z_STAR_MR ); LocalTrsm ( LEFT, UPPER, NORMAL, NON_UNIT, C(1), SInv_STAR_STAR, Z_STAR_VR ); Z_STAR_MR = Z_STAR_VR; LocalGemm ( ADJOINT, NORMAL, C(-1), HPan_STAR_MC, Z_STAR_MR, C(1), ABottom ); //--------------------------------------------------------------------// HPan_STAR_MC.FreeAlignments(); Z_STAR_MR.FreeAlignments(); Z_STAR_VR.FreeAlignments(); SlideLockedPartitionUpDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 ); SlideLockedPartitionUp ( tT, t0, /**/ /**/ t1, tB, t2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void LUHF ( Conjugation conjugation, Int offset, const DistMatrix<F>& H, const DistMatrix<F,MD,STAR>& t, DistMatrix<F>& A ) { #ifndef RELEASE CallStackEntry cse("apply_packed_reflectors::LUHF"); if( H.Grid() != t.Grid() || t.Grid() != A.Grid() ) LogicError("{H,t,A} must be distributed over the same grid"); // TODO: Proper dimension checks if( t.Height() != H.DiagonalLength(offset) ) LogicError("t must be the same length as H's offset diag"); if( !t.AlignedWithDiagonal( H, offset ) ) LogicError("t must be aligned with H's offset diagonal"); #endif const Grid& g = H.Grid(); DistMatrix<F> HTL(g), HTR(g), H00(g), H01(g), H02(g), HPan(g), HPanCopy(g), HBL(g), HBR(g), H10(g), H11(g), H12(g), H20(g), H21(g), H22(g); DistMatrix<F> AT(g), A0(g), AB(g), A1(g), A2(g); DistMatrix<F,MD,STAR> tT(g), t0(g), tB(g), t1(g), t2(g); DistMatrix<F,STAR,VR > HPan_STAR_VR(g); DistMatrix<F,STAR,MC > HPan_STAR_MC(g); DistMatrix<F,STAR,STAR> t1_STAR_STAR(g); DistMatrix<F,STAR,STAR> SInv_STAR_STAR(g); DistMatrix<F,STAR,MR > Z_STAR_MR(g); DistMatrix<F,STAR,VR > Z_STAR_VR(g); LockedPartitionDownOffsetDiagonal ( offset, H, HTL, HTR, HBL, HBR, 0 ); LockedPartitionDown ( t, tT, tB, 0 ); PartitionDown ( A, AT, AB, 0 ); while( HTL.Height() < H.Height() && HTL.Width() < H.Width() ) { LockedRepartitionDownDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 ); LockedRepartitionDown ( tT, t0, /**/ /**/ t1, tB, t2 ); RepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2, H11.Height() ); LockedView1x2( HPan, H11, H12 ); HPan_STAR_MC.AlignWith( AB ); Z_STAR_MR.AlignWith( AB ); Z_STAR_VR.AlignWith( AB ); //--------------------------------------------------------------------// HPanCopy = HPan; MakeTriangular( UPPER, HPanCopy ); SetDiagonal( HPanCopy, F(1) ); HPan_STAR_VR = HPanCopy; Zeros( SInv_STAR_STAR, HPan.Height(), HPan.Height() ); Herk ( LOWER, NORMAL, F(1), HPan_STAR_VR.LockedMatrix(), F(0), 
SInv_STAR_STAR.Matrix() ); SInv_STAR_STAR.SumOverGrid(); t1_STAR_STAR = t1; FixDiagonal( conjugation, t1_STAR_STAR, SInv_STAR_STAR ); HPan_STAR_MC = HPan_STAR_VR; LocalGemm( NORMAL, NORMAL, F(1), HPan_STAR_MC, AB, Z_STAR_MR ); Z_STAR_VR.SumScatterFrom( Z_STAR_MR ); LocalTrsm ( LEFT, LOWER, NORMAL, NON_UNIT, F(1), SInv_STAR_STAR, Z_STAR_VR ); Z_STAR_MR = Z_STAR_VR; LocalGemm( ADJOINT, NORMAL, F(-1), HPan_STAR_MC, Z_STAR_MR, F(1), AB ); //--------------------------------------------------------------------// SlideLockedPartitionDownDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 ); SlideLockedPartitionDown ( tT, t0, t1, /**/ /**/ tB, t2 ); SlidePartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); } }
inline void LUHF ( Conjugation conjugation, Int offset, const Matrix<F>& H, const Matrix<F>& t, Matrix<F>& A ) { #ifndef RELEASE CallStackEntry cse("apply_packed_reflectors::LUHF"); // TODO: Proper dimension checks if( t.Height() != H.DiagonalLength(offset) ) LogicError("t must be the same length as H's offset diag"); #endif Matrix<F> HTL, HTR, H00, H01, H02, HPan, HPanCopy, HBL, HBR, H10, H11, H12, H20, H21, H22; Matrix<F> AT, A0, AB, A1, A2; Matrix<F> tT, t0, tB, t1, t2; Matrix<F> SInv, Z; LockedPartitionDownOffsetDiagonal ( offset, H, HTL, HTR, HBL, HBR, 0 ); LockedPartitionDown ( t, tT, tB, 0 ); PartitionDown ( A, AT, AB, 0 ); while( HTL.Height() < H.Height() && HTL.Width() < H.Width() ) { LockedRepartitionDownDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 ); LockedRepartitionDown ( tT, t0, /**/ /**/ t1, tB, t2 ); RepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2, H11.Height() ); LockedView1x2( HPan, H11, H12 ); //--------------------------------------------------------------------// HPanCopy = HPan; MakeTriangular( UPPER, HPanCopy ); SetDiagonal( HPanCopy, F(1) ); Herk( LOWER, NORMAL, F(1), HPanCopy, SInv ); FixDiagonal( conjugation, t1, SInv ); Gemm( NORMAL, NORMAL, F(1), HPanCopy, AB, Z ); Trsm( LEFT, LOWER, NORMAL, NON_UNIT, F(1), SInv, Z ); Gemm( ADJOINT, NORMAL, F(-1), HPanCopy, Z, F(1), AB ); //--------------------------------------------------------------------// SlideLockedPartitionDownDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 ); SlideLockedPartitionDown ( tT, t0, t1, /**/ /**/ tB, t2 ); SlidePartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); } }
inline void internal::ApplyPackedReflectorsLLVF ( Conjugation conjugation, int offset, const DistMatrix<Complex<R>,MC,MR >& H, const DistMatrix<Complex<R>,MD,STAR>& t, DistMatrix<Complex<R>,MC,MR >& A ) { #ifndef RELEASE PushCallStack("internal::ApplyPackedReflectorsLLVF"); if( H.Grid() != t.Grid() || t.Grid() != A.Grid() ) throw std::logic_error ("{H,t,A} must be distributed over the same grid"); if( offset > 0 ) throw std::logic_error("Transforms cannot extend above matrix"); if( offset < -H.Height() ) throw std::logic_error("Transforms cannot extend below matrix"); if( H.Height() != A.Height() ) throw std::logic_error ("Height of transforms must equal height of target matrix"); if( t.Height() != H.DiagonalLength( offset ) ) throw std::logic_error("t must be the same length as H's offset diag."); if( !t.AlignedWithDiagonal( H, offset ) ) throw std::logic_error("t must be aligned with H's 'offset' diagonal"); #endif typedef Complex<R> C; const Grid& g = H.Grid(); // Matrix views DistMatrix<C,MC,MR> HTL(g), HTR(g), H00(g), H01(g), H02(g), HPan(g), HPanCopy(g), HBL(g), HBR(g), H10(g), H11(g), H12(g), H20(g), H21(g), H22(g); DistMatrix<C,MC,MR> AT(g), A0(g), AB(g), A1(g), A2(g); DistMatrix<C,MD,STAR> tT(g), t0(g), tB(g), t1(g), t2(g); DistMatrix<C,VC, STAR> HPan_VC_STAR(g); DistMatrix<C,MC, STAR> HPan_MC_STAR(g); DistMatrix<C,STAR,STAR> t1_STAR_STAR(g); DistMatrix<C,STAR,STAR> SInv_STAR_STAR(g); DistMatrix<C,STAR,MR > Z_STAR_MR(g); DistMatrix<C,STAR,VR > Z_STAR_VR(g); LockedPartitionDownDiagonal ( H, HTL, HTR, HBL, HBR, 0 ); LockedPartitionDown ( t, tT, tB, 0 ); PartitionDown ( A, AT, AB, 0 ); while( HTL.Height() < H.Height() && HTL.Width() < H.Width() ) { LockedRepartitionDownDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 ); int HPanHeight = H11.Height() + H21.Height(); int HPanWidth = std::min( H11.Width(), std::max(HPanHeight+offset,0) ); HPan.LockedView( H, 
H00.Height(), H00.Width(), HPanHeight, HPanWidth ); LockedRepartitionDown ( tT, t0, /**/ /**/ t1, tB, t2, HPanWidth ); RepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 ); HPan_MC_STAR.AlignWith( AB ); Z_STAR_MR.AlignWith( AB ); Z_STAR_VR.AlignWith( AB ); Z_STAR_MR.ResizeTo( HPan.Width(), AB.Width() ); SInv_STAR_STAR.ResizeTo( HPan.Width(), HPan.Width() ); Zero( SInv_STAR_STAR ); //--------------------------------------------------------------------// HPanCopy = HPan; MakeTrapezoidal( LEFT, LOWER, offset, HPanCopy ); SetDiagonalToOne( LEFT, offset, HPanCopy ); HPan_VC_STAR = HPanCopy; Herk ( UPPER, ADJOINT, (C)1, HPan_VC_STAR.LockedLocalMatrix(), (C)0, SInv_STAR_STAR.LocalMatrix() ); SInv_STAR_STAR.SumOverGrid(); t1_STAR_STAR = t1; FixDiagonal( conjugation, t1_STAR_STAR, SInv_STAR_STAR ); HPan_MC_STAR = HPanCopy; internal::LocalGemm ( ADJOINT, NORMAL, (C)1, HPan_MC_STAR, AB, (C)0, Z_STAR_MR ); Z_STAR_VR.SumScatterFrom( Z_STAR_MR ); internal::LocalTrsm ( LEFT, UPPER, ADJOINT, NON_UNIT, (C)1, SInv_STAR_STAR, Z_STAR_VR ); Z_STAR_MR = Z_STAR_VR; internal::LocalGemm ( NORMAL, NORMAL, (C)-1, HPan_MC_STAR, Z_STAR_MR, (C)1, AB ); //--------------------------------------------------------------------// HPan_MC_STAR.FreeAlignments(); Z_STAR_MR.FreeAlignments(); Z_STAR_VR.FreeAlignments(); SlideLockedPartitionDownDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 ); SlideLockedPartitionDown ( tT, t0, t1, /**/ /**/ tB, t2 ); SlidePartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); } #ifndef RELEASE PopCallStack(); #endif }
int Halley ( DistMatrix<F>& A, typename Base<F>::type upperBound ) { #ifndef RELEASE PushCallStack("Halley"); #endif typedef typename Base<F>::type R; const Grid& g = A.Grid(); const int height = A.Height(); const int width = A.Width(); const R oneHalf = R(1)/R(2); const R oneThird = R(1)/R(3); if( height < width ) throw std::logic_error("Height cannot be less than width"); const R epsilon = lapack::MachineEpsilon<R>(); const R tol = 5*epsilon; const R cubeRootTol = Pow(tol,oneThird); const R a = 3; const R b = 1; const R c = 3; // Form the first iterate Scale( 1/upperBound, A ); int numIts=0; R frobNormADiff; DistMatrix<F> ALast( g ); DistMatrix<F> Q( height+width, width, g ); DistMatrix<F> QT(g), QB(g); PartitionDown( Q, QT, QB, height ); DistMatrix<F> C( g ); DistMatrix<F> ATemp( g ); do { if( numIts > 100 ) throw std::runtime_error("Halley iteration did not converge"); ++numIts; ALast = A; // TODO: Come up with a test for when we can use the Cholesky approach if( true ) { // // The standard QR-based algorithm // QT = A; Scale( Sqrt(c), QT ); MakeIdentity( QB ); ExplicitQR( Q ); Gemm( NORMAL, ADJOINT, F(a-b/c)/Sqrt(c), QT, QB, F(b/c), A ); } else { // // Use faster Cholesky-based algorithm since A is well-conditioned // Identity( width, width, C ); Herk( LOWER, ADJOINT, F(c), A, F(1), C ); Cholesky( LOWER, C ); ATemp = A; Trsm( RIGHT, LOWER, ADJOINT, NON_UNIT, F(1), C, ATemp ); Trsm( RIGHT, LOWER, NORMAL, NON_UNIT, F(1), C, ATemp ); Scale( b/c, A ); Axpy( a-b/c, ATemp, A ); } Axpy( F(-1), A, ALast ); frobNormADiff = Norm( ALast, FROBENIUS_NORM ); } while( frobNormADiff > cubeRootTol ); #ifndef RELEASE PopCallStack(); #endif return numIts; }