void LUMod ( Matrix<F>& A, Permutation& P, const Matrix<F>& u, const Matrix<F>& v, bool conjugate, Base<F> tau ) { DEBUG_CSE typedef Base<F> Real; const Int m = A.Height(); const Int n = A.Width(); const Int minDim = Min(m,n); if( minDim != m ) LogicError("It is assumed that height(A) <= width(A)"); if( u.Height() != m || u.Width() != 1 ) LogicError("u is expected to be a conforming column vector"); if( v.Height() != n || v.Width() != 1 ) LogicError("v is expected to be a conforming column vector"); // w := inv(L) P u auto w( u ); P.PermuteRows( w ); Trsv( LOWER, NORMAL, UNIT, A, w ); // Maintain an external vector for the temporary subdiagonal of U Matrix<F> uSub; Zeros( uSub, minDim-1, 1 ); // Reduce w to a multiple of e0 for( Int i=minDim-2; i>=0; --i ) { // Decide if we should pivot the i'th and i+1'th rows of w const F lambdaSub = A(i+1,i); const F ups_ii = A(i,i); const F omega_i = w(i); const F omega_ip1 = w(i+1); const Real rightTerm = Abs(lambdaSub*omega_i+omega_ip1); const bool pivot = ( Abs(omega_i) < tau*rightTerm ); const Range<Int> indi( i, i+1 ), indip1( i+1, i+2 ), indB( i+2, m ), indR( i+1, n ); auto lBi = A( indB, indi ); auto lBip1 = A( indB, indip1 ); auto uiR = A( indi, indR ); auto uip1R = A( indip1, indR ); if( pivot ) { // P := P_i P P.Swap( i, i+1 ); // Simultaneously perform // U := P_i U and // L := P_i L P_i^T // // Then update // L := L T_{i,L}^{-1}, // U := T_{i,L} U, // w := T_{i,L} P_i w, // where T_{i,L} is the Gauss transform which zeros (P_i w)_{i+1}. // // More succinctly, // gamma := w(i) / w(i+1), // w(i) := w(i+1), // w(i+1) := 0, // L(:,i) += gamma L(:,i+1), // U(i+1,:) -= gamma U(i,:). 
const F gamma = omega_i / omega_ip1; const F lambda_ii = F(1) + gamma*lambdaSub; A(i, i) = gamma; A(i+1,i) = 0; auto lBiCopy = lBi; Swap( NORMAL, lBi, lBip1 ); Axpy( gamma, lBiCopy, lBi ); auto uip1RCopy = uip1R; RowSwap( A, i, i+1 ); Axpy( -gamma, uip1RCopy, uip1R ); // Force L back to *unit* lower-triangular form via the transform // L := L T_{i,U}^{-1} D^{-1}, // where D is diagonal and responsible for forcing L(i,i) and // L(i+1,i+1) back to 1. The effect on L is: // eta := L(i,i+1)/L(i,i), // L(:,i+1) -= eta L(:,i), // delta_i := L(i,i), // delta_ip1 := L(i+1,i+1), // L(:,i) /= delta_i, // L(:,i+1) /= delta_ip1, // while the effect on U is // U(i,:) += eta U(i+1,:) // U(i,:) *= delta_i, // U(i+1,:) *= delta_{i+1}, // and the effect on w is // w(i) *= delta_i. const F eta = lambdaSub/lambda_ii; const F delta_i = lambda_ii; const F delta_ip1 = F(1) - eta*gamma; Axpy( -eta, lBi, lBip1 ); A(i+1,i) = gamma/delta_i; lBi *= F(1)/delta_i; lBip1 *= F(1)/delta_ip1; A(i,i) = eta*ups_ii*delta_i; Axpy( eta, uip1R, uiR ); uiR *= delta_i; uip1R *= delta_ip1; uSub(i) = ups_ii*delta_ip1; // Finally set w(i) w(i) = omega_ip1*delta_i; } else { // Update // L := L T_{i,L}^{-1}, // U := T_{i,L} U, // w := T_{i,L} w, // where T_{i,L} is the Gauss transform which zeros w_{i+1}. // // More succinctly, // gamma := w(i+1) / w(i), // L(:,i) += gamma L(:,i+1), // U(i+1,:) -= gamma U(i,:), // w(i+1) := 0. 
const F gamma = omega_ip1 / omega_i; A(i+1,i) += gamma; Axpy( gamma, lBip1, lBi ); Axpy( -gamma, uiR, uip1R ); uSub(i) = -gamma*ups_ii; } } // Add the modified w v' into U { auto a0 = A( IR(0), ALL ); const F omega_0 = w(0); Matrix<F> vTrans; Transpose( v, vTrans, conjugate ); Axpy( omega_0, vTrans, a0 ); } // Transform U from upper-Hessenberg to upper-triangular form for( Int i=0; i<minDim-1; ++i ) { // Decide if we should pivot the i'th and i+1'th rows U const F lambdaSub = A(i+1,i); const F ups_ii = A(i,i); const F ups_ip1i = uSub(i); const Real rightTerm = Abs(lambdaSub*ups_ii+ups_ip1i); const bool pivot = ( Abs(ups_ii) < tau*rightTerm ); const Range<Int> indi( i, i+1 ), indip1( i+1, i+2 ), indB( i+2, m ), indR( i+1, n ); auto lBi = A( indB, indi ); auto lBip1 = A( indB, indip1 ); auto uiR = A( indi, indR ); auto uip1R = A( indip1, indR ); if( pivot ) { // P := P_i P P.Swap( i, i+1 ); // Simultaneously perform // U := P_i U and // L := P_i L P_i^T // // Then update // L := L T_{i,L}^{-1}, // U := T_{i,L} U, // where T_{i,L} is the Gauss transform which zeros U(i+1,i). // // More succinctly, // gamma := U(i+1,i) / U(i,i), // L(:,i) += gamma L(:,i+1), // U(i+1,:) -= gamma U(i,:). const F gamma = ups_ii / ups_ip1i; const F lambda_ii = F(1) + gamma*lambdaSub; A(i+1,i) = ups_ip1i; A(i, i) = gamma; auto lBiCopy = lBi; Swap( NORMAL, lBi, lBip1 ); Axpy( gamma, lBiCopy, lBi ); auto uip1RCopy = uip1R; RowSwap( A, i, i+1 ); Axpy( -gamma, uip1RCopy, uip1R ); // Force L back to *unit* lower-triangular form via the transform // L := L T_{i,U}^{-1} D^{-1}, // where D is diagonal and responsible for forcing L(i,i) and // L(i+1,i+1) back to 1. The effect on L is: // eta := L(i,i+1)/L(i,i), // L(:,i+1) -= eta L(:,i), // delta_i := L(i,i), // delta_ip1 := L(i+1,i+1), // L(:,i) /= delta_i, // L(:,i+1) /= delta_ip1, // while the effect on U is // U(i,:) += eta U(i+1,:) // U(i,:) *= delta_i, // U(i+1,:) *= delta_{i+1}. 
const F eta = lambdaSub/lambda_ii; const F delta_i = lambda_ii; const F delta_ip1 = F(1) - eta*gamma; Axpy( -eta, lBi, lBip1 ); A(i+1,i) = gamma/delta_i; lBi *= F(1)/delta_i; lBip1 *= F(1)/delta_ip1; A(i,i) = ups_ip1i*delta_i; Axpy( eta, uip1R, uiR ); uiR *= delta_i; uip1R *= delta_ip1; } else { // Update // L := L T_{i,L}^{-1}, // U := T_{i,L} U, // where T_{i,L} is the Gauss transform which zeros U(i+1,i). // // More succinctly, // gamma := U(i+1,i)/ U(i,i), // L(:,i) += gamma L(:,i+1), // U(i+1,:) -= gamma U(i,:). const F gamma = ups_ip1i / ups_ii; A(i+1,i) += gamma; Axpy( gamma, lBip1, lBi ); Axpy( -gamma, uiR, uip1R ); } } }
// Runs the QDWH-style iteration on A in place, given an upper bound
// sMinUpper that is converted into the initial lower-bound estimate
// sMinUpper/sqrt(n). Each sweep picks either a QR-based update (when the
// coefficient c exceeds 100) or a Cholesky-based update, and the loop ends
// once the Frobenius norm of the change in A and |1 - bound| are both within
// tolerance, or after ctrl.maxIts sweeps.
//
// Returns a QDWHInfo with the total number of sweeps and the number of QR
// and Cholesky sweeps. Raises a LogicError if A has fewer rows than columns.
QDWHInfo QDWHInner( Matrix<F>& A, Base<F> sMinUpper, const QDWHCtrl& ctrl )
{
    EL_DEBUG_CSE
    typedef Base<F> Real;
    typedef Complex<Real> Cpx;

    const Int height = A.Height();
    const Int width = A.Width();
    if( height < width )
        LogicError("Height cannot be less than width");

    const Real oneThird = Real(1)/Real(3);
    const Real eps = limits::Epsilon<Real>();
    const Real tol = 5*eps;
    const Real cubeRootTol = Pow(tol,oneThird);

    QDWHInfo info;
    QRCtrl<Base<F>> qrCtrl;
    qrCtrl.colPiv = ctrl.colPiv;

    // Running estimate used to choose the coefficients of each sweep
    Real ell = sMinUpper / Sqrt(Real(width));

    // Workspace: the stacked matrix [sqrt(c) A; I] for the QR variant and
    // temporaries for the Cholesky variant
    Matrix<F> APrev, AWork, gram;
    Matrix<F> stacked( height+width, width );
    auto stackedTop = stacked( IR(0,height), ALL );
    auto stackedBot = stacked( IR(height,END), ALL );

    while( info.numIts < ctrl.maxIts )
    {
        APrev = A;

        Real ellSq;
        Cpx dd, sqd;
        if( Abs(1-ell) < tol )
        {
            ellSq = 1;
            dd = 0;
            sqd = 1;
        }
        else
        {
            ellSq = ell*ell;
            dd = Pow( 4*(1-ellSq)/(ellSq*ellSq), oneThird );
            sqd = Sqrt( Real(1)+dd );
        }
        const Cpx arg = Real(8) - Real(4)*dd + Real(8)*(2-ellSq)/(ellSq*sqd);
        const Real a = (sqd + Sqrt(arg)/Real(2)).real();
        const Real b = (a-1)*(a-1)/4;
        const Real c = a+b-1;
        const Real alpha = a-b/c;
        const Real beta = b/c;

        ell = ell*(a+b*ellSq)/(1+c*ellSq);

        const bool useQR = ( c > 100 );
        if( useQR )
        {
            // QR-based sweep on the stacked matrix
            stackedTop = A;
            stackedTop *= Sqrt(c);
            MakeIdentity( stackedBot );
            qr::ExplicitUnitary( stacked, true, qrCtrl );
            Gemm
            ( NORMAL, ADJOINT,
              F(alpha/Sqrt(c)), stackedTop, stackedBot, F(beta), A );
            ++info.numQRIts;
        }
        else
        {
            // Cholesky-based sweep, used once c is at most 100
            Identity( gram, width, width );
            Herk( LOWER, ADJOINT, c, A, Real(1), gram );
            Cholesky( LOWER, gram );
            AWork = A;
            Trsm( RIGHT, LOWER, ADJOINT, NON_UNIT, F(1), gram, AWork );
            Trsm( RIGHT, LOWER, NORMAL, NON_UNIT, F(1), gram, AWork );
            A *= beta;
            Axpy( alpha, AWork, A );
            ++info.numCholIts;
        }
        ++info.numIts;

        // Measure how far this sweep moved A
        APrev -= A;
        const Real frobNormADiff = FrobeniusNorm( APrev );
        const bool converged =
          ( frobNormADiff <= cubeRootTol && Abs(1-ell) <= tol );
        if( converged )
            break;
    }
    return info;
}
void LLNUnb( const Matrix<F>& L, Matrix<F>& X, bool checkIfSingular ) { DEBUG_CSE typedef Base<F> Real; const Int m = X.Height(); const Int n = X.Width(); const F* LBuf = L.LockedBuffer(); F* XBuf = X.Buffer(); const Int ldl = L.LDim(); const Int ldx = X.LDim(); Int k=0; while( k < m ) { const bool in2x2 = ( k+1<m && LBuf[k+(k+1)*ldl] != F(0) ); if( in2x2 ) { // Solve the 2x2 linear systems via a 2x2 LQ decomposition produced // by the Givens rotation // | L(k,k) L(k,k+1) | | c -conj(s) | = | gamma11 0 | // | s c | // and by also forming the bottom two entries of the 2x2 resulting // lower-triangular matrix, say gamma21 and gamma22 // // Extract the 2x2 diagonal block, D const F delta11 = LBuf[ k + k *ldl]; const F delta12 = LBuf[ k +(k+1)*ldl]; const F delta21 = LBuf[(k+1)+ k *ldl]; const F delta22 = LBuf[(k+1)+(k+1)*ldl]; // Decompose D = L Q Real c; F s; const F gamma11 = Givens( delta11, delta12, c, s ); const F gamma21 = c*delta21 + s*delta22; const F gamma22 = -Conj(s)*delta21 + c*delta22; if( checkIfSingular ) { // TODO: Check if sufficiently small instead if( gamma11 == F(0) || gamma22 == F(0) ) LogicError("Singular diagonal block detected"); } for( Int j=0; j<n; ++j ) { F* xBuf = &XBuf[j*ldx]; // Solve against L xBuf[k ] /= gamma11; xBuf[k+1] -= gamma21*xBuf[k]; xBuf[k+1] /= gamma22; // Solve against Q const F chi1 = xBuf[k ]; const F chi2 = xBuf[k+1]; xBuf[k ] = c*chi1 - Conj(s)*chi2; xBuf[k+1] = s*chi1 + c*chi2; // Update x2 := x2 - L21 x1 blas::Axpy ( m-(k+2), -xBuf[k ], &LBuf[(k+2)+ k *ldl], 1, &xBuf[k+2], 1 ); blas::Axpy ( m-(k+2), -xBuf[k+1], &LBuf[(k+2)+(k+1)*ldl], 1, &xBuf[k+2], 1 ); } k += 2; } else { if( checkIfSingular ) { // TODO: Check if sufficiently small instead if( LBuf[k+k*ldl] == F(0) ) LogicError("Singular diagonal entry detected"); } for( Int j=0; j<n; ++j ) { F* xBuf = &XBuf[j*ldx]; xBuf[k] /= LBuf[k+k*ldl]; blas::Axpy ( m-(k+1), -xBuf[k], &LBuf[(k+1)+k*ldl], 1, &xBuf[k+1], 1 ); } k += 1; } } }
inline void ALM ( const Matrix<F>& M, Matrix<F>& L, Matrix<F>& S, const RPCACtrl<Base<F>>& ctrl ) { typedef Base<F> Real; const Int m = M.Height(); const Int n = M.Width(); // If tau is unspecified, set it to 1/sqrt(max(m,n)) const Base<F> tau = ( ctrl.tau <= Real(0) ? Real(1) / sqrt(Real(Max(m,n))) : ctrl.tau ); if( ctrl.tol <= Real(0) ) LogicError("tol cannot be non-positive"); const Base<F> tol = ctrl.tol; const double startTime = mpi::Time(); Matrix<F> Y( M ); NormalizeEntries( Y ); const Real twoNorm = TwoNorm( Y ); const Real maxNorm = MaxNorm( Y ); const Real infNorm = maxNorm / tau; const Real dualNorm = Max( twoNorm, infNorm ); Y *= F(1)/dualNorm; // If beta is unspecified, set it to 1 / 2 || sign(M) ||_2 Base<F> beta = ( ctrl.beta <= Real(0) ? Real(1) / (2*twoNorm) : ctrl.beta ); const Real frobM = FrobeniusNorm( M ); const Real maxM = MaxNorm( M ); if( ctrl.progress ) cout << "|| M ||_F = " << frobM << "\n" << "|| M ||_max = " << maxM << endl; Zeros( L, m, n ); Zeros( S, m, n ); Int numIts=0, numPrimalIts=0; Matrix<F> LLast, SLast, E; while( true ) { ++numIts; Int rank, numNonzeros; while( true ) { ++numPrimalIts; LLast = L; SLast = S; // ST_{tau/beta}(M - L + Y/beta) S = M; S -= L; Axpy( F(1)/beta, Y, S ); SoftThreshold( S, tau/beta ); numNonzeros = ZeroNorm( S ); // SVT_{1/beta}(M - S + Y/beta) L = M; L -= S; Axpy( F(1)/beta, Y, L ); if( ctrl.usePivQR ) rank = SVT( L, Real(1)/beta, ctrl.numPivSteps ); else rank = SVT( L, Real(1)/beta ); LLast -= L; SLast -= S; const Real frobLDiff = FrobeniusNorm( LLast ); const Real frobSDiff = FrobeniusNorm( SLast ); if( frobLDiff/frobM < tol && frobSDiff/frobM < tol ) { if( ctrl.progress ) cout << "Primal loop converged: " << mpi::Time()-startTime << " total secs" << endl; break; } else { if( ctrl.progress ) cout << " " << numPrimalIts << ": \n" << " || Delta L ||_F / || M ||_F = " << frobLDiff/frobM << "\n" << " || Delta S ||_F / || M ||_F = " << frobSDiff/frobM << "\n" << " rank=" << rank << ", numNonzeros=" << 
numNonzeros << ", " << mpi::Time()-startTime << " total secs" << endl; } } // E := M - (L + S) E = M; E -= L; E -= S; const Real frobE = FrobeniusNorm( E ); if( frobE/frobM <= tol ) { if( ctrl.progress ) cout << "Converged after " << numIts << " iterations and " << numPrimalIts << " primal iterations with rank=" << rank << ", numNonzeros=" << numNonzeros << " and " << "|| E ||_F / || M ||_F = " << frobE/frobM << ", " << mpi::Time()-startTime << " total secs" << endl; break; } else if( numIts >= ctrl.maxIts ) { if( ctrl.progress ) cout << "Aborting after " << numIts << " iterations and " << mpi::Time()-startTime << " total secs" << endl; break; } else { if( ctrl.progress ) cout << numPrimalIts << ": || E ||_F / || M ||_F = " << frobE/frobM << ", rank=" << rank << ", numNonzeros=" << numNonzeros << ", " << mpi::Time()-startTime << " total secs" << endl; } // Y := Y + beta E Axpy( beta, E, Y ); beta *= ctrl.rho; } }
inline void Inverse( Matrix<F>& A ) { #ifndef RELEASE PushCallStack("Inverse"); if( A.Height() != A.Width() ) throw std::logic_error("Cannot invert non-square matrices"); #endif Matrix<int> p; LU( A, p ); TriangularInverse( UPPER, NON_UNIT, A ); // Solve inv(A) L = inv(U) for inv(A) Matrix<F> ATL, ATR, ABL, ABR; Matrix<F> A00, A01, A02, A10, A11, A12, A20, A21, A22; Matrix<F> A1, A2; Matrix<F> L11, L21; PartitionUpDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ABR.Height() < A.Height() ) { RepartitionUpDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); View( A1, A, 0, A00.Width(), A.Height(), A01.Width() ); View( A2, A, 0, A00.Width()+A01.Width(), A.Height(), A02.Width() ); //--------------------------------------------------------------------// // Copy out L1 L11 = A11; L21 = A21; // Zero the strictly lower triangular portion of A1 MakeTrapezoidal( LEFT, UPPER, 0, A11 ); Zero( A21 ); // Perform the lazy update of A1 Gemm( NORMAL, NORMAL, F(-1), A2, L21, F(1), A1 ); // Solve against this diagonal block of L11 Trsm( RIGHT, LOWER, NORMAL, UNIT, F(1), L11, A1 ); //--------------------------------------------------------------------// SlidePartitionUpDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /*******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); } // inv(A) := inv(A) P ApplyInverseColumnPivots( A, p ); #ifndef RELEASE PopCallStack(); #endif }
inline void ApplyPackedReflectorsRLHF ( Conjugation conjugation, int offset, const Matrix<Complex<R> >& H, const Matrix<Complex<R> >& t, Matrix<Complex<R> >& A ) { #ifndef RELEASE PushCallStack("internal::ApplyPackedReflectorsRLHF"); if( offset > 0 || offset < -H.Width() ) throw std::logic_error("Transforms out of bounds"); if( H.Width() != A.Width() ) throw std::logic_error ("Width of transforms must equal width of target matrix"); if( t.Height() != H.DiagonalLength( offset ) ) throw std::logic_error("t must be the same length as H's offset diag"); #endif typedef Complex<R> C; Matrix<C> HTL, HTR, H00, H01, H02, HPan, HPanCopy, HBL, HBR, H10, H11, H12, H20, H21, H22; Matrix<C> ALeft; Matrix<C> tT, t0, tB, t1, t2; Matrix<C> SInv, Z; LockedPartitionDownDiagonal ( H, HTL, HTR, HBL, HBR, 0 ); LockedPartitionDown ( t, tT, tB, 0 ); while( HTL.Height() < H.Height() && HTL.Width() < H.Width() ) { LockedRepartitionDownDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 ); const int HPanWidth = H10.Width() + H11.Width(); const int HPanOffset = std::min( H11.Height(), std::max(-offset-H00.Height(),0) ); const int HPanHeight = H11.Height()-HPanOffset; HPan.LockedView( H, H00.Height()+HPanOffset, 0, HPanHeight, HPanWidth ); LockedRepartitionDown ( tT, t0, /**/ /**/ t1, tB, t2, HPanHeight ); ALeft.View( A, 0, 0, A.Height(), HPanWidth ); Zeros( ALeft.Height(), HPan.Height(), Z ); Zeros( HPan.Height(), HPan.Height(), SInv ); //--------------------------------------------------------------------// HPanCopy = HPan; MakeTrapezoidal( RIGHT, LOWER, offset, HPanCopy ); SetDiagonalToOne( RIGHT, offset, HPanCopy ); Herk( UPPER, NORMAL, C(1), HPanCopy, C(0), SInv ); FixDiagonal( conjugation, t1, SInv ); Gemm( NORMAL, ADJOINT, C(1), ALeft, HPanCopy, C(0), Z ); Trsm( RIGHT, UPPER, NORMAL, NON_UNIT, C(1), SInv, Z ); Gemm( NORMAL, NORMAL, C(-1), Z, HPanCopy, C(1), ALeft ); 
//--------------------------------------------------------------------// SlideLockedPartitionDownDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 ); SlideLockedPartitionDown ( tT, t0, t1, /**/ /**/ tB, t2 ); } #ifndef RELEASE PopCallStack(); #endif }
int main( int argc, char* argv[] ) { Environment env( argc, argv ); try { const string inputBasisFile = Input("--inputBasisFile","input basis file",string("SVPChallenge40.txt")); const bool trans = Input("--transpose","transpose input?",true); const string outputBasisFile = Input("--outputBasisFile","output basis file",string("BKZ")); const string shortestVecFile = Input ("--shortestVecFile","shortest vector file",string("shortest")); const Real delta = Input("--delta","delta for LLL",Real(0.9999)); const Real eta = Input ("--eta","eta for LLL", Real(1)/Real(2) + Pow(limits::Epsilon<Real>(),Real(0.9))); const Int varInt = Input("--variant","0: weak LLL, 1: normal LLL, 2: deep insertion LLL, 3: deep reduction LLL",1); const Int blocksize = Input("--blocksize","BKZ blocksize",20); const bool variableBsize = Input("--variableBsize","variable blocksize?",false); const bool variableEnumType = Input("--variableEnumType","variable enum type?",false); const Int multiEnumWindow = Input("--multiEnumWindow","window for y-sparse enumeration",15); const Int phaseLength = Input("--phaseLength","YSPARSE_ENUM phase length",10); const Int progressLevel = Input("--progressLevel","YSPARSE_ENUM progress level",4); const bool presort = Input("--presort","presort columns?",false); const bool smallestFirst = Input("--smallestFirst","sort smallest first?",true); const bool recursiveLLL = Input("--recursiveLLL","recursive LLL?",true); const bool recursiveBKZ = Input("--recursiveBKZ","recursive BKZ?",false); const Int cutoff = Input("--cutoff","recursive cutoff",10); const bool earlyAbort = Input("--earlyAbort","early abort BKZ?",false); const Int numEnumsBeforeAbort = Input("--numEnumsBeforeAbort","num enums before early aborting",1000); const bool subBKZ = Input("--subBKZ","use BKZ w/ lower blocksize for subproblems?",true); const bool subEarlyAbort = Input("--subEarlyAbort","early abort subproblem?",false); const bool jumpstartBKZ = Input("--jumpstartBKZ","jumpstart BKZ?",false); const 
Int startColBKZ = Input("--startColBKZ","BKZ start column",0); const bool timeLLL = Input("--timeLLL","time LLL?",false); const bool timeBKZ = Input("--timeBKZ","time BKZ?",true); const bool progressLLL = Input("--progressLLL","print LLL progress?",false); const bool progressBKZ = Input("--progressBKZ","print BKZ progress?",false); const bool print = Input("--print","output all matrices?",true); const bool logFailedEnums = Input("--logFailedEnums","log failed enumerations in BKZ?",false); const bool logStreakSizes = Input("--logStreakSizes","log enum streak sizes in BKZ?",false); const bool logNontrivialCoords = Input("--logNontrivialCoords","log nontrivial enum coords?",false); const bool logNorms = Input("--logNorms","log norms of B?",true); const bool logProjNorms = Input("--logProjNorms","log proj norms of B?",true); const bool checkpoint = Input("--checkpoint","checkpoint each tour?",true); const Real targetRatio = Input("--targetRatio","targeted ratio of GH(L)",Real(1.05)); const bool timeEnum = Input("--timeEnum","time enum?",true); const bool innerEnumProgress = Input("--innerEnumProgress","inner enum progress?",false); const bool probEnum = Input("--probEnum","probabalistic enumeration *after* BKZ?",true); const bool fullEnum = Input("--fullEnum","SVP via full enum?",false); const bool enumOnSubset = Input("--enumOnSubset","enum on subset?",false); const Int subsetStart = Input("--subsetStart","start of subset",0); const Int subsetSize = Input("--subsetSize","num cols in subset",60); const bool doubleCycle = Input("--doubleCycle","cycle last vectors?",false); #ifdef EL_HAVE_MPC const mpfr_prec_t prec = Input("--prec","MPFR precision",mpfr_prec_t(1024)); #endif ProcessInput(); PrintInputReport(); #ifdef EL_HAVE_MPC mpc::SetPrecision( prec ); #endif Matrix<Real> B; if( trans ) { Matrix<Real> BTrans; Read( BTrans, inputBasisFile ); Transpose( BTrans, B ); } else Read( B, inputBasisFile ); const Int m = B.Height(); const Int n = B.Width(); const Real BOrigOne 
= OneNorm( B ); Output("|| B_orig ||_1 = ",BOrigOne); if( print ) Print( B, "BOrig" ); auto blocksizeLambda = [&]( Int j ) { // With k-sparse if( j <= 3 ) return 146; else if( j <= 10 ) return 62; else if( j <= 20 ) return 60; else if( j <= 50 ) return 55; else return 45; // Full enum /* if( j == 0 ) return 80; else if( j == 1 ) return 75; else if( j == 2 ) return 70; else if( j <= 10 ) return 62; else if( j <= 20 ) return 60; else if( j <= 50 ) return 55; else return 45; */ }; auto enumTypeLambda = [&]( Int j ) { if( j <= 3 ) return YSPARSE_ENUM; else return FULL_ENUM; //return FULL_ENUM; }; BKZCtrl<Real> ctrl; ctrl.blocksize = blocksize; ctrl.variableBlocksize = variableBsize; ctrl.blocksizeFunc = function<Int(Int)>(blocksizeLambda); ctrl.variableEnumType = variableEnumType; ctrl.enumTypeFunc = function<EnumType(Int)>(enumTypeLambda); ctrl.multiEnumWindow = multiEnumWindow; ctrl.time = timeBKZ; ctrl.progress = progressBKZ; ctrl.recursive = recursiveBKZ; ctrl.jumpstart = jumpstartBKZ; ctrl.startCol = startColBKZ; ctrl.enumCtrl.enumType = FULL_ENUM; ctrl.enumCtrl.time = timeEnum; ctrl.enumCtrl.innerProgress = innerEnumProgress; ctrl.enumCtrl.phaseLength = phaseLength; ctrl.enumCtrl.progressLevel = progressLevel; ctrl.earlyAbort = earlyAbort; ctrl.numEnumsBeforeAbort = numEnumsBeforeAbort; ctrl.subBKZ = subBKZ; ctrl.subEarlyAbort = subEarlyAbort; ctrl.logFailedEnums = logFailedEnums; ctrl.logStreakSizes = logStreakSizes; ctrl.logNontrivialCoords = logNontrivialCoords; ctrl.logNorms = logNorms; ctrl.logProjNorms = logProjNorms; ctrl.checkpoint = checkpoint; ctrl.lllCtrl.delta = delta; ctrl.lllCtrl.eta = eta; ctrl.lllCtrl.variant = static_cast<LLLVariant>(varInt); ctrl.lllCtrl.recursive = recursiveLLL; ctrl.lllCtrl.cutoff = cutoff; ctrl.lllCtrl.presort = presort; ctrl.lllCtrl.smallestFirst = smallestFirst; ctrl.lllCtrl.progress = progressLLL; ctrl.lllCtrl.time = timeLLL; ctrl.enumCtrl.customMaxInfNorms = true; ctrl.enumCtrl.customMaxOneNorms = true; const Int 
startIndex = Max(n/2-1,0); const Int numPhases = ((n-startIndex)+phaseLength-1) / phaseLength; ctrl.enumCtrl.maxInfNorms.resize( numPhases, 1 ); ctrl.enumCtrl.maxOneNorms.resize( numPhases ); // NOTE: This is tailored to SVP 146 where the ranges are // 0: [72,82) // 1: [82,92) // 2: [92,102) // 3: [102,112) // 4: [112,122) // 5: [122,132) // 6: [132,142) // 7: [142,146) ctrl.enumCtrl.maxOneNorms[0] = 0; ctrl.enumCtrl.maxOneNorms[1] = 1; ctrl.enumCtrl.maxOneNorms[2] = 1; ctrl.enumCtrl.maxOneNorms[3] = 1; ctrl.enumCtrl.maxOneNorms[4] = 1; ctrl.enumCtrl.maxOneNorms[5] = 2; ctrl.enumCtrl.maxOneNorms[6] = 3; ctrl.enumCtrl.maxOneNorms[7] = 3; ctrl.enumCtrl.maxInfNorms[0] = 1; ctrl.enumCtrl.maxInfNorms[1] = 1; ctrl.enumCtrl.maxInfNorms[2] = 1; ctrl.enumCtrl.maxInfNorms[3] = 1; ctrl.enumCtrl.maxInfNorms[4] = 1; ctrl.enumCtrl.maxInfNorms[5] = 1; ctrl.enumCtrl.maxInfNorms[6] = 2; ctrl.enumCtrl.maxInfNorms[7] = 2; const double startTime = mpi::Time(); Matrix<Real> R; auto info = BKZ( B, R, ctrl ); const double runTime = mpi::Time() - startTime; Output (" BKZ(",blocksize,",",delta,",",eta,") took ",runTime," seconds"); Output(" achieved delta: ",info.delta); Output(" achieved eta: ",info.eta); Output(" num swaps: ",info.numSwaps); Output(" num enums: ",info.numEnums); Output(" num failed enums: ",info.numEnumFailures); Output(" log(vol(L)): ",info.logVol); const Real GH = LatticeGaussianHeuristic( info.rank, info.logVol ); const Real challenge = targetRatio*GH; Output(" GH(L): ",GH); Output(" targetRatio*GH(L): ",challenge); if( print ) { Print( B, "B" ); Print( R, "R" ); } Write( B, outputBasisFile, ASCII, "BKZ" ); const Real BOneNorm = OneNorm( B ); Output("|| B ||_1 = ",BOneNorm); auto b0 = B( ALL, IR(0) ); const Real b0Norm = FrobeniusNorm( b0 ); Output("|| b_0 ||_2 = ",b0Norm); if( print ) Print( b0, "b0" ); bool succeeded = false; if( b0Norm <= challenge ) { Output ("SVP Challenge solved via BKZ: || b_0 ||_2=",b0Norm, " <= targetRatio*GH(L)=",challenge); succeeded = 
true; Write( b0, shortestVecFile, ASCII, "b0" ); } else Output ("SVP Challenge NOT solved via BKZ: || b_0 ||_2=",b0Norm, " > targetRatio*GH(L)=",challenge); if( !succeeded || fullEnum || (enumOnSubset && subsetStart != 0) ) { const Int start = ( enumOnSubset ? subsetStart : 0 ); const Int numCols = ( enumOnSubset ? subsetSize : n ); const Range<Int> subInd( start, start+numCols ); auto BSub = B( ALL, subInd ); auto RSub = R( subInd, subInd ); const Real target = ( start == 0 ? challenge : RSub.Get(0,0) ); Timer timer; if( enumOnSubset && doubleCycle && subsetSize >= 2 ) { Matrix<double> v; EnumCtrl<double> enumCtrl; enumCtrl.enumType = ( probEnum ? GNR_ENUM : FULL_ENUM ); enumCtrl.numTrials = 1; Matrix<double> BSubSwap; Zeros( BSubSwap, m, subsetSize ); auto BL = B( ALL, IR(start,start+subsetSize-2) ); auto BSubSwapL = BSubSwap( ALL, IR(0,subsetSize-2) ); Copy( BL, BSubSwapL ); for( Int j=start+subsetSize-2; j<n-1; ++j ) { auto bj = B( ALL, IR(j) ); auto bSubSwapj = BSubSwap( ALL, IR(subsetSize-2) ); Copy( bj, bSubSwapj ); for( Int k=j+1; k<n-1; ++k ) { auto bk = B( ALL, IR(k) ); auto bSubSwapk = BSubSwap( ALL, IR(subsetSize-1) ); Copy( bk, bSubSwapk ); Matrix<double> RSubSwap( BSubSwap ); Output("Cycling with j=",j,", k=",k); qr::ExplicitTriang( RSubSwap ); timer.Start(); Real result = ShortestVectorEnumeration ( BSubSwap, RSubSwap, double(target), v, enumCtrl ); Output("Enumeration: ",timer.Stop()," seconds"); if( result < RSubSwap.Get(0,0)-double(0.001) ) { Print( BSubSwap, "BSubSwap" ); Print( v, "v" ); Matrix<double> x; Zeros( x, m, 1 ); Gemv( NORMAL, 1., BSubSwap, v, 0., x ); Print( x, "x" ); const double xNorm = FrobeniusNorm( x ); Output("|| x ||_2 = ",xNorm); Output("Claimed || x ||_2 = ",result); Write( x, shortestVecFile, ASCII, "x" ); } } } } else { Matrix<F> v; EnumCtrl<Real> enumCtrl; enumCtrl.enumType = ( probEnum ? 
GNR_ENUM : FULL_ENUM ); timer.Start(); Real result; if( fullEnum ) result = ShortestVectorEnumeration( BSub, RSub, target, v, enumCtrl ); else result = ShortVectorEnumeration( BSub, RSub, target, v, enumCtrl ); Output("Enumeration: ",timer.Stop()," seconds"); if( result < target ) { Print( BSub, "BSub" ); Print( v, "v" ); Matrix<Real> x; Zeros( x, m, 1 ); Gemv( NORMAL, Real(1), BSub, v, Real(0), x ); Print( x, "x" ); const Real xNorm = FrobeniusNorm( x ); Output("|| x ||_2 = ",xNorm); Output("Claimed || x ||_2 = ",result); Write( x, shortestVecFile, ASCII, "x" ); EnrichLattice( BSub, v ); Print( B, "BNew" ); } else Output("Enumeration failed after ",timer.Stop()," seconds"); } } } catch( std::exception& e ) { ReportException(e); } return 0; }
void TestCorrectness ( const Matrix<F>& A, const Matrix<F>& phaseP, const Matrix<F>& phaseQ, Matrix<F>& AOrig, bool print, bool display ) { typedef Base<F> Real; const Int m = AOrig.Height(); const Int n = AOrig.Width(); const Real eps = limits::Epsilon<Real>(); const Real oneNormAOrig = OneNorm( AOrig ); Output("Testing error..."); PushIndent(); // Grab the diagonal and superdiagonal of the bidiagonal matrix auto d = GetDiagonal( A, 0 ); auto e = GetDiagonal( A, (m>=n ? 1 : -1) ); // Zero B and then fill its bidiagonal Matrix<F> B; Zeros( B, m, n ); SetDiagonal( B, d, 0 ); SetDiagonal( B, e, (m>=n ? 1 : -1) ); if( print ) Print( B, "Bidiagonal" ); if( display ) Display( B, "Bidiagonal" ); if( print || display ) { Matrix<F> Q, P; Identity( Q, m, m ); Identity( P, n, n ); bidiag::ApplyQ( LEFT, NORMAL, A, phaseQ, Q ); bidiag::ApplyP( RIGHT, NORMAL, A, phaseP, P ); if( print ) { Print( Q, "Q" ); Print( P, "P" ); } if( display ) { Display( Q, "Q" ); Display( P, "P" ); } } // Reverse the accumulated Householder transforms bidiag::ApplyQ( LEFT, ADJOINT, A, phaseQ, AOrig ); bidiag::ApplyP( RIGHT, NORMAL, A, phaseP, AOrig ); if( print ) Print( AOrig, "Manual bidiagonal" ); if( display ) Display( AOrig, "Manual bidiagonal" ); // Compare the appropriate portion of AOrig and B if( m >= n ) { MakeTrapezoidal( UPPER, AOrig ); MakeTrapezoidal( LOWER, AOrig, 1 ); } else { MakeTrapezoidal( LOWER, AOrig ); MakeTrapezoidal( UPPER, AOrig, -1 ); } B -= AOrig; if( print ) Print( B, "Error in rotated bidiagonal" ); if( display ) Display( B, "Error in rotated bidiagonal" ); const Real infNormError = InfinityNorm( B ); const Real relError = infNormError / (Max(m,n)*oneNormAOrig*eps); Output("||B - Q^H A P||_oo / (max(m,n) || A ||_1 eps) = ",relError); PopIndent(); // TODO: Use a more refined failure condition if( relError > Real(1) ) LogicError("Relative error was unacceptably large"); }
void EN ( const Matrix<Real>& A, const Matrix<Real>& b, Real lambda1, Real lambda2, Matrix<Real>& x, const qp::affine::Ctrl<Real>& ctrl ) { EL_DEBUG_CSE const Int m = A.Height(); const Int n = A.Width(); const Range<Int> uInd(0,n), vInd(n,2*n), rInd(2*n,2*n+m); Matrix<Real> Q, c, AHat, G, h; // Q := | 2*lambda_2 0 0 | // | 0 2*lambda_2 0 | // | 0 0 2 | // ================================ Zeros( Q, 2*n+m, 2*n+m ); auto QTL = Q( IR(0,2*n), IR(0,2*n) ); FillDiagonal( QTL, 2*lambda2 ); auto Qrr = Q( rInd, rInd ); FillDiagonal( Qrr, Real(1) ); // c := lambda_1*[1;1;0] // ===================== Zeros( c, 2*n+m, 1 ); auto cuv = c( IR(0,2*n), ALL ); Fill( cuv, lambda1 ); // \hat A := [A, -A, I] // ==================== Zeros( AHat, m, 2*n+m ); auto AHatu = AHat( ALL, uInd ); auto AHatv = AHat( ALL, vInd ); auto AHatr = AHat( ALL, rInd ); AHatu = A; AHatv -= A; FillDiagonal( AHatr, Real(1) ); // G := | -I 0 0 | // | 0 -I 0 | // ================ Zeros( G, 2*n, 2*n+m ); FillDiagonal( G, Real(-1) ); // h := 0 // ====== Zeros( h, 2*n, 1 ); // Solve the affine QP // =================== Matrix<Real> xHat, y, z, s; QP( Q, AHat, G, b, c, h, xHat, y, z, s, ctrl ); // x := u - v // ========== x = xHat( uInd, ALL ); x -= xHat( vInd, ALL ); }
inline void Var3( Orientation orientation, Matrix<F>& A, Matrix<F>& d ) { #ifndef RELEASE PushCallStack("ldl::Var3"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( d.Viewing() && (d.Height() != A.Height() || d.Width() != 1) ) throw std::logic_error ("d must be a column vector the same height as A"); if( orientation == NORMAL ) throw std::logic_error("Can only perform LDL^T or LDL^H"); #endif const int n = A.Height(); if( !d.Viewing() ) d.ResizeTo( n, 1 ); Matrix<F> ATL, ATR, A00, A01, A02, ABL, ABR, A10, A11, A12, A20, A21, A22; Matrix<F> dT, d0, dB, d1, d2; Matrix<F> S21; // Start the algorithm PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); PartitionDown ( d, dT, dB, 0 ); while( ABR.Height() > 0 ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); RepartitionDown ( dT, d0, /**/ /**/ d1, dB, d2 ); //--------------------------------------------------------------------// ldl::Var3Unb( orientation, A11, d1 ); Trsm( RIGHT, LOWER, orientation, UNIT, F(1), A11, A21 ); S21 = A21; DiagonalSolve( RIGHT, NORMAL, d1, A21 ); internal::TrrkNT( LOWER, orientation, F(-1), S21, A21, F(1), A22 ); //--------------------------------------------------------------------// SlidePartitionDown ( dT, d0, d1, /**/ /**/ dB, d2 ); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); } #ifndef RELEASE PopCallStack(); #endif }
HermitianFrobeniusNorm( UpperOrLower uplo, const Matrix<F>& A )
{
    // Frobenius norm of a Hermitian matrix of which only the `uplo`
    // triangle is read. Strictly off-diagonal entries are counted twice and
    // diagonal entries once. The sum of squares is carried as
    // scale^2 * scaledSquare, with `scale` the largest magnitude seen so
    // far. A LogicError is raised for a non-square A.
#ifndef RELEASE
    CallStackEntry entry("HermitianFrobeniusNorm");
#endif
    if( A.Height() != A.Width() )
        LogicError("Hermitian matrices must be square.");

    typedef BASE(F) R;
    R scale = 0;
    R scaledSquare = 1;
    const Int height = A.Height();
    const Int width = A.Width();
    const bool upper = ( uplo == UPPER );

    for( Int j=0; j<width; ++j )
    {
        // Stored strictly off-diagonal rows of column j: [0,j) for the
        // upper triangle and [j+1,height) for the lower one. The extra
        // final step of the loop visits the diagonal entry, so that it is
        // always accumulated after the off-diagonal ones.
        const Int rowBeg = ( upper ? 0 : j+1 );
        const Int rowEnd = ( upper ? j : height );
        for( Int step=rowBeg; step<=rowEnd; ++step )
        {
            const bool onDiagonal = ( step == rowEnd );
            const Int i = ( onDiagonal ? j : step );
            const R weight = ( onDiagonal ? R(1) : R(2) );

            const R alphaAbs = Abs(A.Get(i,j));
            if( alphaAbs == 0 )
                continue;

            if( alphaAbs <= scale )
            {
                const R relScale = alphaAbs/scale;
                scaledSquare += weight*relScale*relScale;
            }
            else
            {
                // New largest magnitude: rescale the running sum
                const R relScale = scale/alphaAbs;
                scaledSquare = scaledSquare*relScale*relScale + weight;
                scale = alphaAbs;
            }
        }
    }
    return scale*Sqrt(scaledSquare);
}
inline void Var3Unb( Orientation orientation, Matrix<F>& A, Matrix<F>& d ) { #ifndef RELEASE PushCallStack("ldl::Var3Unb"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( d.Viewing() && (d.Height() != A.Height() || d.Width() != 1) ) throw std::logic_error ("d must be a column vector the same height as A"); if( orientation == NORMAL ) throw std::logic_error("Can only perform LDL^T or LDL^H"); #endif const int n = A.Height(); if( !d.Viewing() ) d.ResizeTo( n, 1 ); F* ABuffer = A.Buffer(); F* dBuffer = d.Buffer(); const int ldim = A.LDim(); for( int j=0; j<n; ++j ) { const int a21Height = n - (j+1); // Extract and store the diagonal of D const F alpha11 = ABuffer[j+j*ldim]; if( alpha11 == F(0) ) throw SingularMatrixException(); dBuffer[j] = alpha11; F* RESTRICT a21 = &ABuffer[(j+1)+j*ldim]; if( orientation == ADJOINT ) { // A22 := A22 - a21 (a21 / alpha11)^H for( int k=0; k<a21Height; ++k ) { const F beta = Conj(a21[k]/alpha11); F* RESTRICT A22Col = &ABuffer[(j+1)+(j+1+k)*ldim]; for( int i=k; i<a21Height; ++i ) A22Col[i] -= a21[i]*beta; } } else { // A22 := A22 - a21 (a21 / alpha11)^T for( int k=0; k<a21Height; ++k ) { const F beta = a21[k]/alpha11; F* RESTRICT A22Col = &ABuffer[(j+1)+(j+1+k)*ldim]; for( int i=k; i<a21Height; ++i ) A22Col[i] -= a21[i]*beta; } } // a21 := a21 / alpha11 for( int i=0; i<a21Height; ++i ) a21[i] /= alpha11; } #ifndef RELEASE PopCallStack(); #endif }
int QDWH ( Matrix<F>& A, typename Base<F>::type lowerBound, typename Base<F>::type upperBound ) { #ifndef RELEASE PushCallStack("QDWH"); #endif typedef typename Base<F>::type R; const int height = A.Height(); const int width = A.Width(); const R oneHalf = R(1)/R(2); const R oneThird = R(1)/R(3); if( height < width ) throw std::logic_error("Height cannot be less than width"); const R epsilon = lapack::MachineEpsilon<R>(); const R tol = 5*epsilon; const R cubeRootTol = Pow(tol,oneThird); // Form the first iterate Scale( 1/upperBound, A ); int numIts=0; R frobNormADiff; Matrix<F> ALast; Matrix<F> Q( height+width, width ); Matrix<F> QT, QB; PartitionDown( Q, QT, QB, height ); Matrix<F> C; Matrix<F> ATemp; do { ++numIts; ALast = A; R L2; Complex<R> dd, sqd; if( Abs(1-lowerBound) < tol ) { L2 = 1; dd = 0; sqd = 1; } else { L2 = lowerBound*lowerBound; dd = Pow( 4*(1-L2)/(L2*L2), oneThird ); sqd = Sqrt( 1+dd ); } const Complex<R> arg = 8 - 4*dd + 8*(2-L2)/(L2*sqd); const R a = (sqd + Sqrt( arg )/2).real; const R b = (a-1)*(a-1)/4; const R c = a+b-1; const Complex<R> alpha = a-b/c; const Complex<R> beta = b/c; lowerBound = lowerBound*(a+b*L2)/(1+c*L2); if( c > 100 ) { // // The standard QR-based algorithm // QT = A; Scale( Sqrt(c), QT ); MakeIdentity( QB ); ExplicitQR( Q ); Gemm( NORMAL, ADJOINT, alpha/Sqrt(c), QT, QB, beta, A ); } else { // // Use faster Cholesky-based algorithm since A is well-conditioned // Identity( width, width, C ); Herk( LOWER, ADJOINT, F(c), A, F(1), C ); Cholesky( LOWER, C ); ATemp = A; Trsm( RIGHT, LOWER, ADJOINT, NON_UNIT, F(1), C, ATemp ); Trsm( RIGHT, LOWER, NORMAL, NON_UNIT, F(1), C, ATemp ); Scale( beta, A ); Axpy( alpha, ATemp, A ); } Axpy( F(-1), A, ALast ); frobNormADiff = Norm( ALast, FROBENIUS_NORM ); } while( frobNormADiff > cubeRootTol || Abs(1-lowerBound) > tol ); #ifndef RELEASE PopCallStack(); #endif return numIts; }
inline void RowEchelon( Matrix<F>& A, Matrix<F>& B ) { #ifndef RELEASE CallStackEntry entry("RowEchelon"); if( A.Height() != B.Height() ) LogicError("A and B must be the same height"); #endif // Matrix views Matrix<F> ATL, ATR, A00, A01, A02, APan, ABL, ABR, A10, A11, A12, A20, A21, A22; Matrix<F> BT, B0, BB, B1, B2; Matrix<Int> p1; // Pivot composition std::vector<Int> image, preimage; // Start the algorithm PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); PartitionDown ( B, BT, BB, 0 ); while( ATL.Height() < A.Height() && ATL.Width() < A.Width() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); RepartitionDown ( BT, B0, /**/ /**/ B1, BB, B2 ); View2x1 ( APan, A12, A22 ); //--------------------------------------------------------------------// lu::Panel( APan, p1, A00.Height() ); ComposePivots( p1, A00.Height(), image, preimage ); ApplyRowPivots( BB, image, preimage ); Trsm( LEFT, LOWER, NORMAL, UNIT, F(1), A11, A12 ); Trsm( LEFT, LOWER, NORMAL, UNIT, F(1), A11, B1 ); Gemm( NORMAL, NORMAL, F(-1), A21, A12, F(1), A22 ); Gemm( NORMAL, NORMAL, F(-1), A21, B1, F(1), B2 ); //--------------------------------------------------------------------// SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlidePartitionDown ( BT, B0, B1, /**/ /**/ BB, B2 ); } }
inline void TwoSidedTrsmUVar1( UnitOrNonUnit diag, Matrix<F>& A, const Matrix<F>& U ) { #ifndef RELEASE PushCallStack("internal::TwoSidedTrsmUVar1"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( U.Height() != U.Width() ) throw std::logic_error("Triangular matrices must be square"); if( A.Height() != U.Height() ) throw std::logic_error("A and U must be the same size"); #endif // Matrix views Matrix<F> ATL, ATR, A00, A01, A02, ABL, ABR, A10, A11, A12, A20, A21, A22; Matrix<F> UTL, UTR, U00, U01, U02, UBL, UBR, U10, U11, U12, U20, U21, U22; // Temporary products Matrix<F> Y01; PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDownDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDownDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); //--------------------------------------------------------------------// // Y01 := A00 U01 Zeros( A01.Height(), A01.Width(), Y01 ); Hemm( LEFT, UPPER, F(1), A00, U01, F(0), Y01 ); // A01 := inv(U00)' A01 Trsm( LEFT, UPPER, ADJOINT, diag, F(1), U00, A01 ); // A01 := A01 - 1/2 Y01 Axpy( F(-1)/F(2), Y01, A01 ); // A11 := A11 - (U01' A01 + A01' U01) Her2k( UPPER, ADJOINT, F(-1), U01, A01, F(1), A11 ); // A11 := inv(U11)' A11 inv(U11) TwoSidedTrsmUUnb( diag, A11, U11 ); // A01 := A01 - 1/2 Y01 Axpy( F(-1)/F(2), Y01, A01 ); // A01 := A01 inv(U11) Trsm( RIGHT, UPPER, NORMAL, diag, F(1), U11, A01 ); //--------------------------------------------------------------------// SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDownDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ 
U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void HouseholderSolve ( Orientation orientation, Matrix<Complex<R> >& A, const Matrix<Complex<R> >& B, Matrix<Complex<R> >& X ) { #ifndef RELEASE PushCallStack("HouseholderSolve"); if( orientation == TRANSPOSE ) throw std::logic_error("Invalid orientation"); #endif typedef Complex<R> C; // TODO: Add scaling const int m = A.Height(); const int n = A.Width(); Matrix<C> t; if( orientation == NORMAL ) { if( m != B.Height() ) throw std::logic_error("A and B do not conform"); if( m >= n ) { // Overwrite A with its packed QR factorization (and store the // corresponding Householder scalars in t) QR( A, t ); // Copy B into X X = B; // Apply Q' to X ApplyPackedReflectors ( LEFT, LOWER, VERTICAL, FORWARD, CONJUGATED, 0, A, t, X ); // Shrink X to its new height X.ResizeTo( n, X.Width() ); // Solve against R (checking for singularities) Matrix<C> AT; LockedView( AT, A, 0, 0, n, n ); Trsm( LEFT, UPPER, NORMAL, NON_UNIT, C(1), AT, X, true ); } else { // Overwrite A with its packed LQ factorization (and store the // corresponding Householder scalars in it) LQ( A, t ); // Copy B into X X.ResizeTo( n, B.Width() ); Matrix<C> XT, XB; PartitionDown( X, XT, XB, m ); XT = B; Zero( XB ); // Solve against L (checking for singularities) Matrix<C> AL; LockedView( AL, A, 0, 0, m, m ); Trsm( LEFT, LOWER, NORMAL, NON_UNIT, C(1), AL, XT, true ); // Apply Q' to X ApplyPackedReflectors ( LEFT, UPPER, HORIZONTAL, BACKWARD, CONJUGATED, 0, A, t, X ); } } else // orientation == ADJOINT { if( n != B.Height() ) throw std::logic_error("A and B do not conform"); if( m >= n ) { // Overwrite A with its packed QR factorization (and store the // corresponding Householder scalars in t) QR( A, t ); // Copy B into X X.ResizeTo( m, B.Width() ); Matrix<C> XT, XB; PartitionDown( X, XT, XB, n ); XT = B; Zero( XB ); // Solve against R' (checking for singularities) Matrix<C> AT; LockedView( AT, A, 0, 0, n, n ); Trsm( LEFT, UPPER, ADJOINT, NON_UNIT, C(1), AT, XT, true ); // Apply Q to X ApplyPackedReflectors ( 
LEFT, LOWER, VERTICAL, BACKWARD, UNCONJUGATED, 0, A, t, X ); } else { // Overwrite A with its packed LQ factorization (and store the // corresponding Householder scalars in t) LQ( A, t ); // Copy B into X X = B; // Apply Q to X ApplyPackedReflectors ( LEFT, UPPER, HORIZONTAL, FORWARD, UNCONJUGATED, 0, A, t, X ); // Shrink X to its new height X.ResizeTo( m, X.Width() ); // Solve against L' (check for singularities) Matrix<C> AL; LockedView( AL, A, 0, 0, m, m ); Trsm( LEFT, LOWER, ADJOINT, NON_UNIT, C(1), AL, X, true ); } } #ifndef RELEASE PopCallStack(); #endif }
inline void MakeTrapezoidal ( LeftOrRight side, UpperOrLower uplo, int offset, Matrix<T>& A ) { #ifndef RELEASE PushCallStack("MakeTrapezoidal"); #endif const int height = A.Height(); const int width = A.Width(); const int ldim = A.LDim(); T* buffer = A.Buffer(); if( uplo == LOWER ) { if( side == LEFT ) { #ifdef HAVE_OPENMP #pragma omp parallel for #endif for( int j=std::max(0,offset+1); j<width; ++j ) { const int lastZeroRow = j-offset-1; const int numZeroRows = std::min( lastZeroRow+1, height ); MemZero( &buffer[j*ldim], numZeroRows ); } } else { #ifdef HAVE_OPENMP #pragma omp parallel for #endif for( int j=std::max(0,offset-height+width+1); j<width; ++j ) { const int lastZeroRow = j-offset+height-width-1; const int numZeroRows = std::min( lastZeroRow+1, height ); MemZero( &buffer[j*ldim], numZeroRows ); } } } else { if( side == LEFT ) { #ifdef HAVE_OPENMP #pragma omp parallel for #endif for( int j=0; j<width; ++j ) { const int firstZeroRow = std::max(j-offset+1,0); if( firstZeroRow < height ) MemZero ( &buffer[firstZeroRow+j*ldim], height-firstZeroRow ); } } else { #ifdef HAVE_OPENMP #pragma omp parallel for #endif for( int j=0; j<width; ++j ) { const int firstZeroRow = std::max(j-offset+height-width+1,0); if( firstZeroRow < height ) MemZero ( &buffer[firstZeroRow+j*ldim], height-firstZeroRow ); } } } #ifndef RELEASE PopCallStack(); #endif }
inline void TwoSidedTrsmUVar5( UnitOrNonUnit diag, Matrix<F>& A, const Matrix<F>& U ) { #ifndef RELEASE CallStackEntry entry("internal::TwoSidedTrsmUVar5"); if( A.Height() != A.Width() ) LogicError("A must be square"); if( U.Height() != U.Width() ) LogicError("Triangular matrices must be square"); if( A.Height() != U.Height() ) LogicError("A and U must be the same size"); #endif // Matrix views Matrix<F> ATL, ATR, A00, A01, A02, ABL, ABR, A10, A11, A12, A20, A21, A22; Matrix<F> UTL, UTR, U00, U01, U02, UBL, UBR, U10, U11, U12, U20, U21, U22; // Temporary products Matrix<F> Y12; PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDownDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDownDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); //--------------------------------------------------------------------// // A11 := inv(U11)' A11 inv(U11) TwoSidedTrsmUUnb( diag, A11, U11 ); // Y12 := A11 U12 Zeros( Y12, A12.Height(), A12.Width() ); Hemm( LEFT, UPPER, F(1), A11, U12, F(0), Y12 ); // A12 := inv(U11)' A12 Trsm( LEFT, UPPER, ADJOINT, diag, F(1), U11, A12 ); // A12 := A12 - 1/2 Y12 Axpy( F(-1)/F(2), Y12, A12 ); // A22 := A22 - (A12' U12 + U12' A12) Her2k( UPPER, ADJOINT, F(-1), A12, U12, F(1), A22 ); // A12 := A12 - 1/2 Y12 Axpy( F(-1)/F(2), Y12, A12 ); // A12 := A12 inv(U22) Trsm( RIGHT, UPPER, NORMAL, diag, F(1), U22, A12 ); //--------------------------------------------------------------------// SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDownDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, 
/*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); } }
inline void ApplyPackedReflectorsRLHF ( int offset, const Matrix<R>& H, Matrix<R>& A ) { #ifndef RELEASE PushCallStack("internal::ApplyPackedReflectorsRLHF"); if( offset > 0 || offset < -H.Width() ) throw std::logic_error("Transforms out of bounds"); if( H.Width() != A.Width() ) throw std::logic_error ("Width of transforms must equal width of target matrix"); #endif Matrix<R> HTL, HTR, H00, H01, H02, HPan, HPanCopy, HBL, HBR, H10, H11, H12, H20, H21, H22; Matrix<R> ALeft; Matrix<R> SInv, Z; LockedPartitionDownDiagonal ( H, HTL, HTR, HBL, HBR, 0 ); while( HTL.Height() < H.Height() && HTL.Width() < H.Width() ) { LockedRepartitionDownDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 ); const int HPanWidth = H10.Width() + H11.Width(); const int HPanOffset = std::min( H11.Height(), std::max(-offset-H00.Height(),0) ); const int HPanHeight = H11.Height()-HPanOffset; HPan.LockedView( H, H00.Height()+HPanOffset, 0, HPanHeight, HPanWidth ); ALeft.View( A, 0, 0, A.Height(), HPanWidth ); Zeros( ALeft.Height(), HPan.Height(), Z ); Zeros( HPan.Height(), HPan.Height(), SInv ); //--------------------------------------------------------------------// HPanCopy = HPan; MakeTrapezoidal( RIGHT, LOWER, offset, HPanCopy ); SetDiagonalToOne( RIGHT, offset, HPanCopy ); Syrk( UPPER, NORMAL, R(1), HPanCopy, R(0), SInv ); HalveMainDiagonal( SInv ); Gemm( NORMAL, TRANSPOSE, R(1), ALeft, HPanCopy, R(0), Z ); Trsm( RIGHT, UPPER, NORMAL, NON_UNIT, R(1), SInv, Z ); Gemm( NORMAL, NORMAL, R(-1), Z, HPanCopy, R(1), ALeft ); //--------------------------------------------------------------------// SlideLockedPartitionDownDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void LQ( Matrix<Complex<Real> >& A, Matrix<Complex<Real> >& t ) { #ifndef RELEASE PushCallStack("LQ"); #endif typedef Complex<Real> C; t.ResizeTo( std::min(A.Height(),A.Width()), 1 ); // Matrix views Matrix<C> ATL, ATR, A00, A01, A02, ATopPan, ABottomPan, ABL, ABR, A10, A11, A12, A20, A21, A22; Matrix<C> tT, t0, tB, t1, t2; PartitionDownLeftDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); PartitionDown ( t, tT, tB, 0 ); while( ATL.Height() < A.Height() && ATL.Width() < A.Width() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); RepartitionDown ( tT, t0, /**/ /**/ t1, tB, t2 ); View1x2( ATopPan, A11, A12 ); View1x2( ABottomPan, A21, A22 ); //--------------------------------------------------------------------// internal::PanelLQ( ATopPan, t1 ); ApplyPackedReflectors ( RIGHT, UPPER, HORIZONTAL, FORWARD, CONJUGATED, 0, ATopPan, t1, ABottomPan ); //--------------------------------------------------------------------// SlidePartitionDown ( tT, t0, t1, /**/ /**/ tB, t2 ); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void HermitianTridiagU ( Matrix<Complex<R> >& A, Matrix<Complex<R> >& t ) { #ifndef RELEASE PushCallStack("HermitianTridiagU"); #endif const int tHeight = std::max(A.Height()-1,0); #ifndef RELEASE if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( t.Viewing() && (t.Height() != tHeight || t.Width() != 1) ) throw std::logic_error("t is of the wrong size"); #endif typedef Complex<R> C; if( !t.Viewing() ) t.ResizeTo( tHeight, 1 ); // Matrix views Matrix<C> ATL, ATR, A00, a01, A02, a01T, ABL, ABR, a10, alpha11, a12, alpha01B, A20, a21, A22; // Temporary matrices Matrix<C> w01; PushBlocksizeStack( 1 ); PartitionUpDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ABR.Height()+1 < A.Height() ) { RepartitionUpDiagonal ( ATL, /**/ ATR, A00, a01, /**/ A02, /**/ a10, alpha11, /**/ a12, /*************/ /**********************/ ABL, /**/ ABR, A20, a21, /**/ A22 ); PartitionUp ( a01, a01T, alpha01B, 1 ); w01.ResizeTo( a01.Height(), 1 ); //--------------------------------------------------------------------// const C tau = Reflector( alpha01B, a01T ); const R epsilon1 = alpha01B.GetRealPart(0,0); t.Set(t.Height()-1-A22.Height(),0,tau); alpha01B.Set(0,0,C(1)); Hemv( UPPER, tau, A00, a01, C(0), w01 ); const C alpha = -tau*Dot( w01, a01 )/C(2); Axpy( alpha, a01, w01 ); Her2( UPPER, C(-1), a01, w01, A00 ); alpha01B.Set(0,0,epsilon1); //--------------------------------------------------------------------// SlidePartitionUpDiagonal ( ATL, /**/ ATR, A00, /**/ a01, A02, /*************/ /**********************/ /**/ a10, /**/ alpha11, a12, ABL, /**/ ABR, A20, /**/ a21, A22 ); } PopBlocksizeStack(); #ifndef RELEASE PopCallStack(); #endif }
inline void TwoSidedTrsmLVar2( UnitOrNonUnit diag, Matrix<F>& A, const Matrix<F>& L ) { #ifndef RELEASE CallStackEntry entry("internal::TwoSidedTrsmLVar2"); if( A.Height() != A.Width() ) LogicError("A must be square"); if( L.Height() != L.Width() ) LogicError("Triangular matrices must be square"); if( A.Height() != L.Height() ) LogicError("A and L must be the same size"); #endif // Matrix views Matrix<F> ATL, ATR, A00, A01, A02, ABL, ABR, A10, A11, A12, A20, A21, A22; Matrix<F> LTL, LTR, L00, L01, L02, LBL, LBR, L10, L11, L12, L20, L21, L22; // Temporary products Matrix<F> X11; Matrix<F> Y10; PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); //--------------------------------------------------------------------// // Y10 := L10 A00 Zeros( Y10, L10.Height(), A00.Width() ); Hemm( RIGHT, LOWER, F(1), A00, L10, F(0), Y10 ); // A10 := A10 - 1/2 Y10 Axpy( F(-1)/F(2), Y10, A10 ); // A11 := A11 - (A10 L10' + L10 A10') Her2k( LOWER, NORMAL, F(-1), A10, L10, F(1), A11 ); // A11 := inv(L11) A11 inv(L11)' TwoSidedTrsmLUnb( diag, A11, L11 ); // A21 := A21 - A20 L10' Gemm( NORMAL, ADJOINT, F(-1), A20, L10, F(1), A21 ); // A21 := A21 inv(L11)' Trsm( RIGHT, LOWER, ADJOINT, diag, F(1), L11, A21 ); // A10 := A10 - 1/2 Y10 Axpy( F(-1)/F(2), Y10, A10 ); // A10 := inv(L11) A10 Trsm( LEFT, LOWER, NORMAL, diag, F(1), L11, A10 ); //--------------------------------------------------------------------// SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); 
SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /**********************************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); } }
inline void ADMM ( const Matrix<F>& M, Matrix<F>& L, Matrix<F>& S, const RPCACtrl<Base<F>>& ctrl ) { typedef Base<F> Real; const Int m = M.Height(); const Int n = M.Width(); // If tau is not specified, then set it to 1/sqrt(max(m,n)) const Base<F> tau = ( ctrl.tau <= Real(0) ? Real(1)/sqrt(Real(Max(m,n))) : ctrl.tau ); if( ctrl.beta <= Real(0) ) LogicError("beta cannot be non-positive"); if( ctrl.tol <= Real(0) ) LogicError("tol cannot be non-positive"); const Base<F> beta = ctrl.beta; const Base<F> tol = ctrl.tol; const double startTime = mpi::Time(); Matrix<F> E, Y; Zeros( Y, m, n ); const Real frobM = FrobeniusNorm( M ); const Real maxM = MaxNorm( M ); if( ctrl.progress ) cout << "|| M ||_F = " << frobM << "\n" << "|| M ||_max = " << maxM << endl; Zeros( L, m, n ); Zeros( S, m, n ); Int numIts = 0; while( true ) { ++numIts; // ST_{tau/beta}(M - L + Y/beta) S = M; S -= L; Axpy( F(1)/beta, Y, S ); SoftThreshold( S, tau/beta ); const Int numNonzeros = ZeroNorm( S ); // SVT_{1/beta}(M - S + Y/beta) L = M; L -= S; Axpy( F(1)/beta, Y, L ); Int rank; if( ctrl.usePivQR ) rank = SVT( L, Real(1)/beta, ctrl.numPivSteps ); else rank = SVT( L, Real(1)/beta ); // E := M - (L + S) E = M; E -= L; E -= S; const Real frobE = FrobeniusNorm( E ); if( frobE/frobM <= tol ) { if( ctrl.progress ) cout << "Converged after " << numIts << " iterations " << " with rank=" << rank << ", numNonzeros=" << numNonzeros << " and " << "|| E ||_F / || M ||_F = " << frobE/frobM << ", and " << mpi::Time()-startTime << " total secs" << endl; break; } else if( numIts >= ctrl.maxIts ) { if( ctrl.progress ) cout << "Aborting after " << numIts << " iterations and " << mpi::Time()-startTime << " total secs" << endl; break; } else { if( ctrl.progress ) cout << numIts << ": || E ||_F / || M ||_F = " << frobE/frobM << ", rank=" << rank << ", numNonzeros=" << numNonzeros << ", " << mpi::Time()-startTime << " total secs" << endl; } // Y := Y + beta E Axpy( beta, E, Y ); } }
// Computes an SVD of A with lapack::DivideAndConquerSVD and post-processes
// the result according to ctrl.bidiagSVDCtrl.
//
// A:    input matrix; its buffer is handed to LAPACK (ctrl.overwrite must
//       be true, otherwise LogicError is invoked).
// U:    left singular vectors (m x k for THIN/COMPACT, m x m for FULL).
// s:    singular values; truncated to the detected rank for COMPACT.
// V:    adjoint of the right-vector matrix returned by LAPACK; written
//       only when ctrl.bidiagSVDCtrl.wantV is set.
// ctrl: only the THIN_SVD, FULL_SVD and COMPACT_SVD approaches are accepted.
// Returns a default-constructed SVDInfo (no field is set here).
SVDInfo LAPACKHelper
( Matrix<F>& A,
  Matrix<F>& U,
  Matrix<Base<F>>& s,
  Matrix<F>& V,
  const SVDCtrl<Base<F>>& ctrl )
{
    DEBUG_CSE
    typedef Base<F> Real;
    if( !ctrl.overwrite )
        LogicError("LAPACKHelper assumes ctrl.overwrite == true");

    auto approach = ctrl.bidiagSVDCtrl.approach;
    const bool supported =
        ( approach == THIN_SVD ||
          approach == FULL_SVD ||
          approach == COMPACT_SVD );
    if( !supported )
        LogicError("LAPACKHelper assumes THIN_SVD, FULL_SVD, or COMPACT_SVD");

    SVDInfo info;
    const Int m = A.Height();
    const Int n = A.Width();
    const Int k = Min(m,n);
    const bool thin = ( approach == THIN_SVD );
    const bool compact = ( approach == COMPACT_SVD );
    const bool economy = ( thin || compact );
    const bool avoidU = !ctrl.bidiagSVDCtrl.wantU;
    const bool avoidV = !ctrl.bidiagSVDCtrl.wantV;

    // Economy-size factors have k columns/rows, full ones are square
    s.Resize( k, 1 );
    Matrix<F> VAdj;
    if( economy )
    {
        U.Resize( m, k );
        VAdj.Resize( k, n );
    }
    else
    {
        U.Resize( m, m );
        VAdj.Resize( n, n );
    }

    lapack::DivideAndConquerSVD
    ( m, n, A.Buffer(), A.LDim(),
      s.Buffer(),
      U.Buffer(), U.LDim(),
      VAdj.Buffer(), VAdj.LDim(),
      economy );

    if( compact )
    {
        const Real twoNorm = ( k==0 ? Real(0) : s(0) );
        const Real thresh =
          bidiag_svd::APosterioriThreshold
          ( m, n, twoNorm, ctrl.bidiagSVDCtrl );

        // rank := index of the first singular value at or below thresh
        Int rank = 0;
        while( rank < k && !( s(rank) <= thresh ) )
            ++rank;

        s.Resize( rank, 1 );
        if( !avoidU )
            U.Resize( m, rank );
        if( !avoidV )
            VAdj.Resize( rank, n );
    }

    if( !avoidV )
        Adjoint( VAdj, V );

    return info;
}
inline void LU( Matrix<F>& A, Matrix<int>& p ) { #ifndef RELEASE PushCallStack("LU"); if( p.Viewing() && (std::min(A.Height(),A.Width()) != p.Height() || p.Width() != 1) ) throw std::logic_error ("p must be a vector of the same height as the min dimension of A."); #endif if( !p.Viewing() ) p.ResizeTo( std::min(A.Height(),A.Width()), 1 ); // Matrix views Matrix<F> ATL, ATR, A00, A01, A02, ABRL, ABRR, ABL, ABR, A10, A11, A12, A20, A21, A22; Matrix<int> pT, p0, pB, p1, p2; // Pivot composition std::vector<int> image, preimage; // Start the algorithm PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); PartitionDown ( p, pT, pB, 0 ); while( ATL.Height() < A.Height() && ATL.Width() < A.Width() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); RepartitionDown ( pT, p0, /**/ /**/ p1, pB, p2 ); PartitionRight( ABR, ABRL, ABRR, A11.Width() ); const int pivotOffset = A01.Height(); //--------------------------------------------------------------------// internal::PanelLU( ABRL, p1, pivotOffset ); internal::ComposePanelPivots( p1, pivotOffset, image, preimage ); ApplyRowPivots( ABL, image, preimage ); ApplyRowPivots( ABRR, image, preimage ); Trsm( LEFT, LOWER, NORMAL, UNIT, F(1), A11, A12 ); Gemm( NORMAL, NORMAL, F(-1), A21, A12, F(1), A22 ); //--------------------------------------------------------------------// SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlidePartitionDown ( pT, p0, p1, /**/ /**/ pB, p2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void TwoSidedTrmmLVar2( UnitOrNonUnit diag, Matrix<F>& A, const Matrix<F>& L ) { #ifndef RELEASE PushCallStack("internal::TwoSidedTrmmLVar2"); if( A.Height() != A.Width() ) throw std::logic_error( "A must be square." ); if( L.Height() != L.Width() ) throw std::logic_error( "Triangular matrices must be square." ); if( A.Height() != L.Height() ) throw std::logic_error( "A and L must be the same size." ); #endif // Matrix views Matrix<F> ATL, ATR, A00, A01, A02, ABL, ABR, A10, A11, A12, A20, A21, A22; Matrix<F> LTL, LTR, L00, L01, L02, LBL, LBR, L10, L11, L12, L20, L21, L22; // Temporary products Matrix<F> Y21; PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); //--------------------------------------------------------------------// // A10 := L11' A10 Trmm( LEFT, LOWER, ADJOINT, diag, F(1), L11, A10 ); // A10 := A10 + L21' A20 Gemm( ADJOINT, NORMAL, F(1), L21, A20, F(1), A10 ); // Y21 := A22 L21 Zeros( A21.Height(), A21.Width(), Y21 ); Hemm( LEFT, LOWER, F(1), A22, L21, F(0), Y21 ); // A21 := A21 L11 Trmm( RIGHT, LOWER, NORMAL, diag, F(1), L11, A21 ); // A21 := A21 + 1/2 Y21 Axpy( F(1)/F(2), Y21, A21 ); // A11 := L11' A11 L11 TwoSidedTrmmLUnb( diag, A11, L11 ); // A11 := A11 + (A21' L21 + L21' A21) Her2k( LOWER, ADJOINT, F(1), A21, L21, F(1), A11 ); // A21 := A21 + 1/2 Y21 Axpy( F(1)/F(2), Y21, A21 ); //--------------------------------------------------------------------// SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); 
SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); } #ifndef RELEASE PopCallStack(); #endif }
// ADMM iteration over (A, b, c) that reuses one cached factorization and
// leaves the non-negative iterate in z.
// NOTE(review): presumably solves  min c'x  s.t.  A x = b, x >= 0 -- the
// constraints are not spelled out in this routine; confirm with the caller.
//
// A, b, c: problem data; only read.
// z:       output; reset to zeros on entry and clipped at zero from below
//          every iteration.
// ctrl:    rho (penalty), alpha (relaxation), inv (apply an explicit
//          inverse of the Schur-complement factors instead of two
//          triangular solves), maxIter, absTol, relTol, print.
// Returns the number of completed iterations; equals ctrl.maxIter when the
// stopping test never passed, in which case a message is printed.
//
// Only z is exposed to the caller: the x iterate lives in xTmp, a view
// into the local work vector g, and is not returned.
Int ADMM
( const Matrix<Real>& A,
  const Matrix<Real>& b,
  const Matrix<Real>& c,
        Matrix<Real>& z,
  const ADMMCtrl<Real>& ctrl )
{
    EL_DEBUG_CSE

    // Cache a custom partially-pivoted LU factorization of
    //    |  rho*I   A^H | = | B11  B12 |
    //    |  A       0   |   | B21  B22 |
    // by (justifiably) avoiding pivoting in the first n steps of
    // the factorization, so that
    //    [I,rho*I] = lu(rho*I).
    // The factorization would then proceed with
    //    B21 := B21 U11^{-1} = A (rho*I)^{-1} = A/rho
    //    B12 := L11^{-1} B12 = I A^H = A^H.
    // The Schur complement would then be
    //    B22 := B22 - B21 B12 = 0 - (A*A^H)/rho.
    // We then factor said matrix with LU with partial pivoting and
    // swap the necessary rows of B21 in order to implicitly commute
    // the row pivots with the Gauss transforms in the manner standard
    // for GEPP. Unless A A' is singular, pivoting should not be needed,
    // as Cholesky factorization of the negative matrix should be valid.
    //
    // The result is the factorization
    //   | I 0   | | rho*I A^H | = | I   0   | | rho*I U12 |,
    //   | 0 P22 | | A     0   |   | L21 L22 | | 0     U22 |
    // where [L22,U22] are stored within B22.
    Matrix<Real> U12, L21, B22, bPiv;
    Adjoint( A, U12 );
    L21 = A;
    L21 *= 1/ctrl.rho;
    Herk( LOWER, NORMAL, -1/ctrl.rho, A, B22 );
    MakeHermitian( LOWER, B22 );
    // TODO: Replace with sparse-direct Cholesky version?
    Permutation P2;
    LU( B22, P2 );
    P2.PermuteRows( L21 );
    bPiv = b;
    P2.PermuteRows( bPiv );

    // Possibly form the inverse of L22 U22
    Matrix<Real> X22;
    if( ctrl.inv )
    {
        X22 = B22;
        MakeTrapezoidal( LOWER, X22 );
        FillDiagonal( X22, Real(1) );
        TriangularInverse( LOWER, UNIT, X22 );
        Trsm( LEFT, UPPER, NORMAL, NON_UNIT, Real(1), B22, X22 );
    }

    Int numIter=0;
    const Int m = A.Height();
    const Int n = A.Width();

    // g = [xTmp; y] is the stacked right-hand side / solution vector
    Matrix<Real> g, xTmp, y, t;
    Zeros( g, m+n, 1 );
    PartitionDown( g, xTmp, y, n );
    Matrix<Real> u, zOld, xHat;
    Zeros( z, n, 1 );
    Zeros( u, n, 1 );
    Zeros( t, n, 1 );
    while( numIter < ctrl.maxIter )
    {
        zOld = z;

        // Find x from
        //  | rho*I  A^H | | x | = | rho*(z-u)-c |
        //  | A      0   | | y |   | b           |
        // via our cached custom factorization:
        //
        // |x| = inv(U) inv(L) P' |rho*(z-u)-c|
        // |y|                    |b          |
        //     = |rho*I U12|^{-1} |I   0  | |I 0   | |rho*(z-u)-c|
        //     = |0     U22|      |L21 L22| |0 P22'| |b          |
        //     = "                        " |rho*(z-u)-c|
        //                                  | P22' b    |
        xTmp = z;
        xTmp -= u;
        xTmp *= ctrl.rho;
        xTmp -= c;
        y = bPiv;
        Gemv( NORMAL, Real(-1), L21, xTmp, Real(1), y );
        if( ctrl.inv )
        {
            Gemv( NORMAL, Real(1), X22, y, t );
            y = t;
        }
        else
        {
            Trsv( LOWER, NORMAL, UNIT, B22, y );
            Trsv( UPPER, NORMAL, NON_UNIT, B22, y );
        }
        Gemv( NORMAL, Real(-1), U12, y, Real(1), xTmp );
        xTmp *= 1/ctrl.rho;

        // xHat := alpha*x + (1-alpha)*zOld
        xHat = xTmp;
        xHat *= ctrl.alpha;
        Axpy( 1-ctrl.alpha, zOld, xHat );

        // z := pos(xHat+u)
        z = xHat;
        z += u;
        LowerClip( z, Real(0) );

        // u := u + (xHat-z)
        u += xHat;
        u -= z;

        const Real objective = Dot( c, xTmp );

        // rNorm := || x - z ||_2
        t = xTmp;
        t -= z;
        const Real rNorm = FrobeniusNorm( t );

        // sNorm := |rho| || z - zOld ||_2
        t = z;
        t -= zOld;
        const Real sNorm = Abs(ctrl.rho)*FrobeniusNorm( t );

        const Real epsPri = Sqrt(Real(n))*ctrl.absTol +
            ctrl.relTol*Max(FrobeniusNorm(xTmp),FrobeniusNorm(z));
        const Real epsDual = Sqrt(Real(n))*ctrl.absTol +
            ctrl.relTol*Abs(ctrl.rho)*FrobeniusNorm(u);

        if( ctrl.print )
        {
            // Distance from x to its projection onto the non-negative orthant
            t = xTmp;
            LowerClip( t, Real(0) );
            t -= xTmp;
            const Real clipDist = FrobeniusNorm( t );
            cout << numIter << ": "
              << "||x-z||_2=" << rNorm << ", "
              << "epsPri=" << epsPri << ", "
              << "|rho| ||z-zOld||_2=" << sNorm << ", "
              << "epsDual=" << epsDual << ", "
              << "||x-Pos(x)||_2=" << clipDist << ", "
              << "c'x=" << objective << endl;
        }
        if( rNorm < epsPri && sNorm < epsDual )
            break;
        ++numIter;
    }
    if( ctrl.maxIter == numIter )
        cout << "ADMM failed to converge" << endl;
    return numIter;
}
void LLTUnb ( bool conjugate, const Matrix<F>& L, const Matrix<F>& shifts, Matrix<F>& X ) { DEBUG_CSE typedef Base<F> Real; const Int m = X.Height(); const Int n = X.Width(); const F* LBuf = L.LockedBuffer(); F* XBuf = X.Buffer(); const Int ldl = L.LDim(); const Int ldx = X.LDim(); if( conjugate ) Conjugate( X ); Int k=m-1; while( k >= 0 ) { const bool in2x2 = ( k>0 && LBuf[(k-1)+k*ldl] != F(0) ); if( in2x2 ) { --k; // Solve the 2x2 linear systems via 2x2 LQ decompositions produced // by the Givens rotation // | L(k,k)-shift L(k,k+1) | | c -conj(s) | = | gamma11 0 | // | s c | // and by also forming the bottom two entries of the 2x2 resulting // lower-triangular matrix, say gamma21 and gamma22 // // Extract the constant part of the 2x2 diagonal block, D const F delta12 = LBuf[ k +(k+1)*ldl]; const F delta21 = LBuf[(k+1)+ k *ldl]; for( Int j=0; j<n; ++j ) { const F delta11 = LBuf[ k + k *ldl] - shifts.Get(j,0); const F delta22 = LBuf[(k+1)+(k+1)*ldl] - shifts.Get(j,0); // Decompose D = L Q Real c; F s; const F gamma11 = Givens( delta11, delta12, c, s ); const F gamma21 = c*delta21 + s*delta22; const F gamma22 = -Conj(s)*delta21 + c*delta22; F* xBuf = &XBuf[j*ldx]; // Solve against Q^T const F chi1 = xBuf[k ]; const F chi2 = xBuf[k+1]; xBuf[k ] = c*chi1 + s*chi2; xBuf[k+1] = -Conj(s)*chi1 + c*chi2; // Solve against R^T xBuf[k+1] /= gamma22; xBuf[k ] -= gamma21*xBuf[k+1]; xBuf[k ] /= gamma11; // Update x0 := x0 - L10^T x1 blas::Axpy( k, -xBuf[k ], &LBuf[k ], ldl, xBuf, 1 ); blas::Axpy( k, -xBuf[k+1], &LBuf[k+1], ldl, xBuf, 1 ); } } else { for( Int j=0; j<n; ++j ) { F* xBuf = &XBuf[j*ldx]; // Solve the 1x1 linear system xBuf[k] /= LBuf[k+k*ldl] - shifts.Get(j,0); // Update x0 := x0 - l10^T chi_1 blas::Axpy( k, -xBuf[k], &LBuf[k], ldl, xBuf, 1 ); } } --k; } if( conjugate ) Conjugate( X ); }
// Complex-valued unblocked sweep over the rows of a panel A (an LQ panel, per
// the function name).
//
// At each diagonal position a reflector is generated from the current row
// [alpha11 a12] via Reflector, the scalar it returns is stored in the matching
// entry of t, and a rank-one update is applied to the rows beneath.
//
//   A  panel to process; modified in place through views
//   t  column vector receiving one scalar per processed row. In builds where
//      RELEASE is not defined its shape is checked to be
//      min(A.Height(),A.Width()) x 1, and std::logic_error is thrown
//      otherwise.
//
// The blocksize stack is pushed with a value of 1 for the duration of the
// sweep and popped before returning.
inline void
PanelLQ
( Matrix<Complex<Real> >& A,
  Matrix<Complex<Real> >& t )
{
#ifndef RELEASE
    PushCallStack("internal::PanelLQ");
    if( t.Height() != std::min(A.Height(),A.Width()) || t.Width() != 1 )
        throw std::logic_error
        ("t must be a vector of height equal to the minimum dimension of A");
#endif
    typedef Complex<Real> C;

    // Matrix views: the quadrants of A, the refined 3x3 partition exposing
    // the current diagonal entry, and the two merged views used in the update
    Matrix<C>
        ATL, ATR,  A00, a01,     A02,  aTopRow, ABottomPan,
        ABL, ABR,  a10, alpha11, a12,
                   A20, a21,     A22;
    // Matching partition of t
    Matrix<C>
        tT,  t0,
        tB,  tau1,
             t2;

    // Temporaries: z receives the matrix-vector product and aTopRowConj the
    // conjugated copy of the current row
    Matrix<C> z, aTopRowConj;

    // Advance a single row/column per iteration
    PushBlocksizeStack( 1 );
    PartitionDownLeftDiagonal
    ( A, ATL, ATR,
         ABL, ABR, 0 );
    PartitionDown
    ( t, tT,
         tB, 0 );
    // Continue until the top-left quadrant spans the height or width of A
    while( ATL.Height() < A.Height() && ATL.Width() < A.Width() )
    {
        RepartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, /**/ a01,     A02,
         /*************/ /**********************/
               /**/       a10, /**/ alpha11, a12,
          ABL, /**/ ABR,  A20, /**/ a21,     A22 );

        RepartitionDown
        ( tT,  t0,
         /**/ /****/
               tau1,
          tB,  t2 );

        // aTopRow = [alpha11 a12] and ABottomPan = [a21 A22]
        aTopRow.View1x2( alpha11, a12 );
        ABottomPan.View1x2( a21, A22 );

        // One entry of z per row of ABottomPan
        Zeros( ABottomPan.Height(), 1, z );
        //--------------------------------------------------------------------//
        // Generate the reflector for the current row and record its scalar.
        // NOTE(review): presumably Reflector overwrites alpha11 and a12 with
        // the reduced leading entry and the reflector vector; confirm against
        // its definition.
        const C tau = Reflector( alpha11, a12 );
        tau1.Set( 0, 0, tau );

        // Save alpha11 and temporarily replace it with 1 so that aTopRow can
        // be used as a whole vector in the update below
        const C alpha = alpha11.Get(0,0);
        alpha11.Set(0,0,1);

        // z is computed from ABottomPan and the conjugated row, then used in
        // a rank-one update of ABottomPan scaled by -Conj(tau); see Gemv and
        // Ger for their exact transposition/conjugation conventions
        Conjugate( aTopRow, aTopRowConj );
        Gemv( NORMAL, C(1), ABottomPan, aTopRowConj, C(0), z );
        Ger( -Conj(tau), z, aTopRowConj, ABottomPan );

        // Restore the saved value of alpha11
        alpha11.Set(0,0,alpha);
        //--------------------------------------------------------------------//

        SlidePartitionDown
        ( tT,  t0,
               tau1,
         /**/ /****/
          tB,  t2 );

        SlidePartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, a01,     /**/ A02,
               /**/       a10, alpha11, /**/ a12,
         /*************/ /**********************/
          ABL, /**/ ABR,  A20, a21,     /**/ A22 );
    }
    PopBlocksizeStack();
#ifndef RELEASE
    PopCallStack();
#endif
}
inline typename Base<F>::type HermitianFrobeniusNorm( UpperOrLower uplo, const Matrix<F>& A ) { #ifndef RELEASE PushCallStack("internal::HermitianFrobeniusNorm"); #endif typedef typename Base<F>::type R; if( A.Height() != A.Width() ) throw std::logic_error("Hermitian matrices must be square."); R scale = 0; R scaledSquare = 1; const int height = A.Height(); const int width = A.Width(); if( uplo == UPPER ) { for( int j=0; j<width; ++j ) { for( int i=0; i<j; ++i ) { const R alphaAbs = Abs(A.Get(i,j)); if( alphaAbs != 0 ) { if( alphaAbs <= scale ) { const R relScale = alphaAbs/scale; scaledSquare += 2*relScale*relScale; } else { const R relScale = scale/alphaAbs; scaledSquare = scaledSquare*relScale*relScale + 2; scale = alphaAbs; } } } const R alphaAbs = Abs(A.Get(j,j)); if( alphaAbs != 0 ) { if( alphaAbs <= scale ) { const R relScale = alphaAbs/scale; scaledSquare += relScale*relScale; } else { const R relScale = scale/alphaAbs; scaledSquare = scaledSquare*relScale*relScale + 1; scale = alphaAbs; } } } } else { for( int j=0; j<width; ++j ) { for( int i=j+1; i<height; ++i ) { const R alphaAbs = Abs(A.Get(i,j)); if( alphaAbs != 0 ) { if( alphaAbs <= scale ) { const R relScale = alphaAbs/scale; scaledSquare += 2*relScale*relScale; } else { const R relScale = scale/alphaAbs; scaledSquare = scaledSquare*relScale*relScale + 2; scale = alphaAbs; } } } const R alphaAbs = Abs(A.Get(j,j)); if( alphaAbs != 0 ) { if( alphaAbs <= scale ) { const R relScale = alphaAbs/scale; scaledSquare += relScale*relScale; } else { const R relScale = scale/alphaAbs; scaledSquare = scaledSquare*relScale*relScale + 1; scale = alphaAbs; } } } } const R norm = scale*Sqrt(scaledSquare); #ifndef RELEASE PopCallStack(); #endif return norm; }