Exemple #1
// LUMod: updates an existing LU factorization in place for a rank-one
// modification built from the column vectors u and v.
//
// Parameters:
//   A         - in/out; solved against as a unit lower-triangular matrix
//               (L, strictly below the diagonal) and updated row-wise as U
//               (on and above the diagonal). Requires height(A) <= width(A).
//   P         - in/out; row permutation, applied to u and updated via
//               P.Swap whenever two adjacent rows are pivoted.
//   u         - m x 1 column vector.
//   v         - n x 1 column vector.
//   conjugate - forwarded to Transpose when forming the row vector from v.
//   tau       - threshold in the pivoting tests: rows i and i+1 are swapped
//               when |candidate| < tau * |alternative|.
//
// Calls LogicError when the dimension requirements are violated.
//
// NOTE(review): this listing appears to have lost its template header, its
// braces and its `else` keywords (e.g. `gamma` is declared twice inside each
// loop body); the control structure described below is inferred from the
// comments and should be confirmed against the original source.
void LUMod
( Matrix<F>& A,
        Permutation& P,
  const Matrix<F>& u,
  const Matrix<F>& v,
  bool conjugate,
  Base<F> tau )
    typedef Base<F> Real;
    const Int m = A.Height();
    const Int n = A.Width();
    const Int minDim = Min(m,n);
    // Validate the shapes of A, u and v
    if( minDim != m )
        LogicError("It is assumed that height(A) <= width(A)");
    if( u.Height() != m || u.Width() != 1 )
        LogicError("u is expected to be a conforming column vector");
    if( v.Height() != n || v.Width() != 1 )
        LogicError("v is expected to be a conforming column vector");

    // w := inv(L) P u
    auto w( u );
    P.PermuteRows( w );
    Trsv( LOWER, NORMAL, UNIT, A, w );

    // Maintain an external vector for the temporary subdiagonal of U
    Matrix<F> uSub;
    Zeros( uSub, minDim-1, 1 );

    // Reduce w to a multiple of e0
    for( Int i=minDim-2; i>=0; --i )
        // Decide if we should pivot the i'th and i+1'th rows of w
        const F lambdaSub = A(i+1,i);
        const F ups_ii = A(i,i);
        const F omega_i = w(i);
        const F omega_ip1 = w(i+1);
        const Real rightTerm = Abs(lambdaSub*omega_i+omega_ip1);
        const bool pivot = ( Abs(omega_i) < tau*rightTerm );

        // Index ranges: column/row i, column/row i+1, the rows below i+1,
        // and the columns to the right of i
        const Range<Int> indi( i, i+1 ),
                         indip1( i+1, i+2 ),
                         indB( i+2, m ),
                         indR( i+1, n );

        auto lBi   = A( indB,   indi   );
        auto lBip1 = A( indB,   indip1 );
        auto uiR   = A( indi,   indR   );
        auto uip1R = A( indip1, indR   );

        if( pivot )
            // P := P_i P
            P.Swap( i, i+1 );

            // Simultaneously perform
            //   U := P_i U and
            //   L := P_i L P_i^T
            // Then update
            //     L := L T_{i,L}^{-1},
            //     U := T_{i,L} U,
            //     w := T_{i,L} P_i w,
            // where T_{i,L} is the Gauss transform which zeros (P_i w)_{i+1}.
            // More succinctly,
            //     gamma    := w(i) / w(i+1),
            //     w(i)     := w(i+1),
            //     w(i+1)   := 0,
            //     L(:,i)   += gamma L(:,i+1),
            //     U(i+1,:) -= gamma U(i,:).
            const F gamma = omega_i / omega_ip1;
            const F lambda_ii = F(1) + gamma*lambdaSub;
            A(i,  i) = gamma;
            A(i+1,i) = 0;

            auto lBiCopy = lBi;
            Swap( NORMAL, lBi, lBip1 );
            Axpy( gamma, lBiCopy, lBi );

            auto uip1RCopy = uip1R;
            RowSwap( A, i, i+1 );
            Axpy( -gamma, uip1RCopy, uip1R );

            // Force L back to *unit* lower-triangular form via the transform
            //     L := L T_{i,U}^{-1} D^{-1},
            // where D is diagonal and responsible for forcing L(i,i) and
            // L(i+1,i+1) back to 1. The effect on L is:
            //     eta       := L(i,i+1)/L(i,i),
            //     L(:,i+1)  -= eta L(:,i),
            //     delta_i   := L(i,i),
            //     delta_ip1 := L(i+1,i+1),
            //     L(:,i)   /= delta_i,
            //     L(:,i+1) /= delta_ip1,
            // while the effect on U is
            //     U(i,:)   += eta U(i+1,:)
            //     U(i,:)   *= delta_i,
            //     U(i+1,:) *= delta_{i+1},
            // and the effect on w is
            //     w(i) *= delta_i.
            const F eta = lambdaSub/lambda_ii;
            const F delta_i = lambda_ii;
            const F delta_ip1 = F(1) - eta*gamma;

            Axpy( -eta, lBi, lBip1 );
            A(i+1,i) = gamma/delta_i;
            lBi   *= F(1)/delta_i;
            lBip1 *= F(1)/delta_ip1;

            A(i,i) = eta*ups_ii*delta_i;
            Axpy( eta, uip1R, uiR );
            uiR   *= delta_i;
            uip1R *= delta_ip1;
            uSub(i) = ups_ii*delta_ip1;

            // Finally set w(i)
            w(i) = omega_ip1*delta_i;
            // NOTE(review): what follows reads as the non-pivot branch
            // (gamma is redeclared); the `else` is not visible here.
            // Update
            //     L := L T_{i,L}^{-1},
            //     U := T_{i,L} U,
            //     w := T_{i,L} w,
            // where T_{i,L} is the Gauss transform which zeros w_{i+1}.
            // More succinctly,
            //     gamma    := w(i+1) / w(i),
            //     L(:,i)   += gamma L(:,i+1),
            //     U(i+1,:) -= gamma U(i,:),
            //     w(i+1)   := 0.
            const F gamma = omega_ip1 / omega_i;
            A(i+1,i) += gamma;
            Axpy(  gamma, lBip1, lBi );
            Axpy( -gamma, uiR, uip1R );
            uSub(i) = -gamma*ups_ii;

    // Add the modified w v' into U
        // Only the first row of A is touched: a0 += w(0) * v^T (or v^H when
        // `conjugate` is set)
        auto a0 = A( IR(0), ALL );
        const F omega_0 = w(0);
        Matrix<F> vTrans;
        Transpose( v, vTrans, conjugate );
        Axpy( omega_0, vTrans, a0 );

    // Transform U from upper-Hessenberg to upper-triangular form
    for( Int i=0; i<minDim-1; ++i )
        // Decide if we should pivot the i'th and i+1'th rows of U
        const F lambdaSub = A(i+1,i);
        const F ups_ii = A(i,i);
        // The subdiagonal entry of U is read from the external vector uSub
        const F ups_ip1i = uSub(i);
        const Real rightTerm = Abs(lambdaSub*ups_ii+ups_ip1i);
        const bool pivot = ( Abs(ups_ii) < tau*rightTerm );

        const Range<Int> indi( i, i+1 ),
                         indip1( i+1, i+2 ),
                         indB( i+2, m ),
                         indR( i+1, n );

        auto lBi   = A( indB,   indi   );
        auto lBip1 = A( indB,   indip1 );
        auto uiR   = A( indi,   indR   );
        auto uip1R = A( indip1, indR   );

        if( pivot )
            // P := P_i P
            P.Swap( i, i+1 );

            // Simultaneously perform
            //   U := P_i U and
            //   L := P_i L P_i^T
            // Then update
            //     L := L T_{i,L}^{-1},
            //     U := T_{i,L} U,
            // where T_{i,L} is the Gauss transform which zeros U(i+1,i).
            // More succinctly,
            //     gamma    := U(i+1,i) / U(i,i),
            //     L(:,i)   += gamma L(:,i+1),
            //     U(i+1,:) -= gamma U(i,:).
            const F gamma = ups_ii / ups_ip1i;
            const F lambda_ii = F(1) + gamma*lambdaSub;
            A(i+1,i) = ups_ip1i;
            A(i,  i) = gamma;

            auto lBiCopy = lBi;
            Swap( NORMAL, lBi, lBip1 );
            Axpy( gamma, lBiCopy, lBi );

            auto uip1RCopy = uip1R;
            RowSwap( A, i, i+1 );
            Axpy( -gamma, uip1RCopy, uip1R );

            // Force L back to *unit* lower-triangular form via the transform
            //     L := L T_{i,U}^{-1} D^{-1},
            // where D is diagonal and responsible for forcing L(i,i) and
            // L(i+1,i+1) back to 1. The effect on L is:
            //     eta       := L(i,i+1)/L(i,i),
            //     L(:,i+1)  -= eta L(:,i),
            //     delta_i   := L(i,i),
            //     delta_ip1 := L(i+1,i+1),
            //     L(:,i)   /= delta_i,
            //     L(:,i+1) /= delta_ip1,
            // while the effect on U is
            //     U(i,:)   += eta U(i+1,:)
            //     U(i,:)   *= delta_i,
            //     U(i+1,:) *= delta_{i+1}.
            const F eta = lambdaSub/lambda_ii;
            const F delta_i = lambda_ii;
            const F delta_ip1 = F(1) - eta*gamma;

            Axpy( -eta, lBi, lBip1 );
            A(i+1,i) = gamma/delta_i;
            lBi   *= F(1)/delta_i;
            lBip1 *= F(1)/delta_ip1;

            A(i,i) = ups_ip1i*delta_i;
            Axpy( eta, uip1R, uiR );
            uiR   *= delta_i;
            uip1R *= delta_ip1;
            // NOTE(review): what follows reads as the non-pivot branch
            // (gamma is redeclared); the `else` is not visible here.
            // Update
            //     L := L T_{i,L}^{-1},
            //     U := T_{i,L} U,
            // where T_{i,L} is the Gauss transform which zeros U(i+1,i).
            // More succinctly,
            //     gamma    := U(i+1,i)/ U(i,i),
            //     L(:,i)   += gamma L(:,i+1),
            //     U(i+1,:) -= gamma U(i,:).
            const F gamma = ups_ip1i / ups_ii;
            A(i+1,i) += gamma;
            Axpy(  gamma, lBip1, lBi );
            Axpy( -gamma, uiR, uip1R );
Exemple #2
// QDWHInner: iteratively overwrites A, using a QR-based update when the
// coefficient c exceeds 100 and a Cholesky-based update otherwise (per the
// in-line comments), and returns a QDWHInfo record.
//
// Parameters:
//   A         - in/out m x n matrix with m >= n (LogicError otherwise).
//   sMinUpper - used to initialise the scalar L as sMinUpper / sqrt(n).
//   ctrl      - supplies colPiv (forwarded to the QR control) and maxIts
//               (bound on info.numIts in the loop condition).
//
// The convergence test compares the Frobenius norm of the change in A with
// the cube root of tol = 5*eps, and |1-L| with tol.
//
// NOTE(review): braces, `else` keywords, the body of the final `if`, and any
// increment of info.numIts are not visible in this listing; the branch
// boundaries noted below are inferred and should be confirmed.
QDWHInfo QDWHInner( Matrix<F>& A, Base<F> sMinUpper, const QDWHCtrl& ctrl )
    typedef Base<F> Real;
    typedef Complex<Real> Cpx;
    const Int m = A.Height();
    const Int n = A.Width();
    const Real oneThird = Real(1)/Real(3);
    if( m < n )
        LogicError("Height cannot be less than width");

    QDWHInfo info;

    QRCtrl<Base<F>> qrCtrl;
    qrCtrl.colPiv = ctrl.colPiv;

    const Real eps = limits::Epsilon<Real>();
    const Real tol = 5*eps;
    const Real cubeRootTol = Pow(tol,oneThird);
    Real L = sMinUpper / Sqrt(Real(n));

    Real frobNormADiff;
    Matrix<F> ALast, ATemp, C;
    // Q is an (m+n) x n workspace; QT views its top m rows and QB the rest
    Matrix<F> Q( m+n, n );
    auto QT = Q( IR(0,m  ), ALL );
    auto QB = Q( IR(m,END), ALL );
    while( info.numIts < ctrl.maxIts )
        // Keep the previous iterate so the change can be measured below
        ALast = A;

        Real L2;
        Cpx dd, sqd;
        if( Abs(1-L) < tol )
            L2 = 1;
            dd = 0;
            sqd = 1;
            // NOTE(review): presumably the `else` branch starts here; as
            // written these assignments would overwrite the three above.
            L2 = L*L;
            dd = Pow( 4*(1-L2)/(L2*L2), oneThird );
            sqd = Sqrt( Real(1)+dd );
        // Coefficients a, b, c of this iteration and the derived weights
        // alpha = a - b/c and beta = b/c
        const Cpx arg = Real(8) - Real(4)*dd + Real(8)*(2-L2)/(L2*sqd);
        const Real a = (sqd + Sqrt(arg)/Real(2)).real();
        const Real b = (a-1)*(a-1)/4;
        const Real c = a+b-1;
        const Real alpha = a-b/c;
        const Real beta = b/c;

        L = L*(a+b*L2)/(1+c*L2);

        if( c > 100 )
            // The standard QR-based algorithm
            QT = A;
            QT *= Sqrt(c);
            MakeIdentity( QB );
            qr::ExplicitUnitary( Q, true, qrCtrl );
            Gemm( NORMAL, ADJOINT, F(alpha/Sqrt(c)), QT, QB, F(beta), A );
            // NOTE(review): presumably the `else` branch starts here.
            // Use faster Cholesky-based algorithm since A is well-conditioned
            Identity( C, n, n );
            Herk( LOWER, ADJOINT, c, A, Real(1), C );
            Cholesky( LOWER, C );
            ATemp = A;
            Trsm( RIGHT, LOWER, ADJOINT, NON_UNIT, F(1), C, ATemp );
            Trsm( RIGHT, LOWER, NORMAL, NON_UNIT, F(1), C, ATemp );
            A *= beta;
            Axpy( alpha, ATemp, A );

        // Measure the change in A produced by this iteration
        ALast -= A;
        frobNormADiff = FrobeniusNorm( ALast );
        // NOTE(review): the body of this test is not visible; presumably it
        // leaves the loop once both criteria hold.
        if( frobNormADiff <= cubeRootTol && Abs(1-L) <= tol )
    return info;
Exemple #3
// LLNUnb: unblocked solve against the matrix L, overwriting X column by
// column through raw buffers. The diagonal of L is walked in steps of one or
// two: a step of two is taken when the entry just above the diagonal,
// L(k,k+1), is nonzero, in which case the 2x2 diagonal block is handled via
// a Givens-based LQ decomposition.
//
// Parameters:
//   L               - read-only coefficient matrix (buffer and leading
//                     dimension are used directly).
//   X               - in/out right-hand sides, m x n.
//   checkIfSingular - when true, LogicError is raised on an exactly-zero
//                     pivot (1x1 entry, or gamma11/gamma22 of a 2x2 block).
//
// NOTE(review): braces, the `else` separating the 2x2 and 1x1 paths, and the
// callee names of the three trailing-update calls are not visible in this
// listing.
void LLNUnb( const Matrix<F>& L, Matrix<F>& X, bool checkIfSingular )
    typedef Base<F> Real;
    const Int m = X.Height();
    const Int n = X.Width();

    const F* LBuf = L.LockedBuffer();
          F* XBuf = X.Buffer();
    const Int ldl = L.LDim();
    const Int ldx = X.LDim();

    Int k=0;
    while( k < m )
        // A nonzero L(k,k+1) marks the start of a 2x2 diagonal block
        const bool in2x2 = ( k+1<m && LBuf[k+(k+1)*ldl] != F(0) );
        if( in2x2 ) 
            // Solve the 2x2 linear systems via a 2x2 LQ decomposition produced
            // by the Givens rotation
            //    | L(k,k) L(k,k+1) | | c -conj(s) | = | gamma11 0 |
            //                        | s    c     |
            // and by also forming the bottom two entries of the 2x2 resulting
            // lower-triangular matrix, say gamma21 and gamma22
            // Extract the 2x2 diagonal block, D
            const F delta11 = LBuf[ k   + k   *ldl];
            const F delta12 = LBuf[ k   +(k+1)*ldl];
            const F delta21 = LBuf[(k+1)+ k   *ldl];
            const F delta22 = LBuf[(k+1)+(k+1)*ldl];
            // Decompose D = L Q
            Real c; F s;
            const F gamma11 = Givens( delta11, delta12, c, s );
            const F gamma21 =        c*delta21 + s*delta22;
            const F gamma22 = -Conj(s)*delta21 + c*delta22;
            if( checkIfSingular )
                // TODO: Check if sufficiently small instead
                if( gamma11 == F(0) || gamma22 == F(0) )
                    LogicError("Singular diagonal block detected");
            for( Int j=0; j<n; ++j )
                // xBuf points at column j of X
                F* xBuf = &XBuf[j*ldx];

                // Solve against L
                xBuf[k  ] /= gamma11;
                xBuf[k+1] -= gamma21*xBuf[k];
                xBuf[k+1] /= gamma22;

                // Solve against Q
                const F chi1 = xBuf[k  ];
                const F chi2 = xBuf[k+1];
                xBuf[k  ] = c*chi1 - Conj(s)*chi2;
                xBuf[k+1] = s*chi1 +       c*chi2;

                // Update x2 := x2 - L21 x1
                // NOTE(review): the callee name is missing on the next two
                // statements; the arguments (count, scalar, x, incx, y, incy)
                // look like an axpy-style call -- confirm.
                ( m-(k+2), -xBuf[k  ], 
                  &LBuf[(k+2)+ k   *ldl], 1, &xBuf[k+2], 1 );
                ( m-(k+2), -xBuf[k+1], 
                  &LBuf[(k+2)+(k+1)*ldl], 1, &xBuf[k+2], 1 );

            k += 2;
            // NOTE(review): presumably the 1x1 (`else`) path starts here.
            if( checkIfSingular )
                // TODO: Check if sufficiently small instead
                if( LBuf[k+k*ldl] == F(0) )
                    LogicError("Singular diagonal entry detected");
            for( Int j=0; j<n; ++j )
                F* xBuf = &XBuf[j*ldx];
                // Divide by the diagonal entry, then update the rows below
                xBuf[k] /= LBuf[k+k*ldl];
                ( m-(k+1), -xBuf[k], &LBuf[(k+1)+k*ldl], 1, &xBuf[k+1], 1 );
            k += 1;
Exemple #4
// ALM: splits M into L + S by alternating a soft-threshold step for S and a
// singular-value-threshold step for L (inner "primal" loop), followed by an
// update of the multiplier Y and of the penalty beta (outer loop).
//
// Parameters:
//   M    - read-only input matrix, m x n.
//   L    - output; reset to zeros(m,n), then produced by the SVT step.
//   S    - output; reset to zeros(m,n), then produced by SoftThreshold.
//   ctrl - options read here: tau (defaults to 1/sqrt(max(m,n)) when <= 0),
//          tol (must be positive, LogicError otherwise), beta (defaults to
//          1/(2*twoNorm) when <= 0), rho (growth factor for beta), maxIts,
//          usePivQR / numPivSteps (SVT variant), and progress (verbosity).
//
// Convergence is measured relative to ||M||_F: the inner loop compares the
// changes in L and S against tol; the outer loop compares ||M - L - S||_F.
//
// NOTE(review): braces, `else` keywords, loop exits (`break`) and the
// increments of numIts / numPrimalIts are not visible in this listing.
inline void ALM
( const Matrix<F>& M,
        Matrix<F>& L,
        Matrix<F>& S, 
  const RPCACtrl<Base<F>>& ctrl )
    typedef Base<F> Real;
    const Int m = M.Height();
    const Int n = M.Width();

    // If tau is unspecified, set it to 1/sqrt(max(m,n))
    const Base<F> tau = 
      ( ctrl.tau <= Real(0) ? Real(1) / sqrt(Real(Max(m,n))) :
        ctrl.tau );
    if( ctrl.tol <= Real(0) )
        LogicError("tol cannot be non-positive");
    const Base<F> tol = ctrl.tol;

    const double startTime = mpi::Time();

    // Initialise the multiplier Y from the entrywise-normalized M, scaled by
    // max( ||Y||_2, ||Y||_max / tau )
    Matrix<F> Y( M );
    NormalizeEntries( Y );
    const Real twoNorm = TwoNorm( Y );
    const Real maxNorm = MaxNorm( Y );
    const Real infNorm = maxNorm / tau; 
    const Real dualNorm = Max( twoNorm, infNorm );
    Y *= F(1)/dualNorm;

    // If beta is unspecified, set it to 1 / 2 || sign(M) ||_2
    Base<F> beta = 
      ( ctrl.beta <= Real(0) ? Real(1) / (2*twoNorm) : ctrl.beta );

    const Real frobM = FrobeniusNorm( M );
    const Real maxM = MaxNorm( M );
    if( ctrl.progress )
        cout << "|| M ||_F = " << frobM << "\n"
             << "|| M ||_max = " << maxM << endl;

    Zeros( L, m, n );
    Zeros( S, m, n );

    Int numIts=0, numPrimalIts=0;
    Matrix<F> LLast, SLast, E;
    // Outer loop: one multiplier update per pass
    while( true )
        Int rank, numNonzeros;
        // Inner (primal) loop: alternate the S and L updates
        while( true )

            LLast = L;
            SLast = S;

            // ST_{tau/beta}(M - L + Y/beta)
            S = M;
            S -= L;
            Axpy( F(1)/beta, Y, S );
            SoftThreshold( S, tau/beta );
            numNonzeros = ZeroNorm( S );

            // SVT_{1/beta}(M - S + Y/beta)
            L = M;
            L -= S;
            Axpy( F(1)/beta, Y, L );
            // NOTE(review): the two SVT calls are presumably the two arms of
            // an if/else on ctrl.usePivQR.
            if( ctrl.usePivQR )
                rank = SVT( L, Real(1)/beta, ctrl.numPivSteps );
                rank = SVT( L, Real(1)/beta );

            // Relative change of both iterates since the previous pass
            LLast -= L;
            SLast -= S;
            const Real frobLDiff = FrobeniusNorm( LLast );
            const Real frobSDiff = FrobeniusNorm( SLast );

            if( frobLDiff/frobM < tol && frobSDiff/frobM < tol )
                if( ctrl.progress )
                    cout << "Primal loop converged: " 
                         << mpi::Time()-startTime << " total secs"
                         << endl;
                // NOTE(review): presumably the not-yet-converged branch.
                if( ctrl.progress )
                    cout << "  " << numPrimalIts 
                         << ": \n"
                         << "   || Delta L ||_F / || M ||_F = " 
                         << frobLDiff/frobM << "\n"
                         << "   || Delta S ||_F / || M ||_F = "
                         << frobSDiff/frobM << "\n"
                         << "   rank=" << rank
                         << ", numNonzeros=" << numNonzeros 
                         << ", " << mpi::Time()-startTime << " total secs" 
                         << endl;

        // E := M - (L + S)
        E = M;    
        E -= L;
        E -= S;
        const Real frobE = FrobeniusNorm( E );

        // Three outcomes are reported: converged, iteration limit reached,
        // or (presumably in a trailing `else`) a per-iteration status line
        if( frobE/frobM <= tol )            
            if( ctrl.progress )
                cout << "Converged after " << numIts << " iterations and "
                     << numPrimalIts << " primal iterations with rank=" 
                     << rank << ", numNonzeros=" << numNonzeros << " and "
                     << "|| E ||_F / || M ||_F = " << frobE/frobM
                     << ", " << mpi::Time()-startTime << " total secs"
                     << endl;
        else if( numIts >= ctrl.maxIts )
            if( ctrl.progress )
                cout << "Aborting after " << numIts << " iterations and "
                     << mpi::Time()-startTime << " total secs" 
                     << endl;
            if( ctrl.progress )
                cout << numPrimalIts << ": || E ||_F / || M ||_F = " 
                     << frobE/frobM << ", rank=" << rank 
                     << ", numNonzeros=" << numNonzeros << ", "
                     << mpi::Time()-startTime << " total secs" 
                     << endl;
        // Y := Y + beta E
        Axpy( beta, E, Y );
        beta *= ctrl.rho;
Exemple #5
// Inverse: overwrites the square matrix A with its inverse. The steps shown
// are: LU factorization with pivots p, in-place inversion of the upper
// triangle U, a blocked sweep that solves inv(A) L = inv(U) for inv(A) one
// column panel at a time, and finally application of the inverse column
// pivots.
//
// Throws std::logic_error for a non-square A (inside the non-RELEASE guard).
//
// NOTE(review): braces, `#endif` lines, the declaration of L21, and the
// names of the partition/repartition calls (only their argument lists
// remain) are not visible in this listing.
inline void
Inverse( Matrix<F>& A )
#ifndef RELEASE
    if( A.Height() != A.Width() )
        throw std::logic_error("Cannot invert non-square matrices");
    Matrix<int> p;
    LU( A, p );
    TriangularInverse( UPPER, NON_UNIT, A );

    // Solve inv(A) L = inv(U) for inv(A)
    Matrix<F> ATL, ATR,
           ABL, ABR;
    Matrix<F> A00, A01, A02,
           A10, A11, A12,
           A20, A21, A22;
    Matrix<F> A1, A2;
    Matrix<F> L11,
    ( A, ATL, ATR,
      ABL, ABR, 0 );
    // The loop runs until the bottom-right quadrant ABR spans all of A
    while( ABR.Height() < A.Height() )
        ( ATL, /**/ ATR,  A00, A01, /**/ A02,
          /**/       A10, A11, /**/ A12,
          /*************/ /******************/
          ABL, /**/ ABR,  A20, A21, /**/ A22 );

        // A1 is the current full-height column panel; A2 is everything to
        // its right
        View( A1, A, 0, A00.Width(),             A.Height(), A01.Width() );
        View( A2, A, 0, A00.Width()+A01.Width(), A.Height(), A02.Width() );

        // Copy out L1
        L11 = A11;
        L21 = A21;

        // Zero the strictly lower triangular portion of A1
        MakeTrapezoidal( LEFT, UPPER, 0, A11 );
        Zero( A21 );

        // Perform the lazy update of A1
        Gemm( NORMAL, NORMAL, F(-1), A2, L21, F(1), A1 );

        // Solve against this diagonal block of L11
        Trsm( RIGHT, LOWER, NORMAL, UNIT, F(1), L11, A1 );

        ( ATL, /**/ ATR,  A00, /**/ A01, A02,
          /*************/ /*******************/
          /**/       A10, /**/ A11, A12,
          ABL, /**/ ABR,  A20, /**/ A21, A22 );

    // inv(A) := inv(A) P
    ApplyInverseColumnPivots( A, p );
#ifndef RELEASE
Exemple #6
// Applies a sequence of transforms, stored in H with scalars t, to the
// leading columns of A, processing H panel by panel along its diagonal.
// For each panel the code forms a trapezoidal copy of the panel with unit
// entries on the offset diagonal, builds the small matrix SInv from it
// (Herk followed by FixDiagonal with the panel's scalars), and then updates
//     ALeft := ALeft - (ALeft * HPanCopy^H) * inv(SInv) * HPanCopy.
//
// Parameters:
//   conjugation - forwarded to FixDiagonal.
//   offset      - diagonal offset of the transforms; must satisfy
//                 -H.Width() <= offset <= 0.
//   H           - read-only storage of the transforms; H.Width() must equal
//                 A.Width().
//   t           - read-only scalars; its height must equal the length of
//                 H's `offset` diagonal.
//   A           - in/out matrix updated in place.
//
// Throws std::logic_error on the violations above (non-RELEASE guard).
//
// NOTE(review): the function name, template header, braces, `#endif` lines,
// the declaration keywords for the H*/t* matrices (t1 and t2 are used but
// never visibly declared) and the partition/repartition call names are not
// visible in this listing.
inline void
( Conjugation conjugation, int offset, 
  const Matrix<Complex<R> >& H,
  const Matrix<Complex<R> >& t,
        Matrix<Complex<R> >& A )
#ifndef RELEASE
    if( offset > 0 || offset < -H.Width() )
        throw std::logic_error("Transforms out of bounds");
    if( H.Width() != A.Width() )
        throw std::logic_error
        ("Width of transforms must equal width of target matrix");
    if( t.Height() != H.DiagonalLength( offset ) )
        throw std::logic_error("t must be the same length as H's offset diag");
    typedef Complex<R> C;

        HTL, HTR,  H00, H01, H02,  HPan, HPanCopy,
        HBL, HBR,  H10, H11, H12,
                   H20, H21, H22;
    Matrix<C> ALeft;
        tT,  t0,
        tB,  t1,

    Matrix<C> SInv, Z;

    ( H, HTL, HTR,
         HBL, HBR, 0 );
    ( t, tT,
         tB, 0 );
    while( HTL.Height() < H.Height() && HTL.Width() < H.Width() )
        ( HTL, /**/ HTR,  H00, /**/ H01, H02,
         /*************/ /******************/
               /**/       H10, /**/ H11, H12,
          HBL, /**/ HBR,  H20, /**/ H21, H22 );

        // The panel spans the columns of H10 and H11; rows of H11 that lie
        // above the offset diagonal are skipped via HPanOffset
        const int HPanWidth = H10.Width() + H11.Width();
        const int HPanOffset = 
            std::min( H11.Height(), std::max(-offset-H00.Height(),0) );
        const int HPanHeight = H11.Height()-HPanOffset;
        HPan.LockedView( H, H00.Height()+HPanOffset, 0, HPanHeight, HPanWidth );

        ( tT,  t0,
         /**/ /**/
          tB,  t2, HPanHeight );

        // Only the first HPanWidth columns of A are affected by this panel
        ALeft.View( A, 0, 0, A.Height(), HPanWidth );

        Zeros( ALeft.Height(), HPan.Height(), Z );
        Zeros( HPan.Height(), HPan.Height(), SInv );
        // Work on a copy so H itself is left untouched
        HPanCopy = HPan;
        MakeTrapezoidal( RIGHT, LOWER, offset, HPanCopy );
        SetDiagonalToOne( RIGHT, offset, HPanCopy );

        Herk( UPPER, NORMAL, C(1), HPanCopy, C(0), SInv );
        FixDiagonal( conjugation, t1, SInv );

        // Z := ALeft HPanCopy^H, Z := Z inv(SInv), ALeft -= Z HPanCopy
        Gemm( NORMAL, ADJOINT, C(1), ALeft, HPanCopy, C(0), Z );
        Trsm( RIGHT, UPPER, NORMAL, NON_UNIT, C(1), SInv, Z );
        Gemm( NORMAL, NORMAL, C(-1), Z, HPanCopy, C(1), ALeft );

        ( HTL, /**/ HTR,  H00, H01, /**/ H02,
               /**/       H10, H11, /**/ H12,
         /*************/ /******************/
          HBL, /**/ HBR,  H20, H21, /**/ H22 );

        ( tT,  t0,
         /**/ /**/
          tB,  t2 );
#ifndef RELEASE
// Driver program: reads a lattice basis from a file (optionally transposed),
// configures a BKZCtrl from a long list of command-line options, runs BKZ,
// prints and writes the results, and -- when BKZ did not reach the target
// norm or enumeration was explicitly requested -- runs a short/shortest
// vector enumeration on the whole basis or on a column subset.
//
// Returns 0; std::exception is caught and passed to ReportException.
//
// NOTE(review): the `try`, braces, `else` keywords, `#endif`/`#else` lines,
// the definitions of `Real` and `F`, and several callee names (`Input` at the
// shortestVecFile and eta options, `Output` on some report lines, and the
// enumeration call in the double-cycle loop) are not visible in this
// listing. The hard-coded maxOneNorms / maxInfNorms tables index entries
// 0..7 regardless of numPhases, which per the NOTE below is tailored to one
// specific problem size.
int main( int argc, char* argv[] )
    Environment env( argc, argv );

        // ---- Command-line options ----
        const string inputBasisFile =
          Input("--inputBasisFile","input basis file",string("SVPChallenge40.txt"));
        const bool trans = Input("--transpose","transpose input?",true);
        const string outputBasisFile = 
          Input("--outputBasisFile","output basis file",string("BKZ"));
        const string shortestVecFile = 
          ("--shortestVecFile","shortest vector file",string("shortest"));
        const Real delta = Input("--delta","delta for LLL",Real(0.9999));
        const Real eta =
          ("--eta","eta for LLL",
           Real(1)/Real(2) + Pow(limits::Epsilon<Real>(),Real(0.9)));
        const Int varInt = Input("--variant","0: weak LLL, 1: normal LLL, 2: deep insertion LLL, 3: deep reduction LLL",1);
        const Int blocksize = Input("--blocksize","BKZ blocksize",20);
        const bool variableBsize = Input("--variableBsize","variable blocksize?",false);
        const bool variableEnumType = Input("--variableEnumType","variable enum type?",false);
        const Int multiEnumWindow = Input("--multiEnumWindow","window for y-sparse enumeration",15);
        const Int phaseLength =
          Input("--phaseLength","YSPARSE_ENUM phase length",10);
        const Int progressLevel =
          Input("--progressLevel","YSPARSE_ENUM progress level",4);
        const bool presort = Input("--presort","presort columns?",false);
        const bool smallestFirst =
          Input("--smallestFirst","sort smallest first?",true);
        const bool recursiveLLL = Input("--recursiveLLL","recursive LLL?",true);
        const bool recursiveBKZ =
          Input("--recursiveBKZ","recursive BKZ?",false);
        const Int cutoff = Input("--cutoff","recursive cutoff",10);
        const bool earlyAbort = Input("--earlyAbort","early abort BKZ?",false);
        const Int numEnumsBeforeAbort =
          Input("--numEnumsBeforeAbort","num enums before early aborting",1000);
        const bool subBKZ =
          Input("--subBKZ","use BKZ w/ lower blocksize for subproblems?",true);
        const bool subEarlyAbort =
          Input("--subEarlyAbort","early abort subproblem?",false);
        const bool jumpstartBKZ =
          Input("--jumpstartBKZ","jumpstart BKZ?",false);
        const Int startColBKZ = Input("--startColBKZ","BKZ start column",0);
        const bool timeLLL = Input("--timeLLL","time LLL?",false);
        const bool timeBKZ = Input("--timeBKZ","time BKZ?",true);
        const bool progressLLL =
          Input("--progressLLL","print LLL progress?",false); 
        const bool progressBKZ =
          Input("--progressBKZ","print BKZ progress?",false); 
        const bool print = Input("--print","output all matrices?",true);
        const bool logFailedEnums =
          Input("--logFailedEnums","log failed enumerations in BKZ?",false);
        const bool logStreakSizes = 
          Input("--logStreakSizes","log enum streak sizes in BKZ?",false);
        const bool logNontrivialCoords =
          Input("--logNontrivialCoords","log nontrivial enum coords?",false);
        const bool logNorms = Input("--logNorms","log norms of B?",true);
        const bool logProjNorms =
          Input("--logProjNorms","log proj norms of B?",true);
        const bool checkpoint =
          Input("--checkpoint","checkpoint each tour?",true);
        const Real targetRatio =
          Input("--targetRatio","targeted ratio of GH(L)",Real(1.05));
        const bool timeEnum = Input("--timeEnum","time enum?",true);
        const bool innerEnumProgress =
          Input("--innerEnumProgress","inner enum progress?",false);
        const bool probEnum =
          Input("--probEnum","probabalistic enumeration *after* BKZ?",true);
        const bool fullEnum = Input("--fullEnum","SVP via full enum?",false);
        const bool enumOnSubset = Input("--enumOnSubset","enum on subset?",false);
        const Int subsetStart = Input("--subsetStart","start of subset",0);
        const Int subsetSize = Input("--subsetSize","num cols in subset",60);
        const bool doubleCycle = Input("--doubleCycle","cycle last vectors?",false);
#ifdef EL_HAVE_MPC
        const mpfr_prec_t prec =
          Input("--prec","MPFR precision",mpfr_prec_t(1024));

#ifdef EL_HAVE_MPC
        mpc::SetPrecision( prec );

        // ---- Load the basis (the second Read is presumably the `else` arm
        // of the transpose test) ----
        Matrix<Real> B; 
        if( trans )
            Matrix<Real> BTrans;
            Read( BTrans, inputBasisFile );
            Transpose( BTrans, B ); 
            Read( B, inputBasisFile );
        const Int m = B.Height();
        const Int n = B.Width();
        const Real BOrigOne = OneNorm( B ); 
        Output("|| B_orig ||_1 = ",BOrigOne);
        if( print )
            Print( B, "BOrig" );

        // Maps an index j to a blocksize.
        // NOTE(review): two alternative tables appear back to back ("With
        // k-sparse" and "Full enum"); how they were selected (preprocessor or
        // otherwise) is not visible here.
        auto blocksizeLambda =
          [&]( Int j )
              // With k-sparse
              if( j <= 3 )
                  return 146;
              else if( j <= 10 )
                  return 62;
              else if( j <= 20 )
                  return 60;
              else if( j <= 50 )
                  return 55;
                  return 45;
              // Full enum
              if( j == 0 )
                  return 80;
              else if( j == 1 )
                  return 75;
              else if( j == 2 )
                  return 70;
              else if( j <= 10 )
                  return 62;
              else if( j <= 20 )
                  return 60;
              else if( j <= 50 )
                  return 55;
                  return 45;
        // Maps an index j to an enumeration type
        auto enumTypeLambda = 
          [&]( Int j )
              if( j <= 3 )
                  return YSPARSE_ENUM;
                  return FULL_ENUM;
              //return FULL_ENUM;
        // ---- Transfer the options into the BKZ control structure ----
        BKZCtrl<Real> ctrl;
        ctrl.blocksize = blocksize;
        ctrl.variableBlocksize = variableBsize;
        ctrl.blocksizeFunc = function<Int(Int)>(blocksizeLambda);
        ctrl.variableEnumType = variableEnumType;
        ctrl.enumTypeFunc = function<EnumType(Int)>(enumTypeLambda);
        ctrl.multiEnumWindow = multiEnumWindow;
        ctrl.time = timeBKZ;
        ctrl.progress = progressBKZ;
        ctrl.recursive = recursiveBKZ;
        ctrl.jumpstart = jumpstartBKZ;
        ctrl.startCol = startColBKZ;
        ctrl.enumCtrl.enumType = FULL_ENUM;
        ctrl.enumCtrl.time = timeEnum;
        ctrl.enumCtrl.innerProgress = innerEnumProgress; 
        ctrl.enumCtrl.phaseLength = phaseLength;
        ctrl.enumCtrl.progressLevel = progressLevel;
        ctrl.earlyAbort = earlyAbort;
        ctrl.numEnumsBeforeAbort = numEnumsBeforeAbort;
        ctrl.subBKZ = subBKZ;
        ctrl.subEarlyAbort = subEarlyAbort;
        ctrl.logFailedEnums = logFailedEnums;
        ctrl.logStreakSizes = logStreakSizes;
        ctrl.logNontrivialCoords = logNontrivialCoords;
        ctrl.logNorms = logNorms;
        ctrl.logProjNorms = logProjNorms;
        ctrl.checkpoint = checkpoint;
        ctrl.lllCtrl.delta = delta;
        ctrl.lllCtrl.eta = eta;
        ctrl.lllCtrl.variant = static_cast<LLLVariant>(varInt);
        ctrl.lllCtrl.recursive = recursiveLLL;
        ctrl.lllCtrl.cutoff = cutoff;
        ctrl.lllCtrl.presort = presort;
        ctrl.lllCtrl.smallestFirst = smallestFirst;
        ctrl.lllCtrl.progress = progressLLL;
        ctrl.lllCtrl.time = timeLLL;

        // Per-phase norm bounds for the enumeration; phases cover the
        // indices from startIndex to n in chunks of phaseLength
        ctrl.enumCtrl.customMaxInfNorms = true;
        ctrl.enumCtrl.customMaxOneNorms = true;
        const Int startIndex = Max(n/2-1,0);
        const Int numPhases = ((n-startIndex)+phaseLength-1) / phaseLength;
        ctrl.enumCtrl.maxInfNorms.resize( numPhases, 1 );
        ctrl.enumCtrl.maxOneNorms.resize( numPhases );
        // NOTE: This is tailored to SVP 146 where the ranges are
        // 0: [72,82)
        // 1: [82,92)
        // 2: [92,102)
        // 3: [102,112)
        // 4: [112,122)
        // 5: [122,132)
        // 6: [132,142)
        // 7: [142,146)
        ctrl.enumCtrl.maxOneNorms[0] = 0;
        ctrl.enumCtrl.maxOneNorms[1] = 1;
        ctrl.enumCtrl.maxOneNorms[2] = 1;
        ctrl.enumCtrl.maxOneNorms[3] = 1;
        ctrl.enumCtrl.maxOneNorms[4] = 1;
        ctrl.enumCtrl.maxOneNorms[5] = 2;
        ctrl.enumCtrl.maxOneNorms[6] = 3;
        ctrl.enumCtrl.maxOneNorms[7] = 3;

        ctrl.enumCtrl.maxInfNorms[0] = 1;
        ctrl.enumCtrl.maxInfNorms[1] = 1;
        ctrl.enumCtrl.maxInfNorms[2] = 1;
        ctrl.enumCtrl.maxInfNorms[3] = 1;
        ctrl.enumCtrl.maxInfNorms[4] = 1;
        ctrl.enumCtrl.maxInfNorms[5] = 1;
        ctrl.enumCtrl.maxInfNorms[6] = 2;
        ctrl.enumCtrl.maxInfNorms[7] = 2;

        // ---- Run BKZ and report ----
        const double startTime = mpi::Time();
        Matrix<Real> R;
        auto info = BKZ( B, R, ctrl );
        const double runTime = mpi::Time() - startTime;
        ("  BKZ(",blocksize,",",delta,",",eta,") took ",runTime," seconds"); 
        Output("    achieved delta:   ",info.delta);
        Output("    achieved eta:     ",info.eta);
        Output("    num swaps:        ",info.numSwaps);
        Output("    num enums:        ",info.numEnums);
        Output("    num failed enums: ",info.numEnumFailures);
        Output("    log(vol(L)):      ",info.logVol);
        // The success threshold is targetRatio times the value returned by
        // LatticeGaussianHeuristic
        const Real GH = LatticeGaussianHeuristic( info.rank, info.logVol );
        const Real challenge = targetRatio*GH;
        Output("    GH(L):             ",GH);
        Output("    targetRatio*GH(L): ",challenge);
        if( print )
            Print( B, "B" ); 
            Print( R, "R" );
        Write( B, outputBasisFile, ASCII, "BKZ" );
        const Real BOneNorm = OneNorm( B );
        Output("|| B ||_1 = ",BOneNorm);

        // The first column of the reduced basis is the candidate vector
        auto b0 = B( ALL, IR(0) );
        const Real b0Norm = FrobeniusNorm( b0 );
        Output("|| b_0 ||_2 = ",b0Norm);
        if( print )
            Print( b0, "b0" );
        bool succeeded = false;
        if( b0Norm <= challenge )
            ("SVP Challenge solved via BKZ: || b_0 ||_2=",b0Norm,
             " <= targetRatio*GH(L)=",challenge);
            succeeded = true;
            Write( b0, shortestVecFile, ASCII, "b0" );
            ("SVP Challenge NOT solved via BKZ: || b_0 ||_2=",b0Norm,
             " > targetRatio*GH(L)=",challenge);

        // ---- Optional enumeration on the full basis or on a column subset ----
        if( !succeeded || fullEnum || (enumOnSubset && subsetStart != 0) )
            const Int start = ( enumOnSubset ? subsetStart : 0 ); 
            const Int numCols = ( enumOnSubset ? subsetSize : n );
            const Range<Int> subInd( start, start+numCols );
            auto BSub = B( ALL, subInd );
            auto RSub = R( subInd, subInd );

            // When the subset does not start at column 0, the target is the
            // leading entry of RSub rather than the challenge norm
            const Real target = ( start == 0 ? challenge : RSub.Get(0,0) );

            Timer timer;
            if( enumOnSubset && doubleCycle && subsetSize >= 2 )
                // Double-cycle mode: keep the first subsetSize-2 subset
                // columns fixed and try pairs (j,k) of later columns in the
                // last two slots, enumerating in double precision
                Matrix<double> v;
                EnumCtrl<double> enumCtrl;
                enumCtrl.enumType = ( probEnum ? GNR_ENUM : FULL_ENUM );
                enumCtrl.numTrials = 1;

                Matrix<double> BSubSwap;
                Zeros( BSubSwap, m, subsetSize );
                auto BL = B( ALL, IR(start,start+subsetSize-2) );
                auto BSubSwapL = BSubSwap( ALL, IR(0,subsetSize-2) );
                Copy( BL, BSubSwapL );
                for( Int j=start+subsetSize-2; j<n-1; ++j )
                    auto bj = B( ALL, IR(j) ); 
                    auto bSubSwapj = BSubSwap( ALL, IR(subsetSize-2) );
                    Copy( bj, bSubSwapj );
                    for( Int k=j+1; k<n-1; ++k )
                        auto bk = B( ALL, IR(k) );
                        auto bSubSwapk = BSubSwap( ALL, IR(subsetSize-1) ); 
                        Copy( bk, bSubSwapk );
                        Matrix<double> RSubSwap( BSubSwap );
                        Output("Cycling with j=",j,", k=",k);
                        qr::ExplicitTriang( RSubSwap );
                        Real result =
                          ( BSubSwap, RSubSwap, double(target), v, enumCtrl );
                        Output("Enumeration: ",timer.Stop()," seconds");
                        if( result < RSubSwap.Get(0,0)-double(0.001) )
                            Print( BSubSwap, "BSubSwap" );
                            Print( v, "v" );
                            Matrix<double> x;
                            Zeros( x, m, 1 );
                            Gemv( NORMAL, 1., BSubSwap, v, 0., x );
                            Print( x, "x" );
                            const double xNorm = FrobeniusNorm( x );
                            Output("|| x ||_2 = ",xNorm);
                            Output("Claimed || x ||_2 = ",result);
                            Write( x, shortestVecFile, ASCII, "x" );
                // NOTE(review): presumably the `else` (single enumeration)
                // branch starts here.
                Matrix<F> v;
                EnumCtrl<Real> enumCtrl;
                enumCtrl.enumType = ( probEnum ? GNR_ENUM : FULL_ENUM );
                Real result;
                if( fullEnum )
                  result = 
                    ShortestVectorEnumeration( BSub, RSub, target, v, enumCtrl );
                  result = 
                    ShortVectorEnumeration( BSub, RSub, target, v, enumCtrl );
                Output("Enumeration: ",timer.Stop()," seconds");
                if( result < target )
                    // Reconstruct the vector x = BSub v, report and save it
                    Print( BSub, "BSub" );
                    Print( v, "v" );
                    Matrix<Real> x;
                    Zeros( x, m, 1 );
                    Gemv( NORMAL, Real(1), BSub, v, Real(0), x );
                    Print( x, "x" );
                    const Real xNorm = FrobeniusNorm( x );
                    Output("|| x ||_2 = ",xNorm);
                    Output("Claimed || x ||_2 = ",result);
                    Write( x, shortestVecFile, ASCII, "x" );

                    EnrichLattice( BSub, v );
                    Print( B, "BNew" );
                    Output("Enumeration failed after ",timer.Stop()," seconds");
    catch( std::exception& e ) { ReportException(e); }
    return 0;
Exemple #8
// Checks a bidiagonal reduction: rebuilds the bidiagonal matrix B from the
// diagonals stored in A, applies the transforms packed in A to AOrig, and
// reports the scaled infinity norm of their difference.
//
//   A              - source of the bidiagonal entries; also forwarded to
//                    bidiag::ApplyQ / bidiag::ApplyP
//   phaseP, phaseQ - forwarded to bidiag::ApplyP / bidiag::ApplyQ
//   AOrig          - overwritten in place (transformed, then trimmed)
//   print, display - emit intermediate matrices through Print / Display
//
// Calls LogicError when the scaled relative error exceeds one.
//
// NOTE(review): this listing shows no braces and appears to have lost at
// least one `else` (see the trapezoid step below); confirm the control flow
// against the upstream file.
void TestCorrectness
( const Matrix<F>& A, 
  const Matrix<F>& phaseP,
  const Matrix<F>& phaseQ,
        Matrix<F>& AOrig,
  bool print,
  bool display )
    typedef Base<F> Real;
    const Int m = AOrig.Height();
    const Int n = AOrig.Width();
    const Real eps = limits::Epsilon<Real>();
    // Captured before AOrig is overwritten below; used to scale the error
    const Real oneNormAOrig = OneNorm( AOrig );
    Output("Testing error...");

    // Grab the main diagonal and the adjacent diagonal of the bidiagonal
    // matrix (offset +1 when m >= n, offset -1 otherwise)
    auto d = GetDiagonal( A, 0 );
    auto e = GetDiagonal( A, (m>=n ? 1 : -1) );

    // Zero B and then fill its bidiagonal
    Matrix<F> B;
    Zeros( B, m, n );
    SetDiagonal( B, d, 0  );
    SetDiagonal( B, e, (m>=n ? 1 : -1) );
    if( print )
        Print( B, "Bidiagonal" );
    if( display )
        Display( B, "Bidiagonal" );

    // Only when output was requested: form Q and P explicitly by applying
    // the packed transforms to identity matrices
    if( print || display )
        Matrix<F> Q, P;
        Identity( Q, m, m );
        Identity( P, n, n );
        bidiag::ApplyQ( LEFT,  NORMAL, A, phaseQ, Q );
        bidiag::ApplyP( RIGHT, NORMAL, A, phaseP, P );
        if( print )
            Print( Q, "Q" );
            Print( P, "P" );
        if( display )
            Display( Q, "Q" );
            Display( P, "P" );

    // Reverse the accumulated Householder transforms
    bidiag::ApplyQ( LEFT,  ADJOINT, A, phaseQ, AOrig );
    bidiag::ApplyP( RIGHT, NORMAL,  A, phaseP, AOrig );
    if( print )
        Print( AOrig, "Manual bidiagonal" );
    if( display )
        Display( AOrig, "Manual bidiagonal" );

    // Compare the appropriate portion of AOrig and B
    // NOTE(review): the first MakeTrapezoidal pair presumably belongs to the
    // m >= n case and the second pair (offset -1) to an `else` that is not
    // visible here -- confirm.
    if( m >= n )
        MakeTrapezoidal( UPPER, AOrig );
        MakeTrapezoidal( LOWER, AOrig, 1 );
        MakeTrapezoidal( LOWER, AOrig ); 
        MakeTrapezoidal( UPPER, AOrig, -1 );
    // B now holds the difference between the stored and recomputed bidiagonal
    B -= AOrig;
    if( print )
        Print( B, "Error in rotated bidiagonal" );
    if( display )
        Display( B, "Error in rotated bidiagonal" );
    const Real infNormError = InfinityNorm( B );
    // Scale by max(m,n) * ||AOrig||_1 * eps, using the norm taken on entry
    const Real relError = infNormError / (Max(m,n)*oneNormAOrig*eps);

    Output("||B - Q^H A P||_oo / (max(m,n) || A ||_1 eps) = ",relError);

    // TODO: Use a more refined failure condition
    if( relError > Real(1) )
        LogicError("Relative error was unacceptably large");
Exemple #9
// Assembles the matrices Q, c, AHat, G, h of a quadratic program over the
// stacked variable [u; v; r] (lengths n, n, m), solves it with QP(...), and
// returns x := u - v.
//
//   A, b             - data; A is m x n, b is forwarded to QP unchanged
//   lambda1, lambda2 - weights placed in c and on the (u,v) diagonal of Q
//   x                - output, overwritten with u - v
//   ctrl             - forwarded to QP
//
// NOTE(review): presumably an elastic-net model (name `EN`, two lambda
// weights) -- confirm against the upstream documentation.
void EN
( const Matrix<Real>& A,
  const Matrix<Real>& b,
        Real lambda1,
        Real lambda2,
        Matrix<Real>& x,
  const qp::affine::Ctrl<Real>& ctrl )
    const Int m = A.Height();
    const Int n = A.Width();
    // Index ranges of u, v and r inside the stacked variable
    const Range<Int> uInd(0,n), vInd(n,2*n), rInd(2*n,2*n+m);

    Matrix<Real> Q, c, AHat, G, h;

    // Q := | 2*lambda_2     0      0 |
    //      |     0      2*lambda_2 0 |
    //      |     0          0      2 |
    // ================================
    // NOTE(review): the diagram above shows 2 in the r-block, but the code
    // below fills that block's diagonal with Real(1) -- confirm which of the
    // two is intended.
    Zeros( Q, 2*n+m, 2*n+m );
    auto QTL = Q( IR(0,2*n), IR(0,2*n) );
    FillDiagonal( QTL, 2*lambda2 );
    auto Qrr = Q( rInd, rInd );
    FillDiagonal( Qrr, Real(1) );

    // c := lambda_1*[1;1;0]
    // =====================
    Zeros( c, 2*n+m, 1 );
    auto cuv = c( IR(0,2*n), ALL );
    Fill( cuv, lambda1 );

    // \hat A := [A, -A, I]
    // ====================
    Zeros( AHat, m, 2*n+m );
    auto AHatu = AHat( ALL, uInd );
    auto AHatv = AHat( ALL, vInd );
    auto AHatr = AHat( ALL, rInd );
    AHatu = A;
    // AHat was just zeroed, so subtracting A leaves -A in the v columns
    AHatv -= A;
    FillDiagonal( AHatr, Real(1) );

    // G := | -I  0 0 |
    //      |  0 -I 0 |
    // ================
    Zeros( G, 2*n, 2*n+m );
    FillDiagonal( G, Real(-1) );

    // h := 0
    // ======
    Zeros( h, 2*n, 1 );

    // Solve the affine QP
    // ===================
    Matrix<Real> xHat, y, z, s;
    QP( Q, AHat, G, b, c, h, xHat, y, z, s, ctrl );

    // x := u - v
    // ==========
    x = xHat( uInd, ALL );
    x -= xHat( vInd, ALL );
Exemple #10
// Blocked factorization driver: walks down the diagonal of A, factoring each
// diagonal block with ldl::Var3Unb and updating the blocks below and to the
// right of it. The error text below names the operation as LDL^T or LDL^H.
//
//   orientation - must not be NORMAL (checked in non-RELEASE builds)
//   A           - square matrix, overwritten in place
//   d           - column vector; resized to n x 1 unless it is a view
//
// Throws std::logic_error on the argument checks below (non-RELEASE builds).
//
// NOTE(review): this listing shows no braces or `#endif` lines, and the
// view declarations and Partition/Repartition call names appear to have been
// dropped -- confirm against the upstream file.
inline void
Var3( Orientation orientation, Matrix<F>& A, Matrix<F>& d )
#ifndef RELEASE
    if( A.Height() != A.Width() )
        throw std::logic_error("A must be square");
    if( d.Viewing() && (d.Height() != A.Height() || d.Width() != 1) )
        throw std::logic_error
        ("d must be a column vector the same height as A");
    if( orientation == NORMAL )
        throw std::logic_error("Can only perform LDL^T or LDL^H");
    const int n = A.Height();
    if( !d.Viewing() )
        d.ResizeTo( n, 1 );

    // View names used by the partitioning below (declaration keywords are
    // not visible in this listing)
        ATL, ATR,  A00, A01, A02,
        ABL, ABR,  A10, A11, A12,
                   A20, A21, A22;
        dT,  d0,
        dB,  d1,
    // Holds a copy of A21 taken before it is scaled by the diagonal
    Matrix<F> S21;

    // Start the algorithm
    ( A, ATL, ATR,
         ABL, ABR, 0 );
    ( d, dT,
         dB, 0 );
    while( ABR.Height() > 0 )
        ( ATL, /**/ ATR,  A00, /**/ A01, A02,
         /*************/ /******************/
               /**/       A10, /**/ A11, A12,
          ABL, /**/ ABR,  A20, /**/ A21, A22 );

        ( dT,  d0,
         /**/ /**/
          dB,  d2 );

        // Factor the diagonal block, writing its diagonal into d1
        ldl::Var3Unb( orientation, A11, d1 );
        // Triangular solve of A21 from the right against unit-lower A11
        Trsm( RIGHT, LOWER, orientation, UNIT, F(1), A11, A21 );
        // Keep the unscaled panel, then scale A21 by the inverse of d1
        S21 = A21;
        DiagonalSolve( RIGHT, NORMAL, d1, A21 );
        // Lower-triangular update of A22 using S21 and the scaled A21
        internal::TrrkNT( LOWER, orientation, F(-1), S21, A21, F(1), A22 );

        ( dT,  d0,
         /**/ /**/
          dB,  d2 );

        ( ATL, /**/ ATR,  A00, A01, /**/ A02,
               /**/       A10, A11, /**/ A12,
         /*************/ /******************/
          ABL, /**/ ABR,  A20, A21, /**/ A22 );
#ifndef RELEASE
// Computes a Frobenius norm from one stored triangle of a square matrix.
// Each strictly off-diagonal entry of the chosen triangle is counted twice
// and each diagonal entry once. The sum of squares is carried in scaled form
// as scale^2 * scaledSquare, and the result is scale*Sqrt(scaledSquare).
//
//   uplo - UPPER reads entries with i <= j; the other loop nest reads i >= j
//   A    - square matrix (LogicError is called otherwise, non-RELEASE builds)
//
// NOTE(review): this listing shows no braces and the `else` keywords are
// missing. Each "alphaAbs <= scale" test is presumably followed by an else
// branch that rescales scaledSquare and raises scale, and the second loop
// nest is presumably the `else` of "uplo == UPPER" -- confirm upstream.
HermitianFrobeniusNorm( UpperOrLower uplo, const Matrix<F>& A )
#ifndef RELEASE
    CallStackEntry entry("HermitianFrobeniusNorm");
    if( A.Height() != A.Width() )
        LogicError("Hermitian matrices must be square.");

    typedef BASE(F) R;
    // Running representation: sum of squares == scale^2 * scaledSquare
    R scale = 0;
    R scaledSquare = 1;
    const Int height = A.Height();
    const Int width = A.Width();
    if( uplo == UPPER )
        for( Int j=0; j<width; ++j )
            // Strictly upper entries of column j, each weighted by 2
            for( Int i=0; i<j; ++i )
                const R alphaAbs = Abs(A.Get(i,j));
                if( alphaAbs != 0 )
                    if( alphaAbs <= scale )
                        const R relScale = alphaAbs/scale;
                        scaledSquare += 2*relScale*relScale;
                        const R relScale = scale/alphaAbs;
                        scaledSquare = scaledSquare*relScale*relScale + 2;
                        scale = alphaAbs;
            // Diagonal entry of column j, weighted by 1
            const R alphaAbs = Abs(A.Get(j,j));
            if( alphaAbs != 0 )
                if( alphaAbs <= scale )
                    const R relScale = alphaAbs/scale;
                    scaledSquare += relScale*relScale;
                    const R relScale = scale/alphaAbs;
                    scaledSquare = scaledSquare*relScale*relScale + 1;
                    scale = alphaAbs;
        for( Int j=0; j<width; ++j )
            // Strictly lower entries of column j, each weighted by 2
            for( Int i=j+1; i<height; ++i )
                const R alphaAbs = Abs(A.Get(i,j));
                if( alphaAbs != 0 )
                    if( alphaAbs <= scale )
                        const R relScale = alphaAbs/scale;
                        scaledSquare += 2*relScale*relScale;
                        const R relScale = scale/alphaAbs;
                        scaledSquare = scaledSquare*relScale*relScale + 2;
                        scale = alphaAbs;
            // Diagonal entry of column j, weighted by 1
            const R alphaAbs = Abs(A.Get(j,j));
            if( alphaAbs != 0 )
                if( alphaAbs <= scale )
                    const R relScale = alphaAbs/scale;
                    scaledSquare += relScale*relScale;
                    const R relScale = scale/alphaAbs;
                    scaledSquare = scaledSquare*relScale*relScale + 1;
                    scale = alphaAbs;
    return scale*Sqrt(scaledSquare);
Exemple #12
// Unblocked factorization kernel working directly on A's buffer. For each
// column j it copies the diagonal entry into d, subtracts a rank-one update
// from the lower triangle of the trailing submatrix, and then divides the
// subdiagonal part of column j by the diagonal entry. The error text below
// names the operation as LDL^T or LDL^H.
//
//   orientation - ADJOINT conjugates the scaled column in the update; must
//                 not be NORMAL (checked in non-RELEASE builds)
//   A           - square matrix, overwritten in place
//   d           - column vector; resized to n x 1 unless it is a view
//
// Throws SingularMatrixException when a diagonal entry is exactly zero, and
// std::logic_error on the argument checks (non-RELEASE builds).
//
// NOTE(review): this listing shows no braces or `#endif` lines, and the
// second update loop is presumably the `else` of "orientation == ADJOINT"
// -- confirm against the upstream file.
inline void
Var3Unb( Orientation orientation, Matrix<F>& A, Matrix<F>& d )
#ifndef RELEASE
    if( A.Height() != A.Width() )
        throw std::logic_error("A must be square");
    if( d.Viewing() && (d.Height() != A.Height() || d.Width() != 1) )
        throw std::logic_error
        ("d must be a column vector the same height as A");
    if( orientation == NORMAL )
        throw std::logic_error("Can only perform LDL^T or LDL^H");
    const int n = A.Height();
    if( !d.Viewing() )
        d.ResizeTo( n, 1 );

    // Raw access; entry (i,j) of A lives at ABuffer[i+j*ldim]
    F* ABuffer = A.Buffer();
    F* dBuffer = d.Buffer();
    const int ldim = A.LDim();
    for( int j=0; j<n; ++j )
        // Number of rows below the diagonal in column j
        const int a21Height = n - (j+1);

        // Extract and store the diagonal of D
        const F alpha11 = ABuffer[j+j*ldim];
        if( alpha11 == F(0) )
            throw SingularMatrixException();
        dBuffer[j] = alpha11; 

        // a21 points at the entries of column j below the diagonal
        F* RESTRICT a21 = &ABuffer[(j+1)+j*ldim];
        if( orientation == ADJOINT )
            // A22 := A22 - a21 (a21 / alpha11)^H
            // (only rows i >= k of trailing column k are touched)
            for( int k=0; k<a21Height; ++k )
                const F beta = Conj(a21[k]/alpha11);
                F* RESTRICT A22Col = &ABuffer[(j+1)+(j+1+k)*ldim];
                for( int i=k; i<a21Height; ++i )
                    A22Col[i] -= a21[i]*beta;
            // A22 := A22 - a21 (a21 / alpha11)^T
            // (only rows i >= k of trailing column k are touched)
            for( int k=0; k<a21Height; ++k )
                const F beta = a21[k]/alpha11;
                F* RESTRICT A22Col = &ABuffer[(j+1)+(j+1+k)*ldim];
                for( int i=k; i<a21Height; ++i )
                    A22Col[i] -= a21[i]*beta;
        // a21 := a21 / alpha11
        for( int i=0; i<a21Height; ++i )
            a21[i] /= alpha11;
#ifndef RELEASE
Exemple #13
// Iteratively overwrites A, choosing per iteration between a QR-based update
// (when the coefficient c exceeds 100) and a Cholesky-based update, until
// the Frobenius norm of the change in A and the distance of lowerBound from
// one both fall within tolerance. Returns numIts.
//
//   A          - height >= width is required (std::logic_error otherwise);
//                overwritten in place
//   lowerBound - updated every iteration from the coefficients a, b, c
//   upperBound - A is scaled by 1/upperBound before iterating
//
// NOTE(review): this listing shows no braces, and the `do` keyword, the
// `else` keywords and any increment of numIts are not visible. The trailing
// `while( ... );` suggests a do-while loop -- confirm upstream.
int QDWH
( Matrix<F>& A, 
  typename Base<F>::type lowerBound,
  typename Base<F>::type upperBound )
#ifndef RELEASE
    typedef typename Base<F>::type R;
    const int height = A.Height();
    const int width = A.Width();
    const R oneHalf = R(1)/R(2);
    const R oneThird = R(1)/R(3);

    if( height < width )
        throw std::logic_error("Height cannot be less than width");

    // Convergence thresholds derived from machine epsilon
    const R epsilon = lapack::MachineEpsilon<R>();
    const R tol = 5*epsilon;
    const R cubeRootTol = Pow(tol,oneThird);

    // Form the first iterate
    Scale( 1/upperBound, A );

    int numIts=0;
    R frobNormADiff;
    Matrix<F> ALast;
    // Q is a (height+width) x width workspace split into a top block QT
    // (first `height` rows) and a bottom block QB
    Matrix<F> Q( height+width, width );
    Matrix<F> QT, QB;
    PartitionDown( Q, QT,
                      QB, height );
    Matrix<F> C;
    Matrix<F> ATemp;
        // Remember the current iterate so the change can be measured below
        ALast = A;

        // Iteration coefficients a, b, c computed from lowerBound; the first
        // three assignments apply when lowerBound is within tol of one
        // (presumably the other three are the missing `else`)
        R L2;
        Complex<R> dd, sqd;
        if( Abs(1-lowerBound) < tol )
            L2 = 1;
            dd = 0;
            sqd = 1;
            L2 = lowerBound*lowerBound;
            dd = Pow( 4*(1-L2)/(L2*L2), oneThird );
            sqd = Sqrt( 1+dd );
        const Complex<R> arg = 8 - 4*dd + 8*(2-L2)/(L2*sqd);
        const R a = (sqd + Sqrt( arg )/2).real;
        const R b = (a-1)*(a-1)/4;
        const R c = a+b-1;
        const Complex<R> alpha = a-b/c;
        const Complex<R> beta = b/c;

        lowerBound = lowerBound*(a+b*L2)/(1+c*L2);

        if( c > 100 )
            // The standard QR-based algorithm
            QT = A;
            Scale( Sqrt(c), QT );
            MakeIdentity( QB );
            ExplicitQR( Q );
            Gemm( NORMAL, ADJOINT, alpha/Sqrt(c), QT, QB, beta, A );
            // Use faster Cholesky-based algorithm since A is well-conditioned
            Identity( width, width, C );
            Herk( LOWER, ADJOINT, F(c), A, F(1), C );
            Cholesky( LOWER, C );
            ATemp = A;
            Trsm( RIGHT, LOWER, ADJOINT, NON_UNIT, F(1), C, ATemp );
            Trsm( RIGHT, LOWER, NORMAL, NON_UNIT, F(1), C, ATemp );
            Scale( beta, A );
            Axpy( alpha, ATemp, A );

        // ALast := ALast - A, then measure the size of the change
        Axpy( F(-1), A, ALast );
        frobNormADiff = Norm( ALast, FROBENIUS_NORM );
    while( frobNormADiff > cubeRootTol || Abs(1-lowerBound) > tol );
#ifndef RELEASE
    return numIts;
// Blocked elimination of A with row pivoting in which every row operation
// (pivoting, triangular solve, trailing update) is also applied to B.
//
//   A - overwritten in place
//   B - must have the same height as A (LogicError is called otherwise in
//       non-RELEASE builds); overwritten in place
//
// NOTE(review): this listing shows no braces or `#endif` lines, and the
// view declarations and Partition/Repartition call names appear to have been
// dropped -- confirm against the upstream file.
inline void
RowEchelon( Matrix<F>& A, Matrix<F>& B )
#ifndef RELEASE
    CallStackEntry entry("RowEchelon");
    if( A.Height() != B.Height() )
        LogicError("A and B must be the same height");
    // Matrix views
        ATL, ATR,  A00, A01, A02,  APan,
        ABL, ABR,  A10, A11, A12,
                   A20, A21, A22;
        BT,  B0,
        BB,  B1,

    // Pivots produced for the current panel
    Matrix<Int> p1;

    // Pivot composition
    std::vector<Int> image, preimage;

    // Start the algorithm
    ( A, ATL, ATR,
         ABL, ABR, 0 );
    ( B, BT,
         BB, 0 );
    while( ATL.Height() < A.Height() && ATL.Width() < A.Width() )
        ( ATL, /**/ ATR,  A00, /**/ A01, A02,
         /*************/ /******************/
               /**/       A10, /**/ A11, A12,
          ABL, /**/ ABR,  A20, /**/ A21, A22 );

        ( BT,  B0,
         /**/ /**/
          BB,  B2 );

        ( APan, A12,
                A22 );

        // Factor the panel, then apply the same row pivots to the bottom of B
        lu::Panel( APan, p1, A00.Height() );
        ComposePivots( p1, A00.Height(), image, preimage );
        ApplyRowPivots( BB, image, preimage );

        // Solve against the unit-lower A11 for both A12 and B1
        Trsm( LEFT, LOWER, NORMAL, UNIT, F(1), A11, A12 );
        Trsm( LEFT, LOWER, NORMAL, UNIT, F(1), A11, B1 );

        // Trailing updates: A22 -= A21 A12 and B2 -= A21 B1
        Gemm( NORMAL, NORMAL, F(-1), A21, A12, F(1), A22 );
        Gemm( NORMAL, NORMAL, F(-1), A21, B1,  F(1), B2 );

        ( ATL, /**/ ATR,  A00, A01, /**/ A02,
               /**/       A10, A11, /**/ A12,
         /*************/ /******************/
          ABL, /**/ ABR,  A20, A21, /**/ A22 );

        ( BT,  B0,
         /**/ /**/
          BB,  B2 );
Exemple #15
// Blocked two-sided triangular solve with an upper-triangular U: sweeps down
// the diagonal of A and, per step, updates the column panel A01 and the
// diagonal block A11 (the latter via TwoSidedTrsmUUnb).
//
//   diag - forwarded to Trsm / TwoSidedTrsmUUnb
//   A    - square, same size as U; overwritten in place (the Hemm/Her2k
//          calls reference its UPPER triangle)
//   U    - square triangular matrix, read only
//
// Throws std::logic_error on the size checks below (non-RELEASE builds).
//
// NOTE(review): this listing shows no braces or `#endif` lines, and the
// view declarations and Partition/Repartition call names appear to have been
// dropped -- confirm against the upstream file.
inline void
TwoSidedTrsmUVar1( UnitOrNonUnit diag, Matrix<F>& A, const Matrix<F>& U )
#ifndef RELEASE
    if( A.Height() != A.Width() )
        throw std::logic_error("A must be square");
    if( U.Height() != U.Width() )
        throw std::logic_error("Triangular matrices must be square");
    if( A.Height() != U.Height() )
        throw std::logic_error("A and U must be the same size");
    // Matrix views
        ATL, ATR,  A00, A01, A02,
        ABL, ABR,  A10, A11, A12,
                   A20, A21, A22;
        UTL, UTR,  U00, U01, U02,
        UBL, UBR,  U10, U11, U12,
                   U20, U21, U22;

    // Temporary products
    Matrix<F> Y01;

    ( A, ATL, ATR,
         ABL, ABR, 0 );
    ( U, UTL, UTR,
         UBL, UBR, 0 );
    while( ATL.Height() < A.Height() )
        ( ATL, /**/ ATR,  A00, /**/ A01, A02,
         /*************/ /******************/
               /**/       A10, /**/ A11, A12,
          ABL, /**/ ABR,  A20, /**/ A21, A22 );

        ( UTL, /**/ UTR,  U00, /**/ U01, U02,
         /*************/ /******************/
               /**/       U10, /**/ U11, U12,
          UBL, /**/ UBR,  U20, /**/ U21, U22 );

        // Y01 := A00 U01
        Zeros( A01.Height(), A01.Width(), Y01 );
        Hemm( LEFT, UPPER, F(1), A00, U01, F(0), Y01 );

        // A01 := inv(U00)' A01
        Trsm( LEFT, UPPER, ADJOINT, diag, F(1), U00, A01 );

        // A01 := A01 - 1/2 Y01
        Axpy( F(-1)/F(2), Y01, A01 );

        // A11 := A11 - (U01' A01 + A01' U01)
        Her2k( UPPER, ADJOINT, F(-1), U01, A01, F(1), A11 );

        // A11 := inv(U11)' A11 inv(U11)
        TwoSidedTrsmUUnb( diag, A11, U11 );

        // A01 := A01 - 1/2 Y01
        // (second half-subtraction; together with the one above, all of Y01
        // has now been removed from A01)
        Axpy( F(-1)/F(2), Y01, A01 );

        // A01 := A01 inv(U11)
        Trsm( RIGHT, UPPER, NORMAL, diag, F(1), U11, A01 );

        ( ATL, /**/ ATR,  A00, A01, /**/ A02,
               /**/       A10, A11, /**/ A12,
         /*************/ /******************/
          ABL, /**/ ABR,  A20, A21, /**/ A22 );

        ( UTL, /**/ UTR,  U00, U01, /**/ U02,
               /**/       U10, U11, /**/ U12,
         /*************/ /******************/
          UBL, /**/ UBR,  U20, U21, /**/ U22 );
#ifndef RELEASE
Exemple #16
// Solves a linear system / least-squares problem with A (NORMAL) or its
// adjoint (ADJOINT) by factoring A with QR when m >= n and with LQ in the
// other case, then solving against the triangular factor.
//
//   orientation - NORMAL or ADJOINT; TRANSPOSE is rejected (non-RELEASE)
//   A           - m x n, overwritten with its packed QR or LQ factorization
//   B           - right-hand sides; its height must match m (NORMAL) or
//                 n (ADJOINT)
//   X           - output, resized as needed
//
// Throws std::logic_error on the checks below.
//
// NOTE(review): the function name is not visible in this listing, the
// statements that apply Q (after each "Apply Q ..." comment) are missing,
// and there are no braces or `else` keywords for the m < n branches. The
// `Matrix<C> XT,` declarations also appear truncated (XB is used later) --
// confirm all of this against the upstream file.
inline void
( Orientation orientation, 
  Matrix<Complex<R> >& A, 
  const Matrix<Complex<R> >& B,
        Matrix<Complex<R> >& X )
#ifndef RELEASE
    if( orientation == TRANSPOSE )
        throw std::logic_error("Invalid orientation");
    typedef Complex<R> C;

    // TODO: Add scaling
    const int m = A.Height();
    const int n = A.Width();
    // Receives the Householder scalars from QR / LQ
    Matrix<C> t;
    if( orientation == NORMAL )
        if( m != B.Height() )
            throw std::logic_error("A and B do not conform");

        if( m >= n )
            // Overwrite A with its packed QR factorization (and store the 
            // corresponding Householder scalars in t)
            QR( A, t );

            // Copy B into X
            X = B;

            // Apply Q' to X

            // Shrink X to its new height
            X.ResizeTo( n, X.Width() );

            // Solve against R (checking for singularities)
            Matrix<C> AT;
            LockedView( AT, A, 0, 0, n, n );
            Trsm( LEFT, UPPER, NORMAL, NON_UNIT, C(1), AT, X, true );
            // Overwrite A with its packed LQ factorization (and store the
            // corresponding Householder scalars in t)
            LQ( A, t );

            // Copy B into X
            // (X is n rows tall; its top m rows receive B, the rest zero)
            X.ResizeTo( n, B.Width() );
            Matrix<C> XT,
            PartitionDown( X, XT,
                              XB, m );
            XT = B;
            Zero( XB );

            // Solve against L (checking for singularities)
            Matrix<C> AL;
            LockedView( AL, A, 0, 0, m, m );
            Trsm( LEFT, LOWER, NORMAL, NON_UNIT, C(1), AL, XT, true );

            // Apply Q' to X 
    else // orientation == ADJOINT
        if( n != B.Height() )
            throw std::logic_error("A and B do not conform");

        if( m >= n )
            // Overwrite A with its packed QR factorization (and store the 
            // corresponding Householder scalars in t)
            QR( A, t );

            // Copy B into X
            // (X is m rows tall; its top n rows receive B, the rest zero)
            X.ResizeTo( m, B.Width() );
            Matrix<C> XT,
            PartitionDown( X, XT,
                              XB, n );
            XT = B;
            Zero( XB );

            // Solve against R' (checking for singularities)
            Matrix<C> AT;
            LockedView( AT, A, 0, 0, n, n );
            Trsm( LEFT, UPPER, ADJOINT, NON_UNIT, C(1), AT, XT, true );

            // Apply Q to X
            // Overwrite A with its packed LQ factorization (and store the
            // corresponding Householder scalars in t)
            LQ( A, t );

            // Copy B into X
            X = B;

            // Apply Q to X

            // Shrink X to its new height
            X.ResizeTo( m, X.Width() );

            // Solve against L' (check for singularities)
            Matrix<C> AL;
            LockedView( AL, A, 0, 0, m, m );
            Trsm( LEFT, LOWER, ADJOINT, NON_UNIT, C(1), AL, X, true );
#ifndef RELEASE
Exemple #17
// Zeroes part of each column of A directly in its buffer so that only a
// trapezoidal region keeps its values. For uplo == LOWER the leading rows of
// each affected column are cleared with MemZero; the remaining loops address
// the trailing rows starting at firstZeroRow.
//
//   side   - LEFT measures the boundary diagonal from the top-left corner;
//            the other loops shift it by (height - width)
//   uplo   - LOWER selects the loops that clear the top of each column
//   offset - shifts the boundary diagonal
//   A      - overwritten in place
//
// NOTE(review): the function name is not visible in this listing, there are
// no braces or `else` keywords, and the call name in front of the
// "( &buffer[firstZeroRow+j*ldim], ... )" lines has been dropped (presumably
// MemZero, as in the LOWER loops) -- confirm against the upstream file.
inline void
( LeftOrRight side, UpperOrLower uplo, int offset, Matrix<T>& A )
#ifndef RELEASE
    const int height = A.Height();
    const int width = A.Width();
    const int ldim = A.LDim();
    // Column j of A starts at buffer[j*ldim]
    T* buffer = A.Buffer();

    if( uplo == LOWER )
        if( side == LEFT )
            #pragma omp parallel for
            for( int j=std::max(0,offset+1); j<width; ++j )
                const int lastZeroRow = j-offset-1;
                const int numZeroRows = std::min( lastZeroRow+1, height );
                MemZero( &buffer[j*ldim], numZeroRows );
            #pragma omp parallel for
            for( int j=std::max(0,offset-height+width+1); j<width; ++j )
                const int lastZeroRow = j-offset+height-width-1;
                const int numZeroRows = std::min( lastZeroRow+1, height );
                MemZero( &buffer[j*ldim], numZeroRows );
        if( side == LEFT )
            #pragma omp parallel for
            for( int j=0; j<width; ++j )
                const int firstZeroRow = std::max(j-offset+1,0);
                if( firstZeroRow < height )
                    ( &buffer[firstZeroRow+j*ldim], height-firstZeroRow );
            #pragma omp parallel for
            for( int j=0; j<width; ++j )
                const int firstZeroRow = std::max(j-offset+height-width+1,0);
                if( firstZeroRow < height )
                    ( &buffer[firstZeroRow+j*ldim], height-firstZeroRow );
#ifndef RELEASE
Exemple #18
// Blocked two-sided triangular solve with an upper-triangular U: sweeps down
// the diagonal of A and, per step, finishes the diagonal block A11 (via
// TwoSidedTrsmUUnb), then updates the row panel A12 and the trailing block
// A22.
//
//   diag - forwarded to Trsm / TwoSidedTrsmUUnb
//   A    - square, same size as U; overwritten in place (the Hemm/Her2k
//          calls reference its UPPER triangle)
//   U    - square triangular matrix, read only
//
// Calls LogicError on the size checks below (non-RELEASE builds).
//
// NOTE(review): this listing shows no braces or `#endif` lines, and the
// view declarations and Partition/Repartition call names appear to have been
// dropped -- confirm against the upstream file.
inline void
TwoSidedTrsmUVar5( UnitOrNonUnit diag, Matrix<F>& A, const Matrix<F>& U )
#ifndef RELEASE
    CallStackEntry entry("internal::TwoSidedTrsmUVar5");
    if( A.Height() != A.Width() )
        LogicError("A must be square");
    if( U.Height() != U.Width() )
        LogicError("Triangular matrices must be square");
    if( A.Height() != U.Height() )
        LogicError("A and U must be the same size");
    // Matrix views
        ATL, ATR,  A00, A01, A02,
        ABL, ABR,  A10, A11, A12,
                   A20, A21, A22;
        UTL, UTR,  U00, U01, U02,
        UBL, UBR,  U10, U11, U12,
                   U20, U21, U22;

    // Temporary products
    Matrix<F> Y12;

    ( A, ATL, ATR,
         ABL, ABR, 0 );
    ( U, UTL, UTR,
         UBL, UBR, 0 );
    while( ATL.Height() < A.Height() )
        ( ATL, /**/ ATR,  A00, /**/ A01, A02,
         /*************/ /******************/
               /**/       A10, /**/ A11, A12,
          ABL, /**/ ABR,  A20, /**/ A21, A22 );

        ( UTL, /**/ UTR,  U00, /**/ U01, U02,
         /*************/ /******************/
               /**/       U10, /**/ U11, U12,
          UBL, /**/ UBR,  U20, /**/ U21, U22 );

        // A11 := inv(U11)' A11 inv(U11)
        TwoSidedTrsmUUnb( diag, A11, U11 );

        // Y12 := A11 U12
        Zeros( Y12, A12.Height(), A12.Width() );
        Hemm( LEFT, UPPER, F(1), A11, U12, F(0), Y12 );

        // A12 := inv(U11)' A12
        Trsm( LEFT, UPPER, ADJOINT, diag, F(1), U11, A12 );

        // A12 := A12 - 1/2 Y12
        Axpy( F(-1)/F(2), Y12, A12 );

        // A22 := A22 - (A12' U12 + U12' A12)
        Her2k( UPPER, ADJOINT, F(-1), A12, U12, F(1), A22 );

        // A12 := A12 - 1/2 Y12
        // (second half-subtraction; together with the one above, all of Y12
        // has now been removed from A12)
        Axpy( F(-1)/F(2), Y12, A12 );

        // A12 := A12 inv(U22)
        Trsm( RIGHT, UPPER, NORMAL, diag, F(1), U22, A12 );

        ( ATL, /**/ ATR,  A00, A01, /**/ A02,
               /**/       A10, A11, /**/ A12,
         /*************/ /******************/
          ABL, /**/ ABR,  A20, A21, /**/ A22 );

        ( UTL, /**/ UTR,  U00, U01, /**/ U02,
               /**/       U10, U11, /**/ U12,
         /*************/ /******************/
          UBL, /**/ UBR,  U20, U21, /**/ U22 );
Exemple #19
// Applies the transforms stored in the rows of H to the leading columns of
// A from the right, one row panel of H at a time. Each panel is copied,
// trimmed with MakeTrapezoidal / SetDiagonalToOne, and applied as
//     ALeft := ALeft - (ALeft HPanCopy^T) inv(SInv) HPanCopy.
//
//   offset - must satisfy -H.Width() <= offset <= 0
//   H      - must have the same width as A; read only
//   A      - overwritten in place
//
// Throws std::logic_error on the checks below (non-RELEASE builds).
//
// NOTE(review): the function name is not visible in this listing, there are
// no braces or `#endif` lines, and the view declarations and
// Partition/Repartition call names appear to have been dropped -- confirm
// against the upstream file.
inline void
( int offset, const Matrix<R>& H, Matrix<R>& A )
#ifndef RELEASE
    if( offset > 0 || offset < -H.Width() )
        throw std::logic_error("Transforms out of bounds");
    if( H.Width() != A.Width() )
        throw std::logic_error
        ("Width of transforms must equal width of target matrix");
        HTL, HTR,  H00, H01, H02,  HPan, HPanCopy,
        HBL, HBR,  H10, H11, H12,
                   H20, H21, H22;
    Matrix<R> ALeft;

    // Per-panel workspaces, both reset with Zeros each iteration
    Matrix<R> SInv, Z;

    ( H, HTL, HTR,
         HBL, HBR, 0 );
    while( HTL.Height() < H.Height() && HTL.Width() < H.Width() )
        ( HTL, /**/ HTR,  H00, /**/ H01, H02,
         /*************/ /******************/
               /**/       H10, /**/ H11, H12,
          HBL, /**/ HBR,  H20, /**/ H21, H22 );

        // Row panel of H for this step: columns 0..HPanWidth, skipping the
        // first HPanOffset rows of the current block row
        const int HPanWidth = H10.Width() + H11.Width();
        const int HPanOffset = 
            std::min( H11.Height(), std::max(-offset-H00.Height(),0) );
        const int HPanHeight = H11.Height()-HPanOffset;
        HPan.LockedView( H, H00.Height()+HPanOffset, 0, HPanHeight, HPanWidth );

        // Only the first HPanWidth columns of A are touched by this panel
        ALeft.View( A, 0, 0, A.Height(), HPanWidth );

        Zeros( ALeft.Height(), HPan.Height(), Z );
        Zeros( HPan.Height(), HPan.Height(), SInv );
        // Work on a copy so that H itself is left unmodified
        HPanCopy = HPan;
        MakeTrapezoidal( RIGHT, LOWER, offset, HPanCopy );
        SetDiagonalToOne( RIGHT, offset, HPanCopy );

        // SInv := upper triangle of HPanCopy HPanCopy^T with a halved diagonal
        Syrk( UPPER, NORMAL, R(1), HPanCopy, R(0), SInv );
        HalveMainDiagonal( SInv );

        // Z := ALeft HPanCopy^T, Z := Z inv(SInv), ALeft := ALeft - Z HPanCopy
        Gemm( NORMAL, TRANSPOSE, R(1), ALeft, HPanCopy, R(0), Z );
        Trsm( RIGHT, UPPER, NORMAL, NON_UNIT, R(1), SInv, Z );
        Gemm( NORMAL, NORMAL, R(-1), Z, HPanCopy, R(1), ALeft );

        ( HTL, /**/ HTR,  H00, H01, /**/ H02,
               /**/       H10, H11, /**/ H12,
         /*************/ /******************/
          HBL, /**/ HBR,  H20, H21, /**/ H22 );
#ifndef RELEASE
Exemple #20
// Blocked LQ factorization driver for complex matrices: walks down the
// diagonal of A, factors each top row panel with internal::PanelLQ, and
// passes the panel, its scalars and the bottom panel to a follow-up call.
//
//   A - overwritten in place
//   t - resized to min(height, width) x 1; its blocks t1 are passed to
//       internal::PanelLQ
//
// NOTE(review): this listing shows no braces or `#endif` lines; the view
// declarations, the Partition/Repartition call names and the beginning of
// the call that ends in "0, ATopPan, t1, ABottomPan );" appear to have been
// dropped -- confirm against the upstream file.
inline void
LQ( Matrix<Complex<Real> >& A, 
    Matrix<Complex<Real> >& t )
#ifndef RELEASE
    typedef Complex<Real> C;
    t.ResizeTo( std::min(A.Height(),A.Width()), 1 );

    // Matrix views
        ATL, ATR,  A00, A01, A02,  ATopPan, ABottomPan,
        ABL, ABR,  A10, A11, A12,
                   A20, A21, A22;
        tT,  t0,
        tB,  t1,

    ( A, ATL, ATR,
         ABL, ABR, 0 );
    ( t, tT,
         tB, 0 );
    while( ATL.Height() < A.Height() && ATL.Width() < A.Width() )
        ( ATL, /**/ ATR,  A00, /**/ A01, A02,
         /*************/ /******************/
               /**/       A10, /**/ A11, A12,
          ABL, /**/ ABR,  A20, /**/ A21, A22 );

        ( tT,  t0,
         /**/ /**/
          tB,  t2 );

        // ATopPan := [A11 A12], ABottomPan := [A21 A22]
        View1x2( ATopPan, A11, A12 );
        View1x2( ABottomPan, A21, A22 );

        // Factor the top panel; the continuation line that follows belongs
        // to a call whose name is not visible here
        internal::PanelLQ( ATopPan, t1 );
          0, ATopPan, t1, ABottomPan );

        ( tT,  t0,
         /**/ /**/
          tB,  t2 );

        ( ATL, /**/ ATR,  A00, A01, /**/ A02,
               /**/       A10, A11, /**/ A12,
         /*************/ /******************/
          ABL, /**/ ABR,  A20, A21, /**/ A22 );
#ifndef RELEASE
Exemple #21
// Unblocked reduction step sequence on the upper triangle of a square
// complex matrix: moving up from the bottom-right, each step builds a
// reflector from the column above the diagonal (a01) and applies a
// Hermitian rank-two update to the leading block A00.
//
//   A - square; overwritten in place (UPPER triangle is referenced)
//   t - (A.Height()-1) x 1 column; resized to that unless it is a view
//
// Throws std::logic_error on the checks below (non-RELEASE builds).
//
// NOTE(review): this listing shows no braces or `#endif` lines, and the
// view declarations and Partition/Repartition call names appear to have been
// dropped. Any statements that store tau into t, use epsilon1, or pop the
// blocksize stack are not visible here -- confirm against the upstream file.
inline void HermitianTridiagU
( Matrix<Complex<R> >& A, Matrix<Complex<R> >& t )
#ifndef RELEASE
    const int tHeight = std::max(A.Height()-1,0);
#ifndef RELEASE
    if( A.Height() != A.Width() )
        throw std::logic_error("A must be square");
    if( t.Viewing() && (t.Height() != tHeight || t.Width() != 1) )
        throw std::logic_error("t is of the wrong size");
    typedef Complex<R> C;
    if( !t.Viewing() )
        t.ResizeTo( tHeight, 1 );

    // Matrix views 
        ATL, ATR,  A00, a01,     A02,  a01T,
        ABL, ABR,  a10, alpha11, a12,  alpha01B,
                   A20, a21,     A22;

    // Temporary matrices
    Matrix<C> w01;

    // Work one column at a time
    PushBlocksizeStack( 1 );
    ( A, ATL, ATR,
         ABL, ABR, 0 );
    while( ABR.Height()+1 < A.Height() )
        ( ATL, /**/ ATR,  A00, a01,     /**/ A02,
               /**/       a10, alpha11, /**/ a12,
         /*************/ /**********************/
          ABL, /**/ ABR,  A20, a21,     /**/ A22 );

        // Split a01 into its top part and its bottom entry
        ( a01, a01T,
               alpha01B, 1 );

        w01.ResizeTo( a01.Height(), 1 );
        // Build the reflector from (alpha01B, a01T); afterwards the real
        // part of alpha01B is read back into epsilon1
        const C tau = Reflector( alpha01B, a01T );
        const R epsilon1 = alpha01B.GetRealPart(0,0);

        // w01 := tau A00 a01, then w01 += alpha a01 with
        // alpha = -tau (w01 . a01)/2, then a rank-two update of A00
        Hemv( UPPER, tau, A00, a01, C(0), w01 );
        const C alpha = -tau*Dot( w01, a01 )/C(2);
        Axpy( alpha, a01, w01 );
        Her2( UPPER, C(-1), a01, w01, A00 );

        ( ATL, /**/ ATR,  A00, /**/ a01,     A02,
         /*************/ /**********************/
               /**/       a10, /**/ alpha11, a12,
          ABL, /**/ ABR,  A20, /**/ a21,     A22 );
#ifndef RELEASE
Exemple #22
// Blocked two-sided triangular solve with a lower-triangular L: sweeps down
// the diagonal of A and, per step, updates the row panel A10, the diagonal
// block A11 (via TwoSidedTrsmLUnb) and the column panel A21.
//
//   diag - forwarded to Trsm / TwoSidedTrsmLUnb
//   A    - square, same size as L; overwritten in place (the Hemm/Her2k
//          calls reference its LOWER triangle)
//   L    - square triangular matrix, read only
//
// Calls LogicError on the size checks below (non-RELEASE builds).
//
// NOTE(review): this listing shows no braces or `#endif` lines, and the
// view declarations and Partition/Repartition call names appear to have been
// dropped. X11 is declared but not used in the visible lines -- confirm
// against the upstream file.
inline void
TwoSidedTrsmLVar2( UnitOrNonUnit diag, Matrix<F>& A, const Matrix<F>& L )
#ifndef RELEASE
    CallStackEntry entry("internal::TwoSidedTrsmLVar2");
    if( A.Height() != A.Width() )
        LogicError("A must be square");
    if( L.Height() != L.Width() )
        LogicError("Triangular matrices must be square");
    if( A.Height() != L.Height() )
        LogicError("A and L must be the same size");
    // Matrix views
        ATL, ATR,  A00, A01, A02,
        ABL, ABR,  A10, A11, A12,
                         A20, A21, A22;
        LTL, LTR,  L00, L01, L02,
        LBL, LBR,  L10, L11, L12,
                   L20, L21, L22;

    // Temporary products
    Matrix<F> X11;
    Matrix<F> Y10;

    ( A, ATL, ATR,
         ABL, ABR, 0 );
    ( L, LTL, LTR,
         LBL, LBR, 0 );
    while( ATL.Height() < A.Height() )
        ( ATL, /**/ ATR,  A00, /**/ A01, A02,
         /*************/ /******************/
               /**/       A10, /**/ A11, A12,
          ABL, /**/ ABR,  A20, /**/ A21, A22 );

        ( LTL, /**/ LTR,  L00, /**/ L01, L02,
         /*************/ /******************/
               /**/       L10, /**/ L11, L12,
          LBL, /**/ LBR,  L20, /**/ L21, L22 );

        // Y10 := L10 A00
        Zeros( Y10, L10.Height(), A00.Width() );
        Hemm( RIGHT, LOWER, F(1), A00, L10, F(0), Y10 );

        // A10 := A10 - 1/2 Y10
        Axpy( F(-1)/F(2), Y10, A10 );

        // A11 := A11 - (A10 L10' + L10 A10')
        Her2k( LOWER, NORMAL, F(-1), A10, L10, F(1), A11 );

        // A11 := inv(L11) A11 inv(L11)'
        TwoSidedTrsmLUnb( diag, A11, L11 );

        // A21 := A21 - A20 L10'
        Gemm( NORMAL, ADJOINT, F(-1), A20, L10, F(1), A21 );

        // A21 := A21 inv(L11)'
        Trsm( RIGHT, LOWER, ADJOINT, diag, F(1), L11, A21 );

        // A10 := A10 - 1/2 Y10
        // (second half-subtraction; together with the one above, all of Y10
        // has now been removed from A10)
        Axpy( F(-1)/F(2), Y10, A10 );

        // A10 := inv(L11) A10
        Trsm( LEFT, LOWER, NORMAL, diag, F(1), L11, A10 );

        ( ATL, /**/ ATR,  A00, A01, /**/ A02,
               /**/       A10, A11, /**/ A12,
         /*************/ /******************/
          ABL, /**/ ABR,  A20, A21, /**/ A22 );

        ( LTL, /**/ LTR,  L00, L01, /**/ L02,
               /**/       L10, L11, /**/ L12,
          LBL, /**/ LBR,  L20, L21, /**/ L22 );
Exemple #23
// Alternating iteration that splits M into L + S: each pass soft-thresholds
// to update S, applies SVT to update L, forms the residual E = M - (L + S),
// and adds beta*E to the multiplier Y. Progress is reported on cout when
// ctrl.progress is set.
//
//   M    - input matrix (m x n), read only
//   L, S - outputs, both reset to m x n zeros before iterating
//   ctrl - supplies tau, beta, tol, maxIts, usePivQR, numPivSteps, progress
//
// Calls LogicError when ctrl.beta or ctrl.tol is not positive.
//
// NOTE(review): this listing shows no braces or `else` keywords, and neither
// an increment of numIts nor any `break` out of `while( true )` is visible.
// The second SVT call is presumably the `else` of ctrl.usePivQR and the
// third progress message presumably belongs to a final `else` -- confirm
// against the upstream file.
inline void ADMM
( const Matrix<F>& M, 
        Matrix<F>& L,
        Matrix<F>& S, 
  const RPCACtrl<Base<F>>& ctrl )
    typedef Base<F> Real;
    const Int m = M.Height();
    const Int n = M.Width();

    // If tau is not specified, then set it to 1/sqrt(max(m,n))
    const Base<F> tau = 
        ( ctrl.tau <= Real(0) ? Real(1)/sqrt(Real(Max(m,n))) : ctrl.tau );
    if( ctrl.beta <= Real(0) )
        LogicError("beta cannot be non-positive");
    if( ctrl.tol <= Real(0) )
        LogicError("tol cannot be non-positive");
    const Base<F> beta = ctrl.beta;
    const Base<F> tol = ctrl.tol;

    const double startTime = mpi::Time();
    // E is the residual M - (L + S); Y is the multiplier, started at zero
    Matrix<F> E, Y;
    Zeros( Y, m, n );

    const Real frobM = FrobeniusNorm( M );
    const Real maxM = MaxNorm( M );
    if( ctrl.progress )
        cout << "|| M ||_F = " << frobM << "\n"
             << "|| M ||_max = " << maxM << endl;

    Zeros( L, m, n );
    Zeros( S, m, n );

    Int numIts = 0;
    while( true )

        // ST_{tau/beta}(M - L + Y/beta)
        S = M;
        S -= L;
        Axpy( F(1)/beta, Y, S );
        SoftThreshold( S, tau/beta );
        const Int numNonzeros = ZeroNorm( S );

        // SVT_{1/beta}(M - S + Y/beta)
        L = M;
        L -= S;
        Axpy( F(1)/beta, Y, L );
        Int rank;
        if( ctrl.usePivQR )
            rank = SVT( L, Real(1)/beta, ctrl.numPivSteps );
            rank = SVT( L, Real(1)/beta );
        // E := M - (L + S)
        E = M;    
        E -= L;
        E -= S;
        const Real frobE = FrobeniusNorm( E );

        // Converged once the relative residual is within tol; otherwise give
        // up once numIts reaches ctrl.maxIts
        if( frobE/frobM <= tol )            
            if( ctrl.progress )
                cout << "Converged after " << numIts << " iterations "
                     << " with rank=" << rank 
                     << ", numNonzeros=" << numNonzeros << " and "
                     << "|| E ||_F / || M ||_F = " << frobE/frobM
                     << ", and " << mpi::Time()-startTime << " total secs"
                     << endl;
        else if( numIts >= ctrl.maxIts )
            if( ctrl.progress )
                cout << "Aborting after " << numIts << " iterations and "
                     << mpi::Time()-startTime << " total secs" 
                     << endl;
            if( ctrl.progress )
                cout << numIts << ": || E ||_F / || M ||_F = " 
                     << frobE/frobM << ", rank=" << rank 
                     << ", numNonzeros=" << numNonzeros 
                     << ", " << mpi::Time()-startTime << " total secs"
                     << endl;
        // Y := Y + beta E
        Axpy( beta, E, Y );
Exemple #24
// SVD helper working on raw buffers: sizes s, U and a temporary VAdj for a
// thin/compact or full decomposition, passes the buffers of A, U and VAdj to
// an external routine, optionally truncates the result at a threshold
// (COMPACT_SVD), and writes the adjoint of VAdj into V. Returns `info`.
//
//   A    - input matrix (m x n); its buffer is handed to the external call
//   U    - output, resized to m x k (thin/compact) or m x m
//   s    - output, resized to k x 1 with k = min(m,n), then possibly shrunk
//   V    - output, set to Adjoint(VAdj) unless ctrl.bidiagSVDCtrl.wantV is
//          false
//   ctrl - must have overwrite == true and an approach of THIN_SVD, FULL_SVD
//          or COMPACT_SVD (LogicError is called otherwise)
//
// NOTE(review): the function name and return type are not visible in this
// listing (the error text mentions "LAPACKHelper"). There are no braces or
// `else` keywords, and the names of the routine taking "( m, n, A.Buffer(),
// ... )" and of the threshold function are missing -- confirm against the
// upstream file.
(     Matrix<F>& A,
      Matrix<F>& U,
      Matrix<Base<F>>& s,
      Matrix<F>& V,
  const SVDCtrl<Base<F>>& ctrl )
    typedef Base<F> Real;
    if( !ctrl.overwrite )
        LogicError("LAPACKHelper assumes ctrl.overwrite == true");
    auto approach = ctrl.bidiagSVDCtrl.approach;
    if( approach != THIN_SVD &&
        approach != FULL_SVD &&
        approach != COMPACT_SVD )
        LogicError("LAPACKHelper assumes THIN_SVD, FULL_SVD, or COMPACT_SVD");

    SVDInfo info;
    const Int m = A.Height();
    const Int n = A.Width();
    const Int k = Min(m,n);
    const bool thin = ( approach == THIN_SVD );
    const bool compact = ( approach == COMPACT_SVD );
    const bool avoidU = !ctrl.bidiagSVDCtrl.wantU;
    const bool avoidV = !ctrl.bidiagSVDCtrl.wantV;
    s.Resize( k, 1 );
    Matrix<F> VAdj;

    // Thin/compact sizes first; the m x m / n x n pair is presumably the
    // missing `else` (full SVD)
    if( thin || compact )
        U.Resize( m, k );
        VAdj.Resize( k, n );
        U.Resize( m, m );
        VAdj.Resize( n, n );
    ( m, n,
      A.Buffer(), A.LDim(),
      U.Buffer(), U.LDim(),
      VAdj.Buffer(), VAdj.LDim(),
      (thin||compact) );

    if( compact )
        // s(0) is used as the two-norm, with zero as the value when k == 0
        const Real twoNorm = ( k==0 ? Real(0) : s(0) );
        const Real thresh =
          ( m, n, twoNorm, ctrl.bidiagSVDCtrl );

        // Scan for entries of s at or below thresh and shrink s, U, VAdj to
        // `rank`.
        // NOTE(review): no `break` is visible after `rank = j`; as printed,
        // rank would end at the last such j rather than the first -- confirm.
        Int rank = k;
        for( Int j=0; j<k; ++j )
            if( s(j) <= thresh )
                rank = j;
        s.Resize( rank, 1 );
        if( !avoidU ) U.Resize( m, rank );
        if( !avoidV ) VAdj.Resize( rank, n );
    if( !avoidV ) Adjoint( VAdj, V );

    return info;
Exemple #25
// Blocked LU factorization with row pivoting: per step it factors the left
// part of the remaining submatrix with internal::PanelLU, applies the
// resulting row swaps to the blocks on either side, and then updates A12
// and A22.
//
//   A - overwritten in place
//   p - pivot vector; resized to min(height, width) x 1 unless it is a view,
//       in which case it must already have that shape (std::logic_error
//       otherwise, non-RELEASE builds)
//
// NOTE(review): this listing shows no braces or `#endif` lines, and the
// view declarations and Partition/Repartition call names appear to have been
// dropped -- confirm against the upstream file.
inline void
LU( Matrix<F>& A, Matrix<int>& p )
#ifndef RELEASE
    if( p.Viewing() && 
        (std::min(A.Height(),A.Width()) != p.Height() || p.Width() != 1) ) 
        throw std::logic_error
        ("p must be a vector of the same height as the min dimension of A.");
    if( !p.Viewing() )
        p.ResizeTo( std::min(A.Height(),A.Width()), 1 );

    // Matrix views
        ATL, ATR,  A00, A01, A02,  ABRL, ABRR,
        ABL, ABR,  A10, A11, A12,  
                   A20, A21, A22;

        pT,  p0, 
        pB,  p1,

    // Pivot composition
    std::vector<int> image, preimage;

    // Start the algorithm
    ( A, ATL, ATR,
         ABL, ABR, 0 );
    ( p, pT,
         pB, 0 );
    while( ATL.Height() < A.Height() && ATL.Width() < A.Width() )
        ( ATL, /**/ ATR,  A00, /**/ A01, A02,
         /*************/ /******************/
               /**/       A10, /**/ A11, A12,
          ABL, /**/ ABR,  A20, /**/ A21, A22 );

        ( pT,  p0,
         /**/ /**/
          pB,  p2 );

        // Split ABR into a left panel as wide as A11 and the remainder
        PartitionRight( ABR, ABRL, ABRR, A11.Width() );

        // Factor the panel, then apply its row swaps to the blocks to the
        // left (ABL) and to the right (ABRR) of it
        const int pivotOffset = A01.Height();
        internal::PanelLU( ABRL, p1, pivotOffset );
        internal::ComposePanelPivots( p1, pivotOffset, image, preimage );
        ApplyRowPivots( ABL, image, preimage );
        ApplyRowPivots( ABRR, image, preimage );

        // A12 := inv(unit-lower A11) A12, then A22 := A22 - A21 A12
        Trsm( LEFT, LOWER, NORMAL, UNIT, F(1), A11, A12 );
        Gemm( NORMAL, NORMAL, F(-1), A21, A12, F(1), A22 );

        ( ATL, /**/ ATR,  A00, A01, /**/ A02,
               /**/       A10, A11, /**/ A12,
         /*************/ /******************/
          ABL, /**/ ABR,  A20, A21, /**/ A22 );

        ( pT,  p0,
         /**/ /**/
          pB,  p2 );
#ifndef RELEASE
Exemple #26
// Blocked two-sided triangular multiply using the lower triangle L: each
// iteration updates the blocks A10, A21 and A11 of the square matrix A as
// described by the per-step comments below.
//
// diag : forwarded to the Trmm calls and to TwoSidedTrmmLUnb.
// A    : square matrix, updated through views of itself.
// L    : square matrix of the same size as A; only read.
// NOTE(review): braces, the type names of the view declarations, the names of
// the partition/repartition calls and the closing #endif lines are missing
// from this listing.
inline void
TwoSidedTrmmLVar2( UnitOrNonUnit diag, Matrix<F>& A, const Matrix<F>& L )
#ifndef RELEASE
    // Shape checks: A and L must both be square and of equal size.
    if( A.Height() != A.Width() )
        throw std::logic_error( "A must be square." );
    if( L.Height() != L.Width() )
        throw std::logic_error( "Triangular matrices must be square." );
    if( A.Height() != L.Height() )
        throw std::logic_error( "A and L must be the same size." );
    // Matrix views
    ATL, ATR,  A00, A01, A02,
         ABL, ABR,  A10, A11, A12,
         A20, A21, A22;
    LTL, LTR,  L00, L01, L02,
         LBL, LBR,  L10, L11, L12,
         L20, L21, L22;

    // Temporary products
    Matrix<F> Y21;

    ( A, ATL, ATR,
      ABL, ABR, 0 );
    ( L, LTL, LTR,
      LBL, LBR, 0 );
    // Iterate until the top-left view ATL spans the full height of A.
    while( ATL.Height() < A.Height() )
        ( ATL, /**/ ATR,  A00, /**/ A01, A02,
          /*************/ /******************/
          /**/       A10, /**/ A11, A12,
          ABL, /**/ ABR,  A20, /**/ A21, A22 );

        ( LTL, /**/ LTR,  L00, /**/ L01, L02,
          /*************/ /******************/
          /**/       L10, /**/ L11, L12,
          LBL, /**/ LBR,  L20, /**/ L21, L22 );

        // A10 := L11' A10
        Trmm( LEFT, LOWER, ADJOINT, diag, F(1), L11, A10 );

        // A10 := A10 + L21' A20
        Gemm( ADJOINT, NORMAL, F(1), L21, A20, F(1), A10 );

        // Y21 := A22 L21
        // (Y21 is first sized like A21 and zeroed; Hemm reads A22's lower
        // triangle.)
        Zeros( A21.Height(), A21.Width(), Y21 );
        Hemm( LEFT, LOWER, F(1), A22, L21, F(0), Y21 );

        // A21 := A21 L11
        Trmm( RIGHT, LOWER, NORMAL, diag, F(1), L11, A21 );

        // A21 := A21 + 1/2 Y21
        // (first half: Her2k below therefore sees A21 with only half of Y21
        // added)
        Axpy( F(1)/F(2), Y21, A21 );

        // A11 := L11' A11 L11
        TwoSidedTrmmLUnb( diag, A11, L11 );

        // A11 := A11 + (A21' L21 + L21' A21)
        Her2k( LOWER, ADJOINT, F(1), A21, L21, F(1), A11 );

        // A21 := A21 + 1/2 Y21
        // (second half: the two Axpy calls together add all of Y21 to A21)
        Axpy( F(1)/F(2), Y21, A21 );

        ( ATL, /**/ ATR,  A00, A01, /**/ A02,
          /**/       A10, A11, /**/ A12,
          /*************/ /******************/
          ABL, /**/ ABR,  A20, A21, /**/ A22 );

        ( LTL, /**/ LTR,  L00, L01, /**/ L02,
          /**/       L10, L11, /**/ L12,
          /*************/ /******************/
          LBL, /**/ LBR,  L20, L21, /**/ L22 );
#ifndef RELEASE
Exemple #27
// ADMM-style iteration over the data (A, b, c): it alternates a linear solve
// for x, an over-relaxation step, a clip of z to nonnegative values and an
// update of u. The function name and return type are not visible in this
// listing; the messages below call the method "ADMM".
//
// A, b, c : problem data; A is m x n, and b and c enter the right-hand sides
//           and the objective c'x as shown below.
// z       : output iterate, reset to an n x 1 zero vector before the loop.
// ctrl    : supplies rho, alpha, inv, maxIter, absTol and print.
// Returns numIter.
// NOTE(review): braces, else lines, the tails of the epsPri/epsDual
// expressions, the loop exit and the increment of numIter are missing from
// this listing.
( const Matrix<Real>& A,
  const Matrix<Real>& b,
  const Matrix<Real>& c,
        Matrix<Real>& z,
  const ADMMCtrl<Real>& ctrl )

    // Cache a custom partially-pivoted LU factorization of
    //    |  rho*I   A^H | = | B11  B12 |
    //    |  A       0   |   | B21  B22 |
    // by (justifiably) avoiding pivoting in the first n steps of
    // the factorization, so that
    //    [I,rho*I] = lu(rho*I).
    // The factorization would then proceed with
    //    B21 := B21 U11^{-1} = A (rho*I)^{-1} = A/rho
    //    B12 := L11^{-1} B12 = I A^H = A^H.
    // The Schur complement would then be
    //    B22 := B22 - B21 B12 = 0 - (A*A^H)/rho.
    // We then factor said matrix with LU with partial pivoting and
    // swap the necessary rows of B21 in order to implicitly commute
    // the row pivots with the Gauss transforms in the manner standard
    // for GEPP. Unless A A' is singular, pivoting should not be needed,
    // as Cholesky factorization of the negative matrix should be valid.
    // The result is the factorization
    //   | I 0   | | rho*I A^H | = | I   0   | | rho*I U12 |,
    //   | 0 P22 | | A     0   |   | L21 L22 | | 0     U22 |
    // where [L22,U22] are stored within B22.
    Matrix<Real> U12, L21, B22, bPiv;
    Adjoint( A, U12 );
    L21 = A;
    L21 *= 1/ctrl.rho;
    // B22 := -(A A^H)/rho; Herk fills the lower triangle and MakeHermitian
    // completes the matrix from it.
    Herk( LOWER, NORMAL, -1/ctrl.rho, A, B22 );
    MakeHermitian( LOWER, B22 );
    // TODO: Replace with sparse-direct Cholesky version?
    Permutation P2;
    LU( B22, P2 );
    // Apply the row permutation from the factorization to L21 and to a copy
    // of b, so neither needs permuting inside the loop.
    P2.PermuteRows( L21 );
    bPiv = b;
    P2.PermuteRows( bPiv );

    // Possibly form the inverse of L22 U22
    // (X22 starts as the unit lower triangle of B22, is inverted, and is then
    // solved against the upper triangle of B22.)
    Matrix<Real> X22;
    if( ctrl.inv )
        X22 = B22;
        MakeTrapezoidal( LOWER, X22 );
        FillDiagonal( X22, Real(1) );
        TriangularInverse( LOWER, UNIT, X22 );
        Trsm( LEFT, UPPER, NORMAL, NON_UNIT, Real(1), B22, X22 );

    Int numIter=0;
    const Int m = A.Height();
    const Int n = A.Width();
    Matrix<Real> g, xTmp, y, t;
    // g is an (m+n) x 1 work vector; xTmp and y come from PartitionDown(g, ...)
    // with a split at row n.
    Zeros( g, m+n, 1 );
    PartitionDown( g, xTmp, y, n );
    Matrix<Real> x, u, zOld, xHat;
    Zeros( z, n, 1 );
    Zeros( u, n, 1 );
    Zeros( t, n, 1 );
    while( numIter < ctrl.maxIter )
        zOld = z;

        // Find x from
        //  | rho*I  A^H | | x | = | rho*(z-u)-c |
        //  | A      0   | | y |   | b           |
        // via our cached custom factorization:
        // |x| = inv(U) inv(L) P' |rho*(z-u)-c|
        // |y|                    |b          |
        //     = |rho*I U12|^{-1} |I   0  | |I 0   | |rho*(z-u)-c|
        //     = |0     U22|      |L21 L22| |0 P22'| |b          |
        //     = "                        " |rho*(z-u)-c|
        //                                  | P22' b    |
        xTmp = z;
        xTmp -= u;
        xTmp *= ctrl.rho;
        xTmp -= c;
        y = bPiv;
        Gemv( NORMAL, Real(-1), L21, xTmp, Real(1), y );
        // Solve against L22 U22: either one multiply by the cached X22, or two
        // triangular solves with B22.
        // NOTE(review): as listed, both paths follow the `if` directly; an
        // `else` before the Trsv pair appears to be missing here.
        if( ctrl.inv )
            Gemv( NORMAL, Real(1), X22, y, t );
            y = t;
            Trsv( LOWER, NORMAL, UNIT, B22, y );
            Trsv( UPPER, NORMAL, NON_UNIT, B22, y );
        // Back-substitute the top block: xTmp := (xTmp - U12 y) / rho.
        Gemv( NORMAL, Real(-1), U12, y, Real(1), xTmp );
        xTmp *= 1/ctrl.rho;

        // xHat := alpha*x + (1-alpha)*zOld
        xHat = xTmp;
        xHat *= ctrl.alpha;
        Axpy( 1-ctrl.alpha, zOld, xHat );

        // z := pos(xHat+u)
        z = xHat;
        z += u;
        LowerClip( z, Real(0) );

        // u := u + (xHat-z)
        u += xHat;
        u -= z;

        const Real objective = Dot( c, xTmp );

        // rNorm := || x - z ||_2
        t = xTmp;
        t -= z;
        const Real rNorm = FrobeniusNorm( t );
        // sNorm := |rho| || z - zOld ||_2
        t = z;
        t -= zOld;
        const Real sNorm = Abs(ctrl.rho)*FrobeniusNorm( t );

        // Primal and dual tolerances.
        // NOTE(review): both expressions are truncated in this listing; each
        // ends with a dangling `+`.
        const Real epsPri = Sqrt(Real(n))*ctrl.absTol +
        const Real epsDual = Sqrt(Real(n))*ctrl.absTol +

        if( ctrl.print )
            // clipDist is the norm of Pos(x) - x, i.e. how far x is from its
            // own nonnegative clip.
            t = xTmp;
            LowerClip( t, Real(0) );
            t -= xTmp;
            const Real clipDist = FrobeniusNorm( t );
            cout << numIter << ": "
              << "||x-z||_2=" << rNorm << ", "
              << "epsPri=" << epsPri << ", "
              << "|rho| ||z-zOld||_2=" << sNorm << ", "
              << "epsDual=" << epsDual << ", "
              << "||x-Pos(x)||_2=" << clipDist << ", "
              << "c'x=" << objective << endl;
        // Convergence test on both residuals.
        // NOTE(review): the body of this `if` (presumably the loop exit) and
        // the increment of numIter are not visible in this listing.
        if( rNorm < epsPri && sNorm < epsDual )
    if( ctrl.maxIter == numIter )
        cout << "ADMM failed to converge" << endl;
    // x is a local declared above; the visible code does not read it after
    // this assignment.
    x = xTmp;
    return numIter;
Exemple #28
// Unblocked multi-shift triangular solve working directly on raw buffers.
// Each column j of X is solved with its own shift, shifts.Get(j,0), subtracted
// from the diagonal of L. The sweep starts at the last row (k = m-1), and each
// step eliminates the solved entries from x(0:k) using rows of L, which are
// accessed with stride ldl.
//
// conjugate : when true, X is conjugated before and after the sweep.
// L         : read-only coefficient matrix; a nonzero entry at (k-1,k) marks a
//             2x2 diagonal block.
// shifts    : one shift per column of X, read from its first column.
// X         : right-hand sides on entry, overwritten in place.
// NOTE(review): braces, the `else` between the 2x2 and 1x1 paths, and every
// statement that decrements k are missing from this listing.
void LLTUnb
( bool conjugate, const Matrix<F>& L, const Matrix<F>& shifts, Matrix<F>& X )
    typedef Base<F> Real;
    const Int m = X.Height();
    const Int n = X.Width();

    const F* LBuf = L.LockedBuffer();
          F* XBuf = X.Buffer();
    const Int ldl = L.LDim();
    const Int ldx = X.LDim();

    if( conjugate )
        Conjugate( X );

    Int k=m-1;
    while( k >= 0 )
        // A nonzero in position (k-1,k) of L selects the 2x2 path.
        const bool in2x2 = ( k>0 && LBuf[(k-1)+k*ldl] != F(0) );
        if( in2x2 )
            // Solve the 2x2 linear systems via 2x2 LQ decompositions produced
            // by the Givens rotation
            //    | L(k,k)-shift L(k,k+1) | | c -conj(s) | = | gamma11 0 |
            //                              | s    c     |
            // and by also forming the bottom two entries of the 2x2 resulting
            // lower-triangular matrix, say gamma21 and gamma22
            // Extract the constant part of the 2x2 diagonal block, D
            // NOTE(review): the block is indexed with rows/columns k and k+1
            // while the test above inspects (k-1,k); a `--k` presumably sits
            // between them in the original source -- confirm.
            const F delta12 = LBuf[   k +(k+1)*ldl];
            const F delta21 = LBuf[(k+1)+   k *ldl];
            for( Int j=0; j<n; ++j )
                // Only the diagonal entries depend on the column's shift.
                const F delta11 = LBuf[   k +   k *ldl] - shifts.Get(j,0);
                const F delta22 = LBuf[(k+1)+(k+1)*ldl] - shifts.Get(j,0);
                // Decompose D = L Q
                Real c; F s;
                const F gamma11 = Givens( delta11, delta12, c, s );
                const F gamma21 =        c*delta21 + s*delta22;
                const F gamma22 = -Conj(s)*delta21 + c*delta22;

                // xBuf points at the top of column j of X.
                F* xBuf = &XBuf[j*ldx];
                // Solve against Q^T
                const F chi1 = xBuf[k  ];
                const F chi2 = xBuf[k+1];
                xBuf[k  ] =        c*chi1 + s*chi2;
                xBuf[k+1] = -Conj(s)*chi1 + c*chi2;

                // Solve against R^T
                xBuf[k+1] /= gamma22;
                xBuf[k  ] -= gamma21*xBuf[k+1];
                xBuf[k  ] /= gamma11;

                // Update x0 := x0 - L10^T x1
                // (two strided axpys, one per row of the 2x2 block, each over
                // the first k entries)
                blas::Axpy( k, -xBuf[k  ], &LBuf[k  ], ldl, xBuf, 1 );
                blas::Axpy( k, -xBuf[k+1], &LBuf[k+1], ldl, xBuf, 1 );
            // 1x1 path: one scalar division per column.
            for( Int j=0; j<n; ++j )
                F* xBuf = &XBuf[j*ldx];
                // Solve the 1x1 linear system
                xBuf[k] /= LBuf[k+k*ldl] - shifts.Get(j,0);

                // Update x0 := x0 - l10^T chi_1
                blas::Axpy( k, -xBuf[k], &LBuf[k], ldl, xBuf, 1 );
    // Undo the initial conjugation so X returns in its original convention.
    if( conjugate )
        Conjugate( X );
Exemple #29
// Unblocked sweep over the complex matrix A that generates one reflector per
// step from the current top row (alpha11 | a12), stores its scalar in t, and
// updates the rows below (a21 | A22).
// NOTE(review): the function name is missing from this listing (this looks like
// an unblocked LQ-style Householder routine -- confirm), as are the braces, the
// type names of the view declarations, the names of the partition calls and
// the closing #endif lines.
inline void
( Matrix<Complex<Real> >& A,
  Matrix<Complex<Real> >& t )
#ifndef RELEASE
    // t must already be a min(height,width) x 1 column vector.
    if( t.Height() != std::min(A.Height(),A.Width()) || t.Width() != 1 )
        throw std::logic_error
        ("t must be a vector of height equal to the minimum dimension of A");
    typedef Complex<Real> C;

    // Views into A, including the combined top row and bottom panel.
        ATL, ATR,  A00, a01,     A02,  aTopRow, ABottomPan,
        ABL, ABR,  a10, alpha11, a12,
                   A20, a21,     A22;
    // Views into t.
        tT,  t0,
        tB,  tau1,

    // z holds ABottomPan times the conjugated top row; aTopRowConj holds that
    // conjugated row.
    Matrix<C> z, aTopRowConj;

    // Force a blocksize of 1 so each repartition exposes one row and column.
    // NOTE(review): no matching pop of the blocksize stack is visible in this
    // listing.
    PushBlocksizeStack( 1 );
    ( A, ATL, ATR,
         ABL, ABR, 0 );
    ( t, tT,
         tB, 0 );
    // Iterate until ATL reaches either the full height or the full width of A.
    while( ATL.Height() < A.Height() && ATL.Width() < A.Width() )
        ( ATL, /**/ ATR,  A00, /**/ a01,     A02,
         /*************/ /**********************/
               /**/       a10, /**/ alpha11, a12,
          ABL, /**/ ABR,  A20, /**/ a21,     A22 );

        ( tT,  t0,
         /**/ /****/
          tB,  t2 );

        // Glue the current row and the rows beneath it into contiguous views.
        aTopRow.View1x2( alpha11, a12 );
        ABottomPan.View1x2( a21, A22 );

        Zeros( ABottomPan.Height(), 1, z );
        // Build the reflector from (alpha11, a12) and record its scalar in t.
        const C tau = Reflector( alpha11, a12 );
        tau1.Set( 0, 0, tau );
        // NOTE(review): alpha is read here but not used again in the visible
        // code; statements that overwrite and later restore alpha11 are
        // presumably missing -- confirm against the original source.
        const C alpha = alpha11.Get(0,0);

        // z := ABottomPan conj(aTopRow), then the rank-one update
        // ABottomPan := ABottomPan - conj(tau) z conj(aTopRow)^H (assuming Ger
        // is the conjugating rank-one update -- TODO confirm).
        Conjugate( aTopRow, aTopRowConj );
        Gemv( NORMAL, C(1), ABottomPan, aTopRowConj, C(0), z );
        Ger( -Conj(tau), z, aTopRowConj, ABottomPan );


        ( tT,  t0,
         /**/ /****/
          tB,  t2 );

        ( ATL, /**/ ATR,  A00, a01,     /**/ A02,
               /**/       a10, alpha11, /**/ a12,
         /*************/ /**********************/
          ABL, /**/ ABR,  A20, a21,     /**/ A22 );
#ifndef RELEASE
Exemple #30
// Frobenius norm of a square matrix of which only one triangle is read.
// Off-diagonal entries of the chosen triangle are counted twice and diagonal
// entries once. The sum of squares is carried in the scaled form
// scale^2 * scaledSquare, where `scale` tracks the largest magnitude seen so
// far (presumably to avoid overflow/underflow when squaring).
//
// uplo : UPPER reads entries with i < j; the other loop nest reads i > j.
// A    : must be square, otherwise std::logic_error is thrown.
// Returns scale*Sqrt(scaledSquare), which is 0 when no nonzero entry is seen.
// NOTE(review): braces, else lines and #endif lines are missing from this
// listing, which is why `relScale` appears to be declared twice per branch.
inline typename Base<F>::type 
HermitianFrobeniusNorm( UpperOrLower uplo, const Matrix<F>& A )
#ifndef RELEASE
    typedef typename Base<F>::type R;

    if( A.Height() != A.Width() )
        throw std::logic_error("Hermitian matrices must be square.");

    // With scale = 0 the first nonzero entry always takes the rescaling branch,
    // which multiplies the initial scaledSquare = 1 by (0/alphaAbs)^2 = 0.
    R scale = 0;
    R scaledSquare = 1;
    const int height = A.Height();
    const int width = A.Width();
    if( uplo == UPPER )
        for( int j=0; j<width; ++j )
            // Strictly upper part of column j: each entry contributes twice.
            for( int i=0; i<j; ++i )
                const R alphaAbs = Abs(A.Get(i,j));
                if( alphaAbs != 0 )
                    // No larger than the current scale: accumulate relative to
                    // it. Otherwise (the `else` is missing from the listing):
                    // rescale the running sum to the new maximum.
                    if( alphaAbs <= scale )
                        const R relScale = alphaAbs/scale;
                        scaledSquare += 2*relScale*relScale;
                        const R relScale = scale/alphaAbs;
                        scaledSquare = scaledSquare*relScale*relScale + 2;
                        scale = alphaAbs;
            // Diagonal entry of column j: contributes once.
            const R alphaAbs = Abs(A.Get(j,j));
            if( alphaAbs != 0 )
                if( alphaAbs <= scale )
                    const R relScale = alphaAbs/scale;
                    scaledSquare += relScale*relScale;
                    const R relScale = scale/alphaAbs;
                    scaledSquare = scaledSquare*relScale*relScale + 1;
                    scale = alphaAbs;
        // Lower-triangular counterpart (the `else` of the uplo test is missing
        // from the listing): same accumulation over rows i > j.
        for( int j=0; j<width; ++j )
            for( int i=j+1; i<height; ++i )
                const R alphaAbs = Abs(A.Get(i,j));
                if( alphaAbs != 0 )
                    if( alphaAbs <= scale )
                        const R relScale = alphaAbs/scale;
                        scaledSquare += 2*relScale*relScale;
                        const R relScale = scale/alphaAbs;
                        scaledSquare = scaledSquare*relScale*relScale + 2;
                        scale = alphaAbs;
            // Diagonal entry of column j: contributes once.
            const R alphaAbs = Abs(A.Get(j,j));
            if( alphaAbs != 0 )
                if( alphaAbs <= scale )
                    const R relScale = alphaAbs/scale;
                    scaledSquare += relScale*relScale;
                    const R relScale = scale/alphaAbs;
                    scaledSquare = scaledSquare*relScale*relScale + 1;
                    scale = alphaAbs;

    // Undo the scaling to obtain the actual norm.
    const R norm = scale*Sqrt(scaledSquare);
#ifndef RELEASE
    return norm;