Esempio n. 1
0
void Tikhonov
( Orientation orientation,
  const Matrix<F>& A,
  const Matrix<F>& B, 
  const Matrix<F>& G,
        Matrix<F>& X,
  TikhonovAlg alg )
{
    DEBUG_CSE
    const bool normal = ( orientation==NORMAL );
    const Int m = ( normal ? A.Height() : A.Width()  );
    const Int n = ( normal ? A.Width()  : A.Height() );
    if( G.Width() != n )
        LogicError("Tikhonov matrix was the wrong width");
    if( orientation == TRANSPOSE && IsComplex<F>::value )
        LogicError("Transpose version of complex Tikhonov not yet supported");

    if( m >= n )
    {
        Matrix<F> Z;
        if( alg == TIKHONOV_CHOLESKY )
        {
            if( orientation == NORMAL )
                Herk( LOWER, ADJOINT, Base<F>(1), A, Z );
            else
                Herk( LOWER, NORMAL, Base<F>(1), A, Z );
            Herk( LOWER, ADJOINT, Base<F>(1), G, Base<F>(1), Z );
            Cholesky( LOWER, Z );
        }
        else
        {
            const Int mG = G.Height();
            Zeros( Z, m+mG, n );
            auto ZT = Z( IR(0,m),    IR(0,n) );
            auto ZB = Z( IR(m,m+mG), IR(0,n) );
            if( orientation == NORMAL )
                ZT = A;
            else
                Adjoint( A, ZT );
            ZB = G;
            qr::ExplicitTriang( Z ); 
        }
        if( orientation == NORMAL )
            Gemm( ADJOINT, NORMAL, F(1), A, B, X );
        else
            Gemm( NORMAL, NORMAL, F(1), A, B, X );
        cholesky::SolveAfter( LOWER, NORMAL, Z, X );
    }
    else
    {
        LogicError("This case not yet supported");
    }
}
Esempio n. 2
0
inline void
CholeskyUVar2( Matrix<F>& A )
{
#ifndef RELEASE
    PushCallStack("hpd_inverse::CholeskyUVar2");
    if( A.Height() != A.Width() )
        throw std::logic_error("Nonsquare matrices cannot be triangular");
#endif
    // Matrix views
    Matrix<F> 
        ATL, ATR,  A00, A01, A02,
        ABL, ABR,  A10, A11, A12,
                   A20, A21, A22;

    // Start the algorithm
    PartitionDownDiagonal
    ( A, ATL, ATR,
         ABL, ABR, 0 );
    while( ATL.Height() < A.Height() )
    {
        RepartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, /**/ A01, A02,
         /*************/ /******************/
               /**/       A10, /**/ A11, A12,
          ABL, /**/ ABR,  A20, /**/ A21, A22 );

        //--------------------------------------------------------------------//
        Cholesky( UPPER, A11 );
        Trsm( RIGHT, UPPER, NORMAL, NON_UNIT, F(1), A11, A01 );
        Trsm( LEFT, UPPER, ADJOINT, NON_UNIT, F(1), A11, A12 );
        Herk( UPPER, NORMAL, F(1), A01, F(1), A00 );
        Gemm( NORMAL, NORMAL, F(-1), A01, A12, F(1), A02 );
        Herk( UPPER, ADJOINT, F(-1), A12, F(1), A22 );
        Trsm( RIGHT, UPPER, ADJOINT, NON_UNIT, F(1), A11, A01 );
        Trsm( LEFT, UPPER, NORMAL, NON_UNIT, F(-1), A11, A12 );
        TriangularInverse( UPPER, NON_UNIT, A11 );
        Trtrmm( ADJOINT, UPPER, A11 );
        //--------------------------------------------------------------------//

        SlidePartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, A01, /**/ A02,
               /**/       A10, A11, /**/ A12,
         /*************/ /******************/
          ABL, /**/ ABR,  A20, A21, /**/ A22 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Esempio n. 3
0
void QP
( const Matrix<Real>& A, 
  const Matrix<Real>& B, 
        Matrix<Real>& X, 
  const qp::direct::Ctrl<Real>& ctrl )
{
    DEBUG_CSE

    const Int n = A.Width();
    const Int k = B.Width();
    Matrix<Real> Q, AHat, bHat, c;

    Herk( LOWER, ADJOINT, Real(1), A, Q );
    Zeros( AHat, 0, n );
    Zeros( bHat, 0, 1 );
    Zeros( X,    n, k );

    Matrix<Real> y, z;
    for( Int j=0; j<k; ++j )
    {
        auto x = X( ALL, IR(j) );
        auto b = B( ALL, IR(j) );

        Zeros( c, n, 1 );
        Gemv( ADJOINT, Real(-1), A, b, Real(0), c );

        El::QP( Q, AHat, bHat, c, x, y, z, ctrl );
    }
}
UnitaryCoherence( DistMatrix<F>& U )
{
#ifndef RELEASE
    CallStackEntry entry("UnitaryCoherence");
#endif
    typedef BASE(F) R;
    const Grid& grid = U.Grid();
    const Int n = U.Height();
    const Int r = U.Width();

    // Z := U U' in n^2 r work
    DistMatrix<F> Z( grid );
    Herk( UPPER, NORMAL, F(1), U, Z );

    // Now make Z explicitly Hermitian so that our job is easier
    MakeHermitian( UPPER, Z );

    // Compute the maximum column two-norm squared
    const Int localWidth = Z.LocalWidth();
    const Int localHeight = Z.LocalHeight();
    std::vector<R> normsSquared( localWidth );
    for( Int jLocal=0; jLocal<localWidth; ++jLocal )
    {
        const R localNorm = 
            blas::Nrm2( localHeight, Z.LockedBuffer(0,jLocal), 1 );
        normsSquared[jLocal] = localNorm*localNorm;
    }
    mpi::AllReduce( &normsSquared[0], localWidth, grid.ColComm() );
    R maxLocalNormSquared = 
        *std::max_element( normsSquared.begin(), normsSquared.end() );
    const R maxNormSquared = 
        mpi::AllReduce( maxLocalNormSquared, mpi::MAX, grid.RowComm() );
    return (n*maxNormSquared)/r;
}
UnitaryCoherence( Matrix<F>& U )
{
#ifndef RELEASE
    CallStackEntry entry("UnitaryCoherence");
#endif
    typedef BASE(F) R;
    const Int n = U.Height();
    const Int r = U.Width();

    // Z := U U' in n^2 r work
    Matrix<F> Z;
    Herk( UPPER, NORMAL, F(1), U, Z );

    // Now make Z explicitly Hermitian so that our job is easier
    MakeHermitian( UPPER, Z );

    // Compute the maximum column two-norm
    R maxColNorm = 0;
    for( Int j=0; j<n; ++j )
    {
        const R colNorm = blas::Nrm2( n, Z.LockedBuffer(0,j), 1 );
        maxColNorm = std::max( colNorm, maxColNorm );
    }
    return (n*maxColNorm*maxColNorm)/r;
}
Esempio n. 6
0
Base<F> Coherence( const ElementalMatrix<F>& A )
{
    DEBUG_ONLY(CSE cse("Coherence"))
    DistMatrix<F> B( A );
    DistMatrix<Base<F>,MR,STAR> norms(B.Grid());
    ColumnTwoNorms( B, norms );

    DiagonalSolve( RIGHT, NORMAL, norms, B, true );
    DistMatrix<F> C(B.Grid());
    Identity( C, A.Width(), A.Width() );
    Herk( UPPER, ADJOINT, Base<F>(-1), B, Base<F>(1), C );

    return HermitianMaxNorm( UPPER, C );
}
Esempio n. 7
0
Base<F> Coherence( const Matrix<F>& A )
{
    DEBUG_ONLY(CallStackEntry cse("Coherence"))
    Matrix<F> B( A );
    Matrix<Base<F>> norms;
    ColumnNorms( B, norms );

    DiagonalSolve( RIGHT, NORMAL, norms, B, true );
    Matrix<F> C;
    Identity( C, A.Width(), A.Width() );
    Herk( UPPER, ADJOINT, Base<F>(-1), B, Base<F>(1), C );

    return HermitianMaxNorm( UPPER, C );
}
Esempio n. 8
0
Int
ADMM
( const Matrix<Real>& A,
  const Matrix<Real>& B,
        Matrix<Real>& X, 
  const ADMMCtrl<Real>& ctrl )
{
    DEBUG_CSE
    const Real maxReal = limits::Max<Real>();

    Matrix<Real> Q, C;
    Herk( LOWER, ADJOINT, Real(1), A, Q );
    Gemm( ADJOINT, NORMAL, Real(-1), A, B, C );

    return qp::box::ADMM( Q, C, Real(0), maxReal, X, ctrl );
}
Esempio n. 9
0
inline void
CholeskyLVar2( Matrix<F>& A )
{
#ifndef RELEASE
    PushCallStack("internal::CholeskyLVar2");
    if( A.Height() != A.Width() )
        throw std::logic_error
        ("Can only compute Cholesky factor of square matrices");
#endif
    // Matrix views
    Matrix<F> 
        ATL, ATR,   A00, A01, A02,
        ABL, ABR,   A10, A11, A12,
                    A20, A21, A22;

    // Start the algorithm
    PartitionDownDiagonal
    ( A, ATL, ATR,
         ABL, ABR, 0 );
    while( ATL.Height() < A.Height() )
    {
        RepartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, /**/ A01, A02,
         /*************/ /******************/
               /**/       A10, /**/ A11, A12,
          ABL, /**/ ABR,  A20, /**/ A21, A22 );

        //--------------------------------------------------------------------//
        Herk( LOWER, NORMAL, F(-1), A10, F(1), A11 );
        CholeskyLVar3Unb( A11 );
        Gemm( NORMAL, ADJOINT, F(-1), A20, A10, F(1), A21 );
        Trsm( RIGHT, LOWER, ADJOINT, NON_UNIT, F(1), A11, A21 );
        //--------------------------------------------------------------------//

        SlidePartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, A01, /**/ A02,
               /**/       A10, A11, /**/ A12,
         /*************/ /******************/
          ABL, /**/ ABR,  A20, A21, /**/ A22 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Esempio n. 10
0
Int
ADMM
( const ElementalMatrix<Real>& APre,
  const ElementalMatrix<Real>& B, 
        ElementalMatrix<Real>& X,
  const ADMMCtrl<Real>& ctrl )
{
    DEBUG_CSE
    const Real maxReal = limits::Max<Real>();

    DistMatrixReadProxy<Real,Real,MC,MR> AProx( APre );
    auto& A = AProx.GetLocked();

    DistMatrix<Real> Q(A.Grid()), C(A.Grid());
    Herk( LOWER, ADJOINT, Real(1), A, Q );
    Gemm( ADJOINT, NORMAL, Real(-1), A, B, C );
    
    return qp::box::ADMM( Q, C, Real(0), maxReal, X, ctrl );
}
Esempio n. 11
0
void QP
( const DistSparseMatrix<Real>& A, 
  const DistMultiVec<Real>& B, 
        DistMultiVec<Real>& X, 
  const qp::direct::Ctrl<Real>& ctrl )
{
    DEBUG_CSE

    const Int m = A.Height();
    const Int n = A.Width();
    const Int k = B.Width();
    mpi::Comm comm = A.Comm();
    DistSparseMatrix<Real> Q(comm), AHat(comm);
    DistMultiVec<Real> bHat(comm), c(comm);

    Herk( LOWER, ADJOINT, Real(1), A, Q );
    MakeHermitian( LOWER, Q );
    Zeros( AHat, 0, n );
    Zeros( bHat, 0, 1 );
    Zeros( X,    n, k );

    DistMultiVec<Real> q(comm), y(comm), z(comm);
    auto& qLoc = q.Matrix();
    auto& XLoc = X.Matrix();
    auto& BLoc = B.LockedMatrix();
    for( Int j=0; j<k; ++j )
    {
        auto xLoc = XLoc( ALL, IR(j) );
        auto bLoc = BLoc( ALL, IR(j) );

        Zeros( c, n, 1 );
        Zeros( q, m, 1 );
        qLoc = bLoc;
        Multiply( ADJOINT, Real(-1), A, q, Real(0), c );

        Zeros( q, n, 1 );
        qLoc = xLoc;
        El::QP( Q, AHat, bHat, c, q, y, z, ctrl );
        xLoc = qLoc;
    }
}
Esempio n. 12
0
inline void
UVar3( Matrix<F>& A )
{
#ifndef RELEASE
    CallStackEntry entry("cholesky::UVar3");
    if( A.Height() != A.Width() )
        throw std::logic_error
        ("Can only compute Cholesky factor of square matrices");
#endif
    // Matrix views
    Matrix<F> 
        ATL, ATR,  A00, A01, A02,
        ABL, ABR,  A10, A11, A12,
                   A20, A21, A22;

    // Start the algorithm
    PartitionDownDiagonal
    ( A, ATL, ATR,
         ABL, ABR, 0 ); 
    while( ABR.Height() > 0 )
    {
        RepartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, /**/ A01, A02,
         /*************/ /******************/
               /**/       A10, /**/ A11, A12,
          ABL, /**/ ABR,  A20, /**/ A21, A22 );

        //--------------------------------------------------------------------//
        cholesky::UVar3Unb( A11 );
        Trsm( LEFT, UPPER, ADJOINT, NON_UNIT, F(1), A11, A12 );
        Herk( UPPER, ADJOINT, F(-1), A12, F(1), A22 );
        //--------------------------------------------------------------------//

        SlidePartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, A01, /**/ A02,
               /**/       A10, A11, /**/ A12,
         /*************/ /******************/
          ABL, /**/ ABR,  A20, A21, /**/ A22 );
    }
}
Esempio n. 13
0
void Covariance( const Matrix<F>& D, Matrix<F>& S )
{
    DEBUG_CSE
    const Int numObs = D.Height();
    const Int n = D.Width();

    // Compute the average column
    Matrix<F> ones, xMean;
    Ones( ones, numObs, 1 );
    Gemv( TRANSPOSE, F(1)/F(numObs), D, ones, xMean );

    // Subtract the mean from each column of D
    Matrix<F> DDev( D );
    for( Int i=0; i<numObs; ++i )
        blas::Axpy
        ( n, F(-1), xMean.LockedBuffer(), 1, DDev.Buffer(i,0), DDev.LDim() );

    // Form S := 1/(numObs-1) DDev DDev'
    Herk( LOWER, ADJOINT, Base<F>(1)/Base<F>(numObs-1), DDev, S );
    Conjugate( S );
    MakeHermitian( LOWER, S );
}
Esempio n. 14
0
void QP
( const ElementalMatrix<Real>& APre, 
  const ElementalMatrix<Real>& BPre, 
        ElementalMatrix<Real>& XPre,
  const qp::direct::Ctrl<Real>& ctrl )
{
    DEBUG_CSE

    DistMatrixReadProxy<Real,Real,MC,MR>
      AProx( APre ),
      BProx( BPre );
    DistMatrixWriteProxy<Real,Real,MC,MR>
      XProx( XPre );
    auto& A = AProx.GetLocked();
    auto& B = BProx.GetLocked();
    auto& X = XProx.Get();

    const Int n = A.Width();
    const Int k = B.Width();
    const Grid& g = A.Grid();
    DistMatrix<Real> Q(g), AHat(g), bHat(g), c(g);

    Herk( LOWER, ADJOINT, Real(1), A, Q );
    Zeros( AHat, 0, n );
    Zeros( bHat, 0, 1 );
    Zeros( X,    n, k );

    DistMatrix<Real> y(g), z(g);
    for( Int j=0; j<k; ++j )
    {
        auto x = X( ALL, IR(j) );
        auto b = B( ALL, IR(j) );

        Zeros( c, n, 1 );
        Gemv( ADJOINT, Real(-1), A, b, Real(0), c );

        El::QP( Q, AHat, bHat, c, x, y, z, ctrl );
    }
}
Esempio n. 15
0
void Covariance
( const ElementalMatrix<F>& DPre, ElementalMatrix<F>& SPre )
{
    DEBUG_CSE

    DistMatrixReadProxy<F,F,MC,MR>
      DProx( DPre );
    DistMatrixWriteProxy<F,F,MC,MR>
      SProx( SPre );
    auto& D = DProx.GetLocked();
    auto& S = SProx.Get();

    const Grid& g = D.Grid();
    const Int numObs = D.Height();

    // Compute the average column
    DistMatrix<F> ones(g), xMean(g);
    Ones( ones, numObs, 1 );
    Gemv( TRANSPOSE, F(1)/F(numObs), D, ones, xMean );
    DistMatrix<F,MR,STAR> xMean_MR(g);
    xMean_MR.AlignWith( D );
    xMean_MR = xMean;

    // Subtract the mean from each column of D
    DistMatrix<F> DDev( D );
    for( Int iLoc=0; iLoc<DDev.LocalHeight(); ++iLoc )
        blas::Axpy
        ( DDev.LocalWidth(), F(-1), 
          xMean_MR.LockedBuffer(), 1, 
          DDev.Buffer(iLoc,0),     DDev.LDim() );

    // Form S := 1/(numObs-1) DDev DDev'
    Herk( LOWER, ADJOINT, Base<F>(1)/Base<F>(numObs-1), DDev, S );
    Conjugate( S );
    MakeHermitian( LOWER, S );
}
Esempio n. 16
0
QDWHInfo QDWHInner( Matrix<F>& A, Base<F> sMinUpper, const QDWHCtrl& ctrl )
{
    EL_DEBUG_CSE
    typedef Base<F> Real;
    typedef Complex<Real> Cpx;
    const Int m = A.Height();
    const Int n = A.Width();
    const Real oneThird = Real(1)/Real(3);
    if( m < n )
        LogicError("Height cannot be less than width");

    QDWHInfo info;

    QRCtrl<Base<F>> qrCtrl;
    qrCtrl.colPiv = ctrl.colPiv;

    const Real eps = limits::Epsilon<Real>();
    const Real tol = 5*eps;
    const Real cubeRootTol = Pow(tol,oneThird);
    Real L = sMinUpper / Sqrt(Real(n));

    Real frobNormADiff;
    Matrix<F> ALast, ATemp, C;
    Matrix<F> Q( m+n, n );
    auto QT = Q( IR(0,m  ), ALL );
    auto QB = Q( IR(m,END), ALL );
    while( info.numIts < ctrl.maxIts )
    {
        ALast = A;

        Real L2;
        Cpx dd, sqd;
        if( Abs(1-L) < tol )
        {
            L2 = 1;
            dd = 0;
            sqd = 1;
        }
        else
        {
            L2 = L*L;
            dd = Pow( 4*(1-L2)/(L2*L2), oneThird );
            sqd = Sqrt( Real(1)+dd );
        }
        const Cpx arg = Real(8) - Real(4)*dd + Real(8)*(2-L2)/(L2*sqd);
        const Real a = (sqd + Sqrt(arg)/Real(2)).real();
        const Real b = (a-1)*(a-1)/4;
        const Real c = a+b-1;
        const Real alpha = a-b/c;
        const Real beta = b/c;

        L = L*(a+b*L2)/(1+c*L2);

        if( c > 100 )
        {
            //
            // The standard QR-based algorithm
            //
            QT = A;
            QT *= Sqrt(c);
            MakeIdentity( QB );
            qr::ExplicitUnitary( Q, true, qrCtrl );
            Gemm( NORMAL, ADJOINT, F(alpha/Sqrt(c)), QT, QB, F(beta), A );
            ++info.numQRIts;
        }
        else
        {
            //
            // Use faster Cholesky-based algorithm since A is well-conditioned
            //
            Identity( C, n, n );
            Herk( LOWER, ADJOINT, c, A, Real(1), C );
            Cholesky( LOWER, C );
            ATemp = A;
            Trsm( RIGHT, LOWER, ADJOINT, NON_UNIT, F(1), C, ATemp );
            Trsm( RIGHT, LOWER, NORMAL, NON_UNIT, F(1), C, ATemp );
            A *= beta;
            Axpy( alpha, ATemp, A );
            ++info.numCholIts;
        }

        ++info.numIts;
        ALast -= A;
        frobNormADiff = FrobeniusNorm( ALast );
        if( frobNormADiff <= cubeRootTol && Abs(1-L) <= tol )
            break;
    }
    return info;
}
Esempio n. 17
0
inline void
RLHF
( Conjugation conjugation, int offset, 
  const Matrix<Complex<R> >& H,
  const Matrix<Complex<R> >& t,
        Matrix<Complex<R> >& A )
{
#ifndef RELEASE
    PushCallStack("apply_packed_reflectors::RLHF");
    if( offset > 0 || offset < -H.Width() )
        throw std::logic_error("Transforms out of bounds");
    if( H.Width() != A.Width() )
        throw std::logic_error
        ("Width of transforms must equal width of target matrix");
    if( t.Height() != H.DiagonalLength( offset ) )
        throw std::logic_error("t must be the same length as H's offset diag");
#endif
    typedef Complex<R> C;

    Matrix<C>
        HTL, HTR,  H00, H01, H02,  HPan, HPanCopy,
        HBL, HBR,  H10, H11, H12,
                   H20, H21, H22;
    Matrix<C> ALeft;
    Matrix<C>
        tT,  t0,
        tB,  t1,
             t2;

    Matrix<C> SInv, Z;

    LockedPartitionDownDiagonal
    ( H, HTL, HTR,
         HBL, HBR, 0 );
    LockedPartitionDown
    ( t, tT,
         tB, 0 );
    while( HTL.Height() < H.Height() && HTL.Width() < H.Width() )
    {
        LockedRepartitionDownDiagonal
        ( HTL, /**/ HTR,  H00, /**/ H01, H02,
         /*************/ /******************/
               /**/       H10, /**/ H11, H12,
          HBL, /**/ HBR,  H20, /**/ H21, H22 );

        const int HPanWidth = H10.Width() + H11.Width();
        const int HPanOffset = 
            std::min( H11.Height(), std::max(-offset-H00.Height(),0) );
        const int HPanHeight = H11.Height()-HPanOffset;
        LockedView
        ( HPan, H, H00.Height()+HPanOffset, 0, HPanHeight, HPanWidth );

        LockedRepartitionDown
        ( tT,  t0,
         /**/ /**/
               t1,
          tB,  t2, HPanHeight );

        View( ALeft, A, 0, 0, A.Height(), HPanWidth );

        Zeros( ALeft.Height(), HPan.Height(), Z );
        Zeros( HPan.Height(), HPan.Height(), SInv );
        //--------------------------------------------------------------------//
        HPanCopy = HPan;
        MakeTrapezoidal( RIGHT, LOWER, offset, HPanCopy );
        SetDiagonal( RIGHT, offset, HPanCopy, C(1) );

        Herk( UPPER, NORMAL, C(1), HPanCopy, C(0), SInv );
        FixDiagonal( conjugation, t1, SInv );

        Gemm( NORMAL, ADJOINT, C(1), ALeft, HPanCopy, C(0), Z );
        Trsm( RIGHT, UPPER, NORMAL, NON_UNIT, C(1), SInv, Z );
        Gemm( NORMAL, NORMAL, C(-1), Z, HPanCopy, C(1), ALeft );
        //--------------------------------------------------------------------//

        SlideLockedPartitionDownDiagonal
        ( HTL, /**/ HTR,  H00, H01, /**/ H02,
               /**/       H10, H11, /**/ H12,
         /*************/ /******************/
          HBL, /**/ HBR,  H20, H21, /**/ H22 );

        SlideLockedPartitionDown
        ( tT,  t0,
               t1,
         /**/ /**/
          tB,  t2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Esempio n. 18
0
Int ADMM
( const Matrix<Real>& A,
  const Matrix<Real>& b,
  const Matrix<Real>& c,
        Matrix<Real>& z,
  const ADMMCtrl<Real>& ctrl )
{
    EL_DEBUG_CSE

    // Cache a custom partially-pivoted LU factorization of
    //    |  rho*I   A^H | = | B11  B12 |
    //    |  A       0   |   | B21  B22 |
    // by (justifiably) avoiding pivoting in the first n steps of
    // the factorization, so that
    //    [I,rho*I] = lu(rho*I).
    // The factorization would then proceed with
    //    B21 := B21 U11^{-1} = A (rho*I)^{-1} = A/rho
    //    B12 := L11^{-1} B12 = I A^H = A^H.
    // The Schur complement would then be
    //    B22 := B22 - B21 B12 = 0 - (A*A^H)/rho.
    // We then factor said matrix with LU with partial pivoting and
    // swap the necessary rows of B21 in order to implicitly commute
    // the row pivots with the Gauss transforms in the manner standard
    // for GEPP. Unless A A' is singular, pivoting should not be needed,
    // as Cholesky factorization of the negative matrix should be valid.
    //
    // The result is the factorization
    //   | I 0   | | rho*I A^H | = | I   0   | | rho*I U12 |,
    //   | 0 P22 | | A     0   |   | L21 L22 | | 0     U22 |
    // where [L22,U22] are stored within B22.
    Matrix<Real> U12, L21, B22, bPiv;
    Adjoint( A, U12 );
    L21 = A;
    L21 *= 1/ctrl.rho;
    Herk( LOWER, NORMAL, -1/ctrl.rho, A, B22 );
    MakeHermitian( LOWER, B22 );
    // TODO: Replace with sparse-direct Cholesky version?
    Permutation P2;
    LU( B22, P2 );
    P2.PermuteRows( L21 );
    bPiv = b;
    P2.PermuteRows( bPiv );

    // Possibly form the inverse of L22 U22
    Matrix<Real> X22;
    if( ctrl.inv )
    {
        X22 = B22;
        MakeTrapezoidal( LOWER, X22 );
        FillDiagonal( X22, Real(1) );
        TriangularInverse( LOWER, UNIT, X22 );
        Trsm( LEFT, UPPER, NORMAL, NON_UNIT, Real(1), B22, X22 );
    }

    Int numIter=0;
    const Int m = A.Height();
    const Int n = A.Width();
    Matrix<Real> g, xTmp, y, t;
    Zeros( g, m+n, 1 );
    PartitionDown( g, xTmp, y, n );
    Matrix<Real> x, u, zOld, xHat;
    Zeros( z, n, 1 );
    Zeros( u, n, 1 );
    Zeros( t, n, 1 );
    while( numIter < ctrl.maxIter )
    {
        zOld = z;

        // Find x from
        //  | rho*I  A^H | | x | = | rho*(z-u)-c |
        //  | A      0   | | y |   | b           |
        // via our cached custom factorization:
        //
        // |x| = inv(U) inv(L) P' |rho*(z-u)-c|
        // |y|                    |b          |
        //     = |rho*I U12|^{-1} |I   0  | |I 0   | |rho*(z-u)-c|
        //     = |0     U22|      |L21 L22| |0 P22'| |b          |
        //     = "                        " |rho*(z-u)-c|
        //                                  | P22' b    |
        xTmp = z;
        xTmp -= u;
        xTmp *= ctrl.rho;
        xTmp -= c;
        y = bPiv;
        Gemv( NORMAL, Real(-1), L21, xTmp, Real(1), y );
        if( ctrl.inv )
        {
            Gemv( NORMAL, Real(1), X22, y, t );
            y = t;
        }
        else
        {
            Trsv( LOWER, NORMAL, UNIT, B22, y );
            Trsv( UPPER, NORMAL, NON_UNIT, B22, y );
        }
        Gemv( NORMAL, Real(-1), U12, y, Real(1), xTmp );
        xTmp *= 1/ctrl.rho;

        // xHat := alpha*x + (1-alpha)*zOld
        xHat = xTmp;
        xHat *= ctrl.alpha;
        Axpy( 1-ctrl.alpha, zOld, xHat );

        // z := pos(xHat+u)
        z = xHat;
        z += u;
        LowerClip( z, Real(0) );

        // u := u + (xHat-z)
        u += xHat;
        u -= z;

        const Real objective = Dot( c, xTmp );

        // rNorm := || x - z ||_2
        t = xTmp;
        t -= z;
        const Real rNorm = FrobeniusNorm( t );
        // sNorm := |rho| || z - zOld ||_2
        t = z;
        t -= zOld;
        const Real sNorm = Abs(ctrl.rho)*FrobeniusNorm( t );

        const Real epsPri = Sqrt(Real(n))*ctrl.absTol +
            ctrl.relTol*Max(FrobeniusNorm(xTmp),FrobeniusNorm(z));
        const Real epsDual = Sqrt(Real(n))*ctrl.absTol +
            ctrl.relTol*Abs(ctrl.rho)*FrobeniusNorm(u);

        if( ctrl.print )
        {
            t = xTmp;
            LowerClip( t, Real(0) );
            t -= xTmp;
            const Real clipDist = FrobeniusNorm( t );
            cout << numIter << ": "
              << "||x-z||_2=" << rNorm << ", "
              << "epsPri=" << epsPri << ", "
              << "|rho| ||z-zOld||_2=" << sNorm << ", "
              << "epsDual=" << epsDual << ", "
              << "||x-Pos(x)||_2=" << clipDist << ", "
              << "c'x=" << objective << endl;
        }
        if( rNorm < epsPri && sNorm < epsDual )
            break;
        ++numIter;
    }
    if( ctrl.maxIter == numIter )
        cout << "ADMM failed to converge" << endl;
    x = xTmp;
    return numIter;
}
Esempio n. 19
0
inline void
RLHF
( Conjugation conjugation, int offset, 
  const DistMatrix<Complex<R> >& H,
  const DistMatrix<Complex<R>,MD,STAR>& t,
        DistMatrix<Complex<R> >& A )
{
#ifndef RELEASE
    PushCallStack("apply_packed_reflectors::RLHF");
    if( H.Grid() != t.Grid() || t.Grid() != A.Grid() )
        throw std::logic_error
        ("{H,t,A} must be distributed over the same grid");
    if( offset > 0 || offset < -H.Width() )
        throw std::logic_error("Transforms out of bounds");
    if( H.Width() != A.Width() )
        throw std::logic_error
        ("Width of transforms must equal width of target matrix");
    if( t.Height() != H.DiagonalLength( offset ) )
        throw std::logic_error("t must be the same length as H's offset diag");
    if( !t.AlignedWithDiagonal( H, offset ) )
        throw std::logic_error("t must be aligned with H's 'offset' diagonal");
#endif
    typedef Complex<R> C;
    const Grid& g = H.Grid();

    DistMatrix<C>
        HTL(g), HTR(g),  H00(g), H01(g), H02(g),  HPan(g), HPanCopy(g),
        HBL(g), HBR(g),  H10(g), H11(g), H12(g),
                         H20(g), H21(g), H22(g);
    DistMatrix<C> ALeft(g);
    DistMatrix<C,MD,STAR>
        tT(g),  t0(g),
        tB(g),  t1(g),
                t2(g);

    DistMatrix<C,STAR,VR  > HPan_STAR_VR(g);
    DistMatrix<C,STAR,MR  > HPan_STAR_MR(g);
    DistMatrix<C,STAR,STAR> t1_STAR_STAR(g);
    DistMatrix<C,STAR,STAR> SInv_STAR_STAR(g);
    DistMatrix<C,STAR,MC  > ZAdj_STAR_MC(g);
    DistMatrix<C,STAR,VC  > ZAdj_STAR_VC(g);

    LockedPartitionDownDiagonal
    ( H, HTL, HTR,
         HBL, HBR, 0 );
    LockedPartitionDown
    ( t, tT,
         tB, 0 );
    while( HTL.Height() < H.Height() && HTL.Width() < H.Width() )
    {
        LockedRepartitionDownDiagonal
        ( HTL, /**/ HTR,  H00, /**/ H01, H02,
         /*************/ /******************/
               /**/       H10, /**/ H11, H12,
          HBL, /**/ HBR,  H20, /**/ H21, H22 );

        const int HPanWidth = H10.Width() + H11.Width();
        const int HPanOffset = 
            std::min( H11.Height(), std::max(-offset-H00.Height(),0) );
        const int HPanHeight = H11.Height()-HPanOffset;
        LockedView
        ( HPan, H, H00.Height()+HPanOffset, 0, HPanHeight, HPanWidth );

        LockedRepartitionDown
        ( tT,  t0,
         /**/ /**/
               t1,
          tB,  t2, HPanHeight );

        View( ALeft, A, 0, 0, A.Height(), HPanWidth );

        HPan_STAR_MR.AlignWith( ALeft );
        ZAdj_STAR_MC.AlignWith( ALeft );
        ZAdj_STAR_VC.AlignWith( ALeft );
        Zeros( HPan.Height(), ALeft.Height(), ZAdj_STAR_MC );
        Zeros( HPan.Height(), HPan.Height(), SInv_STAR_STAR );
        //--------------------------------------------------------------------//
        HPanCopy = HPan;
        MakeTrapezoidal( RIGHT, LOWER, offset, HPanCopy );
        SetDiagonal( RIGHT, offset, HPanCopy, C(1) );

        HPan_STAR_VR = HPanCopy;
        Herk
        ( UPPER, NORMAL,
          C(1), HPan_STAR_VR.LockedMatrix(),
          C(0), SInv_STAR_STAR.Matrix() );
        SInv_STAR_STAR.SumOverGrid();
        t1_STAR_STAR = t1;
        FixDiagonal( conjugation, t1_STAR_STAR, SInv_STAR_STAR );

        HPan_STAR_MR = HPan_STAR_VR;
        LocalGemm
        ( NORMAL, ADJOINT,
          C(1), HPan_STAR_MR, ALeft, C(0), ZAdj_STAR_MC );
        ZAdj_STAR_VC.SumScatterFrom( ZAdj_STAR_MC );

        LocalTrsm
        ( LEFT, UPPER, ADJOINT, NON_UNIT,
          C(1), SInv_STAR_STAR, ZAdj_STAR_VC );

        ZAdj_STAR_MC = ZAdj_STAR_VC;
        LocalGemm
        ( ADJOINT, NORMAL,
          C(-1), ZAdj_STAR_MC, HPan_STAR_MR, C(1), ALeft );
        //--------------------------------------------------------------------//
        HPan_STAR_MR.FreeAlignments();
        ZAdj_STAR_MC.FreeAlignments();
        ZAdj_STAR_VC.FreeAlignments();

        SlideLockedPartitionDownDiagonal
        ( HTL, /**/ HTR,  H00, H01, /**/ H02,
               /**/       H10, H11, /**/ H12,
         /*************/ /******************/
          HBL, /**/ HBR,  H20, H21, /**/ H22 );

        SlideLockedPartitionDown
        ( tT,  t0,
               t1,
         /**/ /**/
          tB,  t2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Esempio n. 20
0
inline void
RUVF
( Conjugation conjugation, Int offset, 
  const Matrix<F>& H, const Matrix<F>& t, Matrix<F>& A )
{
#ifndef RELEASE
    CallStackEntry cse("apply_packed_reflectors::RUVF");
    // TODO: Proper dimension checks
    if( t.Height() != H.DiagonalLength(offset) )
        LogicError("t must be the same length as H's offset diag");
#endif
    Matrix<F>
        HTL, HTR,  H00, H01, H02,  HPan, HPanCopy,
        HBL, HBR,  H10, H11, H12,
                   H20, H21, H22;
    Matrix<F> ALeft;
    Matrix<F>
        tT,  t0,
        tB,  t1,
             t2;
    Matrix<F> SInv, Z;

    LockedPartitionDownOffsetDiagonal
    ( offset,
      H, HTL, HTR,
         HBL, HBR, 0 );
    LockedPartitionDown
    ( t, tT,
         tB, 0 );
    while( HTL.Height() < H.Height() && HTL.Width() < H.Width() )
    {
        LockedRepartitionDownDiagonal
        ( HTL, /**/ HTR,  H00, /**/ H01, H02,
         /*************/ /******************/
               /**/       H10, /**/ H11, H12,
          HBL, /**/ HBR,  H20, /**/ H21, H22 );

        LockedRepartitionDown
        ( tT,  t0,
         /**/ /**/
               t1,
          tB,  t2 );

        LockedView2x1( HPan, H01, H11 );
        View( ALeft, A, 0, 0, A.Height(), HPan.Height() );

        //--------------------------------------------------------------------//
        HPanCopy = HPan;
        MakeTrapezoidal( UPPER, HPanCopy, 0, RIGHT );
        SetDiagonal( HPanCopy, F(1), 0, RIGHT );
 
        Herk( UPPER, ADJOINT, F(1), HPanCopy, SInv );
        FixDiagonal( conjugation, t1, SInv );

        Gemm( NORMAL, NORMAL, F(1), ALeft, HPanCopy, Z );
        Trsm( RIGHT, UPPER, NORMAL, NON_UNIT, F(1), SInv, Z );
        Gemm( NORMAL, ADJOINT, F(-1), Z, HPanCopy, F(1), ALeft );
        //--------------------------------------------------------------------//

        SlideLockedPartitionDownDiagonal
        ( HTL, /**/ HTR,  H00, H01, /**/ H02,
               /**/       H10, H11, /**/ H12,
         /*************/ /******************/
          HBL, /**/ HBR,  H20, H21, /**/ H22 );

        SlideLockedPartitionDown
        ( tT,  t0,
               t1,
         /**/ /**/
          tB,  t2 );
    }
}
Esempio n. 21
0
inline void
ApplyPackedReflectorsLUVF
( Conjugation conjugation, int offset, 
  const Matrix<Complex<R> >& H,
  const Matrix<Complex<R> >& t,
        Matrix<Complex<R> >& A )
{
#ifndef RELEASE
    PushCallStack("internal::ApplyPackedReflectorsLUVF");
    if( offset < 0 || offset > H.Height() )
        throw std::logic_error("Transforms out of bounds");
    if( H.Width() != A.Height() )
        throw std::logic_error
        ("Width of transforms must equal height of target matrix");
    if( t.Height() != H.DiagonalLength( offset ) )
        throw std::logic_error("t must be the same length as H's offset diag");
#endif
    typedef Complex<R> C;

    Matrix<C>
        HTL, HTR,  H00, H01, H02,  HPan,
        HBL, HBR,  H10, H11, H12,
                   H20, H21, H22;
    Matrix<C>
        AT,  A0,  ATop,
        AB,  A1,
             A2;
    Matrix<C>
        tT,  t0,
        tB,  t1,
             t2;

    Matrix<C> HPanCopy;
    Matrix<C> SInv, Z;

    LockedPartitionDownDiagonal
    ( H, HTL, HTR,
         HBL, HBR, 0 );
    LockedPartitionDown
    ( t, tT,
         tB, 0 );
    PartitionDown
    ( A, AT,
         AB, 0 );
    while( HTL.Height() < H.Height() && HTL.Width() < H.Width() )
    {
        LockedRepartitionDownDiagonal
        ( HTL, /**/ HTR,  H00, /**/ H01, H02,
         /*************/ /******************/
               /**/       H10, /**/ H11, H12,
          HBL, /**/ HBR,  H20, /**/ H21, H22 );

        const int HPanHeight = H01.Height() + H11.Height();
        const int HPanOffset = 
            std::min( H11.Width(), std::max(offset-H00.Width(),0) );
        const int HPanWidth = H11.Width()-HPanOffset;
        HPan.LockedView( H, 0, H00.Width()+HPanOffset, HPanHeight, HPanWidth );

        LockedRepartitionDown
        ( tT,  t0,
         /**/ /**/
               t1,
          tB,  t2, HPanWidth );

        RepartitionDown
        ( AT,  A0,
         /**/ /**/
               A1,
          AB,  A2 );

        ATop.View2x1( A0,
                      A1 );

        Zeros( HPan.Width(), ATop.Width(), Z );
        Zeros( HPan.Width(), HPan.Width(), SInv );
        //--------------------------------------------------------------------//
        HPanCopy = HPan;
        MakeTrapezoidal( RIGHT, UPPER, offset, HPanCopy );
        SetDiagonalToOne( RIGHT, offset, HPanCopy );
        Herk( LOWER, ADJOINT, C(1), HPanCopy, C(0), SInv );
        FixDiagonal( conjugation, t1, SInv );

        Gemm( ADJOINT, NORMAL, C(1), HPanCopy, ATop, C(0), Z );
        Trsm( LEFT, LOWER, NORMAL, NON_UNIT, C(1), SInv, Z );
        Gemm( NORMAL, NORMAL, C(-1), HPanCopy, Z, C(1), ATop );
        //--------------------------------------------------------------------//

        SlideLockedPartitionDownDiagonal
        ( HTL, /**/ HTR,  H00, H01, /**/ H02,
               /**/       H10, H11, /**/ H12,
         /*************/ /******************/
          HBL, /**/ HBR,  H20, H21, /**/ H22 );

        SlideLockedPartitionDown
        ( tT,  t0,
               t1,
         /**/ /**/
          tB,  t2 );

        SlidePartitionDown
        ( AT,  A0,
               A1,
         /**/ /**/
          AB,  A2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Esempio n. 22
0
int QDWH
( Matrix<F>& A, 
  typename Base<F>::type lowerBound,
  typename Base<F>::type upperBound )
{
#ifndef RELEASE
    PushCallStack("QDWH");
#endif
    typedef typename Base<F>::type R;
    const int height = A.Height();
    const int width = A.Width();
    const R oneHalf = R(1)/R(2);
    const R oneThird = R(1)/R(3);

    if( height < width )
        throw std::logic_error("Height cannot be less than width");

    const R epsilon = lapack::MachineEpsilon<R>();
    const R tol = 5*epsilon;
    const R cubeRootTol = Pow(tol,oneThird);

    // Form the first iterate
    Scale( 1/upperBound, A );

    int numIts=0;
    R frobNormADiff;
    Matrix<F> ALast;
    Matrix<F> Q( height+width, width );
    Matrix<F> QT, QB;
    PartitionDown( Q, QT,
                      QB, height );
    Matrix<F> C;
    Matrix<F> ATemp;
    do
    {
        ++numIts;
        ALast = A;

        R L2;
        Complex<R> dd, sqd;
        if( Abs(1-lowerBound) < tol )
        {
            L2 = 1;
            dd = 0;
            sqd = 1;
        }
        else
        {
            L2 = lowerBound*lowerBound;
            dd = Pow( 4*(1-L2)/(L2*L2), oneThird );
            sqd = Sqrt( 1+dd );
        }
        const Complex<R> arg = 8 - 4*dd + 8*(2-L2)/(L2*sqd);
        const R a = (sqd + Sqrt( arg )/2).real;
        const R b = (a-1)*(a-1)/4;
        const R c = a+b-1;
        const Complex<R> alpha = a-b/c;
        const Complex<R> beta = b/c;

        lowerBound = lowerBound*(a+b*L2)/(1+c*L2);

        if( c > 100 )
        {
            //
            // The standard QR-based algorithm
            //
            QT = A;
            Scale( Sqrt(c), QT );
            MakeIdentity( QB );
            ExplicitQR( Q );
            Gemm( NORMAL, ADJOINT, alpha/Sqrt(c), QT, QB, beta, A );
        }
        else
        {
            //
            // Use faster Cholesky-based algorithm since A is well-conditioned
            //
            Identity( width, width, C );
            Herk( LOWER, ADJOINT, F(c), A, F(1), C );
            Cholesky( LOWER, C );
            ATemp = A;
            Trsm( RIGHT, LOWER, ADJOINT, NON_UNIT, F(1), C, ATemp );
            Trsm( RIGHT, LOWER, NORMAL, NON_UNIT, F(1), C, ATemp );
            Scale( beta, A );
            Axpy( alpha, ATemp, A );
        }

        Axpy( F(-1), A, ALast );
        frobNormADiff = Norm( ALast, FROBENIUS_NORM );
    }
    while( frobNormADiff > cubeRootTol || Abs(1-lowerBound) > tol );
#ifndef RELEASE
    PopCallStack();
#endif
    return numIts;
}
Esempio n. 23
0
inline void
RUVF
( Conjugation conjugation, Int offset, 
  const DistMatrix<F>& H, const DistMatrix<F,MD,STAR>& t, DistMatrix<F>& A )
{
#ifndef RELEASE
    CallStackEntry cse("apply_packed_reflectors::RUVF");
    if( H.Grid() != t.Grid() || t.Grid() != A.Grid() )
        LogicError("{H,t,A} must be distributed over the same grid");
    // TODO: Proper dimension checks
    if( t.Height() != H.DiagonalLength(offset) )
        LogicError("t must be the same length as H's offset diag");
    if( !t.AlignedWithDiagonal( H, offset ) )
        LogicError("t must be aligned with H's 'offset' diagonal");
#endif
    const Grid& g = H.Grid();
    DistMatrix<F>
        HTL(g), HTR(g),  H00(g), H01(g), H02(g),  HPan(g), HPanCopy(g),
        HBL(g), HBR(g),  H10(g), H11(g), H12(g),
                         H20(g), H21(g), H22(g);
    DistMatrix<F> ALeft(g);
    DistMatrix<F,MD,STAR>
        tT(g),  t0(g),
        tB(g),  t1(g),
                t2(g);

    DistMatrix<F,VC,  STAR> HPan_VC_STAR(g);
    DistMatrix<F,MR,  STAR> HPan_MR_STAR(g);
    DistMatrix<F,STAR,STAR> t1_STAR_STAR(g);
    DistMatrix<F,STAR,STAR> SInv_STAR_STAR(g);
    DistMatrix<F,STAR,MC  > ZAdj_STAR_MC(g);
    DistMatrix<F,STAR,VC  > ZAdj_STAR_VC(g);

    LockedPartitionDownOffsetDiagonal
    ( offset,
      H, HTL, HTR,
         HBL, HBR, 0 );
    LockedPartitionDown
    ( t, tT,
         tB, 0 );
    while( HTL.Height() < H.Height() && HTL.Width() < H.Width() )
    {
        LockedRepartitionDownDiagonal
        ( HTL, /**/ HTR,  H00, /**/ H01, H02,
         /*************/ /******************/
               /**/       H10, /**/ H11, H12,
          HBL, /**/ HBR,  H20, /**/ H21, H22 );

        LockedRepartitionDown
        ( tT,  t0,
         /**/ /**/
               t1,
          tB,  t2 );

        LockedView2x1( HPan, H01, H11 );
        View( ALeft, A, 0, 0, A.Height(), HPan.Height() );

        HPan_MR_STAR.AlignWith( ALeft );
        ZAdj_STAR_MC.AlignWith( ALeft );
        ZAdj_STAR_VC.AlignWith( ALeft );
        //--------------------------------------------------------------------//
        HPanCopy = HPan;
        MakeTrapezoidal( UPPER, HPanCopy, 0, RIGHT );
        SetDiagonal( HPanCopy, F(1), 0, RIGHT );
 
        HPan_VC_STAR = HPanCopy;
        Zeros( SInv_STAR_STAR, HPan.Width(), HPan.Width() );
        Herk
        ( UPPER, ADJOINT, 
          F(1), HPan_VC_STAR.LockedMatrix(),
          F(0), SInv_STAR_STAR.Matrix() ); 
        SInv_STAR_STAR.SumOverGrid();
        t1_STAR_STAR = t1;
        FixDiagonal( conjugation, t1_STAR_STAR, SInv_STAR_STAR );

        HPan_MR_STAR = HPan_VC_STAR;
        LocalGemm( ADJOINT, ADJOINT, F(1), HPan_MR_STAR, ALeft, ZAdj_STAR_MC );
        ZAdj_STAR_VC.SumScatterFrom( ZAdj_STAR_MC );
        
        LocalTrsm
        ( LEFT, UPPER, ADJOINT, NON_UNIT, 
          F(1), SInv_STAR_STAR, ZAdj_STAR_VC );

        ZAdj_STAR_MC = ZAdj_STAR_VC;
        LocalGemm
        ( ADJOINT, ADJOINT, F(-1), ZAdj_STAR_MC, HPan_MR_STAR, F(1), ALeft );
        //--------------------------------------------------------------------//

        SlideLockedPartitionDownDiagonal
        ( HTL, /**/ HTR,  H00, H01, /**/ H02,
               /**/       H10, H11, /**/ H12,
         /*************/ /******************/
          HBL, /**/ HBR,  H20, H21, /**/ H22 );

        SlideLockedPartitionDown
        ( tT,  t0,
               t1,
         /**/ /**/
          tB,  t2 );
    }
}
Esempio n. 24
0
inline void
ApplyPackedReflectorsLUHB
( Conjugation conjugation, int offset, 
  const Matrix<Complex<R> >& H,
  const Matrix<Complex<R> >& t,
        Matrix<Complex<R> >& A )
{
#ifndef RELEASE
    PushCallStack("internal::ApplyPackedReflectorsLUHB");
    if( offset < 0 || offset > H.Width() )
        throw std::logic_error("Transforms out of bounds");
    if( H.Width() != A.Height() )
        throw std::logic_error
        ("Width of transforms must equal height of target matrix");
    if( t.Height() != H.DiagonalLength( offset ) )
        throw std::logic_error("t must be the same length as H's offset diag");
#endif
    typedef Complex<R> C;

    Matrix<C>
        HTL, HTR,  H00, H01, H02,  HPan, HPanCopy,
        HBL, HBR,  H10, H11, H12,
                   H20, H21, H22;
    Matrix<C> ABottom;
    Matrix<C>
        tT,  t0,
        tB,  t1,
             t2;

    Matrix<C> SInv, Z;

    LockedPartitionUpDiagonal
    ( H, HTL, HTR,
         HBL, HBR, 0 );
    LockedPartitionUp
    ( t, tT,
         tB, 0 );
    while( HBR.Height() < H.Height() && HBR.Width() < H.Width() )
    {
        LockedRepartitionUpDiagonal
        ( HTL, /**/ HTR,  H00, H01, /**/ H02,
               /**/       H10, H11, /**/ H12,
         /*************/ /******************/
          HBL, /**/ HBR,  H20, H21, /**/ H22 );
    
        const int HPanWidth = H11.Width() + H12.Width();
        const int HPanHeight = 
            std::min( H11.Height(), std::max(HPanWidth-offset,0) );
        const int leftover = A.Height()-HPanWidth;
        HPan.LockedView( H, H00.Height(), H00.Width(), HPanHeight, HPanWidth );

        LockedRepartitionUp
        ( tT,  t0,
               t1,
         /**/ /**/
          tB,  t2, HPanHeight );

        ABottom.View( A, leftover, 0, HPanWidth, A.Width() );

        Zeros( HPanHeight, ABottom.Width(), Z );
        Zeros( HPanHeight, HPanHeight, SInv );
        //--------------------------------------------------------------------//
        HPanCopy = HPan;
        MakeTrapezoidal( LEFT, UPPER, offset, HPanCopy );
        SetDiagonalToOne( LEFT, offset, HPanCopy );

        Herk( UPPER, NORMAL, C(1), HPanCopy, C(0), SInv );
        FixDiagonal( conjugation, t1, SInv );

        Gemm( NORMAL, NORMAL, C(1), HPanCopy, ABottom, C(0), Z );
        Trsm( LEFT, UPPER, NORMAL, NON_UNIT, C(1), SInv, Z );
        Gemm( ADJOINT, NORMAL, C(-1), HPanCopy, Z, C(1), ABottom );
        //--------------------------------------------------------------------//

        SlideLockedPartitionUpDiagonal
        ( HTL, /**/ HTR,  H00, /**/ H01, H02,
         /*************/ /******************/
               /**/       H10, /**/ H11, H12,
          HBL, /**/ HBR,  H20, /**/ H21, H22 );

        SlideLockedPartitionUp
        ( tT,  t0,
         /**/ /**/
               t1,
          tB,  t2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Esempio n. 25
0
void Ridge
( Orientation orientation,
  const Matrix<Field>& A,
  const Matrix<Field>& B,
        Base<Field> gamma,
        Matrix<Field>& X,
  RidgeAlg alg )
{
    EL_DEBUG_CSE

    const bool normal = ( orientation==NORMAL );
    const Int m = ( normal ? A.Height() : A.Width()  );
    const Int n = ( normal ? A.Width()  : A.Height() );
    if( orientation == TRANSPOSE && IsComplex<Field>::value )
        LogicError("Transpose version of complex Ridge not yet supported");

    if( m >= n )
    {
        Matrix<Field> Z;
        if( alg == RIDGE_CHOLESKY )
        {
            if( orientation == NORMAL )
                Herk( LOWER, ADJOINT, Base<Field>(1), A, Z );
            else
                Herk( LOWER, NORMAL, Base<Field>(1), A, Z );
            ShiftDiagonal( Z, Field(gamma*gamma) );
            Cholesky( LOWER, Z );
            if( orientation == NORMAL )
                Gemm( ADJOINT, NORMAL, Field(1), A, B, X );
            else
                Gemm( NORMAL, NORMAL, Field(1), A, B, X );
            cholesky::SolveAfter( LOWER, NORMAL, Z, X );
        }
        else if( alg == RIDGE_QR )
        {
            Zeros( Z, m+n, n );
            auto ZT = Z( IR(0,m),   IR(0,n) );
            auto ZB = Z( IR(m,m+n), IR(0,n) );
            if( orientation == NORMAL )
                ZT = A;
            else
                Adjoint( A, ZT );
            FillDiagonal( ZB, Field(gamma) );
            // NOTE: This QR factorization could exploit the upper-triangular
            //       structure of the diagonal matrix ZB
            qr::ExplicitTriang( Z );
            if( orientation == NORMAL )
                Gemm( ADJOINT, NORMAL, Field(1), A, B, X );
            else
                Gemm( NORMAL, NORMAL, Field(1), A, B, X );
            cholesky::SolveAfter( LOWER, NORMAL, Z, X );
        }
        else
        {
            Matrix<Field> U, V;
            Matrix<Base<Field>> s;
            if( orientation == NORMAL )
            {
                SVDCtrl<Base<Field>> ctrl;
                ctrl.overwrite = false;
                SVD( A, U, s, V, ctrl );
            }
            else
            {
                Matrix<Field> AAdj;
                Adjoint( A, AAdj );

                SVDCtrl<Base<Field>> ctrl;
                ctrl.overwrite = true;
                SVD( AAdj, U, s, V, ctrl );
            }
            auto sigmaMap =
              [=]( const Base<Field>& sigma )
              { return sigma / (sigma*sigma + gamma*gamma); };
            EntrywiseMap( s, MakeFunction(sigmaMap) );
            Gemm( ADJOINT, NORMAL, Field(1), U, B, X );
            DiagonalScale( LEFT, NORMAL, s, X );
            U = X;
            Gemm( NORMAL, NORMAL, Field(1), V, U, X );
        }
    }
    else
    {
        LogicError("This case not yet supported");
    }
}
Esempio n. 26
0
inline void
ApplyPackedReflectorsLUHB
( Conjugation conjugation, int offset, 
  const DistMatrix<Complex<R> >& H,
  const DistMatrix<Complex<R>,MD,STAR>& t,
        DistMatrix<Complex<R> >& A )
{
#ifndef RELEASE
    PushCallStack("internal::ApplyPackedReflectorsLUHB");
    if( H.Grid() != t.Grid() || t.Grid() != A.Grid() )
        throw std::logic_error
        ("{H,t,A} must be distributed over the same grid");
    if( offset < 0 || offset > H.Width() )
        throw std::logic_error("Transforms out of bounds");
    if( H.Width() != A.Height() )
        throw std::logic_error
        ("Width of transforms must equal height of target matrix");
    if( t.Height() != H.DiagonalLength( offset ) )
        throw std::logic_error("t must be the same length as H's offset diag");
    if( !t.AlignedWithDiagonal( H, offset ) )
        throw std::logic_error("t must be aligned with H's offset diagonal");
#endif
    typedef Complex<R> C;
    const Grid& g = H.Grid();

    DistMatrix<C>
        HTL(g), HTR(g),  H00(g), H01(g), H02(g),  HPan(g), HPanCopy(g),
        HBL(g), HBR(g),  H10(g), H11(g), H12(g),
                         H20(g), H21(g), H22(g);
    DistMatrix<C> ABottom(g);
    DistMatrix<C,MD,STAR>
        tT(g),  t0(g),
        tB(g),  t1(g),
                t2(g);

    DistMatrix<C,STAR,VR  > HPan_STAR_VR(g);
    DistMatrix<C,STAR,MC  > HPan_STAR_MC(g);
    DistMatrix<C,STAR,STAR> t1_STAR_STAR(g);
    DistMatrix<C,STAR,STAR> SInv_STAR_STAR(g);
    DistMatrix<C,STAR,MR  > Z_STAR_MR(g);
    DistMatrix<C,STAR,VR  > Z_STAR_VR(g);

    LockedPartitionUpDiagonal
    ( H, HTL, HTR,
         HBL, HBR, 0 );
    LockedPartitionUp
    ( t, tT,
         tB, 0 );
    while( HBR.Height() < H.Height() && HBR.Width() < H.Width() )
    {
        LockedRepartitionUpDiagonal
        ( HTL, /**/ HTR,  H00, H01, /**/ H02,
               /**/       H10, H11, /**/ H12,
         /*************/ /******************/
          HBL, /**/ HBR,  H20, H21, /**/ H22 );
    
        const int HPanWidth = H11.Width() + H12.Width();
        const int HPanHeight = 
            std::min( H11.Height(), std::max(HPanWidth-offset,0) );
        const int leftover = A.Height()-HPanWidth;
        HPan.LockedView( H, H00.Height(), H00.Width(), HPanHeight, HPanWidth );

        LockedRepartitionUp
        ( tT,  t0,
               t1,
         /**/ /**/
          tB,  t2, HPanHeight );

        ABottom.View( A, leftover, 0, HPanWidth, A.Width() );

        HPan_STAR_MC.AlignWith( ABottom );
        Z_STAR_MR.AlignWith( ABottom );
        Z_STAR_VR.AlignWith( ABottom );
        Zeros( HPanHeight, ABottom.Width(), Z_STAR_MR );
        Zeros( HPanHeight, HPanHeight, SInv_STAR_STAR );
        //--------------------------------------------------------------------//
        HPanCopy = HPan;
        MakeTrapezoidal( LEFT, UPPER, offset, HPanCopy );
        SetDiagonalToOne( LEFT, offset, HPanCopy );

        HPan_STAR_VR = HPanCopy;
        Herk
        ( UPPER, NORMAL,
          C(1), HPan_STAR_VR.LockedLocalMatrix(),
          C(0), SInv_STAR_STAR.LocalMatrix() );
        SInv_STAR_STAR.SumOverGrid();
        t1_STAR_STAR = t1;
        FixDiagonal( conjugation, t1_STAR_STAR, SInv_STAR_STAR );

        HPan_STAR_MC = HPan_STAR_VR;
        LocalGemm
        ( NORMAL, NORMAL, C(1), HPan_STAR_MC, ABottom, C(0), Z_STAR_MR );
        Z_STAR_VR.SumScatterFrom( Z_STAR_MR );

        LocalTrsm
        ( LEFT, UPPER, NORMAL, NON_UNIT, C(1), SInv_STAR_STAR, Z_STAR_VR );

        Z_STAR_MR = Z_STAR_VR;
        LocalGemm
        ( ADJOINT, NORMAL, C(-1), HPan_STAR_MC, Z_STAR_MR, C(1), ABottom );
        //--------------------------------------------------------------------//
        HPan_STAR_MC.FreeAlignments();
        Z_STAR_MR.FreeAlignments();
        Z_STAR_VR.FreeAlignments();

        SlideLockedPartitionUpDiagonal
        ( HTL, /**/ HTR,  H00, /**/ H01, H02,
         /*************/ /******************/
               /**/       H10, /**/ H11, H12,
          HBL, /**/ HBR,  H20, /**/ H21, H22 );

        SlideLockedPartitionUp
        ( tT,  t0,
         /**/ /**/
               t1,
          tB,  t2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Esempio n. 27
0
inline void
LUHF
( Conjugation conjugation, Int offset, 
  const DistMatrix<F>& H, const DistMatrix<F,MD,STAR>& t, DistMatrix<F>& A )
{
#ifndef RELEASE
    CallStackEntry cse("apply_packed_reflectors::LUHF");
    if( H.Grid() != t.Grid() || t.Grid() != A.Grid() )
        LogicError("{H,t,A} must be distributed over the same grid");
    // TODO: Proper dimension checks
    if( t.Height() != H.DiagonalLength(offset) )
        LogicError("t must be the same length as H's offset diag");
    if( !t.AlignedWithDiagonal( H, offset ) )
        LogicError("t must be aligned with H's offset diagonal");
#endif
    const Grid& g = H.Grid(); 
    DistMatrix<F>
        HTL(g), HTR(g),  H00(g), H01(g), H02(g),  HPan(g), HPanCopy(g),
        HBL(g), HBR(g),  H10(g), H11(g), H12(g),
                         H20(g), H21(g), H22(g);
    DistMatrix<F> 
        AT(g),  A0(g),
        AB(g),  A1(g),
                A2(g);
    DistMatrix<F,MD,STAR>
        tT(g),  t0(g),
        tB(g),  t1(g),
                t2(g);

    DistMatrix<F,STAR,VR  > HPan_STAR_VR(g);
    DistMatrix<F,STAR,MC  > HPan_STAR_MC(g);
    DistMatrix<F,STAR,STAR> t1_STAR_STAR(g);
    DistMatrix<F,STAR,STAR> SInv_STAR_STAR(g);
    DistMatrix<F,STAR,MR  > Z_STAR_MR(g);
    DistMatrix<F,STAR,VR  > Z_STAR_VR(g);

    LockedPartitionDownOffsetDiagonal
    ( offset,
      H, HTL, HTR,
         HBL, HBR, 0 );
    LockedPartitionDown
    ( t, tT,
         tB, 0 );
    PartitionDown
    ( A, AT,
         AB, 0 );
    while( HTL.Height() < H.Height() && HTL.Width() < H.Width() )
    {
        LockedRepartitionDownDiagonal
        ( HTL, /**/ HTR,  H00, /**/ H01, H02,
         /*************/ /******************/
               /**/       H10, /**/ H11, H12,
          HBL, /**/ HBR,  H20, /**/ H21, H22 );

        LockedRepartitionDown
        ( tT,  t0,
         /**/ /**/
               t1,
          tB,  t2 );

        RepartitionDown
        ( AT,  A0,
         /**/ /**/
               A1,
          AB,  A2, H11.Height() );

        LockedView1x2( HPan, H11, H12 );

        HPan_STAR_MC.AlignWith( AB );
        Z_STAR_MR.AlignWith( AB );
        Z_STAR_VR.AlignWith( AB );
        //--------------------------------------------------------------------//
        HPanCopy = HPan;
        MakeTriangular( UPPER, HPanCopy );
        SetDiagonal( HPanCopy, F(1) );

        HPan_STAR_VR = HPanCopy;
        Zeros( SInv_STAR_STAR, HPan.Height(), HPan.Height() );
        Herk
        ( LOWER, NORMAL,
          F(1), HPan_STAR_VR.LockedMatrix(),
          F(0), SInv_STAR_STAR.Matrix() );
        SInv_STAR_STAR.SumOverGrid();
        t1_STAR_STAR = t1;
        FixDiagonal( conjugation, t1_STAR_STAR, SInv_STAR_STAR );

        HPan_STAR_MC = HPan_STAR_VR;
        LocalGemm( NORMAL, NORMAL, F(1), HPan_STAR_MC, AB, Z_STAR_MR );
        Z_STAR_VR.SumScatterFrom( Z_STAR_MR );

        LocalTrsm
        ( LEFT, LOWER, NORMAL, NON_UNIT, F(1), SInv_STAR_STAR, Z_STAR_VR );

        Z_STAR_MR = Z_STAR_VR;
        LocalGemm( ADJOINT, NORMAL, F(-1), HPan_STAR_MC, Z_STAR_MR, F(1), AB );
        //--------------------------------------------------------------------//

        SlideLockedPartitionDownDiagonal
        ( HTL, /**/ HTR,  H00, H01, /**/ H02,
               /**/       H10, H11, /**/ H12,
         /*************/ /******************/
          HBL, /**/ HBR,  H20, H21, /**/ H22 );

        SlideLockedPartitionDown
        ( tT,  t0,
               t1,
         /**/ /**/
          tB,  t2 );

        SlidePartitionDown
        ( AT,  A0,
               A1,
         /**/ /**/
          AB,  A2 );
    }
}
Esempio n. 28
0
inline void
LUHF
( Conjugation conjugation, Int offset, 
  const Matrix<F>& H, const Matrix<F>& t, Matrix<F>& A )
{
#ifndef RELEASE
    CallStackEntry cse("apply_packed_reflectors::LUHF");
    // TODO: Proper dimension checks
    if( t.Height() != H.DiagonalLength(offset) )
        LogicError("t must be the same length as H's offset diag");
#endif
    Matrix<F>
        HTL, HTR,  H00, H01, H02,  HPan, HPanCopy,
        HBL, HBR,  H10, H11, H12,
                   H20, H21, H22;
    Matrix<F> 
        AT,  A0,
        AB,  A1,
             A2;
    Matrix<F>
        tT,  t0,
        tB,  t1,
             t2;
    Matrix<F> SInv, Z;

    LockedPartitionDownOffsetDiagonal
    ( offset,
      H, HTL, HTR,
         HBL, HBR, 0 );
    LockedPartitionDown
    ( t, tT,
         tB, 0 );
    PartitionDown
    ( A, AT,
         AB, 0 );
    while( HTL.Height() < H.Height() && HTL.Width() < H.Width() )
    {
        LockedRepartitionDownDiagonal
        ( HTL, /**/ HTR,  H00, /**/ H01, H02,
         /*************/ /******************/
               /**/       H10, /**/ H11, H12,
          HBL, /**/ HBR,  H20, /**/ H21, H22 );

        LockedRepartitionDown
        ( tT,  t0,
         /**/ /**/
               t1,
          tB,  t2 );

        RepartitionDown
        ( AT,  A0,
         /**/ /**/
               A1,
          AB,  A2, H11.Height() );

        LockedView1x2( HPan, H11, H12 );

        //--------------------------------------------------------------------//
        HPanCopy = HPan;
        MakeTriangular( UPPER, HPanCopy );
        SetDiagonal( HPanCopy, F(1) );

        Herk( LOWER, NORMAL, F(1), HPanCopy, SInv );
        FixDiagonal( conjugation, t1, SInv );

        Gemm( NORMAL, NORMAL, F(1), HPanCopy, AB, Z );
        Trsm( LEFT, LOWER, NORMAL, NON_UNIT, F(1), SInv, Z );
        Gemm( ADJOINT, NORMAL, F(-1), HPanCopy, Z, F(1), AB );
        //--------------------------------------------------------------------//

        SlideLockedPartitionDownDiagonal
        ( HTL, /**/ HTR,  H00, H01, /**/ H02,
               /**/       H10, H11, /**/ H12,
         /*************/ /******************/
          HBL, /**/ HBR,  H20, H21, /**/ H22 );

        SlideLockedPartitionDown
        ( tT,  t0,
               t1,
         /**/ /**/
          tB,  t2 );

        SlidePartitionDown
        ( AT,  A0,
               A1,
         /**/ /**/
          AB,  A2 );
    }
}
inline void
internal::ApplyPackedReflectorsLLVF
( Conjugation conjugation, int offset, 
  const DistMatrix<Complex<R>,MC,MR  >& H,
  const DistMatrix<Complex<R>,MD,STAR>& t,
        DistMatrix<Complex<R>,MC,MR  >& A )
{
#ifndef RELEASE
    PushCallStack("internal::ApplyPackedReflectorsLLVF");
    if( H.Grid() != t.Grid() || t.Grid() != A.Grid() )
        throw std::logic_error
        ("{H,t,A} must be distributed over the same grid");
    if( offset > 0 )
        throw std::logic_error("Transforms cannot extend above matrix");
    if( offset < -H.Height() )
        throw std::logic_error("Transforms cannot extend below matrix");
    if( H.Height() != A.Height() )
        throw std::logic_error
        ("Height of transforms must equal height of target matrix");
    if( t.Height() != H.DiagonalLength( offset ) )
        throw std::logic_error("t must be the same length as H's offset diag.");
    if( !t.AlignedWithDiagonal( H, offset ) )
        throw std::logic_error("t must be aligned with H's 'offset' diagonal");
#endif
    typedef Complex<R> C;
    const Grid& g = H.Grid();

    // Matrix views    
    DistMatrix<C,MC,MR>
        HTL(g), HTR(g),  H00(g), H01(g), H02(g),  HPan(g), HPanCopy(g),
        HBL(g), HBR(g),  H10(g), H11(g), H12(g),
                         H20(g), H21(g), H22(g);
    DistMatrix<C,MC,MR>
        AT(g),  A0(g),
        AB(g),  A1(g),
                A2(g);
    DistMatrix<C,MD,STAR>
        tT(g),  t0(g),
        tB(g),  t1(g),
                t2(g);

    DistMatrix<C,VC,  STAR> HPan_VC_STAR(g);
    DistMatrix<C,MC,  STAR> HPan_MC_STAR(g);
    DistMatrix<C,STAR,STAR> t1_STAR_STAR(g);
    DistMatrix<C,STAR,STAR> SInv_STAR_STAR(g);
    DistMatrix<C,STAR,MR  > Z_STAR_MR(g);
    DistMatrix<C,STAR,VR  > Z_STAR_VR(g);

    LockedPartitionDownDiagonal
    ( H, HTL, HTR,
         HBL, HBR, 0 );
    LockedPartitionDown
    ( t, tT,
         tB, 0 );
    PartitionDown
    ( A, AT,
         AB, 0 );
    while( HTL.Height() < H.Height() && HTL.Width() < H.Width() )
    {
        LockedRepartitionDownDiagonal
        ( HTL, /**/ HTR,  H00, /**/ H01, H02,
         /*************/ /******************/
               /**/       H10, /**/ H11, H12,
          HBL, /**/ HBR,  H20, /**/ H21, H22 );

        int HPanHeight = H11.Height() + H21.Height();
        int HPanWidth = std::min( H11.Width(), std::max(HPanHeight+offset,0) );
        HPan.LockedView( H, H00.Height(), H00.Width(), HPanHeight, HPanWidth );

        LockedRepartitionDown
        ( tT,  t0,
         /**/ /**/
               t1,
          tB,  t2, HPanWidth );

        RepartitionDown
        ( AT,  A0,
         /**/ /**/
               A1,
          AB,  A2 );

        HPan_MC_STAR.AlignWith( AB );
        Z_STAR_MR.AlignWith( AB );
        Z_STAR_VR.AlignWith( AB );
        Z_STAR_MR.ResizeTo( HPan.Width(), AB.Width() );
        SInv_STAR_STAR.ResizeTo( HPan.Width(), HPan.Width() );
        Zero( SInv_STAR_STAR );
        //--------------------------------------------------------------------//
        HPanCopy = HPan;
        MakeTrapezoidal( LEFT, LOWER, offset, HPanCopy );
        SetDiagonalToOne( LEFT, offset, HPanCopy );

        HPan_VC_STAR = HPanCopy;
        Herk
        ( UPPER, ADJOINT, 
          (C)1, HPan_VC_STAR.LockedLocalMatrix(),
          (C)0, SInv_STAR_STAR.LocalMatrix() );     
        SInv_STAR_STAR.SumOverGrid();
        t1_STAR_STAR = t1;
        FixDiagonal( conjugation, t1_STAR_STAR, SInv_STAR_STAR );

        HPan_MC_STAR = HPanCopy;
        internal::LocalGemm
        ( ADJOINT, NORMAL, (C)1, HPan_MC_STAR, AB, (C)0, Z_STAR_MR );
        Z_STAR_VR.SumScatterFrom( Z_STAR_MR );
        
        internal::LocalTrsm
        ( LEFT, UPPER, ADJOINT, NON_UNIT, (C)1, SInv_STAR_STAR, Z_STAR_VR );

        Z_STAR_MR = Z_STAR_VR;
        internal::LocalGemm
        ( NORMAL, NORMAL, (C)-1, HPan_MC_STAR, Z_STAR_MR, (C)1, AB );
        //--------------------------------------------------------------------//
        HPan_MC_STAR.FreeAlignments();
        Z_STAR_MR.FreeAlignments();
        Z_STAR_VR.FreeAlignments();

        SlideLockedPartitionDownDiagonal
        ( HTL, /**/ HTR,  H00, H01, /**/ H02,
               /**/       H10, H11, /**/ H12,
         /*************/ /******************/
          HBL, /**/ HBR,  H20, H21, /**/ H22 );

        SlideLockedPartitionDown
        ( tT,  t0,
               t1,
         /**/ /**/
          tB,  t2 ); 

        SlidePartitionDown
        ( AT,  A0,
               A1,
         /**/ /**/
          AB,  A2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Esempio n. 30
0
int Halley
( DistMatrix<F>& A, typename Base<F>::type upperBound )
{
#ifndef RELEASE
    PushCallStack("Halley");
#endif
    typedef typename Base<F>::type R;
    const Grid& g = A.Grid();
    const int height = A.Height();
    const int width = A.Width();
    const R oneHalf = R(1)/R(2);
    const R oneThird = R(1)/R(3);

    if( height < width )
        throw std::logic_error("Height cannot be less than width");

    const R epsilon = lapack::MachineEpsilon<R>();
    const R tol = 5*epsilon;
    const R cubeRootTol = Pow(tol,oneThird);
    const R a = 3;
    const R b = 1;
    const R c = 3;

    // Form the first iterate
    Scale( 1/upperBound, A );

    int numIts=0;
    R frobNormADiff;
    DistMatrix<F> ALast( g );
    DistMatrix<F> Q( height+width, width, g );
    DistMatrix<F> QT(g), QB(g);
    PartitionDown( Q, QT,
                      QB, height );
    DistMatrix<F> C( g );
    DistMatrix<F> ATemp( g );
    do
    {
        if( numIts > 100 )
            throw std::runtime_error("Halley iteration did not converge");
        ++numIts;
        ALast = A;

        // TODO: Come up with a test for when we can use the Cholesky approach
        if( true )
        {
            //
            // The standard QR-based algorithm
            //
            QT = A;
            Scale( Sqrt(c), QT );
            MakeIdentity( QB );
            ExplicitQR( Q );
            Gemm( NORMAL, ADJOINT, F(a-b/c)/Sqrt(c), QT, QB, F(b/c), A );
        }
        else
        {
            //
            // Use faster Cholesky-based algorithm since A is well-conditioned
            //
            Identity( width, width, C );
            Herk( LOWER, ADJOINT, F(c), A, F(1), C );
            Cholesky( LOWER, C );
            ATemp = A;
            Trsm( RIGHT, LOWER, ADJOINT, NON_UNIT, F(1), C, ATemp );
            Trsm( RIGHT, LOWER, NORMAL, NON_UNIT, F(1), C, ATemp );
            Scale( b/c, A );
            Axpy( a-b/c, ATemp, A );
        }

        Axpy( F(-1), A, ALast );
        frobNormADiff = Norm( ALast, FROBENIUS_NORM );
    }
    while( frobNormADiff > cubeRootTol );
#ifndef RELEASE
    PopCallStack();
#endif
    return numIts;
}