Esempio n. 1
0
inline void
RLVB
( Conjugation conjugation, int offset, 
  const Matrix<Complex<R> >& H,
  const Matrix<Complex<R> >& t,
        Matrix<Complex<R> >& A )
{
#ifndef RELEASE
    PushCallStack("apply_packed_reflectors::RLVB");
    if( offset > 0 || offset < -H.Height() )
        throw std::logic_error("Transforms out of bounds");
    if( H.Height() != A.Width() )
        throw std::logic_error
        ("Height of transforms must equal width of target matrix");
    if( t.Height() != H.DiagonalLength( offset ) )
        throw std::logic_error("t must be the same length as H's offset diag");
#endif
    typedef Complex<R> C;

    Matrix<C>
        HTL, HTR,  H00, H01, H02,  HPan, HPanCopy,
        HBL, HBR,  H10, H11, H12,
                   H20, H21, H22;
    Matrix<C> ARight;
    Matrix<C>
        tT,  t0,
        tB,  t1,
             t2;
    
    Matrix<C> SInv, Z;

    LockedPartitionUpDiagonal
    ( H, HTL, HTR,
         HBL, HBR, 0 );
    LockedPartitionUp
    ( t, tT,
         tB, 0 );
    while( HBR.Height() < H.Height() && HBR.Width() < H.Width() )
    {
        LockedRepartitionUpDiagonal
        ( HTL, /**/ HTR,  H00, H01, /**/ H02,
               /**/       H10, H11, /**/ H12,
         /*************/ /******************/
          HBL, /**/ HBR,  H20, H21, /**/ H22 );

        const int HPanHeight = H11.Height() + H21.Height();
        const int HPanWidth = 
            std::min( H11.Width(), std::max(HPanHeight+offset,0) );
        const int leftover = A.Width()-HPanHeight;
        LockedView( HPan, H, H00.Height(), H00.Width(), HPanHeight, HPanWidth );

        LockedRepartitionUp
        ( tT,  t0,
               t1,
         /**/ /**/
          tB,  t2, HPanWidth );

        View( ARight, A, 0, leftover, A.Height(), HPanHeight );

        Zeros( ARight.Height(), HPan.Width(), Z );
        Zeros( HPan.Width(), HPan.Width(), SInv );
        //--------------------------------------------------------------------//
        HPanCopy = HPan;
        MakeTrapezoidal( LEFT, LOWER, offset, HPanCopy );
        SetDiagonal( LEFT, offset, HPanCopy, C(1) );

        Herk( LOWER, ADJOINT, C(1), HPanCopy, C(0), SInv );
        FixDiagonal( conjugation, t1, SInv );

        Gemm( NORMAL, NORMAL, C(1), ARight, HPanCopy, C(0), Z );
        Trsm( RIGHT, LOWER, NORMAL, NON_UNIT, C(1), SInv, Z );
        Gemm( NORMAL, ADJOINT, C(-1), Z, HPanCopy, C(1), ARight );
        //--------------------------------------------------------------------//

        SlideLockedPartitionUpDiagonal
        ( HTL, /**/ HTR,  H00, /**/ H01, H02,
         /*************/ /******************/
               /**/       H10, /**/ H11, H12,
          HBL, /**/ HBR,  H20, /**/ H21, H22 );

        SlideLockedPartitionUp
        ( tT,  t0,
         /**/ /**/
               t1,
          tB,  t2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Esempio n. 2
0
inline void
RLVB( int offset, const Matrix<R>& H, Matrix<R>& A )
{
#ifndef RELEASE
    PushCallStack("apply_packed_reflectors::RLVB");
    if( offset > 0 || offset < -H.Height() )
        throw std::logic_error("Transforms out of bounds");
    if( H.Height() != A.Width() )
        throw std::logic_error
        ("Height of transforms must equal width of target matrix");
#endif
    Matrix<R>
        HTL, HTR,  H00, H01, H02,  HPan, HPanCopy,
        HBL, HBR,  H10, H11, H12,
                   H20, H21, H22;
    Matrix<R> ARight;

    Matrix<R> SInv, Z;

    LockedPartitionUpDiagonal
    ( H, HTL, HTR,
         HBL, HBR, 0 );
    while( HBR.Height() < H.Height() && HBR.Width() < H.Width() )
    {
        LockedRepartitionUpDiagonal
        ( HTL, /**/ HTR,  H00, H01, /**/ H02,
               /**/       H10, H11, /**/ H12,
         /*************/ /******************/
          HBL, /**/ HBR,  H20, H21, /**/ H22 );

        const int HPanHeight = H11.Height() + H21.Height();
        const int HPanWidth = 
            std::min( H11.Width(), std::max(HPanHeight+offset,0) );
        const int leftover = A.Width()-HPanHeight;
        LockedView( HPan, H, H00.Height(), H00.Width(), HPanHeight, HPanWidth );

        View( ARight, A, 0, leftover, A.Height(), HPanHeight );

        Zeros( ARight.Height(), HPanWidth, Z );
        Zeros( HPanWidth, HPanWidth, SInv );
        //--------------------------------------------------------------------//
        HPanCopy = HPan;
        MakeTrapezoidal( LEFT, LOWER, offset, HPanCopy );
        SetDiagonal( LEFT, offset, HPanCopy, R(1) );

        Syrk( LOWER, TRANSPOSE, R(1), HPanCopy, R(0), SInv );
        HalveMainDiagonal( SInv );

        Gemm( NORMAL, NORMAL, R(1), ARight, HPanCopy, R(0), Z );
        Trsm( RIGHT, LOWER, NORMAL, NON_UNIT, R(1), SInv, Z );
        Gemm( NORMAL, TRANSPOSE, R(-1), Z, HPanCopy, R(1), ARight );
        //--------------------------------------------------------------------//

        SlideLockedPartitionUpDiagonal
        ( HTL, /**/ HTR,  H00, /**/ H01, H02,
         /*************/ /******************/
               /**/       H10, /**/ H11, H12,
          HBL, /**/ HBR,  H20, /**/ H21, H22 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
inline void
RowEchelon( Matrix<F>& A, Matrix<F>& B )
{
#ifndef RELEASE
    CallStackEntry entry("RowEchelon");
    if( A.Height() != B.Height() )
        LogicError("A and B must be the same height");
#endif
    // Matrix views
    Matrix<F>
        ATL, ATR,  A00, A01, A02,  APan,
        ABL, ABR,  A10, A11, A12,
                   A20, A21, A22;
    Matrix<F>
        BT,  B0,
        BB,  B1,
             B2;

    Matrix<Int> p1;

    // Pivot composition
    std::vector<Int> image, preimage;

    // Start the algorithm
    PartitionDownDiagonal
    ( A, ATL, ATR,
         ABL, ABR, 0 );
    PartitionDown
    ( B, BT,
         BB, 0 );
    while( ATL.Height() < A.Height() && ATL.Width() < A.Width() )
    {
        RepartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, /**/ A01, A02,
         /*************/ /******************/
               /**/       A10, /**/ A11, A12,
          ABL, /**/ ABR,  A20, /**/ A21, A22 );

        RepartitionDown
        ( BT,  B0,
         /**/ /**/
               B1,
          BB,  B2 );

        View2x1
        ( APan, A12,
                A22 );

        //--------------------------------------------------------------------//
        lu::Panel( APan, p1, A00.Height() );
        ComposePivots( p1, A00.Height(), image, preimage );
        ApplyRowPivots( BB, image, preimage );

        Trsm( LEFT, LOWER, NORMAL, UNIT, F(1), A11, A12 );
        Trsm( LEFT, LOWER, NORMAL, UNIT, F(1), A11, B1 );

        Gemm( NORMAL, NORMAL, F(-1), A21, A12, F(1), A22 );
        Gemm( NORMAL, NORMAL, F(-1), A21, B1,  F(1), B2 );
        //--------------------------------------------------------------------//

        SlidePartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, A01, /**/ A02,
               /**/       A10, A11, /**/ A12,
         /*************/ /******************/
          ABL, /**/ ABR,  A20, A21, /**/ A22 );

        SlidePartitionDown
        ( BT,  B0,
               B1,
         /**/ /**/
          BB,  B2 );
    }
}
Esempio n. 4
0
QDWHInfo QDWHInner( Matrix<F>& A, Base<F> sMinUpper, const QDWHCtrl& ctrl )
{
    EL_DEBUG_CSE
    typedef Base<F> Real;
    typedef Complex<Real> Cpx;
    const Int m = A.Height();
    const Int n = A.Width();
    const Real oneThird = Real(1)/Real(3);
    if( m < n )
        LogicError("Height cannot be less than width");

    QDWHInfo info;

    QRCtrl<Base<F>> qrCtrl;
    qrCtrl.colPiv = ctrl.colPiv;

    const Real eps = limits::Epsilon<Real>();
    const Real tol = 5*eps;
    const Real cubeRootTol = Pow(tol,oneThird);
    Real L = sMinUpper / Sqrt(Real(n));

    Real frobNormADiff;
    Matrix<F> ALast, ATemp, C;
    Matrix<F> Q( m+n, n );
    auto QT = Q( IR(0,m  ), ALL );
    auto QB = Q( IR(m,END), ALL );
    while( info.numIts < ctrl.maxIts )
    {
        ALast = A;

        Real L2;
        Cpx dd, sqd;
        if( Abs(1-L) < tol )
        {
            L2 = 1;
            dd = 0;
            sqd = 1;
        }
        else
        {
            L2 = L*L;
            dd = Pow( 4*(1-L2)/(L2*L2), oneThird );
            sqd = Sqrt( Real(1)+dd );
        }
        const Cpx arg = Real(8) - Real(4)*dd + Real(8)*(2-L2)/(L2*sqd);
        const Real a = (sqd + Sqrt(arg)/Real(2)).real();
        const Real b = (a-1)*(a-1)/4;
        const Real c = a+b-1;
        const Real alpha = a-b/c;
        const Real beta = b/c;

        L = L*(a+b*L2)/(1+c*L2);

        if( c > 100 )
        {
            //
            // The standard QR-based algorithm
            //
            QT = A;
            QT *= Sqrt(c);
            MakeIdentity( QB );
            qr::ExplicitUnitary( Q, true, qrCtrl );
            Gemm( NORMAL, ADJOINT, F(alpha/Sqrt(c)), QT, QB, F(beta), A );
            ++info.numQRIts;
        }
        else
        {
            //
            // Use faster Cholesky-based algorithm since A is well-conditioned
            //
            Identity( C, n, n );
            Herk( LOWER, ADJOINT, c, A, Real(1), C );
            Cholesky( LOWER, C );
            ATemp = A;
            Trsm( RIGHT, LOWER, ADJOINT, NON_UNIT, F(1), C, ATemp );
            Trsm( RIGHT, LOWER, NORMAL, NON_UNIT, F(1), C, ATemp );
            A *= beta;
            Axpy( alpha, ATemp, A );
            ++info.numCholIts;
        }

        ++info.numIts;
        ALast -= A;
        frobNormADiff = FrobeniusNorm( ALast );
        if( frobNormADiff <= cubeRootTol && Abs(1-L) <= tol )
            break;
    }
    return info;
}
Esempio n. 5
0
inline void
LUHF
( Conjugation conjugation, Int offset, 
  const Matrix<F>& H, const Matrix<F>& t, Matrix<F>& A )
{
#ifndef RELEASE
    CallStackEntry cse("apply_packed_reflectors::LUHF");
    // TODO: Proper dimension checks
    if( t.Height() != H.DiagonalLength(offset) )
        LogicError("t must be the same length as H's offset diag");
#endif
    Matrix<F>
        HTL, HTR,  H00, H01, H02,  HPan, HPanCopy,
        HBL, HBR,  H10, H11, H12,
                   H20, H21, H22;
    Matrix<F> 
        AT,  A0,
        AB,  A1,
             A2;
    Matrix<F>
        tT,  t0,
        tB,  t1,
             t2;
    Matrix<F> SInv, Z;

    LockedPartitionDownOffsetDiagonal
    ( offset,
      H, HTL, HTR,
         HBL, HBR, 0 );
    LockedPartitionDown
    ( t, tT,
         tB, 0 );
    PartitionDown
    ( A, AT,
         AB, 0 );
    while( HTL.Height() < H.Height() && HTL.Width() < H.Width() )
    {
        LockedRepartitionDownDiagonal
        ( HTL, /**/ HTR,  H00, /**/ H01, H02,
         /*************/ /******************/
               /**/       H10, /**/ H11, H12,
          HBL, /**/ HBR,  H20, /**/ H21, H22 );

        LockedRepartitionDown
        ( tT,  t0,
         /**/ /**/
               t1,
          tB,  t2 );

        RepartitionDown
        ( AT,  A0,
         /**/ /**/
               A1,
          AB,  A2, H11.Height() );

        LockedView1x2( HPan, H11, H12 );

        //--------------------------------------------------------------------//
        HPanCopy = HPan;
        MakeTriangular( UPPER, HPanCopy );
        SetDiagonal( HPanCopy, F(1) );

        Herk( LOWER, NORMAL, F(1), HPanCopy, SInv );
        FixDiagonal( conjugation, t1, SInv );

        Gemm( NORMAL, NORMAL, F(1), HPanCopy, AB, Z );
        Trsm( LEFT, LOWER, NORMAL, NON_UNIT, F(1), SInv, Z );
        Gemm( ADJOINT, NORMAL, F(-1), HPanCopy, Z, F(1), AB );
        //--------------------------------------------------------------------//

        SlideLockedPartitionDownDiagonal
        ( HTL, /**/ HTR,  H00, H01, /**/ H02,
               /**/       H10, H11, /**/ H12,
         /*************/ /******************/
          HBL, /**/ HBR,  H20, H21, /**/ H22 );

        SlideLockedPartitionDown
        ( tT,  t0,
               t1,
         /**/ /**/
          tB,  t2 );

        SlidePartitionDown
        ( AT,  A0,
               A1,
         /**/ /**/
          AB,  A2 );
    }
}
Esempio n. 6
0
inline void
TwoSidedTrsmUVar4( UnitOrNonUnit diag, Matrix<F>& A, const Matrix<F>& U )
{
#ifndef RELEASE
    PushCallStack("internal::TwoSidedTrsmUVar4");
    if( A.Height() != A.Width() )
        throw std::logic_error("A must be square");
    if( U.Height() != U.Width() )
        throw std::logic_error("Triangular matrices must be square");
    if( A.Height() != U.Height() )
        throw std::logic_error("A and U must be the same size");
#endif
    // Matrix views
    Matrix<F>
        ATL, ATR,  A00, A01, A02,
        ABL, ABR,  A10, A11, A12,
                   A20, A21, A22;
    Matrix<F>
        UTL, UTR,  U00, U01, U02,
        UBL, UBR,  U10, U11, U12,
                   U20, U21, U22;

    // Temporary products
    Matrix<F> Y12;

    PartitionDownDiagonal
    ( A, ATL, ATR,
         ABL, ABR, 0 );
    LockedPartitionDownDiagonal
    ( U, UTL, UTR,
         UBL, UBR, 0 );
    while( ATL.Height() < A.Height() )
    {
        RepartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, /**/ A01, A02,
         /*************/ /******************/
               /**/       A10, /**/ A11, A12,
          ABL, /**/ ABR,  A20, /**/ A21, A22 );

        LockedRepartitionDownDiagonal
        ( UTL, /**/ UTR,  U00, /**/ U01, U02,
         /*************/ /******************/
               /**/       U10, /**/ U11, U12,
          UBL, /**/ UBR,  U20, /**/ U21, U22 );

        //--------------------------------------------------------------------//
        // A01 := A01 inv(U11)
        Trsm( RIGHT, UPPER, NORMAL, diag, F(1), U11, A01 );

        // A11 := inv(U11)' A11 inv(U11)
        TwoSidedTrsmUUnb( diag, A11, U11 );

        // A02 := A02 - A01 U12
        Gemm( NORMAL, NORMAL, F(-1), A01, U12, F(1), A02 );

        // Y12 := A11 U12
        Zeros( A12.Height(), A12.Width(), Y12 );
        Hemm( LEFT, UPPER, F(1), A11, U12, F(0), Y12 );

        // A12 := inv(U11)' A12
        Trsm( LEFT, UPPER, ADJOINT, diag, F(1), U11, A12 );

        // A12 := A12 - 1/2 Y12
        Axpy( F(-1)/F(2), Y12, A12 );

        // A22 := A22 - (A12' U12 + U12' A12)
        Her2k( UPPER, ADJOINT, F(-1), A12, U12, F(1), A22 );

        // A12 := A12 - 1/2 Y12
        Axpy( F(-1)/F(2), Y12, A12 );
        //--------------------------------------------------------------------//

        SlidePartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, A01, /**/ A02,
               /**/       A10, A11, /**/ A12,
         /*************/ /******************/
          ABL, /**/ ABR,  A20, A21, /**/ A22 );

        SlideLockedPartitionDownDiagonal
        ( UTL, /**/ UTR,  U00, U01, /**/ U02,
               /**/       U10, U11, /**/ U12,
         /**********************************/
          UBL, /**/ UBR,  U20, U21, /**/ U22 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Esempio n. 7
0
inline void
TrtrsmLLN
( UnitOrNonUnit diag, F alpha, const Matrix<F>& L, Matrix<F>& X,
  bool checkIfSingular=true )
{
#ifndef RELEASE
    CallStackEntry entry("internal::TrtrsmLLN");
#endif
    // Matrix views
    Matrix<F>
    LTL, LTR,  L00, L01, L02,
         LBL, LBR,  L10, L11, L12,
         L20, L21, L22;
    Matrix<F>
    XTL, XTR,  X00, X01, X02,
         XBL, XBR,  X10, X11, X12,
         X20, X21, X22;

    Matrix<F> Z11;

    // Start the algorithm
    ScaleTrapezoid( alpha, LOWER, X );
    LockedPartitionDownDiagonal
    ( L, LTL, LTR,
      LBL, LBR, 0 );
    PartitionDownDiagonal
    ( X, XTL, XTR,
      XBL, XBR, 0 );
    while( XBR.Height() > 0 )
    {
        LockedRepartitionDownDiagonal
        ( LTL, /**/ LTR,  L00, /**/ L01, L02,
          /*************/ /******************/
          /**/       L10, /**/ L11, L12,
          LBL, /**/ LBR,  L20, /**/ L21, L22 );

        RepartitionDownDiagonal
        ( XTL, /**/ XTR,  X00, /**/ X01, X02,
          /*************/ /******************/
          /**/       X10, /**/ X11, X12,
          XBL, /**/ XBR,  X20, /**/ X21, X22 );

        //--------------------------------------------------------------------//
        Trsm( LEFT, LOWER, NORMAL, diag, F(1), L11, X10, checkIfSingular );
        TrtrsmLLNUnb( diag, F(1), L11, X11 );
        Gemm( NORMAL, NORMAL, F(-1), L21, X10, F(1), X20 );
        Z11 = X11;
        MakeTriangular( LOWER, Z11 );
        Gemm( NORMAL, NORMAL, F(-1), L21, Z11, F(1), X21 );
        //--------------------------------------------------------------------//

        SlideLockedPartitionDownDiagonal
        ( LTL, /**/ LTR,  L00, L01, /**/ L02,
          /**/       L10, L11, /**/ L12,
          /*************/ /******************/
          LBL, /**/ LBR,  L20, L21, /**/ L22 );

        SlidePartitionDownDiagonal
        ( XTL, /**/ XTR,  X00, X01, /**/ X02,
          /**/       X10, X11, /**/ X12,
          /*************/ /******************/
          XBL, /**/ XBR,  X20, X21, /**/ X22 );
    }
}
Esempio n. 8
0
void TransformColumns
( const Matrix<F>& V,
        DistMatrix<F,MC,MR,BLOCK>& A )
{
    DEBUG_CSE
    const Int width = A.Width();
    const Grid& grid = A.Grid();

    const Int blockWidth = A.BlockWidth();
    const Int firstBlockWidth = blockWidth - A.RowCut();
    if( width <= firstBlockWidth || grid.Width() == 1 )
    {
        if( grid.Col() == A.ColOwner(0) )
        {
            // This process row can locally update its portion of A
            TransformColumns( V, A.Matrix() );
        }
    }
    else if( width <= firstBlockWidth + blockWidth )
    {
        const int firstCol = A.ColOwner( 0 );
        const int secondCol = A.ColOwner( firstBlockWidth );
        if( grid.Col() == firstCol )
        {
            //
            // Replace A with
            //
            //   | ALeft, ARight | | VLeft, VRight |,
            //
            // where ALeft is owned by this process column and ARight by the
            // next.
            //

            // Partition space for the combined matrix
            Matrix<F> ACombine( A.LocalHeight(), width );
            auto ALeft = ACombine( ALL, IR(0,firstBlockWidth) );
            auto ARight = ACombine( ALL, IR(firstBlockWidth,END) );

            // Copy our portion into the combined matrix
            ALeft = A.LockedMatrix();

            // Exchange the data
            El::SendRecv( ALeft, ARight, A.RowComm(), secondCol, secondCol );

            // Form our portion of the result
            auto VLeft = V( ALL, IR(0,firstBlockWidth) );
            Gemm( NORMAL, NORMAL, F(1), ACombine, VLeft, A.Matrix() );
        }
        else if( grid.Col() == secondCol )
        {
            //
            // Replace A with
            //
            //   | ALeft, ARight | | VLeft, VRight |,
            //
            // where ALeft is owned by the previous process column and ARight
            // by this one.
            //

            // Partition space for the combined matrix
            Matrix<F> ACombine( A.LocalHeight(), width );
            auto ALeft = ACombine( ALL, IR(0,firstBlockWidth) );
            auto ARight = ACombine( ALL, IR(firstBlockWidth,END) );

            // Copy our portion into the combined matrix
            ARight = A.LockedMatrix();

            // Exchange the data
            El::SendRecv( ARight, ALeft, A.RowComm(), firstCol, firstCol );

            // Form our portion of the result
            auto VRight = V( ALL, IR(firstBlockWidth,END) );
            Gemm( NORMAL, NORMAL, F(1), ACombine, VRight, A.Matrix() );
        }
    }
    else
    {
        // Fall back to the entire process column interacting.
        // TODO(poulson): Only form the subset of the result that we need.
        DistMatrix<F,MC,STAR,BLOCK> A_MC_STAR( A );
        Matrix<F> ALocCopy( A_MC_STAR.Matrix() );
        Gemm( NORMAL, NORMAL, F(1), ALocCopy, V, A_MC_STAR.Matrix() );
        A = A_MC_STAR;
    }
}
Esempio n. 9
0
inline void
TwoSidedTrsmUVar2( UnitOrNonUnit diag, Matrix<F>& A, const Matrix<F>& U )
{
#ifndef RELEASE
    CallStackEntry entry("internal::TwoSidedTrsmUVar2");
    if( A.Height() != A.Width() )
        LogicError("A must be square");
    if( U.Height() != U.Width() )
        LogicError("Triangular matrices must be square");
    if( A.Height() != U.Height() )
        LogicError("A and U must be the same size");
#endif
    // Matrix views
    Matrix<F>
        ATL, ATR,  A00, A01, A02,
        ABL, ABR,  A10, A11, A12,
                   A20, A21, A22;
    Matrix<F>
        UTL, UTR,  U00, U01, U02,
        UBL, UBR,  U10, U11, U12,
                   U20, U21, U22;

    // Temporary products
    Matrix<F> Y01;

    PartitionDownDiagonal
    ( A, ATL, ATR,
         ABL, ABR, 0 );
    LockedPartitionDownDiagonal
    ( U, UTL, UTR,
         UBL, UBR, 0 );
    while( ATL.Height() < A.Height() )
    {
        RepartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, /**/ A01, A02,
         /*************/ /******************/
               /**/       A10, /**/ A11, A12,
          ABL, /**/ ABR,  A20, /**/ A21, A22 );

        LockedRepartitionDownDiagonal
        ( UTL, /**/ UTR,  U00, /**/ U01, U02,
         /*************/ /******************/
               /**/       U10, /**/ U11, U12,
          UBL, /**/ UBR,  U20, /**/ U21, U22 );

        //--------------------------------------------------------------------//
        // Y01 := A00 U01
        Zeros( Y01, A01.Height(), A01.Width() );
        Hemm( LEFT, UPPER, F(1), A00, U01, F(0), Y01 );

        // A01 := A01 - 1/2 Y01
        Axpy( F(-1)/F(2), Y01, A01 );
        
        // A11 := A11 - (U01' A01 + A01' U01)
        Her2k( UPPER, ADJOINT, F(-1), U01, A01, F(1), A11 );

        // A11 := inv(U11)' A11 inv(U11)
        TwoSidedTrsmUUnb( diag, A11, U11 );

        // A12 := A12 - A02' U01
        Gemm( ADJOINT, NORMAL, F(-1), A02, U01, F(1), A12 );

        // A12 := inv(U11)' A12
        Trsm( LEFT, UPPER, ADJOINT, diag, F(1), U11, A12 );
        
        // A01 := A01 - 1/2 Y01
        Axpy( F(-1)/F(2), Y01, A01 );

        // A01 := A01 inv(U11)
        Trsm( RIGHT, UPPER, NORMAL, diag, F(1), U11, A01 );
        //--------------------------------------------------------------------//

        SlidePartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, A01, /**/ A02,
               /**/       A10, A11, /**/ A12,
         /*************/ /******************/
          ABL, /**/ ABR,  A20, A21, /**/ A22 );

        SlideLockedPartitionDownDiagonal
        ( UTL, /**/ UTR,  U00, U01, /**/ U02,
               /**/       U10, U11, /**/ U12,
         /*************/ /******************/
          UBL, /**/ UBR,  U20, U21, /**/ U22 );
    }
}
Esempio n. 10
0
inline void
ApplyPackedReflectorsRLHF
( int offset, const Matrix<R>& H, Matrix<R>& A )
{
#ifndef RELEASE
    PushCallStack("internal::ApplyPackedReflectorsRLHF");
    if( offset > 0 || offset < -H.Width() )
        throw std::logic_error("Transforms out of bounds");
    if( H.Width() != A.Width() )
        throw std::logic_error
        ("Width of transforms must equal width of target matrix");
#endif
    Matrix<R>
        HTL, HTR,  H00, H01, H02,  HPan, HPanCopy,
        HBL, HBR,  H10, H11, H12,
                   H20, H21, H22;
    Matrix<R> ALeft;

    Matrix<R> SInv, Z;

    LockedPartitionDownDiagonal
    ( H, HTL, HTR,
         HBL, HBR, 0 );
    while( HTL.Height() < H.Height() && HTL.Width() < H.Width() )
    {
        LockedRepartitionDownDiagonal
        ( HTL, /**/ HTR,  H00, /**/ H01, H02,
         /*************/ /******************/
               /**/       H10, /**/ H11, H12,
          HBL, /**/ HBR,  H20, /**/ H21, H22 );

        const int HPanWidth = H10.Width() + H11.Width();
        const int HPanOffset = 
            std::min( H11.Height(), std::max(-offset-H00.Height(),0) );
        const int HPanHeight = H11.Height()-HPanOffset;
        HPan.LockedView( H, H00.Height()+HPanOffset, 0, HPanHeight, HPanWidth );

        ALeft.View( A, 0, 0, A.Height(), HPanWidth );

        Zeros( ALeft.Height(), HPan.Height(), Z );
        Zeros( HPan.Height(), HPan.Height(), SInv );
        //--------------------------------------------------------------------//
        HPanCopy = HPan;
        MakeTrapezoidal( RIGHT, LOWER, offset, HPanCopy );
        SetDiagonalToOne( RIGHT, offset, HPanCopy );

        Syrk( UPPER, NORMAL, R(1), HPanCopy, R(0), SInv );
        HalveMainDiagonal( SInv );

        Gemm( NORMAL, TRANSPOSE, R(1), ALeft, HPanCopy, R(0), Z );
        Trsm( RIGHT, UPPER, NORMAL, NON_UNIT, R(1), SInv, Z );
        Gemm( NORMAL, NORMAL, R(-1), Z, HPanCopy, R(1), ALeft );
        //--------------------------------------------------------------------//

        SlideLockedPartitionDownDiagonal
        ( HTL, /**/ HTR,  H00, H01, /**/ H02,
               /**/       H10, H11, /**/ H12,
         /*************/ /******************/
          HBL, /**/ HBR,  H20, H21, /**/ H22 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Esempio n. 11
0
void Tikhonov
( Orientation orientation,
  const ElementalMatrix<F>& APre,
  const ElementalMatrix<F>& BPre, 
  const ElementalMatrix<F>& G,
        ElementalMatrix<F>& XPre, 
  TikhonovAlg alg )
{
    DEBUG_CSE

    DistMatrixReadProxy<F,F,MC,MR>
      AProx( APre ),
      BProx( BPre );
    DistMatrixWriteProxy<F,F,MC,MR>
      XProx( XPre );
    auto& A = AProx.GetLocked();
    auto& B = BProx.GetLocked();
    auto& X = XProx.Get();

    const bool normal = ( orientation==NORMAL );
    const Int m = ( normal ? A.Height() : A.Width()  );
    const Int n = ( normal ? A.Width()  : A.Height() );
    if( G.Width() != n )
        LogicError("Tikhonov matrix was the wrong width");
    if( orientation == TRANSPOSE && IsComplex<F>::value )
        LogicError("Transpose version of complex Tikhonov not yet supported");

    if( m >= n )
    {
        DistMatrix<F> Z(A.Grid());
        if( alg == TIKHONOV_CHOLESKY )
        {
            if( orientation == NORMAL )
                Herk( LOWER, ADJOINT, Base<F>(1), A, Z );
            else
                Herk( LOWER, NORMAL, Base<F>(1), A, Z );
            Herk( LOWER, ADJOINT, Base<F>(1), G, Base<F>(1), Z );
            Cholesky( LOWER, Z );
        }
        else
        {
            const Int mG = G.Height();
            Zeros( Z, m+mG, n );
            auto ZT = Z( IR(0,m),    IR(0,n) );
            auto ZB = Z( IR(m,m+mG), IR(0,n) );
            if( orientation == NORMAL )
                ZT = A;
            else
                Adjoint( A, ZT );
            ZB = G;
            qr::ExplicitTriang( Z ); 
        }
        if( orientation == NORMAL )
            Gemm( ADJOINT, NORMAL, F(1), A, B, X );
        else
            Gemm( NORMAL, NORMAL, F(1), A, B, X );
        cholesky::SolveAfter( LOWER, NORMAL, Z, X );
    }
    else
    {
        LogicError("This case not yet supported");
    }
}
Esempio n. 12
0
inline void
Cannon_NN
( T alpha, const DistMatrix<T>& A,
           const DistMatrix<T>& B,
  T beta,        DistMatrix<T>& C )
{
#ifndef RELEASE
    CallStackEntry entry("gemm::Cannon_NN");
    if( A.Grid() != B.Grid() || B.Grid() != C.Grid() )
        LogicError("{A,B,C} must have the same grid");
    if( A.Height() != C.Height() ||
        B.Width()  != C.Width()  ||
        A.Width()  != B.Height() )
    {
        std::ostringstream msg;
        msg << "Nonconformal matrices: \n"
            << "  A ~ " << A.Height() << " x " << A.Width() << "\n"
            << "  B ~ " << B.Height() << " x " << B.Width() << "\n"
            << "  C ~ " << C.Height() << " x " << C.Width() << "\n";
        LogicError( msg.str() );
    }
#endif
    const Grid& g = A.Grid();
    if( g.Height() != g.Width() )
        LogicError("Process grid must be square for Cannon's");
    if( C.ColAlignment() != A.ColAlignment() || 
        C.RowAlignment() != B.RowAlignment() )
        LogicError("C is not properly aligned");

    const Int row = g.Row();
    const Int col = g.Col();
    const Int pSqrt = g.Height();
    mpi::Comm rowComm = g.RowComm();
    mpi::Comm colComm = g.ColComm(); 
    if( A.Width() % pSqrt != 0 )
        LogicError("For now, width(A) must be integer multiple of sqrt(p)");

    // Begin by scaling our local portion of C
    Scale( beta, C );

    // Load the initial A and B packages (may want to transpose B...)
    const Int localHeightA = A.LocalHeight();
    const Int localHeightB = B.LocalHeight();
    const Int localWidthA = A.LocalWidth();
    const Int localWidthB = B.LocalWidth();
    Matrix<T> pkgA(localHeightA,localWidthA,localHeightA), 
              pkgB(localHeightB,localWidthB,localHeightB);
    for( Int jLoc=0; jLoc<localWidthA; ++jLoc )
        MemCopy
        ( pkgA.Buffer(0,jLoc), A.LockedBuffer(0,jLoc), localHeightA );
    for( Int jLoc=0; jLoc<localWidthB; ++jLoc )
        MemCopy
        ( pkgB.Buffer(0,jLoc), B.LockedBuffer(0,jLoc), localHeightB );

    // Perform the initial circular shifts so that our A and B packages align
    const Int rowShiftA = A.RowShift();
    const Int colShiftB = B.ColShift();
    const Int leftInitA = (col+pSqrt-colShiftB) % pSqrt;
    const Int rightInitA = (col+colShiftB) % pSqrt;
    const Int aboveInitB = (row+pSqrt-rowShiftA) % pSqrt;
    const Int belowInitB = (row+rowShiftA) % pSqrt;
    const Int pkgSizeA = localHeightA*localWidthA;
    const Int pkgSizeB = localHeightB*localWidthB;
    mpi::SendRecv( pkgA.Buffer(), pkgSizeA, leftInitA, rightInitA, rowComm );
    mpi::SendRecv( pkgB.Buffer(), pkgSizeB, aboveInitB, belowInitB, colComm );

    // Now begin the data flow
    const Int aboveRow = (row+pSqrt-1) % pSqrt;
    const Int belowRow = (row+1) % pSqrt;
    const Int leftCol = (col+pSqrt-1) % pSqrt;
    const Int rightCol = (col+1) % pSqrt;
    for( Int q=0; q<pSqrt; ++q )
    {
        Gemm( NORMAL, NORMAL, alpha, pkgA, pkgB, T(1), C.Matrix() );
        if( q != pSqrt-1 )
        {
            mpi::SendRecv
            ( pkgA.Buffer(), pkgSizeA, leftCol, rightCol, rowComm );
            mpi::SendRecv
            ( pkgB.Buffer(), pkgSizeB, aboveRow, belowRow, colComm );
        }
    }
}
Esempio n. 13
0
int QDWH
( Matrix<F>& A, 
  typename Base<F>::type lowerBound,
  typename Base<F>::type upperBound )
{
#ifndef RELEASE
    PushCallStack("QDWH");
#endif
    typedef typename Base<F>::type R;
    const int height = A.Height();
    const int width = A.Width();
    const R oneHalf = R(1)/R(2);
    const R oneThird = R(1)/R(3);

    if( height < width )
        throw std::logic_error("Height cannot be less than width");

    const R epsilon = lapack::MachineEpsilon<R>();
    const R tol = 5*epsilon;
    const R cubeRootTol = Pow(tol,oneThird);

    // Form the first iterate
    Scale( 1/upperBound, A );

    int numIts=0;
    R frobNormADiff;
    Matrix<F> ALast;
    Matrix<F> Q( height+width, width );
    Matrix<F> QT, QB;
    PartitionDown( Q, QT,
                      QB, height );
    Matrix<F> C;
    Matrix<F> ATemp;
    do
    {
        ++numIts;
        ALast = A;

        R L2;
        Complex<R> dd, sqd;
        if( Abs(1-lowerBound) < tol )
        {
            L2 = 1;
            dd = 0;
            sqd = 1;
        }
        else
        {
            L2 = lowerBound*lowerBound;
            dd = Pow( 4*(1-L2)/(L2*L2), oneThird );
            sqd = Sqrt( 1+dd );
        }
        const Complex<R> arg = 8 - 4*dd + 8*(2-L2)/(L2*sqd);
        const R a = (sqd + Sqrt( arg )/2).real;
        const R b = (a-1)*(a-1)/4;
        const R c = a+b-1;
        const Complex<R> alpha = a-b/c;
        const Complex<R> beta = b/c;

        lowerBound = lowerBound*(a+b*L2)/(1+c*L2);

        if( c > 100 )
        {
            //
            // The standard QR-based algorithm
            //
            QT = A;
            Scale( Sqrt(c), QT );
            MakeIdentity( QB );
            ExplicitQR( Q );
            Gemm( NORMAL, ADJOINT, alpha/Sqrt(c), QT, QB, beta, A );
        }
        else
        {
            //
            // Use faster Cholesky-based algorithm since A is well-conditioned
            //
            Identity( width, width, C );
            Herk( LOWER, ADJOINT, F(c), A, F(1), C );
            Cholesky( LOWER, C );
            ATemp = A;
            Trsm( RIGHT, LOWER, ADJOINT, NON_UNIT, F(1), C, ATemp );
            Trsm( RIGHT, LOWER, NORMAL, NON_UNIT, F(1), C, ATemp );
            Scale( beta, A );
            Axpy( alpha, ATemp, A );
        }

        Axpy( F(-1), A, ALast );
        frobNormADiff = Norm( ALast, FROBENIUS_NORM );
    }
    while( frobNormADiff > cubeRootTol || Abs(1-lowerBound) > tol );
#ifndef RELEASE
    PopCallStack();
#endif
    return numIts;
}
Esempio n. 14
0
inline void
ApplyPackedReflectorsLUHB
( Conjugation conjugation, int offset, 
  const Matrix<Complex<R> >& H,
  const Matrix<Complex<R> >& t,
        Matrix<Complex<R> >& A )
{
#ifndef RELEASE
    PushCallStack("internal::ApplyPackedReflectorsLUHB");
    if( offset < 0 || offset > H.Width() )
        throw std::logic_error("Transforms out of bounds");
    if( H.Width() != A.Height() )
        throw std::logic_error
        ("Width of transforms must equal height of target matrix");
    if( t.Height() != H.DiagonalLength( offset ) )
        throw std::logic_error("t must be the same length as H's offset diag");
#endif
    typedef Complex<R> C;

    Matrix<C>
        HTL, HTR,  H00, H01, H02,  HPan, HPanCopy,
        HBL, HBR,  H10, H11, H12,
                   H20, H21, H22;
    Matrix<C> ABottom;
    Matrix<C>
        tT,  t0,
        tB,  t1,
             t2;

    Matrix<C> SInv, Z;

    LockedPartitionUpDiagonal
    ( H, HTL, HTR,
         HBL, HBR, 0 );
    LockedPartitionUp
    ( t, tT,
         tB, 0 );
    while( HBR.Height() < H.Height() && HBR.Width() < H.Width() )
    {
        LockedRepartitionUpDiagonal
        ( HTL, /**/ HTR,  H00, H01, /**/ H02,
               /**/       H10, H11, /**/ H12,
         /*************/ /******************/
          HBL, /**/ HBR,  H20, H21, /**/ H22 );
    
        const int HPanWidth = H11.Width() + H12.Width();
        const int HPanHeight = 
            std::min( H11.Height(), std::max(HPanWidth-offset,0) );
        const int leftover = A.Height()-HPanWidth;
        HPan.LockedView( H, H00.Height(), H00.Width(), HPanHeight, HPanWidth );

        LockedRepartitionUp
        ( tT,  t0,
               t1,
         /**/ /**/
          tB,  t2, HPanHeight );

        ABottom.View( A, leftover, 0, HPanWidth, A.Width() );

        Zeros( HPanHeight, ABottom.Width(), Z );
        Zeros( HPanHeight, HPanHeight, SInv );
        //--------------------------------------------------------------------//
        HPanCopy = HPan;
        MakeTrapezoidal( LEFT, UPPER, offset, HPanCopy );
        SetDiagonalToOne( LEFT, offset, HPanCopy );

        Herk( UPPER, NORMAL, C(1), HPanCopy, C(0), SInv );
        FixDiagonal( conjugation, t1, SInv );

        Gemm( NORMAL, NORMAL, C(1), HPanCopy, ABottom, C(0), Z );
        Trsm( LEFT, UPPER, NORMAL, NON_UNIT, C(1), SInv, Z );
        Gemm( ADJOINT, NORMAL, C(-1), HPanCopy, Z, C(1), ABottom );
        //--------------------------------------------------------------------//

        SlideLockedPartitionUpDiagonal
        ( HTL, /**/ HTR,  H00, /**/ H01, H02,
         /*************/ /******************/
               /**/       H10, /**/ H11, H12,
          HBL, /**/ HBR,  H20, /**/ H21, H22 );

        SlideLockedPartitionUp
        ( tT,  t0,
         /**/ /**/
               t1,
          tB,  t2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Esempio n. 15
0
inline void
Inverse( Matrix<F>& A )
{
#ifndef RELEASE
    PushCallStack("Inverse");
    if( A.Height() != A.Width() )
        throw std::logic_error("Cannot invert non-square matrices");
#endif
    Matrix<int> p;
    LU( A, p );
    TriangularInverse( UPPER, NON_UNIT, A );

    // Solve inv(A) L = inv(U) for inv(A)
    Matrix<F> ATL, ATR,
           ABL, ABR;
    Matrix<F> A00, A01, A02,
           A10, A11, A12,
           A20, A21, A22;
    Matrix<F> A1, A2;
    Matrix<F> L11,
           L21;
    PartitionUpDiagonal
    ( A, ATL, ATR,
      ABL, ABR, 0 );
    while( ABR.Height() < A.Height() )
    {
        RepartitionUpDiagonal
        ( ATL, /**/ ATR,  A00, A01, /**/ A02,
          /**/       A10, A11, /**/ A12,
          /*************/ /******************/
          ABL, /**/ ABR,  A20, A21, /**/ A22 );

        View( A1, A, 0, A00.Width(),             A.Height(), A01.Width() );
        View( A2, A, 0, A00.Width()+A01.Width(), A.Height(), A02.Width() );

        //--------------------------------------------------------------------//
        // Copy out L1
        L11 = A11;
        L21 = A21;

        // Zero the strictly lower triangular portion of A1
        MakeTrapezoidal( LEFT, UPPER, 0, A11 );
        Zero( A21 );

        // Perform the lazy update of A1
        Gemm( NORMAL, NORMAL, F(-1), A2, L21, F(1), A1 );

        // Solve against this diagonal block of L11
        Trsm( RIGHT, LOWER, NORMAL, UNIT, F(1), L11, A1 );
        //--------------------------------------------------------------------//

        SlidePartitionUpDiagonal
        ( ATL, /**/ ATR,  A00, /**/ A01, A02,
          /*************/ /*******************/
          /**/       A10, /**/ A11, A12,
          ABL, /**/ ABR,  A20, /**/ A21, A22 );
    }

    // inv(A) := inv(A) P
    ApplyInverseColumnPivots( A, p );
#ifndef RELEASE
    PopCallStack();
#endif
}
Esempio n. 16
0
inline void
ApplyPackedReflectorsLUHB
( int offset, const Matrix<R>& H, Matrix<R>& A )
{
#ifndef RELEASE
    PushCallStack("internal::ApplyPackedReflectorsLUHB");
    if( offset < 0 || offset > H.Width() )
        throw std::logic_error("Transforms out of bounds");
    if( H.Width() != A.Height() )
        throw std::logic_error
        ("Width of transforms must equal height of target matrix");
#endif
    Matrix<R>
        HTL, HTR,  H00, H01, H02,  HPan, HPanCopy,
        HBL, HBR,  H10, H11, H12,
                   H20, H21, H22;
    Matrix<R> ABottom;

    Matrix<R> SInv, Z;

    LockedPartitionUpDiagonal
    ( H, HTL, HTR,
         HBL, HBR, 0 );
    while( HBR.Height() < H.Height() && HBR.Width() < H.Width() )
    {
        LockedRepartitionUpDiagonal
        ( HTL, /**/ HTR,  H00, H01, /**/ H02,
               /**/       H10, H11, /**/ H12,
         /*************/ /******************/
          HBL, /**/ HBR,  H20, H21, /**/ H22 );
    
        const int HPanWidth = H11.Width() + H12.Width();
        const int HPanHeight = 
            std::min( H11.Height(), std::max(HPanWidth-offset,0) );
        const int leftover = A.Height()-HPanWidth;
        HPan.LockedView( H, H00.Height(), H00.Width(), HPanHeight, HPanWidth );

        ABottom.View( A, leftover, 0, HPanWidth, A.Width() );

        Zeros( HPanHeight, ABottom.Width(), Z );
        Zeros( HPanHeight, HPanHeight, SInv );
        //--------------------------------------------------------------------//
        HPanCopy = HPan;
        MakeTrapezoidal( LEFT, UPPER, offset, HPanCopy );
        SetDiagonalToOne( LEFT, offset, HPanCopy );

        Syrk( UPPER, NORMAL, R(1), HPanCopy, R(0), SInv );
        HalveMainDiagonal( SInv );

        Gemm( NORMAL, NORMAL, R(1), HPanCopy, ABottom, R(0), Z );
        Trsm( LEFT, UPPER, NORMAL, NON_UNIT, R(1), SInv, Z );
        Gemm( TRANSPOSE, NORMAL, R(-1), HPanCopy, Z, R(1), ABottom );
        //--------------------------------------------------------------------//

        SlideLockedPartitionUpDiagonal
        ( HTL, /**/ HTR,  H00, /**/ H01, H02,
         /*************/ /******************/
               /**/       H10, /**/ H11, H12,
          HBL, /**/ HBR,  H20, /**/ H21, H22 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Esempio n. 17
0
bool NnlsBlockpivot(const DenseMatrix<T>& LHS,
                    const DenseMatrix<T>& RHS,
                    DenseMatrix<T>& X, // input as xinit
                    DenseMatrix<T>& Y) // gradX
{
    // Solve (LHS)*X = RHS for X by block principal pivoting. Matrix LHS
    // is assumed to be symmetric positive definite.

    const int PBAR = 3;
    const unsigned int WIDTH  = RHS.Width();
    const unsigned int HEIGHT = RHS.Height();
    const unsigned int MAX_ITER = HEIGHT*5;

    BitMatrix passive_set = (X > T(0));

    std::vector<unsigned int> tmp_indices(WIDTH);
    for (unsigned int i=0; i<WIDTH; ++i)
        tmp_indices[i] = i;

    MakeZeros(X);
    if (!BppSolveNormalEqNoGroup(tmp_indices, passive_set, LHS, RHS, X))
        return false;

    // Y = LHS * X - RHS
    Gemm(NORMAL, NORMAL, T(1), LHS, X, T(0), Y);
    Axpy( T(-1), RHS, Y);

    std::vector<int> P(WIDTH, PBAR), Ninf(WIDTH, HEIGHT+1);

    BitMatrix nonopt_set = (Y < T(0)) & ~passive_set;
    BitMatrix infeas_set = (X < T(0)) & passive_set;
    std::vector<int> col_sums(WIDTH);
    std::vector<int> not_good(WIDTH);

    nonopt_set.SumColumns(not_good);
    infeas_set.SumColumns(col_sums);
    not_good += col_sums;
    BitMatrix not_opt_cols = (not_good > 0);
    BitMatrix not_opt_mask;

    std::vector<unsigned int> non_opt_col_indices(WIDTH);
    not_opt_cols.Find(non_opt_col_indices);

    DenseMatrix<double> RHSsub(HEIGHT, WIDTH);
    DenseMatrix<double> Xsub(HEIGHT, WIDTH);
    DenseMatrix<double> Ysub(HEIGHT, WIDTH);

    unsigned int iter = 0;
    while (!non_opt_col_indices.empty())
    {
        // exit if not getting anywhere
        if (iter >= MAX_ITER)
            return false;

        UpdatePassiveSet(passive_set, PBAR, HEIGHT,
                         not_opt_cols, nonopt_set, infeas_set,
                         not_good, P, Ninf);

        // equivalent of repmat(NotOptCols, HEIGHT, 1)
        not_opt_mask = MatrixFromColumnMask(not_opt_cols, HEIGHT);

        // Setup for the normal equation solver by extracting submatrices
        // from RHS and X.  The normal equation solver will extract further
        // subproblems from RHSsub and Xsub and write all updated values
        // back into RHSsub and Xsub.
        RHS.SubmatrixFromCols(RHSsub, non_opt_col_indices);
        X.SubmatrixFromCols(Xsub, non_opt_col_indices);

        if (!BppSolveNormalEqNoGroup(non_opt_col_indices, passive_set, LHS, RHSsub, Xsub))
            return false;

        ZeroizeSmallValues(Xsub, 1.0e-12);

        // compute Ysub = LHS * Xsub - RHSsub
        Ysub.Resize(RHSsub.Height(), RHSsub.Width());
        Gemm(NORMAL, NORMAL, T(1), LHS, Xsub, T(0), Ysub);
        Axpy( T(-1), RHSsub, Ysub);

        // update Y and X using the new values in Ysub and Xsub
        OverwriteCols(Y, Ysub, non_opt_col_indices, non_opt_col_indices.size());
        OverwriteCols(X, Xsub, non_opt_col_indices, non_opt_col_indices.size());

        ZeroizeSmallValues(X, 1.0e-12);
        ZeroizeSmallValues(Y, 1.0e-12);

        // Check optimality - BppUpdateSets does the equivalent of the next two lines.
        // nonopt_set = not_opt_mask & (Y < T(0)) & ~passive_set;
        // infeas_set = not_opt_mask & (X < T(0)) & passive_set;
        BppUpdateSets(nonopt_set, infeas_set, not_opt_mask, X, Y, passive_set);

        nonopt_set.SumColumns(not_good);
        infeas_set.SumColumns(col_sums);
        not_good += col_sums;
        not_opt_cols = (not_good > 0);
        not_opt_cols.Find(non_opt_col_indices);

        ++iter;
    }

    return true;
}
Esempio n. 18
0
void TransformRows
( const Matrix<F>& V,
        DistMatrix<F,MC,MR,BLOCK>& A )
{
    DEBUG_CSE
    const Int height = A.Height();
    const Grid& grid = A.Grid();

    const Int blockHeight = A.BlockHeight();
    const Int firstBlockHeight = blockHeight - A.ColCut();
    if( height <= firstBlockHeight || grid.Height() == 1 )
    {
        if( grid.Row() == A.RowOwner(0) )
        {
            // This process row can locally update its portion of A
            TransformRows( V, A.Matrix() );
        }
    }
    else if( height <= firstBlockHeight + blockHeight )
    {
        const int firstRow = A.RowOwner( 0 );
        const int secondRow = A.RowOwner( firstBlockHeight );
        if( grid.Row() == firstRow )
        {
            //
            // Replace A with
            //
            //   | VLeft, VRight |' | ATop    |,
            //                      | ABottom |
            //
            // where ATop is owned by this process row and ABottom by the next.
            //
            auto VLeft = V( ALL, IR(0,firstBlockHeight) );

            // Partition space for the combined matrix
            Matrix<F> ACombine( height, A.LocalWidth() );
            auto ATop = ACombine( IR(0,firstBlockHeight), ALL );
            auto ABottom = ACombine( IR(firstBlockHeight,END), ALL );

            // Copy our portion into the combined matrix
            ATop = A.LockedMatrix();

            // Exchange the data
            El::SendRecv( ATop, ABottom, A.ColComm(), secondRow, secondRow );

            // Form our portion of the result
            Gemm( ADJOINT, NORMAL, F(1), VLeft, ACombine, A.Matrix() );
        }
        else if( grid.Row() == secondRow )
        {
            //
            // Replace A with
            //
            //   | VLeft, VRight |' | ATop    |,
            //                      | ABottom |
            //
            // where ATop is owned by the previous process row and ABottom by
            // this one.
            //
            auto VRight = V( ALL, IR(firstBlockHeight,END) );

            // Partition space for the combined matrix
            Matrix<F> ACombine( height, A.LocalWidth() );
            auto ATop = ACombine( IR(0,firstBlockHeight), ALL );
            auto ABottom = ACombine( IR(firstBlockHeight,END), ALL );

            // Copy our portion into the combined matrix
            ABottom = A.LockedMatrix();

            // Exchange the data
            El::SendRecv( ABottom, ATop, A.ColComm(), firstRow, firstRow );

            // Form our portion of the result
            Gemm( ADJOINT, NORMAL, F(1), VRight, ACombine, A.Matrix() );
        }
    }
    else
    {
        // Fall back to the entire process column interacting.
        // TODO(poulson): Only form the subset of the result that we need.
        DistMatrix<F,STAR,MR,BLOCK> A_STAR_MR( A );
        Matrix<F> ALocCopy( A_STAR_MR.Matrix() );
        Gemm( ADJOINT, NORMAL, F(1), V, ALocCopy, A_STAR_MR.Matrix() );
        A = A_STAR_MR;
    }
}
Esempio n. 19
0
bool NnlsHals(const MatrixType<T>& A,
              DenseMatrix<T>& W, 
              DenseMatrix<T>& H,
              const T tol,
              const bool verbose,
              const unsigned int max_iter)
{
    unsigned int n = A.Width();
    unsigned int k = W.Width();

    if (static_cast<unsigned int>(W.Height()) != static_cast<unsigned int>(A.Height()))
        throw std::logic_error("NnlsHals: W and A must have identical height");
    if (static_cast<unsigned int>(H.Width()) != static_cast<unsigned int>(A.Width()))
        throw std::logic_error("NnlsHals: H and A must have identical width");
    if (H.Height() != W.Width())
        throw std::logic_error("NnlsHals: non-conformant W and H");

    DenseMatrix<T> WtW(k, k), WtA(k, n), WtWH_r(1, n), gradH(k, n);

    if (verbose)
        std::cout << "\nRunning NNLS solver..." << std::endl;
    
    // compute W'W and W'A for the normal equations
    Gemm(TRANSPOSE, NORMAL, T(1.0), W, W, T(0.0), WtW);
    Gemm(TRANSPOSE, NORMAL, T(1.0), W, A, T(0.0), WtA);

    bool success = false;

    T pg0 = T(0), pg;
    for (unsigned int i=0; i<max_iter; ++i)
    {
        // compute the new matrix H
        UpdateH_Hals(H, WtWH_r, WtW, WtA);
        
        // compute gradH = WtW*H - WtA
        Gemm(NORMAL, NORMAL, T(1.0), WtW, H, T(0.0), gradH);
        Axpy( T(-1.0), WtA, gradH);

        // compute progress metric
        if (0 == i)
        {
            pg0 = ProjectedGradientNorm(gradH, H);
            if (verbose)
                ReportProgress(i+1, T(1.0));
            continue;
        }
        else
        {
            pg = ProjectedGradientNorm(gradH, H);
        }

        if (verbose)
            ReportProgress(i+1, pg/pg0);
        
        // check progress vs. desired tolerance
        if (pg < tol * pg0)
        {
            success = true;
            NormalizeAndScale<T>(W, H);
            break;
        }
    }

    if (!success)
        std::cerr << "NNLS solver reached iteration limit." << std::endl;
    
    return success;
}
Esempio n. 20
0
inline void
TwoSidedTrsmLVar2( UnitOrNonUnit diag, Matrix<F>& A, const Matrix<F>& L )
{
#ifndef RELEASE
    PushCallStack("internal::TwoSidedTrsmLVar2");
    if( A.Height() != A.Width() )
        throw std::logic_error("A must be square");
    if( L.Height() != L.Width() )
        throw std::logic_error("Triangular matrices must be square");
    if( A.Height() != L.Height() )
        throw std::logic_error("A and L must be the same size");
#endif
    // Matrix views
    Matrix<F>
        ATL, ATR,  A00, A01, A02,
        ABL, ABR,  A10, A11, A12,
                         A20, A21, A22;
    Matrix<F>
        LTL, LTR,  L00, L01, L02,
        LBL, LBR,  L10, L11, L12,
                   L20, L21, L22;

    // Temporary products
    Matrix<F> X11;
    Matrix<F> Y10;

    PartitionDownDiagonal
    ( A, ATL, ATR,
         ABL, ABR, 0 );
    LockedPartitionDownDiagonal
    ( L, LTL, LTR,
         LBL, LBR, 0 );
    while( ATL.Height() < A.Height() )
    {
        RepartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, /**/ A01, A02,
         /*************/ /******************/
               /**/       A10, /**/ A11, A12,
          ABL, /**/ ABR,  A20, /**/ A21, A22 );

        LockedRepartitionDownDiagonal
        ( LTL, /**/ LTR,  L00, /**/ L01, L02,
         /*************/ /******************/
               /**/       L10, /**/ L11, L12,
          LBL, /**/ LBR,  L20, /**/ L21, L22 );

        //--------------------------------------------------------------------//
        // Y10 := L10 A00
        Zeros( L10.Height(), A00.Width(), Y10 );
        Hemm( RIGHT, LOWER, F(1), A00, L10, F(0), Y10 );

        // A10 := A10 - 1/2 Y10
        Axpy( F(-1)/F(2), Y10, A10 );

        // A11 := A11 - (A10 L10' + L10 A10')
        Her2k( LOWER, NORMAL, F(-1), A10, L10, F(1), A11 );

        // A11 := inv(L11) A11 inv(L11)'
        TwoSidedTrsmLUnb( diag, A11, L11 );

        // A21 := A21 - A20 L10'
        Gemm( NORMAL, ADJOINT, F(-1), A20, L10, F(1), A21 );

        // A21 := A21 inv(L11)'
        Trsm( RIGHT, LOWER, ADJOINT, diag, F(1), L11, A21 );

        // A10 := A10 - 1/2 Y10
        Axpy( F(-1)/F(2), Y10, A10 );

        // A10 := inv(L11) A10
        Trsm( LEFT, LOWER, NORMAL, diag, F(1), L11, A10 );
        //--------------------------------------------------------------------//

        SlidePartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, A01, /**/ A02,
               /**/       A10, A11, /**/ A12,
         /*************/ /******************/
          ABL, /**/ ABR,  A20, A21, /**/ A22 );

        SlideLockedPartitionDownDiagonal
        ( LTL, /**/ LTR,  L00, L01, /**/ L02,
               /**/       L10, L11, /**/ L12,
         /**********************************/
          LBL, /**/ LBR,  L20, L21, /**/ L22 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Esempio n. 21
0
inline void
LU( Matrix<F>& A, Matrix<int>& p )
{
#ifndef RELEASE
    PushCallStack("LU");
    if( p.Viewing() && 
        (std::min(A.Height(),A.Width()) != p.Height() || p.Width() != 1) ) 
        throw std::logic_error
        ("p must be a vector of the same height as the min dimension of A.");
#endif
    if( !p.Viewing() )
        p.ResizeTo( std::min(A.Height(),A.Width()), 1 );

    // Matrix views
    Matrix<F>
        ATL, ATR,  A00, A01, A02,  ABRL, ABRR,
        ABL, ABR,  A10, A11, A12,  
                   A20, A21, A22;

    Matrix<int>
        pT,  p0, 
        pB,  p1,
             p2;

    // Pivot composition
    std::vector<int> image, preimage;

    // Start the algorithm
    PartitionDownDiagonal
    ( A, ATL, ATR,
         ABL, ABR, 0 );
    PartitionDown
    ( p, pT,
         pB, 0 );
    while( ATL.Height() < A.Height() && ATL.Width() < A.Width() )
    {
        RepartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, /**/ A01, A02,
         /*************/ /******************/
               /**/       A10, /**/ A11, A12,
          ABL, /**/ ABR,  A20, /**/ A21, A22 );

        RepartitionDown
        ( pT,  p0,
         /**/ /**/
               p1,
          pB,  p2 );

        PartitionRight( ABR, ABRL, ABRR, A11.Width() );

        const int pivotOffset = A01.Height();
        //--------------------------------------------------------------------//
        internal::PanelLU( ABRL, p1, pivotOffset );
        internal::ComposePanelPivots( p1, pivotOffset, image, preimage );
        ApplyRowPivots( ABL, image, preimage );
        ApplyRowPivots( ABRR, image, preimage );

        Trsm( LEFT, LOWER, NORMAL, UNIT, F(1), A11, A12 );
        Gemm( NORMAL, NORMAL, F(-1), A21, A12, F(1), A22 );
        //--------------------------------------------------------------------//

        SlidePartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, A01, /**/ A02,
               /**/       A10, A11, /**/ A12,
         /*************/ /******************/
          ABL, /**/ ABR,  A20, A21, /**/ A22 );

        SlidePartitionDown
        ( pT,  p0,
               p1,
         /**/ /**/
          pB,  p2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Esempio n. 22
0
inline void
LU( Matrix<F>& A, Matrix<Int>& p )
{
#ifndef RELEASE
    CallStackEntry entry("LU");
#endif
    p.ResizeTo( Min(A.Height(),A.Width()), 1 );

    // Matrix views
    Matrix<F>
        ATL, ATR,  A00, A01, A02,  ABRL, ABRR,
        ABL, ABR,  A10, A11, A12,  
                   A20, A21, A22;

    Matrix<Int>
        pT,  p0, 
        pB,  p1,
             p2;

    // Pivot composition
    std::vector<Int> image, preimage;

    // Start the algorithm
    PartitionDownDiagonal
    ( A, ATL, ATR,
         ABL, ABR, 0 );
    PartitionDown
    ( p, pT,
         pB, 0 );
    while( ATL.Height() < A.Height() && ATL.Width() < A.Width() )
    {
        RepartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, /**/ A01, A02,
         /*************/ /******************/
               /**/       A10, /**/ A11, A12,
          ABL, /**/ ABR,  A20, /**/ A21, A22 );

        RepartitionDown
        ( pT,  p0,
         /**/ /**/
               p1,
          pB,  p2 );

        PartitionRight( ABR, ABRL, ABRR, A11.Width() );

        const Int pivotOffset = A01.Height();
        //--------------------------------------------------------------------//
        lu::Panel( ABRL, p1, pivotOffset );
        ComposePivots( p1, pivotOffset, image, preimage );
        ApplyRowPivots( ABL, image, preimage );
        ApplyRowPivots( ABRR, image, preimage );

        Trsm( LEFT, LOWER, NORMAL, UNIT, F(1), A11, A12 );
        Gemm( NORMAL, NORMAL, F(-1), A21, A12, F(1), A22 );
        //--------------------------------------------------------------------//

        SlidePartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, A01, /**/ A02,
               /**/       A10, A11, /**/ A12,
         /*************/ /******************/
          ABL, /**/ ABR,  A20, A21, /**/ A22 );

        SlidePartitionDown
        ( pT,  p0,
               p1,
         /**/ /**/
          pB,  p2 );
    }
}
Esempio n. 23
0
int Halley
( DistMatrix<F>& A, typename Base<F>::type upperBound )
{
#ifndef RELEASE
    PushCallStack("Halley");
#endif
    typedef typename Base<F>::type R;
    const Grid& g = A.Grid();
    const int height = A.Height();
    const int width = A.Width();
    const R oneHalf = R(1)/R(2);
    const R oneThird = R(1)/R(3);

    if( height < width )
        throw std::logic_error("Height cannot be less than width");

    const R epsilon = lapack::MachineEpsilon<R>();
    const R tol = 5*epsilon;
    const R cubeRootTol = Pow(tol,oneThird);
    const R a = 3;
    const R b = 1;
    const R c = 3;

    // Form the first iterate
    Scale( 1/upperBound, A );

    int numIts=0;
    R frobNormADiff;
    DistMatrix<F> ALast( g );
    DistMatrix<F> Q( height+width, width, g );
    DistMatrix<F> QT(g), QB(g);
    PartitionDown( Q, QT,
                      QB, height );
    DistMatrix<F> C( g );
    DistMatrix<F> ATemp( g );
    do
    {
        if( numIts > 100 )
            throw std::runtime_error("Halley iteration did not converge");
        ++numIts;
        ALast = A;

        // TODO: Come up with a test for when we can use the Cholesky approach
        if( true )
        {
            //
            // The standard QR-based algorithm
            //
            QT = A;
            Scale( Sqrt(c), QT );
            MakeIdentity( QB );
            ExplicitQR( Q );
            Gemm( NORMAL, ADJOINT, F(a-b/c)/Sqrt(c), QT, QB, F(b/c), A );
        }
        else
        {
            //
            // Use faster Cholesky-based algorithm since A is well-conditioned
            //
            Identity( width, width, C );
            Herk( LOWER, ADJOINT, F(c), A, F(1), C );
            Cholesky( LOWER, C );
            ATemp = A;
            Trsm( RIGHT, LOWER, ADJOINT, NON_UNIT, F(1), C, ATemp );
            Trsm( RIGHT, LOWER, NORMAL, NON_UNIT, F(1), C, ATemp );
            Scale( b/c, A );
            Axpy( a-b/c, ATemp, A );
        }

        Axpy( F(-1), A, ALast );
        frobNormADiff = Norm( ALast, FROBENIUS_NORM );
    }
    while( frobNormADiff > cubeRootTol );
#ifndef RELEASE
    PopCallStack();
#endif
    return numIts;
}