예제 #1
0
void EntrywiseMap
( const ElementalMatrix<S>& A,
        ElementalMatrix<T>& B, 
        function<T(S)> func )
{ 
    if( A.DistData().colDist == B.DistData().colDist &&
        A.DistData().rowDist == B.DistData().rowDist )
    {
        B.AlignWith( A.DistData() );
        B.Resize( A.Height(), A.Width() );
        EntrywiseMap( A.LockedMatrix(), B.Matrix(), func );
    }
    else
    {
        B.Resize( A.Height(), A.Width() );
        #define GUARD(CDIST,RDIST) \
          B.DistData().colDist == CDIST && B.DistData().rowDist == RDIST
        #define PAYLOAD(CDIST,RDIST) \
          DistMatrix<S,CDIST,RDIST> AProx(B.Grid()); \
          AProx.AlignWith( B.DistData() ); \
          Copy( A, AProx ); \
          EntrywiseMap( AProx.Matrix(), B.Matrix(), func );
        #include <El/macros/GuardAndPayload.h>
        #undef GUARD
        #undef PAYLOAD
    }
}
예제 #2
0
파일: Eig.cpp 프로젝트: restrin/Elemental
void TestCorrectness
( bool print,
  const ElementalMatrix<Complex<Real>>& A,
  const ElementalMatrix<Complex<Real>>& w,
  const ElementalMatrix<Complex<Real>>& V )
{
    const Int n = A.Height();
    const Real eps = limits::Epsilon<Real>();
    const Real oneNormA = OneNorm( A );

    // Find the residual R = AV-VW
    DistMatrix<Complex<Real>> R( V.Height(), V.Width(), A.Grid() );
    Gemm
    ( NORMAL, NORMAL,
      Complex<Real>(1), A, V,
      Complex<Real>(0), R);
    DistMatrix<Complex<Real>> VW( V );
    DiagonalScale( RIGHT, NORMAL, w, VW );
    R -= VW;
    
    const Real infError = InfinityNorm( R );
    const Real relError = infError / (eps*n*oneNormA);
    OutputFromRoot
    (A.Grid().Comm(),"|| A V - V W ||_oo / (eps n || A ||_1) = ",relError);

    // TODO: A more refined failure condition
    if( relError > Real(100) )
        LogicError("Relative error was unacceptably large");
}
예제 #3
0
void PartialColScatter
( T alpha,
  const ElementalMatrix<T>& A,
        ElementalMatrix<T>& B )
{
    DEBUG_ONLY(CSE cse("axpy_contract::PartialColScatter"))
    AssertSameGrids( A, B );
    if( A.Height() != B.Height() || A.Width() != B.Width() )
        LogicError("A and B must be the same size");

#ifdef EL_CACHE_WARNINGS
    if( A.Width() != 1 && A.Grid().Rank() == 0 )
    {
        cerr <<
          "axpy_contract::PartialColScatterUpdate potentially causes a large "
          "amount of cache-thrashing. If possible, avoid it by forming the "
          "(conjugate-)transpose of the [UGath,* ] matrix instead."
          << endl;
    }
#endif
    if( B.ColAlign() % A.ColStride() == A.ColAlign() )
    {
        const Int colStride = B.ColStride();
        const Int colStridePart = B.PartialColStride();
        const Int colStrideUnion = B.PartialUnionColStride();
        const Int colRankPart = B.PartialColRank();
        const Int colAlign = B.ColAlign();

        const Int height = B.Height();
        const Int width = B.Width();
        const Int localHeight = B.LocalHeight();
        const Int maxLocalHeight = MaxLength( height, colStride );
        const Int recvSize = mpi::Pad( maxLocalHeight*width );
        const Int sendSize = colStrideUnion*recvSize;

        //vector<T> buffer( sendSize );
        vector<T> buffer;
        buffer.reserve( sendSize );

        // Pack
        copy::util::PartialColStridedPack
        ( height, width,
          colAlign, colStride,
          colStrideUnion, colStridePart, colRankPart,
          A.ColShift(),
          A.LockedBuffer(), A.LDim(),
          buffer.data(),    recvSize );

        // Communicate
        mpi::ReduceScatter( buffer.data(), recvSize, B.PartialUnionColComm() );

        // Unpack our received data
        axpy::util::InterleaveMatrixUpdate
        ( alpha, localHeight, width,
          buffer.data(), 1, localHeight,
          B.Buffer(),    1, B.LDim() );
    }
    else
        LogicError("Unaligned PartialColScatter not implemented");
}
예제 #4
0
void PartialRowScatter
( T alpha,
  const ElementalMatrix<T>& A,
        ElementalMatrix<T>& B )
{
    DEBUG_ONLY(CSE cse("axpy_contract::PartialRowScatter"))
    AssertSameGrids( A, B );
    if( A.Height() != B.Height() || A.Width() != B.Width() )
        LogicError("Matrix sizes did not match");
    if( !B.Participating() )
        return;

    if( B.RowAlign() % A.RowStride() == A.RowAlign() )
    {
        const Int rowStride = B.RowStride();
        const Int rowStridePart = B.PartialRowStride();
        const Int rowStrideUnion = B.PartialUnionRowStride();
        const Int rowRankPart = B.PartialRowRank();

        const Int height = B.Height();
        const Int width = B.Width();
        const Int maxLocalWidth = MaxLength( width, rowStride );
        const Int recvSize = mpi::Pad( height*maxLocalWidth );
        const Int sendSize = rowStrideUnion*recvSize;

        //vector<T> buffer( sendSize );
        vector<T> buffer;
        buffer.reserve( sendSize );

        // Pack
        copy::util::PartialRowStridedPack
        ( height, width,
          B.RowAlign(), rowStride,
          rowStrideUnion, rowStridePart, rowRankPart,
          A.RowShift(),
          A.LockedBuffer(), A.LDim(),
          buffer.data(),    recvSize );

        // Communicate
        mpi::ReduceScatter( buffer.data(), recvSize, B.PartialUnionRowComm() );

        // Unpack our received data
        axpy::util::InterleaveMatrixUpdate
        ( alpha, height, B.LocalWidth(),
          buffer.data(), 1, height,
          B.Buffer(),    1, B.LDim() );
    }
    else
        LogicError("Unaligned PartialRowScatter not implemented");
}
예제 #5
0
void Contract
( const ElementalMatrix<T>& A,
        ElementalMatrix<T>& B )
{
    DEBUG_ONLY(CSE cse("Contract"))
    AssertSameGrids( A, B );
    const Dist U = B.ColDist();
    const Dist V = B.RowDist();
    // TODO: Shorten this implementation?
    if( A.ColDist() == U && A.RowDist() == V )
    {
        Copy( A, B );
    }
    else if( A.ColDist() == U && A.RowDist() == Partial(V) )
    {
        B.AlignAndResize
        ( A.ColAlign(), A.RowAlign(), A.Height(), A.Width(), false, false );
        Zeros( B.Matrix(), B.LocalHeight(), B.LocalWidth() );
        AxpyContract( T(1), A, B );
    }
    else if( A.ColDist() == Partial(U) && A.RowDist() == V )
    {
        B.AlignAndResize
        ( A.ColAlign(), A.RowAlign(), A.Height(), A.Width(), false, false );
        Zeros( B.Matrix(), B.LocalHeight(), B.LocalWidth() );
        AxpyContract( T(1), A, B );
    }
    else if( A.ColDist() == U && A.RowDist() == Collect(V) )
    {
        B.AlignColsAndResize
        ( A.ColAlign(), A.Height(), A.Width(), false, false );
        Zeros( B.Matrix(), B.LocalHeight(), B.LocalWidth() );
        AxpyContract( T(1), A, B );
    }
    else if( A.ColDist() == Collect(U) && A.RowDist() == V )
    {
        B.AlignRowsAndResize
        ( A.RowAlign(), A.Height(), A.Width(), false, false );
        Zeros( B.Matrix(), B.LocalHeight(), B.LocalWidth() );
        AxpyContract( T(1), A, B );
    }
    else if( A.ColDist() == Collect(U) && A.RowDist() == Collect(V) )
    {
        Zeros( B, A.Height(), A.Width() );
        AxpyContract( T(1), A, B );
    }
    else
        LogicError("Incompatible distributions");
}
예제 #6
0
void Scatter
( T alpha,
  const ElementalMatrix<T>& A,
        ElementalMatrix<T>& B )
{
    DEBUG_ONLY(CSE cse("axpy_contract::Scatter"))
    AssertSameGrids( A, B );
    if( A.Height() != B.Height() || A.Width() != B.Width() )
        LogicError("Sizes of A and B must match");
    if( !B.Participating() )
        return;

    const Int colStride = B.ColStride();
    const Int rowStride = B.RowStride();
    const Int colAlign = B.ColAlign();
    const Int rowAlign = B.RowAlign();

    const Int height = B.Height();
    const Int width = B.Width();
    const Int localHeight = B.LocalHeight();
    const Int localWidth = B.LocalWidth();
    const Int maxLocalHeight = MaxLength(height,colStride);
    const Int maxLocalWidth = MaxLength(width,rowStride);

    const Int recvSize = mpi::Pad( maxLocalHeight*maxLocalWidth );
    const Int sendSize = colStride*rowStride*recvSize;

    //vector<T> buffer( sendSize );
    vector<T> buffer;
    buffer.reserve( sendSize );

    // Pack 
    copy::util::StridedPack
    ( height, width,
      colAlign, colStride,
      rowAlign, rowStride,
      A.LockedBuffer(), A.LDim(),
      buffer.data(),    recvSize );

    // Communicate
    mpi::ReduceScatter( buffer.data(), recvSize, B.DistComm() );

    // Unpack our received data
    axpy::util::InterleaveMatrixUpdate
    ( alpha, localHeight, localWidth,
      buffer.data(), 1, localHeight,
      B.Buffer(),    1, B.LDim() );
}
예제 #7
0
파일: LUN.hpp 프로젝트: timwee/Elemental
void LUNMedium
( const ElementalMatrix<F>& UPre, ElementalMatrix<F>& XPre, 
  bool checkIfSingular )
{
    DEBUG_CSE
    const Int m = XPre.Height();
    const Int bsize = Blocksize();
    const Grid& g = UPre.Grid();

    DistMatrixReadProxy<F,F,MC,MR> UProx( UPre );
    DistMatrixReadWriteProxy<F,F,MC,MR> XProx( XPre );
    auto& U = UProx.GetLocked();
    auto& X = XProx.Get();

    DistMatrix<F,MC,  STAR> U01_MC_STAR(g);
    DistMatrix<F,STAR,STAR> U11_STAR_STAR(g);
    DistMatrix<F,MR,  STAR> X1Trans_MR_STAR(g);

    const Int kLast = LastOffset( m, bsize );
    Int k=kLast, kOld=m;
    while( true )
    {
        const bool in2x2 = ( k>0 && U.Get(k,k-1) != F(0) );
        if( in2x2 )
            --k;
        const Int nb = kOld-k;

        const Range<Int> ind0( 0, k    ),
                         ind1( k, k+nb );

        auto U01 = U( ind0, ind1 );
        auto U11 = U( ind1, ind1 );

        auto X0 = X( ind0, ALL );
        auto X1 = X( ind1, ALL );

        U11_STAR_STAR = U11; // U11[* ,* ] <- U11[MC,MR]
        X1Trans_MR_STAR.AlignWith( X0 );
        Transpose( X1, X1Trans_MR_STAR );
        
        // X1^T[MR,* ] := X1^T[MR,* ] U11^-T[* ,* ]
        //              = (U11^-1[* ,* ] X1[* ,MR])^T
        LocalQuasiTrsm
        ( RIGHT, UPPER, TRANSPOSE,
          F(1), U11_STAR_STAR, X1Trans_MR_STAR, checkIfSingular );
        Transpose( X1Trans_MR_STAR, X1 );

        U01_MC_STAR.AlignWith( X0 );
        U01_MC_STAR = U01;  // U01[MC,* ] <- U01[MC,MR]

        // X0[MC,MR] -= U01[MC,* ] X1[* ,MR]
        LocalGemm
        ( NORMAL, TRANSPOSE, F(-1), U01_MC_STAR, X1Trans_MR_STAR, F(1), X0 );

        if( k == 0 )
            break;
        kOld = k;
        k -= Min(bsize,k);
    }
}
예제 #8
0
void TransposeContract
( const ElementalMatrix<T>& A,
        ElementalMatrix<T>& B, bool conjugate )
{
    EL_DEBUG_CSE
    const Dist U = B.ColDist();
    const Dist V = B.RowDist();
    if( A.ColDist() == V && A.RowDist() == Partial(U) )
    {
        Transpose( A, B, conjugate );
    }
    else
    {
        unique_ptr<ElementalMatrix<T>> 
            ASumFilt( B.ConstructTranspose(B.Grid(),B.Root()) );
        if( B.ColConstrained() )
            ASumFilt->AlignRowsWith( B, true );
        if( B.RowConstrained() )
            ASumFilt->AlignColsWith( B, true );
        Contract( A, *ASumFilt );
        if( !B.ColConstrained() )
            B.AlignColsWith( *ASumFilt, false );
        if( !B.RowConstrained() )
            B.AlignRowsWith( *ASumFilt, false );
        // We should have ensured that the alignments match
        B.Resize( A.Width(), A.Height() );
        Transpose( ASumFilt->LockedMatrix(), B.Matrix(), conjugate );
    }
}
예제 #9
0
void AugmentedKKT
( const ElementalMatrix<Real>& A,
  const ElementalMatrix<Real>& x,
  const ElementalMatrix<Real>& z,
        ElementalMatrix<Real>& JPre,
  bool onlyLower )
{
    EL_DEBUG_CSE
    const Int m = A.Height();
    const Int n = A.Width();

    DistMatrixWriteProxy<Real,Real,MC,MR> JProx( JPre );
    auto& J = JProx.Get();

    Zeros( J, m+n, m+n );
    const IR xInd(0,n), yInd(n,n+m);
    auto Jxx = J(xInd,xInd); auto Jxy = J(xInd,yInd);
    auto Jyx = J(yInd,xInd); auto Jyy = J(yInd,yInd);
    DistMatrix<Real,MC,STAR> d( z );
    DiagonalSolve( LEFT, NORMAL, x, d );
    Diagonal( Jxx, d );
    Jyx = A;
    if( !onlyLower )
        Transpose( A, Jxy );
}
예제 #10
0
void MaxEig
( const ElementalMatrix<Real>& xPre, 
        ElementalMatrix<Real>& maxEigsPre,
  const ElementalMatrix<Int>& orders, 
  const ElementalMatrix<Int>& firstIndsPre,
  Int cutoff )
{
    DEBUG_ONLY(CSE cse("soc::MaxEig"))
    AssertSameGrids( xPre, maxEigsPre, orders, firstIndsPre );

    ElementalProxyCtrl ctrl;
    ctrl.colConstrain = true;
    ctrl.colAlign = 0;

    DistMatrixReadProxy<Real,Real,VC,STAR>
      xProx( xPre, ctrl );
    DistMatrixWriteProxy<Real,Real,VC,STAR>
      maxEigsProx( maxEigsPre, ctrl );
    DistMatrixReadProxy<Int,Int,VC,STAR>
      firstIndsProx( firstIndsPre, ctrl );
    auto& x = xProx.GetLocked();
    auto& maxEigs = maxEigsProx.Get();
    auto& firstInds = firstIndsProx.GetLocked();

    const Int height = x.Height();
    const Int localHeight = x.LocalHeight();
    DEBUG_ONLY(
      if( x.Width() != 1 || orders.Width() != 1 || firstInds.Width() != 1 )
          LogicError("x, orders, and firstInds should be column vectors");
      if( orders.Height() != height || firstInds.Height() != height )
          LogicError("orders and firstInds should be of the same height as x");
    )
예제 #11
0
void MakeExtendedKahan
( ElementalMatrix<F>& A, Base<F> phi, Base<F> mu )
{
    EL_DEBUG_CSE
    typedef Base<F> Real;

    if( A.Height() != A.Width() )
        LogicError("Extended Kahan matrices must be square");
    const Int n = A.Height();
    if( n % 3 != 0 )
        LogicError("Dimension must be an integer multiple of 3");
    const Int l = n / 3;
    if( !l || (l & (l-1)) )
        LogicError("n/3 is not a power of two");
    Int k=0;
    while( Int(1u<<k) < l )
        ++k;

    if( phi <= Real(0) || phi >= Real(1) )
        LogicError("phi must be in (0,1)");
    if( mu <= Real(0) || mu >= Real(1) )
        LogicError("mu must be in (0,1)");

    // Start by setting A to the identity, and then modify the necessary 
    // l x l blocks of its 3 x 3 partitioning.
    MakeIdentity( A );
    unique_ptr<ElementalMatrix<F>> ABlock( A.Construct(A.Grid(),A.Root()) );
    View( *ABlock, A, IR(2*l,3*l), IR(2*l,3*l) );
    *ABlock *= mu;
    View( *ABlock, A, IR(0,l), IR(l,2*l) );
    Walsh( *ABlock, k );
    *ABlock *= -phi;
    View( *ABlock, A, IR(l,2*l), IR(2*l,3*l) );
    Walsh( *ABlock, k );
    *ABlock *= phi;

    // Now scale A by S
    const Real zeta = Sqrt(Real(1)-phi*phi);
    auto& ALoc = A.Matrix();
    for( Int iLoc=0; iLoc<A.LocalHeight(); ++iLoc )
    {
        const Int i = A.GlobalRow(iLoc);
        const Real gamma = Pow(zeta,Real(i));
        for( Int jLoc=0; jLoc<A.LocalWidth(); ++jLoc )
            ALoc(iLoc,jLoc) *= gamma;
    }
}
예제 #12
0
void ExplicitTriang( ElementalMatrix<F>& A )
{
    DEBUG_ONLY(CSE cse("rq::ExplicitTriang"))
    DistMatrix<F,MD,STAR> t(A.Grid());
    DistMatrix<Base<F>,MD,STAR> d(A.Grid());
    Householder( A, t, d );
    MakeTrapezoidal( UPPER, A, A.Width()-A.Height() );
}
예제 #13
0
void IPM
( const ElementalMatrix<Real>& A,
  const ElementalMatrix<Real>& b, 
        Real lambda,
        ElementalMatrix<Real>& x,
  const qp::affine::Ctrl<Real>& ctrl )
{
    DEBUG_CSE
    const Int m = A.Height();
    const Int n = A.Width();
    const Grid& g = A.Grid();
    const Range<Int> uInd(0,n), vInd(n,2*n), rInd(2*n,2*n+m);
    DistMatrix<Real> Q(g), c(g), AHat(g), G(g), h(g);

    // Q := | 0 0 0 |
    //      | 0 0 0 |
    //      | 0 0 I |
    // ==============
    Zeros( Q, 2*n+m, 2*n+m );
    auto Qrr = Q( rInd, rInd );
    FillDiagonal( Qrr, Real(1) );

    // c := lambda*[1;1;0]
    // ===================
    Zeros( c, 2*n+m, 1 );
    auto cuv = c( IR(0,2*n), ALL );
    Fill( cuv, lambda );

    // \hat A := [A, -A, I]
    // ====================
    Zeros( AHat, m, 2*n+m );
    auto AHatu = AHat( IR(0,m), uInd );
    auto AHatv = AHat( IR(0,m), vInd );
    auto AHatr = AHat( IR(0,m), rInd );
    AHatu = A;
    AHatv -= A;
    FillDiagonal( AHatr, Real(1) );

    // G := | -I  0 0 |
    //      |  0 -I 0 |
    // ================
    Zeros( G, 2*n, 2*n+m );
    FillDiagonal( G, Real(-1) );

    // h := 0
    // ======
    Zeros( h, 2*n, 1 );

    // Solve the affine QP
    // ===================
    DistMatrix<Real> xHat(g), y(g), z(g), s(g);
    QP( Q, AHat, G, b, c, h, xHat, y, z, s, ctrl );

    // x := u - v
    // ==========
    x = xHat( uInd, ALL );
    x -= xHat( vInd, ALL );
}
예제 #14
0
파일: SDC.hpp 프로젝트: timwee/Elemental
void SDC
( UpperOrLower uplo,
  ElementalMatrix<F>& APre, 
  ElementalMatrix<Base<F>>& wPre, 
  const HermitianSDCCtrl<Base<F>> ctrl )
{
    DEBUG_CSE

    typedef Base<F> Real;
    const Int n = APre.Height();
    wPre.Resize( n, 1 );
    if( APre.Grid().Size() == 1 )
    {
        HermitianEig( uplo, APre.Matrix(), wPre.Matrix() );
        return;
    }
    if( n <= ctrl.cutoff )
    {
        HermitianEig( uplo, APre, wPre );
        return;
    }

    DistMatrixReadWriteProxy<F,F,MC,MR> AProx( APre );
    DistMatrixWriteProxy<Real,Real,VR,STAR> wProx( wPre );
    auto& A = AProx.Get();
    auto& w = wProx.Get();

    // Perform this level's split
    const auto part = SpectralDivide( uplo, A, ctrl );
    auto ind1 = IR(0,part.index);
    auto ind2 = IR(part.index,n);

    auto ATL = A( ind1, ind1 );
    auto ATR = A( ind1, ind2 );
    auto ABL = A( ind2, ind1 );
    auto ABR = A( ind2, ind2 );

    auto wT = w( ind1, ALL );
    auto wB = w( ind2, ALL );

    if( uplo == LOWER )
        Zero( ABL );
    else
        Zero( ATR );

    // Recurse on the two subproblems
    DistMatrix<F> ATLSub, ABRSub;
    DistMatrix<Real,VR,STAR> wTSub, wBSub;
    PushSubproblems
    ( ATL, ABR, ATLSub, ABRSub, wT, wB, wTSub, wBSub, ctrl.progress );
    if( ATL.Participating() )
        SDC( uplo, ATLSub, wTSub, ctrl );
    if( ABR.Participating() )
        SDC( uplo, ABRSub, wBSub, ctrl );
    PullSubproblems( ATL, ABR, ATLSub, ABRSub, wT, wB, wTSub, wBSub );
}
예제 #15
0
파일: LLN.hpp 프로젝트: timwee/Elemental
void LLNMedium
( const ElementalMatrix<F>& LPre,
        ElementalMatrix<F>& XPre, 
  bool checkIfSingular )
{
    DEBUG_CSE
    const Int m = XPre.Height();
    const Int bsize = Blocksize();
    const Grid& g = LPre.Grid();

    DistMatrixReadProxy<F,F,MC,MR> LProx( LPre );
    DistMatrixReadWriteProxy<F,F,MC,MR> XProx( XPre );
    auto& L = LProx.GetLocked();
    auto& X = XProx.Get();

    DistMatrix<F,STAR,STAR> L11_STAR_STAR(g);
    DistMatrix<F,MC,  STAR> L21_MC_STAR(g);
    DistMatrix<F,MR,  STAR> X1Trans_MR_STAR(g);

    for( Int k=0; k<m; k+=bsize )
    {
        const Int nbProp = Min(bsize,m-k);
        const bool in2x2 = ( k+nbProp<m && L.Get(k+nbProp-1,k+nbProp) != F(0) );
        const Int nb = ( in2x2 ? nbProp+1 : nbProp );

        const Range<Int> ind1( k,    k+nb ),
                         ind2( k+nb, m    );

        auto L11 = L( ind1, ind1 );
        auto L21 = L( ind2, ind1 );

        auto X1 = X( ind1, ALL );
        auto X2 = X( ind2, ALL );

        L11_STAR_STAR = L11; // L11[* ,* ] <- L11[MC,MR]
        X1Trans_MR_STAR.AlignWith( X2 );
        Transpose( X1, X1Trans_MR_STAR );

        // X1^T[MR,* ] := X1^T[MR,* ] L11^-T[* ,* ]
        //              = (L11^-1[* ,* ] X1[* ,MR])^T
        LocalQuasiTrsm
        ( RIGHT, LOWER, TRANSPOSE,
          F(1), L11_STAR_STAR, X1Trans_MR_STAR, checkIfSingular );

        Transpose( X1Trans_MR_STAR, X1 );
        L21_MC_STAR.AlignWith( X2 );
        L21_MC_STAR = L21;                   // L21[MC,* ] <- L21[MC,MR]
        
        // X2[MC,MR] -= L21[MC,* ] X1[* ,MR]
        LocalGemm
        ( NORMAL, TRANSPOSE, F(-1), L21_MC_STAR, X1Trans_MR_STAR, F(1), X2 );
    }
}
예제 #16
0
파일: LLN.hpp 프로젝트: timwee/Elemental
void LLNLarge
( const ElementalMatrix<F>& LPre,
        ElementalMatrix<F>& XPre, 
  bool checkIfSingular )
{
    DEBUG_CSE
    const Int m = XPre.Height();
    const Int bsize = Blocksize();
    const Grid& g = LPre.Grid();

    DistMatrixReadProxy<F,F,MC,MR> LProx( LPre );
    DistMatrixReadWriteProxy<F,F,MC,MR> XProx( XPre );
    auto& L = LProx.GetLocked();
    auto& X = XProx.Get();

    DistMatrix<F,STAR,STAR> L11_STAR_STAR(g);
    DistMatrix<F,MC,  STAR> L21_MC_STAR(g);
    DistMatrix<F,STAR,MR  > X1_STAR_MR(g);
    DistMatrix<F,STAR,VR  > X1_STAR_VR(g);

    for( Int k=0; k<m; k+=bsize )
    {
        const Int nbProp = Min(bsize,m-k);
        const bool in2x2 = ( k+nbProp<m && L.Get(k+nbProp-1,k+nbProp) != F(0) );
        const Int nb = ( in2x2 ? nbProp+1 : nbProp );

        const Range<Int> ind1( k,    k+nb ),
                         ind2( k+nb, m    );

        auto L11 = L( ind1, ind1 );
        auto L21 = L( ind2, ind1 );

        auto X1 = X( ind1, ALL );
        auto X2 = X( ind2, ALL );

        // X1[* ,VR] := L11^-1[* ,* ] X1[* ,VR]
        L11_STAR_STAR = L11; 
        X1_STAR_VR    = X1; 
        LocalQuasiTrsm
        ( LEFT, LOWER, NORMAL, F(1), L11_STAR_STAR, X1_STAR_VR,
          checkIfSingular );

        X1_STAR_MR.AlignWith( X2 );
        X1_STAR_MR  = X1_STAR_VR; // X1[* ,MR]  <- X1[* ,VR]
        X1          = X1_STAR_MR; // X1[MC,MR] <- X1[* ,MR]
        L21_MC_STAR.AlignWith( X2 );
        L21_MC_STAR = L21;        // L21[MC,* ] <- L21[MC,MR]
        
        // X2[MC,MR] -= L21[MC,* ] X1[* ,MR]
        LocalGemm( NORMAL, NORMAL, F(-1), L21_MC_STAR, X1_STAR_MR, F(1), X2 );
    }
}
예제 #17
0
void ExplicitTriang( ElementalMatrix<F>& A )
{
    DEBUG_CSE
    const Grid& g = A.Grid();
    DistMatrix<F,MD,STAR> householderScalars(g);
    DistMatrix<Base<F>,MD,STAR> signature(g);
    LQ( A, householderScalars, signature );

    const Int m = A.Height();
    const Int n = A.Width();
    const Int minDim = Min(m,n);
    A.Resize( m, minDim );
    MakeTrapezoidal( LOWER, A );
}
예제 #18
0
void KKT
( const ElementalMatrix<Real>& A,
  const ElementalMatrix<Real>& x,
  const ElementalMatrix<Real>& z,
        ElementalMatrix<Real>& JPre, bool onlyLower )
{
    EL_DEBUG_CSE
    const Int m = A.Height();
    const Int n = A.Width();

    DistMatrixWriteProxy<Real,Real,MC,MR> JProx( JPre );
    auto& J = JProx.Get();

    Zeros( J, 2*n+m, 2*n+m );
    const IR xInd(0,n), yInd(n,n+m), zInd(n+m,2*n+m);
    auto Jxx = J(xInd,xInd); auto Jxy = J(xInd,yInd); auto Jxz = J(xInd,zInd);
    auto Jyx = J(yInd,xInd); auto Jyy = J(yInd,yInd); auto Jyz = J(yInd,zInd);
    auto Jzx = J(zInd,xInd); auto Jzy = J(zInd,yInd); auto Jzz = J(zInd,zInd);

    // Jyx := A
    // ========
    Jyx = A;

    // Jzx := -I
    // =========
    Identity( Jzx, n, n );
    Jzx *= -1;

    // Jzz := - z <> x
    // ===============
    DistMatrix<Real,MC,STAR> t(x);
    DiagonalSolve( LEFT, NORMAL, z, t );
    t *= -1;
    Diagonal( Jzz, t );

    if( !onlyLower )
    {
        // Jxy := A^T
        // ==========
        Transpose( A, Jxy );

        // Jxz := -I
        // =========
        Identity( Jxz, n, n );
        Jxz *= -1;
    }
}
예제 #19
0
void
Householder
( ElementalMatrix<F>& APre,
  ElementalMatrix<F>& phasePre, 
  ElementalMatrix<Base<F>>& signaturePre )
{
    DEBUG_CSE
    DEBUG_ONLY(AssertSameGrids( APre, phasePre, signaturePre ))
    const Int m = APre.Height();
    const Int n = APre.Width();
    const Int minDim = Min(m,n);
    const Int iOff = m-minDim;
    const Int jOff = n-minDim;

    DistMatrixReadWriteProxy<F,F,MC,MR> AProx( APre );
    DistMatrixWriteProxy<F,F,MD,STAR> phaseProx( phasePre );
    DistMatrixWriteProxy<Base<F>,Base<F>,MD,STAR> signatureProx( signaturePre );
    auto& A = AProx.Get();
    auto& phase = phaseProx.Get();
    auto& signature = signatureProx.Get();
    phase.Resize( minDim, 1 );
    signature.Resize( minDim, 1 );

    const Int bsize = Blocksize();
    const Int kLast = LastOffset( minDim, bsize );
    for( Int k=kLast; k>=0; k-=bsize )
    {
        const Int nb = Min(bsize,minDim-k);

        const Int ki = k + iOff;
        const Int kj = k + jOff;

        const Range<Int> ind0Vert( 0,  ki    ),
                         ind1(     k,  k+nb  ),
                         ind1Vert( ki, ki+nb ),
                         indL( 0, kj+nb );

        auto A0L = A( ind0Vert, indL );
        auto A1L = A( ind1Vert, indL );
        auto phase1 = phase( ind1, ALL );
        auto sig1 = signature( ind1, ALL );

        PanelHouseholder( A1L, phase1, sig1 );
        ApplyQ( RIGHT, ADJOINT, A1L, phase1, sig1, A0L );
    }
}
예제 #20
0
void TestCorrectness
( bool print,
  UpperOrLower uplo,
  const ElementalMatrix<F>& AOrig,
  const ElementalMatrix<F>& A,
  const ElementalMatrix<Base<F>>& w,
  const ElementalMatrix<F>& Z )
{
    typedef Base<F> Real;
    const Grid& g = A.Grid();
    const Int n = Z.Height();
    const Int k = Z.Width();
    const Real eps = limits::Epsilon<Real>();

    DistMatrix<F> X(g);
    Identity( X, k, k );
    Herk( uplo, ADJOINT, Real(-1), Z, Real(1), X );
    const Real infOrthogError = HermitianInfinityNorm( uplo, X );
    const Real relOrthogError = infOrthogError / (eps*n);
    OutputFromRoot(g.Comm(),"||Z^H Z - I||_oo / (eps n) = ",relOrthogError);

    // X := AZ
    X.AlignWith( Z );
    Zeros( X, n, k );
    Hemm( LEFT, uplo, F(1), AOrig, Z, F(0), X );
    // Find the residual ||X-ZW||_oo = ||AZ-ZW||_oo
    DistMatrix<F> ZW( Z );
    DiagonalScale( RIGHT, NORMAL, w, ZW );
    X -= ZW;
    const Real oneNormA = HermitianOneNorm( uplo, AOrig );
    if( oneNormA == Real(0) )
        LogicError("Tried to test relative accuracy on zero matrix...");
    const Real infError = InfinityNorm( X );
    const Real relError = infError / (n*eps*oneNormA);
    OutputFromRoot(g.Comm(),"||A Z - Z W||_oo / (eps n ||A||_1) = ",relError);

    // TODO: More refined failure conditions
    if( relOrthogError > Real(200) ) // yes, really
        LogicError("Relative orthogonality error was unacceptably large");
    if( relError > Real(10) )
        LogicError("Relative error was unacceptably large");
}
예제 #21
0
void
Householder
( ElementalMatrix<F>& APre,
  ElementalMatrix<F>& phasePre, 
  ElementalMatrix<Base<F>>& signaturePre )
{
    DEBUG_CSE
    DEBUG_ONLY(AssertSameGrids( APre, phasePre, signaturePre ))
    const Int m = APre.Height();
    const Int n = APre.Width();
    const Int minDim = Min(m,n);

    DistMatrixReadWriteProxy<F,F,MC,MR> AProx( APre );
    DistMatrixWriteProxy<F,F,MD,STAR> phaseProx( phasePre );
    DistMatrixWriteProxy<Base<F>,Base<F>,MD,STAR> signatureProx( signaturePre );
    auto& A = AProx.Get();
    auto& phase = phaseProx.Get();
    auto& signature = signatureProx.Get();

    phase.Resize( minDim, 1 );
    signature.Resize( minDim, 1 );

    const Int bsize = Blocksize();
    for( Int k=0; k<minDim; k+=bsize )
    {
        const Int nb = Min(bsize,minDim-k);

        const Range<Int> ind1( k,    k+nb ),
                         indB( k,    END  ),
                         ind2( k+nb, END  );

        auto AB1 = A( indB, ind1 );
        auto AB2 = A( indB, ind2 );
        auto phase1 = phase( ind1, ALL );
        auto sig1 = signature( ind1, ALL );

        PanelHouseholder( AB1, phase1, sig1 );
        ApplyQ( LEFT, ADJOINT, AB1, phase1, sig1, AB2 );
    }
}
예제 #22
0
void IndexDependentMap
( const ElementalMatrix<S>& A,
        ElementalMatrix<T>& B, 
  function<T(Int,Int,S)> func )
{
    DEBUG_CSE
    const Int mLoc = A.LocalHeight();
    const Int nLoc = A.LocalWidth();
    B.AlignWith( A.DistData() );
    B.Resize( A.Height(), A.Width() );
    auto& ALoc = A.LockedMatrix();
    auto& BLoc = B.Matrix();
    for( Int jLoc=0; jLoc<nLoc; ++jLoc )
    {
        const Int j = A.GlobalCol(jLoc);
        for( Int iLoc=0; iLoc<mLoc; ++iLoc )
        {
            const Int i = A.GlobalRow(iLoc);
            BLoc(iLoc,jLoc) = func(i,j,ALoc(iLoc,jLoc));
        }
    }
}
예제 #23
0
파일: Local.hpp 프로젝트: timwee/Elemental
void LocalTrr2kKernel
( UpperOrLower uplo,
  Orientation orientA, Orientation orientB,
  Orientation orientC, Orientation orientD,
  T alpha, const ElementalMatrix<T>& A, const ElementalMatrix<T>& B,
  T beta,  const ElementalMatrix<T>& C, const ElementalMatrix<T>& D,
                 ElementalMatrix<T>& E )
{
    DEBUG_CSE

    const bool transA = orientA != NORMAL;
    const bool transB = orientB != NORMAL;
    const bool transC = orientC != NORMAL;
    const bool transD = orientD != NORMAL;
    // TODO: Stringent distribution and alignment checks

    typedef ElementalMatrix<T> ADM;
    auto A0 = unique_ptr<ADM>( A.Construct(A.Grid(),A.Root()) );
    auto A1 = unique_ptr<ADM>( A.Construct(A.Grid(),A.Root()) );
    auto B0 = unique_ptr<ADM>( B.Construct(B.Grid(),B.Root()) );
    auto B1 = unique_ptr<ADM>( B.Construct(B.Grid(),B.Root()) );
    auto C0 = unique_ptr<ADM>( C.Construct(C.Grid(),C.Root()) );
    auto C1 = unique_ptr<ADM>( C.Construct(C.Grid(),C.Root()) );
    auto D0 = unique_ptr<ADM>( D.Construct(D.Grid(),D.Root()) );
    auto D1 = unique_ptr<ADM>( D.Construct(D.Grid(),D.Root()) );
    auto ETL = unique_ptr<ADM>( E.Construct(E.Grid(),E.Root()) );
    auto ETR = unique_ptr<ADM>( E.Construct(E.Grid(),E.Root()) );
    auto EBL = unique_ptr<ADM>( E.Construct(E.Grid(),E.Root()) );
    auto EBR = unique_ptr<ADM>( E.Construct(E.Grid(),E.Root()) );
    auto FTL = unique_ptr<ADM>( E.Construct(E.Grid(),E.Root()) );
    auto FBR = unique_ptr<ADM>( E.Construct(E.Grid(),E.Root()) );

    const Int half = E.Height() / 2;
    if( transA )
        LockedPartitionRight( A, *A0, *A1, half );
    else
        LockedPartitionDown( A, *A0, *A1, half );
    if( transB )
        LockedPartitionDown( B, *B0, *B1, half );
    else
        LockedPartitionRight( B, *B0, *B1, half );
    if( transC )
        LockedPartitionRight( C, *C0, *C1, half );
    else
        LockedPartitionDown( C, *C0, *C1, half );
    if( transD )
        LockedPartitionDown( D, *D0, *D1, half );
    else
        LockedPartitionRight( D, *D0, *D1, half );
    PartitionDownDiagonal( E, *ETL, *ETR, *EBL, *EBR, half );

    if( uplo == LOWER )
    {
        Gemm
        ( orientA, orientB, 
          alpha, A1->LockedMatrix(), B0->LockedMatrix(), 
          T(1), EBL->Matrix() );
        Gemm
        ( orientC, orientD, 
          beta, C1->LockedMatrix(), D0->LockedMatrix(), 
          T(1), EBL->Matrix() );
    }
    else
    {
        Gemm
        ( orientA, orientB, 
          alpha, A0->LockedMatrix(), B1->LockedMatrix(), 
          T(1), ETR->Matrix() );
        Gemm
        ( orientC, orientD, 
          beta, C0->LockedMatrix(), D1->LockedMatrix(), 
          T(1), ETR->Matrix() );
    }

    FTL->AlignWith( *ETL );
    FTL->Resize( ETL->Height(), ETL->Width() );
    Gemm
    ( orientA, orientB, 
      alpha, A0->LockedMatrix(), B0->LockedMatrix(),
      T(0), FTL->Matrix() );
    Gemm
    ( orientC, orientD,
      beta, C0->LockedMatrix(), D0->LockedMatrix(),
      T(1), FTL->Matrix() );
    AxpyTrapezoid( uplo, T(1), *FTL, *ETL );

    FBR->AlignWith( *EBR );
    FBR->Resize( EBR->Height(), EBR->Width() );
    Gemm
    ( orientA, orientB, 
      alpha, A1->LockedMatrix(), B1->LockedMatrix(),
      T(0), FBR->Matrix() );
    Gemm
    ( orientC, orientD,
      beta, C1->LockedMatrix(), D1->LockedMatrix(),
      T(1), FBR->Matrix() );
    AxpyTrapezoid( uplo, T(1), *FBR, *EBR );
}
예제 #24
0
파일: Local.hpp 프로젝트: timwee/Elemental
void LocalTrr2k
( UpperOrLower uplo, 
  Orientation orientA, Orientation orientB,
  Orientation orientC, Orientation orientD,
  T alpha, const ElementalMatrix<T>& A, const ElementalMatrix<T>& B,
  T beta,  const ElementalMatrix<T>& C, const ElementalMatrix<T>& D,
  T gamma,       ElementalMatrix<T>& E )
{
    using namespace trr2k;
    DEBUG_CSE

    const bool transA = orientA != NORMAL;
    const bool transB = orientB != NORMAL;
    const bool transC = orientC != NORMAL;
    const bool transD = orientD != NORMAL;
    // TODO: Stringent distribution and alignment checks

    ScaleTrapezoid( gamma, uplo, E );
    if( E.Height() < E.Grid().Width()*LocalTrr2kBlocksize<T>() )
    {
        LocalTrr2kKernel
        ( uplo, orientA, orientB, orientC, orientD, 
          alpha, A, B, beta, C, D, E );
    }
    else
    {
        typedef ElementalMatrix<T> ADM;
        // Ugh. This is likely too much overhead. It should be measured.
        auto A0 = unique_ptr<ADM>( A.Construct(A.Grid(),A.Root()) );
        auto A1 = unique_ptr<ADM>( A.Construct(A.Grid(),A.Root()) );
        auto B0 = unique_ptr<ADM>( B.Construct(B.Grid(),B.Root()) );
        auto B1 = unique_ptr<ADM>( B.Construct(B.Grid(),B.Root()) );
        auto C0 = unique_ptr<ADM>( C.Construct(C.Grid(),C.Root()) );
        auto C1 = unique_ptr<ADM>( C.Construct(C.Grid(),C.Root()) );
        auto D0 = unique_ptr<ADM>( D.Construct(D.Grid(),D.Root()) );
        auto D1 = unique_ptr<ADM>( D.Construct(D.Grid(),D.Root()) );
        auto ETL = unique_ptr<ADM>( E.Construct(E.Grid(),E.Root()) );
        auto ETR = unique_ptr<ADM>( E.Construct(E.Grid(),E.Root()) );
        auto EBL = unique_ptr<ADM>( E.Construct(E.Grid(),E.Root()) );
        auto EBR = unique_ptr<ADM>( E.Construct(E.Grid(),E.Root()) );

        const Int half = E.Height() / 2;
        if( transA )
            LockedPartitionRight( A, *A0, *A1, half );
        else
            LockedPartitionDown( A, *A0, *A1, half );
        if( transB )
            LockedPartitionDown( B, *B0, *B1, half );
        else
            LockedPartitionRight( B, *B0, *B1, half );
        if( transC )
            LockedPartitionRight( C, *C0, *C1, half );
        else
            LockedPartitionDown( C, *C0, *C1, half );
        if( transD )
            LockedPartitionDown( D, *D0, *D1, half );
        else
            LockedPartitionRight( D, *D0, *D1, half );
        PartitionDownDiagonal( E, *ETL, *ETR, *EBL, *EBR, half );

        if( uplo == LOWER )
        { 
            Gemm
            ( orientA, orientB, 
              alpha, A1->LockedMatrix(), B0->LockedMatrix(), 
              T(1), EBL->Matrix() );
            Gemm
            ( orientC, orientD, 
              beta,  C1->LockedMatrix(), D0->LockedMatrix(), 
              T(1), EBL->Matrix() );
        }
        else
        {
            Gemm
            ( orientA, orientB,
              alpha, A0->LockedMatrix(), B1->LockedMatrix(), 
              T(1), ETR->Matrix() );
            Gemm
            ( orientC, orientD,
              beta,  C0->LockedMatrix(), D1->LockedMatrix(), 
              T(1), ETR->Matrix() );
        }

        // Recurse
        LocalTrr2k
        ( uplo, orientA, orientB, orientC, orientD, 
          alpha, *A0, *B0, beta, *C0, *D0, T(1), *ETL );
        LocalTrr2k
        ( uplo, orientA, orientB, orientC, orientD, 
          alpha, *A1, *B1, beta, *C1, *D1, T(1), *EBR );
    }
}
예제 #25
0
void Gather
( const ElementalMatrix<T>& A,
        DistMatrix<T,CIRC,CIRC>& B )
{
    DEBUG_ONLY(CSE cse("copy::Gather"))
    AssertSameGrids( A, B );
    if( A.DistSize() == 1 && A.CrossSize() == 1 )
    {
        B.Resize( A.Height(), A.Width() );
        if( B.CrossRank() == B.Root() )
            Copy( A.LockedMatrix(), B.Matrix() );
        return;
    }

    const Int height = A.Height();
    const Int width = A.Width();
    B.SetGrid( A.Grid() );
    B.Resize( height, width );

    // Gather the colShifts and rowShifts
    // ==================================
    Int myShifts[2];
    myShifts[0] = A.ColShift();
    myShifts[1] = A.RowShift();
    vector<Int> shifts;
    const Int crossSize = B.CrossSize();
    if( B.CrossRank() == B.Root() )
        shifts.resize( 2*crossSize );
    mpi::Gather( myShifts, 2, shifts.data(), 2, B.Root(), B.CrossComm() );

    // Gather the payload data
    // =======================
    const bool irrelevant = ( A.RedundantRank()!=0 || A.CrossRank()!=A.Root() );
    int totalSend = ( irrelevant ? 0 : A.LocalHeight()*A.LocalWidth() );
    vector<int> recvCounts, recvOffsets;
    if( B.CrossRank() == B.Root() )
        recvCounts.resize( crossSize );
    mpi::Gather( &totalSend, 1, recvCounts.data(), 1, B.Root(), B.CrossComm() );
    int totalRecv = Scan( recvCounts, recvOffsets );
    //vector<T> sendBuf(totalSend), recvBuf(totalRecv);
    vector<T> sendBuf, recvBuf;
    sendBuf.reserve( totalSend );
    recvBuf.reserve( totalRecv );
    if( !irrelevant )
        copy::util::InterleaveMatrix
        ( A.LocalHeight(), A.LocalWidth(),
          A.LockedBuffer(), 1, A.LDim(),
          sendBuf.data(),   1, A.LocalHeight() );
    mpi::Gather
    ( sendBuf.data(), totalSend,
      recvBuf.data(), recvCounts.data(), recvOffsets.data(), 
      B.Root(), B.CrossComm() );

    // Unpack
    // ======
    if( B.Root() == B.CrossRank() )
    {
        for( Int q=0; q<crossSize; ++q )
        {
            if( recvCounts[q] == 0 )
                continue;
            const Int colShift = shifts[2*q+0];
            const Int rowShift = shifts[2*q+1];
            const Int colStride = A.ColStride();
            const Int rowStride = A.RowStride();
            const Int localHeight = Length( height, colShift, colStride );
            const Int localWidth = Length( width, rowShift, rowStride );
            copy::util::InterleaveMatrix
            ( localHeight, localWidth,
              &recvBuf[recvOffsets[q]],    1,         localHeight,
              B.Buffer(colShift,rowShift), colStride, rowStride*B.LDim() );
        }
    }
}
예제 #26
0
파일: SDC.hpp 프로젝트: timwee/Elemental
void SDC
( UpperOrLower uplo, 
  ElementalMatrix<F>& APre,
  ElementalMatrix<Base<F>>& wPre, 
  ElementalMatrix<F>& QPre, 
  const HermitianSDCCtrl<Base<F>> ctrl )
{
    DEBUG_CSE

    typedef Base<F> Real;
    const Grid& g = APre.Grid();
    const Int n = APre.Height();
    wPre.Resize( n, 1 );
    QPre.Resize( n, n );
    if( APre.Grid().Size() == 1 )
    {
        HermitianEig( uplo, APre.Matrix(), wPre.Matrix(), QPre.Matrix() );
        return;
    }
    if( n <= ctrl.cutoff )
    {
        HermitianEig( uplo, APre, wPre, QPre );
        return;
    }

    DistMatrixReadWriteProxy<F,F,MC,MR> AProx( APre );
    DistMatrixWriteProxy<F,F,MC,MR> QProx( QPre );
    DistMatrixWriteProxy<Real,Real,VR,STAR> wProx( wPre );
    auto& A = AProx.Get();
    auto& Q = QProx.Get();
    auto& w = wProx.Get();

    // Perform this level's split
    const auto part = SpectralDivide( uplo, A, Q, ctrl );
    auto ind1 = IR(0,part.index);
    auto ind2 = IR(part.index,n);

    auto ATL = A( ind1, ind1 );
    auto ATR = A( ind1, ind2 );
    auto ABL = A( ind2, ind1 );
    auto ABR = A( ind2, ind2 );

    auto wT = w( ind1, ALL );
    auto wB = w( ind2, ALL );

    auto QL = Q( ALL, ind1 );
    auto QR = Q( ALL, ind2 );

    if( uplo == LOWER )
        Zero( ABL );
    else
        Zero( ATR );

    // Recurse on the two subproblems
    DistMatrix<F> ATLSub, ABRSub, ZTSub, ZBSub;
    DistMatrix<Real,VR,STAR> wTSub, wBSub;
    PushSubproblems
    ( ATL, ABR, ATLSub, ABRSub, wT, wB, wTSub, wBSub, ZTSub, ZBSub, 
      ctrl.progress );
    if( ATLSub.Participating() )
        SDC( uplo, ATLSub, wTSub, ZTSub, ctrl );
    if( ABRSub.Participating() )
        SDC( uplo, ABRSub, wBSub, ZBSub, ctrl );

    // Pull the results back to this grid
    DistMatrix<F> ZT(g), ZB(g);
    PullSubproblems
    ( ATL, ABR, ATLSub, ABRSub, wT, wB, wTSub, wBSub, ZT, ZB, ZTSub, ZBSub );

    // Update the eigen vectors
    auto G( QL );
    Gemm( NORMAL, NORMAL, F(1), G, ZT, QL );
    G = QR;
    Gemm( NORMAL, NORMAL, F(1), G, ZB, QR );
}
예제 #27
0
void LUMod
(       ElementalMatrix<F>& APre,
        DistPermutation& P,
  const ElementalMatrix<F>& u,
  const ElementalMatrix<F>& v, 
  bool conjugate,
  Base<F> tau )
{
    DEBUG_CSE
    const Grid& g = APre.Grid();
    typedef Base<F> Real;

    DistMatrixReadWriteProxy<F,F,MC,MR> AProx( APre );
    auto& A = AProx.Get();

    const Int m = A.Height();
    const Int n = A.Width();
    const Int minDim = Min(m,n);

    if( minDim != m )
        LogicError("It is assumed that height(A) <= width(A)");
    if( u.Height() != m || u.Width() != 1 )
        LogicError("u is expected to be a conforming column vector");
    if( v.Height() != n || v.Width() != 1 )
        LogicError("v is expected to be a conforming column vector");
    AssertSameGrids( A, u, v );

    // w := inv(L) P u
    // TODO: Consider locally maintaining all of w to avoid unnecessarily 
    //       broadcasting at every iteration.
    DistMatrix<F> w( u );
    P.PermuteRows( w );
    Trsv( LOWER, NORMAL, UNIT, A, w );

    // Maintain an external vector for the temporary subdiagonal of U
    DistMatrix<F,MD,STAR> uSub(g);
    uSub.SetRoot( A.DiagonalRoot(-1) );
    uSub.AlignCols( A.DiagonalAlign(-1) );
    Zeros( uSub, minDim-1, 1 );

    // Reduce w to a multiple of e0
    for( Int i=minDim-2; i>=0; --i )
    {
        // Decide if we should pivot the i'th and i+1'th rows of w
        const F lambdaSub = A.Get(i+1,i);
        const F ups_ii = A.Get(i,i); 
        const F omega_i = w.Get( i, 0 );
        const F omega_ip1 = w.Get( i+1, 0 );
        const Real rightTerm = Abs(lambdaSub*omega_i+omega_ip1);
        const bool pivot = ( Abs(omega_i) < tau*rightTerm );

        const Range<Int> indB( i+2, m ),
                         indR( i+1, n ),
                         indi( i, i+1 ),
                         indip1( i+1, i+2 );

        auto lBi   = A( indB,   indi   );
        auto lBip1 = A( indB,   indip1 );
        auto uiR   = A( indi,   indR   );
        auto uip1R = A( indip1, indR   );

        if( pivot )
        {
            // P := P_i P
            P.Swap( i, i+1 );

            // Simultaneously perform 
            //   U := P_i U and
            //   L := P_i L P_i^T
            //
            // Then update
            //     L := L T_{i,L}^{-1},
            //     U := T_{i,L} U, 
            //     w := T_{i,L} P_i w,
            // where T_{i,L} is the Gauss transform which zeros (P_i w)_{i+1}.
            // 
            // More succinctly,
            //     gamma    := w(i) / w(i+1),
            //     w(i)     := w(i+1), 
            //     w(i+1)   := 0,
            //     L(:,i)   += gamma L(:,i+1),
            //     U(i+1,:) -= gamma U(i,:).
            const F gamma = omega_i / omega_ip1;
            const F lambda_ii = F(1) + gamma*lambdaSub;
            A.Set( i,   i, gamma );
            A.Set( i+1, i, 0     );

            auto lBiCopy = lBi;
            Swap( NORMAL, lBi, lBip1 );
            Axpy( gamma, lBiCopy, lBi );

            auto uip1RCopy = uip1R;
            RowSwap( A, i, i+1 );
            Axpy( -gamma, uip1RCopy, uip1R );

            // Force L back to *unit* lower-triangular form via the transform
            //     L := L T_{i,U}^{-1} D^{-1}, 
            // where D is diagonal and responsible for forcing L(i,i) and 
            // L(i+1,i+1) back to 1. The effect on L is:
            //     eta       := L(i,i+1)/L(i,i),
            //     L(:,i+1)  -= eta L(:,i),
            //     delta_i   := L(i,i),
            //     delta_ip1 := L(i+1,i+1),
            //     L(:,i)   /= delta_i,
            //     L(:,i+1) /= delta_ip1,
            // while the effect on U is
            //     U(i,:)   += eta U(i+1,:)
            //     U(i,:)   *= delta_i,
            //     U(i+1,:) *= delta_{i+1},
            // and the effect on w is
            //     w(i) *= delta_i.
            const F eta = lambdaSub/lambda_ii;
            const F delta_i = lambda_ii;
            const F delta_ip1 = F(1) - eta*gamma;

            Axpy( -eta, lBi, lBip1 );
            A.Set( i+1, i, gamma/delta_i );
            lBi   *= F(1)/delta_i;
            lBip1 *= F(1)/delta_ip1;

            A.Set( i, i, eta*ups_ii*delta_i );
            Axpy( eta, uip1R, uiR );
            uiR   *= delta_i;
            uip1R *= delta_ip1;
            uSub.Set( i, 0, ups_ii*delta_ip1 );

            // Finally set w(i)
            w.Set( i, 0, omega_ip1*delta_i );
        }
        else
        {
            // Update
            //     L := L T_{i,L}^{-1},
            //     U := T_{i,L} U, 
            //     w := T_{i,L} w,
            // where T_{i,L} is the Gauss transform which zeros w_{i+1}.
            // 
            // More succinctly,
            //     gamma    := w(i+1) / w(i),
            //     L(:,i)   += gamma L(:,i+1),
            //     U(i+1,:) -= gamma U(i,:),
            //     w(i+1)   := 0.
            const F gamma = omega_ip1 / omega_i;
            A.Update( i+1, i, gamma );
            Axpy(  gamma, lBip1, lBi );
            Axpy( -gamma, uiR, uip1R );
            uSub.Set( i, 0, -gamma*ups_ii );
        }
    }

    // Add the modified w v' into U
    {
        auto a0 = A( IR(0), ALL );
        const F omega_0 = w.Get( 0, 0 ); 
        DistMatrix<F> vTrans(g);
        vTrans.AlignWith( a0 );
        Transpose( v, vTrans, conjugate );
        Axpy( omega_0, vTrans, a0 );
    }

    // Transform U from upper-Hessenberg to upper-triangular form
    for( Int i=0; i<minDim-1; ++i ) 
    {
        // Decide if we should pivot the i'th and i+1'th rows U
        const F lambdaSub = A.Get( i+1, i );
        const F ups_ii = A.Get( i, i );
        const F ups_ip1i = uSub.Get( i, 0 );
        const Real rightTerm = Abs(lambdaSub*ups_ii+ups_ip1i);
        const bool pivot = ( Abs(ups_ii) < tau*rightTerm );

        const Range<Int> indB( i+2, m ),
                         indR( i+1, n ),
                         indi( i, i+1 ),
                         indip1( i+1, i+2 );

        auto lBi   = A( indB,   indi   );
        auto lBip1 = A( indB,   indip1 );
        auto uiR   = A( indi,   indR   );
        auto uip1R = A( indip1, indR   );

        if( pivot )
        {
            // P := P_i P
            P.Swap( i, i+1 );

            // Simultaneously perform 
            //   U := P_i U and
            //   L := P_i L P_i^T
            //
            // Then update
            //     L := L T_{i,L}^{-1},
            //     U := T_{i,L} U, 
            // where T_{i,L} is the Gauss transform which zeros U(i+1,i).
            // 
            // More succinctly,
            //     gamma    := U(i+1,i) / U(i,i),
            //     L(:,i)   += gamma L(:,i+1),
            //     U(i+1,:) -= gamma U(i,:).
            const F gamma = ups_ii / ups_ip1i;
            const F lambda_ii = F(1) + gamma*lambdaSub;
            A.Set( i+1, i, ups_ip1i );
            A.Set( i, i, gamma );

            auto lBiCopy = lBi;
            Swap( NORMAL, lBi, lBip1 );
            Axpy( gamma, lBiCopy, lBi );

            auto uip1RCopy = uip1R;
            RowSwap( A, i, i+1 );
            Axpy( -gamma, uip1RCopy, uip1R );

            // Force L back to *unit* lower-triangular form via the transform
            //     L := L T_{i,U}^{-1} D^{-1}, 
            // where D is diagonal and responsible for forcing L(i,i) and 
            // L(i+1,i+1) back to 1. The effect on L is:
            //     eta       := L(i,i+1)/L(i,i),
            //     L(:,i+1)  -= eta L(:,i),
            //     delta_i   := L(i,i),
            //     delta_ip1 := L(i+1,i+1),
            //     L(:,i)   /= delta_i,
            //     L(:,i+1) /= delta_ip1,
            // while the effect on U is
            //     U(i,:)   += eta U(i+1,:)
            //     U(i,:)   *= delta_i,
            //     U(i+1,:) *= delta_{i+1}.
            const F eta = lambdaSub/lambda_ii;
            const F delta_i = lambda_ii;
            const F delta_ip1 = F(1) - eta*gamma;
            Axpy( -eta, lBi, lBip1 );
            A.Set( i+1, i, gamma/delta_i );
            lBi   *= F(1)/delta_i;
            lBip1 *= F(1)/delta_ip1;

            A.Set( i, i, ups_ip1i*delta_i );
            Axpy( eta, uip1R, uiR );
            uiR   *= delta_i;
            uip1R *= delta_ip1;
        }
        else
        {
            // Update
            //     L := L T_{i,L}^{-1},
            //     U := T_{i,L} U, 
            // where T_{i,L} is the Gauss transform which zeros U(i+1,i).
            // 
            // More succinctly,
            //     gamma    := U(i+1,i)/ U(i,i),
            //     L(:,i)   += gamma L(:,i+1),
            //     U(i+1,:) -= gamma U(i,:).
            const F gamma = ups_ip1i / ups_ii;
            A.Update( i+1, i, gamma );
            Axpy(  gamma, lBip1, lBi );
            Axpy( -gamma, uiR, uip1R );
        }
    }
}
예제 #28
0
void ColScatter
( T alpha,
  const ElementalMatrix<T>& A,
        ElementalMatrix<T>& B )
{
    DEBUG_ONLY(CSE cse("axpy_contract::ColScatter"))
    AssertSameGrids( A, B );
    if( A.Height() != B.Height() || A.Width() != B.Width() )
        LogicError("A and B must be the same size");
#ifdef EL_VECTOR_WARNINGS
    if( A.Width() == 1 && B.Grid().Rank() == 0 )
    {
        cerr <<
          "The vector version of ColScatter does not"
          " yet have a vector version implemented, but it would only "
          "require a modification of the vector version of RowScatter"
          << endl;
    }
#endif
#ifdef EL_CACHE_WARNINGS
    if( A.Width() != 1 && B.Grid().Rank() == 0 )
    {
        cerr <<
          "axpy_contract::ColScatter potentially causes a large "
          "amount of cache-thrashing. If possible, avoid it by forming the "
          "(conjugate-)transpose of the [* ,V] matrix instead." << endl;
    }
#endif
    if( !B.Participating() )
        return;
    const Int height = B.Height();
    const Int localHeight = B.LocalHeight();
    const Int localWidth = B.LocalWidth();

    const Int colAlign = B.ColAlign();
    const Int colStride = B.ColStride();

    const Int rowDiff = B.RowAlign()-A.RowAlign();
    // TODO: Allow for modular equivalence if possible
    if( rowDiff == 0 )
    {
        const Int maxLocalHeight = MaxLength(height,colStride);

        const Int recvSize = mpi::Pad( maxLocalHeight*localWidth );
        const Int sendSize = colStride*recvSize;
        //vector<T> buffer( sendSize );
        vector<T> buffer;
        buffer.reserve( sendSize );

        // Pack 
        copy::util::ColStridedPack
        ( height, localWidth,
          colAlign, colStride,
          A.LockedBuffer(), A.LDim(),
          buffer.data(),    recvSize );
    
        // Communicate
        mpi::ReduceScatter( buffer.data(), recvSize, B.ColComm() );

        // Update with our received data
        axpy::util::InterleaveMatrixUpdate
        ( alpha, localHeight, localWidth,
          buffer.data(), 1, localHeight,
          B.Buffer(),    1, B.LDim() );
    }
    else
    {
#ifdef EL_UNALIGNED_WARNINGS
        if( B.Grid().Rank() == 0 )
            cerr << "Unaligned ColScatter" << endl;
#endif
        const Int localWidthA = A.LocalWidth();
        const Int maxLocalHeight = MaxLength(height,colStride);

        const Int recvSize_RS = mpi::Pad( maxLocalHeight*localWidthA );
        const Int sendSize_RS = colStride*recvSize_RS;
        const Int recvSize_SR = localHeight*localWidth;

        //vector<T> buffer( recvSize_RS + Max(sendSize_RS,recvSize_SR) );
        vector<T> buffer;
        buffer.reserve( recvSize_RS + Max(sendSize_RS,recvSize_SR) );
        T* firstBuf = &buffer[0];
        T* secondBuf = &buffer[recvSize_RS];

        // Pack
        copy::util::ColStridedPack
        ( height, localWidth,
          colAlign, colStride,
          A.LockedBuffer(), A.LDim(),
          secondBuf,        recvSize_RS );

        // Reduce-scatter over each col
        mpi::ReduceScatter( secondBuf, firstBuf, recvSize_RS, B.ColComm() );

        // Trade reduced data with the appropriate col
        const Int sendCol = Mod( B.RowRank()+rowDiff, B.RowStride() );
        const Int recvCol = Mod( B.RowRank()-rowDiff, B.RowStride() );
        mpi::SendRecv
        ( firstBuf,  localHeight*localWidthA, sendCol,
          secondBuf, localHeight*localWidth,  recvCol, B.RowComm() );

        // Update with our received data
        axpy::util::InterleaveMatrixUpdate
        ( alpha, localHeight, localWidth,
          secondBuf,  1, localHeight,
          B.Buffer(), 1, B.LDim() );
    }
}
예제 #29
0
void RowScatter
( T alpha,
  const ElementalMatrix<T>& A,
        ElementalMatrix<T>& B )
{
    DEBUG_ONLY(CSE cse("axpy_contract::RowScatter"))
    AssertSameGrids( A, B );
    if( A.Height() != B.Height() || A.Width() != B.Width() )
        LogicError("Matrix sizes did not match");
    if( !B.Participating() )
        return;

    const Int width = B.Width();
    const Int colDiff = B.ColAlign()-A.ColAlign();
    if( colDiff == 0 )
    {
        if( width == 1 )
        {
            const Int localHeight = B.LocalHeight();
            const Int portionSize = mpi::Pad( localHeight );
            //vector<T> buffer( portionSize );
            vector<T> buffer;
            buffer.reserve( portionSize );

            // Reduce to rowAlign
            const Int rowAlign = B.RowAlign();
            mpi::Reduce
            ( A.LockedBuffer(), buffer.data(), portionSize,
              rowAlign, B.RowComm() );

            if( B.RowRank() == rowAlign )
            {
                axpy::util::InterleaveMatrixUpdate
                ( alpha, localHeight, 1,
                  buffer.data(), 1, localHeight,
                  B.Buffer(),    1, B.LDim() );
            }
        }
        else
        {
            const Int rowStride = B.RowStride();
            const Int rowAlign = B.RowAlign();

            const Int localHeight = B.LocalHeight();
            const Int localWidth = B.LocalWidth();
            const Int maxLocalWidth = MaxLength(width,rowStride);

            const Int portionSize = mpi::Pad( localHeight*maxLocalWidth );
            const Int sendSize = rowStride*portionSize;

            // Pack 
            //vector<T> buffer( sendSize );
            vector<T> buffer;
            buffer.reserve( sendSize );
            copy::util::RowStridedPack
            ( localHeight, width,
              rowAlign, rowStride,
              A.LockedBuffer(), A.LDim(),
              buffer.data(), portionSize );

            // Communicate
            mpi::ReduceScatter( buffer.data(), portionSize, B.RowComm() );

            // Update with our received data
            axpy::util::InterleaveMatrixUpdate
            ( alpha, localHeight, localWidth,
              buffer.data(), 1, localHeight,
              B.Buffer(),    1, B.LDim() );
        }
    }
    else
    {
#ifdef EL_UNALIGNED_WARNINGS
        if( B.Grid().Rank() == 0 )
            cerr << "Unaligned RowScatter" << endl;
#endif
        const Int colRank = B.ColRank();
        const Int colStride = B.ColStride();

        const Int sendRow = Mod( colRank+colDiff, colStride );
        const Int recvRow = Mod( colRank-colDiff, colStride );

        const Int localHeight = B.LocalHeight();
        const Int localHeightA = A.LocalHeight();

        if( width == 1 )
        {
            //vector<T> buffer( localHeight+localHeightA );
            vector<T> buffer;
            buffer.reserve( localHeight+localHeightA );
            T* sendBuf = &buffer[0];
            T* recvBuf = &buffer[localHeightA];

            // Reduce to rowAlign
            const Int rowAlign = B.RowAlign();
            mpi::Reduce
            ( A.LockedBuffer(), sendBuf, localHeightA, rowAlign, B.RowComm() );

            if( B.RowRank() == rowAlign )
            {
                // Perform the realignment
                mpi::SendRecv
                ( sendBuf, localHeightA, sendRow,
                  recvBuf, localHeight,  recvRow, B.ColComm() );

                axpy::util::InterleaveMatrixUpdate
                ( alpha, localHeight, 1,
                  recvBuf,    1, localHeight,
                  B.Buffer(), 1, B.LDim() );
            }
        }
        else
        {
            const Int rowStride = B.RowStride();
            const Int rowAlign = B.RowAlign();

            const Int localWidth = B.LocalWidth();
            const Int maxLocalWidth = MaxLength(width,rowStride);

            const Int recvSize_RS = mpi::Pad( localHeightA*maxLocalWidth );
            const Int sendSize_RS = rowStride * recvSize_RS;
            const Int recvSize_SR = localHeight * localWidth;

            //vector<T> buffer( recvSize_RS + Max(sendSize_RS,recvSize_SR) );
            vector<T> buffer;
            buffer.reserve( recvSize_RS + Max(sendSize_RS,recvSize_SR) );
            T* firstBuf = &buffer[0];
            T* secondBuf = &buffer[recvSize_RS];

            // Pack 
            copy::util::RowStridedPack
            ( localHeightA, width,
              rowAlign, rowStride,
              A.LockedBuffer(), A.LDim(),
              secondBuf,        recvSize_RS );

            // Reduce-scatter over each process row
            mpi::ReduceScatter( secondBuf, firstBuf, recvSize_RS, B.RowComm() );

            // Trade reduced data with the appropriate process row
            mpi::SendRecv
            ( firstBuf,  localHeightA*localWidth, sendRow,
              secondBuf, localHeight*localWidth,  recvRow, B.ColComm() );

            // Update with our received data
            axpy::util::InterleaveMatrixUpdate
            ( alpha, localHeight, localWidth,
              secondBuf,  1, localHeight,
              B.Buffer(), 1, B.LDim() );
        }
    }
}
예제 #30
0
void SolveAfter
( Orientation orientation,
  const ElementalMatrix<F>& APre,
  const ElementalMatrix<F>& householderScalars, 
  const ElementalMatrix<Base<F>>& signature,
  const ElementalMatrix<F>& B, 
        ElementalMatrix<F>& XPre )
{
    DEBUG_CSE
    const Int m = APre.Height();
    const Int n = APre.Width();
    if( m > n )
        LogicError("Must have full row rank");

    DistMatrixReadProxy<F,F,MC,MR> AProx( APre );
    DistMatrixWriteProxy<F,F,MC,MR> XProx( XPre );
    auto& A = AProx.GetLocked();
    auto& X = XProx.Get();

    X.Resize( n, B.Width() );
    // TODO: Add scaling

    auto AL = A( IR(0,m), IR(0,m) );
    if( orientation == NORMAL )
    {
        if( m != B.Height() )
            LogicError("A and B do not conform");

        // Copy B into X
        auto XT = X( IR(0,m), ALL );
        auto XB = X( IR(m,n), ALL );
        XT = B;
        Zero( XB );

        if( orientation == TRANSPOSE )
            Conjugate( XT );

        // Solve against L (checking for singularities)
        Trsm( LEFT, LOWER, NORMAL, NON_UNIT, F(1), AL, XT, true );

        // Apply Q' to X 
        lq::ApplyQ( LEFT, ADJOINT, A, householderScalars, signature, X );

        if( orientation == TRANSPOSE )
            Conjugate( X );
    }
    else
    {
        // Copy B into X
        X = B;

        if( orientation == TRANSPOSE )
            Conjugate( X );

        // Apply Q to X
        lq::ApplyQ( LEFT, NORMAL, A, householderScalars, signature, X );

        // Shrink X to its new height
        X.Resize( m, X.Width() );

        // Solve against L' (check for singularities)
        Trsm( LEFT, LOWER, ADJOINT, NON_UNIT, F(1), AL, X, true );

        if( orientation == TRANSPOSE )
            Conjugate( X );
    }
}