示例#1
0
void TransposeContract
( const ElementalMatrix<T>& A,
        ElementalMatrix<T>& B, bool conjugate )
{
    EL_DEBUG_CSE
    const Dist U = B.ColDist();
    const Dist V = B.RowDist();
    if( A.ColDist() == V && A.RowDist() == Partial(U) )
    {
        Transpose( A, B, conjugate );
    }
    else
    {
        unique_ptr<ElementalMatrix<T>> 
            ASumFilt( B.ConstructTranspose(B.Grid(),B.Root()) );
        if( B.ColConstrained() )
            ASumFilt->AlignRowsWith( B, true );
        if( B.RowConstrained() )
            ASumFilt->AlignColsWith( B, true );
        Contract( A, *ASumFilt );
        if( !B.ColConstrained() )
            B.AlignColsWith( *ASumFilt, false );
        if( !B.RowConstrained() )
            B.AlignRowsWith( *ASumFilt, false );
        // We should have ensured that the alignments match
        B.Resize( A.Width(), A.Height() );
        Transpose( ASumFilt->LockedMatrix(), B.Matrix(), conjugate );
    }
}
void TransposeAxpyContract
( T alpha, const ElementalMatrix<T>& A,
  ElementalMatrix<T>& B, bool conjugate )
{
    EL_DEBUG_CSE
    const Dist U = B.ColDist();
    const Dist V = B.RowDist();
    if( A.ColDist() == V && A.RowDist() == U )
    {
        TransposeAxpy( alpha, A, B, conjugate );
    }
    else if( (A.ColDist() == V && A.RowDist() == Partial(U)) ||
             (A.ColDist() == V && A.RowDist() == Collect(U)) ||
             (A.RowDist() == U && A.ColDist() == Partial(V)) ||
             (A.RowDist() == U && A.ColDist() == Collect(V)) )
    {
        unique_ptr<ElementalMatrix<T>>
                                    ASumFilt( B.ConstructTranspose(B.Grid(),B.Root()) );
        if( B.ColConstrained() )
            ASumFilt->AlignRowsWith( B, true );
        if( B.RowConstrained() )
            ASumFilt->AlignColsWith( B, true );
        Contract( A, *ASumFilt );
        if( !B.ColConstrained() )
            B.AlignColsWith( *ASumFilt, false );
        if( !B.RowConstrained() )
            B.AlignRowsWith( *ASumFilt, false );

        // We should have ensured that the alignments are compatible
        TransposeAxpy( alpha, ASumFilt->LockedMatrix(), B.Matrix(), conjugate );
    }
    else
        LogicError("Incompatible distributions");
}
示例#3
0
void RiffleDecay( ElementalMatrix<F>& A, Int n )
{
    DEBUG_CSE
    Riffle( A, n );
    unique_ptr<ElementalMatrix<F>> PInf( A.Construct(A.Grid(),A.Root()) );
    PInf->AlignWith( A.DistData() );
    RiffleStationary( *PInf, n );
    A -= *PInf;
}
示例#4
0
void ReshapeIntoGrid
( Int realSize, Int imagSize, 
  const ElementalMatrix<T>& x, ElementalMatrix<T>& X )
{
    X.SetGrid( x.Grid() );
    X.Resize( imagSize, realSize );

    auto xSub = unique_ptr<ElementalMatrix<T>>
    ( x.Construct(x.Grid(),x.Root()) );
    auto XSub = unique_ptr<ElementalMatrix<T>>
    ( X.Construct(X.Grid(),X.Root()) );

    for( Int j=0; j<realSize; ++j )
    {
              View( *XSub, X, IR(0,imagSize),                IR(j) );
        LockedView( *xSub, x, IR(j*imagSize,(j+1)*imagSize), ALL   );
        Copy( *xSub, *XSub );
    }
}
示例#5
0
void GEPPGrowth( ElementalMatrix<T>& A, Int n )
{
    DEBUG_ONLY(CSE cse("GEPPGrowth"))
    Identity( A, n, n );
    if( n <= 1 )
        return;

    // Set the last column to all ones
    unique_ptr<ElementalMatrix<T>> aLast( A.Construct(A.Grid(),A.Root()) );
    View( *aLast, A, IR(0,n), IR(n-1,n) );
    Fill( *aLast, T(1) );

    // Set the subdiagonals to -1
    for( Int j=1; j<n; ++j )
        FillDiagonal( A, T(-1), -j );
}
示例#6
0
void MakeExtendedKahan
( ElementalMatrix<F>& A, Base<F> phi, Base<F> mu )
{
    EL_DEBUG_CSE
    typedef Base<F> Real;

    if( A.Height() != A.Width() )
        LogicError("Extended Kahan matrices must be square");
    const Int n = A.Height();
    if( n % 3 != 0 )
        LogicError("Dimension must be an integer multiple of 3");
    const Int l = n / 3;
    if( !l || (l & (l-1)) )
        LogicError("n/3 is not a power of two");
    Int k=0;
    while( Int(1u<<k) < l )
        ++k;

    if( phi <= Real(0) || phi >= Real(1) )
        LogicError("phi must be in (0,1)");
    if( mu <= Real(0) || mu >= Real(1) )
        LogicError("mu must be in (0,1)");

    // Start by setting A to the identity, and then modify the necessary 
    // l x l blocks of its 3 x 3 partitioning.
    MakeIdentity( A );
    unique_ptr<ElementalMatrix<F>> ABlock( A.Construct(A.Grid(),A.Root()) );
    View( *ABlock, A, IR(2*l,3*l), IR(2*l,3*l) );
    *ABlock *= mu;
    View( *ABlock, A, IR(0,l), IR(l,2*l) );
    Walsh( *ABlock, k );
    *ABlock *= -phi;
    View( *ABlock, A, IR(l,2*l), IR(2*l,3*l) );
    Walsh( *ABlock, k );
    *ABlock *= phi;

    // Now scale A by S
    const Real zeta = Sqrt(Real(1)-phi*phi);
    auto& ALoc = A.Matrix();
    for( Int iLoc=0; iLoc<A.LocalHeight(); ++iLoc )
    {
        const Int i = A.GlobalRow(iLoc);
        const Real gamma = Pow(zeta,Real(i));
        for( Int jLoc=0; jLoc<A.LocalWidth(); ++jLoc )
            ALoc(iLoc,jLoc) *= gamma;
    }
}
示例#7
0
void DruinskyToledo( ElementalMatrix<F>& A, Int k )
{
    EL_DEBUG_CSE
    const Int n = 2*k;
    Zeros( A, n, n );
    if( k == 0 )
      return;
    if( k == 1 )
    {
        Ones( A, n, n );
        return;
    }
    typedef Base<F> Real;
    const Real phi = Real(1) + 4*limits::Epsilon<Real>();
    const Real alphaPhi = LDLPivotConstant<Real>(BUNCH_KAUFMAN_A)*phi;
    vector<Real> d( k-2 );
    Real sigma(1);
    for( Int i=0; i<k-2; ++i )
    {
        d[i] = -alphaPhi/sigma;
        sigma -= 1/d[i];
    }

    unique_ptr<ElementalMatrix<F>> ASub( A.Construct(A.Grid(),A.Root()) );

    View( *ASub, A, IR(k-2,k), IR(0,k) );
    Ones( *ASub, 2, k );

    View( *ASub, A, IR(0,k), IR(k-2,k) );
    Ones( *ASub, k, 2 );

    View( *ASub, A, IR(0,k-2), IR(0,k-2) );
    Diagonal( *ASub, d );

    View( *ASub, A, IR(k,n), IR(0,k) );
    Identity( *ASub, k, k );

    View( *ASub, A, IR(k,n), IR(k,n) );
    Identity( *ASub, k, k );

    View( *ASub, A, IR(0,k), IR(k,n) );
    Identity( *ASub, k, k );
}
示例#8
0
void LocalTrr2kKernel
( UpperOrLower uplo,
  Orientation orientA, Orientation orientB,
  Orientation orientC, Orientation orientD,
  T alpha, const ElementalMatrix<T>& A, const ElementalMatrix<T>& B,
  T beta,  const ElementalMatrix<T>& C, const ElementalMatrix<T>& D,
                 ElementalMatrix<T>& E )
{
    DEBUG_CSE

    const bool transA = orientA != NORMAL;
    const bool transB = orientB != NORMAL;
    const bool transC = orientC != NORMAL;
    const bool transD = orientD != NORMAL;
    // TODO: Stringent distribution and alignment checks

    typedef ElementalMatrix<T> ADM;
    auto A0 = unique_ptr<ADM>( A.Construct(A.Grid(),A.Root()) );
    auto A1 = unique_ptr<ADM>( A.Construct(A.Grid(),A.Root()) );
    auto B0 = unique_ptr<ADM>( B.Construct(B.Grid(),B.Root()) );
    auto B1 = unique_ptr<ADM>( B.Construct(B.Grid(),B.Root()) );
    auto C0 = unique_ptr<ADM>( C.Construct(C.Grid(),C.Root()) );
    auto C1 = unique_ptr<ADM>( C.Construct(C.Grid(),C.Root()) );
    auto D0 = unique_ptr<ADM>( D.Construct(D.Grid(),D.Root()) );
    auto D1 = unique_ptr<ADM>( D.Construct(D.Grid(),D.Root()) );
    auto ETL = unique_ptr<ADM>( E.Construct(E.Grid(),E.Root()) );
    auto ETR = unique_ptr<ADM>( E.Construct(E.Grid(),E.Root()) );
    auto EBL = unique_ptr<ADM>( E.Construct(E.Grid(),E.Root()) );
    auto EBR = unique_ptr<ADM>( E.Construct(E.Grid(),E.Root()) );
    auto FTL = unique_ptr<ADM>( E.Construct(E.Grid(),E.Root()) );
    auto FBR = unique_ptr<ADM>( E.Construct(E.Grid(),E.Root()) );

    const Int half = E.Height() / 2;
    if( transA )
        LockedPartitionRight( A, *A0, *A1, half );
    else
        LockedPartitionDown( A, *A0, *A1, half );
    if( transB )
        LockedPartitionDown( B, *B0, *B1, half );
    else
        LockedPartitionRight( B, *B0, *B1, half );
    if( transC )
        LockedPartitionRight( C, *C0, *C1, half );
    else
        LockedPartitionDown( C, *C0, *C1, half );
    if( transD )
        LockedPartitionDown( D, *D0, *D1, half );
    else
        LockedPartitionRight( D, *D0, *D1, half );
    PartitionDownDiagonal( E, *ETL, *ETR, *EBL, *EBR, half );

    if( uplo == LOWER )
    {
        Gemm
        ( orientA, orientB, 
          alpha, A1->LockedMatrix(), B0->LockedMatrix(), 
          T(1), EBL->Matrix() );
        Gemm
        ( orientC, orientD, 
          beta, C1->LockedMatrix(), D0->LockedMatrix(), 
          T(1), EBL->Matrix() );
    }
    else
    {
        Gemm
        ( orientA, orientB, 
          alpha, A0->LockedMatrix(), B1->LockedMatrix(), 
          T(1), ETR->Matrix() );
        Gemm
        ( orientC, orientD, 
          beta, C0->LockedMatrix(), D1->LockedMatrix(), 
          T(1), ETR->Matrix() );
    }

    FTL->AlignWith( *ETL );
    FTL->Resize( ETL->Height(), ETL->Width() );
    Gemm
    ( orientA, orientB, 
      alpha, A0->LockedMatrix(), B0->LockedMatrix(),
      T(0), FTL->Matrix() );
    Gemm
    ( orientC, orientD,
      beta, C0->LockedMatrix(), D0->LockedMatrix(),
      T(1), FTL->Matrix() );
    AxpyTrapezoid( uplo, T(1), *FTL, *ETL );

    FBR->AlignWith( *EBR );
    FBR->Resize( EBR->Height(), EBR->Width() );
    Gemm
    ( orientA, orientB, 
      alpha, A1->LockedMatrix(), B1->LockedMatrix(),
      T(0), FBR->Matrix() );
    Gemm
    ( orientC, orientD,
      beta, C1->LockedMatrix(), D1->LockedMatrix(),
      T(1), FBR->Matrix() );
    AxpyTrapezoid( uplo, T(1), *FBR, *EBR );
}
示例#9
0
void LocalTrr2k
( UpperOrLower uplo, 
  Orientation orientA, Orientation orientB,
  Orientation orientC, Orientation orientD,
  T alpha, const ElementalMatrix<T>& A, const ElementalMatrix<T>& B,
  T beta,  const ElementalMatrix<T>& C, const ElementalMatrix<T>& D,
  T gamma,       ElementalMatrix<T>& E )
{
    using namespace trr2k;
    DEBUG_CSE

    const bool transA = orientA != NORMAL;
    const bool transB = orientB != NORMAL;
    const bool transC = orientC != NORMAL;
    const bool transD = orientD != NORMAL;
    // TODO: Stringent distribution and alignment checks

    ScaleTrapezoid( gamma, uplo, E );
    if( E.Height() < E.Grid().Width()*LocalTrr2kBlocksize<T>() )
    {
        LocalTrr2kKernel
        ( uplo, orientA, orientB, orientC, orientD, 
          alpha, A, B, beta, C, D, E );
    }
    else
    {
        typedef ElementalMatrix<T> ADM;
        // Ugh. This is likely too much overhead. It should be measured.
        auto A0 = unique_ptr<ADM>( A.Construct(A.Grid(),A.Root()) );
        auto A1 = unique_ptr<ADM>( A.Construct(A.Grid(),A.Root()) );
        auto B0 = unique_ptr<ADM>( B.Construct(B.Grid(),B.Root()) );
        auto B1 = unique_ptr<ADM>( B.Construct(B.Grid(),B.Root()) );
        auto C0 = unique_ptr<ADM>( C.Construct(C.Grid(),C.Root()) );
        auto C1 = unique_ptr<ADM>( C.Construct(C.Grid(),C.Root()) );
        auto D0 = unique_ptr<ADM>( D.Construct(D.Grid(),D.Root()) );
        auto D1 = unique_ptr<ADM>( D.Construct(D.Grid(),D.Root()) );
        auto ETL = unique_ptr<ADM>( E.Construct(E.Grid(),E.Root()) );
        auto ETR = unique_ptr<ADM>( E.Construct(E.Grid(),E.Root()) );
        auto EBL = unique_ptr<ADM>( E.Construct(E.Grid(),E.Root()) );
        auto EBR = unique_ptr<ADM>( E.Construct(E.Grid(),E.Root()) );

        const Int half = E.Height() / 2;
        if( transA )
            LockedPartitionRight( A, *A0, *A1, half );
        else
            LockedPartitionDown( A, *A0, *A1, half );
        if( transB )
            LockedPartitionDown( B, *B0, *B1, half );
        else
            LockedPartitionRight( B, *B0, *B1, half );
        if( transC )
            LockedPartitionRight( C, *C0, *C1, half );
        else
            LockedPartitionDown( C, *C0, *C1, half );
        if( transD )
            LockedPartitionDown( D, *D0, *D1, half );
        else
            LockedPartitionRight( D, *D0, *D1, half );
        PartitionDownDiagonal( E, *ETL, *ETR, *EBL, *EBR, half );

        if( uplo == LOWER )
        { 
            Gemm
            ( orientA, orientB, 
              alpha, A1->LockedMatrix(), B0->LockedMatrix(), 
              T(1), EBL->Matrix() );
            Gemm
            ( orientC, orientD, 
              beta,  C1->LockedMatrix(), D0->LockedMatrix(), 
              T(1), EBL->Matrix() );
        }
        else
        {
            Gemm
            ( orientA, orientB,
              alpha, A0->LockedMatrix(), B1->LockedMatrix(), 
              T(1), ETR->Matrix() );
            Gemm
            ( orientC, orientD,
              beta,  C0->LockedMatrix(), D1->LockedMatrix(), 
              T(1), ETR->Matrix() );
        }

        // Recurse
        LocalTrr2k
        ( uplo, orientA, orientB, orientC, orientD, 
          alpha, *A0, *B0, beta, *C0, *D0, T(1), *ETL );
        LocalTrr2k
        ( uplo, orientA, orientB, orientC, orientD, 
          alpha, *A1, *B1, beta, *C1, *D1, T(1), *EBR );
    }
}
示例#10
0
void Gather
( const ElementalMatrix<T>& A,
        DistMatrix<T,CIRC,CIRC>& B )
{
    DEBUG_ONLY(CSE cse("copy::Gather"))
    AssertSameGrids( A, B );
    if( A.DistSize() == 1 && A.CrossSize() == 1 )
    {
        B.Resize( A.Height(), A.Width() );
        if( B.CrossRank() == B.Root() )
            Copy( A.LockedMatrix(), B.Matrix() );
        return;
    }

    const Int height = A.Height();
    const Int width = A.Width();
    B.SetGrid( A.Grid() );
    B.Resize( height, width );

    // Gather the colShifts and rowShifts
    // ==================================
    Int myShifts[2];
    myShifts[0] = A.ColShift();
    myShifts[1] = A.RowShift();
    vector<Int> shifts;
    const Int crossSize = B.CrossSize();
    if( B.CrossRank() == B.Root() )
        shifts.resize( 2*crossSize );
    mpi::Gather( myShifts, 2, shifts.data(), 2, B.Root(), B.CrossComm() );

    // Gather the payload data
    // =======================
    const bool irrelevant = ( A.RedundantRank()!=0 || A.CrossRank()!=A.Root() );
    int totalSend = ( irrelevant ? 0 : A.LocalHeight()*A.LocalWidth() );
    vector<int> recvCounts, recvOffsets;
    if( B.CrossRank() == B.Root() )
        recvCounts.resize( crossSize );
    mpi::Gather( &totalSend, 1, recvCounts.data(), 1, B.Root(), B.CrossComm() );
    int totalRecv = Scan( recvCounts, recvOffsets );
    //vector<T> sendBuf(totalSend), recvBuf(totalRecv);
    vector<T> sendBuf, recvBuf;
    sendBuf.reserve( totalSend );
    recvBuf.reserve( totalRecv );
    if( !irrelevant )
        copy::util::InterleaveMatrix
        ( A.LocalHeight(), A.LocalWidth(),
          A.LockedBuffer(), 1, A.LDim(),
          sendBuf.data(),   1, A.LocalHeight() );
    mpi::Gather
    ( sendBuf.data(), totalSend,
      recvBuf.data(), recvCounts.data(), recvOffsets.data(), 
      B.Root(), B.CrossComm() );

    // Unpack
    // ======
    if( B.Root() == B.CrossRank() )
    {
        for( Int q=0; q<crossSize; ++q )
        {
            if( recvCounts[q] == 0 )
                continue;
            const Int colShift = shifts[2*q+0];
            const Int rowShift = shifts[2*q+1];
            const Int colStride = A.ColStride();
            const Int rowStride = A.RowStride();
            const Int localHeight = Length( height, colShift, colStride );
            const Int localWidth = Length( width, rowShift, rowStride );
            copy::util::InterleaveMatrix
            ( localHeight, localWidth,
              &recvBuf[recvOffsets[q]],    1,         localHeight,
              B.Buffer(colShift,rowShift), colStride, rowStride*B.LDim() );
        }
    }
}