Ejemplo n.º 1
0
void StackedGeometricColumnScaling
( const DistMatrix<Field,      U,V   >& A,
  const DistMatrix<Field,      U,V   >& B,
        DistMatrix<Base<Field>,V,STAR>& geomScaling )
{
    EL_DEBUG_CSE
    // NOTE: Assuming A.ColComm() == B.ColComm() and that the row alignments
    //       are equal
    typedef Base<Field> Real;

    DistMatrix<Real,V,STAR> maxScalingA(A.Grid()),
                            maxScalingB(A.Grid());
    ColumnMaxNorms( A, maxScalingA );
    ColumnMaxNorms( B, maxScalingB );

    const Int mLocalA = A.LocalHeight();
    const Int mLocalB = B.LocalHeight();
    const Int nLocal = A.LocalWidth();
    geomScaling.AlignWith( maxScalingA );
    geomScaling.Resize( A.Width(), 1 );
    auto& ALoc = A.LockedMatrix();
    auto& BLoc = B.LockedMatrix();
    auto& geomScalingLoc = geomScaling.Matrix();
    auto& maxScalingALoc = maxScalingA.Matrix();
    auto& maxScalingBLoc = maxScalingB.Matrix();
    for( Int jLoc=0; jLoc<nLocal; ++jLoc )
    {
        Real minAbs = Max(maxScalingALoc(jLoc),maxScalingBLoc(jLoc));
        for( Int iLoc=0; iLoc<mLocalA; ++iLoc )
        {
            const Real absVal = Abs(ALoc(iLoc,jLoc));
            if( absVal > 0 && absVal < minAbs )
                minAbs = Min(minAbs,absVal);
        }
        for( Int iLoc=0; iLoc<mLocalB; ++iLoc )
        {
            const Real absVal = Abs(BLoc(iLoc,jLoc));
            if( absVal > 0 && absVal < minAbs )
                minAbs = Min(minAbs,absVal);
        }
        geomScalingLoc(jLoc) = minAbs;
    }
    mpi::AllReduce( geomScaling.Buffer(), nLocal, mpi::MIN, A.ColComm() );

    for( Int jLoc=0; jLoc<nLocal; ++jLoc )
    {
        const Real maxAbsA = maxScalingALoc(jLoc);
        const Real maxAbsB = maxScalingBLoc(jLoc);
        const Real maxAbs = Max(maxAbsA,maxAbsB);
        const Real minAbs = geomScalingLoc(jLoc);
        geomScalingLoc(jLoc) = Sqrt(minAbs*maxAbs);
    }
}
Ejemplo n.º 2
0
void QP
( const DistSparseMatrix<Real>& A, 
  const DistMultiVec<Real>& B, 
        DistMultiVec<Real>& X, 
  const qp::direct::Ctrl<Real>& ctrl )
{
    DEBUG_CSE

    const Int m = A.Height();
    const Int n = A.Width();
    const Int k = B.Width();
    mpi::Comm comm = A.Comm();
    DistSparseMatrix<Real> Q(comm), AHat(comm);
    DistMultiVec<Real> bHat(comm), c(comm);

    Herk( LOWER, ADJOINT, Real(1), A, Q );
    MakeHermitian( LOWER, Q );
    Zeros( AHat, 0, n );
    Zeros( bHat, 0, 1 );
    Zeros( X,    n, k );

    DistMultiVec<Real> q(comm), y(comm), z(comm);
    auto& qLoc = q.Matrix();
    auto& XLoc = X.Matrix();
    auto& BLoc = B.LockedMatrix();
    for( Int j=0; j<k; ++j )
    {
        auto xLoc = XLoc( ALL, IR(j) );
        auto bLoc = BLoc( ALL, IR(j) );

        Zeros( c, n, 1 );
        Zeros( q, m, 1 );
        qLoc = bLoc;
        Multiply( ADJOINT, Real(-1), A, q, Real(0), c );

        Zeros( q, n, 1 );
        qLoc = xLoc;
        El::QP( Q, AHat, bHat, c, q, y, z, ctrl );
        xLoc = qLoc;
    }
}
Ejemplo n.º 3
0
void IndexDependentMap
( const BlockMatrix<S>& A,
        BlockMatrix<T>& B, 
  function<T(Int,Int,S)> func )
{
    DEBUG_CSE
    const Int mLoc = A.LocalHeight();
    const Int nLoc = A.LocalWidth();
    B.AlignWith( A.DistData() );
    B.Resize( A.Height(), A.Width() );
    auto& ALoc = A.LockedMatrix();
    auto& BLoc = B.Matrix();
    for( Int jLoc=0; jLoc<nLoc; ++jLoc )
    {
        const Int j = A.GlobalCol(jLoc);
        for( Int iLoc=0; iLoc<mLoc; ++iLoc )
        {
            const Int i = A.GlobalRow(iLoc);
            BLoc(iLoc,jLoc) = func(i,j,ALoc(iLoc,jLoc));
        }
    }
}
Ejemplo n.º 4
0
void Helper
( const AbstractDistMatrix<S>& A,
        AbstractDistMatrix<T>& B ) 
{
    EL_DEBUG_CSE

    // TODO: Decide whether S or T should be used as the transmission type
    //       based upon which is smaller. Transmit S by default.
    const Int height = A.Height();
    const Int width = A.Width();
    const Grid& g = B.Grid();
    B.Resize( height, width );
    Zero( B );
    const bool BPartic = B.Participating();
    const int BRoot = B.Root();

    const bool includeViewers = (A.Grid() != B.Grid());

    const Int localHeight = A.LocalHeight();
    const Int localWidth = A.LocalWidth();
    auto& ALoc = A.LockedMatrix();
    auto& BLoc = B.Matrix();

    // TODO: Break into smaller pieces to avoid excessive memory usage?
    vector<Entry<S>> remoteEntries;
    vector<int> distOwners;
    if( A.RedundantRank() == 0 )
    {
        const bool noRedundant = B.RedundantSize() == 1;
        const int colStride = B.ColStride();
        const int rowRank = B.RowRank();
        const int colRank = B.ColRank();

        vector<Int> globalRows(localHeight), localRows(localHeight);
        vector<int> ownerRows(localHeight);
        for( Int iLoc=0; iLoc<localHeight; ++iLoc )
        {
            const Int i = A.GlobalRow(iLoc);
            const int ownerRow = B.RowOwner(i);
            globalRows[iLoc] = i;
            ownerRows[iLoc] = ownerRow;
            localRows[iLoc] = B.LocalRow(i,ownerRow);
        }

        remoteEntries.reserve( localHeight*localWidth );
        distOwners.reserve( localHeight*localWidth );
        for( Int jLoc=0; jLoc<localWidth; ++jLoc )
        {
            const Int j = A.GlobalCol(jLoc);
            const int ownerCol = B.ColOwner(j);
            const Int localCol = B.LocalCol(j,ownerCol);
            const bool isLocalCol = ( BPartic && ownerCol == rowRank );
            for( Int iLoc=0; iLoc<localHeight; ++iLoc ) 
            {
                const int ownerRow = ownerRows[iLoc];
                const Int localRow = localRows[iLoc];
                const bool isLocalRow = ( BPartic && ownerRow == colRank );
                const S& alpha = ALoc(iLoc,jLoc);
                if( noRedundant && isLocalRow && isLocalCol )
                {
                    BLoc(localRow,localCol) = Caster<S,T>::Cast(alpha);
                }
                else
                {
                    remoteEntries.push_back
                    ( Entry<S>{localRow,localCol,alpha} );
                    distOwners.push_back( ownerRow + colStride*ownerCol );
                }
            }
        }
    }

    // We will first push to redundant rank 0 of B
    const int redundantRootB = 0;

    // Compute the metadata
    // ====================
    const Int totalSend = remoteEntries.size();
    mpi::Comm comm;
    vector<int> sendCounts, owners(totalSend);
    if( includeViewers )
    {
        comm = g.ViewingComm();
        const int viewingSize = mpi::Size( g.ViewingComm() );
        const int distBSize = mpi::Size( B.DistComm() );

        vector<int> distBToViewing(distBSize);
        for( int distBRank=0; distBRank<distBSize; ++distBRank )
        {
            const int vcOwner =
              g.CoordsToVC
              (B.ColDist(),B.RowDist(),distBRank,BRoot,redundantRootB);
            distBToViewing[distBRank] = g.VCToViewing(vcOwner);
        }

        sendCounts.resize(viewingSize,0);
        for( Int k=0; k<totalSend; ++k )
        {
            owners[k] = distBToViewing[distOwners[k]];
            ++sendCounts[owners[k]];
        }
    }
    else
    {
        if( !g.InGrid() )
            return;
        comm = g.VCComm();

        const int distBSize = mpi::Size( B.DistComm() );
        vector<int> distBToVC(distBSize);
        for( int distBRank=0; distBRank<distBSize; ++distBRank )
        {
            distBToVC[distBRank] = 
              g.CoordsToVC
              (B.ColDist(),B.RowDist(),distBRank,BRoot,redundantRootB);
        }

        const int vcSize = mpi::Size( g.VCComm() );
        sendCounts.resize(vcSize,0);
        for( Int k=0; k<totalSend; ++k )
        {
            owners[k] = distBToVC[distOwners[k]];
            ++sendCounts[owners[k]];
        }
    }
    SwapClear( distOwners );

    // Pack the data
    // =============
    vector<int> sendOffs;
    Scan( sendCounts, sendOffs );
    vector<Entry<S>> sendBuf;
    FastResize( sendBuf, totalSend );
    auto offs = sendOffs;
    for( Int k=0; k<totalSend; ++k )
        sendBuf[offs[owners[k]]++] = remoteEntries[k];
    SwapClear( remoteEntries );
    SwapClear( owners );

    // Exchange and unpack the data
    // ============================
    auto recvBuf = mpi::AllToAll( sendBuf, sendCounts, sendOffs, comm );
    if( BPartic )
    {
        if( B.RedundantRank() == redundantRootB )
        {
            Int recvBufSize = recvBuf.size();
            for( Int k=0; k<recvBufSize; ++k )
            {
                const auto& entry = recvBuf[k];
                BLoc(entry.i,entry.j) = Caster<S,T>::Cast(entry.value);
            }
        }
        El::Broadcast( B, B.RedundantComm(), redundantRootB );
    }
}