Пример #1
0
void LowerPanel
( DistMatrix<F>& A,
  DistMatrix<F>& W,
  DistMatrix<F,MD,STAR>& t,
  DistMatrix<F,MC,STAR>& B_MC_STAR, 
  DistMatrix<F,MR,STAR>& B_MR_STAR,
  DistMatrix<F,MC,STAR>& W_MC_STAR,
  DistMatrix<F,MR,STAR>& W_MR_STAR,
  const SymvCtrl<F>& ctrl )
{
    DEBUG_CSE
    const Int n = A.Height();
    const Int nW = W.Width();
    DEBUG_ONLY(
      AssertSameGrids( A, W, t );
      if( n != A.Width() )
          LogicError("A must be square");
      if( n != W.Height() )
          LogicError("A and W must be the same height");
      if( n <= nW )
          LogicError("W must be a column panel");
      if( W.ColAlign() != A.ColAlign() || 
          W.RowAlign() != A.RowAlign() )
          LogicError("W and A must be aligned");
      if( t.Height() != nW || t.Width() != 1 )
          LogicError
          ("t must be a column vector of the same length as W's width");
      if( !A.DiagonalAlignedWith(t,-1) )
          LogicError("t is not aligned with A's subdiagonal.");
    )
Пример #2
0
void LPan
( DistMatrix<F>& A,
  DistMatrix<F>& W,
  DistMatrix<F,MD,STAR>& t,
  DistMatrix<F,MC,STAR>& APan_MC_STAR, 
  DistMatrix<F,MR,STAR>& APan_MR_STAR,
  DistMatrix<F,MC,STAR>& W_MC_STAR,
  DistMatrix<F,MR,STAR>& W_MR_STAR )
{
    const Int n = A.Height();
    const Int nW = W.Width();
    DEBUG_ONLY(
        CallStackEntry cse("herm_tridiag::LPan");
        AssertSameGrids( A, W, t );
        if( n != A.Width() )
            LogicError("A must be square");
        if( n != W.Height() )
            LogicError("A and W must be the same height");
        if( n <= nW )
            LogicError("W must be a column panel");
        if( W.ColAlign() != A.ColAlign() || 
            W.RowAlign() != A.RowAlign() )
            LogicError("W and A must be aligned");
        if( t.Height() != nW || t.Width() != 1 )
            LogicError
            ("t must be a column vector of the same length as W's width");
        if( !A.DiagonalAlignedWith(t,-1) )
            LogicError("t is not aligned with A's subdiagonal.");
    )
Пример #3
0
void AllGather
( const DistMatrix<T,        U,           V   >& A,
        DistMatrix<T,Collect<U>(),Collect<V>()>& B )
{
    EL_DEBUG_CSE
    AssertSameGrids( A, B );

    const Int height = A.Height();
    const Int width = A.Width();
    B.SetGrid( A.Grid() );
    B.Resize( height, width );

    if( A.Participating() )
    {
        if( A.DistSize() == 1 )
        {
            Copy( A.LockedMatrix(), B.Matrix() );
        }
        else
        {
            const Int colStride = A.ColStride();
            const Int rowStride = A.RowStride();
            const Int distStride = colStride*rowStride;
            const Int maxLocalHeight = MaxLength(height,colStride);
            const Int maxLocalWidth = MaxLength(width,rowStride);
            const Int portionSize = mpi::Pad( maxLocalHeight*maxLocalWidth );
            vector<T> buf;
            FastResize( buf, (distStride+1)*portionSize );
            T* sendBuf = &buf[0];
            T* recvBuf = &buf[portionSize];

            // Pack
            util::InterleaveMatrix
            ( A.LocalHeight(), A.LocalWidth(),
              A.LockedBuffer(), 1, A.LDim(),
              sendBuf,          1, A.LocalHeight() );

            // Communicate
            mpi::AllGather
            ( sendBuf, portionSize, recvBuf, portionSize, A.DistComm() );

            // Unpack
            util::StridedUnpack
            ( height, width,
              A.ColAlign(), colStride,
              A.RowAlign(), rowStride,
              recvBuf, portionSize,
              B.Buffer(), B.LDim() );
        }
    }
    if( A.Grid().InGrid() && A.CrossComm() != mpi::COMM_SELF )
        El::Broadcast( B, A.CrossComm(), A.Root() );
}
Пример #4
0
void PartialColAllGather
( const DistMatrix<T,        U,   V>& A, 
        DistMatrix<T,Partial<U>(),V>& B ) 
{
    EL_DEBUG_CSE
    AssertSameGrids( A, B );

    const Int height = A.Height();
    const Int width = A.Width();
#ifdef EL_VECTOR_WARNINGS
    if( width == 1 && A.Grid().Rank() == 0 )
    {
        cerr <<
          "The vector version of PartialColAllGather is not yet written but "
          "would only require modifying the vector version of "
          "PartialRowAllGather" << endl;
    }
#endif
#ifdef EL_CACHE_WARNINGS
    if( width && A.Grid().Rank() == 0 )
    {
        cerr <<
          "PartialColAllGather potentially causes a large amount of cache-"
          "thrashing. If possible, avoid it by performing the redistribution"
          "on the (conjugate-)transpose" << endl;
    }
#endif
    B.AlignColsAndResize
    ( Mod(A.ColAlign(),B.ColStride()), height, width, false, false );
    if( !A.Participating() )
        return;

    EL_DEBUG_ONLY(
      if( A.LocalWidth() != A.Width() )
          LogicError("This routine assumes rows are not distributed");
    )
Пример #5
0
void EnsureConformal
( const DistMatrix<T,MR,STAR>& A, const DistMatrix<T>& C, string name )
{
    if( A.Height() != C.Width() || A.ColAlign() != C.RowAlign() )
        LogicError(name," not conformal with C");
}
Пример #6
0
void ColAllToAllPromote
( const DistMatrix<T,        U,                     V   >& A,
        DistMatrix<T,Partial<U>(),PartialUnionRow<U,V>()>& B )
{
    DEBUG_CSE
    AssertSameGrids( A, B );

    const Int height = A.Height();
    const Int width = A.Width();
    B.AlignColsAndResize
    ( Mod(A.ColAlign(),B.ColStride()), height, width, false, false );
    if( !B.Participating() )
        return;

    const Int colStride = A.ColStride();
    const Int colStridePart = A.PartialColStride();
    const Int colStrideUnion = A.PartialUnionColStride();
    const Int colRankPart = A.PartialColRank();
    const Int colDiff = B.ColAlign() - Mod(A.ColAlign(),colStridePart);

    const Int maxLocalHeight = MaxLength(height,colStride);
    const Int maxLocalWidth = MaxLength(width,colStrideUnion);
    const Int portionSize = mpi::Pad( maxLocalHeight*maxLocalWidth );

    if( colDiff == 0 )
    {
        if( A.PartialUnionColStride() == 1 )
        {
            Copy( A.LockedMatrix(), B.Matrix() );
        }
        else
        {
            vector<T> buffer;
            FastResize( buffer, 2*colStrideUnion*portionSize );
            T* firstBuf  = &buffer[0];
            T* secondBuf = &buffer[colStrideUnion*portionSize];

            // Pack            
            util::RowStridedPack
            ( A.LocalHeight(), width,
              B.RowAlign(), colStrideUnion,
              A.LockedBuffer(), A.LDim(),
              firstBuf,         portionSize );

            // Simultaneously Gather in columns and Scatter in rows
            mpi::AllToAll
            ( firstBuf,  portionSize,
              secondBuf, portionSize, A.PartialUnionColComm() );

            // Unpack
            util::PartialColStridedUnpack 
            ( height, B.LocalWidth(),
              A.ColAlign(), colStride,
              colStrideUnion, colStridePart, colRankPart,
              B.ColShift(),
              secondBuf,  portionSize,
              B.Buffer(), B.LDim() );
        }
    }
    else
    {
#ifdef EL_UNALIGNED_WARNINGS
        if( A.Grid().Rank() == 0 )
            cerr << "Unaligned PartialColAllToAllPromote" << endl;
#endif
        const Int sendColRankPart = Mod( colRankPart+colDiff, colStridePart );
        const Int recvColRankPart = Mod( colRankPart-colDiff, colStridePart );

        vector<T> buffer;
        FastResize( buffer, 2*colStrideUnion*portionSize );
        T* firstBuf  = &buffer[0];
        T* secondBuf = &buffer[colStrideUnion*portionSize];

        // Pack
        util::RowStridedPack
        ( A.LocalHeight(), width,
          B.RowAlign(), colStrideUnion,
          A.LockedBuffer(), A.LDim(),
          secondBuf,        portionSize );

        // Realign the input
        mpi::SendRecv
        ( secondBuf, colStrideUnion*portionSize, sendColRankPart,
          firstBuf,  colStrideUnion*portionSize, recvColRankPart,
          A.PartialColComm() );

        // Simultaneously Scatter in columns and Gather in rows
        mpi::AllToAll
        ( firstBuf,  portionSize,
          secondBuf, portionSize, A.PartialUnionColComm() );

        // Unpack
        util::PartialColStridedUnpack 
        ( height, B.LocalWidth(),
          A.ColAlign(), colStride,
          colStrideUnion, colStridePart, recvColRankPart,
          B.ColShift(),
          secondBuf,  portionSize,
          B.Buffer(), B.LDim() );
    }
}
Пример #7
0
void InPlaceRedist
( DistMatrix<F>& paddedZ, Int rowAlign, const Base<F>* readBuffer )
{
    typedef Base<F> Real;
    const Grid& g = paddedZ.Grid();
    const Int height = paddedZ.Height();
    const Int width = paddedZ.Width();

    const Int r = g.Height();
    const Int c = g.Width();
    const Int p = r * c;
    const Int row = g.Row();
    const Int col = g.Col();
    const Int rowShift = paddedZ.RowShift();
    const Int colAlign = paddedZ.ColAlign();
    const Int localWidth = Length(width,g.VRRank(),rowAlign,p);

    const Int maxHeight = MaxLength(height,r);
    const Int maxWidth = MaxLength(width,p);
    const Int portionSize = mpi::Pad( maxHeight*maxWidth );
    
    // Allocate our send/recv buffers
    std::vector<Real> buffer(2*r*portionSize);
    Real* sendBuffer = &buffer[0];
    Real* recvBuffer = &buffer[r*portionSize];

    // Pack
    OUTER_PARALLEL_FOR
    for( Int k=0; k<r; ++k )
    {
        Real* data = &sendBuffer[k*portionSize];

        const Int thisColShift = Shift(k,colAlign,r);
        const Int thisLocalHeight = Length(height,thisColShift,r);

        INNER_PARALLEL_FOR_COLLAPSE2
        for( Int j=0; j<localWidth; ++j )
            for( Int i=0; i<thisLocalHeight; ++i )
                data[i+j*thisLocalHeight] = 
                    readBuffer[thisColShift+i*r+j*height];
    }

    // Communicate
    mpi::AllToAll
    ( sendBuffer, portionSize,
      recvBuffer, portionSize, g.ColComm() );

    // Unpack
    const Int localHeight = Length(height,row,colAlign,r);
    OUTER_PARALLEL_FOR
    for( Int k=0; k<r; ++k )
    {
        const Real* data = &recvBuffer[k*portionSize];

        const Int thisRank = col+k*c;
        const Int thisRowShift = Shift(thisRank,rowAlign,p);
        const Int thisRowOffset = (thisRowShift-rowShift) / c;
        const Int thisLocalWidth = Length(width,thisRowShift,p);

        INNER_PARALLEL_FOR
        for( Int j=0; j<thisLocalWidth; ++j )
        {
            const Real* dataCol = &(data[j*localHeight]);
            Real* thisCol = (Real*)paddedZ.Buffer(0,thisRowOffset+j*r);
            if( IsComplex<F>::val )
            {
                for( Int i=0; i<localHeight; ++i )
                {
                    thisCol[2*i] = dataCol[i];
                    thisCol[2*i+1] = 0;
                }
            }
            else
            {
                MemCopy( thisCol, dataCol, localHeight );
            }
        }
    }
}
Пример #8
0
void TranslateBetweenGrids
( const DistMatrix<T,MC,MR>& A, DistMatrix<T,MC,MR>& B ) 
{
    DEBUG_ONLY(CSE cse("copy::TranslateBetweenGrids [MC,MR]"))

    B.Resize( A.Height(), A.Width() );
    // Just need to ensure that each viewing comm contains the other team's
    // owning comm. Congruence is too strong.

    // Compute the number of process rows and columns that each process
    // needs to send to.
    const Int colStride = B.ColStride();
    const Int rowStride = B.RowStride();
    const Int colRank = B.ColRank();
    const Int rowRank = B.RowRank();
    const Int colStrideA = A.ColStride();
    const Int rowStrideA = A.RowStride();
    const Int colGCD = GCD( colStride, colStrideA );
    const Int rowGCD = GCD( rowStride, rowStrideA );
    const Int colLCM = colStride*colStrideA / colGCD;
    const Int rowLCM = rowStride*rowStrideA / rowGCD;
    const Int numColSends = colStride / colGCD;
    const Int numRowSends = rowStride / rowGCD;

    const Int colAlign = B.ColAlign();
    const Int rowAlign = B.RowAlign();
    const Int colAlignA = A.ColAlign();
    const Int rowAlignA = A.RowAlign();

    const bool inBGrid = B.Participating();
    const bool inAGrid = A.Participating();
    if( !inBGrid && !inAGrid )
        return;

    const Int maxSendSize =
        (A.Height()/(colStrideA*numColSends)+1) *
        (A.Width()/(rowStrideA*numRowSends)+1);

    // Translate the ranks from A's VC communicator to B's viewing so that
    // we can match send/recv communicators. Since A's VC communicator is not
    // necessarily defined on every process, we instead work with A's owning
    // group and account for row-major ordering if necessary.
    const int sizeA = A.Grid().Size();
    vector<int> rankMap(sizeA), ranks(sizeA);
    if( A.Grid().Order() == COLUMN_MAJOR )
    {
        for( int j=0; j<sizeA; ++j )
            ranks[j] = j;
    }
    else
    {
        // The (i,j) = i + j*colStrideA rank in the column-major ordering is
        // equal to the j + i*rowStrideA rank in a row-major ordering.
        // Since we desire rankMap[i+j*colStrideA] to correspond to process
        // (i,j) in A's grid's rank in this viewing group, ranks[i+j*colStrideA]
        // should correspond to process (i,j) in A's owning group. Since the
        // owning group is ordered row-major in this case, its rank is
        // j+i*rowStrideA. Note that setting
        // ranks[j+i*rowStrideA] = i+j*colStrideA is *NOT* valid.
        for( int i=0; i<colStrideA; ++i )
            for( int j=0; j<rowStrideA; ++j )
                ranks[i+j*colStrideA] = j+i*rowStrideA;
    }
    mpi::Translate
    ( A.Grid().OwningGroup(), sizeA, &ranks[0],
      B.Grid().ViewingComm(), &rankMap[0] );

    // Have each member of A's grid individually send to all numRow x numCol
    // processes in order, while the members of this grid receive from all
    // necessary processes at each step.
    Int requiredMemory = 0;
    if( inAGrid )
        requiredMemory += maxSendSize;
    if( inBGrid )
        requiredMemory += maxSendSize;
    vector<T> auxBuf( requiredMemory );
    Int offset = 0;
    T* sendBuf = &auxBuf[offset];
    if( inAGrid )
        offset += maxSendSize;
    T* recvBuf = &auxBuf[offset];

    Int recvRow = 0; // avoid compiler warnings...
    if( inAGrid )
        recvRow = Mod(Mod(A.ColRank()-colAlignA,colStrideA)+colAlign,colStride);
    for( Int colSend=0; colSend<numColSends; ++colSend )
    {
        Int recvCol = 0; // avoid compiler warnings...
        if( inAGrid )
            recvCol=Mod(Mod(A.RowRank()-rowAlignA,rowStrideA)+rowAlign,
                        rowStride);
        for( Int rowSend=0; rowSend<numRowSends; ++rowSend )
        {
            mpi::Request sendRequest;
            // Fire off this round of non-blocking sends
            if( inAGrid )
            {
                // Pack the data
                Int sendHeight = Length(A.LocalHeight(),colSend,numColSends);
                Int sendWidth = Length(A.LocalWidth(),rowSend,numRowSends);
                copy::util::InterleaveMatrix
                ( sendHeight, sendWidth,
                  A.LockedBuffer(colSend,rowSend),
                 numColSends, numRowSends*A.LDim(),
                  sendBuf, 1, sendHeight );
                // Send data
                const Int recvVCRank = recvRow + recvCol*colStride;
                const Int recvViewingRank = B.Grid().VCToViewing( recvVCRank );
                mpi::ISend
                ( sendBuf, sendHeight*sendWidth, recvViewingRank,
                  B.Grid().ViewingComm(), sendRequest );
            }
            // Perform this round of recv's
            if( inBGrid )
            {
                const Int sendColOffset = colAlignA;
                const Int recvColOffset =
                    (colSend*colStrideA+colAlign) % colStride;
                const Int sendRowOffset = rowAlignA;
                const Int recvRowOffset =
                    (rowSend*rowStrideA+rowAlign) % rowStride;

                const Int firstSendRow =
                    Mod( Mod(colRank-recvColOffset,colStride)+sendColOffset,
                         colStrideA );
                const Int firstSendCol =
                    Mod( Mod(rowRank-recvRowOffset,rowStride)+sendRowOffset,
                         rowStrideA );

                const Int colShift = Mod( colRank-recvColOffset, colStride );
                const Int rowShift = Mod( rowRank-recvRowOffset, rowStride );
                const Int numColRecvs = Length( colStrideA, colShift, colStride );
                const Int numRowRecvs = Length( rowStrideA, rowShift, rowStride );

                // Recv data
                // For now, simply receive sequentially. Until we switch to
                // nonblocking recv's, we won't be using much of the
                // recvBuf
                Int sendRow = firstSendRow;
                for( Int colRecv=0; colRecv<numColRecvs; ++colRecv )
                {
                    const Int sendColShift = Shift( sendRow, colAlignA, colStrideA ) + colSend*colStrideA;
                    const Int sendHeight = Length( A.Height(), sendColShift, colLCM );
                    const Int localColOffset = (sendColShift-B.ColShift()) / colStride;

                    Int sendCol = firstSendCol;
                    for( Int rowRecv=0; rowRecv<numRowRecvs; ++rowRecv )
                    {
                        const Int sendRowShift = Shift( sendCol, rowAlignA, rowStrideA ) + rowSend*rowStrideA;
                        const Int sendWidth = Length( A.Width(), sendRowShift, rowLCM );
                        const Int localRowOffset = (sendRowShift-B.RowShift()) / rowStride;

                        const Int sendVCRank = sendRow+sendCol*colStrideA;
                        mpi::Recv
                        ( recvBuf, sendHeight*sendWidth, rankMap[sendVCRank],
                          B.Grid().ViewingComm() );

                        // Unpack the data
                        copy::util::InterleaveMatrix
                        ( sendHeight, sendWidth,
                          recvBuf, 1, sendHeight,
                          B.Buffer(localColOffset,localRowOffset),
                          colLCM/colStride, (rowLCM/rowStride)*B.LDim() );

                        // Set up the next send col
                        sendCol = (sendCol + rowStride) % rowStrideA;
                    }
                    // Set up the next send row
                    sendRow = (sendRow + colStride) % colStrideA;
                }
            }
            // Ensure that this round of non-blocking sends completes
            if( inAGrid )
            {
                mpi::Wait( sendRequest );
                recvCol = (recvCol + rowStrideA) % rowStride;
            }
        }
        if( inAGrid )
            recvRow = (recvRow + colStrideA) % colStride;
    }
}
Пример #9
0
void AllGather
( const DistMatrix<T,        U,           V   >& A, 
        DistMatrix<T,Collect<U>(),Collect<V>()>& B ) 
{
    DEBUG_ONLY(CSE cse("copy::AllGather"))
    AssertSameGrids( A, B );

    const Int height = A.Height();
    const Int width = A.Width();
    B.SetGrid( A.Grid() );
    B.Resize( height, width );

    if( A.Participating() )
    {
        const Int colStride = A.ColStride();
        const Int rowStride = A.RowStride();
        const Int distStride = colStride*rowStride;
        const Int maxLocalHeight = MaxLength(height,colStride);
        const Int maxLocalWidth = MaxLength(width,rowStride);
        const Int portionSize = mpi::Pad( maxLocalHeight*maxLocalWidth );
        vector<T> buf( (distStride+1)*portionSize );
        T* sendBuf = &buf[0];
        T* recvBuf = &buf[portionSize];

        // Pack
        util::InterleaveMatrix
        ( A.LocalHeight(), A.LocalWidth(),
          A.LockedBuffer(), 1, A.LDim(),
          sendBuf,          1, A.LocalHeight() );

        // Communicate
        mpi::AllGather
        ( sendBuf, portionSize, recvBuf, portionSize, A.DistComm() );

        // Unpack
        util::StridedUnpack
        ( height, width,
          A.ColAlign(), colStride,
          A.RowAlign(), rowStride,
          recvBuf, portionSize,
          B.Buffer(), B.LDim() );
    }
    if( A.Grid().InGrid() && A.CrossComm() != mpi::COMM_SELF )
    {
        // Pack from the root
        const Int BLocalHeight = B.LocalHeight();
        const Int BLocalWidth = B.LocalWidth();
        vector<T> buf(BLocalHeight*BLocalWidth);
        if( A.CrossRank() == A.Root() )
            util::InterleaveMatrix
            ( BLocalHeight, BLocalWidth,
              B.LockedBuffer(), 1, B.LDim(),
              buf.data(),       1, BLocalHeight ); 

        // Broadcast from the root
        mpi::Broadcast
        ( buf.data(), BLocalHeight*BLocalWidth, A.Root(), A.CrossComm() );

        // Unpack if not the root
        if( A.CrossRank() != A.Root() )
            util::InterleaveMatrix
            ( BLocalHeight, BLocalWidth,
              buf.data(), 1, BLocalHeight,
              B.Buffer(), 1, B.LDim() );
    }
}
Пример #10
0
void TransposeDist( const DistMatrix<T,U,V>& A, DistMatrix<T,V,U>& B ) 
{
    DEBUG_ONLY(CSE cse("copy::TransposeDist"))
    AssertSameGrids( A, B );

    const Grid& g = B.Grid();
    B.Resize( A.Height(), A.Width() );
    if( !B.Participating() )
        return;

    const Int colStrideA = A.ColStride();
    const Int rowStrideA = A.RowStride();
    const Int distSize = A.DistSize();

    if( A.DistSize() == 1 && B.DistSize() == 1 ) 
    {
        Copy( A.LockedMatrix(), B.Matrix() );
    }
    else if( A.Width() == 1 )
    {
        const Int height = A.Height();
        const Int maxLocalHeight = MaxLength(height,distSize);
        const Int portionSize = mpi::Pad( maxLocalHeight );

        const Int colDiff = Shift(A.DistRank(),A.ColAlign(),distSize) - 
                            Shift(B.DistRank(),B.ColAlign(),distSize);
        const Int sendRankB = Mod( B.DistRank()+colDiff, distSize );
        const Int recvRankA = Mod( A.DistRank()-colDiff, distSize );
        const Int recvRankB = 
            (recvRankA/colStrideA)+rowStrideA*(recvRankA%colStrideA);

        vector<T> buffer;
        FastResize( buffer, (colStrideA+rowStrideA)*portionSize );
        T* sendBuf = &buffer[0];
        T* recvBuf = &buffer[colStrideA*portionSize];

        if( A.RowRank() == A.RowAlign() )
        {
            // Pack
            // TODO: Use kernel from copy::util
            const Int AColShift = A.ColShift();
            const T* ABuf = A.LockedBuffer();
            EL_PARALLEL_FOR
            for( Int k=0; k<rowStrideA; ++k )
            {
                T* data = &recvBuf[k*portionSize];

                const Int shift = 
                  Shift_(A.ColRank()+colStrideA*k,A.ColAlign(),distSize);
                const Int offset = (shift-AColShift) / colStrideA;
                const Int thisLocalHeight = Length_(height,shift,distSize);

                for( Int iLoc=0; iLoc<thisLocalHeight; ++iLoc )
                    data[iLoc] = ABuf[offset+iLoc*rowStrideA];
            }
        }

        // (e.g., A[VC,STAR] <- A[MC,MR])
        mpi::Scatter
        ( recvBuf, portionSize,
          sendBuf, portionSize, A.RowAlign(), A.RowComm() );

        // (e.g., A[VR,STAR] <- A[VC,STAR])
        mpi::SendRecv
        ( sendBuf, portionSize, sendRankB,
          recvBuf, portionSize, recvRankB, B.DistComm() );

        // (e.g., A[MR,MC] <- A[VR,STAR])
        mpi::Gather
        ( recvBuf, portionSize,
          sendBuf, portionSize, B.RowAlign(), B.RowComm() );

        if( B.RowRank() == B.RowAlign() )
        {
            // Unpack
            // TODO: Use kernel from copy::util
            T* bufB = B.Buffer();
            EL_PARALLEL_FOR
            for( Int k=0; k<colStrideA; ++k )
            {
                const T* data = &sendBuf[k*portionSize];

                const Int shift = 
                  Shift_(B.ColRank()+rowStrideA*k,B.ColAlign(),distSize);
                const Int offset = (shift-B.ColShift()) / rowStrideA;
                const Int thisLocalHeight = Length_(height,shift,distSize);

                for( Int iLoc=0; iLoc<thisLocalHeight; ++iLoc )
                    bufB[offset+iLoc*colStrideA] = data[iLoc];
            }
        }
    }
Пример #11
0
void ColAllToAllDemote
( const DistMatrix<T,Partial<U>(),PartialUnionRow<U,V>()>& A,
        DistMatrix<T,        U,                     V   >& B )
{
    DEBUG_ONLY(CallStackEntry cse("copy::ColAllToAllDemote"))
    AssertSameGrids( A, B );

    const Int height = A.Height();
    const Int width = A.Width();
    B.AlignColsAndResize( A.ColAlign(), height, width, false, false );
    if( !B.Participating() )
        return;

    const Int colAlign = B.ColAlign();
    const Int rowAlignA = A.RowAlign();

    const Int colStride = B.ColStride();
    const Int colStridePart = B.PartialColStride();
    const Int colStrideUnion = B.PartialUnionColStride();
    const Int colRankPart = B.PartialColRank();
    const Int colDiff = (colAlign%colStridePart) - A.ColAlign();

    const Int colShiftA = A.ColShift();

    const Int localHeightB = B.LocalHeight();
    const Int localWidthA = A.LocalWidth();
    const Int maxLocalHeight = MaxLength(height,colStride);
    const Int maxLocalWidth = MaxLength(width,colStrideUnion);
    const Int portionSize = mpi::Pad( maxLocalHeight*maxLocalWidth );

    std::vector<T> buffer( 2*colStrideUnion*portionSize );
    T* firstBuf  = &buffer[0];
    T* secondBuf = &buffer[colStrideUnion*portionSize];

    if( colDiff == 0 )
    {
        // Pack            
        util::PartialColStridedPack
        ( height, localWidthA,
          colAlign, colStride, 
          colStrideUnion, colStridePart, colRankPart,
          colShiftA,
          A.LockedBuffer(), A.LDim(),
          firstBuf,         portionSize );

        // Simultaneously Scatter in columns and Gather in rows
        mpi::AllToAll
        ( firstBuf,  portionSize,
          secondBuf, portionSize, B.PartialUnionColComm() );

        // Unpack
        util::RowStridedUnpack
        ( localHeightB, width,
          rowAlignA, colStrideUnion,
          secondBuf, portionSize,
          B.Buffer(), B.LDim() );
    }
    else
    {
#ifdef EL_UNALIGNED_WARNINGS
        if( B.Grid().Rank() == 0 )
            std::cerr << "Unaligned ColAllToAllDemote" << std::endl;
#endif
        const Int sendColRankPart = Mod( colRankPart+colDiff, colStridePart );
        const Int recvColRankPart = Mod( colRankPart-colDiff, colStridePart );

        // Pack
        util::PartialColStridedPack
        ( height, localWidthA,
          colAlign, colStride, 
          colStrideUnion, colStridePart, sendColRankPart,
          colShiftA,
          A.LockedBuffer(), A.LDim(),
          secondBuf,        portionSize );

        // Simultaneously Scatter in columns and Gather in rows
        mpi::AllToAll
        ( secondBuf, portionSize,
          firstBuf,  portionSize, B.PartialUnionColComm() );

        // Realign the result
        mpi::SendRecv
        ( firstBuf,  colStrideUnion*portionSize, sendColRankPart,
          secondBuf, colStrideUnion*portionSize, recvColRankPart,
          B.PartialColComm() );

        // Unpack
        util::RowStridedUnpack
        ( localHeightB, width,
          rowAlignA, colStrideUnion,
          secondBuf, portionSize,
          B.Buffer(), B.LDim() );
    }
}