Пример #1
0
// Unpacks `rowStride` packed portions into the height x width matrix B,
// where the columns of B are dealt out in blocks of `blockWidth` columns.
//
// Parameters:
//   height, width  - dimensions of the destination matrix B
//   rowAlign       - alignment passed to Shift_ to find each portion's shift
//   rowStride      - number of portions (and the block-column stride)
//   blockWidth     - nominal number of columns in a block
//   rowCut         - number of columns removed from the very first block
//   APortions      - packed input; portion k starts at k*portionSize and is
//                    stored with leading dimension `height`
//   portionSize    - number of entries reserved for each portion
//   B, BLDim       - destination buffer and its leading dimension
//
// Every block, including the shortened first one, is clamped to the columns
// that remain in B so that a matrix narrower than the first block does not
// cause writes past column `width`.
void BlockedRowStridedUnpack
( Int height, Int width,
  Int rowAlign, Int rowStride,
  Int blockWidth, Int rowCut,
  const T* APortions, Int portionSize,
        T* B,         Int BLDim )
{
    // The first block column is shortened by the cut
    const Int firstBlockWidth = blockWidth - rowCut;
    for( Int portion=0; portion<rowStride; ++portion )
    {
        const T* APortion = &APortions[portion*portionSize];
        const Int rowShift = Shift_( portion, rowAlign, rowStride );
        // Loop over the block columns from this portion
        Int blockCol = rowShift;
        // Global column at which block column `rowShift` begins
        Int colIndex =
          ( rowShift==0 ? 0 : firstBlockWidth + (rowShift-1)*blockWidth );
        Int packedColIndex = 0;
        while( colIndex < width )
        {
            // Only block column zero is affected by the cut, but every
            // block must be truncated at the right edge of B.
            const Int fullBlockWidth =
              ( blockCol == 0 ? firstBlockWidth : blockWidth );
            const Int thisBlockWidth = Min(fullBlockWidth,width-colIndex);

            lapack::Copy
            ( 'F', height, thisBlockWidth,
              &APortion[packedColIndex*height], height,
              &B[colIndex*BLDim],               BLDim );

            // Skip over the blocks that belong to the other portions
            blockCol += rowStride;
            colIndex += thisBlockWidth + (rowStride-1)*blockWidth;
            packedColIndex += thisBlockWidth;
        }
    }
}
Пример #2
0
// Assigns a [VR,* ] distributed matrix to this [* ,* ] matrix.
//
// Each process packs its local rows of A into a padded send buffer, the
// portions are all-gathered over the grid's VR communicator, and every
// received portion is scattered with row stride p into the local buffer.
// Returns *this.
const DistMatrix<T,STAR,STAR>&
DistMatrix<T,STAR,STAR>::operator=( const DistMatrix<T,VR,STAR>& A )
{
#ifndef RELEASE
    CallStackEntry entry("[* ,* ] = [VR,* ]");
    this->AssertNotLocked();
    this->AssertSameGrid( A.Grid() );
#endif
    const elem::Grid& grid = this->Grid();
    this->ResizeTo( A.Height(), A.Width() );
    if( this->Participating() )
    {
        const Int numProcs = grid.Size();
        const Int m = this->Height();
        const Int n = this->Width();
        const Int mLocA = A.LocalHeight();

        // One portion for sending followed by numProcs portions for receiving
        const Int portionSize = mpi::Pad( MaxLength(m,numProcs)*n );
        T* buffer = this->auxMemory_.Require( (numProcs+1)*portionSize );
        T* sendBuf = buffer;
        T* recvBuf = buffer + portionSize;

        // Pack the local columns of A contiguously
        const Int ALDim = A.LDim();
        const T* ABuf = A.LockedBuffer();
        PARALLEL_FOR
        for( Int j=0; j<n; ++j )
            MemCopy( sendBuf + j*mLocA, ABuf + j*ALDim, mLocA );

        // Communicate
        mpi::AllGather
        ( sendBuf, portionSize, recvBuf, portionSize, grid.VRComm() );

        // Unpack: rows from rank `rank` land at shift, shift+p, shift+2p, ...
        T* thisBuf = this->Buffer();
        const Int thisLDim = this->LDim();
        const Int colAlignA = A.ColAlignment();
        OUTER_PARALLEL_FOR
        for( Int rank=0; rank<numProcs; ++rank )
        {
            const Int shift = Shift_( rank, colAlignA, numProcs );
            const Int mLoc = Length_( m, shift, numProcs );
            const T* portion = recvBuf + rank*portionSize;
            T* destBase = thisBuf + shift;
            INNER_PARALLEL_FOR
            for( Int j=0; j<n; ++j )
            {
                const T* src = portion + j*mLoc;
                T* dest = destBase + j*thisLDim;
                for( Int i=0; i<mLoc; ++i )
                    dest[i*numProcs] = src[i];
            }
        }
        this->auxMemory_.Release();
    }
    return *this;
}
Пример #3
0
// Assigns a [* ,VR] distributed matrix to this [* ,* ] matrix.
//
// Each process packs its local columns of A into a padded send buffer, the
// portions are all-gathered over the grid's VR communicator, and every
// received portion is copied column-by-column with column stride p into the
// local buffer. Returns *this.
const DistMatrix<T,STAR,STAR>&
DistMatrix<T,STAR,STAR>::operator=( const DistMatrix<T,STAR,VR>& A )
{
#ifndef RELEASE
    CallStackEntry entry("[* ,* ] = [* ,VR]");
    this->AssertNotLocked();
    this->AssertSameGrid( A.Grid() );
#endif
    const elem::Grid& grid = this->Grid();
    this->ResizeTo( A.Height(), A.Width() );
    if( this->Participating() )
    {
        const Int numProcs = grid.Size();
        const Int m = this->Height();
        const Int n = this->Width();
        const Int nLocA = A.LocalWidth();

        // One portion for sending followed by numProcs portions for receiving
        const Int portionSize = mpi::Pad( m*MaxLength(n,numProcs) );
        T* buffer = this->auxMemory_.Require( (numProcs+1)*portionSize );
        T* sendBuf = buffer;
        T* recvBuf = buffer + portionSize;

        // Pack the local columns of A contiguously
        const Int ALDim = A.LDim();
        const T* ABuf = A.LockedBuffer();
        PARALLEL_FOR
        for( Int j=0; j<nLocA; ++j )
            MemCopy( sendBuf + j*m, ABuf + j*ALDim, m );

        // Communicate
        mpi::AllGather
        ( sendBuf, portionSize, recvBuf, portionSize, grid.VRComm() );

        // Unpack: columns from rank `rank` land at shift, shift+p, ...
        T* thisBuf = this->Buffer();
        const Int thisLDim = this->LDim();
        const Int rowAlignA = A.RowAlignment();
        OUTER_PARALLEL_FOR
        for( Int rank=0; rank<numProcs; ++rank )
        {
            const Int shift = Shift_( rank, rowAlignA, numProcs );
            const Int nLoc = Length_( n, shift, numProcs );
            const T* portion = recvBuf + rank*portionSize;
            INNER_PARALLEL_FOR
            for( Int j=0; j<nLoc; ++j )
                MemCopy
                ( thisBuf + (shift+j*numProcs)*thisLDim, portion + j*m, m );
        }
        this->auxMemory_.Release();
    }
    return *this;
}
Пример #4
0
// Unpacks a colStride x rowStride collection of packed portions into B.
//
// The portion for (k,l) starts at (k+l*colStride)*portionSize in APortions
// and is stored contiguously with leading dimension equal to its local
// height. Its entries are written into B starting at the (colShift,rowShift)
// entry, with row stride colStride and column stride rowStride*BLDim.
void StridedUnpack
( Int height, Int width,
  Int colAlign, Int colStride,
  Int rowAlign, Int rowStride,
  const T* APortions, Int portionSize,
        T* B,         Int BLDim )
{
    const Int BColStride = rowStride*BLDim;
    for( Int rowRank=0; rowRank<rowStride; ++rowRank )
    {
        const Int jShift = Shift_( rowRank, rowAlign, rowStride );
        const Int numLocalCols = Length_( width, jShift, rowStride );
        // First portion belonging to this row rank, and B's first column
        const T* rowPortions = APortions + rowRank*colStride*portionSize;
        T* BCols = B + jShift*BLDim;
        for( Int colRank=0; colRank<colStride; ++colRank )
        {
            const Int iShift = Shift_( colRank, colAlign, colStride );
            const Int numLocalRows = Length_( height, iShift, colStride );
            InterleaveMatrix
            ( numLocalRows, numLocalCols,
              rowPortions + colRank*portionSize, 1, numLocalRows,
              BCols + iShift, colStride, BColStride );
        }
    }
}
Пример #5
0
// Packs a single column A of length `height` into colStride portions.
//
// Portion k starts at k*portionSize in BPortions and receives, contiguously,
// the entries of A starting at that portion's shift and taken with stride
// colStride.
void ColStridedColumnPack
( Int height,
  Int colAlign, Int colStride,
  const T* A,
        T* BPortions, Int portionSize )
{
    for( Int rank=0; rank<colStride; ++rank )
    {
        const Int shift = Shift_( rank, colAlign, colStride );
        const Int numEntries = Length_( height, shift, colStride );
        T* dest = BPortions + rank*portionSize;
        const T* src = A + shift;
        StridedMemCopy( dest, 1, src, colStride, numEntries );
    }
}
Пример #6
0
// Unpacks colStride packed portions into the rows of B.
//
// Portion k starts at k*portionSize in APortions and is stored contiguously
// with leading dimension equal to its local height. Its rows are written
// into B starting at row colShift with row stride colStride; all `width`
// columns are copied, using BLDim as B's column stride.
void ColStridedUnpack
( Int height, Int width,
  Int colAlign, Int colStride,
  const T* APortions, Int portionSize,
        T* B,         Int BLDim )
{
    for( Int rank=0; rank<colStride; ++rank )
    {
        const Int shift = Shift_( rank, colAlign, colStride );
        const Int numRows = Length_( height, shift, colStride );
        const T* packed = APortions + rank*portionSize;
        T* dest = B + shift;
        InterleaveMatrix
        ( numRows, width, packed, 1, numRows, dest, colStride, BLDim );
    }
}
Пример #7
0
// Packs the rows of A into colStride contiguous portions.
//
// For portion k, the rows of A starting at row colShift and taken with row
// stride colStride are copied (all `width` columns) into the region starting
// at k*portionSize in BPortions, stored with leading dimension equal to the
// portion's local height.
void ColStridedPack
( Int height, Int width,
  Int colAlign, Int colStride,
  const T* A,         Int ALDim,
        T* BPortions, Int portionSize )
{
    for( Int rank=0; rank<colStride; ++rank )
    {
        const Int shift = Shift_( rank, colAlign, colStride );
        const Int numRows = Length_( height, shift, colStride );
        const T* src = A + shift;
        T* packed = BPortions + rank*portionSize;
        InterleaveMatrix
        ( numRows, width, src, colStride, ALDim, packed, 1, numRows );
    }
}
Пример #8
0
// Unpacks rowStride packed portions into the columns of B.
//
// Portion k starts at k*portionSize in APortions and is stored with leading
// dimension `height`. Its columns are written into B starting at column
// rowShift and then every rowStride-th column after that.
void RowStridedUnpack
( Int height, Int width,
  Int rowAlign, Int rowStride,
  const T* APortions, Int portionSize,
        T* B,         Int BLDim )
{
    // Stepping by this leading dimension skips rowStride columns of B
    const Int BStridedLDim = rowStride*BLDim;
    for( Int rank=0; rank<rowStride; ++rank )
    {
        const Int shift = Shift_( rank, rowAlign, rowStride );
        const Int numCols = Length_( width, shift, rowStride );
        const T* packed = APortions + rank*portionSize;
        T* dest = B + shift*BLDim;
        lapack::Copy
        ( 'F', height, numCols, packed, height, dest, BStridedLDim );
    }
}
Пример #9
0
// Packs the columns of A into rowStride contiguous portions.
//
// For portion k, the columns of A starting at column rowShift and then every
// rowStride-th column after that are copied into the region starting at
// k*portionSize in BPortions, stored with leading dimension `height`.
void RowStridedPack
( Int height, Int width,
  Int rowAlign, Int rowStride,
  const T* A,         Int ALDim,
        T* BPortions, Int portionSize )
{
    // Stepping by this leading dimension skips rowStride columns of A
    const Int AStridedLDim = rowStride*ALDim;
    for( Int rank=0; rank<rowStride; ++rank )
    {
        const Int shift = Shift_( rank, rowAlign, rowStride );
        const Int numCols = Length_( width, shift, rowStride );
        const T* src = A + shift*ALDim;
        T* packed = BPortions + rank*portionSize;
        lapack::Copy
        ( 'F', height, numCols, src, AStridedLDim, packed, height );
    }
}
Пример #10
0
// Unpacks colStrideUnion packed portions into the single column B.
//
// For portion k the shift is computed for rank colRankPart+k*colStridePart
// within colStride; the portion's entries (read contiguously from
// k*portionSize in APortions) are written into B starting at offset
// (colShift-colShiftB)/colStridePart with stride colStrideUnion.
void PartialColStridedColumnUnpack
( Int height,
  Int colAlign, Int colStride,
  Int colStrideUnion, Int colStridePart, Int colRankPart,
  Int colShiftB,
  const T* APortions, Int portionSize,
        T* B )
{
    for( Int part=0; part<colStrideUnion; ++part )
    {
        const Int rank = colRankPart + part*colStridePart;
        const Int shift = Shift_( rank, colAlign, colStride );
        const Int numEntries = Length_( height, shift, colStride );
        // Position of this portion's first entry relative to B's own shift
        const Int offset = (shift-colShiftB) / colStridePart;
        T* dest = B + offset;
        const T* packed = APortions + part*portionSize;
        StridedMemCopy( dest, colStrideUnion, packed, 1, numEntries );
    }
}
Пример #11
0
// Packs the rows of A into colStrideUnion contiguous portions.
//
// For portion k the shift is computed for rank colRankPart+k*colStridePart
// within colStride; the rows of A starting at offset
// (colShift-colShiftA)/colStridePart and taken with row stride
// colStrideUnion are copied into the region starting at k*portionSize in
// BPortions, stored with leading dimension equal to the local height.
void PartialColStridedPack
( Int height, Int width,
  Int colAlign, Int colStride,
  Int colStrideUnion, Int colStridePart, Int colRankPart,
  Int colShiftA,
  const T* A,         Int ALDim,
        T* BPortions, Int portionSize )
{
    for( Int part=0; part<colStrideUnion; ++part )
    {
        const Int rank = colRankPart + part*colStridePart;
        const Int shift = Shift_( rank, colAlign, colStride );
        const Int numRows = Length_( height, shift, colStride );
        // Position of this portion's first row relative to A's own shift
        const Int offset = (shift-colShiftA) / colStridePart;
        const T* src = A + offset;
        T* packed = BPortions + part*portionSize;
        InterleaveMatrix
        ( numRows, width, src, colStrideUnion, ALDim, packed, 1, numRows );
    }
}
Пример #12
0
// Packs the columns of A into rowStrideUnion contiguous portions.
//
// For portion k the shift is computed for rank rowRankPart+k*rowStridePart
// within rowStride; the columns of A starting at column
// (rowShift-rowShiftA)/rowStridePart and then every rowStrideUnion-th column
// are copied into the region starting at k*portionSize in BPortions, stored
// with leading dimension `height`.
void PartialRowStridedPack
( Int height, Int width,
  Int rowAlign, Int rowStride,
  Int rowStrideUnion, Int rowStridePart, Int rowRankPart,
  Int rowShiftA,
  const T* A,         Int ALDim,
        T* BPortions, Int portionSize )
{
    // Stepping by this leading dimension skips rowStrideUnion columns of A
    const Int AStridedLDim = rowStrideUnion*ALDim;
    for( Int part=0; part<rowStrideUnion; ++part )
    {
        const Int rank = rowRankPart + part*rowStridePart;
        const Int shift = Shift_( rank, rowAlign, rowStride );
        const Int numCols = Length_( width, shift, rowStride );
        // Position of this portion's first column relative to A's own shift
        const Int offset = (shift-rowShiftA) / rowStridePart;
        const T* src = A + offset*ALDim;
        T* packed = BPortions + part*portionSize;
        lapack::Copy
        ( 'F', height, numCols, src, AStridedLDim, packed, height );
    }
}
Пример #13
0
// Unpacks colStrideUnion packed portions into the rows of B.
//
// For portion k the shift is computed for rank colRankPart+k*colStridePart
// within colStride; the portion (read from k*portionSize in APortions with
// leading dimension equal to its local height) is written into B starting at
// row (colShift-colShiftB)/colStridePart with row stride colStrideUnion.
void PartialColStridedUnpack
( Int height, Int width,
  Int colAlign, Int colStride,
  Int colStrideUnion, Int colStridePart, Int colRankPart,
  Int colShiftB,
  const T* APortions, Int portionSize,
        T* B,         Int BLDim )
{
    for( Int part=0; part<colStrideUnion; ++part )
    {
        const Int rank = colRankPart + part*colStridePart;
        const Int shift = Shift_( rank, colAlign, colStride );
        const Int numRows = Length_( height, shift, colStride );
        // Position of this portion's first row relative to B's own shift
        const Int offset = (shift-colShiftB) / colStridePart;
        const T* packed = APortions + part*portionSize;
        T* dest = B + offset;
        InterleaveMatrix
        ( numRows, width, packed, 1, numRows, dest, colStrideUnion, BLDim );
    }
}
Пример #14
0
// Unpacks rowStrideUnion packed portions into the columns of B.
//
// For portion k the shift is computed for rank rowRankPart+k*rowStridePart
// within rowStride; the portion (read from k*portionSize in APortions with
// leading dimension `height`) is written into B starting at column
// (rowShift-rowShiftB)/rowStridePart and then every rowStrideUnion-th column.
void PartialRowStridedUnpack
( Int height, Int width,
  Int rowAlign, Int rowStride,
  Int rowStrideUnion, Int rowStridePart, Int rowRankPart,
  Int rowShiftB,
  const T* APortions, Int portionSize,
        T* B,         Int BLDim )
{
    // Stepping by this leading dimension skips rowStrideUnion columns of B
    const Int BStridedLDim = rowStrideUnion*BLDim;
    for( Int part=0; part<rowStrideUnion; ++part )
    {
        const Int rank = rowRankPart + part*rowStridePart;
        const Int shift = Shift_( rank, rowAlign, rowStride );
        const Int numCols = Length_( width, shift, rowStride );
        // Position of this portion's first column relative to B's own shift
        const Int offset = (shift-rowShiftB) / rowStridePart;
        const T* packed = APortions + part*portionSize;
        T* dest = B + offset*BLDim;
        lapack::Copy
        ( 'F', height, numCols, packed, height, dest, BStridedLDim );
    }
}
Пример #15
0
// Unpacks `colStride` packed portions into the height x width matrix B,
// where the rows of B are dealt out in blocks of `blockHeight` rows.
//
// Parameters:
//   height, width  - dimensions of the destination matrix B
//   colAlign       - alignment passed to Shift_ to find each portion's shift
//   colStride      - number of portions (and the block-row stride)
//   blockHeight    - nominal number of rows in a block
//   colCut         - number of rows removed from the very first block
//   APortions      - packed input; portion k starts at k*portionSize and is
//                    stored with leading dimension equal to the value
//                    returned by BlockedLength_ for that portion
//   portionSize    - number of entries reserved for each portion
//   B, BLDim       - destination buffer and its leading dimension
//
// Every block, including the shortened first one, is clamped to the rows
// that remain in B so that a matrix shorter than the first block does not
// cause reads or writes past row `height`.
void BlockedColStridedUnpack
( Int height, Int width,
  Int colAlign, Int colStride,
  Int blockHeight, Int colCut,
  const T* APortions, Int portionSize,
        T* B,         Int BLDim )
{
    // The first block row is shortened by the cut
    const Int firstBlockHeight = blockHeight - colCut;
    for( Int portion=0; portion<colStride; ++portion )
    {
        const T* APortion = &APortions[portion*portionSize];
        const Int colShift = Shift_( portion, colAlign, colStride );
        const Int localHeight =
          BlockedLength_( height, colShift, blockHeight, colCut, colStride );

        // Loop over the block rows from this portion
        Int blockRow = colShift;
        // Global row at which block row `colShift` begins
        Int rowIndex =
          ( colShift==0 ? 0 : firstBlockHeight + (colShift-1)*blockHeight );
        Int packedRowIndex = 0;
        while( rowIndex < height )
        {
            // Only block row zero is affected by the cut, but every block
            // must be truncated at the bottom edge of B.
            const Int fullBlockHeight =
              ( blockRow == 0 ? firstBlockHeight : blockHeight );
            const Int thisBlockHeight = Min(fullBlockHeight,height-rowIndex);

            lapack::Copy
            ( 'F', thisBlockHeight, width,
              &APortion[packedRowIndex], localHeight,
              &B[rowIndex],              BLDim );

            // Skip over the blocks that belong to the other portions
            blockRow += colStride;
            rowIndex += thisBlockHeight + (colStride-1)*blockHeight;
            packedRowIndex += thisBlockHeight;
        }
    }
}
Пример #16
0
// Copies A, distributed as [U,V], into B, distributed as [V,U]; B is resized
// to A's dimensions first, and processes not participating in B return early.
//
// Visible branches:
//   * both distributions span a single process: plain local copy;
//   * A has exactly one column: Scatter within A's row communicator, a
//     SendRecv over B's distribution communicator, then a Gather within B's
//     row communicator.
// NOTE(review): this excerpt ends before the function's closing brace, so any
// remaining branches are not shown here.
void TransposeDist( const DistMatrix<T,U,V>& A, DistMatrix<T,V,U>& B ) 
{
    DEBUG_ONLY(CSE cse("copy::TransposeDist"))
    AssertSameGrids( A, B );

    // NOTE(review): g is not referenced in the visible part of the function
    const Grid& g = B.Grid();
    B.Resize( A.Height(), A.Width() );
    if( !B.Participating() )
        return;

    const Int colStrideA = A.ColStride();
    const Int rowStrideA = A.RowStride();
    const Int distSize = A.DistSize();

    if( A.DistSize() == 1 && B.DistSize() == 1 ) 
    {
        // Single-process distributions on both sides: copy the local matrices
        Copy( A.LockedMatrix(), B.Matrix() );
    }
    else if( A.Width() == 1 )
    {
        const Int height = A.Height();
        const Int maxLocalHeight = MaxLength(height,distSize);
        const Int portionSize = mpi::Pad( maxLocalHeight );

        // Partner ranks for the SendRecv step, derived from the difference
        // between the shifts of A and B over the distSize processes
        const Int colDiff = Shift(A.DistRank(),A.ColAlign(),distSize) - 
                            Shift(B.DistRank(),B.ColAlign(),distSize);
        const Int sendRankB = Mod( B.DistRank()+colDiff, distSize );
        const Int recvRankA = Mod( A.DistRank()-colDiff, distSize );
        // Convert recvRankA from a (colStrideA,rowStrideA) ordering to the
        // transposed ordering
        const Int recvRankB = 
            (recvRankA/colStrideA)+rowStrideA*(recvRankA%colStrideA);

        // sendBuf occupies the first colStrideA portions and recvBuf the
        // remaining rowStrideA portions of a single allocation
        vector<T> buffer;
        FastResize( buffer, (colStrideA+rowStrideA)*portionSize );
        T* sendBuf = &buffer[0];
        T* recvBuf = &buffer[colStrideA*portionSize];

        // Only the processes whose row rank equals A's row alignment pack
        if( A.RowRank() == A.RowAlign() )
        {
            // Pack
            // TODO: Use kernel from copy::util
            // Note that the packed data is written into recvBuf, which is
            // then passed as the first buffer of the Scatter below.
            const Int AColShift = A.ColShift();
            const T* ABuf = A.LockedBuffer();
            EL_PARALLEL_FOR
            for( Int k=0; k<rowStrideA; ++k )
            {
                T* data = &recvBuf[k*portionSize];

                const Int shift = 
                  Shift_(A.ColRank()+colStrideA*k,A.ColAlign(),distSize);
                const Int offset = (shift-AColShift) / colStrideA;
                const Int thisLocalHeight = Length_(height,shift,distSize);

                // Gather every rowStrideA-th local entry starting at offset
                for( Int iLoc=0; iLoc<thisLocalHeight; ++iLoc )
                    data[iLoc] = ABuf[offset+iLoc*rowStrideA];
            }
        }

        // (e.g., A[VC,STAR] <- A[MC,MR])
        mpi::Scatter
        ( recvBuf, portionSize,
          sendBuf, portionSize, A.RowAlign(), A.RowComm() );

        // (e.g., A[VR,STAR] <- A[VC,STAR])
        mpi::SendRecv
        ( sendBuf, portionSize, sendRankB,
          recvBuf, portionSize, recvRankB, B.DistComm() );

        // (e.g., A[MR,MC] <- A[VR,STAR])
        mpi::Gather
        ( recvBuf, portionSize,
          sendBuf, portionSize, B.RowAlign(), B.RowComm() );

        // Only the processes whose row rank equals B's row alignment unpack
        if( B.RowRank() == B.RowAlign() )
        {
            // Unpack
            // TODO: Use kernel from copy::util
            T* bufB = B.Buffer();
            EL_PARALLEL_FOR
            for( Int k=0; k<colStrideA; ++k )
            {
                const T* data = &sendBuf[k*portionSize];

                const Int shift = 
                  Shift_(B.ColRank()+rowStrideA*k,B.ColAlign(),distSize);
                const Int offset = (shift-B.ColShift()) / rowStrideA;
                const Int thisLocalHeight = Length_(height,shift,distSize);

                // Scatter into every colStrideA-th local entry from offset
                for( Int iLoc=0; iLoc<thisLocalHeight; ++iLoc )
                    bufB[offset+iLoc*colStrideA] = data[iLoc];
            }
        }
    }
Пример #17
0
// Assigns a [* ,MD] distributed matrix to this [* ,* ] matrix.
//
// Participating processes pack their local columns of A into a padded send
// buffer, the portions are all-gathered over the grid's VC communicator, and
// only the portions from ranks on A's diagonal path are unpacked, with
// column stride equal to the grid's LCM. Returns *this.
const DistMatrix<T,STAR,STAR>&
DistMatrix<T,STAR,STAR>::operator=( const DistMatrix<T,STAR,MD>& A )
{
#ifndef RELEASE
    CallStackEntry entry("[* ,* ] = [* ,MD]");
    this->AssertNotLocked();
    this->AssertSameGrid( A.Grid() );
#endif
    const elem::Grid& grid = this->Grid();
    this->ResizeTo( A.Height(), A.Width() );
    if( this->Participating() )
    {
        const Int numProcs = grid.Size();
        const Int lcm = grid.LCM();
        const Int ownerPath = A.diagPath_;
        const Int ownerPathRank = A.rowAlignment_;

        const Int m = this->Height();
        const Int n = this->Width();
        const Int nLocA = A.LocalWidth();
        const Int portionSize = mpi::Pad( m*MaxLength( n, lcm ) );

        // No MD communicator has been implemented, so every process in the
        // VC communicator contributes a (possibly unused) padded portion.
        T* buffer = this->auxMemory_.Require( (numProcs+1)*portionSize );
        T* sendBuf = buffer;
        T* recvBuf = buffer + portionSize;

        // Pack the local columns of A contiguously
        if( A.Participating() )
        {
            const Int ALDim = A.LDim();
            const T* ABuf = A.LockedBuffer();
            PARALLEL_FOR
            for( Int j=0; j<nLocA; ++j )
                MemCopy( sendBuf + j*m, ABuf + j*ALDim, m );
        }

        // Communicate
        mpi::AllGather
        ( sendBuf, portionSize, recvBuf, portionSize, grid.VCComm() );

        // Unpack only the portions that came from A's diagonal path
        T* thisBuf = this->Buffer();
        const Int thisLDim = this->LDim();
        OUTER_PARALLEL_FOR
        for( Int rank=0; rank<numProcs; ++rank )
        {
            if( grid.DiagPath( rank ) != ownerPath )
                continue;

            const T* portion = recvBuf + rank*portionSize;
            const Int pathRank = grid.DiagPathRank( rank );
            const Int shift = Shift_( pathRank, ownerPathRank, lcm );
            const Int nLoc = Length_( n, shift, lcm );
            INNER_PARALLEL_FOR
            for( Int j=0; j<nLoc; ++j )
                MemCopy
                ( thisBuf + (shift+j*lcm)*thisLDim, portion + j*m, m );
        }
        this->auxMemory_.Release();
    }
    return *this;
}