예제 #1
0
파일: Ger.hpp 프로젝트: SamKChang/madness
// Applies Ger to the locally stored portions of x, y, and A, scaled by alpha.
// No redistribution is performed here, and (per the TODO below) no check is
// made that the three distributions are compatible.
inline void LocalGer
( T alpha, const DistMatrix<T,xColDist,xRowDist>& x,
           const DistMatrix<T,yColDist,yRowDist>& y,
                 DistMatrix<T,AColDist,ARowDist>& A )
{
    DEBUG_ONLY(CallStackEntry cse("LocalGer"))
    // TODO: Add error checking here
    const auto& xLocal = x.LockedMatrix();
    const auto& yLocal = y.LockedMatrix();
    auto& ALocal = A.Matrix();
    Ger( alpha, xLocal, yLocal, ALocal );
}
예제 #2
0
// Applies Ger to the locally stored portions of x, y, and A, scaled by alpha.
// Only the local matrices are touched; the distributions are not validated.
inline void LocalGer
( T alpha, const DistMatrix<T,xColDist,xRowDist>& x,
  const DistMatrix<T,yColDist,yRowDist>& y,
  DistMatrix<T,AColDist,ARowDist>& A )
{
#ifndef RELEASE
    // Call-stack tracking is only compiled into non-release builds.
    CallStackEntry cse("LocalGer");
    // TODO: Add error checking here
#endif
    const auto& xLocal = x.LockedMatrix();
    const auto& yLocal = y.LockedMatrix();
    auto& ALocal = A.Matrix();
    Ger( alpha, xLocal, yLocal, ALocal );
}
예제 #3
0
void StackedGeometricColumnScaling
( const DistMatrix<Field,      U,V   >& A,
  const DistMatrix<Field,      U,V   >& B,
        DistMatrix<Base<Field>,V,STAR>& geomScaling )
{
    EL_DEBUG_CSE
    // NOTE: Assuming A.ColComm() == B.ColComm() and that the row alignments
    //       are equal
    typedef Base<Field> Real;

    DistMatrix<Real,V,STAR> maxScalingA(A.Grid()),
                            maxScalingB(A.Grid());
    ColumnMaxNorms( A, maxScalingA );
    ColumnMaxNorms( B, maxScalingB );

    const Int mLocalA = A.LocalHeight();
    const Int mLocalB = B.LocalHeight();
    const Int nLocal = A.LocalWidth();
    geomScaling.AlignWith( maxScalingA );
    geomScaling.Resize( A.Width(), 1 );
    auto& ALoc = A.LockedMatrix();
    auto& BLoc = B.LockedMatrix();
    auto& geomScalingLoc = geomScaling.Matrix();
    auto& maxScalingALoc = maxScalingA.Matrix();
    auto& maxScalingBLoc = maxScalingB.Matrix();
    for( Int jLoc=0; jLoc<nLocal; ++jLoc )
    {
        Real minAbs = Max(maxScalingALoc(jLoc),maxScalingBLoc(jLoc));
        for( Int iLoc=0; iLoc<mLocalA; ++iLoc )
        {
            const Real absVal = Abs(ALoc(iLoc,jLoc));
            if( absVal > 0 && absVal < minAbs )
                minAbs = Min(minAbs,absVal);
        }
        for( Int iLoc=0; iLoc<mLocalB; ++iLoc )
        {
            const Real absVal = Abs(BLoc(iLoc,jLoc));
            if( absVal > 0 && absVal < minAbs )
                minAbs = Min(minAbs,absVal);
        }
        geomScalingLoc(jLoc) = minAbs;
    }
    mpi::AllReduce( geomScaling.Buffer(), nLocal, mpi::MIN, A.ColComm() );

    for( Int jLoc=0; jLoc<nLocal; ++jLoc )
    {
        const Real maxAbsA = maxScalingALoc(jLoc);
        const Real maxAbsB = maxScalingBLoc(jLoc);
        const Real maxAbs = Max(maxAbsA,maxAbsB);
        const Real minAbs = geomScalingLoc(jLoc);
        geomScalingLoc(jLoc) = Sqrt(minAbs*maxAbs);
    }
}
예제 #4
0
// Copy-assignment between two [MD,* ] matrices.
//
// When this matrix is neither a view nor constrained in its column
// alignment, it first adopts A's diagonal path and column alignment. After
// resizing to A's dimensions, the local data is copied if the two matrices
// are aligned; the unaligned case is not implemented and raises a LogicError.
const DistMatrix<T,MD,STAR>&
DistMatrix<T,MD,STAR>::operator=( const DistMatrix<T,MD,STAR>& A )
{
#ifndef RELEASE
    CallStackEntry entry("[MD,* ] = [MD,* ]");
    this->AssertNotLocked();
    this->AssertSameGrid( A.Grid() );
#endif
    const bool mayRealign =
        !( this->Viewing() || this->ConstrainedColAlignment() );
    if( mayRealign )
    {
        this->diagPath_ = A.diagPath_;
        this->colAlignment_ = A.colAlignment_;
        if( this->Participating() )
            this->colShift_ = A.ColShift();
    }
    this->ResizeTo( A.Height(), A.Width() );

    const bool misaligned =
        this->diagPath_ != A.diagPath_ ||
        this->colAlignment_ != A.colAlignment_;
    if( misaligned )
    {
#ifdef UNALIGNED_WARNINGS
        if( this->Grid().Rank() == 0 )
            std::cerr << "Unaligned [MD,* ] <- [MD,* ]." << std::endl;
#endif
        LogicError("Unaligned [MD,* ] = [MD,* ] not yet implemented");
    }
    else
    {
        // Same distribution on both sides: a local copy suffices
        this->matrix_ = A.LockedMatrix();
    }
    return *this;
}
예제 #5
0
// Fills norms (aligned with A, one entry per row of A) by applying the
// sequential RowMaxNorms to the local data and then combining the partial
// results with a MAX all-reduce over A's row communicator.
void RowMaxNorms
( const DistMatrix<F,U,V>& A, DistMatrix<Base<F>,U,STAR>& norms )
{
    DEBUG_CSE
    norms.AlignWith( A );
    norms.Resize( A.Height(), 1 );

    // Local contribution
    const auto& ALocal = A.LockedMatrix();
    auto& normsLocal = norms.Matrix();
    RowMaxNorms( ALocal, normsLocal );

    // Combine across the processes sharing each row
    AllReduce( norms, A.RowComm(), mpi::MAX );
}
예제 #6
0
// Fills mins (aligned with A, one entry per column of A) by applying the
// sequential ColumnMinAbs to the local data and then combining the partial
// results with a MIN all-reduce over A's column communicator.
void ColumnMinAbs
( const DistMatrix<F,U,V>& A, DistMatrix<Base<F>,V,STAR>& mins )
{
    EL_DEBUG_CSE
    mins.AlignWith( A );
    mins.Resize( A.Width(), 1 );

    // Local contribution
    auto& minsLocal = mins.Matrix();
    ColumnMinAbs( A.LockedMatrix(), minsLocal );

    // Combine across the processes sharing each column
    AllReduce( minsLocal, A.ColComm(), mpi::MIN );
}
예제 #7
0
// Applies the diagonal scaling described by d to X from the requested side.
//
// If d is already distributed conformally with the relevant dimension of X
// (matching distribution, [*, STAR] row distribution, and equal alignment),
// the local kernel is called directly. Otherwise d is first redistributed
// into a conforming [W,STAR] (left) or [Z,STAR] (right) matrix on X's grid.
inline void
DiagonalScale
( LeftOrRight side, Orientation orientation,
  const DistMatrix<typename Base<T>::type,U,V>& d, DistMatrix<T,W,Z>& X )
{
#ifndef RELEASE
    PushCallStack("DiagonalScale");
#endif
    typedef typename Base<T>::type R;

    if( side != LEFT )
    {
        const bool conforms =
            ( U == Z && V == STAR && d.ColAlignment() == X.RowAlignment() );
        if( !conforms )
        {
            // Redistribute d to match the rows of X's row distribution
            DistMatrix<R,Z,STAR> dConforming( X.Grid() );
            dConforming = d;
            DiagonalScale
            ( RIGHT, orientation, dConforming.LockedMatrix(), X.Matrix() );
        }
        else
        {
            DiagonalScale( RIGHT, orientation, d.LockedMatrix(), X.Matrix() );
        }
    }
    else
    {
        const bool conforms =
            ( U == W && V == STAR && d.ColAlignment() == X.ColAlignment() );
        if( !conforms )
        {
            // Redistribute d to match X's column distribution
            DistMatrix<R,W,STAR> dConforming( X.Grid() );
            dConforming = d;
            DiagonalScale
            ( LEFT, orientation, dConforming.LockedMatrix(), X.Matrix() );
        }
        else
        {
            DiagonalScale( LEFT, orientation, d.LockedMatrix(), X.Matrix() );
        }
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
예제 #8
0
// Prints a [CIRC,CIRC] matrix: only the process whose VC rank equals A's
// root forwards its local matrix, together with the title and output stream,
// to the sequential Print. Every other process does nothing.
inline void
Print
( const DistMatrix<T,CIRC,CIRC>& A, std::string title="",
  std::ostream& os=std::cout )
{
#ifndef RELEASE
    CallStackEntry entry("Print");
#endif
    const bool isRoot = ( A.Grid().VCRank() == A.Root() );
    if( !isRoot )
        return;
    Print( A.LockedMatrix(), title, os );
}
예제 #9
0
// Gathers the [U,V]-distributed matrix A into B, whose distribution is
// (Collect<U>(),Collect<V>()).
//
// B is placed on A's grid and resized to A's global dimensions. Processes
// participating in A either copy their local matrix directly (when the
// distribution communicator has a single member) or pack, all-gather over
// A.DistComm(), and unpack. Finally, processes in the grid broadcast B over
// A.CrossComm() from A.Root() unless that communicator is COMM_SELF.
void AllGather
( const DistMatrix<T,        U,           V   >& A,
        DistMatrix<T,Collect<U>(),Collect<V>()>& B )
{
    EL_DEBUG_CSE
    AssertSameGrids( A, B );

    const Int height = A.Height();
    const Int width = A.Width();
    B.SetGrid( A.Grid() );
    B.Resize( height, width );

    if( A.Participating() )
    {
        if( A.DistSize() == 1 )
        {
            // Nothing to gather: the local matrix is the whole matrix
            Copy( A.LockedMatrix(), B.Matrix() );
        }
        else
        {
            const Int colStride = A.ColStride();
            const Int rowStride = A.RowStride();
            const Int distStride = colStride*rowStride;
            // Every process contributes a portion of the same (padded) size,
            // large enough for the biggest local matrix.
            const Int maxLocalHeight = MaxLength(height,colStride);
            const Int maxLocalWidth = MaxLength(width,rowStride);
            const Int portionSize = mpi::Pad( maxLocalHeight*maxLocalWidth );
            // One send portion followed by distStride receive portions
            vector<T> buf;
            FastResize( buf, (distStride+1)*portionSize );
            T* sendBuf = &buf[0];
            T* recvBuf = &buf[portionSize];

            // Pack
            // (the local matrix is copied into sendBuf with leading dimension
            //  equal to the local height)
            util::InterleaveMatrix
            ( A.LocalHeight(), A.LocalWidth(),
              A.LockedBuffer(), 1, A.LDim(),
              sendBuf,          1, A.LocalHeight() );

            // Communicate
            mpi::AllGather
            ( sendBuf, portionSize, recvBuf, portionSize, A.DistComm() );

            // Unpack
            util::StridedUnpack
            ( height, width,
              A.ColAlign(), colStride,
              A.RowAlign(), rowStride,
              recvBuf, portionSize,
              B.Buffer(), B.LDim() );
        }
    }
    // Share the gathered result across the cross communicator, if any
    if( A.Grid().InGrid() && A.CrossComm() != mpi::COMM_SELF )
        El::Broadcast( B, A.CrossComm(), A.Root() );
}
예제 #10
0
// Fills norms (aligned with A, one entry per row of A) using
// RowTwoNormsHelper on the local data and A's row communicator. When A has
// no columns the helper is skipped and norms is simply zeroed.
void RowTwoNorms
( const DistMatrix<F,U,V>& A, DistMatrix<Base<F>,U,STAR>& norms )
{
    DEBUG_CSE
    norms.AlignWith( A );
    norms.Resize( A.Height(), 1 );

    const bool hasColumns = ( A.Width() != 0 );
    if( hasColumns )
        RowTwoNormsHelper( A.LockedMatrix(), norms.Matrix(), A.RowComm() );
    else
        Zero( norms );
}
예제 #11
0
// Applies Trmm to the local matrices of the [*,*] triangular matrix A and the
// distributed matrix B. In non-release builds a LogicError is raised unless
// B is undistributed (STAR) along the dimension that A multiplies.
inline void
LocalTrmm
( LeftOrRight side, UpperOrLower uplo,
  Orientation orientation, UnitOrNonUnit diag,
  T alpha, const DistMatrix<T,STAR,STAR>& A,
                 DistMatrix<T,BColDist,BRowDist>& B )
{
#ifndef RELEASE
    CallStackEntry entry("LocalTrmm");
    const bool badLeft = ( side == LEFT && BColDist != STAR );
    const bool badRight = ( side == RIGHT && BRowDist != STAR );
    if( badLeft || badRight )
        LogicError
        ("Distribution of RHS must conform with that of triangle");
#endif
    const auto& ALocal = A.LockedMatrix();
    auto& BLocal = B.Matrix();
    Trmm( side, uplo, orientation, diag, alpha, ALocal, BLocal );
}
예제 #12
0
// NOTE(review): this excerpt ends inside the function body, so only the setup
// and the debug-mode precondition checks are visible. Judging by the names,
// the routine collects the subdiagonal of H within the index range winInd
// into hSubWin -- confirm against the full source.
//
// Preconditions checked in debug builds:
//   - H's distribution blocks are square,
//   - H's column and row cuts are equal,
//   - the block size is at least two.
void GatherSubdiagonal
( const DistMatrix<F,MC,MR,BLOCK>& H,
  const IR& winInd,
        DistMatrix<Base<F>,STAR,STAR>& hSubWin )
{
    DEBUG_CSE
    const Int winSize = winInd.end - winInd.beg;
    const Int blockSize = H.BlockHeight();
    const Grid& grid = H.Grid();
    const auto& HLoc = H.LockedMatrix();
    DEBUG_ONLY(
      if( H.BlockHeight() != H.BlockWidth() )
          LogicError("Assumed square distribution blocks");
      if( H.ColCut() != H.RowCut() )
          LogicError("Assumed symmetric cuts");
      if( blockSize < 2 )
          LogicError("Assumed blocks of size at least two");
    )
예제 #13
0
void IndexDependentMap
( const DistMatrix<S,U,V,wrap>& A,
        DistMatrix<T,U,V,wrap>& B,
  function<T(Int,Int,const S&)> func )
{
    EL_DEBUG_CSE
    const Int mLoc = A.LocalHeight();
    const Int nLoc = A.LocalWidth();
    B.AlignWith( A.DistData() );
    B.Resize( A.Height(), A.Width() );
    auto& ALoc = A.LockedMatrix();
    auto& BLoc = B.Matrix();
    for( Int jLoc=0; jLoc<nLoc; ++jLoc )
    {
        const Int j = A.GlobalCol(jLoc);
        for( Int iLoc=0; iLoc<mLoc; ++iLoc )
        {
            const Int i = A.GlobalRow(iLoc);
            BLoc(iLoc,jLoc) = func(i,j,ALoc(iLoc,jLoc));
        }
    }
}
예제 #14
0
void TransformRows
( const Matrix<F>& Z,
        DistMatrix<F,MC,MR,BLOCK>& H )
{
    DEBUG_CSE
    const Int height = H.Height();
    const Grid& grid = H.Grid();

    const Int blockHeight = H.BlockHeight();
    const Int firstBlockHeight = blockHeight - H.ColCut();
    if( height <= firstBlockHeight || grid.Height() == 1 )
    {
        if( grid.Row() == H.RowOwner(0) )
        {
            // This process row can locally update its portion of H
            Matrix<F> HLocCopy( H.Matrix() );
            Gemm( ADJOINT, NORMAL, F(1), Z, HLocCopy, H.Matrix() );
        }
    }
    else if( height <= firstBlockHeight + blockHeight )
    {
        const bool firstRow = H.RowOwner( 0 );
        const bool secondRow = H.RowOwner( firstBlockHeight );
        if( grid.Row() == firstRow )
        {
            // 
            // Replace H with 
            //
            //   | ZLeft, ZRight |' | HTop    |,
            //                      | HBottom |
            //
            // where HTop is owned by this process row and HBottom by the next.
            //
            auto ZLeft = Z( ALL, IR(0,firstBlockHeight) );

            // Partition space for the combined matrix
            Matrix<F> HCombine( height, H.LocalWidth() );
            auto HTop = HCombine( IR(0,firstBlockHeight), ALL );
            auto HBottom = HCombine( IR(firstBlockHeight,END), ALL );

            // Copy our portion into the combined matrix
            HTop = H.LockedMatrix();

            // Exchange the data
            El::SendRecv( HTop, HBottom, H.ColComm(), secondRow, secondRow );
            
            // Form our portion of the result
            Gemm( ADJOINT, NORMAL, F(1), ZLeft, HCombine, H.Matrix() );
        }
        else if( grid.Row() == secondRow )
        {
            // 
            // Replace H with 
            //
            //   | ZLeft, ZRight |' | HTop    |,
            //                      | HBottom |
            //
            // where HTop is owned by the previous process row and HBottom by
            // this one.
            //
            auto ZRight = Z( ALL, IR(firstBlockHeight,END) );

            // Partition space for the combined matrix
            Matrix<F> HCombine( height, H.LocalWidth() );
            auto HTop = HCombine( IR(0,firstBlockHeight), ALL );
            auto HBottom = HCombine( IR(firstBlockHeight,END), ALL );

            // Copy our portion into the combined matrix
            HBottom = H.LockedMatrix();

            // Exchange the data
            El::SendRecv( HBottom, HTop, H.ColComm(), firstRow, firstRow );
            
            // Form our portion of the result
            Gemm( ADJOINT, NORMAL, F(1), ZRight, HCombine, H.Matrix() );
        }
    }
    else
    {
        // Fall back to the entire process column interacting.
        // TODO(poulson): Only form the subset of the result that we need.
        DistMatrix<F,STAR,MR,BLOCK> H_STAR_MR( H );
        Matrix<F> HLocCopy( H_STAR_MR.Matrix() );
        Gemm( ADJOINT, NORMAL, F(1), Z, HLocCopy, H_STAR_MR.Matrix() );
        H = H_STAR_MR;
    }
}
예제 #15
0
// Overwrites the block-distributed matrix H with H Z, where Z is a
// sequential matrix.
//
// Three cases are distinguished by how the columns of H fall into
// distribution blocks:
//   1. All columns lie within the first block, or the grid has a single
//      process column: the process column owning column 0 updates its local
//      data in place.
//   2. The columns fit within the first two blocks: the two owning process
//      columns exchange their local pieces over H.RowComm() and each forms
//      its own part of the product.
//   3. Otherwise: H is redistributed as [MC,STAR], updated locally, and
//      assigned back.
void TransformColumns
( const Matrix<F>& Z,
        DistMatrix<F,MC,MR,BLOCK>& H )
{
    DEBUG_CSE
    const Int width = H.Width();
    const Grid& grid = H.Grid();

    const Int blockWidth = H.BlockWidth();
    const Int firstBlockWidth = blockWidth - H.RowCut();
    if( width <= firstBlockWidth || grid.Width() == 1 )
    {
        if( grid.Col() == H.ColOwner(0) )
        {
            // This process column can locally update its portion of H
            Matrix<F> HLocCopy( H.Matrix() );
            Gemm( NORMAL, NORMAL, F(1), HLocCopy, Z, H.Matrix() );
        }
    }
    else if( width <= firstBlockWidth + blockWidth )
    {
        // The owners are process-column ranks that are compared against
        // grid.Col() and used as SendRecv partners, so they must be stored as
        // integers; a bool would collapse every nonzero rank to 1.
        const int firstCol = H.ColOwner( 0 );
        const int secondCol = H.ColOwner( firstBlockWidth );
        if( grid.Col() == firstCol )
        {
            //
            // Replace H with
            //
            //   | HLeft, HRight | | ZLeft, ZRight |,
            //
            // where HLeft is owned by this process column and HRight by the
            // next.
            //
            auto ZLeft = Z( ALL, IR(0,firstBlockWidth) );

            // Partition space for the combined matrix
            Matrix<F> HCombine( H.LocalHeight(), width );
            auto HLeft = HCombine( ALL, IR(0,firstBlockWidth) );
            auto HRight = HCombine( ALL, IR(firstBlockWidth,END) );

            // Copy our portion into the combined matrix
            HLeft = H.LockedMatrix();

            // Exchange the data
            El::SendRecv( HLeft, HRight, H.RowComm(), secondCol, secondCol );

            // Form our portion of the result
            Gemm( NORMAL, NORMAL, F(1), HCombine, ZLeft, H.Matrix() );
        }
        else if( grid.Col() == secondCol )
        {
            //
            // Replace H with
            //
            //   | HLeft, HRight | | ZLeft, ZRight |,
            //
            // where HLeft is owned by the previous process column and HRight
            // by this one.
            //
            auto ZRight = Z( ALL, IR(firstBlockWidth,END) );

            // Partition space for the combined matrix
            Matrix<F> HCombine( H.LocalHeight(), width );
            auto HLeft = HCombine( ALL, IR(0,firstBlockWidth) );
            auto HRight = HCombine( ALL, IR(firstBlockWidth,END) );

            // Copy our portion into the combined matrix
            HRight = H.LockedMatrix();

            // Exchange the data
            El::SendRecv( HRight, HLeft, H.RowComm(), firstCol, firstCol );

            // Form our portion of the result
            Gemm( NORMAL, NORMAL, F(1), HCombine, ZRight, H.Matrix() );
        }
    }
    else
    {
        // Fall back to redistributing H as [MC,STAR] so that every process
        // holds complete rows of its portion.
        // TODO(poulson): Only form the subset of the result that we need.
        DistMatrix<F,MC,STAR,BLOCK> H_MC_STAR( H );
        Matrix<F> HLocCopy( H_MC_STAR.Matrix() );
        Gemm( NORMAL, NORMAL, F(1), HLocCopy, Z, H_MC_STAR.Matrix() );
        H = H_MC_STAR;
    }
}
예제 #16
0
// Copy-assignment between two [* ,* ] matrices, possibly on different grids.
//
// This matrix is resized to A's dimensions. If both matrices share a grid the
// local matrix is copied directly. Otherwise the viewing communicators of the
// two grids must be congruent (a LogicError is raised if not), and the data
// travels as follows:
//   1. the process with VC rank 0 in A's grid packs A in column-major order
//      and posts a non-blocking send to VC rank 0 of this matrix's grid,
//   2. that process receives the data and broadcasts it over this grid's
//      VC communicator,
//   3. every participating process unpacks the buffer with SetLocal.
const DistMatrix<T,STAR,STAR>&
DistMatrix<T,STAR,STAR>::operator=( const DistMatrix<T,STAR,STAR>& A )
{
#ifndef RELEASE
    CallStackEntry entry("[* ,* ] = [* ,* ]");
    this->AssertNotLocked();
#endif
    this->ResizeTo( A.Height(), A.Width() );

    if( this->Grid() == A.Grid() )
    {
        this->matrix_ = A.LockedMatrix();
    }
    else
    {
        // TODO: Remember why I wrote this...
        if( !mpi::CongruentComms( A.Grid().ViewingComm(),
                                  this->Grid().ViewingComm() ) )
            LogicError
            ("Redistributing between nonmatching grids currently requires"
             " the viewing communicators to match.");

        // Compute and allocate the amount of required memory
        // (one copy of the matrix for the sender, one for each receiver; a
        //  process may need both)
        Int requiredMemory = 0;
        if( A.Grid().VCRank() == 0 )
            requiredMemory += A.Height()*A.Width();
        if( this->Participating() )
            requiredMemory += A.Height()*A.Width();
        T* buffer = this->auxMemory_.Require( requiredMemory );
        // The send region (if any) comes first, followed by the region used
        // for the receive and broadcast.
        Int offset = 0;
        T* sendBuf = &buffer[offset];
        if( A.Grid().VCRank() == 0 )
            offset += A.Height()*A.Width();
        T* bcastBuffer = &buffer[offset];

        // Send from the root of A to the root of this matrix's grid
        mpi::Request sendRequest;
        if( A.Grid().VCRank() == 0 )
        {
            for( Int j=0; j<A.Width(); ++j ) 
                for( Int i=0; i<A.Height(); ++i )
                    sendBuf[i+j*A.Height()] = A.GetLocal(i,j);
            const Int recvViewingRank = this->Grid().VCToViewingMap(0);
            mpi::ISend
            ( sendBuf, A.Height()*A.Width(), recvViewingRank,
              this->Grid().ViewingComm(), sendRequest );
        }

        // Receive on the root of this matrix's grid and then broadcast
        // over this matrix's owning communicator
        if( this->Participating() )
        {
            if( this->Grid().VCRank() == 0 )
            {
                const Int sendViewingRank = A.Grid().VCToViewingMap(0);
                mpi::Recv
                ( bcastBuffer, A.Height()*A.Width(), sendViewingRank,
                  this->Grid().ViewingComm() );
            }

            mpi::Broadcast
            ( bcastBuffer, A.Height()*A.Width(), 0, this->Grid().VCComm() );

            // Unpack the column-major buffer into the local matrix
            for( Int j=0; j<A.Width(); ++j )
                for( Int i=0; i<A.Height(); ++i )
                    this->SetLocal(i,j,bcastBuffer[i+j*A.Height()]);
        }

        // The send buffer lives in auxMemory_, so the send must complete
        // before that memory is released.
        if( A.Grid().VCRank() == 0 )
            mpi::Wait( sendRequest );
        this->auxMemory_.Release();
    }
    return *this;
}
예제 #17
0
// Convenience overload for a [*,*]-distributed pivot vector: forwards the
// local matrix of p to the sequential ComposePivots, which fills image and
// preimage.
inline void
ComposePivots
( const DistMatrix<Int,STAR,STAR>& p, 
  std::vector<Int>& image, std::vector<Int>& preimage )
{
    const auto& pLocal = p.LockedMatrix();
    ComposePivots( pLocal, image, preimage );
}
예제 #18
0
// Overwrites the block-distributed matrix A with V' A, where V is a
// sequential matrix.
//
// The work is organised by how many distribution blocks the rows of A span:
// a purely local update when one process row owns everything, a pairwise
// exchange when the rows fit within the first two blocks, and a
// redistribution to [STAR,MR] otherwise.
void TransformRows
( const Matrix<F>& V,
        DistMatrix<F,MC,MR,BLOCK>& A )
{
    DEBUG_CSE
    const Grid& grid = A.Grid();
    const Int height = A.Height();
    const Int blockHeight = A.BlockHeight();
    const Int firstBlockHeight = blockHeight - A.ColCut();

    // Case 1: a single process row holds every row of A
    if( height <= firstBlockHeight || grid.Height() == 1 )
    {
        if( grid.Row() == A.RowOwner(0) )
            TransformRows( V, A.Matrix() );
        return;
    }

    // Case 3: more than two blocks are involved, so fall back to the entire
    // process column interacting.
    // TODO(poulson): Only form the subset of the result that we need.
    if( height > firstBlockHeight + blockHeight )
    {
        DistMatrix<F,STAR,MR,BLOCK> A_STAR_MR( A );
        Matrix<F> ALocCopy( A_STAR_MR.Matrix() );
        Gemm( ADJOINT, NORMAL, F(1), V, ALocCopy, A_STAR_MR.Matrix() );
        A = A_STAR_MR;
        return;
    }

    // Case 2: exactly two process rows own pieces of A. We replace A with
    //
    //   | VLeft, VRight |' | ATop    |,
    //                      | ABottom |
    //
    // where ATop and ABottom live on different process rows.
    const int topOwner = A.RowOwner( 0 );
    const int bottomOwner = A.RowOwner( firstBlockHeight );
    const int myRow = grid.Row();
    const bool ownsTop = ( myRow == topOwner );
    if( !ownsTop && myRow != bottomOwner )
        return;

    // Partition space for the combined matrix
    Matrix<F> ACombine( height, A.LocalWidth() );
    auto ATop = ACombine( IR(0,firstBlockHeight), ALL );
    auto ABottom = ACombine( IR(firstBlockHeight,END), ALL );

    if( ownsTop )
    {
        // Contribute the top piece and receive the bottom one
        ATop = A.LockedMatrix();
        El::SendRecv( ATop, ABottom, A.ColComm(), bottomOwner, bottomOwner );

        // Form our portion of the result
        auto VLeft = V( ALL, IR(0,firstBlockHeight) );
        Gemm( ADJOINT, NORMAL, F(1), VLeft, ACombine, A.Matrix() );
    }
    else
    {
        // Contribute the bottom piece and receive the top one
        ABottom = A.LockedMatrix();
        El::SendRecv( ABottom, ATop, A.ColComm(), topOwner, topOwner );

        // Form our portion of the result
        auto VRight = V( ALL, IR(firstBlockHeight,END) );
        Gemm( ADJOINT, NORMAL, F(1), VRight, ACombine, A.Matrix() );
    }
}
예제 #19
0
// Overwrites the block-distributed matrix A with A V, where V is a
// sequential matrix.
//
// The work is organised by how many distribution blocks the columns of A
// span: a purely local update when one process column owns everything, a
// pairwise exchange when the columns fit within the first two blocks, and a
// redistribution to [MC,STAR] otherwise.
void TransformColumns
( const Matrix<F>& V,
        DistMatrix<F,MC,MR,BLOCK>& A )
{
    DEBUG_CSE
    const Grid& grid = A.Grid();
    const Int width = A.Width();
    const Int blockWidth = A.BlockWidth();
    const Int firstBlockWidth = blockWidth - A.RowCut();

    // Case 1: a single process column holds every column of A
    if( width <= firstBlockWidth || grid.Width() == 1 )
    {
        if( grid.Col() == A.ColOwner(0) )
            TransformColumns( V, A.Matrix() );
        return;
    }

    // Case 3: more than two blocks are involved, so redistribute.
    // TODO(poulson): Only form the subset of the result that we need.
    if( width > firstBlockWidth + blockWidth )
    {
        DistMatrix<F,MC,STAR,BLOCK> A_MC_STAR( A );
        Matrix<F> ALocCopy( A_MC_STAR.Matrix() );
        Gemm( NORMAL, NORMAL, F(1), ALocCopy, V, A_MC_STAR.Matrix() );
        A = A_MC_STAR;
        return;
    }

    // Case 2: exactly two process columns own pieces of A. We replace A with
    //
    //   | ALeft, ARight | | VLeft, VRight |,
    //
    // where ALeft and ARight live on different process columns.
    const int leftOwner = A.ColOwner( 0 );
    const int rightOwner = A.ColOwner( firstBlockWidth );
    const int myCol = grid.Col();
    const bool ownsLeft = ( myCol == leftOwner );
    if( !ownsLeft && myCol != rightOwner )
        return;

    // Partition space for the combined matrix
    Matrix<F> ACombine( A.LocalHeight(), width );
    auto ALeft = ACombine( ALL, IR(0,firstBlockWidth) );
    auto ARight = ACombine( ALL, IR(firstBlockWidth,END) );

    if( ownsLeft )
    {
        // Contribute the left piece and receive the right one
        ALeft = A.LockedMatrix();
        El::SendRecv( ALeft, ARight, A.RowComm(), rightOwner, rightOwner );

        // Form our portion of the result
        auto VLeft = V( ALL, IR(0,firstBlockWidth) );
        Gemm( NORMAL, NORMAL, F(1), ACombine, VLeft, A.Matrix() );
    }
    else
    {
        // Contribute the right piece and receive the left one
        ARight = A.LockedMatrix();
        El::SendRecv( ARight, ALeft, A.RowComm(), leftOwner, leftOwner );

        // Form our portion of the result
        auto VRight = V( ALL, IR(firstBlockWidth,END) );
        Gemm( NORMAL, NORMAL, F(1), ACombine, VRight, A.Matrix() );
    }
}
예제 #20
0
// Scatters the [CIRC,CIRC] matrix A into the elemental matrix B.
//
// B is resized to A's dimensions. If B has a nontrivial cross or redundant
// communicator the work is delegated to GeneralPurpose. Otherwise A's root is
// translated into B's distribution communicator; processes for which that
// translation is undefined return immediately. When B.DistSize() is one the
// local matrix is copied directly, and otherwise the root packs one
// (padded) package per process, an mpi::Scatter distributes them over
// B.DistComm(), and each process unpacks its package into B's local buffer.
void Scatter
( const DistMatrix<T,CIRC,CIRC>& A,
        ElementalMatrix<T>& B )
{
    DEBUG_CSE
    AssertSameGrids( A, B );

    const Int m = A.Height();
    const Int n = A.Width();
    const Int colStride = B.ColStride();
    const Int rowStride = B.RowStride();
    B.Resize( m, n );
    if( B.CrossSize() != 1 || B.RedundantSize() != 1 )
    {
        // TODO:
        // Broadcast over the redundant communicator and use mpi::Translate
        // rank to determine whether a process is the root of the broadcast.
        GeneralPurpose( A, B ); 
        return;
    }

    // Every process receives one package of the same padded size; the root
    // needs room for B.DistSize() of them in its send buffer.
    const Int pkgSize = mpi::Pad(MaxLength(m,colStride)*MaxLength(n,rowStride));
    const Int recvSize = pkgSize;
    const Int sendSize = B.DistSize()*pkgSize;

    // Translate the root of A into the DistComm of B (if possible)
    const Int root = A.Root();
    const Int target = mpi::Translate( A.CrossComm(), root, B.DistComm() ); 
    if( target == mpi::UNDEFINED )
        return;

    if( B.DistSize() == 1 )
    {
        Copy( A.LockedMatrix(), B.Matrix() );
        return;
    }

    vector<T> buffer;
    T* recvBuf=0; // some compilers (falsely) warn otherwise
    if( A.CrossRank() == root )
    {
        // Root: the send region comes first, followed by the receive region
        FastResize( buffer, sendSize+recvSize );
        T* sendBuf = &buffer[0];
        recvBuf    = &buffer[sendSize];

        // Pack the send buffer
        copy::util::StridedPack
        ( m, n,
          B.ColAlign(), colStride,
          B.RowAlign(), rowStride,
          A.LockedBuffer(), A.LDim(),
          sendBuf,          pkgSize );

        // Scatter from the root
        mpi::Scatter
        ( sendBuf, pkgSize, recvBuf, pkgSize, target, B.DistComm() );
    }
    else
    {
        // Non-root: only a receive region is needed
        FastResize( buffer, recvSize );
        recvBuf = &buffer[0];

        // Perform the receiving portion of the scatter from the non-root
        mpi::Scatter
        ( static_cast<T*>(0), pkgSize,
          recvBuf,            pkgSize, target, B.DistComm() );
    }

    // Unpack
    // (the package is stored with leading dimension equal to the local
    //  height and is copied into B's buffer with leading dimension B.LDim())
    copy::util::InterleaveMatrix
    ( B.LocalHeight(), B.LocalWidth(),
      recvBuf,    1, B.LocalHeight(),
      B.Buffer(), 1, B.LDim() );
}
예제 #21
0
// Redistributes A ([U,V]) into B ([Partial<U>(),PartialUnionRow<U,V>()])
// using an all-to-all over A's partial-union column communicator.
//
// B's column alignment is set to A's column alignment modulo B's column
// stride and B is resized to A's dimensions; non-participating processes
// return early. If the resulting alignments agree (colDiff == 0) the data is
// either copied locally (partial-union stride of one) or packed, exchanged
// with mpi::AllToAll, and unpacked. If they disagree, the packed data is
// first shifted between processes of A's partial column communicator with a
// SendRecv before the all-to-all.
void ColAllToAllPromote
( const DistMatrix<T,        U,                     V   >& A,
        DistMatrix<T,Partial<U>(),PartialUnionRow<U,V>()>& B )
{
    DEBUG_CSE
    AssertSameGrids( A, B );

    const Int height = A.Height();
    const Int width = A.Width();
    B.AlignColsAndResize
    ( Mod(A.ColAlign(),B.ColStride()), height, width, false, false );
    if( !B.Participating() )
        return;

    const Int colStride = A.ColStride();
    const Int colStridePart = A.PartialColStride();
    const Int colStrideUnion = A.PartialUnionColStride();
    const Int colRankPart = A.PartialColRank();
    // Nonzero when B's column alignment differs from the one implied by A
    const Int colDiff = B.ColAlign() - Mod(A.ColAlign(),colStridePart);

    // Each exchanged portion is padded to hold the largest local block
    const Int maxLocalHeight = MaxLength(height,colStride);
    const Int maxLocalWidth = MaxLength(width,colStrideUnion);
    const Int portionSize = mpi::Pad( maxLocalHeight*maxLocalWidth );

    if( colDiff == 0 )
    {
        if( A.PartialUnionColStride() == 1 )
        {
            Copy( A.LockedMatrix(), B.Matrix() );
        }
        else
        {
            // Two halves of colStrideUnion portions each: one to pack into
            // and one to receive into
            vector<T> buffer;
            FastResize( buffer, 2*colStrideUnion*portionSize );
            T* firstBuf  = &buffer[0];
            T* secondBuf = &buffer[colStrideUnion*portionSize];

            // Pack            
            util::RowStridedPack
            ( A.LocalHeight(), width,
              B.RowAlign(), colStrideUnion,
              A.LockedBuffer(), A.LDim(),
              firstBuf,         portionSize );

            // Simultaneously Gather in columns and Scatter in rows
            mpi::AllToAll
            ( firstBuf,  portionSize,
              secondBuf, portionSize, A.PartialUnionColComm() );

            // Unpack
            util::PartialColStridedUnpack 
            ( height, B.LocalWidth(),
              A.ColAlign(), colStride,
              colStrideUnion, colStridePart, colRankPart,
              B.ColShift(),
              secondBuf,  portionSize,
              B.Buffer(), B.LDim() );
        }
    }
    else
    {
#ifdef EL_UNALIGNED_WARNINGS
        if( A.Grid().Rank() == 0 )
            cerr << "Unaligned PartialColAllToAllPromote" << endl;
#endif
        // Partners for the realignment shift within the partial column
        // communicator
        const Int sendColRankPart = Mod( colRankPart+colDiff, colStridePart );
        const Int recvColRankPart = Mod( colRankPart-colDiff, colStridePart );

        vector<T> buffer;
        FastResize( buffer, 2*colStrideUnion*portionSize );
        T* firstBuf  = &buffer[0];
        T* secondBuf = &buffer[colStrideUnion*portionSize];

        // Pack
        // (into the second half, so that the realigned data lands in the
        //  first half and the all-to-all can write back into the second)
        util::RowStridedPack
        ( A.LocalHeight(), width,
          B.RowAlign(), colStrideUnion,
          A.LockedBuffer(), A.LDim(),
          secondBuf,        portionSize );

        // Realign the input
        mpi::SendRecv
        ( secondBuf, colStrideUnion*portionSize, sendColRankPart,
          firstBuf,  colStrideUnion*portionSize, recvColRankPart,
          A.PartialColComm() );

        // Simultaneously Scatter in columns and Gather in rows
        mpi::AllToAll
        ( firstBuf,  portionSize,
          secondBuf, portionSize, A.PartialUnionColComm() );

        // Unpack
        // (the data originated on recvColRankPart, so that rank is used in
        //  place of our own partial column rank)
        util::PartialColStridedUnpack 
        ( height, B.LocalWidth(),
          A.ColAlign(), colStride,
          colStrideUnion, colStridePart, recvColRankPart,
          B.ColShift(),
          secondBuf,  portionSize,
          B.Buffer(), B.LDim() );
    }
}
예제 #22
0
// Copies A, distributed as [U,V], into B, distributed as [V,U].
//
// B is resized to the dimensions of A, and processes for which
// B.Participating() is false return immediately afterwards.
// Two cases are visible in this listing:
//   * both distributions span a single process: a plain local copy;
//   * A has exactly one column: a pack / Scatter / SendRecv / Gather / unpack
//     pipeline through a temporary buffer.
// NOTE(review): the listing is cut off after the single-column branch; the
// remaining branch(es) and the closing brace of the function are not shown.
void TransposeDist( const DistMatrix<T,U,V>& A, DistMatrix<T,V,U>& B ) 
{
    DEBUG_ONLY(CSE cse("copy::TransposeDist"))
    AssertSameGrids( A, B );

    // NOTE(review): 'g' is not referenced anywhere in the visible portion of
    // this function; it may be used by the part that is cut off.
    const Grid& g = B.Grid();
    B.Resize( A.Height(), A.Width() );
    if( !B.Participating() )
        return;

    const Int colStrideA = A.ColStride();
    const Int rowStrideA = A.RowStride();
    const Int distSize = A.DistSize();

    if( A.DistSize() == 1 && B.DistSize() == 1 ) 
    {
        // Neither matrix is spread over more than one process, so the local
        // matrices can be copied directly
        Copy( A.LockedMatrix(), B.Matrix() );
    }
    else if( A.Width() == 1 )
    {
        // Single-column case: every message is padded to a common size
        // derived from the maximum local height over distSize processes
        const Int height = A.Height();
        const Int maxLocalHeight = MaxLength(height,distSize);
        const Int portionSize = mpi::Pad( maxLocalHeight );

        // Difference between the shifts of A and B over the distSize
        // processes; it determines the partner ranks of the SendRecv below
        const Int colDiff = Shift(A.DistRank(),A.ColAlign(),distSize) - 
                            Shift(B.DistRank(),B.ColAlign(),distSize);
        const Int sendRankB = Mod( B.DistRank()+colDiff, distSize );
        const Int recvRankA = Mod( A.DistRank()-colDiff, distSize );
        // Re-express recvRankA, split as (quotient, remainder) with respect
        // to colStrideA, with the two components swapped and rowStrideA as
        // the multiplier; the result is used as a rank in B.DistComm()
        const Int recvRankB = 
            (recvRankA/colStrideA)+rowStrideA*(recvRankA%colStrideA);

        // One allocation split into two regions:
        //   sendBuf: the first colStrideA*portionSize entries
        //   recvBuf: the following rowStrideA*portionSize entries
        vector<T> buffer;
        FastResize( buffer, (colStrideA+rowStrideA)*portionSize );
        T* sendBuf = &buffer[0];
        T* recvBuf = &buffer[colStrideA*portionSize];

        // Only the processes whose row rank equals A's row alignment pack
        if( A.RowRank() == A.RowAlign() )
        {
            // Pack
            // TODO: Use kernel from copy::util
            // The local entries of A are split into rowStrideA chunks of
            // portionSize entries each, written into recvBuf (the buffer
            // passed first to the Scatter below)
            const Int AColShift = A.ColShift();
            const T* ABuf = A.LockedBuffer();
            EL_PARALLEL_FOR
            for( Int k=0; k<rowStrideA; ++k )
            {
                T* data = &recvBuf[k*portionSize];

                // Shift associated with chunk k, the position in the local
                // buffer of its first entry, and its number of entries
                const Int shift = 
                  Shift_(A.ColRank()+colStrideA*k,A.ColAlign(),distSize);
                const Int offset = (shift-AColShift) / colStrideA;
                const Int thisLocalHeight = Length_(height,shift,distSize);

                // Gather every rowStrideA-th local entry starting at offset
                for( Int iLoc=0; iLoc<thisLocalHeight; ++iLoc )
                    data[iLoc] = ABuf[offset+iLoc*rowStrideA];
            }
        }

        // (e.g., A[VC,STAR] <- A[MC,MR])
        // Rooted at A.RowAlign() over A's row communicator.
        // NOTE(review): presumably the first buffer is the source and the
        // second the destination, so the packed chunks in recvBuf land in
        // sendBuf -- confirm against the mpi::Scatter wrapper.
        mpi::Scatter
        ( recvBuf, portionSize,
          sendBuf, portionSize, A.RowAlign(), A.RowComm() );

        // (e.g., A[VR,STAR] <- A[VC,STAR])
        // Exchange one portion with the partner ranks computed above
        mpi::SendRecv
        ( sendBuf, portionSize, sendRankB,
          recvBuf, portionSize, recvRankB, B.DistComm() );

        // (e.g., A[MR,MC] <- A[VR,STAR])
        // Rooted at B.RowAlign() over B's row communicator
        mpi::Gather
        ( recvBuf, portionSize,
          sendBuf, portionSize, B.RowAlign(), B.RowComm() );

        // Only the processes whose row rank equals B's row alignment unpack
        if( B.RowRank() == B.RowAlign() )
        {
            // Unpack
            // TODO: Use kernel from copy::util
            // Mirror image of the packing loop: colStrideA chunks are read
            // from sendBuf and written into B's local buffer with stride
            // colStrideA
            T* bufB = B.Buffer();
            EL_PARALLEL_FOR
            for( Int k=0; k<colStrideA; ++k )
            {
                const T* data = &sendBuf[k*portionSize];

                // Shift associated with chunk k, the position in B's local
                // buffer of its first entry, and its number of entries
                const Int shift = 
                  Shift_(B.ColRank()+rowStrideA*k,B.ColAlign(),distSize);
                const Int offset = (shift-B.ColShift()) / rowStrideA;
                const Int thisLocalHeight = Length_(height,shift,distSize);

                for( Int iLoc=0; iLoc<thisLocalHeight; ++iLoc )
                    bufB[offset+iLoc*colStrideA] = data[iLoc];
            }
        }
    }
예제 #23
0
// Blocked forward substitution with the [VC,* ]-distributed matrix L applied
// to the [VC,* ]-distributed right-hand sides X, which are overwritten.
//
// On a single-process grid the work is handed to FrontLowerForwardSolve on
// the local matrices. Otherwise L is traversed down its diagonal; for each
// diagonal block L11 the matching rows X1 are solved redundantly on a
// [* ,* ] copy, written back, and used to update the trailing rows X2.
inline void ForwardMany
( const DistMatrix<F,VC,STAR>& L, DistMatrix<F,VC,STAR>& X )
{
    const Grid& g = L.Grid();

    // Trivial grid: no redistribution is needed, solve locally and leave
    if( g.Size() == 1 )
    {
        FrontLowerForwardSolve( L.LockedMatrix(), X.Matrix() );
        return;
    }

    // Views into L: the 2x2 quadrants and the 3x3 repartitioning
    DistMatrix<F,VC,STAR> LTL(g), LTR(g), LBL(g), LBR(g);
    DistMatrix<F,VC,STAR> L00(g), L01(g), L02(g);
    DistMatrix<F,VC,STAR> L10(g), L11(g), L12(g);
    DistMatrix<F,VC,STAR> L20(g), L21(g), L22(g);

    // Views into X: top/bottom halves and the three row panels
    DistMatrix<F,VC,STAR> XT(g), XB(g);
    DistMatrix<F,VC,STAR> X0(g), X1(g), X2(g);

    // Fully replicated work copies of the current diagonal block and panel
    DistMatrix<F,STAR,STAR> L11_STAR_STAR(g);
    DistMatrix<F,STAR,STAR> X1_STAR_STAR(g);

    // Begin with empty top-left / top parts
    LockedPartitionDownDiagonal
    ( L, LTL, LTR,
         LBL, LBR, 0 );
    PartitionDown
    ( X, XT,
         XB, 0 );

    // Advance until the top-left quadrant spans every column of L
    while( L.Width() > LTL.Width() )
    {
        // Expose the next diagonal block L11 ...
        LockedRepartitionDownDiagonal
        ( LTL, /**/ LTR,  L00, /**/ L01, L02,
         /*************/ /******************/
               /**/       L10, /**/ L11, L12,
          LBL, /**/ LBR,  L20, /**/ L21, L22 );

        // ... and the rows of X with the same height as L11
        RepartitionDown
        ( XT,  X0,
         /**/ /**/
               X1,
          XB,  X2, L11.Height() );

        // Redistribute: L11[* ,* ] <- L11[VC,* ], X1[* ,* ] <- X1[VC,* ]
        L11_STAR_STAR = L11;
        X1_STAR_STAR = X1;

        // Triangular solve on the replicated copies:
        //   X1[* ,* ] := (L11[* ,* ])^-1 X1[* ,* ]
        LocalTrsm
        ( LEFT, LOWER, NORMAL, NON_UNIT,
          F(1), L11_STAR_STAR, X1_STAR_STAR, true );

        // Store the solved panel back into its [VC,* ] view
        X1 = X1_STAR_STAR;

        // Trailing update: X2[VC,* ] -= L21[VC,* ] X1[* ,* ]
        LocalGemm( NORMAL, NORMAL, F(-1), L21, X1_STAR_STAR, F(1), X2 );

        // Move the processed block and panel into the finished parts
        SlideLockedPartitionDownDiagonal
        ( LTL, /**/ LTR,  L00, L01, /**/ L02,
               /**/       L10, L11, /**/ L12,
         /*************/ /******************/
          LBL, /**/ LBR,  L20, L21, /**/ L22 );

        SlidePartitionDown
        ( XT,  X0,
               X1,
         /**/ /**/
          XB,  X2 );
    }
}
예제 #24
0
파일: Row.hpp 프로젝트: mcg1969/Elemental
// Forms a Householder-style reflector from the distributed scalar chi and the
// distributed row vector x, working over the row communicator of x's grid.
//
// On exit x has been scaled by 1/(alpha-beta), where alpha is the incoming
// value of chi, and the process column that owns chi stores beta in it.
// Returns tau = (beta-alpha)/beta. When the global 2-norm of x is exactly
// zero, chi is negated on its owner, x is left untouched, and R(2) is
// returned.
//
// Unless RELEASE is defined, the arguments are validated (std::logic_error on
// failure) and the routine is recorded on the call stack.
inline R
Row( DistMatrix<R>& chi, DistMatrix<R>& x )
{
#ifndef RELEASE
    PushCallStack("reflector::Row");
    if( chi.Grid() != x.Grid() )
        throw std::logic_error
        ("chi and x must be distributed over the same grid");
    if( !( chi.Height() == 1 && chi.Width() == 1 ) )
        throw std::logic_error("chi must be a scalar");
    if( x.Height() != 1 )
        throw std::logic_error("x must be a row vector");
    if( chi.Grid().Row() != chi.ColAlignment() ||
        x.Grid().Row() != x.ColAlignment() )
        throw std::logic_error("Reflecting with incorrect row of processes");
#endif
    const Grid& g = x.Grid();
    mpi::Comm comm = g.RowComm();
    const int numCols = g.Width();
    const int myCol = g.Col();
    const int ownerCol = chi.RowAlignment();
    const bool ownsChi = ( myCol == ownerCol );

    // Global 2-norm of x: gather each process's local norm, then take the
    // 2-norm of the gathered values
    std::vector<R> normPieces(numCols);
    R myNorm = Nrm2( x.LockedMatrix() );
    mpi::AllGather( &myNorm, 1, &normPieces[0], 1, comm );
    R xNorm = blas::Nrm2( numCols, &normPieces[0], 1 );

    // Zero vector: only flip the sign of chi on the process that owns it
    if( xNorm == 0 )
    {
        if( ownsChi )
            chi.SetLocal( 0, 0, -chi.GetLocal(0,0) );
#ifndef RELEASE
        PopCallStack();
#endif
        return R(2);
    }

    // The owner reads chi; everyone else receives it through the broadcast
    R alpha;
    if( ownsChi )
        alpha = chi.GetLocal(0,0);
    mpi::Broadcast( &alpha, 1, ownerCol, comm );

    // beta has magnitude SafeNorm(alpha,xNorm) and is negative exactly when
    // alpha is positive
    R beta = ( alpha <= 0 ?  lapack::SafeNorm( alpha, xNorm )
                          : -lapack::SafeNorm( alpha, xNorm ) );

    const R one = 1;
    const R tiny = lapack::MachineSafeMin<R>();
    const R eps = lapack::MachineEpsilon<R>();
    const R threshold = tiny/eps;

    // While |beta| is below the threshold, scale x, alpha and beta up and
    // remember how many times so that beta can be scaled back afterwards
    int numRescales = 0;
    if( Abs(beta) < threshold )
    {
        const R growth = one/threshold;
        do
        {
            Scale( growth, x );
            alpha *= growth;
            beta *= growth;
            ++numRescales;
        } while( Abs(beta) < threshold );

        // x has changed, so its norm and beta are computed afresh
        myNorm = Nrm2( x.LockedMatrix() );
        mpi::AllGather( &myNorm, 1, &normPieces[0], 1, comm );
        xNorm = blas::Nrm2( numCols, &normPieces[0], 1 );
        beta = ( alpha <= 0 ?  lapack::SafeNorm( alpha, xNorm )
                            : -lapack::SafeNorm( alpha, xNorm ) );
    }

    const R tau = (beta-alpha)/beta;
    Scale( one/(alpha-beta), x );

    // Undo the rescaling on beta, one factor per rescale performed
    for( int k=numRescales; k>0; --k )
        beta *= threshold;
    if( ownsChi )
        chi.SetLocal( 0, 0, beta );

#ifndef RELEASE
    PopCallStack();
#endif
    return tau;
}