コード例 #1
0
ファイル: G3DGemm.cpp プロジェクト: certik/Elemental
// Copy the matrix A held by the root layer into B on every process of the
// depth communicator.
//
// Processes whose world rank divided by the mesh size is zero first copy
// their local portion of A into B; the local buffer of B is then broadcast
// from rank 0 of depthComm.
//
// Throws std::logic_error when the local data of A is not stored with a
// leading dimension equal to its local height.
void DepthBroadcast
( const mpi::Comm& depthComm,
  const DistMatrix<double,MC,MR>& A, 
        DistMatrix<double,MC,MR>& B )
{
    // The layer index is derived from the world rank and the mesh size
    const int worldRank = mpi::CommRank(mpi::COMM_WORLD);
    const int layer = worldRank / A.Grid().Size();

    const int numLocalEntries = A.LocalHeight()*A.LocalWidth();
    if( A.LocalLDim() != A.LocalHeight() )
        throw std::logic_error("Leading dimension did not match local height");

    // Give B the same alignments and global dimensions as A
    B.Empty();
    B.AlignWith( A );
    B.ResizeTo( A.Height(), A.Width() );

    // Layer zero places its data directly into the broadcast buffer
    double* packed = B.LocalBuffer();
    if( layer == 0 )
        MemCopy( packed, A.LockedLocalBuffer(), numLocalEntries );

    // Everyone in the depth communicator receives the root's data
    mpi::Broadcast( packed, numLocalEntries, 0, depthComm );
}
コード例 #2
0
ファイル: G3DGemm.cpp プロジェクト: certik/Elemental
/*
 * Distributes A in such a way that
 *   Layer 0 <- A(:, 0:(n/h - 1))
 *   Layer 1 <- A(:, (n/h):(2n/h - 1))
 *     .
 *     .
 *     .
 *   Layer h-1 <- A(:, ((h-1)n/h):n)
 *
 * The local buffer of A on rank 0 of depthComm is scattered in equally sized
 * pieces over the members of depthComm. B is aligned with A, set to a zero
 * matrix with the dimensions of A, and each process stores its piece starting
 * at local column (A.LocalWidth()/depthSize)*depthRank of B.
 *
 * Throws std::logic_error when the local leading dimension of A differs from
 * its local height.
 *
 * NOTE(review): both the per-process count and the local column offset use
 * integer division by the depth size; this appears to assume that
 * A.LocalWidth() is a multiple of the depth size -- confirm with callers.
 */
void DistributeCols
( const mpi::Comm& depthComm,
  const DistMatrix<double,MC,MR>& A, 
        DistMatrix<double,MC,MR>& B )
{
    const int depthSize = mpi::CommSize( depthComm );
    const int depthRank = mpi::CommRank( depthComm );

    // Each member of the depth communicator receives an equal share of the
    // root's local entries
    const int sendCount = A.LocalHeight()*A.LocalWidth();
    const int recvCount = sendCount / depthSize;

    // For now, we will make B as large as A...
    // TODO: NOT DO THIS
    if( A.LocalHeight() != A.LocalLDim() )
        throw std::logic_error("Local height did not match local ldim");
    B.Empty();
    B.AlignWith( A );
    Zeros( A.Height(), A.Width(), B );

    // Scatter the root's local buffer; the received piece lands in the block
    // of local columns belonging to this layer
    const int localColOffset = (A.LocalWidth()/depthSize)*depthRank;
    mpi::Scatter
    ( A.LockedLocalBuffer(), recvCount, 
      B.LocalBuffer(0,localColOffset), recvCount, 0, depthComm );
}
コード例 #3
0
ファイル: Uniform.hpp プロジェクト: jimgoo/Elemental
    // Fill an [MC,* ] matrix with samples of center + radius*SampleUnitBall.
    // Processes outside of the grid do nothing. Within the grid, process
    // column 0 draws the samples, which are then broadcast from root 0 of
    // grid.RowComm() and copied column by column into the local matrix.
    static void Func
    ( DistMatrix<T,MC,STAR>& A, T center, typename Base<T>::type radius )
    {
        const Grid& grid = A.Grid();
        if( !grid.InGrid() )
            return;

        const int width = A.Width();
        const int numLocalRows = A.LocalHeight();
        const int numEntries = numLocalRows*width;
        std::vector<T> packed( numEntries );

        // Only process column 0 samples; the packed buffer is column-major
        // with a stride equal to the local height
        if( grid.Col() == 0 )
        {
            for( int k=0; k<numEntries; ++k )
                packed[k] = center + radius*SampleUnitBall<T>();
        }
        mpi::Broadcast( &packed[0], numEntries, 0, grid.RowComm() );

        // Copy each packed column into the (possibly padded) local storage
        const int localLDim = A.LocalLDim();
        T* local = A.LocalBuffer();
#ifdef HAVE_OPENMP
        #pragma omp parallel for
#endif
        for( int j=0; j<width; ++j )
            MemCopy
            ( &local[j*localLDim], &packed[j*numLocalRows], numLocalRows );
    }
コード例 #4
0
ファイル: Uniform.hpp プロジェクト: jimgoo/Elemental
    // Fill a [* ,* ] matrix with samples of center + radius*SampleUnitBall.
    // Processes outside of the grid do nothing. Within the grid, the process
    // with grid rank 0 draws all of the samples, which are then broadcast
    // from root 0 of grid.Comm() and copied into each local matrix.
    static void Func
    ( DistMatrix<T,STAR,STAR>& A, T center, typename Base<T>::type radius )
    {
        const Grid& grid = A.Grid();
        const int height = A.Height();
        const int width = A.Width();
        const int numEntries = height*width;

        if( !grid.InGrid() )
            return;

        std::vector<T> packed( numEntries );

        // The packed buffer is column-major with a stride equal to the height
        if( grid.Rank() == 0 )
        {
            for( int k=0; k<numEntries; ++k )
                packed[k] = center+radius*SampleUnitBall<T>();
        }
        mpi::Broadcast( &packed[0], numEntries, 0, grid.Comm() );

        // Copy each packed column into the (possibly padded) local storage
        const int localLDim = A.LocalLDim();
        T* local = A.LocalBuffer();
#ifdef HAVE_OPENMP
        #pragma omp parallel for
#endif
        for( int j=0; j<width; ++j )
            MemCopy( &local[j*localLDim], &packed[j*height], height );
    }
コード例 #5
0
ファイル: Uniform.hpp プロジェクト: jimgoo/Elemental
    // Fill a [* ,MR] matrix with samples of center + radius*SampleUnitBall.
    // Process row 0 draws the samples for its local columns, which are then
    // broadcast from root 0 of grid.ColComm() and copied column by column
    // into the local matrix.
    static void Func
    ( DistMatrix<T,STAR,MR>& A, T center, typename Base<T>::type radius )
    {
        const Grid& grid = A.Grid();
        const int height = A.Height();
        const int numLocalCols = A.LocalWidth();
        const int numEntries = height*numLocalCols;
        std::vector<T> packed( numEntries );

        // Only process row 0 samples; the packed buffer is column-major with
        // a stride equal to the height
        if( grid.Row() == 0 )
        {
            for( int k=0; k<numEntries; ++k )
                packed[k] = center+radius*SampleUnitBall<T>();
        }
        mpi::Broadcast( &packed[0], numEntries, 0, grid.ColComm() );

        // Copy each packed column into the (possibly padded) local storage
        const int localLDim = A.LocalLDim();
        T* local = A.LocalBuffer();
#ifdef HAVE_OPENMP
        #pragma omp parallel for
#endif
        for( int jLoc=0; jLoc<numLocalCols; ++jLoc )
            MemCopy( &local[jLoc*localLDim], &packed[jLoc*height], height );
    }
コード例 #6
0
ファイル: Uniform.hpp プロジェクト: jimgoo/Elemental
    // Fill an [MR,* ] matrix with samples of center + radius*SampleUnitBall.
    // Process row 0 draws the samples for its local rows, which are then
    // broadcast from root 0 of grid.ColComm() and copied entry by entry into
    // the local matrix.
    static void Func
    ( DistMatrix<T,MR,STAR>& A, T center, typename Base<T>::type radius )
    {
        const Grid& grid = A.Grid();
        const int width = A.Width();
        const int numLocalRows = A.LocalHeight();
        const int numEntries = numLocalRows*width;
        std::vector<T> packed( numEntries );

        // Only process row 0 samples; the packed buffer is column-major with
        // a stride equal to the local height
        if( grid.Row() == 0 )
        {
            for( int k=0; k<numEntries; ++k )
                packed[k] = center+radius*SampleUnitBall<T>();
        }
        mpi::Broadcast( &packed[0], numEntries, 0, grid.ColComm() );

        // Scatter the packed entries into the (possibly padded) local storage
        const int localLDim = A.LocalLDim();
        T* local = A.LocalBuffer();
#ifdef HAVE_OPENMP
        #pragma omp parallel for COLLAPSE(2)
#endif
        for( int col=0; col<width; ++col )
            for( int row=0; row<numLocalRows; ++row )
                local[row+col*localLDim] = packed[row+col*numLocalRows];
    }
コード例 #7
0
ファイル: MakeTriangular.hpp プロジェクト: jimgoo/Elemental
// Overwrite the part of the distributed matrix A lying outside of the
// requested triangle with zeros.
//
//   uplo == LOWER: in global column j, the leading local rows (those counted
//                  by RawLocalLength for the boundary min(j,height)) are
//                  zeroed, i.e., the entries above the diagonal.
//   otherwise:     in global column j, the local rows following the first
//                  RawLocalLength(j+1,...) ones are zeroed, i.e., the entries
//                  below the diagonal.
inline void
MakeTriangular( UpperOrLower uplo, DistMatrix<T,U,V>& A )
{
#ifndef RELEASE
    PushCallStack("MakeTriangular");
#endif
    const int height = A.Height();
    const int numLocalRows = A.LocalHeight();
    const int numLocalCols = A.LocalWidth();
    const int colShift = A.ColShift();
    const int rowShift = A.RowShift();
    const int colStride = A.ColStride();
    const int rowStride = A.RowStride();

    const int ldim = A.LocalLDim();
    T* local = A.LocalBuffer();

    if( uplo == LOWER )
    {
#ifdef HAVE_OPENMP
        #pragma omp parallel for
#endif
        for( int jLoc=0; jLoc<numLocalCols; ++jLoc )
        {
            // Global index of this local column
            const int j = rowShift + jLoc*rowStride;
            // Column zero has no entries above the diagonal
            if( j > 0 )
            {
                const int boundary = std::min( j, height );
                const int numZeroRows =
                    RawLocalLength( boundary, colShift, colStride );
                MemZero( &local[jLoc*ldim], numZeroRows );
            }
        }
    }
    else
    {
#ifdef HAVE_OPENMP
        #pragma omp parallel for
#endif
        for( int jLoc=0; jLoc<numLocalCols; ++jLoc )
        {
            // Global index of this local column
            const int j = rowShift + jLoc*rowStride;
            // Local rows up to and including the diagonal are preserved
            const int numKeptRows = RawLocalLength(j+1,colShift,colStride);
            if( numKeptRows < numLocalRows )
                MemZero
                ( &local[numKeptRows+jLoc*ldim], numLocalRows-numKeptRows );
        }
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
コード例 #8
0
ファイル: HermitianSVD.hpp プロジェクト: jimgoo/Elemental
// Form a singular value decomposition from a Hermitian eigenvalue
// decomposition.
//
// HermitianEig is called on (uplo, A) to produce the eigenvalues in s and
// the eigenvectors in V. Each entry of s is then replaced by its absolute
// value, and U is formed as a copy of V in which every column whose
// eigenvalue fails the test 'sigma >= 0' is negated.
inline void HermitianSVD
( UpperOrLower uplo, 
  DistMatrix<F>& A, DistMatrix<typename Base<F>::type,VR,STAR>& s, 
  DistMatrix<F>& U, DistMatrix<F>& V )
{
#ifndef RELEASE
    PushCallStack("HermitianSVD");
#endif
    typedef typename Base<F>::type R;

    // Eigenvalues land in s and eigenvectors in V
    HermitianEig( uplo, A, s, V ); 

    // Keep a signed copy of the eigenvalues in an [MR,* ] distribution that
    // is aligned with V, so that local entry jLoc pairs with local column
    // jLoc of V
    DistMatrix<R,MR,STAR> eigs_MR_STAR( A.Grid() );
    eigs_MR_STAR.AlignWith( V );
    eigs_MR_STAR = s;

    // The singular values are the magnitudes of the eigenvalues
    const int numLocalEigs = s.LocalHeight();
    for( int k=0; k<numLocalEigs; ++k )
    {
        const R lambda = s.GetLocal(k,0);
        s.SetLocal(k,0,Abs(lambda));
    }

    // U takes on the shape and alignment of V before the signed copy
    U.AlignWith( V );
    U.ResizeTo( V.Height(), V.Width() );
    const int numLocalRows = V.LocalHeight();
    const int numLocalCols = V.LocalWidth();
    for( int jLoc=0; jLoc<numLocalCols; ++jLoc )
    {
        const R lambda = eigs_MR_STAR.GetLocal( jLoc, 0 );
        const F* src = V.LockedLocalBuffer( 0, jLoc );
        F* dst = U.LocalBuffer( 0, jLoc );
        if( lambda >= 0 )
        {
            for( int iLoc=0; iLoc<numLocalRows; ++iLoc )
                dst[iLoc] = src[iLoc];
        }
        else
        {
            for( int iLoc=0; iLoc<numLocalRows; ++iLoc )
                dst[iLoc] = -src[iLoc];
        }
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
コード例 #9
0
ファイル: G3DGemm.cpp プロジェクト: certik/Elemental
// Have the top layer (world rank / mesh size == 0) fill its local portion of
// B and print the matrix; every other process returns immediately.
//
// Local entry k is set to k*meshSize + worldRank. Throws std::logic_error
// when the local leading dimension of B differs from its local height.
void InitB( DistMatrix<double,MC,MR>& B )
{
    const int worldRank = mpi::CommRank(mpi::COMM_WORLD);
    const int meshSize = B.Grid().Size();

    // Only the first layer takes part in the initialization
    if( worldRank / meshSize != 0 )
        return;

    if( B.LocalLDim() != B.LocalHeight() )
        throw std::logic_error("Local ldim of B was too large");

    double* entries = B.LocalBuffer();
    const int numEntries = B.LocalHeight()*B.LocalWidth();
    for( int k=0; k<numEntries; ++k )
        entries[k] = k*meshSize + worldRank;

    B.Print("B");
}
コード例 #10
0
ファイル: LLT.hpp プロジェクト: jimgoo/Elemental
inline void AddInLocalData
( const DistMatrix<F,VC,STAR>& X1, DistMatrix<F,STAR,STAR>& Z )
{
#ifndef RELEASE
    PushCallStack("internal::AddInLocalData");
#endif
    const int width = X1.Width();
    const int localHeight = X1.LocalHeight();
    const int stride = X1.Grid().Size();
    const int offset = X1.ColShift();
    for( int j=0; j<width; ++j )
    {
        F* ZColBuffer = Z.LocalBuffer(0,j);
        const F* X1ColBuffer = X1.LockedLocalBuffer(0,j);
        for( int iLocal=0; iLocal<localHeight; ++iLocal )
            ZColBuffer[offset+stride*iLocal] += X1ColBuffer[iLocal];
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
コード例 #11
0
ファイル: G3DGemm.cpp プロジェクト: certik/Elemental
// Reduce across depth to get end result C
//
// A is emptied, aligned with APartial and resized to the dimensions of
// APartial; the local buffers of APartial are then summed over depthComm
// with an AllReduce, so every member of depthComm receives the sum in A.
//
// Throws std::logic_error when either APartial or A has a local leading
// dimension that differs from its local height, since the reduction treats
// each local buffer as one contiguous array.
void SumContributions
( mpi::Comm& depthComm,
  const DistMatrix<double,MC,MR>& APartial,
        DistMatrix<double,MC,MR>& A )
{
    A.Empty();
    A.AlignWith( APartial );
    A.ResizeTo( APartial.Height(), APartial.Width() );

    if( APartial.LocalHeight() != APartial.LocalLDim() )
        throw std::logic_error
        ("APartial did not have matching local height/ldim");
    if( A.LocalHeight() != A.LocalLDim() )
        throw std::logic_error("A did not have matching local height/ldim");

    const int dataSize = APartial.LocalHeight()*APartial.LocalWidth();
    mpi::AllReduce
    ( APartial.LockedLocalBuffer(), A.LocalBuffer(), dataSize, 
      mpi::SUM, depthComm );
}
コード例 #12
0
ファイル: Her.hpp プロジェクト: certik/Elemental
// Hermitian rank-one update of one triangle of the distributed matrix A:
// each updated entry A(i,j) receives alpha*x(i)*Conj(x(j)).
//
//   uplo == LOWER: in each local column, the local rows from
//                  LocalLength(j,colShift,r) onwards are updated.
//   otherwise:     in each local column, the first LocalLength(j+1,colShift,r)
//                  local rows are updated.
//
// x may be either a column vector (x.Width() == 1) or a row vector; it is
// redistributed into forms aligned with the columns and rows of A before the
// purely local update is performed.
//
// In non-RELEASE builds, std::logic_error is thrown when A and x live on
// different grids, A is not square, or the length of x does not match A.
inline void
Her
( UpperOrLower uplo,
  T alpha, const DistMatrix<T>& x,
                 DistMatrix<T>& A )
{
#ifndef RELEASE
    PushCallStack("Her");
    if( A.Grid() != x.Grid() )
        throw std::logic_error("{A,x} must be distributed over the same grid");
    if( A.Height() != A.Width() )
        throw std::logic_error("A must be square");
    {
        const bool xIsColumn = ( x.Width() == 1 );
        const int xLength = ( xIsColumn ? x.Height() : x.Width() );
        if( A.Height() != xLength )
        {
            std::ostringstream msg;
            msg << "A must conform with x: \n"
                << "  A ~ " << A.Height() << " x " << A.Width() << "\n"
                << "  x ~ " << x.Height() << " x " << x.Width() << "\n";
            throw std::logic_error( msg.str() );
        }
    }
#endif
    const Grid& grid = A.Grid();

    const int numLocalRows = A.LocalHeight();
    const int numLocalCols = A.LocalWidth();
    const int gridHeight = grid.Height();
    const int gridWidth = grid.Width();
    const int colShift = A.ColShift();
    const int rowShift = A.RowShift();
    const bool lower = ( uplo == LOWER );

    if( x.Width() == 1 )
    {
        // Column vector: build copies of x aligned with A's columns and rows
        DistMatrix<T,MC,STAR> x_MC_STAR(grid);
        DistMatrix<T,MR,STAR> x_MR_STAR(grid);

        x_MC_STAR.AlignWith( A );
        x_MR_STAR.AlignWith( A );
        //--------------------------------------------------------------------//
        x_MC_STAR = x;
        x_MR_STAR = x_MC_STAR;

        const T* xCol = x_MC_STAR.LockedLocalBuffer();
        for( int jLoc=0; jLoc<numLocalCols; ++jLoc )
        {
            const int j = rowShift + jLoc*gridWidth;
            // Range of local rows lying in the requested triangle
            const int iBegin = 
                ( lower ? LocalLength(j,colShift,gridHeight) : 0 );
            const int iEnd = 
                ( lower ? numLocalRows 
                        : LocalLength(j+1,colShift,gridHeight) );

            const T gamma = alpha*Conj(x_MR_STAR.GetLocal(jLoc,0));
            T* ACol = A.LocalBuffer(0,jLoc);
            for( int iLoc=iBegin; iLoc<iEnd; ++iLoc )
                ACol[iLoc] += gamma*xCol[iLoc];
        }
        //--------------------------------------------------------------------//
        x_MC_STAR.FreeAlignments();
        x_MR_STAR.FreeAlignments();
    }
    else
    {
        // Row vector: build copies of x aligned with A's columns and rows
        DistMatrix<T,STAR,MC> x_STAR_MC(grid);
        DistMatrix<T,STAR,MR> x_STAR_MR(grid);

        x_STAR_MC.AlignWith( A );
        x_STAR_MR.AlignWith( A );
        //--------------------------------------------------------------------//
        x_STAR_MR = x;
        x_STAR_MC = x_STAR_MR;

        // Consecutive entries of the row vector are a leading dimension apart
        const T* xRow = x_STAR_MC.LockedLocalBuffer();
        const int xStride = x_STAR_MC.LocalLDim();
        for( int jLoc=0; jLoc<numLocalCols; ++jLoc )
        {
            const int j = rowShift + jLoc*gridWidth;
            // Range of local rows lying in the requested triangle
            const int iBegin = 
                ( lower ? LocalLength(j,colShift,gridHeight) : 0 );
            const int iEnd = 
                ( lower ? numLocalRows 
                        : LocalLength(j+1,colShift,gridHeight) );

            const T gamma = alpha*Conj(x_STAR_MR.GetLocal(0,jLoc));
            T* ACol = A.LocalBuffer(0,jLoc);
            for( int iLoc=iBegin; iLoc<iEnd; ++iLoc )
                ACol[iLoc] += gamma*xRow[iLoc*xStride];
        }
        //--------------------------------------------------------------------//
        x_STAR_MC.FreeAlignments();
        x_STAR_MR.FreeAlignments();
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
コード例 #13
0
ファイル: Panel.hpp プロジェクト: jimgoo/Elemental
// Unblocked LU factorization with row pivoting of the panel formed by
// stacking the replicated matrix A on top of the [MC,* ] matrix B.
//
// For each column, the routine:
//   1. finds the local pivot candidate among the remaining rows of A and the
//      locally owned rows of B,
//   2. combines the candidates over the grid's column communicator with an
//      AllReduce using PivotOp<F> (presumably selecting the candidate of
//      largest magnitude -- confirm against PivotOp),
//   3. records the chosen row (plus pivotOffset) in p and swaps it with the
//      current row,
//   4. scales the subdiagonal entries by the inverse of the pivot and applies
//      a rank-one update to the trailing columns of A and B.
//
// Parameters:
//   A            replicated top part of the panel; overwritten in place
//   B            distributed bottom part of the panel; overwritten in place
//   p            pivot vector; entry k receives the pivot row for column k,
//                shifted by pivotOffset
//   pivotOffset  constant added to every pivot index stored in p
//
// Throws SingularMatrixException when a pivot is exactly zero. In
// non-RELEASE builds, std::logic_error is thrown when the grids or the
// dimensions of A, B, and p do not conform.
inline void
PanelLU
( DistMatrix<F,  STAR,STAR>& A, 
  DistMatrix<F,  MC,  STAR>& B, 
  DistMatrix<int,STAR,STAR>& p, 
  int pivotOffset )
{
#ifndef RELEASE
    PushCallStack("internal::PanelLU");
    if( A.Grid() != p.Grid() || p.Grid() != B.Grid() )
        throw std::logic_error
        ("Matrices must be distributed over the same grid");
    if( A.Width() != B.Width() )
        throw std::logic_error("A and B must be the same width");
    if( A.Height() != p.Height() || p.Width() != 1 )
        throw std::logic_error("p must be a vector that conforms with A");
#endif
    const Grid& g = A.Grid();
    const int r = g.Height();
    const int colShift = B.ColShift();
    const int colAlignment = B.ColAlignment();

    // Matrix views
    DistMatrix<F,STAR,STAR> 
        ATL(g), ATR(g),  A00(g), a01(g),     A02(g),  
        ABL(g), ABR(g),  a10(g), alpha11(g), a12(g),  
                         A20(g), a21(g),     A22(g);

    DistMatrix<F,MC,STAR>
        BL(g), BR(g),
        B0(g), b1(g), B2(g);

    // Each message holds the pivot value, one full row of the panel, and the
    // global index of the pivot row
    const int width = A.Width();
    const int numBytes = (width+1)*sizeof(F)+sizeof(int);
    std::vector<byte> sendData(numBytes);
    std::vector<byte> recvData(numBytes);

    // Extract pointers to send and recv data
    // TODO: Think of how to make this safer with respect to alignment issues
    F* sendBufFloat = (F*)&sendData[0];
    F* recvBufFloat = (F*)&recvData[0];
    int* sendBufInt = (int*)&sendData[(width+1)*sizeof(F)];
    int* recvBufInt = (int*)&recvData[(width+1)*sizeof(F)];

    // Start the algorithm
    // (a blocksize of one exposes a single column per iteration)
    PushBlocksizeStack( 1 );
    PartitionDownDiagonal
    ( A, ATL, ATR,
         ABL, ABR, 0 );
    PartitionRight( B, BL, BR, 0 );
    while( ATL.Height() < A.Height() )
    {
        RepartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, /**/ a01,     A02,
         /*************/ /**********************/
               /**/       a10, /**/ alpha11, a12,
          ABL, /**/ ABR,  A20, /**/ a21,     A22 );

        RepartitionRight
        ( BL, /**/ BR,  
          B0, /**/ b1, B2 );

        //--------------------------------------------------------------------//
        // The height of a01 is the index of the current diagonal entry
        const int currentRow = a01.Height();
        
        // Store the index/value of the pivot candidate in A
        F pivot = alpha11.GetLocal(0,0);
        int pivotRow = currentRow;
        for( int i=0; i<a21.Height(); ++i )
        {
            F value = a21.GetLocal(i,0);
            if( FastAbs(value) > FastAbs(pivot) )
            {
                pivot = value;
                pivotRow = currentRow + i + 1;
            }
        }

        // Update the pivot candidate to include local data from B
        // (rows of B are numbered after those of A in the stacked panel)
        for( int i=0; i<B.LocalHeight(); ++i )
        {
            F value = b1.GetLocal(i,0);
            if( FastAbs(value) > FastAbs(pivot) )
            {
                pivot = value;
                pivotRow = A.Height() + colShift + i*r;
            }
        }

        // Fill the send buffer with:
        // [ pivotValue | pivot row data | pivotRow ]
        if( pivotRow < A.Height() )
        {
            // The candidate lives in the replicated matrix A
            sendBufFloat[0] = A.GetLocal(pivotRow,a10.Width());

            const int ALDim = A.LocalLDim();
            const F* ABuffer = A.LocalBuffer(pivotRow,0);
            for( int j=0; j<width; ++j )
                sendBufFloat[j+1] = ABuffer[j*ALDim];
        }
        else
        {
            // The candidate lives in this process's portion of B
            const int localRow = ((pivotRow-A.Height())-colShift)/r;
            sendBufFloat[0] = b1.GetLocal(localRow,0);

            const int BLDim = B.LocalLDim();
            const F* BBuffer = B.LocalBuffer(localRow,0);
            for( int j=0; j<width; ++j )
                sendBufFloat[j+1] = BBuffer[j*BLDim];
        }
        *sendBufInt = pivotRow;

        // Communicate to establish the pivot information
        mpi::AllReduce
        ( &sendData[0], &recvData[0], numBytes, PivotOp<F>(), g.ColComm() );

        // Update the pivot vector
        pivotRow = *recvBufInt;
        p.SetLocal(currentRow,0,pivotRow+pivotOffset);

        // Copy the current row into the pivot row
        if( pivotRow < A.Height() )
        {
            const int ALDim = A.LocalLDim();
            F* ASetBuffer = A.LocalBuffer(pivotRow,0);
            const F* AGetBuffer = A.LocalBuffer(currentRow,0);
            for( int j=0; j<width; ++j )
                ASetBuffer[j*ALDim] = AGetBuffer[j*ALDim];
        }
        else
        {
            // Only the process row owning the pivot row of B stores the copy
            const int ownerRank = (colAlignment+(pivotRow-A.Height())) % r;
            if( g.Row() == ownerRank )
            {
                const int localRow = ((pivotRow-A.Height())-colShift) / r;

                const int ALDim = A.LocalLDim();
                const int BLDim = B.LocalLDim();
                F* BBuffer = B.LocalBuffer(localRow,0);
                const F* ABuffer = A.LocalBuffer(currentRow,0);
                for( int j=0; j<width; ++j )
                    BBuffer[j*BLDim] = ABuffer[j*ALDim];
            }
        }

        // Copy the pivot row into the current row
        // (the row data arrived in the receive buffer after the pivot value)
        {
            F* ABuffer = A.LocalBuffer(currentRow,0);
            const int ALDim = A.LocalLDim();
            for( int j=0; j<width; ++j )
                ABuffer[j*ALDim] = recvBufFloat[j+1];
        }

        // Now we can perform the update of the current panel
        const F alpha = alpha11.GetLocal(0,0);
        if( alpha == F(0) )
            throw SingularMatrixException();
        const F alpha11Inv = F(1) / alpha;
        Scale( alpha11Inv, a21.LocalMatrix() );
        Scale( alpha11Inv, b1.LocalMatrix()  );
        Geru( F(-1), a21.LocalMatrix(), a12.LocalMatrix(), A22.LocalMatrix() );
        Geru( F(-1), b1.LocalMatrix(), a12.LocalMatrix(), B2.LocalMatrix() );
        //--------------------------------------------------------------------//

        SlidePartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, a01,     /**/ A02,
               /**/       a10, alpha11, /**/ a12,
         /*************/ /**********************/
          ABL, /**/ ABR,  A20, a21,     /**/ A22 );

        SlidePartitionRight
        ( BL,     /**/ BR,  
          B0, b1, /**/ B2 );
    }
    PopBlocksizeStack();

#ifndef RELEASE
    PopCallStack();
#endif
}
コード例 #14
0
ファイル: MakeTrapezoidal.hpp プロジェクト: certik/Elemental
// Overwrite the part of the distributed matrix A lying outside of the
// requested trapezoid with zeros.
//
// The boundary diagonal is shifted by 'offset'. With side == LEFT it is
// measured from the top-left corner of A; otherwise an additional shift of
// height-width is applied.
//
//   uplo == LOWER: in each local column, the leading local rows up to the
//                  boundary are zeroed.
//   otherwise:     in each local column, the local rows following the
//                  boundary are zeroed.
inline void
MakeTrapezoidal
( LeftOrRight side, UpperOrLower uplo, int offset,
  DistMatrix<T,U,V>& A )
{
#ifndef RELEASE
    PushCallStack("MakeTrapezoidal");
#endif
    const int height = A.Height();
    const int width = A.Width();
    const int numLocalRows = A.LocalHeight();
    const int numLocalCols = A.LocalWidth();
    const int colShift = A.ColShift();
    const int rowShift = A.RowShift();
    const int colStride = A.ColStride();
    const int rowStride = A.RowStride();

    const int ldim = A.LocalLDim();
    T* local = A.LocalBuffer();

    if( uplo == LOWER )
    {
#ifdef HAVE_OPENMP
        #pragma omp parallel for
#endif
        for( int jLoc=0; jLoc<numLocalCols; ++jLoc )
        {
            // Global index of this local column
            const int j = rowShift + jLoc*rowStride;
            int lastZeroRow = j-offset-1;
            if( side != LEFT )
                lastZeroRow = j-offset+height-width-1;
            // A negative value means that nothing in this column is zeroed
            if( lastZeroRow >= 0 )
            {
                const int boundary = std::min( lastZeroRow+1, height );
                const int numZeroRows =
                    RawLocalLength( boundary, colShift, colStride );
                MemZero( &local[jLoc*ldim], numZeroRows );
            }
        }
    }
    else
    {
#ifdef HAVE_OPENMP
        #pragma omp parallel for
#endif
        for( int jLoc=0; jLoc<numLocalCols; ++jLoc )
        {
            // Global index of this local column
            const int j = rowShift + jLoc*rowStride;
            const int unclamped =
                ( side==LEFT ? j-offset+1 : j-offset+height-width+1 );
            const int firstZeroRow = std::max(unclamped,0);
            // Local rows before the first zero row are preserved
            const int numKeptRows =
                RawLocalLength(firstZeroRow,colShift,colStride);
            if( numKeptRows < numLocalRows )
                MemZero
                ( &local[numKeptRows+jLoc*ldim], numLocalRows-numKeptRows );
        }
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
コード例 #15
0
// Apply a column permutation, described by 'image' and 'preimage', to the
// distributed matrix A using a single all-to-all exchange over the grid's
// row communicator.
//
// With b = image.size(), the data movement performed is:
//   - for each j in [0,b), column j is sent to position preimage[j];
//   - for each j in [0,b) with image[j] >= b, column image[j] is sent to
//     position j.
//
// Parameters:
//   A         matrix whose columns are permuted in place
//   image     for each of the first b columns, the column it is filled from
//   preimage  for each of the first b columns, the position it is moved to
//
// In non-RELEASE builds, std::logic_error is thrown when the two vectors
// differ in length or are longer than the width of A, or when the total send
// and receive counts disagree.
inline void
ApplyColumnPivots
( DistMatrix<F>& A, 
  const std::vector<int>& image,
  const std::vector<int>& preimage )
{
    const int b = image.size();
#ifndef RELEASE
    PushCallStack("ApplyColumnPivots");
    // Compare as signed integers to avoid a signed/unsigned comparison
    if( A.Width() < b || b != static_cast<int>(preimage.size()) )
        throw std::logic_error
        ("image and preimage must be vectors of equal length that are not "
         "wider than A.");
#endif
    const int localHeight = A.LocalHeight();
    if( A.Height() == 0 || A.Width() == 0 )
    {
#ifndef RELEASE
        PopCallStack();
#endif
        return;
    }

    // Extract the relevant process grid information
    const Grid& g = A.Grid();
    const int c = g.Width();
    const int rowAlignment = A.RowAlignment();
    const int rowShift = A.RowShift();
    const int myCol = g.Col();

    // Extract the send and recv counts from the image and preimage.
    // This process's sends may be logically partitioned into two sets:
    //   (a) sends from columns [0,...,b-1]
    //   (b) sends from columns [b,...]
    // The latter is analyzed with image, the former deduced with preimage.
    std::vector<int> sendCounts(c,0), recvCounts(c,0);
    // Locally owned columns within the first b
    for( int j=rowShift; j<b; j+=c )
    {
        const int sendCol = preimage[j];         
        const int sendTo = (rowAlignment+sendCol) % c; 
        sendCounts[sendTo] += localHeight;

        const int recvCol = image[j];
        const int recvFrom = (rowAlignment+recvCol) % c;
        recvCounts[recvFrom] += localHeight;
    }
    // Columns at or beyond b that this process owns
    for( int j=0; j<b; ++j )
    {
        const int sendCol = preimage[j];
        if( sendCol >= b )
        {
            const int sendTo = (rowAlignment+sendCol) % c;
            if( sendTo == myCol )
            {
                const int sendFrom = (rowAlignment+j) % c;
                recvCounts[sendFrom] += localHeight;
            }
        }

        const int recvCol = image[j];
        if( recvCol >= b )
        {
            const int recvFrom = (rowAlignment+recvCol) % c;
            if( recvFrom == myCol )
            {
                const int recvTo = (rowAlignment+j) % c;
                sendCounts[recvTo] += localHeight;
            }
        }
    }

    // Construct the send and recv displacements from the counts
    std::vector<int> sendDispls(c), recvDispls(c);
    int totalSend=0, totalRecv=0;
    for( int i=0; i<c; ++i )
    {
        sendDispls[i] = totalSend;
        recvDispls[i] = totalRecv;
        totalSend += sendCounts[i];
        totalRecv += recvCounts[i];
    }
#ifndef RELEASE
    if( totalSend != totalRecv )
    {
        std::ostringstream msg;
        msg << "Send and recv counts do not match: (send,recv)=" 
             << totalSend << "," << totalRecv;
        throw std::logic_error( msg.str().c_str() );
    }
#endif

    // Fill vectors with the send data
    // (the buffers hold at least one entry so that &sendData[0] is valid)
    std::vector<F> sendData(std::max(1,totalSend));
    std::vector<int> offsets(c,0);
    const int localWidth = LocalLength( b, rowShift, c );
    for( int jLocal=0; jLocal<localWidth; ++jLocal )
    {
        const int sendCol = preimage[rowShift+jLocal*c];
        const int sendTo = (rowAlignment+sendCol) % c;
        const int offset = sendDispls[sendTo]+offsets[sendTo];
        MemCopy( &sendData[offset], A.LocalBuffer(0,jLocal), localHeight );
        offsets[sendTo] += localHeight;
    }
    for( int j=0; j<b; ++j )
    {
        const int recvCol = image[j];
        if( recvCol >= b )
        {
            const int recvFrom = (rowAlignment+recvCol) % c; 
            if( recvFrom == myCol )
            {
                const int recvTo = (rowAlignment+j) % c;
                const int jLocal = (recvCol-rowShift) / c;
                const int offset = sendDispls[recvTo]+offsets[recvTo];
                MemCopy
                ( &sendData[offset], A.LocalBuffer(0,jLocal), localHeight );
                offsets[recvTo] += localHeight;
            }
        }
    }

    // Communicate all pivot columns
    std::vector<F> recvData(std::max(1,totalRecv));
    mpi::AllToAll
    ( &sendData[0], &sendCounts[0], &sendDispls[0],
      &recvData[0], &recvCounts[0], &recvDispls[0], g.RowComm() );

    // Unpack the recv data
    // (the traversal order mirrors the order in which each sender packed)
    for( int k=0; k<c; ++k )
    {
        offsets[k] = 0;
        int thisRowShift = Shift( k, rowAlignment, c );
        for( int j=thisRowShift; j<b; j+=c )
        {
            const int sendCol = preimage[j];
            const int sendTo = (rowAlignment+sendCol) % c;
            if( sendTo == myCol )
            {
                const int offset = recvDispls[k]+offsets[k];
                const int jLocal = (sendCol-rowShift) / c;
                MemCopy
                ( A.LocalBuffer(0,jLocal), &recvData[offset], localHeight );
                offsets[k] += localHeight;
            }
        }
    }
    for( int j=0; j<b; ++j )
    {
        const int recvCol = image[j];
        if( recvCol >= b )
        {
            const int recvTo = (rowAlignment+j) % c;
            if( recvTo == myCol )
            {
                const int recvFrom = (rowAlignment+recvCol) % c; 
                const int jLocal = (j-rowShift) / c;
                const int offset = recvDispls[recvFrom]+offsets[recvFrom];
                MemCopy
                ( A.LocalBuffer(0,jLocal), &recvData[offset], localHeight );
                offsets[recvFrom] += localHeight;
            }
        }
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
コード例 #16
0
ファイル: ApplyRowPivots.hpp プロジェクト: jimgoo/Elemental
// Apply a row permutation, described by 'image' and 'preimage', to the
// distributed matrix A using a single all-to-all exchange over the grid's
// column communicator.
//
// With b = image.size(), the data movement performed is:
//   - for each i in [0,b), row i is sent to position preimage[i];
//   - for each i in [0,b) with image[i] >= b, row image[i] is sent to
//     position i.
//
// Parameters:
//   A         matrix whose rows are permuted in place
//   image     for each of the first b rows, the row it is filled from
//   preimage  for each of the first b rows, the position it is moved to
//
// In non-RELEASE builds, std::logic_error is thrown when the two vectors
// differ in length or are longer than the height of A, or when the total
// send and receive counts disagree.
inline void
ApplyRowPivots
( DistMatrix<F>& A, 
  const std::vector<int>& image,
  const std::vector<int>& preimage )
{
    const int b = image.size();
#ifndef RELEASE
    PushCallStack("ApplyRowPivots");
    if( A.Height() < b || b != (int)preimage.size() )
        throw std::logic_error
        ("image and preimage must be vectors of equal length that are not "
         "taller than A.");
#endif
    const int localWidth = A.LocalWidth();
    // Nothing to move for an empty matrix
    if( A.Height() == 0 || A.Width() == 0 )
    {
#ifndef RELEASE
        PopCallStack();
#endif
        return;
    }
    
    // Extract the relevant process grid information
    const Grid& g = A.Grid();
    const int r = g.Height();
    const int colAlignment = A.ColAlignment();
    const int colShift = A.ColShift();
    const int myRow = g.Row();

    // Extract the send and recv counts from the image and preimage.
    // This process's sends may be logically partitioned into two sets:
    //   (a) sends from rows [0,...,b-1]
    //   (b) sends from rows [b,...]
    // The latter is analyzed with image, the former deduced with preimage.
    std::vector<int> sendCounts(r,0), recvCounts(r,0);
    // Locally owned rows within the first b
    for( int i=colShift; i<b; i+=r )
    {
        const int sendRow = preimage[i];         
        const int sendTo = (colAlignment+sendRow) % r; 
        sendCounts[sendTo] += localWidth;

        const int recvRow = image[i];
        const int recvFrom = (colAlignment+recvRow) % r;
        recvCounts[recvFrom] += localWidth;
    }
    // Rows at or beyond b that this process owns
    for( int i=0; i<b; ++i )
    {
        const int sendRow = preimage[i];
        if( sendRow >= b )
        {
            const int sendTo = (colAlignment+sendRow) % r;
            if( sendTo == myRow )
            {
                const int sendFrom = (colAlignment+i) % r;
                recvCounts[sendFrom] += localWidth;
            }
        }

        const int recvRow = image[i];
        if( recvRow >= b )
        {
            const int recvFrom = (colAlignment+recvRow) % r;
            if( recvFrom == myRow )
            {
                const int recvTo = (colAlignment+i) % r;
                sendCounts[recvTo] += localWidth;
            }
        }
    }

    // Construct the send and recv displacements from the counts
    std::vector<int> sendDispls(r), recvDispls(r);
    int totalSend=0, totalRecv=0;
    for( int i=0; i<r; ++i )
    {
        sendDispls[i] = totalSend;
        recvDispls[i] = totalRecv;
        totalSend += sendCounts[i];
        totalRecv += recvCounts[i];
    }
#ifndef RELEASE
    if( totalSend != totalRecv )
    {
        std::ostringstream msg;
        msg << "Send and recv counts do not match: (send,recv)=" 
             << totalSend << "," << totalRecv;
        throw std::logic_error( msg.str().c_str() );
    }
#endif

    // Fill vectors with the send data
    // (the buffers hold at least one entry so that &sendData[0] is valid;
    //  each row is gathered with a stride equal to the local leading dimension)
    const int ALDim = A.LocalLDim();
    std::vector<F> sendData(std::max(1,totalSend));
    std::vector<int> offsets(r,0);
    const int localHeight = LocalLength( b, colShift, r );
    for( int iLocal=0; iLocal<localHeight; ++iLocal )
    {
        const int sendRow = preimage[colShift+iLocal*r];
        const int sendTo = (colAlignment+sendRow) % r;
        const int offset = sendDispls[sendTo]+offsets[sendTo];
        const F* ABuffer = A.LocalBuffer(iLocal,0);
        for( int jLocal=0; jLocal<localWidth; ++jLocal )
            sendData[offset+jLocal] = ABuffer[jLocal*ALDim];
        offsets[sendTo] += localWidth;
    }
    for( int i=0; i<b; ++i )
    {
        const int recvRow = image[i];
        if( recvRow >= b )
        {
            const int recvFrom = (colAlignment+recvRow) % r; 
            if( recvFrom == myRow )
            {
                const int recvTo = (colAlignment+i) % r;
                const int iLocal = (recvRow-colShift) / r;
                const int offset = sendDispls[recvTo]+offsets[recvTo];
                const F* ABuffer = A.LocalBuffer(iLocal,0);
                for( int jLocal=0; jLocal<localWidth; ++jLocal )
                    sendData[offset+jLocal] = ABuffer[jLocal*ALDim];
                offsets[recvTo] += localWidth;
            }
        }
    }

    // Communicate all pivot rows
    std::vector<F> recvData(std::max(1,totalRecv));
    mpi::AllToAll
    ( &sendData[0], &sendCounts[0], &sendDispls[0],
      &recvData[0], &recvCounts[0], &recvDispls[0], g.ColComm() );

    // Unpack the recv data
    // (the traversal order mirrors the order in which each sender packed)
    for( int k=0; k<r; ++k )
    {
        offsets[k] = 0;
        int thisColShift = Shift( k, colAlignment, r );
        for( int i=thisColShift; i<b; i+=r )
        {
            const int sendRow = preimage[i];
            const int sendTo = (colAlignment+sendRow) % r;
            if( sendTo == myRow )
            {
                const int offset = recvDispls[k]+offsets[k];
                const int iLocal = (sendRow-colShift) / r;
                F* ABuffer = A.LocalBuffer(iLocal,0);
                for( int jLocal=0; jLocal<localWidth; ++jLocal )
                    ABuffer[jLocal*ALDim] = recvData[offset+jLocal];
                offsets[k] += localWidth;
            }
        }
    }
    for( int i=0; i<b; ++i )
    {
        const int recvRow = image[i];
        if( recvRow >= b )
        {
            const int recvTo = (colAlignment+i) % r;
            if( recvTo == myRow )
            {
                const int recvFrom = (colAlignment+recvRow) % r; 
                const int iLocal = (i-colShift) / r;
                const int offset = recvDispls[recvFrom]+offsets[recvFrom];
                F* ABuffer = A.LocalBuffer(iLocal,0);
                for( int jLocal=0; jLocal<localWidth; ++jLocal )
                    ABuffer[jLocal*ALDim] = recvData[offset+jLocal];
                offsets[recvFrom] += localWidth;
            }
        }
    }
#ifndef RELEASE
    PopCallStack();
#endif
}