Ejemplo n.º 1
0
void RowMaxNorms
( const DistMatrix<F,U,V>& A, DistMatrix<Base<F>,U,STAR>& norms )
{
    DEBUG_CSE
    norms.AlignWith( A );
    norms.Resize( A.Height(), 1 );
    RowMaxNorms( A.LockedMatrix(), norms.Matrix() );
    AllReduce( norms, A.RowComm(), mpi::MAX );
}
Ejemplo n.º 2
0
void RowTwoNorms
( const DistMatrix<F,U,V>& A, DistMatrix<Base<F>,U,STAR>& norms )
{
    DEBUG_CSE
    norms.AlignWith( A );
    norms.Resize( A.Height(), 1 );
    if( A.Width() == 0 )
    {
        Zero( norms );
        return;
    }
    RowTwoNormsHelper( A.LockedMatrix(), norms.Matrix(), A.RowComm() );
}
Ejemplo n.º 3
0
void TransformColumns
( const Matrix<F>& Z,
        DistMatrix<F,MC,MR,BLOCK>& H )
{
    DEBUG_CSE
    const Int width = H.Width();
    const Grid& grid = H.Grid();

    const Int blockWidth = H.BlockWidth();
    const Int firstBlockWidth = blockWidth - H.RowCut();
    if( width <= firstBlockWidth || grid.Width() == 1 )
    {
        if( grid.Col() == H.ColOwner(0) )
        {
            // This process row can locally update its portion of H
            Matrix<F> HLocCopy( H.Matrix() );
            Gemm( NORMAL, NORMAL, F(1), HLocCopy, Z, H.Matrix() );
        }
    }
    else if( width <= firstBlockWidth + blockWidth )
    {
        const bool firstCol = H.ColOwner( 0 );
        const bool secondCol = H.ColOwner( firstBlockWidth );
        if( grid.Col() == firstCol )
        {
            // 
            // Replace H with 
            //
            //   | HLeft, HRight | | ZLeft, ZRight |,
            //
            // where HLeft is owned by this process column and HRight by the
            // next.
            //
            auto ZLeft = Z( ALL, IR(0,firstBlockWidth) );

            // Partition space for the combined matrix
            Matrix<F> HCombine( H.LocalHeight(), width );
            auto HLeft = HCombine( ALL, IR(0,firstBlockWidth) );
            auto HRight = HCombine( ALL, IR(firstBlockWidth,END) );

            // Copy our portion into the combined matrix
            HLeft = H.LockedMatrix();

            // Exchange the data
            El::SendRecv( HLeft, HRight, H.RowComm(), secondCol, secondCol );
            
            // Form our portion of the result
            Gemm( NORMAL, NORMAL, F(1), HCombine, ZLeft, H.Matrix() );
        }
        else if( grid.Col() == secondCol )
        {
            // 
            // Replace H with 
            //
            //   | HLeft, HRight | | ZLeft, ZRight |,
            //
            // where HLeft is owned by the previous process column and HRight
            // by this one.
            //
            auto ZRight = Z( ALL, IR(firstBlockWidth,END) );

            // Partition space for the combined matrix
            Matrix<F> HCombine( H.LocalHeight(), width );
            auto HLeft = HCombine( ALL, IR(0,firstBlockWidth) );
            auto HRight = HCombine( ALL, IR(firstBlockWidth,END) );

            // Copy our portion into the combined matrix
            HRight = H.LockedMatrix();

            // Exchange the data
            El::SendRecv( HRight, HLeft, H.RowComm(), firstCol, firstCol );
            
            // Form our portion of the result
            Gemm( NORMAL, NORMAL, F(1), HCombine, ZRight, H.Matrix() );
        }
    }
    else
    {
        // Fall back to the entire process column interacting.
        // TODO(poulson): Only form the subset of the result that we need.
        DistMatrix<F,MC,STAR,BLOCK> H_MC_STAR( H );
        Matrix<F> HLocCopy( H_MC_STAR.Matrix() );
        Gemm( NORMAL, NORMAL, F(1), HLocCopy, Z, H_MC_STAR.Matrix() );
        H = H_MC_STAR;
    }
}
Ejemplo n.º 4
0
void TransposeDist( const DistMatrix<T,U,V>& A, DistMatrix<T,V,U>& B ) 
{
    DEBUG_ONLY(CSE cse("copy::TransposeDist"))
    AssertSameGrids( A, B );

    const Grid& g = B.Grid();
    B.Resize( A.Height(), A.Width() );
    if( !B.Participating() )
        return;

    const Int colStrideA = A.ColStride();
    const Int rowStrideA = A.RowStride();
    const Int distSize = A.DistSize();

    if( A.DistSize() == 1 && B.DistSize() == 1 ) 
    {
        Copy( A.LockedMatrix(), B.Matrix() );
    }
    else if( A.Width() == 1 )
    {
        const Int height = A.Height();
        const Int maxLocalHeight = MaxLength(height,distSize);
        const Int portionSize = mpi::Pad( maxLocalHeight );

        const Int colDiff = Shift(A.DistRank(),A.ColAlign(),distSize) - 
                            Shift(B.DistRank(),B.ColAlign(),distSize);
        const Int sendRankB = Mod( B.DistRank()+colDiff, distSize );
        const Int recvRankA = Mod( A.DistRank()-colDiff, distSize );
        const Int recvRankB = 
            (recvRankA/colStrideA)+rowStrideA*(recvRankA%colStrideA);

        vector<T> buffer;
        FastResize( buffer, (colStrideA+rowStrideA)*portionSize );
        T* sendBuf = &buffer[0];
        T* recvBuf = &buffer[colStrideA*portionSize];

        if( A.RowRank() == A.RowAlign() )
        {
            // Pack
            // TODO: Use kernel from copy::util
            const Int AColShift = A.ColShift();
            const T* ABuf = A.LockedBuffer();
            EL_PARALLEL_FOR
            for( Int k=0; k<rowStrideA; ++k )
            {
                T* data = &recvBuf[k*portionSize];

                const Int shift = 
                  Shift_(A.ColRank()+colStrideA*k,A.ColAlign(),distSize);
                const Int offset = (shift-AColShift) / colStrideA;
                const Int thisLocalHeight = Length_(height,shift,distSize);

                for( Int iLoc=0; iLoc<thisLocalHeight; ++iLoc )
                    data[iLoc] = ABuf[offset+iLoc*rowStrideA];
            }
        }

        // (e.g., A[VC,STAR] <- A[MC,MR])
        mpi::Scatter
        ( recvBuf, portionSize,
          sendBuf, portionSize, A.RowAlign(), A.RowComm() );

        // (e.g., A[VR,STAR] <- A[VC,STAR])
        mpi::SendRecv
        ( sendBuf, portionSize, sendRankB,
          recvBuf, portionSize, recvRankB, B.DistComm() );

        // (e.g., A[MR,MC] <- A[VR,STAR])
        mpi::Gather
        ( recvBuf, portionSize,
          sendBuf, portionSize, B.RowAlign(), B.RowComm() );

        if( B.RowRank() == B.RowAlign() )
        {
            // Unpack
            // TODO: Use kernel from copy::util
            T* bufB = B.Buffer();
            EL_PARALLEL_FOR
            for( Int k=0; k<colStrideA; ++k )
            {
                const T* data = &sendBuf[k*portionSize];

                const Int shift = 
                  Shift_(B.ColRank()+rowStrideA*k,B.ColAlign(),distSize);
                const Int offset = (shift-B.ColShift()) / rowStrideA;
                const Int thisLocalHeight = Length_(height,shift,distSize);

                for( Int iLoc=0; iLoc<thisLocalHeight; ++iLoc )
                    bufB[offset+iLoc*colStrideA] = data[iLoc];
            }
        }
    }
Ejemplo n.º 5
0
void TransformColumns
( const Matrix<F>& V,
        DistMatrix<F,MC,MR,BLOCK>& A )
{
    DEBUG_CSE
    const Int width = A.Width();
    const Grid& grid = A.Grid();

    const Int blockWidth = A.BlockWidth();
    const Int firstBlockWidth = blockWidth - A.RowCut();
    if( width <= firstBlockWidth || grid.Width() == 1 )
    {
        if( grid.Col() == A.ColOwner(0) )
        {
            // This process row can locally update its portion of A
            TransformColumns( V, A.Matrix() );
        }
    }
    else if( width <= firstBlockWidth + blockWidth )
    {
        const int firstCol = A.ColOwner( 0 );
        const int secondCol = A.ColOwner( firstBlockWidth );
        if( grid.Col() == firstCol )
        {
            //
            // Replace A with
            //
            //   | ALeft, ARight | | VLeft, VRight |,
            //
            // where ALeft is owned by this process column and ARight by the
            // next.
            //

            // Partition space for the combined matrix
            Matrix<F> ACombine( A.LocalHeight(), width );
            auto ALeft = ACombine( ALL, IR(0,firstBlockWidth) );
            auto ARight = ACombine( ALL, IR(firstBlockWidth,END) );

            // Copy our portion into the combined matrix
            ALeft = A.LockedMatrix();

            // Exchange the data
            El::SendRecv( ALeft, ARight, A.RowComm(), secondCol, secondCol );

            // Form our portion of the result
            auto VLeft = V( ALL, IR(0,firstBlockWidth) );
            Gemm( NORMAL, NORMAL, F(1), ACombine, VLeft, A.Matrix() );
        }
        else if( grid.Col() == secondCol )
        {
            //
            // Replace A with
            //
            //   | ALeft, ARight | | VLeft, VRight |,
            //
            // where ALeft is owned by the previous process column and ARight
            // by this one.
            //

            // Partition space for the combined matrix
            Matrix<F> ACombine( A.LocalHeight(), width );
            auto ALeft = ACombine( ALL, IR(0,firstBlockWidth) );
            auto ARight = ACombine( ALL, IR(firstBlockWidth,END) );

            // Copy our portion into the combined matrix
            ARight = A.LockedMatrix();

            // Exchange the data
            El::SendRecv( ARight, ALeft, A.RowComm(), firstCol, firstCol );

            // Form our portion of the result
            auto VRight = V( ALL, IR(firstBlockWidth,END) );
            Gemm( NORMAL, NORMAL, F(1), ACombine, VRight, A.Matrix() );
        }
    }
    else
    {
        // Fall back to the entire process column interacting.
        // TODO(poulson): Only form the subset of the result that we need.
        DistMatrix<F,MC,STAR,BLOCK> A_MC_STAR( A );
        Matrix<F> ALocCopy( A_MC_STAR.Matrix() );
        Gemm( NORMAL, NORMAL, F(1), ALocCopy, V, A_MC_STAR.Matrix() );
        A = A_MC_STAR;
    }
}