Beispiel #1
0
inline void
Forsythe( DistMatrix<T,U,V>& J, Int n, T alpha, T lambda )
{
    DEBUG_ONLY(CallStackEntry cse("Forsythe"))
    J.Resize( n, n );
    MakeForsythe( J, alpha, lambda );
}
Beispiel #2
0
inline void
Identity( DistMatrix<T,U,V>& I, Int m, Int n )
{
    DEBUG_ONLY(CallStackEntry cse("Identity"))
    I.Resize( m, n );
    MakeIdentity( I );
}
Beispiel #3
0
inline void
BinaryFlat
( DistMatrix<T,CIRC,CIRC>& A, Int height, Int width, 
  const std::string filename )
{
    DEBUG_ONLY(CallStackEntry cse("read::Binary"))
    std::ifstream file( filename.c_str(), std::ios::binary );
    if( !file.is_open() )
        RuntimeError("Could not open ",filename);

    const Int numBytes = FileSize( file );
    const Int numBytesExp = height*width*sizeof(T);
    if( numBytes != numBytesExp )
        RuntimeError
        ("Expected file to be ",numBytesExp," bytes but found ",numBytes);

    A.Resize( height, width );
    if( A.CrossRank() == A.Root() )
    {
        if( A.Height() == A.LDim() )
            file.read( (char*)A.Buffer(), height*width*sizeof(T) );
        else
            for( Int j=0; j<width; ++j )
                file.read( (char*)A.Buffer(0,j), height*sizeof(T) );
    }
}
Beispiel #4
0
void Scatter
( const DistMatrix<T,CIRC,CIRC>& A,
        DistMatrix<T,STAR,STAR>& B )
{
    DEBUG_CSE
    AssertSameGrids( A, B );

    const Int height = A.Height();
    const Int width = A.Width();
    B.Resize( height, width );

    if( B.Participating() )
    {
        const Int pkgSize = mpi::Pad( height*width );
        vector<T> buffer;
        FastResize( buffer, pkgSize );

        // Pack            
        if( A.Participating() )
            util::InterleaveMatrix
            ( height, width,
              A.LockedBuffer(), 1, A.LDim(),
              buffer.data(),    1, height );

        // Broadcast from the process that packed
        mpi::Broadcast( buffer.data(), pkgSize, A.Root(), A.CrossComm() );

        // Unpack
        util::InterleaveMatrix
        ( height, width,
          buffer.data(), 1, height,
          B.Buffer(),    1, B.LDim() );
    }
}
Beispiel #5
0
inline void
BinaryFlat
( DistMatrix<T,STAR,V>& A, Int height, Int width, const std::string filename )
{
    DEBUG_ONLY(CallStackEntry cse("read::BinaryFlat"))
    std::ifstream file( filename.c_str(), std::ios::binary );
    if( !file.is_open() )
        RuntimeError("Could not open ",filename);

    const Int numBytes = FileSize( file );
    const Int numBytesExp = height*width*sizeof(T);
    if( numBytes != numBytesExp )
        RuntimeError
        ("Expected file to be ",numBytesExp," bytes but found ",numBytes);

    A.Resize( height, width );
    const Int localWidth = A.LocalWidth();
    const Int rowShift = A.RowShift();
    const Int rowStride = A.RowStride();
    for( Int jLoc=0; jLoc<localWidth; ++jLoc )
    {
        const Int j = rowShift + jLoc*rowStride;
        const Int localIndex = j*height;
        const std::streamoff pos = localIndex*sizeof(T);
        file.seekg( pos );
        file.read( (char*)A.Buffer(0,jLoc), height*sizeof(T) );
    }
}
Beispiel #6
0
inline void
Binary( DistMatrix<T,STAR,STAR>& A, const std::string filename )
{
    DEBUG_ONLY(CallStackEntry cse("read::Binary"))
    std::ifstream file( filename.c_str(), std::ios::binary );
    if( !file.is_open() )
        RuntimeError("Could not open ",filename);

    Int height, width;
    file >> height;
    file >> width;
    const Int numBytes = FileSize( file );
    const Int metaBytes = 2*sizeof(Int);
    const Int dataBytes = height*width*sizeof(T);
    const Int numBytesExp = metaBytes + dataBytes;
    if( numBytes != numBytesExp )
        RuntimeError
        ("Expected file to be ",numBytesExp," bytes but found ",numBytes);

    A.Resize( height, width );
    if( A.Height() == A.LDim() )
        file.read( (char*)A.Buffer(), height*width*sizeof(T) );
    else
        for( Int j=0; j<width; ++j )
            file.read( (char*)A.Buffer(0,j), height*sizeof(T) );
}
void ConsistentlyComputeDecomposition
(       DistMatrix<Field,MC,MR,BLOCK>& H,
        DistMatrix<Complex<Base<Field>>,STAR,STAR>& w,
        Matrix<Field>& Z,
  const HessenbergSchurCtrl& ctrl=HessenbergSchurCtrl() )
{
    EL_DEBUG_CSE
    // Because double-precision floating-point computation is often
    // non-deterministic due to extra-precision computation being frequent but
    // not guaranteed, we must be careful to not allow this non-determinism to
    // be amplified by the forward instability of Francis sweeps.
    const Grid& grid = H.Grid();
    const int owner = H.Owner(0,0);

    DistMatrix<Field,CIRC,CIRC> H_CIRC_CIRC( grid, owner );
    H_CIRC_CIRC = H;
    w.Resize( H.Height(), 1 );
    if( H_CIRC_CIRC.CrossRank() == H_CIRC_CIRC.Root() )
        HessenbergSchur( H_CIRC_CIRC.Matrix(), w.Matrix(), Z, ctrl );
    else
        Z.Resize( H.Height(), H.Height() );
    H = H_CIRC_CIRC;
    El::Broadcast( w.Matrix(), H_CIRC_CIRC.CrossComm(), H_CIRC_CIRC.Root() );
    El::Broadcast( Z, H_CIRC_CIRC.CrossComm(), H_CIRC_CIRC.Root() );
}
Beispiel #8
0
inline void
Jordan( DistMatrix<T,U,V>& J, Int n, T lambda )
{
    DEBUG_ONLY(CallStackEntry cse("Jordan"))
    J.Resize( n, n );
    MakeJordan( J, lambda );
}
Beispiel #9
0
void RowMaxNorms
( const DistMatrix<F,U,V>& A, DistMatrix<Base<F>,U,STAR>& norms )
{
    DEBUG_CSE
    norms.AlignWith( A );
    norms.Resize( A.Height(), 1 );
    RowMaxNorms( A.LockedMatrix(), norms.Matrix() );
    AllReduce( norms, A.RowComm(), mpi::MAX );
}
Beispiel #10
0
inline void
BinaryFlat
( DistMatrix<T,U,V>& A, Int height, Int width, const std::string filename )
{
    DEBUG_ONLY(CallStackEntry cse("read::BinaryFlat"))
    std::ifstream file( filename.c_str(), std::ios::binary );
    if( !file.is_open() )
        RuntimeError("Could not open ",filename);

    const Int numBytes = FileSize( file );
    const Int numBytesExp = height*width*sizeof(T);
    if( numBytes != numBytesExp )
        RuntimeError
        ("Expected file to be ",numBytesExp," bytes but found ",numBytes);

    A.Resize( height, width );
    if( U == A.UGath && V == A.VGath )
    {
        if( A.CrossRank() == A.Root() )
        {
            if( A.Height() == A.LDim() )
                file.read( (char*)A.Buffer(), height*width*sizeof(T) );
            else
                for( Int j=0; j<width; ++j )
                    file.read( (char*)A.Buffer(0,j), height*sizeof(T) );
        }
    }
    else if( U == A.UGath )
    {
        const Int localWidth = A.LocalWidth();
        for( Int jLoc=0; jLoc<localWidth; ++jLoc )
        {
            const Int j = A.GlobalCol(jLoc);
            const Int localIndex = j*height;
            const std::streamoff pos = localIndex*sizeof(T);
            file.seekg( pos );
            file.read( (char*)A.Buffer(0,jLoc), height*sizeof(T) );
        }
    }
    else
    {
        const Int localHeight = A.LocalHeight();
        const Int localWidth = A.LocalWidth();
        for( Int jLoc=0; jLoc<localWidth; ++jLoc )
        {
            const Int j = A.GlobalCol(jLoc);
            for( Int iLoc=0; iLoc<localHeight; ++iLoc )
            {
                const Int i = A.GlobalRow(iLoc);
                const Int localIndex = i+j*height;
                const std::streamoff pos = localIndex*sizeof(T);
                file.seekg( pos );
                file.read( (char*)A.Buffer(iLoc,jLoc), sizeof(T) );
            }
        }
    }
}
Beispiel #11
0
void ColumnMinAbs
( const DistMatrix<F,U,V>& A, DistMatrix<Base<F>,V,STAR>& mins )
{
    EL_DEBUG_CSE
    const Int n = A.Width();
    mins.AlignWith( A );
    mins.Resize( n, 1 );
    ColumnMinAbs( A.LockedMatrix(), mins.Matrix() );
    AllReduce( mins.Matrix(), A.ColComm(), mpi::MIN );
}
Beispiel #12
0
void AllGather
( const DistMatrix<T,        U,           V   >& A,
        DistMatrix<T,Collect<U>(),Collect<V>()>& B )
{
    EL_DEBUG_CSE
    AssertSameGrids( A, B );

    const Int height = A.Height();
    const Int width = A.Width();
    B.SetGrid( A.Grid() );
    B.Resize( height, width );

    if( A.Participating() )
    {
        if( A.DistSize() == 1 )
        {
            Copy( A.LockedMatrix(), B.Matrix() );
        }
        else
        {
            const Int colStride = A.ColStride();
            const Int rowStride = A.RowStride();
            const Int distStride = colStride*rowStride;
            const Int maxLocalHeight = MaxLength(height,colStride);
            const Int maxLocalWidth = MaxLength(width,rowStride);
            const Int portionSize = mpi::Pad( maxLocalHeight*maxLocalWidth );
            vector<T> buf;
            FastResize( buf, (distStride+1)*portionSize );
            T* sendBuf = &buf[0];
            T* recvBuf = &buf[portionSize];

            // Pack
            util::InterleaveMatrix
            ( A.LocalHeight(), A.LocalWidth(),
              A.LockedBuffer(), 1, A.LDim(),
              sendBuf,          1, A.LocalHeight() );

            // Communicate
            mpi::AllGather
            ( sendBuf, portionSize, recvBuf, portionSize, A.DistComm() );

            // Unpack
            util::StridedUnpack
            ( height, width,
              A.ColAlign(), colStride,
              A.RowAlign(), rowStride,
              recvBuf, portionSize,
              B.Buffer(), B.LDim() );
        }
    }
    if( A.Grid().InGrid() && A.CrossComm() != mpi::COMM_SELF )
        El::Broadcast( B, A.CrossComm(), A.Root() );
}
Beispiel #13
0
void StackedGeometricColumnScaling
( const DistMatrix<Field,      U,V   >& A,
  const DistMatrix<Field,      U,V   >& B,
        DistMatrix<Base<Field>,V,STAR>& geomScaling )
{
    EL_DEBUG_CSE
    // NOTE: Assuming A.ColComm() == B.ColComm() and that the row alignments
    //       are equal
    typedef Base<Field> Real;

    DistMatrix<Real,V,STAR> maxScalingA(A.Grid()),
                            maxScalingB(A.Grid());
    ColumnMaxNorms( A, maxScalingA );
    ColumnMaxNorms( B, maxScalingB );

    const Int mLocalA = A.LocalHeight();
    const Int mLocalB = B.LocalHeight();
    const Int nLocal = A.LocalWidth();
    geomScaling.AlignWith( maxScalingA );
    geomScaling.Resize( A.Width(), 1 );
    auto& ALoc = A.LockedMatrix();
    auto& BLoc = B.LockedMatrix();
    auto& geomScalingLoc = geomScaling.Matrix();
    auto& maxScalingALoc = maxScalingA.Matrix();
    auto& maxScalingBLoc = maxScalingB.Matrix();
    for( Int jLoc=0; jLoc<nLocal; ++jLoc )
    {
        Real minAbs = Max(maxScalingALoc(jLoc),maxScalingBLoc(jLoc));
        for( Int iLoc=0; iLoc<mLocalA; ++iLoc )
        {
            const Real absVal = Abs(ALoc(iLoc,jLoc));
            if( absVal > 0 && absVal < minAbs )
                minAbs = Min(minAbs,absVal);
        }
        for( Int iLoc=0; iLoc<mLocalB; ++iLoc )
        {
            const Real absVal = Abs(BLoc(iLoc,jLoc));
            if( absVal > 0 && absVal < minAbs )
                minAbs = Min(minAbs,absVal);
        }
        geomScalingLoc(jLoc) = minAbs;
    }
    mpi::AllReduce( geomScaling.Buffer(), nLocal, mpi::MIN, A.ColComm() );

    for( Int jLoc=0; jLoc<nLocal; ++jLoc )
    {
        const Real maxAbsA = maxScalingALoc(jLoc);
        const Real maxAbsB = maxScalingBLoc(jLoc);
        const Real maxAbs = Max(maxAbsA,maxAbsB);
        const Real minAbs = geomScalingLoc(jLoc);
        geomScalingLoc(jLoc) = Sqrt(minAbs*maxAbs);
    }
}
Beispiel #14
0
void RowTwoNorms
( const DistMatrix<F,U,V>& A, DistMatrix<Base<F>,U,STAR>& norms )
{
    DEBUG_CSE
    norms.AlignWith( A );
    norms.Resize( A.Height(), 1 );
    if( A.Width() == 0 )
    {
        Zero( norms );
        return;
    }
    RowTwoNormsHelper( A.LockedMatrix(), norms.Matrix(), A.RowComm() );
}
Beispiel #15
0
inline void
Lauchli( DistMatrix<T,U,V>& A, Int n, T mu )
{
    DEBUG_ONLY(CallStackEntry cse("Lauchli"))
    A.Resize( n+1, n );

    auto ABlock = View( A, 0, 0, 1, n );
    MakeOnes( ABlock );

    std::vector<T> d(n,mu);
    ABlock = View( A, 1, 0, n, n );
    Diagonal( ABlock, d );
}
void HermitianUniformSpectrum
( DistMatrix<F,U,V>& A, Int n, Base<F> lower, Base<F> upper )
{
    DEBUG_ONLY(CallStackEntry cse("HermitianUniformSpectrum"))
    A.Resize( n, n );
    const Grid& grid = A.Grid();
    typedef Base<F> Real;
    const bool isComplex = IsComplex<F>::val;
    const bool standardDist = ( U == MC && V == MR );

    // Form d and D
    std::vector<F> d( n );
    if( grid.Rank() == 0 )
        for( Int j=0; j<n; ++j )
            d[j] = SampleUniform<Real>( lower, upper );
    mpi::Broadcast( d.data(), n, 0, grid.Comm() );
    DistMatrix<F> ABackup( grid );
    ABackup.AlignWith( A );
    Diagonal( ABackup, d );

    // Apply a Haar matrix from both sides
    DistMatrix<F> Q(grid);
    DistMatrix<F,MD,STAR> t(grid);
    DistMatrix<Real,MD,STAR> s(grid);
    ImplicitHaar( Q, t, s, n );

    // Copy the result into the correct distribution
    qr::ApplyQ( LEFT, NORMAL, Q, t, s, ABackup );
    qr::ApplyQ( RIGHT, ADJOINT, Q, t, s, ABackup );
    A = ABackup;

    // Force the diagonal to be real-valued
    if( isComplex )
    {
        const Int localHeight = A.LocalHeight();
        const Int localWidth = A.LocalWidth();
        for( Int jLoc=0; jLoc<localWidth; ++jLoc )
        {
            const Int j = A.GlobalCol(jLoc);
            for( Int iLoc=0; iLoc<localHeight; ++iLoc )
            {
                const Int i = A.GlobalRow(iLoc);
                if( i == j )
                    A.SetLocalImagPart( iLoc, jLoc, Real(0) );
            }
        }
    }
}
Beispiel #17
0
void ScaLAPACKHelper
( DistMatrix<F,MC,MR,BLOCK>& A,
  DistMatrix<F,MR,STAR,BLOCK>& householderScalars )
{
    EL_DEBUG_CSE
    AssertScaLAPACKSupport();
#ifdef EL_HAVE_SCALAPACK
    const Int m = A.Height();
    const Int n = A.Width();
    const Int minDim = Min(m,n);
    householderScalars.AlignWith( A );
    householderScalars.Resize( minDim, 1 );

    auto descA = FillDesc( A );
    scalapack::QR
    ( m, n, A.Buffer(), descA.data(), householderScalars.Buffer() );
#endif
}
Beispiel #18
0
void Filter
( const DistMatrix<T,Collect<U>(),Collect<V>()>& A,
        DistMatrix<T,        U,           V   >& B )
{
    DEBUG_CSE
    AssertSameGrids( A, B );

    B.Resize( A.Height(), A.Width() );
    if( !B.Participating() )
        return;

    const Int colShift = B.ColShift();
    const Int rowShift = B.RowShift();
    util::InterleaveMatrix
    ( B.LocalHeight(), B.LocalWidth(),
      A.LockedBuffer(colShift,rowShift), B.ColStride(), B.RowStride()*A.LDim(),
      B.Buffer(),                        1,             B.LDim() );
}
Beispiel #19
0
inline void
Lehmer( DistMatrix<F,U,V>& L, Int n )
{
    DEBUG_ONLY(CallStackEntry cse("Lehmer"))
    L.Resize( n, n );
    const Int localHeight = L.LocalHeight();
    const Int localWidth = L.LocalWidth();
    for( Int jLoc=0; jLoc<localWidth; ++jLoc )
    {
        const Int j = L.GlobalCol(jLoc);
        for( Int iLoc=0; iLoc<localHeight; ++iLoc )
        {
            const Int i = L.GlobalRow(iLoc);
            if( i < j )
                L.SetLocal( iLoc, jLoc, F(i+1)/F(j+1) );
            else
                L.SetLocal( iLoc, jLoc, F(j+1)/F(i+1) );
        }
    }
}
Beispiel #20
0
inline void
Binary( DistMatrix<T,U,V>& A, const std::string filename )
{
    DEBUG_ONLY(CallStackEntry cse("read::Binary"))
    std::ifstream file( filename.c_str(), std::ios::binary );
    if( !file.is_open() )
        RuntimeError("Could not open ",filename);

    Int height, width;
    file >> height;
    file >> width;
    const Int numBytes = FileSize( file );
    const Int metaBytes = 2*sizeof(Int);
    const Int dataBytes = height*width*sizeof(T);
    const Int numBytesExp = metaBytes + dataBytes;
    if( numBytes != numBytesExp )
        RuntimeError
        ("Expected file to be ",numBytesExp," bytes but found ",numBytes);

    A.Resize( height, width );
    const Int localHeight = A.LocalHeight();
    const Int localWidth = A.LocalWidth();
    const Int colShift = A.ColShift(); 
    const Int rowShift = A.RowShift();
    const Int colStride = A.ColStride();
    const Int rowStride = A.RowStride();
    for( Int jLoc=0; jLoc<localWidth; ++jLoc )
    {
        const Int j = rowShift + jLoc*rowStride;
        for( Int iLoc=0; iLoc<localHeight; ++iLoc )
        {
            const Int i = colShift + iLoc*colStride;
            const Int localIndex = i+j*height;
            const std::streamoff pos = metaBytes + localIndex*sizeof(T);
            file.seekg( pos );
            file.read( (char*)A.Buffer(iLoc,jLoc), sizeof(T) );
        }
    }
}
Beispiel #21
0
void IndexDependentMap
( const DistMatrix<S,U,V,wrap>& A,
        DistMatrix<T,U,V,wrap>& B,
  function<T(Int,Int,const S&)> func )
{
    EL_DEBUG_CSE
    const Int mLoc = A.LocalHeight();
    const Int nLoc = A.LocalWidth();
    B.AlignWith( A.DistData() );
    B.Resize( A.Height(), A.Width() );
    auto& ALoc = A.LockedMatrix();
    auto& BLoc = B.Matrix();
    for( Int jLoc=0; jLoc<nLoc; ++jLoc )
    {
        const Int j = A.GlobalCol(jLoc);
        for( Int iLoc=0; iLoc<mLoc; ++iLoc )
        {
            const Int i = A.GlobalRow(iLoc);
            BLoc(iLoc,jLoc) = func(i,j,ALoc(iLoc,jLoc));
        }
    }
}
Beispiel #22
0
void QR
( DistMatrix<F,MC,MR,BLOCK>& A,
  DistMatrix<F,MR,STAR,BLOCK>& phase )
{
    DEBUG_CSE
    AssertScaLAPACKSupport();
#ifdef EL_HAVE_SCALAPACK
    const Int m = A.Height();
    const Int n = A.Width();
    const Int minDim = Min(m,n);
    phase.AlignWith( A );
    phase.Resize( minDim, 1 ); 

    const int bHandle = blacs::Handle( A );
    const int context = blacs::GridInit( bHandle, A );
    auto descA = FillDesc( A, context );

    scalapack::QR( m, n, A.Buffer(), descA.data(), phase.Buffer() );

    blacs::FreeGrid( context );
    blacs::FreeHandle( bHandle );
#endif
}
Beispiel #23
0
inline void
Pei( DistMatrix<T,U,V>& P, Int n, T alpha )
{
    DEBUG_ONLY(CallStackEntry cse("Pei"))
    P.Resize( n, n );
    const Int localHeight = P.LocalHeight();
    const Int localWidth = P.LocalWidth();
    const Int colShift = P.ColShift();
    const Int rowShift = P.RowShift();
    const Int colStride = P.ColStride();
    const Int rowStride = P.RowStride();
    for( Int jLoc=0; jLoc<localWidth; ++jLoc )
    {
        const Int j = rowShift + jLoc*rowStride;
        for( Int iLoc=0; iLoc<localHeight; ++iLoc )
        {
            const Int i = colShift + iLoc*colStride;
            P.SetLocal( iLoc, jLoc, T(1) );
            if( i == j )
                P.UpdateLocal( iLoc, jLoc, alpha );
        }
    }
}
Beispiel #24
0
inline void
Redheffer( DistMatrix<T,U,V>& R, Int n )
{
    DEBUG_ONLY(CallStackEntry cse("Redheffer"))
    R.Resize( n, n );
    const Int localHeight = R.LocalHeight();
    const Int localWidth = R.LocalWidth();
    const Int colShift = R.ColShift();
    const Int rowShift = R.RowShift();
    const Int colStride = R.ColStride();
    const Int rowStride = R.RowStride();
    for( Int jLoc=0; jLoc<localWidth; ++jLoc )
    {
        const Int j = rowShift + jLoc*rowStride;
        for( Int iLoc=0; iLoc<localHeight; ++iLoc )
        {
            const Int i = colShift + iLoc*colStride;
            if( j==0 || ((j+1)%(i+1))==0 )
                R.SetLocal( iLoc, jLoc, T(1) );
            else
                R.SetLocal( iLoc, jLoc, T(0) );
        }
    }
}
Beispiel #25
0
void FormDiagonalBlocks
( const DistMatrix<F,VC,STAR>& L, DistMatrix<F,STAR,STAR>& D, bool conjugate )
{
    const Grid& g = L.Grid();

    const Int height = L.Width();
    const Int blocksize = Blocksize();

    const int commRank = g.VCRank();
    const int commSize = g.Size();

    const Int localHeight = Length(height,commRank,commSize);
    const Int maxLocalHeight = MaxLength(height,commSize);
    const Int portionSize = maxLocalHeight*blocksize;

    std::vector<F> sendBuffer( portionSize );
    const Int colShift = L.ColShift();
    const Int LLDim = L.LDim();
    const F* LBuffer = L.LockedBuffer();
    if( conjugate )
    {
        for( Int iLoc=0; iLoc<localHeight; ++iLoc )
        {
            const Int i = colShift + iLoc*commSize;
            const Int block = i / blocksize;
            const Int jStart = block*blocksize;
            const Int b = std::min(height-jStart,blocksize);
            for( Int jOff=0; jOff<b; ++jOff )
                sendBuffer[iLoc*blocksize+jOff] = 
                    Conj(LBuffer[iLoc+(jStart+jOff)*LLDim]);
        }
    }
    else
    {
        for( Int iLoc=0; iLoc<localHeight; ++iLoc )
        {
            const Int i = colShift + iLoc*commSize;
            const Int block = i / blocksize;
            const Int jStart = block*blocksize;
            const Int b = std::min(height-jStart,blocksize);
            for( Int jOff=0; jOff<b; ++jOff )
                sendBuffer[iLoc*blocksize+jOff] = 
                    LBuffer[iLoc+(jStart+jOff)*LLDim];
        }
    }

    std::vector<F> recvBuffer( portionSize*commSize );
    mpi::AllGather
    ( &sendBuffer[0], portionSize, &recvBuffer[0], portionSize, g.VCComm() );
    SwapClear( sendBuffer );
    
    D.Resize( blocksize, height );
    F* DBuffer = D.Buffer();
    const Int DLDim = D.LDim();
    for( Int proc=0; proc<commSize; ++proc )
    {
        const F* procRecv = &recvBuffer[proc*portionSize];
        const Int procLocalHeight = Length(height,proc,commSize);
        for( Int iLoc=0; iLoc<procLocalHeight; ++iLoc )
        {
            const Int i = proc + iLoc*commSize;
            for( Int jOff=0; jOff<blocksize; ++jOff )
                DBuffer[jOff+i*DLDim] = procRecv[jOff+iLoc*blocksize];
        }
    }
}
Beispiel #26
0
void AllGather
( const DistMatrix<T,        U,           V   >& A, 
        DistMatrix<T,Collect<U>(),Collect<V>()>& B ) 
{
    DEBUG_ONLY(CSE cse("copy::AllGather"))
    AssertSameGrids( A, B );

    const Int height = A.Height();
    const Int width = A.Width();
    B.SetGrid( A.Grid() );
    B.Resize( height, width );

    if( A.Participating() )
    {
        const Int colStride = A.ColStride();
        const Int rowStride = A.RowStride();
        const Int distStride = colStride*rowStride;
        const Int maxLocalHeight = MaxLength(height,colStride);
        const Int maxLocalWidth = MaxLength(width,rowStride);
        const Int portionSize = mpi::Pad( maxLocalHeight*maxLocalWidth );
        vector<T> buf( (distStride+1)*portionSize );
        T* sendBuf = &buf[0];
        T* recvBuf = &buf[portionSize];

        // Pack
        util::InterleaveMatrix
        ( A.LocalHeight(), A.LocalWidth(),
          A.LockedBuffer(), 1, A.LDim(),
          sendBuf,          1, A.LocalHeight() );

        // Communicate
        mpi::AllGather
        ( sendBuf, portionSize, recvBuf, portionSize, A.DistComm() );

        // Unpack
        util::StridedUnpack
        ( height, width,
          A.ColAlign(), colStride,
          A.RowAlign(), rowStride,
          recvBuf, portionSize,
          B.Buffer(), B.LDim() );
    }
    if( A.Grid().InGrid() && A.CrossComm() != mpi::COMM_SELF )
    {
        // Pack from the root
        const Int BLocalHeight = B.LocalHeight();
        const Int BLocalWidth = B.LocalWidth();
        vector<T> buf(BLocalHeight*BLocalWidth);
        if( A.CrossRank() == A.Root() )
            util::InterleaveMatrix
            ( BLocalHeight, BLocalWidth,
              B.LockedBuffer(), 1, B.LDim(),
              buf.data(),       1, BLocalHeight ); 

        // Broadcast from the root
        mpi::Broadcast
        ( buf.data(), BLocalHeight*BLocalWidth, A.Root(), A.CrossComm() );

        // Unpack if not the root
        if( A.CrossRank() != A.Root() )
            util::InterleaveMatrix
            ( BLocalHeight, BLocalWidth,
              buf.data(), 1, BLocalHeight,
              B.Buffer(), 1, B.LDim() );
    }
}
Beispiel #27
0
void Gather
( const BlockMatrix<T>& A,
        DistMatrix<T,CIRC,CIRC,BLOCK>& B )
{
    DEBUG_ONLY(CSE cse("copy::Gather"))
    AssertSameGrids( A, B );
    if( A.DistSize() == 1 && A.CrossSize() == 1 )
    {
        B.Resize( A.Height(), A.Width() );
        if( B.CrossRank() == B.Root() )
            Copy( A.LockedMatrix(), B.Matrix() );
        return;
    }

    const Int height = A.Height();
    const Int width = A.Width();
    B.SetGrid( A.Grid() );
    B.Resize( height, width );

    // Gather the colShifts and rowShifts
    // ==================================
    Int myShifts[2];
    myShifts[0] = A.ColShift();
    myShifts[1] = A.RowShift();
    vector<Int> shifts;
    const Int crossSize = B.CrossSize();
    if( B.CrossRank() == B.Root() )
        shifts.resize( 2*crossSize );
    mpi::Gather( myShifts, 2, shifts.data(), 2, B.Root(), B.CrossComm() );

    // Gather the payload data
    // =======================
    const bool irrelevant = ( A.RedundantRank()!=0 || A.CrossRank()!=A.Root() );
    int totalSend = ( irrelevant ? 0 : A.LocalHeight()*A.LocalWidth() );
    vector<int> recvCounts, recvOffsets;
    if( B.CrossRank() == B.Root() )
        recvCounts.resize( crossSize );
    mpi::Gather( &totalSend, 1, recvCounts.data(), 1, B.Root(), B.CrossComm() );
    int totalRecv = Scan( recvCounts, recvOffsets );
    //vector<T> sendBuf(totalSend), recvBuf(totalRecv);
    vector<T> sendBuf, recvBuf;
    sendBuf.reserve( totalSend );
    recvBuf.reserve( totalRecv );
    if( !irrelevant )
        copy::util::InterleaveMatrix
        ( A.LocalHeight(), A.LocalWidth(),
          A.LockedBuffer(), 1, A.LDim(),
          sendBuf.data(),   1, A.LocalHeight() );
    mpi::Gather
    ( sendBuf.data(), totalSend,
      recvBuf.data(), recvCounts.data(), recvOffsets.data(), 
      B.Root(), B.CrossComm() );

    // Unpack
    // ======
    const Int mb = A.BlockHeight();
    const Int nb = A.BlockWidth();
    const Int colCut = A.ColCut();
    const Int rowCut = A.RowCut();
    if( B.Root() == B.CrossRank() )
    {
        for( Int q=0; q<crossSize; ++q )
        {
            if( recvCounts[q] == 0 )
                continue;
            const Int colShift = shifts[2*q+0];
            const Int rowShift = shifts[2*q+1];
            const Int colStride = A.ColStride();
            const Int rowStride = A.RowStride();
            const Int localHeight =
              BlockedLength( height, colShift, mb, colCut, colStride );
            const Int localWidth = 
              BlockedLength( width, rowShift, nb, rowCut, rowStride );
            const T* data = &recvBuf[recvOffsets[q]];
            for( Int jLoc=0; jLoc<localWidth; ++jLoc )
            {
                const Int jBefore = rowShift*nb - rowCut;
                const Int jLocAdj = ( rowShift==0 ? jLoc+rowCut : jLoc );
                const Int numFilledLocalBlocks = jLocAdj / nb;
                const Int jMid = numFilledLocalBlocks*nb*rowStride;
                const Int jPost = jLocAdj-numFilledLocalBlocks*nb;
                const Int j = jBefore + jMid + jPost;
                const T* sourceCol = &data[jLoc*localHeight];
                for( Int iLoc=0; iLoc<localHeight; ++iLoc )
                {
                    const Int iBefore = colShift*mb - colCut;
                    const Int iLocAdj = (colShift==0 ? iLoc+colCut : iLoc);
                    const Int numFilledLocalBlocks = iLocAdj / mb;
                    const Int iMid = numFilledLocalBlocks*mb*colStride;
                    const Int iPost = iLocAdj-numFilledLocalBlocks*mb;
                    const Int i = iBefore + iMid + iPost;
                    B.SetLocal(i,j,sourceCol[iLoc]);
                }
            }
        }
    }
}
Beispiel #28
0
void Gather
( const ElementalMatrix<T>& A,
        DistMatrix<T,CIRC,CIRC>& B )
{
    DEBUG_ONLY(CSE cse("copy::Gather"))
    AssertSameGrids( A, B );
    if( A.DistSize() == 1 && A.CrossSize() == 1 )
    {
        B.Resize( A.Height(), A.Width() );
        if( B.CrossRank() == B.Root() )
            Copy( A.LockedMatrix(), B.Matrix() );
        return;
    }

    const Int height = A.Height();
    const Int width = A.Width();
    B.SetGrid( A.Grid() );
    B.Resize( height, width );

    // Gather the colShifts and rowShifts
    // ==================================
    Int myShifts[2];
    myShifts[0] = A.ColShift();
    myShifts[1] = A.RowShift();
    vector<Int> shifts;
    const Int crossSize = B.CrossSize();
    if( B.CrossRank() == B.Root() )
        shifts.resize( 2*crossSize );
    mpi::Gather( myShifts, 2, shifts.data(), 2, B.Root(), B.CrossComm() );

    // Gather the payload data
    // =======================
    const bool irrelevant = ( A.RedundantRank()!=0 || A.CrossRank()!=A.Root() );
    int totalSend = ( irrelevant ? 0 : A.LocalHeight()*A.LocalWidth() );
    vector<int> recvCounts, recvOffsets;
    if( B.CrossRank() == B.Root() )
        recvCounts.resize( crossSize );
    mpi::Gather( &totalSend, 1, recvCounts.data(), 1, B.Root(), B.CrossComm() );
    int totalRecv = Scan( recvCounts, recvOffsets );
    //vector<T> sendBuf(totalSend), recvBuf(totalRecv);
    vector<T> sendBuf, recvBuf;
    sendBuf.reserve( totalSend );
    recvBuf.reserve( totalRecv );
    if( !irrelevant )
        copy::util::InterleaveMatrix
        ( A.LocalHeight(), A.LocalWidth(),
          A.LockedBuffer(), 1, A.LDim(),
          sendBuf.data(),   1, A.LocalHeight() );
    mpi::Gather
    ( sendBuf.data(), totalSend,
      recvBuf.data(), recvCounts.data(), recvOffsets.data(), 
      B.Root(), B.CrossComm() );

    // Unpack
    // ======
    if( B.Root() == B.CrossRank() )
    {
        for( Int q=0; q<crossSize; ++q )
        {
            if( recvCounts[q] == 0 )
                continue;
            const Int colShift = shifts[2*q+0];
            const Int rowShift = shifts[2*q+1];
            const Int colStride = A.ColStride();
            const Int rowStride = A.RowStride();
            const Int localHeight = Length( height, colShift, colStride );
            const Int localWidth = Length( width, rowShift, rowStride );
            copy::util::InterleaveMatrix
            ( localHeight, localWidth,
              &recvBuf[recvOffsets[q]],    1,         localHeight,
              B.Buffer(colShift,rowShift), colStride, rowStride*B.LDim() );
        }
    }
}
void TranslateBetweenGrids
( const DistMatrix<T,MC,MR>& A, DistMatrix<T,MC,MR>& B ) 
{
    DEBUG_ONLY(CSE cse("copy::TranslateBetweenGrids [MC,MR]"))

    B.Resize( A.Height(), A.Width() );
    // Just need to ensure that each viewing comm contains the other team's
    // owning comm. Congruence is too strong.

    // Compute the number of process rows and columns that each process
    // needs to send to.
    const Int colStride = B.ColStride();
    const Int rowStride = B.RowStride();
    const Int colRank = B.ColRank();
    const Int rowRank = B.RowRank();
    const Int colStrideA = A.ColStride();
    const Int rowStrideA = A.RowStride();
    const Int colGCD = GCD( colStride, colStrideA );
    const Int rowGCD = GCD( rowStride, rowStrideA );
    const Int colLCM = colStride*colStrideA / colGCD;
    const Int rowLCM = rowStride*rowStrideA / rowGCD;
    const Int numColSends = colStride / colGCD;
    const Int numRowSends = rowStride / rowGCD;

    const Int colAlign = B.ColAlign();
    const Int rowAlign = B.RowAlign();
    const Int colAlignA = A.ColAlign();
    const Int rowAlignA = A.RowAlign();

    const bool inBGrid = B.Participating();
    const bool inAGrid = A.Participating();
    if( !inBGrid && !inAGrid )
        return;

    const Int maxSendSize =
        (A.Height()/(colStrideA*numColSends)+1) *
        (A.Width()/(rowStrideA*numRowSends)+1);

    // Translate the ranks from A's VC communicator to B's viewing so that
    // we can match send/recv communicators. Since A's VC communicator is not
    // necessarily defined on every process, we instead work with A's owning
    // group and account for row-major ordering if necessary.
    const int sizeA = A.Grid().Size();
    vector<int> rankMap(sizeA), ranks(sizeA);
    if( A.Grid().Order() == COLUMN_MAJOR )
    {
        for( int j=0; j<sizeA; ++j )
            ranks[j] = j;
    }
    else
    {
        // The (i,j) = i + j*colStrideA rank in the column-major ordering is
        // equal to the j + i*rowStrideA rank in a row-major ordering.
        // Since we desire rankMap[i+j*colStrideA] to correspond to process
        // (i,j) in A's grid's rank in this viewing group, ranks[i+j*colStrideA]
        // should correspond to process (i,j) in A's owning group. Since the
        // owning group is ordered row-major in this case, its rank is
        // j+i*rowStrideA. Note that setting
        // ranks[j+i*rowStrideA] = i+j*colStrideA is *NOT* valid.
        for( int i=0; i<colStrideA; ++i )
            for( int j=0; j<rowStrideA; ++j )
                ranks[i+j*colStrideA] = j+i*rowStrideA;
    }
    mpi::Translate
    ( A.Grid().OwningGroup(), sizeA, &ranks[0],
      B.Grid().ViewingComm(), &rankMap[0] );

    // Have each member of A's grid individually send to all numRow x numCol
    // processes in order, while the members of this grid receive from all
    // necessary processes at each step.
    Int requiredMemory = 0;
    if( inAGrid )
        requiredMemory += maxSendSize;
    if( inBGrid )
        requiredMemory += maxSendSize;
    vector<T> auxBuf( requiredMemory );
    Int offset = 0;
    T* sendBuf = &auxBuf[offset];
    if( inAGrid )
        offset += maxSendSize;
    T* recvBuf = &auxBuf[offset];

    Int recvRow = 0; // avoid compiler warnings...
    if( inAGrid )
        recvRow = Mod(Mod(A.ColRank()-colAlignA,colStrideA)+colAlign,colStride);
    for( Int colSend=0; colSend<numColSends; ++colSend )
    {
        Int recvCol = 0; // avoid compiler warnings...
        if( inAGrid )
            recvCol=Mod(Mod(A.RowRank()-rowAlignA,rowStrideA)+rowAlign,
                        rowStride);
        for( Int rowSend=0; rowSend<numRowSends; ++rowSend )
        {
            mpi::Request sendRequest;
            // Fire off this round of non-blocking sends
            if( inAGrid )
            {
                // Pack the data
                Int sendHeight = Length(A.LocalHeight(),colSend,numColSends);
                Int sendWidth = Length(A.LocalWidth(),rowSend,numRowSends);
                copy::util::InterleaveMatrix
                ( sendHeight, sendWidth,
                  A.LockedBuffer(colSend,rowSend),
                 numColSends, numRowSends*A.LDim(),
                  sendBuf, 1, sendHeight );
                // Send data
                const Int recvVCRank = recvRow + recvCol*colStride;
                const Int recvViewingRank = B.Grid().VCToViewing( recvVCRank );
                mpi::ISend
                ( sendBuf, sendHeight*sendWidth, recvViewingRank,
                  B.Grid().ViewingComm(), sendRequest );
            }
            // Perform this round of recv's
            if( inBGrid )
            {
                const Int sendColOffset = colAlignA;
                const Int recvColOffset =
                    (colSend*colStrideA+colAlign) % colStride;
                const Int sendRowOffset = rowAlignA;
                const Int recvRowOffset =
                    (rowSend*rowStrideA+rowAlign) % rowStride;

                const Int firstSendRow =
                    Mod( Mod(colRank-recvColOffset,colStride)+sendColOffset,
                         colStrideA );
                const Int firstSendCol =
                    Mod( Mod(rowRank-recvRowOffset,rowStride)+sendRowOffset,
                         rowStrideA );

                const Int colShift = Mod( colRank-recvColOffset, colStride );
                const Int rowShift = Mod( rowRank-recvRowOffset, rowStride );
                const Int numColRecvs = Length( colStrideA, colShift, colStride );
                const Int numRowRecvs = Length( rowStrideA, rowShift, rowStride );

                // Recv data
                // For now, simply receive sequentially. Until we switch to
                // nonblocking recv's, we won't be using much of the
                // recvBuf
                Int sendRow = firstSendRow;
                for( Int colRecv=0; colRecv<numColRecvs; ++colRecv )
                {
                    const Int sendColShift = Shift( sendRow, colAlignA, colStrideA ) + colSend*colStrideA;
                    const Int sendHeight = Length( A.Height(), sendColShift, colLCM );
                    const Int localColOffset = (sendColShift-B.ColShift()) / colStride;

                    Int sendCol = firstSendCol;
                    for( Int rowRecv=0; rowRecv<numRowRecvs; ++rowRecv )
                    {
                        const Int sendRowShift = Shift( sendCol, rowAlignA, rowStrideA ) + rowSend*rowStrideA;
                        const Int sendWidth = Length( A.Width(), sendRowShift, rowLCM );
                        const Int localRowOffset = (sendRowShift-B.RowShift()) / rowStride;

                        const Int sendVCRank = sendRow+sendCol*colStrideA;
                        mpi::Recv
                        ( recvBuf, sendHeight*sendWidth, rankMap[sendVCRank],
                          B.Grid().ViewingComm() );

                        // Unpack the data
                        copy::util::InterleaveMatrix
                        ( sendHeight, sendWidth,
                          recvBuf, 1, sendHeight,
                          B.Buffer(localColOffset,localRowOffset),
                          colLCM/colStride, (rowLCM/rowStride)*B.LDim() );

                        // Set up the next send col
                        sendCol = (sendCol + rowStride) % rowStrideA;
                    }
                    // Set up the next send row
                    sendRow = (sendRow + colStride) % colStrideA;
                }
            }
            // Ensure that this round of non-blocking sends completes
            if( inAGrid )
            {
                mpi::Wait( sendRequest );
                recvCol = (recvCol + rowStrideA) % rowStride;
            }
        }
        if( inAGrid )
            recvRow = (recvRow + colStrideA) % colStride;
    }
}
Beispiel #30
0
HessenbergSchurInfo
MultiBulge
( DistMatrix<F,MC,MR,BLOCK>& H,
  DistMatrix<Complex<Base<F>>,STAR,STAR>& w,
  DistMatrix<F,MC,MR,BLOCK>& Z,
  const HessenbergSchurCtrl& ctrl )
{
    DEBUG_CSE 
    typedef Base<F> Real;
    const Real zero(0);
    const Grid& grid = H.Grid();

    const Int n = H.Height();
    Int winBeg = ( ctrl.winBeg==END ? n : ctrl.winBeg );
    Int winEnd = ( ctrl.winEnd==END ? n : ctrl.winEnd );
    const Int winSize = winEnd - winBeg;
    const Int blockSize = H.BlockHeight();

    // TODO(poulson): Implement a more reasonable/configurable means of deciding
    // when to call the sequential implementation
    Int minMultiBulgeSize = Max( ctrl.minMultiBulgeSize, 2*blockSize );
    // This maximum is meant to account for parallel overheads and needs to be
    // more principled (and perhaps based upon the number of workers and the 
    // cluster characteristics)
    // TODO(poulson): Re-enable this
    //minMultiBulgeSize = Max( minMultiBulgeSize, 500 );

    HessenbergSchurInfo info;

    w.Resize( n, 1 );
    if( winSize < minMultiBulgeSize )
    {
        return multibulge::RedundantlyHandleWindow( H, w, Z, ctrl );
    }

    auto ctrlShifts( ctrl );
    ctrlShifts.winBeg = 0;
    ctrlShifts.winEnd = END;
    ctrlShifts.fullTriangle = false;

    Int numIterSinceDeflation = 0;
    const Int numStaleIterBeforeExceptional = 5;
    // Cf. LAPACK's DLAQR0 for this choice
    const Int maxIter =
      Max(30,2*numStaleIterBeforeExceptional) * Max(10,winSize);

    Int iterBegLast=-1, winEndLast=-1;
    DistMatrix<F,STAR,STAR> hMainWin(grid), hSuperWin(grid);
    DistMatrix<Real,STAR,STAR> hSubWin(grid);
    while( winBeg < winEnd )
    {
        if( info.numIterations >= maxIter )
        {
            if( ctrl.demandConverged )
                RuntimeError("MultiBulge QR iteration did not converge");
            else
                break;
        }
        auto winInd = IR(winBeg,winEnd);

        // Detect an irreducible Hessenberg window, [iterBeg,winEnd)
        // ---------------------------------------------------------
        // TODO(poulson): Have the interblock chase from the previous sweep
        // collect the main and sub diagonal of H along the diagonal workers 
        // and then broadcast across the "cross" communicator.
        util::GatherTridiagonal( H, winInd, hMainWin, hSubWin, hSuperWin );
        Output("winBeg=",winBeg,", winEnd=",winEnd);
        Print( H, "H" );
        Print( hMainWin, "hMainWin" );
        Print( hSubWin, "hSubWin" );
        Print( hSuperWin, "hSuperWin" );

        const Int iterOffset =
          DetectSmallSubdiagonal
          ( hMainWin.Matrix(), hSubWin.Matrix(), hSuperWin.Matrix() );
        const Int iterBeg = winBeg + iterOffset;
        const Int iterWinSize = winEnd-iterBeg;
        if( iterOffset > 0 )
        {
            H.Set( iterBeg, iterBeg-1, zero );
            hSubWin.Set( iterOffset-1, 0, zero );
        }
        if( iterWinSize == 1 )
        {
            if( ctrl.progress )
                Output("One-by-one window at ",iterBeg);
            w.Set( iterBeg, 0, hMainWin.GetLocal(iterOffset,0) );

            winEnd = iterBeg;
            numIterSinceDeflation = 0;
            continue;
        }
        else if( iterWinSize == 2 )
        {
            if( ctrl.progress )
                Output("Two-by-two window at ",iterBeg);
            const F eta00 = hMainWin.GetLocal(iterOffset,0);
            const F eta01 = hSuperWin.GetLocal(iterOffset,0);
            const Real eta10 = hSubWin.GetLocal(iterOffset,0);
            const F eta11 = hMainWin.GetLocal(iterOffset+1,0);
            multibulge::TwoByTwo
            ( H, eta00, eta01, eta10, eta11, w, Z, iterBeg, ctrl );

            winEnd = iterBeg;
            numIterSinceDeflation = 0;
            continue;
        }
        else if( iterWinSize < minMultiBulgeSize )
        {
            // The window is small enough to switch to the simple scheme
            if( ctrl.progress )
                Output("Redundantly handling window [",iterBeg,",",winEnd,"]");
            auto ctrlIter( ctrl );
            ctrlIter.winBeg = iterBeg;
            ctrlIter.winEnd = winEnd;
            auto iterInfo =
              multibulge::RedundantlyHandleWindow( H, w, Z, ctrlIter );
            info.numIterations += iterInfo.numIterations;
             
            winEnd = iterBeg;
            numIterSinceDeflation = 0;
            continue;
        }

        const Int numShiftsRec = ctrl.numShifts( n, iterWinSize );
        if( ctrl.progress )
        {
            Output("Iter. ",info.numIterations,": ");
            Output("  window is [",iterBeg,",",winEnd,")");
            Output("  recommending ",numShiftsRec," shifts");
        }

        // NOTE(poulson): In the case where exceptional shifts are used, the
        // main and subdiagonals of H in the window are currently redundantly
        // gathered. It could be worthwhile to pass in hMainWin and hSubWin.
        const Int shiftBeg = multibulge::ComputeShifts
        ( H, w, iterBeg, winBeg, winEnd, numShiftsRec, numIterSinceDeflation,
          numStaleIterBeforeExceptional, ctrlShifts );
        auto shiftInd = IR(shiftBeg,winEnd);
        auto wShifts = w(shiftInd,ALL);

        // Perform a small-bulge sweep
        auto ctrlSweep( ctrl );
        ctrlSweep.winBeg = iterBeg;
        ctrlSweep.winEnd = winEnd;
        multibulge::Sweep( H, wShifts, Z, ctrlSweep );

        ++info.numIterations;
        if( iterBeg == iterBegLast && winEnd == winEndLast )
            ++numIterSinceDeflation;
        iterBegLast = iterBeg;
        winEndLast = winEnd;
    }
    info.numUnconverged = winEnd-winBeg;
    return info;
}