// Runs HessenbergSchur on a single process and shares the results, so that
// the outputs that are broadcast (w and Z) are identical on every process of
// the cross communicator.
//
// H    - block-distributed input; copied into a [CIRC,CIRC] matrix rooted at
//        the owner of entry (0,0), then assigned back from that copy.
// w    - resized to H.Height() x 1; its local matrix is passed to
//        HessenbergSchur on the root and broadcast afterwards.
// Z    - local matrix passed to HessenbergSchur on the root and broadcast
//        afterwards.
// ctrl - forwarded unchanged to HessenbergSchur.
//
// NOTE(review): braces, any bare `else`, and the template header are missing
// from this listing. The Z.Resize call below presumably belongs to the
// non-root branch -- confirm against the upstream source.
void ConsistentlyComputeDecomposition
(       DistMatrix<Field,MC,MR,BLOCK>& H,
        DistMatrix<Complex<Base<Field>>,STAR,STAR>& w,
        Matrix<Field>& Z,
  const HessenbergSchurCtrl& ctrl=HessenbergSchurCtrl() )
    // Because double-precision floating-point computation is often
    // non-deterministic due to extra-precision computation being frequent but
    // not guaranteed, we must be careful to not allow this non-determinism to
    // be amplified by the forward instability of Francis sweeps.
    const Grid& grid = H.Grid();
    const int owner = H.Owner(0,0);

    // Collect H into a matrix whose root is the owner of H(0,0).
    DistMatrix<Field,CIRC,CIRC> H_CIRC_CIRC( grid, owner );
    H_CIRC_CIRC = H;
    w.Resize( H.Height(), 1 );
    // Only the root of the [CIRC,CIRC] matrix runs the sequential algorithm.
    if( H_CIRC_CIRC.CrossRank() == H_CIRC_CIRC.Root() )
        HessenbergSchur( H_CIRC_CIRC.Matrix(), w.Matrix(), Z, ctrl );
        Z.Resize( H.Height(), H.Height() );
    // Redistribute the transformed H and broadcast w and Z from the root.
    H = H_CIRC_CIRC;
    El::Broadcast( w.Matrix(), H_CIRC_CIRC.CrossComm(), H_CIRC_CIRC.Root() );
    El::Broadcast( Z, H_CIRC_CIRC.CrossComm(), H_CIRC_CIRC.Root() );
Ejemplo n.º 2
// Distributed wrapper around the local ColumnMinAbs overload.
//
// A    - distributed input matrix (read-only).
// mins - output; aligned with A, resized to A.Width() x 1, filled by the local
//        ColumnMinAbs call and then combined with an mpi::MIN all-reduce over
//        A.ColComm().
//
// NOTE(review): the function braces and template header are missing from this
// listing.
void ColumnMinAbs
( const DistMatrix<F,U,V>& A, DistMatrix<Base<F>,V,STAR>& mins )
    const Int n = A.Width();
    mins.AlignWith( A );
    mins.Resize( n, 1 );
    // Per-process partial result over the locally owned rows.
    ColumnMinAbs( A.LockedMatrix(), mins.Matrix() );
    // Combine the partial results across the column communicator.
    AllReduce( mins.Matrix(), A.ColComm(), mpi::MIN );
Ejemplo n.º 3
// Overwrites the distributed matrix J in place: the local matrix is zeroed,
// then every locally owned entry with global indices i == j is set to
// `lambda` and every entry with i == j-1 (first superdiagonal) is set to T(1).
//
// NOTE(review): the function/loop braces and template header are missing from
// this listing.
inline void
MakeJordan( DistMatrix<T,U,V>& J, T lambda )
    DEBUG_ONLY(CallStackEntry cse("MakeJordan"))
    Zero( J.Matrix() );

    const Int localHeight = J.LocalHeight();
    const Int localWidth = J.LocalWidth();
    const Int colShift = J.ColShift();
    const Int rowShift = J.RowShift();
    const Int colStride = J.ColStride();
    const Int rowStride = J.RowStride();
    for( Int jLoc=0; jLoc<localWidth; ++jLoc )
        // Global column index of local column jLoc.
        const Int j = rowShift + jLoc*rowStride;
        for( Int iLoc=0; iLoc<localHeight; ++iLoc )
            // Global row index of local row iLoc.
            const Int i = colShift + iLoc*colStride;
            if( i == j )
                J.SetLocal( iLoc, jLoc, lambda );
            else if( i == j-1 )
                J.SetLocal( iLoc, jLoc, T(1) );
Ejemplo n.º 4
// Thin wrapper: calls LU on the local matrix of the [STAR,STAR] matrix A.
//
// NOTE(review): the `#ifndef RELEASE` below has no visible `#endif`, and the
// function braces and template header are missing from this listing.
inline void
LocalLU( DistMatrix<F,STAR,STAR>& A )
#ifndef RELEASE
    // Call-stack entry, compiled only when RELEASE is not defined.
    CallStackEntry entry("LocalLU");
    LU( A.Matrix() );
Ejemplo n.º 5
// Thin wrapper: calls SymmetricInverse on the local matrix of the [STAR,STAR]
// matrix A, passing `true` as the third argument together with `uplo` and
// `pivotType` (which defaults to BUNCH_KAUFMAN_A).
//
// NOTE(review): the function name is missing from this listing; the call-stack
// label suggests it is LocalHermitianInverse -- confirm. The third argument is
// presumably a "conjugate" flag -- verify against SymmetricInverse. Braces and
// the template header are also missing.
inline void
( UpperOrLower uplo, DistMatrix<F,STAR,STAR>& A, 
  LDLPivotType pivotType=BUNCH_KAUFMAN_A )
    DEBUG_ONLY(CallStackEntry cse("LocalHermitianInverse"))
    SymmetricInverse( uplo, A.Matrix(), true, pivotType );
Ejemplo n.º 6
// Thin wrapper: calls HPDInverse( uplo, ... ) on the local matrix of the
// [STAR,STAR] matrix A.
//
// NOTE(review): the `#ifndef RELEASE` below has no visible `#endif`, and the
// function braces and template header are missing from this listing.
inline void
LocalHPDInverse( UpperOrLower uplo, DistMatrix<F,STAR,STAR>& A )
#ifndef RELEASE
    // Call-stack entry, compiled only when RELEASE is not defined.
    CallStackEntry entry("LocalHPDInverse");
    HPDInverse( uplo, A.Matrix() );
Ejemplo n.º 7
// Thin wrapper: calls Inverse on the local matrix of the [STAR,STAR] matrix A.
//
// NOTE(review): the `#ifndef RELEASE` below has no visible `#endif`, and the
// function braces and template header are missing from this listing.
inline void
LocalInverse( DistMatrix<F,STAR,STAR>& A )
#ifndef RELEASE
    // Call-stack entry, compiled only when RELEASE is not defined.
    CallStackEntry entry("LocalInverse");
    Inverse( A.Matrix() );
Ejemplo n.º 8
// In-place wrapper: forwards the local matrix of the distributed matrix A to
// the Conjugate overload that takes a local matrix.
//
// NOTE(review): the `#ifndef RELEASE` below has no visible `#endif`, and the
// function braces and template header are missing from this listing.
inline void
Conjugate( DistMatrix<T,U,V>& A )
#ifndef RELEASE
    // Call-stack entry, compiled only when RELEASE is not defined.
    CallStackEntry entry("Conjugate (in-place)");
    Conjugate( A.Matrix() );
Ejemplo n.º 9
// Thin wrapper: calls Trdtrmm( orientation, uplo, ... ) on the local matrix of
// the [STAR,STAR] matrix A.
//
// NOTE(review): the function name is missing from this listing; the call-stack
// label suggests it is LocalTrdtrmm -- confirm. The `#ifndef RELEASE` has no
// visible `#endif`, and braces and the template header are also missing.
inline void
( Orientation orientation, UpperOrLower uplo, DistMatrix<T,STAR,STAR>& A )
#ifndef RELEASE
    // Call-stack entry, compiled only when RELEASE is not defined.
    CallStackEntry entry("LocalTrdtrmm");
    Trdtrmm( orientation, uplo, A.Matrix() );
Ejemplo n.º 10
// Thin wrapper: calls Ger on the local matrices of x, y (read-only) and A
// (updated in place), forwarding the scalar alpha. No conformity checks are
// made on the distributions (see the TODO below).
//
// NOTE(review): the function braces and template header are missing from this
// listing.
inline void LocalGer
( T alpha, const DistMatrix<T,xColDist,xRowDist>& x,
           const DistMatrix<T,yColDist,yRowDist>& y,
                 DistMatrix<T,AColDist,ARowDist>& A )
    DEBUG_ONLY(CallStackEntry cse("LocalGer"))
    // TODO: Add error checking here
    Ger( alpha, x.LockedMatrix(), y.LockedMatrix(), A.Matrix() );
Ejemplo n.º 11
// Distributed wrapper around the local RowMaxNorms overload.
//
// A     - distributed input matrix (read-only).
// norms - output; aligned with A, resized to A.Height() x 1, filled by the
//         local RowMaxNorms call and then combined with an mpi::MAX all-reduce
//         over A.RowComm().
//
// NOTE(review): the function braces and template header are missing from this
// listing.
void RowMaxNorms
( const DistMatrix<F,U,V>& A, DistMatrix<Base<F>,U,STAR>& norms )
    norms.AlignWith( A );
    norms.Resize( A.Height(), 1 );
    // Per-process partial result over the locally owned columns.
    RowMaxNorms( A.LockedMatrix(), norms.Matrix() );
    // Combine the partial results across the row communicator.
    AllReduce( norms, A.RowComm(), mpi::MAX );
Ejemplo n.º 12
// Scales the distributed matrix X by the real diagonal held in the distributed
// vector d, from the LEFT or the RIGHT.
//
// When d's distribution and alignment already match the relevant dimension of
// X, the local DiagonalScale is applied directly. The other visible path first
// redistributes d into a [W,STAR] (left) or [Z,STAR] (right) temporary and
// scales with that.
//
// NOTE(review): this listing is incomplete. The function name (presumably
// DiagonalScale), the `else` keywords separating the aligned/unaligned and
// LEFT/RIGHT paths, the callee names on the lines that start with `(`, all
// braces, the `#endif`s, and the template header are missing -- confirm
// against the upstream source before relying on the control flow.
inline void
( LeftOrRight side, Orientation orientation,
  const DistMatrix<typename Base<T>::type,U,V>& d, DistMatrix<T,W,Z>& X )
#ifndef RELEASE
    typedef typename Base<T>::type R;

    if( side == LEFT )
        // Aligned case: d is distributed like the columns of X.
        if( U == W && V == STAR && d.ColAlignment() == X.ColAlignment() )
            DiagonalScale( LEFT, orientation, d.LockedMatrix(), X.Matrix() );
            // Unaligned case: redistribute d to [W,STAR] first.
            DistMatrix<R,W,STAR> d_W_STAR( X.Grid() );
            d_W_STAR = d;
            ( LEFT, orientation, d_W_STAR.LockedMatrix(), X.Matrix() );
        // Aligned case: d is distributed like the rows of X.
        if( U == Z && V == STAR && d.ColAlignment() == X.RowAlignment() )
            DiagonalScale( RIGHT, orientation, d.LockedMatrix(), X.Matrix() );
            // Unaligned case: redistribute d to [Z,STAR] first.
            DistMatrix<R,Z,STAR> d_Z_STAR( X.Grid() );
            d_Z_STAR = d;
            ( RIGHT, orientation, d_Z_STAR.LockedMatrix(), X.Matrix() );
#ifndef RELEASE
Ejemplo n.º 13
// Thin wrapper: calls HPDInverse( uplo, ... ) on the local matrix of the
// [STAR,STAR] matrix A.
//
// NOTE(review): the contents of both `#ifndef RELEASE` regions (presumably
// call-stack push/pop), the `#endif`s, the function braces, and the template
// header are missing from this listing.
inline void
LocalHPDInverse( UpperOrLower uplo, DistMatrix<F,STAR,STAR>& A )
#ifndef RELEASE
    HPDInverse( uplo, A.Matrix() );
#ifndef RELEASE
Ejemplo n.º 14
// Thin wrapper: calls Inverse on the local matrix of the [STAR,STAR] matrix A.
//
// NOTE(review): the contents of both `#ifndef RELEASE` regions (presumably
// call-stack push/pop), the `#endif`s, the function braces, and the template
// header are missing from this listing.
inline void
LocalInverse( DistMatrix<F,STAR,STAR>& A )
#ifndef RELEASE
    Inverse( A.Matrix() );
#ifndef RELEASE
Ejemplo n.º 15
// Thin wrapper: calls Ger on the local matrices of x, y (read-only) and A
// (updated in place), forwarding the scalar alpha. No conformity checks are
// made on the distributions (see the TODO below).
//
// NOTE(review): the `#ifndef RELEASE` below has no visible `#endif`, and the
// function braces and template header are missing from this listing.
inline void LocalGer
( T alpha, const DistMatrix<T,xColDist,xRowDist>& x,
  const DistMatrix<T,yColDist,yRowDist>& y,
  DistMatrix<T,AColDist,ARowDist>& A )
#ifndef RELEASE
    // Call-stack entry, compiled only when RELEASE is not defined.
    CallStackEntry entry("LocalGer");
    // TODO: Add error checking here
    Ger( alpha, x.LockedMatrix(), y.LockedMatrix(), A.Matrix() );
Ejemplo n.º 16
// Distributed wrapper: forwards the local matrix of A to the Zero overload
// that takes a local matrix.
//
// NOTE(review): the contents of both `#ifndef RELEASE` regions (presumably
// call-stack push/pop), the `#endif`s, the function braces, and the template
// header are missing from this listing.
inline void
Zero( DistMatrix<T,U,V>& A )
#ifndef RELEASE
    Zero( A.Matrix() );
#ifndef RELEASE
Ejemplo n.º 17
// Gathers the [U,V]-distributed matrix A into B, whose distribution is
// [Collect<U>(),Collect<V>()].
//
// B is placed on A's grid and resized to A's global dimensions. Participating
// processes either copy locally (when A.DistSize() == 1) or pack their local
// block, exchange equally sized padded portions over A.DistComm(), and unpack
// into B. Finally, if the process is in the grid and A.CrossComm() is not
// mpi::COMM_SELF, B is broadcast over the cross communicator from A.Root().
//
// NOTE(review): this listing is incomplete. The `else` between the copy and
// the pack/communicate/unpack path, the names of the three callees on the
// lines that start with `(`, all braces, and the template header are missing
// -- confirm against the upstream source.
void AllGather
( const DistMatrix<T,        U,           V   >& A,
        DistMatrix<T,Collect<U>(),Collect<V>()>& B )
    AssertSameGrids( A, B );

    const Int height = A.Height();
    const Int width = A.Width();
    B.SetGrid( A.Grid() );
    B.Resize( height, width );

    if( A.Participating() )
        // Only one process in the distribution communicator: plain copy.
        if( A.DistSize() == 1 )
            Copy( A.LockedMatrix(), B.Matrix() );
            const Int colStride = A.ColStride();
            const Int rowStride = A.RowStride();
            const Int distStride = colStride*rowStride;
            // Every process contributes a portion of the same padded size.
            const Int maxLocalHeight = MaxLength(height,colStride);
            const Int maxLocalWidth = MaxLength(width,rowStride);
            const Int portionSize = mpi::Pad( maxLocalHeight*maxLocalWidth );
            // One portion for sending plus distStride portions for receiving.
            vector<T> buf;
            FastResize( buf, (distStride+1)*portionSize );
            T* sendBuf = &buf[0];
            T* recvBuf = &buf[portionSize];

            // Pack
            ( A.LocalHeight(), A.LocalWidth(),
              A.LockedBuffer(), 1, A.LDim(),
              sendBuf,          1, A.LocalHeight() );

            // Communicate
            ( sendBuf, portionSize, recvBuf, portionSize, A.DistComm() );

            // Unpack
            ( height, width,
              A.ColAlign(), colStride,
              A.RowAlign(), rowStride,
              recvBuf, portionSize,
              B.Buffer(), B.LDim() );
    // Share the gathered result across the cross communicator when one exists.
    if( A.Grid().InGrid() && A.CrossComm() != mpi::COMM_SELF )
        El::Broadcast( B, A.CrossComm(), A.Root() );
Ejemplo n.º 18
// For each column shared by A and B, computes
//   Sqrt( minAbs * maxAbs )
// and stores it in geomScaling. maxAbs is the larger of the two column max
// norms (from ColumnMaxNorms on A and on B). minAbs starts at maxAbs, is
// lowered to the smallest strictly positive |entry| found in the local
// columns of A and B, and is then MIN-reduced over A.ColComm().
//
// geomScaling is aligned with maxScalingA and resized to A.Width() x 1.
//
// NOTE(review): this listing is incomplete. The declaration of `maxScalingB`
// is cut off (the `maxScalingA` declaration ends with a trailing comma), and
// the loop/function braces and template header are missing.
void StackedGeometricColumnScaling
( const DistMatrix<Field,      U,V   >& A,
  const DistMatrix<Field,      U,V   >& B,
        DistMatrix<Base<Field>,V,STAR>& geomScaling )
    // NOTE: Assuming A.ColComm() == B.ColComm() and that the row alignments
    //       are equal
    typedef Base<Field> Real;

    DistMatrix<Real,V,STAR> maxScalingA(A.Grid()),
    ColumnMaxNorms( A, maxScalingA );
    ColumnMaxNorms( B, maxScalingB );

    const Int mLocalA = A.LocalHeight();
    const Int mLocalB = B.LocalHeight();
    const Int nLocal = A.LocalWidth();
    geomScaling.AlignWith( maxScalingA );
    geomScaling.Resize( A.Width(), 1 );
    auto& ALoc = A.LockedMatrix();
    auto& BLoc = B.LockedMatrix();
    auto& geomScalingLoc = geomScaling.Matrix();
    auto& maxScalingALoc = maxScalingA.Matrix();
    auto& maxScalingBLoc = maxScalingB.Matrix();
    // Pass 1: local minimum of the strictly positive absolute values.
    for( Int jLoc=0; jLoc<nLocal; ++jLoc )
        // Start from the column maximum so zero entries never win.
        Real minAbs = Max(maxScalingALoc(jLoc),maxScalingBLoc(jLoc));
        for( Int iLoc=0; iLoc<mLocalA; ++iLoc )
            const Real absVal = Abs(ALoc(iLoc,jLoc));
            if( absVal > 0 && absVal < minAbs )
                minAbs = Min(minAbs,absVal);
        for( Int iLoc=0; iLoc<mLocalB; ++iLoc )
            const Real absVal = Abs(BLoc(iLoc,jLoc));
            if( absVal > 0 && absVal < minAbs )
                minAbs = Min(minAbs,absVal);
        geomScalingLoc(jLoc) = minAbs;
    // Combine the local minima across the column communicator.
    mpi::AllReduce( geomScaling.Buffer(), nLocal, mpi::MIN, A.ColComm() );

    // Pass 2: replace each minimum with Sqrt(min*max).
    for( Int jLoc=0; jLoc<nLocal; ++jLoc )
        const Real maxAbsA = maxScalingALoc(jLoc);
        const Real maxAbsB = maxScalingBLoc(jLoc);
        const Real maxAbs = Max(maxAbsA,maxAbsB);
        const Real minAbs = geomScalingLoc(jLoc);
        geomScalingLoc(jLoc) = Sqrt(minAbs*maxAbs);
Ejemplo n.º 19
// Distributed wrapper around RowTwoNormsHelper.
//
// A     - distributed input matrix (read-only).
// norms - output; aligned with A and resized to A.Height() x 1. It is zeroed
//         when A has no columns; otherwise RowTwoNormsHelper receives the
//         local matrices together with A.RowComm().
//
// NOTE(review): braces and the template header are missing from this listing;
// the zero-width branch presumably ends with a stripped `return;` -- confirm.
void RowTwoNorms
( const DistMatrix<F,U,V>& A, DistMatrix<Base<F>,U,STAR>& norms )
    norms.AlignWith( A );
    norms.Resize( A.Height(), 1 );
    // Degenerate case: no columns to take a norm over.
    if( A.Width() == 0 )
        Zero( norms );
    RowTwoNormsHelper( A.LockedMatrix(), norms.Matrix(), A.RowComm() );
Ejemplo n.º 20
// Local triangular multiply wrapper: forwards side/uplo/orientation/diag and
// alpha, with the local matrices of the [STAR,STAR] triangle A and of B, to a
// callee whose name is not visible here.
//
// When RELEASE is not defined, a call-stack entry is created and an error is
// reported if B's distribution does not conform: the column distribution must
// be STAR for side == LEFT and the row distribution must be STAR for
// side == RIGHT.
//
// NOTE(review): this listing is incomplete. The function name (the call-stack
// label suggests LocalTrmm), the name of the error routine, the name of the
// final callee (presumably Trmm), the `#endif`, braces, and the template
// header are missing -- confirm against the upstream source.
inline void
( LeftOrRight side, UpperOrLower uplo,
  Orientation orientation, UnitOrNonUnit diag,
  T alpha, const DistMatrix<T,STAR,STAR>& A,
                 DistMatrix<T,BColDist,BRowDist>& B )
#ifndef RELEASE
    CallStackEntry entry("LocalTrmm");
    if( (side == LEFT && BColDist != STAR) ||
        (side == RIGHT && BRowDist != STAR) )
        ("Distribution of RHS must conform with that of triangle");
    ( side, uplo, orientation, diag, alpha, A.LockedMatrix(), B.Matrix() );
Ejemplo n.º 21
// Fills geomScaling with Sqrt( minAbs * maxAbs ) for each locally owned
// column of A. maxAbs comes from ColumnMaxNorms and minAbs is the value that
// ColumnMinAbsNonzero( A, maxScaling, geomScaling ) leaves in geomScaling.
//
// NOTE(review): the loop/function braces and template header are missing from
// this listing. The role of maxScaling in ColumnMinAbsNonzero is presumably an
// upper bound for the minimum -- verify against that routine.
void GeometricColumnScaling
( const DistMatrix<Field,      U,V   >& A,
        DistMatrix<Base<Field>,V,STAR>& geomScaling )
    typedef Base<Field> Real;
    DistMatrix<Real,V,STAR> maxScaling(A.Grid());
    ColumnMaxNorms( A, maxScaling );
    ColumnMinAbsNonzero( A, maxScaling, geomScaling );
    const Int nLocal = A.LocalWidth();
    auto& maxScalingLoc = maxScaling.Matrix();
    auto& geomScalingLoc = geomScaling.Matrix();
    // Overwrite each stored minimum with Sqrt(min*max).
    for( Int jLoc=0; jLoc<nLocal; ++jLoc )
        const Real maxAbs = maxScalingLoc(jLoc);
        const Real minAbs = geomScalingLoc(jLoc);
        geomScalingLoc(jLoc) = Sqrt(minAbs*maxAbs);
Ejemplo n.º 22
// Fills geomScaling with Sqrt( minAbs * maxAbs ) for each locally owned row
// of A. maxAbs comes from RowMaxNorms and minAbs is the value that
// RowMinAbsNonzero( A, maxScaling, geomScaling ) leaves in geomScaling.
//
// NOTE(review): the loop/function braces and template header are missing from
// this listing. The role of maxScaling in RowMinAbsNonzero is presumably an
// upper bound for the minimum -- verify against that routine.
void GeometricRowScaling
( const DistMatrix<Field,      U,V   >& A,
        DistMatrix<Base<Field>,U,STAR>& geomScaling )
    typedef Base<Field> Real;
    DistMatrix<Real,U,STAR> maxScaling(A.Grid());
    RowMaxNorms( A, maxScaling );
    RowMinAbsNonzero( A, maxScaling, geomScaling );
    const Int mLocal = A.LocalHeight();
    auto& maxScalingLoc = maxScaling.Matrix();
    auto& geomScalingLoc = geomScaling.Matrix();
    // Overwrite each stored minimum with Sqrt(min*max).
    for( Int iLoc=0; iLoc<mLocal; ++iLoc )
        const Real maxAbs = maxScalingLoc(iLoc);
        const Real minAbs = geomScalingLoc(iLoc);
        geomScalingLoc(iLoc) = Sqrt(minAbs*maxAbs);
Ejemplo n.º 23
// Applies func entrywise: for every locally owned entry of A with global
// indices (i,j), sets the matching local entry of B to func( i, j, A(i,j) ).
//
// A    - distributed input with entries of type S (read-only).
// B    - distributed output with entries of type T; aligned with A's
//        distribution data and resized to A's global dimensions.
// func - callable receiving the global row index, the global column index and
//        the input entry.
//
// NOTE(review): the loop/function braces and template header are missing from
// this listing.
void IndexDependentMap
( const DistMatrix<S,U,V,wrap>& A,
        DistMatrix<T,U,V,wrap>& B,
  function<T(Int,Int,const S&)> func )
    const Int mLoc = A.LocalHeight();
    const Int nLoc = A.LocalWidth();
    B.AlignWith( A.DistData() );
    B.Resize( A.Height(), A.Width() );
    auto& ALoc = A.LockedMatrix();
    auto& BLoc = B.Matrix();
    for( Int jLoc=0; jLoc<nLoc; ++jLoc )
        // Translate the local column index into a global one.
        const Int j = A.GlobalCol(jLoc);
        for( Int iLoc=0; iLoc<mLoc; ++iLoc )
            // Translate the local row index into a global one.
            const Int i = A.GlobalRow(iLoc);
            BLoc(iLoc,jLoc) = func(i,j,ALoc(iLoc,jLoc));
Ejemplo n.º 24
// Overwrites the distributed matrix I in place: the local matrix is zeroed,
// then every locally owned entry whose global row and column indices coincide
// is set to T(1).
//
// NOTE(review): the function/loop braces and template header are missing from
// this listing.
inline void
MakeIdentity( DistMatrix<T,U,V>& I )
    DEBUG_ONLY(CallStackEntry cse("MakeIdentity"))
    Zero( I.Matrix() );

    const Int localHeight = I.LocalHeight();
    const Int localWidth = I.LocalWidth();
    const Int colShift = I.ColShift();
    const Int rowShift = I.RowShift();
    const Int colStride = I.ColStride();
    const Int rowStride = I.RowStride();
    for( Int jLoc=0; jLoc<localWidth; ++jLoc )
        // Global column index of local column jLoc.
        const Int j = rowShift + jLoc*rowStride;
        for( Int iLoc=0; iLoc<localHeight; ++iLoc )
            // Global row index of local row iLoc.
            const Int i = colShift + iLoc*colStride;
            if( i == j )
                I.SetLocal( iLoc, jLoc, T(1) );
Ejemplo n.º 25
// Real-valued distributed sweep driver: resolves the active window from ctrl,
// pairs the shifts, and dispatches to multibulge::Sweep.
//
// H      - block-distributed matrix the sweep is applied to.
// shifts - [STAR,STAR] column of complex shifts; its local matrix is passed
//          to multibulge::PairShifts before use.
// Z      - block-distributed matrix forwarded to multibulge::Sweep.
// ctrl   - control structure; a copy with winBeg/winEnd resolved (END is
//          replaced by n = H.Height()) is what gets forwarded.
//
// An odd number of shifts is reported as unsupported, and the LogicError
// below indicates that distributed pair sweeps are unsupported as well.
//
// NOTE(review): this listing is incomplete. The name of the error routine on
// the line holding only a parenthesized string, the `else` that presumably
// guards the LogicError for windows smaller than 4, all braces, and the
// template header are missing -- confirm against the upstream source.
void SweepHelper
( DistMatrix<Real,MC,MR,BLOCK>& H,
  DistMatrix<Complex<Real>,STAR,STAR>& shifts,
  DistMatrix<Real,MC,MR,BLOCK>& Z,
  const HessenbergSchurCtrl& ctrl )
    const Int n = H.Height();
    // Resolve the END sentinel into a concrete index.
    const Int winBeg = ( ctrl.winBeg==END ? n : ctrl.winBeg );
    const Int winEnd = ( ctrl.winEnd==END ? n : ctrl.winEnd );
    const Int winSize = winEnd-winBeg;
    // Work on a copy so the caller's control structure is left untouched.
    auto ctrlMod( ctrl );
    ctrlMod.winBeg = winBeg;
    ctrlMod.winEnd = winEnd;

    const Int numShifts = shifts.Height();
    multibulge::PairShifts( shifts.Matrix() );

    const Int remainder = (numShifts % 2);
    if( remainder == 1 )
        ("Remainder shifts are not yet supported for distributed sweeps");
    // View of the shifts with the leading remainder entry (if any) skipped.
    auto shiftsEven = shifts(IR(remainder,END),ALL);

    if( winSize >= 4 )
        multibulge::Sweep( H, shiftsEven, Z, ctrlMod );
        // Sweep in pairs
        LogicError("Distributed pair sweeps are not yet supported");
Ejemplo n.º 26
// Blocked forward solve with the [VC,STAR]-distributed factor L, applied in
// place to the right-hand sides X.
//
// On a single-process grid the work is delegated to FrontLowerForwardSolve on
// the local matrices. Otherwise L and X are walked block by block: the
// diagonal block L11 and the matching rows X1 are copied to [STAR,STAR],
// solved locally, written back, and X2 is updated with
// X2 -= L21 * X1 via LocalGemm.
//
// NOTE(review): this listing is heavily truncated. Missing are: the
// `return;` that presumably follows the single-process shortcut, the type on
// the first line of the L view declarations, the tail of the X view
// declaration (it ends on a comma and X2 is never declared), the names of the
// partition/repartition/slide and triangular-solve callees on the lines that
// start with `(` or with bare arguments, all braces, and the template header.
// Confirm every step against the upstream source.
inline void ForwardMany
( const DistMatrix<F,VC,STAR>& L, DistMatrix<F,VC,STAR>& X )
    const Grid& g = L.Grid();
    // Sequential shortcut for a one-process grid.
    if( g.Size() == 1 )
        FrontLowerForwardSolve( L.LockedMatrix(), X.Matrix() );

    // Matrix views
        LTL(g), LTR(g),  L00(g), L01(g), L02(g),
        LBL(g), LBR(g),  L10(g), L11(g), L12(g),
                         L20(g), L21(g), L22(g);

    DistMatrix<F,VC,STAR> XT(g),  X0(g),
                          XB(g),  X1(g),

    // Temporary distributions
    DistMatrix<F,STAR,STAR> L11_STAR_STAR(g);
    DistMatrix<F,STAR,STAR> X1_STAR_STAR(g);

    // Initial partitions: the top-left block of L and the top of X are empty.
    ( L, LTL, LTR,
         LBL, LBR, 0 );
    ( X, XT,
         XB, 0 );
    // Advance until the top-left block spans every column of L.
    while( LTL.Width() < L.Width() )
        ( LTL, /**/ LTR,  L00, /**/ L01, L02,
         /*************/ /******************/
               /**/       L10, /**/ L11, L12,
          LBL, /**/ LBR,  L20, /**/ L21, L22 );

        ( XT,  X0,
         /**/ /**/
          XB,  X2, L11.Height() );

        L11_STAR_STAR = L11; // L11[* ,* ] <- L11[VC,* ]
        X1_STAR_STAR = X1;   // X1[* ,* ] <- X1[VC,* ]

        // X1[* ,* ] := (L11[* ,* ])^-1 X1[* ,* ]
          F(1), L11_STAR_STAR, X1_STAR_STAR, true );
        X1 = X1_STAR_STAR;

        // X2[VC,* ] -= L21[VC,* ] X1[* ,* ]
        LocalGemm( NORMAL, NORMAL, F(-1), L21, X1_STAR_STAR, F(1), X2 );

        // Move the partition boundaries past the block just processed.
        ( LTL, /**/ LTR,  L00, L01, /**/ L02,
               /**/       L10, L11, /**/ L12,
         /*************/ /******************/
          LBL, /**/ LBR,  L20, L21, /**/ L22 );

        ( XT,  X0,
         /**/ /**/
          XB,  X2 );
Ejemplo n.º 27
// Redistributes the [U,V] matrix A into B, whose distribution is
// [Partial<U>(),PartialUnionRow<U,V>()], via an exchange over
// A.PartialUnionColComm().
//
// Two paths are visible:
//  * aligned (colDiff == 0): a plain local copy when the partial-union column
//    stride is 1, otherwise pack -> exchange -> unpack;
//  * unaligned: a warning is printed on grid rank 0, then the packed data is
//    first realigned over A.PartialColComm() (send to sendColRankPart,
//    receive from recvColRankPart) before the same exchange and unpack.
//
// Both paths use one buffer of 2*colStrideUnion*portionSize elements, split
// into two halves that alternate as source and destination.
//
// NOTE(review): this listing is incomplete. Missing are: the name of the
// callee that aligns/resizes B, the `return;` that presumably follows the
// `!B.Participating()` test, the `else` keywords separating the paths, the
// names of the pack / communicate / unpack callees on the lines that start
// with `(`, all braces, and the template header. Confirm against the upstream
// source.
void ColAllToAllPromote
( const DistMatrix<T,        U,                     V   >& A,
        DistMatrix<T,Partial<U>(),PartialUnionRow<U,V>()>& B )
    AssertSameGrids( A, B );

    const Int height = A.Height();
    const Int width = A.Width();
    ( Mod(A.ColAlign(),B.ColStride()), height, width, false, false );
    if( !B.Participating() )

    const Int colStride = A.ColStride();
    const Int colStridePart = A.PartialColStride();
    const Int colStrideUnion = A.PartialUnionColStride();
    const Int colRankPart = A.PartialColRank();
    // Nonzero when B's column alignment differs from A's (mod the partial
    // column stride).
    const Int colDiff = B.ColAlign() - Mod(A.ColAlign(),colStridePart);

    // Every exchanged portion has the same padded size.
    const Int maxLocalHeight = MaxLength(height,colStride);
    const Int maxLocalWidth = MaxLength(width,colStrideUnion);
    const Int portionSize = mpi::Pad( maxLocalHeight*maxLocalWidth );

    if( colDiff == 0 )
        // No communication partner other than ourselves: local copy.
        if( A.PartialUnionColStride() == 1 )
            Copy( A.LockedMatrix(), B.Matrix() );
            vector<T> buffer;
            FastResize( buffer, 2*colStrideUnion*portionSize );
            T* firstBuf  = &buffer[0];
            T* secondBuf = &buffer[colStrideUnion*portionSize];

            // Pack
            ( A.LocalHeight(), width,
              B.RowAlign(), colStrideUnion,
              A.LockedBuffer(), A.LDim(),
              firstBuf,         portionSize );

            // Simultaneously Gather in columns and Scatter in rows
            ( firstBuf,  portionSize,
              secondBuf, portionSize, A.PartialUnionColComm() );

            // Unpack
            ( height, B.LocalWidth(),
              A.ColAlign(), colStride,
              colStrideUnion, colStridePart, colRankPart,
              secondBuf,  portionSize,
              B.Buffer(), B.LDim() );
        // Unaligned path: warn once, on grid rank 0.
        if( A.Grid().Rank() == 0 )
            cerr << "Unaligned PartialColAllToAllPromote" << endl;
        const Int sendColRankPart = Mod( colRankPart+colDiff, colStridePart );
        const Int recvColRankPart = Mod( colRankPart-colDiff, colStridePart );

        vector<T> buffer;
        FastResize( buffer, 2*colStrideUnion*portionSize );
        T* firstBuf  = &buffer[0];
        T* secondBuf = &buffer[colStrideUnion*portionSize];

        // Pack
        ( A.LocalHeight(), width,
          B.RowAlign(), colStrideUnion,
          A.LockedBuffer(), A.LDim(),
          secondBuf,        portionSize );

        // Realign the input
        ( secondBuf, colStrideUnion*portionSize, sendColRankPart,
          firstBuf,  colStrideUnion*portionSize, recvColRankPart,
          A.PartialColComm() );

        // Simultaneously Scatter in columns and Gather in rows
        ( firstBuf,  portionSize,
          secondBuf, portionSize, A.PartialUnionColComm() );

        // Unpack
        ( height, B.LocalWidth(),
          A.ColAlign(), colStride,
          colStrideUnion, colStridePart, recvColRankPart,
          secondBuf,  portionSize,
          B.Buffer(), B.LDim() );
Ejemplo n.º 28
void TransformRows
( const Matrix<F>& Z,
        DistMatrix<F,MC,MR,BLOCK>& H )
    const Int height = H.Height();
    const Grid& grid = H.Grid();

    const Int blockHeight = H.BlockHeight();
    const Int firstBlockHeight = blockHeight - H.ColCut();
    if( height <= firstBlockHeight || grid.Height() == 1 )
        if( grid.Row() == H.RowOwner(0) )
            // This process row can locally update its portion of H
            Matrix<F> HLocCopy( H.Matrix() );
            Gemm( ADJOINT, NORMAL, F(1), Z, HLocCopy, H.Matrix() );
    else if( height <= firstBlockHeight + blockHeight )
        const bool firstRow = H.RowOwner( 0 );
        const bool secondRow = H.RowOwner( firstBlockHeight );
        if( grid.Row() == firstRow )
            // Replace H with 
            //   | ZLeft, ZRight |' | HTop    |,
            //                      | HBottom |
            // where HTop is owned by this process row and HBottom by the next.
            auto ZLeft = Z( ALL, IR(0,firstBlockHeight) );

            // Partition space for the combined matrix
            Matrix<F> HCombine( height, H.LocalWidth() );
            auto HTop = HCombine( IR(0,firstBlockHeight), ALL );
            auto HBottom = HCombine( IR(firstBlockHeight,END), ALL );

            // Copy our portion into the combined matrix
            HTop = H.LockedMatrix();

            // Exchange the data
            El::SendRecv( HTop, HBottom, H.ColComm(), secondRow, secondRow );
            // Form our portion of the result
            Gemm( ADJOINT, NORMAL, F(1), ZLeft, HCombine, H.Matrix() );
        else if( grid.Row() == secondRow )
            // Replace H with 
            //   | ZLeft, ZRight |' | HTop    |,
            //                      | HBottom |
            // where HTop is owned by the previous process row and HBottom by
            // this one.
            auto ZRight = Z( ALL, IR(firstBlockHeight,END) );

            // Partition space for the combined matrix
            Matrix<F> HCombine( height, H.LocalWidth() );
            auto HTop = HCombine( IR(0,firstBlockHeight), ALL );
            auto HBottom = HCombine( IR(firstBlockHeight,END), ALL );

            // Copy our portion into the combined matrix
            HBottom = H.LockedMatrix();

            // Exchange the data
            El::SendRecv( HBottom, HTop, H.ColComm(), firstRow, firstRow );
            // Form our portion of the result
            Gemm( ADJOINT, NORMAL, F(1), ZRight, HCombine, H.Matrix() );
        // Fall back to the entire process column interacting.
        // TODO(poulson): Only form the subset of the result that we need.
        DistMatrix<F,STAR,MR,BLOCK> H_STAR_MR( H );
        Matrix<F> HLocCopy( H_STAR_MR.Matrix() );
        Gemm( ADJOINT, NORMAL, F(1), Z, HLocCopy, H_STAR_MR.Matrix() );
        H = H_STAR_MR;
Ejemplo n.º 29
void TransformColumns
( const Matrix<F>& Z,
        DistMatrix<F,MC,MR,BLOCK>& H )
    const Int width = H.Width();
    const Grid& grid = H.Grid();

    const Int blockWidth = H.BlockWidth();
    const Int firstBlockWidth = blockWidth - H.RowCut();
    if( width <= firstBlockWidth || grid.Width() == 1 )
        if( grid.Col() == H.ColOwner(0) )
            // This process row can locally update its portion of H
            Matrix<F> HLocCopy( H.Matrix() );
            Gemm( NORMAL, NORMAL, F(1), HLocCopy, Z, H.Matrix() );
    else if( width <= firstBlockWidth + blockWidth )
        const bool firstCol = H.ColOwner( 0 );
        const bool secondCol = H.ColOwner( firstBlockWidth );
        if( grid.Col() == firstCol )
            // Replace H with 
            //   | HLeft, HRight | | ZLeft, ZRight |,
            // where HLeft is owned by this process column and HRight by the
            // next.
            auto ZLeft = Z( ALL, IR(0,firstBlockWidth) );

            // Partition space for the combined matrix
            Matrix<F> HCombine( H.LocalHeight(), width );
            auto HLeft = HCombine( ALL, IR(0,firstBlockWidth) );
            auto HRight = HCombine( ALL, IR(firstBlockWidth,END) );

            // Copy our portion into the combined matrix
            HLeft = H.LockedMatrix();

            // Exchange the data
            El::SendRecv( HLeft, HRight, H.RowComm(), secondCol, secondCol );
            // Form our portion of the result
            Gemm( NORMAL, NORMAL, F(1), HCombine, ZLeft, H.Matrix() );
        else if( grid.Col() == secondCol )
            // Replace H with 
            //   | HLeft, HRight | | ZLeft, ZRight |,
            // where HLeft is owned by the previous process column and HRight
            // by this one.
            auto ZRight = Z( ALL, IR(firstBlockWidth,END) );

            // Partition space for the combined matrix
            Matrix<F> HCombine( H.LocalHeight(), width );
            auto HLeft = HCombine( ALL, IR(0,firstBlockWidth) );
            auto HRight = HCombine( ALL, IR(firstBlockWidth,END) );

            // Copy our portion into the combined matrix
            HRight = H.LockedMatrix();

            // Exchange the data
            El::SendRecv( HRight, HLeft, H.RowComm(), firstCol, firstCol );
            // Form our portion of the result
            Gemm( NORMAL, NORMAL, F(1), HCombine, ZRight, H.Matrix() );
        // Fall back to the entire process column interacting.
        // TODO(poulson): Only form the subset of the result that we need.
        DistMatrix<F,MC,STAR,BLOCK> H_MC_STAR( H );
        Matrix<F> HLocCopy( H_MC_STAR.Matrix() );
        Gemm( NORMAL, NORMAL, F(1), HLocCopy, Z, H_MC_STAR.Matrix() );
        H = H_MC_STAR;
Ejemplo n.º 30
void Gather
( const BlockMatrix<T>& A,
        DistMatrix<T,CIRC,CIRC,BLOCK>& B )
    DEBUG_ONLY(CSE cse("copy::Gather"))
    AssertSameGrids( A, B );
    if( A.DistSize() == 1 && A.CrossSize() == 1 )
        B.Resize( A.Height(), A.Width() );
        if( B.CrossRank() == B.Root() )
            Copy( A.LockedMatrix(), B.Matrix() );

    const Int height = A.Height();
    const Int width = A.Width();
    B.SetGrid( A.Grid() );
    B.Resize( height, width );

    // Gather the colShifts and rowShifts
    // ==================================
    Int myShifts[2];
    myShifts[0] = A.ColShift();
    myShifts[1] = A.RowShift();
    vector<Int> shifts;
    const Int crossSize = B.CrossSize();
    if( B.CrossRank() == B.Root() )
        shifts.resize( 2*crossSize );
    mpi::Gather( myShifts, 2, shifts.data(), 2, B.Root(), B.CrossComm() );

    // Gather the payload data
    // =======================
    const bool irrelevant = ( A.RedundantRank()!=0 || A.CrossRank()!=A.Root() );
    int totalSend = ( irrelevant ? 0 : A.LocalHeight()*A.LocalWidth() );
    vector<int> recvCounts, recvOffsets;
    if( B.CrossRank() == B.Root() )
        recvCounts.resize( crossSize );
    mpi::Gather( &totalSend, 1, recvCounts.data(), 1, B.Root(), B.CrossComm() );
    int totalRecv = Scan( recvCounts, recvOffsets );
    //vector<T> sendBuf(totalSend), recvBuf(totalRecv);
    vector<T> sendBuf, recvBuf;
    sendBuf.reserve( totalSend );
    recvBuf.reserve( totalRecv );
    if( !irrelevant )
        ( A.LocalHeight(), A.LocalWidth(),
          A.LockedBuffer(), 1, A.LDim(),
          sendBuf.data(),   1, A.LocalHeight() );
    ( sendBuf.data(), totalSend,
      recvBuf.data(), recvCounts.data(), recvOffsets.data(), 
      B.Root(), B.CrossComm() );

    // Unpack
    // ======
    const Int mb = A.BlockHeight();
    const Int nb = A.BlockWidth();
    const Int colCut = A.ColCut();
    const Int rowCut = A.RowCut();
    if( B.Root() == B.CrossRank() )
        for( Int q=0; q<crossSize; ++q )
            if( recvCounts[q] == 0 )
            const Int colShift = shifts[2*q+0];
            const Int rowShift = shifts[2*q+1];
            const Int colStride = A.ColStride();
            const Int rowStride = A.RowStride();
            const Int localHeight =
              BlockedLength( height, colShift, mb, colCut, colStride );
            const Int localWidth = 
              BlockedLength( width, rowShift, nb, rowCut, rowStride );
            const T* data = &recvBuf[recvOffsets[q]];
            for( Int jLoc=0; jLoc<localWidth; ++jLoc )
                const Int jBefore = rowShift*nb - rowCut;
                const Int jLocAdj = ( rowShift==0 ? jLoc+rowCut : jLoc );
                const Int numFilledLocalBlocks = jLocAdj / nb;
                const Int jMid = numFilledLocalBlocks*nb*rowStride;
                const Int jPost = jLocAdj-numFilledLocalBlocks*nb;
                const Int j = jBefore + jMid + jPost;
                const T* sourceCol = &data[jLoc*localHeight];
                for( Int iLoc=0; iLoc<localHeight; ++iLoc )
                    const Int iBefore = colShift*mb - colCut;
                    const Int iLocAdj = (colShift==0 ? iLoc+colCut : iLoc);
                    const Int numFilledLocalBlocks = iLocAdj / mb;
                    const Int iMid = numFilledLocalBlocks*mb*colStride;
                    const Int iPost = iLocAdj-numFilledLocalBlocks*mb;
                    const Int i = iBefore + iMid + iPost;