示例#1
0
void GatherSubdiagonal
( const DistMatrix<F,MC,MR,BLOCK>& H,
  const IR& winInd,
        DistMatrix<Base<F>,STAR,STAR>& hSubWin )
{
    DEBUG_CSE
    const Int winSize = winInd.end - winInd.beg;
    const Int blockSize = H.BlockHeight();
    const Grid& grid = H.Grid();
    const auto& HLoc = H.LockedMatrix();
    DEBUG_ONLY(
      if( H.BlockHeight() != H.BlockWidth() )
          LogicError("Assumed square distribution blocks");
      if( H.ColCut() != H.RowCut() )
          LogicError("Assumed symmetric cuts");
      if( blockSize < 2 )
          LogicError("Assumed blocks of size at least two");
    )
示例#2
0
void TransformRows
( const Matrix<F>& Z,
        DistMatrix<F,MC,MR,BLOCK>& H )
{
    DEBUG_CSE
    const Int height = H.Height();
    const Grid& grid = H.Grid();

    const Int blockHeight = H.BlockHeight();
    const Int firstBlockHeight = blockHeight - H.ColCut();
    if( height <= firstBlockHeight || grid.Height() == 1 )
    {
        if( grid.Row() == H.RowOwner(0) )
        {
            // This process row can locally update its portion of H
            Matrix<F> HLocCopy( H.Matrix() );
            Gemm( ADJOINT, NORMAL, F(1), Z, HLocCopy, H.Matrix() );
        }
    }
    else if( height <= firstBlockHeight + blockHeight )
    {
        const bool firstRow = H.RowOwner( 0 );
        const bool secondRow = H.RowOwner( firstBlockHeight );
        if( grid.Row() == firstRow )
        {
            // 
            // Replace H with 
            //
            //   | ZLeft, ZRight |' | HTop    |,
            //                      | HBottom |
            //
            // where HTop is owned by this process row and HBottom by the next.
            //
            auto ZLeft = Z( ALL, IR(0,firstBlockHeight) );

            // Partition space for the combined matrix
            Matrix<F> HCombine( height, H.LocalWidth() );
            auto HTop = HCombine( IR(0,firstBlockHeight), ALL );
            auto HBottom = HCombine( IR(firstBlockHeight,END), ALL );

            // Copy our portion into the combined matrix
            HTop = H.LockedMatrix();

            // Exchange the data
            El::SendRecv( HTop, HBottom, H.ColComm(), secondRow, secondRow );
            
            // Form our portion of the result
            Gemm( ADJOINT, NORMAL, F(1), ZLeft, HCombine, H.Matrix() );
        }
        else if( grid.Row() == secondRow )
        {
            // 
            // Replace H with 
            //
            //   | ZLeft, ZRight |' | HTop    |,
            //                      | HBottom |
            //
            // where HTop is owned by the previous process row and HBottom by
            // this one.
            //
            auto ZRight = Z( ALL, IR(firstBlockHeight,END) );

            // Partition space for the combined matrix
            Matrix<F> HCombine( height, H.LocalWidth() );
            auto HTop = HCombine( IR(0,firstBlockHeight), ALL );
            auto HBottom = HCombine( IR(firstBlockHeight,END), ALL );

            // Copy our portion into the combined matrix
            HBottom = H.LockedMatrix();

            // Exchange the data
            El::SendRecv( HBottom, HTop, H.ColComm(), firstRow, firstRow );
            
            // Form our portion of the result
            Gemm( ADJOINT, NORMAL, F(1), ZRight, HCombine, H.Matrix() );
        }
    }
    else
    {
        // Fall back to the entire process column interacting.
        // TODO(poulson): Only form the subset of the result that we need.
        DistMatrix<F,STAR,MR,BLOCK> H_STAR_MR( H );
        Matrix<F> HLocCopy( H_STAR_MR.Matrix() );
        Gemm( ADJOINT, NORMAL, F(1), Z, HLocCopy, H_STAR_MR.Matrix() );
        H = H_STAR_MR;
    }
}
示例#3
0
HessenbergSchurInfo
MultiBulge
( DistMatrix<F,MC,MR,BLOCK>& H,
  DistMatrix<Complex<Base<F>>,STAR,STAR>& w,
  DistMatrix<F,MC,MR,BLOCK>& Z,
  const HessenbergSchurCtrl& ctrl )
{
    DEBUG_CSE 
    typedef Base<F> Real;
    const Real zero(0);
    const Grid& grid = H.Grid();

    const Int n = H.Height();
    Int winBeg = ( ctrl.winBeg==END ? n : ctrl.winBeg );
    Int winEnd = ( ctrl.winEnd==END ? n : ctrl.winEnd );
    const Int winSize = winEnd - winBeg;
    const Int blockSize = H.BlockHeight();

    // TODO(poulson): Implement a more reasonable/configurable means of deciding
    // when to call the sequential implementation
    Int minMultiBulgeSize = Max( ctrl.minMultiBulgeSize, 2*blockSize );
    // This maximum is meant to account for parallel overheads and needs to be
    // more principled (and perhaps based upon the number of workers and the 
    // cluster characteristics)
    // TODO(poulson): Re-enable this
    //minMultiBulgeSize = Max( minMultiBulgeSize, 500 );

    HessenbergSchurInfo info;

    w.Resize( n, 1 );
    if( winSize < minMultiBulgeSize )
    {
        return multibulge::RedundantlyHandleWindow( H, w, Z, ctrl );
    }

    auto ctrlShifts( ctrl );
    ctrlShifts.winBeg = 0;
    ctrlShifts.winEnd = END;
    ctrlShifts.fullTriangle = false;

    Int numIterSinceDeflation = 0;
    const Int numStaleIterBeforeExceptional = 5;
    // Cf. LAPACK's DLAQR0 for this choice
    const Int maxIter =
      Max(30,2*numStaleIterBeforeExceptional) * Max(10,winSize);

    Int iterBegLast=-1, winEndLast=-1;
    DistMatrix<F,STAR,STAR> hMainWin(grid), hSuperWin(grid);
    DistMatrix<Real,STAR,STAR> hSubWin(grid);
    while( winBeg < winEnd )
    {
        if( info.numIterations >= maxIter )
        {
            if( ctrl.demandConverged )
                RuntimeError("MultiBulge QR iteration did not converge");
            else
                break;
        }
        auto winInd = IR(winBeg,winEnd);

        // Detect an irreducible Hessenberg window, [iterBeg,winEnd)
        // ---------------------------------------------------------
        // TODO(poulson): Have the interblock chase from the previous sweep
        // collect the main and sub diagonal of H along the diagonal workers 
        // and then broadcast across the "cross" communicator.
        util::GatherTridiagonal( H, winInd, hMainWin, hSubWin, hSuperWin );
        Output("winBeg=",winBeg,", winEnd=",winEnd);
        Print( H, "H" );
        Print( hMainWin, "hMainWin" );
        Print( hSubWin, "hSubWin" );
        Print( hSuperWin, "hSuperWin" );

        const Int iterOffset =
          DetectSmallSubdiagonal
          ( hMainWin.Matrix(), hSubWin.Matrix(), hSuperWin.Matrix() );
        const Int iterBeg = winBeg + iterOffset;
        const Int iterWinSize = winEnd-iterBeg;
        if( iterOffset > 0 )
        {
            H.Set( iterBeg, iterBeg-1, zero );
            hSubWin.Set( iterOffset-1, 0, zero );
        }
        if( iterWinSize == 1 )
        {
            if( ctrl.progress )
                Output("One-by-one window at ",iterBeg);
            w.Set( iterBeg, 0, hMainWin.GetLocal(iterOffset,0) );

            winEnd = iterBeg;
            numIterSinceDeflation = 0;
            continue;
        }
        else if( iterWinSize == 2 )
        {
            if( ctrl.progress )
                Output("Two-by-two window at ",iterBeg);
            const F eta00 = hMainWin.GetLocal(iterOffset,0);
            const F eta01 = hSuperWin.GetLocal(iterOffset,0);
            const Real eta10 = hSubWin.GetLocal(iterOffset,0);
            const F eta11 = hMainWin.GetLocal(iterOffset+1,0);
            multibulge::TwoByTwo
            ( H, eta00, eta01, eta10, eta11, w, Z, iterBeg, ctrl );

            winEnd = iterBeg;
            numIterSinceDeflation = 0;
            continue;
        }
        else if( iterWinSize < minMultiBulgeSize )
        {
            // The window is small enough to switch to the simple scheme
            if( ctrl.progress )
                Output("Redundantly handling window [",iterBeg,",",winEnd,"]");
            auto ctrlIter( ctrl );
            ctrlIter.winBeg = iterBeg;
            ctrlIter.winEnd = winEnd;
            auto iterInfo =
              multibulge::RedundantlyHandleWindow( H, w, Z, ctrlIter );
            info.numIterations += iterInfo.numIterations;
             
            winEnd = iterBeg;
            numIterSinceDeflation = 0;
            continue;
        }

        const Int numShiftsRec = ctrl.numShifts( n, iterWinSize );
        if( ctrl.progress )
        {
            Output("Iter. ",info.numIterations,": ");
            Output("  window is [",iterBeg,",",winEnd,")");
            Output("  recommending ",numShiftsRec," shifts");
        }

        // NOTE(poulson): In the case where exceptional shifts are used, the
        // main and subdiagonals of H in the window are currently redundantly
        // gathered. It could be worthwhile to pass in hMainWin and hSubWin.
        const Int shiftBeg = multibulge::ComputeShifts
        ( H, w, iterBeg, winBeg, winEnd, numShiftsRec, numIterSinceDeflation,
          numStaleIterBeforeExceptional, ctrlShifts );
        auto shiftInd = IR(shiftBeg,winEnd);
        auto wShifts = w(shiftInd,ALL);

        // Perform a small-bulge sweep
        auto ctrlSweep( ctrl );
        ctrlSweep.winBeg = iterBeg;
        ctrlSweep.winEnd = winEnd;
        multibulge::Sweep( H, wShifts, Z, ctrlSweep );

        ++info.numIterations;
        if( iterBeg == iterBegLast && winEnd == winEndLast )
            ++numIterSinceDeflation;
        iterBegLast = iterBeg;
        winEndLast = winEnd;
    }
    info.numUnconverged = winEnd-winBeg;
    return info;
}
示例#4
0
void TransformRows
( const Matrix<F>& V,
        DistMatrix<F,MC,MR,BLOCK>& A )
{
    DEBUG_CSE
    const Int height = A.Height();
    const Grid& grid = A.Grid();

    const Int blockHeight = A.BlockHeight();
    const Int firstBlockHeight = blockHeight - A.ColCut();
    if( height <= firstBlockHeight || grid.Height() == 1 )
    {
        if( grid.Row() == A.RowOwner(0) )
        {
            // This process row can locally update its portion of A
            TransformRows( V, A.Matrix() );
        }
    }
    else if( height <= firstBlockHeight + blockHeight )
    {
        const int firstRow = A.RowOwner( 0 );
        const int secondRow = A.RowOwner( firstBlockHeight );
        if( grid.Row() == firstRow )
        {
            //
            // Replace A with
            //
            //   | VLeft, VRight |' | ATop    |,
            //                      | ABottom |
            //
            // where ATop is owned by this process row and ABottom by the next.
            //
            auto VLeft = V( ALL, IR(0,firstBlockHeight) );

            // Partition space for the combined matrix
            Matrix<F> ACombine( height, A.LocalWidth() );
            auto ATop = ACombine( IR(0,firstBlockHeight), ALL );
            auto ABottom = ACombine( IR(firstBlockHeight,END), ALL );

            // Copy our portion into the combined matrix
            ATop = A.LockedMatrix();

            // Exchange the data
            El::SendRecv( ATop, ABottom, A.ColComm(), secondRow, secondRow );

            // Form our portion of the result
            Gemm( ADJOINT, NORMAL, F(1), VLeft, ACombine, A.Matrix() );
        }
        else if( grid.Row() == secondRow )
        {
            //
            // Replace A with
            //
            //   | VLeft, VRight |' | ATop    |,
            //                      | ABottom |
            //
            // where ATop is owned by the previous process row and ABottom by
            // this one.
            //
            auto VRight = V( ALL, IR(firstBlockHeight,END) );

            // Partition space for the combined matrix
            Matrix<F> ACombine( height, A.LocalWidth() );
            auto ATop = ACombine( IR(0,firstBlockHeight), ALL );
            auto ABottom = ACombine( IR(firstBlockHeight,END), ALL );

            // Copy our portion into the combined matrix
            ABottom = A.LockedMatrix();

            // Exchange the data
            El::SendRecv( ABottom, ATop, A.ColComm(), firstRow, firstRow );

            // Form our portion of the result
            Gemm( ADJOINT, NORMAL, F(1), VRight, ACombine, A.Matrix() );
        }
    }
    else
    {
        // Fall back to the entire process column interacting.
        // TODO(poulson): Only form the subset of the result that we need.
        DistMatrix<F,STAR,MR,BLOCK> A_STAR_MR( A );
        Matrix<F> ALocCopy( A_STAR_MR.Matrix() );
        Gemm( ADJOINT, NORMAL, F(1), V, ALocCopy, A_STAR_MR.Matrix() );
        A = A_STAR_MR;
    }
}