Example #1
0
// Overwrites J via MakeJordan( J, lambda ) and then stores alpha in the
// bottom-left entry, (Height-1, 0). When J has no rows or no columns, only
// the MakeJordan call is performed.
inline void
MakeForsythe( DistMatrix<T,U,V>& J, T alpha, T lambda )
{
    DEBUG_ONLY(CallStackEntry cse("MakeForsythe"))
    MakeJordan( J, lambda );

    const Int height = J.Height();
    const Int width = J.Width();
    // An empty matrix has no bottom-left corner to overwrite
    if( height <= 0 || width <= 0 )
        return;

    const Int lastRow = height-1;
    J.Set( lastRow, 0, alpha );
}
Example #2
0
int 
main( int argc, char* argv[] )
{
    Initialize( argc, argv );

    try
    {
        const Int n = Input("--size","size of matrix",10);
        const bool display = Input("--display","display matrix?",true);
        const bool print = Input("--print","print matrix?",false);
        ProcessInput();
        PrintInputReport();

        auto J = Legendre<double>( DefaultGrid(), n );
        if( display )
        {
            Display( J, "Jacobi matrix for Legendre polynomials" );
#ifdef HAVE_QT5
            Spy( J, "Spy plot for Jacobi matrix" );
#endif
        }
        if( print )
            Print( J, "Jacobi matrix for Legendre polynomials" );

#ifdef HAVE_PMRRR
        // We will compute Gaussian quadrature points and weights over [-1,+1]
        // using the eigenvalue decomposition of the Jacobi matrix for the 
        // Legendre polynomials.
        DistMatrix<double,VR,  STAR> points;
        DistMatrix<double,STAR,VR  > X;
        HermitianTridiagEig
        ( J.GetDiagonal(), J.GetDiagonal(-1), points, X, ASCENDING );
        if( display )
            Display( points, "Quadrature points" );
        if( print )
            Print( points, "points" );
        auto firstRow = View( X, 0, 0, 1, n );
        DistMatrix<double,STAR,STAR> weights = firstRow;
        for( Int j=0; j<n; ++j )
        {
            const double gamma = weights.Get( 0, j );
            weights.Set( 0, j, 2*gamma*gamma );
        }
        if( display )
            Display( weights, "Quadrature weights" );
        if( print )
            Print( weights, "weights" );
#endif
    }
    catch( std::exception& e ) { ReportException(e); }

    Finalize();
    return 0;
}
Example #3
0
// Overwrites J via MakeJordan( J, lambda ) and then writes alpha into the
// entry in the last row and first column. If J is empty in either dimension,
// nothing is written after the MakeJordan call.
inline void
MakeForsythe( DistMatrix<T,U,V>& J, T alpha, T lambda )
{
#ifndef RELEASE
    CallStackEntry entry("MakeForsythe");
#endif
    MakeJordan( J, lambda );

    const Int numRows = J.Height();
    const Int numCols = J.Width();
    // No corner entry exists for an empty matrix
    if( numRows <= 0 || numCols <= 0 )
        return;

    const Int bottomRow = numRows-1;
    J.Set( bottomRow, 0, alpha );
}
Example #4
0
// Distributed multibulge QR iteration on the window [winBeg,winEnd) of the
// block-distributed Hessenberg matrix H.
//
// Parameters:
//   H    - Hessenberg matrix; modified in place by the sweeps/deflations.
//   w    - resized to n x 1 (n = H.Height()); receives the computed
//          eigenvalues of deflated one-by-one and two-by-two windows.
//   Z    - passed through to the sweep/deflation helpers
//          (NOTE(review): presumably the accumulated Schur vectors -- confirm).
//   ctrl - control structure; ctrl.winBeg/ctrl.winEnd select the window (END
//          is mapped to n), ctrl.minMultiBulgeSize sets the threshold below
//          which a window is handled redundantly, ctrl.progress enables
//          progress output, and ctrl.demandConverged turns failure to
//          converge within the iteration limit into a RuntimeError.
//
// Returns a HessenbergSchurInfo whose numIterations counts the iterations
// performed and whose numUnconverged is the size of the window remaining when
// the loop exits (zero on full convergence).
HessenbergSchurInfo
MultiBulge
( DistMatrix<F,MC,MR,BLOCK>& H,
  DistMatrix<Complex<Base<F>>,STAR,STAR>& w,
  DistMatrix<F,MC,MR,BLOCK>& Z,
  const HessenbergSchurCtrl& ctrl )
{
    DEBUG_CSE
    typedef Base<F> Real;
    const Real zero(0);
    const Grid& grid = H.Grid();

    const Int n = H.Height();
    Int winBeg = ( ctrl.winBeg==END ? n : ctrl.winBeg );
    Int winEnd = ( ctrl.winEnd==END ? n : ctrl.winEnd );
    const Int winSize = winEnd - winBeg;
    const Int blockSize = H.BlockHeight();

    // TODO(poulson): Implement a more reasonable/configurable means of deciding
    // when to call the sequential implementation
    Int minMultiBulgeSize = Max( ctrl.minMultiBulgeSize, 2*blockSize );
    // This maximum is meant to account for parallel overheads and needs to be
    // more principled (and perhaps based upon the number of workers and the
    // cluster characteristics)
    // TODO(poulson): Re-enable this
    //minMultiBulgeSize = Max( minMultiBulgeSize, 500 );

    HessenbergSchurInfo info;

    w.Resize( n, 1 );
    if( winSize < minMultiBulgeSize )
    {
        // The entire window is too small for the distributed scheme
        return multibulge::RedundantlyHandleWindow( H, w, Z, ctrl );
    }

    // The shift computation is given a modified copy of the control structure
    // (window reset to [0,END), fullTriangle disabled)
    auto ctrlShifts( ctrl );
    ctrlShifts.winBeg = 0;
    ctrlShifts.winEnd = END;
    ctrlShifts.fullTriangle = false;

    Int numIterSinceDeflation = 0;
    const Int numStaleIterBeforeExceptional = 5;
    // Cf. LAPACK's DLAQR0 for this choice
    const Int maxIter =
      Max(30,2*numStaleIterBeforeExceptional) * Max(10,winSize);

    // The window of the previous sweep, used to detect a lack of deflation
    Int iterBegLast=-1, winEndLast=-1;
    DistMatrix<F,STAR,STAR> hMainWin(grid), hSuperWin(grid);
    DistMatrix<Real,STAR,STAR> hSubWin(grid);
    while( winBeg < winEnd )
    {
        if( info.numIterations >= maxIter )
        {
            if( ctrl.demandConverged )
                RuntimeError("MultiBulge QR iteration did not converge");
            else
                break;
        }
        auto winInd = IR(winBeg,winEnd);

        // Detect an irreducible Hessenberg window, [iterBeg,winEnd)
        // ---------------------------------------------------------
        // TODO(poulson): Have the interblock chase from the previous sweep
        // collect the main and sub diagonal of H along the diagonal workers
        // and then broadcast across the "cross" communicator.
        util::GatherTridiagonal( H, winInd, hMainWin, hSubWin, hSuperWin );

        const Int iterOffset =
          DetectSmallSubdiagonal
          ( hMainWin.Matrix(), hSubWin.Matrix(), hSuperWin.Matrix() );
        const Int iterBeg = winBeg + iterOffset;
        const Int iterWinSize = winEnd-iterBeg;
        if( iterOffset > 0 )
        {
            // Explicitly zero the small subdiagonal entry, both in H and in
            // the gathered copy of the subdiagonal
            H.Set( iterBeg, iterBeg-1, zero );
            hSubWin.Set( iterOffset-1, 0, zero );
        }
        if( iterWinSize == 1 )
        {
            // The diagonal entry is the eigenvalue
            if( ctrl.progress )
                Output("One-by-one window at ",iterBeg);
            w.Set( iterBeg, 0, hMainWin.GetLocal(iterOffset,0) );

            winEnd = iterBeg;
            numIterSinceDeflation = 0;
            continue;
        }
        else if( iterWinSize == 2 )
        {
            if( ctrl.progress )
                Output("Two-by-two window at ",iterBeg);
            const F eta00 = hMainWin.GetLocal(iterOffset,0);
            const F eta01 = hSuperWin.GetLocal(iterOffset,0);
            const Real eta10 = hSubWin.GetLocal(iterOffset,0);
            const F eta11 = hMainWin.GetLocal(iterOffset+1,0);
            multibulge::TwoByTwo
            ( H, eta00, eta01, eta10, eta11, w, Z, iterBeg, ctrl );

            winEnd = iterBeg;
            numIterSinceDeflation = 0;
            continue;
        }
        else if( iterWinSize < minMultiBulgeSize )
        {
            // The window is small enough to switch to the simple scheme
            if( ctrl.progress )
                Output("Redundantly handling window [",iterBeg,",",winEnd,")");
            auto ctrlIter( ctrl );
            ctrlIter.winBeg = iterBeg;
            ctrlIter.winEnd = winEnd;
            auto iterInfo =
              multibulge::RedundantlyHandleWindow( H, w, Z, ctrlIter );
            info.numIterations += iterInfo.numIterations;

            winEnd = iterBeg;
            numIterSinceDeflation = 0;
            continue;
        }

        const Int numShiftsRec = ctrl.numShifts( n, iterWinSize );
        if( ctrl.progress )
        {
            Output("Iter. ",info.numIterations,": ");
            Output("  window is [",iterBeg,",",winEnd,")");
            Output("  recommending ",numShiftsRec," shifts");
        }

        // NOTE(poulson): In the case where exceptional shifts are used, the
        // main and subdiagonals of H in the window are currently redundantly
        // gathered. It could be worthwhile to pass in hMainWin and hSubWin.
        const Int shiftBeg = multibulge::ComputeShifts
        ( H, w, iterBeg, winBeg, winEnd, numShiftsRec, numIterSinceDeflation,
          numStaleIterBeforeExceptional, ctrlShifts );
        // The shifts are read from w over the index range [shiftBeg,winEnd)
        auto shiftInd = IR(shiftBeg,winEnd);
        auto wShifts = w(shiftInd,ALL);

        // Perform a small-bulge sweep
        auto ctrlSweep( ctrl );
        ctrlSweep.winBeg = iterBeg;
        ctrlSweep.winEnd = winEnd;
        multibulge::Sweep( H, wShifts, Z, ctrlSweep );

        ++info.numIterations;
        // An unchanged window means that the last sweep did not deflate
        if( iterBeg == iterBegLast && winEnd == winEndLast )
            ++numIterSinceDeflation;
        iterBegLast = iterBeg;
        winEndLast = winEnd;
    }
    info.numUnconverged = winEnd-winBeg;
    return info;
}
Example #5
0
// Unblocked bidiagonalization of a complex distributed matrix A that is at
// least as tall as it is wide (checked in non-RELEASE builds).
//
// Each pass of the loop handles one diagonal entry: a reflector is generated
// from the column [alpha11; a21] and applied from the left to the trailing
// columns, and then (when trailing columns remain beyond the next one) a
// second reflector is generated from the conjugated row a12 and applied from
// the right to A22. A is overwritten in place.
//
// Parameters:
//   A  - matrix to reduce; overwritten.
//   tP - receives the scalars (tauP) of the right reflectors; resized to
//        max(A.Width()-1,0) x 1 unless it is a view.
//   tQ - receives the scalars (tauQ) of the left reflectors; resized to
//        A.Width() x 1 unless it is a view.
//
// In non-RELEASE builds, std::logic_error is thrown for mismatched grids, a
// wide A, or wrongly-sized tP/tQ views.
inline void UnblockedBidiagU
( DistMatrix<Complex<R> >& A, 
  DistMatrix<Complex<R>,MD,STAR>& tP,
  DistMatrix<Complex<R>,MD,STAR>& tQ )
{
#ifndef RELEASE
    PushCallStack("BidiagU");
#endif
    // Expected lengths of the reflector-scalar vectors
    const int tPHeight = std::max(A.Width()-1,0);
    const int tQHeight = A.Width();
#ifndef RELEASE
    if( A.Grid() != tP.Grid() || tP.Grid() != tQ.Grid() )
        throw std::logic_error("Process grids do not match");
    if( A.Height() < A.Width() )
        throw std::logic_error("A must be at least as tall as it is wide");
    // Views cannot be resized below, so they must already be the right size
    if( tP.Viewing() && (tP.Height() != tPHeight || tP.Width() != 1) )
        throw std::logic_error("tP is the wrong height");
    if( tQ.Viewing() && (tQ.Height() != tQHeight || tQ.Width() != 1) )
        throw std::logic_error("tQ is the wrong height");
#endif
    typedef Complex<R> C;
    const Grid& g = A.Grid();

    if( !tP.Viewing() )
        tP.ResizeTo( tPHeight, 1 );
    if( !tQ.Viewing() )
        tQ.ResizeTo( tQHeight, 1 );

    // Matrix views 
    DistMatrix<C>
        ATL(g), ATR(g),  A00(g), a01(g),     A02(g),  alpha12L(g), a12R(g),
        ABL(g), ABR(g),  a10(g), alpha11(g), a12(g),  aB1(g), AB2(g),
                         A20(g), a21(g),     A22(g);

    // Temporary matrices
    DistMatrix<C,STAR,MR  > a12_STAR_MR(g);
    DistMatrix<C,MC,  STAR> aB1_MC_STAR(g);
    DistMatrix<C,MR,  STAR> x12Adj_MR_STAR(g);
    DistMatrix<C,MC,  STAR> w21_MC_STAR(g);

    // NOTE(review): a blocksize of one presumably makes the repartitioning
    // below expose a single row/column (alpha11 is read as a 1x1) -- confirm
    PushBlocksizeStack( 1 );
    PartitionDownDiagonal
    ( A, ATL, ATR,
         ABL, ABR, 0 );
    while( ATL.Width() < A.Width() )
    {
        RepartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, /**/ a01,     A02,
         /*************/ /**********************/
               /**/       a10, /**/ alpha11, a12,
          ABL, /**/ ABR,  A20, /**/ a21,     A22 );

        // aB1 = [alpha11; a21] is the current column from the diagonal down,
        // and AB2 = [a12; A22] is everything to its right
        View2x1
        ( aB1, alpha11,
               a21 );
        View2x1
        ( AB2, a12,
               A22 );

        aB1_MC_STAR.AlignWith( aB1 );
        a12_STAR_MR.AlignWith( a12 );
        x12Adj_MR_STAR.AlignWith( AB2 );
        w21_MC_STAR.AlignWith( A22 );
        Zeros( a12.Width(), 1, x12Adj_MR_STAR );
        Zeros( a21.Height(), 1, w21_MC_STAR );
        // Whether this process's grid row/column matches the alignment of
        // alpha11 (and, for nextIsMyCol, of a12); used to guard the local
        // reads and writes of the entries temporarily replaced by one
        const bool thisIsMyRow = ( g.Row() == alpha11.ColAlignment() );
        const bool thisIsMyCol = ( g.Col() == alpha11.RowAlignment() );
        const bool nextIsMyCol = ( g.Col() == a12.RowAlignment() );
        //--------------------------------------------------------------------//

        // Find tauQ, u, and epsilonQ such that
        //     I - conj(tauQ) | 1 | | 1, u^H | | alpha11 | = | epsilonQ |
        //                    | u |            |    a21  |   |    0     |
        const C tauQ = Reflector( alpha11, a21 );
        tQ.Set(A00.Height(),0,tauQ );
        // Save the value left in alpha11 (on the guarded process) so that it
        // can be restored after alpha11 is temporarily overwritten with one
        C epsilonQ=0;
        if( thisIsMyCol && thisIsMyRow )
            epsilonQ = alpha11.GetLocal(0,0);

        // Set aB1 = | 1 | and form x12^H := (aB1^H AB2)^H = AB2^H aB1
        //           | u |
        alpha11.Set(0,0,C(1));
        aB1_MC_STAR = aB1;
        // Local products followed by a sum over the process column
        internal::LocalGemv
        ( ADJOINT, C(1), AB2, aB1_MC_STAR, C(0), x12Adj_MR_STAR );
        x12Adj_MR_STAR.SumOverCol();

        // Update AB2 := AB2 - conj(tauQ) aB1 x12
        //             = AB2 - conj(tauQ) aB1 aB1^H AB2 
        //             = (I - conj(tauQ) aB1 aB1^H) AB2
        internal::LocalGer( -Conj(tauQ), aB1_MC_STAR, x12Adj_MR_STAR, AB2 );

        // Put epsilonQ back instead of the temporary value, 1
        if( thisIsMyCol && thisIsMyRow )
            alpha11.SetLocal(0,0,epsilonQ);

        // The row reflector is only applied when A22 has columns
        if( A22.Width() != 0 )
        {
            // Due to the deficiencies in the BLAS ?gemv routines, this section
            // is easier if we temporarily conjugate a12
            Conjugate( a12 ); 

            // Expose the subvector we seek to zero, a12R
            PartitionRight( a12, alpha12L, a12R );

            // Find tauP, v, and epsilonP such that
            //     I - conj(tauP) | 1 | | 1, v^H | | alpha12L | = | epsilonP |
            //                    | v |            |  a12R^T  |   |    0     |
            const C tauP = Reflector( alpha12L, a12R );
            tP.Set(A00.Height(),0,tauP);
            // Save the value left in alpha12L so that it can be restored
            C epsilonP=0;
            if( nextIsMyCol && thisIsMyRow )
                epsilonP = alpha12L.GetLocal(0,0);

            // Set a12^T = | 1 | and form w21 := A22 a12^T = A22 | 1 |
            //             | v |                                 | v |
            alpha12L.Set(0,0,C(1));
            a12_STAR_MR = a12;
            // Local products followed by a sum over the process row
            internal::LocalGemv
            ( NORMAL, C(1), A22, a12_STAR_MR, C(0), w21_MC_STAR );
            w21_MC_STAR.SumOverRow();

            // A22 := A22 - tauP w21 conj(a12)
            //      = A22 - tauP A22 a12^T conj(a12)
            //      = A22 (I - tauP a12^T conj(a12))
            //      = A22 conj(I - conj(tauP) a12^H a12)
            // which compensates for the fact that the reflector was generated
            // on the conjugated a12.
            internal::LocalGer( -tauP, w21_MC_STAR, a12_STAR_MR, A22 );

            // Put epsilonP back instead of the temporary value, 1
            if( nextIsMyCol && thisIsMyRow )
                alpha12L.SetLocal(0,0,epsilonP);

            // Undo the temporary conjugation
            Conjugate( a12 );
        }
        //--------------------------------------------------------------------//
        // Release the alignments so that the temporaries can be realigned
        // with the next iteration's views
        aB1_MC_STAR.FreeAlignments();
        a12_STAR_MR.FreeAlignments();
        x12Adj_MR_STAR.FreeAlignments();
        w21_MC_STAR.FreeAlignments();

        SlidePartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, a01,     /**/ A02,
               /**/       a10, alpha11, /**/ a12,
         /*************/ /**********************/
          ABL, /**/ ABR,  A20, a21,     /**/ A22 );
    }
    PopBlocksizeStack();
#ifndef RELEASE
    PopCallStack();
#endif
}