// Overwrites J via MakeJordan( J, lambda ) and then, provided J has at least
// one row and one column, stores alpha in its bottom-left entry, i.e. at
// position (J.Height()-1, 0).
inline void MakeForsythe( DistMatrix<T,U,V>& J, T alpha, T lambda )
{
    DEBUG_ONLY(CallStackEntry cse("MakeForsythe"))
    MakeJordan( J, lambda );

    const Int height = J.Height();
    const Int width = J.Width();

    // Nothing to overwrite in an empty matrix
    if( height <= 0 || width <= 0 )
        return;

    J.Set( height-1, 0, alpha );
}
int main( int argc, char* argv[] ) { Initialize( argc, argv ); try { const Int n = Input("--size","size of matrix",10); const bool display = Input("--display","display matrix?",true); const bool print = Input("--print","print matrix?",false); ProcessInput(); PrintInputReport(); auto J = Legendre<double>( DefaultGrid(), n ); if( display ) { Display( J, "Jacobi matrix for Legendre polynomials" ); #ifdef HAVE_QT5 Spy( J, "Spy plot for Jacobi matrix" ); #endif } if( print ) Print( J, "Jacobi matrix for Legendre polynomials" ); #ifdef HAVE_PMRRR // We will compute Gaussian quadrature points and weights over [-1,+1] // using the eigenvalue decomposition of the Jacobi matrix for the // Legendre polynomials. DistMatrix<double,VR, STAR> points; DistMatrix<double,STAR,VR > X; HermitianTridiagEig ( J.GetDiagonal(), J.GetDiagonal(-1), points, X, ASCENDING ); if( display ) Display( points, "Quadrature points" ); if( print ) Print( points, "points" ); auto firstRow = View( X, 0, 0, 1, n ); DistMatrix<double,STAR,STAR> weights = firstRow; for( Int j=0; j<n; ++j ) { const double gamma = weights.Get( 0, j ); weights.Set( 0, j, 2*gamma*gamma ); } if( display ) Display( weights, "Quadrature weights" ); if( print ) Print( weights, "weights" ); #endif } catch( std::exception& e ) { ReportException(e); } Finalize(); return 0; }
// Turns J into the result of MakeJordan( J, lambda ) with one extra entry:
// for a non-empty J, the element in the last row and first column is set to
// alpha.
inline void MakeForsythe( DistMatrix<T,U,V>& J, T alpha, T lambda )
{
#ifndef RELEASE
    CallStackEntry entry("MakeForsythe");
#endif
    MakeJordan( J, lambda );

    const Int numRows = J.Height();
    const Int numCols = J.Width();
    const bool isEmpty = ( numRows <= 0 || numCols <= 0 );
    if( !isEmpty )
    {
        // Bottom-left corner
        J.Set( numRows-1, 0, alpha );
    }
}
// Drives a multibulge QR iteration on the window [winBeg,winEnd) of the
// distributed matrix H.
//
// Parameters:
//   H    - matrix being iterated on; entries found negligible by
//          DetectSmallSubdiagonal are explicitly zeroed.
//   w    - resized to n x 1 (n = H.Height()); entries are filled in as
//          one-by-one and two-by-two windows deflate, and a trailing portion
//          is used as the shift storage for each sweep.
//   Z    - forwarded unchanged to the two-by-two, redundant-window and sweep
//          helpers (presumably the accumulated transformation; confirm with
//          those helpers).
//   ctrl - control structure; winBeg/winEnd equal to END are interpreted as n.
//
// Returns:
//   A HessenbergSchurInfo whose numIterations counts the sweeps performed
//   here plus the iterations reported by redundantly handled sub-windows, and
//   whose numUnconverged is the size of the window remaining on exit.
//   If the initial window is smaller than the multibulge threshold, the
//   result of multibulge::RedundantlyHandleWindow is returned directly.
//
// Errors:
//   Calls RuntimeError when the iteration limit is reached and
//   ctrl.demandConverged is set; otherwise the loop simply stops.
HessenbergSchurInfo
MultiBulge
( DistMatrix<F,MC,MR,BLOCK>& H,
  DistMatrix<Complex<Base<F>>,STAR,STAR>& w,
  DistMatrix<F,MC,MR,BLOCK>& Z,
  const HessenbergSchurCtrl& ctrl )
{
    DEBUG_CSE
    typedef Base<F> Real;
    const Real zero(0);
    const Grid& grid = H.Grid();

    const Int n = H.Height();
    Int winBeg = ( ctrl.winBeg==END ? n : ctrl.winBeg );
    Int winEnd = ( ctrl.winEnd==END ? n : ctrl.winEnd );
    const Int winSize = winEnd - winBeg;
    const Int blockSize = H.BlockHeight();

    // TODO(poulson): Implement a more reasonable/configurable means of deciding
    // when to call the sequential implementation
    Int minMultiBulgeSize = Max( ctrl.minMultiBulgeSize, 2*blockSize );
    // This maximum is meant to account for parallel overheads and needs to be
    // more principled (and perhaps based upon the number of workers and the
    // cluster characteristics)
    // TODO(poulson): Re-enable this
    //minMultiBulgeSize = Max( minMultiBulgeSize, 500 );

    HessenbergSchurInfo info;

    w.Resize( n, 1 );
    if( winSize < minMultiBulgeSize )
    {
        // The whole window is too small for the multibulge scheme
        return multibulge::RedundantlyHandleWindow( H, w, Z, ctrl );
    }

    // Control structure handed to the shift computation
    auto ctrlShifts( ctrl );
    ctrlShifts.winBeg = 0;
    ctrlShifts.winEnd = END;
    ctrlShifts.fullTriangle = false;

    Int numIterSinceDeflation = 0;
    const Int numStaleIterBeforeExceptional = 5;
    // Cf. LAPACK's DLAQR0 for this choice
    const Int maxIter =
      Max(30,2*numStaleIterBeforeExceptional) * Max(10,winSize);

    // Window seen by the previous sweep; used to detect stagnation
    Int iterBegLast=-1, winEndLast=-1;
    DistMatrix<F,STAR,STAR> hMainWin(grid), hSuperWin(grid);
    DistMatrix<Real,STAR,STAR> hSubWin(grid);
    while( winBeg < winEnd )
    {
        if( info.numIterations >= maxIter )
        {
            if( ctrl.demandConverged )
                RuntimeError("MultiBulge QR iteration did not converge");
            else
                break;
        }
        auto winInd = IR(winBeg,winEnd);

        // Detect an irreducible Hessenberg window, [iterBeg,winEnd)
        // ---------------------------------------------------------
        // TODO(poulson): Have the interblock chase from the previous sweep
        // collect the main and sub diagonal of H along the diagonal workers
        // and then broadcast across the "cross" communicator.
        util::GatherTridiagonal( H, winInd, hMainWin, hSubWin, hSuperWin );

        // Only report the window when progress output was requested, like
        // every other diagnostic in this routine. The full matrix and the
        // gathered diagonals are deliberately not printed here: doing so
        // would emit the entire n x n matrix on every pass of the loop.
        if( ctrl.progress )
            Output("winBeg=",winBeg,", winEnd=",winEnd);

        const Int iterOffset =
          DetectSmallSubdiagonal
          ( hMainWin.Matrix(), hSubWin.Matrix(), hSuperWin.Matrix() );
        const Int iterBeg = winBeg + iterOffset;
        const Int iterWinSize = winEnd-iterBeg;
        if( iterOffset > 0 )
        {
            // Make the detected split explicit in both H and the local copy
            // of the subdiagonal
            H.Set( iterBeg, iterBeg-1, zero );
            hSubWin.Set( iterOffset-1, 0, zero );
        }
        if( iterWinSize == 1 )
        {
            if( ctrl.progress )
                Output("One-by-one window at ",iterBeg);
            w.Set( iterBeg, 0, hMainWin.GetLocal(iterOffset,0) );

            winEnd = iterBeg;
            numIterSinceDeflation = 0;
            continue;
        }
        else if( iterWinSize == 2 )
        {
            if( ctrl.progress )
                Output("Two-by-two window at ",iterBeg);
            const F eta00 = hMainWin.GetLocal(iterOffset,0);
            const F eta01 = hSuperWin.GetLocal(iterOffset,0);
            const Real eta10 = hSubWin.GetLocal(iterOffset,0);
            const F eta11 = hMainWin.GetLocal(iterOffset+1,0);
            multibulge::TwoByTwo
            ( H, eta00, eta01, eta10, eta11, w, Z, iterBeg, ctrl );

            winEnd = iterBeg;
            numIterSinceDeflation = 0;
            continue;
        }
        else if( iterWinSize < minMultiBulgeSize )
        {
            // The window is small enough to switch to the simple scheme
            if( ctrl.progress )
                Output("Redundantly handling window [",iterBeg,",",winEnd,")");
            auto ctrlIter( ctrl );
            ctrlIter.winBeg = iterBeg;
            ctrlIter.winEnd = winEnd;
            auto iterInfo =
              multibulge::RedundantlyHandleWindow( H, w, Z, ctrlIter );
            info.numIterations += iterInfo.numIterations;

            winEnd = iterBeg;
            numIterSinceDeflation = 0;
            continue;
        }

        const Int numShiftsRec = ctrl.numShifts( n, iterWinSize );
        if( ctrl.progress )
        {
            Output("Iter. ",info.numIterations,": ");
            Output(" window is [",iterBeg,",",winEnd,")");
            Output(" recommending ",numShiftsRec," shifts");
        }

        // NOTE(poulson): In the case where exceptional shifts are used, the
        // main and subdiagonals of H in the window are currently redundantly
        // gathered. It could be worthwhile to pass in hMainWin and hSubWin.
        const Int shiftBeg =
          multibulge::ComputeShifts
          ( H, w, iterBeg, winBeg, winEnd, numShiftsRec, numIterSinceDeflation,
            numStaleIterBeforeExceptional, ctrlShifts );
        auto shiftInd = IR(shiftBeg,winEnd);
        auto wShifts = w(shiftInd,ALL);

        // Perform a small-bulge sweep
        auto ctrlSweep( ctrl );
        ctrlSweep.winBeg = iterBeg;
        ctrlSweep.winEnd = winEnd;
        multibulge::Sweep( H, wShifts, Z, ctrlSweep );

        ++info.numIterations;
        // The stale-iteration counter only advances when the sweep was
        // applied to exactly the same window as the previous one
        if( iterBeg == iterBegLast && winEnd == winEndLast )
            ++numIterSinceDeflation;
        iterBegLast = iterBeg;
        winEndLast = winEnd;
    }
    info.numUnconverged = winEnd-winBeg;
    return info;
}
inline void UnblockedBidiagU ( DistMatrix<Complex<R> >& A, DistMatrix<Complex<R>,MD,STAR>& tP, DistMatrix<Complex<R>,MD,STAR>& tQ ) { #ifndef RELEASE PushCallStack("BidiagU"); #endif const int tPHeight = std::max(A.Width()-1,0); const int tQHeight = A.Width(); #ifndef RELEASE if( A.Grid() != tP.Grid() || tP.Grid() != tQ.Grid() ) throw std::logic_error("Process grids do not match"); if( A.Height() < A.Width() ) throw std::logic_error("A must be at least as tall as it is wide"); if( tP.Viewing() && (tP.Height() != tPHeight || tP.Width() != 1) ) throw std::logic_error("tP is the wrong height"); if( tQ.Viewing() && (tQ.Height() != tQHeight || tQ.Width() != 1) ) throw std::logic_error("tQ is the wrong height"); #endif typedef Complex<R> C; const Grid& g = A.Grid(); if( !tP.Viewing() ) tP.ResizeTo( tPHeight, 1 ); if( !tQ.Viewing() ) tQ.ResizeTo( tQHeight, 1 ); // Matrix views DistMatrix<C> ATL(g), ATR(g), A00(g), a01(g), A02(g), alpha12L(g), a12R(g), ABL(g), ABR(g), a10(g), alpha11(g), a12(g), aB1(g), AB2(g), A20(g), a21(g), A22(g); // Temporary matrices DistMatrix<C,STAR,MR > a12_STAR_MR(g); DistMatrix<C,MC, STAR> aB1_MC_STAR(g); DistMatrix<C,MR, STAR> x12Adj_MR_STAR(g); DistMatrix<C,MC, STAR> w21_MC_STAR(g); PushBlocksizeStack( 1 ); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ATL.Width() < A.Width() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ a01, A02, /*************/ /**********************/ /**/ a10, /**/ alpha11, a12, ABL, /**/ ABR, A20, /**/ a21, A22 ); View2x1 ( aB1, alpha11, a21 ); View2x1 ( AB2, a12, A22 ); aB1_MC_STAR.AlignWith( aB1 ); a12_STAR_MR.AlignWith( a12 ); x12Adj_MR_STAR.AlignWith( AB2 ); w21_MC_STAR.AlignWith( A22 ); Zeros( a12.Width(), 1, x12Adj_MR_STAR ); Zeros( a21.Height(), 1, w21_MC_STAR ); const bool thisIsMyRow = ( g.Row() == alpha11.ColAlignment() ); const bool thisIsMyCol = ( g.Col() == alpha11.RowAlignment() ); const bool nextIsMyCol = ( g.Col() == a12.RowAlignment() ); 
//--------------------------------------------------------------------// // Find tauQ, u, and epsilonQ such that // I - conj(tauQ) | 1 | | 1, u^H | | alpha11 | = | epsilonQ | // | u | | a21 | | 0 | const C tauQ = Reflector( alpha11, a21 ); tQ.Set(A00.Height(),0,tauQ ); C epsilonQ=0; if( thisIsMyCol && thisIsMyRow ) epsilonQ = alpha11.GetLocal(0,0); // Set aB1 = | 1 | and form x12^H := (aB1^H AB2)^H = AB2^H aB1 // | u | alpha11.Set(0,0,C(1)); aB1_MC_STAR = aB1; internal::LocalGemv ( ADJOINT, C(1), AB2, aB1_MC_STAR, C(0), x12Adj_MR_STAR ); x12Adj_MR_STAR.SumOverCol(); // Update AB2 := AB2 - conj(tauQ) aB1 x12 // = AB2 - conj(tauQ) aB1 aB1^H AB2 // = (I - conj(tauQ) aB1 aB1^H) AB2 internal::LocalGer( -Conj(tauQ), aB1_MC_STAR, x12Adj_MR_STAR, AB2 ); // Put epsilonQ back instead of the temporary value, 1 if( thisIsMyCol && thisIsMyRow ) alpha11.SetLocal(0,0,epsilonQ); if( A22.Width() != 0 ) { // Due to the deficiencies in the BLAS ?gemv routines, this section // is easier if we temporarily conjugate a12 Conjugate( a12 ); // Expose the subvector we seek to zero, a12R PartitionRight( a12, alpha12L, a12R ); // Find tauP, v, and epsilonP such that // I - conj(tauP) | 1 | | 1, v^H | | alpha12L | = | epsilonP | // | v | | a12R^T | | 0 | const C tauP = Reflector( alpha12L, a12R ); tP.Set(A00.Height(),0,tauP); C epsilonP=0; if( nextIsMyCol && thisIsMyRow ) epsilonP = alpha12L.GetLocal(0,0); // Set a12^T = | 1 | and form w21 := A22 a12^T = A22 | 1 | // | v | | v | alpha12L.Set(0,0,C(1)); a12_STAR_MR = a12; internal::LocalGemv ( NORMAL, C(1), A22, a12_STAR_MR, C(0), w21_MC_STAR ); w21_MC_STAR.SumOverRow(); // A22 := A22 - tauP w21 conj(a12) // = A22 - tauP A22 a12^T conj(a12) // = A22 (I - tauP a12^T conj(a12)) // = A22 conj(I - conj(tauP) a12^H a12) // which compensates for the fact that the reflector was generated // on the conjugated a12. 
internal::LocalGer( -tauP, w21_MC_STAR, a12_STAR_MR, A22 ); // Put epsilonP back instead of the temporary value, 1 if( nextIsMyCol && thisIsMyRow ) alpha12L.SetLocal(0,0,epsilonP); // Undue the temporary conjugation Conjugate( a12 ); } //--------------------------------------------------------------------// aB1_MC_STAR.FreeAlignments(); a12_STAR_MR.FreeAlignments(); x12Adj_MR_STAR.FreeAlignments(); w21_MC_STAR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, a01, /**/ A02, /**/ a10, alpha11, /**/ a12, /*************/ /**********************/ ABL, /**/ ABR, A20, a21, /**/ A22 ); } PopBlocksizeStack(); #ifndef RELEASE PopCallStack(); #endif }