// Sequential small-bulge multishift QR iteration on a Hessenberg matrix.
//
// The active window [winBeg,winEnd) is read from ctrl (END maps to n). Each
// pass locates the trailing irreducible window [iterBeg,winEnd) and then
//   * records a 1x1 block's diagonal entry in w,
//   * hands a 2x2 block to multibulge::TwoByTwo,
//   * hands a window smaller than the multibulge threshold to Simple, or
//   * computes shifts and performs one multibulge::Sweep.
//
// Parameters:
//   H    - Hessenberg matrix; modified in place.
//   w    - resized to n x 1 (unless the whole problem is delegated to Simple)
//          and filled with the computed eigenvalues.
//   Z    - forwarded unchanged to TwoByTwo, Simple and Sweep.
//   ctrl - window bounds, minMultiBulgeSize, numShifts, progress and
//          demandConverged are consulted here.
//
// Returns the number of iterations performed (including those spent inside
// delegated Simple calls) and the number of eigenvalues left unconverged
// (including those a delegated Simple call failed to converge).
// Calls RuntimeError if the iteration budget is exhausted while
// ctrl.demandConverged is set.
//
// NOTE(review): `F` is presumably introduced by a template header that is not
// part of this block - confirm against the surrounding file.
HessenbergSchurInfo
MultiBulge
( Matrix<F>& H,
  Matrix<Complex<Base<F>>>& w,
  Matrix<F>& Z,
  const HessenbergSchurCtrl& ctrl )
{
    DEBUG_CSE
    typedef Base<F> Real;
    const Real zero(0);

    const Int n = H.Height();
    Int winBeg = ( ctrl.winBeg==END ? n : ctrl.winBeg );
    Int winEnd = ( ctrl.winEnd==END ? n : ctrl.winEnd );
    const Int winSize = winEnd - winBeg;
    const Int minMultiBulgeSize = Max( ctrl.minMultiBulgeSize, 4 );
    HessenbergSchurInfo info;

    // A window below the threshold is delegated to the simple scheme outright
    if( winSize < minMultiBulgeSize )
    {
        return Simple( H, w, Z, ctrl );
    }

    w.Resize( n, 1 );
    Matrix<F> U, W, WAccum;

    // Control structure handed to the shift computation
    auto ctrlShifts( ctrl );
    ctrlShifts.winBeg = 0;
    ctrlShifts.winEnd = END;
    ctrlShifts.fullTriangle = false;

    Int numIterSinceDeflation = 0;
    const Int numStaleIterBeforeExceptional = 5;
    // Cf. LAPACK's DLAQR0 for this choice
    const Int maxIter =
      Max(30,2*numStaleIterBeforeExceptional) * Max(10,winSize);

    // Eigenvalues that delegated Simple calls reported as unconverged. They
    // lie in windows that have already been removed from [winBeg,winEnd), so
    // they must be tracked separately to be reported to the caller.
    Int numUnconvergedSub = 0;

    Int iterBegLast=-1, winEndLast=-1;
    while( winBeg < winEnd )
    {
        if( info.numIterations >= maxIter )
        {
            if( ctrl.demandConverged )
                RuntimeError("MultiBulge QR iteration did not converge");
            else
                break;
        }
        auto winInd = IR(winBeg,winEnd);

        // Detect an irreducible Hessenberg window, [iterBeg,winEnd)
        // ---------------------------------------------------------
        const Int iterOffset = DetectSmallSubdiagonal( H(winInd,winInd) );
        const Int iterBeg = winBeg + iterOffset;
        const Int iterWinSize = winEnd-iterBeg;
        if( iterOffset > 0 )
            H(iterBeg,iterBeg-1) = zero;
        if( iterWinSize == 1 )
        {
            w(iterBeg) = H(iterBeg,iterBeg);
            --winEnd;
            numIterSinceDeflation = 0;
            continue;
        }
        else if( iterWinSize == 2 )
        {
            multibulge::TwoByTwo( H, w, Z, iterBeg, ctrl );
            winEnd -= 2;
            numIterSinceDeflation = 0;
            continue;
        }
        else if( iterWinSize < minMultiBulgeSize )
        {
            // The window is small enough to switch to the simple scheme
            auto ctrlSub( ctrl );
            ctrlSub.winBeg = iterBeg;
            ctrlSub.winEnd = winEnd;
            const HessenbergSchurInfo subInfo = Simple( H, w, Z, ctrlSub );
            // Do not drop the sub-solver's report: count its iterations and
            // remember anything it could not converge.
            info.numIterations += subInfo.numIterations;
            numUnconvergedSub += subInfo.numUnconverged;
            winEnd = iterBeg;
            // The window shrank, just as in the 1x1 and 2x2 cases above
            numIterSinceDeflation = 0;
            continue;
        }

        const Int numShiftsRec = ctrl.numShifts( n, iterWinSize );
        if( ctrl.progress )
        {
            Output("Iter. ",info.numIterations,": ");
            Output("  window is [",iterBeg,",",winEnd,")");
            Output("  recommending ",numShiftsRec," shifts");
        }

        const Int shiftBeg =
          multibulge::ComputeShifts
          ( H, w, iterBeg, winBeg, winEnd, numShiftsRec,
            numIterSinceDeflation, numStaleIterBeforeExceptional,
            ctrlShifts );
        auto shiftInd = IR(shiftBeg,winEnd);
        auto wShifts = w(shiftInd,ALL);

        // Perform a small-bulge sweep
        auto ctrlSweep( ctrl );
        ctrlSweep.winBeg = iterBeg;
        ctrlSweep.winEnd = winEnd;
        multibulge::Sweep( H, wShifts, Z, U, W, WAccum, ctrlSweep );

        ++info.numIterations;
        // Only a sweep over the very same window counts as a stale iteration
        if( iterBeg == iterBegLast && winEnd == winEndLast )
            ++numIterSinceDeflation;
        iterBegLast = iterBeg;
        winEndLast = winEnd;
    }
    info.numUnconverged = (winEnd-winBeg) + numUnconvergedSub;
    return info;
}
// Implicit double-shift QR iteration on a real Hessenberg matrix.
//
// Operates on the window [winBeg,winEnd) taken from ctrl (END maps to n) and
// peels eigenvalues off the bottom of the window one or two at a time. For
// each deflation attempt at most 30 sweeps are performed; sweeps number 10 and
// 20 of an attempt use exceptional shifts instead of the Francis shifts taken
// from the trailing 2x2 block.
//
// Parameters:
//   H    - Hessenberg matrix; modified in place.
//   w    - resized to n x 1; receives the eigenvalues of deflated blocks.
//   Z    - rotated alongside H when ctrl.wantSchurVecs is set, and forwarded
//          to double_shift::SweepOpt.
//   ctrl - winBeg, winEnd, fullTriangle, wantSchurVecs, demandConverged and
//          progress are consulted here.
//
// Returns the total number of sweeps and the size of the window that was left
// when iteration stopped. Calls RuntimeError when a deflation attempt exhausts
// its sweeps while ctrl.demandConverged is set.
//
// NOTE(review): `Real` is presumably introduced by a template header that is
// not part of this block - confirm against the surrounding file.
HessenbergSchurInfo
DoubleShift
( Matrix<Real>& H,
  Matrix<Complex<Real>>& w,
  Matrix<Real>& Z,
  const HessenbergSchurCtrl& ctrl )
{
    EL_DEBUG_CSE
    const Real zero(0);
    const Int maxIter = 30;
    // The sweeps (within one deflation attempt) that use exceptional shifts
    const Int topExceptIter = maxIter/3;
    const Int bottomExceptIter = 2*maxIter/3;
    // Cf. LAPACK for these somewhat arbitrary constants
    const Real exceptScale0 = Real(3)/Real(4);
    const Real exceptScale1 = Real(-4375)/Real(10000);

    const Int n = H.Height();
    const Int nZ = Z.Height();
    Int winBeg = ( ctrl.winBeg==END ? n : ctrl.winBeg );
    Int winEnd = ( ctrl.winEnd==END ? n : ctrl.winEnd );
    const Int winSize = winEnd - winBeg;
    HessenbergSchurInfo info;

    w.Resize( n, 1 );

    // Empty and single-entry windows need no iteration at all
    if( winSize == 0 || winSize == 1 )
    {
        if( winSize == 1 )
            w(winBeg) = H(winBeg,winBeg);
        return info;
    }

    // Follow LAPACK's suit and clear the two diagonals below the subdiagonal
    // (the last column that has a second subdiagonal entry has no third one)
    for( Int col=winBeg; col<winEnd-2; ++col )
    {
        H(col+2,col) = zero;
        if( col+3 < winEnd )
            H(col+3,col) = zero;
    }

    // Attempt to converge the eigenvalues one or two at a time
    HessenbergSchurCtrl ctrlSweep( ctrl );
    while( winBeg < winEnd )
    {
        Int activeBeg = winBeg;
        bool deflated = false;
        for( Int sweep=0; sweep<maxIter; ++sweep )
        {
            // Move the top of the active window down to the last negligible
            // subdiagonal entry and make the split exact
            const auto activeInd = IR(activeBeg,winEnd);
            activeBeg += DetectSmallSubdiagonal( H(activeInd,activeInd) );
            if( activeBeg > winBeg )
            {
                H(activeBeg,activeBeg-1) = zero;
            }

            const Int activeSize = winEnd - activeBeg;
            if( activeSize == 1 )
            {
                // A single eigenvalue has split off the bottom
                w(activeBeg) = H(activeBeg,activeBeg);
                --winEnd;
                deflated = true;
                break;
            }
            if( activeSize == 2 )
            {
                // A 2x2 block has split off the bottom
                const Int i0 = winEnd-2;
                const Int i1 = winEnd-1;
                Real c, s;
                schur::TwoByTwo
                ( H(i0,i0), H(i0,i1),
                  H(i1,i0), H(i1,i1),
                  w(activeBeg), w(activeBeg+1),
                  c, s );
                if( ctrl.fullTriangle )
                {
                    // Rotate the parts of rows i0,i1 to the right of the block
                    if( n > winEnd )
                        blas::Rot
                        ( n-winEnd,
                          &H(i0,winEnd), H.LDim(),
                          &H(i1,winEnd), H.LDim(),
                          c, s );
                    // Rotate the parts of columns i0,i1 above the block
                    blas::Rot
                    ( i0,
                      &H(0,i0), 1,
                      &H(0,i1), 1,
                      c, s );
                }
                if( ctrl.wantSchurVecs )
                {
                    blas::Rot
                    ( nZ,
                      &Z(0,i0), 1,
                      &Z(0,i1), 1,
                      c, s );
                }
                winEnd -= 2;
                deflated = true;
                break;
            }

            // Pick either the Francis shifts or exceptional shifts
            Real eta00, eta01, eta10, eta11;
            if( sweep != topExceptIter && sweep != bottomExceptIter )
            {
                // Francis shifts: the trailing 2x2 block of the window
                eta00 = H(winEnd-2,winEnd-2);
                eta01 = H(winEnd-2,winEnd-1);
                eta10 = H(winEnd-1,winEnd-2);
                eta11 = H(winEnd-1,winEnd-1);
            }
            else
            {
                Real scale, diag;
                if( sweep == topExceptIter )
                {
                    // Exceptional shift built from the top of the window
                    scale =
                      Abs(H(activeBeg+1,activeBeg)) +
                      Abs(H(activeBeg+2,activeBeg+1));
                    diag = H(activeBeg,activeBeg);
                }
                else
                {
                    // Exceptional shift built from the bottom of the window
                    scale =
                      Abs(H(winEnd-1,winEnd-2)) +
                      Abs(H(winEnd-2,winEnd-3));
                    diag = H(winEnd-1,winEnd-1);
                }
                eta00 = exceptScale0*scale + diag;
                eta01 = exceptScale1*scale;
                eta10 = scale;
                eta11 = eta00;
            }
            Complex<Real> shift0, shift1;
            double_shift::PrepareShifts
            ( eta00, eta01, eta10, eta11, shift0, shift1 );

            ctrlSweep.winBeg = activeBeg;
            ctrlSweep.winEnd = winEnd;
            double_shift::SweepOpt( H, shift0, shift1, Z, ctrlSweep );
            ++info.numIterations;
        }
        if( !deflated )
        {
            // All sweeps of this attempt were used without a deflation
            if( ctrl.demandConverged )
                RuntimeError("QR iteration did not converge");
            else
                break;
        }
    }
    if( ctrl.progress )
        Output(info.numIterations," iterations");
    info.numUnconverged = winEnd-winBeg;
    return info;
}
// Distributed small-bulge multishift QR iteration on a block-distributed
// Hessenberg matrix.
//
// The active window [winBeg,winEnd) is read from ctrl (END maps to n). Each
// pass gathers the three central diagonals of the window onto every process,
// locates the trailing irreducible window [iterBeg,winEnd) and then
//   * records a 1x1 block's diagonal entry in w,
//   * hands a 2x2 block to multibulge::TwoByTwo,
//   * hands a window smaller than the multibulge threshold to
//     multibulge::RedundantlyHandleWindow, or
//   * computes shifts and performs one multibulge::Sweep.
//
// Parameters:
//   H    - Hessenberg matrix; modified in place.
//   w    - resized to n x 1 and filled with the computed eigenvalues.
//   Z    - forwarded unchanged to TwoByTwo, RedundantlyHandleWindow and Sweep.
//   ctrl - window bounds, minMultiBulgeSize, numShifts, progress and
//          demandConverged are consulted here.
//
// Returns the number of iterations performed (including those reported by
// RedundantlyHandleWindow) and the number of eigenvalues left unconverged
// (including those RedundantlyHandleWindow reported as unconverged).
// Calls RuntimeError if the iteration budget is exhausted while
// ctrl.demandConverged is set.
//
// NOTE(review): `F` is presumably introduced by a template header that is not
// part of this block - confirm against the surrounding file.
HessenbergSchurInfo
MultiBulge
( DistMatrix<F,MC,MR,BLOCK>& H,
  DistMatrix<Complex<Base<F>>,STAR,STAR>& w,
  DistMatrix<F,MC,MR,BLOCK>& Z,
  const HessenbergSchurCtrl& ctrl )
{
    DEBUG_CSE
    typedef Base<F> Real;
    const Real zero(0);
    const Grid& grid = H.Grid();

    const Int n = H.Height();
    Int winBeg = ( ctrl.winBeg==END ? n : ctrl.winBeg );
    Int winEnd = ( ctrl.winEnd==END ? n : ctrl.winEnd );
    const Int winSize = winEnd - winBeg;
    const Int blockSize = H.BlockHeight();

    // TODO(poulson): Implement a more reasonable/configurable means of deciding
    // when to call the sequential implementation
    Int minMultiBulgeSize = Max( ctrl.minMultiBulgeSize, 2*blockSize );
    // This maximum is meant to account for parallel overheads and needs to be
    // more principled (and perhaps based upon the number of workers and the
    // cluster characteristics)
    // TODO(poulson): Re-enable this
    //minMultiBulgeSize = Max( minMultiBulgeSize, 500 );

    HessenbergSchurInfo info;

    w.Resize( n, 1 );
    if( winSize < minMultiBulgeSize )
    {
        return multibulge::RedundantlyHandleWindow( H, w, Z, ctrl );
    }

    // Control structure handed to the shift computation
    auto ctrlShifts( ctrl );
    ctrlShifts.winBeg = 0;
    ctrlShifts.winEnd = END;
    ctrlShifts.fullTriangle = false;

    Int numIterSinceDeflation = 0;
    const Int numStaleIterBeforeExceptional = 5;
    // Cf. LAPACK's DLAQR0 for this choice
    const Int maxIter =
      Max(30,2*numStaleIterBeforeExceptional) * Max(10,winSize);

    // Eigenvalues that redundantly handled windows reported as unconverged.
    // Those windows have already been removed from [winBeg,winEnd), so they
    // must be tracked separately to be reported to the caller.
    Int numUnconvergedSub = 0;

    Int iterBegLast=-1, winEndLast=-1;
    DistMatrix<F,STAR,STAR> hMainWin(grid), hSuperWin(grid);
    DistMatrix<Real,STAR,STAR> hSubWin(grid);
    while( winBeg < winEnd )
    {
        if( info.numIterations >= maxIter )
        {
            if( ctrl.demandConverged )
                RuntimeError("MultiBulge QR iteration did not converge");
            else
                break;
        }
        auto winInd = IR(winBeg,winEnd);

        // Detect an irreducible Hessenberg window, [iterBeg,winEnd)
        // ---------------------------------------------------------
        // TODO(poulson): Have the interblock chase from the previous sweep
        // collect the main and sub diagonal of H along the diagonal workers
        // and then broadcast across the "cross" communicator.
        util::GatherTridiagonal( H, winInd, hMainWin, hSubWin, hSuperWin );
        // Diagnostics are only emitted on request; dumping H and the gathered
        // diagonals on every pass was leftover debugging output.
        if( ctrl.progress )
            Output("winBeg=",winBeg,", winEnd=",winEnd);

        const Int iterOffset =
          DetectSmallSubdiagonal
          ( hMainWin.Matrix(), hSubWin.Matrix(), hSuperWin.Matrix() );
        const Int iterBeg = winBeg + iterOffset;
        const Int iterWinSize = winEnd-iterBeg;
        if( iterOffset > 0 )
        {
            // Make the split exact in H and in the gathered subdiagonal
            H.Set( iterBeg, iterBeg-1, zero );
            hSubWin.Set( iterOffset-1, 0, zero );
        }
        if( iterWinSize == 1 )
        {
            if( ctrl.progress )
                Output("One-by-one window at ",iterBeg);
            w.Set( iterBeg, 0, hMainWin.GetLocal(iterOffset,0) );

            winEnd = iterBeg;
            numIterSinceDeflation = 0;
            continue;
        }
        else if( iterWinSize == 2 )
        {
            if( ctrl.progress )
                Output("Two-by-two window at ",iterBeg);
            // The 2x2 block is read from the gathered diagonals
            const F eta00 = hMainWin.GetLocal(iterOffset,0);
            const F eta01 = hSuperWin.GetLocal(iterOffset,0);
            const Real eta10 = hSubWin.GetLocal(iterOffset,0);
            const F eta11 = hMainWin.GetLocal(iterOffset+1,0);
            multibulge::TwoByTwo
            ( H, eta00, eta01, eta10, eta11, w, Z, iterBeg, ctrl );

            winEnd = iterBeg;
            numIterSinceDeflation = 0;
            continue;
        }
        else if( iterWinSize < minMultiBulgeSize )
        {
            // The window is small enough to switch to the simple scheme
            if( ctrl.progress )
                Output("Redundantly handling window [",iterBeg,",",winEnd,")");
            auto ctrlIter( ctrl );
            ctrlIter.winBeg = iterBeg;
            ctrlIter.winEnd = winEnd;
            const HessenbergSchurInfo iterInfo =
              multibulge::RedundantlyHandleWindow( H, w, Z, ctrlIter );
            info.numIterations += iterInfo.numIterations;
            // Do not silently drop eigenvalues the sub-solver gave up on
            numUnconvergedSub += iterInfo.numUnconverged;

            winEnd = iterBeg;
            numIterSinceDeflation = 0;
            continue;
        }

        const Int numShiftsRec = ctrl.numShifts( n, iterWinSize );
        if( ctrl.progress )
        {
            Output("Iter. ",info.numIterations,": ");
            Output("  window is [",iterBeg,",",winEnd,")");
            Output("  recommending ",numShiftsRec," shifts");
        }

        // NOTE(poulson): In the case where exceptional shifts are used, the
        // main and subdiagonals of H in the window are currently redundantly
        // gathered. It could be worthwhile to pass in hMainWin and hSubWin.
        const Int shiftBeg =
          multibulge::ComputeShifts
          ( H, w, iterBeg, winBeg, winEnd, numShiftsRec,
            numIterSinceDeflation, numStaleIterBeforeExceptional,
            ctrlShifts );
        auto shiftInd = IR(shiftBeg,winEnd);
        auto wShifts = w(shiftInd,ALL);

        // Perform a small-bulge sweep
        auto ctrlSweep( ctrl );
        ctrlSweep.winBeg = iterBeg;
        ctrlSweep.winEnd = winEnd;
        multibulge::Sweep( H, wShifts, Z, ctrlSweep );

        ++info.numIterations;
        // Only a sweep over the very same window counts as a stale iteration
        if( iterBeg == iterBegLast && winEnd == winEndLast )
            ++numIterSinceDeflation;
        iterBegLast = iterBeg;
        winEndLast = winEnd;
    }
    info.numUnconverged = (winEnd-winBeg) + numUnconvergedSub;
    return info;
}