DCInfo Merge ( Real beta, // The n0 (unsorted) eigenvalues from T0. const Matrix<Real>& w0, // The n1 (unsorted) eigenvalues from T1. const Matrix<Real>& w1, // On exit, the (unsorted) eigenvalues of the merged tridiagonal matrix Matrix<Real>& d, // If ctrl.wantEigVecs is true, then, on entry, a packing of the eigenvectors // from the two subproblems, // // Q = | Q0, 0 |, // | 0, Q1 | // // where Q0 is n0 x n0, and Q1 is n1 x n1. // // If ctrl.wantEigVecs is false, then, on entry, Q is the same as above, but // with only the row that goes through the last row of Q0 and the row that // goes through the first row of Q1 kept. // // If ctrl.wantEigVecs is true, on exit, Q will contain the eigenvectors of // the merged tridiagonal matrix. If ctrl.wantEigVecs is false, then only the // two rows of the result mentioned above will be output. Matrix<Real>& Q, const HermitianTridiagEigCtrl<Real>& ctrl ) { DEBUG_CSE const Int n0 = w0.Height(); const Int n1 = w1.Height(); const Int n = n0 + n1; const auto& dcCtrl = ctrl.dcCtrl; DCInfo info; auto& secularInfo = info.secularInfo; if( ctrl.progress ) Output("n=",n,", n0=",n0,", n1=",n1); Matrix<Real> Q0, Q1; if( ctrl.wantEigVecs ) { // Q = | Q0 0 | // | 0 Q1 | View( Q0, Q, IR(0,n0), IR(0,n0) ); View( Q1, Q, IR(n0,END), IR(n0,END) ); } else { View( Q0, Q, IR(0), IR(0,n0) ); View( Q1, Q, IR(1), IR(n0,END) ); } // Before permutation, // // r = sqrt(2 |beta|) z, // // where // // z = [ sgn(beta)*Q0(n0-1,:), Q1(0,:) ] / sqrt(2). // // But we reorder indices 0 and n0-1 to put r in the first position. Thus, // we must form // // d = [w0(n0-1); w0(0:n0-2); w1] // // and consider the matrix // // diag(d) + 2 |beta| z z'. // // Form d = [w0(n0-1); w0(0:n0-2); w1]. // This effectively cyclically shifts [0,n0) |-> [1,n0+1) mod n0. d.Resize( n, 1 ); d(0) = w0(n0-1); for( Int j=0; j<n0-1; ++j ) { d(j+1) = w0(j); } for( Int j=0; j<n1; ++j ) { d(j+n0) = w1(j); } // Compute the scale of the problem and rescale. We will rescale the // eigenvalues at the end of this routine. Note that LAPACK's {s,d}laed2 // [CITATION] uses max(|beta|,||z||_max), where || z ||_2 = sqrt(2), // which could be much too small if || r ||_2 is much larger than beta // and sqrt(2). Real scale = Max( 2*Abs(beta), MaxNorm(d) ); SafeScale( Real(1), scale, d ); SafeScale( Real(1), scale, beta ); // Now that the problem is rescaled, our deflation tolerance simplifies to // // tol = deflationFudge eps max( || d ||_max, 2*|beta| ) // = deflationFudge eps. // // Cf. LAPACK's {s,d}lasd2 [CITATION] for this tolerance. const Real eps = limits::Epsilon<Real>(); const Real deflationTol = dcCtrl.deflationFudge*eps; Matrix<Real> z(n,1); Matrix<Int> columnTypes(n,1); const Real betaSgn = Sgn( beta, false ); const Int lastRowOfQ0 = ( ctrl.wantEigVecs ? n0-1 : 0 ); const Real sqrtTwo = Sqrt( Real(2) ); z(0) = betaSgn*Q0(lastRowOfQ0,n0-1) / sqrtTwo; columnTypes(0) = DENSE_COLUMN; for( Int j=0; j<n0-1; ++j ) { z(j+1) = betaSgn*Q0(lastRowOfQ0,j) / sqrtTwo; columnTypes(j+1) = COLUMN_NONZERO_IN_FIRST_BLOCK; } for( Int j=0; j<n1; ++j ) { z(j+n0) = Q1(0,j) / sqrtTwo; columnTypes(j+n0) = COLUMN_NONZERO_IN_SECOND_BLOCK; } Permutation combineSortPerm; SortingPermutation( d, combineSortPerm, ASCENDING ); combineSortPerm.PermuteRows( d ); combineSortPerm.PermuteRows( z ); combineSortPerm.PermuteRows( columnTypes ); auto combinedToOrig = [&]( const Int& combinedIndex ) { const Int preCombined = combineSortPerm.Preimage( combinedIndex ); if( preCombined < n0 ) // Undo the cyclic shift [0,n0) |-> [1,n0+1) mod n0 which // pushed the removed row into the first position. return Mod( preCombined-1, n0 ); else return preCombined; }; Permutation deflationPerm; deflationPerm.MakeIdentity( n ); deflationPerm.MakeArbitrary(); // Since we do not yet know how many undeflated entries there will be, we // must use the no-deflation case as our storage upper bound. Matrix<Real> dUndeflated(n,1), zUndeflated(n,1); dUndeflated(0) = 0; zUndeflated(0) = z(0); // Deflate all (off-diagonal) update entries sufficiently close to zero Int numDeflated = 0; Int numUndeflated = 0; // We will keep track of the last column that we encountered that was not // initially deflatable (but that could be deflated later due to close // diagonal entries if another undeflatable column is not encountered // first). Int revivalCandidate = n; for( Int j=0; j<n; ++j ) { if( Abs(2*beta*z(j)) <= deflationTol ) { // We can deflate due to the r component being sufficiently small const Int deflationDest = (n-1) - numDeflated; deflationPerm.SetImage( j, deflationDest ); if( ctrl.progress ) Output ("Deflating via p(",j,")=",deflationDest, " because |2*beta*z(",j,")|=|",2*beta*z(j),"| <= ", deflationTol); columnTypes(j) = DEFLATED_COLUMN; ++numDeflated; ++secularInfo.numDeflations; ++secularInfo.numSmallUpdateDeflations; } else { revivalCandidate = j; if( ctrl.progress ) Output("Breaking initial deflation loop at j=",j); break; } } // If we already fully deflated, then the following loop should be trivial const Int deflationRestart = revivalCandidate+1; for( Int j=deflationRestart; j<n; ++j ) { if( Abs(2*beta*z(j)) <= deflationTol ) { const Int deflationDest = (n-1) - numDeflated; deflationPerm.SetImage( j, deflationDest ); if( ctrl.progress ) Output ("Deflating via p(",j,")=",deflationDest, " because |2*beta*z(",j,")|=|",2*beta*z(j),"| <= ", deflationTol); columnTypes(j) = DEFLATED_COLUMN; ++numDeflated; ++secularInfo.numDeflations; ++secularInfo.numSmallUpdateDeflations; continue; } const Real gamma = SafeNorm( z(j), z(revivalCandidate) ); const Real c = z(j) / gamma; const Real s = z(revivalCandidate) / gamma; const Real offDiagNew = c*s*(d(j)-d(revivalCandidate)); if( Abs(offDiagNew) <= deflationTol ) { // Deflate the previously undeflatable index by rotating // z(revivalCandidate) into z(j) (Cf. the discussion // surrounding Eq. (4.4) of Gu/Eisenstat's TR [CITATION]). // // In particular, we want // // | z(j), z(revivalCandidate) | | c -s | = | gamma, 0 |, // | s c | // // where gamma = || z(revivalCandidate); z(j) ||_2. Putting // // c = z(j) / gamma, // s = z(revivalCandidate) / gamma, // // implies // // | c, s | | z(j) | = | gamma |. // | -s, c | | z(revivalCandidate) | | 0 | // z(j) = gamma; z(revivalCandidate) = 0; // Apply | c -s | to both sides of d // | s c | const Real deltaDeflate = d(revivalCandidate)*(c*c) + d(j)*(s*s); d(j) = d(j)*(c*c) + d(revivalCandidate)*(s*s); d(revivalCandidate) = deltaDeflate; // Apply | c -s | from the right to Q // | s c | // const Int revivalOrig = combinedToOrig( revivalCandidate ); const Int jOrig = combinedToOrig( j ); if( ctrl.wantEigVecs ) { // TODO(poulson): Exploit the nonzero structure of Q? blas::Rot( n, &Q(0,jOrig), 1, &Q(0,revivalOrig), 1, c, s ); } else { blas::Rot( 2, &Q(0,jOrig), 1, &Q(0,revivalOrig), 1, c, s ); } const Int deflationDest = (n-1) - numDeflated; deflationPerm.SetImage( revivalCandidate, deflationDest ); if( ctrl.progress ) Output ("Deflating via p(",revivalCandidate,")=", deflationDest," because |c*s*(d(",j,")-d(",revivalCandidate, "))|=",offDiagNew," <= ",deflationTol); if( columnTypes(revivalCandidate) != columnTypes(j) ) { // We mixed top and bottom columns so the result is dense. columnTypes(j) = DENSE_COLUMN; } columnTypes(revivalCandidate) = DEFLATED_COLUMN; revivalCandidate = j; ++numDeflated; ++secularInfo.numDeflations; ++secularInfo.numCloseDiagonalDeflations; continue; } // We cannot yet deflate index j, so we must give up on the previous // revival candidate and then set revivalCandidate = j. dUndeflated(numUndeflated) = d(revivalCandidate); zUndeflated(numUndeflated) = z(revivalCandidate); deflationPerm.SetImage( revivalCandidate, numUndeflated ); if( ctrl.progress ) Output ("Could not deflate with j=",j," and revivalCandidate=", revivalCandidate,", so p(",revivalCandidate,")=", numUndeflated); ++numUndeflated; revivalCandidate = j; } if( revivalCandidate < n ) { // Give up on the revival candidate dUndeflated(numUndeflated) = d(revivalCandidate); zUndeflated(numUndeflated) = z(revivalCandidate); deflationPerm.SetImage( revivalCandidate, numUndeflated ); if( ctrl.progress ) Output ("Final iteration, so p(",revivalCandidate,")=",numUndeflated); ++numUndeflated; } // Now shrink dUndeflated and zUndeflated down to their proper size dUndeflated.Resize( numUndeflated, 1 ); zUndeflated.Resize( numUndeflated, 1 ); // Count the number of columns of Q with each nonzero pattern std::vector<Int> packingCounts( NUM_DC_COMBINED_COLUMN_TYPES, 0 ); for( Int j=0; j<n; ++j ) ++packingCounts[columnTypes(j)]; DEBUG_ONLY( if( packingCounts[DEFLATED_COLUMN] != numDeflated ) LogicError ("Inconsistency between packingCounts[DEFLATED_COLUMN]=", packingCounts[DEFLATED_COLUMN], " and numDeflated=",numDeflated); )