Esempio n. 1
0
DCInfo
Merge
( Real beta,
  // The n0 (unsorted) eigenvalues from T0.
  const Matrix<Real>& w0,
  // The n1 (unsorted) eigenvalues from T1.
  const Matrix<Real>& w1,
  // On exit, the (unsorted) eigenvalues of the merged tridiagonal matrix
  Matrix<Real>& d,
  // If ctrl.wantEigVecs is true, then, on entry, a packing of the eigenvectors
  // from the two subproblems,
  //
  //   Q = | Q0, 0  |,
  //       | 0,  Q1 |
  //
  // where Q0 is n0 x n0, and Q1 is n1 x n1.
  //
  // If ctrl.wantEigVecs is false, then, on entry, Q is the same as above, but
  // with only the row that goes through the last row of Q0 and the row that
  // goes through the first row of Q1 kept.
  //
  // If ctrl.wantEigVecs is true, on exit, Q will contain the eigenvectors of
  // the merged tridiagonal matrix. If ctrl.wantEigVecs is false, then only the
  // two rows of the result mentioned above will be output.
  Matrix<Real>& Q,
  const HermitianTridiagEigCtrl<Real>& ctrl )
{
    DEBUG_CSE
    const Int n0 = w0.Height();
    const Int n1 = w1.Height();
    const Int n = n0 + n1;
    const auto& dcCtrl = ctrl.dcCtrl;

    DCInfo info;
    auto& secularInfo = info.secularInfo;
    if( ctrl.progress )
        Output("n=",n,", n0=",n0,", n1=",n1);

    Matrix<Real> Q0, Q1;
    if( ctrl.wantEigVecs )
    {
        // Q = | Q0 0  |
        //     |  0 Q1 |
        View( Q0, Q, IR(0,n0), IR(0,n0) );
        View( Q1, Q, IR(n0,END), IR(n0,END) );
    }
    else
    {
        View( Q0, Q, IR(0), IR(0,n0) );
        View( Q1, Q, IR(1), IR(n0,END) );
    }

    // Before permutation, 
    //
    //   r = sqrt(2 |beta|) z,
    //
    // where
    //     
    //   z = [ sgn(beta)*Q0(n0-1,:), Q1(0,:) ] / sqrt(2).
    //
    // But we reorder indices 0 and n0-1 to put r in the first position. Thus,
    // we must form 
    //
    //   d = [w0(n0-1); w0(0:n0-2); w1]
    //
    // and consider the matrix
    //
    //   diag(d) + 2 |beta| z z'.
    //

    // Form d = [w0(n0-1); w0(0:n0-2); w1].
    // This effectively cyclically shifts [0,n0) |-> [1,n0+1) mod n0.
    d.Resize( n, 1 );
    d(0) = w0(n0-1);
    for( Int j=0; j<n0-1; ++j )
    {
        d(j+1) = w0(j);
    }
    for( Int j=0; j<n1; ++j )
    {
        d(j+n0) = w1(j);
    }

    // Compute the scale of the problem and rescale. We will rescale the 
    // eigenvalues at the end of this routine. Note that LAPACK's {s,d}laed2
    // [CITATION] uses max(|beta|,||z||_max), where || z ||_2 = sqrt(2),
    // which could be much too small if || r ||_2 is much larger than beta 
    // and sqrt(2).
    Real scale = Max( 2*Abs(beta), MaxNorm(d) );
    SafeScale( Real(1), scale, d );
    SafeScale( Real(1), scale, beta );

    // Now that the problem is rescaled, our deflation tolerance simplifies to
    //
    //   tol = deflationFudge eps max( || d ||_max, 2*|beta| )
    //       = deflationFudge eps.
    //
    // Cf. LAPACK's {s,d}lasd2 [CITATION] for this tolerance.
    const Real eps = limits::Epsilon<Real>();
    const Real deflationTol = dcCtrl.deflationFudge*eps;

    Matrix<Real> z(n,1);
    Matrix<Int> columnTypes(n,1);
    const Real betaSgn = Sgn( beta, false );
    const Int lastRowOfQ0 = ( ctrl.wantEigVecs ? n0-1 : 0 );
    const Real sqrtTwo = Sqrt( Real(2) );
    z(0) = betaSgn*Q0(lastRowOfQ0,n0-1) / sqrtTwo;
    columnTypes(0) = DENSE_COLUMN;
    for( Int j=0; j<n0-1; ++j )
    {
        z(j+1) = betaSgn*Q0(lastRowOfQ0,j) / sqrtTwo;
        columnTypes(j+1) = COLUMN_NONZERO_IN_FIRST_BLOCK;
    }
    for( Int j=0; j<n1; ++j )
    {
        z(j+n0) = Q1(0,j) / sqrtTwo;
        columnTypes(j+n0) = COLUMN_NONZERO_IN_SECOND_BLOCK;
    }

    Permutation combineSortPerm;
    SortingPermutation( d, combineSortPerm, ASCENDING );
    combineSortPerm.PermuteRows( d );
    combineSortPerm.PermuteRows( z );
    combineSortPerm.PermuteRows( columnTypes );

    auto combinedToOrig = [&]( const Int& combinedIndex )
      {
          const Int preCombined = combineSortPerm.Preimage( combinedIndex );
          if( preCombined < n0 )
              // Undo the cyclic shift [0,n0) |-> [1,n0+1) mod n0 which
              // pushed the removed row into the first position.
              return Mod( preCombined-1, n0 );
          else
              return preCombined;
      };

    Permutation deflationPerm;
    deflationPerm.MakeIdentity( n );
    deflationPerm.MakeArbitrary();
    // Since we do not yet know how many undeflated entries there will be, we
    // must use the no-deflation case as our storage upper bound.
    Matrix<Real> dUndeflated(n,1), zUndeflated(n,1);
    dUndeflated(0) = 0;
    zUndeflated(0) = z(0);

    // Deflate all (off-diagonal) update entries sufficiently close to zero
    Int numDeflated = 0;
    Int numUndeflated = 0;
    // We will keep track of the last column that we encountered that was not
    // initially deflatable (but that could be deflated later due to close
    // diagonal entries if another undeflatable column is not encountered
    // first).
    Int revivalCandidate = n;
    for( Int j=0; j<n; ++j )
    {
        if( Abs(2*beta*z(j)) <= deflationTol )
        {
            // We can deflate due to the r component being sufficiently small
            const Int deflationDest = (n-1) - numDeflated;
            deflationPerm.SetImage( j, deflationDest );
            if( ctrl.progress )
                Output
                ("Deflating via p(",j,")=",deflationDest,
                 " because |2*beta*z(",j,")|=|",2*beta*z(j),"| <= ",
                 deflationTol);
            columnTypes(j) = DEFLATED_COLUMN;
            ++numDeflated;
            ++secularInfo.numDeflations;
            ++secularInfo.numSmallUpdateDeflations;
        }
        else
        {
            revivalCandidate = j;
            if( ctrl.progress )
                Output("Breaking initial deflation loop at j=",j);
            break;
        }
    }
    // If we already fully deflated, then the following loop should be trivial
    const Int deflationRestart = revivalCandidate+1;
    for( Int j=deflationRestart; j<n; ++j )
    {
        if( Abs(2*beta*z(j)) <= deflationTol )
        {
            const Int deflationDest = (n-1) - numDeflated;
            deflationPerm.SetImage( j, deflationDest );
            if( ctrl.progress )
                Output
                ("Deflating via p(",j,")=",deflationDest,
                 " because |2*beta*z(",j,")|=|",2*beta*z(j),"| <= ",
                 deflationTol);
            columnTypes(j) = DEFLATED_COLUMN;
            ++numDeflated;
            ++secularInfo.numDeflations;
            ++secularInfo.numSmallUpdateDeflations;
            continue;
        }
        const Real gamma = SafeNorm( z(j), z(revivalCandidate) );
        const Real c = z(j) / gamma;
        const Real s = z(revivalCandidate) / gamma;
        const Real offDiagNew = c*s*(d(j)-d(revivalCandidate));
        if( Abs(offDiagNew) <= deflationTol )
        {
            // Deflate the previously undeflatable index by rotating
            // z(revivalCandidate) into z(j) (Cf. the discussion
            // surrounding Eq. (4.4) of Gu/Eisenstat's TR [CITATION]).
            //
            // In particular, we want
            //
            //   | z(j), z(revivalCandidate) | | c -s | = | gamma, 0 |,
            //                                 | s  c |
            //
            // where gamma = || z(revivalCandidate); z(j) ||_2. Putting 
            //
            //   c = z(j)                / gamma,
            //   s = z(revivalCandidate) / gamma,
            //
            // implies
            //
            //   |  c,  s | |        z(j)         | = | gamma |.
            //   | -s,  c | | z(revivalCandidate) |   |   0   |
            //
            z(j) = gamma;
            z(revivalCandidate) = 0;

            // Apply | c -s | to both sides of d
            //       | s  c |
            const Real deltaDeflate = d(revivalCandidate)*(c*c) + d(j)*(s*s);
            d(j) = d(j)*(c*c) + d(revivalCandidate)*(s*s);
            d(revivalCandidate) = deltaDeflate;

            // Apply | c -s | from the right to Q
            //       | s  c |
            //
            const Int revivalOrig = combinedToOrig( revivalCandidate );
            const Int jOrig = combinedToOrig( j );
            if( ctrl.wantEigVecs )
            {
                // TODO(poulson): Exploit the nonzero structure of Q?
                blas::Rot( n, &Q(0,jOrig), 1, &Q(0,revivalOrig), 1, c, s );
            }
            else
            {
                blas::Rot( 2, &Q(0,jOrig), 1, &Q(0,revivalOrig), 1, c, s );
            }

            const Int deflationDest = (n-1) - numDeflated;
            deflationPerm.SetImage( revivalCandidate, deflationDest );
            if( ctrl.progress )
                Output
                ("Deflating via p(",revivalCandidate,")=",
                 deflationDest," because |c*s*(d(",j,")-d(",revivalCandidate,
                 "))|=",offDiagNew," <= ",deflationTol);

            if( columnTypes(revivalCandidate) != columnTypes(j) )
            {
                // We mixed top and bottom columns so the result is dense.
                columnTypes(j) = DENSE_COLUMN;
            }
            columnTypes(revivalCandidate) = DEFLATED_COLUMN;

            revivalCandidate = j;
            ++numDeflated;
            ++secularInfo.numDeflations;
            ++secularInfo.numCloseDiagonalDeflations;
            continue;
        }

        // We cannot yet deflate index j, so we must give up on the previous
        // revival candidate and then set revivalCandidate = j.
        dUndeflated(numUndeflated) = d(revivalCandidate);
        zUndeflated(numUndeflated) = z(revivalCandidate);
        deflationPerm.SetImage( revivalCandidate, numUndeflated );
        if( ctrl.progress )
            Output
            ("Could not deflate with j=",j," and revivalCandidate=",
             revivalCandidate,", so p(",revivalCandidate,")=",
             numUndeflated);
        ++numUndeflated;
        revivalCandidate = j;
    }
    if( revivalCandidate < n )
    {
        // Give up on the revival candidate
        dUndeflated(numUndeflated) = d(revivalCandidate);
        zUndeflated(numUndeflated) = z(revivalCandidate);
        deflationPerm.SetImage( revivalCandidate, numUndeflated );
        if( ctrl.progress )
            Output
            ("Final iteration, so p(",revivalCandidate,")=",numUndeflated);
        ++numUndeflated;
    }

    // Now shrink dUndeflated and zUndeflated down to their proper size
    dUndeflated.Resize( numUndeflated, 1 );
    zUndeflated.Resize( numUndeflated, 1 );

    // Count the number of columns of Q with each nonzero pattern
    std::vector<Int> packingCounts( NUM_DC_COMBINED_COLUMN_TYPES, 0 );
    for( Int j=0; j<n; ++j )
        ++packingCounts[columnTypes(j)];
    DEBUG_ONLY(
      if( packingCounts[DEFLATED_COLUMN] != numDeflated )
          LogicError
          ("Inconsistency between packingCounts[DEFLATED_COLUMN]=",
           packingCounts[DEFLATED_COLUMN],
           " and numDeflated=",numDeflated);
    )