Esempio n. 1
0
void TestCorrectness
( bool print, 
  const Matrix<Field>& A,
  const Permutation& P,
  const Matrix<Field>& AOrig,
        Int numRHS=100 )
{
    typedef Base<Field> Real;
    const Int n = AOrig.Width();
    const Real eps = limits::Epsilon<Real>();
    const Real oneNormA = OneNorm( AOrig );

    Output("Testing error...");

    // Generate random right-hand sides
    Matrix<Field> X;
    Uniform( X, n, numRHS );
    auto Y( X );
    const Real oneNormY = OneNorm( Y );
    P.PermuteRows( Y );
    lu::SolveAfter( NORMAL, A, Y );

    // Now investigate the residual, ||AOrig Y - X||_oo
    Gemm( NORMAL, NORMAL, Field(-1), AOrig, Y, Field(1), X );
    const Real infError = InfinityNorm( X );
    const Real relError = infError / (eps*n*Max(oneNormA,oneNormY));

    // TODO(poulson): Use a rigorous failure condition
    Output("||A X - Y||_oo / (eps n Max(||A||_1,||Y||_1)) = ",relError);
    if( relError > Real(1000) )
        LogicError("Unacceptably large relative error");
}
Esempio n. 2
0
// Updates, in place, a row-pivoted LU factorization stored in (A,P) so that
// it accounts for the rank-one modification w v' built from u and v (see the
// step comments below: w := inv(L) P u is reduced to a multiple of e0, the
// scaled v' is added into the first row of U, and the resulting
// upper-Hessenberg U is driven back to upper-triangular form).
//
// Parameters:
//   A         : m x n with m <= n (enforced below). The strictly lower part
//               is used as the unit lower-triangular factor L and the upper
//               part as U; both are overwritten.
//   P         : permutation that is updated with every adjacent row swap
//               performed here.
//   u         : m x 1 column vector (enforced below).
//   v         : n x 1 column vector (enforced below).
//   conjugate : forwarded to Transpose when forming v' (presumably selects
//               the conjugate transpose -- confirm against Transpose).
//   tau       : threshold in the pivoting tests; rows i and i+1 are swapped
//               when the candidate pivot magnitude is below tau times the
//               magnitude of the alternative.
//
// Raises a LogicError if the dimension requirements are violated.
//
// NOTE(review): for m == 0 the code below requests a (minDim-1) x 1 = -1 x 1
// matrix for uSub and later reads w(0) from an empty vector; confirm that
// callers never pass an empty A.
void LUMod
( Matrix<F>& A,
        Permutation& P, 
  const Matrix<F>& u,
  const Matrix<F>& v,
  bool conjugate,
  Base<F> tau )
{
    // Project-defined macro (presumably debug call-stack tracking)
    DEBUG_CSE
    typedef Base<F> Real;
    const Int m = A.Height();
    const Int n = A.Width();
    const Int minDim = Min(m,n);
    // minDim == m is equivalent to m <= n
    if( minDim != m )
        LogicError("It is assumed that height(A) <= width(A)");
    if( u.Height() != m || u.Width() != 1 )
        LogicError("u is expected to be a conforming column vector");
    if( v.Height() != n || v.Width() != 1 )
        LogicError("v is expected to be a conforming column vector");

    // w := inv(L) P u
    // (u is const, so the work is done on a copy)
    auto w( u );
    P.PermuteRows( w );
    Trsv( LOWER, NORMAL, UNIT, A, w );

    // Maintain an external vector for the temporary subdiagonal of U.
    // The slot A(i+1,i) is occupied by L, so the entry U(i+1,i) that the row
    // operations below create is kept in uSub(i) until the second sweep
    // eliminates it.
    Matrix<F> uSub;
    Zeros( uSub, minDim-1, 1 );

    // Reduce w to a multiple of e0
    // (bottom-up sweep: iteration i eliminates w(i+1) against w(i))
    for( Int i=minDim-2; i>=0; --i )
    {
        // Decide if we should pivot the i'th and i+1'th rows of w
        const F lambdaSub = A(i+1,i);
        const F ups_ii = A(i,i); 
        const F omega_i = w(i);
        const F omega_ip1 = w(i+1);
        // Pivot when |w(i)| < tau |L(i+1,i) w(i) + w(i+1)|
        const Real rightTerm = Abs(lambdaSub*omega_i+omega_ip1);
        const bool pivot = ( Abs(omega_i) < tau*rightTerm );

        // Index ranges: column/row i, column/row i+1, the rows strictly
        // below row i+1, and the columns strictly to the right of column i.
        const Range<Int> indi( i, i+1 ),
                         indip1( i+1, i+2 ),
                         indB( i+2, m ),
                         indR( i+1, n );

        // Pieces of L's columns i and i+1 below row i+1, and pieces of U's
        // rows i and i+1 to the right of column i.
        auto lBi   = A( indB,   indi   );
        auto lBip1 = A( indB,   indip1 );
        auto uiR   = A( indi,   indR   );
        auto uip1R = A( indip1, indR   );

        if( pivot )
        {
            // P := P_i P
            P.Swap( i, i+1 );

            // Simultaneously perform
            //   U := P_i U and
            //   L := P_i L P_i^T
            //
            // Then update
            //     L := L T_{i,L}^{-1},
            //     U := T_{i,L} U,
            //     w := T_{i,L} P_i w,
            // where T_{i,L} is the Gauss transform which zeros (P_i w)_{i+1}.
            //
            // More succinctly,
            //     gamma    := w(i) / w(i+1),
            //     w(i)     := w(i+1),
            //     w(i+1)   := 0,
            //     L(:,i)   += gamma L(:,i+1),
            //     U(i+1,:) -= gamma U(i,:).
            const F gamma = omega_i / omega_ip1;
            // Value of L(i,i) after the swap and the Gauss transform
            const F lambda_ii = F(1) + gamma*lambdaSub;
            // Scratch values: both entries are overwritten again below, and
            // the RowSwap exchanges them in the meantime.
            A(i,  i) = gamma;
            A(i+1,i) = 0;

            // NOTE(review): the two '...Copy' objects are assumed to be deep
            // copies (not views) of the data prior to the swaps -- confirm
            // against Matrix's copy constructor.
            auto lBiCopy = lBi;
            Swap( NORMAL, lBi, lBip1 );
            Axpy( gamma, lBiCopy, lBi );

            auto uip1RCopy = uip1R;
            RowSwap( A, i, i+1 );
            Axpy( -gamma, uip1RCopy, uip1R );

            // Force L back to *unit* lower-triangular form via the transform
            //     L := L T_{i,U}^{-1} D^{-1},
            // where D is diagonal and responsible for forcing L(i,i) and
            // L(i+1,i+1) back to 1. The effect on L is:
            //     eta       := L(i,i+1)/L(i,i),
            //     L(:,i+1)  -= eta L(:,i),
            //     delta_i   := L(i,i),
            //     delta_ip1 := L(i+1,i+1),
            //     L(:,i)   /= delta_i,
            //     L(:,i+1) /= delta_ip1,
            // while the effect on U is
            //     U(i,:)   += eta U(i+1,:)
            //     U(i,:)   *= delta_i,
            //     U(i+1,:) *= delta_{i+1},
            // and the effect on w is
            //     w(i) *= delta_i.
            const F eta = lambdaSub/lambda_ii;
            const F delta_i = lambda_ii;
            const F delta_ip1 = F(1) - eta*gamma;

            Axpy( -eta, lBi, lBip1 );
            // Final subdiagonal entry of L for this pair of rows
            A(i+1,i) = gamma/delta_i;
            lBi   *= F(1)/delta_i;
            lBip1 *= F(1)/delta_ip1;

            // Final U(i,i); the new U(i+1,i) goes to the external subdiagonal
            A(i,i) = eta*ups_ii*delta_i;
            Axpy( eta, uip1R, uiR );
            uiR   *= delta_i;
            uip1R *= delta_ip1;
            uSub(i) = ups_ii*delta_ip1;

            // Finally set w(i)
            // (w(i+1) is never read again by this sweep, so it is not
            // explicitly zeroed)
            w(i) = omega_ip1*delta_i;
        }
        else
        {
            // Update
            //     L := L T_{i,L}^{-1},
            //     U := T_{i,L} U,
            //     w := T_{i,L} w,
            // where T_{i,L} is the Gauss transform which zeros w_{i+1}.
            //
            // More succinctly,
            //     gamma    := w(i+1) / w(i),
            //     L(:,i)   += gamma L(:,i+1),
            //     U(i+1,:) -= gamma U(i,:),
            //     w(i+1)   := 0.
            const F gamma = omega_ip1 / omega_i;
            A(i+1,i) += gamma;
            Axpy(  gamma, lBip1, lBi );
            Axpy( -gamma, uiR, uip1R );
            // U(i+1,i) := -gamma U(i,i), stored externally
            uSub(i) = -gamma*ups_ii;
        }
    }

    // Add the modified w v' into U
    // (w is now a multiple of e0, so only the first row of A is touched)
    {
        auto a0 = A( IR(0), ALL );
        const F omega_0 = w(0); 
        Matrix<F> vTrans;
        Transpose( v, vTrans, conjugate );
        Axpy( omega_0, vTrans, a0 );
    }

    // Transform U from upper-Hessenberg to upper-triangular form
    // (top-down sweep: iteration i eliminates the subdiagonal entry uSub(i))
    for( Int i=0; i<minDim-1; ++i ) 
    {
        // Decide if we should pivot the i'th and i+1'th rows U
        const F lambdaSub = A(i+1,i);
        const F ups_ii = A(i,i);
        const F ups_ip1i = uSub(i);
        // Pivot when |U(i,i)| < tau |L(i+1,i) U(i,i) + U(i+1,i)|
        const Real rightTerm = Abs(lambdaSub*ups_ii+ups_ip1i);
        const bool pivot = ( Abs(ups_ii) < tau*rightTerm );

        // Same index ranges and pieces of L and U as in the first sweep
        const Range<Int> indi( i, i+1 ),
                         indip1( i+1, i+2 ),
                         indB( i+2, m ),
                         indR( i+1, n );

        auto lBi   = A( indB,   indi   );
        auto lBip1 = A( indB,   indip1 );
        auto uiR   = A( indi,   indR   );
        auto uip1R = A( indip1, indR   );

        if( pivot )
        {
            // P := P_i P
            P.Swap( i, i+1 );

            // Simultaneously perform
            //   U := P_i U and
            //   L := P_i L P_i^T
            //
            // Then update
            //     L := L T_{i,L}^{-1},
            //     U := T_{i,L} U,
            // where T_{i,L} is the Gauss transform which zeros U(i+1,i).
            //
            // More succinctly,
            //     gamma    := U(i+1,i) / U(i,i),
            //     L(:,i)   += gamma L(:,i+1),
            //     U(i+1,:) -= gamma U(i,:).
            //
            // The indices in the formula for gamma refer to U *after* the
            // row swap; in terms of the values read before the swap this is
            // ups_ii / ups_ip1i, which is what is computed here.
            const F gamma = ups_ii / ups_ip1i;
            // Value of L(i,i) after the swap and the Gauss transform
            const F lambda_ii = F(1) + gamma*lambdaSub;
            // Scratch values: both entries are overwritten again below, and
            // the RowSwap exchanges them in the meantime.
            A(i+1,i) = ups_ip1i;
            A(i,  i) = gamma;

            // NOTE(review): the two '...Copy' objects are assumed to be deep
            // copies (not views) of the data prior to the swaps -- confirm
            // against Matrix's copy constructor.
            auto lBiCopy = lBi;
            Swap( NORMAL, lBi, lBip1 );
            Axpy( gamma, lBiCopy, lBi );

            auto uip1RCopy = uip1R;
            RowSwap( A, i, i+1 );
            Axpy( -gamma, uip1RCopy, uip1R );

            // Force L back to *unit* lower-triangular form via the transform
            //     L := L T_{i,U}^{-1} D^{-1},
            // where D is diagonal and responsible for forcing L(i,i) and
            // L(i+1,i+1) back to 1. The effect on L is:
            //     eta       := L(i,i+1)/L(i,i),
            //     L(:,i+1)  -= eta L(:,i),
            //     delta_i   := L(i,i),
            //     delta_ip1 := L(i+1,i+1),
            //     L(:,i)   /= delta_i,
            //     L(:,i+1) /= delta_ip1,
            // while the effect on U is
            //     U(i,:)   += eta U(i+1,:)
            //     U(i,:)   *= delta_i,
            //     U(i+1,:) *= delta_{i+1}.
            const F eta = lambdaSub/lambda_ii;
            const F delta_i = lambda_ii;
            const F delta_ip1 = F(1) - eta*gamma;

            Axpy( -eta, lBi, lBip1 );
            // Final subdiagonal entry of L for this pair of rows
            A(i+1,i) = gamma/delta_i;
            lBi   *= F(1)/delta_i;
            lBip1 *= F(1)/delta_ip1;

            // Final U(i,i); U(i+1,i) has been eliminated, so nothing is
            // written back to uSub.
            A(i,i) = ups_ip1i*delta_i;
            Axpy( eta, uip1R, uiR );
            uiR   *= delta_i;
            uip1R *= delta_ip1;
        }
        else
        {
            // Update
            //     L := L T_{i,L}^{-1},
            //     U := T_{i,L} U,
            // where T_{i,L} is the Gauss transform which zeros U(i+1,i).
            //
            // More succinctly,
            //     gamma    := U(i+1,i)/ U(i,i),
            //     L(:,i)   += gamma L(:,i+1),
            //     U(i+1,:) -= gamma U(i,:).
            const F gamma = ups_ip1i / ups_ii;
            A(i+1,i) += gamma;
            Axpy(  gamma, lBip1, lBi );
            Axpy( -gamma, uiR, uip1R );
        }
    }
}
Esempio n. 3
0
DCInfo
Merge
( Real beta,
  // The n0 (unsorted) eigenvalues from T0.
  const Matrix<Real>& w0,
  // The n1 (unsorted) eigenvalues from T1.
  const Matrix<Real>& w1,
  // On exit, the (unsorted) eigenvalues of the merged tridiagonal matrix
  Matrix<Real>& d,
  // If ctrl.wantEigVecs is true, then, on entry, a packing of the eigenvectors
  // from the two subproblems,
  //
  //   Q = | Q0, 0  |,
  //       | 0,  Q1 |
  //
  // where Q0 is n0 x n0, and Q1 is n1 x n1.
  //
  // If ctrl.wantEigVecs is false, then, on entry, Q is the same as above, but
  // with only the row that goes through the last row of Q0 and the row that
  // goes through the first row of Q1 kept.
  //
  // If ctrl.wantEigVecs is true, on exit, Q will contain the eigenvectors of
  // the merged tridiagonal matrix. If ctrl.wantEigVecs is false, then only the
  // two rows of the result mentioned above will be output.
  Matrix<Real>& Q,
  const HermitianTridiagEigCtrl<Real>& ctrl )
{
    DEBUG_CSE
    const Int n0 = w0.Height();
    const Int n1 = w1.Height();
    const Int n = n0 + n1;
    const auto& dcCtrl = ctrl.dcCtrl;

    DCInfo info;
    auto& secularInfo = info.secularInfo;
    if( ctrl.progress )
        Output("n=",n,", n0=",n0,", n1=",n1);

    Matrix<Real> Q0, Q1;
    if( ctrl.wantEigVecs )
    {
        // Q = | Q0 0  |
        //     |  0 Q1 |
        View( Q0, Q, IR(0,n0), IR(0,n0) );
        View( Q1, Q, IR(n0,END), IR(n0,END) );
    }
    else
    {
        View( Q0, Q, IR(0), IR(0,n0) );
        View( Q1, Q, IR(1), IR(n0,END) );
    }

    // Before permutation, 
    //
    //   r = sqrt(2 |beta|) z,
    //
    // where
    //     
    //   z = [ sgn(beta)*Q0(n0-1,:), Q1(0,:) ] / sqrt(2).
    //
    // But we reorder indices 0 and n0-1 to put r in the first position. Thus,
    // we must form 
    //
    //   d = [w0(n0-1); w0(0:n0-2); w1]
    //
    // and consider the matrix
    //
    //   diag(d) + 2 |beta| z z'.
    //

    // Form d = [w0(n0-1); w0(0:n0-2); w1].
    // This effectively cyclically shifts [0,n0) |-> [1,n0+1) mod n0.
    d.Resize( n, 1 );
    d(0) = w0(n0-1);
    for( Int j=0; j<n0-1; ++j )
    {
        d(j+1) = w0(j);
    }
    for( Int j=0; j<n1; ++j )
    {
        d(j+n0) = w1(j);
    }

    // Compute the scale of the problem and rescale. We will rescale the 
    // eigenvalues at the end of this routine. Note that LAPACK's {s,d}laed2
    // [CITATION] uses max(|beta|,||z||_max), where || z ||_2 = sqrt(2),
    // which could be much too small if || r ||_2 is much larger than beta 
    // and sqrt(2).
    Real scale = Max( 2*Abs(beta), MaxNorm(d) );
    SafeScale( Real(1), scale, d );
    SafeScale( Real(1), scale, beta );

    // Now that the problem is rescaled, our deflation tolerance simplifies to
    //
    //   tol = deflationFudge eps max( || d ||_max, 2*|beta| )
    //       = deflationFudge eps.
    //
    // Cf. LAPACK's {s,d}lasd2 [CITATION] for this tolerance.
    const Real eps = limits::Epsilon<Real>();
    const Real deflationTol = dcCtrl.deflationFudge*eps;

    Matrix<Real> z(n,1);
    Matrix<Int> columnTypes(n,1);
    const Real betaSgn = Sgn( beta, false );
    const Int lastRowOfQ0 = ( ctrl.wantEigVecs ? n0-1 : 0 );
    const Real sqrtTwo = Sqrt( Real(2) );
    z(0) = betaSgn*Q0(lastRowOfQ0,n0-1) / sqrtTwo;
    columnTypes(0) = DENSE_COLUMN;
    for( Int j=0; j<n0-1; ++j )
    {
        z(j+1) = betaSgn*Q0(lastRowOfQ0,j) / sqrtTwo;
        columnTypes(j+1) = COLUMN_NONZERO_IN_FIRST_BLOCK;
    }
    for( Int j=0; j<n1; ++j )
    {
        z(j+n0) = Q1(0,j) / sqrtTwo;
        columnTypes(j+n0) = COLUMN_NONZERO_IN_SECOND_BLOCK;
    }

    Permutation combineSortPerm;
    SortingPermutation( d, combineSortPerm, ASCENDING );
    combineSortPerm.PermuteRows( d );
    combineSortPerm.PermuteRows( z );
    combineSortPerm.PermuteRows( columnTypes );

    auto combinedToOrig = [&]( const Int& combinedIndex )
      {
          const Int preCombined = combineSortPerm.Preimage( combinedIndex );
          if( preCombined < n0 )
              // Undo the cyclic shift [0,n0) |-> [1,n0+1) mod n0 which
              // pushed the removed row into the first position.
              return Mod( preCombined-1, n0 );
          else
              return preCombined;
      };

    Permutation deflationPerm;
    deflationPerm.MakeIdentity( n );
    deflationPerm.MakeArbitrary();
    // Since we do not yet know how many undeflated entries there will be, we
    // must use the no-deflation case as our storage upper bound.
    Matrix<Real> dUndeflated(n,1), zUndeflated(n,1);
    dUndeflated(0) = 0;
    zUndeflated(0) = z(0);

    // Deflate all (off-diagonal) update entries sufficiently close to zero
    Int numDeflated = 0;
    Int numUndeflated = 0;
    // We will keep track of the last column that we encountered that was not
    // initially deflatable (but that could be deflated later due to close
    // diagonal entries if another undeflatable column is not encountered
    // first).
    Int revivalCandidate = n;
    for( Int j=0; j<n; ++j )
    {
        if( Abs(2*beta*z(j)) <= deflationTol )
        {
            // We can deflate due to the r component being sufficiently small
            const Int deflationDest = (n-1) - numDeflated;
            deflationPerm.SetImage( j, deflationDest );
            if( ctrl.progress )
                Output
                ("Deflating via p(",j,")=",deflationDest,
                 " because |2*beta*z(",j,")|=|",2*beta*z(j),"| <= ",
                 deflationTol);
            columnTypes(j) = DEFLATED_COLUMN;
            ++numDeflated;
            ++secularInfo.numDeflations;
            ++secularInfo.numSmallUpdateDeflations;
        }
        else
        {
            revivalCandidate = j;
            if( ctrl.progress )
                Output("Breaking initial deflation loop at j=",j);
            break;
        }
    }
    // If we already fully deflated, then the following loop should be trivial
    const Int deflationRestart = revivalCandidate+1;
    for( Int j=deflationRestart; j<n; ++j )
    {
        if( Abs(2*beta*z(j)) <= deflationTol )
        {
            const Int deflationDest = (n-1) - numDeflated;
            deflationPerm.SetImage( j, deflationDest );
            if( ctrl.progress )
                Output
                ("Deflating via p(",j,")=",deflationDest,
                 " because |2*beta*z(",j,")|=|",2*beta*z(j),"| <= ",
                 deflationTol);
            columnTypes(j) = DEFLATED_COLUMN;
            ++numDeflated;
            ++secularInfo.numDeflations;
            ++secularInfo.numSmallUpdateDeflations;
            continue;
        }
        const Real gamma = SafeNorm( z(j), z(revivalCandidate) );
        const Real c = z(j) / gamma;
        const Real s = z(revivalCandidate) / gamma;
        const Real offDiagNew = c*s*(d(j)-d(revivalCandidate));
        if( Abs(offDiagNew) <= deflationTol )
        {
            // Deflate the previously undeflatable index by rotating
            // z(revivalCandidate) into z(j) (Cf. the discussion
            // surrounding Eq. (4.4) of Gu/Eisenstat's TR [CITATION]).
            //
            // In particular, we want
            //
            //   | z(j), z(revivalCandidate) | | c -s | = | gamma, 0 |,
            //                                 | s  c |
            //
            // where gamma = || z(revivalCandidate); z(j) ||_2. Putting 
            //
            //   c = z(j)                / gamma,
            //   s = z(revivalCandidate) / gamma,
            //
            // implies
            //
            //   |  c,  s | |        z(j)         | = | gamma |.
            //   | -s,  c | | z(revivalCandidate) |   |   0   |
            //
            z(j) = gamma;
            z(revivalCandidate) = 0;

            // Apply | c -s | to both sides of d
            //       | s  c |
            const Real deltaDeflate = d(revivalCandidate)*(c*c) + d(j)*(s*s);
            d(j) = d(j)*(c*c) + d(revivalCandidate)*(s*s);
            d(revivalCandidate) = deltaDeflate;

            // Apply | c -s | from the right to Q
            //       | s  c |
            //
            const Int revivalOrig = combinedToOrig( revivalCandidate );
            const Int jOrig = combinedToOrig( j );
            if( ctrl.wantEigVecs )
            {
                // TODO(poulson): Exploit the nonzero structure of Q?
                blas::Rot( n, &Q(0,jOrig), 1, &Q(0,revivalOrig), 1, c, s );
            }
            else
            {
                blas::Rot( 2, &Q(0,jOrig), 1, &Q(0,revivalOrig), 1, c, s );
            }

            const Int deflationDest = (n-1) - numDeflated;
            deflationPerm.SetImage( revivalCandidate, deflationDest );
            if( ctrl.progress )
                Output
                ("Deflating via p(",revivalCandidate,")=",
                 deflationDest," because |c*s*(d(",j,")-d(",revivalCandidate,
                 "))|=",offDiagNew," <= ",deflationTol);

            if( columnTypes(revivalCandidate) != columnTypes(j) )
            {
                // We mixed top and bottom columns so the result is dense.
                columnTypes(j) = DENSE_COLUMN;
            }
            columnTypes(revivalCandidate) = DEFLATED_COLUMN;

            revivalCandidate = j;
            ++numDeflated;
            ++secularInfo.numDeflations;
            ++secularInfo.numCloseDiagonalDeflations;
            continue;
        }

        // We cannot yet deflate index j, so we must give up on the previous
        // revival candidate and then set revivalCandidate = j.
        dUndeflated(numUndeflated) = d(revivalCandidate);
        zUndeflated(numUndeflated) = z(revivalCandidate);
        deflationPerm.SetImage( revivalCandidate, numUndeflated );
        if( ctrl.progress )
            Output
            ("Could not deflate with j=",j," and revivalCandidate=",
             revivalCandidate,", so p(",revivalCandidate,")=",
             numUndeflated);
        ++numUndeflated;
        revivalCandidate = j;
    }
    if( revivalCandidate < n )
    {
        // Give up on the revival candidate
        dUndeflated(numUndeflated) = d(revivalCandidate);
        zUndeflated(numUndeflated) = z(revivalCandidate);
        deflationPerm.SetImage( revivalCandidate, numUndeflated );
        if( ctrl.progress )
            Output
            ("Final iteration, so p(",revivalCandidate,")=",numUndeflated);
        ++numUndeflated;
    }

    // Now shrink dUndeflated and zUndeflated down to their proper size
    dUndeflated.Resize( numUndeflated, 1 );
    zUndeflated.Resize( numUndeflated, 1 );

    // Count the number of columns of Q with each nonzero pattern
    std::vector<Int> packingCounts( NUM_DC_COMBINED_COLUMN_TYPES, 0 );
    for( Int j=0; j<n; ++j )
        ++packingCounts[columnTypes(j)];
    DEBUG_ONLY(
      if( packingCounts[DEFLATED_COLUMN] != numDeflated )
          LogicError
          ("Inconsistency between packingCounts[DEFLATED_COLUMN]=",
           packingCounts[DEFLATED_COLUMN],
           " and numDeflated=",numDeflated);
    )