void TestCorrectness ( bool print, const Matrix<Field>& A, const Permutation& P, const Matrix<Field>& AOrig, Int numRHS=100 ) { typedef Base<Field> Real; const Int n = AOrig.Width(); const Real eps = limits::Epsilon<Real>(); const Real oneNormA = OneNorm( AOrig ); Output("Testing error..."); // Generate random right-hand sides Matrix<Field> X; Uniform( X, n, numRHS ); auto Y( X ); const Real oneNormY = OneNorm( Y ); P.PermuteRows( Y ); lu::SolveAfter( NORMAL, A, Y ); // Now investigate the residual, ||AOrig Y - X||_oo Gemm( NORMAL, NORMAL, Field(-1), AOrig, Y, Field(1), X ); const Real infError = InfinityNorm( X ); const Real relError = infError / (eps*n*Max(oneNormA,oneNormY)); // TODO(poulson): Use a rigorous failure condition Output("||A X - Y||_oo / (eps n Max(||A||_1,||Y||_1)) = ",relError); if( relError > Real(1000) ) LogicError("Unacceptably large relative error"); }
void LUMod ( Matrix<F>& A, Permutation& P, const Matrix<F>& u, const Matrix<F>& v, bool conjugate, Base<F> tau ) { DEBUG_CSE typedef Base<F> Real; const Int m = A.Height(); const Int n = A.Width(); const Int minDim = Min(m,n); if( minDim != m ) LogicError("It is assumed that height(A) <= width(A)"); if( u.Height() != m || u.Width() != 1 ) LogicError("u is expected to be a conforming column vector"); if( v.Height() != n || v.Width() != 1 ) LogicError("v is expected to be a conforming column vector"); // w := inv(L) P u auto w( u ); P.PermuteRows( w ); Trsv( LOWER, NORMAL, UNIT, A, w ); // Maintain an external vector for the temporary subdiagonal of U Matrix<F> uSub; Zeros( uSub, minDim-1, 1 ); // Reduce w to a multiple of e0 for( Int i=minDim-2; i>=0; --i ) { // Decide if we should pivot the i'th and i+1'th rows of w const F lambdaSub = A(i+1,i); const F ups_ii = A(i,i); const F omega_i = w(i); const F omega_ip1 = w(i+1); const Real rightTerm = Abs(lambdaSub*omega_i+omega_ip1); const bool pivot = ( Abs(omega_i) < tau*rightTerm ); const Range<Int> indi( i, i+1 ), indip1( i+1, i+2 ), indB( i+2, m ), indR( i+1, n ); auto lBi = A( indB, indi ); auto lBip1 = A( indB, indip1 ); auto uiR = A( indi, indR ); auto uip1R = A( indip1, indR ); if( pivot ) { // P := P_i P P.Swap( i, i+1 ); // Simultaneously perform // U := P_i U and // L := P_i L P_i^T // // Then update // L := L T_{i,L}^{-1}, // U := T_{i,L} U, // w := T_{i,L} P_i w, // where T_{i,L} is the Gauss transform which zeros (P_i w)_{i+1}. // // More succinctly, // gamma := w(i) / w(i+1), // w(i) := w(i+1), // w(i+1) := 0, // L(:,i) += gamma L(:,i+1), // U(i+1,:) -= gamma U(i,:). const F gamma = omega_i / omega_ip1; const F lambda_ii = F(1) + gamma*lambdaSub; A(i, i) = gamma; A(i+1,i) = 0; auto lBiCopy = lBi; Swap( NORMAL, lBi, lBip1 ); Axpy( gamma, lBiCopy, lBi ); auto uip1RCopy = uip1R; RowSwap( A, i, i+1 ); Axpy( -gamma, uip1RCopy, uip1R ); // Force L back to *unit* lower-triangular form via the transform // L := L T_{i,U}^{-1} D^{-1}, // where D is diagonal and responsible for forcing L(i,i) and // L(i+1,i+1) back to 1. The effect on L is: // eta := L(i,i+1)/L(i,i), // L(:,i+1) -= eta L(:,i), // delta_i := L(i,i), // delta_ip1 := L(i+1,i+1), // L(:,i) /= delta_i, // L(:,i+1) /= delta_ip1, // while the effect on U is // U(i,:) += eta U(i+1,:) // U(i,:) *= delta_i, // U(i+1,:) *= delta_{i+1}, // and the effect on w is // w(i) *= delta_i. const F eta = lambdaSub/lambda_ii; const F delta_i = lambda_ii; const F delta_ip1 = F(1) - eta*gamma; Axpy( -eta, lBi, lBip1 ); A(i+1,i) = gamma/delta_i; lBi *= F(1)/delta_i; lBip1 *= F(1)/delta_ip1; A(i,i) = eta*ups_ii*delta_i; Axpy( eta, uip1R, uiR ); uiR *= delta_i; uip1R *= delta_ip1; uSub(i) = ups_ii*delta_ip1; // Finally set w(i) w(i) = omega_ip1*delta_i; } else { // Update // L := L T_{i,L}^{-1}, // U := T_{i,L} U, // w := T_{i,L} w, // where T_{i,L} is the Gauss transform which zeros w_{i+1}. // // More succinctly, // gamma := w(i+1) / w(i), // L(:,i) += gamma L(:,i+1), // U(i+1,:) -= gamma U(i,:), // w(i+1) := 0. const F gamma = omega_ip1 / omega_i; A(i+1,i) += gamma; Axpy( gamma, lBip1, lBi ); Axpy( -gamma, uiR, uip1R ); uSub(i) = -gamma*ups_ii; } } // Add the modified w v' into U { auto a0 = A( IR(0), ALL ); const F omega_0 = w(0); Matrix<F> vTrans; Transpose( v, vTrans, conjugate ); Axpy( omega_0, vTrans, a0 ); } // Transform U from upper-Hessenberg to upper-triangular form for( Int i=0; i<minDim-1; ++i ) { // Decide if we should pivot the i'th and i+1'th rows U const F lambdaSub = A(i+1,i); const F ups_ii = A(i,i); const F ups_ip1i = uSub(i); const Real rightTerm = Abs(lambdaSub*ups_ii+ups_ip1i); const bool pivot = ( Abs(ups_ii) < tau*rightTerm ); const Range<Int> indi( i, i+1 ), indip1( i+1, i+2 ), indB( i+2, m ), indR( i+1, n ); auto lBi = A( indB, indi ); auto lBip1 = A( indB, indip1 ); auto uiR = A( indi, indR ); auto uip1R = A( indip1, indR ); if( pivot ) { // P := P_i P P.Swap( i, i+1 ); // Simultaneously perform // U := P_i U and // L := P_i L P_i^T // // Then update // L := L T_{i,L}^{-1}, // U := T_{i,L} U, // where T_{i,L} is the Gauss transform which zeros U(i+1,i). // // More succinctly, // gamma := U(i+1,i) / U(i,i), // L(:,i) += gamma L(:,i+1), // U(i+1,:) -= gamma U(i,:). const F gamma = ups_ii / ups_ip1i; const F lambda_ii = F(1) + gamma*lambdaSub; A(i+1,i) = ups_ip1i; A(i, i) = gamma; auto lBiCopy = lBi; Swap( NORMAL, lBi, lBip1 ); Axpy( gamma, lBiCopy, lBi ); auto uip1RCopy = uip1R; RowSwap( A, i, i+1 ); Axpy( -gamma, uip1RCopy, uip1R ); // Force L back to *unit* lower-triangular form via the transform // L := L T_{i,U}^{-1} D^{-1}, // where D is diagonal and responsible for forcing L(i,i) and // L(i+1,i+1) back to 1. The effect on L is: // eta := L(i,i+1)/L(i,i), // L(:,i+1) -= eta L(:,i), // delta_i := L(i,i), // delta_ip1 := L(i+1,i+1), // L(:,i) /= delta_i, // L(:,i+1) /= delta_ip1, // while the effect on U is // U(i,:) += eta U(i+1,:) // U(i,:) *= delta_i, // U(i+1,:) *= delta_{i+1}. const F eta = lambdaSub/lambda_ii; const F delta_i = lambda_ii; const F delta_ip1 = F(1) - eta*gamma; Axpy( -eta, lBi, lBip1 ); A(i+1,i) = gamma/delta_i; lBi *= F(1)/delta_i; lBip1 *= F(1)/delta_ip1; A(i,i) = ups_ip1i*delta_i; Axpy( eta, uip1R, uiR ); uiR *= delta_i; uip1R *= delta_ip1; } else { // Update // L := L T_{i,L}^{-1}, // U := T_{i,L} U, // where T_{i,L} is the Gauss transform which zeros U(i+1,i). // // More succinctly, // gamma := U(i+1,i)/ U(i,i), // L(:,i) += gamma L(:,i+1), // U(i+1,:) -= gamma U(i,:). const F gamma = ups_ip1i / ups_ii; A(i+1,i) += gamma; Axpy( gamma, lBip1, lBi ); Axpy( -gamma, uiR, uip1R ); } } }
DCInfo Merge ( Real beta, // The n0 (unsorted) eigenvalues from T0. const Matrix<Real>& w0, // The n1 (unsorted) eigenvalues from T1. const Matrix<Real>& w1, // On exit, the (unsorted) eigenvalues of the merged tridiagonal matrix Matrix<Real>& d, // If ctrl.wantEigVecs is true, then, on entry, a packing of the eigenvectors // from the two subproblems, // // Q = | Q0, 0 |, // | 0, Q1 | // // where Q0 is n0 x n0, and Q1 is n1 x n1. // // If ctrl.wantEigVecs is false, then, on entry, Q is the same as above, but // with only the row that goes through the last row of Q0 and the row that // goes through the first row of Q1 kept. // // If ctrl.wantEigVecs is true, on exit, Q will contain the eigenvectors of // the merged tridiagonal matrix. If ctrl.wantEigVecs is false, then only the // two rows of the result mentioned above will be output. Matrix<Real>& Q, const HermitianTridiagEigCtrl<Real>& ctrl ) { DEBUG_CSE const Int n0 = w0.Height(); const Int n1 = w1.Height(); const Int n = n0 + n1; const auto& dcCtrl = ctrl.dcCtrl; DCInfo info; auto& secularInfo = info.secularInfo; if( ctrl.progress ) Output("n=",n,", n0=",n0,", n1=",n1); Matrix<Real> Q0, Q1; if( ctrl.wantEigVecs ) { // Q = | Q0 0 | // | 0 Q1 | View( Q0, Q, IR(0,n0), IR(0,n0) ); View( Q1, Q, IR(n0,END), IR(n0,END) ); } else { View( Q0, Q, IR(0), IR(0,n0) ); View( Q1, Q, IR(1), IR(n0,END) ); } // Before permutation, // // r = sqrt(2 |beta|) z, // // where // // z = [ sgn(beta)*Q0(n0-1,:), Q1(0,:) ] / sqrt(2). // // But we reorder indices 0 and n0-1 to put r in the first position. Thus, // we must form // // d = [w0(n0-1); w0(0:n0-2); w1] // // and consider the matrix // // diag(d) + 2 |beta| z z'. // // Form d = [w0(n0-1); w0(0:n0-2); w1]. // This effectively cyclically shifts [0,n0) |-> [1,n0+1) mod n0. d.Resize( n, 1 ); d(0) = w0(n0-1); for( Int j=0; j<n0-1; ++j ) { d(j+1) = w0(j); } for( Int j=0; j<n1; ++j ) { d(j+n0) = w1(j); } // Compute the scale of the problem and rescale. We will rescale the // eigenvalues at the end of this routine. Note that LAPACK's {s,d}laed2 // [CITATION] uses max(|beta|,||z||_max), where || z ||_2 = sqrt(2), // which could be much too small if || r ||_2 is much larger than beta // and sqrt(2). Real scale = Max( 2*Abs(beta), MaxNorm(d) ); SafeScale( Real(1), scale, d ); SafeScale( Real(1), scale, beta ); // Now that the problem is rescaled, our deflation tolerance simplifies to // // tol = deflationFudge eps max( || d ||_max, 2*|beta| ) // = deflationFudge eps. // // Cf. LAPACK's {s,d}lasd2 [CITATION] for this tolerance. const Real eps = limits::Epsilon<Real>(); const Real deflationTol = dcCtrl.deflationFudge*eps; Matrix<Real> z(n,1); Matrix<Int> columnTypes(n,1); const Real betaSgn = Sgn( beta, false ); const Int lastRowOfQ0 = ( ctrl.wantEigVecs ? n0-1 : 0 ); const Real sqrtTwo = Sqrt( Real(2) ); z(0) = betaSgn*Q0(lastRowOfQ0,n0-1) / sqrtTwo; columnTypes(0) = DENSE_COLUMN; for( Int j=0; j<n0-1; ++j ) { z(j+1) = betaSgn*Q0(lastRowOfQ0,j) / sqrtTwo; columnTypes(j+1) = COLUMN_NONZERO_IN_FIRST_BLOCK; } for( Int j=0; j<n1; ++j ) { z(j+n0) = Q1(0,j) / sqrtTwo; columnTypes(j+n0) = COLUMN_NONZERO_IN_SECOND_BLOCK; } Permutation combineSortPerm; SortingPermutation( d, combineSortPerm, ASCENDING ); combineSortPerm.PermuteRows( d ); combineSortPerm.PermuteRows( z ); combineSortPerm.PermuteRows( columnTypes ); auto combinedToOrig = [&]( const Int& combinedIndex ) { const Int preCombined = combineSortPerm.Preimage( combinedIndex ); if( preCombined < n0 ) // Undo the cyclic shift [0,n0) |-> [1,n0+1) mod n0 which // pushed the removed row into the first position. return Mod( preCombined-1, n0 ); else return preCombined; }; Permutation deflationPerm; deflationPerm.MakeIdentity( n ); deflationPerm.MakeArbitrary(); // Since we do not yet know how many undeflated entries there will be, we // must use the no-deflation case as our storage upper bound. Matrix<Real> dUndeflated(n,1), zUndeflated(n,1); dUndeflated(0) = 0; zUndeflated(0) = z(0); // Deflate all (off-diagonal) update entries sufficiently close to zero Int numDeflated = 0; Int numUndeflated = 0; // We will keep track of the last column that we encountered that was not // initially deflatable (but that could be deflated later due to close // diagonal entries if another undeflatable column is not encountered // first). Int revivalCandidate = n; for( Int j=0; j<n; ++j ) { if( Abs(2*beta*z(j)) <= deflationTol ) { // We can deflate due to the r component being sufficiently small const Int deflationDest = (n-1) - numDeflated; deflationPerm.SetImage( j, deflationDest ); if( ctrl.progress ) Output ("Deflating via p(",j,")=",deflationDest, " because |2*beta*z(",j,")|=|",2*beta*z(j),"| <= ", deflationTol); columnTypes(j) = DEFLATED_COLUMN; ++numDeflated; ++secularInfo.numDeflations; ++secularInfo.numSmallUpdateDeflations; } else { revivalCandidate = j; if( ctrl.progress ) Output("Breaking initial deflation loop at j=",j); break; } } // If we already fully deflated, then the following loop should be trivial const Int deflationRestart = revivalCandidate+1; for( Int j=deflationRestart; j<n; ++j ) { if( Abs(2*beta*z(j)) <= deflationTol ) { const Int deflationDest = (n-1) - numDeflated; deflationPerm.SetImage( j, deflationDest ); if( ctrl.progress ) Output ("Deflating via p(",j,")=",deflationDest, " because |2*beta*z(",j,")|=|",2*beta*z(j),"| <= ", deflationTol); columnTypes(j) = DEFLATED_COLUMN; ++numDeflated; ++secularInfo.numDeflations; ++secularInfo.numSmallUpdateDeflations; continue; } const Real gamma = SafeNorm( z(j), z(revivalCandidate) ); const Real c = z(j) / gamma; const Real s = z(revivalCandidate) / gamma; const Real offDiagNew = c*s*(d(j)-d(revivalCandidate)); if( Abs(offDiagNew) <= deflationTol ) { // Deflate the previously undeflatable index by rotating // z(revivalCandidate) into z(j) (Cf. the discussion // surrounding Eq. (4.4) of Gu/Eisenstat's TR [CITATION]). // // In particular, we want // // | z(j), z(revivalCandidate) | | c -s | = | gamma, 0 |, // | s c | // // where gamma = || z(revivalCandidate); z(j) ||_2. Putting // // c = z(j) / gamma, // s = z(revivalCandidate) / gamma, // // implies // // | c, s | | z(j) | = | gamma |. // | -s, c | | z(revivalCandidate) | | 0 | // z(j) = gamma; z(revivalCandidate) = 0; // Apply | c -s | to both sides of d // | s c | const Real deltaDeflate = d(revivalCandidate)*(c*c) + d(j)*(s*s); d(j) = d(j)*(c*c) + d(revivalCandidate)*(s*s); d(revivalCandidate) = deltaDeflate; // Apply | c -s | from the right to Q // | s c | // const Int revivalOrig = combinedToOrig( revivalCandidate ); const Int jOrig = combinedToOrig( j ); if( ctrl.wantEigVecs ) { // TODO(poulson): Exploit the nonzero structure of Q? blas::Rot( n, &Q(0,jOrig), 1, &Q(0,revivalOrig), 1, c, s ); } else { blas::Rot( 2, &Q(0,jOrig), 1, &Q(0,revivalOrig), 1, c, s ); } const Int deflationDest = (n-1) - numDeflated; deflationPerm.SetImage( revivalCandidate, deflationDest ); if( ctrl.progress ) Output ("Deflating via p(",revivalCandidate,")=", deflationDest," because |c*s*(d(",j,")-d(",revivalCandidate, "))|=",offDiagNew," <= ",deflationTol); if( columnTypes(revivalCandidate) != columnTypes(j) ) { // We mixed top and bottom columns so the result is dense. columnTypes(j) = DENSE_COLUMN; } columnTypes(revivalCandidate) = DEFLATED_COLUMN; revivalCandidate = j; ++numDeflated; ++secularInfo.numDeflations; ++secularInfo.numCloseDiagonalDeflations; continue; } // We cannot yet deflate index j, so we must give up on the previous // revival candidate and then set revivalCandidate = j. dUndeflated(numUndeflated) = d(revivalCandidate); zUndeflated(numUndeflated) = z(revivalCandidate); deflationPerm.SetImage( revivalCandidate, numUndeflated ); if( ctrl.progress ) Output ("Could not deflate with j=",j," and revivalCandidate=", revivalCandidate,", so p(",revivalCandidate,")=", numUndeflated); ++numUndeflated; revivalCandidate = j; } if( revivalCandidate < n ) { // Give up on the revival candidate dUndeflated(numUndeflated) = d(revivalCandidate); zUndeflated(numUndeflated) = z(revivalCandidate); deflationPerm.SetImage( revivalCandidate, numUndeflated ); if( ctrl.progress ) Output ("Final iteration, so p(",revivalCandidate,")=",numUndeflated); ++numUndeflated; } // Now shrink dUndeflated and zUndeflated down to their proper size dUndeflated.Resize( numUndeflated, 1 ); zUndeflated.Resize( numUndeflated, 1 ); // Count the number of columns of Q with each nonzero pattern std::vector<Int> packingCounts( NUM_DC_COMBINED_COLUMN_TYPES, 0 ); for( Int j=0; j<n; ++j ) ++packingCounts[columnTypes(j)]; DEBUG_ONLY( if( packingCounts[DEFLATED_COLUMN] != numDeflated ) LogicError ("Inconsistency between packingCounts[DEFLATED_COLUMN]=", packingCounts[DEFLATED_COLUMN], " and numDeflated=",numDeflated); )