void TestCorrectness ( bool print, const DistMatrix<Field>& A, const DistPermutation& P, const DistMatrix<Field>& AOrig, Int numRHS=100 ) { typedef Base<Field> Real; const Grid& grid = A.Grid(); const Int n = AOrig.Width(); const Real eps = limits::Epsilon<Real>(); const Real oneNormA = OneNorm( AOrig ); OutputFromRoot(grid.Comm(),"Testing error..."); // Generate random right-hand sides DistMatrix<Field> X(grid); Uniform( X, n, numRHS ); auto Y( X ); const Real oneNormY = OneNorm( Y ); P.PermuteRows( Y ); lu::SolveAfter( NORMAL, A, Y ); // Now investigate the residual, ||AOrig Y - X||_oo Gemm( NORMAL, NORMAL, Field(-1), AOrig, Y, Field(1), X ); const Real infError = InfinityNorm( X ); const Real relError = infError / (eps*n*Max(oneNormA,oneNormY)); // TODO(poulson): Use a rigorous failure condition OutputFromRoot (grid.Comm(),"||A X - Y||_oo / (eps n Max(||A||_1,||Y||_1)) = ",relError); if( relError > Real(1000) ) LogicError("Unacceptably large relative error"); }
void TestCorrectness ( bool print, const DistMatrix<F>& A, const DistPermutation& P, const DistMatrix<F>& AOrig ) { typedef Base<F> Real; const Grid& g = A.Grid(); const Int n = AOrig.Width(); if( g.Rank() == 0 ) Output("Testing error..."); // Generate random right-hand sides DistMatrix<F> X(g); Uniform( X, n, 100 ); auto Y( X ); P.PermuteRows( Y ); lu::SolveAfter( NORMAL, A, Y ); // Now investigate the residual, ||AOrig Y - X||_oo const Real infNormX = InfinityNorm( X ); const Real frobNormX = FrobeniusNorm( X ); Gemm( NORMAL, NORMAL, F(-1), AOrig, Y, F(1), X ); const Real infNormError = InfinityNorm( X ); const Real frobNormError = FrobeniusNorm( X ); const Real infNormA = InfinityNorm( AOrig ); const Real frobNormA = FrobeniusNorm( AOrig ); if( g.Rank() == 0 ) Output ("||A||_oo = ",infNormA,"\n", "||A||_F = ",frobNormA,"\n", "||X||_oo = ",infNormX,"\n", "||X||_F = ",frobNormX,"\n", "||A A^-1 X - X||_oo = ",infNormError,"\n", "||A A^-1 X - X||_F = ",frobNormError); }
void LUMod ( ElementalMatrix<F>& APre, DistPermutation& P, const ElementalMatrix<F>& u, const ElementalMatrix<F>& v, bool conjugate, Base<F> tau ) { DEBUG_CSE const Grid& g = APre.Grid(); typedef Base<F> Real; DistMatrixReadWriteProxy<F,F,MC,MR> AProx( APre ); auto& A = AProx.Get(); const Int m = A.Height(); const Int n = A.Width(); const Int minDim = Min(m,n); if( minDim != m ) LogicError("It is assumed that height(A) <= width(A)"); if( u.Height() != m || u.Width() != 1 ) LogicError("u is expected to be a conforming column vector"); if( v.Height() != n || v.Width() != 1 ) LogicError("v is expected to be a conforming column vector"); AssertSameGrids( A, u, v ); // w := inv(L) P u // TODO: Consider locally maintaining all of w to avoid unnecessarily // broadcasting at every iteration. DistMatrix<F> w( u ); P.PermuteRows( w ); Trsv( LOWER, NORMAL, UNIT, A, w ); // Maintain an external vector for the temporary subdiagonal of U DistMatrix<F,MD,STAR> uSub(g); uSub.SetRoot( A.DiagonalRoot(-1) ); uSub.AlignCols( A.DiagonalAlign(-1) ); Zeros( uSub, minDim-1, 1 ); // Reduce w to a multiple of e0 for( Int i=minDim-2; i>=0; --i ) { // Decide if we should pivot the i'th and i+1'th rows of w const F lambdaSub = A.Get(i+1,i); const F ups_ii = A.Get(i,i); const F omega_i = w.Get( i, 0 ); const F omega_ip1 = w.Get( i+1, 0 ); const Real rightTerm = Abs(lambdaSub*omega_i+omega_ip1); const bool pivot = ( Abs(omega_i) < tau*rightTerm ); const Range<Int> indB( i+2, m ), indR( i+1, n ), indi( i, i+1 ), indip1( i+1, i+2 ); auto lBi = A( indB, indi ); auto lBip1 = A( indB, indip1 ); auto uiR = A( indi, indR ); auto uip1R = A( indip1, indR ); if( pivot ) { // P := P_i P P.Swap( i, i+1 ); // Simultaneously perform // U := P_i U and // L := P_i L P_i^T // // Then update // L := L T_{i,L}^{-1}, // U := T_{i,L} U, // w := T_{i,L} P_i w, // where T_{i,L} is the Gauss transform which zeros (P_i w)_{i+1}. // // More succinctly, // gamma := w(i) / w(i+1), // w(i) := w(i+1), // w(i+1) := 0, // L(:,i) += gamma L(:,i+1), // U(i+1,:) -= gamma U(i,:). const F gamma = omega_i / omega_ip1; const F lambda_ii = F(1) + gamma*lambdaSub; A.Set( i, i, gamma ); A.Set( i+1, i, 0 ); auto lBiCopy = lBi; Swap( NORMAL, lBi, lBip1 ); Axpy( gamma, lBiCopy, lBi ); auto uip1RCopy = uip1R; RowSwap( A, i, i+1 ); Axpy( -gamma, uip1RCopy, uip1R ); // Force L back to *unit* lower-triangular form via the transform // L := L T_{i,U}^{-1} D^{-1}, // where D is diagonal and responsible for forcing L(i,i) and // L(i+1,i+1) back to 1. The effect on L is: // eta := L(i,i+1)/L(i,i), // L(:,i+1) -= eta L(:,i), // delta_i := L(i,i), // delta_ip1 := L(i+1,i+1), // L(:,i) /= delta_i, // L(:,i+1) /= delta_ip1, // while the effect on U is // U(i,:) += eta U(i+1,:) // U(i,:) *= delta_i, // U(i+1,:) *= delta_{i+1}, // and the effect on w is // w(i) *= delta_i. const F eta = lambdaSub/lambda_ii; const F delta_i = lambda_ii; const F delta_ip1 = F(1) - eta*gamma; Axpy( -eta, lBi, lBip1 ); A.Set( i+1, i, gamma/delta_i ); lBi *= F(1)/delta_i; lBip1 *= F(1)/delta_ip1; A.Set( i, i, eta*ups_ii*delta_i ); Axpy( eta, uip1R, uiR ); uiR *= delta_i; uip1R *= delta_ip1; uSub.Set( i, 0, ups_ii*delta_ip1 ); // Finally set w(i) w.Set( i, 0, omega_ip1*delta_i ); } else { // Update // L := L T_{i,L}^{-1}, // U := T_{i,L} U, // w := T_{i,L} w, // where T_{i,L} is the Gauss transform which zeros w_{i+1}. // // More succinctly, // gamma := w(i+1) / w(i), // L(:,i) += gamma L(:,i+1), // U(i+1,:) -= gamma U(i,:), // w(i+1) := 0. const F gamma = omega_ip1 / omega_i; A.Update( i+1, i, gamma ); Axpy( gamma, lBip1, lBi ); Axpy( -gamma, uiR, uip1R ); uSub.Set( i, 0, -gamma*ups_ii ); } } // Add the modified w v' into U { auto a0 = A( IR(0), ALL ); const F omega_0 = w.Get( 0, 0 ); DistMatrix<F> vTrans(g); vTrans.AlignWith( a0 ); Transpose( v, vTrans, conjugate ); Axpy( omega_0, vTrans, a0 ); } // Transform U from upper-Hessenberg to upper-triangular form for( Int i=0; i<minDim-1; ++i ) { // Decide if we should pivot the i'th and i+1'th rows U const F lambdaSub = A.Get( i+1, i ); const F ups_ii = A.Get( i, i ); const F ups_ip1i = uSub.Get( i, 0 ); const Real rightTerm = Abs(lambdaSub*ups_ii+ups_ip1i); const bool pivot = ( Abs(ups_ii) < tau*rightTerm ); const Range<Int> indB( i+2, m ), indR( i+1, n ), indi( i, i+1 ), indip1( i+1, i+2 ); auto lBi = A( indB, indi ); auto lBip1 = A( indB, indip1 ); auto uiR = A( indi, indR ); auto uip1R = A( indip1, indR ); if( pivot ) { // P := P_i P P.Swap( i, i+1 ); // Simultaneously perform // U := P_i U and // L := P_i L P_i^T // // Then update // L := L T_{i,L}^{-1}, // U := T_{i,L} U, // where T_{i,L} is the Gauss transform which zeros U(i+1,i). // // More succinctly, // gamma := U(i+1,i) / U(i,i), // L(:,i) += gamma L(:,i+1), // U(i+1,:) -= gamma U(i,:). const F gamma = ups_ii / ups_ip1i; const F lambda_ii = F(1) + gamma*lambdaSub; A.Set( i+1, i, ups_ip1i ); A.Set( i, i, gamma ); auto lBiCopy = lBi; Swap( NORMAL, lBi, lBip1 ); Axpy( gamma, lBiCopy, lBi ); auto uip1RCopy = uip1R; RowSwap( A, i, i+1 ); Axpy( -gamma, uip1RCopy, uip1R ); // Force L back to *unit* lower-triangular form via the transform // L := L T_{i,U}^{-1} D^{-1}, // where D is diagonal and responsible for forcing L(i,i) and // L(i+1,i+1) back to 1. The effect on L is: // eta := L(i,i+1)/L(i,i), // L(:,i+1) -= eta L(:,i), // delta_i := L(i,i), // delta_ip1 := L(i+1,i+1), // L(:,i) /= delta_i, // L(:,i+1) /= delta_ip1, // while the effect on U is // U(i,:) += eta U(i+1,:) // U(i,:) *= delta_i, // U(i+1,:) *= delta_{i+1}. const F eta = lambdaSub/lambda_ii; const F delta_i = lambda_ii; const F delta_ip1 = F(1) - eta*gamma; Axpy( -eta, lBi, lBip1 ); A.Set( i+1, i, gamma/delta_i ); lBi *= F(1)/delta_i; lBip1 *= F(1)/delta_ip1; A.Set( i, i, ups_ip1i*delta_i ); Axpy( eta, uip1R, uiR ); uiR *= delta_i; uip1R *= delta_ip1; } else { // Update // L := L T_{i,L}^{-1}, // U := T_{i,L} U, // where T_{i,L} is the Gauss transform which zeros U(i+1,i). // // More succinctly, // gamma := U(i+1,i)/ U(i,i), // L(:,i) += gamma L(:,i+1), // U(i+1,:) -= gamma U(i,:). const F gamma = ups_ip1i / ups_ii; A.Update( i+1, i, gamma ); Axpy( gamma, lBip1, lBi ); Axpy( -gamma, uiR, uip1R ); } } }
int main( int argc, char* argv[] ) { Environment env( argc, argv ); const Int worldRank = mpi::Rank(); try { const Int n = Input("--size","width of matrix",100); const bool display = Input("--display","display matrices?",false); const bool print = Input("--print","print matrices?",false); const bool smallestFirst = Input("--smallestFirst","smallest norm first?",false); ProcessInput(); PrintInputReport(); const Int m = n; QRCtrl<Real> ctrl; ctrl.smallestFirst = smallestFirst; DistMatrix<C> A; GKS( A, n ); const Real frobA = FrobeniusNorm( A ); if( display ) Display( A, "A" ); if( print ) Print( A, "A" ); // Compute the pivoted QR decomposition of A, but do not overwrite A auto QRPiv( A ); DistMatrix<C,MD,STAR> tPiv; DistMatrix<Real,MD,STAR> dPiv; DistPermutation Omega; QR( QRPiv, tPiv, dPiv, Omega ); if( display ) { Display( QRPiv, "QRPiv" ); Display( tPiv, "tPiv" ); Display( dPiv, "dPiv" ); DistMatrix<Int> OmegaFull; Omega.ExplicitMatrix( OmegaFull ); Display( OmegaFull, "Omega" ); } if( print ) { Print( QRPiv, "QRPiv" ); Print( tPiv, "tPiv" ); Print( dPiv, "dPiv" ); DistMatrix<Int> OmegaFull; Omega.ExplicitMatrix( OmegaFull ); Print( OmegaFull, "Omega" ); } // Compute the standard QR decomposition of A auto QRNoPiv( A ); DistMatrix<C,MD,STAR> tNoPiv; DistMatrix<Real,MD,STAR> dNoPiv; QR( QRNoPiv, tNoPiv, dNoPiv ); if( display ) { Display( QRNoPiv, "QRNoPiv" ); Display( tNoPiv, "tNoPiv" ); Display( dNoPiv, "dNoPiv" ); } if( print ) { Print( QRNoPiv, "QRNoPiv" ); Print( tNoPiv, "tNoPiv" ); Print( dNoPiv, "dNoPiv" ); } // Check the error in the pivoted QR factorization, // || A Omega^T - Q R ||_F / || A ||_F auto E( QRPiv ); MakeTrapezoidal( UPPER, E ); qr::ApplyQ( LEFT, NORMAL, QRPiv, tPiv, dPiv, E ); Omega.InversePermuteCols( E ); E -= A; const Real frobQRPiv = FrobeniusNorm( E ); if( display ) Display( E, "A P - Q R" ); if( print ) Print( E, "A P - Q R" ); // Check the error in the standard QR factorization, // || A - Q R ||_F / || A ||_F E = QRNoPiv; MakeTrapezoidal( UPPER, E ); qr::ApplyQ( LEFT, NORMAL, QRNoPiv, tNoPiv, dNoPiv, E ); E -= A; const Real frobQRNoPiv = FrobeniusNorm( E ); if( display ) Display( E, "A - Q R" ); if( print ) Print( E, "A - Q R" ); // Check orthogonality of pivoted Q, || I - Q^H Q ||_F / || A ||_F Identity( E, m, n ); qr::ApplyQ( LEFT, NORMAL, QRPiv, tPiv, dPiv, E ); qr::ApplyQ( LEFT, ADJOINT, QRPiv, tPiv, dPiv, E ); const Int k = Min(m,n); auto EUpper = View( E, 0, 0, k, k ); ShiftDiagonal( EUpper, C(-1) ); const Real frobOrthogPiv = FrobeniusNorm( EUpper ); if( display ) Display( E, "pivoted I - Q^H Q" ); if( print ) Print( E, "pivoted I - Q^H Q" ); // Check orthogonality of unpivoted Q, || I - Q^H Q ||_F / || A ||_F Identity( E, m, n ); qr::ApplyQ( LEFT, NORMAL, QRPiv, tPiv, dPiv, E ); qr::ApplyQ( LEFT, ADJOINT, QRPiv, tPiv, dPiv, E ); EUpper = View( E, 0, 0, k, k ); ShiftDiagonal( EUpper, C(-1) ); const Real frobOrthogNoPiv = FrobeniusNorm( EUpper ); if( display ) Display( E, "unpivoted I - Q^H Q" ); if( print ) Print( E, "unpivoted I - Q^H Q" ); if( worldRank == 0 ) Output ("|| A ||_F = ",frobA,"\n\n", "With pivoting: \n", " || A P - Q R ||_F / || A ||_F = ",frobQRPiv/frobA,"\n", " || I - Q^H Q ||_F / || A ||_F = ",frobOrthogPiv/frobA,"\n\n", "Without pivoting: \n", " || A - Q R ||_F / || A ||_F = ",frobQRNoPiv/frobA,"\n", " || I - Q^H Q ||_F / || A ||_F = ",frobOrthogNoPiv/frobA,"\n"); } catch( exception& e ) { ReportException(e); } return 0; }