inline F Trace( const DistMatrix<F,MC,MR>& A ) { #ifndef RELEASE PushCallStack("Trace"); #endif if( A.Height() != A.Width() ) throw std::logic_error("Cannot compute trace of nonsquare matrix"); const Grid& g = A.Grid(); DistMatrix<F,MD,STAR> d(g); A.GetDiagonal( d ); F localTrace = 0; if( d.InDiagonal() ) { const int nLocalDiag = d.LocalHeight(); for( int iLocal=0; iLocal<nLocalDiag; ++iLocal ) localTrace += d.GetLocalEntry(iLocal,0); } F trace; mpi::AllReduce( &localTrace, &trace, 1, mpi::SUM, g.VCComm() ); #ifndef RELEASE PopCallStack(); #endif return trace; }
inline void MakeSymmetric( UpperOrLower uplo, DistMatrix<T>& A ) { #ifndef RELEASE PushCallStack("MakeSymmetric"); #endif if( A.Height() != A.Width() ) throw std::logic_error("Cannot make non-square matrix symmetric"); const Grid& g = A.Grid(); DistMatrix<T,MD,STAR> d(g); A.GetDiagonal( d ); if( uplo == LOWER ) MakeTrapezoidal( LEFT, LOWER, -1, A ); else MakeTrapezoidal( LEFT, UPPER, +1, A ); DistMatrix<T> ATrans(g); Transpose( A, ATrans ); Axpy( T(1), ATrans, A ); A.SetDiagonal( d ); #ifndef RELEASE PopCallStack(); #endif }
inline void SimpleSingularValuesUpper ( DistMatrix<Real>& A, DistMatrix<Real,VR,STAR>& s ) { #ifndef RELEASE PushCallStack("svd::SimpleSingularValuesUpper"); #endif const int m = A.Height(); const int n = A.Width(); const int k = std::min( m, n ); const int offdiagonal = ( m>=n ? 1 : -1 ); const char uplo = ( m>=n ? 'U' : 'L' ); const Grid& g = A.Grid(); // Bidiagonalize A Bidiag( A ); // Grab copies of the diagonal and sub/super-diagonal of A DistMatrix<Real,MD,STAR> d_MD_STAR( g ), e_MD_STAR( g ); A.GetDiagonal( d_MD_STAR ); A.GetDiagonal( e_MD_STAR, offdiagonal ); // In order to use serial QR kernels, we need the full bidiagonal matrix // on each process // // NOTE: lapack::BidiagQRAlg expects e to be of length k DistMatrix<Real,STAR,STAR> d_STAR_STAR( d_MD_STAR ); DistMatrix<Real,STAR,STAR> eHat_STAR_STAR( k, 1, g ); DistMatrix<Real,STAR,STAR> e_STAR_STAR( g ); e_STAR_STAR.View( eHat_STAR_STAR, 0, 0, k-1, 1 ); e_STAR_STAR = e_MD_STAR; // Compute the singular values of the bidiagonal matrix lapack::BidiagQRAlg ( uplo, k, 0, 0, d_STAR_STAR.LocalBuffer(), e_STAR_STAR.LocalBuffer(), (Real*)0, 1, (Real*)0, 1 ); // Copy out the appropriate subset of the singular values s = d_STAR_STAR; #ifndef RELEASE PopCallStack(); #endif }
inline SafeProduct<F> SafeDeterminant( DistMatrix<F,MC,MR>& A ) { #ifndef RELEASE PushCallStack("SafeDeterminant"); #endif if( A.Height() != A.Width() ) throw std::logic_error ("Cannot compute determinant of nonsquare matrix"); typedef typename Base<F>::type R; const int n = A.Height(); SafeProduct<F> det( n ); const Grid& g = A.Grid(); try { DistMatrix<int,VC,STAR> p; LU( A, p ); const bool isOdd = PivotParity( p ); DistMatrix<F,MD,STAR> d(g); A.GetDiagonal( d ); F localRho = 1; R localKappa = 0; if( d.InDiagonal() ) { const int nLocalDiag = d.LocalHeight(); for( int iLocal=0; iLocal<nLocalDiag; ++iLocal ) { const F delta = d.GetLocalEntry(iLocal,0); R alpha = Abs(delta); localRho *= delta/alpha; localKappa += std::log(alpha)/n; } } mpi::AllReduce( &localRho, &det.rho, 1, mpi::PROD, g.VCComm() ); mpi::AllReduce( &localKappa, &det.kappa, 1, mpi::SUM, g.VCComm() ); if( isOdd ) det.rho = -det.rho; } catch( SingularMatrixException& e ) { det.rho = 0; det.kappa = 0; } #ifndef RELEASE PopCallStack(); #endif return det; }
inline SafeProduct<F> LUPartialPiv( DistMatrix<F>& A ) { #ifndef RELEASE CallStackEntry entry("determinant::LUPartialPiv"); #endif if( A.Height() != A.Width() ) throw std::logic_error ("Cannot compute determinant of nonsquare matrix"); typedef BASE(F) R; const int n = A.Height(); const R scale(n); SafeProduct<F> det( n ); const Grid& g = A.Grid(); try { DistMatrix<int,VC,STAR> p(g); elem::LU( A, p ); const bool isOdd = PivotParity( p ); DistMatrix<F,MD,STAR> d(g); A.GetDiagonal( d ); F localRho = 1; R localKappa = 0; if( d.Participating() ) { const int nLocalDiag = d.LocalHeight(); for( int iLocal=0; iLocal<nLocalDiag; ++iLocal ) { const F delta = d.GetLocal(iLocal,0); R alpha = Abs(delta); localRho *= delta/alpha; localKappa += Log(alpha)/scale; } } mpi::AllReduce( &localRho, &det.rho, 1, mpi::PROD, g.VCComm() ); mpi::AllReduce( &localKappa, &det.kappa, 1, mpi::SUM, g.VCComm() ); if( isOdd ) det.rho = -det.rho; } catch( SingularMatrixException& e ) { det.rho = 0; det.kappa = 0; } return det; }
inline SafeProduct<F> SafeHPDDeterminantWithOverwrite( UpperOrLower uplo, DistMatrix<F>& A ) { #ifndef RELEASE PushCallStack("internal::SafeHPDDeterminantWithOverwrite"); #endif if( A.Height() != A.Width() ) throw std::logic_error ("Cannot compute determinant of nonsquare matrix"); typedef typename Base<F>::type R; const int n = A.Height(); const R scale = R(n)/R(2); SafeProduct<F> det( n ); const Grid& g = A.Grid(); try { Cholesky( uplo, A ); DistMatrix<F,MD,STAR> d(g); A.GetDiagonal( d ); R localKappa = 0; if( d.InDiagonal() ) { const int nLocalDiag = d.LocalHeight(); for( int iLocal=0; iLocal<nLocalDiag; ++iLocal ) { const R delta = RealPart(d.GetLocal(iLocal,0)); localKappa += Log(delta)/scale; } } mpi::AllReduce( &localKappa, &det.kappa, 1, mpi::SUM, g.VCComm() ); det.rho = F(1); } catch( NonHPDMatrixException& e ) { det.rho = 0; det.kappa = 0; } #ifndef RELEASE PopCallStack(); #endif return det; }
inline SafeProduct<F> AfterLUPartialPiv( const DistMatrix<F>& A, const DistMatrix<Int,VC,STAR>& p ) { #ifndef RELEASE CallStackEntry entry("determinant::AfterLUPartialPiv"); #endif if( A.Height() != A.Width() ) LogicError("Cannot compute determinant of nonsquare matrix"); if( A.Grid() != p.Grid() ) LogicError("A and p must have the same grid"); if( A.Height() != p.Height() ) LogicError("Pivot vector is incorrect length"); typedef BASE(F) R; const Int n = A.Height(); const Grid& g = A.Grid(); DistMatrix<F,MD,STAR> d(g); A.GetDiagonal( d ); F localRho = 1; R localKappa = 0; if( d.Participating() ) { const R scale(n); const Int nLocalDiag = d.LocalHeight(); for( Int iLoc=0; iLoc<nLocalDiag; ++iLoc ) { const F delta = d.GetLocal(iLoc,0); R alpha = Abs(delta); localRho *= delta/alpha; localKappa += Log(alpha)/scale; } } SafeProduct<F> det( n ); det.rho = mpi::AllReduce( localRho, mpi::PROD, g.VCComm() ); det.kappa = mpi::AllReduce( localKappa, mpi::SUM, g.VCComm() ); const bool isOdd = PivotParity( p ); if( isOdd ) det.rho = -det.rho; return det; }
void TestCorrectness ( const DistMatrix<F>& A, const DistMatrix<F,STAR,STAR>& tP, const DistMatrix<F,STAR,STAR>& tQ, DistMatrix<F>& AOrig, bool print, bool display ) { typedef Base<F> Real; const Grid& g = A.Grid(); const Int m = AOrig.Height(); const Int n = AOrig.Width(); const Real infNormAOrig = InfinityNorm( AOrig ); const Real frobNormAOrig = FrobeniusNorm( AOrig ); if( g.Rank() == 0 ) cout << "Testing error..." << endl; // Grab the diagonal and superdiagonal of the bidiagonal matrix auto d = A.GetDiagonal( 0 ); auto e = A.GetDiagonal( (m>=n ? 1 : -1) ); // Zero B and then fill its bidiagonal DistMatrix<F> B(g); B.AlignWith( A ); Zeros( B, m, n ); B.SetDiagonal( d, 0 ); B.SetDiagonal( e, (m>=n ? 1 : -1) ); if( print ) Print( B, "Bidiagonal" ); if( display ) Display( B, "Bidiagonal" ); if( print || display ) { DistMatrix<F> Q(g), P(g); Identity( Q, m, m ); Identity( P, n, n ); bidiag::ApplyQ( LEFT, NORMAL, A, tQ, Q ); bidiag::ApplyP( RIGHT, NORMAL, A, tP, P ); if( print ) { Print( Q, "Q" ); Print( P, "P" ); } if( display ) { Display( Q, "Q" ); Display( P, "P" ); } } // Reverse the accumulated Householder transforms bidiag::ApplyQ( LEFT, ADJOINT, A, tQ, AOrig ); bidiag::ApplyP( RIGHT, NORMAL, A, tP, AOrig ); if( print ) Print( AOrig, "Manual bidiagonal" ); if( display ) Display( AOrig, "Manual bidiagonal" ); // Compare the appropriate portion of AOrig and B if( m >= n ) { MakeTriangular( UPPER, AOrig ); MakeTrapezoidal( LOWER, AOrig, 1 ); } else { MakeTriangular( LOWER, AOrig ); MakeTrapezoidal( UPPER, AOrig, -1 ); } Axpy( F(-1), AOrig, B ); if( print ) Print( B, "Error in rotated bidiagonal" ); if( display ) Display( B, "Error in rotated bidiagonal" ); const Real infNormError = InfinityNorm( B ); const Real frobNormError = FrobeniusNorm( B ); if( g.Rank() == 0 ) { cout << " ||A||_oo = " << infNormAOrig << "\n" << " ||A||_F = " << frobNormAOrig << "\n" << " ||B - Q^H A P||_oo = " << infNormError << "\n" << " ||B - Q^H A P||_F = " << frobNormError << endl; } }
void TestCorrectness ( bool print, UpperOrLower uplo, const DistMatrix<R>& A, DistMatrix<R>& AOrig ) { const Grid& g = A.Grid(); const int m = AOrig.Height(); int subdiagonal = ( uplo==LOWER ? -1 : +1 ); if( g.Rank() == 0 ) cout << "Testing error..." << endl; // Grab the diagonal and subdiagonal of the symmetric tridiagonal matrix DistMatrix<R,MD,STAR> d(g); DistMatrix<R,MD,STAR> e(g); A.GetDiagonal( d ); A.GetDiagonal( e, subdiagonal ); // Grab a full copy of e so that we may fill the opposite subdiagonal // The unaligned [MD,STAR] <- [MD,STAR] redistribution is not yet written, // so go around it via [MD,STAR] <- [STAR,STAR] <- [MD,STAR] DistMatrix<R,STAR,STAR> e_STAR_STAR(g); DistMatrix<R,MD,STAR> eOpposite(g); e_STAR_STAR = e; eOpposite.AlignWithDiagonal( A, -subdiagonal ); eOpposite = e_STAR_STAR; // Zero B and then fill its tridiagonal DistMatrix<R> B(g); B.AlignWith( A ); Zeros( m, m, B ); B.SetDiagonal( d ); B.SetDiagonal( e, subdiagonal ); B.SetDiagonal( eOpposite, -subdiagonal ); // Reverse the accumulated Householder transforms, ignoring symmetry if( uplo == LOWER ) { ApplyPackedReflectors ( LEFT, LOWER, VERTICAL, BACKWARD, subdiagonal, A, B ); ApplyPackedReflectors ( RIGHT, LOWER, VERTICAL, BACKWARD, subdiagonal, A, B ); } else { ApplyPackedReflectors ( LEFT, UPPER, VERTICAL, FORWARD, subdiagonal, A, B ); ApplyPackedReflectors ( RIGHT, UPPER, VERTICAL, FORWARD, subdiagonal, A, B ); } // Compare the appropriate triangle of AOrig and B MakeTriangular( uplo, AOrig ); MakeTriangular( uplo, B ); Axpy( R(-1), AOrig, B ); const R infNormOfAOrig = HermitianNorm( uplo, AOrig, INFINITY_NORM ); const R frobNormOfAOrig = HermitianNorm( uplo, AOrig, FROBENIUS_NORM ); const R infNormOfError = HermitianNorm( uplo, B, INFINITY_NORM ); const R frobNormOfError = HermitianNorm( uplo, B, FROBENIUS_NORM ); if( g.Rank() == 0 ) { cout << " ||AOrig||_1 = ||AOrig||_oo = " << infNormOfAOrig << "\n" << " ||AOrig||_F = " << frobNormOfAOrig << "\n" << " ||A - Q^H T Q||_oo = " << infNormOfError << "\n" << " ||A - Q^H T Q||_F = " << frobNormOfError << endl; } }
inline void SimpleSVDUpper ( DistMatrix<double>& A, DistMatrix<double,VR,STAR>& s, DistMatrix<double>& V ) { #ifndef RELEASE PushCallStack("svd::SimpleSVDUpper"); #endif typedef double Real; typedef Complex<Real> C; const int m = A.Height(); const int n = A.Width(); const int k = std::min( m, n ); const int offdiagonal = ( m>=n ? 1 : -1 ); const char uplo = ( m>=n ? 'U' : 'L' ); const Grid& g = A.Grid(); // Bidiagonalize A Bidiag( A ); // Grab copies of the diagonal and sub/super-diagonal of A DistMatrix<Real,MD,STAR> d_MD_STAR( g ), e_MD_STAR( g ); A.GetDiagonal( d_MD_STAR ); A.GetDiagonal( e_MD_STAR, offdiagonal ); // In order to use serial QR kernels, we need the full bidiagonal matrix // on each process DistMatrix<Real,STAR,STAR> d_STAR_STAR( d_MD_STAR ), e_STAR_STAR( e_MD_STAR ); // Initialize U and V to the appropriate identity matrices. DistMatrix<Real,VC,STAR> U_VC_STAR( g ); DistMatrix<Real,VC,STAR> V_VC_STAR( g ); U_VC_STAR.AlignWith( A ); V_VC_STAR.AlignWith( V ); Identity( m, k, U_VC_STAR ); Identity( n, k, V_VC_STAR ); // Compute the SVD of the bidiagonal matrix and accumulate the Givens // rotations into our local portion of U and V // NOTE: This _only_ works in the case where m >= n const int numAccum = 32; const int maxNumIts = 30; const int bAlg = 512; std::vector<C> GBuffer( (k-1)*numAccum ), HBuffer( (k-1)*numAccum ); FLA_Bsvd_v_opd_var1 ( k, U_VC_STAR.LocalHeight(), V_VC_STAR.LocalHeight(), numAccum, maxNumIts, d_STAR_STAR.LocalBuffer(), 1, e_STAR_STAR.LocalBuffer(), 1, &GBuffer[0], 1, k-1, &HBuffer[0], 1, k-1, U_VC_STAR.LocalBuffer(), 1, U_VC_STAR.LocalLDim(), V_VC_STAR.LocalBuffer(), 1, V_VC_STAR.LocalLDim(), bAlg ); // Make a copy of A (for the Householder vectors) and pull the necessary // portions of U and V into a standard matrix dist. DistMatrix<Real> B( A ); if( m >= n ) { DistMatrix<Real> AT( g ), AB( g ); DistMatrix<Real,VC,STAR> UT_VC_STAR( g ), UB_VC_STAR( g ); PartitionDown( A, AT, AB, n ); PartitionDown( U_VC_STAR, UT_VC_STAR, UB_VC_STAR, n ); AT = UT_VC_STAR; MakeZeros( AB ); V = V_VC_STAR; } else { DistMatrix<Real> VT( g ), VB( g ); DistMatrix<Real,VC,STAR> VT_VC_STAR( g ), VB_VC_STAR( g ); PartitionDown( V, VT, VB, m ); PartitionDown ( V_VC_STAR, VT_VC_STAR, VB_VC_STAR, m ); VT = VT_VC_STAR; MakeZeros( VB ); } // Backtransform U and V if( m >= n ) { ApplyPackedReflectors ( LEFT, LOWER, VERTICAL, BACKWARD, 0, B, A ); ApplyPackedReflectors ( LEFT, UPPER, HORIZONTAL, BACKWARD, 1, B, V ); } else { ApplyPackedReflectors ( LEFT, LOWER, VERTICAL, BACKWARD, -1, B, A ); ApplyPackedReflectors ( LEFT, UPPER, HORIZONTAL, BACKWARD, 0, B, V ); } // Copy out the appropriate subset of the singular values s = d_STAR_STAR; #ifndef RELEASE PopCallStack(); #endif }
inline void SimpleSVDUpper ( DistMatrix<Real>& A, DistMatrix<Real,VR,STAR>& s, DistMatrix<Real>& V ) { #ifndef RELEASE PushCallStack("svd::SimpleSVDUpper"); #endif const int m = A.Height(); const int n = A.Width(); const int k = std::min( m, n ); const int offdiagonal = ( m>=n ? 1 : -1 ); const char uplo = ( m>=n ? 'U' : 'L' ); const Grid& g = A.Grid(); // Bidiagonalize A Bidiag( A ); // Grab copies of the diagonal and sub/super-diagonal of A DistMatrix<Real,MD,STAR> d_MD_STAR( g ), e_MD_STAR( g ); A.GetDiagonal( d_MD_STAR ); A.GetDiagonal( e_MD_STAR, offdiagonal ); // In order to use serial QR kernels, we need the full bidiagonal matrix // on each process. // // NOTE: lapack::BidiagQRAlg expects e to be of length k DistMatrix<Real,STAR,STAR> d_STAR_STAR( d_MD_STAR ); DistMatrix<Real,STAR,STAR> eHat_STAR_STAR( k, 1, g ); DistMatrix<Real,STAR,STAR> e_STAR_STAR( g ); e_STAR_STAR.View( eHat_STAR_STAR, 0, 0, k-1, 1 ); e_STAR_STAR = e_MD_STAR; // Initialize U and VTrans to the appropriate identity matrices. DistMatrix<Real,VC,STAR> U_VC_STAR( g ); DistMatrix<Real,STAR,VC> VTrans_STAR_VC( g ); U_VC_STAR.AlignWith( A ); VTrans_STAR_VC.AlignWith( V ); Identity( m, k, U_VC_STAR ); Identity( k, n, VTrans_STAR_VC ); // Compute the SVD of the bidiagonal matrix and accumulate the Givens // rotations into our local portion of U and VTrans Matrix<Real>& ULocal = U_VC_STAR.LocalMatrix(); Matrix<Real>& VTransLocal = VTrans_STAR_VC.LocalMatrix(); lapack::BidiagQRAlg ( uplo, k, VTransLocal.Width(), ULocal.Height(), d_STAR_STAR.LocalBuffer(), e_STAR_STAR.LocalBuffer(), VTransLocal.Buffer(), VTransLocal.LDim(), ULocal.Buffer(), ULocal.LDim() ); // Make a copy of A (for the Householder vectors) and pull the necessary // portions of U and VTrans into a standard matrix dist. DistMatrix<Real> B( A ); if( m >= n ) { DistMatrix<Real> AT( g ), AB( g ); DistMatrix<Real,VC,STAR> UT_VC_STAR( g ), UB_VC_STAR( g ); PartitionDown( A, AT, AB, n ); PartitionDown( U_VC_STAR, UT_VC_STAR, UB_VC_STAR, n ); AT = UT_VC_STAR; MakeZeros( AB ); Transpose( VTrans_STAR_VC, V ); } else { DistMatrix<Real> VT( g ), VB( g ); DistMatrix<Real,STAR,VC> VTransL_STAR_VC( g ), VTransR_STAR_VC( g ); PartitionDown( V, VT, VB, m ); PartitionRight( VTrans_STAR_VC, VTransL_STAR_VC, VTransR_STAR_VC, m ); Transpose( VTransL_STAR_VC, VT ); MakeZeros( VB ); } // Backtransform U and V if( m >= n ) { ApplyPackedReflectors ( LEFT, LOWER, VERTICAL, BACKWARD, 0, B, A ); ApplyPackedReflectors ( LEFT, UPPER, HORIZONTAL, BACKWARD, 1, B, V ); } else { ApplyPackedReflectors ( LEFT, LOWER, VERTICAL, BACKWARD, -1, B, A ); ApplyPackedReflectors ( LEFT, UPPER, HORIZONTAL, BACKWARD, 0, B, V ); } // Copy out the appropriate subset of the singular values s = d_STAR_STAR; #ifndef RELEASE PopCallStack(); #endif }