// Overwrite every entry of A with zero.
//
// The matrix is cleared column by column: each column's `Height()` entries,
// starting at A.Buffer(0,j), are handed to MemZero. When HAVE_OPENMP is
// defined the column loop carries an OpenMP parallel-for pragma.
inline void
Zero( Matrix<T>& A )
{
#ifndef RELEASE
    PushCallStack("Zero");
#endif
    const int numRows = A.Height();
    const int numCols = A.Width();
#ifdef HAVE_OPENMP
    #pragma omp parallel for
#endif
    for( int col=0; col<numCols; ++col )
    {
        MemZero( A.Buffer(0,col), numRows );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
// Overwrite the m x n matrix A with its n x m pseudoinverse.
//
// The routine takes an SVD of a copy of A, inverts every singular value that
// lies strictly above the tolerance k ||A||_2 eps (k = max(m,n)), zeroes the
// remaining ones, and reassembles pinv(A) = V (U Sigma^+)^H.
//
// Singular values equal to the tolerance are treated as zero. In particular,
// for an all-zero A the tolerance is zero and every singular value is zero,
// so they must be cut off rather than inverted (otherwise 1/0 would be
// formed and the result would be filled with Inf/NaN).
inline void
Pseudoinverse( DistMatrix<F>& A )
{
#ifndef RELEASE
    PushCallStack("Pseudoinverse");
#endif
    typedef typename Base<F>::type R;

    const Grid& g = A.Grid();
    const int m = A.Height();
    const int n = A.Width();
    const int k = std::max(m,n);

    // Get the SVD of A (U starts as a copy of A and is passed to SVD)
    DistMatrix<R,VR,STAR> s(g);
    DistMatrix<F> U(g), V(g);
    U = A;
    SVD( U, s, V );

    // Compute the two-norm of A as the maximum singular value
    const R twoNorm = Norm( s, INFINITY_NORM );

    // Set the tolerance equal to k ||A||_2 eps and invert strictly above it
    const R eps = lapack::MachineEpsilon<R>();
    const R tolerance = k*twoNorm*eps;
    const int numLocalVals = s.LocalHeight();
    for( int iLocal=0; iLocal<numLocalVals; ++iLocal )
    {
        const R sigma = s.GetLocal(iLocal,0);
        // '<=' (not '<') so that sigma == tolerance == 0 is never inverted
        if( sigma <= tolerance )
            s.SetLocal(iLocal,0,0);
        else
            s.SetLocal(iLocal,0,1/sigma);
    }

    // Scale U with the (pseudo-inverted) singular values, U := U Sigma
    DiagonalScale( RIGHT, NORMAL, s, U );

    // Form pinvA = (U Sigma V^H)^H = V (U Sigma)^H
    Zeros( n, m, A );
    Gemm( NORMAL, ADJOINT, F(1), V, U, F(0), A );
#ifndef RELEASE
    PopCallStack();
#endif
}
inline void Walsh( int k, Matrix<T>& A, bool binary ) { #ifndef RELEASE PushCallStack("Walsh"); #endif if( k < 1 ) throw std::logic_error("Walsh matrices are only defined for k>=1"); const unsigned n = 1u<<k; A.ResizeTo( n, n ); // Run a simple O(n^2 log n) algorithm for computing the entries // based upon successive sign flips const T onValue = 1; const T offValue = ( binary ? 0 : -1 ); for( unsigned j=0; j<n; ++j ) { for( unsigned i=0; i<n; ++i ) { // Recurse on the quadtree, flipping the sign of the entry each // time we are in the bottom-right quadrant unsigned r = i; unsigned s = j; unsigned t = n; bool on = true; while( t != 1u ) { t >>= 1; if( r >= t && s >= t ) on = !on; r %= t; s %= t; } if( on ) A.Set( i, j, onValue ); else A.Set( i, j, offValue ); } } #ifndef RELEASE PopCallStack(); #endif }
// Blocked LQ sweep over A for real datatypes.
//
// A is traversed down its diagonal. At every step the top panel [A11 A12]
// is processed by internal::PanelLQ, and the reflectors packed into that
// panel are then applied from the right to the bottom panel [A21 A22].
// Throws std::logic_error when instantiated with a complex datatype.
inline void
LQ( Matrix<Real>& A )
{
#ifndef RELEASE
    PushCallStack("LQ");
#endif
    if( IsComplex<Real>::val )
        throw std::logic_error("Called real routine with complex datatype");

    // Views into A used by the partitioning below
    Matrix<Real> ATL, ATR, ABL, ABR;
    Matrix<Real> A00, A01, A02;
    Matrix<Real> A10, A11, A12;
    Matrix<Real> A20, A21, A22;
    Matrix<Real> ATopPan, ABottomPan;

    PartitionDownLeftDiagonal( A, ATL, ATR, ABL, ABR, 0 );

    // Continue until the top-left block has consumed all rows or all columns
    while( ATL.Height() < A.Height() && ATL.Width() < A.Width() )
    {
        // Expose the next diagonal block A11 and its neighbours
        RepartitionDownDiagonal
        ( ATL, ATR,
          A00, A01, A02,
          A10, A11, A12,
          ABL, ABR,
          A20, A21, A22 );

        // Current block row and everything below it, right of A10/A20
        View1x2( ATopPan, A11, A12 );
        View1x2( ABottomPan, A21, A22 );

        //--------------------------------------------------------------------//
        internal::PanelLQ( ATopPan );
        ApplyPackedReflectors
        ( RIGHT, UPPER, HORIZONTAL, FORWARD, 0, ATopPan, ABottomPan );
        //--------------------------------------------------------------------//

        // Absorb the processed block into the top-left partition
        SlidePartitionDownDiagonal
        ( ATL, ATR,
          A00, A01, A02,
          A10, A11, A12,
          ABL, ABR,
          A20, A21, A22 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
inline void HPSDSquareRoot( UpperOrLower uplo, DistMatrix<R,MC,MR>& A ) { #ifndef RELEASE PushCallStack("HPSDSquareRoot"); #endif // Get the EVD of A const Grid& g = A.Grid(); DistMatrix<R,VR,STAR> w(g); DistMatrix<R,MC,MR> Z(g); HermitianEig( uplo, A, w, Z ); // Compute the two-norm of A as the maximum absolute value // of its eigenvalues R maxLocalAbsEig = 0; const int localHeight = w.LocalHeight(); for( int iLocal=0; iLocal<localHeight; ++iLocal ) maxLocalAbsEig = std::max(maxLocalAbsEig,Abs(w.GetLocalEntry(iLocal,0))); R twoNorm; mpi::AllReduce( &maxLocalAbsEig, &twoNorm, 1, mpi::MAX, g.VCComm() ); // Compute the smallest eigenvalue of A R minLocalEig = twoNorm; for( int iLocal=0; iLocal<localHeight; ++iLocal ) minLocalEig = std::min(minLocalEig,w.GetLocalEntry(iLocal,0)); R minEig; mpi::AllReduce( &minLocalEig, &minEig, 1, mpi::MIN, g.VCComm() ); // Set the tolerance equal to n ||A||_2 eps const int n = A.Height(); const R eps = lapack::MachineEpsilon<R>(); const R tolerance = n*twoNorm*eps; // Ensure that the minimum eigenvalue is not less than - n ||A||_2 eps if( minEig < -tolerance ) throw NonHPSDMatrixException(); // Form the pseudoinverse square_root::Functor<R> f( tolerance ); hermitian_function::ReformHermitianMatrix( uplo, A, w, Z, f ); #ifndef RELEASE PopCallStack(); #endif }
// Compute the log-det divergence of the square matrices A and B.
//
// Copies of A and B are Cholesky-factored (triangle selected by `uplo`), the
// factor of A is solved against the factor of B from the left, and the
// result X is used to return
//     ||X||_F^2 - sum_i 2 Log(RealPart(X(i,i))) - n.
// Throws std::logic_error unless A and B are square and of equal size.
//
// NOTE(review): for uplo == UPPER the solve is also applied from the LEFT;
// confirm against the intended formula that this (rather than a RIGHT solve)
// is what is wanted for the Frobenius-norm term.
inline typename Base<F>::type
LogDetDivergence
( UpperOrLower uplo, const Matrix<F>& A, const Matrix<F>& B )
{
#ifndef RELEASE
    PushCallStack("LogDetDivergence");
#endif
    if( A.Height() != A.Width() || B.Height() != B.Width() ||
        A.Height() != B.Height() )
        throw std::logic_error
        ("A and B must be square matrices of the same size");

    typedef typename Base<F>::type R;
    const int n = A.Height();

    // Work on copies so that the inputs are left untouched
    Matrix<F> ACopy( A );
    Matrix<F> BCopy( B );
    Cholesky( uplo, ACopy );
    Cholesky( uplo, BCopy );

    if( uplo != LOWER )
    {
        // Clear the entries outside the factor's triangle before the
        // general triangular solve
        MakeTrapezoidal( LEFT, uplo, 0, ACopy );
        Trsm( LEFT, uplo, NORMAL, NON_UNIT, F(1), BCopy, ACopy );
    }
    else
    {
        Trtrsm( LEFT, uplo, NORMAL, NON_UNIT, F(1), BCopy, ACopy );
    }

    // Only the `uplo` triangle of the result contributes to the norm
    MakeTrapezoidal( LEFT, uplo, 0, ACopy );
    const R frobNorm = Norm( ACopy, FROBENIUS_NORM );

    // Accumulate the log-determinant term from the diagonal
    Matrix<F> d;
    ACopy.GetDiagonal( d );
    R logDet(0);
    for( int i=0; i<n; ++i )
    {
        const R diagEntry = RealPart(d.Get(i,0));
        logDet += 2*Log( diagEntry );
    }

    const R logDetDiv = frobNorm*frobNorm - logDet - R(n);
#ifndef RELEASE
    PopCallStack();
#endif
    return logDetDiv;
}
inline void TrrkNNKernel ( UpperOrLower uplo, T alpha, const Matrix<T>& A, const Matrix<T>& B, T beta, Matrix<T>& C ) { #ifndef RELEASE PushCallStack("TrrkNNKernel"); CheckInputNN( A, B, C ); #endif Matrix<T> AT, AB; Matrix<T> BL, BR; Matrix<T> CTL, CTR, CBL, CBR; Matrix<T> DTL, DBR; const int half = C.Height()/2; Scale( beta, C ); LockedPartitionDown ( A, AT, AB, half ); LockedPartitionRight( B, BL, BR, half ); PartitionDownDiagonal ( C, CTL, CTR, CBL, CBR, half ); DTL.ResizeTo( CTL.Height(), CTL.Width() ); DBR.ResizeTo( CBR.Height(), CBR.Width() ); //------------------------------------------------------------------------// if( uplo == LOWER ) Gemm( NORMAL, NORMAL, alpha, AB, BL, T(1), CBL ); else Gemm( NORMAL, NORMAL, alpha, AT, BR, T(1), CTR ); Gemm( NORMAL, NORMAL, alpha, AT, BL, T(0), DTL ); AxpyTriangle( uplo, T(1), DTL, CTL ); Gemm( NORMAL, NORMAL, alpha, AB, BR, T(0), DBR ); AxpyTriangle( uplo, T(1), DBR, CBR ); //------------------------------------------------------------------------// #ifndef RELEASE PopCallStack(); #endif }
inline void Hankel( int m, int n, const std::vector<T>& a, Matrix<T>& A ) { #ifndef RELEASE PushCallStack("Hankel"); #endif const int length = m+n-1; if( a.size() != (unsigned)length ) throw std::logic_error("a was the wrong size"); A.ResizeTo( m, n ); for( int j=0; j<n; ++j ) for( int i=0; i<n; ++i ) A.Set( i, j, a[i+j] ); #ifndef RELEASE PopCallStack(); #endif }
// Dispatch for the Left/Upper/Normal triangular matrix-matrix multiply.
//
// Chooses between the 'A' and 'C' implementations with a simple heuristic:
// TrmmLUNA is used when U has more than five times as many rows as X has
// columns, TrmmLUNC otherwise.
inline void
internal::TrmmLUN
( UnitOrNonUnit diag,
  T alpha, const DistMatrix<T,MC,MR>& U,
                 DistMatrix<T,MC,MR>& X )
{
#ifndef RELEASE
    PushCallStack("internal::TrmmLUN");
#endif
    // TODO: Come up with a better routing mechanism
    const bool useVariantA = ( U.Height() > 5*X.Width() );
    if( useVariantA )
    {
        internal::TrmmLUNA( diag, alpha, U, X );
    }
    else
    {
        internal::TrmmLUNC( diag, alpha, U, X );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
// Return the norm of the symmetric distributed matrix A, of which only the
// triangle selected by `uplo` is passed on to the underlying routines.
//
// NUCLEAR_NORM and TWO_NORM have dedicated symmetric implementations; every
// other norm type is delegated to HermitianNorm with the requested `type`
// forwarded, so that the caller's choice is honoured rather than silently
// replaced by HermitianNorm's default.
inline typename Base<F>::type
SymmetricNorm( UpperOrLower uplo, const DistMatrix<F>& A, NormType type )
{
#ifndef RELEASE
    PushCallStack("SymmetricNorm");
#endif
    typename Base<F>::type norm = 0;
    if( type == NUCLEAR_NORM )
        norm = internal::SymmetricNuclearNorm( uplo, A );
    else if( type == TWO_NORM )
        norm = internal::SymmetricTwoNorm( uplo, A );
    else
        norm = HermitianNorm( uplo, A, type );
#ifndef RELEASE
    PopCallStack();
#endif
    return norm;
}
// Return the entrywise one-norm of A: the sum of Abs(A(i,j)) over all
// entries, accumulated column by column.
inline typename Base<F>::type
EntrywiseOneNorm( const Matrix<F>& A )
{
#ifndef RELEASE
    PushCallStack("EntrywiseOneNorm");
#endif
    typedef typename Base<F>::type R;

    const int numRows = A.Height();
    const int numCols = A.Width();

    R sumOfMagnitudes = 0;
    for( int col=0; col<numCols; ++col )
    {
        for( int row=0; row<numRows; ++row )
        {
            sumOfMagnitudes += Abs(A.Get(row,col));
        }
    }
#ifndef RELEASE
    PopCallStack();
#endif
    return sumOfMagnitudes;
}
// Resize this [MD,* ] distributed matrix to height x width.
//
// The global dimensions are always recorded. The local matrix is resized
// only on participating processes, to LocalLength(height, ColShift(), LCM of
// the grid) rows and `width` columns.
// In non-RELEASE builds a locked view or a negative dimension is rejected.
inline void
DistMatrix<T,MD,STAR,Int>::ResizeTo( Int height, Int width )
{
#ifndef RELEASE
    PushCallStack("[MD,* ]::ResizeTo");
    this->AssertNotLockedView();
    if( height < 0 || width < 0 )
        throw std::logic_error("Height and width must be non-negative");
#endif
    this->height_ = height;
    this->width_ = width;

    if( this->Participating() )
    {
        this->localMatrix_.ResizeTo
        ( LocalLength( height, this->ColShift(), this->Grid().LCM() ),
          width );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
inline void SymmLL ( T alpha, const DistMatrix<T,MC,MR>& A, const DistMatrix<T,MC,MR>& B, T beta, DistMatrix<T,MC,MR>& C ) { #ifndef RELEASE PushCallStack("internal::SymmLL"); #endif // TODO: Come up with a better routing mechanism if( A.Height() > 5*B.Width() ) SymmLLA( alpha, A, B, beta, C ); else SymmLLC( alpha, A, B, beta, C ); #ifndef RELEASE PopCallStack(); #endif }
// Dispatch for the Right/Lower/Normal triangular matrix-matrix multiply.
//
// TrmmRLNA is selected when L has more than five times as many rows as X;
// TrmmRLNC is used otherwise.
inline void
TrmmRLN
( UnitOrNonUnit diag,
  T alpha, const DistMatrix<T>& L,
                 DistMatrix<T>& X )
{
#ifndef RELEASE
    PushCallStack("internal::TrmmRLN");
#endif
    // TODO: Come up with a better routing mechanism
    const bool useVariantA = ( L.Height() > 5*X.Height() );
    if( useVariantA )
    {
        TrmmRLNA( diag, alpha, L, X );
    }
    else
    {
        TrmmRLNC( diag, alpha, L, X );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
// Dispatch for the Right/Upper symmetric matrix-matrix multiply.
//
// SymmRUA is selected when A has more than five times as many rows as B;
// SymmRUC is used otherwise. `conjugate` is forwarded unchanged to the
// chosen implementation.
inline void
SymmRU
( T alpha, const DistMatrix<T>& A,
           const DistMatrix<T>& B,
  T beta,        DistMatrix<T>& C,
  bool conjugate=false )
{
#ifndef RELEASE
    PushCallStack("internal::SymmRU");
#endif
    // TODO: Come up with a better routing mechanism
    const bool useVariantA = ( A.Height() > 5*B.Height() );
    if( useVariantA )
    {
        SymmRUA( alpha, A, B, beta, C, conjugate );
    }
    else
    {
        SymmRUC( alpha, A, B, beta, C, conjugate );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
// Distributed general matrix-vector multiply front end.
//
// A NORMAL orientation is routed to internal::GemvN; any other orientation
// is routed to internal::GemvT, which receives the orientation itself.
inline void
Gemv
( Orientation orientation,
  T alpha, const DistMatrix<T>& A,
           const DistMatrix<T>& x,
  T beta,        DistMatrix<T>& y )
{
#ifndef RELEASE
    PushCallStack("Gemv");
#endif
    if( orientation != NORMAL )
    {
        internal::GemvT( orientation, alpha, A, x, beta, y );
    }
    else
    {
        internal::GemvN( alpha, A, x, beta, y );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
inline void ExpandPackedReflectors ( UpperOrLower uplo, VerticalOrHorizontal dir, Conjugation conjugation, int offset, DistMatrix<Complex<R> >& H, const DistMatrix<Complex<R>,MD,STAR>& t ) { #ifndef RELEASE PushCallStack("ExpandPackedReflectors"); #endif if( uplo == LOWER && dir == VERTICAL ) internal::ExpandPackedReflectorsLV( conjugation, offset, H, t ); else throw std::logic_error("This option is not yet supported"); #ifndef RELEASE PopCallStack(); #endif }
// Dispatch for the Left/Upper Hermitian matrix-matrix multiply.
//
// HemmLUA is selected when A has more than five times as many rows as B has
// columns; HemmLUC is used otherwise.
inline void
HemmLU
( T alpha, const DistMatrix<T>& A,
           const DistMatrix<T>& B,
  T beta,        DistMatrix<T>& C )
{
#ifndef RELEASE
    PushCallStack("internal::HemmLU");
#endif
    // TODO: Come up with a better routing mechanism
    const bool useVariantA = ( A.Height() > 5*B.Width() );
    if( useVariantA )
    {
        HemmLUA( alpha, A, B, beta, C );
    }
    else
    {
        HemmLUC( alpha, A, B, beta, C );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
inline void LockedPartitionDown ( const DM& A, DM& AT, DM& AB, Int heightAT ) { #ifndef RELEASE PushCallStack("LockedPartitionDown [DistMatrix]"); if( heightAT < 0 ) throw std::logic_error("Height of top partition must be non-negative"); #endif heightAT = std::min(heightAT,A.Height()); const Int heightAB = A.Height()-heightAT; LockedView( AT, A, 0, 0, heightAT, A.Width() ); LockedView( AB, A, heightAT, 0, heightAB, A.Width() ); #ifndef RELEASE PopCallStack(); #endif }
inline void ExpandPackedReflectors ( UpperOrLower uplo, VerticalOrHorizontal dir, Conjugation conjugation, int offset, DistMatrix<Complex<R> >& H, const DistMatrix<Complex<R>,STAR,STAR>& t ) { #ifndef RELEASE PushCallStack("ExpandPackedReflectors"); #endif DistMatrix<Complex<R>,MD,STAR> tDiag(H.Grid()); tDiag.AlignWithDiagonal( H, offset ); tDiag = t; ExpandPackedReflectors( uplo, dir, conjugation, offset, H, tDiag ); #ifndef RELEASE PopCallStack(); #endif }
inline void internal::ApplyPackedReflectorsLUHB ( int offset, const DistMatrix<R,MC,MR>& H, DistMatrix<R,MC,MR>& A ) { #ifndef RELEASE PushCallStack("internal::ApplyPackedReflectorsLUHB"); if( H.Grid() != A.Grid() ) throw std::logic_error("{H,A} must be distributed over the same grid"); if( offset < 0 ) throw std::logic_error("Transforms cannot extend below matrix"); #endif throw std::logic_error("This routine is not yet implemented"); #ifndef RELEASE PopCallStack(); #endif }
inline void SortEig( DistMatrix<R,VR,STAR>& w, DistMatrix<Complex<R> >& Z ) { #ifndef RELEASE PushCallStack("SortEig"); #endif const int n = Z.Height(); const int k = Z.Width(); const Grid& g = Z.Grid(); DistMatrix<Complex<R>,VC,STAR> Z_VC_STAR( Z ); DistMatrix<R,STAR,STAR> w_STAR_STAR( w ); // Initialize the pairs of indices and eigenvalues std::vector<internal::IndexValuePair<R> > pairs( k ); for( int i=0; i<k; ++i ) { pairs[i].index = i; pairs[i].value = w_STAR_STAR.GetLocal(i,0); } // Sort the eigenvalues and simultaneously form the permutation std::sort ( pairs.begin(), pairs.end(), internal::IndexValuePair<R>::Compare ); // Locally reorder the eigenvectors and eigenvalues using the new ordering const int mLocal = Z_VC_STAR.LocalHeight(); DistMatrix<Complex<R>,VC,STAR> ZPerm_VC_STAR( n, k, g ); for( int j=0; j<k; ++j ) { const int source = pairs[j].index; MemCopy ( ZPerm_VC_STAR.LocalBuffer(0,j), Z_VC_STAR.LockedLocalBuffer(0,source), mLocal ); w_STAR_STAR.SetLocal(j,0,pairs[j].value); } Z_VC_STAR.Empty(); Z = ZPerm_VC_STAR; w = w_STAR_STAR; #ifndef RELEASE PopCallStack(); #endif }
// Compute the determinant of the Hermitian positive-definite matrix A in
// "safe product" form, overwriting A with its Cholesky factor.
//
// On success rho is set to 1 and kappa to the sum, over the diagonal entries
// delta of the Cholesky factor, of Log(delta)/(n/2), reduced over the grid's
// VC communicator. If Cholesky throws NonHPDMatrixException, both rho and
// kappa are set to zero instead.
// Throws std::logic_error when A is not square.
inline SafeProduct<F>
SafeHPDDeterminantWithOverwrite( UpperOrLower uplo, DistMatrix<F>& A )
{
#ifndef RELEASE
    PushCallStack("internal::SafeHPDDeterminantWithOverwrite");
#endif
    if( A.Height() != A.Width() )
        throw std::logic_error
        ("Cannot compute determinant of nonsquare matrix");

    typedef typename Base<F>::type R;
    const Grid& g = A.Grid();
    const int n = A.Height();
    const R scale = R(n)/R(2);
    SafeProduct<F> det( n );

    try
    {
        Cholesky( uplo, A );

        // Extract the diagonal of the factor
        DistMatrix<F,MD,STAR> d(g);
        A.GetDiagonal( d );

        // Each process sums the scaled logs of the diagonal entries it owns
        R localKappa = 0;
        if( d.InDiagonal() )
        {
            const int numLocalDiag = d.LocalHeight();
            for( int iLocal=0; iLocal<numLocalDiag; ++iLocal )
            {
                const R delta = RealPart(d.GetLocal(iLocal,0));
                localKappa += Log(delta)/scale;
            }
        }
        mpi::AllReduce( &localKappa, &det.kappa, 1, mpi::SUM, g.VCComm() );
        det.rho = F(1);
    }
    catch( NonHPDMatrixException& )
    {
        // Not positive-definite: report a zero determinant
        det.rho = 0;
        det.kappa = 0;
    }
#ifndef RELEASE
    PopCallStack();
#endif
    return det;
}
// Blocked lower Cholesky factorization of A, variant 2.
//
// A is traversed down its diagonal. For each diagonal block A11 the
// contribution of the already-factored block row A10 is subtracted, A11 is
// factored with the unblocked variant-3 routine, and the panel A21 below it
// is updated and solved against A11.
// In non-RELEASE builds a non-square A throws std::logic_error.
inline void
CholeskyLVar2( Matrix<F>& A )
{
#ifndef RELEASE
    PushCallStack("internal::CholeskyLVar2");
    if( A.Height() != A.Width() )
        throw std::logic_error
        ("Can only compute Cholesky factor of square matrices");
#endif
    // Views into A used by the partitioning below
    Matrix<F> ATL, ATR, ABL, ABR;
    Matrix<F> A00, A01, A02;
    Matrix<F> A10, A11, A12;
    Matrix<F> A20, A21, A22;

    PartitionDownDiagonal( A, ATL, ATR, ABL, ABR, 0 );

    // Continue until the top-left block covers all of A
    while( ATL.Height() < A.Height() )
    {
        // Expose the next diagonal block A11 and its neighbours
        RepartitionDownDiagonal
        ( ATL, ATR,
          A00, A01, A02,
          A10, A11, A12,
          ABL, ABR,
          A20, A21, A22 );

        //--------------------------------------------------------------------//
        // A11 := A11 - A10 A10^H, then factor the diagonal block
        Herk( LOWER, NORMAL, F(-1), A10, F(1), A11 );
        CholeskyLVar3Unb( A11 );

        // A21 := (A21 - A20 A10^H) A11^{-H}
        Gemm( NORMAL, ADJOINT, F(-1), A20, A10, F(1), A21 );
        Trsm( RIGHT, LOWER, ADJOINT, NON_UNIT, F(1), A11, A21 );
        //--------------------------------------------------------------------//

        // Absorb the processed block into the top-left partition
        SlidePartitionDownDiagonal
        ( ATL, ATR,
          A00, A01, A02,
          A10, A11, A12,
          ABL, ABR,
          A20, A21, A22 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
// Return the unconjugated dot product of the vectors x and y.
//
// Each argument may be stored as a column (width 1) or as a row. A column
// is read with unit stride and x.Height()/y.Height() entries; a row is read
// with a stride of its leading dimension. The number of entries passed to
// blas::Dotu is taken from x.
// In non-RELEASE builds std::logic_error is thrown if either argument is
// not a vector or if their lengths differ.
inline T
Dotu( const Matrix<T>& x, const Matrix<T>& y )
{
#ifndef RELEASE
    PushCallStack("Dotu");
    if( (x.Height() != 1 && x.Width() != 1) ||
        (y.Height() != 1 && y.Width() != 1) )
        throw std::logic_error("Expected vector inputs");
    int xLength = ( x.Width() == 1 ? x.Height() : x.Width() );
    int yLength = ( y.Width() == 1 ? y.Height() : y.Width() );
    if( xLength != yLength )
        throw std::logic_error("x and y must be the same length");
#endif
    const bool xIsColumn = ( x.Width() == 1 );
    const bool yIsColumn = ( y.Width() == 1 );

    // Entry count and stride follow from how each vector is laid out
    const int numEntries = ( xIsColumn ? x.Height() : x.Width() );
    const int xStride = ( xIsColumn ? 1 : x.LDim() );
    const int yStride = ( yIsColumn ? 1 : y.LDim() );

    const T dotProduct = blas::Dotu
    ( numEntries, x.LockedBuffer(), xStride, y.LockedBuffer(), yStride );
#ifndef RELEASE
    PopCallStack();
#endif
    return dotProduct;
}
// Sort the eigenvalues stored in w into ascending order.
//
// Every process gathers a full [* ,* ] copy of w, sorts its local buffer
// with std::sort, and the sorted copy is then assigned back to w.
inline void
SortEig( DistMatrix<R,VR,STAR>& w )
{
#ifndef RELEASE
    PushCallStack("SortEig");
#endif
    const int numEigs = w.Height();

    // Replicate w everywhere and sort the replicated values in place
    DistMatrix<R,STAR,STAR> w_STAR_STAR( w );
    R* const values = w_STAR_STAR.LocalBuffer();
    std::sort( values, values+numEigs );

    // Redistribute the sorted values back into w
    w = w_STAR_STAR;
#ifndef RELEASE
    PopCallStack();
#endif
}
// Return the trace of the square matrix A, i.e. the sum of the entries of
// its diagonal (extracted with GetDiagonal and summed top to bottom).
// Throws std::logic_error when A is not square.
inline F
Trace( const Matrix<F>& A )
{
#ifndef RELEASE
    PushCallStack("Trace");
#endif
    if( A.Height() != A.Width() )
        throw std::logic_error("Cannot compute trace of nonsquare matrix");

    Matrix<F> diagonal;
    A.GetDiagonal( diagonal );

    const int order = A.Height();
    F sum = 0;
    for( int k=0; k<order; ++k )
    {
        sum += diagonal.Get(k,0);
    }
#ifndef RELEASE
    PopCallStack();
#endif
    return sum;
}
inline void Trr2kTTTN ( UpperOrLower uplo, Orientation orientationOfA, Orientation orientationOfB, Orientation orientationOfC, T alpha, const DistMatrix<T,MC,MR>& A, const DistMatrix<T,MC,MR>& B, const DistMatrix<T,MC,MR>& C, const DistMatrix<T,MC,MR>& D, T beta, DistMatrix<T,MC,MR>& E ) { #ifndef RELEASE PushCallStack("internal::Trr2kTTTN"); #endif Trr2kTNTT ( uplo, orientationOfC, orientationOfA, orientationOfB, alpha, C, D, A, B, beta, E ); #ifndef RELEASE PopCallStack(); #endif }
inline void LockedPartitionUp ( const M& A, M& AT, M& AB, Int heightAB ) { #ifndef RELEASE PushCallStack("LockedPartitionUp [Matrix]"); if( heightAB < 0 ) throw std::logic_error ("Height of bottom partition must be non-negative"); #endif heightAB = std::min(heightAB,A.Height()); const Int heightAT = A.Height()-heightAB; LockedView( AT, A, 0, 0, heightAT, A.Width() ); LockedView( AB, A, heightAT, 0, heightAB, A.Width() ); #ifndef RELEASE PopCallStack(); #endif }
// Blocked in-place inversion of the upper-triangular matrix U, variant 3.
//
// U is traversed up its diagonal, starting from the bottom-right corner.
// For each diagonal block U11 the blocks U01 (above) and U12 (to the right)
// are solved against U11, U02 is updated with their product, and U11 itself
// is inverted by the unblocked variant-3 routine.
// In non-RELEASE builds a non-square U throws std::logic_error.
inline void
TriangularInverseUVar3( UnitOrNonUnit diag, Matrix<F>& U )
{
#ifndef RELEASE
    PushCallStack("internal::TriangularInverseUVar3");
    if( U.Height() != U.Width() )
        throw std::logic_error("Nonsquare matrices cannot be triangular");
#endif
    // Views into U used by the partitioning below
    Matrix<F> UTL, UTR, UBL, UBR;
    Matrix<F> U00, U01, U02;
    Matrix<F> U10, U11, U12;
    Matrix<F> U20, U21, U22;

    PartitionUpDiagonal( U, UTL, UTR, UBL, UBR, 0 );

    // Continue until the bottom-right block covers all of U
    while( UBR.Height() < U.Height() )
    {
        // Expose the next diagonal block U11 and its neighbours
        RepartitionUpDiagonal
        ( UTL, UTR,
          U00, U01, U02,
          U10, U11, U12,
          UBL, UBR,
          U20, U21, U22 );

        //--------------------------------------------------------------------//
        // U01 := -U01 U11^{-1}
        Trsm( RIGHT, UPPER, NORMAL, diag, F(-1), U11, U01 );
        // U02 := U02 + U01 U12
        Gemm( NORMAL, NORMAL, F(1), U01, U12, F(1), U02 );
        // U12 := U11^{-1} U12
        Trsm( LEFT, UPPER, NORMAL, diag, F(1), U11, U12 );
        // Invert the diagonal block last, once its old value is no longer used
        TriangularInverseUVar3Unb( diag, U11 );
        //--------------------------------------------------------------------//

        // Absorb the processed block into the bottom-right partition
        SlidePartitionUpDiagonal
        ( UTL, UTR,
          U00, U01, U02,
          U10, U11, U12,
          UBL, UBR,
          U20, U21, U22 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}