void matrix::svd(matrix& U, diagMatrix& S, matrix& Vdag) const { static StopWatch watch("matrix::svd"); watch.start(); //Initialize input and outputs: matrix A = *this; //destructible copy int M = A.nRows(); int N = A.nCols(); U.init(M,M); Vdag.init(N,N); S.resize(std::min(M,N)); //Initialize temporaries: char jobz = 'A'; //full SVD (return complete unitary matrices) int lwork = 2*(M*N + M + N); std::vector<complex> work(lwork); std::vector<double> rwork(S.nRows() * std::max(5*S.nRows()+7, 2*(M+N)+1)); std::vector<int> iwork(8*S.nRows()); //Call LAPACK and check errors: int info=0; zgesdd_(&jobz, &M, &N, A.data(), &M, S.data(), U.data(), &M, Vdag.data(), &N, work.data(), &lwork, rwork.data(), iwork.data(), &info); if(info>0) //convergence failure; try the slower stabler version { int info=0; matrix A = *this; //destructible copy zgesvd_(&jobz, &jobz, &M, &N, A.data(), &M, S.data(), U.data(), &M, Vdag.data(), &N, work.data(), &lwork, rwork.data(), &info); if(info<0) { logPrintf("Argument# %d to LAPACK SVD routine ZGESVD is invalid.\n", -info); stackTraceExit(1); } if(info>0) { logPrintf("Error code %d in LAPACK SVD routine ZGESVD.\n", info); stackTraceExit(1); } } if(info<0) { logPrintf("Argument# %d to LAPACK SVD routine ZGESDD is invalid.\n", -info); stackTraceExit(1); } watch.stop(); }
bool schur(const cmat &A, cmat &U, cmat &T) { it_assert_debug(A.rows() == A.cols(), "schur(): Matrix is not square"); char jobvs = 'V'; char sort = 'N'; int info; int n = A.rows(); int lda = n; int ldvs = n; int lwork = 2 * n; // This may be choosen better! int sdim = 0; vec rwork(n); cvec w(n); cvec work(lwork); T.set_size(lda, n, false); U.set_size(ldvs, n, false); T = A; // The routine overwrites input matrix with eigenvectors zgees_(&jobvs, &sort, 0, &n, T._data(), &lda, &sdim, w._data(), U._data(), &ldvs, work._data(), &lwork, rwork._data(), 0, &info); return (info == 0); }
void operator() (char const jobz, char const uplo, int const n, int const kd, T* ab, int const ldab, R* w, T* z, int const ldz, optimal_workspace , int& info ) const { traits::detail::array<T> work( n ); traits::detail::array<R> rwork( 3*n-2 ); hbev( jobz, uplo, n, kd, ab, ldab, w, z, ldz, traits::vector_storage( work ), traits::vector_storage( rwork ), info ); }
inline int operator() (char jobvs, MatrA& a, EigVal& w, SchVec& vs, minimal_workspace ) const { typedef typename MatrA::value_type value_type ; typedef typename traits::type_traits< value_type >::real_type real_type ; int n = traits::matrix_size1( a ); traits::detail::array<value_type> work( 2*n ); traits::detail::array<real_type> rwork( n ); return gees( jobvs, a, w, vs, work, rwork ); } // gees()
bool qr(const cmat &A, cmat &Q, cmat &R, bmat &P) { int info; int m = A.rows(); int n = A.cols(); int lwork = n; int k = std::min(m, n); cvec tau(k); cvec work(lwork); vec rwork(std::max(1, 2*n)); ivec jpvt(n); jpvt.zeros(); R = A; // perform workspace query for optimum lwork value int lwork_tmp = -1; zgeqp3_(&m, &n, R._data(), &m, jpvt._data(), tau._data(), work._data(), &lwork_tmp, rwork._data(), &info); if (info == 0) { lwork = static_cast<int>(real(work(0))); work.set_size(lwork, false); } zgeqp3_(&m, &n, R._data(), &m, jpvt._data(), tau._data(), work._data(), &lwork, rwork._data(), &info); Q = R; Q.set_size(m, m, true); // construct permutation matrix P = zeros_b(n, n); for (int j = 0; j < n; j++) P(jpvt(j) - 1, j) = 1; // construct R for (int i = 0; i < m; i++) for (int j = 0; j < std::min(i, n); j++) R(i, j) = 0; // perform workspace query for optimum lwork value lwork_tmp = -1; zungqr_(&m, &m, &k, Q._data(), &m, tau._data(), work._data(), &lwork_tmp, &info); if (info == 0) { lwork = static_cast<int>(real(work(0))); work.set_size(lwork, false); } zungqr_(&m, &m, &k, Q._data(), &m, tau._data(), work._data(), &lwork, &info); return (info == 0); }
// complex double void PivotedQRWrapper(int m, int n, std::complex<double> *A, int lda, std::vector<int>& jpvt, std::vector< std::complex<double> >& tau) { #ifndef RELEASE CallStackEntry entry("PivotedQRWrapper"); #endif int lwork = (n + 1) * BLOCKSIZE; std::vector< std::complex<double> > work(lwork); std::vector<double> rwork(lapack::PivotedQRRealWorkSize(n)); lapack::PivotedQR(m, n, A, lda, &jpvt[0], &tau[0], &work[0], lwork, &rwork[0]); for( size_t i=0; i<jpvt.size(); ++i ) jpvt[i]--; }
BOOST_FORCEINLINE result_type operator()(A0 const& a0, A1 const& a1, A2 const& a2) const { result_type rcond; nt2_la_int n = nt2::height(a0); nt2_la_int ld = n; nt2_la_int info; nt2::memory::container<tag::table_, v_t, nt2::_2D> work(nt2::of_size(2*n,1)); nt2::memory::container<tag::table_, result_type, nt2::_2D> rwork(nt2::of_size(2*n,1)); NT2_F77NAME(zgecon) ( &a1, &n, a0.raw(), &ld, &a2, &rcond , work.raw() , rwork.raw(), &info ); return rcond; }
void MultiplyAdjoint ( int maxRank, std::complex<Real> alpha, const Dense<std::complex<Real> >& A, const Dense<std::complex<Real> >& B, LowRank<std::complex<Real> >& C ) { #ifndef RELEASE CallStackEntry entry("hmat_tools::MultiplyAdjoint (F := D D^H)"); #endif typedef std::complex<Real> Scalar; const int m = A.Height(); const int n = B.Height(); const int minDim = std::min( m, n ); const int r = std::min( minDim, maxRank ); // C.U := alpha A B^H MultiplyAdjoint( alpha, A, B, C.U ); // Get the economic SVD of C.U, C.U = U Sigma V^H, overwriting C.U with U. Vector<Real> s( minDim ); Dense<Scalar> VH( minDim, n ); const int lwork = lapack::SVDWorkSize( m, n ); std::vector<Scalar> work( lwork ); std::vector<Real> rwork( 5*minDim ); lapack::SVD ( 'O', 'S', m, n, C.U.Buffer(), C.U.LDim(), s.Buffer(), 0, 1, VH.Buffer(), VH.LDim(), &work[0], lwork, &rwork[0] ); // Truncate the SVD in-place C.U.Resize( m, r ); s.Resize( r ); VH.Resize( r, n ); C.V.SetType( GENERAL ); C.V.Resize( n, r ); // Put (Sigma V^H)^T = (V^H)^T Sigma into C.V const int VHLDim = VH.LDim(); for( int j=0; j<r; ++j ) { const Real sigma = s.Get(j); Scalar* RESTRICT VCol = C.V.Buffer(0,j); const Scalar* RESTRICT VHRow = VH.LockedBuffer(j,0); for( int i=0; i<n; ++i ) VCol[i] = sigma*VHRow[i*VHLDim]; } }
void DivideAndConquerSVD ( int m, int n, dcomplex* A, int lda, double* s, dcomplex* U, int ldu, dcomplex* VAdj, int ldva ) { #ifndef RELEASE PushCallStack("lapack::DivideAndConquerSVD"); #endif if( m==0 || n==0 ) { #ifndef RELEASE PopCallStack(); #endif return; } const char jobz='S'; int lwork=-1, info; dcomplex dummyWork; const int k = std::min(m,n); const int K = std::max(m,n); const int lrwork = k*std::max(5*k+7,2*K+2*k+1); std::vector<double> rwork(lrwork); std::vector<int> iwork(8*k); LAPACK(zgesdd) ( &jobz, &m, &n, A, &lda, s, U, &ldu, VAdj, &ldva, &dummyWork, &lwork, &rwork[0], &iwork[0], &info ); lwork = dummyWork.real; std::vector<dcomplex> work(lwork); LAPACK(zgesdd) ( &jobz, &m, &n, A, &lda, s, U, &ldu, VAdj, &ldva, &work[0], &lwork, &rwork[0], &iwork[0], &info ); if( info < 0 ) { std::ostringstream msg; msg << "Argument " << -info << " had illegal value"; throw std::logic_error( msg.str().c_str() ); } else if( info > 0 ) { throw std::runtime_error("zgesdd's updating process failed"); } #ifndef RELEASE PopCallStack(); #endif }
void matrix::diagonalize(matrix& evecs, diagMatrix& eigs) const { static StopWatch watch("matrix::diagonalize"); watch.start(); myassert(nCols()==nRows()); int N = nRows(); myassert(N > 0); //Check hermiticity const complex* thisData = data(); double errNum=0.0, errDen=0.0; for(int i=0; i<N; i++) for(int j=0; j<N; j++) { errNum += norm(thisData[index(i,j)]-thisData[index(j,i)].conj()); errDen += norm(thisData[index(i,j)]); } double hermErr = sqrt(errNum / (errDen*N)); if(hermErr > 1e-10) { logPrintf("Relative hermiticity error of %le (>1e-10) encountered in diagonalize\n", hermErr); stackTraceExit(1); } char jobz = 'V'; //compute eigenvectors and eigenvalues char range = 'A'; //compute all eigenvalues char uplo = 'U'; //use upper-triangular part matrix A = *this; //copy input matrix (zheevr destroys input matrix) double eigMin = 0., eigMax = 0.; //eigenvalue range (not used for range-type 'A') int indexMin = 0, indexMax = 0; //eignevalue index range (not used for range-type 'A') double absTol = 0.; int nEigsFound; eigs.resize(N); evecs.init(N, N); std::vector<int> iSuppz(2*N); int lwork = (64+1)*N; std::vector<complex> work(lwork); //Magic number 64 obtained by running ILAENV as suggested in doc of zheevr (and taking the max over all N) int lrwork = 24*N; std::vector<double> rwork(lrwork); //from doc of zheevr int liwork = 10*N; std::vector<int> iwork(liwork); //from doc of zheevr int info=0; zheevr_(&jobz, &range, &uplo, &N, A.data(), &N, &eigMin, &eigMax, &indexMin, &indexMax, &absTol, &nEigsFound, eigs.data(), evecs.data(), &N, iSuppz.data(), work.data(), &lwork, rwork.data(), &lrwork, iwork.data(), &liwork, &info); if(info<0) { logPrintf("Argument# %d to LAPACK eigenvalue routine ZHEEVR is invalid.\n", -info); stackTraceExit(1); } if(info>0) { logPrintf("Error code %d in LAPACK eigenvalue routine ZHEEVR.\n", info); stackTraceExit(1); } watch.stop(); }
void QRSVD ( int m, int n, scomplex* A, int lda, float* s, scomplex* U, int ldu, scomplex* VAdj, int ldva ) { #ifndef RELEASE PushCallStack("lapack::QRSVD"); #endif if( m==0 || n==0 ) { #ifndef RELEASE PopCallStack(); #endif return; } const char jobu='S', jobva='S'; int lwork=-1, info; const int k = std::min(m,n); std::vector<float> rwork(5*k); scomplex dummyWork; LAPACK(cgesvd) ( &jobu, &jobva, &m, &n, A, &lda, s, U, &ldu, VAdj, &ldva, &dummyWork, &lwork, &rwork[0], &info ); lwork = dummyWork.real; std::vector<scomplex> work(lwork); LAPACK(cgesvd) ( &jobu, &jobva, &m, &n, A, &lda, s, U, &ldu, VAdj, &ldva, &work[0], &lwork, &rwork[0], &info ); if( info < 0 ) { std::ostringstream msg; msg << "Argument " << -info << " had illegal value"; throw std::logic_error( msg.str().c_str() ); } else if( info > 0 ) { throw std::runtime_error("cgesvd's updating process failed"); } #ifndef RELEASE PopCallStack(); #endif }
void SingularValues( int m, int n, dcomplex* A, int lda, double* s ) { #ifndef RELEASE PushCallStack("lapack::SingularValues"); #endif if( m==0 || n==0 ) { #ifndef RELEASE PopCallStack(); #endif return; } const char jobu='N', jobva='N'; int fakeLDim=1, lwork=-1, info; dcomplex dummyWork; const int k = std::min(m,n); std::vector<double> rwork(5*k); LAPACK(zgesvd) ( &jobu, &jobva, &m, &n, A, &lda, s, 0, &fakeLDim, 0, &fakeLDim, &dummyWork, &lwork, &rwork[0], &info ); lwork = dummyWork.real; std::vector<dcomplex> work(lwork); LAPACK(zgesvd) ( &jobu, &jobva, &m, &n, A, &lda, s, 0, &fakeLDim, 0, &fakeLDim, &work[0], &lwork, &rwork[0], &info ); if( info < 0 ) { std::ostringstream msg; msg << "Argument " << -info << " had illegal value"; throw std::logic_error( msg.str().c_str() ); } else if( info > 0 ) { throw std::runtime_error("zgesvd's updating process failed"); } #ifndef RELEASE PopCallStack(); #endif }
// Condition estimate: // Return ratio of largest/smallest singular values. //--------------------------------------------------------- double cond(ZMat& mat) //--------------------------------------------------------- { double rcond = 1.; ZVec work(2*mat.num_cols(), "work"); DVec rwork(2*mat.num_cols(), "rwork"); int info; ZMat matcopy(mat); IVec ipiv(mat.num_rows(), "ipiv"); ZGETRF(mat.num_rows(), mat.num_cols(), matcopy.data(), mat.num_rows(), ipiv.data(), info); // fprintf(stdout, "zgetrf: info=%d \n", info); // must fix this double anorm = 1.0; ZGECON('I', mat.num_cols(), matcopy.data(), mat.num_rows(), anorm, rcond, work.data(), rwork.data(), info); // fprintf(stdout, "zgecon: info=%d rcond=%lf\n", info, rcond); return 1./rcond; }
void matrix::diagonalize(matrix& levecs, std::vector<complex>& eigs, matrix& revecs) const { static StopWatch watch("matrix::diagonalizeNH"); watch.start(); //Prepare inputs and outputs: matrix A = *this; //destructible copy int N = A.nRows(); myassert(N > 0); myassert(A.nCols()==N); eigs.resize(N); levecs.init(N, N); revecs.init(N, N); //Prepare temporaries: char jobz = 'V'; //compute eigenvectors and eigenvalues int lwork = (64+1)*N; std::vector<complex> work(lwork); //Magic number 64 obtained by running ILAENV as suggested in doc of zheevr (and taking the max over all N) std::vector<double> rwork(2*N); //Call LAPACK and check errors: int info=0; zgeev_(&jobz, &jobz, &N, A.data(), &N, eigs.data(), levecs.data(), &N, revecs.data(), &N, work.data(), &lwork, rwork.data(), &info); if(info<0) { logPrintf("Argument# %d to LAPACK eigenvalue routine ZGEEV is invalid.\n", -info); stackTraceExit(1); } if(info>0) { logPrintf("Error code %d in LAPACK eigenvalue routine ZGEEV.\n", info); stackTraceExit(1); } watch.stop(); }
main(int argc, char **argv ) { /* This is the Hello World program for CPSC424/524. Author: Andrew Sherman, Yale University Date: 1/23/2017 Credits: This program is based on a program provided by Barry Wilkinson (UNCC), which had a similar communication pattern, but did not include any simulated work. */ char message[100]; int i,rank, size, type=99; int worktime, sparm, rwork(int,int); double wct0, wct1, total_time; MPI_Status status; MPI_Init(&argc,&argv); // Required MPI initialization call MPI_Comm_size(MPI_COMM_WORLD,&size); // Get no. of processes MPI_Comm_rank(MPI_COMM_WORLD, &rank); // Which process am I? /* If I am the master (rank 0) ... */ if (rank == 0) { sparm = rwork(0,0); //initialize the workers' work times /* Create the message using sprintf */ sprintf(message, "Hello, from process %d.",rank); MPI_Barrier(MPI_COMM_WORLD); //wait for everyone to be ready before starting timer wct0 = MPI_Wtime(); //set the start time /* Send the message to all the workers, which is where the work happens */ for (i=1; i<size; i++) { MPI_Send(message, strlen(message)+1, MPI_CHAR, i, type, MPI_COMM_WORLD); MPI_Send(&sparm, 1, MPI_INT, i, type, MPI_COMM_WORLD); } for (i=1; i<size; i++) { MPI_Recv(message, 100, MPI_CHAR, i, type, MPI_COMM_WORLD, &status); sleep(3); printf("Message from process %d: %s\n", i, message); } wct1 = MPI_Wtime(); // Get total elapsed time total_time = wct1 - wct0; printf("Message printed by master: Total elapsed time is %f seconds.\n",total_time); } /* Otherwise, if I am a worker ... */ else { MPI_Barrier(MPI_COMM_WORLD); //wait for everyone to be ready before starting /* Receive messages from the master */ MPI_Recv(message, 100, MPI_CHAR, 0, type, MPI_COMM_WORLD, &status); MPI_Recv(&sparm, 1, MPI_INT, 0, type, MPI_COMM_WORLD, &status); worktime = rwork(rank,sparm); // Simulate some work // printf("From process %d: I worked for %d seconds after receiving the following message:\n\t %s\n", // rank,worktime,message); sprintf(message, "Hello master, from process %d after working %d seconds", rank, worktime); MPI_Send(message, strlen(message)+1, MPI_CHAR, 0, type, MPI_COMM_WORLD); } MPI_Finalize(); // Required MPI termination call }
void DenseLinearEigenSystem< std::complex<double> >::eigensolve_lapack_with_vectors() { #ifndef LAPACK std::string problem; problem = "The DenseLinearEigenSystem::eigensolve_lapack_with_vectors method has been called\n"; problem += "but the compiler option -DLAPACK was not provided when\n"; problem += "the library was built."; throw ExceptionExternal( problem ); #else std::size_t N = p_A -> nrows(); // Cache issues of varying significance plague problems of size 2^j + 2^k + ... // when LDA = N, so this is my shameless 'tweak' to maintain predictable // performance, at least for N <=1024 or so. int padding( 0 ); if ( ( N % 2 == 0 ) && ( N > 127 ) ) { padding = 1; } #ifdef PARANOID if ( ( p_A -> nrows() != p_B -> nrows() ) || ( p_A -> ncols() != p_B -> ncols() ) ) { std::string problem( "The DenseLinearEigenSystem::eigensolve_lapack_with_vectors method has detected a failure. \n" ); throw ExceptionGeom( problem, p_A -> nrows(), p_A -> ncols(), p_B -> nrows(), p_B -> ncols() ); } #endif // transpose the input matrices so they are in column_major format FortranData Af( *p_A, true, padding ); FortranData Bf( *p_B, true, padding ); // eigenvalue storage DenseVector<double> alpha( 2 * N, 0.0 ); DenseVector<double> beta( 2 * N, 0.0 ); // eigenvector storage DenseVector<double> vec_left( 2, 0.0 ); DenseVector<double> vec_right( 2 * N * N, 0.0 ); // some workspace for the LAPACK routine DenseVector<double> work( 2, 0.0 ); DenseVector<double> rwork( 8 * N, 0.0 ); int info( 0 ); // Call FORTRAN LAPACK to get the required workspace LAPACK_ZGGEV( ( char* ) "N", ( char* ) "V", N, Af.base(), N + padding, Bf.base(), N + padding, &alpha[ 0 ], &beta[ 0 ], &vec_left[ 0 ], 1, &vec_right[ 0 ], N, &work[ 0 ], -1, &rwork[ 0 ], info ); int required_workspace = int( work[ 0 ] ); #ifdef DEBUG std::cout << "[DEBUG] DenseLinearEigenSystem::eigensolve_lapack_with_vectors is requesting \n"; std::cout << "[DEBUG] a workspace vector of size " << required_workspace << "\n"; #endif work.resize( 2 * required_workspace ); // call FORTRAN LAPACK again with the optimum workspace LAPACK_ZGGEV( ( char* ) "N", ( char* ) "V", N, Af.base(), N + padding, Bf.base(), N + padding, &alpha[ 0 ], &beta[ 0 ], &vec_left[ 0 ], 1, &vec_right[ 0 ], N, &work[ 0 ], required_workspace, &rwork[ 0 ], info ); if ( 0 != info ) { std::string problem( "The DenseLinearEigenSystem::eigensolve_lapack_with_vectors method has detected a failure.\n" ); throw ExceptionExternal( problem, info ); } // create a complex eigenvalue vector EIGENVALUES_ALPHA = DenseVector<D_complex>( N, 0.0 ); EIGENVALUES_BETA = DenseVector<D_complex>( N, 0.0 ); // complex eigenvector matrix ALL_EIGENVECTORS = DenseMatrix<D_complex>( N, N, 0.0 ); // step through the eigenvalues for ( std::size_t i = 0; i < N; ++i ) { const D_complex eye( 0.0, 1.0 ); EIGENVALUES_ALPHA[ i ] = alpha[ 2 * i ] + alpha[ 2 * i + 1 ] * eye; EIGENVALUES_BETA[ i ] = beta[ 2 * i ] + beta[ 2 * i + 1 ] * eye; for ( std::size_t j = 0; j < N; ++j ) { ALL_EIGENVECTORS( i, j ) = vec_right[ 2 * i * N + 2 * j ] + vec_right[ 2 * i * N + 2 * j + 1 ] * eye; } } #endif }
int Pseudoinverse( size_t m, size_t n, const doublecomplex *A, size_t lda, doublecomplex *P, size_t ldp ){ integer info; CMat Acopy(Eigen::Map<const CMat,Eigen::Unaligned,Eigen::OuterStride<> >(A, m, n, Eigen::OuterStride<>(lda))); Eigen::Map<CMat,Eigen::Unaligned,Eigen::OuterStride<> > mP(P, n, m, Eigen::OuterStride<>(ldp)); if(m >= n){ // tall case RVec S(n); CMat VH(n,n); doublecomplex dum; integer lwork = -1; RVec rwork(5*n); zgesvd_( "O","A", m,n, Acopy.data(), Acopy.outerStride(), S.data(), NULL, m, VH.data(), VH.outerStride(), &dum, lwork, rwork.data(), &info ); lwork = (integer)dum.real(); CVec work(lwork); zgesvd_( "O","A", m,n, Acopy.data(), Acopy.outerStride(), S.data(), NULL, m, VH.data(), VH.outerStride(), work.data(), lwork, rwork.data(), &info ); mP = Acopy.adjoint(); { double threshold = 2 * std::numeric_limits<double>::epsilon() * S[0]; for(size_t i = 0; i < n; ++i){ if(S[i] < threshold){ break; } S[i] = 1./S[i]; } } mP = VH.adjoint() * S.asDiagonal() * mP; }else{ // wide case RVec S(m); CMat U(m,m); doublecomplex dum; integer lwork = -1; RVec rwork(5*m); zgesvd_( "A","O", m,n, Acopy.data(), Acopy.outerStride(), S.data(), U.data(), U.outerStride(), NULL, m, &dum, lwork, rwork.data(), &info ); lwork = (integer)dum.real(); CVec work(lwork); zgesvd_( "A","O", m,n, Acopy.data(), Acopy.outerStride(), S.data(), U.data(), U.outerStride(), NULL, m, work.data(), lwork, rwork.data(), &info ); mP = Acopy.adjoint(); { double threshold = 2 * std::numeric_limits<double>::epsilon() * S[0]; for(size_t i = 0; i < m; ++i){ if(S[i] < threshold){ break; } S[i] = 1./S[i]; } } mP = mP * S.asDiagonal() * U.adjoint(); } return info; }