void matrix::svd(matrix& U, diagMatrix& S, matrix& Vdag) const { static StopWatch watch("matrix::svd"); watch.start(); //Initialize input and outputs: matrix A = *this; //destructible copy int M = A.nRows(); int N = A.nCols(); U.init(M,M); Vdag.init(N,N); S.resize(std::min(M,N)); //Initialize temporaries: char jobz = 'A'; //full SVD (return complete unitary matrices) int lwork = 2*(M*N + M + N); std::vector<complex> work(lwork); std::vector<double> rwork(S.nRows() * std::max(5*S.nRows()+7, 2*(M+N)+1)); std::vector<int> iwork(8*S.nRows()); //Call LAPACK and check errors: int info=0; zgesdd_(&jobz, &M, &N, A.data(), &M, S.data(), U.data(), &M, Vdag.data(), &N, work.data(), &lwork, rwork.data(), iwork.data(), &info); if(info>0) //convergence failure; try the slower stabler version { int info=0; matrix A = *this; //destructible copy zgesvd_(&jobz, &jobz, &M, &N, A.data(), &M, S.data(), U.data(), &M, Vdag.data(), &N, work.data(), &lwork, rwork.data(), &info); if(info<0) { logPrintf("Argument# %d to LAPACK SVD routine ZGESVD is invalid.\n", -info); stackTraceExit(1); } if(info>0) { logPrintf("Error code %d in LAPACK SVD routine ZGESVD.\n", info); stackTraceExit(1); } } if(info<0) { logPrintf("Argument# %d to LAPACK SVD routine ZGESDD is invalid.\n", -info); stackTraceExit(1); } watch.stop(); }
void HPreData::makeACopy() { // Make a A copy int i,j,k; vector<int> iwork(numColOriginal, 0); Astart.assign(numColOriginal + 1, 0); int AcountX = ARindex.size(); Aindex.resize(AcountX); Avalue.resize(AcountX); for (int k = 0; k < AcountX; k++) if (ARindex[k] < numColOriginal) iwork[ARindex[k]]++; for (i = 1; i <= numColOriginal; i++) Astart[i] = Astart[i - 1] + iwork[i - 1]; for (i = 0; i < numColOriginal; i++) iwork[i] = Astart[i]; for (int iRow = 0; iRow < numRowOriginal; iRow++) { for (k = ARstart[iRow]; k < ARstart[iRow + 1]; k++) { int iColumn = ARindex[k]; if (iColumn != numColOriginal) { int iPut = iwork[iColumn]++; Aindex[iPut] = iRow; Avalue[iPut] = ARvalue[k]; } } } Aend.resize(numColOriginal + 1, 0); for (i = 0; i < numColOriginal; i++) Aend[i] = Astart[i + 1]; }
GSVD GSVD_create_d (double **m1, long numberOfRows1, long numberOfColumns, double **m2, long numberOfRows2) { try { long m = numberOfRows1, n = numberOfColumns, p = numberOfRows2; long lwork = MAX (MAX (3 * n, m), p) + n; // Store the matrices a and b as column major! autoNUMmatrix<double> a (NUMmatrix_transpose (m1, m, n), 1, 1); autoNUMmatrix<double> b (NUMmatrix_transpose (m2, p, n), 1, 1); autoNUMmatrix<double> q (1, n, 1, n); autoNUMvector<double> alpha (1, n); autoNUMvector<double> beta (1, n); autoNUMvector<double> work (1, lwork); autoNUMvector<long> iwork (1, n); char jobu1 = 'N', jobu2 = 'N', jobq = 'Q'; long k, l, info; NUMlapack_dggsvd (&jobu1, &jobu2, &jobq, &m, &n, &p, &k, &l, &a[1][1], &m, &b[1][1], &p, &alpha[1], &beta[1], NULL, &m, NULL, &p, &q[1][1], &n, &work[1], &iwork[1], &info); if (info != 0) { Melder_throw ("dggsvd failed, error = ", info); } long kl = k + l; autoGSVD me = GSVD_create (kl); for (long i = 1; i <= kl; i++) { my d1[i] = alpha[i]; my d2[i] = beta[i]; } // Transpose q for (long i = 1; i <= n; i++) { for (long j = i + 1; j <= n; j++) { my q[i][j] = q[j][i]; my q[j][i] = q[i][j]; } my q[i][i] = q[i][i]; } // Get R from a(1:k+l,n-k-l+1:n) double *pr = &a[1][1]; for (long i = 1; i <= kl; i++) { for (long j = i; j <= kl; j++) { my r[i][j] = pr[i - 1 + (n - kl + j - 1) * m]; /* from col-major */ } } return me.transfer(); } catch (MelderError) { Melder_throw ("GSVD not created."); } }
int ccdl::LeastSquaresSvdFit ( int const nobs, int const nparam, double const * A_obs_by_param, double * x_param, double const * b_obs, double TOL ) { // min (xt.At-bt).(A.x-b) std::vector<double> A( A_obs_by_param, A_obs_by_param + nparam*nobs ); int nmax = std::max( nparam, nobs ); std::vector<double> X( nmax, 0. ); std::copy( b_obs, b_obs + nobs, X.data() ); std::vector<double> S( nmax, 0. ); int LWORK = -1; std::vector<int> iwork(1,0); int INFO = 0; double twork = 0; int rank = 0; int nrhs=1; dgelsd_( &nobs, &nparam, &nrhs, A.data(), &nobs, X.data(), &nmax, S.data(), &TOL, &rank, &twork, &LWORK, iwork.data(), &INFO ); if ( INFO == 0 ) { LWORK = twork+1; std::vector<double> WORK( LWORK, 0. ); int LIWORK = iwork[0]; iwork.resize( LIWORK ); #ifdef PDBG std::printf("dgelsd_\n"); #endif dgelsd_( &nobs, &nparam, &nrhs, A.data(), &nobs, X.data(), &nmax, S.data(), &TOL, &rank, WORK.data(), &LWORK, iwork.data(), &INFO ); #ifdef PDBG std::printf("return %i\n",INFO); #endif std::copy( X.data(), X.data() + nparam, x_param ); } else { std::fill( x_param, x_param + nparam, 0. ); }; return INFO; }
void DivideAndConquerSVD ( int m, int n, dcomplex* A, int lda, double* s, dcomplex* U, int ldu, dcomplex* VAdj, int ldva ) { #ifndef RELEASE PushCallStack("lapack::DivideAndConquerSVD"); #endif if( m==0 || n==0 ) { #ifndef RELEASE PopCallStack(); #endif return; } const char jobz='S'; int lwork=-1, info; dcomplex dummyWork; const int k = std::min(m,n); const int K = std::max(m,n); const int lrwork = k*std::max(5*k+7,2*K+2*k+1); std::vector<double> rwork(lrwork); std::vector<int> iwork(8*k); LAPACK(zgesdd) ( &jobz, &m, &n, A, &lda, s, U, &ldu, VAdj, &ldva, &dummyWork, &lwork, &rwork[0], &iwork[0], &info ); lwork = dummyWork.real; std::vector<dcomplex> work(lwork); LAPACK(zgesdd) ( &jobz, &m, &n, A, &lda, s, U, &ldu, VAdj, &ldva, &work[0], &lwork, &rwork[0], &iwork[0], &info ); if( info < 0 ) { std::ostringstream msg; msg << "Argument " << -info << " had illegal value"; throw std::logic_error( msg.str().c_str() ); } else if( info > 0 ) { throw std::runtime_error("zgesdd's updating process failed"); } #ifndef RELEASE PopCallStack(); #endif }
void matrix::diagonalize(matrix& evecs, diagMatrix& eigs) const { static StopWatch watch("matrix::diagonalize"); watch.start(); myassert(nCols()==nRows()); int N = nRows(); myassert(N > 0); //Check hermiticity const complex* thisData = data(); double errNum=0.0, errDen=0.0; for(int i=0; i<N; i++) for(int j=0; j<N; j++) { errNum += norm(thisData[index(i,j)]-thisData[index(j,i)].conj()); errDen += norm(thisData[index(i,j)]); } double hermErr = sqrt(errNum / (errDen*N)); if(hermErr > 1e-10) { logPrintf("Relative hermiticity error of %le (>1e-10) encountered in diagonalize\n", hermErr); stackTraceExit(1); } char jobz = 'V'; //compute eigenvectors and eigenvalues char range = 'A'; //compute all eigenvalues char uplo = 'U'; //use upper-triangular part matrix A = *this; //copy input matrix (zheevr destroys input matrix) double eigMin = 0., eigMax = 0.; //eigenvalue range (not used for range-type 'A') int indexMin = 0, indexMax = 0; //eignevalue index range (not used for range-type 'A') double absTol = 0.; int nEigsFound; eigs.resize(N); evecs.init(N, N); std::vector<int> iSuppz(2*N); int lwork = (64+1)*N; std::vector<complex> work(lwork); //Magic number 64 obtained by running ILAENV as suggested in doc of zheevr (and taking the max over all N) int lrwork = 24*N; std::vector<double> rwork(lrwork); //from doc of zheevr int liwork = 10*N; std::vector<int> iwork(liwork); //from doc of zheevr int info=0; zheevr_(&jobz, &range, &uplo, &N, A.data(), &N, &eigMin, &eigMax, &indexMin, &indexMax, &absTol, &nEigsFound, eigs.data(), evecs.data(), &N, iSuppz.data(), work.data(), &lwork, rwork.data(), &lrwork, iwork.data(), &liwork, &info); if(info<0) { logPrintf("Argument# %d to LAPACK eigenvalue routine ZHEEVR is invalid.\n", -info); stackTraceExit(1); } if(info>0) { logPrintf("Error code %d in LAPACK eigenvalue routine ZHEEVR.\n", info); stackTraceExit(1); } watch.stop(); }
void DivideAndConquerSVD ( int m, int n, float* A, int lda, float* s, float* U, int ldu, float* VTrans, int ldvt ) { #ifndef RELEASE PushCallStack("lapack::DivideAndConquerSVD"); #endif if( m==0 || n==0 ) { #ifndef RELEASE PopCallStack(); #endif return; } const char jobz='S'; int lwork=-1, info; float dummyWork; const int k = std::min(m,n); std::vector<int> iwork(8*k); LAPACK(sgesdd) ( &jobz, &m, &n, A, &lda, s, U, &ldu, VTrans, &ldvt, &dummyWork, &lwork, &iwork[0], &info ); lwork = dummyWork; std::vector<float> work(lwork); LAPACK(sgesdd) ( &jobz, &m, &n, A, &lda, s, U, &ldu, VTrans, &ldvt, &work[0], &lwork, &iwork[0], &info ); if( info < 0 ) { std::ostringstream msg; msg << "Argument " << -info << " had illegal value"; throw std::logic_error( msg.str().c_str() ); } else if( info > 0 ) { throw std::runtime_error("sgesdd's updating process failed"); } #ifndef RELEASE PopCallStack(); #endif }
bool svd_lapack(int n, vector_t & A, vector_t & S, matrix_t & V) { int m=n; vector_t U(n*n); vector_t tV(n*n); cout << "Using LAPACK SVD library function...\n"; #ifdef WITH_LAPACK int info=0; vector<int> iwork(8*m,0); double optim_lwork; int lwork; lwork = -1; // Determine workspace needed dgesdd_("A", &m, &n, &A[0] , &m, &S[0], &U[0], &m, &tV[0], &n, &optim_lwork, &lwork, &iwork[0], &info); lwork = (int) optim_lwork; vector_t work( lwork, 0 ); // Perform actual SVD dgesdd_("A", &m, &n, &A[0] , &m, &S[0], &U[0], &m, &tV[0], &n, &work[0], &lwork, &iwork[0], &info); // Copy and transpose V int k = 0; for( int i = 0; i < n; i++ ) for( int j = 0; j < n; j++ ) { V[j][i] = tV[k]; ++k; } return true; #else // LAPACK support not compiled return false; #endif }
void HPreData::makeARCopy() { // Make a AR copy int i,j,k; vector<int> iwork(numRow, 0); ARstart.resize(numRow + 1, 0); int AcountX = Aindex.size(); ARindex.resize(AcountX); ARvalue.resize(AcountX); for (int k = 0; k < AcountX; k++) iwork[Aindex[k]]++; for (i = 1; i <= numRow; i++) ARstart[i] = ARstart[i - 1] + iwork[i - 1]; for (i = 0; i < numRow; i++) iwork[i] = ARstart[i]; for (int iCol = 0; iCol < numCol; iCol++) { for (k = Astart[iCol]; k < Astart[iCol + 1]; k++) { int iRow = Aindex[k]; int iPut = iwork[iRow]++; ARindex[iPut] = iCol; ARvalue[iPut] = Avalue[k]; } } }
void bob::math::eigSym_(const blitz::Array<double,2>& A, const blitz::Array<double,2>& B, blitz::Array<double,2>& V, blitz::Array<double,1>& D) { // Size variable const int N = A.extent(0); // Prepares to call LAPACK function // Initialises LAPACK variables const int itype = 1; const char jobz = 'V'; // Get both the eigenvalues and the eigenvectors const char uplo = 'U'; int info = 0; const int lda = N; const int ldb = N; // Initialises LAPACK arrays blitz::Array<double,2> A_blitz_lapack; // Tries to use V directly blitz::Array<double,2> Vt = V.transpose(1,0); const bool V_direct_use = bob::core::array::isCZeroBaseContiguous(Vt); if (V_direct_use) { A_blitz_lapack.reference(Vt); // Ugly fix for non-const transpose A_blitz_lapack = const_cast<blitz::Array<double,2>&>(A).transpose(1,0); } else // Ugly fix for non-const transpose A_blitz_lapack.reference( bob::core::array::ccopy(const_cast<blitz::Array<double,2>&>(A).transpose(1,0))); double *A_lapack = A_blitz_lapack.data(); // Ugly fix for non-const transpose blitz::Array<double,2> B_blitz_lapack( bob::core::array::ccopy(const_cast<blitz::Array<double,2>&>(B).transpose(1,0))); double *B_lapack = B_blitz_lapack.data(); blitz::Array<double,1> D_blitz_lapack; const bool D_direct_use = bob::core::array::isCZeroBaseContiguous(D); if (D_direct_use) D_blitz_lapack.reference(D); else D_blitz_lapack.resize(D.shape()); double *D_lapack = D_blitz_lapack.data(); // Calls the LAPACK function // A/ Queries the optimal size of the working arrays const int lwork_query = -1; double work_query; const int liwork_query = -1; int iwork_query; dsygvd_( &itype, &jobz, &uplo, &N, A_lapack, &lda, B_lapack, &ldb, D_lapack, &work_query, &lwork_query, &iwork_query, &liwork_query, &info); // B/ Computes the generalized eigenvalue decomposition const int lwork = static_cast<int>(work_query); boost::shared_array<double> work(new double[lwork]); const int liwork = static_cast<int>(iwork_query); boost::shared_array<int> iwork(new int[liwork]); dsygvd_( &itype, &jobz, &uplo, &N, A_lapack, &lda, B_lapack, &ldb, D_lapack, work.get(), &lwork, iwork.get(), &liwork, &info); // Checks info variable if (info != 0) throw std::runtime_error("The LAPACK function 'dsygvd' returned a non-zero value. This might be caused by a non-positive definite B matrix."); // Copy singular vectors back to V if required if (!V_direct_use) V = A_blitz_lapack.transpose(1,0); // Copy result back to sigma if required if (!D_direct_use) D = D_blitz_lapack; }
int XC::SymBandEigenSolver::solve(int nModes) { if(!theSOE) { std::cerr << "SymBandEigenSolver::solve() -- no XC::EigenSOE has been set yet\n"; return -1; } // Set number of modes numModes= nModes; // Number of equations int n= theSOE->size; // Check for quick return if(numModes < 1) { numModes= 0; return 0; } // Simple check if(numModes > n) numModes= n; // Allocate storage for eigenvalues eigenvalue.resize(n); // Real work array (see LAPACK dsbevx subroutine documentation) work.resize(7*n); // Integer work array (see LAPACK dsbevx subroutine documentation) std::vector<int> iwork(5*n); // Leading dimension of eigenvectors int ldz = n; // Allocate storage for eigenvectors eigenvector.resize(ldz*numModes); // Number of superdiagonals int kd= theSOE->numSuperD; // Matrix data double *ab= theSOE->A.getDataPtr(); // Leading dimension of the matrix int ldab= kd + 1; // Leading dimension of q int ldq= n; // Orthogonal matrix used in reduction to tridiagonal form // (see LAPACK dsbevx subroutine documentation) std::vector<double> q(ldq*n); // Index ranges [1,numModes] of eigenpairs to compute int il = 1; int iu = numModes; // Compute eigenvalues and eigenvectors char jobz[] = "V"; // Selected eigenpairs are based on index range [il,iu] char range[] = "I"; // Upper triagle of matrix is stored char uplo[] = "U"; // Return value std::vector<int> ifail(n); int info= 0; // Number of eigenvalues returned int m= 0; // Not used double vl = 0.0; double vu = 1.0; // Not used ... I think! double abstol = -1.0; // if Mass matrix we make modifications to A: // A -> M^(-1/2) A M^(-1/2) double *M= theSOE->M.getDataPtr(); double *A= theSOE->A.getDataPtr(); int numSuperD = theSOE->numSuperD; int size = n; if(M) //Its seems that the M matrix must be DIAGONAL. { int i,j; bool singular = false; // form M^(-1/2) and check for singular mass matrix for(int k=0; k<size; k++) { if(M[k] == 0.0) { singular = true; // alternative is to set as a small no ~ 1e-10 times smallest m(i,i) != 0.0 std::cerr << "SymBandEigenSolver::solve() - M matrix singular\n"; return -1; } else { M[k] = 1.0/sqrt(M[k]); } } // make modifications to A // Aij -> Mi Aij Mj (based on new_ M) for(i=0; i<size; i++) { double *AijPtr = A +(i+1)*(numSuperD+1) - 1; int minColRow = i - numSuperD; if(minColRow < 0) minColRow = 0; for(j=i; j>=minColRow; j--) { *AijPtr *= M[j]*M[i]; AijPtr--; } } } // Calls the LAPACK routine that computes the eigenvalues and eigenvectors // of the matrix A previously transforme. dsbevx_(jobz, range, uplo, &n, &kd, ab, &ldab, &q[0], &ldq, &vl, &vu, &il, &iu, &abstol, &m, eigenvalue.getDataPtr(), eigenvector.getDataPtr(), &ldz, work.getDataPtr(), &iwork[0], &ifail[0], &info); if(info < 0) { std::cerr << "SymBandEigenSolver::solve() -- invalid argument number " << -info << " passed to LAPACK dsbevx\n"; return info; } if(info > 0) { std::cerr << "SymBandEigenSolver::solve() -- LAPACK dsbevx returned error code " << info << std::endl; return -info; } if(m < numModes) { std::cerr << "SymBandEigenSolver::solve() -- LAPACK dsbevx only computed " << m << " eigenvalues, " << numModes << "were requested\n"; numModes = m; } theSOE->factored = true; // make modifications to the eigenvectors // Eij -> Mi Eij (based on new_ M) M= theSOE->M.getDataPtr(); if(M) { for(int j=0; j<numModes; j++) { double *eigVectJptr = &eigenvector[j*ldz]; double *MPtr = M; for(int i=0; i<size; i++) *eigVectJptr++ *= *MPtr++; } } return 0; }
/// 计算结果存储在矩阵a中 /// n_global: the order of the matrix static void inv_driver(blas_idx_t n_global) { auto grid = std::make_shared<blacs_grid_t>(); //// self code //n_global = 3; //double *aaa = new double(n_global*n_global); //for (int i = 0; i < 9; i++) //{ // aaa[i] = i + 1; //} //aaa[8] = 10; //auto a = block_cyclic_mat_t::createWithArray(grid, n_global, n_global, aaa); // Create a NxN random matrix A auto a = block_cyclic_mat_t::random(grid, n_global, n_global); // Create a NxN matrix to hold A^{-1} auto ai = block_cyclic_mat_t::constant(grid, n_global, n_global); // Copy A to A^{-1} since it will be overwritten during factorization std::copy_n(a->local_data(), a->local_size(), ai->local_data()); MPI_Barrier (MPI_COMM_WORLD); double t0 = MPI_Wtime(); // Factorize A blas_idx_t ia = 1, ja = 1; std::vector<blas_idx_t> ipiv(a->local_rows() + a->row_block_size() + 100); blas_idx_t info; //含义应该是D-GE-TRF。 //第一个D表示我们的矩阵是double类型的 //GE表示我们的矩阵是General类型的 //TRF表示对矩阵进行三角分解也就是我们通常所说的LU分解。 pdgetrf_(n_global, n_global, ai->local_data(), ia, ja, ai->descriptor(), ipiv.data(), info); assert(info == 0); double t_factor = MPI_Wtime() - t0; // Compute A^{-1} based on the LU factorization // Compute workspace for double and integer work arrays on each process blas_idx_t lwork = 10; blas_idx_t liwork = 10; std::vector<double> work (lwork); std::vector<blas_idx_t> iwork(liwork); lwork = liwork = -1; // 计算lwork与liwork的值 pdgetri_(n_global, ai->local_data(), ia, ja, ai->descriptor(), ipiv.data(), work.data(), lwork, iwork.data(), liwork, info); assert(info == 0); lwork = static_cast<blas_idx_t>(work[0]); liwork = static_cast<size_t>(iwork[0]); work.resize(lwork); iwork.resize(liwork); // Now compute the inverse t0 = MPI_Wtime(); pdgetri_(n_global, ai->local_data(), ia, ja, ai->descriptor(), ipiv.data(), work.data(), lwork, iwork.data(), liwork, info); assert(info == 0); double t_solve = MPI_Wtime() - t0; // Verify that the inverse is correct using A*A^{-1} = I auto identity = block_cyclic_mat_t::diagonal(grid, n_global, n_global); // Compute I = A * A^{-1} - I and verify that the ||I|| is small char nein = 'N'; double alpha = 1.0, beta = -1.0; pdgemm_(nein, nein, n_global, n_global, n_global, alpha, a->local_data() , ia, ja, a->descriptor(), ai->local_data(), ia, ja, ai->descriptor(), beta, identity->local_data(), ia, ja, identity->descriptor()); // Compute 1-norm of the result char norm='1'; work.resize(identity->local_cols()); double err = pdlange_(norm, n_global, n_global, identity->local_data(), ia, ja, identity->descriptor(), work.data()); double t_total = t_factor + t_solve; double t_glob; MPI_Reduce(&t_total, &t_glob, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); if (grid->iam() == 0) { double gflops = getri_flops(n_global)/t_glob/grid->nprocs(); printf("\n" "MATRIX INVERSE BENCHMARK SUMMARY\n" "================================\n" "N = %d\tNP = %d\tNP_ROW = %d\tNP_COL = %d\n" "Time for PxGETRF + PxGETRI = %10.7f seconds\tGflops/Proc = %10.7f, Error = %f\n", n_global, grid->nprocs(), grid->nprows(), grid->npcols(), t_glob, gflops, err);fflush(stdout); } }
bool eigen_lapack(int n, vector_t & A, vector_t & S, matrix_t & V) { // Use eigenvalue decomposition instead of SVD // Get only the highest eigen-values, (par::cluster_mds_dim) int i1 = n - par::cluster_mds_dim + 1; int i2 = n; double z = -1; // Integer workspace size, 5N vector<int> iwork(5*n,0); double optim_lwork; int lwork = -1; int out_m; vector_t out_w( par::cluster_mds_dim , 0 ); vector_t out_z( n * par::cluster_mds_dim ,0 ); int ldz = n; vector<int> ifail(n,0); int info=0; double nz = 0; // Get workspace dsyevx_("V" , // get eigenvalues and eigenvectors "I" , // get interval of selected eigenvalues "L" , // data stored as upper triangular &n , // order of matrix &A[0] , // input matrix &n , // LDA &nz , // Vlower &nz , // Vupper &i1, // from 1st ... &i2, // ... to nth eigenvalue &z , // 0 for ABSTOL &out_m, // # of eigenvalues found &out_w[0], // first M entries contain sorted eigen-values &out_z[0], // array (can be mxm? nxn) &ldz, // make n at first &optim_lwork, // Get optimal workspace &lwork, // size of workspace &iwork[0], // int workspace &ifail[0], // output: failed to converge &info ); // Assign workspace lwork = (int) optim_lwork; vector_t work( lwork, 0 ); dsyevx_("V" , // get eigenvalues and eigenvectors "I" , // get interval of selected eigenvalues "L" , // data stored as upper triangular &n , // order of matrix &A[0] , // input matrix &n , // LDA &nz , // Vlower &nz , // Vupper &i1, // from 1st ... &i2, // ... to nth eigenvalue &z , // 0 for ABSTOL &out_m, // # of eigenvalues found &out_w[0], // first M entries contain sorted eigen-values &out_z[0], // array (can be mxm? nxn) &ldz, // make n at first &work[0], // Workspace &lwork, // size of workspace &iwork[0], // int workspace &ifail[0], // output: failed to converge &info ); // Get eigenvalues, vectors for (int i=0; i< par::cluster_mds_dim; i++) S[i] = out_w[i]; for (int i=0; i<n; i++) for (int j=0;j<par::cluster_mds_dim; j++) V[i][j] = out_z[ i + j*n ]; return true; }
bool LinearFitableFunction::fitTheFunction() { if (basisSet == NULL) return false; Locker lock(*this); int numParams = basisSet->getNElements(); int numData = allSamplesToFit->getNElements(); if (numData < numParams) { mwarning(M_SYSTEM_MESSAGE_DOMAIN, "WARNING: Attempted fit without enough data."); return false; } if (VERBOSE_FITABLE_FUNCTION) { mprintf("Fitable function: n data=%d n params=%d", numData, numParams); } std::vector<float> B(numParams); std::vector<float> Y(numData); std::vector<float> X(numData * numParams); // create space to hold all Parameters if (Parameters != NULL) delete [] Parameters; Parameters = new float [numParams]; // get data back in correct format std::vector<double> temp(numInputs); // unfold the data into the prescribed basis. // the format here is Y = Xb // we start with the most recent data, but order does not matter shared_ptr<FitableSample> sampleToFit = allSamplesToFit->getFrontmost(); for (int n=0; n<numData;n++) { Datum *inputVector = sampleToFit->getInputVector(); if (VERBOSE_FITABLE_FUNCTION>1) { MWTime timeUS = sampleToFit->getTime(); // testing only mprintf("Fitable function: datum %d timeUS = %ld",n, (long)timeUS); } for (int i=0;i<numInputs;i++) { temp[i] = inputVector->getElement(i); } for (int p=0; p<numParams;p++) { // Need to use Fortran-style (i.e. column-major) ordering X[n + p*numData] = (basisSet->getElement(p))->applyBasis(temp.data()); } Y[n] = (float)(sampleToFit->getOutputData()); sampleToFit = sampleToFit->getNext(); // go to next sample on the list } // linear regression to find Parameters (SVD pseudo-inverse) // B = inv(XtX) XtY // using SGELSD from LAPACK { __CLPK_integer m = numData; __CLPK_integer n = numParams; __CLPK_integer nrhs = 1; __CLPK_real *a = X.data(); __CLPK_integer lda = m; __CLPK_real *b = Y.data(); __CLPK_integer ldb = m; std::vector<__CLPK_real> s(n); // The choice of N*epsilon for rcond comes from _Numerical Recipes in C_, Section 15.4, // "Solution by Use of Singular Value Decomposition" __CLPK_real rcond = n * std::numeric_limits<__CLPK_real>::epsilon(); __CLPK_integer rank; std::vector<__CLPK_real> work(1); __CLPK_integer lwork = -1; std::vector<__CLPK_integer> iwork(1); __CLPK_integer info; // We call SGELSD two times: once to determine appropriate sizes for work and iwork, and once to // perform the actual fit for (int numReps = 0; numReps < 2; numReps++) { sgelsd_(&m, &n, &nrhs, a, &lda, b, &ldb, s.data(), &rcond, &rank, work.data(), &lwork, iwork.data(), &info); if (info != 0) { merror(M_GENERIC_MESSAGE_DOMAIN, "Fitable function: SGELSD returned %d", int(info)); return false; } if (-1 == lwork) { // Set sizes of work and iwork for next pass work.resize(work.at(0)); lwork = work.size(); iwork.resize(iwork.at(0)); } else { // Store solution for (int p = 0; p < numParams; p++) { B[p] = b[p]; } } } } // save for later application for (int p=0; p<numParams;p++) { Parameters[p] = B[p]; if (VERBOSE_FITABLE_FUNCTION) { mprintf("Fitable function: final fit param %d = %f5", p, Parameters[p]); } } return true; }