Пример #1
0
void matrix::svd(matrix& U, diagMatrix& S, matrix& Vdag) const
{	static StopWatch watch("matrix::svd");
	watch.start();
	//Initialize input and outputs:
	matrix A = *this; //destructible copy
	int M = A.nRows();
	int N = A.nCols();
	U.init(M,M);
	Vdag.init(N,N);
	S.resize(std::min(M,N));
	//Initialize temporaries:
	char jobz = 'A'; //full SVD (return complete unitary matrices)
	int lwork = 2*(M*N + M + N);
	std::vector<complex> work(lwork);
	std::vector<double> rwork(S.nRows() * std::max(5*S.nRows()+7, 2*(M+N)+1));
	std::vector<int> iwork(8*S.nRows());
	//Call LAPACK and check errors:
	int info=0;
	zgesdd_(&jobz, &M, &N, A.data(), &M, S.data(), U.data(), &M, Vdag.data(), &N,
		work.data(), &lwork, rwork.data(), iwork.data(), &info);
	if(info>0) //convergence failure; try the slower stabler version
	{	int info=0;
		matrix A = *this; //destructible copy
		zgesvd_(&jobz, &jobz, &M, &N, A.data(), &M, S.data(), U.data(), &M, Vdag.data(), &N,
			work.data(), &lwork, rwork.data(), &info);
		if(info<0) { logPrintf("Argument# %d to LAPACK SVD routine ZGESVD is invalid.\n", -info); stackTraceExit(1); }
		if(info>0) { logPrintf("Error code %d in LAPACK SVD routine ZGESVD.\n", info); stackTraceExit(1); }
	}
	if(info<0) { logPrintf("Argument# %d to LAPACK SVD routine ZGESDD is invalid.\n", -info); stackTraceExit(1); }
	watch.stop();
}
Пример #2
0
void HPreData::makeACopy() {
    // Make a A copy

    int i,j,k;
    vector<int> iwork(numColOriginal, 0);
    Astart.assign(numColOriginal + 1, 0);
    int AcountX = ARindex.size();
    Aindex.resize(AcountX);
    Avalue.resize(AcountX);
    for (int k = 0; k < AcountX; k++)
    	if (ARindex[k] < numColOriginal)
    		iwork[ARindex[k]]++;
    for (i = 1; i <= numColOriginal; i++)
        Astart[i] = Astart[i - 1] + iwork[i - 1];
    for (i = 0; i < numColOriginal; i++)
        iwork[i] = Astart[i];
    for (int iRow = 0; iRow < numRowOriginal; iRow++) {
        for (k = ARstart[iRow]; k < ARstart[iRow + 1]; k++) {
            int iColumn = ARindex[k];
            if (iColumn != numColOriginal) {
				int iPut = iwork[iColumn]++;
				Aindex[iPut] = iRow;
				Avalue[iPut] = ARvalue[k];
            }
        }
    }

    Aend.resize(numColOriginal + 1, 0);
    for (i = 0; i < numColOriginal; i++)
        Aend[i] = Astart[i + 1];
}
GSVD GSVD_create_d (double **m1, long numberOfRows1, long numberOfColumns, double **m2, long numberOfRows2) {
	try {
		long m = numberOfRows1, n = numberOfColumns, p = numberOfRows2;
		long lwork = MAX (MAX (3 * n, m), p) + n;

		// Store the matrices a and b as column major!
		autoNUMmatrix<double> a (NUMmatrix_transpose (m1, m, n), 1, 1);
		autoNUMmatrix<double> b (NUMmatrix_transpose (m2, p, n), 1, 1);
		autoNUMmatrix<double> q (1, n, 1, n);
		autoNUMvector<double> alpha (1, n);
		autoNUMvector<double> beta (1, n);
		autoNUMvector<double> work (1, lwork);
		autoNUMvector<long> iwork (1, n);


		char jobu1 = 'N', jobu2 = 'N', jobq = 'Q';
		long k, l, info;
		NUMlapack_dggsvd (&jobu1, &jobu2, &jobq, &m, &n, &p, &k, &l,
		    &a[1][1], &m, &b[1][1], &p, &alpha[1], &beta[1], NULL, &m,
		    NULL, &p, &q[1][1], &n, &work[1], &iwork[1], &info);
		if (info != 0) {
			Melder_throw ("dggsvd failed, error = ", info);
		}


		long kl = k + l;
		autoGSVD me = GSVD_create (kl);

		for (long i = 1; i <= kl; i++) {
			my d1[i] = alpha[i];
			my d2[i] = beta[i];
		}

		// Transpose q

		for (long i = 1; i <= n; i++) {
			for (long j = i + 1; j <= n; j++) {
				my q[i][j] = q[j][i];
				my q[j][i] = q[i][j];
			}
			my q[i][i] = q[i][i];
		}

		// Get R from a(1:k+l,n-k-l+1:n)

		double *pr = &a[1][1];
		for (long i = 1; i <= kl; i++) {
			for (long j = i; j <= kl; j++) {
				my r[i][j] = pr[i - 1 + (n - kl + j - 1) * m]; /* from col-major */
			}
		}
		return me.transfer();
	} catch (MelderError) {
		Melder_throw ("GSVD not created.");
	}
}
Пример #4
0
int ccdl::LeastSquaresSvdFit
( int const nobs, int const nparam,
  double const * A_obs_by_param,
  double * x_param,
  double const * b_obs,
  double TOL )
{
  // min (xt.At-bt).(A.x-b)

  std::vector<double> A( A_obs_by_param, A_obs_by_param + nparam*nobs );
  int nmax = std::max( nparam, nobs );
  std::vector<double> X( nmax, 0. );
  std::copy( b_obs, b_obs + nobs, X.data() );
  std::vector<double> S( nmax, 0. );
  int LWORK = -1;
  std::vector<int> iwork(1,0);
  int INFO = 0;
  double twork = 0;
  int rank = 0;
  int nrhs=1;

  dgelsd_( &nobs, &nparam, &nrhs,
	   A.data(), &nobs, X.data(), &nmax,
	   S.data(), &TOL, &rank, &twork, &LWORK, iwork.data(), &INFO );

  
  if ( INFO == 0 )
    {
      LWORK = twork+1;
      std::vector<double> WORK( LWORK, 0. );
      int LIWORK = iwork[0];
      iwork.resize( LIWORK );
      
#ifdef PDBG
      std::printf("dgelsd_\n");
#endif
      
      dgelsd_( &nobs, &nparam, &nrhs,
	       A.data(), &nobs, X.data(), &nmax,
	       S.data(), &TOL, &rank, WORK.data(), &LWORK, iwork.data(), &INFO );

#ifdef PDBG
      std::printf("return %i\n",INFO);
#endif
      std::copy( X.data(), X.data() + nparam, x_param );
    }
  else
    {
      std::fill( x_param, x_param + nparam, 0. );
    };
  return INFO;
}
Пример #5
0
void DivideAndConquerSVD
( int m, int n, dcomplex* A, int lda, 
  double* s, dcomplex* U, int ldu, dcomplex* VAdj, int ldva )
{
#ifndef RELEASE
    PushCallStack("lapack::DivideAndConquerSVD");
#endif
    if( m==0 || n==0 )
    {
#ifndef RELEASE
        PopCallStack();
#endif
        return;
    }

    const char jobz='S';
    int lwork=-1, info;
    dcomplex dummyWork;
    const int k = std::min(m,n);
    const int K = std::max(m,n);
    const int lrwork = k*std::max(5*k+7,2*K+2*k+1);
    std::vector<double> rwork(lrwork);
    std::vector<int> iwork(8*k);

    LAPACK(zgesdd)
    ( &jobz, &m, &n, A, &lda, s, U, &ldu, VAdj, &ldva, &dummyWork, &lwork,
      &rwork[0], &iwork[0], &info );

    lwork = dummyWork.real;
    std::vector<dcomplex> work(lwork);
    LAPACK(zgesdd)
    ( &jobz, &m, &n, A, &lda, s, U, &ldu, VAdj, &ldva, &work[0], &lwork,
      &rwork[0], &iwork[0], &info );
    if( info < 0 )
    {
        std::ostringstream msg;
        msg << "Argument " << -info << " had illegal value";
        throw std::logic_error( msg.str().c_str() );
    }
    else if( info > 0 )
    {
        throw std::runtime_error("zgesdd's updating process failed");
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Пример #6
0
void matrix::diagonalize(matrix& evecs, diagMatrix& eigs) const
{	static StopWatch watch("matrix::diagonalize");
	watch.start();
	
	myassert(nCols()==nRows());
	int N = nRows();
	myassert(N > 0);
	
	//Check hermiticity
	const complex* thisData = data();
	double errNum=0.0, errDen=0.0;
	for(int i=0; i<N; i++)
		for(int j=0; j<N; j++)
		{	errNum += norm(thisData[index(i,j)]-thisData[index(j,i)].conj());
			errDen += norm(thisData[index(i,j)]);
		}
	double hermErr = sqrt(errNum / (errDen*N));
	if(hermErr > 1e-10)
	{	logPrintf("Relative hermiticity error of %le (>1e-10) encountered in diagonalize\n", hermErr);
		stackTraceExit(1);
	}
	
	char jobz = 'V'; //compute eigenvectors and eigenvalues
	char range = 'A'; //compute all eigenvalues
	char uplo = 'U'; //use upper-triangular part
	matrix A = *this; //copy input matrix (zheevr destroys input matrix)
	double eigMin = 0., eigMax = 0.; //eigenvalue range (not used for range-type 'A')
	int indexMin = 0, indexMax = 0; //eignevalue index range (not used for range-type 'A')
	double absTol = 0.; int nEigsFound;
	eigs.resize(N);
	evecs.init(N, N);
	std::vector<int> iSuppz(2*N);
	int lwork = (64+1)*N; std::vector<complex> work(lwork); //Magic number 64 obtained by running ILAENV as suggested in doc of zheevr (and taking the max over all N)
	int lrwork = 24*N; std::vector<double> rwork(lrwork); //from doc of zheevr
	int liwork = 10*N; std::vector<int> iwork(liwork); //from doc of zheevr
	int info=0;
	zheevr_(&jobz, &range, &uplo, &N, A.data(), &N,
		&eigMin, &eigMax, &indexMin, &indexMax, &absTol, &nEigsFound,
		eigs.data(), evecs.data(), &N, iSuppz.data(), work.data(), &lwork,
		rwork.data(), &lrwork, iwork.data(), &liwork, &info);
	if(info<0) { logPrintf("Argument# %d to LAPACK eigenvalue routine ZHEEVR is invalid.\n", -info); stackTraceExit(1); }
	if(info>0) { logPrintf("Error code %d in LAPACK eigenvalue routine ZHEEVR.\n", info); stackTraceExit(1); }
	watch.stop();
}
Пример #7
0
void DivideAndConquerSVD
( int m, int n, float* A, int lda, 
  float* s, float* U, int ldu, float* VTrans, int ldvt )
{
#ifndef RELEASE
    PushCallStack("lapack::DivideAndConquerSVD");
#endif
    if( m==0 || n==0 )
    {
#ifndef RELEASE
        PopCallStack();
#endif
        return;
    }

    const char jobz='S';
    int lwork=-1, info;
    float dummyWork;
    const int k = std::min(m,n);
    std::vector<int> iwork(8*k);

    LAPACK(sgesdd)
    ( &jobz, &m, &n, A, &lda, s, U, &ldu, VTrans, &ldvt, &dummyWork, &lwork,
      &iwork[0], &info );

    lwork = dummyWork;
    std::vector<float> work(lwork);
    LAPACK(sgesdd)
    ( &jobz, &m, &n, A, &lda, s, U, &ldu, VTrans, &ldvt, &work[0], &lwork,
      &iwork[0], &info );
    if( info < 0 )
    {
        std::ostringstream msg;
        msg << "Argument " << -info << " had illegal value";
        throw std::logic_error( msg.str().c_str() );
    }
    else if( info > 0 )
    {
        throw std::runtime_error("sgesdd's updating process failed");
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Пример #8
0
bool svd_lapack(int n, vector_t & A, vector_t & S, matrix_t & V)
{
  int m=n;
  vector_t U(n*n);
  vector_t tV(n*n);

  cout << "Using LAPACK SVD library function...\n";
    
#ifdef WITH_LAPACK
    
  int info=0;
    
  vector<int> iwork(8*m,0);    
  double optim_lwork;
  int lwork;
  lwork = -1;
  
  // Determine workspace needed
  dgesdd_("A", &m, &n, &A[0] , &m, &S[0], &U[0], &m, &tV[0], &n, &optim_lwork, &lwork, &iwork[0], &info);
  lwork = (int) optim_lwork;
  vector_t work( lwork, 0 );
  
  // Perform actual SVD
  dgesdd_("A", &m, &n, &A[0] , &m, &S[0], &U[0], &m, &tV[0], &n, &work[0], &lwork, &iwork[0], &info);
  
  // Copy and transpose V
  int k = 0;
  for( int i = 0; i < n; i++ )
    for( int j = 0; j < n; j++ )
      {
	V[j][i] = tV[k];
	++k;
      }
  
  return true;
  
#else
  
  // LAPACK support not compiled 
  return false;
  
#endif
  
}
Пример #9
0
void HPreData::makeARCopy() {
	// Make a AR copy
    int i,j,k;
    vector<int> iwork(numRow, 0);
    ARstart.resize(numRow + 1, 0);
    int AcountX = Aindex.size();
    ARindex.resize(AcountX);
    ARvalue.resize(AcountX);
    for (int k = 0; k < AcountX; k++)
        iwork[Aindex[k]]++;
    for (i = 1; i <= numRow; i++)
        ARstart[i] = ARstart[i - 1] + iwork[i - 1];
    for (i = 0; i < numRow; i++)
        iwork[i] = ARstart[i];
    for (int iCol = 0; iCol < numCol; iCol++) {
        for (k = Astart[iCol]; k < Astart[iCol + 1]; k++) {
            int iRow = Aindex[k];
            int iPut = iwork[iRow]++;
            ARindex[iPut] = iCol;
            ARvalue[iPut] = Avalue[k];
        }
    }
}
Пример #10
0
void bob::math::eigSym_(const blitz::Array<double,2>& A, const blitz::Array<double,2>& B,
  blitz::Array<double,2>& V, blitz::Array<double,1>& D)
{
  // Size variable
  const int N = A.extent(0);

  // Prepares to call LAPACK function
  // Initialises LAPACK variables
  const int itype = 1;
  const char jobz = 'V'; // Get both the eigenvalues and the eigenvectors
  const char uplo = 'U';
  int info = 0;
  const int lda = N;
  const int ldb = N;

  // Initialises LAPACK arrays
  blitz::Array<double,2> A_blitz_lapack;
  // Tries to use V directly
  blitz::Array<double,2> Vt = V.transpose(1,0);
  const bool V_direct_use = bob::core::array::isCZeroBaseContiguous(Vt);
  if (V_direct_use)
  {
    A_blitz_lapack.reference(Vt);
    // Ugly fix for non-const transpose
    A_blitz_lapack = const_cast<blitz::Array<double,2>&>(A).transpose(1,0);
  }
  else
    // Ugly fix for non-const transpose
    A_blitz_lapack.reference(
      bob::core::array::ccopy(const_cast<blitz::Array<double,2>&>(A).transpose(1,0)));
  double *A_lapack = A_blitz_lapack.data();
  // Ugly fix for non-const transpose
  blitz::Array<double,2> B_blitz_lapack(
    bob::core::array::ccopy(const_cast<blitz::Array<double,2>&>(B).transpose(1,0)));
  double *B_lapack = B_blitz_lapack.data();
  blitz::Array<double,1> D_blitz_lapack;
  const bool D_direct_use = bob::core::array::isCZeroBaseContiguous(D);
  if (D_direct_use)
    D_blitz_lapack.reference(D);
  else
    D_blitz_lapack.resize(D.shape());
  double *D_lapack = D_blitz_lapack.data();

  // Calls the LAPACK function
  // A/ Queries the optimal size of the working arrays
  const int lwork_query = -1;
  double work_query;
  const int liwork_query = -1;
  int iwork_query;
  dsygvd_( &itype, &jobz, &uplo, &N, A_lapack, &lda, B_lapack, &ldb, D_lapack,
    &work_query, &lwork_query, &iwork_query, &liwork_query, &info);
  // B/ Computes the generalized eigenvalue decomposition
  const int lwork = static_cast<int>(work_query);
  boost::shared_array<double> work(new double[lwork]);
  const int liwork = static_cast<int>(iwork_query);
  boost::shared_array<int> iwork(new int[liwork]);
  dsygvd_( &itype, &jobz, &uplo, &N, A_lapack, &lda, B_lapack, &ldb, D_lapack,
    work.get(), &lwork, iwork.get(), &liwork, &info);

  // Checks info variable
  if (info != 0)
    throw std::runtime_error("The LAPACK function 'dsygvd' returned a non-zero value. This might be caused by a non-positive definite B matrix.");

  // Copy singular vectors back to V if required
  if (!V_direct_use)
    V = A_blitz_lapack.transpose(1,0);

  // Copy result back to sigma if required
  if (!D_direct_use)
    D = D_blitz_lapack;
}
Пример #11
0
int XC::SymBandEigenSolver::solve(int nModes)
  {
    if(!theSOE)
      {
        std::cerr << "SymBandEigenSolver::solve() -- no XC::EigenSOE has been set yet\n";
        return -1;
      }
  
    // Set number of modes
    numModes= nModes;

    // Number of equations
    int n= theSOE->size;

    // Check for quick return
    if(numModes < 1)
      {
        numModes= 0;
        return 0;
      }

    // Simple check
    if(numModes > n)
      numModes= n;

    // Allocate storage for eigenvalues
    eigenvalue.resize(n);

    // Real work array (see LAPACK dsbevx subroutine documentation)
    work.resize(7*n);

    // Integer work array (see LAPACK dsbevx subroutine documentation)
    std::vector<int> iwork(5*n);

    // Leading dimension of eigenvectors
    int ldz = n;

    // Allocate storage for eigenvectors
    eigenvector.resize(ldz*numModes);

    // Number of superdiagonals
    int kd= theSOE->numSuperD;

    // Matrix data
    double *ab= theSOE->A.getDataPtr();

    // Leading dimension of the matrix
    int ldab= kd + 1;

    // Leading dimension of q
    int ldq= n;

    // Orthogonal matrix used in reduction to tridiagonal form
    // (see LAPACK dsbevx subroutine documentation)
    std::vector<double> q(ldq*n);

    // Index ranges [1,numModes] of eigenpairs to compute
    int il = 1;
    int iu = numModes;

    // Compute eigenvalues and eigenvectors
    char jobz[] = "V";

    // Selected eigenpairs are based on index range [il,iu]
    char range[] = "I";

    // Upper triagle of matrix is stored
    char uplo[] = "U";
  
    // Return value
    std::vector<int> ifail(n);
    int info= 0;

    // Number of eigenvalues returned
    int m= 0;

    // Not used
    double vl = 0.0;
    double vu = 1.0;

    // Not used ... I think!
    double abstol = -1.0;


    // if Mass matrix we make modifications to A:
    //         A -> M^(-1/2) A M^(-1/2)
    double *M= theSOE->M.getDataPtr();
    double *A= theSOE->A.getDataPtr();
    int numSuperD = theSOE->numSuperD;
    int size = n;
    if(M) //Its seems that the M matrix must be DIAGONAL.
      {
        int i,j;
        bool singular = false;
        // form M^(-1/2) and check for singular mass matrix
        for(int k=0; k<size; k++)
          {
            if(M[k] == 0.0)
              {
	        singular = true;
	        // alternative is to set as a small no ~ 1e-10 times smallest m(i,i) != 0.0
	        std::cerr << "SymBandEigenSolver::solve() - M matrix singular\n";
	        return -1;
              }
            else
              {
	        M[k] = 1.0/sqrt(M[k]);
              }
          }
        // make modifications to A
        // Aij -> Mi Aij Mj  (based on new_ M)
        for(i=0; i<size; i++)
          {
            double *AijPtr = A +(i+1)*(numSuperD+1) - 1;
            int minColRow = i - numSuperD;
            if(minColRow < 0) minColRow = 0;
            for(j=i; j>=minColRow; j--)
              {
	        *AijPtr *= M[j]*M[i];
	        AijPtr--;
              }
          }
      }

    // Calls the LAPACK routine that computes the eigenvalues and eigenvectors
    // of the matrix A previously transforme.
    dsbevx_(jobz, range, uplo, &n, &kd, ab, &ldab,
	    &q[0], &ldq, &vl, &vu, &il, &iu, &abstol, &m,
	    eigenvalue.getDataPtr(), eigenvector.getDataPtr(), &ldz, work.getDataPtr(), &iwork[0], &ifail[0], &info);

    if(info < 0)
      {
        std::cerr << "SymBandEigenSolver::solve() -- invalid argument number " << -info << " passed to LAPACK dsbevx\n";
        return info;
      }

    if(info > 0)
      {
        std::cerr << "SymBandEigenSolver::solve() -- LAPACK dsbevx returned error code " << info << std::endl;
        return -info;
      }

    if(m < numModes)
      {
        std::cerr << "SymBandEigenSolver::solve() -- LAPACK dsbevx only computed " << m << " eigenvalues, " <<
        numModes << "were requested\n";
        numModes = m;
      }

    theSOE->factored = true;

    // make modifications to the eigenvectors
    //   Eij -> Mi Eij  (based on new_ M)

    M= theSOE->M.getDataPtr();
    if(M)
      {
        for(int j=0; j<numModes; j++)
          {
            double *eigVectJptr = &eigenvector[j*ldz];
            double *MPtr = M;
            for(int i=0; i<size; i++) 
	      *eigVectJptr++ *= *MPtr++;
          }
      }
    return 0;
  }
Пример #12
0
///  计算结果存储在矩阵a中
///  n_global: the order of the matrix
static void inv_driver(blas_idx_t n_global)		
{

    auto grid = std::make_shared<blacs_grid_t>();
	
	//// self code
	//n_global = 3;
	//double *aaa = new double(n_global*n_global);
	//for (int i = 0; i < 9; i++)
	//{
	//	aaa[i] = i + 1;
	//}
	//aaa[8] = 10;
	//auto a = block_cyclic_mat_t::createWithArray(grid, n_global, n_global, aaa);


    // Create a NxN random matrix A
    auto a = block_cyclic_mat_t::random(grid, n_global, n_global);        

    // Create a NxN matrix to hold A^{-1}
    auto ai = block_cyclic_mat_t::constant(grid, n_global, n_global);

    // Copy A to A^{-1} since it will be overwritten during factorization
    std::copy_n(a->local_data(), a->local_size(), ai->local_data());

    MPI_Barrier (MPI_COMM_WORLD);

    double t0 = MPI_Wtime();
    
    // Factorize A 
    blas_idx_t ia = 1, ja = 1;
    std::vector<blas_idx_t> ipiv(a->local_rows() + a->row_block_size() + 100);
    blas_idx_t info;

	//含义应该是D-GE-TRF。
	//第一个D表示我们的矩阵是double类型的
	//GE表示我们的矩阵是General类型的
	//TRF表示对矩阵进行三角分解也就是我们通常所说的LU分解。
    pdgetrf_(n_global, n_global, 
        ai->local_data(), ia, ja, ai->descriptor(), 
        ipiv.data(), 
        info);
    assert(info == 0);
    double t_factor = MPI_Wtime() - t0;

    // Compute A^{-1} based on the LU factorization

    // Compute workspace for double and integer work arrays on each process
    blas_idx_t lwork  = 10;
    blas_idx_t liwork = 10;
    std::vector<double>     work (lwork); 
    std::vector<blas_idx_t> iwork(liwork);

    lwork = liwork = -1;   

	// 计算lwork与liwork的值
    pdgetri_(n_global, 
        ai->local_data(), ia, ja, ai->descriptor(), 
        ipiv.data(), 
        work.data(), lwork, iwork.data(), liwork, info);
    assert(info == 0);
    lwork  = static_cast<blas_idx_t>(work[0]);
    liwork = static_cast<size_t>(iwork[0]);
    work.resize(lwork);
    iwork.resize(liwork);

    // Now compute the inverse
    t0 = MPI_Wtime();
    pdgetri_(n_global, 
        ai->local_data(), ia, ja, ai->descriptor(), 
        ipiv.data(), 
        work.data(), lwork, iwork.data(), liwork, info);
    assert(info == 0);
    double t_solve = MPI_Wtime() - t0;

    // Verify that the inverse is correct using A*A^{-1} = I
    auto identity = block_cyclic_mat_t::diagonal(grid, n_global, n_global);

    // Compute I = A * A^{-1} - I and verify that the ||I|| is small    
    char nein = 'N';
    double alpha = 1.0, beta = -1.0;
    pdgemm_(nein, nein, n_global, n_global, n_global, alpha, 
        a->local_data() , ia, ja, a->descriptor(),
        ai->local_data(), ia, ja, ai->descriptor(),
        beta,
        identity->local_data(), ia, ja, identity->descriptor());

    // Compute 1-norm of the result
    char norm='1';
    work.resize(identity->local_cols());
    double err = pdlange_(norm, n_global, n_global, 
        identity->local_data(), ia, ja, identity->descriptor(), work.data());

    double t_total = t_factor + t_solve;
    double t_glob;
    MPI_Reduce(&t_total, &t_glob, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);

    if (grid->iam() == 0) 
    {
        double gflops = getri_flops(n_global)/t_glob/grid->nprocs();
        printf("\n"
            "MATRIX INVERSE BENCHMARK SUMMARY\n"
            "================================\n"
            "N = %d\tNP = %d\tNP_ROW = %d\tNP_COL = %d\n"
            "Time for PxGETRF + PxGETRI = %10.7f seconds\tGflops/Proc = %10.7f, Error = %f\n",
            n_global, grid->nprocs(), grid->nprows(), grid->npcols(), 
            t_glob, gflops, err);fflush(stdout);
    }
}
Пример #13
0
bool eigen_lapack(int n, vector_t & A, vector_t & S, matrix_t & V)
{
    
  // Use eigenvalue decomposition instead of SVD
  // Get only the highest eigen-values, (par::cluster_mds_dim)
  
  int i1 = n - par::cluster_mds_dim + 1;
  int i2 = n;
  double z = -1;
  
  // Integer workspace size, 5N
  vector<int> iwork(5*n,0);    
  
  double optim_lwork;
  int lwork = -1;
  
  int out_m;
  vector_t out_w( par::cluster_mds_dim , 0 );
  vector_t out_z( n * par::cluster_mds_dim ,0 );
  
  int ldz = n;
  vector<int> ifail(n,0);
  int info=0;
  double nz = 0;
  
  // Get workspace
  
  dsyevx_("V" ,         // get eigenvalues and eigenvectors
	  "I" ,         // get interval of selected eigenvalues
	  "L" ,         // data stored as upper triangular
	  &n  ,         // order of matrix
	  &A[0] ,       // input matrix
	  &n ,          // LDA
	  &nz ,         // Vlower
	  &nz ,         // Vupper
	  &i1,          // from 1st ...
	  &i2,          // ... to nth eigenvalue
	  &z ,          // 0 for ABSTOL
	  &out_m,       // # of eigenvalues found
	  &out_w[0],    // first M entries contain sorted eigen-values
	  &out_z[0],    // array (can be mxm? nxn)
	  &ldz,         // make n at first
	  &optim_lwork, // Get optimal workspace 
	  &lwork,       // size of workspace
	  &iwork[0],    // int workspace
	  &ifail[0],    // output: failed to converge
	  &info );
  
  // Assign workspace
  
  lwork = (int) optim_lwork;
  vector_t work( lwork, 0 );

  dsyevx_("V" ,      // get eigenvalues and eigenvectors
	  "I" ,      // get interval of selected eigenvalues
	  "L" ,      // data stored as upper triangular
	  &n  ,      // order of matrix
	  &A[0] ,    // input matrix
	  &n ,       // LDA
	  &nz ,      // Vlower
	  &nz ,      // Vupper
	  &i1,       // from 1st ...
	  &i2,       // ... to nth eigenvalue
	  &z ,       // 0 for ABSTOL
	  &out_m,    // # of eigenvalues found
	  &out_w[0], // first M entries contain sorted eigen-values
	  &out_z[0], // array (can be mxm? nxn)
	  &ldz,      // make n at first
	  &work[0],  // Workspace
	  &lwork,    // size of workspace
	  &iwork[0], // int workspace
	  &ifail[0], // output: failed to converge
	  &info );      

  // Get eigenvalues, vectors
  for (int i=0; i< par::cluster_mds_dim; i++)
    S[i] = out_w[i];
  
  for (int i=0; i<n; i++)
    for (int j=0;j<par::cluster_mds_dim; j++)
      V[i][j] = out_z[ i + j*n ];
  
  return true;
  
}
Пример #14
0
bool LinearFitableFunction::fitTheFunction() {

    if (basisSet == NULL) return false;
    
    Locker lock(*this);
    
    int numParams = basisSet->getNElements();
    int numData = allSamplesToFit->getNElements();
    if (numData < numParams) {
        mwarning(M_SYSTEM_MESSAGE_DOMAIN,
			"WARNING: Attempted fit without enough data.");
        return false;
    }

    if (VERBOSE_FITABLE_FUNCTION) {
        mprintf("Fitable function: n data=%d n params=%d", numData, numParams);
    }

    std::vector<float> B(numParams);
    std::vector<float> Y(numData);
    std::vector<float> X(numData * numParams);
    
    // create space to hold all Parameters
    if (Parameters != NULL) delete [] Parameters;
    Parameters = new float [numParams];

    // get data back in correct format
    std::vector<double> temp(numInputs);
    
    
    // unfold the data into the prescribed basis.
    // the format here is Y = Xb 
    
    // we start with the most recent data, but order does not matter
    shared_ptr<FitableSample> sampleToFit = allSamplesToFit->getFrontmost(); 
    
    for (int n=0; n<numData;n++) {
        
        Datum *inputVector = sampleToFit->getInputVector();
        
        if (VERBOSE_FITABLE_FUNCTION>1) {
            MWTime timeUS = sampleToFit->getTime();        // testing only
            mprintf("Fitable function: datum %d timeUS = %ld",n, (long)timeUS);
        }

        for (int i=0;i<numInputs;i++) {
            temp[i] = inputVector->getElement(i);
        }
        for (int p=0; p<numParams;p++) {
            // Need to use Fortran-style (i.e. column-major) ordering
            X[n + p*numData] = (basisSet->getElement(p))->applyBasis(temp.data());
        }
        
        Y[n] = (float)(sampleToFit->getOutputData());
        
        sampleToFit = sampleToFit->getNext();  // go to next sample on the list
    }


    // linear regression to find Parameters (SVD pseudo-inverse)
    // B = inv(XtX) XtY
    // using SGELSD from LAPACK
    {
        __CLPK_integer m = numData;
        __CLPK_integer n = numParams;
        __CLPK_integer nrhs = 1;
        __CLPK_real *a = X.data();
        __CLPK_integer lda = m;
        __CLPK_real *b = Y.data();
        __CLPK_integer ldb = m;
        std::vector<__CLPK_real> s(n);
        // The choice of N*epsilon for rcond comes from _Numerical Recipes in C_, Section 15.4,
        // "Solution by Use of Singular Value Decomposition"
        __CLPK_real rcond = n * std::numeric_limits<__CLPK_real>::epsilon();
        __CLPK_integer rank;
        std::vector<__CLPK_real> work(1);
        __CLPK_integer lwork = -1;
        std::vector<__CLPK_integer> iwork(1);
        __CLPK_integer info;
        
        // We call SGELSD two times: once to determine appropriate sizes for work and iwork, and once to
        // perform the actual fit
        for (int numReps = 0; numReps < 2; numReps++) {
            sgelsd_(&m,
                    &n,
                    &nrhs,
                    a,
                    &lda,
                    b,
                    &ldb,
                    s.data(),
                    &rcond,
                    &rank,
                    work.data(),
                    &lwork,
                    iwork.data(),
                    &info);
            
            if (info != 0) {
                merror(M_GENERIC_MESSAGE_DOMAIN, "Fitable function: SGELSD returned %d", int(info));
                return false;
            }
            
            if (-1 == lwork) {
                // Set sizes of work and iwork for next pass
                work.resize(work.at(0));
                lwork = work.size();
                iwork.resize(iwork.at(0));
            } else {
                // Store solution
                for (int p = 0; p < numParams; p++) {
                    B[p] = b[p];
                }
            }
        }
    }
    
    // save for later application
    for (int p=0; p<numParams;p++) {
        Parameters[p] = B[p];
        if (VERBOSE_FITABLE_FUNCTION) {
            mprintf("Fitable function: final fit param %d = %f5", p, Parameters[p]);
        }
    }
    
    return true;
    
}