Пример #1
0
/**
 * Description not yet available.
 * \param
 */
dmatrix sin(const dmatrix& m)
{
  ivector cmin(m.rowmin(),m.rowmax());
  ivector cmax(m.rowmin(),m.rowmax());
  int i;
  for (i=m.rowmin();i<=m.rowmax();i++)
  {
    cmin(i)=m(i).indexmin();
    cmax(i)=m(i).indexmax();
  }
  dmatrix tmp(m.rowmin(),m.rowmax(),cmin,cmax);
  for (i=m.rowmin();i<=m.rowmax();i++)
  {
    tmp(i)=sin(m(i));
  }
  return tmp;
}
Пример #2
0
/**
 * Description not yet available.
 * \param
 */
dmatrix elem_prod(const dmatrix& m, const dmatrix& m2)
{
  ivector cmin(m.rowmin(),m.rowmax());
  ivector cmax(m.rowmin(),m.rowmax());
  int i;
  for (i=m.rowmin();i<=m.rowmax();i++)
  {
    cmin(i)=m(i).indexmin();
    cmax(i)=m(i).indexmax();
  }
  dmatrix tmp(m.rowmin(),m.rowmax(),cmin,cmax);
  for (i=m.rowmin();i<=m.rowmax();i++)
  {
    tmp(i)=elem_prod(m(i),m2(i));
  }
  return tmp;
}
Пример #3
0
/**
Return the sum of the diagonal of a square matrix mat.

\param mat is a square scalar matrix
*/
double trace(const dmatrix& mat)
{
  if (mat.colmin() != mat.rowmin() || mat.colmax() != mat.rowmax() )
  {
    cerr << "Matrix not square in trace\n";
    ad_exit(1);
  }

  double sum = 0.0;
  for (int i = mat.colmin(); i <= mat.colmax(); ++i)
  {
    sum += mat.elem(i, i);
  }
  return sum;
}
 double clusteringValidity::getAverageToCentroidDiameter(const dmatrix& m1) const {
   dvector a(m1.columns());
   int i,j;
   l2Distance<double> dist;
   double distance=0.0;
   for (i=0; i<m1.rows(); i++) {
     a.add(m1.getRow(i));
   }
   a.divide(m1.rows());
   for (j=0; j< m1.rows(); j++) {
     distance+=dist.apply(a,m1.getRow(j));
   }
   if (m1.rows()>0) {
     return (2*distance/(double)m1.rows());
   } else {
     return 2*distance;
   }
   
 }
 double clusteringValidity::getMaximumDistance(const dmatrix& m1,
                                               const dmatrix& m2) const {
   int i,j;
   dmatrix distances(m1.rows(),m2.rows());
   l2Distance<double> dist;
   for (i=0; i<m1.rows(); i++) {
     for (j=0; j<m2.rows(); j++) {
       distances[i][j]=dist.apply(m1.getRow(i),m2.getRow(j));
     }
   }
   return distances.maximum();
 }
 double clusteringValidity::getAverageDistance(const dmatrix& m1,
                                               const dmatrix& m2) const {
   double distance=0.0;
   int i,j;
   l2Distance<double> dist;
   for (i=0; i<m1.rows(); i++) {
     for (j=0; j<m2.rows(); j++) {
       distance+=dist.apply(m1.getRow(i),m2.getRow(j));
     }
   }
   distance=distance/((double)m1.rows()*(double)m2.rows());
   return distance;
 }
 double clusteringValidity::getAverageDiameter(const dmatrix& m1) const {
   double distance=0.0;
   int j,k;
   l2Distance<double> dist;
   for (j=0; j<m1.rows(); j++) {
     for (k=0; k<m1.rows(); k++) {
       distance+=dist.apply(m1.getRow(j),
                            m1.getRow(k));
     }
   }
   if (m1.rows()>1) {
     return (distance/((double)m1.rows()*
                       (double)(m1.rows()-1)));
   } else {
     return distance;
   }
 }
Пример #8
0
/**
 * Description not yet available.
 * \param
 */
dvariable ghk(const dvar_vector& lower,const dvar_vector& upper,
  const dvar_matrix& Sigma, const dmatrix& eps)
{
  RETURN_ARRAYS_INCREMENT();
  int n=lower.indexmax();
  int m=eps.indexmax();
  dvariable ssum=0.0;
  dvar_matrix ch=choleski_decomp(Sigma);
  dvar_vector l(1,n);
  dvar_vector u(1,n);

  for (int k=1;k<=m;k++)
  {
    dvariable weight=1.0;
    l=lower;
    u=upper;
    for (int j=1;j<=n;j++)
    {
      l(j)/=ch(j,j);
      u(j)/=ch(j,j);
      dvariable Phiu=cumd_norm(u(j));
      dvariable Phil=cumd_norm(l(j));
      weight*=Phiu-Phil;
      dvariable eta=inv_cumd_norm((Phiu-Phil)*eps(k,j)+Phil+1.e-30);
      for (int i=j+1;i<=n;i++)
      {
        dvariable tmp=ch(i,j)*eta;
        l(i)-=tmp;
        u(i)-=tmp;
      }
    }
    ssum+=weight;
  }
  RETURN_ARRAYS_DECREMENT();
  return ssum/m;
}
Пример #9
0
/**
 * Description not yet available.
 * \param
 */
dmatrix symmetrize(const dmatrix& m)
{
    if (m.rowmin() != m.colmin() || m.rowmax() != m.colmax() )
    {
        cerr << " Non square matrix passed to dmatrix symmetrize\n";
        ad_exit(1);
    }
    int rmin=m.rowmin();
    int rmax=m.rowmax();

    dmatrix s(rmin,rmax,rmin,rmax);
    for (int i=rmin; i<=rmax; i++)
    {
        s(i,i)=m(i,i);

        for (int j=rmin; j<i; j++)
        {
            s(i,j)=(m(i,j)+m(j,i))/2.;
            s(j,i)=s(i,j);
        }
    }
    return s;
}
Пример #10
0
dvariable mult_likelihood(const dmatrix &o, const dvar_matrix &p, dvar_matrix &nu, 
                          const dvariable &log_vn)
{

	// kludge to ensure observed and predicted matrixes are the same size
	if(o.colsize()!=p.colsize() || o.rowsize()!=p.rowsize())
	{
		cerr<<"Error in multivariate_t_likelihood, observed and predicted matrixes"
		" are not the same size\n";
		ad_exit(1);
	}
	dvariable vn = mfexp(log_vn);
	dvariable ff = 0.0;
	int r1 = o.rowmin();
	int r2 = o.rowmax();
	int c1 = o.colmin();
	int c2 = o.colmax();

	for(int i = r1; i <= r2; i++ )
	{
		dvar_vector sobs = vn * o(i)/sum(o(i));  //scale observed numbers by effective sample size.
		ff -= gammln(vn);
		for(int j = c1; j <= c2; j++ )
		{
			if( value(sobs(j)) > 0.0 )
				ff += gammln(sobs(j));
		}
		ff -= sobs * log(TINY + p(i));
		dvar_vector o1=o(i)/sum(o(i));
		dvar_vector p1=p(i)/sum(p(i));
		nu(i) = elem_div(o1-p1,sqrt(elem_prod(p1,1.-p1)/vn));


	}
	// exit(1);
	return ff;
}
		/**
		   calculate Pseudo-Inverse using SVD(Singular Value Decomposition)
		   by lapack library DGESVD (_a can be non-square matrix)
		*/
		int calcPseudoInverse(const dmatrix &_a, dmatrix &_a_pseu, double _sv_ratio)
		{
				int i, j, k;
				char jobu  = 'A';
				char jobvt = 'A';
				int m = (int)_a.rows();
				int n = (int)_a.cols();
				int max_mn = max(m,n);
				int min_mn = min(m,n);

				dmatrix a(m,n);
				a = _a;

				int lda = m;
				double *s = new double[max_mn];
				int ldu = m;
				double *u = new double[ldu*m];
				int ldvt = n;
				double *vt = new double[ldvt*n];
				int lwork = max(3*min_mn+max_mn, 5*min_mn);     // for CLAPACK ver.2 & ver.3
				double *work = new double[lwork];
				int info;

				for(i = 0; i < max_mn; i++) s[i] = 0.0;
		   
				dgesvd_(&jobu, &jobvt, &m, &n, &(a(0,0)), &lda, s, u, &ldu, vt, &ldvt, work,
						&lwork, &info);


				double smin, smax=0.0;
				for (j = 0; j < min_mn; j++) if (s[j] > smax) smax = s[j];
				smin = smax*_sv_ratio; 			// default _sv_ratio is 1.0e-3
				for (j = 0; j < min_mn; j++) if (s[j] < smin) s[j] = 0.0;

				//------------ calculate pseudo inverse   pinv(A) = V*S^(-1)*U^(T)
				// S^(-1)*U^(T)
				for (j = 0; j < m; j++){
						if (s[j]){
								for (i = 0; i < m; i++) u[j*m+i] /= s[j];
						}
						else {
								for (i = 0; i < m; i++) u[j*m+i] = 0.0;
						}
				}

				// V * (S^(-1)*U^(T)) 
				_a_pseu.resize(n,m);
				for(j = 0; j < n; j++){
						for(i = 0; i < m; i++){
								_a_pseu(j,i) = 0.0;
								for(k = 0; k < min_mn; k++){
										if(s[k]) _a_pseu(j,i) += vt[j*n+k] * u[k*m+i];
								}
						}
				}

				delete [] work;
				delete [] vt;
				delete [] s;
				delete [] u;

				return info;
		}
Пример #12
0
inline void orthogonalize(int N__,
                          int n__,
                          std::vector<wave_functions*> wfs__,
                          int idx_bra__,
                          int idx_ket__,
                          dmatrix<T>& o__,
                          wave_functions& tmp__)
{
    PROFILE("sddk::wave_functions::orthogonalize");

    auto pu = wfs__[0]->pu();
        
    /* project out the old subspace:
     * |\tilda phi_new> = |phi_new> - |phi_old><phi_old|phi_new> */
    if (N__ > 0) {
        inner(*wfs__[idx_bra__], 0, N__, *wfs__[idx_ket__], N__, n__, 0.0, o__, 0, 0);
        transform(pu, -1.0, wfs__, 0, N__, o__, 0, 0, 1.0, wfs__, N__, n__);
    }

    /* orthogonalize new n__ x n__ block */
    inner(*wfs__[idx_bra__], N__, n__, *wfs__[idx_ket__], N__, n__, 0.0, o__, 0, 0);

    /* single MPI rank */
    if (o__.blacs_grid().comm().size() == 1) {
        bool use_magma{false};
        #if defined(__GPU) && defined(__MAGMA)
        if (pu == GPU) {
            use_magma = true;
        }
        #endif

        if (use_magma) {
            #ifdef __GPU
            /* Cholesky factorization */
            if (int info = linalg<GPU>::potrf(n__, o__.template at<GPU>(), o__.ld())) {
                std::stringstream s;
                s << "error in GPU factorization, info = " << info;
                TERMINATE(s);
            }
            /* inversion of triangular matrix */
            if (linalg<GPU>::trtri(n__, o__.template at<GPU>(), o__.ld())) {
                TERMINATE("error in inversion");
            }
            #endif
        } else { /* CPU version */
            //check_hermitian("OVLP", o__, n__);
            //o__.serialize("overlap.dat", n__);
            /* Cholesky factorization */
            if (int info = linalg<CPU>::potrf(n__, &o__(0, 0), o__.ld())) {
                std::stringstream s;
                s << "error in factorization, info = " << info << std::endl
                  << "number of existing states: " << N__ << std::endl
                  << "number of new states: " << n__ << std::endl
                  << "number of wave_functions: " << wfs__.size() << std::endl
                  << "idx_bra: " << idx_bra__ << " " << "idx_ket:" << idx_ket__;
                TERMINATE(s);
            }
            /* inversion of triangular matrix */
            if (linalg<CPU>::trtri(n__, &o__(0, 0), o__.ld())) {
                TERMINATE("error in inversion");
            }
            if (pu == GPU) {
                #ifdef __GPU
                acc::copyin(o__.template at<GPU>(), o__.ld(), o__.template at<CPU>(), o__.ld(), n__, n__);
                #endif
            }
        }

        /* CPU version */
        if (pu == CPU) {
            /* multiplication by triangular matrix */
            for (auto& e: wfs__) {
                /* wave functions are complex, transformation matrix is complex */
                if (std::is_same<T, double_complex>::value) {
                    linalg<CPU>::trmm('R', 'U', 'N', e->pw_coeffs().num_rows_loc(), n__, double_complex(1, 0),
                                      reinterpret_cast<double_complex*>(o__.template at<CPU>()), o__.ld(),
                                      e->pw_coeffs().prime().at<CPU>(0, N__), e->pw_coeffs().prime().ld());

                    if (e->has_mt() && e->mt_coeffs().num_rows_loc()) {
                        linalg<CPU>::trmm('R', 'U', 'N', e->mt_coeffs().num_rows_loc(), n__, double_complex(1, 0),
                                          reinterpret_cast<double_complex*>(o__.template at<CPU>()), o__.ld(),
                                          e->mt_coeffs().prime().at<CPU>(0, N__), e->mt_coeffs().prime().ld());
                    }
                }
                /* wave functions are real (psi(G) = psi^{*}(-G)), transformation matrix is real */
                if (std::is_same<T, double>::value) {
                    linalg<CPU>::trmm('R', 'U', 'N', 2 * e->pw_coeffs().num_rows_loc(), n__, 1.0,
                                      reinterpret_cast<double*>(o__.template at<CPU>()), o__.ld(),
                                      reinterpret_cast<double*>(e->pw_coeffs().prime().at<CPU>(0, N__)), 2 * e->pw_coeffs().prime().ld());

                    if (e->has_mt() && e->mt_coeffs().num_rows_loc()) {
                        linalg<CPU>::trmm('R', 'U', 'N', 2 * e->mt_coeffs().num_rows_loc(), n__, 1.0,
                                          reinterpret_cast<double*>(o__.template at<CPU>()), o__.ld(),
                                          reinterpret_cast<double*>(e->mt_coeffs().prime().at<CPU>(0, N__)), 2 * e->mt_coeffs().prime().ld());
                    }
                }
            }
        }
        #ifdef __GPU
        if (pu == GPU) {
            /* multiplication by triangular matrix */
            for (auto& e: wfs__) {
                if (std::is_same<T, double_complex>::value) {
                    double_complex alpha(1, 0);

                    linalg<GPU>::trmm('R', 'U', 'N', e->pw_coeffs().num_rows_loc(), n__, &alpha,
                                      reinterpret_cast<double_complex*>(o__.template at<GPU>()), o__.ld(),
                                      e->pw_coeffs().prime().at<GPU>(0, N__), e->pw_coeffs().prime().ld());

                    if (e->has_mt() && e->mt_coeffs().num_rows_loc()) {
                        linalg<GPU>::trmm('R', 'U', 'N', e->mt_coeffs().num_rows_loc(), n__, &alpha,
                                          reinterpret_cast<double_complex*>(o__.template at<GPU>()), o__.ld(),
                                          e->mt_coeffs().prime().at<GPU>(0, N__), e->mt_coeffs().prime().ld());
                    }
                    /* alpha should not go out of the scope, so wait */
                    acc::sync_stream(-1);
                }
                if (std::is_same<T, double>::value) {
                    double alpha{1};

                    linalg<GPU>::trmm('R', 'U', 'N', 2 * e->pw_coeffs().num_rows_loc(), n__, &alpha,
                                      reinterpret_cast<double*>(o__.template at<GPU>()), o__.ld(),
                                      reinterpret_cast<double*>(e->pw_coeffs().prime().at<GPU>(0, N__)), 2 * e->pw_coeffs().prime().ld());

                    if (e->has_mt() && e->mt_coeffs().num_rows_loc()) {
                        linalg<GPU>::trmm('R', 'U', 'N', 2 * e->mt_coeffs().num_rows_loc(), n__, &alpha,
                                          reinterpret_cast<double*>(o__.template at<GPU>()), o__.ld(),
                                          reinterpret_cast<double*>(e->mt_coeffs().prime().at<GPU>(0, N__)), 2 * e->mt_coeffs().prime().ld());
                    }
                    acc::sync_stream(-1);
                }
            }
            acc::sync_stream(-1);
        }
        #endif
    } else { /* parallel transformation */
        sddk::timer t1("sddk::wave_functions::orthogonalize|potrf");
        if (int info = linalg<CPU>::potrf(n__, o__)) {
            std::stringstream s;
            s << "error in factorization, info = " << info;
            TERMINATE(s);
        }
        t1.stop();

        sddk::timer t2("sddk::wave_functions::orthogonalize|trtri");
        if (linalg<CPU>::trtri(n__, o__)) {
            TERMINATE("error in inversion");
        }
        t2.stop();

        /* o is upper triangular matrix */
        for (int i = 0; i < n__; i++) {
            for (int j = i + 1; j < n__; j++) {
                o__.set(j, i, 0);
            }
        }

        /* phi is transformed into phi, so we can't use it as the output buffer; use tmp instead and then overwrite phi */
        for (auto& e: wfs__) {
            transform(pu, *e, N__, n__, o__, 0, 0, tmp__, 0, n__);
            e->copy_from(tmp__, 0, n__, N__, pu);
        }
    }
}
		/**
		   solve linear equation using SVD(Singular Value Decomposition)
		   by lapack library DGESVD (_a can be non-square matrix)
		*/
		int solveLinearEquationSVD(const dmatrix &_a, const dvector &_b, dvector &_x, double _sv_ratio)
		{
				const int m = _a.rows();
				const int n = _a.cols();
				assert( m == static_cast<int>(_b.size()) );
				_x.resize(n);

				int i, j;
				char jobu  = 'A';
				char jobvt = 'A';
        
				int max_mn = max(m,n);
				int min_mn = min(m,n);

				dmatrix a(m,n);
				a = _a;

				int lda = m;
				double *s = new double[max_mn];		// singular values
				int ldu = m;
				double *u = new double[ldu*m];
				int ldvt = n;
				double *vt = new double[ldvt*n];

				int lwork = max(3*min_mn+max_mn, 5*min_mn);     // for CLAPACK ver.2 & ver.3
				double *work = new double[lwork];
				int info;

				for(i = 0; i < max_mn; i++) s[i] = 0.0;

				dgesvd_(&jobu, &jobvt, &m, &n, &(a(0,0)), &lda, s, u, &ldu, vt, &ldvt, work,
						&lwork, &info);

				double tmp;

				double smin, smax=0.0;
				for (j = 0; j < min_mn; j++) if (s[j] > smax) smax = s[j];
				smin = smax*_sv_ratio; // 1.0e-3;
				for (j = 0; j < min_mn; j++) if (s[j] < smin) s[j] = 0.0;
	
				double *utb = new double[m];		// U^T*b

				for (j = 0; j < m; j++){
						tmp = 0;
						if (s[j]){
								for (i = 0; i < m; i++) tmp += u[j*m+i] * _b(i);
								tmp /= s[j];
						}
						utb[j] = tmp;
				}

				// v*utb
				for (j = 0; j < n; j++){
						tmp = 0;
						for (i = 0; i < n; i++){
								if(s[i]) tmp += utb[i] * vt[j*n+i];
						}
						_x(j) = tmp;
				}

				delete [] utb;
				delete [] work;
				delete [] vt;
				delete [] s;
				delete [] u;
	
				return info;
		}
Пример #14
0
 int indexmin()
 {
     return U.indexmin();
 }
Пример #15
0
inline void orthogonalize(device_t                     pu__,
                          int                          num_sc__,
                          int                          N__,
                          int                          n__,
                          std::vector<Wave_functions*> wfs__,
                          int                          idx_bra__,
                          int                          idx_ket__,
                          dmatrix<T>&                  o__,
                          wave_functions&              tmp__)
{
    PROFILE("sddk::wave_functions::orthogonalize");

    /* project out the old subspace:
     * |\tilda phi_new> = |phi_new> - |phi_old><phi_old|phi_new> */
    if (N__ > 0) {
        inner(num_sc__, *wfs__[idx_bra__], 0, N__, *wfs__[idx_ket__], N__, n__, o__, 0, 0);
        transform(pu__, -1.0, wfs__, 0, N__, o__, 0, 0, 1.0, wfs__, N__, n__);
    }

    //if (true) {

    //    inner(num_sc__, *wfs__[idx_bra__], N__, n__, *wfs__[idx_ket__], N__, n__, o__, 0, 0);

    //    linalg<CPU>::geqrf(n__, n__, o__, 0, 0);
    //    auto diag = o__.get_diag(n__);
    //    if (o__.blacs_grid().comm().rank() == 0) {
    //        printf("diagonal of R-factor\n");
    //        for (int i = 0; i < n__; i++) {
    //            if (std::abs(diag[i]) < 1e-6) {
    //                std::cout << "small norm: " << i << " " << diag[i] << std::endl;
    //            }
    //        }
    //    }

    //    //std::vector<double> eo(n__);
    //    //dmatrix<T> evec(o__.num_rows(), o__.num_cols(), o__.blacs_grid(), o__.bs_row(), o__.bs_col());

    //    //Eigenproblem_elpa1 evs(o__.blacs_grid(), o__.bs_row());
    //    //evs.solve(n__, n__, o__.template at<CPU>(), o__.ld(), eo.data(), evec.template at<CPU>(), evec.ld(),
    //    //          o__.num_rows_local(), o__.num_cols_local());

    //    //if (o__.blacs_grid().comm().rank() == 0) { 
    //    //    std::cout << "smallest ev of the new n x x block: " << eo[0] << std::endl;
    //    //}
    //}

    /* orthogonalize new n__ x n__ block */
    inner(num_sc__, *wfs__[idx_bra__], N__, n__, *wfs__[idx_ket__], N__, n__, o__, 0, 0);

    /* single MPI rank */
    if (o__.blacs_grid().comm().size() == 1) {
        bool use_magma{false};
        #if defined(__GPU) && defined(__MAGMA)
        if (pu__ == GPU) {
            use_magma = true;
        }
        #endif

        if (use_magma) {
            #ifdef __GPU
            /* Cholesky factorization */
            if (int info = linalg<GPU>::potrf(n__, o__.template at<GPU>(), o__.ld())) {
                std::stringstream s;
                s << "error in GPU factorization, info = " << info;
                TERMINATE(s);
            }
            /* inversion of triangular matrix */
            if (linalg<GPU>::trtri(n__, o__.template at<GPU>(), o__.ld())) {
                TERMINATE("error in inversion");
            }
            #endif
        } else { /* CPU version */
            //check_hermitian("OVLP", o__, n__);
            //o__.serialize("overlap.dat", n__);
            /* Cholesky factorization */
            if (int info = linalg<CPU>::potrf(n__, &o__(0, 0), o__.ld())) {
                std::stringstream s;
                s << "error in factorization, info = " << info << std::endl
                  << "number of existing states: " << N__ << std::endl
                  << "number of new states: " << n__ << std::endl
                  << "number of wave_functions: " << wfs__.size() << std::endl
                  << "idx_bra: " << idx_bra__ << " " << "idx_ket:" << idx_ket__;
                TERMINATE(s);
            }
            /* inversion of triangular matrix */
            if (linalg<CPU>::trtri(n__, &o__(0, 0), o__.ld())) {
                TERMINATE("error in inversion");
            }
            if (pu__ == GPU) {
                #ifdef __GPU
                acc::copyin(o__.template at<GPU>(), o__.ld(), o__.template at<CPU>(), o__.ld(), n__, n__);
                #endif
            }
        }

        for (int isc = 0; isc < num_sc__; isc++) {
            /* CPU version */
            if (pu__ == CPU) {
                /* multiplication by triangular matrix */
                for (auto& e: wfs__) {
                    /* alias for spin component of wave-functions */
                    auto& wfsc = e->component(isc);
                    /* wave functions are complex, transformation matrix is complex */
                    if (std::is_same<T, double_complex>::value) {
                        linalg<CPU>::trmm('R', 'U', 'N', wfsc.pw_coeffs().num_rows_loc(), n__, double_complex(1, 0),
                                          reinterpret_cast<double_complex*>(o__.template at<CPU>()), o__.ld(),
                                          wfsc.pw_coeffs().prime().at<CPU>(0, N__), e->component(isc).pw_coeffs().prime().ld());

                        if (wfsc.has_mt() && wfsc.mt_coeffs().num_rows_loc()) {
                            linalg<CPU>::trmm('R', 'U', 'N', wfsc.mt_coeffs().num_rows_loc(), n__, double_complex(1, 0),
                                              reinterpret_cast<double_complex*>(o__.template at<CPU>()), o__.ld(),
                                              wfsc.mt_coeffs().prime().at<CPU>(0, N__), wfsc.mt_coeffs().prime().ld());
                        }
                    }
                    /* wave functions are real (psi(G) = psi^{*}(-G)), transformation matrix is real */
                    if (std::is_same<T, double>::value) {
                        linalg<CPU>::trmm('R', 'U', 'N', 2 * wfsc.pw_coeffs().num_rows_loc(), n__, 1.0,
                                          reinterpret_cast<double*>(o__.template at<CPU>()), o__.ld(),
                                          reinterpret_cast<double*>(wfsc.pw_coeffs().prime().at<CPU>(0, N__)), 2 * wfsc.pw_coeffs().prime().ld());

                        if (wfsc.has_mt() && wfsc.mt_coeffs().num_rows_loc()) {
                            linalg<CPU>::trmm('R', 'U', 'N', 2 * wfsc.mt_coeffs().num_rows_loc(), n__, 1.0,
                                              reinterpret_cast<double*>(o__.template at<CPU>()), o__.ld(),
                                              reinterpret_cast<double*>(wfsc.mt_coeffs().prime().at<CPU>(0, N__)), 2 * wfsc.mt_coeffs().prime().ld());
                        }
                    }
                }
            }
            #ifdef __GPU
            if (pu__ == GPU) {
                /* multiplication by triangular matrix */
                for (auto& e: wfs__) {
                    auto& wfsc = e->component(isc);
                    if (std::is_same<T, double_complex>::value) {
                        double_complex alpha(1, 0);

                        linalg<GPU>::trmm('R', 'U', 'N', wfsc.pw_coeffs().num_rows_loc(), n__, &alpha,
                                          reinterpret_cast<double_complex*>(o__.template at<GPU>()), o__.ld(),
                                          wfsc.pw_coeffs().prime().at<GPU>(0, N__), wfsc.pw_coeffs().prime().ld());

                        if (wfsc.has_mt() && wfsc.mt_coeffs().num_rows_loc()) {
                            linalg<GPU>::trmm('R', 'U', 'N', wfsc.mt_coeffs().num_rows_loc(), n__, &alpha,
                                              reinterpret_cast<double_complex*>(o__.template at<GPU>()), o__.ld(),
                                              wfsc.mt_coeffs().prime().at<GPU>(0, N__), wfsc.mt_coeffs().prime().ld());
                        }
                        /* alpha should not go out of the scope, so wait */
                        acc::sync_stream(-1);
                    }
                    if (std::is_same<T, double>::value) {
                        double alpha{1};

                        linalg<GPU>::trmm('R', 'U', 'N', 2 * wfsc.pw_coeffs().num_rows_loc(), n__, &alpha,
                                          reinterpret_cast<double*>(o__.template at<GPU>()), o__.ld(),
                                          reinterpret_cast<double*>(wfsc.pw_coeffs().prime().at<GPU>(0, N__)), 2 * wfsc.pw_coeffs().prime().ld());

                        if (wfsc.has_mt() && wfsc.mt_coeffs().num_rows_loc()) {
                            linalg<GPU>::trmm('R', 'U', 'N', 2 * wfsc.mt_coeffs().num_rows_loc(), n__, &alpha,
                                              reinterpret_cast<double*>(o__.template at<GPU>()), o__.ld(),
                                              reinterpret_cast<double*>(wfsc.mt_coeffs().prime().at<GPU>(0, N__)), 2 * wfsc.mt_coeffs().prime().ld());
                        }
                        acc::sync_stream(-1);
                    }
                }
                acc::sync_stream(-1);
            }
            #endif
        }
    } else { /* parallel transformation */
        sddk::timer t1("sddk::wave_functions::orthogonalize|potrf");
        if (int info = linalg<CPU>::potrf(n__, o__)) {
            std::stringstream s;
            s << "error in factorization, info = " << info;
            TERMINATE(s);
        }
        t1.stop();

        sddk::timer t2("sddk::wave_functions::orthogonalize|trtri");
        if (linalg<CPU>::trtri(n__, o__)) {
            TERMINATE("error in inversion");
        }
        t2.stop();

        /* o is upper triangular matrix */
        for (int i = 0; i < n__; i++) {
            for (int j = i + 1; j < n__; j++) {
                o__.set(j, i, 0);
            }
        }

        /* phi is transformed into phi, so we can't use it as the output buffer; use tmp instead and then overwrite phi */
        for (auto& e: wfs__) {
            for (int isc = 0; isc < num_sc__; isc++) {
                transform(pu__, e->component(isc), N__, n__, o__, 0, 0, tmp__, 0, n__);
                e->component(isc).copy_from(tmp__, 0, n__, N__, pu__);
            }
        }
    }
}
Пример #16
0
 int indexmax()
 {
     return U.indexmax();
 }
Пример #17
0
/**
 * Description not yet available.
 * \param
 */
dvar_matrix operator*(const dvar_matrix& m1, const dmatrix& cm2)
 {
   if (m1.colmin() != cm2.rowmin() || m1.colmax() != cm2.rowmax())
   {
     cerr << " Incompatible array bounds in "
     "dmatrix operator*(const dvar_matrix& x, const dmatrix& m)\n";
     ad_exit(21);
   }
   dmatrix cm1=value(m1);
   //dmatrix cm2=value(m2);
   dmatrix tmp(m1.rowmin(),m1.rowmax(), cm2.colmin(), cm2.colmax());
#ifdef OPT_LIB
   const size_t rowsize = (size_t)cm2.rowsize();
#else
   const int _rowsize = cm2.rowsize();
   assert(_rowsize > 0);
   const size_t rowsize = (size_t)_rowsize;
#endif
   try
   {
     double* temp_col = new double[rowsize];
     temp_col-=cm2.rowmin();
     for (int j=cm2.colmin(); j<=cm2.colmax(); j++)
     {
       for (int k=cm2.rowmin(); k<=cm2.rowmax(); k++)
       {
         temp_col[k] = cm2.elem(k,j);
       }
       for (int i=cm1.rowmin(); i<=cm1.rowmax(); i++)
       {
         double sum=0.0;
         dvector& temp_row = cm1(i);
         for (int k=cm1.colmin(); k<=cm1.colmax(); k++)
         {
           sum+=temp_row(k) * (temp_col[k]);
           // sum+=temp_row(k) * cm2(k,j);
         }
         tmp(i,j)=sum;
       }
     }
     temp_col+=cm2.rowmin();
     delete [] temp_col;
     temp_col = 0;
   }
   catch (std::bad_alloc& e)
   {
     cerr << "Error[" << __FILE__ << ':' << __LINE__
          << "]: Unable to allocate array.\n";
     //ad_exit(21);
     throw e;
   }
   dvar_matrix vtmp=nograd_assign(tmp);
   save_identifier_string("TEST1");
   //m1.save_dvar_matrix_value();
   m1.save_dvar_matrix_position();
   cm2.save_dmatrix_value();
   cm2.save_dmatrix_position();
   vtmp.save_dvar_matrix_position();
   save_identifier_string("TEST6");
   gradient_structure::GRAD_STACK1->
            set_gradient_stack(dmcm_prod);
   return vtmp;
 }
 double clusteringValidity::getAverageInterpointDistance(const dmatrix& m1,
                                                         const dmatrix& m2) const {
   l2Distance<double> dist;
   int i;
   dvector a(m1.columns());
   dvector b(m2.columns());
   for (i=0; i<m1.rows();i++) {
     a.add(m1.getRow(i));
   }
   a.divide(m1.rows()); // centroid 1
   for (i=0; i<m2.rows();i++) {
     b.add(m2.getRow(i));
   }
   b.divide(m2.rows()); // centroid 2
   double distance=0.0;
   for (i=0; i<m1.rows(); i++) {
     distance+=dist.apply(m1.getRow(i),a);
   }
   for (i=0; i<m2.rows(); i++) {
     distance+=dist.apply(m2.getRow(i),b);
   }
   return (distance/(m1.rows()+m2.rows()));
 }
Пример #19
0
  bool MLP::trainSteepestSequential(const dmatrix& data,
                                    const ivector& internalIds) {

    const parameters& param = getParameters();
    char buffer[256];
    bool abort = false;
    scramble<int> scrambler;
    int i,j,k;
    double tmpError;
    ivector idx;
    idx.resize(data.rows(),0,false,false);
    for (i=0;i<idx.size();++i) {
      idx.at(i)=i;
    }

    if (param.momentum > 0) {
      // with momentum
      dvector grad,delta(weights.size(),0.0);

      for (i=0; !abort && (i<param.maxNumberOfEpochs); ++i) {
        scrambler.apply(idx); // present the pattern in a random sequence
        totalError = 0;
        for (j=0;j<idx.size();++j) {
          k=idx.at(j);
          calcGradient(data.getRow(k),internalIds.at(k),grad);
          computeActualError(internalIds.at(k),tmpError);
          totalError+=tmpError;
          delta.addScaled(param.learnrate,grad,param.momentum,delta);
          weights.add(delta);
        }

        // update progress info object
        if (validProgressObject()) {
          sprintf(buffer,"Error=%f",totalError/errorNorm);
          getProgressObject().step(buffer);
          abort = abort || (totalError/errorNorm <= param.stopError);
          abort = abort || getProgressObject().breakRequested();
        }
      }
    } else {
      // without momentum
      ivector idx;
      idx.resize(data.rows(),0,false,false);
      dvector grad;

      int i,j,k;
      double tmpError;
      for (i=0;i<idx.size();++i) {
        idx.at(i)=i;
      }
      for (i=0; !abort && (i<param.maxNumberOfEpochs); ++i) {
        scrambler.apply(idx); // present the pattern in a random sequence
        totalError = 0;
        for (j=0;j<idx.size();++j) {
          k=idx.at(j);
          calcGradient(data.getRow(k),internalIds.at(k),grad);
          computeActualError(internalIds.at(k),tmpError);
          totalError+=tmpError;
          weights.addScaled(param.learnrate,grad);
        }

        // update progress info object
        if (validProgressObject()) {
          sprintf(buffer,"Error=%f",totalError/errorNorm);
          getProgressObject().step(buffer);
          abort = abort || (totalError/errorNorm <= param.stopError);
          abort = abort || getProgressObject().breakRequested();
        }
      }
    }
    return true;
  }
void Body::calcCMJacobian(Link *base, dmatrix &J)
{
    // prepare subm, submwc
    JointPathPtr jp;
    if (base){
        jp = getJointPath(rootLink(), base);
        Link *skip = jp->joint(0);
        skip->subm = rootLink()->m;
        skip->submwc = rootLink()->m*rootLink()->wc;
        Link *l = rootLink()->child;
        if (l){
            if (l != skip) {
                l->calcSubMassCM();
                skip->subm += l->subm;
                skip->submwc += l->submwc;
            }
            l = l->sibling;
            while(l){
                if (l != skip){
                    l->calcSubMassCM();
                    skip->subm += l->subm;
                    skip->submwc += l->submwc;
                }
                l = l->sibling;
            }
        }
        
        // assuming there is no branch between base and root
        for (int i=1; i<jp->numJoints(); i++){
            l = jp->joint(i);
            l->subm = l->parent->m + l->parent->subm;
            l->submwc = l->parent->m*l->parent->wc + l->parent->submwc;
        }
        
        J.resize(3, numJoints());
    }else{
        rootLink()->calcSubMassCM();
        J.resize(3, numJoints()+6);
    }
    
    // compute Jacobian
    std::vector<int> sgn(numJoints(), 1);
    if (jp) {
        for (int i=0; i<jp->numJoints(); i++) sgn[jp->joint(i)->jointId] = -1;
    }
    
    for (int i=0; i<numJoints(); i++){
        Link *j = joint(i);
        switch(j->jointType){
        case Link::ROTATIONAL_JOINT:
        {
            Vector3 omega(sgn[j->jointId]*j->R*j->a);
            Vector3 arm((j->submwc-j->subm*j->p)/totalMass_);
            Vector3 dp(omega.cross(arm));
            J.col(j->jointId) = dp;
            break;
        }
        default:
            std::cerr << "calcCMJacobian() : unsupported jointType("
                      << j->jointType << std::endl;
        }
    }
    if (!base){
        int c = numJoints();
        J(0, c  ) = 1.0; J(0, c+1) = 0.0; J(0, c+2) = 0.0;
        J(1, c  ) = 0.0; J(1, c+1) = 1.0; J(1, c+2) = 0.0;
        J(2, c  ) = 0.0; J(2, c+1) = 0.0; J(2, c+2) = 1.0;

        Vector3 dp(rootLink()->submwc/totalMass_ - rootLink()->p);
        J(0, c+3) =    0.0; J(0, c+4) =  dp(2); J(0, c+5) = -dp(1);
        J(1, c+3) = -dp(2); J(1, c+4) =    0.0; J(1, c+5) =  dp(0);
        J(2, c+3) =  dp(1); J(2, c+4) = -dp(0); J(2, c+5) =    0.0;
    }
}
Пример #21
0
  // implements the Fuzzy C Means algorithm
  bool fuzzyCMeans::train(const dmatrix& data) {

    bool ok=true;
    int t=0;
    // create the distance functor according to the paramter norm
    distanceFunctor<double>* distFunc = 0;
    switch (getParameters().norm)  {
      case parameters::L1:
        distFunc = new l1Distance<double>;
        break;
      case parameters::L2:
        distFunc = new l2Distance<double>;
        break;
      default:
        break;
    }
    int nbOfClusters=getParameters().nbOfClusters;
    int nbOfPoints=data.rows();
    if(nbOfClusters>nbOfPoints) {
      setStatusString("more Clusters than points");
      ok = false;
    }
    double q=getParameters().fuzzifier;
    if (q<=1) {
      setStatusString("q has to be bigger than 1");
      ok = false;
    }
    // select some points of the given data to initialise the centroids
    selectRandomPoints(data,nbOfClusters,centroids);
    // initialize variables
    centroids.resize(nbOfClusters,data.columns(),0.0);
    dmatrix memberships(nbOfPoints, nbOfClusters, 0.0);
    double terminationCriterion=0;
    double newDistance;
    dvector newCenter(data.columns());
    dvector currentPoint(data.columns());
    dmatrix newCentroids(nbOfClusters,data.columns(),0.0);
    double sumOfMemberships=0;
    double membership=0;
    double dist1;
    double dist2;
    int i,j,k,m;
    do {
        // calculate new memberships
      memberships.fill(0.0);  //  clear old memberships
      for (i=0; i<nbOfPoints; i++) {
        for (j=0; j<nbOfClusters; j++) {
          newDistance=0;
          dist1=distFunc->apply(data.getRow(i),
                                centroids.getRow(j));
          for (k=0; k<nbOfClusters; k++) {
            dist2=distFunc->apply(data.getRow(i),
                                  centroids.getRow(k));
       // if distance is 0, normal calculation of membership is not possible.
            if (dist2!=0) {
              newDistance+=pow((dist1/dist2),(1/(q-1)));
            }
          }
      // if point and centroid are equal
          if (newDistance!=0)
            memberships.at(i,j)=1/newDistance;
          else {
            dvector row(memberships.columns(),0.0);
            memberships.setRow(i,row);
            memberships.at(i,j)=1;
            break;
          }
        }
      }
      t++;  // counts the iterations

     // calculate new centroids based on modified memberships
      for (m=0; m<nbOfClusters; m++) {
        newCenter.fill(0.0);
        sumOfMemberships=0;
        for (i=0; i<nbOfPoints; i++) {
          currentPoint=data.getRow(i);
          membership=pow(memberships.at(i,m),q);
          sumOfMemberships+=membership;
          currentPoint.multiply(membership);
          newCenter.add(currentPoint);
        }
        newCenter.divide(sumOfMemberships);
        newCentroids.setRow(m,newCenter);
      }
      terminationCriterion=distFunc->apply(centroids,newCentroids);
      centroids=newCentroids;
    }
    // the termination criterions
    while ( (terminationCriterion>getParameters().epsilon)
            && (t<getParameters().maxIterations));

    int nbClusters = nbOfClusters;
    //Put the id information into the result object
    //Each cluster has the id of its position in the matrix
    ivector tids(nbClusters);
    for (i=0; i<nbClusters; i++) {
      tids.at(i)=i;
    }
    outTemplate=outputTemplate(tids);
    return ok;


  }
		/**
		   solve linear equation using LU decomposition
		   by lapack library DGESVX (_a must be square matrix)
		*/
		int solveLinearEquationLU(const dmatrix &_a, const dvector &_b, dvector &_x)
		{
				assert(_a.cols() == _a.rows() && _a.cols() == _b.size() );

				int n = (int)_a.cols();
				int nrhs = 1;

				int lda = n;

				std::vector<int> ipiv(n);

				int ldb = n;

				int info;

				// compute the solution
#ifndef USE_CLAPACK_INTERFACE
  				char fact      = 'N';
				char transpose = 'N';

				double *af = new double[n*n];

				int ldaf = n;

				char equed = 'N';

				double *r = new double[n];
				double *c = new double[n];

				int ldx = n;

				double rcond;

				double *ferr = new double[nrhs];
				double *berr = new double[nrhs];
				double *work = new double[4*n];

				int *iwork = new int[n];

			    _x.resize(n);			// memory allocation for the return vector
				dgesvx_(&fact, &transpose, &n, &nrhs, const_cast<double *>(&(_a(0,0))), &lda, af, &ldaf, &(ipiv[0]),
						&equed, r, c, const_cast<double *>(&(_b(0))), &ldb, &(_x(0)), &ldx, &rcond,
						ferr, berr, work, iwork, &info);

				delete [] iwork;
				delete [] work;
				delete [] berr;
				delete [] ferr;
				delete [] c;
				delete [] r;

				delete [] af;
#else
				_x = _b;
				info = clapack_dgesv(CblasColMajor,
									 n, nrhs, const_cast<double *>(&(a(0,0))), lda, &(ipiv[0]),
									 &(_x(0)), ldb);
#endif

				return info;
		}
 bool normModHubertStat::apply(const std::vector<dmatrix>& 
                               clusteredData, double& index, 
                               const dmatrix& centroids) const {
   index =0.0;
   int nbClusters=clusteredData.size();
   l2Distance<double> dist;
   int nbPoints=0;
   int i,j,k,l;
   // count the points that are in clusteredData
   for (i=0; i<nbClusters; i++) {
     nbPoints+=clusteredData[i].rows();
   }
   // x is the distance matrix. It has in the i-th rows and j-th column
   // the distance between the i-th and j-th point
   // y is an other distance matrix. It has in the i-th row and j-th column
   // the distance of the centroids of the clusters they belong to.
   dmatrix x(nbPoints,nbPoints);
   dmatrix y(nbPoints,nbPoints);
   int row=0;
   int col=0;
   for (i=0; i<nbClusters; i++) {
     for (j=0; j<clusteredData[i].rows(); j++) {
       for (k=0; k<nbClusters; k++) {
         for (l=0; l<clusteredData[k].rows(); l++) {
           if (col>row) {
             y.at(row,col)=dist.apply(centroids.getRow(i),
                                      centroids.getRow(k));
             x.at(row,col)=dist.apply(clusteredData[i].getRow(j),
                                      clusteredData[k].getRow(l));
           }
           if (col<nbPoints-1) col++;
           else { 
             col=0;
             row++;
           }  
         }
       }
     }
   }
   double m=0.5*(nbPoints*(nbPoints-1));
   double meanX=x.sumOfElements()/m;
   double meanY=y.sumOfElements()/m;
   double tmp1=meanX*meanX;
   double tmp2=meanY*meanY;
   double varianzX=0.0;
   double varianzY=0.0;
   for (i=0; i<nbPoints; i++) {
     for (j=i+1; j<nbPoints; j++) {
       varianzX+=x[i][j]*x[i][j]-tmp1;
       varianzY+=y[i][j]*y[i][j]-tmp2;
     }
   }
   varianzX=varianzX/m;
   varianzY=varianzY/m;
   varianzX=sqrt(varianzX);
   varianzY=sqrt(varianzY);
   double varianz=varianzX*varianzY;
   for (i=0; i<nbPoints; i++) {
     for (j=i+1; j<nbPoints; j++) {
       index+=(x[i][j]-meanX)*(y[i][j]-meanY);
     }
   }
   index=index/(m*varianz); 
 
   return true;
 }
  bool dunnIndex::apply(const std::vector<dmatrix>& clusteredData,
                        double& index, const dmatrix& centroids) const {
    int nbClusters=clusteredData.size();
    double denominator=0.0;
    int i,j;
    l2Distance<double> dist;
    dvector diameters(nbClusters);
    parameters param;
    param=getParameters();
    // pointer to the function which implements the measure according to the 
    // parameters
    double (lti::clusteringValidity::*diamFunc)(const dmatrix&) const;
    
#ifdef _LTI_MSC_DOT_NET_2003
    // nasty bug in this version of the .NET compiler
#define QUALIFIER
#else 
#define QUALIFIER &lti::dunnIndex::
#endif

    switch (param.diameterMeasure) {
      case parameters::Standard:
        diamFunc=QUALIFIER getStandardDiameter;
        break;
      case parameters::Average:
        diamFunc=QUALIFIER getAverageDiameter;
        break;
      case parameters::Centroid:
        diamFunc=QUALIFIER getAverageToCentroidDiameter;
        break;
      default:
        diamFunc=QUALIFIER getStandardDiameter;
        setStatusString("Unknown diameterMeasure in clusteringValidity\n");
        return false;
    }
    // compute all diameters of all clusters
    for (i=0; i<nbClusters; i++) {
      diameters[i]=(this->*diamFunc)(clusteredData[i]);
    }
    denominator=diameters.maximum();
    // pointer to the function which calculates the distance of the functions
    // a pointer to a function is used, because the function will be called 
    // many times later
    double (lti::clusteringValidity::*distFunc)
      (const dmatrix&,const dmatrix&) const ;
    // set pointer to function which is set by the parameter distanceMeasure
    switch (param.distanceMeasure) {
      case parameters::Minimum:
        distFunc=QUALIFIER getMinimumDistance;
        break;
      case parameters::Maximum:
        distFunc=QUALIFIER getMaximumDistance;
        break;
      case parameters::Mean:
        distFunc=QUALIFIER getAverageDistance;
        break;
      case parameters::Centroids:
        distFunc=QUALIFIER getCentroidDistance;
        break;
      case parameters::Interpoint:
        distFunc=QUALIFIER getAverageInterpointDistance;
        break;
      default:
        distFunc=QUALIFIER getAverageDistance;
        setStatusString("Unknown distanceMeasure in clusteringValidity\n");
        return false;
    }
    // compute the distances of all clusters to each other
    int counter=0;
    dvector distanceVector(static_cast<int>(.5*(nbClusters*(nbClusters-1))));
    for (i=0; i<nbClusters; i++) {
      for (j=i+1; j<nbClusters; j++) {
        if (distFunc==QUALIFIER getCentroidDistance) {
          distanceVector[counter]=dist.apply(centroids.getRow(i),
                                             centroids.getRow(j));
        } else {
          distanceVector[counter]=(this->*distFunc)(clusteredData[i],
                                                    clusteredData[j]);
        }
        counter++;
      }
    }
    distanceVector.divide(denominator);
    index=distanceVector.minimum();

    return true;
  }
Пример #25
0
bool JointPathEx::calcJacobianInverseNullspace(dmatrix &J, dmatrix &Jinv, dmatrix &Jnull) {
    const int n = numJoints();
                
    hrp::dmatrix w = hrp::dmatrix::Identity(n,n);
    //
    // wmat/weight: weighting joint angle weight
    //
    // w_i = 1 + | dH/dt |      if d|dH/dt| >= 0
    //     = 1                  if d|dH/dt| <  0
    // dH/dt = (t_max - t_min)^2 (2t - t_max - t_min)
    //         / 4 (t_max - t)^2 (t - t_min)^2
    //
    // T. F. Chang and R.-V. Dubey: "A weighted least-norm solution based
    // scheme for avoiding joint limits for redundant manipulators", in IEEE
    // Trans. On Robotics and Automation, 11((2):286-292, April 1995.
    //
    for ( int j = 0; j < n ; j++ ) {
        double jang = joints[j]->q;
        double jmax = joints[j]->ulimit;
        double jmin = joints[j]->llimit;
        double e = deg2rad(1);
        if ( eps_eq(jang, jmax,e) && eps_eq(jang, jmin,e) ) {
        } else if ( eps_eq(jang, jmax,e) ) {
            jang = jmax - e;
        } else if ( eps_eq(jang, jmin,e) ) {
            jang = jmin + e;
        }

        double r;
        if ( eps_eq(jang, jmax,e) && eps_eq(jang, jmin,e) ) {
            r = DBL_MAX;
        } else {
            r = fabs( (pow((jmax - jmin),2) * (( 2 * jang) - jmax - jmin)) /
                      (4 * pow((jmax - jang),2) * pow((jang - jmin),2)) );
            if (std::isnan(r)) r = 0;
        }

        // If use_inside_joint_weight_retrieval = true (true by default), use T. F. Chang and R.-V. Dubeby weight retrieval inward.
        // Otherwise, joint weight is always calculated from limit value to resolve https://github.com/fkanehiro/hrpsys-base/issues/516.
        if (( r - avoid_weight_gain[j] ) >= 0 ) {
	  w(j, j) = optional_weight_vector[j] * ( 1.0 / ( 1.0 + r) );
	} else {
            if (use_inside_joint_weight_retrieval)
                w(j, j) = optional_weight_vector[j] * 1.0;
            else
                w(j, j) = optional_weight_vector[j] * ( 1.0 / ( 1.0 + r) );
	}
        avoid_weight_gain[j] = r;
    }
    if ( DEBUG ) {
        std::cerr << " cost :";
        for(int j = 0; j < n; j++ ) { std::cerr << std::setw(8) << std::setiosflags(std::ios::fixed) << std::setprecision(4) << avoid_weight_gain[j]; }
        std::cerr << std::endl;
        std::cerr << " optw :";
        for(int j = 0; j < n; j++ ) { std::cerr << std::setw(8) << std::setiosflags(std::ios::fixed) << std::setprecision(4) << optional_weight_vector[j]; }
        std::cerr << std::endl;
        std::cerr << "    w :";
        for(int j = 0; j < n; j++ ) { std::cerr << std::setw(8) << std::setiosflags(std::ios::fixed) << std::setprecision(4) << w(j, j); }
        std::cerr << std::endl;
    }

    double manipulability = sqrt((J*J.transpose()).determinant());
    double k = 0;
    if ( manipulability < manipulability_limit ) {
	k = manipulability_gain * pow((1 - ( manipulability / manipulability_limit )), 2);
    }
    if ( DEBUG ) {
	std::cerr << " manipulability = " <<  manipulability << " < " << manipulability_limit << ", k = " << k << " -> " << sr_gain * k << std::endl;
    }

    calcSRInverse(J, Jinv, sr_gain * k, w);

    Jnull = ( hrp::dmatrix::Identity(n, n) - Jinv * J);

    return true;
}
Пример #26
0
inline void Band::set_fv_h_o<GPU, electronic_structure_method_t::full_potential_lapwlo>(K_point* kp__,
                                                                                        Periodic_function<double>* effective_potential__,
                                                                                        dmatrix<double_complex>& h__,
                                                                                        dmatrix<double_complex>& o__) const
{
    runtime::Timer t("sirius::Band::set_fv_h_o");
    
    runtime::Timer t2("sirius::Band::set_fv_h_o|alloc");
    h__.zero();
    h__.allocate(memory_t::device);
    h__.zero_on_device();

    o__.zero();
    o__.allocate(memory_t::device);
    o__.zero_on_device();

    double_complex zone(1, 0);

    int num_atoms_in_block = 2 * omp_get_max_threads();
    int nblk = unit_cell_.num_atoms() / num_atoms_in_block +
               std::min(1, unit_cell_.num_atoms() % num_atoms_in_block);
    DUMP("nblk: %i", nblk);

    int max_mt_aw = num_atoms_in_block * unit_cell_.max_mt_aw_basis_size();
    DUMP("max_mt_aw: %i", max_mt_aw);

    mdarray<double_complex, 3> alm_row(kp__->num_gkvec_row(), max_mt_aw, 2, memory_t::host_pinned | memory_t::device);

    mdarray<double_complex, 3> alm_col(kp__->num_gkvec_col(), max_mt_aw, 2, memory_t::host_pinned | memory_t::device);

    mdarray<double_complex, 3> halm_col(kp__->num_gkvec_col(), max_mt_aw, 2, memory_t::host_pinned | memory_t::device);
    t2.stop();

    runtime::Timer t1("sirius::Band::set_fv_h_o|zgemm");
    for (int iblk = 0; iblk < nblk; iblk++) {
        int num_mt_aw = 0;
        std::vector<int> offsets(num_atoms_in_block);
        for (int ia = iblk * num_atoms_in_block; ia < std::min(unit_cell_.num_atoms(), (iblk + 1) * num_atoms_in_block); ia++) {
            int ialoc = ia - iblk * num_atoms_in_block;
            auto& atom = unit_cell_.atom(ia);
            auto& type = atom.type();
            offsets[ialoc] = num_mt_aw;
            num_mt_aw += type.mt_aw_basis_size();
        }

        int s = iblk % 2;
            
        #pragma omp parallel
        {
            int tid = omp_get_thread_num();
            for (int ia = iblk * num_atoms_in_block; ia < std::min(unit_cell_.num_atoms(), (iblk + 1) * num_atoms_in_block); ia++) {
                if (ia % omp_get_num_threads() == tid) {
                    int ialoc = ia - iblk * num_atoms_in_block;
                    auto& atom = unit_cell_.atom(ia);
                    auto& type = atom.type();

                    mdarray<double_complex, 2> alm_row_tmp(alm_row.at<CPU>(0, offsets[ialoc], s),
                                                           alm_row.at<GPU>(0, offsets[ialoc], s),
                                                           kp__->num_gkvec_row(), type.mt_aw_basis_size());

                    mdarray<double_complex, 2> alm_col_tmp(alm_col.at<CPU>(0, offsets[ialoc], s),
                                                           alm_col.at<GPU>(0, offsets[ialoc], s),
                                                           kp__->num_gkvec_col(), type.mt_aw_basis_size());
                    
                    mdarray<double_complex, 2> halm_col_tmp(halm_col.at<CPU>(0, offsets[ialoc], s),
                                                            halm_col.at<GPU>(0, offsets[ialoc], s),
                                                            kp__->num_gkvec_col(), type.mt_aw_basis_size());

                    kp__->alm_coeffs_row()->generate(ia, alm_row_tmp);
                    for (int xi = 0; xi < type.mt_aw_basis_size(); xi++) {
                        for (int igk = 0; igk < kp__->num_gkvec_row(); igk++) {
                            alm_row_tmp(igk, xi) = std::conj(alm_row_tmp(igk, xi));
                        }
                    }
                    alm_row_tmp.async_copy_to_device(tid);

                    kp__->alm_coeffs_col()->generate(ia, alm_col_tmp);
                    alm_col_tmp.async_copy_to_device(tid);

                    apply_hmt_to_apw<spin_block_t::nm>(atom, kp__->num_gkvec_col(), alm_col_tmp, halm_col_tmp);
                    halm_col_tmp.async_copy_to_device(tid);

                    /* setup apw-lo and lo-apw blocks */
                    set_fv_h_o_apw_lo(kp__, type, atom, ia, alm_row_tmp, alm_col_tmp, h__, o__);
                }
            }
            acc::sync_stream(tid);
        }
        acc::sync_stream(omp_get_max_threads());
        linalg<GPU>::gemm(0, 1, kp__->num_gkvec_row(), kp__->num_gkvec_col(), num_mt_aw, &zone, 
                          alm_row.at<GPU>(0, 0, s), alm_row.ld(), alm_col.at<GPU>(0, 0, s), alm_col.ld(), &zone, 
                          o__.at<GPU>(), o__.ld(), omp_get_max_threads());

        linalg<GPU>::gemm(0, 1, kp__->num_gkvec_row(), kp__->num_gkvec_col(), num_mt_aw, &zone, 
                          alm_row.at<GPU>(0, 0, s), alm_row.ld(), halm_col.at<GPU>(0, 0, s), halm_col.ld(), &zone,
                          h__.at<GPU>(), h__.ld(), omp_get_max_threads());
    }

    acc::copyout(h__.at<CPU>(), h__.ld(), h__.at<GPU>(), h__.ld(), kp__->num_gkvec_row(), kp__->num_gkvec_col());
    acc::copyout(o__.at<CPU>(), o__.ld(), o__.at<GPU>(), o__.ld(), kp__->num_gkvec_row(), kp__->num_gkvec_col());
    
    double tval = t1.stop();
    if (kp__->comm().rank() == 0) {
        DUMP("effective zgemm performance: %12.6f GFlops",
             2 * 8e-9 * kp__->num_gkvec() * kp__->num_gkvec() * unit_cell_.mt_aw_basis_size() / tval);
    }

    /* add interstitial contributon */
    set_fv_h_o_it(kp__, effective_potential__, h__, o__);

    /* setup lo-lo block */
    set_fv_h_o_lo_lo(kp__, h__, o__);

    h__.deallocate_on_device();
    o__.deallocate_on_device();
}
/**
   calculate the mass matrix using the unit vector method
   \todo replace the unit vector method here with
   a more efficient method that only requires O(n) computation time

   The motion equation (dv != dvo)
   |       |   | dv   |   |    |   | fext      |
   | out_M | * | dw   | + | b1 | = | tauext    |
   |       |   |ddq   |   |    |   | u         |
*/
void Body::calcMassMatrix(dmatrix& out_M)
{
    // buffers for the unit vector method
    dmatrix b1;
    dvector ddqorg;
    dvector uorg;
    Vector3 dvoorg;
    Vector3 dworg;
    Vector3 root_w_x_v;
    Vector3 g(0, 0, 9.8);

    uint nJ = numJoints();
    int totaldof = nJ;
    if( !isStaticModel_ ) totaldof += 6;

    out_M.resize(totaldof,totaldof);
    b1.resize(totaldof, 1);

    // preserve and clear the joint accelerations
    ddqorg.resize(nJ);
    uorg.resize(nJ);
    for(uint i = 0; i < nJ; ++i){
        Link* ptr = joint(i);
        ddqorg[i] = ptr->ddq;
        uorg  [i] = ptr->u;
        ptr->ddq = 0.0;
    }

    // preserve and clear the root link acceleration
    dvoorg = rootLink_->dvo;
    dworg  = rootLink_->dw;
    root_w_x_v = rootLink_->w.cross(rootLink_->vo + rootLink_->w.cross(rootLink_->p));
    rootLink_->dvo = g - root_w_x_v;   // dv = g, dw = 0
    rootLink_->dw.setZero();
	
    setColumnOfMassMatrix(b1, 0);

    if( !isStaticModel_ ){
        for(int i=0; i < 3; ++i){
            rootLink_->dvo[i] += 1.0;
            setColumnOfMassMatrix(out_M, i);
            rootLink_->dvo[i] -= 1.0;
        }
        for(int i=0; i < 3; ++i){
            rootLink_->dw[i] = 1.0;
            Vector3 dw_x_p = rootLink_->dw.cross(rootLink_->p);	//  spatial acceleration caused by ang. acc.
            rootLink_->dvo -= dw_x_p;
            setColumnOfMassMatrix(out_M, i + 3);
            rootLink_->dvo += dw_x_p;
            rootLink_->dw[i] = 0.0;
        }
    }

    for(uint i = 0; i < nJ; ++i){
        Link* ptr = joint(i);
        ptr->ddq = 1.0;
        int j = i + 6;
        setColumnOfMassMatrix(out_M, j);
        out_M(j, j) += ptr->Jm2; // motor inertia
        ptr->ddq = 0.0;
    }

    // subtract the constant term
    for(size_t i = 0; i < (size_t)out_M.cols(); ++i){
        out_M.col(i) -= b1;
    }

    // recover state
    for(uint i = 0; i < nJ; ++i){
        Link* ptr = joint(i);
        ptr->ddq  = ddqorg[i];
        ptr->u    = uorg  [i];
    }
    rootLink_->dvo = dvoorg;
    rootLink_->dw  = dworg;
}
Пример #28
0
inline void Band::set_fv_h_o<CPU, electronic_structure_method_t::full_potential_lapwlo>(K_point* kp__,
                                                                                        Periodic_function<double>* effective_potential__,
                                                                                        dmatrix<double_complex>& h__,
                                                                                        dmatrix<double_complex>& o__) const
{
    PROFILE_WITH_TIMER("sirius::Band::set_fv_h_o");
    
    h__.zero();
    o__.zero();

    double_complex zone(1, 0);
    
    int num_atoms_in_block = 2 * omp_get_max_threads();
    int nblk = unit_cell_.num_atoms() / num_atoms_in_block +
               std::min(1, unit_cell_.num_atoms() % num_atoms_in_block);
    DUMP("nblk: %i", nblk);

    int max_mt_aw = num_atoms_in_block * unit_cell_.max_mt_aw_basis_size();
    DUMP("max_mt_aw: %i", max_mt_aw);

    mdarray<double_complex, 2> alm_row(kp__->num_gkvec_row(), max_mt_aw);
    mdarray<double_complex, 2> alm_col(kp__->num_gkvec_col(), max_mt_aw);
    mdarray<double_complex, 2> halm_col(kp__->num_gkvec_col(), max_mt_aw);

    runtime::Timer t1("sirius::Band::set_fv_h_o|zgemm");
    for (int iblk = 0; iblk < nblk; iblk++)
    {
        int num_mt_aw = 0;
        std::vector<int> offsets(num_atoms_in_block);
        for (int ia = iblk * num_atoms_in_block; ia < std::min(unit_cell_.num_atoms(), (iblk + 1) * num_atoms_in_block); ia++)
        {
            auto& atom = unit_cell_.atom(ia);
            auto& type = atom.type();
            offsets[ia - iblk * num_atoms_in_block] = num_mt_aw;
            num_mt_aw += type.mt_aw_basis_size();
        }
        
        #ifdef __PRINT_OBJECT_CHECKSUM
        alm_row.zero();
        alm_col.zero();
        halm_col.zero();
        #endif

        #pragma omp parallel
        {
            int tid = omp_get_thread_num();
            for (int ia = iblk * num_atoms_in_block; ia < std::min(unit_cell_.num_atoms(), (iblk + 1) * num_atoms_in_block); ia++)
            {
                if (ia % omp_get_num_threads() == tid)
                {
                    int ialoc = ia - iblk * num_atoms_in_block;
                    auto& atom = unit_cell_.atom(ia);
                    auto& type = atom.type();

                    mdarray<double_complex, 2> alm_row_tmp(alm_row.at<CPU>(0, offsets[ialoc]), kp__->num_gkvec_row(), type.mt_aw_basis_size());
                    mdarray<double_complex, 2> alm_col_tmp(alm_col.at<CPU>(0, offsets[ialoc]), kp__->num_gkvec_col(), type.mt_aw_basis_size());
                    mdarray<double_complex, 2> halm_col_tmp(halm_col.at<CPU>(0, offsets[ialoc]), kp__->num_gkvec_col(), type.mt_aw_basis_size());

                    kp__->alm_coeffs_row()->generate(ia, alm_row_tmp);
                    for (int xi = 0; xi < type.mt_aw_basis_size(); xi++)
                    {
                        for (int igk = 0; igk < kp__->num_gkvec_row(); igk++) alm_row_tmp(igk, xi) = std::conj(alm_row_tmp(igk, xi));
                    }
                    kp__->alm_coeffs_col()->generate(ia, alm_col_tmp);
                    apply_hmt_to_apw<spin_block_t::nm>(atom, kp__->num_gkvec_col(), alm_col_tmp, halm_col_tmp);

                    /* setup apw-lo and lo-apw blocks */
                    set_fv_h_o_apw_lo(kp__, type, atom, ia, alm_row_tmp, alm_col_tmp, h__, o__);
                }
            }
        }
        #ifdef __PRINT_OBJECT_CHECKSUM
        double_complex z1 = alm_row.checksum();
        double_complex z2 = alm_col.checksum();
        double_complex z3 = halm_col.checksum();
        DUMP("checksum(alm_row): %18.10f %18.10f", std::real(z1), std::imag(z1));
        DUMP("checksum(alm_col): %18.10f %18.10f", std::real(z2), std::imag(z2));
        DUMP("checksum(halm_col): %18.10f %18.10f", std::real(z3), std::imag(z3));
        #endif
        linalg<CPU>::gemm(0, 1, kp__->num_gkvec_row(), kp__->num_gkvec_col(), num_mt_aw, zone,
                          alm_row.at<CPU>(), alm_row.ld(), alm_col.at<CPU>(), alm_col.ld(), zone, 
                          o__.at<CPU>(), o__.ld());

        linalg<CPU>::gemm(0, 1, kp__->num_gkvec_row(), kp__->num_gkvec_col(), num_mt_aw, zone, 
                          alm_row.at<CPU>(), alm_row.ld(), halm_col.at<CPU>(), halm_col.ld(), zone,
                          h__.at<CPU>(), h__.ld());
    }
    double tval = t1.stop();
    if (kp__->comm().rank() == 0)
    {
        DUMP("effective zgemm performance: %12.6f GFlops",
             2 * 8e-9 * kp__->num_gkvec() * kp__->num_gkvec() * unit_cell_.mt_aw_basis_size() / tval);
    }

    /* add interstitial contributon */
    set_fv_h_o_it(kp__, effective_potential__, h__, o__);

    /* setup lo-lo block */
    set_fv_h_o_lo_lo(kp__, h__, o__);
}
Пример #29
0
  // Calls the same method of the superclass.
  bool svm::genericTrain(const dmatrix& input, const ivector& ids) {

    char buffer[80];

    if (validProgressObject()) {
      getProgressObject().reset();
      getProgressObject().setTitle("SVM: Training");
      getProgressObject().setMaxSteps(nClasses);
    }

    bias.resize(nClasses,getParameters().bias,false,true);
    trainData=new dmatrix(input);
    alpha.resize(nClasses,input.rows(),0,false,true);
    makeTargets(ids);
    errorCache.resize(input.rows());

    const parameters& param=getParameters();

    C=param.C;
    tolerance=param.tolerance;
    epsilon=param.epsilon;
    bool abort=false;

    // train one SVM for each class
    for (int cid=0; cid<nClasses && !abort; cid++) {
      int numChanged=0;
      bool examineAll=true;

      currentTarget=&target->getRow(cid);
      currentClass=cid;
      currentAlpha=&alpha.getRow(cid);

      _lti_debug("Training class " << cid << "\n");

      fillErrorCache();

      while ((numChanged > 0 || examineAll) && !abort) {
        numChanged=0;
        if (examineAll) {
          // iterate over all alphas
          for (int i=0; i<trainData->rows(); i++) {
            if (examineExample(i)) {
              numChanged++;
            }
          }
          // next turn, look only at non-bound alphas
          examineAll=false;
        } else {
          // iterate over all non-0 and non-C alphas
          int *tmpAlpha=new int[alpha.getRow(cid).size()];
          int j=0,i=0;
          for (i=0; i<alpha.getRow(cid).size(); i++) {
            if (alpha.getRow(cid).at(i) != 0.0 &&
                alpha.getRow(cid).at(i) != C) {
              tmpAlpha[j++]=i;
            }
          }
          delete[] tmpAlpha;
          for (i=0; i<j; i++) {
            if (examineExample(i)) {
              numChanged++;
            }
          }
          // next turn, examine all if we did not succeed this time
          if (numChanged == 0) {
            examineAll=true;
          }
        }
      }
      // update progress info object
      if (validProgressObject()) {
        sprintf(buffer,"numChanged=%d, error=%f",numChanged,errorSum);
        getProgressObject().step(buffer);
        abort=abort || getProgressObject().breakRequested();
      }

     // now limit the number of support vectors
      // does not work yet, so disable it
      if (0) {
        int supnum=0;
        ivector index(currentAlpha->size());
        ivector newindex(currentAlpha->size());
        dvector newkey(currentAlpha->size());
        for (int i=0; i<currentAlpha->size(); i++) {
          if (currentAlpha->at(i) > 0) {
            supnum++;
          }
          index[i]=i;
        }
        if (supnum > param.nSupport && param.nSupport > 0) {
          lti::sort2<double> sorter;
          sorter.apply(*currentAlpha,index,newkey,newindex);

          int i;
          for (i=0; i<newkey.size() &&
                 lti::abs(newkey[i]) > std::numeric_limits<double>::epsilon(); i++) {
          }
          for (int j=i; j<currentAlpha->size()-param.nSupport; j++) {
            currentAlpha->at(newindex[j])=0;
          }
          _lti_debug("Final alpha: " << *currentAlpha << std::endl);
        }
      }
    }

    defineOutputTemplate();

    _lti_debug("alpha:\n" << alpha << "\n");

    // make sure that all lagrange multipliers are larger than
    // zero, otherwise we might get into trouble later
    alpha.apply(rectify);

    if (abort) {
      setStatusString("Training aborted by user!");
    }
    return !abort;
  }
Пример #30
0
  // TODO: comment your train method
  bool MLP::train(const dvector& theWeights,
                  const dmatrix& data,
                  const ivector& ids) {

    if (data.empty()) {
      setStatusString("Train data empty");
      return false;
    }


    if (ids.size()!=data.rows()) {
      std::string str;
      str = "dimensionality of IDs vector and the number of rows ";
      str+= "of the input matrix must have the same size.";
      setStatusString(str.c_str());
      return false;
    }


    // tracks the status of the training process.
    // if an error occurs set to false and use setStatusString()
    // however, training should continue, fixing the error as well as possible
    bool b=true;

    // vector with internal ids
    ivector newIds,idsLUT;
    newIds.resize(ids.size(),0,false,false);

    // map to get the internal Id to an external Id;
    std::map<int,int> extToInt;
    std::map<int,int>::iterator it;

    int i,k;
    for (i=0,k=0;i<ids.size();++i) {
      it = extToInt.find(ids.at(i));
      if (it != extToInt.end()) {
        newIds.at(i) = (*it).second;
      } else {
        extToInt[ids.at(i)] = k;
        newIds.at(i) = k;
        ++k;
      }
    }

    idsLUT.resize(extToInt.size());
    for (it=extToInt.begin();it!=extToInt.end();++it) {
      idsLUT.at((*it).second) = (*it).first;
    }

    // initialize the inputs and output units from the given data
    outputs = idsLUT.size();
    inputs  = data.columns();

    const parameters& param = getParameters();

    // display which kind of algorithm is to be used
    if (validProgressObject()) {
      getProgressObject().reset();
      std::string str("MLP: Training using ");
      switch(param.trainingMode) {
        case parameters::ConjugateGradients:
          str += "conjugate gradients";
          break;
        case parameters::SteepestDescent:
          str += "steepest descent";
          break;
        default:
          str += "unnamed method";
      }
      getProgressObject().setTitle(str);
      getProgressObject().setMaxSteps(param.maxNumberOfEpochs+1);
    }

    dvector grad;
    if (&theWeights != &weights) {
      weights.copy(theWeights);
    }

    if (!initWeights(true)) { // keep the weights
      setStatusString("Wrong weights!");
      return false;
    };

    computeErrorNorm(newIds);

    if (param.trainingMode == parameters::ConjugateGradients) {
      b = trainConjugateGradients(data,newIds);
    } else {
      if (param.batchMode) { // batch training mode:
        b = trainSteepestBatch(data,newIds);
      } else { // sequential training mode:
        b = trainSteepestSequential(data,newIds);
      }
    }

    if (validProgressObject()) {
      getProgressObject().step("Training ready.");
    }

    outputTemplate tmpOutTemp(idsLUT);
    setOutputTemplate(tmpOutTemp);

    // create the appropriate outputTemplate
    makeOutputTemplate(outputs,data,ids);

    return b;
  }