예제 #1
1
void ode_evaluate(
	CppAD::vector<Float> &x  , 
	size_t m                 , 
	CppAD::vector<Float> &fm )
{
	typedef CppAD::vector<Float> Vector;

	size_t n = x.size();
	size_t ell;
	CPPAD_ASSERT_KNOWN( m == 0 || m == 1,
		"ode_evaluate: m is not zero or one"
	);
	CPPAD_ASSERT_KNOWN( 
		((m==0) & (fm.size()==n)) || ((m==1) & (fm.size()==n*n)),
		"ode_evaluate: the size of fm is not correct"
	);
	if( m == 0 )
		ell = n;
	else	ell = n + n * n;

	// set up the case we are integrating
	Float  ti   = 0.;
	Float  tf   = 1.;
	Float  smin = 1e-5;
	Float smax  = 1.;
	Float scur  = 1.;
	Float erel  = 0.;
	vector<Float> yi(ell), eabs(ell);
	size_t i, j;
	for(i = 0; i < ell; i++)
	{	eabs[i] = 1e-10;
		if( i < n )
			yi[i] = 1.;
		else	yi[i]  = 0.;
	}

	// return values
	Vector yf(ell), ef(ell), maxabs(ell);
	size_t nstep;

	// construct ode method for taking one step
	ode_evaluate_method<Float> method(m, x);

	// solve differential equation
	yf = OdeErrControl(method, 
		ti, tf, yi, smin, smax, scur, eabs, erel, ef, maxabs, nstep);

	if( m == 0 )
	{	for(i = 0; i < n; i++)
			fm[i] = yf[i];
	}
	else
	{	for(i = 0; i < n; i++)
			for(j = 0; j < n; j++)
				fm[i * n + j] = yf[n + i * n + j];
	}
	return;
}
예제 #2
0
파일: surv2.c 프로젝트: nicole-chen/kmc
void eltestwt (double *x, double *wt, double * mu1,int *Lx1,double *pi,double *lamre) {
double mu=mu1[0];
double Lx=Lx1[0];
double allw = summm(wt,Lx);
double BU = 0.02*allw/maxabs(x,mu,Lx);
double lam0=0.,lo,up;
double toldouble[1]={1e-9};/*tolerance used in root searching*/
int MAXITER[1]={1000};
int i=0;
if (lamfunC(0,x,mu,wt,allw,Lx) == 0){ 
  lam0 = 0.;
} else {
 if( lamfunC(0,x,mu,wt,allw,Lx) > 0 ) {
   lo = 0.;
   up = BU;
    while(lamfunC(up,x,mu,wt,allw,Lx)>0){
         up += BU;
    }
  } else {
    up = 0;
    lo = - BU;
      while(lamfunC(lo,x,mu,wt,allw,Lx) < 0 ){
           lo  = lo - BU;
      }
     }
 lam0 = R_zeroin2surv(lo,up,toldouble,MAXITER,x,mu,wt,allw,Lx);
}

for (i=0;i<Lx;i++){ pi[i] = wt[i]/(allw + lam0*(x[i]-mu));}
lamre[0]=lam0;

}
예제 #3
0
/**
   normalize vector to max to max;*/
void normalize_max(double *p, long nloc, double max){
    if(!nloc) return;
    double ss=max/maxabs(p,nloc);
    for(int i=0; i<nloc; i++){
	p[i]*=ss;
    }
}
예제 #4
0
파일: delaunay.cpp 프로젝트: omco/geode
GEODE_ALWAYS_INLINE static inline bool triangle_oriented(const RawField<const Perturbed2,VertexId> X, VertexId v0, VertexId v1, VertexId v2) {
  const auto x0 = X[v0],
             x1 = X[v1],
             x2 = X[v2];
  // If we have a nonsentinel triangle, use a normal orientation test
  if (maxabs(x0.value().x,x1.value().x,x2.value().x)!=bound)
    return triangle_oriented(x0,x1,x2);
  // Fall back to sentinel case analysis
  return triangle_oriented_sentinels(x0,x1,x2);
}
//#####################################################################
// Function Diagonalize
//#####################################################################
template<class T,int bandwidth> void BANDED_SYMMETRIC_MATRIX<T,bandwidth>::
Diagonalize()
{
    static const T tolerance=10*std::numeric_limits<T>::epsilon();
    int m=Size();if(m<2) return;
    A(m)[2]=0; // use a_m,m+1 as a sentinel
    int start=1;
    for(;;){
        // find unreduced section
        while(!A(start)[2] || abs(A(start)[2])<=tolerance*maxabs(A(start)[1],A(start)[2])){
            A(start)[2]=0;if(++start==m) return;}
        int end=start+1;
        while(A(end)[2] && abs(A(end)[2])>tolerance*maxabs(A(end)[1],A(end)[2])) end++;
        A(end)[2]=0;
        // compute shift
        T ap=A(end-1)[1],bp=A(end-1)[2],aq=A(end)[1],d=(ap-aq)/2;
        T shift=d?aq-bp*bp/(d+sign(d)*sqrt(d*d+bp*bp)):aq-bp;
        Implicit_QR_Step(start,shift);}
}
//#####################################################################
// Function Print_Spectral_Information
//#####################################################################
template<class T,int bandwidth> void BANDED_SYMMETRIC_MATRIX<T,bandwidth>::
Print_Spectral_Information() const
{
#ifndef COMPILE_WITHOUT_READ_WRITE_SUPPORT
    if(!Size()){
        LOG::cout<<"eigenvalue range = null, condition = null"<<std::endl;
        return;}
    ARRAY<T> D;Eigenvalues(D);
    T lambda_min=ARRAYS_COMPUTATIONS::Min(D),lambda_max=ARRAYS_COMPUTATIONS::Max(D);
    T condition=lambda_min*lambda_max>0?Robust_Divide(maxabs(lambda_min,lambda_max),minabs(lambda_min,lambda_max)):0;
    LOG::cout<<"eigenvalue range = "<<lambda_min<<" "<<lambda_max<<", condition = "<<condition<<std::endl;
    Sort(D);LOG::cout<<"eigenvalues = "<<D;
#endif
}
예제 #7
0
bool OdeErrMaxabs(void)
{   bool ok = true;     // initial return value

    CppAD::vector<double> w(2);
    w[0] = 10.;
    w[1] = 1.;
    Method method(w);

    CppAD::vector<double> xi(2);
    xi[0] = 1.;
    xi[1] = 0.;

    CppAD::vector<double> eabs(2);
    eabs[0] = 0.;
    eabs[1] = 0.;

    CppAD::vector<double> ef(2);
    CppAD::vector<double> xf(2);
    CppAD::vector<double> maxabs(2);

    double ti   = 0.;
    double tf   = 1.;
    double smin = .5;
    double smax = 1.;
    double scur = .5;
    double erel = 1e-4;

    bool accurate = false;
    while( ! accurate )
    {   xf = OdeErrControl(method,
                           ti, tf, xi, smin, smax, scur, eabs, erel, ef, maxabs);
        accurate = true;
        size_t i;
        for(i = 0; i < 2; i++)
            accurate &= ef[i] <= erel * maxabs[i];
        if( ! accurate )
            smin = smin / 2;
    }

    double x0 = exp(-w[0]*tf);
    ok &= CppAD::NearEqual(x0, xf[0], erel, 0.);
    ok &= CppAD::NearEqual(0., ef[0], erel, erel);

    double x1 = w[0] * (exp(-w[0]*tf) - exp(-w[1]*tf))/(w[1] - w[0]);
    ok &= CppAD::NearEqual(x1, xf[1], erel, 0.);
    ok &= CppAD::NearEqual(0., ef[1], erel, erel);

    return ok;
}
예제 #8
0
bool OdeErrControl_three(void)
{	bool ok = true;     // initial return value

	double alpha = 10.;
	Method_three method(alpha);

	CppAD::vector<double> xi(2);
	xi[0] = 1.;
	xi[1] = 0.;

	CppAD::vector<double> eabs(2);
	eabs[0] = 1e-4;
	eabs[1] = 1e-4;

	// inputs
	double ti   = 0.;
	double tf   = 1.;
	double smin = 1e-4;
	double smax = 1.;
	double scur = 1.;
	double erel = 0.;

	// outputs
	CppAD::vector<double> ef(2);
	CppAD::vector<double> xf(2);
	CppAD::vector<double> maxabs(2);
	size_t nstep;


	xf = OdeErrControl(method,
		ti, tf, xi, smin, smax, scur, eabs, erel, ef, maxabs, nstep);


	double x0       = exp( alpha * tf * tf );
	ok &= CppAD::NearEqual(x0, xf[0], 1e-4, 1e-4);
	ok &= CppAD::NearEqual(0., ef[0], 1e-4, 1e-4);

	double root_pi    = sqrt( 4. * atan(1.));
	double root_alpha = sqrt( alpha );
	double x1 = CppAD::erf(alpha * tf) * root_pi / (2 * root_alpha);
	ok &= CppAD::NearEqual(x1, xf[1], 1e-4, 1e-4);
	ok &= CppAD::NearEqual(0., ef[1], 1e-4, 1e-4);

	ok &= method.F.was_negative();

	return ok;
}
예제 #9
0
Vector OdeErrControl(
	Method          &method,
	const Scalar    &ti    ,
	const Scalar    &tf    ,
	const Vector    &xi    ,
	const Scalar    &smin  ,
	const Scalar    &smax  ,
	Scalar          &scur  ,
	const Vector    &eabs  ,
	const Scalar    &erel  ,
	Vector          &ef    )
{	Vector maxabs(xi.size());
	size_t nstep;
	return OdeErrControl(
	method, ti, tf, xi, smin, smax, scur, eabs, erel, ef, maxabs, nstep
	);
}
예제 #10
0
bool OdeGearControl(void)
{	bool ok = true;     // initial return value

	CPPAD_TEST_VECTOR<double> w(2);
	w[0] = 10.;
	w[1] = 1.;
	Fun F(w);

	CPPAD_TEST_VECTOR<double> xi(2);
	xi[0] = 1.;
	xi[1] = 0.;

	CPPAD_TEST_VECTOR<double> eabs(2);
	eabs[0] = 1e-4;
	eabs[1] = 1e-4;

	// return values
	CPPAD_TEST_VECTOR<double> ef(2);
	CPPAD_TEST_VECTOR<double> maxabs(2);
	CPPAD_TEST_VECTOR<double> xf(2);
	size_t                nstep;

	// input values
	size_t  M   = 5;
	double ti   = 0.;
	double tf   = 1.;
	double smin = 1e-8;
	double smax = 1.;
	double sini = 1e-10;
	double erel = 0.;
	
	xf = CppAD::OdeGearControl(F, M,
		ti, tf, xi, smin, smax, sini, eabs, erel, ef, maxabs, nstep);

	double x0 = exp(-w[0]*tf);
	ok &= CppAD::NearEqual(x0, xf[0], 1e-4, 1e-4);
	ok &= CppAD::NearEqual(0., ef[0], 1e-4, 1e-4);

	double x1 = w[0] * (exp(-w[0]*tf) - exp(-w[1]*tf))/(w[1] - w[0]);
	ok &= CppAD::NearEqual(x1, xf[1], 1e-4, 1e-4);
	ok &= CppAD::NearEqual(0., ef[1], 1e-4, 1e-4);

	return ok;
}
예제 #11
0
bool OdeErrControl_two(void)
{	bool ok = true;     // initial return value

	CppAD::vector<double> w(2);
	w[0] = 10.;
	w[1] = 1.;
	Method_two method(w);

	CppAD::vector<double> xi(2);
	xi[0] = 1.;
	xi[1] = 0.;

	CppAD::vector<double> eabs(2);
	eabs[0] = 1e-4;
	eabs[1] = 1e-4;

	// inputs
	double ti   = 0.;
	double tf   = 1.;
	double smin = 1e-4;
	double smax = 1.;
	double scur = .5;
	double erel = 0.;

	// outputs
	CppAD::vector<double> ef(2);
	CppAD::vector<double> xf(2);
	CppAD::vector<double> maxabs(2);
	size_t nstep;


	xf = OdeErrControl(method,
		ti, tf, xi, smin, smax, scur, eabs, erel, ef, maxabs, nstep);

	double x0 = exp(-w[0]*tf);
	ok &= CppAD::NearEqual(x0, xf[0], 1e-4, 1e-4);
	ok &= CppAD::NearEqual(0., ef[0], 1e-4, 1e-4);

	double x1 = w[0] * (exp(-w[0]*tf) - exp(-w[1]*tf))/(w[1] - w[0]);
	ok &= CppAD::NearEqual(x1, xf[1], 1e-4, 1e-4);
	ok &= CppAD::NearEqual(0., ef[1], 1e-4, 1e-4);

	return ok;
}
예제 #12
0
파일: delaunay.cpp 프로젝트: omco/geode
// Test whether an edge is Delaunay
GEODE_ALWAYS_INLINE static inline bool is_delaunay(const TriangleTopology& mesh, RawField<const Perturbed2,VertexId> X, const HalfedgeId edge) {
  // Boundary edges belong to the sentinel quad and are always Delaunay.
  const auto rev = mesh.reverse(edge);
  if (mesh.is_boundary(rev))
    return true;
  // Grab vertices in counterclockwise order
  const auto v0 = mesh.src(edge),
             v1 = mesh.src(mesh.prev(rev)),
             v2 = mesh.dst(edge),
             v3 = mesh.src(mesh.prev(edge));
  const auto x0 = X[v0],
             x1 = X[v1],
             x2 = X[v2],
             x3 = X[v3];
  // If we have a nonsentinel interior edge, perform a normal incircle test
  if (maxabs(x0.value().x,x1.value().x,x2.value().x,x3.value().x)!=bound)
    return !incircle(x0,x1,x2,x3);
  // Fall back to sentinel case analysis
  return is_delaunay_sentinels(x0,x1,x2,x3);
}
예제 #13
0
/* ************************************************************
   PROCEDURE cholonBlk - CHOLESKY on a dense diagonal block.
            Also updates nonzeros below this diagonal block -
            they need merely be divided by the scalar diagonals
            "lkk" afterwards.
   INPUT
     m      - number of rows (length of the first column).
     ncols  - number of columns in the supernode.(n <= m)
     lb     - Length ncols. Skip k-th pivot if drops below lb[k].
     ub     - max(diag(x)) / maxu^2. No stability check for pivots > ub.
     maxu   - Max. acceptable |lik|/lkk when lkk suffers cancelation.
     first - global column number of column 0. This is used only to insert
        the global column numbers into skipIr.
   UPDATED
     x  - On input, contains the columns of the supernode to
          be factored. On output, contains the factored columns of
          the supernode.
     skipIr - Lists skipped pivots with their global column number
        in 0:neqns-1. Active range is first:first+ncols-1.
        Skipped if d(k) suffers cancelation and max(abs(L(:,k)) > maxu.
     *pnskip - nnz in skip; *pnskip <= order N of sparse matrix.
   OUTPUT
     d - Length ncols. Diagonal in L*diag(d)*L' with diag(L)=all-1.
   ************************************************************ */
void cholonBlk(double *x, double *d, mwIndex m, const mwIndex ncols, const mwIndex first,
               const double ub, const double maxu, double *lb,
               mwIndex *skipIr, mwIndex *pnskip)
{
  mwIndex inz,i,k,n,coltail, nskip;
  double xkk, xik, ubk;
  double *xi;
/* ------------------------------------------------------------
   Initialize:
   ------------------------------------------------------------ */
  n = ncols;
  nskip = *pnskip;
  inz = 0;
  coltail = m - ncols;
  for(k = 0; k < ncols; k++, --m, --n){
/* -------------------------------------------------------
   Let xkk = L(k,k), ubk = max(|xik|) / maxu.
   ------------------------------------------------------- */
    xkk = x[inz];
    if(xkk > lb[k]){ /* now xkk > 0 */
      if(xkk < ub){
        ubk = maxabs(x+inz+1,m-1) / maxu;
        if(xkk < ubk){
/* ------------------------------------------------------------
   If we need to add on diagonal, store this in (skipIr, lb(k)).
   ------------------------------------------------------------ */
          skipIr[nskip++] = first + k;
	  lb[k] = ubk - xkk;           /* amount added on diagonal */
	  xkk = ubk;
	}
      }
/* --------------------------------------------------------------
   Set dk = xkk, lkk = 1 (for LDL').
   -------------------------------------------------------------- */
      d[k] = xkk;                   /* now d[k] > 0 MEANS NO-SKIPPING */
      x[inz] = 1.0;
      xi = x + inz + m;                 /* point to next column */
      ++inz;
/* --------------------------------------------------------------
   REGULAR JOB: correct remaining n-k cols with col k.
   x(k+1:m,k+1:n) -= x(k+1:m,k) * x(k+1:n,k)' / xkk
   x(k+1:n,k) /= xkk,
   -------------------------------------------------------------- */
      for(i = 1; i < n; i++){
        xik = x[inz] / xkk;
        subscalarmul(xi, xik, x+inz, m-i);
        x[inz++] = xik;
        xi += m-i;
      }
      inz += coltail;                 /* Let inz point to next column */
    }
/* ------------------------------------------------------------
   If skipping is enabled and this pivot is too small:
   1) don't touch L(k:end,k): allows pivot delaying if desired.
   2) List first+k in skipIr. Set dk = 0 (MEANS SKIPPING).
   -------------------------------------------------------------- */
    else{
      skipIr[nskip++] = first + k;
      d[k] = 0.0;                 /* tag "0": means column skipped in LDL'.*/
      inz += m;                   /* Don't touch nor use L(k:end,k) */
    }
  } /* k=0:ncols-1 */
/* ------------------------------------------------------------
   Return updated number of added or skipped pivots.
   ------------------------------------------------------------ */
  *pnskip = nskip;
}
예제 #14
0
Need ompcore(double D[], double x[], double DtX[], double XtX[], double G[], mwSize n, mwSize m, mwSize L,
                 int T, double eps, int gamma_mode, int profile, double msg_delta, int erroromp)
{
  
  profdata pd;
  /* mxArray *Gamma;*/
  mwIndex i, j, signum, pos, *ind, *gammaIr, *gammaJc, gamma_count;
  mwSize allocated_coefs, allocated_cols;
  int DtX_specified, XtX_specified, batchomp, standardomp, *selected_atoms,*times_atoms ;
  double *alpha, *r, *Lchol, *c, *Gsub, *Dsub, sum, *gammaPr, *tempvec1, *tempvec2; 
  double eps2, resnorm, delta, deltaprev, secs_remain;
  int mins_remain, hrs_remain;
  clock_t lastprint_time, starttime;

  Need my;
  
  /*** status flags ***/
  
  DtX_specified = (DtX!=0);   /* indicates whether D'*x was provided */
  XtX_specified = (XtX!=0);   /* indicates whether sum(x.*x) was provided */
  
  standardomp = (G==0);       /* batch-omp or standard omp are selected depending on availability of G */
  batchomp = !standardomp;
  
  
  
  /*** allocate output matrix ***/
  
  
  if (gamma_mode == FULL_GAMMA) {
    
    /* allocate full matrix of size m X L */
    
   Gamma = mxCreateDoubleMatrix(m, L, mxREAL);
    gammaPr = mxGetPr(Gamma);
    gammaIr = 0;
    gammaJc = 0;
  }
  else {
    
    /* allocate sparse matrix with room for allocated_coefs nonzeros */
    
    /* for error-omp, begin with L*sqrt(n)/2 allocated nonzeros, otherwise allocate L*T nonzeros */
    allocated_coefs = erroromp ? (mwSize)(ceil(L*sqrt((double)n)/2.0) + 1.01) : L*T;
    Gamma = mxCreateSparse(m, L, allocated_coefs, mxREAL);
    gammaPr = mxGetPr(Gamma);
    gammaIr = mxGetIr(Gamma);
    gammaJc = mxGetJc(Gamma);
    gamma_count = 0;
    gammaJc[0] = 0;
  }
  
  
  /*** helper arrays ***/
  
  alpha = (double*)mxMalloc(m*sizeof(double));        /* contains D'*residual */
  ind = (mwIndex*)mxMalloc(n*sizeof(mwIndex));        /* indices of selected atoms */
  selected_atoms = (int*)mxMalloc(m*sizeof(int));     /* binary array with 1's for selected atoms */
  times_atoms = (int*)mxMalloc(m*sizeof(int)); 
  c = (double*)mxMalloc(n*sizeof(double));            /* orthogonal projection result */
  
  /* current number of columns in Dsub / Gsub / Lchol */
  allocated_cols = erroromp ? (mwSize)(ceil(sqrt((double)n)/2.0) + 1.01) : T;
  
  /* Cholesky decomposition of D_I'*D_I */
  Lchol = (double*)mxMalloc(n*allocated_cols*sizeof(double));

  /* temporary vectors for various computations */
  tempvec1 = (double*)mxMalloc(m*sizeof(double));
  tempvec2 = (double*)mxMalloc(m*sizeof(double));
  
  if (batchomp) {
    /* matrix containing G(:,ind) - the columns of G corresponding to the selected atoms, in order of selection */
    Gsub = (double*)mxMalloc(m*allocated_cols*sizeof(double));
  }
  else {
    /* matrix containing D(:,ind) - the selected atoms from D, in order of selection */
    Dsub = (double*)mxMalloc(n*allocated_cols*sizeof(double));
    
    /* stores the residual */
    r = (double*)mxMalloc(n*sizeof(double));        
  }
  
  if (!DtX_specified) {
    /* contains D'*x for the current signal */
    DtX = (double*)mxMalloc(m*sizeof(double));  
  }
  
  
  
  /*** initializations for error omp ***/
  
  if (erroromp) {
    eps2 = eps*eps;        /* compute eps^2 */
    if (T<0 || T>n) {      /* unspecified max atom num - set max atoms to n */
      T = n;
    }
  }
  
  
  
  /*** initialize timers ***/
  
  initprofdata(&pd);             /* initialize profiling counters */
  starttime = clock();           /* record starting time for eta computations */
  lastprint_time = starttime;    /* time of last status display */
  
  
  
  /**********************   perform omp for each signal   **********************/
  
  
  
  for (signum=0; signum<L; ++signum) {
    
    
    /* initialize residual norm and deltaprev for error-omp */
    
    if (erroromp) {
      if (XtX_specified) {
        resnorm = XtX[signum];
      }
      else {
        resnorm = dotprod(x+n*signum, x+n*signum, n);
        addproftime(&pd, XtX_TIME);
      }
      deltaprev = 0;     /* delta tracks the value of gamma'*G*gamma */
    }
    else {
      /* ignore residual norm stopping criterion */
      eps2 = 0;
      resnorm = 1;
    }
    
    
    if (resnorm>eps2 && T>0) {
      
      /* compute DtX */
      
      if (!DtX_specified) {
        matT_vec(1, D, x+n*signum, DtX, n, m);
        addproftime(&pd, DtX_TIME);
      }
      
      
      /* initialize alpha := DtX */
      
      memcpy(alpha, DtX + m*signum*DtX_specified, m*sizeof(double));
      
      
      /* mark all atoms as unselected */
      
      for (i=0; i<m; ++i) {
        selected_atoms[i] = 0;
      }
	   for (i=0; i<m; ++i) {
        times_atoms[i] = 0;
      }
      
    }
    

    /* main loop */
    
    i=0;
    while (resnorm>eps2 && i<T) {

      /* index of next atom */
      
      pos = maxabs(alpha, m);
      addproftime(&pd, MAXABS_TIME);
      
      
      /* stop criterion: selected same atom twice, or inner product too small */
      
      if (selected_atoms[pos] || alpha[pos]*alpha[pos]<1e-14) {
        break;
      }
      
      
      /* mark selected atom */
      
      ind[i] = pos;
      selected_atoms[pos] = 1;
	  times_atoms[pos]++;
      
      
      /* matrix reallocation */
      
      if (erroromp && i>=allocated_cols) {
        
        allocated_cols = (mwSize)(ceil(allocated_cols*MAT_INC_FACTOR) + 1.01);
        
        Lchol = (double*)mxRealloc(Lchol,n*allocated_cols*sizeof(double));
        
        batchomp ? (Gsub = (double*)mxRealloc(Gsub,m*allocated_cols*sizeof(double))) :
                   (Dsub = (double*)mxRealloc(Dsub,n*allocated_cols*sizeof(double))) ;
      }
      
      
      /* append column to Gsub or Dsub */
      
      if (batchomp) {
        memcpy(Gsub+i*m, G+pos*m, m*sizeof(double));
      }
      else {
        memcpy(Dsub+i*n, D+pos*n, n*sizeof(double));
      }
      
      
      /*** Cholesky update ***/
      
      if (i==0) {
        *Lchol = 1;
      }
      else {
        
        /* incremental Cholesky decomposition: compute next row of Lchol */
        
        if (standardomp) {
          matT_vec(1, Dsub, D+n*pos, tempvec1, n, i);      /* compute tempvec1 := Dsub'*d where d is new atom */
          addproftime(&pd, DtD_TIME);
        }
        else {
          vec_assign(tempvec1, Gsub+i*m, ind, i);          /* extract tempvec1 := Gsub(ind,i) */
        }
        backsubst('L', Lchol, tempvec1, tempvec2, n, i);   /* compute tempvec2 = Lchol \ tempvec1 */
        for (j=0; j<i; ++j) {                              /* write tempvec2 to end of Lchol */
          Lchol[j*n+i] = tempvec2[j];
        }
        
        /* compute Lchol(i,i) */
        sum = 0;
        for (j=0; j<i; ++j) {         /* compute sum of squares of last row without Lchol(i,i) */
          sum += SQR(Lchol[j*n+i]);
        }
        if ( (1-sum) <= 1e-14 ) {     /* Lchol(i,i) is zero => selected atoms are dependent */
          break;
        }
        Lchol[i*n+i] = sqrt(1-sum);
      }
      
      addproftime(&pd, LCHOL_TIME);

      i++;
      
      
      /* perform orthogonal projection and compute sparse coefficients */
      
      vec_assign(tempvec1, DtX + m*signum*DtX_specified, ind, i);   /* extract tempvec1 = DtX(ind) */
      cholsolve('L', Lchol, tempvec1, c, n, i);                     /* solve LL'c = tempvec1 for c */
      addproftime(&pd, COMPCOEF_TIME);
      

      /* update alpha = D'*residual */
      
      if (standardomp) {
        mat_vec(-1, Dsub, c, r, n, i);             /* compute r := -Dsub*c */
        vec_sum(1, x+n*signum, r, n);              /* compute r := x+r */
        
        
        /*memcpy(r, x+n*signum, n*sizeof(double));   /* assign r := x */
        /*mat_vec1(-1, Dsub, c, 1, r, n, i);         /* compute r := r-Dsub*c */
        
        addproftime(&pd, COMPRES_TIME);
        matT_vec(1, D, r, alpha, n, m);            /* compute alpha := D'*r */
        addproftime(&pd, DtR_TIME);
        
        /* update residual norm */
        if (erroromp) {
          resnorm = dotprod(r, r, n);
          addproftime(&pd, UPDATE_RESNORM_TIME);
        }
      }
      else {
        mat_vec(1, Gsub, c, tempvec1, m, i);                              /* compute tempvec1 := Gsub*c */
        memcpy(alpha, DtX + m*signum*DtX_specified, m*sizeof(double));    /* set alpha = D'*x */
        vec_sum(-1, tempvec1, alpha, m);                                  /* compute alpha := alpha - tempvec1 */
        addproftime(&pd, UPDATE_DtR_TIME);
        
        /* update residual norm */
        if (erroromp) {
          vec_assign(tempvec2, tempvec1, ind, i);      /* assign tempvec2 := tempvec1(ind) */
          delta = dotprod(c,tempvec2,i);               /* compute c'*tempvec2 */
          resnorm = resnorm - delta + deltaprev;       /* residual norm update */
          deltaprev = delta;
          addproftime(&pd, UPDATE_RESNORM_TIME);
        }
      }
    }
    
    
    /*** generate output vector gamma ***/

    if (gamma_mode == FULL_GAMMA) {    /* write the coefs in c to their correct positions in gamma */
      for (j=0; j<i; ++j) {
        gammaPr[m*signum + ind[j]] = c[j];
      }
    }
    else {
      /* sort the coefs by index before writing them to gamma */
      quicksort(ind,c,i);
      addproftime(&pd, INDEXSORT_TIME);
      
      /* gamma is full - reallocate */
      if (gamma_count+i >= allocated_coefs) {
        
        while(gamma_count+i >= allocated_coefs) {
          allocated_coefs = (mwSize)(ceil(GAMMA_INC_FACTOR*allocated_coefs) + 1.01);
        }
        
        mxSetNzmax(Gamma, allocated_coefs);
        mxSetPr(Gamma, mxRealloc(gammaPr, allocated_coefs*sizeof(double)));
        mxSetIr(Gamma, mxRealloc(gammaIr, allocated_coefs*sizeof(mwIndex)));
        
        gammaPr = mxGetPr(Gamma);
        gammaIr = mxGetIr(Gamma);
      }
      
      /* append coefs to gamma and update the indices */
      for (j=0; j<i; ++j) {
        gammaPr[gamma_count] = c[j];
        gammaIr[gamma_count] = ind[j];
        gamma_count++;
      }
      gammaJc[signum+1] = gammaJc[signum] + i;
    }
    
    
    
    /*** display status messages ***/
    
    if (msg_delta>0 && (clock()-lastprint_time)/(double)CLOCKS_PER_SEC >= msg_delta)
    {
      lastprint_time = clock();
      
      /* estimated remainig time */
      secs2hms( ((L-signum-1)/(double)(signum+1)) * ((lastprint_time-starttime)/(double)CLOCKS_PER_SEC) ,
        &hrs_remain, &mins_remain, &secs_remain);
      
      mexPrintf("omp: signal %d / %d, estimated remaining time: %02d:%02d:%05.2f\n",        
        signum+1, L, hrs_remain, mins_remain, secs_remain);
      mexEvalString("drawnow;");
    }
    
  }
  
  /* end omp */
  
  
  
  /*** print final messages ***/
  
  if (msg_delta>0) {
    mexPrintf("omp: signal %d / %d\n", signum, L);
  }
  
  if (profile) {
    printprofinfo(&pd, erroromp, batchomp, L);
  }
  
  
  
  /* free memory */
  
  if (!DtX_specified) {
    mxFree(DtX);
  }
  if (standardomp) {
    mxFree(r);
    mxFree(Dsub);
  }
  else {
    mxFree(Gsub);
  }  
  mxFree(tempvec2);
  mxFree(tempvec1);
  mxFree(Lchol);
  mxFree(c);
  mxFree(selected_atoms);
  mxFree(ind);
  mxFree(alpha);

  my.qGamma=Gamma;
  my.qtimes__atoms=times__atoms;
  
  /*return Gamma;*/
  return  my;
 
}
예제 #15
0
/*                                                             MAIN PROGRAM */
int main() {
    int n,i,it;
    size_t tape_stats[STAT_SIZE];

    /*--------------------------------------------------------------------------*/
    /* Input */
    fprintf(stdout,"SPEELPENNINGS PRODUCT Type 1 (ADOL-C Example)\n\n");
    fprintf(stdout,"number of independent variables = ?  \n");
    scanf("%d",&n);
    int itu;
    fprintf(stdout,"number of evaluations = ?  \n");
    scanf("%d",&itu);

    /*--------------------------------------------------------------------------*/
    double yp=0.0;                 /* 0. time  (undifferentiated double code) */
    double *xp = new double[n];
    /* Init */
    for (i=0;i<n;i++)
        xp[i] = (i+1.0)/(2.0+i);

    double t00 = myclock(1);
    for (it=0; it<itu; it++) {
        yp = 1.0;
        for (i=0; i<n; i++)
            yp *= xp[i];
    }
    double t01 = myclock();

    /*--------------------------------------------------------------------------*/
    double yout=0;                             /* 1. time (tracing ! no keep) */

    double t10 = myclock();
    trace_on(TAG);
    adouble* x;
    x = new adouble[n];
    adouble y;
    y = 1;
    for (i=0; i<n; i++) {
        x[i] <<= xp[i];
        y *= x[i];
    }
    y >>= yout;
    delete [] x;
    trace_off();
    double t11 = myclock();

    fprintf(stdout,"%E =? %E  function values should be the same \n",yout,yp);

    /*--------------------------------------------------------------------------*/
    tapestats(TAG,tape_stats);

    fprintf(stdout,"\n    independents            %zu\n",tape_stats[NUM_INDEPENDENTS]);
    fprintf(stdout,"    dependents              %zu\n",tape_stats[NUM_DEPENDENTS]);
    fprintf(stdout,"    operations              %zu\n",tape_stats[NUM_OPERATIONS]);
    fprintf(stdout,"    operations buffer size  %zu\n",tape_stats[OP_BUFFER_SIZE]);
    fprintf(stdout,"    locations buffer size   %zu\n",tape_stats[LOC_BUFFER_SIZE]);
    fprintf(stdout,"    constants buffer size   %zu\n",tape_stats[VAL_BUFFER_SIZE]);
    fprintf(stdout,"    maxlive                 %zu\n",tape_stats[NUM_MAX_LIVES]);
    fprintf(stdout,"    valstack size           %zu\n\n",tape_stats[TAY_STACK_SIZE]);

    /*--------------------------------------------------------------------------*/
    double **r = new double*[1];
    r[0] = new double[1];
    r[0][0] = yp;
    double err;
    double *z = new double[n];
    double *g = new double[n];
    double* h = new double[n];
    double *ind = new double[n];

    /*--------------------------------------------------------------------------*/
    double t60 = myclock();                      /* 6. time (forward no keep) */
    for (it=0; it<itu; it++)
        forward(TAG,1,n,0,xp,*r);
    double t61 = myclock();

    /*--------------------------------------------------------------------------*/
    double t20 = myclock();                         /* 2. time (forward+keep) */
    for (it=0; it<itu; it++)
        forward(TAG,1,n,1,xp,*r);
    double t21 = myclock();

    /*--------------------------------------------------------------------------*/
    double t30 = myclock();                              /* 3. time (reverse) */
    for (it=0; it<itu; it++)
        reverse(TAG,1,n,0,1.0,g);
    double t31 = myclock();

    err=0;
    for (i=0; i<n; i++) // Compare with deleted product
    { err = maxabs(err,xp[i]*g[i]/r[0][0] - 1.0);
        ind[i] = xp[i];
    }

    fprintf(stdout,"%E = maximum relative errors in gradient (fw+rv)\n",err);

    /*--------------------------------------------------------------------------*/
    double t40 = myclock();                             /* 4. time (gradient) */
    for (it=0; it<itu; it++)
        gradient(TAG,n,ind,z);  //last argument lagrange is ommitted
    double t41 = myclock();

    err = 0;
    for (i=0; i<n; i++)  // Compare with previous numerical result
        err =  maxabs(err,g[i]/z[i] - 1.0);

    fprintf(stdout,"%E = gradient error should be exactly zero \n",err);

    /*--------------------------------------------------------------------------*/
    double *tan = new double[n];            /* 5. time (first row of Hessian) */
    for (i=1; i<n; i++)
        tan[i] = 0.0 ;
    tan[0]=1.0;

    double t50 = myclock();
    for (it=0; it<itu; it++)
        hess_vec(TAG,n,ind,tan,h);  // Computes Hessian times direction tan.
    double t51 = myclock();

    err = abs(h[0]);
    for (i=1; i<n; i++) //Compare with doubly deleted product
        err = maxabs(err,xp[0]*h[i]/g[i]-1.0);

    fprintf(stdout,"%E = maximum relative error in Hessian column \n",err);

    /*--------------------------------------------------------------------------*/
    double h1n = h[n-1];                                /* Check for symmetry */
    tan[0]=0;
    tan[n-1]=1;
    hess_vec(TAG,n,ind,tan,h);   // Computes Hessian times direction tan.

    fprintf(stdout,
            "%E = %E (1,n) and (n,1) entry should be the same\n",h1n,h[0]);

    /*--------------------------------------------------------------------------*/
    /* output of results */
    if (t01-t00) {
        double rtu = 1.0/(t01-t00);
        fprintf(stdout,"\n\n times for ");
        fprintf(stdout,"\n unitime          : \t%E  seconds",(t01-t00)/itu);
        fprintf(stdout,"\n tracing          : \t%E",(t11-t10)*rtu*itu);
        fprintf(stdout,"   units \t%E seconds",(t11-t10));
        fprintf(stdout,
                "\n----------------------------------------------------------");
        fprintf(stdout,"\n forward (no keep): \t%E",(t61-t60)*rtu);
        fprintf(stdout,"   units \t%E seconds",(t61-t60)/itu);
        fprintf(stdout,"\n forward + keep   : \t%E",(t21-t20)*rtu);
        fprintf(stdout,"   units \t%E seconds",(t21-t20)/itu);
        fprintf(stdout,"\n reverse          : \t%E",(t31-t30)*rtu);
        fprintf(stdout,"   units \t%E seconds",(t31-t30)/itu);
        fprintf(stdout,
                "\n----------------------------------------------------------");
        fprintf(stdout,"\n gradient         : \t%E",(t41-t40)*rtu);
        fprintf(stdout,"   units \t%E seconds",(t41-t40)/itu);
        fprintf(stdout,"\n hess*vec         : \t%E",(t51-t50)*rtu);
        fprintf(stdout,"   units \t%E seconds\n",(t51-t50)/itu);
    } else
        fprintf(stdout,"\n-> zero timing due to small problem dimension \n");

    return 1;
}