void ode_evaluate( CppAD::vector<Float> &x , size_t m , CppAD::vector<Float> &fm ) { typedef CppAD::vector<Float> Vector; size_t n = x.size(); size_t ell; CPPAD_ASSERT_KNOWN( m == 0 || m == 1, "ode_evaluate: m is not zero or one" ); CPPAD_ASSERT_KNOWN( ((m==0) & (fm.size()==n)) || ((m==1) & (fm.size()==n*n)), "ode_evaluate: the size of fm is not correct" ); if( m == 0 ) ell = n; else ell = n + n * n; // set up the case we are integrating Float ti = 0.; Float tf = 1.; Float smin = 1e-5; Float smax = 1.; Float scur = 1.; Float erel = 0.; vector<Float> yi(ell), eabs(ell); size_t i, j; for(i = 0; i < ell; i++) { eabs[i] = 1e-10; if( i < n ) yi[i] = 1.; else yi[i] = 0.; } // return values Vector yf(ell), ef(ell), maxabs(ell); size_t nstep; // construct ode method for taking one step ode_evaluate_method<Float> method(m, x); // solve differential equation yf = OdeErrControl(method, ti, tf, yi, smin, smax, scur, eabs, erel, ef, maxabs, nstep); if( m == 0 ) { for(i = 0; i < n; i++) fm[i] = yf[i]; } else { for(i = 0; i < n; i++) for(j = 0; j < n; j++) fm[i * n + j] = yf[n + i * n + j]; } return; }
void eltestwt (double *x, double *wt, double * mu1,int *Lx1,double *pi,double *lamre) { double mu=mu1[0]; double Lx=Lx1[0]; double allw = summm(wt,Lx); double BU = 0.02*allw/maxabs(x,mu,Lx); double lam0=0.,lo,up; double toldouble[1]={1e-9};/*tolerance used in root searching*/ int MAXITER[1]={1000}; int i=0; if (lamfunC(0,x,mu,wt,allw,Lx) == 0){ lam0 = 0.; } else { if( lamfunC(0,x,mu,wt,allw,Lx) > 0 ) { lo = 0.; up = BU; while(lamfunC(up,x,mu,wt,allw,Lx)>0){ up += BU; } } else { up = 0; lo = - BU; while(lamfunC(lo,x,mu,wt,allw,Lx) < 0 ){ lo = lo - BU; } } lam0 = R_zeroin2surv(lo,up,toldouble,MAXITER,x,mu,wt,allw,Lx); } for (i=0;i<Lx;i++){ pi[i] = wt[i]/(allw + lam0*(x[i]-mu));} lamre[0]=lam0; }
/** normalize vector to max to max;*/ void normalize_max(double *p, long nloc, double max){ if(!nloc) return; double ss=max/maxabs(p,nloc); for(int i=0; i<nloc; i++){ p[i]*=ss; } }
GEODE_ALWAYS_INLINE static inline bool triangle_oriented(const RawField<const Perturbed2,VertexId> X, VertexId v0, VertexId v1, VertexId v2) { const auto x0 = X[v0], x1 = X[v1], x2 = X[v2]; // If we have a nonsentinel triangle, use a normal orientation test if (maxabs(x0.value().x,x1.value().x,x2.value().x)!=bound) return triangle_oriented(x0,x1,x2); // Fall back to sentinel case analysis return triangle_oriented_sentinels(x0,x1,x2); }
//##################################################################### // Function Diagonalize //##################################################################### template<class T,int bandwidth> void BANDED_SYMMETRIC_MATRIX<T,bandwidth>:: Diagonalize() { static const T tolerance=10*std::numeric_limits<T>::epsilon(); int m=Size();if(m<2) return; A(m)[2]=0; // use a_m,m+1 as a sentinel int start=1; for(;;){ // find unreduced section while(!A(start)[2] || abs(A(start)[2])<=tolerance*maxabs(A(start)[1],A(start)[2])){ A(start)[2]=0;if(++start==m) return;} int end=start+1; while(A(end)[2] && abs(A(end)[2])>tolerance*maxabs(A(end)[1],A(end)[2])) end++; A(end)[2]=0; // compute shift T ap=A(end-1)[1],bp=A(end-1)[2],aq=A(end)[1],d=(ap-aq)/2; T shift=d?aq-bp*bp/(d+sign(d)*sqrt(d*d+bp*bp)):aq-bp; Implicit_QR_Step(start,shift);} }
//##################################################################### // Function Print_Spectral_Information //##################################################################### template<class T,int bandwidth> void BANDED_SYMMETRIC_MATRIX<T,bandwidth>:: Print_Spectral_Information() const { #ifndef COMPILE_WITHOUT_READ_WRITE_SUPPORT if(!Size()){ LOG::cout<<"eigenvalue range = null, condition = null"<<std::endl; return;} ARRAY<T> D;Eigenvalues(D); T lambda_min=ARRAYS_COMPUTATIONS::Min(D),lambda_max=ARRAYS_COMPUTATIONS::Max(D); T condition=lambda_min*lambda_max>0?Robust_Divide(maxabs(lambda_min,lambda_max),minabs(lambda_min,lambda_max)):0; LOG::cout<<"eigenvalue range = "<<lambda_min<<" "<<lambda_max<<", condition = "<<condition<<std::endl; Sort(D);LOG::cout<<"eigenvalues = "<<D; #endif }
bool OdeErrMaxabs(void) { bool ok = true; // initial return value CppAD::vector<double> w(2); w[0] = 10.; w[1] = 1.; Method method(w); CppAD::vector<double> xi(2); xi[0] = 1.; xi[1] = 0.; CppAD::vector<double> eabs(2); eabs[0] = 0.; eabs[1] = 0.; CppAD::vector<double> ef(2); CppAD::vector<double> xf(2); CppAD::vector<double> maxabs(2); double ti = 0.; double tf = 1.; double smin = .5; double smax = 1.; double scur = .5; double erel = 1e-4; bool accurate = false; while( ! accurate ) { xf = OdeErrControl(method, ti, tf, xi, smin, smax, scur, eabs, erel, ef, maxabs); accurate = true; size_t i; for(i = 0; i < 2; i++) accurate &= ef[i] <= erel * maxabs[i]; if( ! accurate ) smin = smin / 2; } double x0 = exp(-w[0]*tf); ok &= CppAD::NearEqual(x0, xf[0], erel, 0.); ok &= CppAD::NearEqual(0., ef[0], erel, erel); double x1 = w[0] * (exp(-w[0]*tf) - exp(-w[1]*tf))/(w[1] - w[0]); ok &= CppAD::NearEqual(x1, xf[1], erel, 0.); ok &= CppAD::NearEqual(0., ef[1], erel, erel); return ok; }
bool OdeErrControl_three(void) { bool ok = true; // initial return value double alpha = 10.; Method_three method(alpha); CppAD::vector<double> xi(2); xi[0] = 1.; xi[1] = 0.; CppAD::vector<double> eabs(2); eabs[0] = 1e-4; eabs[1] = 1e-4; // inputs double ti = 0.; double tf = 1.; double smin = 1e-4; double smax = 1.; double scur = 1.; double erel = 0.; // outputs CppAD::vector<double> ef(2); CppAD::vector<double> xf(2); CppAD::vector<double> maxabs(2); size_t nstep; xf = OdeErrControl(method, ti, tf, xi, smin, smax, scur, eabs, erel, ef, maxabs, nstep); double x0 = exp( alpha * tf * tf ); ok &= CppAD::NearEqual(x0, xf[0], 1e-4, 1e-4); ok &= CppAD::NearEqual(0., ef[0], 1e-4, 1e-4); double root_pi = sqrt( 4. * atan(1.)); double root_alpha = sqrt( alpha ); double x1 = CppAD::erf(alpha * tf) * root_pi / (2 * root_alpha); ok &= CppAD::NearEqual(x1, xf[1], 1e-4, 1e-4); ok &= CppAD::NearEqual(0., ef[1], 1e-4, 1e-4); ok &= method.F.was_negative(); return ok; }
Vector OdeErrControl( Method &method, const Scalar &ti , const Scalar &tf , const Vector &xi , const Scalar &smin , const Scalar &smax , Scalar &scur , const Vector &eabs , const Scalar &erel , Vector &ef ) { Vector maxabs(xi.size()); size_t nstep; return OdeErrControl( method, ti, tf, xi, smin, smax, scur, eabs, erel, ef, maxabs, nstep ); }
bool OdeGearControl(void) { bool ok = true; // initial return value CPPAD_TEST_VECTOR<double> w(2); w[0] = 10.; w[1] = 1.; Fun F(w); CPPAD_TEST_VECTOR<double> xi(2); xi[0] = 1.; xi[1] = 0.; CPPAD_TEST_VECTOR<double> eabs(2); eabs[0] = 1e-4; eabs[1] = 1e-4; // return values CPPAD_TEST_VECTOR<double> ef(2); CPPAD_TEST_VECTOR<double> maxabs(2); CPPAD_TEST_VECTOR<double> xf(2); size_t nstep; // input values size_t M = 5; double ti = 0.; double tf = 1.; double smin = 1e-8; double smax = 1.; double sini = 1e-10; double erel = 0.; xf = CppAD::OdeGearControl(F, M, ti, tf, xi, smin, smax, sini, eabs, erel, ef, maxabs, nstep); double x0 = exp(-w[0]*tf); ok &= CppAD::NearEqual(x0, xf[0], 1e-4, 1e-4); ok &= CppAD::NearEqual(0., ef[0], 1e-4, 1e-4); double x1 = w[0] * (exp(-w[0]*tf) - exp(-w[1]*tf))/(w[1] - w[0]); ok &= CppAD::NearEqual(x1, xf[1], 1e-4, 1e-4); ok &= CppAD::NearEqual(0., ef[1], 1e-4, 1e-4); return ok; }
bool OdeErrControl_two(void) { bool ok = true; // initial return value CppAD::vector<double> w(2); w[0] = 10.; w[1] = 1.; Method_two method(w); CppAD::vector<double> xi(2); xi[0] = 1.; xi[1] = 0.; CppAD::vector<double> eabs(2); eabs[0] = 1e-4; eabs[1] = 1e-4; // inputs double ti = 0.; double tf = 1.; double smin = 1e-4; double smax = 1.; double scur = .5; double erel = 0.; // outputs CppAD::vector<double> ef(2); CppAD::vector<double> xf(2); CppAD::vector<double> maxabs(2); size_t nstep; xf = OdeErrControl(method, ti, tf, xi, smin, smax, scur, eabs, erel, ef, maxabs, nstep); double x0 = exp(-w[0]*tf); ok &= CppAD::NearEqual(x0, xf[0], 1e-4, 1e-4); ok &= CppAD::NearEqual(0., ef[0], 1e-4, 1e-4); double x1 = w[0] * (exp(-w[0]*tf) - exp(-w[1]*tf))/(w[1] - w[0]); ok &= CppAD::NearEqual(x1, xf[1], 1e-4, 1e-4); ok &= CppAD::NearEqual(0., ef[1], 1e-4, 1e-4); return ok; }
// Test whether an edge is Delaunay GEODE_ALWAYS_INLINE static inline bool is_delaunay(const TriangleTopology& mesh, RawField<const Perturbed2,VertexId> X, const HalfedgeId edge) { // Boundary edges belong to the sentinel quad and are always Delaunay. const auto rev = mesh.reverse(edge); if (mesh.is_boundary(rev)) return true; // Grab vertices in counterclockwise order const auto v0 = mesh.src(edge), v1 = mesh.src(mesh.prev(rev)), v2 = mesh.dst(edge), v3 = mesh.src(mesh.prev(edge)); const auto x0 = X[v0], x1 = X[v1], x2 = X[v2], x3 = X[v3]; // If we have a nonsentinel interior edge, perform a normal incircle test if (maxabs(x0.value().x,x1.value().x,x2.value().x,x3.value().x)!=bound) return !incircle(x0,x1,x2,x3); // Fall back to sentinel case analysis return is_delaunay_sentinels(x0,x1,x2,x3); }
/* ************************************************************ PROCEDURE cholonBlk - CHOLESKY on a dense diagonal block. Also updates nonzeros below this diagonal block - they need merely be divided by the scalar diagonals "lkk" afterwards. INPUT m - number of rows (length of the first column). ncols - number of columns in the supernode.(n <= m) lb - Length ncols. Skip k-th pivot if drops below lb[k]. ub - max(diag(x)) / maxu^2. No stability check for pivots > ub. maxu - Max. acceptable |lik|/lkk when lkk suffers cancelation. first - global column number of column 0. This is used only to insert the global column numbers into skipIr. UPDATED x - On input, contains the columns of the supernode to be factored. On output, contains the factored columns of the supernode. skipIr - Lists skipped pivots with their global column number in 0:neqns-1. Active range is first:first+ncols-1. Skipped if d(k) suffers cancelation and max(abs(L(:,k)) > maxu. *pnskip - nnz in skip; *pnskip <= order N of sparse matrix. OUTPUT d - Length ncols. Diagonal in L*diag(d)*L' with diag(L)=all-1. ************************************************************ */ void cholonBlk(double *x, double *d, mwIndex m, const mwIndex ncols, const mwIndex first, const double ub, const double maxu, double *lb, mwIndex *skipIr, mwIndex *pnskip) { mwIndex inz,i,k,n,coltail, nskip; double xkk, xik, ubk; double *xi; /* ------------------------------------------------------------ Initialize: ------------------------------------------------------------ */ n = ncols; nskip = *pnskip; inz = 0; coltail = m - ncols; for(k = 0; k < ncols; k++, --m, --n){ /* ------------------------------------------------------- Let xkk = L(k,k), ubk = max(|xik|) / maxu. ------------------------------------------------------- */ xkk = x[inz]; if(xkk > lb[k]){ /* now xkk > 0 */ if(xkk < ub){ ubk = maxabs(x+inz+1,m-1) / maxu; if(xkk < ubk){ /* ------------------------------------------------------------ If we need to add on diagonal, store this in (skipIr, lb(k)). ------------------------------------------------------------ */ skipIr[nskip++] = first + k; lb[k] = ubk - xkk; /* amount added on diagonal */ xkk = ubk; } } /* -------------------------------------------------------------- Set dk = xkk, lkk = 1 (for LDL'). -------------------------------------------------------------- */ d[k] = xkk; /* now d[k] > 0 MEANS NO-SKIPPING */ x[inz] = 1.0; xi = x + inz + m; /* point to next column */ ++inz; /* -------------------------------------------------------------- REGULAR JOB: correct remaining n-k cols with col k. x(k+1:m,k+1:n) -= x(k+1:m,k) * x(k+1:n,k)' / xkk x(k+1:n,k) /= xkk, -------------------------------------------------------------- */ for(i = 1; i < n; i++){ xik = x[inz] / xkk; subscalarmul(xi, xik, x+inz, m-i); x[inz++] = xik; xi += m-i; } inz += coltail; /* Let inz point to next column */ } /* ------------------------------------------------------------ If skipping is enabled and this pivot is too small: 1) don't touch L(k:end,k): allows pivot delaying if desired. 2) List first+k in skipIr. Set dk = 0 (MEANS SKIPPING). -------------------------------------------------------------- */ else{ skipIr[nskip++] = first + k; d[k] = 0.0; /* tag "0": means column skipped in LDL'.*/ inz += m; /* Don't touch nor use L(k:end,k) */ } } /* k=0:ncols-1 */ /* ------------------------------------------------------------ Return updated number of added or skipped pivots. ------------------------------------------------------------ */ *pnskip = nskip; }
Need ompcore(double D[], double x[], double DtX[], double XtX[], double G[], mwSize n, mwSize m, mwSize L, int T, double eps, int gamma_mode, int profile, double msg_delta, int erroromp) { profdata pd; /* mxArray *Gamma;*/ mwIndex i, j, signum, pos, *ind, *gammaIr, *gammaJc, gamma_count; mwSize allocated_coefs, allocated_cols; int DtX_specified, XtX_specified, batchomp, standardomp, *selected_atoms,*times_atoms ; double *alpha, *r, *Lchol, *c, *Gsub, *Dsub, sum, *gammaPr, *tempvec1, *tempvec2; double eps2, resnorm, delta, deltaprev, secs_remain; int mins_remain, hrs_remain; clock_t lastprint_time, starttime; Need my; /*** status flags ***/ DtX_specified = (DtX!=0); /* indicates whether D'*x was provided */ XtX_specified = (XtX!=0); /* indicates whether sum(x.*x) was provided */ standardomp = (G==0); /* batch-omp or standard omp are selected depending on availability of G */ batchomp = !standardomp; /*** allocate output matrix ***/ if (gamma_mode == FULL_GAMMA) { /* allocate full matrix of size m X L */ Gamma = mxCreateDoubleMatrix(m, L, mxREAL); gammaPr = mxGetPr(Gamma); gammaIr = 0; gammaJc = 0; } else { /* allocate sparse matrix with room for allocated_coefs nonzeros */ /* for error-omp, begin with L*sqrt(n)/2 allocated nonzeros, otherwise allocate L*T nonzeros */ allocated_coefs = erroromp ? (mwSize)(ceil(L*sqrt((double)n)/2.0) + 1.01) : L*T; Gamma = mxCreateSparse(m, L, allocated_coefs, mxREAL); gammaPr = mxGetPr(Gamma); gammaIr = mxGetIr(Gamma); gammaJc = mxGetJc(Gamma); gamma_count = 0; gammaJc[0] = 0; } /*** helper arrays ***/ alpha = (double*)mxMalloc(m*sizeof(double)); /* contains D'*residual */ ind = (mwIndex*)mxMalloc(n*sizeof(mwIndex)); /* indices of selected atoms */ selected_atoms = (int*)mxMalloc(m*sizeof(int)); /* binary array with 1's for selected atoms */ times_atoms = (int*)mxMalloc(m*sizeof(int)); c = (double*)mxMalloc(n*sizeof(double)); /* orthogonal projection result */ /* current number of columns in Dsub / Gsub / Lchol */ allocated_cols = erroromp ? (mwSize)(ceil(sqrt((double)n)/2.0) + 1.01) : T; /* Cholesky decomposition of D_I'*D_I */ Lchol = (double*)mxMalloc(n*allocated_cols*sizeof(double)); /* temporary vectors for various computations */ tempvec1 = (double*)mxMalloc(m*sizeof(double)); tempvec2 = (double*)mxMalloc(m*sizeof(double)); if (batchomp) { /* matrix containing G(:,ind) - the columns of G corresponding to the selected atoms, in order of selection */ Gsub = (double*)mxMalloc(m*allocated_cols*sizeof(double)); } else { /* matrix containing D(:,ind) - the selected atoms from D, in order of selection */ Dsub = (double*)mxMalloc(n*allocated_cols*sizeof(double)); /* stores the residual */ r = (double*)mxMalloc(n*sizeof(double)); } if (!DtX_specified) { /* contains D'*x for the current signal */ DtX = (double*)mxMalloc(m*sizeof(double)); } /*** initializations for error omp ***/ if (erroromp) { eps2 = eps*eps; /* compute eps^2 */ if (T<0 || T>n) { /* unspecified max atom num - set max atoms to n */ T = n; } } /*** initialize timers ***/ initprofdata(&pd); /* initialize profiling counters */ starttime = clock(); /* record starting time for eta computations */ lastprint_time = starttime; /* time of last status display */ /********************** perform omp for each signal **********************/ for (signum=0; signum<L; ++signum) { /* initialize residual norm and deltaprev for error-omp */ if (erroromp) { if (XtX_specified) { resnorm = XtX[signum]; } else { resnorm = dotprod(x+n*signum, x+n*signum, n); addproftime(&pd, XtX_TIME); } deltaprev = 0; /* delta tracks the value of gamma'*G*gamma */ } else { /* ignore residual norm stopping criterion */ eps2 = 0; resnorm = 1; } if (resnorm>eps2 && T>0) { /* compute DtX */ if (!DtX_specified) { matT_vec(1, D, x+n*signum, DtX, n, m); addproftime(&pd, DtX_TIME); } /* initialize alpha := DtX */ memcpy(alpha, DtX + m*signum*DtX_specified, m*sizeof(double)); /* mark all atoms as unselected */ for (i=0; i<m; ++i) { selected_atoms[i] = 0; } for (i=0; i<m; ++i) { times_atoms[i] = 0; } } /* main loop */ i=0; while (resnorm>eps2 && i<T) { /* index of next atom */ pos = maxabs(alpha, m); addproftime(&pd, MAXABS_TIME); /* stop criterion: selected same atom twice, or inner product too small */ if (selected_atoms[pos] || alpha[pos]*alpha[pos]<1e-14) { break; } /* mark selected atom */ ind[i] = pos; selected_atoms[pos] = 1; times_atoms[pos]++; /* matrix reallocation */ if (erroromp && i>=allocated_cols) { allocated_cols = (mwSize)(ceil(allocated_cols*MAT_INC_FACTOR) + 1.01); Lchol = (double*)mxRealloc(Lchol,n*allocated_cols*sizeof(double)); batchomp ? (Gsub = (double*)mxRealloc(Gsub,m*allocated_cols*sizeof(double))) : (Dsub = (double*)mxRealloc(Dsub,n*allocated_cols*sizeof(double))) ; } /* append column to Gsub or Dsub */ if (batchomp) { memcpy(Gsub+i*m, G+pos*m, m*sizeof(double)); } else { memcpy(Dsub+i*n, D+pos*n, n*sizeof(double)); } /*** Cholesky update ***/ if (i==0) { *Lchol = 1; } else { /* incremental Cholesky decomposition: compute next row of Lchol */ if (standardomp) { matT_vec(1, Dsub, D+n*pos, tempvec1, n, i); /* compute tempvec1 := Dsub'*d where d is new atom */ addproftime(&pd, DtD_TIME); } else { vec_assign(tempvec1, Gsub+i*m, ind, i); /* extract tempvec1 := Gsub(ind,i) */ } backsubst('L', Lchol, tempvec1, tempvec2, n, i); /* compute tempvec2 = Lchol \ tempvec1 */ for (j=0; j<i; ++j) { /* write tempvec2 to end of Lchol */ Lchol[j*n+i] = tempvec2[j]; } /* compute Lchol(i,i) */ sum = 0; for (j=0; j<i; ++j) { /* compute sum of squares of last row without Lchol(i,i) */ sum += SQR(Lchol[j*n+i]); } if ( (1-sum) <= 1e-14 ) { /* Lchol(i,i) is zero => selected atoms are dependent */ break; } Lchol[i*n+i] = sqrt(1-sum); } addproftime(&pd, LCHOL_TIME); i++; /* perform orthogonal projection and compute sparse coefficients */ vec_assign(tempvec1, DtX + m*signum*DtX_specified, ind, i); /* extract tempvec1 = DtX(ind) */ cholsolve('L', Lchol, tempvec1, c, n, i); /* solve LL'c = tempvec1 for c */ addproftime(&pd, COMPCOEF_TIME); /* update alpha = D'*residual */ if (standardomp) { mat_vec(-1, Dsub, c, r, n, i); /* compute r := -Dsub*c */ vec_sum(1, x+n*signum, r, n); /* compute r := x+r */ /*memcpy(r, x+n*signum, n*sizeof(double)); /* assign r := x */ /*mat_vec1(-1, Dsub, c, 1, r, n, i); /* compute r := r-Dsub*c */ addproftime(&pd, COMPRES_TIME); matT_vec(1, D, r, alpha, n, m); /* compute alpha := D'*r */ addproftime(&pd, DtR_TIME); /* update residual norm */ if (erroromp) { resnorm = dotprod(r, r, n); addproftime(&pd, UPDATE_RESNORM_TIME); } } else { mat_vec(1, Gsub, c, tempvec1, m, i); /* compute tempvec1 := Gsub*c */ memcpy(alpha, DtX + m*signum*DtX_specified, m*sizeof(double)); /* set alpha = D'*x */ vec_sum(-1, tempvec1, alpha, m); /* compute alpha := alpha - tempvec1 */ addproftime(&pd, UPDATE_DtR_TIME); /* update residual norm */ if (erroromp) { vec_assign(tempvec2, tempvec1, ind, i); /* assign tempvec2 := tempvec1(ind) */ delta = dotprod(c,tempvec2,i); /* compute c'*tempvec2 */ resnorm = resnorm - delta + deltaprev; /* residual norm update */ deltaprev = delta; addproftime(&pd, UPDATE_RESNORM_TIME); } } } /*** generate output vector gamma ***/ if (gamma_mode == FULL_GAMMA) { /* write the coefs in c to their correct positions in gamma */ for (j=0; j<i; ++j) { gammaPr[m*signum + ind[j]] = c[j]; } } else { /* sort the coefs by index before writing them to gamma */ quicksort(ind,c,i); addproftime(&pd, INDEXSORT_TIME); /* gamma is full - reallocate */ if (gamma_count+i >= allocated_coefs) { while(gamma_count+i >= allocated_coefs) { allocated_coefs = (mwSize)(ceil(GAMMA_INC_FACTOR*allocated_coefs) + 1.01); } mxSetNzmax(Gamma, allocated_coefs); mxSetPr(Gamma, mxRealloc(gammaPr, allocated_coefs*sizeof(double))); mxSetIr(Gamma, mxRealloc(gammaIr, allocated_coefs*sizeof(mwIndex))); gammaPr = mxGetPr(Gamma); gammaIr = mxGetIr(Gamma); } /* append coefs to gamma and update the indices */ for (j=0; j<i; ++j) { gammaPr[gamma_count] = c[j]; gammaIr[gamma_count] = ind[j]; gamma_count++; } gammaJc[signum+1] = gammaJc[signum] + i; } /*** display status messages ***/ if (msg_delta>0 && (clock()-lastprint_time)/(double)CLOCKS_PER_SEC >= msg_delta) { lastprint_time = clock(); /* estimated remainig time */ secs2hms( ((L-signum-1)/(double)(signum+1)) * ((lastprint_time-starttime)/(double)CLOCKS_PER_SEC) , &hrs_remain, &mins_remain, &secs_remain); mexPrintf("omp: signal %d / %d, estimated remaining time: %02d:%02d:%05.2f\n", signum+1, L, hrs_remain, mins_remain, secs_remain); mexEvalString("drawnow;"); } } /* end omp */ /*** print final messages ***/ if (msg_delta>0) { mexPrintf("omp: signal %d / %d\n", signum, L); } if (profile) { printprofinfo(&pd, erroromp, batchomp, L); } /* free memory */ if (!DtX_specified) { mxFree(DtX); } if (standardomp) { mxFree(r); mxFree(Dsub); } else { mxFree(Gsub); } mxFree(tempvec2); mxFree(tempvec1); mxFree(Lchol); mxFree(c); mxFree(selected_atoms); mxFree(ind); mxFree(alpha); my.qGamma=Gamma; my.qtimes__atoms=times__atoms; /*return Gamma;*/ return my; }
/* MAIN PROGRAM */ int main() { int n,i,it; size_t tape_stats[STAT_SIZE]; /*--------------------------------------------------------------------------*/ /* Input */ fprintf(stdout,"SPEELPENNINGS PRODUCT Type 1 (ADOL-C Example)\n\n"); fprintf(stdout,"number of independent variables = ? \n"); scanf("%d",&n); int itu; fprintf(stdout,"number of evaluations = ? \n"); scanf("%d",&itu); /*--------------------------------------------------------------------------*/ double yp=0.0; /* 0. time (undifferentiated double code) */ double *xp = new double[n]; /* Init */ for (i=0;i<n;i++) xp[i] = (i+1.0)/(2.0+i); double t00 = myclock(1); for (it=0; it<itu; it++) { yp = 1.0; for (i=0; i<n; i++) yp *= xp[i]; } double t01 = myclock(); /*--------------------------------------------------------------------------*/ double yout=0; /* 1. time (tracing ! no keep) */ double t10 = myclock(); trace_on(TAG); adouble* x; x = new adouble[n]; adouble y; y = 1; for (i=0; i<n; i++) { x[i] <<= xp[i]; y *= x[i]; } y >>= yout; delete [] x; trace_off(); double t11 = myclock(); fprintf(stdout,"%E =? %E function values should be the same \n",yout,yp); /*--------------------------------------------------------------------------*/ tapestats(TAG,tape_stats); fprintf(stdout,"\n independents %zu\n",tape_stats[NUM_INDEPENDENTS]); fprintf(stdout," dependents %zu\n",tape_stats[NUM_DEPENDENTS]); fprintf(stdout," operations %zu\n",tape_stats[NUM_OPERATIONS]); fprintf(stdout," operations buffer size %zu\n",tape_stats[OP_BUFFER_SIZE]); fprintf(stdout," locations buffer size %zu\n",tape_stats[LOC_BUFFER_SIZE]); fprintf(stdout," constants buffer size %zu\n",tape_stats[VAL_BUFFER_SIZE]); fprintf(stdout," maxlive %zu\n",tape_stats[NUM_MAX_LIVES]); fprintf(stdout," valstack size %zu\n\n",tape_stats[TAY_STACK_SIZE]); /*--------------------------------------------------------------------------*/ double **r = new double*[1]; r[0] = new double[1]; r[0][0] = yp; double err; double *z = new double[n]; double *g = new double[n]; double* h = new double[n]; double *ind = new double[n]; /*--------------------------------------------------------------------------*/ double t60 = myclock(); /* 6. time (forward no keep) */ for (it=0; it<itu; it++) forward(TAG,1,n,0,xp,*r); double t61 = myclock(); /*--------------------------------------------------------------------------*/ double t20 = myclock(); /* 2. time (forward+keep) */ for (it=0; it<itu; it++) forward(TAG,1,n,1,xp,*r); double t21 = myclock(); /*--------------------------------------------------------------------------*/ double t30 = myclock(); /* 3. time (reverse) */ for (it=0; it<itu; it++) reverse(TAG,1,n,0,1.0,g); double t31 = myclock(); err=0; for (i=0; i<n; i++) // Compare with deleted product { err = maxabs(err,xp[i]*g[i]/r[0][0] - 1.0); ind[i] = xp[i]; } fprintf(stdout,"%E = maximum relative errors in gradient (fw+rv)\n",err); /*--------------------------------------------------------------------------*/ double t40 = myclock(); /* 4. time (gradient) */ for (it=0; it<itu; it++) gradient(TAG,n,ind,z); //last argument lagrange is ommitted double t41 = myclock(); err = 0; for (i=0; i<n; i++) // Compare with previous numerical result err = maxabs(err,g[i]/z[i] - 1.0); fprintf(stdout,"%E = gradient error should be exactly zero \n",err); /*--------------------------------------------------------------------------*/ double *tan = new double[n]; /* 5. time (first row of Hessian) */ for (i=1; i<n; i++) tan[i] = 0.0 ; tan[0]=1.0; double t50 = myclock(); for (it=0; it<itu; it++) hess_vec(TAG,n,ind,tan,h); // Computes Hessian times direction tan. double t51 = myclock(); err = abs(h[0]); for (i=1; i<n; i++) //Compare with doubly deleted product err = maxabs(err,xp[0]*h[i]/g[i]-1.0); fprintf(stdout,"%E = maximum relative error in Hessian column \n",err); /*--------------------------------------------------------------------------*/ double h1n = h[n-1]; /* Check for symmetry */ tan[0]=0; tan[n-1]=1; hess_vec(TAG,n,ind,tan,h); // Computes Hessian times direction tan. fprintf(stdout, "%E = %E (1,n) and (n,1) entry should be the same\n",h1n,h[0]); /*--------------------------------------------------------------------------*/ /* output of results */ if (t01-t00) { double rtu = 1.0/(t01-t00); fprintf(stdout,"\n\n times for "); fprintf(stdout,"\n unitime : \t%E seconds",(t01-t00)/itu); fprintf(stdout,"\n tracing : \t%E",(t11-t10)*rtu*itu); fprintf(stdout," units \t%E seconds",(t11-t10)); fprintf(stdout, "\n----------------------------------------------------------"); fprintf(stdout,"\n forward (no keep): \t%E",(t61-t60)*rtu); fprintf(stdout," units \t%E seconds",(t61-t60)/itu); fprintf(stdout,"\n forward + keep : \t%E",(t21-t20)*rtu); fprintf(stdout," units \t%E seconds",(t21-t20)/itu); fprintf(stdout,"\n reverse : \t%E",(t31-t30)*rtu); fprintf(stdout," units \t%E seconds",(t31-t30)/itu); fprintf(stdout, "\n----------------------------------------------------------"); fprintf(stdout,"\n gradient : \t%E",(t41-t40)*rtu); fprintf(stdout," units \t%E seconds",(t41-t40)/itu); fprintf(stdout,"\n hess*vec : \t%E",(t51-t50)*rtu); fprintf(stdout," units \t%E seconds\n",(t51-t50)/itu); } else fprintf(stdout,"\n-> zero timing due to small problem dimension \n"); return 1; }