/* Absolute value versions of the above */ static int32_t MaxAbsAccumulate(CSOUND *csound, MINMAXACCUM *p) { IGN(csound); uint32_t offset = p->h.insdshead->ksmps_offset; uint32_t early = p->h.insdshead->ksmps_no_end; uint32_t n, nsmps = CS_KSMPS; MYFLT *out = p->accum; MYFLT *in = p->ain; MYFLT inabs; if (UNLIKELY(offset)) memset(out, '\0', offset*sizeof(MYFLT)); if (UNLIKELY(early)) { nsmps -= early; memset(&out[nsmps], '\0', early*sizeof(MYFLT)); } for (n=offset; n<nsmps; n++) { inabs = FABS(in[n]); if (UNLIKELY(inabs > out[n])) out[n] = inabs; } return OK; }
// input : current spectrum in the form of power *spec and phase *phase, // the last two earlier spectrums are at position // 512 and 1024 of the corresponding Input-Arrays. // Array *vocal, which can mark an FFT_Linie as harmonic // output: current amplitude *amp and unpredictability *cw static void CalcUnpred (PsyModel* m, const int MaxLine, const float* spec, const float* phase, const int* vocal, float* amp0, float* phs0, float* cw ) { int n; float amp; float tmp; #define amp1 ((amp0) + 512) // amp[ 512...1023] contains data of frame-1 #define amp2 ((amp0) + 1024) // amp[1024...1535] contains data of frame-2 #define phs1 ((phs0) + 512) // phs[ 512...1023] contains data of frame-1 #define phs2 ((phs0) + 1024) // phs[1024...1535] contains data of frame-2 for ( n = 0; n < MaxLine; n++ ) { tmp = COSF ((phs0[n] = phase[n]) - 2*phs1[n] + phs2[n]); // copy phase to output-array, predict phase and calculate predictive error amp0[n] = SQRTF (spec[n]); // calculate and set amplitude amp = 2*amp1[n] - amp2[n]; // predict amplitude // calculate unpredictability cw[n] = SQRTF (spec[n] + amp * (amp - 2*amp0[n] * tmp)) / (amp0[n] + FABS(amp)); } // postprocessing of harmonic FFT-lines (*cw is set to CVD_UNPRED) if ( m->CVD_used && vocal != NULL ) { for ( n = 0; n < MAX_CVD_LINE; n++, cw++, vocal++ ) if ( *vocal != 0 && *cw > CVD_UNPRED * 0.01 * *vocal ) *cw = CVD_UNPRED * 0.01 * *vocal; } return; }
/* central finite difference approximation to the jacobian of func */ void FDIF_CENT_JAC_APPROX( void (*func)(LM_REAL *p, LM_REAL *hx, int m, int n, void *adata), /* function to differentiate */ LM_REAL *p, /* I: current parameter estimate, mx1 */ LM_REAL *hxm, /* W/O: work array for evaluating func(p-delta), nx1 */ LM_REAL *hxp, /* W/O: work array for evaluating func(p+delta), nx1 */ LM_REAL delta, /* increment for computing the jacobian */ LM_REAL *jac, /* O: array for storing approximated jacobian, nxm */ int m, int n, void *adata) { register int i, j; LM_REAL tmp; register LM_REAL d; for(j=0; j<m; ++j){ /* determine d=max(1E-04*|p[j]|, delta), see HZ */ d=CNST(1E-04)*p[j]; // force evaluation d=FABS(d); if(d<delta) d=delta; tmp=p[j]; p[j]-=d; (*func)(p, hxm, m, n, adata); p[j]=tmp+d; (*func)(p, hxp, m, n, adata); p[j]=tmp; /* restore */ d=CNST(0.5)/d; /* invert so that divisions can be carried out faster as multiplications */ for(i=0; i<n; ++i){ jac[i*m+j]=(hxp[i]-hxm[i])*d; } } }
static int get_absinsno(CSOUND *csound, TRIGINSTR *p, int stringname) { int insno; /* Get absolute instr num */ /* IV - Oct 31 2002: allow string argument for named instruments */ if (stringname) insno = (int)strarg2insno_p(csound, ((STRINGDAT*)p->args[0])->data); else if (ISSTRCOD(*p->args[0])) { char *ss = get_arg_string(csound, *p->args[0]); insno = (int)strarg2insno_p(csound, ss); } else insno = (int)FABS(*p->args[0]); /* Check that instrument is defined */ if (UNLIKELY(insno < 1 || insno > csound->engineState.maxinsno || csound->engineState.instrtxtp[insno] == NULL)) { csound->Warning(csound, Str("schedkwhen ignored. " "Instrument %d undefined\n"), insno); csound->perferrcnt++; return -1; } return insno; }
static double evaluateGTRGAMMAPROT (int *wptr, double *x1, double *x2, double *tipVector, unsigned char *tipX1, int n, double *diagptable) { double sum = 0.0, term; int i, j, l; double *left, *right; if(tipX1) { for (i = 0; i < n; i++) { __m128d tv = _mm_setzero_pd(); left = &(tipVector[20 * tipX1[i]]); for(j = 0, term = 0.0; j < 4; j++) { double *d = &diagptable[j * 20]; right = &(x2[80 * i + 20 * j]); for(l = 0; l < 20; l+=2) { __m128d mul = _mm_mul_pd(_mm_load_pd(&left[l]), _mm_load_pd(&right[l])); tv = _mm_add_pd(tv, _mm_mul_pd(mul, _mm_load_pd(&d[l]))); } } tv = _mm_hadd_pd(tv, tv); _mm_storel_pd(&term, tv); term = LOG(0.25 * FABS(term)); sum += wptr[i] * term; } } else { for (i = 0; i < n; i++) { __m128d tv = _mm_setzero_pd(); for(j = 0, term = 0.0; j < 4; j++) { double *d = &diagptable[j * 20]; left = &(x1[80 * i + 20 * j]); right = &(x2[80 * i + 20 * j]); for(l = 0; l < 20; l+=2) { __m128d mul = _mm_mul_pd(_mm_load_pd(&left[l]), _mm_load_pd(&right[l])); tv = _mm_add_pd(tv, _mm_mul_pd(mul, _mm_load_pd(&d[l]))); } } tv = _mm_hadd_pd(tv, tv); _mm_storel_pd(&term, tv); term = LOG(0.25 * FABS(term)); sum += wptr[i] * term; } } return sum; }
static double evaluateCAT_FLEX (int *cptr, int *wptr, double *x1, double *x2, double *tipVector, unsigned char *tipX1, int n, double *diagptable_start, const int states) { double sum = 0.0, term, *diagptable, *left, *right; int i, l; /* chosing between tip vectors and non tip vectors is identical in all flavors of this function ,regardless of whether we are using CAT, GAMMA, DNA or protein data etc */ if(tipX1) { for (i = 0; i < n; i++) { /* same as in the GAMMA implementation */ left = &(tipVector[states * tipX1[i]]); right = &(x2[states * i]); /* important difference here, we do not have, as for GAMMA 4 P matrices assigned to each site, but just one. However those P-Matrices can be different for the sites. Hence we index into the precalculated P-matrices for individual sites via the category pointer cptr[i] */ diagptable = &diagptable_start[states * cptr[i]]; /* similar to gamma, with the only difference that we do not integrate (sum) over the discrete gamma rates, but simply compute the likelihood of the site and the given P-matrix */ for(l = 0, term = 0.0; l < states; l++) term += left[l] * right[l] * diagptable[l]; /* take the log */ term = LOG(FABS(term)); /* multiply the log with the pattern weight of this site. The site pattern for which we just computed the likelihood may represent several alignment columns sites that have been compressed into one site pattern if they are exactly identical AND evolve under the same model, i.e., form part of the same partition. */ sum += wptr[i] * term; } } else { for (i = 0; i < n; i++) { /* as before we now access the likelihood arrayes of two inner nodes */ left = &x1[states * i]; right = &x2[states * i]; diagptable = &diagptable_start[states * cptr[i]]; for(l = 0, term = 0.0; l < states; l++) term += left[l] * right[l] * diagptable[l]; term = LOG(FABS(term)); sum += wptr[i] * term; } } return sum; }
static double evaluateGTRCATPROT_SAVE (int *cptr, int *wptr, double *x1, double *x2, double *tipVector, unsigned char *tipX1, int n, double *diagptable_start, double *x1_gapColumn, double *x2_gapColumn, unsigned int *x1_gap, unsigned int *x2_gap) { double sum = 0.0, term, *diagptable, *left, *right, *left_ptr = x1, *right_ptr = x2; int i, l; if(tipX1) { for (i = 0; i < n; i++) { left = &(tipVector[20 * tipX1[i]]); if(isGap(x2_gap, i)) right = x2_gapColumn; else { right = right_ptr; right_ptr += 20; } diagptable = &diagptable_start[20 * cptr[i]]; __m128d tv = _mm_setzero_pd(); for(l = 0; l < 20; l+=2) { __m128d lv = _mm_load_pd(&left[l]); __m128d rv = _mm_load_pd(&right[l]); __m128d mul = _mm_mul_pd(lv, rv); __m128d dv = _mm_load_pd(&diagptable[l]); tv = _mm_add_pd(tv, _mm_mul_pd(mul, dv)); } tv = _mm_hadd_pd(tv, tv); _mm_storel_pd(&term, tv); term = LOG(FABS(term)); sum += wptr[i] * term; } } else { for (i = 0; i < n; i++) { if(isGap(x1_gap, i)) left = x1_gapColumn; else { left = left_ptr; left_ptr += 20; } if(isGap(x2_gap, i)) right = x2_gapColumn; else { right = right_ptr; right_ptr += 20; } diagptable = &diagptable_start[20 * cptr[i]]; __m128d tv = _mm_setzero_pd(); for(l = 0; l < 20; l+=2) { __m128d lv = _mm_load_pd(&left[l]); __m128d rv = _mm_load_pd(&right[l]); __m128d mul = _mm_mul_pd(lv, rv); __m128d dv = _mm_load_pd(&diagptable[l]); tv = _mm_add_pd(tv, _mm_mul_pd(mul, dv)); } tv = _mm_hadd_pd(tv, tv); _mm_storel_pd(&term, tv); term = LOG(FABS(term)); sum += wptr[i] * term; } } return sum; }
double attribute_hidden pnchisq_raw(double x, double f, double theta, double errmax, double reltol, int itrmax, Rboolean lower_tail) { double lam, x2, f2, term, bound, f_x_2n, f_2n; double l_lam = -1., l_x = -1.; /* initialized for -Wall */ int n; Rboolean lamSml, tSml, is_r, is_b, is_it; LDOUBLE ans, u, v, t, lt, lu =-1; static const double _dbl_min_exp = M_LN2 * DBL_MIN_EXP; /*= -708.3964 for IEEE double precision */ if (x <= 0.) { if(x == 0. && f == 0.) return lower_tail ? exp(-0.5*theta) : -expm1(-0.5*theta); /* x < 0 or {x==0, f > 0} */ return lower_tail ? 0. : 1.; } if(!R_FINITE(x)) return lower_tail ? 1. : 0.; /* This is principally for use from qnchisq */ #ifndef MATHLIB_STANDALONE R_CheckUserInterrupt(); #endif if(theta < 80) { /* use 110 for Inf, as ppois(110, 80/2, lower.tail=FALSE) is 2e-20 */ LDOUBLE sum = 0, sum2 = 0, lambda = 0.5*theta, pr = EXP(-lambda); // does this need a feature test? double ans; int i; /* we need to renormalize here: the result could be very close to 1 */ for(i = 0; i < 110; pr *= lambda/++i) { sum2 += pr; sum += pr * pchisq(x, f+2*i, lower_tail, FALSE); if (sum2 >= 1-1e-15) break; } ans = (double) (sum/sum2); return ans; } #ifdef DEBUG_pnch REprintf("pnchisq(x=%g, f=%g, theta=%g): ",x,f,theta); #endif lam = .5 * theta; lamSml = (-lam < _dbl_min_exp); if(lamSml) { /* MATHLIB_ERROR( "non centrality parameter (= %g) too large for current algorithm", theta) */ u = 0; lu = -lam;/* == ln(u) */ l_lam = log(lam); } else { u = exp(-lam); } /* evaluate the first term */ v = u; x2 = .5 * x; f2 = .5 * f; f_x_2n = f - x; #ifdef DEBUG_pnch REprintf("-- v=exp(-th/2)=%g, x/2= %g, f/2= %g\n",v,x2,f2); #endif if(f2 * DBL_EPSILON > 0.125 && /* very large f and x ~= f: probably needs */ FABS(t = x2 - f2) < /* another algorithm anyway */ sqrt(DBL_EPSILON) * f2) { /* evade cancellation error */ /* t = exp((1 - t)*(2 - t/(f2 + 1))) / sqrt(2*M_PI*(f2 + 1));*/ lt = (1 - t)*(2 - t/(f2 + 1)) - 0.5 * log(2*M_PI*(f2 + 1)); #ifdef DEBUG_pnch REprintf(" (case I) ==> "); #endif } else { /* Usual case 2: careful not to overflow .. : */ lt = f2*log(x2) -x2 - lgammafn(f2 + 1); } #ifdef DEBUG_pnch REprintf(" lt= %g", lt); #endif tSml = (lt < _dbl_min_exp); if(tSml) { if (x > f + theta + 5* sqrt( 2*(f + 2*theta))) { /* x > E[X] + 5* sigma(X) */ return lower_tail ? 1. : 0.; /* FIXME: We could be more accurate than 0. */ } /* else */ l_x = log(x); ans = term = 0.; t = 0; } else { t = EXP(lt); #ifdef DEBUG_pnch REprintf(", t=exp(lt)= %g\n", t); #endif ans = term = (double) (v * t); } for (n = 1, f_2n = f + 2., f_x_2n += 2.; ; n++, f_2n += 2, f_x_2n += 2) { #ifdef DEBUG_pnch REprintf("\n _OL_: n=%d",n); #endif #ifndef MATHLIB_STANDALONE if(n % 1000) R_CheckUserInterrupt(); #endif /* f_2n === f + 2*n * f_x_2n === f - x + 2*n > 0 <==> (f+2n) > x */ if (f_x_2n > 0) { /* find the error bound and check for convergence */ bound = (double) (t * x / f_x_2n); #ifdef DEBUG_pnch REprintf("\n L10: n=%d; term= %g; bound= %g",n,term,bound); #endif is_r = is_it = FALSE; /* convergence only if BOTH absolute and relative error < 'bnd' */ if (((is_b = (bound <= errmax)) && (is_r = (term <= reltol * ans))) || (is_it = (n > itrmax))) { #ifdef DEBUG_pnch REprintf("BREAK n=%d %s; bound= %g %s, rel.err= %g %s\n", n, (is_it ? "> itrmax" : ""), bound, (is_b ? "<= errmax" : ""), term/ans, (is_r ? "<= reltol" : "")); #endif break; /* out completely */ } } /* evaluate the next term of the */ /* expansion and then the partial sum */ if(lamSml) { lu += l_lam - log(n); /* u = u* lam / n */ if(lu >= _dbl_min_exp) { /* no underflow anymore ==> change regime */ #ifdef DEBUG_pnch REprintf(" n=%d; nomore underflow in u = exp(lu) ==> change\n", n); #endif v = u = EXP(lu); /* the first non-0 'u' */ lamSml = FALSE; } } else { u *= lam / n; v += u; } if(tSml) { lt += l_x - log(f_2n);/* t <- t * (x / f2n) */ if(lt >= _dbl_min_exp) { /* no underflow anymore ==> change regime */ #ifdef DEBUG_pnch REprintf(" n=%d; nomore underflow in t = exp(lt) ==> change\n", n); #endif t = EXP(lt); /* the first non-0 't' */ tSml = FALSE; } } else { t *= x / f_2n; } if(!lamSml && !tSml) { term = (double) (v * t); ans += term; } } /* for(n ...) */ if (is_it) { MATHLIB_WARNING2(_("pnchisq(x=%g, ..): not converged in %d iter."), x, itrmax); } #ifdef DEBUG_pnch REprintf("\n == L_End: n=%d; term= %g; bound=%g\n",n,term,bound); #endif return (double) (lower_tail ? ans : 1 - ans); }
//------------------------------------------------------------------------ void bezier_arc::init(real x, real y, real rx, real ry, real start_angle, real sweep_angle) { start_angle = FMOD(start_angle, 2.0f * pi); if(sweep_angle >= 2.0f * pi) sweep_angle = 2.0f * pi; if(sweep_angle <= -2.0f * pi) sweep_angle = -2.0f * pi; if(FABS(sweep_angle) < 1e-10) { m_num_vertices = 4; m_cmd = path_cmd_line_to; m_vertices[0] = x + rx * (real)cos(start_angle); m_vertices[1] = y + ry * (real)sin(start_angle); m_vertices[2] = x + rx * (real)cos(start_angle + sweep_angle); m_vertices[3] = y + ry * (real)sin(start_angle + sweep_angle); return; } real total_sweep = 0.0f; real local_sweep = 0.0f; real prev_sweep; m_num_vertices = 2; m_cmd = path_cmd_curve4; bool done = false; do { if(sweep_angle < 0.0f) { prev_sweep = total_sweep; local_sweep = -pi * 0.5f; total_sweep -= pi * 0.5f; if(total_sweep <= sweep_angle + bezier_arc_angle_epsilon) { local_sweep = sweep_angle - prev_sweep; done = true; } } else { prev_sweep = total_sweep; local_sweep = pi * 0.5f; total_sweep += pi * 0.5f; if(total_sweep >= sweep_angle - bezier_arc_angle_epsilon) { local_sweep = sweep_angle - prev_sweep; done = true; } } arc_to_bezier(x, y, rx, ry, start_angle, local_sweep, m_vertices + m_num_vertices - 2); m_num_vertices += 6; start_angle += local_sweep; } while(!done && m_num_vertices < 26); }
int LEVMAR_DER( void (*func)(LM_REAL *p, LM_REAL *hx, int m, int n, void *adata), /* functional relation describing measurements. A p \in R^m yields a \hat{x} \in R^n */ void (*jacf)(LM_REAL *p, LM_REAL *j, int m, int n, void *adata), /* function to evaluate the jacobian \part x / \part p */ LM_REAL *p, /* I/O: initial parameter estimates. On output has the estimated solution */ LM_REAL *x, /* I: measurement vector */ int m, /* I: parameter vector dimension (i.e. #unknowns) */ int n, /* I: measurement vector dimension */ int itmax, /* I: maximum number of iterations */ LM_REAL opts[4], /* I: minim. options [\mu, \epsilon1, \epsilon2, \epsilon3]. Respectively the scale factor for initial \mu, * stopping thresholds for ||J^T e||_inf, ||Dp||_2 and ||e||_2. Set to NULL for defaults to be used */ LM_REAL info[LM_INFO_SZ], /* O: information regarding the minimization. Set to NULL if don't care * info[0]= ||e||_2 at initial p. * info[1-4]=[ ||e||_2, ||J^T e||_inf, ||Dp||_2, mu/max[J^T J]_ii ], all computed at estimated p. * info[5]= # iterations, * info[6]=reason for terminating: 1 - stopped by small gradient J^T e * 2 - stopped by small Dp * 3 - stopped by itmax * 4 - singular matrix. Restart from current p with increased mu * 5 - no further error reduction is possible. Restart with increased mu * 6 - stopped by small ||e||_2 * info[7]= # function evaluations * info[8]= # jacobian evaluations */ LM_REAL *work, /* working memory, allocate if NULL */ LM_REAL *covar, /* O: Covariance matrix corresponding to LS solution; mxm. Set to NULL if not needed. */ void *adata) /* pointer to possibly additional data, passed uninterpreted to func & jacf. * Set to NULL if not needed */ { register int i, j, k, l; int worksz, freework=0, issolved; /* temp work arrays */ LM_REAL *e, /* nx1 */ *hx, /* \hat{x}_i, nx1 */ *jacTe, /* J^T e_i mx1 */ *jac, /* nxm */ *jacTjac, /* mxm */ *Dp, /* mx1 */ *diag_jacTjac, /* diagonal of J^T J, mx1 */ *pDp; /* p + Dp, mx1 */ register LM_REAL mu, /* damping constant */ tmp; /* mainly used in matrix & vector multiplications */ LM_REAL p_eL2, jacTe_inf, pDp_eL2; /* ||e(p)||_2, ||J^T e||_inf, ||e(p+Dp)||_2 */ LM_REAL p_L2, Dp_L2=LM_REAL_MAX, dF, dL; LM_REAL tau, eps1, eps2, eps2_sq, eps3; LM_REAL init_p_eL2; int nu=2, nu2, stop, nfev, njev=0; const int nm=n*m; mu=jacTe_inf=0.0; /* -Wall */ if(n<m){ fprintf(stderr, LCAT(LEVMAR_DER, "(): cannot solve a problem with fewer measurements [%d] than unknowns [%d]\n"), n, m); return -1; } if(!jacf){ fprintf(stderr, RCAT("No function specified for computing the jacobian in ", LEVMAR_DER) RCAT("().\nIf no such function is available, use ", LEVMAR_DIF) RCAT("() rather than ", LEVMAR_DER) "()\n"); return -1; } if(opts){ tau=opts[0]; eps1=opts[1]; eps2=opts[2]; eps2_sq=opts[2]*opts[2]; eps3=opts[3]; } else{ // use default values tau=CNST(LM_INIT_MU); eps1=CNST(LM_STOP_THRESH); eps2=CNST(LM_STOP_THRESH); eps2_sq=CNST(LM_STOP_THRESH)*CNST(LM_STOP_THRESH); eps3=CNST(LM_STOP_THRESH); } if(!work){ worksz=LM_DER_WORKSZ(m, n); //2*n+4*m + n*m + m*m; work=(LM_REAL *)malloc(worksz*sizeof(LM_REAL)); /* allocate a big chunk in one step */ if(!work){ fprintf(stderr, LCAT(LEVMAR_DER, "(): memory allocation request failed\n")); return -1; } freework=1; } /* set up work arrays */ e=work; hx=e + n; jacTe=hx + n; jac=jacTe + m; jacTjac=jac + nm; Dp=jacTjac + m*m; diag_jacTjac=Dp + m; pDp=diag_jacTjac + m; /* compute e=x - f(p) and its L2 norm */ (*func)(p, hx, m, n, adata); nfev=1; for(i=0, p_eL2=0.0; i<n; ++i){ e[i]=tmp=x[i]-hx[i]; p_eL2+=tmp*tmp; } init_p_eL2=p_eL2; for(k=stop=0; k<itmax && !stop; ++k){ /* Note that p and e have been updated at a previous iteration */ if(p_eL2<=eps3){ /* error is small */ stop=6; break; } /* Compute the jacobian J at p, J^T J, J^T e, ||J^T e||_inf and ||p||^2. * Since J^T J is symmetric, its computation can be speeded up by computing * only its upper triangular part and copying it to the lower part */ (*jacf)(p, jac, m, n, adata); ++njev; /* J^T J, J^T e */ if(nm<__BLOCKSZ__SQ){ // this is a small problem /* This is the straightforward way to compute J^T J, J^T e. However, due to * its noncontinuous memory access pattern, it incures many cache misses when * applied to large minimization problems (i.e. problems involving a large * number of free variables and measurements), in which J is too large to * fit in the L1 cache. For such problems, a cache-efficient blocking scheme * is preferable. * * Thanks to John Nitao of Lawrence Livermore Lab for pointing out this * performance problem. * * On the other hand, the straightforward algorithm is faster on small * problems since in this case it avoids the overheads of blocking. */ for(i=0; i<m; ++i){ for(j=i; j<m; ++j){ int lm; for(l=0, tmp=0.0; l<n; ++l){ lm=l*m; tmp+=jac[lm+i]*jac[lm+j]; } /* store tmp in the corresponding upper and lower part elements */ jacTjac[i*m+j]=jacTjac[j*m+i]=tmp; } /* J^T e */ for(l=0, tmp=0.0; l<n; ++l) tmp+=jac[l*m+i]*e[l]; jacTe[i]=tmp; } } else{ // this is a large problem /* Cache efficient computation of J^T J based on blocking */ TRANS_MAT_MAT_MULT(jac, jacTjac, n, m); /* cache efficient computation of J^T e */ for(i=0; i<m; ++i) jacTe[i]=0.0; for(i=0; i<n; ++i){ register LM_REAL *jacrow; for(l=0, jacrow=jac+i*m, tmp=e[i]; l<m; ++l) jacTe[l]+=jacrow[l]*tmp; } } /* Compute ||J^T e||_inf and ||p||^2 */ for(i=0, p_L2=jacTe_inf=0.0; i<m; ++i){ if(jacTe_inf < (tmp=FABS(jacTe[i]))) jacTe_inf=tmp; diag_jacTjac[i]=jacTjac[i*m+i]; /* save diagonal entries so that augmentation can be later canceled */ p_L2+=p[i]*p[i]; } //p_L2=sqrt(p_L2); #if 0 if(!(k%10)){ printf("Iter: %d, estimate: ", k); for(i=0; i<m; ++i) printf("%.9g ", p[i]); printf("-- errors %.9g %0.9g\n", jacTe_inf, p_eL2); } #endif /* check for convergence */ if((jacTe_inf <= eps1)){ Dp_L2=0.0; /* no increment for p in this case */ stop=1; break; } /* compute initial damping factor */ if(k==0){ for(i=0, tmp=LM_REAL_MIN; i<m; ++i) if(diag_jacTjac[i]>tmp) tmp=diag_jacTjac[i]; /* find max diagonal element */ mu=tau*tmp; } /* determine increment using adaptive damping */ while(1){ /* augment normal equations */ for(i=0; i<m; ++i) jacTjac[i*m+i]+=mu; /* solve augmented equations */ #ifdef HAVE_LAPACK /* 5 alternatives are available: LU, Cholesky, 2 variants of QR decomposition and SVD. * Cholesky is the fastest but might be inaccurate; QR is slower but more accurate; * SVD is the slowest but most accurate; LU offers a tradeoff between accuracy and speed */ issolved=AX_EQ_B_LU(jacTjac, jacTe, Dp, m); //issolved=AX_EQ_B_CHOL(jacTjac, jacTe, Dp, m); //issolved=AX_EQ_B_QR(jacTjac, jacTe, Dp, m); //issolved=AX_EQ_B_QRLS(jacTjac, jacTe, Dp, m, m); //issolved=AX_EQ_B_SVD(jacTjac, jacTe, Dp, m); #else /* use the LU included with levmar */ issolved=AX_EQ_B_LU(jacTjac, jacTe, Dp, m); #endif /* HAVE_LAPACK */ if(issolved){ /* compute p's new estimate and ||Dp||^2 */ for(i=0, Dp_L2=0.0; i<m; ++i){ pDp[i]=p[i] + (tmp=Dp[i]); Dp_L2+=tmp*tmp; } //Dp_L2=sqrt(Dp_L2); if(Dp_L2<=eps2_sq*p_L2){ /* relative change in p is small, stop */ //if(Dp_L2<=eps2*(p_L2 + eps2)){ /* relative change in p is small, stop */ stop=2; break; } if(Dp_L2>=(p_L2+eps2)/(CNST(EPSILON)*CNST(EPSILON))){ /* almost singular */ //if(Dp_L2>=(p_L2+eps2)/CNST(EPSILON)){ /* almost singular */ stop=4; break; } (*func)(pDp, hx, m, n, adata); ++nfev; /* evaluate function at p + Dp */ for(i=0, pDp_eL2=0.0; i<n; ++i){ /* compute ||e(pDp)||_2 */ hx[i]=tmp=x[i]-hx[i]; pDp_eL2+=tmp*tmp; } for(i=0, dL=0.0; i<m; ++i) dL+=Dp[i]*(mu*Dp[i]+jacTe[i]); dF=p_eL2-pDp_eL2; if(dL>0.0 && dF>0.0){ /* reduction in error, increment is accepted */ tmp=(CNST(2.0)*dF/dL-CNST(1.0)); tmp=CNST(1.0)-tmp*tmp*tmp; mu=mu*( (tmp>=CNST(ONE_THIRD))? tmp : CNST(ONE_THIRD) ); nu=2; for(i=0 ; i<m; ++i) /* update p's estimate */ p[i]=pDp[i]; for(i=0; i<n; ++i) /* update e and ||e||_2 */ e[i]=hx[i]; p_eL2=pDp_eL2; break; } } /* if this point is reached, either the linear system could not be solved or * the error did not reduce; in any case, the increment must be rejected */ mu*=nu; nu2=nu<<1; // 2*nu; if(nu2<=nu){ /* nu has wrapped around (overflown). Thanks to Frank Jordan for spotting this case */ stop=5; break; } nu=nu2; for(i=0; i<m; ++i) /* restore diagonal J^T J entries */ jacTjac[i*m+i]=diag_jacTjac[i]; } /* inner loop */ } if(k>=itmax) stop=3; for(i=0; i<m; ++i) /* restore diagonal J^T J entries */ jacTjac[i*m+i]=diag_jacTjac[i]; if(info){ info[0]=init_p_eL2; info[1]=p_eL2; info[2]=jacTe_inf; info[3]=Dp_L2; for(i=0, tmp=LM_REAL_MIN; i<m; ++i) if(tmp<jacTjac[i*m+i]) tmp=jacTjac[i*m+i]; info[4]=mu/tmp; info[5]=(LM_REAL)k; info[6]=(LM_REAL)stop; info[7]=(LM_REAL)nfev; info[8]=(LM_REAL)njev; } /* covariance matrix */ if(covar){ LEVMAR_COVAR(jacTjac, covar, p_eL2, m, n); } if(freework) free(work); return (stop!=4)? k : -1; }
//********************************************************************************** //********************************************************************************** void cuCompCooling(REAL temp, REAL x, REAL nH, REAL *lambda, REAL *tcool, REAL aexp,REAL CLUMPF) { REAL c1,c2,c3,c4,c5,c6; REAL unsurtc; REAL nh2; nh2=nH*1e-6;// ! m-3 ==> cm-3 // Collisional Ionization Cooling c1=EXP(-157809.1e0/temp)*1.27e-21*SQRT(temp)/(1.+SQRT(temp/1e5))*x*(1.-x)*nh2*nh2*CLUMPF; // Case A Recombination Cooling c2=1.778e-29*temp*POW(2e0*157807e0/temp,1.965e0)/POW(1.+POW(2e0*157807e0/temp/0.541e0,0.502e0),2.697e0)*x*x*nh2*nh2*CLUMPF; // Case B Recombination Cooling c6=3.435e-30*temp*POW(2e0*157807e0/temp,1.970e0)/POW(1.+(POW(2e0*157807e0/temp/2.250e0,0.376e0)),3.720e0)*x*x*nh2*nh2*CLUMPF; c6=0.; // Collisional excitation cooling c3=EXP(-118348e0/temp)*7.5e-19/(1+SQRT(temp/1e5))*x*(1.-x)*nh2*nh2*CLUMPF; // Bremmsstrahlung c4=1.42e-27*1.5e0*SQRT(temp)*x*x*nh2*nh2*CLUMPF; // Compton Cooling c5=0; /* c5=1.017e-37*POW(2.727/aexp,4)*(temp-2.727/aexp)*nh2*x; */ /* c5=0.; */ #ifndef WRADTEST c5=5.406e-24*(temp-2.727/aexp)/POW(aexp/0.001,4)*x*nh2; REAL Ta=2.727/aexp; c5=5.406e-36*(temp-Ta)/(aexp*aexp*aexp*aexp)*x*nh2; #endif // Overall Cooling *lambda=c1+c2+c3+c4+c5+c6;// ! erg*cm-3*s-1 // Unit Conversion *lambda=(*lambda)*1e-7*1e6;// ! J*m-3*s-1 // cooling times unsurtc=FMAX(c1,c2); unsurtc=FMAX(unsurtc,c3); unsurtc=FMAX(unsurtc,c4); unsurtc=FMAX(unsurtc,FABS(c5)); unsurtc=FMAX(unsurtc,c6)*1e-7;// ==> J/cm3/s *tcool=1.5e0*nh2*(1.+x)*KBOLTZ*temp/unsurtc; //Myr }
void chemrad(struct RGRID *stencil, int nread, int stride, struct CPUINFO *cpu, REAL dxcur, REAL dtnew, struct RUNPARAMS *param, REAL aexporg, int chemonly) { int i,icell,igrp; //int idloc=0; int nitcool=0; REAL hnu0=13.6*1.6022e-19, Cool, tcool, dtcool, tcool1, currentcool_t=0., alpha, alphab, beta, tloc, xt, eintt, ai_tmp1=0., et[NGRP], p[NGRP]; REAL aexp; REAL ebkg[NGRP]; REAL z=1./aexporg-1.; REAL c=param->clightorg*LIGHT_SPEED_IN_M_PER_S; // switch back to physical velocity m/s REAL hnu[NGRP]; REAL alphae[NGRP]; REAL alphai[NGRP]; REAL factgrp[NGRP]; for(igrp=0;igrp<NGRP;igrp++) { hnu[igrp]=param->atomic.hnu[igrp]; alphae[igrp]=param->atomic.alphae[igrp]; alphai[igrp]=param->atomic.alphai[igrp]; factgrp[igrp]=param->atomic.factgrp[igrp]; } #ifdef S_X REAL E0overI[NGRP]; REAL N2[NGRP]; REAL F2[NGRP]; #endif #define BLOCKCOOL 1 // KEPT FROM CUDATON FOR SIMPLICITY #define idloc3 0 // KEPT FROM CUDATON FOR SIMPLICITY REAL egyloc[BLOCKCOOL*NGRP], floc[3*BLOCKCOOL*NGRP], srcloc[BLOCKCOOL*NGRP], x0[BLOCKCOOL], nH[BLOCKCOOL], eint[BLOCKCOOL]; REAL dt=dtnew*param->unit.unit_t*POW(aexporg,2); REAL emin; struct Rtype R; REAL fudgecool=param->fudgecool; int ncvgcool=param->ncvgcool; REAL E0; #ifdef SCHAYE REAL navg=(param->cosmo->ob/param->cosmo->om)/(PROTON_MASS*MOLECULAR_MU)*param->unit.unit_d; #endif REAL xorg; for(i=0;i<nread;i++){ // we scan the octs for(icell=0;icell<8;icell++){ // we scan the cells if(stencil[i].oct[6].cell[icell].split) continue; // we dont treat split cells memcpy(&R,&stencil[i].New.cell[icell].rfieldnew,sizeof(struct Rtype));// We get the local physical quantities after transport update #ifdef HOMOSOURCE // we override the value with the homogeneous source density R.src=param->bkg; #endif //if(eint[idloc]!=E0) printf("1!\n"); /// ==================== UV Background #ifdef UVBKG if(NGRP>1) printf("WARNING BAD BEHAVIOR FOR BKG with NGRP>1 !\n"); //for(igrp=0;igrp<NGRP;igrp++) ebkg[igrp]=3.6*(z<3?1.:4./(1+z)) ; // Katz simple model // Poor FIT to Haardt & MAdau 2012 /* for(igrp=0;igrp<NGRP;igrp++){ REAL amp=1.2e-16,sig=1.,zavg=2,mz=1e-18,pz=1.2e-17; ebkg[igrp]=amp/(sig*SQRT(2*M_PI))*exp(-POW((z-zavg),2)/(2.*POW(sig,2)))+mz*z+pz; // comoving photons/s/m3 } */ #else for(igrp=0;igrp<NGRP;igrp++) ebkg[igrp]=0.; #endif // switch to physical units, chemistry remains unchanged with and without cosmo for (igrp=0;igrp<NGRP;igrp++) { egyloc[idloc+igrp*BLOCKCOOL] =R.e[igrp]/(aexporg*aexporg*aexporg)*param->unit.unit_N;//+ebkg[igrp]; floc[0+idloc3+igrp*BLOCKCOOL*3]=R.fx[igrp]/POW(aexporg,4)*param->unit.unit_l/param->unit.unit_t*param->unit.unit_N; floc[1+idloc3+igrp*BLOCKCOOL*3]=R.fy[igrp]/POW(aexporg,4)*param->unit.unit_l/param->unit.unit_t*param->unit.unit_N; floc[2+idloc3+igrp*BLOCKCOOL*3]=R.fz[igrp]/POW(aexporg,4)*param->unit.unit_l/param->unit.unit_t*param->unit.unit_N; } x0[idloc]=R.nhplus/R.nh; xorg= x0[idloc]; nH[idloc]=R.nh/(aexporg*aexporg*aexporg)*param->unit.unit_N; eint[idloc]=R.eint/POW(aexporg,5)*param->unit.unit_n*param->unit.unit_d*POW(param->unit.unit_v,2); emin=PMIN/(GAMMA-1.)/POW(aexporg,5)*param->unit.unit_n*param->unit.unit_d*POW(param->unit.unit_v,2); // physical minimal pressure for (igrp=0;igrp<NGRP;igrp++){ srcloc[idloc+igrp*BLOCKCOOL]=(R.src[igrp]*param->unit.unit_N/param->unit.unit_t/(aexporg*aexporg))/POW(aexporg,3); //phot/s/dv (physique) } // R.src phot/unit_t/unit_dv (comobile) REAL eorg=eint[idloc]; REAL etorg=egyloc[idloc]; REAL torg=eint[idloc]/(1.5*nH[idloc]*KBOLTZ*(1.+x0[idloc])); //if(srcloc[0]>0) printf("nh=%e %e %e %e\n",R.nh,R.e[0],eint[idloc],3[idloc]); // at this stage we are ready to do the calculations // DEALING WITH CLUMPING ---------------------- #ifdef WCLUMP REAL CLUMPF2=FMIN(FMAX(POW(nH[idloc]/6.,0.7),1.),40.); REAL CLUMPI=1.; #else REAL CLUMPF2=1.; REAL CLUMPI=1.; #endif for(igrp=0;igrp<NGRP;igrp++) { alphai[igrp] *= CLUMPI; alphae[igrp] *= CLUMPI; } // ------------------------------------------------- /// local cooling loop ------------------------------- aexp=aexporg; fudgecool=param->fudgecool; currentcool_t=0.; nitcool=0.; REAL da; //printf("cpu=%d fudge=%e ncv=%d currentcool_t=%e dt=%e\n",cpu->rank,param->fudgecool,ncvgcool,currentcool_t,dt); // local cooling loop ------------------------------- while(currentcool_t<dt) { /// Cosmological Adiabatic expansion effects ============== #ifdef TESTCOSMO REAL hubblet=param->cosmo->H0*SQRT(param->cosmo->om/aexp+param->cosmo->ov*(aexp*aexp))/aexp*(1e3/(1e6*PARSEC)); // s-1 // SOMETHING TO CHECK HERE #else REAL hubblet=0.; #endif //if(eint[idloc]!=E0) printf("2!\n"); tloc=eint[idloc]/(1.5*nH[idloc]*KBOLTZ*(1.+x0[idloc])); //== Getting a timestep cuCompCooling(tloc,x0[idloc],nH[idloc],&Cool,&tcool1,aexp,CLUMPF2); ai_tmp1=0.; //if(eint[idloc]!=E0) printf("3!\n"); if(fudgecool<1e-20){ printf("eint=%e(%e<%e) nH=%e x0=%e(%e) T=%e(%e) N=%e(%e)\n",eint[idloc],eorg,emin,nH[idloc],x0[idloc],xorg,tloc,torg,et[0],etorg); if(fudgecool<1e-20) abort(); } for (igrp=0;igrp<NGRP;igrp++) ai_tmp1 += ((alphae[igrp])*hnu[igrp]-(alphai[igrp])*hnu0)*egyloc[idloc+igrp*BLOCKCOOL]; tcool=FABS(eint[idloc]/(nH[idloc]*(1.0-x0[idloc])*ai_tmp1*(!chemonly)-Cool)); ai_tmp1=0.; dtcool=FMIN(fudgecool*tcool,dt-currentcool_t); alpha=cucompute_alpha_a(tloc,1.,1.)*CLUMPF2; alphab=cucompute_alpha_b(tloc,1.,1.)*CLUMPF2; beta=cucompute_beta(tloc,1.,1.)*CLUMPF2; //== Update // ABSORPTION int test = 0; REAL factotsa[NGRP]; for(igrp=0;igrp<NGRP;igrp++) { #ifdef OTSA factotsa[igrp]=0; alpha=alphab; // recombination is limited to non ground state levels #else factotsa[igrp]=(igrp==0); #endif ai_tmp1 = alphai[igrp]; if(chemonly){ et[igrp]=egyloc[idloc+igrp*BLOCKCOOL]; } else{ et[igrp]=((alpha-alphab)*x0[idloc]*x0[idloc]*nH[idloc]*nH[idloc]*dtcool*factotsa[igrp]+egyloc[idloc+igrp*BLOCKCOOL]+srcloc[idloc+igrp*BLOCKCOOL]*dtcool*factgrp[igrp])/(1.+dtcool*(ai_tmp1*(1.-x0[idloc])*nH[idloc])); } if((et[igrp]<0)||(isnan(et[igrp]))){ test=1; //printf("eint=%e nH=%e x0=%e T=%e N=%e\n",eint[idloc],nH[idloc],x0[idloc],tloc,et[0]); } p[igrp]=(1.+(alphai[igrp]*nH[idloc]*(1-x0[idloc]))*dtcool); } ai_tmp1=0.; if(test) { fudgecool=fudgecool/10.; continue; } // IONISATION #ifndef S_X #ifdef SEMI_IMPLICIT for(igrp=0;igrp<NGRP;igrp++) {ai_tmp1 += alphai[igrp]*et[igrp]*(!chemonly);} #else for(igrp=0;igrp<NGRP;igrp++) {ai_tmp1 += alphai[igrp]*egyloc[idloc+igrp*BLOCKCOOL]*(!chemonly);} #endif #else N2[0]=1.0; REAL pp=(1.-POW(x0[idloc],0.4092)); if(pp<0.) pp=0.; for(igrp=1;igrp<NGRP;igrp++){ N2[igrp]=1.0+0.3908*POW(pp,1.7592)*E0overI[igrp]; if(N2[igrp]<1.0) N2[igrp]=1.0; } #ifdef SEMI_IMPLICIT for(igrp=0;igrp<NGRP;igrp++) {ai_tmp1 += alphai[igrp]*et[igrp]*N2[igrp]*(!chemonly);} #else for(igrp=0;igrp<NGRP;igrp++) {ai_tmp1 += alphai[igrp]*egyloc[idloc+igrp*BLOCKCOOL]*N2[igrp]*(!chemonly);} #endif #endif xt=1.-(alpha*x0[idloc]*x0[idloc]*nH[idloc]*dtcool+(1. -x0[idloc]))/(1.+dtcool*(beta*x0[idloc]*nH[idloc]+ai_tmp1)); ai_tmp1=0.; if(((xt>1.)||(xt<0.))||(isnan(xt))) { //printf("XION ERR eintt=%e xt=%e et=%e\n",eintt,xt,et[0]); fudgecool/=10.; continue; } #ifdef SEMI_IMPLICIT cuCompCooling(tloc,xt,nH[idloc],&Cool,&tcool1,aexp,CLUMPF2); #else cuCompCooling(tloc,x0[idloc],nH[idloc],&Cool,&tcool1,aexp,CLUMPF2); #endif #ifdef COOLING // HEATING + COOLING int compcool=1; // do we need to compute the cooling ? #ifdef SCHAYE if((nH[idloc]>1e6)&&(R.nh>(param->stars->overdensity_cond*navg))){ REAL tlocs; tlocs=eintt/(1.5*nH[idloc]*KBOLTZ*(1.+xt)); if(tlocs<1e5){ eintt=(1.08e9*KBOLTZ)*POW(nH[idloc]/1e5,4./3.)/(GAMMA-1)/FSCHAYE; // polytropic EOS compcool=0.; // cancel cooling calculation fudgecool=FMIN(fudgecool*1.5,param->fudgecool); } } #endif // SCHAYE if(compcool){ REAL SN = 0; #ifdef SUPERNOVAE SN = R.snfb; if (R.snfb) Cool = 0; // Stop the cooling if supernovae if (R.snfb) printf("dE\t%e\tE0\t%e\tdtcool\t%e\t",R.snfb*dtcool,eintt, dtcool); #endif #ifndef S_X #ifdef SEMI_IMPLICIT for(igrp=0;igrp<NGRP;igrp++) {ai_tmp1 += et[igrp]*(alphae[igrp]*hnu[igrp]-(alphai[igrp]*hnu0))*(!chemonly);} eintt=(eint[idloc]+ dtcool*(nH[idloc]*(1.-xt)*(ai_tmp1)-Cool+SN)); // if (R.snfb) printf("E0\t%e\n",eintt); #else for(igrp=0;igrp<NGRP;igrp++) {ai_tmp1 += egyloc[idloc+igrp*BLOCKCOOL]*(alphae[igrp]*hnu[igrp]-(alphai[igrp]*hnu0))*(!chemonly);} eintt=(eint[idloc]+dtcool*(nH[idloc]*(1.-x0[idloc])*(ai_tmp1)-Cool+SN)); #endif //SEMI #else //===================================== X RAYS ============================== REAL pp2; F2[0]=1.0; //if(eint[idloc]!=E0) printf("7!\n"); #ifdef SEMI_IMPLICIT pp2=1.0-POW(xt,0.2663); #else pp2=1.0-POW(x0[idloc],0.2663); #endif if(pp2<0.) pp2=0.; for(igrp=1;igrp<NGRP;igrp++){ F2[igrp]=1.0; F2[igrp]=0.9971*(1.0-POW(pp2,1.3163)); if(F2[igrp]>1.0) F2[igrp]=1.0; if(F2[igrp]<0.0) F2[igrp]=0.0; } #ifdef SEMI_IMPLICIT for(igrp=0;igrp<NGRP;igrp++) {ai_tmp1 += et[igrp]*(alphae[igrp]*hnu[igrp]-(alphai[igrp]*hnu0))*F2[igrp]*(!chemonly);} eintt=(eint[idloc]+dtcool*(nH[idloc]*(1.-xt)*(ai_tmp1)-Cool+SN)); #else for(igrp=0;igrp<NGRP;igrp++) {ai_tmp1 += egyloc[idloc+igrp*BLOCKCOOL]*(alphae[igrp]*hnu[igrp]-(alphai[igrp]*hnu0))*F2[igrp]*(!chemonly);} eintt=(eint[idloc]+dtcool*(nH[idloc]*(1.-x0[idloc])*(ai_tmp1)-Cool+SN)); #endif //================================================================================ #endif //S_X if(eintt<0.) { //printf("E NEG eintt=%e xt=%e et=%e\n",eintt,xt,et[0]); fudgecool=fudgecool/10.; continue; } if(FABS(eintt-eint[idloc])>FRAC_VAR*eint[idloc]) { // if(srcloc[idloc]==0.){ //printf("DELTA E eintt=%e xt=%e et=%e\n",eintt,xt,et[0]); fudgecool=fudgecool/10.; continue; //} } else{ fudgecool=FMIN(fudgecool*1.5,param->fudgecool); } ai_tmp1=0; eintt=FMAX(emin,eintt); } #else eintt=eint[idloc]; #endif // inner update REAL aold=aexp; #ifdef TESTCOSMO da=hubblet*dtcool*aexp; aexp+=da; #endif for(igrp =0;igrp<NGRP;igrp++) { egyloc[idloc+igrp*BLOCKCOOL]=et[igrp]*POW(aold/aexp,3); if(!chemonly){ floc[0+idloc3+igrp*BLOCKCOOL*3]=floc[0+idloc3+igrp*BLOCKCOOL*3]/p[igrp]*POW(aold/aexp,4); floc[1+idloc3+igrp*BLOCKCOOL*3]=floc[1+idloc3+igrp*BLOCKCOOL*3]/p[igrp]*POW(aold/aexp,4); floc[2+idloc3+igrp*BLOCKCOOL*3]=floc[2+idloc3+igrp*BLOCKCOOL*3]/p[igrp]*POW(aold/aexp,4); } } x0[idloc]=xt; //printf("xt=%e\n",xt); #ifdef COOLING eint[idloc]=eintt*POW(aold/aexp,5); #endif currentcool_t+=dtcool; fudgecool=param->fudgecool; nitcool++; if((nitcool==ncvgcool)&&(ncvgcool!=0)) break; } /// ====================== End of the cooling loop //aexp=aexporg; // FIlling the rad structure to send it back if(!chemonly){ for(igrp=0;igrp<NGRP;igrp++) { R.e[igrp]=FMAX(egyloc[idloc+igrp*BLOCKCOOL]*POW(aexp,3),EMIN*factgrp[igrp])/param->unit.unit_N; R.fx[igrp]=floc[0+idloc3+igrp*BLOCKCOOL*3]*POW(aexp,4)/param->unit.unit_l*param->unit.unit_t/param->unit.unit_N; R.fy[igrp]=floc[1+idloc3+igrp*BLOCKCOOL*3]*POW(aexp,4)/param->unit.unit_l*param->unit.unit_t/param->unit.unit_N; R.fz[igrp]=floc[2+idloc3+igrp*BLOCKCOOL*3]*POW(aexp,4)/param->unit.unit_l*param->unit.unit_t/param->unit.unit_N; } } R.nhplus=x0[idloc]*R.nh; R.eint=eint[idloc]*POW(aexp,5)/param->unit.unit_n/param->unit.unit_d/POW(param->unit.unit_v,2); E2T(&R,aexp,param); memcpy(&stencil[i].New.cell[icell].rfieldnew,&R,sizeof(struct Rtype)); } } }
Dp_Result * make_fast_dp_pair_wise (Alignment *A,int*ns, int **l_s, Constraint_list *CL,Dp_Model *M) { /*SIZE VARIABLES*/ int ndiag; int l0, l1, len_al,len_diag; static int max_len_al, max_len_diag; static int mI, mJ; /*EVALUATION*/ int **mat; int a1, a2; /*DP VARIABLES*/ static int *Mat, *LMat, *trace; int a, i, j,l; int state, cur_state, prev_state; int pos_i, pos_j; int last_i=0, last_j=0; int prev_i, prev_j; int len_i, len_j, len; int t, e, em; int prev_score; int pc, best_pc; int *prev; int model_index; /*TRACEBACK*/ Dp_Result *DPR; int k=0, next_k; int new_i, new_j; ndiag=M->diag[0]; l0=strlen (A->seq_al[l_s[0][0]]); l1=strlen (A->seq_al[l_s[1][0]]); len_al =l0+l1+1; len_diag=ndiag+4; if ( (len_al>max_len_al || len_diag>max_len_diag)) { vfree (Mat); vfree (LMat); vfree(trace); max_len_diag=max_len_al=0; } if (max_len_al==0) { max_len_al=len_al; max_len_diag=len_diag; mI=max_len_al*max_len_diag; mJ=max_len_diag; Mat =vcalloc ( M->nstate*max_len_al*max_len_diag, sizeof (int)); LMat =vcalloc ( M->nstate*max_len_al*max_len_diag, sizeof (int)); trace=vcalloc ( M->nstate*max_len_al*max_len_diag, sizeof (int)); } prev=vcalloc ( M->nstate, sizeof (int)); DPR=vcalloc ( 1, sizeof ( Dp_Result)); DPR->traceback=vcalloc (max_len_al, sizeof (int)); /*PREPARE THE EVALUATION*/ if (ns[0]+ns[1]>2) { fprintf ( stderr, "\nERROR: function make_fasta_cdna_pair_wise can only handle two sequences at a time [FATAL:%s]",PROGRAM); crash (""); } mat=CL->M; /*INITIALIZATION OF THE DP MATRICES*/ for (i=0; i<=l0;i++) { for (j=0; j<=ndiag;j++) { for ( state=0; state<M->nstate; state++) { Mat [state*mI+i*mJ+j]=UNDEFINED; LMat [state*mI+i*mJ+j]=UNDEFINED; trace [state*mI+i*mJ+j]=M->START; } } } M->diag[0]=0; for (i=0; i<=l0; i++) for ( j=0; j<=ndiag; j++) { pos_j=M->diag[j]-l0+i; pos_i=i; if (!(pos_j==0 || pos_i==0))continue; if ( pos_j<0 || pos_i<0)continue; if ( pos_i==0 && pos_j==0) { for ( a=0; a< M->nstate; a++) { Mat [a*mI+i*mJ+j]=0; LMat [a*mI+i*mJ+j]=0; trace[a*mI+i*mJ+j]=M->START; } } else { l=MAX(pos_i,pos_j); for ( state=0; state<M->START; state++) { if (pos_j==0 && M->model_properties[state][M->LEN_J])continue; if (pos_i==0 && M->model_properties[state][M->LEN_I])continue; t=M->model[M->START][state]; e=M->model_properties[state][M->TERM_EMISSION]; Mat [state*mI+i*mJ+j]=t+e*l; LMat [state*mI+i*mJ+j]=l; trace [state*mI+i*mJ+j]=M->START; } } } /*DYNAMIC PROGRAMMING: Forward Pass*/ for (i=1; i<=l0;i++) { for (j=1; j<=ndiag;j++) { pos_j=M->diag[j]-l0+i; pos_i=i; if (pos_j<=0 || pos_j>l1 )continue; last_i=i; last_j=j; for (cur_state=0; cur_state<M->START; cur_state++) { if (M->model_properties[cur_state][M->DELTA_J]) { prev_j=j+M->model_properties[cur_state][M->DELTA_J]; prev_i=i+M->model_properties[cur_state][M->DELTA_I]*FABS((M->diag[j]-M->diag[prev_j])); } else { prev_j=j; prev_i=i+M->model_properties[cur_state][M->DELTA_I]; } len_i=FABS((i-prev_i)); len_j=FABS((M->diag[prev_j]-M->diag[j])); len=MAX(len_i, len_j); a1=A->seq_al[M->model_properties[cur_state][M->F0] ][pos_i-1]; a2=A->seq_al[M->model_properties[cur_state][M->F1]+3][pos_j-1]; if (M->model_properties[cur_state][M->TYPE]==M->CODING0) { if ( a1=='o' || a2=='o')em=-(mat['w'-'A']['w'-'A'])*SCORE_K; else if (a1=='x' || a2=='x')em=UNDEFINED; else if ( a1==0 || a2==0)exit (0); else { em=(mat[a1-'A'][a2-'A'])*SCORE_K; } } else { em=M->model_properties[cur_state][M->EMISSION]; } for (pc=best_pc=UNDEFINED, model_index=1; model_index<=M->bounded_model[cur_state][0]; model_index++) { prev_state=M->bounded_model[cur_state][model_index]; if(prev_i<0 || prev_j<0 ||prev_i>l0 || prev_j>ndiag || len==UNDEFINED)prev_score=UNDEFINED; else prev_score=Mat[prev_state*mI+prev_i*mJ+prev_j]; t=M->model[prev_state][cur_state]; e=em; if (prev_score==UNDEFINED || len==UNDEFINED)e=UNDEFINED; else if (len==0|| e==UNDEFINED)e=UNDEFINED; else e=e*len; if (is_defined_int(3,prev_score,e, t)) { pc=prev_score+t+e; } else pc=UNDEFINED; /*Identify the best previous score*/ if (best_pc==UNDEFINED || (pc>best_pc && pc!=UNDEFINED)) { prev[cur_state]=prev_state; best_pc=pc; } } Mat[cur_state*mI+i*mJ+j]=best_pc; if ( Mat[cur_state*mI+i*mJ+j]==UNDEFINED) { LMat[cur_state*mI+i*mJ+j]=UNDEFINED; trace[cur_state*mI+i*mJ+j]=UNDEFINED; continue; } else if ( prev[cur_state]==cur_state) { LMat [cur_state*mI+i*mJ+j]= LMat [cur_state*mI+prev_i*mJ+prev_j]+len; trace[cur_state*mI+i*mJ+j]= trace[cur_state*mI+prev_i*mJ+prev_j]; } else { LMat[cur_state*mI+i*mJ+j]=len; trace[cur_state*mI+i*mJ+j]=prev[cur_state]; } } } } i=last_i; j=last_j; for (pc=best_pc=UNDEFINED, state=0; state<M->START; state++) { t=M->model[state][M->END]; e=M->model_properties[state][M->TERM_EMISSION]; l=LMat[state*mI+i*mJ+j]; if (!is_defined_int(4,t,e,Mat[state*mI+i*mJ+j],l))Mat[state*mI+i*mJ+j]=UNDEFINED; else Mat[state*mI+i*mJ+j]+=t+e*(l); pc=Mat[state*mI+i*mJ+j]; if (best_pc==UNDEFINED || (pc>best_pc && pc!=UNDEFINED)) { k=state; best_pc=pc; } } DPR->score=best_pc; /*TRACEBACK*/ e=0; len=0; while (k!=M->START) { next_k=trace[k*mI+i*mJ+j]; new_i=i; new_j=j; l=LMat[k*mI+i*mJ+j]; for (a=0; a< l; a++) { DPR->traceback[len++]=k; } new_i+=M->model_properties[k][M->DELTA_I]*l; if ( M->model_properties[k][M->DELTA_J]) { while ( next_k!=M->START && FABS((M->diag[j]-M->diag[new_j]))!=l)new_j+=M->model_properties[k][M->DELTA_J]; } i=new_i; j=new_j; k=next_k; } DPR->len=len; DPR->traceback[DPR->len++]=M->START; invert_list_int (DPR->traceback,DPR->len); DPR->traceback[DPR->len]=M->END; vfree (prev); return DPR; }
/* * This function returns the solution of Ax = b * * The function employs LU decomposition followed by forward/back substitution (see * also the LAPACK-based LU solver above) * * A is mxm, b is mx1 * * The function returns 0 in case of error, 1 if successful * * This function is often called repetitively to solve problems of identical * dimensions. To avoid repetitive malloc's and free's, allocated memory is * retained between calls and free'd-malloc'ed when not of the appropriate size. * A call with NULL as the first argument forces this memory to be released. */ int AX_EQ_B_LU(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m) { __STATIC__ void *buf=NULL; __STATIC__ int buf_sz=0; register int i, j, k; int *idx, maxi=-1, idx_sz, a_sz, work_sz, tot_sz; LM_REAL *a, *work, max, sum, tmp; if(!A) #ifdef LINSOLVERS_RETAIN_MEMORY { if(buf) free(buf); buf=NULL; buf_sz=0; return 1; } #else return 1; /* NOP */ #endif /* LINSOLVERS_RETAIN_MEMORY */ /* calculate required memory size */ idx_sz=m; a_sz=m*m; work_sz=m; tot_sz=(a_sz+work_sz)*sizeof(LM_REAL) + idx_sz*sizeof(int); /* should be arranged in that order for proper doubles alignment */ // Check inputs for validity for(i=0; i<a_sz; i++) if (!LM_FINITE(A[i])) return 0; #ifdef LINSOLVERS_RETAIN_MEMORY if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */ if(buf) free(buf); /* free previously allocated memory */ buf_sz=tot_sz; buf=(void *)malloc(tot_sz); if(!buf){ fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_LU) "() failed!\n"); exit(1); } } #else buf_sz=tot_sz; buf=(void *)malloc(tot_sz); if(!buf){ fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_LU) "() failed!\n"); exit(1); } #endif /* LINSOLVERS_RETAIN_MEMORY */ a=(LM_REAL*)buf; work=a+a_sz; idx=(int *)(work+work_sz); /* avoid destroying A, B by copying them to a, x resp. */ memcpy(a, A, a_sz*sizeof(LM_REAL)); memcpy(x, B, m*sizeof(LM_REAL)); /* compute the LU decomposition of a row permutation of matrix a; the permutation itself is saved in idx[] */ for(i=0; i<m; ++i){ max=0.0; for(j=0; j<m; ++j) if((tmp=FABS(a[i*m+j]))>max) max=tmp; if(max==0.0){ fprintf(stderr, RCAT("Singular matrix A in ", AX_EQ_B_LU) "()!\n"); #ifndef LINSOLVERS_RETAIN_MEMORY free(buf); #endif return 0; } work[i]=LM_CNST(1.0)/max; } for(j=0; j<m; ++j){ for(i=0; i<j; ++i){ sum=a[i*m+j]; for(k=0; k<i; ++k) sum-=a[i*m+k]*a[k*m+j]; a[i*m+j]=sum; } max=0.0; for(i=j; i<m; ++i){ sum=a[i*m+j]; for(k=0; k<j; ++k) sum-=a[i*m+k]*a[k*m+j]; a[i*m+j]=sum; if((tmp=work[i]*FABS(sum))>=max){ max=tmp; maxi=i; } } if(j!=maxi){ for(k=0; k<m; ++k){ tmp=a[maxi*m+k]; a[maxi*m+k]=a[j*m+k]; a[j*m+k]=tmp; } work[maxi]=work[j]; } idx[j]=maxi; if(a[j*m+j]==0.0) a[j*m+j]=LM_REAL_EPSILON; if(j!=m-1){ tmp=LM_CNST(1.0)/(a[j*m+j]); for(i=j+1; i<m; ++i) a[i*m+j]*=tmp; } } /* The decomposition has now replaced a. Solve the linear system using * forward and back substitution */ for(i=k=0; i<m; ++i){ j=idx[i]; sum=x[j]; x[j]=x[i]; if(k!=0) for(j=k-1; j<i; ++j) sum-=a[i*m+j]*x[j]; else if(sum!=0.0) k=i+1; x[i]=sum; } for(i=m-1; i>=0; --i){ sum=x[i]; for(j=i+1; j<m; ++j) sum-=a[i*m+j]*x[j]; x[i]=sum/a[i*m+i]; } #ifndef LINSOLVERS_RETAIN_MEMORY free(buf); #endif return 1; }
static MRI * MRIcomputeSurfaceDistanceIntensities(MRI_SURFACE *mris, MRI *mri_ribbon, MRI *mri_aparc, MRI *mri, MRI *mri_aseg, int whalf) { MRI *mri_features, *mri_binary, *mri_white_dist, *mri_pial_dist ; int vno, ngm, outside_of_ribbon, label0, label, ohemi_label, xi, yi, zi, xk, yk, zk, x0, y0, z0, hemi_label, assignable ; double xv, yv, zv, step_size, dist, thickness, wdist, pdist, snx, sny, snz, nx, ny, nz, xl, yl, zl, x, y, z, dot, angle ; VERTEX *v ; mri_features = MRIallocSequence(mris->nvertices, 1, 1, MRI_FLOAT, 1) ; // one samples inwards, one in ribbon, and one outside MRIcopyHeader(mri, mri_features) ; mri_binary = MRIcopy(mri_ribbon, NULL) ; mri_binary = MRIbinarize(mri_ribbon, NULL, 1, 0, 1) ; mri_pial_dist = MRIdistanceTransform(mri_binary, NULL, 1, max_pial_dist+1, DTRANS_MODE_SIGNED,NULL); if (Gdiag & DIAG_WRITE && DIAG_VERBOSE_ON) MRIwrite(mri_pial_dist, "pd.mgz") ; MRIclear(mri_binary) ; MRIcopyLabel(mri_ribbon, mri_binary, Left_Cerebral_White_Matter) ; MRIcopyLabel(mri_ribbon, mri_binary, Right_Cerebral_White_Matter) ; MRIbinarize(mri_binary, mri_binary, 1, 0, 1) ; mri_white_dist = MRIdistanceTransform(mri_binary, NULL, 1, max_white_dist+1, DTRANS_MODE_SIGNED,NULL); if (Gdiag & DIAG_WRITE && DIAG_VERBOSE_ON) MRIwrite(mri_white_dist, "wd.mgz") ; if (mris->hemisphere == LEFT_HEMISPHERE) { ohemi_label = Right_Cerebral_Cortex ; hemi_label = Left_Cerebral_Cortex ; } else { hemi_label = Right_Cerebral_Cortex ; ohemi_label = Left_Cerebral_Cortex ; } step_size = mri->xsize/2 ; for (vno = 0 ; vno < mris->nvertices ; vno++) { v = &mris->vertices[vno] ; if (vno == Gdiag_no) DiagBreak() ; if (v->ripflag) continue ; // not cortex nx = v->pialx - v->whitex ; ny = v->pialy - v->whitey ; nz = v->pialz - v->whitez ; thickness = sqrt(nx*nx + ny*ny + nz*nz) ; if (FZERO(thickness)) continue ; // no cortex here x = (v->pialx + v->whitex)/2 ; y = (v->pialy + v->whitey)/2 ; z = (v->pialz + v->whitez)/2 ; // halfway between white and pial is x0 MRISsurfaceRASToVoxelCached(mris, mri_aseg, x, y, z, &xl, &yl, &zl) ; x0 = nint(xl); y0 = nint(yl) ; z0 = nint(zl) ; label0 = MRIgetVoxVal(mri_aparc, x0, y0, z0,0) ; // compute surface normal in voxel coords MRISsurfaceRASToVoxelCached(mris, mri_aseg, x+v->nx, y+v->ny, z+v->nz, &snx, &sny, &snz) ; snx -= xl ; sny -= yl ; snz -= zl ; for (ngm = 0, xk = -whalf ; xk <= whalf ; xk++) { xi = mri_aseg->xi[x0+xk] ; for (yk = -whalf ; yk <= whalf ; yk++) { yi = mri_aseg->yi[y0+yk] ; for (zk = -whalf ; zk <= whalf ; zk++) { zi = mri_aseg->zi[z0+zk] ; label = MRIgetVoxVal(mri_aseg, xi, yi, zi,0) ; if (xi == Gx && yi == Gy && zi == Gz) DiagBreak() ; if (label != hemi_label) continue ; label = MRIgetVoxVal(mri_aparc, xi, yi, zi,0) ; if (label && label != label0) // if outside the ribbon it won't be assigned to a parcel continue ; // constrain it to be in the same cortical parcel // search along vector connecting x0 to this point to make sure it is we don't perforate wm or leave and re-enter cortex nx = xi-x0 ; ny = yi-y0 ; nz = zi-z0 ; thickness = sqrt(nx*nx + ny*ny + nz*nz) ; assignable = 1 ; // assume this point should be counted if (thickness > 0) { nx /= thickness ; ny /= thickness ; nz /= thickness ; dot = nx*snx + ny*sny + nz*snz ; angle = acos(dot) ; if (FABS(angle) > angle_threshold) assignable = 0 ; outside_of_ribbon = 0 ; for (dist = 0 ; assignable && dist <= thickness ; dist += step_size) { xv = x0+nx*dist ; yv = y0+ny*dist ; zv = z0+nz*dist ; if (nint(xv) == Gx && nint(yv) == Gy && nint(zv) == Gz) DiagBreak() ; MRIsampleVolume(mri_pial_dist, xv, yv, zv, &pdist) ; MRIsampleVolume(mri_white_dist, xv, yv, zv, &wdist) ; label = MRIgetVoxVal(mri_aseg, xi, yi, zi,0) ; if (SKIP_LABEL(label) || label == ohemi_label) assignable = 0 ; if (wdist < 0) // entered wm - not assignable assignable = 0 ; else { if (pdist > 0) // outside pial surface outside_of_ribbon = 1 ; else { if (outside_of_ribbon) // left ribbon and reentered assignable = 0 ; } } } } // close of thickness > 0 if (assignable) ngm++ ; else DiagBreak() ; } } } MRIsetVoxVal(mri_features, vno, 0, 0, 0, ngm) ; } MRIfree(&mri_white_dist) ; MRIfree(&mri_pial_dist) ; MRIfree(&mri_binary) ; return(mri_features) ; }
int LEVMAR_BC_DER( void (*func)(LM_REAL *p, LM_REAL *hx, int m, int n, void *adata), /* functional relation describing measurements. A p \in R^m yields a \hat{x} \in R^n */ void (*jacf)(LM_REAL *p, LM_REAL *j, int m, int n, void *adata), /* function to evaluate the jacobian \part x / \part p */ LM_REAL *p, /* I/O: initial parameter estimates. On output has the estimated solution */ LM_REAL *x, /* I: measurement vector */ int m, /* I: parameter vector dimension (i.e. #unknowns) */ int n, /* I: measurement vector dimension */ LM_REAL *lb, /* I: vector of lower bounds. If NULL, no lower bounds apply */ LM_REAL *ub, /* I: vector of upper bounds. If NULL, no upper bounds apply */ int itmax, /* I: maximum number of iterations */ LM_REAL opts[4], /* I: minim. options [\mu, \epsilon1, \epsilon2, \epsilon3]. Respectively the scale factor for initial \mu, * stopping thresholds for ||J^T e||_inf, ||Dp||_2 and ||e||_2. Set to NULL for defaults to be used. * Note that ||J^T e||_inf is computed on free (not equal to lb[i] or ub[i]) variables only. */ LM_REAL info[LM_INFO_SZ], /* O: information regarding the minimization. Set to NULL if don't care * info[0]= ||e||_2 at initial p. * info[1-4]=[ ||e||_2, ||J^T e||_inf, ||Dp||_2, mu/max[J^T J]_ii ], all computed at estimated p. * info[5]= # iterations, * info[6]=reason for terminating: 1 - stopped by small gradient J^T e * 2 - stopped by small Dp * 3 - stopped by itmax * 4 - singular matrix. Restart from current p with increased mu * 5 - no further error reduction is possible. Restart with increased mu * 6 - stopped by small ||e||_2 * info[7]= # function evaluations * info[8]= # jacobian evaluations */ LM_REAL *work, /* working memory, allocate if NULL */ LM_REAL *covar, /* O: Covariance matrix corresponding to LS solution; mxm. Set to NULL if not needed. */ void *adata) /* pointer to possibly additional data, passed uninterpreted to func & jacf. * Set to NULL if not needed */ { register int i, j, k, l; int worksz, freework=0, issolved; /* temp work arrays */ LM_REAL *e, /* nx1 */ *hx, /* \hat{x}_i, nx1 */ *jacTe, /* J^T e_i mx1 */ *jac, /* nxm */ *jacTjac, /* mxm */ *Dp, /* mx1 */ *diag_jacTjac, /* diagonal of J^T J, mx1 */ *pDp; /* p + Dp, mx1 */ register LM_REAL mu, /* damping constant */ tmp; /* mainly used in matrix & vector multiplications */ LM_REAL p_eL2, jacTe_inf, pDp_eL2; /* ||e(p)||_2, ||J^T e||_inf, ||e(p+Dp)||_2 */ LM_REAL p_L2, Dp_L2=LM_REAL_MAX, dF, dL; LM_REAL tau, eps1, eps2, eps2_sq, eps3; LM_REAL init_p_eL2; int nu=2, nu2, stop, nfev, njev=0; const int nm=n*m; /* variables for constrained LM */ struct FUNC_STATE fstate; LM_REAL alpha=CNST(1e-4), beta=CNST(0.9), gamma=CNST(0.99995), gamma_sq=gamma*gamma, rho=CNST(1e-8); LM_REAL t, t0; LM_REAL steptl=CNST(1e3)*(LM_REAL)sqrt(LM_REAL_EPSILON), jacTeDp; LM_REAL tmin=CNST(1e-12), tming=CNST(1e-18); /* minimum step length for LS and PG steps */ const LM_REAL tini=CNST(1.0); /* initial step length for LS and PG steps */ int nLMsteps=0, nLSsteps=0, nPGsteps=0, gprevtaken=0; int numactive; mu=jacTe_inf=t=0.0; tmin=tmin; /* -Wall */ if(n<m){ fprintf(stderr, LCAT(LEVMAR_BC_DER, "(): cannot solve a problem with fewer measurements [%d] than unknowns [%d]\n"), n, m); exit(1); } if(!jacf){ fprintf(stderr, RCAT("No function specified for computing the jacobian in ", LEVMAR_BC_DER) RCAT("().\nIf no such function is available, use ", LEVMAR_BC_DIF) RCAT("() rather than ", LEVMAR_BC_DER) "()\n"); exit(1); } if(!BOXCHECK(lb, ub, m)){ fprintf(stderr, LCAT(LEVMAR_BC_DER, "(): at least one lower bound exceeds the upper one\n")); exit(1); } if(opts){ tau=opts[0]; eps1=opts[1]; eps2=opts[2]; eps2_sq=opts[2]*opts[2]; eps3=opts[3]; } else{ // use default values tau=CNST(LM_INIT_MU); eps1=CNST(LM_STOP_THRESH); eps2=CNST(LM_STOP_THRESH); eps2_sq=CNST(LM_STOP_THRESH)*CNST(LM_STOP_THRESH); eps3=CNST(LM_STOP_THRESH); } if(!work){ worksz=LM_DER_WORKSZ(m, n); //2*n+4*m + n*m + m*m; work=(LM_REAL *)malloc(worksz*sizeof(LM_REAL)); /* allocate a big chunk in one step */ if(!work){ fprintf(stderr, LCAT(LEVMAR_BC_DER, "(): memory allocation request failed\n")); exit(1); } freework=1; } /* set up work arrays */ e=work; hx=e + n; jacTe=hx + n; jac=jacTe + m; jacTjac=jac + nm; Dp=jacTjac + m*m; diag_jacTjac=Dp + m; pDp=diag_jacTjac + m; fstate.n=n; fstate.hx=hx; fstate.x=x; fstate.adata=adata; fstate.nfev=&nfev; /* see if starting point is within the feasile set */ for(i=0; i<m; ++i) pDp[i]=p[i]; BOXPROJECT(p, lb, ub, m); /* project to feasible set */ for(i=0; i<m; ++i) if(pDp[i]!=p[i]) fprintf(stderr, RCAT("Warning: component %d of starting point not feasible in ", LEVMAR_BC_DER) "()! [%g projected to %g]\n", i, p[i], pDp[i]); /* compute e=x - f(p) and its L2 norm */ (*func)(p, hx, m, n, adata); nfev=1; for(i=0, p_eL2=0.0; i<n; ++i){ e[i]=tmp=x[i]-hx[i]; p_eL2+=tmp*tmp; } init_p_eL2=p_eL2; for(k=stop=0; k<itmax && !stop; ++k){ //printf("%d %.15g\n", k, 0.5*p_eL2); /* Note that p and e have been updated at a previous iteration */ if(p_eL2<=eps3){ /* error is small */ stop=6; break; } /* Compute the jacobian J at p, J^T J, J^T e, ||J^T e||_inf and ||p||^2. * Since J^T J is symmetric, its computation can be speeded up by computing * only its upper triangular part and copying it to the lower part */ (*jacf)(p, jac, m, n, adata); ++njev; /* J^T J, J^T e */ if(nm<__BLOCKSZ__SQ){ // this is a small problem /* This is the straightforward way to compute J^T J, J^T e. However, due to * its noncontinuous memory access pattern, it incures many cache misses when * applied to large minimization problems (i.e. problems involving a large * number of free variables and measurements), in which J is too large to * fit in the L1 cache. For such problems, a cache-efficient blocking scheme * is preferable. * * Thanks to John Nitao of Lawrence Livermore Lab for pointing out this * performance problem. * * On the other hand, the straightforward algorithm is faster on small * problems since in this case it avoids the overheads of blocking. */ for(i=0; i<m; ++i){ for(j=i; j<m; ++j){ int lm; for(l=0, tmp=0.0; l<n; ++l){ lm=l*m; tmp+=jac[lm+i]*jac[lm+j]; } /* store tmp in the corresponding upper and lower part elements */ jacTjac[i*m+j]=jacTjac[j*m+i]=tmp; } /* J^T e */ for(l=0, tmp=0.0; l<n; ++l) tmp+=jac[l*m+i]*e[l]; jacTe[i]=tmp; } } else{ // this is a large problem /* Cache efficient computation of J^T J based on blocking */ TRANS_MAT_MAT_MULT(jac, jacTjac, n, m); /* cache efficient computation of J^T e */ for(i=0; i<m; ++i) jacTe[i]=0.0; for(i=0; i<n; ++i){ register LM_REAL *jacrow; for(l=0, jacrow=jac+i*m, tmp=e[i]; l<m; ++l) jacTe[l]+=jacrow[l]*tmp; } } /* Compute ||J^T e||_inf and ||p||^2. Note that ||J^T e||_inf * is computed for free (i.e. inactive) variables only. * At a local minimum, if p[i]==ub[i] then g[i]>0; * if p[i]==lb[i] g[i]<0; otherwise g[i]=0 */ for(i=j=numactive=0, p_L2=jacTe_inf=0.0; i<m; ++i){ if(ub && p[i]==ub[i]){ ++numactive; if(jacTe[i]>0.0) ++j; } else if(lb && p[i]==lb[i]){ ++numactive; if(jacTe[i]<0.0) ++j; } else if(jacTe_inf < (tmp=FABS(jacTe[i]))) jacTe_inf=tmp; diag_jacTjac[i]=jacTjac[i*m+i]; /* save diagonal entries so that augmentation can be later canceled */ p_L2+=p[i]*p[i]; } //p_L2=sqrt(p_L2); #if 0 if(!(k%100)){ printf("Current estimate: "); for(i=0; i<m; ++i) printf("%.9g ", p[i]); printf("-- errors %.9g %0.9g, #active %d [%d]\n", jacTe_inf, p_eL2, numactive, j); } #endif /* check for convergence */ if(j==numactive && (jacTe_inf <= eps1)){ Dp_L2=0.0; /* no increment for p in this case */ stop=1; break; } /* compute initial damping factor */ if(k==0){ if(!lb && !ub){ /* no bounds */ for(i=0, tmp=LM_REAL_MIN; i<m; ++i) if(diag_jacTjac[i]>tmp) tmp=diag_jacTjac[i]; /* find max diagonal element */ mu=tau*tmp; } else mu=CNST(0.5)*tau*p_eL2; /* use Kanzow's starting mu */ } /* determine increment using a combination of adaptive damping, line search and projected gradient search */ while(1){ /* augment normal equations */ for(i=0; i<m; ++i) jacTjac[i*m+i]+=mu; /* solve augmented equations */ /* 5 alternatives are available: LU, Cholesky, 2 variants of QR decomposition and SVD. * Cholesky is the fastest but might be inaccurate; QR is slower but more accurate; * SVD is the slowest but most accurate; LU offers a tradeoff between accuracy and speed */ issolved=AX_EQ_B_LU(jacTjac, jacTe, Dp, m); //issolved=AX_EQ_B_CHOL(jacTjac, jacTe, Dp, m); //issolved=AX_EQ_B_QR(jacTjac, jacTe, Dp, m); //issolved=AX_EQ_B_QRLS(jacTjac, jacTe, Dp, m, m); //issolved=AX_EQ_B_SVD(jacTjac, jacTe, Dp, m); if(issolved){ for(i=0; i<m; ++i) pDp[i]=p[i] + Dp[i]; /* compute p's new estimate and ||Dp||^2 */ BOXPROJECT(pDp, lb, ub, m); /* project to feasible set */ for(i=0, Dp_L2=0.0; i<m; ++i){ Dp[i]=tmp=pDp[i]-p[i]; Dp_L2+=tmp*tmp; } //Dp_L2=sqrt(Dp_L2); if(Dp_L2<=eps2_sq*p_L2){ /* relative change in p is small, stop */ stop=2; break; } if(Dp_L2>=(p_L2+eps2)/(CNST(EPSILON)*CNST(EPSILON))){ /* almost singular */ stop=4; break; } (*func)(pDp, hx, m, n, adata); ++nfev; /* evaluate function at p + Dp */ for(i=0, pDp_eL2=0.0; i<n; ++i){ /* compute ||e(pDp)||_2 */ hx[i]=tmp=x[i]-hx[i]; pDp_eL2+=tmp*tmp; } if(pDp_eL2<=gamma_sq*p_eL2){ for(i=0, dL=0.0; i<m; ++i) dL+=Dp[i]*(mu*Dp[i]+jacTe[i]); #if 1 if(dL>0.0){ dF=p_eL2-pDp_eL2; tmp=(CNST(2.0)*dF/dL-CNST(1.0)); tmp=CNST(1.0)-tmp*tmp*tmp; mu=mu*( (tmp>=CNST(ONE_THIRD))? tmp : CNST(ONE_THIRD) ); } else mu=(mu>=pDp_eL2)? pDp_eL2 : mu; /* pDp_eL2 is the new pDp_eL2 */ #else mu=(mu>=pDp_eL2)? pDp_eL2 : mu; /* pDp_eL2 is the new pDp_eL2 */ #endif nu=2; for(i=0 ; i<m; ++i) /* update p's estimate */ p[i]=pDp[i]; for(i=0; i<n; ++i) /* update e and ||e||_2 */ e[i]=hx[i]; p_eL2=pDp_eL2; ++nLMsteps; gprevtaken=0; break; } } else{ /* the augmented linear system could not be solved, increase mu */ mu*=nu; nu2=nu<<1; // 2*nu; if(nu2<=nu){ /* nu has wrapped around (overflown). Thanks to Frank Jordan for spotting this case */ stop=5; break; } nu=nu2; for(i=0; i<m; ++i) /* restore diagonal J^T J entries */ jacTjac[i*m+i]=diag_jacTjac[i]; continue; /* solve again with increased nu */ } /* if this point is reached, the LM step did not reduce the error; * see if it is a descent direction */ /* negate jacTe (i.e. g) & compute g^T * Dp */ for(i=0, jacTeDp=0.0; i<m; ++i){ jacTe[i]=-jacTe[i]; jacTeDp+=jacTe[i]*Dp[i]; } if(jacTeDp<=-rho*pow(Dp_L2, _POW_/CNST(2.0))){ /* Dp is a descent direction; do a line search along it */ int mxtake, iretcd; LM_REAL stepmx; tmp=(LM_REAL)sqrt(p_L2); stepmx=CNST(1e3)*( (tmp>=CNST(1.0))? tmp : CNST(1.0) ); #if 1 /* use Schnabel's backtracking line search; it requires fewer "func" evaluations */ LNSRCH(m, p, p_eL2, jacTe, Dp, alpha, pDp, &pDp_eL2, func, fstate, &mxtake, &iretcd, stepmx, steptl, NULL); /* NOTE: LNSRCH() updates hx */ if(iretcd!=0) goto gradproj; /* rather inelegant but effective way to handle LNSRCH() failures... */ #else /* use the simpler (but slower!) line search described by Kanzow */ for(t=tini; t>tmin; t*=beta){ for(i=0; i<m; ++i){ pDp[i]=p[i] + t*Dp[i]; //pDp[i]=__MEDIAN3(lb[i], pDp[i], ub[i]); /* project to feasible set */ } (*func)(pDp, hx, m, n, adata); ++nfev; /* evaluate function at p + t*Dp */ for(i=0, pDp_eL2=0.0; i<n; ++i){ /* compute ||e(pDp)||_2 */ hx[i]=tmp=x[i]-hx[i]; pDp_eL2+=tmp*tmp; } //if(CNST(0.5)*pDp_eL2<=CNST(0.5)*p_eL2 + t*alpha*jacTeDp) break; if(pDp_eL2<=p_eL2 + CNST(2.0)*t*alpha*jacTeDp) break; } #endif ++nLSsteps; gprevtaken=0; /* NOTE: new estimate for p is in pDp, associated error in hx and its norm in pDp_eL2. * These values are used below to update their corresponding variables */ } else{ gradproj: /* Note that this point can also be reached via a goto when LNSRCH() fails */ /* jacTe is a descent direction; make a projected gradient step */ /* if the previous step was along the gradient descent, try to use the t employed in that step */ /* compute ||g|| */ for(i=0, tmp=0.0; i<m; ++i) tmp=jacTe[i]*jacTe[i]; tmp=(LM_REAL)sqrt(tmp); tmp=CNST(100.0)/(CNST(1.0)+tmp); t0=(tmp<=tini)? tmp : tini; /* guard against poor scaling & large steps; see (3.50) in C.T. Kelley's book */ for(t=(gprevtaken)? t : t0; t>tming; t*=beta){ for(i=0; i<m; ++i) pDp[i]=p[i] - t*jacTe[i]; BOXPROJECT(pDp, lb, ub, m); /* project to feasible set */ for(i=0; i<m; ++i) Dp[i]=pDp[i]-p[i]; (*func)(pDp, hx, m, n, adata); ++nfev; /* evaluate function at p - t*g */ for(i=0, pDp_eL2=0.0; i<n; ++i){ /* compute ||e(pDp)||_2 */ hx[i]=tmp=x[i]-hx[i]; pDp_eL2+=tmp*tmp; } for(i=0, tmp=0.0; i<m; ++i) /* compute ||g^T * Dp|| */ tmp+=jacTe[i]*Dp[i]; if(gprevtaken && pDp_eL2<=p_eL2 + CNST(2.0)*CNST(0.99999)*tmp){ /* starting t too small */ t=t0; gprevtaken=0; continue; } //if(CNST(0.5)*pDp_eL2<=CNST(0.5)*p_eL2 + alpha*tmp) break; if(pDp_eL2<=p_eL2 + CNST(2.0)*alpha*tmp) break; } ++nPGsteps; gprevtaken=1; /* NOTE: new estimate for p is in pDp, associated error in hx and its norm in pDp_eL2 */ } /* update using computed values */ for(i=0, Dp_L2=0.0; i<m; ++i){ tmp=pDp[i]-p[i]; Dp_L2+=tmp*tmp; } //Dp_L2=sqrt(Dp_L2); if(Dp_L2<=eps2_sq*p_L2){ /* relative change in p is small, stop */ stop=2; break; } for(i=0 ; i<m; ++i) /* update p's estimate */ p[i]=pDp[i]; for(i=0; i<n; ++i) /* update e and ||e||_2 */ e[i]=hx[i]; p_eL2=pDp_eL2; break; } /* inner loop */ } if(k>=itmax) stop=3; for(i=0; i<m; ++i) /* restore diagonal J^T J entries */ jacTjac[i*m+i]=diag_jacTjac[i]; if(info){ info[0]=init_p_eL2; info[1]=p_eL2; info[2]=jacTe_inf; info[3]=Dp_L2; for(i=0, tmp=LM_REAL_MIN; i<m; ++i) if(tmp<jacTjac[i*m+i]) tmp=jacTjac[i*m+i]; info[4]=mu/tmp; info[5]=(LM_REAL)k; info[6]=(LM_REAL)stop; info[7]=(LM_REAL)nfev; info[8]=(LM_REAL)njev; } /* covariance matrix */ if(covar){ LEVMAR_COVAR(jacTjac, covar, p_eL2, m, n); } if(freework) free(work); #if 0 printf("%d LM steps, %d line search, %d projected gradient\n", nLMsteps, nLSsteps, nPGsteps); #endif return (stop!=4)? k : -1; }
static void LNSRCH(int m, LM_REAL *x, LM_REAL f, LM_REAL *g, LM_REAL *p, LM_REAL alpha, LM_REAL *xpls, LM_REAL *ffpls, void (*func)(LM_REAL *p, LM_REAL *hx, int m, int n, void *adata), struct FUNC_STATE state, int *mxtake, int *iretcd, LM_REAL stepmx, LM_REAL steptl, LM_REAL *sx) { /* Find a next newton iterate by backtracking line search. * Specifically, finds a \lambda such that for a fixed alpha<0.5 (usually 1e-4), * f(x + \lambda*p) <= f(x) + alpha * \lambda * g^T*p * * Translated (with minor changes) from Schnabel, Koontz & Weiss uncmin.f, v1.3 * PARAMETERS : * m --> dimension of problem (i.e. number of variables) * x(m) --> old iterate: x[k-1] * f --> function value at old iterate, f(x) * g(m) --> gradient at old iterate, g(x), or approximate * p(m) --> non-zero newton step * alpha --> fixed constant < 0.5 for line search (see above) * xpls(m) <-- new iterate x[k] * ffpls <-- function value at new iterate, f(xpls) * func --> name of subroutine to evaluate function * state <--> information other than x and m that func requires. * state is not modified in xlnsrch (but can be modified by func). * iretcd <-- return code * mxtake <-- boolean flag indicating step of maximum length used * stepmx --> maximum allowable step size * steptl --> relative step size at which successive iterates * considered close enough to terminate algorithm * sx(m) --> diagonal scaling matrix for x, can be NULL * internal variables * sln newton length * rln relative length of newton step */ register int i; int firstback = 1; LM_REAL disc; LM_REAL a3, b; LM_REAL t1, t2, t3, lambda, tlmbda, rmnlmb; LM_REAL scl, rln, sln, slp; LM_REAL tmp1, tmp2; LM_REAL fpls, pfpls = 0., plmbda = 0.; /* -Wall */ f*=CNST(0.5); *mxtake = 0; *iretcd = 2; tmp1 = 0.; if(!sx) /* no scaling */ for (i = 0; i < m; ++i) tmp1 += p[i] * p[i]; else for (i = 0; i < m; ++i) tmp1 += sx[i] * sx[i] * p[i] * p[i]; sln = (LM_REAL)sqrt(tmp1); if (sln > stepmx) { /* newton step longer than maximum allowed */ scl = stepmx / sln; for(i=0; i<m; ++i) /* p * scl */ p[i]*=scl; sln = stepmx; } for(i=0, slp=0.; i<m; ++i) /* g^T * p */ slp+=g[i]*p[i]; rln = 0.; if(!sx) /* no scaling */ for (i = 0; i < m; ++i) { tmp1 = (FABS(x[i])>=CNST(1.))? FABS(x[i]) : CNST(1.); tmp2 = FABS(p[i])/tmp1; if(rln < tmp2) rln = tmp2; } else for (i = 0; i < m; ++i) { tmp1 = (FABS(x[i])>=CNST(1.)/sx[i])? FABS(x[i]) : CNST(1.)/sx[i]; tmp2 = FABS(p[i])/tmp1; if(rln < tmp2) rln = tmp2; } rmnlmb = steptl / rln; lambda = CNST(1.0); /* check if new iterate satisfactory. generate new lambda if necessary. */ while(*iretcd > 1) { for (i = 0; i < m; ++i) xpls[i] = x[i] + lambda * p[i]; /* evaluate function at new point */ (*func)(xpls, state.hx, m, state.n, state.adata); for(i=0, tmp1=0.0; i<state.n; ++i){ state.hx[i]=tmp2=state.x[i]-state.hx[i]; tmp1+=tmp2*tmp2; } fpls=CNST(0.5)*tmp1; *ffpls=tmp1; ++(*(state.nfev)); if (fpls <= f + slp * alpha * lambda) { /* solution found */ *iretcd = 0; if (lambda == CNST(1.) && sln > stepmx * CNST(.99)) *mxtake = 1; return; } /* else : solution not (yet) found */ /* First find a point with a finite value */ if (lambda < rmnlmb) { /* no satisfactory xpls found sufficiently distinct from x */ *iretcd = 1; return; } else { /* calculate new lambda */ /* modifications to cover non-finite values */ if (fpls >= LM_REAL_MAX) { lambda *= CNST(0.1); firstback = 1; } else { if (firstback) { /* first backtrack: quadratic fit */ tlmbda = -lambda * slp / ((fpls - f - slp) * CNST(2.)); firstback = 0; } else { /* all subsequent backtracks: cubic fit */ t1 = fpls - f - lambda * slp; t2 = pfpls - f - plmbda * slp; t3 = CNST(1.) / (lambda - plmbda); a3 = CNST(3.) * t3 * (t1 / (lambda * lambda) - t2 / (plmbda * plmbda)); b = t3 * (t2 * lambda / (plmbda * plmbda) - t1 * plmbda / (lambda * lambda)); disc = b * b - a3 * slp; if (disc > b * b) /* only one positive critical point, must be minimum */ tlmbda = (-b + ((a3 < 0)? -(LM_REAL)sqrt(disc): (LM_REAL)sqrt(disc))) /a3; else /* both critical points positive, first is minimum */ tlmbda = (-b + ((a3 < 0)? (LM_REAL)sqrt(disc): -(LM_REAL)sqrt(disc))) /a3; if (tlmbda > lambda * CNST(.5)) tlmbda = lambda * CNST(.5); } plmbda = lambda; pfpls = fpls; if (tlmbda < lambda * CNST(.1)) lambda *= CNST(.1); else lambda = tlmbda; } } } } /* LNSRCH */
int NoDivTriTriIsect(double V0[3],double V1[3],double V2[3], double U0[3],double U1[3],double U2[3]) { double E1[3],E2[3]; double N1[3],N2[3],d1,d2; double du0,du1,du2,dv0,dv1,dv2; double D[3]; double isect1[2], isect2[2]; double du0du1,du0du2,dv0dv1,dv0dv2; short index; double vp0,vp1,vp2; double up0,up1,up2; double bb,cc,max; double a,b,c,x0,x1; double d,e,f,y0,y1; double xx,yy,xxyy,tmp; /* compute plane equation of triangle(V0,V1,V2) */ SUB(E1,V1,V0); SUB(E2,V2,V0); CROSS(N1,E1,E2); d1=-DOT(N1,V0); /* plane equation 1: N1.X+d1=0 */ /* put U0,U1,U2 into plane equation 1 to compute signed distances to the plane*/ du0=DOT(N1,U0)+d1; du1=DOT(N1,U1)+d1; du2=DOT(N1,U2)+d1; /* coplanarity robustness check */ #if USE_EPSILON_TEST==TRUE if(FABS(du0)<EPSILON) du0=0.0; if(FABS(du1)<EPSILON) du1=0.0; if(FABS(du2)<EPSILON) du2=0.0; #endif du0du1=du0*du1; du0du2=du0*du2; if(du0du1>0.0f && du0du2>0.0f) /* same sign on all of them + not equal 0 ? */ return 0; /* no intersection occurs */ /* compute plane of triangle (U0,U1,U2) */ SUB(E1,U1,U0); SUB(E2,U2,U0); CROSS(N2,E1,E2); d2=-DOT(N2,U0); /* plane equation 2: N2.X+d2=0 */ /* put V0,V1,V2 into plane equation 2 */ dv0=DOT(N2,V0)+d2; dv1=DOT(N2,V1)+d2; dv2=DOT(N2,V2)+d2; #if USE_EPSILON_TEST==TRUE if(FABS(dv0)<EPSILON) dv0=0.0; if(FABS(dv1)<EPSILON) dv1=0.0; if(FABS(dv2)<EPSILON) dv2=0.0; #endif dv0dv1=dv0*dv1; dv0dv2=dv0*dv2; if(dv0dv1>0.0f && dv0dv2>0.0f) /* same sign on all of them + not equal 0 ? */ return 0; /* no intersection occurs */ /* compute direction of intersection line */ CROSS(D,N1,N2); /* compute and index to the largest component of D */ max=(double)FABS(D[0]); index=0; bb=(double)FABS(D[1]); cc=(double)FABS(D[2]); if(bb>max) max=bb,index=1; if(cc>max) max=cc,index=2; /* this is the simplified projection onto L*/ vp0=V0[index]; vp1=V1[index]; vp2=V2[index]; up0=U0[index]; up1=U1[index]; up2=U2[index]; /* compute interval for triangle 1 */ NEWCOMPUTE_INTERVALS(vp0,vp1,vp2,dv0,dv1,dv2,dv0dv1,dv0dv2,a,b,c,x0,x1); /* compute interval for triangle 2 */ NEWCOMPUTE_INTERVALS(up0,up1,up2,du0,du1,du2,du0du1,du0du2,d,e,f,y0,y1); xx=x0*x1; yy=y0*y1; xxyy=xx*yy; tmp=a*xxyy; isect1[0]=tmp+b*x1*yy; isect1[1]=tmp+c*x0*yy; tmp=d*xxyy; isect2[0]=tmp+e*xx*y1; isect2[1]=tmp+f*xx*y0; SORT(isect1[0],isect1[1]); SORT(isect2[0],isect2[1]); if(isect1[1]<isect2[0] || isect2[1]<isect1[0]) return 0; return 1; }
/* * Update EC state */ static void echo_supp_update(echo_supp *ec, pj_int16_t *rec_frm, const pj_int16_t *play_frm) { int prev_index; unsigned i, j, frm_level, sum_play_level, ulaw; pj_uint16_t old_rec_frm_level, old_play_frm_level; float play_corr; ++ec->update_cnt; if (ec->update_cnt > 0x7FFFFFFF) ec->update_cnt = 0x7FFFFFFF; /* Detect overflow */ /* Calculate current play frame level */ frm_level = pjmedia_calc_avg_signal(play_frm, ec->samples_per_segment); ++frm_level; /* to avoid division by zero */ /* Save the oldest frame level for later */ old_play_frm_level = ec->play_hist[0]; /* Push current frame level to the back of the play history */ pj_array_erase(ec->play_hist, sizeof(pj_uint16_t), ec->play_hist_cnt, 0); ec->play_hist[ec->play_hist_cnt-1] = (pj_uint16_t) frm_level; /* Calculate level of current mic frame */ frm_level = pjmedia_calc_avg_signal(rec_frm, ec->samples_per_segment); ++frm_level; /* to avoid division by zero */ /* Save the oldest frame level for later */ old_rec_frm_level = ec->rec_hist[0]; /* Push to the back of the rec history */ pj_array_erase(ec->rec_hist, sizeof(pj_uint16_t), ec->templ_cnt, 0); ec->rec_hist[ec->templ_cnt-1] = (pj_uint16_t) frm_level; /* Can't do the calc until the play history is full. */ if (ec->update_cnt < ec->play_hist_cnt) return; /* Skip if learning is done */ if (!ec->learning) return; /* Calculate rec signal pattern */ if (ec->sum_rec_level == 0) { /* Buffer has just been filled up, do full calculation */ ec->rec_corr = 0; ec->sum_rec_level = 0; for (i=0; i < ec->templ_cnt-1; ++i) { float corr; corr = (float)ec->rec_hist[i+1] / ec->rec_hist[i]; ec->rec_corr += corr; ec->sum_rec_level += ec->rec_hist[i]; } ec->sum_rec_level += ec->rec_hist[i]; } else { /* Update from previous calculation */ ec->sum_rec_level = ec->sum_rec_level - old_rec_frm_level + ec->rec_hist[ec->templ_cnt-1]; ec->rec_corr = ec->rec_corr - ((float)ec->rec_hist[0] / old_rec_frm_level) + ((float)ec->rec_hist[ec->templ_cnt-1] / ec->rec_hist[ec->templ_cnt-2]); } /* Iterate through the play history and calculate the signal correlation * for every tail position in the play_hist. Save the result in temporary * array since we may bail out early if the conversation state is not good * to detect echo. */ /* * First phase: do full calculation for the first position */ if (ec->sum_play_level0 == 0) { /* Buffer has just been filled up, do full calculation */ sum_play_level = 0; play_corr = 0; for (j=0; j<ec->templ_cnt-1; ++j) { float corr; corr = (float)ec->play_hist[j+1] / ec->play_hist[j]; play_corr += corr; sum_play_level += ec->play_hist[j]; } sum_play_level += ec->play_hist[j]; ec->sum_play_level0 = sum_play_level; ec->play_corr0 = play_corr; } else { /* Update from previous calculation */ ec->sum_play_level0 = ec->sum_play_level0 - old_play_frm_level + ec->play_hist[ec->templ_cnt-1]; ec->play_corr0 = ec->play_corr0 - ((float)ec->play_hist[0] / old_play_frm_level) + ((float)ec->play_hist[ec->templ_cnt-1] / ec->play_hist[ec->templ_cnt-2]); sum_play_level = ec->sum_play_level0; play_corr = ec->play_corr0; } ec->tmp_corr[0] = FABS(play_corr - ec->rec_corr); ec->tmp_factor[0] = (float)ec->sum_rec_level / sum_play_level; /* Bail out if remote isn't talking */ ulaw = pjmedia_linear2ulaw(sum_play_level/ec->templ_cnt) ^ 0xFF; if (ulaw < MIN_SIGNAL_ULAW) { echo_supp_set_state(ec, ST_REM_SILENT, ulaw); return; } /* Bail out if local user is talking */ if (ec->sum_rec_level >= sum_play_level) { echo_supp_set_state(ec, ST_LOCAL_TALK, ulaw); return; } /* * Second phase: do incremental calculation for the rest of positions */ for (i=1; i < ec->tail_cnt; ++i) { unsigned end; end = i + ec->templ_cnt; sum_play_level = sum_play_level - ec->play_hist[i-1] + ec->play_hist[end-1]; play_corr = play_corr - ((float)ec->play_hist[i]/ec->play_hist[i-1]) + ((float)ec->play_hist[end-1]/ec->play_hist[end-2]); /* Bail out if remote isn't talking */ ulaw = pjmedia_linear2ulaw(sum_play_level/ec->templ_cnt) ^ 0xFF; if (ulaw < MIN_SIGNAL_ULAW) { echo_supp_set_state(ec, ST_REM_SILENT, ulaw); return; } /* Bail out if local user is talking */ if (ec->sum_rec_level >= sum_play_level) { echo_supp_set_state(ec, ST_LOCAL_TALK, ulaw); return; } #if 0 // disabled: not a good idea if mic throws out loud echo /* Also bail out if we suspect there's a doubletalk */ ulaw = pjmedia_linear2ulaw(ec->sum_rec_level/ec->templ_cnt) ^ 0xFF; if (ulaw > MIN_SIGNAL_ULAW) { echo_supp_set_state(ec, ST_DOUBLETALK, ulaw); return; } #endif /* Calculate correlation and save to temporary array */ ec->tmp_corr[i] = FABS(play_corr - ec->rec_corr); /* Also calculate the gain factor between mic and speaker level */ ec->tmp_factor[i] = (float)ec->sum_rec_level / sum_play_level; pj_assert(ec->tmp_factor[i] < 1); } /* We seem to have good signal, we can update the EC state */ echo_supp_set_state(ec, ST_REM_TALK, MIN_SIGNAL_ULAW); /* Accummulate the correlation value to the history and at the same * time find the tail index of the best correlation. */ prev_index = ec->tail_index; for (i=1; i<ec->tail_cnt-1; ++i) { float *p = &ec->corr_sum[i], sum; /* Accummulate correlation value for this tail position */ ec->corr_sum[i] += ec->tmp_corr[i]; /* Update the min and avg gain factor for this tail position */ if (ec->tmp_factor[i] < ec->min_factor[i]) ec->min_factor[i] = ec->tmp_factor[i]; ec->avg_factor[i] = ((ec->avg_factor[i] * ec->tail_cnt) + ec->tmp_factor[i]) / (ec->tail_cnt + 1); /* To get the best correlation, also include the correlation * value of the neighbouring tail locations. */ sum = *(p-1) + (*p)*2 + *(p+1); //sum = *p; /* See if we have better correlation value */ if (sum < ec->best_corr) { ec->tail_index = i; ec->best_corr = sum; } } if (ec->tail_index != prev_index) { unsigned duration; int imin, iavg; duration = ec->update_cnt * SEGMENT_PTIME; imin = (int)(ec->min_factor[ec->tail_index] * 1000); iavg = (int)(ec->avg_factor[ec->tail_index] * 1000); PJ_LOG(4,(THIS_FILE, "Echo suppressor updated at t=%03d.%03ds, echo tail=%d msec" ", factor min/avg=%d.%03d/%d.%03d", (duration/1000), (duration%1000), (ec->tail_cnt-ec->tail_index) * SEGMENT_PTIME, imin/1000, imin%1000, iavg/1000, iavg%1000)); } ++ec->calc_cnt; if (ec->calc_cnt > ec->max_calc) { unsigned duration; int imin, iavg; ec->learning = PJ_FALSE; ec->running_cnt = 0; duration = ec->update_cnt * SEGMENT_PTIME; imin = (int)(ec->min_factor[ec->tail_index] * 1000); iavg = (int)(ec->avg_factor[ec->tail_index] * 1000); PJ_LOG(4,(THIS_FILE, "Echo suppressor learning done at t=%03d.%03ds, tail=%d ms" ", factor min/avg=%d.%03d/%d.%03d", (duration/1000), (duration%1000), (ec->tail_cnt-ec->tail_index) * SEGMENT_PTIME, imin/1000, imin%1000, iavg/1000, iavg%1000)); } }
static void Loc_cbInfo( LocState *pts ) { if( pts->theInfo.status == AEEGPS_ERR_NO_ERR || (pts->theInfo.status == AEEGPS_ERR_INFO_UNAVAIL && pts->theInfo.fValid) ) { #if MIN_BREW_VERSION(2,1) pts->pResp->lat = WGS84_TO_DEGREES( pts->theInfo.dwLat ); #ifdef AEE_SIMULATOR //FOR TEST pts->pResp->lon = -WGS84_TO_DEGREES( pts->theInfo.dwLon ); #else pts->pResp->lon = WGS84_TO_DEGREES( pts->theInfo.dwLon ); #endif #else double wgsFactor; wgsFactor = FASSIGN_STR("186413.5111"); pts->pResp->lat = FASSIGN_INT(pts->theInfo.dwLat); pts->pResp->lat = FDIV(pts->pResp->lat, wgsFactor); pts->pResp->lon = FASSIGN_INT(pts->theInfo.dwLon); pts->pResp->lon = FDIV(pts->pResp->lon, wgsFactor); #endif /* MIN_BREW_VERSION 2.1 */ pts->pResp->height = pts->theInfo.wAltitude - 500; pts->pResp->velocityHor = FMUL( pts->theInfo.wVelocityHor,0.25); //当前夹角 if (FCMP_G(FABS(pts->lastCoordinate.lat), 0)) { pts->pResp->heading = Loc_Calc_Azimuth(pts->lastCoordinate.lat, pts->lastCoordinate.lon, pts->pResp->lat, pts->pResp->lon); } else { pts->pResp->heading = 0; } //For Test Hack #ifdef AEE_SIMULATOR pts->pResp->lat = 38.0422378880; pts->pResp->lon = 114.4925141047; #endif if (pts->pResp->bSetDestPos) { //计算距离和方位角 pts->pResp->distance = Loc_Calc_Distance(pts->pResp->lat, pts->pResp->lon, pts->pResp->destPos.lat, pts->pResp->destPos.lon); pts->pResp->destHeading = Loc_Calc_Azimuth(pts->pResp->lat, pts->pResp->lon, pts->pResp->destPos.lat, pts->pResp->destPos.lon); } //记录历史定位信息 pts->lastCoordinate.lat = pts->pResp->lat; pts->lastCoordinate.lon = pts->pResp->lon; pts->pResp->dwFixNum++; pts->pResp->nErr = SUCCESS; Loc_Notify( pts ); if( FALSE == pts->bSetForCancellation ) { ISHELL_SetTimerEx( pts->pShell, pts->nLocInterval * 1000, &pts->cbIntervalTimer ); } else { Loc_Stop( pts ); } } }
int main () { int status = 0; /* Subject sequence empty or invalid. */ { const char input[] = ""; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == 0.0); ASSERT (!signbit (result)); ASSERT (ptr == input); ASSERT (errno == 0 || errno == EINVAL); } { const char input[] = " "; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == 0.0); ASSERT (!signbit (result)); ASSERT (ptr == input); ASSERT (errno == 0 || errno == EINVAL); } { const char input[] = " +"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == 0.0); ASSERT (!signbit (result)); ASSERT (ptr == input); ASSERT (errno == 0 || errno == EINVAL); } { const char input[] = " ."; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == 0.0); ASSERT (!signbit (result)); ASSERT (ptr == input); ASSERT (errno == 0 || errno == EINVAL); } { const char input[] = " .e0"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == 0.0); ASSERT (!signbit (result)); ASSERT (ptr == input); /* IRIX 6.5, OSF/1 5.1 */ ASSERT (errno == 0 || errno == EINVAL); } { const char input[] = " +.e-0"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == 0.0); ASSERT (!signbit (result)); ASSERT (ptr == input); /* IRIX 6.5, OSF/1 5.1 */ ASSERT (errno == 0 || errno == EINVAL); } { const char input[] = " in"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == 0.0); ASSERT (!signbit (result)); ASSERT (ptr == input); ASSERT (errno == 0 || errno == EINVAL); } { const char input[] = " na"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == 0.0); ASSERT (!signbit (result)); ASSERT (ptr == input); ASSERT (errno == 0 || errno == EINVAL); } /* Simple floating point values. */ { const char input[] = "1"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == 1.0); ASSERT (ptr == input + 1); ASSERT (errno == 0); } { const char input[] = "1."; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == 1.0); ASSERT (ptr == input + 2); ASSERT (errno == 0); } { const char input[] = ".5"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); /* FIXME - gnulib's version is rather inaccurate. It would be nice to guarantee an exact result, but for now, we settle for a 1-ulp error. */ ASSERT (FABS (result - 0.5) < DBL_EPSILON); ASSERT (ptr == input + 2); ASSERT (errno == 0); } { const char input[] = " 1"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == 1.0); ASSERT (ptr == input + 2); ASSERT (errno == 0); } { const char input[] = "+1"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == 1.0); ASSERT (ptr == input + 2); ASSERT (errno == 0); } { const char input[] = "-1"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == -1.0); ASSERT (ptr == input + 2); ASSERT (errno == 0); } { const char input[] = "1e0"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == 1.0); ASSERT (ptr == input + 3); ASSERT (errno == 0); } { const char input[] = "1e+0"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == 1.0); ASSERT (ptr == input + 4); ASSERT (errno == 0); } { const char input[] = "1e-0"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == 1.0); ASSERT (ptr == input + 4); ASSERT (errno == 0); } { const char input[] = "1e1"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == 10.0); ASSERT (ptr == input + 3); ASSERT (errno == 0); } { const char input[] = "5e-1"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); /* FIXME - gnulib's version is rather inaccurate. It would be nice to guarantee an exact result, but for now, we settle for a 1-ulp error. */ ASSERT (FABS (result - 0.5) < DBL_EPSILON); ASSERT (ptr == input + 4); ASSERT (errno == 0); } /* Zero. */ { const char input[] = "0"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == 0.0); ASSERT (!signbit (result)); ASSERT (ptr == input + 1); ASSERT (errno == 0); } { const char input[] = ".0"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == 0.0); ASSERT (!signbit (result)); ASSERT (ptr == input + 2); ASSERT (errno == 0); } { const char input[] = "0e0"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == 0.0); ASSERT (!signbit (result)); ASSERT (ptr == input + 3); ASSERT (errno == 0); } { const char input[] = "0e+9999999"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == 0.0); ASSERT (!signbit (result)); ASSERT (ptr == input + 10); ASSERT (errno == 0); } { const char input[] = "0e-9999999"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == 0.0); ASSERT (!signbit (result)); ASSERT (ptr == input + 10); ASSERT (errno == 0); } { const char input[] = "-0"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == 0.0); ASSERT (!!signbit (result) == !!signbit (-zero)); /* IRIX 6.5, OSF/1 4.0 */ ASSERT (ptr == input + 2); ASSERT (errno == 0); } /* Suffixes. */ { const char input[] = "1f"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == 1.0); ASSERT (ptr == input + 1); ASSERT (errno == 0); } { const char input[] = "1.f"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == 1.0); ASSERT (ptr == input + 2); ASSERT (errno == 0); } { const char input[] = "1e"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == 1.0); ASSERT (ptr == input + 1); ASSERT (errno == 0); } { const char input[] = "1e+"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == 1.0); ASSERT (ptr == input + 1); ASSERT (errno == 0); } { const char input[] = "1e-"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == 1.0); ASSERT (ptr == input + 1); ASSERT (errno == 0); } { const char input[] = "1E 2"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == 1.0); /* HP-UX 11.11, IRIX 6.5, OSF/1 4.0 */ ASSERT (ptr == input + 1); /* HP-UX 11.11, IRIX 6.5 */ ASSERT (errno == 0); } { const char input[] = "0x"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == 0.0); ASSERT (!signbit (result)); ASSERT (ptr == input + 1); /* glibc-2.3.6, MacOS X 10.3, FreeBSD 6.2 */ ASSERT (errno == 0); } { const char input[] = "00x1"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == 0.0); ASSERT (!signbit (result)); ASSERT (ptr == input + 2); ASSERT (errno == 0); } { const char input[] = "-0x"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == 0.0); ASSERT (!!signbit (result) == !!signbit (-zero)); /* MacOS X 10.3, FreeBSD 6.2, IRIX 6.5, OSF/1 4.0 */ ASSERT (ptr == input + 2); /* glibc-2.3.6, MacOS X 10.3, FreeBSD 6.2 */ ASSERT (errno == 0); } { const char input[] = "0xg"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == 0.0); ASSERT (!signbit (result)); ASSERT (ptr == input + 1); /* glibc-2.3.6, MacOS X 10.3, FreeBSD 6.2 */ ASSERT (errno == 0); } { const char input[] = "0xp"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == 0.0); ASSERT (!signbit (result)); ASSERT (ptr == input + 1); /* glibc-2.3.6, MacOS X 10.3, FreeBSD 6.2 */ ASSERT (errno == 0); } { const char input[] = "0x."; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == 0.0); ASSERT (!signbit (result)); ASSERT (ptr == input + 1); /* glibc-2.3.6, MacOS X 10.3, FreeBSD 6.2 */ ASSERT (errno == 0); } { const char input[] = "0xp+"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == 0.0); ASSERT (!signbit (result)); ASSERT (ptr == input + 1); /* glibc-2.3.6, MacOS X 10.3, FreeBSD 6.2 */ ASSERT (errno == 0); } { const char input[] = "0xp+1"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == 0.0); ASSERT (!signbit (result)); ASSERT (ptr == input + 1); /* glibc-2.3.6, MacOS X 10.3, FreeBSD 6.2 */ ASSERT (errno == 0); } { const char input[] = "0x.p+1"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == 0.0); ASSERT (!signbit (result)); ASSERT (ptr == input + 1); /* glibc-2.3.6, MacOS X 10.3, FreeBSD 6.2 */ ASSERT (errno == 0); } { const char input[] = "1p+1"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == 1.0); ASSERT (ptr == input + 1); ASSERT (errno == 0); } /* Overflow/underflow. */ { const char input[] = "1E1000000"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == HUGE_VAL); ASSERT (ptr == input + 9); /* OSF/1 5.1 */ ASSERT (errno == ERANGE); } { const char input[] = "-1E1000000"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == -HUGE_VAL); ASSERT (ptr == input + 10); ASSERT (errno == ERANGE); } { const char input[] = "1E-100000"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (0.0 <= result && result <= DBL_MIN); ASSERT (!signbit (result)); ASSERT (ptr == input + 9); ASSERT (errno == ERANGE); } { const char input[] = "-1E-100000"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (-DBL_MIN <= result && result <= 0.0); #if 0 /* FIXME - this is glibc bug 5995; POSIX allows returning positive 0 on negative underflow, even though quality of implementation demands preserving the sign. Disable this test until fixed glibc is more prevalent. */ ASSERT (!!signbit (result) == !!signbit (-zero)); /* glibc-2.3.6, mingw */ #endif ASSERT (ptr == input + 10); ASSERT (errno == ERANGE); } /* Infinity. */ { const char input[] = "iNf"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == HUGE_VAL); /* OpenBSD 4.0, IRIX 6.5, OSF/1 5.1, mingw */ ASSERT (ptr == input + 3); /* OpenBSD 4.0, HP-UX 11.00, IRIX 6.5, OSF/1 5.1, Solaris 9, mingw */ ASSERT (errno == 0); /* HP-UX 11.11, OSF/1 4.0 */ } { const char input[] = "-InF"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == -HUGE_VAL); /* OpenBSD 4.0, IRIX 6.5, OSF/1 5.1, mingw */ ASSERT (ptr == input + 4); /* OpenBSD 4.0, HP-UX 11.00, IRIX 6.5, OSF/1 4.0, Solaris 9, mingw */ ASSERT (errno == 0); /* HP-UX 11.11, OSF/1 4.0 */ } { const char input[] = "infinite"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == HUGE_VAL); /* OpenBSD 4.0, HP-UX 11.11, IRIX 6.5, OSF/1 5.1, mingw */ ASSERT (ptr == input + 3); /* OpenBSD 4.0, HP-UX 11.11, IRIX 6.5, OSF/1 5.1, mingw */ ASSERT (errno == 0); /* OSF/1 4.0 */ } { const char input[] = "infinitY"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == HUGE_VAL); /* OpenBSD 4.0, IRIX 6.5, OSF/1 5.1, mingw */ ASSERT (ptr == input + 8); /* OpenBSD 4.0, HP-UX 11.00, IRIX 6.5, OSF/1 5.1, Solaris 9, mingw */ ASSERT (errno == 0); /* HP-UX 11.11, OSF/1 4.0 */ } { const char input[] = "infinitY."; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == HUGE_VAL); /* OpenBSD 4.0, HP-UX 11.11, IRIX 6.5, OSF/1 5.1, mingw */ ASSERT (ptr == input + 8); /* OpenBSD 4.0, HP-UX 11.11, IRIX 6.5, OSF/1 5.1, mingw */ ASSERT (errno == 0); /* OSF/1 4.0 */ } /* NaN. Some processors set the sign bit of the default NaN, so all we check is that using a sign changes the result. */ { const char input[] = "-nan"; char *ptr1; char *ptr2; double result1; double result2; errno = 0; result1 = strtod (input, &ptr1); result2 = strtod (input + 1, &ptr2); #if 1 /* All known CPUs support NaNs. */ ASSERT (isnand (result1)); /* OpenBSD 4.0, IRIX 6.5, OSF/1 5.1, mingw */ ASSERT (isnand (result2)); /* OpenBSD 4.0, IRIX 6.5, OSF/1 5.1, mingw */ # if 0 /* Sign bits of NaN is a portability sticking point, not worth worrying about. */ ASSERT (!!signbit (result1) != !!signbit (result2)); /* glibc-2.3.6, IRIX 6.5, OSF/1 5.1, mingw */ # endif ASSERT (ptr1 == input + 4); /* OpenBSD 4.0, IRIX 6.5, OSF/1 5.1, Solaris 2.5.1, mingw */ ASSERT (ptr2 == input + 4); /* OpenBSD 4.0, IRIX 6.5, OSF/1 5.1, Solaris 2.5.1, mingw */ ASSERT (errno == 0); /* HP-UX 11.11 */ #else ASSERT (result1 == 0.0); ASSERT (result2 == 0.0); ASSERT (!signbit (result1)); ASSERT (!signbit (result2)); ASSERT (ptr1 == input); ASSERT (ptr2 == input + 1); ASSERT (errno == 0 || errno == EINVAL); #endif } { const char input[] = "+nan("; char *ptr1; char *ptr2; double result1; double result2; errno = 0; result1 = strtod (input, &ptr1); result2 = strtod (input + 1, &ptr2); #if 1 /* All known CPUs support NaNs. */ ASSERT (isnand (result1)); /* OpenBSD 4.0, HP-UX 11.11, IRIX 6.5, OSF/1 5.1, mingw */ ASSERT (isnand (result2)); /* OpenBSD 4.0, HP-UX 11.11, IRIX 6.5, OSF/1 5.1, mingw */ ASSERT (!!signbit (result1) == !!signbit (result2)); ASSERT (ptr1 == input + 4); /* OpenBSD 4.0, HP-UX 11.11, IRIX 6.5, OSF/1 5.1, Solaris 2.5.1, mingw */ ASSERT (ptr2 == input + 4); /* OpenBSD 4.0, HP-UX 11.11, IRIX 6.5, OSF/1 5.1, Solaris 2.5.1, mingw */ ASSERT (errno == 0); #else ASSERT (result1 == 0.0); ASSERT (result2 == 0.0); ASSERT (!signbit (result1)); ASSERT (!signbit (result2)); ASSERT (ptr1 == input); ASSERT (ptr2 == input + 1); ASSERT (errno == 0 || errno == EINVAL); #endif } { const char input[] = "-nan()"; char *ptr1; char *ptr2; double result1; double result2; errno = 0; result1 = strtod (input, &ptr1); result2 = strtod (input + 1, &ptr2); #if 1 /* All known CPUs support NaNs. */ ASSERT (isnand (result1)); /* OpenBSD 4.0, HP-UX 11.11, IRIX 6.5, OSF/1 5.1, mingw */ ASSERT (isnand (result2)); /* OpenBSD 4.0, HP-UX 11.11, IRIX 6.5, OSF/1 5.1, mingw */ # if 0 /* Sign bits of NaN is a portability sticking point, not worth worrying about. */ ASSERT (!!signbit (result1) != !!signbit (result2)); /* glibc-2.3.6, IRIX 6.5, OSF/1 5.1, mingw */ # endif ASSERT (ptr1 == input + 6); /* glibc-2.3.6, MacOS X 10.3, FreeBSD 6.2, OpenBSD 4.0, AIX 5.1, HP-UX 11.11, IRIX 6.5, OSF/1 5.1, mingw */ ASSERT (ptr2 == input + 6); /* glibc-2.3.6, MacOS X 10.3, FreeBSD 6.2, OpenBSD 4.0, AIX 5.1, HP-UX 11.11, IRIX 6.5, OSF/1 5.1, mingw */ ASSERT (errno == 0); #else ASSERT (result1 == 0.0); ASSERT (result2 == 0.0); ASSERT (!signbit (result1)); ASSERT (!signbit (result2)); ASSERT (ptr1 == input); ASSERT (ptr2 == input + 1); ASSERT (errno == 0 || errno == EINVAL); #endif } { const char input[] = " nan()."; char *ptr; double result; errno = 0; result = strtod (input, &ptr); #if 1 /* All known CPUs support NaNs. */ ASSERT (isnand (result)); /* OpenBSD 4.0, HP-UX 11.11, IRIX 6.5, OSF/1 5.1, mingw */ ASSERT (ptr == input + 6); /* glibc-2.3.6, MacOS X 10.3, FreeBSD 6.2, OpenBSD 4.0, AIX 5.1, HP-UX 11.11, IRIX 6.5, OSF/1 5.1, mingw */ ASSERT (errno == 0); #else ASSERT (result == 0.0); ASSERT (!signbit (result)); ASSERT (ptr == input); ASSERT (errno == 0 || errno == EINVAL); #endif } { /* The behavior of nan(0) is implementation-defined, but all implementations we know of which handle optional n-char-sequences handle nan(0) the same as nan(). */ const char input[] = "-nan(0)."; char *ptr1; char *ptr2; double result1; double result2; errno = 0; result1 = strtod (input, &ptr1); result2 = strtod (input + 1, &ptr2); #if 1 /* All known CPUs support NaNs. */ ASSERT (isnand (result1)); /* OpenBSD 4.0, HP-UX 11.11, IRIX 6.5, OSF/1 5.1, mingw */ ASSERT (isnand (result2)); /* OpenBSD 4.0, HP-UX 11.11, IRIX 6.5, OSF/1 5.1, mingw */ # if 0 /* Sign bits of NaN is a portability sticking point, not worth worrying about. */ ASSERT (!!signbit (result1) != !!signbit (result2)); /* glibc-2.3.6, IRIX 6.5, OSF/1 5.1, mingw */ # endif ASSERT (ptr1 == input + 7); /* glibc-2.3.6, OpenBSD 4.0, AIX 5.1, HP-UX 11.11, IRIX 6.5, OSF/1 5.1, mingw */ ASSERT (ptr2 == input + 7); /* glibc-2.3.6, OpenBSD 4.0, AIX 5.1, HP-UX 11.11, IRIX 6.5, OSF/1 5.1, mingw */ ASSERT (errno == 0); #else ASSERT (result1 == 0.0); ASSERT (result2 == 0.0); ASSERT (!signbit (result1)); ASSERT (!signbit (result2)); ASSERT (ptr1 == input); ASSERT (ptr2 == input + 1); ASSERT (errno == 0 || errno == EINVAL); #endif } /* Hex. */ { const char input[] = "0xa"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == 10.0); /* NetBSD 3.0, OpenBSD 4.0, AIX 5.1, HP-UX 11.11, IRIX 6.5, OSF/1 5.1, Solaris 10, mingw */ ASSERT (ptr == input + 3); /* NetBSD 3.0, OpenBSD 4.0, AIX 5.1, HP-UX 11.11, IRIX 6.5, OSF/1 5.1, Solaris 10, mingw */ ASSERT (errno == 0); } { const char input[] = "0XA"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == 10.0); /* NetBSD 3.0, OpenBSD 4.0, AIX 5.1, HP-UX 11.11, IRIX 6.5, OSF/1 5.1, Solaris 10, mingw */ ASSERT (ptr == input + 3); /* NetBSD 3.0, OpenBSD 4.0, AIX 5.1, HP-UX 11.11, IRIX 6.5, OSF/1 5.1, Solaris 10, mingw */ ASSERT (errno == 0); } { const char input[] = "0x1p"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == 1.0); /* NetBSD 3.0, OpenBSD 4.0, AIX 5.1, HP-UX 11.11, IRIX 6.5, OSF/1 5.1, Solaris 10, mingw */ ASSERT (ptr == input + 3); /* NetBSD 3.0, OpenBSD 4.0, AIX 5.1, HP-UX 11.11, IRIX 6.5, OSF/1 5.1, Solaris 10, mingw */ ASSERT (errno == 0); } { const char input[] = "0x1p+"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == 1.0); /* NetBSD 3.0, OpenBSD 4.0, AIX 5.1, HP-UX 11.11, IRIX 6.5, OSF/1 5.1, Solaris 10, mingw */ ASSERT (ptr == input + 3); /* NetBSD 3.0, OpenBSD 4.0, AIX 5.1, HP-UX 11.11, IRIX 6.5, OSF/1 5.1, Solaris 10, mingw */ ASSERT (errno == 0); } { const char input[] = "0x1p+1"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == 2.0); /* NetBSD 3.0, OpenBSD 4.0, AIX 5.1, HP-UX 11.11, IRIX 6.5, OSF/1 5.1, Solaris 10, mingw */ ASSERT (ptr == input + 6); /* NetBSD 3.0, OpenBSD 4.0, AIX 5.1, HP-UX 11.11, IRIX 6.5, OSF/1 5.1, Solaris 10, mingw */ ASSERT (errno == 0); } { const char input[] = "0x1p+1a"; char *ptr; double result; errno = 0; result = strtod (input, &ptr); ASSERT (result == 2.0); /* NetBSD 3.0, OpenBSD 4.0, AIX 5.1, HP-UX 11.11, IRIX 6.5, OSF/1 5.1, Solaris 10, mingw */ ASSERT (ptr == input + 6); /* NetBSD 3.0, OpenBSD 4.0, AIX 5.1, HP-UX 11.11, IRIX 6.5, OSF/1 5.1, Solaris 10, mingw */ ASSERT (errno == 0); } /* Large buffers. */ { size_t m = 1000000; char *input = malloc (m + 1); if (input) { char *ptr; double result; memset (input, '\t', m - 1); input[m - 1] = '1'; input[m] = '\0'; errno = 0; result = strtod (input, &ptr); ASSERT (result == 1.0); ASSERT (ptr == input + m); ASSERT (errno == 0); } free (input); } { size_t m = 1000000; char *input = malloc (m + 1); if (input) { char *ptr; double result; memset (input, '0', m - 1); input[m - 1] = '1'; input[m] = '\0'; errno = 0; result = strtod (input, &ptr); ASSERT (result == 1.0); ASSERT (ptr == input + m); ASSERT (errno == 0); } free (input); } #if 0 /* Newlib has an artificial limit of 20000 for the exponent. TODO - gnulib should fix this. */ { size_t m = 1000000; char *input = malloc (m + 1); if (input) { char *ptr; double result; input[0] = '.'; memset (input + 1, '0', m - 10); input[m - 9] = '1'; input[m - 8] = 'e'; input[m - 7] = '+'; input[m - 6] = '9'; input[m - 5] = '9'; input[m - 4] = '9'; input[m - 3] = '9'; input[m - 2] = '9'; input[m - 1] = '1'; input[m] = '\0'; errno = 0; result = strtod (input, &ptr); ASSERT (result == 1.0); /* MacOS X 10.3, FreeBSD 6.2, NetBSD 3.0, OpenBSD 4.0, IRIX 6.5, OSF/1 5.1, mingw */ ASSERT (ptr == input + m); /* OSF/1 5.1 */ ASSERT (errno == 0); /* MacOS X 10.3, FreeBSD 6.2, NetBSD 3.0, OpenBSD 4.0, IRIX 6.5, OSF/1 5.1, mingw */ } free (input); } { size_t m = 1000000; char *input = malloc (m + 1); if (input) { char *ptr; double result; input[0] = '1'; memset (input + 1, '0', m - 9); input[m - 8] = 'e'; input[m - 7] = '-'; input[m - 6] = '9'; input[m - 5] = '9'; input[m - 4] = '9'; input[m - 3] = '9'; input[m - 2] = '9'; input[m - 1] = '1'; input[m] = '\0'; errno = 0; result = strtod (input, &ptr); ASSERT (result == 1.0); /* MacOS X 10.3, FreeBSD 6.2, NetBSD 3.0, OpenBSD 4.0, IRIX 6.5, OSF/1 5.1, mingw */ ASSERT (ptr == input + m); ASSERT (errno == 0); /* MacOS X 10.3, FreeBSD 6.2, NetBSD 3.0, OpenBSD 4.0, IRIX 6.5, OSF/1 5.1, mingw */ } free (input); } #endif { size_t m = 1000000; char *input = malloc (m + 1); if (input) { char *ptr; double result; input[0] = '-'; input[1] = '0'; input[2] = 'e'; input[3] = '1'; memset (input + 4, '0', m - 3); input[m] = '\0'; errno = 0; result = strtod (input, &ptr); ASSERT (result == 0.0); ASSERT (!!signbit (result) == !!signbit (-zero)); /* IRIX 6.5, OSF/1 4.0 */ ASSERT (ptr == input + m); ASSERT (errno == 0); } free (input); } /* Rounding. */ /* TODO - is it worth some tests of rounding for typical IEEE corner cases, such as .5 ULP rounding up to the smallest denormal and not causing underflow, or DBL_MIN - .5 ULP not causing an infinite loop? */ return status; }
/** * Op throws any object toss_item. * @param op Living thing throwing something. * @param toss_item Item thrown. * @param dir Direction to throw. */ void do_throw(object *op, object *toss_item, int dir) { object *left_cont, *throw_ob = toss_item, *left = NULL, *tmp_op; tag_t left_tag; rv_vector range_vector; if (!throw_ob) { if (op->type == PLAYER) { new_draw_info(NDI_UNIQUE, op, "You have nothing to throw."); } return; } if (QUERY_FLAG(throw_ob, FLAG_STARTEQUIP)) { if (op->type == PLAYER) { new_draw_info(NDI_UNIQUE, op, "The gods won't let you throw that."); } return; } if (throw_ob->weight <= 0) { new_draw_info_format(NDI_UNIQUE, op, "You can't throw %s.\n", query_base_name(throw_ob, NULL)); return; } /* These are throwing objects left to the player */ left = throw_ob; left_cont = left->env; left_tag = left->count; /* Sometimes get_split_ob can't split an object (because op->nrof==0?) * and returns NULL. We must use 'left' then */ if ((throw_ob = get_split_ob(throw_ob, 1, NULL, 0)) == NULL) { throw_ob = left; remove_ob(left); check_walk_off(left, NULL, MOVE_APPLY_VANISHED); if (op->type == PLAYER) { esrv_del_item(CONTR(op), left->count, left->env); } } else if (op->type == PLAYER) { if (was_destroyed(left, left_tag)) { esrv_del_item(CONTR(op), left_tag, left_cont); } else { esrv_update_item(UPD_NROF, op, left); } } /* Special case: throwing powdery substances like dust, dirt */ if (QUERY_FLAG(throw_ob, FLAG_DUST)) { cast_dust(op, throw_ob, dir); /* update the shooting speed for the player action timer. * We init the used skill with it - its not calculated here. * cast_dust() can change the used skill... */ if (op->type == PLAYER) { op->chosen_skill->stats.maxsp = throw_ob->last_grace; } return; } /* Targetting throwing */ if (!dir && op->type == PLAYER && OBJECT_VALID(CONTR(op)->target_object, CONTR(op)->target_object_count)) { dir = get_dir_to_target(op, CONTR(op)->target_object, &range_vector); } /* Three things here prevent a throw, you aimed at your feet, you * have no effective throwing strength, or you threw at a wall */ if (!dir || wall(op->map, op->x + freearr_x[dir], op->y + freearr_y[dir])) { /* Bounces off 'wall', and drops to feet */ if (!QUERY_FLAG(throw_ob, FLAG_REMOVED)) { remove_ob(throw_ob); if (check_walk_off(throw_ob, NULL, MOVE_APPLY_MOVE) != CHECK_WALK_OK) { return; } } throw_ob->x = op->x; throw_ob->y = op->y; if (!insert_ob_in_map(throw_ob, op->map, op, 0)) { return; } if (op->type == PLAYER) { if (!dir) { new_draw_info_format(NDI_UNIQUE, op, "You drop %s at the ground.", query_name(throw_ob, NULL)); } else { new_draw_info(NDI_UNIQUE, op, "Something is in the way."); } } return; } set_owner(throw_ob, op); set_owner(throw_ob->inv, op); throw_ob->direction = dir; throw_ob->x = op->x; throw_ob->y = op->y; /* Save original wc and dam */ throw_ob->last_heal = throw_ob->stats.wc; throw_ob->stats.hp = throw_ob->stats.dam; /* Speed */ throw_ob->speed = MIN(1.0f, (speed_bonus[op->stats.Str] + 1.0f) / 1.5f); /* Now we get the wc from the used skill. */ if ((tmp_op = SK_skill(op))) { throw_ob->stats.wc += tmp_op->last_heal; } /* Monsters */ else { throw_ob->stats.wc += 10; } throw_ob->stats.wc_range = op->stats.wc_range; if (QUERY_FLAG(throw_ob, FLAG_IS_THROWN)) { throw_ob->stats.dam += throw_ob->magic; throw_ob->stats.wc += throw_ob->magic; /* Adjust for players */ if (op->type == PLAYER) { op->chosen_skill->stats.maxsp = throw_ob->last_grace; throw_ob->stats.dam = FABS((int) ((float) (throw_ob->stats.dam + dam_bonus[op->stats.Str] / 2) * LEVEL_DAMAGE(SK_level(op)))); throw_ob->stats.wc += thaco_bonus[op->stats.Dex] + SK_level(op); } else { throw_ob->stats.dam = FABS((int) ((float) (throw_ob->stats.dam) * LEVEL_DAMAGE(op->level))); throw_ob->stats.wc += 10 + op->level; } throw_ob->stats.grace = throw_ob->last_sp; throw_ob->stats.maxgrace = 60 + (RANDOM() % 12); /* Only throw objects get directional faces */ if (GET_ANIM_ID(throw_ob) && NUM_ANIMATIONS(throw_ob)) { SET_ANIMATION(throw_ob, (NUM_ANIMATIONS(throw_ob) / NUM_FACINGS(throw_ob)) * dir); } /* Adjust damage with item condition */ throw_ob->stats.dam = (sint16) (((float) throw_ob->stats.dam / 100.0f) * (float) throw_ob->item_condition); } if (throw_ob->stats.dam < 0) { throw_ob->stats.dam = 0; } update_ob_speed(throw_ob); throw_ob->speed_left = 0; SET_MULTI_FLAG(throw_ob, FLAG_FLYING); SET_FLAG(throw_ob, FLAG_FLY_ON); SET_FLAG(throw_ob, FLAG_WALK_ON); play_sound_map(op->map, CMD_SOUND_EFFECT, "throw.ogg", op->x, op->y, 0, 0); /* Trigger the THROW event */ trigger_event(EVENT_THROW, op, throw_ob, NULL, NULL, 0, 0, 0, SCRIPT_FIX_ACTIVATOR); if (insert_ob_in_map(throw_ob, op->map, op, 0)) { move_arrow(throw_ob); } }
/* Given a parameter vector p made up of the 3D coordinates of n points, compute in * jac the jacobian of the predicted measurements, i.e. the jacobian of the projections of 3D points in the m images. * The jacobian is approximated with the aid of finite differences and is returned in the order * (B_11, ..., B_1m, ..., B_n1, ..., B_nm), where B_ij=dx_ij/db_i (see HZ). * Notice that depending on idxij, some of the B_ij might be missing * * Problem-specific information is assumed to be stored in a structure pointed to by "dat". * * NOTE: This function is provided mainly for illustration purposes; in case that execution time is a concern, * the jacobian should be computed analytically */ static void sba_str_Qs_fdjac( double *p, /* I: current parameter estimate, (n*pnp)x1 */ struct sba_crsm *idxij, /* I: sparse matrix containing the location of x_ij in hx */ int *rcidxs, /* work array for the indexes of nonzero elements of a single sparse matrix row/column */ int *rcsubs, /* work array for the subscripts of nonzero elements in a single sparse matrix row/column */ double *jac, /* O: array for storing the approximated jacobian */ void *dat) /* I: points to a "wrap_str_data_" structure */ { register int i, j, ii, jj; double *pbi; register double *pB; //int m; int n, nnz, Bsz; double tmp; register double d, d1; struct wrap_str_data_ *fdjd; void (*proj)(int j, int i, double *bi, double *xij, void *adata); double *hxij, *hxxij; int pnp, mnp; void *adata; /* retrieve problem-specific information passed in *dat */ fdjd=(struct wrap_str_data_ *)dat; proj=fdjd->proj; pnp=fdjd->pnp; mnp=fdjd->mnp; adata=fdjd->adata; n=idxij->nr; //m=idxij->nc; Bsz=mnp*pnp; /* allocate memory for hxij, hxxij */ if((hxij=malloc(2*mnp*sizeof(double)))==NULL){ fprintf(stderr, "memory allocation request failed in sba_str_Qs_fdjac()!\n"); exit(1); } hxxij=hxij+mnp; /* compute B_ij */ for(i=0; i<n; ++i){ pbi=p+i*pnp; // i-th point parameters nnz=sba_crsm_row_elmidxs(idxij, i, rcidxs, rcsubs); /* find nonzero B_ij, j=0...m-1 */ for(jj=0; jj<pnp; ++jj){ /* determine d=max(SBA_DELTA_SCALE*|pbi[jj]|, SBA_MIN_DELTA), see HZ */ d=(double)(SBA_DELTA_SCALE)*pbi[jj]; // force evaluation d=FABS(d); if(d<SBA_MIN_DELTA) d=SBA_MIN_DELTA; d1=1.0/d; /* invert so that divisions can be carried out faster as multiplications */ for(j=0; j<nnz; ++j){ (*proj)(rcsubs[j], i, pbi, hxij, adata); // evaluate supplied function on current solution tmp=pbi[jj]; pbi[jj]+=d; (*proj)(rcsubs[j], i, pbi, hxxij, adata); pbi[jj]=tmp; /* restore */ pB=jac + idxij->val[rcidxs[j]]*Bsz; // set pB to point to B_ij for(ii=0; ii<mnp; ++ii) pB[ii*pnp+jj]=(hxxij[ii]-hxij[ii])*d1; } } } free(hxij); }
/** * Process objects with speed, like teleporters, players, etc. */ void process_events (void) { object *op; tag_t tag; /* Put marker object at beginning of active list */ marker.active_next = active_objects; if (marker.active_next) { marker.active_next->active_prev = ▮ } marker.active_prev = NULL; active_objects = ▮ while (marker.active_next) { op = marker.active_next; tag = op->count; /* Move marker forward - swap op and marker */ op->active_prev = marker.active_prev; if (op->active_prev) { op->active_prev->active_next = op; } else { active_objects = op; } marker.active_next = op->active_next; if (marker.active_next) { marker.active_next->active_prev = ▮ } marker.active_prev = op; op->active_next = ▮ /* Now process op */ if (unlikely(OBJECT_FREE(op))) { LOG(ERROR, "Free object on active list"); op->speed = 0; object_update_speed(op); continue; } if (unlikely(QUERY_FLAG(op, FLAG_REMOVED))) { /* * This is not actually an error; object_remove() doesn't remove * active objects from the active list, since the two most common * next steps are to either: re-insert the object elsewhere (for * which we would have to re-add it to the active list), or destroy * the object altogether (which does remove it from the active * list). * * For now, just drop a DEVEL message about this case, so we can * get a better idea of the objects that rely on this behavior. */ LOG(DEVEL, "Removed object on active list: %s", object_get_str(op)); op->speed = 0; object_update_speed(op); continue; } if (unlikely(DBL_EQUAL(op->speed, 0.0))) { LOG(ERROR, "Object has no speed, but is on active list: %s", object_get_str(op)); object_update_speed(op); continue; } if (unlikely(op->map == NULL && op->env == NULL)) { LOG(ERROR, "Object without map or inventory is on active list: %s", object_get_str(op)); op->speed = 0; object_update_speed(op); continue; } /* As long we are > 0, we are not ready to swing. */ if (op->weapon_speed_left > 0) { op->weapon_speed_left -= op->weapon_speed; } if (op->speed_left <= 0) { op->speed_left += FABS(op->speed); } if (op->type == PLAYER && op->speed_left > op->speed) { op->speed_left = op->speed; } if (op->speed_left >= 0 || op->type == PLAYER) { if (op->type != PLAYER) { --op->speed_left; } object_process(op); if (OBJECT_DESTROYED(op, tag)) { continue; } } if (op->anim_flags & ANIM_FLAG_STOP_MOVING) { op->anim_flags &= ~(ANIM_FLAG_MOVING | ANIM_FLAG_STOP_MOVING); } if (op->anim_flags & ANIM_FLAG_STOP_ATTACKING) { if (op->enemy == NULL || !attack_is_melee_range(op, op->enemy)) { op->anim_flags &= ~ANIM_FLAG_ATTACKING; } op->anim_flags &= ~ANIM_FLAG_STOP_ATTACKING; } /* Handle archetype-field anim_speed differently when it comes to * the animation. If we have a value on this we don't animate it * at speed-events. */ if (QUERY_FLAG(op, FLAG_ANIMATE)) { if (op->last_anim >= op->anim_speed) { animate_object(op); op->last_anim = 1; if (op->anim_flags & ANIM_FLAG_ATTACKING) { op->anim_flags |= ANIM_FLAG_STOP_ATTACKING; } if (op->anim_flags & ANIM_FLAG_MOVING) { if ((op->anim_flags & ANIM_FLAG_ATTACKING && !(op->anim_flags & ANIM_FLAG_STOP_ATTACKING)) || op->type == PLAYER || !OBJECT_VALID(op->enemy, op->enemy_count)) { op->anim_flags |= ANIM_FLAG_STOP_MOVING; } } } else { op->last_anim++; } } } /* Remove marker object from active list */ if (marker.active_prev) { marker.active_prev->active_next = NULL; } else { active_objects = NULL; } }
static double evaluateGAMMA_FLEX(int *wptr, double *x1_start, double *x2_start, double *tipVector, unsigned char *tipX1, const int n, double *diagptable, const int states) { double sum = 0.0, term, *x1, *x2; int i, j, k; /* span is the offset within the likelihood array at an inner node that gets us from the values of site i to the values of site i + 1 */ const int span = states * 4; /* we distingusih between two cases here: one node of the two nodes defining the branch at which we put the virtual root is a tip. Both nodes can not be tips because we do not allow for two-taxon trees ;-) Nota that, if a node is a tip, this will always be tipX1. This is done for code simplicity and the flipping of the nodes is done before when we compute the traversal descriptor. */ /* the left node is a tip */ if(tipX1) { /* loop over the sites of this partition */ for (i = 0; i < n; i++) { /* access pre-computed tip vector values via a lookup table */ x1 = &(tipVector[states * tipX1[i]]); /* access the other(inner) node at the other end of the branch */ x2 = &(x2_start[span * i]); /* loop over GAMMA rate categories, hard-coded as 4 in RAxML */ for(j = 0, term = 0.0; j < 4; j++) /* loop over states and multiply them with the P matrix */ for(k = 0; k < states; k++) term += x1[k] * x2[j * states + k] * diagptable[j * states + k]; /* take the log of the likelihood and multiply the per-gamma rate likelihood by 1/4. Under the GAMMA model the 4 discrete GAMMA rates all have the same probability of 0.25 */ term = LOG(0.25 * FABS(term)); sum += wptr[i] * term; } } else { for (i = 0; i < n; i++) { /* same as before, only that now we access two inner likelihood vectors x1 and x2 */ x1 = &(x1_start[span * i]); x2 = &(x2_start[span * i]); for(j = 0, term = 0.0; j < 4; j++) for(k = 0; k < states; k++) term += x1[j * states + k] * x2[j * states + k] * diagptable[j * states + k]; term = LOG(0.25 * FABS(term)); sum += wptr[i] * term; } } return sum; }
/* Similar to the LEVMAR_LEC_DER() function above, except that the jacobian is approximated * with the aid of finite differences (forward or central, see the comment for the opts argument) */ int LEVMAR_LEC_DIF( void (*func)(LM_REAL *p, LM_REAL *hx, int m, int n, void *adata), /* functional relation describing measurements. A p \in R^m yields a \hat{x} \in R^n */ LM_REAL *p, /* I/O: initial parameter estimates. On output has the estimated solution */ LM_REAL *x, /* I: measurement vector */ int m, /* I: parameter vector dimension (i.e. #unknowns) */ int n, /* I: measurement vector dimension */ LM_REAL *A, /* I: constraints matrix, kxm */ LM_REAL *b, /* I: right hand constraints vector, kx1 */ int k, /* I: number of contraints (i.e. A's #rows) */ int itmax, /* I: maximum number of iterations */ LM_REAL opts[5], /* I: opts[0-3] = minim. options [\mu, \epsilon1, \epsilon2, \epsilon3, \delta]. Respectively the * scale factor for initial \mu, stopping thresholds for ||J^T e||_inf, ||Dp||_2 and ||e||_2 and * the step used in difference approximation to the jacobian. Set to NULL for defaults to be used. * If \delta<0, the jacobian is approximated with central differences which are more accurate * (but slower!) compared to the forward differences employed by default. */ LM_REAL info[LM_INFO_SZ], /* O: information regarding the minimization. Set to NULL if don't care * info[0]= ||e||_2 at initial p. * info[1-4]=[ ||e||_2, ||J^T e||_inf, ||Dp||_2, mu/max[J^T J]_ii ], all computed at estimated p. * info[5]= # iterations, * info[6]=reason for terminating: 1 - stopped by small gradient J^T e * 2 - stopped by small Dp * 3 - stopped by itmax * 4 - singular matrix. Restart from current p with increased mu * 5 - no further error reduction is possible. Restart with increased mu * 6 - stopped by small ||e||_2 * info[7]= # function evaluations * info[8]= # jacobian evaluations */ LM_REAL *work, /* working memory, allocate if NULL */ LM_REAL *covar, /* O: Covariance matrix corresponding to LS solution; mxm. Set to NULL if not needed. */ void *adata) /* pointer to possibly additional data, passed uninterpreted to func. * Set to NULL if not needed */ { struct LMLEC_DATA data; LM_REAL *ptr, *Z, *pp, *p0, *Zimm; /* Z is mxmm */ int mm, ret; register int i, j; register LM_REAL tmp; LM_REAL locinfo[LM_INFO_SZ]; mm=m-k; ptr=(LM_REAL *)malloc((2*m + m*mm + mm)*sizeof(LM_REAL)); if(!ptr){ fprintf(stderr, LCAT(LEVMAR_LEC_DIF, "(): memory allocation request failed\n")); exit(1); } data.p=p; p0=ptr; data.c=p0+m; data.Z=Z=data.c+m; data.jac=NULL; pp=data.Z+m*mm; data.ncnstr=k; data.func=func; data.jacf=NULL; data.adata=adata; LMLEC_ELIM(A, b, data.c, NULL, Z, k, m); // compute c, Z /* compute pp s.t. p = c + Z*pp or (Z^T Z)*pp=Z^T*(p-c) * Due to orthogonality, Z^T Z = I and the last equation * becomes pp=Z^T*(p-c). Also, save the starting p in p0 */ for(i=0; i<m; ++i){ p0[i]=p[i]; p[i]-=data.c[i]; } /* Z^T*(p-c) */ for(i=0; i<mm; ++i){ for(j=0, tmp=0.0; j<m; ++j) tmp+=Z[j*mm+i]*p[j]; pp[i]=tmp; } /* compute the p corresponding to pp (i.e. c + Z*pp) and compare with p0 */ for(i=0; i<m; ++i){ Zimm=Z+i*mm; for(j=0, tmp=data.c[i]; j<mm; ++j) tmp+=Zimm[j]*pp[j]; // tmp+=Z[i*mm+j]*pp[j]; if(FABS(tmp-p0[i])>CNST(1E-03)) fprintf(stderr, RCAT("Warning: component %d of starting point not feasible in ", LEVMAR_LEC_DIF) "()! [%.10g reset to %.10g]\n", i, p0[i], tmp); } if(!info) info=locinfo; /* make sure that LEVMAR_DIF() is called with non-null info */ /* note that covariance computation is not requested from LEVMAR_DIF() */ ret=LEVMAR_DIF(LMLEC_FUNC, pp, x, mm, n, itmax, opts, info, work, NULL, (void *)&data); /* p=c + Z*pp */ for(i=0; i<m; ++i){ Zimm=Z+i*mm; for(j=0, tmp=data.c[i]; j<mm; ++j) tmp+=Zimm[j]*pp[j]; // tmp+=Z[i*mm+j]*pp[j]; p[i]=tmp; } /* compute the jacobian with finite differences and use it to estimate the covariance */ if(covar){ LM_REAL *hx, *wrk, *jac; hx=(LM_REAL *)malloc((2*n+n*m)*sizeof(LM_REAL)); if(!work){ fprintf(stderr, LCAT(LEVMAR_LEC_DIF, "(): memory allocation request failed\n")); exit(1); } wrk=hx+n; jac=wrk+n; (*func)(p, hx, m, n, adata); /* evaluate function at p */ FDIF_FORW_JAC_APPROX(func, p, hx, wrk, (LM_REAL)LM_DIFF_DELTA, jac, m, n, adata); /* compute the jacobian at p */ TRANS_MAT_MAT_MULT(jac, covar, n, m, __BLOCKSZ__); /* covar = J^T J */ LEVMAR_COVAR(covar, covar, info[1], m, n); free(hx); } free(ptr); return ret; }
static double evaluateGTRGAMMAPROT_GAPPED_SAVE (int *wptr, double *x1, double *x2, double *tipVector, unsigned char *tipX1, int n, double *diagptable, double *x1_gapColumn, double *x2_gapColumn, unsigned int *x1_gap, unsigned int *x2_gap) { double sum = 0.0, term; int i, j, l; double *left, *right, *x1_ptr = x1, *x2_ptr = x2, *x1v, *x2v; if(tipX1) { for (i = 0; i < n; i++) { if(x2_gap[i / 32] & mask32[i % 32]) x2v = x2_gapColumn; else { x2v = x2_ptr; x2_ptr += 80; } __m128d tv = _mm_setzero_pd(); left = &(tipVector[20 * tipX1[i]]); for(j = 0, term = 0.0; j < 4; j++) { double *d = &diagptable[j * 20]; right = &(x2v[20 * j]); for(l = 0; l < 20; l+=2) { __m128d mul = _mm_mul_pd(_mm_load_pd(&left[l]), _mm_load_pd(&right[l])); tv = _mm_add_pd(tv, _mm_mul_pd(mul, _mm_load_pd(&d[l]))); } } tv = _mm_hadd_pd(tv, tv); _mm_storel_pd(&term, tv); term = LOG(0.25 * FABS(term)); sum += wptr[i] * term; } } else { for (i = 0; i < n; i++) { if(x1_gap[i / 32] & mask32[i % 32]) x1v = x1_gapColumn; else { x1v = x1_ptr; x1_ptr += 80; } if(x2_gap[i / 32] & mask32[i % 32]) x2v = x2_gapColumn; else { x2v = x2_ptr; x2_ptr += 80; } __m128d tv = _mm_setzero_pd(); for(j = 0, term = 0.0; j < 4; j++) { double *d = &diagptable[j * 20]; left = &(x1v[20 * j]); right = &(x2v[20 * j]); for(l = 0; l < 20; l+=2) { __m128d mul = _mm_mul_pd(_mm_load_pd(&left[l]), _mm_load_pd(&right[l])); tv = _mm_add_pd(tv, _mm_mul_pd(mul, _mm_load_pd(&d[l]))); } } tv = _mm_hadd_pd(tv, tv); _mm_storel_pd(&term, tv); term = LOG(0.25 * FABS(term)); sum += wptr[i] * term; } } return sum; }
/* * This function implements an elimination strategy for linearly constrained * optimization problems. The strategy relies on QR decomposition to transform * an optimization problem constrained by Ax=b to an equivalent, unconstrained * one. Also referred to as "null space" or "reduced Hessian" method. * See pp. 430-433 (chap. 15) of "Numerical Optimization" by Nocedal-Wright * for details. * * A is mxn with m<=n and rank(A)=m * Two matrices Y and Z of dimensions nxm and nx(n-m) are computed from A^T so that * their columns are orthonormal and every x can be written as x=Y*b + Z*x_z= * c + Z*x_z, where c=Y*b is a fixed vector of dimension n and x_z is an * arbitrary vector of dimension n-m. Then, the problem of minimizing f(x) * subject to Ax=b is equivalent to minimizing f(c + Z*x_z) with no constraints. * The computed Y and Z are such that any solution of Ax=b can be written as * x=Y*x_y + Z*x_z for some x_y, x_z. Furthermore, A*Y is nonsingular, A*Z=0 * and Z spans the null space of A. * * The function accepts A, b and computes c, Y, Z. If b or c is NULL, c is not * computed. Also, Y can be NULL in which case it is not referenced. * The function returns 0 in case of error, A's computed rank if successfull * */ static int LMLEC_ELIM(LM_REAL *A, LM_REAL *b, LM_REAL *c, LM_REAL *Y, LM_REAL *Z, int m, int n) { static LM_REAL eps=CNST(-1.0); LM_REAL *buf=NULL; LM_REAL *a, *tau, *work, *r; register LM_REAL tmp; int a_sz, jpvt_sz, tau_sz, r_sz, Y_sz, worksz; int info, rank, *jpvt, tot_sz, mintmn, tm, tn; register int i, j, k; if(m>n){ fprintf(stderr, RCAT("matrix of constraints cannot have more rows than columns in", LMLEC_ELIM) "()!\n"); exit(1); } tm=n; tn=m; // transpose dimensions mintmn=m; /* calculate required memory size */ a_sz=tm*tm; // tm*tn is enough for xgeqp3() jpvt_sz=tn; tau_sz=mintmn; r_sz=mintmn*mintmn; // actually smaller if a is not of full row rank worksz=2*tn+(tn+1)*32; // more than needed Y_sz=(Y)? 0 : tm*tn; tot_sz=jpvt_sz*sizeof(int) + (a_sz + tau_sz + r_sz + worksz + Y_sz)*sizeof(LM_REAL); buf=(LM_REAL *)malloc(tot_sz); /* allocate a "big" memory chunk at once */ if(!buf){ fprintf(stderr, RCAT("Memory allocation request failed in ", LMLEC_ELIM) "()\n"); exit(1); } a=(LM_REAL *)buf; jpvt=(int *)(a+a_sz); tau=(LM_REAL *)(jpvt + jpvt_sz); r=tau+tau_sz; work=r+r_sz; if(!Y) Y=work+worksz; /* copy input array so that LAPACK won't destroy it. Note that copying is * done in row-major order, which equals A^T in column-major */ for(i=0; i<tm*tn; ++i) a[i]=A[i]; /* clear jpvt */ for(i=0; i<jpvt_sz; ++i) jpvt[i]=0; /* rank revealing QR decomposition of A^T*/ GEQP3((int *)&tm, (int *)&tn, a, (int *)&tm, jpvt, tau, work, (int *)&worksz, &info); //dgeqpf_((int *)&tm, (int *)&tn, a, (int *)&tm, jpvt, tau, work, &info); /* error checking */ if(info!=0){ if(info<0){ fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", GEQP3) " in ", LMLEC_ELIM) "()\n", -info); exit(1); } else if(info>0){ fprintf(stderr, RCAT(RCAT("unknown LAPACK error (%d) for ", GEQP3) " in ", LMLEC_ELIM) "()\n", info); free(buf); return 0; } } /* the upper triangular part of a now contains the upper triangle of the unpermuted R */ if(eps<0.0){ LM_REAL aux; /* compute machine epsilon. DBL_EPSILON should do also */ for(eps=CNST(1.0); aux=eps+CNST(1.0), aux-CNST(1.0)>0.0; eps*=CNST(0.5)) ; eps*=CNST(2.0); } tmp=tm*CNST(10.0)*eps*FABS(a[0]); // threshold. tm is max(tm, tn) tmp=(tmp>CNST(1E-12))? tmp : CNST(1E-12); // ensure that threshold is not too small /* compute A^T's numerical rank by counting the non-zeros in R's diagonal */ for(i=rank=0; i<mintmn; ++i) if(a[i*(tm+1)]>tmp || a[i*(tm+1)]<-tmp) ++rank; /* loop across R's diagonal elements */ else break; /* diagonal is arranged in absolute decreasing order */ if(rank<tn){ fprintf(stderr, RCAT("\nConstraints matrix in ", LMLEC_ELIM) "() is not of full row rank (i.e. %d < %d)!\n" "Make sure that you do not specify redundant or inconsistent constraints.\n\n", rank, tn); exit(1); } /* compute the permuted inverse transpose of R */ /* first, copy R from the upper triangular part of a to r. R is rank x rank */ for(j=0; j<rank; ++j){ for(i=0; i<=j; ++i) r[i+j*rank]=a[i+j*tm]; for(i=j+1; i<rank; ++i) r[i+j*rank]=0.0; // lower part is zero } /* compute the inverse */ TRTRI("U", "N", (int *)&rank, r, (int *)&rank, &info); /* error checking */ if(info!=0){ if(info<0){ fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", TRTRI) " in ", LMLEC_ELIM) "()\n", -info); exit(1); } else if(info>0){ fprintf(stderr, RCAT(RCAT("A(%d, %d) is exactly zero for ", TRTRI) " (singular matrix) in ", LMLEC_ELIM) "()\n", info, info); free(buf); return 0; } } /* then, transpose r in place */ for(i=0; i<rank; ++i) for(j=i+1; j<rank; ++j){ tmp=r[i+j*rank]; k=j+i*rank; r[i+j*rank]=r[k]; r[k]=tmp; } /* finally, permute R^-T using Y as intermediate storage */ for(j=0; j<rank; ++j) for(i=0, k=jpvt[j]-1; i<rank; ++i) Y[i+k*rank]=r[i+j*rank]; for(i=0; i<rank*rank; ++i) // copy back to r r[i]=Y[i]; /* resize a to be tm x tm, filling with zeroes */ for(i=tm*tn; i<tm*tm; ++i) a[i]=0.0; /* compute Q in a as the product of elementary reflectors. Q is tm x tm */ ORGQR((int *)&tm, (int *)&tm, (int *)&mintmn, a, (int *)&tm, tau, work, &worksz, &info); /* error checking */ if(info!=0){ if(info<0){ fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", ORGQR) " in ", LMLEC_ELIM) "()\n", -info); exit(1); } else if(info>0){ fprintf(stderr, RCAT(RCAT("unknown LAPACK error (%d) for ", ORGQR) " in ", LMLEC_ELIM) "()\n", info); free(buf); return 0; } } /* compute Y=Q_1*R^-T*P^T. Y is tm x rank */ for(i=0; i<tm; ++i) for(j=0; j<rank; ++j){ for(k=0, tmp=0.0; k<rank; ++k) tmp+=a[i+k*tm]*r[k+j*rank]; Y[i*rank+j]=tmp; } if(b && c){ /* compute c=Y*b */ for(i=0; i<tm; ++i){ for(j=0, tmp=0.0; j<rank; ++j) tmp+=Y[i*rank+j]*b[j]; c[i]=tmp; } } /* copy Q_2 into Z. Z is tm x (tm-rank) */ for(j=0; j<tm-rank; ++j) for(i=0, k=j+rank; i<tm; ++i) Z[i*(tm-rank)+j]=a[i+k*tm]; free(buf); return rank; }
static double evaluateGTRCATPROT (int *cptr, int *wptr, double *x1, double *x2, double *tipVector, unsigned char *tipX1, int n, double *diagptable_start) { double sum = 0.0, term; double *diagptable, *left, *right; int i, l; if(tipX1) { for (i = 0; i < n; i++) { left = &(tipVector[20 * tipX1[i]]); right = &(x2[20 * i]); diagptable = &diagptable_start[20 * cptr[i]]; __m128d tv = _mm_setzero_pd(); for(l = 0; l < 20; l+=2) { __m128d lv = _mm_load_pd(&left[l]); __m128d rv = _mm_load_pd(&right[l]); __m128d mul = _mm_mul_pd(lv, rv); __m128d dv = _mm_load_pd(&diagptable[l]); tv = _mm_add_pd(tv, _mm_mul_pd(mul, dv)); } tv = _mm_hadd_pd(tv, tv); _mm_storel_pd(&term, tv); term = LOG(FABS(term)); sum += wptr[i] * term; } } else { for (i = 0; i < n; i++) { left = &x1[20 * i]; right = &x2[20 * i]; diagptable = &diagptable_start[20 * cptr[i]]; __m128d tv = _mm_setzero_pd(); for(l = 0; l < 20; l+=2) { __m128d lv = _mm_load_pd(&left[l]); __m128d rv = _mm_load_pd(&right[l]); __m128d mul = _mm_mul_pd(lv, rv); __m128d dv = _mm_load_pd(&diagptable[l]); tv = _mm_add_pd(tv, _mm_mul_pd(mul, dv)); } tv = _mm_hadd_pd(tv, tv); _mm_storel_pd(&term, tv); term = LOG(FABS(term)); sum += wptr[i] * term; } } return sum; }
int APPEND (FUNC_PREFIX, ecvt_r) (FLOAT_TYPE value, int ndigit, int *decpt, int *sign, char *buf, size_t len) { int exponent = 0; if (!ISNAN (value) && !ISINF (value) && value != 0.0) { FLOAT_TYPE (*log10_function) (FLOAT_TYPE) = &LOG10; if (log10_function) { /* Use the reasonable code if -lm is included. */ FLOAT_TYPE dexponent; dexponent = FLOOR (LOG10 (FABS (value))); value *= EXP (dexponent * -M_LN10); exponent = (int) dexponent; } else { /* Slow code that doesn't require -lm functions. */ FLOAT_TYPE d; if (value < 0.0) d = -value; else d = value; if (d < 1.0) { do { d *= 10.0; --exponent; } while (d < 1.0); } else if (d >= 10.0) { do { d *= 0.1; ++exponent; } while (d >= 10.0); } if (value < 0.0) value = -d; else value = d; } } else if (value == 0.0) /* SUSv2 leaves it unspecified whether *DECPT is 0 or 1 for 0.0. * This could be changed to -1 if we want to return 0. */ exponent = 0; if (ndigit <= 0 && len > 0) { buf[0] = '\0'; *decpt = 1; if (!ISINF (value) && !ISNAN (value)) *sign = value < 0.0; else *sign = 0; } else if (APPEND (FUNC_PREFIX, fcvt_r) (value, ndigit - 1, decpt, sign, buf, len)) return -1; *decpt += exponent; return 0; }