void RayGenerator::Generate(int level,int pw,int ph,int x,int y, Vec3q *out) const { { DASSERT(level == 3); GridSampler sampler; Vec3q right(tright), up(tup), tadd = txyz; floatq xoff(x + 0, x + 0, x + 2, x + 2); floatq yoff(y + 0, y + 0, y - 1, y - 1); for(int ty = 0; ty < 16; ty++) { floatq tposx[4] = { floatq(0.0f) + xoff, floatq(4.0f) + xoff, floatq(8.0f) + xoff, floatq(12.0f) + xoff }; floatq tposy = floatq(ty) + yoff; Vec3q tpoint = up * tposy + tadd; Vec3q points[4] = { right * tposx[0] + tpoint, right * tposx[1] + tpoint, right * tposx[2] + tpoint, right * tposx[3] + tpoint }; out[ty * 4 + 0] = points[0] * RSqrt(points[0] | points[0]); out[ty * 4 + 1] = points[1] * RSqrt(points[1] | points[1]); out[ty * 4 + 2] = points[2] * RSqrt(points[2] | points[2]); out[ty * 4 + 3] = points[3] * RSqrt(points[3] | points[3]); } return; } if(level > 2) { int npw = pw >> 1, nph = ph >> 1, nl = level - 1; Generate(nl, npw, nph, x , y , out + 0); Generate(nl, npw, nph, x + npw, y , out + (1 << nl*2)); Generate(nl, npw, nph, x , y + nph, out + (2 << nl*2)); Generate(nl, npw, nph, x + npw, y + nph, out + (3 << nl*2)); return; }
int KINSetRelErrFunc(void *kinmem, realtype relfunc) { KINMem kin_mem; realtype uround; if (kinmem == NULL) { KINProcessError(NULL, KIN_MEM_NULL, "KINSOL", "KINSetRelErrFunc", MSG_NO_MEM); return(KIN_MEM_NULL); } kin_mem = (KINMem) kinmem; if (relfunc < ZERO) { KINProcessError(NULL, KIN_ILL_INPUT, "KINSOL", "KINSetRelErrFunc", MSG_BAD_RELFUNC); return(KIN_ILL_INPUT); } if (relfunc == ZERO) { uround = kin_mem->kin_uround; kin_mem->kin_sqrt_relfunc = RSqrt(uround); } else { kin_mem->kin_sqrt_relfunc = RSqrt(relfunc); } return(KIN_SUCCESS); }
realtype N_VWrmsNormMask_NrnThread(N_Vector x, N_Vector w, N_Vector id) { long int N; N = NV_LENGTH_NT(x); retval = ZERO; xpass wpass idpass nrn_multithread_job(vwrmsnormmask); mydebug2("vwrmsnormmask %.20g\n", RSqrt(retval / N)); return(RSqrt(retval / N)); }
realtype N_VWL2Norm_NrnThread(N_Vector x, N_Vector w) { long int N; retval = ZERO; xpass wpass nrn_multithread_job(vwl2norm); N = NV_LENGTH_NT(x); mydebug2("vwl2norm %.20g\n", RSqrt(retval)); return(RSqrt(retval)); }
static int res(realtype t, N_Vector yy, N_Vector yd, N_Vector res, void *userdata) { UserData data; realtype k1, k2, k3, k4; realtype K, klA, Ks, pCO2, H; realtype y1, y2, y3, y4, y5, y6; realtype yd1, yd2, yd3, yd4, yd5; realtype r1, r2, r3, r4, r5, Fin; data = (UserData) userdata; k1 = data->k1; k2 = data->k2; k3 = data->k3; k4 = data->k4; K = data->K; klA = data->klA; Ks = data->Ks; pCO2 = data->pCO2; H = data->H; y1 = Ith(yy,1); y2 = Ith(yy,2); y3 = Ith(yy,3); y4 = Ith(yy,4); y5 = Ith(yy,5); y6 = Ith(yy,6); yd1 = Ith(yd,1); yd2 = Ith(yd,2); yd3 = Ith(yd,3); yd4 = Ith(yd,4); yd5 = Ith(yd,5); r1 = k1 * RPowerI(y1,4) * RSqrt(y2); r2 = k2 * y3 * y4; r3 = k2/K * y1 * y5; r4 = k3 * y1 * y4 * y4; r5 = k4 * y6 * y6 * RSqrt(y2); Fin = klA * ( pCO2/H - y2 ); Ith(res,1) = yd1 + TWO*r1 - r2 + r3 + r4; Ith(res,2) = yd2 + HALF*r1 + r4 + HALF*r5 - Fin; Ith(res,3) = yd3 - r1 + r2 - r3; Ith(res,4) = yd4 + r2 - r3 + TWO*r4; Ith(res,5) = yd5 - r2 + r3 - r5; Ith(res,6) = Ks*y1*y4 - y6; return(0); }
int ModifiedGS(N_Vector *v, realtype **h, int k, int p, realtype *new_vk_norm) { int i, k_minus_1, i0; realtype new_norm_2, new_product, vk_norm, temp; vk_norm = RSqrt(N_VDotProd(v[k],v[k])); k_minus_1 = k - 1; i0 = MAX(k-p, 0); /* Perform modified Gram-Schmidt */ for (i=i0; i < k; i++) { h[i][k_minus_1] = N_VDotProd(v[i], v[k]); N_VLinearSum(ONE, v[k], -h[i][k_minus_1], v[i], v[k]); } /* Compute the norm of the new vector at v[k] */ *new_vk_norm = RSqrt(N_VDotProd(v[k], v[k])); /* If the norm of the new vector at v[k] is less than FACTOR (== 1000) times unit roundoff times the norm of the input vector v[k], then the vector will be reorthogonalized in order to ensure that nonorthogonality is not being masked by a very small vector length. */ temp = FACTOR * vk_norm; if ((temp + (*new_vk_norm)) != temp) return(0); new_norm_2 = ZERO; for (i=i0; i < k; i++) { new_product = N_VDotProd(v[i], v[k]); temp = FACTOR * h[i][k_minus_1]; if ((temp + new_product) == temp) continue; h[i][k_minus_1] += new_product; N_VLinearSum(ONE, v[k],-new_product, v[i], v[k]); new_norm_2 += SQR(new_product); } if (new_norm_2 != ZERO) { new_product = SQR(*new_vk_norm) - new_norm_2; *new_vk_norm = (new_product > ZERO) ? RSqrt(new_product) : ZERO; } return(0); }
int ClassicalGS(N_Vector *v, realtype **h, int k, int p, realtype *new_vk_norm, N_Vector temp, realtype *s) { int i, k_minus_1, i0; realtype vk_norm; k_minus_1 = k - 1; /* Perform Classical Gram-Schmidt */ vk_norm = RSqrt(N_VDotProd(v[k], v[k])); i0 = MAX(k-p, 0); for (i=i0; i < k; i++) { h[i][k_minus_1] = N_VDotProd(v[i], v[k]); } for (i=i0; i < k; i++) { N_VLinearSum(ONE, v[k], -h[i][k_minus_1], v[i], v[k]); } /* Compute the norm of the new vector at v[k] */ *new_vk_norm = RSqrt(N_VDotProd(v[k], v[k])); /* Reorthogonalize if necessary */ if ((FACTOR * (*new_vk_norm)) < vk_norm) { for (i=i0; i < k; i++) { s[i] = N_VDotProd(v[i], v[k]); } if (i0 < k) { N_VScale(s[i0], v[i0], temp); h[i0][k_minus_1] += s[i0]; } for (i=i0+1; i < k; i++) { N_VLinearSum(s[i], v[i], ONE, temp, temp); h[i][k_minus_1] += s[i]; } N_VLinearSum(ONE, v[k], -ONE, temp, v[k]); *new_vk_norm = RSqrt(N_VDotProd(v[k],v[k])); } return(0); }
int IDABBDPrecReInit(void *bbd_data, long int mudq, long int mldq, realtype dq_rel_yy, IDABBDLocalFn Gres, IDABBDCommFn Gcomm) { IBBDPrecData pdata; IDAMem IDA_mem; long int Nlocal; if (bbd_data == NULL) { IDAProcessError(NULL, IDABBDPRE_PDATA_NULL, "IDABBDPRE", "IDABBDPrecReInit", MSGBBD_PDATA_NULL); return(IDABBDPRE_PDATA_NULL); } pdata =(IBBDPrecData) bbd_data; IDA_mem = (IDAMem) pdata->ida_mem; Nlocal = pdata->n_local; /* Set pointers to res_data, glocal, and gcomm; load half-bandwidths. */ pdata->mudq = MIN(Nlocal-1, MAX(0, mudq)); pdata->mldq = MIN(Nlocal-1, MAX(0, mldq)); pdata->glocal = Gres; pdata->gcomm = Gcomm; /* Set rel_yy based on input value dq_rel_yy (0 implies default). */ pdata->rel_yy = (dq_rel_yy > ZERO) ? dq_rel_yy : RSqrt(uround); /* Re-initialize nge */ pdata->nge = 0; return(IDABBDPRE_SUCCESS); }
/* * Cholesky decomposition of a symmetric positive-definite matrix * A = C^T*C: gaxpy version. * Only the lower triangle of A is accessed and it is overwritten with * the lower triangle of C. */ long int densePOTRF(realtype **a, long int m) { realtype *a_col_j, *a_col_k; realtype a_diag; long int i, j, k; for (j=0; j<m; j++) { a_col_j = a[j]; if (j>0) { for(i=j; i<m; i++) { for(k=0;k<j;k++) { a_col_k = a[k]; a_col_j[i] -= a_col_k[i]*a_col_k[j]; } } } a_diag = a_col_j[j]; if (a_diag <= ZERO) return(j+1); a_diag = RSqrt(a_diag); for(i=j; i<m; i++) a_col_j[i] /= a_diag; } return(0); }
static void KINForcingTerm(KINMem kin_mem, real fnormp) { real eta_max = POINT9, eta_min = POINTOHOHOHONE, eta_safe = 0.5, linmodel_norm; if (etaflag == ETACHOICE1) /* Choice 1 forcing terms. */ { /* compute the norm of f + J p , scaled L2 norm */ linmodel_norm = RSqrt(fnorm * fnorm + TWO * sfdotJp + sJpnorm * sJpnorm); /* Form the safeguarded Choice 1 eta. */ eta_safe = RPowerR(eta, ealpha); eta = ABS(fnormp - linmodel_norm) / fnorm; } if (etaflag == ETACHOICE2) /* Choice 2 forcing terms. */ { eta_safe = egamma * RPowerR(eta, ealpha); eta = egamma * RPowerR(fnormp / fnorm, ealpha); } /* Apply the safeguards. */ if (eta_safe < POINT1) eta_safe = ZERO; eta = MAX(eta, eta_safe); eta = MAX(eta, eta_min); eta = MIN(eta, eta_max); return; }
int CVBBDPrecReInit(void *bbd_data, long int mudq, long int mldq, realtype dqrely, CVLocalFn gloc, CVCommFn cfn) { CVBBDPrecData pdata; CVodeMem cv_mem; long int Nlocal; if ( bbd_data == NULL ) { fprintf(stderr, MSGBBDP_NO_PDATA); return(CV_PDATA_NULL); } pdata = (CVBBDPrecData) bbd_data; cv_mem = (CVodeMem) pdata->cvode_mem; /* Set pointers to gloc and cfn; load half-bandwidths */ pdata->gloc = gloc; pdata->cfn = cfn; Nlocal = pdata->n_local; pdata->mudq = MIN( Nlocal-1, MAX(0,mudq) ); pdata->mldq = MIN( Nlocal-1, MAX(0,mldq) ); /* Set pdata->dqrely based on input dqrely (0 implies default). */ pdata->dqrely = (dqrely > ZERO) ? dqrely : RSqrt(uround); /* Re-initialize nge */ pdata->nge = 0; return(CV_SUCCESS); }
static void CVDenseDQJac(long int N, DenseMat J, realtype t, N_Vector y, N_Vector fy, void *jac_data, N_Vector tmp1, N_Vector tmp2, N_Vector tmp3) { realtype fnorm, minInc, inc, inc_inv, yjsaved, srur; realtype *tmp2_data, *y_data, *ewt_data; N_Vector ftemp, jthCol; long int j; CVodeMem cv_mem; CVDenseMem cvdense_mem; /* jac_data points to cvode_mem */ cv_mem = (CVodeMem) jac_data; cvdense_mem = (CVDenseMem) lmem; /* Save pointer to the array in tmp2 */ tmp2_data = N_VGetArrayPointer(tmp2); /* Rename work vectors for readibility */ ftemp = tmp1; jthCol = tmp2; /* Obtain pointers to the data for ewt, y */ ewt_data = N_VGetArrayPointer(ewt); y_data = N_VGetArrayPointer(y); /* Set minimum increment based on uround and norm of f */ srur = RSqrt(uround); fnorm = N_VWrmsNorm(fy, ewt); minInc = (fnorm != ZERO) ? (MIN_INC_MULT * ABS(h) * uround * N * fnorm) : ONE; /* This is the only for loop for 0..N-1 in CVODE */ for (j = 0; j < N; j++) { /* Generate the jth col of J(tn,y) */ N_VSetArrayPointer(DENSE_COL(J,j), jthCol); yjsaved = y_data[j]; inc = MAX(srur*ABS(yjsaved), minInc/ewt_data[j]); y_data[j] += inc; f(tn, y, ftemp, f_data); y_data[j] = yjsaved; inc_inv = ONE/inc; N_VLinearSum(inc_inv, ftemp, -inc_inv, fy, jthCol); DENSE_COL(J,j) = N_VGetArrayPointer(jthCol); } /* Restore original array pointer in tmp2 */ N_VSetArrayPointer(tmp2_data, tmp2); /* Increment counter nfeD */ nfeD += N; }
AABB Sphere::MaximalContainedAABB() const { AABB aabb; static const float recipSqrt3 = RSqrt(3); float halfSideLength = r * recipSqrt3; aabb.SetFromCenterAndSize(pos, DIR_VEC(halfSideLength,halfSideLength,halfSideLength)); return aabb; }
realtype N_VWrmsNorm_NrnThread(N_Vector x, N_Vector w) { long int N; N = NV_LENGTH_NT(x); #if USELONGDOUBLE longdretval = ZERO; #else retval = ZERO; #endif xpass wpass nrn_multithread_job(vwrmsnorm); #if USELONGDOUBLE retval = longdretval; #endif mydebug2("vwrmsnorm %.20g\n", RSqrt(retval / N)); return(RSqrt(retval / N)); }
static void CVBandPDQJac(CVBandPrecData pdata, realtype t, N_Vector y, N_Vector fy, N_Vector ftemp, N_Vector ytemp) { CVodeMem cv_mem; realtype fnorm, minInc, inc, inc_inv, srur; long int group, i, j, width, ngroups, i1, i2; realtype *col_j, *ewt_data, *fy_data, *ftemp_data, *y_data, *ytemp_data; cv_mem = (CVodeMem) pdata->cvode_mem; /* Obtain pointers to the data for ewt, fy, ftemp, y, ytemp. */ ewt_data = N_VGetArrayPointer(ewt); fy_data = N_VGetArrayPointer(fy); ftemp_data = N_VGetArrayPointer(ftemp); y_data = N_VGetArrayPointer(y); ytemp_data = N_VGetArrayPointer(ytemp); /* Load ytemp with y = predicted y vector. */ N_VScale(ONE, y, ytemp); /* Set minimum increment based on uround and norm of f. */ srur = RSqrt(uround); fnorm = N_VWrmsNorm(fy, ewt); minInc = (fnorm != ZERO) ? (MIN_INC_MULT * ABS(h) * uround * N * fnorm) : ONE; /* Set bandwidth and number of column groups for band differencing. */ width = ml + mu + 1; ngroups = MIN(width, N); for (group = 1; group <= ngroups; group++) { /* Increment all y_j in group. */ for(j = group-1; j < N; j += width) { inc = MAX(srur*ABS(y_data[j]), minInc/ewt_data[j]); ytemp_data[j] += inc; } /* Evaluate f with incremented y. */ f(t, ytemp, ftemp, f_data); nfeBP++; /* Restore ytemp, then form and load difference quotients. */ for (j = group-1; j < N; j += width) { ytemp_data[j] = y_data[j]; col_j = BAND_COL(savedJ,j); inc = MAX(srur*ABS(y_data[j]), minInc/ewt_data[j]); inc_inv = ONE/inc; i1 = MAX(0, j-mu); i2 = MIN(j+ml, N-1); for (i=i1; i <= i2; i++) BAND_COL_ELEM(col_j,i,j) = inc_inv * (ftemp_data[i] - fy_data[i]); } } }
void Quat::ToAxisAngle(float3 &axis, float &angle) const { // Best: 36.868 nsecs / 98.752 ticks, Avg: 37.095 nsecs, Worst: 37.636 nsecs assume2(this->IsNormalized(), *this, this->Length()); float halfAngle = Acos(w); angle = halfAngle * 2.f; // Convert cos to sin via the identity sin^2 + cos^2 = 1, and fuse reciprocal and square root to the same instruction, // since we are about to divide by it. float rcpSinAngle = RSqrt(1.f - w*w); axis.x = x * rcpSinAngle; axis.y = y * rcpSinAngle; axis.z = z * rcpSinAngle; }
static void InitUserData(WebData wdata) { int i, j, ns; realtype *bcoef, *diff, *cox, *coy, dx, dy; realtype (*acoef)[NS]; acoef = wdata->acoef; bcoef = wdata->bcoef; diff = wdata->diff; cox = wdata->cox; coy = wdata->coy; ns = wdata->ns = NS; for (j = 0; j < NS; j++) { for (i = 0; i < NS; i++) acoef[i][j] = ZERO; } for (j = 0; j < NP; j++) { for (i = 0; i < NP; i++) { acoef[NP+i][j] = EE; acoef[i][NP+j] = -GG; } acoef[j][j] = -AA; acoef[NP+j][NP+j] = -AA; bcoef[j] = BB; bcoef[NP+j] = -BB; diff[j] = DPREY; diff[NP+j] = DPRED; } /* Set remaining problem parameters */ wdata->mxns = MXNS; dx = wdata->dx = DX; dy = wdata->dy = DY; for (i = 0; i < ns; i++) { cox[i] = diff[i]/SQR(dx); coy[i] = diff[i]/SQR(dy); } /* Set remaining method parameters */ wdata->mp = MP; wdata->mq = MQ; wdata->mx = MX; wdata->my = MY; wdata->srur = RSqrt(UNIT_ROUNDOFF); wdata->mxmp = MXMP; wdata->ngrp = NGRP; wdata->ngx = NGX; wdata->ngy = NGY; SetGroups(MX, NGX, wdata->jgx, wdata->jigx, wdata->jxr); SetGroups(MY, NGY, wdata->jgy, wdata->jigy, wdata->jyr); }
int denseGEQRF(realtype **a, long int m, long int n, realtype *beta, realtype *v) { realtype ajj, s, mu, v1, v1_2; realtype *col_j, *col_k; long int i, j, k; /* For each column...*/ for(j=0; j<n; j++) { col_j = a[j]; ajj = col_j[j]; /* Compute the j-th Householder vector (of length m-j) */ v[0] = ONE; s = ZERO; for(i=1; i<m-j; i++) { v[i] = col_j[i+j]; s += v[i]*v[i]; } if(s != ZERO) { mu = RSqrt(ajj*ajj+s); v1 = (ajj <= ZERO) ? ajj-mu : -s/(ajj+mu); v1_2 = v1*v1; beta[j] = TWO * v1_2 / (s + v1_2); for(i=1; i<m-j; i++) v[i] /= v1; } else { beta[j] = ZERO; } /* Update upper triangle of A (load R) */ for(k=j; k<n; k++) { col_k = a[k]; s = ZERO; for(i=0; i<m-j; i++) s += col_k[i+j]*v[i]; s *= beta[j]; for(i=0; i<m-j; i++) col_k[i+j] -= s*v[i]; } /* Update A (load Householder vector) */ if(j<m-1) { for(i=1; i<m-j; i++) col_j[i+j] = v[i]; } } return(0); }
static void CVDenseDQJac(integertype N, DenseMat J, RhsFn f, void *f_data, realtype tn, N_Vector y, N_Vector fy, N_Vector ewt, realtype h, realtype uround, void *jac_data, long int *nfePtr, N_Vector vtemp1, N_Vector vtemp2, N_Vector vtemp3) { realtype fnorm, minInc, inc, inc_inv, yjsaved, srur; realtype *y_data, *ewt_data; N_Vector ftemp, jthCol; M_Env machEnv; integertype j; machEnv = y->menv; /* Get machine environment */ ftemp = vtemp1; /* Rename work vector for use as f vector value */ /* Obtain pointers to the data for ewt, y */ ewt_data = N_VGetData(ewt); y_data = N_VGetData(y); /* Set minimum increment based on uround and norm of f */ srur = RSqrt(uround); fnorm = N_VWrmsNorm(fy, ewt); minInc = (fnorm != ZERO) ? (MIN_INC_MULT * ABS(h) * uround * N * fnorm) : ONE; jthCol = N_VMake(N, y_data, machEnv); /* j loop overwrites this data address */ /* This is the only for loop for 0..N-1 in CVODE */ for (j = 0; j < N; j++) { /* Generate the jth col of J(tn,y) */ N_VSetData(DENSE_COL(J, j), jthCol); yjsaved = y_data[j]; inc = MAX(srur * ABS(yjsaved), minInc / ewt_data[j]); y_data[j] += inc; f(N, tn, y, ftemp, f_data); inc_inv = ONE / inc; N_VLinearSum(inc_inv, ftemp, -inc_inv, fy, jthCol); y_data[j] = yjsaved; } N_VDispose(jthCol); /* Increment counter nfe = *nfePtr */ *nfePtr += N; }
realtype N_VWL2Norm_Serial(N_Vector x, N_Vector w) { long int i, N; realtype sum = ZERO, prodi, *xd, *wd; N = NV_LENGTH_S(x); xd = NV_DATA_S(x); wd = NV_DATA_S(w); for (i=0; i < N; i++) { prodi = (*xd++) * (*wd++); sum += prodi * prodi; } return(RSqrt(sum)); }
float N_VWL2Norm(N_Vector x, N_Vector w) { int i, N; float sum = ZERO, prodi, *xd, *wd; N = x->length; xd = x->data; wd = w->data; for (i=0; i < N; ++i ) { prodi = (*xd++) * (*wd++); sum += prodi * prodi; } return(RSqrt(sum)); }
static void force(N_Vector yy, realtype *Q, UserData data) { realtype a, k, c, l0, F; realtype q, x, p; realtype qd, xd, pd; realtype s1, c1, s2, c2, s21, c21; realtype l2, l, ld; realtype f, fl; a = data->a; k = data->params[0]; c = data->params[1]; l0 = data->l0; F = data->F; q = NV_Ith_S(yy,0); x = NV_Ith_S(yy,1); p = NV_Ith_S(yy,2); qd = NV_Ith_S(yy,3); xd = NV_Ith_S(yy,4); pd = NV_Ith_S(yy,5); s1 = sin(q); c1 = cos(q); s2 = sin(p); c2 = cos(p); s21 = s2*c1 - c2*s1; c21 = c2*c1 + s2*s1; l2 = x*x - x*(c2+a*c1) + (ONE + a*a)/FOUR + a*c21/TWO; l = RSqrt(l2); ld = TWO*x*xd - xd*(c2+a*c1) + x*(s2*pd+a*s1*qd) - a*s21*(pd-qd)/TWO; ld /= TWO*l; f = k*(l-l0) + c*ld; fl = f/l; Q[0] = - fl * a * (s21/TWO + x*s1) / TWO; Q[1] = fl * (c2/TWO - x + a*c1/TWO) + F; Q[2] = - fl * (x*s2 - a*s21/TWO) / TWO - F*s2; }
realtype N_VWL2Norm_Serial(N_Vector x, N_Vector w) { long int i, N; realtype sum, prodi, *xd, *wd; sum = ZERO; xd = wd = NULL; N = NV_LENGTH_S(x); xd = NV_DATA_S(x); wd = NV_DATA_S(w); for (i = 0; i < N; i++) { prodi = xd[i]*wd[i]; sum += SQR(prodi); } return(RSqrt(sum)); }
realtype N_VWrmsNormMask_Serial(N_Vector x, N_Vector w, N_Vector id) { long int i, N; realtype sum = ZERO, prodi, *xd, *wd, *idd; N = NV_LENGTH_S(x); xd = NV_DATA_S(x); wd = NV_DATA_S(w); idd = NV_DATA_S(id); for (i=0; i < N; i++) { if (idd[i] > ZERO) { prodi = xd[i] * wd[i]; sum += prodi * prodi; } } return(RSqrt(sum / N)); }
int CVBBDPrecReInit(void *cvode_mem, int mudq, int mldq, realtype dqrely) { CVodeMem cv_mem; CVSpilsMem cvspils_mem; CVBBDPrecData pdata; int Nlocal; if (cvode_mem == NULL) { CVProcessError(NULL, CVSPILS_MEM_NULL, "CVBBDPRE", "CVBBDPrecReInit", MSGBBD_MEM_NULL); return(CVSPILS_MEM_NULL); } cv_mem = (CVodeMem) cvode_mem; /* Test if one of the SPILS linear solvers has been attached */ if (cv_mem->cv_lmem == NULL) { CVProcessError(cv_mem, CVSPILS_LMEM_NULL, "CVBBDPRE", "CVBBDPrecReInit", MSGBBD_LMEM_NULL); return(CVSPILS_LMEM_NULL); } cvspils_mem = (CVSpilsMem) cv_mem->cv_lmem; /* Test if the preconditioner data is non-NULL */ if (cvspils_mem->s_P_data == NULL) { CVProcessError(cv_mem, CVSPILS_PMEM_NULL, "CVBBDPRE", "CVBBDPrecReInit", MSGBBD_PMEM_NULL); return(CVSPILS_PMEM_NULL); } pdata = (CVBBDPrecData) cvspils_mem->s_P_data; /* Load half-bandwidths */ Nlocal = pdata->n_local; pdata->mudq = MIN(Nlocal-1, MAX(0,mudq)); pdata->mldq = MIN(Nlocal-1, MAX(0,mldq)); /* Set pdata->dqrely based on input dqrely (0 implies default). */ pdata->dqrely = (dqrely > ZERO) ? dqrely : RSqrt(uround); /* Re-initialize nge */ pdata->nge = 0; return(CVSPILS_SUCCESS); }
int PVBBDPrecon(integer N, real t, N_Vector y, N_Vector fy, boole jok, boole *jcurPtr, real gamma, N_Vector ewt, real h, real uround, long int *nfePtr, void *P_data, N_Vector vtemp1, N_Vector vtemp2, N_Vector vtemp3) { integer Nlocal, ier; real rely, srur; PVBBDData pdata; pdata = (PVBBDData)P_data; Nlocal = N_VLOCLENGTH(y); if (jok) { /* If jok = TRUE, use saved copy of J */ *jcurPtr = FALSE; BandCopy(savedJ, savedP, mukeep, mlkeep); } else { /* Otherwise call PVBBDDQJac for new J value */ *jcurPtr = TRUE; BandZero(savedJ); /* Set relative increment for y via dqrely and uround */ srur = RSqrt(uround); rely = (dqrely == ZERO) ? srur : dqrely; PVBBDDQJac(Nlocal, mudq, mldq, mukeep, mlkeep, rely, gloc, cfn, savedJ, f_data, t, y, ewt, h, uround, vtemp1, vtemp2, vtemp3); nge += 1 + MIN(mldq + mudq + 1, Nlocal); BandCopy(savedJ, savedP, mukeep, mlkeep); } /* Scale and add I to get P = I - gamma*J */ BandScale(-gamma, savedP); BandAddI(savedP); /* Do LU factorization of P in place */ ier = BandFactor(savedP, pivots); /* Return 0 if the LU was complete; otherwise return 1 */ if (ier > 0) return(1); return(0); }
real N_VWL2Norm(N_Vector x, N_Vector w) { integer N, N_global; real sum = ZERO, *xd, *wd, gsum; machEnvType machenv; N = x->length; N_global = x->global_length; xd = x->data; wd = w->data; machenv = x->machEnv; #pragma omp parallel for reduction(+: sum) for (integer i=0; i < N; i++) { real prodi = xd[i] * wd[i]; sum += prodi * prodi; } gsum = PVecAllReduce(sum, 1, machenv); return(RSqrt(gsum)); }
int CPBBDPrecReInit(void *bbd_data, int mudq, int mldq, realtype dqrely, void *gloc, CPBBDCommFn cfn) { CPBBDPrecData pdata; CPodeMem cp_mem; int Nlocal; if (bbd_data == NULL) { cpProcessError(NULL, CPBBDPRE_PDATA_NULL, "CPBBDPRE", "CPBBDPrecReInit", MSGBBDP_PDATA_NULL); return(CPBBDPRE_PDATA_NULL); } pdata = (CPBBDPrecData) bbd_data; cp_mem = (CPodeMem) pdata->cpode_mem; /* Set pointers to gloc and cfn; load half-bandwidths */ switch (ode_type) { case CP_EXPL: pdata->glocE = (CPBBDLocalRhsFn) gloc; pdata->glocI = NULL; break; case CP_IMPL: pdata->glocI = (CPBBDLocalResFn) gloc; pdata->glocE = NULL; break; } pdata->cfn = cfn; Nlocal = pdata->n_local; pdata->mudq = MIN(Nlocal-1, MAX(0,mudq)); pdata->mldq = MIN(Nlocal-1, MAX(0,mldq)); /* Set pdata->dqrely based on input dqrely (0 implies default). */ pdata->dqrely = (dqrely > ZERO) ? dqrely : RSqrt(uround); /* Re-initialize nge */ pdata->nge = 0; return(CPBBDPRE_SUCCESS); }
vec Quat::Axis() const { assume2(this->IsNormalized(), *this, this->Length()); #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE) // Best: 6.145 nsecs / 16.88 ticks, Avg: 6.367 nsecs, Worst: 6.529 nsecs assume2(this->IsNormalized(), *this, this->Length()); simd4f cosAngle = _mm_shuffle_ps(q, q, _MM_SHUFFLE(3, 3, 3, 3)); simd4f rcpSinAngle = rsqrt_ps(sub_ps(set1_ps(1.f), mul_ps(cosAngle, cosAngle))); simd4f a = mul_ps(q, rcpSinAngle); // Set the w component to zero. simd4f highPart = _mm_unpackhi_ps(a, zero_ps()); // [_ _ 0 z] a = _mm_movelh_ps(a, highPart); // [0 z y x] return FLOAT4_TO_DIR(a); #else // Best: 6.529 nsecs / 18.152 ticks, Avg: 6.851 nsecs, Worst: 8.065 nsecs // Convert cos to sin via the identity sin^2 + cos^2 = 1, and fuse reciprocal and square root to the same instruction, // since we are about to divide by it. float rcpSinAngle = RSqrt(1.f - w*w); return DIR_VEC(x, y, z) * rcpSinAngle; #endif }
int CVSpbcg(void *cvode_mem, int pretype, int maxl) { CVodeMem cv_mem; CVSpilsMem cvspils_mem; SpbcgMem spbcg_mem; int mxl; /* Return immediately if cvode_mem is NULL */ if (cvode_mem == NULL) { CVProcessError(NULL, CVSPILS_MEM_NULL, "CVSPBCG", "CVSpbcg", MSGS_CVMEM_NULL); return(CVSPILS_MEM_NULL); } cv_mem = (CVodeMem) cvode_mem; /* Check if N_VDotProd is present */ if (vec_tmpl->ops->nvdotprod == NULL) { CVProcessError(cv_mem, CVSPILS_ILL_INPUT, "CVSPBCG", "CVSpbcg", MSGS_BAD_NVECTOR); return(CVSPILS_ILL_INPUT); } if (lfree != NULL) lfree(cv_mem); /* Set four main function fields in cv_mem */ linit = CVSpbcgInit; lsetup = CVSpbcgSetup; lsolve = CVSpbcgSolve; lfree = CVSpbcgFree; /* Get memory for CVSpilsMemRec */ cvspils_mem = NULL; cvspils_mem = (CVSpilsMem) malloc(sizeof(CVSpilsMemRec)); if (cvspils_mem == NULL) { CVProcessError(cv_mem, CVSPILS_MEM_FAIL, "CVSPBCG", "CVSpbcg", MSGS_MEM_FAIL); return(CVSPILS_MEM_FAIL); } /* Set ILS type */ cvspils_mem->s_type = SPILS_SPBCG; /* Set Spbcg parameters that have been passed in call sequence */ cvspils_mem->s_pretype = pretype; mxl = cvspils_mem->s_maxl = (maxl <= 0) ? CVSPILS_MAXL : maxl; /* Set default values for the rest of the Spbcg parameters */ cvspils_mem->s_delt = CVSPILS_DELT; cvspils_mem->s_P_data = NULL; cvspils_mem->s_pset = NULL; cvspils_mem->s_psolve = NULL; cvspils_mem->s_jtimes = CVSpilsDQJtimes; cvspils_mem->s_j_data = cvode_mem; cvspils_mem->s_last_flag = CVSPILS_SUCCESS; setupNonNull = FALSE; /* Check for legal pretype */ if ((pretype != PREC_NONE) && (pretype != PREC_LEFT) && (pretype != PREC_RIGHT) && (pretype != PREC_BOTH)) { CVProcessError(cv_mem, CVSPILS_ILL_INPUT, "CVSPBCG", "CVSpbcg", MSGS_BAD_PRETYPE); return(CVSPILS_ILL_INPUT); } /* Allocate memory for ytemp and x */ ytemp = NULL; ytemp = N_VClone(vec_tmpl); if (ytemp == NULL) { CVProcessError(cv_mem, CVSPILS_MEM_FAIL, "CVSPBCG", "CVSpbcg", MSGS_MEM_FAIL); free(cvspils_mem); cvspils_mem = NULL; return(CVSPILS_MEM_FAIL); } x = NULL; x = N_VClone(vec_tmpl); if (x == NULL) { CVProcessError(cv_mem, CVSPILS_MEM_FAIL, "CVSPBCG", "CVSpbcg", MSGS_MEM_FAIL); N_VDestroy(ytemp); free(cvspils_mem); cvspils_mem = NULL; return(CVSPILS_MEM_FAIL); } /* Compute sqrtN from a dot product */ N_VConst(ONE, ytemp); sqrtN = RSqrt(N_VDotProd(ytemp, ytemp)); /* Call SpbcgMalloc to allocate workspace for Spbcg */ spbcg_mem = NULL; spbcg_mem = SpbcgMalloc(mxl, vec_tmpl); if (spbcg_mem == NULL) { CVProcessError(cv_mem, CVSPILS_MEM_FAIL, "CVSPBCG", "CVSpbcg", MSGS_MEM_FAIL); N_VDestroy(ytemp); N_VDestroy(x); free(cvspils_mem); cvspils_mem = NULL; return(CVSPILS_MEM_FAIL); } /* Attach SPBCG memory to spils memory structure */ spils_mem = (void *) spbcg_mem; /* Attach linear solver memory to integrator memory */ lmem = cvspils_mem; return(CVSPILS_SUCCESS); }