Exemplo n.º 1
0
void RayGenerator::Generate(int level,int pw,int ph,int x,int y, Vec3q *out) const {
	{
		DASSERT(level == 3);

		GridSampler sampler;
		Vec3q right(tright), up(tup), tadd = txyz;
		floatq xoff(x + 0, x + 0, x + 2, x + 2);
		floatq yoff(y + 0, y + 0, y - 1, y - 1);

		for(int ty = 0; ty < 16; ty++) {
			floatq tposx[4] = { floatq(0.0f) + xoff, floatq(4.0f) + xoff,
								floatq(8.0f) + xoff, floatq(12.0f) + xoff };
			floatq tposy = floatq(ty) + yoff;

			Vec3q tpoint = up * tposy + tadd;
			Vec3q points[4] = { right * tposx[0] + tpoint, right * tposx[1] + tpoint,
								right * tposx[2] + tpoint, right * tposx[3] + tpoint };

			out[ty * 4 + 0] = points[0] * RSqrt(points[0] | points[0]);
			out[ty * 4 + 1] = points[1] * RSqrt(points[1] | points[1]);
			out[ty * 4 + 2] = points[2] * RSqrt(points[2] | points[2]);
			out[ty * 4 + 3] = points[3] * RSqrt(points[3] | points[3]);
		}
		return;
	}

	if(level > 2) {
		int npw = pw >> 1, nph = ph >> 1, nl = level - 1;

		Generate(nl, npw, nph, x      , y      , out + 0);
		Generate(nl, npw, nph, x + npw, y      , out + (1 << nl*2));
		Generate(nl, npw, nph, x      , y + nph, out + (2 << nl*2));
		Generate(nl, npw, nph, x + npw, y + nph, out + (3 << nl*2));
		return;
	}
Exemplo n.º 2
0
int KINSetRelErrFunc(void *kinmem, realtype relfunc)
{
  KINMem kin_mem;
  realtype uround;

  if (kinmem == NULL) {
    KINProcessError(NULL, KIN_MEM_NULL, "KINSOL", "KINSetRelErrFunc", MSG_NO_MEM);
    return(KIN_MEM_NULL);
  }

  kin_mem = (KINMem) kinmem;

  if (relfunc < ZERO) {
    KINProcessError(NULL, KIN_ILL_INPUT, "KINSOL", "KINSetRelErrFunc", MSG_BAD_RELFUNC);
    return(KIN_ILL_INPUT);
  }

  if (relfunc == ZERO) {
    uround = kin_mem->kin_uround;
    kin_mem->kin_sqrt_relfunc = RSqrt(uround);
  } else {
    kin_mem->kin_sqrt_relfunc = RSqrt(relfunc);
  }

  return(KIN_SUCCESS);
}
Exemplo n.º 3
0
realtype N_VWrmsNormMask_NrnThread(N_Vector x, N_Vector w, N_Vector id)
{
  long int N;
  N  = NV_LENGTH_NT(x);
  retval = ZERO;
  xpass wpass idpass
  nrn_multithread_job(vwrmsnormmask);
mydebug2("vwrmsnormmask %.20g\n", RSqrt(retval / N));
  return(RSqrt(retval / N));
}
Exemplo n.º 4
0
realtype N_VWL2Norm_NrnThread(N_Vector x, N_Vector w)
{
  long int N;
  retval = ZERO;
  xpass wpass
  nrn_multithread_job(vwl2norm);
  N  = NV_LENGTH_NT(x);
mydebug2("vwl2norm %.20g\n", RSqrt(retval));
  return(RSqrt(retval));
}
static int res(realtype t, N_Vector yy, N_Vector yd, N_Vector res, void *userdata)
{
  UserData data;
  realtype k1, k2, k3, k4;
  realtype K, klA, Ks, pCO2, H;

  realtype y1, y2, y3, y4, y5, y6;
  realtype yd1, yd2, yd3, yd4, yd5;

  realtype r1, r2, r3, r4, r5, Fin;

  data = (UserData) userdata;
  k1 = data->k1;
  k2 = data->k2;
  k3 = data->k3;
  k4 = data->k4;
  K = data->K;
  klA = data->klA;
  Ks = data->Ks;
  pCO2 = data->pCO2;
  H = data->H;

  y1 = Ith(yy,1);
  y2 = Ith(yy,2);
  y3 = Ith(yy,3);
  y4 = Ith(yy,4);
  y5 = Ith(yy,5);
  y6 = Ith(yy,6);

  yd1 = Ith(yd,1);
  yd2 = Ith(yd,2);
  yd3 = Ith(yd,3);
  yd4 = Ith(yd,4);
  yd5 = Ith(yd,5);

  r1 = k1 * RPowerI(y1,4) * RSqrt(y2);
  r2 = k2 * y3 * y4;
  r3 = k2/K * y1 * y5;
  r4 = k3 * y1 * y4 * y4;
  r5 = k4 * y6 * y6 * RSqrt(y2);
  Fin = klA * ( pCO2/H - y2 );

  Ith(res,1) = yd1 + TWO*r1 - r2 + r3 + r4;
  Ith(res,2) = yd2 + HALF*r1 + r4 + HALF*r5 - Fin;
  Ith(res,3) = yd3 - r1 + r2 - r3;
  Ith(res,4) = yd4 + r2 - r3 + TWO*r4;
  Ith(res,5) = yd5 - r2 + r3 - r5;
  Ith(res,6) = Ks*y1*y4 - y6;

  return(0);
}
Exemplo n.º 6
0
int ModifiedGS(N_Vector *v, realtype **h, int k, int p, 
               realtype *new_vk_norm)
{
  int  i, k_minus_1, i0;
  realtype new_norm_2, new_product, vk_norm, temp;
  
  vk_norm = RSqrt(N_VDotProd(v[k],v[k]));
  k_minus_1 = k - 1;
  i0 = MAX(k-p, 0);
  
  /* Perform modified Gram-Schmidt */
  
  for (i=i0; i < k; i++) {
    h[i][k_minus_1] = N_VDotProd(v[i], v[k]);
    N_VLinearSum(ONE, v[k], -h[i][k_minus_1], v[i], v[k]);
  }

  /* Compute the norm of the new vector at v[k] */

  *new_vk_norm = RSqrt(N_VDotProd(v[k], v[k]));

  /* If the norm of the new vector at v[k] is less than
     FACTOR (== 1000) times unit roundoff times the norm of the
     input vector v[k], then the vector will be reorthogonalized
     in order to ensure that nonorthogonality is not being masked
     by a very small vector length. */

  temp = FACTOR * vk_norm;
  if ((temp + (*new_vk_norm)) != temp) return(0);
  
  new_norm_2 = ZERO;

  for (i=i0; i < k; i++) {
    new_product = N_VDotProd(v[i], v[k]);
    temp = FACTOR * h[i][k_minus_1];
    if ((temp + new_product) == temp) continue;
    h[i][k_minus_1] += new_product;
    N_VLinearSum(ONE, v[k],-new_product, v[i], v[k]);
    new_norm_2 += SQR(new_product);
  }

  if (new_norm_2 != ZERO) {
    new_product = SQR(*new_vk_norm) - new_norm_2;
    *new_vk_norm = (new_product > ZERO) ? RSqrt(new_product) : ZERO;
  }

  return(0);
}
Exemplo n.º 7
0
int ClassicalGS(N_Vector *v, realtype **h, int k, int p, 
                realtype *new_vk_norm, N_Vector temp, realtype *s)
{
  int  i, k_minus_1, i0;
  realtype vk_norm;

  k_minus_1 = k - 1;
  
  /* Perform Classical Gram-Schmidt */

  vk_norm = RSqrt(N_VDotProd(v[k], v[k]));

  i0 = MAX(k-p, 0);
  for (i=i0; i < k; i++) {
    h[i][k_minus_1] = N_VDotProd(v[i], v[k]);
  }

  for (i=i0; i < k; i++) {
    N_VLinearSum(ONE, v[k], -h[i][k_minus_1], v[i], v[k]);
  }

  /* Compute the norm of the new vector at v[k] */

  *new_vk_norm = RSqrt(N_VDotProd(v[k], v[k]));

  /* Reorthogonalize if necessary */

  if ((FACTOR * (*new_vk_norm)) < vk_norm) {

    for (i=i0; i < k; i++) {
      s[i] = N_VDotProd(v[i], v[k]);
    }

    if (i0 < k) {
      N_VScale(s[i0], v[i0], temp);
      h[i0][k_minus_1] += s[i0];
    }
    for (i=i0+1; i < k; i++) {
      N_VLinearSum(s[i], v[i], ONE, temp, temp);
      h[i][k_minus_1] += s[i];
    }
    N_VLinearSum(ONE, v[k], -ONE, temp, v[k]);

    *new_vk_norm = RSqrt(N_VDotProd(v[k],v[k]));
  }

  return(0);
}
int IDABBDPrecReInit(void *bbd_data,
		     long int mudq, long int mldq, 
		     realtype dq_rel_yy,  
		     IDABBDLocalFn Gres, IDABBDCommFn Gcomm)
{
  IBBDPrecData pdata;
  IDAMem IDA_mem;
  long int Nlocal;

  if (bbd_data == NULL) {
    IDAProcessError(NULL, IDABBDPRE_PDATA_NULL, "IDABBDPRE", "IDABBDPrecReInit", MSGBBD_PDATA_NULL);
    return(IDABBDPRE_PDATA_NULL);
  } 

  pdata =(IBBDPrecData) bbd_data;
  IDA_mem = (IDAMem) pdata->ida_mem;

  Nlocal = pdata->n_local;

  /* Set pointers to res_data, glocal, and gcomm; load half-bandwidths. */
  pdata->mudq = MIN(Nlocal-1, MAX(0, mudq));
  pdata->mldq = MIN(Nlocal-1, MAX(0, mldq));
  pdata->glocal = Gres;
  pdata->gcomm = Gcomm;

  /* Set rel_yy based on input value dq_rel_yy (0 implies default). */
  pdata->rel_yy = (dq_rel_yy > ZERO) ? dq_rel_yy : RSqrt(uround); 

  /* Re-initialize nge */
  pdata->nge = 0;

  return(IDABBDPRE_SUCCESS);
}
Exemplo n.º 9
0
/*
 * Cholesky decomposition of a symmetric positive-definite matrix
 * A = C^T*C: gaxpy version.
 * Only the lower triangle of A is accessed and it is overwritten with
 * the lower triangle of C.
 */
long int densePOTRF(realtype **a, long int m)
{
  realtype *a_col_j, *a_col_k;
  realtype a_diag;
  long int i, j, k;

  for (j=0; j<m; j++) {

    a_col_j = a[j];
   
    if (j>0) {
      for(i=j; i<m; i++) {
        for(k=0;k<j;k++) {
          a_col_k = a[k];
          a_col_j[i] -= a_col_k[i]*a_col_k[j];
        }
      }
    }

    a_diag = a_col_j[j];
    if (a_diag <= ZERO) return(j+1);
    a_diag = RSqrt(a_diag);

    for(i=j; i<m; i++) a_col_j[i] /= a_diag;
    
  }

  return(0);
}
Exemplo n.º 10
0
static void KINForcingTerm(KINMem kin_mem, real fnormp)
{
  real eta_max = POINT9, eta_min = POINTOHOHOHONE,
    eta_safe = 0.5, linmodel_norm;

  if (etaflag == ETACHOICE1)        /* Choice 1 forcing terms. */

  { /* compute the norm of f + J p , scaled L2 norm  */
    linmodel_norm = RSqrt(fnorm * fnorm + TWO * sfdotJp + sJpnorm * sJpnorm);

    /* Form the safeguarded Choice 1 eta. */

    eta_safe = RPowerR(eta, ealpha);
    eta = ABS(fnormp - linmodel_norm) / fnorm;
  }

  if (etaflag == ETACHOICE2)        /* Choice 2 forcing terms. */

  {
    eta_safe = egamma * RPowerR(eta, ealpha);
    eta = egamma * RPowerR(fnormp / fnorm, ealpha);
  }

  /* Apply the safeguards. */
  if (eta_safe < POINT1)
    eta_safe = ZERO;
  eta = MAX(eta, eta_safe);
  eta = MAX(eta, eta_min);
  eta = MIN(eta, eta_max);

  return;
}
Exemplo n.º 11
0
int CVBBDPrecReInit(void *bbd_data, 
                    long int mudq, long int mldq, 
                    realtype dqrely, 
                    CVLocalFn gloc, CVCommFn cfn)
{
  CVBBDPrecData pdata;
  CVodeMem cv_mem;
  long int Nlocal;

  if ( bbd_data == NULL ) {
    fprintf(stderr, MSGBBDP_NO_PDATA);
    return(CV_PDATA_NULL);
  } 

  pdata = (CVBBDPrecData) bbd_data;
  cv_mem = (CVodeMem) pdata->cvode_mem;

  /* Set pointers to gloc and cfn; load half-bandwidths */
  pdata->gloc = gloc;
  pdata->cfn = cfn;
  Nlocal = pdata->n_local;
  pdata->mudq = MIN( Nlocal-1, MAX(0,mudq) );
  pdata->mldq = MIN( Nlocal-1, MAX(0,mldq) );

  /* Set pdata->dqrely based on input dqrely (0 implies default). */
  pdata->dqrely = (dqrely > ZERO) ? dqrely : RSqrt(uround);

  /* Re-initialize nge */
  pdata->nge = 0;

  return(CV_SUCCESS);
}
Exemplo n.º 12
0
static void CVDenseDQJac(long int N, DenseMat J, realtype t,
                         N_Vector y, N_Vector fy, void *jac_data,
                         N_Vector tmp1, N_Vector tmp2, N_Vector tmp3)
{
    realtype fnorm, minInc, inc, inc_inv, yjsaved, srur;
    realtype *tmp2_data, *y_data, *ewt_data;
    N_Vector ftemp, jthCol;
    long int j;

    CVodeMem cv_mem;
    CVDenseMem  cvdense_mem;

    /* jac_data points to cvode_mem */
    cv_mem = (CVodeMem) jac_data;
    cvdense_mem = (CVDenseMem) lmem;

    /* Save pointer to the array in tmp2 */
    tmp2_data = N_VGetArrayPointer(tmp2);

    /* Rename work vectors for readibility */
    ftemp = tmp1;
    jthCol = tmp2;

    /* Obtain pointers to the data for ewt, y */
    ewt_data = N_VGetArrayPointer(ewt);
    y_data   = N_VGetArrayPointer(y);

    /* Set minimum increment based on uround and norm of f */
    srur = RSqrt(uround);
    fnorm = N_VWrmsNorm(fy, ewt);
    minInc = (fnorm != ZERO) ?
             (MIN_INC_MULT * ABS(h) * uround * N * fnorm) : ONE;

    /* This is the only for loop for 0..N-1 in CVODE */

    for (j = 0; j < N; j++) {

        /* Generate the jth col of J(tn,y) */

        N_VSetArrayPointer(DENSE_COL(J,j), jthCol);

        yjsaved = y_data[j];
        inc = MAX(srur*ABS(yjsaved), minInc/ewt_data[j]);
        y_data[j] += inc;
        f(tn, y, ftemp, f_data);
        y_data[j] = yjsaved;

        inc_inv = ONE/inc;
        N_VLinearSum(inc_inv, ftemp, -inc_inv, fy, jthCol);

        DENSE_COL(J,j) = N_VGetArrayPointer(jthCol);
    }

    /* Restore original array pointer in tmp2 */
    N_VSetArrayPointer(tmp2_data, tmp2);

    /* Increment counter nfeD */
    nfeD += N;
}
Exemplo n.º 13
0
AABB Sphere::MaximalContainedAABB() const
{
	AABB aabb;
	static const float recipSqrt3 = RSqrt(3);
	float halfSideLength = r * recipSqrt3;
	aabb.SetFromCenterAndSize(pos, DIR_VEC(halfSideLength,halfSideLength,halfSideLength));
	return aabb;
}
Exemplo n.º 14
0
realtype N_VWrmsNorm_NrnThread(N_Vector x, N_Vector w)
{
  long int N;
  N  = NV_LENGTH_NT(x);
#if USELONGDOUBLE
  longdretval = ZERO;
#else
  retval = ZERO;
#endif
  xpass  wpass
  nrn_multithread_job(vwrmsnorm);
#if USELONGDOUBLE
  retval = longdretval;
#endif
mydebug2("vwrmsnorm %.20g\n", RSqrt(retval / N));
  return(RSqrt(retval / N));
}
Exemplo n.º 15
0
static void CVBandPDQJac(CVBandPrecData pdata, 
                         realtype t, N_Vector y, N_Vector fy, 
                         N_Vector ftemp, N_Vector ytemp)
{
  CVodeMem cv_mem;
  realtype    fnorm, minInc, inc, inc_inv, srur;
  long int group, i, j, width, ngroups, i1, i2;
  realtype *col_j, *ewt_data, *fy_data, *ftemp_data, *y_data, *ytemp_data;

  cv_mem = (CVodeMem) pdata->cvode_mem;

  /* Obtain pointers to the data for ewt, fy, ftemp, y, ytemp. */
  ewt_data   = N_VGetArrayPointer(ewt);
  fy_data    = N_VGetArrayPointer(fy);
  ftemp_data = N_VGetArrayPointer(ftemp);
  y_data     = N_VGetArrayPointer(y);
  ytemp_data = N_VGetArrayPointer(ytemp);

  /* Load ytemp with y = predicted y vector. */
  N_VScale(ONE, y, ytemp);

  /* Set minimum increment based on uround and norm of f. */
  srur = RSqrt(uround);
  fnorm = N_VWrmsNorm(fy, ewt);
  minInc = (fnorm != ZERO) ?
           (MIN_INC_MULT * ABS(h) * uround * N * fnorm) : ONE;

  /* Set bandwidth and number of column groups for band differencing. */
  width = ml + mu + 1;
  ngroups = MIN(width, N);
  
  for (group = 1; group <= ngroups; group++) {
    
    /* Increment all y_j in group. */
    for(j = group-1; j < N; j += width) {
      inc = MAX(srur*ABS(y_data[j]), minInc/ewt_data[j]);
      ytemp_data[j] += inc;
    }

    /* Evaluate f with incremented y. */

    f(t, ytemp, ftemp, f_data);
    nfeBP++;

    /* Restore ytemp, then form and load difference quotients. */
    for (j = group-1; j < N; j += width) {
      ytemp_data[j] = y_data[j];
      col_j = BAND_COL(savedJ,j);
      inc = MAX(srur*ABS(y_data[j]), minInc/ewt_data[j]);
      inc_inv = ONE/inc;
      i1 = MAX(0, j-mu);
      i2 = MIN(j+ml, N-1);
      for (i=i1; i <= i2; i++)
        BAND_COL_ELEM(col_j,i,j) =
          inc_inv * (ftemp_data[i] - fy_data[i]);
    }
  }
}
Exemplo n.º 16
0
void Quat::ToAxisAngle(float3 &axis, float &angle) const
{
	// Best: 36.868 nsecs / 98.752 ticks, Avg: 37.095 nsecs, Worst: 37.636 nsecs
	assume2(this->IsNormalized(), *this, this->Length());
	float halfAngle = Acos(w);
	angle = halfAngle * 2.f;
	// Convert cos to sin via the identity sin^2 + cos^2 = 1, and fuse reciprocal and square root to the same instruction,
	// since we are about to divide by it.
	float rcpSinAngle = RSqrt(1.f - w*w);
	axis.x = x * rcpSinAngle;
	axis.y = y * rcpSinAngle;
	axis.z = z * rcpSinAngle;
}
Exemplo n.º 17
0
static void InitUserData(WebData wdata)
{
  int i, j, ns;
  realtype *bcoef, *diff, *cox, *coy, dx, dy;
  realtype (*acoef)[NS];

  acoef = wdata->acoef;
  bcoef = wdata->bcoef;
  diff = wdata->diff;
  cox = wdata->cox;
  coy = wdata->coy;
  ns = wdata->ns = NS;

  for (j = 0; j < NS; j++) { for (i = 0; i < NS; i++) acoef[i][j] = ZERO; }
  for (j = 0; j < NP; j++) {
    for (i = 0; i < NP; i++) {
      acoef[NP+i][j] = EE;
      acoef[i][NP+j] = -GG;
    }
    acoef[j][j] = -AA;
    acoef[NP+j][NP+j] = -AA;
    bcoef[j] = BB;
    bcoef[NP+j] = -BB;
    diff[j] = DPREY;
    diff[NP+j] = DPRED;
  }

  /* Set remaining problem parameters */

  wdata->mxns = MXNS;
  dx = wdata->dx = DX;
  dy = wdata->dy = DY;
  for (i = 0; i < ns; i++) {
    cox[i] = diff[i]/SQR(dx);
    coy[i] = diff[i]/SQR(dy);
  }

  /* Set remaining method parameters */

  wdata->mp = MP;
  wdata->mq = MQ;
  wdata->mx = MX;
  wdata->my = MY;
  wdata->srur = RSqrt(UNIT_ROUNDOFF);
  wdata->mxmp = MXMP;
  wdata->ngrp = NGRP;
  wdata->ngx = NGX;
  wdata->ngy = NGY;
  SetGroups(MX, NGX, wdata->jgx, wdata->jigx, wdata->jxr);
  SetGroups(MY, NGY, wdata->jgy, wdata->jigy, wdata->jyr);
}
Exemplo n.º 18
0
int denseGEQRF(realtype **a, long int m, long int n, realtype *beta, realtype *v)
{
  realtype ajj, s, mu, v1, v1_2;
  realtype *col_j, *col_k;
  long int i, j, k;

  /* For each column...*/
  for(j=0; j<n; j++) {

    col_j = a[j];

    ajj = col_j[j];
    
    /* Compute the j-th Householder vector (of length m-j) */
    v[0] = ONE;
    s = ZERO;
    for(i=1; i<m-j; i++) {
      v[i] = col_j[i+j];
      s += v[i]*v[i];
    }

    if(s != ZERO) {
      mu = RSqrt(ajj*ajj+s);
      v1 = (ajj <= ZERO) ? ajj-mu : -s/(ajj+mu);
      v1_2 = v1*v1;
      beta[j] = TWO * v1_2 / (s + v1_2);
      for(i=1; i<m-j; i++) v[i] /= v1;
    } else {
      beta[j] = ZERO;      
    }

    /* Update upper triangle of A (load R) */
    for(k=j; k<n; k++) {
      col_k = a[k];
      s = ZERO;
      for(i=0; i<m-j; i++) s += col_k[i+j]*v[i];
      s *= beta[j];
      for(i=0; i<m-j; i++) col_k[i+j] -= s*v[i];
    }

    /* Update A (load Householder vector) */
    if(j<m-1) {
      for(i=1; i<m-j; i++) col_j[i+j] = v[i];
    }

  }


  return(0);
}
Exemplo n.º 19
0
static void
CVDenseDQJac(integertype N, DenseMat J, RhsFn f, void *f_data,
			 realtype tn, N_Vector y, N_Vector fy, N_Vector ewt,
			 realtype h, realtype uround, void *jac_data,
			 long int *nfePtr, N_Vector vtemp1,
			 N_Vector vtemp2, N_Vector vtemp3)
{
	realtype fnorm, minInc, inc, inc_inv, yjsaved, srur;
	realtype *y_data, *ewt_data;
	N_Vector ftemp, jthCol;
	M_Env machEnv;
	integertype j;

	machEnv = y->menv;			/* Get machine environment */

	ftemp = vtemp1;				/* Rename work vector for use as f vector value */

	/* Obtain pointers to the data for ewt, y */
	ewt_data = N_VGetData(ewt);
	y_data = N_VGetData(y);

	/* Set minimum increment based on uround and norm of f */
	srur = RSqrt(uround);
	fnorm = N_VWrmsNorm(fy, ewt);
	minInc = (fnorm != ZERO) ?
		(MIN_INC_MULT * ABS(h) * uround * N * fnorm) : ONE;

	jthCol = N_VMake(N, y_data, machEnv);	/* j loop overwrites this data address */

	/* This is the only for loop for 0..N-1 in CVODE */
	for (j = 0; j < N; j++)
	{

		/* Generate the jth col of J(tn,y) */

		N_VSetData(DENSE_COL(J, j), jthCol);
		yjsaved = y_data[j];
		inc = MAX(srur * ABS(yjsaved), minInc / ewt_data[j]);
		y_data[j] += inc;
		f(N, tn, y, ftemp, f_data);
		inc_inv = ONE / inc;
		N_VLinearSum(inc_inv, ftemp, -inc_inv, fy, jthCol);
		y_data[j] = yjsaved;
	}

	N_VDispose(jthCol);

	/* Increment counter nfe = *nfePtr */
	*nfePtr += N;
}
Exemplo n.º 20
0
realtype N_VWL2Norm_Serial(N_Vector x, N_Vector w)
{
  long int i, N;
  realtype sum = ZERO, prodi, *xd, *wd;

  N  = NV_LENGTH_S(x);
  xd = NV_DATA_S(x);
  wd = NV_DATA_S(w);

  for (i=0; i < N; i++) {
    prodi = (*xd++) * (*wd++);
    sum += prodi * prodi;
  }

  return(RSqrt(sum));
}
Exemplo n.º 21
0
float N_VWL2Norm(N_Vector x, N_Vector w)
{
  int i, N;
  float sum = ZERO, prodi, *xd, *wd;

  N = x->length;
  xd = x->data;
  wd = w->data;

  for (i=0; i < N; ++i ) {
    prodi = (*xd++) * (*wd++);
    sum += prodi * prodi;
  }

  return(RSqrt(sum));
}
Exemplo n.º 22
0
static void force(N_Vector yy, realtype *Q, UserData data)
{
  realtype a, k, c, l0, F;
  realtype q, x, p;
  realtype qd, xd, pd;  
  realtype s1, c1, s2, c2, s21, c21;
  realtype l2, l, ld;
  realtype f, fl;

  a = data->a;
  k = data->params[0];
  c = data->params[1];
  l0 = data->l0;
  F = data->F;

  q = NV_Ith_S(yy,0);
  x = NV_Ith_S(yy,1);
  p = NV_Ith_S(yy,2);

  qd = NV_Ith_S(yy,3);
  xd = NV_Ith_S(yy,4);
  pd = NV_Ith_S(yy,5);

  s1 = sin(q);
  c1 = cos(q);
  s2 = sin(p);
  c2 = cos(p);
  s21 = s2*c1 - c2*s1;
  c21 = c2*c1 + s2*s1;

  l2 = x*x - x*(c2+a*c1) + (ONE + a*a)/FOUR + a*c21/TWO;
  l = RSqrt(l2);
  ld = TWO*x*xd - xd*(c2+a*c1) + x*(s2*pd+a*s1*qd) - a*s21*(pd-qd)/TWO;
  ld /= TWO*l;

  f = k*(l-l0) + c*ld;
  fl = f/l;

  Q[0] = - fl * a * (s21/TWO + x*s1) / TWO;
  Q[1] = fl * (c2/TWO - x + a*c1/TWO) + F;
  Q[2] = - fl * (x*s2 - a*s21/TWO) / TWO - F*s2;

}
Exemplo n.º 23
0
realtype N_VWL2Norm_Serial(N_Vector x, N_Vector w)
{
  long int i, N;
  realtype sum, prodi, *xd, *wd;

  sum = ZERO;
  xd = wd = NULL;

  N  = NV_LENGTH_S(x);
  xd = NV_DATA_S(x);
  wd = NV_DATA_S(w);

  for (i = 0; i < N; i++) {
    prodi = xd[i]*wd[i];
    sum += SQR(prodi);
  }

  return(RSqrt(sum));
}
Exemplo n.º 24
0
realtype N_VWrmsNormMask_Serial(N_Vector x, N_Vector w, N_Vector id)
{
  long int i, N;
  realtype sum = ZERO, prodi, *xd, *wd, *idd;

  N  = NV_LENGTH_S(x);
  xd  = NV_DATA_S(x);
  wd  = NV_DATA_S(w);
  idd = NV_DATA_S(id);

  for (i=0; i < N; i++) {
    if (idd[i] > ZERO) {
      prodi = xd[i] * wd[i];
      sum += prodi * prodi;
    }
  }

  return(RSqrt(sum / N));
}
Exemplo n.º 25
0
int CVBBDPrecReInit(void *cvode_mem, 
                    int mudq, int mldq, 
                    realtype dqrely)
{
  CVodeMem cv_mem;
  CVSpilsMem cvspils_mem;
  CVBBDPrecData pdata;
  int Nlocal;

  if (cvode_mem == NULL) {
    CVProcessError(NULL, CVSPILS_MEM_NULL, "CVBBDPRE", "CVBBDPrecReInit", MSGBBD_MEM_NULL);
    return(CVSPILS_MEM_NULL);
  }
  cv_mem = (CVodeMem) cvode_mem;

  /* Test if one of the SPILS linear solvers has been attached */
  if (cv_mem->cv_lmem == NULL) {
    CVProcessError(cv_mem, CVSPILS_LMEM_NULL, "CVBBDPRE", "CVBBDPrecReInit", MSGBBD_LMEM_NULL);
    return(CVSPILS_LMEM_NULL);
  }
  cvspils_mem = (CVSpilsMem) cv_mem->cv_lmem;

  /* Test if the preconditioner data is non-NULL */
  if (cvspils_mem->s_P_data == NULL) {
    CVProcessError(cv_mem, CVSPILS_PMEM_NULL, "CVBBDPRE", "CVBBDPrecReInit", MSGBBD_PMEM_NULL);
    return(CVSPILS_PMEM_NULL);
  } 
  pdata = (CVBBDPrecData) cvspils_mem->s_P_data;

  /* Load half-bandwidths */
  Nlocal = pdata->n_local;
  pdata->mudq = MIN(Nlocal-1, MAX(0,mudq));
  pdata->mldq = MIN(Nlocal-1, MAX(0,mldq));

  /* Set pdata->dqrely based on input dqrely (0 implies default). */
  pdata->dqrely = (dqrely > ZERO) ? dqrely : RSqrt(uround);

  /* Re-initialize nge */
  pdata->nge = 0;

  return(CVSPILS_SUCCESS);
}
Exemplo n.º 26
0
int PVBBDPrecon(integer N, real t, N_Vector y, N_Vector fy,
                boole jok, boole *jcurPtr, real gamma, N_Vector ewt,
                real h, real uround, long int *nfePtr, void *P_data,
                N_Vector vtemp1, N_Vector vtemp2, N_Vector vtemp3)
{
  integer Nlocal, ier;
  real rely, srur;
  PVBBDData pdata;

  pdata = (PVBBDData)P_data;

  Nlocal = N_VLOCLENGTH(y);

  if (jok) {
    /* If jok = TRUE, use saved copy of J */
    *jcurPtr = FALSE;
    BandCopy(savedJ, savedP, mukeep, mlkeep);
  } else {
    /* Otherwise call PVBBDDQJac for new J value */
    *jcurPtr = TRUE;
    BandZero(savedJ);
    /* Set relative increment for y via dqrely and uround */
    srur = RSqrt(uround);
    rely = (dqrely == ZERO) ? srur : dqrely;
    PVBBDDQJac(Nlocal, mudq, mldq, mukeep, mlkeep, rely, gloc, cfn, savedJ,
	       f_data, t, y, ewt, h, uround, vtemp1, vtemp2, vtemp3);
    nge += 1 + MIN(mldq + mudq + 1, Nlocal);
    BandCopy(savedJ, savedP, mukeep, mlkeep);
  }
  
  /* Scale and add I to get P = I - gamma*J */
  BandScale(-gamma, savedP);
  BandAddI(savedP);
 
  /* Do LU factorization of P in place */
  ier = BandFactor(savedP, pivots);
 
  /* Return 0 if the LU was complete; otherwise return 1 */
  if (ier > 0) return(1);
  return(0);
}
Exemplo n.º 27
0
real N_VWL2Norm(N_Vector x, N_Vector w)
{
  integer N, N_global;
  real sum = ZERO, *xd, *wd, gsum;
  machEnvType machenv;

  N = x->length;
  N_global = x->global_length;
  xd = x->data;
  wd = w->data;
  machenv = x->machEnv;
  
  #pragma omp parallel for reduction(+: sum)
  for (integer i=0; i < N; i++) {
    real prodi = xd[i] * wd[i];
    sum += prodi * prodi;
  }

  gsum = PVecAllReduce(sum, 1, machenv);
  return(RSqrt(gsum));
}
Exemplo n.º 28
0
int CPBBDPrecReInit(void *bbd_data, int mudq, int mldq, 
                    realtype dqrely,
                    void *gloc, CPBBDCommFn cfn)
{
  CPBBDPrecData pdata;
  CPodeMem cp_mem;
  int Nlocal;

  if (bbd_data == NULL) {
    cpProcessError(NULL, CPBBDPRE_PDATA_NULL, "CPBBDPRE", "CPBBDPrecReInit", MSGBBDP_PDATA_NULL);
    return(CPBBDPRE_PDATA_NULL);
  }
  pdata  = (CPBBDPrecData) bbd_data;
  cp_mem = (CPodeMem) pdata->cpode_mem;

  /* Set pointers to gloc and cfn; load half-bandwidths */
  switch (ode_type) {
  case CP_EXPL:
    pdata->glocE = (CPBBDLocalRhsFn) gloc;
    pdata->glocI = NULL;
    break;
  case CP_IMPL:
    pdata->glocI = (CPBBDLocalResFn) gloc;
    pdata->glocE = NULL;
    break;
  }
  pdata->cfn = cfn;
  Nlocal = pdata->n_local;
  pdata->mudq = MIN(Nlocal-1, MAX(0,mudq));
  pdata->mldq = MIN(Nlocal-1, MAX(0,mldq));

  /* Set pdata->dqrely based on input dqrely (0 implies default). */
  pdata->dqrely = (dqrely > ZERO) ? dqrely : RSqrt(uround);

  /* Re-initialize nge */
  pdata->nge = 0;

  return(CPBBDPRE_SUCCESS);
}
Exemplo n.º 29
0
vec Quat::Axis() const
{
	assume2(this->IsNormalized(), *this, this->Length());
#if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
	// Best: 6.145 nsecs / 16.88 ticks, Avg: 6.367 nsecs, Worst: 6.529 nsecs
	assume2(this->IsNormalized(), *this, this->Length());
	simd4f cosAngle = _mm_shuffle_ps(q, q, _MM_SHUFFLE(3, 3, 3, 3));
	simd4f rcpSinAngle = rsqrt_ps(sub_ps(set1_ps(1.f), mul_ps(cosAngle, cosAngle)));
	simd4f a = mul_ps(q, rcpSinAngle);

	// Set the w component to zero.
	simd4f highPart = _mm_unpackhi_ps(a, zero_ps()); // [_ _ 0 z]
	a = _mm_movelh_ps(a, highPart); // [0 z y x]
	return FLOAT4_TO_DIR(a);
#else
	// Best: 6.529 nsecs / 18.152 ticks, Avg: 6.851 nsecs, Worst: 8.065 nsecs

	// Convert cos to sin via the identity sin^2 + cos^2 = 1, and fuse reciprocal and square root to the same instruction,
	// since we are about to divide by it.
	float rcpSinAngle = RSqrt(1.f - w*w);
	return DIR_VEC(x, y, z) * rcpSinAngle;
#endif
}
Exemplo n.º 30
0
int CVSpbcg(void *cvode_mem, int pretype, int maxl)
{
  CVodeMem cv_mem;
  CVSpilsMem cvspils_mem;
  SpbcgMem spbcg_mem;
  int mxl;

  /* Return immediately if cvode_mem is NULL */
  if (cvode_mem == NULL) {
    CVProcessError(NULL, CVSPILS_MEM_NULL, "CVSPBCG", "CVSpbcg", MSGS_CVMEM_NULL);
    return(CVSPILS_MEM_NULL);
  }
  cv_mem = (CVodeMem) cvode_mem;

  /* Check if N_VDotProd is present */
  if (vec_tmpl->ops->nvdotprod == NULL) {
    CVProcessError(cv_mem, CVSPILS_ILL_INPUT, "CVSPBCG", "CVSpbcg", MSGS_BAD_NVECTOR);
    return(CVSPILS_ILL_INPUT);
  }

  if (lfree != NULL) lfree(cv_mem);

  /* Set four main function fields in cv_mem */
  linit  = CVSpbcgInit;
  lsetup = CVSpbcgSetup;
  lsolve = CVSpbcgSolve;
  lfree  = CVSpbcgFree;

  /* Get memory for CVSpilsMemRec */
  cvspils_mem = NULL;
  cvspils_mem = (CVSpilsMem) malloc(sizeof(CVSpilsMemRec));
  if (cvspils_mem == NULL) {
    CVProcessError(cv_mem, CVSPILS_MEM_FAIL, "CVSPBCG", "CVSpbcg", MSGS_MEM_FAIL);
    return(CVSPILS_MEM_FAIL);
  }

  /* Set ILS type */
  cvspils_mem->s_type = SPILS_SPBCG;

  /* Set Spbcg parameters that have been passed in call sequence */
  cvspils_mem->s_pretype = pretype;
  mxl = cvspils_mem->s_maxl = (maxl <= 0) ? CVSPILS_MAXL : maxl;

  /* Set default values for the rest of the Spbcg parameters */
  cvspils_mem->s_delt      = CVSPILS_DELT;
  cvspils_mem->s_P_data    = NULL;
  cvspils_mem->s_pset      = NULL;
  cvspils_mem->s_psolve    = NULL;
  cvspils_mem->s_jtimes    = CVSpilsDQJtimes;
  cvspils_mem->s_j_data    = cvode_mem;
  cvspils_mem->s_last_flag = CVSPILS_SUCCESS;

  setupNonNull = FALSE;

  /* Check for legal pretype */ 
  if ((pretype != PREC_NONE) && (pretype != PREC_LEFT) &&
      (pretype != PREC_RIGHT) && (pretype != PREC_BOTH)) {
    CVProcessError(cv_mem, CVSPILS_ILL_INPUT, "CVSPBCG", "CVSpbcg", MSGS_BAD_PRETYPE);
    return(CVSPILS_ILL_INPUT);
  }

  /* Allocate memory for ytemp and x */
  ytemp = NULL;
  ytemp = N_VClone(vec_tmpl);
  if (ytemp == NULL) {
    CVProcessError(cv_mem, CVSPILS_MEM_FAIL, "CVSPBCG", "CVSpbcg", MSGS_MEM_FAIL);
    free(cvspils_mem); cvspils_mem = NULL;
    return(CVSPILS_MEM_FAIL);
  }
  x = NULL;
  x = N_VClone(vec_tmpl);
  if (x == NULL) {
    CVProcessError(cv_mem, CVSPILS_MEM_FAIL, "CVSPBCG", "CVSpbcg", MSGS_MEM_FAIL);
    N_VDestroy(ytemp);
    free(cvspils_mem); cvspils_mem = NULL;
    return(CVSPILS_MEM_FAIL);
  }

  /* Compute sqrtN from a dot product */
  N_VConst(ONE, ytemp);
  sqrtN = RSqrt(N_VDotProd(ytemp, ytemp));

  /* Call SpbcgMalloc to allocate workspace for Spbcg */
  spbcg_mem = NULL;
  spbcg_mem = SpbcgMalloc(mxl, vec_tmpl);
  if (spbcg_mem == NULL) {
    CVProcessError(cv_mem, CVSPILS_MEM_FAIL, "CVSPBCG", "CVSpbcg", MSGS_MEM_FAIL);
    N_VDestroy(ytemp);
    N_VDestroy(x);
    free(cvspils_mem); cvspils_mem = NULL;
    return(CVSPILS_MEM_FAIL);
  }
  
  /* Attach SPBCG memory to spils memory structure */
  spils_mem = (void *) spbcg_mem;

  /* Attach linear solver memory to integrator memory */
  lmem = cvspils_mem;

  return(CVSPILS_SUCCESS);
}