예제 #1
0
EXPORT double cgrid1d_integral_of_square(const cgrid1d *grid) {

  long i, nx = grid->nx;
  double sum = 0;
  double complex *value = grid->value;
  
#pragma omp parallel for firstprivate(nx,value) private(i) reduction(+:sum) default(none) schedule(runtime)
  for(i = 0; i < nx; i++)
      sum += sqnorm(value[i]);
  
  return sum * grid->step;
}
예제 #2
0
EXPORT void cgrid1d_fd_gradient_dot_gradient(const cgrid1d *grid, cgrid1d *grad_dot_grad){

  long i;
  long nx = grid->nx;
  double inv_2delta2 = 1./ (2.*grid->step * 2.*grid->step);
  double complex *gvalue = grad_dot_grad->value;
  
  /*  grad f(x,y,z) dot grad f(x,y,z) = [ |f(+,0,0) - f(-,0,0)|^2 + |f(0,+,0) - f(0,-,0)|^2 + |f(0,0,+) - f(0,0,-)|^2 ] / (2h)^2 */
#pragma omp parallel for firstprivate(nx,gvalue,inv_2delta2,grid) private(i) default(none) schedule(runtime)
  for (i = 0; i < nx; i++)
    gvalue[i] = inv_2delta2 * sqnorm(cgrid1d_value_at_index(grid,i+1) - cgrid1d_value_at_index(grid,i-1));
}
예제 #3
0
EXPORT double complex cgrid1d_grid_expectation_value(const cgrid1d *grida, const cgrid1d *gridb) {

  long i, nx = grida->nx;
  double complex sum = 0.0;
  double complex *avalue = grida->value;
  double complex *bvalue = gridb->value;
  
#pragma omp parallel for firstprivate(nx,avalue,bvalue) private(i) reduction(+:sum) default(none) schedule(runtime)
  for(i = 0; i < nx; i++)
    sum += sqnorm(avalue[i]) * bvalue[i];

  return sum * grida->step;
}
예제 #4
0
파일: gmm.c 프로젝트: awf/autodiff
void gmm_objective(int d, int k, int n,
  double* alphas, 
  double* means,
  double* icf, 
  double *x,
  Wishart wishart, 
  double *err)
{
  int ik, ix, icf_sz;
  double *main_term, *sum_qs, *Qdiags, *xcentered, *Qxcentered;
  double slse, lse_alphas, CONSTANT;
  CONSTANT = -n*d*0.5*log(2 * PI);
  icf_sz = d*(d + 1) / 2;

  main_term = (double *)malloc(k*sizeof(double));
  sum_qs = (double *)malloc(k*sizeof(double));
  Qdiags = (double *)malloc(d*k*sizeof(double));
  xcentered = (double *)malloc(d*sizeof(double));
  Qxcentered = (double *)malloc(d*sizeof(double));

  preprocess_qs(d, k, icf, sum_qs, Qdiags);

  slse = 0.;
  for (ix = 0; ix < n; ix++)
  {
    for (ik = 0; ik < k; ik++)
    {
      subtract(d, &x[ix*d], &means[ik*d], xcentered);
      Qtimesx(d, &Qdiags[ik*d], &icf[ik*icf_sz + d], xcentered, Qxcentered);

      main_term[ik] = alphas[ik] + sum_qs[ik] - 0.5*sqnorm(d, Qxcentered);
    }
    slse = slse + logsumexp(k, main_term);
  }
  free(main_term);
  free(xcentered);
  free(Qxcentered);

  lse_alphas = logsumexp(k, alphas);
  *err = CONSTANT + slse - n*lse_alphas;

  *err = *err + log_wishart_prior(d, k, wishart, sum_qs, Qdiags, icf);

  free(sum_qs);
  free(Qdiags);

  // this is here so that tapenade would recognize that means and inv_cov_factors are variables
  *err = *err + ((means[0] - means[0]) +
    (icf[0] - icf[0]));
}
예제 #5
0
EXPORT double complex cgrid1d_grid_expectation_value_func(void *arg, double complex (*func)(void *arg, double complex val, double x), const cgrid1d *grida) {
   
  long i, nx = grida->nx;
  double complex sum = 0.0;
  double complex *avalue = grida->value;
  double x, step = grida->step;
  
#pragma omp parallel for firstprivate(func,arg,nx,avalue,step) private(x,i) reduction(+:sum) default(none) schedule(runtime)
  for(i = 0; i < nx; i++) {
    x = (i - nx/2.0) * step;
    sum += sqnorm(avalue[i]) * func(arg, avalue[i], x);
  }
  
  return sum * step;
}
float HitboxComponent::GetColTime(HitboxComponent &ha, HitboxComponent &hb)
{
	Vector2f orig = ha.opos-hb.opos;
	Vector2f speed = (ha.pos-ha.opos)-(hb.pos-hb.opos);
	float minDistTime = - (speed | orig) / sqnorm(speed);

	if(minDistTime < 0)
		// Dist(t) is increasing: min dist was reached just before update 
		return 0;
	else if(minDistTime > 1)
		// Dist(t) is decreasing: min dist is reached at end of update
		return 1;
	else
		// Dist(t) is reached during update at minDistTime
		return minDistTime;
}
예제 #7
0
EXPORT double cgrid1d_weighted_integral_of_square(const cgrid1d *grid, double (*weight)(void *farg, double x), void *farg) {

  long i, nx = grid->nx;
  double x, step = grid->step;
  double sum = 0, w;
  double complex *value = grid->value;
  
#pragma omp parallel for firstprivate(nx,step,value,weight,farg) private(w,i,x) reduction(+:sum) default(none) schedule(runtime)
  for(i = 0; i < nx; i++) {
    x = (i - nx/2.0) * step;
    w = weight(farg, x);
    sum += w * sqnorm(value[i]);
  }
  
  return sum * grid->step;
}
예제 #8
0
EXPORT double cgrid3d_integral_of_square_cyl(const cgrid3d *grid) {

  long i, ij, k, ijnz, nr = grid->nx, nphi = grid->ny, nz = grid->nz;
  double sum = 0, ssum = 0, r, step = grid->step;
  double complex *value = grid->value;
  
#pragma omp parallel for firstprivate(nphi,value,nr,nz,step) private(ij,ijnz,k,ssum,i,r) reduction(+:sum) default(none) schedule(runtime)
  for(ij = 0; ij < nr * nphi; ij++) {
    ijnz = ij * nz;
    i = ij / nphi;
    r = i * step;
    ssum = 0.0;
    for(k = 0; k < nz; k++)
      ssum += sqnorm(value[ijnz + k]);
    sum += ssum * r;
  }
  
  return sum * grid->step * grid->step * (2.0 * M_PI / (double) nphi);
}
예제 #9
0
EXPORT double complex cgrid3d_grid_expectation_value_cyl(const cgrid3d *grida, const cgrid3d *gridb) {

  long i, ij, k, ijnz, nr = grida->nx, nphi = grida->ny, nz = grida->nz;
  double sum = 0.0, ssum, r, step = grida->step;
  double complex *avalue = grida->value;
  double complex *bvalue = gridb->value;
  
#pragma omp parallel for firstprivate(nz,nphi,avalue,bvalue,step,nr) private(ij,ijnz,k,ssum,i,r) reduction(+:sum) default(none) schedule(runtime)
  for(ij = 0; ij < nr * nphi; ij++) {
    ijnz = ij * nz;
    i = ij / nphi;
    r = i * step;
    ssum = 0.0;
    for(k = 0; k < nz; k++)
      ssum += sqnorm(avalue[ijnz + k]) * bvalue[ijnz + k];
    sum += r * ssum;
  }
  
  return sum * grida->step * grida->step * (2.0 * M_PI / (double) nphi);
}
예제 #10
0
EXPORT void cgrid1d_adaptive_map(cgrid1d *grid, double complex (*func)(void *arg, double x), void *farg, int min_ns, int max_ns, double tol) {

  long i, nx = grid->nx, ns;
  double xc, step = grid->step;
  double tol2 = tol * tol;
  double complex sum, sump;
  double complex *value = grid->value;
  
  if (min_ns < 1) min_ns = 1;
  if (max_ns < min_ns) max_ns = min_ns;
  
#pragma omp parallel for firstprivate(farg,nx,min_ns,max_ns,step,func,value,tol2) private(i,ns,xc,sum,sump) default(none) schedule(runtime)
  for (i = 0; i < nx; i++) {      
    xc = (i - nx/2.0) * step;
    sum  = func(farg, xc);
    for(ns = min_ns; ns <= max_ns; ns *= 2) {
      sum  = linearly_weighted_integralc1d(func, farg, xc, step, ns);
      sump = linearly_weighted_integralc1d(func, farg, xc, step, ns+1);
      if(sqnorm(sum - sump) < tol2) break;
      value[i] = 0.5 * (sum + sump);
    }
  }
}
예제 #11
0
파일: gmm.c 프로젝트: awf/autodiff
void gmm_objective_split_inner(int d, int k,
  double* alphas,
  double* means,
  double* icf,
  double *x,
  double *err)
{
  int ik, ix, icf_sz;
  double *main_term, *sum_qs, *Qdiags, *xcentered, *Qxcentered;
  icf_sz = d*(d + 1) / 2;

  main_term = (double *)malloc(k*sizeof(double));
  sum_qs = (double *)malloc(k*sizeof(double));
  Qdiags = (double *)malloc(d*k*sizeof(double));
  xcentered = (double *)malloc(d*sizeof(double));
  Qxcentered = (double *)malloc(d*sizeof(double));

  preprocess_qs(d, k, icf, sum_qs, Qdiags);

  for (ik = 0; ik < k; ik++)
  {
    subtract(d, x, &means[ik*d], xcentered);
    Qtimesx(d, &Qdiags[ik*d], &icf[ik*icf_sz + d], xcentered, Qxcentered);
    
    main_term[ik] = alphas[ik] + sum_qs[ik] - 0.5*sqnorm(d, Qxcentered);
  }
  *err = logsumexp(k, main_term);
  free(main_term);
  free(xcentered);
  free(Qxcentered);
  free(sum_qs);
  free(Qdiags);

  // this is here so that tapenade would recognize that means and inv_cov_factors are variables
  *err = *err + ((means[0] - means[0]) +
    (icf[0] - icf[0]));
}
예제 #12
0
EXPORT double complex cgrid3d_grid_expectation_value_func_cyl(void *arg, double complex (*func)(void *arg, double val, double r, double phi, double z), const cgrid3d *grida) {
   
  long ij, i, j, k, ijnz, nr = grida->nx, nphi = grida->ny, nz = grida->nz;
  double sum = 0.0, ssum;
  double complex *avalue = grida->value;
  double z, r, phi, step = grida->step, step_phi = 2.0 * M_PI / (double) nphi;
 
#pragma omp parallel for firstprivate(func,arg,nz,nr,nphi,avalue,step,step_phi) private(z,r,phi,i,j,ij,ijnz,k,ssum) reduction(+:sum) default(none) schedule(runtime)
  for(ij = 0; ij < nr * nphi; ij++) {
    ijnz = ij * nz;
    i = ij / nphi;
    j = ij % nphi;
    r = i * step;
    phi = j * step_phi;
    ssum = 0.0;
    for(k = 0; k < nz; k++) {
      z = (k - nz/2.0) * step;
      ssum += sqnorm(avalue[ijnz + k]) * func(arg, avalue[ijnz + k], r, phi, z);
    }
    sum += r * ssum;
  }
  
  return sum * step * step * step_phi;
}
예제 #13
0
 template<typename U> constexpr complex<T>& complex<T>::operator /=(const complex<U>& rhs) {
     const T sn = sqnorm(rhs);
     re = (re*rhs.re+im*rhs.im)/sn;
     im = (re*rhs.im-im*rhs.re)/sn;
     return *this;
 }
예제 #14
0
 constexpr auto operator /(const complex<T>& lhs, const complex<U>& rhs) {
     return _impl::div_impl(lhs, rhs, sqnorm(rhs));
 }
예제 #15
0
 constexpr auto operator /(T lhs, const complex<U>& rhs) {
     return complex<common_type_t<T, U>>(lhs*rhs.re/sqnorm(rhs), -lhs*rhs.im/sqnorm(rhs));
 }
예제 #16
0
파일: gmm_b-all.c 프로젝트: awf/autodiff
/*
Differentiation of gmm_objective_split_inner in reverse (adjoint) mode:
gradient     of useful results: *err
with respect to varying inputs: *err *means *icf *alphas
RW status of diff variables: *err:in-zero *means:out *icf:out
*alphas:out
Plus diff mem management of: err:in means:in icf:in alphas:in
*/
void gmm_objective_split_inner_b(int d, int k, 
  double *alphas, 
  double *alphasb, 
  double *means, 
  double *meansb, 
  double *icf, 
  double *icfb, 
  double *x,
  double *err, 
  double *errb) 
{
  int ik, icf_sz, i;
  double *main_term, *sum_qs, *Qdiags, *xcentered, *Qxcentered;
  double *main_termb, *sum_qsb, *Qdiagsb, *xcenteredb, *Qxcenteredb;
  double result1;
  double result1b;
  main_termb = (double *)malloc(k*sizeof(double));
  main_term = (double *)malloc(k*sizeof(double));
  sum_qsb = (double *)malloc(k*sizeof(double));
  sum_qs = (double *)malloc(k*sizeof(double));
  Qdiagsb = (double *)malloc(d*k*sizeof(double));
  Qdiags = (double *)malloc(d*k*sizeof(double));
  xcenteredb = (double *)malloc(d*sizeof(double));
  xcentered = (double *)malloc(d*sizeof(double));
  Qxcenteredb = (double *)malloc(d*sizeof(double));
  Qxcentered = (double *)malloc(d*sizeof(double));
  icf_sz = d*(d + 1) / 2;

  memset(alphasb, 0, k * sizeof(double));
  memset(meansb, 0, d * k * sizeof(double));
  memset(icfb, 0, icf_sz * k * sizeof(double));
  memset(sum_qsb, 0, k * sizeof(double));
  memset(Qdiagsb, 0, d * k * sizeof(double));
  memset(main_termb, 0, k * sizeof(double));
  memset(xcenteredb, 0, d * sizeof(double));
  memset(Qxcenteredb, 0, d * sizeof(double));

  preprocess_qs(d, k, icf, sum_qs, Qdiags);
  for (ik = 0; ik < k; ++ik) {
    for (i = 0; i < d; i++)
      pushreal8(xcentered[i]);
    subtract(d, x, &means[ik*d], xcentered);
    for (i = 0; i < d; i++)
      pushreal8(Qxcentered[i]);
    Qtimesx(d, &Qdiags[ik*d], &icf[ik*icf_sz + d], xcentered, Qxcentered);
    result1 = sqnorm(d, Qxcentered);
    main_term[ik] = alphas[ik] + sum_qs[ik] - 0.5*result1;
  }
  *err = logsumexp_b(k, main_term, main_termb, *errb);
  *errb = 0.0;
  *alphasb = 0.0;
  for (ik = k - 1; ik > -1; --ik) {
    alphasb[ik] = alphasb[ik] + main_termb[ik];
    sum_qsb[ik] = sum_qsb[ik] + main_termb[ik];
    result1b = -(0.5*main_termb[ik]);
    main_termb[ik] = 0.0;
    sqnorm_b(d, Qxcentered, Qxcenteredb, result1b);
    for (i = d - 1; i > -1; i--)
      popreal8(&Qxcentered[i]);
    Qtimesx_b(d, &Qdiags[ik*d], &Qdiagsb[ik*d], &icf[ik*icf_sz + d], &icfb
      [ik*icf_sz + d], xcentered, xcenteredb, Qxcentered,
      Qxcenteredb);
    for (i = d - 1; i > -1; i--)
      popreal8(&xcentered[i]);
    subtract_b(d, x, &means[ik*d], &meansb[ik*d], xcentered, xcenteredb);
  }
  preprocess_qs_b(d, k, icf, icfb, sum_qs, sum_qsb, Qdiags, Qdiagsb);
  free(Qxcentered);
  free(Qxcenteredb);
  free(xcentered);
  free(xcenteredb);
  free(Qdiags);
  free(Qdiagsb);
  free(sum_qs);
  free(sum_qsb);
  free(main_term);
  free(main_termb);
  *errb = 0.0;
}
double norm(const point &a) { return sqrt(sqnorm(a)); }
예제 #18
0
파일: gmm_b-all.c 프로젝트: awf/autodiff
/*
  Differentiation of gmm_objective in reverse (adjoint) mode:
   gradient     of useful results: *err
   with respect to varying inputs: *err *means *icf *alphas
   RW status of diff variables: *err:in-zero *means:out *icf:out
                *alphas:out
   Plus diff mem management of: err:in means:in icf:in alphas:in
*/
void gmm_objective_b(int d, int k, int n, double *alphas, double *alphasb,
  double *means, double *meansb, double *icf, double *icfb, double *x,
  Wishart wishart, double *err, double *errb) {
  int ik, ix, icf_sz, i;
  double *main_term, *sum_qs, *Qdiags, *xcentered, *Qxcentered;
  double *main_termb, *sum_qsb, *Qdiagsb, *xcenteredb, *Qxcenteredb;
  double slse, lse_alphas, CONSTANT, log_wish_prior_res;
  double slseb, lse_alphasb;
  double result1;
  double result1b;
  CONSTANT = -n*d*0.5*log(2 * PI);
  icf_sz = d*(d + 1) / 2;

  main_termb = (double *)malloc(k*sizeof(double));
  main_term = (double *)malloc(k*sizeof(double));
  sum_qsb = (double *)malloc(k*sizeof(double));
  sum_qs = (double *)malloc(k*sizeof(double));
  Qdiagsb = (double *)malloc(d*k*sizeof(double));
  Qdiags = (double *)malloc(d*k*sizeof(double));
  xcenteredb = (double *)malloc(d*sizeof(double));
  xcentered = (double *)malloc(d*sizeof(double));
  Qxcenteredb = (double *)malloc(d*sizeof(double));
  Qxcentered = (double *)malloc(d*sizeof(double));

  memset(alphasb, 0, k * sizeof(double));
  memset(meansb, 0, d * k * sizeof(double));
  memset(icfb, 0, icf_sz * k * sizeof(double));
  memset(sum_qsb, 0, k * sizeof(double));
  memset(Qdiagsb, 0, d * k * sizeof(double));
  memset(main_termb, 0, k * sizeof(double));
  memset(xcenteredb, 0, d * sizeof(double));
  memset(Qxcenteredb, 0, d * sizeof(double));

  preprocess_qs(d, k, icf, sum_qs, Qdiags);
  slse = 0.;
  for (ix = 0; ix < n; ++ix)
    for (ik = 0; ik < k; ++ik) {
      for (i = 0; i < d; i++)
        pushreal8(xcentered[i]);
      subtract(d, &x[ix*d], &means[ik*d], xcentered);
      for (i = 0; i < d; i++)
        pushreal8(Qxcentered[i]);
      Qtimesx(d, &Qdiags[ik*d], &icf[ik*icf_sz + d], xcentered,
        Qxcentered);
      result1 = sqnorm(d, Qxcentered);
      pushreal8(main_term[ik]);
      main_term[ik] = alphas[ik] + sum_qs[ik] - 0.5*result1;
    }
  result1b = *errb;
  log_wish_prior_res = log_wishart_prior_b(d, k, wishart, sum_qs, sum_qsb, Qdiags, Qdiagsb, icf,
    icfb, result1b);
  slseb = *errb;
  lse_alphasb = -(n*(*errb));
  lse_alphas = logsumexp_b(k, alphas, alphasb, lse_alphasb);
  for (ix = n - 1; ix > -1; --ix) {
    result1b = slseb;
    slse = slse + logsumexp_b(k, main_term, main_termb, result1b);
    for (ik = k - 1; ik > -1; --ik) {
      popreal8(&main_term[ik]);
      alphasb[ik] = alphasb[ik] + main_termb[ik];
      sum_qsb[ik] = sum_qsb[ik] + main_termb[ik];
      result1b = -(0.5*main_termb[ik]);
      main_termb[ik] = 0.0;
      sqnorm_b(d, Qxcentered, Qxcenteredb, result1b);
      for (i = d - 1; i > -1; i--)
        popreal8(&Qxcentered[i]);
      Qtimesx_b(d, &Qdiags[ik*d], &Qdiagsb[ik*d], &icf[ik*icf_sz + d], &
        icfb[ik*icf_sz + d], xcentered, xcenteredb, Qxcentered,
        Qxcenteredb);
      for (i = d - 1; i > -1; i--)
        popreal8(&xcentered[i]);
      subtract_b(d, &x[ix*d], &means[ik*d], &meansb[ik*d], xcentered,
        xcenteredb);
    }
  }
  preprocess_qs_b(d, k, icf, icfb, sum_qs, sum_qsb, Qdiags, Qdiagsb);
  free(Qxcentered);
  free(Qxcenteredb);
  free(xcentered);
  free(xcenteredb);
  free(Qdiags);
  free(Qdiagsb);
  free(sum_qs);
  free(sum_qsb);
  free(main_term);
  free(main_termb);
  *err = CONSTANT + slse - n*lse_alphas + log_wish_prior_res;
  *errb = 0.0;
}