EXPORT double cgrid1d_integral_of_square(const cgrid1d *grid) { long i, nx = grid->nx; double sum = 0; double complex *value = grid->value; #pragma omp parallel for firstprivate(nx,value) private(i) reduction(+:sum) default(none) schedule(runtime) for(i = 0; i < nx; i++) sum += sqnorm(value[i]); return sum * grid->step; }
EXPORT void cgrid1d_fd_gradient_dot_gradient(const cgrid1d *grid, cgrid1d *grad_dot_grad){ long i; long nx = grid->nx; double inv_2delta2 = 1./ (2.*grid->step * 2.*grid->step); double complex *gvalue = grad_dot_grad->value; /* grad f(x,y,z) dot grad f(x,y,z) = [ |f(+,0,0) - f(-,0,0)|^2 + |f(0,+,0) - f(0,-,0)|^2 + |f(0,0,+) - f(0,0,-)|^2 ] / (2h)^2 */ #pragma omp parallel for firstprivate(nx,gvalue,inv_2delta2,grid) private(i) default(none) schedule(runtime) for (i = 0; i < nx; i++) gvalue[i] = inv_2delta2 * sqnorm(cgrid1d_value_at_index(grid,i+1) - cgrid1d_value_at_index(grid,i-1)); }
EXPORT double complex cgrid1d_grid_expectation_value(const cgrid1d *grida, const cgrid1d *gridb) { long i, nx = grida->nx; double complex sum = 0.0; double complex *avalue = grida->value; double complex *bvalue = gridb->value; #pragma omp parallel for firstprivate(nx,avalue,bvalue) private(i) reduction(+:sum) default(none) schedule(runtime) for(i = 0; i < nx; i++) sum += sqnorm(avalue[i]) * bvalue[i]; return sum * grida->step; }
void gmm_objective(int d, int k, int n, double* alphas, double* means, double* icf, double *x, Wishart wishart, double *err) { int ik, ix, icf_sz; double *main_term, *sum_qs, *Qdiags, *xcentered, *Qxcentered; double slse, lse_alphas, CONSTANT; CONSTANT = -n*d*0.5*log(2 * PI); icf_sz = d*(d + 1) / 2; main_term = (double *)malloc(k*sizeof(double)); sum_qs = (double *)malloc(k*sizeof(double)); Qdiags = (double *)malloc(d*k*sizeof(double)); xcentered = (double *)malloc(d*sizeof(double)); Qxcentered = (double *)malloc(d*sizeof(double)); preprocess_qs(d, k, icf, sum_qs, Qdiags); slse = 0.; for (ix = 0; ix < n; ix++) { for (ik = 0; ik < k; ik++) { subtract(d, &x[ix*d], &means[ik*d], xcentered); Qtimesx(d, &Qdiags[ik*d], &icf[ik*icf_sz + d], xcentered, Qxcentered); main_term[ik] = alphas[ik] + sum_qs[ik] - 0.5*sqnorm(d, Qxcentered); } slse = slse + logsumexp(k, main_term); } free(main_term); free(xcentered); free(Qxcentered); lse_alphas = logsumexp(k, alphas); *err = CONSTANT + slse - n*lse_alphas; *err = *err + log_wishart_prior(d, k, wishart, sum_qs, Qdiags, icf); free(sum_qs); free(Qdiags); // this is here so that tapenade would recognize that means and inv_cov_factors are variables *err = *err + ((means[0] - means[0]) + (icf[0] - icf[0])); }
EXPORT double complex cgrid1d_grid_expectation_value_func(void *arg, double complex (*func)(void *arg, double complex val, double x), const cgrid1d *grida) { long i, nx = grida->nx; double complex sum = 0.0; double complex *avalue = grida->value; double x, step = grida->step; #pragma omp parallel for firstprivate(func,arg,nx,avalue,step) private(x,i) reduction(+:sum) default(none) schedule(runtime) for(i = 0; i < nx; i++) { x = (i - nx/2.0) * step; sum += sqnorm(avalue[i]) * func(arg, avalue[i], x); } return sum * step; }
float HitboxComponent::GetColTime(HitboxComponent &ha, HitboxComponent &hb) { Vector2f orig = ha.opos-hb.opos; Vector2f speed = (ha.pos-ha.opos)-(hb.pos-hb.opos); float minDistTime = - (speed | orig) / sqnorm(speed); if(minDistTime < 0) // Dist(t) is increasing: min dist was reached just before update return 0; else if(minDistTime > 1) // Dist(t) is decreasing: min dist is reached at end of update return 1; else // Dist(t) is reached during update at minDistTime return minDistTime; }
EXPORT double cgrid1d_weighted_integral_of_square(const cgrid1d *grid, double (*weight)(void *farg, double x), void *farg) { long i, nx = grid->nx; double x, step = grid->step; double sum = 0, w; double complex *value = grid->value; #pragma omp parallel for firstprivate(nx,step,value,weight,farg) private(w,i,x) reduction(+:sum) default(none) schedule(runtime) for(i = 0; i < nx; i++) { x = (i - nx/2.0) * step; w = weight(farg, x); sum += w * sqnorm(value[i]); } return sum * grid->step; }
EXPORT double cgrid3d_integral_of_square_cyl(const cgrid3d *grid) { long i, ij, k, ijnz, nr = grid->nx, nphi = grid->ny, nz = grid->nz; double sum = 0, ssum = 0, r, step = grid->step; double complex *value = grid->value; #pragma omp parallel for firstprivate(nphi,value,nr,nz,step) private(ij,ijnz,k,ssum,i,r) reduction(+:sum) default(none) schedule(runtime) for(ij = 0; ij < nr * nphi; ij++) { ijnz = ij * nz; i = ij / nphi; r = i * step; ssum = 0.0; for(k = 0; k < nz; k++) ssum += sqnorm(value[ijnz + k]); sum += ssum * r; } return sum * grid->step * grid->step * (2.0 * M_PI / (double) nphi); }
EXPORT double complex cgrid3d_grid_expectation_value_cyl(const cgrid3d *grida, const cgrid3d *gridb) { long i, ij, k, ijnz, nr = grida->nx, nphi = grida->ny, nz = grida->nz; double sum = 0.0, ssum, r, step = grida->step; double complex *avalue = grida->value; double complex *bvalue = gridb->value; #pragma omp parallel for firstprivate(nz,nphi,avalue,bvalue,step,nr) private(ij,ijnz,k,ssum,i,r) reduction(+:sum) default(none) schedule(runtime) for(ij = 0; ij < nr * nphi; ij++) { ijnz = ij * nz; i = ij / nphi; r = i * step; ssum = 0.0; for(k = 0; k < nz; k++) ssum += sqnorm(avalue[ijnz + k]) * bvalue[ijnz + k]; sum += r * ssum; } return sum * grida->step * grida->step * (2.0 * M_PI / (double) nphi); }
EXPORT void cgrid1d_adaptive_map(cgrid1d *grid, double complex (*func)(void *arg, double x), void *farg, int min_ns, int max_ns, double tol) { long i, nx = grid->nx, ns; double xc, step = grid->step; double tol2 = tol * tol; double complex sum, sump; double complex *value = grid->value; if (min_ns < 1) min_ns = 1; if (max_ns < min_ns) max_ns = min_ns; #pragma omp parallel for firstprivate(farg,nx,min_ns,max_ns,step,func,value,tol2) private(i,ns,xc,sum,sump) default(none) schedule(runtime) for (i = 0; i < nx; i++) { xc = (i - nx/2.0) * step; sum = func(farg, xc); for(ns = min_ns; ns <= max_ns; ns *= 2) { sum = linearly_weighted_integralc1d(func, farg, xc, step, ns); sump = linearly_weighted_integralc1d(func, farg, xc, step, ns+1); if(sqnorm(sum - sump) < tol2) break; value[i] = 0.5 * (sum + sump); } } }
void gmm_objective_split_inner(int d, int k, double* alphas, double* means, double* icf, double *x, double *err) { int ik, ix, icf_sz; double *main_term, *sum_qs, *Qdiags, *xcentered, *Qxcentered; icf_sz = d*(d + 1) / 2; main_term = (double *)malloc(k*sizeof(double)); sum_qs = (double *)malloc(k*sizeof(double)); Qdiags = (double *)malloc(d*k*sizeof(double)); xcentered = (double *)malloc(d*sizeof(double)); Qxcentered = (double *)malloc(d*sizeof(double)); preprocess_qs(d, k, icf, sum_qs, Qdiags); for (ik = 0; ik < k; ik++) { subtract(d, x, &means[ik*d], xcentered); Qtimesx(d, &Qdiags[ik*d], &icf[ik*icf_sz + d], xcentered, Qxcentered); main_term[ik] = alphas[ik] + sum_qs[ik] - 0.5*sqnorm(d, Qxcentered); } *err = logsumexp(k, main_term); free(main_term); free(xcentered); free(Qxcentered); free(sum_qs); free(Qdiags); // this is here so that tapenade would recognize that means and inv_cov_factors are variables *err = *err + ((means[0] - means[0]) + (icf[0] - icf[0])); }
EXPORT double complex cgrid3d_grid_expectation_value_func_cyl(void *arg, double complex (*func)(void *arg, double val, double r, double phi, double z), const cgrid3d *grida) { long ij, i, j, k, ijnz, nr = grida->nx, nphi = grida->ny, nz = grida->nz; double sum = 0.0, ssum; double complex *avalue = grida->value; double z, r, phi, step = grida->step, step_phi = 2.0 * M_PI / (double) nphi; #pragma omp parallel for firstprivate(func,arg,nz,nr,nphi,avalue,step,step_phi) private(z,r,phi,i,j,ij,ijnz,k,ssum) reduction(+:sum) default(none) schedule(runtime) for(ij = 0; ij < nr * nphi; ij++) { ijnz = ij * nz; i = ij / nphi; j = ij % nphi; r = i * step; phi = j * step_phi; ssum = 0.0; for(k = 0; k < nz; k++) { z = (k - nz/2.0) * step; ssum += sqnorm(avalue[ijnz + k]) * func(arg, avalue[ijnz + k], r, phi, z); } sum += r * ssum; } return sum * step * step * step_phi; }
template<typename U> constexpr complex<T>& complex<T>::operator /=(const complex<U>& rhs) { const T sn = sqnorm(rhs); re = (re*rhs.re+im*rhs.im)/sn; im = (re*rhs.im-im*rhs.re)/sn; return *this; }
constexpr auto operator /(const complex<T>& lhs, const complex<U>& rhs) { return _impl::div_impl(lhs, rhs, sqnorm(rhs)); }
constexpr auto operator /(T lhs, const complex<U>& rhs) { return complex<common_type_t<T, U>>(lhs*rhs.re/sqnorm(rhs), -lhs*rhs.im/sqnorm(rhs)); }
/* Differentiation of gmm_objective_split_inner in reverse (adjoint) mode: gradient of useful results: *err with respect to varying inputs: *err *means *icf *alphas RW status of diff variables: *err:in-zero *means:out *icf:out *alphas:out Plus diff mem management of: err:in means:in icf:in alphas:in */ void gmm_objective_split_inner_b(int d, int k, double *alphas, double *alphasb, double *means, double *meansb, double *icf, double *icfb, double *x, double *err, double *errb) { int ik, icf_sz, i; double *main_term, *sum_qs, *Qdiags, *xcentered, *Qxcentered; double *main_termb, *sum_qsb, *Qdiagsb, *xcenteredb, *Qxcenteredb; double result1; double result1b; main_termb = (double *)malloc(k*sizeof(double)); main_term = (double *)malloc(k*sizeof(double)); sum_qsb = (double *)malloc(k*sizeof(double)); sum_qs = (double *)malloc(k*sizeof(double)); Qdiagsb = (double *)malloc(d*k*sizeof(double)); Qdiags = (double *)malloc(d*k*sizeof(double)); xcenteredb = (double *)malloc(d*sizeof(double)); xcentered = (double *)malloc(d*sizeof(double)); Qxcenteredb = (double *)malloc(d*sizeof(double)); Qxcentered = (double *)malloc(d*sizeof(double)); icf_sz = d*(d + 1) / 2; memset(alphasb, 0, k * sizeof(double)); memset(meansb, 0, d * k * sizeof(double)); memset(icfb, 0, icf_sz * k * sizeof(double)); memset(sum_qsb, 0, k * sizeof(double)); memset(Qdiagsb, 0, d * k * sizeof(double)); memset(main_termb, 0, k * sizeof(double)); memset(xcenteredb, 0, d * sizeof(double)); memset(Qxcenteredb, 0, d * sizeof(double)); preprocess_qs(d, k, icf, sum_qs, Qdiags); for (ik = 0; ik < k; ++ik) { for (i = 0; i < d; i++) pushreal8(xcentered[i]); subtract(d, x, &means[ik*d], xcentered); for (i = 0; i < d; i++) pushreal8(Qxcentered[i]); Qtimesx(d, &Qdiags[ik*d], &icf[ik*icf_sz + d], xcentered, Qxcentered); result1 = sqnorm(d, Qxcentered); main_term[ik] = alphas[ik] + sum_qs[ik] - 0.5*result1; } *err = logsumexp_b(k, main_term, main_termb, *errb); *errb = 0.0; *alphasb = 0.0; for (ik = k - 1; ik > -1; --ik) { alphasb[ik] = alphasb[ik] + main_termb[ik]; sum_qsb[ik] = sum_qsb[ik] + main_termb[ik]; result1b = -(0.5*main_termb[ik]); main_termb[ik] = 0.0; sqnorm_b(d, Qxcentered, Qxcenteredb, result1b); for (i = d - 1; i > -1; i--) popreal8(&Qxcentered[i]); Qtimesx_b(d, &Qdiags[ik*d], &Qdiagsb[ik*d], &icf[ik*icf_sz + d], &icfb [ik*icf_sz + d], xcentered, xcenteredb, Qxcentered, Qxcenteredb); for (i = d - 1; i > -1; i--) popreal8(&xcentered[i]); subtract_b(d, x, &means[ik*d], &meansb[ik*d], xcentered, xcenteredb); } preprocess_qs_b(d, k, icf, icfb, sum_qs, sum_qsb, Qdiags, Qdiagsb); free(Qxcentered); free(Qxcenteredb); free(xcentered); free(xcenteredb); free(Qdiags); free(Qdiagsb); free(sum_qs); free(sum_qsb); free(main_term); free(main_termb); *errb = 0.0; }
double norm(const point &a) { return sqrt(sqnorm(a)); }
/* Differentiation of gmm_objective in reverse (adjoint) mode: gradient of useful results: *err with respect to varying inputs: *err *means *icf *alphas RW status of diff variables: *err:in-zero *means:out *icf:out *alphas:out Plus diff mem management of: err:in means:in icf:in alphas:in */ void gmm_objective_b(int d, int k, int n, double *alphas, double *alphasb, double *means, double *meansb, double *icf, double *icfb, double *x, Wishart wishart, double *err, double *errb) { int ik, ix, icf_sz, i; double *main_term, *sum_qs, *Qdiags, *xcentered, *Qxcentered; double *main_termb, *sum_qsb, *Qdiagsb, *xcenteredb, *Qxcenteredb; double slse, lse_alphas, CONSTANT, log_wish_prior_res; double slseb, lse_alphasb; double result1; double result1b; CONSTANT = -n*d*0.5*log(2 * PI); icf_sz = d*(d + 1) / 2; main_termb = (double *)malloc(k*sizeof(double)); main_term = (double *)malloc(k*sizeof(double)); sum_qsb = (double *)malloc(k*sizeof(double)); sum_qs = (double *)malloc(k*sizeof(double)); Qdiagsb = (double *)malloc(d*k*sizeof(double)); Qdiags = (double *)malloc(d*k*sizeof(double)); xcenteredb = (double *)malloc(d*sizeof(double)); xcentered = (double *)malloc(d*sizeof(double)); Qxcenteredb = (double *)malloc(d*sizeof(double)); Qxcentered = (double *)malloc(d*sizeof(double)); memset(alphasb, 0, k * sizeof(double)); memset(meansb, 0, d * k * sizeof(double)); memset(icfb, 0, icf_sz * k * sizeof(double)); memset(sum_qsb, 0, k * sizeof(double)); memset(Qdiagsb, 0, d * k * sizeof(double)); memset(main_termb, 0, k * sizeof(double)); memset(xcenteredb, 0, d * sizeof(double)); memset(Qxcenteredb, 0, d * sizeof(double)); preprocess_qs(d, k, icf, sum_qs, Qdiags); slse = 0.; for (ix = 0; ix < n; ++ix) for (ik = 0; ik < k; ++ik) { for (i = 0; i < d; i++) pushreal8(xcentered[i]); subtract(d, &x[ix*d], &means[ik*d], xcentered); for (i = 0; i < d; i++) pushreal8(Qxcentered[i]); Qtimesx(d, &Qdiags[ik*d], &icf[ik*icf_sz + d], xcentered, Qxcentered); result1 = sqnorm(d, Qxcentered); pushreal8(main_term[ik]); main_term[ik] = alphas[ik] + sum_qs[ik] - 0.5*result1; } result1b = *errb; log_wish_prior_res = log_wishart_prior_b(d, k, wishart, sum_qs, sum_qsb, Qdiags, Qdiagsb, icf, icfb, result1b); slseb = *errb; lse_alphasb = -(n*(*errb)); lse_alphas = logsumexp_b(k, alphas, alphasb, lse_alphasb); for (ix = n - 1; ix > -1; --ix) { result1b = slseb; slse = slse + logsumexp_b(k, main_term, main_termb, result1b); for (ik = k - 1; ik > -1; --ik) { popreal8(&main_term[ik]); alphasb[ik] = alphasb[ik] + main_termb[ik]; sum_qsb[ik] = sum_qsb[ik] + main_termb[ik]; result1b = -(0.5*main_termb[ik]); main_termb[ik] = 0.0; sqnorm_b(d, Qxcentered, Qxcenteredb, result1b); for (i = d - 1; i > -1; i--) popreal8(&Qxcentered[i]); Qtimesx_b(d, &Qdiags[ik*d], &Qdiagsb[ik*d], &icf[ik*icf_sz + d], & icfb[ik*icf_sz + d], xcentered, xcenteredb, Qxcentered, Qxcenteredb); for (i = d - 1; i > -1; i--) popreal8(&xcentered[i]); subtract_b(d, &x[ix*d], &means[ik*d], &meansb[ik*d], xcentered, xcenteredb); } } preprocess_qs_b(d, k, icf, icfb, sum_qs, sum_qsb, Qdiags, Qdiagsb); free(Qxcentered); free(Qxcenteredb); free(xcentered); free(xcenteredb); free(Qdiags); free(Qdiagsb); free(sum_qs); free(sum_qsb); free(main_term); free(main_termb); *err = CONSTANT + slse - n*lse_alphas + log_wish_prior_res; *errb = 0.0; }