// Contribution of the element Jacobians to the objective function value and // gradients (2D version) bool OptHOM::addJacObjGrad(double &Obj, alglib::real_1d_array &gradObj) { minJac = 1.e300; maxJac = -1.e300; for (int iEl = 0; iEl < mesh.nEl(); iEl++) { // Scaled Jacobians std::vector<double> sJ(mesh.nBezEl(iEl)); // Gradients of scaled Jacobians std::vector<double> gSJ(mesh.nBezEl(iEl)*mesh.nPCEl(iEl)); mesh.scaledJacAndGradients (iEl,sJ,gSJ); for (int l = 0; l < mesh.nBezEl(iEl); l++) { double f1 = compute_f1(sJ[l], jacBar); Obj += compute_f(sJ[l], jacBar); if (_optimizeBarrierMax) { Obj += compute_f(sJ[l], barrier_min); f1 += compute_f1(sJ[l], barrier_min); } for (int iPC = 0; iPC < mesh.nPCEl(iEl); iPC++) gradObj[mesh.indPCEl(iEl,iPC)] += f1*gSJ[mesh.indGSJ(iEl,l,iPC)]; minJac = std::min(minJac, sJ[l]); maxJac = std::max(maxJac, sJ[l]); } } return true; }
void fill_lanes_naive (int N, int * iarr, int * jarr, int * marr, int * base, int * offs, real * x, real * f, real rsq, void * data) { __assume_aligned(iarr, 64); __assume_aligned(jarr, 64); #pragma simd for (int idx = 0; idx < N; idx++) { const int i = iarr[idx]; const int j = jarr[idx]; const int M = marr[i]; const real xi = x[i]; const int * idxs = base + offs[i]; real acc_fi = 0; const real xj = x[j]; const real dxij = xi - xj; real acc_fj = 0; for (int k = 0; k < M; k++) { const int kk = idxs[k]; const real xk = x[kk]; const real dxik = xi - xk; if (dxik * dxik > rsq) continue; real fi = 0, fj = 0, fk = 0; compute_f(dxij, dxik, &fi, &fj, &fk); acc_fj += fj; acc_fi += fi; memory_reduce_add(&f[kk], fk); } memory_reduce_add(&f[j], acc_fj); memory_reduce_add(&f[i], acc_fi); } }
/*
 * Fills the first three rows (four columns each) of the measurement
 * matrix C from the quaternion state X.
 *
 * Per the original author's notes, C relates the measured state to the
 * estimated state and is laid out as
 *
 *   C = [
 *         [ dphi/dq0    dphi/dq1    ... ]
 *         [ dtheta/dq0  dtheta/dq1  ... ]
 *         [ dpsi/dq0    dpsi/dq1    ... ]
 *       ]
 *
 * The DCM is rebuilt from X with quat2dcm() and the per-row scale
 * factors come from compute_f().
 */
static void
compute_c(
	m_t			C,
	v_t			X
)
{
	m_t			dcm;
	v_t			scale;

	/* Direction cosine matrix for the current quaternion */
	quat2dcm( dcm, X );

	/* Per-row scale factors derived from the DCM */
	compute_f( scale, dcm );

	/* Row 0 */
	C[0][0] = scale[0] * ( X[1] * dcm[2][2] );
	C[0][1] = scale[0] * ( X[0] * dcm[2][2] + 2.0 * X[1] * dcm[1][2] );
	C[0][2] = scale[0] * ( X[3] * dcm[2][2] + 2.0 * X[2] * dcm[1][2] );
	C[0][3] = scale[0] * ( X[2] * dcm[2][2] );

	/* Row 1 */
	C[1][0] = scale[1] * -2.0 * X[2];
	C[1][1] = scale[1] *  2.0 * X[3];
	C[1][2] = scale[1] * -2.0 * X[0];
	C[1][3] = scale[1] *  2.0 * X[1];

	/* Row 2 */
	C[2][0] = scale[2] * ( X[3] * dcm[0][0] );
	C[2][1] = scale[2] * ( X[2] * dcm[0][0] );
	C[2][2] = scale[2] * ( X[1] * dcm[0][0] + 2.0 * X[2] * dcm[0][1] );
	C[2][3] = scale[2] * ( X[0] * dcm[0][0] + 2.0 * X[3] * dcm[0][1] );
}
/*
 * Parses `text` as exactly one float.
 *
 * Returns nonzero and stores the value in *value when the whole argument is
 * a number (leading/trailing whitespace allowed); returns 0 otherwise.
 * The extra " %c" conversion only succeeds if a non-blank character follows
 * the number, so a result of exactly 1 means "number and nothing else".
 * This rejects inputs such as "1.5abc" that a bare "%f" would accept.
 */
static int parse_float_arg(const char *text, float *value)
{
  char trailing;
  return sscanf(text, "%f %c", value, &trailing) == 1;
}

/*
 * Command-line driver: reads two float values from argv and prints
 * compute_f(x, y).
 *
 * Exit status is 0 on success and 1 on a wrong argument count or an
 * argument that is not a valid float.
 */
int main(int argc, char **argv)
{
  float x, y;

  if (argc != 3) {
    fprintf(stderr,"Usage: %s <float value> <float value>\n",argv[0]);
    exit(1);
  }

  if (!parse_float_arg(argv[1], &x)) {
    fprintf(stderr,"Invalid command-line argument '%s'\n",argv[1]);
    exit(1);
  }

  if (!parse_float_arg(argv[2], &y)) {
    fprintf(stderr,"Invalid command-line argument '%s'\n",argv[2]);
    exit(1);
  }

  printf("f(%.5f,%.5f) = %.9f\n",x,y,compute_f(x,y));
  exit(0);
}
double* inner_outer_gauss_seidel_alg_vecs( Graph g, double alpha, double tol, int maxit, double *v, double *x, double *y, double *f, double beta, double itol, bool resid, bool normed) { size_t n = (size_t)size(g); stime_struct start; simple_time_clock(&start); printf("inoutgs(%6.4f,%6.4f,%8e,%1i) with tol=%8e and maxit=%6i iterations\n", alpha, beta, itol, resid, tol, maxit); fflush(stdout); double odelta, sumy=0.0, dtx, nx, ndiff, ltol=log(tol), la=log(alpha), dt; if (dangling_mult(g, x, y, v, 1.0, &dtx, NULL)) { return (NULL); } odelta = compute_outer_residual(x, y, v, alpha, n); int iter = 0, rval, nresit = 0, nmult = 1, nresids = 0; #ifdef BVALGS_VERBOSE printf(" iogs (outer) : iter = %6i ; odelta = %10e ; dt = %7.1f\n", iter, odelta, elapsed_time(&start)); #endif while (odelta > tol && nmult < maxit) { int iiter=0; double idelta = odelta; compute_f(f, y, v, alpha, beta, n); while (iter+iiter < maxit && idelta > itol) { gauss_seidel_sweep(g, x, NULL, f, beta, 1.0, dtx, false, &nx, &idelta, &dtx); nmult++; if (normed) { shift_and_scale(x,0.0,1./nx,n); dtx=dtx/nx; nx=1.0; } idelta = idelta; iiter++; // adjust for diff and not residual } if (dangling_mult(g, x, y, v, 1.0, &dtx, &dtx)) { return (NULL); } iter++; nmult++; odelta = compute_outer_residual(x,y,v,alpha,n); #ifdef BVALGS_VERBOSE printf(" iogs (outer) : iter = %6i ; odelta = %10e ; dt = %7.1f ; nmult = %9i\n", iter, odelta, elapsed_time(&start), nmult); #endif if (iiter < 2 || odelta < itol) { break; } } dtx = sum_dtx(g,x); dt = elapsed_time(&start); simple_time_clock(&start); while (odelta > tol && nmult-nresids < maxit) { rval = gauss_seidel_sweep(g, x, NULL, v, alpha, (1.0-alpha), dtx, false, &nx, &ndiff, &dtx); nmult++; iter++; if (normed) { shift_and_scale(x,0.0,1./nx,n); dtx=dtx/nx; nx=1.0; } if (rval) { return (NULL); } dt += elapsed_time(&start); /* compute the residual */ if (resid) { odelta = compute_residual(g,x,y,v,alpha,dtx,nx); nmult++; nresids++; simple_time_clock(&start); } else { 
simple_time_clock(&start); if (ndiff < tol && iter>nresit) { odelta = compute_residual(g,x,y,v,alpha,dtx,nx); nmult++; nresids++; nresit=iter+(int)((ltol - log(odelta))/(2.0*la)); } } #ifdef BVALGS_VERBOSE printf(" iogs ( gs) : iter = %6i ; delta = %10e ; diff = %10e ; dt = %7.1f sec ; nmult = %6i\n", iter, odelta, ndiff, dt, nmult ); #endif } if (odelta > tol) { printf("iogs(%6.4f) did not converge to %8e in %6i sweeps\n", alpha, tol, maxit); fflush(stdout); } else { printf("iogs : solved pagerank(a=%6.4f) in %5i its, %5i sweeps, and %5i mults to %8e tol\n", alpha, iter, nmult-nresids, nmult, tol); fflush(stdout); } return y; }
static int tree_v(double tt, double v0, double kappa, double theta, double omega, int Nt) { int i, j; int z; /*a variable for k_u or k_d, to add to k on n+1 step*/ double Ru, Rd; /*stores k_u(n,k) and k_d(n,k), respectively*/ double mu_r, v_curr; double dt, sqrt_dt; /*Fixed tree for R=f*/ f[0][0] = compute_f(v0, omega); dt = tt / (double)Nt; sqrt_dt = sqrt(dt); V[0][0] = compute_v(f[0][0], omega); f[1][0] = f[0][0] - sqrt_dt; f[1][1] = f[0][0] + sqrt_dt; V[1][0] = compute_v(f[1][0], omega); V[1][1] = compute_v(f[1][1], omega); for (i = 1; i<Nt; i++) for (j = 0; j <= i; j++) { f[i + 1][j] = f[i][j] - sqrt_dt; f[i + 1][j + 1] = f[i][j] + sqrt_dt; V[i + 1][j] = compute_v(f[i + 1][j], omega); V[i + 1][j + 1] = compute_v(f[i + 1][j + 1], omega); } /*Evolve tree for f*/ for (i = 0; i<Nt; i++) { printf("Making voltree. Layer: %d of %d\n", i, Nt - 1); for (j = 0; j <= i; j++) { /*Compute mu_f*/ v_curr = V[i][j]; mu_r = kappa*(theta - v_curr); z = 0; while ((V[i][j] + mu_r*dt<V[i + 1][j - z]) && (j - z >= 0)) { z = z + 1; } f_down[i][j] = -z; Rd = V[i + 1][j - z]; z = 0; while ((V[i][j] + mu_r*dt>V[i + 1][j + z]) && (j + z <= i)) { z = z + 1; } f_up[i][j] = z; Ru = V[i + 1][j + z]; pu_f[i][j] = (V[i][j] + mu_r*dt - Rd) / (Ru - Rd); if ((Ru - 1.e-9>V[i + 1][i + 1]) || (j + f_up[i][j]>i + 1)) { pu_f[i][j] = 1; f_up[i][j] = i + 1 - j; f_down[i][j] = i - j; } if ((Rd + 1.e-9<V[i + 1][0]) || (j + f_down[i][j]<0)) { pu_f[i][j] = 0.; f_up[i][j] = 1 - j; f_down[i][j] = 0 - j; } pd_f[i][j] = 1. - pu_f[i][j]; } } return 1; }