/*
 * Fill in the scaled backward (beta) scores of every item, walking from the
 * last item (T-1) down to item 0.
 *
 * Reads ctx->scale_factor[t] as the scaling coefficient of item t and uses
 * ctx->row as scratch space for one row of L values.
 */
void crf1dc_beta_score(crf1d_context_t* ctx)
{
    const int T = ctx->num_items;
    const int L = ctx->num_labels;
    floatval_t *work = ctx->row;
    floatval_t *beta_t = NULL;
    int t;

    /* Last item: beta[T-1][*] is simply the scaling coefficient C[T-1]. */
    beta_t = BETA_SCORE(ctx, T-1);
    vecset(beta_t, ctx->scale_factor[T-1], L);

    /* Remaining items, from T-2 down to 0. */
    t = T - 2;
    while (t >= 0) {
        const floatval_t *beta_next = BETA_SCORE(ctx, t+1);
        const floatval_t *emit_next = EXP_STATE_SCORE(ctx, t+1);
        int from;

        beta_t = BETA_SCORE(ctx, t);

        /* work[j] = beta[t+1][j] * state[t+1][j] */
        veccopy(work, beta_next, L);
        vecmul(work, emit_next, L);

        /* beta[t][from] = sum_j trans[from][j] * work[j] */
        for (from = 0; from < L; ++from) {
            const floatval_t *edge = EXP_TRANS_SCORE(ctx, from);
            beta_t[from] = vecdot(edge, work, L);
        }

        /* Apply the scaling coefficient of item t. */
        vecscale(beta_t, ctx->scale_factor[t], L);
        --t;
    }
}
/*
 * Finish computing the surface normal `n` for `object` and store in
 * rt->angle the cosine of the angle between that normal and `light_ray`
 * (dot product divided by the product of both lengths).
 *
 * - TRIANGLE / QUADRILATERAL / CUBE: the normal is rebuilt from the
 *   normalised edges (a - b) and (a - c).
 * - ELLIPSOIDE: the incoming normal is scaled by 0.5.
 * - TORUS / PARABOL: the normal comes from the dedicated helpers, evaluated
 *   at rt->inter.
 *
 * Returns the (possibly replaced) normal. No guard is applied against a
 * zero-length normal or light ray.
 */
static t_vector calcul_normal2(t_env *rt, t_figure object, t_vector light_ray, t_vector n)
{
    t_vector edge_ab;
    t_vector edge_ac;
    double light_len;
    double normal_len;

    if (object.name == TRIANGLE || object.name == QUADRILATERAL
        || object.name == CUBE)
    {
        edge_ab = vecsub(&object.a, &object.b);
        edge_ac = vecsub(&object.a, &object.c);
        normalize(&edge_ab);
        normalize(&edge_ac);
        n = vecprod(&edge_ab, &edge_ac);
    }
    if (object.name == ELLIPSOIDE)
    {
        vecscale(&n, 0.5);
    }
    if (object.name == TORUS)
    {
        n = normal_torus(rt->inter, object);
    }
    if (object.name == PARABOL)
    {
        n = normale_parab(rt->inter);
    }

    light_len = sqrt(light_ray.x * light_ray.x + light_ray.y * light_ray.y
            + light_ray.z * light_ray.z);
    normal_len = sqrt(n.x * n.x + n.y * n.y + n.z * n.z);
    rt->angle = vecdot(&n, &light_ray) / (light_len * normal_len);
    return (n);
}
/*
 * Compute the model expectations (marginals) of states and transitions for
 * the partially-constrained lattice described by `mask`.
 *
 * `mask` is indexed as mask[t * L + label]; a non-zero entry means the label
 * is allowed at item t.
 *
 * State marginals overwrite PARTIAL_STATE_MEXP(ctx, t). Transition marginals
 * are accumulated (+=) into PARTIAL_TRANS_MEXP(ctx, i); this function does
 * not clear them first. ctx->row is used as scratch space.
 */
void crf1dc_partial_marginals(crf1d_context_t *ctx, int *mask)
{
    const int T = ctx->num_items;
    const int L = ctx->num_labels;
    int t;

    /*
     * State expectations.
     *  p(t,i) = fwd[t][i] * bwd[t][i] / norm
     *         = (1. / C[t]) * fwd'[t][i] * bwd'[t][i]
     */
    for (t = 0; t < T; ++t) {
        floatval_t *alpha = PARTIAL_ALPHA_SCORE(ctx, t);
        floatval_t *beta = PARTIAL_BETA_SCORE(ctx, t);
        floatval_t *marg = PARTIAL_STATE_MEXP(ctx, t);

        veccopy(marg, alpha, L);
        vecmul(marg, beta, L);
        vecscale(marg, 1. / ctx->partial_scale_factor[t], L);
    }

    /*
     * Transition expectations.
     *  p(t,i,t+1,j) = fwd'[t][i] * edge[i][j] * state[t+1][j] * bwd'[t+1][j]
     * (the scaling coefficients cancel against the normaliser). The
     * expectation of a transition (i -> j) is the sum of p(t,i,t+1,j)
     * over t, restricted to label pairs allowed by the mask.
     */
    for (t = 0; t + 1 < T; ++t) {
        floatval_t *alpha = PARTIAL_ALPHA_SCORE(ctx, t);
        floatval_t *emit = EXP_STATE_SCORE(ctx, t+1);
        floatval_t *beta = PARTIAL_BETA_SCORE(ctx, t+1);
        floatval_t *work = ctx->row;
        const int *from_ok = &mask[t*L];
        const int *to_ok = &mask[(t+1)*L];
        int from;

        /* work[j] = state[t+1][j] * bwd'[t+1][j] */
        veccopy(work, beta, L);
        vecmul(work, emit, L);

        for (from = 0; from < L; ++from) {
            floatval_t *edge;
            floatval_t *acc;
            int to;

            if (!from_ok[from]) {
                continue;
            }
            edge = EXP_TRANS_SCORE(ctx, from);
            acc = PARTIAL_TRANS_MEXP(ctx, from);
            for (to = 0; to < L; ++to) {
                if (!to_ok[to]) {
                    continue;
                }
                acc[to] += alpha[from] * edge[to] * work[to];
            }
        }
    }
}
/*
 * Apply `impulse`, acting at position `offset`, to a body's linear and
 * angular velocity.
 *
 * vel     - linear velocity, updated in place.
 * angVel  - angular velocity, updated in place.
 * invMass / invInertia - scalar inverse mass and inverse inertia.
 *
 * NOTE(review): the vec* helpers are defined elsewhere; from their use here
 * they appear to take the destination first (vecaddscale(dst, a, b, k)
 * presumably computes a + b*k) -- confirm against their definitions.
 */
static inline void addImpulseAtOffset(vec3* vel, vec3* angVel, float invMass, float invInertia, const vec3* offset, const vec3* impulse)
{
    vec3 angularDelta;

    /* Linear part: scaled by the inverse mass. */
    vecaddscale(vel, vel, impulse, invMass);

    /* Angular part: (offset x impulse) scaled by the inverse inertia. */
    veccross(&angularDelta, offset, impulse);
    vecscale(&angularDelta, &angularDelta, invInertia);
    vecadd(angVel, angVel, &angularDelta);
}
/*
 * Fill in the scaled forward (alpha) scores of every item, the per-item
 * scaling coefficients ctx->scale_factor[0..T-1], and ctx->log_norm.
 *
 * Each row of alpha scores is rescaled so that it sums to one; the
 * coefficient used is stored in scale_factor[t]. When a row sums to exactly
 * zero the coefficient is set to 1 to avoid a division by zero.
 */
void crf1dc_alpha_score(crf1d_context_t* ctx)
{
    const int T = ctx->num_items;
    const int L = ctx->num_labels;
    floatval_t *coef = ctx->scale_factor;
    floatval_t *alpha_t = NULL;
    const floatval_t *emit = NULL;
    floatval_t total;
    int t;

    /*
     * Item 0:
     *  alpha[0][j] = state[0][j]
     */
    alpha_t = ALPHA_SCORE(ctx, 0);
    emit = EXP_STATE_SCORE(ctx, 0);
    veccopy(alpha_t, emit, L);
    total = vecsum(alpha_t, L);
    if (total != 0.) {
        coef[0] = 1. / total;
    } else {
        coef[0] = 1.;
    }
    vecscale(alpha_t, coef[0], L);

    /*
     * Items 1..T-1:
     *  alpha[t][j] = state[t][j] * \sum_{i} alpha[t-1][i] * trans[i][j]
     */
    for (t = 1; t < T; ++t) {
        const floatval_t *alpha_prev = ALPHA_SCORE(ctx, t-1);
        int from;

        alpha_t = ALPHA_SCORE(ctx, t);
        emit = EXP_STATE_SCORE(ctx, t);

        veczero(alpha_t, L);
        for (from = 0; from < L; ++from) {
            const floatval_t *edge = EXP_TRANS_SCORE(ctx, from);
            vecaadd(alpha_t, alpha_prev[from], edge, L);
        }
        vecmul(alpha_t, emit, L);

        total = vecsum(alpha_t, L);
        if (total != 0.) {
            coef[t] = 1. / total;
        } else {
            coef[t] = 1.;
        }
        vecscale(alpha_t, coef[t], L);
    }

    /*
     * Logarithm of the normalization factor:
     *  norm = 1. / (C[0] * C[1] ... * C[T-1])
     *  log(norm) = - \sum_{t = 0}^{T-1} log(C[t]).
     */
    ctx->log_norm = -vecsumlog(ctx->scale_factor, T);
}
/*
 * Apply to chassis `c` an impulse along `axis`, acting at `worldOffset`,
 * sized so that the velocity change per unit impulse reported by
 * computeDenominator() yields `requiredVelocityChange`.
 *
 * Updates c->vel and c->angVel through addImpulseAtOffset().
 */
static void doCorrection(Chassis* c, const vec3* worldOffset, const vec3* axis, float requiredVelocityChange)
{
    const float invMass = 1.f/c->mass;
    const float invInertia = 1.f/c->inertia;
    const float magnitude = requiredVelocityChange
        / computeDenominator(invMass, invInertia, worldOffset, axis);
    vec3 impulse;

    vecscale(&impulse, axis, magnitude);
    addImpulseAtOffset(&c->vel, &c->angVel, invMass, invInertia, worldOffset, &impulse);
}
/*
 * Rotate the three axis vectors of `in` by the angular velocity `angvel`
 * applied over `dt`, write them to `out`, then renormalise `out`.
 *
 * The rotation is expressed as a quaternion whose vector part is
 * angvel * (dt / 2) and whose scalar part is 1 - |v|^2 / 2.
 * NOTE(review): this looks like a small-angle approximation rather than an
 * exact unit quaternion -- confirm the intended accuracy for large dt.
 */
void matrixRotateByVelocity(mtx* out, const mtx* in, const vec3* angvel, float dt)
{
    quaternion rot;
    int axis;

    vecscale(&rot.v, angvel, 0.5f*dt);
    rot.w = 1.f - 0.5f*vecsizesq(&rot.v);

    for (axis = 0; axis < 3; ++axis) {
        quaternionRotateVector(&out->v[axis].v3, &rot, &in->v[axis].v3);
    }

    matrixReNormalise(out);
}
/*
 * Velocity change, along `norm`, of the point at `offset` when an impulse
 * of 1.0f is applied in the direction of `norm` at that point.
 *
 * The result is invMass plus the angular contribution
 * norm . (((offset x norm) * invInertia) x offset).
 */
static inline float computeDenominator(float invMass, float invInertia, const vec3* offset, const vec3* norm)
{
    vec3 response;

    /* Angular velocity change per unit impulse ... */
    veccross(&response, offset, norm);
    vecscale(&response, &response, invInertia);
    /* ... converted back to a velocity at the offset point. */
    veccross(&response, &response, offset);

    return vecdot(norm, &response) + invMass;
}
/*
 * Fill in the scaled backward (beta) scores of the partially-constrained
 * lattice described by `mask`, walking from the last item down to item 0.
 *
 * `mask` is indexed as mask[t * L + label]; a non-zero entry means the label
 * is allowed at item t. Scores of disallowed labels are left at zero.
 * Reads ctx->partial_scale_factor[t] as the scaling coefficient of item t
 * and uses ctx->row as scratch space.
 */
void crf1dc_partial_beta_score(crf1d_context_t* ctx, int *mask)
{
    const int T = ctx->num_items;
    const int L = ctx->num_labels;
    floatval_t *work = ctx->row;
    floatval_t *beta_t = NULL;
    const int *here_ok = NULL;
    int t, a, b;

    /* Last item: allowed labels get the scaling coefficient C[T-1]. */
    beta_t = PARTIAL_BETA_SCORE(ctx, T-1);
    veczero(beta_t, L);
    here_ok = &mask[(T-1)*L];
    for (a = 0; a < L; ++a) {
        if (here_ok[a]) {
            beta_t[a] = ctx->partial_scale_factor[T-1];
        }
    }

    /* Remaining items, from T-2 down to 0. */
    t = T - 2;
    while (t >= 0) {
        const floatval_t *beta_next = PARTIAL_BETA_SCORE(ctx, t+1);
        const floatval_t *emit_next = EXP_STATE_SCORE(ctx, t+1);
        const int *next_ok = &mask[(t+1) * L];

        beta_t = PARTIAL_BETA_SCORE(ctx, t);
        here_ok = &mask[t * L];

        veccopy(work, beta_next, L);
        veczero(beta_t, L);

        /* work[i] = beta[t+1][i] * state[t+1][i] for allowed next labels. */
        for (a = 0; a < L; ++a) {
            if (next_ok[a]) {
                work[a] *= emit_next[a];
            }
        }

        /* beta[t][b] = sum over allowed next labels of trans[b][i] * work[i] */
        for (b = 0; b < L; ++b) {
            const floatval_t *edge;

            if (!here_ok[b]) {
                continue;
            }
            edge = EXP_TRANS_SCORE(ctx, b);
            for (a = 0; a < L; ++a) {
                if (next_ok[a]) {
                    beta_t[b] += edge[a] * work[a];
                }
            }
        }

        /* Apply the scaling coefficient of item t. */
        vecscale(beta_t, ctx->partial_scale_factor[t], L);
        --t;
    }
}
/*
 * Compute the model expectations (marginals) of states and transitions from
 * the scaled alpha and beta scores.
 *
 * State marginals overwrite STATE_MEXP(ctx, t). Transition marginals are
 * accumulated (+=) into TRANS_MEXP(ctx, i); this function does not clear
 * them first. ctx->row is used as scratch space.
 */
void crf1dc_marginals(crf1d_context_t* ctx)
{
    const int T = ctx->num_items;
    const int L = ctx->num_labels;
    int t;

    /*
     * State expectations.
     *  p(t,i) = fwd[t][i] * bwd[t][i] / norm
     *         = (1. / C[t]) * fwd'[t][i] * bwd'[t][i]
     */
    for (t = 0; t < T; ++t) {
        floatval_t *alpha = ALPHA_SCORE(ctx, t);
        floatval_t *beta = BETA_SCORE(ctx, t);
        floatval_t *marg = STATE_MEXP(ctx, t);

        veccopy(marg, alpha, L);
        vecmul(marg, beta, L);
        vecscale(marg, 1. / ctx->scale_factor[t], L);
    }

    /*
     * Transition expectations.
     *  p(t,i,t+1,j) = fwd'[t][i] * edge[i][j] * state[t+1][j] * bwd'[t+1][j]
     * (the scaling coefficients cancel against the normaliser). The
     * expectation of a transition (i -> j) is the sum of p(t,i,t+1,j)
     * over t.
     */
    for (t = 0; t + 1 < T; ++t) {
        floatval_t *alpha = ALPHA_SCORE(ctx, t);
        floatval_t *emit = EXP_STATE_SCORE(ctx, t+1);
        floatval_t *beta = BETA_SCORE(ctx, t+1);
        floatval_t *work = ctx->row;
        int from;

        /* work[j] = state[t+1][j] * bwd'[t+1][j] */
        veccopy(work, beta, L);
        vecmul(work, emit, L);

        for (from = 0; from < L; ++from) {
            floatval_t *edge = EXP_TRANS_SCORE(ctx, from);
            floatval_t *acc = TRANS_MEXP(ctx, from);
            int to;

            for (to = 0; to < L; ++to) {
                acc[to] += alpha[from] * edge[to] * work[to];
            }
        }
    }
}
/*
 * Split a required velocity change along `axis` between the linear and the
 * angular velocity of chassis `c`.
 *
 * linearRatio       - share of the change applied directly to c->vel.
 * (1 - linearRatio) - share applied to c->angVel through an impulse at
 *                     `worldOffset`.
 *
 * No guard is applied when the angular response is zero; the division then
 * produces a non-finite value.
 */
static void doCorrection3(Chassis* c, const vec3* worldOffset, const vec3* axis, float requiredVelocityChange, float linearRatio)
{
    const float angularRatio = 1.f - linearRatio;
    vec3 lever;
    vec3 impulse;
    vec3 angularDelta;
    float angularResponse;
    float angVelChange;

    /* Linear share goes straight onto the velocity. */
    vecaddscale(&c->vel, &c->vel, axis, requiredVelocityChange*linearRatio);

    /* Response along `axis` of a unit impulse at the offset. */
    veccross(&lever, worldOffset, axis);
    veccross(&lever, &lever, worldOffset);
    angularResponse = vecdot(axis, &lever);
    angVelChange = requiredVelocityChange / angularResponse;

    /* Remaining share goes onto the angular velocity. */
    vecscale(&impulse, axis, angVelChange*angularRatio);
    veccross(&angularDelta, worldOffset, &impulse);
    vecadd(&c->angVel, &c->angVel, &angularDelta);
}
/**
 * Minimise a function of n variables with the limited-memory BFGS method,
 * or with its orthant-wise variant (OWL-QN) when param.orthantwise_c is
 * non-zero.
 *
 *  @param  n           Number of variables; must be positive.
 *  @param  x           Starting point on entry; updated in place. When the
 *                      line search fails, x is reverted to the last accepted
 *                      point.
 *  @param  pfx         If not NULL, receives the last objective value once
 *                      the optimisation has started. It is not written when
 *                      a parameter check fails.
 *  @param  evaluate    Callback computing the objective value and gradient.
 *  @param  progress    Progress callback, or NULL to use
 *                      default_lbfgs_progress. A non-zero return value
 *                      cancels the optimisation.
 *  @param  instance    Opaque pointer handed to both callbacks.
 *  @param  _param      Parameters, or NULL to use default_param.
 *  @return             LBFGS_ALREADY_MINIMIZED, LBFGS_CONVERGENCE,
 *                      LBFGS_CONVERGENCE_DELTA, one of the LBFGSERR_* codes
 *                      returned below, or the negative status reported by
 *                      the line search.
 *
 * NOTE(review): param.m is not validated here; a non-positive value would
 * make the `% m` operations below invalid -- confirm callers guarantee m > 0.
 */
int lbfgs(
    int n,
    double* x,
    double* pfx,
    lbfgs_evaluate_t evaluate,
    lbfgs_progress_t progress,
    void* instance,
    const lbfgs_parameter_t* _param
    )
{
    int ret;
    int i, j, k, ls, end, bound, n_evaluate = 0;
    int enable_owlqn;
    double step;
    lbfgs_parameter_t param = (_param) ? (*_param) : default_param;
    const int m = param.m;
    double* xp;
    double* g, *gp, *pg = 0;
    double* d, *w, *pf = 0;
    iteration_data_t* lm = 0, *it = 0;
    double ys, yy;
    double xnorm, gnorm, rate, beta;
    double fx;
    line_search_proc_t linesearch = line_search_morethuente;
    callback_data_t cd;

    cd.n = n;
    cd.instance = instance;
    cd.evaluate = evaluate;
    cd.progress = (progress) ? progress : default_lbfgs_progress;

    /* Check the input parameters for errors. */
    if (n <= 0) {
        return LBFGSERR_INVALID_N;
    }
    if (param.epsilon < 0.0) {
        return LBFGSERR_INVALID_EPSILON;
    }
    if (param.past < 0) {
        return LBFGSERR_INVALID_TESTPERIOD;
    }
    if (param.delta < 0.0) {
        return LBFGSERR_INVALID_DELTA;
    }
    if (param.min_step < 0.0) {
        return LBFGSERR_INVALID_MINSTEP;
    }
    if (param.max_step < param.min_step) {
        return LBFGSERR_INVALID_MAXSTEP;
    }
    if (param.ftol < 0.0) {
        return LBFGSERR_INVALID_FTOL;
    }
    if (param.linesearch == LBFGS_LINESEARCH_BACKTRACKING_WOLFE ||
        param.linesearch == LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE) {
        if (param.wolfe <= param.ftol || 1. <= param.wolfe) {
            return LBFGSERR_INVALID_WOLFE;
        }
    }
    if (param.gtol < 0.0) {
        return LBFGSERR_INVALID_GTOL;
    }
    if (param.xtol < 0.0) {
        return LBFGSERR_INVALID_XTOL;
    }
    if (param.max_linesearch <= 0) {
        return LBFGSERR_INVALID_MAXLINESEARCH;
    }
    if (param.orthantwise_c < 0.0) {
        return LBFGSERR_INVALID_ORTHANTWISE;
    }
    if (param.orthantwise_start < 0 || param.orthantwise_start > n) {
        return LBFGSERR_INVALID_ORTHANTWISE_START;
    }
    /* A negative end means "up to the last variable". */
    if (param.orthantwise_end < 0) {
        param.orthantwise_end = n;
    }
    if (param.orthantwise_end > n) {
        return LBFGSERR_INVALID_ORTHANTWISE_END;
    }

    /* Select the line search routine. */
    enable_owlqn = (param.orthantwise_c != 0.0);
    if (enable_owlqn) {
        switch (param.linesearch) {
        case LBFGS_LINESEARCH_BACKTRACKING_WOLFE:
            linesearch = line_search_backtracking_owlqn;
            break;
        default:
            /* Only the backtracking method is available. */
            return LBFGSERR_INVALID_LINESEARCH;
        }
    } else {
        switch (param.linesearch) {
        case LBFGS_LINESEARCH_MORETHUENTE:
            linesearch = line_search_morethuente;
            break;
        case LBFGS_LINESEARCH_BACKTRACKING_ARMIJO:
        case LBFGS_LINESEARCH_BACKTRACKING_WOLFE:
        case LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE:
            linesearch = line_search_backtracking;
            break;
        default:
            return LBFGSERR_INVALID_LINESEARCH;
        }
    }

    /* Allocate working space. */
    xp = vecalloc(n);
    g = vecalloc(n);
    gp = vecalloc(n);
    d = vecalloc(n);
    w = vecalloc(n);

    /* Allocate pseudo gradient. */
    if (enable_owlqn) {
        pg = vecalloc(n);
    }

    /* Allocate and initialize the limited memory storage. */
    lm = (iteration_data_t*)xalloc(m * sizeof(iteration_data_t));
    for (i = 0; i < m; i++) {
        it = &lm[i];
        it->alpha = 0.0;
        it->s = vecalloc(n);
        it->y = vecalloc(n);
        it->ys = 0.0;
    }

    /* Allocate an array for storing previous values of the objective function. */
    if (param.past > 0) {
        pf = vecalloc((size_t)param.past);
    }

    /* Evaluate the objective and gradient at the starting point. */
    fx = cd.evaluate(cd.instance, cd.n, x, g, 0);
    n_evaluate++;

    if (enable_owlqn) {
        /* Add the L1 term and compute the pseudo-gradient. */
        xnorm = owlqn_x1norm(x, param.orthantwise_start, param.orthantwise_end);
        fx += xnorm * param.orthantwise_c;
        owlqn_pseudo_gradient(
            pg, x, g, n,
            param.orthantwise_c, param.orthantwise_start, param.orthantwise_end);
    }

    /* Store the initial value of the objective function. */
    if (pf) {
        pf[0] = fx;
    }

    /**
     * Compute the direction.
     * we assume the initial hessian matrix H_0 as the identity matrix.
     */
    if (!enable_owlqn) {
        vecncpy(d, g, n);
    } else {
        vecncpy(d, pg, n);
    }

    /**
     * Make sure that the initial variables are not a minimizer.
     */
    vec2norm(&xnorm, x, n);
    if (!enable_owlqn) {
        vec2norm(&gnorm, g, n);
    } else {
        vec2norm(&gnorm, pg, n);
    }
    if (xnorm < 1.0) {
        xnorm = 1.0;
    }
    if (gnorm / xnorm <= param.epsilon) {
        ret = LBFGS_ALREADY_MINIMIZED;
        goto lbfgs_exit;
    }

    /**
     * Compute the initial step:
     *  step = 1.0 / ||d||
     */
    vec2norminv(&step, d, n);

    k = 1;
    end = 0;
    for (;;) {
        /* Store the current position and gradient vectors. */
        veccpy(xp, x, n);
        veccpy(gp, g, n);

        /*
         * Search for an optimal step. The parameter block is passed by
         * address; in OWL-QN mode the pseudo-gradient takes the place of
         * the previous gradient and is refreshed afterwards.
         */
        if (!enable_owlqn) {
            ls = linesearch(n, x, &fx, g, d, &step, xp, gp, w, &cd, &param);
        } else {
            ls = linesearch(n, x, &fx, g, d, &step, xp, pg, w, &cd, &param);
            owlqn_pseudo_gradient(
                pg, x, g, n,
                param.orthantwise_c, param.orthantwise_start, param.orthantwise_end
                );
        }
        if (ls < 0) {
            /* Revert to the previous point. */
            veccpy(x, xp, n);
            veccpy(g, gp, n);
            ret = ls;
            break;
        }

        /* A non-negative status is the number of evaluations performed. */
        n_evaluate += ls;

        /* Compute x and g norms. */
        vec2norm(&xnorm, x, n);
        if (!enable_owlqn) {
            vec2norm(&gnorm, g, n);
        } else {
            vec2norm(&gnorm, pg, n);
        }

        /* Report the progress. */
        if ((ret = cd.progress(cd.instance, cd.n, x, g, fx, xnorm, gnorm, step, k, n_evaluate)) != 0) {
            ret = LBFGSERR_CANCELED;
            break;
        }

        /* Convergence test. */
        if (xnorm < 1.0) {
            xnorm = 1.0;
        }
        if (gnorm / xnorm <= param.epsilon) {
            ret = LBFGS_CONVERGENCE;
            break;
        }

        /* Stopping criterion test. */
        if (pf) {
            /* We don't test the stopping criterion while k < past. */
            if (param.past <= k) {
                /* Compute the relative improvement from the past. */
                rate = (pf[k % param.past] - fx) / fx;

                /* The stopping criterion. */
                if (rate < param.delta) {
                    ret = LBFGS_CONVERGENCE_DELTA;
                    break;
                }
            }

            /* Store the current value of the objective function. */
            pf[k % param.past] = fx;
        }

        if (param.max_iterations != 0 && param.max_iterations < k + 1) {
            ret = LBFGSERR_MAXIMUMITERATION;
            break;
        }

        /**
         * Update s and y:
         *  s_{k+1} = x_{k+1} - x_{k} = step * d_{k}
         *  y_{k+1} = g_{k+1} - g_{k}
         */
        it = &lm[end];
        vecdiff(it->s, x, xp, n);
        vecdiff(it->y, g, gp, n);

        /**
         * Compute scalars ys and yy:
         *  ys = y^t s = 1 / \rho
         *  yy = y^t y
         * Notice that yy is used for scaling the hessian matrix H_0 (Cholesky factor).
         */
        vecdot(&ys, it->y, it->s, n);
        vecdot(&yy, it->y, it->y, n);
        it->ys = ys;

        /**
         * Recursive formula to compute d = -(H g).
         * This is described in page 779 of:
         *  Jorge Nocedal.
         *  Updating Quasi-Newton Matrices with Limited Storage.
         *  Mathematics of Computation, Vol. 35, No. 151,
         *  pp. 773--782, 1980.
         */
        bound = (m <= k) ? m : k;
        k++;
        end = (end + 1) % m;

        /* Compute the steepest direction. */
        /* Compute the negative of (pseudo) gradient. */
        if (!enable_owlqn) {
            vecncpy(d, g, n);
        } else {
            vecncpy(d, pg, n);
        }

        /* First loop: walk the stored pairs from newest to oldest. */
        j = end;
        for (i = 0; i < bound; i++) {
            j = (j + m - 1) % m;    /* if (--j == -1) j = m-1; */
            it = &lm[j];
            /* \alpha_{j} = \rho_{j} s^{t}_{j} q_{k+1} */
            vecdot(&it->alpha, it->s, d, n);
            it->alpha /= it->ys;
            /* q_{i} = q_{i+1} - \alpha_{i} y_{i} */
            vecadd(d, it->y, -it->alpha, n);
        }

        vecscale(d, ys / yy, n);

        /* Second loop: walk back from oldest to newest (j carries over). */
        for (i = 0; i < bound; i++) {
            it = &lm[j];
            /* \beta_{j} = \rho_{j} y^t_{j} \gamma_{i} */
            vecdot(&beta, it->y, d, n);
            beta /= it->ys;
            /* \gamma_{i+1} = \gamma_{i} + (\alpha_{j} - \beta_{j}) s_{j} */
            vecadd(d, it->s, it->alpha - beta, n);
            j = (j + 1) % m;        /* if (++j == m) j = 0; */
        }

        /* Constrain the search direction for orthant-wise updates. */
        if (enable_owlqn) {
            owlqn_contrain_line_search(d, pg, param.orthantwise_start, param.orthantwise_end);
        }

        /* Now the search direction d is ready. We try step = 1 first. */
        step = 1.0;
    }

lbfgs_exit:
    /* Return the final value of the objective function. */
    if (pfx) {
        *pfx = fx;
    }

    /* Release everything allocated above. */
    vecfree(pf);
    if (lm != 0) {
        for (i = 0; i < m; i++) {
            vecfree(lm[i].s);
            vecfree(lm[i].y);
        }
        xfree(lm);
    }
    vecfree(pg);
    vecfree(w);
    vecfree(d);
    vecfree(gp);
    vecfree(g);
    vecfree(xp);

    return ret;
}
/**
 * Minimise a function of n variables with a nonlinear conjugate-gradient
 * method (Polack-Ribiere directions, line search by quadratic/cubic
 * interpolation and cubic extrapolation).
 *
 *  @param  n           Number of variables; must be positive.
 *  @param  x           Starting point on entry; updated in place. When a
 *                      line search fails, x is restored to the point held
 *                      before that search.
 *  @param  pfx         If not NULL, receives the objective value at the
 *                      point left in x once the optimisation has started.
 *                      It is not written when a parameter check fails.
 *  @param  evaluate    Callback computing the objective value and gradient.
 *  @param  progress    Progress callback, or NULL to use
 *                      default_lbfgs_progress. A non-zero return value
 *                      cancels the optimisation.
 *  @param  instance    Opaque pointer handed to both callbacks.
 *  @param  _param      Parameters, or NULL to use default_param. Only
 *                      epsilon, past, delta, max_linesearch and
 *                      max_iterations are read here.
 *  @return             LBFGS_ALREADY_MINIMIZED, LBFGS_CONVERGENCE,
 *                      LBFGS_CONVERGENCE_DELTA, or one of the LBFGSERR_*
 *                      codes returned below.
 */
int cg(
    int n,
    double* x,
    double* pfx,
    lbfgs_evaluate_t evaluate,
    lbfgs_progress_t progress,
    void* instance,
    const lbfgs_parameter_t* _param
    )
{
    static const double RHO = 0.01;     /* factor of the sufficient-decrease test */
    static const double SIG = 0.5;      /* factor of the slope tests */
    static const double INT = 0.1;      /* keeps interpolated steps away from the bracket ends */
    static const double EXT = 3.0;      /* largest extrapolation, as a multiple of the current step */
    static const double RATIO = 100.0;  /* cap on the slope ratio used to rescale the step */

    int ret;
    int k, ls_count, ls_success, ls_failed = 0, n_evaluate = 0;
    lbfgs_parameter_t param = (_param) ? (*_param) : default_param;
    double f0, f1, f2 = 0.0, f3, d1, d2, d3, z1, z2 = 0.0, z3, limit, A, B, C;
    double xnorm, gnorm, rate;
    double* df0, *df1, *df2, *s, *x0;
    double* pf = 0;

    if (progress == 0) {
        progress = default_lbfgs_progress;
    }

    /* Check the input parameters for errors. */
    if (n <= 0) {
        return LBFGSERR_INVALID_N;
    }
    if (param.epsilon < 0.0) {
        return LBFGSERR_INVALID_EPSILON;
    }
    if (param.past < 0) {
        return LBFGSERR_INVALID_TESTPERIOD;
    }
    if (param.delta < 0.0) {
        return LBFGSERR_INVALID_DELTA;
    }
    if (param.max_linesearch <= 0) {
        return LBFGSERR_INVALID_MAXLINESEARCH;
    }

    /* Allocate working space. */
    df0 = vecalloc(n);
    df1 = vecalloc(n);
    df2 = vecalloc(n);
    s = vecalloc(n);
    x0 = vecalloc(n);
    if (param.past > 0) {
        pf = vecalloc((size_t)param.past);
    }

    /* Evaluate the objective and gradient at the starting point. */
    f1 = evaluate(instance, n, x, df1, 0);
    n_evaluate++;
    if (pf) {
        pf[0] = f1;
    }

    /* Make sure that the initial variables are not a minimizer. */
    vec2norm(&xnorm, x, n);
    vec2norm(&gnorm, df1, n);
    if (xnorm < 1.0) {
        xnorm = 1.0;
    }
    if (gnorm / xnorm <= param.epsilon) {
        /* No trial point exists yet: report the value at x, not f2's initialiser. */
        f2 = f1;
        ret = LBFGS_ALREADY_MINIMIZED;
        goto cg_exit;
    }

    /* Initial direction is steepest descent; d1 is the slope along it. */
    vecncpy(s, df1, n);
    vecdot(&d1, s, s, n);
    d1 = -d1;

    /**
     * Compute the initial step z1:
     */
    z1 = 1.0 / (1.0 - d1);

    k = 1;
    for (;;) {
        /* Store the current position and gradient vectors. */
        f0 = f1;
        veccpy(x0, x, n);
        veccpy(df0, df1, n);

        /* update x using current step: x=x+z1*s */
        vecadd(x, s, z1, n);
        f2 = evaluate(instance, n, x, df2, 0);
        n_evaluate++;
        vecdot(&d2, df2, s, n);

        /* set point 3 equal to point 1 */
        f3 = f1;
        d3 = d1;
        z3 = -z1;

        /* begin line search */
        ls_success = 0;
        ls_count = 0;
        limit = -1.0;

        for (;;) {
            /*
             * Interpolate while the trial point violates the decrease or
             * slope condition.
             * NOTE(review): this loop is not bounded by max_linesearch;
             * confirm that it cannot spin on a noisy objective.
             */
            while (f2 > f1 + RHO * z1 * d1 || d2 > -SIG * d1) {
                limit = z1;
                if (f2 > f1) {
                    /* quadratic fit */
                    z2 = z3 - (0.5 * d3 * z3 * z3) / (d3 * z3 + f2 - f3);
                } else {
                    /* cubic fit */
                    A = 6 * (f2 - f3) / z3 + 3 * (d2 + d3);
                    B = 3 * (f3 - f2) - z3 * (d3 + 2 * d2);
                    z2 = (sqrt(B * B - A * d2 * z3 * z3) - B) / A;
                }

                if (isinf(z2) || isnan(z2)) {
                    /* if we had a numerical problem then bisect */
                    z2 = z3 / 2.0;
                }

                /* don't accept too close to limits */
                z2 = max2(min2(z2, INT* z3), (1.0 - INT) * z3);

                /* update step and x */
                z1 = z1 + z2;
                vecadd(x, s, z2, n);
                f2 = evaluate(instance, n, x, df2, 0);
                n_evaluate++;
                ls_count++;
                vecdot(&d2, df2, s, n);
                z3 = z3 - z2;
            }

            if (f2 > f1 + z1 * RHO * d1 || d2 > -SIG * d1) {
                /* a line search failure */
                break;
            } else if (d2 > SIG * d1) {
                /* a line search success */
                ls_success = 1;
                break;
            } else if (ls_count >= param.max_linesearch) {
                ret = LBFGSERR_MAXIMUMLINESEARCH;
                goto cg_exit;
            }

            /* cubic extrapolation */
            A = 6.0 * (f2 - f3) / z3 + 3.0 * (d2 + d3);
            B = 3.0 * (f3 - f2) - z3 * (d3 + 2 * d2);
            z2 = -d2 * z3 * z3 / (B + sqrt(B * B - A * d2 * z3 * z3));

            /* adjust current step z2 for many cases */
            if (isnan(z2) || isinf(z2) || z2 < 0.0) {
                /* numerical problem or wrong sign */
                if (limit < -0.5) {
                    /* no upper limit yet: extrapolate the maximum amount */
                    z2 = z1 * (EXT - 1.0);
                } else {
                    /* otherwise bisect */
                    z2 = (limit - z1) / 2.0;
                }
            } else if (limit > -0.5 && z2 + z1 > limit) {
                /* extrapolation beyond the limit: bisect */
                z2 = (limit - z1) / 2.0;
            } else if (limit < -0.5 && z2 + z1 > z1 * EXT) {
                /* extrapolation beyond the maximum: clamp */
                z2 = z1 * (EXT - 1.0);
            } else if (z2 < -z3 * INT) {
                z2 = -z3 * INT;
            } else if (limit > -0.5 && z2 < (limit - z1) * (1.0 - INT)) {
                /* too close to the limit */
                z2 = (limit - z1) * (1.0 - INT);
            }

            /* set point 3 equal to point 2 */
            f3 = f2;
            d3 = d2;
            z3 = -z2;
            z1 = z1 + z2;
            vecadd(x, s, z2, n);
            f2 = evaluate(instance, n, x, df2, 0);
            n_evaluate++;
            ls_count++;
            vecdot(&d2, df2, s, n);
        }

        if (ls_success) {
            /* Compute x and g norms, then report the progress. */
            vec2norm(&xnorm, x, n);
            vec2norm(&gnorm, df2, n);
            if ((ret = progress(instance, n, x, df2, f2, xnorm, gnorm, z2, k, n_evaluate)) != 0) {
                ret = LBFGSERR_CANCELED;
                break;
            }

            /* Convergence test. */
            if (xnorm < 1.0) {
                xnorm = 1.0;
            }
            if (gnorm / xnorm <= param.epsilon) {
                ret = LBFGS_CONVERGENCE;
                break;
            }

            /* Stopping criterion on the relative improvement. */
            if (pf) {
                if (param.past <= k) {
                    rate = (pf[k % param.past] - f2) / f2;
                    if (rate < param.delta) {
                        ret = LBFGS_CONVERGENCE_DELTA;
                        break;
                    }
                }
                pf[k % param.past] = f2;
            }

            if (param.max_iterations != 0 && param.max_iterations < k + 1) {
                ret = LBFGSERR_MAXIMUMITERATION;
                break;
            }

            k++;
            f1 = f2;

            /**
             * Polack-Ribiere direction
             *  s = (df2'*df2-df1'*df2)/(df1'*df1)*s - df2
             */
            vecdot(&A, df2, df2, n);
            vecdot(&B, df1, df2, n);
            vecdot(&C, df1, df1, n);
            vecscale(s, (A - B) / C, n);
            vecadd(s, df2, -1.0, n);

            /* df1 now holds the gradient at the accepted point. */
            vecswap(df1, df2, n);
            vecdot(&d2, df1, s, n);
            if (d2 > 0) {
                /* Not a descent direction: fall back to steepest descent. */
                vecncpy(s, df1, n);
                vecdot(&d2, s, s, n);
                d2 = -d2;
            }

            /* Rescale the step by the slope ratio, capped at RATIO. */
            z1 = z1 * min2(RATIO, d1 / (d2 - DBL_MIN));
            d1 = d2;
            ls_failed = 0;
        } else {
            /* restore previous point */
            f1 = f0;
            veccpy(x, x0, n);
            veccpy(df1, df0, n);

            if (ls_failed) {
                /*
                 * line search failed twice; x has been restored, so report
                 * its objective value rather than the rejected trial value.
                 */
                f2 = f1;
                ret = LBFGSERR_LINE_SEARCH_FAILED;
                break;
            }

            vecswap(df1, df2, n);
            vecncpy(s, df1, n);     /* try steepest */
            vecdot(&d1, s, s, n);
            d1 = -d1;
            z1 = 1.0 / (1.0 - d1);
            ls_failed = 1;
        }
    }

cg_exit:
    /* Return the final value of the objective function. */
    if (pfx) {
        *pfx = f2;
    }

    vecfree(pf);
    vecfree(x0);
    vecfree(s);
    vecfree(df2);
    vecfree(df1);
    vecfree(df0);

    return ret;
}
/*
 * Lanczos iteration with full orthogonalization.
 *
 * Builds Lanczos vectors q[1..j] until the d selected Ritz pairs satisfy the
 * bji_tol bound, LANCZOS_MAXITNS iterations have run, or a Lanczos vector
 * can no longer be allocated; mkeigvecs() then fills in lambda, bound and y.
 *
 * version == 1 applies the operator through splarax(); version == 2 applies
 * it through symmlq_() ("inverse operator mode"), selects the largest Ritz
 * values instead of the smallest, and inverts lambda[1..d] before reporting.
 *
 * Vectors allocated with mkvec(1, ...) are released with frvec(..., 1) and
 * loops over them start at 1.
 */
void lanczos_FO (
    struct vtx_data **A,    /* graph data structure */
    int n,                  /* number of rows/columns in matrix */
    int d,                  /* problem dimension = # evecs to find */
    double **y,             /* columns of y are eigenvectors of A  */
    double *lambda,         /* ritz approximation to eigenvals of A */
    double *bound,          /* on ritz pair approximations to eig pairs of A */
    double eigtol,          /* tolerance on eigenvectors */
    double *vwsqrt,         /* square root of vertex weights */
    double maxdeg,          /* maximum degree of graph */
    int version             /* 1 = standard mode, 2 = inverse operator mode */
)
{
    extern FILE *Output_File;        /* output file or NULL */
    extern int DEBUG_EVECS;          /* print debugging output? */
    extern int DEBUG_TRACE;          /* trace main execution path */
    extern int WARNING_EVECS;        /* print warning messages? */
    extern int LANCZOS_MAXITNS;      /* maximum Lanczos iterations allowed */
    extern double BISECTION_SAFETY;  /* safety factor for bisection algorithm */
    extern double SRESTOL;           /* resid tol for T evec comp */
    extern double DOUBLE_MAX;        /* assigned to normxlim below to disable symmlq's ||x|| test */
    extern double splarax_time;      /* time matvecs */
    extern double orthog_time;       /* time orthogonalization work */
    extern double tevec_time;        /* time tridiagonal eigvec work */
    extern double evec_time;         /* time to generate eigenvectors */
    extern double ql_time;           /* time tridiagonal eigval work */
    extern double blas_time;         /* time for blas (not assembly coded) */
    extern double init_time;         /* time for allocating memory, etc. */
    extern double scan_time;         /* time for scanning bounds list */
    extern double debug_time;        /* time for debug computations and output */
    int i, j;                        /* indices */
    int maxj;                        /* maximum number of Lanczos iterations */
    double *u, *r;                   /* Lanczos vectors */
    double *Aq;                      /* sparse matrix-vector product vector */
    double *alpha, *beta;            /* the Lanczos scalars from each step */
    double *ritz;                    /* copy of alpha for tqli */
    double *workj;                   /* work vector (eg. for tqli) */
    double *workn;                   /* work vector (eg. for checkeig) */
    double *s;                       /* eigenvector of T */
    double **q;                      /* columns of q = Lanczos basis vectors */
    double *bj;                      /* beta(j)*(last element of evecs of T) */
    double bis_safety;               /* real safety factor for bisection algorithm */
    double Sres;                     /* how well Tevec calculated eigvecs */
    double Sres_max;                 /* Maximum value of Sres */
    int inc_bis_safety;              /* need to increase bisection safety */
    double *Ares;                    /* how well Lanczos calculated each eigpair */
    double *inv_lambda;              /* eigenvalues of inverse operator (allocated and freed here, not otherwise used) */
    int *index;                      /* the Ritz index of an eigenpair */
    struct orthlink *orthlist = NULL;   /* vectors to orthogonalize against in Lanczos */
    struct orthlink *orthlist2 = NULL;  /* vectors to orthogonalize against in Symmlq */
    struct orthlink *temp;           /* for expanding orthogonalization list */
    double *ritzvec=NULL;            /* ritz vector for current iteration (allocated and freed here, not otherwise used) */
    double *zeros=NULL;              /* vector of all zeros */
    double *ones=NULL;               /* vector of all ones */
    struct scanlink *scanlist;       /* list of fields for min ritz vals */
    struct scanlink *curlnk;         /* for traversing the scanlist */
    double bji_tol;                  /* tol on bji estimate of A e-residual */
    int converged;                   /* has the iteration converged? */
    double time;                     /* current clock time */
    double shift, rtol;              /* symmlq input */
    long precon, goodb, nout;        /* symmlq input */
    long checka, intlim;             /* symmlq input */
    double anorm, acond;             /* symmlq output */
    double rnorm, ynorm;             /* symmlq output */
    long istop, itn;                 /* symmlq output */
    double macheps;                  /* machine precision calculated by symmlq */
    double normxlim;                 /* a stopping criteria for symmlq */
    long itnmin;                     /* enforce minimum number of iterations */
    int symmlqitns;                  /* # symmlq itns */
    double *wv1=NULL, *wv2=NULL, *wv3=NULL;  /* Symmlq work space */
    double *wv4=NULL, *wv5=NULL, *wv6=NULL;  /* Symmlq work space */
    long long_n;                     /* long int copy of n for symmlq */
    int ritzval_flag = 0;            /* status flag for ql() */
    double Anorm;                    /* Norm estimate of the Laplacian matrix */
    int left, right;                 /* ranges on the search for ritzvals */
    int memory_ok;                   /* TRUE as long as don't run out of memory */

    double *mkvec();                 /* allocates space for a vector */
    double *mkvec_ret();             /* mkvec() which returns error code */
    double dot();                    /* standard dot product routine */
    struct orthlink *makeorthlnk();  /* make space for entry in orthog. set */
    double ch_norm();                /* vector norm */
    double Tevec();                  /* calc evec of T by linear recurrence */
    struct scanlink *mkscanlist();   /* make scan list for min ritz vecs */
    double lanc_seconds();           /* current clock timer */
    int symmlq_(), get_ritzvals();
    void setvec(), vecscale(), update(), vecran(), strout();
    void splarax(), scanmin(), scanmax(), frvec(), orthogonalize();
    void orthog1(), orthogvec(), bail(), warnings(), mkeigvecs();

    if (DEBUG_TRACE > 0) {
        printf("<Entering lanczos_FO>\n");
    }

    if (DEBUG_EVECS > 0) {
        if (version == 1) {
            printf("Full orthogonalization Lanczos, matrix size = %d\n", n);
        }
        else {
            printf("Full orthogonalization Lanczos, inverted operator, matrix size = %d\n", n);
        }
    }

    /* Initialize time. */
    time = lanc_seconds();

    if (n < d + 1) {
        bail("ERROR: System too small for number of eigenvalues requested.",1);
        /* d+1 since don't use zero eigenvalue pair */
    }

    /* Allocate Lanczos space. */
    maxj = LANCZOS_MAXITNS;
    u = mkvec(1, n);
    r = mkvec(1, n);
    Aq = mkvec(1, n);
    ritzvec = mkvec(1, n);
    zeros = mkvec(1, n);
    setvec(zeros, 1, n, 0.0);
    workn = mkvec(1, n);
    Ares = mkvec(1, d);
    inv_lambda = mkvec(1, d);
    index = smalloc((d + 1) * sizeof(int));
    alpha = mkvec(1, maxj);
    beta = mkvec(1, maxj + 1);
    ritz = mkvec(1, maxj);
    s = mkvec(1, maxj);
    bj = mkvec(1, maxj);
    workj = mkvec(1, maxj + 1);
    q = smalloc((maxj + 1) * sizeof(double *));
    scanlist = mkscanlist(d);

    if (version == 2) {
        /* Allocate Symmlq space all in one chunk. */
        wv1 = smalloc(6 * (n + 1) * sizeof(double));
        wv2 = &wv1[(n + 1)];
        wv3 = &wv1[2 * (n + 1)];
        wv4 = &wv1[3 * (n + 1)];
        wv5 = &wv1[4 * (n + 1)];
        wv6 = &wv1[5 * (n + 1)];

        /* Set invariant symmlq parameters */
        precon = FALSE;         /* FALSE until we figure out a good way */
        goodb = FALSE;          /* should be FALSE for this application */
        checka = FALSE;         /* if don't know by now, too bad */
        intlim = n;             /* set to enforce a maximum number of Symmlq itns */
        itnmin = 0;             /* set to enforce a minimum number of Symmlq itns */
        shift = 0.0;            /* since just solving rather than doing RQI */
        symmlqitns = 0;         /* total number of Symmlq iterations */
        nout = 0;               /* Effectively disabled - see notes in symmlq.f */
        rtol = 1.0e-5;          /* requested residual tolerance */
        normxlim = DOUBLE_MAX;  /* Effectively disables ||x|| termination criterion */
        long_n = n;             /* copy to long for linting */
    }

    /* Initialize. */
    vecran(r, 1, n);
    if (vwsqrt == NULL) {
        /* whack one's direction from initial vector */
        orthog1(r, 1, n);

        /* list the ones direction for later use in Symmlq */
        if (version == 2) {
            orthlist2 = makeorthlnk();
            ones = mkvec(1, n);
            setvec(ones, 1, n, 1.0);
            orthlist2->vec = ones;
            orthlist2->pntr = NULL;
        }
    }
    else {
        /* whack vwsqrt direction from initial vector */
        orthogvec(r, 1, n, vwsqrt);

        if (version == 2) {
            /* list the vwsqrt direction for later use in Symmlq */
            orthlist2 = makeorthlnk();
            orthlist2->vec = vwsqrt;
            orthlist2->pntr = NULL;
        }
    }
    beta[1] = ch_norm(r, 1, n);
    q[0] = zeros;
    bji_tol = eigtol;
    orthlist = NULL;
    Sres_max = 0.0;
    Anorm = 2 * maxdeg;         /* Gershgorin estimate for ||A|| */
    bis_safety = BISECTION_SAFETY;
    inc_bis_safety = FALSE;
    init_time += lanc_seconds() - time;

    /* Main Lanczos loop. */
    j = 1;
    converged = FALSE;
    memory_ok = TRUE;
    while ((j <= maxj) && (converged == FALSE) && memory_ok) {
        time = lanc_seconds();

        /* Allocate next Lanczos vector. If fail, back up one step and compute approx. eigvec. */
        q[j] = mkvec_ret(1, n);
        if (q[j] == NULL) {
            memory_ok = FALSE;
            if (DEBUG_EVECS > 0 || WARNING_EVECS > 0) {
                strout("WARNING: Lanczos out of memory; computing best approximation available.\n");
            }
            if (j <= 2) {
                bail("ERROR: Sorry, can't salvage Lanczos.",1);
                /* ... save yourselves, men.  */
            }
            /* Step back so the rest of this pass works on the last allocated vector. */
            j--;
        }

        vecscale(q[j], 1, n, 1.0 / beta[j], r);
        blas_time += lanc_seconds() - time;

        /* Apply the operator to q[j]; the result is stored in Aq. */
        time = lanc_seconds();
        if (version == 1) {
            splarax(Aq, A, n, q[j], vwsqrt, workn);
        }
        else {
            symmlq_(&long_n, &(q[j][1]), &wv1[1], &wv2[1], &wv3[1], &wv4[1], &Aq[1], &wv5[1],
                &wv6[1], &checka, &goodb, &precon, &shift, &nout, &intlim, &rtol, &istop,
                &itn, &anorm, &acond, &rnorm, &ynorm, (double *) A, vwsqrt,
                (double *) orthlist2, &macheps, &normxlim, &itnmin);
            symmlqitns += itn;
            if (DEBUG_EVECS > 2) {
                printf("Symmlq report: rtol %g\n", rtol);
                printf(" system norm %g, solution norm %g\n", anorm, ynorm);
                printf(" system condition %g, residual %g\n", acond, rnorm);
                printf(" termination condition %2ld, iterations %3ld\n", istop, itn);
            }
        }
        splarax_time += lanc_seconds() - time;

        /* Three-term recurrence: produces alpha[j] and the new residual r. */
        time = lanc_seconds();
        update(u, 1, n, Aq, -beta[j], q[j - 1]);
        alpha[j] = dot(u, 1, n, q[j]);
        update(r, 1, n, u, -alpha[j], q[j]);
        blas_time += lanc_seconds() - time;

        /* Full orthogonalization: against the excluded direction and every previous q. */
        time = lanc_seconds();
        if (vwsqrt == NULL) {
            orthog1(r, 1, n);
        }
        else {
            orthogvec(r, 1, n, vwsqrt);
        }
        orthogonalize(r, n, orthlist);
        temp = orthlist;
        orthlist = makeorthlnk();
        orthlist->vec = q[j];
        orthlist->pntr = temp;
        beta[j + 1] = ch_norm(r, 1, n);
        orthog_time += lanc_seconds() - time;

        /* Ritz values of the current tridiagonal matrix. */
        time = lanc_seconds();
        left = j/2;
        right = j - left + 1;
        if (inc_bis_safety) {
            bis_safety *= 10;
            inc_bis_safety = FALSE;
        }
        ritzval_flag = get_ritzvals(alpha, beta+1, j, Anorm, workj+1,
            ritz, d, left, right, eigtol, bis_safety);
        /* ... have to off-set beta and workj since full orthogonalization
           indexes these from 1 to maxj+1 whereas selective orthog.
           indexes them from 0 to maxj */

        if (ritzval_flag != 0) {
            bail("ERROR: Both Sturm bisection and QL failed.",1);
            /* ... give up. */
        }
        ql_time += lanc_seconds() - time;

        /* Convergence check using Paige bji estimates. */
        time = lanc_seconds();
        for (i = 1; i <= j; i++) {
            Sres = Tevec(alpha, beta, j, ritz[i], s);
            if (Sres > Sres_max) {
                Sres_max = Sres;
            }
            if (Sres > SRESTOL) {
                inc_bis_safety = TRUE;
            }
            bj[i] = s[j] * beta[j + 1];
        }
        tevec_time += lanc_seconds() - time;

        /* Converged only once j >= d and every scanned Ritz pair is within bji_tol. */
        time = lanc_seconds();
        if (version == 1) {
            scanmin(ritz, 1, j, &scanlist);
        }
        else {
            scanmax(ritz, 1, j, &scanlist);
        }
        converged = TRUE;
        if (j < d)
            converged = FALSE;
        else {
            curlnk = scanlist;
            while (curlnk != NULL) {
                if (bj[curlnk->indx] > bji_tol) {
                    converged = FALSE;
                }
                curlnk = curlnk->pntr;
            }
        }
        scan_time += lanc_seconds() - time;
        j++;
    }
    /* Undo the final increment so j is the index of the last completed step. */
    j--;

    /* Collect eigenvalue and bound information. */
    time = lanc_seconds();
    mkeigvecs(scanlist,lambda,bound,index,bj,d,&Sres_max,alpha,beta+1,j,s,y,n,q);
    evec_time += lanc_seconds() - time;

    /* Analyze computation for and report additional problems */
    time = lanc_seconds();
    if (DEBUG_EVECS>0 && version == 2) {
        printf("\nTotal Symmlq iterations %3d\n", symmlqitns);
    }
    if (version == 2) {
        /* Convert eigenvalues of the inverse operator back to eigenvalues of A. */
        for (i = 1; i <= d; i++) {
            lambda[i] = 1.0/lambda[i];
        }
    }
    warnings(workn, A, y, n, lambda, vwsqrt, Ares, bound, index,
        d, j, maxj, Sres_max, eigtol, u, Anorm, Output_File);
    debug_time += lanc_seconds() - time;

    /* Free any memory allocated in this routine. */
    time = lanc_seconds();
    frvec(u, 1);
    frvec(r, 1);
    frvec(Aq, 1);
    frvec(ritzvec, 1);
    frvec(zeros, 1);
    /* `ones` is only allocated in the unweighted, inverse-operator case. */
    if (vwsqrt == NULL && version == 2) {
        frvec(ones, 1);
    }
    frvec(workn, 1);
    frvec(Ares, 1);
    frvec(inv_lambda, 1);
    sfree(index);
    frvec(alpha, 1);
    frvec(beta, 1);
    frvec(ritz, 1);
    frvec(s, 1);
    frvec(bj, 1);
    frvec(workj, 1);
    if (version == 2) {
        /* wv1 came straight from smalloc(), hence the 0 offset; wv2..wv6 point into it. */
        frvec(wv1, 0);
    }
    while (scanlist != NULL) {
        curlnk = scanlist->pntr;
        sfree(scanlist);
        scanlist = curlnk;
    }
    for (i = 1; i <= j; i++) {
        frvec(q[i], 1);
    }
    /* Only the list links are freed here; the vectors they point to are released separately. */
    while (orthlist != NULL) {
        temp = orthlist->pntr;
        sfree(orthlist);
        orthlist = temp;
    }
    while (version == 2 && orthlist2 != NULL) {
        temp = orthlist2->pntr;
        sfree(orthlist2);
        orthlist2 = temp;
    }
    sfree(q);
    init_time += lanc_seconds() - time;
}
/*
 * Forward (alpha) pass restricted by a label mask.
 *
 * mask holds one flag per (item, label) pair, laid out row by row with
 * L flags per item; a label whose flag is zero keeps an alpha score of 0.
 *
 *   alpha[0][j] = state[0][j]
 *   alpha[t][j] = state[t][j] * \sum_{i} alpha[t-1][i] * trans[i][j]
 *
 * Every row is rescaled to sum to one (when its sum is non-zero); the
 * scaling coefficient is stored in ctx->partial_scale_factor[t] and the
 * log of the normalization factor in ctx->partial_log_norm.
 */
void crf1dc_partial_alpha_score(crf1d_context_t* ctx, int *mask)
{
    const int T = ctx->num_items;
    const int L = ctx->num_labels;
    floatval_t *factors = ctx->partial_scale_factor;
    floatval_t *alpha = NULL;
    const floatval_t *alpha_prev = NULL, *edge = NULL, *emit = NULL;
    const int *allowed_prev = NULL, *allowed = NULL;
    floatval_t total;
    int t, src, dst;

    /* Item 0: only the state scores of the allowed labels contribute. */
    alpha = PARTIAL_ALPHA_SCORE(ctx, 0);
    veczero(alpha, L);
    emit = EXP_STATE_SCORE(ctx, 0);
    allowed = mask;
    for (dst = 0; dst < L; ++dst) {
        if (!allowed[dst]) {
            continue;
        }
        alpha[dst] = emit[dst];
    }
    total = vecsum(alpha, L);
    /* An all-zero row is left untouched (coefficient 1). */
    factors[0] = (total != 0.) ? 1. / total : 1.;
    vecscale(alpha, factors[0], L);

    /* Items 1 .. T-1. */
    for (t = 1; t < T; ++t) {
        alpha_prev = PARTIAL_ALPHA_SCORE(ctx, t-1);
        alpha = PARTIAL_ALPHA_SCORE(ctx, t);
        emit = EXP_STATE_SCORE(ctx, t);
        allowed_prev = mask + (t-1) * L;
        allowed = mask + t * L;

        veczero(alpha, L);

        /* Accumulate transitions between allowed label pairs only. */
        for (src = 0; src < L; ++src) {
            if (!allowed_prev[src]) {
                continue;
            }
            edge = EXP_TRANS_SCORE(ctx, src);
            for (dst = 0; dst < L; ++dst) {
                if (allowed[dst]) {
                    alpha[dst] += alpha_prev[src] * edge[dst];
                }
            }
        }

        /* Fold in the state score of the current item. */
        for (dst = 0; dst < L; ++dst) {
            if (allowed[dst]) {
                alpha[dst] *= emit[dst];
            }
        }

        total = vecsum(alpha, L);
        factors[t] = (total != 0.) ? 1. / total : 1.;
        vecscale(alpha, factors[t], L);
    }

    /*
     * norm = 1. / (C[0] * C[1] ... * C[T-1])
     * log(norm) = - \sum_{t = 0}^{T-1} log(C[t]).
     */
    ctx->partial_log_norm = -vecsumlog(ctx->partial_scale_factor, T);
}
static int l2sgd( encoder_t *gm, dataset_t *trainset, dataset_t *testset, floatval_t *w, logging_t *lg, const int N, const floatval_t t0, const floatval_t lambda, const int num_epochs, int calibration, int period, const floatval_t epsilon, floatval_t *ptr_loss ) { int i, epoch, ret = 0; floatval_t t = 0; floatval_t loss = 0, sum_loss = 0; floatval_t best_sum_loss = DBL_MAX; floatval_t eta, gain, decay = 1.; floatval_t improvement = 0.; floatval_t norm2 = 0.; floatval_t *pf = NULL; floatval_t *best_w = NULL; clock_t clk_prev, clk_begin = clock(); const int K = gm->num_features; if (!calibration) { pf = (floatval_t*)malloc(sizeof(floatval_t) * period); best_w = (floatval_t*)calloc(K, sizeof(floatval_t)); if (pf == NULL || best_w == NULL) { ret = CRFSUITEERR_OUTOFMEMORY; goto error_exit; } } /* Initialize the feature weights. */ vecset(w, 0, K); /* Loop for epochs. */ for (epoch = 1;epoch <= num_epochs;++epoch) { clk_prev = clock(); if (!calibration) { logging(lg, "***** Epoch #%d *****\n", epoch); /* Shuffle the training instances. */ dataset_shuffle(trainset); } /* Loop for instances. */ sum_loss = 0.; for (i = 0;i < N;++i) { const crfsuite_instance_t *inst = dataset_get(trainset, i); /* Update various factors. */ eta = 1 / (lambda * (t0 + t)); decay *= (1.0 - eta * lambda); gain = eta / decay; /* Compute the loss and gradients for the instance. */ gm->set_weights(gm, w, decay); gm->set_instance(gm, inst); gm->objective_and_gradients(gm, &loss, w, gain); sum_loss += loss; ++t; } /* Terminate when the loss is abnormal (NaN, -Inf, +Inf). */ if (!isfinite(loss)) { logging(lg, "ERROR: overflow loss\n"); ret = CRFSUITEERR_OVERFLOW; sum_loss = loss; goto error_exit; } /* Scale the feature weights. */ vecscale(w, decay, K); decay = 1.; /* Include the L2 norm of feature weights to the objective. */ /* The factor N is necessary because lambda = 2 * C / N. */ norm2 = vecdot(w, w, K); sum_loss += 0.5 * lambda * norm2 * N; /* One epoch finished. 
*/ if (!calibration) { /* Check if the current epoch is the best. */ if (sum_loss < best_sum_loss) { /* Store the feature weights to best_w. */ best_sum_loss = sum_loss; veccopy(best_w, w, K); } /* We don't test the stopping criterion while period < epoch. */ if (period < epoch) { improvement = (pf[(epoch-1) % period] - sum_loss) / sum_loss; } else { improvement = epsilon; } /* Store the current value of the objective function. */ pf[(epoch-1) % period] = sum_loss; logging(lg, "Loss: %f\n", sum_loss); if (period < epoch) { logging(lg, "Improvement ratio: %f\n", improvement); } logging(lg, "Feature L2-norm: %f\n", sqrt(norm2)); logging(lg, "Learning rate (eta): %f\n", eta); logging(lg, "Total number of feature updates: %.0f\n", t); logging(lg, "Seconds required for this iteration: %.3f\n", (clock() - clk_prev) / (double)CLOCKS_PER_SEC); /* Holdout evaluation if necessary. */ if (testset != NULL) { holdout_evaluation(gm, testset, w, lg); } logging(lg, "\n"); /* Check for the stopping criterion. */ if (improvement < epsilon) { ret = 0; break; } } } /* Output the optimization result. */ if (!calibration) { if (ret == 0) { if (epoch < num_epochs) { logging(lg, "SGD terminated with the stopping criteria\n"); } else { logging(lg, "SGD terminated with the maximum number of iterations\n"); } } else { logging(lg, "SGD terminated with error code (%d)\n", ret); } } /* Restore the best weights. */ if (best_w != NULL) { sum_loss = best_sum_loss; veccopy(w, best_w, K); } error_exit: free(best_w); free(pf); if (ptr_loss != NULL) { *ptr_loss = sum_loss; } return ret; }
/*
 * Program entry point.
 *
 * The live code initialises the timer and the vehicle, creates a
 * double-buffered RGB GLUT window with a depth buffer, registers the
 * display/reshape/keyboard/mouse/idle callbacks and then enters the GLUT
 * main loop.
 *
 * The `#if 0` sections are disabled scratch experiments; none of them is
 * compiled.  Each one ends in its own `return 0;`.
 */
int main (int argc, char **argv)
{
#if 0
    /* Disabled experiment: splits the samples in y by sign, derives two
       coefficients kdl[0]/kdl[1] and prints the resulting force and torque
       sums. */
    const int N = 4;
    float y[N] = {-0.653828, -0.653828, 0.753333, 0.753333};
    float k[N];
    float l[2] = {0.f, 0.f};
    float kdl[2] = {0.f, 0.f};
    float n[2] = {0.f, 0.f};
    for (int i=0; i<N; i++)
    {
        if (y[i] > 0.f)
        {
            l[1] += y[i];
            n[1] += 1.f;
        }
        else
        {
            l[0] -= y[i];
            n[0] += 1.f;
        }
    }
    kdl[1] = l[1] / (n[1]*l[0] + n[0]*l[1]);
    kdl[0] = l[0] / (n[1]*l[0] + n[0]*l[1]);
    for (int i=0; i<2; i++)
    {
        printf("[%d] l = %f kdl = %f\n", i, l[i], kdl[i]);
    }
    for (int i=0; i<N; i++)
    {
        k[i] = y[i] > 0.f ? kdl[1] : kdl[0];
    }
    float force = 0.f;
    float torque = 0.f;
    for (int i=0; i<N; i++)
    {
        force += k[i];
        torque += y[i] * k[i];
    }
    printf("force = %f\n", force);
    printf("torque = %f\n", torque);
    printf("kdl[0]/kdl[1] = %f\n", kdl[0]/kdl[1]);
    return 0;
#endif

#if 0
    /* Disabled experiment: prints the z component of two cross products
       (r x fground, r x fcent).  Everything after the first `return 0;`
       is unreachable even when this section is enabled. */
    vec3 r = {1.f, -0.3f, 0.f};
    vec3 fground = {0.f, 1.f, 0.f};
    vec3 fcent = {-1.f, 0.f, 0.f};
    vec3 tground;
    vec3 tcent;
    veccross(&tground, &r, &fground);
    veccross(&tcent, &r, &fcent);
    printf("tground = %f\n", tground.z);
    printf("tcent = %f\n", tcent.z);
    return 0;

    float axleFriction = 200.1f;
    float mass = 10.f;
    float invMass = 1.f/mass;
    float v = 10.0f;
    float dt = 0.01;
    for (int r=0; r<100; r++)
    {
        float force = -axleFriction*sgn(v);
        v = v + force*invMass * dt;
        printf("v = %f\n", v);
    }
    return 0;
#endif

#if 0
    /* Disabled experiment: scalar (1-D) wheel/chassis impulse exchange,
       iterated 10 times, followed by a momentum printout. */
    float mass = 10.f;
    float wheelmass = 1.0f;
    float radius = 0.1f;
    float wheelInertia = 2.f/5.f*radius*radius*wheelmass;
    float vel = 0.f;
    float wheelVel= 0.f;
    float dt = 0.01f;
    float torque = 1000.f;
    float angSpeed = -dt*torque*radius/wheelInertia;
    float momentum = angSpeed*wheelInertia;
    printf("momentum put in = %f \n", momentum);
    for (int repeat = 0; repeat<10; repeat++)
    {
        {
            float contactSpeed = radius * angSpeed + wheelVel;
            float error = contactSpeed;
            float denom = 1.f/wheelmass + radius*radius/wheelInertia;
            float impulse = error / denom;
            // Add impulse to the wheel
            wheelVel = wheelVel - impulse/wheelmass;
            angSpeed = angSpeed - radius*impulse/wheelInertia;
        }
        // Axis error
        {
            float error = wheelVel - vel;
            float denom = 1.f/wheelmass + 1.f/mass;
            float impulse = error/denom;
            wheelVel = wheelVel - impulse/wheelmass;
            vel = vel + impulse/mass;
        }
    }
    printf("momentum c = %f\n", vel*mass);
    printf("momentum w = %f\n", vel*wheelmass);
    printf("momentum aw = %f\n", angSpeed*wheelInertia);
    printf("total momentum = %f\n", vel*(mass+wheelmass) + angSpeed*wheelInertia);
    printf("chassis = %f, wheel = %f\n", vel, wheelVel);
    return 0;
#endif

#if 0
    /* Disabled experiment: the same exchange with a vec3 chassis velocity
       and angular velocity (impulses applied at wheelOffset), compared
       against a single direct impulse applied at the same offset. */
    float mass = 10.f;
    float inertia = mass * 0.4f;
    vec3 wheelOffset = {0.f, 1.5f, -0.2f};
    float wheelmass = 1.0f;
    float radius = 0.1f;
    float wheelInertia = 2.f/5.f*radius*radius*wheelmass;
    vec3 vel = {0.f, 0.f, 0.f};
    vec3 w = {0.f, 0.f, 0.f};
    float wheelVel= 0.f;
    float dt = 0.01f;
    float torque = 1000.f;
    float angSpeed = -dt*torque*radius/wheelInertia;
    for (int repeat = 0; repeat<100; repeat++)
    {
        {
            float contactSpeed = radius * angSpeed + wheelVel;
            float error = contactSpeed;
            float denom = 1.f/wheelmass + radius*radius/wheelInertia;
            float impulse = error / denom;
            // Add impulse to the wheel
            wheelVel = wheelVel - impulse/wheelmass;
            angSpeed = angSpeed - radius*impulse/wheelInertia;
        }
        // Axis error
        {
            // float axleVel = vel;
            vec3 cross;
            veccross(&cross, &w, &wheelOffset);
            float axleVel = vel.y + cross.y;
            vec3 pulldir = {0.f, 1.f, 0.f};
            float error = wheelVel - axleVel;
            if (error < 0.000001f) break;
            float denom = 1.f/wheelmass + computeDenominator(1.f/mass, 1.f/inertia, &wheelOffset, &pulldir);
            float impulse = error/denom;
            wheelVel = wheelVel - impulse/wheelmass;
            //vel = vel + impulse/mass;
            {
                vecscale(&pulldir, &pulldir, impulse);
                addImpulseAtOffset(&vel, &w, 1.f/mass, 1.f/inertia, &wheelOffset, &pulldir);
            }
        }
    }
    printf("wheelVel = %f, vel = %f, w = %f\n", wheelVel, vel.y, w.x);
    printf("%f\n", w.x/vel.y);
    // Simple force application!
    {
        wheelVel = 0.f;
        vec3 impulse = {0.f, dt * torque / radius, 0.f};
        veczero(&vel);
        veczero(&w);
        vec3 offset = wheelOffset;
        //offset.z -= radius;
        printf("wheelVel = %f, vel = %f, w = %f\n", wheelVel, vel.y, w.x);
        addImpulseAtOffset(&vel, &w, 1.f/mass, 1.f/inertia, &offset, &impulse);
    }
    printf("wheelVel = %f, vel = %f, w = %f\n", wheelVel, vel.y, w.x);
    printf("%f\n", w.x/vel.y);
    return 0;
#endif

#if 0
    /* Disabled experiment: prints x 1000 times; the update of x itself is
       commented out. */
    float x = 100.f;
    float friction = 20.f;
    float dt = 0.01f;
    float r = dt*friction;
    int n=0;
    while (n<1000)
    {
        //x = x - r*x/(fabsf(x)+r);
        printf("%f\n", x);
        n++;
    }
    return 0;
#endif

    /* Application state is set up before any GLUT call. */
    timerUpdate(&g_time);
    vehicleInit();

    // GLUT Window Initialization:
    glutInit (&argc, argv);
    glutInitWindowSize (s_width, s_height);
    glutInitDisplayMode ( GLUT_RGB | GLUT_DOUBLE | GLUT_DEPTH);
    glutCreateWindow ("CS248 GLUT example");

    // Initialize OpenGL graphics state
    initGraphics();

    // Register callbacks:
    glutDisplayFunc (display);
    glutReshapeFunc (reshape);
    glutKeyboardFunc (keyboard);
    glutMouseFunc (mouseButton);
    glutMotionFunc (mouseMotion);
    glutIdleFunc (animateScene);

    //BuildPopupMenu ();
    //glutAttachMenu (GLUT_RIGHT_BUTTON);

    // Turn the flow of control over to GLUT
    glutMainLoop ();
    return 0;
}
/*
 * Refine one eigenvector estimate with RQI, using symmlq_ for the shifted
 * solves.
 *
 * The vector yvecs[index] is refined in place.  Each step calls symmlq_
 * with the current shift, normalises the returned solution into the
 * vector, and recomputes the shift and the eigen-residual `res`.
 * Iteration stops once res < tol or, when RQI_CONVERGENCE_MODE == 1 and
 * ndims == 1, once the number of vertices whose set assignment changed
 * between two consecutive steps is at most tol * n.  The final shift is
 * returned through *evalest.
 */
void rqi (
    struct vtx_data **A,         /* matrix/graph being analyzed */
    double **yvecs,              /* eigenvectors to be refined */
    int index,                   /* index of vector in yvecs to be refined */
    int n,                       /* number of rows/columns in matrix */
    double *r1,
    double *r2,
    double *v,
    double *w,
    double *x,
    double *y,
    double *work,                /* work space for symmlq */
    double tol,                  /* error tolerance in eigenpair */
    double initshift,            /* initial shift */
    double *evalest,             /* returned eigenvalue */
    double *vwsqrt,              /* square roots of vertex weights */
    struct orthlink *orthlist,   /* lower evecs to orthogonalize against */
    int cube_or_mesh,            /* 0 => hypercube, d => d-dimensional mesh */
    int nsets,                   /* number of sets to divide into */
    int *assignment,             /* set number of each vtx (length n+1) */
    int *active,                 /* space for nvtxs integers */
    int mediantype,              /* which partitioning strategy to use */
    double *goal,                /* desired set sizes */
    int vwgt_max,                /* largest vertex weight */
    int ndims                    /* dimensionality of partition */
)
{
    extern int DEBUG_EVECS;           /* debug flag for eigen computation */
    extern int DEBUG_TRACE;           /* trace main execution path */
    extern int WARNING_EVECS;         /* warning flag for eigen computation */
    extern int RQI_CONVERGENCE_MODE;  /* type of convergence monitoring to do */

    /* iteration state */
    double *u;                   /* the vector being refined */
    double res;                  /* current eigen-residual */
    double last_res;             /* residual of the previous step */
    double factor;               /* ratio between previous res and new tol */
    double minfactor;            /* smallest value factor may shrink to */
    double normx;                /* norm of the symmlq solution */
    int rqisteps;                /* number of RQI steps taken */
    int symmlqitns;              /* accumulated symmlq iterations */
    int inv_it_steps;            /* leading steps that reuse initshift */
    int converged;               /* termination flag */
    int warning;                 /* set when the residual ever grew */
    int first;                   /* no assignment has been computed yet */

    /* symmlq interface */
    long long_n;                 /* copy of n for passing to symmlq */
    long precon, goodb, nout;    /* symmlq input */
    long checka, intlim;         /* symmlq input */
    long itnmin;                 /* symmlq input */
    double shift, rtol;          /* symmlq input */
    double normxlim;             /* a stopping criteria for symmlq */
    double macheps;              /* machine precision calculated by symmlq */
    double anorm, acond;         /* symmlq output */
    double rnorm, ynorm;         /* symmlq output */
    long istop, itn;             /* symmlq output */

    /* assignment-based convergence monitoring */
    int *old_assignment = NULL;  /* assignment of the previous step */
    int assigndiff = 0;          /* vertices that changed set */
    int assigntol = 0;           /* tolerated number of changes */
    int i;                       /* loop index */

    double dot(), ch_norm();
    int symmlq_();
    void splarax(), scadd(), vecscale(), doubleout(), assign(), x2y(), strout();

    if (DEBUG_TRACE > 0) {
        printf("<Entering rqi>\n");
    }

    /* Initial shift and residual for the starting vector. */
    u = yvecs[index];
    splarax(y, A, n, u, vwsqrt, r1);
    shift = dot(u, 1, n, y);
    scadd(y, 1, n, -shift, u);
    res = ch_norm(y, 1, n);
    rqisteps = 0;
    symmlqitns = 0;

    /* symmlq parameters that stay fixed for the whole refinement. */
    precon = FALSE;              /* FALSE until we figure out a good way */
    goodb = TRUE;                /* should be TRUE for this application */
    nout = 0;                    /* 0 for no Symmlq output; 6 for lots */
    checka = FALSE;              /* if don't know by now, too bad */
    intlim = n;                  /* maximum number of Symmlq itns */
    itnmin = 0;                  /* minimum number of Symmlq itns */
    long_n = n;                  /* type change for alint */

    if (DEBUG_EVECS > 0) {
        printf("Using RQI/Symmlq refinement on graph with %d vertices.\n", n);
    }
    if (DEBUG_EVECS > 1) {
        printf(" step lambda est. Ares Symmlq its. istop factor delta\n");
        printf(" 0");
        doubleout(shift, 1);
        doubleout(res, 1);
        printf("\n");
    }

    if (RQI_CONVERGENCE_MODE == 1) {
        assigntol = tol * n;
        old_assignment = smalloc((n + 1) * sizeof(int));
    }

    inv_it_steps = 2;
    warning = FALSE;
    factor = 10;
    minfactor = factor / 2;
    first = TRUE;
    converged = (res < tol) ? TRUE : FALSE;

    while (!converged) {
        /* Close to the target: loosen the inner tolerance ratio. */
        if (res / tol < 1.2) {
            factor = max(factor / 2, minfactor);
        }
        rtol = res / factor;

        /* exit Symmlq if iterate is this large */
        normxlim = 1.0 / rtol;

        if (rqisteps < inv_it_steps) {
            shift = initshift;
        }

        symmlq_(&long_n, &u[1], &r1[1], &r2[1], &v[1], &w[1], &x[1], &y[1],
                work, &checka, &goodb, &precon, &shift, &nout,
                &intlim, &rtol, &istop, &itn, &anorm, &acond,
                &rnorm, &ynorm, (double *) A, vwsqrt, (double *) orthlist,
                &macheps, &normxlim, &itnmin);
        symmlqitns += itn;

        /* The normalised solution becomes the new estimate. */
        normx = ch_norm(x, 1, n);
        vecscale(u, 1, n, 1.0 / normx, x);

        /* New shift and residual. */
        splarax(y, A, n, u, vwsqrt, r1);
        shift = dot(u, 1, n, y);
        scadd(y, 1, n, -shift, u);
        last_res = res;
        res = ch_norm(y, 1, n);
        if (res > last_res) {
            warning = TRUE;
        }
        rqisteps++;

        if (res < tol) {
            converged = TRUE;
        }

        if (RQI_CONVERGENCE_MODE == 1 && !converged && ndims == 1) {
            if (!first) {
                /* Keep the previous assignment for the comparison below. */
                for (i = 0; i <= n; i++) {
                    old_assignment[i] = assignment[i];
                }
            }

            /* ndims is 1 on this path. */
            assign(A, yvecs, n, ndims, cube_or_mesh, nsets, vwsqrt, assignment,
                   active, mediantype, goal, vwgt_max);
            x2y(yvecs, ndims, n, vwsqrt);

            if (first) {
                first = FALSE;
                assigndiff = n;  /* dummy value for debug chart */
            }
            else {
                /* count differences in assignment */
                assigndiff = 0;
                for (i = 0; i <= n; i++) {
                    if (old_assignment[i] != assignment[i]) {
                        assigndiff++;
                    }
                }
                assigndiff = min(assigndiff, n - assigndiff);
                if (assigndiff <= assigntol) {
                    converged = TRUE;
                }
            }
        }

        if (DEBUG_EVECS > 1) {
            printf(" %2d", rqisteps);
            doubleout(shift, 1);
            doubleout(res, 1);
            printf(" %3ld", itn);
            printf(" %ld", istop);
            printf(" %g", factor);
            if (RQI_CONVERGENCE_MODE == 1) {
                printf(" %d\n", assigndiff);
            }
            else {
                printf("\n");
            }
        }
    }

    *evalest = shift;

    if (WARNING_EVECS > 0 && warning) {
        strout("WARNING: Residual convergence not monotonic; RQI may have misconverged.\n");
    }

    if (DEBUG_EVECS > 0) {
        printf("Eval ");
        doubleout(*evalest, 1);
        printf(" RQI steps %d, Symmlq iterations %d.\n\n", rqisteps, symmlqitns);
    }

    if (RQI_CONVERGENCE_MODE == 1) {
        sfree(old_assignment);
    }
}
/*
 * Advance the chassis and its wheels by one sub-step of length dt.
 *
 * Does nothing while g_step is 0; an odd g_step is reset to 0 first, so
 * that value yields exactly one sub-step.
 *
 * Steps, in order:
 *   1. integrate the chassis pose from its linear/angular velocity;
 *   2. damp both velocities and snap small angular components to zero;
 *   3. per wheel: compute world-space offsets, move the wheel velocity into
 *      world space, set up clamped friction impulses and steering axes;
 *   4. run numIterations solver passes (wheel friction, then the constraint
 *      tying each wheel to its chassis attachment point), ramping solverERP
 *      upwards each pass;
 *   5. per wheel: subtract the attachment-point velocity again so the wheel
 *      velocity is stored relative to the chassis.
 */
static void vehicleSubTick(Chassis* c, float dt)
{
    if (g_step==0) return;
    if (g_step&1) g_step = 0;

    vec3* chassisPos = &c->pose.v[3].v3;
    vec3* x = &c->pose.v[0].v3;
    vec3* y = &c->pose.v[1].v3;
    vec3* z = &c->pose.v[2].v3;

    // This bit is done by the physics engine
    if(1)
    {
        vecaddscale(chassisPos, chassisPos, &c->vel, dt);
        mtx rot;
        matrixRotateByVelocity(&rot, &c->pose, &c->angVel, dt);
        matrixCopy33(&c->pose, &rot);
    }

    // Damp
    vecscale(&c->vel, &c->vel, expf(-dt*1.f));
    vecscale(&c->angVel, &c->angVel, expf(-dt*1.f));
    if (fabsf(c->angVel.x)<0.01f) c->angVel.x = 0.f;
    if (fabsf(c->angVel.y)<0.01f) c->angVel.y = 0.f;
    if (fabsf(c->angVel.z)<0.01f) c->angVel.z = 0.f;

    // One clamped impulse accumulator per wheel and friction direction
    ClampedImpulse frictionImpulse[numWheels][2];

    c->steer = g_steer;
    //g_steer *= expf(-dt*3.f);
    //g_speed *= expf(-dt*3.f);

    static float latf = 10.f;       // only referenced by the disabled experiments below
    static float angSpeed = 0.f;    // yaw rate seen by the previous evaluation

    if (g_handBrake>0.f)
    {
        g_handBrake *= expf(-4.f*dt);
        g_speed *= expf(-4.f*dt);
        if (g_handBrake < 0.1f)
        {
            g_handBrake = 0.f;
        }
    }

    // Prepare
    for (int i=0; i<numWheels; i++)
    {
        Suspension* s = c->suspension[i];
        Wheel* w = s->wheel;

        // Calculate the world position and offset of the suspension point
        vec3mtx33mulvec3(&s->worldOffset, &c->pose, &s->offset);
        vec3mtx43mulvec3(&s->worldDefaultPos, &c->pose, &s->offset);

        w->pos = s->worldDefaultPos;

        // Wheel velocity is stored relative to the chassis; move it to world space
        vec3 pointVel = getPointVel(c, &s->worldOffset);
        vecadd(&w->vel, &w->vel, &pointVel);

        float maxFriction0 = 2.0f * dt * c->mass * gravity * (1.f/(float)numWheels);
        clampedImpulseInit(&frictionImpulse[i][0], maxFriction0);

        float latfriction = 10.f;
        float newAngSpeed = vecdot(z, &c->angVel);
        // NOTE(review): angSpeed is shared by all wheels and updated inside
        // this loop, so only the first wheel of a tick can see a non-zero
        // change - confirm this is intended.
        float changeAngSpeed = (newAngSpeed - angSpeed)/dt;
        angSpeed = newAngSpeed;
        printf("changeAngSpeed = %f\n", changeAngSpeed);
        float speed = fabsf(vecdot(y, &c->vel));    // only used by the disabled experiments below
        const float base = 0.5f;

        if (g_speed>=0 && i>=2)
        {
            // Wheels with index >= 2 lose lateral grip as the hand brake is applied
            latfriction = 1.f*expf(-5.f*g_handBrake) + base;
            // latfriction = 1.f*expf(-2.f*fabsf(speed*changeAngSpeed)) + base;
            // if (angSpeed*g_steer < -0.1f)
            // {
            //     latfriction = base;
            // }
            //if (g_steer == 0.f)
            //{
            //    latf += (10.f - latf) * (1.f - exp(-0.1f*dt));
            //}
            //else
            //{
            //    latf += (0.1f - latf) * (1.f - exp(-10.f*dt));
            //}
            //latfriction = latf;
        }
        else
        {
            latfriction = 10.f;
        }

        float maxFriction1 = latfriction * dt * c->mass * gravity * (1.f/(float)numWheels);
        clampedImpulseInit(&frictionImpulse[i][1], maxFriction1);

        vecset(&s->hitNorm, 0.f, 0.f, 1.f);

        float steer = w->maxSteer*c->steer * (1.f + 0.3f*s->offset.x*c->steer);
        vecscale(&w->wheelAxis, x, cosf(steer));
        vecsubscale(&w->wheelAxis, &w->wheelAxis, y, sinf(steer));
        // frictionDir[0] = z cross wheelAxis, frictionDir[1] = wheelAxis
        veccross(&w->frictionDir[0], z, &w->wheelAxis);
        w->frictionDir[1] = w->wheelAxis;
        w->angSpeed = -40.f*g_speed;
    }

    //=============
    // VERBOSE
    //=============
#define verbose false
#define dump if (verbose) printf

    dump("==========================================\n");
    dump("START ITERATION\n");
    dump("==========================================\n");

    // solverERP starts at 0.1 and is raised by a fixed increment after
    // every pass (a single pass uses 1 directly).
    float solverERP = numIterations>1 ? 0.1f : 1.f;
    float changeSolverERP = numIterations>1 ? (1.f - solverERP)/(0.01f+ (float)(numIterations-1)) : 0.f;

    for (int repeat=0; repeat<numIterations; repeat++)
    {
        dump(" == Start Iter == \n");

        for (int i=0; i<numWheels; i++)
        {
            Suspension* s = c->suspension[i];
            Wheel* w = s->wheel;

            const bool axisError = true;
            const bool friction = true;

            // Friction
            if (friction)
            {
                // Wheel velocity with the hitNorm component removed, plus
                // the surface speed due to wheel spin
                vec3 lateralVel;
                vecaddscale(&lateralVel, &w->vel, &s->hitNorm, -vecdot(&s->hitNorm, &w->vel));
                vecaddscale(&lateralVel, &lateralVel, &w->frictionDir[0], +w->angSpeed * w->radius);

                {
                    // Direction 0: the impulse also changes the wheel's spin
                    int dir = 0;
                    float v = vecdot(&lateralVel, &w->frictionDir[dir]);
                    float denom = 1.f/w->mass + w->radius*w->radius*w->invInertia;
                    float impulse = clampedImpulseApply(&frictionImpulse[i][dir], - solverERP * v / denom);
                    vec3 impulseV;
                    vecscale(&impulseV, &w->frictionDir[dir], impulse);
                    vecaddscale(&w->vel, &w->vel, &impulseV, 1.f/w->mass);
                    w->angSpeed = w->angSpeed + (impulse * w->radius * w->invInertia);
                }

                if (1)
                {
                    // Direction 1 (along the wheel axis): linear only
                    int dir=1;
                    float v = vecdot(&lateralVel, &w->frictionDir[dir]);
                    float denom = 1.f/w->mass;
                    float impulse = clampedImpulseApply(&frictionImpulse[i][dir], - solverERP * v / denom);
                    vec3 impulseV;
                    vecscale(&impulseV, &w->frictionDir[dir], impulse);
                    vecaddscale(&w->vel, &w->vel, &impulseV, 1.f/w->mass);
                }
                //dump("gound collision errorV = %f, vel of wheel after = %f\n", penetration, vecdot(&w->vel, &s->hitNorm));
            }

            if (axisError) // Axis Error
            {
                vec3 offset;
                vecsub(&offset, &w->pos, chassisPos);
                vec3 pointvel = getPointVel(c, &offset);
                vec3 error;
                vecsub(&error, &pointvel, &w->vel);
                // Ignore the component of the mismatch along z
                vecaddscale(&error, &error, z, -vecdot(&error, z));

                vec3 norm;
                if (vecsizesq(&error)>0.001f)
                {
                    dump("axis error %f\n", vecsize(&error));
                    vecnormalise(&norm, &error);
                    float denom = computeDenominator(1.f/c->mass, 1.f/c->inertia, &offset, &norm) + 1.f/w->mass;
                    vecscale(&error, &error, -solverERP/denom);
                    addImpulseAtOffset(&c->vel, &c->angVel, 1.f/c->mass, 1.f/c->inertia, &offset, &error);
                    // NOTE(review): `error` already carries one factor of
                    // solverERP from the vecscale above, so the wheel side
                    // is scaled by solverERP twice while the chassis side
                    // is scaled once - confirm this asymmetry is intended.
                    vecaddscale(&w->vel, &w->vel, &error, -solverERP/w->mass);
                }
                //dump("axis error vel of wheel after = %f, inline = %f\n", vecdot(&w->vel, &s->hitNorm), vecdot(&w->vel, &s->axis));
            }
        }
        solverERP += changeSolverERP;
    }

    for (int i=0; i<numWheels; i++)
    {
        Suspension* s = c->suspension[i];
        Wheel* w = s->wheel;

        // Use the same world-space offset as the Prepare loop.  The
        // previous code passed the Suspension pointer itself where every
        // other call passes a vec3 offset.
        vec3 pointVel = getPointVel(c, &s->worldOffset);

        // Convert suspension wheel speed back to car space
        vecsub(&w->vel, &w->vel, &pointVel);
    }
}
/*
 * Initialise a card from four point-to-point connections and build the
 * poly-line for each of its paths.
 *
 *  card   card to initialise; it is cleared completely first.
 *  input  four {from, to} pairs of connection-point indices (0..7).  Each
 *         index must be in range, may be used only once, and a point may
 *         not be connected to itself.
 *
 * If any pair is rejected a warning is printed and no vector paths are
 * generated (paths that were accepted before the bad pair stay recorded
 * in card->paths).
 *
 * Every path is generated from both of its ends, i.e. vpaths[i] is filled
 * for all eight points.
 */
void tsuroCardCreate(tsuroCard* card, int input[4][2])
{
    enum { numPoints = 8 };     // connection points per card
    bool okay = true;

    // Clear the whole card; sizeof(card) would only cover a pointer.
    memset(card, 0, sizeof(*card));
    // All bits set marks a point as "not connected yet".
    memset(card->paths, 0xff, sizeof(card->paths));
    tsuroCardSetAllColours(card, 1.f, 1.f, 1.f);

    for (int i=0; i<4; i++)
    {
        int from = input[i][0];
        int to = input[i][1];

        // Reject indices that would read or write outside paths[], and
        // self-connections, which would leave another point unconnected.
        bool valid = from >= 0 && from < numPoints
                  && to >= 0 && to < numPoints
                  && from != to;

        // Both ends must still be unconnected.
        if (valid && (card->paths[from] & card->paths[to]) == -1)
        {
            card->paths[from] = to;
            card->paths[to] = from;
        }
        else
        {
            printf("WARNING: Invalid input for card!\n");
            okay = false;
        }
    }

    if (okay)
    {
        // Generate the vector path
        const float s = 0.5f*tsuroCardSize;         // half width/scale of card
        const float a = tsuroCardSize*(1.f/6.f);    // position of connection points
        static const float points[8][2] =
        {
            {-a, -s},
            {+a, -s},
            {+s, -a},
            {+s, +a},
            {+a, +s},
            {-a, +s},
            {-s, +a},
            {-s, -a},
        };

        for (int i=0; i<numPoints; i++) // We are doing this twice (but its easier this way!)
        {
            int from = i;
            int to = card->paths[i];
            vec3 f = {points[from][0], points[from][1], 0.f};   // from
            vec3 t = {points[to][0], points[to][1], 0.f};       // to

            if (to == tsuroEdgeOpposite1[from])
            {
                // Directly opposite: a straight line in equal steps
                vec3 dv;
                vecsub(&dv, &t, &f);
                vecscale(&dv, &dv, 1.f/((float)(tsuroVectorPathSize-1)));
                vec3* v = card->vpaths[i].centre;
                v[0] = f;
                for (int n=1; n<tsuroVectorPathSize; n++)
                {
                    vecadd(&v[n], &v[n-1], &dv);
                }
            }
            else if (to == tsuroEdgeOpposite2[from])
            {
                // Opposite wall: two quarter curves, one per half of the path
                vec3 centre = {0.f, 0.f, 0.f};
                vec3 adjacentFrom = {points[tsuroEdgeSame[from]][0], points[tsuroEdgeSame[from]][1], 0.f};  // The point that is on the same edge as from
                vec3 adjacentTo = {points[tsuroEdgeSame[to]][0], points[tsuroEdgeSame[to]][1], 0.f};        // The point that is on the same edge as to
                vec3 focal1, focal2;
                vecmidpoint(&focal1, &adjacentFrom, &f);
                vecmidpoint(&focal2, &adjacentTo, &t);
                vec3 x,y;
                vecsub(&x, &centre, &focal1);
                vecsub(&y, &f, &focal1);
                generateQuarterCurve(&card->vpaths[i].centre[0], &focal1,&y,&x, tsuroVectorPathHalfSize+1, 1.5f);
                vecsub(&x, &centre, &focal2);
                vecsub(&y, &t, &focal2);
                generateQuarterCurve(&card->vpaths[i].centre[tsuroVectorPathHalfSize], &focal2,&x,&y, tsuroVectorPathHalfSize+1, 1.5f);
            }
            else if (to == tsuroEdgeSame[from])
            {
                // Same wall: two quarter curves around the midpoint of the
                // two connection points
                vec3 centre = {0.f, 0.f, 0.f};
                vec3 midpoint;
                vecmidpoint(&midpoint, &f, &t);
                vec3 x,y;
                vecsub(&x, &f, &midpoint);
                vecsub(&y, &centre, &midpoint);
                vecscale(&y, &y, 0.5f);
                generateQuarterCurve(&card->vpaths[i].centre[0], &midpoint,&x,&y, tsuroVectorPathHalfSize+1, 0.8);
                vecneg(&x, &x);
                generateQuarterCurve(&card->vpaths[i].centre[tsuroVectorPathHalfSize], &midpoint,&y,&x, tsuroVectorPathHalfSize+1, 0.8);
            }
            else
            {
                // Remaining case: one curve around a focal point.  Each end
                // fixes one coordinate of the focal point from the wall it
                // sits on; z is always 0.
                vec3 focal = {0.f, 0.f, 0.f};
                switch(from)
                {
                    case 0: case 1: focal.y = -s; break;
                    case 2: case 3: focal.x = +s; break;
                    case 4: case 5: focal.y = +s; break;
                    case 6: case 7: focal.x = -s; break;
                    default: break;
                }
                switch(to)
                {
                    case 0: case 1: focal.y = -s; break;
                    case 2: case 3: focal.x = +s; break;
                    case 4: case 5: focal.y = +s; break;
                    case 6: case 7: focal.x = -s; break;
                    default: break;
                }
                vec3 x,y;
                vecsub(&x, &f, &focal);
                vecsub(&y, &t, &focal);
                generateQuarterCurve(&card->vpaths[i].centre[0], &focal,&x,&y, tsuroVectorPathSize, 1.2f);
                // printf("DUMP: from = %f %f %f\n", XYZ(f));
                // vec3* v = card->vpaths[i].centre;
                // for (int n=0; n<tsuroVectorPathSize; n++)
                // {
                //     printf("%f %f %f\n", XYZp(v));
                //     v++;
                // }
            }
        }
    }
}
/*
 * Store the negated orthant-wise pseudo-gradient in d.
 *
 * Components below `start` are not L1-regularized: d[i] = -g[i].
 * For the others, c is added to or subtracted from -g[i] depending on the
 * sign of x[i]; at x[i] == 0 the one-sided derivative that points downhill
 * is used, and d[i] is 0 when neither does.
 */
static void owlqn_neg_pseudo_gradient(
    lbfgsfloatval_t *d,
    const lbfgsfloatval_t *x,
    const lbfgsfloatval_t *g,
    const int n,
    const lbfgsfloatval_t c,
    const int start
    )
{
    int i;

    /* Compute the negative of gradients. */
    for (i = 0;i < start;++i) {
        d[i] = -g[i];
    }

    /* Compute the negative of psuedo-gradients. */
    for (i = start;i < n;++i) {
        if (x[i] < 0.) {
            /* Differentiable. */
            d[i] = -g[i] + c;
        } else if (0. < x[i]) {
            /* Differentiable. */
            d[i] = -g[i] - c;
        } else {
            if (g[i] < -c) {
                /* Take the right partial derivative. */
                d[i] = -g[i] - c;
            } else if (c < g[i]) {
                /* Take the left partial derivative. */
                d[i] = -g[i] + c;
            } else {
                d[i] = 0.;
            }
        }
    }
}

/*
 * Minimize a function with the limited-memory BFGS method, optionally with
 * orthant-wise L1 regularization (param->orthantwise_c > 0).
 *
 *  n              number of variables.
 *  x              in: starting point; out: the last point evaluated.
 *  ptr_fx         if non-NULL, receives the final objective value.
 *  proc_evaluate  callback returning the objective and filling the gradient.
 *  proc_progress  optional callback invoked once per iteration; a non-zero
 *                 return value stops the optimization and is returned.
 *  instance       opaque pointer handed to both callbacks.
 *  _param         parameters, or NULL for the built-in defaults.
 *
 * Returns LBFGS_SUCCESS, LBFGS_ALREADY_MINIMIZED, a negative line-search
 * status, an LBFGSERR_* code, or the value returned by proc_progress.
 */
int lbfgs(
    int n,
    lbfgsfloatval_t *x,
    lbfgsfloatval_t *ptr_fx,
    lbfgs_evaluate_t proc_evaluate,
    lbfgs_progress_t proc_progress,
    void *instance,
    lbfgs_parameter_t *_param
    )
{
    int ret;
    int i, j, k, ls, end, bound;
    lbfgsfloatval_t step;

    /* Constant parameters and their default values. */
    const lbfgs_parameter_t* param = (_param != NULL) ? _param : &_defparam;
    const int m = param->m;

    lbfgsfloatval_t *xp = NULL, *g = NULL, *gp = NULL, *d = NULL, *w = NULL;
    iteration_data_t *lm = NULL, *it = NULL;
    lbfgsfloatval_t ys, yy;
    lbfgsfloatval_t norm, xnorm, gnorm, beta;
    lbfgsfloatval_t fx = 0.;
    line_search_proc linesearch = line_search_morethuente;

    /* Construct a callback data. */
    callback_data_t cd;
    cd.n = n;
    cd.instance = instance;
    cd.proc_evaluate = proc_evaluate;
    cd.proc_progress = proc_progress;

#if     defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__))
    /* Round out the number of variables. */
    n = round_out_variables(n);
#endif/*defined(USE_SSE)*/

    /* Check the input parameters for errors. */
    if (n <= 0) {
        return LBFGSERR_INVALID_N;
    }
#if     defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__))
    if (n % 8 != 0) {
        return LBFGSERR_INVALID_N_SSE;
    }
    /* x must be 16-byte aligned; test the low address bits without
       narrowing the pointer to a 16-bit integer. */
    if (((size_t)x & 0x000F) != 0) {
        return LBFGSERR_INVALID_X_SSE;
    }
#endif/*defined(USE_SSE)*/
    if (param->min_step < 0.) {
        return LBFGSERR_INVALID_MINSTEP;
    }
    if (param->max_step < param->min_step) {
        return LBFGSERR_INVALID_MAXSTEP;
    }
    if (param->ftol < 0.) {
        return LBFGSERR_INVALID_FTOL;
    }
    if (param->gtol < 0.) {
        return LBFGSERR_INVALID_GTOL;
    }
    if (param->xtol < 0.) {
        return LBFGSERR_INVALID_XTOL;
    }
    if (param->max_linesearch <= 0) {
        return LBFGSERR_INVALID_MAXLINESEARCH;
    }
    if (param->orthantwise_c < 0.) {
        return LBFGSERR_INVALID_ORTHANTWISE;
    }
    if (param->orthantwise_start < 0 || n < param->orthantwise_start) {
        return LBFGSERR_INVALID_ORTHANTWISE_START;
    }
    switch (param->linesearch) {
    case LBFGS_LINESEARCH_MORETHUENTE:
        linesearch = line_search_morethuente;
        break;
    case LBFGS_LINESEARCH_BACKTRACKING:
        linesearch = line_search_backtracking;
        break;
    default:
        return LBFGSERR_INVALID_LINESEARCH;
    }

    /* Allocate working space. */
    xp = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));
    g = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));
    gp = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));
    d = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));
    w = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));
    if (xp == NULL || g == NULL || gp == NULL || d == NULL || w == NULL) {
        ret = LBFGSERR_OUTOFMEMORY;
        goto lbfgs_exit;
    }

    /* Allocate limited memory storage. */
    lm = (iteration_data_t*)vecalloc(m * sizeof(iteration_data_t));
    if (lm == NULL) {
        ret = LBFGSERR_OUTOFMEMORY;
        goto lbfgs_exit;
    }

    /* Make every slot safe to release before any vector is allocated, so a
       failure part-way through the loop below cannot free stale pointers. */
    for (i = 0;i < m;++i) {
        lm[i].s = NULL;
        lm[i].y = NULL;
    }

    /* Initialize the limited memory. */
    for (i = 0;i < m;++i) {
        it = &lm[i];
        it->alpha = 0;
        it->ys = 0;
        it->s = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));
        it->y = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));
        if (it->s == NULL || it->y == NULL) {
            ret = LBFGSERR_OUTOFMEMORY;
            goto lbfgs_exit;
        }
    }

    /* Evaluate the function value and its gradient. */
    fx = cd.proc_evaluate(cd.instance, x, g, cd.n, 0);
    if (0. < param->orthantwise_c) {
        /* Compute L1-regularization factor and add it to the object value. */
        norm = 0.;
        for (i = param->orthantwise_start;i < n;++i) {
            norm += fabs(x[i]);
        }
        fx += norm * param->orthantwise_c;
    }

    /* We assume the initial hessian matrix H_0 as the identity matrix. */
    if (param->orthantwise_c == 0.) {
        vecncpy(d, g, n);
    } else {
        owlqn_neg_pseudo_gradient(
            d, x, g, n, param->orthantwise_c, param->orthantwise_start);
    }

    /* Make sure that the initial variables are not a minimizer. */
    vecnorm(&gnorm, g, n);
    vecnorm(&xnorm, x, n);
    if (xnorm < 1.0) xnorm = 1.0;
    if (gnorm / xnorm <= param->epsilon) {
        ret = LBFGS_ALREADY_MINIMIZED;
        goto lbfgs_exit;
    }

    /* Compute the initial step:
        step = 1.0 / sqrt(vecdot(d, d, n))
     */
    vecrnorm(&step, d, n);

    k = 1;
    end = 0;
    for (;;) {
        /* Store the current position and gradient vectors. */
        veccpy(xp, x, n);
        veccpy(gp, g, n);

        /* Search for an optimal step. */
        ls = linesearch(n, x, &fx, g, d, &step, w, &cd, param);
        if (ls < 0) {
            ret = ls;
            goto lbfgs_exit;
        }

        /* Compute x and g norms. */
        vecnorm(&gnorm, g, n);
        vecnorm(&xnorm, x, n);

        /* Report the progress; a non-zero result cancels the run. */
        if (cd.proc_progress) {
            ret = cd.proc_progress(cd.instance, x, g, fx, xnorm, gnorm, step, cd.n, k, ls);
            if (ret != 0) {
                goto lbfgs_exit;
            }
        }

        /*
            Convergence test.
            The criterion is given by the following formula:
                |g(x)| / \max(1, |x|) < \epsilon
         */
        if (xnorm < 1.0) xnorm = 1.0;
        if (gnorm / xnorm <= param->epsilon) {
            /* Convergence. */
            ret = LBFGS_SUCCESS;
            break;
        }

        if (param->max_iterations != 0 && param->max_iterations < k+1) {
            /* Maximum number of iterations. */
            ret = LBFGSERR_MAXIMUMITERATION;
            break;
        }

        /*
            Update vectors s and y:
                s_{k+1} = x_{k+1} - x_{k} = \step * d_{k}.
                y_{k+1} = g_{k+1} - g_{k}.
         */
        it = &lm[end];
        vecdiff(it->s, x, xp, n);
        vecdiff(it->y, g, gp, n);

        /*
            Compute scalars ys and yy:
                ys = y^t \cdot s = 1 / \rho.
                yy = y^t \cdot y.
            Notice that yy is used for scaling the hessian matrix H_0 (Cholesky factor).
         */
        vecdot(&ys, it->y, it->s, n);
        vecdot(&yy, it->y, it->y, n);
        it->ys = ys;

        /*
            Recursive formula to compute dir = -(H \cdot g).
                This is described in page 779 of:
                Jorge Nocedal.
                Updating Quasi-Newton Matrices with Limited Storage.
                Mathematics of Computation, Vol. 35, No. 151,
                pp. 773--782, 1980.
         */
        bound = (m <= k) ? m : k;
        ++k;
        end = (end + 1) % m;

        if (param->orthantwise_c == 0.) {
            /* Compute the negative of gradients. */
            vecncpy(d, g, n);
        } else {
            owlqn_neg_pseudo_gradient(
                d, x, g, n, param->orthantwise_c, param->orthantwise_start);
            /* Store the steepest direction.*/
            veccpy(w, d, n);
        }

        j = end;
        for (i = 0;i < bound;++i) {
            j = (j + m - 1) % m;    /* if (--j == -1) j = m-1; */
            it = &lm[j];
            /* \alpha_{j} = \rho_{j} s^{t}_{j} \cdot q_{k+1}. */
            vecdot(&it->alpha, it->s, d, n);
            it->alpha /= it->ys;
            /* q_{i} = q_{i+1} - \alpha_{i} y_{i}. */
            vecadd(d, it->y, -it->alpha, n);
        }

        vecscale(d, ys / yy, n);

        for (i = 0;i < bound;++i) {
            it = &lm[j];
            /* \beta_{j} = \rho_{j} y^t_{j} \cdot \gamma_{i}. */
            vecdot(&beta, it->y, d, n);
            beta /= it->ys;
            /* \gamma_{i+1} = \gamma_{i} + (\alpha_{j} - \beta_{j}) s_{j}. */
            vecadd(d, it->s, it->alpha - beta, n);
            j = (j + 1) % m;        /* if (++j == m) j = 0; */
        }

        /*
            Constrain the search direction for orthant-wise updates.
         */
        if (param->orthantwise_c != 0.) {
            for (i = param->orthantwise_start;i < n;++i) {
                if (d[i] * w[i] <= 0) {
                    d[i] = 0;
                }
            }
        }

        /*
            Now the search direction d is ready. We try step = 1 first.
         */
        step = 1.0;
    }

lbfgs_exit:
    /* Return the final value of the objective function. */
    if (ptr_fx != NULL) {
        *ptr_fx = fx;
    }

    /* Free memory blocks used by this function. */
    if (lm != NULL) {
        for (i = 0;i < m;++i) {
            vecfree(lm[i].s);
            vecfree(lm[i].y);
        }
        vecfree(lm);
    }
    vecfree(w);
    vecfree(d);
    vecfree(gp);
    vecfree(g);
    vecfree(xp);

    return ret;
}
/*
 * Compute the 'neigs' top eigenvectors of 'square_mat' using power iteration.
 *
 * square_mat  matrix handed to right_mult_with_vector_d() with dimensions n x n
 * n           length of every vector involved
 * neigs       number of eigenvectors requested; clamped to n
 * eigs        array of at least 'neigs' vectors of length n.  When 'initialize'
 *             is false the incoming contents are used as starting guesses.  On
 *             return each vector is normalized and the vectors are ordered by
 *             decreasing entry in 'evals'.
 * evals       output, one value per eigenvector (angle * len of the final step,
 *             or 0 for vectors filled in after an early exit)
 * initialize  when true every starting guess is filled with rand() % 100
 *
 * Returns true when the last power-iteration run did not exceed the iteration
 * cap (30 * n), false otherwise.
 *
 * Fix over the previous version: if a starting guess turned out to be (almost)
 * zero after orthogonalization and 'initialize' was false, the retry re-used
 * the very same vector and never terminated.  A retry now always draws a
 * fresh random vector.
 */
bool power_iteration(double **square_mat, int n, int neigs, double **eigs,
                     double *evals, bool initialize)
{
    int i, j;
    double *tmp_vec = N_GNEW(n, double);
    double *last_vec = N_GNEW(n, double);
    double *curr_vector;
    double len;
    double angle;
    double alpha;
    int iteration = 0;
    int largest_index;
    double largest_eval;
    int Max_iterations = 30 * n;
    double tol = 1 - p_iteration_threshold;

    if (neigs >= n) {
        neigs = n;
    }

    for (i = 0; i < neigs; i++) {
        /* First attempt honours the caller's choice; retries always randomize. */
        bool randomize = initialize;

        curr_vector = eigs[i];

        /* guess the i-th eigen vector */
        for (;;) {
            if (randomize) {
                for (j = 0; j < n; j++) {
                    curr_vector[j] = rand() % 100;
                }
            }
            /* orthogonalize against higher eigenvectors */
            for (j = 0; j < i; j++) {
                alpha = -dot(eigs[j], 0, n - 1, curr_vector);
                scadd(curr_vector, 0, n - 1, alpha, eigs[j]);
            }
            len = norm(curr_vector, 0, n - 1);
            /* Written as a negation so a NaN norm falls through exactly as before. */
            if (!(len < 1e-10)) {
                break;
            }
            /* We have chosen a vector colinear with previous ones: try another. */
            randomize = true;
        }
        vecscale(curr_vector, 0, n - 1, 1.0 / len, curr_vector);

        iteration = 0;
        do {
            iteration++;
            cpvec(last_vec, 0, n - 1, curr_vector);

            right_mult_with_vector_d(square_mat, n, n, curr_vector, tmp_vec);
            cpvec(curr_vector, 0, n - 1, tmp_vec);

            /* orthogonalize against higher eigenvectors */
            for (j = 0; j < i; j++) {
                alpha = -dot(eigs[j], 0, n - 1, curr_vector);
                scadd(curr_vector, 0, n - 1, alpha, eigs[j]);
            }
            len = norm(curr_vector, 0, n - 1);
            if (len < 1e-10 || iteration > Max_iterations) {
                /* We have reached the null space (e.vec. associated with
                 * e.val. 0), or ran out of iterations. */
                goto exit;
            }

            vecscale(curr_vector, 0, n - 1, 1.0 / len, curr_vector);
            angle = dot(curr_vector, 0, n - 1, last_vec);
        } while (fabs(angle) < tol);

        /* this is the Rayleigh quotient (up to errors due to
         * orthogonalization): u*(A*u)/||A*u||)*||A*u||,
         * where u=last_vec, and ||u||=1 */
        evals[i] = angle * len;
    }

exit:
    for (; i < neigs; i++) {
        /* compute the smallest eigenvectors, which are probably associated
         * with eigenvalue 0 and for which power-iteration is dangerous */
        curr_vector = eigs[i];
        /* guess the i-th eigen vector */
        for (j = 0; j < n; j++) {
            curr_vector[j] = rand() % 100;
        }
        /* orthogonalize against higher eigenvectors */
        for (j = 0; j < i; j++) {
            alpha = -dot(eigs[j], 0, n - 1, curr_vector);
            scadd(curr_vector, 0, n - 1, alpha, eigs[j]);
        }
        len = norm(curr_vector, 0, n - 1);
        vecscale(curr_vector, 0, n - 1, 1.0 / len, curr_vector);
        evals[i] = 0;
    }

    /* sort vectors by their evals, for overcoming possible mis-convergence: */
    for (i = 0; i < neigs - 1; i++) {
        largest_index = i;
        largest_eval = evals[largest_index];
        for (j = i + 1; j < neigs; j++) {
            if (largest_eval < evals[j]) {
                largest_index = j;
                largest_eval = evals[largest_index];
            }
        }
        if (largest_index != i) {
            /* exchange eigenvectors: */
            cpvec(tmp_vec, 0, n - 1, eigs[i]);
            cpvec(eigs[i], 0, n - 1, eigs[largest_index]);
            cpvec(eigs[largest_index], 0, n - 1, tmp_vec);

            evals[largest_index] = evals[i];
            evals[i] = largest_eval;
        }
    }

    free(tmp_vec);
    free(last_vec);

    return (iteration <= Max_iterations);
}
int lbfgs( int n, T *x, T *ptr_fx, typename FuncWrapper<T>::lbfgs_evaluate_t proc_evaluate, typename FuncWrapper<T>::lbfgs_progress_t proc_progress, void *instance, lbfgs_parameter_t *_param ) { int ret; int i, j, k, ls, end, bound; T step; /* Constant parameters and their default values. */ lbfgs_parameter_t param = (_param != NULL) ? (*_param) : _defparam; const int m = param.m; T *xp = NULL; T *g = NULL, *gp = NULL, *pg = NULL; T *d = NULL, *w = NULL, *pf = NULL; iteration_data_t<T> *lm = NULL; iteration_data_t<T>*it = NULL; T ys, yy; T xnorm, gnorm, beta; T fx = 0.; T rate = 0.; typename LineSearchWrapper<T>::line_search_proc linesearch = line_search_morethuente; /* Construct a callback data. */ callback_data_t<T> cd; cd.n = n; cd.instance = instance; cd.proc_evaluate = proc_evaluate; cd.proc_progress = proc_progress; #if defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__)) /* Round out the number of variables. */ n = round_out_variables(n); #endif/*defined(USE_SSE)*/ /* Check the input parameters for errors. */ if (n <= 0) { return LBFGSERR_INVALID_N; } #if defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__)) if (n % 8 != 0) { return LBFGSERR_INVALID_N_SSE; } if ((uintptr_t)(const void*)x % 16 != 0) { return LBFGSERR_INVALID_X_SSE; } #endif/*defined(USE_SSE)*/ if (param.epsilon < 0.) { return LBFGSERR_INVALID_EPSILON; } if (param.past < 0) { return LBFGSERR_INVALID_TESTPERIOD; } if (param.delta < 0.) { return LBFGSERR_INVALID_DELTA; } if (param.min_step < 0.) { return LBFGSERR_INVALID_MINSTEP; } if (param.max_step < param.min_step) { return LBFGSERR_INVALID_MAXSTEP; } if (param.ftol < 0.) { return LBFGSERR_INVALID_FTOL; } if (param.linesearch == LBFGS_LINESEARCH_BACKTRACKING_WOLFE || param.linesearch == LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE) { if (param.wolfe <= param.ftol || 1. <= param.wolfe) { return LBFGSERR_INVALID_WOLFE; } } if (param.gtol < 0.) { return LBFGSERR_INVALID_GTOL; } if (param.xtol < 0.) 
{ return LBFGSERR_INVALID_XTOL; } if (param.max_linesearch <= 0) { return LBFGSERR_INVALID_MAXLINESEARCH; } if (param.orthantwise_c < 0.) { return LBFGSERR_INVALID_ORTHANTWISE; } if (param.orthantwise_start < 0 || n < param.orthantwise_start) { return LBFGSERR_INVALID_ORTHANTWISE_START; } if (param.orthantwise_end < 0) { param.orthantwise_end = n; } if (n < param.orthantwise_end) { return LBFGSERR_INVALID_ORTHANTWISE_END; } if (param.orthantwise_c != 0.) { switch (param.linesearch) { case LBFGS_LINESEARCH_BACKTRACKING: linesearch = line_search_backtracking_owlqn; break; default: /* Only the backtracking method is available. */ return LBFGSERR_INVALID_LINESEARCH; } } else { switch (param.linesearch) { case LBFGS_LINESEARCH_MORETHUENTE: linesearch = line_search_morethuente; break; case LBFGS_LINESEARCH_BACKTRACKING_ARMIJO: case LBFGS_LINESEARCH_BACKTRACKING_WOLFE: case LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE: linesearch = line_search_backtracking; break; default: return LBFGSERR_INVALID_LINESEARCH; } } /* Allocate working space. */ xp = (T*)vecalloc(n * sizeof(T)); g = (T*)vecalloc(n * sizeof(T)); gp = (T*)vecalloc(n * sizeof(T)); d = (T*)vecalloc(n * sizeof(T)); w = (T*)vecalloc(n * sizeof(T)); if (xp == NULL || g == NULL || gp == NULL || d == NULL || w == NULL) { ret = LBFGSERR_OUTOFMEMORY; goto lbfgs_exit; } if (param.orthantwise_c != 0.) { /* Allocate working space for OW-LQN. */ pg = (T*)vecalloc(n * sizeof(T)); if (pg == NULL) { ret = LBFGSERR_OUTOFMEMORY; goto lbfgs_exit; } } /* Allocate limited memory storage. */ lm = (iteration_data_t<T>*)vecalloc(m * sizeof(iteration_data_t<T>)); if (lm == NULL) { ret = LBFGSERR_OUTOFMEMORY; goto lbfgs_exit; } /* Initialize the limited memory. 
*/ for (i = 0;i < m;++i) { it = &lm[i]; it->alpha = 0; it->ys = 0; it->s = (T*)vecalloc(n * sizeof(T)); it->y = (T*)vecalloc(n * sizeof(T)); if (it->s == NULL || it->y == NULL) { ret = LBFGSERR_OUTOFMEMORY; goto lbfgs_exit; } } /* Allocate an array for storing previous values of the objective function. */ if (0 < param.past) { pf = (T*)vecalloc(param.past * sizeof(T)); } /* Evaluate the function value and its gradient. */ fx = cd.proc_evaluate(cd.instance, x, g, cd.n, 0); if (0. != param.orthantwise_c) { /* Compute the L1 norm of the variable and add it to the object value. */ xnorm = owlqn_x1norm(x, param.orthantwise_start, param.orthantwise_end); fx += xnorm * param.orthantwise_c; owlqn_pseudo_gradient( pg, x, g, n, T(param.orthantwise_c), param.orthantwise_start, param.orthantwise_end ); } /* Store the initial value of the objective function. */ if (pf != NULL) { pf[0] = fx; } /* Compute the direction; we assume the initial hessian matrix H_0 as the identity matrix. */ if (param.orthantwise_c == 0.) { vecncpy(d, g, n); } else { vecncpy(d, pg, n); } /* Make sure that the initial variables are not a minimizer. */ vec2norm(&xnorm, x, n); if (param.orthantwise_c == 0.) { vec2norm(&gnorm, g, n); } else { vec2norm(&gnorm, pg, n); } if (xnorm < 1.0) xnorm = 1.0; if (gnorm / xnorm <= param.epsilon) { ret = LBFGS_ALREADY_MINIMIZED; goto lbfgs_exit; } /* Compute the initial step: step = 1.0 / sqrt(vecdot(d, d, n)) */ vec2norminv(&step, d, n); k = 1; end = 0; for (;;) { /* Store the current position and gradient vectors. */ veccpy(xp, x, n); veccpy(gp, g, n); /* Search for an optimal step. */ if (param.orthantwise_c == 0.) { ls = linesearch(n, x, &fx, g, d, &step, xp, gp, w, &cd, ¶m); } else { ls = linesearch(n, x, &fx, g, d, &step, xp, pg, w, &cd, ¶m); owlqn_pseudo_gradient( pg, x, g, n, T(param.orthantwise_c), param.orthantwise_start, param.orthantwise_end ); } if (ls < 0) { /* Revert to the previous point. 
*/ veccpy(x, xp, n); veccpy(g, gp, n); ret = ls; goto lbfgs_exit; } /* Compute x and g norms. */ vec2norm(&xnorm, x, n); if (param.orthantwise_c == 0.) { vec2norm(&gnorm, g, n); } else { vec2norm(&gnorm, pg, n); } /* Report the progress. */ if (cd.proc_progress) { if ((ret = cd.proc_progress(cd.instance, x, g, fx, xnorm, gnorm, step, cd.n, k, ls))) { goto lbfgs_exit; } } /* Convergence test. The criterion is given by the following formula: |g(x)| / \max(1, |x|) < \epsilon */ if (xnorm < 1.0) xnorm = 1.0; if (gnorm / xnorm <= param.epsilon) { /* Convergence. */ ret = LBFGS_SUCCESS; break; } /* Test for stopping criterion. The criterion is given by the following formula: (f(past_x) - f(x)) / f(x) < \delta */ if (pf != NULL) { /* We don't test the stopping criterion while k < past. */ if (param.past <= k) { /* Compute the relative improvement from the past. */ rate = (pf[k % param.past] - fx) / fx; /* The stopping criterion. */ if (rate < param.delta) { ret = LBFGS_STOP; break; } } /* Store the current value of the objective function. */ pf[k % param.past] = fx; } if (param.max_iterations != 0 && param.max_iterations < k+1) { /* Maximum number of iterations. */ ret = LBFGSERR_MAXIMUMITERATION; break; } /* Update vectors s and y: s_{k+1} = x_{k+1} - x_{k} = \step * d_{k}. y_{k+1} = g_{k+1} - g_{k}. */ it = &lm[end]; vecdiff(it->s, x, xp, n); vecdiff(it->y, g, gp, n); /* Compute scalars ys and yy: ys = y^t \cdot s = 1 / \rho. yy = y^t \cdot y. Notice that yy is used for scaling the hessian matrix H_0 (Cholesky factor). */ vecdot(&ys, it->y, it->s, n); vecdot(&yy, it->y, it->y, n); it->ys = ys; /* Recursive formula to compute dir = -(H \cdot g). This is described in page 779 of: Jorge Nocedal. Updating Quasi-Newton Matrices with Limited Storage. Mathematics of Computation, Vol. 35, No. 151, pp. 773--782, 1980. */ bound = (m <= k) ? m : k; ++k; end = (end + 1) % m; /* Compute the steepest direction. */ if (param.orthantwise_c == 0.) { /* Compute the negative of gradients. 
*/ vecncpy(d, g, n); } else { vecncpy(d, pg, n); } j = end; for (i = 0;i < bound;++i) { j = (j + m - 1) % m; /* if (--j == -1) j = m-1; */ it = &lm[j]; /* \alpha_{j} = \rho_{j} s^{t}_{j} \cdot q_{k+1}. */ vecdot(&it->alpha, it->s, d, n); it->alpha /= it->ys; /* q_{i} = q_{i+1} - \alpha_{i} y_{i}. */ vecadd(d, it->y, -it->alpha, n); } vecscale(d, ys / yy, n); for (i = 0;i < bound;++i) { it = &lm[j]; /* \beta_{j} = \rho_{j} y^t_{j} \cdot \gamma_{i}. */ vecdot(&beta, it->y, d, n); beta /= it->ys; /* \gamma_{i+1} = \gamma_{i} + (\alpha_{j} - \beta_{j}) s_{j}. */ vecadd(d, it->s, it->alpha - beta, n); j = (j + 1) % m; /* if (++j == m) j = 0; */ } /* Constrain the search direction for orthant-wise updates. */ if (param.orthantwise_c != 0.) { for (i = param.orthantwise_start;i < param.orthantwise_end;++i) { if (d[i] * pg[i] >= 0) { d[i] = 0; } } } /* Now the search direction d is ready. We try step = 1 first. */ step = 1.0; } lbfgs_exit: /* Return the final value of the objective function. */ if (ptr_fx != NULL) { *ptr_fx = fx; } vecfree(pf); /* Free memory blocks used by this function. */ if (lm != NULL) { for (i = 0;i < m;++i) { vecfree(lm[i].s); vecfree(lm[i].y); } vecfree(lm); } vecfree(pg); vecfree(w); vecfree(d); vecfree(gp); vecfree(g); vecfree(xp); return ret; }
/*
 * Selective-orthogonalization Lanczos iteration for the extended
 * eigenproblem.
 *
 * Builds Lanczos vectors q[1..j] starting from the right-hand side gvec
 * (scaled by vwsqrt when that is non-NULL), pauses whenever lanpause() says
 * so to compute Ritz values and the extended eigenvalue via get_extval(),
 * and stops when |beta[j] * v[j]| < eigtol, when LANCZOS_MAXITNS steps have
 * been taken, or when a Lanczos vector cannot be allocated.
 *
 * On a normal exit y[1] holds the sum of v[k] * q[k] for k = 1..j.
 *
 * Returns 1 when the starting vector's norm is below the internal
 * numerical-zero threshold (maxj is forced to 0 and y is left untouched),
 * 0 otherwise.  Calls bail() when d != 1, when n < d + 1, when
 * get_ritzvals() reports failure, or when memory runs out before the first
 * pause.
 *
 * All work arrays allocated here are released before returning.
 */
int lanczos_ext(struct vtx_data **A, /* sparse matrix in row linked list format */
                int n, /* problem size */
                int d, /* problem dimension = number of eigvecs to find */
                double ** y, /* columns of y are eigenvectors of A */
                double eigtol, /* tolerance on eigenvectors */
                double * vwsqrt, /* square roots of vertex weights */
                double maxdeg, /* maximum degree of graph */
                int version, /* flags which version of sel. orth. to use */
                double * gvec, /* the rhs n-vector in the extended eigen problem */
                double sigma /* specifies the norm constraint on extended eigenvector */
                )
{
  extern FILE * Output_File; /* output file or null */
  extern int LANCZOS_SO_INTERVAL; /* interval between orthogonalizations */
  extern int LANCZOS_MAXITNS; /* maximum Lanczos iterations allowed */
  extern int DEBUG_EVECS; /* print debugging output? */
  extern int DEBUG_TRACE; /* trace main execution path */
  extern int WARNING_EVECS; /* print warning messages? */
  extern double BISECTION_SAFETY; /* safety for T bisection algorithm */
  extern double SRESTOL; /* resid tol for T evec comp */
  extern double DOUBLE_EPSILON; /* machine precision */
  extern double DOUBLE_MAX; /* largest double value */
  extern double splarax_time; /* time matvec */
  extern double orthog_time; /* time orthogonalization work */
  extern double evec_time; /* time to generate eigenvectors */
  extern double ql_time; /* time tridiagonal eigenvalue work */
  extern double blas_time; /* time for blas. linear algebra */
  extern double init_time; /* time to allocate, initialize variables */
  extern double scan_time; /* time for scanning eval and bound lists */
  extern double debug_time; /* time for (some of) debug computations */
  extern double ritz_time; /* time to generate ritz vectors */
  extern double pause_time; /* time to compute whether to pause */

  int i, j, k; /* indices */
  int maxj; /* maximum number of Lanczos iterations */
  double * u, *r; /* Lanczos vectors */
  double * alpha, *beta; /* the Lanczos scalars from each step */
  double * ritz; /* copy of alpha for ql */
  double * workj; /* work vector, e.g. copy of beta for ql */
  double * workn; /* work vector, e.g. product Av for checkeig */
  double * s; /* eigenvector of T */
  double ** q; /* columns of q are Lanczos basis vectors */
  double * bj; /* beta(j)*(last el. of corr. eigvec s of T) */
  double Sres; /* how well Tevec calculated eigvec s */
  double Sres_max; /* Max value of Sres */
  int inc_bis_safety; /* has Sres increased? */
  double * Ares; /* how well Lanczos calc. eigpair lambda,y (allocated and freed only) */
  int * index; /* the Ritz index of an eigenpair (allocated and freed only) */
  struct orthlink **solist; /* vec. of structs with vecs. to orthog. against */
  struct scanlink * scanlist; /* linked list of fields to do with min ritz vals */
  struct scanlink * curlnk; /* for traversing the scanlist */
  double bis_safety; /* real safety for T bisection algorithm */
  int converged; /* has the iteration converged? */
  double goodtol; /* error tolerance for a good Ritz vector */
  int ngood; /* total number of good Ritz pairs at current step */
  int maxngood; /* biggest val of ngood through current step */
  int left_ngood; /* number of good Ritz pairs on left end */
  int lastpause; /* Most recent step with good ritz vecs */
  int nopauses; /* Have there been any pauses? */
  int interval; /* number of steps between pauses */
  double time; /* Current clock time */
  int left_goodlim; /* number of ritz pairs checked on left end */
  double Anorm; /* Norm estimate of the Laplacian matrix */
  int pausemode; /* which Lanczos pausing criterion to use */
  int pause; /* whether to pause */
  int temp; /* used to prevent redundant index computations */
  double * extvec; /* n-vector solving the extended A eigenproblem (allocated and freed only) */
  double * v; /* j-vector solving the extended T eigenproblem */
  double extval = 0.0; /* computed extended eigenvalue (of both A and T) */
  double * work1, *work2; /* work vectors */
  double check; /* to check an orthogonality condition */
  double numerical_zero; /* used for zero in presence of round-off */
  int ritzval_flag; /* status flag for get_ritzvals() */
  int memory_ok; /* TRUE until memory runs out */

  double * mkvec(); /* allocates space for a vector */
  double * mkvec_ret(); /* mkvec() which returns error code */
  double dot(); /* standard dot product routine */
  struct orthlink *makeorthlnk(); /* makes space for new entry in orthog. set */
  double ch_norm(); /* vector norm */
  double Tevec(); /* calc eigenvector of T by linear recurrence */
  struct scanlink *mkscanlist(); /* init scan list for min ritz vecs */
  double lanc_seconds(); /* switchable timer */
  int lanpause(); /* figure when to pause Lanczos iteration */
  int get_ritzvals(); /* compute eigenvalues of T */
  void setvec(); /* initialize a vector */
  void vecscale(); /* scale a vector */
  void splarax(); /* matrix vector multiply */
  void update(); /* add scalar multiple of a vector to another */
  void sorthog(); /* orthogonalize vector against list of others */
  void bail(); /* our exit routine */
  void scanmin(); /* store small values of vector in linked list */
  void frvec(); /* free vector */
  void scadd(); /* add scalar multiple of vector to another */
  void cpvec(); /* copy a vector */
  void orthog1(); /* efficiently orthog. against vector of ones */
  void solistout(); /* print out orthogonalization list */
  void doubleout(); /* print a double precision number */
  void orthogvec(); /* orthogonalize one vector against another */
  void get_extval(); /* find extended Ritz values */
  void scale_diag(); /* scale vector by diagonal matrix */
  void strout(); /* print string to screen and file */
  double checkeig_ext(); /* check extended eigenpair residual directly */

  if (DEBUG_TRACE > 0) {
    printf("<Entering lanczos_ext>\n");
  }

  if (DEBUG_EVECS > 0) {
    printf("Selective orthogonalization Lanczos for extended eigenproblem, matrix size = %d.\n", n);
  }

  /* Initialize time. */
  time = lanc_seconds();

  if (d != 1) {
    bail("ERROR: Extended Lanczos only available for bisection.", 1);
    /* ... something must be wrong upstream. */
  }

  if (n < d + 1) {
    bail("ERROR: System too small for number of eigenvalues requested.", 1);
    /* ... d+1 since don't use zero eigenvalue pair */
  }

  /* Allocate space. */
  maxj = LANCZOS_MAXITNS;
  u = mkvec(1, n);
  r = mkvec(1, n);
  workn = mkvec(1, n);
  Ares = mkvec(0, d);
  index = smalloc((d + 1) * sizeof(int));
  alpha = mkvec(1, maxj);
  beta = mkvec(0, maxj);
  ritz = mkvec(1, maxj);
  s = mkvec(1, maxj);
  bj = mkvec(1, maxj);
  workj = mkvec(0, maxj);
  q = smalloc((maxj + 1) * sizeof(double *));
  solist = smalloc((maxj + 1) * sizeof(struct orthlink *));
  scanlist = mkscanlist(d);
  extvec = mkvec(1, n);
  v = mkvec(1, maxj);
  work1 = mkvec(1, maxj);
  work2 = mkvec(1, maxj);

  /* Set some constants governing orthogonalization */
  ngood = 0;
  maxngood = 0;
  Anorm = 2 * maxdeg; /* Gershgorin estimate for ||A|| */
  goodtol = Anorm * sqrt(DOUBLE_EPSILON); /* Parlett & Scott's bound, p.224 */
  interval = 2 + (int)min(LANCZOS_SO_INTERVAL - 2, n / (2 * LANCZOS_SO_INTERVAL));
  bis_safety = BISECTION_SAFETY;
  numerical_zero = 1.0e-13;

  if (DEBUG_EVECS > 0) {
    printf(" maxdeg %g\n", maxdeg);
    printf(" goodtol %g\n", goodtol);
    printf(" interval %d\n", interval);
    printf(" maxj %d\n", maxj);
  }

  /* Initialize space. */
  cpvec(r, 1, n, gvec);
  if (vwsqrt != NULL) {
    scale_diag(r, 1, n, vwsqrt);
  }
  /* Compare the norm before and after orthogonalization: a large change
     means the rhs had a component that was projected out. */
  check = ch_norm(r, 1, n);
  if (vwsqrt == NULL) {
    orthog1(r, 1, n);
  } else {
    orthogvec(r, 1, n, vwsqrt);
  }
  check = fabs(check - ch_norm(r, 1, n));
  if (check > 10 * numerical_zero && WARNING_EVECS > 0) {
    strout("WARNING: In terminal propagation, rhs should have no component in the");
    printf(" nullspace of the Laplacian, so check val %g should be negligible.\n", check);
    if (Output_File != NULL) {
      fprintf(Output_File, " nullspace of the Laplacian, so check val %g should be negligible.\n", check);
    }
  }
  beta[0] = ch_norm(r, 1, n);
  q[0] = mkvec(1, n);
  setvec(q[0], 1, n, 0.0);
  setvec(bj, 1, maxj, DOUBLE_MAX);

  if (beta[0] < numerical_zero) {
    /* The rhs vector, Dg, of the transformed problem is numerically zero or is
       in the null space of the Laplacian, so this is not a well posed extended
       eigenproblem. Set maxj to zero to force a quick exit but still clean-up
       memory and return(1) to indicate to eigensolve that it should call the
       default eigensolver routine for the standard eigenproblem. */
    maxj = 0;
  }

  /* Main Lanczos loop. */
  j = 1;
  lastpause = 0;
  pausemode = 1;
  left_ngood = 0;
  left_goodlim = 0;
  converged = FALSE;
  Sres_max = 0.0;
  inc_bis_safety = FALSE;
  nopauses = TRUE;
  memory_ok = TRUE;
  init_time += lanc_seconds() - time;
  while ((j <= maxj) && (!converged) && memory_ok) {
    time = lanc_seconds();

    /* Allocate next Lanczos vector. If fail, back up to last pause. */
    q[j] = mkvec_ret(1, n);
    if (q[j] == NULL) {
      memory_ok = FALSE;
      if (DEBUG_EVECS > 0 || WARNING_EVECS > 0) {
        strout("WARNING: Lanczos_ext out of memory; computing best approximation available.\n");
      }
      if (nopauses) {
        bail("ERROR: Sorry, can't salvage Lanczos_ext.", 1);
        /* ... save yourselves, men. */
      }
      /* Release the vectors built since the last pause and resume from it;
         memory_ok == FALSE ends the loop after this pass. */
      for (i = lastpause + 1; i <= j - 1; i++) {
        frvec(q[i], 1);
      }
      j = lastpause;
    }

    /* Basic Lanczos iteration */
    vecscale(q[j], 1, n, 1.0 / beta[j - 1], r);
    blas_time += lanc_seconds() - time;
    time = lanc_seconds();
    splarax(u, A, n, q[j], vwsqrt, workn);
    splarax_time += lanc_seconds() - time;
    time = lanc_seconds();
    update(r, 1, n, u, -beta[j - 1], q[j - 1]);
    alpha[j] = dot(r, 1, n, q[j]);
    update(r, 1, n, r, -alpha[j], q[j]);
    blas_time += lanc_seconds() - time;

    /* Selective orthogonalization */
    time = lanc_seconds();
    if (vwsqrt == NULL) {
      orthog1(r, 1, n);
    } else {
      orthogvec(r, 1, n, vwsqrt);
    }
    /* Only the two steps right after a pause are orthogonalized against
       the stored good Ritz vectors. */
    if ((j == (lastpause + 1)) || (j == (lastpause + 2))) {
      sorthog(r, n, solist, ngood);
    }
    orthog_time += lanc_seconds() - time;
    beta[j] = ch_norm(r, 1, n);
    time = lanc_seconds();
    pause = lanpause(j, lastpause, interval, q, n, &pausemode, version, beta[j]);
    pause_time += lanc_seconds() - time;
    if (pause) {
      nopauses = FALSE;
      lastpause = j;

      /* Compute limits for checking Ritz pair convergence. */
      if (version == 2) {
        if (left_ngood + 2 > left_goodlim) {
          left_goodlim = left_ngood + 2;
        }
      }

      /* Special case: need at least d Ritz vals on left. */
      left_goodlim = max(left_goodlim, d);

      /* Special case: can't find more than j total Ritz vals. */
      if (left_goodlim > j) {
        left_goodlim = min(left_goodlim, j);
      }

      /* Find Ritz vals using faster of Sturm bisection or ql. */
      time = lanc_seconds();
      if (inc_bis_safety) {
        bis_safety *= 10;
        inc_bis_safety = FALSE;
      }
      ritzval_flag = get_ritzvals(alpha, beta, j, Anorm, workj, ritz, d, left_goodlim, 0, eigtol, bis_safety);
      ql_time += lanc_seconds() - time;

      if (ritzval_flag != 0) {
        bail("ERROR: Lanczos_ext failed in computing eigenvalues of T.", 1);
        /* ... we recover from this in lanczos_SO, but don't worry here. */
      }

      /* Scan for minimum evals of tridiagonal. */
      time = lanc_seconds();
      scanmin(ritz, 1, j, &scanlist);
      scan_time += lanc_seconds() - time;

      /* Compute Ritz pair bounds at left end. */
      time = lanc_seconds();
      setvec(bj, 1, j, 0.0);
      for (i = 1; i <= left_goodlim; i++) {
        Sres = Tevec(alpha, beta - 1, j, ritz[i], s);
        if (Sres > Sres_max) {
          Sres_max = Sres;
        }
        if (Sres > SRESTOL) {
          inc_bis_safety = TRUE;
        }
        bj[i] = s[j] * beta[j];
      }
      ritz_time += lanc_seconds() - time;

      /* Show portion of spectrum checked for Ritz convergence. */
      if (DEBUG_EVECS > 2) {
        time = lanc_seconds();
        printf("\nindex Ritz vals bji bounds\n");
        for (i = 1; i <= left_goodlim; i++) {
          printf(" %3d", i);
          doubleout(ritz[i], 1);
          doubleout(bj[i], 1);
          printf("\n");
        }
        printf("\n");
        curlnk = scanlist;
        while (curlnk != NULL) {
          temp = curlnk->indx;
          if ((temp > left_goodlim) && (temp < j)) {
            printf(" %3d", temp);
            doubleout(ritz[temp], 1);
            doubleout(bj[temp], 1);
            printf("\n");
          }
          curlnk = curlnk->pntr;
        }
        printf(" -------------------\n");
        printf(" goodtol: %19.16f\n\n", goodtol);
        debug_time += lanc_seconds() - time;
      }

      get_extval(alpha, beta, j, ritz[1], s, eigtol, beta[0], sigma, &extval, v, work1, work2);

      /* Check convergence of iteration. */
      if (fabs(beta[j] * v[j]) < eigtol) {
        converged = TRUE;
      } else {
        converged = FALSE;
      }

      if (!converged) {
        ngood = 0;
        left_ngood = 0; /* for setting left_goodlim on next loop */

        /* Compute converged Ritz pairs on left end */
        time = lanc_seconds();
        for (i = 1; i <= left_goodlim; i++) {
          if (bj[i] <= goodtol) {
            ngood += 1;
            left_ngood += 1;
            /* solist slots are created lazily and reused on later pauses. */
            if (ngood > maxngood) {
              maxngood = ngood;
              solist[ngood] = makeorthlnk();
              (solist[ngood])->vec = mkvec(1, n);
            }
            (solist[ngood])->index = i;
            Sres = Tevec(alpha, beta - 1, j, ritz[i], s);
            if (Sres > Sres_max) {
              Sres_max = Sres;
            }
            if (Sres > SRESTOL) {
              inc_bis_safety = TRUE;
            }
            setvec((solist[ngood])->vec, 1, n, 0.0);
            for (k = 1; k <= j; k++) {
              scadd((solist[ngood])->vec, 1, n, s[k], q[k]);
            }
          }
        }
        ritz_time += lanc_seconds() - time;

        if (DEBUG_EVECS > 2) {
          time = lanc_seconds();

          /* Show some info on the orthogonalization. */
          printf(" j %3d; goodlim lft %2d, rgt %2d; list ", j, left_goodlim, 0);
          solistout(solist, ngood, j);

          /* Assemble current approx. eigenvector, check residual directly. */
          setvec(y[1], 1, n, 0.0);
          for (k = 1; k <= j; k++) {
            scadd(y[1], 1, n, v[k], q[k]);
          }
          printf(" extended eigenvalue %g\n", extval);
          printf(" est. extended residual %g\n", fabs(v[j] * beta[j]));
          checkeig_ext(workn, u, A, y[1], n, extval, vwsqrt, gvec, eigtol, FALSE);
          printf("---------------------end of iteration---------------------\n\n");
          debug_time += lanc_seconds() - time;
        }
      }
    }
    j++;
  }
  /* Undo the final increment so j is the index of the last step taken. */
  j--;

  if (DEBUG_EVECS > 0) {
    time = lanc_seconds();
    if (maxj == 0) {
      printf("Not extended eigenproblem -- calling ordinary eigensolver.\n");
    } else {
      printf(" Lanczos_ext itns: %d\n", j);
      printf(" extended eigenvalue: %g\n", extval);
      if (j == maxj) {
        strout("WARNING: Maximum number of Lanczos iterations reached.\n");
      }
    }
    debug_time += lanc_seconds() - time;
  }

  if (maxj != 0) {
    /* Compute (scaled) extended eigenvector. */
    time = lanc_seconds();
    setvec(y[1], 1, n, 0.0);
    for (k = 1; k <= j; k++) {
      scadd(y[1], 1, n, v[k], q[k]);
    }
    evec_time += lanc_seconds() - time;
    /* Note: assign() will scale this y vector back to x (since y = Dx) */

    /* Compute and check residual directly. */
    if (DEBUG_EVECS > 0 || WARNING_EVECS > 0) {
      time = lanc_seconds();
      checkeig_ext(workn, u, A, y[1], n, extval, vwsqrt, gvec, eigtol, TRUE);
      debug_time += lanc_seconds() - time;
    }
  }

  /* free up memory */
  time = lanc_seconds();
  frvec(u, 1);
  frvec(r, 1);
  frvec(workn, 1);
  frvec(Ares, 0);
  sfree(index);
  frvec(alpha, 1);
  frvec(beta, 0);
  frvec(ritz, 1);
  frvec(s, 1);
  frvec(bj, 1);
  frvec(workj, 0);
  for (i = 0; i <= j; i++) {
    frvec(q[i], 1);
  }
  sfree(q);
  while (scanlist != NULL) {
    curlnk = scanlist->pntr;
    sfree(scanlist);
    scanlist = curlnk;
  }
  for (i = 1; i <= maxngood; i++) {
    frvec((solist[i])->vec, 1);
    sfree(solist[i]);
  }
  sfree(solist);
  frvec(extvec, 1);
  frvec(v, 1);
  frvec(work1, 1);
  frvec(work2, 1);
  init_time += lanc_seconds() - time;

  if (maxj == 0)
    return (1); /* see note on beta[0] and maxj above */
  else
    return (0);
}