Пример #1
0
/**
 * Compute the scaled backward (beta) scores for every item of the sequence.
 *
 * The row BETA_SCORE(ctx, T-1) is filled via vecset() with
 * ctx->scale_factor[T-1]; every earlier row t is computed from row t+1,
 * the exponentiated state scores at t+1 and the exponentiated transition
 * scores, and is then scaled by ctx->scale_factor[t].
 *
 * ctx->row is used as scratch space for L values.
 *
 * The scale factors are addressed by index rather than through a
 * decrementing pointer, so no pointer before the start of the array is ever
 * formed. An empty sequence (T <= 0) is a no-op instead of touching
 * element -1.
 *
 * @param ctx  CRF context; num_items, num_labels, scale_factor and row are read.
 */
void crf1dc_beta_score(crf1d_context_t* ctx)
{
    int i, t;
    floatval_t *cur = NULL;
    floatval_t *row = ctx->row;
    const floatval_t *next = NULL, *state = NULL, *trans = NULL;
    const floatval_t *scale = ctx->scale_factor;
    const int T = ctx->num_items;
    const int L = ctx->num_labels;

    /* Nothing to compute for an empty sequence. */
    if (T <= 0) {
        return;
    }

    /* Compute the beta scores at (T-1, *). */
    cur = BETA_SCORE(ctx, T-1);
    vecset(cur, scale[T-1], L);

    /* Compute the beta scores at (t, *), walking backwards. */
    for (t = T-2; 0 <= t; --t) {
        cur = BETA_SCORE(ctx, t);
        next = BETA_SCORE(ctx, t+1);
        state = EXP_STATE_SCORE(ctx, t+1);

        /* row[j] = beta[t+1][j] * state[t+1][j] */
        veccopy(row, next, L);
        vecmul(row, state, L);

        /* Compute the beta score at (t, i). */
        for (i = 0; i < L; ++i) {
            trans = EXP_TRANS_SCORE(ctx, i);
            cur[i] = vecdot(trans, row, L);
        }
        vecscale(cur, scale[t], L);
    }
}
Пример #2
0
/*
** Adjusts the normal `n` according to the kind of `object`, stores in
** rt->angle the dot product of the normal and `light_ray` divided by the
** product of their Euclidean lengths (i.e. the cosine of the angle between
** them), and returns the normal.
**
** - TRIANGLE / QUADRILATERAL / CUBE: the normal is rebuilt with vecprod()
**   from the normalized edges (a - b) and (a - c).
** - ELLIPSOIDE: the incoming normal is passed through vecscale(&n, 0.5).
** - TORUS / PARABOL: the normal comes from the dedicated helpers, evaluated
**   at rt->inter.
*/
static t_vector	calcul_normal2(t_env *rt, t_figure object, t_vector light_ray,
	t_vector n)
{
	t_vector	edge_ab;
	t_vector	edge_ac;
	double		light_len;
	double		normal_len;

	if (object.name == TRIANGLE || object.name == QUADRILATERAL
		|| object.name == CUBE)
	{
		edge_ab = vecsub(&object.a, &object.b);
		edge_ac = vecsub(&object.a, &object.c);
		normalize(&edge_ab);
		normalize(&edge_ac);
		n = vecprod(&edge_ab, &edge_ac);
	}
	if (object.name == ELLIPSOIDE)
	{
		vecscale(&n, 0.5);
	}
	if (object.name == TORUS)
	{
		n = normal_torus(rt->inter, object);
	}
	if (object.name == PARABOL)
	{
		n = normale_parab(rt->inter);
	}
	light_len = sqrt(light_ray.x * light_ray.x
		+ light_ray.y * light_ray.y + light_ray.z * light_ray.z);
	normal_len = sqrt(n.x
		* n.x + n.y * n.y + n.z * n.z);
	rt->angle = vecdot(&n, &light_ray) / (light_len * normal_len);
	return (n);
}
Пример #3
0
/**
 * Compute the model expectations of states and transitions for the
 * "partial" (mask-constrained) lattice.
 *
 * @param ctx   CRF context holding the partial alpha/beta scores, the partial
 *              scale factors and the expectation buffers.
 * @param mask  Flat array indexed as mask[t * L + label]; a non-zero entry
 *              lets the label at position t take part in the transition
 *              accumulation.
 */
void crf1dc_partial_marginals(crf1d_context_t *ctx, int *mask)
{
    const int T = ctx->num_items;
    const int L = ctx->num_labels;
    int t, from, to;

    /*
        State expectations:
            p(t,i) = fwd[t][i] * bwd[t][i] / norm
                   = (1. / C[t]) * fwd'[t][i] * bwd'[t][i]
     */
    for (t = 0; t < T; ++t) {
        floatval_t *alpha = PARTIAL_ALPHA_SCORE(ctx, t);
        floatval_t *beta = PARTIAL_BETA_SCORE(ctx, t);
        floatval_t *mexp = PARTIAL_STATE_MEXP(ctx, t);

        veccopy(mexp, alpha, L);
        vecmul(mexp, beta, L);
        vecscale(mexp, 1. / ctx->partial_scale_factor[t], L);
    }

    /*
        Transition expectations:
            p(t,i,t+1,j)
                = fwd[t][i] * edge[i][j] * state[t+1][j] * bwd[t+1][j] / norm
                = fwd'[t][i] * edge[i][j] * state[t+1][j] * bwd'[t+1][j]
        (the scale factors C[0] ... C[T-1] cancel against the norm).
        The expectation of a transition (i -> j) is the sum of these
        marginals over t.
     */
    for (t = 0; t < T-1; ++t) {
        floatval_t *alpha = PARTIAL_ALPHA_SCORE(ctx, t);
        floatval_t *state = EXP_STATE_SCORE(ctx, t+1);
        floatval_t *beta = PARTIAL_BETA_SCORE(ctx, t+1);
        floatval_t *row = ctx->row;
        int *prev_mask = &mask[t*L];
        int *curr_mask = &mask[(t+1)*L];

        /* row[j] = state[t+1][j] * bwd'[t+1][j] */
        veccopy(row, beta, L);
        vecmul(row, state, L);

        for (from = 0; from < L; ++from) {
            floatval_t *edge;
            floatval_t *mexp;

            /* Skip source labels that are masked out at position t. */
            if (!prev_mask[from]) {
                continue;
            }

            edge = EXP_TRANS_SCORE(ctx, from);
            mexp = PARTIAL_TRANS_MEXP(ctx, from);
            for (to = 0; to < L; ++to) {
                /* Skip target labels that are masked out at position t+1. */
                if (!curr_mask[to]) {
                    continue;
                }
                mexp[to] += alpha[from] * edge[to] * row[to];
            }
        }
    }
}
Пример #4
0
/*
 * Apply `impulse`, acting at `offset`, to a body's linear and angular velocity.
 *
 *   vel    is updated through vecaddscale(vel, vel, impulse, invMass)
 *          (presumably vel += impulse * invMass -- confirm against the vector library).
 *   angVel receives cross(offset, impulse) scaled by invInertia.
 */
static inline void addImpulseAtOffset(vec3* vel, vec3* angVel, float invMass, float invInertia, const vec3* offset, const vec3* impulse)
{
    vec3 angularDelta;

    /* Linear response. */
    vecaddscale(vel, vel, impulse, invMass);

    /* Angular response: torque-like term scaled by the inverse inertia. */
    veccross(&angularDelta, offset, impulse);
    vecscale(&angularDelta, &angularDelta, invInertia);
    vecadd(angVel, angVel, &angularDelta);
}
Пример #5
0
/**
 * Compute the scaled forward (alpha) scores and the log normalization factor.
 *
 * For every position t the alpha row is normalized to sum to one (unless the
 * sum is zero, in which case the factor 1 is used); the factor applied is
 * stored in ctx->scale_factor[t]. Finally ctx->log_norm is set to
 * -vecsumlog(ctx->scale_factor, T).
 *
 * @param ctx  CRF context; num_items, num_labels and scale_factor are used.
 */
void crf1dc_alpha_score(crf1d_context_t* ctx)
{
    const int T = ctx->num_items;
    const int L = ctx->num_labels;
    floatval_t *factors = ctx->scale_factor;
    floatval_t *alpha = NULL;
    const floatval_t *prev_alpha = NULL, *state = NULL;
    floatval_t total;
    int t, from;

    /* Position 0: alpha[0][j] = state[0][j], then normalize. */
    alpha = ALPHA_SCORE(ctx, 0);
    state = EXP_STATE_SCORE(ctx, 0);
    veccopy(alpha, state, L);
    total = vecsum(alpha, L);
    if (total != 0.) {
        factors[0] = 1. / total;
    } else {
        factors[0] = 1.;
    }
    vecscale(alpha, factors[0], L);

    /* Positions 1 .. T-1:
        alpha[t][j] = state[t][j] * \sum_{i} alpha[t-1][i] * trans[i][j]
     */
    for (t = 1; t < T; ++t) {
        prev_alpha = ALPHA_SCORE(ctx, t-1);
        alpha = ALPHA_SCORE(ctx, t);
        state = EXP_STATE_SCORE(ctx, t);

        /* Accumulate the contribution of every source label. */
        veczero(alpha, L);
        for (from = 0; from < L; ++from) {
            const floatval_t *trans = EXP_TRANS_SCORE(ctx, from);
            vecaadd(alpha, prev_alpha[from], trans, L);
        }
        vecmul(alpha, state, L);

        /* Normalize the row and remember the factor that was applied. */
        total = vecsum(alpha, L);
        if (total != 0.) {
            factors[t] = 1. / total;
        } else {
            factors[t] = 1.;
        }
        vecscale(alpha, factors[t], L);
    }

    /* The logarithm of the normalization factor:
        norm = 1. / (C[0] * C[1] ... * C[T-1])
        log(norm) = - \sum_{t = 0}^{T-1} log(C[t]).
     */
    ctx->log_norm = -vecsumlog(ctx->scale_factor, T);
}
Пример #6
0
/*
 * Apply an impulse along `axis` at `worldOffset` sized so that
 * requiredVelocityChange / computeDenominator(...) is the impulse magnitude.
 * The impulse is applied to both the linear and the angular velocity of the
 * chassis through addImpulseAtOffset().
 */
static void doCorrection(Chassis* c, const vec3* worldOffset, const vec3* axis, float requiredVelocityChange)
{
	const float invMass = 1.f/c->mass;
	const float invInertia = 1.f/c->inertia;
	const float response = computeDenominator(invMass, invInertia, worldOffset, axis);
	const float magnitude = requiredVelocityChange / response;
	vec3 impulse;

	vecscale(&impulse, axis, magnitude);
	addImpulseAtOffset(&c->vel, &c->angVel, invMass, invInertia, worldOffset, &impulse);
}
Пример #7
0
/*
 * Rotate the three row vectors of `in` by a quaternion built from the angular
 * velocity and time step, write them to `out`, then re-normalise `out`.
 *
 * The quaternion has vector part angvel scaled by 0.5*dt and scalar part
 * 1 - 0.5 * vecsizesq(vector part).
 */
void matrixRotateByVelocity(mtx* out, const mtx* in, const vec3* angvel, float dt)
{
    quaternion rot;
    int axis;

    vecscale(&rot.v, angvel, 0.5f*dt);
    rot.w = 1.f - 0.5f*vecsizesq(&rot.v);

    /* Rotate each of the three basis rows in turn. */
    for (axis = 0; axis < 3; ++axis) {
        quaternionRotateVector(&out->v[axis].v3, &rot, &in->v[axis].v3);
    }

    matrixReNormalise(out);
}
Пример #8
0
/*
 * Velocity change, measured along `norm`, of the point at `offset` when a
 * unit impulse in the direction of `norm` is applied at that point.
 *
 * Returns dot(norm, cross(cross(offset, norm) * invInertia, offset)) + invMass.
 */
static inline float computeDenominator(float invMass, float invInertia, const vec3* offset, const vec3* norm)
{
	vec3 response;
	float angularTerm;

	/* The temporary is updated in place at each step, as in the original. */
	veccross(&response, offset, norm);
	vecscale(&response, &response, invInertia);
	veccross(&response, &response, offset);

	angularTerm = vecdot(norm, &response);
	return angularTerm + invMass;
}
Пример #9
0
/**
 * Compute the scaled backward (beta) scores on the mask-constrained lattice.
 *
 * At position T-1 every label allowed by the mask gets
 * ctx->partial_scale_factor[T-1]; masked-out labels get zero. For earlier
 * positions, cur[j] (for labels j allowed at t) sums
 * trans[j][i] * state[t+1][i] * beta[t+1][i] over the labels i allowed at
 * t+1, and the row is then scaled by ctx->partial_scale_factor[t].
 *
 * The scale factors are addressed by index rather than through a
 * decrementing pointer, so no pointer before the start of the array is ever
 * formed. An empty sequence (T <= 0) is a no-op instead of touching
 * element -1.
 *
 * @param ctx   CRF context; ctx->row is used as scratch space for L values.
 * @param mask  Flat array indexed as mask[t * L + label]; non-zero = allowed.
 */
void crf1dc_partial_beta_score(crf1d_context_t* ctx, int *mask)
{
    int i, j, t;
    int *curr_mask, *next_mask;
    floatval_t *cur = NULL;
    floatval_t *row = ctx->row;
    const floatval_t *next = NULL, *state = NULL, *trans = NULL;
    const floatval_t *scale = ctx->partial_scale_factor;
    const int T = ctx->num_items;
    const int L = ctx->num_labels;

    /* Nothing to compute for an empty sequence. */
    if (T <= 0) {
        return;
    }

    /* Compute the beta scores at (T-1, *). */
    cur = PARTIAL_BETA_SCORE(ctx, T-1);
    veczero(cur, L);
    curr_mask = &mask[(T-1)*L];
    for (i = 0; i < L; ++i) {
        if (curr_mask[i]) {
            cur[i] = scale[T-1];
        }
    }

    /* Compute the beta scores at (t, *), walking backwards. */
    for (t = T-2; 0 <= t; --t) {
        cur = PARTIAL_BETA_SCORE(ctx, t);
        next = PARTIAL_BETA_SCORE(ctx, t+1);
        state = EXP_STATE_SCORE(ctx, t+1);
        curr_mask = &mask[t * L];
        next_mask = &mask[(t+1) * L];

        /* row[i] = beta[t+1][i] * state[t+1][i] for the allowed labels i;
           the other entries keep the copied beta value and are never read
           below. */
        veccopy(row, next, L);
        veczero(cur, L);
        for (i = 0; i < L; ++i) {
            if (next_mask[i]) {
                row[i] *= state[i];
            }
        }

        /* Sum over allowed successors for every allowed label at t. */
        for (j = 0; j < L; ++j) {
            if (curr_mask[j]) {
                trans = EXP_TRANS_SCORE(ctx, j);
                for (i = 0; i < L; ++i) {
                    if (next_mask[i]) {
                        cur[j] += trans[i] * row[i];
                    }
                }
            }
        }

        vecscale(cur, scale[t], L);
    }
}
Пример #10
0
/**
 * Compute the model expectations of states and transitions from the scaled
 * alpha/beta scores.
 *
 * State expectations overwrite STATE_MEXP(ctx, t); transition expectations
 * are accumulated (+=) into TRANS_MEXP(ctx, i).
 *
 * @param ctx  CRF context; ctx->row is used as scratch space for L values.
 */
void crf1dc_marginals(crf1d_context_t* ctx)
{
    const int T = ctx->num_items;
    const int L = ctx->num_labels;
    int t, from, to;

    /*
        State expectations:
            p(t,i) = fwd[t][i] * bwd[t][i] / norm
                   = (1. / C[t]) * fwd'[t][i] * bwd'[t][i]
     */
    for (t = 0; t < T; ++t) {
        floatval_t *alpha = ALPHA_SCORE(ctx, t);
        floatval_t *beta = BETA_SCORE(ctx, t);
        floatval_t *mexp = STATE_MEXP(ctx, t);

        veccopy(mexp, alpha, L);
        vecmul(mexp, beta, L);
        vecscale(mexp, 1. / ctx->scale_factor[t], L);
    }

    /*
        Transition expectations:
            p(t,i,t+1,j)
                = fwd[t][i] * edge[i][j] * state[t+1][j] * bwd[t+1][j] / norm
                = fwd'[t][i] * edge[i][j] * state[t+1][j] * bwd'[t+1][j]
        (the scale factors C[0] ... C[T-1] cancel against the norm).
        The expectation of a transition (i -> j) is the sum of these
        marginals over t.
     */
    for (t = 0; t < T-1; ++t) {
        floatval_t *alpha = ALPHA_SCORE(ctx, t);
        floatval_t *state = EXP_STATE_SCORE(ctx, t+1);
        floatval_t *beta = BETA_SCORE(ctx, t+1);
        floatval_t *row = ctx->row;

        /* row[j] = state[t+1][j] * bwd'[t+1][j] */
        veccopy(row, beta, L);
        vecmul(row, state, L);

        for (from = 0; from < L; ++from) {
            floatval_t *edge = EXP_TRANS_SCORE(ctx, from);
            floatval_t *mexp = TRANS_MEXP(ctx, from);

            for (to = 0; to < L; ++to) {
                mexp[to] += alpha[from] * edge[to] * row[to];
            }
        }
    }
}
Пример #11
0
/*
 * Split a required velocity change along `axis` between the linear and the
 * angular velocity of the chassis.
 *
 *   linear : c->vel is updated with axis * (requiredVelocityChange * linearRatio).
 *   angular: the remaining fraction (1 - linearRatio) is applied as
 *            cross(worldOffset, axis * k) added to c->angVel, where
 *            k = requiredVelocityChange / dot(axis, cross(cross(worldOffset, axis), worldOffset))
 *                * (1 - linearRatio).
 */
static void doCorrection3(Chassis* c, const vec3* worldOffset, const vec3* axis, float requiredVelocityChange, float linearRatio)
{
	const float angularRatio = 1.f - linearRatio;
	vec3 response;
	vec3 impulse;
	vec3 angVelDelta;
	float angularResponse;
	float angVelChange;

	/* Linear share of the correction. */
	vecaddscale(&c->vel, &c->vel, axis, requiredVelocityChange*linearRatio);

	/* How strongly a unit impulse along the axis turns into velocity at the offset. */
	veccross(&response, worldOffset, axis);
	veccross(&response, &response, worldOffset);
	angularResponse = vecdot(axis, &response);
	angVelChange = requiredVelocityChange / angularResponse;

	/* Angular share of the correction. */
	vecscale(&impulse, axis, angVelChange*angularRatio);
	veccross(&angVelDelta, worldOffset, &impulse);
	vecadd(&c->angVel, &c->angVel, &angVelDelta);
}
Пример #12
0
/**
 * Minimize an objective function with the limited-memory BFGS method,
 * optionally with the orthant-wise (OWL-QN) variant when
 * param.orthantwise_c is non-zero.
 *
 * @param n         Number of variables; must be positive.
 * @param x         In: initial point. Out: the point reached. On a line-search
 *                  failure x (and the gradient) are reverted to the previous
 *                  iterate.
 * @param pfx       If non-NULL, receives the last value stored in fx.
 * @param evaluate  Callback that returns the objective value and fills the
 *                  gradient.
 * @param progress  Progress callback; default_lbfgs_progress is used when
 *                  NULL. A non-zero return cancels the optimization.
 * @param instance  Opaque pointer handed to both callbacks.
 * @param _param    Parameters; default_param is used when NULL.
 * @return          A status code: one of the LBFGS_* / LBFGSERR_* constants set
 *                  below, or the negative value returned by the line search.
 *
 * NOTE(review): param.m is not validated here; with m == 0 the expression
 * `(end + 1) % m` below divides by zero -- confirm callers never pass that.
 */
int lbfgs(
  int n,
  double* x,
  double* pfx,
  lbfgs_evaluate_t evaluate,
  lbfgs_progress_t progress,
  void* instance,
  const lbfgs_parameter_t* _param
) {
  int ret;
  int i, j, k, ls, end, bound, n_evaluate = 0;
  int enalbe_owlqn;
  double step;
  lbfgs_parameter_t param = (_param) ? (*_param) : default_param;
  const int m = param.m;
  double* xp;
  double* g, *gp, *pg = 0;
  double* d, *w, *pf = 0;
  iteration_data_t* lm = 0, *it = 0;
  double ys, yy;
  double xnorm, gnorm, rate, beta;
  double fx;
  line_search_proc_t linesearch = line_search_morethuente;

  /* Bundle the callbacks and their context for the line-search routines. */
  callback_data_t cd;
  cd.n = n;
  cd.instance = instance;
  cd.evaluate = evaluate;
  cd.progress = (progress) ? progress : default_lbfgs_progress;

  /* Check the input parameters for errors. */
  if (n <= 0) {
    return LBFGSERR_INVALID_N;
  }
  if (param.epsilon < 0.0) {
    return LBFGSERR_INVALID_EPSILON;
  }
  if (param.past < 0) {
    return LBFGSERR_INVALID_TESTPERIOD;
  }
  if (param.delta < 0.0) {
    return LBFGSERR_INVALID_DELTA;
  }
  if (param.min_step < 0.0) {
    return LBFGSERR_INVALID_MINSTEP;
  }
  if (param.max_step < param.min_step) {
    return LBFGSERR_INVALID_MAXSTEP;
  }
  if (param.ftol < 0.0) {
    return LBFGSERR_INVALID_FTOL;
  }
  if (param.linesearch == LBFGS_LINESEARCH_BACKTRACKING_WOLFE ||
      param.linesearch == LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE) {
    if (param.wolfe <= param.ftol || 1. <= param.wolfe) {
      return LBFGSERR_INVALID_WOLFE;
    }
  }
  if (param.gtol < 0.0) {
    return LBFGSERR_INVALID_GTOL;
  }
  if (param.xtol < 0.0) {
    return LBFGSERR_INVALID_XTOL;
  }
  if (param.max_linesearch <= 0) {
    return LBFGSERR_INVALID_MAXLINESEARCH;
  }
  if (param.orthantwise_c < 0.0) {
    return LBFGSERR_INVALID_ORTHANTWISE;
  }
  if (param.orthantwise_start < 0 || param.orthantwise_start > n) {
    return LBFGSERR_INVALID_ORTHANTWISE_START;
  }
  /* A negative end index means "up to the last variable". */
  if (param.orthantwise_end < 0) {
    param.orthantwise_end = n;
  }
  if (param.orthantwise_end > n) {
    return LBFGSERR_INVALID_ORTHANTWISE_END;
  }

  /* Select the line-search routine; OWL-QN is enabled by a non-zero orthantwise_c. */
  enalbe_owlqn = (param.orthantwise_c != 0.0);
  if (enalbe_owlqn) {
    switch (param.linesearch) {
    case LBFGS_LINESEARCH_BACKTRACKING_WOLFE:
      linesearch = line_search_backtracking_owlqn;
      break;
    default:
      /* Only the backtracking method is available. */
      return LBFGSERR_INVALID_LINESEARCH;
    }
  } else {
    switch (param.linesearch) {
    case LBFGS_LINESEARCH_MORETHUENTE:
      linesearch = line_search_morethuente;
      break;
    case LBFGS_LINESEARCH_BACKTRACKING_ARMIJO:
    case LBFGS_LINESEARCH_BACKTRACKING_WOLFE:
    case LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE:
      linesearch = line_search_backtracking;
      break;
    default:
      return LBFGSERR_INVALID_LINESEARCH;
    }
  }

  /* Allocate working space. */
  /* NOTE(review): the results of vecalloc/xalloc are not checked here;
     presumably the allocators handle failure themselves -- confirm. */
  xp = vecalloc(n);
  g = vecalloc(n);
  gp = vecalloc(n);
  d = vecalloc(n);
  w = vecalloc(n);

  /* Allocate pseudo gradient. */
  if (enalbe_owlqn) {
    pg = vecalloc(n);
  }

  /* Allocate and initialize the limited memory storage. */
  lm = (iteration_data_t*)xalloc(m * sizeof(iteration_data_t));
  for (i = 0; i < m; i++) {
    it = &lm[i];
    it->alpha = 0.0;
    it->s = vecalloc(n);
    it->y = vecalloc(n);
    it->ys = 0.0;
  }

  /* Allocate an array for storing previous values of the objective function. */
  if (param.past > 0) {
    pf = vecalloc((size_t)param.past);
  }

  /* Evaluate the objective and its gradient at the initial point. */
  fx = cd.evaluate(cd.instance, cd.n, x, g, 0);
  n_evaluate++;

  if (enalbe_owlqn) {
    /* Add the L1 penalty term and compute the pseudo-gradient. */
    xnorm = owlqn_x1norm(x, param.orthantwise_start, param.orthantwise_end);
    fx += xnorm * param.orthantwise_c;
    owlqn_pseudo_gradient(
      pg, x, g, n,
      param.orthantwise_c, param.orthantwise_start, param.orthantwise_end);
  }

  /* Store the initial value of the objective function. */
  if (pf) {
    pf[0] = fx;
  }

  /**
  * Compute the direction.
  * We assume the initial Hessian matrix H_0 is the identity matrix.
  */
  if (!enalbe_owlqn) {
    vecncpy(d, g, n);
  } else {
    vecncpy(d, pg, n);
  }

  /**
  * Make sure that the initial variables are not a minimizer.
  */
  vec2norm(&xnorm, x, n);
  if (!enalbe_owlqn) {
    vec2norm(&gnorm, g, n);
  } else {
    vec2norm(&gnorm, pg, n);
  }
  if (xnorm < 1.0) {
    xnorm = 1.0;
  }
  if (gnorm / xnorm <= param.epsilon) {
    ret = LBFGS_ALREADY_MINIMIZED;
    goto lbfgs_exit;
  }

  /**
  * Compute the initial step:
  * step = 1.0 / ||d||
  */
  vec2norminv(&step, d, n);

  /* k counts iterations (1-based); end is the next slot to write in lm. */
  k = 1;
  end = 0;
  for (;;) {
    /* Store the current position and gradient vectors. */
    veccpy(xp, x, n);
    veccpy(gp, g, n);

    /* Search for an optimal step. */
    if (!enalbe_owlqn) {
      ls = linesearch(n, x, &fx, g, d, &step, xp, gp, w, &cd, &param);
    } else {
      ls = linesearch(n, x, &fx, g, d, &step, xp, pg, w, &cd, &param);
      owlqn_pseudo_gradient(
        pg, x, g, n,
        param.orthantwise_c, param.orthantwise_start, param.orthantwise_end
      );
    }

    if (ls < 0) {
      /* Revert to the previous point. */
      veccpy(x, xp, n);
      veccpy(g, gp, n);
      ret = ls;
      break;
    }

    /* A non-negative ls is added to the evaluation counter. */
    n_evaluate += ls;

    /* Compute x and g norms. */
    vec2norm(&xnorm, x, n);
    if (!enalbe_owlqn) {
      vec2norm(&gnorm, g, n);
    } else {
      vec2norm(&gnorm, pg, n);
    }

    /* Report the progress; any non-zero return is mapped to LBFGSERR_CANCELED. */
    if ((ret = cd.progress(cd.instance, cd.n, x, g, fx, xnorm, gnorm, step, k, n_evaluate)) != 0) {
      ret = LBFGSERR_CANCELED;
      break;
    }

    /* Convergence test. */
    if (xnorm < 1.0) {
      xnorm = 1.0;
    }
    if (gnorm / xnorm <= param.epsilon) {
      ret = LBFGS_CONVERGENCE;
      break;
    }

    /* Stopping criterion test. */
    if (pf) {
      /* We don't test the stopping criterion while k < past. */
      if (param.past <= k) {
        /* Compute the relative improvement from the past. */
        /* NOTE(review): divides by fx; fx == 0 is not guarded. */
        rate = (pf[k % param.past] - fx) / fx;

        /* The stopping criterion. */
        if (rate < param.delta) {
          ret = LBFGS_CONVERGENCE_DELTA;
          break;
        }
      }

      /* Store the current value of the objective function. */
      pf[k % param.past] = fx;
    }

    if (param.max_iterations != 0 && param.max_iterations < k + 1) {
      ret = LBFGSERR_MAXIMUMITERATION;
      break;
    }

    /**
    * Update s and y:
    * s_{k+1} = x_{k+1} - x_{k} = step * d_{k}
    * y_{k+1} = g_{k+1} - g_{k}
    */
    it = &lm[end];
    vecdiff(it->s, x, xp, n);
    vecdiff(it->y, g, gp, n);

    /**
    * Compute scalars ys and yy:
    * ys = y^t s = 1 / \rho
    * yy = y^t y
    * Notice that yy is used for scaling the Hessian matrix H_0 (Cholesky factor).
    */
    vecdot(&ys, it->y, it->s, n);
    vecdot(&yy, it->y, it->y, n);
    it->ys = ys;

    /**
    * Recursive formula to compute d = -(H g).
    * This is described in page 779 of:
    * Jorge Nocedal.
    * Updating Quasi-Newton Matrices with Limited Storage.
    * Mathematics of Computation, Vol. 35, No. 151,
    * pp. 773--782, 1980.
    */
    /* bound = number of stored correction pairs usable in this iteration. */
    bound = (m <= k) ? m : k;
    k++;
    end = (end + 1) % m;

    /* Compute the steepest direction. */
    /* Compute the negative of (pseudo) gradient. */
    if (!enalbe_owlqn) {
      vecncpy(d, g, n);
    } else {
      vecncpy(d, pg, n);
    }

    /* First loop: walk the stored pairs from newest to oldest. */
    j = end;
    for (i = 0; i < bound; i++) {
      j = (j + m - 1) % m; /* if (--j == -1) j = m-1; */
      it = &lm[j];
      /* \alpha_{j} = \rho_{j} s^{t}_{j} q_{k+1} */
      vecdot(&it->alpha, it->s, d, n);
      it->alpha /= it->ys;
      /* q_{i} = q_{i+1} - \alpha_{i} y_{i} */
      vecadd(d, it->y, -it->alpha, n);
    }

    /* Scale by ys / yy (initial Hessian approximation). */
    vecscale(d, ys / yy, n);

    /* Second loop: walk the stored pairs from oldest to newest. */
    for (i = 0; i < bound; i++) {
      it = &lm[j];
      /* \beta_{j} = \rho_{j} y^t_{j} \gamma_{i} */
      vecdot(&beta, it->y, d, n);
      beta /= it->ys;
      /* \gamma_{i+1} = \gamma_{i} + (\alpha_{j} - \beta_{j}) s_{j} */
      vecadd(d, it->s, it->alpha - beta, n);
      j = (j + 1) % m; /* if (++j == m) j = 0; */
    }

    /* Constrain the search direction for orthant-wise updates. */
    if (enalbe_owlqn) {
      owlqn_contrain_line_search(d, pg, param.orthantwise_start, param.orthantwise_end);
    }

    /* Now the search direction d is ready. We try step = 1 first. */
    step = 1.0;
  }

lbfgs_exit:
  /* Return the final value of the objective function. */
  if (pfx) {
    *pfx = fx;
  }

  /* Release every buffer allocated above (pf and pg may still be 0). */
  vecfree(pf);
  if (lm != 0) {
    for (i = 0; i < m; i++) {
      vecfree(lm[i].s);
      vecfree(lm[i].y);
    }
    xfree(lm);
  }
  vecfree(pg);
  vecfree(w);
  vecfree(d);
  vecfree(gp);
  vecfree(g);
  vecfree(xp);
  return ret;
}
Пример #13
0
/**
 * Minimize an objective function with a nonlinear conjugate-gradient method
 * (Polak-Ribiere directions, line search based on quadratic/cubic
 * interpolation and cubic extrapolation).
 *
 * @param n         Number of variables; must be positive.
 * @param x         In: initial point. Out: the point reached. After a failed
 *                  line search x is restored to the point it had before that
 *                  search.
 * @param pfx       If non-NULL, receives the objective value that corresponds
 *                  to the returned x.
 * @param evaluate  Callback that returns the objective value and fills the
 *                  gradient.
 * @param progress  Progress callback; default_lbfgs_progress is used when
 *                  NULL. A non-zero return cancels the optimization.
 * @param instance  Opaque pointer handed to both callbacks.
 * @param _param    Parameters; default_param is used when NULL. Only epsilon,
 *                  past, delta, max_linesearch and max_iterations are read.
 * @return          One of the LBFGS_* / LBFGSERR_* status codes set below.
 *
 * Fix relative to the previous revision: *pfx used to be written from f2
 * unconditionally. On the LBFGS_ALREADY_MINIMIZED exit f2 was still its 0.0
 * initializer, and after a failed line search f2 held the value at the
 * rejected trial point although x had been restored. Both paths now copy the
 * value that belongs to x (f1) into f2 before leaving.
 */
int cg(
  int n,
  double* x,
  double* pfx,
  lbfgs_evaluate_t evaluate,
  lbfgs_progress_t progress,
  void* instance,
  const lbfgs_parameter_t* _param
) {
  /* Line-search constants. */
  static const double RHO = 0.01;    /* sufficient-decrease factor */
  static const double SIG = 0.5;     /* slope-ratio bound */
  static const double INT = 0.1;     /* keep interpolated steps away from the bracket ends */
  static const double EXT = 3.0;     /* maximum extrapolation factor */
  static const double RATIO = 100.0; /* maximum growth of the initial step between iterations */

  int ret;
  int k, ls_count, ls_success, ls_failed = 0, n_evaluate = 0;
  lbfgs_parameter_t param = (_param) ? (*_param) : default_param;
  double f0, f1, f2 = 0.0, f3, d1, d2, d3, z1, z2 = 0.0, z3, limit, A, B, C;
  double xnorm, gnorm, rate;
  double* df0, *df1, *df2, *s, *x0;
  double* pf = 0;

  if (progress == 0) {
    progress = default_lbfgs_progress;
  }

  /* Check the input parameters for errors. */
  if (n <= 0) {
    return LBFGSERR_INVALID_N;
  }
  if (param.epsilon < 0.0) {
    return LBFGSERR_INVALID_EPSILON;
  }
  if (param.past < 0) {
    return LBFGSERR_INVALID_TESTPERIOD;
  }
  if (param.delta < 0.0) {
    return LBFGSERR_INVALID_DELTA;
  }
  if (param.max_linesearch <= 0) {
    return LBFGSERR_INVALID_MAXLINESEARCH;
  }

  /* Allocate working space. */
  df0 = vecalloc(n);
  df1 = vecalloc(n);
  df2 = vecalloc(n);
  s = vecalloc(n);
  x0 = vecalloc(n);

  /* History of objective values for the delta-based stopping test. */
  if (param.past > 0) {
    pf = vecalloc((size_t)param.past);
  }

  /* Evaluate the objective and its gradient at the initial point. */
  f1 = evaluate(instance, n, x, df1, 0);
  n_evaluate++;

  if (pf) {
    pf[0] = f1;
  }

  /* Make sure that the initial variables are not a minimizer. */
  vec2norm(&xnorm, x, n);
  vec2norm(&gnorm, df1, n);
  if (xnorm < 1.0) {
    xnorm = 1.0;
  }
  if (gnorm / xnorm <= param.epsilon) {
    /* Report the value at the (unchanged) initial point, not the 0.0 initializer. */
    f2 = f1;
    ret = LBFGS_ALREADY_MINIMIZED;
    goto cg_exit;
  }

  /* Initial search direction: steepest descent; d1 is the slope along s. */
  vecncpy(s, df1, n);
  vecdot(&d1, s, s, n);
  d1 = -d1;
  /**
  * Compute the initial step z1:
  */
  z1 = 1.0 / (1.0 - d1);

  k = 1;
  for (;;) {
    /* Store the current position and gradient vectors. */
    f0 = f1;
    veccpy(x0, x, n);
    veccpy(df0, df1, n);

    /* update x using current step: x=x+z1*s */
    vecadd(x, s, z1, n);

    f2 = evaluate(instance, n, x, df2, 0);
    n_evaluate++;

    vecdot(&d2, df2, s, n);
    /* set point 3 equal to point 1 */
    f3 = f1;
    d3 = d1;
    z3 = -z1;

    /* begin line search */
    ls_success = 0;
    ls_count = 0;
    limit = -1.0; /* negative means "no upper bracket found yet" */
    for (;;) {
      /* Interpolate while the current point violates the acceptance conditions. */
      while (f2 > f1 + RHO * z1 * d1 || d2 > -SIG * d1) {
        limit = z1;
        if (f2 > f1) {
          /* quadratic fit */
          z2 = z3 - (0.5 * d3 * z3 * z3) / (d3 * z3 + f2 - f3);
        } else {
          /* cubic fit */
          A = 6 * (f2 - f3) / z3 + 3 * (d2 + d3);
          B = 3 * (f3 - f2) - z3 * (d3 + 2 * d2);
          z2 = (sqrt(B * B - A * d2 * z3 * z3) - B) / A;
        }

        if (isinf(z2) || isnan(z2)) {
          /* if we had a numerical problem then bisect */
          z2 = z3 / 2.0;
        }

        /* don't accept too close to limits */
        z2 = max2(min2(z2, INT* z3), (1.0 - INT) * z3);
        /* update step and x */
        z1 = z1 + z2;
        vecadd(x, s, z2, n);

        f2 = evaluate(instance, n, x, df2, 0);
        n_evaluate++;
        ls_count++;

        vecdot(&d2, df2, s, n);
        z3 = z3 - z2;
      }

      if (f2 > f1 + z1 * RHO * d1 || d2 > -SIG * d1) {
        /* a line search failure */
        break;
      } else if (d2 > SIG * d1) {
        /* a line search success */
        ls_success = 1;
        break;
      } else if (ls_count >= param.max_linesearch) {
        ret = LBFGSERR_MAXIMUMLINESEARCH;
        goto cg_exit;
      }

      /* cubic extrapolation */
      A = 6.0 * (f2 - f3) / z3 + 3.0 * (d2 + d3);
      B = 3.0 * (f3 - f2) - z3 * (d3 + 2 * d2);
      z2 = -d2 * z3 * z3 / (B + sqrt(B * B - A * d2 * z3 * z3));
      /* adjust current step z2 for many cases */
      if (isnan(z2) || isinf(z2) || z2 < 0.0) {
        if (limit < -0.5) {
          z2 = z1 * (EXT - 1.0);
        } else {
          z2 = (limit - z1) / 2.0;
        }
      } else if (limit > -0.5 && z2 + z1 > limit) {
        z2 = (limit - z1) / 2.0;
      } else if (limit < -0.5 && z2 + z1 > z1 * EXT) {
        z2 = z1 * (EXT - 1.0);
      } else if (z2 < -z3 * INT) {
        z2 = -z3 * INT;
      } else if (limit > -0.5 && z2 < (limit - z1) * (1.0 - INT)) {
        z2 = (limit - z1) * (1.0 - INT);
      }

      /* set point 3 equal to point 2 */
      f3 = f2;
      d3 = d2;
      z3 = -z2;

      z1 = z1 + z2;
      vecadd(x, s, z2, n);

      f2 = evaluate(instance, n, x, df2, 0);
      n_evaluate++;
      ls_count++;

      vecdot(&d2, df2, s, n);
    }

    if (ls_success) {
      vec2norm(&xnorm, x, n);
      vec2norm(&gnorm, df2, n);
      /* Report the progress; any non-zero return is mapped to LBFGSERR_CANCELED. */
      if ((ret = progress(instance, n, x, df2, f2, xnorm, gnorm, z2, k, n_evaluate)) != 0) {
        ret = LBFGSERR_CANCELED;
        break;
      }
      /* Convergence test. */
      if (xnorm < 1.0) {
        xnorm = 1.0;
      }
      if (gnorm / xnorm <= param.epsilon) {
        ret = LBFGS_CONVERGENCE;
        break;
      }

      /* Stopping criterion based on the improvement over the last `past` iterations. */
      if (pf) {
        if (param.past <= k) {
          rate = (pf[k % param.past] - f2) / f2;
          if (rate < param.delta) {
            ret = LBFGS_CONVERGENCE_DELTA;
            break;
          }
        }
        pf[k % param.past] = f2;
      }

      if (param.max_iterations != 0 && param.max_iterations < k + 1) {
        ret = LBFGSERR_MAXIMUMITERATION;
        break;
      }
      k++;


      f1 = f2;
      /**
      * Polak-Ribiere direction
      * s = (df2'*df2-df1'*df2)/(df1'*df1)*s - df2
      */
      vecdot(&A, df2, df2, n);
      vecdot(&B, df1, df2, n);
      vecdot(&C, df1, df1, n);
      vecscale(s, (A - B) / C, n);
      vecadd(s, df2, -1.0, n);

      vecswap(df1, df2, n);
      vecdot(&d2, df1, s, n);

      /* The new direction must be a descent direction; otherwise fall back
         to steepest descent. */
      if (d2 > 0) {
        vecncpy(s, df1, n);
        vecdot(&d2, s, s, n);
        d2 = -d2;
      }

      /* Scale the next initial step by the slope ratio, capped at RATIO. */
      z1 = z1 * min2(RATIO, d1 / (d2 - DBL_MIN));
      d1 = d2;
      ls_failed = 0;
    } else {
      /* restore previous point */
      f1 = f0;
      veccpy(x, x0, n);
      veccpy(df1, df0, n);
      /* Keep the reported value in sync with the restored x; f2 is
         re-evaluated at the top of the loop before any further use. */
      f2 = f1;

      if (ls_failed) {
        /* line search failed twice */
        ret = LBFGSERR_LINE_SEARCH_FAILED;
        break;
      }

      vecswap(df1, df2, n);
      vecncpy(s, df1, n);/* try steepest */
      vecdot(&d1, s, s, n);
      d1 = -d1;
      z1 = 1.0 / (1.0 - d1);
      ls_failed = 1;
    }
  }

cg_exit:
  /* Return the value of the objective function at the returned point. */
  if (pfx) {
    *pfx = f2;
  }

  vecfree(pf);
  vecfree(x0);
  vecfree(s);
  vecfree(df2);
  vecfree(df1);
  vecfree(df0);
  return ret;
}
Пример #14
0
/*
 * Lanczos iteration with full orthogonalization.
 *
 * Builds a Lanczos basis q[1..j] for the operator defined by A (applied via
 * splarax() in version 1, or via a symmlq_() solve in version 2, the
 * "inverse operator mode"), orthogonalizing each new residual against every
 * previous basis vector. After each step the Ritz values of the tridiagonal
 * matrix (alpha, beta) are computed and a Paige-style bji estimate is used
 * to decide whether the d wanted eigenpairs have converged to within eigtol.
 *
 * Outputs are written to y, lambda and bound by mkeigvecs(); in version 2
 * lambda is replaced by its reciprocal before returning.
 *
 * The loop stops when j exceeds LANCZOS_MAXITNS, when convergence is
 * detected, or when allocating the next Lanczos vector fails (in which case
 * the best approximation available so far is used).
 *
 * Vectors created with mkvec(1, n) are indexed from 1 in this routine.
 * Fatal conditions are reported through bail().
 */
void 
lanczos_FO (
    struct vtx_data **A,		/* graph data structure */
    int n,			/* number of rows/columns in matrix */
    int d,			/* problem dimension = # evecs to find */
    double **y,			/* columns of y are eigenvectors of A  */
    double *lambda,		/* ritz approximation to eigenvals of A */
    double *bound,		/* on ritz pair approximations to eig pairs of A */
    double eigtol,		/* tolerance on eigenvectors */
    double *vwsqrt,		/* square root of vertex weights */
    double maxdeg,               /* maximum degree of graph */
    int version		/* 1 = standard mode, 2 = inverse operator mode */
)

{
    extern FILE *Output_File;	/* output file or NULL */
    extern int DEBUG_EVECS;	/* print debugging output? */
    extern int DEBUG_TRACE;	/* trace main execution path */
    extern int WARNING_EVECS;	/* print warning messages? */
    extern int LANCZOS_MAXITNS;         /* maximum Lanczos iterations allowed */
    extern double BISECTION_SAFETY;	/* safety factor for bisection algorithm */
    extern double SRESTOL;		/* resid tol for T evec comp */
    extern double DOUBLE_MAX;	/* Warning on inaccurate computation of evec of T */
    extern double splarax_time;	/* time matvecs */
    extern double orthog_time;	/* time orthogonalization work */
    extern double tevec_time;	/* time tridiagonal eigvec work */
    extern double evec_time;	/* time to generate eigenvectors */
    extern double ql_time;      /* time tridiagonal eigval work */
    extern double blas_time;	/* time for blas (not assembly coded) */
    extern double init_time;	/* time for allocating memory, etc. */
    extern double scan_time;	/* time for scanning bounds list */
    extern double debug_time;	/* time for debug computations and output */
    int       i, j;		/* indices */
    int       maxj;		/* maximum number of Lanczos iterations */
    double   *u, *r;		/* Lanczos vectors */
    double   *Aq;		/* sparse matrix-vector product vector */
    double   *alpha, *beta;	/* the Lanczos scalars from each step */
    double   *ritz;		/* copy of alpha for tqli */
    double   *workj;		/* work vector (eg. for tqli) */
    double   *workn;		/* work vector (eg. for checkeig) */
    double   *s;		/* eigenvector of T */
    double  **q;		/* columns of q = Lanczos basis vectors */
    double   *bj;		/* beta(j)*(last element of evecs of T) */
    double    bis_safety;	/* real safety factor for bisection algorithm */
    double    Sres;		/* how well Tevec calculated eigvecs */
    double    Sres_max;		/* Maximum value of Sres */
    int       inc_bis_safety;	/* need to increase bisection safety */
    double   *Ares;		/* how well Lanczos calculated each eigpair */
    double   *inv_lambda;	/* eigenvalues of inverse operator */
    int      *index;		/* the Ritz index of an eigenpair */
    struct orthlink *orthlist  = NULL;	/* vectors to orthogonalize against in Lanczos */
    struct orthlink *orthlist2 = NULL;	/* vectors to orthogonalize against in Symmlq */
    struct orthlink *temp;	/* for expanding orthogonalization list */
    double   *ritzvec=NULL;	/* ritz vector for current iteration */
    double   *zeros=NULL;	/* vector of all zeros */
    double   *ones=NULL;	/* vector of all ones */
    struct scanlink *scanlist;	/* list of fields for min ritz vals */
    struct scanlink *curlnk;	/* for traversing the scanlist */
    double    bji_tol;		/* tol on bji estimate of A e-residual */
    int       converged;	/* has the iteration converged? */
    double    time;		/* current clock time */
    double    shift, rtol;		/* symmlq input */
    long      precon, goodb, nout;	/* symmlq input */
    long      checka, intlim;	/* symmlq input */
    double    anorm, acond;	/* symmlq output */
    double    rnorm, ynorm;	/* symmlq output */
    long      istop, itn;	/* symmlq output */
    double    macheps;		/* machine precision calculated by symmlq */
    double    normxlim;		/* a stopping criteria for symmlq */
    long      itnmin;		/* enforce minimum number of iterations */
    int       symmlqitns;	/* # symmlq itns */
    double   *wv1=NULL, *wv2=NULL, *wv3=NULL;	/* Symmlq work space */
    double   *wv4=NULL, *wv5=NULL, *wv6=NULL;	/* Symmlq work space */
    long      long_n;		/* long int copy of n for symmlq */
    int       ritzval_flag = 0;	/* status flag for ql() */
    double    Anorm;            /* Norm estimate of the Laplacian matrix */
    int       left, right;      /* ranges on the search for ritzvals */
    int       memory_ok;        /* TRUE as long as don't run out of memory */

    double   *mkvec();		/* allocates space for a vector */
    double   *mkvec_ret();      /* mkvec() which returns error code */
    double    dot();		/* standard dot product routine */
    struct orthlink *makeorthlnk();	/* make space for entry in orthog. set */
    double    ch_norm();		/* vector norm */
    double    Tevec();		/* calc evec of T by linear recurrence */
    struct scanlink *mkscanlist();	/* make scan list for min ritz vecs */
    double    lanc_seconds();	/* current clock timer */
    int       symmlq_(), get_ritzvals();
    void      setvec(), vecscale(), update(), vecran(), strout();
    void      splarax(), scanmin(), scanmax(), frvec(), orthogonalize();
    void      orthog1(), orthogvec(), bail(), warnings(), mkeigvecs();

    if (DEBUG_TRACE > 0) {
        printf("<Entering lanczos_FO>\n");
    }

    if (DEBUG_EVECS > 0) {
	if (version == 1) {
    	    printf("Full orthogonalization Lanczos, matrix size = %d\n", n);
	}
	else {
    	    printf("Full orthogonalization Lanczos, inverted operator, matrix size = %d\n", n);
	}
    }

    /* Initialize time. */
    time = lanc_seconds();

    if (n < d + 1) {
	bail("ERROR: System too small for number of eigenvalues requested.",1);
	/* d+1 since don't use zero eigenvalue pair */
    }

    /* Allocate Lanczos space. */
    /* NOTE(review): ritzvec and inv_lambda are allocated here and freed at the
       end, but are not otherwise referenced in this routine. */
    maxj = LANCZOS_MAXITNS;
    u = mkvec(1, n);
    r = mkvec(1, n);
    Aq = mkvec(1, n);
    ritzvec = mkvec(1, n);
    zeros = mkvec(1, n);
    setvec(zeros, 1, n, 0.0);
    workn = mkvec(1, n);
    Ares = mkvec(1, d);
    inv_lambda = mkvec(1, d);
    index = smalloc((d + 1) * sizeof(int));
    alpha = mkvec(1, maxj);
    beta = mkvec(1, maxj + 1);
    ritz = mkvec(1, maxj);
    s = mkvec(1, maxj);
    bj = mkvec(1, maxj);
    workj = mkvec(1, maxj + 1);
    q = smalloc((maxj + 1) * sizeof(double *));
    scanlist = mkscanlist(d);

    if (version == 2) {
        /* Allocate Symmlq space all in one chunk. */
        wv1 = smalloc(6 * (n + 1) * sizeof(double));
        wv2 = &wv1[(n + 1)];
        wv3 = &wv1[2 * (n + 1)];
        wv4 = &wv1[3 * (n + 1)];
        wv5 = &wv1[4 * (n + 1)];
        wv6 = &wv1[5 * (n + 1)];

        /* Set invariant symmlq parameters */
        precon = FALSE;		/* FALSE until we figure out a good way */
        goodb = FALSE;		/* should be FALSE for this application */
        checka = FALSE;		/* if don't know by now, too bad */
        intlim = n;			/* set to enforce a maximum number of Symmlq itns */
        itnmin = 0;			/* set to enforce a minimum number of Symmlq itns */
        shift = 0.0;		/* since just solving rather than doing RQI */
        symmlqitns = 0;		/* total number of Symmlq iterations */
        nout = 0;			/* Effectively disabled - see notes in symmlq.f */
        rtol = 1.0e-5;		/* requested residual tolerance */
        normxlim = DOUBLE_MAX;	/* Effectively disables ||x|| termination criterion */
        long_n = n;			/* copy to long for linting */
    }

    /* Initialize. */
    vecran(r, 1, n);
    if (vwsqrt == NULL) {
	/* whack one's direction from initial vector */
	orthog1(r, 1, n);

	/* list the ones direction for later use in Symmlq */
	if (version == 2) {
	    orthlist2 = makeorthlnk();
	    ones = mkvec(1, n);
	    setvec(ones, 1, n, 1.0);
	    orthlist2->vec = ones;
	    orthlist2->pntr = NULL;
	}
    }
    else {
	/* whack vwsqrt direction from initial vector */
	orthogvec(r, 1, n, vwsqrt);

	if (version == 2) {
	    /* list the vwsqrt direction for later use in Symmlq */
	    orthlist2 = makeorthlnk();
	    orthlist2->vec = vwsqrt;
	    orthlist2->pntr = NULL;
	}
    }
    beta[1] = ch_norm(r, 1, n);
    /* q[0] is the all-zeros vector, so the q[j - 1] term passed to update()
       on the first iteration (j == 1) refers to zeros. */
    q[0] = zeros;
    bji_tol = eigtol;
    orthlist = NULL;
    Sres_max = 0.0;
    Anorm = 2 * maxdeg;                         /* Gershgorin estimate for ||A|| */
    bis_safety = BISECTION_SAFETY;
    inc_bis_safety = FALSE;
    init_time += lanc_seconds() - time;

    /* Main Lanczos loop. */
    j = 1;
    converged = FALSE;
    memory_ok = TRUE;
    while ((j <= maxj) && (converged == FALSE) && memory_ok) {
	time = lanc_seconds();

	/* Allocate next Lanczos vector. If fail, back up one step and compute approx. eigvec. */
	q[j] = mkvec_ret(1, n);
        if (q[j] == NULL) {
	    memory_ok = FALSE;
  	    if (DEBUG_EVECS > 0 || WARNING_EVECS > 0) {
                strout("WARNING: Lanczos out of memory; computing best approximation available.\n");
            }
	    if (j <= 2) {
	        bail("ERROR: Sorry, can't salvage Lanczos.",1); 
  	        /* ... save yourselves, men.  */
	    }
	    /* memory_ok is now FALSE, so this is the last pass through the
	       loop; it reuses the previously allocated q[j]. */
            j--;
	}

	vecscale(q[j], 1, n, 1.0 / beta[j], r);
	blas_time += lanc_seconds() - time;
	time = lanc_seconds();
	/* Apply the operator to q[j], leaving the result in Aq. */
	if (version == 1) {
            splarax(Aq, A, n, q[j], vwsqrt, workn);
	}
	else {
	    symmlq_(&long_n, &(q[j][1]), &wv1[1], &wv2[1], &wv3[1], &wv4[1], &Aq[1], &wv5[1],
		&wv6[1], &checka, &goodb, &precon, &shift, &nout,
		&intlim, &rtol, &istop, &itn, &anorm, &acond,
		&rnorm, &ynorm, (double *) A, vwsqrt, (double *) orthlist2,
		&macheps, &normxlim, &itnmin);
	    symmlqitns += itn;
	    if (DEBUG_EVECS > 2) {
	        printf("Symmlq report:      rtol %g\n", rtol);
	        printf("  system norm %g, solution norm %g\n", anorm, ynorm);
	        printf("  system condition %g, residual %g\n", acond, rnorm);
	        printf("  termination condition %2ld, iterations %3ld\n", istop, itn);
	    }
	}
	splarax_time += lanc_seconds() - time;
	time = lanc_seconds();
	update(u, 1, n, Aq, -beta[j], q[j - 1]);
	alpha[j] = dot(u, 1, n, q[j]);
	update(r, 1, n, u, -alpha[j], q[j]);
	blas_time += lanc_seconds() - time;
	time = lanc_seconds();
	/* Orthogonalize the new residual: first against the ones/vwsqrt
	   direction, then against every vector already on orthlist. */
	if (vwsqrt == NULL) {
	    orthog1(r, 1, n);
	}
	else {
	    orthogvec(r, 1, n, vwsqrt);
	}
	orthogonalize(r, n, orthlist);
	/* Push q[j] onto the front of the orthogonalization list. */
	temp = orthlist;
	orthlist = makeorthlnk();
	orthlist->vec = q[j];
	orthlist->pntr = temp;
	beta[j + 1] = ch_norm(r, 1, n);
	orthog_time += lanc_seconds() - time;

	time = lanc_seconds();
	/* Search ranges handed to get_ritzvals(). */
	left = j/2;
	right = j - left + 1;
	/* A poor Tevec residual on the previous step requests a larger
	   bisection safety factor for this one. */
	if (inc_bis_safety) {
	    bis_safety *= 10;
	    inc_bis_safety = FALSE;
	}
	ritzval_flag = get_ritzvals(alpha, beta+1, j, Anorm, workj+1, 
                                    ritz, d, left, right, eigtol, bis_safety);
        /* ... have to off-set beta and workj since full orthogonalization
               indexes these from 1 to maxj+1 whereas selective orthog.
               indexes them from 0 to maxj */ 

	if (ritzval_flag != 0) {
            bail("ERROR: Both Sturm bisection and QL failed.",1);
	    /* ... give up. */
 	}
        ql_time += lanc_seconds() - time;

	/* Convergence check using Paige bji estimates. */
	time = lanc_seconds();
	for (i = 1; i <= j; i++) {
	    Sres = Tevec(alpha, beta, j, ritz[i], s);
	    if (Sres > Sres_max) {
		Sres_max = Sres;
	    }
	    if (Sres > SRESTOL) {
		inc_bis_safety = TRUE;
	    }
	    bj[i] = s[j] * beta[j + 1];
	}
	tevec_time += lanc_seconds() - time;


	time = lanc_seconds();
	if (version == 1) {
	    scanmin(ritz, 1, j, &scanlist);
	}
	else {
	    scanmax(ritz, 1, j, &scanlist);
	}
	/* Converged only if at least d steps were taken and every scanned
	   Ritz pair has a bji estimate no larger than bji_tol. */
	converged = TRUE;
	if (j < d)
	    converged = FALSE;
	else {
	    curlnk = scanlist;
	    while (curlnk != NULL) {
		if (bj[curlnk->indx] > bji_tol) {
		    converged = FALSE;
		}
		curlnk = curlnk->pntr;
	    }
	}
	scan_time += lanc_seconds() - time;
	j++;
    }
    /* Undo the final j++ so that j is the index of the last step performed. */
    j--;

    /* Collect eigenvalue and bound information. */
    time = lanc_seconds();
    mkeigvecs(scanlist,lambda,bound,index,bj,d,&Sres_max,alpha,beta+1,j,s,y,n,q);
    evec_time += lanc_seconds() - time;

    /* Analyze computation for and report additional problems */
    time = lanc_seconds();
    if (DEBUG_EVECS>0 && version == 2) {
	printf("\nTotal Symmlq iterations %3d\n", symmlqitns);
    }
    /* Inverse operator mode: take reciprocals of the computed values. */
    if (version == 2) {
        for (i = 1; i <= d; i++) {
	    lambda[i] = 1.0/lambda[i];
	}
    }
    warnings(workn, A, y, n, lambda, vwsqrt, Ares, bound, index,
             d, j, maxj, Sres_max, eigtol, u, Anorm, Output_File);
    debug_time += lanc_seconds() - time;

    /* Free any memory allocated in this routine. */
    time = lanc_seconds();
    frvec(u, 1);
    frvec(r, 1);
    frvec(Aq, 1);
    frvec(ritzvec, 1);
    frvec(zeros, 1);
    /* ones was only allocated when vwsqrt == NULL and version == 2. */
    if (vwsqrt == NULL && version == 2) {
	frvec(ones, 1);
    }
    frvec(workn, 1);
    frvec(Ares, 1);
    frvec(inv_lambda, 1);
    sfree(index);
    frvec(alpha, 1);
    frvec(beta, 1);
    frvec(ritz, 1);
    frvec(s, 1);
    frvec(bj, 1);
    frvec(workj, 1);
    /* wv1..wv6 share one allocated chunk, so only wv1 is released. */
    if (version == 2) {
	frvec(wv1, 0);
    }
    while (scanlist != NULL) {
	curlnk = scanlist->pntr;
	sfree(scanlist);
	scanlist = curlnk;
    }
    for (i = 1; i <= j; i++) {
	frvec(q[i], 1);
    }
    /* Only the list links are freed here; the vectors they point at are
       released separately above. */
    while (orthlist != NULL) {
	temp = orthlist->pntr;
	sfree(orthlist);
	orthlist = temp;
    }
    while (version == 2 && orthlist2 != NULL) {
	temp = orthlist2->pntr;
	sfree(orthlist2);
	orthlist2 = temp;
    }
    sfree(q);
    init_time += lanc_seconds() - time;
}
Пример #15
0
void crf1dc_partial_alpha_score(crf1d_context_t* ctx, int *mask)
{
    int i, j, t;
    int *prev_mask, *curr_mask;
    floatval_t sum, *cur = NULL;
    floatval_t *scale = &ctx->partial_scale_factor[0];
    const floatval_t *prev = NULL, *trans = NULL, *state = NULL;
    const int T = ctx->num_items;
    const int L = ctx->num_labels;

    /* Compute the alpha scores on nodes (0, *).
        alpha[0][j] = state[0][j]
     */
    cur = PARTIAL_ALPHA_SCORE(ctx, 0);
    veczero(cur, L);
    state = EXP_STATE_SCORE(ctx, 0);
    curr_mask = &mask[0];
    for (i = 0; i < L; ++ i) {
      if (curr_mask[i]) {
        cur[i] = state[i];
      }
    }

    sum = vecsum(cur, L);
    /* scale is a temporary structure */
    *scale = (sum != 0.) ? 1. / sum : 1.;
    vecscale(cur, *scale, L);
    ++scale;

    /* Compute the alpha scores on nodes (t, *).
        alpha[t][j] = state[t][j] * \sum_{i} alpha[t-1][i] * trans[i][j]
     */
    for (t = 1;t < T;++t) {
        prev = PARTIAL_ALPHA_SCORE(ctx, t-1);
        cur = PARTIAL_ALPHA_SCORE(ctx, t);
        state = EXP_STATE_SCORE(ctx, t);
        prev_mask = &mask[(t-1) * L];
        curr_mask = &mask[t * L];

        veczero(cur, L);
        for (i = 0; i < L; ++ i) {
          if (prev_mask[i]) {
            trans = EXP_TRANS_SCORE(ctx, i);
            for (j = 0; j < L; ++ j) {
              if (curr_mask[j]) {
                cur[j] += prev[i] * trans[j];
              }
            }
          }
        }

        for (j = 0; j < L; ++ j) {
          if (curr_mask[j]) {
            cur[j] *= state[j];
          }
        }

        sum = vecsum(cur, L);
        *scale = (sum != 0.) ? 1. / sum : 1.;
        vecscale(cur, *scale, L);
        ++scale;
    }

    /* Compute the logarithm of the normalization factor here.
        norm = 1. / (C[0] * C[1] ... * C[T-1])
        log(norm) = - \sum_{t = 0}^{T-1} log(C[t]).
     */
    ctx->partial_log_norm = -vecsumlog(ctx->partial_scale_factor, T);

}
Пример #16
0
/**
 * Stochastic gradient descent with L2 regularization.
 *
 * Runs up to num_epochs passes over the first N instances of trainset,
 * updating the weight vector w (gm->num_features entries, reset to zero on
 * entry). The learning rate follows eta = 1 / (lambda * (t0 + t)), where t
 * counts instance updates. The weights are kept implicitly scaled by `decay`
 * during an epoch and rescaled once at the end of it.
 *
 *  @param  gm          Encoder supplying set_weights, set_instance and
 *                      objective_and_gradients, plus num_features.
 *  @param  trainset    Training data; shuffled before each epoch unless
 *                      calibrating.
 *  @param  testset     Optional holdout data (may be NULL).
 *  @param  w           Feature weights, overwritten with the result.
 *  @param  lg          Logging sink.
 *  @param  N           Number of training instances visited per epoch.
 *  @param  t0          Offset in the learning-rate schedule.
 *  @param  lambda      L2 regularization coefficient.
 *  @param  num_epochs  Maximum number of epochs.
 *  @param  calibration Non-zero to run silently without shuffling, without
 *                      the stopping criterion and without best-weight
 *                      tracking.
 *  @param  period      Number of past objective values kept for the
 *                      improvement test; read only when calibration is zero,
 *                      where it is used as an allocation size and a modulus.
 *  @param  epsilon     Stop when the improvement ratio falls below this.
 *  @param  ptr_loss    If not NULL, receives the final objective value (the
 *                      best epoch's value when not calibrating).
 *  @return             0 on success, CRFSUITEERR_OUTOFMEMORY if the work
 *                      buffers cannot be allocated, or CRFSUITEERR_OVERFLOW
 *                      if the loss becomes non-finite.
 */
static int l2sgd(
    encoder_t *gm,
    dataset_t *trainset,
    dataset_t *testset,
    floatval_t *w,
    logging_t *lg,
    const int N,
    const floatval_t t0,
    const floatval_t lambda,
    const int num_epochs,
    int calibration,
    int period,
    const floatval_t epsilon,
    floatval_t *ptr_loss
    )
{
    int i, epoch, ret = 0;
    floatval_t t = 0;
    floatval_t loss = 0, sum_loss = 0;
    floatval_t best_sum_loss = DBL_MAX;
    /* eta is logged after every epoch; give it a defined value in case
       N == 0 and the instance loop never assigns it. */
    floatval_t eta = 0., gain, decay = 1.;
    floatval_t improvement = 0.;
    floatval_t norm2 = 0.;
    floatval_t *pf = NULL;
    floatval_t *best_w = NULL;
    clock_t clk_prev;
    const int K = gm->num_features;

    if (!calibration) {
        pf = (floatval_t*)malloc(sizeof(floatval_t) * period);
        best_w = (floatval_t*)calloc(K, sizeof(floatval_t));
        if (pf == NULL || best_w == NULL) {
            ret = CRFSUITEERR_OUTOFMEMORY;
            goto error_exit;
        }
    }

    /* Initialize the feature weights. */
    vecset(w, 0, K);

    /* Loop for epochs. */
    for (epoch = 1;epoch <= num_epochs;++epoch) {
        clk_prev = clock();

        if (!calibration) {
            logging(lg, "***** Epoch #%d *****\n", epoch);
            /* Shuffle the training instances. */
            dataset_shuffle(trainset);
        }

        /* Loop for instances. */
        sum_loss = 0.;
        for (i = 0;i < N;++i) {
            const crfsuite_instance_t *inst = dataset_get(trainset, i);

            /* Update various factors. */
            eta = 1 / (lambda * (t0 + t));
            decay *= (1.0 - eta * lambda);
            gain = eta / decay;

            /* Compute the loss and gradients for the instance. */
            gm->set_weights(gm, w, decay);
            gm->set_instance(gm, inst);
            gm->objective_and_gradients(gm, &loss, w, gain);

            sum_loss += loss;
            ++t;
        }

        /* Terminate when the loss is abnormal (NaN, -Inf, +Inf).
           Only the loss of the last instance of the epoch is inspected.
           On this path w is returned without the end-of-epoch rescaling and
           without restoring best_w. */
        if (!isfinite(loss)) {
            logging(lg, "ERROR: overflow loss\n");
            ret = CRFSUITEERR_OVERFLOW;
            sum_loss = loss;
            goto error_exit;
        }

        /* Scale the feature weights. */
        vecscale(w, decay, K);
        decay = 1.;

        /* Include the L2 norm of feature weights to the objective. */
        /* The factor N is necessary because lambda = 2 * C / N. */
        norm2 = vecdot(w, w, K);
        sum_loss += 0.5 * lambda * norm2 * N;

        /* One epoch finished. */
        if (!calibration) {
            /* Check if the current epoch is the best. */
            if (sum_loss < best_sum_loss) {
                /* Store the feature weights to best_w. */
                best_sum_loss = sum_loss;
                veccopy(best_w, w, K);
            }

            /* The stopping criterion is only evaluated once more than
               `period` epochs have run, i.e. once the slot about to be
               overwritten holds the value from `period` epochs ago. */
            if (period < epoch) {
                improvement = (pf[(epoch-1) % period] - sum_loss) / sum_loss;
            } else {
                improvement = epsilon;
            }

            /* Store the current value of the objective function. */
            pf[(epoch-1) % period] = sum_loss;

            logging(lg, "Loss: %f\n", sum_loss);
            if (period < epoch) {
                logging(lg, "Improvement ratio: %f\n", improvement);
            }
            logging(lg, "Feature L2-norm: %f\n", sqrt(norm2));
            logging(lg, "Learning rate (eta): %f\n", eta);
            logging(lg, "Total number of feature updates: %.0f\n", t);
            logging(lg, "Seconds required for this iteration: %.3f\n", (clock() - clk_prev) / (double)CLOCKS_PER_SEC);

            /* Holdout evaluation if necessary. */
            if (testset != NULL) {
                holdout_evaluation(gm, testset, w, lg);
            }
            logging(lg, "\n");

            /* Check for the stopping criterion. */
            if (improvement < epsilon) {
                ret = 0;
                break;
            }
        }
    }

    /* Output the optimization result. */
    if (!calibration) {
        if (ret == 0) {
            /* A break leaves epoch <= num_epochs; running the loop to
               completion leaves epoch == num_epochs + 1. */
            if (epoch <= num_epochs) {
                logging(lg, "SGD terminated with the stopping criteria\n");
            } else {
                logging(lg, "SGD terminated with the maximum number of iterations\n");
            }
        } else {
            logging(lg, "SGD terminated with error code (%d)\n", ret);
        }
    }

    /* Restore the best weights. */
    if (best_w != NULL) {
        sum_loss = best_sum_loss;
        veccopy(w, best_w, K);
    }

error_exit:
    free(best_w);
    free(pf);
    if (ptr_loss != NULL) {
        *ptr_loss = sum_loss;
    }
    return ret;
}
Пример #17
0
/**
 * Program entry point for the GLUT vehicle demo.
 *
 * Updates the global timer, initialises the vehicle, creates a
 * double-buffered RGB window with a depth buffer, registers the GLUT
 * callbacks and then hands control to the GLUT event loop.
 *
 * @param argc  Argument count, forwarded to glutInit (which may modify it).
 * @param argv  Argument vector, forwarded to glutInit.
 * @return      0.
 */
int main (int argc, char **argv)
{
    /* Simulation state is set up before any GLUT call. */
    timerUpdate(&g_time);
    vehicleInit();

    // GLUT Window Initialization:
    glutInit (&argc, argv);
    glutInitWindowSize (s_width, s_height);
    glutInitDisplayMode ( GLUT_RGB | GLUT_DOUBLE | GLUT_DEPTH);
    glutCreateWindow ("CS248 GLUT example");

    // Initialize OpenGL graphics state
    initGraphics();

    // Register callbacks:
    glutDisplayFunc (display);
    glutReshapeFunc (reshape);
    glutKeyboardFunc (keyboard);
    glutMouseFunc (mouseButton);
    glutMotionFunc (mouseMotion);
    glutIdleFunc (animateScene);

    // Turn the flow of control over to GLUT
    glutMainLoop ();

    return 0;
}
Пример #18
0
/*
 * Refine the eigenvector yvecs[index] by Rayleigh Quotient Iteration, using
 * symmlq_() for the linear solve at each step.
 *
 * Each step solves with the current shift, normalizes the solution x into u,
 * recomputes shift = dot(u, y) with y obtained from splarax() applied to u,
 * and measures the eigen-residual res as the norm of y after scadd() with
 * -shift and u (presumably y - shift*u; scadd is defined elsewhere).
 * The first inv_it_steps steps use initshift instead of the computed shift.
 *
 * Iteration stops when res < tol or, when RQI_CONVERGENCE_MODE == 1 and
 * ndims == 1, when the number of vertices whose set assignment changed
 * between consecutive steps is at most tol * n.
 *
 * On return *evalest holds the final shift and yvecs[index] the refined
 * vector. r1, r2, v, w, x, y and work are caller-supplied work space.
 */
void 
rqi (
    struct vtx_data **A,		/* matrix/graph being analyzed */
    double **yvecs,		/* eigenvectors to be refined */
    int index,		/* index of vector in yvecs to be refined */
    int n,			/* number of rows/columns in matrix */
    double *r1,
    double *r2,
    double *v,
    double *w,
    double *x,
    double *y,
    double *work,	/* work space for symmlq */
    double tol,			/* error tolerance in eigenpair */
    double initshift,		/* initial shift */
    double *evalest,		/* returned eigenvalue */
    double *vwsqrt,		/* square roots of vertex weights */
    struct orthlink *orthlist,	/* lower evecs to orthogonalize against */
    int cube_or_mesh,		/* 0 => hypercube, d => d-dimensional mesh */
    int nsets,		/* number of sets to divide into */
    int *assignment,		/* set number of each vtx (length n+1) */
    int *active,		/* space for nvtxs integers */
    int mediantype,		/* which partitioning strategy to use */
    double *goal,			/* desired set sizes */
    int vwgt_max,		/* largest vertex weight */
    int ndims		/* dimensionality of partition */
)
{
    extern int DEBUG_EVECS;	/* debug flag for eigen computation */
    extern int DEBUG_TRACE;	/* trace main execution path */
    extern int WARNING_EVECS;	/* warning flag for eigen computation */
    extern int RQI_CONVERGENCE_MODE;	/* type of convergence monitoring to do */
    int       rqisteps;		/* # rqi rqisteps */
    double    res;		/* convergence quant for rqi */
    double    last_res;		/* res on previous rqi step */
    double    macheps;		/* machine precision calculated by symmlq */
    double    normxlim;		/* a stopping criteria for symmlq */
    double    normx;		/* norm of the solution vector */
    int       symmlqitns;	/* # symmlq itns */
    int       inv_it_steps;	/* initial steps of inverse iteration */
    long      itnmin;		/* symmlq input */
    double    shift, rtol;	/* symmlq input */
    long      precon, goodb, nout;	/* symmlq input */
    long      checka, intlim;	/* symmlq input */
    double    anorm, acond;	/* symmlq output */
    double    rnorm, ynorm;	/* symmlq output */
    long      istop, itn;	/* symmlq output */
    long      long_n;		/* copy of n for passing to symmlq */
    int       warning;		/* warning on possible misconvergence */
    double    factor;		/* ratio between previous res and new tol */
    double    minfactor;	/* minimum acceptable value of factor */
    int       converged;	/* has process converged yet? */
    double   *u;		/* name of vector being refined */
    int    *old_assignment=NULL;/* previous assignment vector */
    int    *assgn_pntr;	/* pntr to assignment vector */
    int    *old_assgn_pntr;	/* pntr to previous assignment vector */
    int       assigndiff=0;	/* discrepancies between old and new assignment */
    int       assigntol=0;	/* tolerance on convergence of assignment vector */
    int       first;		/* is this the first RQI step? */
    int       i;		/* loop index */

    double    dot(), ch_norm();
    int       symmlq_();
    void      splarax(), scadd(), vecscale(), doubleout(), assign(), x2y(), strout();


    if (DEBUG_TRACE > 0) {
	printf("<Entering rqi>\n");
    }

    /* Initialize RQI loop */
    u = yvecs[index];
    splarax(y, A, n, u, vwsqrt, r1);
    shift = dot(u, 1, n, y);
    scadd(y, 1, n, -shift, u);
    res = ch_norm(y, 1, n);	/* eigen-residual */
    rqisteps = 0;		/* a counter */
    symmlqitns = 0;		/* a counter */

    /* Set invariant symmlq parameters */
    precon = FALSE;		/* FALSE until we figure out a good way */
    goodb = TRUE;		/* should be TRUE for this application */
    nout = 0;			/* set to 0 for no Symmlq output; 6 for lots */
    checka = FALSE;		/* if don't know by now, too bad */
    intlim = n;			/* set to enforce a maximum number of Symmlq itns */
    itnmin = 0;			/* set to enforce a minimum number of Symmlq itns */
    long_n = n;			/* type change for alint */

    if (DEBUG_EVECS > 0) {
	printf("Using RQI/Symmlq refinement on graph with %d vertices.\n", n);
    }
    if (DEBUG_EVECS > 1) {
	printf("  step      lambda est.            Ares          Symmlq its.   istop  factor  delta\n");
	printf("    0");
	doubleout(shift, 1);
	doubleout(res, 1);
	printf("\n");
    }

    /* Assignment-based convergence monitoring needs a copy of the previous
       assignment vector (n + 1 entries). */
    if (RQI_CONVERGENCE_MODE == 1) {
	assigntol = tol * n;
	old_assignment = smalloc((n + 1) * sizeof(int));
    }

    /* Perform RQI */
    inv_it_steps = 2;
    warning = FALSE;
    factor = 10;
    minfactor = factor / 2;
    first = TRUE;
    if (res < tol)
	converged = TRUE;
    else
	converged = FALSE;
    while (!converged) {
	/* Close to the target tolerance: halve factor, but never below
	   minfactor. */
	if (res / tol < 1.2) {
	    factor = max(factor / 2, minfactor);
	}
	/* Symmlq residual tolerance for this step. */
	rtol = res / factor;

	/* exit Symmlq if iterate is this large */
	normxlim = 1.0 / rtol;

	/* The first inv_it_steps steps keep the shift fixed at initshift. */
	if (rqisteps < inv_it_steps) {
	    shift = initshift;
	}

	symmlq_(&long_n, &u[1], &r1[1], &r2[1], &v[1], &w[1], &x[1], &y[1],
		work, &checka, &goodb, &precon, &shift, &nout,
		&intlim, &rtol, &istop, &itn, &anorm, &acond,
		&rnorm, &ynorm, (double *) A, vwsqrt, (double *) orthlist,
		&macheps, &normxlim, &itnmin);
	symmlqitns += itn;
	/* Normalize the solution x into u, then recompute the shift and the
	   eigen-residual exactly as in the initialization above. */
	normx = ch_norm(x, 1, n);
	vecscale(u, 1, n, 1.0 / normx, x);
	splarax(y, A, n, u, vwsqrt, r1);
	shift = dot(u, 1, n, y);
	scadd(y, 1, n, -shift, u);
	last_res = res;
	res = ch_norm(y, 1, n);
	/* A growing residual is remembered and reported after the loop. */
	if (res > last_res) {
	    warning = TRUE;
	}
	rqisteps++;

	if (res < tol)
	    converged = TRUE;

	if (RQI_CONVERGENCE_MODE == 1 && !converged && ndims == 1) {
	    if (first) {
		assign(A, yvecs, n, 1, cube_or_mesh, nsets, vwsqrt, assignment,
		       active, mediantype, goal, vwgt_max);
		x2y(yvecs, ndims, n, vwsqrt);
		first = FALSE;
		assigndiff = n;	/* dummy value for debug chart */
	    }
	    else {
		/* copy assignment to old_assignment */
		assgn_pntr = assignment;
		old_assgn_pntr = old_assignment;
		for (i = n + 1; i; i--) {
		    *old_assgn_pntr++ = *assgn_pntr++;
		}

		assign(A, yvecs, n, ndims, cube_or_mesh, nsets, vwsqrt, assignment,
		       active, mediantype, goal, vwgt_max);
		x2y(yvecs, ndims, n, vwsqrt);

		/* count differences in assignment */
		assigndiff = 0;
		assgn_pntr = assignment;
		old_assgn_pntr = old_assignment;
		for (i = n + 1; i; i--) {
		    if (*old_assgn_pntr++ != *assgn_pntr++)
			assigndiff++;
		}
		/* NOTE(review): taking the smaller of assigndiff and
		   n - assigndiff presumably treats a complete swap of the
		   set labels as no change -- confirm. */
		assigndiff = min(assigndiff, n - assigndiff);
		if (assigndiff <= assigntol)
		    converged = TRUE;
	    }
	}

	if (DEBUG_EVECS > 1) {
	    printf("   %2d", rqisteps);
	    doubleout(shift, 1);
	    doubleout(res, 1);
	    printf("     %3ld", itn);
	    printf("          %ld", istop);
	    printf("      %g", factor);
	    if (RQI_CONVERGENCE_MODE == 1)
		printf("     %d\n", assigndiff);
	    else
		printf("\n");
	}
    }
    *evalest = shift;

    if (WARNING_EVECS > 0 && warning) {
	strout("WARNING: Residual convergence not monotonic; RQI may have misconverged.\n");
    }

    if (DEBUG_EVECS > 0) {
	printf("Eval ");
	doubleout(*evalest, 1);
	printf("   RQI steps %d,  Symmlq iterations %d.\n\n", rqisteps, symmlqitns);
    }

    if (RQI_CONVERGENCE_MODE == 1) {
	sfree(old_assignment);
    }
}
Пример #19
0
/*
 * Run one physics sub-step of length dt for chassis c and its wheels.
 *
 * In order, the step:
 *   1. integrates the chassis pose from its linear and angular velocity,
 *   2. damps both velocities and snaps tiny angular components to zero,
 *   3. prepares every wheel (world position, world-space velocity, friction
 *      limits, steering axis, driven angular speed),
 *   4. runs numIterations solver passes applying clamped friction impulses
 *      and an "axis error" correction between wheel and chassis,
 *   5. subtracts the chassis point velocity from each wheel velocity again.
 *
 * Reads the globals g_step, g_steer, g_speed and g_handBrake; g_step,
 * g_speed and g_handBrake may be modified. Prints one "changeAngSpeed"
 * line per wheel to stdout.
 */
static void vehicleSubTick(Chassis* c, float dt)
{
	// Nothing runs while g_step is zero; an odd g_step is cleared here, so it
	// lets exactly this one call through.
	if (g_step==0) return;
	if (g_step&1) g_step = 0;

	vec3* chassisPos = &c->pose.v[3].v3;
	vec3* x = &c->pose.v[0].v3;
	vec3* y = &c->pose.v[1].v3;
	vec3* z = &c->pose.v[2].v3;

	// Integrate the chassis pose. (Original note: this bit is done by the
	// physics engine.)
	{
		vecaddscale(chassisPos, chassisPos, &c->vel, dt);
		mtx rot;
		matrixRotateByVelocity(&rot, &c->pose, &c->angVel, dt);
		matrixCopy33(&c->pose, &rot);
	}

	// Damp
	vecscale(&c->vel, &c->vel, expf(-dt*1.f));
	vecscale(&c->angVel, &c->angVel, expf(-dt*1.f));

	// Snap very small angular velocity components to exactly zero.
	if (fabsf(c->angVel.x)<0.01f) c->angVel.x = 0.f;
	if (fabsf(c->angVel.y)<0.01f) c->angVel.y = 0.f;
	if (fabsf(c->angVel.z)<0.01f) c->angVel.z = 0.f;

	// One clamped impulse accumulator per wheel and friction direction.
	ClampedImpulse frictionImpulse[numWheels][2];

	c->steer = g_steer;

	// Angular speed about z seen by the previous wheel/call; kept across calls.
	static float angSpeed = 0.f;

	// The hand brake decays exponentially, drags g_speed down with it, and
	// switches itself off once it drops below 0.1.
	if (g_handBrake>0.f)
	{
		g_handBrake *= expf(-4.f*dt);
		g_speed *= expf(-4.f*dt);
		if (g_handBrake < 0.1f)
		{
			g_handBrake = 0.f;
		}
	}

	// Prepare
	for (int i=0; i<numWheels; i++)
	{
		Suspension* s = c->suspension[i];
		Wheel* w = s->wheel;

		// Calculate the world position and offset of the suspension point
		vec3mtx33mulvec3(&s->worldOffset, &c->pose, &s->offset);
		vec3mtx43mulvec3(&s->worldDefaultPos, &c->pose, &s->offset);

		// Move the wheel velocity into world space by adding the velocity of
		// the chassis at the suspension point; undone at the end of the step.
		w->pos = s->worldDefaultPos;
		vec3 pointVel = getPointVel(c, &s->worldOffset);
		vecadd(&w->vel, &w->vel, &pointVel);

		// Impulse limit for friction direction 0.
		float maxFriction0 = 2.0f * dt * c->mass * gravity * (1.f/(float)numWheels);
		clampedImpulseInit(&frictionImpulse[i][0], maxFriction0);

		// Debug trace of the change in angular speed about z. Because
		// angSpeed is updated immediately, only the first wheel of a step can
		// report a non-zero change.
		float newAngSpeed = vecdot(z, &c->angVel);
		float changeAngSpeed = (newAngSpeed - angSpeed)/dt;
		angSpeed = newAngSpeed;
		printf("changeAngSpeed = %f\n", changeAngSpeed);

		// Wheels with index 2 and above (presumably the rear axle) get a
		// friction factor that falls with the hand brake while g_speed is
		// non-negative; every other case uses the full factor of 10.
		const float base = 0.5f;
		float latfriction;
		if (g_speed>=0 && i>=2)
		{
			latfriction = 1.f*expf(-5.f*g_handBrake) + base;
		}
		else
		{
			latfriction = 10.f;
		}

		// Impulse limit for friction direction 1 (along the wheel axis).
		float maxFriction1 = latfriction * dt * c->mass * gravity * (1.f/(float)numWheels);
		clampedImpulseInit(&frictionImpulse[i][1], maxFriction1);

		// The contact normal is fixed to world +z here.
		vecset(&s->hitNorm, 0.f, 0.f, 1.f);

		// Steering angle; the term in s->offset.x makes the angle differ
		// between wheels with different x offsets.
		float steer = w->maxSteer*c->steer * (1.f + 0.3f*s->offset.x*c->steer);

		// wheelAxis = x*cos(steer) - y*sin(steer)
		vecscale(&w->wheelAxis, x, cosf(steer));
		vecsubscale(&w->wheelAxis, &w->wheelAxis, y, sinf(steer));

		// Friction direction 0 is z cross wheelAxis, direction 1 the axis itself.
		veccross(&w->frictionDir[0], z, &w->wheelAxis);
		w->frictionDir[1] = w->wheelAxis;

		w->angSpeed = -40.f*g_speed;
	}


	//=============
	//   VERBOSE
	//=============
	#define verbose false
	#define dump if (verbose) printf

	dump("==========================================\n");
	dump("START ITERATION\n");
	dump("==========================================\n");

	// solverERP starts at 0.1 when iterating more than once and is raised by
	// the same amount after every pass; a single pass uses 1.
	float solverERP = numIterations>1 ? 0.1f : 1.f;
	float changeSolverERP = numIterations>1 ? (1.f - solverERP)/(0.01f+ (float)(numIterations-1)) : 0.f;

	for (int repeat=0; repeat<numIterations; repeat++)
	{
		dump(" == Start Iter == \n");

		for (int i=0; i<numWheels; i++)
		{
			Suspension* s = c->suspension[i];
			Wheel* w = s->wheel;

			const bool axisError = true;
			const bool friction = true;

			// Friction
			if (friction)
			{
				// Wheel velocity with its component along the contact normal
				// removed, plus the surface speed of the spinning wheel.
				vec3 lateralVel;
				vecaddscale(&lateralVel, &w->vel, &s->hitNorm, -vecdot(&s->hitNorm, &w->vel));
				vecaddscale(&lateralVel, &lateralVel, &w->frictionDir[0], +w->angSpeed * w->radius);

				// Direction 0: the impulse acts on the wheel's linear velocity
				// and on its spin, hence the inertia term in the denominator.
				{
					int dir = 0;
					float v = vecdot(&lateralVel, &w->frictionDir[dir]);
					float denom = 1.f/w->mass + w->radius*w->radius*w->invInertia;
					float impulse = clampedImpulseApply(&frictionImpulse[i][dir], - solverERP * v / denom);
					vec3 impulseV;
					vecscale(&impulseV, &w->frictionDir[dir], impulse);
					vecaddscale(&w->vel, &w->vel, &impulseV, 1.f/w->mass);
					w->angSpeed = w->angSpeed + (impulse * w->radius * w->invInertia);
				}
				// Direction 1: the impulse acts on the linear velocity only.
				{
					int dir=1;
					float v = vecdot(&lateralVel, &w->frictionDir[dir]);
					float denom = 1.f/w->mass;
					float impulse = clampedImpulseApply(&frictionImpulse[i][dir], - solverERP * v / denom);
					vec3 impulseV;
					vecscale(&impulseV, &w->frictionDir[dir], impulse);
					vecaddscale(&w->vel, &w->vel, &impulseV, 1.f/w->mass);
				}
			}

			if (axisError)	// Axis Error
			{
				// Velocity difference between the chassis at the wheel
				// position and the wheel itself, with its z component removed.
				vec3 offset;
				vecsub(&offset, &w->pos, chassisPos);
				vec3 pointvel = getPointVel(c, &offset);
				vec3 error;
				vecsub(&error, &pointvel, &w->vel);
				vecaddscale(&error, &error, z, -vecdot(&error, z));

				vec3 norm;
				if (vecsizesq(&error)>0.001f)
				{
					dump("axis error %f\n", vecsize(&error));
					vecnormalise(&norm, &error);

					// Scale the error by -solverERP/denom, apply the result
					// to the chassis as an impulse, and apply it to the wheel
					// scaled by -solverERP/w->mass.
					float denom = computeDenominator(1.f/c->mass, 1.f/c->inertia, &offset, &norm) + 1.f/w->mass;
					vecscale(&error, &error, -solverERP/denom);
					addImpulseAtOffset(&c->vel, &c->angVel, 1.f/c->mass, 1.f/c->inertia, &offset, &error);
					vecaddscale(&w->vel, &w->vel, &error, -solverERP/w->mass);
				}
			}
		}
		solverERP += changeSolverERP;
	}

	for (int i=0; i<numWheels; i++)
	{
		Suspension* s = c->suspension[i];
		Wheel* w = s->wheel;
		// Use the same world offset as the prepare loop so that this
		// subtraction mirrors the earlier addition (getPointVel takes a vec3
		// offset, not the suspension itself).
		vec3 pointVel = getPointVel(c, &s->worldOffset);

		// Convert suspension wheel speed back to car space
		vecsub(&w->vel, &w->vel, &pointVel);
	}
}
Пример #20
0
/*
 * Initialise a card from four (from, to) connection pairs and build the
 * vector path for each of its eight connection points.
 *
 * card  - card to initialise; it is cleared first and its colours are set to
 *         (1, 1, 1).
 * input - four pairs of connection point indices in the range 0..7. Every
 *         point must be used exactly once and a pair may not connect a point
 *         to itself.
 *
 * If any pair is invalid a warning is printed and no vector paths are
 * generated; the valid pairs are still recorded in card->paths.
 */
void tsuroCardCreate(tsuroCard* card, int input[4][2])
{
	bool okay = true;
	// Clear the whole card (sizeof(card) would only be the size of a pointer).
	memset(card, 0, sizeof(*card));
	// Fill every byte with 0xff; the check below expects an unused slot to
	// read back as -1.
	memset(card->paths, 0xff, sizeof(card->paths));

	tsuroCardSetAllColours(card, 1.f, 1.f, 1.f);

	for (int i=0; i<4; i++)
	{
		int from = input[i][0];
		int to = input[i][1];
		// Reject indices outside the paths array and self-connections before
		// touching card->paths; a self-connection would leave another point
		// unconnected and make the path generation below index points[-1].
		bool usable = from>=0 && from<8 && to>=0 && to<8 && from!=to;
		// Both ends must still be unused (all bits set) to accept the pair.
		if (usable && (card->paths[from] & card->paths[to]) == -1)
		{
			card->paths[from] = to;
			card->paths[to] = from;
		}
		else
		{
			printf("WARNING: Invalid input for card!\n");
			okay = false;
		}
	}

	if (okay)
	{
		// Generate the vector path
		const float s = 0.5f*tsuroCardSize;				// half width/scale of card
		const float a = tsuroCardSize*(1.f/6.f);		// position of connection points

		// Connection points, two per edge. Not static: the entries are
		// computed from s and a, which are not constant expressions in C.
		const float points[8][2] =
		{
			{-a, -s},
			{+a, -s},

			{+s, -a},
			{+s, +a},

			{+a, +s},
			{-a, +s},

			{-s, +a},
			{-s, -a},
		};

		for (int i=0; i<8; i++)	// We are doing this twice (but its easier this way!)
		{
			int from = i;
			int to = card->paths[i];
			vec3 f = {points[from][0], points[from][1], 0.f};	// from
			vec3 t = {points[to][0], points[to][1], 0.f};		// to

			if (to == tsuroEdgeOpposite1[from])
			{
				// Directly opposite: a straight line sampled in equal steps.
				vec3 dv;
				vecsub(&dv, &t, &f);
				vecscale(&dv, &dv, 1.f/((float)(tsuroVectorPathSize-1)));
				vec3* v = card->vpaths[i].centre;
				v[0] = f;
				for (int n=1; n<tsuroVectorPathSize; n++)
				{
					vecadd(&v[n], &v[n-1], &dv);
				}
			}
			else if (to == tsuroEdgeOpposite2[from])
			{
				// Opposite wall: two quarter curves, the second one starting
				// at index tsuroVectorPathHalfSize.

				vec3 centre = {0.f, 0.f, 0.f};

				vec3 adjacentFrom = {points[tsuroEdgeSame[from]][0], points[tsuroEdgeSame[from]][1], 0.f};	// The point that is on the same edge as from
				vec3 adjacentTo = {points[tsuroEdgeSame[to]][0], points[tsuroEdgeSame[to]][1], 0.f};			// The point that is on the same edge as to

				// Each curve is built around the midpoint of its own edge.
				vec3 focal1, focal2;
				vecmidpoint(&focal1, &adjacentFrom, &f);
				vecmidpoint(&focal2, &adjacentTo, &t);

				vec3 x,y;

				vecsub(&x, &centre, &focal1);
				vecsub(&y, &f, &focal1);
				generateQuarterCurve(&card->vpaths[i].centre[0], &focal1,&y,&x, tsuroVectorPathHalfSize+1, 1.5f);

				vecsub(&x, &centre, &focal2);
				vecsub(&y, &t, &focal2);
				generateQuarterCurve(&card->vpaths[i].centre[tsuroVectorPathHalfSize], &focal2,&x,&y, tsuroVectorPathHalfSize+1, 1.5f);
			}
			else if (to == tsuroEdgeSame[from])
			{
				vec3 centre = {0.f, 0.f, 0.f};

				// Same wall: two quarter curves around the midpoint of the
				// two points, reaching half way towards the card centre.
				vec3 midpoint;
				vecmidpoint(&midpoint, &f, &t);

				vec3 x,y;
				vecsub(&x, &f, &midpoint);
				vecsub(&y, &centre, &midpoint);
				vecscale(&y, &y, 0.5f);
				generateQuarterCurve(&card->vpaths[i].centre[0], &midpoint,&x,&y, tsuroVectorPathHalfSize+1, 0.8);
				vecneg(&x, &x);
				generateQuarterCurve(&card->vpaths[i].centre[tsuroVectorPathHalfSize], &midpoint,&y,&x, tsuroVectorPathHalfSize+1, 0.8);
			}
			else
			{
				// Remaining case: a single quarter curve around a focal
				// point. Each end contributes the coordinate of the edge it
				// sits on; start from zero so no component is left
				// uninitialised.
				vec3 focal = {0.f, 0.f, 0.f};
				switch(from)
				{
					case 0: case 1: focal.y = -s; break;
					case 2: case 3: focal.x = +s; break;
					case 4: case 5: focal.y = +s; break;
					case 6: case 7: focal.x = -s; break;
					default: break;
				}
				switch(to)
				{
					case 0: case 1: focal.y = -s; break;
					case 2: case 3: focal.x = +s; break;
					case 4: case 5: focal.y = +s; break;
					case 6: case 7: focal.x = -s; break;
					default: break;
				}

				vec3 x,y;
				vecsub(&x, &f, &focal);
				vecsub(&y, &t, &focal);
				generateQuarterCurve(&card->vpaths[i].centre[0], &focal,&x,&y, tsuroVectorPathSize, 1.2f);
			}
		}
	}
}
Пример #21
0
/*
 * Fill d with the steepest-descent direction at x.
 *
 * Without L1 regularization (orthantwise_c == 0) this is the negated
 * gradient. Otherwise the first orthantwise_start components are the negated
 * gradient and the remaining ones the negated pseudo-gradient of the
 * L1-regularized objective.
 *
 * g is not const-qualified because vecncpy's prototype is not visible here.
 */
static void lbfgs_steepest_direction(
    lbfgsfloatval_t *d,
    const lbfgsfloatval_t *x,
    lbfgsfloatval_t *g,
    int n,
    const lbfgs_parameter_t *param
    )
{
    int i;

    if (param->orthantwise_c == 0.) {
        vecncpy(d, g, n);
        return;
    }

    /* Compute the negative of gradients. */
    for (i = 0;i < param->orthantwise_start;++i) {
        d[i] = -g[i];
    }

    /* Compute the negative of pseudo-gradients. */
    for (i = param->orthantwise_start;i < n;++i) {
        if (x[i] < 0.) {
            /* Differentiable. */
            d[i] = -g[i] + param->orthantwise_c;
        } else if (0. < x[i]) {
            /* Differentiable. */
            d[i] = -g[i] - param->orthantwise_c;
        } else {
            if (g[i] < -param->orthantwise_c) {
                /* Take the right partial derivative. */
                d[i] = -g[i] - param->orthantwise_c;
            } else if (param->orthantwise_c < g[i]) {
                /* Take the left partial derivative. */
                d[i] = -g[i] + param->orthantwise_c;
            } else {
                d[i] = 0.;
            }
        }
    }
}

/*
 * Minimize a function with the limited-memory BFGS method, optionally with
 * orthant-wise L1 regularization.
 *
 *  n              number of variables.
 *  x              in: initial point; out: the last point reached. If a line
 *                 search fails, x is restored to the point before that
 *                 search.
 *  ptr_fx         if not NULL, receives the final objective value.
 *  proc_evaluate  callback returning the objective value and writing the
 *                 gradient.
 *  proc_progress  optional callback invoked after every iteration; a
 *                 non-zero return value stops the optimization and is
 *                 returned.
 *  instance       opaque pointer handed to both callbacks.
 *  _param         parameters, or NULL for the defaults.
 *
 * Returns LBFGS_SUCCESS, LBFGS_ALREADY_MINIMIZED, a non-zero value from
 * proc_progress, a negative line-search status, or an LBFGSERR_* code.
 */
int lbfgs(
    int n,
    lbfgsfloatval_t *x,
    lbfgsfloatval_t *ptr_fx,
    lbfgs_evaluate_t proc_evaluate,
    lbfgs_progress_t proc_progress,
    void *instance,
    lbfgs_parameter_t *_param
    )
{
    int ret;
    int i, j, k, ls, end, bound;
    lbfgsfloatval_t step;

    /* Constant parameters and their default values. */
    const lbfgs_parameter_t* param = (_param != NULL) ? _param : &_defparam;
    const int m = param->m;

    lbfgsfloatval_t *xp = NULL, *g = NULL, *gp = NULL, *d = NULL, *w = NULL;
    iteration_data_t *lm = NULL, *it = NULL;
    lbfgsfloatval_t ys, yy;
    lbfgsfloatval_t norm, xnorm, gnorm, beta;
    lbfgsfloatval_t fx = 0.;
    line_search_proc linesearch = line_search_morethuente;

    /* Construct a callback data. */
    callback_data_t cd;
    cd.n = n;
    cd.instance = instance;
    cd.proc_evaluate = proc_evaluate;
    cd.proc_progress = proc_progress;

#if     defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__))
    /* Round out the number of variables. */
    n = round_out_variables(n);
#endif/*defined(USE_SSE)*/

    /* Check the input parameters for errors. */
    if (n <= 0) {
        return LBFGSERR_INVALID_N;
    }
#if     defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__))
    if (n % 8 != 0) {
        return LBFGSERR_INVALID_N_SSE;
    }
    /* x must be aligned to 16 bytes; only the low address bits are examined. */
    if (((unsigned short)x & 0x000F) != 0) {
        return LBFGSERR_INVALID_X_SSE;
    }
#endif/*defined(USE_SSE)*/
    if (param->min_step < 0.) {
        return LBFGSERR_INVALID_MINSTEP;
    }
    if (param->max_step < param->min_step) {
        return LBFGSERR_INVALID_MAXSTEP;
    }
    if (param->ftol < 0.) {
        return LBFGSERR_INVALID_FTOL;
    }
    if (param->gtol < 0.) {
        return LBFGSERR_INVALID_GTOL;
    }
    if (param->xtol < 0.) {
        return LBFGSERR_INVALID_XTOL;
    }
    if (param->max_linesearch <= 0) {
        return LBFGSERR_INVALID_MAXLINESEARCH;
    }
    if (param->orthantwise_c < 0.) {
        return LBFGSERR_INVALID_ORTHANTWISE;
    }
    if (param->orthantwise_start < 0 || n < param->orthantwise_start) {
        return LBFGSERR_INVALID_ORTHANTWISE_START;
    }
    switch (param->linesearch) {
    case LBFGS_LINESEARCH_MORETHUENTE:
        linesearch = line_search_morethuente;
        break;
    case LBFGS_LINESEARCH_BACKTRACKING:
        linesearch = line_search_backtracking;
        break;
    default:
        return LBFGSERR_INVALID_LINESEARCH;
    }

    /* Allocate working space. */
    xp = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));
    g = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));
    gp = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));
    d = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));
    w = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));
    if (xp == NULL || g == NULL || gp == NULL || d == NULL || w == NULL) {
        ret = LBFGSERR_OUTOFMEMORY;
        goto lbfgs_exit;
    }

    /* Allocate limited memory storage. */
    lm = (iteration_data_t*)vecalloc(m * sizeof(iteration_data_t));
    if (lm == NULL) {
        ret = LBFGSERR_OUTOFMEMORY;
        goto lbfgs_exit;
    }

    /*
        Reset every slot before allocating anything, so that the cleanup code
        never frees an uninitialized pointer when an allocation below fails
        part-way through.
     */
    for (i = 0;i < m;++i) {
        lm[i].alpha = 0;
        lm[i].ys = 0;
        lm[i].s = NULL;
        lm[i].y = NULL;
    }

    /* Initialize the limited memory. */
    for (i = 0;i < m;++i) {
        it = &lm[i];
        it->s = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));
        it->y = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));
        if (it->s == NULL || it->y == NULL) {
            ret = LBFGSERR_OUTOFMEMORY;
            goto lbfgs_exit;
        }
    }

    /* Evaluate the function value and its gradient. */
    fx = cd.proc_evaluate(cd.instance, x, g, cd.n, 0);
    if (0. < param->orthantwise_c) {
        /* Compute L1-regularization factor and add it to the object value. */
        norm = 0.;
        for (i = param->orthantwise_start;i < n;++i) {
            norm += fabs(x[i]);
        }
        fx += norm * param->orthantwise_c;
    }

    /* We assume the initial hessian matrix H_0 as the identity matrix. */
    lbfgs_steepest_direction(d, x, g, n, param);

    /*
       Make sure that the initial variables are not a minimizer.
     */
    vecnorm(&gnorm, g, n);
    vecnorm(&xnorm, x, n);
    if (xnorm < 1.0) xnorm = 1.0;
    if (gnorm / xnorm <= param->epsilon) {
        ret = LBFGS_ALREADY_MINIMIZED;
        goto lbfgs_exit;
    }

    /* Compute the initial step:
        step = 1.0 / sqrt(vecdot(d, d, n))
     */
    vecrnorm(&step, d, n);

    k = 1;
    end = 0;
    for (;;) {
        /* Store the current position and gradient vectors. */
        veccpy(xp, x, n);
        veccpy(gp, g, n);

        /* Search for an optimal step. */
        ls = linesearch(n, x, &fx, g, d, &step, w, &cd, param);
        if (ls < 0) {
            /*
                Revert to the previous point, so that the caller is not left
                with the trial point the failed line search rejected.
             */
            veccpy(x, xp, n);
            veccpy(g, gp, n);
            ret = ls;
            goto lbfgs_exit;
        }

        /* Compute x and g norms. */
        vecnorm(&gnorm, g, n);
        vecnorm(&xnorm, x, n);

        /* Report the progress; a non-zero result cancels the optimization. */
        if (cd.proc_progress) {
            if ((ret = cd.proc_progress(cd.instance, x, g, fx, xnorm, gnorm, step, cd.n, k, ls))) {
                goto lbfgs_exit;
            }
        }

        /*
            Convergence test.
            The criterion is given by the following formula:
                |g(x)| / \max(1, |x|) < \epsilon
         */
        if (xnorm < 1.0) xnorm = 1.0;
        if (gnorm / xnorm <= param->epsilon) {
            /* Convergence. */
            ret = LBFGS_SUCCESS;
            break;
        }

        if (param->max_iterations != 0 && param->max_iterations < k+1) {
            /* Maximum number of iterations. */
            ret = LBFGSERR_MAXIMUMITERATION;
            break;
        }

        /*
            Update vectors s and y:
                s_{k+1} = x_{k+1} - x_{k} = \step * d_{k}.
                y_{k+1} = g_{k+1} - g_{k}.
         */
        it = &lm[end];
        vecdiff(it->s, x, xp, n);
        vecdiff(it->y, g, gp, n);

        /*
            Compute scalars ys and yy:
                ys = y^t \cdot s = 1 / \rho.
                yy = y^t \cdot y.
            Notice that yy is used for scaling the hessian matrix H_0 (Cholesky factor).
         */
        vecdot(&ys, it->y, it->s, n);
        vecdot(&yy, it->y, it->y, n);
        it->ys = ys;

        /*
            Recursive formula to compute dir = -(H \cdot g).
                This is described in page 779 of:
                Jorge Nocedal.
                Updating Quasi-Newton Matrices with Limited Storage.
                Mathematics of Computation, Vol. 35, No. 151,
                pp. 773--782, 1980.
         */
        bound = (m <= k) ? m : k;
        ++k;
        end = (end + 1) % m;

        /* Start from the steepest direction at the new point. */
        lbfgs_steepest_direction(d, x, g, n, param);
        if (param->orthantwise_c != 0.) {
            /* Store the steepest direction; it constrains d further below. */
            veccpy(w, d, n);
        }

        /* First loop: walk backwards from the most recent correction pair. */
        j = end;
        for (i = 0;i < bound;++i) {
            j = (j + m - 1) % m;    /* if (--j == -1) j = m-1; */
            it = &lm[j];
            /* \alpha_{j} = \rho_{j} s^{t}_{j} \cdot q_{k+1}. */
            vecdot(&it->alpha, it->s, d, n);
            it->alpha /= it->ys;
            /* q_{i} = q_{i+1} - \alpha_{i} y_{i}. */
            vecadd(d, it->y, -it->alpha, n);
        }

        vecscale(d, ys / yy, n);

        /* Second loop: walk forwards again from the oldest pair used. */
        for (i = 0;i < bound;++i) {
            it = &lm[j];
            /* \beta_{j} = \rho_{j} y^t_{j} \cdot \gamma_{i}. */
            vecdot(&beta, it->y, d, n);
            beta /= it->ys;
            /* \gamma_{i+1} = \gamma_{i} + (\alpha_{j} - \beta_{j}) s_{j}. */
            vecadd(d, it->s, it->alpha - beta, n);
            j = (j + 1) % m;        /* if (++j == m) j = 0; */
        }

        /*
            Constrain the search direction for orthant-wise updates.
         */
        if (param->orthantwise_c != 0.) {
            for (i = param->orthantwise_start;i < n;++i) {
                if (d[i] * w[i] <= 0) {
                    d[i] = 0;
                }
            }
        }

        /*
            Now the search direction d is ready. We try step = 1 first.
         */
        step = 1.0;
    }

lbfgs_exit:
    /* Return the final value of the objective function. */
    if (ptr_fx != NULL) {
        *ptr_fx = fx;
    }

    /* Free memory blocks used by this function. */
    if (lm != NULL) {
        for (i = 0;i < m;++i) {
            vecfree(lm[i].s);
            vecfree(lm[i].y);
        }
        vecfree(lm);
    }
    vecfree(w);
    vecfree(d);
    vecfree(gp);
    vecfree(g);
    vecfree(xp);

    return ret;
}
Пример #22
0
/*
 * Compute the 'neigs' top eigenvectors of the n x n matrix 'square_mat'
 * using power iteration, orthogonalizing each vector against the ones
 * found before it.
 *
 *  eigs        neigs vectors of length n. On entry they hold the initial
 *              guesses when 'initialize' is false; on return eigs[i] is the
 *              unit-length i-th vector, ordered by decreasing evals[i].
 *  evals       receives the eigenvalue estimate for each vector.
 *  initialize  when true, every initial guess is filled with random values.
 *
 * If a vector collapses to (almost) zero length during the iteration, or
 * the iteration limit of 30 * n is exceeded, that vector and all remaining
 * ones are replaced by random vectors orthogonal to the earlier ones and
 * get eigenvalue 0.
 *
 * Returns true when the last power iteration stayed within the limit.
 */
bool
power_iteration(double **square_mat, int n, int neigs, double **eigs,
		double *evals, bool initialize)
{
    int i, j;
    double *tmp_vec = N_GNEW(n, double);
    double *last_vec = N_GNEW(n, double);
    double *curr_vector;
    double len;
    double angle;
    double alpha;
    int iteration = 0;
    int largest_index;
    double largest_eval;
    int Max_iterations = 30 * n;
    int randomize;

    double tol = 1 - p_iteration_threshold;

    if (neigs >= n) {
        neigs = n;
    }

    for (i = 0; i < neigs; i++) {
        curr_vector = eigs[i];

        /* guess the i-th eigen vector */
        randomize = initialize;
        for (;;) {
            if (randomize)
                for (j = 0; j < n; j++)
                    curr_vector[j] = rand() % 100;
            /* orthogonalize against higher eigenvectors */
            for (j = 0; j < i; j++) {
                alpha = -dot(eigs[j], 0, n - 1, curr_vector);
                scadd(curr_vector, 0, n - 1, alpha, eigs[j]);
            }
            len = norm(curr_vector, 0, n - 1);
            if (!(len < 1e-10))
                break;
            /* We have chosen a vector colinear with previous ones. Retry
             * with a random guess even when the caller supplied the start
             * vector: orthogonalizing the same vector again would give the
             * same result and never terminate.
             */
            randomize = 1;
        }
        vecscale(curr_vector, 0, n - 1, 1.0 / len, curr_vector);

        iteration = 0;
        do {
            iteration++;
            cpvec(last_vec, 0, n - 1, curr_vector);

            right_mult_with_vector_d(square_mat, n, n, curr_vector,
                                     tmp_vec);
            cpvec(curr_vector, 0, n - 1, tmp_vec);

            /* orthogonalize against higher eigenvectors */
            for (j = 0; j < i; j++) {
                alpha = -dot(eigs[j], 0, n - 1, curr_vector);
                scadd(curr_vector, 0, n - 1, alpha, eigs[j]);
            }
            len = norm(curr_vector, 0, n - 1);
            if (len < 1e-10 || iteration > Max_iterations) {
                /* We have reached the null space (e.vec. associated with e.val. 0) */
                goto exit;
            }

            vecscale(curr_vector, 0, n - 1, 1.0 / len, curr_vector);
            angle = dot(curr_vector, 0, n - 1, last_vec);
        } while (fabs(angle) < tol);
        evals[i] = angle * len;	/* this is the Rayleigh quotient (up to errors due to orthogonalization):
				   u*(A*u)/||A*u||)*||A*u||, where u=last_vec, and ||u||=1
				 */
    }
  exit:
    for (; i < neigs; i++) {
        /* compute the smallest eigenvector, which are  */
        /* probably associated with eigenvalue 0 and for */
        /* which power-iteration is dangerous */
        curr_vector = eigs[i];
        do {
            /* guess the i-th eigen vector */
            for (j = 0; j < n; j++)
                curr_vector[j] = rand() % 100;
            /* orthogonalize against higher eigenvectors */
            for (j = 0; j < i; j++) {
                alpha = -dot(eigs[j], 0, n - 1, curr_vector);
                scadd(curr_vector, 0, n - 1, alpha, eigs[j]);
            }
            len = norm(curr_vector, 0, n - 1);
            /* draw again rather than divide by a vanishing length */
        } while (len < 1e-10);
        vecscale(curr_vector, 0, n - 1, 1.0 / len, curr_vector);
        evals[i] = 0;
    }

    /* sort vectors by their evals, for overcoming possible mis-convergence: */
    for (i = 0; i < neigs - 1; i++) {
        largest_index = i;
        largest_eval = evals[largest_index];
        for (j = i + 1; j < neigs; j++) {
            if (largest_eval < evals[j]) {
                largest_index = j;
                largest_eval = evals[largest_index];
            }
        }
        if (largest_index != i) {	/* exchange eigenvectors: */
            cpvec(tmp_vec, 0, n - 1, eigs[i]);
            cpvec(eigs[i], 0, n - 1, eigs[largest_index]);
            cpvec(eigs[largest_index], 0, n - 1, tmp_vec);

            evals[largest_index] = evals[i];
            evals[i] = largest_eval;
        }
    }

    free(tmp_vec);
    free(last_vec);

    return (iteration <= Max_iterations);
}
Пример #23
0
/*
 * Minimize a function with the limited-memory BFGS method; a non-zero
 * orthantwise_c selects the orthant-wise variant (OW-LQN) for L1
 * regularization.
 *
 *  n              number of variables.
 *  x              in: initial point; out: the last accepted point (restored
 *                 to the previous point when a line search fails).
 *  ptr_fx         if not NULL, receives the final objective value.
 *  proc_evaluate  callback returning the objective value and writing the
 *                 gradient.
 *  proc_progress  optional callback invoked after every iteration; a
 *                 non-zero return value stops the optimization and is
 *                 returned.
 *  instance       opaque pointer handed to both callbacks.
 *  _param         parameters, or NULL for the defaults. The parameters are
 *                 copied, so the caller's structure is never modified.
 *
 * Returns LBFGS_SUCCESS, LBFGS_STOP, LBFGS_ALREADY_MINIMIZED, a non-zero
 * value from proc_progress, a negative line-search status, or an
 * LBFGSERR_* code.
 */
int lbfgs(
    int n,
    T *x,
    T *ptr_fx,
    typename FuncWrapper<T>::lbfgs_evaluate_t proc_evaluate,
    typename FuncWrapper<T>::lbfgs_progress_t proc_progress,
    void *instance,
    lbfgs_parameter_t *_param
    )
{
    int ret;
    int i, j, k, ls, end, bound;
    T step;

    /* Constant parameters and their default values. */
    lbfgs_parameter_t param = (_param != NULL) ? (*_param) : _defparam;
    const int m = param.m;

    T *xp = NULL;
    T *g = NULL, *gp = NULL, *pg = NULL;
    T *d = NULL, *w = NULL, *pf = NULL;
    iteration_data_t<T> *lm = NULL;
    iteration_data_t<T> *it = NULL;
    T ys, yy;
    T xnorm, gnorm, beta;
    T fx = 0.;
    T rate = 0.;
    typename LineSearchWrapper<T>::line_search_proc linesearch = line_search_morethuente;

    /* Construct a callback data. */
    callback_data_t<T> cd;
    cd.n = n;
    cd.instance = instance;
    cd.proc_evaluate = proc_evaluate;
    cd.proc_progress = proc_progress;

#if     defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__))
    /* Round out the number of variables. */
    n = round_out_variables(n);
#endif/*defined(USE_SSE)*/

    /* Check the input parameters for errors. */
    if (n <= 0) {
        return LBFGSERR_INVALID_N;
    }
#if     defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__))
    if (n % 8 != 0) {
        return LBFGSERR_INVALID_N_SSE;
    }
    if ((uintptr_t)(const void*)x % 16 != 0) {
        return LBFGSERR_INVALID_X_SSE;
    }
#endif/*defined(USE_SSE)*/
    if (param.epsilon < 0.) {
        return LBFGSERR_INVALID_EPSILON;
    }
    if (param.past < 0) {
        return LBFGSERR_INVALID_TESTPERIOD;
    }
    if (param.delta < 0.) {
        return LBFGSERR_INVALID_DELTA;
    }
    if (param.min_step < 0.) {
        return LBFGSERR_INVALID_MINSTEP;
    }
    if (param.max_step < param.min_step) {
        return LBFGSERR_INVALID_MAXSTEP;
    }
    if (param.ftol < 0.) {
        return LBFGSERR_INVALID_FTOL;
    }
    if (param.linesearch == LBFGS_LINESEARCH_BACKTRACKING_WOLFE ||
        param.linesearch == LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE) {
        if (param.wolfe <= param.ftol || 1. <= param.wolfe) {
            return LBFGSERR_INVALID_WOLFE;
        }
    }
    if (param.gtol < 0.) {
        return LBFGSERR_INVALID_GTOL;
    }
    if (param.xtol < 0.) {
        return LBFGSERR_INVALID_XTOL;
    }
    if (param.max_linesearch <= 0) {
        return LBFGSERR_INVALID_MAXLINESEARCH;
    }
    if (param.orthantwise_c < 0.) {
        return LBFGSERR_INVALID_ORTHANTWISE;
    }
    if (param.orthantwise_start < 0 || n < param.orthantwise_start) {
        return LBFGSERR_INVALID_ORTHANTWISE_START;
    }
    /* A negative end index means "up to the last variable". */
    if (param.orthantwise_end < 0) {
        param.orthantwise_end = n;
    }
    if (n < param.orthantwise_end) {
        return LBFGSERR_INVALID_ORTHANTWISE_END;
    }
    if (param.orthantwise_c != 0.) {
        switch (param.linesearch) {
        case LBFGS_LINESEARCH_BACKTRACKING:
            linesearch = line_search_backtracking_owlqn;
            break;
        default:
            /* Only the backtracking method is available. */
            return LBFGSERR_INVALID_LINESEARCH;
        }
    } else {
        switch (param.linesearch) {
        case LBFGS_LINESEARCH_MORETHUENTE:
            linesearch = line_search_morethuente;
            break;
        case LBFGS_LINESEARCH_BACKTRACKING_ARMIJO:
        case LBFGS_LINESEARCH_BACKTRACKING_WOLFE:
        case LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE:
            linesearch = line_search_backtracking;
            break;
        default:
            return LBFGSERR_INVALID_LINESEARCH;
        }
    }

    /* Allocate working space. */
    xp = (T*)vecalloc(n * sizeof(T));
    g = (T*)vecalloc(n * sizeof(T));
    gp = (T*)vecalloc(n * sizeof(T));
    d = (T*)vecalloc(n * sizeof(T));
    w = (T*)vecalloc(n * sizeof(T));
    if (xp == NULL || g == NULL || gp == NULL || d == NULL || w == NULL) {
        ret = LBFGSERR_OUTOFMEMORY;
        goto lbfgs_exit;
    }

    if (param.orthantwise_c != 0.) {
        /* Allocate working space for OW-LQN. */
        pg = (T*)vecalloc(n * sizeof(T));
        if (pg == NULL) {
            ret = LBFGSERR_OUTOFMEMORY;
            goto lbfgs_exit;
        }
    }

    /* Allocate limited memory storage. */
    lm = (iteration_data_t<T>*)vecalloc(m * sizeof(iteration_data_t<T>));
    if (lm == NULL) {
        ret = LBFGSERR_OUTOFMEMORY;
        goto lbfgs_exit;
    }

    /*
        Reset every slot before allocating anything, so that the cleanup code
        never frees an uninitialized pointer when an allocation below fails
        part-way through.
     */
    for (i = 0;i < m;++i) {
        lm[i].alpha = 0;
        lm[i].ys = 0;
        lm[i].s = NULL;
        lm[i].y = NULL;
    }

    /* Initialize the limited memory. */
    for (i = 0;i < m;++i) {
        it = &lm[i];
        it->s = (T*)vecalloc(n * sizeof(T));
        it->y = (T*)vecalloc(n * sizeof(T));
        if (it->s == NULL || it->y == NULL) {
            ret = LBFGSERR_OUTOFMEMORY;
            goto lbfgs_exit;
        }
    }

    /* Allocate an array for storing previous values of the objective function. */
    if (0 < param.past) {
        pf = (T*)vecalloc(param.past * sizeof(T));
        if (pf == NULL) {
            /*
                Fail like every other allocation here; carrying on would
                silently disable the delta-based stopping test requested
                through param.past.
             */
            ret = LBFGSERR_OUTOFMEMORY;
            goto lbfgs_exit;
        }
    }

    /* Evaluate the function value and its gradient. */
    fx = cd.proc_evaluate(cd.instance, x, g, cd.n, 0);
    if (0. != param.orthantwise_c) {
        /* Compute the L1 norm of the variable and add it to the object value. */
        xnorm = owlqn_x1norm(x, param.orthantwise_start, param.orthantwise_end);
        fx += xnorm * param.orthantwise_c;
        owlqn_pseudo_gradient(
            pg, x, g, n,
            T(param.orthantwise_c), param.orthantwise_start, param.orthantwise_end
            );
    }

    /* Store the initial value of the objective function. */
    if (pf != NULL) {
        pf[0] = fx;
    }

    /*
        Compute the direction;
        we assume the initial hessian matrix H_0 as the identity matrix.
     */
    if (param.orthantwise_c == 0.) {
        vecncpy(d, g, n);
    } else {
        vecncpy(d, pg, n);
    }

    /*
       Make sure that the initial variables are not a minimizer.
     */
    vec2norm(&xnorm, x, n);
    if (param.orthantwise_c == 0.) {
        vec2norm(&gnorm, g, n);
    } else {
        vec2norm(&gnorm, pg, n);
    }
    if (xnorm < 1.0) xnorm = 1.0;
    if (gnorm / xnorm <= param.epsilon) {
        ret = LBFGS_ALREADY_MINIMIZED;
        goto lbfgs_exit;
    }

    /* Compute the initial step:
        step = 1.0 / sqrt(vecdot(d, d, n))
     */
    vec2norminv(&step, d, n);

    k = 1;
    end = 0;
    for (;;) {
        /* Store the current position and gradient vectors. */
        veccpy(xp, x, n);
        veccpy(gp, g, n);

        /* Search for an optimal step. */
        if (param.orthantwise_c == 0.) {
            ls = linesearch(n, x, &fx, g, d, &step, xp, gp, w, &cd, &param);
        } else {
            /* OW-LQN searches with the pseudo-gradient, then refreshes it. */
            ls = linesearch(n, x, &fx, g, d, &step, xp, pg, w, &cd, &param);
            owlqn_pseudo_gradient(
                pg, x, g, n,
                T(param.orthantwise_c), param.orthantwise_start, param.orthantwise_end
                );
        }
        if (ls < 0) {
            /* Revert to the previous point. */
            veccpy(x, xp, n);
            veccpy(g, gp, n);
            ret = ls;
            goto lbfgs_exit;
        }

        /* Compute x and g norms. */
        vec2norm(&xnorm, x, n);
        if (param.orthantwise_c == 0.) {
            vec2norm(&gnorm, g, n);
        } else {
            vec2norm(&gnorm, pg, n);
        }

        /* Report the progress; a non-zero result cancels the optimization. */
        if (cd.proc_progress) {
            if ((ret = cd.proc_progress(cd.instance, x, g, fx, xnorm, gnorm, step, cd.n, k, ls))) {
                goto lbfgs_exit;
            }
        }

        /*
            Convergence test.
            The criterion is given by the following formula:
                |g(x)| / \max(1, |x|) < \epsilon
         */
        if (xnorm < 1.0) xnorm = 1.0;
        if (gnorm / xnorm <= param.epsilon) {
            /* Convergence. */
            ret = LBFGS_SUCCESS;
            break;
        }

        /*
            Test for stopping criterion.
            The criterion is given by the following formula:
                (f(past_x) - f(x)) / f(x) < \delta
         */
        if (pf != NULL) {
            /* We don't test the stopping criterion while k < past. */
            if (param.past <= k) {
                /* Compute the relative improvement from the past. */
                rate = (pf[k % param.past] - fx) / fx;

                /* The stopping criterion. */
                if (rate < param.delta) {
                    ret = LBFGS_STOP;
                    break;
                }
            }

            /* Store the current value of the objective function. */
            pf[k % param.past] = fx;
        }

        if (param.max_iterations != 0 && param.max_iterations < k+1) {
            /* Maximum number of iterations. */
            ret = LBFGSERR_MAXIMUMITERATION;
            break;
        }

        /*
            Update vectors s and y:
                s_{k+1} = x_{k+1} - x_{k} = \step * d_{k}.
                y_{k+1} = g_{k+1} - g_{k}.
         */
        it = &lm[end];
        vecdiff(it->s, x, xp, n);
        vecdiff(it->y, g, gp, n);

        /*
            Compute scalars ys and yy:
                ys = y^t \cdot s = 1 / \rho.
                yy = y^t \cdot y.
            Notice that yy is used for scaling the hessian matrix H_0 (Cholesky factor).
         */
        vecdot(&ys, it->y, it->s, n);
        vecdot(&yy, it->y, it->y, n);
        it->ys = ys;

        /*
            Recursive formula to compute dir = -(H \cdot g).
                This is described in page 779 of:
                Jorge Nocedal.
                Updating Quasi-Newton Matrices with Limited Storage.
                Mathematics of Computation, Vol. 35, No. 151,
                pp. 773--782, 1980.
         */
        bound = (m <= k) ? m : k;
        ++k;
        end = (end + 1) % m;

        /* Compute the steepest direction. */
        if (param.orthantwise_c == 0.) {
            /* Compute the negative of gradients. */
            vecncpy(d, g, n);
        } else {
            vecncpy(d, pg, n);
        }

        /* First loop: walk backwards from the most recent correction pair. */
        j = end;
        for (i = 0;i < bound;++i) {
            j = (j + m - 1) % m;    /* if (--j == -1) j = m-1; */
            it = &lm[j];
            /* \alpha_{j} = \rho_{j} s^{t}_{j} \cdot q_{k+1}. */
            vecdot(&it->alpha, it->s, d, n);
            it->alpha /= it->ys;
            /* q_{i} = q_{i+1} - \alpha_{i} y_{i}. */
            vecadd(d, it->y, -it->alpha, n);
        }

        vecscale(d, ys / yy, n);

        /* Second loop: walk forwards again from the oldest pair used. */
        for (i = 0;i < bound;++i) {
            it = &lm[j];
            /* \beta_{j} = \rho_{j} y^t_{j} \cdot \gamma_{i}. */
            vecdot(&beta, it->y, d, n);
            beta /= it->ys;
            /* \gamma_{i+1} = \gamma_{i} + (\alpha_{j} - \beta_{j}) s_{j}. */
            vecadd(d, it->s, it->alpha - beta, n);
            j = (j + 1) % m;        /* if (++j == m) j = 0; */
        }

        /*
            Constrain the search direction for orthant-wise updates.
         */
        if (param.orthantwise_c != 0.) {
            for (i = param.orthantwise_start;i < param.orthantwise_end;++i) {
                if (d[i] * pg[i] >= 0) {
                    d[i] = 0;
                }
            }
        }

        /*
            Now the search direction d is ready. We try step = 1 first.
         */
        step = 1.0;
    }

lbfgs_exit:
    /* Return the final value of the objective function. */
    if (ptr_fx != NULL) {
        *ptr_fx = fx;
    }

    vecfree(pf);

    /* Free memory blocks used by this function. */
    if (lm != NULL) {
        for (i = 0;i < m;++i) {
            vecfree(lm[i].s);
            vecfree(lm[i].y);
        }
        vecfree(lm);
    }
    vecfree(pg);
    vecfree(w);
    vecfree(d);
    vecfree(gp);
    vecfree(g);
    vecfree(xp);

    return ret;
}
Пример #24
0
/*
 * Selective-orthogonalization Lanczos iteration for the extended
 * eigenproblem with right-hand side gvec and norm constraint sigma.
 *
 * The Lanczos basis is started from gvec (scaled by vwsqrt when given and
 * orthogonalized against the ones vector or against vwsqrt).  At every pause
 * chosen by lanpause() the Ritz values of the tridiagonal matrix T are
 * computed, get_extval() produces the extended eigenvalue `extval` and the
 * j-vector `v`, and the iteration stops once |beta[j] * v[j]| < eigtol.
 * The resulting n-vector, sum_k v[k] * q[k], is written to y[1].
 *
 * Only d == 1 is supported; any other value, or n < d + 1, calls bail().
 *
 * If allocating a new Lanczos vector fails, the iteration is rolled back to
 * the most recent pause and the approximation available at that pause is
 * returned; if no pause has happened yet, bail() is called.
 *
 * Returns 1 when the norm of the prepared right-hand side is below
 * numerical_zero (no iteration is performed and y is left untouched), so the
 * caller can fall back to the standard eigensolver; returns 0 otherwise.
 */
int lanczos_ext(struct vtx_data **A,       /* sparse matrix in row linked list format */
                int               n,       /* problem size */
                int               d,       /* problem dimension = number of eigvecs to find */
                double **         y,       /* columns of y are eigenvectors of A  */
                double            eigtol,  /* tolerance on eigenvectors */
                double *          vwsqrt,  /* square roots of vertex weights */
                double            maxdeg,  /* maximum degree of graph */
                int               version, /* flags which version of sel. orth. to use */
                double *          gvec,    /* the rhs n-vector in the extended eigen problem */
                double            sigma    /* specifies the norm constraint on extended
                                              eigenvector */
                )
{
  extern FILE *     Output_File;         /* output file or null */
  extern int        LANCZOS_SO_INTERVAL; /* interval between orthogonalizations */
  extern int        LANCZOS_MAXITNS;     /* maximum Lanczos iterations allowed */
  extern int        DEBUG_EVECS;         /* print debugging output? */
  extern int        DEBUG_TRACE;         /* trace main execution path */
  extern int        WARNING_EVECS;       /* print warning messages? */
  extern double     BISECTION_SAFETY;    /* safety for T bisection algorithm */
  extern double     SRESTOL;             /* resid tol for T evec comp */
  extern double     DOUBLE_EPSILON;      /* machine precision */
  extern double     DOUBLE_MAX;          /* largest double value */
  extern double     splarax_time;        /* time matvec */
  extern double     orthog_time;         /* time orthogonalization work */
  extern double     evec_time;           /* time to generate eigenvectors */
  extern double     ql_time;             /* time tridiagonal eigenvalue work */
  extern double     blas_time;           /* time for blas. linear algebra */
  extern double     init_time;           /* time to allocate, initialize variables */
  extern double     scan_time;           /* time for scanning eval and bound lists */
  extern double     debug_time;          /* time for (some of) debug computations */
  extern double     ritz_time;           /* time to generate ritz vectors */
  extern double     pause_time;          /* time to compute whether to pause */
  int               i, j, k;             /* indices */
  int               maxj;                /* maximum number of Lanczos iterations */
  double *          u, *r;               /* Lanczos vectors */
  double *          alpha, *beta;        /* the Lanczos scalars from each step */
  double *          ritz;                /* copy of alpha for ql */
  double *          workj;               /* work vector, e.g. copy of beta for ql */
  double *          workn;               /* work vector, e.g. product Av for checkeig */
  double *          s;                   /* eigenvector of T */
  double **         q;                   /* columns of q are Lanczos basis vectors */
  double *          bj;                  /* beta(j)*(last el. of corr. eigvec s of T) */
  double            Sres;                /* how well Tevec calculated eigvec s */
  double            Sres_max;            /* Max value of Sres */
  int               inc_bis_safety;      /* has Sres increased? */
  double *          Ares;                /* how well Lanczos calc. eigpair lambda,y */
  int *             index;               /* the Ritz index of an eigenpair */
  struct orthlink **solist;              /* vec. of structs with vecs. to orthog. against */
  struct scanlink * scanlist;            /* linked list of fields to do with min ritz vals */
  struct scanlink * curlnk;              /* for traversing the scanlist */
  double            bis_safety;          /* real safety for T bisection algorithm */
  int               converged;           /* has the iteration converged? */
  double            goodtol;             /* error tolerance for a good Ritz vector */
  int               ngood;               /* total number of good Ritz pairs at current step */
  int               maxngood;            /* biggest val of ngood through current step */
  int               left_ngood;          /* number of good Ritz pairs on left end */
  int               lastpause;           /* Most recent step with good ritz vecs */
  int               nopauses;            /* Have there been any pauses? */
  int               interval;            /* number of steps between pauses */
  double            time;                /* Current clock time */
  int               left_goodlim;        /* number of ritz pairs checked on left end */
  double            Anorm;               /* Norm estimate of the Laplacian matrix */
  int               pausemode;           /* which Lanczos pausing criterion to use */
  int               pause;               /* whether to pause */
  int               temp;                /* used to prevent redundant index computations */
  double *          extvec;              /* n-vector solving the extended A eigenproblem */
  double *          v;                   /* j-vector solving the extended T eigenproblem */
  double            extval = 0.0;        /* computed extended eigenvalue (of both A and T) */
  double *          work1, *work2;       /* work vectors */
  double            check;               /* to check an orthogonality condition */
  double            numerical_zero;      /* used for zero in presence of round-off  */
  int               ritzval_flag;        /* status flag for get_ritzvals() */
  int               memory_ok;           /* TRUE until memory runs out */

  double *         mkvec();        /* allocates space for a vector */
  double *         mkvec_ret();    /* mkvec() which returns error code */
  double           dot();          /* standard dot product routine */
  struct orthlink *makeorthlnk();  /* makes space for new entry in orthog. set */
  double           ch_norm();      /* vector norm */
  double           Tevec();        /* calc eigenvector of T by linear recurrence */
  struct scanlink *mkscanlist();   /* init scan list for min ritz vecs */
  double           lanc_seconds(); /* switchable timer */
                                   /* free allocated memory safely */
  int    lanpause();               /* figure when to pause Lanczos iteration */
  int    get_ritzvals();           /* compute eigenvalues of T */
  void   setvec();                 /* initialize a vector */
  void   vecscale();               /* scale a vector */
  void   splarax();                /* matrix vector multiply */
  void   update();                 /* add scalar multiple of a vector to another */
  void   sorthog();                /* orthogonalize vector against list of others */
  void   bail();                   /* our exit routine */
  void   scanmin();                /* store small values of vector in linked list */
  void   frvec();                  /* free vector */
  void   scadd();                  /* add scalar multiple of vector to another */
  void   cpvec();                  /* copy a vector */
  void   orthog1();                /* efficiently orthog. against vector of ones */
  void   solistout();              /* print out orthogonalization list */
  void   doubleout();              /* print a double precision number */
  void   orthogvec();              /* orthogonalize one vector against another */
  void   get_extval();             /* find extended Ritz values */
  void   scale_diag();             /* scale vector by diagonal matrix */
  void   strout();                 /* print string to screen and file */
  double checkeig_ext();           /* check extended eigenpair residual directly */

  if (DEBUG_TRACE > 0) {
    printf("<Entering lanczos_ext>\n");
  }

  if (DEBUG_EVECS > 0) {
    printf("Selective orthogonalization Lanczos for extended eigenproblem, matrix size = %d.\n", n);
  }

  /* Initialize time. */
  time = lanc_seconds();

  if (d != 1) {
    bail("ERROR: Extended Lanczos only available for bisection.", 1);
    /* ... something must be wrong upstream. */
  }

  if (n < d + 1) {
    bail("ERROR: System too small for number of eigenvalues requested.", 1);
    /* ... d+1 since don't use zero eigenvalue pair */
  }

  /* Allocate space.  Everything sized by maxj is allocated for the full
     iteration budget up front; only the basis vectors q[j] are allocated
     lazily inside the main loop.  Ares, index and extvec are allocated and
     freed here but not otherwise referenced in this function. */
  maxj     = LANCZOS_MAXITNS;
  u        = mkvec(1, n);
  r        = mkvec(1, n);
  workn    = mkvec(1, n);
  Ares     = mkvec(0, d);
  index    = smalloc((d + 1) * sizeof(int));
  alpha    = mkvec(1, maxj);
  beta     = mkvec(0, maxj);
  ritz     = mkvec(1, maxj);
  s        = mkvec(1, maxj);
  bj       = mkvec(1, maxj);
  workj    = mkvec(0, maxj);
  q        = smalloc((maxj + 1) * sizeof(double *));
  solist   = smalloc((maxj + 1) * sizeof(struct orthlink *));
  scanlist = mkscanlist(d);
  extvec   = mkvec(1, n);
  v        = mkvec(1, maxj);
  work1    = mkvec(1, maxj);
  work2    = mkvec(1, maxj);

  /* Set some constants governing orthogonalization */
  ngood          = 0;
  maxngood       = 0;
  Anorm          = 2 * maxdeg;                   /* Gershgorin estimate for ||A|| */
  goodtol        = Anorm * sqrt(DOUBLE_EPSILON); /* Parlett & Scott's bound, p.224 */
  interval       = 2 + (int)min(LANCZOS_SO_INTERVAL - 2, n / (2 * LANCZOS_SO_INTERVAL));
  bis_safety     = BISECTION_SAFETY;
  numerical_zero = 1.0e-13;

  if (DEBUG_EVECS > 0) {
    printf("  maxdeg %g\n", maxdeg);
    printf("  goodtol %g\n", goodtol);
    printf("  interval %d\n", interval);
    printf("  maxj %d\n", maxj);
  }

  /* Initialize space.  The starting residual r is the (optionally scaled)
     right-hand side; `check` measures how much its norm changes when it is
     orthogonalized, which should be negligible for a well-posed problem. */
  cpvec(r, 1, n, gvec);
  if (vwsqrt != NULL) {
    scale_diag(r, 1, n, vwsqrt);
  }
  check = ch_norm(r, 1, n);
  if (vwsqrt == NULL) {
    orthog1(r, 1, n);
  }
  else {
    orthogvec(r, 1, n, vwsqrt);
  }
  check = fabs(check - ch_norm(r, 1, n));
  if (check > 10 * numerical_zero && WARNING_EVECS > 0) {
    strout("WARNING: In terminal propagation, rhs should have no component in the");
    printf("         nullspace of the Laplacian, so check val %g should be negligible.\n", check);
    if (Output_File != NULL) {
      fprintf(Output_File,
              "         nullspace of the Laplacian, so check val %g should be negligible.\n",
              check);
    }
  }
  beta[0] = ch_norm(r, 1, n);
  q[0]    = mkvec(1, n);
  setvec(q[0], 1, n, 0.0);
  setvec(bj, 1, maxj, DOUBLE_MAX);

  if (beta[0] < numerical_zero) {
    /* The rhs vector, Dg, of the transformed problem is numerically zero or is
       in the null space of the Laplacian, so this is not a well posed extended
       eigenproblem. Set maxj to zero to force a quick exit but still clean-up
       memory and return(1) to indicate to eigensolve that it should call the
       default eigensolver routine for the standard eigenproblem. */
    maxj = 0;
  }

  /* Main Lanczos loop. */
  j              = 1;
  lastpause      = 0;
  pausemode      = 1;
  left_ngood     = 0;
  left_goodlim   = 0;
  converged      = FALSE;
  Sres_max       = 0.0;
  inc_bis_safety = FALSE;
  nopauses       = TRUE;
  memory_ok      = TRUE;
  init_time += lanc_seconds() - time;
  while ((j <= maxj) && (!converged) && memory_ok) {
    time = lanc_seconds();

    /* Allocate next Lanczos vector. If fail, back up to last pause. */
    q[j] = mkvec_ret(1, n);
    if (q[j] == NULL) {
      memory_ok = FALSE;
      if (DEBUG_EVECS > 0 || WARNING_EVECS > 0) {
        strout("WARNING: Lanczos_ext out of memory; computing best approximation available.\n");
      }
      if (nopauses) {
        bail("ERROR: Sorry, can't salvage Lanczos_ext.", 1);
        /* ... save yourselves, men.  */
      }
      /* Discard the basis vectors built since the last pause. */
      for (i = lastpause + 1; i <= j - 1; i++) {
        frvec(q[i], 1);
      }
      /* Stop here rather than running another Lanczos step: r still holds
         the residual of the abandoned step, so continuing with j reset to
         lastpause would overwrite q[lastpause], alpha[lastpause] and
         beta[lastpause] with values that do not belong to the checkpoint,
         while v and extval still describe the state at that pause.
         The j-- following the loop turns lastpause + 1 back into lastpause. */
      j = lastpause + 1;
      break;
    }

    /* Basic Lanczos iteration */
    vecscale(q[j], 1, n, 1.0 / beta[j - 1], r);
    blas_time += lanc_seconds() - time;
    time = lanc_seconds();
    splarax(u, A, n, q[j], vwsqrt, workn);
    splarax_time += lanc_seconds() - time;
    time = lanc_seconds();
    update(r, 1, n, u, -beta[j - 1], q[j - 1]);
    alpha[j] = dot(r, 1, n, q[j]);
    update(r, 1, n, r, -alpha[j], q[j]);
    blas_time += lanc_seconds() - time;

    /* Selective orthogonalization: always remove the ones/vwsqrt component;
       orthogonalize against the stored good Ritz vectors only on the two
       steps immediately following a pause. */
    time = lanc_seconds();
    if (vwsqrt == NULL) {
      orthog1(r, 1, n);
    }
    else {
      orthogvec(r, 1, n, vwsqrt);
    }
    if ((j == (lastpause + 1)) || (j == (lastpause + 2))) {
      sorthog(r, n, solist, ngood);
    }
    orthog_time += lanc_seconds() - time;
    beta[j] = ch_norm(r, 1, n);
    time    = lanc_seconds();
    pause   = lanpause(j, lastpause, interval, q, n, &pausemode, version, beta[j]);
    pause_time += lanc_seconds() - time;
    if (pause) {
      nopauses  = FALSE;
      lastpause = j;

      /* Compute limits for checking Ritz pair convergence. */
      if (version == 2) {
        if (left_ngood + 2 > left_goodlim) {
          left_goodlim = left_ngood + 2;
        }
      }

      /* Special case: need at least d Ritz vals on left. */
      left_goodlim = max(left_goodlim, d);

      /* Special case: can't find more than j total Ritz vals. */
      if (left_goodlim > j) {
        left_goodlim = min(left_goodlim, j);
      }

      /* Find Ritz vals using faster of Sturm bisection or ql. */
      time = lanc_seconds();
      if (inc_bis_safety) {
        /* A previous Tevec residual exceeded SRESTOL: loosen the bisection
           safety factor once before recomputing the Ritz values. */
        bis_safety *= 10;
        inc_bis_safety = FALSE;
      }
      ritzval_flag =
          get_ritzvals(alpha, beta, j, Anorm, workj, ritz, d, left_goodlim, 0, eigtol, bis_safety);
      ql_time += lanc_seconds() - time;

      if (ritzval_flag != 0) {
        bail("ERROR: Lanczos_ext failed in computing eigenvalues of T.", 1);
        /* ... we recover from this in lanczos_SO, but don't worry here. */
      }

      /* Scan for minimum evals of tridiagonal. */
      time = lanc_seconds();
      scanmin(ritz, 1, j, &scanlist);
      scan_time += lanc_seconds() - time;

      /* Compute Ritz pair bounds at left end. */
      time = lanc_seconds();
      setvec(bj, 1, j, 0.0);
      for (i = 1; i <= left_goodlim; i++) {
        /* NOTE(review): beta is passed shifted by one element; presumably
           Tevec indexes its beta argument from 1 -- confirm against Tevec. */
        Sres = Tevec(alpha, beta - 1, j, ritz[i], s);
        if (Sres > Sres_max) {
          Sres_max = Sres;
        }
        if (Sres > SRESTOL) {
          inc_bis_safety = TRUE;
        }
        bj[i] = s[j] * beta[j];
      }
      ritz_time += lanc_seconds() - time;

      /* Show portion of spectrum checked for Ritz convergence. */
      if (DEBUG_EVECS > 2) {
        time = lanc_seconds();
        printf("\nindex         Ritz vals            bji bounds\n");
        for (i = 1; i <= left_goodlim; i++) {
          printf("  %3d", i);
          doubleout(ritz[i], 1);
          doubleout(bj[i], 1);
          printf("\n");
        }
        printf("\n");
        curlnk = scanlist;
        while (curlnk != NULL) {
          temp = curlnk->indx;
          if ((temp > left_goodlim) && (temp < j)) {
            printf("  %3d", temp);
            doubleout(ritz[temp], 1);
            doubleout(bj[temp], 1);
            printf("\n");
          }
          curlnk = curlnk->pntr;
        }
        printf("                            -------------------\n");
        printf("                goodtol:    %19.16f\n\n", goodtol);
        debug_time += lanc_seconds() - time;
      }

      get_extval(alpha, beta, j, ritz[1], s, eigtol, beta[0], sigma, &extval, v, work1, work2);

      /* Check convergence of iteration. */
      if (fabs(beta[j] * v[j]) < eigtol) {
        converged = TRUE;
      }
      else {
        converged = FALSE;
      }

      if (!converged) {
        ngood      = 0;
        left_ngood = 0; /* for setting left_goodlim on next loop */

        /* Compute converged Ritz pairs on left end */
        time = lanc_seconds();
        for (i = 1; i <= left_goodlim; i++) {
          if (bj[i] <= goodtol) {
            ngood += 1;
            left_ngood += 1;
            /* Orthogonalization-list slots are created on first use and
               reused at later pauses; they are released after the loop. */
            if (ngood > maxngood) {
              maxngood             = ngood;
              solist[ngood]        = makeorthlnk();
              (solist[ngood])->vec = mkvec(1, n);
            }
            (solist[ngood])->index = i;
            Sres                   = Tevec(alpha, beta - 1, j, ritz[i], s);
            if (Sres > Sres_max) {
              Sres_max = Sres;
            }
            if (Sres > SRESTOL) {
              inc_bis_safety = TRUE;
            }
            /* Ritz vector = sum_k s[k] * q[k]. */
            setvec((solist[ngood])->vec, 1, n, 0.0);
            for (k = 1; k <= j; k++) {
              scadd((solist[ngood])->vec, 1, n, s[k], q[k]);
            }
          }
        }
        ritz_time += lanc_seconds() - time;

        if (DEBUG_EVECS > 2) {
          time = lanc_seconds();

          /* Show some info on the orthogonalization. */
          printf("  j %3d; goodlim lft %2d, rgt %2d; list ", j, left_goodlim, 0);
          solistout(solist, ngood, j);

          /* Assemble current approx. eigenvector, check residual directly. */
          setvec(y[1], 1, n, 0.0);
          for (k = 1; k <= j; k++) {
            scadd(y[1], 1, n, v[k], q[k]);
          }
          printf("  extended eigenvalue %g\n", extval);
          printf("  est. extended residual %g\n", fabs(v[j] * beta[j]));
          checkeig_ext(workn, u, A, y[1], n, extval, vwsqrt, gvec, eigtol, FALSE);

          printf("---------------------end of iteration---------------------\n\n");
          debug_time += lanc_seconds() - time;
        }
      }
    }
    j++;
  }
  /* j now indexes the last Lanczos step whose data is valid (0 if the loop
     body never ran). */
  j--;

  if (DEBUG_EVECS > 0) {
    time = lanc_seconds();
    if (maxj == 0) {
      printf("Not extended eigenproblem -- calling ordinary eigensolver.\n");
    }
    else {
      printf("  Lanczos_ext itns: %d\n", j);
      printf("  extended eigenvalue: %g\n", extval);
      if (j == maxj) {
        strout("WARNING: Maximum number of Lanczos iterations reached.\n");
      }
    }
    debug_time += lanc_seconds() - time;
  }

  if (maxj != 0) {
    /* Compute (scaled) extended eigenvector. */
    time = lanc_seconds();
    setvec(y[1], 1, n, 0.0);
    for (k = 1; k <= j; k++) {
      scadd(y[1], 1, n, v[k], q[k]);
    }
    evec_time += lanc_seconds() - time;
    /* Note: assign() will scale this y vector back to x (since y = Dx) */

    /* Compute and check residual directly. */
    if (DEBUG_EVECS > 0 || WARNING_EVECS > 0) {
      time = lanc_seconds();
      checkeig_ext(workn, u, A, y[1], n, extval, vwsqrt, gvec, eigtol, TRUE);
      debug_time += lanc_seconds() - time;
    }
  }

  /* free up memory */
  time = lanc_seconds();
  frvec(u, 1);
  frvec(r, 1);
  frvec(workn, 1);
  frvec(Ares, 0);
  sfree(index);
  frvec(alpha, 1);
  frvec(beta, 0);
  frvec(ritz, 1);
  frvec(s, 1);
  frvec(bj, 1);
  frvec(workj, 0);
  /* Only q[0..j] are still owned here: vectors past the last valid step were
     either never allocated or already released in the out-of-memory path. */
  for (i = 0; i <= j; i++) {
    frvec(q[i], 1);
  }

  sfree(q);
  while (scanlist != NULL) {
    curlnk = scanlist->pntr;
    sfree(scanlist);
    scanlist = curlnk;
  }

  for (i = 1; i <= maxngood; i++) {
    frvec((solist[i])->vec, 1);
    sfree(solist[i]);
  }

  sfree(solist);
  frvec(extvec, 1);
  frvec(v, 1);
  frvec(work1, 1);
  frvec(work2, 1);
  init_time += lanc_seconds() - time;

  if (maxj == 0)
    return (1); /* see note on beta[0] and maxj above */
  else
    return (0);
}