Example no. 1
// Perform one iteration of gradient descent and return the post-update objective function.
// May also update the step size in place (hence the pointer argument).
double gradient_descent(double *w, double *step_size, double **features, int *grades, int num_samples, int num_features) {
  double *scores = (double *)malloc(num_samples * sizeof(double));
  double *grad = (double *)malloc(num_features * sizeof(double));
  int sample_ind, other_sample_ind;
  double slack_coeff = C / pow(num_samples, 2);  // C: slack penalty defined elsewhere, scaled down by the O(n^2) pair count
  
  // Start from the gradient of the L2 regularizer, which is w itself.
  vec_assign(grad, w, 1, num_features);

  // Accumulate the pairwise hinge-loss gradient: every pair of samples with
  // different grades contributes when the lower-graded sample's score is not
  // at least 1 below the higher-graded sample's score (a margin violation).
  for (sample_ind=0; sample_ind<num_samples; ++sample_ind) {
    scores[sample_ind] = dot_product(w, features[sample_ind], num_features);
    for (other_sample_ind=0; other_sample_ind<sample_ind; ++other_sample_ind) {
      if (grades[sample_ind] < grades[other_sample_ind] && scores[sample_ind]+1 > scores[other_sample_ind]) {
        // Margin violated: the descent step will push this score down and the other up.
        vec_add(grad, features[sample_ind], slack_coeff, num_features);
        vec_add(grad, features[other_sample_ind], -slack_coeff, num_features);
      } else if (grades[sample_ind] > grades[other_sample_ind] && scores[sample_ind] < 1+scores[other_sample_ind]) {
        // Symmetric case: push this score up and the other down.
        vec_add(grad, features[sample_ind], -slack_coeff, num_features);
        vec_add(grad, features[other_sample_ind], slack_coeff, num_features);
      }
    }
  }

  // vec_add(w, grad, -step_size, num_features);

  // grad must stay alive until take_gradient_step has consumed it.
  double objective = take_gradient_step(w, grad, step_size, features, grades, num_samples, num_features);

  free(scores);
  free(grad);

  // return compute_objective(w, features, grades, num_samples, num_features);
  return objective;
}
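
The function above relies on a few vector helpers defined elsewhere in the project. The following are minimal sketches inferred from the call sites, assuming vec_assign writes scale*src into dst and vec_add accumulates scale*src into dst; the project's actual definitions may differ.

/* Hedged sketches inferred from usage, not the project's real definitions. */
void vec_assign(double *dst, const double *src, double scale, int n) {
  for (int i = 0; i < n; i++) dst[i] = scale * src[i];
}

void vec_add(double *dst, const double *src, double scale, int n) {
  for (int i = 0; i < n; i++) dst[i] += scale * src[i];
}

double dot_product(const double *a, const double *b, int n) {
  double sum = 0.0;
  for (int i = 0; i < n; i++) sum += a[i] * b[i];
  return sum;
}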
Example no. 2
void rl_nac::update( double reward, double *statefeats ) {
  vector_t grad_guess;

  // incorporate new information
  add_obs( reward, statefeats );

  // Solving the linear system on every update would be too expensive, so only do it periodically.
  if ( time_to_check() ) {

    if ( inject_delay ) {
      //cout << "injecting delay of " << injection_nanos << ", for rl_to_sleepidle_ratio = " << rl_to_sleepidle_ratio << endl;
      slowdown_part1();
    }

    solve_for_grad( grad_guess ); // result is written into grad_guess

    if ( check_for_convergence(grad_guess, prev_grad_guess) ) {
      // The estimate has stabilized: distribute it, take the step,
      // and reset the running estimate for the next gradient.
      farm_out_grad( grad_guess );
      take_gradient_step();
      prev_grad_guess.Fill( 0 );
    } else {
      // Not converged yet; remember this estimate for the next check.
      prev_grad_guess = grad_guess;
    }

    if ( inject_delay )
      slowdown_part2();

  }

  for ( int a=0; a<act_cnt; a++ ) {
    // drive parameters towards zero
    Add( -PARAM_REGULARIZER, (*acts)[a]->params, (*acts)[a]->params );
  }
}
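
The convergence test above is not shown here; one common criterion in natural actor-critic implementations is to declare convergence once successive gradient estimates point in nearly the same direction. A minimal C-style sketch under that assumption, using plain double arrays in place of vector_t and a hypothetical tolerance cos_thresh:

#include <math.h>

/* Hedged sketch: converged_by_angle, n, and cos_thresh are illustrative
   names, not part of the original rl_nac interface. */
int converged_by_angle(const double *grad, const double *prev, int n, double cos_thresh) {
  double dot = 0.0, norm_g = 0.0, norm_p = 0.0;
  for (int i = 0; i < n; i++) {
    dot    += grad[i] * prev[i];
    norm_g += grad[i] * grad[i];
    norm_p += prev[i] * prev[i];
  }
  if (norm_g == 0.0 || norm_p == 0.0)
    return 0;  /* no usable previous estimate yet (e.g., right after a reset) */
  /* Cosine of the angle near 1 means the direction has stopped changing. */
  return dot / (sqrt(norm_g) * sqrt(norm_p)) > cos_thresh;
}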