// Do an iteration of gradient descent and return the post-update objective function. // Possibly update the optimal step size in place. double gradient_descent(double *w, double *step_size, double **features, int *grades, int num_samples, int num_features) { double *scores = (double *)malloc(num_samples * sizeof(double)); double *grad = (double *)malloc(num_features * sizeof(double)); int sample_ind, other_sample_ind; double slack_coeff = C / pow(num_samples, 2); vec_assign(grad, w, 1, num_features); for (sample_ind=0; sample_ind<num_samples; ++sample_ind) { scores[sample_ind] = dot_product(w, features[sample_ind], num_features); for (other_sample_ind=0; other_sample_ind<sample_ind; ++other_sample_ind) { if (grades[sample_ind] < grades[other_sample_ind] && scores[sample_ind]+1 > scores[other_sample_ind]) { vec_add(grad, features[sample_ind], slack_coeff, num_features); vec_add(grad, features[other_sample_ind], -slack_coeff, num_features); } else if (grades[sample_ind] > grades[other_sample_ind] && scores[sample_ind] < 1+scores[other_sample_ind]) { vec_add(grad, features[sample_ind], -slack_coeff, num_features); vec_add(grad, features[other_sample_ind], slack_coeff, num_features); } } } // vec_add(w, grad, -step_size, num_features); free(scores); free(grad); // return compute_objective(w, features, grades, num_samples, num_features); return take_gradient_step(w, grad, step_size, features, grades, num_samples, num_features); }
void rl_nac::update( double reward, double *statefeats ) { vector_t grad_guess; // incorporate new information add_obs( reward, statefeats ); // we don't want to solve the linear system of equations every time. if ( time_to_check() ) { if ( inject_delay ) { //cout << "injecting delay of " << injection_nanos << ", for rl_to_sleepidle_ratio = " << rl_to_sleepidle_ratio << endl; slowdown_part1(); } solve_for_grad( grad_guess ); //put result in grad_guess if ( check_for_convergence(grad_guess, prev_grad_guess) ) { farm_out_grad( grad_guess ); take_gradient_step(); prev_grad_guess.Fill( 0 ); } else { prev_grad_guess = grad_guess; } if ( inject_delay ) slowdown_part2(); } for ( int a=0; a<act_cnt; a++ ) { // drive parameters towards zero Add( -PARAM_REGULARIZER, (*acts)[a]->params, (*acts)[a]->params ); } }