// Feed one observation (reward + state features) to the learner.
//
// The observation is always recorded through add_obs(). Only when
// time_to_check() returns true is the linear system solved for a new
// gradient guess, which is then compared against prev_grad_guess:
//   - converged:     the gradient is farmed out, a gradient step is taken,
//                    and prev_grad_guess is zero-filled;
//   - not converged: the new guess is stored in prev_grad_guess.
// When inject_delay is set, the solve/compare work is bracketed by
// slowdown_part1() / slowdown_part2().
// Every call ends by applying the PARAM_REGULARIZER term to the params of
// each action.
void rl_nac::update( double reward, double *statefeats ) {
  // fold the new sample into the accumulated statistics
  add_obs( reward, statefeats );

  // solving the linear system is not done on every call
  if ( time_to_check() ) {

    if ( inject_delay ) {
      //cout << "injecting delay of " << injection_nanos << ", for rl_to_sleepidle_ratio = " << rl_to_sleepidle_ratio << endl;
      slowdown_part1();
    }

    vector_t grad_guess;
    solve_for_grad( grad_guess ); // result is written into grad_guess

    if ( !check_for_convergence(grad_guess, prev_grad_guess) ) {
      // not there yet: keep this guess for the next comparison
      prev_grad_guess = grad_guess;
    } else {
      farm_out_grad( grad_guess );
      take_gradient_step();
      prev_grad_guess.Fill( 0 );
    }

    if ( inject_delay ) {
      slowdown_part2();
    }
  }

  // regularization: drive every action's parameters towards zero
  // NOTE(review): presumably Add(s, x, y) accumulates s*x into y -- confirm
  for ( int act_idx = 0; act_idx < act_cnt; ++act_idx ) {
    Add( -PARAM_REGULARIZER, (*acts)[act_idx]->params, (*acts)[act_idx]->params );
  }
}
Example #2
0
// Walk the equations r[0 .. n_vars-1] in order, processing each against vars.
//
// Before each equation, check_for_convergence(vars) is consulted; if it
// reports true, maxProgress is set to 2.0 and the function returns false
// without processing the remaining equations.
//
// Returns true (after setting affine::isValid to false) as soon as either
// process_vars_in_eq() asks for the equation to be discarded or
// check_for_zero(r, i, i) returns false. Returns false otherwise, in which
// case maxProgress is only written on the convergence path.
bool affine_propagator::process_all_eqs(const affine r[], const affine vars[], double& maxProgress) {

	for (int eq_idx = 0; eq_idx < n_vars; ++eq_idx) {

		// stop early once the variables have converged
		if (check_for_convergence(vars)) {
			maxProgress = 2.0;
			return false;
		}

		//cout << scientific;
		//cout << endl << "Eq. #" << eq_idx << endl << r[eq_idx] << endl;

		// Short-circuit keeps the original order: check_for_zero is only
		// evaluated when process_vars_in_eq did not request a discard.
		if (process_vars_in_eq(r[eq_idx], vars) || !check_for_zero(r, eq_idx, eq_idx)) {
			//cout << "Deleted" << endl;
			affine::isValid = false;
			return true;
		}
	}

	return false;
}