void rl_nac::update( double reward, double *statefeats ) { vector_t grad_guess; // incorporate new information add_obs( reward, statefeats ); // we don't want to solve the linear system of equations every time. if ( time_to_check() ) { if ( inject_delay ) { //cout << "injecting delay of " << injection_nanos << ", for rl_to_sleepidle_ratio = " << rl_to_sleepidle_ratio << endl; slowdown_part1(); } solve_for_grad( grad_guess ); //put result in grad_guess if ( check_for_convergence(grad_guess, prev_grad_guess) ) { farm_out_grad( grad_guess ); take_gradient_step(); prev_grad_guess.Fill( 0 ); } else { prev_grad_guess = grad_guess; } if ( inject_delay ) slowdown_part2(); } for ( int a=0; a<act_cnt; a++ ) { // drive parameters towards zero Add( -PARAM_REGULARIZER, (*acts)[a]->params, (*acts)[a]->params ); } }
bool affine_propagator::process_all_eqs(const affine r[], const affine vars[], double& maxProgress) { for (int p=0; p<n_vars; ++p) { bool has_converged = check_for_convergence(vars); if (has_converged) { maxProgress = 2.0; return false; } const affine& aa = r[p]; //cout << scientific; //cout << endl << "Eq. #" << p << endl << aa << endl; const bool to_discard = process_vars_in_eq(aa, vars); if (to_discard) { affine::isValid = false; return true; } if (!check_for_zero(r, p, p)) { //cout << "Deleted" << endl; affine::isValid = false; return true; } } return false; }