/*
 * Epsilon-greedy action selection: with probability EXPLORATION_THRESHOLD
 * draw a uniformly random action, otherwise take the greedy one.
 * Returns 0 (and warns on stderr) when rl is NULL.
 */
static int semi_uniform (RLearner *rl, const unsigned int state)
{
    int chosen = 0;

    if (rl == NULL) {
        fprintf(stderr, "RLearner-WARNING **: RLearner *rl is NULL\n");
        return chosen;
    }

    if (g_rand_double(rl->random) > (1.0 - EXPLORATION_THRESHOLD)) {
        /* Explore: pick uniformly from [0, num_actions). */
        chosen = g_rand_int_range(rl->random, 0, rl->num_actions);
    } else {
        /* Exploit: greedy action for this state. */
        chosen = best_action(rl, state);
    }

    return chosen;
}
/*
 * Highest Q-value reachable from `state`, i.e. the value of the greedy
 * action there. Returns 0.0 (and warns on stderr) when rl is NULL.
 */
static double best_qvalue (RLearner *rl, const unsigned int state)
{
    if (rl == NULL) {
        fprintf(stderr, "RLearner-WARNING **: RLearner *rl is NULL\n");
        return 0;
    }

    return rl->q_values[state][best_action(rl, state)];
}
// QUtility void QUtility::learn (QState last_state, QAction action, QState current_state, QAction credit) { QCredit new_util, max_util; QAction a; /* assert(action >= 0 && action < total_actions); for (a = 0; a < total_actions; a++) { QCredit util = utility(current_state, a); if (max_util < util || a == 0) max_util = util; }*/ a = best_action(current_state); max_util = utility(current_state, a); new_util = credit + discount * max_util; adjust(last_state, action, new_util); }
/*
 * Print, for every state, the greedy action and its Q-value.
 * Warns on stderr and does nothing when rl is NULL.
 */
void output_prefered_state_values (RLearner *rl)
{
    int s;
    int a;

    if (rl == NULL) {
        fprintf(stderr, "RLearner-WARNING **: RLearner *rl is NULL\n");
        return;
    }

    for (s = 0; s < rl->num_states; s++) {
        printf("\n");
        a = best_action(rl, s);
        printf("R(%d,%d) = %05f ", s, a, rl->q_values[s][a]);
    }
    printf("\n");
}
/*
 * Purely greedy policy: always return the best-known action for `state`.
 * (A Boltzmann / softmax selection could be substituted here instead.)
 */
QAction QUtility::select_action (QState state)
{
    return best_action(state);
}