コード例 #1
0
ファイル: rlearner.c プロジェクト: sunshin-es/openGrasp
/*
 * Select an action for `state`, choosing between a randomly drawn action
 * and the one reported by best_action().
 *
 * A single sample is taken with g_rand_double(rl->random). When the sample
 * is greater than (1.0 - EXPLORATION_THRESHOLD) the action comes from
 * g_rand_int_range(rl->random, 0, rl->num_actions); otherwise it is the
 * result of best_action(rl, state).
 *
 * If rl is NULL a warning is written to stderr and 0 is returned.
 */
static int semi_uniform (RLearner *rl, const unsigned int state)
{
    double sample;

    if (rl == NULL)
    {
        fprintf(stderr, "RLearner-WARNING **: RLearner *rl is NULL\n");
        return 0;
    }

    sample = g_rand_double(rl->random);

    if (sample > (1.0 - EXPLORATION_THRESHOLD))
    {
        /* Explore: draw an action index at random. */
        return g_rand_int_range(rl->random, 0, rl->num_actions);
    }

    /* Exploit: defer to best_action() for this state. */
    return best_action(rl, state);
}
コード例 #2
0
ファイル: rlearner.c プロジェクト: sunshin-es/openGrasp
/*
 * Return the q_values entry for `state` at the action index reported by
 * best_action(rl, state).
 *
 * If rl is NULL a warning is written to stderr and 0 is returned.
 */
static double best_qvalue (RLearner *rl, const unsigned int state)
{
    if (rl == NULL)
    {
        fprintf(stderr, "RLearner-WARNING **: RLearner *rl is NULL\n");
        return 0;
    }

    /* Row is the state, column is whatever best_action() picks for it. */
    return rl->q_values[ state ][ best_action(rl, state) ];
}
コード例 #3
0
// QUtility
/*
 * Process one observed transition: `action` was taken in `last_state`,
 * leading to `current_state` with reward `credit`.
 *
 * The target value is
 *     credit + discount * utility(current_state, best_action(current_state))
 * and it is handed to adjust(last_state, action, target).
 * NOTE(review): adjust() is not visible here; presumably it moves the stored
 * utility for (last_state, action) toward the target -- confirm.
 */
void QUtility::learn (QState last_state, QAction action,
                      QState current_state, QAction credit)
{
    /* An earlier version scanned all actions by hand to find the maximum
       utility; best_action() is used for that lookup now. */
    QAction greedy = best_action(current_state);
    QCredit next_best = utility(current_state, greedy);
    QCredit target = credit + discount * next_best;

    adjust(last_state, action, target);
}
コード例 #4
0
ファイル: rlearner.c プロジェクト: sunshin-es/openGrasp
/*
 * Print, for every state index in [0, rl->num_states), the action chosen by
 * best_action() and the matching q_values entry, one state per line on
 * stdout, followed by a final newline.
 *
 * If rl is NULL a warning is written to stderr and nothing else is printed.
 */
void output_prefered_state_values (RLearner *rl)
{
    int s;
    int chosen;

    if (rl == NULL)
    {
        fprintf(stderr, "RLearner-WARNING **: RLearner *rl is NULL\n");
        return;
    }

    for (s = 0; s < rl->num_states; ++s)
    {
        /* The line break is emitted before the lookup, so each entry
           starts on a fresh line. */
        printf("\n");

        chosen = best_action (rl, s);
        printf("R(%d,%d) = %05f ", s, chosen, rl->q_values[s][chosen]);
    }

    printf("\n");
}
コード例 #5
0
/*
 * Choose the action to take in `state`.
 *
 * Currently this is exactly what best_action(state) returns; a Boltzmann
 * selection rule could be substituted here instead.
 */
QAction QUtility::select_action (QState state)
{
    QAction chosen = best_action(state);
    return chosen;
}