/**
 * Selects the action to take from the given state.
 *
 * When getValue(state) is below someValue, an action is drawn with
 * rand() % 3 (i.e. one of the integer values 0, 1 or 2 converted to Action).
 * Otherwise the result of getBestAction(state) is used.
 *
 * @param state The state to choose an action for.
 * @return The chosen action.
 */
Action QLearner::getAction(const State &state)
{
    // Below the threshold: fall back to a randomly drawn action.
    if (getValue(state) < someValue)
    {
        return static_cast<Action>(rand() % 3);
    }

    // Otherwise trust what has been learned so far.
    return getBestAction(state);
}
/**
 * Performs a single learning iteration starting from the given state.
 *
 * Steps, in order:
 *  1. If randomReal() < nu, the supplied state is replaced by
 *     problem->getRandomState().
 *  2. If randomReal() < rho, an action is picked at a random position in
 *     the list returned by problem->getActions(state); otherwise
 *     getBestAction(state) is used.
 *  3. If an action was found, it is carried out via problem->getResult(),
 *     and the stored Q value for (state, action) is replaced by
 *         (1 - alpha) * q + alpha * (reward + gamma * maxQ)
 *     where maxQ is getBestQValue() of the resulting state.
 *
 * @param state The state to learn from (may be overridden, see step 1).
 * @return The state produced by the chosen action, or a random state from
 *         the problem when no action could be chosen.
 */
LearningProblemState* QLearner::doLearningIteration(LearningProblemState * state)
{
    // Pick a new state once in a while
    if (randomReal() < nu)
    {
        state = problem->getRandomState();
    }

    // Get the list of actions
    LearningProblemAction* actions = problem->getActions(state);
    LearningProblemAction* action = NULL;

    // Check if we should use a random action, or the best one
    if (randomReal() < rho)
    {
        // NOTE(review): guards against getActions() yielding NULL for a
        // state with no available actions (assumed possible - confirm
        // against the problem implementation). In that case action stays
        // NULL and the "end of the road" branch below takes over.
        if (actions != NULL)
        {
            unsigned randPos = randomInt(actions->getCount());
            action = actions->getAtPositionInList(randPos);
        }
    }
    else
    {
        action = getBestAction(state);
    }

    // Make sure we've got something to do
    if (action != NULL)
    {
        // Carry out the action
        LearningProblemActionResult result = problem->getResult(state, action);

        // Get the current q value
        real q = getQValue(state, action);

        // Get the q of the best action from the new state
        real maxQ = getBestQValue(result.state);

        // Recalculate the q: blend the old value with the newly observed
        // reward plus the gamma-scaled best value of the resulting state
        q = (static_cast<real>(1.0) - alpha) * q +
            alpha * (result.reward + gamma * maxQ);

        // Store the new Q value
        storeQValue(state, action, q);

        return result.state;
    }

    // Otherwise we need to get a new state - we've reached the
    // end of the road.
    return problem->getRandomState();
}