// virtual
void GQLearner::refinePolicyAndChooseNextAction(const double* pSenses, double* pOutActions)
{
	double reward;
	if(m_teleported)
		reward = UNKNOWN_REAL_VALUE;
	else
		reward = rewardFromLastAction();
	if(reward != UNKNOWN_REAL_VALUE)
	{
		// Find the highest Q-value obtainable from the new sense vector
		double maxQ = 0;
		double q;
		m_pActionIterator->reset(pSenses);
		int i;
		for(i = 0; i < m_actionCap; i++)
		{
			if(!m_pActionIterator->nextAction(pOutActions))
				break;
			q = getQValue(pSenses, pOutActions);
			if(q > maxQ)
				maxQ = q;
		}

		// Update the Q-value for the previous sense/action pair
		q = reward + m_discountFactor * maxQ;
		setQValue(m_pSenses, m_pAction, (1.0 - m_learningRate) * getQValue(m_pSenses, m_pAction) + m_learningRate * q);
	}

	// Decide what to do next
	GVec::copy(m_pSenses, pSenses, m_senseDims);
	chooseAction(pSenses, pOutActions);
	GVec::copy(m_pAction, pOutActions, m_actionDims);
	m_teleported = false;
}
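
The setQValue() call above applies the standard Q-learning rule to the previous sense/action pair: Q(s,a) <- (1 - learningRate) * Q(s,a) + learningRate * (reward + discountFactor * max_a' Q(s',a')). The sketch below restates that update over a plain lookup table. It is not part of GClasses; the TabularQ name, the integer state/action keys, and the default rates are illustrative assumptions only.

#include <map>
#include <utility>

// Minimal tabular analogue of the update in refinePolicyAndChooseNextAction.
// All names and default values here are illustrative, not library code.
struct TabularQ
{
	std::map<std::pair<int,int>, double> table; // (state, action) -> Q
	double learningRate = 0.1;
	double discountFactor = 0.97;

	// Unseen state/action pairs default to a Q-value of 0
	double q(int s, int a) const
	{
		auto it = table.find(std::make_pair(s, a));
		return it == table.end() ? 0.0 : it->second;
	}

	// Q(s,a) <- (1 - lr) * Q(s,a) + lr * (reward + gamma * max_a' Q(s',a'))
	void update(int s, int a, double reward, double maxNextQ)
	{
		table[std::make_pair(s, a)] = (1.0 - learningRate) * q(s, a)
			+ learningRate * (reward + discountFactor * maxNextQ);
	}
};
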
// virtual
void GIncrementalLearnerQAgent::chooseAction(const double* pSenses, double* pActions)
{
	m_pActionIterator->reset(pSenses);
	if(m_explore && m_pRand->uniform() >= m_softMaxThresh)
	{
		// Explore
		m_pActionIterator->randomAction(pActions, m_pRand);
	}
	else
	{
		// Exploit
		double bestQ = -1e200;
		double q;
		int i;
		GTEMPBUF(double, pCand, m_actionDims);
		for(i = 0; i < m_actionCap; i++)
		{
			if(!m_pActionIterator->nextAction(pCand))
				break;
			q = getQValue(pSenses, pCand);
			if(q > bestQ)
			{
				bestQ = q;
				GVec::copy(pActions, pCand, m_actionDims);
			}
		}
	}
}
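
For context, a driving loop for this agent might look like the sketch below. Only refinePolicyAndChooseNextAction comes from the code above; the MyEnv interface, its observe/act methods, and the runEpisode helper are assumptions made for illustration.

#include <cstddef>
#include <vector>

// Hypothetical environment interface; supply a real simulator in practice.
struct MyEnv
{
	bool observe(double* pSenses);    // fill the current sense vector; return false when the episode ends
	void act(const double* pActions); // apply the chosen action
};

// Illustrative episode loop: refine the Q-values from the last reward, then act.
void runEpisode(GQLearner& agent, MyEnv& env, size_t senseDims, size_t actionDims)
{
	std::vector<double> senses(senseDims), actions(actionDims);
	while(env.observe(senses.data()))
	{
		agent.refinePolicyAndChooseNextAction(senses.data(), actions.data());
		env.act(actions.data());
	}
}
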
Example #3
    LearningProblemState*
    QLearner::doLearningIteration(LearningProblemState * state)
    {
        // Pick a new state once in a while
        if (randomReal() < nu) {
            state = problem->getRandomState();
        }

        // Get the list of actions
        LearningProblemAction* actions = problem->getActions(state);
        LearningProblemAction* action = NULL;

        // Check if we should use a random action, or the best one
        if (randomReal() < rho) {
            unsigned randPos = randomInt(actions->getCount());
            action = actions->getAtPositionInList(randPos);
        } else {
            action = getBestAction(state);
        }

        // Make sure we've got something to do
        if (action != NULL)
        {
            // Carry out the action
            LearningProblemActionResult result =
                problem->getResult(state, action);

            // Get the current q value
            real q = getQValue(state, action);

            // Get the q of the best action from the new state
            real maxQ = getBestQValue(result.state);

            // Recalculate the Q-value using the Q-learning update rule
            q = ((real)1.0-alpha) * q + alpha * (result.reward + gamma * maxQ);

            // Store the new Q value
            storeQValue(state, action, q);

            return result.state;
        }
        // Otherwise we need to get a new state - we've reached the
        // end of the road.
        else
        {
            return problem->getRandomState();
        }
    }
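
Because doLearningIteration() returns the state to continue from, training amounts to feeding that state back in repeatedly. A minimal driver is sketched below; the runQLearning name, the LearningProblem type of the problem pointer, and the iterations parameter are assumptions not shown in the excerpt above.

    // Illustrative driver, not library code
    void runQLearning(QLearner* learner, LearningProblem* problem, unsigned iterations)
    {
        // Start from an arbitrary state and keep feeding the returned state back in
        LearningProblemState* state = problem->getRandomState();
        for (unsigned i = 0; i < iterations; i++)
        {
            state = learner->doLearningIteration(state);
        }
    }
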
Example #4
    LearningProblemAction*
    QLearner::getBestAction(LearningProblemState *state)
    {
        // Get the head of the linked list of actions for this state
        LearningProblemAction* action = problem->getActions(state);

        // Check them in turn, with the first action as the default choice
        real best = (real)0;
        LearningProblemAction* bestAction = action;
        while (action != NULL)
        {
            real q = getQValue(state, action);
            if (q > best) {
                best = q;
                bestAction = action;
            }

            action = action->next;
        }

        return bestAction;
    }
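
doLearningIteration() in Example #3 also calls getBestQValue(), which is not shown in this excerpt. A natural counterpart to getBestAction() would walk the same action list and return the value rather than the action; the sketch below follows that pattern and is an assumption, not the library's actual code.

    real
    QLearner::getBestQValue(LearningProblemState * state)
    {
        // Walk the action list for this state and keep the highest Q-value seen
        real best = (real)0;
        LearningProblemAction* action = problem->getActions(state);
        while (action != NULL)
        {
            real q = getQValue(state, action);
            if (q > best) {
                best = q;
            }
            action = action->next;
        }
        return best;
    }
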