Example #1
void DoTest(GRand* prng, double rcoeff, double alpha, double gamma, double reward, double penalty, double soft_max_thresh, bool warpRandom)
{
    printf("---------------------------------------\n");
    printf("rcoeff=%lg, alpha=%lg, gamma=%lg, reward=%lg, penalty=%lg, softMaxThresh=%lg, warpRandom=%s\n", rcoeff, alpha, gamma, reward, penalty, soft_max_thresh, warpRandom ? "true" : "false");
    printf("---------------------------------------\n");
    fflush(stdout);
    soft_max_thresh /= 100; // convert from a percentage to a fraction in [0, 1]
    double soberness = ((rcoeff * 4 / 100) - 1) / 3; // linear map: rcoeff of 25 -> 0, 100 -> 1
    sp_relation rel;
    GMixedRelation* pRel = new GMixedRelation();
    rel = pRel;
    pRel->addAttr(0); // x position (0 values = a continuous attribute)
    pRel->addAttr(0); // y position (continuous)
    pRel->addAttr(4); // action: a nominal attribute with 4 values, {E,N,W,S}
    GDiscreteActionIterator it(4); // iterates over the 4 discrete actions
    double initialstate[2];
    initialstate[0] = 0;
    initialstate[1] = 0;
    TestQAgent agent(rel, initialstate, prng, &it, soft_max_thresh, soberness, reward, penalty, warpRandom);
    agent.setLearningRate(alpha); // alpha: how strongly each observation overwrites the old Q-value
    agent.setDiscountFactor(gamma); // gamma: the worth of future reward relative to immediate reward
    while(agent.GetJourneyCount() < 10000) // train until the agent completes 10,000 journeys
        agent.Iterate();
}
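A minimal sketch of how DoTest might be invoked, assuming the Waffles/GClasses headers are available; the seed and parameter values here are illustrative assumptions, not taken from the original project:

int main()
{
    GRand prng(0); // pseudo-random number generator with a fixed seed
    DoTest(&prng, 100.0, 0.9, 0.999, 1.0, -1.0, 5.0, false);
    return 0;
}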
Example #2
	CarOnHillModel(GRand* prng, GImage* pImage, GWidgetTextLabel* pWins)
	{
		m_pWins = pWins;
		m_wins = 0;
		m_pImage = pImage;
		m_carPos = 0;
		m_velocity = 0;
		m_prng = prng;

		// Load the car image and add some border so we can rotate it
		GImage tmp;
		tmp.loadPng("minicar.png");
		m_pCar = new GImage();
		m_pCar->setSize(70, 60);
		GRect r(0, 0, 60, 36);
		m_pCar->blit(5, 5, &tmp, &r);
		m_pRotatedCar = new GImage();

		// Make the agent
		GMixedRelation* pRelAgent = new GMixedRelation();
		sp_relation relAgent;
		relAgent = pRelAgent;
		pRelAgent->addAttr(0); // position
		pRelAgent->addAttr(0); // velocity
		pRelAgent->addAttr(2); // action {forward, reverse}
		double initialState[2];
		initialState[0] = m_carPos;
		initialState[1] = m_velocity;
		double goalState[2];
		goalState[0] = 2; // goal position
		goalState[1] = 0; // goal velocity
		m_pActionIterator = new GDiscreteActionIterator(2);
		m_pAgents[0] = new CarQAgent(relAgent, initialState, m_prng, m_pActionIterator);
		((GQLearner*)m_pAgents[0])->setLearningRate(0.9);
		((GQLearner*)m_pAgents[0])->setDiscountFactor(0.999);
	}
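Note the discount factor of 0.999: with a discount this close to 1, the reward for reaching the goal can still propagate back through the many steps it takes the car to rock its way out of the valley, whereas a small discount factor would shrink that distant reward to nearly nothing before it could shape the early Q-values.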
Example #3
GIncrementalLearnerQAgent::GIncrementalLearnerQAgent(sp_relation& pObsControlRelation, GIncrementalLearner* pQTable, int actionDims, double* pInitialState, GRand* pRand, GAgentActionIterator* pActionIterator, double softMaxThresh)
: GQLearner(pObsControlRelation, actionDims, pInitialState, pRand, pActionIterator)
{
	// Store the Q-table model
	m_pQTable = pQTable;
	// Build the Q-table's relation: every observation/control attribute, plus the Q-value
	GMixedRelation* pQRelation = new GMixedRelation();
	pQRelation->addAttrs(pObsControlRelation.get());
	pQRelation->addAttr(0); // one continuous attribute for the Q-value
	sp_relation pRelQtable;
	pRelQtable = pQRelation;
	pQTable->enableIncrementalLearning(pRelQtable, 1, NULL, NULL); // the Q-value is the single label dim

	// Init other stuff
	m_pBuf = new double[pQRelation->size()]; // buffer big enough for one full row (observation, action, Q-value)
	m_softMaxThresh = softMaxThresh;
	m_pActionIterator = pActionIterator;
	pActionIterator->reset(pInitialState);
}
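For reference, the row layout implied by the relation built above; this is inferred from the code, not stated in the source:

// One training row for the Q-table learner:
//   [ observation dims... | action dims... | Q-value ]
// pQRelation copies every attribute of pObsControlRelation and appends one
// continuous attribute, so pQRelation->size() == pObsControlRelation->size() + 1,
// and m_pBuf is sized to hold exactly one such row.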