void DoTest(GRand* prng, double rcoeff, double alpha, double gamma, double reward, double penalty, double soft_max_thresh, bool warpRandom)
{
	// Print the test configuration
	printf("---------------------------------------\n");
	printf("rcoeff=%lg, alpha=%lg, gamma=%lg, reward=%lg, penalty=%lg, softMaxThresh=%lg, warpRandom=%s\n", rcoeff, alpha, gamma, reward, penalty, soft_max_thresh, warpRandom ? "true" : "false");
	printf("---------------------------------------\n");
	fflush(stdout);
	soft_max_thresh /= 100; // convert from a percentage to a probability
	double soberness = ((rcoeff * 4 / 100) - 1) / 3;

	// Build the relation that describes the state and action attributes
	sp_relation rel;
	GMixedRelation* pRel = new GMixedRelation();
	rel = pRel;
	pRel->addAttr(0); // x
	pRel->addAttr(0); // y
	pRel->addAttr(4); // {E,N,W,S}

	// Create the agent and train it until it completes 10000 journeys
	GDiscreteActionIterator it(4);
	double initialstate[2];
	initialstate[0] = 0;
	initialstate[1] = 0;
	TestQAgent agent(rel, initialstate, prng, &it, soft_max_thresh, soberness, reward, penalty, warpRandom);
	agent.setLearningRate(alpha);
	agent.setDiscountFactor(gamma);
	while(agent.GetJourneyCount() < 10000)
		agent.Iterate();
}
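// A minimal sketch of how DoTest might be invoked. The seed and all
// parameter values below are illustrative assumptions, not values taken
// from the original test driver.
void DoTestExample()
{
	GRand prng(0); // deterministic seed for reproducible runs
	// rcoeff=50, alpha=0.1, gamma=0.97, reward=1, penalty=-1,
	// softMaxThresh=5 (DoTest divides it by 100), no warped randomness
	DoTest(&prng, 50, 0.1, 0.97, 1.0, -1.0, 5.0, false);
}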
CarOnHillModel(GRand* prng, GImage* pImage, GWidgetTextLabel* pWins)
{
	m_pWins = pWins;
	m_wins = 0;
	m_pImage = pImage;
	m_carPos = 0;
	m_velocity = 0;
	m_prng = prng;

	// Load the car image and add some border so we can rotate it
	GImage tmp;
	tmp.loadPng("minicar.png");
	m_pCar = new GImage();
	m_pCar->setSize(70, 60);
	GRect r(0, 0, 60, 36);
	m_pCar->blit(5, 5, &tmp, &r);
	m_pRotatedCar = new GImage();

	// Make the agent
	GMixedRelation* pRelAgent = new GMixedRelation();
	sp_relation relAgent;
	relAgent = pRelAgent;
	pRelAgent->addAttr(0); // position
	pRelAgent->addAttr(0); // velocity
	pRelAgent->addAttr(2); // action {forward, reverse}
	double initialState[2];
	initialState[0] = m_carPos;
	initialState[1] = m_velocity;
	double goalState[2];
	goalState[0] = 2;
	goalState[1] = 0;
	m_pActionIterator = new GDiscreteActionIterator(2);
	m_pAgents[0] = new CarQAgent(relAgent, initialState, m_prng, m_pActionIterator);
	((GQLearner*)m_pAgents[0])->setLearningRate(.9);
	((GQLearner*)m_pAgents[0])->setDiscountFactor(0.999);
}
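// The two calls above tune the standard Q-learning update, which a
// GQLearner is named for:
//
//     Q(s,a) <- Q(s,a) + alpha * (r + gamma * max_a' Q(s',a') - Q(s,a))
//
// A high learning rate (alpha = 0.9) makes each observed transition adjust
// the estimate aggressively, and a discount factor near one (gamma = 0.999)
// keeps the delayed reward at the hilltop visible to actions taken many
// steps earlier.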
GIncrementalLearnerQAgent::GIncrementalLearnerQAgent(sp_relation& pObsControlRelation, GIncrementalLearner* pQTable, int actionDims, double* pInitialState, GRand* pRand, GAgentActionIterator* pActionIterator, double softMaxThresh)
: GQLearner(pObsControlRelation, actionDims, pInitialState, pRand, pActionIterator)
{
	// Enable incremental learning
	m_pQTable = pQTable;
	GMixedRelation* pQRelation = new GMixedRelation();
	pQRelation->addAttrs(pObsControlRelation.get());
	pQRelation->addAttr(0); // the Q-value
	sp_relation pRelQtable;
	pRelQtable = pQRelation;
	pQTable->enableIncrementalLearning(pRelQtable, 1, NULL, NULL);

	// Init other stuff
	m_pBuf = new double[pQRelation->size()];
	m_softMaxThresh = softMaxThresh;
	m_pActionIterator = pActionIterator;
	pActionIterator->reset(pInitialState);
}
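// A hedged construction sketch. The relation layout, dimension counts, and
// threshold value below are illustrative assumptions; pLearner may be any
// GIncrementalLearner (the choice of model backing the Q-table is left to
// the caller, and the agent holds on to the iterator it is given).
GIncrementalLearnerQAgent* MakeExampleAgent(GIncrementalLearner* pLearner, GRand* pRand)
{
	// One continuous observation attribute plus one discrete action
	// attribute with three values
	GMixedRelation* pRel = new GMixedRelation();
	sp_relation rel;
	rel = pRel;
	pRel->addAttr(0); // observation
	pRel->addAttr(3); // action {left, stay, right}
	double initialState[1];
	initialState[0] = 0;
	GDiscreteActionIterator* pIt = new GDiscreteActionIterator(3);
	// 1 action dimension; 0.05 is an assumed soft-max threshold
	return new GIncrementalLearnerQAgent(rel, pLearner, 1, initialState, pRand, pIt, 0.05);
}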