// virtual
void GKNN::enableIncrementalLearning(sp_relation& pRelation, int labelDims, double* pMins, double* pRanges)
{
    clear();
    m_pRelation = pRelation;
    m_labelDims = labelDims;
    m_featureDims = pRelation->size() - m_labelDims;
    m_pInstances = new GData(m_pRelation);
    GMixedRelation* pMixedRel = new GMixedRelation();
    pMixedRel->addAttrs(pRelation.get(), 0, pRelation->size() - labelDims);
    sp_relation pRelInputs = pMixedRel;
    m_pDistanceMetric->init(pRelInputs);

    // Allocate some other buffers
    int maxOutputValueCount = 0;
    for(int n = 0; n < m_labelDims; n++)
        maxOutputValueCount = MAX(maxOutputValueCount, pRelation->valueCount(m_featureDims + n));
    m_pValueCounts = new double[maxOutputValueCount];

    // Scale factor optimization
    if(m_optimizeScaleFactors)
    {
        m_pCritic = new GKnnScaleFactorCritic(this, m_labelDims, m_featureDims);
        m_pScaleFactorOptimizer = new GMomentumGreedySearch(m_pCritic);
    }
    else
    {
        m_pCritic = NULL;
        m_pScaleFactorOptimizer = NULL;
    }
}
GIncrementalLearnerQAgent::GIncrementalLearnerQAgent(sp_relation& pObsControlRelation, GIncrementalLearner* pQTable, int actionDims, double* pInitialState, GRand* pRand, GAgentActionIterator* pActionIterator, double softMaxThresh)
: GQLearner(pObsControlRelation, actionDims, pInitialState, pRand, pActionIterator)
{
    // Enable incremental learning
    m_pQTable = pQTable;
    GMixedRelation* pQRelation = new GMixedRelation();
    pQRelation->addAttrs(pObsControlRelation.get());
    pQRelation->addAttr(0); // the Q-value
    sp_relation pRelQtable;
    pRelQtable = pQRelation;
    pQTable->enableIncrementalLearning(pRelQtable, 1, NULL, NULL);

    // Init other stuff
    m_pBuf = new double[pQRelation->size()];
    m_softMaxThresh = softMaxThresh;
    m_pActionIterator = pActionIterator;
    pActionIterator->reset(pInitialState);
}
void transition(GArgReader& args)
{
    // Load the input data
    GMatrix* pActions = loadData(args.pop_string());
    Holder<GMatrix> hActions(pActions);
    GMatrix* pState = loadData(args.pop_string());
    Holder<GMatrix> hState(pState);
    if(pState->rows() != pActions->rows())
        ThrowError("Expected the same number of rows in both datasets");

    // Parse options
    bool delta = false;
    while(args.size() > 0)
    {
        if(args.if_pop("-delta"))
            delta = true;
        else
            ThrowError("Invalid option: ", args.peek());
    }

    // Make the output data
    size_t actionDims = pActions->cols();
    size_t stateDims = pState->cols();
    GMixedRelation* pRelation = new GMixedRelation();
    sp_relation pRel = pRelation;
    pRelation->addAttrs(pActions->relation().get());
    pRelation->addAttrs(stateDims + stateDims, 0);
    GMatrix* pTransition = new GMatrix(pRel);
    pTransition->newRows(pActions->rows() - 1);
    for(size_t i = 0; i < pActions->rows() - 1; i++)
    {
        double* pOut = pTransition->row(i);
        GVec::copy(pOut, pActions->row(i), actionDims);
        GVec::copy(pOut + actionDims, pState->row(i), stateDims);
        GVec::copy(pOut + actionDims + stateDims, pState->row(i + 1), stateDims);
        if(delta)
            GVec::subtract(pOut + actionDims + stateDims, pState->row(i), stateDims);
    }
    pTransition->print(cout);
}
CarOnHillModel(GRand* prng, GImage* pImage, GWidgetTextLabel* pWins)
{
    m_pWins = pWins;
    m_wins = 0;
    m_pImage = pImage;
    m_carPos = 0;
    m_velocity = 0;
    m_prng = prng;

    // Load the car image and add some border so we can rotate it
    GImage tmp;
    tmp.loadPng("minicar.png");
    m_pCar = new GImage();
    m_pCar->setSize(70, 60);
    GRect r(0, 0, 60, 36);
    m_pCar->blit(5, 5, &tmp, &r);
    m_pRotatedCar = new GImage();

    // Make the agent
    GMixedRelation* pRelAgent = new GMixedRelation();
    sp_relation relAgent;
    relAgent = pRelAgent;
    pRelAgent->addAttr(0); // position
    pRelAgent->addAttr(0); // velocity
    pRelAgent->addAttr(2); // action {forward, reverse}
    double initialState[2];
    initialState[0] = m_carPos;
    initialState[1] = m_velocity;
    double goalState[2];
    goalState[0] = 2;
    goalState[1] = 0;
    m_pActionIterator = new GDiscreteActionIterator(2);
    m_pAgents[0] = new CarQAgent(relAgent, initialState, m_prng, m_pActionIterator);
    ((GQLearner*)m_pAgents[0])->setLearningRate(.9);
    ((GQLearner*)m_pAgents[0])->setDiscountFactor(0.999);
}
void DoTest(GRand* prng, double rcoeff, double alpha, double gamma, double reward, double penalty, double soft_max_thresh, bool warpRandom)
{
    printf("---------------------------------------\n");
    printf("rcoeff=%lg, alpha=%lg, gamma=%lg, reward=%lg, penalty=%lg, softMaxThresh=%lg, warpRandom=%s\n", rcoeff, alpha, gamma, reward, penalty, soft_max_thresh, warpRandom ? "true" : "false");
    printf("---------------------------------------\n");
    fflush(stdout);
    soft_max_thresh /= 100;
    double soberness = ((rcoeff * 4 / 100) - 1) / 3;

    sp_relation rel;
    GMixedRelation* pRel = new GMixedRelation();
    rel = pRel;
    pRel->addAttr(0); // x
    pRel->addAttr(0); // y
    pRel->addAttr(4); // {E,N,W,S}
    GDiscreteActionIterator it(4);
    double initialstate[2];
    initialstate[0] = 0;
    initialstate[1] = 0;
    TestQAgent agent(rel, initialstate, prng, &it, soft_max_thresh, soberness, reward, penalty, warpRandom);
    agent.setLearningRate(alpha);
    agent.setDiscountFactor(gamma);
    while(agent.GetJourneyCount() < 10000)
        agent.Iterate();
}