// virtual void GKNN::enableIncrementalLearning(sp_relation& pRelation, int labelDims, double* pMins, double* pRanges) { clear(); m_pRelation = pRelation; m_labelDims = labelDims; m_featureDims = pRelation->size() - m_labelDims; m_pInstances = new GData(m_pRelation); GMixedRelation* pMixedRel = new GMixedRelation(); pMixedRel->addAttrs(pRelation.get(), 0, pRelation->size() - labelDims); sp_relation pRelInputs = pMixedRel; m_pDistanceMetric->init(pRelInputs); // Allocate some other buffers int maxOutputValueCount = 0; for(int n = 0; n < m_labelDims; n++) maxOutputValueCount = MAX(maxOutputValueCount, pRelation->valueCount(m_featureDims + n)); m_pValueCounts = new double[maxOutputValueCount]; // Scale factor optimization if(m_optimizeScaleFactors) { m_pCritic = new GKnnScaleFactorCritic(this, m_labelDims, m_featureDims); m_pScaleFactorOptimizer = new GMomentumGreedySearch(m_pCritic); } else { m_pCritic = NULL; m_pScaleFactorOptimizer = NULL; } }
// Builds a transition dataset from an action dataset and a state dataset and
// prints it to stdout.
//
// args supplies, in order: the filename of the action data, the filename of
// the state data, then optional flags. The only recognized flag is "-delta".
// Both datasets must have the same number of rows, otherwise an error is
// thrown; any unrecognized option also throws.
//
// Output row i is laid out as [action(i) | state(i) | state(i + 1)]. With
// "-delta", the last segment has state(i) subtracted from it (presumably
// GVec::subtract performs dest -= src -- confirm against GVec). The output
// has one row fewer than the inputs, or zero rows when the inputs are empty.
void transition(GArgReader& args)
{
	// Load the input data
	GMatrix* pActions = loadData(args.pop_string());
	Holder<GMatrix> hActions(pActions);
	GMatrix* pState = loadData(args.pop_string());
	Holder<GMatrix> hState(pState);
	if(pState->rows() != pActions->rows())
		ThrowError("Expected the same number of rows in both datasets");

	// Parse options
	bool delta = false;
	while(args.size() > 0)
	{
		if(args.if_pop("-delta"))
			delta = true;
		else
			ThrowError("Invalid option: ", args.peek());
	}

	// Describe the output columns: the action attributes followed by
	// 2 * stateDims additional attributes (current state, then next state)
	size_t actionDims = pActions->cols();
	size_t stateDims = pState->cols();
	GMixedRelation* pRelation = new GMixedRelation();
	sp_relation pRel = pRelation;
	pRelation->addAttrs(pActions->relation().get());
	pRelation->addAttrs(stateDims + stateDims, 0);

	// Row counts are unsigned, so "rows() - 1" would wrap around to a huge
	// value on empty input. Clamp to zero transitions in that case.
	const size_t rowCount = pActions->rows();
	const size_t transitionCount = (rowCount > 0 ? rowCount - 1 : 0);

	// Make the output data. The holder releases the matrix on every exit
	// path, matching how the input matrices are managed above.
	GMatrix* pTransition = new GMatrix(pRel);
	Holder<GMatrix> hTransition(pTransition);
	pTransition->newRows(transitionCount);
	for(size_t i = 0; i < transitionCount; i++)
	{
		double* pOut = pTransition->row(i);
		GVec::copy(pOut, pActions->row(i), actionDims);
		GVec::copy(pOut + actionDims, pState->row(i), stateDims);
		GVec::copy(pOut + actionDims + stateDims, pState->row(i + 1), stateDims);
		if(delta)
			GVec::subtract(pOut + actionDims + stateDims, pState->row(i), stateDims);
	}
	pTransition->print(cout);
}
// Constructs a Q-learning agent that stores its Q-values in the supplied
// incremental learner. The learner is given a relation made of the attributes
// of pObsControlRelation plus one extra attribute for the Q-value, and is told
// that it has a single label dimension. The action iterator is reset to
// pInitialState before the constructor returns.
GIncrementalLearnerQAgent::GIncrementalLearnerQAgent(sp_relation& pObsControlRelation, GIncrementalLearner* pQTable, int actionDims, double* pInitialState, GRand* pRand, GAgentActionIterator* pActionIterator, double softMaxThresh)
: GQLearner(pObsControlRelation, actionDims, pInitialState, pRand, pActionIterator)
{
	// Plain member setup
	m_pQTable = pQTable;
	m_softMaxThresh = softMaxThresh;
	m_pActionIterator = pActionIterator;

	// Build the relation for the Q-table: every observation/control attribute
	// followed by one more attribute for the Q-value
	GMixedRelation* pTableRel = new GMixedRelation();
	pTableRel->addAttrs(pObsControlRelation.get());
	pTableRel->addAttr(0); // the Q-value
	sp_relation hTableRel;
	hTableRel = pTableRel;

	// Enable incremental learning with one label dimension (the Q-value)
	pQTable->enableIncrementalLearning(hTableRel, 1, NULL, NULL);

	// Working buffer with one element per attribute of the Q-table relation
	m_pBuf = new double[pTableRel->size()];

	// Start the action iterator at the initial state
	pActionIterator->reset(pInitialState);
}