// Clears all learned statistics: every per-(action,startState) reward feature
// map, and — when the visit counts are owned by this object rather than an
// external model — every visit feature map as well.
void CFeatureRewardModel::resetData()
{
    unsigned int numStates = discretizer->getDiscreteStateSize();

    for (unsigned int a = 0; a < getNumActions(); a++)
    {
        for (unsigned int s = 0; s < numStates; s++)
        {
            rewardTable->get(a, s)->clear();
        }
    }

    // Visit counts live in the external model when bExternVisitSparse is set;
    // in that case there is nothing of ours to clear.
    if (this->bExternVisitSparse)
    {
        return;
    }

    for (unsigned int a = 0; a < getNumActions(); a++)
    {
        for (unsigned int s = 0; s < numStates; s++)
        {
            visitTable->get(a, s)->clear();
        }
    }
}
void CFeatureRewardModel::loadData(FILE *stream) { CFeatureMap *featMap; fscanf(stream, "Reward Table\n"); int buf, numVal = 0, endState; double reward; for (unsigned int action = 0; action < getNumActions(); action ++) { fscanf(stream, "Action %d:\n", &buf); for (unsigned int startState = 0; startState < discretizer->getDiscreteStateSize(); startState ++) { featMap = rewardTable->get(action, startState); featMap->clear(); fscanf(stream, "Startstate %d [%d]: ", &buf, &numVal); for (int i = 0; i < numVal; i ++) { fscanf(stream, "(%d %lf)", &endState, &reward); (*featMap)[endState] = reward; } fscanf(stream, "\n"); } fscanf(stream, "\n"); } if (!this->bExternVisitSparse) { fprintf(stream, "Visit Table\n"); for (unsigned int action = 0; action < getNumActions(); action ++) { fscanf(stream, "Action %d:\n", &buf); for (unsigned int startState = 0; startState < discretizer->getDiscreteStateSize(); startState ++) { featMap = visitTable->get(action, startState); featMap->clear(); fscanf(stream, "Startstate %d [%d]: ", &buf, &numVal); for (int i = 0; i < numVal; i ++) { fscanf(stream, "(%d %lf)", &endState, &reward); (*featMap)[endState] = reward; } fscanf(stream, "\n"); } fscanf(stream, "\n"); } } }
// Epsilon-greedy action selection: with probability epsilon pick a uniformly
// random action, otherwise pick the greedy action for this state.
int SarsaAgent::selectAction(double state[])
{
    // Exploit unless the exploration coin flip succeeds.
    if (drand48() >= epsilon)
    {
        return argmaxQ(state);
    }

    // Explore: uniform random action. The modulo guards the (theoretical)
    // edge where drand48() * getNumActions() rounds up to getNumActions().
    return (int)(drand48() * getNumActions()) % getNumActions();
}
void CFeatureRewardModel::saveData(FILE *stream) { CFeatureMap::iterator mapIt; CFeatureMap *featMap; fprintf(stream, "Reward Table\n"); for (unsigned int action = 0; action < getNumActions(); action ++) { fprintf(stream, "Action %d:\n", action); for (unsigned int startState = 0; startState < discretizer->getDiscreteStateSize(); startState ++) { featMap = rewardTable->get(action, startState); fprintf(stream, "Startstate %d [%d]: ", startState, featMap->size()); for (mapIt = featMap->begin(); mapIt != featMap->end(); mapIt ++) { fprintf(stream, "(%d %f)", (*mapIt).first, (*mapIt).second); } fprintf(stream, "\n"); } fprintf(stream, "\n"); } if (!this->bExternVisitSparse) { fprintf(stream, "Visit Table\n"); for (unsigned int action = 0; action < getNumActions(); action ++) { fprintf(stream, "Action %d:\n", action); for (unsigned int startState = 0; startState < discretizer->getDiscreteStateSize(); startState ++) { featMap = visitTable->get(action, startState); fprintf(stream, "Startstate %d [%d]: ", startState, featMap->size()); for (mapIt = featMap->begin(); mapIt != featMap->end(); mapIt ++) { fprintf(stream, "(%d %f)", (*mapIt).first, (*mapIt).second); } fprintf(stream, "\n"); } fprintf(stream, "\n"); } } }
// Constructor for the self-contained variant: allocates both the reward table
// and a locally-owned visit table (one CFeatureMap per (action, startState)).
//
// FIX: the `model` member was left uninitialized here (the other constructor
// sets it). It is unused while bExternVisitSparse is false, but an
// uninitialized pointer member is a latent hazard — initialize it to NULL.
CFeatureRewardModel::CFeatureRewardModel(CActionSet *actions, CRewardFunction *function, CStateModifier *discretizer) : CFeatureRewardFunction(discretizer), CSemiMDPRewardListener(function), CActionObject(actions)
{
    int i;

    this->rewardTable = new CMyArray2D<CFeatureMap *>(getNumActions(), discretizer->getDiscreteStateSize());
    for (i = 0; i < rewardTable->getSize(); i++)
    {
        rewardTable->set1D(i, new CFeatureMap());
    }

    this->visitTable = new CMyArray2D<CFeatureMap *>(getNumActions(), discretizer->getDiscreteStateSize());
    for (i = 0; i < visitTable->getSize(); i++)
    {
        visitTable->set1D(i, new CFeatureMap());
    }

    // No external model in this variant; visit counts are tracked locally.
    this->model = NULL;
    this->bExternVisitSparse = false;
}
// Constructor for the external-visit-counts variant: only the reward table is
// allocated; visit statistics come from the supplied estimated model.
//
// FIX: the `visitTable` member was left uninitialized here (the other
// constructor allocates it). All uses elsewhere are guarded by
// !bExternVisitSparse, but an uninitialized pointer member would make any
// cleanup path (e.g. a destructor delete) undefined behavior — set it to NULL.
CFeatureRewardModel::CFeatureRewardModel(CActionSet *actions, CRewardFunction *function, CAbstractFeatureStochasticEstimatedModel *model, CStateModifier *discretizer) : CFeatureRewardFunction(discretizer), CSemiMDPRewardListener(function), CActionObject(actions)
{
    this->rewardTable = new CMyArray2D<CFeatureMap *>(getNumActions(), discretizer->getDiscreteStateSize());
    for (int i = 0; i < rewardTable->getSize(); i++)
    {
        rewardTable->set1D(i, new CFeatureMap());
    }

    // Visit counts are provided by the external model; no local table.
    this->visitTable = NULL;
    this->model = model;
    this->bExternVisitSparse = true;
}
// Returns the index of the action with the highest Q-value for `state`,
// breaking near-ties (within EPS) uniformly at random via reservoir sampling.
//
// FIX: the original buffered all Q-values in `double Q[getNumActions()]`, a
// variable-length array — a GCC extension that is not standard C++. The array
// is unnecessary: each Q-value is consumed once, so we stream the computation.
// The sequence of computeQ() and drand48() calls (and hence the chosen action
// for any given RNG state) is unchanged.
int SarsaAgent::argmaxQ(double state[])
{
    FA->setState(state);

    int bestAction = 0;
    double bestValue = FA->computeQ(0);
    int numTies = 0;
    double EPS = 1.0e-4;

    for (int a = 1; a < getNumActions(); a++)
    {
        double value = FA->computeQ(a);
        if (fabs(value - bestValue) < EPS)
        {
            // k-th tying candidate survives with probability 1/(k+1), which
            // yields a uniform choice among all tied actions.
            numTies++;
            if (drand48() < (1.0 / (numTies + 1)))
            {
                bestValue = value;
                bestAction = a;
            }
        }
        else if (value > bestValue)
        {
            bestValue = value;
            bestAction = a;
            numTies = 0;
        }
    }

    return bestAction;
}