Esempio n. 1
0
/**
 * Empties every feature map stored in the reward table, one per
 * (action, start state) pair. Unless bExternVisitSparse is set, the
 * feature maps of the visit table are emptied in the same way.
 */
void CFeatureRewardModel::resetData()
{
	for (unsigned int a = 0; a < getNumActions(); ++a)
	{
		for (unsigned int s = 0; s < discretizer->getDiscreteStateSize(); ++s)
		{
			rewardTable->get(a, s)->clear();
		}
	}

	// With bExternVisitSparse set, the visit table is not touched here.
	if (this->bExternVisitSparse)
	{
		return;
	}

	for (unsigned int a = 0; a < getNumActions(); ++a)
	{
		for (unsigned int s = 0; s < discretizer->getDiscreteStateSize(); ++s)
		{
			visitTable->get(a, s)->clear();
		}
	}
}
Esempio n. 2
0
/**
 * Reads the reward table and, unless bExternVisitSparse is set, the visit
 * table from a text stream.
 *
 * The expected layout matches what saveData() writes: a table header line
 * ("Reward Table" / "Visit Table"), then for each action an "Action <n>:"
 * line followed by one "Startstate <s> [<count>]: (<endState> <value>)..."
 * line per start state.
 *
 * Every feature map is cleared before it is refilled. The action and state
 * numbers found in the file are parsed but not used; rows are assigned in
 * iteration order.
 *
 * @param stream  Input stream positioned at the "Reward Table" header.
 */
void CFeatureRewardModel::loadData(FILE *stream)
{
	CFeatureMap *featMap;
	int buf, numVal = 0, endState;
	double reward;

	fscanf(stream, "Reward Table\n");

	for (unsigned int action = 0; action < getNumActions(); action ++)
	{
		fscanf(stream, "Action %d:\n", &buf);
		for (unsigned int startState = 0; startState < discretizer->getDiscreteStateSize(); startState ++)
		{
			featMap = rewardTable->get(action, startState);

			featMap->clear();

			// Reset the count so a failed header parse cannot reuse the
			// entry count of the previous row.
			numVal = 0;
			fscanf(stream, "Startstate %d [%d]: ", &buf, &numVal);

			for (int i = 0; i < numVal; i ++)
			{
				// Store only fully parsed pairs; stop at the first malformed
				// one instead of inserting stale values.
				if (fscanf(stream, "(%d %lf)", &endState, &reward) != 2)
				{
					break;
				}
				(*featMap)[endState] = reward;
			}
			fscanf(stream, "\n");
		}
		fscanf(stream, "\n");
	}

	if (!this->bExternVisitSparse)
	{
		// Consume the section header. This is an input stream, so the header
		// must be read with fscanf, not written with fprintf.
		fscanf(stream, "Visit Table\n");

		for (unsigned int action = 0; action < getNumActions(); action ++)
		{
			fscanf(stream, "Action %d:\n", &buf);
			for (unsigned int startState = 0; startState < discretizer->getDiscreteStateSize(); startState ++)
			{
				featMap = visitTable->get(action, startState);

				featMap->clear();

				numVal = 0;
				fscanf(stream, "Startstate %d [%d]: ", &buf, &numVal);

				for (int i = 0; i < numVal; i ++)
				{
					if (fscanf(stream, "(%d %lf)", &endState, &reward) != 2)
					{
						break;
					}
					(*featMap)[endState] = reward;
				}
				fscanf(stream, "\n");
			}
			fscanf(stream, "\n");
		}
	}
}
Esempio n. 3
0
// Picks an action for the given state: when a drand48() draw falls below
// epsilon, a second draw is scaled to an action index in
// [0, getNumActions()); otherwise the result of argmaxQ(state) is returned.
int SarsaAgent::selectAction(double state[]){

  // Draw was not below epsilon: take the action chosen by argmaxQ.
  if(!(drand48() < epsilon)){
    return argmaxQ(state);
  }

  // The modulo keeps the index in range.
  return static_cast<int>(drand48() * getNumActions()) % getNumActions();
}
Esempio n. 4
0
/**
 * Writes the reward table and, unless bExternVisitSparse is set, the visit
 * table to a text stream.
 *
 * Layout: a table header line ("Reward Table" / "Visit Table"), then for
 * each action an "Action <n>:" line followed by one
 * "Startstate <s> [<count>]: (<key> <value>)..." line per start state, and
 * a blank line after each action.
 *
 * @param stream  Output stream to write to.
 */
void CFeatureRewardModel::saveData(FILE *stream)
{
	CFeatureMap::iterator mapIt;
	CFeatureMap *featMap;
	fprintf(stream, "Reward Table\n");

	for (unsigned int action = 0; action < getNumActions(); action ++)
	{
		// %u matches the unsigned loop index.
		fprintf(stream, "Action %u:\n", action);
		for (unsigned int startState = 0; startState < discretizer->getDiscreteStateSize(); startState ++)
		{
			featMap = rewardTable->get(action, startState);

			// size() is not an int; cast it so the argument matches %d.
			fprintf(stream, "Startstate %u [%d]: ", startState, static_cast<int>(featMap->size()));

			for (mapIt = featMap->begin(); mapIt != featMap->end(); ++mapIt)
			{
				fprintf(stream, "(%d %f)", mapIt->first, mapIt->second);
			}
			fprintf(stream, "\n");
		}
		fprintf(stream, "\n");
	}

	if (!this->bExternVisitSparse)
	{
		fprintf(stream, "Visit Table\n");

		for (unsigned int action = 0; action < getNumActions(); action ++)
		{
			fprintf(stream, "Action %u:\n", action);
			for (unsigned int startState = 0; startState < discretizer->getDiscreteStateSize(); startState ++)
			{
				featMap = visitTable->get(action, startState);

				fprintf(stream, "Startstate %u [%d]: ", startState, static_cast<int>(featMap->size()));

				for (mapIt = featMap->begin(); mapIt != featMap->end(); ++mapIt)
				{
					fprintf(stream, "(%d %f)", mapIt->first, mapIt->second);
				}
				fprintf(stream, "\n");
			}
			fprintf(stream, "\n");
		}
	}
}
Esempio n. 5
0
/**
 * Builds a reward model that keeps its own visit table.
 *
 * Allocates a reward table and a visit table, each sized
 * getNumActions() x discretizer->getDiscreteStateSize(), and fills every
 * slot with a new, empty CFeatureMap. bExternVisitSparse is set to false.
 *
 * @param actions      Forwarded to the CActionObject base.
 * @param function     Forwarded to the CSemiMDPRewardListener base.
 * @param discretizer  Forwarded to the CFeatureRewardFunction base; also
 *                     supplies the number of discrete states.
 */
CFeatureRewardModel::CFeatureRewardModel(CActionSet *actions, CRewardFunction *function, CStateModifier *discretizer) : CFeatureRewardFunction(discretizer), CSemiMDPRewardListener(function), CActionObject(actions)
{
	int i;

	this->rewardTable = new CMyArray2D<CFeatureMap *>(getNumActions(), discretizer->getDiscreteStateSize());

	for (i = 0; i < rewardTable->getSize(); i++)
	{
		rewardTable->set1D(i, new CFeatureMap());
	}	
	
	this->visitTable = new CMyArray2D<CFeatureMap *>(getNumActions(), discretizer->getDiscreteStateSize());

	for (i = 0; i < visitTable->getSize(); i++)
	{
		visitTable->set1D(i, new CFeatureMap());
	}

	// No external model is supplied on this path; do not leave the pointer
	// holding an indeterminate value.
	this->model = NULL;
	this->bExternVisitSparse = false;
}
Esempio n. 6
0
/**
 * Builds a reward model that is handed an external estimated model instead
 * of allocating its own visit table.
 *
 * Allocates the reward table, sized
 * getNumActions() x discretizer->getDiscreteStateSize(), and fills every
 * slot with a new, empty CFeatureMap. The model pointer is stored and
 * bExternVisitSparse is set to true.
 *
 * @param actions      Forwarded to the CActionObject base.
 * @param function     Forwarded to the CSemiMDPRewardListener base.
 * @param model        Stored as-is; not allocated or copied here.
 * @param discretizer  Forwarded to the CFeatureRewardFunction base; also
 *                     supplies the number of discrete states.
 */
CFeatureRewardModel::CFeatureRewardModel(CActionSet *actions, CRewardFunction *function, CAbstractFeatureStochasticEstimatedModel *model, CStateModifier *discretizer) : CFeatureRewardFunction(discretizer), CSemiMDPRewardListener(function), CActionObject(actions)
{
	this->rewardTable = new CMyArray2D<CFeatureMap *>(getNumActions(), discretizer->getDiscreteStateSize());

	for (int i = 0; i < rewardTable->getSize(); i++)
	{
		rewardTable->set1D(i, new CFeatureMap());
	}	
	
	// No visit table is allocated on this path; do not leave the pointer
	// holding an indeterminate value.
	this->visitTable = NULL;

	this->model = model;

	this->bExternVisitSparse = true;
}
Esempio n. 7
0
int SarsaAgent::argmaxQ(double state[]){
  
  double Q[getNumActions()];

  FA->setState(state);

  for(int i = 0; i < getNumActions(); i++){
    Q[i] = FA->computeQ(i);
  }
  
  int bestAction = 0;
  double bestValue = Q[bestAction];
  int numTies = 0;

  double EPS=1.0e-4;

  for (int a = 1; a < getNumActions(); a++){

    double value = Q[a];
    if(fabs(value - bestValue) < EPS){
      numTies++;
      
      if(drand48() < (1.0 / (numTies + 1))){
	bestValue = value;
	bestAction = a;
      }
    }
    else if (value > bestValue){
      bestValue = value;
      bestAction = a;
      numTies = 0;
    }
  }
  
  return bestAction;
}