示例#1
0
double HumanAgent::evaluatePolicy(Environment<bool>& env){
#ifdef __USE_SDL
	Action action;
	int reward = 0;
    int totalReward = 0;
	int cumulativeReward = 0;
	
	//Repeat (for each episode):
	for(int episode = 0; episode < numEpisodesToEval; episode++){
		int step = 0;
		while(!env.game_over() && step < maxStepsInEpisode) {
			action = receiveAction();
            //If one wants to save trajectories, this is where the trajectory is saved:
            if(toSaveTrajectory){
                saveTrajectory(action);
            }
			reward = env.act(action);
			cumulativeReward += reward;
			step++;
		}
		printf("Episode %d, Cumulative Reward: %d\n", episode + 1, cumulativeReward);
        totalReward += cumulativeReward;
		cumulativeReward = 0;
		env.reset_game(); //Start the game again when the episode is over
	}
    return double(totalReward)/numEpisodesToEval;
}
示例#2
0
double RandomAgent::evaluatePolicy(Environment<bool>& env){
	int reward = 0;
	int totalReward = 0;
	int cumulativeReward = 0;
	int numActions;
	ActionVect actions;
	//Check if one wants to sample from all possible actions or only the valid ones:
	if(useMinActions){
		actions = env.getMinimalActionSet();
	}
	else{
		actions = env.getLegalActionSet();
	}
	numActions = actions.size();
	printf("Number of Actions: %d\n\n", numActions);
	//Repeat (for each episode):
	for(int episode = 0; episode < numEpisodesToEval; episode++){
		int step = 0;
		while(!env.game_over() && step < maxStepsInEpisode) {
			reward = env.act(actions[rand()%numActions]);
			cumulativeReward += reward;
			step++;
		}
		printf("Episode %d, Cumulative Reward: %d\n", episode + 1, cumulativeReward);
		totalReward += cumulativeReward;
		cumulativeReward = 0;
		env.reset_game(); //Start the game again when the episode is over
	}
	return double(totalReward)/numEpisodesToEval;
}