double HumanAgent::evaluatePolicy(Environment<bool>& env){
#ifdef __USE_SDL
	Action action;
	int reward = 0;
	int totalReward = 0;
	int cumulativeReward = 0;

	//Repeat (for each episode):
	for(int episode = 0; episode < numEpisodesToEval; episode++){
		int step = 0;
		while(!env.game_over() && step < maxStepsInEpisode){
			action = receiveAction();
			//If one wants to save trajectories, this is where the trajectory is saved:
			if(toSaveTrajectory){
				saveTrajectory(action);
			}
			reward = env.act(action);
			cumulativeReward += reward;
			step++;
		}
		printf("Episode %d, Cumulative Reward: %d\n", episode + 1, cumulativeReward);
		totalReward += cumulativeReward;
		cumulativeReward = 0;
		env.reset_game(); //Start the game again when the episode is over
	}
	return double(totalReward)/numEpisodesToEval;
#endif
	//Without SDL there is no way to read human input, so evaluation cannot run:
	return 0.0;
}
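// Usage sketch (not part of the original file; names and setup are assumptions):
// a driver evaluating the human agent. It assumes an Environment<bool> was
// already constructed with a loaded ROM, and that the agent's parameters
// (numEpisodesToEval, maxStepsInEpisode, toSaveTrajectory) were set elsewhere.
// The binary must be built with __USE_SDL so receiveAction() can read input.
//
//   HumanAgent agent;                                // hypothetical construction
//   double avgReward = agent.evaluatePolicy(env);    // env set up beforehand
//   printf("Average reward per episode: %f\n", avgReward);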
double RandomAgent::evaluatePolicy(Environment<bool>& env){
	int reward = 0;
	int totalReward = 0;
	int cumulativeReward = 0;
	int numActions;
	ActionVect actions;

	//Check whether one wants to sample only from the minimal action set
	//(the actions that actually matter in this game) or from the full legal set:
	if(useMinActions){
		actions = env.getMinimalActionSet();
	}
	else{
		actions = env.getLegalActionSet();
	}
	numActions = actions.size();
	printf("Number of Actions: %d\n\n", numActions);

	//Repeat (for each episode):
	for(int episode = 0; episode < numEpisodesToEval; episode++){
		int step = 0;
		while(!env.game_over() && step < maxStepsInEpisode){
			//Pick an action uniformly at random and take it:
			reward = env.act(actions[rand()%numActions]);
			cumulativeReward += reward;
			step++;
		}
		printf("Episode %d, Cumulative Reward: %d\n", episode + 1, cumulativeReward);
		totalReward += cumulativeReward;
		cumulativeReward = 0;
		env.reset_game(); //Start the game again when the episode is over
	}
	return double(totalReward)/numEpisodesToEval;
}
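// Reproducibility note (an assumption, not from the original file): the random
// agent draws actions with rand(), so the caller should seed the generator
// once before evaluation; an unseeded program replays the same default
// sequence (equivalent to srand(1)) on every run. A minimal sketch:
//
//   srand(seed);                                     // fixed seed -> repeatable rollouts
//   RandomAgent agent;                               // hypothetical construction
//   double avgReward = agent.evaluatePolicy(env);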