/** * Tell the agent to stop learning, then execute n episodes with his current * policy. Estimate the mean and variance of the return over these episodes. */ evaluation_point_t *evaluate_agent(){ int i=0; double sum=0; double sum_of_squares=0; double this_return=0; double mean; double variance; int n=10; evaluation_point_t *eval_point=0; RL_agent_message("freeze learning"); for(i=0;i<n;i++){ /* We use a cutoff here in case the policy is bad and will never end an episode */ RL_episode(5000); this_return=RL_return(); sum+=this_return; sum_of_squares+=this_return*this_return; } mean=sum/(double)n; variance = (sum_of_squares - (double)n*mean*mean)/((double)n - 1.0f); eval_point=(evaluation_point_t *)malloc(sizeof(evaluation_point_t)); eval_point->mean=mean; eval_point->standard_dev=sqrt(variance); RL_agent_message("unfreeze learning"); return eval_point; }
int main(int argc, char *argv[]) { const char* task_spec; const reward_observation_action_terminal_t *stepResponse; const observation_action_t *startResponse; printf("\n\nExperiment starting up!\n"); task_spec=RL_init(); printf("RL_init called, the environment sent task spec: %s\n",task_spec); // RL_env_message and RL_agent_message may be used to communicate with the environment // and agent, respectively. See RL-Glue documentation for details. // const char* responseMessage; // responseMessage=RL_agent_message("what is your name?"); printf("\n\n----------Running a few episodes----------\n"); // Use the RL-Glue-provided RL_episode to run a few episodes of ALE. // 0 means no limit at all. runEpisode(10000); runEpisode(0); runEpisode(0); runEpisode(0); runEpisode(0); RL_cleanup(); printf("\n\n----------Stepping through an episode----------\n"); // The following demonstrates how to step through an episode. task_spec=RL_init(); // Start the episode startResponse=RL_start(); printf("First action was: %d\n", startResponse->action->intArray[0]); // Run one step stepResponse=RL_step(); // Run until end of episode while(stepResponse->terminal != 1) { stepResponse=RL_step(); } // Demonstrates other RL-Glue functionality. printf("It ran for %d steps, total reward was: %f\n",RL_num_steps(), RL_return()); RL_cleanup(); return 0; }
// This uses RL-Glue to run a single episode. void runEpisode(int stepLimit) { int terminal=RL_episode(stepLimit); printf("Episode %d\t %d steps \t%f total reward\t %d natural end \n",whichEpisode,RL_num_steps(),RL_return(), terminal); whichEpisode++; }
int main(int argc, char *argv[]) { const char* task_spec; const char* responseMessage; const reward_observation_action_terminal_t *stepResponse; const observation_action_t *startResponse; printf("\n\nExperiment starting up!\n"); task_spec=RL_init(); printf("RL_init called, the environment sent task spec: %s\n",task_spec); printf("\n\n----------Sending some sample messages----------\n"); /*Talk to the agent and environment a bit...*/ responseMessage=RL_agent_message("what is your name?"); printf("Agent responded to \"what is your name?\" with: %s\n",responseMessage); responseMessage=RL_agent_message("If at first you don't succeed; call it version 1.0"); printf("Agent responded to \"If at first you don't succeed; call it version 1.0\" with: %s\n\n",responseMessage); responseMessage=RL_env_message("what is your name?"); printf("Environment responded to \"what is your name?\" with: %s\n",responseMessage); responseMessage=RL_env_message("If at first you don't succeed; call it version 1.0"); printf("Environment responded to \"If at first you don't succeed; call it version 1.0\" with: %s\n",responseMessage); printf("\n\n----------Running a few episodes----------\n"); runEpisode(100); runEpisode(100); runEpisode(100); runEpisode(100); runEpisode(100); runEpisode(1); /* Remember that stepLimit of 0 means there is no limit at all!*/ runEpisode(0); RL_cleanup(); printf("\n\n----------Stepping through an episode----------\n"); /*We could also start over and do another experiment */ task_spec=RL_init(); /*We could run one step at a time instead of one episode at a time */ /*Start the episode */ startResponse=RL_start(); printf("First observation and action were: %d %d\n",startResponse->observation->intArray[0],startResponse->action->intArray[0]); /*Run one step */ stepResponse=RL_step(); /*Run until the episode ends*/ while(stepResponse->terminal!=1) { stepResponse=RL_step(); if(stepResponse->terminal!=1) { /*Could optionally print state,action pairs */ /*printf("(%d,%d) ",stepResponse.o.intArray[0],stepResponse.a.intArray[0]);*/ } } printf("\n\n----------Summary----------\n"); printf("It ran for %d steps, total reward was: %f\n",RL_num_steps(),RL_return()); RL_cleanup(); return 0; }