int main(int argc, char *argv[]) { const char* task_spec; long t0,t1; int steps; task_spec=RL_init(); //Run an episode to get the System warmed up, etc RL_episode(500); t0=get_current_ms_time(); RL_episode(0); t1=get_current_ms_time(); steps=RL_num_steps(); printf ("\telapsed time in ms: %ld, per step is %f\n", (t1 - t0), ((float)(t1-t0)/(float)steps)); t0=get_current_ms_time(); RL_episode(0); t1=get_current_ms_time(); steps=RL_num_steps(); printf ("\telapsed time in ms: %ld, per step is %f\n", (t1 - t0), ((float)(t1-t0)/(float)steps)); RL_cleanup(); if(tests_failed!=0) printf("Failed %d / %d checks in %s\n",tests_failed,test_count, __FILE__); else printf("Passed all %d checks in %s\n",test_count,__FILE__); return tests_failed; }
int main(int argc, char *argv[]) { const char* task_spec; const reward_observation_action_terminal_t *stepResponse; const observation_action_t *startResponse; printf("\n\nExperiment starting up!\n"); task_spec=RL_init(); printf("RL_init called, the environment sent task spec: %s\n",task_spec); // RL_env_message and RL_agent_message may be used to communicate with the environment // and agent, respectively. See RL-Glue documentation for details. // const char* responseMessage; // responseMessage=RL_agent_message("what is your name?"); printf("\n\n----------Running a few episodes----------\n"); // Use the RL-Glue-provided RL_episode to run a few episodes of ALE. // 0 means no limit at all. runEpisode(10000); runEpisode(0); runEpisode(0); runEpisode(0); runEpisode(0); RL_cleanup(); printf("\n\n----------Stepping through an episode----------\n"); // The following demonstrates how to step through an episode. task_spec=RL_init(); // Start the episode startResponse=RL_start(); printf("First action was: %d\n", startResponse->action->intArray[0]); // Run one step stepResponse=RL_step(); // Run until end of episode while(stepResponse->terminal != 1) { stepResponse=RL_step(); } // Demonstrates other RL-Glue functionality. printf("It ran for %d steps, total reward was: %f\n",RL_num_steps(), RL_return()); RL_cleanup(); return 0; }
/*
 * Test driver for RL_episode() step cutoffs.
 *
 * For each step limit in the table it runs one episode, then checks the
 * value returned by RL_episode() and the value reported by RL_num_steps()
 * against the expected pair.  A limit of 0 means no cutoff.
 *
 * Returns the global tests_failed counter.
 */
int main(int argc, char *argv[])
{
    /* {step limit, expected RL_episode() result, expected RL_num_steps()} */
    static const struct {
        int step_limit;
        int expect_terminal;
        int expect_steps;
    } cases[] = {
        { 0, 1, 5 }, /* No cutoff */
        { 1, 0, 1 },
        { 2, 0, 2 },
        { 4, 0, 4 },
        { 5, 0, 5 },
        { 6, 1, 5 },
        { 7, 1, 5 },
    };
    size_t i;

    RL_init();

    for (i = 0; i < sizeof cases / sizeof cases[0]; i++) {
        int ended = RL_episode(cases[i].step_limit);

        check_fail(ended != cases[i].expect_terminal);
        check_fail(RL_num_steps() != cases[i].expect_steps);
    }

    RL_cleanup();

    if (tests_failed != 0) {
        printf("Failed %d / %d checks in %s\n", tests_failed, test_count, __FILE__);
    } else {
        printf("Passed all %d checks in %s\n", test_count, __FILE__);
    }

    return tests_failed;
}
// This uses RL-Glue to run a single episode. void runEpisode(int stepLimit) { int terminal=RL_episode(stepLimit); printf("Episode %d\t %d steps \t%f total reward\t %d natural end \n",whichEpisode,RL_num_steps(),RL_return(), terminal); whichEpisode++; }
int main(int argc, char *argv[]) { const char* task_spec; const char* responseMessage; const reward_observation_action_terminal_t *stepResponse; const observation_action_t *startResponse; printf("\n\nExperiment starting up!\n"); task_spec=RL_init(); printf("RL_init called, the environment sent task spec: %s\n",task_spec); printf("\n\n----------Sending some sample messages----------\n"); /*Talk to the agent and environment a bit...*/ responseMessage=RL_agent_message("what is your name?"); printf("Agent responded to \"what is your name?\" with: %s\n",responseMessage); responseMessage=RL_agent_message("If at first you don't succeed; call it version 1.0"); printf("Agent responded to \"If at first you don't succeed; call it version 1.0\" with: %s\n\n",responseMessage); responseMessage=RL_env_message("what is your name?"); printf("Environment responded to \"what is your name?\" with: %s\n",responseMessage); responseMessage=RL_env_message("If at first you don't succeed; call it version 1.0"); printf("Environment responded to \"If at first you don't succeed; call it version 1.0\" with: %s\n",responseMessage); printf("\n\n----------Running a few episodes----------\n"); runEpisode(100); runEpisode(100); runEpisode(100); runEpisode(100); runEpisode(100); runEpisode(1); /* Remember that stepLimit of 0 means there is no limit at all!*/ runEpisode(0); RL_cleanup(); printf("\n\n----------Stepping through an episode----------\n"); /*We could also start over and do another experiment */ task_spec=RL_init(); /*We could run one step at a time instead of one episode at a time */ /*Start the episode */ startResponse=RL_start(); printf("First observation and action were: %d %d\n",startResponse->observation->intArray[0],startResponse->action->intArray[0]); /*Run one step */ stepResponse=RL_step(); /*Run until the episode ends*/ while(stepResponse->terminal!=1) { stepResponse=RL_step(); if(stepResponse->terminal!=1) { /*Could optionally print state,action pairs */ /*printf("(%d,%d) 
",stepResponse.o.intArray[0],stepResponse.a.intArray[0]);*/ } } printf("\n\n----------Summary----------\n"); printf("It ran for %d steps, total reward was: %f\n",RL_num_steps(),RL_return()); RL_cleanup(); return 0; }