int main(int argc, char *argv[]) { const char* task_spec; long t0,t1; int steps; task_spec=RL_init(); //Run an episode to get the System warmed up, etc RL_episode(500); t0=get_current_ms_time(); RL_episode(0); t1=get_current_ms_time(); steps=RL_num_steps(); printf ("\telapsed time in ms: %ld, per step is %f\n", (t1 - t0), ((float)(t1-t0)/(float)steps)); t0=get_current_ms_time(); RL_episode(0); t1=get_current_ms_time(); steps=RL_num_steps(); printf ("\telapsed time in ms: %ld, per step is %f\n", (t1 - t0), ((float)(t1-t0)/(float)steps)); RL_cleanup(); if(tests_failed!=0) printf("Failed %d / %d checks in %s\n",tests_failed,test_count, __FILE__); else printf("Passed all %d checks in %s\n",test_count,__FILE__); return tests_failed; }
/** * Tell the agent to stop learning, then execute n episodes with his current * policy. Estimate the mean and variance of the return over these episodes. */ evaluation_point_t *evaluate_agent(){ int i=0; double sum=0; double sum_of_squares=0; double this_return=0; double mean; double variance; int n=10; evaluation_point_t *eval_point=0; RL_agent_message("freeze learning"); for(i=0;i<n;i++){ /* We use a cutoff here in case the policy is bad and will never end an episode */ RL_episode(5000); this_return=RL_return(); sum+=this_return; sum_of_squares+=this_return*this_return; } mean=sum/(double)n; variance = (sum_of_squares - (double)n*mean*mean)/((double)n - 1.0f); eval_point=(evaluation_point_t *)malloc(sizeof(evaluation_point_t)); eval_point->mean=mean; eval_point->standard_dev=sqrt(variance); RL_agent_message("unfreeze learning"); return eval_point; }
/* Freeze the agent's policy and evaluate it before training and after every
 * EPISODES_PER_BLOCK training episodes, NUM_BLOCKS times; write all scores
 * to results.csv.  The constants below were previously the coupled magic
 * numbers 20/21/25 scattered through the body. */
void offline_demo() {
    enum {
        NUM_BLOCKS = 20,        /* evaluation rounds after the baseline */
        EPISODES_PER_BLOCK = 25, /* training episodes between evaluations */
        NUM_POINTS = NUM_BLOCKS + 1 /* +1 for the baseline evaluation */
    };
    evaluation_point_t *statistics[NUM_POINTS];
    evaluation_point_t *this_score;
    int i, j;

    /* Baseline evaluation before any training. */
    this_score = evaluate_agent();
    print_score(0, this_score);
    statistics[0] = this_score;

    for (i = 0; i < NUM_BLOCKS; i++) {
        for (j = 0; j < EPISODES_PER_BLOCK; j++) {
            RL_episode(0); /* uncapped training episode */
        }
        this_score = evaluate_agent();
        print_score((i + 1) * EPISODES_PER_BLOCK, this_score);
        statistics[i + 1] = this_score;
    }

    save_result_csv(statistics, "results.csv");

    /* evaluate_agent() transfers ownership of each point to us. */
    for (i = 0; i < NUM_POINTS; i++) {
        free(statistics[i]);
    }
}
/*
 * Exercises RL_episode()'s step-cutoff contract against a test environment
 * whose episodes end naturally after 5 steps (inferred from the expected
 * values below -- confirm against the companion environment).
 *
 * Contract checked:
 *   - cutoff 0 means "no limit": episode ends naturally (returns 1, 5 steps);
 *   - a cutoff <= the natural length truncates the episode (returns 0)
 *     after exactly `cutoff` steps -- note cutoff 5 still counts as a
 *     truncation even though 5 steps were taken;
 *   - a cutoff > the natural length lets the episode finish naturally
 *     (returns 1) in 5 steps.
 *
 * Exit status is the number of failed checks.
 */
int main(int argc, char *argv[]) {
    RL_init();

    /* No cutoff */
    int isTerminal = RL_episode(0);
    check_fail(isTerminal != 1);
    check_fail(RL_num_steps() != 5);

    /* Cutoffs at or below the natural episode length truncate it. */
    isTerminal = RL_episode(1);
    check_fail(isTerminal != 0);
    check_fail(RL_num_steps() != 1);

    isTerminal = RL_episode(2);
    check_fail(isTerminal != 0);
    check_fail(RL_num_steps() != 2);

    isTerminal = RL_episode(4);
    check_fail(isTerminal != 0);
    check_fail(RL_num_steps() != 4);

    /* Cutoff equal to the natural length: episode is reported as cut off. */
    isTerminal = RL_episode(5);
    check_fail(isTerminal != 0);
    check_fail(RL_num_steps() != 5);

    /* Cutoffs beyond the natural length: episode ends naturally. */
    isTerminal = RL_episode(6);
    check_fail(isTerminal != 1);
    check_fail(RL_num_steps() != 5);

    isTerminal = RL_episode(7);
    check_fail(isTerminal != 1);
    check_fail(RL_num_steps() != 5);

    RL_cleanup();

    if (tests_failed != 0)
        printf("Failed %d / %d checks in %s\n", tests_failed, test_count, __FILE__);
    else
        printf("Passed all %d checks in %s\n", test_count, __FILE__);

    return tests_failed;
}
// This uses RL-Glue to run a single episode. void runEpisode(int stepLimit) { int terminal=RL_episode(stepLimit); printf("Episode %d\t %d steps \t%f total reward\t %d natural end \n",whichEpisode,RL_num_steps(),RL_return(), terminal); whichEpisode++; }