int main(int argc, char *argv[]) {
  const char* task_spec;
	long t0,t1;
	int steps;

    task_spec=RL_init();
	//Run an episode to get the System warmed up, etc             
	RL_episode(500);
        
	t0=get_current_ms_time();
	RL_episode(0);
	t1=get_current_ms_time();
	
	steps=RL_num_steps();
	
	printf ("\telapsed time in ms: %ld, per step is %f\n", (t1 - t0), ((float)(t1-t0)/(float)steps));
		
	t0=get_current_ms_time();
	
	RL_episode(0);
	
	t1=get_current_ms_time();

	steps=RL_num_steps();

	printf ("\telapsed time in ms: %ld, per step is %f\n", (t1 - t0), ((float)(t1-t0)/(float)steps));
        
	RL_cleanup();

	if(tests_failed!=0)
		printf("Failed %d / %d checks in %s\n",tests_failed,test_count, __FILE__);
	else
		printf("Passed all %d checks in %s\n",test_count,__FILE__);
	return tests_failed;
}
/* Example #2 */
/* 0 */
/**
 * Tell the agent to stop learning, then execute n episodes with its current
 * policy.  Estimate the mean and standard deviation of the return over these
 * episodes, then tell the agent to resume learning.
 *
 * Returns a malloc'ed evaluation_point_t that the caller must free(), or a
 * null pointer if the allocation fails (in that case no episodes are run and
 * no message is sent to the agent).
 */
evaluation_point_t *evaluate_agent(){
	int i=0;
	double sum=0;
	double sum_of_squares=0;
	double this_return=0;
	double mean;
	double variance;
	int n=10;
	evaluation_point_t *eval_point=0;

	/* Allocate first so an out-of-memory condition is detected before any
	   side effects, instead of dereferencing a null pointer afterwards. */
	eval_point=(evaluation_point_t *)malloc(sizeof *eval_point);
	if(!eval_point){
		return eval_point;
	}

	RL_agent_message("freeze learning");
	for(i=0;i<n;i++){
		/* We use a cutoff here in case the policy is bad
		   and will never end an episode */
		RL_episode(5000);
		this_return=RL_return();
		sum+=this_return;
		sum_of_squares+=this_return*this_return;
	}

	mean=sum/(double)n;
	/* Unbiased sample variance (divides by n-1). */
	variance = (sum_of_squares - (double)n*mean*mean)/((double)n - 1.0);
	/* Floating-point rounding can leave this one-pass formula slightly
	   negative when all returns are (nearly) equal; sqrt() of a negative
	   value would yield NaN, so clamp to zero. */
	if(variance<0.0){
		variance=0.0;
	}
	eval_point->mean=mean;
	eval_point->standard_dev=sqrt(variance);

	RL_agent_message("unfreeze learning");
	return eval_point;
}
/* Example #3 */
/* 0 */
/*
	Evaluates the agent once up front and then again after every batch of
	25 episodes, for 20 batches (21 evaluation points in total).  Each
	score is printed as it is produced; afterwards all of them are written
	to "results.csv" and released with free().
*/
void offline_demo(){
	enum { BATCHES = 20, EPISODES_PER_BATCH = 25, POINTS = BATCHES + 1 };
	evaluation_point_t *statistics[POINTS];
	evaluation_point_t *score;
	int batch;
	int remaining;
	int slot;

	/* Evaluation point 0: score before any of the episodes below. */
	score=evaluate_agent();
	print_score(0,score);
	statistics[0]=score;

	for(batch=1;batch<=BATCHES;batch++){
		/* One batch of episodes with a cutoff of 0. */
		remaining=EPISODES_PER_BATCH;
		while(remaining>0){
			RL_episode(0);
			remaining--;
		}

		/* Score the agent; the label is the episode count so far. */
		score=evaluate_agent();
		print_score(batch*EPISODES_PER_BATCH,score);
		statistics[batch]=score;
	}

	save_result_csv(statistics,"results.csv");

	/* Release every evaluation point stored above. */
	slot=0;
	while(slot<POINTS){
		free(statistics[slot]);
		slot++;
	}

}
int main(int argc, char *argv[]) {
	RL_init();
	/* No cutoff */
	int isTerminal = RL_episode(0);
	check_fail(isTerminal!=1);
	check_fail(RL_num_steps()!=5);
	

	isTerminal = RL_episode(1);

	check_fail(isTerminal!=0);
	check_fail(RL_num_steps()!=1);

	isTerminal = RL_episode(2);
	check_fail(isTerminal!=0);
	check_fail(RL_num_steps()!=2);

	isTerminal = RL_episode(4);
	check_fail(isTerminal!=0);
	check_fail(RL_num_steps()!=4);

	isTerminal = RL_episode(5);
	check_fail(isTerminal!=0);
	check_fail(RL_num_steps()!=5);

	isTerminal = RL_episode(6);
	check_fail(isTerminal!=1);
	check_fail(RL_num_steps()!=5);

	isTerminal = RL_episode(7);
	check_fail(isTerminal!=1);
	check_fail(RL_num_steps()!=5);
	
	RL_cleanup();

	if(tests_failed!=0)
		printf("Failed %d / %d checks in %s\n",tests_failed,test_count, __FILE__);
	else
		printf("Passed all %d checks in %s\n",test_count,__FILE__);
	return tests_failed;
}
// Runs a single episode through RL-Glue with the given step limit, prints a
// one-line summary (episode index, steps, total reward, RL_episode result),
// and advances the whichEpisode counter.
void runEpisode(int stepLimit) {
	const int naturalEnd = RL_episode(stepLimit);

	printf("Episode %d\t %d steps \t%f total reward\t %d natural end \n",
	       whichEpisode,
	       RL_num_steps(),
	       RL_return(),
	       naturalEnd);

	whichEpisode += 1;
}