コード例 #1
0
ファイル: SampleExperiment.c プロジェクト: josebigio/RL_GLUE
/**
 * Evaluate the agent's current policy with learning switched off.
 *
 * Sends "freeze learning" to the agent, runs a fixed number of episodes
 * (each cut off at 5000 steps), and sends "unfreeze learning" before
 * returning.  The mean and the sample standard deviation (n-1 divisor) of
 * the per-episode return are computed over those episodes.
 *
 * Returns a heap-allocated evaluation_point_t that the caller must free(),
 * or a null pointer if the allocation failed.  The "unfreeze learning"
 * message is sent on both paths.
 */
evaluation_point_t *evaluate_agent(){
	int i=0;
	double sum=0;
	double sum_of_squares=0;
	double this_return=0;
	double mean;
	double variance;
	int n=10;
	evaluation_point_t *eval_point=0;

	RL_agent_message("freeze learning");
	for(i=0;i<n;i++){
		/* We use a cutoff here in case the policy is bad
		   and will never end an episode */
		RL_episode(5000);
		this_return=RL_return();
		sum+=this_return;
		sum_of_squares+=this_return*this_return;
	}

	mean=sum/(double)n;
	/* One-pass sample variance: (sum(x^2) - n*mean^2) / (n-1). */
	variance = (sum_of_squares - (double)n*mean*mean)/((double)n - 1.0);
	/* When the returns are (nearly) identical, rounding can leave the
	   numerator slightly below zero; clamp so sqrt() does not yield NaN. */
	if(variance<0.0){
		variance=0.0;
	}

	eval_point=malloc(sizeof *eval_point);
	if(eval_point){
		eval_point->mean=mean;
		eval_point->standard_dev=sqrt(variance);
	}

	/* Always restore learning, even if the allocation failed. */
	RL_agent_message("unfreeze learning");
	return eval_point;
}
コード例 #2
0
// Experiment entry point.
// Phase 1 runs five whole episodes through runEpisode(); phase 2
// re-initialises the glue and drives a single episode one step at a time.
// The command-line arguments are not used; the function always returns 0.
int main(int argc, char *argv[]) {
	const char *task_spec;
	const observation_action_t *firstResponse;
	const reward_observation_action_terminal_t *lastStep;
	int unlimitedRuns;

	printf("\n\nExperiment starting up!\n");

	task_spec = RL_init();
	printf("RL_init called, the environment sent task spec: %s\n", task_spec);

	// RL_env_message / RL_agent_message could be called at this point to
	// exchange messages with the environment / agent.

	printf("\n\n----------Running a few episodes----------\n");
	// The first episode is capped at 10000 steps; the remaining four pass 0,
	// which means "no step limit".
	runEpisode(10000);
	for (unlimitedRuns = 0; unlimitedRuns < 4; unlimitedRuns++) {
		runEpisode(0);
	}
	RL_cleanup();

	printf("\n\n----------Stepping through an episode----------\n");
	// Fresh experiment; the task spec returned here is not printed again.
	task_spec = RL_init();

	// Begin the episode and report the agent's first action.
	firstResponse = RL_start();
	printf("First action was: %d\n", firstResponse->action->intArray[0]);

	// Take at least one step, then keep stepping until the terminal flag is 1.
	do {
		lastStep = RL_step();
	} while (lastStep->terminal != 1);

	// Summarise the manually stepped episode.
	printf("It ran for %d steps, total reward was: %f\n", RL_num_steps(), RL_return());
	RL_cleanup();

	return 0;
}
コード例 #3
0
// Runs one episode via RL_episode() with the given step limit, prints a
// one-line summary (episode index, step count, return, and the value
// RL_episode() handed back), then advances the whichEpisode counter.
void runEpisode(int stepLimit) {
	const int episodeResult = RL_episode(stepLimit);

	printf("Episode %d\t %d steps \t%f total reward\t %d natural end \n",
	       whichEpisode,
	       RL_num_steps(),
	       RL_return(),
	       episodeResult);

	++whichEpisode;
}
コード例 #4
0
ファイル: SkeletonExperiment.c プロジェクト: junzhez/rl-glue
/*
 * Skeleton experiment entry point.
 *
 * 1. Initialises the glue and prints the task spec.
 * 2. Sends two sample messages to the agent and to the environment and
 *    prints each reply.
 * 3. Runs seven episodes through runEpisode() with various step limits.
 * 4. Re-initialises and drives one more episode a step at a time, then
 *    prints a summary.
 *
 * The command-line arguments are not used; the function always returns 0.
 */
int main(int argc, char *argv[]) {
    const char *nameQuery = "what is your name?";
    const char *versionQuip = "If at first you don't succeed; call it version 1.0";
    const char *task_spec;
    const char *reply;
    const observation_action_t *firstResponse;
    const reward_observation_action_terminal_t *lastStep;
    int run;

    printf("\n\nExperiment starting up!\n");

    task_spec = RL_init();
    printf("RL_init called, the environment sent task spec: %s\n", task_spec);

    printf("\n\n----------Sending some sample messages----------\n");
    /* Agent first ... */
    reply = RL_agent_message(nameQuery);
    printf("Agent responded to \"what is your name?\" with: %s\n", reply);
    reply = RL_agent_message(versionQuip);
    printf("Agent responded to \"If at first you don't succeed; call it version 1.0\" with: %s\n\n", reply);

    /* ... then the environment, with the same two messages. */
    reply = RL_env_message(nameQuery);
    printf("Environment responded to \"what is your name?\" with: %s\n", reply);
    reply = RL_env_message(versionQuip);
    printf("Environment responded to \"If at first you don't succeed; call it version 1.0\" with: %s\n", reply);

    printf("\n\n----------Running a few episodes----------\n");
    /* Five episodes capped at 100 steps, one capped at a single step. */
    for (run = 0; run < 5; run++) {
        runEpisode(100);
    }
    runEpisode(1);
    /* A stepLimit of 0 means there is no limit at all. */
    runEpisode(0);
    RL_cleanup();

    printf("\n\n----------Stepping through an episode----------\n");
    /* Start over with a fresh experiment. */
    task_spec = RL_init();

    /* Begin the episode and show the first observation/action pair. */
    firstResponse = RL_start();
    printf("First observation and action were: %d %d\n",
           firstResponse->observation->intArray[0],
           firstResponse->action->intArray[0]);

    /* Take at least one step, then keep stepping until the terminal flag
       is 1.  A per-step print of the non-terminal transitions could be
       added inside this loop. */
    do {
        lastStep = RL_step();
    } while (lastStep->terminal != 1);

    printf("\n\n----------Summary----------\n");

    printf("It ran for %d steps, total reward was: %f\n", RL_num_steps(), RL_return());
    RL_cleanup();

    return 0;
}