/*
 * Runs one episode with no step limit (RL_episode(0)), then prints the
 * elapsed wall-clock time in milliseconds and the average time per step.
 */
static void time_unlimited_episode(void) {
	const long start_ms = get_current_ms_time();
	RL_episode(0);
	const long elapsed_ms = get_current_ms_time() - start_ms;

	const int steps = RL_num_steps();

	/* Dividing by zero is undefined in C; report 0 when no steps were taken.
	 * The average is computed in double so large timings keep their precision. */
	const double ms_per_step =
		(steps > 0) ? (double)elapsed_ms / (double)steps : 0.0;

	printf ("\telapsed time in ms: %ld, per step is %f\n", elapsed_ms, ms_per_step);
}

/*
 * Speed benchmark: initialises RL-Glue, runs one untimed warm-up episode,
 * then times two unlimited episodes and prints the pass/fail summary.
 *
 * Returns tests_failed (no checks are performed in this function itself).
 */
int main(int argc, char *argv[]) {
	(void)argc;
	(void)argv;

	/* The task spec string returned by RL_init() is not needed here. */
	RL_init();

	/* Run an episode (limited to 500 steps) to get the system warmed up. */
	RL_episode(500);

	time_unlimited_episode();
	time_unlimited_episode();

	RL_cleanup();

	if (tests_failed != 0) {
		printf("Failed %d / %d checks in %s\n",tests_failed,test_count, __FILE__);
	} else {
		printf("Passed all %d checks in %s\n",test_count,__FILE__);
	}
	return tests_failed;
}
/*
 * Sample experiment. Runs five episodes through runEpisode(), shuts RL-Glue
 * down, then re-initialises it and steps through one more episode by hand
 * using RL_start()/RL_step(). Always returns 0.
 */
int main(int argc, char *argv[]) {
	const char *task_spec;
	const observation_action_t *first;
	const reward_observation_action_terminal_t *outcome;
	int run;

	printf("\n\nExperiment starting up!\n");

	task_spec = RL_init();
	printf("RL_init called, the environment sent task spec: %s\n", task_spec);

	/*
	 * RL_env_message and RL_agent_message may be used to communicate with
	 * the environment and agent, respectively. See the RL-Glue documentation
	 * for details. Example:
	 *     const char* responseMessage;
	 *     responseMessage=RL_agent_message("what is your name?");
	 */

	printf("\n\n----------Running a few episodes----------\n");
	/* The first episode is limited to 10000 steps; a limit of 0 means no
	 * limit at all, which is used for the remaining four episodes. */
	runEpisode(10000);
	for (run = 0; run < 4; run++) {
		runEpisode(0);
	}
	RL_cleanup();

	printf("\n\n----------Stepping through an episode----------\n");
	/* Start over and drive a single episode one step at a time. */
	task_spec = RL_init();

	/* Start the episode */
	first = RL_start();
	printf("First action was: %d\n", first->action->intArray[0]);

	/* Take at least one step, then keep stepping until the terminal flag is 1. */
	do {
		outcome = RL_step();
	} while (outcome->terminal != 1);

	/* Demonstrates other RL-Glue functionality. */
	printf("It ran for %d steps, total reward was: %f\n",RL_num_steps(), RL_return());
	RL_cleanup();

	return 0;
}
int main(int argc, char *argv[]) {
	RL_init();
	/* No cutoff */
	int isTerminal = RL_episode(0);
	check_fail(isTerminal!=1);
	check_fail(RL_num_steps()!=5);
	

	isTerminal = RL_episode(1);

	check_fail(isTerminal!=0);
	check_fail(RL_num_steps()!=1);

	isTerminal = RL_episode(2);
	check_fail(isTerminal!=0);
	check_fail(RL_num_steps()!=2);

	isTerminal = RL_episode(4);
	check_fail(isTerminal!=0);
	check_fail(RL_num_steps()!=4);

	isTerminal = RL_episode(5);
	check_fail(isTerminal!=0);
	check_fail(RL_num_steps()!=5);

	isTerminal = RL_episode(6);
	check_fail(isTerminal!=1);
	check_fail(RL_num_steps()!=5);

	isTerminal = RL_episode(7);
	check_fail(isTerminal!=1);
	check_fail(RL_num_steps()!=5);
	
	RL_cleanup();

	if(tests_failed!=0)
		printf("Failed %d / %d checks in %s\n",tests_failed,test_count, __FILE__);
	else
		printf("Passed all %d checks in %s\n",test_count,__FILE__);
	return tests_failed;
}
/*
 * Runs a single episode through RL_episode() with the given step limit,
 * prints a one-line summary (episode index, step count, total reward and
 * the value RL_episode() returned), then advances the whichEpisode counter.
 */
void runEpisode(int stepLimit) {
	const int endedNaturally = RL_episode(stepLimit);
	const int stepCount = RL_num_steps();
	const double totalReward = RL_return();

	printf("Episode %d\t %d steps \t%f total reward\t %d natural end \n",
	       whichEpisode, stepCount, totalReward, endedNaturally);
	whichEpisode += 1;
}
/* Example #5 */
/* 0 */
/*
 * Skeleton experiment. Sends sample messages to the agent and environment,
 * runs seven episodes through runEpisode(), then re-initialises RL-Glue and
 * steps through one episode manually before printing a summary.
 * Always returns 0.
 */
int main(int argc, char *argv[]) {
    const char *task_spec;
    const char *reply;
    const observation_action_t *first;
    const reward_observation_action_terminal_t *outcome;
    int run;

    printf("\n\nExperiment starting up!\n");

    task_spec = RL_init();
    printf("RL_init called, the environment sent task spec: %s\n", task_spec);

    printf("\n\n----------Sending some sample messages----------\n");
    /* Talk to the agent a bit... */
    reply = RL_agent_message("what is your name?");
    printf("Agent responded to \"what is your name?\" with: %s\n", reply);
    reply = RL_agent_message("If at first you don't succeed; call it version 1.0");
    printf("Agent responded to \"If at first you don't succeed; call it version 1.0\" with: %s\n\n", reply);

    /* ...and to the environment. */
    reply = RL_env_message("what is your name?");
    printf("Environment responded to \"what is your name?\" with: %s\n", reply);
    reply = RL_env_message("If at first you don't succeed; call it version 1.0");
    printf("Environment responded to \"If at first you don't succeed; call it version 1.0\" with: %s\n", reply);

    printf("\n\n----------Running a few episodes----------\n");
    /* Five episodes limited to 100 steps each, then one limited to 1 step. */
    for (run = 0; run < 5; run++) {
        runEpisode(100);
    }
    runEpisode(1);
    /* Remember that a stepLimit of 0 means there is no limit at all! */
    runEpisode(0);
    RL_cleanup();

    printf("\n\n----------Stepping through an episode----------\n");
    /* We could also start over and do another experiment. */
    task_spec = RL_init();

    /* Run one step at a time instead of one episode at a time:
     * start the episode first. */
    first = RL_start();
    printf("First observation and action were: %d %d\n",
           first->observation->intArray[0], first->action->intArray[0]);

    /* Take at least one step, then keep stepping until the terminal flag is 1.
     * Non-terminal steps are where state/action pairs could optionally be
     * printed. */
    do {
        outcome = RL_step();
    } while (outcome->terminal != 1);

    printf("\n\n----------Summary----------\n");

    printf("It ran for %d steps, total reward was: %f\n", RL_num_steps(), RL_return());
    RL_cleanup();

    return 0;
}