// Example experiment program.
//
// Phase 1 hands five whole episodes to runEpisode() (defined elsewhere in
// the project). Phase 2 re-initialises RL-Glue and drives one further
// episode a single step at a time, then prints the step count and return.
//
// argc/argv are accepted but never read. Always returns 0.
int main(int argc, char *argv[]) {
	const char *spec;
	const observation_action_t *first;
	const reward_observation_action_terminal_t *latest;
	int repeat;

	printf("\n\nExperiment starting up!\n");

	spec = RL_init();
	printf("RL_init called, the environment sent task spec: %s\n", spec);

	// RL_env_message and RL_agent_message may be used to talk to the
	// environment and the agent respectively (see the RL-Glue
	// documentation); this example does not send any messages.

	printf("\n\n----------Running a few episodes----------\n");
	// The first episode is given a step limit of 10000. The other four pass
	// 0, which the original author notes means "no limit at all".
	runEpisode(10000);
	for (repeat = 0; repeat < 4; repeat++) {
		runEpisode(0);
	}
	RL_cleanup();

	printf("\n\n----------Stepping through an episode----------\n");
	// Start a second experiment; the task spec is not needed this time.
	(void)RL_init();

	// Start the episode and report the first action's leading integer.
	first = RL_start();
	printf("First action was: %d\n", first->action->intArray[0]);

	// Take at least one step, then keep stepping until the step result's
	// terminal field equals 1.
	do {
		latest = RL_step();
	} while (latest->terminal != 1);

	// Demonstrates other RL-Glue functionality.
	printf("It ran for %d steps, total reward was: %f\n",RL_num_steps(), RL_return());
	RL_cleanup();

	return 0;
}
Example #2
0
int main(int argc, char *argv[]) {
	int whichEpisode=0;
	int whichStep=0;
	const observation_action_t *startTuple;
	const reward_observation_action_terminal_t *stepTuple;
	
	RL_init();
	
	for(whichEpisode=1;whichEpisode<5;whichEpisode++){
		startTuple=RL_start();
		
		if(whichEpisode%2==0){
			check_fail(startTuple->action->numInts!=0);
			check_fail(startTuple->action->numDoubles!=0);
			check_fail(startTuple->action->numChars!=0);

			check_fail(startTuple->observation->numInts!=0);
			check_fail(startTuple->observation->numDoubles!=0);
			check_fail(startTuple->observation->numChars!=0);
		}else{
			check_fail(startTuple->action->numInts!=7);
			check_fail(startTuple->action->numDoubles!=3);
			check_fail(startTuple->action->numChars!=1);

			check_fail(startTuple->observation->numInts!=2);
			check_fail(startTuple->observation->numDoubles!=4);
			check_fail(startTuple->observation->numChars!=5);
		}
		
		for(whichStep=0;whichStep<5;whichStep++){
			stepTuple=RL_step();
			check_fail(stepTuple->terminal!=0);
			check_fail(stepTuple->reward!=0);

			if(whichEpisode%2==0){
				check_fail(stepTuple->action->numInts!=0);
				check_fail(stepTuple->action->numDoubles!=0);
				check_fail(stepTuple->action->numChars!=0);

				check_fail(stepTuple->observation->numInts!=0);
				check_fail(stepTuple->observation->numDoubles!=0);
				check_fail(stepTuple->observation->numChars!=0);
			}else{
				check_fail(stepTuple->action->numInts!=7);
				check_fail(stepTuple->action->numDoubles!=3);
				check_fail(stepTuple->action->numChars!=1);

				check_fail(stepTuple->observation->numInts!=2);
				check_fail(stepTuple->observation->numDoubles!=4);
				check_fail(stepTuple->observation->numChars!=5);
			}
			
		}
		
		
	}
	RL_cleanup();

	if(tests_failed!=0)
		printf("Failed %d / %d checks in %s\n",tests_failed,test_count, __FILE__);
	else
		printf("Passed all %d checks in %s\n",test_count,__FILE__);
	return tests_failed;
}
Example #3
0
/*
 * Test driver: starts one episode, takes six steps, and after each step
 * checks the observation contents, the terminal flag, and the replies
 * returned by RL_env_message() / RL_agent_message().
 *
 * Steps 1-4 must be non-terminal and carry a single-int observation whose
 * value is 0, 1, 2, 3 in turn. Step 5 must be terminal. Step 6 is taken
 * past the terminal step on purpose (see the note below) and must return a
 * fixed 5-int / 5-double / 5-char observation.
 *
 * argc/argv are unused. Prints a summary line and returns tests_failed.
 */
int main(int argc, char *argv[]) {
	/* Steps 2-4 run the same check sequence; only the message sent and the
	   reply expected from both the environment and the agent differ. */
	static const char *const midSent[3]  = { "two", "three", "four" };
	static const char *const midReply[3] = { "two|2.2.|two", "three||three", "four|4.|four" };
	/* Expected payload of the observation returned by the sixth step. */
	static const double wantDoubles[5] = { 0.0078125, -0.0078125, 0, 0.0078125e150, -0.0078125e150 };
	static const char wantChars[5] = { 'g', 'F', '?', ' ', '&' };

	const reward_observation_action_terminal_t *roat;
	int k;

	RL_init();
	RL_start();

	/* Step 1: observation is checked before the messages are exchanged. */
	roat = RL_step();
	check_fail(roat->observation->numInts != 1);
	check_fail(roat->observation->numDoubles != 0);
	check_fail(roat->observation->numChars != 0);
	check_fail(roat->observation->intArray[0] != 0);
	check_fail(strcmp("one|1.|one", RL_env_message("one")) != 0);
	check_fail(strcmp("one|1.|one", RL_agent_message("one")) != 0);
	check_fail(roat->terminal != 0);

	/* Steps 2-4: messages first, then terminal flag, then observation. */
	for (k = 0; k < 3; k++) {
		roat = RL_step();
		check_fail(strcmp(midReply[k], RL_env_message(midSent[k])) != 0);
		check_fail(strcmp(midReply[k], RL_agent_message(midSent[k])) != 0);
		check_fail(roat->terminal != 0);
		check_fail(roat->observation->numInts != 1);
		check_fail(roat->observation->numDoubles != 0);
		check_fail(roat->observation->numChars != 0);
		check_fail(roat->observation->intArray[0] != k + 1);
	}

	/* Step 5: must be terminal. Note that the expected agent reply here
	   ("five|4.|five") differs from the expected environment reply. */
	roat = RL_step();
	check_fail(strcmp("five|5.5.|five", RL_env_message("five")) != 0);
	check_fail(strcmp("five|4.|five", RL_agent_message("five")) != 0);
	check_fail(roat->terminal == 0);

	/* Gabor has made it so this environment will step past terminal.  This
	   is not something we want to do in general at all.

	   But, in order to keep the other tests all working, I'll allow it. */
	roat = RL_step();
	check_fail(roat->observation->numInts != 5);
	check_fail(roat->observation->numDoubles != 5);
	check_fail(roat->observation->numChars != 5);
	check_fail(roat->observation->intArray[0] != 173);
	check_fail(roat->observation->intArray[1] != -173);
	check_fail(roat->observation->intArray[2] != 2147483647);
	check_fail(roat->observation->intArray[3] != 0);
	check_fail(roat->observation->intArray[4] != -2147483648);
	for (k = 0; k < 5; k++) {
		check_fail(roat->observation->doubleArray[k] != wantDoubles[k]);
	}
	for (k = 0; k < 5; k++) {
		check_fail(roat->observation->charArray[k] != wantChars[k]);
	}

	RL_cleanup();

	if (tests_failed == 0) {
		printf("Passed all %d checks in %s\n",test_count,__FILE__);
	} else {
		printf("Failed %d / %d checks in %s\n",tests_failed,test_count, __FILE__);
	}
	return tests_failed;
}
Example #4
0
/*
 * Example experiment program.
 *
 * 1. Initialises RL-Glue and prints the task spec it returns.
 * 2. Sends two sample messages to the agent and to the environment and
 *    prints each reply.
 * 3. Runs seven episodes through runEpisode() (defined elsewhere).
 * 4. Re-initialises RL-Glue and drives one more episode step by step,
 *    then prints the step count and total reward.
 *
 * argc/argv are accepted but never read. Always returns 0.
 */
int main(int argc, char *argv[]) {
    const char *spec;
    const observation_action_t *opening;
    const reward_observation_action_terminal_t *latest;
    int repeat;

    printf("\n\nExperiment starting up!\n");

    spec = RL_init();
    printf("RL_init called, the environment sent task spec: %s\n", spec);

    printf("\n\n----------Sending some sample messages----------\n");
    /* Talk to the agent a bit; each reply is printed as soon as it arrives. */
    printf("Agent responded to \"what is your name?\" with: %s\n",
           RL_agent_message("what is your name?"));
    printf("Agent responded to \"If at first you don't succeed; call it version 1.0\" with: %s\n\n",
           RL_agent_message("If at first you don't succeed; call it version 1.0"));

    /* Same two questions for the environment. */
    printf("Environment responded to \"what is your name?\" with: %s\n",
           RL_env_message("what is your name?"));
    printf("Environment responded to \"If at first you don't succeed; call it version 1.0\" with: %s\n",
           RL_env_message("If at first you don't succeed; call it version 1.0"));

    printf("\n\n----------Running a few episodes----------\n");
    /* Five episodes with a step limit of 100, then one limited to 1 step. */
    for (repeat = 0; repeat < 5; repeat++) {
        runEpisode(100);
    }
    runEpisode(1);
    /* Remember that a stepLimit of 0 means there is no limit at all! */
    runEpisode(0);
    RL_cleanup();

    printf("\n\n----------Stepping through an episode----------\n");
    /* Start over for another experiment; the task spec is not needed again. */
    (void)RL_init();

    /* Instead of a whole episode at a time, go one step at a time. */
    opening = RL_start();
    printf("First observation and action were: %d %d\n",opening->observation->intArray[0],opening->action->intArray[0]);

    /* Take at least one step and continue until the step result's terminal
       field equals 1. State/action pairs could be printed inside this loop. */
    do {
        latest = RL_step();
    } while (latest->terminal != 1);

    printf("\n\n----------Summary----------\n");

    printf("It ran for %d steps, total reward was: %f\n",RL_num_steps(),RL_return());
    RL_cleanup();

    return 0;
}