コード例 #1
0
/*
 * Experiment entry point.
 *
 * Phase 1 runs five complete episodes through runEpisode() (defined
 * elsewhere). Phase 2 re-initialises RL-Glue and drives a single episode
 * one step at a time until the step result reports terminal == 1, then
 * prints the step count and return.
 *
 * argc/argv are unused. Always returns 0.
 */
int main(int argc, char *argv[]) {
	const char *task_spec;
	const observation_action_t *startResponse;
	const reward_observation_action_terminal_t *stepResponse;
	int episode;

	printf("\n\nExperiment starting up!\n");

	task_spec = RL_init();
	printf("RL_init called, the environment sent task spec: %s\n", task_spec);

	/*
	 * RL_env_message and RL_agent_message may be used to talk to the
	 * environment and the agent respectively (see the RL-Glue docs), e.g.
	 *   const char *responseMessage = RL_agent_message("what is your name?");
	 */

	printf("\n\n----------Running a few episodes----------\n");
	/* Per the original note, a step limit of 0 means "no limit at all":
	 * the first episode is capped at 10000 steps, the other four are not. */
	runEpisode(10000);
	for (episode = 0; episode < 4; episode++) {
		runEpisode(0);
	}
	RL_cleanup();

	printf("\n\n----------Stepping through an episode----------\n");
	/* Start over and walk through one episode manually. */
	task_spec = RL_init();

	startResponse = RL_start();
	printf("First action was: %d\n", startResponse->action->intArray[0]);

	/* Take at least one step, then keep stepping until the episode ends. */
	do {
		stepResponse = RL_step();
	} while (stepResponse->terminal != 1);

	/* RL_num_steps / RL_return report on the episode that just finished. */
	printf("It ran for %d steps, total reward was: %f\n",RL_num_steps(), RL_return());
	RL_cleanup();

	return 0;
}
コード例 #2
0
/*
 * Timing harness: runs one warm-up episode capped at 500 steps, then times
 * two episodes run with a step limit of 0 and prints the elapsed
 * milliseconds and the per-step average for each.
 *
 * Returns tests_failed (a counter defined elsewhere).
 */
int main(int argc, char *argv[]) {
	const char *task_spec;
	long t0, t1;
	int steps;
	int run;

	task_spec = RL_init();

	/* Run an episode to get the system warmed up before measuring. */
	RL_episode(500);

	for (run = 0; run < 2; run++) {
		t0 = get_current_ms_time();
		RL_episode(0);
		t1 = get_current_ms_time();

		steps = RL_num_steps();

		printf ("\telapsed time in ms: %ld, per step is %f\n", (t1 - t0), ((float)(t1-t0)/(float)steps));
	}

	RL_cleanup();

	if (tests_failed == 0) {
		printf("Passed all %d checks in %s\n",test_count,__FILE__);
	} else {
		printf("Failed %d / %d checks in %s\n",tests_failed,test_count, __FILE__);
	}
	return tests_failed;
}
コード例 #3
0
ファイル: SampleExperiment.c プロジェクト: josebigio/RL_GLUE
/*
 * Offline-learning demo driver.
 *
 * Sequence: run offline_demo(), ask the agent to save its policy, reset
 * RL-Glue, evaluate the fresh agent, ask it to load the saved policy and
 * evaluate again, then evaluate from a fixed start state and finally from a
 * random start state. offline_demo() and single_evaluation() are defined
 * elsewhere; the message strings are interpreted by the agent/environment.
 *
 * Always returns 0.
 */
int main(int argc, char *argv[]) {
	printf("Starting offline demo\n"
	       "----------------------------\n"
	       "Will alternate learning for 25 episodes, then freeze policy and evaluate for 10 episodes.\n\n");
	printf("After Episode\tMean Return\tStandard Deviation\n"
	       "-------------------------------------------------------------------------\n");

	RL_init();
	offline_demo();

	/* Persist what was learned, then reset both sides. */
	printf("\nNow we will save the agent's learned value function to a file....\n");
	RL_agent_message("save_policy results.dat");

	printf("\nCalling RL_cleanup and RL_init to clear the agent's memory...\n");
	RL_cleanup();
	RL_init();

	/* Baseline: evaluation right after the reset. */
	printf("Evaluating the agent's default policy:\n"
	       "\t\tMean Return\tStandardDeviation\n"
	       "------------------------------------------------------\n");
	single_evaluation();

	/* Restore the saved policy and evaluate again. */
	printf("\nLoading up the value function we saved earlier.\n");
	RL_agent_message("load_policy results.dat");

	printf("Evaluating the agent after loading the value function:\n"
	       "\t\tMean Return\tStandardDeviation\n"
	       "------------------------------------------------------\n");
	single_evaluation();

	/* Fixed start state: an episode is started before asking the
	 * environment to print its state. */
	printf("Telling the environment to use fixed start state of 2,3.\n");
	RL_env_message("set-start-state 2 3");
	RL_start();
	printf("Telling the environment to print the current state to the screen.\n");
	RL_env_message("print-state");
	printf("Evaluating the agent a few times from a fixed start state of 2,3:\n"
	       "\t\tMean Return\tStandardDeviation\n"
	       "-------------------------------------------\n");
	single_evaluation();

	/* Back to random start states. */
	printf("Evaluating the agent again with the random start state:\n"
	       "\t\tMean Return\tStandardDeviation\n"
	       "-----------------------------------------------------\n");
	RL_env_message("set-random-start-state");
	single_evaluation();

	RL_cleanup();
	printf("\nProgram Complete.\n");

	return 0;
}
コード例 #4
0
/*
 * Checks RL_episode() against a series of step limits.
 *
 * For each limit the test records the value returned by RL_episode() and
 * the subsequent RL_num_steps(), and passes both comparisons to
 * check_fail() (defined elsewhere; it receives a non-zero argument when the
 * expectation is violated). The expected values below are the ones the
 * test has always asserted.
 *
 * Returns tests_failed.
 */
int main(int argc, char *argv[]) {
	/* One row per RL_episode() call, in execution order.
	 * A step limit of 0 is the "no cutoff" case. */
	static const struct {
		int stepLimit;
		int expectTerminal;
		int expectSteps;
	} cases[] = {
		{0, 1, 5},
		{1, 0, 1},
		{2, 0, 2},
		{4, 0, 4},
		{5, 0, 5},
		{6, 1, 5},
		{7, 1, 5},
	};
	const int caseCount = (int)(sizeof cases / sizeof cases[0]);
	int i;

	RL_init();

	for (i = 0; i < caseCount; i++) {
		int isTerminal = RL_episode(cases[i].stepLimit);
		check_fail(isTerminal != cases[i].expectTerminal);
		check_fail(RL_num_steps() != cases[i].expectSteps);
	}

	RL_cleanup();

	if (tests_failed == 0) {
		printf("Passed all %d checks in %s\n",test_count,__FILE__);
	} else {
		printf("Failed %d / %d checks in %s\n",tests_failed,test_count, __FILE__);
	}
	return tests_failed;
}
コード例 #5
0
int main(int argc, char *argv[]) {
	const char *theResponse;
	
	RL_init();

	check_fail(strcmp("empty",RL_env_message(0))!=0);

	check_fail(strcmp("empty",RL_env_message(""))!=0);

	check_fail(strcmp("empty",RL_agent_message(0))!=0);

	check_fail(strcmp("empty",RL_agent_message(""))!=0);

	check_fail(strcmp("",RL_env_message("empty"))!=0);

	check_fail(strcmp("",RL_agent_message("empty"))!=0);

	theResponse=RL_env_message("null");
	check_fail(!(theResponse!=0 ||strcmp("",theResponse)!=0));
	
	theResponse=RL_agent_message("null");
	check_fail(!(theResponse!=0 ||strcmp("",theResponse)!=0));


	check_fail(strcmp("1",RL_env_message("1"))!=0);
	check_fail(strcmp("1",RL_agent_message("1"))!=0);

	check_fail(strcmp("1000000000000000000000",RL_env_message("1000000000000000000000"))!=0);
	check_fail(strcmp("1000000000000000000000",RL_agent_message("1000000000000000000000"))!=0);

	check_fail(strcmp("21111111111111111111111111111111111111111111111111111111311111111111111111111111111111111111111111111111111111113",RL_env_message("21111111111111111111111111111111111111111111111111111111311111111111111111111111111111111111111111111111111111113"))!=0);
	check_fail(strcmp("45555555555555555555555555555555555555555555555555555555655555555555555555555555555555555555555555555555555555559",RL_agent_message("45555555555555555555555555555555555555555555555555555555655555555555555555555555555555555555555555555555555555559"))!=0);

	RL_cleanup();
	if(tests_failed!=0)
		printf("Failed %d / %d checks in %s\n",tests_failed,test_count, __FILE__);
	else
		printf("Passed all %d checks in %s\n",test_count,__FILE__);
	return tests_failed;
}
コード例 #6
0
ファイル: test_1_experiment.c プロジェクト: IvanLogvinov/soar
/*
 * Steps through one episode and, after each RL_step(), verifies the
 * observation contents, the terminal flag and the replies to messages sent
 * to the environment and the agent. A final step taken after the terminal
 * one checks an observation carrying five ints, five doubles and five
 * chars. Results are reported through check_fail() (defined elsewhere).
 *
 * Returns tests_failed.
 */
int main(int argc, char *argv[]) {
	/* Steps 2-4 share one shape: message sent, reply expected from both
	 * sides, and the expected value of observation->intArray[0]. */
	static const struct {
		const char *message;
		const char *expectedReply;
		int expectedObs;
	} middleSteps[] = {
		{"two",   "two|2.2.|two",  1},
		{"three", "three||three",  2},
		{"four",  "four|4.|four",  3},
	};
	const int middleCount = (int)(sizeof middleSteps / sizeof middleSteps[0]);
	const reward_observation_action_terminal_t *stepResult;
	const char *task_spec;
	int i;

	task_spec = RL_init();

	RL_start();

	/* Step 1: observation is checked before the message replies. */
	stepResult = RL_step();

	check_fail(stepResult->observation->numInts != 1);
	check_fail(stepResult->observation->numDoubles != 0);
	check_fail(stepResult->observation->numChars != 0);
	check_fail(stepResult->observation->intArray[0] != 0);
	check_fail(strcmp("one|1.|one", RL_env_message("one")) != 0);
	check_fail(strcmp("one|1.|one", RL_agent_message("one")) != 0);
	check_fail(stepResult->terminal != 0);

	/* Steps 2-4: message replies first, then terminal flag and observation. */
	for (i = 0; i < middleCount; i++) {
		stepResult = RL_step();

		check_fail(strcmp(middleSteps[i].expectedReply, RL_env_message(middleSteps[i].message)) != 0);
		check_fail(strcmp(middleSteps[i].expectedReply, RL_agent_message(middleSteps[i].message)) != 0);
		check_fail(stepResult->terminal != 0);
		check_fail(stepResult->observation->numInts != 1);
		check_fail(stepResult->observation->numDoubles != 0);
		check_fail(stepResult->observation->numChars != 0);
		check_fail(stepResult->observation->intArray[0] != middleSteps[i].expectedObs);
	}

	/* Step 5: expected to be terminal. Note the environment and agent
	 * replies are expected to differ here. */
	stepResult = RL_step();
	check_fail(strcmp("five|5.5.|five", RL_env_message("five")) != 0);
	check_fail(strcmp("five|4.|five", RL_agent_message("five")) != 0);
	check_fail(stepResult->terminal == 0);

	/* Gabor has made it so this environment will step past terminal.  This
	   is not something we want to do in general at all.

	   But, in order to keep the other tests all working, I'll allow it. */
	stepResult = RL_step();
	check_fail(stepResult->observation->numInts != 5);
	check_fail(stepResult->observation->numDoubles != 5);
	check_fail(stepResult->observation->numChars != 5);

	check_fail(stepResult->observation->intArray[0] != 173);
	check_fail(stepResult->observation->intArray[1] != -173);
	check_fail(stepResult->observation->intArray[2] != 2147483647);
	check_fail(stepResult->observation->intArray[3] != 0);
	check_fail(stepResult->observation->intArray[4] != -2147483648);

	check_fail(stepResult->observation->doubleArray[0] != 0.0078125);
	check_fail(stepResult->observation->doubleArray[1] != -0.0078125);
	check_fail(stepResult->observation->doubleArray[2] != 0);
	check_fail(stepResult->observation->doubleArray[3] != 0.0078125e150);
	check_fail(stepResult->observation->doubleArray[4] != -0.0078125e150);

	check_fail(stepResult->observation->charArray[0] != 'g');
	check_fail(stepResult->observation->charArray[1] != 'F');
	check_fail(stepResult->observation->charArray[2] != '?');
	check_fail(stepResult->observation->charArray[3] != ' ');
	check_fail(stepResult->observation->charArray[4] != '&');

	RL_cleanup();

	if (tests_failed == 0) {
		printf("Passed all %d checks in %s\n",test_count,__FILE__);
	} else {
		printf("Failed %d / %d checks in %s\n",tests_failed,test_count, __FILE__);
	}
	return tests_failed;
}
コード例 #7
0
/*
 * Configures the agent and environment through messages, initialises
 * RL-Glue, exchanges a few sample messages with both sides and then runs
 * episodes forever via runEpisode(0) (runEpisode is defined elsewhere).
 *
 * NOTE(review): the episode loop never exits, so the cleanup, the trailing
 * printf calls and the `return 0` below it are unreachable as written.
 *
 * argc/argv are unused.
 */
int startExperiment(int argc, char *argv[]) {
	const char *task_spec;
	const char *responseMessage;
	/* Only referenced by the disabled step-by-step walkthrough. */
	const reward_observation_action_terminal_t *stepResponse;
	const observation_action_t *startResponse;

	printf("\n\nExperiment starting up!\n");

	/* Configuration is sent before RL_init(). */
	RL_agent_message("set_simulation_on");
	RL_agent_message("set_play_tree");
	RL_agent_message("set_save_of");
	/* Disabled option: RL_env_message("set_speed_turbo"); */
	RL_env_message("set_game_GalaxyPatrol");
	/* Other game selections that were left disabled:
	 *   "set_game_Galaga", "set_game_SuperMarioBros", "set_game_Pacman" */

	task_spec = RL_init();
	printf("RL_init called, the environment sent task spec: %s\n",task_spec);

	printf("\n\n----------Sending some sample messages----------\n");
	/* Talk to the agent a bit... */
	responseMessage = RL_agent_message("what is your name?");
	printf("Agent responded to \"what is your name?\" with: %s\n",responseMessage);
	responseMessage = RL_agent_message("If at first you don't succeed; call it version 1.0");
	printf("Agent responded to \"If at first you don't succeed; call it version 1.0\" with: %s\n\n",responseMessage);

	/* ...and to the environment. */
	responseMessage = RL_env_message("what is your name?");
	printf("Environment responded to \"what is your name?\" with: %s\n",responseMessage);
	responseMessage = RL_env_message("If at first you don't succeed; call it version 1.0");
	printf("Environment responded to \"If at first you don't succeed; call it version 1.0\" with: %s\n",responseMessage);

	printf("\n\n----------Running a few episodes----------\n");
	/* A fixed series of runEpisode(10) calls used to live here (disabled).
	 * Remember that a stepLimit of 0 means there is no limit at all! */
	for (;;) {
		runEpisode(0);
	}

	/* Unreachable from here on (see the note in the header comment). */
	RL_cleanup();

	printf("\n\n----------Stepping through an episode----------\n");
	/*
	 * Disabled walkthrough: call RL_init() again, RL_start() the episode,
	 * then RL_step() until stepResponse->terminal == 1, optionally printing
	 * each (observation, action) pair.
	 */

	printf("\n\n----------Summary----------\n");
	/* Disabled: print RL_num_steps() / RL_return(), then RL_cleanup(). */

	return 0;
}
コード例 #8
0
ファイル: test_empty_experiment.c プロジェクト: 10sun/DRL-AI
int main(int argc, char *argv[]) {
	int whichEpisode=0;
	int whichStep=0;
	const observation_action_t *startTuple;
	const reward_observation_action_terminal_t *stepTuple;
	
	RL_init();
	
	for(whichEpisode=1;whichEpisode<5;whichEpisode++){
		startTuple=RL_start();
		
		if(whichEpisode%2==0){
			check_fail(startTuple->action->numInts!=0);
			check_fail(startTuple->action->numDoubles!=0);
			check_fail(startTuple->action->numChars!=0);

			check_fail(startTuple->observation->numInts!=0);
			check_fail(startTuple->observation->numDoubles!=0);
			check_fail(startTuple->observation->numChars!=0);
		}else{
			check_fail(startTuple->action->numInts!=7);
			check_fail(startTuple->action->numDoubles!=3);
			check_fail(startTuple->action->numChars!=1);

			check_fail(startTuple->observation->numInts!=2);
			check_fail(startTuple->observation->numDoubles!=4);
			check_fail(startTuple->observation->numChars!=5);
		}
		
		for(whichStep=0;whichStep<5;whichStep++){
			stepTuple=RL_step();
			check_fail(stepTuple->terminal!=0);
			check_fail(stepTuple->reward!=0);

			if(whichEpisode%2==0){
				check_fail(stepTuple->action->numInts!=0);
				check_fail(stepTuple->action->numDoubles!=0);
				check_fail(stepTuple->action->numChars!=0);

				check_fail(stepTuple->observation->numInts!=0);
				check_fail(stepTuple->observation->numDoubles!=0);
				check_fail(stepTuple->observation->numChars!=0);
			}else{
				check_fail(stepTuple->action->numInts!=7);
				check_fail(stepTuple->action->numDoubles!=3);
				check_fail(stepTuple->action->numChars!=1);

				check_fail(stepTuple->observation->numInts!=2);
				check_fail(stepTuple->observation->numDoubles!=4);
				check_fail(stepTuple->observation->numChars!=5);
			}
			
		}
		
		
	}
	RL_cleanup();

	if(tests_failed!=0)
		printf("Failed %d / %d checks in %s\n",tests_failed,test_count, __FILE__);
	else
		printf("Passed all %d checks in %s\n",test_count,__FILE__);
	return tests_failed;
}