/*
 * Experiment entry point: runs five whole episodes through runEpisode(), then
 * re-initialises RL-Glue and walks through one more episode step by step.
 *
 * argc/argv are accepted but not used. Returns 0.
 */
int main(int argc, char *argv[]) {
	const char *spec;
	const observation_action_t *first;
	const reward_observation_action_terminal_t *latest;

	printf("\n\nExperiment starting up!\n");

	spec = RL_init();
	printf("RL_init called, the environment sent task spec: %s\n", spec);

	/*
	 * RL_env_message and RL_agent_message may be used to communicate with
	 * the environment and the agent respectively, for example:
	 *     const char *reply = RL_agent_message("what is your name?");
	 * See the RL-Glue documentation for details.
	 */

	printf("\n\n----------Running a few episodes----------\n");
	/* The first episode is limited to 10000; a limit of 0 means no limit at all. */
	runEpisode(10000);
	for (int remaining = 4; remaining > 0; --remaining) {
		runEpisode(0);
	}
	RL_cleanup();

	printf("\n\n----------Stepping through an episode----------\n");
	/* Start over; the task spec returned here is not used again. */
	spec = RL_init();

	/* Begin the episode and report the first action. */
	first = RL_start();
	printf("First action was: %d\n", first->action->intArray[0]);

	/* Take at least one step, then keep stepping until the terminal flag is 1. */
	do {
		latest = RL_step();
	} while (latest->terminal != 1);

	/* Other RL-Glue queries: number of steps taken and accumulated return. */
	printf("It ran for %d steps, total reward was: %f\n", RL_num_steps(), RL_return());
	RL_cleanup();

	return 0;
}
/* Example #2 */
/* 0 */
/*
 * Configures the agent and environment through RL-Glue messages, initialises
 * the experiment, exchanges a few sample messages with both sides, and then
 * runs episodes without a step limit in an endless loop.
 *
 * argc/argv are accepted but not used.
 *
 * The episode loop has no exit condition, so unless runEpisode() itself ends
 * the process, nothing after the loop (cleanup, the remaining banners, the
 * return of 0) is ever executed.
 */
int startExperiment(int argc, char *argv[]) {
	const char *spec;
	const char *reply;

	printf("\n\nExperiment starting up!\n");

	/* Agent-side configuration, sent before RL_init(). */
	RL_agent_message("set_simulation_on");
	RL_agent_message("set_play_tree");
	/* NOTE(review): "set_save_of" may be a typo for "set_save_off" - confirm against the agent. */
	RL_agent_message("set_save_of");

	/*
	 * Environment-side configuration: select the game.
	 * Alternatives that were previously toggled here by hand:
	 *   "set_speed_turbo", "set_game_Galaga", "set_game_SuperMarioBros",
	 *   "set_game_Pacman".
	 */
	RL_env_message("set_game_GalaxyPatrol");

	spec = RL_init();
	printf("RL_init called, the environment sent task spec: %s\n", spec);

	printf("\n\n----------Sending some sample messages----------\n");

	/* Two sample messages for the agent... */
	reply = RL_agent_message("what is your name?");
	printf("Agent responded to \"what is your name?\" with: %s\n", reply);
	reply = RL_agent_message("If at first you don't succeed; call it version 1.0");
	printf("Agent responded to \"If at first you don't succeed; call it version 1.0\" with: %s\n\n", reply);

	/* ...and the same two for the environment. */
	reply = RL_env_message("what is your name?");
	printf("Environment responded to \"what is your name?\" with: %s\n", reply);
	reply = RL_env_message("If at first you don't succeed; call it version 1.0");
	printf("Environment responded to \"If at first you don't succeed; call it version 1.0\" with: %s\n", reply);

	printf("\n\n----------Running a few episodes----------\n");

	/*
	 * A step limit of 0 means there is no limit at all. Episodes are run
	 * back to back indefinitely (an earlier variant ran a fixed number of
	 * episodes with a limit of 10 instead).
	 */
	for (;;) {
		runEpisode(0);
	}

	/* Not reached while the loop above has no exit. */
	RL_cleanup();

	printf("\n\n----------Stepping through an episode----------\n");
	/*
	 * Stepping through a single episode (RL_init, RL_start, then RL_step
	 * until the terminal flag is 1) is currently disabled here.
	 */

	printf("\n\n----------Summary----------\n");
	/* Reporting RL_num_steps()/RL_return() and a final RL_cleanup() are disabled as well. */

	return 0;
}