예제 #1
0
// Perform a playout of `playout_len` random actions on the agent and return
// the sum of the rewards generated along the way.
static reward_t playout(Agent &agent, unsigned int playout_len) {
	reward_t total = 0;

	for (unsigned int steps_left = playout_len; steps_left > 0; --steps_left) {
		// Choose an action at random and feed it to the agent's model.
		const action_t chosen = agent.genRandomAction();
		agent.modelUpdate(chosen);

		// Draw a percept distributed according to the agent's internal model
		// of the environment; the same call also updates the agent with it.
		percept_t obs;
		percept_t rew;
		agent.genPerceptAndUpdate(obs, rew);

		total = total + rew;
	}

	return total;
}
예제 #2
0
파일: main.cpp 프로젝트: jkirschner42/aixi
// The main agent/environment interaction loop
void mainLoop(Agent &ai, Environment &env, options_t &options) {

	// Determine exploration options
	bool explore = options.count("exploration") > 0;
	double explore_rate, explore_decay;
	if (explore) {
		strExtract(options["exploration"], explore_rate);
		strExtract(options["explore-decay"], explore_decay);
		assert(0.0 <= explore_rate && explore_rate <= 1.0);
		assert(0.0 <= explore_decay && explore_decay <= 1.0);
	}

    
	// Determine termination age
	bool terminate_check = options.count("terminate-age") > 0;
	age_t terminate_age;
	if (terminate_check) {
		strExtract(options["terminate-age"], terminate_age);
		assert(0 <= terminate_age);
	}

    // Determine mc-timelimit
    timelimit_t mc_timelimit;
    strExtract(options["mc-timelimit"], mc_timelimit);
    //if we assume that time_limit > agent.numActions() we can be sure 
    //that every action is selected at least once
    if(mc_timelimit < ai.numActions()){
        std::cerr << "WARNING: time_limit not large enough to sample all actions" << std::endl;
    }

	// Determine whether to write cts during the process, or only at the end.
    bool intermediate_ct = true;
    if(options.count("intermediate-ct") > 0){
        intermediate_ct = !(options["intermediate-ct"] == "0");
    }

    std::cout << "starting agent/environment interaction loop...\n"; 
	// Agent/environment interaction loop
	for (unsigned int cycle = 1; !env.isFinished(); cycle++) {

		// check for agent termination
		if (terminate_check && ai.age() >= terminate_age) {
			verboseLog << "info: terminating agent" << std::endl;
			break;
		}

		// Get a percept from the environment
		percept_t observation = env.getObservation();
		percept_t reward = env.getReward();

		// Update agent's environment model with the new percept
		ai.modelUpdate(observation, reward);

		// Determine best exploitive action, or explore
		action_t action;
		bool explored = false;
		if (explore && rand01() < explore_rate) {
			explored = true;
			action = ai.genRandomAction();
		}
		else {
			action = search(ai, mc_timelimit);
		}

		// Send an action to the environment
		env.performAction(action); 

		// Update agent's environment model with the chosen action
		ai.modelUpdate(action); 

		// Log this turn
		verboseLog << "cycle: " << cycle << std::endl;
		verboseLog << "observation: " << observation << std::endl;
		verboseLog << "reward: " << reward << std::endl;
		verboseLog << "action: " << action << std::endl;
		verboseLog << "explored: " << (explored ? "yes" : "no") << std::endl;
		verboseLog << "explore rate: " << explore_rate << std::endl;
		verboseLog << "total reward: " << ai.reward() << std::endl;
		verboseLog << "average reward: " << ai.averageReward() << std::endl;

		// Log the data in a more compact form
		compactLog << cycle << ", " << observation << ", " << reward << ", "
				<< action << ", " << explored << ", " << explore_rate << ", "
				<< ai.reward() << ", " << ai.averageReward() << std::endl;

		// Print to standard output when cycle == 2^n
		if ((cycle & (cycle - 1)) == 0) {
			std::cout << "cycle: " << cycle << std::endl;
			std::cout << "average reward: " << ai.averageReward() << std::endl;
			if (explore) {
				std::cout << "explore rate: " << explore_rate << std::endl;
			}

			// Write context tree file
			if(options["write-ct"] != "" && intermediate_ct){
				// write a ct for each 2^n cycles.
				char cycle_string[256];
				sprintf(cycle_string, "%d", cycle);
				std::ofstream ct((options["write-ct"] + std::string(cycle_string) + ".ct").c_str());
				ai.writeCT(ct);
				ct.close();
			}
		}

		// Update exploration rate
		if (explore) explore_rate *= explore_decay;

	}

	// Print summary to standard output
	std::cout << std::endl << std::endl << "SUMMARY" << std::endl;
	std::cout << "agent age: " << ai.age() << std::endl;
	std::cout << "average reward: " << ai.averageReward() << std::endl;

    // Write context tree file
    if(options["write-ct"] != ""){
    	// write a ct for the final cycle too.
		char cycle_string[256];
		sprintf(cycle_string, "%lld", ai.age());
		std::ofstream ct((options["write-ct"] + std::string(cycle_string) + ".ct").c_str());
		ai.writeCT(ct);
		ct.close();
    }
}
예제 #3
0
파일: main.cpp 프로젝트: gitter-badger/AIXI
// The main agent/environment interaction loop.
//
// Each cycle the agent receives an observation and a reward from the
// environment, chooses an action (a random one when exploring, otherwise the
// result of search()) and the action is performed on the environment. Every
// cycle is written to the log and compactLog streams, and a progress report
// is printed to standard output when the cycle number is a power of two.
// A summary is printed once the loop ends.
//
// ai      - the agent
// env     - the environment
// options - the configuration options; keys read here are "exploration",
//           "explore-decay" and "terminate-lifetime"
void mainLoop(Agent &ai, Environment &env, options_t &options) {

	// Determine exploration options. The defaults (no exploration, no decay)
	// matter even when exploration is disabled, because explore_rate is
	// written to the logs on every cycle.
	bool explore = options.count("exploration") > 0;
	double explore_rate = 0.0;
	double explore_decay = 1.0;
	if (explore) {
		strExtract(options["exploration"], explore_rate);
		strExtract(options["explore-decay"], explore_decay);
		assert(0.0 <= explore_rate && explore_rate <= 1.0);
		assert(0.0 <= explore_decay && explore_decay <= 1.0);
	}


	// Determine termination lifetime
	bool terminate_check = options.count("terminate-lifetime") > 0;
	lifetime_t terminate_lifetime = 0;
	if (terminate_check) {
		strExtract(options["terminate-lifetime"], terminate_lifetime);
		assert(0 <= terminate_lifetime);
	}

	// Agent/environment interaction loop
	for (unsigned int cycle = 1; !env.isFinished(); cycle++) {

		// check for agent termination
		if (terminate_check && ai.lifetime() > terminate_lifetime) {
			log << "info: terminating agent" << std::endl;
			break;
		}

		// Get a percept from the environment
		percept_t observation = env.getObservation();
		percept_t reward = env.getReward();

		// Update agent's environment model with the new percept
		ai.modelUpdate(observation, reward); // TODO: implement in agent.cpp

		// Determine best exploitive action, or explore.
		// rand01() is only consulted when exploration is enabled.
		action_t action;
		bool explored = false;
		if (explore && rand01() < explore_rate) {
			explored = true;
			action = ai.genRandomAction();
		}
		else {
			action = search(ai); // TODO: implement in search.cpp
		}

		// Send an action to the environment
		env.performAction(action); // TODO: implement for each environment

		// Update agent's environment model with the chosen action
		ai.modelUpdate(action); // TODO: implement in agent.cpp

		// Log this turn
		log << "cycle: " << cycle << std::endl;
		log << "observation: " << observation << std::endl;
		log << "reward: " << reward << std::endl;
		log << "action: " << action << std::endl;
		log << "explored: " << (explored ? "yes" : "no") << std::endl;
		log << "explore rate: " << explore_rate << std::endl;
		log << "total reward: " << ai.reward() << std::endl;
		log << "average reward: " << ai.averageReward() << std::endl;

		// Log the data in a more compact form
		compactLog << cycle << ", " << observation << ", " << reward << ", "
				<< action << ", " << explored << ", " << explore_rate << ", "
				<< ai.reward() << ", " << ai.averageReward() << std::endl;

		// Print to standard output when cycle == 2^n
		// (x & (x - 1)) clears the lowest set bit, so it is zero exactly
		// when a single bit is set.
		if ((cycle & (cycle - 1)) == 0) {
			std::cout << "cycle: " << cycle << std::endl;
			std::cout << "average reward: " << ai.averageReward() << std::endl;
			if (explore) {
				std::cout << "explore rate: " << explore_rate << std::endl;
			}
		}

		// Update exploration rate
		if (explore) explore_rate *= explore_decay;

	}

	// Print summary to standard output
	std::cout << std::endl << std::endl << "SUMMARY" << std::endl;
	std::cout << "agent lifetime: " << ai.lifetime() << std::endl;
	std::cout << "average reward: " << ai.averageReward() << std::endl;
}
예제 #4
0
/** Drive the interaction between an agent and its environment until the
 * environment reports that it is finished or the agent exceeds its
 * termination age. In every cycle the agent is handed the environment's
 * observation and reward, picks an action (a random one when exploring,
 * otherwise the result of its search) and that action is performed on the
 * environment. One line per cycle is written to the ::logger stream, and a
 * short progress report is printed to the standard output whenever the cycle
 * number is a power of two (or on every cycle in verbose mode).
 * \param ai The agent.
 * \param env The environment.
 * \param options The configuration options. */
void mainLoop(Agent &ai, Environment &env, options_t &options) {

	// Seed the random number generator (option "random-seed", default 0)
	const unsigned int seed = getOption<unsigned int>(options, "random-seed", 0);
	srand(seed);

	// Whether to report on every cycle (option "verbose", default false)
	const bool verbose = getOption<bool>(options, "verbose", false);

	// Exploration settings: exploring is enabled only when the "exploration"
	// option is present; the rate defaults to 0 and the decay to 1.
	bool explore = options.count("exploration") > 0;
	double explore_rate = getOption<double>(options, "exploration", 0.0);
	const double explore_decay = getOption<double>(options, "explore-decay", 1.0);
	assert(0.0 <= explore_rate);
	assert(0.0 <= explore_decay && explore_decay <= 1.0);

	// Termination age: only checked when "terminate-age" is present
	const bool terminate_check = options.count("terminate-age") > 0;
	const age_t terminate_age = getOption<age_t>(options, "terminate-age", 0);
	assert(0 <= terminate_age);

	// Number of cycles during which exploration is allowed (0 = no limit)
	const int learning_period = getOption<int>(options, "learning-period", 0);
	assert(0 <= learning_period);

	// Agent/environment interaction loop
	int cycle = 1;
	while (!env.isFinished()) {

		// Stop once the agent is older than its termination age
		if (terminate_check && ai.age() > terminate_age) {
			break;
		}

		// Remember when this cycle began so its duration can be logged
		const clock_t started_at = clock();

		// Fetch the current percept from the environment
		percept_t observation = env.getObservation();
		percept_t reward = env.getReward();

		// Once the learning period has passed, exploration is switched off
		const bool learning_over = learning_period > 0 && cycle > learning_period;
		if (learning_over) {
			explore = false;
		}

		// Feed the percept to the agent's environment model
		ai.modelUpdate(observation, reward);

		// Choose an action: explore with probability explore_rate (the random
		// draw only happens while exploration is on), otherwise exploit.
		bool explored = explore && (rand01() < explore_rate);
		action_t action;
		if (explored) {
			action = ai.genRandomAction();
		}
		else {
			action = ai.search();
		}

		// Act on the environment, then tell the agent's model what was done
		env.performAction(action);
		ai.modelUpdate(action);

		// Duration of this cycle in seconds, as measured by clock()
		const double elapsed_seconds =
			static_cast<double>(clock() - started_at) / static_cast<double>(CLOCKS_PER_SEC);

		// Log this turn
		logger << cycle << ", " << observation << ", " << reward << ", "
			<< action << ", " << explored << ", " << explore_rate << ", "
			<< ai.totalReward() << ", " << ai.averageReward() << ", "
			<< elapsed_seconds << ", " << ai.modelSize() << std::endl;

		// Report progress when cycle == 2^n, or always in verbose mode
		const bool power_of_two = (cycle & (cycle - 1)) == 0;
		if (verbose || power_of_two) {
			std::cout << "cycle: " << cycle << std::endl;
			std::cout << "average reward: " << ai.averageReward() << std::endl;
			if (explore) {
				std::cout << "explore rate: " << explore_rate << std::endl;
			}
		}

		// In verbose mode also print the environment state
		if (verbose) {
			std::cout << env.print();
		}

		// Decay the exploration rate
		if (explore) {
			explore_rate *= explore_decay;
		}

		++cycle;
	}

	// Print summary to standard output
	std::cout << std::endl << std::endl << "SUMMARY" << std::endl;
	std::cout << "agent age: " << ai.age() << std::endl;
	std::cout << "average reward: " << ai.averageReward() << std::endl;
}