C++ (Cpp) Agent::averageReward 예제들

프로그래밍 언어: C++ (Cpp)

클래스/타입: Agent

메소드/함수: averageReward

hotexamples.com에서의 예제들: 3

C++ (Cpp) Agent::averageReward - 3개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 C++ (Cpp)의 Agent::averageReward에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

getDatabase(11)

GetType(9)

GetSelf(8)

getDevices(7)

getPosition(6)

getName(6)

getAlias(5)

IsValid(5)

GetCoord(5)

getUnit(5)

Self(5)

GetWorldState(4)

genRandomAction(4)

GetPos(4)

getDeviceFactor(3)

GetStrategy(3)

getType(3)

averageReward(3)

getPos(3)

getTreeStateSize(3)

getAgentId(3)

age(3)

deviceEffortCalc(3)

incSugar(3)

GetEntityID(3)

getDistanceEvaluator(3)

asBottle(2)

getID(2)

addSpecialAgent(2)

Move(2)

beliefs(2)

bestDevDevice(2)

addBelief(2)

addToBuffer(2)

addWeapon(2)

addressee(2)

getPotentialInformation(2)

getEpid(2)

Info(2)

getDeviceByName(2)

callMethodAsync(2)

genPerceptAndUpdate(2)

init(2)

getY(2)

canBuy(2)

get_index(2)

get_location(2)

get_port(2)

id(2)

GetContextId(2)

예제 #1

파일 보기

파일: main.cpp 프로젝트: gitter-badger/AIXI

// The main agent/environment interaction loop
void mainLoop(Agent &ai, Environment &env, options_t &options) {

	// Determine exploration options
	bool explore = options.count("exploration") > 0;
	double explore_rate, explore_decay;
	if (explore) {
		strExtract(options["exploration"], explore_rate);
		strExtract(options["explore-decay"], explore_decay);
		assert(0.0 <= explore_rate && explore_rate <= 1.0);
		assert(0.0 <= explore_decay && explore_decay <= 1.0);
	}


	// Determine termination lifetime
	bool terminate_check = options.count("terminate-lifetime") > 0;
	lifetime_t terminate_lifetime;
	if (terminate_check) {
		strExtract(options["terminate-lifetime"], terminate_lifetime);
		assert(0 <= terminate_lifetime);
	}

	// Agent/environment interaction loop
	for (unsigned int cycle = 1; !env.isFinished(); cycle++) {

		// check for agent termination
		if (terminate_check && ai.lifetime() > terminate_lifetime) {
			log << "info: terminating lifetiment" << std::endl;
			break;
		}

		// Get a percept from the environment
		percept_t observation = env.getObservation();
		percept_t reward = env.getReward();

		// Update agent's environment model with the new percept
		ai.modelUpdate(observation, reward); // TODO: implement in agent.cpp

		// Determine best exploitive action, or explore
		action_t action;
		bool explored = false;
		if (explore && rand01() < explore_rate) {
			explored = true;
			action = ai.genRandomAction();
		}
		else {
			action = search(ai); // TODO: implement in search.cpp
		}

		// Send an action to the environment
		env.performAction(action); // TODO: implement for each environment

		// Update agent's environment model with the chosen action
		ai.modelUpdate(action); // TODO: implement in agent.cpp

		// Log this turn
		log << "cycle: " << cycle << std::endl;
		log << "observation: " << observation << std::endl;
		log << "reward: " << reward << std::endl;
		log << "action: " << action << std::endl;
		log << "explored: " << (explored ? "yes" : "no") << std::endl;
		log << "explore rate: " << explore_rate << std::endl;
		log << "total reward: " << ai.reward() << std::endl;
		log << "average reward: " << ai.averageReward() << std::endl;

		// Log the data in a more compact form
		compactLog << cycle << ", " << observation << ", " << reward << ", "
				<< action << ", " << explored << ", " << explore_rate << ", "
				<< ai.reward() << ", " << ai.averageReward() << std::endl;

		// Print to standard output when cycle == 2^n
		if ((cycle & (cycle - 1)) == 0) {
			std::cout << "cycle: " << cycle << std::endl;
			std::cout << "average reward: " << ai.averageReward() << std::endl;
			if (explore) {
				std::cout << "explore rate: " << explore_rate << std::endl;
			}
		}

		// Update exploration rate
		if (explore) explore_rate *= explore_decay;

	}

	// Print summary to standard output
	std::cout << std::endl << std::endl << "SUMMARY" << std::endl;
	std::cout << "agent lifetime: " << ai.lifetime() << std::endl;
	std::cout << "average reward: " << ai.averageReward() << std::endl;
}

예제 #2

파일 보기

파일: main.cpp 프로젝트: jkirschner42/aixi

// The main agent/environment interaction loop
void mainLoop(Agent &ai, Environment &env, options_t &options) {

	// Determine exploration options
	bool explore = options.count("exploration") > 0;
	double explore_rate, explore_decay;
	if (explore) {
		strExtract(options["exploration"], explore_rate);
		strExtract(options["explore-decay"], explore_decay);
		assert(0.0 <= explore_rate && explore_rate <= 1.0);
		assert(0.0 <= explore_decay && explore_decay <= 1.0);
	}

    
	// Determine termination age
	bool terminate_check = options.count("terminate-age") > 0;
	age_t terminate_age;
	if (terminate_check) {
		strExtract(options["terminate-age"], terminate_age);
		assert(0 <= terminate_age);
	}

    // Determine mc-timelimit
    timelimit_t mc_timelimit;
    strExtract(options["mc-timelimit"], mc_timelimit);
    //if we assume that time_limit > agent.numActions() we can be sure 
    //that every action is selected at least once
    if(mc_timelimit < ai.numActions()){
        std::cerr << "WARNING: time_limit not large enough to sample all actions" << std::endl;
    }

	// Determine whether to write cts during the process, or only at the end.
    bool intermediate_ct = true;
    if(options.count("intermediate-ct") > 0){
        intermediate_ct = !(options["intermediate-ct"] == "0");
    }

    std::cout << "starting agent/environment interaction loop...\n"; 
	// Agent/environment interaction loop
	for (unsigned int cycle = 1; !env.isFinished(); cycle++) {

		// check for agent termination
		if (terminate_check && ai.age() >= terminate_age) {
			verboseLog << "info: terminating agent" << std::endl;
			break;
		}

		// Get a percept from the environment
		percept_t observation = env.getObservation();
		percept_t reward = env.getReward();

		// Update agent's environment model with the new percept
		ai.modelUpdate(observation, reward);

		// Determine best exploitive action, or explore
		action_t action;
		bool explored = false;
		if (explore && rand01() < explore_rate) {
			explored = true;
			action = ai.genRandomAction();
		}
		else {
			action = search(ai, mc_timelimit);
		}

		// Send an action to the environment
		env.performAction(action); 

		// Update agent's environment model with the chosen action
		ai.modelUpdate(action); 

		// Log this turn
		verboseLog << "cycle: " << cycle << std::endl;
		verboseLog << "observation: " << observation << std::endl;
		verboseLog << "reward: " << reward << std::endl;
		verboseLog << "action: " << action << std::endl;
		verboseLog << "explored: " << (explored ? "yes" : "no") << std::endl;
		verboseLog << "explore rate: " << explore_rate << std::endl;
		verboseLog << "total reward: " << ai.reward() << std::endl;
		verboseLog << "average reward: " << ai.averageReward() << std::endl;

		// Log the data in a more compact form
		compactLog << cycle << ", " << observation << ", " << reward << ", "
				<< action << ", " << explored << ", " << explore_rate << ", "
				<< ai.reward() << ", " << ai.averageReward() << std::endl;

		// Print to standard output when cycle == 2^n
		if ((cycle & (cycle - 1)) == 0) {
			std::cout << "cycle: " << cycle << std::endl;
			std::cout << "average reward: " << ai.averageReward() << std::endl;
			if (explore) {
				std::cout << "explore rate: " << explore_rate << std::endl;
			}

			// Write context tree file
			if(options["write-ct"] != "" && intermediate_ct){
				// write a ct for each 2^n cycles.
				char cycle_string[256];
				sprintf(cycle_string, "%d", cycle);
				std::ofstream ct((options["write-ct"] + std::string(cycle_string) + ".ct").c_str());
				ai.writeCT(ct);
				ct.close();
			}
		}

		// Update exploration rate
		if (explore) explore_rate *= explore_decay;

	}

	// Print summary to standard output
	std::cout << std::endl << std::endl << "SUMMARY" << std::endl;
	std::cout << "agent age: " << ai.age() << std::endl;
	std::cout << "average reward: " << ai.averageReward() << std::endl;

    // Write context tree file
    if(options["write-ct"] != ""){
    	// write a ct for the final cycle too.
		char cycle_string[256];
		sprintf(cycle_string, "%lld", ai.age());
		std::ofstream ct((options["write-ct"] + std::string(cycle_string) + ".ct").c_str());
		ai.writeCT(ct);
		ct.close();
    }
}

예제 #3

파일 보기

파일: main.cpp 프로젝트: cooijmanstim/mc-aixi

/** The main agent/environment interaction loop. Each interaction cycle begins
 * with the agent receiving an observation and reward from the environment.
 * Subsequently, the agent selects an action and informs the environment. The
 * interactions that took place are logged to the ::logger and ::compactLogger
 * streams. When the cycle equals a power of two, a summary of the interactions
 * is printed to the standard output.
 * \param ai The agent.
 * \param env The environment.
 * \param options The configuration options. */
void mainLoop(Agent &ai, Environment &env, options_t &options) {

	// Apply random seed (Defaut: 0)
	srand(getOption<unsigned int>(options, "random-seed", 0));

	// Verbose output (Default: false)
	bool verbose = getOption<bool>(options, "verbose", false);

	// Determine exploration options (Default: don't explore, don't decay)
	bool explore = options.count("exploration") > 0;
	double explore_rate = getOption<double>(options, "exploration", 0.0);
	double explore_decay = getOption<double>(options, "explore-decay", 1.0);
    assert(0.0 <= explore_rate);
	assert(0.0 <= explore_decay && explore_decay <= 1.0);

	// Determine termination age (Default: don't terminate)
	bool terminate_check = options.count("terminate-age") > 0;
	age_t terminate_age = getOption<age_t>(options, "terminate-age", 0);
	assert(0 <= terminate_age);
	
	// Determine the cycle after which the agent stops learning (if ever)
	int learning_period = getOption<int>(options, "learning-period", 0);
	assert(0 <= learning_period);

	// Agent/environment interaction loop
	for (int cycle = 1; !env.isFinished(); cycle++) {

		// Check for agent termination
		if (terminate_check && ai.age() > terminate_age) {
			break;
		}
		
		// Save the current clock cycle (to compute how long this cycle took)
		clock_t cycle_start = clock();

		// Get a percept from the environment
		percept_t observation = env.getObservation();
		percept_t reward = env.getReward();


		if (learning_period > 0 && cycle > learning_period)
			explore = false;
		
		// Update agent's environment model with the new percept
		ai.modelUpdate(observation, reward);

		// Determine best exploitive action, or explore
		action_t action;
		bool explored = false;

		if (explore && (rand01() < explore_rate)) { // Explore
			explored = true;
			action = ai.genRandomAction();
		}
		else { // Exploit
			action = ai.search();
		}

		// Send an action to the environment
		env.performAction(action);
		
		// Update agent's environment model with the chosen action
		ai.modelUpdate(action);
		
		// Calculate how long this cycle took
		double time = double(clock() - cycle_start) / double(CLOCKS_PER_SEC);

		// Log this turn
		logger << cycle << ", " << observation << ", " << reward << ", "
			<< action << ", " << explored << ", " << explore_rate << ", "
			<< ai.totalReward() << ", " << ai.averageReward() << ", "
			<< time << ", " << ai.modelSize() << std::endl;

		// Print to standard output when cycle == 2^n or on verbose option
		if (verbose || (cycle & (cycle - 1)) == 0) {
			std::cout << "cycle: " << cycle << std::endl;
			std::cout << "average reward: " << ai.averageReward() << std::endl;
			if (explore) {
				std::cout << "explore rate: " << explore_rate << std::endl;
			}
		}

		// Print environment state if verbose option is true
		if (verbose) {
  		    std::cout << env.print();
		}

		// Update exploration rate
		if (explore) explore_rate *= explore_decay;

	}

	// Print summary to standard output
	std::cout << std::endl << std::endl << "SUMMARY" << std::endl;
	std::cout << "agent age: " << ai.age() << std::endl;
	std::cout << "average reward: " << ai.averageReward() << std::endl;
}