// Coin-flip environment constructor.
//
// Reads the probability of the coin landing on heads from the "coin-flip-p"
// option (keeping the default of 1.0 when that option is absent), validates
// the value, and produces the initial percept.
CoinFlip::CoinFlip(options_t &options) {
	// Probability of heads; defaults to 1.0 unless "coin-flip-p" overrides it.
	p = 1.0;
	if (options.count("coin-flip-p") > 0) {
		strExtract(options["coin-flip-p"], p);
	}
	assert(0.0 <= p);
	assert(p <= 1.0);

	// First observation: heads (1) with probability p, otherwise tails (0).
	if (rand01() < p) {
		m_observation = 1;
	} else {
		m_observation = 0;
	}
	m_reward = 0;
}
void load_trace_txt ( char* fname ) { std::string str, word; char buf[1024]; unsigned long totalBytes = getFileSize ( fname ); unsigned long currBytes = 0; FILE* fp = fopen ( fname, "rt" ); int c; std::vector<std::string> changestates; changestates.push_back ( "x" ); // 0 = BIN_NOTUSED changestates.push_back ( "c" ); // 1 = BIN_CREATE changestates.push_back ( "u" ); // 2 = BIN_CHANGE changestates.push_back ( "s" ); // 3 = BIN_SWITCH changestates.push_back ( "-" ); // 4 = BIN_REUSE changestates.push_back ( "D" ); Call cl; Event e; int lin = 0; int max_lin = 5000; std::string szstr; int sz; unsigned long cstart = 0; int cnum = 0; while (!feof(fp) && lin < max_lin) { currBytes = getFilePos ( fp ); printf ( "%d (%.2f%%)\n", currBytes, currBytes*100.0f/totalBytes ); fgets ( buf, 1024, fp ); str = buf; int bin = 0; str = strTrim ( str ); e.name = word; if ( str.compare (0, 2, "C:") == 0 ) { /*word = strSplit ( str, " " ); word = strSplit ( str, " " ); cl.bin_id = strToI(word); word = strSplit ( str, " " ); cl.size = strToI(word); word = strSplit ( str, " " ); cl.obj_id = strToLI(word); word = strSplit ( str, " " ); cl.val_id = strToLI(word); word = strSplit ( str, " " ); cl.name = word; mCalls.push_back ( cl );*/ cnum++; } else if ( str.compare ( 0, 2, "FR" ) == 0 ) { e.count = 1; for (int n=0; n < NUM_BIN; n++ ) { e.bin_id[n] = -1; e.bin_change[n] = -1; } mEvents.push_back ( e ); } else if ( str.compare ( 0, 2, "Dr" ) == 0 ) { e.count = 1; int bin = 0; word = strLeft ( str, 8 ); str = strTrim ( str ); while ( str.length() > 0 ) { word = strSplit ( str, " " ); c = strExtract ( word, changestates ); szstr = strParse ( word, "[", "]" ); e.bin_id[bin] = strToI ( word ); e.bin_change[bin] = c; e.bin_size[bin] = strToI ( szstr ); bin++; } e.call_start = cstart; e.call_num = cnum; if ( e.bin_size[BIN_DRAW] > mMaxSize ) mMaxSize = e.bin_size[BIN_DRAW]; mEvents.push_back ( e ); cstart += cnum; } lin++; } fclose ( fp ); }
// The main agent/environment interaction loop void mainLoop(Agent &ai, Environment &env, options_t &options) { // Determine exploration options bool explore = options.count("exploration") > 0; double explore_rate, explore_decay; if (explore) { strExtract(options["exploration"], explore_rate); strExtract(options["explore-decay"], explore_decay); assert(0.0 <= explore_rate && explore_rate <= 1.0); assert(0.0 <= explore_decay && explore_decay <= 1.0); } // Determine termination age bool terminate_check = options.count("terminate-age") > 0; age_t terminate_age; if (terminate_check) { strExtract(options["terminate-age"], terminate_age); assert(0 <= terminate_age); } // Determine mc-timelimit timelimit_t mc_timelimit; strExtract(options["mc-timelimit"], mc_timelimit); //if we assume that time_limit > agent.numActions() we can be sure //that every action is selected at least once if(mc_timelimit < ai.numActions()){ std::cerr << "WARNING: time_limit not large enough to sample all actions" << std::endl; } // Determine whether to write cts during the process, or only at the end. 
bool intermediate_ct = true; if(options.count("intermediate-ct") > 0){ intermediate_ct = !(options["intermediate-ct"] == "0"); } std::cout << "starting agent/environment interaction loop...\n"; // Agent/environment interaction loop for (unsigned int cycle = 1; !env.isFinished(); cycle++) { // check for agent termination if (terminate_check && ai.age() >= terminate_age) { verboseLog << "info: terminating agent" << std::endl; break; } // Get a percept from the environment percept_t observation = env.getObservation(); percept_t reward = env.getReward(); // Update agent's environment model with the new percept ai.modelUpdate(observation, reward); // Determine best exploitive action, or explore action_t action; bool explored = false; if (explore && rand01() < explore_rate) { explored = true; action = ai.genRandomAction(); } else { action = search(ai, mc_timelimit); } // Send an action to the environment env.performAction(action); // Update agent's environment model with the chosen action ai.modelUpdate(action); // Log this turn verboseLog << "cycle: " << cycle << std::endl; verboseLog << "observation: " << observation << std::endl; verboseLog << "reward: " << reward << std::endl; verboseLog << "action: " << action << std::endl; verboseLog << "explored: " << (explored ? 
"yes" : "no") << std::endl; verboseLog << "explore rate: " << explore_rate << std::endl; verboseLog << "total reward: " << ai.reward() << std::endl; verboseLog << "average reward: " << ai.averageReward() << std::endl; // Log the data in a more compact form compactLog << cycle << ", " << observation << ", " << reward << ", " << action << ", " << explored << ", " << explore_rate << ", " << ai.reward() << ", " << ai.averageReward() << std::endl; // Print to standard output when cycle == 2^n if ((cycle & (cycle - 1)) == 0) { std::cout << "cycle: " << cycle << std::endl; std::cout << "average reward: " << ai.averageReward() << std::endl; if (explore) { std::cout << "explore rate: " << explore_rate << std::endl; } // Write context tree file if(options["write-ct"] != "" && intermediate_ct){ // write a ct for each 2^n cycles. char cycle_string[256]; sprintf(cycle_string, "%d", cycle); std::ofstream ct((options["write-ct"] + std::string(cycle_string) + ".ct").c_str()); ai.writeCT(ct); ct.close(); } } // Update exploration rate if (explore) explore_rate *= explore_decay; } // Print summary to standard output std::cout << std::endl << std::endl << "SUMMARY" << std::endl; std::cout << "agent age: " << ai.age() << std::endl; std::cout << "average reward: " << ai.averageReward() << std::endl; // Write context tree file if(options["write-ct"] != ""){ // write a ct for the final cycle too. char cycle_string[256]; sprintf(cycle_string, "%lld", ai.age()); std::ofstream ct((options["write-ct"] + std::string(cycle_string) + ".ct").c_str()); ai.writeCT(ct); ct.close(); } }
// Convenience overload of strExtract: parse a value of type T out of 'str'
// and return it by value. Delegates to the two-argument strExtract(str, val)
// overload, which performs the actual extraction.
// NOTE(review): the 'template <typename T>' header for this function is not
// visible in this chunk; it is assumed to appear immediately above -- confirm.
T strExtract(std::string &str) {
	T val;
	strExtract(str, val);
	return val;
}
// The main agent/environment interaction loop void mainLoop(Agent &ai, Environment &env, options_t &options) { // Determine exploration options bool explore = options.count("exploration") > 0; double explore_rate, explore_decay; if (explore) { strExtract(options["exploration"], explore_rate); strExtract(options["explore-decay"], explore_decay); assert(0.0 <= explore_rate && explore_rate <= 1.0); assert(0.0 <= explore_decay && explore_decay <= 1.0); } // Determine termination lifetime bool terminate_check = options.count("terminate-lifetime") > 0; lifetime_t terminate_lifetime; if (terminate_check) { strExtract(options["terminate-lifetime"], terminate_lifetime); assert(0 <= terminate_lifetime); } // Agent/environment interaction loop for (unsigned int cycle = 1; !env.isFinished(); cycle++) { // check for agent termination if (terminate_check && ai.lifetime() > terminate_lifetime) { log << "info: terminating lifetiment" << std::endl; break; } // Get a percept from the environment percept_t observation = env.getObservation(); percept_t reward = env.getReward(); // Update agent's environment model with the new percept ai.modelUpdate(observation, reward); // TODO: implement in agent.cpp // Determine best exploitive action, or explore action_t action; bool explored = false; if (explore && rand01() < explore_rate) { explored = true; action = ai.genRandomAction(); } else { action = search(ai); // TODO: implement in search.cpp } // Send an action to the environment env.performAction(action); // TODO: implement for each environment // Update agent's environment model with the chosen action ai.modelUpdate(action); // TODO: implement in agent.cpp // Log this turn log << "cycle: " << cycle << std::endl; log << "observation: " << observation << std::endl; log << "reward: " << reward << std::endl; log << "action: " << action << std::endl; log << "explored: " << (explored ? 
"yes" : "no") << std::endl; log << "explore rate: " << explore_rate << std::endl; log << "total reward: " << ai.reward() << std::endl; log << "average reward: " << ai.averageReward() << std::endl; // Log the data in a more compact form compactLog << cycle << ", " << observation << ", " << reward << ", " << action << ", " << explored << ", " << explore_rate << ", " << ai.reward() << ", " << ai.averageReward() << std::endl; // Print to standard output when cycle == 2^n if ((cycle & (cycle - 1)) == 0) { std::cout << "cycle: " << cycle << std::endl; std::cout << "average reward: " << ai.averageReward() << std::endl; if (explore) { std::cout << "explore rate: " << explore_rate << std::endl; } } // Update exploration rate if (explore) explore_rate *= explore_decay; } // Print summary to standard output std::cout << std::endl << std::endl << "SUMMARY" << std::endl; std::cout << "agent lifetime: " << ai.lifetime() << std::endl; std::cout << "average reward: " << ai.averageReward() << std::endl; }