void IPPCEvaluator::UpdateTimePerMove(double step_time) { if (step_time < 0.99 * EvalLog::allocated_time) { if (EvalLog::plan_time_ratio < 1.0) EvalLog::plan_time_ratio += 0.01; if (EvalLog::plan_time_ratio > 1.0) EvalLog::plan_time_ratio = 1.0; } else if (step_time > EvalLog::allocated_time) { double delta = (step_time - EvalLog::allocated_time) / (EvalLog::allocated_time + 1E-6); if (delta < 0.02) delta = 0.02; // Minimum reduction per step if (delta > 0.05) delta = 0.05; // Maximum reduction per step EvalLog::plan_time_ratio -= delta; // if (EvalLog::plan_time_ratio < 0) // EvalLog::plan_time_ratio = 0; } EvalLog::curr_inst_remaining_budget = log_.GetRemainingBudget(instance_); EvalLog::curr_inst_remaining_steps--; UpdateTimeInfo(instance_); Globals::config.time_per_move = EvalLog::plan_time_ratio * EvalLog::allocated_time; if (!Globals::config.silence && out_) { *out_ << "Total time: curr_inst / inst_target / remaining / since_start = " << (get_time_second() - EvalLog::curr_inst_start_time) << " / " << (EvalLog::curr_inst_target_time * (EvalLog::curr_inst_steps - EvalLog::curr_inst_remaining_steps)) << " / " << EvalLog::curr_inst_remaining_budget << " / " << (get_time_second() - EvalLog::start_time) << endl; } }
bool IPPCEvaluator::ExecuteAction(int action, double& reward, OBS_TYPE& obs) { double start_t = get_time_second(); client_->sendMessage( client_->createActionMes(pomdpx_->GetActionName(), pomdpx_->GetEnumedAction(action))); if (step_ == Globals::config.sim_len - 1) { return true; } string turnMes = client_->recvMessage(); //get step reward from turn message: added by wkg reward = client_->getStepReward(turnMes); reward_ = reward; total_discounted_reward_ += Discount(step_) * reward; total_undiscounted_reward_ += reward; map<string, string> observs = client_->processTurnMes(turnMes); obs = pomdpx_->GetPOMDPXObservation(observs); double end_t = get_time_second(); if (!Globals::config.silence && out_) { *out_ << "Time for executing action " << (end_t - start_t) << endl; } return false; }
double IPPCEvaluator::EndRound() { double start_t = get_time_second(); string roundEndMes = client_->recvMessage(); double round_reward = client_->processRoundEndMes(roundEndMes); if (!Globals::config.silence && out_) { *out_ << "Total undiscounted reward = " << round_reward << endl; } log_.IncNumOfCompletedRuns(instance_); log_.Save(); double end_t = get_time_second(); if (!Globals::config.silence && out_) { *out_ << "Time for endround msg (save log) " << (end_t - start_t) << endl; } discounted_round_rewards_.push_back(total_discounted_reward_); undiscounted_round_rewards_.push_back(round_reward); return round_reward; }
void POMDPEvaluator::InitRound() { step_ = 0; double start_t, end_t; // Initial state state_ = model_->CreateStartState(); logi << "[POMDPEvaluator::InitRound] Created start state." << endl; if (!Globals::config.silence && out_) { *out_ << "Initial state: " << endl; model_->PrintState(*state_, *out_); *out_ << endl; } // Initial belief start_t = get_time_second(); delete solver_->belief(); end_t = get_time_second(); logi << "[POMDPEvaluator::InitRound] Deleted old belief in " << (end_t - start_t) << "s" << endl; start_t = get_time_second(); Belief* belief = model_->InitialBelief(state_, belief_type_); end_t = get_time_second(); logi << "[POMDPEvaluator::InitRound] Created intial belief " << typeid(*belief).name() << " in " << (end_t - start_t) << "s" << endl; solver_->belief(belief); total_discounted_reward_ = 0; total_undiscounted_reward_ = 0; }
/**
 * Incorporates an executed action and the received observation: updates the
 * belief, appends the pair to the history, and hands the belief to the lower
 * bound.
 *
 * @param action Index of the action that was executed.
 * @param obs    Observation received after executing the action.
 */
void DESPOT::Update(int action, OBS_TYPE obs) {
    const double begin_time = get_time_second();

    belief_->Update(action, obs);
    history_.Add(action, obs);
    lower_bound_->belief(belief_);

    logi << "[Solver::Update] Updated belief, history and root with action "
        << action << ", observation " << obs
        << " in " << (get_time_second() - begin_time) << "s" << endl;
}
/**
 * Plans from the current belief: samples particles, builds a search tree
 * within Globals::config.time_per_move, extracts the best action at the root,
 * and releases the tree.
 *
 * @return The action chosen at the root together with its value. When
 *         Globals::config.time_per_move is not positive, a random action with
 *         value Globals::NEG_INFTY is returned without any search.
 */
ValuedAction DESPOT::Search() {
    if (logging::level() >= logging::INFO) {
        model_->PrintBelief(*belief_);
    }

    // No time allocated for planning: return a random action.
    if (Globals::config.time_per_move <= 0) {
        return ValuedAction(Random::RANDOM.NextInt(model_->NumActions()),
            Globals::NEG_INFTY);
    }

    double tick = get_time_second();
    vector<State*> particles = belief_->Sample(Globals::config.num_scenarios);
    logi << "[DESPOT::Search] Time for sampling " << particles.size()
        << " particles: " << (get_time_second() - tick) << "s" << endl;

    statistics_ = SearchStatistics();

    tick = get_time_second();

    // The streams object persists across calls (function-local static).
    static RandomStreams streams = RandomStreams(Globals::config.num_scenarios,
        Globals::config.search_depth);

    const bool has_lookahead_upper_bound =
        (dynamic_cast<LookaheadUpperBound*>(upper_bound_) != NULL);

    if (!has_lookahead_upper_bound) {
        // Draw fresh streams for this search and re-initialize both bounds.
        streams = RandomStreams(Globals::config.num_scenarios,
            Globals::config.search_depth);
        lower_bound_->Init(streams);
        upper_bound_->Init(streams);
    } else {
        // Avoid using new streams for LookaheadUpperBound: the bounds are
        // initialized against the persistent streams only once.
        static bool bounds_initialized = false;
        if (!bounds_initialized) {
            lower_bound_->Init(streams);
            upper_bound_->Init(streams);
            bounds_initialized = true;
        }
    }

    root_ = ConstructTree(particles, streams, lower_bound_, upper_bound_,
        model_, history_, Globals::config.time_per_move, &statistics_);
    logi << "[DESPOT::Search] Time for tree construction: "
        << (get_time_second() - tick) << "s" << endl;

    tick = get_time_second();
    root_->Free(*model_);
    logi << "[DESPOT::Search] Time for freeing particles in search tree: "
        << (get_time_second() - tick) << "s" << endl;

    // Pick the action before the tree itself is deleted.
    ValuedAction best = OptimalAction(root_);

    tick = get_time_second();
    delete root_;
    logi << "[DESPOT::Search] Time for deleting tree: "
        << (get_time_second() - tick) << "s" << endl;

    logi << "[DESPOT::Search] Search statistics:" << endl << statistics_
        << endl;

    return best;
}
/**
 * Opens a session with the server for the given instance and initializes the
 * per-instance time bookkeeping.
 *
 * @param instance Name of the problem instance to request a session for.
 * @return The number of runs still to be done for the instance according to
 *         the log. When that number is 0, nothing else happens (no connection
 *         is opened) and 0 is returned.
 */
int IPPCEvaluator::Handshake(string instance) {
    const int runs_left = log_.GetNumRemainingRuns(instance);
    if (runs_left == 0) {
        return 0;
    }

    const double begin_time = get_time_second();
    instance_ = instance;

    // Connect to the server and request a session for this instance.
    client_ = new Client();
    client_->setHostName(hostname_);
    client_->setPort(port_);
    client_->initializeSocket();
    client_->connectToServer();

    client_->sendMessage(client_->createSessionRequestMes(instance));

    string session_init_message = client_->recvMessage();

    const bool verbose = !Globals::config.silence && out_;
    if (verbose) {
        *out_ << session_init_message << endl;
    }

    client_->processSessionInitMes(session_init_message);
    const double finish_time = get_time_second();

    if (verbose) {
        *out_ << "Time for handsake " << (finish_time - begin_time) << endl;
    }

    // Time bookkeeping: total and remaining steps both start at
    // (remaining runs * steps per run); the target time per step is the
    // instance budget spread evenly over those steps.
    log_.SetInitialBudget(instance);
    EvalLog::curr_inst_steps = runs_left * Globals::config.sim_len;
    EvalLog::curr_inst_remaining_steps = runs_left * Globals::config.sim_len;
    EvalLog::curr_inst_target_time =
        EvalLog::curr_inst_budget / EvalLog::curr_inst_steps;
    UpdateTimeInfo(instance);

    // Start by planning with the full allocated time.
    EvalLog::plan_time_ratio = 1.0;
    Globals::config.time_per_move =
        EvalLog::plan_time_ratio * EvalLog::allocated_time;

    return runs_left;
}
double IPPCEvaluator::End() { double start_t = get_time_second(); string sessionEndMes = client_->recvMessage(); double total_reward = client_->processSessionEndMes(sessionEndMes); client_->closeConnection(); delete client_; double end_t = get_time_second(); if (!Globals::config.silence && out_) { *out_ << "Time for endsession " << (end_t - start_t) << endl << "Total reward for all runs = " << total_reward << endl << "Total time: Real / CPU = " << (get_time_second() - EvalLog::curr_inst_start_time) << " / " << (double(clock() - start_clockt_) / CLOCKS_PER_SEC) << "s" << endl; } return total_reward; }
void IPPCEvaluator::InitRound() { step_ = 0; state_ = NULL; double start_t, end_t; // Initial belief start_t = get_time_second(); delete solver_->belief(); end_t = get_time_second(); logi << "[IPPCEvaluator::InitRound] Deleted initial belief in " << (end_t - start_t) << "s" << endl; start_t = get_time_second(); Belief* belief = model_->InitialBelief(NULL, belief_type_); end_t = get_time_second(); logi << "[IPPCEvaluator::InitRound] Initialized initial belief: " << typeid(*belief).name() << " in " << (end_t - start_t) << "s" << endl; solver_->belief(belief); // Initiate a round with server start_t = get_time_second(); client_->sendMessage(client_->createRoundRequestMes()); string roundMes = client_->recvMessageTwice(); end_t = get_time_second(); logi << "[IPPCEvaluator::InitRound] Time for startround msg " << (end_t - start_t) << "s" << endl; total_discounted_reward_ = 0; total_undiscounted_reward_ = 0; }
/**
 * Builds a simulation-based evaluator.
 *
 * @param model              Model forwarded to the Evaluator base.
 * @param belief_type        Belief type forwarded to the Evaluator base.
 * @param solver             Solver forwarded to the Evaluator base.
 * @param start_clockt       Start clock value forwarded to the Evaluator base.
 * @param out                Output stream forwarded to the Evaluator base.
 * @param target_finish_time Time (on the get_time_second() scale) by which
 *                           the evaluation should finish, or -1 for no limit.
 * @param num_steps          Number of steps over which the time remaining
 *                           until target_finish_time is spread; only used
 *                           when a limit is given.
 */
POMDPEvaluator::POMDPEvaluator(DSPOMDP* model, string belief_type,
    Solver* solver, clock_t start_clockt, ostream* out,
    double target_finish_time, int num_steps) :
    Evaluator(model, belief_type, solver, start_clockt, out),
    random_(static_cast<unsigned>(0)) {
    target_finish_time_ = target_finish_time;

    // Without a time limit the time settings are left untouched.
    if (target_finish_time_ == -1) {
        return;
    }

    // Spread the time left until the target evenly over all steps.
    EvalLog::allocated_time =
        (target_finish_time_ - get_time_second()) / num_steps;
    Globals::config.time_per_move = EvalLog::allocated_time;
    EvalLog::curr_inst_remaining_steps = num_steps;
}
void SimpleTUI::PrintResult(int num_runs, Evaluator *simulator, clock_t main_clock_start) { cout << "\nCompleted " << num_runs << " run(s)." << endl; cout << "Average total discounted reward (stderr) = " << simulator->AverageDiscountedRoundReward() << " (" << simulator->StderrDiscountedRoundReward() << ")" << endl; cout << "Average total undiscounted reward (stderr) = " << simulator->AverageUndiscountedRoundReward() << " (" << simulator->StderrUndiscountedRoundReward() << ")" << endl; cout << "Total time: Real / CPU = " << (get_time_second() - EvalLog::curr_inst_start_time) << " / " << (double(clock() - main_clock_start) / CLOCKS_PER_SEC) << "s" << endl; }
void POMDPEvaluator::UpdateTimePerMove(double step_time) { if (target_finish_time_ != -1) { if (step_time < 0.99 * EvalLog::allocated_time) { if (EvalLog::plan_time_ratio < 1.0) EvalLog::plan_time_ratio += 0.01; if (EvalLog::plan_time_ratio > 1.0) EvalLog::plan_time_ratio = 1.0; } else if (step_time > EvalLog::allocated_time) { double delta = (step_time - EvalLog::allocated_time) / (EvalLog::allocated_time + 1E-6); if (delta < 0.02) delta = 0.02; // Minimum reduction per step if (delta > 0.05) delta = 0.05; // Maximum reduction per step EvalLog::plan_time_ratio -= delta; // if (EvalLog::plan_time_ratio < 0) // EvalLog::plan_time_ratio = 0; } EvalLog::curr_inst_remaining_budget = target_finish_time_ - get_time_second(); EvalLog::curr_inst_remaining_steps--; if (EvalLog::curr_inst_remaining_steps <= 0) { EvalLog::allocated_time = 0; } else { EvalLog::allocated_time = (EvalLog::curr_inst_remaining_budget - 2.0) / EvalLog::curr_inst_remaining_steps; if (EvalLog::allocated_time > 5.0) EvalLog::allocated_time = 5.0; } Globals::config.time_per_move = EvalLog::plan_time_ratio * EvalLog::allocated_time; } }
/**
 * Computes how much of the current instance's budget is left.
 *
 * @param instance Instance name; not used by the computation.
 * @return curr_inst_budget minus the time elapsed since
 *         EvalLog::curr_inst_start_time.
 */
double EvalLog::GetRemainingBudget(string instance) const {
    const double elapsed = get_time_second() - EvalLog::curr_inst_start_time;
    return curr_inst_budget - elapsed;
}
bool Evaluator::RunStep(int step, int round) { if (target_finish_time_ != -1 && get_time_second() > target_finish_time_) { if (!Globals::config.silence && out_) *out_ << "Exit. (Total time " << (get_time_second() - EvalLog::curr_inst_start_time) << "s exceeded time limit of " << (target_finish_time_ - EvalLog::curr_inst_start_time) << "s)" << endl << "Total time: Real / CPU = " << (get_time_second() - EvalLog::curr_inst_start_time) << " / " << (double(clock() - start_clockt_) / CLOCKS_PER_SEC) << "s" << endl; exit(1); } double step_start_t = get_time_second(); double start_t = get_time_second(); int action = solver_->Search().action; double end_t = get_time_second(); logi << "[RunStep] Time spent in " << typeid(*solver_).name() << "::Search(): " << (end_t - start_t) << endl; double reward; OBS_TYPE obs; start_t = get_time_second(); bool terminal = ExecuteAction(action, reward, obs); end_t = get_time_second(); logi << "[RunStep] Time spent in ExecuteAction(): " << (end_t - start_t) << endl; start_t = get_time_second(); *out_ << "-----------------------------------Round " << round << " Step " << step << "-----------------------------------" << endl; if (!Globals::config.silence && out_) { *out_ << "- Action = "; model_->PrintAction(action, *out_); } if (state_ != NULL) { if (!Globals::config.silence && out_) { *out_ << "- State:\n"; model_->PrintState(*state_, *out_); } } if (!Globals::config.silence && out_) { *out_ << "- Observation = "; model_->PrintObs(*state_, obs, *out_); } if (state_ != NULL) { if (!Globals::config.silence && out_) *out_ << "- ObsProb = " << model_->ObsProb(obs, *state_, action) << endl; } ReportStepReward(); end_t = get_time_second(); double step_end_t; if (terminal) { step_end_t = get_time_second(); logi << "[RunStep] Time for step: actual / allocated = " << (step_end_t - step_start_t) << " / " << EvalLog::allocated_time << endl; if (!Globals::config.silence && out_) *out_ << endl; step_++; return true; } *out_<<endl; start_t = 
get_time_second(); solver_->Update(action, obs); end_t = get_time_second(); logi << "[RunStep] Time spent in Update(): " << (end_t - start_t) << endl; step_++; return false; }
void SimpleTUI::OptionParse(option::Option *options, int &num_runs, string &simulator_type, string &belief_type, int &time_limit, string &solver_type, bool &search_solver) { if (options[E_SILENCE]) Globals::config.silence = true; if (options[E_DEPTH]) Globals::config.search_depth = atoi(options[E_DEPTH].arg); if (options[E_DISCOUNT]) Globals::config.discount = atof(options[E_DISCOUNT].arg); if (options[E_SEED]) Globals::config.root_seed = atoi(options[E_SEED].arg); else { // last 9 digits of current time in milli second long millis = (long)get_time_second() * 1000; long range = (long)pow((double)10, (int)9); Globals::config.root_seed = (unsigned int)(millis - (millis / range) * range); } if (options[E_TIMEOUT]) Globals::config.time_per_move = atof(options[E_TIMEOUT].arg); if (options[E_NUMPARTICLES]) Globals::config.num_scenarios = atoi(options[E_NUMPARTICLES].arg); if (options[E_PRUNE]) Globals::config.pruning_constant = atof(options[E_PRUNE].arg); if (options[E_GAP]) Globals::config.xi = atof(options[E_GAP].arg); if (options[E_SIM_LEN]) Globals::config.sim_len = atoi(options[E_SIM_LEN].arg); if (options[E_EVALUATOR]) simulator_type = options[E_EVALUATOR].arg; if (options[E_MAX_POLICY_SIM_LEN]) Globals::config.max_policy_sim_len = atoi(options[E_MAX_POLICY_SIM_LEN].arg); if (options[E_DEFAULT_ACTION]) Globals::config.default_action = options[E_DEFAULT_ACTION].arg; if (options[E_RUNS]) num_runs = atoi(options[E_RUNS].arg); if (options[E_BELIEF]) belief_type = options[E_BELIEF].arg; if (options[E_TIME_LIMIT]) time_limit = atoi(options[E_TIME_LIMIT].arg); if (options[E_NOISE]) Globals::config.noise = atof(options[E_NOISE].arg); search_solver = options[E_SEARCH_SOLVER]; if (options[E_SOLVER]) solver_type = options[E_SOLVER].arg; int verbosity = 0; if (options[E_VERBOSITY]) verbosity = atoi(options[E_VERBOSITY].arg); logging::level(verbosity); }
int SimpleTUI::run(int argc, char *argv[]) { clock_t main_clock_start = clock(); EvalLog::curr_inst_start_time = get_time_second(); const char *program = (argc > 0) ? argv[0] : "despot"; argc -= (argc > 0); argv += (argc > 0); // skip program name argv[0] if present option::Stats stats(usage, argc, argv); option::Option *options = new option::Option[stats.options_max]; option::Option *buffer = new option::Option[stats.buffer_max]; option::Parser parse(usage, argc, argv, options, buffer); string solver_type = "DESPOT"; bool search_solver; /* ========================= * Parse required parameters * =========================*/ int num_runs = 1; string simulator_type = "pomdp"; string belief_type = "DEFAULT"; int time_limit = -1; /* ========================================= * Problem specific default parameter values *=========================================*/ InitializeDefaultParameters(); /* ========================= * Parse optional parameters * =========================*/ if (options[E_HELP]) { cout << "Usage: " << program << " [options]" << endl; option::printUsage(std::cout, usage); return 0; } OptionParse(options, num_runs, simulator_type, belief_type, time_limit, solver_type, search_solver); /* ========================= * Global random generator * =========================*/ Seeds::root_seed(Globals::config.root_seed); unsigned world_seed = Seeds::Next(); unsigned seed = Seeds::Next(); Random::RANDOM = Random(seed); /* ========================= * initialize model * =========================*/ DSPOMDP *model = InitializeModel(options); /* ========================= * initialize solver * =========================*/ Solver *solver = InitializeSolver(model, solver_type, options); assert(solver != NULL); /* ========================= * initialize simulator * =========================*/ Evaluator *simulator = NULL; InitializeEvaluator(simulator, options, model, solver, num_runs, main_clock_start, simulator_type, belief_type, time_limit, solver_type); 
simulator->world_seed(world_seed); int start_run = 0; /* ========================= * Display parameters * =========================*/ DisplayParameters(options, model); /* ========================= * run simulator * =========================*/ RunEvaluator(model, simulator, options, num_runs, search_solver, solver, simulator_type, main_clock_start, start_run); simulator->End(); PrintResult(num_runs, simulator, main_clock_start); return 0; }
void SimpleTUI::RunEvaluator(DSPOMDP *model, Evaluator *simulator, option::Option *options, int num_runs, bool search_solver, Solver *&solver, string simulator_type, clock_t main_clock_start, int start_run) { // Run num_runs simulations vector<double> round_rewards(num_runs); for (int round = start_run; round < start_run + num_runs; round++) { default_out << endl << "####################################### Round " << round << " #######################################" << endl; if (search_solver) { if (round == 0) { solver = InitializeSolver(model, "DESPOT", options); default_out << "Solver: " << typeid(*solver).name() << endl; simulator->solver(solver); } else if (round == 5) { solver = InitializeSolver(model, "POMCP", options); default_out << "Solver: " << typeid(*solver).name() << endl; simulator->solver(solver); } else if (round == 10) { double sum1 = 0, sum2 = 0; for (int i = 0; i < 5; i++) sum1 += round_rewards[i]; for (int i = 5; i < 10; i++) sum2 += round_rewards[i]; if (sum1 < sum2) solver = InitializeSolver(model, "POMCP", options); else solver = InitializeSolver(model, "DESPOT", options); default_out << "Solver: " << typeid(*solver).name() << " DESPOT:" << sum1 << " POMCP:" << sum2 << endl; } simulator->solver(solver); } simulator->InitRound(); for (int i = 0; i < Globals::config.sim_len; i++) { /* default_out << "-----------------------------------Round " << round << " Step " << i << "-----------------------------------" << endl;*/ double step_start_t = get_time_second(); bool terminal = simulator->RunStep(i, round); if (terminal) break; double step_end_t = get_time_second(); logi << "[main] Time for step: actual / allocated = " << (step_end_t - step_start_t) << " / " << EvalLog::allocated_time << endl; simulator->UpdateTimePerMove(step_end_t - step_start_t); logi << "[main] Time per move set to " << Globals::config.time_per_move << endl; logi << "[main] Plan time ratio set to " << EvalLog::plan_time_ratio << endl; // default_out << endl; } default_out 
<< "Simulation terminated in " << simulator->step() << " steps" << endl; double round_reward = simulator->EndRound(); round_rewards[round] = round_reward; } if (simulator_type == "ippc" && num_runs != 30) { cout << "Exit without receiving reward." << endl << "Total time: Real / CPU = " << (get_time_second() - EvalLog::curr_inst_start_time) << " / " << (double(clock() - main_clock_start) / CLOCKS_PER_SEC) << "s" << endl; exit(0); } }