Ejemplo n.º 1
0
/**
 * Adapts EvalLog::plan_time_ratio to the duration of the step that just
 * finished, refreshes the remaining budget and step counters of the current
 * instance, and derives Globals::config.time_per_move from the result.
 *
 * @param step_time Duration of the last step, compared against
 *                  EvalLog::allocated_time.
 */
void IPPCEvaluator::UpdateTimePerMove(double step_time) {
	const double allotted = EvalLog::allocated_time;

	if (step_time < 0.99 * allotted) {
		// Step finished comfortably early: raise the ratio by 0.01, capped
		// at 1.0.
		if (EvalLog::plan_time_ratio < 1.0) {
			EvalLog::plan_time_ratio += 0.01;
		}
		if (EvalLog::plan_time_ratio > 1.0) {
			EvalLog::plan_time_ratio = 1.0;
		}
	} else if (step_time > allotted) {
		// Step overran: lower the ratio by the relative overrun, bounded to
		// the range [0.02, 0.05] per step.
		double cut = (step_time - allotted) / (allotted + 1E-6);
		cut = (cut < 0.02) ? 0.02 : cut; // Minimum reduction per step
		cut = (cut > 0.05) ? 0.05 : cut; // Maximum reduction per step
		EvalLog::plan_time_ratio -= cut;
		// The ratio is not clamped at zero here, so it may become negative.
	}

	EvalLog::curr_inst_remaining_budget = log_.GetRemainingBudget(instance_);
	EvalLog::curr_inst_remaining_steps--;

	UpdateTimeInfo(instance_);
	// EvalLog::allocated_time is re-read here, after UpdateTimeInfo().
	Globals::config.time_per_move = EvalLog::plan_time_ratio
		* EvalLog::allocated_time;

	const bool verbose = !Globals::config.silence && out_;
	if (!verbose) {
		return;
	}

	*out_
		<< "Total time: curr_inst / inst_target / remaining / since_start = "
		<< (get_time_second() - EvalLog::curr_inst_start_time) << " / "
		<< (EvalLog::curr_inst_target_time
			* (EvalLog::curr_inst_steps - EvalLog::curr_inst_remaining_steps))
		<< " / " << EvalLog::curr_inst_remaining_budget << " / "
		<< (get_time_second() - EvalLog::start_time) << endl;
}
Ejemplo n.º 2
0
bool IPPCEvaluator::ExecuteAction(int action, double& reward, OBS_TYPE& obs) {
	double start_t = get_time_second();

	client_->sendMessage(
		client_->createActionMes(pomdpx_->GetActionName(),
			pomdpx_->GetEnumedAction(action)));

	if (step_ == Globals::config.sim_len - 1) {
		return true;
	}

	string turnMes = client_->recvMessage();

	//get step reward from turn message: added by wkg
	reward = client_->getStepReward(turnMes);
	reward_ = reward;
	total_discounted_reward_ += Discount(step_) * reward;
	total_undiscounted_reward_ += reward;

	map<string, string> observs = client_->processTurnMes(turnMes);
	obs = pomdpx_->GetPOMDPXObservation(observs);

	double end_t = get_time_second();

	if (!Globals::config.silence && out_) {
		*out_ << "Time for executing action " << (end_t - start_t) << endl;
	}

	return false;
}
Ejemplo n.º 3
0
double IPPCEvaluator::EndRound() {
	double start_t = get_time_second();

	string roundEndMes = client_->recvMessage();
	double round_reward = client_->processRoundEndMes(roundEndMes);

	if (!Globals::config.silence && out_) {
		*out_ << "Total undiscounted reward = " << round_reward << endl;
	}

	log_.IncNumOfCompletedRuns(instance_);
	log_.Save();

	double end_t = get_time_second();

	if (!Globals::config.silence && out_) {
		*out_ << "Time for endround msg (save log) " << (end_t - start_t)
			<< endl;
	}

	discounted_round_rewards_.push_back(total_discounted_reward_);
	undiscounted_round_rewards_.push_back(round_reward);

	return round_reward;
}
Ejemplo n.º 4
0
void POMDPEvaluator::InitRound() {
	step_ = 0;

	double start_t, end_t;
	// Initial state
	state_ = model_->CreateStartState();
	logi << "[POMDPEvaluator::InitRound] Created start state." << endl;
	if (!Globals::config.silence && out_) {
		*out_ << "Initial state: " << endl;
		model_->PrintState(*state_, *out_);
		*out_ << endl;
	}

	// Initial belief
	start_t = get_time_second();
	delete solver_->belief();
	end_t = get_time_second();
	logi << "[POMDPEvaluator::InitRound] Deleted old belief in "
		<< (end_t - start_t) << "s" << endl;

	start_t = get_time_second();
	Belief* belief = model_->InitialBelief(state_, belief_type_);
	end_t = get_time_second();
	logi << "[POMDPEvaluator::InitRound] Created intial belief "
		<< typeid(*belief).name() << " in " << (end_t - start_t) << "s" << endl;

	solver_->belief(belief);

	total_discounted_reward_ = 0;
	total_undiscounted_reward_ = 0;
}
Ejemplo n.º 5
0
/**
 * Incorporates an executed action and the resulting observation: updates the
 * belief, appends the pair to the history, and passes the belief to the
 * lower bound.
 *
 * @param action Index of the action that was executed.
 * @param obs    Observation received after executing the action.
 */
void DESPOT::Update(int action, OBS_TYPE obs) {
	const double began = get_time_second();

	belief_->Update(action, obs);
	history_.Add(action, obs);

	// Hand the (updated) belief to the lower bound as well.
	lower_bound_->belief(belief_);

	logi << "[Solver::Update] Updated belief, history and root with action "
		<< action << ", observation " << obs
		<< " in " << (get_time_second() - began) << "s" << endl;
}
Ejemplo n.º 6
0
/**
 * Plans the next action from the current belief.
 *
 * Samples Globals::config.num_scenarios particles from the belief, builds a
 * search tree with ConstructTree() within Globals::config.time_per_move, and
 * returns the result of OptimalAction() on the tree root. The tree is freed
 * and deleted before returning.
 *
 * @return The action (with its value) selected from the search tree, or a
 *         random action with value Globals::NEG_INFTY when no planning time
 *         is allocated.
 */
ValuedAction DESPOT::Search() {
	// Print the belief only at INFO verbosity or above.
	if (logging::level() >= logging::INFO) {
		model_->PrintBelief(*belief_);
	}

	if (Globals::config.time_per_move <= 0) // Return a random action if no time is allocated for planning
		return ValuedAction(Random::RANDOM.NextInt(model_->NumActions()),
			Globals::NEG_INFTY);

	double start = get_time_second();
	vector<State*> particles = belief_->Sample(Globals::config.num_scenarios);
	logi << "[DESPOT::Search] Time for sampling " << particles.size()
		<< " particles: " << (get_time_second() - start) << "s" << endl;

	// Reset the statistics collected for this search.
	statistics_ = SearchStatistics();

	start = get_time_second();
	// Function-local static: constructed on the first call only and shared
	// by every later call of this function.
	static RandomStreams streams = RandomStreams(Globals::config.num_scenarios,
		Globals::config.search_depth);

	LookaheadUpperBound* ub = dynamic_cast<LookaheadUpperBound*>(upper_bound_);
	if (ub != NULL) { // Avoid using new streams for LookaheadUpperBound
		// The bounds are initialized exactly once with the static streams;
		// later calls reuse both the streams and the initialized bounds.
		static bool initialized = false;
		if (!initialized ) {
			lower_bound_->Init(streams);
			upper_bound_->Init(streams);
			initialized = true;
		}
	} else {
		// Any other upper bound: replace the streams and re-initialize both
		// bounds on every search.
		streams = RandomStreams(Globals::config.num_scenarios,
			Globals::config.search_depth);
		lower_bound_->Init(streams);
		upper_bound_->Init(streams);
	}

	root_ = ConstructTree(particles, streams, lower_bound_, upper_bound_,
		model_, history_, Globals::config.time_per_move, &statistics_);
	logi << "[DESPOT::Search] Time for tree construction: "
		<< (get_time_second() - start) << "s" << endl;

	start = get_time_second();
	root_->Free(*model_);
	logi << "[DESPOT::Search] Time for freeing particles in search tree: "
		<< (get_time_second() - start) << "s" << endl;

	// The action is read from the root after Free() but before the tree
	// itself is deleted.
	ValuedAction astar = OptimalAction(root_);
	start = get_time_second();
	delete root_;
	// NOTE(review): root_ is not reset after the delete and keeps the stale
	// address until the next Search() call assigns it again.

	logi << "[DESPOT::Search] Time for deleting tree: "
		<< (get_time_second() - start) << "s" << endl;
	logi << "[DESPOT::Search] Search statistics:" << endl << statistics_
		<< endl;

	return astar;
}
Ejemplo n.º 7
0
/**
 * Opens a session with the server for the given instance and initializes the
 * time-budget bookkeeping for it.
 *
 * Nothing is done (no connection is made) when the log reports zero
 * remaining runs for the instance.
 *
 * @param instance Name of the instance to request a session for.
 * @return Number of remaining runs for the instance (0 if none are left).
 */
int IPPCEvaluator::Handshake(string instance) {
	const int runs_left = log_.GetNumRemainingRuns(instance);
	if (runs_left == 0) {
		return 0;
	}

	const double began = get_time_second();
	const bool verbose = !Globals::config.silence && out_;
	instance_ = instance;

	// Connect to the server.
	client_ = new Client();
	client_->setHostName(hostname_);
	client_->setPort(port_);
	client_->initializeSocket();
	client_->connectToServer();

	// Request a session and process the server's reply.
	client_->sendMessage(client_->createSessionRequestMes(instance));
	string session_init_message = client_->recvMessage();

	if (verbose) {
		*out_ << session_init_message << endl;
	}

	client_->processSessionInitMes(session_init_message);
	const double finished = get_time_second();

	if (verbose) {
		*out_ << "Time for handsake " << (finished - began) << endl;
	}

	// Time bookkeeping: total and remaining steps both start at
	// runs * sim_len; the target time per step is budget / steps.
	log_.SetInitialBudget(instance);
	EvalLog::curr_inst_steps = runs_left * Globals::config.sim_len;
	EvalLog::curr_inst_remaining_steps = runs_left * Globals::config.sim_len;
	EvalLog::curr_inst_target_time = EvalLog::curr_inst_budget
		/ EvalLog::curr_inst_steps;
	UpdateTimeInfo(instance);

	// Start with the full allocation for planning.
	EvalLog::plan_time_ratio = 1.0;
	Globals::config.time_per_move = EvalLog::plan_time_ratio
		* EvalLog::allocated_time;

	return runs_left;
}
Ejemplo n.º 8
0
double IPPCEvaluator::End() {
	double start_t = get_time_second();

	string sessionEndMes = client_->recvMessage();
	double total_reward = client_->processSessionEndMes(sessionEndMes);
	client_->closeConnection();
	delete client_;

	double end_t = get_time_second();

	if (!Globals::config.silence && out_) {
		*out_ << "Time for endsession " << (end_t - start_t) << endl
			<< "Total reward for all runs = " << total_reward << endl
			<< "Total time: Real / CPU = "
			<< (get_time_second() - EvalLog::curr_inst_start_time) << " / "
			<< (double(clock() - start_clockt_) / CLOCKS_PER_SEC) << "s"
			<< endl;
	}

	return total_reward;
}
Ejemplo n.º 9
0
void IPPCEvaluator::InitRound() {
	step_ = 0;
	state_ = NULL;

	double start_t, end_t;

	// Initial belief
	start_t = get_time_second();
	delete solver_->belief();
	end_t = get_time_second();
	logi << "[IPPCEvaluator::InitRound] Deleted initial belief in "
		<< (end_t - start_t) << "s" << endl;

	start_t = get_time_second();
	Belief* belief = model_->InitialBelief(NULL, belief_type_);
	end_t = get_time_second();
	logi << "[IPPCEvaluator::InitRound] Initialized initial belief: "
		<< typeid(*belief).name() << " in " << (end_t - start_t) << "s" << endl;

	solver_->belief(belief);

	// Initiate a round with server
	start_t = get_time_second();
	client_->sendMessage(client_->createRoundRequestMes());
	string roundMes = client_->recvMessageTwice();
	end_t = get_time_second();
	logi << "[IPPCEvaluator::InitRound] Time for startround msg "
		<< (end_t - start_t) << "s" << endl;

	total_discounted_reward_ = 0;
	total_undiscounted_reward_ = 0;
}
Ejemplo n.º 10
0
/**
 * Constructs the evaluator and, when a target finish time is given, spreads
 * the time left until that target evenly over num_steps steps.
 *
 * @param model              Model forwarded to the Evaluator base.
 * @param belief_type        Belief type forwarded to the Evaluator base.
 * @param solver             Solver forwarded to the Evaluator base.
 * @param start_clockt       Start clock forwarded to the Evaluator base.
 * @param out                Output stream forwarded to the Evaluator base.
 * @param target_finish_time Target finish time; -1 disables the time setup.
 * @param num_steps          Number of steps the remaining time is divided by.
 */
POMDPEvaluator::POMDPEvaluator(DSPOMDP* model, string belief_type,
	Solver* solver, clock_t start_clockt, ostream* out,
	double target_finish_time, int num_steps) :
	Evaluator(model, belief_type, solver, start_clockt, out),
	random_((unsigned) 0) {
	target_finish_time_ = target_finish_time;

	// -1 means "no time limit": leave the time settings untouched.
	if (target_finish_time_ == -1) {
		return;
	}

	const double time_left = target_finish_time_ - get_time_second();
	EvalLog::allocated_time = time_left / num_steps;
	Globals::config.time_per_move = EvalLog::allocated_time;
	EvalLog::curr_inst_remaining_steps = num_steps;
}
Ejemplo n.º 11
0
/**
 * Prints the end-of-run summary to standard output: number of runs, average
 * discounted and undiscounted round rewards with their standard errors, and
 * the elapsed real and CPU time.
 *
 * @param num_runs         Number of runs reported as completed.
 * @param simulator        Evaluator queried for the reward statistics.
 * @param main_clock_start clock() value used as the CPU-time origin.
 */
void SimpleTUI::PrintResult(int num_runs, Evaluator *simulator,
                            clock_t main_clock_start) {
  cout << "\nCompleted " << num_runs << " run(s)." << endl;

  // Discounted reward statistics.
  cout << "Average total discounted reward (stderr) = "
       << simulator->AverageDiscountedRoundReward();
  cout << " (" << simulator->StderrDiscountedRoundReward() << ")" << endl;

  // Undiscounted reward statistics.
  cout << "Average total undiscounted reward (stderr) = "
       << simulator->AverageUndiscountedRoundReward();
  cout << " (" << simulator->StderrUndiscountedRoundReward() << ")" << endl;

  // Wall-clock time since the instance start, and CPU time since
  // main_clock_start.
  cout << "Total time: Real / CPU = "
       << (get_time_second() - EvalLog::curr_inst_start_time) << " / "
       << (double(clock() - main_clock_start) / CLOCKS_PER_SEC) << "s" << endl;
}
Ejemplo n.º 12
0
/**
 * Adapts EvalLog::plan_time_ratio to the duration of the step that just
 * finished, recomputes the per-step allocation from the time left until the
 * target finish time, and derives Globals::config.time_per_move.
 *
 * Does nothing when no target finish time is set (target_finish_time_ == -1).
 *
 * @param step_time Duration of the last step, compared against
 *                  EvalLog::allocated_time.
 */
void POMDPEvaluator::UpdateTimePerMove(double step_time) {
	if (target_finish_time_ == -1) {
		return;
	}

	const double allotted = EvalLog::allocated_time;
	if (step_time < 0.99 * allotted) {
		// Step finished comfortably early: raise the ratio by 0.01, capped
		// at 1.0.
		if (EvalLog::plan_time_ratio < 1.0) {
			EvalLog::plan_time_ratio += 0.01;
		}
		if (EvalLog::plan_time_ratio > 1.0) {
			EvalLog::plan_time_ratio = 1.0;
		}
	} else if (step_time > allotted) {
		// Step overran: lower the ratio by the relative overrun, bounded to
		// the range [0.02, 0.05] per step.
		double cut = (step_time - allotted) / (allotted + 1E-6);
		cut = (cut < 0.02) ? 0.02 : cut; // Minimum reduction per step
		cut = (cut > 0.05) ? 0.05 : cut; // Maximum reduction per step
		EvalLog::plan_time_ratio -= cut;
		// The ratio is not clamped at zero here, so it may become negative.
	}

	EvalLog::curr_inst_remaining_budget = target_finish_time_
		- get_time_second();
	EvalLog::curr_inst_remaining_steps--;

	if (EvalLog::curr_inst_remaining_steps > 0) {
		// Hold back 2.0 from the remaining budget, split the rest over the
		// remaining steps, and cap the allocation at 5.0 per step.
		EvalLog::allocated_time =
			(EvalLog::curr_inst_remaining_budget - 2.0)
				/ EvalLog::curr_inst_remaining_steps;

		if (EvalLog::allocated_time > 5.0) {
			EvalLog::allocated_time = 5.0;
		}
	} else {
		EvalLog::allocated_time = 0;
	}

	Globals::config.time_per_move = EvalLog::plan_time_ratio
		* EvalLog::allocated_time;
}
Ejemplo n.º 13
0
/**
 * Returns the budget of the current instance minus the time elapsed since
 * EvalLog::curr_inst_start_time.
 *
 * @param instance Not used by this function.
 * @return Remaining budget; negative once the elapsed time exceeds the budget.
 */
double EvalLog::GetRemainingBudget(string instance) const {
	const double elapsed = get_time_second() - EvalLog::curr_inst_start_time;
	return curr_inst_budget - elapsed;
}
Ejemplo n.º 14
0
bool Evaluator::RunStep(int step, int round) {
	if (target_finish_time_ != -1 && get_time_second() > target_finish_time_) {
		if (!Globals::config.silence && out_)
			*out_ << "Exit. (Total time "
				<< (get_time_second() - EvalLog::curr_inst_start_time)
				<< "s exceeded time limit of "
				<< (target_finish_time_ - EvalLog::curr_inst_start_time) << "s)"
				<< endl
				<< "Total time: Real / CPU = "
				<< (get_time_second() - EvalLog::curr_inst_start_time) << " / "
				<< (double(clock() - start_clockt_) / CLOCKS_PER_SEC) << "s"
				<< endl;
		exit(1);
	}

	double step_start_t = get_time_second();

	double start_t = get_time_second();
	int action = solver_->Search().action;
	double end_t = get_time_second();
	logi << "[RunStep] Time spent in " << typeid(*solver_).name()
		<< "::Search(): " << (end_t - start_t) << endl;

	double reward;
	OBS_TYPE obs;
	start_t = get_time_second();
	bool terminal = ExecuteAction(action, reward, obs);
	end_t = get_time_second();
	logi << "[RunStep] Time spent in ExecuteAction(): " << (end_t - start_t)
		<< endl;

	start_t = get_time_second();
	*out_ << "-----------------------------------Round " << round
				<< " Step " << step << "-----------------------------------"
				<< endl;
	if (!Globals::config.silence && out_) {
		*out_ << "- Action = ";
		model_->PrintAction(action, *out_);
	}

	if (state_ != NULL) {
		if (!Globals::config.silence && out_) {
			*out_ << "- State:\n";
			model_->PrintState(*state_, *out_);
		}
	}

	if (!Globals::config.silence && out_) {
		*out_ << "- Observation = ";
		model_->PrintObs(*state_, obs, *out_);
	}

	if (state_ != NULL) {
		if (!Globals::config.silence && out_)
			*out_ << "- ObsProb = " << model_->ObsProb(obs, *state_, action)
				<< endl;
	}

	ReportStepReward();
	end_t = get_time_second();

	double step_end_t;
	if (terminal) {
		step_end_t = get_time_second();
		logi << "[RunStep] Time for step: actual / allocated = "
			<< (step_end_t - step_start_t) << " / " << EvalLog::allocated_time
			<< endl;
		if (!Globals::config.silence && out_)
			*out_ << endl;
		step_++;
		return true;
	}

	*out_<<endl;

	start_t = get_time_second();
	solver_->Update(action, obs);
	end_t = get_time_second();
	logi << "[RunStep] Time spent in Update(): " << (end_t - start_t) << endl;

	step_++;
	return false;
}
Ejemplo n.º 15
0
/**
 * Copies the parsed command-line options into the global configuration and
 * into the caller's variables. A value is overwritten only when its option
 * is present, except for the root seed (derived from the current time when
 * no seed option is given), search_solver and the logging verbosity, which
 * are always set.
 *
 * @param options        Parsed option array, indexed by the E_* identifiers.
 * @param num_runs       Set from E_RUNS when present.
 * @param simulator_type Set from E_EVALUATOR when present.
 * @param belief_type    Set from E_BELIEF when present.
 * @param time_limit     Set from E_TIME_LIMIT when present.
 * @param solver_type    Set from E_SOLVER when present.
 * @param search_solver  Always set: whether E_SEARCH_SOLVER is present.
 */
void SimpleTUI::OptionParse(option::Option *options, int &num_runs,
                            string &simulator_type, string &belief_type,
                            int &time_limit, string &solver_type,
                            bool &search_solver) {
  if (options[E_SILENCE])
    Globals::config.silence = true;

  if (options[E_DEPTH])
    Globals::config.search_depth = atoi(options[E_DEPTH].arg);

  if (options[E_DISCOUNT])
    Globals::config.discount = atof(options[E_DISCOUNT].arg);

  if (options[E_SEED])
    Globals::config.root_seed = atoi(options[E_SEED].arg);
  else { // last 9 digits of current time in milli second
    // Multiply BEFORE converting to an integer: casting first would drop
    // the fractional seconds and make the seed change only once per second.
    // A 64-bit type is used because the millisecond count does not fit in
    // a 32-bit long.
    const long long millis =
        static_cast<long long>(get_time_second() * 1000);
    const long long range = 1000000000LL; // 10^9
    Globals::config.root_seed = (unsigned int)(millis % range);
  }

  if (options[E_TIMEOUT])
    Globals::config.time_per_move = atof(options[E_TIMEOUT].arg);

  if (options[E_NUMPARTICLES])
    Globals::config.num_scenarios = atoi(options[E_NUMPARTICLES].arg);

  if (options[E_PRUNE])
    Globals::config.pruning_constant = atof(options[E_PRUNE].arg);

  if (options[E_GAP])
    Globals::config.xi = atof(options[E_GAP].arg);

  if (options[E_SIM_LEN])
    Globals::config.sim_len = atoi(options[E_SIM_LEN].arg);

  if (options[E_EVALUATOR])
    simulator_type = options[E_EVALUATOR].arg;

  if (options[E_MAX_POLICY_SIM_LEN])
    Globals::config.max_policy_sim_len =
        atoi(options[E_MAX_POLICY_SIM_LEN].arg);

  if (options[E_DEFAULT_ACTION])
    Globals::config.default_action = options[E_DEFAULT_ACTION].arg;

  if (options[E_RUNS])
    num_runs = atoi(options[E_RUNS].arg);

  if (options[E_BELIEF])
    belief_type = options[E_BELIEF].arg;

  if (options[E_TIME_LIMIT])
    time_limit = atoi(options[E_TIME_LIMIT].arg);

  if (options[E_NOISE])
    Globals::config.noise = atof(options[E_NOISE].arg);

  search_solver = options[E_SEARCH_SOLVER];

  if (options[E_SOLVER])
    solver_type = options[E_SOLVER].arg;

  // Verbosity defaults to 0 when the option is absent.
  int verbosity = 0;
  if (options[E_VERBOSITY])
    verbosity = atoi(options[E_VERBOSITY].arg);
  logging::level(verbosity);
}
Ejemplo n.º 16
0
/**
 * Entry point of the text UI: parses the command line, seeds the random
 * generators, creates the model, solver and evaluator, runs the evaluation
 * and prints the results.
 *
 * @param argc Argument count as received by main().
 * @param argv Argument vector as received by main().
 * @return 0, both after printing the usage text (help option) and after a
 *         completed run.
 */
int SimpleTUI::run(int argc, char *argv[]) {

  // Time origins for the CPU-time and wall-clock reports.
  clock_t main_clock_start = clock();
  EvalLog::curr_inst_start_time = get_time_second();

  const char *program = (argc > 0) ? argv[0] : "despot";

  argc -= (argc > 0);
  argv += (argc > 0); // skip program name argv[0] if present

  // NOTE(review): options and buffer are allocated with new[] and never
  // released in this function (neither on the help path nor at the end).
  option::Stats stats(usage, argc, argv);
  option::Option *options = new option::Option[stats.options_max];
  option::Option *buffer = new option::Option[stats.buffer_max];
  option::Parser parse(usage, argc, argv, options, buffer);

  string solver_type = "DESPOT";
  // Assigned unconditionally inside OptionParse() below.
  bool search_solver;

  /* =========================
   * Parse required parameters
   * =========================*/
  int num_runs = 1;
  string simulator_type = "pomdp";
  string belief_type = "DEFAULT";
  int time_limit = -1;

  /* =========================================
   * Problem specific default parameter values
   * =========================================*/
  InitializeDefaultParameters();

  /* =========================
   * Parse optional parameters
   * =========================*/
  if (options[E_HELP]) {
    cout << "Usage: " << program << " [options]" << endl;
    option::printUsage(std::cout, usage);
    return 0;
  }
  OptionParse(options, num_runs, simulator_type, belief_type, time_limit,
              solver_type, search_solver);

  /* =========================
   * Global random generator
   * =========================*/
  // Two seeds are drawn in this order: first the world seed, then the seed
  // of the global random generator.
  Seeds::root_seed(Globals::config.root_seed);
  unsigned world_seed = Seeds::Next();
  unsigned seed = Seeds::Next();
  Random::RANDOM = Random(seed);

  /* =========================
   * initialize model
   * =========================*/
  DSPOMDP *model = InitializeModel(options);

  /* =========================
   * initialize solver
   * =========================*/
  Solver *solver = InitializeSolver(model, solver_type, options);
  assert(solver != NULL);

  /* =========================
   * initialize simulator
   * =========================*/
  // NOTE(review): simulator is used right after InitializeEvaluator()
  // without a NULL check - presumably it is always set; confirm.
  Evaluator *simulator = NULL;
  InitializeEvaluator(simulator, options, model, solver, num_runs,
                      main_clock_start, simulator_type, belief_type, time_limit,
                      solver_type);
  simulator->world_seed(world_seed);

  int start_run = 0;

  /* =========================
   * Display parameters
   * =========================*/
  DisplayParameters(options, model);

  /* =========================
   * run simulator
   * =========================*/
  RunEvaluator(model, simulator, options, num_runs, search_solver, solver,
               simulator_type, main_clock_start, start_run);

  simulator->End();

  PrintResult(num_runs, simulator, main_clock_start);

  return 0;
}
Ejemplo n.º 17
0
/**
 * Runs num_runs simulation rounds, numbered start_run .. start_run +
 * num_runs - 1, on the given evaluator.
 *
 * With search_solver set, rounds 0-4 use a DESPOT solver, rounds 5-9 a POMCP
 * solver, and from round 10 on the solver whose five rounds collected the
 * larger reward sum (DESPOT on a tie).
 *
 * For the "ippc" evaluator the function calls exit(0) after the rounds
 * unless num_runs is 30.
 *
 * @param model            Model passed to InitializeSolver().
 * @param simulator        Evaluator that runs the rounds and steps.
 * @param options          Parsed options passed to InitializeSolver().
 * @param num_runs         Number of rounds to run.
 * @param search_solver    Enables the solver selection described above.
 * @param solver           In/out: replaced whenever a new solver is created.
 * @param simulator_type   Evaluator type name; only compared with "ippc".
 * @param main_clock_start clock() value used as the CPU-time origin.
 * @param start_run        Number of the first round.
 */
void SimpleTUI::RunEvaluator(DSPOMDP *model, Evaluator *simulator,
                             option::Option *options, int num_runs,
                             bool search_solver, Solver *&solver,
                             string simulator_type, clock_t main_clock_start,
                             int start_run) {
  // Rewards are stored under the absolute round number, because the solver
  // comparison in round 10 reads entries 0..9. The vector must therefore
  // cover start_run + num_runs entries: sizing it with num_runs alone made
  // the store below run out of bounds whenever start_run > 0.
  const int end_run = start_run + num_runs;
  vector<double> round_rewards(end_run > 0 ? end_run : 0);

  for (int round = start_run; round < end_run; round++) {
    default_out << endl
                << "####################################### Round " << round
                << " #######################################" << endl;

    if (search_solver) {
      // NOTE(review): the previously installed solver is not deleted when a
      // new one is created here.
      if (round == 0) {
        solver = InitializeSolver(model, "DESPOT", options);
        default_out << "Solver: " << typeid(*solver).name() << endl;
      } else if (round == 5) {
        solver = InitializeSolver(model, "POMCP", options);
        default_out << "Solver: " << typeid(*solver).name() << endl;
      } else if (round == 10) {
        // Compare the reward sums of rounds 0-4 and rounds 5-9.
        double sum1 = 0, sum2 = 0;
        for (int i = 0; i < 5; i++)
          sum1 += round_rewards[i];
        for (int i = 5; i < 10; i++)
          sum2 += round_rewards[i];
        if (sum1 < sum2)
          solver = InitializeSolver(model, "POMCP", options);
        else
          solver = InitializeSolver(model, "DESPOT", options);
        default_out << "Solver: " << typeid(*solver).name()
                    << " DESPOT:" << sum1 << " POMCP:" << sum2 << endl;
      }

      // Install the current solver once per round (this single call
      // replaces the duplicated calls inside the branches above).
      simulator->solver(solver);
    }

    simulator->InitRound();

    for (int i = 0; i < Globals::config.sim_len; i++) {
      double step_start_t = get_time_second();

      bool terminal = simulator->RunStep(i, round);

      if (terminal)
        break;

      double step_end_t = get_time_second();
      logi << "[main] Time for step: actual / allocated = "
           << (step_end_t - step_start_t) << " / " << EvalLog::allocated_time
           << endl;
      simulator->UpdateTimePerMove(step_end_t - step_start_t);
      logi << "[main] Time per move set to " << Globals::config.time_per_move
           << endl;
      logi << "[main] Plan time ratio set to " << EvalLog::plan_time_ratio
           << endl;
    }

    default_out << "Simulation terminated in " << simulator->step() << " steps"
                << endl;
    double round_reward = simulator->EndRound();
    // Negative round numbers (negative start_run) have no slot.
    if (round >= 0)
      round_rewards[round] = round_reward;
  }

  if (simulator_type == "ippc" && num_runs != 30) {
    cout << "Exit without receiving reward." << endl
         << "Total time: Real / CPU = "
         << (get_time_second() - EvalLog::curr_inst_start_time) << " / "
         << (double(clock() - main_clock_start) / CLOCKS_PER_SEC) << "s"
         << endl;
    exit(0);
  }
}