void DESPOT::InitUpperBound(VNode* vnode, ScenarioUpperBound* upper_bound,
    RandomStreams& streams, History& history) {
    streams.position(vnode->depth());
    double upper = upper_bound->Value(vnode->particles(), streams, history);
    vnode->utility_upper_bound = upper * Discount(vnode->depth());
    upper = upper * Discount(vnode->depth()) - Globals::config.pruning_constant;
    vnode->upper_bound(upper);
}
bool IPPCEvaluator::ExecuteAction(int action, double& reward, OBS_TYPE& obs) {
    double start_t = get_time_second();

    client_->sendMessage(
        client_->createActionMes(pomdpx_->GetActionName(),
            pomdpx_->GetEnumedAction(action)));

    if (step_ == Globals::config.sim_len - 1) {
        return true;
    }

    string turnMes = client_->recvMessage();

    // Get step reward from turn message: added by wkg
    reward = client_->getStepReward(turnMes);
    reward_ = reward;
    total_discounted_reward_ += Discount(step_) * reward;
    total_undiscounted_reward_ += reward;

    map<string, string> observs = client_->processTurnMes(turnMes);
    obs = pomdpx_->GetPOMDPXObservation(observs);

    double end_t = get_time_second();
    if (!Globals::config.silence && out_) {
        *out_ << "Time for executing action " << (end_t - start_t) << endl;
    }

    return false;
}
void DESPOT::InitLowerBound(VNode* vnode, ScenarioLowerBound* lower_bound,
    RandomStreams& streams, History& history) {
    streams.position(vnode->depth());
    ValuedAction move = lower_bound->Value(vnode->particles(), streams, history);
    move.value *= Discount(vnode->depth());
    vnode->default_move(move);
    vnode->lower_bound(move.value);
}
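// DESPOT::Expand below calls InitBounds to initialize both bounds of a newly
// created belief node. A minimal sketch of how InitLowerBound and
// InitUpperBound are typically combined; the gap-closing rule here is an
// assumption, not necessarily the exact library code.
void DESPOT::InitBounds(VNode* vnode, ScenarioLowerBound* lb,
    ScenarioUpperBound* ub, RandomStreams& streams, History& history) {
    InitLowerBound(vnode, lb, streams, history);
    InitUpperBound(vnode, ub, streams, history);
    // Keep the bounds consistent: the upper bound should never fall below
    // the lower bound induced by the default policy.
    if (vnode->upper_bound() < vnode->lower_bound()) {
        vnode->upper_bound(vnode->lower_bound());
    }
}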
// Estimate the value of the partial DESPOT policy rooted at `root` by
// simulating each particle: inside the tree the optimal action of the current
// node is followed; once the simulation leaves the tree, the POMCP prior
// supplies actions.
ValuedAction DESPOT::Evaluate(VNode* root, vector<State*>& particles,
    RandomStreams& streams, POMCPPrior* prior, const DSPOMDP* model) {
    double value = 0;

    for (int i = 0; i < particles.size(); i++) {
        particles[i]->scenario_id = i;
    }

    for (int i = 0; i < particles.size(); i++) {
        State* particle = particles[i];
        VNode* cur = root;
        State* copy = model->Copy(particle);
        double discount = 1.0;
        double val = 0;
        int steps = 0;

        while (!streams.Exhausted()) {
            int action = (cur != NULL) ?
                OptimalAction(cur).action : prior->GetAction(*copy);

            assert(action != -1);

            double reward;
            OBS_TYPE obs;
            bool terminal = model->Step(*copy, streams.Entry(copy->scenario_id),
                action, reward, obs);

            val += discount * reward;
            discount *= Discount();

            if (!terminal) {
                prior->Add(action, obs);
                streams.Advance();
                steps++;

                if (cur != NULL && !cur->IsLeaf()) {
                    QNode* qnode = cur->Child(action);
                    map<OBS_TYPE, VNode*>& vnodes = qnode->children();
                    cur = vnodes.find(obs) != vnodes.end() ? vnodes[obs] : NULL;
                }
            } else {
                break;
            }
        }

        for (int i = 0; i < steps; i++) {
            streams.Back();
            prior->PopLast();
        }

        model->Free(copy);

        value += val;
    }

    return ValuedAction(OptimalAction(root).action, value / particles.size());
}
void CSlmBuilder::Build() {
    CountNr();
    AppendTails();
    Cut();
    Discount();
    CalcBOW();
}
void AEMS::Update(QNode* qnode) {
    double lower = qnode->step_reward;
    double upper = qnode->step_reward;

    map<OBS_TYPE, VNode*>& children = qnode->children();
    for (map<OBS_TYPE, VNode*>::iterator it = children.begin();
        it != children.end(); it++) {
        VNode* vnode = it->second;

        lower += Discount() * vnode->likelihood * vnode->lower_bound();
        upper += Discount() * vnode->likelihood * vnode->upper_bound();
    }

    if (lower > qnode->lower_bound())
        qnode->lower_bound(lower);
    if (upper < qnode->upper_bound())
        qnode->upper_bound(upper);
}
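// The backup above handles action (QNode) nodes; belief (VNode) nodes are
// backed up by taking the maximum over their action children. A minimal
// sketch of that companion update, assuming the VNode accessors used
// elsewhere in this section (default_move(), Child()); not necessarily the
// exact library code.
void AEMS::Update(VNode* vnode) {
    if (vnode->IsLeaf()) {
        return;
    }

    // The default policy's value is always available as a fallback.
    double lower = vnode->default_move().value;
    double upper = vnode->default_move().value;

    for (int action = 0; action < vnode->children().size(); action++) {
        QNode* qnode = vnode->Child(action);
        lower = max(lower, qnode->lower_bound());
        upper = max(upper, qnode->upper_bound());
    }

    if (lower > vnode->lower_bound())
        vnode->lower_bound(lower);
    if (upper < vnode->upper_bound())
        vnode->upper_bound(upper);
}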
bool POMDPEvaluator::ExecuteAction(int action, double& reward, OBS_TYPE& obs) {
    double random_num = random_.NextDouble();
    bool terminal = model_->Step(*state_, random_num, action, reward, obs);

    reward_ = reward;
    total_discounted_reward_ += Discount(step_) * reward;
    total_undiscounted_reward_ += reward;

    return terminal;
}
void AEMS::Expand(QNode* qnode, BeliefLowerBound* lb, BeliefUpperBound* ub,
    const BeliefMDP* model, History& history) {
    VNode* parent = qnode->parent();
    int action = qnode->edge();
    map<OBS_TYPE, VNode*>& children = qnode->children();

    const Belief* belief = parent->belief();
    // cout << *belief << endl;

    double step_reward = model->StepReward(belief, qnode->edge());

    map<OBS_TYPE, double> obss;
    model->Observe(belief, qnode->edge(), obss);

    double lower_bound = step_reward;
    double upper_bound = step_reward;

    // Create new belief nodes
    for (map<OBS_TYPE, double>::iterator it = obss.begin(); it != obss.end();
        it++) {
        OBS_TYPE obs = it->first;
        double weight = it->second;
        logd << "[AEMS::Expand] Creating node for obs " << obs
            << " with weight " << weight << endl;
        VNode* vnode = new VNode(model->Tau(belief, action, obs),
            parent->depth() + 1, qnode, obs);
        vnode->likelihood = weight;
        logd << " New node created!" << endl;
        children[obs] = vnode;

        InitLowerBound(vnode, lb, history);
        InitUpperBound(vnode, ub, history);

        lower_bound += weight * Discount() * vnode->lower_bound();
        upper_bound += weight * Discount() * vnode->upper_bound();
    }

    qnode->step_reward = step_reward;
    qnode->lower_bound(lower_bound);
    qnode->upper_bound(upper_bound);
}
void AEMS::FindMaxApproxErrorLeaf(VNode* vnode, double likelihood,
    double& bestAE, VNode*& bestNode) {
    if (vnode->IsLeaf()) {
        double curAE = likelihood * vnode->likelihood * Discount(vnode->depth())
            * (vnode->upper_bound() - vnode->lower_bound());
        if (curAE > bestAE) {
            bestAE = curAE;
            bestNode = vnode;
        }
    } else {
        for (int a = 0; a < vnode->children().size(); a++) {
            FindMaxApproxErrorLeaf(vnode->Child(a), likelihood, bestAE, bestNode);
        }
    }
}
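// The else-branch above passes vnode->Child(a), a QNode*, so it relies on a
// companion overload for action nodes. A minimal sketch of that overload,
// assuming the accumulated likelihood is passed through unchanged (each
// node's own likelihood is applied only at the leaf, as in the branch above);
// the library's exact weighting may differ.
void AEMS::FindMaxApproxErrorLeaf(QNode* qnode, double likelihood,
    double& bestAE, VNode*& bestNode) {
    map<OBS_TYPE, VNode*>& children = qnode->children();
    for (map<OBS_TYPE, VNode*>::iterator it = children.begin();
        it != children.end(); it++) {
        // Recurse into every observation child of this action node.
        FindMaxApproxErrorLeaf(it->second, likelihood, bestAE, bestNode);
    }
}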
void LookaheadUpperBound::Init(const RandomStreams& streams) {
    int num_states = indexer_.NumStates();
    int length = streams.Length();
    int num_particles = streams.NumStreams();

    SetSize(bounds_, num_particles, length + 1, num_states);

    clock_t start = clock();
    for (int p = 0; p < num_particles; p++) {
        if (p % 10 == 0)
            cerr << p << " scenarios done! ["
                << (double(clock() - start) / CLOCKS_PER_SEC) << "s]" << endl;

        for (int t = length; t >= 0; t--) {
            if (t == length) { // base case
                for (int s = 0; s < num_states; s++) {
                    bounds_[p][t][s] = particle_upper_bound_->Value(*indexer_.GetState(s));
                }
            } else { // lookahead
                for (int s = 0; s < num_states; s++) {
                    double best = Globals::NEG_INFTY;

                    for (int a = 0; a < model_->NumActions(); a++) {
                        double reward = 0;
                        State* copy = model_->Copy(indexer_.GetState(s));
                        bool terminal = model_->Step(*copy, streams.Entry(p, t), a, reward);

                        reward += (!terminal) * Discount()
                            * bounds_[p][t + 1][indexer_.GetIndex(copy)];
                        // Free the copy only after its successor index has been read.
                        model_->Free(copy);

                        if (reward > best)
                            best = reward;
                    }

                    bounds_[p][t][s] = best;
                }
            }
        }
    }
}
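// The table precomputed above is read back by the bound's Value method. A
// minimal sketch of that lookup, assuming each particle's scenario_id indexes
// its random stream and streams.position() returns the current depth; not
// necessarily the exact library code.
double LookaheadUpperBound::Value(const vector<State*>& particles,
    RandomStreams& streams, History& history) const {
    double bound = 0;
    for (int i = 0; i < particles.size(); i++) {
        State* particle = particles[i];
        // Weighted lookup of the precomputed bound for this scenario,
        // depth, and state index.
        bound += particle->weight
            * bounds_[particle->scenario_id][streams.position()][indexer_.GetIndex(particle)];
    }
    return bound;
}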
void DESPOT::Expand(QNode* qnode, ScenarioLowerBound* lb,
    ScenarioUpperBound* ub, const DSPOMDP* model, RandomStreams& streams,
    History& history) {
    VNode* parent = qnode->parent();
    streams.position(parent->depth());
    map<OBS_TYPE, VNode*>& children = qnode->children();

    const vector<State*>& particles = parent->particles();

    double step_reward = 0;

    // Partition particles by observation
    map<OBS_TYPE, vector<State*> > partitions;
    OBS_TYPE obs;
    double reward;
    for (int i = 0; i < particles.size(); i++) {
        State* particle = particles[i];
        logd << " Original: " << *particle << endl;

        State* copy = model->Copy(particle);

        logd << " Before step: " << *copy << endl;

        bool terminal = model->Step(*copy, streams.Entry(copy->scenario_id),
            qnode->edge(), reward, obs);

        step_reward += reward * copy->weight;

        logd << " After step: " << *copy << " " << (reward * copy->weight)
            << " " << reward << " " << copy->weight << endl;

        if (!terminal) {
            partitions[obs].push_back(copy);
        } else {
            model->Free(copy);
        }
    }
    // The pruning constant is used for regularization.
    step_reward = Discount(parent->depth()) * step_reward
        - Globals::config.pruning_constant;

    double lower_bound = step_reward;
    double upper_bound = step_reward;

    // Create new belief nodes
    for (map<OBS_TYPE, vector<State*> >::iterator it = partitions.begin();
        it != partitions.end(); it++) {
        OBS_TYPE obs = it->first;
        logd << " Creating node for obs " << obs << endl;
        VNode* vnode = new VNode(partitions[obs], parent->depth() + 1, qnode,
            obs);
        logd << " New node created!" << endl;
        children[obs] = vnode;

        history.Add(qnode->edge(), obs);
        InitBounds(vnode, lb, ub, streams, history);
        history.RemoveLast();
        logd << " New node's bounds: (" << vnode->lower_bound() << ", "
            << vnode->upper_bound() << ")" << endl;

        lower_bound += vnode->lower_bound();
        upper_bound += vnode->upper_bound();
    }

    qnode->step_reward = step_reward;
    qnode->lower_bound(lower_bound);
    qnode->upper_bound(upper_bound);
    qnode->utility_upper_bound = upper_bound + Globals::config.pruning_constant;

    qnode->default_value = lower_bound; // for debugging
}
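// The QNode expansion above is normally driven by a VNode-level expansion
// that creates one action child per action and expands each of them. A
// minimal sketch of that driver, assuming a QNode(parent, action) constructor
// and a vector-typed children() on VNode; not necessarily the exact library
// code.
void DESPOT::Expand(VNode* vnode, ScenarioLowerBound* lb,
    ScenarioUpperBound* ub, const DSPOMDP* model, RandomStreams& streams,
    History& history) {
    vector<QNode*>& children = vnode->children();
    for (int action = 0; action < model->NumActions(); action++) {
        // Create an action child and expand it into observation children.
        QNode* qnode = new QNode(vnode, action);
        children.push_back(qnode);
        Expand(qnode, lb, ub, model, streams, history);
    }
}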
ValuedAction TrivialBeliefLowerBound::Value(const Belief* belief) const {
    ValuedAction va = model_->GetMinRewardAction();
    va.value *= 1.0 / (1 - Discount());
    return va;
}
ValuedAction TrivialParticleLowerBound::Value(
    const vector<State*>& particles) const {
    ValuedAction va = model_->GetMinRewardAction();
    va.value *= State::Weight(particles) / (1 - Discount());
    return va;
}
double TrivialParticleUpperBound::Value(const vector<State*>& particles,
    RandomStreams& streams, History& history) const {
    return State::Weight(particles) * model_->GetMaxReward() / (1 - Discount());
}
double TrivialParticleUpperBound::Value(const State& state) const {
    return model_->GetMaxReward() / (1 - Discount());
}
double TrivialBeliefUpperBound::Value(const Belief* belief) const {
    return model_->GetMaxReward() / (1 - Discount());
}
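// These trivial bounds just clamp the value with the best and worst per-step
// rewards: an infinite discounted sum of a constant reward r equals
// r / (1 - gamma). A hypothetical model hook illustrating what
// GetMaxReward() and GetMinRewardAction() are expected to supply; MyPOMDP,
// LISTEN, and the reward values are made up for illustration only.
double MyPOMDP::GetMaxReward() const {
    return 10; // largest single-step reward anywhere in the model
}

ValuedAction MyPOMDP::GetMinRewardAction() const {
    // An action paired with its worst-case single-step reward; repeating it
    // forever yields a valid (if loose) lower bound on the value.
    return ValuedAction(LISTEN, -1);
}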