/**
 * Builds a search tree over the given scenarios and returns its root.
 *
 * Each particle is tagged with its position in `particles` as scenario_id,
 * a root VNode is allocated over them and its bounds are initialized with
 * InitBounds. Trial and Backup are then alternated (always at least once)
 * until either the elapsed time, extrapolated by one more average-length
 * trial, would reach `timeout`, or the gap between the root's upper and
 * lower bound is no larger than 1e-6.
 *
 * Times are measured with clock() and expressed in seconds, so `timeout`
 * is compared against seconds of processor time.
 *
 * @param statistics May be NULL. When non-NULL it receives the particle
 *        counts before/after the search, the initial and final root bounds,
 *        the accumulated backup time, the total search time, the number of
 *        trials and the tree sizes.
 * @return The root node allocated here with `new`.
 */
VNode* DESPOT::ConstructTree(vector<State*>& particles, RandomStreams& streams,
    ScenarioLowerBound* lower_bound, ScenarioUpperBound* upper_bound,
    const DSPOMDP* model, History& history, double timeout,
    SearchStatistics* statistics) {
    if (statistics != NULL)
        statistics->num_particles_before_search = model->NumActiveParticles();

    // scenario_id is simply the particle's index in the input vector.
    int scenario = 0;
    for (vector<State*>::iterator it = particles.begin();
        it != particles.end(); ++it) {
        (*it)->scenario_id = scenario;
        ++scenario;
    }

    VNode* root = new VNode(particles);

    logd << "[DESPOT::ConstructTree] START - Initializing lower and upper bounds at the root node.";
    InitBounds(root, lower_bound, upper_bound, streams, history);
    logd << "[DESPOT::ConstructTree] END - Initializing lower and upper bounds at the root node.";

    if (statistics != NULL) {
        statistics->initial_lb = root->lower_bound();
        statistics->initial_ub = root->upper_bound();
    }

    double elapsed = 0;
    int trials = 0;
    for (;;) {
        // Time the trial.
        double tick = clock();
        VNode* reached = Trial(root, streams, lower_bound, upper_bound, model,
            history, statistics);
        elapsed += double(clock() - tick) / CLOCKS_PER_SEC;

        // Time the backup; it is reported separately and also counted in
        // the overall search time.
        tick = clock();
        Backup(reached);
        if (statistics != NULL)
            statistics->time_backup += double(clock() - tick) / CLOCKS_PER_SEC;
        elapsed += double(clock() - tick) / CLOCKS_PER_SEC;

        ++trials;

        // Stop if one more trial of average length would not fit in the budget.
        const bool fits_budget = elapsed * (trials + 1.0) / trials < timeout;
        if (!fits_budget)
            break;

        // Stop once the root bounds have (numerically) met.
        const bool gap_open = (root->upper_bound() - root->lower_bound()) > 1e-6;
        if (!gap_open)
            break;
    }

    if (statistics != NULL) {
        statistics->num_particles_after_search = model->NumActiveParticles();
        statistics->num_policy_nodes = root->PolicyTreeSize();
        statistics->num_tree_nodes = root->Size();
        statistics->final_lb = root->lower_bound();
        statistics->final_ub = root->upper_bound();
        statistics->time_search = elapsed;
        statistics->num_trials = trials;
    }

    return root;
}
void DESPOT::Compare() { vector<State*> particles = belief_->Sample(Globals::config.num_scenarios); SearchStatistics statistics; RandomStreams streams = RandomStreams(Globals::config.num_scenarios, Globals::config.search_depth); VNode* root = ConstructTree(particles, streams, lower_bound_, upper_bound_, model_, history_, Globals::config.time_per_move, &statistics); CheckDESPOT(root, root->lower_bound()); CheckDESPOTSTAR(root, root->lower_bound()); delete root; }
/**
 * Tightens the bounds stored on an action node from its children.
 *
 * The candidate lower and upper bounds are the node's step reward plus the
 * sum of the children's lower / upper bounds; the candidate utility upper
 * bound additionally adds the pruning constant and sums the children's
 * utility_upper_bound. Each stored bound is only replaced when the candidate
 * is tighter (larger lower bound, smaller upper bounds).
 */
void DESPOT::Update(QNode* qnode) {
    typedef map<OBS_TYPE, VNode*> ChildMap;

    double sum_lower = qnode->step_reward;
    double sum_upper = qnode->step_reward;
    double sum_utility = qnode->step_reward + Globals::config.pruning_constant;

    ChildMap& kids = qnode->children();
    ChildMap::iterator pos = kids.begin();
    while (pos != kids.end()) {
        VNode* child = pos->second;
        sum_lower += child->lower_bound();
        sum_upper += child->upper_bound();
        sum_utility += child->utility_upper_bound;
        ++pos;
    }

    // Bounds may only move towards each other.
    if (sum_lower > qnode->lower_bound())
        qnode->lower_bound(sum_lower);

    if (sum_upper < qnode->upper_bound())
        qnode->upper_bound(sum_upper);

    if (sum_utility < qnode->utility_upper_bound)
        qnode->utility_upper_bound = sum_utility;
}
double DESPOT::CheckDESPOTSTAR(const VNode* vnode, double regularized_value) { cout << "--------------------------------------------------------------------------------" << endl; const vector<State*>& particles = vnode->particles(); vector<State*> copy; for (int i = 0; i < particles.size(); i++) { copy.push_back(model_->Copy(particles[i])); } VNode* root = new VNode(copy); RandomStreams streams = RandomStreams(Globals::config.num_scenarios, Globals::config.search_depth); InitBounds(root, lower_bound_, upper_bound_, streams, history_); double used_time = 0; int num_trials = 0; do { double start = clock(); VNode* cur = Trial(root, streams, lower_bound_, upper_bound_, model_, history_); num_trials++; used_time += double(clock() - start) / CLOCKS_PER_SEC; start = clock(); Backup(cur); used_time += double(clock() - start) / CLOCKS_PER_SEC; } while (root->lower_bound() < regularized_value); cout << "DESPOT: # trials = " << num_trials << "; target = " << regularized_value << ", current = " << root->lower_bound() << ", l = " << root->lower_bound() << ", u = " << root->upper_bound() << "; time = " << used_time << endl; cout << "--------------------------------------------------------------------------------" << endl; root->Free(*model_); delete root; return used_time; }
/**
 * Walks from `vnode` towards the root, one belief level at a time, and
 * collapses bounds to the default-move value wherever FindBlocker reports a
 * blocker. Does nothing when the pruning constant is not positive.
 *
 * At each visited node with a blocker:
 *  - if the node has no parent or is itself the blocker, its lower bound,
 *    upper bound and utility upper bound are set to its default move value;
 *  - otherwise the same is done for every child of the node's parent
 *    (the node and all of its siblings).
 * Backup is then run from the node and the walk continues at the
 * grandparent. The walk stops at the first node without a blocker or after
 * a parentless node has been processed.
 */
void DESPOT::ExploitBlockers(VNode* vnode) {
    if (Globals::config.pruning_constant <= 0)
        return;

    VNode* node = vnode;
    while (node != NULL) {
        VNode* blocker = FindBlocker(node);
        if (blocker == NULL)
            break;

        if (node->parent() != NULL && blocker != node) {
            // Blocked further up: collapse this node and all its siblings.
            const map<OBS_TYPE, VNode*>& family = node->parent()->children();
            map<OBS_TYPE, VNode*>::const_iterator member = family.begin();
            while (member != family.end()) {
                VNode* sibling = member->second;
                double fallback = sibling->default_move().value;
                sibling->lower_bound(fallback);
                sibling->upper_bound(fallback);
                sibling->utility_upper_bound = fallback;
                ++member;
            }
        } else {
            // Root, or the node blocks itself: collapse only this node.
            double fallback = node->default_move().value;
            node->lower_bound(fallback);
            node->upper_bound(fallback);
            node->utility_upper_bound = fallback;
        }

        Backup(node);

        // Step to the belief node two levels up, if there is one.
        if (node->parent() == NULL)
            node = NULL;
        else
            node = node->parent()->parent();
    }
}
/**
 * Tightens an action node's bounds from its children.
 *
 * The candidate bounds are the step reward plus, for every child, the
 * discounted and likelihood-weighted child bound. A stored bound is only
 * replaced when the candidate is tighter.
 */
void AEMS::Update(QNode* qnode) {
    typedef map<OBS_TYPE, VNode*> ChildMap;

    double new_lower = qnode->step_reward;
    double new_upper = qnode->step_reward;

    ChildMap& kids = qnode->children();
    ChildMap::iterator pos = kids.begin();
    while (pos != kids.end()) {
        VNode* child = pos->second;
        new_lower += Discount() * child->likelihood * child->lower_bound();
        new_upper += Discount() * child->likelihood * child->upper_bound();
        ++pos;
    }

    if (new_lower > qnode->lower_bound()) {
        qnode->lower_bound(new_lower);
    }
    if (new_upper < qnode->upper_bound()) {
        qnode->upper_bound(new_upper);
    }
}
void AEMS::Expand(QNode* qnode, BeliefLowerBound* lb, BeliefUpperBound* ub, const BeliefMDP* model, History& history) { VNode* parent = qnode->parent(); int action = qnode->edge(); map<OBS_TYPE, VNode*>& children = qnode->children(); const Belief* belief = parent->belief(); // cout << *belief << endl; double step_reward = model->StepReward(belief, qnode->edge()); map<OBS_TYPE, double> obss; model->Observe(belief, qnode->edge(), obss); double lower_bound = step_reward; double upper_bound = step_reward; // Create new belief nodes for (map<OBS_TYPE, double>::iterator it = obss.begin(); it != obss.end(); it++) { OBS_TYPE obs = it->first; double weight = it->second; logd << "[AEMS::Expand] Creating node for obs " << obs << " with weight " << weight << endl; VNode* vnode = new VNode(model->Tau(belief, action, obs), parent->depth() + 1, qnode, obs); vnode->likelihood = weight; logd << " New node created!" << endl; children[obs] = vnode; InitLowerBound(vnode, lb, history); InitUpperBound(vnode, ub, history); lower_bound += weight * Discount() * vnode->lower_bound(); upper_bound += weight * Discount() * vnode->upper_bound(); } qnode->step_reward = step_reward; qnode->lower_bound(lower_bound); qnode->upper_bound(upper_bound); }
void DESPOT::Expand(QNode* qnode, ScenarioLowerBound* lb, ScenarioUpperBound* ub, const DSPOMDP* model, RandomStreams& streams, History& history) { VNode* parent = qnode->parent(); streams.position(parent->depth()); map<OBS_TYPE, VNode*>& children = qnode->children(); const vector<State*>& particles = parent->particles(); double step_reward = 0; // Partition particles by observation map<OBS_TYPE, vector<State*> > partitions; OBS_TYPE obs; double reward; for (int i = 0; i < particles.size(); i++) { State* particle = particles[i]; logd << " Original: " << *particle << endl; State* copy = model->Copy(particle); logd << " Before step: " << *copy << endl; bool terminal = model->Step(*copy, streams.Entry(copy->scenario_id), qnode->edge(), reward, obs); step_reward += reward * copy->weight; logd << " After step: " << *copy << " " << (reward * copy->weight) << " " << reward << " " << copy->weight << endl; if (!terminal) { partitions[obs].push_back(copy); } else { model->Free(copy); } } step_reward = Discount(parent->depth()) * step_reward - Globals::config.pruning_constant;//pruning_constant is used for regularization double lower_bound = step_reward; double upper_bound = step_reward; // Create new belief nodes for (map<OBS_TYPE, vector<State*> >::iterator it = partitions.begin(); it != partitions.end(); it++) { OBS_TYPE obs = it->first; logd << " Creating node for obs " << obs << endl; VNode* vnode = new VNode(partitions[obs], parent->depth() + 1, qnode, obs); logd << " New node created!" 
<< endl; children[obs] = vnode; history.Add(qnode->edge(), obs); InitBounds(vnode, lb, ub, streams, history); history.RemoveLast(); logd << " New node's bounds: (" << vnode->lower_bound() << ", " << vnode->upper_bound() << ")" << endl; lower_bound += vnode->lower_bound(); upper_bound += vnode->upper_bound(); } qnode->step_reward = step_reward; qnode->lower_bound(lower_bound); qnode->upper_bound(upper_bound); qnode->utility_upper_bound = upper_bound + Globals::config.pruning_constant; qnode->default_value = lower_bound; // for debugging }
double DESPOT::CheckDESPOT(const VNode* vnode, double regularized_value) { cout << "--------------------------------------------------------------------------------" << endl; const vector<State*>& particles = vnode->particles(); vector<State*> copy; for (int i = 0; i < particles.size(); i ++) { copy.push_back(model_->Copy(particles[i])); } VNode* root = new VNode(copy); double pruning_constant = Globals::config.pruning_constant; Globals::config.pruning_constant = 0; RandomStreams streams = RandomStreams(Globals::config.num_scenarios, Globals::config.search_depth); streams.position(0); InitBounds(root, lower_bound_, upper_bound_, streams, history_); double used_time = 0; int num_trials = 0, prev_num = 0; double pruned_value; do { double start = clock(); VNode* cur = Trial(root, streams, lower_bound_, upper_bound_, model_, history_); num_trials++; used_time += double(clock() - start) / CLOCKS_PER_SEC; start = clock(); Backup(cur); used_time += double(clock() - start) / CLOCKS_PER_SEC; if (double(num_trials - prev_num) > 0.05 * prev_num) { int pruned_action; Globals::config.pruning_constant = pruning_constant; VNode* pruned = Prune(root, pruned_action, pruned_value); Globals::config.pruning_constant = 0; prev_num = num_trials; pruned->Free(*model_); delete pruned; cout << "# trials = " << num_trials << "; target = " << regularized_value << ", current = " << pruned_value << ", l = " << root->lower_bound() << ", u = " << root->upper_bound() << "; time = " << used_time << endl; if (pruned_value >= regularized_value) { break; } } } while (true); cout << "DESPOT: # trials = " << num_trials << "; target = " << regularized_value << ", current = " << pruned_value << ", l = " << root->lower_bound() << ", u = " << root->upper_bound() << "; time = " << used_time << endl; Globals::config.pruning_constant = pruning_constant; cout << "--------------------------------------------------------------------------------" << endl; root->Free(*model_); delete root; return used_time; }