// Performs a single trial starting at `root` and returns the node at which
// the trial stopped.
//
// At each visited node the trial:
//   1. records the node depth in statistics->longest_trial_length when it
//      exceeds the stored value (only if `statistics` is non-NULL),
//   2. calls ExploitBlockers on the node,
//   3. stops if Gap(node) is exactly 0,
//   4. expands the node if it is a leaf,
//   5. picks an action node via SelectBestUpperBoundNode and then a child via
//      SelectBestWEUNode, stopping if no child is returned,
//   6. moves to the child and appends the (action, observation) edge pair to
//      `history`.
// The walk continues while the node depth is below
// Globals::config.search_depth and WEU(node) is positive.
//
// `history` is extended during the walk and truncated back to its size on
// entry before returning. Timing and counters are written to `statistics`
// only when it is non-NULL.
VNode* DESPOT::Trial(VNode* root, RandomStreams& streams,
    ScenarioLowerBound* lower_bound, ScenarioUpperBound* upper_bound,
    const DSPOMDP* model, History& history, SearchStatistics* statistics) {
    const int saved_history_size = history.Size();
    const bool record = (statistics != NULL);
    VNode* node = root;

    for (;;) {
        if (record && node->depth() > statistics->longest_trial_length) {
            statistics->longest_trial_length = node->depth();
        }

        ExploitBlockers(node);

        // Nothing left to gain below this node.
        if (Gap(node) == 0) {
            break;
        }

        if (node->IsLeaf()) {
            double expand_start = clock();
            Expand(node, lower_bound, upper_bound, model, streams, history);

            if (record) {
                const double expand_seconds =
                    (clock() - expand_start) / CLOCKS_PER_SEC;
                statistics->time_node_expansion += expand_seconds;
                statistics->num_expanded_nodes++;
                statistics->num_tree_particles += node->particles().size();
            }
        }

        double select_start = clock();
        QNode* best_action = SelectBestUpperBoundNode(node);
        VNode* child = SelectBestWEUNode(best_action);

        if (record) {
            statistics->time_path += (clock() - select_start) / CLOCKS_PER_SEC;
        }

        if (child == NULL) {
            break;
        }

        node = child;
        history.Add(best_action->edge(), node->edge());

        // Loop continuation test (depth first, WEU only if depth allows).
        if (!(node->depth() < Globals::config.search_depth)) {
            break;
        }
        if (!(WEU(node) > 0)) {
            break;
        }
    }

    // Undo every history entry added by this trial.
    history.Truncate(saved_history_size);

    return node;
}
void AEMS::Expand(QNode* qnode, BeliefLowerBound* lb, BeliefUpperBound* ub, const BeliefMDP* model, History& history) { VNode* parent = qnode->parent(); int action = qnode->edge(); map<OBS_TYPE, VNode*>& children = qnode->children(); const Belief* belief = parent->belief(); // cout << *belief << endl; double step_reward = model->StepReward(belief, qnode->edge()); map<OBS_TYPE, double> obss; model->Observe(belief, qnode->edge(), obss); double lower_bound = step_reward; double upper_bound = step_reward; // Create new belief nodes for (map<OBS_TYPE, double>::iterator it = obss.begin(); it != obss.end(); it++) { OBS_TYPE obs = it->first; double weight = it->second; logd << "[AEMS::Expand] Creating node for obs " << obs << " with weight " << weight << endl; VNode* vnode = new VNode(model->Tau(belief, action, obs), parent->depth() + 1, qnode, obs); vnode->likelihood = weight; logd << " New node created!" << endl; children[obs] = vnode; InitLowerBound(vnode, lb, history); InitUpperBound(vnode, ub, history); lower_bound += weight * Discount() * vnode->lower_bound(); upper_bound += weight * Discount() * vnode->upper_bound(); } qnode->step_reward = step_reward; qnode->lower_bound(lower_bound); qnode->upper_bound(upper_bound); }
void DESPOT::Expand(QNode* qnode, ScenarioLowerBound* lb, ScenarioUpperBound* ub, const DSPOMDP* model, RandomStreams& streams, History& history) { VNode* parent = qnode->parent(); streams.position(parent->depth()); map<OBS_TYPE, VNode*>& children = qnode->children(); const vector<State*>& particles = parent->particles(); double step_reward = 0; // Partition particles by observation map<OBS_TYPE, vector<State*> > partitions; OBS_TYPE obs; double reward; for (int i = 0; i < particles.size(); i++) { State* particle = particles[i]; logd << " Original: " << *particle << endl; State* copy = model->Copy(particle); logd << " Before step: " << *copy << endl; bool terminal = model->Step(*copy, streams.Entry(copy->scenario_id), qnode->edge(), reward, obs); step_reward += reward * copy->weight; logd << " After step: " << *copy << " " << (reward * copy->weight) << " " << reward << " " << copy->weight << endl; if (!terminal) { partitions[obs].push_back(copy); } else { model->Free(copy); } } step_reward = Discount(parent->depth()) * step_reward - Globals::config.pruning_constant;//pruning_constant is used for regularization double lower_bound = step_reward; double upper_bound = step_reward; // Create new belief nodes for (map<OBS_TYPE, vector<State*> >::iterator it = partitions.begin(); it != partitions.end(); it++) { OBS_TYPE obs = it->first; logd << " Creating node for obs " << obs << endl; VNode* vnode = new VNode(partitions[obs], parent->depth() + 1, qnode, obs); logd << " New node created!" 
<< endl; children[obs] = vnode; history.Add(qnode->edge(), obs); InitBounds(vnode, lb, ub, streams, history); history.RemoveLast(); logd << " New node's bounds: (" << vnode->lower_bound() << ", " << vnode->upper_bound() << ")" << endl; lower_bound += vnode->lower_bound(); upper_bound += vnode->upper_bound(); } qnode->step_reward = step_reward; qnode->lower_bound(lower_bound); qnode->upper_bound(upper_bound); qnode->utility_upper_bound = upper_bound + Globals::config.pruning_constant; qnode->default_value = lower_bound; // for debugging }