void VNode::PrintPolicyTree(int depth, ostream& os) { if (depth != -1 && this->depth() > depth) return; vector<QNode*>& qnodes = children(); if (qnodes.size() == 0) { int astar = this->default_move().action; os << this << "-a=" << astar << endl; } else { QNode* qstar = NULL; for (int a = 0; a < qnodes.size(); a++) { QNode* qnode = qnodes[a]; if (qstar == NULL || qnode->lower_bound() > qstar->lower_bound()) { qstar = qnode; } } os << this << "-a=" << qstar->edge() << endl; vector<OBS_TYPE> labels; map<OBS_TYPE, VNode*>& vnodes = qstar->children(); for (map<OBS_TYPE, VNode*>::iterator it = vnodes.begin(); it != vnodes.end(); it++) { labels.push_back(it->first); } for (int i = 0; i < labels.size(); i++) { if (depth == -1 || this->depth() + 1 <= depth) { os << repeat("| ", this->depth()) << "| o=" << labels[i] << ": "; qstar->Child(labels[i])->PrintPolicyTree(depth, os); } } } }
/**
 * Simulates the policy stored in the tree under `root` on every particle and
 * averages the discounted returns.
 *
 * Each particle is first tagged with its index in `particles` (written to
 * `scenario_id`), which later selects the random-stream entry used by
 * `model->Step`. A copy of the particle is then stepped until the streams are
 * exhausted or `Step` reports a terminal state. While a tree node is available
 * the action comes from OptimalAction(); once the simulation has left the tree
 * (no child for the observed observation) it comes from `prior`.
 *
 * Every non-terminal step calls `prior->Add` and `streams.Advance`; these are
 * undone with the same number of `streams.Back` / `prior->PopLast` calls
 * before the next particle is processed.
 *
 * NOTE(review): with an empty `particles` the final division is by zero.
 * NOTE(review): `root` is passed to OptimalAction() unconditionally on return.
 *
 * @param root      Root of the search tree to follow.
 * @param particles Scenarios to simulate; their `scenario_id` is overwritten.
 * @param streams   Random streams; advanced during simulation and rewound after.
 * @param prior     Supplies actions outside the tree and records history.
 * @param model     Provides Copy/Step/Free for states.
 * @return OptimalAction(root).action paired with the mean discounted return.
 */
ValuedAction DESPOT::Evaluate(VNode* root, vector<State*>& particles,
	RandomStreams& streams, POMCPPrior* prior, const DSPOMDP* model) {
	for (int i = 0; i < particles.size(); i++)
		particles[i]->scenario_id = i;

	double total = 0;

	for (int p = 0; p < particles.size(); p++) {
		State* copy = model->Copy(particles[p]);
		VNode* node = root;
		double discount = 1.0;
		double discounted_return = 0;
		int advanced = 0; // number of Advance()/Add() calls to undo afterwards

		while (!streams.Exhausted()) {
			int action;
			if (node != NULL)
				action = OptimalAction(node).action;
			else
				action = prior->GetAction(*copy);
			assert(action != -1);

			double reward;
			OBS_TYPE obs;
			bool terminal = model->Step(*copy,
				streams.Entry(copy->scenario_id), action, reward, obs);

			discounted_return += discount * reward;
			discount *= Discount();

			if (terminal)
				break;

			prior->Add(action, obs);
			streams.Advance();
			advanced++;

			// Descend to the child matching (action, obs), or fall off the
			// tree when that observation has no node. A leaf node is left
			// unchanged here, so it keeps supplying the action on later steps.
			if (node != NULL && !node->IsLeaf()) {
				map<OBS_TYPE, VNode*>& successors = node->Child(action)->children();
				map<OBS_TYPE, VNode*>::iterator hit = successors.find(obs);
				node = (hit != successors.end()) ? hit->second : NULL;
			}
		}

		// Rewind the streams and the prior's history to where they started.
		for (int undone = 0; undone < advanced; undone++) {
			streams.Back();
			prior->PopLast();
		}

		model->Free(copy);
		total += discounted_return;
	}

	return ValuedAction(OptimalAction(root).action, total / particles.size());
}
void VNode::Free(const DSPOMDP& model) { for (int i = 0; i < particles_.size(); i++) { model.Free(particles_[i]); } for (int a = 0; a < children().size(); a++) { QNode* qnode = Child(a); map<OBS_TYPE, VNode*>& children = qnode->children(); for (map<OBS_TYPE, VNode*>::iterator it = children.begin(); it != children.end(); it++) { it->second->Free(model); } } }
void VNode::PrintTree(int depth, ostream& os) { if (depth != -1 && this->depth() > depth) return; if (this->depth() == 0) { os << "d - default value" << endl << "l - lower bound" << endl << "u - upper bound" << endl << "r - totol weighted one step reward" << endl << "w - total particle weight" << endl; } os << "(" << "d:" << this->default_move().value << " l:" << this->lower_bound() << ", u:" << this->upper_bound() << ", w:" << this->Weight() << ", weu:" << DESPOT::WEU(this) << ")" << endl; vector<QNode*>& qnodes = children(); for (int a = 0; a < qnodes.size(); a++) { QNode* qnode = qnodes[a]; vector<OBS_TYPE> labels; map<OBS_TYPE, VNode*>& vnodes = qnode->children(); for (map<OBS_TYPE, VNode*>::iterator it = vnodes.begin(); it != vnodes.end(); it++) { labels.push_back(it->first); } os << repeat("| ", this->depth()) << "a=" << qnode->edge() << ": " << "(d:" << qnode->default_value << ", l:" << qnode->lower_bound() << ", u:" << qnode->upper_bound() << ", r:" << qnode->step_reward << ")" << endl; for (int i = 0; i < labels.size(); i++) { if (depth == -1 || this->depth() + 1 <= depth) { os << repeat("| ", this->depth()) << "| o=" << labels[i] << ": "; qnode->Child(labels[i])->PrintTree(depth, os); } } } }