// Evaluates the partial policy encoded in the search tree rooted at `root` by
// simulating each particle through the tree: the tree policy is followed while
// a matching node exists, and the POMCP prior takes over once the simulation
// falls off the tree. Returns the optimal action at the root together with the
// average discounted return over all particles.
ValuedAction DESPOT::Evaluate(VNode* root, vector<State*>& particles,
    RandomStreams& streams, POMCPPrior* prior, const DSPOMDP* model) {
    double value = 0;

    // Assign each particle a scenario id so that it consumes its own stream
    // of pre-sampled random numbers during simulation.
    for (int i = 0; i < particles.size(); i++) {
        particles[i]->scenario_id = i;
    }

    for (int i = 0; i < particles.size(); i++) {
        State* particle = particles[i];
        VNode* cur = root;
        State* copy = model->Copy(particle); // simulate a copy; keep the original intact
        double discount = 1.0;
        double val = 0;
        int steps = 0;

        while (!streams.Exhausted()) {
            // Inside the tree, act according to the tree policy; outside it,
            // fall back on the prior.
            int action = (cur != NULL) ?
                OptimalAction(cur).action : prior->GetAction(*copy);
            assert(action != -1);

            double reward;
            OBS_TYPE obs;
            bool terminal = model->Step(*copy,
                streams.Entry(copy->scenario_id), action, reward, obs);

            val += discount * reward;
            discount *= Discount();

            if (terminal)
                break;

            prior->Add(action, obs);
            streams.Advance();
            steps++;

            // Descend along the (action, observation) edge if the tree has a
            // child for this observation; otherwise leave the tree.
            if (cur != NULL && !cur->IsLeaf()) {
                QNode* qnode = cur->Child(action);
                map<OBS_TYPE, VNode*>& vnodes = qnode->children();
                cur = (vnodes.find(obs) != vnodes.end()) ? vnodes[obs] : NULL;
            }
        }

        // Rewind the streams and the prior's action-observation history so
        // the next particle starts from the same position.
        for (int step = 0; step < steps; step++) {
            streams.Back();
            prior->PopLast();
        }

        model->Free(copy);
        value += val;
    }

    return ValuedAction(OptimalAction(root).action, value / particles.size());
}
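// A minimal sketch of how Evaluate might be driven (hypothetical caller code,
// not part of the solver; `MyPOMDP`, the particle sampling, and the tree
// construction are assumptions for illustration):
//
//   const DSPOMDP* model = new MyPOMDP();
//   RandomStreams streams(Globals::config.num_scenarios,
//       Globals::config.search_depth);       // one random stream per scenario
//   POMCPPrior* prior = model->CreatePOMCPPrior();
//   vector<State*> particles = /* sample K particles from the current belief */;
//   VNode* root = /* tree built by a prior call to the solver's search */;
//   ValuedAction va = DESPOT::Evaluate(root, particles, streams, prior, model);
//   // va.action: optimal root action; va.value: average discounted return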
// AEMS2 heuristic (Ross et al., "Online Planning Algorithms for POMDPs",
// JAIR 2008): approximate the probability that an action is optimal by a
// point mass on the action with the highest upper bound. Returns 1.0 if
// `qnode` is the maximizing child of its parent belief node and 0.0
// otherwise; the strict comparison breaks ties in favor of the
// lowest-indexed action.
double AEMS::AEMS2Likelihood(QNode* qnode) {
    VNode* vnode = qnode->parent();
    QNode* qstar = NULL;
    for (int action = 0; action < vnode->children().size(); action++) {
        QNode* child = vnode->Child(action);
        if (qstar == NULL || child->upper_bound() > qstar->upper_bound())
            qstar = child;
    }
    return qstar == qnode;
}
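// In AEMS, this likelihood weights each fringe node's bound gap when choosing
// which leaf to expand next: the approximate probability of reaching a belief
// node is the product, along the path from the root, of the observation
// probabilities and these action likelihoods. A sketch of that accumulation
// (hypothetical helper, illustrative only; a real implementation would
// typically cache this product on each node rather than recompute it):
//
//   double PathLikelihood(VNode* vnode) {
//       double p = 1.0;
//       while (vnode->parent() != NULL) {
//           QNode* qnode = vnode->parent();  // action edge above this belief
//           p *= AEMS2Likelihood(qnode);     // 0/1 action weight under AEMS2
//           // ... times P(o | b, a) for the observation edge above qnode
//           vnode = qnode->parent();         // belief node above the action
//       }
//       return p;
//   }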