Example #1
void DESPOT::InitUpperBound(VNode* vnode, ScenarioUpperBound* upper_bound,
	RandomStreams& streams, History& history) {
	streams.position(vnode->depth());
	double upper = upper_bound->Value(vnode->particles(), streams, history);
	vnode->utility_upper_bound = upper * Discount(vnode->depth());
	upper = upper * Discount(vnode->depth()) - Globals::config.pruning_constant;
	vnode->upper_bound(upper);
}
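All of the DESPOT examples below scale rewards and bounds with Discount(). As a point of reference, here is a minimal sketch of what those helpers are assumed to compute: Discount() returns the discount factor gamma, and Discount(d) returns gamma^d, the weight of a reward received d steps in the future. The Config fields mirror the library's Globals::config in spirit but are reproduced here only for illustration.

#include <cmath>

// Illustrative stand-ins for the library's globals (assumed, not verbatim).
struct Config {
	double discount = 0.95;       // discount factor gamma
	double pruning_constant = 0;  // regularization constant used by DESPOT
};
static Config config;

inline double Discount() { return config.discount; }
inline double Discount(int d) { return std::pow(config.discount, d); }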
Example #2
bool IPPCEvaluator::ExecuteAction(int action, double& reward, OBS_TYPE& obs) {
	double start_t = get_time_second();

	client_->sendMessage(
		client_->createActionMes(pomdpx_->GetActionName(),
			pomdpx_->GetEnumedAction(action)));

	if (step_ == Globals::config.sim_len - 1) {
		return true;
	}

	string turnMes = client_->recvMessage();

	// Get the step reward from the turn message (added by wkg)
	reward = client_->getStepReward(turnMes);
	reward_ = reward;
	total_discounted_reward_ += Discount(step_) * reward;
	total_undiscounted_reward_ += reward;

	map<string, string> observs = client_->processTurnMes(turnMes);
	obs = pomdpx_->GetPOMDPXObservation(observs);

	double end_t = get_time_second();

	if (!Globals::config.silence && out_) {
		*out_ << "Time for executing action " << (end_t - start_t) << endl;
	}

	return false;
}
Example #3
void DESPOT::InitLowerBound(VNode* vnode, ScenarioLowerBound* lower_bound,
	RandomStreams& streams, History& history) {
	streams.position(vnode->depth());
	ValuedAction move = lower_bound->Value(vnode->particles(), streams, history);
	move.value *= Discount(vnode->depth());
	vnode->default_move(move);
	vnode->lower_bound(move.value);
}
Example #4
ValuedAction DESPOT::Evaluate(VNode* root, vector<State*>& particles,
	RandomStreams& streams, POMCPPrior* prior, const DSPOMDP* model) {
	double value = 0;

	for (int i = 0; i < particles.size(); i++) {
		particles[i]->scenario_id = i;
	}

	for (int i = 0; i < particles.size(); i++) {
		State* particle = particles[i];
		VNode* cur = root;
		State* copy = model->Copy(particle);
		double discount = 1.0;
		double val = 0;
		int steps = 0;

		while (!streams.Exhausted()) {
			int action =
				(cur != NULL) ?
					OptimalAction(cur).action : prior->GetAction(*copy);

			assert(action != -1);

			double reward;
			OBS_TYPE obs;
			bool terminal = model->Step(*copy, streams.Entry(copy->scenario_id),
				action, reward, obs);

			val += discount * reward;
			discount *= Discount();

			if (!terminal) {
				prior->Add(action, obs);
				streams.Advance();
				steps++;

				if (cur != NULL && !cur->IsLeaf()) {
					QNode* qnode = cur->Child(action);
					map<OBS_TYPE, VNode*>& vnodes = qnode->children();
					cur = vnodes.find(obs) != vnodes.end() ? vnodes[obs] : NULL;
				}
			} else {
				break;
			}
		}

		for (int j = 0; j < steps; j++) { // avoid shadowing the outer loop's i
			streams.Back();
			prior->PopLast();
		}

		model->Free(copy);

		value += val;
	}

	return ValuedAction(OptimalAction(root).action, value / particles.size());
}
Example #5
void
CSlmBuilder::Build()
{
    CountNr();
    AppendTails();
    Cut();
    Discount();
    CalcBOW();
}
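Note that this example appears to come from a different context than the others: CSlmBuilder is a statistical language-model builder, and its Discount() pass applies n-gram count discounting for smoothing (followed by back-off weight computation in CalcBOW), not a POMDP discount factor.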
Example #6
void AEMS::Update(QNode* qnode) {
	double lower = qnode->step_reward;
	double upper = qnode->step_reward;

	map<OBS_TYPE, VNode*>& children = qnode->children();
	for (map<OBS_TYPE, VNode*>::iterator it = children.begin();
			it != children.end(); it++) {
		VNode* vnode = it->second;

		lower += Discount() * vnode->likelihood * vnode->lower_bound();
		upper += Discount() * vnode->likelihood * vnode->upper_bound();
	}

	if (lower > qnode->lower_bound())
		qnode->lower_bound(lower);
	if (upper < qnode->upper_bound())
		qnode->upper_bound(upper);
}
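This update is the standard belief-tree backup: each bound on the Q-node is its immediate step reward plus the discounted, likelihood-weighted bounds of its observation children, roughly lower(q) = r + gamma * sum over o of P(o) * lower(v_o), and the final comparisons ensure the update only ever tightens the stored interval.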
Example #7
bool POMDPEvaluator::ExecuteAction(int action, double& reward, OBS_TYPE& obs) {
	double random_num = random_.NextDouble();
	bool terminal = model_->Step(*state_, random_num, action, reward, obs);

	reward_ = reward;
	total_discounted_reward_ += Discount(step_) * reward;
	total_undiscounted_reward_ += reward;

	return terminal;
}
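Both evaluators (Examples #2 and #7) accumulate the discounted return the same way: Discount(step_) weights the reward earned at step_ by gamma^step_, so after an episode total_discounted_reward_ equals the sum of gamma^t * r_t. A self-contained sketch of that identity, where DiscountedReturn is a hypothetical helper and not part of the library:

#include <cmath>
#include <vector>

// Sums gamma^t * r_t over an episode, mirroring how the evaluators
// build up total_discounted_reward_ one step at a time.
double DiscountedReturn(const std::vector<double>& rewards, double gamma) {
	double total = 0;
	for (std::size_t t = 0; t < rewards.size(); ++t)
		total += std::pow(gamma, t) * rewards[t];
	return total;
}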
Example #8
void AEMS::Expand(QNode* qnode, BeliefLowerBound* lb, BeliefUpperBound* ub,
	const BeliefMDP* model, History& history) {
	VNode* parent = qnode->parent();
	int action = qnode->edge();
	map<OBS_TYPE, VNode*>& children = qnode->children();

	const Belief* belief = parent->belief();
	// cout << *belief << endl;

	double step_reward = model->StepReward(belief, qnode->edge());

	map<OBS_TYPE, double> obss;
	model->Observe(belief, qnode->edge(), obss);

	double lower_bound = step_reward;
	double upper_bound = step_reward;

	// Create new belief nodes
	for (map<OBS_TYPE, double>::iterator it = obss.begin(); it != obss.end(); it++) {
		OBS_TYPE obs = it->first;
		double weight = it->second;
		logd << "[AEMS::Expand] Creating node for obs " << obs
			<< " with weight " << weight << endl;
		VNode* vnode = new VNode(model->Tau(belief, action, obs),
			parent->depth() + 1, qnode, obs);
		vnode->likelihood = weight;
		logd << " New node created!" << endl;
		children[obs] = vnode;

		InitLowerBound(vnode, lb, history);
		InitUpperBound(vnode, ub, history);

		lower_bound += weight * Discount() * vnode->lower_bound();
		upper_bound += weight * Discount() * vnode->upper_bound();
	}

	qnode->step_reward = step_reward;
	qnode->lower_bound(lower_bound);
	qnode->upper_bound(upper_bound);
}
Example #9
void AEMS::FindMaxApproxErrorLeaf(VNode* vnode, double likelihood,
	double& bestAE, VNode*& bestNode) {
	if (vnode->IsLeaf()) {
		double curAE = likelihood * vnode->likelihood * Discount(vnode->depth())
			* (vnode->upper_bound() - vnode->lower_bound());
		if (curAE > bestAE) {
			bestAE = curAE;
			bestNode = vnode;
		}
	} else {
		for (int a = 0; a < vnode->children().size(); a++) {
			FindMaxApproxErrorLeaf(vnode->Child(a), likelihood, bestAE,
				bestNode);
		}
	}
}
Example #10
void LookaheadUpperBound::Init(const RandomStreams& streams) {
	int num_states = indexer_.NumStates();
	int length = streams.Length();
	int num_particles = streams.NumStreams();

	SetSize(bounds_, num_particles, length + 1, num_states);

	clock_t start = clock();
	for (int p = 0; p < num_particles; p++) {
		if (p % 10 == 0)
			cerr << p << " scenarios done! ["
				<< (double(clock() - start) / CLOCKS_PER_SEC) << "s]" << endl;
		for (int t = length; t >= 0; t--) {
			if (t == length) { // base case
				for (int s = 0; s < num_states; s++) {
					bounds_[p][t][s] = particle_upper_bound_->Value(*indexer_.GetState(s));
				}
			} else { // lookahead
				for (int s = 0; s < num_states; s++) {
					double best = Globals::NEG_INFTY;

					for (int a = 0; a < model_->NumActions(); a++) {
						double reward = 0;
						State* copy = model_->Copy(indexer_.GetState(s));
						bool terminal = model_->Step(*copy, streams.Entry(p, t),
							a, reward);
						// Look up the successor state's index before freeing
						// the copy (the original read it after Free, a
						// use-after-free).
						int next_state = indexer_.GetIndex(copy);
						model_->Free(copy);
						reward += (!terminal) * Discount()
							* bounds_[p][t + 1][next_state];

						if (reward > best)
							best = reward;
					}

					bounds_[p][t][s] = best;
				}
			}
		}
	}
}
Example #11
void DESPOT::Expand(QNode* qnode, ScenarioLowerBound* lb,
	ScenarioUpperBound* ub, const DSPOMDP* model,
	RandomStreams& streams,
	History& history) {
	VNode* parent = qnode->parent();
	streams.position(parent->depth());
	map<OBS_TYPE, VNode*>& children = qnode->children();

	const vector<State*>& particles = parent->particles();

	double step_reward = 0;

	// Partition particles by observation
	map<OBS_TYPE, vector<State*> > partitions;
	OBS_TYPE obs;
	double reward;
	for (int i = 0; i < particles.size(); i++) {
		State* particle = particles[i];
		logd << " Original: " << *particle << endl;

		State* copy = model->Copy(particle);

		logd << " Before step: " << *copy << endl;

		bool terminal = model->Step(*copy, streams.Entry(copy->scenario_id),
			qnode->edge(), reward, obs);

		step_reward += reward * copy->weight;

		logd << " After step: " << *copy << " " << (reward * copy->weight)
			<< " " << reward << " " << copy->weight << endl;

		if (!terminal) {
			partitions[obs].push_back(copy);
		} else {
			model->Free(copy);
		}
	}
	step_reward = Discount(parent->depth()) * step_reward
		- Globals::config.pruning_constant; // pruning_constant is used for regularization

	double lower_bound = step_reward;
	double upper_bound = step_reward;

	// Create new belief nodes
	for (map<OBS_TYPE, vector<State*> >::iterator it = partitions.begin();
		it != partitions.end(); it++) {
		OBS_TYPE obs = it->first;
		logd << " Creating node for obs " << obs << endl;
		VNode* vnode = new VNode(partitions[obs], parent->depth() + 1,
			qnode, obs);
		logd << " New node created!" << endl;
		children[obs] = vnode;

		history.Add(qnode->edge(), obs);
		InitBounds(vnode, lb, ub, streams, history);
		history.RemoveLast();
		logd << " New node's bounds: (" << vnode->lower_bound() << ", "
			<< vnode->upper_bound() << ")" << endl;

		lower_bound += vnode->lower_bound();
		upper_bound += vnode->upper_bound();
	}

	qnode->step_reward = step_reward;
	qnode->lower_bound(lower_bound);
	qnode->upper_bound(upper_bound);
	qnode->utility_upper_bound = upper_bound + Globals::config.pruning_constant;

	qnode->default_value = lower_bound; // for debugging
}
Example #12
ValuedAction TrivialBeliefLowerBound::Value(const Belief* belief) const {
	ValuedAction va = model_->GetMinRewardAction();
	va.value *= 1.0 / (1 - Discount());
	return va;
}
Example #13
ValuedAction TrivialParticleLowerBound::Value(
	const vector<State*>& particles) const {
	ValuedAction va = model_->GetMinRewardAction();
	va.value *= State::Weight(particles) / (1 - Discount());
	return va;
}
Example #14
double TrivialParticleUpperBound::Value(const vector<State*>& particles,
	RandomStreams& streams, History& history) const {
	return State::Weight(particles) * model_->GetMaxReward() / (1 - Discount());
}
Example #15
double TrivialParticleUpperBound::Value(const State& state) const {
	return model_->GetMaxReward() / (1 - Discount());
}
Example #16
double TrivialBeliefUpperBound::Value(const Belief* belief) const {
	return model_->GetMaxReward() / (1 - Discount());
}