Пример #1
0
VNode* DESPOT::ConstructTree(vector<State*>& particles, RandomStreams& streams,
	ScenarioLowerBound* lower_bound, ScenarioUpperBound* upper_bound,
	const DSPOMDP* model, History& history, double timeout,
	SearchStatistics* statistics) {
	if (statistics != NULL) {
		statistics->num_particles_before_search = model->NumActiveParticles();
	}

	for (int i = 0; i < particles.size(); i++) {
		particles[i]->scenario_id = i;
	}

	VNode* root = new VNode(particles);

	logd
		<< "[DESPOT::ConstructTree] START - Initializing lower and upper bounds at the root node.";
	InitBounds(root, lower_bound, upper_bound, streams, history);
	logd
		<< "[DESPOT::ConstructTree] END - Initializing lower and upper bounds at the root node.";

	if (statistics != NULL) {
		statistics->initial_lb = root->lower_bound();
		statistics->initial_ub = root->upper_bound();
	}

	double used_time = 0;
	int num_trials = 0;
	do {
		double start = clock();
		VNode* cur = Trial(root, streams, lower_bound, upper_bound, model, history, statistics);
		used_time += double(clock() - start) / CLOCKS_PER_SEC;

		start = clock();
		Backup(cur);
		if (statistics != NULL) {
			statistics->time_backup += double(clock() - start) / CLOCKS_PER_SEC;
		}
		used_time += double(clock() - start) / CLOCKS_PER_SEC;

		num_trials++;
	} while (used_time * (num_trials + 1.0) / num_trials < timeout
		&& (root->upper_bound() - root->lower_bound()) > 1e-6);

	if (statistics != NULL) {
		statistics->num_particles_after_search = model->NumActiveParticles();
		statistics->num_policy_nodes = root->PolicyTreeSize();
		statistics->num_tree_nodes = root->Size();
		statistics->final_lb = root->lower_bound();
		statistics->final_ub = root->upper_bound();
		statistics->time_search = used_time;
		statistics->num_trials = num_trials;
	}

	return root;
}
Пример #2
0
void DESPOT::Compare() {
	vector<State*> particles = belief_->Sample(Globals::config.num_scenarios);
	SearchStatistics statistics;

	RandomStreams streams = RandomStreams(Globals::config.num_scenarios,
		Globals::config.search_depth);

	VNode* root = ConstructTree(particles, streams, lower_bound_, upper_bound_,
		model_, history_, Globals::config.time_per_move, &statistics);

	CheckDESPOT(root, root->lower_bound());
	CheckDESPOTSTAR(root, root->lower_bound());
	delete root;
}
Пример #3
0
void DESPOT::Update(QNode* qnode) {
	double lower = qnode->step_reward;
	double upper = qnode->step_reward;
	double utility_upper = qnode->step_reward
		+ Globals::config.pruning_constant;

	map<OBS_TYPE, VNode*>& children = qnode->children();
	for (map<OBS_TYPE, VNode*>::iterator it = children.begin();
		it != children.end(); it++) {
		VNode* vnode = it->second;

		lower += vnode->lower_bound();
		upper += vnode->upper_bound();
		utility_upper += vnode->utility_upper_bound;
	}

	if (lower > qnode->lower_bound()) {
		qnode->lower_bound(lower);
	}
	if (upper < qnode->upper_bound()) {
		qnode->upper_bound(upper);
	}
	if (utility_upper < qnode->utility_upper_bound) {
		qnode->utility_upper_bound = utility_upper;
	}
}
Пример #4
0
double DESPOT::CheckDESPOTSTAR(const VNode* vnode, double regularized_value) {
	cout
		<< "--------------------------------------------------------------------------------"
		<< endl;

	const vector<State*>& particles = vnode->particles();
	vector<State*> copy;
	for (int i = 0; i < particles.size(); i++) {
		copy.push_back(model_->Copy(particles[i]));
	}
	VNode* root = new VNode(copy);

	RandomStreams streams = RandomStreams(Globals::config.num_scenarios,
		Globals::config.search_depth);
	InitBounds(root, lower_bound_, upper_bound_, streams, history_);

	double used_time = 0;
	int num_trials = 0;
	do {
		double start = clock();
		VNode* cur = Trial(root, streams, lower_bound_, upper_bound_, model_, history_);
		num_trials++;
		used_time += double(clock() - start) / CLOCKS_PER_SEC;

		start = clock();
		Backup(cur);
		used_time += double(clock() - start) / CLOCKS_PER_SEC;
	} while (root->lower_bound() < regularized_value);

	cout << "DESPOT: # trials = " << num_trials << "; target = "
		<< regularized_value << ", current = " << root->lower_bound()
		<< ", l = " << root->lower_bound() << ", u = " << root->upper_bound()
		<< "; time = " << used_time << endl;
	cout
		<< "--------------------------------------------------------------------------------"
		<< endl;

	root->Free(*model_);
	delete root;

	return used_time;
}
Пример #5
0
void DESPOT::ExploitBlockers(VNode* vnode) {
	if (Globals::config.pruning_constant <= 0) {
		return;
	}

	VNode* cur = vnode;
	while (cur != NULL) {
		VNode* blocker = FindBlocker(cur);

		if (blocker != NULL) {
			if (cur->parent() == NULL || blocker == cur) {
				double value = cur->default_move().value;
				cur->lower_bound(value);
				cur->upper_bound(value);
				cur->utility_upper_bound = value;
			} else {
				const map<OBS_TYPE, VNode*>& siblings =
					cur->parent()->children();
				for (map<OBS_TYPE, VNode*>::const_iterator it = siblings.begin();
					it != siblings.end(); it++) {
					VNode* node = it->second;
					double value = node->default_move().value;
					node->lower_bound(value);
					node->upper_bound(value);
					node->utility_upper_bound = value;
				}
			}

			Backup(cur);

			if (cur->parent() == NULL) {
				cur = NULL;
			} else {
				cur = cur->parent()->parent();
			}
		} else {
			break;
		}
	}
}
Пример #6
0
void AEMS::Update(QNode* qnode) {
	double lower = qnode->step_reward;
	double upper = qnode->step_reward;

	map<OBS_TYPE, VNode*>& children = qnode->children();
	for (map<OBS_TYPE, VNode*>::iterator it = children.begin();
			it != children.end(); it++) {
		VNode* vnode = it->second;

		lower += Discount() * vnode->likelihood * vnode->lower_bound();
		upper += Discount() * vnode->likelihood * vnode->upper_bound();
	}

	if (lower > qnode->lower_bound())
		qnode->lower_bound(lower);
	if (upper < qnode->upper_bound())
		qnode->upper_bound(upper);
}
Пример #7
0
void AEMS::Expand(QNode* qnode, BeliefLowerBound* lb, BeliefUpperBound* ub,
	const BeliefMDP* model, History& history) {
	VNode* parent = qnode->parent();
	int action = qnode->edge();
	map<OBS_TYPE, VNode*>& children = qnode->children();

	const Belief* belief = parent->belief();
	// cout << *belief << endl;

	double step_reward = model->StepReward(belief, qnode->edge());

	map<OBS_TYPE, double> obss;
	model->Observe(belief, qnode->edge(), obss);

	double lower_bound = step_reward;
	double upper_bound = step_reward;

	// Create new belief nodes
	for (map<OBS_TYPE, double>::iterator it = obss.begin(); it != obss.end(); it++) {
		OBS_TYPE obs = it->first;
		double weight = it->second;
		logd << "[AEMS::Expand] Creating node for obs " << obs
			<< " with weight " << weight << endl;
		VNode* vnode = new VNode(model->Tau(belief, action, obs),
			parent->depth() + 1, qnode, obs);
		vnode->likelihood = weight;
		logd << " New node created!" << endl;
		children[obs] = vnode;

		InitLowerBound(vnode, lb, history);
		InitUpperBound(vnode, ub, history);

		lower_bound += weight * Discount() * vnode->lower_bound();
		upper_bound += weight * Discount() * vnode->upper_bound();
	}

	qnode->step_reward = step_reward;
	qnode->lower_bound(lower_bound);
	qnode->upper_bound(upper_bound);
}
Пример #8
0
void DESPOT::Expand(QNode* qnode, ScenarioLowerBound* lb,
	ScenarioUpperBound* ub, const DSPOMDP* model,
	RandomStreams& streams,
	History& history) {
	VNode* parent = qnode->parent();
	streams.position(parent->depth());
	map<OBS_TYPE, VNode*>& children = qnode->children();

	const vector<State*>& particles = parent->particles();

	double step_reward = 0;

	// Partition particles by observation
	map<OBS_TYPE, vector<State*> > partitions;
	OBS_TYPE obs;
	double reward;
	for (int i = 0; i < particles.size(); i++) {
		State* particle = particles[i];
		logd << " Original: " << *particle << endl;

		State* copy = model->Copy(particle);

		logd << " Before step: " << *copy << endl;

		bool terminal = model->Step(*copy, streams.Entry(copy->scenario_id),
			qnode->edge(), reward, obs);

		step_reward += reward * copy->weight;

		logd << " After step: " << *copy << " " << (reward * copy->weight)
			<< " " << reward << " " << copy->weight << endl;

		if (!terminal) {
			partitions[obs].push_back(copy);
		} else {
			model->Free(copy);
		}
	}
	step_reward = Discount(parent->depth()) * step_reward
		- Globals::config.pruning_constant;//pruning_constant is used for regularization

	double lower_bound = step_reward;
	double upper_bound = step_reward;

	// Create new belief nodes
	for (map<OBS_TYPE, vector<State*> >::iterator it = partitions.begin();
		it != partitions.end(); it++) {
		OBS_TYPE obs = it->first;
		logd << " Creating node for obs " << obs << endl;
		VNode* vnode = new VNode(partitions[obs], parent->depth() + 1,
			qnode, obs);
		logd << " New node created!" << endl;
		children[obs] = vnode;

		history.Add(qnode->edge(), obs);
		InitBounds(vnode, lb, ub, streams, history);
		history.RemoveLast();
		logd << " New node's bounds: (" << vnode->lower_bound() << ", "
			<< vnode->upper_bound() << ")" << endl;

		lower_bound += vnode->lower_bound();
		upper_bound += vnode->upper_bound();
	}

	qnode->step_reward = step_reward;
	qnode->lower_bound(lower_bound);
	qnode->upper_bound(upper_bound);
	qnode->utility_upper_bound = upper_bound + Globals::config.pruning_constant;

	qnode->default_value = lower_bound; // for debugging
}
Пример #9
0
double DESPOT::CheckDESPOT(const VNode* vnode, double regularized_value) {
	cout
		<< "--------------------------------------------------------------------------------"
		<< endl;

	const vector<State*>& particles = vnode->particles();
	vector<State*> copy;
	for (int i = 0; i < particles.size(); i ++) {
		copy.push_back(model_->Copy(particles[i]));
	}
	VNode* root = new VNode(copy);

	double pruning_constant = Globals::config.pruning_constant;
	Globals::config.pruning_constant = 0;

	RandomStreams streams = RandomStreams(Globals::config.num_scenarios,
		Globals::config.search_depth);

	streams.position(0);
	InitBounds(root, lower_bound_, upper_bound_, streams, history_);

	double used_time = 0;
	int num_trials = 0, prev_num = 0;
	double pruned_value;
	do {
		double start = clock();
		VNode* cur = Trial(root, streams, lower_bound_, upper_bound_, model_, history_);
		num_trials++;
		used_time += double(clock() - start) / CLOCKS_PER_SEC;

		start = clock();
		Backup(cur);
		used_time += double(clock() - start) / CLOCKS_PER_SEC;

		if (double(num_trials - prev_num) > 0.05 * prev_num) {
			int pruned_action;
			Globals::config.pruning_constant = pruning_constant;
			VNode* pruned = Prune(root, pruned_action, pruned_value);
			Globals::config.pruning_constant = 0;
			prev_num = num_trials;

			pruned->Free(*model_);
			delete pruned;

			cout << "# trials = " << num_trials << "; target = "
				<< regularized_value << ", current = " << pruned_value
				<< ", l = " << root->lower_bound() << ", u = "
				<< root->upper_bound() << "; time = " << used_time << endl;

			if (pruned_value >= regularized_value) {
				break;
			}
		}
	} while (true);

	cout << "DESPOT: # trials = " << num_trials << "; target = "
		<< regularized_value << ", current = " << pruned_value << ", l = "
		<< root->lower_bound() << ", u = " << root->upper_bound() << "; time = "
		<< used_time << endl;
	Globals::config.pruning_constant = pruning_constant;
	cout
		<< "--------------------------------------------------------------------------------"
		<< endl;

	root->Free(*model_);
	delete root;

	return used_time;
}