예제 #1
0
/// Maps a belief vector to the observations it is predicted to produce.
/// "observations" is always resized to the decoder's output width first.
/// When a tutor is set, the tutor fills in the result; otherwise the beliefs
/// are fed forward through the decoder and its output activation is copied out.
void ObservationModel::beliefsToObservations(const GVec& beliefs, GVec& observations)
{
	observations.resize(decoder.outputLayer().outputs());

	// A tutor, when present, takes the place of the learned decoder
	if(tutor)
	{
		tutor->state_to_observations(beliefs, observations);
		return;
	}

	// No tutor: run the decoder and hand back its output-layer activation
	decoder.forwardProp(beliefs);
	observations.copy(decoder.outputLayer().activation());
}
예제 #2
0
/// Encodes observations to predict beliefs.
/// "beliefs" is always resized to the encoder's output width first.
/// When a tutor is set, the tutor fills in the result; otherwise the
/// observations are fed forward through the encoder and its output
/// activation is copied into "beliefs".
void ObservationModel::observationsToBeliefs(const GVec& observations, GVec& beliefs)
{
	beliefs.resize(encoder.outputLayer().outputs());
	if(tutor)
		tutor->observations_to_state(observations, beliefs);
	else
	{
		// The encoder's output fully determines the beliefs, so nothing is
		// pre-filled from the raw observations: such a store would be
		// overwritten by the copy below, and would read past the end of
		// "observations" whenever it is narrower than "beliefs".
		encoder.forwardProp(observations);
		beliefs.copy(encoder.outputLayer().activation());
	}
}
예제 #3
0
/// Predicts the belief vector that results from performing "actions" while
/// holding "beliefs". When a tutor is set, the tutor computes the answer.
/// Otherwise the model is evaluated on the concatenation [beliefs | actions],
/// its output (scaled by 2) is added to the current beliefs, and the sum is
/// clipped to the range [-1, 1].
void TransitionModel::anticipateNextBeliefs(const GVec& beliefs, const GVec& actions, GVec& anticipatedBeliefs)
{
	if(tutor)
	{
		tutor->transition(beliefs, actions, anticipatedBeliefs);
		return;
	}

	// Pack beliefs followed by actions into the input buffer
	const size_t beliefDims = beliefs.size();
	const size_t inputDims = beliefDims + actions.size();
	GAssert(inputDims == model.layer(0).inputs());
	buf.resize(inputDims);
	buf.put(0, beliefs);
	buf.put(beliefDims, actions);

	// The model output is applied as a scaled offset from the current beliefs
	model.forwardProp(buf);
	anticipatedBeliefs.copy(beliefs);
	anticipatedBeliefs.addScaled(2.0, model.outputLayer().activation());
	anticipatedBeliefs.clip(-1.0, 1.0);
}
예제 #4
0
/// Finds the best plan and copies its first step
void PlanningSystem::chooseNextActions(const GVec& beliefs, GVec& actions)
{
	if(tutor)
		tutor->choose_actions(beliefs, actions);
	else
	{
		// Find the best plan (according to the contentment model) and ask the mentor to evaluate it
		size_t planBestIndex = 0;
		double bestContentment = -1e300;
		for(size_t i = 0; i < plans.size(); i++)
		{
			double d = evaluatePlan(beliefs, *plans[i]);
			if(d > bestContentment)
			{
				bestContentment = d;
				planBestIndex = i;
			}
		}
		//std::cout << "Best contentment: " << to_str(bestContentment) << "\n";
		GMatrix& bestPlan = *plans[planBestIndex];
		askMentorToEvaluatePlan(beliefs, bestPlan);

		// Pick a random plan from the population and ask the mentor to evaluate it (for contrast)
		size_t planBindex = rand.next(plans.size() - 1);
		if(planBindex >= planBestIndex)
			planBindex++;
		askMentorToEvaluatePlan(beliefs, *plans[planBindex]);

		// Make a random one-step plan, and ask the mentor to evaluate it (for contrast)
		GVec& action = randomPlan[0];
		action.fillUniform(rand);
		askMentorToEvaluatePlan(beliefs, randomPlan);

		// Copy the first action vector of the best plan for our chosen action
		GVec* bestActions = &bestPlan[0];
		if(burnIn > 0 || rand.uniform() < explorationRate)
			bestActions = &randomPlan[0];
		if(burnIn > 0)
			burnIn--;
		GAssert(bestActions->size() == actionDims);
		actions.copy(*bestActions);
	}
}
예제 #5
0
/// Refines the encoder and decoder based on the new observation.
/// The observation is first stored in one of two ring buffers: the validation
/// buffer when its write position is behind the training buffer's, otherwise
/// the training buffer. Then doSomeTraining() is called
/// min(trainIters, trainSize) times.
void ObservationModel::trainIncremental(const GVec& observation)
{
	// Buffer the pattern
	GVec* dest;
	if(validationPos < trainPos) {
		dest = &validation.row(validationPos);
		validationPos++;
		// Record the high-water mark BEFORE wrapping, so that a full buffer
		// is counted as validation.rows() entries (same as the train branch)
		validationSize = std::max(validationSize, validationPos);
		if(validationPos >= validation.rows())
			validationPos = 0;
	} else {
		dest = &train.row(trainPos);
		trainPos++;
		trainSize = std::max(trainSize, trainPos);
		if(trainPos >= train.rows())
			trainPos = 0;
	}
	dest->copy(observation);

	// Train (never more iterations than there are buffered training patterns)
	size_t iters = std::min(trainIters, trainSize);
	for(size_t i = 0; i < iters; i++)
		doSomeTraining();
}