Ejemplo n.º 1
0
/// Refines this model based on a recently performed action and change in beliefs
void TransitionModel::trainIncremental(const GVec& beliefs, const GVec& actions, const GVec& nextBeliefs)
{
	// Buffer the pattern
	GVec& destIn = trainInput.row(trainPos);
	GVec& destOut = trainOutput.row(trainPos);
	trainPos++;
	trainSize = std::max(trainSize, trainPos);
	if(trainPos >= trainInput.rows())
		trainPos = 0;
	if(beliefs.size() + actions.size() != destIn.size() || beliefs.size() != destOut.size())
		throw Ex("size mismatch");
	destIn.put(0, beliefs);
	destIn.put(beliefs.size(), actions);
	for(size_t i = 0; i < destOut.size(); i++)
		destOut[i] = 0.5 * (nextBeliefs[i] - beliefs[i]);
/*
destIn.print();
std::cout << "->";
destOut.print();
std::cout << "\n";
std::cout << to_str(0.5 * cos(destIn[2])) << ", " << to_str(0.5 * sin(destIn[2])) << "\n";
*/
	// Refine the model
	size_t iters = std::min(trainIters, 1000 * trainSize);
	for(size_t i = 0; i < iters; i++)
		doSomeTraining();
}
Ejemplo n.º 2
0
/// Predict the belief vector that will result if the specified action is performed
void TransitionModel::anticipateNextBeliefs(const GVec& beliefs, const GVec& actions, GVec& anticipatedBeliefs)
{
	if(tutor)
		tutor->transition(beliefs, actions, anticipatedBeliefs);
	else
	{
		GAssert(beliefs.size() + actions.size() == model.layer(0).inputs());
		buf.resize(beliefs.size() + actions.size());
		buf.put(0, beliefs);
		buf.put(beliefs.size(), actions);
		model.forwardProp(buf);
		anticipatedBeliefs.copy(beliefs);
		anticipatedBeliefs.addScaled(2.0, model.outputLayer().activation());
		anticipatedBeliefs.clip(-1.0, 1.0);
	}
}
Ejemplo n.º 3
0
/// Encodes observations to predict beliefs
void ObservationModel::observationsToBeliefs(const GVec& observations, GVec& beliefs)
{
	beliefs.resize(encoder.outputLayer().outputs());
	if(tutor)
		tutor->observations_to_state(observations, beliefs);
	else
	{
		beliefs.put(0, observations, 0, beliefs.size());
		encoder.forwardProp(observations);
		beliefs.copy(encoder.outputLayer().activation());
	}
}
Ejemplo n.º 4
0
/// Finds the best plan and copies its first step
void PlanningSystem::chooseNextActions(const GVec& beliefs, GVec& actions)
{
	if(tutor)
		tutor->choose_actions(beliefs, actions);
	else
	{
		// Find the best plan (according to the contentment model) and ask the mentor to evaluate it
		size_t planBestIndex = 0;
		double bestContentment = -1e300;
		for(size_t i = 0; i < plans.size(); i++)
		{
			double d = evaluatePlan(beliefs, *plans[i]);
			if(d > bestContentment)
			{
				bestContentment = d;
				planBestIndex = i;
			}
		}
		//std::cout << "Best contentment: " << to_str(bestContentment) << "\n";
		GMatrix& bestPlan = *plans[planBestIndex];
		askMentorToEvaluatePlan(beliefs, bestPlan);

		// Pick a random plan from the population and ask the mentor to evaluate it (for contrast)
		size_t planBindex = rand.next(plans.size() - 1);
		if(planBindex >= planBestIndex)
			planBindex++;
		askMentorToEvaluatePlan(beliefs, *plans[planBindex]);

		// Make a random one-step plan, and ask the mentor to evaluate it (for contrast)
		GVec& action = randomPlan[0];
		action.fillUniform(rand);
		askMentorToEvaluatePlan(beliefs, randomPlan);

		// Copy the first action vector of the best plan for our chosen action
		GVec* bestActions = &bestPlan[0];
		if(burnIn > 0 || rand.uniform() < explorationRate)
			bestActions = &randomPlan[0];
		if(burnIn > 0)
			burnIn--;
		GAssert(bestActions->size() == actionDims);
		actions.copy(*bestActions);
	}
}