/// Refines this model based on a recently performed action and change in beliefs
void TransitionModel::trainIncremental(const GVec& beliefs, const GVec& actions, const GVec& nextBeliefs)
{
	// Buffer the pattern in the ring buffer of training data
	GVec& destIn = trainInput.row(trainPos);
	GVec& destOut = trainOutput.row(trainPos);
	trainPos++;
	trainSize = std::max(trainSize, trainPos);
	if(trainPos >= trainInput.rows())
		trainPos = 0;
	if(beliefs.size() + actions.size() != destIn.size() || beliefs.size() != destOut.size())
		throw Ex("size mismatch");
	destIn.put(0, beliefs);
	destIn.put(beliefs.size(), actions);
	for(size_t i = 0; i < destOut.size(); i++)
		destOut[i] = 0.5 * (nextBeliefs[i] - beliefs[i]); // target is half the belief delta; anticipateNextBeliefs scales by 2.0

	// Refine the model
	size_t iters = std::min(trainIters, 1000 * trainSize);
	for(size_t i = 0; i < iters; i++)
		doSomeTraining();
}
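// Illustrative usage sketch, not part of the original file. Assuming `model` is a
// constructed TransitionModel and the three vectors come from consecutive time steps,
// one observed transition (b_t, a_t, b_t+1) is folded in with:
//
//   model.trainIncremental(beliefs, actions, nextBeliefs);
//
// Note that the buffered target is half the belief delta; anticipateNextBeliefs
// (below) compensates by scaling the network's output by 2.0, so a perfectly
// trained model reproduces b_t+1 exactly.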
/// Predicts the belief vector that will result if the specified action is performed
void TransitionModel::anticipateNextBeliefs(const GVec& beliefs, const GVec& actions, GVec& anticipatedBeliefs)
{
	if(tutor)
		tutor->transition(beliefs, actions, anticipatedBeliefs);
	else
	{
		GAssert(beliefs.size() + actions.size() == model.layer(0).inputs());
		buf.resize(beliefs.size() + actions.size());
		buf.put(0, beliefs);
		buf.put(beliefs.size(), actions);
		model.forwardProp(buf);
		anticipatedBeliefs.copy(beliefs);
		anticipatedBeliefs.addScaled(2.0, model.outputLayer().activation()); // undo the 0.5 scaling applied in trainIncremental
		anticipatedBeliefs.clip(-1.0, 1.0);
	}
}
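// A minimal sketch (not part of the original API): rolling a multi-step plan forward
// through the transition model to estimate where a sequence of actions is expected
// to lead. The helper name is hypothetical; each row of `plan` is assumed to be one
// action vector, as in PlanningSystem below.
static void rollOutPlan(TransitionModel& model, const GVec& startBeliefs, const GMatrix& plan, GVec& endBeliefs)
{
	GVec cur;
	cur.copy(startBeliefs);
	GVec next;
	for(size_t i = 0; i < plan.rows(); i++)
	{
		model.anticipateNextBeliefs(cur, plan[i], next); // predict one step ahead
		cur.copy(next);                                  // chain the prediction
	}
	endBeliefs.copy(cur);
}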
/// Encodes observations into beliefs
void ObservationModel::observationsToBeliefs(const GVec& observations, GVec& beliefs)
{
	beliefs.resize(encoder.outputLayer().outputs());
	if(tutor)
		tutor->observations_to_state(observations, beliefs);
	else
	{
		encoder.forwardProp(observations);
		beliefs.copy(encoder.outputLayer().activation());
	}
}
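// Hypothetical caller sketch (not original code): perception is a single call, and
// observationsToBeliefs resizes the output vector itself, so no setup is needed:
//
//   GVec beliefs;
//   obsModel.observationsToBeliefs(observations, beliefs);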
/// Finds the best plan and copies its first step
void PlanningSystem::chooseNextActions(const GVec& beliefs, GVec& actions)
{
	if(tutor)
		tutor->choose_actions(beliefs, actions);
	else
	{
		// Find the best plan (according to the contentment model) and ask the mentor to evaluate it
		size_t planBestIndex = 0;
		double bestContentment = -1e300;
		for(size_t i = 0; i < plans.size(); i++)
		{
			double d = evaluatePlan(beliefs, *plans[i]);
			if(d > bestContentment)
			{
				bestContentment = d;
				planBestIndex = i;
			}
		}
		GMatrix& bestPlan = *plans[planBestIndex];
		askMentorToEvaluatePlan(beliefs, bestPlan);

		// Pick a random plan from the population (other than the best one) and ask the mentor to evaluate it (for contrast)
		size_t planBindex = rand.next(plans.size() - 1);
		if(planBindex >= planBestIndex)
			planBindex++;
		askMentorToEvaluatePlan(beliefs, *plans[planBindex]);

		// Make a random one-step plan, and ask the mentor to evaluate it (for contrast)
		GVec& action = randomPlan[0];
		action.fillUniform(rand);
		askMentorToEvaluatePlan(beliefs, randomPlan);

		// Copy the first action vector of the best plan for our chosen action
		// (or a random action while burning in or exploring)
		GVec* bestActions = &bestPlan[0];
		if(burnIn > 0 || rand.uniform() < explorationRate)
			bestActions = &randomPlan[0];
		if(burnIn > 0)
			burnIn--;
		GAssert(bestActions->size() == actionDims);
		actions.copy(*bestActions);
	}
}
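// Illustrative glue (assumptions: the three component objects are already
// constructed; `agentStep` is a hypothetical helper, not part of the original file).
// It wires the functions above into one perceive-plan-learn cycle; the caller is
// expected to execute `actions` in the environment between the two observations.
static void agentStep(ObservationModel& obsModel, PlanningSystem& planner, TransitionModel& transModel,
	const GVec& observations, const GVec& nextObservations, GVec& actions)
{
	GVec beliefs;
	GVec nextBeliefs;
	obsModel.observationsToBeliefs(observations, beliefs);         // perceive
	planner.chooseNextActions(beliefs, actions);                   // plan and pick actions
	obsModel.observationsToBeliefs(nextObservations, nextBeliefs); // perceive the outcome
	transModel.trainIncremental(beliefs, actions, nextBeliefs);    // learn the transition
}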