/// Decodes beliefs to predict observations
void ObservationModel::beliefsToObservations(const GVec& beliefs, GVec& observations)
{
	observations.resize(decoder.outputLayer().outputs());
	if(tutor)
		tutor->state_to_observations(beliefs, observations);
	else
	{
		decoder.forwardProp(beliefs);
		observations.copy(decoder.outputLayer().activation());
	}
}
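// Illustrative sketch (not part of this class): one common use of beliefsToObservations is to
// visualize what the agent expects to see next. The names "agent", "transitionModel", and
// "observationModel" below are assumptions for the example, not members defined in this file.
//
//   GVec anticipated, predictedObs;
//   agent.transitionModel.anticipateNextBeliefs(currentBeliefs, chosenActions, anticipated);
//   agent.observationModel.beliefsToObservations(anticipated, predictedObs);
//   // predictedObs now holds the decoded prediction, e.g. for rendering an "imagined" frame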
/// Encodes observations to predict beliefs
void ObservationModel::observationsToBeliefs(const GVec& observations, GVec& beliefs)
{
	beliefs.resize(encoder.outputLayer().outputs());
	if(tutor)
		tutor->observations_to_state(observations, beliefs);
	else
	{
		// Feed the observations through the encoder and return its activation as the beliefs
		encoder.forwardProp(observations);
		beliefs.copy(encoder.outputLayer().activation());
	}
}
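// Illustrative sketch (not part of this class): encoding and then decoding an observation gives a
// quick measure of how well the current encoder/decoder pair captures it. The variable names are
// assumptions for the example.
//
//   GVec beliefs, reconstruction;
//   obsModel.observationsToBeliefs(observation, beliefs);
//   obsModel.beliefsToObservations(beliefs, reconstruction);
//   double err = observation.squaredDistance(reconstruction); // reconstruction error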
/// Predict the belief vector that will result if the specified action is performed
void TransitionModel::anticipateNextBeliefs(const GVec& beliefs, const GVec& actions, GVec& anticipatedBeliefs)
{
	if(tutor)
		tutor->transition(beliefs, actions, anticipatedBeliefs);
	else
	{
		GAssert(beliefs.size() + actions.size() == model.layer(0).inputs());

		// Concatenate the beliefs and actions to form the model's input
		buf.resize(beliefs.size() + actions.size());
		buf.put(0, beliefs);
		buf.put(beliefs.size(), actions);
		model.forwardProp(buf);

		// The model predicts the change in beliefs, so add a scaled copy of its output
		// to the current beliefs, then clip the result back into the belief space
		anticipatedBeliefs.copy(beliefs);
		anticipatedBeliefs.addScaled(2.0, model.outputLayer().activation());
		anticipatedBeliefs.clip(-1.0, 1.0);
	}
}
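// Illustrative sketch (not part of this class): because each call predicts only one step, a
// multi-step plan can be evaluated by rolling the beliefs forward through the model. Here "plan"
// is assumed to be a GMatrix with one action vector per row, as in PlanningSystem.
//
//   GVec b, next;
//   b.copy(currentBeliefs);
//   for(size_t t = 0; t < plan.rows(); t++)
//   {
//       transitionModel.anticipateNextBeliefs(b, plan[t], next);
//       b.copy(next);
//   }
//   // b now approximates the beliefs expected after executing the whole plan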
/// Finds the best plan and copies its first step
void PlanningSystem::chooseNextActions(const GVec& beliefs, GVec& actions)
{
	if(tutor)
		tutor->choose_actions(beliefs, actions);
	else
	{
		// Find the best plan (according to the contentment model) and ask the mentor to evaluate it
		size_t planBestIndex = 0;
		double bestContentment = -1e300;
		for(size_t i = 0; i < plans.size(); i++)
		{
			double d = evaluatePlan(beliefs, *plans[i]);
			if(d > bestContentment)
			{
				bestContentment = d;
				planBestIndex = i;
			}
		}
		//std::cout << "Best contentment: " << to_str(bestContentment) << "\n";
		GMatrix& bestPlan = *plans[planBestIndex];
		askMentorToEvaluatePlan(beliefs, bestPlan);

		// Pick a random plan from the population and ask the mentor to evaluate it (for contrast)
		size_t planBindex = rand.next(plans.size() - 1);
		if(planBindex >= planBestIndex)
			planBindex++;
		askMentorToEvaluatePlan(beliefs, *plans[planBindex]);

		// Make a random one-step plan, and ask the mentor to evaluate it (for contrast)
		GVec& action = randomPlan[0];
		action.fillUniform(rand);
		askMentorToEvaluatePlan(beliefs, randomPlan);

		// Copy the first action vector of the best plan for our chosen action
		GVec* bestActions = &bestPlan[0];
		if(burnIn > 0 || rand.uniform() < explorationRate)
			bestActions = &randomPlan[0];
		if(burnIn > 0)
			burnIn--;
		GAssert(bestActions->size() == actionDims);
		actions.copy(*bestActions);
	}
}
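// Illustrative sketch (not part of this class): in the agent's sense-plan-act cycle,
// chooseNextActions is typically called after the observation model has updated the beliefs and
// before the chosen actions are sent to the environment. The names "agent", "observationModel",
// and "planningSystem" are assumptions for the example.
//
//   GVec beliefs, actions;
//   agent.observationModel.observationsToBeliefs(observations, beliefs);
//   agent.planningSystem.chooseNextActions(beliefs, actions);
//   // ...pass "actions" to the environment, then observe and learn from the outcome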
/// Refines the encoder and decoder based on the new observation.
void ObservationModel::trainIncremental(const GVec& observation)
{
	// Buffer the pattern, alternating between the validation and training ring buffers
	GVec* dest;
	if(validationPos < trainPos)
	{
		dest = &validation.row(validationPos);
		validationPos++;
		validationSize = std::max(validationSize, validationPos);
		if(validationPos >= validation.rows())
			validationPos = 0;
	}
	else
	{
		dest = &train.row(trainPos);
		trainPos++;
		trainSize = std::max(trainSize, trainPos);
		if(trainPos >= train.rows())
			trainPos = 0;
	}
	dest->copy(observation);

	// Train
	size_t iters = std::min(trainIters, trainSize);
	for(size_t i = 0; i < iters; i++)
		doSomeTraining();
}
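// Illustrative sketch (not part of this class): a typical caller feeds every new observation to
// trainIncremental, so the encoder and decoder are refined online while the train and validation
// buffers alternately absorb the stream. The "environment.step" call is an assumed API for the
// example.
//
//   while(environment.step(observations))
//       obsModel.trainIncremental(observations);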