// Recomputes qfun_(s, a) from the model, refreshes the stored value and
// greedy action of state s, and schedules s in the priority queue when its
// value moved by more than theta_.
//
// Parameters:
//   s - state whose Q-value entry is recomputed.
//   a - action whose Q-value entry is recomputed.
//
// Queue invariant kept by this function: a state that already has a handle in
// queueHandles_ is never pushed a second time. Its existing entry is raised
// when the new priority is strictly larger and left untouched otherwise, so
// the stored handle is never overwritten while its entry is still queued.
void PrioritizedSweeping<M>::stepUpdateQ(size_t s, size_t a) {
    auto & values = std::get<VALUES>(vfun_);

    { // Update q[s][a]
        // The discount does not depend on s1, so read it once.
        const auto discount = model_.getDiscount();

        double newQValue = 0;
        for ( size_t s1 = 0; s1 < S; ++s1 ) {
            const double probability = model_.getTransitionProbability(s, a, s1);
            // Transitions whose probability is not distinguishable from zero
            // contribute nothing and are skipped.
            if ( checkDifferentSmall( probability, 0.0 ) )
                newQValue += probability * ( model_.getExpectedReward(s, a, s1) + discount * values[s1] );
        }
        qfun_(s, a) = newQValue;
    }

    const double oldValue = values[s];

    // Update value and action: the value of s becomes the maximum of its
    // Q-row, and the index of that maximum is written into the action slot.
    values[s] = qfun_.row(s).maxCoeff(&std::get<ACTIONS>(vfun_)[s]);

    // The priority is the magnitude of the change in the value of s.
    const double delta = std::fabs(values[s] - oldValue);

    // If it changed enough, we're going to update its parents.
    if ( delta > theta_ ) {
        auto it = queueHandles_.find(s);

        if ( it == std::end(queueHandles_) ) {
            // Not queued yet: insert it and remember its handle.
            queueHandles_[s] = queue_.push(std::make_tuple(delta, s));
        }
        else if ( std::get<PRIORITY>(*(it->second)) < delta ) {
            // Already queued with a lower priority: raise the existing entry.
            queue_.increase(it->second, std::make_tuple(delta, s));
        }
        // Otherwise s is already queued with at least this priority; pushing
        // again would create a duplicate entry and lose the stored handle.
    }
}
// Fills *v from q: for every state index in [0, S) the value entry is set to
// the largest coefficient of that state's row in q, and the action entry
// receives the index at which that maximum occurs.
//
// Parameters:
//   q - Q-function read row by row; it is not modified.
//   v - output value function; must not be null (checked with assert).
void ValueIterationGeneral<M>::bellmanOperator(const QFunction & q, ValueFunction * v) const {
    assert(v);

    auto & stateValues  = std::get<VALUES> (*v);
    auto & greedyAction = std::get<ACTIONS>(*v);

    for ( size_t state = 0; state < S; ++state ) {
        // maxCoeff returns the row maximum and stores its index through the
        // pointer argument, so one call yields both the value and the action.
        const auto rowMax = q.row(state).maxCoeff(&greedyAction[state]);
        stateValues(state) = rowMax;
    }
}