/**
 * Recomputes the Q-value of the pair (s, a) from the model, refreshes the
 * stored value and best action of s, and, if the value of s changed by more
 * than theta_, makes sure s is in the queue with at least that priority.
 *
 * @param s The state whose Q-value is recomputed.
 * @param a The action whose Q-value is recomputed.
 */
void PrioritizedSweeping<M>::stepUpdateQ(size_t s, size_t a) {
            auto & values = std::get<VALUES>(vfun_);
            { // Update q[s][a]
                double newQValue = 0;
                for ( size_t s1 = 0; s1 < S; ++s1 ) {
                    const double probability = model_.getTransitionProbability(s,a,s1);
                    // Only successors whose probability checkDifferentSmall
                    // reports as different from zero contribute to the sum.
                    if ( checkDifferentSmall( probability, 0.0 ) )
                        newQValue += probability * ( model_.getExpectedReward(s,a,s1) + model_.getDiscount() * values[s1] );
                }
                qfun_(s, a) = newQValue;
            }

            // Remember the previous value so we can measure how much it moved.
            const double oldValue = values[s];
            {
                // Update value and action: maxCoeff returns the row maximum and
                // writes its column index into the action slot for s.
                values[s] = qfun_.row(s).maxCoeff(&std::get<ACTIONS>(vfun_)[s]);
            }

            // The priority of s is the magnitude of its value change.
            const double p = std::fabs(values[s] - oldValue);

            // If it changed enough, we're going to update its parents.
            if ( p > theta_ ) {
                auto it = queueHandles_.find(s);

                if ( it == std::end(queueHandles_) ) {
                    // Not queued yet: insert it and remember its handle.
                    queueHandles_[s] = queue_.push(std::make_tuple(p, s));
                } else if ( std::get<PRIORITY>(*(it->second)) < p ) {
                    // Already queued with a lower priority: raise it in place.
                    queue_.increase(it->second, std::make_tuple(p, s));
                }
                // Otherwise s is already queued with a priority >= p. Pushing
                // again would create a duplicate entry and overwrite the handle
                // of the existing one, so we leave the queue untouched.
            }
        }
Пример #2
0
        /**
         * For every state, stores the largest entry of that state's row of q
         * as the state's value, and the column index of that entry as the
         * state's action, inside the ValueFunction pointed to by v.
         *
         * @param q The QFunction to read from.
         * @param v The ValueFunction to overwrite; must not be null.
         */
        void ValueIterationGeneral<M>::bellmanOperator(const QFunction & q, ValueFunction * v) const {
            assert(v);

            ValueFunction & target = *v;
            auto & bestValues  = std::get<VALUES> (target);
            auto & bestActions = std::get<ACTIONS>(target);

            for ( size_t state = 0; state < S; ++state ) {
                // maxCoeff writes the index of the maximum into the action slot
                // and returns the maximum itself.
                auto * actionSlot = &bestActions[state];
                bestValues(state) = q.row(state).maxCoeff(actionSlot);
            }
        }