Code example #1
        QFunction ValueIterationGeneral<M>::computeImmediateRewards(const M & model) const {
            QFunction pr = makeQFunction(S, A);

            for ( size_t s = 0; s < S; ++s )
                for ( size_t a = 0; a < A; ++a )
                    for ( size_t s1 = 0; s1 < S; ++s1 )
                        pr(s, a) += model.getTransitionProbability(s,a,s1) * model.getExpectedReward(s,a,s1);

            return pr;
        }
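In other words, the loop above accumulates the expected immediate reward R(s,a) = sum over s' of T(s,a,s') * R(s,a,s'). Below is a hand-worked instance of that sum for a single (s, a) pair, as a self-contained sketch; the probabilities and rewards are made up for illustration and are not from the library.

        #include <cassert>

        int main() {
            // One (s, a) pair with two successor states:
            //   T(s,a,s0) = 0.75, R(s,a,s0) = 10.0
            //   T(s,a,s1) = 0.25, R(s,a,s1) =  2.0
            // pr(s, a) would accumulate 0.75 * 10.0 + 0.25 * 2.0 = 8.0
            const double pr_sa = 0.75 * 10.0 + 0.25 * 2.0;
            assert(pr_sa == 8.0);
            return 0;
        }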
Code example #2
        std::tuple<bool, ValueFunction, QFunction> ValueIterationGeneral<M>::operator()(const M & model) {
            // Extract necessary knowledge from model so we don't have to pass it around
            S = model.getS();
            A = model.getA();
            discount_ = model.getDiscount();

            {
                // Verify that parameter value function is compatible.
                size_t size = std::get<VALUES>(vParameter_).size();
                if ( size != S ) {
                    if ( size != 0 )
                        std::cerr << "AIToolbox: Size of starting value function in ValueIteration::solve() is incorrect, ignoring...\n";
                    // Defaulting
                    v1_ = makeValueFunction(S);
                }
                else
                    v1_ = vParameter_;
            }

            auto ir = computeImmediateRewards(model);

            unsigned timestep = 0;
            double variation = epsilon_ * 2; // Start above epsilon so the loop runs at least once

            Values val0;
            QFunction q = makeQFunction(S, A);

            bool useEpsilon = checkDifferentSmall(epsilon_, 0.0);
            while ( timestep < horizon_ && (!useEpsilon || variation > epsilon_) ) {
                ++timestep;

                auto & val1 = std::get<VALUES>(v1_);
                val0 = val1;

                q = computeQFunction(model, ir);
                bellmanOperator(q, &v1_);

                // We do this only if the epsilon specified is positive, otherwise we
                // continue for all the timesteps.
                if ( useEpsilon )
                    variation = (val1 - val0).cwiseAbs().maxCoeff();
            }

            // We do not guarantee that the returned ValueFunction/QFunction are exact, since we stop as soon as the variation is within epsilon.
            return std::make_tuple(variation <= epsilon_, v1_, q);
        }
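The loop body above is the standard value-iteration backup. Since computeQFunction and bellmanOperator are not part of this listing, the sketch below is an assumption about what they compute under the usual definitions, written against the same names (model, ir, q, val0, val1, discount_); it is not the library's exact code.

            // Q(s,a) = R(s,a) + discount * sum_s' T(s,a,s') * V_old(s')
            for ( size_t s = 0; s < S; ++s )
                for ( size_t a = 0; a < A; ++a ) {
                    double backup = ir(s, a);
                    for ( size_t s1 = 0; s1 < S; ++s1 )
                        backup += discount_ * model.getTransitionProbability(s,a,s1) * val0[s1];
                    q(s, a) = backup;
                }
            // V_new(s) = max_a Q(s,a); the row-wise max assumes an Eigen-backed QFunction.
            for ( size_t s = 0; s < S; ++s )
                val1[s] = q.row(s).maxCoeff();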
Code example #3
File: SARSA.cpp Project: CarbonGU/AI-Toolbox
 SARSA::SARSA(size_t ss, size_t aa, double discount, double alpha) : S(ss), A(aa), alpha_(alpha), discount_(discount), q_(makeQFunction(S, A)) {
     // The first check must test the discount, not the learning rate, to match its error message.
     if ( discount_ <= 0.0 || discount_ > 1.0 ) throw std::invalid_argument("Discount parameter must be in (0,1]");
     if ( alpha_ <= 0.0 || alpha_ > 1.0 )       throw std::invalid_argument("Learning rate parameter must be in (0,1]");
 }
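For context, the alpha_ and discount_ values validated here parameterize the standard on-policy SARSA update applied to the q_ member. A minimal sketch of that update follows; the method name stepUpdate is hypothetical, and only q_, alpha_ and discount_ come from the snippet above.

 void SARSA::stepUpdate(size_t s, size_t a, size_t s1, size_t a1, double rew) {
     // Q(s,a) <- Q(s,a) + alpha * ( r + gamma * Q(s',a') - Q(s,a) )
     q_(s, a) += alpha_ * ( rew + discount_ * q_(s1, a1) - q_(s, a) );
 }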
Code example #4
 QLearning<M>::QLearning(const M& model, double alpha) : model_(model), S(model_.getS()), A(model_.getA()), alpha_(alpha), discount_(model_.getDiscount()), q_(makeQFunction(S,A)) {
     if ( alpha_ <= 0.0 || alpha_ > 1.0 ) throw std::invalid_argument("Learning rate parameter must be in (0,1]");
 }
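Similarly, these parameters feed the standard off-policy Q-learning update. A minimal sketch follows; stepUpdate is again a hypothetical name, and the row-wise max assumes the QFunction is an Eigen matrix, which this snippet does not show.

 template <typename M>
 void QLearning<M>::stepUpdate(size_t s, size_t a, size_t s1, double rew) {
     // Q(s,a) <- Q(s,a) + alpha * ( r + gamma * max_a' Q(s',a') - Q(s,a) )
     q_(s, a) += alpha_ * ( rew + discount_ * q_.row(s1).maxCoeff() - q_(s, a) );
 }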
Code example #5
 PrioritizedSweeping<M>::PrioritizedSweeping(const M & m, double theta, unsigned n) :
         S(m.getS()),
         A(m.getA()),
         N(n),
         theta_(theta),
         model_(m),
         qfun_(makeQFunction(S,A)),
         vfun_(makeValueFunction(S)) {}
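The theta_ and N members stored here are, in the usual formulation, the minimum priority a state-action pair needs to enter the queue and the number of queued backups performed per batch. A sketch of the standard priority computation follows; the free function priority is hypothetical and the row-wise max assumes an Eigen-backed QFunction, so this is the textbook rule rather than the library's exact code.

 #include <cmath>

 double priority(const QFunction & q, size_t s, size_t a, size_t s1,
                 double rew, double discount) {
     // How much the observed transition (s, a, s', rew) would change Q(s, a).
     return std::fabs( rew + discount * q.row(s1).maxCoeff() - q(s, a) );
 }
 // A pair (s, a) is queued only when its priority exceeds theta_, and at most
 // N queued entries are backed up per batch.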
Code example #6
File: ValueIteration.hpp Project: EHadoux/AI-Toolbox
            }
            else
                v1_ = vParameter_;
        }

        const auto & ir = [&]{
            if constexpr (is_model_eigen_v<M>) return model.getRewardFunction();
            else return computeImmediateRewards(model);
        }();

        unsigned timestep = 0;
        double variation = tolerance_ * 2; // Start above the tolerance so the loop runs at least once

        Values val0;
        auto & val1 = v1_.values;
        QFunction q = makeQFunction(S, A);

        const bool useTolerance = checkDifferentSmall(tolerance_, 0.0);
        while ( timestep < horizon_ && (!useTolerance || variation > tolerance_) ) {
            ++timestep;
            AI_LOGGER(AI_SEVERITY_DEBUG, "Processing timestep " << timestep);

            val0 = val1;

            // We apply the discount directly on the values vector.
            val1 *= model.getDiscount();
            q = computeQFunction(model, val1, ir);

            // Compute the new value function (note that val1 is also overwritten)
            bellmanOperatorInline(q, &v1_);