Пример #1
0
        /**
         * @brief Runs value iteration on the given model.
         *
         * The starting value function is the stored parameter one when its
         * size equals the model's state count; otherwise it is whatever
         * makeValueFunction(S) produces. Each iteration recomputes the
         * QFunction and applies bellmanOperator() to the working value
         * function. Iteration stops after horizon_ steps or, when epsilon_
         * is considered different from zero, as soon as the largest absolute
         * change between two consecutive value vectors is no longer greater
         * than epsilon_.
         *
         * @param model The model to solve; queried for getS(), getA() and getDiscount().
         *
         * @return A tuple containing: whether the last recorded variation is
         *         within epsilon_, the final ValueFunction, and the last
         *         QFunction that was computed.
         */
        std::tuple<bool, ValueFunction, QFunction> ValueIterationGeneral<M>::operator()(const M & model) {
            // Cache the model's dimensions and discount so that they do not
            // need to be passed around explicitly.
            S = model.getS();
            A = model.getA();
            discount_ = model.getDiscount();

            // Choose the initial value function: accept the user-provided one
            // only if its size matches the number of states.
            const size_t startSize = std::get<VALUES>(vParameter_).size();
            if ( startSize == S ) {
                v1_ = vParameter_;
            }
            else {
                // An empty parameter simply means "use the default", so we
                // only complain when a non-empty one has the wrong size.
                if ( startSize != 0 )
                    std::cerr << "AIToolbox: Size of starting value function in ValueIteration::solve() is incorrect, ignoring...\n";
                v1_ = makeValueFunction(S);
            }

            auto rewards = computeImmediateRewards(model);

            // When epsilon_ is (numerically) zero the convergence test is
            // disabled and we always run for the whole horizon.
            const bool checkConvergence = checkDifferentSmall(epsilon_, 0.0);

            // Start above the threshold so the first iteration is not skipped.
            double variation = epsilon_ * 2;

            QFunction qfun = makeQFunction(S, A);
            Values previous;

            // bellmanOperator() updates v1_ through the pointer it receives, so
            // this reference always observes the most recent values.
            auto & current = std::get<VALUES>(v1_);

            for ( unsigned step = 0;
                  step < horizon_ && (!checkConvergence || variation > epsilon_);
                  ++step )
            {
                // Keep a copy of the values before they get overwritten.
                previous = current;

                qfun = computeQFunction(model, rewards);
                bellmanOperator(qfun, &v1_);

                // The variation is only tracked when it can actually stop the loop.
                if ( checkConvergence )
                    variation = (current - previous).cwiseAbs().maxCoeff();
            }

            // The returned functions are not guaranteed to be exact, since we
            // stop as soon as we are within epsilon_.
            const bool withinEpsilon = variation <= epsilon_;
            return std::make_tuple(withinEpsilon, v1_, qfun);
        }
Пример #2
0
    std::tuple<double, ValueFunction, QFunction> ValueIteration::operator()(const M & model) {
        // Extract necessary knowledge from model so we don't have to pass it around
        const size_t S = model.getS();
        const size_t A = model.getA();

        {
            // Verify that parameter value function is compatible.
            const size_t size = vParameter_.values.size();
            if ( size != S ) {
                if ( size != 0 ) {
                    AI_LOGGER(AI_SEVERITY_WARNING, "Size of starting value function is incorrect, ignoring...");
                }
                // Defaulting
                v1_ = makeValueFunction(S);
            }
            else
                v1_ = vParameter_;
        }

        const auto & ir = [&]{
            if constexpr (is_model_eigen_v<M>) return model.getRewardFunction();
            else return computeImmediateRewards(model);
        }();