Esempio n. 1
0
        /**
         * @brief Samples an action for state s with weights exp(Q(s,a) / temperature).
         *
         * When the temperature is exactly zero the call is forwarded to greedy_.
         * When one or more exponentials overflow to infinity, one of the
         * overflowing actions is picked uniformly at random. Otherwise the
         * weights are normalized and an action is drawn via sampleProbability.
         *
         * @param s The state to sample an action for.
         *
         * @return The index of the sampled action.
         */
        size_t QSoftmaxPolicy::sampleAction(const size_t & s) const {
            // Avoids dividing by a zero temperature below.
            if ( temperature_ == 0.0 )
                return greedy_.sampleAction(s);

            Vector actionValues(A);

            // Compute the unnormalized weights, counting how many overflowed.
            unsigned infinities = 0;
            for ( size_t a = 0; a < A; ++a ) {
                actionValues(a) = std::exp(q_(s, a) / temperature_);
                if ( std::isinf(actionValues(a)) )
                    infinities++;
            }

            if (infinities) {
                // Infinite weights cannot be normalized: choose uniformly
                // among them instead. selection is the number of infinite
                // entries to skip before taking one.
                auto pickDistribution = std::uniform_int_distribution<unsigned>(0, infinities-1);
                unsigned selection = pickDistribution(rand_);

                size_t retval = 0;
                for ( ; retval < A - 1; ++retval) {
                    // Finite entries must not consume the countdown, or the
                    // unsigned counter wraps and the pick is no longer uniform.
                    if ( !std::isinf(actionValues(retval)) )
                        continue;
                    if ( !selection )
                        break;
                    --selection;
                }
                // If the loop ran out, the selected entry is the last action.
                return retval;
            }

            // NOTE(review): sampleProbability presumably needs the entries to
            // sum to one; confirm that Vector::normalize() rescales by the sum
            // and not by the Euclidean norm.
            actionValues.normalize();

            return sampleProbability(A, actionValues, rand_);
        }
Esempio n. 2
0
        /**
         * @brief Expands a belief list in place with beliefs reachable from it.
         *
         * For each belief in the list (including those appended during this
         * call) and for each action, 20 successor beliefs are simulated. Per
         * action, the successor whose minimum L1 distance from the current
         * list is largest is retained. The best candidate over all actions is
         * appended if its distance is not approximately zero.
         *
         * Expansion stops when the list has been fully traversed or when it
         * holds max beliefs. A list that already holds max or more beliefs is
         * left untouched.
         *
         * @param max The number of beliefs at which expansion stops.
         * @param blp Pointer to the list to expand; must not be null.
         */
        void BeliefGenerator<M>::expandBeliefList(size_t max, BeliefList * blp) const {
            assert(blp);
            auto & bl = *blp;
            size_t size = bl.size();

            // An equality-only check after insertion would never trigger for
            // a list that starts at or above the cap, so bail out up front.
            if ( size >= max ) return;

            std::vector<Belief> newBeliefs(A);
            std::vector<double> distances(A);
            auto dBegin = std::begin(distances), dEnd = std::end(distances);

            // L1 distance
            auto computeDistance = [this](const Belief & lhs, const Belief & rhs) {
                double distance = 0.0;
                for ( size_t i = 0; i < S; ++i )
                    distance += std::abs(lhs[i] - rhs[i]);
                return distance;
            };

            Belief helper;
            double distance;
            // We apply the discovery process also to all beliefs we discover
            // along the way.
            // NOTE(review): this relies on `it` staying valid across
            // emplace_back; confirm BeliefList is a container that guarantees
            // it (e.g. a linked list rather than a reallocating vector).
            for ( auto it = std::begin(bl); it != std::end(bl); ++it ) {
                // Compute all new beliefs
                for ( size_t a = 0; a < A; ++a ) {
                    distances[a] = 0.0;
                    for ( int j = 0; j < 20; ++j ) {
                        // Simulate one step: sample a state from the belief,
                        // then an observation from the model.
                        size_t s = sampleProbability(S, *it, rand_);

                        size_t o;
                        std::tie(std::ignore, o, std::ignore) = model_.sampleSOR(s, a);
                        helper = updateBelief(model_, *it, a, o);

                        // Compute distance (here we compare also against elements we just added!)
                        distance = computeDistance(helper, bl.front());
                        for ( auto jt = ++std::begin(bl); jt != std::end(bl); ++jt ) {
                            if ( checkEqualSmall(distance, 0.0) ) break; // We already have it!
                            distance = std::min(distance, computeDistance(helper, *jt));
                        }
                        // Select the best found over 20 times
                        if ( distance > distances[a] ) {
                            distances[a] = distance;
                            newBeliefs[a] = helper;
                        }
                    }
                }
                // Find furthest away, add only if it is new.
                size_t id = std::distance( dBegin, std::max_element(dBegin, dEnd) );
                if ( checkDifferentSmall(distances[id], 0.0) ) {
                    bl.emplace_back(std::move(newBeliefs[id]));
                    ++size;
                    if ( size >= max ) break;
                }
            }
        }
Esempio n. 3
0
    /**
     * @brief Samples a next state and looks up the reward for a state-action pair.
     *
     * @param s The starting state.
     * @param a The action taken.
     *
     * @return A tuple holding the sampled next state and rewards_.coeff(s, a).
     */
    std::tuple<size_t, double> SparseRLModel<E>::sampleSR(const size_t s, const size_t a) const {
        // The next state is drawn from row s of transitions_[a].
        const size_t nextState = sampleProbability(S, transitions_[a].row(s), rand_);
        // The reward lookup depends only on (s, a), not on the sampled state.
        const double reward = rewards_.coeff(s, a);

        return std::tuple<size_t, double>(nextState, reward);
    }
Esempio n. 4
0
        std::tuple<size_t, double> Model::sampleSR(size_t s, size_t a) const {
            size_t s1 = sampleProbability(S, transitions_[a].row(s), rand_);

            return std::make_tuple(s1, rewards_[a](s, s1));
        }
Esempio n. 5
0
        std::pair<size_t, double> RLModel::sample(size_t s, size_t a) const {
            size_t s1 = sampleProbability(transitions_[s][a], S, rand_);

            return std::make_pair(s1, rewards_[s][a][s1]);
        }