Beispiel #1
0
        /**
         * @brief Returns the softmax probability of selecting action `a` in state `s`.
         *
         * When the temperature is exactly zero the query is forwarded to
         * `greedy_`. Otherwise every action is weighted by
         * exp(q_(s, action) / temperature_) and the weights are normalized.
         *
         * Overflow is handled explicitly: if one or more weights evaluate to
         * infinity, the probability mass is split evenly among those actions
         * and every other action gets zero. If all weights underflow so that
         * their sum is (approximately) zero, a uniform distribution is used.
         *
         * @param s The state in which the action would be taken.
         * @param a The action whose probability is requested.
         *
         * @return The probability of selecting `a` in `s`.
         */
        double QSoftmaxPolicy::getActionProbability(const size_t & s, size_t a) const {
            if ( temperature_ == 0.0 )
                return greedy_.getActionProbability(s, a);

            Vector actionValues(A);

            unsigned infinities = 0;
            bool isAInfinite = false;
            for ( size_t aa = 0; aa < A; ++aa ) {
                actionValues(aa) = std::exp(q_(s, aa) / temperature_);
                if ( std::isinf(actionValues(aa)) ) {
                    ++infinities;
                    if ( aa == a ) isAInfinite = true;
                }
            }

            // Overflowed weights dominate everything else: share the mass among them.
            if ( infinities )
                return isAInfinite ? 1.0 / infinities : 0.0;

            const auto sum = actionValues.sum();
            // All weights underflowed; fall back to a uniform choice.
            if ( checkEqualSmall(sum, 0.0) )
                return 1.0 / A;

            // Reuse the sum computed above instead of reducing the vector again.
            return actionValues(a) / sum;
        }
Beispiel #2
0
 /**
  * @brief This function checks whether two input ProbabilityVector are equal.
  *
  * This function is approximate. It assumes that the vectors are valid, so
  * they must sum up to one, and each element must be between zero and one.
  * The vector must also be of the same size.
  *
  * This function is approximate, as we're dealing with floating point.
  *
  * @param lhs The left hand side to check.
  * @param rhs The right hand side to check.
  *
  * @return Whether the two ProbabilityVectors are the same.
  */
 inline bool checkEqualProbability(const ProbabilityVector & lhs, const ProbabilityVector & rhs) {
     const auto size = lhs.size();
     for (auto i = 0; i < size; ++i)
         if (!checkEqualSmall(lhs[i], rhs[i]))
             return false;
     return true;
 }
Beispiel #3
0
 /**
  * @brief Reports whether state `s` is terminal.
  *
  * A state counts as terminal when, for every action, the (s, s) entry of
  * that action's element of `transitions_` is approximately one (presumably
  * the probability of remaining in `s`).
  *
  * @param s The state to inspect.
  *
  * @return True if every action satisfies the check, false otherwise.
  */
 bool Model::isTerminal(size_t s) const {
     for ( size_t action = 0; action < A; ++action ) {
         const bool selfLoops = checkEqualSmall(1.0, transitions_[action](s, s));
         // A single action that can leave the state is enough to rule it out.
         if ( !selfLoops )
             return false;
     }
     return true;
 }
        /**
         * @brief Grows a list of beliefs by simulating actions from the beliefs it holds.
         *
         * Each belief in the list is processed in order, including the ones
         * appended during this call. For every action a fixed number of
         * successor beliefs is sampled, and per action the candidate with the
         * largest L1 distance from the current list is kept. The best
         * candidate across all actions is then appended, provided it is not
         * (approximately) already in the list.
         *
         * Expansion stops when the list reaches `max` entries or when every
         * belief in it has been processed. A list that already holds `max`
         * or more beliefs is left untouched.
         *
         * @param max The list size at which expansion stops.
         * @param blp The list to expand in place; must not be null.
         */
        void BeliefGenerator<M>::expandBeliefList(size_t max, BeliefList * blp) const {
            assert(blp);
            auto & bl = *blp;
            size_t size = bl.size();

            // An already full list must not be grown. Without this guard the
            // stopping test below would never trigger for such a list.
            if ( size >= max ) return;

            // Number of successor beliefs sampled for each (belief, action) pair.
            constexpr int samplesPerAction = 20;

            std::vector<Belief> newBeliefs(A);
            std::vector<double> distances(A);
            auto dBegin = std::begin(distances), dEnd = std::end(distances);

            // L1 distance
            auto computeDistance = [this](const Belief & lhs, const Belief & rhs) {
                double distance = 0.0;
                for ( size_t i = 0; i < S; ++i )
                    distance += std::abs(lhs[i] - rhs[i]);
                return distance;
            };

            Belief helper;
            // We apply the discovery process also to all beliefs we discover
            // along the way. The loop is index driven because appending to the
            // list may invalidate iterators held across the insertion.
            for ( size_t idx = 0; idx < size; ++idx ) {
                const auto it = std::next(std::begin(bl), idx);

                // Compute all new beliefs
                for ( size_t a = 0; a < A; ++a ) {
                    distances[a] = 0.0;
                    for ( int j = 0; j < samplesPerAction; ++j ) {
                        size_t s = sampleProbability(S, *it, rand_);

                        size_t o;
                        std::tie(std::ignore, o, std::ignore) = model_.sampleSOR(s, a);
                        helper = updateBelief(model_, *it, a, o);

                        // Compute distance (here we compare also against elements we just added!)
                        double distance = computeDistance(helper, bl.front());
                        for ( auto jt = ++std::begin(bl); jt != std::end(bl); ++jt ) {
                            if ( checkEqualSmall(distance, 0.0) ) break; // We already have it!
                            distance = std::min(distance, computeDistance(helper, *jt));
                        }
                        // Keep the farthest candidate found over all samples
                        if ( distance > distances[a] ) {
                            distances[a] = distance;
                            newBeliefs[a] = helper;
                        }
                    }
                }
                // Find furthest away, add only if it is new.
                size_t id = std::distance( dBegin, std::max_element(dBegin, dEnd) );
                if ( checkDifferentSmall(distances[id], 0.0) ) {
                    // `it` must not be used past this point: the insertion may
                    // have invalidated it.
                    bl.emplace_back(std::move(newBeliefs[id]));
                    ++size;
                    if ( size >= max ) break;
                }
            }
        }
Beispiel #5
0
 /**
  * @brief Tells whether two doubles can be considered equal.
  *
  * The numbers are first compared with checkEqualSmall(). If that check
  * fails, their absolute difference is divided by the smaller of the two
  * magnitudes and the result is compared against the machine epsilon for
  * doubles.
  *
  * The function is symmetric in its arguments.
  *
  * @param a The first number to compare.
  * @param b The second number to compare.
  *
  * @return True if the two numbers are close enough, false otherwise.
  */
 inline bool checkEqualGeneral(double a, double b) {
     if ( checkEqualSmall(a,b) )
         return true;

     // Relative error, measured against the smaller magnitude.
     const double gap = std::fabs(a - b);
     const double scale = std::min(std::fabs(a), std::fabs(b));
     return gap / scale < std::numeric_limits<double>::epsilon();
 }
Beispiel #6
0
 /**
  * @brief Tells whether two doubles near [0,1] are reasonably different.
  *
  * This is the exact negation of checkEqualSmall(). For numbers that are
  * not near [0,1] the result is not guaranteed to be what may be expected.
  * The function is symmetric in its arguments.
  *
  * @param a The first number to compare.
  * @param b The second number to compare.
  *
  * @return True if the two numbers are far away enough, false otherwise.
  */
 inline bool checkDifferentSmall(double a, double b) {
     const bool closeEnough = checkEqualSmall(a, b);
     return !closeEnough;
 }
Beispiel #7
0
 /**
  * @brief Reports whether state `s` is terminal.
  *
  * A state counts as terminal when, for every action, the (s, s)
  * coefficient of that action's element of `transitions_` is approximately
  * one (presumably the probability of remaining in `s`).
  *
  * @param s The state to inspect.
  *
  * @return True if every action satisfies the check, false otherwise.
  */
 bool SparseRLModel<E>::isTerminal(const size_t s) const {
     // Advance past every action that keeps us in `s`; stop at the first that does not.
     size_t action = 0;
     while ( action < A && checkEqualSmall(1.0, transitions_[action].coeff(s, s)) )
         ++action;

     // The state is terminal only if no action failed the check.
     return action == A;
 }