コード例 #1
0
ファイル: SparseRLModel.hpp プロジェクト: EHadoux/AI-Toolbox
    /**
     * Incrementally updates the transition row and the reward estimate for
     * the pair (s, a) after a new transition towards s1 has been recorded
     * in the experience.
     *
     * @param s  The state the transition started from.
     * @param a  The action that was taken.
     * @param s1 The state the transition ended in.
     */
    void SparseRLModel<E>::sync(const size_t s, const size_t a, const size_t s1) {
        const auto totalVisits = experience_.getVisitsSum(s, a);

        // Incremental updates accumulate numerical error, so every 10000
        // visits the row is rebuilt from the raw experience data instead.
        // A visit sum of zero also ends up here.
        if ( (totalVisits % 10000ul) == 0 ) {
            sync(s, a);
            return;
        }

        auto & transitionMatrix = transitions_[a];

        if ( totalVisits == 1ul ) {
            // First recorded visit: zero the (s, s) entry and put all the
            // probability mass on the observed destination.
            transitionMatrix.coeffRef(s, s) = 0.0;
            transitionMatrix.coeffRef(s, s1) = 1.0;

            if ( checkDifferentSmall(0.0, experience_.getRewardSum(s, a)) )
                rewards_.coeffRef(s, a) = experience_.getRewardSum(s, a);
            return;
        }

        const double destinationVisits = static_cast<double>(experience_.getVisits(s, a, s1));

        // Refresh the stored reward only when it actually changed.
        const double meanReward = experience_.getRewardSum(s, a) / totalVisits;
        if ( checkDifferentGeneral(meanReward, rewards_.coeff(s, a)) )
            rewards_.coeffRef(s, a) = meanReward;

        // Every entry of the row currently shares the denominator
        // (totalVisits - 1). Writing the new (s, s1) entry with that same
        // denominator and then renormalizing the whole row gives the same
        // result as recomputing every entry with the increased denominator,
        // so this function can be applied again on the next visit.
        const double unnormalizedEntry = destinationVisits / static_cast<double>(totalVisits - 1);

        // The old coefficient has to be read before it is overwritten.
        const double unnormalizedRowSum = 1.0 + (unnormalizedEntry - transitionMatrix.coeff(s, s1));

        transitionMatrix.coeffRef(s, s1) = unnormalizedEntry;
        transitionMatrix.row(s) /= unnormalizedRowSum;
    }
コード例 #2
0
ファイル: QGreedyPolicy.cpp プロジェクト: CarbonGU/AI-Toolbox
        /**
         * Returns the probability with which the greedy policy selects
         * action a in state s.
         *
         * Actions whose Q-value ties with the highest one share the
         * probability equally; every other action gets probability zero.
         *
         * @param s The state to evaluate.
         * @param a The action whose probability is requested.
         *
         * @return 1 / (number of tied best actions) if a is one of them,
         *         0.0 otherwise.
         */
        double QGreedyPolicy::getActionProbability(const size_t & s, size_t a) const {
            double bestValue = q_(s, 0);
            unsigned tiedActions = 1;

            // Track the highest Q-value in this state and how many actions tie on it.
            for ( size_t action = 1; action < A; ++action ) {
                const double value = q_(s, action);

                if ( checkEqualGeneral(value, bestValue) ) {
                    ++tiedActions;
                    continue;
                }
                if ( value > bestValue ) {
                    // Strictly better action found: the tie count starts over.
                    bestValue = value;
                    tiedActions = 1;
                }
            }

            return checkDifferentGeneral(q_(s, a), bestValue) ? 0.0 : 1.0 / tiedActions;
        }
コード例 #3
0
ファイル: SparseRLModel.hpp プロジェクト: EHadoux/AI-Toolbox
    /**
     * Rebuilds the transition row and the reward estimate for the pair
     * (s, a) directly from the data stored in the experience.
     *
     * Does nothing if the pair has never been visited.
     *
     * @param s The state to synchronize.
     * @param a The action to synchronize.
     */
    void SparseRLModel<E>::sync(const size_t s, const size_t a) {
        // Nothing to do
        const auto visitSum = experience_.getVisitsSum(s, a);
        if ( visitSum == 0ul ) return;

        // Clear the beginning's identity matrix (or what is left of it).
        // This cannot depend on visitSum being exactly 1: the first sync
        // for this pair may happen after several visits were recorded, and
        // if none of them was a self-transition the loop below never
        // touches (s, s), leaving a stale 1.0 and a row that sums to 2.
        // The coeff() check avoids inserting an explicit zero when there is
        // nothing to clear.
        if ( experience_.getVisits(s, a, s) == 0 && transitions_[a].coeff(s, s) != 0.0 )
            transitions_[a].coeffRef(s, s) = 0.0;

        // Create reciprocal for fast division
        const double visitSumReciprocal = 1.0 / visitSum;

        // Normalize; only visited destinations get an entry written.
        for ( size_t s1 = 0; s1 < S; ++s1 ) {
            const auto visits = experience_.getVisits(s, a, s1);
            if (visits > 0)
                transitions_[a].coeffRef(s, s1) = static_cast<double>(visits) * visitSumReciprocal;
        }

        // Only write the reward when it actually changed.
        const double rewValue = experience_.getRewardSum(s, a) * visitSumReciprocal;
        if (checkDifferentGeneral(rewValue, rewards_.coeff(s, a)))
            rewards_.coeffRef(s, a) = rewValue;
    }