// Builds the exploration ("base") distribution for a vector of per-action
// epsilon values.
//
// The epsilons are treated as thresholds. Walking the sorted thresholds from
// low to high, the probability mass between two consecutive distinct
// thresholds is split evenly among every action whose own epsilon is at least
// the upper threshold.
//
// \param epsilon  One epsilon per action.
// \return         Vector of the same length holding each action's share.
LargeVector EpsilonGreedySampler::calculateBaseDistribution(const LargeVector &epsilon) const
{
  const size_t num_actions = epsilon.size();
  LargeVector distribution = LargeVector::Constant(epsilon.size(), 0.);

  // Sorted threshold levels. fromVector() presumably copies epsilon into the
  // std::vector (helper not visible here). The extra 0 guarantees that every
  // level we visit (index >= 1) has a predecessor.
  std::vector<double> levels;
  fromVector(epsilon, levels);
  levels.push_back(0.);
  std::sort(levels.begin(), levels.end());

  for (size_t level = 1; level < levels.size(); ++level)
  {
    const double upper = levels[level];
    const double lower = levels[level-1];

    // Duplicate thresholds contribute no additional mass.
    if (upper == lower)
      continue;

    // Count the actions that are still eligible at this threshold.
    size_t eligible = 0;
    for (size_t action = 0; action < num_actions; ++action)
      if (epsilon[action] >= upper)
        ++eligible;

    // Share the mass between the two thresholds evenly among them.
    for (size_t action = 0; action < num_actions; ++action)
      if (epsilon[action] >= upper)
        distribution[action] += (upper - lower)/eligible;
  }

  TRACE("Base distribution is " << distribution);

  return distribution;
}
// Overwrites the state of every particle in `pts`.
//
// \param P          New value for each particle's `cp` field (presumably the
//                   current position); must have one entry per particle.
// \param V          New value for each particle's `v` field (presumably the
//                   velocity); must have one entry per particle.
// \param updateOld  When true, each particle's `cp` is saved into its `op`
//                   field before being overwritten.
void ParticleSystem::setState( LargeVector<Vec3d> P, LargeVector<Vec3d> V, bool updateOld )
{
    assert(P.size() == pts.size() && V.size() == pts.size());

    // First pass (optional): remember the state that is about to be replaced.
    if (updateOld)
    {
        for (size_t idx = 0; idx < pts.size(); ++idx)
            pts[idx].op = pts[idx].cp;
    }

    // Second pass: install the new state.
    for (size_t idx = 0; idx < pts.size(); ++idx)
    {
        pts[idx].cp = P[idx];
        pts[idx].v = V[idx];
    }
}
void EpsilonGreedySampler::distribution(const LargeVector &values, LargeVector *distribution) const { if (epsilon_.size() > 1) { *distribution = distribution_; (*distribution)[GreedySampler::sample(values)] += 1 - distribution_sum_; } else { GreedySampler::distribution(values, distribution); for (size_t ii=0; ii < values.size(); ++ii) { if ((*distribution)[ii] == 1) (*distribution)[ii] = 1-epsilon_[0]; (*distribution)[ii] += epsilon_[0]/values.size(); } } }
size_t EpsilonGreedySampler::sample(const LargeVector &values, ActionType *at) const { double r = rand_->get(); if (epsilon_.size() > 1) { if (epsilon_.size() != values.size()) throw bad_param("sampler/epsilon_greedy:epsilon"); // Find number of eligible actions size_t eligible=0; for (size_t ii=0; ii < epsilon_.size(); ++ii) if (r < epsilon_[ii]) eligible++; if (eligible > 0) { // Chose one randomly size_t ri = rand_->getInteger(eligible); for (size_t ii=0; ii < epsilon_.size(); ++ii) if (r < epsilon_[ii]) if (!ri--) { if (at) *at = atExploratory; return ii; } } } else if (r < epsilon_[0]) { if (at) *at = atExploratory; return rand_->getInteger(values.size()); } return GreedySampler::sample(values, at); }
size_t GreedySampler::sample(const LargeVector &values, ActionType *at) const { size_t mai = 0; for (size_t ii=1; ii < values.size(); ++ii) if (values[ii] > values[mai]) mai = ii; if (rand_max_) { LargeVector same_values = ConstantVector(values.size(), 0); size_t jj = 0; for (size_t ii=0; ii < values.size(); ++ii) if (values[ii] == values[mai]) same_values[jj++] = ii; if (jj != 0) mai = same_values[rand_->getInteger(jj)]; } if (at) *at = atGreedy; return mai; }
void GreedySampler::distribution(const LargeVector &values, LargeVector *distribution) const { *distribution = LargeVector::Constant(values.size(), 0.); (*distribution)[GreedySampler::sample(values)] = 1; }