Example #1
#include <ctime>
#include <boost/random/mersenne_twister.hpp>
#include <boost/random/normal_distribution.hpp>
#include <boost/random/variate_generator.hpp>

double normRand(double mean, double sigma)
{
  // Create a Mersenne twister random number generator
  // that is seeded once with the number of seconds since 1970
  static boost::mt19937 rng(static_cast<unsigned>(std::time(0)));

  // select the Gaussian probability distribution
  boost::normal_distribution<double> norm_dist(mean, sigma);

  // bind the random number generator to the distribution, forming a function object
  boost::variate_generator<boost::mt19937&, boost::normal_distribution<double> > normal_sampler(rng, norm_dist);

  // draw a sample from the distribution
  return normal_sampler();
}
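
A short usage sketch (not part of the original example; it only assumes the normRand function above is visible in the same translation unit):

#include <iostream>

int main()
{
    // draw five samples from a normal distribution with mean 0 and sigma 1
    for (int i = 0; i < 5; ++i)
        std::cout << normRand(0.0, 1.0) << std::endl;
    return 0;
}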
Example #2
// assumes the Boost.Random types (mt19937, normal_distribution, variate_generator)
// are already in scope, e.g. via using namespace boost;
int SampleNormal(double mean, double sigma, int seed)
{
    // Create a Mersenne twister random number generator
    // that is seeded once with the caller-supplied seed
    static mt19937 rng(static_cast<unsigned>(seed));

    // select the Gaussian probability distribution
    normal_distribution<double> norm_dist(mean, sigma);

    // bind the random number generator to the distribution, forming a function object
    variate_generator<mt19937&, normal_distribution<double> > normal_sampler(rng, norm_dist);

    // draw a sample from the distribution and truncate it to an integer
    return static_cast<int>(normal_sampler());
}
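
A short usage sketch (hypothetical; it assumes the Boost.Random names are in scope as in the example). Note that because rng is static, only the seed from the first call is used; later calls reuse the same generator:

#include <iostream>

int main()
{
    // the generator is seeded once with 42; the seed argument of later calls is ignored
    for (int i = 0; i < 5; ++i)
        std::cout << SampleNormal(10.0, 3.0, 42) << std::endl;
    return 0;
}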
    void new_episode()
    {
        numActions   = 0;
        numEpisodes += 1;

        // after 300 episodes, narrow the exploration noise to sigma = 0.1
        if (numEpisodes > 300)
            normal_sampler = normal_variate_generator( rng, boost::normal_distribution<ValueType>(0, value_type(0.1)) );

        environment.query_state( lastState.begin() );
        lastValue = valueFunction.compute( lastState.begin(),
                                           lastState.end() );

        // select the first action
        bestAction.compute( lastState.begin(),
                            lastState.end(),
                            lastAction.begin() );

        // add Gaussian exploration noise to each action component
        for (size_t i = 0; i < actionSize; ++i)
        {
            //lastAction[i] += (*normal_sampler[i])();
            lastAction[i] += normal_sampler();
        }

        lastReward = environment.perform_action( lastAction.begin(),
                                                 lastAction.end() );

        // reset the eligibility traces to zero
        eligibility = vector_type(eligibility.size());
        std::fill(eligibility.begin(), eligibility.end(), value_type());
    }
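    // One environment step of a TD(lambda)-style actor-critic update
    // (the roles below are inferred from the code that follows):
    //   delta = r_t + gamma*V(s_{t+1}) - V(s_t)   TD error
    //   e     = gamma*lambda*e + grad V(s_t)      eligibility trace
    //   w     = w + beta*delta*e                  critic (value function) step
    // If the value of s_t increased, the policy (bestAction) is moved toward
    // the last action taken, with step size alpha.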
    bool update()
    {
        bool terminal = environment.query_state( currentState.begin() );
        if (terminal)
        {
            // V_{t+1}(s_t) <- r_t
            valueFunction.update( lastState.begin(), 
                                  lastState.end(), 
                                  lastReward, 
                                  beta );
            return true;
        }
        else  // nonterminal state
        {
            // V_{t+1}(s_t) <- r_t + gamma*V_t(s_{t+1})
            ValueType delta = lastReward
                            + gamma * valueFunction.compute( currentState.begin(), currentState.end() )
                            - lastValue;

            /*valueFunction.update( lastState.begin(), 
                                  lastState.end(), 
                                  value,
                                  beta );
            value = valueFunction.compute( lastState.begin(), lastState.end() );*/
            // accumulate the eligibility trace: e <- gamma*lambda*e + grad V(s_t)
            vector_type gradient(valueFunction.num_params());
            valueFunction.get_gradient(lastState.begin(), lastState.end(), gradient.begin());
            eligibility = gamma*lambda*eligibility + gradient;

            // gradient step on the value-function parameters: w <- w + beta*delta*e
            vector_type params(valueFunction.num_params());
            valueFunction.get_params(params.begin());
            params += beta*delta*eligibility;
            valueFunction.set_params(params.begin(), params.end());

            // re-evaluate the previous state after the update
            ValueType value = valueFunction.compute( lastState.begin(), lastState.end() );

            // if the update raised the value of the previous state,
            // move the policy (bestAction) toward the action that was taken
            if (value > lastValue)
            {
                bestAction.update( lastState.begin(),
                                   lastState.end(),
                                   lastAction.begin(),
                                   lastAction.end(),
                                   alpha );
            }
            lastValue = valueFunction.compute( currentState.begin(), currentState.end() );
        }

        // select new action
        bestAction.compute( currentState.begin(), 
                            currentState.end(), 
                            lastAction.begin() );
        
        // add Gaussian exploration noise to each action component
        for (size_t i = 0; i < actionSize; ++i)
        {
            //lastAction[i] += (*normal_sampler[i])();
            lastAction[i] += normal_sampler();
        }

        // accumulate the per-component absolute difference between the new and previous state
        value_type distance = value_type();
        for (size_t i = 0; i < currentState.size(); ++i) {
            distance += sqrt( (currentState[i] - lastState[i]) * (currentState[i] - lastState[i]) );
        }

        // if the state barely changed, replace the action with a uniformly random one
        if (distance < 0.1)
        {
            std::cerr << "random action" << std::endl;
            for (size_t i = 0; i < actionSize; ++i)
            {
                //lastAction[i] += (*normal_sampler[i])();
                lastAction[i] = uniform_sampler();
            }
        }

        // discounted running sum of the reward
        averaged_reward = averaged_reward*gamma + lastReward;
        //fileStat << averaged_reward << std::endl;

        lastState.swap(currentState);
        lastReward = environment.perform_action( lastAction.begin(), 
                                                 lastAction.end() );
        ++numActions;

        // after 20000 actions, shrink the exploration noise to sigma = 0.05
        if (numActions == 20000)
        {
            normal_sampler = normal_variate_generator( rng, boost::normal_distribution<ValueType>(0, value_type(0.05)) );
        }

        return false;
    }
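
The class members used above (rng, normal_sampler, uniform_sampler, normal_variate_generator) are not shown in the snippet. Below is a minimal sketch of how they might be declared with Boost.Random; the initial noise level (0.2) and the uniform range [-1, 1] are assumptions, not taken from the original:

#include <boost/random/mersenne_twister.hpp>
#include <boost/random/normal_distribution.hpp>
#include <boost/random/uniform_real.hpp>
#include <boost/random/variate_generator.hpp>

typedef double ValueType;
typedef ValueType value_type;

typedef boost::variate_generator<boost::mt19937&, boost::normal_distribution<ValueType> >
        normal_variate_generator;
typedef boost::variate_generator<boost::mt19937&, boost::uniform_real<ValueType> >
        uniform_variate_generator;

boost::mt19937 rng;  // shared generator for both samplers

// Gaussian exploration noise; sigma = 0.2 is an assumed starting value
normal_variate_generator normal_sampler(rng, boost::normal_distribution<ValueType>(0, value_type(0.2)));

// fallback uniformly random action; the [-1, 1] range is an assumption
uniform_variate_generator uniform_sampler(rng, boost::uniform_real<ValueType>(-1, 1));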