Пример #1
0
int agent_end(double reward)
{
  episodeReturn += reward;
  logger << "agend end, return = " << episodeReturn << "\n";
  if(episodeReturn > bestReturn)
  {
    bestReturn = episodeReturn;
    bestParameters = net.currentParameters();
  }
  RandomNumberGenerator rng;
  opt.setError(-episodeReturn + 0.1 * episodeReturn * rng.sampleNormalDistribution<double>());
  if(opt.terminated())
    opt.restart();
  return 0;
}
Пример #2
0
/**
 * Initializes the agent: stores the problem dimensions, reads optional
 * configuration arguments, builds the network and (re)starts the optimizer.
 *
 * Optional arguments:
 *   agent_param[0]  value stored in `parameters`; a value > 0 selects the
 *                   compressed ("dct") layers, anything else selects the
 *                   fully connected layers (default: 0)
 *   agent_param[1]  number of hidden units; must be positive, otherwise the
 *                   default of 10 is kept
 *
 * @param num_state_variables  number of state components (network inputs)
 * @param num_action_variables number of action components (network outputs)
 * @param argc                 number of entries in agent_param
 * @param agent_param          optional argument strings, may be null
 * @return always 0
 */
int agent_init(int num_state_variables, int num_action_variables, int argc, const char* agent_param[])
{
  num_states = num_state_variables;
  num_actions = num_action_variables;

  // Defaults used when an argument is missing or unusable.
  parameters = 0;
  hiddenUnits = 10;
  // Guard against a null argument array or null entries before dereferencing.
  if(agent_param)
  {
    if(argc > 0 && agent_param[0])
      parameters = atoi(agent_param[0]);
    if(argc > 1 && agent_param[1])
    {
      // atoi yields 0 for non-numeric input; a hidden layer needs at least
      // one unit, so reject non-positive values and keep the default.
      const int requestedHiddenUnits = atoi(agent_param[1]);
      if(requestedHiddenUnits > 0)
        hiddenUnits = requestedHiddenUnits;
      else
        logger << "ignoring non-positive number of hidden units ("
               << requestedHiddenUnits << "), using " << hiddenUnits << "\n";
    }
  }

  net.inputLayer(num_states);
  if(parameters > 0)
  {
    net.compressedLayer(hiddenUnits, parameters, TANH, "dct");
    net.compressedOutputLayer(num_actions, hiddenUnits + 1, LOGISTIC, "dct");
  }
  else
  {
    net.fullyConnectedLayer(hiddenUnits, TANH);
    net.outputLayer(num_actions, LOGISTIC);
  }

  // Start from the initial parameters with the lowest possible return so
  // that any finished episode counts as an improvement.
  bestParameters = net.currentParameters();
  bestReturn = -std::numeric_limits<double>::max();

  StoppingCriteria stop;
  stop.maximalFunctionEvaluations = 5000;
  stop.maximalRestarts = 1000;
  opt.setOptimizable(net);
  opt.setStopCriteria(stop);
  opt.restart();

  logger << net.dimension() << " parameters, " << num_states
         << " state components, " << num_actions << " action components\n";
  return 0;
}