int agent_end(double reward)
{
  episodeReturn += reward;
  logger << "agent end, return = " << episodeReturn << "\n";

  // Remember the best parameter vector found so far.
  if(episodeReturn > bestReturn)
  {
    bestReturn = episodeReturn;
    bestParameters = net.currentParameters();
  }

  // The optimizer minimizes, so the return is negated. The fitness is
  // perturbed with zero-mean Gaussian noise whose standard deviation is
  // 10% of the return.
  RandomNumberGenerator rng;
  opt.setError(-episodeReturn
               + 0.1 * episodeReturn * rng.sampleNormalDistribution<double>());
  if(opt.terminated())
    opt.restart();

  return 0;
}
int agent_init(int num_state_variables, int num_action_variables,
               int argc, const char* agent_param[])
{
  num_states = num_state_variables;
  num_actions = num_action_variables;

  // Optional arguments: number of compressed parameters per neuron and
  // number of hidden units.
  parameters = 0;
  hiddenUnits = 10;
  if(argc > 0)
    parameters = atoi(agent_param[0]);
  if(argc > 1)
    hiddenUnits = atoi(agent_param[1]);

  // Build the policy network. If a parameter budget was given, use
  // DCT-compressed weight matrices; otherwise use uncompressed layers.
  net.inputLayer(num_states);
  if(parameters > 0)
  {
    net.compressedLayer(hiddenUnits, parameters, TANH, "dct");
    net.compressedOutputLayer(num_actions, hiddenUnits + 1, LOGISTIC, "dct");
  }
  else
  {
    net.fullyConnectedLayer(hiddenUnits, TANH);
    net.outputLayer(num_actions, LOGISTIC);
  }

  bestParameters = net.currentParameters();
  bestReturn = -std::numeric_limits<double>::max();

  // Configure the black-box optimizer.
  StoppingCriteria stop;
  stop.maximalFunctionEvaluations = 5000;
  stop.maximalRestarts = 1000;
  opt.setOptimizable(net);
  opt.setStopCriteria(stop);
  opt.restart();

  logger << net.dimension() << " parameters, " << num_states
         << " state components, " << num_actions << " action components\n";
  return 0;
}
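
// A minimal sketch of the context these callbacks assume (not part of the
// original excerpt, and in the actual file it would precede the functions
// above): the shared globals, plus hypothetical agent_start/agent_step
// callbacks that pull the next candidate parameter vector from the
// optimizer and use the network as the policy. The RL-Glue-style
// signatures and all names introduced here are assumptions; the OpenANN
// types (Net, IPOPCMAES, StoppingCriteria, Logger, RandomNumberGenerator)
// match the calls used above.
#include <OpenANN/OpenANN>
#include <OpenANN/optimization/IPOPCMAES.h>
#include <OpenANN/util/Random.h>
#include <Eigen/Core>
#include <limits>
#include <cstdlib>

using namespace OpenANN;

static int num_states, num_actions, parameters, hiddenUnits;
static double episodeReturn, bestReturn;
static Eigen::VectorXd bestParameters;
static Net net;
static IPOPCMAES opt;
static Logger logger(Logger::CONSOLE);

// Hypothetical: called at the start of each episode; evaluates the next
// candidate parameter vector suggested by CMA-ES.
int agent_start(const double* state)
{
  episodeReturn = 0.0;
  net.setParameters(opt.getNext());
  return 0;
}

// Hypothetical: maps the current observation through the policy network
// to produce an action while accumulating the episode return.
int agent_step(double reward, const double* state, double* action)
{
  episodeReturn += reward;
  Eigen::VectorXd x = Eigen::Map<const Eigen::VectorXd>(state, num_states);
  Eigen::VectorXd y = net(x);
  for(int i = 0; i < num_actions; i++)
    action[i] = y(i);
  return 0;
}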