//unsigned char buffer[STATIC_ALLOCATOR_SIZE];
//StaticAllocator myAlloc(buffer, STATIC_ALLOCATOR_SIZE);

int main() {
  //Alloc::init(&myAlloc);

  // DummyAgent agent;
  QLearningEGreedyPolicy egreedy(0.1f);
  NeuralNetwork net(DIM_OBSERVATIONS + DIM_ACTIONS, N_HIDDEN, 1, 0.1f);
  QLearningAgent agent(&net, DIM_OBSERVATIONS, DIM_ACTIONS, N_ACTIONS,
                       1.0f, 0.1f, &egreedy, false); // lambda = 1.0 => full eligibility traces (cf. lambda = 0.0 => no history)
  LibMapperEnvironment env;
  RLQualia qualia(&agent, &env);

  qualia.init();
  qualia.start();

  for (;;) {
  // for (int i=0; i<10; i++) {
    qualia.step();
#if is_computer()
    printf("Current agent action: %d\n", agent.currentAction.conflated());
    printf("Current environment observation: %f %f\n",
           (double)env.currentObservation.observations[0],
           (double)env.currentObservation.observations[1]);
#endif
  }

  // if (myAlloc.nLeaks)
  //   printf("WARNING: Static Allocator has leaks: %d\n", myAlloc.nLeaks);

  return 0;
}
const action_t *agent_start(const observation_t *this_observation) {
  int theIntAction = egreedy(this_observation->intArray[0]);
  this_action.intArray[0] = theIntAction;

  /* Remember this action/observation pair for the next agent_step() call. */
  replaceRLStruct(&this_action, &last_action);
  replaceRLStruct(this_observation, last_observation);

  return &this_action;
}
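/* A minimal sketch of the egreedy() helper called above -- an assumption, not
 * the sample agent's actual implementation. It reuses the tabular
 * value_function and calculateArrayIndex() that appear in agent_step() further
 * below, plus hypothetical globals exploring_frozen, numActions, and
 * sarsa_epsilon: with probability sarsa_epsilon pick a uniformly random
 * action, otherwise act greedily with respect to the current Q-values. */
int egreedy(int state) {
  if (!exploring_frozen && rand() < sarsa_epsilon * RAND_MAX) {
    return rand() % numActions; /* explore: uniformly random action */
  }
  int best = 0; /* exploit: argmax over Q(state, .) */
  for (int a = 1; a < numActions; a++) {
    if (value_function[calculateArrayIndex(state, a)] >
        value_function[calculateArrayIndex(state, best)]) {
      best = a;
    }
  }
  return best;
}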
void testQLearningDummy() {
  randomSeed(RANDOM_SEED);
  NeuralNetwork net(DUMMY_ENVIRONMENT_OBSERVATIONS_DIM + DUMMY_AGENT_ACTIONS_DIM,
                    N_HIDDEN, 1, 0.1f);
  ActionProperties props(DUMMY_AGENT_ACTIONS_DIM, DUMMY_AGENT_N_ACTIONS);
  QLearningEGreedyPolicy egreedy(0.1f);
  QFunction qFunc(&net, DUMMY_ENVIRONMENT_OBSERVATIONS_DIM, &props);
  QLearningAgent agent(&qFunc, &egreedy,
                       DUMMY_ENVIRONMENT_OBSERVATIONS_DIM, &props,
                       0.0f, 0.01f, false); // lambda = 0.0 => no history, gamma = 0.01 => opportunistic agent
  DummyEnvironment env;
  testQLearning(env, agent);
}
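// A plausible sketch of the testQLearning() driver invoked above -- an
// assumption, not the real test harness. It mirrors the RLQualia
// init/start/step cycle from the main() at the top of this section; the
// Environment/Agent base types and the step count N_TEST_STEPS are
// hypothetical here.
void testQLearning(Environment& env, Agent& agent) {
  RLQualia qualia(&agent, &env);
  qualia.init();
  qualia.start();
  for (int i = 0; i < N_TEST_STEPS; i++)
    qualia.step();
}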
void StateActionAlgorithm::explore(State *state, Action *action, double explorationRate,
                                   string explorationType, bool endOfEpisode) {
  if (explorationType.compare("boltzmann") == 0) {
    boltzmann(state, action, explorationRate);
  } else if (explorationType.compare("egreedy") == 0) {
    egreedy(state, action, explorationRate);
  } else if (explorationType.compare("gaussian") == 0) {
    cout << "You are trying to use gaussian exploration for an algorithm that" << endl;
    cout << "does not support it. Please check your parameter file." << endl;
#ifdef WIN32
    char end;
    cin >> end;
#endif
    exit(-1);
  } else {
    // The original snippet is truncated here; presumably unknown exploration
    // types are rejected the same way as the unsupported gaussian case.
    cout << "Unknown exploration type: " << explorationType << endl;
    exit(-1);
  }
}
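// A self-contained sketch of the Boltzmann (softmax) branch dispatched above --
// not the library's boltzmann(), which operates on its own State and Action
// types. Given the Q-values of every action in the current state, action a is
// drawn with probability proportional to exp(Q(a) / tau), where
// tau = explorationRate acts as a temperature: large tau approaches uniform
// exploration, while tau -> 0 approaches greedy selection.
#include <cmath>
#include <cstdlib>
#include <vector>

int boltzmannSketch(const std::vector<double>& qValues, double tau) {
  std::vector<double> weights(qValues.size());
  double total = 0.0;
  for (size_t a = 0; a < qValues.size(); ++a) {
    weights[a] = std::exp(qValues[a] / tau);
    total += weights[a];
  }
  // Sample an action index from the categorical distribution weights/total.
  double r = total * (std::rand() / (RAND_MAX + 1.0));
  for (size_t a = 0; a < qValues.size(); ++a) {
    r -= weights[a];
    if (r <= 0.0) return (int)a;
  }
  return (int)qValues.size() - 1; // guard against floating-point round-off
}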
const action_t *agent_step(double reward, const observation_t *this_observation) {
  int newState = this_observation->intArray[0];
  int lastState = last_observation->intArray[0];
  int lastAction = last_action.intArray[0];

  int newAction = egreedy(newState);

  double Q_sa = value_function[calculateArrayIndex(lastState, lastAction)];
  double Q_sprime_aprime = value_function[calculateArrayIndex(newState, newAction)];

  /* SARSA update: Q(s,a) <- Q(s,a) + alpha * (r + gamma * Q(s',a') - Q(s,a)) */
  double new_Q_sa = Q_sa + sarsa_stepsize * (reward + sarsa_gamma * Q_sprime_aprime - Q_sa);

  /* Only update the value function if the policy is not frozen */
  if (!policy_frozen) {
    value_function[calculateArrayIndex(lastState, lastAction)] = new_Q_sa;
  }

  this_action.intArray[0] = newAction;
  replaceRLStruct(&this_action, &last_action);
  replaceRLStruct(this_observation, last_observation);

  return &this_action;
}
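/* Worked instance of the update above, with illustrative numbers (assumed,
 * not from the source): alpha = sarsa_stepsize = 0.1, sarsa_gamma = 0.9,
 * Q(s,a) = 2.0, reward = 1.0, Q(s',a') = 3.0:
 *
 *   new_Q_sa = 2.0 + 0.1 * (1.0 + 0.9 * 3.0 - 2.0)
 *            = 2.0 + 0.1 * 1.7
 *            = 2.17
 *
 * Q(s,a) takes a step of size alpha toward the TD target r + gamma * Q(s',a'),
 * using the action actually chosen by the epsilon-greedy policy -- on-policy
 * SARSA, as opposed to Q-learning's max over a'. */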