Example #1
//unsigned char buffer[STATIC_ALLOCATOR_SIZE];
//StaticAllocator myAlloc(buffer, STATIC_ALLOCATOR_SIZE);
int main() {
  //Alloc::init(&myAlloc);
//  DummyAgent agent;
  QLearningEGreedyPolicy egreedy(0.1f);
  NeuralNetwork net(DIM_OBSERVATIONS + DIM_ACTIONS, N_HIDDEN, 1, 0.1f);
  QLearningAgent agent(&net, DIM_OBSERVATIONS, DIM_ACTIONS, N_ACTIONS,
                       1.0f, 0.1f, &egreedy, false); // lambda = 1.0 (full eligibility traces), gamma = 0.1
  LibMapperEnvironment env;
  RLQualia qualia(&agent, &env);

  qualia.init();
  qualia.start();

  for (;;) {
//  for (int i=0; i<10; i++) {
    qualia.step();
#if is_computer()
    printf("Current agent action: %d\n", agent.currentAction.conflated());
    printf("Current environment observation: %f %f\n", (double)env.currentObservation.observations[0], (double)env.currentObservation.observations[1]);
#endif
  }

//  if (myAlloc.nLeaks)
//    printf("WARNING: Static Allocator has leaks: %d\n", myAlloc.nLeaks);

  return 0;
}
Example #2
const action_t *agent_start(const observation_t *this_observation) {
	/* Pick an epsilon-greedy action for the first observation of the episode. */
	int theIntAction=egreedy(this_observation->intArray[0]);
	this_action.intArray[0]=theIntAction;

	/* Keep copies of the action and observation for the next agent_step() call. */
	replaceRLStruct(&this_action, &last_action);
	replaceRLStruct(this_observation, last_observation);
	
	return &this_action;
}
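
The globals used by agent_start() (this_action, last_action, last_observation) are declared elsewhere in the agent file. A minimal sketch of how they might be declared and allocated, assuming the RL-Glue C codec's RLStruct_util helpers; the one-int layout is an assumption:

#include <rlglue/Agent_common.h>
#include <rlglue/utils/C/RLStruct_util.h>

/* Globals referenced by agent_start(); sizes here are placeholders. */
static action_t this_action;
static action_t last_action;
static observation_t *last_observation = 0;

void agent_init(const char *task_spec) {
	/* Allocate room for a single int in each structure; replaceRLStruct()
	   resizes them later to match whatever it copies. */
	allocateRLStruct(&this_action, 1, 0, 0);
	allocateRLStruct(&last_action, 1, 0, 0);
	last_observation = allocateRLStructPointer(1, 0, 0);
}
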
Example #3
File: dummy.cpp  Project: malloch/qualia
void testQLearningDummy() {
  randomSeed(RANDOM_SEED);
  NeuralNetwork net(DUMMY_ENVIRONMENT_OBSERVATIONS_DIM + DUMMY_AGENT_ACTIONS_DIM, N_HIDDEN, 1, 0.1f);
  ActionProperties props(DUMMY_AGENT_ACTIONS_DIM, DUMMY_AGENT_N_ACTIONS);
  QLearningEGreedyPolicy egreedy(0.1f);
  QFunction qFunc(&net, DUMMY_ENVIRONMENT_OBSERVATIONS_DIM, &props);
  QLearningAgent agent(&qFunc, &egreedy,
                       DUMMY_ENVIRONMENT_OBSERVATIONS_DIM,
                       &props,
                       0.0f, 0.01f, false); // lambda = 0.0  => no history, gamma = 0.01 => opportunistic agent
  DummyEnvironment env;
  testQLearning(env, agent);
}
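
testQLearning() itself is not included in the listing. A rough sketch of what such a driver could look like, reusing the RLQualia init/start/step pattern from Example #1; the base-class parameter types and the step count are assumptions:

// Hypothetical driver modelled on the RLQualia loop from Example #1.
void testQLearning(Environment& env, Agent& agent) {
  RLQualia qualia(&agent, &env);

  qualia.init();
  qualia.start();

  // Run a fixed number of steps instead of an endless loop (assumed value).
  for (int i = 0; i < 1000; i++)
    qualia.step();
}
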
Example #4
void StateActionAlgorithm::explore( State * state, Action * action, double explorationRate, string explorationType, bool endOfEpisode ) {

    if ( explorationType.compare("boltzmann") == 0 ) {
        boltzmann( state, action, explorationRate ) ;

    } else if ( explorationType.compare("egreedy") == 0  ) {
        egreedy( state, action, explorationRate ) ;

    } else if ( explorationType.compare("gaussian") == 0  ) {

        cout << "You are trying to use gaussian exploration for an algorithm that" << endl ;
        cout << "does not support it. Please check your parameter file." << endl ;
        #ifdef WIN32
            char end;
            cin >> end;
        #endif
        exit(-1) ;

    } else {

        /* Assumed completion of the truncated listing: report the unknown
           exploration type and abort, mirroring the branch above. */
        cout << "Unknown exploration type: " << explorationType << endl ;
        cout << "Please check your parameter file." << endl ;
        exit(-1) ;

    }
}
Example #5
const action_t *agent_step(double reward, const observation_t *this_observation) {
	int newState=this_observation->intArray[0];
	int lastState=last_observation->intArray[0];
	int lastAction=last_action.intArray[0];
	
	/* Choose the next action epsilon-greedily from the new state. */
	int newAction=egreedy(newState);
	
	double Q_sa=value_function[calculateArrayIndex(lastState,lastAction)];
	double Q_sprime_aprime=value_function[calculateArrayIndex(newState,newAction)];
	
	/* SARSA update: Q(s,a) += alpha * (reward + gamma * Q(s',a') - Q(s,a)) */
	double new_Q_sa=Q_sa + sarsa_stepsize * (reward + sarsa_gamma * Q_sprime_aprime - Q_sa);
	/*	Only update the value function if the policy is not frozen */
	if(!policy_frozen){
		value_function[calculateArrayIndex(lastState,lastAction)]=new_Q_sa;
	}
	this_action.intArray[0]=newAction;
	
	replaceRLStruct(&this_action, &last_action);
	replaceRLStruct(this_observation, last_observation);
	
	return &this_action;
}
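
Example #5 relies on egreedy() and calculateArrayIndex() helpers that are not shown. A minimal sketch of how a tabular implementation might look; value_function, numActions, sarsa_epsilon and exploring_frozen are assumed globals:

#include <stdlib.h>

/* Flatten a (state, action) pair into an index of the tabular value function. */
int calculateArrayIndex(int theState, int theAction) {
	return theState * numActions + theAction;
}

/* Epsilon-greedy selection over the tabular value function. */
int egreedy(int theState) {
	/* Explore with probability sarsa_epsilon unless exploration is frozen. */
	if (!exploring_frozen && rand() < sarsa_epsilon * RAND_MAX)
		return rand() % numActions;

	/* Otherwise pick the action with the highest estimated value. */
	int maxIndex = 0;
	for (int a = 1; a < numActions; a++) {
		if (value_function[calculateArrayIndex(theState, a)] >
		    value_function[calculateArrayIndex(theState, maxIndex)])
			maxIndex = a;
	}
	return maxIndex;
}
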