Example #1
//unsigned char buffer[STATIC_ALLOCATOR_SIZE];
//StaticAllocator myAlloc(buffer, STATIC_ALLOCATOR_SIZE);
int main() {
  //Alloc::init(&myAlloc);
//  DummyAgent agent;
  QLearningEGreedyPolicy egreedy(0.1f);
  NeuralNetwork net(DIM_OBSERVATIONS + DIM_ACTIONS, N_HIDDEN, 1, 0.1f);
  QLearningAgent agent(&net, DIM_OBSERVATIONS, DIM_ACTIONS, N_ACTIONS,
                       1.0f, 0.1f, &egreedy, false); // lambda = 1.0 => no history
  LibMapperEnvironment env;
  RLQualia qualia(&agent, &env);

  qualia.init();
  qualia.start();

  for (;;) {
//  for (int i=0; i<10; i++) {
    qualia.step();
#if is_computer()
    printf("Current agent action: %d\n", agent.currentAction.conflated());
    printf("Current environment observation: %f %f\n", (double)env.currentObservation.observations[0], (double)env.currentObservation.observations[1]);
#endif
  }

//  if (myAlloc.nLeaks)
//    printf("WARNING: Static Allocator has leaks: %d\n", myAlloc.nLeaks);

  return 0;
}
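
Note on Example #1: QLearningEGreedyPolicy(0.1f) picks actions epsilon-greedily with epsilon = 0.1. Below is a minimal, library-independent sketch of that selection rule; the function name and the Q-value array are illustrative assumptions, not part of the qualia API.

#include <cstdlib>

// Sketch only: with probability epsilon take a uniformly random action,
// otherwise take the action with the highest estimated Q-value.
int selectEpsilonGreedy(const float* qValues, int nActions, float epsilon) {
  if ((float)rand() / (float)RAND_MAX < epsilon)
    return rand() % nActions;            // explore
  int best = 0;
  for (int a = 1; a < nActions; a++)     // exploit: argmax over Q-values
    if (qValues[a] > qValues[best])
      best = a;
  return best;
}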
Example #2
File: dummy.cpp Project: malloch/qualia
void testMem() {
  DummyAgent agent;
  DummyEnvironment env;
  Qualia qualia(&agent, &env);

  qualia.init();

  for (int i=0; i<10; i++)
    qualia.episode(10);
}
Example #3
File: dummy.cpp Project: malloch/qualia
void testDummy() {
  DummyAgent agent;
  DummyEnvironment env;
  Qualia qualia(&agent, &env);

  qualia.init();

  for (int i=0; i<10; i++) {
    qualia.episode(10);

    Q_MESSAGE("Current agent action: %d\n", agent.currentAction.conflated());
    Q_MESSAGE("Current environment observation: %f %f\n", (double)env.currentObservation.observations[0], (double)env.currentObservation.observations[1]);
  }
}
Example #4
int main(int argc, char** argv) {
  srand(RANDOM_SEED);

  NeuralNetwork net(GLOW_ENVIRONMENT_OBSERVATIONS_DIM + GLOW_AGENT_ACTIONS_DIM, N_HIDDEN, 1, LEARNING_RATE);
  //QLearningEGreedyPolicy policy(EPSILON);
  QLearningSoftmaxPolicy policy(0.5f, EPSILON);

  QLearningAgent agent(&net,
                       GLOW_ENVIRONMENT_OBSERVATIONS_DIM, GLOW_AGENT_ACTIONS_DIM, GLOW_AGENT_N_ACTIONS,
                       1.0f, 0.1f, &policy, false); // lambda = 1.0 => no history

  SimpleGlowEnvironment env(argv[1], LED_OUT, PHOTO_AIN);

//  BigDummyReward rew;
//  DummyRewardEnvironment env(DUMMY_ENVIRONMENT_OBSERVATIONS_DIM, &rew);

  RLQualia qualia(&agent, &env);

  printf("Starting...\n");

  qualia.init();
  for (int i=0; i<10; i++) {
    printf("# %d ", qualia.nEpisodes);
    qualia.episode(1000);
#if is_computer()
    printf("Mean reward: %f (%f / %d)\n", (double) qualia.totalReward / qualia.nSteps, qualia.totalReward, qualia.nSteps);
//    printf("Current agent action: [%d %d] = %d\n", agent.currentAction[0], agent.currentAction[1], agent.currentAction.conflated());
//    printf("Current environment observation: [%f %f] => %f\n", env.currentObservation[0], env.currentObservation[1], env.currentObservation.reward);
#endif
  }

  // Put epsilon on ice.
  printf("Final episode (without random moves)\n");
  ((QLearningEGreedyPolicy *)agent.policy)->epsilon = 0;
  qualia.episode(1000);
#if is_computer()
  printf("Mean reward: %f (%f/%d)\n", (double) qualia.totalReward / qualia.nSteps, qualia.totalReward, qualia.nSteps);
  printf("Current agent action: [%d] = %d\n", agent.currentAction[0], agent.currentAction.conflated());
//  printf("Current environment observation: [%f] => %f\n", env.currentObservation[0], env.currentObservation.reward);
#endif

  return 0;
}
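
Note on Example #4: the commented-out QLearningEGreedyPolicy is replaced by QLearningSoftmaxPolicy(0.5f, EPSILON), i.e. actions are sampled from a softmax (Boltzmann) distribution over Q-values. A library-independent sketch of that sampling rule follows; the temperature parameter and function name are assumptions for illustration, not qualia's actual interface.

#include <cmath>
#include <cstdlib>

// Sketch only: sample action a with probability proportional to exp(Q(a)/T).
int selectSoftmax(const float* qValues, int nActions, float temperature) {
  double sum = 0.0;
  for (int a = 0; a < nActions; a++)
    sum += std::exp(qValues[a] / temperature);
  double r = (double)rand() / (double)RAND_MAX * sum;   // point in [0, sum]
  double acc = 0.0;
  for (int a = 0; a < nActions; a++) {
    acc += std::exp(qValues[a] / temperature);
    if (r <= acc)
      return a;
  }
  return nActions - 1;  // guard against rounding at the upper edge
}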
Example #5
int main() {
  DummyAgent agent(DIM_ACTIONS, N_ACTIONS);
  MapperBasicEnvironment env(DIM_OBSERVATIONS, DIM_ACTIONS, "agent", PEER_DEVICE_NAME, true, 9000);
  Qualia qualia(&agent, &env);

  qualia.init();
  qualia.start();

  for (;;) {
//  for (int i=0; i<10; i++) {
    qualia.step();
#if is_computer()
    printf("Current agent action: %d\n", (int)agent.currentAction.conflated());
    printf("Current environment observation: %f %f\n", (double)env.currentObservation.observations[0], (double)env.currentObservation.observations[1]);
#endif
  }

//  if (myAlloc.nLeaks)
//    printf("WARNING: Static Allocator has leaks: %d\n", myAlloc.nLeaks);

  return 0;
}
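
Note on Example #5: unlike the episode-based examples, this one advances the loop one qualia.step() at a time. For reference, the sketch below spells out the generic observe/act/learn cycle that a single interaction step performs; the Agent and Environment interfaces shown here are hypothetical and are not the qualia classes.

// Hypothetical, library-independent sketch of one RL interaction step.
struct Observation { float values[2]; float reward; };

struct Environment {
  virtual Observation currentObservation() = 0;
  virtual Observation applyAction(int action) = 0;   // advance the world, compute reward
  virtual ~Environment() {}
};

struct Agent {
  virtual int chooseAction(const Observation& obs) = 0;            // policy (e.g. epsilon-greedy)
  virtual void update(const Observation& obs, int action,
                      const Observation& next) = 0;                // learning rule (e.g. Q-learning)
  virtual ~Agent() {}
};

void interactionStep(Agent& agent, Environment& env) {
  Observation obs  = env.currentObservation();
  int action       = agent.chooseAction(obs);
  Observation next = env.applyAction(action);   // next.reward holds the resulting reward
  agent.update(obs, action, next);
}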
Example #6
File: dummy.cpp Project: malloch/qualia
void testQLearning(Environment& env, QLearningAgent& agent) {
  RLQualia qualia(&agent, &env);

  Q_MESSAGE("Starting...");
#if is_computer()
  Q_MESSAGE("(this is a computer)");
#endif

  qualia.init();
  agent.isLearning = false;
  qualia.episode(1000);
  Q_MESSAGE("First episode: no learning");
  Q_MESSAGE("Mean reward: %f (%f / %d)", (double) qualia.totalReward / qualia.nSteps, qualia.totalReward, qualia.nSteps);
//    Q_MESSAGE("Current agent action: [%d %d] = %d", agent.currentAction[0], agent.currentAction[1], agent.currentAction.conflated());
//    Q_MESSAGE("Current environment observation: [%f %f] => %f", env.currentObservation[0], env.currentObservation[1], env.currentObservation.reward);

  for (int i=0; i<10; i++) {
    agent.isLearning = true;  // re-enable learning before the training episode runs
    qualia.episode(1000);

    Q_MESSAGE("# %d ", qualia.nEpisodes);
    Q_MESSAGE("Mean reward: %f (%f / %d)", (double) qualia.totalReward / qualia.nSteps, qualia.totalReward, qualia.nSteps);
//    Q_MESSAGE("Current agent action: [%d %d] = %d", agent.currentAction[0], agent.currentAction[1], agent.currentAction.conflated());
//    Q_MESSAGE("Current environment observation: [%f %f] => %f", env.currentObservation[0], env.currentObservation[1], env.currentObservation.reward);
  }

  // Put epsilon on ice.
  ((QLearningEGreedyPolicy *)agent.policy)->epsilon = 0;
  qualia.episode(1000);

  Q_MESSAGE("Final episode (without random moves)");
  Q_MESSAGE("Mean reward: %f (%f/%d)", (double) qualia.totalReward / qualia.nSteps, qualia.totalReward, qualia.nSteps);
  Q_MESSAGE("Current agent action: [%d] = %d", agent.currentAction[0], agent.currentAction.conflated());
//  Q_MESSAGE("Current environment observation: [%f] => %f", env.currentObservation[0], env.currentObservation.reward);

}