//unsigned char buffer[STATIC_ALLOCATOR_SIZE];
//StaticAllocator myAlloc(buffer, STATIC_ALLOCATOR_SIZE);

int main() {
  //Alloc::init(&myAlloc);

  //DummyAgent agent;
  QLearningEGreedyPolicy egreedy(0.1f);
  NeuralNetwork net(DIM_OBSERVATIONS + DIM_ACTIONS, N_HIDDEN, 1, 0.1f);
  QLearningAgent agent(&net, DIM_OBSERVATIONS, DIM_ACTIONS, N_ACTIONS,
                       1.0f, 0.1f, &egreedy, false); // lambda = 1.0 => no history
  LibMapperEnvironment env;
  RLQualia qualia(&agent, &env);

  qualia.init();
  qualia.start();

  for (;;) {
//  for (int i=0; i<10; i++) {
    qualia.step();
#if is_computer()
    printf("Current agent action: %d\n", agent.currentAction.conflated());
    printf("Current environment observation: %f %f\n",
           (double)env.currentObservation.observations[0],
           (double)env.currentObservation.observations[1]);
#endif
  }

//  if (myAlloc.nLeaks)
//    printf("WARNING: Static Allocator has leaks: %d\n", myAlloc.nLeaks);

  return 0;
}
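// The commented-out StaticAllocator lines above show how this example can run
// on a fixed memory pool (useful on microcontrollers where heap allocation is
// undesirable). A minimal sketch of enabling it, using only the names already
// present in those comments (the exact Alloc API is as hinted there, not
// verified against the headers):

unsigned char buffer[STATIC_ALLOCATOR_SIZE];
StaticAllocator myAlloc(buffer, STATIC_ALLOCATOR_SIZE);

int main() {
  Alloc::init(&myAlloc);   // route subsequent Qualia allocations to the pool
  // ... construct policy / net / agent / environment and run as above ...
  if (myAlloc.nLeaks)      // allocations that were never freed
    printf("WARNING: Static Allocator has leaks: %d\n", myAlloc.nLeaks);
  return 0;
}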
void testMem() {
  DummyAgent agent;
  DummyEnvironment env;
  Qualia qualia(&agent, &env);
  qualia.init();
  for (int i=0; i<10; i++)
    qualia.episode(10);
}
void testDummy() {
  DummyAgent agent;
  DummyEnvironment env;
  Qualia qualia(&agent, &env);
  qualia.init();
  for (int i=0; i<10; i++) {
    qualia.episode(10);
    Q_MESSAGE("Current agent action: %d\n", agent.currentAction.conflated());
    Q_MESSAGE("Current environment observation: %f %f\n",
              (double)env.currentObservation.observations[0],
              (double)env.currentObservation.observations[1]);
  }
}
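// Both helpers drive the same dummy agent/environment pair: testMem() only
// exercises allocation paths, while testDummy() also reports the agent's
// action and the environment's observation after each episode. A sketch of a
// test driver that invokes them (hypothetical; RANDOM_SEED is borrowed from
// the glow example below):

int main() {
  srand(RANDOM_SEED);  // make runs reproducible
  testMem();           // allocation/deallocation smoke test
  testDummy();         // episode loop with state printout
  return 0;
}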
int main(int argc, char** argv) {
  srand(RANDOM_SEED);

  NeuralNetwork net(GLOW_ENVIRONMENT_OBSERVATIONS_DIM + GLOW_AGENT_ACTIONS_DIM,
                    N_HIDDEN, 1, LEARNING_RATE);
  //QLearningEGreedyPolicy policy(EPSILON);
  QLearningSoftmaxPolicy policy(0.5f, EPSILON);
  QLearningAgent agent(&net,
                       GLOW_ENVIRONMENT_OBSERVATIONS_DIM, GLOW_AGENT_ACTIONS_DIM,
                       GLOW_AGENT_N_ACTIONS,
                       1.0f, 0.1f, &policy, false); // lambda = 1.0 => no history
  SimpleGlowEnvironment env(argv[1], LED_OUT, PHOTO_AIN);
//  BigDummyReward rew;
//  DummyRewardEnvironment env(DUMMY_ENVIRONMENT_OBSERVATIONS_DIM, &rew);
  RLQualia qualia(&agent, &env);

  printf("Starting...\n");
  qualia.init();

  for (int i=0; i<10; i++) {
    printf("# %d ", qualia.nEpisodes);
    qualia.episode(1000);
#if is_computer()
    printf("Mean reward: %f (%f / %d)\n",
           (double) qualia.totalReward / qualia.nSteps, qualia.totalReward, qualia.nSteps);
//    printf("Current agent action: [%d %d] = %d\n", agent.currentAction[0], agent.currentAction[1], agent.currentAction.conflated());
//    printf("Current environment observation: [%f %f] => %f\n", env.currentObservation[0], env.currentObservation[1], env.currentObservation.reward);
#endif
  }

  // Put epsilon on ice: no more random moves in the final episode. The agent
  // was built with a QLearningSoftmaxPolicy, so cast to that type.
  printf("Final episode (without random moves)\n");
  ((QLearningSoftmaxPolicy *)agent.policy)->epsilon = 0;
  qualia.episode(1000);
#if is_computer()
  printf("Mean reward: %f (%f/%d)\n",
         (double) qualia.totalReward / qualia.nSteps, qualia.totalReward, qualia.nSteps);
  printf("Current agent action: [%d] = %d\n",
         agent.currentAction[0], agent.currentAction.conflated());
//  printf("Current environment observation: [%f] => %f\n", env.currentObservation[0], env.currentObservation.reward);
#endif

  return 0;
}
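// Instead of cutting exploration off all at once after ten episodes, epsilon
// can be annealed between episodes. A sketch that could replace the training
// loop above; it assumes only the public `epsilon` field already used there:

for (int i = 0; i < 10; i++) {
  qualia.episode(1000);
  // Halve the probability of a random move after each episode.
  ((QLearningSoftmaxPolicy *)agent.policy)->epsilon *= 0.5f;
}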
int main() {
  DummyAgent agent(DIM_ACTIONS, N_ACTIONS);
  MapperBasicEnvironment env(DIM_OBSERVATIONS, DIM_ACTIONS, "agent", PEER_DEVICE_NAME, true, 9000);
  Qualia qualia(&agent, &env);

  qualia.init();
  qualia.start();

  for (;;) {
//  for (int i=0; i<10; i++) {
    qualia.step();
#if is_computer()
    printf("Current agent action: %d\n", (int)agent.currentAction.conflated());
    printf("Current environment observation: %f %f\n",
           (double)env.currentObservation.observations[0],
           (double)env.currentObservation.observations[1]);
#endif
  }

//  if (myAlloc.nLeaks)
//    printf("WARNING: Static Allocator has leaks: %d\n", myAlloc.nLeaks);

  return 0;
}
void testQLearning(Environment& env, QLearningAgent& agent) {
  RLQualia qualia(&agent, &env);

  Q_MESSAGE("Starting...");
#if is_computer()
  Q_MESSAGE("(this is a computer)");
#endif

  qualia.init();

  // Baseline: run one episode with learning disabled.
  agent.isLearning = false;
  qualia.episode(1000);
  Q_MESSAGE("First episode: no learning");
  Q_MESSAGE("Mean reward: %f (%f / %d)",
            (double) qualia.totalReward / qualia.nSteps, qualia.totalReward, qualia.nSteps);
//  Q_MESSAGE("Current agent action: [%d %d] = %d", agent.currentAction[0], agent.currentAction[1], agent.currentAction.conflated());
//  Q_MESSAGE("Current environment observation: [%f %f] => %f", env.currentObservation[0], env.currentObservation[1], env.currentObservation.reward);

  // Training: re-enable learning before the episodes run, so that every
  // episode in the loop (including the first) actually learns.
  agent.isLearning = true;
  for (int i=0; i<10; i++) {
    qualia.episode(1000);
    Q_MESSAGE("# %d ", qualia.nEpisodes);
    Q_MESSAGE("Mean reward: %f (%f / %d)",
              (double) qualia.totalReward / qualia.nSteps, qualia.totalReward, qualia.nSteps);
//    Q_MESSAGE("Current agent action: [%d %d] = %d", agent.currentAction[0], agent.currentAction[1], agent.currentAction.conflated());
//    Q_MESSAGE("Current environment observation: [%f %f] => %f", env.currentObservation[0], env.currentObservation[1], env.currentObservation.reward);
  }

  // Put epsilon on ice (assumes the agent was built with an e-greedy policy).
  ((QLearningEGreedyPolicy *)agent.policy)->epsilon = 0;
  qualia.episode(1000);
  Q_MESSAGE("Final episode (without random moves)");
  Q_MESSAGE("Mean reward: %f (%f/%d)",
            (double) qualia.totalReward / qualia.nSteps, qualia.totalReward, qualia.nSteps);
  Q_MESSAGE("Current agent action: [%d] = %d",
            agent.currentAction[0], agent.currentAction.conflated());
//  Q_MESSAGE("Current environment observation: [%f] => %f", env.currentObservation[0], env.currentObservation.reward);
}
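// testQLearning() expects the caller to own the network, policy, agent, and
// environment. A sketch of such a caller, assembled only from constructors
// that appear elsewhere in these examples (the dimension constants are
// placeholders):

int main() {
  srand(RANDOM_SEED);
  QLearningEGreedyPolicy egreedy(0.1f);  // matches the e-greedy cast above
  NeuralNetwork net(DIM_OBSERVATIONS + DIM_ACTIONS, N_HIDDEN, 1, 0.1f);
  QLearningAgent agent(&net, DIM_OBSERVATIONS, DIM_ACTIONS, N_ACTIONS,
                       1.0f, 0.1f, &egreedy, false); // lambda = 1.0 => no history
  DummyEnvironment env;
  testQLearning(env, agent);
  return 0;
}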