void testQLearningDummy() { randomSeed(RANDOM_SEED); NeuralNetwork net(DUMMY_ENVIRONMENT_OBSERVATIONS_DIM + DUMMY_AGENT_ACTIONS_DIM, N_HIDDEN, 1, 0.1f); ActionProperties props(DUMMY_AGENT_ACTIONS_DIM, DUMMY_AGENT_N_ACTIONS); QLearningEGreedyPolicy egreedy(0.1f); QFunction qFunc(&net, DUMMY_ENVIRONMENT_OBSERVATIONS_DIM, &props); QLearningAgent agent(&qFunc, &egreedy, DUMMY_ENVIRONMENT_OBSERVATIONS_DIM, &props, 0.0f, 0.01f, false); // lambda = 0.0 => no history, gamma = 0.01 => opportunistic agent DummyEnvironment env; testQLearning(env, agent); }
/*
 * Return a new chain containing the values of `it` for which the quantifier
 * `qFunc` yields True. The input chain is not modified. Because prepend()
 * pushes onto the head, the result is in reverse order of the input —
 * consistent with vote() below.
 *
 * BUG FIXED: the original routed every prepend() result into a temporary
 * `selector` pointer and never wrote it back to `pass`, so the function
 * always returned NULL and leaked every node prepend() allocated. It also
 * built (and leaked) a `fail` list it never used; that work is dropped here.
 */
Chain *filter(Chain *it, Quantifier qFunc) {
    Chain *pass = NULL;
    while (it != NULL) {
        if (qFunc(it->value) == True) {
            pass = prepend(pass, it->value);
        }
        it = it->next;
    }
    return pass;
}
/*
 * Partition the values of `it` by the quantifier `qFunc` into a BinaryVote:
 * values for which qFunc yields True go to bv->pass, all others to bv->fail.
 * The input chain is not modified; each output list is in reverse input order
 * because prepend() pushes onto the head.
 *
 * BUG FIXED: the original assigned the result of prepend() to a temporary
 * `selector` pointer without writing it back to `pass` or `fail`, so both
 * result lists were always NULL and every prepended node leaked.
 *
 * NOTE(review): newBinaryVote() is assumed to return a valid allocation;
 * if it can return NULL, callers or this function should check — confirm
 * against its definition.
 */
BinaryVote *vote(Chain *it, Quantifier qFunc) {
    Chain *pass = NULL;
    Chain *fail = NULL;
    while (it != NULL) {
        if (qFunc(it->value) == True) {
            pass = prepend(pass, it->value);
        } else {
            fail = prepend(fail, it->value);
        }
        it = it->next;
    }
    BinaryVote *bv = newBinaryVote();
    bv->pass = pass;
    bv->fail = fail;
    return bv;
}