virtual stats local_run_l(int index, bool learn) {
  DAction* ac = fac;
  double total_reward = 0;
  // Fixed default action, used when the environment forces the choice.
  DAction* b = new DAction(prob->getActions(), 0);

  do {
    prob->apply(*ac);
    total_reward += prob->reward();

    if(learn) {
      if(!prob->restrictedAction().restricted)
        ac = this->computeNextAction(getState(prob), prob->reward(), prob->goal());
      else {
        // The action was imposed: let the agent record the transition
        // without choosing, then replay the default action.
        agent->had_choosed(getState(prob), *b, prob->reward(), false, prob->goal());
        ac = b;
      }
    } else {
      // Evaluation mode: free the previous action (never the initial fac),
      // then query the greedy policy.
      if(ac != fac)
        delete ac;
      ac = this->agent->decision(getState(prob), false);
    }
  } while(!prob->done());

  if(!learn)
    delete ac;
  delete b;
  return {prob->step, total_reward, prob->step, index};
}
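// The brace-initialized return above implies a small aggregate whose real
// definition lives elsewhere; a minimal sketch of a layout consistent with
// the initializer order (all field names here are assumptions):
//
//   struct stats {
//     int steps;           // assumed: prob->step at episode end
//     double total_reward; // assumed: cumulative reward of the episode
//     int internal_steps;  // assumed: second copy of the step counter
//     int index;           // assumed: episode index passed by the caller
//   };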
stats local_run(int index, bool random_init) {
  // One learning episode, then a greedy evaluation episode on a freshly
  // initialized problem; only the evaluation statistics are returned.
  prob->init(random_init);
  agent->startEpisode(getState(prob), *fac);
  local_run_l(index, true);

  prob->init(random_init);
  agent->startEpisode(getState(prob), *fac);
  return local_run_l(index, false);
}
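// How these episodes are driven is not shown in this file. A minimal
// sketch, assuming a hypothetical owning class (the SIM parameter below)
// and the hypothetical stats field names from the sketch above:

#include <cstdio>

template <typename SIM>
void training_loop(SIM& sim, int episodes) {
  sim.init();  // must run once beforehand: it creates the agent (see init() below)
  for(int i = 0; i < episodes; i++) {
    // Each call learns for one episode, then reports the statistics
    // of a separate greedy evaluation episode.
    stats s = sim.local_run(i, /*random_init=*/true);
    std::printf("episode %d: evaluation reward %f\n", i, s.total_reward);
  }
}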
stats run_best(int index) {
  // Deterministic replay of the best recorded policy.
  prob->init(false);
  DAction* ac = new DAction(*fac);
  double total_reward = 0;

  do {
    prob->apply(*ac);
    delete ac;
    total_reward += prob->reward();
    ac = best_policy->decision(getState(prob), false);
  } while(!prob->done());

  delete ac;
  return {prob->step, total_reward, prob->step, index};
}
void init() {
  agent = this->createAgent(getState(prob), *fac);
  agentSet = true;
}
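// After training, the best recorded policy can be replayed. A sketch under
// the same assumptions as training_loop above (hypothetical SIM parameter
// and stats field names):

template <typename SIM>
void evaluate_best(SIM& sim) {
  // Deterministic replay: run_best() calls prob->init(false) internally
  // before following best_policy step by step.
  stats best = sim.run_best(/*index=*/0);
  std::printf("best policy: reward %f over %d steps\n",
              best.total_reward, best.steps);
}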