Example #1
    /// Runs a single episode on `prob`, either learning (learn == true) or
    /// greedily evaluating (learn == false), and returns its statistics.
    ///
    /// @param index  caller-supplied episode index, passed through into the
    ///               returned stats untouched.
    /// @param learn  true  -> the agent is updated each step and chooses
    ///                        actions via computeNextAction / had_choosed;
    ///               false -> actions come from agent->decision() with no
    ///                        learning.
    /// @return {steps, accumulated reward, steps, index}.
    ///
    /// Ownership notes (NOTE(review) — inferred from the deletes below,
    /// confirm against the agent API):
    ///  - `fac` is a member and is never deleted here.
    ///  - In learn mode `ac` is never deleted, so computeNextAction()
    ///    presumably returns a pointer owned by the agent.
    ///  - In non-learn mode agent->decision() presumably returns a
    ///    heap-allocated action owned by the caller, hence the deletes.
    virtual stats local_run_l(int index, bool learn) {
        DAction* ac = fac;                       // first action of the episode; member, not owned here
        double total_reward = 0;
        // Fallback action used when the problem restricts the agent's choice.
        // `(int) false` selects action id 0 — TODO confirm intent of the cast.
        DAction* b = new DAction(prob->getActions(), (int) false );
        do
        {
            prob->apply(*ac);
            total_reward += prob->reward();

            if(learn) {
                if(!prob->restrictedAction().restricted)
                    // Normal step: let the learning rule pick the next action.
                    ac = this->computeNextAction(getState(prob), prob->reward(), prob->goal());
                else {
                    // Restricted step: force the fallback action `b` but still
                    // feed the transition to the agent so it can learn from it.
                    agent->had_choosed(getState(prob), *b, prob->reward(), false, prob->goal());
                    ac = b;
                }
//                 ac = this->computeNextAction(getState(prob), prob->reward(), prob->goal());
            }
            else {
                // Evaluation: previous decision() results are owned here, so
                // free them before asking for the next one (skip `fac`, which
                // is a member and only held on the very first iteration).
                if(ac != fac)
                    delete ac;
                ac = this->agent->decision(getState(prob), false);
            }
        }
        while(!prob->done());

        // The final decision() result from the last loop iteration is still
        // held in evaluation mode; in learn mode `ac` is not owned here.
        if(!learn)
            delete ac;

        delete b;

        return {prob->step, total_reward, prob->step, index};
    }
Example #2
    /// Runs one learning episode, then replays a greedy (non-learning)
    /// episode on an identically (re)initialized problem and returns the
    /// statistics of that evaluation pass only.
    ///
    /// @param index        episode index forwarded to local_run_l.
    /// @param random_init  forwarded to prob->init() for both passes, so the
    ///                     evaluation starts from the same kind of state the
    ///                     learning pass did.
    stats local_run(int index, bool random_init) {
        // Learning pass — its stats are intentionally discarded.
        prob->init(random_init);
        agent->startEpisode(getState(prob), *fac);
        local_run_l(index, true);

        // Evaluation pass — re-seed the problem the same way and measure.
        prob->init(random_init);
        agent->startEpisode(getState(prob), *fac);
        return local_run_l(index, false);
    }
Example #3
    /// Plays one full episode using the stored best policy — no learning,
    /// deterministic problem initialization — and returns its statistics.
    ///
    /// @param index  episode index, copied into the returned stats.
    /// @return {steps, accumulated reward, steps, index}.
    stats run_best(int index) {
        prob->init(false);

        double reward_sum = 0;
        // Every pointer held by `action` in this function is heap-owned:
        // the initial copy of `fac` here, then each decision() result.
        DAction* action = new DAction(*fac);
        bool finished = false;
        while (!finished) {
            prob->apply(*action);
            delete action;
            reward_sum += prob->reward();

            // Greedy query of the best policy (exploration disabled).
            action = best_policy->decision(getState(prob), false);
            finished = prob->done();
        }
        // Free the decision() result produced on the final iteration.
        delete action;

        return {prob->step, reward_sum, prob->step, index};
    }
Example #4
 void init() {
     agent = this->createAgent(getState(prob), *fac);
     agentSet = true;
 }