Example 1
void LRTDPSolver::trial(mlcore::State* s)
{
    mlcore::State* tmp = s;
    std::list<mlcore::State*> visited;
    // Simulate a greedy trial until reaching a solved state, a goal, or a dead end.
    while (!tmp->checkBits(mdplib::SOLVED)) {
        if (problem_->goal(tmp))
            break;

        visited.push_front(tmp);

        bellmanUpdate(problem_, tmp);

        if (tmp->deadEnd())
            break;

        tmp = randomSuccessor(problem_, tmp, tmp->bestAction());
    }

    // Try to label the visited states as solved, in reverse order of visit.
    while (!visited.empty()) {
        tmp = visited.front();
        visited.pop_front();
        if (!checkSolved(tmp))
            break;
    }
}
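
Every trial above and below draws its next state with randomSuccessor. A minimal sketch of such a helper, assuming problem->transition(s, a) returns successors with su_state and su_prob fields (an assumption about the interface; the library's actual implementation may differ):

#include <random>

// Sample a successor of s under action a, proportional to transition probability.
mlcore::State* randomSuccessorSketch(mlcore::Problem* problem,
                                     mlcore::State* s,
                                     mlcore::Action* a)
{
    static std::mt19937 rng(std::random_device{}());
    std::uniform_real_distribution<double> unif(0.0, 1.0);
    double pick = unif(rng);
    double acc = 0.0;
    mlcore::State* next = nullptr;
    for (auto& sccr : problem->transition(s, a)) {  // assumed accessor
        acc += sccr.su_prob;
        next = sccr.su_state;
        if (pick <= acc)
            break;
    }
    return next;  // falls back to the last successor if probabilities underflow
}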
Example 2
  double playout(Node *startNode) {
    GameRules* rules = GameRules::instance();

    unsigned curPlayerIndex = startNode->PlayerIndex();
    State *curState = startNode->GetState();

    // Own the intermediate states so the raw curState pointer stays valid.
    vector<uptr<State>> playedStates;
    while (!rules->IsTerminalState(*curState)) {
      uptr<State> nextState = randomSuccessor(curState);
      curPlayerIndex = 1 - curPlayerIndex;

      curState = nextState.get();
      playedStates.push_back(move(nextState));
    }

    // Account for the fact that the winner may not be the player of the original startNode.
    // The result of this function should be the utility of the playout for the player owning
    // the startNode.
    double utilFlip = startNode->PlayerIndex() == curPlayerIndex ? 1.0 : -1.0;

    if (rules->IsWin(*curState)) {
      return 1.0 * utilFlip;
    } else if (rules->IsLoss(*curState)) {
      return -1.0 * utilFlip;
    } else {
      return 0.0;
    }
  }
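
A playout like this is normally averaged over many runs to score a node during MCTS simulation. A hypothetical caller (estimateValue and its runs parameter are illustrative names, not from the source):

// Average several random playouts to get a Monte Carlo value estimate for node.
double estimateValue(Node *node, unsigned runs) {
    double total = 0.0;
    for (unsigned i = 0; i < runs; i++)
        total += playout(node);
    return total / runs;
}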
Example 3
void LRTDPSolver::trial(mlcore::State* s) {
    mlcore::State* tmp = s;
    std::list<mlcore::State*> visited;
    double accumulated_cost = 0.0;
    while (!tmp->checkBits(mdplib::SOLVED)) {
        if (problem_->goal(tmp) || accumulated_cost > mdplib::dead_end_cost)
            break;

        visited.push_front(tmp);

        bellmanUpdate(problem_, tmp);

        if (tmp->deadEnd())
            break;

        accumulated_cost += problem_->cost(tmp, tmp->bestAction());
        tmp = randomSuccessor(problem_, tmp, tmp->bestAction());
    }

    // Skip the labeling phase when running in non-labeling mode.
    if (dont_label_)
        return;

    while (!visited.empty()) {
        tmp = visited.front();
        visited.pop_front();
        bool solved = checkSolved(tmp);
        if (!solved) break;
    }
}
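
Examples 1, 3, and 6 finish by calling checkSolved, the labeling routine of LRTDP (Bonet & Geffner, 2003). A condensed sketch of the idea, where residual(), setBits, and the transition accessor are assumptions about the interface rather than the library's actual code:

#include <stack>
#include <unordered_set>

// Label s as solved if every state reachable under the greedy policy has a
// Bellman residual below eps; otherwise update the states that were visited.
bool checkSolvedSketch(mlcore::Problem* problem, mlcore::State* s, double eps)
{
    bool solved = true;
    std::stack<mlcore::State*> open, closed;
    std::unordered_set<mlcore::State*> seen;
    if (!s->checkBits(mdplib::SOLVED)) { open.push(s); seen.insert(s); }
    while (!open.empty()) {
        mlcore::State* cur = open.top(); open.pop();
        closed.push(cur);
        if (residual(problem, cur) > eps) {  // assumed helper
            solved = false;
            continue;  // do not expand past a state with a large residual
        }
        mlcore::Action* a = greedyAction(problem, cur);
        for (auto& sccr : problem->transition(cur, a)) {  // assumed accessor
            mlcore::State* next = sccr.su_state;
            if (!next->checkBits(mdplib::SOLVED) && seen.insert(next).second)
                open.push(next);
        }
    }
    if (solved) {
        // All reachable residuals are small: mark the whole closed set solved.
        while (!closed.empty()) {
            closed.top()->setBits(mdplib::SOLVED);
            closed.pop();
        }
    } else {
        // Otherwise run Bellman updates on the visited states to make progress.
        while (!closed.empty()) {
            bellmanUpdate(problem, closed.top());
            closed.pop();
        }
    }
    return solved;
}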
Example 4
double RFFSolver::failProb(mlcore::State* s, int N)
{
    // Reset the recorded probability for every terminal state.
    for (mlcore::State* terminal : terminalStates_)
        probabilitiesTerminals_[terminal] = 0.0;
    double totalProbabilityTerminals = 0.0;
    double delta = 1.0 / N;
    for (int i = 0; i < N; i++) {
        mlcore::State* currentState = s;
        while (!problem_->goal(currentState) &&
               terminalStates_.count(currentState) == 0) {
            if (currentState->deadEnd()) {
                // Treat dead-ends as goals; otherwise this method
                // might loop endlessly when there are unavoidable dead-ends.
                break;
            }
            currentState = randomSuccessor(problem_,
                                           currentState,
                                           currentState->bestAction());
        }
        if (terminalStates_.count(currentState) > 0) {
            // Credit the terminal state actually reached by this rollout.
            probabilitiesTerminals_[currentState] += delta;
            totalProbabilityTerminals += delta;
        }
    }
    return totalProbabilityTerminals;
}
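
Each of the N rollouts contributes delta = 1/N when it stops at a state in terminalStates_, so the return value is the usual Monte Carlo estimate of the probability of reaching a terminal (non-goal) state from s:

\hat{p}_{\mathrm{fail}}(s) = \frac{1}{N} \sum_{i=1}^{N} \mathbf{1}\left[\text{rollout } i \text{ ends in } \mathit{terminalStates\_}\right]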
Example 5
Action* SSiPPSolver::solveOriginal(State* s0)
{
    beginTime_ = std::chrono::high_resolution_clock::now();
    // When a time bound is set, effectively let trials run until time expires.
    if (maxTime_ > -1) {
        maxTrials_ = 10000000;
    }
    for (int i = 0; i < maxTrials_; i++) {
        mlcore::State* currentState = s0;
        double accumulated_cost = 0.0;
        while (!problem_->goal(currentState)
                && accumulated_cost < mdplib::dead_end_cost) {
            // Creating the short-sighted SSP
            StateSet reachableStates, tipStates;
            if (useTrajProbabilities_) {
                getReachableStatesTrajectoryProbs(
                    problem_, currentState, reachableStates, tipStates, rho_);
            } else {
                reachableStates.insert(currentState);
                getReachableStates(problem_, reachableStates, tipStates, t_);
            }
            // Solving the short-sighted SSP
            WrapperProblem* wrapper = new WrapperProblem(problem_);
            wrapper->setNewInitialState(currentState);
            wrapper->overrideStates(&reachableStates);
            wrapper->overrideGoals(&tipStates);
            VISolver vi(wrapper, maxTrials_);
            // Adjusting maximum planning time for VI
            if (maxTime_ > -1) {
                auto endTime = std::chrono::high_resolution_clock::now();
                auto timeElapsed = std::chrono::duration_cast<
                    std::chrono::milliseconds>(endTime - beginTime_).count();
                vi.maxPlanningTime(std::max(0, maxTime_ - (int) timeElapsed));
            }
            vi.solve();
            if (currentState->deadEnd() || ranOutOfTime()) {
                wrapper->cleanup();
                delete wrapper;
                break;
            }

            // Execute the best action found for the current state.
            Action* action = currentState->bestAction();
            accumulated_cost += problem_->cost(currentState, action);
            currentState = randomSuccessor(problem_, currentState, action);
            wrapper->cleanup();
            delete wrapper;
        }
        if (ranOutOfTime()) {
            break;
        }
    }
    return s0->bestAction();
}
Example 6
Action* SSiPPSolver::solveLabeled(State* s0)
{
    beginTime_ = std::chrono::high_resolution_clock::now();
    while (!s0->checkBits(mdplib::SOLVED_SSiPP)) {
        State* currentState = s0;
        list<State*> visited;
        while (!currentState->checkBits(mdplib::SOLVED_SSiPP)) {
            visited.push_front(currentState);
            if (problem_->goal(currentState))
                break;
            // Constructing short-sighted SSP
            StateSet reachableStates, tipStates;
            if (useTrajProbabilities_) {
                getReachableStatesTrajectoryProbs(
                    problem_, currentState, reachableStates, tipStates, rho_);
            } else {
                reachableStates.insert(currentState);
                getReachableStates(problem_, reachableStates, tipStates, t_);
            }
            WrapperProblem wrapper(problem_);
            wrapper.overrideStates(&reachableStates);
            wrapper.overrideGoals(&tipStates);

            // Solving the short-sighted SSP
            optimalSolver(&wrapper, currentState);
            if (currentState->deadEnd()) {
                wrapper.cleanup();
                break;
            }
            // Simulate best action
            currentState = randomSuccessor(problem_,
                                           currentState,
                                           greedyAction(problem_,
                                                        currentState));

            wrapper.cleanup();
            // Return if it ran out of time
            if (ranOutOfTime()) {
                return greedyAction(problem_, s0);
            }
        }
        while (!visited.empty()) {
            currentState = visited.front();
            visited.pop_front();
            if (!checkSolved(currentState))
                break;
        }
    }
    return greedyAction(problem_, s0);
}
Example 7
double sampleTrial(mlcore::Problem* problem, mlcore::State* s)
{
    mlcore::State* tmp = s;
    double discount = 1.0;
    double cost = 0.0;
    while (!problem->goal(tmp)) {
        mlcore::Action* a = greedyAction(problem, tmp);
        double discountedCost = discount * problem->cost(tmp, a);
        if (discountedCost < 1.0e-6)
            break;  // stop once costs become negligible, to avoid an infinite loop
        cost += discountedCost;
        tmp = randomSuccessor(problem, tmp, a);
        discount *= problem->gamma();
    }
    return cost;
}
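
With gamma < 1 the 1.0e-6 cutoff bounds the trial length, since the discount factor shrinks geometrically. A small check of that bound, where the gamma value is an arbitrary assumption for illustration:

#include <cmath>
#include <cstdio>

int main()
{
    double gamma = 0.95;   // assumed discount factor, for illustration only
    double eps = 1.0e-6;   // the cutoff used in sampleTrial
    // With unit costs, discountedCost equals discount, and
    // discount^n < eps once n > log(eps) / log(gamma).
    int maxSteps = (int) std::ceil(std::log(eps) / std::log(gamma));
    std::printf("a trial terminates within ~%d steps\n", maxSteps);  // ~270
    return 0;
}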
Example 8
void SoftFLARESSolver::trial(State* s) {
    State* currentState = s;
    list<State*> visited;
    double accumulated_cost = 0.0;

    while (true) {
        if (problem_->goal(currentState))
            break;

        visited.push_front(currentState);
        bellmanUpdate(problem_, currentState);

        if (currentState->deadEnd()
                || accumulated_cost >= mdplib::dead_end_cost)
            break;

        if (ranOutOfTime())
            return;

        mlcore::Action* greedy_action = greedyAction(problem_, currentState);
        accumulated_cost += problem_->cost(currentState, greedy_action);

        currentState = noLabeling_ ?
            randomSuccessor(problem_, currentState, greedy_action) :
            sampleSuccessor(currentState, greedy_action);

        if (currentState == nullptr) {
            break;
        }
    }

    if (noLabeling_) return;
                                                                                dprint("COST ", accumulated_cost);

    while (!visited.empty()) {
        currentState = visited.front();
        visited.pop_front();
        computeResidualDistances(currentState);

        if (!labeledSolved(currentState))
            break;
    }
}