Пример #1
0
// Runs labeled SSiPP trials from s0 until s0 is labeled solved
// (SOLVED_SSiPP bit) or the time budget runs out.
// Returns a greedy action for s0 in either case.
Action* SSiPPSolver::solveLabeled(State* s0)
{
    beginTime_ = std::chrono::high_resolution_clock::now();
    while (!s0->checkBits(mdplib::SOLVED_SSiPP)) {
        State* currentState = s0;
        list<State*> visited;  // states touched this trial, most recent first
        while (!currentState->checkBits(mdplib::SOLVED_SSiPP)) {
            visited.push_front(currentState);
            if (problem_->goal(currentState))
                break;
            // Constructing short-sighted SSP around the current state.
            // The horizon is either a trajectory-probability bound (rho_)
            // or a fixed depth (t_); tipStates become surrogate goals.
            StateSet reachableStates, tipStates;
            if (useTrajProbabilities_) {
                getReachableStatesTrajectoryProbs(
                    problem_, currentState, reachableStates, tipStates, rho_);
            } else {
                reachableStates.insert(currentState);
                getReachableStates(problem_, reachableStates, tipStates, t_);
            }
            WrapperProblem wrapper(problem_);
            wrapper.overrideStates(&reachableStates);
            wrapper.overrideGoals(&tipStates);

            // Solving the short-sighted SSP
            optimalSolver(&wrapper, currentState);
            if (currentState->deadEnd())
                break;  // NOTE(review): wrapper.cleanup() is skipped on this
                        // path -- presumably ~WrapperProblem restores
                        // problem_; confirm.
            // Simulate best action
            currentState = randomSuccessor(problem_,
                                           currentState,
                                           greedyAction(problem_,
                                                        currentState));

            wrapper.cleanup();
            // Return if it ran out of time
            if (ranOutOfTime()) {
                return greedyAction(problem_, s0);
            }
        }
        // Labeling phase: run checkSolved bottom-up along the visited
        // trajectory, stopping at the first state that cannot be labeled.
        while (!visited.empty()) {
            currentState = visited.front();
            visited.pop_front();
            if (!checkSolved(currentState))
                break;
        }
    }
    return greedyAction(problem_, s0);
}
Пример #2
0
// Bellman residual of state s: |Q(s, a*) - V(s)| for the greedy action a*.
// States with no greedy action (dead-ends) have zero residual by convention.
double residual(mlcore::Problem* problem, mlcore::State* s)
{
    mlcore::Action* greedy = greedyAction(problem, s);
    if (greedy == nullptr)
        return 0.0; // state is a dead-end, nothing to do here
    return fabs(qvalue(problem, s, greedy) - s->cost());
}
Пример #3
0
// Runs Soft-FLARES trials from s0 until moreTrials() reports that the
// stopping criterion (labeling/limits, as implemented there) is met.
// Returns a greedy action for s0.
// Fix: removed commented-out debug dprint lines left over from development.
Action* SoftFLARESSolver::solve(State* s0) {
    int trials = 0;
    beginTime_ = std::chrono::high_resolution_clock::now();
    while (moreTrials(s0, trials, beginTime_)) {
        trial(s0);
        trials++;
    }
    return greedyAction(problem_, s0);
}
Пример #4
0
// Runs labeled RTDP trials from s0 until s0 is labeled solved or
// maxTrials_ trials have been performed. If the time budget expires
// mid-run, falls back to the current greedy action for s0; otherwise
// returns s0's stored best action.
mlcore::Action* LRTDPSolver::solve(mlcore::State* s0)
{
    beginTime_ = std::chrono::high_resolution_clock::now();
    int trialCount = 0;
    while (!s0->checkBits(mdplib::SOLVED) && trialCount++ < maxTrials_) {
        trial(s0);
        if (ranOutOfTime())
            return greedyAction(problem_, s0);
    }
    return s0->bestAction();
}
Пример #5
0
// LRTDP-style labeling check for SSiPP: expands the greedy policy graph
// from s and returns true iff every expanded state is epsilon-consistent
// (residual <= epsilon_). On success all expanded states are labeled
// SOLVED_SSiPP; on failure they are Bellman-updated instead.
// Fix: the success path now also clears the CLOSED_SSiPP traversal marker,
// matching the rollback path (and LRTDPSolver::checkSolved); previously the
// bit was left set forever on solved states.
bool SSiPPSolver::checkSolved(State* s)
{
    std::list<State*> open, closed;

    State* tmp = s;
    if (!tmp->checkBits(mdplib::SOLVED_SSiPP)) {
        open.push_front(s);
        s->setBits(mdplib::CLOSED_SSiPP);
    }
    bool rv = true;
    while (!open.empty()) {
        tmp = open.front();
        open.pop_front();
        closed.push_front(tmp);
        // Computed before the goal/dead-end tests to preserve the original
        // call order (greedyAction may update internal state).
        Action* a = greedyAction(problem_, tmp);
        tmp->setBestAction(a);
        if (problem_->goal(tmp))
            continue;
        if (tmp->deadEnd()) {
            rv = false;
            continue;
        }
        if (residual(problem_, tmp) > epsilon_) {
            rv = false;
        }
        // Return if it ran out of time.
        // NOTE(review): states already in |closed| keep CLOSED_SSiPP set on
        // this path, so later calls will not re-expand them -- confirm this
        // is the intended give-up semantics.
        if (ranOutOfTime()) {
            return false;
        }
        for (Successor su : problem_->transition(tmp, a)) {
            State* next = su.su_state;
            if (!next->checkBits(mdplib::SOLVED_SSiPP) &&
                !next->checkBits(mdplib::CLOSED_SSiPP)) {
                open.push_front(next);
                next->setBits(mdplib::CLOSED_SSiPP);
            }
        }
    }
    if (rv) {
        // All expanded states converged: label them and drop the marker.
        for (State* sc : closed) {
            sc->setBits(mdplib::SOLVED_SSiPP);
            sc->clearBits(mdplib::CLOSED_SSiPP);
        }
    } else {
        // Rollback: unmark every expanded state and refresh its value.
        while (!closed.empty()) {
            tmp = closed.front();
            closed.pop_front();
            tmp->clearBits(mdplib::CLOSED_SSiPP);
            bellmanUpdate(problem_, tmp);
        }
    }
    return rv;
}
Пример #6
0
// LRTDP labeling check: expands the greedy policy graph rooted at s and
// returns true iff every expanded state has residual <= epsilon_. On
// success the expanded states are labeled SOLVED; on failure they are
// unmarked and Bellman-updated.
bool LRTDPSolver::checkSolved(mlcore::State* s)
{
    std::list<mlcore::State*> fringe, expanded;

    if (!s->checkBits(mdplib::SOLVED))
        fringe.push_front(s);

    bool converged = true;
    while (!fringe.empty()) {
        mlcore::State* cur = fringe.front();
        fringe.pop_front();

        // greedyAction runs before the goal/dead-end tests, preserving the
        // original call order (it presumably may refresh state flags).
        mlcore::Action* best = greedyAction(problem_, cur);
        if (problem_->goal(cur) || cur->deadEnd())
            continue;

        expanded.push_front(cur);
        cur->setBits(mdplib::CLOSED);

        if (residual(problem_, cur) > epsilon_)
            converged = false;

        for (mlcore::Successor succ : problem_->transition(cur, best)) {
            mlcore::State* nxt = succ.su_state;
            if (!nxt->checkBits(mdplib::SOLVED)
                    && !nxt->checkBits(mdplib::CLOSED))
                fringe.push_front(nxt);
        }
    }

    if (converged) {
        // Everything expanded is epsilon-consistent: label it solved.
        for (mlcore::State* st : expanded) {
            st->setBits(mdplib::SOLVED);
            st->clearBits(mdplib::CLOSED);
        }
    } else {
        // Rollback: unmark and refresh the value of each expanded state.
        while (!expanded.empty()) {
            mlcore::State* st = expanded.front();
            expanded.pop_front();
            st->clearBits(mdplib::CLOSED);
            bellmanUpdate(problem_, st);
        }
    }

    return converged;
}
Пример #7
0
// Samples one greedy-policy trajectory from s to a goal and returns its
// accumulated discounted cost.
// Fix: the early-exit guard read `discountedCost < 1.0-6`, i.e. < -5.0,
// which never triggers for non-negative costs, defeating the intended
// infinite-loop protection. The intended threshold is 1e-6 (stop once the
// discounted contribution becomes negligible).
double sampleTrial(mlcore::Problem* problem, mlcore::State* s)
{
    mlcore::State* tmp = s;
    double discount = 1.0;
    double cost = 0.0;
    while (!problem->goal(tmp)) {
        mlcore::Action* a = greedyAction(problem, tmp);
        double discountedCost = discount * problem->cost(tmp, a);
        if (discountedCost < 1e-6)
            break;  // stop to avoid infinite loop
        cost += discountedCost;
        tmp = randomSuccessor(problem, tmp, a);
        discount *= problem->gamma();
    }
    return cost;
}
Пример #8
0
// Performs one Soft-FLARES trial: follows (sampled) greedy actions from s,
// Bellman-updating each visited state, until a goal, dead-end, cost bound,
// or timeout is hit; then (unless labeling is disabled) runs the labeling
// pass bottom-up along the visited trajectory.
// Fix: removed a live debug print (dprint("COST ", ...)) that ran on every
// trial, plus commented-out debug lines left over from development.
void SoftFLARESSolver::trial(State* s) {
    State* currentState = s;
    list<State*> visited;  // trajectory states, most recent first
    double accumulated_cost = 0.0;

    while (true) {
        if (problem_->goal(currentState))
            break;

        visited.push_front(currentState);
        bellmanUpdate(problem_, currentState);

        if (currentState->deadEnd()
                || accumulated_cost >= mdplib::dead_end_cost)
            break;

        if (ranOutOfTime())
            return;

        mlcore::Action* greedy_action = greedyAction(problem_, currentState);
        accumulated_cost += problem_->cost(currentState, greedy_action);

        // With labeling enabled, successors are sampled in a way that can
        // return nullptr (no unsolved successor to continue the trial).
        currentState = noLabeling_ ?
            randomSuccessor(problem_, currentState, greedy_action):
            sampleSuccessor(currentState, greedy_action);

        if (currentState == nullptr) {
            break;
        }
    }

    if (noLabeling_) return;

    // Labeling pass: check states bottom-up, stopping at the first one
    // that cannot be labeled solved.
    while (!visited.empty()) {
        currentState = visited.front();
        visited.pop_front();
        computeResidualDistances(currentState);

        if (!labeledSolved(currentState))
            break;
    }
}
Пример #9
0
// Collects into |bpsg| every state reachable from |initialState| by
// following greedy actions (the best partial solution graph). Traversal
// stops at goal states and at states already collected.
void getBestPartialSolutionGraph(mlcore::Problem* problem,
                                 mlcore::State* initialState,
                                 mlcore::StateSet& bpsg)
{
    std::list<mlcore::State*> pending;
    pending.push_front(initialState);
    while (!pending.empty()) {
        mlcore::State* cur = pending.front();
        pending.pop_front();
        bool firstVisit = bpsg.insert(cur).second;
        if (!firstVisit || problem->goal(cur))
            continue;
        mlcore::Action* best = greedyAction(problem, cur);
        for (mlcore::Successor succ : problem->transition(cur, best))
            pending.push_front(succ.su_state);
    }
}
Пример #10
0
// Expands the greedy policy graph from s up to (twice) a sampled effective
// horizon, checking epsilon-consistency. If every expanded state converged,
// states are labeled: fully SOLVED when the whole subgraph fit within the
// horizon, otherwise soft-labeled via residualDistance. On failure, all
// expanded states are unmarked and Bellman-updated.
// Fixes: removed an inner `State* currentState` declaration that shadowed
// the function-scope variable of the same name; removed a live debug print
// (dprint of residualDistance) and commented-out debug lines.
void SoftFLARESSolver::computeResidualDistances(State* s) {
    list<State*> open, closed;

    if (!s->checkBits(mdplib::SOLVED)) {
        open.push_front(s);
        s->depth(0.0);
    }

    bool should_label = true;
    bool subgraphWithinSearchHorizon = true;
    double effectiveHorizon = sampleEffectiveHorizon();

    while (!open.empty()) {
        State* currentState = open.front();
        open.pop_front();
        double depth = currentState->depth();
        // Beyond the search window: don't expand, and remember that the
        // subgraph extends past the horizon.
        if (depth > 2 * effectiveHorizon) {
            subgraphWithinSearchHorizon = false;
            continue;
        }

        if (problem_->goal(currentState))
            continue;

        if (ranOutOfTime())
            return;

        closed.push_front(currentState);
        currentState->setBits(mdplib::CLOSED);

        // Computed before the dead-end test to preserve the original call
        // order (greedyAction may refresh the dead-end flag).
        Action* a = greedyAction(problem_, currentState);

        if (currentState->deadEnd())
            continue;

        if (residual(problem_, currentState) > epsilon_) {
            should_label = false;
        }

        // For the plausibility distance function, depth increments depend on
        // the most likely successor's probability.
        double maxProbSuccessor = 0.0;
        if (distanceFunction_ == kPlaus) {
            for (Successor& su : problem_->transition(currentState, a)) {
                maxProbSuccessor = std::max(maxProbSuccessor, su.su_prob);
            }
        }
        for (Successor& su : problem_->transition(currentState, a)) {
            State* next = su.su_state;
            if ( (!labeledSolved(next)
                        || effectiveHorizon == kInfiniteDistance_)
                    && !next->checkBits(mdplib::CLOSED)) {
                open.push_front(next);
                next->depth(computeNewDepth(su, depth, maxProbSuccessor));
            } else if (!(next->checkBits(mdplib::SOLVED)
                            || next->checkBits(mdplib::CLOSED))) {
                // If this happens, the state was skipped only due to
                // soft-labeling. Thus, there must be parts of the subgraph
                // that are outside of the horizon
                subgraphWithinSearchHorizon = false;
            }
        }
    }

    if (should_label) {
        for (mlcore::State* state : closed) {
            state->clearBits(mdplib::CLOSED);
            if (subgraphWithinSearchHorizon) {
                state->setBits(mdplib::SOLVED);
            } else {
                assert(effectiveHorizon != kInfiniteDistance_);
                double depth = state->depth();
                if (depth <= effectiveHorizon) {
                    state->residualDistance(effectiveHorizon - depth);
                }
            }
        }
    } else {
        // Rollback: unmark and refresh the value of each expanded state.
        while (!closed.empty()) {
            State* state = closed.front();
            closed.pop_front();
            state->clearBits(mdplib::CLOSED);
            bellmanUpdate(problem_, state);
        }
    }
}
Пример #11
0
// LRTDP labeling check with time-limit support: expands the greedy policy
// graph from s and returns true iff every expanded state has residual
// <= epsilon_. On success, expanded states are labeled SOLVED and cache
// their greedy action; on failure they are unmarked and Bellman-updated.
// Fix: both timeout returns previously left every state in |closed| with
// the CLOSED bit still set, so those states could never re-enter |open|
// in later calls; the bits are now cleared before returning. Also fixed a
// misindented closing brace.
bool LRTDPSolver::checkSolved(mlcore::State* s)
{
    std::list<mlcore::State*> open, closed;

    mlcore::State* tmp = s;
    if (!tmp->checkBits(mdplib::SOLVED)) {
        open.push_front(s);
    }

    bool rv = true;
    while (!open.empty()) {
        tmp = open.front();
        open.pop_front();

        if (problem_->goal(tmp))
            continue;

        // Computed before the dead-end test to preserve the original call
        // order (greedyAction may refresh the dead-end flag).
        mlcore::Action* a = greedyAction(problem_, tmp);

        if (tmp->deadEnd())
            continue;

        if (ranOutOfTime()) {
            // Unmark what we expanded so a later call can retry.
            for (mlcore::State* sc : closed)
                sc->clearBits(mdplib::CLOSED);
            return false;
        }

        closed.push_front(tmp);
        tmp->setBits(mdplib::CLOSED);

        if (residual(problem_, tmp) > epsilon_) {
            rv = false;
            // The original paper includes the following line, but the algorithm
            // seems to work significantly faster without it
            /*  continue; */
        }

        for (mlcore::Successor su : problem_->transition(tmp, a)) {
            mlcore::State* next = su.su_state;
            if (!next->checkBits(mdplib::SOLVED) &&
                !next->checkBits(mdplib::CLOSED)) {
                open.push_front(next);
            }
        }
    }

    if (rv) {
        // All expanded states converged: label and cache the greedy action.
        for (mlcore::State* sc : closed) {
            sc->setBits(mdplib::SOLVED);
            sc->clearBits(mdplib::CLOSED);
            sc->setBestAction(greedyAction(problem_, sc));
        }
    } else {
        // Rollback: unmark and refresh the value of each expanded state.
        while (!closed.empty()) {
            tmp = closed.front();
            closed.pop_front();
            tmp->clearBits(mdplib::CLOSED);
            bellmanUpdate(problem_, tmp);
            if (ranOutOfTime()) {
                // Unmark the remainder so a later call can retry.
                for (mlcore::State* sc : closed)
                    sc->clearBits(mdplib::CLOSED);
                return false;
            }
        }
    }

    return rv;
}
Пример #12
0
// HDP's labeled depth-first search: runs Tarjan's SCC detection over the
// greedy policy graph, Bellman-updating states whose residual exceeds
// epsilon_ and labeling as SOLVED every state in a converged strongly
// connected component. |plaus| accumulates the kappa-implausibility of the
// trajectory; branches beyond minPlaus_ are pruned. Returns true iff some
// state on the explored subgraph needed an update.
// Fix: kappaList_.at(i) was read unconditionally, but computeKappa only
// fills kappaList_ when minPlaus_ != INT_MAX -- with pruning disabled this
// could throw std::out_of_range (or read stale values). The kappa term is
// now only read when pruning is active.
bool HDPSolver::dfs(mlcore::State* s, double plaus)
{
    // Bail out if the time limit (milliseconds) has been exceeded.
    auto curTime = chrono::high_resolution_clock::now();
    auto duration = chrono::duration_cast<chrono::milliseconds>(
            curTime - beginTime_).count();
    if (maxTime_ > -1 && duration > maxTime_)
        return false;

    // Prune trajectories that are too implausible.
    if (plaus > minPlaus_) {
        return false;
    }
    // Solved, goal, and dead-end states are terminal for the search.
    if (s->checkBits(mdplib::SOLVED) ||
            problem_->goal(s) ||
            s->deadEnd()) {
        s->setBits(mdplib::SOLVED);
        solvedStates_.insert(s);
        return false;
    }
    bool neededUpdate = false;
    if (residual(problem_, s) > epsilon_) {
        neededUpdate = true;
        // mini-gpt adds this loop, but this actually worsens convergence.
        // do {
        //     bellmanUpdate(problem_, s);
        // } while (residual(problem_, s) > epsilon_);
        // return true;
    }
    // Tarjan bookkeeping: push s and assign its DFS index/low-link.
    inStack_.insert(s);
    stateStack_.push_back(s);
    indices_[s] = index_;
    low_[s] = index_;
    index_++;

    Action* a = greedyAction(problem_, s);

    if (s->deadEnd()) {
        // NOTE(review): s remains on stateStack_/inStack_ on this path,
        // which looks like it could corrupt the SCC bookkeeping -- confirm
        // against the original HDP implementation before changing.
        return false;   // state is a dead-end, nothing to do
    }

    list<Successor> successors = problem_->transition(s, a);
    if (minPlaus_ != INT_MAX)
        computeKappa(successors, kappaList_);
    int i = 0;
    for (auto const & successor : successors) {
        State* next = successor.su_state;
        // kappaList_ is only valid when plausibility pruning is active.
        double kappa = (minPlaus_ != INT_MAX) ? kappaList_.at(i) : 0.0;
        if (indices_.count(next) == 0) {
            neededUpdate |= dfs(next, plaus + kappa);
            low_[s] = std::min(low_[s], low_[next]);
        } else if (inStack_.count(next) > 0) {
            // State is in the current connected component stack.
            low_[s] = std::min(low_[s], indices_[next]);
        }
        i++;
    }
    if (neededUpdate) {
        bellmanUpdate(problem_, s);
        // same as above (mini-gpt code).
        // do {
        //     bellmanUpdate(problem_, s);
        // } while (residual(problem_, s) > epsilon_);
        //return true;
    } else if (indices_[s] == low_[s]) {
        // State s is the root of a connected component: everything above it
        // on the stack converged, so label the whole component solved.
        while (true) {
            State* currentState = stateStack_.back();
            stateStack_.pop_back();
            inStack_.erase(currentState);
            currentState->setBits(mdplib::SOLVED);
            solvedStates_.insert(currentState);
            if (currentState == s)
                break;
        }
    }
    return neededUpdate;
}