// Labeled SSiPP main loop: repeatedly runs greedy trials from s0, solving a
// short-sighted SSP around each visited state, until s0 is labeled
// SOLVED_SSiPP or the time limit is hit. Returns the greedy action at s0
// (or s0's greedy action as a best-effort answer on timeout).
Action* SSiPPSolver::solveLabeled(State* s0) {
    beginTime_ = std::chrono::high_resolution_clock::now();
    while (!s0->checkBits(mdplib::SOLVED_SSiPP)) {
        State* currentState = s0;
        // States visited during this trial, most recent first; used for the
        // backward labeling pass below.
        list<State*> visited;
        while (!currentState->checkBits(mdplib::SOLVED_SSiPP)) {
            visited.push_front(currentState);
            if (problem_->goal(currentState))
                break;
            // Constructing short-sighted SSP around the current state.
            // tipStates (the frontier) become artificial goals of the wrapper.
            StateSet reachableStates, tipStates;
            if (useTrajProbabilities_) {
                // Frontier bounded by trajectory probability threshold rho_.
                getReachableStatesTrajectoryProbs(
                    problem_, currentState, reachableStates, tipStates, rho_);
            } else {
                // Frontier bounded by horizon t_ (number of steps).
                reachableStates.insert(currentState);
                getReachableStates(problem_, reachableStates, tipStates, t_);
            }
            WrapperProblem wrapper(problem_);
            wrapper.overrideStates(&reachableStates);
            wrapper.overrideGoals(&tipStates);
            // Solving the short-sighted SSP optimally from currentState.
            optimalSolver(&wrapper, currentState);
            // NOTE(review): this break skips wrapper.cleanup(); presumably
            // WrapperProblem's destructor handles it — confirm.
            if (currentState->deadEnd())
                break;
            // Simulate best action to advance the trial.
            currentState = randomSuccessor(problem_, currentState,
                                           greedyAction(problem_, currentState));
            wrapper.cleanup();
            // Return if it ran out of time.
            if (ranOutOfTime()) {
                return greedyAction(problem_, s0);
            }
        }
        // Backward pass: try to label visited states SOLVED_SSiPP, starting
        // from the deepest; stop at the first state that fails checkSolved.
        while (!visited.empty()) {
            currentState = visited.front();
            visited.pop_front();
            if (!checkSolved(currentState))
                break;
        }
    }
    return greedyAction(problem_, s0);
}
// Returns the Bellman residual of state s: the absolute difference between
// the Q-value of the greedy action and the state's current cost estimate.
// Dead-end states (no greedy action) have a residual of zero by convention.
double residual(mlcore::Problem* problem, mlcore::State* s) {
    mlcore::Action* greedy = greedyAction(problem, s);
    if (greedy == nullptr) {
        // State is a dead-end; nothing to do here.
        return 0.0;
    }
    const double qGreedy = qvalue(problem, s, greedy);
    return fabs(qGreedy - s->cost());
}
// Runs SoftFLARES trials from s0 until moreTrials() says to stop (label
// reached, trial budget, or time limit), then returns the greedy action.
Action* SoftFLARESSolver::solve(State* s0) {
    beginTime_ = std::chrono::high_resolution_clock::now();
    for (int completedTrials = 0;
            moreTrials(s0, completedTrials, beginTime_);
            ++completedTrials) {
        trial(s0);
    }
    return greedyAction(problem_, s0);
}
// LRTDP entry point: performs trials from s0 until the state is labeled
// SOLVED or maxTrials_ is exhausted. On timeout, falls back to the current
// greedy action; otherwise returns the best action recorded at s0.
mlcore::Action* LRTDPSolver::solve(mlcore::State* s0) {
    beginTime_ = std::chrono::high_resolution_clock::now();
    for (int completedTrials = 0;
            !s0->checkBits(mdplib::SOLVED) && completedTrials < maxTrials_;
            ++completedTrials) {
        trial(s0);
        if (ranOutOfTime()) {
            return greedyAction(problem_, s0);
        }
    }
    return s0->bestAction();
}
// Labeling check for SSiPP: explores the greedy policy graph rooted at s and
// returns true iff every reachable state is epsilon-consistent (residual
// <= epsilon_) with no dead-ends. On success, marks all visited states
// SOLVED_SSiPP; on failure, rolls back CLOSED_SSiPP marks and performs a
// Bellman update on each visited state.
bool SSiPPSolver::checkSolved(State* s) {
    std::list<State*> open, closed;
    State* tmp = s;
    if (!tmp->checkBits(mdplib::SOLVED_SSiPP)) {
        open.push_front(s);
        // CLOSED_SSiPP is used as a "queued/visited" mark during this search.
        s->setBits(mdplib::CLOSED_SSiPP);
    }
    bool rv = true;
    while (!open.empty()) {
        tmp = open.front();
        open.pop_front();
        closed.push_front(tmp);
        Action* a = greedyAction(problem_, tmp);
        tmp->setBestAction(a);
        if (problem_->goal(tmp))
            continue;
        if (tmp->deadEnd()) {
            rv = false;
            continue;
        }
        // A large residual means the value function hasn't converged here;
        // keep exploring anyway so every reachable state gets updated below.
        if (residual(problem_, tmp) > epsilon_) {
            rv = false;
        }
        // Return if it ran out of time.
        // NOTE(review): this early return leaves CLOSED_SSiPP bits set on
        // the states in `closed` — confirm that callers tolerate this.
        if (ranOutOfTime()) {
            return false;
        }
        for (Successor su : problem_->transition(tmp, a)) {
            State* next = su.su_state;
            if (!next->checkBits(mdplib::SOLVED_SSiPP)
                    && !next->checkBits(mdplib::CLOSED_SSiPP)) {
                open.push_front(next);
                next->setBits(mdplib::CLOSED_SSiPP);
            }
        }
    }
    if (rv) {
        // Everything reachable is converged: label it all solved.
        for (State* sc : closed) {
            sc->setBits(mdplib::SOLVED_SSiPP);
        }
    } else {
        // Not converged: undo the visit marks and refresh each state's value.
        while (!closed.empty()) {
            tmp = closed.front();
            closed.pop_front();
            tmp->clearBits(mdplib::CLOSED_SSiPP);
            bellmanUpdate(problem_, tmp);
        }
    }
    return rv;
}
// LRTDP labeling check (Bonet & Geffner): explores the greedy policy graph
// rooted at s and returns true iff all reachable states are
// epsilon-consistent. On success, labels every visited state SOLVED; on
// failure, Bellman-updates each visited state in reverse visit order.
bool LRTDPSolver::checkSolved(mlcore::State* s) {
    std::list<mlcore::State*> open, closed;
    mlcore::State* tmp = s;
    if (!tmp->checkBits(mdplib::SOLVED)) {
        open.push_front(s);
    }
    bool rv = true;
    while (!open.empty()) {
        tmp = open.front();
        open.pop_front();
        mlcore::Action* a = greedyAction(problem_, tmp);
        // Goals and dead-ends terminate expansion (and are not closed here).
        if (problem_->goal(tmp) || tmp->deadEnd())
            continue;
        closed.push_front(tmp);
        tmp->setBits(mdplib::CLOSED);
        if (residual(problem_, tmp) > epsilon_)
            rv = false;
        // NOTE(review): CLOSED is set only when a state is popped, so the
        // open list can hold duplicates of a state pushed twice before its
        // first pop; appears harmless but wastes work — confirm.
        for (mlcore::Successor su : problem_->transition(tmp, a)) {
            mlcore::State* next = su.su_state;
            if (!next->checkBits(mdplib::SOLVED)
                    && !next->checkBits(mdplib::CLOSED)) {
                open.push_front(next);
            }
        }
    }
    if (rv) {
        // Converged: label everything visited as solved.
        for (mlcore::State* sc : closed) {
            sc->setBits(mdplib::SOLVED);
            sc->clearBits(mdplib::CLOSED);
        }
    } else {
        // Not converged: clear visit marks and refresh values.
        while (!closed.empty()) {
            tmp = closed.front();
            closed.pop_front();
            tmp->clearBits(mdplib::CLOSED);
            bellmanUpdate(problem_, tmp);
        }
    }
    return rv;
}
// Simulates one greedy rollout from s to a goal and returns the accumulated
// discounted cost. The rollout is truncated once a step's discounted cost
// contribution becomes negligible, so discounted problems (gamma < 1)
// cannot loop forever.
double sampleTrial(mlcore::Problem* problem, mlcore::State* s) {
    mlcore::State* tmp = s;
    double discount = 1.0;
    double cost = 0.0;
    while (!problem->goal(tmp)) {
        mlcore::Action* a = greedyAction(problem, tmp);
        double discountedCost = discount * problem->cost(tmp, a);
        // BUG FIX: the original threshold was written as "1.0-6", which is
        // the subtraction 1.0 - 6 == -5.0 — so this guard never fired for
        // non-negative costs and the loop could run forever. The intended
        // literal is 1.0e-6.
        if (discountedCost < 1.0e-6)
            break; // contribution is negligible; stop to avoid infinite loop
        cost += discountedCost;
        tmp = randomSuccessor(problem, tmp, a);
        discount *= problem->gamma();
    }
    return cost;
}
// Performs one SoftFLARES trial from s: follows the greedy policy (sampling
// successors through the soft-labeling distribution unless noLabeling_),
// Bellman-updating each visited state, until a goal, dead-end, cost bound,
// or timeout. Afterwards (labeling mode only) runs the backward labeling
// pass, computing residual distances from the deepest visited state up to
// the first state that fails to label.
void SoftFLARESSolver::trial(State* s) {
    State* currentState = s;
    // Visited states, most recent first, for the backward labeling pass.
    list<State*> visited;
    double accumulated_cost = 0.0;
    while (true) {
        if (problem_->goal(currentState))
            break;
        visited.push_front(currentState);
        bellmanUpdate(problem_, currentState);
        // Stop on dead-ends or once the trial's cost hits the dead-end bound.
        if (currentState->deadEnd()
                || accumulated_cost >= mdplib::dead_end_cost)
            break;
        // On timeout, skip the labeling pass entirely.
        if (ranOutOfTime())
            return;
        mlcore::Action* greedy_action = greedyAction(problem_, currentState);
        accumulated_cost += problem_->cost(currentState, greedy_action);
        // With labeling enabled, successors are sampled through the
        // soft-labeling distribution; sampleSuccessor may return nullptr.
        currentState = noLabeling_ ?
            randomSuccessor(problem_, currentState, greedy_action) :
            sampleSuccessor(currentState, greedy_action);
        if (currentState == nullptr) {
            break;
        }
    }
    if (noLabeling_)
        return;
    // FIX: removed a stray unconditional debug print
    // (dprint("COST ", accumulated_cost)) that was left active while the
    // other debug prints in this function were commented out.
    while (!visited.empty()) {
        currentState = visited.front();
        visited.pop_front();
        computeResidualDistances(currentState);
        if (!labeledSolved(currentState))
            break;
    }
}
// Collects into bpsg the set of states reachable from initialState by
// following the greedy policy (the best partial solution graph), using an
// explicit depth-first traversal. Goal states are included but not expanded.
void getBestPartialSolutionGraph(mlcore::Problem* problem,
                                 mlcore::State* initialState,
                                 mlcore::StateSet& bpsg) {
    std::list<mlcore::State*> pending;
    pending.push_front(initialState);
    while (!pending.empty()) {
        mlcore::State* current = pending.front();
        pending.pop_front();
        // insert().second is false when the state was already visited.
        const bool firstVisit = bpsg.insert(current).second;
        if (!firstVisit || problem->goal(current))
            continue;
        mlcore::Action* best = greedyAction(problem, current);
        for (mlcore::Successor successor : problem->transition(current, best)) {
            pending.push_front(successor.su_state);
        }
    }
}
// Soft-FLARES labeling pass: explores the greedy policy graph rooted at s up
// to a (sampled) effective horizon. If every explored state is
// epsilon-consistent, either labels the whole subgraph SOLVED (when it fits
// entirely within the horizon) or assigns residual distances to states
// within the horizon; otherwise performs Bellman updates on all explored
// states.
void SoftFLARESSolver::computeResidualDistances(State* s) {
    list<State*> open, closed;
    State* currentState = s;
    if (!currentState->checkBits(mdplib::SOLVED)) {
        open.push_front(currentState);
        currentState->depth(0.0);
    }
    bool should_label = true;
    bool subgraphWithinSearchHorizon = true;
    double effectiveHorizon = sampleEffectiveHorizon();
    while (!open.empty()) {
        // NOTE(review): this declaration shadows the outer currentState;
        // intentional here since the outer variable is not used again.
        State* currentState = open.front();
        open.pop_front();
        double depth = currentState->depth();
        // Beyond twice the horizon: stop expanding this branch and remember
        // that the subgraph extends past the search horizon.
        if (depth > 2 * effectiveHorizon) {
            subgraphWithinSearchHorizon = false;
            continue;
        }
        if (problem_->goal(currentState))
            continue;
        // NOTE(review): this early return leaves CLOSED bits set on states
        // already in `closed` — confirm callers tolerate this on timeout.
        if (ranOutOfTime())
            return;
        closed.push_front(currentState);
        currentState->setBits(mdplib::CLOSED);
        Action* a = greedyAction(problem_, currentState);
        if (currentState->deadEnd())
            continue;
        if (residual(problem_, currentState) > epsilon_) {
            should_label = false;
        }
        // For the kPlaus distance function, depths are normalized by the
        // most likely successor's probability.
        double maxProbSuccessor = 0.0;
        if (distanceFunction_ == kPlaus) {
            for (Successor& su : problem_->transition(currentState, a)) {
                maxProbSuccessor = std::max(maxProbSuccessor, su.su_prob);
            }
        }
        for (Successor& su : problem_->transition(currentState, a)) {
            State* next = su.su_state;
            if ( (!labeledSolved(next)
                    || effectiveHorizon == kInfiniteDistance_)
                    && !next->checkBits(mdplib::CLOSED)) {
                open.push_front(next);
                next->depth(computeNewDepth(su, depth, maxProbSuccessor));
            } else if (!(next->checkBits(mdplib::SOLVED)
                        || next->checkBits(mdplib::CLOSED))) {
                // If this happens, the state was skipped only due to
                // soft-labeling. Thus, there must be parts of the subgraph
                // that are outside of the horizon.
                subgraphWithinSearchHorizon = false;
            }
        }
    }
    if (should_label) {
        for (mlcore::State* state : closed) {
            state->clearBits(mdplib::CLOSED);
            if (subgraphWithinSearchHorizon) {
                // The entire reachable subgraph converged: hard label.
                state->setBits(mdplib::SOLVED);
            } else {
                assert(effectiveHorizon != kInfiniteDistance_);
                double depth = state->depth();
                // Soft label: record how far inside the horizon this state
                // is; shallower states get larger residual distances.
                if (depth <= effectiveHorizon) {
                    state->residualDistance(effectiveHorizon - depth);
                    // NOTE(review): looks like a leftover debug print —
                    // confirm whether it should remain.
                    dprint(state->residualDistance());
                }
            }
        }
    } else {
        // Not converged: clear visit marks and refresh values.
        while (!closed.empty()) {
            State* state = closed.front();
            closed.pop_front();
            state->clearBits(mdplib::CLOSED);
            bellmanUpdate(problem_, state);
        }
    }
}
// LRTDP labeling check, timeout-aware variant: explores the greedy policy
// graph rooted at s and returns true iff all reachable states are
// epsilon-consistent. On success, labels visited states SOLVED and caches
// their greedy action; on failure or timeout, rolls back CLOSED marks with
// Bellman updates. (SOURCE also contains another version of this method
// without timeout handling; presumably only one is compiled — confirm.)
bool LRTDPSolver::checkSolved(mlcore::State* s) {
    std::list<mlcore::State*> open, closed;
    mlcore::State* tmp = s;
    if (!tmp->checkBits(mdplib::SOLVED)) {
        open.push_front(s);
    }
    bool rv = true;
    while (!open.empty()) {
        tmp = open.front();
        open.pop_front();
        if (problem_->goal(tmp))
            continue;
        mlcore::Action* a = greedyAction(problem_, tmp);
        if (tmp->deadEnd())
            continue;
        // NOTE(review): this early return leaves CLOSED bits set on states
        // already in `closed` — confirm callers tolerate this on timeout.
        if (ranOutOfTime())
            return false;
        closed.push_front(tmp);
        tmp->setBits(mdplib::CLOSED);
        if (residual(problem_, tmp) > epsilon_) {
            rv = false;
            // The original paper includes the following line, but the algorithm
            // seems to work significantly faster without it
            /* continue; */
        }
        for (mlcore::Successor su : problem_->transition(tmp, a)) {
            mlcore::State* next = su.su_state;
            if (!next->checkBits(mdplib::SOLVED)
                    && !next->checkBits(mdplib::CLOSED)) {
                open.push_front(next);
            }
        }
    }
    if (rv) {
        // Converged: label visited states and cache their best action.
        for (mlcore::State* sc : closed) {
            sc->setBits(mdplib::SOLVED);
            sc->clearBits(mdplib::CLOSED);
            sc->setBestAction(greedyAction(problem_, sc));
        }
    } else {
        // Not converged: clear visit marks and refresh values.
        while (!closed.empty()) {
            tmp = closed.front();
            closed.pop_front();
            tmp->clearBits(mdplib::CLOSED);
            bellmanUpdate(problem_, tmp);
            if (ranOutOfTime())
                return false;
        }
    }
    return rv;
}
// HDP depth-first search (Tarjan-style): explores the greedy policy graph
// from s, restricted to plausibility at most minPlaus_, performing Bellman
// updates where the residual exceeds epsilon_ and labeling strongly
// connected components of converged states as SOLVED. Returns true iff any
// state along this branch needed an update.
bool HDPSolver::dfs(mlcore::State* s, double plaus) {
    // Time-limit check: abort the search once maxTime_ (ms) is exceeded.
    auto curTime = chrono::high_resolution_clock::now();
    auto duration = chrono::duration_cast<chrono::milliseconds>(
        curTime - beginTime_).count();
    if (maxTime_ > -1 && duration > maxTime_)
        return false;
    // Prune branches whose accumulated implausibility exceeds the bound.
    if (plaus > minPlaus_) {
        return false;
    }
    // Solved states, goals, and dead-ends are terminal: label and stop.
    if (s->checkBits(mdplib::SOLVED)
            || problem_->goal(s)
            || s->deadEnd()) {
        s->setBits(mdplib::SOLVED);
        solvedStates_.insert(s);
        return false;
    }
    bool neededUpdate = false;
    if (residual(problem_, s) > epsilon_) {
        neededUpdate = true;
        // mini-gpt adds this loop, but this actually worsens convergence.
        // do {
        //    bellmanUpdate(problem_, s);
        // } while (residual(problem_, s) > epsilon_);
        // return true;
    }
    // Tarjan bookkeeping: push s on the component stack and assign indices.
    inStack_.insert(s);
    stateStack_.push_back(s);
    indices_[s] = index_;
    low_[s] = index_;
    index_++;
    Action* a = greedyAction(problem_, s);
    // NOTE(review): this early return leaves s on stateStack_/inStack_
    // without popping it — confirm this dead-end path cannot corrupt the
    // component stack for ancestors.
    if (s->deadEnd()) {
        return false; // state is a dead-end, nothing to do
    }
    list<Successor> successors = problem_->transition(s, a);
    // kappaList_ holds per-successor implausibility increments.
    // NOTE(review): kappaList_.at(i) is read below even when computeKappa
    // was skipped (minPlaus_ == INT_MAX) — presumably kappaList_ is then
    // sized elsewhere; verify, else .at() can throw.
    if (minPlaus_ != INT_MAX)
        computeKappa(successors, kappaList_);
    int i = 0;
    for (auto const & successor : successors) {
        State* next = successor.su_state;
        if (indices_.count(next) == 0) {
            // Unvisited successor: recurse and propagate its low-link.
            neededUpdate |= dfs(next, plaus + kappaList_.at(i));
            low_[s] = std::min(low_[s], low_[next]);
        } else if (inStack_.count(next) > 0) {
            // State is in the current connected component stack.
            low_[s] = std::min(low_[s], indices_[next]);
        }
        i++;
    }
    if (neededUpdate) {
        bellmanUpdate(problem_, s);
        // same as above (mini-gpt code).
        // do {
        //    bellmanUpdate(problem_, s);
        // } while (residual(problem_, s) > epsilon_);
        //return true;
    } else if (indices_[s] == low_[s]) {
        // State s is the root of a connected component: every state in the
        // component converged, so pop and label them all SOLVED.
        while (true) {
            State* currentState = stateStack_.back();
            stateStack_.pop_back();
            inStack_.erase(currentState);
            currentState->setBits(mdplib::SOLVED);
            solvedStates_.insert(currentState);
            if (currentState == s)
                break;
        }
    }
    return neededUpdate;
}