/**
 * Stack-based implementation of LAO* rooted at s0.
 *
 * The existing library implementation is not used because the reduced
 * states with j=k are going to be solved using FF.
 *
 * The method alternates between two phases until it converges or the
 * planning time runs out:
 *
 *   1. Expansion: repeated depth-first passes from s0 that follow each
 *      state's best action. A state without a best action is updated with
 *      bellmanUpdate() and counted as expanded. The phase ends after a pass
 *      that expands nothing.
 *   2. Convergence test: depth-first passes that update every reachable
 *      state and track the largest value returned by bellmanUpdate()
 *      (presumably the Bellman residual). The method returns when that
 *      value drops below epsilon_; it goes back to phase 1 when a state
 *      without a best action is found or a best action changes.
 *
 * When useFF_ is set, successors of states whose exceptionCount() is 0 are
 * never pushed, so the search does not descend below those states.
 *
 * @param s0 the state from which the search starts.
 */
void FFReducedModelSolver::lao(mlcore::State* s0)
{
    StateSet visited;
    int countExpanded = 0;
    while (true) {
        // Phase 1: expand the tip states of the best partial solution graph.
        do {
            visited.clear();
            countExpanded = 0;
            list<mlcore::State*> stateStack;
            stateStack.push_back(s0);
            while (!stateStack.empty()) {
                if (timeHasRunOut(startingPlanningTime_, maxPlanningTime_))
                    return;
                mlcore::State* s = stateStack.back();
                stateStack.pop_back();
                if (!visited.insert(s).second)  // state was already visited.
                    continue;
                if (s->deadEnd() || problem_->goal(s))
                    continue;
                if (s->bestAction() == nullptr) {
                    // state has never been expanded.
                    this->bellmanUpdate(s);
                    countExpanded++;
                    continue;
                }
                mlcore::Action* a = s->bestAction();
                auto* reducedState = static_cast<mlreduced::ReducedState*>(s);
                // The test does not depend on the successor, so it is
                // evaluated once. transition() is still called in every
                // case so that any side effects it may have are preserved.
                const bool pushSuccessors =
                    !(useFF_ && reducedState->exceptionCount() == 0);
                for (const Successor& sccr : problem_->transition(s, a)) {
                    if (pushSuccessors)
                        stateStack.push_back(sccr.su_state);
                }
                this->bellmanUpdate(s);
            }
        } while (countExpanded != 0);

        // Phase 2: convergence test over the current best solution graph.
        while (true) {
            visited.clear();
            list<mlcore::State*> stateStack;
            stateStack.push_back(s0);
            double error = 0.0;
            while (!stateStack.empty()) {
                if (timeHasRunOut(startingPlanningTime_, maxPlanningTime_))
                    return;
                mlcore::State* s = stateStack.back();
                stateStack.pop_back();
                if (!visited.insert(s).second)
                    continue;
                if (s->deadEnd() || problem_->goal(s))
                    continue;
                mlcore::Action* prevAction = s->bestAction();
                if (prevAction == nullptr) {
                    // if it reaches this point it hasn't converged yet.
                    error = mdplib::dead_end_cost + 1;
                } else {
                    auto* reducedState =
                        static_cast<mlreduced::ReducedState*>(s);
                    const bool pushSuccessors =
                        !(useFF_ && reducedState->exceptionCount() == 0);
                    for (const Successor& sccr :
                             problem_->transition(s, prevAction)) {
                        if (pushSuccessors)
                            stateStack.push_back(sccr.su_state);
                    }
                }
                error = std::max(error, this->bellmanUpdate(s));
                if (prevAction != s->bestAction()) {
                    // it hasn't converged because the best action changed.
                    error = mdplib::dead_end_cost + 1;
                    break;
                }
            }
            if (error < epsilon_)
                return;
            if (error > mdplib::dead_end_cost) {
                break;  // BPSG changed, must expand tip nodes again
            }
        }
    }
}
// This implementation is not used anymore. Re-using the labels is incorrect // because states can be solved in one of the short-sighted SSPs but not another // (due to the horizon mismatch). void SSiPPSolver::optimalSolver(WrapperProblem* problem, State* s0) { // This is a stack based implementation of LAO*. // We don't use the existing library implementation so that we can take // advantage of the SOLVED_SSiPP labels. StateSet visited; int countExpanded = 0; while (true) { do { visited.clear(); countExpanded = 0; list<State*> stateStack; stateStack.push_back(s0); while (!stateStack.empty()) { if (ranOutOfTime()) { return; } State* s = stateStack.back(); stateStack.pop_back(); if (!visited.insert(s).second) // state was already visited. continue; if (s->deadEnd() || problem->goal(s) || s->checkBits(mdplib::SOLVED_SSiPP) || problem->overrideGoals()->count(s) > 0) continue; int cnt = 0; if (s->bestAction() == nullptr) { // state has never been expanded. bellmanUpdate(problem, s); countExpanded++; continue; } else { Action* a = s->bestAction(); for (Successor sccr : problem->transition(s, a)) stateStack.push_back(sccr.su_state); } if (!s->checkBits(mdplib::SOLVED_SSiPP)) { bellmanUpdate(problem, s); } } } while (countExpanded != 0); while (true) { visited.clear(); list<State*> stateStack; stateStack.push_back(s0); double error = 0.0; while (!stateStack.empty()) { if (ranOutOfTime()) { return; } State* s = stateStack.back(); stateStack.pop_back(); if (s->deadEnd() || problem->goal(s) || s->checkBits(mdplib::SOLVED_SSiPP || problem->overrideGoals()->count(s) > 0)) continue; if (!visited.insert(s).second) continue; Action* prevAction = s->bestAction(); if (prevAction == nullptr) { // if it reaches this point it hasn't converged yet. 
error = mdplib::dead_end_cost + 1; } else { for (Successor sccr : problem->transition(s, prevAction)) stateStack.push_back(sccr.su_state); } error = std::max(error, bellmanUpdate(problem, s)); if (prevAction != s->bestAction()) { // it hasn't converged because the best action changed. error = mdplib::dead_end_cost + 1; break; } } if (error < epsilon_) return; if (error > mdplib::dead_end_cost) { break; // BPSG changed, must expand tip nodes again } } } }