void LRTDPSolver::trial(mlcore::State* s)
{
    mlcore::State* tmp = s;
    std::list<mlcore::State*> visited;
    // Simulate the greedy policy until reaching a solved state, a goal,
    // or a dead end, doing a Bellman update on every state visited.
    while (!tmp->checkBits(mdplib::SOLVED)) {
        if (problem_->goal(tmp))
            break;
        visited.push_front(tmp);
        bellmanUpdate(problem_, tmp);
        if (tmp->deadEnd())
            break;
        tmp = randomSuccessor(problem_, tmp, tmp->bestAction());
    }
    // Try to label the visited states as solved, in reverse order of visit.
    // Stop at the first state that cannot be labeled yet.
    while (!visited.empty()) {
        tmp = visited.front();
        visited.pop_front();
        if (!checkSolved(tmp))
            break;
    }
}
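// A minimal sketch of the driver loop that would invoke trial(): in LRTDP,
// trials are repeated from the initial state until it gets labeled SOLVED.
// This is not the library's actual solve(); the maxTrials_ bound is an
// assumption, mirroring the trial limits used by the other solvers below.
mlcore::Action* LRTDPSolver::solve(mlcore::State* s0)
{
    int trials = 0;
    while (!s0->checkBits(mdplib::SOLVED) && trials++ < maxTrials_)
        trial(s0);
    return s0->bestAction();
}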
double playout(Node *startNode)
{
    GameRules* rules = GameRules::instance();
    unsigned curPlayerIndex = startNode->PlayerIndex();
    State *curState = startNode->GetState();
    vector<uptr<State>> playedStates;

    // Play random moves, alternating players, until the game ends. The
    // played states are kept alive in playedStates so curState stays valid.
    while (!rules->IsTerminalState(*curState)) {
        uptr<State> nextState = randomSuccessor(curState);
        curPlayerIndex = 1 - curPlayerIndex;
        curState = nextState.get();
        playedStates.push_back(move(nextState));
    }

    // Account for the fact that the winner may not be the player of the
    // original startNode. The result of this function should be the utility
    // of the playout for the player owning the startNode.
    double utilFlip = startNode->PlayerIndex() == curPlayerIndex ? 1.0 : -1.0;
    if (rules->IsWin(*curState)) {
        return 1.0 * utilFlip;
    } else if (rules->IsLoss(*curState)) {
        return -1.0 * utilFlip;
    } else {
        return 0.0;
    }
}
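// Sketch of how a playout result is typically consumed in MCTS
// backpropagation: the utility is relative to the player owning the node it
// was sampled from, so its sign flips at each level while walking up the
// tree. Node::Parent() and Node::AddSample() are hypothetical names here,
// for illustration only.
void backpropagate(Node *leaf, double utility)
{
    for (Node *n = leaf; n != nullptr; n = n->Parent()) {
        n->AddSample(utility);  // update visit count and accumulated value
        utility = -utility;     // flip perspective for the parent's player
    }
}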
void LRTDPSolver::trial(mlcore::State* s)
{
    mlcore::State* tmp = s;
    std::list<mlcore::State*> visited;
    double accumulated_cost = 0.0;
    // Simulate the greedy policy, tracking the accumulated cost so the
    // trial can also be cut off once it exceeds the dead-end cost.
    while (!tmp->checkBits(mdplib::SOLVED)) {
        if (problem_->goal(tmp) || accumulated_cost > mdplib::dead_end_cost)
            break;
        visited.push_front(tmp);
        bellmanUpdate(problem_, tmp);
        if (tmp->deadEnd())
            break;
        accumulated_cost += problem_->cost(tmp, tmp->bestAction());
        tmp = randomSuccessor(problem_, tmp, tmp->bestAction());
    }
    if (dont_label_)
        return;
    // Try to label the visited states as solved, in reverse order of visit.
    while (!visited.empty()) {
        tmp = visited.front();
        visited.pop_front();
        if (!checkSolved(tmp))
            break;
    }
}
double RFFSolver::failProb(mlcore::State* s, int N)
{
    // Reset the estimated probability of reaching each terminal state.
    // (Renamed the loop variable so it no longer shadows the parameter s.)
    for (mlcore::State* terminal : terminalStates_)
        probabilitiesTerminals_[terminal] = 0.0;
    double totalProbabilityTerminals = 0.0;
    double delta = 1.0 / N;
    // Sample N trajectories of the greedy policy starting at s.
    for (int i = 0; i < N; i++) {
        mlcore::State* currentState = s;
        while (!problem_->goal(currentState)
                && terminalStates_.count(currentState) == 0) {
            if (currentState->deadEnd()) {
                // Treat dead-ends as goals, otherwise this method
                // might loop endlessly when there are unavoidable dead-ends
                break;
            }
            currentState = randomSuccessor(problem_,
                                           currentState,
                                           currentState->bestAction());
        }
        if (terminalStates_.count(currentState) > 0) {
            // Credit the terminal state the trajectory actually reached
            // (indexing by s here would wrongly credit the start state).
            probabilitiesTerminals_[currentState] += delta;
            totalProbabilityTerminals += delta;
        }
    }
    return totalProbabilityTerminals;
}
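// Sketch of how failProb() fits into an RFF-style main loop: keep extending
// the partial policy until the estimated probability of falling off it (i.e.
// reaching an unplanned-for terminal state) drops below a threshold. The
// names epsilon_, numSamples, and expandTerminalStates() are hypothetical,
// used only to illustrate the control flow.
void RFFSolver::planUntilRobust(mlcore::State* s0, int numSamples)
{
    while (failProb(s0, numSamples) > epsilon_) {
        // Plan for the terminal states that the sampled trajectories
        // reached, then re-estimate the failure probability.
        expandTerminalStates();
    }
}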
Action* SSiPPSolver::solveOriginal(State* s0)
{
    beginTime_ = std::chrono::high_resolution_clock::now();
    if (maxTime_ > -1) {
        maxTrials_ = 10000000;
    }
    for (int i = 0; i < maxTrials_; i++) {
        mlcore::State* currentState = s0;
        double accumulated_cost = 0.0;
        while (!problem_->goal(currentState)
                && accumulated_cost < mdplib::dead_end_cost) {
            // Creating the short-sighted SSP
            StateSet reachableStates, tipStates;
            if (useTrajProbabilities_) {
                getReachableStatesTrajectoryProbs(
                    problem_, currentState, reachableStates, tipStates, rho_);
            } else {
                reachableStates.insert(currentState);
                getReachableStates(problem_, reachableStates, tipStates, t_);
            }

            // Solving the short-sighted SSP
            WrapperProblem* wrapper = new WrapperProblem(problem_);
            wrapper->setNewInitialState(currentState);
            wrapper->overrideStates(&reachableStates);
            wrapper->overrideGoals(&tipStates);
            VISolver vi(wrapper, maxTrials_);

            // Adjusting maximum planning time for VI
            if (maxTime_ > -1) {
                auto endTime = std::chrono::high_resolution_clock::now();
                auto timeElapsed = std::chrono::duration_cast<
                    std::chrono::milliseconds>(endTime - beginTime_).count();
                vi.maxPlanningTime(std::max(0, maxTime_ - (int) timeElapsed));
            }
            vi.solve();

            if (currentState->deadEnd() || ranOutOfTime()) {
                wrapper->cleanup();
                delete wrapper;
                break;
            }

            // Execute the best action found for the current state.
            Action* action = currentState->bestAction();
            accumulated_cost += problem_->cost(currentState, action);
            currentState = randomSuccessor(problem_, currentState, action);
            wrapper->cleanup();
            delete wrapper;
        }
        if (ranOutOfTime())
            break;
    }
    return s0->bestAction();
}
Action* SSiPPSolver::solveLabeled(State* s0)
{
    beginTime_ = std::chrono::high_resolution_clock::now();
    while (!s0->checkBits(mdplib::SOLVED_SSiPP)) {
        State* currentState = s0;
        list<State*> visited;
        while (!currentState->checkBits(mdplib::SOLVED_SSiPP)) {
            visited.push_front(currentState);
            if (problem_->goal(currentState))
                break;

            // Constructing short-sighted SSP
            StateSet reachableStates, tipStates;
            if (useTrajProbabilities_) {
                getReachableStatesTrajectoryProbs(
                    problem_, currentState, reachableStates, tipStates, rho_);
            } else {
                reachableStates.insert(currentState);
                getReachableStates(problem_, reachableStates, tipStates, t_);
            }
            WrapperProblem wrapper(problem_);
            wrapper.overrideStates(&reachableStates);
            wrapper.overrideGoals(&tipStates);

            // Solving the short-sighted SSP
            optimalSolver(&wrapper, currentState);

            if (currentState->deadEnd()) {
                // Release the overridden containers before the wrapper
                // goes out of scope.
                wrapper.cleanup();
                break;
            }

            // Simulate best action
            currentState = randomSuccessor(
                problem_, currentState, greedyAction(problem_, currentState));
            wrapper.cleanup();

            // Return if it ran out of time
            if (ranOutOfTime()) {
                return greedyAction(problem_, s0);
            }
        }
        // Try to label the visited states as solved, in reverse order.
        while (!visited.empty()) {
            currentState = visited.front();
            visited.pop_front();
            if (!checkSolved(currentState))
                break;
        }
    }
    return greedyAction(problem_, s0);
}
double sampleTrial(mlcore::Problem* problem, mlcore::State* s)
{
    mlcore::State* tmp = s;
    double discount = 1.0;
    double cost = 0.0;
    while (!problem->goal(tmp)) {
        mlcore::Action* a = greedyAction(problem, tmp);
        double discountedCost = discount * problem->cost(tmp, a);
        // Once the discounted contribution becomes negligible, stop to
        // avoid an infinite loop (the original "1.0-6" evaluated to -5.0,
        // so the cutoff never triggered; 1.0e-6 was clearly intended).
        if (discountedCost < 1.0e-6)
            break;
        cost += discountedCost;
        tmp = randomSuccessor(problem, tmp, a);
        discount *= problem->gamma();
    }
    return cost;
}
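// sampleTrial() returns the discounted cost of a single sampled trajectory,
// so the expected cost of the greedy policy can be estimated by averaging
// many trials. A minimal sketch; estimateGreedyPolicyCost is a hypothetical
// helper, not part of the library.
double estimateGreedyPolicyCost(mlcore::Problem* problem,
                                mlcore::State* s,
                                int numTrials)
{
    double total = 0.0;
    for (int i = 0; i < numTrials; i++)
        total += sampleTrial(problem, s);
    return total / numTrials;
}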
void SoftFLARESSolver::trial(State* s)
{
    State* currentState = s;
    list<State*> visited;
    double accumulated_cost = 0.0;
    while (true) {
        if (problem_->goal(currentState))
            break;
        visited.push_front(currentState);
        bellmanUpdate(problem_, currentState);
        if (currentState->deadEnd()
                || accumulated_cost >= mdplib::dead_end_cost)
            break;
        if (ranOutOfTime())
            return;
        mlcore::Action* greedy_action = greedyAction(problem_, currentState);
        accumulated_cost += problem_->cost(currentState, greedy_action);
        // When labeling is enabled, successors are drawn with
        // sampleSuccessor instead of a purely random successor.
        currentState = noLabeling_ ?
            randomSuccessor(problem_, currentState, greedy_action) :
            sampleSuccessor(currentState, greedy_action);
        if (currentState == nullptr) {
            break;
        }
    }
    if (noLabeling_)
        return;
    // Update residual distance labels for the visited states, in reverse
    // order of visit, stopping at the first state not yet labeled solved.
    while (!visited.empty()) {
        currentState = visited.front();
        visited.pop_front();
        computeResidualDistances(currentState);
        if (!labeledSolved(currentState))
            break;
    }
}