double UCTAgent::MCTS(const State& state, const int depth) //FIXME: no rollout phase!
{
  // Stop at absorbing states or once the effective horizon 1/(1 - gamma) is exceeded.
  if (state.absorbing() || depth > 1.0 / (1.0 - gamma)) {
#if DEBUG
    std::cerr << "[" << state << "]";
#endif
    return 0.0;
  }

  if (state_counts_.count(state)) {
    // Tree policy: the state is already expanded, so select an action with UCB1.
    Action action = UCB1(state, 10.0);
    std::pair<State, double> sample = TaxiEnv::Sample(state, action);
#if DEBUG
    std::cerr << "[" << state << ", " << action_name(action) << "] -> ";
#endif
    int n = state_action_counts_[state][action];
    double u = qtable_[state][action];
    // Sampled return: immediate reward plus the discounted value of the recursive call.
    double v = sample.second + gamma * MCTS(sample.first, depth + 1);
    // Incremental mean update: Q <- Q + (v - Q) / (n + 1).
    double k = 1.0 / (n + 1);
    double w = u + k * (v - u);

    state_counts_[state] += 1;
    state_action_counts_[state][action] += 1;
    qtable_[state][action] = w;

    return v;
  } else {
    // Unvisited state: expand it and estimate its value with a rollout.
    state_counts_[state] = 1;
    return Rollout(state, depth);
  }
}
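The UCB1(state, 10.0) call above selects the action maximizing Q(s,a) + c * sqrt(ln N(s) / N(s,a)), where N(s) and N(s,a) are the visit counts kept in state_counts_ and state_action_counts_ and c is the exploration constant (10.0 here). The project's own UCB1 implementation is not shown; the following is a minimal standalone sketch of that selection rule, with hypothetical names (SelectUCB1, integer action indices) rather than the class's actual API.

#include <cmath>
#include <cstddef>
#include <limits>
#include <vector>

// Sketch of the UCB1 rule: pick argmax_a Q(s,a) + c * sqrt(ln N(s) / N(s,a)),
// trying any unvisited action first. Names and signature are illustrative only.
int SelectUCB1(const std::vector<double>& q,   // Q(s,a) per action
               const std::vector<int>& n_sa,   // N(s,a) per action
               int n_s,                        // N(s), total visits to the state
               double c)                       // exploration constant (10.0 above)
{
  int best = 0;
  double best_score = -std::numeric_limits<double>::infinity();
  for (std::size_t a = 0; a < q.size(); ++a) {
    if (n_sa[a] == 0) return static_cast<int>(a);  // unvisited action: explore it
    double score = q[a] + c * std::sqrt(std::log(static_cast<double>(n_s)) / n_sa[a]);
    if (score > best_score) { best_score = score; best = static_cast<int>(a); }
  }
  return best;
}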
Action UCTAgent::UCT(const State& state)
{
  const int trials = 1 << 10;  // 1024 simulations from the root state
  state_counts_[state] = 1;
  for (int i = 0; i < trials; ++i) {
#if DEBUG
    double reward = MCTS(state, 0);
    std::cerr << ": " << reward << std::endl;
    std::cerr << std::endl;
#else
    MCTS(state, 0);
#endif
  }
  return BestAction(state);
}
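BestAction(state) is not shown either; after the simulation budget is spent it presumably returns the greedy action at the root, i.e. argmax_a Q(root, a) with no exploration bonus. A self-contained sketch of that step, again with hypothetical names:

#include <cstddef>
#include <vector>

// Greedy final choice: return the index of the action with the highest Q value.
int SelectGreedy(const std::vector<double>& q)
{
  int best = 0;
  for (std::size_t a = 1; a < q.size(); ++a) {
    if (q[a] > q[best]) best = static_cast<int>(a);
  }
  return best;
}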
const Action Janggi::CalculateNextAction(Turn turn)
{
  // Mini-max algorithm:
  //Node s = Minmax(curNode, MINMAX_DEPTH, turn); // Minmax returns one of its children.

  // Alpha-beta pruning:
  //Node s = AlphaBeta(curNode, MINMAX_DEPTH, INT_MIN, INT_MAX, turn);

  // MCTS algorithm:
  Node s = MCTS(turn);
  return s.GetAction();
}