Exemplo n.º 1
0
/**
 * Runs a single UCT simulation starting at `state` and returns the sampled
 * discounted return.
 *
 * - If `state` is absorbing, or `depth` exceeds 1 / (1 - gamma), the
 *   simulation stops and contributes 0.
 * - If `state` is already recorded in state_counts_, an action is chosen with
 *   UCB1, one transition is drawn from TaxiEnv::Sample, the simulation
 *   recurses on the successor, and the visit counts and Q estimate for
 *   (state, action) are updated with the sampled return.
 * - Otherwise `state` is recorded with a visit count of 1 and its value is
 *   estimated by Rollout().
 *
 * NOTE(review): an earlier FIXME on this function said the rollout phase was
 * missing; unseen states are passed to Rollout() below, so that note looks
 * stale -- confirm.
 *
 * @param state  state the simulation starts from
 * @param depth  number of steps already taken in this simulation
 * @return       sampled discounted return from `state`
 */
double UCTAgent::MCTS(const State& state, const int depth)
{
    if (state.absorbing() || depth > 1.0 / (1.0 - gamma)) {
#if DEBUG
        std::cerr << "[" << state << "]";
#endif
        return 0.0;
    }

    if (state_counts_.count(state)) {
        // 10.0 is presumably the UCB1 exploration constant -- TODO confirm.
        Action action = UCB1(state, 10.0);
        std::pair<State, double> sample = TaxiEnv::Sample(state, action);

#if DEBUG
        std::cerr << "[" << state << ", " << action_name(action) << "] -> ";
#endif

        // Sampled return: immediate reward plus discounted return of the successor.
        const double v = sample.second + gamma * MCTS(sample.first, depth + 1);

        // Read the statistics only AFTER the recursive call. The simulation may
        // pass through this same (state, action) again further down; reading
        // them beforehand would overwrite that nested update with a value
        // computed from stale data while the visit counter still advances,
        // leaving the running mean inconsistent with its count.
        const int n = state_action_counts_[state][action];
        const double u = qtable_[state][action];

        // Incremental mean: u + (v - u) / (n + 1).
        const double k = 1.0 / (n + 1);
        qtable_[state][action] = u + k * (v - u);

        state_counts_[state] += 1;
        state_action_counts_[state][action] += 1;

        return v;
    }
    else {
        // First visit: add the state to the tree and estimate its value by rollout.
        state_counts_[state] = 1;
        return Rollout(state, depth);
    }
}
Exemplo n.º 2
0
/**
 * Plans from `state`: runs a fixed number of MCTS simulations rooted at
 * `state`, then returns BestAction(state).
 *
 * @param state  state to choose an action for
 * @return       the action reported by BestAction() after the simulations
 */
Action UCTAgent::UCT(const State& state)
{
    // 2^10 = 1024 simulations, written as an integer shift so no floating-point
    // pow() call or double -> int narrowing is involved.
    const int trials = 1 << 10;

    // Make sure the root is recorded so the first simulation expands it
    // instead of taking the rollout branch.
    // NOTE(review): this overwrites any count already stored for `state`;
    // confirm that is intended if the tables persist across calls.
    state_counts_[state] = 1;

    for (int i = 0; i < trials; ++i) {
        const double reward = MCTS(state, 0);
#if DEBUG
        std::cerr << ": " << reward <<std::endl;
        std::cerr << std::endl;
#else
        (void)reward;  // the return is only needed for debug tracing
#endif
    }

    return BestAction(state);
}
Exemplo n.º 3
0
/**
 * Picks the next action for `turn`.
 *
 * The search currently in use is MCTS; the node it returns supplies the
 * action. Two alternative searches are kept below, commented out, for
 * reference.
 *
 * @param turn  the side to choose an action for
 * @return      the action held by the node returned from MCTS(turn)
 */
const Action Janggi::CalculateNextAction(Turn turn)
{
    // Alternative 1 -- plain minimax (returns one of the current node's children):
    //   Node chosen = Minmax(curNode, MINMAX_DEPTH, turn);

    // Alternative 2 -- minimax with alpha-beta pruning:
    //   Node chosen = AlphaBeta(curNode, MINMAX_DEPTH, INT_MIN, INT_MAX, turn);

    // Active strategy -- Monte Carlo tree search.
    Node chosen = MCTS(turn);

    return chosen.GetAction();
}