Example #1
0
  // Runs one Monte-Carlo tree search iteration from `root`: walk down to a
  // leaf, try to expand it, run a playout, and feed the playout's utility
  // back to every node on the walked path.
  void mcIteration(Node *root) {
    vector<Node *> visited;

    // Descent: record each node on the way down, the leaf included.
    Node *leaf = root;
    for (;;) {
      visited.push_back(leaf);
      if (leaf->IsLeaf()) {
        break;
      }
      leaf = leaf->Select(P_RANDOM);
    }

    // Expansion: if Expand() hands back a node, the playout starts there and
    // that node joins the path; otherwise the playout starts at the leaf.
    Node *start = leaf->Expand();
    if (start != nullptr) {
      visited.push_back(start);
    } else {
      start = leaf;
    }

    // Backup: deepest node first, negating the utility after every step
    // (presumably because adjacent levels belong to opposing players).
    double value = playout(start);
    for (auto it = visited.rbegin(); it != visited.rend(); ++it) {
      (*it)->AddUtility(value);
      value = -value;
    }
  }
Example #2
0
// Runs MCTS_ITERATION search iterations from rootNode and returns the root
// child to play for `turn`.
//
// Each iteration:
//   1. Selection  - follows Selection()/GetChild() from rootNode until it
//                   reaches a node whose isLeaf flag is set, recording every
//                   node stepped into together with its GetValue().
//   2. Expansion  - calls Expand() on that node and steps into the child
//                   picked by Selection().
//   3. Simulation - calls Simulation() on that child with the other side to
//                   move and stores the result in the child's totalScore.
//   4. Backup     - walks the recorded nodes deepest-first and overwrites
//                   each totalScore with the min or max of its recorded
//                   value and the simulation result, alternating sides.
//
// turn:    side to move at the root.
// returns: a copy of the root child with the largest GetScore() when turn is
//          TURN_CHO, the smallest otherwise; the earliest child wins ties.
//
// NOTE(review): if rootNode has no children after the search, children[0] is
// still indexed on return - callers must guarantee at least one child exists.
Node Janggi::MCTS(Turn turn)
{
  rootNode.Init();
  cout << endl << endl;

  for (int iter = 0; iter < MCTS_ITERATION; iter++) {
    Turn currTurn = turn;
    Node* pCur = &rootNode;

    // Nodes stepped into during selection and the GetValue() each one had at
    // that moment; both stacks are pushed and popped in lockstep.
    std::stack<Node*> visited;
    std::stack<double> rewards;

#if DEBUG_MCTS
    Node* first = nullptr;  // first child chosen below the root (trace only)
#endif

    // Selection
    while (!pCur->isLeaf) {
      const int selected = pCur->Selection(currTurn);
      pCur = pCur->GetChild(selected);
#if DEBUG_MCTS
      if (first == nullptr) {
        first = pCur;
      }
#endif
      visited.push(pCur);
      // The original source also carried GetScore() as a commented-out
      // alternative to GetValue() here.
      rewards.push(pCur->GetValue());
      currTurn = (currTurn == TURN_CHO ? TURN_HAN : TURN_CHO);
    }
#if DEBUG_MCTS
    if (first) {
      cout << "init : (" << first->GetAction().prev.x << "," <<
        first->GetAction().prev.y << ") => (" <<
        first->GetAction().next.x << "," <<
        first->GetAction().next.y << ")" << endl;
    }
#endif

    // Expand
    pCur->Expand(currTurn);
    const int expanded = pCur->Selection(currTurn);
    pCur = pCur->GetChild(expanded);

    // Simulation: the side that did NOT just move is the one to play next.
    const double value =
      Simulation(*pCur, currTurn == TURN_CHO ? TURN_HAN : TURN_CHO);
    pCur->totalScore = value;

    // Back Propagation
    // NOTE(review): `value` stays the raw simulation result at every level;
    // it is not replaced by a node's new score while climbing - confirm
    // this is intended.
    while (!visited.empty()) {
      pCur = visited.top();
      if (currTurn == TURN_CHO)
        pCur->totalScore = min(rewards.top(), value);
      else
        pCur->totalScore = max(rewards.top(), value);

      visited.pop();
      rewards.pop();
      currTurn = (currTurn == TURN_CHO ? TURN_HAN : TURN_CHO);
    }
  }

  // Pick the root child: TURN_CHO takes the highest score, the other side the
  // lowest. The sentinel starts at the worst possible value for that side.
  const bool maximize = (turn == TURN_CHO);
  int bestNode = 0;
  double bestValue = maximize ? -std::numeric_limits<double>::max()
                              : std::numeric_limits<double>::max();
  const int childCount = static_cast<int>(rootNode.children.size());
  for (int i = 0; i < childCount; i++) {
    const double score = rootNode.children[i].GetScore();
    // Strict comparison: on equal scores the earlier child is kept.
    if (maximize ? (score > bestValue) : (score < bestValue)) {
      bestValue = score;
      bestNode = i;
    }

#if DEBUG_MCTS
    // Push tiny non-zero scores out to +/-0.001 for printing; the last
    // branch turns a negative zero into plain 0.
    double deb_score = score;
    if (deb_score > 0 && deb_score < 0.001)
      deb_score = 0.001;
    else if (deb_score < 0 && deb_score > -0.001)
      deb_score = -0.001;
    else if (deb_score == 0)
      deb_score = 0;

    std::cout << "(" << rootNode.children[i].GetAction().prev.x << ", "
      << rootNode.children[i].GetAction().prev.y << ") => ("
      << rootNode.children[i].GetAction().next.x << ", "
      << rootNode.children[i].GetAction().next.y << ") : "
      << deb_score << endl;
#endif
  }
  return rootNode.children[bestNode];
}