// single iteration of monte-carlo tree search. void mcIteration(Node *root) { vector<Node *> pathFromRoot; Node *cur = root; while (!cur->IsLeaf()) { pathFromRoot.push_back(cur); cur = cur->Select(P_RANDOM); } pathFromRoot.push_back(cur); Node *playoutNode = cur->Expand(); if (playoutNode == nullptr) { playoutNode = cur; } else { pathFromRoot.push_back(playoutNode); } double utility = playout(playoutNode); for (int i = pathFromRoot.size() - 1; i >= 0; i--) { pathFromRoot[i]->AddUtility(utility); utility = -utility; } }
Node Janggi::MCTS(Turn turn) { rootNode.Init(); cout << endl << endl; for (int i = 0; i < MCTS_ITERATION; i++) { Turn currTurn = turn; std::stack<Node*> visited; Node* pCur = &rootNode; std::stack<double> rewards; int selected = 0; double curReward = 0.0f; // Selection Node* first = NULL; while (!pCur->isLeaf) { selected = pCur->Selection(currTurn); pCur = pCur->GetChild(selected); if (first == NULL) { first = pCur; } visited.push(pCur); curReward = pCur->GetValue(); //curReward = pCur->GetScore(); rewards.push(curReward); currTurn = (currTurn == TURN_CHO ? TURN_HAN : TURN_CHO); } #if DEBUG_MCTS if (first) { cout << "init : (" << first->GetAction().prev.x << "," << first->GetAction().prev.y << ") => (" << first->GetAction().next.x << "," << first->GetAction().next.y << ")" << endl; } #endif // Expand pCur->Expand(currTurn); selected = pCur->Selection(currTurn); pCur = pCur->GetChild(selected); // Simulation double value = Simulation(*pCur, currTurn == TURN_CHO ? TURN_HAN : TURN_CHO); pCur->totalScore = value; // Back Propagation while (!visited.empty()) { pCur = visited.top(); if (currTurn == TURN_CHO) pCur->totalScore = min(rewards.top(), value); else pCur->totalScore = max(rewards.top(), value); visited.pop(); rewards.pop(); currTurn = (currTurn == TURN_CHO ? TURN_HAN : TURN_CHO); } } int bestNode = 0; double bestValue; if (turn == TURN_CHO) { bestValue = -std::numeric_limits<double>::max(); for (int i = 0; i < rootNode.children.size(); i++) { double value = rootNode.children[i].GetScore(); if (value > bestValue) { bestValue = value; bestNode = i; } #if DEBUG_MCTS double deb_score = rootNode.children[i].GetScore(); if (deb_score > 0 && deb_score < 0.001) deb_score = 0.001; else if (deb_score < 0 && deb_score > -0.001) deb_score = -0.001; else if (deb_score == 0) deb_score = 0; std::cout << "(" << rootNode.children[i].GetAction().prev.x << ", " << rootNode.children[i].GetAction().prev.y << ") => (" << rootNode.children[i].GetAction().next.x << ", " << rootNode.children[i].GetAction().next.y << ") : " << deb_score << endl; #endif } } else { bestValue = std::numeric_limits<double>::max(); for (int i = 0; i < rootNode.children.size(); i++) { double value = rootNode.children[i].GetScore(); if (value < bestValue) { bestValue = value; bestNode = i; } #if DEBUG_MCTS double deb_score = rootNode.children[i].GetScore(); if (deb_score > 0 && deb_score < 0.001) deb_score = 0.001; else if (deb_score < 0 && deb_score > -0.001) deb_score = -0.001; else if (deb_score == 0) deb_score = 0; std::cout << "(" << rootNode.children[i].GetAction().prev.x << ", " << rootNode.children[i].GetAction().prev.y << ") => (" << rootNode.children[i].GetAction().next.x << ", " << rootNode.children[i].GetAction().next.y << ") : " << deb_score << endl; #endif } } return rootNode.children[bestNode]; }