// Prune children whose moves would recreate a previous whole-board
// position (a superko violation), so the search can never select them.
void UCTNode::kill_superkos(KoState & state) {
    UCTNode * child = m_firstchild;

    while (child != nullptr) {
        int move = child->get_move();

        if (move != FastBoard::PASS) {
            // Play the candidate move on a copy of the state and check
            // whether it repeats an earlier position.
            KoState mystate = state;
            mystate.play_move(move);

            if (mystate.superko()) {
                UCTNode * tmp = child->m_nextsibling;
                delete_child(child);
                child = tmp;
                continue;
            }
        }
        child = child->m_nextsibling;
    }
}
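// kill_superkos relies on delete_child to unlink a node from the
// m_firstchild/m_nextsibling sibling list. The helper below is a minimal
// sketch of such an unlink, assuming raw owning pointers as above; the
// actual implementation in the tree code may differ.
void UCTNode::delete_child(UCTNode * del_child) {
    assert(del_child != nullptr);

    if (del_child == m_firstchild) {
        // Deleting the head: advance the list head before freeing.
        m_firstchild = del_child->m_nextsibling;
        delete del_child;
        return;
    }

    // Walk the sibling list until we find the node that links to the
    // child being removed, then splice it out.
    UCTNode * child = m_firstchild;
    while (child->m_nextsibling != nullptr) {
        if (child->m_nextsibling == del_child) {
            child->m_nextsibling = del_child->m_nextsibling;
            delete del_child;
            return;
        }
        child = child->m_nextsibling;
    }
    assert(false && "Tried to delete a child that is not in the list");
}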
int UCTSearch::get_best_move(passflag_t passflag) {
    int color = m_rootstate.board.get_to_move();

    // Make sure the best move is first.
    m_root->sort_children(color);

    // Check whether to randomize the best move proportionally
    // to the playout counts, early game only.
    auto movenum = int(m_rootstate.get_movenum());
    if (movenum < cfg_random_cnt) {
        m_root->randomize_first_proportionally();
    }

    auto first_child = m_root->get_first_child();
    assert(first_child != nullptr);

    auto bestmove = first_child->get_move();
    auto bestscore = first_child->get_eval(color);

    // Do we want to fiddle with the best move because of the rule set?
    if (passflag & UCTSearch::NOPASS) {
        // Were we going to pass?
        if (bestmove == FastBoard::PASS) {
            UCTNode * nopass = m_root->get_nopass_child(m_rootstate);

            if (nopass != nullptr) {
                myprintf("Preferring not to pass.\n");
                bestmove = nopass->get_move();
                if (nopass->first_visit()) {
                    bestscore = 1.0f;
                } else {
                    bestscore = nopass->get_eval(color);
                }
            } else {
                myprintf("Pass is the only acceptable move.\n");
            }
        }
    } else {
        if (!cfg_dumbpass && bestmove == FastBoard::PASS) {
            // Either by forcing or coincidence, passing is on top;
            // check whether passing loses instantly by doing a full
            // count including dead stones.
            // In a reinforcement learning setup, it is possible for the
            // network to learn that, after passing in the tree, the two last
            // positions are identical, and this means the position is only won
            // if there are no dead stones in our own territory (because we use
            // Tromp-Taylor scoring there). So strictly speaking, the next
            // heuristic isn't required for a pure RL network, and we have
            // a command line option to disable the behavior during learning.
            // On the other hand, with a supervised learning setup, we fully
            // expect that the engine will pass out anything that looks like
            // a finished game even with dead stones on the board (because the
            // training games were scored with dead stone removal).
            // So in order to play games with an SL network, we need this
            // heuristic so the engine can "clean up" the board. It will still
            // only clean up the bare necessity to win. For full dead stone
            // removal, kgs-genmove_cleanup and the NOPASS mode must be used.
            float score = m_rootstate.final_score();
            // Do we lose by passing?
            if ((score > 0.0f && color == FastBoard::WHITE)
                || (score < 0.0f && color == FastBoard::BLACK)) {
                myprintf("Passing loses :-(\n");
                // Find a valid non-pass move.
                UCTNode * nopass = m_root->get_nopass_child(m_rootstate);
                if (nopass != nullptr) {
                    myprintf("Avoiding pass because it loses.\n");
                    bestmove = nopass->get_move();
                    if (nopass->first_visit()) {
                        bestscore = 1.0f;
                    } else {
                        bestscore = nopass->get_eval(color);
                    }
                } else {
                    myprintf("No alternative to passing.\n");
                }
            } else {
                myprintf("Passing wins :-)\n");
            }
        } else if (!cfg_dumbpass
                   && m_rootstate.get_last_move() == FastBoard::PASS) {
            // The opponent's last move was a pass. We didn't consider
            // passing ourselves; should we have, and should we end the
            // game immediately?
            float score = m_rootstate.final_score();
            // Do we lose by passing?
            if ((score > 0.0f && color == FastBoard::WHITE)
                || (score < 0.0f && color == FastBoard::BLACK)) {
                myprintf("Passing loses, I'll play on.\n");
            } else {
                myprintf("Passing wins, I'll pass out.\n");
                bestmove = FastBoard::PASS;
            }
        }
    }

    // If we aren't passing, should we consider resigning?
    if (bestmove != FastBoard::PASS) {
        if (should_resign(passflag, bestscore)) {
            myprintf("Eval (%.2f%%) looks bad. Resigning.\n",
                     100.0f * bestscore);
            bestmove = FastBoard::RESIGN;
        }
    }

    return bestmove;
}
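// get_best_move delegates the resignation decision to should_resign. The
// sketch below is a minimal version of such a check, assuming a NORESIGN
// bit in passflag_t and a configurable percentage threshold cfg_resignpct
// (both names are assumptions, not shown in the code above); the real
// heuristic may also consider game phase, board size, or handicap.
bool UCTSearch::should_resign(passflag_t passflag, float bestscore) {
    if (passflag & UCTSearch::NORESIGN) {
        // Resigning is explicitly disabled for this search.
        return false;
    }
    if (cfg_resignpct <= 0) {
        // A non-positive threshold means "never resign".
        return false;
    }
    // Resign when the winrate from our perspective drops below the
    // configured percentage.
    const float resign_threshold = 0.01f * cfg_resignpct;
    return bestscore < resign_threshold;
}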