/**
 * Select the valid child with the highest PUCT value.
 *
 * For every valid child considered the value is
 *     get_eval(color) + cfg_puct * get_score() * sqrt(N) / (1 + get_visits())
 * where N is the sum of get_visits() over the valid children considered.
 * N is summed here by hand rather than read from this node (the original
 * note says this avoids issues with transpositions).
 *
 * LOCK(get_mutex(), lock) is taken before the children are inspected.
 *
 * @param color  Side handed to get_eval() when evaluating each child.
 * @return The child with the strictly greatest value (first one wins on
 *         ties), or nullptr if this node has no valid child.
 */
UCTNode* UCTNode::uct_select_child(int color) {
    LOCK(get_mutex(), lock);

    // Progressive widening is disabled: the bound is a fixed constant.
    // Former formula, kept for reference:
    // std::max(2, (int)(((log((double)get_visits()) - 3.0) * 3.0) + 2.0));
    const int max_children = 362;

    // Walk the sibling chain from `node` up to the first valid() entry.
    const auto skip_invalid = [](UCTNode* node) -> UCTNode* {
        while (node != nullptr && !node->valid()) {
            node = node->m_nextsibling;
        }
        return node;
    };

    // Pass 1: accumulate the visit counts of the valid children.
    int visit_total = 0;
    int seen = 0;
    for (UCTNode* node = skip_invalid(m_firstchild);
         node != nullptr && seen < max_children;
         node = skip_invalid(node->m_nextsibling), ++seen) {
        visit_total += node->get_visits();
    }

    const float numerator =
        static_cast<float>(std::sqrt(static_cast<double>(visit_total)));

    // Pass 2: restart from the first valid child and score each one.
    UCTNode* const first_valid = skip_invalid(m_firstchild);
    if (first_valid == nullptr) {
        return nullptr;
    }

    // Probability-based pruning is disabled. Former code, kept for reference:
    // auto parent_log = std::log((float)parentvisits);
    // auto cutoff_ratio = cfg_cutoff_offset + cfg_cutoff_ratio * parent_log;
    // auto best_probability = child->get_score();
    // assert(best_probability > 0.001f);
    // ...and inside the loop:
    // if (child->get_score() * cutoff_ratio < best_probability) { break; }

    UCTNode* best = nullptr;
    float best_value = -1000.0f;

    seen = 0;
    for (UCTNode* node = first_valid;
         node != nullptr && seen < max_children;
         node = skip_invalid(node->m_nextsibling), ++seen) {
        // get_eval() will automatically set first-play-urgency
        const float winrate = node->get_eval(color);
        const float psa = node->get_score();
        const float denom = 1.0f + node->get_visits();
        const float puct = cfg_puct * psa * (numerator / denom);
        const float value = winrate + puct;
        assert(value > -1000.0f);

        if (value > best_value) {
            best_value = value;
            best = node;
        }
    }

    assert(best != nullptr);
    return best;
}
/**
 * Choose the move to play from the root after the search.
 *
 * The root's children are sorted so the best one comes first; while the
 * move number is below cfg_random_cnt the first child is additionally
 * randomized proportionally. The resulting first child is the initial
 * candidate, which is then adjusted for pass handling and finally checked
 * against the resign condition.
 *
 * @param passflag  Flag bits; when UCTSearch::NOPASS is set a non-pass
 *                  move is preferred whenever one is available. Also
 *                  forwarded to should_resign().
 * @return The chosen move; this may be FastBoard::PASS or
 *         FastBoard::RESIGN.
 */
int UCTSearch::get_best_move(passflag_t passflag) {
    const int color = m_rootstate.board.get_to_move();

    // Make sure best is first
    m_root->sort_children(color);

    // Early game only: optionally randomize the best move proportional
    // to the playout counts.
    const auto movenum = static_cast<int>(m_rootstate.get_movenum());
    if (movenum < cfg_random_cnt) {
        m_root->randomize_first_proportionally();
    }

    auto first_child = m_root->get_first_child();
    assert(first_child != nullptr);

    auto bestmove = first_child->get_move();
    auto bestscore = first_child->get_eval(color);

    // Replace the current choice with the given non-pass child. A child
    // on its first visit is given a score of 1.0 instead of its eval.
    const auto adopt = [&](UCTNode* alternative) {
        bestmove = alternative->get_move();
        if (alternative->first_visit()) {
            bestscore = 1.0f;
        } else {
            bestscore = alternative->get_eval(color);
        }
    };

    // True when the given final score means the side to move loses:
    // a positive score with White to move, or a negative one with Black.
    const auto passing_loses = [color](float score) {
        return (score > 0.0f && color == FastBoard::WHITE)
            || (score < 0.0f && color == FastBoard::BLACK);
    };

    // Do we want to fiddle with the best move because of the rule set?
    if (passflag & UCTSearch::NOPASS) {
        // Were we going to pass?
        if (bestmove == FastBoard::PASS) {
            UCTNode* nopass = m_root->get_nopass_child(m_rootstate);
            if (nopass != nullptr) {
                myprintf("Preferring not to pass.\n");
                adopt(nopass);
            } else {
                myprintf("Pass is the only acceptable move.\n");
            }
        }
    } else if (!cfg_dumbpass) {
        if (bestmove == FastBoard::PASS) {
            // Either by forcing or coincidence passing is on top...
            // check whether passing loses instantly by doing a full
            // count including dead stones.
            //
            // In a reinforcement learning setup the network can learn
            // that, after a pass in the tree, the last two positions are
            // identical, so the position is only won if there are no dead
            // stones in our own territory (Tromp-Taylor scoring is used
            // there). Strictly speaking a pure RL network therefore does
            // not need the heuristic below, and a command line option
            // exists to disable it during learning.
            //
            // With a supervised learning network, however, we fully
            // expect the engine to pass out anything that looks like a
            // finished game even with dead stones on the board, because
            // the training games were scored with dead stone removal.
            // To play games with an SL network this heuristic is needed
            // so the engine can "clean up" the board. It still only
            // cleans up the bare necessity to win. For full dead stone
            // removal, kgs-genmove_cleanup and the NOPASS mode must be
            // used.
            const float score = m_rootstate.final_score();
            if (passing_loses(score)) {
                myprintf("Passing loses :-(\n");
                // Find a valid non-pass move.
                UCTNode* nopass = m_root->get_nopass_child(m_rootstate);
                if (nopass != nullptr) {
                    myprintf("Avoiding pass because it loses.\n");
                    adopt(nopass);
                } else {
                    myprintf("No alternative to passing.\n");
                }
            } else {
                myprintf("Passing wins :-)\n");
            }
        } else if (m_rootstate.get_last_move() == FastBoard::PASS) {
            // The opponent's last move was a pass and we did not choose
            // to pass. Should we have, ending the game immediately?
            const float score = m_rootstate.final_score();
            if (passing_loses(score)) {
                myprintf("Passing loses, I'll play on.\n");
            } else {
                myprintf("Passing wins, I'll pass out.\n");
                bestmove = FastBoard::PASS;
            }
        }
    }

    // If we aren't passing, should we consider resigning?
    if (bestmove != FastBoard::PASS && should_resign(passflag, bestscore)) {
        myprintf("Eval (%.2f%%) looks bad. Resigning.\n",
                 100.0f * bestscore);
        bestmove = FastBoard::RESIGN;
    }

    return bestmove;
}