int search_uct(int color, int node_n) { NODE *pN = &node[node_n]; CHILD *c = NULL; int select, z, err, win, current_depth; for (;;) { select = select_best_ucb(node_n, color); c = &pN->child[select]; z = c->z; err = put_stone(z, color, FILL_EYE_ERR); if ( err == 0 ) break; c->z = ILLEGAL_Z; // select other move } current_depth = depth; path[depth++] = c->z; // playout in first time. <= 10 can reduce node. if ( c->games <= 0 || depth == D_MAX || (c->z == 0 && depth>=2 && path[depth-2]==0) ) { win = -playout(flip_color(color)); } else { if ( c->next == NODE_EMPTY ) c->next = create_node(c->z); win = -search_uct(flip_color(color), c->next); } update_rave(pN, color, current_depth, win); // update winrate c->rate = (c->rate * c->games + win) / (c->games + 1); c->games++; pN->child_games_sum++; return win; }
void UCTParallel::search_uct_root(Board& board, const Color color, UCTNode* node, UCTNode* copychild) { // UCBからプレイアウトする手を選択 // rootノードはアトミックに更新するためUCB計算ではロックしない UCTNode* selected_node = select_node_with_ucb(node); // rootでは全て合法手なのでエラーチェックはしない board.move_legal(selected_node->xy, color); // コピーされたノードに変換 UCTNode* selected_node_copy = copychild + (selected_node - node->child); int win; // 閾値以下の場合プレイアウト(全スレッドの合計値) if (selected_node->playout_num < THR) { win = 1 - playout(board, opponent(color)); } else { if (selected_node_copy->child_num == 0) { // ノードを展開 if (selected_node_copy->expand_node(board)) { win = 1 - search_uct(board, opponent(color), selected_node_copy); } else { // ノードプール不足 win = 1 - playout(board, opponent(color)); } } else { win = 1 - search_uct(board, opponent(color), selected_node_copy); } } // 勝率を更新(アトミックに加算) _InterlockedExchangeAdd(&selected_node->win_num, win); _InterlockedIncrement(&selected_node->playout_num); _InterlockedIncrement(&node->playout_num_sum); }
/*
 * Runs `loop` UCT simulations for `color` from the current position.
 *
 * Each simulation works directly on the global `board` / `ko_z`, so both
 * are saved before the call to search_uct() and written back afterwards;
 * every simulation therefore starts from the same position.
 *
 * NOTE(review): `bestPlace` is never assigned in this function, so it
 * always returns -1. The move-selection step appears to be missing and
 * still has to be written.
 */
int get_best_uct(int color){
    int bestPlace = -1;

    for (int i = 0; i < loop; i++) {
        /* Snapshot the position before the simulation modifies it. */
        int BoardCopy[BOARD_MAX];
        const int KoPlaceCopy = ko_z;
        memcpy(BoardCopy, board, sizeof(board));

        search_uct(color);

        /* Restore the position so the next simulation starts from it. */
        ko_z = KoPlaceCopy;
        memcpy(board, BoardCopy, sizeof(board));
    }

    return bestPlace;
}
int get_best_uct(int color) { int next, i, best_z, best_i = -1; int max = -999; NODE *pN; int prev_z = 0; if ( moves > 0 ) prev_z = record[moves-1]; node_num = 0; next = create_node(prev_z); for (i=0; i<uct_loop; i++) { int board_copy[BOARD_MAX]; int ko_z_copy = ko_z; memcpy(board_copy, board, sizeof(board)); depth = 0; search_uct(color, next); ko_z = ko_z_copy; memcpy(board, board_copy, sizeof(board)); } pN = &node[next]; for (i=0; i<pN->child_num; i++) { CHILD *c = &pN->child[i]; if ( c->games > max ) { best_i = i; max = c->games; } prt("%2d:z=%2d,rate=%6.3f,games=%4d, rave_r=%6.3f,g=%4d\n", i, get81(c->z), c->rate, c->games, c->rave_rate, c->rave_games); } best_z = pN->child[best_i].z; prt("best_z=%d,rate=%6.3f,games=%4d,playouts=%d,nodes=%d\n", get81(best_z), pN->child[best_i].rate, max, all_playouts, node_num); return best_z; }