Exemple #1
0
int search_uct(int color, int node_n)
{
  NODE *pN = &node[node_n];
  CHILD *c = NULL;  
  int select, z, err, win, current_depth;
  for (;;) {
    select = select_best_ucb(node_n, color);
    c = &pN->child[select];
    z = c->z;
    err = put_stone(z, color, FILL_EYE_ERR);
    if ( err == 0 ) break;
    c->z = ILLEGAL_Z;     // select other move
  }

  current_depth = depth;
  path[depth++] = c->z;

  // playout in first time. <= 10 can reduce node.
  if ( c->games <= 0 || depth == D_MAX || (c->z == 0 && depth>=2 && path[depth-2]==0) ) {
    win = -playout(flip_color(color));
  } else {
    if ( c->next == NODE_EMPTY ) c->next = create_node(c->z);
    win = -search_uct(flip_color(color), c->next);
  }

  update_rave(pN, color, current_depth, win);
  
  // update winrate
  c->rate = (c->rate * c->games + win) / (c->games + 1);
  c->games++;
  pN->child_games_sum++;
  return win;  
}
// Run one UCT iteration starting at the root node.
//
// board     - position to play on; the selected move is applied to it.
// color     - side to move at the root.
// node      - the root node; selection and the shared counters use its children.
// copychild - array parallel to node->child; the descent below the root goes
//             through the element at the same index as the selected child.
//
// The result (1 minus the value returned for the opponent) is added to the
// selected child's counters with interlocked operations.
void UCTParallel::search_uct_root(Board& board, const Color color, UCTNode* node, UCTNode* copychild)
{
	// Pick the move to explore via UCB.
	// The root node is updated atomically, so the UCB computation takes no lock.
	UCTNode* const chosen = select_node_with_ucb(node);

	// Every move at the root is legal, so the result is not checked.
	board.move_legal(chosen->xy, color);

	// Map the selected child onto its counterpart in the copied array.
	UCTNode* const chosen_copy = &copychild[chosen - node->child];

	// A playout is used when
	//  - the visit count (total over all threads) is still below the threshold, or
	//  - the copied node has no children yet and expanding it fails
	//    (node pool exhausted).
	// Short-circuit evaluation keeps expand_node() from being called unless the
	// threshold has been reached and the node is still unexpanded.
	const bool use_playout =
		chosen->playout_num < THR ||
		(chosen_copy->child_num == 0 && !chosen_copy->expand_node(board));

	int win;
	if (use_playout)
	{
		win = 1 - playout(board, opponent(color));
	}
	else {
		win = 1 - search_uct(board, opponent(color), chosen_copy);
	}

	// Update the win statistics (atomic adds).
	_InterlockedExchangeAdd(&chosen->win_num, win);
	_InterlockedIncrement(&chosen->playout_num);
	_InterlockedIncrement(&node->playout_num_sum);
}
Exemple #3
0
// Run `loop` UCT simulations for `color` from the current position.
//
// The board and the ko point are snapshotted before each simulation and put
// back afterwards, so every simulation starts from the same position and the
// caller's position is left as it was on entry.
//
// Returns bestPlace.
// NOTE(review): bestPlace is never assigned in this function, so the return
// value is always -1; the selection of the best move from the search results
// still has to be implemented.
int get_best_uct(int color){
	int bestPlace=-1;
	
	for (int i = 0; i < loop; i++){
		// Snapshot the position before the simulation modifies it.
		int BoardCopy[BOARD_MAX];
		int KoPlaceCopy = ko_z;
		memcpy(BoardCopy,board,sizeof(board));

		search_uct(color);

		// Restore the snapshot; previously the copies were taken but never
		// written back, so changes made during one simulation leaked into
		// the next one and out to the caller.
		ko_z = KoPlaceCopy;
		memcpy(board,BoardCopy,sizeof(board));
	}

	return bestPlace;
}
Exemple #4
0
/*
 * Run uct_loop UCT simulations for `color` from the current position and
 * return the move of the root child that was played most often.
 *
 * The board and ko point are saved before every simulation and restored
 * afterwards, so the position is unchanged when the function returns.
 * Per-child statistics and a summary line are written through prt().
 *
 * Returns the z value of the most-played root child, or 0 when the root has
 * no children at all (0 appears to be the pass move in this code - confirm).
 */
int get_best_uct(int color)
{
  int next, i, best_z, best_i = -1;
  int max = -999;
  NODE *pN;
  int prev_z = 0;

  // The root node is created from the last recorded move (0 if none yet).
  if ( moves > 0 ) prev_z = record[moves-1];
  node_num = 0;
  next = create_node(prev_z);

  for (i=0; i<uct_loop; i++) {
    // Snapshot the position; the simulation plays stones on the real board.
    int board_copy[BOARD_MAX];
    int ko_z_copy = ko_z;
    memcpy(board_copy, board, sizeof(board));

    depth = 0;
    search_uct(color, next);

    // Put the position back for the next simulation.
    ko_z = ko_z_copy;
    memcpy(board, board_copy, sizeof(board));
  }

  // Pick the child with the highest play count; ties keep the earliest one.
  pN = &node[next];
  for (i=0; i<pN->child_num; i++) {
    CHILD *c = &pN->child[i];
    if ( c->games > max ) {
      best_i = i;
      max = c->games;
    }
    prt("%2d:z=%2d,rate=%6.3f,games=%4d, rave_r=%6.3f,g=%4d\n",
        i, get81(c->z), c->rate, c->games, c->rave_rate, c->rave_games);
  }

  // With no children best_i is still -1; indexing child[-1] below would read
  // out of bounds, so bail out before touching the array.
  if ( best_i < 0 ) return 0;

  best_z = pN->child[best_i].z;
  prt("best_z=%d,rate=%6.3f,games=%4d,playouts=%d,nodes=%d\n",
       get81(best_z), pN->child[best_i].rate, max, all_playouts, node_num);
  return best_z;
}