Ejemplo n.º 1
0
// Performs one UCT descent starting at node `node_n` with `color` to move.
//
// A child is chosen with select_best_ucb() and played on the board. The
// result then comes either from a playout or from a recursive descent into
// the child's own node (created on demand). The outcome is used to update
// RAVE data, the child's running win rate and the node's visit total.
//
// Returns the outcome `win` as seen by `color`; it is the negation of what
// the opponent's playout/search call returned.
int search_uct(int color, int node_n)
{
  NODE  *pN = &node[node_n];
  CHILD *c  = NULL;
  int    win;

  // Keep picking the best-rated child until one can actually be placed.
  // A child whose move is rejected is marked ILLEGAL_Z so that another
  // move is selected on the next round.
  while (1) {
    const int picked = select_best_ucb(node_n, color);
    c = &pN->child[picked];
    if ( put_stone(c->z, color, FILL_EYE_ERR) == 0 ) break;
    c->z = ILLEGAL_Z;
  }

  // Record the move in the global path; remember where it was stored.
  const int current_depth = depth;
  path[depth] = c->z;
  depth++;

  // Conditions under which the tree is not descended any further.
  // Raising the `<= 0` threshold (e.g. to `<= 10`) would create fewer nodes.
  const int unvisited_child  = ( c->games <= 0 );
  const int depth_exhausted  = ( depth == D_MAX );
  // This move and the previous one on the path are both 0
  // (presumably two consecutive passes -- confirm).
  const int zero_move_repeat = ( c->z == 0 && depth >= 2 && path[depth-2] == 0 );

  if ( unvisited_child || depth_exhausted || zero_move_repeat ) {
    win = -playout(flip_color(color));
  } else {
    if ( c->next == NODE_EMPTY ) {
      c->next = create_node(c->z);
    }
    win = -search_uct(flip_color(color), c->next);
  }

  update_rave(pN, color, current_depth, win);

  // Fold the new result into the child's running average win rate.
  c->rate = (c->rate * c->games + win) / (c->games + 1);
  c->games++;
  pN->child_games_sum++;
  return win;
}
Ejemplo n.º 2
0
// Debug driver: sets flag_test_playout, runs a single playout starting with
// color 1, then prints the resulting board and the SGF output.
void test_playout()
{
  const int start_color = 1;   // color handed to playout()

  flag_test_playout = 1;
  playout(start_color);

  print_board();
  print_sgf();
}
Ejemplo n.º 3
0
// Sample one possible sequence of future events, up to 'dfr' cycles.
//
// Returns the reward accumulated along the sampled sequence and folds it
// into this node's running mean (m_mean) and visit count (m_visits).
// When 'dfr' is 0 the function returns 0 without touching the statistics.
reward_t SearchNode::sample(Agent &agent, unsigned int dfr) {
    // Horizon reached: nothing left to sample.
    if (dfr == 0) {
        return 0;
    }

    double sampled;
    if (m_chance_node) {
        // Let the agent's environment model generate a complete
        // observation/reward percept (the model is updated as well).
        percept_t obs;
        percept_t rew;
        agent.genPerceptAndUpdate(obs, rew);

        // Child key: reward bits placed above the observation bits.
        percept_t key = (rew << agent.numObsBits()) | obs;

        // Create the child for this percept the first time it is seen.
        if (m_child.count(key) == 0) {
            m_child[key] = new SearchNode(false, agent.numActions());
        }
        sampled = rew + m_child[key]->sample(agent, dfr - 1);
    } else if (m_visits == 0) {
        // First visit of this node: estimate its value with a playout.
        sampled = playout(agent, dfr);
    } else {
        // Pick an action, apply it to the model and continue in its child.
        action_t chosen = selectAction(agent, dfr);
        agent.modelUpdate(chosen);
        sampled = m_child[chosen]->sample(agent, dfr);
    }

    // Incremental update of the mean future reward estimate.
    const double weight = 1.0 / (double) (m_visits + 1);
    m_mean = weight * (sampled + m_visits * m_mean);
    ++m_visits;
    return sampled;
}
Ejemplo n.º 4
0
/*
 * Runs a playout on a private copy of the state (s is passed by value),
 * removes groups with kill_groups() and returns the Chinese liberty score
 * of the result. The score is negated when white is to play in the final
 * position.
 */
int clean_playout(state s, const state_info si, jkiss *jk)
{
    playout(&s, si, jk);
    kill_groups(&s);

    const int score = chinese_liberty_score(&s);
    return s.white_to_play ? -score : score;
}
Ejemplo n.º 5
0
// Runs one UCT iteration from the root node.
//
// `node` is the root whose statistics are updated atomically; `copychild`
// is the copied child array in which the actual tree descent takes place.
void UCTParallel::search_uct_root(Board& board, const Color color, UCTNode* node, UCTNode* copychild)
{
	// Pick the move to play out according to UCB.
	// The root node is updated atomically, so no lock is taken for the UCB computation.
	UCTNode* const chosen = select_node_with_ucb(node);

	// Every move at the root is legal, so the result is not checked.
	board.move_legal(chosen->xy, color);

	// Translate the chosen child into its counterpart among the copied nodes.
	UCTNode* const chosen_copy = copychild + (chosen - node->child);

	// Descend into the tree only once the playout count (total over all
	// threads) has reached the threshold; below it a plain playout is run.
	bool descend = !(chosen->playout_num < THR);
	if (descend && chosen_copy->child_num == 0)
	{
		// Expand the node. On failure (node pool exhausted) fall back to a playout.
		descend = chosen_copy->expand_node(board);
	}

	int win;
	if (descend)
	{
		win = 1 - search_uct(board, opponent(color), chosen_copy);
	}
	else {
		win = 1 - playout(board, opponent(color));
	}

	// Update the win statistics (atomic additions).
	_InterlockedExchangeAdd(&chosen->win_num, win);
	_InterlockedIncrement(&chosen->playout_num);
	_InterlockedIncrement(&node->playout_num_sum);
}
Ejemplo n.º 6
0
// Plays up to `depth` random moves from `grid` and returns the final grid.
// Stops early as soon as Board::alive() reports false for the grid.
Board::Grid UCT::playout(Board::Grid grid, int depth){
    for(; depth != 0 && Board::alive(grid); --depth){
        // Pick one of the four directions from the member generator mt.
        const Dir dir = allDirs[mt()%4];
        grid = Board::moveAndBirth(grid, dir).second;
    }
    return grid;
}
Ejemplo n.º 7
0
int primitive_monte_calro(int color)
{
  int    try_num    = 30; // number of playout
  int    best_z     =  0;
  double best_value;
  double win_rate;
  int x,y,err,i,win_sum,win;

  int ko_z_copy;
  int board_copy[BOARD_MAX];  // keep current board
  ko_z_copy = ko_z;
  memcpy(board_copy, board, sizeof(board));

  best_value = -100;

  // try all empty point
  for (y=0;y<B_SIZE;y++) for (x=0;x<B_SIZE;x++) {
    int z = get_z(x+1,y+1);
    if ( board[z] != 0 ) continue;

    err = put_stone(z, color, FILL_EYE_ERR);
    if ( err != 0 ) continue;

    win_sum = 0;
    for (i=0;i<try_num;i++) {
      int board_copy2[BOARD_MAX];
      int ko_z_copy2 = ko_z;
      memcpy(board_copy2, board, sizeof(board));

      win = -playout(flip_color(color));
      win_sum += win;

      ko_z = ko_z_copy2;
      memcpy(board, board_copy2, sizeof(board));
    }
    win_rate = (double)win_sum / try_num;
//  print_board();
//  prt("z=%d,win=%5.3f\n",get81(z),win_rate);
    
    if ( win_rate > best_value ) {
      best_value = win_rate;
      best_z = z;
//    prt("best_z=%d,color=%d,v=%5.3f,try_num=%d\n",get81(best_z),color,best_value,try_num);
    }

    ko_z = ko_z_copy;
    memcpy(board, board_copy, sizeof(board));  // resume board
  }
  return best_z;
}
Ejemplo n.º 8
0
// Chooses the direction to play from the member `grid`.
//
// Phase 1 (boot): every direction in allDirs gets ITERATION_BOOT playouts.
// Phase 2 (UCB1): ITERATION further playouts are handed out one at a time
//                 to the direction with the highest UCB1 value.
// Finally the direction with the highest average evaluation is returned and
// that average is printed to std::cout.
//
// A direction is only selected when its value is strictly greater than 0;
// if none qualifies, Dir::Up is used as the fallback.
Dir UCT::decideDir(){
    std::array<int, 4> counts;      // playouts run per direction
    std::array<double, 4> sums;     // accumulated staticEval() per direction
    // Total number of playouts so far (assumes allDirs has four entries).
    int count(4 * ITERATION_BOOT);
    // A default-initialised std::array of int/double holds indeterminate
    // values, so both tables must be zeroed before they are accumulated.
    counts.fill(0);
    sums.fill(0);
    for(auto dir: allDirs){
        for(int i(0); i < ITERATION_BOOT; ++i){
            sums[dirToInt(dir)] += staticEval(playout(Board::moved(grid, dir), PLAYOUT_DEPTH));
            ++counts[dirToInt(dir)];
        }
    }
    for(int i(0); i < ITERATION; ++i){
        // UCB1 value = average evaluation + exploration bonus.
        std::array<double, 4> ucb1s;
        for(auto dir: allDirs){
            ucb1s[dirToInt(dir)] = (sums[dirToInt(dir)] / counts[dirToInt(dir)]) + std::sqrt(2 * std::log2(count) / counts[dirToInt(dir)]);
        }
        Dir bestDir = Dir::Up;
        double maxUcb1 = 0.0;
        for(auto dir: allDirs){
            if(maxUcb1 < ucb1s[dirToInt(dir)]){
                bestDir = dir;
                maxUcb1 = ucb1s[dirToInt(dir)];
            }
        }
        sums[dirToInt(bestDir)] += staticEval(playout(Board::moved(grid, bestDir), PLAYOUT_DEPTH));
        // Record the visit. Without these increments the average of the
        // chosen direction grows without bound and the exploration term
        // never changes between iterations.
        ++counts[dirToInt(bestDir)];
        ++count;
    }
    Dir bestDir = Dir::Up;
    double maxAve = 0.0;
    for(auto dir: allDirs){
        if(maxAve < sums[dirToInt(dir)] / counts[dirToInt(dir)]){
            bestDir = dir;
            maxAve = sums[dirToInt(dir)] / counts[dirToInt(dir)];
        }
    }
    std::cout << maxAve << std::endl;
    return bestDir;
}
Ejemplo n.º 9
0
  // Runs a single iteration of Monte-Carlo tree search from `root`:
  // select down to a leaf, expand it, run a playout, and back the result up
  // along the visited path, flipping its sign at every level.
  void mcIteration(Node *root) {
    vector<Node *> visited;

    // Selection: follow Select() until a leaf is reached, recording each
    // node on the way (the leaf included).
    Node *node = root;
    for (; !node->IsLeaf(); node = node->Select(P_RANDOM)) {
      visited.push_back(node);
    }
    visited.push_back(node);

    // Expansion: if the leaf yields a new node, the playout starts there;
    // otherwise it starts from the leaf itself.
    Node *playoutStart = node;
    if (Node *expanded = node->Expand()) {
      visited.push_back(expanded);
      playoutStart = expanded;
    }

    // Simulation and backpropagation, deepest node first.
    double utility = playout(playoutStart);
    for (auto it = visited.rbegin(); it != visited.rend(); ++it) {
      (*it)->AddUtility(utility);
      utility = -utility;
    }
  }
Ejemplo n.º 10
0
/*
 * Demo driver: builds a 7x7 position with one move played at (3, 3), shows a
 * single playout with its cleaned-up score, then collects score histograms
 * from repeated clean_playout() calls and from monte_carlo().
 */
int main()
{
    size_t i;
    const size_t num_iter_single = 1000000;
    const size_t num_iter_parallel = 10000000;

    /* Starting position and the state_info derived from it. */
    state base_state = (state) {rectangle(7, 7), 0, 0, 0, 0, 0, 0};
    int ignored;  /* out-parameter of make_move(), not used here */
    make_move(&base_state, one(3, 3), &ignored);
    state_info si;
    init_state(&base_state, &si);

    /* One playout on a copy, printed before and after kill_groups(). */
    state s = base_state;
    jkiss jk;
    jkiss_init(&jk);
    printf("%d\n", playout(&s, si, &jk));
    print_state(&s);
    kill_groups(&s);
    print_state(&s);
    printf("%d\n", chinese_liberty_score(&s));

    /* Histogram built from repeated clean_playout() calls. */
    score_bins sb = score_bins_new(base_state);
    for (i = 0; i < num_iter_single; i++) {
        const int score = clean_playout(base_state, si, &jk);
        score_bins_add(sb, score);
    }
    print_score_bins(sb, 50);
    printf("-----------------------------\n");

    /* Same histogram, this time filled by monte_carlo(). */
    score_bins_reset(sb);
    monte_carlo(base_state, si, sb, num_iter_parallel);
    print_score_bins(sb, 50);

    /* Disabled: earlier hand-written OpenMP version of the loop above. */
    /*
    int num_threads = omp_get_max_threads();
    jkiss *jks = malloc(num_threads * sizeof(jkiss));
    bin_t **binss = malloc(num_threads * sizeof(bin_t*));
    for (int i = 0; i < num_threads; i++) {
        binss[i] = calloc(num_bins, sizeof(bin_t));
        jkiss_init(jks + i);
    }
    int tid;
    int finished = 0;
    #pragma omp parallel private(i, s, tid)
    {
        tid = omp_get_thread_num();
        for (i = 0; i < num_iter_parallel; i++) {
            s = base_state;
            playout(&s, si, jks + tid);
            kill_groups(&s);
            int score = chinese_liberty_score(&s);
            if (s.white_to_play) {
                score = -score;
            }
            binss[tid][si.size + score]++;
            // Bailout once one thread finishes.
            if (finished) {
                break;
            }
        }
        finished = 1;
    }

    for (i = 0; i < num_bins; i++) {
        bins[i] = 0;
    }
    for (i = 0; i < num_threads; i++) {
        for (int j = 0; j < num_bins; j++) {
            bins[j] += binss[i][j];
        }
    }
    print_bins(bins, num_bins, 128);
    */

    return 0;
}