Пример #1
0
int main() {
  char buf[100];
  // After that we use moggy to analyze it. (With default parameters).
  void* policy = playout_moggy_init(NULL);

  // Put a few random moves and ask mogo to play it out.
  AllMoves all_moves;
  GroupId4 ids;
  const int num_round = 10000;

  Board b;
  timeit
  for (int j = 0; j < num_round; j++) {
    printf("Round = %d/%d\n", j, num_round);
    ClearBoard(&b);
    //for (int i = 0; i < rand() % 100 + 1; ++i) {
    for (int i = 0; i < 0; ++i) {
      FindAllCandidateMoves(&b, b._next_player, 0, &all_moves);
      // Randomly find one move.
      int idx = rand() % all_moves.num_moves;
      if (TryPlay2(&b, all_moves.moves[idx], &ids)) {
        Play(&b, &ids);
      }
    }

    ShowBoard(&b, SHOW_LAST_MOVE);
    play_random_game(policy, NULL, NULL, &b, -1, FALSE);
    ShowBoard(&b, SHOW_LAST_MOVE);
  }
  endtime

  playout_moggy_destroy(policy);
  return 0;
}
Пример #2
0
static int
uct_leaf_node(struct uct *u, struct board *b, enum stone player_color,
              struct playout_amafmap *amaf,
	      struct uct_descent *descent, int *dlen,
	      struct tree_node *significant[2],
              struct tree *t, struct tree_node *n, enum stone node_color,
	      char *spaces)
{
	enum stone next_color = stone_other(node_color);
	int parity = (next_color == player_color ? 1 : -1);

	if (UDEBUGL(7))
		fprintf(stderr, "%s*-- UCT playout #%d start [%s] %f\n",
			spaces, n->u.playouts, coord2sstr(node_coord(n), t->board),
			tree_node_get_value(t, parity, n->u.value));

	struct uct_playout_callback upc = {
		.uct = u,
		.tree = t,
		/* TODO: Don't necessarily restart the sequence walk when
		 * entering playout. */
		.lnode = NULL,
	};

	struct playout_setup ps = {
		.gamelen = u->gamelen,
		.mercymin = u->mercymin,
		.prepolicy_hook = uct_playout_prepolicy,
		.postpolicy_hook = uct_playout_postpolicy,
		.hook_data = &upc,
	};
	int result = play_random_game(&ps, b, next_color,
	                              u->playout_amaf ? amaf : NULL,
				      &u->ownermap, u->playout);
	if (next_color == S_WHITE) {
		/* We need the result from black's perspective. */
		result = - result;
	}
	if (UDEBUGL(7))
		fprintf(stderr, "%s -- [%d..%d] %s random playout result %d\n",
		        spaces, player_color, next_color, coord2sstr(node_coord(n), t->board), result);

	return result;
}

static floating_t
scale_value(struct uct *u, struct board *b, int result)
{
	floating_t rval = result > 0 ? 1.0 : result < 0 ? 0.0 : 0.5;
	if (u->val_scale && result != 0) {
		int vp = u->val_points;
		if (!vp) {
			vp = board_size(b) - 1; vp *= vp; vp *= 2;
		}

		floating_t sval = (floating_t) abs(result) / vp;
		sval = sval > 1 ? 1 : sval;
		if (result < 0) sval = 1 - sval;
		if (u->val_extra)
			rval += u->val_scale * sval;
		else
			rval = (1 - u->val_scale) * rval + u->val_scale * sval;
		// fprintf(stderr, "score %d => sval %f, rval %f\n", result, sval, rval);
	}
	return rval;
}
Пример #3
0
static coord_t *
montecarlo_genmove(struct engine *e, struct board *b, struct time_info *ti, enum stone color, bool pass_all_alive)
{
	struct montecarlo *mc = e->data;

	if (ti->dim == TD_WALLTIME) {
		fprintf(stderr, "Warning: TD_WALLTIME time mode not supported, resetting to defaults.\n");
		ti->period = TT_NULL;
	}
	if (ti->period == TT_NULL) {
		ti->period = TT_MOVE;
		ti->dim = TD_GAMES;
		ti->len.games = MC_GAMES;
	}
	struct time_stop stop;
	time_stop_conditions(ti, b, 20, 40, 3.0, &stop);

	/* resign when the hope for win vanishes */
	coord_t top_coord = resign;
	floating_t top_ratio = mc->resign_ratio;

	/* We use [0] for pass. Normally, this is an inaccessible corner
	 * of board margin. */
	struct move_stat moves[board_size2(b)];
	memset(moves, 0, sizeof(moves));

	int losses = 0;
	int i, superko = 0, good_games = 0;
	for (i = 0; i < stop.desired.playouts; i++) {
		assert(!b->superko_violation);

		struct board b2;
		board_copy(&b2, b);

		coord_t coord;
		board_play_random(&b2, color, &coord, NULL, NULL);
		if (!is_pass(coord) && !group_at(&b2, coord)) {
			/* Multi-stone suicide. We play chinese rules,
			 * so we can't consider this. (Note that we
			 * unfortunately still consider this in playouts.) */
			if (DEBUGL(4)) {
				fprintf(stderr, "SUICIDE DETECTED at %d,%d:\n", coord_x(coord, b), coord_y(coord, b));
				board_print(b, stderr);
			}
			continue;
		}

		if (DEBUGL(3))
			fprintf(stderr, "[%d,%d color %d] playing random game\n", coord_x(coord, b), coord_y(coord, b), color);

		struct playout_setup ps = { .gamelen = mc->gamelen };
		int result = play_random_game(&ps, &b2, color, NULL, NULL, mc->playout);

		board_done_noalloc(&b2);

		if (result == 0) {
			/* Superko. We just ignore this playout.
			 * And play again. */
			if (unlikely(superko > 2 * stop.desired.playouts)) {
				/* Uhh. Triple ko, or something? */
				if (MCDEBUGL(0))
					fprintf(stderr, "SUPERKO LOOP. I will pass. Did we hit triple ko?\n");
				goto pass_wins;
			}
			/* This playout didn't count; we should not
			 * disadvantage moves that lead to a superko.
			 * And it is supposed to be rare. */
			i--, superko++;
			continue;
		}

		if (MCDEBUGL(3))
			fprintf(stderr, "\tresult for other player: %d\n", result);

		int pos = is_pass(coord) ? 0 : coord;

		good_games++;
		moves[pos].games++;

		losses += result > 0;
		moves[pos].wins += 1 - (result > 0);

		if (unlikely(!losses && i == mc->loss_threshold)) {
			/* We played out many games and didn't lose once yet.
			 * This game is over. */
			break;
		}
	}

	if (!good_games) {
		/* No moves to try??? */
		if (MCDEBUGL(0)) {
			fprintf(stderr, "OUT OF MOVES! I will pass. But how did this happen?\n");
			board_print(b, stderr);
		}
pass_wins:
		top_coord = pass; top_ratio = 0.5;
		goto move_found;
	}

	foreach_point(b) {
		if (b->moves < 3) {
			/* Simple heuristic: avoid opening too low. Do not
			 * play on second or first line as first white or
			 * first two black moves.*/
			if (coord_x(c, b) < 3 || coord_x(c, b) > board_size(b) - 4
			    || coord_y(c, b) < 3 || coord_y(c, b) > board_size(b) - 4)
				continue;
		}

		floating_t ratio = (floating_t) moves[c].wins / moves[c].games;
		/* Since pass is [0,0], we will pass only when we have nothing
		 * better to do. */
		if (ratio >= top_ratio) {
			top_ratio = ratio;
			top_coord = c == 0 ? pass : c;
		}
	} foreach_point_end;

	if (MCDEBUGL(2)) {
		board_stats_print(b, moves, stderr);
	}

move_found:
	if (MCDEBUGL(1))
		fprintf(stderr, "*** WINNER is %d,%d with score %1.4f (%d games, %d superko)\n", coord_x(top_coord, b), coord_y(top_coord, b), top_ratio, i, superko);

	return coord_copy(top_coord);
}

static void
montecarlo_done(struct engine *e)
{
	struct montecarlo *mc = e->data;
	playout_policy_done(mc->playout);
	joseki_done(mc->jdict);
}

struct montecarlo *
montecarlo_state_init(char *arg, struct board *b)
{
	struct montecarlo *mc = calloc2(1, sizeof(struct montecarlo));

	mc->debug_level = 1;
	mc->gamelen = MC_GAMELEN;
	mc->jdict = joseki_load(b->size);

	if (arg) {
		char *optspec, *next = arg;
		while (*next) {
			optspec = next;
			next += strcspn(next, ",");
			if (*next) { *next++ = 0; } else { *next = 0; }

			char *optname = optspec;
			char *optval = strchr(optspec, '=');
			if (optval) *optval++ = 0;

			if (!strcasecmp(optname, "debug")) {
				if (optval)
					mc->debug_level = atoi(optval);
				else
					mc->debug_level++;
			} else if (!strcasecmp(optname, "gamelen") && optval) {
				mc->gamelen = atoi(optval);
			} else if (!strcasecmp(optname, "playout") && optval) {
				char *playoutarg = strchr(optval, ':');
				if (playoutarg)
					*playoutarg++ = 0;
				if (!strcasecmp(optval, "moggy")) {
					mc->playout = playout_moggy_init(playoutarg, b, mc->jdict);
				} else if (!strcasecmp(optval, "light")) {
					mc->playout = playout_light_init(playoutarg, b);
				} else {
					fprintf(stderr, "MonteCarlo: Invalid playout policy %s\n", optval);
				}
			} else {
				fprintf(stderr, "MonteCarlo: Invalid engine argument %s or missing value\n", optname);
			}
		}
	}

	if (!mc->playout)
		mc->playout = playout_light_init(NULL, b);
	mc->playout->debug_level = mc->debug_level;

	mc->resign_ratio = 0.1; /* Resign when most games are lost. */
	mc->loss_threshold = 5000; /* Stop reading if no loss encountered in first 5000 games. */

	return mc;
}


struct engine *
engine_montecarlo_init(char *arg, struct board *b)
{
	struct montecarlo *mc = montecarlo_state_init(arg, b);
	struct engine *e = calloc2(1, sizeof(struct engine));
	e->name = "MonteCarlo";
	e->comment = "I'm playing in Monte Carlo. When we both pass, I will consider all the stones on the board alive. If you are reading this, write 'yes'. Please bear with me at the game end, I need to fill the whole board; if you help me, we will both be happier. Filling the board will not lose points (NZ rules).";
	e->genmove = montecarlo_genmove;
	e->done = montecarlo_done;
	e->data = mc;

	return e;
}
Пример #4
0
/* Syntax:
 *   moggy status (last_move) coord [coord...]
 *         Play number of random games starting from last_move
 * 
 *   moggy status     coord [coord...]
 *   moggy status (b) coord [coord...]
 *         Black to play, pick random white last move
 *
 *   moggy status (w) coord [coord...]  
 *         White to play, pick random black last move
 */
static bool
test_moggy_status(struct board *board, char *arg)
{
	int games = 4000;
	coord_t status_at[10];
	int n = 0;
	enum stone color = S_BLACK;
	int pick_random = true;  // Pick random last move for each game

	while (*arg && *arg != '#') {
		if (*arg == ' ' || *arg == '\t') {  arg++; continue;  }		
		if (!strncmp(arg, "(b)", 3))
			color = S_BLACK;
		else if (!strncmp(arg, "(w)", 3))
			color = S_WHITE;
		else if (*arg == '(') {  /* Optional "(last_move)" argument */
			arg++;	assert(isalpha(*arg));
			pick_random = false;
			struct move last;
			last.coord = str2scoord(arg, board_size(board));
			last.color = board_at(board, last.coord);
			assert(last.color == S_BLACK || last.color == S_WHITE);
			color = stone_other(last.color);
			board->last_move = last;
		}
		else {
			assert(isalpha(*arg));
			status_at[n++] = str2scoord(arg, board_size(board));
		}
		arg += strcspn(arg, " \t");
	}
	
	board_print(board, stderr);
	if (DEBUGL(1)) {
		printf("moggy status ");
		for (int i = 0; i < n; i++)
			printf("%s%s", coord2sstr(status_at[i], board), (i != n-1 ? " " : ""));
		printf(", %s to play. Playing %i games %s...\n", 
		       stone2str(color), games, (pick_random ? "(random last move) " : ""));
	}
	
	struct playout_policy *policy = playout_moggy_init(NULL, board, NULL);
	struct playout_setup setup = { .gamelen = MAX_GAMELEN };
	struct board_ownermap ownermap;

	ownermap.playouts = 0;
	ownermap.map = malloc2(board_size2(board) * sizeof(ownermap.map[0]));
	memset(ownermap.map, 0, board_size2(board) * sizeof(ownermap.map[0]));	


	/* Get final status estimate after a number of moggy games */
	int wr = 0;
	double time_start = time_now();
	for (int i = 0; i < games; i++)  {
		struct board b;
		board_copy(&b, board);
		if (pick_random)
			pick_random_last_move(&b, color);
		
		int score = play_random_game(&setup, &b, color, NULL, &ownermap, policy);
		if (color == S_WHITE)
			score = -score;
		wr += (score > 0);
		board_done_noalloc(&b);
	}
	double elapsed = time_now() - time_start;
	printf("moggy status in %.1fs, %i games/s\n\n", elapsed, (int)((float)games / elapsed));
	
	int wr_black = wr * 100 / games;
	int wr_white = (games - wr) * 100 / games;
	if (wr_black > wr_white)
		printf("Winrate: [ black %i%% ]  white %i%%\n\n", wr_black, wr_white);
	else
		printf("Winrate: black %i%%  [ white %i%% ]\n\n", wr_black, wr_white);

	board_print_ownermap(board, stderr, &ownermap);

	for (int i = 0; i < n; i++) {
		coord_t c = status_at[i];
		enum stone color = (ownermap.map[c][S_BLACK] > ownermap.map[c][S_WHITE] ? S_BLACK : S_WHITE);
		fprintf(stderr, "%3s owned by %s: %i%%\n", 
			coord2sstr(c, board), stone2str(color), 
			ownermap.map[c][color] * 100 / ownermap.playouts);
	}
	
	free(ownermap.map);
	playout_policy_done(policy);
	return true;   // Not much of a unit test right now =)
}