Beispiel #1
0
static coord_t *
random_genmove(struct engine *e, struct board *b, struct time_info *ti, enum stone color, bool pass_all_alive)
{
	/* Play a random coordinate. However, we must also guard
	 * against suicide moves; repeat playing while it's a suicide
	 * unless we keep suiciding; in that case, we probably don't
	 * have any other moves available and we pass. */
	coord_t coord;
	int i = 0; bool suicide = false;

	do {
		/* board_play_random() actually plays the move too;
		 * this is desirable for MC simulations but not within
		 * the genmove. Make a scratch new board for it. */
		struct board b2;
		board_copy(&b2, b);

		board_play_random(&b2, color, &coord, NULL, NULL);

		suicide = (coord != pass && !group_at(&b2, coord));
		board_done_noalloc(&b2);
	} while (suicide && i++ < 100);

	return coord_copy(suicide ? pass : coord);
}
Beispiel #2
0
void
board_done(struct board *board)
{
	board_done_noalloc(board);
	free(board);
}
Beispiel #3
0
int
uct_playout(struct uct *u, struct board *b, enum stone player_color, struct tree *t)
{
	struct board b2;
	board_copy(&b2, b);

	struct playout_amafmap amaf;
	amaf.gamelen = amaf.game_baselen = 0;

	/* Walk the tree until we find a leaf, then expand it and do
	 * a random playout. */
	struct tree_node *n = t->root;
	enum stone node_color = stone_other(player_color);
	assert(node_color == t->root_color);

	/* Make sure the root node is expanded. */
	if (tree_leaf_node(n) && !__sync_lock_test_and_set(&n->is_expanded, 1))
		tree_expand_node(t, n, &b2, player_color, u, 1);

	/* Tree descent history. */
	/* XXX: This is somewhat messy since @n and descent[dlen-1].node are
	 * redundant. */
	struct uct_descent descent[DESCENT_DLEN];
	descent[0].node = n; descent[0].lnode = NULL;
	int dlen = 1;
	/* Total value of the sequence. */
	struct move_stats seq_value = { .playouts = 0 };
	/* The last "significant" node along the descent (i.e. node
	 * with higher than configured number of playouts). For black
	 * and white. */
	struct tree_node *significant[2] = { NULL, NULL };
	if (n->u.playouts >= u->significant_threshold)
		significant[node_color - 1] = n;

	int result;
	int pass_limit = (board_size(&b2) - 2) * (board_size(&b2) - 2) / 2;
	int passes = is_pass(b->last_move.coord) && b->moves > 0;

	/* debug */
	static char spaces[] = "\0                                                      ";
	/* /debug */
	if (UDEBUGL(8))
		fprintf(stderr, "--- UCT walk with color %d\n", player_color);

	while (!tree_leaf_node(n) && passes < 2) {
		spaces[dlen - 1] = ' '; spaces[dlen] = 0;


		/*** Choose a node to descend to: */

		/* Parity is chosen already according to the child color, since
		 * it is applied to children. */
		node_color = stone_other(node_color);
		int parity = (node_color == player_color ? 1 : -1);

		assert(dlen < DESCENT_DLEN);
		descent[dlen] = descent[dlen - 1];
		if (u->local_tree && (!descent[dlen].lnode || descent[dlen].node->d >= u->tenuki_d)) {
			/* Start new local sequence. */
			/* Remember that node_color already holds color of the
			 * to-be-found child. */
			descent[dlen].lnode = node_color == S_BLACK ? t->ltree_black : t->ltree_white;
		}

		if (!u->random_policy_chance || fast_random(u->random_policy_chance))
			u->policy->descend(u->policy, t, &descent[dlen], parity, b2.moves > pass_limit);
		else
			u->random_policy->descend(u->random_policy, t, &descent[dlen], parity, b2.moves > pass_limit);


		/*** Perform the descent: */

		if (descent[dlen].node->u.playouts >= u->significant_threshold) {
			significant[node_color - 1] = descent[dlen].node;
		}

		seq_value.playouts += descent[dlen].value.playouts;
		seq_value.value += descent[dlen].value.value * descent[dlen].value.playouts;
		n = descent[dlen++].node;
		assert(n == t->root || n->parent);
		if (UDEBUGL(7))
			fprintf(stderr, "%s+-- UCT sent us to [%s:%d] %d,%f\n",
			        spaces, coord2sstr(node_coord(n), t->board),
				node_coord(n), n->u.playouts,
				tree_node_get_value(t, parity, n->u.value));

		/* Add virtual loss if we need to; this is used to discourage
		 * other threads from visiting this node in case of multiple
		 * threads doing the tree search. */
		if (u->virtual_loss)
			stats_add_result(&n->u, node_color == S_BLACK ? 0.0 : 1.0, u->virtual_loss);

		assert(node_coord(n) >= -1);
		record_amaf_move(&amaf, node_coord(n));

		struct move m = { node_coord(n), node_color };
		int res = board_play(&b2, &m);

		if (res < 0 || (!is_pass(m.coord) && !group_at(&b2, m.coord)) /* suicide */
		    || b2.superko_violation) {
			if (UDEBUGL(4)) {
				for (struct tree_node *ni = n; ni; ni = ni->parent)
					fprintf(stderr, "%s<%"PRIhash"> ", coord2sstr(node_coord(ni), t->board), ni->hash);
				fprintf(stderr, "marking invalid %s node %d,%d res %d group %d spk %d\n",
				        stone2str(node_color), coord_x(node_coord(n),b), coord_y(node_coord(n),b),
					res, group_at(&b2, m.coord), b2.superko_violation);
			}
			n->hints |= TREE_HINT_INVALID;
			result = 0;
			goto end;
		}

		if (is_pass(node_coord(n)))
			passes++;
		else
			passes = 0;

		enum stone next_color = stone_other(node_color);
		/* We need to make sure only one thread expands the node. If
		 * we are unlucky enough for two threads to meet in the same
		 * node, the latter one will simply do another simulation from
		 * the node itself, no big deal. t->nodes_size may exceed
		 * the maximum in multi-threaded case but not by much so it's ok.
		 * The size test must be before the test&set not after, to allow
		 * expansion of the node later if enough nodes have been freed. */
		if (tree_leaf_node(n)
		    && n->u.playouts - u->virtual_loss >= u->expand_p && t->nodes_size < u->max_tree_size
		    && !__sync_lock_test_and_set(&n->is_expanded, 1))
			tree_expand_node(t, n, &b2, next_color, u, -parity);
	}

	amaf.game_baselen = amaf.gamelen;

	if (t->use_extra_komi && u->dynkomi->persim) {
		b2.komi += round(u->dynkomi->persim(u->dynkomi, &b2, t, n));
	}

	if (passes >= 2) {
		/* XXX: No dead groups support. */
		floating_t score = board_official_score(&b2, NULL);
		/* Result from black's perspective (no matter who
		 * the player; black's perspective is always
		 * what the tree stores. */
		result = - (score * 2);

		if (UDEBUGL(5))
			fprintf(stderr, "[%d..%d] %s p-p scoring playout result %d (W %f)\n",
				player_color, node_color, coord2sstr(node_coord(n), t->board), result, score);
		if (UDEBUGL(6))
			board_print(&b2, stderr);

		board_ownermap_fill(&u->ownermap, &b2);

	} else { // assert(tree_leaf_node(n));
		/* In case of parallel tree search, the assertion might
		 * not hold if two threads chew on the same node. */
		result = uct_leaf_node(u, &b2, player_color, &amaf, descent, &dlen, significant, t, n, node_color, spaces);
	}

	if (u->policy->wants_amaf && u->playout_amaf_cutoff) {
		unsigned int cutoff = amaf.game_baselen;
		cutoff += (amaf.gamelen - amaf.game_baselen) * u->playout_amaf_cutoff / 100;
		amaf.gamelen = cutoff;
	}

	/* Record the result. */

	assert(n == t->root || n->parent);
	floating_t rval = scale_value(u, b, result);
	u->policy->update(u->policy, t, n, node_color, player_color, &amaf, &b2, rval);

	if (t->use_extra_komi) {
		stats_add_result(&u->dynkomi->score, result / 2, 1);
		stats_add_result(&u->dynkomi->value, rval, 1);
	}

	if (u->local_tree && n->parent && !is_pass(node_coord(n)) && dlen > 0) {
		/* Get the local sequences and record them in ltree. */
		/* We will look for sequence starts in our descent
		 * history, then run record_local_sequence() for each
		 * found sequence start; record_local_sequence() may
		 * pick longer sequences from descent history then,
		 * which is expected as it will create new lnodes. */
		enum stone seq_color = player_color;
		/* First move always starts a sequence. */
		record_local_sequence(u, t, &b2, descent, dlen, 1, seq_color);
		seq_color = stone_other(seq_color);
		for (int dseqi = 2; dseqi < dlen; dseqi++, seq_color = stone_other(seq_color)) {
			if (u->local_tree_allseq) {
				/* We are configured to record all subsequences. */
				record_local_sequence(u, t, &b2, descent, dlen, dseqi, seq_color);
				continue;
			}
			if (descent[dseqi].node->d >= u->tenuki_d) {
				/* Tenuki! Record the fresh sequence. */
				record_local_sequence(u, t, &b2, descent, dlen, dseqi, seq_color);
				continue;
			}
			if (descent[dseqi].lnode && !descent[dseqi].lnode) {
				/* Record result for in-descent picked sequence. */
				record_local_sequence(u, t, &b2, descent, dlen, dseqi, seq_color);
				continue;
			}
		}
	}

end:
	/* We need to undo the virtual loss we added during descend. */
	if (u->virtual_loss) {
		floating_t loss = node_color == S_BLACK ? 0.0 : 1.0;
		for (; n->parent; n = n->parent) {
			stats_rm_result(&n->u, loss, u->virtual_loss);
			loss = 1.0 - loss;
		}
	}

	board_done_noalloc(&b2);
	return result;
}

int
uct_playouts(struct uct *u, struct board *b, enum stone color, struct tree *t, struct time_info *ti)
{
	int i;
	if (ti && ti->dim == TD_GAMES) {
		for (i = 0; t->root->u.playouts <= ti->len.games && !uct_halt; i++)
			uct_playout(u, b, color, t);
	} else {
		for (i = 0; !uct_halt; i++)
			uct_playout(u, b, color, t);
	}
	return i;
}
Beispiel #4
0
static coord_t *
montecarlo_genmove(struct engine *e, struct board *b, struct time_info *ti, enum stone color, bool pass_all_alive)
{
	struct montecarlo *mc = e->data;

	if (ti->dim == TD_WALLTIME) {
		fprintf(stderr, "Warning: TD_WALLTIME time mode not supported, resetting to defaults.\n");
		ti->period = TT_NULL;
	}
	if (ti->period == TT_NULL) {
		ti->period = TT_MOVE;
		ti->dim = TD_GAMES;
		ti->len.games = MC_GAMES;
	}
	struct time_stop stop;
	time_stop_conditions(ti, b, 20, 40, 3.0, &stop);

	/* resign when the hope for win vanishes */
	coord_t top_coord = resign;
	floating_t top_ratio = mc->resign_ratio;

	/* We use [0] for pass. Normally, this is an inaccessible corner
	 * of board margin. */
	struct move_stat moves[board_size2(b)];
	memset(moves, 0, sizeof(moves));

	int losses = 0;
	int i, superko = 0, good_games = 0;
	for (i = 0; i < stop.desired.playouts; i++) {
		assert(!b->superko_violation);

		struct board b2;
		board_copy(&b2, b);

		coord_t coord;
		board_play_random(&b2, color, &coord, NULL, NULL);
		if (!is_pass(coord) && !group_at(&b2, coord)) {
			/* Multi-stone suicide. We play chinese rules,
			 * so we can't consider this. (Note that we
			 * unfortunately still consider this in playouts.) */
			if (DEBUGL(4)) {
				fprintf(stderr, "SUICIDE DETECTED at %d,%d:\n", coord_x(coord, b), coord_y(coord, b));
				board_print(b, stderr);
			}
			continue;
		}

		if (DEBUGL(3))
			fprintf(stderr, "[%d,%d color %d] playing random game\n", coord_x(coord, b), coord_y(coord, b), color);

		struct playout_setup ps = { .gamelen = mc->gamelen };
		int result = play_random_game(&ps, &b2, color, NULL, NULL, mc->playout);

		board_done_noalloc(&b2);

		if (result == 0) {
			/* Superko. We just ignore this playout.
			 * And play again. */
			if (unlikely(superko > 2 * stop.desired.playouts)) {
				/* Uhh. Triple ko, or something? */
				if (MCDEBUGL(0))
					fprintf(stderr, "SUPERKO LOOP. I will pass. Did we hit triple ko?\n");
				goto pass_wins;
			}
			/* This playout didn't count; we should not
			 * disadvantage moves that lead to a superko.
			 * And it is supposed to be rare. */
			i--, superko++;
			continue;
		}

		if (MCDEBUGL(3))
			fprintf(stderr, "\tresult for other player: %d\n", result);

		int pos = is_pass(coord) ? 0 : coord;

		good_games++;
		moves[pos].games++;

		losses += result > 0;
		moves[pos].wins += 1 - (result > 0);

		if (unlikely(!losses && i == mc->loss_threshold)) {
			/* We played out many games and didn't lose once yet.
			 * This game is over. */
			break;
		}
	}

	if (!good_games) {
		/* No moves to try??? */
		if (MCDEBUGL(0)) {
			fprintf(stderr, "OUT OF MOVES! I will pass. But how did this happen?\n");
			board_print(b, stderr);
		}
pass_wins:
		top_coord = pass; top_ratio = 0.5;
		goto move_found;
	}

	foreach_point(b) {
		if (b->moves < 3) {
			/* Simple heuristic: avoid opening too low. Do not
			 * play on second or first line as first white or
			 * first two black moves.*/
			if (coord_x(c, b) < 3 || coord_x(c, b) > board_size(b) - 4
			    || coord_y(c, b) < 3 || coord_y(c, b) > board_size(b) - 4)
				continue;
		}

		floating_t ratio = (floating_t) moves[c].wins / moves[c].games;
		/* Since pass is [0,0], we will pass only when we have nothing
		 * better to do. */
		if (ratio >= top_ratio) {
			top_ratio = ratio;
			top_coord = c == 0 ? pass : c;
		}
	} foreach_point_end;

	if (MCDEBUGL(2)) {
		board_stats_print(b, moves, stderr);
	}

move_found:
	if (MCDEBUGL(1))
		fprintf(stderr, "*** WINNER is %d,%d with score %1.4f (%d games, %d superko)\n", coord_x(top_coord, b), coord_y(top_coord, b), top_ratio, i, superko);

	return coord_copy(top_coord);
}

static void
montecarlo_done(struct engine *e)
{
	struct montecarlo *mc = e->data;
	playout_policy_done(mc->playout);
	joseki_done(mc->jdict);
}

struct montecarlo *
montecarlo_state_init(char *arg, struct board *b)
{
	struct montecarlo *mc = calloc2(1, sizeof(struct montecarlo));

	mc->debug_level = 1;
	mc->gamelen = MC_GAMELEN;
	mc->jdict = joseki_load(b->size);

	if (arg) {
		char *optspec, *next = arg;
		while (*next) {
			optspec = next;
			next += strcspn(next, ",");
			if (*next) { *next++ = 0; } else { *next = 0; }

			char *optname = optspec;
			char *optval = strchr(optspec, '=');
			if (optval) *optval++ = 0;

			if (!strcasecmp(optname, "debug")) {
				if (optval)
					mc->debug_level = atoi(optval);
				else
					mc->debug_level++;
			} else if (!strcasecmp(optname, "gamelen") && optval) {
				mc->gamelen = atoi(optval);
			} else if (!strcasecmp(optname, "playout") && optval) {
				char *playoutarg = strchr(optval, ':');
				if (playoutarg)
					*playoutarg++ = 0;
				if (!strcasecmp(optval, "moggy")) {
					mc->playout = playout_moggy_init(playoutarg, b, mc->jdict);
				} else if (!strcasecmp(optval, "light")) {
					mc->playout = playout_light_init(playoutarg, b);
				} else {
					fprintf(stderr, "MonteCarlo: Invalid playout policy %s\n", optval);
				}
			} else {
				fprintf(stderr, "MonteCarlo: Invalid engine argument %s or missing value\n", optname);
			}
		}
	}

	if (!mc->playout)
		mc->playout = playout_light_init(NULL, b);
	mc->playout->debug_level = mc->debug_level;

	mc->resign_ratio = 0.1; /* Resign when most games are lost. */
	mc->loss_threshold = 5000; /* Stop reading if no loss encountered in first 5000 games. */

	return mc;
}


struct engine *
engine_montecarlo_init(char *arg, struct board *b)
{
	struct montecarlo *mc = montecarlo_state_init(arg, b);
	struct engine *e = calloc2(1, sizeof(struct engine));
	e->name = "MonteCarlo";
	e->comment = "I'm playing in Monte Carlo. When we both pass, I will consider all the stones on the board alive. If you are reading this, write 'yes'. Please bear with me at the game end, I need to fill the whole board; if you help me, we will both be happier. Filling the board will not lose points (NZ rules).";
	e->genmove = montecarlo_genmove;
	e->done = montecarlo_done;
	e->data = mc;

	return e;
}
Beispiel #5
0
/* Syntax:
 *   moggy status (last_move) coord [coord...]
 *         Play number of random games starting from last_move
 * 
 *   moggy status     coord [coord...]
 *   moggy status (b) coord [coord...]
 *         Black to play, pick random white last move
 *
 *   moggy status (w) coord [coord...]  
 *         White to play, pick random black last move
 */
static bool
test_moggy_status(struct board *board, char *arg)
{
	int games = 4000;
	coord_t status_at[10];
	int n = 0;
	enum stone color = S_BLACK;
	int pick_random = true;  // Pick random last move for each game

	while (*arg && *arg != '#') {
		if (*arg == ' ' || *arg == '\t') {  arg++; continue;  }		
		if (!strncmp(arg, "(b)", 3))
			color = S_BLACK;
		else if (!strncmp(arg, "(w)", 3))
			color = S_WHITE;
		else if (*arg == '(') {  /* Optional "(last_move)" argument */
			arg++;	assert(isalpha(*arg));
			pick_random = false;
			struct move last;
			last.coord = str2scoord(arg, board_size(board));
			last.color = board_at(board, last.coord);
			assert(last.color == S_BLACK || last.color == S_WHITE);
			color = stone_other(last.color);
			board->last_move = last;
		}
		else {
			assert(isalpha(*arg));
			status_at[n++] = str2scoord(arg, board_size(board));
		}
		arg += strcspn(arg, " \t");
	}
	
	board_print(board, stderr);
	if (DEBUGL(1)) {
		printf("moggy status ");
		for (int i = 0; i < n; i++)
			printf("%s%s", coord2sstr(status_at[i], board), (i != n-1 ? " " : ""));
		printf(", %s to play. Playing %i games %s...\n", 
		       stone2str(color), games, (pick_random ? "(random last move) " : ""));
	}
	
	struct playout_policy *policy = playout_moggy_init(NULL, board, NULL);
	struct playout_setup setup = { .gamelen = MAX_GAMELEN };
	struct board_ownermap ownermap;

	ownermap.playouts = 0;
	ownermap.map = malloc2(board_size2(board) * sizeof(ownermap.map[0]));
	memset(ownermap.map, 0, board_size2(board) * sizeof(ownermap.map[0]));	


	/* Get final status estimate after a number of moggy games */
	int wr = 0;
	double time_start = time_now();
	for (int i = 0; i < games; i++)  {
		struct board b;
		board_copy(&b, board);
		if (pick_random)
			pick_random_last_move(&b, color);
		
		int score = play_random_game(&setup, &b, color, NULL, &ownermap, policy);
		if (color == S_WHITE)
			score = -score;
		wr += (score > 0);
		board_done_noalloc(&b);
	}
	double elapsed = time_now() - time_start;
	printf("moggy status in %.1fs, %i games/s\n\n", elapsed, (int)((float)games / elapsed));
	
	int wr_black = wr * 100 / games;
	int wr_white = (games - wr) * 100 / games;
	if (wr_black > wr_white)
		printf("Winrate: [ black %i%% ]  white %i%%\n\n", wr_black, wr_white);
	else
		printf("Winrate: black %i%%  [ white %i%% ]\n\n", wr_black, wr_white);

	board_print_ownermap(board, stderr, &ownermap);

	for (int i = 0; i < n; i++) {
		coord_t c = status_at[i];
		enum stone color = (ownermap.map[c][S_BLACK] > ownermap.map[c][S_WHITE] ? S_BLACK : S_WHITE);
		fprintf(stderr, "%3s owned by %s: %i%%\n", 
			coord2sstr(c, board), stone2str(color), 
			ownermap.map[c][color] * 100 / ownermap.playouts);
	}
	
	free(ownermap.map);
	playout_policy_done(policy);
	return true;   // Not much of a unit test right now =)
}