예제 #1
0
파일: ai.c 프로젝트: despley/BlueMoonMac
/*
 * Run one training pass on a player's network.
 *
 * When "desired" is non-NULL (it is passed only at the end of the game),
 * its two values become the training target and the current inputs are
 * trained against them as well.  When it is NULL, the target is the
 * network's own output for the current game state, so results computed
 * from past inputs are pulled toward the results for current inputs.
 *
 * The stored past inputs are then replayed, most recent first, each one
 * trained with a weight that shrinks by a factor of 0.9 per step back.
 */
static void perform_training(game *g, int who, double *desired)
{
	net *nn = &learner[who];
	double goal[2];
	double weight = 1.0;
	int idx;

	/* An uninitialized network cannot be trained */
	if (!nn->num_inputs) return;

	/* Evaluate the present state so the network outputs are current */
	eval_game(g, who);

	/* Record the inputs that were just evaluated */
	store_net(nn);

	if (!desired)
	{
		/* No outside result: aim for what the network says right now */
		goal[0] = nn->win_prob[0];
		goal[1] = nn->win_prob[1];
	}
	else
	{
		/* Outside result supplied: use it as the target */
		goal[0] = desired[0];
		goal[1] = desired[1];

		/* The current inputs are trained against it too */
		train_net(nn, weight, goal);
	}

	/*
	 * Replay earlier inputs from newest to oldest.  The walk starts at
	 * num_past - 2, i.e. it skips the last stored entry (presumably the
	 * one store_net() added above -- confirm against store_net()).
	 */
	for (idx = nn->num_past - 2; idx >= 0; weight *= 0.9, idx--)
	{
		/* Load this past position into the network's inputs */
		memcpy(nn->input_value, nn->past_input[idx],
		       sizeof(int) * (nn->num_inputs + 1));

		/* Recompute outputs for it */
		compute_net(nn);

		/* Nudge those outputs toward the target */
		train_net(nn, weight, goal);
	}
}
예제 #2
0
/*
 * Train a network so that the current results are more like the desired.
 *
 * learn:   network to train.  Its current outputs (win_prob), hidden
 *          results and input values are read as they stand on entry.
 * lambda:  weight of this training sample.  It scales the error and is
 *          added to the running count of error events.
 * desired: target value for each of the network's outputs.
 *
 * Corrections are accumulated into output_delta and hidden_delta; the
 * weights themselves are not modified here.  On return the per-node
 * hidden errors, the stored hidden sums and the previous inputs have been
 * cleared.
 *
 * If the scratch array for the hidden correction factors cannot be
 * allocated, the factors are computed on the fly instead, so the function
 * never dereferences a NULL allocation.
 */
void train_net(net *learn, double lambda, double *desired)
{
	int i, j, k;
	double error, corr, deriv, hderiv;
	double *hidden_corr;

	/* Count error events */
	learn->num_error += lambda;

	/* Loop over output nodes */
	for (i = 0; i < learn->num_output; i++)
	{
		/* Compute error */
		error = lambda * (learn->win_prob[i] - desired[i]);

		/* Accumulate squared error */
		learn->error += error * error;

		/* Output portion of partial derivatives */
		deriv = learn->win_prob[i] * (1.0 - learn->win_prob[i]);

		/* Loop over node's weights */
		for (j = 0; j < learn->num_hidden; j++)
		{
			/* Compute correction */
			corr = -error * learn->hidden_result[j] * deriv;

			/* Compute hidden node's effect on output */
			hderiv = deriv * learn->output_weight[j][i];

			/* Loop over other output nodes */
			for (k = 0; k < learn->num_output; k++)
			{
				/* Skip this output node */
				if (i == k) continue;

				/* Subtract this node's factor */
				hderiv -= learn->output_weight[j][k] *
				          learn->net_result[i] *
				          learn->net_result[k] /
				          (learn->prob_sum * learn->prob_sum);
			}

			/* Compute hidden node's error */
			learn->hidden_error[j] += error * hderiv;

			/* Apply correction */
			learn->output_delta[j][i] += learn->alpha * corr;
		}

		/*
		 * Compute bias weight's correction.  After the loop above j is
		 * one past the last hidden node, which is the row used for the
		 * bias weight.
		 */
		learn->output_delta[j][i] += learn->alpha * -error * deriv;
	}

	/*
	 * Create array of hidden weight correction factors.  This is only a
	 * cache; a NULL result (allocation failure, or a zero-sized request)
	 * is handled below by computing each factor where it is used.
	 */
	hidden_corr = (double *)malloc(sizeof(double) * learn->num_hidden);

	/* Fill the cache when we have one */
	if (hidden_corr)
	{
		/* Loop over hidden nodes */
		for (i = 0; i < learn->num_hidden; i++)
		{
			/* Output portion of partial derivatives */
			deriv = 1 - (learn->hidden_result[i] *
			             learn->hidden_result[i]);

			/* Calculate correction factor */
			hidden_corr[i] = deriv * -learn->hidden_error[i] *
			                 learn->alpha;
		}
	}

	/* Loop over inputs (the extra slot at num_inputs is included) */
	for (i = 0; i < learn->num_inputs + 1; i++)
	{
		/* Skip zero inputs */
		if (!learn->input_value[i]) continue;

		/* Loop over hidden nodes */
		for (j = 0; j < learn->num_hidden; j++)
		{
			if (hidden_corr)
			{
				/* Use cached factor */
				corr = hidden_corr[j];
			}
			else
			{
				/* No cache: same formula, computed in place */
				deriv = 1 - (learn->hidden_result[j] *
				             learn->hidden_result[j]);
				corr = deriv * -learn->hidden_error[j] *
				       learn->alpha;
			}

			/* Adjust weight */
			learn->hidden_delta[i][j] += corr *
			                             learn->input_value[i];
		}
	}

	/* Destroy hidden correction factor array (free(NULL) is a no-op) */
	free(hidden_corr);

	/* Loop over hidden nodes */
	for (i = 0; i < learn->num_hidden; i++)
	{
		/* Clear node's error */
		learn->hidden_error[i] = 0;

		/* Clear node's stored sum */
		learn->hidden_sum[i] = 0;
	}

	/* Clear previous inputs */
	memset(learn->prev_input, 0, sizeof(double) * (learn->num_inputs + 1));

#ifdef NOISY
	/*
	 * NOTE(review): "orig" is not declared in this function; a NOISY
	 * build needs it supplied elsewhere (presumably the outputs saved
	 * before training -- confirm).
	 */
	compute_net(learn);
	for (i = 0; i < learn->num_output; i++)
	{
		printf("%lf -> %lf: %lf\n", orig[i], desired[i], learn->win_prob[i]);
	}
#endif
}
예제 #3
0
파일: ai.c 프로젝트: despley/BlueMoonMac
/*
 * Evaluate the current game state.
 *
 * Encodes the state of game "g", as seen by player "who", into the inputs
 * of that player's network, runs the network, and returns the network's
 * win probability output for "who".  Returns 0.5 immediately when the
 * player's network has no inputs (no learner loaded).
 *
 * Inputs are assigned one after another through the running index "n",
 * so the order of the SET_INPUT calls below defines the input layout.
 * Presumably this must match the layout the network was created and
 * trained with -- do not reorder without checking.
 *
 * Many values are encoded as a run of threshold inputs: for a run of
 * length K, input j (0 <= j < K) is set when the value is greater than j.
 */
static double eval_game(game *g, int who)
{
	player *p;
	card *c;
	int n = 0, i, j;
	int power, stack, bluff, bad_bluff;
	net *l;

	/* Get player's network */
	l = &learner[who];

	/* Check for no learner loaded */
	if (!l->num_inputs) return 0.5;

	/*
	 * Per-card inputs, for each player in turn.  Deck index 0 is skipped
	 * in every card loop of this function.  A card belonging to the other
	 * player only contributes where its location is known.
	 */
	for (i = 0; i < 2; i++)
	{
		/* Get player pointer */
		p = &g->p[i];

		/* Loop over cards in deck */
		for (j = 1; j < DECK_SIZE; j++)
		{
			/* Get card pointer */
			c = &p->deck[j];

			/* Set input for active cards (except leadership) */
			SET_INPUT(l, n++, c->active && !c->random_fake &&
			                  c->d_ptr->type != TYPE_LEADERSHIP &&
			                  (who == i || c->loc_known));
		}

		/* Loop over cards in deck */
		for (j = 1; j < DECK_SIZE; j++)
		{
			/* Get card pointer */
			c = &p->deck[j];

			/* Set input for cards in hand (if known) */
			SET_INPUT(l, n++, (who == i || c->loc_known) &&
			                  c->where == LOC_HAND &&
			                  !c->random_fake);
		}

		/* Loop over cards in deck */
		for (j = 1; j < DECK_SIZE; j++)
		{
			/* Get card pointer */
			c = &p->deck[j];

			/*
			 * Set input for used cards: discarded, in the leadership
			 * location, or in combat but no longer active
			 */
			SET_INPUT(l, n++, (who == i || c->loc_known) &&
			                  !c->random_fake &&
			                  (c->where == LOC_DISCARD ||
			                   c->where == LOC_LEADERSHIP ||
					   (c->where == LOC_COMBAT &&
			                    !c->active)));
		}

		/* Loop over cards in deck */
		for (j = 1; j < DECK_SIZE; j++)
		{
			/* Get card pointer */
			c = &p->deck[j];

			/* Set input for cards loaded on ship */
			SET_INPUT(l, n++, (c->ship != NULL));
		}
	}

	/* Get evaluating player */
	p = &g->p[who];

	/* Loop over cards in deck */
	for (i = 1; i < DECK_SIZE; i++)
	{
		/* Get card pointer */
		c = &p->deck[i];

		/* Set input for "special" card (boosted, on bottom, or bluff) */
		SET_INPUT(l, n++, c->text_boosted || c->on_bottom || c->bluff);
	}

	/* Assume no bad bluff */
	bad_bluff = 0;

	/* Loop over cards in deck (still the evaluating player's) */
	for (i = 1; i < DECK_SIZE; i++)
	{
		/* Get card pointer */
		c = &p->deck[i];

		/* Skip non-bluff cards */
		if (!c->bluff) continue;

		/*
		 * Check for bad bluff: the card lacks the bluff icon matching
		 * the fight element (a zero fight_element is checked against
		 * ICON_BLUFF_F, non-zero against ICON_BLUFF_E)
		 */
		if ((!g->fight_element && !(c->icons & ICON_BLUFF_F)) ||
		     (g->fight_element && !(c->icons & ICON_BLUFF_E)))
		{
			/* Bluff is bad */
			bad_bluff = 1;
		}
	}

	/* Set input for a bad bluff */
	SET_INPUT(l, n++, bad_bluff);

	/* Set input for game over */
	SET_INPUT(l, n++, g->game_over);

	/* Set input for fight started */
	SET_INPUT(l, n++, g->fight_started);

	/* Set inputs for fight element (only if started) */
	SET_INPUT(l, n++, g->fight_element && g->fight_started);
	SET_INPUT(l, n++, !g->fight_element && g->fight_started);

	/* Per-player summary inputs */
	for (i = 0; i < 2; i++)
	{
		/* Get player pointer */
		p = &g->p[i];

		/* Set input if it is this player's turn */
		SET_INPUT(l, n++, g->turn == i && !g->game_over);

		/* Check for fight started */
		if (g->fight_started)
		{
			/* Compute power level */
			power = compute_power(g, i);
		}
		else
		{
			/* Assume no power */
			power = 0;
		}

		/* Loop over possible power values (15 threshold inputs) */
		for (j = 0; j < 15; j++)
		{
			/* Set input if player has more than this much power */
			SET_INPUT(l, n++, power > j);
		}

		/* Count cards in the combat and support locations */
		stack = p->stack[LOC_COMBAT] + p->stack[LOC_SUPPORT];

		/* Assume no bluff cards */
		bluff = 0;

		/* Loop over cards */
		for (j = 1; j < DECK_SIZE; j++)
		{
			/* Get card pointer */
			c = &p->deck[j];

			/* Check for bluff card */
			if (c->bluff)
			{
				/* Bluff cards do not count for dragons */
				stack--;

				/* Count bluffs */
				bluff++;
			}
		}

		/* Loop over stack sizes (8 threshold inputs) */
		for (j = 0; j < 8; j++)
		{
			/* Set input if player has more than this many cards played */
			SET_INPUT(l, n++, stack > j);
		}

		/* Loop over bluff counts (4 threshold inputs) */
		for (j = 0; j < 4; j++)
		{
			/* Set input if player has more than this many bluffs */
			SET_INPUT(l, n++, bluff > j);
		}

		/* Count cards in hand */
		stack = p->stack[LOC_HAND];

		/* Loop over hand sizes (10 threshold inputs) */
		for (j = 0; j < 10; j++)
		{
			/* Set input if player has more than this many cards */
			SET_INPUT(l, n++, stack > j);
		}

		/* Count cards in draw deck and hand */
		stack = p->stack[LOC_DRAW] + p->stack[LOC_HAND];

		/* Loop over deck sizes (30 threshold inputs) */
		for (j = 0; j < 30; j++)
		{
			/* Set input if player has more than this many cards */
			SET_INPUT(l, n++, stack > j);
		}

		/* Assume no characters */
		stack = 0;

		/* Loop over deck */
		for (j = 1; j < DECK_SIZE; j++)
		{
			/* Get card pointer */
			c = &p->deck[j];

			/* Skip non-characters */
			if (c->d_ptr->type != TYPE_CHARACTER) continue;

			/*
			 * Count the card only when it is in hand, is our own or
			 * has a known location, and is not a random fake
			 */
			if (c->where == LOC_HAND &&
			    (who == i || c->loc_known) &&
			    !c->random_fake)
			{
				/* Add one more character */
				stack++;
			}
		}

		/* Loop over character counts (5 threshold inputs) */
		for (j = 0; j < 5; j++)
		{
			/* Set input if player has more than this many characters */
			SET_INPUT(l, n++, stack > j);
		}

		/* Assume no undisclosed cards */
		stack = 0;

		/* Loop over deck */
		for (j = 1; j < DECK_SIZE; j++)
		{
			/* Get card pointer */
			c = &p->deck[j];

			/* Skip cards not in hand */
			if (c->where != LOC_HAND) continue;

			/* Skip disclosed cards */
			if (c->disclosed) continue;

			/* Count undisclosed cards */
			stack++;
		}

		/* Loop over undisclosed counts (6 threshold inputs) */
		for (j = 0; j < 6; j++)
		{
			/* Set input if player has more than this many undisclosed cards */
			SET_INPUT(l, n++, stack > j);
		}

		/* Set input if player is first to run out of cards */
		SET_INPUT(l, n++, p->no_cards);

		/* Loop over dragon counts (3 threshold inputs) */
		for (j = 0; j < 3; j++)
		{
			/* Set input if player has more than this many dragons */
			SET_INPUT(l, n++, p->dragons > j);
		}

		/* Set input if player has instant victory */
		SET_INPUT(l, n++, p->instant_win);
	}

	/* Compute network value */
	compute_net(l);

#ifdef DEBUG
	/* Print score and path to get here */
	if (verbose && !checking_retreat && best_path_pos > 0)
	{
		printf("%.12lf: ", l->win_prob[who]);

		/* Walk the recorded actions, up to and including best_path_pos */
		for (i = 0; i <= best_path_pos; i++)
		{
			action a;

			a = cur_path[i];

			/*
			 * NOTE(review): the ACT_SATISFY message below is spelled
			 * "Satsify" (typo for "Satisfy"); left unchanged here
			 * because it is runtime output.  The switch has no
			 * default case, so unlisted actions print nothing.
			 */
			switch (a.act)
			{
				case ACT_NONE: break;
				case ACT_RETREAT: printf("Retreat "); break;
				case ACT_RETRIEVE: printf("Retrieve %s ", a.arg->name); break;
				case ACT_PLAY: printf("Play %s ", a.arg->name); break;
				case ACT_PLAY_NO: printf("Play (no) %s ", a.arg->name); break;
				case ACT_ANN_FIRE: printf("Announce fire "); break;
				case ACT_ANN_EARTH: printf("Announce earth "); break;
				case ACT_USE: printf("Use %s ", a.arg->name); break;
				case ACT_SATISFY: printf("Satsify %s ", a.arg->name); break;
				case ACT_CHOOSE: printf("Choose %d ", a.chosen); break;
				case ACT_LAND: printf("Land %s ", a.arg->name); break;
				case ACT_LOAD: printf("Load %s on %s ", a.arg->name, a.target->name); break;
				case ACT_BLUFF: printf("Bluff %s ", a.arg->name); break;
				case ACT_REVEAL: printf("Reveal %s ", a.arg->name); break;
			}
		}

		if (must_retreat) printf("Force retreat");

		if (checking_decline)
		{
			/* A non-zero fight_element is reported as earth, zero as fire */
			printf("Responding %s", g->fight_element ? "earth" : "fire");
		}
	
		printf("\n");
	}
#endif

	/* Return output */
	return l->win_prob[who];
}