/*
 * Perform a training iteration for player "who".
 *
 * "desired" is supplied only at the end of the game.  When it is given,
 * the current inputs and every stored past input set are trained toward
 * it.  When it is NULL, the past input sets are instead trained toward
 * the outputs the network produced for the current inputs.
 *
 * Each step further back in the history is trained with a weight that is
 * 0.9 times the weight of the step before it.
 */
static void perform_training(game *g, int who, double *desired)
{
    net *nn = &learner[who];
    double goal[2];
    double weight = 1.0;
    int idx;

    /* A network without inputs has never been set up; nothing to train */
    if (!nn->num_inputs) return;

    /* Evaluate the present position (fills the network and its outputs) */
    eval_game(g, who);

    /* Record the current inputs in the network's history */
    store_net(nn);

    if (!desired)
    {
        /* No final result yet: aim past states at the current outputs */
        goal[0] = nn->win_prob[0];
        goal[1] = nn->win_prob[1];
    }
    else
    {
        /* Final result known: use it as the goal */
        goal[0] = desired[0];
        goal[1] = desired[1];

        /* The current inputs are trained toward it as well */
        train_net(nn, weight, goal);
    }

    /*
     * Walk backwards through the stored inputs, most recent first.
     * NOTE(review): starting at num_past - 2 presumably skips the entry
     * store_net() just recorded -- confirm against store_net().
     */
    idx = nn->num_past - 2;
    while (idx >= 0)
    {
        /* Load this past input set into the network */
        memcpy(nn->input_value, nn->past_input[idx],
               sizeof(int) * (nn->num_inputs + 1));

        /* Recompute the outputs for those inputs */
        compute_net(nn);

        /* Nudge them toward the goal */
        train_net(nn, weight, goal);

        /* Older states receive progressively less training */
        weight *= 0.9;

        idx--;
    }
}
/*
 * Train a network so that the current results are more like the desired.
 *
 * learn   - network to adjust.  Its win_prob, net_result, prob_sum,
 *           hidden_result and input_value fields are read as they stand.
 *           NOTE(review): this assumes compute_net() was run on the
 *           current inputs beforehand -- confirm against callers.
 * lambda  - weight of this training sample.  It scales every output error
 *           and is added to learn->num_error.
 * desired - target value for each of the learn->num_output outputs.
 *
 * Corrections are accumulated into learn->output_delta and
 * learn->hidden_delta; the weights themselves are not modified here.
 * On return hidden_error, hidden_sum and prev_input have been zeroed.
 */
void train_net(net *learn, double lambda, double *desired)
{
    int i, j, k;
    double error, corr, deriv, hderiv;

    /* Count error events */
    learn->num_error += lambda;

    /* Loop over output nodes */
    for (i = 0; i < learn->num_output; i++)
    {
        /* Compute (weighted) error */
        error = lambda * (learn->win_prob[i] - desired[i]);

        /* Accumulate squared error */
        learn->error += error * error;

        /* Output portion of partial derivatives */
        deriv = learn->win_prob[i] * (1.0 - learn->win_prob[i]);

        /* Loop over node's weights */
        for (j = 0; j < learn->num_hidden; j++)
        {
            /* Compute correction */
            corr = -error * learn->hidden_result[j] * deriv;

            /* Compute hidden node's effect on output */
            hderiv = deriv * learn->output_weight[j][i];

            /* Loop over other output nodes */
            for (k = 0; k < learn->num_output; k++)
            {
                /* Skip this output node */
                if (i == k) continue;

                /* Subtract this node's factor */
                hderiv -= learn->output_weight[j][k] *
                          learn->net_result[i] *
                          learn->net_result[k] /
                          (learn->prob_sum * learn->prob_sum);
            }

            /* Compute hidden node's error */
            learn->hidden_error[j] += error * hderiv;

            /* Apply correction */
            learn->output_delta[j][i] += learn->alpha * corr;
        }

        /*
         * Compute bias weight's correction.  The bias occupies the row
         * just past the hidden nodes, hence the explicit num_hidden index.
         */
        learn->output_delta[learn->num_hidden][i] +=
            learn->alpha * -error * deriv;
    }

    /*
     * Convert each hidden node's accumulated error into its weight
     * correction factor, in place.  hidden_error is cleared before this
     * function returns, so reusing it avoids a temporary heap buffer
     * (and the allocation failure that would come with one).
     */
    for (j = 0; j < learn->num_hidden; j++)
    {
        /* Output portion of partial derivatives */
        deriv = 1 - (learn->hidden_result[j] * learn->hidden_result[j]);

        /* Calculate correction factor */
        learn->hidden_error[j] = deriv * -learn->hidden_error[j] *
                                 learn->alpha;
    }

    /* Loop over inputs (the extra "+ 1" entry is included) */
    for (i = 0; i < learn->num_inputs + 1; i++)
    {
        /* Skip zero inputs; they would contribute nothing */
        if (!learn->input_value[i]) continue;

        /* Loop over hidden nodes */
        for (j = 0; j < learn->num_hidden; j++)
        {
            /* Adjust weight */
            learn->hidden_delta[i][j] += learn->hidden_error[j] *
                                         learn->input_value[i];
        }
    }

    /* Loop over hidden nodes */
    for (i = 0; i < learn->num_hidden; i++)
    {
        /* Clear node's error (currently holding the correction factor) */
        learn->hidden_error[i] = 0;

        /* Clear node's stored sum */
        learn->hidden_sum[i] = 0;
    }

    /*
     * Clear previous inputs.  The byte count is derived from the
     * pointed-to element type so it always matches the array's real
     * element size.
     */
    memset(learn->prev_input, 0,
           sizeof(*learn->prev_input) * (learn->num_inputs + 1));

#ifdef NOISY
    /*
     * NOTE(review): this debug block looks stale -- "orig" is not declared
     * in this function and compute_net() is called without the network
     * argument used elsewhere.  Left untouched; fix before enabling NOISY.
     */
    compute_net();
    for (i = 0; i < learn->num_output; i++)
    {
        printf("%lf -> %lf: %lf\n", orig[i], desired[i], learn->win_prob[i]);
    }
#endif
}
/*
 * Evaluate the current game state from the point of view of player "who".
 *
 * The state of "g" is encoded into the inputs of learner[who], the
 * network is computed, and that network's win_prob[who] is returned.
 * If the learner has no inputs (num_inputs is zero), 0.5 is returned
 * without touching the network.
 *
 * The input index "n" advances with every SET_INPUT call, so the order of
 * the calls below defines the network's input layout; reordering them
 * changes which input each value lands on.
 *
 * NOTE(review): SET_INPUT is a macro defined elsewhere and is handed
 * "n++" as its index -- this assumes the macro evaluates that argument
 * exactly once; confirm.
 *
 * Deck loops start at index 1; deck slot 0 is never encoded here (the
 * reason is not visible in this function).
 */
static double eval_game(game *g, int who)
{
    player *p;
    card *c;
    int n = 0, i, j;
    int power, stack, bluff, bad_bluff;
    net *l;

    /* Get player's network */
    l = &learner[who];

    /* Check for no learner loaded */
    if (!l->num_inputs) return 0.5;

    /*
     * Per-card inputs: four inputs per deck card for each of the two
     * players.  Opponent cards only count when their location is known.
     */
    for (i = 0; i < 2; i++)
    {
        /* Get player pointer */
        p = &g->p[i];

        /* Loop over cards in deck */
        for (j = 1; j < DECK_SIZE; j++)
        {
            /* Get card pointer */
            c = &p->deck[j];

            /* Set input for active cards (except leadership) */
            SET_INPUT(l, n++, c->active && !c->random_fake &&
                              c->d_ptr->type != TYPE_LEADERSHIP &&
                              (who == i || c->loc_known));
        }

        /* Loop over cards in deck */
        for (j = 1; j < DECK_SIZE; j++)
        {
            /* Get card pointer */
            c = &p->deck[j];

            /* Set input for cards in hand (if known) */
            SET_INPUT(l, n++, (who == i || c->loc_known) &&
                              c->where == LOC_HAND &&
                              !c->random_fake);
        }

        /* Loop over cards in deck */
        for (j = 1; j < DECK_SIZE; j++)
        {
            /* Get card pointer */
            c = &p->deck[j];

            /*
             * Set input for used cards: discarded, in the leadership
             * location, or in combat but no longer active.
             */
            SET_INPUT(l, n++, (who == i || c->loc_known) &&
                              !c->random_fake &&
                              (c->where == LOC_DISCARD ||
                               c->where == LOC_LEADERSHIP ||
                               (c->where == LOC_COMBAT && !c->active)));
        }

        /* Loop over cards in deck */
        for (j = 1; j < DECK_SIZE; j++)
        {
            /* Get card pointer */
            c = &p->deck[j];

            /* Set input for cards loaded on ship */
            SET_INPUT(l, n++, (c->ship != NULL));
        }
    }

    /* Get evaluating player */
    p = &g->p[who];

    /* Loop over cards in deck */
    for (i = 1; i < DECK_SIZE; i++)
    {
        /* Get card pointer */
        c = &p->deck[i];

        /* Set input for "special" card (text boosted, on bottom, or bluff) */
        SET_INPUT(l, n++, c->text_boosted || c->on_bottom || c->bluff);
    }

    /* Assume no bad bluff */
    bad_bluff = 0;

    /* Loop over cards in deck */
    for (i = 1; i < DECK_SIZE; i++)
    {
        /* Get card pointer */
        c = &p->deck[i];

        /* Skip non-bluff cards */
        if (!c->bluff) continue;

        /*
         * Check for bad bluff: the card lacks the bluff icon matching the
         * current fight element (fight_element is zero for fire and
         * non-zero for earth, as the debug output below spells out).
         */
        if ((!g->fight_element && !(c->icons & ICON_BLUFF_F)) ||
            (g->fight_element && !(c->icons & ICON_BLUFF_E)))
        {
            /* Bluff is bad */
            bad_bluff = 1;
        }
    }

    /* Set input for a bad bluff */
    SET_INPUT(l, n++, bad_bluff);

    /* Set input for game over */
    SET_INPUT(l, n++, g->game_over);

    /* Set input for fight started */
    SET_INPUT(l, n++, g->fight_started);

    /* Set inputs for fight element (only if started) */
    SET_INPUT(l, n++, g->fight_element && g->fight_started);
    SET_INPUT(l, n++, !g->fight_element && g->fight_started);

    /*
     * Per-player summary inputs.  Counts are encoded as a run of inputs,
     * one per threshold j, each set when the count exceeds j.
     */
    for (i = 0; i < 2; i++)
    {
        /* Get player pointer */
        p = &g->p[i];

        /* Set input if it is this player's turn */
        SET_INPUT(l, n++, g->turn == i && !g->game_over);

        /* Check for fight started */
        if (g->fight_started)
        {
            /* Compute power level */
            power = compute_power(g, i);
        }
        else
        {
            /* Assume no power */
            power = 0;
        }

        /* Loop over possible power values */
        for (j = 0; j < 15; j++)
        {
            /* Set input if player has more than this much power */
            SET_INPUT(l, n++, power > j);
        }

        /* Count active cards */
        stack = p->stack[LOC_COMBAT] + p->stack[LOC_SUPPORT];

        /* Assume no bluff cards */
        bluff = 0;

        /* Loop over cards */
        for (j = 1; j < DECK_SIZE; j++)
        {
            /* Get card pointer */
            c = &p->deck[j];

            /* Check for bluff card */
            if (c->bluff)
            {
                /* Bluff cards do not count for dragons */
                stack--;

                /* Count bluffs */
                bluff++;
            }
        }

        /* Loop over stack sizes */
        for (j = 0; j < 8; j++)
        {
            /* Set input if player has more than this many cards played */
            SET_INPUT(l, n++, stack > j);
        }

        /* Loop over bluff counts */
        for (j = 0; j < 4; j++)
        {
            /* Set input if player has more than this many bluffs */
            SET_INPUT(l, n++, bluff > j);
        }

        /* Count cards in hand */
        stack = p->stack[LOC_HAND];

        /* Loop over hand sizes */
        for (j = 0; j < 10; j++)
        {
            /* Set input if player has more than this many cards */
            SET_INPUT(l, n++, stack > j);
        }

        /* Count cards in draw deck and hand */
        stack = p->stack[LOC_DRAW] + p->stack[LOC_HAND];

        /* Loop over deck sizes */
        for (j = 0; j < 30; j++)
        {
            /* Set input if player has more than this many cards */
            SET_INPUT(l, n++, stack > j);
        }

        /* Assume no characters */
        stack = 0;

        /* Loop over deck */
        for (j = 1; j < DECK_SIZE; j++)
        {
            /* Get card pointer */
            c = &p->deck[j];

            /* Skip non-characters */
            if (c->d_ptr->type != TYPE_CHARACTER) continue;

            /* Count characters that are visibly in hand to "who" */
            if (c->where == LOC_HAND &&
                (who == i || c->loc_known) && !c->random_fake)
            {
                /* Add more character */
                stack++;
            }
        }

        /* Loop over character counts */
        for (j = 0; j < 5; j++)
        {
            /* Set input if player has more than this many characters */
            SET_INPUT(l, n++, stack > j);
        }

        /* Assume no undisclosed cards */
        stack = 0;

        /* Loop over deck */
        for (j = 1; j < DECK_SIZE; j++)
        {
            /* Get card pointer */
            c = &p->deck[j];

            /* Skip cards not in hand */
            if (c->where != LOC_HAND) continue;

            /* Skip disclosed cards */
            if (c->disclosed) continue;

            /* Count undisclosed cards */
            stack++;
        }

        /* Loop over undisclosed-card counts */
        for (j = 0; j < 6; j++)
        {
            /* Set input if player has more than this many undisclosed cards in hand */
            SET_INPUT(l, n++, stack > j);
        }

        /* Set input if player is first to run out of cards */
        SET_INPUT(l, n++, p->no_cards);

        /* Loop over dragon counts */
        for (j = 0; j < 3; j++)
        {
            /* Set input if player has more than this many dragons */
            SET_INPUT(l, n++, p->dragons > j);
        }

        /* Set input if player has instant victory */
        SET_INPUT(l, n++, p->instant_win);
    }

    /* Compute network value */
    compute_net(l);

#ifdef DEBUG
    /* Print score and the sequence of actions in cur_path that led here */
    if (verbose && !checking_retreat && best_path_pos > 0)
    {
        printf("%.12lf: ", l->win_prob[who]);
        for (i = 0; i <= best_path_pos; i++)
        {
            action a;
            a = cur_path[i];
            switch (a.act)
            {
                case ACT_NONE: break;
                case ACT_RETREAT: printf("Retreat "); break;
                case ACT_RETRIEVE: printf("Retrieve %s ", a.arg->name); break;
                case ACT_PLAY: printf("Play %s ", a.arg->name); break;
                case ACT_PLAY_NO: printf("Play (no) %s ", a.arg->name); break;
                case ACT_ANN_FIRE: printf("Announce fire "); break;
                case ACT_ANN_EARTH: printf("Announce earth "); break;
                case ACT_USE: printf("Use %s ", a.arg->name); break;
                case ACT_SATISFY: printf("Satsify %s ", a.arg->name); break;
                case ACT_CHOOSE: printf("Choose %d ", a.chosen); break;
                case ACT_LAND: printf("Land %s ", a.arg->name); break;
                case ACT_LOAD: printf("Load %s on %s ", a.arg->name, a.target->name); break;
                case ACT_BLUFF: printf("Bluff %s ", a.arg->name); break;
                case ACT_REVEAL: printf("Reveal %s ", a.arg->name); break;
            }
        }
        if (must_retreat) printf("Force retreat");
        if (checking_decline)
        {
            printf("Responding %s", g->fight_element ? "earth" : "fire");
        }
        printf("\n");
    }
#endif

    /* Return output: the network's win probability for "who" */
    return l->win_prob[who];
}