/*
 * update_weights - adjust base_weights[] from the search results recorded
 * in `stored`, then release the learning data.
 *
 * winner: not referenced by this function.
 *
 * Without LEARNING defined, the function only calls free_learning() and
 * sends "set open 1" through tellics().
 *
 * With LEARNING defined:
 *   1. Return at once if fewer than two entries are stored.
 *   2. Send "set open 0" and clear d[].
 *   3. Fill tanhvector[] with tanh(EVAL_SCALE * search_result) and d[] with
 *      the difference between consecutive squashed results; results beyond
 *      WIN - 50 / LOSE + 50 are treated as exactly +1.0 / -1.0.
 *   4. For every weight w in [PAWN_VALUE, LAST_WEIGHT) accumulate delta[w]
 *      from d[] and the lambda-discounted gradients, then add
 *      TD_ALPHA * delta[w] to base_weights[w].  (The names suggest a
 *      TD(lambda) update - presumably TDLeaf; confirm against the paper.)
 *   5. Call countmaterial(), print statistics, release the gradients and
 *      call store_weight_history().
 *   6. Call free_learning() and send "set open 1".
 */
void update_weights(int winner)
{
#ifdef LEARNING
    int unusable = 0;
#ifndef USE_POW
    double lambdas[MAX_PLIES];
    double temp_lambda;
#endif
    double max, min;
    int weight_max = 0, weight_min = 0;
    double **gradients;
    int w, p, n = stored.index;

    /*
     * NOTE(review): this early return also skips the free_learning() and
     * "set open 1" calls at the bottom of the function - confirm that this
     * is intended for very short games.
     */
    if (n < 2)
        return;

    tellics("set open 0\n");
    memset(d, 0, sizeof(d));

    /* pre calculate lambda powers: lambdas[k] == TD_LAMBDA^k */
#ifndef USE_POW
    temp_lambda = 1.0;
    for (p = 0; p < MAX_PLIES; p++) {
        lambdas[p] = temp_lambda;
        temp_lambda *= TD_LAMBDA;
    }
#endif

    /* SMOOTH_EVAL is defined right here, so the #else branch is never built. */
#define SMOOTH_EVAL
#ifdef SMOOTH_EVAL
    tanhvector[1] = tanh(EVAL_SCALE * stored.search_results[1]);
    for (p = 2; p < n; p++) {
        double tmp;
        double normalized = stored.search_results[p];

        tanhvector[p] = tanh(EVAL_SCALE * normalized);

        /* Saturate near-win / near-loss scores to the tanh limits. */
        if (stored.search_results[p] > WIN - 50)
            tmp = 1.0;
        else if (stored.search_results[p] < LOSE + 50)
            tmp = -1.0;
        else
            tmp = tanhvector[p];

        /* Temporal difference between ply p and ply p-1. */
        d[p-1] = (tmp - tanhvector[p-1]);
    }
#else
    for (p = 2; p < n - 1; p++) {
        if (stored.search_results[p] > WIN - 50)
            d[p-1] = stored.search_results[p] - stored.search_results[p-1];
        else if (stored.search_results[p] < LOSE + 50)
            d[p-1] = stored.search_results[p] - stored.search_results[p-1];
        else
            d[p-1] = stored.search_results[p] - stored.search_results[p-1];
    }
#endif

    /* Seeds for the reporting statistics printed below. */
    max = 0.0;
    min = 100000.0;

    gradients = gradientvector(LAST_WEIGHT, n);

    printf("Adjusting weights\n");
    printf("Number of weights: %d\n", LAST_WEIGHT);
    printf("Number of plies: %d\n", n);

    for (w = PAWN_VALUE; w < LAST_WEIGHT; w++) {
        /* Progress dot every 20 weights, console mode only. */
        if (!xboard_mode && !robo_mode)
            if (w % 20 == 0)
                printf(".");

        delta[w] = 0.0;
        for (p = 1; p < n; p++) {
            int j;
            double S2 = 0.0;

            /* Reset per ply: the value printed later is the last count. */
            unusable = 0;

            /*
             * NOTE(review): this trace is printed once per (weight, ply)
             * pair and is not guarded by xboard_mode / robo_mode like the
             * progress dot above - confirm it is still wanted.
             */
            printf("p: %d\t", p);

            for (j = 1; j < p - 1; j++) {
                if (stored.usable[j]) {
                    double grad =
                        /* = sech^2 * EVAL_SCALE * grad  (chain rule) */
                        (1.0 - tanhvector[j] * tanhvector[j])
                        * EVAL_SCALE * gradients[w][j];
#ifdef USE_POW
                    S2 += pow(TD_LAMBDA, p - j) * grad;
#else
                    S2 += lambdas[p - j] * grad;
#endif
                } else {
                    unusable++;
                }
            }
            delta[w] += d[p] * S2 / EVAL_SCALE;
        }

        /* update weight accordingly */
        base_weights[w] += TD_ALPHA * delta[w];

        /*
         * Track the extreme deltas for the report.  The two bounds are
         * tested independently: with an else-if, a delta that raised `max`
         * was never considered for `min`, so the first positive delta could
         * not become the minimum.
         */
        if (delta[w] > max) {
            max = delta[w];
            weight_max = base_weights[w];
        }
        if (delta[w] < min) {
            min = delta[w];
            weight_min = base_weights[w];
        }
    }

    if (!xboard_mode && !robo_mode)
        printf("\n");

    printf("weights adjusted\n");

    countmaterial();

    if (!xboard_mode && !robo_mode) {
        printf("unusable states = %d\n", unusable);
        printf("min = %f (%d), max = %f (%d)\n",
               min, weight_min, max, weight_max);
    }

    release_gradients(gradients, LAST_WEIGHT, n);
    store_weight_history();
#endif
    free_learning();
    tellics("set open 1\n");
}
/* returns -1 for stalemate or winner's color */ int playchess() { int use_pondering = 0; printboard_and_time(); for (;;) { long starttime, endtime; move m; int g; g = gameoverp(tomove()); if (g) { switch (g) { case END_CHECKMATE: if (tomove() == BLACK) { return end(WHITE, "white mates"); } else { return end(BLACK, "black mates"); } case END_STALEMATE: return end(-1, "stalemate"); case NON_MATERIAL: return end(-1, "insufficient material"); case REP_DRAW: if (!robo_mode) { printf("drawable position\n"); } if (computer[WHITE]||computer[BLACK]) { if (robo_mode) tellics("draw\n"); return end(-1, "draw by repetition of moves"); } break; } } starttime = get_ms(); if (computer[tomove()]) { m = bce(); if ((m!=dummymove)&&(validmove(m)==1)) { printf("move %s\n", movestring(m)); } else { if (robo_mode) { tellics("mailmoves\n"); tellics( "message madhacker valid? = %d, move = %s, wouldbeincheckp() = %d, wouldbeinfullcheckp() = %d, pv = %s\n", validmove(m), movestring(m), wouldbeincheckp(m), wouldbeincheckfullp(m), thoughts); tellics("abort\n"); } else { printf("BCE returned invalid move: %s\n", movestring(m)); printf("valid? = %d\n", validmove(m)); fprintf(stdout, "random seed = %ld\n", seed); fprintf(stdout, "hash = %lld\n", board->hash); fprintf(stdout, "draw? = %d g = %d\n", draw_by_rep(), g); computer[1] = computer[0] = 0; } } use_pondering = 1; } else { if ((ponder_mode && computer[opp(tomove())])&& use_pondering) { ponder(); use_pondering = 0; } m = usermove(); use_pondering = 0; } endtime = get_ms(); chessclock[tomove()] -= (endtime-starttime); chessclock[tomove()] += clockinc; if (m) { domove(m); update_state(m); printboard_and_time(); } } }