コード例 #1
0
ファイル: learn.c プロジェクト: cwbowron/BCE
/*
 * Adjust base_weights[] from the game recorded in `stored`, using a
 * temporal-difference style update (step size TD_ALPHA, decay TD_LAMBDA).
 *
 * When LEARNING is defined the function:
 *   1. returns at once if fewer than two plies were stored;
 *   2. fills d[] with the difference between successive stored search
 *      results, squashed through tanh because SMOOTH_EVAL is defined
 *      just below;
 *   3. for every weight w in [PAWN_VALUE, LAST_WEIGHT) accumulates
 *          delta[w] = sum_p d[p] * sum_j lambda^(p-j) * grad(w, j)
 *      over the usable stored positions j, then adds TD_ALPHA * delta[w]
 *      to base_weights[w];
 *   4. calls countmaterial(), prints a summary, releases the gradient
 *      table and calls store_weight_history().
 *
 * Whether or not LEARNING is defined, the function ends by calling
 * free_learning() and sending "set open 1" through tellics().
 *
 * winner: not read anywhere in this function.
 */
void update_weights(int winner)
{
#ifdef LEARNING
    int unusable = 0;

#ifndef USE_POW
    double lambdas[MAX_PLIES];
    double temp_lambda;
#endif

    double max, min;
    int weight_max = 0, weight_min = 0;

    double **gradients;

    int w, p, n = stored.index;

    /* NOTE(review): this early return also skips free_learning() at the
       bottom of the function -- confirm that is intended. */
    if (n < 2) return;

    tellics("set open 0\n");

    memset(d,0,sizeof(d));

    /* pre calculate lambda powers: lambdas[k] holds TD_LAMBDA^k */
#ifndef USE_POW
    temp_lambda = 1.0;
    for (p=0;p<MAX_PLIES;p++)
    {
        lambdas[p] = temp_lambda;
        temp_lambda *= TD_LAMBDA;
    }
#endif

#define SMOOTH_EVAL
#ifdef SMOOTH_EVAL

    tanhvector[1] = tanh(EVAL_SCALE*stored.search_results[1]);

    /* d[p-1] is the change in squashed evaluation from ply p-1 to ply p.
       Results within 50 of WIN / LOSE are clamped to +1 / -1 for the
       difference, while tanhvector[p] keeps the unclamped tanh value. */
    for (p=2;p<n;p++)
    {
        double tmp;
        double normalized = stored.search_results[p];
        tanhvector[p] = tanh(EVAL_SCALE*normalized);

        if (stored.search_results[p] > WIN - 50)
            tmp = 1.0;
        else if (stored.search_results[p] < LOSE + 50)
            tmp = -1.0;
        else
            tmp = tanhvector[p];

        d[p-1] = (tmp-tanhvector[p-1]);
    }

#else

    /* Raw (unsquashed) differences.  All three branches currently
       compute the same value. */
    for (p=2;p<n-1;p++)
    {
        if (stored.search_results[p] > WIN - 50)
            d[p-1] = stored.search_results[p] - stored.search_results[p-1];

        else if (stored.search_results[p] < LOSE + 50)
            d[p-1] = stored.search_results[p] - stored.search_results[p-1];

        else
            d[p-1] = stored.search_results[p] - stored.search_results[p-1];
    }

#endif

    /* Fallback values, only reported if the weight loop never runs. */
    max = 0.0;
    min = 100000.0;

    gradients = gradientvector(LAST_WEIGHT, n);

    printf("Adjusting weights\n");
    printf("Number of weights: %d\n", LAST_WEIGHT);
    printf("Number of plies: %d\n", n);

    for (w = PAWN_VALUE; w < LAST_WEIGHT; w ++)
    {
        /* progress dots, interactive mode only */
        if (!xboard_mode && !robo_mode)
            if (w % 20 == 0)
                printf(".");

        delta[w] = 0.0;

        for (p=1;p<n;p++)
        {
            int j;
            double S2 = 0.0;

            /* NOTE(review): reset for every ply, so the "unusable states"
               figure printed at the end only reflects the last ply of
               the last weight. */
            unusable = 0;

            /* Trace output, emitted once per (weight, ply) pair. */
            printf("p: %d\t", p);

            /* Discounted sum of gradients of the earlier positions.
               NOTE(review): the bound is j < p-1, so positions p-1 and p
               never contribute -- confirm against the intended formula.
               lambdas[p-j] assumes n <= MAX_PLIES -- TODO confirm. */
            for (j=1;j<p-1;j++)
            {
                if (stored.usable[j])
                {
                    double grad =
                        /* = sech^2 * EVAL_SCALE * grad (chain rule) */
                        (1.0 - tanhvector[j] * tanhvector[j]) *
                        EVAL_SCALE *
                        gradients[w][j];

#ifdef USE_POW
                    S2 += pow(TD_LAMBDA, p-j) * grad;
#else
                    S2 += lambdas[p-j] * grad;
#endif
                }
                else
                {
                    unusable ++;
                }
            }
            delta[w] += d[p] * S2 / EVAL_SCALE;
        }

        /* update weight accordingly */
        base_weights[w] += TD_ALPHA * delta[w];

        /* Track the extreme deltas for the summary below.  The two checks
           are independent and seeded from the first weight, so one delta
           can be both the running minimum and the running maximum, and
           the fallback values above never leak into the report.  What is
           remembered alongside is the weight's updated value, not its
           index. */
        if (w == PAWN_VALUE || delta[w] > max)
        {
            max = delta[w];
            weight_max=base_weights[w];
        }
        if (w == PAWN_VALUE || delta[w] < min)
        {
            min = delta[w];
            weight_min=base_weights[w];
        }

    }

    if (!xboard_mode && !robo_mode)
        printf("\n");

    printf("weights adjusted\n");

    countmaterial();

    if (!xboard_mode && !robo_mode)
    {
        printf("unusable states = %d\n", unusable);
        printf("min = %f (%d), max = %f (%d)\n", min, weight_min,
               max, weight_max);
    }

    release_gradients(gradients, LAST_WEIGHT, n);

    store_weight_history();
#endif



    free_learning();

    tellics("set open 1\n");
}
コード例 #2
0
/*
 * Main game loop: keeps asking the side to move for a move until the
 * game is over.
 *
 * Each pass checks gameoverp() for the side to move, obtains a move from
 * bce() (computer side) or usermove() (otherwise), charges the elapsed
 * time to that side's clock, adds clockinc, and applies the move.
 *
 * The only exits are the end() calls, whose result is returned.  end()
 * is handed the mating colour, or -1 for stalemate, insufficient
 * material and draw by repetition.
 */
int playchess()
{
    int may_ponder = 0;   /* set after an engine move, cleared after a user move */

    printboard_and_time();

    while (1)
    {
        long t_begin, t_end;
        move m;
        int status = gameoverp(tomove());

        if (status)
        {
            if (status == END_CHECKMATE)
            {
                /* the side to move is the one that has been mated */
                if (tomove() == BLACK)
                    return end(WHITE, "white mates");
                return end(BLACK, "black mates");
            }
            else if (status == END_STALEMATE)
            {
                return end(-1, "stalemate");
            }
            else if (status == NON_MATERIAL)
            {
                return end(-1, "insufficient material");
            }
            else if (status == REP_DRAW)
            {
                if (!robo_mode)
                    printf("drawable position\n");

                /* The draw is only taken when at least one side is the
                   computer; otherwise play simply continues. */
                if (computer[WHITE] || computer[BLACK])
                {
                    if (robo_mode)
                        tellics("draw\n");
                    return end(-1, "draw by repetition of moves");
                }
            }
        }

        t_begin = get_ms();

        if (computer[tomove()])
        {
            m = bce();

            if (m == dummymove || validmove(m) != 1)
            {
                /* The engine produced something unplayable: report it.
                   NOTE(review): a non-zero m is still applied by the
                   domove() call further down -- confirm that is intended. */
                if (robo_mode)
                {
                    tellics("mailmoves\n");
                    tellics(
                        "message madhacker valid? = %d, move = %s, wouldbeincheckp() = %d, wouldbeinfullcheckp() = %d, pv = %s\n",
                        validmove(m),
                        movestring(m),
                        wouldbeincheckp(m),
                        wouldbeincheckfullp(m),
                        thoughts);
                    tellics("abort\n");
                }
                else
                {
                    printf("BCE returned invalid move: %s\n", movestring(m));
                    printf("valid? = %d\n", validmove(m));

                    printf("random seed = %ld\n", seed);
                    printf("hash = %lld\n", board->hash);
                    printf("draw? = %d g = %d\n",
                           draw_by_rep(), status);

                    /* hand both sides back to the user */
                    computer[1] = computer[0] = 0;
                }
            }
            else
            {
                printf("move %s\n", movestring(m));
            }

            may_ponder = 1;
        }
        else
        {
            /* Ponder at most once per engine move, and only while the
               opponent of the side to move is the computer. */
            if (ponder_mode && computer[opp(tomove())] && may_ponder)
                ponder();

            m = usermove();
            may_ponder = 0;
        }

        t_end = get_ms();
        chessclock[tomove()] -= (t_end - t_begin);
        chessclock[tomove()] += clockinc;

        /* a zero move means there is nothing to play this pass */
        if (!m)
            continue;

        domove(m);
        update_state(m);
        printboard_and_time();
    }
}