Ejemplo n.º 1
0
double strategytdexp4::getOutputBackgammonWinValue( const vector<double>& middles, const board& brd ) const
{
    // special case - if the other player has taken any pieces in, the gammon win prob is zero
    
    if( brd.bornIn1Raw() > 0 ) return 0;
    
    // also if there are no other player pieces in the player's box, the backgammon win prob is zero
    
    vector<int> checks( brd.checkers1Raw() );
    bool foundOne = brd.hit1Raw() > 0;
    if( !foundOne )
        for( int i=0; i<6; i++ )
            if( checks.at(i) > 0 )
            {
                foundOne = true;
                break;
            }
    if( !foundOne ) return 0;
    
    // otherwise calculate the network value
    
    double val=0;
    for( int i=0; i<nMiddle; i++ )
        val += outputBackgammonWinWeights.at(i) * middles.at(i);
    val += outputBackgammonWinWeights.at(nMiddle); // bias node
    return  1. / ( 1 + exp( -val ) );
}
Ejemplo n.º 2
0
vector<double> strategytdexp4::getInputValues( const board& brd, int turn ) const
{
    vector<double> inputs;
    inputs.resize(198,0);
    vector<int> checks = brd.checkers0Raw();
    vector<int> otherChecks = brd.checkers1Raw();
    int hit = brd.hit0Raw();
    int otherHit = brd.hit1Raw();
    int borneIn = brd.bornIn0Raw();
    int otherBorneIn = brd.bornIn1Raw();
    
    int i, j;
    
    // we put values for the first player in the first half of the inputs and for the second player
    // in the second half. 
    
    for( i=0; i<24; i++ )
    {
        // each spot gets four units. The first is 1 if there is at least one checker on the point,
        // else 0; the 2nd is 1 if there are at least two checkers; the 3rd if there are at least
        // three; and the fourth = max(0,(n-3)/2), where n=# of checkers. That's done for both players.
        
        for( j=0; j<3; j++ )
        {
            if( checks.at(i) > j )      inputs.at(4*i+j)    = 1;
            if( otherChecks.at(i) > j ) inputs.at(4*i+j+99) = 1;
        }
        if( checks.at(i) > 3 )      inputs.at(4*i+3)    = ( checks[i]-3 ) / 2.;
        if( otherChecks.at(i) > 3 ) inputs.at(4*i+3+99) = ( otherChecks[i]-3 ) / 2.;
    }
    
    // one spot for each player records the number on the bar
    
    inputs.at(96)  = hit / 2.;
    inputs.at(195) = otherHit / 2.;
    
    // one spot for each player records the number born in
    
    inputs.at(97)  = borneIn / 15.;
    inputs.at(196) = otherBorneIn / 15.;
    
    // one spot for each player notes whose turn it is
    
    inputs.at(98)  = turn == 0 ? 1 : 0;
    inputs.at(197) = turn == 0 ? 0 : 1;
    
    return inputs;
}
Ejemplo n.º 3
0
void strategytdexp4::update( const board& oldBoard, const board& newBoard )
{
    // get the values from the old board
    
    vector<double> oldInputs   = getInputValues( oldBoard, oldBoard.perspective() );
    vector<double> oldMiddles  = getMiddleValues( oldInputs );
    double oldProbOutput       = getOutputProbValue( oldMiddles );
    double oldGammonWinOutput  = getOutputGammonWinValue( oldMiddles, oldBoard );
    double oldGammonLossOutput = getOutputGammonLossValue( oldMiddles, oldBoard );
    double oldBgWinOutput      = getOutputBackgammonWinValue( oldMiddles, oldBoard );
    double oldBgLossOutput     = getOutputBackgammonLossValue( oldMiddles, oldBoard );
    
    // calculate all the partial derivatives we'll need (of output node values
    // to the various weights)
    
    int i, j;
    
    // then do derivs of the prob nodes to each of the middle->input weights (that's a 2d array), and the derivs of each of
    // the middle nodes to its weights->inputs.
    
    double mid, input, v1, v2, v3, v4, v5;
    for( i=0; i<nMiddle; i++ )
    {
        mid = oldMiddles.at(i);
        v1  = outputProbWeights.at(i);
        v2  = outputGammonWinWeights.at(i);
        v3  = outputGammonLossWeights.at(i);
        v4  = outputBackgammonWinWeights.at(i);
        v5  = outputBackgammonLossWeights.at(i);
        
        probDerivs.at(i)    = mid * oldProbOutput       * ( 1 - oldProbOutput );
        gamWinDerivs.at(i)  = mid * oldGammonWinOutput  * ( 1 - oldGammonWinOutput );
        gamLossDerivs.at(i) = mid * oldGammonLossOutput * ( 1 - oldGammonLossOutput );
        bgWinDerivs.at(i)   = mid * oldBgWinOutput      * ( 1 - oldBgWinOutput );
        bgLossDerivs.at(i)  = mid * oldBgLossOutput     * ( 1 - oldBgLossOutput );
        
        for( j=0; j<198; j++ )
        {
            input = oldInputs.at(j);
            probInputDerivs.at(i).at(j)    = v1 * input * oldProbOutput       * ( 1 - oldProbOutput       ) * mid * ( 1 - mid );
            gamWinInputDerivs.at(i).at(j)  = v2 * input * oldGammonWinOutput  * ( 1 - oldGammonWinOutput  ) * mid * ( 1 - mid );
            gamLossInputDerivs.at(i).at(j) = v3 * input * oldGammonLossOutput * ( 1 - oldGammonLossOutput ) * mid * ( 1 - mid );
            bgWinInputDerivs.at(i).at(j)   = v4 * input * oldBgWinOutput      * ( 1 - oldBgWinOutput      ) * mid * ( 1 - mid );
            bgLossInputDerivs.at(i).at(j)  = v5 * input * oldBgLossOutput     * ( 1 - oldBgLossOutput     ) * mid * ( 1 - mid );
        }
        probInputDerivs.at(i).at(198)    = v1 * oldProbOutput       * ( 1 - oldProbOutput       ) * mid * ( 1 - mid );
        gamWinInputDerivs.at(i).at(198)  = v2 * oldGammonWinOutput  * ( 1 - oldGammonWinOutput  ) * mid * ( 1 - mid );
        gamLossInputDerivs.at(i).at(198) = v3 * oldGammonLossOutput * ( 1 - oldGammonLossOutput ) * mid * ( 1 - mid );
        bgWinInputDerivs.at(i).at(198)   = v4 * oldBgWinOutput      * ( 1 - oldBgWinOutput  )     * mid * ( 1 - mid );
        bgLossInputDerivs.at(i).at(198)  = v5 * oldBgLossOutput     * ( 1 - oldBgLossOutput )     * mid * ( 1 - mid );
    }
    probDerivs.at(nMiddle)    = oldProbOutput       * ( 1 - oldProbOutput );
    gamWinDerivs.at(nMiddle)  = oldGammonWinOutput  * ( 1 - oldGammonWinOutput );
    gamLossDerivs.at(nMiddle) = oldGammonLossOutput * ( 1 - oldGammonLossOutput );
    bgWinDerivs.at(nMiddle)   = oldBgWinOutput      * ( 1 - oldBgWinOutput );
    bgLossDerivs.at(nMiddle)  = oldBgLossOutput     * ( 1 - oldBgLossOutput );
    
    // now calculate the next estimate of the outputs. That's known if the game is over; otherwise we use the network's 
    // estimate on the new board as a proxy. Note that the update fn is only ever called by the game when the player wins, not when
    // the player loses, just because the winner is the last one to play. But we need to train on prob of losing a gammon too,
    // so we flip board perspective and train again based on that.
    
    bool trainGammonLoss = true;
    bool trainGammonWin  = true;
    bool trainBgLoss     = true;
    bool trainBgWin      = true;
    double newProbOutput, newGammonWinOutput, newGammonLossOutput, newBgWinOutput, newBgLossOutput;
    
    if( newBoard.bornIn0Raw() == 15 )
    {
        trainGammonLoss = false; // can't train the conditional prob of a gammon loss if there isn't a loss
        trainBgLoss     = false; // ditto for backgammon loss
        
        newProbOutput = 1.;
        if( newBoard.bornIn1Raw() == 0 ) // gammon or backgammon
        {
            newGammonWinOutput = 1.;
            vector<int> checks( newBoard.checkers1Raw() );
            bool foundOne = newBoard.hit1Raw() > 0;
            if( !foundOne )
            {
                for( int i=0; i<6; i++ )
                    if( checks.at(i) > 0 )
                    {
                        foundOne = true;
                        break;
                    }
            }
            newBgWinOutput = foundOne ? 1 : 0;
        }
        else
        {
            newGammonWinOutput = 0.;
            trainBgWin = false; // no gammon win so can't train conditional bg win prob
        }
    }
    else if( newBoard.bornIn1Raw() == 15 )
    {
        trainGammonWin = false;
        trainBgWin     = false;
        
        newProbOutput = 0.;
        
        if( newBoard.bornIn0Raw() == 0 ) // gammon loss or backgammon loss
        {
            newGammonLossOutput = 1;
            vector<int> checks( newBoard.checkers0Raw() );
            bool foundOne = newBoard.hit0Raw() > 0;
            if( !foundOne )
            {
                for( int i=18; i<24; i++ )
                    if( checks.at(i) > 0 )
                    {
                        foundOne = true;
                        break;
                    }
            }
            newBgLossOutput = foundOne ? 1 : 0;
        }
        else
        {
            newGammonLossOutput = 0;
            trainBgLoss = false;
        }
    }
    else
    {
        // estimate from the new board's outputs, remembering that after the move is done,
        // the other player gets the dice.
        
        vector<double> midVals( getMiddleValues( getInputValues( newBoard, !newBoard.perspective() ) ) );
        newProbOutput       = getOutputProbValue( midVals );
        newGammonWinOutput  = getOutputGammonWinValue( midVals, newBoard );
        newGammonLossOutput = getOutputGammonLossValue( midVals, newBoard );
        newBgWinOutput      = getOutputBackgammonWinValue( midVals, newBoard );
        newBgLossOutput     = getOutputBackgammonLossValue( midVals, newBoard );
    }
    
    // train the nodes as appropriate
    
    for( i=0; i<nMiddle; i++ )
    {
        outputProbWeights.at(i) += alpha * ( newProbOutput - oldProbOutput ) * probDerivs.at(i);
        
        if( trainGammonWin )
            outputGammonWinWeights.at(i) += alpha * ( newGammonWinOutput - oldGammonWinOutput ) * gamWinDerivs.at(i);
        
        if( trainGammonLoss )
            outputGammonLossWeights.at(i) += alpha * ( newGammonLossOutput - oldGammonLossOutput ) * gamLossDerivs.at(i);
        
        if( trainBgWin )
            outputBackgammonWinWeights.at(i) += alpha * ( newBgWinOutput - oldBgWinOutput ) * bgWinDerivs.at(i);
        
        if( trainBgLoss )
            outputBackgammonLossWeights.at(i) += alpha * ( newBgLossOutput - oldBgLossOutput ) * bgLossDerivs.at(i);
        
        for( j=0; j<199; j++ )
        {
            middleWeights.at(i).at(j) += beta * ( newProbOutput - oldProbOutput ) * probInputDerivs.at(i).at(j);
            if( trainGammonWin )
                middleWeights.at(i).at(j) += beta * ( newGammonWinOutput - oldGammonWinOutput ) * gamWinInputDerivs.at(i).at(j);
            if( trainGammonLoss )
                middleWeights.at(i).at(j) += beta * ( newGammonLossOutput - oldGammonLossOutput ) * gamLossInputDerivs.at(i).at(j);
            if( trainBgWin )
                middleWeights.at(i).at(j) += beta * ( newBgWinOutput - oldBgWinOutput ) * bgWinInputDerivs.at(i).at(j);
            if( trainBgLoss )
                middleWeights.at(i).at(j) += beta * ( newBgLossOutput - oldBgLossOutput ) * bgLossInputDerivs.at(i).at(j);
        }
    }
    
    outputProbWeights.at(nMiddle) += alpha * ( newProbOutput - oldProbOutput ) * probDerivs.at(nMiddle);
    if( trainGammonWin )
        outputGammonWinWeights.at(nMiddle) += alpha * ( newGammonWinOutput - oldGammonWinOutput ) * gamWinDerivs.at(nMiddle);
    if( trainGammonLoss )
        outputGammonLossWeights.at(nMiddle) += alpha * ( newGammonLossOutput - oldGammonLossOutput ) * gamLossDerivs.at(nMiddle);
    if( trainBgWin )
        outputBackgammonWinWeights.at(nMiddle) += alpha * ( newBgWinOutput - oldBgWinOutput ) * bgWinDerivs.at(nMiddle);
    if( trainBgLoss )
        outputBackgammonLossWeights.at(nMiddle) += alpha * ( newBgLossOutput - oldBgLossOutput ) * bgLossDerivs.at(nMiddle);
}