Beispiel #1
0
double strategytdexp4::boardValue( const board& brd, const hash_map<string,int>* context ) const
{
    // get the inputs from the board, assuming the player holds the dice
    
    vector<double> inputs = getInputValues( brd, brd.perspective() );
    
    // calculate the middle layer node values
    
    vector<double> middles = getMiddleValues( inputs );
    
    // calculate the output node values from the middles
    
    double probWin            = getOutputProbValue( middles );
    double probCondGammonWin  = getOutputGammonWinValue( middles, brd );
    double probCondGammonLoss = getOutputGammonLossValue( middles, brd );
    double probCondBgWin      = getOutputBackgammonWinValue( middles, brd );
    double probCondBgLoss     = getOutputBackgammonLossValue( middles, brd );
    
    // calculate the expected number of points the player will win. probWin
    // corresponds to the probability of a win (any win); probCondGammon
    // corresponds to the probability of a gammon conditional on a win;
    // probCondGammonLoss corresponds to the probability of a gammon loss
    // conditional on a loss. Ditto for backgammon win/loss conditional probs.
    
    double ppg = probWin         * ( 1 * ( 1 - probCondGammonWin )  + 2 * probCondGammonWin  * ( 1 - probCondBgWin  ) + 3 * probCondGammonWin  * probCondBgWin  )
               - ( 1 - probWin ) * ( 1 * ( 1 - probCondGammonLoss ) + 2 * probCondGammonLoss * ( 1 - probCondBgLoss ) + 3 * probCondGammonLoss * probCondBgLoss );
    
    // the ppg is always from player 0's perspective. But we want it from the board
    // perspective.
    
    if( brd.perspective() == 0 )
        return ppg;
    else
        return -ppg;
}
Beispiel #2
0
// Decides whether the player seen from b's perspective should accept a double
// offered at cube level `cube`: returns true when the blended (live/dead cube)
// match equity of playing on at 2*cube is strictly greater than the match
// equity of passing and conceding `cube` points.
bool doublestratmatch::takeDouble( strategyprob& strat, const board& b, int cube )
{
    // game probabilities with the holds-dice flag false, i.e. the opponent is on roll

    gameProbabilities probs( boardProbabilities(strat,b,false) );

    // equity interpolators at the doubled cube level, with the cube owner set
    // to the board's perspective player; one for the dead-cube limit, one for live

    const int side        = b.perspective();
    const int doubledCube = 2*cube;

    interpMEdata deadME( equityInterpFn( probs, side, doubledCube, side, true ) );
    interpMEdata liveME( equityInterpFn( probs, side, doubledCube, side, false ) );

    // blend live and dead equities by the cube life index

    const double liveEquity = liveME(probs.probWin);
    const double deadEquity = deadME(probs.probWin);
    const double takeEquity = cubeLifeIndex*liveEquity + (1-cubeLifeIndex)*deadEquity;

    // points each side still needs to reach the match target, arranged so that
    // ownAway belongs to the board's perspective player

    const int target    = currMatch->getTarget();
    const int playerPts = currMatch->playerScore();
    const int oppPts    = currMatch->opponentScore();

    int ownAway, oppAway;
    if( side == 0 )
    {
        ownAway = target - playerPts;
        oppAway = target - oppPts;
    }
    else
    {
        ownAway = target - oppPts;
        oppAway = target - playerPts;
    }

    // match equity after passing (the opponent collects `cube` points),
    // respecting the Crawford rule

    double passEquity;
    if( ownAway == 1 )
        passEquity = MET.matchEquityPostCrawford( oppAway - cube );
    else if( oppAway == 1 )
        passEquity = -1; // opponent needed a single point, so passing loses the match
    else
        passEquity = MET.matchEquity( ownAway, oppAway - cube );

    return takeEquity > passEquity;
}
Beispiel #3
0
// Match equity for the board's perspective player at cube level `cube`,
// computed as a blend of the live-cube and dead-cube equities weighted by
// cubeLifeIndex. `ownsCube` selects which side is passed as the cube owner;
// `holdsDice` is forwarded to boardProbabilities.
double doublestratmatch::equity( strategyprob& strat, const board& b, int cube, bool ownsCube, bool holdsDice )
{
    gameProbabilities probs( boardProbabilities(strat, b, holdsDice) );

    // the cube owner is either the perspective player or the other side (players are 0 and 1)

    int cubeOwner;
    if( ownsCube )
        cubeOwner = b.perspective();
    else
        cubeOwner = 1-b.perspective();

    interpMEdata deadME( equityInterpFn(probs, b.perspective(), cube, cubeOwner,true) );
    interpMEdata liveME( equityInterpFn(probs, b.perspective(), cube, cubeOwner,false) );

    const double liveEquity = liveME(probs.probWin);
    const double deadEquity = deadME(probs.probWin);

    return cubeLifeIndex*liveEquity + (1-cubeLifeIndex)*deadEquity;
}
Beispiel #4
0
// Decides whether the player seen from b's perspective should offer a double
// at the current cube level `cube`. Returns true when the (capped) equity after
// doubling exceeds the equity of not doubling by more than 1e-6, or when the
// automatic-double case below applies.
bool doublestratmatch::offerDouble( strategyprob& strat, const board& b, int cube )
{
    // points each side still needs to reach the match target, arranged so that
    // ownAway belongs to the board's perspective player

    const int side      = b.perspective();
    const int target    = currMatch->getTarget();
    const int playerPts = currMatch->playerScore();
    const int oppPts    = currMatch->opponentScore();

    int ownAway, oppAway;
    if( side == 0 )
    {
        ownAway = target - playerPts;
        oppAway = target - oppPts;
    }
    else
    {
        ownAway = target - oppPts;
        oppAway = target - playerPts;
    }

    // no double if a win at the current cube level already wins the match

    if( ownAway - cube <= 0 ) return false;

    // automatic double: the opponent already wins the match with a win at this
    // cube level, so there is nothing to lose by doubling. (ownAway - cube > 0
    // is guaranteed here by the guard above.)

    if( oppAway - cube <= 0 ) return true;

    // game probabilities with the holds-dice flag set to true

    gameProbabilities probs(boardProbabilities(strat, b, true));

    // equity interpolators. "1" = current cube level with this player passed as
    // the cube owner; "2" = doubled cube with the opponent passed as the owner.
    // Dead-cube versions first, then live-cube versions.

    const int other       = 1 - side;
    const int doubledCube = 2*cube;

    interpMEdata deadME1( equityInterpFn( probs, side, cube, side, true ) );
    interpMEdata deadME2( equityInterpFn( probs, side, doubledCube, other, true ) );

    interpMEdata liveME1( equityInterpFn( probs, side, cube, side, false ) );
    interpMEdata liveME2( equityInterpFn( probs, side, doubledCube, other, false ) );

    // blend live and dead equities by the cube life index, for both choices

    const double doubledEquity   = cubeLifeIndex * liveME2(probs.probWin) + ( 1 - cubeLifeIndex ) * deadME2(probs.probWin);
    const double unDoubledEquity = cubeLifeIndex * liveME1(probs.probWin) + ( 1 - cubeLifeIndex ) * deadME1(probs.probWin);

    // the opponent can always pass, so the doubled equity is capped by the
    // equity of winning `cube` points outright. Respect the Crawford rule.
    // (With the first guard above, ownAway == 1 can only occur for cube < 1.)

    double cashEquity;
    if( ownAway == 1 )
        cashEquity = 1;
    else if( oppAway == 1 )
        cashEquity = -MET.matchEquityPostCrawford( ownAway - cube );
    else
        cashEquity = MET.matchEquity( ownAway - cube, oppAway );

    const double cappedDoubleEquity = doubledEquity > cashEquity ? cashEquity : doubledEquity;

    // require a small margin before doubling

    return cappedDoubleEquity > unDoubledEquity + 1e-6;
}
Beispiel #5
0
// Performs one training step on the network weights, moving the outputs for
// oldBoard towards a target derived from newBoard.
//
// oldBoard - position before the move; its network outputs are the values being corrected.
// newBoard - position after the move; supplies the target outputs, either exactly
//            (when one side has borne in all 15 checkers) or as the network's own
//            estimate on newBoard.
//
// Output-layer weights are stepped with learning rate alpha and middle-layer
// weights with beta. The derivative buffers (probDerivs, gamWinDerivs, ...,
// probInputDerivs, ...) are not declared in this function (presumably class
// members) and are overwritten on every call.
void strategytdexp4::update( const board& oldBoard, const board& newBoard )
{
    // get the values from the old board
    
    vector<double> oldInputs   = getInputValues( oldBoard, oldBoard.perspective() );
    vector<double> oldMiddles  = getMiddleValues( oldInputs );
    double oldProbOutput       = getOutputProbValue( oldMiddles );
    double oldGammonWinOutput  = getOutputGammonWinValue( oldMiddles, oldBoard );
    double oldGammonLossOutput = getOutputGammonLossValue( oldMiddles, oldBoard );
    double oldBgWinOutput      = getOutputBackgammonWinValue( oldMiddles, oldBoard );
    double oldBgLossOutput     = getOutputBackgammonLossValue( oldMiddles, oldBoard );
    
    // calculate all the partial derivatives we'll need (of output node values
    // to the various weights)
    
    int i, j;
    
    // For each middle node i: first the derivative of each output with respect to its
    // middle->output weight i, then the derivative of each output with respect to every
    // input->middle weight (i,j) (a 2d array).
    // The out * ( 1 - out ) and mid * ( 1 - mid ) factors have the form of a logistic
    // sigmoid derivative -- assumes the get*Value functions apply a sigmoid; TODO confirm.
    
    double mid, input, v1, v2, v3, v4, v5;
    for( i=0; i<nMiddle; i++ )
    {
        mid = oldMiddles.at(i);
        v1  = outputProbWeights.at(i);
        v2  = outputGammonWinWeights.at(i);
        v3  = outputGammonLossWeights.at(i);
        v4  = outputBackgammonWinWeights.at(i);
        v5  = outputBackgammonLossWeights.at(i);
        
        probDerivs.at(i)    = mid * oldProbOutput       * ( 1 - oldProbOutput );
        gamWinDerivs.at(i)  = mid * oldGammonWinOutput  * ( 1 - oldGammonWinOutput );
        gamLossDerivs.at(i) = mid * oldGammonLossOutput * ( 1 - oldGammonLossOutput );
        bgWinDerivs.at(i)   = mid * oldBgWinOutput      * ( 1 - oldBgWinOutput );
        bgLossDerivs.at(i)  = mid * oldBgLossOutput     * ( 1 - oldBgLossOutput );
        
        // indices 0..197 are the weights that multiply an actual input value
        for( j=0; j<198; j++ )
        {
            input = oldInputs.at(j);
            probInputDerivs.at(i).at(j)    = v1 * input * oldProbOutput       * ( 1 - oldProbOutput       ) * mid * ( 1 - mid );
            gamWinInputDerivs.at(i).at(j)  = v2 * input * oldGammonWinOutput  * ( 1 - oldGammonWinOutput  ) * mid * ( 1 - mid );
            gamLossInputDerivs.at(i).at(j) = v3 * input * oldGammonLossOutput * ( 1 - oldGammonLossOutput ) * mid * ( 1 - mid );
            bgWinInputDerivs.at(i).at(j)   = v4 * input * oldBgWinOutput      * ( 1 - oldBgWinOutput      ) * mid * ( 1 - mid );
            bgLossInputDerivs.at(i).at(j)  = v5 * input * oldBgLossOutput     * ( 1 - oldBgLossOutput     ) * mid * ( 1 - mid );
        }
        // index 198 has no input factor, i.e. an implicit constant input of 1
        // (presumably the middle node's bias weight)
        probInputDerivs.at(i).at(198)    = v1 * oldProbOutput       * ( 1 - oldProbOutput       ) * mid * ( 1 - mid );
        gamWinInputDerivs.at(i).at(198)  = v2 * oldGammonWinOutput  * ( 1 - oldGammonWinOutput  ) * mid * ( 1 - mid );
        gamLossInputDerivs.at(i).at(198) = v3 * oldGammonLossOutput * ( 1 - oldGammonLossOutput ) * mid * ( 1 - mid );
        bgWinInputDerivs.at(i).at(198)   = v4 * oldBgWinOutput      * ( 1 - oldBgWinOutput  )     * mid * ( 1 - mid );
        bgLossInputDerivs.at(i).at(198)  = v5 * oldBgLossOutput     * ( 1 - oldBgLossOutput )     * mid * ( 1 - mid );
    }
    // index nMiddle of each output weight vector likewise has no middle-node factor
    // (presumably the output node's bias weight)
    probDerivs.at(nMiddle)    = oldProbOutput       * ( 1 - oldProbOutput );
    gamWinDerivs.at(nMiddle)  = oldGammonWinOutput  * ( 1 - oldGammonWinOutput );
    gamLossDerivs.at(nMiddle) = oldGammonLossOutput * ( 1 - oldGammonLossOutput );
    bgWinDerivs.at(nMiddle)   = oldBgWinOutput      * ( 1 - oldBgWinOutput );
    bgLossDerivs.at(nMiddle)  = oldBgLossOutput     * ( 1 - oldBgLossOutput );
    
    // now calculate the next estimate of the outputs. That's known if the game is over; otherwise we use the network's
    // estimate on the new board as a proxy. Note that the update fn is only ever called by the game when the player wins, not when
    // the player loses, just because the winner is the last one to play. But we need to train on prob of losing a gammon too,
    // so we flip board perspective and train again based on that.
    // NOTE(review): no perspective flip / second training pass happens inside this function; if it exists it is in the caller -- confirm.
    
    // Each train* flag is cleared when the corresponding conditional target is undefined
    // for this outcome. The new*Output targets are only assigned on the paths where their
    // flag stays true, so they must never be read without checking the flag first.
    bool trainGammonLoss = true;
    bool trainGammonWin  = true;
    bool trainBgLoss     = true;
    bool trainBgWin      = true;
    double newProbOutput, newGammonWinOutput, newGammonLossOutput, newBgWinOutput, newBgLossOutput;
    
    // player 0 has borne in all 15 checkers: a win
    if( newBoard.bornIn0Raw() == 15 )
    {
        trainGammonLoss = false; // can't train the conditional prob of a gammon loss if there isn't a loss
        trainBgLoss     = false; // ditto for backgammon loss
        
        newProbOutput = 1.;
        if( newBoard.bornIn1Raw() == 0 ) // gammon or backgammon
        {
            newGammonWinOutput = 1.;
            // backgammon if player 1 has a hit checker or any checker on indices 0..5
            // of checkers1Raw() (presumably player 0's home board -- confirm)
            vector<int> checks( newBoard.checkers1Raw() );
            bool foundOne = newBoard.hit1Raw() > 0;
            if( !foundOne )
            {
                // note: this loop index shadows the function-level i
                for( int i=0; i<6; i++ )
                    if( checks.at(i) > 0 )
                    {
                        foundOne = true;
                        break;
                    }
            }
            newBgWinOutput = foundOne ? 1 : 0;
        }
        else
        {
            newGammonWinOutput = 0.;
            trainBgWin = false; // no gammon win so can't train conditional bg win prob
        }
    }
    // player 1 has borne in all 15 checkers: a loss (mirror image of the branch above)
    else if( newBoard.bornIn1Raw() == 15 )
    {
        trainGammonWin = false;
        trainBgWin     = false;
        
        newProbOutput = 0.;
        
        if( newBoard.bornIn0Raw() == 0 ) // gammon loss or backgammon loss
        {
            newGammonLossOutput = 1;
            // backgammon loss if player 0 has a hit checker or any checker on indices 18..23
            // of checkers0Raw() (presumably player 1's home board -- confirm)
            vector<int> checks( newBoard.checkers0Raw() );
            bool foundOne = newBoard.hit0Raw() > 0;
            if( !foundOne )
            {
                // note: this loop index shadows the function-level i
                for( int i=18; i<24; i++ )
                    if( checks.at(i) > 0 )
                    {
                        foundOne = true;
                        break;
                    }
            }
            newBgLossOutput = foundOne ? 1 : 0;
        }
        else
        {
            newGammonLossOutput = 0;
            trainBgLoss = false; // no gammon loss so can't train conditional bg loss prob
        }
    }
    else
    {
        // estimate from the new board's outputs, remembering that after the move is done,
        // the other player gets the dice.
        
        vector<double> midVals( getMiddleValues( getInputValues( newBoard, !newBoard.perspective() ) ) );
        newProbOutput       = getOutputProbValue( midVals );
        newGammonWinOutput  = getOutputGammonWinValue( midVals, newBoard );
        newGammonLossOutput = getOutputGammonLossValue( midVals, newBoard );
        newBgWinOutput      = getOutputBackgammonWinValue( midVals, newBoard );
        newBgLossOutput     = getOutputBackgammonLossValue( midVals, newBoard );
    }
    
    // train the nodes as appropriate: each weight moves by
    // rate * ( target - old output ) * derivative, using the derivatives stored above
    // (which were computed from the weights as they were before any update)
    
    for( i=0; i<nMiddle; i++ )
    {
        outputProbWeights.at(i) += alpha * ( newProbOutput - oldProbOutput ) * probDerivs.at(i);
        
        if( trainGammonWin )
            outputGammonWinWeights.at(i) += alpha * ( newGammonWinOutput - oldGammonWinOutput ) * gamWinDerivs.at(i);
        
        if( trainGammonLoss )
            outputGammonLossWeights.at(i) += alpha * ( newGammonLossOutput - oldGammonLossOutput ) * gamLossDerivs.at(i);
        
        if( trainBgWin )
            outputBackgammonWinWeights.at(i) += alpha * ( newBgWinOutput - oldBgWinOutput ) * bgWinDerivs.at(i);
        
        if( trainBgLoss )
            outputBackgammonLossWeights.at(i) += alpha * ( newBgLossOutput - oldBgLossOutput ) * bgLossDerivs.at(i);
        
        // the middle-layer weights are shared by all five outputs, so each enabled
        // output adds its own contribution; 199 = 198 input weights + the index-198 weight
        for( j=0; j<199; j++ )
        {
            middleWeights.at(i).at(j) += beta * ( newProbOutput - oldProbOutput ) * probInputDerivs.at(i).at(j);
            if( trainGammonWin )
                middleWeights.at(i).at(j) += beta * ( newGammonWinOutput - oldGammonWinOutput ) * gamWinInputDerivs.at(i).at(j);
            if( trainGammonLoss )
                middleWeights.at(i).at(j) += beta * ( newGammonLossOutput - oldGammonLossOutput ) * gamLossInputDerivs.at(i).at(j);
            if( trainBgWin )
                middleWeights.at(i).at(j) += beta * ( newBgWinOutput - oldBgWinOutput ) * bgWinInputDerivs.at(i).at(j);
            if( trainBgLoss )
                middleWeights.at(i).at(j) += beta * ( newBgLossOutput - oldBgLossOutput ) * bgLossInputDerivs.at(i).at(j);
        }
    }
    
    // finally the index-nMiddle weight of each output node
    outputProbWeights.at(nMiddle) += alpha * ( newProbOutput - oldProbOutput ) * probDerivs.at(nMiddle);
    if( trainGammonWin )
        outputGammonWinWeights.at(nMiddle) += alpha * ( newGammonWinOutput - oldGammonWinOutput ) * gamWinDerivs.at(nMiddle);
    if( trainGammonLoss )
        outputGammonLossWeights.at(nMiddle) += alpha * ( newGammonLossOutput - oldGammonLossOutput ) * gamLossDerivs.at(nMiddle);
    if( trainBgWin )
        outputBackgammonWinWeights.at(nMiddle) += alpha * ( newBgWinOutput - oldBgWinOutput ) * bgWinDerivs.at(nMiddle);
    if( trainBgLoss )
        outputBackgammonLossWeights.at(nMiddle) += alpha * ( newBgLossOutput - oldBgLossOutput ) * bgLossDerivs.at(nMiddle);
}