double strategytdexp4::boardValue( const board& brd, const hash_map<string,int>* context ) const { // get the inputs from the board, assuming the player holds the dice vector<double> inputs = getInputValues( brd, brd.perspective() ); // calculate the middle layer node values vector<double> middles = getMiddleValues( inputs ); // calculate the output node values from the middles double probWin = getOutputProbValue( middles ); double probCondGammonWin = getOutputGammonWinValue( middles, brd ); double probCondGammonLoss = getOutputGammonLossValue( middles, brd ); double probCondBgWin = getOutputBackgammonWinValue( middles, brd ); double probCondBgLoss = getOutputBackgammonLossValue( middles, brd ); // calculate the expected number of points the player will win. probWin // corresponds to the probability of a win (any win); probCondGammon // corresponds to the probability of a gammon conditional on a win; // probCondGammonLoss corresponds to the probability of a gammon loss // conditional on a loss. Ditto for backgammon win/loss conditional probs. double ppg = probWin * ( 1 * ( 1 - probCondGammonWin ) + 2 * probCondGammonWin * ( 1 - probCondBgWin ) + 3 * probCondGammonWin * probCondBgWin ) - ( 1 - probWin ) * ( 1 * ( 1 - probCondGammonLoss ) + 2 * probCondGammonLoss * ( 1 - probCondBgLoss ) + 3 * probCondGammonLoss * probCondBgLoss ); // the ppg is always from player 0's perspective. But we want it from the board // perspective. if( brd.perspective() == 0 ) return ppg; else return -ppg; }
bool doublestratmatch::takeDouble( strategyprob& strat, const board& b, int cube )
{
    // We need probabilities with the opponent holding the dice; that is exactly
    // what the strategy's boardProbabilities returns, so use it directly.
    gameProbabilities probs( boardProbabilities( strat, b, false ) );

    // Match equity at the doubled cube level (player holding the dice), blending
    // the dead-cube and live-cube interpolations with the cube life index.
    interpMEdata dead( equityInterpFn( probs, b.perspective(), 2*cube, b.perspective(), true ) );
    interpMEdata live( equityInterpFn( probs, b.perspective(), 2*cube, b.perspective(), false ) );
    double takeEquity = cubeLifeIndex * live( probs.probWin ) + ( 1 - cubeLifeIndex ) * dead( probs.probWin );

    // Away scores: points still needed by the player (away) and opponent (oppAway).
    int away;
    int oppAway;
    if( b.perspective() == 0 )
    {
        away    = currMatch->getTarget() - currMatch->playerScore();
        oppAway = currMatch->getTarget() - currMatch->opponentScore();
    }
    else
    {
        away    = currMatch->getTarget() - currMatch->opponentScore();
        oppAway = currMatch->getTarget() - currMatch->playerScore();
    }

    // Match equity given up by passing (a single loss of `cube` points),
    // respecting the Crawford rule at 1-away scores.
    double passEquity;
    if( away == 1 )
        passEquity = MET.matchEquityPostCrawford( oppAway - cube );
    else if( oppAway == 1 )
        passEquity = -1;
    else
        passEquity = MET.matchEquity( away, oppAway - cube );

    // Take when playing on at the doubled level beats dropping.
    return takeEquity > passEquity;
}
double doublestratmatch::equity( strategyprob& strat, const board& b, int cube, bool ownsCube, bool holdsDice )
{
    // Game probabilities for the requested dice-holding state.
    gameProbabilities probs( boardProbabilities( strat, b, holdsDice ) );

    // Translate the ownsCube flag into an absolute cube owner index.
    int owner = ownsCube ? b.perspective() : 1 - b.perspective();

    // Blend the dead-cube and live-cube match-equity interpolations using the
    // cube life index as the mixing weight.
    interpMEdata dead( equityInterpFn( probs, b.perspective(), cube, owner, true ) );
    interpMEdata live( equityInterpFn( probs, b.perspective(), cube, owner, false ) );
    return cubeLifeIndex * live( probs.probWin ) + ( 1 - cubeLifeIndex ) * dead( probs.probWin );
}
bool doublestratmatch::offerDouble( strategyprob& strat, const board& b, int cube ) { // no double allowed if the player can already win at this level int n = b.perspective() == 0 ? currMatch->getTarget() - currMatch->playerScore() : currMatch->getTarget() - currMatch->opponentScore(); int m = b.perspective() == 0 ? currMatch->getTarget() - currMatch->opponentScore() : currMatch->getTarget() - currMatch->playerScore(); if( n-cube<=0 ) return false; // automatic double case: if the opponent is definitely going to win the match on a win // the player should double. if( m-cube<=0 and n-cube>0 ) return true; // get the various game probabilities. For this we want game probabilities // before the dice are thrown. The strategy's boardProbabilities returns the // game probs *after* the dice are thrown, so we need to flip the board around, // get the probs, then flip their perspective back. gameProbabilities probs(boardProbabilities(strat, b, true)); // calculate the dead cube equity - assuming there's no value to holding the cube interpMEdata deadME1( equityInterpFn( probs, b.perspective(), cube, b.perspective(), true ) ); interpMEdata deadME2( equityInterpFn( probs, b.perspective(), 2*cube, 1-b.perspective(), true ) ); // get the live cube match equity for this cube level and for a doubled cube. Current cube this player // owns the cube; doubled the opponent owns it. interpMEdata liveME1( equityInterpFn( probs, b.perspective(), cube, b.perspective(), false ) ); interpMEdata liveME2( equityInterpFn( probs, b.perspective(), 2*cube, 1-b.perspective(), false ) ); // double if that equity is larger. double equityDouble = cubeLifeIndex * liveME2(probs.probWin) + ( 1 - cubeLifeIndex ) * deadME2(probs.probWin); double equityNoDouble = cubeLifeIndex * liveME1(probs.probWin) + ( 1 - cubeLifeIndex ) * deadME1(probs.probWin); // the doubled equity is never more than the pass equity. Respect the Crawford rule. 
double singleWinME; if( n == 1 ) singleWinME = 1; else if( m == 1 ) singleWinME = -MET.matchEquityPostCrawford(n-cube); else singleWinME = MET.matchEquity(n-cube, m); if( equityDouble > singleWinME ) equityDouble = singleWinME; // Leave a little threshold return equityDouble > equityNoDouble + 1e-6; }
// TD-learning weight update: nudges the output- and middle-layer weights toward
// the value estimate of the new board (or the known outcome if the game ended).
// Called after each move with the pre-move board (oldBoard) and the post-move
// board (newBoard).
void strategytdexp4::update( const board& oldBoard, const board& newBoard )
{
    // get the values from the old board: inputs, hidden-layer activations, and
    // the five output-node values (win prob plus conditional gammon/bg probs).
    vector<double> oldInputs = getInputValues( oldBoard, oldBoard.perspective() );
    vector<double> oldMiddles = getMiddleValues( oldInputs );
    double oldProbOutput = getOutputProbValue( oldMiddles );
    double oldGammonWinOutput = getOutputGammonWinValue( oldMiddles, oldBoard );
    double oldGammonLossOutput = getOutputGammonLossValue( oldMiddles, oldBoard );
    double oldBgWinOutput = getOutputBackgammonWinValue( oldMiddles, oldBoard );
    double oldBgLossOutput = getOutputBackgammonLossValue( oldMiddles, oldBoard );

    // calculate all the partial derivatives we'll need (of output node values
    // to the various weights)
    int i, j;

    // then do derivs of the prob nodes to each of the middle->input weights (that's a 2d array), and the derivs of each of
    // the middle nodes to its weights->inputs.
    // v1..v5 cache the i'th output weight of each of the five output nodes;
    // mid*(1-mid) and out*(1-out) are the sigmoid derivatives.
    double mid, input, v1, v2, v3, v4, v5;
    for( i=0; i<nMiddle; i++ )
    {
        mid = oldMiddles.at(i);
        v1 = outputProbWeights.at(i);
        v2 = outputGammonWinWeights.at(i);
        v3 = outputGammonLossWeights.at(i);
        v4 = outputBackgammonWinWeights.at(i);
        v5 = outputBackgammonLossWeights.at(i);

        // d(output)/d(output weight i) for each of the five output nodes
        probDerivs.at(i) = mid * oldProbOutput * ( 1 - oldProbOutput );
        gamWinDerivs.at(i) = mid * oldGammonWinOutput * ( 1 - oldGammonWinOutput );
        gamLossDerivs.at(i) = mid * oldGammonLossOutput * ( 1 - oldGammonLossOutput );
        bgWinDerivs.at(i) = mid * oldBgWinOutput * ( 1 - oldBgWinOutput );
        bgLossDerivs.at(i) = mid * oldBgLossOutput * ( 1 - oldBgLossOutput );

        // d(output)/d(middle weight i,j) over the 198 board inputs
        for( j=0; j<198; j++ )
        {
            input = oldInputs.at(j);
            probInputDerivs.at(i).at(j) = v1 * input * oldProbOutput * ( 1 - oldProbOutput ) * mid * ( 1 - mid );
            gamWinInputDerivs.at(i).at(j) = v2 * input * oldGammonWinOutput * ( 1 - oldGammonWinOutput ) * mid * ( 1 - mid );
            gamLossInputDerivs.at(i).at(j) = v3 * input * oldGammonLossOutput * ( 1 - oldGammonLossOutput ) * mid * ( 1 - mid );
            bgWinInputDerivs.at(i).at(j) = v4 * input * oldBgWinOutput * ( 1 - oldBgWinOutput ) * mid * ( 1 - mid );
            bgLossInputDerivs.at(i).at(j) = v5 * input * oldBgLossOutput * ( 1 - oldBgLossOutput ) * mid * ( 1 - mid );
        }

        // index 198 is the middle node's bias weight: same form with input == 1
        probInputDerivs.at(i).at(198) = v1 * oldProbOutput * ( 1 - oldProbOutput ) * mid * ( 1 - mid );
        gamWinInputDerivs.at(i).at(198) = v2 * oldGammonWinOutput * ( 1 - oldGammonWinOutput ) * mid * ( 1 - mid );
        gamLossInputDerivs.at(i).at(198) = v3 * oldGammonLossOutput * ( 1 - oldGammonLossOutput ) * mid * ( 1 - mid );
        bgWinInputDerivs.at(i).at(198) = v4 * oldBgWinOutput * ( 1 - oldBgWinOutput ) * mid * ( 1 - mid );
        bgLossInputDerivs.at(i).at(198) = v5 * oldBgLossOutput * ( 1 - oldBgLossOutput ) * mid * ( 1 - mid );
    }

    // index nMiddle is the output node's bias weight: derivative has no mid factor
    probDerivs.at(nMiddle) = oldProbOutput * ( 1 - oldProbOutput );
    gamWinDerivs.at(nMiddle) = oldGammonWinOutput * ( 1 - oldGammonWinOutput );
    gamLossDerivs.at(nMiddle) = oldGammonLossOutput * ( 1 - oldGammonLossOutput );
    bgWinDerivs.at(nMiddle) = oldBgWinOutput * ( 1 - oldBgWinOutput );
    bgLossDerivs.at(nMiddle) = oldBgLossOutput * ( 1 - oldBgLossOutput );

    // now calculate the next estimate of the outputs. That's known if the game is over; otherwise we use the network's
    // estimate on the new board as a proxy. Note that the update fn is only ever called by the game when the player wins, not when
    // the player loses, just because the winner is the last one to play. But we need to train on prob of losing a gammon too,
    // so we flip board perspective and train again based on that.
    //
    // NOTE(review): outputs that are never assigned below are only read under a
    // train* flag that is simultaneously set false, so no uninitialized reads occur.
    bool trainGammonLoss = true;
    bool trainGammonWin = true;
    bool trainBgLoss = true;
    bool trainBgWin = true;
    double newProbOutput, newGammonWinOutput, newGammonLossOutput, newBgWinOutput, newBgLossOutput;

    if( newBoard.bornIn0Raw() == 15 )
    {
        // player 0 has borne in all 15 checkers: the game is a win
        trainGammonLoss = false; // can't train the conditional prob of a gammon loss if there isn't a loss
        trainBgLoss = false;     // ditto for backgammon loss
        newProbOutput = 1.;
        if( newBoard.bornIn1Raw() == 0 ) // gammon or backgammon
        {
            newGammonWinOutput = 1.;
            // backgammon win if the opponent still has a checker on the bar or
            // in the player's home board (points 0-5)
            vector<int> checks( newBoard.checkers1Raw() );
            bool foundOne = newBoard.hit1Raw() > 0;
            if( !foundOne )
            {
                for( int i=0; i<6; i++ )
                    if( checks.at(i) > 0 )
                    {
                        foundOne = true;
                        break;
                    }
            }
            newBgWinOutput = foundOne ? 1 : 0;
        }
        else
        {
            newGammonWinOutput = 0.;
            trainBgWin = false; // no gammon win so can't train conditional bg win prob
        }
    }
    else if( newBoard.bornIn1Raw() == 15 )
    {
        // opponent has borne in all 15 checkers: the game is a loss
        trainGammonWin = false;
        trainBgWin = false;
        newProbOutput = 0.;
        if( newBoard.bornIn0Raw() == 0 ) // gammon loss or backgammon loss
        {
            newGammonLossOutput = 1;
            // backgammon loss if the player still has a checker on the bar or
            // in the opponent's home board (points 18-23)
            vector<int> checks( newBoard.checkers0Raw() );
            bool foundOne = newBoard.hit0Raw() > 0;
            if( !foundOne )
            {
                for( int i=18; i<24; i++ )
                    if( checks.at(i) > 0 )
                    {
                        foundOne = true;
                        break;
                    }
            }
            newBgLossOutput = foundOne ? 1 : 0;
        }
        else
        {
            newGammonLossOutput = 0;
            trainBgLoss = false;
        }
    }
    else
    {
        // game not over: estimate from the new board's outputs, remembering that after the move is done,
        // the other player gets the dice.
        vector<double> midVals( getMiddleValues( getInputValues( newBoard, !newBoard.perspective() ) ) );
        newProbOutput = getOutputProbValue( midVals );
        newGammonWinOutput = getOutputGammonWinValue( midVals, newBoard );
        newGammonLossOutput = getOutputGammonLossValue( midVals, newBoard );
        newBgWinOutput = getOutputBackgammonWinValue( midVals, newBoard );
        newBgLossOutput = getOutputBackgammonLossValue( midVals, newBoard );
    }

    // train the nodes as appropriate: TD step of size alpha (output weights) or
    // beta (middle weights) times the temporal difference times the cached derivative
    for( i=0; i<nMiddle; i++ )
    {
        outputProbWeights.at(i) += alpha * ( newProbOutput - oldProbOutput ) * probDerivs.at(i);
        if( trainGammonWin )
            outputGammonWinWeights.at(i) += alpha * ( newGammonWinOutput - oldGammonWinOutput ) * gamWinDerivs.at(i);
        if( trainGammonLoss )
            outputGammonLossWeights.at(i) += alpha * ( newGammonLossOutput - oldGammonLossOutput ) * gamLossDerivs.at(i);
        if( trainBgWin )
            outputBackgammonWinWeights.at(i) += alpha * ( newBgWinOutput - oldBgWinOutput ) * bgWinDerivs.at(i);
        if( trainBgLoss )
            outputBackgammonLossWeights.at(i) += alpha * ( newBgLossOutput - oldBgLossOutput ) * bgLossDerivs.at(i);

        // middle-layer weights: 198 inputs plus the bias at index 198
        for( j=0; j<199; j++ )
        {
            middleWeights.at(i).at(j) += beta * ( newProbOutput - oldProbOutput ) * probInputDerivs.at(i).at(j);
            if( trainGammonWin )
                middleWeights.at(i).at(j) += beta * ( newGammonWinOutput - oldGammonWinOutput ) * gamWinInputDerivs.at(i).at(j);
            if( trainGammonLoss )
                middleWeights.at(i).at(j) += beta * ( newGammonLossOutput - oldGammonLossOutput ) * gamLossInputDerivs.at(i).at(j);
            if( trainBgWin )
                middleWeights.at(i).at(j) += beta * ( newBgWinOutput - oldBgWinOutput ) * bgWinInputDerivs.at(i).at(j);
            if( trainBgLoss )
                middleWeights.at(i).at(j) += beta * ( newBgLossOutput - oldBgLossOutput ) * bgLossInputDerivs.at(i).at(j);
        }
    }

    // finally the output-node bias weights (index nMiddle)
    outputProbWeights.at(nMiddle) += alpha * ( newProbOutput - oldProbOutput ) * probDerivs.at(nMiddle);
    if( trainGammonWin )
        outputGammonWinWeights.at(nMiddle) += alpha * ( newGammonWinOutput - oldGammonWinOutput ) * gamWinDerivs.at(nMiddle);
    if( trainGammonLoss )
        outputGammonLossWeights.at(nMiddle) += alpha * ( newGammonLossOutput - oldGammonLossOutput ) * gamLossDerivs.at(nMiddle);
    if( trainBgWin )
        outputBackgammonWinWeights.at(nMiddle) += alpha * ( newBgWinOutput - oldBgWinOutput ) * bgWinDerivs.at(nMiddle);
    if( trainBgLoss )
        outputBackgammonLossWeights.at(nMiddle) += alpha * ( newBgLossOutput - oldBgLossOutput ) * bgLossDerivs.at(nMiddle);
}