double strategytdexp4::getOutputBackgammonLossValue( const vector<double>& middles, const board& brd ) const
{
    // special case - if player 0 has borne in any pieces, the backgammon loss prob is zero
    
    if( brd.bornIn0Raw() > 0 ) return 0;
    
    // also if player 0 has no pieces on the bar or in player 1's home board, the backgammon loss prob is zero
    
    vector<int> checks( brd.checkers0Raw() );
    bool foundOne = brd.hit0Raw() > 0;
    if( !foundOne )
        for( int i=18; i<24; i++ )
            if( checks.at(i) > 0 )
            {
                foundOne = true;
                break;
            }
    if( !foundOne ) return 0;
    
    // otherwise calculate the network value
    
    double val=0;
    for( int i=0; i<nMiddle; i++ )
        val += outputBackgammonLossWeights.at(i) * middles.at(i);
    val += outputBackgammonLossWeights.at(nMiddle); // bias node
    
    return 1. / ( 1 + exp( -val ) );
}
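// Illustrative sketch (not part of the class): the training flags in update() below imply that the five
// outputs are P(win), P(gammon win | win), P(backgammon win | gammon win), P(gammon loss | loss) and
// P(backgammon loss | gammon loss). A caller wanting a single cubeless equity (expected points for
// player 0) would combine them roughly as follows; boardEquitySketch is a hypothetical helper, not a
// method of strategytdexp4.

static double boardEquitySketch( double pWin, double pGammonWin, double pBgWin,
                                 double pGammonLoss, double pBgLoss )
{
    // win branch: 1 point for a plain win, 2 for a gammon, 3 for a backgammon,
    // with the gammon/backgammon probabilities conditional as described above
    double winPoints  = 1 + pGammonWin  * ( 1 + pBgWin );
    
    // loss branch: symmetric, using the conditional loss probabilities
    double lossPoints = 1 + pGammonLoss * ( 1 + pBgLoss );
    
    return pWin * winPoints - ( 1 - pWin ) * lossPoints;
}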
vector<double> strategytdexp4::getInputValues( const board& brd, int turn ) const
{
    vector<double> inputs;
    inputs.resize(198,0);
    vector<int> checks      = brd.checkers0Raw();
    vector<int> otherChecks = brd.checkers1Raw();
    int hit          = brd.hit0Raw();
    int otherHit     = brd.hit1Raw();
    int borneIn      = brd.bornIn0Raw();
    int otherBorneIn = brd.bornIn1Raw();
    
    int i, j;
    
    // we put values for the first player in the first half of the inputs and for the second player
    // in the second half.
    
    for( i=0; i<24; i++ )
    {
        // each spot gets four units. The first is 1 if there is at least one checker on the point,
        // else 0; the 2nd is 1 if there are at least two checkers; the 3rd if there are at least
        // three; and the fourth = max(0,(n-3)/2), where n=# of checkers. That's done for both players.
        
        for( j=0; j<3; j++ )
        {
            if( checks.at(i) > j )      inputs.at(4*i+j)    = 1;
            if( otherChecks.at(i) > j ) inputs.at(4*i+j+99) = 1;
        }
        if( checks.at(i) > 3 )      inputs.at(4*i+3)    = ( checks[i]-3 ) / 2.;
        if( otherChecks.at(i) > 3 ) inputs.at(4*i+3+99) = ( otherChecks[i]-3 ) / 2.;
    }
    
    // one spot for each player records the number on the bar
    
    inputs.at(96)  = hit / 2.;
    inputs.at(195) = otherHit / 2.;
    
    // one spot for each player records the number born in
    
    inputs.at(97)  = borneIn / 15.;
    inputs.at(196) = otherBorneIn / 15.;
    
    // one spot for each player notes whose turn it is
    
    inputs.at(98)  = turn == 0 ? 1 : 0;
    inputs.at(197) = turn == 0 ? 0 : 1;
    
    return inputs;
}
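// Illustrative sketch (not part of the class): the per-point scheme above is the usual TD-Gammon-style
// encoding, four units per point per player. The helper below reproduces it for a single point so the
// unit values are easy to see in isolation; encodePointUnits is a hypothetical name used only for this
// example.

#include <array>

// returns the four input-unit values for a point holding n checkers of one colour
static std::array<double,4> encodePointUnits( int n )
{
    std::array<double,4> units = { 0., 0., 0., 0. };
    if( n > 0 ) units[0] = 1;              // at least one checker on the point
    if( n > 1 ) units[1] = 1;              // at least two checkers (point is made)
    if( n > 2 ) units[2] = 1;              // at least three checkers
    if( n > 3 ) units[3] = ( n - 3 ) / 2.; // excess beyond three, scaled by 1/2
    return units;
}

// examples: n=1 -> {1,0,0,0}; n=2 -> {1,1,0,0}; n=5 -> {1,1,1,1}; n=6 -> {1,1,1,1.5}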
void strategytdexp4::update( const board& oldBoard, const board& newBoard )
{
    // get the values from the old board
    
    vector<double> oldInputs  = getInputValues( oldBoard, oldBoard.perspective() );
    vector<double> oldMiddles = getMiddleValues( oldInputs );
    double oldProbOutput       = getOutputProbValue( oldMiddles );
    double oldGammonWinOutput  = getOutputGammonWinValue( oldMiddles, oldBoard );
    double oldGammonLossOutput = getOutputGammonLossValue( oldMiddles, oldBoard );
    double oldBgWinOutput      = getOutputBackgammonWinValue( oldMiddles, oldBoard );
    double oldBgLossOutput     = getOutputBackgammonLossValue( oldMiddles, oldBoard );
    
    // calculate all the partial derivatives we'll need (of output node values
    // to the various weights)
    
    int i, j;
    
    // derivatives of each output node to its output->middle weights; then derivatives of each
    // output node to the middle->input weights (a 2d array per output), via the middle node values.
    
    double mid, input, v1, v2, v3, v4, v5;
    for( i=0; i<nMiddle; i++ )
    {
        mid = oldMiddles.at(i);
        v1 = outputProbWeights.at(i);
        v2 = outputGammonWinWeights.at(i);
        v3 = outputGammonLossWeights.at(i);
        v4 = outputBackgammonWinWeights.at(i);
        v5 = outputBackgammonLossWeights.at(i);
        
        probDerivs.at(i)    = mid * oldProbOutput       * ( 1 - oldProbOutput );
        gamWinDerivs.at(i)  = mid * oldGammonWinOutput  * ( 1 - oldGammonWinOutput );
        gamLossDerivs.at(i) = mid * oldGammonLossOutput * ( 1 - oldGammonLossOutput );
        bgWinDerivs.at(i)   = mid * oldBgWinOutput      * ( 1 - oldBgWinOutput );
        bgLossDerivs.at(i)  = mid * oldBgLossOutput     * ( 1 - oldBgLossOutput );
        
        for( j=0; j<198; j++ )
        {
            input = oldInputs.at(j);
            probInputDerivs.at(i).at(j)    = v1 * input * oldProbOutput       * ( 1 - oldProbOutput )       * mid * ( 1 - mid );
            gamWinInputDerivs.at(i).at(j)  = v2 * input * oldGammonWinOutput  * ( 1 - oldGammonWinOutput )  * mid * ( 1 - mid );
            gamLossInputDerivs.at(i).at(j) = v3 * input * oldGammonLossOutput * ( 1 - oldGammonLossOutput ) * mid * ( 1 - mid );
            bgWinInputDerivs.at(i).at(j)   = v4 * input * oldBgWinOutput      * ( 1 - oldBgWinOutput )      * mid * ( 1 - mid );
            bgLossInputDerivs.at(i).at(j)  = v5 * input * oldBgLossOutput     * ( 1 - oldBgLossOutput )     * mid * ( 1 - mid );
        }
        
        // index 198 corresponds to the middle node's bias weight, so there is no input term
        
        probInputDerivs.at(i).at(198)    = v1 * oldProbOutput       * ( 1 - oldProbOutput )       * mid * ( 1 - mid );
        gamWinInputDerivs.at(i).at(198)  = v2 * oldGammonWinOutput  * ( 1 - oldGammonWinOutput )  * mid * ( 1 - mid );
        gamLossInputDerivs.at(i).at(198) = v3 * oldGammonLossOutput * ( 1 - oldGammonLossOutput ) * mid * ( 1 - mid );
        bgWinInputDerivs.at(i).at(198)   = v4 * oldBgWinOutput      * ( 1 - oldBgWinOutput )      * mid * ( 1 - mid );
        bgLossInputDerivs.at(i).at(198)  = v5 * oldBgLossOutput     * ( 1 - oldBgLossOutput )     * mid * ( 1 - mid );
    }
    
    // the final entry holds the derivative with respect to each output node's bias weight
    
    probDerivs.at(nMiddle)    = oldProbOutput       * ( 1 - oldProbOutput );
    gamWinDerivs.at(nMiddle)  = oldGammonWinOutput  * ( 1 - oldGammonWinOutput );
    gamLossDerivs.at(nMiddle) = oldGammonLossOutput * ( 1 - oldGammonLossOutput );
    bgWinDerivs.at(nMiddle)   = oldBgWinOutput      * ( 1 - oldBgWinOutput );
    bgLossDerivs.at(nMiddle)  = oldBgLossOutput     * ( 1 - oldBgLossOutput );
    
    // now calculate the next estimate of the outputs. That's known if the game is over; otherwise we use the network's
    // estimate on the new board as a proxy. Note that the update fn is only ever called by the game when the player wins, not when
    // the player loses, just because the winner is the last one to play. But we need to train on prob of losing a gammon too,
    // so we flip board perspective and train again based on that.
    bool trainGammonLoss = true;
    bool trainGammonWin  = true;
    bool trainBgLoss     = true;
    bool trainBgWin      = true;
    double newProbOutput, newGammonWinOutput, newGammonLossOutput, newBgWinOutput, newBgLossOutput;
    
    if( newBoard.bornIn0Raw() == 15 )
    {
        trainGammonLoss = false; // can't train the conditional prob of a gammon loss if there isn't a loss
        trainBgLoss     = false; // ditto for backgammon loss
        
        newProbOutput = 1.;
        if( newBoard.bornIn1Raw() == 0 ) // gammon or backgammon
        {
            newGammonWinOutput = 1.;
            vector<int> checks( newBoard.checkers1Raw() );
            bool foundOne = newBoard.hit1Raw() > 0;
            if( !foundOne )
            {
                for( int i=0; i<6; i++ )
                    if( checks.at(i) > 0 )
                    {
                        foundOne = true;
                        break;
                    }
            }
            newBgWinOutput = foundOne ? 1 : 0;
        }
        else
        {
            newGammonWinOutput = 0.;
            trainBgWin = false; // no gammon win so can't train conditional bg win prob
        }
    }
    else if( newBoard.bornIn1Raw() == 15 )
    {
        trainGammonWin = false;
        trainBgWin     = false;
        
        newProbOutput = 0.;
        if( newBoard.bornIn0Raw() == 0 ) // gammon loss or backgammon loss
        {
            newGammonLossOutput = 1;
            vector<int> checks( newBoard.checkers0Raw() );
            bool foundOne = newBoard.hit0Raw() > 0;
            if( !foundOne )
            {
                for( int i=18; i<24; i++ )
                    if( checks.at(i) > 0 )
                    {
                        foundOne = true;
                        break;
                    }
            }
            newBgLossOutput = foundOne ? 1 : 0;
        }
        else
        {
            newGammonLossOutput = 0;
            trainBgLoss = false;
        }
    }
    else
    {
        // estimate from the new board's outputs, remembering that after the move is done,
        // the other player gets the dice.
        
        vector<double> midVals( getMiddleValues( getInputValues( newBoard, !newBoard.perspective() ) ) );
        newProbOutput       = getOutputProbValue( midVals );
        newGammonWinOutput  = getOutputGammonWinValue( midVals, newBoard );
        newGammonLossOutput = getOutputGammonLossValue( midVals, newBoard );
        newBgWinOutput      = getOutputBackgammonWinValue( midVals, newBoard );
        newBgLossOutput     = getOutputBackgammonLossValue( midVals, newBoard );
    }
    
    // train the nodes as appropriate
    
    for( i=0; i<nMiddle; i++ )
    {
        outputProbWeights.at(i) += alpha * ( newProbOutput - oldProbOutput ) * probDerivs.at(i);
        if( trainGammonWin )
            outputGammonWinWeights.at(i) += alpha * ( newGammonWinOutput - oldGammonWinOutput ) * gamWinDerivs.at(i);
        if( trainGammonLoss )
            outputGammonLossWeights.at(i) += alpha * ( newGammonLossOutput - oldGammonLossOutput ) * gamLossDerivs.at(i);
        if( trainBgWin )
            outputBackgammonWinWeights.at(i) += alpha * ( newBgWinOutput - oldBgWinOutput ) * bgWinDerivs.at(i);
        if( trainBgLoss )
            outputBackgammonLossWeights.at(i) += alpha * ( newBgLossOutput - oldBgLossOutput ) * bgLossDerivs.at(i);
        
        for( j=0; j<199; j++ )
        {
            middleWeights.at(i).at(j) += beta * ( newProbOutput - oldProbOutput ) * probInputDerivs.at(i).at(j);
            if( trainGammonWin )
                middleWeights.at(i).at(j) += beta * ( newGammonWinOutput - oldGammonWinOutput ) * gamWinInputDerivs.at(i).at(j);
            if( trainGammonLoss )
                middleWeights.at(i).at(j) += beta * ( newGammonLossOutput - oldGammonLossOutput ) * gamLossInputDerivs.at(i).at(j);
            if( trainBgWin )
                middleWeights.at(i).at(j) += beta * ( newBgWinOutput - oldBgWinOutput ) * bgWinInputDerivs.at(i).at(j);
            if( trainBgLoss )
                middleWeights.at(i).at(j) += beta * ( newBgLossOutput - oldBgLossOutput ) * bgLossInputDerivs.at(i).at(j);
        }
    }
    
    outputProbWeights.at(nMiddle) += alpha * ( newProbOutput - oldProbOutput ) * probDerivs.at(nMiddle);
    if( trainGammonWin )
        outputGammonWinWeights.at(nMiddle) += alpha * ( newGammonWinOutput - oldGammonWinOutput ) * gamWinDerivs.at(nMiddle);
    if( trainGammonLoss )
        outputGammonLossWeights.at(nMiddle) += alpha * ( newGammonLossOutput - oldGammonLossOutput ) * gamLossDerivs.at(nMiddle);
    if( trainBgWin )
        outputBackgammonWinWeights.at(nMiddle) += alpha * ( newBgWinOutput - oldBgWinOutput ) * bgWinDerivs.at(nMiddle);
    if( trainBgLoss )
        outputBackgammonLossWeights.at(nMiddle) += alpha * ( newBgLossOutput - oldBgLossOutput ) * bgLossDerivs.at(nMiddle);
}
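// Illustrative sketch (not part of the class): the derivative formulas cached in update() come from the
// chain rule for a logistic output node fed by logistic middle nodes. For output y = sigmoid( sum_k v_k*h_k + v_bias )
// and middle node h_k = sigmoid( sum_j w_kj*x_j + w_k_bias ):
//   dy/dv_k      = y*(1-y) * h_k                        (probDerivs et al., index k < nMiddle)
//   dy/dv_bias   = y*(1-y)                              (probDerivs at index nMiddle)
//   dy/dw_kj     = y*(1-y) * v_k * h_k*(1-h_k) * x_j    (probInputDerivs et al., index j < 198)
//   dy/dw_k_bias = y*(1-y) * v_k * h_k*(1-h_k)          (probInputDerivs at index 198)
// and the TD(0)-style step nudges each weight by learningRate * ( newEstimate - oldEstimate ) * dy/dw.
// The toy check below compares the analytic dy/dw against a finite difference for a one-hidden-node
// network; all names here (toyOutput, sigmoidToy) are hypothetical and exist only for this sketch.

#include <cmath>
#include <cstdio>

static double sigmoidToy( double x ) { return 1. / ( 1. + exp( -x ) ); }

// toy network: one input x, one middle node (weight w, bias wb), one output node (weight v, bias vb)
static double toyOutput( double x, double w, double wb, double v, double vb )
{
    double h = sigmoidToy( w * x + wb );
    return sigmoidToy( v * h + vb );
}

int main()
{
    double x = 0.5, w = 0.3, wb = -0.1, v = 0.8, vb = 0.2;
    double h = sigmoidToy( w * x + wb );
    double y = toyOutput( x, w, wb, v, vb );
    
    // analytic derivative of the output wrt the middle->input weight w, matching the *InputDerivs formula
    double analytic = v * x * y * ( 1 - y ) * h * ( 1 - h );
    
    // central finite-difference check
    double eps = 1e-6;
    double numeric = ( toyOutput( x, w + eps, wb, v, vb ) - toyOutput( x, w - eps, wb, v, vb ) ) / ( 2 * eps );
    
    printf( "analytic = %.8f, numeric = %.8f\n", analytic, numeric );
    return 0;
}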