// Performs one UCT descent starting at `node`, advancing `currentState` along
// the way, and returns the evaluation obtained at the end of the descent.
//
// node         - tree node being visited; its visit count is always bumped and
//                its win tally is credited according to the sign of the result
// currentState - game state mutated in place by updateState() as we descend
//
// Returns the StateEvalScore produced by the playout / terminal evaluation at
// the bottom of the descent; every node on the path sees the same value.
StateEvalScore UCTSearch::traverse(UCTNode & node, GameState & currentState)
{
    StateEvalScore result;

    _results.totalVisits++;

    if (node.numVisits() != 0)
    {
        // Node was seen before: apply its moves with the "non-leaf" flag.
        updateState(node, currentState, false);

        if (!currentState.isTerminal())
        {
            // Expand lazily, the first time we need to descend below this node.
            if (!node.hasChildren())
            {
                generateChildren(node, currentState);
            }

            // Pick a child via the selection policy and keep descending.
            result = traverse(UCTNodeSelect(node), currentState);
        }
        else
        {
            // Game already over: score it directly, always with LTD2.
            result = currentState.eval(_params.maxPlayer(), EvaluationMethods::LTD2);
        }
    }
    else
    {
        // First visit: apply this node's moves with the flag set to true
        // (an earlier variant passed !node.hasChildren() here instead).
        updateState(node, currentState, true);

        // Evaluate with the configured method and per-player sim scripts.
        result = currentState.eval(_params.maxPlayer(),
                                   _params.evalMethod(),
                                   _params.simScript(Players::Player_One),
                                   _params.simScript(Players::Player_Two));

        _results.nodesVisited++;
    }

    node.incVisits();

    // Positive value counts as a full win, exactly zero as half a win,
    // and a negative value adds nothing.
    if (result.val() > 0)
    {
        node.addWins(1);
    }
    else if (result.val() == 0)
    {
        node.addWins(0.5);
    }

    return result;
}
// Evaluates this state from the point of view of `player`.
//
// player     - the player whose perspective the score is expressed in
// evalMethod - one of SparCraft::EvaluationMethods::{LTD, LTD2, Playout};
//              any other value leaves the score default-constructed
// p1Script   - script id forwarded to evalSim() (only used for Playout)
// p2Script   - script id forwarded to evalSim() (only used for Playout)
//
// Returns:
//   * StateEvalScore(0, 0) when both players are dead;
//   * the raw score, unchanged, when its value is exactly 0;
//   * otherwise the raw score with +100000 added if only the enemy is dead,
//     -100000 added if only `player` is dead, and nothing added if both are
//     still alive. The move count of the raw score is preserved.
const StateEvalScore GameState::eval(const IDType & player, const IDType & evalMethod, const IDType p1Script, const IDType p2Script) const
{
    const IDType enemyPlayer(getEnemy(player));

    // Query liveness once; this method is const, so the answers cannot change
    // while we evaluate.
    const bool enemyIsDead  = playerDead(enemyPlayer);
    const bool playerIsDead = playerDead(player);

    // if both players are dead, return 0
    if (enemyIsDead && playerIsDead)
    {
        return StateEvalScore(0, 0);
    }

    StateEvalScore score;

    if (evalMethod == SparCraft::EvaluationMethods::LTD)
    {
        score = StateEvalScore(evalLTD(player), 0);
    }
    else if (evalMethod == SparCraft::EvaluationMethods::LTD2)
    {
        score = StateEvalScore(evalLTD2(player), 0);
    }
    else if (evalMethod == SparCraft::EvaluationMethods::Playout)
    {
        score = evalSim(player, p1Script, p2Script);
    }

    // A value of exactly zero is reported as-is, without any win bonus.
    if (score.val() == 0)
    {
        return score;
    }

    // The both-dead case returned above, so at most one side is dead here.
    ScoreType winBonus(0);

    if (enemyIsDead && !playerIsDead)
    {
        winBonus = 100000;
    }
    else if (playerIsDead && !enemyIsDead)
    {
        winBonus = -100000;
    }

    return StateEvalScore(score.val() + winBonus, score.numMoves());
}
// Recursive alpha-beta search over `state`.
//
// state            - position to search from; children are built on copies
// depth            - remaining depth; each recursive call passes depth-1 and
//                    terminalState(state, depth) decides when to stop
// lastPlayerToMove - player who moved at the parent call, forwarded to
//                    getPlayerToMove()
// prevSimMove      - when non-null, the move already chosen by the first
//                    player of a simultaneous-move state; it is applied to the
//                    child before the current player's move. Null otherwise.
// alpha, beta      - current search window, taken by value so updates made
//                    here are not visible to the caller
//
// Returns the best value found together with the move that produced it
// (alpha for the max player, beta otherwise). At a terminal state the returned
// move is a default-constructed AlphaBetaMove since no move was performed.
//
// Throws the int value 1 when searchTimeOut() reports that time has run out.
AlphaBetaValue AlphaBetaSearch::alphaBeta(GameState & state, size_t depth, const IDType lastPlayerToMove, std::vector<Action> * prevSimMove, StateEvalScore alpha, StateEvalScore beta)
{
    // update statistics
    _results.nodesExpanded++;

    if (searchTimeOut())
    {
        throw 1;
    }

    if (terminalState(state, depth))
    {
        // return the value, but the move will not be valid since none was performed
        StateEvalScore evalScore = state.eval(_params.maxPlayer(), _params.evalMethod(), _params.simScript(Players::Player_One), _params.simScript(Players::Player_Two));

        // the reported move count is the max player's moves so far plus the
        // moves counted by the evaluation itself
        return AlphaBetaValue(StateEvalScore(evalScore.val(), state.getNumMovements(_params.maxPlayer()) + evalScore.numMoves()), AlphaBetaMove());
    }

    // figure out which player is to move
    const IDType playerToMove(getPlayerToMove(state, depth, lastPlayerToMove, !prevSimMove));

    // is the player to move the max player?
    const bool maxPlayer = (playerToMove == _params.maxPlayer());

    // Transposition Table Logic
    TTLookupValue TTval;
    if (isTranspositionLookupState(state, prevSimMove))
    {
        TTval = TTlookup(state, alpha, beta, depth);

        // if this is a TT cut, return the proper value
        if (TTval.cut())
        {
            return AlphaBetaValue(TTval.entry()->getScore(), getAlphaBetaMove(TTval, playerToMove));
        }
    }

    // move generation (one MoveArray per depth, so recursion does not clobber it)
    MoveArray & moves = _allMoves[depth];
    state.generateMoves(moves, playerToMove);
    moves.shuffleMoveActions();
    generateOrderedMoves(state, moves, TTval, playerToMove, depth);

    AlphaBetaMove bestMove, bestSimResponse;
    size_t moveNumber(0);
    std::vector<Action> moveVec;

    // for each child
    while (getNextMoveVec(playerToMove, moves, moveNumber, TTval, depth, moveVec))
    {
        // the value of the recursive AB we will call
        AlphaBetaValue val;

        // if this is the first player in a simultaneous move state
        if (state.bothCanMove() && !prevSimMove && (depth != 1))
        {
            // don't generate a child yet, just pass on the move we are investigating
            val = alphaBeta(state, depth-1, playerToMove, &moveVec, alpha, beta);
        }
        else
        {
            // Only this branch needs a child, so only copy the state here.
            GameState child(state);

            // if this is the 2nd move of a simultaneous move state
            if (prevSimMove)
            {
                // do the previous move selected by the first player to move during this state
                child.makeMoves(*prevSimMove);
            }

            // do the moves of the current player
            child.makeMoves(moveVec);
            child.finishedMoving();

            // get the alpha beta value
            val = alphaBeta(child, depth-1, playerToMove, NULL, alpha, beta);
        }

        // set alpha or beta based on maxplayer
        if (maxPlayer && (val.score() > alpha))
        {
            alpha = val.score();
            bestMove = AlphaBetaMove(moveVec, true);

            if (state.bothCanMove() && !prevSimMove)
            {
                bestSimResponse = val.abMove();
            }
        }
        else if (!maxPlayer && (val.score() < beta))
        {
            beta = val.score();
            bestMove = AlphaBetaMove(moveVec, true);

            if (state.bothCanMove() && prevSimMove)
            {
                bestSimResponse = val.abMove();
            }
        }

        // NOTE(review): these look like the initial window sentinels; if both
        // are still in place no child improved either bound — confirm values
        // against the caller that seeds alpha/beta.
        if (alpha.val() == -10000000 && beta.val() == 10000000)
        {
            fprintf(stderr, "\n\nALPHA BETA ERROR, NO VALUE SET\n\n");
        }

        // alpha-beta cut
        if (alpha >= beta)
        {
            break;
        }

        moveNumber++;
    }

    if (isTranspositionLookupState(state, prevSimMove))
    {
        TTsave(state, maxPlayer ? alpha : beta, alpha, beta, depth, playerToMove, bestMove, bestSimResponse);
    }

    return maxPlayer ? AlphaBetaValue(alpha, bestMove) : AlphaBetaValue(beta, bestMove);
}