StateEvalScore UCTSearch::traverse(UCTNode & node, GameState & currentState)
{
    StateEvalScore playoutVal;

    _results.totalVisits++;

    // if we haven't visited this node yet, do a playout
    if (node.numVisits() == 0)
    {
        // update the status of the current state with this node's moves
        //updateState(node, currentState, !node.hasChildren());
        updateState(node, currentState, true);

        // do the playout
        playoutVal = currentState.eval(_params.maxPlayer(), _params.evalMethod(),
                                       _params.simScript(Players::Player_One),
                                       _params.simScript(Players::Player_Two));

        _results.nodesVisited++;
    }
    // otherwise we have seen this node before
    else
    {
        // update the state for a non-leaf node
        updateState(node, currentState, false);

        if (currentState.isTerminal())
        {
            playoutVal = currentState.eval(_params.maxPlayer(), EvaluationMethods::LTD2);
        }
        else
        {
            // if the children haven't been generated yet
            if (!node.hasChildren())
            {
                generateChildren(node, currentState);
            }

            UCTNode & next = UCTNodeSelect(node);
            playoutVal = traverse(next, currentState);
        }
    }

    node.incVisits();

    if (playoutVal.val() > 0)
    {
        node.addWins(1);
    }
    else if (playoutVal.val() == 0)
    {
        node.addWins(0.5);
    }

    return playoutVal;
}
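// NOTE: UCTNodeSelect() is called in traverse() above but is not shown in this
// listing. The sketch below is a minimal, hypothetical UCB1-style child
// selection, assuming UCTNode exposes numChildren(), getChild(i), numVisits()
// and numWins(), and that _params.cValue() holds the exploration constant.
// It also ignores the min-player perspective (a full implementation would
// invert the exploitation term for minimizing nodes); the real code may differ.
#include <cmath>

UCTNode & UCTSearch::UCTNodeSelect(UCTNode & node)
{
    UCTNode * best      = NULL;
    double    bestScore = -1e30;

    for (size_t i(0); i < node.numChildren(); ++i)
    {
        UCTNode & child = node.getChild(i);

        // expand unvisited children first
        if (child.numVisits() == 0)
        {
            return child;
        }

        // UCB1: average reward plus exploration bonus
        double exploitation = child.numWins() / (double)child.numVisits();
        double exploration  = _params.cValue() * std::sqrt(std::log((double)node.numVisits()) / child.numVisits());
        double score        = exploitation + exploration;

        if (score > bestScore)
        {
            bestScore = score;
            best      = &child;
        }
    }

    return *best;
}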
void GreedyPlayer::run() {
    const QList<Move> moves = state->moves(type);
    GameState next;

    // start from the worst value for this player's objective:
    // the attacker maximizes the evaluation, the defender minimizes it
    double bestEval;
    if (type == Attack)
        bestEval = GameState::evalMin;
    else
        bestEval = GameState::evalMax;

    foreach (const Move& m, moves) {
        // apply the candidate move to a copy of the current state and evaluate it
        state->copyAndMove(m, next);
        double eval = next.eval();

        if ((type == Attack && eval > bestEval) || (type == Defense && eval < bestEval)) {
            bestEval = eval;
            chosen_move = m;
        }
    }
}
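// Hypothetical sketch of the copyAndMove() helper used above: it copies the
// current state into 'next' and applies move 'm' to the copy, so the live
// state is never mutated while candidate moves are evaluated. The apply()
// name is an assumption; the real GameState API may differ.
void GameState::copyAndMove(const Move& m, GameState& next) const {
    next = *this;   // copy the current position
    next.apply(m);  // hypothetical: apply the move to the copy
}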
AlphaBetaValue AlphaBeta::alphaBeta(GameState & state, size_t depth, const IDType lastPlayerToMove,
                                    const MoveTuple * prevSimMove, AlphaBetaScore alpha, AlphaBetaScore beta)
{
    // update statistics
    _results.nodesExpanded++;

    if (searchTimeOut())
    {
        throw 1;
    }

    if (terminalState(state, depth))
    {
        // return the value, but the move will not be valid since none was performed
        AlphaBetaScore evalScore = state.eval(_params.maxPlayer(), _params.evalMethod(), _params.modelSimMethod());

        return AlphaBetaValue(AlphaBetaScore(evalScore.val(), state.getNumMovements(_params.maxPlayer()) + evalScore.numMoves()),
                              AlphaBetaMove(0, false));
    }

    // figure out which player is to move
    const IDType playerToMove(getPlayerToMove(state, depth, lastPlayerToMove, !prevSimMove));

    // is the player to move the max player?
    bool maxPlayer = (playerToMove == _params.maxPlayer());

    // Transposition Table Logic
    TTLookupValue TTval;
    if (isTranspositionLookupState(state, prevSimMove))
    {
        TTval = TTlookup(state, alpha, beta, depth);

        // if this is a TT cut, return the proper value
        if (TTval.cut())
        {
            return AlphaBetaValue(TTval.entry()->getScore(), getAlphaBetaMove(TTval, playerToMove));
        }
    }

    bool bestMoveSet(false);

    // move generation
    MoveArray & moves = _allMoves[depth];
    state.generateMoves(moves, playerToMove);
    generateOrderedMoves(state, moves, TTval, playerToMove, depth);

    // while we have more simultaneous move tuples
    AlphaBetaMove bestMove, bestSimResponse;
    MoveTuple numMoveTuples(getNumMoveTuples(moves, TTval, playerToMove, depth));
    for (MoveTuple t(0); t < numMoveTuples; ++t)
    {
        // get the tuple that will be implemented
        const MoveTuple tuple = getNextMoveTuple(t, depth);

        // the value of the recursive AB we will call
        AlphaBetaValue val;

        // generate the child state
        GameState child(state);

        bool firstMove = true;

        // if this is the first player in a simultaneous move state
        if (state.bothCanMove() && !prevSimMove && (depth != 1))
        {
            firstMove = true;

            // don't generate a child yet, just pass on the move we are investigating
            val = alphaBeta(state, depth-1, playerToMove, &tuple, alpha, beta);
        }
        else
        {
            firstMove = false;

            // if this is the 2nd move of a simultaneous move state
            if (prevSimMove)
            {
                // do the previous move tuple selected by the first player to move during this state
                doTupleMoves(child, _allMoves[depth+1], *prevSimMove);
            }

            // do the moves of the current player
            doTupleMoves(child, moves, tuple);
            child.finishedMoving(true);

            // get the alpha beta value
            val = alphaBeta(child, depth-1, playerToMove, NULL, alpha, beta);
        }

        // set alpha or beta based on maxplayer
        if (maxPlayer && (val.score() > alpha))
        {
            alpha = val.score();
            bestMove = AlphaBetaMove(tuple, true);
            bestMoveSet = true;

            if (state.bothCanMove() && !prevSimMove)
            {
                bestSimResponse = val.abMove();
            }

            // if this is depth 1 of the first try at depth 1, store the best in results
        }
        else if (!maxPlayer && (val.score() < beta))
        {
            beta = val.score();
            bestMove = AlphaBetaMove(tuple, true);
            bestMoveSet = true;

            if (state.bothCanMove() && prevSimMove)
            {
                bestSimResponse = val.abMove();
            }
        }

        if (alpha.val() == -10000000 && beta.val() == 10000000)
        {
            fprintf(stderr, "\n\nALPHA BETA ERROR, NO VALUE SET\n\n");
        }

        // alpha-beta cut
        if (alpha >= beta)
        {
            break;
        }
    }

    if (isTranspositionLookupState(state, prevSimMove))
    {
        TTsave(state, maxPlayer ? alpha : beta, alpha, beta, depth, playerToMove, bestMove, bestSimResponse);
    }

    return maxPlayer ? AlphaBetaValue(alpha, bestMove) : AlphaBetaValue(beta, bestMove);
}
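// Hypothetical sketch of a top-level driver for the recursive alphaBeta() above.
// The initial window mirrors the +/-10000000 sentinels checked inside the loop,
// prevSimMove starts as NULL (no pending simultaneous move), and the time-out
// is surfaced through the 'throw 1' above. Names other than alphaBeta() itself
// (doSearch, _params.maxDepth(), Players::Player_None, the _results fields)
// are assumptions, not the library's confirmed API.
void AlphaBeta::doSearch(GameState & initialState)
{
    try
    {
        AlphaBetaScore alpha(-10000000, 0);   // assumed (value, numMoves) constructor
        AlphaBetaScore beta ( 10000000, 0);

        AlphaBetaValue best = alphaBeta(initialState, _params.maxDepth(),
                                        Players::Player_None, NULL, alpha, beta);

        _results.bestMove  = best.abMove();   // hypothetical results fields
        _results.bestScore = best.score();
    }
    catch (int)
    {
        // searchTimeOut() fired somewhere in the tree; keep whatever an earlier
        // (shallower) iteration produced, e.g. under iterative deepening
    }
}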