Example #1: UCT search traversal (UCTSearch::traverse)
StateEvalScore UCTSearch::traverse(UCTNode & node, GameState & currentState)
{
    StateEvalScore playoutVal;

    _results.totalVisits++;

    // if we haven't visited this node yet, do a playout
    if (node.numVisits() == 0)
    {
        // update the status of the current state with this node's moves
        updateState(node, currentState, true);

        // do the playout
        playoutVal = currentState.eval(_params.maxPlayer(), _params.evalMethod(), _params.simScript(Players::Player_One), _params.simScript(Players::Player_Two));

        _results.nodesVisited++;
    }
    // otherwise we have seen this node before
    else
    {
        // update the state for a non-leaf node
        updateState(node, currentState, false);

        if (currentState.isTerminal())
        {
            playoutVal = currentState.eval(_params.maxPlayer(), EvaluationMethods::LTD2);
        }
        else
        {
            // if the children haven't been generated yet
            if (!node.hasChildren())
            {
                generateChildren(node, currentState);
            }

            UCTNode & next = UCTNodeSelect(node);
            playoutVal = traverse(next, currentState);
        }
    }

    node.incVisits();
    
    // a positive playout value (from the max player's point of view) counts
    // as a win; a draw is credited as half a win
    if (playoutVal.val() > 0)
    {
        node.addWins(1);
    }
    else if (playoutVal.val() == 0)
    {
        node.addWins(0.5);
    }

    return playoutVal;
}
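
The UCTNodeSelect call above is not shown in this example. In standard UCT, child selection uses the UCB1 rule: pick the child maximizing wins/visits + C * sqrt(ln(parentVisits) / visits). Below is a minimal, self-contained sketch of that rule; the Node struct, the selectUCB1 name, and the exploration constant are illustrative assumptions, not the actual UCTNode interface.

// Minimal sketch of UCB1 child selection as used by UCT (illustrative
// stand-in; not the UCTNode API from the example above).
#include <cmath>
#include <cstddef>
#include <vector>

struct Node
{
    double wins   = 0.0;  // accumulated wins (draws count as 0.5, as in Example #1)
    int    visits = 0;    // number of times this node has been visited
};

// Return the index of the child maximizing
//   wins/visits + C * sqrt(ln(parentVisits) / visits).
// Unvisited children are expanded first.
std::size_t selectUCB1(const std::vector<Node> & children, int parentVisits,
                       double C = 1.414)
{
    std::size_t best    = 0;
    double      bestVal = -1.0;

    for (std::size_t i = 0; i < children.size(); ++i)
    {
        if (children[i].visits == 0)
        {
            return i;  // always try an unvisited child before re-exploring
        }

        const double exploit = children[i].wins / children[i].visits;
        const double explore = C * std::sqrt(std::log((double)parentVisits)
                                             / children[i].visits);
        if (exploit + explore > bestVal)
        {
            bestVal = exploit + explore;
            best    = i;
        }
    }

    return best;
}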
Example #2: one-ply greedy move selection (GreedyPlayer::run)
void GreedyPlayer::run()
{
    const QList<Move> moves = state->moves(type);
    
    GameState next;
    double bestEval;
    
    if (type == Attack) bestEval = GameState::evalMin;
    else bestEval = GameState::evalMax;
    
    foreach (const Move& m, moves) {
        state->copyAndMove(m, next);
        double eval = next.eval();
        
        if ((type == Attack  && eval > bestEval) ||
            (type == Defense && eval < bestEval)) {
            bestEval = eval;
            chosen_move = m;
        }
    }
}
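
This is a one-ply greedy search: generate every legal move, evaluate the state each one leads to, and keep the best. The attacker maximizes the evaluation, so bestEval starts at the minimum sentinel, and the defender does the opposite. The sketch below shows the same pattern in self-contained form; the Move struct and the evaluate function are toy stand-ins for the Qt types above, not the real interface.

// Self-contained sketch of the one-ply greedy pattern from Example #2.
// Move and evaluate() are toy stand-ins.
#include <limits>
#include <vector>

struct Move { int id = -1; };

// Toy evaluation of the state reached by playing m
// (stand-in for state->copyAndMove(m, next); next.eval()).
double evaluate(const Move & m) { return static_cast<double>(m.id % 7); }

// 'maximize' plays the role of the Attack/Defense switch in Example #2.
Move greedyChoose(const std::vector<Move> & moves, bool maximize)
{
    Move   chosen;
    double best = maximize ? -std::numeric_limits<double>::infinity()
                           :  std::numeric_limits<double>::infinity();

    for (const Move & m : moves)
    {
        const double eval = evaluate(m);
        if ((maximize && eval > best) || (!maximize && eval < best))
        {
            best   = eval;
            chosen = m;
        }
    }

    return chosen;  // note: id stays -1 if 'moves' is empty
}

As in the original, no move is chosen when the move list is empty; callers must handle that case.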
Example #3: alpha-beta search with simultaneous moves (AlphaBeta::alphaBeta)
AlphaBetaValue AlphaBeta::alphaBeta(GameState & state, size_t depth, const IDType lastPlayerToMove, const MoveTuple * prevSimMove, AlphaBetaScore alpha, AlphaBetaScore beta)
{
	// update statistics
	_results.nodesExpanded++;

	// abort the search if the time limit is exceeded; the exception is
	// expected to be caught by the code that started the search
	if (searchTimeOut())
	{
		throw 1;
	}

	if (terminalState(state, depth))
	{
		// return the value, but the move will not be valid since none was performed
		AlphaBetaScore evalScore = state.eval(_params.maxPlayer(), _params.evalMethod(), _params.modelSimMethod());
		
		return AlphaBetaValue(AlphaBetaScore(evalScore.val(), state.getNumMovements(_params.maxPlayer()) + evalScore.numMoves() ), AlphaBetaMove(0, false));
	}

	// figure out which player is to move
	const IDType playerToMove(getPlayerToMove(state, depth, lastPlayerToMove, !prevSimMove));

	// is the player to move the max player?
	bool maxPlayer = (playerToMove == _params.maxPlayer());

	// Transposition Table Logic
	TTLookupValue TTval;
	if (isTranspositionLookupState(state, prevSimMove))
	{
		TTval = TTlookup(state, alpha, beta, depth);

		// if this is a TT cut, return the proper value
		if (TTval.cut())
		{
			return AlphaBetaValue(TTval.entry()->getScore(), getAlphaBetaMove(TTval, playerToMove));
		}
	}

	bool bestMoveSet(false);

	// move generation
	MoveArray & moves = _allMoves[depth];
	state.generateMoves(moves, playerToMove);
	generateOrderedMoves(state, moves, TTval, playerToMove, depth);

	// iterate over every (possibly simultaneous) move tuple in this state
	AlphaBetaMove bestMove, bestSimResponse;
	MoveTuple numMoveTuples(getNumMoveTuples(moves, TTval, playerToMove, depth));
	for (MoveTuple t(0); t < numMoveTuples; ++t)
	{
		// get the tuple that will be implemented
		const MoveTuple tuple = getNextMoveTuple(t, depth);

		// the value of the recursive AB we will call
		AlphaBetaValue val;
		
		// if this is the first player in a simultaneous move state
		if (state.bothCanMove() && !prevSimMove && (depth != 1))
		{
			// don't generate a child yet, just pass the tuple under investigation
			// down so the second player's reply is searched against this same state
			val = alphaBeta(state, depth-1, playerToMove, &tuple, alpha, beta);
		}
		else
		{
			// generate the child state now that moves will actually be applied
			GameState child(state);

			// if this is the 2nd move of a simultaneous move state
			if (prevSimMove)
			{
				// do the previous move tuple selected by the first player to move during this state
				doTupleMoves(child, _allMoves[depth+1], *prevSimMove);
			}

			// do the moves of the current player
			doTupleMoves(child, moves, tuple);
			child.finishedMoving(true);

			// get the alpha beta value
			val = alphaBeta(child, depth-1, playerToMove, NULL, alpha, beta);
		}

		// set alpha or beta based on maxplayer
		if (maxPlayer && (val.score() > alpha)) 
		{
			alpha = val.score();
			bestMove = AlphaBetaMove(tuple, true);
			bestMoveSet = true;

			if (state.bothCanMove() && !prevSimMove)
			{
				bestSimResponse = val.abMove();
			}

		}
		else if (!maxPlayer && (val.score() < beta))
		{
			beta = val.score();
			bestMove = AlphaBetaMove(tuple, true);
			bestMoveSet = true;

			if (state.bothCanMove() && prevSimMove)
			{
				bestSimResponse = val.abMove();
			}
		}

		// sanity check: if both bounds are still at their initial sentinel
		// values, no recursive call ever produced a usable score
		if (alpha.val() == -10000000 && beta.val() == 10000000)
		{
			fprintf(stderr, "\n\nALPHA BETA ERROR, NO VALUE SET\n\n");
		}

		// alpha-beta cut
		if (alpha >= beta) 
		{ 
			break; 
		}
	}
	
	if (isTranspositionLookupState(state, prevSimMove))
	{
		TTsave(state, maxPlayer ? alpha : beta, alpha, beta, depth, playerToMove, bestMove, bestSimResponse);
	}

	return maxPlayer ? AlphaBetaValue(alpha, bestMove) : AlphaBetaValue(beta, bestMove);
}
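
Example #3 layers two extensions on top of classic alpha-beta: simultaneous-move handling (the prevSimMove two-pass trick, where the first player's tuple is passed down unapplied so the second player replies against the same state) and a transposition table. Stripped of both, the core pruning loop reduces to the self-contained sketch below; ToyNode and every name in it are illustrative assumptions, not the classes used above.

// Plain alpha-beta core that Example #3 extends with simultaneous
// moves and a transposition table. All types here are toy stand-ins.
#include <algorithm>
#include <vector>

struct ToyNode
{
    double value = 0.0;             // static evaluation at a leaf
    std::vector<ToyNode> children;  // empty => leaf
};

// Typical root call: alphaBetaCore(root, maxDepth, -1e18, 1e18, true)
double alphaBetaCore(const ToyNode & node, int depth,
                     double alpha, double beta, bool maxPlayer)
{
    if (depth == 0 || node.children.empty())
    {
        return node.value;  // terminal: return the static evaluation
    }

    for (const ToyNode & child : node.children)
    {
        const double val = alphaBetaCore(child, depth - 1, alpha, beta, !maxPlayer);

        // tighten the bound belonging to the player to move,
        // mirroring the alpha/beta updates in Example #3
        if (maxPlayer) alpha = std::max(alpha, val);
        else           beta  = std::min(beta,  val);

        if (alpha >= beta)
        {
            break;  // alpha-beta cut: the opponent will avoid this line
        }
    }

    return maxPlayer ? alpha : beta;  // same return shape as Example #3
}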