Ejemplo n.º 1
0
// Runs one traversal step of the search tree starting at `node`.
//
// node         - the tree node being visited; its visit and win counters are updated
// currentState - the game state being carried down the tree; updateState() is applied
//                to it with this node before anything is evaluated
//
// Returns the evaluation obtained at the bottom of this traversal (from a playout/eval
// of a first-time node, an LTD2 eval of a terminal state, or the recursive call).
StateEvalScore UCTSearch::traverse(UCTNode & node, GameState & currentState)
{
    StateEvalScore result;

    _results.totalVisits++;

    const bool firstVisit = (node.numVisits() == 0);

    if (firstVisit)
    {
        // never-visited node: apply its moves with the leaf flag set to true
        // (the hasChildren()-based flag was disabled in favour of always passing true)
        updateState(node, currentState, true);

        // evaluate using the configured evaluation method and the two sim scripts
        result = currentState.eval(
            _params.maxPlayer(),
            _params.evalMethod(),
            _params.simScript(Players::Player_One),
            _params.simScript(Players::Player_Two));

        _results.nodesVisited++;
    }
    else
    {
        // previously visited node: apply its moves with the leaf flag set to false
        updateState(node, currentState, false);

        if (!currentState.isTerminal())
        {
            // expand lazily, the first time we need to descend through this node
            if (!node.hasChildren())
            {
                generateChildren(node, currentState);
            }

            // pick a child and keep descending; its result is propagated upward unchanged
            UCTNode & chosen = UCTNodeSelect(node);
            result = traverse(chosen, currentState);
        }
        else
        {
            // nothing left to play: score the terminal state directly with LTD2
            result = currentState.eval(_params.maxPlayer(), EvaluationMethods::LTD2);
        }
    }

    // back-propagation for this node: one more visit, plus win credit based on the sign
    node.incVisits();

    if (result.val() == 0)
    {
        // a zero score counts as half a win
        node.addWins(0.5);
    }
    else if (result.val() > 0)
    {
        // a positive score counts as a full win; negative scores add nothing
        node.addWins(1);
    }

    return result;
}
// Scores this state from the point of view of `player`.
//
// player     - the player whose perspective the score is taken from
// evalMethod - one of SparCraft::EvaluationMethods::LTD, LTD2 or Playout; for any other
//              value the score stays default-constructed
// p1Script,
// p2Script   - script ids forwarded to evalSim() when evalMethod is Playout
//
// Returns:
//   - StateEvalScore(0, 0) when both players are dead
//   - the raw score unchanged when its value is exactly 0
//   - otherwise the raw score value plus a bonus of +100000 if only the enemy is dead,
//     -100000 if only `player` is dead, or 0 if neither is dead; numMoves() is carried over
const StateEvalScore GameState::eval(const IDType & player, const IDType & evalMethod, const IDType p1Script, const IDType p2Script) const
{
	StateEvalScore score;
	const IDType enemyPlayer(getEnemy(player));

	// if both players are dead, return 0
	if (playerDead(enemyPlayer) && playerDead(player))
	{
		return StateEvalScore(0, 0);
	}

	// compute the raw score with the requested evaluation method
	if (evalMethod == SparCraft::EvaluationMethods::LTD)
	{
		score = StateEvalScore(evalLTD(player), 0);
	}
	else if (evalMethod == SparCraft::EvaluationMethods::LTD2)
	{
		score = StateEvalScore(evalLTD2(player), 0);
	}
	else if (evalMethod == SparCraft::EvaluationMethods::Playout)
	{
		score = evalSim(player, p1Script, p2Script);
	}

	// a score of exactly 0 is returned as-is, without any win bonus applied
	if (score.val() == 0)
	{
		return score;
	}

	// shift the score when exactly one side has been eliminated
	ScoreType winBonus(0);

	if (playerDead(enemyPlayer) && !playerDead(player))
	{
		winBonus = 100000;
	}
	else if (playerDead(player) && !playerDead(enemyPlayer))
	{
		winBonus = -100000;
	}

	return StateEvalScore(score.val() + winBonus, score.numMoves());
}
// Recursive alpha-beta search from `state`.
//
// state            - the state to search from; child states are built from copies of it
// depth            - remaining search depth; passed to terminalState(), used to index
//                    _allMoves, and decremented by one for every recursive call
// lastPlayerToMove - forwarded to getPlayerToMove() to decide who moves at this node
// prevSimMove      - when non-null, this call is choosing the second move of a
//                    simultaneous-move state and *prevSimMove holds the moves already
//                    chosen by the first player; they are applied to the child before
//                    this player's moves
// alpha, beta      - the current search window
//
// Returns alpha (max player to move) or beta (otherwise) paired with the best move found
// at this node. For a terminal state the returned move is a default-constructed
// AlphaBetaMove, since no move was performed.
//
// Throws the int value 1 when searchTimeOut() returns true.
AlphaBetaValue AlphaBetaSearch::alphaBeta(GameState & state, size_t depth, const IDType lastPlayerToMove, std::vector<Action> * prevSimMove, StateEvalScore alpha, StateEvalScore beta)
{
	// update statistics
	_results.nodesExpanded++;

	if (searchTimeOut())
	{
		throw 1;
	}

	if (terminalState(state, depth))
	{
		// return the value, but the move will not be valid since none was performed
		StateEvalScore evalScore = state.eval(_params.maxPlayer(), _params.evalMethod(), _params.simScript(Players::Player_One), _params.simScript(Players::Player_Two));

		// the move count reported is the max player's movements so far plus those of the evaluation
		return AlphaBetaValue(StateEvalScore(evalScore.val(), state.getNumMovements(_params.maxPlayer()) + evalScore.numMoves() ), AlphaBetaMove());
	}

	// figure out which player is to move
	const IDType playerToMove(getPlayerToMove(state, depth, lastPlayerToMove, !prevSimMove));

	// is the player to move the max player?
	bool maxPlayer = (playerToMove == _params.maxPlayer());

	// Transposition Table Logic
	TTLookupValue TTval;
	if (isTranspositionLookupState(state, prevSimMove))
	{
		TTval = TTlookup(state, alpha, beta, depth);

		// if this is a TT cut, return the proper value
		if (TTval.cut())
		{
			return AlphaBetaValue(TTval.entry()->getScore(), getAlphaBetaMove(TTval, playerToMove));
		}
	}

	// move generation: each depth has its own MoveArray in _allMoves
	MoveArray & moves = _allMoves[depth];
	state.generateMoves(moves, playerToMove);
	moves.shuffleMoveActions();
	generateOrderedMoves(state, moves, TTval, playerToMove, depth);

	AlphaBetaMove bestMove, bestSimResponse;

	size_t moveNumber(0);
	std::vector<Action> moveVec;

	// for each child
	while (getNextMoveVec(playerToMove, moves, moveNumber, TTval, depth, moveVec))
	{
		// the value of the recursive AB we will call
		AlphaBetaValue val;

		// if this is the first player in a simultaneous move state
		if (state.bothCanMove() && !prevSimMove && (depth != 1))
		{
			// don't generate a child yet, just pass on the move we are investigating;
			// the recursive call applies it together with the other player's move
			val = alphaBeta(state, depth-1, playerToMove, &moveVec, alpha, beta);
		}
		else
		{
			// generate the child state (only copied on the path that actually needs it)
			GameState child(state);

			// if this is the 2nd move of a simultaneous move state
			if (prevSimMove)
			{
				// do the previous move selected by the first player to move during this state
				child.makeMoves(*prevSimMove);
			}

			// do the moves of the current player
			child.makeMoves(moveVec);
			child.finishedMoving();

			// get the alpha beta value
			val = alphaBeta(child, depth-1, playerToMove, NULL, alpha, beta);
		}

		// set alpha or beta based on maxplayer
		if (maxPlayer && (val.score() > alpha))
		{
			alpha = val.score();
			bestMove = AlphaBetaMove(moveVec, true);

			// first mover of a simultaneous state: remember the move returned by the recursive call
			if (state.bothCanMove() && !prevSimMove)
			{
				bestSimResponse = val.abMove();
			}
		}
		else if (!maxPlayer && (val.score() < beta))
		{
			beta = val.score();
			bestMove = AlphaBetaMove(moveVec, true);

			// NOTE(review): the max-player branch above records the response when
			// !prevSimMove, while this branch records it when prevSimMove is set.
			// Behavior is kept as-is; confirm whether the asymmetry is intended.
			if (state.bothCanMove() && prevSimMove)
			{
				bestSimResponse = val.abMove();
			}
		}

		// diagnostic only: both bounds still hold these specific values after evaluating a move
		if (alpha.val() == -10000000 && beta.val() == 10000000)
		{
			fprintf(stderr, "\n\nALPHA BETA ERROR, NO VALUE SET\n\n");
		}

		// alpha-beta cut
		if (alpha >= beta)
		{
			break;
		}

		moveNumber++;
	}

	// store the result for this node under the same condition that allowed the lookup
	if (isTranspositionLookupState(state, prevSimMove))
	{
		TTsave(state, maxPlayer ? alpha : beta, alpha, beta, depth, playerToMove, bestMove, bestSimResponse);
	}

	return maxPlayer ? AlphaBetaValue(alpha, bestMove) : AlphaBetaValue(beta, bestMove);
}