Example #1
Action AIAgent::iterativeDeepeningSearch() {
	//Search depth 1 first so _action always holds a valid fallback move.
	int depth = 1;
	depthLimitedSearch(depth);
	int score = alphaBetaSearch(gameTree->getRoot(), -1000000000, 1000000000, ai_player);
	Action _action = findMove(score);
	//Deepen one ply at a time up to the maximum depth.
	for (depth = 2; depth < 6; depth++) {
		depthLimitedSearch(depth);
		score = alphaBetaSearch(gameTree->getRoot(), -1000000000, 1000000000, ai_player);
		_action = findMove(score);
		//Stop early once a winning score is found.
		if (score == 10000000) {
			return _action;
		}
	}
	return _action;
}
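For reference, a minimal sketch of the pruning recursion that a function like alphaBetaSearch above typically implements (the Node type and its evaluate() method are illustrative assumptions, not the project's actual classes):

#include <algorithm>
#include <limits>
#include <vector>

struct Node {
    std::vector<Node> children;
    int value = 0;                         //hypothetical static evaluation
    int evaluate() const { return value; }
};

//Textbook alpha-beta: stop expanding a node once the [alpha, beta] window closes.
static int alphaBeta(const Node &n, int alpha, int beta, bool maximizing) {
    if (n.children.empty())
        return n.evaluate();
    if (maximizing) {
        int best = std::numeric_limits<int>::min();
        for (const Node &c : n.children) {
            best = std::max(best, alphaBeta(c, alpha, beta, false));
            alpha = std::max(alpha, best);
            if (alpha >= beta) break;      //beta cutoff
        }
        return best;
    }
    int best = std::numeric_limits<int>::max();
    for (const Node &c : n.children) {
        best = std::min(best, alphaBeta(c, alpha, beta, true));
        beta = std::min(beta, best);
        if (alpha >= beta) break;          //alpha cutoff
    }
    return best;
}

Symmetric sentinel bounds such as -1000000000 and 1000000000 (or std::numeric_limits<int>::min() and max()) keep the initial window well-formed.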
	// Iterative deepening: try successively larger depth limits until a path is found.
	std::vector< std::vector< const T * > > findShortestPaths( const T * from, unsigned int maxDepth )
	{
		std::vector< std::vector< const T * > > paths;
		for( unsigned int depth = 0; depth < maxDepth; depth++ )
		{
			paths = depthLimitedSearch( from, depth );
			if( paths.size() != 0 )
				break;
		}
		return paths;
	}
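The same iterative-deepening idea in a self-contained form, applied to a plain adjacency-list graph (the dls helper and the int-indexed node representation are illustrative assumptions, not members of the class above). A path found at depth d is guaranteed shortest, because every smaller depth was already tried:

#include <vector>

//Depth-limited DFS: can `goal` be reached from `node` within `depth` edges?
static bool dls(const std::vector<std::vector<int>> &adj,
                int node, int goal, unsigned int depth) {
    if (node == goal) return true;
    if (depth == 0) return false;
    for (int next : adj[node])
        if (dls(adj, next, goal, depth - 1))
            return true;
    return false;
}

//Iterative deepening: the first depth that succeeds is the shortest path length.
static int shortestPathLength(const std::vector<std::vector<int>> &adj,
                              int from, int to, unsigned int maxDepth) {
    for (unsigned int depth = 0; depth < maxDepth; depth++)
        if (dls(adj, from, to, depth))
            return static_cast<int>(depth);
    return -1; //unreachable within maxDepth
}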
void TDNeuralNetPlayer::makeMove(const BoardState *currentState, Grid *&nextMove)
{
    if(m_currentRound >= m_oldWeights.numRounds())
    {
        m_oldWeights.increaseSize();
    }

    //Make sure nextMove does not contain other data.
    if(nextMove != NULL)
        delete nextMove;

    nextMove = m_rulesEngine->createGameSpecificGrid();

    //Select a random number between 0 and RANDOM_MOVE_INTERVAL - 1.
    //If the result is not zero, perform a normal move action.
    //Otherwise, randomly select a move from the current state's next states.
    if(rand() % RANDOM_MOVE_INTERVAL)
    {
        int bestIndex = 0;

#ifdef USE_DLS
        //////////////////////////////
        //Using Depth-limited search//
        //////////////////////////////

        //NOTE
        //Because of the way getResults works with the current player, the player
        //must be inverted before depthLimitedSearch is called.
        Elements::PlayerType nextPlayer;
        if(m_player == Elements::PLAYER_1)
            nextPlayer = Elements::PLAYER_2;
        else
            nextPlayer = Elements::PLAYER_1;

        //DLS returns the value of the best state it found, but we don't really care about that.
        depthLimitedSearch(currentState, m_searchDepth, bestIndex, nextPlayer);

#ifdef DEBUG_MOVECHOICE
        printLine2("Looking for highest valued move.  Found best index: ", bestIndex);
#endif

#else
        ////////////////////////////////////////
        //Use minimax to determine best state.//
        ////////////////////////////////////////

        //Output array filled in by getResults; there is only one output per state.
        double *results = NULL;
        //Array to house the evaluation of each separate next board state.
        double *allResults = new double[currentState->getNumNextStates()];

#ifdef DEBUG_MOVECHOICE
        print("Getting values for each next state: \n");
#endif
        //Collect state worth from all next states.
        for(int x = 0; x < currentState->getNumNextStates(); ++x)
        {
            getResults(currentState->getStateWithIndex(x)->getCurrentGrid(), m_player, results);
            allResults[x] = results[0];

#ifdef DEBUG_MOVECHOICE
            print2(" ", allResults[x]);
            printLine2("\tfor board: ", *currentState->getStateWithIndex(x)->getCurrentGrid());
#endif

            //getResults allocates a fresh output array on each call; free it
            //here so the loop does not leak memory.
            delete [] results;
            results = NULL;
        }

#ifdef DEBUG_MOVECHOICE
        print("\n");
#endif

        //Player 1 calculates a highest value (Best move for p1, worst for p2)
        //Player 2 calculates a lowest value (Worst move for p1, best for p2)
        if(calcAsMax)
        {
            //Find the state that had the highest perceived reward.
            for(int x = 1; x < currentState->getNumNextStates(); ++x)
            {
                if(allResults[x] > allResults[bestIndex])
                    bestIndex = x;
            }
#ifdef DEBUG_MOVECHOICE
            printLine2("Looking for highest valued move.  Found: ", allResults[bestIndex]);
            printLine2("Looking for highest valued move.  Found best index: ", bestIndex);
#endif
        }
        else
        {
            //Find the state that had the lowest perceived reward.
            for(int x = 1; x < currentState->getNumNextStates(); ++x)
            {
                if(allResults[x] < allResults[bestIndex])
                    bestIndex = x;
            }
#ifdef DEBUG_MOVECHOICE
            printLine2("Looking for lowest valued move.  Found: ", allResults[bestIndex]);
#endif
        }

        //TODO: Show the neural network the previous move so that the expected reward value can be stored as well.

        //Store the value from the neural net's calculation.
        //TD-learning uses a single output, so only index 0 would be stored:
        //m_oldWeights.previousOutputs[m_currentRound][0] = allResults[bestIndex];

        delete [] allResults;

#endif // USE_DLS
        //Return the grid of the best state.
        *nextMove = *(currentState->getStateWithIndex(bestIndex)->getCurrentGrid());
    }
    else
    {
        //Randomly select a move.
        *nextMove = *(currentState->getStateWithIndex(rand() % currentState->getNumNextStates())->getCurrentGrid());
        //Evaluate that move and store the result.
        //getResults(nextMove, m_player, m_oldWeights.previousOutputs[m_currentRound]);
    }

    //Advance the round counter by two to skip over the opponent's turn.
    m_currentRound += 2;
}
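The rand() % RANDOM_MOVE_INTERVAL test in makeMove amounts to epsilon-greedy exploration with epsilon = 1/RANDOM_MOVE_INTERVAL. A minimal standalone sketch of the same policy decision (using a <random> engine instead of rand(), which is an assumption, not the original code):

#include <random>

//Returns true when the agent should explore (play a random move) instead of
//exploiting the network's evaluation.  With interval N, exploration happens
//with probability 1/N, matching rand() % RANDOM_MOVE_INTERVAL == 0 above.
static bool shouldExplore(std::mt19937 &rng, int interval) {
    std::uniform_int_distribution<int> dist(0, interval - 1);
    return dist(rng) == 0;
}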
double TDNeuralNetPlayer::depthLimitedSearch(const BoardState *currentState, int searchDepth, int &bestIndex, Elements::PlayerType player)
{
    //If an end state is found or we have reached the maximum search depth
    if(searchDepth == 0 || currentState->getNumNextStates() == 0)
    {
        //There is only one output from the network for board evaluations.
        double *results = NULL;

        getResults(currentState->getCurrentGrid(), player, results);

        bestIndex = DLS_EVALUATED_STATE;

        double retVal = results[0];

#ifdef DEBUG_MOVECHOICE
        print2(" ", retVal);
        printLine2("\t for board: ", *currentState->getCurrentGrid());
#endif

        delete [] results;
        return retVal;
    }
    else if(searchDepth > 0)
    {
        Elements::PlayerType nextPlayer;
        double bestResult, nextResult;

        //The current player only matters on a final state.
        if(player == Elements::PLAYER_1)
            nextPlayer = Elements::PLAYER_2;
        else
            nextPlayer = Elements::PLAYER_1;

        //Don't actually care about the best index from child states; only the calling function does.
        int throwaway;

        //Get the first result.
#ifdef DEBUG_MOVECHOICE
        print("Getting values for each next state: \n");
#endif
        bestResult = depthLimitedSearch(currentState->getStateWithIndex(0), searchDepth - 1, throwaway, nextPlayer);

        double resultTotal = bestResult;

        //BestIndex will only matter for the initial call to DLS, but I'm not sure how to optimize it out at this point,
        //except to exclude the initial state from DLS entirely.
        bestIndex = 0;

        //Loop to find the rest of the results.
        for(int x = 1; x < currentState->getNumNextStates(); ++x)
        {
            nextResult  = depthLimitedSearch(currentState->getStateWithIndex(x), searchDepth - 1, throwaway, nextPlayer);
            resultTotal += nextResult;

            //If calcAsMax is true, look for the highest value.  Otherwise, look for the lowest.
            if(calcAsMax)
            {
                if(nextResult > bestResult)
                {
                    bestResult = nextResult;
                    bestIndex = x;
                }
            }
            else
            {
                if(nextResult < bestResult)
                {
                    bestResult = nextResult;
                    bestIndex = x;
                }
            }
        }

        //(Disabled) Optionally add the value of the current state to the calculation:
        //double *results = NULL;
        //getResults(currentState->getCurrentGrid(), player, results);
        //resultTotal += results[0];
        //delete [] results;

#ifdef DEBUG_MOVECHOICE
        printLine2("\nBest valued move found: ", bestResult);
#endif

        //Return the summation over all child states rather than just the
        //best (or worst) value; returning bestResult is intentionally disabled.
        return resultTotal;
    }
    else
    {
        //Negative search depth; should never be reached.
        return -1;
    }
}
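Note that this depthLimitedSearch backs up resultTotal, the sum of every child's value, rather than the single best value (the return bestResult; line is disabled). Dividing that sum by the child count yields the expectimax backup; a minimal sketch of that contrast, again with a hypothetical Node type and evaluator:

#include <vector>

struct Node {
    std::vector<Node> children;
    double value = 0.0;                        //hypothetical static evaluation
    double evaluate() const { return value; }
};

//Expectimax-style backup: average the children instead of picking the best,
//which is what summing child results (as resultTotal does) approximates,
//up to the division by the child count.
static double expectiBackup(const Node &n, int depth) {
    if (depth == 0 || n.children.empty())
        return n.evaluate();
    double total = 0.0;
    for (const Node &c : n.children)
        total += expectiBackup(c, depth - 1);
    return total / static_cast<double>(n.children.size());
}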