/**
 * Iterative-deepening driver around alphaBetaSearch().
 *
 * Every pass calls depthLimitedSearch() with the current depth, scores the
 * root of gameTree with alphaBetaSearch() for ai_player and turns that score
 * into an Action through findMove(). One pass is run at depth 1 before the
 * loop; the loop then runs depths 1 through 5 and stops early as soon as a
 * pass produces TERMINATING_SCORE.
 *
 * NOTE(review): depth 1 is therefore searched twice (once before the loop and
 * once as its first iteration). Kept as-is because the side effects of
 * depthLimitedSearch()/findMove() are not visible from here.
 * NOTE(review): the window is asymmetric (-1e9 .. +1e8); the upper bound
 * looks like it is missing a digit -- confirm before changing it.
 *
 * @return the Action produced by the last pass that was executed.
 */
Action AIAgent::iterativeDeepeningSearch()
{
    const int WINDOW_LOW = -1000000000;
    const int WINDOW_HIGH = 100000000;
    // Presumably the evaluation of a decided game -- TODO confirm.
    const int TERMINATING_SCORE = 10000000;
    const int DEPTH_END = 6;

    int level = 1;

    // Initial pass; its score is not tested against TERMINATING_SCORE.
    depthLimitedSearch(level);
    int value = alphaBetaSearch(gameTree->getRoot(), WINDOW_LOW, WINDOW_HIGH, ai_player);
    Action chosen = findMove(value);

    while (level < DEPTH_END)
    {
        depthLimitedSearch(level);
        value = alphaBetaSearch(gameTree->getRoot(), WINDOW_LOW, WINDOW_HIGH, ai_player);
        chosen = findMove(value);

        // No point in searching deeper once the terminating score shows up.
        if (value == TERMINATING_SCORE)
        {
            break;
        }

        ++level;
    }

    return chosen;
}
std::vector< std::vector< const T * > > findShortestPaths( const T * from, unsigned int maxDepth ) { std::vector< std::vector< const T * > > paths; for( unsigned int depth = 0; depth < maxDepth; depth++ ) { paths = depthLimitedSearch( from, depth ); if( paths.size() != 0 ) break; } return paths; }
void TDNeuralNetPlayer::makeMove(const BoardState *currentState, Grid *&nextMove) { if(m_currentRound >= m_oldWeights.numRounds()) { m_oldWeights.increaseSize(); } //Make sure nextMove does not contain other data. if(nextMove != NULL) delete nextMove; nextMove = m_rulesEngine->createGameSpecificGrid(); //Select a random number between 0 and RANDOM_MOVE_INTERVAL //If the result is not zero, perform a normal move action. //Otherwise, randomly select a move from nextMoves. if((rand() % RANDOM_MOVE_INTERVAL)/* true*/) { int bestIndex = 0; #ifdef USE_DLS ////////////////////////////// //Using Depth-limited search// ////////////////////////////// //NOTE //Because of the way getResults works with CurrentPlayer, currentPlayer must be inverted //before depthLimitedSearch is called. Elements::PlayerType nextPlayer; if(m_player == Elements::PLAYER_1) nextPlayer = Elements::PLAYER_2; else nextPlayer = Elements::PLAYER_1; //DLS returns the value of the best state it found, but we don't really care about that. depthLimitedSearch(currentState, m_searchDepth, bestIndex, nextPlayer); #ifdef DEBUG_MOVECHOICE printLine2("Looking for highest valued move. Found best index: ", bestIndex); #endif #else ///////////////////////////////////////// ////Use minimax to determine best state// ///////////////////////////////////////// double *results = NULL; //Create an array to house results from separate board states. double *allResults = new double[currentState->getNumNextStates()]; //Create an array for the outputs. There is only one. #ifdef DEBUG_MOVECHOICE print("Getting values for each next state: \n"); #endif //Collect state worth from all next states. 
for(int x = 0; x < currentState->getNumNextStates(); ++x) { getResults(currentState->getStateWithIndex(x)->getCurrentGrid(), m_player, results); allResults[x] = results[0]; #ifdef DEBUG_MOVECHOICE print2(" ", allResults[x]); printLine2("\tfor board: ", *currentState->getStateWithIndex(x)->getCurrentGrid()); #endif } #ifdef DEBUG_MOVECHOICE print("\n"); #endif //Player 1 calculates a highest value (Best move for p1, worst for p2) //Player 2 calculates a lowest value (Worst move for p1, best for p2) if(calcAsMax) { //Find the state that had the highest perceived reward. for(int x = 1; x < currentState->getNumNextStates(); ++x) { if(allResults[x] > allResults[bestIndex]) bestIndex = x; } #ifdef DEBUG_MOVECHOICE printLine2("Looking for highest valued move. Found: ", allResults[bestIndex]); printLine2("Looking for highest valued move. Found best index: ", bestIndex); #endif } else { //Find the state that had the lowest perceived reward. for(int x = 1; x < currentState->getNumNextStates(); ++x) { if(allResults[x] < allResults[bestIndex]) bestIndex = x; } #ifdef DEBUG_MOVECHOICE printLine2("Looking for lowest valued move. Found: ", allResults[bestIndex]); #endif } //TODO: Show neural network the previous move so that the expected reward value can be stored as well. //Store the values from the nerual net's calculation. //There can only be one output for TD-learning. So, only use //m_oldWeights.previousOutputs[m_currentRound][0] = allResults[bestIndex]; delete [] results; delete [] allResults; //End of #ifdef USE_DLS; #else #endif //Return the grid of the best state. *nextMove = *(currentState->getStateWithIndex(bestIndex)->getCurrentGrid()); } else { //Randomly select a move. *nextMove = *(currentState->getStateWithIndex(rand() % currentState->getNumNextStates())->getCurrentGrid()); //Evaluate that move and store the result. //getResults(nextMove, m_player, m_oldWeights.previousOutputs[m_currentRound]); } //Skip the counter to cover opponent's turn. m_currentRound += 2; }
double TDNeuralNetPlayer::depthLimitedSearch(const BoardState *currentState, int searchDepth, int &bestIndex, Elements::PlayerType player) { //If an end state is found or we have reached the maximum search depth if(searchDepth == 0 || currentState->getNumNextStates() == 0) { //There is only one output from the network for board evaluations. double *results = NULL; getResults(currentState->getCurrentGrid(), player, results); bestIndex = DLS_EVALUATED_STATE; double retVal = results[0]; #ifdef DEBUG_MOVECHOICE print2(" ", retVal); printLine2("\t for board: ", *currentState->getCurrentGrid()); #endif delete [] results; return retVal; } else if(searchDepth > 0) { Elements::PlayerType nextPlayer; double bestResult, nextResult; //The current player only matters on a final state. if(player == Elements::PLAYER_1) nextPlayer = Elements::PLAYER_2; else nextPlayer = Elements::PLAYER_1; //Don't actually care about the best index from child states; only the calling function does. int throwaway; //Get the first result. #ifdef DEBUG_MOVECHOICE print("Getting values for each next state: \n"); // print2(" ", bestResult); #endif bestResult = depthLimitedSearch(currentState->getStateWithIndex(0), searchDepth - 1, throwaway, nextPlayer); double resultTotal = bestResult; //BestIndex will only matter for the initial call to DLS, but I'm not sure how to optimize it out at this point, //except to exclude the initial state from DLS entirely. bestIndex = 0; //Loop to find the rest of the results. for(int x = 1; x < currentState->getNumNextStates(); ++x) { nextResult = depthLimitedSearch(currentState->getStateWithIndex(x), searchDepth - 1, throwaway, nextPlayer); resultTotal += nextResult; //If calcAsMax is true, look for the highest value. Otherwise, look for the lowest. 
if(calcAsMax) { if(nextResult > bestResult) { bestResult = nextResult; bestIndex = x; } } else { if(nextResult < bestResult) { bestResult = nextResult; bestIndex = x; } } #ifdef DEBUG_MOVECHOICE // print2(" ", nextResult); #endif } //Add the value of the current state to the calculation. // double *results = NULL; //getResults(currentState->getCurrentGrid(), player, (double *&)results); //resultTotal += results[0]; //delete [] results; #ifdef DEBUG_MOVECHOICE printLine2("\nLooking for highest valued move. Found: ", bestResult); #endif //Return the highest or lowest value. //return bestResult; //Return the summation of all child routes. return resultTotal; } else { return -1; } }