/**
 * Choose a move for `player` using a depth-limited alpha-beta minimax search.
 *
 * Every move produced by getMoveList() is applied with simulateMove() and the
 * resulting position is rated by minimax(). When `player` is player_max the
 * move with the highest utility wins, otherwise the move with the lowest.
 * The size of the chosen piece is then added to score[player-1], where `score`
 * is the array declared outside this function.
 *
 * @param board           current board (16 x 16)
 * @param available       piece table (2 x 21), forwarded to the helpers
 * @param player          player to move; used as score[player-1]
 * @param maxsearchdepth  depth handed to minimax() for each candidate
 * @return the selected move. When getMoveList() reports no moves, a
 *         zero-initialised move is returned and `score` is left untouched.
 */
move getminimaxmove( const dtype board[16][16], const dtype available[2][21], const int player, const dtype maxsearchdepth )
{
    move moves[2000];
    const int movesFound = getMoveList( moves, board, available, player, score );

    // Nothing to choose from: moves[] holds no valid entry, so reading
    // moves[0] (and indexing piece_sizes with its piece field) would use
    // indeterminate data. Hand back a well-defined empty move instead.
    if( movesFound <= 0 ) {
        move noMove = {};
        return noMove;
    }

    int move_num = -1;
    float alpha = -FLT_MAX, beta = FLT_MAX;

    for( int i = 0; i < movesFound; i++ ) {
        dtype n_board[16][16];
        dtype n_available[2][21];
        int n_score[2];
        int n_player;

        // Play candidate i on scratch copies, then rate the resulting position.
        simulateMove( moves[i], board, available, score, player,
                      n_board, n_available, n_score, &n_player );
        const float newUtility = minimax( n_board, n_available, n_score, n_player,
                                          maxsearchdepth, alpha, beta );

        if( player == player_max ) {
            // Strict comparison: the first of several equally rated moves is kept.
            if( newUtility > alpha ) {
                alpha = newUtility;
                move_num = i;
            }
        } else if( newUtility < beta ) {
            beta = newUtility;
            move_num = i;
        }
    }

    // No candidate improved on the initial bound: fall back to the first move.
    if( move_num == -1 )
        move_num = 0;

    move m = moves[move_num];
    score[player-1] += piece_sizes[m.piece];
    return m;
}
float minimax( const dtype board[16][16], const dtype available[2][21], const int score[2], const int player, const int depth, float alpha, float beta ){ if(depth == 0){ float utility = evalfunc ( board, available, score, player_max); return utility; } move moves[2000]; int movesFound, i,j; movesFound = getMoveList( moves, board, available, player, score ); if(movesFound == 0){ dtype n_board[16][16]; dtype n_available[2][21]; int n_score[2]; int n_player; // Copy board data to output for( i = 0; i < 16; i++ ) for( j = 0; j < 16; j++ ) n_board[i][j] = board[i][j]; // Copy piece data to output for( i = 0; i < 2; i++ ) for( j = 0; j < 21; j++ ) n_available[i][j] = available[i][j]; // Copy piece data to output array for( i = 0; i < 2; i++ ) n_score[i] = score[i]; n_player = 3-player; float newUtility = minimax( n_board, n_available, n_score, n_player, depth-1, alpha, beta ); if( player == player_max ) { if( newUtility > alpha ) { alpha = newUtility; } } else if( newUtility < beta ) { beta = newUtility; } return (player==player_max) ? alpha : beta; } for( i = 0; i < movesFound; i++ ){ dtype n_board[16][16]; dtype n_available[2][21]; int n_score[2]; int n_player; simulateMove( moves[i], board, available, score, player, n_board, n_available, n_score, &n_player ); float newUtility = minimax( n_board, n_available, n_score, n_player, depth-1, alpha, beta ); if( player == player_max ) { if( newUtility > alpha ) { alpha = newUtility; } } else if( newUtility < beta ) { beta = newUtility; } //prune if( beta <= alpha ) break; } return (player==player_max) ? alpha : beta; }
/**
 * Multi-threaded variant of the root move search.
 *
 * Each candidate move from getMoveList() is simulated into a per-slot
 * MtGameState and rated by a getminimaxutility thread. At most MAX_THREADS
 * slots are used. This function polls the slots' `completed` flag, folds each
 * finished slot's `utility` into alpha/beta, and reuses the slot for the next
 * pending move. The size of the chosen piece is added to score[player-1],
 * where `score` is the array declared outside this function.
 *
 * Protocol relied upon (confirm against getminimaxutility, which is not
 * visible here): the worker stores its result in `utility`, sets `completed`
 * and then returns normally without detaching itself.
 *
 * @param board           current board (16 x 16)
 * @param available       piece table (2 x 21), forwarded to the helpers
 * @param player          player to move; used as score[player-1]
 * @param maxsearchdepth  stored in each slot's `depth` field
 * @return the selected move. When getMoveList() reports no moves, a
 *         zero-initialised move is returned and `score` is left untouched.
 */
move getminimaxmove_mt( const dtype board[16][16], const dtype available[2][21], const int player, const dtype maxsearchdepth )
{
    // Get available moves list
    move moves[2000];
    const int movesFound = getMoveList( moves, board, available, player, score );

    // Nothing to choose from: moves[0] would be indeterminate, so return a
    // well-defined empty move instead of reading it.
    if( movesFound <= 0 ) {
        move noMove = {};
        return noMove;
    }

    // Per-slot thread state. joinable[i] is non-zero while pth[i] refers to a
    // thread that was started and has not been joined yet.
    MtGameState threadStates[MAX_THREADS];
    pthread_t pth[MAX_THREADS];
    int joinable[MAX_THREADS];

    int move_num = -1;
    float alpha = -FLT_MAX, beta = FLT_MAX;

    // Never use more slots than there are moves
    int nThreads = MAX_THREADS;
    if( nThreads > movesFound )
        nThreads = movesFound;

    // Every slot starts out "completed" with a neutral utility, so the first
    // sweep launches work without changing alpha/beta or reading moveindex.
    int i;
    for( i = 0; i < nThreads; i++ ) {
        threadStates[i].completed = TRUE;
        threadStates[i].depth = maxsearchdepth;
        threadStates[i].utility = ( player == player_max ) ? alpha : beta;
        joinable[i] = 0;
    }

    // Each slot is harvested once per move it ran plus once for its initial
    // neutral state, hence movesFound + nThreads harvests in total.
    // NOTE(review): this loop busy-polls `completed` without any visible
    // synchronisation on the flag itself - confirm how MtGameState declares it.
    int nextMoveIndex = 0;
    while( nextMoveIndex < movesFound + nThreads ) {
        for( i = 0; i < nThreads; i++ ) {
            if( !threadStates[i].completed )
                continue;

            // Reap the finished thread before the slot is reused; without the
            // join every created thread would remain allocated.
            if( joinable[i] ) {
                pthread_join( pth[i], NULL );
                joinable[i] = 0;
            }

            // Mark the slot as harvested
            threadStates[i].completed = FALSE;

            // Update alpha-beta parameters
            if( player == player_max ) {
                if( threadStates[i].utility > alpha ) {
                    alpha = threadStates[i].utility;
                    move_num = threadStates[i].moveindex;
                }
            } else {
                if( threadStates[i].utility < beta ) {
                    beta = threadStates[i].utility;
                    move_num = threadStates[i].moveindex;
                }
            }

            // Launch the next pending move, if any are left
            if( nextMoveIndex < movesFound ) {
                threadStates[i].moveindex = nextMoveIndex;
                threadStates[i].alpha = alpha;
                threadStates[i].beta = beta;

                // Simulate the selected move on the slot's private state
                simulateMove( moves[nextMoveIndex], board, available, score, player,
                              threadStates[i].board, threadStates[i].available,
                              threadStates[i].score, &threadStates[i].player );

                if( pthread_create( &(pth[i]), NULL, &getminimaxutility, &(threadStates[i]) ) == 0 ) {
                    joinable[i] = 1;
                } else {
                    // The thread could not be started. Evaluate the move on
                    // this thread so the slot still completes; otherwise the
                    // polling loop would wait forever.
                    getminimaxutility( &(threadStates[i]) );
                }
            }

            nextMoveIndex++;
        }
        // Yield CPU
        //Sleep( 5 );
    }

    // No candidate improved on the initial bound: fall back to the first move.
    if( move_num == -1 )
        move_num = 0;

    move m = moves[move_num];
    score[player-1] += piece_sizes[m.piece];
    return m;
}
std::vector<UCBrow> UpperConfidence::generateUCBTable(int color, GoGame* game) { LOG_DEBUG<<"Generating UCB table"<<std::endl; time_t timer; time(&timer); std::vector<UCBrow> ucbtable; LOG_VERBOSE<<"Began UCB"<<std::endl; workingBoard = new GoBoard(game->Board->Size()); workingBoard->resetAndReplayMoves(game->Board); this->color = color; LOG_VERBOSE<<"SD: "<<color<<std::endl; this->game = game; LOG_VERBOSE<<"SD: "<<game<<std::endl; LOG_VERBOSE << "Generating UCB move for "<<color<<std::endl; //Generate possible moves moves = preselRandMoves; if(moves.size() == 0) moves = getPossibleMoves(color, game); float expected[moves.size()]; int numPlayed[moves.size()]; int totalNumPlayed = 0; int initialPlayNum = 1; LOG_VERBOSE << "Play all 1 time"; //Play all moves "once" for(size_t j = 0; j<moves.size(); ++j) { numPlayed[j] = 0; expected[j] = 0; for(int i = 0; i<initialPlayNum; ++i) { float result = simulateMove(moves[j]); if(result > 0) result = 1; else result = 0; float oldWins = expected[j] * numPlayed[j]; ++numPlayed[j]; ++totalNumPlayed; expected[j] = ((float)(result+oldWins)/(float)numPlayed[j]); } } float maximisedVal = 0.0; int nextToPlay = 0; int numSim = numSimulations; while(totalNumPlayed<numSim && playUntilStopped) { //Maximise for all following plays for(size_t i = 0; i<moves.size(); ++i) { float ucbVal = expected[i] + sqrt( expRatio * log(totalNumPlayed) / numPlayed[i]); // LOG_VERBOSE <<i<< " " <<expected[i]<< " "<< sqrt( expRatio * log(totalNumPlayed) / numPlayed[i]) << std::endl; if(ucbVal > maximisedVal) { maximisedVal = ucbVal; nextToPlay = i; } } // LOG_VERBOSE <<nextToPlay<< " " <<expected[nextToPlay]<< " "<< sqrt( expRatio * log(totalNumPlayed) / numPlayed[nextToPlay]) << std::endl; //Play best move and update expected return float result = simulateMove(moves[nextToPlay]); if(result > 0) result = 1; else result = 0; ++numPlayed[nextToPlay]; ++totalNumPlayed; if(playUntilStopped) ++numSim; if(totalNumPlayed%1000==0) std::cerr<<"Simulated 
"<<totalNumPlayed<<" games"<<std::endl; float oldWins = expected[nextToPlay] * (numPlayed[nextToPlay]-1); expected[nextToPlay] = ((float)(result+oldWins)/(float)numPlayed[nextToPlay]); maximisedVal = 0; nextToPlay = 0; } for(size_t i = 0; i<moves.size(); ++i) { UCBrow u; u.pos = moves[i]; u.expected = expected[i]; u.timesPlayed = numPlayed[i]; ucbtable.push_back(u); } time_t now; time(&now); int perf = (float)totalNumPlayed/difftime(now,timer); std::cerr<<"child; " <<perf<<" sims per sec"<<std::endl; LOG_VERBOSE<<"Completed UCB table"<<std::endl; return ucbtable; }