void Player::PlayerUCT::iterate(){ if(player->profile){ timestamps[0] = Time(); stage = 0; } movelist.reset(&(player->rootboard)); player->root.exp.addvloss(); Board copy = player->rootboard; use_rave = (unitrand() < player->userave); use_explore = (unitrand() < player->useexplore); walk_tree(copy, & player->root, 0); player->root.exp.addv(movelist.getexp(3-player->rootboard.toplay())); if(player->profile){ times[0] += timestamps[1] - timestamps[0]; times[1] += timestamps[2] - timestamps[1]; times[2] += timestamps[3] - timestamps[2]; times[3] += Time() - timestamps[3]; } }
/* normrand: return mean plus one draw from unitrand(randfn), scaled by the
 * standard deviation sqrt(var).
 *   randfn - random number function handed through to unitrand()
 *   mean   - value added to the scaled draw
 *   var    - variance; must be non-negative (checked with assert)
 * unitrand(randfn) is presumably a zero-mean, unit-variance draw -- confirm
 * against its definition.
 */
double normrand (double randfn(void), double mean, double var)
{
  assert(var >= 0);             /* a negative variance has no square root */
  if (var != 1)                 /* general case: scale by the std. deviation */
    return mean + unitrand(randfn) * sqrt(var);
  return mean + unitrand(randfn); /* unit variance: no scaling needed */
}  /* normrand() */
// Return unitrand(i, j) converted to DTYPE and multiplied by __SCALING.
// The meaning of i and j is whatever unitrand(int, int) defines; they are
// passed through unchanged.
DTYPE scaledrand(int i, int j)
{
    DTYPE sample = unitrand(i, j);
    return sample * __SCALING;
}
//play a random game starting from a board state, and return the results of who won int Player::PlayerUCT::rollout(Board & board, Move move, int depth){ int won; int num = board.movesremain(); bool wrand = (player->weightedrandom); if(wrand){ wtree[0].resize(board.vecsize()); wtree[1].resize(board.vecsize()); int set = 0; for(Board::MoveIterator m = board.moveit(false, false); !m.done(); ++m){ int i = board.xy(*m); moves[i] = *m; unsigned int p = board.pattern(i); wtree[0].set_weight_fast(i, player->gammas[p]); wtree[1].set_weight_fast(i, player->gammas[board.pattern_invert(p)]); set++; } wtree[0].rebuild_tree(); wtree[1].rebuild_tree(); }else{ int i = 0; for(Board::MoveIterator m = board.moveit(false, false); !m.done(); ++m) moves[i++] = *m; i = num; while(i > 1){ int j = rand32() % i--; Move tmp = moves[j]; moves[j] = moves[i]; moves[i] = tmp; } // random_shuffle(moves, moves + num); } int doinstwin = player->instwindepth; if(doinstwin < 0) doinstwin *= - board.get_size(); bool checkrings = (unitrand() < player->checkrings); //only check rings to the specified depth int checkdepth = (int)player->checkringdepth; //if it's negative, check for that fraction of the remaining moves if(player->checkringdepth < 0) checkdepth = (int)ceil(num * player->checkringdepth * -1); //only allow rings bigger than the minimum ring size, incrementing by the ringincr after each move int minringsize = (int)player->minringsize; int ringcounterfull = (int)player->ringincr; //if it's negative, scale by the fraction of remaining moves if(player->ringincr < 0) ringcounterfull = (int)ceil(num * player->ringincr * -1); int ringcounter = ringcounterfull; int ringperm = player->ringperm; Move * nextmove = moves; Move forced = M_UNKNOWN; while((won = board.won()) < 0){ int turn = board.toplay(); if(forced == M_UNKNOWN){ //do a complex choice PairMove pair = rollout_choose_move(board, move, doinstwin, checkrings); move = pair.a; forced = pair.b; //or the simple random choice if complex found 
nothing if(move == M_UNKNOWN){ do{ if(wrand){ int j = wtree[turn-1].choose(); // assert(j >= 0); wtree[0].set_weight(j, 0); wtree[1].set_weight(j, 0); move = moves[j]; }else{ move = *nextmove; nextmove++; } }while(!board.valid_move_fast(move)); } }else{ move = forced; forced = M_UNKNOWN; } movelist.addrollout(move, turn); board.move(move, true, false, (checkrings ? minringsize : 0), ringperm); if(--ringcounter == 0){ minringsize++; ringcounter = ringcounterfull; } depth++; checkrings &= (depth < checkdepth); if(wrand){ //update neighbour weights for(const MoveValid * i = board.nb_begin(move), *e = board.nb_end(i); i < e; i++){ if(i->onboard() && board.get(i->xy) == 0){ unsigned int p = board.pattern(i->xy); wtree[0].set_weight(i->xy, player->gammas[p]); wtree[1].set_weight(i->xy, player->gammas[board.pattern_invert(p)]); } } } } gamelen.add(depth); if(won > 0) wintypes[won-1][(int)board.getwintype()].add(depth); //update the last good reply table if(player->lastgoodreply && won > 0){ MoveList::RaveMove * rave = movelist.begin(), *raveend = movelist.end(); int m = -1; while(rave != raveend){ if(m >= 0){ if(rave->player == won && *rave != M_SWAP) goodreply[rave->player - 1][m] = *rave; else if(player->lastgoodreply == 2) goodreply[rave->player - 1][m] = M_UNKNOWN; } m = board.xy(*rave); ++rave; } } movelist.finishrollout(won); return won; }
// Draw one sample: fill a vector of gaussian.dim entries with values from
// SampleStdNormal, multiply it by the lower triangle of chol, and add
// gaussian.mean.
VecD GaussianSampler::GaussianSampler::Sample() const
{
    // One SampleStdNormal() value per dimension.
    VecD draws(gaussian.dim);
    generate(draws.begin(), draws.end(), SampleStdNormal);

    // Transform the draws with the lower-triangular factor and shift by the mean.
    return gaussian.mean + chol.LowerTriangle() * draws;
}