void PositionMonteCarloTreeSearch::go(unsigned int depth) { (void)depth; // depth wird hier nicht benötigt srand(time(NULL)); MCTSNode *rootNode = new MCTSNode(); int iterations=0; while(1) { PositionMonteCarloTreeSearch *rootPos = new PositionMonteCarloTreeSearch(*this); MCTSNode *selected = rootNode->select(rootPos); MCTSNode *expanded = selected->expand(rootPos); double result = expanded->simulate(rootPos); expanded->update(result); iterations++; if (iterations%100==0) { if (timer.isTimeout() || timer.checkTimeout()) break; } delete rootPos; } string foundMove=rootNode->getMove(); delete rootNode; /************************************ * Das Ergebnis der Suche ausgeben. * ************************************/ timer.stopTimer(); double seconds = timer.getStartEndDiffSeconds(); cout << "Move: " << foundMove << endl; cout << "Time: " << seconds << " seconds" << endl; cout << "bestmove " << foundMove << endl; }
/// One control step of the MCTS policy: reuse (warm-start) or rebuild the
/// search tree, run simulations until the time budget is spent, then write
/// the greedy action from the trunk to *out — or a random discretized action
/// if the trunk has no children yet.
///
/// \param time Episode time; exactly 0. marks the start of a new episode.
/// \param in   Current observed state vector.
/// \param out  Receives the chosen action; type set to atGreedy/atExploratory.
void MCTSPolicy::act(double time, const Observation &in, Action *out)
{
  // Clear tree at start of episode
  // NOTE(review): exact float compare — assumes the caller passes literally
  // 0. on the first step of an episode; confirm against the caller.
  if (time == 0.)
  {
    safe_delete(&root_);
    trunk_ = NULL;
  }

  // Try warm start: if the greedy child predicted a state close enough to the
  // one actually observed, keep searching from that subtree instead of
  // rebuilding the tree from scratch.
  if (trunk_ && trunk_->children())
  {
    double maxdiff = 0;
    // select(0): presumably exploration constant 0, i.e. purely greedy child
    // selection — confirm in MCTSNode::select.
    MCTSNode *selected = trunk_->select(0);
    Vector predicted = selected->state();

    // L-infinity distance between predicted and observed state.
    for (size_t ii=0; ii < in.size(); ++ii)
      maxdiff = fmax(maxdiff, fabs(in[ii]-predicted[ii]));

    // 0.05 is a hard-coded per-dimension tolerance; assumes state dimensions
    // are on comparable scales — TODO confirm / consider making configurable.
    if (maxdiff < 0.05)
    {
      trunk_ = selected;
      // orphanize() presumably detaches `selected` from its parent so the
      // rest of the old tree can be reclaimed — confirm that the discarded
      // part of the tree is actually freed, otherwise it leaks here.
      selected->orphanize();
      CRAWL("Trunk set to selected state " << trunk_->state());
    }
    else
    {
      // Prediction too far off: drop the stale tree entirely.
      safe_delete(&root_);
      trunk_ = NULL;
      TRACE("Cannot use warm start: predicted state " << predicted << " differs from actual state " << in);
    }
  }

  // Allocate new tree if warm start was not possible
  if (!trunk_)
  {
    allocate();
    // Root node: no parent, action 0, observed state, reward 0, non-terminal.
    root_->init(NULL, 0, in, 0, false);
    // One child slot per discrete action available in state `in`.
    root_->allocate(discretizer_->size(in));
    trunk_ = root_;
  }

  CRAWL("Trunk set to state " << trunk_->state());

  // Search until budget is up
  timer t;
  size_t searches=0;
  while (t.elapsed() < budget_)
  {
    // Selection + expansion via the tree policy; `it` walks to the root to
    // recover this node's depth.
    MCTSNode *node = treePolicy(), *it=node;
    size_t depth=0;
    while ((it = it->parent())) depth++;

    double reward = 0;

    CRAWL("Tree policy selected node with state " << node->state() << " at depth " << depth);

    // Rollout from the selected node for the remaining horizon, unless the
    // node is terminal or already at the horizon.
    if (!node->terminal() && depth < horizon_)
      reward = defaultPolicy(node->state(), horizon_-depth);

    CRAWL("Default policy got reward " << reward);

    // Backpropagation: update each ancestor with the discounted return,
    // accumulating the per-node immediate reward on the way up.
    do
    {
      node->update(reward);
      reward = gamma_*reward + node->reward();
    } while ((node = node->parent()));

    searches++;
  }

  // Select best action
  if (trunk_->children())
  {
    // Greedy child of the trunk (select with exploration 0), mapped back to
    // a concrete action through the discretizer.
    MCTSNode *node = trunk_->select(0);
    *out = discretizer_->at(trunk_->state(), node->action());
    out->type = atGreedy;

    TRACE("Selected action " << *out << " (Q " << node->q()/node->visits() << ") after " << searches << " searches");
  }
  else
  {
    // No search results available (e.g. zero budget): fall back to a
    // uniformly random discrete action.
    *out = discretizer_->at(in, lrand48()%discretizer_->size(in));
    out->type = atExploratory;

    TRACE("Selected random action " << *out);
  }
}