void PositionMonteCarloTreeSearch::go(unsigned int depth) {
    (void)depth; // depth is not needed here
    srand(time(NULL));
    MCTSNode *rootNode = new MCTSNode();
    int iterations=0;
    while(1) {
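        // One MCTS iteration: selection, expansion, simulation (playout) and backpropagation.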
        PositionMonteCarloTreeSearch *rootPos = new PositionMonteCarloTreeSearch(*this);
        MCTSNode *selected = rootNode->select(rootPos);
        MCTSNode *expanded = selected->expand(rootPos);
        double result = expanded->simulate(rootPos);
        expanded->update(result);
        iterations++;
        // Release the per-iteration copy of the position.
        delete rootPos;
        // Check the clock only every 100 iterations to keep the overhead low.
        if (iterations%100==0) {
            if (timer.isTimeout() || timer.checkTimeout())
                break;
        }
    }
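    // Ask the root for the move it considers best after the search.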
    string foundMove=rootNode->getMove();
    delete rootNode;
    
    /***********************************
     * Print the result of the search. *
     ***********************************/
    timer.stopTimer();
    double seconds = timer.getStartEndDiffSeconds();
    cout << "Move: " << foundMove << endl;
    cout << "Time: " << seconds << " seconds" << endl;
    cout << "bestmove " << foundMove << endl;
}
Example #2
File: mcts.cpp Project: ikoryakovskiy/grl
void MCTSPolicy::act(double time, const Observation &in, Action *out)
{
  // Clear tree at start of episode
  if (time == 0.)
  {
    safe_delete(&root_);
    trunk_ = NULL;
  }

  // Try warm start
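  // Reuse the subtree below the previously selected child if its predicted
  // state is close enough to the new observation.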
  if (trunk_ && trunk_->children())
  {
    double maxdiff = 0;
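    // Pick the child the tree currently prefers (select with a zero exploration term).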
    MCTSNode *selected = trunk_->select(0);
    Vector predicted = selected->state();
    
    for (size_t ii=0; ii < in.size(); ++ii)
      maxdiff = fmax(maxdiff, fabs(in[ii]-predicted[ii]));
      
    if (maxdiff < 0.05)
    {
      trunk_ = selected;
      selected->orphanize();

      CRAWL("Trunk set to selected state " << trunk_->state());
    }
    else
    {
      safe_delete(&root_);
      trunk_ = NULL;
      TRACE("Cannot use warm start: predicted state " << predicted << " differs from actual state " << in);
    }
  }

  // Allocate new tree if warm start was not possible
  if (!trunk_)
  {
    allocate();
    root_->init(NULL, 0, in, 0, false);
    root_->allocate(discretizer_->size(in));
    trunk_ = root_;
  }
  
  CRAWL("Trunk set to state " << trunk_->state());

  // Search until budget is up
  timer t;
  size_t searches=0;

  while (t.elapsed() < budget_)
  {
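    // Selection/expansion: treePolicy() descends the tree and returns the node to evaluate;
    // its depth is found by walking the parent chain back to the root.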
    MCTSNode *node = treePolicy(), *it=node;
    size_t depth=0;
    
    while ((it = it->parent()))
      depth++;
    
    double reward = 0;
    
    CRAWL("Tree policy selected node with state " << node->state() << " at depth " << depth);
    
    if (!node->terminal() && depth < horizon_)
      reward = defaultPolicy(node->state(), horizon_-depth);
     
    CRAWL("Default policy got reward " << reward);

    do
    {
      node->update(reward);
      reward = gamma_*reward + node->reward();
    } while ((node = node->parent()));
    
    searches++;
  }
  
  // Select best action
  if (trunk_->children())
  {
    MCTSNode *node = trunk_->select(0);
    *out = discretizer_->at(trunk_->state(), node->action());
    out->type = atGreedy;

    TRACE("Selected action " << *out << " (Q " << node->q()/node->visits() << ") after " << searches << " searches");
  }
  else
  {
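    // The trunk has no children yet: fall back to a uniformly random discrete action.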
    *out = discretizer_->at(in, lrand48()%discretizer_->size(in));
    out->type = atExploratory;

    TRACE("Selected random action " << *out);
  }
}