// Walks every child of this node and asks it to display its value.
//
// history  - path taken so far; each child is visited with its action
//            appended and the entry is removed again afterwards, so the
//            history is left as it was found.
// maxDepth - recursion stops once history.Size() has reached this bound.
// ostr     - stream forwarded to the children; this node writes nothing
//            to it itself.
void VNODE::DisplayValue(HISTORY& history, int maxDepth, ostream& ostr) const
{
    // Depth guard: nothing is displayed at or beyond maxDepth.
    if (history.Size() >= static_cast<uint>(maxDepth))
    {
        return;
    }

    for (int a = 0; a < NumChildren; ++a)
    {
        // The second argument is fixed at -1 (presumably a "no observation"
        // placeholder -- confirm against HISTORY::Add).
        history.Add(a, -1);
        Children[a].DisplayValue(history, maxDepth, ostr);
        history.Pop();
    }
}
// Displays the visit count and value of every applicable child action.
//
// history  - path taken so far; each action is appended while its child is
//            visited and popped afterwards, applicable or not.
// maxDepth - recursion stops once history.Size() has reached this bound.
// ostr     - receives "n=<count> " for each applicable child, followed by
//            whatever the child's own DisplayValue writes.
// qvalues  - optional per-action values; when non-null, a pointer to the
//            entry for the current action is forwarded to the child.
//            Access goes through at(), so an out-of-range action throws
//            std::out_of_range.
void VNODE::DisplayValue(HISTORY& history, int maxDepth, ostream& ostr, const std::vector<double> *qvalues) const
{
    if (history.Size() >= maxDepth)
    {
        return;
    }

    for (int a = 0; a < NumChildren; ++a)
    {
        history.Add(a);

        const QNODE& child = Children[a];
        if (child.Applicable())
        {
            ostr << "n=" << child.GetCount() << " ";

            if (!qvalues)
            {
                child.DisplayValue(history, maxDepth, ostr);
            }
            else
            {
                child.DisplayValue(history, maxDepth, ostr, &(qvalues->at(a)));
            }
        }

        history.Pop();
    }
}
// Follows the greedy policy from this node: picks the child with the
// highest Value.GetValue() and asks that child to display its policy.
//
// history  - path taken so far; the chosen action is appended (with 0 as
//            the second argument) for the recursive call and popped after.
// maxDepth - recursion stops once history.Size() has reached this bound.
// ostr     - stream forwarded to the chosen child.
//
// The comparison is strict, so ties go to the lowest action index. If no
// child has a value above -Infinity, nothing is displayed.
void VNODE::DisplayPolicy(HISTORY& history, int maxDepth, ostream& ostr) const
{
    if (history.Size() >= static_cast<uint>(maxDepth))
    {
        return;
    }

    int bestAction = -1;
    double bestValue = -Infinity;
    for (int a = 0; a < NumChildren; ++a)
    {
        const double q = Children[a].Value.GetValue();
        if (q > bestValue)
        {
            bestValue = q;
            bestAction = a;
        }
    }

    // No child beat the initial -Infinity: nothing to follow.
    if (bestAction == -1)
    {
        return;
    }

    history.Add(static_cast<uint>(bestAction), 0);
    Children[bestAction].DisplayPolicy(history, maxDepth, ostr);
    history.Pop();
}
// Displays the policy below this node by following a single best action.
//
// history  - path taken so far; a chosen action would be appended for the
//            recursive call and popped afterwards.
// maxDepth - recursion stops once history.Size() has reached this bound.
// ostr     - stream forwarded to the chosen child.
//
// The best-action selection is currently disabled (see the commented-out
// block marked XXX), so besta stays at -1 and nothing is displayed.
//
// NOTE(review): another definition of VNODE::DisplayPolicy with this exact
// signature appears earlier in this file; only one of the two can be
// compiled into the same translation unit -- confirm which is intended.
void VNODE::DisplayPolicy(HISTORY& history, int maxDepth, ostream& ostr) const
{
    if (history.Size() >= maxDepth)
        return;

    // Each disabled line carries its own comment marker so that the
    // surrounding code stays live regardless of how the file is wrapped.
    // double bestq = -Infinity;
    int besta = -1;
    for (int action = 0; action < NumChildren; action++)
    {
        // if (Children[action].Dirichlet.GetValue() > bestq) //XXX
        // {
        //     besta = action;
        //     bestq = Children[action].Dirichlet.GetValue();
        // }
    }

    if (besta != -1)
    {
        history.Add(besta);
        Children[besta].DisplayPolicy(history, maxDepth, ostr);
        history.Pop();
    }
}
void EXPERIMENT::Run() { boost::timer timer; MCTS mcts(Simulator, SearchParams); double undiscountedReturn = 0.0; double discountedReturn = 0.0; double discount = 1.0; bool terminal = false; bool outOfParticles = false; int t; STATE* state = Real.CreateStartState(); if (SearchParams.Verbose >= 1) Real.DisplayState(*state, cout); for (t = 0; t < ExpParams.NumSteps; t++) { int observation; double reward; int action = mcts.SelectAction(); terminal = Real.Step(*state, action, observation, reward); Results.Reward.Add(reward); undiscountedReturn += reward; discountedReturn += reward * discount; discount *= Real.GetDiscount(); if (SearchParams.Verbose >= 1) { Real.DisplayAction(action, cout); Real.DisplayState(*state, cout); Real.DisplayObservation(*state, observation, cout); Real.DisplayReward(reward, cout); } if (terminal) { cout << "Terminated" << endl; break; } outOfParticles = !mcts.Update(action, observation, reward); if (outOfParticles) break; if (timer.elapsed() > ExpParams.TimeOut) { cout << "Timed out after " << t << " steps in " << Results.Time.GetTotal() << "seconds" << endl; break; } } if (outOfParticles) { cout << "Out of particles, finishing episode with SelectRandom" << endl; HISTORY history = mcts.GetHistory(); while (++t < ExpParams.NumSteps) { int observation; double reward; // This passes real state into simulator! 
// SelectRandom must only use fully observable state // to avoid "cheating" int action = Simulator.SelectRandom(*state, history, mcts.GetStatus()); terminal = Real.Step(*state, action, observation, reward); Results.Reward.Add(reward); undiscountedReturn += reward; discountedReturn += reward * discount; discount *= Real.GetDiscount(); if (SearchParams.Verbose >= 1) { Real.DisplayAction(action, cout); Real.DisplayState(*state, cout); Real.DisplayObservation(*state, observation, cout); Real.DisplayReward(reward, cout); } if (terminal) { cout << "Terminated" << endl; break; } history.Add(action, observation); } } Results.Time.Add(timer.elapsed()); Results.UndiscountedReturn.Add(undiscountedReturn); Results.DiscountedReturn.Add(discountedReturn); cout << "Discounted return = " << discountedReturn << ", average = " << Results.DiscountedReturn.GetMean() << endl; cout << "Undiscounted return = " << undiscountedReturn << ", average = " << Results.UndiscountedReturn.GetMean() << endl; }