void POCMAN::GeneratePreferred(const STATE& state, const HISTORY& history, vector<int>& actions, const STATUS& status) const { const POCMAN_STATE& pocstate = safe_cast<const POCMAN_STATE&>(state); if (history.Size()) { int action = history.Back().Action; int observation = history.Back().Observation; // If power pill and can see a ghost then chase it if (pocstate.PowerSteps > 0 && (observation & 15 != 0)) { for (int a = 0; a < 4; ++a) if (CheckFlag(observation, a)) actions.push_back(a); } // Otherwise avoid observed ghosts and avoid changing directions else { for (int a = 0; a < 4; ++a) { COORD newpos = NextPos(pocstate.PocmanPos, a); if (newpos.Valid() && !CheckFlag(observation, a) && COORD::Opposite(a) != action) actions.push_back(a); } } } }
// Prints one line for this node and then recurses into its existing children.
//
// The line consists of the history, an optional "q=<value>" (only when
// `qvalue` is non-null), the immediate reward prefixed with ": r=" and the
// observation prefixed with ", o=".
//
// history  - current path; the Observation of its last entry is overwritten
//            with each child's index before descending and is not restored.
// maxDepth - recursion stops once history.Size() >= maxDepth.
// ostr     - destination stream.
// qvalue   - optional externally supplied q-value to print for this node.
void QNODE::DisplayValue(HISTORY& history, int maxDepth, ostream& ostr, const double *qvalue) const
{
    history.Display(ostr);
    if (qvalue)
    {
        ostr << "q=" << *qvalue;
    }
    ImmediateReward.Print(": r=", ostr);
    Observation.Print(", o=", ostr);
    ostr << std::endl;

    // NOTE: a per-child loop used to sit here that only built a label string
    // for a cumulative-reward printout whose Print call was commented out.
    // It produced no output, so it has been dropped.

    if (history.Size() >= maxDepth)
        return;

    for (int observation = 0; observation < NumChildren; observation++)
    {
        if (Children[observation])
        {
            history.Back().Observation = observation;
            Children[observation]->DisplayValue(history, maxDepth, ostr);
        }
    }
}
// Walks every action child of this node and lets it print its value.
//
// For each action index the pair (action, -1) is pushed onto `history`, the
// corresponding child prints itself, and the entry is popped again, so
// `history` has the same length on return as on entry.
// Does nothing once history.Size() has reached maxDepth.
void VNODE::DisplayValue(HISTORY& history, int maxDepth, ostream& ostr) const
{
    const bool depthReached = history.Size() >= static_cast<uint>(maxDepth);
    if (depthReached)
        return;

    int a = 0;
    while (a < NumChildren)
    {
        history.Add(a, -1);
        Children[a].DisplayValue(history, maxDepth, ostr);
        history.Pop();
        ++a;
    }
}
// Prints "<history>: <value> (<count>)" for this node, then descends into
// every existing child.
//
// Before descending, the Observation of the last history entry is set to the
// child's index; it is not restored afterwards.
// Children are only visited while history.Size() is below maxDepth.
void QNODE::DisplayPolicy(HISTORY& history, int maxDepth, ostream& ostr) const
{
    history.Display(ostr);
    ostr << ": " << Value.GetValue();
    ostr << " (" << Value.GetCount() << ")\n";

    if (history.Size() >= static_cast<uint>(maxDepth))
        return;

    for (int obs = 0; obs < NumChildren; ++obs)
    {
        if (!Children[obs])
            continue; // no subtree for this observation

        history.Back().Observation = obs;
        Children[obs]->DisplayPolicy(history, maxDepth, ostr);
    }
}
// Prints the history followed by this node's immediate reward ("r=") and
// observation (", o="), then descends into every existing child.
//
// Before descending, the Observation of the last history entry is set to the
// child's index; it is not restored afterwards.
// Children are only visited while history.Size() is below maxDepth.
void QNODE::DisplayPolicy(HISTORY& history, int maxDepth, ostream& ostr) const
{
    history.Display(ostr);
    ImmediateReward.Print("r=", ostr);
    Observation.Print(", o=", ostr);
    ostr << std::endl;

    if (history.Size() >= maxDepth)
        return;

    for (int obs = 0; obs < NumChildren; ++obs)
    {
        if (!Children[obs])
            continue; // no subtree for this observation

        history.Back().Observation = obs;
        Children[obs]->DisplayPolicy(history, maxDepth, ostr);
    }
}
// Prints the value of every applicable action child of this node.
//
// Each action is pushed onto `history` for the duration of its visit and
// popped afterwards, whether or not the child is applicable. Applicable
// children are prefixed with "n=<count> ". When `qvalues` is supplied, the
// entry at the action's index (bounds-checked via at()) is forwarded to the
// child; otherwise the child is printed without an external q-value.
// Does nothing once history.Size() has reached maxDepth.
void VNODE::DisplayValue(HISTORY& history, int maxDepth, ostream& ostr, const std::vector<double> *qvalues) const
{
    if (history.Size() >= maxDepth)
        return;

    for (int a = 0; a < NumChildren; ++a)
    {
        history.Add(a);

        const QNODE &child = Children[a];
        if (child.Applicable())
        {
            ostr << "n=" << child.GetCount() << " ";
            if (!qvalues)
            {
                child.DisplayValue(history, maxDepth, ostr);
            }
            else
            {
                child.DisplayValue(history, maxDepth, ostr, &(qvalues->at(a)));
            }
        }

        history.Pop();
    }
}
// Applies a random local perturbation to `state` and reports whether the
// result is acceptable.
//
// Steps, in order:
//   1. A random number of ghosts (drawn with Random(1, 3)) are moved to
//      random maze coordinates. If any new position is not passable or
//      coincides with Pocman, the function returns false immediately, leaving
//      the state partially modified.
//   2. Every seed cell (E_SEED flag) inside the maze within SmellRange of
//      Pocman, excluding Pocman's own cell, has its food flag redrawn with
//      probability FoodProb * 0.5.
//   3. With an empty history the move is accepted; otherwise it is accepted
//      only if the observation generated from the new state equals the last
//      observation in the history.
//
// stepObs and status are unused.
bool POCMAN::LocalMove(STATE& state, const HISTORY& history, int stepObs, const STATUS& status) const
{
    _unused(stepObs);
    _unused(status);

    POCMAN_STATE& pocstate = safe_cast<POCMAN_STATE&>(state);

    // Change 1 or 2 ghosts at a time
    const int relocations = Random(1, 3);
    int moved = 0;
    while (moved < relocations)
    {
        const int ghost = Random(NumGhosts);
        // Kept as a single expression so the order of the two Random calls
        // stays whatever the compiler chose for the original.
        pocstate.GhostPos[ghost] = COORD(
            Random(Maze.GetXSize()),
            Random(Maze.GetYSize()));

        const bool blocked = !Passable(pocstate.GhostPos[ghost]);
        if (blocked || pocstate.GhostPos[ghost] == pocstate.PocmanPos)
            return false;

        ++moved;
    }

    // Redraw the food around Pocman.
    for (int dx = -SmellRange; dx <= SmellRange; ++dx)
    {
        for (int dy = -SmellRange; dy <= SmellRange; ++dy)
        {
            COORD offset(dx, dy);
            COORD cell = pocstate.PocmanPos + offset;

            const bool resample = offset != COORD(0, 0)
                && Maze.Inside(cell)
                && CheckFlag(Maze(cell), E_SEED);
            if (resample)
                pocstate.Food[Maze.Index(cell)] = Bernoulli(FoodProb * 0.5);
        }
    }

    // Just check the last time-step, don't check for full consistency
    if (history.Size() == 0)
        return true;

    const int expected = MakeObservations(pocstate);
    return history.Back().Observation == expected;
}
// Follows the greedy policy from this node: picks the action child with the
// strictly largest value and lets it print itself.
//
// Ties keep the lowest action index. If no child has a value above
// -Infinity, nothing is printed. The chosen action is pushed onto `history`
// as (action, 0) for the duration of the child's output and popped
// afterwards. Does nothing once history.Size() has reached maxDepth.
void VNODE::DisplayPolicy(HISTORY& history, int maxDepth, ostream& ostr) const
{
    if (history.Size() >= static_cast<uint>(maxDepth))
        return;

    int chosen = -1;
    double chosenValue = -Infinity;
    for (int a = 0; a < NumChildren; ++a)
    {
        const double q = Children[a].Value.GetValue();
        if (q > chosenValue)
        {
            chosen = a;
            chosenValue = q;
        }
    }

    if (chosen == -1)
        return;

    history.Add(static_cast<uint>(chosen), 0);
    Children[chosen].DisplayPolicy(history, maxDepth, ostr);
    history.Pop();
}
// Intended to print the policy below this node by following the best action.
//
// NOTE: the best-action selection is currently disabled (see the XXX block
// below), so `chosen` is never changed from -1 and this function prints
// nothing. The descent code is kept so that it works again once a selection
// criterion is restored.
void VNODE::DisplayPolicy(HISTORY& history, int maxDepth, ostream& ostr) const
{
    if (history.Size() >= maxDepth)
        return;

    int chosen = -1;
    // double bestq = -Infinity;
    for (int a = 0; a < NumChildren; ++a)
    {
        // XXX disabled selection criterion:
        // if (Children[a].Dirichlet.GetValue() > bestq)
        // {
        //     chosen = a;
        //     bestq = Children[a].Dirichlet.GetValue();
        // }
    }

    if (chosen == -1)
        return;

    history.Add(chosen);
    Children[chosen].DisplayPolicy(history, maxDepth, ostr);
    history.Pop();
}
void ROCKSAMPLE::GeneratePreferred(const STATE& state, const HISTORY& history, vector<int>& actions, const STATUS& status) const { _unused(status); static const bool UseBlindPolicy = false; if (UseBlindPolicy) { actions.push_back(COORD::E_EAST); return; } const ROCKSAMPLE_STATE& rockstate = safe_cast<const ROCKSAMPLE_STATE&>(state); // Sample rocks with more +ve than -ve observations int rock = Grid(rockstate.AgentPos); if (rock >= 0 && !rockstate.Rocks[rock].Collected) { int total = 0; for (int t = 0; t < history.Size(); ++t) { if (history[t].Action == rock + 1 + E_SAMPLE) { if (history[t].Observation == E_GOOD) total++; if (history[t].Observation == E_BAD) total--; } } if (total > 0) { actions.push_back(E_SAMPLE); return; } } // processes the rocks bool all_bad = true; bool north_interesting = false; bool south_interesting = false; bool west_interesting = false; bool east_interesting = false; for (int rock = 0; rock < NumRocks; ++rock) { const ROCKSAMPLE_STATE::ENTRY& entry = rockstate.Rocks[rock]; if (!entry.Collected) { int total = 0; for (int t = 0; t < history.Size(); ++t) { if (history[t].Action == rock + 1 + E_SAMPLE) { if (history[t].Observation == E_GOOD) total++; if (history[t].Observation == E_BAD) total--; } } if (total >= 0) { all_bad = false; if (RockPos[rock].Y > rockstate.AgentPos.Y) north_interesting = true; if (RockPos[rock].Y < rockstate.AgentPos.Y) south_interesting = true; if (RockPos[rock].X < rockstate.AgentPos.X) west_interesting = true; if (RockPos[rock].X > rockstate.AgentPos.X) east_interesting = true; } } } // if all remaining rocks seem bad, then head east if (all_bad) { actions.push_back(COORD::E_EAST); return; } // generate a random legal move, with the exceptions that: // a) there is no point measuring a rock that is already collected // b) there is no point measuring a rock too often // c) there is no point measuring a rock which is clearly bad or good // d) we never sample a rock (since we need to be sure) // e) we never move in a 
direction that doesn't take us closer to // either the edge of the map or an interesting rock if (rockstate.AgentPos.Y + 1 < Size && north_interesting) actions.push_back(COORD::E_NORTH); if (east_interesting) actions.push_back(COORD::E_EAST); if (rockstate.AgentPos.Y - 1 >= 0 && south_interesting) actions.push_back(COORD::E_SOUTH); if (rockstate.AgentPos.X - 1 >= 0 && west_interesting) actions.push_back(COORD::E_WEST); for (rock = 0; rock < NumRocks; ++rock) { if (!rockstate.Rocks[rock].Collected && rockstate.Rocks[rock].ProbValuable != 0.0 && rockstate.Rocks[rock].ProbValuable != 1.0 && rockstate.Rocks[rock].Measured < 5 && std::abs(rockstate.Rocks[rock].Count) < 2) { actions.push_back(rock + 1 + E_SAMPLE); } } }