void POCMAN::GeneratePreferred(const STATE& state, const HISTORY& history, vector<int>& actions, const STATUS& status) const { const POCMAN_STATE& pocstate = safe_cast<const POCMAN_STATE&>(state); if (history.Size()) { int action = history.Back().Action; int observation = history.Back().Observation; // If power pill and can see a ghost then chase it if (pocstate.PowerSteps > 0 && (observation & 15 != 0)) { for (int a = 0; a < 4; ++a) if (CheckFlag(observation, a)) actions.push_back(a); } // Otherwise avoid observed ghosts and avoid changing directions else { for (int a = 0; a < 4; ++a) { COORD newpos = NextPos(pocstate.PocmanPos, a); if (newpos.Valid() && !CheckFlag(observation, a) && COORD::Opposite(a) != action) actions.push_back(a); } } } }
/// Applies a random local perturbation to a ROCKSAMPLE state and reports
/// whether the perturbed state agrees with the last real observation.
///
/// One randomly chosen rock has its Valuable flag inverted. If the last
/// action in the history was a "check rock" action (Action > E_SAMPLE), a
/// fresh observation of the checked rock is generated from the perturbed
/// state and must equal the recorded observation; otherwise the move is
/// rejected.
///
/// @param state    State to perturb in place; must be a ROCKSAMPLE_STATE.
/// @param history  History whose last entry is read (must be non-empty).
/// @param stepObs  Observation produced by the simulated step.
/// @param status   Unused.
/// @return false when the regenerated observation disagrees with the real
///         one, true otherwise. The state stays modified either way.
bool ROCKSAMPLE::LocalMove(STATE& state, const HISTORY& history,
    int stepObs, const STATUS& status) const
{
    _unused(status);
    ROCKSAMPLE_STATE& rockstate = safe_cast<ROCKSAMPLE_STATE&>(state);

    // Flip the value of one randomly selected rock.
    const int flipped = Random(NumRocks);
    rockstate.Rocks[flipped].Valuable = !rockstate.Rocks[flipped].Valuable;

    // Anything up to and including E_SAMPLE is not a check action, so there
    // is no observation to condition on.
    const int lastAction = history.Back().Action;
    if (lastAction <= E_SAMPLE)
        return true;

    // Check actions are numbered directly after E_SAMPLE, one per rock.
    const int checked = lastAction - E_SAMPLE - 1;
    const int realObs = history.Back().Observation;

    // Condition the new state on the real observation.
    const int newObs = GetObservation(rockstate, checked);
    if (newObs != realObs)
        return false;

    // Update counts to be consistent with the real observation rather than
    // the simulated one.
    if (realObs == E_GOOD && stepObs == E_BAD)
        rockstate.Rocks[checked].Count += 2;
    if (realObs == E_BAD && stepObs == E_GOOD)
        rockstate.Rocks[checked].Count -= 2;

    return true;
}
/// Writes this node's summary line to `ostr`, then recurses into its children.
///
/// The line consists of the history, an optional "q=<value>" when `qvalue`
/// is non-null, the immediate reward (": r=") and the observation (", o=").
/// Recursion stops once the history size reaches `maxDepth`.
///
/// @param history   Current history. The Observation field of its last entry
///                  is overwritten with each child's index before recursing
///                  and is not restored afterwards.
/// @param maxDepth  History size at which recursion stops.
/// @param ostr      Destination stream.
/// @param qvalue    Optional q-value to print; skipped when null.
void QNODE::DisplayValue(HISTORY& history, int maxDepth, ostream& ostr,
    const double *qvalue) const
{
    history.Display(ostr);
    if (qvalue)
    {
        ostr << "q=" << *qvalue;
    }
    ImmediateReward.Print(": r=", ostr);
    Observation.Print(", o=", ostr);
    ostr << std::endl;

    if (history.Size() >= maxDepth)
        return;

    for (int observation = 0; observation < NumChildren; observation++)
    {
        if (Children[observation])
        {
            // The child index doubles as the observation that leads to it.
            history.Back().Observation = observation;
            Children[observation]->DisplayValue(history, maxDepth, ostr);
        }
    }
}
/// Accepts a state only if the observation it generates equals the last
/// observation recorded in the history.
///
/// The state itself is not perturbed here. It is accessed by reference so
/// the check does not copy (or slice) the whole ROOMS_STATE on every call.
///
/// @param state    Candidate state; must be a ROOMS_STATE (via safe_cast).
/// @param history  History whose last entry is read (must be non-empty).
/// @return true when GetObservation(state) matches the last observation.
bool ROOMS::LocalMove(STATE &state, const HISTORY &history, int) const
{
    ROOMS_STATE &rstate = safe_cast<ROOMS_STATE &>(state);
    return GetObservation(rstate) == history.Back().Observation;
}
/// Writes this node's history followed by ": <value> (<count>)" to `ostr`,
/// then descends into every non-null child.
///
/// @param history   Current history. The Observation field of its last entry
///                  is overwritten with each child's index before recursing
///                  and is not restored afterwards.
/// @param maxDepth  History size at which recursion stops.
/// @param ostr      Destination stream.
void QNODE::DisplayPolicy(HISTORY& history, int maxDepth, ostream& ostr) const
{
    history.Display(ostr);
    ostr << ": " << Value.GetValue() << " (" << Value.GetCount() << ")\n";

    // Depth limit reached: print this node only.
    if (history.Size() >= static_cast<uint>(maxDepth))
        return;

    for (int obs = 0; obs < NumChildren; ++obs)
    {
        if (!Children[obs])
            continue;

        // The child index doubles as the observation that leads to it.
        history.Back().Observation = obs;
        Children[obs]->DisplayPolicy(history, maxDepth, ostr);
    }
}
/// Checks whether a state is consistent with the last recorded observation.
///
/// The observation is rebuilt from the map as an 8-bit mask: bit i is set
/// when the cell at offset DeltaObs[i] from the robot's (X, Y) holds the
/// value 1 in Map. Observation of the surrounding cells is assumed to be
/// perfect, so the robot sees the wall configuration around it but not its
/// own location.
///
/// @param state    Candidate state; must be a ROBOT_STATE (via safe_cast).
/// @param history  History whose last entry is read (must be non-empty).
/// @param stepObs  Not used by this implementation.
/// @param status   Not used by this implementation.
/// @return true when the rebuilt mask equals the last observation.
bool ROBOT_NAVIGATION::LocalMove(STATE& state, const HISTORY& history,
    int stepObs, const STATUS& status) const
{
    ROBOT_STATE& robotstate = safe_cast<ROBOT_STATE&>(state);

    int wallMask = 0;
    for (int dir = 0; dir < 8; ++dir)
    {
        const int cellX = robotstate.X + ROBOT_NAVIGATION::DeltaObs[dir][0];
        const int cellY = robotstate.Y + ROBOT_NAVIGATION::DeltaObs[dir][1];

        // Each direction owns its own bit, so OR-ing is the same as adding.
        const bool occupied = (ROBOT_NAVIGATION::Map[cellX][cellY] == 1);
        if (occupied)
            wallMask |= (1 << dir);
    }

    return (wallMask == history.Back().Observation);
}
/// Writes this node's history, immediate reward ("r=") and observation
/// (", o=") on one line, then descends into every non-null child.
///
/// @param history   Current history. The Observation field of its last entry
///                  is overwritten with each child's index before recursing
///                  and is not restored afterwards.
/// @param maxDepth  History size at which recursion stops.
/// @param ostr      Destination stream.
void QNODE::DisplayPolicy(HISTORY& history, int maxDepth, ostream& ostr) const
{
    // Summary line for this node.
    history.Display(ostr);
    ImmediateReward.Print("r=", ostr);
    Observation.Print(", o=", ostr);
    ostr << std::endl;

    // Depth limit reached: print this node only.
    const bool depthReached = history.Size() >= maxDepth;
    if (depthReached)
        return;

    for (int obs = 0; obs < NumChildren; ++obs)
    {
        if (!Children[obs])
            continue;

        // The child index doubles as the observation that leads to it.
        history.Back().Observation = obs;
        Children[obs]->DisplayPolicy(history, maxDepth, ostr);
    }
}
/// Applies a random local perturbation to a POCMAN state and reports whether
/// the result is acceptable.
///
/// Steps, in order:
///  1. Random(1, 3) ghosts (1 or 2 at a time) are moved to random maze
///     coordinates. The move is rejected immediately if a ghost lands on a
///     non-passable square or on Pocman's position.
///  2. Every seed square within SmellRange of Pocman, other than Pocman's
///     own square, has its food flag resampled with probability
///     FoodProb * 0.5.
///  3. With a non-empty history, the observation generated from the
///     perturbed state must equal the last recorded observation. Only that
///     last time-step is checked, not the whole history.
///
/// @param state    State to perturb in place; must be a POCMAN_STATE.
/// @param history  History; only its last entry is read, and only if present.
/// @param stepObs  Unused.
/// @param status   Unused.
/// @return true when the perturbed state is accepted, false otherwise. The
///         state stays modified either way.
bool POCMAN::LocalMove(STATE& state, const HISTORY& history,
    int stepObs, const STATUS& status) const
{
    _unused(stepObs);
    _unused(status);
    POCMAN_STATE& pocstate = safe_cast<POCMAN_STATE&>(state);

    // Relocate a small random number of ghosts.
    const int relocations = Random(1, 3);
    for (int n = 0; n < relocations; ++n)
    {
        const int ghost = Random(NumGhosts);
        pocstate.GhostPos[ghost] = COORD(
            Random(Maze.GetXSize()),
            Random(Maze.GetYSize()));

        const bool blocked = !Passable(pocstate.GhostPos[ghost]);
        if (blocked || pocstate.GhostPos[ghost] == pocstate.PocmanPos)
            return false;
    }

    // Resample food on seed squares inside the smell window around Pocman.
    COORD offset;
    for (offset.X = -SmellRange; offset.X <= SmellRange; offset.X++)
    {
        for (offset.Y = -SmellRange; offset.Y <= SmellRange; offset.Y++)
        {
            COORD pos = pocstate.PocmanPos + offset;
            if (offset != COORD(0, 0) && Maze.Inside(pos))
            {
                if (CheckFlag(Maze(pos), E_SEED))
                    pocstate.Food[Maze.Index(pos)] = Bernoulli(FoodProb * 0.5);
            }
        }
    }

    // No recorded observation to compare against: accept.
    if (history.Size() == 0)
        return true;

    // Just check the last time-step, don't check for full consistency.
    const int regenerated = MakeObservations(pocstate);
    return history.Back().Observation == regenerated;
}