Beispiel #1
0
// Appends the preferred actions for the current state to `actions`, based on
// the most recent (action, observation) pair in `history`.
//
//   state   - must be a POCMAN_STATE (checked via safe_cast).
//   history - if empty, nothing is appended.
//   actions - output; preferred action indices (0..3) are appended.
//   status  - not used by this implementation.
//
// The low four bits of the observation are tested per direction with
// CheckFlag(observation, a); the surrounding comments treat a set bit as
// "a ghost is observed in direction a".
void POCMAN::GeneratePreferred(const STATE& state, const HISTORY& history,
    vector<int>& actions, const STATUS& status) const
{
    const POCMAN_STATE& pocstate = safe_cast<const POCMAN_STATE&>(state);
    if (!history.Size())
        return;

    const int action = history.Back().Action;
    const int observation = history.Back().Observation;

    // `!=` binds tighter than `&`, so the mask must be parenthesised;
    // without the parentheses only bit 0 would be tested.
    const bool ghostObserved = (observation & 15) != 0;

    // If power pill and can see a ghost then chase it
    if (pocstate.PowerSteps > 0 && ghostObserved)
    {
        for (int a = 0; a < 4; ++a)
            if (CheckFlag(observation, a))
                actions.push_back(a);
    }

    // Otherwise avoid observed ghosts and avoid changing directions
    else
    {
        for (int a = 0; a < 4; ++a)
        {
            COORD newpos = NextPos(pocstate.PocmanPos, a);
            if (newpos.Valid() && !CheckFlag(observation, a)
                && COORD::Opposite(a) != action)
                actions.push_back(a);
        }
    }
}
Beispiel #2
0
// Prints this action node and then recurses into its existing children.
//
//   history  - the history leading to this node; it is displayed first, and
//              the Observation of its last entry is overwritten with each
//              child's index before recursing into that child.
//   maxDepth - recursion stops once history.Size() reaches this value.
//   ostr     - destination stream.
//   qvalue   - optional; when non-null, "q=<value>" is printed after the
//              history.
//
// Output for this node: the history, the optional q-value, then the immediate
// reward (": r=") and observation (", o=") summaries, then a newline.
void QNODE::DisplayValue(HISTORY& history, int maxDepth, ostream& ostr, const double *qvalue) const
{
    history.Display(ostr);
    if (qvalue) {
        ostr << "q=" << *qvalue;
    }

    ImmediateReward.Print(": r=", ostr);
    Observation.Print(", o=", ostr);
    ostr << std::endl;

    // Per-child cumulative-reward printing is disabled (the Print call was
    // commented out), so no per-child label is built here any more.

    if (history.Size() >= maxDepth)
        return;

    for (int observation = 0; observation < NumChildren; observation++)
    {
        if (Children[observation])
        {
            history.Back().Observation = observation;
            Children[observation]->DisplayValue(history, maxDepth, ostr);
        }
    }
}
Beispiel #3
0
// Displays the value of every child action node, extending `history` by one
// step (action, observation -1) around each call and restoring it afterwards.
// Does nothing once history.Size() has reached maxDepth.
void VNODE::DisplayValue(HISTORY& history, int maxDepth, ostream& ostr) const
{
    if (history.Size() < static_cast<uint>(maxDepth))
    {
        int child = 0;
        while (child < NumChildren)
        {
            // Push a placeholder step for this action; the observation is
            // not known at this level, hence -1.
            history.Add(child, -1);
            Children[child].DisplayValue(history, maxDepth, ostr);
            history.Pop();
            ++child;
        }
    }
}
Beispiel #4
0
// Prints one line for this action node -- the history followed by
// ": <value> (<count>)" -- and then recurses into every existing child,
// writing the child's index into the Observation of the last history entry
// first. Recursion stops once history.Size() has reached maxDepth.
void QNODE::DisplayPolicy(HISTORY& history, int maxDepth, ostream& ostr) const
{
    history.Display(ostr);
    ostr << ": "
         << Value.GetValue()
         << " ("
         << Value.GetCount()
         << ")\n";

    const bool depthReached = history.Size() >= static_cast<uint>(maxDepth);
    if (depthReached)
        return;

    for (int obs = 0; obs < NumChildren; ++obs)
    {
        // Skip observations that have no child node.
        if (!Children[obs])
            continue;

        history.Back().Observation = obs;
        Children[obs]->DisplayPolicy(history, maxDepth, ostr);
    }
}
Beispiel #5
0
// Prints the history, then the immediate reward ("r=") and observation
// (", o=") summaries followed by a newline, and recurses into every existing
// child. Before each recursive call the child's index is written into the
// Observation of the last history entry. Recursion stops once history.Size()
// has reached maxDepth.
void QNODE::DisplayPolicy(HISTORY& history, int maxDepth, ostream& ostr) const
{
    history.Display(ostr);

    ImmediateReward.Print("r=", ostr);
    Observation.Print(", o=", ostr);
    ostr << std::endl;

    if (history.Size() >= maxDepth)
        return;

    int obs = 0;
    while (obs < NumChildren)
    {
        if (Children[obs])
        {
            history.Back().Observation = obs;
            Children[obs]->DisplayPolicy(history, maxDepth, ostr);
        }
        ++obs;
    }
}
Beispiel #6
0
// Displays every applicable child action node, prefixed by "n=<count> ".
//
//   history  - extended by one action around each child and restored
//              afterwards, whether or not the child is applicable.
//   maxDepth - nothing is printed once history.Size() has reached it.
//   ostr     - destination stream.
//   qvalues  - optional; when non-null, the entry at the child's action index
//              is forwarded to the child (bounds-checked via at()).
void VNODE::DisplayValue(HISTORY& history, int maxDepth, ostream& ostr, const std::vector<double> *qvalues) const
{
    if (!(history.Size() >= maxDepth))
    {
        for (int a = 0; a < NumChildren; a++)
        {
            history.Add(a);

            const QNODE &child = Children[a];
            if (child.Applicable())
            {
                ostr << "n=" << child.GetCount() << " ";

                if (!qvalues)
                    child.DisplayValue(history, maxDepth, ostr);
                else
                    child.DisplayValue(history, maxDepth, ostr, &(qvalues->at(a)));
            }

            history.Pop();
        }
    }
}
Beispiel #7
0
// Applies a random local perturbation to `state` and reports whether the
// result is acceptable.
//
// Steps, in order:
//   1. Re-places a random number of ghosts (count drawn from Random(1, 3)) at
//      random maze coordinates; returns false as soon as one lands on an
//      impassable cell or on Pocman's position.
//   2. For every seed cell inside the maze within SmellRange of Pocman
//      (excluding Pocman's own cell), redraws the food flag with probability
//      FoodProb * 0.5.
//   3. With a non-empty history, returns whether the observation generated
//      from the perturbed state equals the last observation in the history;
//      with an empty history, returns true.
//
// stepObs and status are unused.
bool POCMAN::LocalMove(STATE& state, const HISTORY& history,
    int stepObs, const STATUS& status) const
{
    _unused(stepObs);
    _unused(status);

    POCMAN_STATE& pocstate = safe_cast<POCMAN_STATE&>(state);

    // Change 1 or 2 ghosts at a time
    const int ghostsToMove = Random(1, 3);
    int moved = 0;
    while (moved < ghostsToMove)
    {
        int g = Random(NumGhosts);
        // Kept as a single expression on purpose: splitting the two Random
        // calls could change the order in which they are evaluated.
        pocstate.GhostPos[g] = COORD(
            Random(Maze.GetXSize()),
            Random(Maze.GetYSize()));

        const bool blocked = !Passable(pocstate.GhostPos[g]);
        if (blocked || pocstate.GhostPos[g] == pocstate.PocmanPos)
            return false;

        ++moved;
    }

    COORD offset;
    for (offset.X = -SmellRange; offset.X <= SmellRange; offset.X++)
    {
        for (offset.Y = -SmellRange; offset.Y <= SmellRange; offset.Y++)
        {
            COORD pos = pocstate.PocmanPos + offset;

            // Pocman's own cell is never touched.
            if (offset == COORD(0, 0))
                continue;
            if (!Maze.Inside(pos))
                continue;
            if (!CheckFlag(Maze(pos), E_SEED))
                continue;

            pocstate.Food[Maze.Index(pos)] = Bernoulli(FoodProb * 0.5);
        }
    }

    // Just check the last time-step, don't check for full consistency
    if (history.Size() == 0)
        return true;

    const int regenerated = MakeObservations(pocstate);
    return history.Back().Observation == regenerated;
}
Beispiel #8
0
// Follows the greedy policy one step: picks the child action whose
// Value.GetValue() is strictly greatest (the first such child wins ties),
// appends it to `history` with observation 0, lets that child display its
// policy, and restores `history`. Does nothing once history.Size() has
// reached maxDepth, or when no child's value exceeds -Infinity.
void VNODE::DisplayPolicy(HISTORY& history, int maxDepth, ostream& ostr) const
{
    if (history.Size() >= static_cast<uint>(maxDepth))
        return;

    int greedy = -1;
    double greedyValue = -Infinity;
    for (int a = 0; a < NumChildren; ++a)
    {
        const double q = Children[a].Value.GetValue();
        if (q > greedyValue)
        {
            greedyValue = q;
            greedy = a;
        }
    }

    if (greedy == -1)
        return;

    history.Add(static_cast<uint>(greedy), 0);
    Children[greedy].DisplayPolicy(history, maxDepth, ostr);
    history.Pop();
}
Beispiel #9
0
// Intended to display the policy below this node by descending into the best
// child action, stopping once history.Size() has reached maxDepth.
//
// NOTE(review): the best-action selection below is commented out (marked
// XXX), so `besta` is never changed from -1 and this function currently
// prints nothing and never recurses.
void VNODE::DisplayPolicy(HISTORY& history, int maxDepth, ostream& ostr) const
{
    // Depth limit: nothing is displayed at or beyond maxDepth.
    if (history.Size() >= maxDepth)
        return;

//    double bestq = -Infinity;
    int besta = -1;
    // The loop body is entirely commented out, so this loop has no effect.
    for (int action = 0; action < NumChildren; action++)
    {
//        if (Children[action].Dirichlet.GetValue() > bestq) //XXX
//        {
//            besta = action;
//            bestq = Children[action].Dirichlet.GetValue();
//        }
    }

    // Only reached with a valid action once the selection above is restored:
    // extend the history by the chosen action, recurse, then restore it.
    if (besta != -1)
    {
        history.Add(besta);
        Children[besta].DisplayPolicy(history, maxDepth, ostr);
        history.Pop();
    }
}
Beispiel #10
0
// Appends the preferred actions for the current state to `actions`.
//
//   state   - must be a ROCKSAMPLE_STATE (checked via safe_cast).
//   history - scanned for past actions equal to (rock + 1 + E_SAMPLE) and
//             their E_GOOD / E_BAD observations.
//   actions - output; preferred actions are appended.
//   status  - unused.
//
// Decision order:
//   1. If the agent stands on an uncollected rock whose history shows more
//      E_GOOD than E_BAD observations, the only preferred action is E_SAMPLE.
//   2. If no uncollected rock has a non-negative (good - bad) balance, the
//      only preferred action is to move east.
//   3. Otherwise: moves toward rocks with a non-negative balance, plus the
//      per-rock action (rock + 1 + E_SAMPLE) for every rock that is still
//      worth measuring.
void ROCKSAMPLE::GeneratePreferred(const STATE& state, const HISTORY& history,
    vector<int>& actions, const STATUS& status) const
{
	_unused(status);

	static const bool UseBlindPolicy = false;
	if (UseBlindPolicy)
	{
		actions.push_back(COORD::E_EAST);
		return;
	}

	const ROCKSAMPLE_STATE& rockstate =
		safe_cast<const ROCKSAMPLE_STATE&>(state);

	// Sample rocks with more +ve than -ve observations
	const int here = Grid(rockstate.AgentPos);
	if (here >= 0 && !rockstate.Rocks[here].Collected)
	{
		int balance = 0;
		for (int step = 0; step < history.Size(); ++step)
		{
			if (history[step].Action != here + 1 + E_SAMPLE)
				continue;
			if (history[step].Observation == E_GOOD)
				++balance;
			if (history[step].Observation == E_BAD)
				--balance;
		}
		if (balance > 0)
		{
			actions.push_back(E_SAMPLE);
			return;
		}
	}

	// Classify the remaining rocks and record in which directions the
	// not-yet-discredited ones lie relative to the agent.
	bool anyPromising = false;
	bool wantNorth = false;
	bool wantSouth = false;
	bool wantWest = false;
	bool wantEast = false;

	for (int r = 0; r < NumRocks; ++r)
	{
		const ROCKSAMPLE_STATE::ENTRY& entry = rockstate.Rocks[r];
		if (entry.Collected)
			continue;

		int balance = 0;
		for (int step = 0; step < history.Size(); ++step)
		{
			if (history[step].Action != r + 1 + E_SAMPLE)
				continue;
			if (history[step].Observation == E_GOOD)
				++balance;
			if (history[step].Observation == E_BAD)
				--balance;
		}

		// Rocks with more bad than good observations are ignored.
		if (balance < 0)
			continue;

		anyPromising = true;
		wantNorth = wantNorth || RockPos[r].Y > rockstate.AgentPos.Y;
		wantSouth = wantSouth || RockPos[r].Y < rockstate.AgentPos.Y;
		wantWest = wantWest || RockPos[r].X < rockstate.AgentPos.X;
		wantEast = wantEast || RockPos[r].X > rockstate.AgentPos.X;
	}

	// if all remaining rocks seem bad, then head east
	if (!anyPromising)
	{
		actions.push_back(COORD::E_EAST);
		return;
	}

	// generate a random legal move, with the exceptions that:
	//   a) there is no point measuring a rock that is already collected
	//   b) there is no point measuring a rock too often
	//   c) there is no point measuring a rock which is clearly bad or good
	//   d) we never sample a rock (since we need to be sure)
	//   e) we never move in a direction that doesn't take us closer to
	//      either the edge of the map or an interesting rock
	if (wantNorth && rockstate.AgentPos.Y + 1 < Size)
		actions.push_back(COORD::E_NORTH);

	if (wantEast)
		actions.push_back(COORD::E_EAST);

	if (wantSouth && rockstate.AgentPos.Y - 1 >= 0)
		actions.push_back(COORD::E_SOUTH);

	if (wantWest && rockstate.AgentPos.X - 1 >= 0)
		actions.push_back(COORD::E_WEST);

	for (int r = 0; r < NumRocks; ++r)
	{
		const ROCKSAMPLE_STATE::ENTRY& entry = rockstate.Rocks[r];

		const bool undecided =
			entry.ProbValuable != 0.0 && entry.ProbValuable != 1.0;
		const bool worthMeasuring =
			entry.Measured < 5 && std::abs(entry.Count) < 2;

		if (!entry.Collected && undecided && worthMeasuring)
			actions.push_back(r + 1 + E_SAMPLE);
	}
}