Example #1
void PacMan::performAction(action_t action) {
	timestep++;

	// Start from the current position so an unrecognized action leaves Pac-Man in place.
	int newX = pacmanX;
	int newY = pacmanY;

	m_reward = 200;

	if (action == 0) { // Go up
		newY = pacmanY - 1;
	} else if (action == 1) { // Go right
		newX = pacmanX + 1;
	} else if (action == 2) { // Go down
		newY = pacmanY + 1;
	} else if (action == 3) { // Go left
		newX = pacmanX - 1;
	}

	movePacmanAndUpdateReward(newX, newY);

	moveGhostAndUpdateReward(&aGhostX, &aGhostY, &sniffA, &aGhostCovering, 'A');
	moveGhostAndUpdateReward(&bGhostX, &bGhostY, &sniffB, &bGhostCovering, 'B');
	moveGhostAndUpdateReward(&cGhostX, &cGhostY, &sniffC, &cGhostCovering, 'C');
	moveGhostAndUpdateReward(&dGhostX, &dGhostY, &sniffD, &dGhostCovering, 'D');

	updateObservation();
	m_action = action;

	if (reset) {
		resetEpisode();
	}

	//printMap();
}
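The helpers called above (movePacmanAndUpdateReward, moveGhostAndUpdateReward, updateObservation) and the position members are defined elsewhere in the class. For orientation only, here is a minimal, hypothetical driver loop showing how performAction might be exercised; the action codes 0-3 (up/right/down/left) come from the code above, while runRandomEpisode, the episode length, and the use of std::rand() are illustrative assumptions.

#include <cstdlib>

// Hypothetical driver: steps a PacMan environment with random actions.
// Only performAction() appears in the example above.
void runRandomEpisode(PacMan& env, unsigned int maxSteps) {
	for (unsigned int step = 0; step < maxSteps; ++step) {
		// 0 = up, 1 = right, 2 = down, 3 = left
		action_t action = static_cast<action_t>(std::rand() % 4);
		env.performAction(action);
	}
}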
Example #2
void HotPlateTest::runTest(bool useContinuousInputs, 
	const std::string& baseFilename)
{
	DataFile dataFile(mNumTrialsPerRun);

	for (unsigned int run = 0; run < mNumRuns; ++run)
	{
		verve::AgentDescriptor desc;

		if (useContinuousInputs)
		{
			desc.addContinuousSensor(); // Robot position.

			// If we're using dynamic RBFs, we need to keep the resolution 
			// smaller than the actual discrete grid world's resolution.  
			// Otherwise, we'll get a sparse set of RBFs with tiny receptive 
			// fields.  (This is sort of a special case; normally we would 
			// not use continuous sensors in a discrete world.)
			desc.setContinuousSensorResolution(10);
		}
		else
		{
			desc.addDiscreteSensor(20); // Robot position.
		}

		desc.setNumOutputs(3);
		verve::Agent a(desc);
		verve::Observation obs;
		obs.init(a);
		a.setTDLearningRate((verve::real)0.1, 10);

		for (unsigned int trial = 0; trial < mNumTrialsPerRun; ++trial)
		{
			a.resetShortTermMemory();
			mWorld.randomizeRobotPosition();
			unsigned int stepCount = 0;

			while (!isRobotAtGoal() && stepCount < mMaxStepsPerTrial)
			{
				updateObservation(obs, useContinuousInputs);

				// Update the Agent.
				unsigned int action = a.update(computeReward(), obs, 
					(verve::real)0.1);

				switch(action)
				{
					case 0:
						mWorld.moveRobotLeft();
						break;
					case 1:
						mWorld.moveRobotRight();
						break;
					case 2:
						// Don't move.
						break;
					default:
						assert(false);
						break;
				}

				++stepCount;
			}

			// If the Agent has actually reached the goal (and did not 
			// simply run out of time), this will reward it.
			updateObservation(obs, useContinuousInputs);
			a.update(computeReward(), obs, (verve::real)0.1);

			dataFile.storeData("trial", trial, (float)trial);
			dataFile.storeData("steps to goal", trial, (float)stepCount);

			//// Print value function data.
			//if (0 == run && 
			//	(trial == 0 || trial == 4 || trial == 9 || trial == 29))
			//{
			//	char fileStr[1024];
			//	sprintf(fileStr, "%s-trial%d-value.dat", 
			//		baseFilename.c_str(), trial);
			//	a.saveValueData(400, fileStr);
			//}
		}

		if (run % 50 == 0)
		{
			printRunStatus(run);
		}

		//if (0 == run)
		//{
		//	a.saveValueData(400, baseFilename + "-valueData.dat");
		//	a.saveStateRBFData(baseFilename + "-stateRBFData.dat");
		//}
	}

	dataFile.printToFile(baseFilename + "-performance.dat");
}
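For context, a caller would typically run this benchmark once per sensor mode and then plot the resulting performance file. The snippet below is only a sketch of such a driver; the default construction of HotPlateTest and the base filenames are assumptions, since only runTest itself appears above.

// Hypothetical driver: runs the hot-plate benchmark with both sensor types.
// Construction details of HotPlateTest are assumed, not shown in the example.
int main()
{
	HotPlateTest test;
	test.runTest(false, "hotPlate-discrete");   // discrete position sensor
	test.runTest(true, "hotPlate-continuous");  // continuous sensor with RBFs
	// Each call writes "<baseFilename>-performance.dat" with steps-to-goal per trial.
	return 0;
}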
Example #3
void CuriosityTest::runTest(bool useContinuousInputs, 
	const std::string& baseFilename)
{
	DataFile dataFile(mNumTrialsPerRun);

	for (unsigned int run = 0; run < mNumRuns; ++run)
	{
		verve::AgentDescriptor desc;
		desc.setArchitecture(verve::CURIOUS_MODEL_RL);
		desc.setMaxNumPlanningSteps(50);

		if (useContinuousInputs)
		{
			desc.addContinuousSensor(); // Robot x position.
			desc.addContinuousSensor(); // Robot y position.

			// If we're using dynamic RBFs, we need to keep the resolution 
			// smaller than the actual discrete grid world's resolution.  
			// Otherwise, we'll get a sparse set of RBFs with tiny receptive 
			// fields.  (This is sort of a special case; normally we would 
			// not use continuous sensors in a discrete world.)
			desc.setContinuousSensorResolution(15);
		}
		else
		{
			desc.addDiscreteSensor(mWorld.getGridXSize()); // Robot x position.
			desc.addDiscreteSensor(mWorld.getGridYSize()); // Robot y position.
		}

		desc.setNumOutputs(5);
		verve::Agent a(desc);
		verve::Observation obs;
		obs.init(a);
		a.setModelLearningRate((verve::real)0.0);
		a.setTDLearningRate((verve::real)0.1, (verve::real)2.0);

		for (unsigned int trial = 0; trial < mNumTrialsPerRun; ++trial)
		{
			a.resetShortTermMemory();
			mWorld.setRobotPosition(mRobotStartPosition[0], 
				mRobotStartPosition[1]);
			unsigned int stepCount = 0;
			verve::real rewardSum = 0;

			while (stepCount < mNumStepsPerTrial)
			{
				updateObservation(obs, useContinuousInputs);

				// Update the Agent.
				unsigned int action = a.update(computeReward(), obs, 
					(verve::real)0.1);

				switch(action)
				{
					case 0:
						mWorld.moveRobotLeft();
						break;
					case 1:
						mWorld.moveRobotRight();
						break;
					case 2:
						mWorld.moveRobotUp();
						break;
					case 3:
						mWorld.moveRobotDown();
						break;
					case 4:
						// Don't move.
						break;
					default:
						assert(false);
						break;
				}

				++stepCount;
				rewardSum += computeReward();
			}

			// One final update so the Agent receives the reward 
			// for its last move before the trial ends.
			updateObservation(obs, useContinuousInputs);
			a.update(computeReward(), obs, (verve::real)0.1);

			dataFile.storeData("trial", trial, (float)trial);
			dataFile.storeData("reward sum", trial, rewardSum);

			// Print value function data.
			if (0 == run && 
				(trial == 1 || trial == 9 || trial == 49 || trial == 79))
			{
				char fileStr[1024];
				snprintf(fileStr, sizeof(fileStr), "%s-trial%u-value.dat", 
					baseFilename.c_str(), trial);
				a.saveValueData(400, fileStr);
			}

			if (trial % 5 == 0)
			{
				printTrialAndRunStatus(run, trial, rewardSum);
			}
		}
	}

	dataFile.printToFile(baseFilename + "-performance.dat");
}
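Both tests depend on an updateObservation helper that is not shown in the excerpts. A minimal sketch of what it might do for the 2-D grid world of Example #3 follows; the verve::Observation setter names (setContinuousValue, setDiscreteValue) and the world accessors (getRobotX, getRobotY) are assumptions about the surrounding code, not taken from the examples.

// Hypothetical helper: writes the robot's grid position into the observation.
// Setter names and world accessors are assumptions, not confirmed verve API.
void CuriosityTest::updateObservation(verve::Observation& obs, 
	bool useContinuousInputs)
{
	if (useContinuousInputs)
	{
		// Normalize the grid coordinates into a unit range for the continuous sensors.
		obs.setContinuousValue(0, (verve::real)mWorld.getRobotX() / 
			(verve::real)mWorld.getGridXSize());
		obs.setContinuousValue(1, (verve::real)mWorld.getRobotY() / 
			(verve::real)mWorld.getGridYSize());
	}
	else
	{
		obs.setDiscreteValue(0, mWorld.getRobotX());
		obs.setDiscreteValue(1, mWorld.getRobotY());
	}
}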