Example #1
void Task::randomize()
{
    // Pick a random AI opponent deck in the range [1, getAIDeckCount()].
    opponent = rand() % getAIDeckCount() + 1;
    opponentName = "";

    // Set a random expiration between 1 and 3, then derive the reward.
    setExpiration((rand() % 3) + 1);
    reward = computeReward();
}
Example #2
void handlePhysicsPostStepEvent()
{
	// At the end of each physics step, update the robot.
	verve::real reward = computeReward();
	//mAvgRewardPerStep += reward;
	gRobot->update(reward, gPhysicsStepSize);
	//mCurrentTrialTime += gPhysicsStepSize;
}
Example #3
int Task::getReward()
{
    // Lazily compute and cache the reward on first access
    // (a value of 0 is treated as "not yet computed").
    if (reward == 0)
    {
        reward = computeReward();
    }

    return reward;
}
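
A caveat on Example #3: because 0 doubles as the "not yet computed" sentinel, a legitimately zero reward is recomputed on every call. A minimal sketch of a variant with an explicit flag (the rewardComputed member is an assumption, not shown in the original class):

// Hypothetical variant of Task::getReward(): cache with an explicit flag
// so a genuine reward of 0 is not recomputed. rewardComputed is an
// assumed bool member, initialized to false.
int Task::getReward()
{
    if (!rewardComputed)
    {
        reward = computeReward();
        rewardComputed = true;
    }

    return reward;
}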
Example #4
void PendulumTest::handlePostStepEvent()
{
	// If the trial is not finished, update the Pendulum.
	if (mCurrentTrialTime < mTrialLength)
	{
		verve::real reward = computeReward();
		mAvgRewardPerStep += reward;
		mPendulum->update(reward, mPhysicsStepSize);
		mCurrentTrialTime += mPhysicsStepSize;
	}
}
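
Examples #2 and #4 feed a per-step reward into a physics-driven update, but computeReward() itself is not shown. A plausible sketch for the pendulum task, assuming an angle accessor that does not appear in the original (both the accessor and the cosine shaping are guesses):

#include <cmath>

// Hypothetical sketch: reward an upright pendulum. mPendulum->getAngle()
// is an assumed accessor returning radians from vertical; the original
// example only calls computeReward().
verve::real PendulumTest::computeReward()
{
	// cos(0) = +1 when the pendulum points straight up,
	// cos(pi) = -1 when it hangs straight down.
	return (verve::real)std::cos(mPendulum->getAngle());
}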
Example #5
TTTState::TTTState(const TTTState& prev, const a_ptr& a)
	: TTTState(prev)
{
	//assert(board[a->ID()] == EMPTY);
	// Apply the action to the copied board, then pass the turn.
	board[a->ID()] = turn;
	board.rehash(); // recalculate the board hash
	turn = (turn == O) ? X : O;
	computeReward();

	// Linear search for the action equal to 'a' in a list of actions.
	auto aFind = [](const std::vector<a_ptr>& n, const a_ptr& a){
		for (auto i = n.begin(); i != n.end(); ++i) {
			if (**i == *a)
				return i;
		}
		return n.end();
	};
	//assert(aFind(prev._next, a) != prev._next.end());
	//assert(aFind(_next, a) != _next.end());
	// Remove the taken action from this state's remaining moves.
	_next.erase(aFind(_next, a));

	// A nonzero reward or no remaining moves marks a terminal state.
	if (_reward != 0.0 || _next.empty())
		done = true;
	_ID = board.ID();
	_hash = board.hash();
	if (done)
		_next.clear();
}
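
Example #5 calls computeReward() but its definition is not shown. The constructor treats any nonzero _reward as terminal, so a natural sketch is a win bonus for the player who just moved (board.hasLine() is an assumed helper, not part of the original):

// Hypothetical sketch of TTTState::computeReward(). board.hasLine(p) is
// assumed to report whether player p has three in a row; only _reward
// appears in the original code.
void TTTState::computeReward()
{
	// 'turn' was already flipped in the constructor, so the player who
	// just moved is the opposite of the current turn.
	_reward = board.hasLine((turn == O) ? X : O) ? 1.0 : 0.0;
}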
Example #6
void HotPlateTest::runTest(bool useContinuousInputs, 
	const std::string& baseFilename)
{
	DataFile dataFile(mNumTrialsPerRun);

	for (unsigned int run = 0; run < mNumRuns; ++run)
	{
		verve::AgentDescriptor desc;

		if (useContinuousInputs)
		{
			desc.addContinuousSensor(); // Robot position.

			// If we're using dynamic RBFs, we need to keep the resolution 
			// smaller than the actual discrete grid world's resolution.  
			// Otherwise, we'll get a sparse set of RBFs with tiny receptive 
			// fields.  (This is sort of a special case; normally we would 
			// not use continuous sensors in a discrete world.)
			desc.setContinuousSensorResolution(10);
		}
		else
		{
			desc.addDiscreteSensor(20); // Robot position.
		}

		desc.setNumOutputs(3);
		verve::Agent a(desc);
		verve::Observation obs;
		obs.init(a);
		a.setTDLearningRate((verve::real)0.1, 10);

		for (unsigned int trial = 0; trial < mNumTrialsPerRun; ++trial)
		{
			a.resetShortTermMemory();
			mWorld.randomizeRobotPosition();
			unsigned int stepCount = 0;

			while (!isRobotAtGoal() && stepCount < mMaxStepsPerTrial)
			{
				updateObservation(obs, useContinuousInputs);

				// Update the Agent.
				unsigned int action = a.update(computeReward(), obs, 
					(verve::real)0.1);

				switch(action)
				{
					case 0:
						mWorld.moveRobotLeft();
						break;
					case 1:
						mWorld.moveRobotRight();
						break;
					case 2:
						// Don't move.
						break;
					default:
						assert(false);
						break;
				}

				++stepCount;
			}

			// If the Agent has actually reached the goal (and did not 
			// simply run out of time), this will reward it.
			updateObservation(obs, useContinuousInputs);
			a.update(computeReward(), obs, (verve::real)0.1);

			dataFile.storeData("trial", trial, (float)trial);
			dataFile.storeData("steps to goal", trial, (float)stepCount);

			//// Print value function data.
			//if (0 == run && 
			//	(trial == 0 || trial == 4 || trial == 9 || trial == 29))
			//{
			//	char fileStr[1024];
			//	sprintf(fileStr, "%s-trial%d-value.dat", 
			//		baseFilename.c_str(), trial);
			//	a.saveValueData(400, fileStr);
			//}
		}

		if (run % 50 == 0)
		{
			printRunStatus(run);
		}

		//if (0 == run)
		//{
		//	a.saveValueData(400, baseFilename + "-valueData.dat");
		//	a.saveStateRBFData(baseFilename + "-stateRBFData.dat");
		//}
	}

	dataFile.printToFile(baseFilename + "-performance.dat");
}
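
Examples #6 and #7 pass computeReward() into Agent::update() on every step without showing its body. A minimal sketch for the goal-reaching grid world of Example #6, reusing the isRobotAtGoal() check from the step loop (the reward values themselves are assumptions):

// Hypothetical sketch: a sparse goal reward for HotPlateTest. Only
// isRobotAtGoal() appears in the original example; the 0/1 values are
// assumed.
verve::real HotPlateTest::computeReward()
{
	// Reward only when the robot occupies the goal cell; all other
	// steps are neutral.
	return isRobotAtGoal() ? (verve::real)1 : (verve::real)0;
}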
Example #7
void CuriosityTest::runTest(bool useContinuousInputs, 
	const std::string& baseFilename)
{
	DataFile dataFile(mNumTrialsPerRun);

	for (unsigned int run = 0; run < mNumRuns; ++run)
	{
		verve::AgentDescriptor desc;
		desc.setArchitecture(verve::CURIOUS_MODEL_RL);
		desc.setMaxNumPlanningSteps(50);

		if (useContinuousInputs)
		{
			desc.addContinuousSensor(); // Robot x position.
			desc.addContinuousSensor(); // Robot y position.

			// If we're using dynamic RBFs, we need to keep the resolution 
			// smaller than the actual discrete grid world's resolution.  
			// Otherwise, we'll get a sparse set of RBFs with tiny receptive 
			// fields.  (This is sort of a special case; normally we would 
			// not use continuous sensors in a discrete world.)
			desc.setContinuousSensorResolution(15);
		}
		else
		{
			desc.addDiscreteSensor(mWorld.getGridXSize()); // Robot x position.
			desc.addDiscreteSensor(mWorld.getGridYSize()); // Robot y position.
		}

		desc.setNumOutputs(5);
		verve::Agent a(desc);
		verve::Observation obs;
		obs.init(a);
		a.setModelLearningRate((verve::real)0.0);
		a.setTDLearningRate((verve::real)0.1, (verve::real)2.0);

		for (unsigned int trial = 0; trial < mNumTrialsPerRun; ++trial)
		{
			a.resetShortTermMemory();
			mWorld.setRobotPosition(mRobotStartPosition[0], 
				mRobotStartPosition[1]);
			unsigned int stepCount = 0;
			verve::real rewardSum = 0;

			while (stepCount < mNumStepsPerTrial)
			{
				updateObservation(obs, useContinuousInputs);

				// Update the Agent.
				unsigned int action = a.update(computeReward(), obs, 
					(verve::real)0.1);

				switch(action)
				{
					case 0:
						mWorld.moveRobotLeft();
						break;
					case 1:
						mWorld.moveRobotRight();
						break;
					case 2:
						mWorld.moveRobotUp();
						break;
					case 3:
						mWorld.moveRobotDown();
						break;
					case 4:
						// Don't move.
						break;
					default:
						assert(false);
						break;
				}

				++stepCount;
				rewardSum += computeReward();
			}

			// If the Agent has actually reached the goal (and did not 
			// simply run out of time), this will reward it.
			updateObservation(obs, useContinuousInputs);
			a.update(computeReward(), obs, (verve::real)0.1);

			dataFile.storeData("trial", trial, (float)trial);
			dataFile.storeData("reward sum", trial, rewardSum);

			// Print value function data.
			if (0 == run && 
				(trial == 1 || trial == 9 || trial == 49 || trial == 79))
			{
				char fileStr[1024];
				sprintf(fileStr, "%s-trial%d-value.dat", 
					baseFilename.c_str(), trial);
				a.saveValueData(400, fileStr);
			}

			if (trial % 5 == 0)
			{
				printTrialAndRunStatus(run, trial, rewardSum);
			}
		}
	}

	dataFile.printToFile(baseFilename + "-performance.dat");
}