void PacMan::performAction(action_t action)
{
    timestep++;
    int newX, newY;

    // Start from the per-step base reward; the move/ghost updates below adjust it.
    m_reward = 200;

    if (action == 0) // Go up.
    {
        newX = pacmanX;
        newY = pacmanY - 1;
    }
    else if (action == 1) // Go right.
    {
        newX = pacmanX + 1;
        newY = pacmanY;
    }
    else if (action == 2) // Go down.
    {
        newX = pacmanX;
        newY = pacmanY + 1;
    }
    else // action == 3: go left (also avoids leaving newX/newY uninitialized).
    {
        newX = pacmanX - 1;
        newY = pacmanY;
    }

    movePacmanAndUpdateReward(newX, newY);
    moveGhostAndUpdateReward(&aGhostX, &aGhostY, &sniffA, &aGhostCovering, 'A');
    moveGhostAndUpdateReward(&bGhostX, &bGhostY, &sniffB, &bGhostCovering, 'B');
    moveGhostAndUpdateReward(&cGhostX, &cGhostY, &sniffC, &cGhostCovering, 'C');
    moveGhostAndUpdateReward(&dGhostX, &dGhostY, &sniffD, &dGhostCovering, 'D');

    updateObservation();
    m_action = action;

    if (reset)
    {
        resetEpisode();
    }

    //printMap();
}
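For context, here is a minimal sketch of how this step function might be driven from an interaction loop. The default PacMan constructor, the numCycles constant, and the random action policy are assumptions for illustration only; they are not taken from the surrounding code.

    // Hypothetical driver loop for the PacMan environment above.
    #include <cstdlib>

    int main()
    {
        PacMan env;                    // Assumed default constructor.
        const int numCycles = 1000;    // Arbitrary episode-free cycle count.

        for (int t = 0; t < numCycles; ++t)
        {
            // Pick one of the four movement actions (0 = up, 1 = right,
            // 2 = down, 3 = left). A real agent would choose its action
            // from the latest observation and reward instead of randomly.
            action_t action = static_cast<action_t>(std::rand() % 4);

            // Advance the environment one step: performAction() moves Pacman,
            // moves the four ghosts, updates m_reward, and refreshes the
            // observation for the next cycle.
            env.performAction(action);
        }
        return 0;
    }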
void HotPlateTest::runTest(bool useContinuousInputs, const std::string& baseFilename)
{
    DataFile dataFile(mNumTrialsPerRun);

    for (unsigned int run = 0; run < mNumRuns; ++run)
    {
        verve::AgentDescriptor desc;
        if (useContinuousInputs)
        {
            desc.addContinuousSensor(); // Robot position.

            // If we're using dynamic RBFs, we need to keep the resolution
            // smaller than the actual discrete grid world's resolution.
            // Otherwise, we'll get a sparse set of RBFs with tiny receptive
            // fields. (This is sort of a special case; normally we would
            // not use continuous sensors in a discrete world.)
            desc.setContinuousSensorResolution(10);
        }
        else
        {
            desc.addDiscreteSensor(20); // Robot position.
        }
        desc.setNumOutputs(3);

        verve::Agent a(desc);
        verve::Observation obs;
        obs.init(a);
        a.setTDLearningRate((verve::real)0.1, 10);

        for (unsigned int trial = 0; trial < mNumTrialsPerRun; ++trial)
        {
            a.resetShortTermMemory();
            mWorld.randomizeRobotPosition();
            unsigned int stepCount = 0;

            while (!isRobotAtGoal() && stepCount < mMaxStepsPerTrial)
            {
                updateObservation(obs, useContinuousInputs);

                // Update the Agent.
                unsigned int action = a.update(computeReward(), obs, (verve::real)0.1);

                switch(action)
                {
                    case 0:
                        mWorld.moveRobotLeft();
                        break;
                    case 1:
                        mWorld.moveRobotRight();
                        break;
                    case 2:
                        // Don't move.
                        break;
                    default:
                        assert(false);
                        break;
                }

                ++stepCount;
            }

            // If the Agent has actually reached the goal (and did not
            // simply run out of time), this will reward it.
            updateObservation(obs, useContinuousInputs);
            a.update(computeReward(), obs, (verve::real)0.1);

            dataFile.storeData("trial", trial, (float)trial);
            dataFile.storeData("steps to goal", trial, (float)stepCount);

            //// Print value function data.
            //if (0 == run &&
            //    (trial == 0 || trial == 4 || trial == 9 || trial == 29))
            //{
            //    char fileStr[1024];
            //    sprintf(fileStr, "%s-trial%d-value.dat",
            //        baseFilename.c_str(), trial);
            //    a.saveValueData(400, fileStr);
            //}
        }

        if (run % 50 == 0)
        {
            printRunStatus(run);
        }

        //if (0 == run)
        //{
        //    a.saveValueData(400, baseFilename + "-valueData.dat");
        //    a.saveStateRBFData(baseFilename + "-stateRBFData.dat");
        //}
    }

    dataFile.printToFile(baseFilename + "-performance.dat");
}
void CuriosityTest::runTest(bool useContinuousInputs, const std::string& baseFilename)
{
    DataFile dataFile(mNumTrialsPerRun);

    for (unsigned int run = 0; run < mNumRuns; ++run)
    {
        verve::AgentDescriptor desc;
        desc.setArchitecture(verve::CURIOUS_MODEL_RL);
        desc.setMaxNumPlanningSteps(50);
        if (useContinuousInputs)
        {
            desc.addContinuousSensor(); // Robot x position.
            desc.addContinuousSensor(); // Robot y position.

            // If we're using dynamic RBFs, we need to keep the resolution
            // smaller than the actual discrete grid world's resolution.
            // Otherwise, we'll get a sparse set of RBFs with tiny receptive
            // fields. (This is sort of a special case; normally we would
            // not use continuous sensors in a discrete world.)
            desc.setContinuousSensorResolution(15);
        }
        else
        {
            desc.addDiscreteSensor(mWorld.getGridXSize()); // Robot x position.
            desc.addDiscreteSensor(mWorld.getGridYSize()); // Robot y position.
        }
        desc.setNumOutputs(5);

        verve::Agent a(desc);
        verve::Observation obs;
        obs.init(a);
        a.setModelLearningRate((verve::real)0.0);
        a.setTDLearningRate((verve::real)0.1, (verve::real)2.0);

        for (unsigned int trial = 0; trial < mNumTrialsPerRun; ++trial)
        {
            a.resetShortTermMemory();
            mWorld.setRobotPosition(mRobotStartPosition[0], mRobotStartPosition[1]);
            unsigned int stepCount = 0;
            verve::real rewardSum = 0;

            while (stepCount < mNumStepsPerTrial)
            {
                updateObservation(obs, useContinuousInputs);

                // Update the Agent.
                unsigned int action = a.update(computeReward(), obs, (verve::real)0.1);

                switch(action)
                {
                    case 0:
                        mWorld.moveRobotLeft();
                        break;
                    case 1:
                        mWorld.moveRobotRight();
                        break;
                    case 2:
                        mWorld.moveRobotUp();
                        break;
                    case 3:
                        mWorld.moveRobotDown();
                        break;
                    case 4:
                        // Don't move.
                        break;
                    default:
                        assert(false);
                        break;
                }

                ++stepCount;
                rewardSum += computeReward();
            }

            // Give the Agent one final update with the last observation and
            // reward before the trial ends.
            updateObservation(obs, useContinuousInputs);
            a.update(computeReward(), obs, (verve::real)0.1);

            dataFile.storeData("trial", trial, (float)trial);
            dataFile.storeData("reward sum", trial, rewardSum);

            // Print value function data.
            if (0 == run &&
                (trial == 1 || trial == 9 || trial == 49 || trial == 79))
            {
                char fileStr[1024];
                sprintf(fileStr, "%s-trial%d-value.dat",
                    baseFilename.c_str(), trial);
                a.saveValueData(400, fileStr);
            }

            if (trial % 5 == 0)
            {
                printTrialAndRunStatus(run, trial, rewardSum);
            }
        }
    }

    dataFile.printToFile(baseFilename + "-performance.dat");
}
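A short sketch of how these two test harnesses might be invoked follows. Only the runTest(bool, const std::string&) signatures come from the code above; the default constructors and the base filenames are assumptions for illustration.

    // Hypothetical test driver: runs each experiment with discrete sensors and
    // then with continuous (RBF) sensors. Each call writes a
    // "<base>-performance.dat" results file via DataFile::printToFile().
    int main()
    {
        HotPlateTest hotPlate;     // Assumed default constructor.
        CuriosityTest curiosity;   // Assumed default constructor.

        hotPlate.runTest(false, "hotPlate-discrete");
        hotPlate.runTest(true, "hotPlate-continuous");

        curiosity.runTest(false, "curiosity-discrete");
        curiosity.runTest(true, "curiosity-continuous");

        return 0;
    }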