// Re-roll this Task's parameters: opponent, expiration, and reward.
void Task::randomize()
{
    // Opponent index is 1-based, drawn uniformly from the available AI decks.
    opponent = 1 + rand() % getAIDeckCount();
    opponentName = "";

    // Task expires after 1, 2, or 3 units of time.
    setExpiration(1 + (rand() % 3));

    // Reward depends on the freshly rolled parameters above.
    reward = computeReward();
}
void handlePhysicsPostStepEvent() { // At the end of each physics step, update the robot. verve::real reward = computeReward(); //mAvgRewardPerStep += reward; gRobot->update(reward, gPhysicsStepSize); //mCurrentTrialTime += gPhysicsStepSize; }
// Returns this Task's reward, computing and caching it on first use.
// A stored value of 0 is treated as "not yet computed".
int Task::getReward()
{
    if (0 == reward)
    {
        reward = computeReward();
    }
    return reward;
}
void PendulumTest::handlePostStepEvent() { // If the trial is not finished, update the Pendulum. if (mCurrentTrialTime < mTrialLength) { verve::real reward = computeReward(); mAvgRewardPerStep += reward; mPendulum->update(reward, mPhysicsStepSize); mCurrentTrialTime += mPhysicsStepSize; } }
// Successor-state constructor: starts as a copy of 'prev' (via the delegating
// copy path), then applies action 'a' for the player whose turn it is.
// Postconditions: the mark is placed, the board hash is refreshed, the turn is
// flipped, _reward reflects the new position, 'a' is removed from the legal
// actions, and 'done' is set for terminal positions.
TTTState::TTTState(const TTTState& prev,const a_ptr& a)
    :TTTState(prev){
    //assert(board[a->ID()] == EMPTY);
    // Place the mover's mark on the square named by the action.
    board[a->ID()] = turn;
    board.rehash(); //recalc hash
    // Hand the move to the other player.
    turn = (turn==O)?X:O;
    // Updates _reward for the position just created (called after the turn
    // flip, so order matters here).
    computeReward();
    // Linear search for the action equal to *a within a candidate list;
    // returns n.end() if absent.
    auto aFind = [](const std::vector<a_ptr>& n, const a_ptr& a){
        for(auto i = n.begin(); i != n.end(); ++i){
            if(**i==*a) return i;
        }
        return n.end();
    };
    //assert(aFind(prev._next,a) != prev._next.end());
    //assert(aFind(_next,a) != _next.end());
    // The chosen action is no longer available from this state.
    // NOTE(review): if 'a' were somehow absent this erases end(), which is
    // undefined behavior — the (disabled) asserts above guard that invariant.
    _next.erase(aFind(_next,a));
    // Terminal when the move scored (nonzero reward) or no moves remain (draw).
    if(_reward != 0.0 || _next.size() == 0) done = true;
    _ID = board.ID();
    _hash = board.hash();
    if(done) _next.clear();
}
// Runs the 1D "hot plate" experiment: mNumRuns independent runs, each training
// a fresh Agent for mNumTrialsPerRun trials, and records steps-to-goal per
// trial into a performance data file named from 'baseFilename'.
// 'useContinuousInputs' selects a continuous position sensor (with RBFs)
// instead of a 20-cell discrete position sensor.
void HotPlateTest::runTest(bool useContinuousInputs,
    const std::string& baseFilename)
{
    DataFile dataFile(mNumTrialsPerRun);
    for (unsigned int run = 0; run < mNumRuns; ++run)
    {
        // Each run gets a brand-new Agent so runs are independent samples.
        verve::AgentDescriptor desc;
        if (useContinuousInputs)
        {
            desc.addContinuousSensor(); // Robot position.

            // If we're using dynamic RBFs, we need to keep the resolution
            // smaller than the actual discrete grid world's resolution.
            // Otherwise, we'll get a sparse set of RBFs with tiny receptive
            // fields. (This is sort of a special case; normally we would
            // not use continuous sensors in a discrete world.)
            desc.setContinuousSensorResolution(10);
        }
        else
        {
            desc.addDiscreteSensor(20); // Robot position.
        }
        // Three actions: left, right, stay.
        desc.setNumOutputs(3);
        verve::Agent a(desc);
        verve::Observation obs;
        obs.init(a);
        a.setTDLearningRate((verve::real)0.1, 10);

        for (unsigned int trial = 0; trial < mNumTrialsPerRun; ++trial)
        {
            a.resetShortTermMemory();
            mWorld.randomizeRobotPosition();
            unsigned int stepCount = 0;

            // One trial: step until the robot reaches the goal or the
            // step budget runs out.
            while (!isRobotAtGoal() && stepCount < mMaxStepsPerTrial)
            {
                updateObservation(obs, useContinuousInputs);

                // Update the Agent.
                unsigned int action = a.update(computeReward(), obs,
                    (verve::real)0.1);

                switch(action)
                {
                    case 0:
                        mWorld.moveRobotLeft();
                        break;
                    case 1:
                        mWorld.moveRobotRight();
                        break;
                    case 2:
                        // Don't move.
                        break;
                    default:
                        assert(false);
                        break;
                }

                ++stepCount;
            }

            // If the Agent has actually reached the goal (and did not
            // simply run out of time), this will reward it.
            updateObservation(obs, useContinuousInputs);
            a.update(computeReward(), obs, (verve::real)0.1);

            dataFile.storeData("trial", trial, (float)trial);
            dataFile.storeData("steps to goal", trial, (float)stepCount);

            //// Print value function data.
            //if (0 == run &&
            //	(trial == 0 || trial == 4 || trial == 9 || trial == 29))
            //{
            //	char fileStr[1024];
            //	sprintf(fileStr, "%s-trial%d-value.dat",
            //		baseFilename.c_str(), trial);
            //	a.saveValueData(400, fileStr);
            //}
        }

        // Periodic progress report on the console.
        if (run % 50 == 0)
        {
            printRunStatus(run);
        }

        //if (0 == run)
        //{
        //	a.saveValueData(400, baseFilename + "-valueData.dat");
        //	a.saveStateRBFData(baseFilename + "-stateRBFData.dat");
        //}
    }

    dataFile.printToFile(baseFilename + "-performance.dat");
}
void CuriosityTest::runTest(bool useContinuousInputs, const std::string& baseFilename) { DataFile dataFile(mNumTrialsPerRun); for (unsigned int run = 0; run < mNumRuns; ++run) { verve::AgentDescriptor desc; desc.setArchitecture(verve::CURIOUS_MODEL_RL); desc.setMaxNumPlanningSteps(50); if (useContinuousInputs) { desc.addContinuousSensor(); // Robot x position. desc.addContinuousSensor(); // Robot y position. // If we're using dynamic RBFs, we need to keep the resolution // smaller than the actual discrete grid world's resolution. // Otherwise, we'll get a sparse set of RBFs with tiny receptive // fields. (This is sort of a special case; normally we would // not use continuous sensors in a discrete world.) desc.setContinuousSensorResolution(15); } else { desc.addDiscreteSensor(mWorld.getGridXSize()); // Robot x position. desc.addDiscreteSensor(mWorld.getGridYSize()); // Robot y position. } desc.setNumOutputs(5); verve::Agent a(desc); verve::Observation obs; obs.init(a); a.setModelLearningRate((verve::real)0.0); a.setTDLearningRate((verve::real)0.1, (verve::real)2.0); for (unsigned int trial = 0; trial < mNumTrialsPerRun; ++trial) { a.resetShortTermMemory(); mWorld.setRobotPosition(mRobotStartPosition[0], mRobotStartPosition[1]); unsigned int stepCount = 0; verve::real rewardSum = 0; while (stepCount < mNumStepsPerTrial) { updateObservation(obs, useContinuousInputs); // Update the Agent. unsigned int action = a.update(computeReward(), obs, (verve::real)0.1); switch(action) { case 0: mWorld.moveRobotLeft(); break; case 1: mWorld.moveRobotRight(); break; case 2: mWorld.moveRobotUp(); break; case 3: mWorld.moveRobotDown(); break; case 4: // Don't move. break; default: assert(false); break; } ++stepCount; rewardSum += computeReward(); } // If the Agent has actually reached the goal (and did not // simply run out of time), this will reward it. 
updateObservation(obs, useContinuousInputs); a.update(computeReward(), obs, (verve::real)0.1); dataFile.storeData("trial", trial, (float)trial); dataFile.storeData("reward sum", trial, rewardSum); // Print value function data. if (0 == run && (trial == 1 || trial == 9 || trial == 49 || trial == 79)) { char fileStr[1024]; sprintf(fileStr, "%s-trial%d-value.dat", baseFilename.c_str(), trial); a.saveValueData(400, fileStr); } if (trial % 5 == 0) { printTrialAndRunStatus(run, trial, rewardSum); } } } dataFile.printToFile(baseFilename + "-performance.dat"); }