int main(int argc, char** argv) { if (argc < 2) { std::cerr << "Usage: " << argv[0] << " rom_file" << std::endl; return 1; } ALEInterface ale; // Get & Set the desired settings ale.setInt("random_seed", 123); //The default is already 0.25, this is just an example ale.setFloat("repeat_action_probability", 0.25); #ifdef __USE_SDL ale.setBool("display_screen", true); ale.setBool("sound", true); #endif // Load the ROM file. (Also resets the system for new settings to // take effect.) ale.loadROM(argv[1]); // Get the vector of legal actions ActionVect legal_actions = ale.getLegalActionSet(); // Play 10 episodes for (int episode=0; episode<1; episode++) { float totalReward = 0; while (!ale.game_over()) { /* const ALEScreen screen = ale.getScreen(); float row_sum = 0, column_sum = 0, tot = 0.0; for (int i = 0; i < screen.height(); ++i) { for (int j = 0; j < screen.width(); ++j) { pixel_t tmp = screen.get(i, j); if(tmp >= 41 && tmp <= 43) { row_sum += i; column_sum += j; tot++; } } } cout << (row_sum/tot) << "," << (column_sum/tot) << endl; */ Action a = legal_actions[rand() % legal_actions.size()]; // Apply the action and get the resulting reward float reward = ale.act(a); totalReward += reward; } cout << "Episode " << episode << " ended with score: " << totalReward << endl; ale.reset_game(); } return 0; }
int main(int argc, char** argv) { if (argc < 2) { std::cout << "Usage: " << argv[0] << " rom_file" << std::endl; return 1; } ALEInterface ale; // Get & Set the desired settings ale.setInt("random_seed", 123); // We enable both screen and sound, which we will need for recording. ale.setBool("display_screen", true); // You may leave sound disabled (by setting this flag to false) if so desired. ale.setBool("sound", true); std::string recordPath = "record"; std::cout << std::endl; // Set record flags ale.setString("record_screen_dir", recordPath.c_str()); ale.setString("record_sound_filename", (recordPath + "/sound.wav").c_str()); // We set fragsize to 64 to ensure proper sound sync ale.setInt("fragsize", 64); // Not completely portable, but will work in most cases std::string cmd = "mkdir "; cmd += recordPath; system(cmd.c_str()); // Load the ROM file. (Also resets the system for new settings to // take effect.) ale.loadROM(argv[1]); // Get the vector of legal actions ActionVect legal_actions = ale.getLegalActionSet(); // Play a single episode, which we record. while (!ale.game_over()) { Action a = legal_actions[rand() % legal_actions.size()]; // Apply the action (discard the resulting reward) ale.act(a); } std::cout << std::endl; std::cout << "Recording complete. To create a video, you may want to run \n" " doc/scripts/videoRecordingExampleJoinXXX.sh. See manual for details.." << std::endl; return 0; }
// Appends every float that can be extracted from the file at `path` to `out`.
// When the file cannot be opened the first extraction fails and nothing is
// appended.
template <typename Container>
static void appendFloatsFromFile(const char* path, Container& out) {
    std::ifstream input(path);
    float entry;
    while (input >> entry) {
        out.push_back(entry);
    }
}

/**
 * Sets up a Learner: clears the reward bookkeeping, records the action set
 * exposed by `ale`, loads the option-reward vector and its mean/std statistics
 * from the files named in `param`, and allocates zeroed per-action storage.
 *
 * @param ale   Emulator interface; only queried for its legal action set.
 * @param param Run parameters; the fields gameName, outputPath, numOptions,
 *              eigVectorPath and statEigVectorPath are read.
 */
Learner::Learner(ALEInterface& ale, Parameters *param) : bproFeatures(param->gameName) {
    // Reward and update bookkeeping starts from a clean slate.
    delta = 0.0;
    cumReward = 0;
    prevCumReward = 0;
    cumIntrReward = 0;
    prevCumIntrReward = 0;
    maxFeatVectorNorm = 1;

    pathToSaveLearnedWeights = param->outputPath;

    // Two zeroed transition slots for each RAM feature except one.
    for (int slot = 0; slot < (ramFeatures.getNumberOfFeatures() - 1) * 2; ++slot) {
        transitions.push_back(0);
    }

    // The agent chooses among the primitive actions plus the options.
    actions = ale.getLegalActionSet();
    numOptions = param->numOptions;
    numBasicActions = actions.size();
    numTotalActions = numBasicActions + numOptions;

    // Vector describing the reward used for option learning. The first X
    // positions encode the transition 0->1 and the other X encode 1->0.
    pathToRewardDescription = param->eigVectorPath;
    appendFloatsFromFile(pathToRewardDescription.c_str(), eigVector);

    // Companion statistics live in "<prefix>_mean.out" and "<prefix>_std.out".
    pathToStatsDescription = param->statEigVectorPath;
    appendFloatsFromFile((pathToStatsDescription + "_mean.out").c_str(), mean);
    appendFloatsFromFile((pathToStatsDescription + "_std.out").c_str(), std);

    numFeatures = bproFeatures.getNumberOfFeatures();

    // Per-action storage: Q estimates, eligibility traces, weights, and the
    // (initially empty) lists of non-zero eligibility indices.
    int actionIdx = 0;
    while (actionIdx < numTotalActions) {
        Q.push_back(0);
        Qnext.push_back(0);
        e.push_back(vector<float>(numFeatures, 0.0));
        w.push_back(vector<float>(numFeatures, 0.0));
        nonZeroElig.push_back(vector<int>());
        ++actionIdx;
    }
}
int main(int argc, char** argv) { if (argc < 2) { std::cerr << "Usage: " << argv[0] << " rom_file" << std::endl; return 1; } ALEInterface ale; // Get & Set the desired settings ale.setInt("random_seed", 123); //The default is already 0.25, this is just an example ale.setFloat("repeat_action_probability", 0.25); #ifdef __USE_SDL ale.setBool("display_screen", true); ale.setBool("sound", true); #endif // Load the ROM file. (Also resets the system for new settings to // take effect.) ale.loadROM(argv[1]); // Get the vector of legal actions ActionVect legal_actions = ale.getLegalActionSet(); // Play 10 episodes for (int episode=0; episode<10; episode++) { float totalReward = 0; while (!ale.game_over()) { Action a = legal_actions[rand() % legal_actions.size()]; // Apply the action and get the resulting reward float reward = ale.act(a); totalReward += reward; if (reward != 0){ cout<<"totalReward: "<<totalReward<<endl; } } cout << "Episode " << episode << " ended with score: " << totalReward << endl; ale.reset_game(); } return 0; }
/**
 * Initialises the common reinforcement-learning state: zeroes the counters,
 * copies the hyper-parameters out of `param`, and selects the action set the
 * agent will choose from.
 *
 * @param ale   Emulator interface; queried for either its minimal or its
 *              legal action set.
 * @param param Run parameters supplying the learning and evaluation settings.
 */
RLLearner::RLLearner(ALEInterface& ale, Parameters *param) {
    // Counters
    frame = 0;
    randomActionTaken = 0;

    // Hyper-parameters and run lengths, taken verbatim from the parameters.
    gamma               = param->getGamma();
    epsilon             = param->getEpsilon();
    toUseOnlyRewardSign = param->getUseRewardSign();
    numStepsPerAction   = param->getNumStepsPerAction();
    toBeOptimistic      = param->getOptimisticInitialization();
    episodeLength       = param->getEpisodeLength();
    numEpisodesEval     = param->getNumEpisodesEval();
    numEpisodesLearn    = param->getNumEpisodesLearn();

    // Effective actions: the minimal set when requested, otherwise every
    // legal action.
    const bool useMinimalSet = param->isMinimalAction();
    actions = useMinimalSet ? ale.getMinimalActionSet() : ale.getLegalActionSet();
    numActions = actions.size();
}