void ALEState::reset(int numResetSteps) {
  // reset the rom
  m_settings->reset();

  // Reset the paddles
  default_paddles();

  // Reset the emulator
  m_osystem->console().system().reset();

  // NOOP for 60 steps
  for (int i = 0; i < 60; i++) {
    apply_action(PLAYER_A_NOOP, PLAYER_B_NOOP);
    simulate();
    // Don't count these frames
    frame_number--;
  }

  // reset for n steps
  for (int i = 0; i < numResetSteps; i++) {
    apply_action(RESET, PLAYER_B_NOOP);
    simulate();
    // Don't count these frames
    frame_number--;
  }

  // Apply necessary actions specified by the rom itself
  ActionVect startingActions = m_settings->getStartingActions();
  for (size_t i = 0; i < startingActions.size(); i++) {
    apply_action(startingActions[i], PLAYER_B_NOOP);
    simulate();
    frame_number--;
  }
}
/** Resets the system to its start state. */
void StellaEnvironment::reset() {
  // RNG for generating environments
  Random randGen;

  // Reset the paddles
  m_state.resetVariables(m_osystem->event());

  // Reset the emulator
  m_osystem->console().system().reset();

  // NOOP for 60 steps in the deterministic environment setting, or some random amount otherwise
  int noopSteps;
  if (m_stochastic_start)
    noopSteps = 60 + rand() % NUM_RANDOM_ENVIRONMENTS;
  else
    noopSteps = 60;

  emulate(PLAYER_A_NOOP, PLAYER_B_NOOP, noopSteps);

  // reset for n steps
  emulate(RESET, PLAYER_B_NOOP, m_num_reset_steps);

  // reset the rom (after emulating, in case the NOOPs led to reward)
  m_settings->reset();

  // Apply necessary actions specified by the rom itself
  if (m_use_starting_actions) {
    ActionVect startingActions = m_settings->getStartingActions();
    for (size_t i = 0; i < startingActions.size(); i++)
      emulate(startingActions[i], PLAYER_B_NOOP);
  }
}
/** Resets the system to its start state. */
void StellaEnvironment::reset() {
  m_state.resetEpisodeFrameNumber();

  // Reset the paddles
  m_state.resetPaddles(m_osystem->event());

  // Reset the emulator
  m_osystem->console().system().reset();

  // NOOP for 60 steps
  int noopSteps = 60;
  emulate(PLAYER_A_NOOP, PLAYER_B_NOOP, noopSteps);

  // reset for n steps
  emulate(RESET, PLAYER_B_NOOP, m_num_reset_steps);

  // reset the rom (after emulating, in case the NOOPs led to reward)
  m_settings->reset();

  // Apply necessary actions specified by the rom itself
  ActionVect startingActions = m_settings->getStartingActions();
  for (size_t i = 0; i < startingActions.size(); i++) {
    emulate(startingActions[i], PLAYER_B_NOOP);
  }
}
double RandomAgent::evaluatePolicy(Environment<bool>& env) {
  int reward = 0;
  int totalReward = 0;
  int cumulativeReward = 0;
  int numActions;
  ActionVect actions;

  // Check if one wants to sample from all possible actions or only the valid ones:
  if (useMinActions) {
    actions = env.getMinimalActionSet();
  } else {
    actions = env.getLegalActionSet();
  }
  numActions = actions.size();
  printf("Number of Actions: %d\n\n", numActions);

  // Repeat (for each episode):
  for (int episode = 0; episode < numEpisodesToEval; episode++) {
    int step = 0;
    while (!env.game_over() && step < maxStepsInEpisode) {
      reward = env.act(actions[rand() % numActions]);
      cumulativeReward += reward;
      step++;
    }
    printf("Episode %d, Cumulative Reward: %d\n", episode + 1, cumulativeReward);
    totalReward += cumulativeReward;
    cumulativeReward = 0;
    env.reset_game(); // Start the game again when the episode is over
  }

  return double(totalReward) / numEpisodesToEval;
}
int main(int argc, char** argv) {
  ALEInterface ale(1);

  if (argc != 4) {
    printf("Usage: %s rom_file path_to_save_bits seed\n", argv[0]);
    exit(1);
  }
  int seed = atoi(argv[3]);

  ale.setInt("frame_skip", 5);
  ale.setInt("random_seed", seed);
  ale.setInt("max_num_frames_per_episode", 18000);
  ale.setFloat("repeat_action_prob", 0.00);
  ale.loadROM(argv[1]);

  string outputFile = argv[2];
  srand(seed);

  ActionVect actions = ale.getLegalActionSet();
  RAMFeatures features;
  vector<bool> F;

  ofstream outFile;
  outFile.open(outputFile);

  int reward = 0;

  // Write the RAM feature vector for the initial state
  F.clear();
  features.getCompleteFeatureVector(ale.getRAM(), F);
  for (size_t i = 0; i < F.size(); i++) {
    outFile << F[i] << ",";
  }
  outFile << endl;

  // Act randomly until the episode ends, dumping the RAM features after every action
  while (!ale.game_over()) {
    reward += ale.act(actions[rand() % actions.size()]);

    F.clear();
    features.getCompleteFeatureVector(ale.getRAM(), F);
    for (size_t i = 0; i < F.size(); i++) {
      outFile << F[i] << ",";
    }
    outFile << endl;
  }

  printf("Episode ended with a score of %d points\n", reward);
  outFile.close();
  return 0;
}
Action getAction(ActionVect av, ALEState state, ALEInterface& ale) {
  float bestReward = 0;
  Action bestAction = av[rand() % av.size()];

  // One-step lookahead (currently disabled): try each action from the saved
  // state and keep the one with the highest immediate reward.
  // for (int i = 0; i < av.size(); i++) {
  //   float reward = ale.act(av[i]);
  //   if (reward > bestReward) {
  //     bestAction = av[i];
  //     bestReward = reward;
  //   }
  //   ale.restoreState(state);
  // }

  return bestAction;
}
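// A minimal sketch of the one-step greedy lookahead that the disabled block
// above seems to intend, assuming ALEInterface::cloneState()/restoreState()
// are available; the function name here is illustrative, not taken from the
// surrounding project. Starting bestReward at 0, as above, would mean strictly
// negative rewards are never preferred, so INT_MIN is used as the baseline.
Action greedyOneStepAction(const ActionVect& av, ALEInterface& ale) {
  ALEState saved = ale.cloneState();   // remember the current emulator state
  Action bestAction = av[rand() % av.size()];
  int bestReward = INT_MIN;            // requires <climits>
  for (size_t i = 0; i < av.size(); i++) {
    int reward = ale.act(av[i]);       // try the action for one step
    if (reward > bestReward) {
      bestReward = reward;
      bestAction = av[i];
    }
    ale.restoreState(saved);           // rewind before trying the next action
  }
  return bestAction;
}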
std::string PrintQValues(const std::vector<float>& q_values,
                         const ActionVect& actions) {
  assert(!q_values.empty());
  assert(!actions.empty());
  assert(q_values.size() == actions.size());

  std::ostringstream actions_buf;
  std::ostringstream q_values_buf;
  for (size_t i = 0; i < q_values.size(); ++i) {
    const auto a_str = boost::algorithm::replace_all_copy(
        action_to_string(actions[i]), "PLAYER_A_", "");
    const auto q_str = std::to_string(q_values[i]);
    const auto column_size = std::max(a_str.size(), q_str.size()) + 1;
    actions_buf.width(column_size);
    actions_buf << a_str;
    q_values_buf.width(column_size);
    q_values_buf << q_str;
  }
  actions_buf << std::endl;
  q_values_buf << std::endl;
  return actions_buf.str() + q_values_buf.str();
}
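// A minimal usage sketch for PrintQValues; the Q-values here are made up for
// illustration (normally they would come from the agent's value estimates).
// The returned string holds two aligned rows: action names on top, Q-values
// underneath.
void printQValuesExample() {
  ActionVect actions = {PLAYER_A_NOOP, PLAYER_A_FIRE, PLAYER_A_RIGHT};
  std::vector<float> q_values = {0.12f, 1.75f, -0.33f};
  std::cout << PrintQValues(q_values, actions);  // requires <iostream>
}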
ActionVect Pong2Player025Settings::getStartingActions() {
  ActionVect startingActions;
  startingActions.push_back(SELECT);
  startingActions.push_back(PLAYER_A_NOOP);
  startingActions.push_back(SELECT);
  startingActions.push_back(PLAYER_A_NOOP);
  startingActions.push_back(SELECT);
  startingActions.push_back(RESET);
  return startingActions;
}
int main(int argc, char** argv) {
  // For the use of options:
  RAMFeatures ramFeatures;
  vector<bool> FRam, FnextRam;
  vector<double> transitions((ramFeatures.getNumberOfFeatures() - 1) * 2, 0);

  readParameters(argc, argv);
  readParamFiles();
  srand(seed);

  // Initializing ALE:
  ALEInterface ale(1);
  ale.setFloat("frame_skip", NUM_STEPS_PER_ACTION);
  ale.setFloat("stochasticity", STOCHASTICITY);
  ale.setInt("random_seed", seed);
  ale.setInt("max_num_frames_per_episode", MAX_LENGTH_EPISODE);
  ale.loadROM(romPath.c_str());

  // Initializing useful things for the agent:
  BPROFeatures features;
  actions = ale.getLegalActionSet();
  numActions = actions.size();
  numFeatures = NUM_COLUMNS * NUM_ROWS * NUM_COLORS
      + (2 * NUM_COLUMNS - 1) * (2 * NUM_ROWS - 1) * NUM_COLORS * NUM_COLORS + 1;

  // Initialize Q-values and weights:
  for (int i = 0; i < numActions; i++) {
    Q.push_back(0);
    w.push_back(vector<double>(numFeatures, 0.0));
  }
  loadWeights(wgtPath);

  int reward = 0;
  double intr_reward = 0.0;

  FRam.clear();
  ramFeatures.getCompleteFeatureVector(ale.getScreen(), ale.getRAM(), FRam);

  while (!ale.game_over()) {
    // Get the features active in the current state:
    F.clear();
    features.getActiveFeaturesIndices(ale.getScreen(), ale.getRAM(), F);
    updateQValues();                 // Update Q-values for each possible action
    currentAction = epsilonGreedy();

    // Take action, observe reward and next state:
    reward += ale.act(actions[currentAction]);
    FnextRam.clear();
    ramFeatures.getCompleteFeatureVector(ale.getScreen(), ale.getRAM(), FnextRam);
    updateTransitionVector(FRam, FnextRam, transitions);

    // Calculating the intrinsic reward: normalize the transition vector and
    // take its dot product with the option's weight vector.
    for (size_t i = 0; i < transitions.size(); i++) {
      transitions[i] = (transitions[i] - mean[i]) / var[i];
    }
    intr_reward = 0.0;
    for (size_t i = 0; i < transitions.size(); i++) {
      intr_reward += option[i] * transitions[i];
    }
    printf("%f\n", intr_reward);

    FRam = FnextRam;
  }

  printf("Final score: %d\n", reward);
  return 0;
}
ActionVect JourneyEscapeSettings::getStartingActions() {
  ActionVect startingActions;
  startingActions.push_back(PLAYER_A_FIRE);
  return startingActions;
}
ActionVect AirRaidSettings::getStartingActions() {
  ActionVect startingActions;
  startingActions.push_back(PLAYER_A_FIRE);
  return startingActions;
}
ActionVect SirLancelotSettings::getStartingActions() {
  ActionVect startingActions;
  startingActions.push_back(RESET);
  startingActions.push_back(PLAYER_A_LEFT);
  return startingActions;
}
ActionVect NBAGiveNGoSettings::getStartingActions() {
  int num_of_nops(100);
  ActionVect startingActions;
  // startingActions.reserve(num_of_xs*num_of_nops);

  // wait for intro to end
  startingActions.insert(startingActions.end(), 9 * num_of_nops, JOYPAD_NOOP);
  // press start to begin
  startingActions.push_back(JOYPAD_START);
  // select arcade
  startingActions.insert(startingActions.end(), 2 * num_of_nops, JOYPAD_NOOP);
  startingActions.push_back(JOYPAD_START);
  // select 1 vs cpu
  startingActions.insert(startingActions.end(), 1.5 * num_of_nops, JOYPAD_NOOP);
  startingActions.push_back(JOYPAD_START);
  // select team
  startingActions.insert(startingActions.end(), 1.5 * num_of_nops, JOYPAD_NOOP);
  startingActions.push_back(JOYPAD_START);
  // select team
  startingActions.insert(startingActions.end(), 1.5 * num_of_nops, JOYPAD_NOOP);
  startingActions.push_back(JOYPAD_START);
  startingActions.insert(startingActions.end(), 15.1 * num_of_nops, JOYPAD_NOOP);

  return startingActions;
}
ActionVect FZeroSettings::getStartingActions() {
  int num_of_nops(100);
  ActionVect startingActions;
  // startingActions.reserve(num_of_xs*num_of_nops);

  // wait for intro to end
  startingActions.insert(startingActions.end(), num_of_nops, JOYPAD_NOOP);
  // press start to begin
  startingActions.push_back(JOYPAD_START);
  // select arcade
  startingActions.insert(startingActions.end(), num_of_nops, JOYPAD_NOOP);
  startingActions.push_back(JOYPAD_START);
  // select 1 vs cpu
  startingActions.insert(startingActions.end(), num_of_nops, JOYPAD_NOOP);
  startingActions.push_back(JOYPAD_START);
  // select team
  startingActions.insert(startingActions.end(), num_of_nops, JOYPAD_NOOP);
  startingActions.push_back(JOYPAD_START);
  // select team
  startingActions.insert(startingActions.end(), num_of_nops, JOYPAD_NOOP);
  startingActions.push_back(JOYPAD_START);
  // wait for race countdown
  startingActions.insert(startingActions.end(), 5.5 * num_of_nops, JOYPAD_NOOP);

  return startingActions;
}
ActionVect BeamRiderSettings::getStartingActions() {
  ActionVect startingActions;
  startingActions.push_back(PLAYER_A_RIGHT);
  return startingActions;
}
ActionVect PrivateEyeSettings::getStartingActions() {
  ActionVect startingActions;
  startingActions.push_back(PLAYER_A_UP);
  return startingActions;
}
ActionVect ElevatorActionSettings::getStartingActions() {
  ActionVect startingActions;
  for (int i = 0; i < 16; i++)
    startingActions.push_back(PLAYER_A_FIRE);
  return startingActions;
}
ActionVect YarsRevengeSettings::getStartingActions() {
  ActionVect startingActions;
  startingActions.push_back(PLAYER_A_FIRE);
  return startingActions;
}