/**
 * Plays a fixed number of episodes of "gravitar.bin" through the Arcade
 * Learning Environment, choosing every action with a Decision object, and
 * prints the score and wall-clock time of each episode followed by the
 * averages over all episodes.
 *
 * @param argc unused
 * @param argv unused
 * @return 0 on completion
 */
int main(int argc, char** argv) {
    ALEInterface ale;

    // Get & Set the desired settings
    ale.setInt("random_seed", 123);
    // Set repeat_action_probability to 0 because we don't want stochasticity
    ale.setFloat("repeat_action_probability", 0);

#ifdef __USE_SDL
    ale.setBool("display_screen", false);
    ale.setBool("sound", false);
#endif

    /// Uncomment to Record
       // std::string recordPath = "record";
       // std::cout << std::endl;
    
       // // Set record flags
       // ale.setString("record_screen_dir", recordPath.c_str());
       // ale.setString("record_sound_filename", (recordPath + "/sound.wav").c_str());
       // // We set fragsize to 64 to ensure proper sound sync
       // ale.setInt("fragsize", 64);
    
       // // Not completely portable, but will work in most cases
       // std::string cmd = "mkdir ";
       // cmd += recordPath;
       // system(cmd.c_str());


    // Load the ROM file. (Also resets the system for new settings to
    // take effect.)
    ale.loadROM("gravitar.bin");

    // Totals accumulated over all episodes, used for the final averages
    float allRewards = 0;
    double allTimes = 0;
    Timer timer;

    // Number of episodes to play
    const int episodes = 200;

    // Life count seen on the previous step; a drop means a life was just lost
    int lastLives = ale.lives();

    Decision decision(ale.getMinimalActionSet(), ale.getScreen());

    for (int episode = 0; episode < episodes; episode++) {
        float totalReward = 0;
        timer.start();
        while (!ale.game_over()) {
            // Tell the decision maker when a life was lost since the last step
            const int currentLives = ale.lives();
            const bool reset = currentLives < lastLives;
            if (reset) {
                lastLives = currentLives;
            }

            // Apply the action and get the resulting reward
            totalReward += ale.act(decision.getDecision(ale.getScreen(), ale.lives(), reset));
        }
        timer.stop();
        const double episodeTime = timer();
        timer.reset();

        allRewards += totalReward;
        allTimes += episodeTime;
        cout << "Episode " << episode << " ended with score: " << totalReward << " with time: "<< episodeTime <<endl;

        ale.reset_game();
        // Re-read the life count for the new game. Without this, lastLives
        // keeps the low value from the end of the previous episode and life
        // losses in later episodes go undetected.
        lastLives = ale.lives();
    }

    // Display average reward and time per game
    cout << "Average Reward: " << (allRewards / episodes) << " Average Time: " << (allTimes/episodes) << endl;

    return 0;
}
Esempio n. 2
0
/**
 * Repeats `action` in the emulator until one of the stop conditions below is
 * met, recording every frame, and returns the discounted sum of the rewards
 * reported by `mdp.ComputeState`.
 *
 * The loop stops when:
 *  - MAX_FRAMES iterations have run,
 *  - a life is lost (ale.lives() drops below its value at entry),
 *  - the RAM check on bytes 0xd8 / 0xd6 fails (treated here as "player not
 *    controllable" -- NOTE(review): meaning of these addresses is not
 *    documented in this file, confirm),
 *  - does_value_change() starts reporting true for the other axis although it
 *    reported false at entry, or
 *  - the RAM byte of the movement axis has not changed for
 *    NOT_MOVING_FRAMES consecutive frames.
 *
 * If the loop ended because of a lost life or loss of control and more than
 * N_BACK_FRAMES frames were recorded, the last N_BACK_FRAMES frames are
 * discarded and the emulator is restored to the last frame that is kept.
 *
 * @param ale            emulator interface that is stepped and possibly restored
 * @param display        passed to the DISPLAY macro after each step
 * @param action         action applied on every frame
 * @param discount_rate  per-frame multiplicative discount for the return value
 * @param mdp            provides StateUniqueID() and ComputeState()
 * @param elapsed_time   incremented by the number of frames kept
 * @param nophi_reward   out: undiscounted sum of the second reward value that
 *                       ComputeState writes, over the kept frames
 * @param phi_reward     out: undiscounted sum of ComputeState's return value
 *                       over the kept frames
 * @param all_steps      one (reward, previous state id) pair is appended per
 *                       frame; entries of discarded frames are removed again
 * @return discounted sum of ComputeState's return values over the kept frames
 */
static hexq::Reward
move_to_the(ALEInterface &ale, DisplayScreen *display, const Action action, const hexq::Reward discount_rate, hexq::MontezumaOptionsMdp &mdp, size_t &elapsed_time, hexq::Reward &nophi_reward, hexq::Reward &phi_reward, vector<pair<hexq::Reward,hexq::State> > &all_steps) {
	// For a left/right action the X byte is the one expected to change and
	// the vertical actions / Y byte describe the other axis; otherwise the
	// roles are swapped.
	const vector<Action> *axis_actions;
	unsigned int unchanging_addr, changing_addr;
	if(action == PLAYER_A_LEFT || action == PLAYER_A_RIGHT) {
		axis_actions = &vertical_actions;
		unchanging_addr = ADDR_Y;
		changing_addr = ADDR_X;
	} else {
		axis_actions = &horizontal_actions;
		unchanging_addr = ADDR_X;
		changing_addr = ADDR_Y;
	}

	// Remember whether the other axis was unavailable at entry, so that the
	// loop can stop as soon as it becomes available.
	const bool initial_cannot_change_axis =
		!does_value_change(ale, *axis_actions, unchanging_addr);

	// First step is taken unconditionally, before any stop condition is checked.
	hexq::State prev_s = mdp.StateUniqueID();
	phi_reward = mdp.ComputeState(ale.act(action), nophi_reward);

	// Per frame: ((phi reward, no-phi reward), emulator snapshot after the step)
	vector<pair<pair<hexq::Reward, hexq::Reward>, ALEState> > frames;
	frames.push_back(make_pair(make_pair(phi_reward, nophi_reward), ale.cloneSystemState()));
	all_steps.push_back(make_pair(phi_reward, prev_s));

	byte_t prev_changing = ale.getRAM().get(changing_addr);
	const int initial_lives = ale.lives();
	// Must start at zero: it is incremented and compared before any other write.
	int n_frames_unchanged = 0;
	bool controllable = true;
	bool lost_life = false;
	for(size_t iteration=0; !lost_life && iteration<MAX_FRAMES; iteration++) {
		hexq::Reward nophi_r;
		prev_s = mdp.StateUniqueID();
		const hexq::Reward reward = mdp.ComputeState(ale.act(action), nophi_r);

		frames.push_back(make_pair(make_pair(reward, nophi_r), ale.cloneSystemState()));
		all_steps.push_back(make_pair(reward, prev_s));
		DISPLAY(display);

		controllable = ale.getRAM().get(0xd8) == 0x00 && ale.getRAM().get(0xd6) == 0xff;
		if(!controllable) {
			break;
		}

		// Stop once the other axis becomes available when it was not at entry.
		if(initial_cannot_change_axis &&
				does_value_change(ale, *axis_actions, unchanging_addr)) {
			break;
		}

		// Stop when the coordinate on the movement axis has been stuck for
		// NOT_MOVING_FRAMES consecutive frames.
		const byte_t new_changing = ale.getRAM().get(changing_addr);
		if(new_changing == prev_changing) {
			n_frames_unchanged++;
			if(n_frames_unchanged >= NOT_MOVING_FRAMES) {
				break;
			}
		} else {
			n_frames_unchanged = 0;
			prev_changing = new_changing;
		}
		lost_life = ale.lives() < initial_lives;
	}

	if((lost_life || !controllable) && frames.size() > N_BACK_FRAMES) {
		// Drop the last N_BACK_FRAMES frames from both records. all_steps may
		// already hold entries from before this call, so it is shortened by
		// the same amount rather than truncated to frames' size.
		frames.resize(frames.size() - N_BACK_FRAMES);
		all_steps.resize(all_steps.size() - N_BACK_FRAMES);
		printf("went back\n");
		ale.restoreSystemState(frames.back().second);
		ale.environment->processRAM();
		ale.environment->processScreen();
		DISPLAY(display);
		// Recompute the MDP state for the restored emulator; rewards are ignored.
		hexq::Reward r;
		(void)mdp.ComputeState(0, r);
	}

	// Aggregate the rewards of the frames that were kept.
	hexq::Reward discount = 1.;
	hexq::Reward total_reward = 0;
	phi_reward = nophi_reward = 0;
	for(size_t i=0; i<frames.size(); i++) {
		const pair<hexq::Reward, hexq::Reward> &rewards = frames[i].first;
		total_reward += discount*rewards.first;
		discount *= discount_rate;
		nophi_reward += rewards.second;
		phi_reward += rewards.first;
	}
	elapsed_time += frames.size();
	return total_reward;
}