예제 #1
0
/**
 * Constructs a learner from the emulator interface and a parameter object.
 *
 * @param ale    Emulator interface, queried here only for its action set.
 * @param param  Source of the learner's settings; dereferenced without a
 *               null check, so it must point to a valid object.
 */
RLLearner::RLLearner(ALEInterface& ale, Parameters *param){
	// Both counters start at zero.
	frame             = 0;
	randomActionTaken = 0;

	// Learning settings, copied verbatim from the parameter object.
	gamma               = param->getGamma();
	epsilon             = param->getEpsilon();
	toUseOnlyRewardSign = param->getUseRewardSign();
	numStepsPerAction   = param->getNumStepsPerAction();
	toBeOptimistic      = param->getOptimisticInitialization();

	// Episode length and the number of evaluation / learning episodes.
	episodeLength    = param->getEpisodeLength();
	numEpisodesEval  = param->getNumEpisodesEval();
	numEpisodesLearn = param->getNumEpisodesLearn();

	// Use the minimal action set when requested, the legal action set
	// otherwise, and cache how many actions that gives us.
	actions    = param->isMinimalAction() ? ale.getMinimalActionSet()
	                                      : ale.getLegalActionSet();
	numActions = actions.size();
}
/**
 * Runs a Decision-driven agent on "gravitar.bin" for a fixed number of
 * episodes and reports the score and elapsed time of each episode, followed
 * by the averages over all episodes.
 *
 * On every step the agent is given the current screen, the current number of
 * lives, and a flag that is true only on the step where a drop in the number
 * of lives is first observed.
 *
 * @param argc  Unused.
 * @param argv  Unused.
 * @return 0 once all episodes have been played.
 */
int main(int argc, char** argv) {
    ALEInterface ale;

    // Get & Set the desired settings
    ale.setInt("random_seed", 123);
    // Repeat-action probability is set to 0 because we don't want stochasticity
    ale.setFloat("repeat_action_probability", 0);

#ifdef __USE_SDL
    ale.setBool("display_screen", false);
    ale.setBool("sound", false);
#endif

    /// Uncomment to Record
       // std::string recordPath = "record";
       // std::cout << std::endl;
    
       // // Set record flags
       // ale.setString("record_screen_dir", recordPath.c_str());
       // ale.setString("record_sound_filename", (recordPath + "/sound.wav").c_str());
       // // We set fragsize to 64 to ensure proper sound sync
       // ale.setInt("fragsize", 64);
    
       // // Not completely portable, but will work in most cases
       // std::string cmd = "mkdir ";
       // cmd += recordPath;
       // system(cmd.c_str());


    // Load the ROM file. (Also resets the system for new settings to
    // take effect.)
    ale.loadROM("gravitar.bin");

    // Get the vector of minimal actions
    const ActionVect minimal_actions = ale.getMinimalActionSet();

    // Erase actions that move, but don't fire
    //minimal_actions.erase(minimal_actions.begin() + 2, minimal_actions.begin() + 10);

    // Totals accumulated over all episodes
    float allRewards = 0;
    double allTimes = 0;
    Timer timer;

    // Play 200 episodes
    const int episodes = 200;

    // Number of lives seen on the previous step; used to detect a lost life.
    int lastLives = ale.lives();

    // Reuse the action set fetched above instead of querying it a second time.
    Decision decision = Decision(minimal_actions, ale.getScreen());

    for (int episode = 0; episode < episodes; episode++) {
        float totalReward = 0;
        timer.start();
        while (!ale.game_over()) {
            // The flag is raised only on the step where the life count drops.
            const int lives = ale.lives();
            const bool reset = lives < lastLives;
            if (reset) {
                lastLives = lives;
            }

            // Apply the action and get the resulting reward
            const float reward = ale.act(decision.getDecision(ale.getScreen(), lives, reset));
            //decision.print();
            totalReward += reward;
        }
        timer.stop();
        const double episodeTime = timer();
        timer.reset();

        allRewards += totalReward;
        allTimes += episodeTime;
        cout << "Episode " << episode << " ended with score: " << totalReward << " with time: "<< episodeTime <<endl;

        ale.reset_game();
        // Re-sync with the fresh game. Without this, lastLives keeps the low
        // count from the end of the finished episode and lost lives are never
        // detected again in later episodes.
        lastLives = ale.lives();
    }

    // Display average reward and time per game
    cout << "Average Reward: " << (allRewards / episodes) << " Average Time: " << (allTimes/episodes) << endl;

    return 0;
}