Example no. 1
int main(int argc, char** argv) {
    if (argc < 2) {
        std::cerr << "Usage: " << argv[0] << " rom_file" << std::endl;
        return 1;
    }

    ALEInterface ale;

    // Get & Set the desired settings
    ale.setInt("random_seed", 123);
    //The default is already 0.25, this is just an example
    ale.setFloat("repeat_action_probability", 0.25);
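    // (With this probability ALE repeats the agent's previous action instead
    // of the new one: the "sticky actions" mechanism used to inject stochasticity.)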

#ifdef __USE_SDL
    ale.setBool("display_screen", true);
    ale.setBool("sound", true);
#endif

    // Load the ROM file. (Also resets the system for new settings to
    // take effect.)
    ale.loadROM(argv[1]);

    // Get the vector of legal actions
    ActionVect legal_actions = ale.getLegalActionSet();

    // Play a single episode
    for (int episode=0; episode<1; episode++) {
        float totalReward = 0;
        while (!ale.game_over()) {
            /*
            // Debugging aid (disabled): print the centroid of all pixels
            // whose palette value lies in [41, 43].
            const ALEScreen screen = ale.getScreen();
            float row_sum = 0, column_sum = 0, tot = 0.0;
            for (int i = 0; i < screen.height(); ++i)
            {
                for (int j = 0; j < screen.width(); ++j)
                {
                    pixel_t tmp = screen.get(i, j);
                    if (tmp >= 41 && tmp <= 43) {
                        row_sum += i;
                        column_sum += j;
                        tot++;
                    }
                }
            }
            if (tot > 0) // guard against division by zero when no pixel matches
                std::cout << (row_sum/tot) << "," << (column_sum/tot) << std::endl;
            */
            Action a = legal_actions[rand() % legal_actions.size()];
            // Apply the action and get the resulting reward
            float reward = ale.act(a);
            totalReward += reward;
        }
        cout << "Episode " << episode << " ended with score: " << totalReward << endl;
        ale.reset_game();
    }

    return 0;
}
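(The snippets on this page are shown without their headers. A plausible preamble that makes them compile against the ALE shared library is sketched below; the exact include paths depend on your ALE installation and are an assumption, not part of the original examples.)

#include <iostream>
#include <string>
#include <cstdlib>            // rand(), system()
#include <ale_interface.hpp>  // ALEInterface, ActionVect, Action, pixel_t
#ifdef __USE_SDL
  #include <SDL.h>
#endif
using namespace std;          // some snippets use bare cout/vector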
Example no. 2
int main(int argc, char** argv) {
    if (argc < 2) {
        std::cout << "Usage: " << argv[0] << " rom_file" << std::endl;
        return 1;
    }

    ALEInterface ale;

    // Get & Set the desired settings
    ale.setInt("random_seed", 123);

    // We enable both screen and sound, which we will need for recording.
    ale.setBool("display_screen", true);
    // You may leave sound disabled (by setting this flag to false) if so desired.
    ale.setBool("sound", true);

    std::string recordPath = "record";
    std::cout << std::endl;

    // Set record flags
    ale.setString("record_screen_dir", recordPath.c_str());
    ale.setString("record_sound_filename", (recordPath + "/sound.wav").c_str());
    // We set fragsize to 64 to ensure proper sound sync
    ale.setInt("fragsize", 64);

    // Not completely portable, but will work in most cases
    std::string cmd = "mkdir ";
    cmd += recordPath;
    system(cmd.c_str());

    // Load the ROM file. (Also resets the system for new settings to
    // take effect.)
    ale.loadROM(argv[1]);

    // Get the vector of legal actions
    ActionVect legal_actions = ale.getLegalActionSet();

    // Play a single episode, which we record.
    while (!ale.game_over()) {

        Action a = legal_actions[rand() % legal_actions.size()];
        // Apply the action (discard the resulting reward)
        ale.act(a);
    }

    std::cout << std::endl;
    std::cout << "Recording complete. To create a video, you may want to run \n"
              "  doc/scripts/videoRecordingExampleJoinXXX.sh. See manual for details.." << std::endl;

    return 0;
}
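(The mkdir call above shells out and is flagged as not fully portable. On C++17 and later, std::filesystem provides a standard alternative; a minimal sketch, not part of the original example:)

#include <filesystem>

std::error_code ec;
std::filesystem::create_directory(recordPath, ec); // no exception thrown on failure
if (ec) {
    std::cerr << "Could not create " << recordPath << ": " << ec.message() << std::endl;
}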
Example no. 3
Learner::Learner(ALEInterface& ale, Parameters *param) : bproFeatures(param->gameName){
	delta = 0.0;
	cumReward = 0; 
	prevCumReward = 0;
	cumIntrReward = 0;
	prevCumIntrReward = 0;
	maxFeatVectorNorm = 1;
	pathToSaveLearnedWeights = param->outputPath;

	//Two indicator slots per RAM feature: one for a 0->1 flip, one for 1->0
	//(the -1 presumably excludes a bias feature).
	for(int i = 0; i < (ramFeatures.getNumberOfFeatures() - 1) * 2; i++){
		transitions.push_back(0);
	}

	actions = ale.getLegalActionSet();

	numOptions      = param->numOptions;
	numBasicActions = actions.size();
	numTotalActions = numBasicActions + numOptions;

	//Read the file containing the vector that describes the reward for option learning.
	//The first X positions encode the 0->1 transitions and the remaining X encode 1->0.
	pathToRewardDescription = param->eigVectorPath;
	std::ifstream infile1(pathToRewardDescription.c_str());
	float value;
	while(infile1 >> value){
		eigVector.push_back(value);
	}
	pathToStatsDescription = param->statEigVectorPath;
	std::ifstream infile2((pathToStatsDescription + "_mean.out").c_str());
	while(infile2 >> value){
		mean.push_back(value);
	}
	std::ifstream infile3((pathToStatsDescription + "_std.out").c_str());
	while(infile3 >> value){
		std.push_back(value); //"std" is a member vector of standard deviations, not the namespace
	}

	numFeatures = bproFeatures.getNumberOfFeatures();
	
	for(int i = 0; i < numTotalActions; i++){
		//Initialize Q:
		Q.push_back(0);
		Qnext.push_back(0);
		//Initialize e:
		e.push_back(vector<float>(numFeatures, 0.0));
		w.push_back(vector<float>(numFeatures, 0.0));
		nonZeroElig.push_back(vector<int>());
	}
}
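(The constructor only loads eigVector, mean, and std; their use is elsewhere in the class. A plausible reading of the comments above is that the intrinsic reward is the dot product of the standardized RAM-transition vector with the loaded eigenvector. The helper below is a hypothetical sketch under that assumption, not code from this repository:)

#include <vector>

// Hypothetical: intrinsic reward for one step. "transitions" is the binary
// vector built above (first half: 0->1 bit flips, second half: 1->0 flips).
float intrinsicReward(const std::vector<int>& transitions,
                      const std::vector<float>& eigVector,
                      const std::vector<float>& mean,
                      const std::vector<float>& stdDev) {
    float reward = 0.0f;
    for (size_t i = 0; i < transitions.size(); i++) {
        // Standardize each indicator before weighting it by the eigenvector entry.
        reward += eigVector[i] * ((transitions[i] - mean[i]) / stdDev[i]);
    }
    return reward;
}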
Example no. 4
int main(int argc, char** argv) {
    if (argc < 2) {
        std::cerr << "Usage: " << argv[0] << " rom_file" << std::endl;
        return 1;
    }

    ALEInterface ale;

    // Get & Set the desired settings
    ale.setInt("random_seed", 123);
    //The default is already 0.25, this is just an example
    ale.setFloat("repeat_action_probability", 0.25);

#ifdef __USE_SDL
    ale.setBool("display_screen", true);
    ale.setBool("sound", true);
#endif

    // Load the ROM file. (Also resets the system for new settings to
    // take effect.)
    ale.loadROM(argv[1]);

    // Get the vector of legal actions
    ActionVect legal_actions = ale.getLegalActionSet();

    // Play 10 episodes
    for (int episode=0; episode<10; episode++) {
        float totalReward = 0;
        while (!ale.game_over()) {
            Action a = legal_actions[rand() % legal_actions.size()];
            // Apply the action and get the resulting reward
            float reward = ale.act(a);
            totalReward += reward;
            if (reward != 0) {
                std::cout << "totalReward: " << totalReward << std::endl;
            }
        }
        cout << "Episode " << episode << " ended with score: " << totalReward << endl;
        ale.reset_game();
    }

    return 0;
}
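(Note that ale.setInt("random_seed", 123) seeds the emulator, not the C rand() that picks actions; unseeded, rand() falls back to its default seed of 1. For explicit control over the action sequence, a sketch using <random> instead:)

#include <random>

std::mt19937 rng(123); // fixed seed for reproducible action choices
std::uniform_int_distribution<size_t> pick(0, legal_actions.size() - 1);
Action a = legal_actions[pick(rng)];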
Example no. 5
RLLearner::RLLearner(ALEInterface& ale, Parameters *param){
	frame               = 0;
	randomActionTaken   = 0;

	gamma               = param->getGamma();
	epsilon             = param->getEpsilon();
	toUseOnlyRewardSign = param->getUseRewardSign();
	numStepsPerAction   = param->getNumStepsPerAction();
	toBeOptimistic      = param->getOptimisticInitialization();
	
	episodeLength       = param->getEpisodeLength();
	numEpisodesEval     = param->getNumEpisodesEval();
	numEpisodesLearn    = param->getNumEpisodesLearn();

	//Get the number of effective actions:
	if(param->isMinimalAction()){
		actions = ale.getMinimalActionSet();
	}
	else{
		actions = ale.getLegalActionSet();
	}
	numActions = actions.size();
}
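(The constructor reads epsilon and the action set, but the policy itself lives elsewhere in the class. For illustration, here is a hypothetical epsilon-greedy selector over the numActions Q-values such a learner typically maintains; the function and its signature are assumptions:)

#include <algorithm>
#include <random>
#include <vector>

// Hypothetical: with probability epsilon pick a uniformly random action,
// otherwise the greedy (argmax-Q) one.
int epsilonGreedy(const std::vector<float>& Q, float epsilon, std::mt19937& rng) {
    std::uniform_real_distribution<float> coin(0.0f, 1.0f);
    if (coin(rng) < epsilon) {
        std::uniform_int_distribution<int> pick(0, static_cast<int>(Q.size()) - 1);
        return pick(rng); // explore
    }
    return static_cast<int>(
        std::max_element(Q.begin(), Q.end()) - Q.begin()); // exploit
}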