int main(int argc, char** argv) {
    if (argc < 2) {
        std::cerr << "Usage: " << argv[0] << " rom_file" << std::endl;
        return 1;
    }

    ALEInterface ale;

    // Get & Set the desired settings
    ale.setInt("random_seed", 123);
    //The default is already 0.25, this is just an example
    ale.setFloat("repeat_action_probability", 0.25);

#ifdef __USE_SDL
    ale.setBool("display_screen", true);
    ale.setBool("sound", true);
#endif

    // Load the ROM file. (Also resets the system for new settings to
    // take effect.)
    ale.loadROM(argv[1]);

    // Get the vector of legal actions
    ActionVect legal_actions = ale.getLegalActionSet();

    // Play 10 episodes
    for (int episode=0; episode<1; episode++) {
        float totalReward = 0;
        while (!ale.game_over()) {
            /*
            const ALEScreen screen = ale.getScreen();
            float row_sum = 0, column_sum = 0, tot = 0.0;
            for (int i = 0; i < screen.height(); ++i)
            {
                for (int j = 0; j < screen.width(); ++j)
                {
                    pixel_t tmp = screen.get(i, j);
                    if(tmp >= 41 && tmp <= 43) {
                        row_sum += i;
                        column_sum += j;
                        tot++;
                    }
                }
            }
            cout << (row_sum/tot) << "," << (column_sum/tot) << endl;
            */
            Action a = legal_actions[rand() % legal_actions.size()];
            // Apply the action and get the resulting reward
            float reward = ale.act(a);
            totalReward += reward;
        }
        cout << "Episode " << episode << " ended with score: " << totalReward << endl;
        ale.reset_game();
    }

    return 0;
}
// Video-recording example: plays one episode with a random agent while the
// emulator records screen frames and sound into the "record" directory.
//
// Usage: <program> rom_file
// Returns 0 on success, 1 when the ROM path argument is missing.
int main(int argc, char** argv) {
    if (argc < 2) {
        // Usage errors are diagnostics, so they go to stderr (as in the other
        // examples) rather than polluting stdout.
        std::cerr << "Usage: " << argv[0] << " rom_file" << std::endl;
        return 1;
    }

    ALEInterface ale;

    // Get & Set the desired settings
    ale.setInt("random_seed", 123);

    // We enable both screen and sound, which we will need for recording.
    ale.setBool("display_screen", true);
    // You may leave sound disabled (by setting this flag to false) if so desired.
    ale.setBool("sound", true);

    const std::string recordPath = "record";
    std::cout << std::endl;

    // Set record flags
    ale.setString("record_screen_dir", recordPath.c_str());
    ale.setString("record_sound_filename", (recordPath + "/sound.wav").c_str());
    // We set fragsize to 64 to ensure proper sound sync
    ale.setInt("fragsize", 64);

    // Create the output directory through the shell.
    // Not completely portable, but will work in most cases. The exit status is
    // deliberately ignored (best effort): presumably the command also reports
    // failure when the directory already exists, which is harmless here.
    const std::string cmd = "mkdir " + recordPath;
    system(cmd.c_str());

    // Load the ROM file. (Also resets the system for new settings to
    // take effect.)
    ale.loadROM(argv[1]);

    // Get the vector of legal actions
    ActionVect legal_actions = ale.getLegalActionSet();

    // Play a single episode, which we record.
    while (!ale.game_over()) {
        // Pick a uniformly random legal action.
        Action a = legal_actions[rand() % legal_actions.size()];
        // Apply the action (discard the resulting reward)
        ale.act(a);
    }

    std::cout << std::endl;
    std::cout << "Recording complete. To create a video, you may want to run \n"
              "  doc/scripts/videoRecordingExampleJoinXXX.sh. See manual for details.." << std::endl;

    return 0;
}
// Example #3
// 0
//Run the Arcade Learning Environment using the DQN agent.
void run_ale(int argc, char** argv)
{
	//Create Arcade Learning Environment
	ALEInterface* ale = new ALEInterface(false);
	//Load the Atari Rom we are going to play
	ale->loadROM(argv[1]);
	//Get the set of possible actions from ALE ROM
	ActionVect action_set = {PLAYER_A_LEFTFIRE,PLAYER_A_FIRE,PLAYER_A_RIGHTFIRE}; //ale->getMinimalActionSet();
	//Create action descriptor
	ActionDescriptor descriptor({action_set.size()},{});
	//Create Learning System
	DACN system(descriptor,0.9,4,1000000,100,32);
	//Set exploration rate
	system.exploration_rate(0.9);

	cudaProfilerStart(); nvtxRangePushA("2 Atari Games");
	for(int episode=0; episode<2; episode++)
	{	
		string tmp = "episode: " + to_string(episode);
		nvtxRangePushA(tmp.c_str());

		//Restart the game
		ale->reset_game();

		//Game Loop
		while(!ale->game_over())
		{
			nvtxRangePushA("step");
				//Convert screen to input
				ALEScreen screen = ale->getScreen();
				gray8_image_t img = to_image(screen);
				vector<unsigned char> input = to_input(img);
				float raw_action = system.forward( input )[0];
				//cast action
				int action = static_cast<int>(raw_action);
				//Execute the action and get the reward
				float reward = ale->act(action_set[action]);
				//Normalize the reward
				float normalized_reward = max(min(1.0f,reward),-1.0f);
				//Backward the result
				system.backward(normalized_reward,ale->game_over());
			nvtxRangePop();
		}
		nvtxRangePop();
	}
	cudaProfilerStop(); nvtxRangePop();
}
int main(int argc, char** argv) {
    if (argc < 2) {
        std::cerr << "Usage: " << argv[0] << " rom_file" << std::endl;
        return 1;
    }

    ALEInterface ale;

    // Get & Set the desired settings
    ale.setInt("random_seed", 123);
    //The default is already 0.25, this is just an example
    ale.setFloat("repeat_action_probability", 0.25);

#ifdef __USE_SDL
    ale.setBool("display_screen", true);
    ale.setBool("sound", true);
#endif

    // Load the ROM file. (Also resets the system for new settings to
    // take effect.)
    ale.loadROM(argv[1]);

    // Get the vector of legal actions
    ActionVect legal_actions = ale.getLegalActionSet();

    // Play 10 episodes
    for (int episode=0; episode<10; episode++) {
        float totalReward = 0;
        while (!ale.game_over()) {
            Action a = legal_actions[rand() % legal_actions.size()];
            // Apply the action and get the resulting reward
            float reward = ale.act(a);
            totalReward += reward;
            if (reward != 0){
              cout<<"totalReward: "<<totalReward<<endl;
            }
        }
        cout << "Episode " << episode << " ended with score: " << totalReward << endl;
        ale.reset_game();
    }

    return 0;
}
// Plays a fixed number of Gravitar episodes with the Decision agent and
// prints the score and time of every episode plus the averages at the end.
// The ROM path is hard-coded to "gravitar.bin"; argc/argv are not used.
int main(int argc, char** argv) {
    ALEInterface ale;

    // Get & Set the desired settings
    ale.setInt("random_seed", 123);
    // Set to 0 because we don't want stochasticity
    ale.setFloat("repeat_action_probability", 0);

#ifdef __USE_SDL
    ale.setBool("display_screen", false);
    ale.setBool("sound", false);
#endif

    /// Uncomment to Record
       // std::string recordPath = "record";
       // std::cout << std::endl;

       // // Set record flags
       // ale.setString("record_screen_dir", recordPath.c_str());
       // ale.setString("record_sound_filename", (recordPath + "/sound.wav").c_str());
       // // We set fragsize to 64 to ensure proper sound sync
       // ale.setInt("fragsize", 64);

       // // Not completely portable, but will work in most cases
       // std::string cmd = "mkdir ";
       // cmd += recordPath;
       // system(cmd.c_str());


    // Load the ROM file. (Also resets the system for new settings to
    // take effect.)
    ale.loadROM("gravitar.bin");

    // Get the vector of minimal actions
    const ActionVect minimal_actions = ale.getMinimalActionSet();

    // Erase actions that move, but don't fire
    //minimal_actions.erase(minimal_actions.begin() + 2, minimal_actions.begin() + 10);

    // Accumulators over all episodes, used for the final averages
    float allRewards = 0;
    double allTimes = 0;
    Timer timer;

    // Number of episodes to play
    const int episodes = 200;

    // Life count observed on the previous step. It is refreshed on EVERY step
    // so that it follows the count back up after reset_game() (or after an
    // extra life); otherwise a later life loss would go undetected.
    int lastLives = ale.lives();

    Decision decision = Decision(ale.getMinimalActionSet(), ale.getScreen());


    for (int episode=0; episode<episodes; episode++) {
        float totalReward = 0;
        double episodeTime = 0;
        timer.start();
        while (!ale.game_over()) {
            // `reset` is true exactly on the step where a life was just lost.
            const int lives = ale.lives();
            bool reset = lives < lastLives;
            lastLives = lives;

            // Apply the action and get the resulting reward
            float reward = ale.act(decision.getDecision(ale.getScreen(), lives, reset));
            //decision.print();
            totalReward += reward;
        }
        timer.stop();
        episodeTime = timer();
        timer.reset();
        allRewards += totalReward;
        allTimes += episodeTime;
        cout << "Episode " << episode << " ended with score: " << totalReward << " with time: "<< episodeTime <<endl;
        ale.reset_game();
    }

    // Display average reward and average time per game
    cout << "Average Reward: " << (allRewards / episodes) << " Average Time: " << (allTimes/episodes) << endl;

    return 0;
}