Example #1
int playGame(ALEInterface& ale, RAMFeatures *ram, BPROFeatures *bpro, 
	vector<vector<vector<float> > > &w, Parameters param, int totalNumFrames, int gameId){
	ale.reset_game();
	vector<bool> F(NUM_BITS, 0); //Set of active features
	vector<bool> Fprev;

	int score = 0;
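	//Play until the episode ends or the global frame budget (MAX_NUM_FRAMES) is exhausted: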
	while(!ale.game_over() && totalNumFrames + ale.getEpisodeFrameNumber() < MAX_NUM_FRAMES){
		int nextAction = getNextAction(ale, param.numOptions);
		score += actUpdatingAvg(ale, ram, bpro, nextAction, w, param, totalNumFrames, gameId, F, Fprev);
	}
	totalNumFrames += ale.getEpisodeFrameNumber();
	printf("Episode: %d, Final score: %d, Total Num. Frames: %d\n", gameId+1, score, totalNumFrames);
	return totalNumFrames;
}
Example #2
int actUpdatingAvg(ALEInterface& ale, RAMFeatures *ram, BPROFeatures *features, int nextAction, 
	vector<vector<vector<float> > > &w, Parameters param, int totalNumFrames, int gameId,
	vector<bool> &F, vector<bool> &Fprev){

	int reward = 0;

	//If the selected action was one of the primitive actions
	if(nextAction < NUM_ACTIONS){ 
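		//Repeat the primitive action for FRAME_SKIP frames, refreshing the RAM
		//feature vector and the feature-visitation average after every frame: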
		for(int i = 0; i < FRAME_SKIP && totalNumFrames + ale.getEpisodeFrameNumber() < MAX_NUM_FRAMES; i++){
			reward += ale.act((Action) nextAction);
			Fprev.swap(F);
			F.clear();
			ram->getCompleteFeatureVector(ale.getRAM(), F);
			F.pop_back();
			updateAverage(Fprev, F, ale.getEpisodeFrameNumber(), param, gameId);
		}
	}
	//If the selected action was one of the options
	else{
		int currentAction;
		vector<int> Fbpro;	                  //Indices of the active features
		vector<float> Q(NUM_ACTIONS, 0.0);    //Q(a) entries

		int option = nextAction - NUM_ACTIONS;
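		//Run the option's internal policy (epsilon-greedy over its learned Q-values)
		//until it terminates stochastically with probability PROB_TERMINATION,
		//the game ends, or the frame budget is exhausted: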
		while(rand()%1000 > 1000 * PROB_TERMINATION && !ale.game_over() && totalNumFrames + ale.getEpisodeFrameNumber() < MAX_NUM_FRAMES){
			//Get the current state and the features active in that state:
			Fbpro.clear();
			features->getActiveFeaturesIndices(ale.getScreen(), Fbpro);
			updateQValues(Fbpro, Q, w, option);       //Update Q-values for each possible action
			currentAction = epsilonGreedy(Q);
			//Take action, observe reward and next state:
			reward += ale.act((Action) currentAction);
			Fprev.swap(F);
			F.clear();
			ram->getCompleteFeatureVector(ale.getRAM(), F);
			F.pop_back();
			updateAverage(Fprev, F, ale.getEpisodeFrameNumber(), param, gameId);
		}
	}
	return reward;
}
Example #3
void Learner::learnPolicy(ALEInterface& ale, vector<vector<vector<float> > > &learnedOptions){
	
	vector<float> reward;
	//Repeat (for each episode):
	int episode, totalNumberFrames = 0;
	//This is going to be interrupted by the ALE code since I set max_num_frames beforehand
	for(episode = 0; totalNumberFrames < MAX_NUM_FRAMES; episode++){ 
		//We have to clean the traces every episode:
		for(unsigned int a = 0; a < nonZeroElig.size(); a++){
			for(unsigned int i = 0; i < nonZeroElig[a].size(); i++){
				int idx = nonZeroElig[a][i];
				e[a][idx] = 0.0;
			}
			nonZeroElig[a].clear();
		}
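		//Start the episode: observe the initial screen, compute its active
		//features and pick the first action epsilon-greedily: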
		F.clear();
		bproFeatures.getActiveFeaturesIndices(ale.getScreen(), F);
		updateQValues(F, Q);
		currentAction = epsilonGreedy(Q);
		//Repeat (for each step of the episode) until the game is over:
		gettimeofday(&tvBegin, NULL);

		//This also stops when the maximum number of steps per episode is reached
		while(!ale.game_over()){
			reward.clear();
			reward.push_back(0.0);
			reward.push_back(0.0);
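			//reward[0] accumulates the intrinsic (novelty) reward, reward[1] the environment reward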
			updateQValues(F, Q);
			sanityCheck();
			//Take action, observe reward and next state:
			act(ale, currentAction, reward, learnedOptions);
			cumIntrReward += reward[0];
			cumReward  += reward[1];
			if(!ale.game_over()){
				//Obtain active features in the new state:
				Fnext.clear();
				bproFeatures.getActiveFeaturesIndices(ale.getScreen(), Fnext);
				updateQValues(Fnext, Qnext);     //Update Q-values for the new active features
				nextAction = epsilonGreedy(Qnext);
			}
			else{
				nextAction = 0;
				for(unsigned int i = 0; i < Qnext.size(); i++){
					Qnext[i] = 0;
				}
			}
			//To ensure the learning rate never increases over time, we keep track of
			//the largest feature-vector size seen so far (the approach Marc used in his JAIR paper)
			if (F.size() > maxFeatVectorNorm){
				maxFeatVectorNorm = F.size();
			}

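			//Sarsa TD error, computed from the intrinsic reward in reward[0]: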
			delta = reward[0] + GAMMA * Qnext[nextAction] - Q[currentAction];
			updateReplTrace(currentAction, F);

			//Update weights vector:
			float stepSize = ALPHA/maxFeatVectorNorm;
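			//Apply the TD update only to the weights with non-zero eligibility traces: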
			for(unsigned int a = 0; a < nonZeroElig.size(); a++){
				for(unsigned int i = 0; i < nonZeroElig[a].size(); i++){
					int idx = nonZeroElig[a][i];
					w[a][idx] = w[a][idx] + stepSize * delta * e[a][idx];
				}
			}
			F = Fnext;
			FRam = FnextRam;
			currentAction = nextAction;
		}
		gettimeofday(&tvEnd, NULL);
		timeval_subtract(&tvDiff, &tvEnd, &tvBegin);
		elapsedTime = float(tvDiff.tv_sec) + float(tvDiff.tv_usec)/1000000.0;
		
		float fps = float(ale.getEpisodeFrameNumber())/elapsedTime;
		printf("episode: %d,\t%.0f points,\tavg. return: %.1f,\tnovelty reward: %.2f (%.2f),\t%d frames,\t%.0f fps\n",
			episode + 1, cumReward - prevCumReward, (float)cumReward/(episode + 1.0),
			cumIntrReward - prevCumIntrReward, cumIntrReward/(episode + 1.0), ale.getEpisodeFrameNumber(), fps);
		totalNumberFrames += ale.getEpisodeFrameNumber();
		prevCumReward = cumReward;
		prevCumIntrReward = cumIntrReward;
		ale.reset_game();
	}
	
	stringstream ss;
	ss << episode;
	saveWeightsToFile(ss.str());
}