Action SMDPAgent::step(double reward, const State &state, std::ostream &output) {
    // One SMDP step: credit the reward earned since the last decision,
    // then select the next action for `state` and apply it.
    //
    // @param reward  reward accumulated for the previously applied action
    // @param state   current environment state used for action selection
    // @param output  stream for any logging done by the underlying calls
    // @return        the action that was selected and applied
    giveReward(reward, output);
    const Action chosen = nextAction(state, output);
    applyAction(chosen, output);
    return chosen;
}
// Terminates the current episode: delivers the final reward to the
// learner, then performs end-of-episode bookkeeping via finalize().
//
// @param reward  terminal reward for the episode
// @param output  stream for any logging done by the underlying calls
void SMDPAgent::endEpisode(double reward, std::ostream &output) {
    giveReward(reward, output);
    finalize(output);
}
void MRLAgent::giveMotivationReward(){ //Initialise novelty float novelty=0; //Initialise the state-action vector to be fed to expectation map. vector<float> observation_action(num_inputs, 0); //If less than 2 observations have been made, leave observation_action a zero vector. Otherwise set to second last observation. if ((int)observations.size()>=2){ observation_action = observations[observations.size()-2]; } //Add action to state in which the action is taken observation_action.push_back(actions[actions.size()-2]); //Get expected observation and actual observation. vector<float> expected_observation = expectation_map->getValues(observation_action); vector<float> actual_observation = observations[observations.size()-1]; //Setup values for learning. Moves the expected_observation along the straight line toward actual_observation in R^n. vector<vector<float> > obs(1,observation_action);//Setup vector with one input to be used for expectation_map learning. vector<vector<float> > val(1,expected_observation);//setup unit list of values. //Change values: for (int j=0; j<expected_observation.size();j++){ float diff = actual_observation[j]-expected_observation[j]; val[0][j]+= lambda*diff; } /*If expected observation is the zero vector it is highly likely that the state has not been visited before. In this case set the novelty to zero. 
*/ float sum_exp = 0; for (int j=0; j<expected_observation.size();j++){ sum_exp = expected_observation[j]*expected_observation[j]; } if(sum_exp == 0){ //Do learning for expectation_map expectation_map->doLearningEpisode(obs,val,1.0,1); float motivation = wundtFunction(0); //cout<<"MRLAGENT:: giveMotivationReward - novelty = "<<0<<endl; giveReward(motivation); return; } //If expected observation non-zero, calculate error in prediction to get novelty: for (int j=0; j<expected_observation.size();j++){ float diff = 0; diff = (expected_observation[j]-actual_observation[j]); novelty+= diff*diff; } //cout<<"Novelty = "<<novelty<<endl; //Do learning for expectation_map expectation_map->doLearningEpisode(obs,val,1.0,1); float motivation = wundtFunction(novelty); //cout<<"MRLAGENT:: giveMotivationReward - novelty = "<<novelty<< " Reward = "<< motivation<<endl; giveReward(motivation); /*OLD NOVELTY //Debug // cout<<"obs list size = "<<observations.size()<<endl; float memory_decay = 0.5; int count=0; //Iterate backwards over all observations and calculate a sum of euclidean metrics of latest observation with old observations. for (int i=observations.size()-2; i>=std::max(0,((int)observations.size()-memory_length-1));i--){ for (int j=0; j<observations[i].size();j++){ float diff = observations[observations.size()-1][j]-observations[i][j]; novelty+= memory_decay*diff*diff; } count++; memory_decay*=memory_decay; } END OLD NOVELTY */ }