/*
 * Release the agent's resources: clear both action structs, free the
 * stored last observation, and free the value-function array.
 *
 * Every pointer released here is reset to 0 afterwards so that no stale
 * pointer is left behind for a later call to pick up.
 */
void agent_cleanup() {
    clearRLStruct(&this_action);
    clearRLStruct(&last_action);

    freeRLStructPointer(last_observation);
    /* Reset alongside value_function below so the freed struct is never reused. */
    last_observation = 0;

    /* free() accepts a null pointer, so no guard is needed. */
    free(value_function);
    value_function = 0;
}
void agent_end(double reward) { int lastState=last_observation->intArray[0]; int lastAction=last_action.intArray[0]; double Q_sa=value_function[calculateArrayIndex(lastState,lastAction)]; double new_Q_sa=Q_sa + sarsa_stepsize * (reward - Q_sa); /* Only update the value function if the policy is not frozen */ if(!policy_frozen){ value_function[calculateArrayIndex(lastState,lastAction)]=new_Q_sa; } clearRLStruct(&last_action); clearRLStruct(last_observation); }
/*
 * Begin a new episode: bump the episode counter, reset the step counter,
 * and return the shared observation `o` after clearing it.
 *
 * Returns: pointer to the cleared observation `o`.
 */
const observation_t *env_start()
{
    stepCount = 0;
    episodeCount++;

    clearRLStruct(o);
    /* Structure check on the observation before it is returned. */
    __RL_CHECK_STRUCT(o)

    return o;
}
void RL_cleanup() { int experimentState = kRLCleanup; assert(theExperimentConnection != 0); rlBufferClear(&clientexp_rlbuffer); rlSendBufferData(theExperimentConnection, &clientexp_rlbuffer, experimentState); rlBufferClear(&clientexp_rlbuffer); rlRecvBufferData(theExperimentConnection, &clientexp_rlbuffer, &experimentState); assert(experimentState == kRLCleanup); clearRLStruct(&clientexp_observation); clearRLStruct(&clientexp_action); /*safe even if it is null */ free(clientexp_message); clientexp_message = 0; clientexp_messagecapacity = 0; }
void agent_cleanup() { clearRLStruct(¤tAction); for (CStringUnorderedMap< double* >::iterator iter = tableQ.begin(); iter != tableQ.end();) { double total_interesting_moves = 0.0; double* values = iter->second; char* tmpChar = iter->first; iter = tableQ.erase(iter); free(tmpChar); delete[] values; } }
const reward_observation_terminal_t *env_step(const action_t *a) { int terminal=0; stepCount++; clearRLStruct(o); /*Short episode with big observations*/ if(episodeCount%2==0){ __RL_CHECK_STRUCT(o) set_k_ints_in_abstract_type(o, 50000); __RL_CHECK_STRUCT(o) set_k_doubles_in_abstract_type(o, 50000); __RL_CHECK_STRUCT(o) if(stepCount==200)terminal=1; }
/*
 * Handle an agent-cleanup request.
 *
 * The request carries no payload, so nothing is read from the buffer.
 * Runs agent_cleanup(), clears the buffer for the reply, and then releases
 * the client-side observation, task spec and incoming-message storage.
 *
 * theConnection: not used in this function.
 */
static void onAgentCleanup(int theConnection)
{
    /* Run the agent's own cleanup; there is no input to decode first. */
    agent_cleanup();

    /* Reset the buffer so it is ready for the reply to the server. */
    rlBufferClear(&theBuffer);

    /* Release the resources held on the client side. */
    clearRLStruct(&clientagent_observation);

    free(theTaskSpec);
    theTaskSpec = 0;

    free(clientagent_inmessage);
    clientagent_inmessage = 0;
    clientagent_inmessagecapacity = 0;
}
/*
 * Initialise the agent by clearing emptyAction.
 *
 * task_spec: not read by this function.
 */
void agent_init(const char * task_spec)
{
    clearRLStruct(emptyAction);
}
/*
 * Clean up the environment by clearing this_observation.
 */
void env_cleanup()
{
    clearRLStruct(&this_observation);
}