const reward_observation_action_terminal_t* RL_step() { int experimentState = kRLStep; static reward_observation_action_terminal_t roat = {0, 0,0, 0}; unsigned int offset = 0; assert(theExperimentConnection != 0); rlBufferClear(&clientexp_rlbuffer); rlSendBufferData(theExperimentConnection, &clientexp_rlbuffer, experimentState); /* Recv Data from Server */ rlBufferClear(&clientexp_rlbuffer); rlRecvBufferData(theExperimentConnection, &clientexp_rlbuffer, &experimentState); assert(experimentState == kRLStep); offset = rlBufferRead(&clientexp_rlbuffer, offset, &roat.terminal, 1, sizeof(int)); offset = rlBufferRead(&clientexp_rlbuffer, offset, &roat.reward, 1, sizeof(double)); offset = rlCopyBufferToADT(&clientexp_rlbuffer, offset, &clientexp_observation); offset = rlCopyBufferToADT(&clientexp_rlbuffer, offset, &clientexp_action); __RL_CHECK_STRUCT(&clientexp_observation) __RL_CHECK_STRUCT(&clientexp_action) roat.observation = &clientexp_observation; roat.action = &clientexp_action; return &roat; }
const reward_observation_terminal_t *env_step(const action_t *theAction) { int envState = kEnvStep; static reward_observation_terminal_t ro = {0,0,0}; unsigned int offset = 0; __RL_CHECK_STRUCT(theAction) rlBufferClear(&theBuffer); offset = 0; /* Send theAction to the client environment */ offset = rlCopyADTToBuffer(theAction, &theBuffer, offset); rlSendBufferData(rlGetEnvironmentConnection(), &theBuffer, envState); rlBufferClear(&theBuffer); rlRecvBufferData(rlGetEnvironmentConnection(), &theBuffer, &envState); assert(envState == kEnvStep); /* Receive theObservation from the client environment */ offset = 0; offset = rlBufferRead(&theBuffer, offset, &ro.terminal, 1, sizeof(int)); offset = rlBufferRead(&theBuffer, offset, &ro.reward, 1, sizeof(double)); if(theObservation==0)theObservation=allocateRLStructPointer(0,0,0); offset = rlCopyBufferToADT(&theBuffer, offset, theObservation); __RL_CHECK_STRUCT(theObservation) ro.observation = theObservation; return &ro; }
const reward_observation_terminal_t *env_step(const action_t *a) { int terminal=0; stepCount++; clearRLStruct(o); /*Short episode with big observations*/ if(episodeCount%2==0){ __RL_CHECK_STRUCT(o) set_k_ints_in_abstract_type(o, 50000); __RL_CHECK_STRUCT(o) set_k_doubles_in_abstract_type(o, 50000); __RL_CHECK_STRUCT(o) if(stepCount==200)terminal=1; }
/*
 * Agent step that answers with a copy of the observation it was given.
 *
 * Increments stepCount, releases the previously stored action and replaces
 * it with a duplicate of o. The reward argument is not used.
 *
 * Returns the stored action pointer; it is freed on the next call.
 */
const action_t *agent_step(const double reward, const observation_t *o) {
    __RL_CHECK_STRUCT(o);

    ++stepCount;

    /* Drop the action from the previous call before storing the new copy. */
    freeRLStructPointer(action);
    action = duplicateRLStructToPointer(o);
    __RL_CHECK_STRUCT(action)

    return action;
}
static void onAgentStart(int theConnection) { const action_t *theAction; unsigned int offset = 0; /* Read the data in the buffer (data from server) */ offset = rlCopyBufferToADT(&theBuffer, offset, &clientagent_observation); __RL_CHECK_STRUCT(&clientagent_observation) /* Call RL method on the recv'd data */ theAction = agent_start(&clientagent_observation); __RL_CHECK_STRUCT(theAction) /* Prepare the buffer for sending data back to the server */ rlBufferClear(&theBuffer); offset = 0; offset = rlCopyADTToBuffer(theAction, &theBuffer, offset); }
const observation_action_t *RL_start() { int experimentState = kRLStart; static observation_action_t oa = { 0,0}; unsigned int offset = 0; assert(theExperimentConnection != 0); rlBufferClear(&clientexp_rlbuffer); rlSendBufferData(theExperimentConnection, &clientexp_rlbuffer, experimentState); rlBufferClear(&clientexp_rlbuffer); rlRecvBufferData(theExperimentConnection, &clientexp_rlbuffer, &experimentState); assert(experimentState == kRLStart); offset = rlCopyBufferToADT(&clientexp_rlbuffer, offset, &clientexp_observation); offset = rlCopyBufferToADT(&clientexp_rlbuffer, offset, &clientexp_action); __RL_CHECK_STRUCT(&clientexp_observation) __RL_CHECK_STRUCT(&clientexp_action) oa.observation = &clientexp_observation; oa.action = &clientexp_action; return &oa; }
const observation_t *env_start() { int envState = kEnvStart; unsigned int offset = 0; rlBufferClear(&theBuffer); rlSendBufferData(rlGetEnvironmentConnection(), &theBuffer, envState); rlBufferClear(&theBuffer); rlRecvBufferData(rlGetEnvironmentConnection(), &theBuffer, &envState); assert(envState == kEnvStart); if(theObservation==0)theObservation=allocateRLStructPointer(0,0,0); __RL_CHECK_STRUCT(theObservation) offset = rlCopyBufferToADT(&theBuffer, offset, theObservation); return theObservation; }
/* Send the observation to the agent, receive the action and return it */ const action_t *agent_start(const observation_t *theObservation) { int agentState = kAgentStart; unsigned int offset = 0; __RL_CHECK_STRUCT(theObservation); rlBufferClear(&theBuffer); offset = 0; offset = rlCopyADTToBuffer(theObservation, &theBuffer, offset); rlSendBufferData(rlGetAgentConnection(), &theBuffer, agentState); rlBufferClear(&theBuffer); rlRecvBufferData(rlGetAgentConnection(), &theBuffer, &agentState); assert(agentState == kAgentStart); offset = 0; if(globalAction==0)globalAction=allocateRLStructPointer(0,0,0); offset = rlCopyBufferToADT(&theBuffer, offset, globalAction); return globalAction; }