const reward_observation_terminal_t *env_step(const action_t *theAction) { int envState = kEnvStep; static reward_observation_terminal_t ro = {0,0,0}; unsigned int offset = 0; __RL_CHECK_STRUCT(theAction) rlBufferClear(&theBuffer); offset = 0; /* Send theAction to the client environment */ offset = rlCopyADTToBuffer(theAction, &theBuffer, offset); rlSendBufferData(rlGetEnvironmentConnection(), &theBuffer, envState); rlBufferClear(&theBuffer); rlRecvBufferData(rlGetEnvironmentConnection(), &theBuffer, &envState); assert(envState == kEnvStep); /* Receive theObservation from the client environment */ offset = 0; offset = rlBufferRead(&theBuffer, offset, &ro.terminal, 1, sizeof(int)); offset = rlBufferRead(&theBuffer, offset, &ro.reward, 1, sizeof(double)); if(theObservation==0)theObservation=allocateRLStructPointer(0,0,0); offset = rlCopyBufferToADT(&theBuffer, offset, theObservation); __RL_CHECK_STRUCT(theObservation) ro.observation = theObservation; return &ro; }
static void onAgentStart(int theConnection) { const action_t *theAction; unsigned int offset = 0; /* Read the data in the buffer (data from server) */ offset = rlCopyBufferToADT(&theBuffer, offset, &clientagent_observation); __RL_CHECK_STRUCT(&clientagent_observation) /* Call RL method on the recv'd data */ theAction = agent_start(&clientagent_observation); __RL_CHECK_STRUCT(theAction) /* Prepare the buffer for sending data back to the server */ rlBufferClear(&theBuffer); offset = 0; offset = rlCopyADTToBuffer(theAction, &theBuffer, offset); }
/* Send the observation to the agent, receive the action and return it */ const action_t *agent_start(const observation_t *theObservation) { int agentState = kAgentStart; unsigned int offset = 0; __RL_CHECK_STRUCT(theObservation); rlBufferClear(&theBuffer); offset = 0; offset = rlCopyADTToBuffer(theObservation, &theBuffer, offset); rlSendBufferData(rlGetAgentConnection(), &theBuffer, agentState); rlBufferClear(&theBuffer); rlRecvBufferData(rlGetAgentConnection(), &theBuffer, &agentState); assert(agentState == kAgentStart); offset = 0; if(globalAction==0)globalAction=allocateRLStructPointer(0,0,0); offset = rlCopyBufferToADT(&theBuffer, offset, globalAction); return globalAction; }
/* Send the reward and the observation to the agent, receive the action and return it */ const action_t *agent_step(const double theReward, const observation_t *theObservation) { int agentState = kAgentStep; unsigned int offset = 0; rlBufferClear(&theBuffer); offset = 0; offset = rlBufferWrite(&theBuffer, offset, &theReward, 1, sizeof(double)); offset = rlCopyADTToBuffer(theObservation, &theBuffer, offset); rlSendBufferData(rlGetAgentConnection(), &theBuffer, agentState); rlBufferClear(&theBuffer); rlRecvBufferData(rlGetAgentConnection(), &theBuffer, &agentState); assert(agentState == kAgentStep); offset = 0; if(globalAction==0)globalAction=allocateRLStructPointer(0,0,0); offset = rlCopyBufferToADT(&theBuffer, offset, globalAction); return globalAction; }