예제 #1
0
/*
 * Experiment-side step: sends a kRLStep request over the experiment
 * connection and decodes the reply into a reward/observation/action/terminal
 * record.
 *
 * The returned record is a function-local static and its observation/action
 * members point at the file-level clientexp_observation / clientexp_action
 * objects, so the contents are overwritten by the next call.
 */
const reward_observation_action_terminal_t* RL_step() {
  static reward_observation_action_terminal_t result = {0, 0, 0, 0};
  unsigned int cursor = 0;
  int state = kRLStep;

  assert(theExperimentConnection != 0);

  /* Request: an empty buffer tagged with the step state. */
  rlBufferClear(&clientexp_rlbuffer);
  rlSendBufferData(theExperimentConnection, &clientexp_rlbuffer, state);

  /* Reply: the state tag received must still be kRLStep. */
  rlBufferClear(&clientexp_rlbuffer);
  rlRecvBufferData(theExperimentConnection, &clientexp_rlbuffer, &state);
  assert(state == kRLStep);

  /* Fields are read in this order: terminal, reward, observation, action. */
  cursor = rlBufferRead(&clientexp_rlbuffer, cursor, &result.terminal, 1, sizeof(int));
  cursor = rlBufferRead(&clientexp_rlbuffer, cursor, &result.reward, 1, sizeof(double));
  cursor = rlCopyBufferToADT(&clientexp_rlbuffer, cursor, &clientexp_observation);
  cursor = rlCopyBufferToADT(&clientexp_rlbuffer, cursor, &clientexp_action);
  __RL_CHECK_STRUCT(&clientexp_observation)
  __RL_CHECK_STRUCT(&clientexp_action)

  result.observation = &clientexp_observation;
  result.action = &clientexp_action;

  return &result;
}
예제 #2
0
const reward_observation_terminal_t *env_step(const action_t *theAction) {
  int envState = kEnvStep;
  static reward_observation_terminal_t ro = {0,0,0};
  unsigned int offset = 0;

  __RL_CHECK_STRUCT(theAction)
  rlBufferClear(&theBuffer);
  offset = 0;
  /* Send theAction to the client environment */
  offset = rlCopyADTToBuffer(theAction, &theBuffer, offset);
  rlSendBufferData(rlGetEnvironmentConnection(), &theBuffer, envState);

  rlBufferClear(&theBuffer);
  rlRecvBufferData(rlGetEnvironmentConnection(), &theBuffer, &envState);
  assert(envState == kEnvStep);

  /* Receive theObservation from the client environment */
  offset = 0;
  offset = rlBufferRead(&theBuffer, offset, &ro.terminal, 1, sizeof(int));
  offset = rlBufferRead(&theBuffer, offset, &ro.reward, 1, sizeof(double));

	if(theObservation==0)theObservation=allocateRLStructPointer(0,0,0);
  offset = rlCopyBufferToADT(&theBuffer, offset, theObservation);
  __RL_CHECK_STRUCT(theObservation)

  ro.observation = theObservation;
  return &ro;
}
예제 #3
0
/*
 * Test-environment step function.
 * NOTE(review): this snippet is truncated -- only the even-episode branch is
 * visible and the function's closing brace / return are not shown here.
 */
const reward_observation_terminal_t *env_step(const action_t *a)
{
	int terminal=0;
	stepCount++;
	/* Clear the observation o (declared outside this snippet) before refilling it. */
	clearRLStruct(o);
	    
        /* Even-numbered episodes: short episode with big observations. */
        if(episodeCount%2==0){
			__RL_CHECK_STRUCT(o)
            /* presumably fills o with 50000 ints -- confirm against the helper */
            set_k_ints_in_abstract_type(o, 50000);
			__RL_CHECK_STRUCT(o)
            /* presumably fills o with 50000 doubles -- confirm against the helper */
            set_k_doubles_in_abstract_type(o, 50000);
			__RL_CHECK_STRUCT(o)

            /* Flag the episode as terminal once the step counter reaches 200. */
            if(stepCount==200)terminal=1;
        }
예제 #4
0
/*
 * Agent step: validates the incoming observation, bumps stepCount, and
 * replaces the file-level action with a duplicate of the observation.
 * The reward argument is not used here.
 */
const action_t *agent_step(const double reward, const observation_t *o) {
	__RL_CHECK_STRUCT(o);
	stepCount += 1;

	/* Release the previous action before storing the fresh duplicate of o. */
	freeRLStructPointer(action);
	action = duplicateRLStructToPointer(o);
	__RL_CHECK_STRUCT(action)

	return action;
}
예제 #5
0
static void onAgentStart(int theConnection) {
	const action_t *theAction;
	unsigned int offset = 0;

	/* Read the data in the buffer (data from server) */
	offset = rlCopyBufferToADT(&theBuffer, offset, &clientagent_observation);
	__RL_CHECK_STRUCT(&clientagent_observation)

	/* Call RL method on the recv'd data */
	theAction = agent_start(&clientagent_observation);
	__RL_CHECK_STRUCT(theAction)

	/* Prepare the buffer for sending data back to the server */
	rlBufferClear(&theBuffer);
	offset = 0;
	offset = rlCopyADTToBuffer(theAction, &theBuffer, offset);

}
예제 #6
0
/*
 * Experiment-side start: sends a kRLStart request over the experiment
 * connection and decodes the first observation/action pair from the reply.
 *
 * The returned record is a function-local static whose members point at the
 * file-level clientexp_observation / clientexp_action objects, so it is
 * overwritten by the next call.
 */
const observation_action_t *RL_start() {
  static observation_action_t result = {0, 0};
  unsigned int cursor = 0;
  int state = kRLStart;

  assert(theExperimentConnection != 0);

  /* Request: an empty buffer tagged with the start state. */
  rlBufferClear(&clientexp_rlbuffer);
  rlSendBufferData(theExperimentConnection, &clientexp_rlbuffer, state);

  /* Reply: the state tag received must still be kRLStart. */
  rlBufferClear(&clientexp_rlbuffer);
  rlRecvBufferData(theExperimentConnection, &clientexp_rlbuffer, &state);
  assert(state == kRLStart);

  /* The observation is read first, then the action. */
  cursor = rlCopyBufferToADT(&clientexp_rlbuffer, cursor, &clientexp_observation);
  cursor = rlCopyBufferToADT(&clientexp_rlbuffer, cursor, &clientexp_action);
  __RL_CHECK_STRUCT(&clientexp_observation)
  __RL_CHECK_STRUCT(&clientexp_action)

  result.observation = &clientexp_observation;
  result.action = &clientexp_action;

  return &result;
}
예제 #7
0
const observation_t *env_start() {
	int envState = kEnvStart;
	unsigned int offset = 0;

	rlBufferClear(&theBuffer);
	rlSendBufferData(rlGetEnvironmentConnection(), &theBuffer, envState);

	rlBufferClear(&theBuffer);
	rlRecvBufferData(rlGetEnvironmentConnection(), &theBuffer, &envState);
	assert(envState == kEnvStart);

	if(theObservation==0)theObservation=allocateRLStructPointer(0,0,0);
	__RL_CHECK_STRUCT(theObservation)
	
	
	offset = rlCopyBufferToADT(&theBuffer, offset, theObservation);
	return theObservation;
}
예제 #8
0
/* Send the observation to the agent, receive the action and return it */
const action_t *agent_start(const observation_t *theObservation) {
	int agentState = kAgentStart;
	unsigned int offset = 0;

	__RL_CHECK_STRUCT(theObservation);
	rlBufferClear(&theBuffer);
	offset = 0;
	offset = rlCopyADTToBuffer(theObservation, &theBuffer, offset);
	rlSendBufferData(rlGetAgentConnection(), &theBuffer, agentState);

	rlBufferClear(&theBuffer);
	rlRecvBufferData(rlGetAgentConnection(), &theBuffer, &agentState);
	assert(agentState == kAgentStart);
  
	offset = 0;

	if(globalAction==0)globalAction=allocateRLStructPointer(0,0,0);
	offset = rlCopyBufferToADT(&theBuffer, offset, globalAction);

	return globalAction;
}