const reward_observation_terminal_t *env_step(const action_t *theAction) {
  int envState = kEnvStep;
  static reward_observation_terminal_t ro = {0,0,0};
  unsigned int offset = 0;

  __RL_CHECK_STRUCT(theAction)
  rlBufferClear(&theBuffer);
  offset = 0;
  /* Send theAction to the client environment */
  offset = rlCopyADTToBuffer(theAction, &theBuffer, offset);
  rlSendBufferData(rlGetEnvironmentConnection(), &theBuffer, envState);

  rlBufferClear(&theBuffer);
  rlRecvBufferData(rlGetEnvironmentConnection(), &theBuffer, &envState);
  assert(envState == kEnvStep);

  /* Receive theObservation from the client environment */
  offset = 0;
  offset = rlBufferRead(&theBuffer, offset, &ro.terminal, 1, sizeof(int));
  offset = rlBufferRead(&theBuffer, offset, &ro.reward, 1, sizeof(double));

	if(theObservation==0)theObservation=allocateRLStructPointer(0,0,0);
  offset = rlCopyBufferToADT(&theBuffer, offset, theObservation);
  __RL_CHECK_STRUCT(theObservation)

  ro.observation = theObservation;
  return &ro;
}
/*
 * Send kEnvInit over the environment connection and read back the task spec.
 *
 * The reply starts with the task spec length (read as one int), followed by
 * that many characters without a terminating NUL. A non-empty spec replaces
 * the cached theTaskSpec; an empty one leaves the cached value untouched.
 *
 * Returns theTaskSpec (owned by this module, released in env_cleanup), which
 * may be NULL if no spec has been received, or NULL if memory for a new spec
 * could not be allocated.
 */
const char* env_init() {
	int envState = kEnvInit;
	unsigned int theTaskSpecLength = 0;
	unsigned int offset = 0;

	/* The shared buffer is created on first use */
	if (theBuffer.capacity == 0){
		rlBufferCreate(&theBuffer, 65536);
	}

	/* env init carries no payload, only the state code */
	rlBufferClear(&theBuffer);
	rlSendBufferData(rlGetEnvironmentConnection(), &theBuffer, envState);

	rlBufferClear(&theBuffer);
	rlRecvBufferData(rlGetEnvironmentConnection(), &theBuffer, &envState);
	assert(envState == kEnvInit);

	offset = rlBufferRead(&theBuffer, offset, &theTaskSpecLength, 1, sizeof(int));
	if (theTaskSpecLength > 0) {
		/* Drop the previous spec; free() accepts a null pointer */
		free(theTaskSpec);
		theTaskSpec = 0;

		/* One extra byte for the terminator, which is not sent on the wire */
		theTaskSpec = (char*)calloc(theTaskSpecLength+1, sizeof(char));
		if (theTaskSpec == 0) {
			/* Out of memory: report "no task spec" instead of writing through NULL */
			return 0;
		}
		offset = rlBufferRead(&theBuffer, offset, theTaskSpec, theTaskSpecLength, sizeof(char));
		theTaskSpec[theTaskSpecLength] = '\0';
	}

	return theTaskSpec;
}
/*
 * Send kEnvCleanup over the environment connection, wait for the matching
 * reply, then release everything this side holds: the shared buffer, the
 * observation holder, the cached task spec and the cached outgoing message.
 */
void env_cleanup() {
	int state = kEnvCleanup;

	/* Request carries no payload, only the state code */
	rlBufferClear(&theBuffer);
	rlSendBufferData(rlGetEnvironmentConnection(), &theBuffer, state);

	rlBufferClear(&theBuffer);
	rlRecvBufferData(rlGetEnvironmentConnection(), &theBuffer, &state);
	assert(state == kEnvCleanup);

	rlBufferDestroy(&theBuffer);

	freeRLStructPointer(theObservation);
	theObservation = 0;

	/* free() accepts a null pointer, so the strings need no guard */
	free(theTaskSpec);
	theTaskSpec = 0;

	free(theOutMessage);
	theOutMessage = 0;
}
Beispiel #4
0
/*
 * Send the task spec to the agent and wait for its acknowledgement.
 *
 * theTaskSpec may be NULL, in which case a length of zero and no characters
 * are sent.
 */
void agent_init(const char * theTaskSpec) {
  unsigned int specLength = 0;
  unsigned int cursor = 0;
  int state = kAgentInit;

  if (theTaskSpec != NULL) {
    specLength = strlen(theTaskSpec);
  }

  /* The shared buffer is created on first use */
  if (theBuffer.capacity == 0) {
    rlBufferCreate(&theBuffer, 65536);
  }

  /* Strings go out as their length followed by the characters, without the
     terminating NUL */
  rlBufferClear(&theBuffer);
  cursor = rlBufferWrite(&theBuffer, cursor, &specLength, 1, sizeof(int));
  if (specLength > 0) {
    cursor = rlBufferWrite(&theBuffer, cursor, theTaskSpec, specLength, sizeof(char));
  }
  rlSendBufferData(rlGetAgentConnection(), &theBuffer, state);

  /* The receipt from the client confirms that AgentInit has completed */
  rlBufferClear(&theBuffer);
  rlRecvBufferData(rlGetAgentConnection(), &theBuffer, &state);
  assert(state == kAgentInit);
}
/*
 * Send kRLStep over the experiment connection and unpack the reply.
 *
 * The reply is read in this order: terminal (int), reward (double),
 * observation, action. Returns a pointer to function-static storage that is
 * overwritten by the next call; its observation and action fields point at
 * clientexp_observation and clientexp_action.
 */
const reward_observation_action_terminal_t* RL_step() {
  static reward_observation_action_terminal_t stepResult = {0, 0,0, 0};
  unsigned int cursor = 0;
  int state = kRLStep;

  assert(theExperimentConnection != 0);

  /* Request carries no payload, only the state code */
  rlBufferClear(&clientexp_rlbuffer);
  rlSendBufferData(theExperimentConnection, &clientexp_rlbuffer, state);

  /* Reply from the server must carry the same state code */
  rlBufferClear(&clientexp_rlbuffer);
  rlRecvBufferData(theExperimentConnection, &clientexp_rlbuffer, &state);
  assert(state == kRLStep);

  cursor = rlBufferRead(&clientexp_rlbuffer, cursor, &stepResult.terminal, 1, sizeof(int));
  cursor = rlBufferRead(&clientexp_rlbuffer, cursor, &stepResult.reward, 1, sizeof(double));
  cursor = rlCopyBufferToADT(&clientexp_rlbuffer, cursor, &clientexp_observation);
  rlCopyBufferToADT(&clientexp_rlbuffer, cursor, &clientexp_action);
  __RL_CHECK_STRUCT(&clientexp_observation)
  __RL_CHECK_STRUCT(&clientexp_action)

  stepResult.observation = &clientexp_observation;
  stepResult.action = &clientexp_action;
  return &stepResult;
}
Beispiel #6
0
/*
 * Send the final reward to the agent and wait for the matching kAgentEnd
 * reply.
 */
void agent_end(const double theReward) {
  unsigned int cursor = 0;
  int state = kAgentEnd;

  /* The payload is the reward alone; the state code is passed to
     rlSendBufferData rather than written into the buffer. */
  rlBufferClear(&theBuffer);
  cursor = rlBufferWrite(&theBuffer, cursor, &theReward, 1, sizeof(double));
  rlSendBufferData(rlGetAgentConnection(), &theBuffer, state);

  rlBufferClear(&theBuffer);
  rlRecvBufferData(rlGetAgentConnection(), &theBuffer, &state);
  assert(state == kAgentEnd);
}
/*
 * Send kRLReturn over the experiment connection and return the single
 * double carried by the reply.
 */
double RL_return() {
  unsigned int cursor = 0;
  double result = 0;
  int state = kRLReturn;

  assert(theExperimentConnection != 0);

  /* Request carries no payload, only the state code */
  rlBufferClear(&clientexp_rlbuffer);
  rlSendBufferData(theExperimentConnection, &clientexp_rlbuffer, state);

  rlBufferClear(&clientexp_rlbuffer);
  rlRecvBufferData(theExperimentConnection, &clientexp_rlbuffer, &state);
  assert(state == kRLReturn);

  rlBufferRead(&clientexp_rlbuffer, cursor, &result, 1, sizeof(double));
  return result;
}
/*
 * Send kRLNumEpisodes over the experiment connection and return the single
 * int carried by the reply.
 */
int RL_num_episodes() {
	unsigned int cursor = 0;
	int episodeCount = 0;
	int state = kRLNumEpisodes;

	assert(theExperimentConnection != 0);

	/* Request carries no payload, only the state code */
	rlBufferClear(&clientexp_rlbuffer);
	rlSendBufferData(theExperimentConnection, &clientexp_rlbuffer, state);

	rlBufferClear(&clientexp_rlbuffer);
	rlRecvBufferData(theExperimentConnection, &clientexp_rlbuffer, &state);
	assert(state == kRLNumEpisodes);

	rlBufferRead(&clientexp_rlbuffer, cursor, &episodeCount, 1, sizeof(int));
	return episodeCount;
}
const observation_t *env_start() {
	int envState = kEnvStart;
	unsigned int offset = 0;

	rlBufferClear(&theBuffer);
	rlSendBufferData(rlGetEnvironmentConnection(), &theBuffer, envState);

	rlBufferClear(&theBuffer);
	rlRecvBufferData(rlGetEnvironmentConnection(), &theBuffer, &envState);
	assert(envState == kEnvStart);

	if(theObservation==0)theObservation=allocateRLStructPointer(0,0,0);
	__RL_CHECK_STRUCT(theObservation)
	
	
	offset = rlCopyBufferToADT(&theBuffer, offset, theObservation);
	return theObservation;
}
/*
 * Serve agent requests arriving on theConnection until kRLTerm is received.
 *
 * Each iteration receives one message into theBuffer, dispatches on its
 * state code to the matching onAgent* handler, and sends theBuffer back
 * tagged with the same state code. kRLTerm is answered too and then ends
 * the loop. An unrecognised state code is reported on stderr and terminates
 * the process with a non-zero exit status.
 */
static void runAgentEventLoop(int theConnection) {
  int agentState = 0;

  do {
    rlBufferClear(&theBuffer);
    rlRecvBufferData(theConnection, &theBuffer, &agentState);

    switch(agentState) {
    case kAgentInit:
      onAgentInit(theConnection);
      break;

    case kAgentStart:
      onAgentStart(theConnection);
      break;

    case kAgentStep:
      onAgentStep(theConnection);
      break;

    case kAgentEnd:
      onAgentEnd(theConnection);
      break;

    case kAgentCleanup:
      onAgentCleanup(theConnection);
      break;

    case kAgentMessage:
      onAgentMessage(theConnection);
      break;

    case kRLTerm:
      /* Nothing to dispatch; the reply below is still sent before leaving */
      break;

    default:
      fprintf(stderr, kUnknownMessage, agentState);
      /* A protocol error is a failure: do not report success to the caller
         of the process (this used to be exit(0)). */
      exit(1);
    }

    rlSendBufferData(theConnection, &theBuffer, agentState);
  } while (agentState != kRLTerm);
}
/*
 * Send a message string over the experiment connection with state code
 * kRLEnvMessage and return the reply string.
 *
 * message may be null, in which case a length of zero and no characters are
 * sent. Strings travel as their length followed by the characters, without
 * a terminating NUL.
 *
 * Returns clientexp_message, a module-owned buffer that is reused (and
 * possibly reallocated) by later calls, or a null pointer if memory for the
 * reply could not be allocated.
 */
const char* RL_env_message(const char *message) {
	int experimentState = kRLEnvMessage;
	unsigned int messageLength = 0;
	unsigned int offset = 0;

	if (message != 0){
		messageLength = strlen(message);
	}
	forceConnection();

	rlBufferClear(&clientexp_rlbuffer);
	offset = 0;
	offset = rlBufferWrite(&clientexp_rlbuffer, offset, &messageLength, 1, sizeof(int));
	if (messageLength > 0) {
		offset = rlBufferWrite(&clientexp_rlbuffer, offset, message, messageLength, sizeof(char));
	}
	rlSendBufferData(theExperimentConnection, &clientexp_rlbuffer, experimentState);

	rlBufferClear(&clientexp_rlbuffer);
	rlRecvBufferData(theExperimentConnection, &clientexp_rlbuffer, &experimentState);
	assert(experimentState == kRLEnvMessage);

	/* messageLength is reused for the length of the reply */
	offset = 0;
	offset = rlBufferRead(&clientexp_rlbuffer, offset, &messageLength, 1, sizeof(int));

	/* >= rather than > so that a buffer of at least one byte exists even
	   when nothing has been allocated yet (capacity 0, empty reply) */
	if (messageLength >= clientexp_messagecapacity) {
		/* free() accepts a null pointer */
		free(clientexp_message);
		clientexp_message = (char*)calloc(messageLength+1, sizeof(char));
		if (clientexp_message == 0) {
			/* Out of memory: keep pointer and capacity in agreement so the
			   next call allocates again, and report failure instead of
			   writing through a null pointer */
			clientexp_messagecapacity = 0;
			return 0;
		}
		clientexp_messagecapacity = messageLength;
	}

	if (messageLength > 0) {
		offset = rlBufferRead(&clientexp_rlbuffer, offset, clientexp_message, messageLength, sizeof(char));
	}
	/* Terminate outside the branch so an empty reply yields "" */
	clientexp_message[messageLength] = '\0';

	return clientexp_message;
}
/*
 * Send a message string to the environment with state code kEnvMessage and
 * return the environment's reply string.
 *
 * inMessage may be NULL, in which case a length of zero and no characters
 * are sent. Strings travel as their length followed by the characters,
 * without a terminating NUL.
 *
 * Returns theOutMessage, a module-owned string that is freed and replaced
 * on the next call, or NULL if memory for the reply could not be allocated.
 */
const char* env_message(const char* inMessage) {
  int envState = kEnvMessage;
  unsigned int theInMessageLength = 0;
  unsigned int theOutMessageLength = 0;
  unsigned int offset = 0;

  if (inMessage != NULL) {
    theInMessageLength = strlen(inMessage);
  }

  /* The shared buffer is created on first use. 65536 matches the capacity
     env_init requests; the former 65356 was a transposed digit. */
  if (theBuffer.capacity == 0) {
    rlBufferCreate(&theBuffer, 65536);
  }

  rlBufferClear(&theBuffer);
  offset = 0;
  offset = rlBufferWrite(&theBuffer, offset, &theInMessageLength, 1, sizeof(int));
  if (theInMessageLength > 0) {
    offset = rlBufferWrite(&theBuffer, offset, inMessage, theInMessageLength, sizeof(char));
  }
  rlSendBufferData(rlGetEnvironmentConnection(), &theBuffer, envState);

  rlBufferClear(&theBuffer);
  rlRecvBufferData(rlGetEnvironmentConnection(), &theBuffer, &envState);
  assert(envState == kEnvMessage);

  offset = 0;
  offset = rlBufferRead(&theBuffer, offset, &theOutMessageLength, 1, sizeof(int));

  /* Replace the previous reply; free() accepts a null pointer. The new
     allocation is at least one byte, for the terminator. */
  free(theOutMessage);
  theOutMessage = (char*)calloc(theOutMessageLength+1, sizeof(char));
  if (theOutMessage == NULL) {
    /* Out of memory: report failure instead of writing through NULL */
    return NULL;
  }

  /* Fill the string from the buffer, then terminate it */
  if (theOutMessageLength > 0) {
    offset = rlBufferRead(&theBuffer, offset, theOutMessage, theOutMessageLength, sizeof(char));
  }
  theOutMessage[theOutMessageLength] = '\0';
  return theOutMessage;
}
Beispiel #13
0
/*
 * Tell the agent that we're cleaning up, wait for the matching reply, then
 * release the shared buffer, the action holder and the cached outgoing
 * message.
 */
void agent_cleanup() {
	int state = kAgentCleanup;

	/* Request carries no payload, only the state code */
	rlBufferClear(&theBuffer);
	rlSendBufferData(rlGetAgentConnection(), &theBuffer, state);

	rlBufferClear(&theBuffer);
	rlRecvBufferData(rlGetAgentConnection(), &theBuffer, &state);
	assert(state == kAgentCleanup);

	rlBufferDestroy(&theBuffer);

	freeRLStructPointer(globalAction);
	globalAction = 0;

	/* free() accepts a null pointer, so no guard is needed */
	free(theOutMessage);
	theOutMessage = 0;
}
/*
 * Send kRLEpisode with numSteps over the experiment connection and return
 * the int carried by the reply (stored here as the terminal flag).
 */
int RL_episode(unsigned int numSteps) {
	unsigned int cursor = 0;
	int isTerminal = 0;
	int state = kRLEpisode;

	assert(theExperimentConnection != 0);

	/* Request payload: the step count */
	rlBufferClear(&clientexp_rlbuffer);
	cursor = rlBufferWrite(&clientexp_rlbuffer, cursor, &numSteps, 1, sizeof(int));
	rlSendBufferData(theExperimentConnection, &clientexp_rlbuffer, state);

	/* The reply is read from the start of the freshly cleared buffer, so
	   the cursor goes back to zero */
	rlBufferClear(&clientexp_rlbuffer);
	cursor = 0;
	rlRecvBufferData(theExperimentConnection, &clientexp_rlbuffer, &state);
	rlBufferRead(&clientexp_rlbuffer, cursor, &isTerminal, 1, sizeof(int));
	assert(state == kRLEpisode);

	return isTerminal;
}
Beispiel #15
0
/*
 * Send the reward and the observation to the agent, receive the action and
 * return it.
 *
 * Returns the shared globalAction, which is overwritten by later calls that
 * receive an action.
 */
const action_t *agent_step(const double theReward, const observation_t *theObservation) {
  unsigned int cursor = 0;
  int state = kAgentStep;

  /* Request payload: reward (double) followed by the observation */
  rlBufferClear(&theBuffer);
  cursor = rlBufferWrite(&theBuffer, cursor, &theReward, 1, sizeof(double));
  cursor = rlCopyADTToBuffer(theObservation, &theBuffer, cursor);
  rlSendBufferData(rlGetAgentConnection(), &theBuffer, state);

  rlBufferClear(&theBuffer);
  rlRecvBufferData(rlGetAgentConnection(), &theBuffer, &state);
  assert(state == kAgentStep);

  /* The action holder is created the first time it is needed */
  if (globalAction == 0) {
    globalAction = allocateRLStructPointer(0, 0, 0);
  }
  cursor = 0;
  rlCopyBufferToADT(&theBuffer, cursor, globalAction);

  return globalAction;
}
Beispiel #16
0
/*
 * Send the observation to the agent, receive the action and return it.
 *
 * Returns the shared globalAction, which is overwritten by later calls that
 * receive an action.
 */
const action_t *agent_start(const observation_t *theObservation) {
	unsigned int cursor = 0;
	int state = kAgentStart;

	__RL_CHECK_STRUCT(theObservation);

	/* Request payload: the serialized observation */
	rlBufferClear(&theBuffer);
	cursor = rlCopyADTToBuffer(theObservation, &theBuffer, cursor);
	rlSendBufferData(rlGetAgentConnection(), &theBuffer, state);

	rlBufferClear(&theBuffer);
	rlRecvBufferData(rlGetAgentConnection(), &theBuffer, &state);
	assert(state == kAgentStart);

	/* The action holder is created the first time it is needed */
	if (globalAction == 0) {
		globalAction = allocateRLStructPointer(0, 0, 0);
	}
	cursor = 0;
	rlCopyBufferToADT(&theBuffer, cursor, globalAction);

	return globalAction;
}
/*
 * Send kRLCleanup over the experiment connection, wait for the matching
 * reply, then clear the cached observation and action and release the
 * cached message string.
 */
void RL_cleanup() {
	int state = kRLCleanup;

	assert(theExperimentConnection != 0);

	/* Request carries no payload, only the state code */
	rlBufferClear(&clientexp_rlbuffer);
	rlSendBufferData(theExperimentConnection, &clientexp_rlbuffer, state);

	rlBufferClear(&clientexp_rlbuffer);
	rlRecvBufferData(theExperimentConnection, &clientexp_rlbuffer, &state);
	assert(state == kRLCleanup);

	clearRLStruct(&clientexp_observation);
	clearRLStruct(&clientexp_action);

	/* free() is safe on a null pointer; pointer and capacity are reset
	   together so the next message allocates afresh */
	free(clientexp_message);
	clientexp_message = 0;
	clientexp_messagecapacity = 0;
}
/*
 * Send kRLInit over the experiment connection and return the string carried
 * by the reply (the task spec, per the original author's note).
 *
 * The reply holds the string length followed by that many characters,
 * without a terminating NUL. The string is stored in clientexp_message, the
 * same module-owned buffer the message calls use, so a later call may
 * overwrite or reallocate it.
 *
 * Returns clientexp_message, or a null pointer if memory for the string
 * could not be allocated.
 */
const char* RL_init() {
  unsigned int offset=0;
  unsigned int messageLength=0;
  int experimentState = kRLInit;

  forceConnection();

  /* Remote call RL_init: no payload, only the state code */
  rlBufferClear(&clientexp_rlbuffer);
  rlSendBufferData(theExperimentConnection, &clientexp_rlbuffer, experimentState);

  /* Receive the reply to RL_init */
  rlBufferClear(&clientexp_rlbuffer);
  rlRecvBufferData(theExperimentConnection, &clientexp_rlbuffer, &experimentState);
  assert(experimentState == kRLInit);

  offset = rlBufferRead(&clientexp_rlbuffer, offset, &messageLength, 1, sizeof(unsigned int));

  /* >= rather than > so that a buffer exists even when nothing has been
     allocated yet (capacity 0, empty string) */
  if (messageLength >= clientexp_messagecapacity) {
    /* free() accepts a null pointer */
    free(clientexp_message);
    clientexp_message = (char*)calloc(messageLength+1, sizeof(char));
    if (clientexp_message == 0) {
      /* Out of memory: keep pointer and capacity in agreement so the next
         call allocates again, and report failure instead of writing
         through a null pointer */
      clientexp_messagecapacity = 0;
      return 0;
    }
    clientexp_messagecapacity = messageLength;
  }

  if (messageLength > 0) {
    offset = rlBufferRead(&clientexp_rlbuffer, offset, clientexp_message, messageLength, sizeof(char));
  }
  /* Terminate outside the branch so an empty string is still terminated */
  clientexp_message[messageLength] = '\0';

  return clientexp_message;
}
/*
 * Send kRLStart over the experiment connection and unpack the observation
 * and action from the reply, in that order.
 *
 * Returns a pointer to function-static storage that is overwritten by the
 * next call; its fields point at clientexp_observation and
 * clientexp_action.
 */
const observation_action_t *RL_start() {
  static observation_action_t startResult = { 0,0};
  unsigned int cursor = 0;
  int state = kRLStart;

  assert(theExperimentConnection != 0);

  /* Request carries no payload, only the state code */
  rlBufferClear(&clientexp_rlbuffer);
  rlSendBufferData(theExperimentConnection, &clientexp_rlbuffer, state);

  rlBufferClear(&clientexp_rlbuffer);
  rlRecvBufferData(theExperimentConnection, &clientexp_rlbuffer, &state);
  assert(state == kRLStart);

  cursor = rlCopyBufferToADT(&clientexp_rlbuffer, cursor, &clientexp_observation);
  rlCopyBufferToADT(&clientexp_rlbuffer, cursor, &clientexp_action);
  __RL_CHECK_STRUCT(&clientexp_observation)
  __RL_CHECK_STRUCT(&clientexp_action)

  startResult.observation = &clientexp_observation;
  startResult.action = &clientexp_action;
  return &startResult;
}