const reward_observation_terminal_t *env_step(const action_t *theAction) { int envState = kEnvStep; static reward_observation_terminal_t ro = {0,0,0}; unsigned int offset = 0; __RL_CHECK_STRUCT(theAction) rlBufferClear(&theBuffer); offset = 0; /* Send theAction to the client environment */ offset = rlCopyADTToBuffer(theAction, &theBuffer, offset); rlSendBufferData(rlGetEnvironmentConnection(), &theBuffer, envState); rlBufferClear(&theBuffer); rlRecvBufferData(rlGetEnvironmentConnection(), &theBuffer, &envState); assert(envState == kEnvStep); /* Receive theObservation from the client environment */ offset = 0; offset = rlBufferRead(&theBuffer, offset, &ro.terminal, 1, sizeof(int)); offset = rlBufferRead(&theBuffer, offset, &ro.reward, 1, sizeof(double)); if(theObservation==0)theObservation=allocateRLStructPointer(0,0,0); offset = rlCopyBufferToADT(&theBuffer, offset, theObservation); __RL_CHECK_STRUCT(theObservation) ro.observation = theObservation; return &ro; }
const char* env_init() { /* Setup the connection */ int envState = kEnvInit; unsigned int theTaskSpecLength = 0; unsigned int offset = 0; if (theBuffer.capacity == 0){ rlBufferCreate(&theBuffer, 65536); } /* env init-specific data */ rlBufferClear(&theBuffer); rlSendBufferData(rlGetEnvironmentConnection(), &theBuffer, envState); rlBufferClear(&theBuffer); rlRecvBufferData(rlGetEnvironmentConnection(), &theBuffer, &envState); assert(envState == kEnvInit); offset = 0; offset = rlBufferRead(&theBuffer, offset, &theTaskSpecLength, 1, sizeof(int)); if (theTaskSpecLength > 0) { if (theTaskSpec != 0) { free(theTaskSpec); theTaskSpec = 0; } /*Read the task spec off the wire and then add \0 at the end, to be sure? */ /*Are we actually stripping the \0 before we send it or is this just for good measure */ theTaskSpec = (char*)calloc(theTaskSpecLength+1, sizeof(char)); offset = rlBufferRead(&theBuffer, offset, theTaskSpec, theTaskSpecLength, sizeof(char)); theTaskSpec[theTaskSpecLength] = '\0'; } return theTaskSpec; }
void env_cleanup() { int envState = kEnvCleanup; rlBufferClear(&theBuffer); rlSendBufferData(rlGetEnvironmentConnection(), &theBuffer, envState); rlBufferClear(&theBuffer); rlRecvBufferData(rlGetEnvironmentConnection(), &theBuffer, &envState); assert(envState == kEnvCleanup); rlBufferDestroy(&theBuffer); freeRLStructPointer(theObservation); theObservation=0; if (theTaskSpec != 0) { free(theTaskSpec); theTaskSpec = 0; } if (theOutMessage != 0) { free(theOutMessage); theOutMessage = 0; } }
/*
 * Send the task spec to the agent.
 *
 * theTaskSpec may be NULL, in which case a length of zero and no characters
 * are sent. Blocks until the agent connection answers with kAgentInit.
 * theBuffer is created with 65536 bytes if it has no capacity yet.
 */
void agent_init(const char * theTaskSpec)
{
    int state = kAgentInit;
    unsigned int specLen = 0;
    unsigned int pos = 0;

    if (theTaskSpec != NULL) {
        specLen = strlen(theTaskSpec);
    }

    if (theBuffer.capacity == 0) {
        rlBufferCreate(&theBuffer, 65536);
    }

    /* Strings are always preceded by their length and are sent without
       their null terminating character. */
    rlBufferClear(&theBuffer);
    pos = rlBufferWrite(&theBuffer, pos, &specLen, 1, sizeof(int));
    if (specLen > 0) {
        pos = rlBufferWrite(&theBuffer, pos, theTaskSpec, specLen, sizeof(char));
    }
    rlSendBufferData(rlGetAgentConnection(), &theBuffer, state);

    /* The receipt from the client confirms that AgentInit has completed. */
    rlBufferClear(&theBuffer);
    rlRecvBufferData(rlGetAgentConnection(), &theBuffer, &state);
    assert(state == kAgentInit);
}
const reward_observation_action_terminal_t* RL_step() { int experimentState = kRLStep; static reward_observation_action_terminal_t roat = {0, 0,0, 0}; unsigned int offset = 0; assert(theExperimentConnection != 0); rlBufferClear(&clientexp_rlbuffer); rlSendBufferData(theExperimentConnection, &clientexp_rlbuffer, experimentState); /* Recv Data from Server */ rlBufferClear(&clientexp_rlbuffer); rlRecvBufferData(theExperimentConnection, &clientexp_rlbuffer, &experimentState); assert(experimentState == kRLStep); offset = rlBufferRead(&clientexp_rlbuffer, offset, &roat.terminal, 1, sizeof(int)); offset = rlBufferRead(&clientexp_rlbuffer, offset, &roat.reward, 1, sizeof(double)); offset = rlCopyBufferToADT(&clientexp_rlbuffer, offset, &clientexp_observation); offset = rlCopyBufferToADT(&clientexp_rlbuffer, offset, &clientexp_action); __RL_CHECK_STRUCT(&clientexp_observation) __RL_CHECK_STRUCT(&clientexp_action) roat.observation = &clientexp_observation; roat.action = &clientexp_action; return &roat; }
/*
 * Send the final reward to the agent and block until the agent connection
 * answers with kAgentEnd.
 */
void agent_end(const double theReward)
{
    int state = kAgentEnd;
    unsigned int pos = 0;

    /* The payload is the reward alone; the state is passed to
       rlSendBufferData separately and is not written into the buffer. */
    rlBufferClear(&theBuffer);
    pos = rlBufferWrite(&theBuffer, pos, &theReward, 1, sizeof(double));
    rlSendBufferData(rlGetAgentConnection(), &theBuffer, state);

    rlBufferClear(&theBuffer);
    rlRecvBufferData(rlGetAgentConnection(), &theBuffer, &state);
    assert(state == kAgentEnd);
}
int main(int argc, char** argv) { int theConnection = 0; const char *usage = "The following environment variables are used by the agent to control its function:\n" "RLGLUE_HOST : If set the agent will use this ip or hostname to connect to rather than %s\n" "RLGLUE_PORT : If set the agent will use this port to connect on rather than %d\n"; struct hostent *host_ent; char* host = kLocalHost; short port = kDefaultPort; char* envptr = 0; if (argc > 1) { fprintf(stderr, usage, kLocalHost, kDefaultPort); exit(1); } host = getenv("RLGLUE_HOST"); if (host == 0) { host = kLocalHost; } envptr = getenv("RLGLUE_PORT"); if (envptr != 0) { port = strtol(envptr, 0, 10); if (port == 0) { port = kDefaultPort; } } if (isalpha(host[0])) { /*This method is apparently deprecated, we should update at some point*/ host_ent = gethostbyname(host); if(host_ent==0){ fprintf(stderr,"Couldn't find IP address for host: %s\n",host); exit(55); } host = inet_ntoa(*(struct in_addr*)host_ent->h_addr_list[0]); } fprintf(stdout, "RL-Glue C Agent Codec Version %s, Build %s\n\tConnecting to host=%s on port=%d...\n", VERSION,__rlglue_get_codec_svn_version(),host, port); fflush(stdout); /* Allocate what should be plenty of space for the buffer - it will dynamically resize if it is too small */ rlBufferCreate(&theBuffer, 4096); theConnection = rlWaitForConnection(host, port, kRetryTimeout); fprintf(stdout, "\tRL-Glue C Agent Codec :: Connected\n"); rlBufferClear(&theBuffer); rlSendBufferData(theConnection, &theBuffer, kAgentConnection); runAgentEventLoop(theConnection); rlClose(theConnection); rlBufferDestroy(&theBuffer); return 0; }
/*
 * Send kRLNumEpisodes over the experiment connection and return the int
 * carried in the reply.
 *
 * theExperimentConnection must be non-zero (asserted).
 */
int RL_num_episodes()
{
    int state = kRLNumEpisodes;
    int episodeCount = 0;
    unsigned int pos = 0;

    assert(theExperimentConnection != 0);

    /* The request has no payload. */
    rlBufferClear(&clientexp_rlbuffer);
    rlSendBufferData(theExperimentConnection, &clientexp_rlbuffer, state);

    rlBufferClear(&clientexp_rlbuffer);
    rlRecvBufferData(theExperimentConnection, &clientexp_rlbuffer, &state);
    assert(state == kRLNumEpisodes);

    pos = rlBufferRead(&clientexp_rlbuffer, pos, &episodeCount, 1, sizeof(int));
    return episodeCount;
}
/*
 * Send kRLReturn over the experiment connection and return the double
 * carried in the reply.
 *
 * theExperimentConnection must be non-zero (asserted).
 */
double RL_return()
{
    int state = kRLReturn;
    double result = 0;
    unsigned int pos = 0;

    assert(theExperimentConnection != 0);

    /* The request has no payload. */
    rlBufferClear(&clientexp_rlbuffer);
    rlSendBufferData(theExperimentConnection, &clientexp_rlbuffer, state);

    rlBufferClear(&clientexp_rlbuffer);
    rlRecvBufferData(theExperimentConnection, &clientexp_rlbuffer, &state);
    assert(state == kRLReturn);

    pos = rlBufferRead(&clientexp_rlbuffer, pos, &result, 1, sizeof(double));
    return result;
}
const observation_t *env_start() { int envState = kEnvStart; unsigned int offset = 0; rlBufferClear(&theBuffer); rlSendBufferData(rlGetEnvironmentConnection(), &theBuffer, envState); rlBufferClear(&theBuffer); rlRecvBufferData(rlGetEnvironmentConnection(), &theBuffer, &envState); assert(envState == kEnvStart); if(theObservation==0)theObservation=allocateRLStructPointer(0,0,0); __RL_CHECK_STRUCT(theObservation) offset = rlCopyBufferToADT(&theBuffer, offset, theObservation); return theObservation; }
const char* RL_env_message(const char *message) { int experimentState = kRLEnvMessage; unsigned int messageLength = 0; unsigned int offset = 0; if (message != 0){ messageLength = strlen(message); } forceConnection(); rlBufferClear(&clientexp_rlbuffer); offset = 0; offset = rlBufferWrite(&clientexp_rlbuffer, offset, &messageLength, 1, sizeof(int)); if (messageLength > 0) { offset = rlBufferWrite(&clientexp_rlbuffer, offset, message, messageLength, sizeof(char)); } rlSendBufferData(theExperimentConnection, &clientexp_rlbuffer, experimentState); rlBufferClear(&clientexp_rlbuffer); rlRecvBufferData(theExperimentConnection, &clientexp_rlbuffer, &experimentState); assert(experimentState == kRLEnvMessage); offset = 0; offset = rlBufferRead(&clientexp_rlbuffer, offset, &messageLength, 1, sizeof(int)); /* Sept 12 2008 made this >= instead of > so that we'd at least have size 1 */ if (messageLength >= clientexp_messagecapacity) { if(clientexp_message!=0){ free(clientexp_message); clientexp_message=0; } clientexp_message = (char*)calloc(messageLength+1, sizeof(char)); clientexp_messagecapacity = messageLength; } if (messageLength > 0) { offset = rlBufferRead(&clientexp_rlbuffer, offset, clientexp_message, messageLength, sizeof(char)); } /* Sept 12 2008 moved this out of the if statement so we actually null terminate at the right place if we get a "" message */ clientexp_message[messageLength] = '\0'; return clientexp_message; }
const char* env_message(const char* inMessage) { int envState = kEnvMessage; unsigned int theInMessageLength = 0; unsigned int theOutMessageLength = 0; unsigned int offset = 0; if (inMessage != NULL) { theInMessageLength = strlen(inMessage); } if (theBuffer.capacity == 0) rlBufferCreate(&theBuffer, 65356); rlBufferClear(&theBuffer); offset = 0; offset = rlBufferWrite(&theBuffer, offset, &theInMessageLength, 1, sizeof(int)); if (theInMessageLength > 0) { offset = rlBufferWrite(&theBuffer, offset, inMessage, theInMessageLength, sizeof(char)); } rlSendBufferData(rlGetEnvironmentConnection(), &theBuffer, envState); rlBufferClear(&theBuffer); rlRecvBufferData(rlGetEnvironmentConnection(), &theBuffer, &envState); assert(envState == kEnvMessage); offset = 0; offset = rlBufferRead(&theBuffer, offset, &theOutMessageLength, 1, sizeof(int)); /*Free and point the old message to null */ if (theOutMessage != 0) { free(theOutMessage); theOutMessage = 0; } /* Allocated memory for the new message, maybe just 1 byte for the terminator */ theOutMessage = (char*)calloc(theOutMessageLength+1, sizeof(char)); /* Fill up the string from the buffer */ if (theOutMessageLength > 0) { offset = rlBufferRead(&theBuffer, offset, theOutMessage, theOutMessageLength, sizeof(char)); } /* Set the terminator */ theOutMessage[theOutMessageLength] = '\0'; return theOutMessage; }
/*
 * Send kRLEpisode with numSteps as payload over the experiment connection
 * and return the int carried in the reply.
 *
 * theExperimentConnection must be non-zero (asserted).
 */
int RL_episode(unsigned int numSteps)
{
    int state = kRLEpisode;
    int result = 0;
    unsigned int pos = 0;

    assert(theExperimentConnection != 0);

    rlBufferClear(&clientexp_rlbuffer);
    pos = rlBufferWrite(&clientexp_rlbuffer, pos, &numSteps, 1, sizeof(int));
    rlSendBufferData(theExperimentConnection, &clientexp_rlbuffer, state);

    rlBufferClear(&clientexp_rlbuffer);
    /* Reading the reply starts again from the beginning of the buffer. */
    pos = 0;
    rlRecvBufferData(theExperimentConnection, &clientexp_rlbuffer, &state);
    pos = rlBufferRead(&clientexp_rlbuffer, pos, &result, 1, sizeof(int));
    assert(state == kRLEpisode);

    return result;
}
/*
 * Tell the agent that we're cleaning up, wait for its kAgentCleanup reply,
 * then release theBuffer, globalAction and theOutMessage. Both pointers are
 * reset to 0 afterwards.
 */
void agent_cleanup()
{
    int state = kAgentCleanup;

    /* The cleanup request has no payload. */
    rlBufferClear(&theBuffer);
    rlSendBufferData(rlGetAgentConnection(), &theBuffer, state);

    rlBufferClear(&theBuffer);
    rlRecvBufferData(rlGetAgentConnection(), &theBuffer, &state);
    assert(state == kAgentCleanup);

    rlBufferDestroy(&theBuffer);

    freeRLStructPointer(globalAction);
    globalAction = 0;

    /* free() is a no-op on a null pointer, so no guard is needed. */
    free(theOutMessage);
    theOutMessage = 0;
}
/* Send the observation to the agent, receive the action and return it */ const action_t *agent_start(const observation_t *theObservation) { int agentState = kAgentStart; unsigned int offset = 0; __RL_CHECK_STRUCT(theObservation); rlBufferClear(&theBuffer); offset = 0; offset = rlCopyADTToBuffer(theObservation, &theBuffer, offset); rlSendBufferData(rlGetAgentConnection(), &theBuffer, agentState); rlBufferClear(&theBuffer); rlRecvBufferData(rlGetAgentConnection(), &theBuffer, &agentState); assert(agentState == kAgentStart); offset = 0; if(globalAction==0)globalAction=allocateRLStructPointer(0,0,0); offset = rlCopyBufferToADT(&theBuffer, offset, globalAction); return globalAction; }
/*
 * Handler for the agent-end event: read the reward (a double at offset 0 of
 * theBuffer), pass it to agent_end(), and leave theBuffer cleared for the
 * reply. theConnection is not used here.
 */
static void onAgentEnd(int theConnection)
{
    double finalReward = 0;

    /* Data from the server sits at the beginning of the buffer. */
    rlBufferRead(&theBuffer, 0, &finalReward, 1, sizeof(double));

    agent_end(finalReward);

    /* No payload is written back; the buffer is only cleared. */
    rlBufferClear(&theBuffer);
}
void RL_cleanup() { int experimentState = kRLCleanup; assert(theExperimentConnection != 0); rlBufferClear(&clientexp_rlbuffer); rlSendBufferData(theExperimentConnection, &clientexp_rlbuffer, experimentState); rlBufferClear(&clientexp_rlbuffer); rlRecvBufferData(theExperimentConnection, &clientexp_rlbuffer, &experimentState); assert(experimentState == kRLCleanup); clearRLStruct(&clientexp_observation); clearRLStruct(&clientexp_action); /*safe even if it is null */ free(clientexp_message); clientexp_message = 0; clientexp_messagecapacity = 0; }
/* Send the reward and the observation to the agent, receive the action and return it */ const action_t *agent_step(const double theReward, const observation_t *theObservation) { int agentState = kAgentStep; unsigned int offset = 0; rlBufferClear(&theBuffer); offset = 0; offset = rlBufferWrite(&theBuffer, offset, &theReward, 1, sizeof(double)); offset = rlCopyADTToBuffer(theObservation, &theBuffer, offset); rlSendBufferData(rlGetAgentConnection(), &theBuffer, agentState); rlBufferClear(&theBuffer); rlRecvBufferData(rlGetAgentConnection(), &theBuffer, &agentState); assert(agentState == kAgentStep); offset = 0; if(globalAction==0)globalAction=allocateRLStructPointer(0,0,0); offset = rlCopyBufferToADT(&theBuffer, offset, globalAction); return globalAction; }
const char* RL_init() { unsigned int offset=0; unsigned int messageLength=0; int experimentState = kRLInit; forceConnection(); /* Remote call RL_init */ rlBufferClear(&clientexp_rlbuffer); rlSendBufferData(theExperimentConnection, &clientexp_rlbuffer, experimentState); /* Recv back a reply from RL_init */ rlBufferClear(&clientexp_rlbuffer); rlRecvBufferData(theExperimentConnection, &clientexp_rlbuffer, &experimentState); assert(experimentState == kRLInit); /* Brian added Sept 8 so that RL_init returns the task spec */ /* We'll reuse messageLength and clientexp_message from Agent_message*/ offset = rlBufferRead(&clientexp_rlbuffer, offset, &messageLength, 1, sizeof(unsigned int)); if (messageLength >= clientexp_messagecapacity) { if(clientexp_message!=0){ free(clientexp_message); clientexp_message=0; } clientexp_message = (char*)calloc(messageLength+1, sizeof(char)); clientexp_messagecapacity = messageLength; } if (messageLength > 0) { offset = rlBufferRead(&clientexp_rlbuffer, offset, clientexp_message, messageLength, sizeof(char)); } /*Need to move this outside of the if statement, so that we get null termination for empty messages*/ clientexp_message[messageLength] = '\0'; return clientexp_message; }
const observation_action_t *RL_start() { int experimentState = kRLStart; static observation_action_t oa = { 0,0}; unsigned int offset = 0; assert(theExperimentConnection != 0); rlBufferClear(&clientexp_rlbuffer); rlSendBufferData(theExperimentConnection, &clientexp_rlbuffer, experimentState); rlBufferClear(&clientexp_rlbuffer); rlRecvBufferData(theExperimentConnection, &clientexp_rlbuffer, &experimentState); assert(experimentState == kRLStart); offset = rlCopyBufferToADT(&clientexp_rlbuffer, offset, &clientexp_observation); offset = rlCopyBufferToADT(&clientexp_rlbuffer, offset, &clientexp_action); __RL_CHECK_STRUCT(&clientexp_observation) __RL_CHECK_STRUCT(&clientexp_action) oa.observation = &clientexp_observation; oa.action = &clientexp_action; return &oa; }
static void onAgentStart(int theConnection) { const action_t *theAction; unsigned int offset = 0; /* Read the data in the buffer (data from server) */ offset = rlCopyBufferToADT(&theBuffer, offset, &clientagent_observation); __RL_CHECK_STRUCT(&clientagent_observation) /* Call RL method on the recv'd data */ theAction = agent_start(&clientagent_observation); __RL_CHECK_STRUCT(theAction) /* Prepare the buffer for sending data back to the server */ rlBufferClear(&theBuffer); offset = 0; offset = rlCopyADTToBuffer(theAction, &theBuffer, offset); }
static void runAgentEventLoop(int theConnection) { int agentState = 0; do { rlBufferClear(&theBuffer); rlRecvBufferData(theConnection, &theBuffer, &agentState); switch(agentState) { case kAgentInit: onAgentInit(theConnection); break; case kAgentStart: onAgentStart(theConnection); break; case kAgentStep: onAgentStep(theConnection); break; case kAgentEnd: onAgentEnd(theConnection); break; case kAgentCleanup: onAgentCleanup(theConnection); break; case kAgentMessage: onAgentMessage(theConnection); break; case kRLTerm: break; default: fprintf(stderr, kUnknownMessage, agentState); exit(0); break; }; rlSendBufferData(theConnection, &theBuffer, agentState); } while (agentState != kRLTerm); }
static void forceConnection() { struct hostent *host_ent; char* host = kLocalHost; short port = kDefaultPort; char* envptr = 0; if (theExperimentConnection == 0) { host = getenv("RLGLUE_HOST"); if (host == 0) { host = kLocalHost; } envptr = getenv("RLGLUE_PORT"); if (envptr != 0) { port = strtol(envptr, 0, 10); if (port == 0) { port = kDefaultPort; } } if (isalpha(host[0])) { /*This method is apparently deprecated, we should update at some point*/ host_ent = gethostbyname(host); if(host_ent==0){ fprintf(stderr,"Couldn't find IP address for host: %s\n",host); exit(55); } host = inet_ntoa(*(struct in_addr*)host_ent->h_addr_list[0]); } fprintf(stdout, "RL-Glue C Experiment Codec Version %s, Build %s\n\tConnecting to host=%s on port=%d...\n", VERSION,__rlglue_get_codec_svn_version(),host, port); fflush(stdout); theExperimentConnection = rlWaitForConnection(host, port, kRetryTimeout); fprintf(stdout, "\tRL-Glue C Experiment Codec :: Connected\n"); /* Send the connection type */ atexit(cleanupExperimentAtExit); rlBufferCreate(&clientexp_rlbuffer, 65536); rlBufferClear(&clientexp_rlbuffer); rlSendBufferData(theExperimentConnection, &clientexp_rlbuffer, kExperimentConnection); } }
/*
 * Handler for the agent-cleanup event: call agent_cleanup(), clear theBuffer
 * for the reply, then release this side's own state
 * (clientagent_observation, theTaskSpec and the incoming-message buffer).
 * No payload is read for this event, and theConnection is not used here.
 */
static void onAgentCleanup(int theConnection)
{
    agent_cleanup();

    /* The reply has no payload. */
    rlBufferClear(&theBuffer);

    /* Release our resources and reset the bookkeeping. */
    clearRLStruct(&clientagent_observation);

    free(theTaskSpec);
    theTaskSpec = 0;

    free(clientagent_inmessage);
    clientagent_inmessage = 0;
    clientagent_inmessagecapacity = 0;
}
/*
 * Handler for the agent-init event: read the task spec from theBuffer
 * (length prefix followed by that many characters, no terminator), store a
 * null-terminated copy in theTaskSpec, call agent_init() with it, and leave
 * theBuffer cleared for the reply. theConnection is not used here.
 *
 * When the received length is zero, theTaskSpec is left as it was and is
 * passed to agent_init() unchanged.
 */
static void onAgentInit(int theConnection)
{
    unsigned int theTaskSpecLength = 0;
    unsigned int offset = 0;

    /* Read the data in the buffer (data from server) */
    offset = rlBufferRead(&theBuffer, offset, &theTaskSpecLength, 1, sizeof(int));
    if (theTaskSpecLength > 0) {
        /* Release a spec left over from an earlier init before replacing it,
           otherwise repeated inits without a cleanup leak the old string.
           free() accepts a null pointer. */
        free(theTaskSpec);
        theTaskSpec = (char*)calloc(theTaskSpecLength+1, sizeof(char));
        offset = rlBufferRead(&theBuffer, offset, theTaskSpec, theTaskSpecLength, sizeof(char));
        /*Make sure the string is null terminated */
        theTaskSpec[theTaskSpecLength]='\0';
    }

    /* Call RL method on the recv'd data */
    agent_init(theTaskSpec);

    /* Prepare the buffer for sending data back to the server */
    rlBufferClear(&theBuffer);
}
/*
 * Handler for the agent-message event: read the incoming string from
 * theBuffer into the reusable clientagent_inmessage buffer, pass it to
 * agent_message(), and write the reply (length prefix plus characters, no
 * terminator) back into theBuffer. theConnection is not used here.
 *
 * The incoming buffer is reallocated whenever the received length is >= the
 * recorded capacity. A NULL reply from agent_message() is sent as a
 * zero-length string.
 */
static void onAgentMessage(int theConnection)
{
    unsigned int requestLen = 0;
    unsigned int replyLen = 0;
    char* grown = 0;
    const char* reply = 0;
    unsigned int pos = 0;

    /* Incoming data from the server. */
    pos = rlBufferRead(&theBuffer, pos, &requestLen, 1, sizeof(int));

    if (requestLen >= clientagent_inmessagecapacity) {
        /* Allocate the replacement before releasing the old buffer. */
        grown = (char*)calloc(requestLen + 1, sizeof(char));
        free(clientagent_inmessage);
        clientagent_inmessage = grown;
        clientagent_inmessagecapacity = requestLen;
    }

    if (requestLen > 0) {
        pos = rlBufferRead(&theBuffer, pos, clientagent_inmessage, requestLen, sizeof(char));
    }
    /* The wire format has no terminator, so add one. */
    clientagent_inmessage[requestLen] = '\0';

    reply = agent_message(clientagent_inmessage);
    if (reply != NULL) {
        replyLen = strlen(reply);
    }

    /* Outgoing data: start writing at the beginning of a cleared buffer. */
    rlBufferClear(&theBuffer);
    pos = 0;
    pos = rlBufferWrite(&theBuffer, pos, &replyLen, 1, sizeof(int));
    if (replyLen > 0) {
        pos = rlBufferWrite(&theBuffer, pos, reply, replyLen, sizeof(char));
    }
}