/*
 * Agent initialisation hook.
 *
 * Creates the two action structures kept in emptyAction and nonEmptyAction.
 * Both start from allocateRLStructPointer(0,0,0); the non-empty one is then
 * passed to the set_k_*_in_abstract_type helpers with 7 (ints), 3 (doubles)
 * and 1 (chars). Finally the episode counter is reset to zero.
 *
 * task_spec is accepted to satisfy the agent interface but is not read.
 */
void agent_init(const char * task_spec)
{
    /* The action that carries no payload at all. */
    emptyAction = allocateRLStructPointer(0, 0, 0);

    /* The action that carries payload: start empty, then size each array. */
    nonEmptyAction = allocateRLStructPointer(0, 0, 0);
    set_k_ints_in_abstract_type(nonEmptyAction, 7);
    set_k_doubles_in_abstract_type(nonEmptyAction, 3);
    set_k_chars_in_abstract_type(nonEmptyAction, 1);

    whichEpisode = 0;
}
/*
 * Environment initialisation hook.
 *
 * Resets env_whichEpisode and creates the two observation structures kept in
 * emptyObservation and nonEmptyObservation. Both start from
 * allocateRLStructPointer(0,0,0); the non-empty one is then passed to the
 * set_k_*_in_abstract_type helpers with 2 (ints), 4 (doubles) and 5 (chars).
 *
 * Returns an empty string as the task spec.
 */
const char* env_init()
{
    env_whichEpisode = 0;

    /* The observation that carries no payload at all. */
    emptyObservation = allocateRLStructPointer(0, 0, 0);

    /* The observation that carries payload: start empty, then size each array. */
    nonEmptyObservation = allocateRLStructPointer(0, 0, 0);
    set_k_ints_in_abstract_type(nonEmptyObservation, 2);
    set_k_doubles_in_abstract_type(nonEmptyObservation, 4);
    set_k_chars_in_abstract_type(nonEmptyObservation, 5);

    return "";
}
const reward_observation_terminal_t *env_step(const action_t *theAction) { int envState = kEnvStep; static reward_observation_terminal_t ro = {0,0,0}; unsigned int offset = 0; __RL_CHECK_STRUCT(theAction) rlBufferClear(&theBuffer); offset = 0; /* Send theAction to the client environment */ offset = rlCopyADTToBuffer(theAction, &theBuffer, offset); rlSendBufferData(rlGetEnvironmentConnection(), &theBuffer, envState); rlBufferClear(&theBuffer); rlRecvBufferData(rlGetEnvironmentConnection(), &theBuffer, &envState); assert(envState == kEnvStep); /* Receive theObservation from the client environment */ offset = 0; offset = rlBufferRead(&theBuffer, offset, &ro.terminal, 1, sizeof(int)); offset = rlBufferRead(&theBuffer, offset, &ro.reward, 1, sizeof(double)); if(theObservation==0)theObservation=allocateRLStructPointer(0,0,0); offset = rlCopyBufferToADT(&theBuffer, offset, theObservation); __RL_CHECK_STRUCT(theObservation) ro.observation = theObservation; return &ro; }
/**
 * Pass off the call to env_step.
 *
 * The Java action object itself cannot be passed across JNI, so its payload
 * arrives as three primitive arrays. They are copied into a freshly allocated
 * C action, which is handed to envFuncPointers.env_step and then released.
 * The result is stored in rewardObs, and its observation in
 * sharedReturnVariable, for later retrieval.
 *
 * @param env         JNI environment
 * @param obj         calling Java object (unused)
 * @param intArray    integer payload of the action
 * @param doubleArray double payload of the action
 * @param charArray   character payload of the action
 */
JNIEXPORT void JNICALL Java_org_rlcommunity_rlviz_environmentshell_JNIEnvironment_JNIenvstep(JNIEnv *env, jobject obj, jintArray intArray, jdoubleArray doubleArray, jcharArray charArray) {
    /* Number of jchars staged on the stack per GetCharArrayRegion call. */
    enum { kCharChunk = 256 };

    jsize numInts = env->GetArrayLength(intArray);
    jsize numDoubles = env->GetArrayLength(doubleArray);
    jsize numChars = env->GetArrayLength(charArray);

    action_t* theAction = allocateRLStructPointer(numInts, numDoubles, numChars);

    env->GetIntArrayRegion(intArray, 0, numInts, (jint*) theAction->intArray);
    env->GetDoubleArrayRegion(doubleArray, 0, numDoubles, (jdouble*) theAction->doubleArray);

    /*
     * A Java char (jchar) is 16 bits wide while the action stores plain C
     * chars. Copying the region straight into charArray would write
     * 2*numChars bytes into a numChars-byte buffer, so the data is staged in
     * a jchar buffer and narrowed one element at a time instead.
     */
    jchar staged[kCharChunk];
    for (jsize start = 0; start < numChars; start += kCharChunk) {
        jsize count = numChars - start;
        if (count > kCharChunk) {
            count = kCharChunk;
        }
        env->GetCharArrayRegion(charArray, start, count, staged);
        for (jsize i = 0; i < count; ++i) {
            theAction->charArray[start + i] = (char) staged[i];
        }
    }

    /* Run the step, then release the temporary action. */
    rewardObs = envFuncPointers.env_step(theAction);
    freeRLStructPointer(theAction);

    sharedReturnVariable = (observation_t *) rewardObs->observation;
}
/*
 * Agent initialisation hook.
 *
 * Decodes the task spec string and allocates this_action with as many
 * integer and double slots as the task spec reports for actions (and no
 * chars). The process exits with status 1 if the temporary task spec
 * structure cannot be allocated or the task spec cannot be decoded.
 */
void agent_init(const char* task_spec)
{
    /*Struct to hold the parsed task spec*/
    taskspec_t *ts = (taskspec_t*)malloc(sizeof(taskspec_t));
    int decode_result;

    if (ts == 0) {
        printf("Could not allocate memory for the task spec struct\n");
        exit(1);
    }

    decode_result = decode_taskspec(ts, task_spec);
    if (decode_result != 0) {
        printf("Could not decode task spec, code: %d for task spec: %s\n",decode_result,task_spec);
        exit(1);
    }

    this_action = allocateRLStructPointer(getNumIntAct(ts), getNumDoubleAct(ts), 0);

    /*
     * The parsed task spec is only needed to size the action, so release it
     * here: first what the decoder stored inside it, then the struct itself.
     */
    free_taskspec_struct(ts);
    free(ts);
}
void agent_init(const char* task_spec) { /*Struct to hold the parsed task spec*/ taskspec_t *ts=(taskspec_t*)malloc(sizeof(taskspec_t)); int decode_result = decode_taskspec( ts, task_spec ); if(decode_result!=0){ printf("Could not decode task spec, code: %d for task spec: %s\n",decode_result,task_spec); exit(1); } /* Lots of assertions to make sure that we can handle this problem. */ assert(getNumIntObs(ts)==1); assert(getNumDoubleObs(ts)==0); assert(isIntObsMax_special(ts,0)==0); assert(isIntObsMin_special(ts,0)==0); numStates=getIntObsMax(ts,0)+1; assert(getNumIntAct(ts)==1); assert(getNumDoubleAct(ts)==0); assert(isIntActMax_special(ts,0)==0); assert(isIntActMin_special(ts,0)==0); numActions=getIntActMax(ts,0)+1; free_taskspec_struct(ts); /* Make the taskspec struct a "blank slate" */ free(ts); /* Free the structure itself */ /*Here is where you might allocate storage for parameters (value function or policy, last action, last observation, etc)*/ /*Here you would parse the task spec if you felt like it*/ /*Allocate memory for a one-dimensional integer action using utility functions from RLStruct_util*/ allocateRLStruct(&this_action,1,0,0); allocateRLStruct(&last_action,1,0,0); /* That is equivalent to: this_action.numInts = 1; this_action.intArray = (int*)calloc(1,sizeof(int)); this_action.numDoubles = 0; this_action.doubleArray = 0; this_action.numChars = 0; this_action.charArray = 0; */ /*Allocate memory for a one-dimensional integer observation using utility functions from RLStruct_util*/ last_observation=allocateRLStructPointer(1,0,0); /*Later we will parse this from the task spec, but for now*/ value_function=(double *)calloc(numActions*numStates,sizeof(double)); }
const observation_t *env_start() { int envState = kEnvStart; unsigned int offset = 0; rlBufferClear(&theBuffer); rlSendBufferData(rlGetEnvironmentConnection(), &theBuffer, envState); rlBufferClear(&theBuffer); rlRecvBufferData(rlGetEnvironmentConnection(), &theBuffer, &envState); assert(envState == kEnvStart); if(theObservation==0)theObservation=allocateRLStructPointer(0,0,0); __RL_CHECK_STRUCT(theObservation) offset = rlCopyBufferToADT(&theBuffer, offset, theObservation); return theObservation; }
/* Send the observation to the agent, receive the action and return it */ const action_t *agent_start(const observation_t *theObservation) { int agentState = kAgentStart; unsigned int offset = 0; __RL_CHECK_STRUCT(theObservation); rlBufferClear(&theBuffer); offset = 0; offset = rlCopyADTToBuffer(theObservation, &theBuffer, offset); rlSendBufferData(rlGetAgentConnection(), &theBuffer, agentState); rlBufferClear(&theBuffer); rlRecvBufferData(rlGetAgentConnection(), &theBuffer, &agentState); assert(agentState == kAgentStart); offset = 0; if(globalAction==0)globalAction=allocateRLStructPointer(0,0,0); offset = rlCopyBufferToADT(&theBuffer, offset, globalAction); return globalAction; }
/* Send the reward and the observation to the agent, receive the action and return it */ const action_t *agent_step(const double theReward, const observation_t *theObservation) { int agentState = kAgentStep; unsigned int offset = 0; rlBufferClear(&theBuffer); offset = 0; offset = rlBufferWrite(&theBuffer, offset, &theReward, 1, sizeof(double)); offset = rlCopyADTToBuffer(theObservation, &theBuffer, offset); rlSendBufferData(rlGetAgentConnection(), &theBuffer, agentState); rlBufferClear(&theBuffer); rlRecvBufferData(rlGetAgentConnection(), &theBuffer, &agentState); assert(agentState == kAgentStep); offset = 0; if(globalAction==0)globalAction=allocateRLStructPointer(0,0,0); offset = rlCopyBufferToADT(&theBuffer, offset, globalAction); return globalAction; }
/*
 * Environment initialisation hook.
 *
 * Stores a structure obtained from allocateRLStructPointer(0,0,0) in o and
 * returns a fixed placeholder task spec string.
 */
const char* env_init()
{
    const char *spec = "sample task spec";

    o = allocateRLStructPointer(0, 0, 0);

    return spec;
}
/*
 * Start an episode and return the (empty) first observation.
 *
 * The observation structure is created the first time this is called and
 * reused on later calls. Allocating a new one on every call would overwrite
 * the only reference to the previous structure and leak it once per episode.
 *
 * NOTE(review): assumes theObservation is null before the first call and is
 * reset to null by whatever code frees it -- confirm against its declaration
 * and the cleanup path.
 */
const observation_t *env_start()
{
    if (theObservation == 0) {
        theObservation = allocateRLStructPointer(0, 0, 0);
    }

    return theObservation;
}