示例#1
0
/*
 * Agent initialisation hook.
 *
 * Resets the episode counter and creates the two action structures kept in
 * emptyAction and nonEmptyAction. The task_spec argument is not read here.
 */
void agent_init(const char * task_spec){
	/* Start counting episodes from zero. */
	whichEpisode = 0;

	/* Both actions are first allocated with zero ints, doubles and chars. */
	emptyAction    = allocateRLStructPointer(0, 0, 0);
	nonEmptyAction = allocateRLStructPointer(0, 0, 0);

	/*
	 * Adjust nonEmptyAction through the set_k_* helpers, requesting
	 * 7 ints, 3 doubles and 1 char (presumably these resize the arrays;
	 * confirm against the helper definitions).
	 */
	set_k_ints_in_abstract_type(nonEmptyAction, 7);
	set_k_doubles_in_abstract_type(nonEmptyAction, 3);
	set_k_chars_in_abstract_type(nonEmptyAction, 1);
}
/*
 * Environment initialisation hook.
 *
 * Resets the environment's episode counter, creates the two observation
 * structures kept in emptyObservation and nonEmptyObservation, and returns
 * an empty string.
 */
const char* env_init()
{
	/* Start counting episodes from zero. */
	env_whichEpisode = 0;

	/* Both observations are first allocated with zero ints, doubles and chars. */
	emptyObservation    = allocateRLStructPointer(0, 0, 0);
	nonEmptyObservation = allocateRLStructPointer(0, 0, 0);

	/*
	 * Adjust nonEmptyObservation through the set_k_* helpers, requesting
	 * 2 ints, 4 doubles and 5 chars (presumably these resize the arrays;
	 * confirm against the helper definitions).
	 */
	set_k_ints_in_abstract_type(nonEmptyObservation, 2);
	set_k_doubles_in_abstract_type(nonEmptyObservation, 4);
	set_k_chars_in_abstract_type(nonEmptyObservation, 5);

	return "";
}
示例#3
0
const reward_observation_terminal_t *env_step(const action_t *theAction) {
  int envState = kEnvStep;
  static reward_observation_terminal_t ro = {0,0,0};
  unsigned int offset = 0;

  __RL_CHECK_STRUCT(theAction)
  rlBufferClear(&theBuffer);
  offset = 0;
  /* Send theAction to the client environment */
  offset = rlCopyADTToBuffer(theAction, &theBuffer, offset);
  rlSendBufferData(rlGetEnvironmentConnection(), &theBuffer, envState);

  rlBufferClear(&theBuffer);
  rlRecvBufferData(rlGetEnvironmentConnection(), &theBuffer, &envState);
  assert(envState == kEnvStep);

  /* Receive theObservation from the client environment */
  offset = 0;
  offset = rlBufferRead(&theBuffer, offset, &ro.terminal, 1, sizeof(int));
  offset = rlBufferRead(&theBuffer, offset, &ro.reward, 1, sizeof(double));

	if(theObservation==0)theObservation=allocateRLStructPointer(0,0,0);
  offset = rlCopyBufferToADT(&theBuffer, offset, theObservation);
  __RL_CHECK_STRUCT(theObservation)

  ro.observation = theObservation;
  return &ro;
}
/**
 * Pass off the call to env_step.
 *
 * The Java action object cannot be handed across JNI directly, so its
 * contents arrive as three primitive arrays (ints, doubles, chars). They are
 * copied into a freshly allocated action_t, which is passed to env_step via
 * envFuncPointers and then freed. The result is kept in rewardObs and its
 * observation in sharedReturnVariable.
 *
 * If the JVM cannot supply the char array elements, the action is released
 * and the function returns without calling env_step.
 */
JNIEXPORT void JNICALL Java_org_rlcommunity_rlviz_environmentshell_JNIEnvironment_JNIenvstep(JNIEnv *env, jobject obj, jintArray intArray, jdoubleArray doubleArray, jcharArray charArray) {
    const jsize numInts = env->GetArrayLength(intArray);
    const jsize numDoubles = env->GetArrayLength(doubleArray);
    const jsize numChars = env->GetArrayLength(charArray);

    action_t* theAction = allocateRLStructPointer(numInts, numDoubles, numChars);

    env->GetIntArrayRegion(intArray, 0, numInts, (jint*) theAction->intArray);
    env->GetDoubleArrayRegion(doubleArray, 0, numDoubles, (jdouble*) theAction->doubleArray);

    // jchar is a 16-bit type while the action's charArray holds numChars plain
    // chars, so copying the region straight into it would write twice as many
    // bytes as were allocated. Narrow each element individually instead.
    if (numChars > 0) {
        jchar* javaChars = env->GetCharArrayElements(charArray, 0);
        if (javaChars == 0) {
            // The JVM could not provide the elements; do not step with a
            // half-filled action.
            freeRLStructPointer(theAction);
            return;
        }
        for (jsize i = 0; i < numChars; ++i) {
            // Characters that do not fit in a char are truncated.
            theAction->charArray[i] = (char) javaChars[i];
        }
        // JNI_ABORT: the elements were only read, nothing to copy back.
        env->ReleaseCharArrayElements(charArray, javaChars, JNI_ABORT);
    }

    // get the return from env_step and parse it into a form that java can check.
    rewardObs = envFuncPointers.env_step(theAction);
    freeRLStructPointer(theAction);
    sharedReturnVariable = (observation_t *) rewardObs->observation;
}
示例#5
0
/*
 * Agent initialisation hook.
 *
 * Decodes task_spec into a temporary taskspec_t and allocates this_action
 * with the number of integer and double action dimensions it reports (and no
 * chars). The temporary struct is released before returning.
 *
 * Exits the process with status 1 if the struct cannot be allocated or the
 * task spec cannot be decoded.
 */
void agent_init(const char* task_spec)
{
	/*Struct to hold the parsed task spec*/
	taskspec_t *ts=(taskspec_t*)malloc(sizeof(taskspec_t));
	int decode_result;

	if(ts==0){
		printf("Could not allocate memory for the task spec struct\n");
		exit(1);
	}

	decode_result = decode_taskspec( ts, task_spec );
	if(decode_result!=0){
		printf("Could not decode task spec, code: %d for task spec: %s\n",decode_result,task_spec);
		exit(1);
	}
	this_action=allocateRLStructPointer(getNumIntAct(ts),getNumDoubleAct(ts),0);

	/* The parsed spec is only needed for the sizes above; release it so it is not leaked. */
	free_taskspec_struct(ts); /* Release what the decoder stored inside the struct */
	free(ts); /* Free the structure itself */
}
示例#6
0
/*
 * Agent initialisation hook.
 *
 * Decodes task_spec, asserts that the problem has exactly one integer
 * observation and one integer action (no doubles, no special min/max values),
 * derives numStates and numActions from the respective maxima, and allocates
 * the action/observation structs and a zero-filled value_function table with
 * numActions*numStates entries.
 *
 * Exits the process with status 1 if the task spec struct cannot be
 * allocated or the task spec cannot be decoded.
 */
void agent_init(const char* task_spec)
{
	/*Struct to hold the parsed task spec*/
	taskspec_t *ts=(taskspec_t*)malloc(sizeof(taskspec_t));
	int decode_result;

	/* Do not hand a null pointer to the decoder if the allocation failed. */
	if(ts==0){
		fprintf(stderr,"Could not allocate memory for the task spec struct\n");
		exit(1);
	}

	decode_result = decode_taskspec( ts, task_spec );
	if(decode_result!=0){
		printf("Could not decode task spec, code: %d for task spec: %s\n",decode_result,task_spec);
		exit(1);
	}
	
	/* Lots of assertions to make sure that we can handle this problem.  */
	assert(getNumIntObs(ts)==1);
	assert(getNumDoubleObs(ts)==0);
	assert(isIntObsMax_special(ts,0)==0);
	assert(isIntObsMin_special(ts,0)==0);

	/* One state per value up to the observation maximum (assumes the minimum is 0 - TODO confirm) */
	numStates=getIntObsMax(ts,0)+1;

	assert(getNumIntAct(ts)==1);
	assert(getNumDoubleAct(ts)==0);
	assert(isIntActMax_special(ts,0)==0);
	assert(isIntActMin_special(ts,0)==0);

	/* One action per value up to the action maximum (assumes the minimum is 0 - TODO confirm) */
	numActions=getIntActMax(ts,0)+1;

	free_taskspec_struct(ts); /* Make the taskspec struct a "blank slate" */
	free(ts); /* Free the structure itself */
	
	/*Allocate memory for a one-dimensional integer action using utility functions from RLStruct_util*/
	allocateRLStruct(&this_action,1,0,0);
	allocateRLStruct(&last_action,1,0,0);
	/* That is equivalent to:
			 this_action.numInts     =  1;
			 this_action.intArray    = (int*)calloc(1,sizeof(int));
			 this_action.numDoubles  = 0;
			 this_action.doubleArray = 0;
			 this_action.numChars    = 0;
			 this_action.charArray   = 0;
	*/

	/*Allocate memory for a one-dimensional integer observation using utility functions from RLStruct_util*/
	last_observation=allocateRLStructPointer(1,0,0);
	
	/*Zero-initialised table with one entry per (action, state) pair*/
	value_function=(double *)calloc(numActions*numStates,sizeof(double));
	
}
示例#7
0
const observation_t *env_start() {
	int envState = kEnvStart;
	unsigned int offset = 0;

	rlBufferClear(&theBuffer);
	rlSendBufferData(rlGetEnvironmentConnection(), &theBuffer, envState);

	rlBufferClear(&theBuffer);
	rlRecvBufferData(rlGetEnvironmentConnection(), &theBuffer, &envState);
	assert(envState == kEnvStart);

	if(theObservation==0)theObservation=allocateRLStructPointer(0,0,0);
	__RL_CHECK_STRUCT(theObservation)
	
	
	offset = rlCopyBufferToADT(&theBuffer, offset, theObservation);
	return theObservation;
}
示例#8
0
/*
 * Send the observation to the agent, receive the action and return it.
 *
 * The observation is copied into theBuffer and sent over the agent
 * connection tagged with kAgentStart. The reply is copied into the shared
 * globalAction struct, which is allocated on first use and is what the
 * returned pointer refers to.
 */
const action_t *agent_start(const observation_t *theObservation) {
	unsigned int readPos = 0;
	int state = kAgentStart;

	__RL_CHECK_STRUCT(theObservation);

	/* Send theObservation to the agent */
	rlBufferClear(&theBuffer);
	rlCopyADTToBuffer(theObservation, &theBuffer, 0);
	rlSendBufferData(rlGetAgentConnection(), &theBuffer, state);

	/* Wait for the reply; it must carry the same state tag we sent. */
	rlBufferClear(&theBuffer);
	rlRecvBufferData(rlGetAgentConnection(), &theBuffer, &state);
	assert(state == kAgentStart);

	/* Create the shared action struct the first time through. */
	if (globalAction == 0) {
		globalAction = allocateRLStructPointer(0,0,0);
	}
	readPos = rlCopyBufferToADT(&theBuffer, readPos, globalAction);

	return globalAction;
}
示例#9
0
/*
 * Send the reward and the observation to the agent, receive the action and
 * return it.
 *
 * The reward (one double) is written to theBuffer first, followed by the
 * observation; the buffer is sent over the agent connection tagged with
 * kAgentStep. The reply is copied into the shared globalAction struct, which
 * is allocated on first use and is what the returned pointer refers to.
 */
const action_t *agent_step(const double theReward, const observation_t *theObservation) {
  unsigned int pos = 0;
  int state = kAgentStep;

  /* Payload layout: reward, then observation. */
  rlBufferClear(&theBuffer);
  pos = rlBufferWrite(&theBuffer, pos, &theReward, 1, sizeof(double));
  pos = rlCopyADTToBuffer(theObservation, &theBuffer, pos);
  rlSendBufferData(rlGetAgentConnection(), &theBuffer, state);

  /* Wait for the reply; it must carry the same state tag we sent. */
  rlBufferClear(&theBuffer);
  rlRecvBufferData(rlGetAgentConnection(), &theBuffer, &state);
  assert(state == kAgentStep);

  /* Create the shared action struct the first time through. */
  if (globalAction == 0) {
    globalAction = allocateRLStructPointer(0,0,0);
  }
  pos = rlCopyBufferToADT(&theBuffer, 0, globalAction);

  return globalAction;
}
示例#10
0
/*
 * Environment initialisation hook.
 *
 * Allocates the struct kept in o with zero ints, doubles and chars, and
 * returns a fixed placeholder task spec string.
 */
const char* env_init()
{
	static const char *const kTaskSpec = "sample task spec";

	o = allocateRLStructPointer(0, 0, 0);

	return kTaskSpec;
}
/*
 * Start an episode by handing back an observation with zero ints, doubles
 * and chars.
 *
 * NOTE(review): a new struct is allocated on every call and the previous
 * value of theObservation is not released here - confirm whether it is freed
 * elsewhere.
 */
const observation_t *env_start()
{
	theObservation = allocateRLStructPointer(0, 0, 0);

	return theObservation;
}