Example #1
0
/*
 * agent_init: configure the agent from a task spec string.
 *
 * Decodes task_spec, asserts that it describes exactly one integer
 * observation dimension and one integer action dimension (no doubles, no
 * "special" min/max values), and derives numStates and numActions as the
 * maximum of each range plus one. It then allocates this_action,
 * last_action, last_observation and a zero-initialised value_function
 * holding numActions*numStates doubles.
 *
 * task_spec: the task spec string to decode.
 *
 * Terminates the process with exit(1) when memory cannot be allocated or
 * the task spec cannot be decoded.
 */
void agent_init(const char* task_spec)
{
	/*Struct to hold the parsed task spec*/
	taskspec_t *ts=(taskspec_t*)malloc(sizeof(taskspec_t));
	if(ts==NULL){
		/* Without this check decode_taskspec would be handed a null pointer. */
		fprintf(stderr,"Could not allocate memory for the task spec struct\n");
		exit(1);
	}

	int decode_result = decode_taskspec( ts, task_spec );
	if(decode_result!=0){
		printf("Could not decode task spec, code: %d for task spec: %s\n",decode_result,task_spec);
		exit(1);
	}
	
	/* Lots of assertions to make sure that we can handle this problem.
	   NOTE: assert() expands to nothing when NDEBUG is defined, so these
	   checks only run in builds that keep assertions enabled. */
	assert(getNumIntObs(ts)==1);
	assert(getNumDoubleObs(ts)==0);
	assert(isIntObsMax_special(ts,0)==0);
	assert(isIntObsMin_special(ts,0)==0);

	/* One state per integer observation value up to and including the max. */
	numStates=getIntObsMax(ts,0)+1;

	assert(getNumIntAct(ts)==1);
	assert(getNumDoubleAct(ts)==0);
	assert(isIntActMax_special(ts,0)==0);
	assert(isIntActMin_special(ts,0)==0);

	/* One action per integer action value up to and including the max. */
	numActions=getIntActMax(ts,0)+1;

	free_taskspec_struct(ts); /* Make the taskspec struct a "blank slate" */
	free(ts); /* Free the structure itself */
	ts=NULL; /* Guard against accidental reuse further down */
	
	/*Allocate memory for a one-dimensional integer action using utility functions from RLStruct_util*/
	allocateRLStruct(&this_action,1,0,0);
	allocateRLStruct(&last_action,1,0,0);
	/* That is equivalent to:
			 this_action.numInts     =  1;
			 this_action.intArray    = (int*)calloc(1,sizeof(int));
			 this_action.numDoubles  = 0;
			 this_action.doubleArray = 0;
			 this_action.numChars    = 0;
			 this_action.charArray   = 0;
	*/

	/*Allocate memory for a one-dimensional integer observation using utility functions from RLStruct_util*/
	last_observation=allocateRLStructPointer(1,0,0);
	
	/* Value table with one zero-initialised entry per (state, action) pair. */
	value_function=(double *)calloc(numActions*numStates,sizeof(double));
	if(value_function==NULL){
		fprintf(stderr,"Could not allocate memory for the value function\n");
		exit(1);
	}
	
}
Example #2
0
/*****************************

	RL-Glue Methods 
	
*******************************/
/*
 * env_init: set up the environment and return its task spec string.
 *
 * Sets the world dimensions to 6 rows by 18 columns, allocates
 * this_observation with room for one integer, and resets
 * this_reward_observation (pointing at this_observation, reward 0,
 * terminal 0).
 *
 * Returns a pointer to a string literal; the caller must not modify or
 * free it.
 */
const char* env_init(){    
	/* Pointer-to-const: the target is a string literal and must never be
	   written through. */
	const char *task_spec_string="VERSION RL-Glue-3.0 PROBLEMTYPE episodic "
							 "DISCOUNTFACTOR 1 OBSERVATIONS INTS (0 107) "
							 "ACTIONS INTS (0 3)  REWARDS (-100.0 10.0) "
							"EXTRA SampleMinesEnvironment(C/C++) by Brian Tanner.";

	/* 6 * 18 = 108 cells; presumably this is why the task spec advertises
	   observations in the range 0..107 -- keep the two in sync. */
	the_world.numRows = 6;
	the_world.numCols = 18;


	/* Allocate the observation variable */
	allocateRLStruct(&this_observation,1,0,0);
	/* That is equivalent to:
		 this_observation.numInts     =  1;
		 this_observation.intArray    = (int*)calloc(1,sizeof(int));
		 this_observation.numDoubles  = 0;
		 this_observation.doubleArray = 0;
		 this_observation.numChars    = 0;
		 this_observation.charArray   = 0;
	*/
	/* Setup the reward_observation variable */
	this_reward_observation.observation=&this_observation;
	this_reward_observation.reward=0;
	this_reward_observation.terminal=0;

	return task_spec_string;
}
Example #3
0
/*
 * agent_init: seed the random number generator and read the action range
 * from the task spec.
 *
 * Decodes task_spec, stores getIntActMax(ts,0)-1 in possibleActions,
 * releases the parsed task spec, and allocates currentAction with room
 * for one integer.
 *
 * task_spec: the task spec string to decode.
 *
 * Terminates the process with exit(1) when memory cannot be allocated or
 * the task spec cannot be decoded.
 */
void agent_init(const char* task_spec)
{
	srand((unsigned)time(NULL));

	/*Struct to hold the parsed task spec*/
	taskspec_t *ts = (taskspec_t*)malloc(sizeof(taskspec_t));
	if (ts == NULL){
		/* Without this check decode_taskspec would be handed a null pointer. */
		fprintf(stderr, "Could not allocate memory for the task spec struct\n");
		exit(1);
	}

	int decode_result = decode_taskspec(ts, task_spec);
	if (decode_result != 0){
		printf("Could not decode task spec, code: %d for task spec: %s\n", decode_result, task_spec);
		exit(1);
	}

	/* NOTE(review): this is max-1, not max+1; with an inclusive range
	   starting at 0 the number of actions would be max+1 -- confirm the
	   offset is intentional before changing it. */
	possibleActions = getIntActMax(ts,0)-1;

	/* The parsed task spec is not needed past this point; release its
	   contents and then the struct itself so it is not leaked. */
	free_taskspec_struct(ts);
	free(ts);
	ts = NULL;

	allocateRLStruct(&currentAction, 1, 0, 0);
	
	//load();
}