Ejemplo n.º 1
0
/*
 * Initialise the agent from the environment's task specification string.
 *
 * The task spec is decoded and checked (with assert) to describe exactly one
 * integer observation and one integer action, neither of which has a special
 * minimum or maximum. numStates and numActions are then set to the respective
 * maximum + 1, the action and observation structures are allocated, and
 * value_function is allocated zero-filled with numActions*numStates entries.
 *
 * task_spec: the task specification string handed to decode_taskspec().
 *
 * On an allocation failure or a decode failure a diagnostic is printed and
 * the process terminates through exit(1).
 */
void agent_init(const char* task_spec)
{
	/*Struct to hold the parsed task spec*/
	taskspec_t *ts=(taskspec_t*)malloc(sizeof(taskspec_t));
	int decode_result;

	/* Never hand a failed allocation to the parser */
	if(ts==NULL){
		fprintf(stderr,"Could not allocate memory for the task spec struct\n");
		exit(1);
	}

	decode_result = decode_taskspec( ts, task_spec );
	if(decode_result!=0){
		printf("Could not decode task spec, code: %d for task spec: %s\n",decode_result,task_spec);
		exit(1);
	}
	
	/* Lots of assertions to make sure that we can handle this problem.
	   NOTE: assert() is compiled out when NDEBUG is defined, so these
	   checks only protect debug builds. */
	assert(getNumIntObs(ts)==1);
	assert(getNumDoubleObs(ts)==0);
	assert(isIntObsMax_special(ts,0)==0);
	assert(isIntObsMin_special(ts,0)==0);

	/* NOTE(review): max+1 presumably assumes the observation minimum is 0;
	   the minimum's value is not checked here -- confirm. */
	numStates=getIntObsMax(ts,0)+1;

	assert(getNumIntAct(ts)==1);
	assert(getNumDoubleAct(ts)==0);
	assert(isIntActMax_special(ts,0)==0);
	assert(isIntActMin_special(ts,0)==0);

	/* Same max+1 convention as for the observations */
	numActions=getIntActMax(ts,0)+1;

	free_taskspec_struct(ts); /* Make the taskspec struct a "blank slate" */
	free(ts); /* Free the structure itself */
	/*Here is where you might allocate storage for parameters (value function or policy, last action, last observation, etc)*/
	
	/*Here you would parse the task spec if you felt like it*/
	
	/*Allocate memory for a one-dimensional integer action using utility functions from RLStruct_util*/
	allocateRLStruct(&this_action,1,0,0);
	allocateRLStruct(&last_action,1,0,0);
	/* That is equivalent to:
			 this_action.numInts     =  1;
			 this_action.intArray    = (int*)calloc(1,sizeof(int));
			 this_action.numDoubles  = 0;
			 this_action.doubleArray = 0;
			 this_action.numChars    = 0;
			 this_action.charArray   = 0;
	*/

	/*Allocate memory for a one-dimensional integer observation using utility functions from RLStruct_util*/
	last_observation=allocateRLStructPointer(1,0,0);
	
	/* One zero-initialised entry per (state, action) pair. The element count
	   is widened to size_t before multiplying so the product is not computed
	   in a narrower type. */
	value_function=(double *)calloc((size_t)numActions*(size_t)numStates,sizeof(double));
	if(value_function==NULL){
		fprintf(stderr,"Could not allocate memory for the value function\n");
		exit(1);
	}
	
}
Ejemplo n.º 2
0
/*
 * Initialise the agent from the environment's task specification string.
 *
 * Seeds rand() from the current time, decodes the task spec, stores the
 * maximum of integer action 0 minus one in possibleActions, releases the
 * parsed task spec and allocates currentAction with room for one integer.
 *
 * task_spec: the task specification string handed to decode_taskspec().
 *
 * On an allocation failure or a decode failure a diagnostic is printed and
 * the process terminates through exit(1).
 */
void agent_init(const char* task_spec)
{
	/*Struct to hold the parsed task spec*/
	taskspec_t *ts;
	int decode_result;

	srand((unsigned int)time(NULL));

	ts = (taskspec_t*)malloc(sizeof(taskspec_t));
	/* Never hand a failed allocation to the parser */
	if (ts == NULL){
		fprintf(stderr, "Could not allocate memory for the task spec struct\n");
		exit(1);
	}

	decode_result = decode_taskspec(ts, task_spec);
	if (decode_result != 0){
		printf("Could not decode task spec, code: %d for task spec: %s\n", decode_result, task_spec);
		exit(1);
	}

	/* NOTE(review): this is max-1, not max+1; whether that is intentional
	   cannot be determined from this function -- confirm against the code
	   that consumes possibleActions. */
	possibleActions = getIntActMax(ts,0)-1;

	/* The parsed spec is not used past this point: release what the parser
	   stored inside the struct, then the struct itself, so that repeated
	   calls do not leak. */
	free_taskspec_struct(ts);
	free(ts);

	allocateRLStruct(&currentAction, 1, 0, 0);
	
	//load();
}
Ejemplo n.º 3
0
/*
 * Print everything the task-spec accessor functions report for ts to stdout:
 * the problem type, the integer and double observation ranges, the
 * observation char count, the same three items for actions, and finally the
 * reward range. Bounds flagged as special are printed symbolically
 * (NEGINF / POSINF / UNSPEC, or ERROR when neither flag is set); ordinary
 * bounds are printed numerically.
 *
 * ts: parsed task spec to dump; when NULL nothing is printed.
 */
void test_accessors( taskspec_t *ts )
{
	int_range_t irange;
	double_range_t drange;
	int idx, count;
	const char *label;

	if (!ts)
		return;

	/* ---- problem type ---- */
	printf("Problem type: ");
	label = (isEpisodic(ts) == 1)   ? "episodic\n"
	      : (isContinuing(ts) == 1) ? "continuing\n"
	      : (isOtherType(ts) == 1)  ? "other\n"
	      : "ERROR\n";
	printf("%s", label);

	/* ---- observations: integer ranges ---- */
	printf("OBSERVATIONS:\n");
	count = getNumIntObs(ts);
	printf("  INTS (%d total): ", count);
	for (idx = 0; idx < count; ++idx) {
		irange = getIntObs(ts, idx);
		printf("(%d ", irange.repeat_count);

		/* lower bound */
		if (isIntObsMin_special(ts, idx) != 1) {
			printf("%d ", getIntObsMin(ts, idx));
		} else {
			label = (isIntObsMin_negInf(ts, idx) == 1) ? "NEGINF "
			      : (isIntObsMin_unspec(ts, idx) == 1) ? "UNSPEC "
			      : "ERROR ";
			printf("%s", label);
		}

		/* upper bound */
		if (isIntObsMax_special(ts, idx) != 1) {
			printf("%d) ", getIntObsMax(ts, idx));
		} else {
			label = (isIntObsMax_posInf(ts, idx) == 1) ? "POSINF) "
			      : (isIntObsMax_unspec(ts, idx) == 1) ? "UNSPEC) "
			      : "ERROR) ";
			printf("%s", label);
		}
	}

	/* ---- observations: double ranges ---- */
	count = getNumDoubleObs(ts);
	printf("\n  DOUBLES (%d total): ", count);
	for (idx = 0; idx < count; ++idx) {
		drange = getDoubleObs(ts, idx);
		printf("(%d ", drange.repeat_count);

		/* lower bound */
		if (isDoubleObsMin_special(ts, idx) != 1) {
			printf("%g ", getDoubleObsMin(ts, idx));
		} else {
			label = (isDoubleObsMin_negInf(ts, idx) == 1) ? "NEGINF "
			      : (isDoubleObsMin_unspec(ts, idx) == 1) ? "UNSPEC "
			      : "ERROR ";
			printf("%s", label);
		}

		/* upper bound */
		if (isDoubleObsMax_special(ts, idx) != 1) {
			printf("%g) ", getDoubleObsMax(ts, idx));
		} else {
			label = (isDoubleObsMax_posInf(ts, idx) == 1) ? "POSINF) "
			      : (isDoubleObsMax_unspec(ts, idx) == 1) ? "UNSPEC) "
			      : "ERROR) ";
			printf("%s", label);
		}
	}

	printf("\n  CHARCOUNT: %d", getCharcountObs(ts));

	/* ---- actions: integer ranges ---- */
	printf("\nACTIONS:\n");
	count = getNumIntAct(ts);
	printf("  INTS (%d total): ", count);
	for (idx = 0; idx < count; ++idx) {
		irange = getIntAct(ts, idx);
		printf("(%d ", irange.repeat_count);

		/* lower bound */
		if (isIntActMin_special(ts, idx) != 1) {
			printf("%d ", getIntActMin(ts, idx));
		} else {
			label = (isIntActMin_negInf(ts, idx) == 1) ? "NEGINF "
			      : (isIntActMin_unspec(ts, idx) == 1) ? "UNSPEC "
			      : "ERROR ";
			printf("%s", label);
		}

		/* upper bound */
		if (isIntActMax_special(ts, idx) != 1) {
			printf("%d) ", getIntActMax(ts, idx));
		} else {
			label = (isIntActMax_posInf(ts, idx) == 1) ? "POSINF) "
			      : (isIntActMax_unspec(ts, idx) == 1) ? "UNSPEC) "
			      : "ERROR) ";
			printf("%s", label);
		}
	}

	/* ---- actions: double ranges ---- */
	count = getNumDoubleAct(ts);
	printf("\n  DOUBLES (%d total): ", count);
	for (idx = 0; idx < count; ++idx) {
		drange = getDoubleAct(ts, idx);
		printf("(%d ", drange.repeat_count);

		/* lower bound */
		if (isDoubleActMin_special(ts, idx) != 1) {
			printf("%g ", getDoubleActMin(ts, idx));
		} else {
			label = (isDoubleActMin_negInf(ts, idx) == 1) ? "NEGINF "
			      : (isDoubleActMin_unspec(ts, idx) == 1) ? "UNSPEC "
			      : "ERROR ";
			printf("%s", label);
		}

		/* upper bound */
		if (isDoubleActMax_special(ts, idx) != 1) {
			printf("%g) ", getDoubleActMax(ts, idx));
		} else {
			label = (isDoubleActMax_posInf(ts, idx) == 1) ? "POSINF) "
			      : (isDoubleActMax_unspec(ts, idx) == 1) ? "UNSPEC) "
			      : "ERROR) ";
			printf("%s", label);
		}
	}

	printf("\n  CHARCOUNT: %d", getCharcountAct(ts));

	/* ---- reward range ---- */
	printf("\nREWARD: (");

	/* lower bound */
	if (isRewardMin_special(ts) != 1) {
		printf("%g ", getRewardMin(ts));
	} else {
		label = (isRewardMin_negInf(ts) == 1) ? "NEGINF "
		      : (isRewardMin_unspec(ts) == 1) ? "UNSPEC "
		      : "ERROR ";
		printf("%s", label);
	}

	/* upper bound */
	if (isRewardMax_special(ts) != 1) {
		printf("%g) ", getRewardMax(ts));
	} else {
		label = (isRewardMax_posInf(ts) == 1) ? "POSINF) "
		      : (isRewardMax_unspec(ts) == 1) ? "UNSPEC) "
		      : "ERROR) ";
		printf("%s", label);
	}
	printf("\n");
}