void agent_init(const char* task_spec) { /*Struct to hold the parsed task spec*/ taskspec_t *ts=(taskspec_t*)malloc(sizeof(taskspec_t)); int decode_result = decode_taskspec( ts, task_spec ); if(decode_result!=0){ printf("Could not decode task spec, code: %d for task spec: %s\n",decode_result,task_spec); exit(1); } /* Lots of assertions to make sure that we can handle this problem. */ assert(getNumIntObs(ts)==1); assert(getNumDoubleObs(ts)==0); assert(isIntObsMax_special(ts,0)==0); assert(isIntObsMin_special(ts,0)==0); numStates=getIntObsMax(ts,0)+1; assert(getNumIntAct(ts)==1); assert(getNumDoubleAct(ts)==0); assert(isIntActMax_special(ts,0)==0); assert(isIntActMin_special(ts,0)==0); numActions=getIntActMax(ts,0)+1; free_taskspec_struct(ts); /* Make the taskspec struct a "blank slate" */ free(ts); /* Free the structure itself */ /*Here is where you might allocate storage for parameters (value function or policy, last action, last observation, etc)*/ /*Here you would parse the task spec if you felt like it*/ /*Allocate memory for a one-dimensional integer action using utility functions from RLStruct_util*/ allocateRLStruct(&this_action,1,0,0); allocateRLStruct(&last_action,1,0,0); /* That is equivalent to: this_action.numInts = 1; this_action.intArray = (int*)calloc(1,sizeof(int)); this_action.numDoubles = 0; this_action.doubleArray = 0; this_action.numChars = 0; this_action.charArray = 0; */ /*Allocate memory for a one-dimensional integer observation using utility functions from RLStruct_util*/ last_observation=allocateRLStructPointer(1,0,0); /*Later we will parse this from the task spec, but for now*/ value_function=(double *)calloc(numActions*numStates,sizeof(double)); }
void agent_init(const char* task_spec) { srand(time(0)); /*Struct to hold the parsed task spec*/ taskspec_t *ts = (taskspec_t*)malloc(sizeof(taskspec_t)); int decode_result = decode_taskspec(ts, task_spec); if (decode_result != 0){ printf("Could not decode task spec, code: %d for task spec: %s\n", decode_result, task_spec); exit(1); } possibleActions = getIntActMax(ts,0)-1; allocateRLStruct(¤tAction, 1, 0, 0); //load(); }
/*
 * test_accessors - print the contents of a parsed task spec to stdout using
 * only the accessor functions.
 *
 * ts: the parsed task spec; nothing is printed when it is NULL.
 *
 * Output order: problem type, observation ranges (ints, doubles, char count),
 * action ranges (ints, doubles, char count), then the reward range. A bound
 * flagged as "special" is printed as NEGINF / POSINF / UNSPEC (or ERROR when
 * neither sub-check matches); any other bound is printed as a number.
 */
void test_accessors( taskspec_t *ts )
{
	int_range_t int_range;
	double_range_t dbl_range;
	const char *label;
	int idx;
	int count;

	if (ts == NULL) {
		return;
	}

	/* ---- problem type ---- */
	printf( "Problem type: " );
	label = (isEpisodic( ts ) == 1)   ? "episodic\n"
	      : (isContinuing( ts ) == 1) ? "continuing\n"
	      : (isOtherType( ts ) == 1)  ? "other\n"
	      :                             "ERROR\n";
	printf( "%s", label );

	/* ---- observations: integer ranges ---- */
	printf( "OBSERVATIONS:\n" );
	count = getNumIntObs( ts );
	printf( " INTS (%d total): ", count );
	for (idx = 0; idx < count; ++idx) {
		int_range = getIntObs( ts, idx );
		printf( "(%d ", int_range.repeat_count );

		/* lower bound */
		if (isIntObsMin_special( ts, idx ) != 1) {
			printf( "%d ", getIntObsMin( ts, idx ) );
		} else {
			label = (isIntObsMin_negInf( ts, idx ) == 1) ? "NEGINF "
			      : (isIntObsMin_unspec( ts, idx ) == 1) ? "UNSPEC "
			      :                                        "ERROR ";
			printf( "%s", label );
		}

		/* upper bound */
		if (isIntObsMax_special( ts, idx ) != 1) {
			printf( "%d) ", getIntObsMax( ts, idx ) );
		} else {
			label = (isIntObsMax_posInf( ts, idx ) == 1) ? "POSINF) "
			      : (isIntObsMax_unspec( ts, idx ) == 1) ? "UNSPEC) "
			      :                                        "ERROR) ";
			printf( "%s", label );
		}
	}

	/* ---- observations: double ranges ---- */
	count = getNumDoubleObs( ts );
	printf( "\n DOUBLES (%d total): ", count );
	for (idx = 0; idx < count; ++idx) {
		dbl_range = getDoubleObs( ts, idx );
		printf( "(%d ", dbl_range.repeat_count );

		/* lower bound */
		if (isDoubleObsMin_special( ts, idx ) != 1) {
			printf( "%g ", getDoubleObsMin( ts, idx ) );
		} else {
			label = (isDoubleObsMin_negInf( ts, idx ) == 1) ? "NEGINF "
			      : (isDoubleObsMin_unspec( ts, idx ) == 1) ? "UNSPEC "
			      :                                           "ERROR ";
			printf( "%s", label );
		}

		/* upper bound */
		if (isDoubleObsMax_special( ts, idx ) != 1) {
			printf( "%g) ", getDoubleObsMax( ts, idx ) );
		} else {
			label = (isDoubleObsMax_posInf( ts, idx ) == 1) ? "POSINF) "
			      : (isDoubleObsMax_unspec( ts, idx ) == 1) ? "UNSPEC) "
			      :                                           "ERROR) ";
			printf( "%s", label );
		}
	}

	printf( "\n CHARCOUNT: %d", getCharcountObs( ts ) );

	/* ---- actions: integer ranges ---- */
	printf( "\nACTIONS:\n" );
	count = getNumIntAct( ts );
	printf( " INTS (%d total): ", count );
	for (idx = 0; idx < count; ++idx) {
		int_range = getIntAct( ts, idx );
		printf( "(%d ", int_range.repeat_count );

		/* lower bound */
		if (isIntActMin_special( ts, idx ) != 1) {
			printf( "%d ", getIntActMin( ts, idx ) );
		} else {
			label = (isIntActMin_negInf( ts, idx ) == 1) ? "NEGINF "
			      : (isIntActMin_unspec( ts, idx ) == 1) ? "UNSPEC "
			      :                                        "ERROR ";
			printf( "%s", label );
		}

		/* upper bound */
		if (isIntActMax_special( ts, idx ) != 1) {
			printf( "%d) ", getIntActMax( ts, idx ) );
		} else {
			label = (isIntActMax_posInf( ts, idx ) == 1) ? "POSINF) "
			      : (isIntActMax_unspec( ts, idx ) == 1) ? "UNSPEC) "
			      :                                        "ERROR) ";
			printf( "%s", label );
		}
	}

	/* ---- actions: double ranges ---- */
	count = getNumDoubleAct( ts );
	printf( "\n DOUBLES (%d total): ", count );
	for (idx = 0; idx < count; ++idx) {
		dbl_range = getDoubleAct( ts, idx );
		printf( "(%d ", dbl_range.repeat_count );

		/* lower bound */
		if (isDoubleActMin_special( ts, idx ) != 1) {
			printf( "%g ", getDoubleActMin( ts, idx ) );
		} else {
			label = (isDoubleActMin_negInf( ts, idx ) == 1) ? "NEGINF "
			      : (isDoubleActMin_unspec( ts, idx ) == 1) ? "UNSPEC "
			      :                                           "ERROR ";
			printf( "%s", label );
		}

		/* upper bound */
		if (isDoubleActMax_special( ts, idx ) != 1) {
			printf( "%g) ", getDoubleActMax( ts, idx ) );
		} else {
			label = (isDoubleActMax_posInf( ts, idx ) == 1) ? "POSINF) "
			      : (isDoubleActMax_unspec( ts, idx ) == 1) ? "UNSPEC) "
			      :                                           "ERROR) ";
			printf( "%s", label );
		}
	}

	printf( "\n CHARCOUNT: %d", getCharcountAct( ts ) );

	/* ---- reward range ---- */
	printf( "\nREWARD: (" );

	/* lower bound */
	if (isRewardMin_special( ts ) != 1) {
		printf( "%g ", getRewardMin( ts ) );
	} else {
		label = (isRewardMin_negInf( ts ) == 1) ? "NEGINF "
		      : (isRewardMin_unspec( ts ) == 1) ? "UNSPEC "
		      :                                   "ERROR ";
		printf( "%s", label );
	}

	/* upper bound */
	if (isRewardMax_special( ts ) != 1) {
		printf( "%g) ", getRewardMax( ts ) );
	} else {
		label = (isRewardMax_posInf( ts ) == 1) ? "POSINF) "
		      : (isRewardMax_unspec( ts ) == 1) ? "UNSPEC) "
		      :                                   "ERROR) ";
		printf( "%s", label );
	}

	printf( "\n" );
}