Esempio n. 1
0
/*
 * allocateIntermediateMDP
 *
 * Sets up the storage used for the intermediate representation of the
 * problem.  The caller is expected to have set gProblemType already,
 * and the problem-size globals (gNumStates, gNumActions and, for
 * POMDPs, the number of observations) must hold their final values;
 * this routine itself reads gProblemType, gNumStates and gNumActions.
 *
 * What gets allocated:
 *   IP             - one intermediate matrix per action, for the
 *                    transition probabilities (always).
 *   IR             - one intermediate matrix per action, for the
 *                    observation probabilities (POMDPs only).
 *   gInitialBelief - zero-filled vector with one entry per state
 *                    (POMDPs only).
 *   IQ             - a single intermediate matrix for the immediate
 *                    rewards (always).
 */
void
allocateIntermediateMDP() {

  int act;

  /* Transition probabilities: an array of per-action intermediate
     matrices, each created with gNumStates. */
  IP = (I_Matrix *) XMALLOC( gNumActions * sizeof( IP[0] ));

  act = 0;
  while( act < gNumActions ) {
    IP[act] = newIMatrix( gNumStates );
    act++;
  }

  /* Observation probabilities and the starting belief only make
     sense for a POMDP. */
  if( gProblemType == POMDP_problem_type ) {

    /* Observation probabilities: same layout as the transition
       matrices, one intermediate matrix per action. */
    IR = (I_Matrix *) XMALLOC( gNumActions * sizeof( IR[0] ));

    act = 0;
    while( act < gNumActions ) {
      IR[act] = newIMatrix( gNumStates );
      act++;
    }

    /* The immediate values use their own special storage scheme, so
       nothing is reserved for them here. */

    /* Simulation-style algorithms want a belief state to start from,
       so one is kept for POMDPs.  It is a plain (non-sparse) vector
       with one entry per state, and the zeroing allocator leaves it
       all zeroes. */
    gInitialBelief = (double *) XCALLOC( gNumStates, sizeof( double ));

  }  /* if POMDP */

  /* For both MDPs and POMDPs the immediate rewards for action-state
     pairs exist as an expectation over the next states (and possibly
     observations).  They are filled in after parsing, from the
     special immediate reward representation. */
  IQ = newIMatrix( gNumActions );

} /* allocateIntermediateMDP */
/*
 * newImmReward
 *
 * Allocates a new immediate-reward node, makes it the current node
 * (gCurImmRewardNode) and prepares the representation that matches the
 * combination of wildcards (NOT_PRESENT) in the arguments.  The node's
 * 'next' pointer is set to NULL; linking it into any list is not done
 * here.
 *
 * Parameters (all copied verbatim into the node):
 *   action     - action field of the entry
 *   cur_state  - current state, or NOT_PRESENT
 *   next_state - next state, or NOT_PRESENT
 *   obs        - observation, or NOT_PRESENT (ignored when choosing the
 *                representation for MDPs)
 *
 * Representation chosen:
 *   POMDP: obs and next_state both NOT_PRESENT -> ir_matrix
 *          only obs NOT_PRESENT                -> ir_vector, one entry
 *                                                 per observation
 *          obs given                           -> ir_value
 *   MDP:   next_state and cur_state both NOT_PRESENT -> ir_matrix
 *          only next_state NOT_PRESENT               -> ir_vector, one
 *                                                 entry per state
 *          next_state given                          -> ir_value
 *
 * For ir_matrix the node's matrix pointer is left NULL and a fresh
 * intermediate matrix is placed in the global gCurIMatrix instead
 * (presumably attached to the node once it has been filled in -- this
 * happens outside this function; confirm against the caller).
 *
 * Any other problem type prints an error to stderr and terminates the
 * program with exit( -1 ).
 *
 * Every allocation made here, including the reward vectors, is passed
 * to checkAllocatedPointer(), so a failed calloc() is no longer
 * silently stored in the node.  (checkAllocatedPointer() is defined
 * elsewhere; it is assumed to deal with a NULL pointer -- TODO confirm
 * its exact failure behaviour.)
 */
void newImmReward( int action, int cur_state, int next_state, int obs ) {

  /* First we will allocate a new node for this entry */
  gCurImmRewardNode = (Imm_Reward_List) malloc( sizeof(*gCurImmRewardNode ));
  checkAllocatedPointer((void *)gCurImmRewardNode );

  gCurImmRewardNode->action = action;
  gCurImmRewardNode->cur_state = cur_state;
  gCurImmRewardNode->next_state = next_state;
  gCurImmRewardNode->obs = obs;
  gCurImmRewardNode->next = NULL;

  switch( gProblemType ) {

  case POMDP_problem_type:
    if( obs == NOT_PRESENT ) {

      if( next_state == NOT_PRESENT ) {

        /* This is the situation where we will need to keep a sparse
           matrix, so let us initialize the global I_Matrix variable */
        gCurIMatrix = newIMatrix( gNumStates );
        gCurImmRewardNode->rep.matrix = NULL;
        gCurImmRewardNode->type = ir_matrix;

      } /* next_state == NOT_PRESENT */

      else { /* we will need a vector of numbers, not a matrix */

        /* Zero-filled vector with one slot per observation. */
        gCurImmRewardNode->rep.vector
          = (REAL_VALUE *) calloc( gNumObservations, sizeof(REAL_VALUE));
        /* Same check as for the node itself; previously unchecked. */
        checkAllocatedPointer((void *)gCurImmRewardNode->rep.vector );
        gCurImmRewardNode->type = ir_vector;

      }  /* else need vector, not matrix */

    }  /* obs == NOT_PRESENT */

    else {  /* We only need a single value, so let us just initialize it */
      /* to zero */

      gCurImmRewardNode->rep.value = 0.0;
      gCurImmRewardNode->type = ir_value;
    }
    break;

  case MDP_problem_type:
    /* for this case we completely ignore the 'obs' parameter */

    if( next_state == NOT_PRESENT ) {

      if( cur_state == NOT_PRESENT ) {

        /* This is the situation where we will need to keep a sparse
           matrix, so let us initialize the global I_Matrix variable. */
        gCurIMatrix = newIMatrix( gNumStates );
        gCurImmRewardNode->rep.matrix = NULL;
        gCurImmRewardNode->type = ir_matrix;

      } /* cur_state == NOT_PRESENT */

      else { /* we will need a vector of numbers, not a matrix */

        /* Zero-filled vector with one slot per state. */
        gCurImmRewardNode->rep.vector
          = (REAL_VALUE *) calloc( gNumStates, sizeof(REAL_VALUE));
        /* Same check as for the node itself; previously unchecked. */
        checkAllocatedPointer((void *)gCurImmRewardNode->rep.vector );
        gCurImmRewardNode->type = ir_vector;

      }  /* else need vector, not matrix */

    }  /* next_state == NOT_PRESENT */

    else {  /* We only need a single value, so let us just initialize it */
      /* to zero */

      gCurImmRewardNode->rep.value = 0.0;
      gCurImmRewardNode->type = ir_value;
    }
    break;

  default:
    fprintf( stderr, "**ERR** newImmReward: Unreckognised problem type.\n");
    exit( -1 );
    break;

  }  /* switch */

}  /* newImmReward */