Exemplo n.º 1
0
/*
   convertMatrices

   Final conversion step, meant to run once parsing has completed
   successfully.  On entry the intermediate transition matrices (IP) and
   observation matrices (IR) must already be allocated and filled in, and
   the special immediate reward representation must already be set.

   Two things happen here:

     1. Every per-action intermediate sparse matrix is converted to the
        real sparse representation (P, and R for POMDP problems) and the
        intermediate copy is destroyed.

     2. The action-state immediate rewards are computed as an expectation
        over next states (and possibly observations) from the special
        immediate reward representation, after which the intermediate
        reward matrix IQ is converted into Q.

   Once this returns the MDP/POMDP model is ready for use in computation.
*/
void
convertMatrices() {

  int action = 0;

  /* One Matrix slot per action in each of the two arrays.  R is
     allocated for every problem type, but its entries are only
     assigned below when the problem is a POMDP. */
  P = (Matrix *) XMALLOC( gNumActions * sizeof( P[0] ) );
  R = (Matrix *) XMALLOC( gNumActions * sizeof( R[0] ) );

  /* Convert, then discard, the intermediate matrices action by action */
  while( action < gNumActions ) {

    P[action] = transformIMatrix( IP[action] );
    destroyIMatrix( IP[action] );

    if( gProblemType == POMDP_problem_type ) {
      R[action] = transformIMatrix( IR[action] );
      destroyIMatrix( IR[action] );
    }

    action++;
  }

  /* The arrays that held the intermediate matrices are no longer needed */
  XFREE( IP );

  if( gProblemType == POMDP_problem_type ) {
    XFREE( IR );
  }

  /* Expected immediate rewards for the action-state pairs are computed
     in the intermediate sparse form so that zero entries are dropped
     (computeRewards() is defined elsewhere; presumably it fills IQ). */
  computeRewards();

  /* Turn the intermediate reward matrix into the real representation */
  Q = transformIMatrix( IQ );
  destroyIMatrix( IQ );

}  /* convertMatrices */
Exemplo n.º 2
0
void doneImmReward() {
/*
   Finishes off the immediate reward node currently being built
   (gCurImmRewardNode) and appends it to gImmRewardList.

   Nothing happens when there is no current node.  For a node of type
   ir_matrix the intermediate matrix gCurIMatrix is converted into the
   node's matrix representation and then destroyed; nodes of type
   ir_value and ir_vector need no conversion.  Any other type is
   reported on stderr and terminates the program with exit( -1 ).

   When USE_DECISION_TREE is enabled the node is also handed to
   irAddToDecisionTree() before being appended to the list.
*/

  if( gCurImmRewardNode == NULL )
    return;

  if( gCurImmRewardNode->type == ir_matrix ) {

    /* Swap the intermediate matrix for its final form and forget it */
    gCurImmRewardNode->rep.matrix = transformIMatrix( gCurIMatrix );
    destroyIMatrix( gCurIMatrix );
    gCurIMatrix = NULL;

  }
  else if( gCurImmRewardNode->type != ir_value
           && gCurImmRewardNode->type != ir_vector ) {

    /* Neither a value, a vector nor a matrix: cannot continue */
    fprintf( stderr, "** ERR ** Unreckognized IR_Type in doneImmReward().\n");
    exit( -1 );

  }

#if USE_DECISION_TREE
  irAddToDecisionTree(gCurImmRewardNode);
#endif

  /* The list now refers to the node; clear the "current" slot */
  gImmRewardList = appendImmRewardList( gImmRewardList,
                                        gCurImmRewardNode );
  gCurImmRewardNode = NULL;

}  /* doneImmReward */
Exemplo n.º 3
0
Arquivo: mdp.c Projeto: Rongya/zmdp
void convertMatrices() {
/*
   This routine is called after the parsing has been successfully done.
   It will assume that the intermediate representations for the transition
   and observation matrices have been allocated and had their values set.
   It also assumes that the special immediate reward representation
   has been set.

   This routine will do two functions.  It will convert the intermediate
   sparse representations for the transitions and observations to the
   actual true sparse representation.  It will also compute the action-state
   immediate reward pairs as an expectation over next states and possibly
   observations from the special immediate reward representation.  This
   will be the final step toward the use of the MDP/POMDP model in
   computation.

   Progress messages and the time spent in computeRewards() are printed
   to stdout when zmdpDebugLevelG is at least 1.

   If the per-action arrays cannot be allocated, an error is written to
   stderr and the program terminates with exit( -1 ).
   */

  int a;
  /* Sample the debug level once so that the start and end timestamps
     are always taken (or skipped) together. */
  const int debug = ( zmdpDebugLevelG >= 1 );
  struct timeval startTime, endTime;

  /* Allocate room for each action */
  P = (Matrix *) malloc( gNumActions * sizeof( *P ) );
  R = (Matrix *) malloc( gNumActions * sizeof( *R ) );

  /* malloc may legitimately return NULL for a zero-sized request, so a
     NULL result is only treated as a failure when there is at least one
     action to store. */
  if( gNumActions > 0 && ( P == NULL || R == NULL ) ) {
    fprintf( stderr, "** ERR ** Out of memory in convertMatrices().\n" );
    exit( -1 );
  }

  /* First convert the intermediate sparse matrices for trans. and obs. */

  for( a = 0; a < gNumActions; a++ ) {

    if( debug ) {
      printf("pomdp_spec: transforming transition matrix [a=%d]\n", a);
    }

    P[a] = transformIMatrix( IP[a] );
    destroyIMatrix( IP[a] );

    /* Observation matrices are only converted for POMDP problems, so
       the progress message is only printed in that case too. */
    if( gProblemType == POMDP_problem_type ) {

      if( debug ) {
        printf("pomdp_spec: transforming obs matrix [a=%d]\n", a);
      }

      R[a] = transformIMatrix( IR[a] );
      destroyIMatrix( IR[a] );
    }

  }

  /* Release the arrays of intermediate matrices and clear the pointers
     so that nothing can reach the freed storage afterwards. */
  free( IP );
  IP = NULL;

  if( gProblemType == POMDP_problem_type ) {
    free( IR );
    IR = NULL;
  }

  /* Calculate expected immediate rewards for action-state pairs, but
     do it in the sparse matrix representation to eliminate zeroes */

  if( debug ) {
    printf("pomdp_spec: computing rewards\n");
    gettimeofday(&startTime, NULL);
  }

  computeRewards();

  if( debug ) {
    gettimeofday(&endTime, NULL);
    printf("  (took %lf seconds)\n",
           endTime.tv_sec - startTime.tv_sec
           + 1e-6 * (endTime.tv_usec - startTime.tv_usec));
    printf("pomdp_spec: transforming reward matrix\n");
  }

  /* Then convert it into the real representation */
  Q = transformIMatrix( IQ );
  destroyIMatrix( IQ );
  IQ = NULL;

}  /* convertMatrices */