示例#1
0
文件: mdp.c 项目: AAHays/python-rl
void
deallocateIntermediateMDP() {
/*
   Releases the intermediate (pre-conversion) model: every per-action
   entry of IP, every per-action entry of IR when the problem is a
   POMDP, the IP and IR arrays themselves, gInitialBelief (POMDP only)
   and finally IQ.

   This routine is made available in case something goes wrong
   before converting the matrices from the intermediate form
   to the final form.  Normally the conversion routine convertMatrices()
   destroys the intermediate matrices itself, but it might be desirable
   to get rid of them before converting (especially if something
   has gone wrong) so that things can be started over.

   Because this is intended for the error path, the per-action arrays
   are checked against NULL before they are indexed, and every released
   global is reset to NULL so that a later call (or a later check of
   these globals) never sees a stale pointer.
*/
  int a;

  for( a = 0; a < gNumActions; a++ ) {

    /* IP may never have been allocated if the failure happened early. */
    if( IP != NULL ) {
      destroyIMatrix( IP[a] );
    }

    if( gProblemType == POMDP_problem_type && IR != NULL ) {
      destroyIMatrix( IR[a] );
    }

  }

  /* The explicit resets below make the end state independent of
     whether XFREE itself clears its argument. */
  XFREE( IP );
  IP = NULL;

  if( gProblemType == POMDP_problem_type ) {
    XFREE( IR );
    IR = NULL;
    XFREE( gInitialBelief );
    gInitialBelief = NULL;
  }

  /* Same pattern doneImmReward() uses for gCurIMatrix: destroy, then
     clear, so the matrix cannot be destroyed a second time. */
  destroyIMatrix( IQ );
  IQ = NULL;

}  /* deallocateIntermediateMDP */
示例#2
0
void
convertMatrices() {
/*
   Final step after the parsing has been successfully done: turns the
   intermediate model into the representation used for computation.

   Assumes the intermediate per-action matrices (IP and, for POMDP
   problems, IR) have been allocated and filled in, and that the special
   immediate reward representation has been set.

   Steps, in order:
     1. Allocate the per-action arrays P and R.
     2. For every action, transform IP[a] into P[a] and destroy IP[a].
        For POMDP problems, likewise transform IR[a] into R[a] and
        destroy IR[a]; for other problem types R[a] is not assigned.
     3. Release the IP array (and the IR array for POMDP problems).
     4. Call computeRewards() to calculate the expected immediate
        rewards for action-state pairs while still in the sparse
        intermediate form, which avoids storing zeroes.
     5. Transform IQ into Q and destroy IQ.
   */

  int action = 0;

  /* One slot per action in each array. */
  P = (Matrix *) XMALLOC( gNumActions * sizeof( *P ) );
  R = (Matrix *) XMALLOC( gNumActions * sizeof( *R ) );

  /* Convert, then immediately discard, each intermediate matrix. */
  while( action < gNumActions ) {

    P[action] = transformIMatrix( IP[action] );
    destroyIMatrix( IP[action] );

    if( gProblemType == POMDP_problem_type ) {
      R[action] = transformIMatrix( IR[action] );
      destroyIMatrix( IR[action] );
    }

    action++;
  }

  /* The containers of the intermediate matrices are no longer needed. */
  XFREE( IP );

  if( gProblemType == POMDP_problem_type )
    XFREE( IR );

  /* Rewards are computed before IQ is converted to its final form. */
  computeRewards();

  Q = transformIMatrix( IQ );
  destroyIMatrix( IQ );

}  /* convertMatrices */
void doneImmReward() {
/*
   Finishes the immediate reward node currently being built
   (gCurImmRewardNode) and appends it to gImmRewardList.

   Does nothing when there is no current node.  For a node of type
   ir_matrix, the intermediate matrix gCurIMatrix is transformed and
   stored in the node, then destroyed and cleared.  Nodes of type
   ir_value and ir_vector need no extra work.  Any other type is
   reported on stderr and terminates the program with exit( -1 ).

   On return gCurImmRewardNode is NULL.
*/

  if( gCurImmRewardNode == NULL )
    return;

  if( gCurImmRewardNode->type == ir_matrix ) {
    /* Move the matrix into its final form and drop the intermediate. */
    gCurImmRewardNode->rep.matrix = transformIMatrix( gCurIMatrix );
    destroyIMatrix( gCurIMatrix );
    gCurIMatrix = NULL;
  }
  else if( gCurImmRewardNode->type != ir_value
	   && gCurImmRewardNode->type != ir_vector ) {
    /* Neither a value, a vector nor a matrix: not a type we know. */
    fprintf( stderr, "** ERR ** Unreckognized IR_Type in doneImmReward().\n");
    exit( -1 );
  }

#if USE_DECISION_TREE
  irAddToDecisionTree(gCurImmRewardNode);
#endif

  /* Hand the finished node over to the list and forget it here. */
  gImmRewardList = appendImmRewardList( gImmRewardList,
				       gCurImmRewardNode );
  gCurImmRewardNode = NULL;

}  /* doneImmReward */
示例#4
0
文件: mdp.c 项目: Rongya/zmdp
void convertMatrices() {
/*
   This routine is called after the parsing has been successfully done.
   It assumes that the intermediate representations for the transition
   and observation matrices have been allocated and had their values set,
   and that the special immediate reward representation has been set.

   This routine does two things.  It converts the intermediate
   sparse representations for the transitions and observations to the
   actual true sparse representation, destroying each intermediate
   matrix as it goes.  It also computes the action-state immediate
   reward pairs (via computeRewards()) as an expectation over next
   states and possibly observations from the special immediate reward
   representation, and then converts IQ into Q.  This is the final step
   toward the use of the MDP/POMDP model in computation.

   When zmdpDebugLevelG >= 1, progress messages and the time spent in
   computeRewards() are printed to stdout.

   If the per-action arrays cannot be allocated, an error is written to
   stderr and the program terminates with exit( -1 ).
   */

  int a;
  struct timeval startTime, endTime;

  /* Allocate room for each action.  calloc is used so that the
     count * size multiplication is overflow-checked and so that the
     entries of R are zero-filled, not indeterminate, when the problem
     is not a POMDP and R[a] is never assigned below. */
  P = (Matrix *) calloc( (size_t) gNumActions, sizeof( *P ) );
  R = (Matrix *) calloc( (size_t) gNumActions, sizeof( *R ) );

  /* A zero-element request may legitimately return NULL, so only treat
     NULL as a failure when there is something to store. */
  if( gNumActions > 0 && ( P == NULL || R == NULL ) ) {
    fprintf( stderr, "** ERR ** Out of memory in convertMatrices().\n" );
    exit( -1 );
  }

  /* First convert the intermediate sparse matrices for trans. and obs. */

  for( a = 0; a < gNumActions; a++ ) {

    if (zmdpDebugLevelG >= 1) {
      printf("pomdp_spec: transforming transition matrix [a=%d]\n", a);
    }

    P[a] = transformIMatrix( IP[a] );
    destroyIMatrix( IP[a] );

    if( gProblemType == POMDP_problem_type ) {

      /* Only announce the observation matrix when one is actually
         being transformed. */
      if (zmdpDebugLevelG >= 1) {
        printf("pomdp_spec: transforming obs matrix [a=%d]\n", a);
      }

      R[a] = transformIMatrix( IR[a] );
      destroyIMatrix( IR[a] );
    }

  }

  free( IP );
  
  if( gProblemType == POMDP_problem_type )
    free( IR );

  /* Calculate expected immediate rewards for action-state pairs, but
     do it in the sparse matrix representation to eliminate zeroes */

  if (zmdpDebugLevelG >= 1) {
    printf("pomdp_spec: computing rewards\n");
    gettimeofday(&startTime, NULL);
  }

  computeRewards();

  if (zmdpDebugLevelG >= 1) {
    gettimeofday(&endTime, NULL);
    printf("  (took %lf seconds)\n",
	   endTime.tv_sec - startTime.tv_sec + 1e-6 * (endTime.tv_usec - startTime.tv_usec));
    printf("pomdp_spec: transforming reward matrix\n");
  }

  /* Then convert it into the real representation */
  Q = transformIMatrix( IQ );
  destroyIMatrix( IQ );

}  /* convertMatrices */