void 
deallocateIntermediateMDP() {
/*
   This routine is made available in case something goes wrong before
   converting the matrices from the intermediate form to the final
   form.  Normally the conversion routine convertMatrices() will
   deallocate the intermediate matrices, but it might be desirable to
   get rid of them before converting (especially if something has gone
   wrong) so that things can be started over.
*/
   int a;

   for( a = 0; a < gNumActions; a++ ) {

      destroyIMatrix( IP[a] );

      if( gProblemType == POMDP_problem_type ) {
         destroyIMatrix( IR[a] );
      }
   }

   XFREE( IP );

   if( gProblemType == POMDP_problem_type ) {
      XFREE( IR );
      XFREE( gInitialBelief );
   }

   destroyIMatrix( IQ );

}  /* deallocateIntermediateMDP */
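/* Usage sketch (not part of this module): the contract described above is
   "convert on success, deallocate on failure".  The parse driver
   parseModelFile() is hypothetical here and stands in for whatever code
   filled the intermediate matrices IP, IR and IQ.

      if( parseModelFile( filename ) )
         convertMatrices();            -- normal path; frees IP, IR, IQ itself
      else
         deallocateIntermediateMDP();  -- error path; discard them so the
                                          parse can be started over
*/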
void 
convertMatrices() {
/*
   This routine is called after the parsing has been successfully done.
   It assumes that the intermediate representations for the transition
   and observation matrices have been allocated and had their values
   set.  It also assumes that the special immediate reward
   representation has been set.

   This routine does two things.  It converts the intermediate sparse
   representations for the transitions and observations into the final
   sparse representation.  It also computes the action-state immediate
   reward pairs as an expectation over next states and possibly
   observations from the special immediate reward representation.

   This is the final step before the MDP/POMDP model can be used in
   computation.
*/
   int a;

   /* Allocate room for each action */
   P = (Matrix *) XMALLOC( gNumActions * sizeof( *P ) );
   R = (Matrix *) XMALLOC( gNumActions * sizeof( *R ) );

   /* First convert the intermediate sparse matrices for trans. and obs. */
   for( a = 0; a < gNumActions; a++ ) {

      P[a] = transformIMatrix( IP[a] );
      destroyIMatrix( IP[a] );

      if( gProblemType == POMDP_problem_type ) {
         R[a] = transformIMatrix( IR[a] );
         destroyIMatrix( IR[a] );
      }
   }

   XFREE( IP );

   if( gProblemType == POMDP_problem_type )
      XFREE( IR );

   /* Calculate expected immediate rewards for action-state pairs, but
      do it in the sparse matrix representation to eliminate zeroes */
   computeRewards();

   /* Then convert it into the real representation */
   Q = transformIMatrix( IQ );
   destroyIMatrix( IQ );

}  /* convertMatrices */
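/* Sketch of the expectation computed by computeRewards() above, written
   out for reference (this restates the comment; it is not new behavior).
   With T(s,a,s') the transition probabilities in P[a], O(a,s',o) the
   observation probabilities in R[a], and r(s,a,s',o) the parsed immediate
   rewards:

      POMDP:  Q[a][s] = sum_{s'} T(s,a,s') * sum_{o} O(a,s',o) * r(s,a,s',o)
      MDP:    Q[a][s] = sum_{s'} T(s,a,s') * r(s,a,s')

   i.e. Q holds the expected immediate reward for taking action a in
   state s, with the expectation taken over next states and (for POMDPs)
   observations. */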
void 
doneImmReward() {

   if( gCurImmRewardNode == NULL )
      return;

   switch( gCurImmRewardNode->type ) {
   case ir_value:
   case ir_vector:
      /* Do nothing for these cases */
      break;

   case ir_matrix:
      gCurImmRewardNode->rep.matrix = transformIMatrix( gCurIMatrix );
      destroyIMatrix( gCurIMatrix );
      gCurIMatrix = NULL;
      break;

   default:
      fprintf( stderr,
               "** ERR ** Unrecognized IR_Type in doneImmReward().\n" );
      exit( -1 );
      break;
   }  /* switch */

#if USE_DECISION_TREE
   irAddToDecisionTree( gCurImmRewardNode );
#endif

   gImmRewardList = appendImmRewardList( gImmRewardList,
                                         gCurImmRewardNode );
   gCurImmRewardNode = NULL;

}  /* doneImmReward */
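/* Context sketch: doneImmReward() is the closing step of the sequence used
   while parsing a single "R:" entry.  The outline below is illustrative
   only; argument lists are omitted because they are not shown in this
   excerpt.

      newImmReward( ... );     -- start a node for one R: entry
      enterImmReward( ... );   -- record reward value(s), possibly repeatedly
      doneImmReward();         -- finish the node: transform an ir_matrix
                                  node's intermediate matrix and append the
                                  node to gImmRewardList
*/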
void 
convertMatrices() {
/*
   This routine is called after the parsing has been successfully done.
   It assumes that the intermediate representations for the transition
   and observation matrices have been allocated and had their values
   set.  It also assumes that the special immediate reward
   representation has been set.

   This routine does two things.  It converts the intermediate sparse
   representations for the transitions and observations into the final
   sparse representation.  It also computes the action-state immediate
   reward pairs as an expectation over next states and possibly
   observations from the special immediate reward representation.

   This is the final step before the MDP/POMDP model can be used in
   computation.
*/
   int a;
   struct timeval startTime, endTime;

   /* Allocate room for each action */
   P = (Matrix *) malloc( gNumActions * sizeof( *P ) );
   R = (Matrix *) malloc( gNumActions * sizeof( *R ) );

   /* First convert the intermediate sparse matrices for trans. and obs. */
   for( a = 0; a < gNumActions; a++ ) {

      if (zmdpDebugLevelG >= 1) {
         printf("pomdp_spec: transforming transition matrix [a=%d]\n", a);
      }
      P[a] = transformIMatrix( IP[a] );
      destroyIMatrix( IP[a] );

      if (zmdpDebugLevelG >= 1) {
         printf("pomdp_spec: transforming obs matrix [a=%d]\n", a);
      }
      if( gProblemType == POMDP_problem_type ) {
         R[a] = transformIMatrix( IR[a] );
         destroyIMatrix( IR[a] );
      }
   }

   free( IP );

   if( gProblemType == POMDP_problem_type )
      free( IR );

   /* Calculate expected immediate rewards for action-state pairs, but
      do it in the sparse matrix representation to eliminate zeroes */
   if (zmdpDebugLevelG >= 1) {
      printf("pomdp_spec: computing rewards\n");
      gettimeofday(&startTime, NULL);
   }
   computeRewards();
   if (zmdpDebugLevelG >= 1) {
      gettimeofday(&endTime, NULL);
      printf(" (took %lf seconds)\n",
             endTime.tv_sec - startTime.tv_sec
             + 1e-6 * (endTime.tv_usec - startTime.tv_usec));
      printf("pomdp_spec: transforming reward matrix\n");
   }

   /* Then convert it into the real representation */
   Q = transformIMatrix( IQ );
   destroyIMatrix( IQ );

}  /* convertMatrices */
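/* Standalone sketch of the timing idiom used above (two gettimeofday()
   calls, with the difference formed from the seconds and microseconds
   fields).  This compiles on its own under POSIX; sleep(1) stands in for
   the work being timed, and none of it is part of this file's API.

      #include <stdio.h>
      #include <unistd.h>
      #include <sys/time.h>

      int main(void) {
         struct timeval startTime, endTime;

         gettimeofday( &startTime, NULL );
         sleep( 1 );
         gettimeofday( &endTime, NULL );

         printf( " (took %lf seconds)\n",
                 endTime.tv_sec - startTime.tv_sec
                 + 1e-6 * (endTime.tv_usec - startTime.tv_usec) );
         return 0;
      }
*/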