Example #1
void computeRewards() {
  int a, i, j, z, next_state, obs;
  double sum, inner_sum;

  /* For some problems we may want to shift all the reward values to
     remove negative rewards, so it helps to maintain the minimum
     reward. Because all unrepresented values are zero, this is our
     starting point. */
  gMinimumImmediateReward = 0.0;

  /* Compute the expected immediate reward for each action-state pair. */

  for( a = 0; a < gNumActions; a++ )
    for( i = 0; i < gNumStates; i++ ) {

      sum = 0.0;

      /* Note: 'j' is not a state. It is an index into an array */
      for( j = P[a]->row_start[i]; 
	  j < P[a]->row_start[i] +  P[a]->row_length[i];
	  j++ ) {

	next_state = P[a]->col[j];

	if( gProblemType == POMDP_problem_type ) {

	  inner_sum = 0.0;
	    
	  /* Note: 'z' is not a state. It is an index into an array */
	  for( z = R[a]->row_start[next_state]; 
	      z < (R[a]->row_start[next_state] +  R[a]->row_length[next_state]);
	      z++ ) {

	    obs = R[a]->col[z];

	    inner_sum += R[a]->mat_val[z] 
	      * getImmediateReward( a, i, next_state, obs );
	  }  /* for z */
	}  /* if POMDP */

	else /* it is an MDP */
	  inner_sum = getImmediateReward( a, i, next_state, 0 );

	sum += P[a]->mat_val[j] * inner_sum;
	
      }  /* for j */

      /* Update the minimum reward we are maintaining. */
      gMinimumImmediateReward = (gMinimumImmediateReward < sum)
        ? gMinimumImmediateReward
        : sum;

      addEntryToIMatrix( IQ, a, i, sum );

    }  /* for i */

}  /* computeRewards */
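For reference, the nested loops above compute an expectation over the sparse next-state distribution and, in the POMDP case, over the sparse observation distribution as well. Writing r for getImmediateReward, and reading P[a](i, s') and R[a](s', o) as the probabilities stored in the sparse matrices P and R (the latter appears to hold observation probabilities), the value stored in IQ for each action a and state i is:

IQ(a, i) \;=\; \sum_{s'} P[a](i, s')
  \begin{cases}
    \sum_{o} R[a](s', o)\, r(a, i, s', o), & \text{POMDP} \\
    r(a, i, s', 0), & \text{MDP}
  \end{cases}

gMinimumImmediateReward then holds the minimum of these sums over all (a, i) pairs, which, per the comment at the top of the function, allows the rewards to be shifted later so none are negative.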
Example #2
File: mdp.c  Project: esteve/gpt
void computeRewards() {
  int a, i, j, z, next_state, obs;
  double sum, inner_sum;

  /* Compute the expected immediate reward for each action-state pair. */

  for( a = 0; a < gNumActions; a++ )
    for( i = 0; i < gNumStates; i++ ) {

      sum = 0.0;

      /* Note: 'j' is not a state. It is an index into an array */
      for( j = P[a]->row_start[i]; 
	  j < P[a]->row_start[i] +  P[a]->row_length[i];
	  j++ ) {

	next_state = P[a]->col[j];

	if( gProblemType == POMDP_problem_type ) {

	  inner_sum = 0.0;
	    
	  /* Note: 'z' is not a state. It is an index into an array */
	  for( z = R[a]->row_start[next_state]; 
	      z < (R[a]->row_start[next_state] +  R[a]->row_length[next_state]);
	      z++ ) {

	    obs = R[a]->col[z];

	    inner_sum += R[a]->mat_val[z] 
	      * getImmediateReward( a, i, next_state, obs );
	  }  /* for z */
	}  /* if POMDP */

	else /* it is an MDP */
	  inner_sum = getImmediateReward( a, i, next_state, 0 );

	sum += P[a]->mat_val[j] * inner_sum;
	
      }  /* for j */

      addEntryToIMatrix( IQ, a, i, sum );

    }  /* for i */
  
}  /* computeRewards */
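The two examples are nearly identical; the second (from esteve/gpt) simply omits the gMinimumImmediateReward bookkeeping. Both traverse the same sparse row layout: for row i, row_start[i] and row_length[i] delimit a contiguous run of entries in col[] (column indices) and mat_val[] (stored values), i.e. a CSR-style encoding. Below is a minimal, self-contained sketch of that traversal; the Matrix struct and the rowSum helper are hypothetical stand-ins for illustration, not the actual types from mdp.c.

#include <stdio.h>

/* Hypothetical CSR-style sparse matrix mirroring the fields the
   examples use (row_start, row_length, col, mat_val); the real
   struct in mdp.c may differ. */
typedef struct {
  int *row_start;   /* index of each row's first entry in col/mat_val */
  int *row_length;  /* number of stored entries in each row */
  int *col;         /* column index of each stored entry */
  double *mat_val;  /* value of each stored entry */
} Matrix;

/* Sum of row i's stored entries -- the same traversal pattern as the
   'for (j = ...)' loops in the examples above. */
double rowSum(const Matrix *m, int i) {
  double sum = 0.0;
  for (int j = m->row_start[i]; j < m->row_start[i] + m->row_length[i]; j++)
    sum += m->mat_val[j];  /* m->col[j] gives the column index if needed */
  return sum;
}

int main(void) {
  /* 2x3 matrix with rows [0.5 0 0.5] and [0 1.0 0] stored sparsely. */
  int row_start[]  = {0, 2};
  int row_length[] = {2, 1};
  int col[]        = {0, 2, 1};
  double mat_val[] = {0.5, 0.5, 1.0};
  Matrix m = {row_start, row_length, col, mat_val};

  printf("row 0 sum = %g\n", rowSum(&m, 0));  /* prints 1 */
  printf("row 1 sum = %g\n", rowSum(&m, 1));  /* prints 1 */
  return 0;
}

Since each row of P[a] is a probability distribution over next states, the row sums here come out to 1, just as they would for the transition matrices the examples iterate over.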