Code example #1
void enterImmReward( int cur_state, int next_state, int obs, 
		    REAL_VALUE value ) {

/* cur_state is ignored for a POMDP, and obs is ignored for an MDP */

  assert( gCurImmRewardNode != NULL );

  switch( gCurImmRewardNode->type ) {
  case ir_value:
    gCurImmRewardNode->rep.value = value;
    break;

  case ir_vector:
    if( gProblemType == POMDP_problem_type )
      gCurImmRewardNode->rep.vector[obs] = value;
    else
      gCurImmRewardNode->rep.vector[next_state] = value;
    break;

  case ir_matrix:
    if( gProblemType == POMDP_problem_type )
      addEntryToIMatrix( gCurIMatrix, next_state, obs, value );
    else
      addEntryToIMatrix( gCurIMatrix, cur_state, next_state, value );
    break;

  default:
    fprintf( stderr, "** ERR ** Unrecognized IR_Type in enterImmReward().\n");
    exit( -1 );
    break;
  }  /* switch */

}  /* enterImmReward */
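
enterImmReward() assumes a parser-global "current immediate reward node" whose type tag decides where the just-parsed value is stored: a single scalar, one row of values, or a sparse matrix accumulated through gCurIMatrix. As a rough sketch of the shape that switch implies (hypothetical declarations for illustration only; the real definitions live in the parser's headers), the node might look like this:

/* Hypothetical declarations mirroring only the fields enterImmReward()
   touches; not the library's actual header. */
typedef double REAL_VALUE;

typedef enum { ir_value, ir_vector, ir_matrix } IR_Type;

typedef struct {
  IR_Type type;              /* which representation this 'R:' entry uses    */
  union {
    REAL_VALUE  value;       /* scalar:  R: <a> : <s> : <s'> : <o>  <value>  */
    REAL_VALUE *vector;      /* one row, indexed by obs (POMDP) or by
                                next_state (MDP)                             */
    /* for ir_matrix, entries are stored through gCurIMatrix instead         */
  } rep;
} Imm_Reward_Node;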
Code example #2
File: mdp.c  Project: AAHays/python-rl
void 
computeRewards() {
  int a, i, j, z, next_state, obs;
  double sum, inner_sum;

  /* For some problems, where we may want to shift all the reward
     values to remove negative rewards, it will help to maintain the
     minimum reward. Because all unrepresented values are zero, this
     is our starting point. */
  gMinimumImmediateReward = 0.0;

  /* Now do the expectation thing for action-state reward values */

  for( a = 0; a < gNumActions; a++ )
    for( i = 0; i < gNumStates; i++ ) {

      sum = 0.0;

      /* Note: 'j' is not a state. It is an index into an array */
      for( j = P[a]->row_start[i]; 
	  j < P[a]->row_start[i] +  P[a]->row_length[i];
	  j++ ) {

	next_state = P[a]->col[j];

	if( gProblemType == POMDP_problem_type ) {

	  inner_sum = 0.0;
	    
	  /* Note: 'z' is not a state. It is an index into an array */
	  for( z = R[a]->row_start[next_state]; 
	      z < (R[a]->row_start[next_state] +  R[a]->row_length[next_state]);
	      z++ ) {

	    obs = R[a]->col[z];

	    inner_sum += R[a]->mat_val[z] 
	      * getImmediateReward( a, i, next_state, obs );
	  }  /* for z */
	}  /* if POMDP */

	else /* it is an MDP */
	  inner_sum = getImmediateReward( a, i, next_state, 0 );

	sum += P[a]->mat_val[j] * inner_sum;
	
      }  /* for j */

      /* Update the minimum reward we are maintaining. */
      gMinimumImmediateReward 
        = (gMinimumImmediateReward) < (sum) 
        ? (gMinimumImmediateReward) 
        : (sum);

      addEntryToIMatrix( IQ, a, i, sum );

    }  /* for i */

}  /* computeRewards */
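
In words, example #2 forms the expected immediate reward of taking action a in state i by summing over the reachable next states (the sparse row of P[a]) and, in the POMDP case, additionally over the observations possible from each next state (the sparse row of R[a], whose entries are the observation probabilities). Restated as a formula (this restatement is ours, not part of the original listing):

  R(i, a) = \sum_{s'} P(s' | i, a) \sum_{o} O(o | a, s') R(i, a, s', o)    (POMDP case)
  R(i, a) = \sum_{s'} P(s' | i, a) R(i, a, s')                             (MDP case)

The AAHays/python-rl version additionally tracks the smallest of these values in gMinimumImmediateReward so that, as its comment notes, all rewards can later be shifted to remove negative values.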
Code example #3
File: mdp.c  Project: esteve/gpt
void computeRewards() {
  int a, i, j, z, next_state, obs;
  double sum, inner_sum;

  /* Now do the expectation thing for action-state reward values */

  for( a = 0; a < gNumActions; a++ )
    for( i = 0; i < gNumStates; i++ ) {

      sum = 0.0;

      /* Note: 'j' is not a state. It is an index into an array */
      for( j = P[a]->row_start[i]; 
	  j < P[a]->row_start[i] +  P[a]->row_length[i];
	  j++ ) {

	next_state = P[a]->col[j];

	if( gProblemType == POMDP_problem_type ) {

	  inner_sum = 0.0;
	    
	  /* Note: 'z' is not a state. It is an index into an array */
	  for( z = R[a]->row_start[next_state]; 
	      z < (R[a]->row_start[next_state] +  R[a]->row_length[next_state]);
	      z++ ) {

	    obs = R[a]->col[z];

	    inner_sum += R[a]->mat_val[z] 
	      * getImmediateReward( a, i, next_state, obs );
	  }  /* for z */
	}  /* if POMDP */

	else /* it is an MDP */
	  inner_sum = getImmediateReward( a, i, next_state, 0 );

	sum += P[a]->mat_val[j] * inner_sum;
	
      }  /* for j */

      addEntryToIMatrix( IQ, a, i, sum );

    }  /* for i */
  
}  /* computeRewards */
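
Both versions of computeRewards() walk P[a] and R[a] in the same compressed-sparse-row layout: row_start[i] is the index of row i's first stored entry, row_length[i] is how many entries that row has, and col[j] / mat_val[j] hold the column index and value of stored entry j. A minimal self-contained sketch of that access pattern (hypothetical names SparseMatrix and rowDot, chosen for illustration) is:

#include <stdio.h>

/* Hypothetical CSR-style container mirroring the fields the examples use
   (row_start, row_length, col, mat_val); not the library's actual type. */
typedef struct {
  int    *row_start;   /* index of each row's first entry in col/mat_val */
  int    *row_length;  /* number of stored entries in each row           */
  int    *col;         /* column index of each stored entry              */
  double *mat_val;     /* value of each stored entry                     */
} SparseMatrix;

/* Dot product of sparse row i with a dense vector v: the same access
   pattern as the 'for j' loop in computeRewards(). */
static double rowDot( const SparseMatrix *m, int i, const double *v ) {
  double sum = 0.0;
  int j;
  for( j = m->row_start[i]; j < m->row_start[i] + m->row_length[i]; j++ )
    sum += m->mat_val[j] * v[ m->col[j] ];
  return sum;
}

int main( void ) {
  /* 2x2 matrix [[0.9, 0.1], [0.0, 1.0]] stored sparsely (the zero omitted). */
  int    row_start[]  = { 0, 2 };
  int    row_length[] = { 2, 1 };
  int    col[]        = { 0, 1, 1 };
  double mat_val[]    = { 0.9, 0.1, 1.0 };
  SparseMatrix m = { row_start, row_length, col, mat_val };
  double v[] = { 1.0, 2.0 };

  /* Prints 1.100000 2.000000 */
  printf( "%f %f\n", rowDot( &m, 0, v ), rowDot( &m, 1, v ) );
  return 0;
}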