Beispiel #1
0
/**
 * Reward for the transition oldState --action--> newState.
 *
 * The state type of oldState (masked to FEATURESTATE | DISCRETESTATE) selects
 * how the per-index reward overload is combined:
 *  - FEATURESTATE:  sum over every (old, new) pair of discrete entries of the
 *                   pair's reward multiplied by both continuous entries.
 *  - DISCRETESTATE: reward of the first discrete entry of each state.
 *  - anything else: reward of the two discrete state numbers.
 */
double CFeatureRewardFunction::getReward(CState *oldState, CAction *action, CState *newState)
{
	const int stateKind = oldState->getStateProperties()->getType() & (FEATURESTATE | DISCRETESTATE);

	if (stateKind == FEATURESTATE)
	{
		double total = 0;
		for (unsigned int from = 0; from < oldState->getNumDiscreteStates(); from++)
		{
			for (unsigned int to = 0; to < newState->getNumDiscreteStates(); to++)
			{
				// Pair reward weighted by both continuous entries.
				total += getReward(oldState->getDiscreteState(from), action, newState->getDiscreteState(to)) * oldState->getContinuousState(from) * newState->getContinuousState(to);
			}
		}
		return total;
	}

	if (stateKind == DISCRETESTATE)
	{
		double single = getReward(oldState->getDiscreteState(0), action, newState->getDiscreteState(0));
		return single;
	}

	// Neither pure feature nor pure discrete state: use the state numbers.
	double fallback = getReward(oldState->getDiscreteStateNumber(), action, newState->getDiscreteStateNumber());
	return fallback;
}
Beispiel #2
0
// -----------------------------------------------------------------------
	/**
	 * Writes this learner to outputStream.
	 *
	 * First delegates to BaseLearner::save, then emits three tagged lines
	 * ("reward", "K", "arm") built with Serialization::standardTag.
	 *
	 * @param outputStream stream the tags are written to
	 * @param numTabs      forwarded unchanged to BaseLearner::save and
	 *                     Serialization::standardTag
	 */
	void BanditLearner::save(ofstream& outputStream, int numTabs)
	{
		// Base-class data goes first.
		BaseLearner::save(outputStream, numTabs);

		outputStream << Serialization::standardTag("reward", getReward(), numTabs) << endl;
		outputStream << Serialization::standardTag("K", getK(), numTabs) << endl;
		outputStream << Serialization::standardTag("arm", getKeyToString(), numTabs) << endl;
	}
Beispiel #3
0
/**
 * Advances the task by one day: decrements expiresIn and reduces the reward
 * to 90% of getReward() (truncated to int), never letting it drop below 33.
 */
void Task::passOneDay()
{
    --expiresIn;

    // Todo: degradation and minreward constants
    reward = static_cast<int>(getReward() * 0.9);
    if (reward < 33) reward = 33;
}
Beispiel #4
0
/**
 * Writes the episode's rewards to stream in binary form: one int holding the
 * number of rewards, followed by that many doubles.
 */
void CRewardEpisode::saveBIN(FILE *stream)
{
	int rewardCount = getNumRewards();
	fwrite(&rewardCount, sizeof(int), 1, stream);

	int idx = 0;
	while (idx < rewardCount)
	{
		double value = getReward(idx);
		fwrite(&value, sizeof(double), 1, stream);
		idx++;
	}
}
Beispiel #5
0
/**
 * Mean of the most recent rewards of the episode.
 *
 * @param Steps number of trailing rewards to average. Values larger than the
 *              number of stored rewards are clamped to it; values <= 0 yield 0.
 * @return the arithmetic mean of the last min(Steps, getNumRewards()) rewards.
 *
 * The previous version divided the sum by (getNumRewards() - Steps) instead of
 * the number of summed rewards, which produced a wrong mean (and a division by
 * zero when Steps == getNumRewards()), and indexed before the first reward
 * when Steps exceeded the number of rewards.
 */
double CRewardEpisode::getLastStepsMeanReward(int Steps)
{
	assert(getNumRewards() > 0);

	const int numRewards = getNumRewards();

	// Never average more rewards than are stored.
	int count = Steps;
	if (count > numRewards)
	{
		count = numRewards;
	}
	if (count <= 0)
	{
		return 0.0;
	}

	double sum = 0;
	for (int i = numRewards - count; i < numRewards; i ++)
	{
		sum += getReward(i);
	}
	return sum / count;
}
Beispiel #6
0
/**
 * Arithmetic mean of all rewards stored in the episode.
 * Asserts that at least one reward is present.
 */
double CRewardEpisode::getMeanReward()
{
	assert(getNumRewards() > 0);

	double total = 0;
	int idx = 0;
	while (idx < getNumRewards())
	{
		total += getReward(idx);
		++idx;
	}
	return total / getNumRewards();
}
Beispiel #7
0
/**
 * Discounted sum of the episode's rewards: reward i is weighted by gamma^i.
 *
 * @param gamma discount factor applied once per step
 */
double CRewardEpisode::getSummedReward(double gamma)
{
	double total = 0;
	double discount = 1.0;	// gamma^idx for the current index

	int idx = 0;
	while (idx < getNumRewards())
	{
		total += discount * getReward(idx);
		discount *= gamma;
		++idx;
	}
	return total;
}
void TurntableSystem::changeBox(int boxid,bool change)
{
	CCPoint thisboxpiont = this->getChildByTag(TAG_ZP_BD)->getChildByTag(boxid)->getPosition();
	this->getChildByTag(TAG_ZP_BD)->removeChildByTag(boxid);
	Zp_BoxData* thisbox = new Zp_BoxData();
	thisbox->set_boxid(boxid);
	thisbox->set_xuanzhong(change);
	thisbox->set_boxReward(getReward(boxid%6));
	thisbox->set_point(thisboxpiont);
	Gzi* gz = new Gzi((CCSprite*)this->getChildByTag(TAG_ZP_BD),thisbox);
}
Beispiel #9
0
/**
 * Reward that depends only on the state reached (newState); the old state and
 * the action are ignored.
 *
 *  - FEATURESTATE:  sum over i of continuous entry i times the reward of
 *                   discrete entry i.
 *  - DISCRETESTATE: reward of discrete entry 0.
 *  - otherwise:     0.
 */
double CFeatureStateRewardModel::getReward(CState *, CAction *, CState *newState)
{
	if (newState->getStateProperties()->isType(FEATURESTATE))
	{
		double weighted = 0.0;
		for (unsigned int idx = 0; idx < newState->getNumContinuousStates(); idx++)
		{
			weighted += newState->getContinuousState(idx) * getReward(newState->getDiscreteState(idx));
		}
		return weighted;
	}

	if (newState->getStateProperties()->isType(DISCRETESTATE))
	{
		double single = getReward(newState->getDiscreteState(0));
		return single;
	}

	return 0.0;
}
Beispiel #10
0
/**
 * Sets the enemy's health and rescales its object accordingly.
 *
 * When the new health is negative, getReward() is added to both the money and
 * the score of the GameController component found on the scene child "GC",
 * and the enemy's object is asked to remove itself.
 *
 * @param hp new health value
 */
void Enemy::setHealth(float hp)
{ 
    m_health = hp; 
    // Strictly negative health triggers the kill handling; exactly 0 does not.
    if (m_health < 0.f)
    {
        getObject()->getScene().findChild("GC")->getComponent<GameController>()->addMoney(getReward());
		getObject()->getScene().findChild("GC")->getComponent<GameController>()->addScore(getReward());
        getObject()->removeSelf();
    }

    // NOTE(review): this still runs after removeSelf() above -- confirm that
    // the object remains valid until the end of this call.
    // Scale is linear in health: 0.25 at health 0, 2.25 at health 100.
    getObject()->setScale((m_health * 2.f / 100.f) + 0.25f);
}
/**
 *  Performs one iteration of BRUE.
 *
 *  Walks down from root while the current node exists and has n > 0, picking
 *  each next node with the EXPLORATION policy above switchingPoint levels and
 *  the EXPLOITATION policy from switchingPoint on. The last node visited is
 *  then evaluated with getReward and the result is backpropagated from it.
 *
 *  NOTE(review): if root is NULL or root->n is 0 the loop never runs and a
 *  NULL leaf is handed to getReward/backpropagate -- confirm they accept it.
 */
void performIteration(treeNode* root,  int switchingPoint, heuristics_t heuristic, int budget){
	treeNode* current = root;
	treeNode* lastVisited = NULL;

	for (int depth = 0; current != NULL && current->n>0; depth++){
		lastVisited = current;
		POLICY policy = depth < switchingPoint ? EXPLORATION : EXPLOITATION;
		current = nextNode(current,policy);
	}

	double reward = getReward(lastVisited, heuristic, budget);
	backpropagate(lastVisited, reward);
}
Beispiel #12
0
/**
 * Writes the episode's rewards to stream as text: a "NumRewards: <n>" line,
 * then all rewards separated by spaces on one line.
 */
void CRewardEpisode::saveData(FILE *stream)
{
	fprintf(stream, "NumRewards: %d\n", getNumRewards());

	int idx = 0;
	while (idx < getNumRewards())
	{
		double value = getReward(idx);
		fprintf(stream, "%lf ", value);
		++idx;
	}

	fprintf(stream, "\n");
}
Beispiel #13
0
/**
 * Reward for a transition between two feature lists.
 *
 * Sums, over every pair (old feature, new feature), the reward of the pair's
 * feature indices multiplied by both features' factors.
 */
double CFeatureRewardFunction::getReward(CFeatureList *oldState, CAction *action, CFeatureList *newState)
{
	double total = 0;

	for (CFeatureList::iterator from = oldState->begin(); from != oldState->end(); from++)
	{
		for (CFeatureList::iterator to = newState->begin(); to != newState->end(); to++)
		{
			total += getReward((*from)->featureIndex, action, (*to)->featureIndex) * (*from)->factor * (*to)->factor;
		}
	}

	return total;
}
Beispiel #14
0
/*
 * Prints the command-line help text to stderr and terminates the program
 * with exit status 1.
 *
 * The text embeds the program name (command), the output file names
 * (fname_dm, fname_dd and their intermediate variants) and the current
 * defaults returned by getOcclusionPenalty(), getReward(),
 * getReliableThreshold(), getAlpha() and getMaxAttractionThreshold().
 *
 * command: program name shown in the usage and example lines.
 */
static void usage(char *command)
{
  fprintf(stderr,
          "\nPixel-to-pixel stereo algorithm.\n\n"
          "usage:  %s [options] file1 file2 maxdisparity [file_dm]\n\n"
          "Files 'file1' and 'file2' are read, the stereo algorithm (including\n"
          "postprocessing) is run, and the resulting disparity map and depth\n"
          "discontinuities are saved in files '%s' and '%s'.  Note that maxdisp\n"
          "must lie between 14 and 50.  The original images must be in PGM file\n"
          "format.  If the -b option is selected, then the results after processing\n"
          "the scanlines independently are also saved, in '%s' and '%s'.  All results\n"
          "are written to the current directory.  Options:\n\n"
          "        [-h]     displays this help message\n"
          "        [-o n]   sets the occlusion penalty to 'n' (default: %d)\n"
          "        [-r n]   sets the reward to 'n' (default: %d)\n"
          "        [-d dir] looks for input files in directory 'dir'\n"
          "                 (default: current directory)\n"
          "        [-b]     also writes out the results after independent matching\n"
          "        [-rel n]  sets the reliable threshold to n (default: %d)\n"
          "        [-alpha f]  sets alpha (for reliability) to f (default: %f)\n"
          "        [-ma n]  sets the max-attraction threshold to n (default: %d)\n\n"
          "        [-wpi]   writes the intermediate results during postprocessing\n"
          "                 (default:  off)\n"
          "        [-np]    do not postprocess disparity map\n"
          "        [-jp]    just postprocess disparity map\n"
          "                 (i.e., 'file_dm' is taken to be a disparity map to\n"
          "                 postprocess.  'file2' is ignored).\n"
          "        [-ig f]  for matching, uses intensity gradients specified\n"
          "                 in 'f' (E.g., f='ig%%c.pgm'). x & y go into '%%c'\n"
          "Example:  %s -o 15 -hr 30 -d ~/images img1.pgm img2.pgm 20\n\n",
          /* Arguments below are listed in the order the conversions appear above. */
          command, fname_dm, fname_dd, fname_dm_intermediate, fname_dd_intermediate,
          getOcclusionPenalty(), getReward(), 
          getReliableThreshold(), getAlpha(), 
          getMaxAttractionThreshold(),
          command);
  exit(1);
}
/*
 * Matches every scanline of the left and right images independently by
 * dynamic programming and writes the resulting disparity map and depth
 * discontinuities.
 *
 * imgL, imgR              input images
 * disparity_map           output, indexed as [g_cols*scanline + x]
 * depth_discontinuities   output, indexed as [g_cols*scanline + x]
 * ptr_ig                  if not NULL, passed to readIGXFiles() to load
 *                         precomputed intensity-gradient flags; if NULL the
 *                         flags are computed per scanline
 *
 * All 2-D work tables are flattened: table[col][disp] is stored at
 * table[(g_maxdisp + 1)*col + disp].
 *
 * Fix: when ptr_ig is given, the per-scanline flags were copied from
 * (void *)q_no_igL[scanline], i.e. an int element value reinterpreted as an
 * address. The copy now starts at the address of the scanline's row in the
 * flattened buffer.
 * NOTE(review): this assumes readIGXFiles() fills q_no_igL/q_no_igR row-major
 * with a row stride of (g_cols+g_slop) ints, which matches the allocation
 * size below -- confirm against readIGXFiles().
 */
void matchScanlines(uchar *imgL,
                    uchar *imgR,
                    uchar *disparity_map,
                    uchar *depth_discontinuities,
                    char *ptr_ig)
{
  int *phi;      /* cost of a match sequence */
  int *pie_y;    /* points to the immediately */
  int *pie_d;    /*    preceding match */
  int *dis;      /* dissimilarity b/w two pixels */
  int *no_igL;                /* indicates no intensity gradient */
  int *no_igR;
  int scanline;                         /* the current scanline */
  int y, deltaa;                        /* the match following (y_p, delta_p) */
  int y_p, delta_p;                     /* the current match */
  int ymin, *xmin;          /* used to prune bad nodes */
  int phi_new;
  int phi_best, pie_y_best, pie_d_best;
  int *q_no_igL;          /* indicates no intensity gradient */
  int *q_no_igR;

  
  /* Per-scanline work tables: (g_cols+g_slop) columns, (g_maxdisp+1) disparities */
  phi = malloc((g_cols+g_slop)*(g_maxdisp + 1)*sizeof(int));
  if (phi == NULL)
     error("(matchScanlines) Memory not allocated");
  pie_y = malloc((g_cols+g_slop)*(g_maxdisp + 1)*sizeof(int));
  if (pie_y == NULL)
     error("(matchScanlines) Memory not allocated");
  pie_d = malloc((g_cols+g_slop)*(g_maxdisp + 1)*sizeof(int));
  if (pie_d == NULL)
     error("(matchScanlines) Memory not allocated");
  dis = malloc((g_cols+g_slop)*(g_maxdisp + 1)*sizeof(int));
  if (dis == NULL)
     error("(matchScanlines) Memory not allocated");
  no_igL = malloc((g_cols+g_slop)*sizeof(int));
  if (no_igL == NULL)
     error("(matchScanlines) Memory not allocated");
  no_igR = malloc((g_cols+g_slop)*sizeof(int));
  if (no_igR == NULL)
     error("(matchScanlines) Memory not allocated");
  xmin = malloc((g_cols+g_slop)*sizeof(int));
  if (xmin == NULL)
     error("(matchScanlines) Memory not allocated");
  /* Whole-image gradient flags: g_rows rows of (g_cols+g_slop) ints */
  q_no_igL = malloc((g_rows)*(g_cols+g_slop)*sizeof(int));
  if (q_no_igL == NULL)
     error("(matchScanlines) Memory not allocated");
  q_no_igR = malloc((g_rows)*(g_cols+g_slop)*sizeof(int));
  if (q_no_igR == NULL)
     error("(matchScanlines) Memory not allocated");
  
  printf("Parameters:  occ=%d, rew=%d, ptr_ig='%s'\n", 
         getOcclusionPenalty(), getReward(), ptr_ig ? ptr_ig : "NULL");
  
  if (ptr_ig != NULL) {
    readIGXFiles(ptr_ig, q_no_igL, q_no_igR);
  }
  
  for (scanline = 0 ; scanline < g_rows ; scanline++)  {
    if (scanline % 50 == 0 && g_rows > 200)  printf("     scanline %d\n", scanline);
    
    /* Fill tables */
    fillDissimilarityTable(imgL, imgR, dis, scanline);
    if (ptr_ig == NULL)
      computeIntensityGradientsX(imgL, imgR, scanline, no_igL, no_igR);
    else {
      /* Copy this scanline's row out of the preloaded flag buffers */
      memcpy(no_igL, &q_no_igL[(g_cols+g_slop)*scanline], (g_cols+g_slop)*sizeof(int));
      memcpy(no_igR, &q_no_igR[(g_cols+g_slop)*scanline], (g_cols+g_slop)*sizeof(int));
    }
    
#ifdef BACKWARD_LOOKING
    
    for (delta_p = 0 ; delta_p <= g_maxdisp ; delta_p++)  {
      phi[(g_maxdisp + 1)*0+delta_p] = DEFAULT_COST + dis[(g_maxdisp + 1)*0+delta_p];
      pie_y[(g_maxdisp + 1)*0+delta_p] = FIRST_MATCH;
      pie_d[(g_maxdisp + 1)*0+delta_p] = FIRST_MATCH;
    }
    
    for (y = 1 ; y < g_cols ; y++)  {
      /* printf("y=%d\n", y);  fflush(stdout); */
      for (deltaa = 0 ; deltaa <= g_maxdisp ; deltaa++)  {
        
        phi_best = INF;
        
        /* Search all candidate predecessors (y_p, delta_p) of (y, deltaa) */
        for (delta_p = 0 ; delta_p <= g_maxdisp ; delta_p++)  {
          y_p = y - max(1, delta_p - deltaa + 1);
          if (y_p>=0) {
            if (deltaa==delta_p ||
                (deltaa>delta_p && !no_igL[y+deltaa-1]) ||
                (deltaa<delta_p && !no_igR[y_p+1])) {
              phi_new = phi[(g_maxdisp + 1)*y_p+delta_p] + occ_pen * (deltaa != delta_p);
              if (phi_new < phi_best) {
                phi_best = phi_new;
                pie_y_best = y_p;
                pie_d_best = delta_p;
              }
            }
          }
        }
        phi[(g_maxdisp + 1)*y+deltaa] = phi_best + dis[(g_maxdisp + 1)*y+deltaa]-reward;
        pie_y[(g_maxdisp + 1)*y+deltaa] = pie_y_best;
        pie_d[(g_maxdisp + 1)*y+deltaa] = pie_d_best;
      }
    }
    
#else
    /* Initialize arrays */
    for (y_p = 1 ; y_p < g_cols ; y_p++)  {
      xmin[y_p] = INF;
      for (delta_p = 0 ; delta_p <= g_maxdisp ; delta_p++)
        phi[(g_maxdisp + 1)*y_p+delta_p] = INF;
    }
    
    for (delta_p = 0 ; delta_p <= g_maxdisp ; delta_p++)  {
      phi[(g_maxdisp + 1)*0+delta_p] = DEFAULT_COST + dis[(g_maxdisp + 1)*0+delta_p];
      pie_y[(g_maxdisp + 1)*0+delta_p] = FIRST_MATCH;
      pie_d[(g_maxdisp + 1)*0+delta_p] = FIRST_MATCH;
      xmin[0] = phi[(g_maxdisp + 1)*0+delta_p];
    }
    
    for (y_p = 0 ; y_p < g_cols ; y_p++)  {
      
      /* Determine ymin */
      ymin = INF;
      for (delta_p = 0 ; delta_p <= g_maxdisp ; delta_p++)  {
        ymin = min(ymin, phi[(g_maxdisp + 1)*y_p+delta_p]);
      }
      
      for (delta_p = 0 ; delta_p <= g_maxdisp ; delta_p++)  {
        
        /* Expand good y nodes */
        if ( phi[(g_maxdisp + 1)*y_p+delta_p] <= ymin )  {
          y = y_p + 1;
          for (deltaa = delta_p + 1 ; deltaa <= g_maxdisp ; deltaa++)  {
            phi_new = phi[(g_maxdisp + 1)*y_p+delta_p] + dis[(g_maxdisp + 1)*y+deltaa] - reward + occ_pen
              + no_igL[y + deltaa];
            if (phi_new < phi[(g_maxdisp + 1)*y+deltaa])  {
              phi[(g_maxdisp + 1)*y+deltaa] = phi_new;
              pie_y[(g_maxdisp + 1)*y+deltaa] = y_p;
              pie_d[(g_maxdisp + 1)*y+deltaa] = delta_p;
              xmin[y+deltaa] = min(xmin[y+deltaa], phi_new);
            }  /* end if(phi_new) */
          }  /* end for(deltaa) */
        }  /* end if(phi[][] <= ymin) */
        
        /* Expand good x nodes */
        if ( phi[(g_maxdisp + 1)*y_p+delta_p] <= xmin[y_p+delta_p] ) {
          for (deltaa = 0 ; deltaa < delta_p ; deltaa++)  {
            y = y_p + delta_p - deltaa + 1;
            phi_new = phi[(g_maxdisp + 1)*y_p+delta_p] + dis[(g_maxdisp + 1)*y+deltaa] - reward + occ_pen
              + no_igR[y_p];
            if (phi_new < phi[(g_maxdisp + 1)*y+deltaa])  {
              phi[(g_maxdisp + 1)*y+deltaa] = phi_new;
              pie_y[(g_maxdisp + 1)*y+deltaa] = y_p;
              pie_d[(g_maxdisp + 1)*y+deltaa] = delta_p;
              xmin[y+deltaa] = min(xmin[y+deltaa], phi_new);
            }  /* end if(phi_new) */
          }  /* end for(deltaa) */
        }  /* end if(phi[][] <= xmin[]) */
        
        /* Expand all nodes */
        phi_new = phi[(g_maxdisp + 1)*y_p+delta_p] + dis[(g_maxdisp + 1)*(y_p+1)+delta_p] - reward;
        if ( phi_new < phi[(g_maxdisp + 1)*(y_p+1)+delta_p] )  {
          phi[(g_maxdisp + 1)*(y_p+1)+delta_p] = phi_new;
          pie_y[(g_maxdisp + 1)*(y_p+1)+delta_p] = y_p;
          pie_d[(g_maxdisp + 1)*(y_p+1)+delta_p] = delta_p;
          xmin[y_p+1+delta_p] = min(xmin[y_p+1+delta_p], phi_new);
        }  /* end(if) */
      }  /* end for(delta_p) */
    }  /* end for(y_p) */
    
#endif  /* BACKWARD_LOOKING */
    
    
    /* find ending match $m_k$ */
    
    phi_best = INF;
    for (deltaa = 0 ; deltaa <= g_maxdisp ; deltaa++)  {
      y = g_cols - 1 - deltaa;
      if (phi[(g_maxdisp + 1)*y+deltaa] <= phi_best)  {
        phi_best = phi[(g_maxdisp + 1)*y+deltaa];
        pie_y_best = y;
        pie_d_best = deltaa;
      }
    }
    
    
/******** */
#if 0 
    /* This code extracts matches from phi and pie tables. */
    /* It is only included for debugging purposes, and its */
    /* results are used by no one. */
    
    {
      int *matches;
      int num_matches;
      int tmp_y, tmp_d;
      int zz = 0;

      matches = malloc(2*g_cols*sizeof(int));
      if (matches == NULL)
        error("(matchScanlines) Memory not allocated");
      
      y_p = pie_y_best;
      delta_p = pie_d_best;
      while (y_p != FIRST_MATCH && delta_p != FIRST_MATCH)  {
        matches[(g_cols)*0+zz] = y_p + delta_p;
        matches[(g_cols)*1+zz] = y_p;
        tmp_y = pie_y[(g_maxdisp + 1)*y_p+delta_p];
        tmp_d = pie_d[(g_maxdisp + 1)*y_p+delta_p];
        y_p = tmp_y;
        delta_p = tmp_d;
        zz++;
      }
      num_matches = y;

      free(matches);
    }
#endif
/******** */
    
    
    
    /* Compute disparity map and depth discontinuities */
    /* (walks the pie_y/pie_d back-pointers from the ending match to FIRST_MATCH) */
    {
      int x, x1, x2, y1, y2, deltaa1, deltaa2;
      
      y1 = pie_y_best;         deltaa1 = pie_d_best;         x1 = y1 + deltaa1;
      y2 = pie_y[(g_maxdisp + 1)*y1+deltaa1]; deltaa2 = pie_d[(g_maxdisp + 1)*y1+deltaa1]; x2 = y2 + deltaa2;
      
      for (x = g_cols - 1 ; x >= x1 ; x--)  {
        disparity_map[(g_cols)*scanline+x] = deltaa1;
        depth_discontinuities[(g_cols)*scanline+x] = NO_DISCONTINUITY;
      }
      
      while (y2 != FIRST_MATCH)  {
        if (deltaa1 == deltaa2)  {
          disparity_map[(g_cols)*scanline+x2] = deltaa2;
          depth_discontinuities[(g_cols)*scanline+x2] = NO_DISCONTINUITY;
        }
        else if (deltaa2 > deltaa1)  {
          disparity_map[(g_cols)*scanline+x2] = deltaa2;
          depth_discontinuities[(g_cols)*scanline+x2] = DISCONTINUITY;
        }
        else {
          disparity_map[(g_cols)*scanline+x1 - 1] = deltaa2;
          depth_discontinuities[(g_cols)*scanline+x1 - 1] = DISCONTINUITY;
          for (x = x1 - 2 ; x >= x2 ; x--)  {
            disparity_map[(g_cols)*scanline+x] = deltaa2;
            depth_discontinuities[(g_cols)*scanline+x] = NO_DISCONTINUITY;
          }
        }
        y1 = y2;                 deltaa1 = deltaa2;            x1 = y1 + deltaa1;
        y2 = pie_y[(g_maxdisp + 1)*y1+deltaa1]; deltaa2 = pie_d[(g_maxdisp + 1)*y1+deltaa1]; x2 = y2 + deltaa2;
      }
      
      for (x = y1 + deltaa1 - 1 ; x >= 0 ; x--)  {
        disparity_map[(g_cols)*scanline+x] = deltaa1;
        depth_discontinuities[(g_cols)*scanline+x] = NO_DISCONTINUITY;
      }
    }
    
  }  /* endfor -- scanline */
  
  free(phi);
  free(pie_y);
  free(pie_d);
  free(dis);
  free(no_igL);
  free(no_igR);
  free(xmin);
  free(q_no_igL);
  free(q_no_igR);
}
Beispiel #16
0
void offenseAgent(int port, int numTMates, int numEpi, double learnR,
                  int suffix, bool oppPres, double eps) {

  // Number of features
  int numF = oppPres ? (4 + 4 * numTMates) : (3 + 3 * numTMates);
  // Number of actions
  int numA = 2 + numTMates;

  double discFac = 1;

  // Tile coding parameter
  double resolution = 0.1;

  double range[numF];
  double min[numF];
  double res[numF];
  for(int i = 0; i < numF; i++) {
      min[i] = -1;
      range[i] = 2;
      res[i] = resolution;
  }

  // Weights file
  char *wtFile;
  std::string s = "weights_" + std::to_string(port) +
                  "_" + std::to_string(numTMates + 1) +
                  "_" + std::to_string(suffix);
  wtFile = &s[0u];
  double lambda = 0;
  CMAC *fa = new CMAC(numF, numA, range, min, res);
  SarsaAgent *sa = new SarsaAgent(numF, numA, learnR, eps, lambda, fa, wtFile, wtFile);

  hfo::HFOEnvironment hfo;
  hfo::status_t status;
  hfo::action_t a;
  double state[numF];
  int action = -1;
  double reward;
  hfo.connectToServer(hfo::HIGH_LEVEL_FEATURE_SET,"../../bin/teams/base/config/formations-dt",6000,"localhost","base_left",false,"");
  for (int episode=0; episode < numEpi; episode++) {
    int count = 0;
    status = hfo::IN_GAME;
    action = -1;
    while (status == hfo::IN_GAME) {
      const std::vector<float>& state_vec = hfo.getState();
      // If has ball
      if(state_vec[5] == 1) {
        if(action != -1) {
          reward = getReward(status);
          sa->update(state, action, reward, discFac);
        }

        // Fill up state array
        purgeFeatures(state, state_vec, numTMates, oppPres);

	// Get raw action
        action = sa->selectAction(state);

        // Get hfo::Action
        a = toAction(action, state_vec);

      } else {
            a = hfo::MOVE;
      }
      if (a== hfo::PASS) {
           hfo.act(a,state_vec[(9+6*numTMates) - (action-2)*3]);
           //std::cout<<(9+6*numTMates) - (action-2)*3;
        } else {
           hfo.act(a);
        }
      status = hfo.step();
    }
    // End of episode
    if(action != -1) {
      reward = getReward(status);
      sa->update(state, action, reward, discFac);
      sa->endEpisode();
    }
  }

  delete sa;
  delete fa;
}
Beispiel #17
0
/*
 * Runs EPISODES learning episodes on the grid world.
 *
 * Each episode draws the three binary features phi[0..2], places the reward
 * at location A or B depending on phi[0] and phi[1], picks a random start
 * cell, and then moves until the reward cell is reached or MAXSTEPS steps
 * have been taken. After every move val[][] is updated, and phiVal[][][k]
 * is updated for each feature k that is set.
 */
void doGridLearning() {
int i, j, k, moves, oldx, oldy, reward, ret;
int count = 0;
int up, down, left, right;	/* not used below */
while( count < EPISODES ) {
	episodeNumber++;

	/* Feature probabilities: phi0 with 0.7, phi1 with 0.3, phi2 with 0.5. */
	/* (An earlier variant drew all three features with probability 0.5:
	 *	for( i = 0; i < 3; i++ ) {
	 *		if( ( (float)random() / (float)RAND_MAX ) < 0.5 ) {
	 *			phi[i] = 1;
	 *		} else {
	 *			phi[i] = 0;
	 *		}
	 *	}
	 */
	phi[0] = ( ((float)random() / (float)RAND_MAX) < 0.7 ) ? 1 : 0;
	phi[1] = ( ((float)random() / (float)RAND_MAX) < 0.3 ) ? 1 : 0;
	phi[2] = ( ((float)random() / (float)RAND_MAX) < 0.5 ) ? 1 : 0;

	/* Reward goes to A when exactly one of phi0/phi1 is set, otherwise to B */
	if( ( phi[0] == 1 && phi[1] == 0 ) || ( phi[0] == 0 && phi[1] == 1 )) {
		rewardx = rewardAx;
		rewardy = rewardAy;
	} else {
		rewardx = rewardBx;
		rewardy = rewardBy;
	}

	/* Clear both candidate cells first, then mark the active one */
	for( k = 0; k < 3; k++ ) {
		phiVal[rewardAx][rewardAy][k] = 0;
		phiVal[rewardBx][rewardBy][k] = 0;
		phiVal[rewardx][rewardy][k] = REWARD;
	}
	val[rewardAx][rewardAy] = 0;
	grid[rewardAx][rewardAy] = FLOOR;
	val[rewardBx][rewardBy] = 0;
	grid[rewardBx][rewardBy] = FLOOR;
	grid[rewardx][rewardy] = REWARD;
	val[rewardx][rewardy] = REWARD;

	/* Randomly choose a starting cell that is neither a wall nor the reward */
	for( ;; ) {
		i = random() % MAXX;
		j = random() % MAXY;
		if( grid[i][j] != WALL && !(i == rewardx && j == rewardy) ) {
			break;
		}
	}
	xloc = i;
	yloc = j;

	step = 0;
	do{
		step++;

		/* reset freedom of movement */
		for( moves = 0; moves < 4; moves++ ) {
			canmove[moves] = 0;
		}

		/* determine freedom of movement: inside the grid and not a wall */
		if( xloc > 0 && grid[xloc-1][yloc] != WALL ) {
			canmove[LEFT] = 1;
		}
		if( xloc < (MAXX-1) && grid[xloc+1][yloc] != WALL ) {
			canmove[RIGHT] = 1;
		}
		if( yloc > 0 && grid[xloc][yloc-1] != WALL ) {
			canmove[UP] = 1;
		}
		if( yloc < (MAXY-1) && grid[xloc][yloc+1] != WALL ) {
			canmove[DOWN] = 1;
		}

		oldx = xloc;
		oldy = yloc;

		/* keep choosing until doMove stops returning -1 */
		do{
			ret = doMove(choseMove());
		}while( ret == -1 );

		reward = getReward();

		/* update the value of the cell just left */
		val[oldx][oldy] += ALPHA*(reward+(GAMMA*val[xloc][yloc])-val[oldx][oldy]);
		for( k = 0; k < 3; k ++ ) {
			if( phi[k] == 1 ) {
				phiVal[oldx][oldy][k] += ALPHA*(reward+(GAMMA*phiVal[xloc][yloc][k])-phiVal[oldx][oldy][k]);
			}
		}

		/* show the map on the first step of every 1000th episode */
		if( episodeNumber % 1000 == 0 && step == 1 ) {
			showMap();
			usleep((useconds_t)SLEEPTIME);
		}
	}while( step < MAXSTEPS && ( xloc != rewardx || yloc != rewardy ));

	count++;
}
return;
}
Beispiel #18
0
/**
 * Reward for a transition between two state collections.
 *
 * Looks up, in each collection, the state registered for this object's
 * `properties` and forwards both to the getReward overload that accepts
 * what getState() returns.
 */
double CFeatureRewardFunction::getReward(CStateCollection *oldState, CAction *action, CStateCollection *newState)
{
	
	return getReward(oldState->getState(properties), action, newState->getState(properties));
}
Beispiel #19
0
/**
 * Reward for a transition given as state indices.
 *
 * The old state index and the action are ignored; the result is the
 * single-argument getReward of newState.
 */
double CFeatureStateRewardFunction::getReward(int , CAction *, int newState)
{
	return getReward(newState);
}
/**
 * Builds the turntable board and attaches it to layer.
 *
 * Creates the board sprite, a centre panel with the start button, and box
 * data for the cells around the border: six along the top and bottom edges,
 * three along each side edge. Each box gets the reward getReward(id % 6);
 * the box with id 4 starts selected. The ids of all placed boxes are stored
 * via GlobalInfo::set_gizilist.
 *
 * @param layer layer that receives the board and is the target of the start
 *              button's callback
 */
TurntableSystem::TurntableSystem(CCLayer* layer)
{
	CCSprite* board = CCSprite::create(IMG_ZP_BD.c_str());
	board->setPosition(VisibleRect::center());

	int boardW =  board->getContentSize().width;
	int boardH =  board->getContentSize().height;

	CCSprite* centerPanel = CCSprite::create(IMG_ZP_CENTERBD.c_str());
	centerPanel->setPosition(ccp(boardW/2,boardH/2));

	// Grid cells: this sprite is only used to read the size of one cell
	CCSprite* cellFrame = CCSprite::create(IMG_ZP_KUANGBD.c_str());
	int cellW =  cellFrame->getContentSize().width;
	int cellH =  cellFrame->getContentSize().height;

	// Create the matrix of grid cells
	boxgezi = CCArray::create();
	int bid = 0;
	for (int side = 0; side < 4; side++)
	{
		for (int slot = 0; slot <=5; slot++)
		{
			Zp_BoxData* box = new Zp_BoxData();
			box->set_boxid(bid);
			box->set_boxReward(getReward(bid%6));
			box->set_xuanzhong(bid==4);	// only box 4 starts selected
			box->set_tag(bid);

			if (side == 0)
			{
				// top edge
				box->set_point(ccp(55+cellW*slot,boardH-30));
				boxgezi->addObject(box);
			}
			else if (side == 1)
			{
				// right edge: only the first three slots are placed
				if(slot<=2)
				{
					box->set_point(ccp(55+cellW*5,boardH-30-cellH-cellH*slot));
					boxgezi->addObject(box);
				}
			}
			else if (side == 2)
			{
				// bottom edge
				box->set_point(ccp(55+cellW*5-cellW*slot,30));
				boxgezi->addObject(box);
			}
			else if (side == 3)
			{
				// left edge: only the first three slots are placed
				if(slot<=2)
				{
					box->set_point(ccp(55,30+cellH+cellH*slot));
					boxgezi->addObject(box);
				}
			}

			// The id advances for skipped slots as well.
			bid++;
		}
	}

	vector<int> placedIds ;

	for (int idx = 0; idx < boxgezi->count(); idx++)
	{
		Zp_BoxData* data = (Zp_BoxData*)boxgezi->objectAtIndex(idx);
		// The Gzi constructor receives the board and the box data; the
		// resulting pointer itself is not used afterwards.
		Gzi* cell = new Gzi(board,data);
		placedIds.push_back(data->get_boxid());
	}
	GlobalInfo::getInstance()->set_gizilist(placedIds);

	// Start button in the middle of the centre panel
	CCMenuItemImage *startItem = CCMenuItemImage::create(IMG_ZP_START.c_str(), IMG_ZP_START0.c_str(), layer, menu_selector(TurntableSystem::runPlay));
	startItem->setPosition(ccp(centerPanel->getContentSize().width/2, centerPanel->getContentSize().height/2+10));
	CCMenu *startMenu = CCMenu::create(startItem, NULL);
	startMenu->setPosition(CCPointZero);
	centerPanel->addChild(startMenu, 0,TAG_ZP_PLAY);

	board->addChild(centerPanel,2,TAG_ZP_ZHONGJIAN);
	layer->addChild(board,3,TAG_ZP_BD);

}