Example no. 1
int main(void)
{
	int action,box,i;
	long success,trial;
	double x, x_dot, theta, theta_dot,reinf,predicted_value;
	FILE *fptr;
	fptr=fopen("rand_restart.txt","w");
	x=x_dot=theta=theta_dot=rnd(-BETA,BETA);

	success=0;
	trial=1;
	reinf=0.0;
	while (success<1000000)    /* If the pole stays up for 1,000,000 consecutive steps, count it as a success. */
	{
		action=get_action(x,x_dot,theta,theta_dot,reinf);
		cart_pole(action,&x,&x_dot,&theta,&theta_dot);
		box=get_box(x,x_dot,theta,theta_dot);
		if (box==-1)
		{
			reinf=-1.0;
			predicted_value = 0.0;
			q_val[prev_state][prev_action]
				+= ALPHA * (reinf + GAMMA * predicted_value - q_val[prev_state][prev_action]);
			reset_controller();
			x=x_dot=theta=theta_dot=rnd(-BETA,BETA);
			trial++;
			printf("At %d success ,try %d trials\n",success,trial);
			fprintf(fptr,"%d\t%d\n",trial,success);
			success=0;
		}else{
			  success++;
			  reinf=0.0;
			}
	}
	printf("Success at %d trials \n",trial);
	for (i=0;i<NUM_BOXES;i++)
		fprintf(fptr,"%g %g\n",q_val[i][0],q_val[i][1]);
	fclose(fptr);

	return 0;
}
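
Example no. 1 leans on helpers and globals defined elsewhere (rnd, get_action, get_box, reset_controller, q_val, prev_state, prev_action, ALPHA, GAMMA, BETA, NUM_BOXES). As a rough orientation only, here is a minimal sketch of what rnd() and a Q-learning get_action() consistent with the failure-case update above might look like; the epsilon-greedy policy, EPSILON, and best_action() are assumptions for illustration, not the original implementation.

/* Sketch only: assumed helpers, not taken from the original source.
   Relies on the globals/constants used in Example no. 1 (q_val, prev_state,
   prev_action, ALPHA, GAMMA) plus an assumed EPSILON. */
#include <stdlib.h>

double rnd(double low, double high)        /* uniform value in [low, high] */
{
	return low + (high - low) * rand() / (double)RAND_MAX;
}

static int best_action(int state)          /* greedy action for a state */
{
	return (q_val[state][0] >= q_val[state][1]) ? 0 : 1;
}

int get_action(double x, double x_dot, double theta, double theta_dot, double reinf)
{
	int state = get_box(x, x_dot, theta, theta_dot);
	int action;

	/* TD(0) update for the previous transition in the non-failure case;
	   the failure case is handled explicitly in main() above. */
	if (prev_state >= 0)
		q_val[prev_state][prev_action] +=
			ALPHA * (reinf + GAMMA * q_val[state][best_action(state)]
			         - q_val[prev_state][prev_action]);

	/* epsilon-greedy choice between push-left (0) and push-right (1) */
	if (rnd(0.0, 1.0) < EPSILON)
		action = rand() % 2;
	else
		action = best_action(state);

	prev_state = state;
	prev_action = action;
	return action;
}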
Example no. 2
//     cart_and_pole() was taken directly from the pole simulator written
//     by Richard Sutton and Charles Anderson.
int go_cart(Network *net,int max_steps,int thresh)
{
   float x,			/* cart position, meters */
         x_dot,			/* cart velocity */
         theta,			/* pole angle, radians */
         theta_dot;		/* pole angular velocity */
   int steps=0,y;

   int random_start=1;

   double in[5];  //Input loading array

   double out1;
   double out2;

//     double one_degree= 0.0174532;	/* 2pi/360 */
//     double six_degrees=0.1047192;
   double twelve_degrees=0.2094384;
//     double thirty_six_degrees= 0.628329;
//     double fifty_degrees=0.87266;

   vector<NNode*>::iterator out_iter;

   if (random_start) {
     /*set up random start state*/
     x = (lrand48()%4800)/1000.0 - 2.4;
     x_dot = (lrand48()%2000)/1000.0 - 1;
     theta = (lrand48()%400)/1000.0 - .2;
     theta_dot = (lrand48()%3000)/1000.0 - 1.5;
    }
   else 
     x = x_dot = theta = theta_dot = 0.0;
   
   /*--- Iterate through the action-learn loop. ---*/
   while (steps++ < max_steps)
     {
       
       /*-- Set up the input layer based on the four inputs --*/
       //setup_input(net,x,x_dot,theta,theta_dot);
       in[0]=1.0;  //Bias
       in[1]=(x + 2.4) / 4.8;
       in[2]=(x_dot + .75) / 1.5;
       in[3]=(theta + twelve_degrees) / .41;
       in[4]=(theta_dot + 1.0) / 2.0;
       net->load_sensors(in);

       //activate_net(net);   /*-- activate the network based on the input --*/
       //Activate the net
       //If it loops, exit returning only fitness of 1 step
       if (!(net->activate())) return 1;

      /*-- Decide which way to push based on which output unit is greater --*/
       out_iter=net->outputs.begin();
       out1=(*out_iter)->activation;
       ++out_iter;
       out2=(*out_iter)->activation;
       if (out1 > out2)
         y = 0;
       else
         y = 1;
       
       /*--- Apply action to the simulated cart-pole ---*/
       cart_pole(y, &x, &x_dot, &theta, &theta_dot);
       
       /*--- Check for failure.  If so, return steps ---*/
       if (x < -2.4 || x > 2.4 || theta < -twelve_degrees ||
           theta > twelve_degrees)
         return steps;
     }
   
   return steps;
} 
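
Every example in this collection calls cart_pole() to advance the simulation, but none of them includes it. The comment above credits the Sutton/Anderson pole simulator, which applies a fixed-magnitude force and integrates the standard cart-pole equations of motion with Euler steps of 0.02 s. A sketch along those lines for the two-action case, with the commonly published constants (treat the exact values as assumptions), is:

#include <math.h>

#define GRAVITY          9.8
#define MASSCART         1.0
#define MASSPOLE         0.1
#define TOTAL_MASS       (MASSPOLE + MASSCART)
#define LENGTH           0.5            /* half the pole's length, meters */
#define POLEMASS_LENGTH  (MASSPOLE * LENGTH)
#define FORCE_MAG        10.0
#define TAU              0.02           /* seconds between state updates */
#define FOURTHIRDS       (4.0 / 3.0)

void cart_pole(int action, float *x, float *x_dot, float *theta, float *theta_dot)
{
   double force = (action > 0) ? FORCE_MAG : -FORCE_MAG;
   double costheta = cos(*theta);
   double sintheta = sin(*theta);

   double temp = (force + POLEMASS_LENGTH * (*theta_dot) * (*theta_dot) * sintheta)
                 / TOTAL_MASS;
   double thetaacc = (GRAVITY * sintheta - costheta * temp)
                     / (LENGTH * (FOURTHIRDS - MASSPOLE * costheta * costheta / TOTAL_MASS));
   double xacc = temp - POLEMASS_LENGTH * thetaacc * costheta / TOTAL_MASS;

   /*--- Advance the four state variables by one Euler step. ---*/
   *x         += TAU * (*x_dot);
   *x_dot     += TAU * xacc;
   *theta     += TAU * (*theta_dot);
   *theta_dot += TAU * thetaacc;
}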
Example no. 3
int main(int argc, char *argv[])
{
   float x,                         /* cart position, meters */
         x_dot,                     /* cart velocity */
         theta,                     /* pole angle, radians */
         theta_dot;                 /* pole angular velocity */
   int action;                      /* 0 for push-left, 1 for push-right */
   int steps = 0;                   /* duration of trial, in 0.02 sec steps */
   int failures = 0;                /* number of failed trials */
   int best_steps = 0;              /* number of steps in best trial */
   int best_trial = 0;              /* trial number of best trial */

   void reset_state(float *x, float *x_dot, float *theta, float *theta_dot);
   void cart_pole(int action, float *x, float *x_dot, 
                  float *theta, float *theta_dot);
   int fail(float x, float x_dot, float theta, float theta_dot);
   extern int get_action(float x, float x_dot, float theta, float theta_dot,
                         float reinforcement);
   extern void reset_controller(void);
/* extern void print_controller_info(); */

   printf("Driver: %s\n", rcs_driver_id);
   if (TILTED)
      printf("Pole will have tilted reset,");
   else
      printf("Pole has normal reset,");
   if (JUPITER_GRAV)
      printf(" and \"Jupiter\" gravity.\n");
   else
      printf(" and normal gravity.\n");

   if (ECHO_STATE)
   {
      echo_file = fopen("poledata", "w");
      if (echo_file == NULL)
         printf("ERROR: Cannot open \"poledata\" for output.\n");
   }

   if (argc > 1)
      RND_SEED = atoi(argv[1]);
   else
      RND_SEED = 0;

   reset_state(&x, &x_dot, &theta, &theta_dot);

   /*--- Iterate through the action-learn loop. ---*/
   while (steps++ < MAX_STEPS && failures < MAX_FAILURES)
   {
      action = get_action(x, x_dot, theta, theta_dot, 0.0);  
      
      /*--- Apply action to the simulated cart-pole ---*/
      cart_pole(action, &x, &x_dot, &theta, &theta_dot);

      if (fail(x, x_dot, theta, theta_dot))
      {
         failures++;
         printf("Trial %d was %d steps.\n", failures, steps);
         if (steps > best_steps)
         {
            best_steps = steps;
            best_trial = failures;
         }

         /* Call controller with negative feedback for learning */
         get_action(x, x_dot, theta, theta_dot, -1.0);

         reset_controller();
         reset_state(&x, &x_dot, &theta, &theta_dot);
         steps = 0;
      }
   }

   /* Diagnose result */
   if (failures == MAX_FAILURES) 
   {
      printf("Pole not balanced. Stopping after %d failures.\n",failures);
      printf("High water mark: %d steps in trial %d.\n\n", 
             best_steps, best_trial);
   }
   else
    printf("Pole balanced successfully for at least %d steps in trial %d.\n\n",
            steps - 1, failures + 1);

/* print_controller_info();*/
   if (echo_file != NULL)
      fclose(echo_file);
   return 0;
}
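
Example no. 3 declares reset_state() and fail() but does not define them. Going by the failure test used in Example no. 2 (track limit 2.4 m, pole limit 12 degrees), minimal versions would look roughly like the following; the tilted-reset angle is purely illustrative and is not taken from the original driver.

#define TWELVE_DEGREES 0.2094384        /* radians */

/* A trial fails when the cart leaves the track or the pole tips past 12 degrees. */
int fail(float x, float x_dot, float theta, float theta_dot)
{
   return (x < -2.4 || x > 2.4 ||
           theta < -TWELVE_DEGREES || theta > TWELVE_DEGREES);
}

/* Reset to the start state; with TILTED set, start the pole slightly off-center.
   The one-degree value below is an assumption made only to illustrate the flag. */
void reset_state(float *x, float *x_dot, float *theta, float *theta_dot)
{
   *x = *x_dot = *theta = *theta_dot = 0.0;
   if (TILTED)
      *theta = 0.0174532;               /* about one degree, in radians */
}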
Example no. 4
int main()
{
	int action,box,i;
	long success,trial;
	double x, x_dot, theta, theta_dot,reinf,predicted_value;
	FILE *fptr;
	FILE *fptr1;
	fptr=fopen("rand_restart.txt","w");
	fptr1=fopen("output.csv","w");
	x=x_dot=theta=theta_dot=rnd(-BETA,BETA);
    double angle;
	success=0;
	trial=1;
	reinf=0.0;
	double force;
	double j,k;
	double best_ALPHA=0;
	double best_GAMMA=0;
    
	while (success<1000000)    /* If the pole stays up for 1,000,000 consecutive steps, count it as a success. */
	{
          
          //getchar();
		action=get_action(x,x_dot,theta,theta_dot,reinf);
		cart_pole(action,&x,&x_dot,&theta,&theta_dot);
		
		//printf("%d")
		if (action==0)
			force=10;
		else if (action==1)
			force=5;
		else if (action==2)
			force=0;
		else if (action==3)
			force=-5;
		else
			force=-10;
		
		
		
        fprintf(fptr1,"%.2f,%.2f,%.2f,%.2f,%f\n",x,theta,x_dot,theta_dot,force);
		angle=theta*180/3.1415926;
		//printf("x%.2f,angle%.2f,%.2f,%.2f,%d\n",x,angle,x_dot,theta_dot,action);
		
		box=get_box(x,x_dot,theta,theta_dot);
		if (box==-1)
		{
			reinf=-1.0;
			predicted_value = 0.0;
			q_val[prev_state][prev_action]
				+= ALPHA * (reinf + GAMMA * predicted_value - q_val[prev_state][prev_action]);
			reset_controller();
			x=x_dot=theta=theta_dot=rnd(-BETA,BETA);
			trial++;
			//printf("At %d success ,try %d trials\n",success,trial);
			printf("At trial %d : success--->%d (mean last how long)\n",trial,success);
			fprintf(fptr,"trials%d\t success%d\n",trial,success);
			success=0;
		}else{
			  success++;
			  reinf=0.0;
			}
	}
		printf("If success > 1000000 \n Success at %d trials \n",trial);
        for (i=0;i<NUM_BOXES;i++)
		fprintf(fptr,"%g %g\n",q_val[i][0],q_val[i][1]);
	fclose(fptr);
    fclose(fptr1);
    
 
 



system("pause");

}
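
The five-way if/else chain in Example no. 4 only converts the action index into a force value for the CSV log. If the controller really does use five discrete force levels, a lookup table keeps that mapping in one place; a small hypothetical helper (not part of the original code) would be:

/* Hypothetical helper: map the five discrete actions to the forces logged above. */
static double action_to_force(int action)
{
	static const double forces[] = { 10.0, 5.0, 0.0, -5.0, -10.0 };
	if (action < 0 || action > 4)
		return 0.0;                  /* out-of-range actions treated as no push */
	return forces[action];
}

/* usage inside the loop:  force = action_to_force(action); */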
Example no. 5
int main(void)
{
  float x,      /* cart position, meters */
        x_dot,      /* cart velocity */
        theta,      /* pole angle, radians */
        theta_dot;    /* pole angular velocity */
  vector  w,      /* vector of action weights */
          v,      /* vector of critic weights */
          e,      /* vector of action weight eligibilities */
          xbar;   /* vector of critic weight eligibilities; 'vector' is
                     presumably a typedef for float[N_BOXES] defined elsewhere */
  float p, oldp, rhat, r;
  int box, i, y, steps = 0, failures=0, failed;


  printf("Seed? ");
  scanf("%d",&i);
  srand(i);

  //srand (time(NULL));

  /*--- Initialize action and heuristic critic weights and traces. ---*/
  for (i = 0; i < N_BOXES; i++)
    w[i] = v[i] = xbar[i] = e[i] = 0.0;

  /*--- Starting state is (0 0 0 0) ---*/
  x = x_dot = theta = theta_dot = 0.0;

  /*--- Find box in state space containing start state ---*/
  box = get_box(x, x_dot, theta, theta_dot);

  /*--- Iterate through the action-learn loop. ---*/
  while (steps++ < MAX_STEPS && failures < MAX_FAILURES)
    {
      /*--- Choose action randomly, biased by current weight.
            ('random' is presumably a macro yielding a uniform value in [0,1].) ---*/
      y = (random < prob_push_right(w[box]));

      /*--- Update traces. ---*/
      e[box] += (1.0 - LAMBDAw) * (y - 0.5);
      xbar[box] += (1.0 - LAMBDAv);

      /*--- Remember prediction of failure for current state ---*/
      oldp = v[box];

      /*--- Apply action to the simulated cart-pole ---*/
      cart_pole(y, &x, &x_dot, &theta, &theta_dot);

      /*--- Get box of state space containing the resulting state. ---*/
      box = get_box(x, x_dot, theta, theta_dot);

      if (box < 0)
        {
          /*--- Failure occurred. ---*/
          failed = 1;
          failures++;
          printf("Trial %d was %d steps.\n", failures, steps);
          steps = 0;

          /*--- Reset state to (0 0 0 0).  Find the box. ---*/
          x = x_dot = theta = theta_dot = 0.0;
          box = get_box(x, x_dot, theta, theta_dot);

          /*--- Reinforcement upon failure is -1. Prediction of failure is 0. ---*/
          r = -1.0;
          p = 0.0;
        }
      else
        {
          /*--- Not a failure. ---*/
          failed = 0;

          /*--- Reinforcement is 0. Prediction of failure given by v weight. ---*/
          r = 0.0;
          p = v[box];
        }

      /*--- Heuristic reinforcement is:   current reinforcement
        + gamma * new failure prediction - previous failure prediction ---*/
      rhat = r + GAMMA * p - oldp;

      for (i = 0; i < N_BOXES; i++)
        {
          /*--- Update all weights. ---*/
          w[i] += ALPHA * rhat * e[i];
          v[i] += BETA * rhat * xbar[i];
          if (v[i] < -1.0)
            v[i] = -1.0;        /* clamp the failure prediction at -1 */

          if (failed)
            {
              /*--- If failure, zero all traces. ---*/
              e[i] = 0.0;
              xbar[i] = 0.0;
            }
          else
            {
              /*--- Otherwise, update (decay) the traces. ---*/
              e[i] *= LAMBDAw;
              xbar[i] *= LAMBDAv;
            }
        }

    }
  if (failures == MAX_FAILURES)
    printf("Pole not balanced. Stopping after %d failures.\n", failures);
  else
    printf("Pole balanced successfully for at least %d steps\n", steps - 1);

  return 0;
}
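
Example no. 5 is the classic ASE/ACE actor-critic loop, and it depends on two pieces that are not shown: the BOXES discretizer get_box() and prob_push_right(), the probability of pushing right given the action weight for the current box. The latter is a logistic function of that weight; a sketch (the +/-50 clipping and the definition of 'random' are assumptions about how this code is usually distributed) is:

#include <math.h>
#include <stdlib.h>

/* Probability of pushing right as a logistic (sigmoid) function of the box
   weight s, clipped to avoid overflow in exp(). */
double prob_push_right(double s)
{
  if (s >  50.0) s =  50.0;
  if (s < -50.0) s = -50.0;
  return 1.0 / (1.0 + exp(-s));
}

/* 'random' in the main loop is presumably a macro yielding a uniform
   value in [0,1], for example: */
#define random ((float)rand() / (float)RAND_MAX)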