Exemplo n.º 1
0
// Translate the monomial ordering 'mo' into matrix form, appending the
// generated entries to 'mat'.
//
// Rows are produced through write_row / write_weights (defined elsewhere);
// presumably each call appends one row of rawNumberOfVariables(&mo) entries --
// confirm against those helpers.
//
// Outputs:
//   base_is_revlex          - true unless the last lex/revlex/weights block
//                             seen was a lex block.
//   component_direction     - +1 for MO_POSITION_UP, -1 for MO_POSITION_DOWN,
//                             0 if no position part occurs.
//   component_is_before_row - row count reached when the (last) position part
//                             was seen; stays -2 if there is none; becomes -1
//                             when it equals the final row count and the
//                             order ends in a lex or revlex block.
//
// Returns false (an error) as soon as a MO_LAURENT, MO_LAURENT_REVLEX or
// MO_NC_LEX part is met; anything already appended to 'mat' is left in place.
// Returns true otherwise.
bool monomialOrderingToMatrix(
    const struct MonomialOrdering& mo,
    std::vector<int>& mat,
    bool& base_is_revlex,
    int& component_direction,     // -1 is Down, +1 is Up, 0 is not present
    int& component_is_before_row  // -1 means: at the end. 0 means before the
                                  // order.
    // and r means considered before row 'r' of the matrix.
    )
{
  enum LastBlock { LEX, REVLEX, WEIGHTS, NONE };

  int total_vars = rawNumberOfVariables(&mo);
  base_is_revlex = true;
  component_direction = 0;
  component_is_before_row = -2;  // "no position part seen" marker

  LastBlock tail = NONE;  // kind of the most recent lex/revlex/weights block
  int row_count = 0;      // rows accounted for so far (never reduced later)
  int var_base = 0;       // index of the first variable of the current block
  size_t trim_size = 0;   // size 'mat' is cut back to when the order ends in
                          // a lex or revlex block

  for (int part_index = 0; part_index < mo.len; ++part_index)
    {
      mon_part p = mo.array[part_index];
      switch (p->type)
        {
          case MO_LEX:
          case MO_LEX2:
          case MO_LEX4:
            {
              // The whole lex block is removable: remember the size before it.
              trim_size = mat.size();
              int offset = 0;
              while (offset < p->nvars)
                {
                  write_row(mat, total_vars, var_base + offset, 1);
                  ++offset;
                }
              tail = LEX;
              var_base += p->nvars;
              row_count += p->nvars;
              break;
            }

          case MO_GREVLEX:
          case MO_GREVLEX2:
          case MO_GREVLEX4:
          case MO_GREVLEX_WTS:
          case MO_GREVLEX2_WTS:
          case MO_GREVLEX4_WTS:
            {
              // The weight row always stays; only the -1 rows written after it
              // are removable, so the trim point is taken after the weights.
              write_weights(mat, total_vars, var_base, p->wts, p->nvars);
              trim_size = mat.size();
              int offset = p->nvars - 1;
              while (offset >= 1)
                {
                  write_row(mat, total_vars, var_base + offset, -1);
                  --offset;
                }
              tail = REVLEX;
              var_base += p->nvars;
              row_count += p->nvars;
              break;
            }

          case MO_REVLEX:
            {
              // Pure revlex: every row of the block is removable.
              trim_size = mat.size();
              int offset = p->nvars - 1;
              while (offset >= 0)
                {
                  write_row(mat, total_vars, var_base + offset, -1);
                  --offset;
                }
              tail = REVLEX;
              var_base += p->nvars;
              row_count += p->nvars;
              break;
            }

          case MO_WEIGHTS:
            {
              // A weight vector starts at variable 0 and is clipped to the
              // number of variables; it does not advance var_base.
              int weight_count =
                  (p->nvars <= total_vars ? p->nvars : total_vars);
              write_weights(mat, total_vars, 0, p->wts, weight_count);
              ++row_count;
              trim_size = mat.size();
              tail = WEIGHTS;
              break;
            }

          case MO_LAURENT:
          case MO_LAURENT_REVLEX:
          case MO_NC_LEX:
            // These parts are rejected: report an error.
            return false;

          case MO_POSITION_UP:
          case MO_POSITION_DOWN:
            component_direction = (p->type == MO_POSITION_UP ? 1 : -1);
            component_is_before_row = row_count;
            break;

          default:
            // Any other part contributes nothing.
            break;
        }
    }

  if (tail == LEX || tail == REVLEX)
    {
      // The trailing lex/revlex rows are replaced by the matching tie-breaker,
      // so drop them.  A component position recorded at the very end is
      // reported as -1.
      mat.resize(trim_size);
      if (component_is_before_row == row_count) component_is_before_row = -1;
      if (tail == LEX) base_is_revlex = false;
    }
  // Otherwise (weights last, or no block at all) revlex remains the
  // tie-breaker and 'mat' is kept as written.
  return true;
}
Exemplo n.º 2
0
int main(int argc, char *argv[]) {
    /* Initialize variables used by the Agent thread */
    int p;
    int a, aprime;
    float s[DECAY_COUNT], sprime[DECAY_COUNT];
    int reward;
    int tile_array[feature_count];
    unsigned int i;
    double delta;
    double Q[ACTION_COUNT];
    
    // Learning parameters
    double stepsize = 0.1 / (float) num_tilings;
    double lambda = 0.9;
    double gamma = 0.9;
    
    struct sigaction act;
    struct sigaction oldact;

    act.sa_handler = endProgram;
    sigemptyset(&act.sa_mask);
    act.sa_flags = 0;
    sigaction(SIGINT, &act, &oldact);
    
    srand(0);
    pthread_mutex_init(&pktNumMutex, NULL);
    pthread_mutex_init(&actionMutex, NULL);
    pthread_mutex_init(&rewardMusicMutex, NULL);

	trajectoryFile.open("trajectory.txt"); 

    if(!trajectoryFile.is_open())
		printf("Trajectory file could not be opened.\n");

    /* Set up variables used by individual policy components */
    // --- begin initialize variables for Tians code
    timeStep = 0;
    leftCount=0, rightCount =0;
    diff = 0;
    actionToTake = 1;
    count = 0;
    alignPhase = 1;
    notInRightMode = false; 
    // --- end initialize variables for Tians code
    
    // --- begin initialize variables for Amirs code
    cwTurn = 1;
    // --- end initialize variables for Amirs code
    
    // initialize weights
    // first try to read the weight file and if there is no file, then initialize randomly
    if(!read_weights(weights)){
	    for (i = 0; i < memory_size; i++) {
		weights[i] = -100.0/num_tilings;
	    }
    }

    for (i = 0; i < memory_size; i++) {
		e[i] = 0;
    }
	
    // Set up timing + packet number
    p = pktNum;

    // State based on IR byte
    s[0] = redDecay;
    s[1] = greenDecay;
    s[2] = bumpDecay;
    s[3] = leftDecay;
    s[4] = rightDecay;
    s[5] = forwardDecay;
    s[6] = backwardDecay;
    s[7] = stopDecay;
    s[8] = chargeDecay;
    a = sAPrime[p];//epsilonGreedy(weights, s, epsilon);

	// Use a lock to ensure action is changed separately
    pthread_mutex_lock( &actionMutex );
    action = a; // sets up action to be taken by csp thread
    pthread_mutex_unlock( &actionMutex );    
    prevPktNum = myPktNum;
    // Main agent loop
    while (TRUE) {
        int rval = getNextPacket();
        if (rval == -1) {
            write_weights(weights);
            printf("Complete! Weights saved to weights.txt. Ran %d episodes.", episode + 1);
            break;
            
        } else if (rval == 1) {
            // Episode complete
            for (i = 0; i < memory_size; i++) {
                e[i] = 0;
            }        
            episode++;
        }
        
        // Get the packet number
        p = pktNum;
        
        // Update decays
        updateDecay(p, prevPktNum, myPktNum);
        //printf("ir: %d\n", sIRbyte[p]);
        
        // Reward of -1 per step
        reward = -1;
        
        // Determine the next observation
        // TODO: Change this to new state representation
        sprime[0] = redDecay;
        sprime[1] = greenDecay;
        sprime[2] = bumpDecay;
        sprime[3] = leftDecay;
        sprime[4] = rightDecay;
        sprime[5] = forwardDecay;
        sprime[6] = backwardDecay;
        sprime[7] = stopDecay;
        sprime[8] = chargeDecay;
        aprime = sAPrime[p];//epsilonGreedy(weights, sprime, epsilon);
        
        // Set action variable
        pthread_mutex_lock( &actionMutex );
        action = aprime; // sets up action to be taken by csp thread
        pthread_mutex_unlock( &actionMutex );  
    
        // Get Q values
        getQ(Q, s, weights, num_tilings, memory_size);
        delta = reward - Q[a];
            
        getQ(Q, sprime, weights, num_tilings, memory_size);
        delta += gamma * Q[aprime];
        
        // Update weights
        get_tiles(s, a, tile_array, num_tilings, memory_size);
        for (i = 0; i < feature_count; i++) {
            e[tile_array[i]] = 1;
        }
        
        //printf("Docking: s a r s' a':%d %d %d %d %d\n", s, a, reward, sprime, aprime);
        for (i = 0; i < memory_size; i++ ) {
            weights[i] += stepsize * delta * e[i];
            e[i] *= lambda;
        }
        
        // Decay sensor traces
        performDecay();
        
        for (i = 0; i < DECAY_COUNT; i++) {
            s[i] = sprime[i];
        }
        a = aprime;
        prevPktNum = myPktNum;
        
    }
    return 0;
}