bool monomialOrderingToMatrix( const struct MonomialOrdering& mo, std::vector<int>& mat, bool& base_is_revlex, int& component_direction, // -1 is Down, +1 is Up, 0 is not present int& component_is_before_row // -1 means: at the end. 0 means before the // order. // and r means considered before row 'r' of the matrix. ) { // a false return value means an error has occurred. int nvars = rawNumberOfVariables(&mo); base_is_revlex = true; enum LastBlock { LEX, REVLEX, WEIGHTS, NONE }; LastBlock last = NONE; int nwts = 0; // local var used in MO_WEIGHTS section int nrows = 0; int firstvar = 0; component_direction = 0; component_is_before_row = -2; // what should the default value be? Probably: -1. size_t last_element = 0; // The vector 'mat' will be resized back to this // value if the last part of the order is lex or // revlex. for (int i = 0; i < mo.len; i++) { mon_part p = mo.array[i]; switch (p->type) { case MO_LEX: case MO_LEX2: case MO_LEX4: // printf("lex %d\n", p->nvars); last_element = mat.size(); for (int j = 0; j < p->nvars; j++) { write_row(mat, nvars, firstvar + j, 1); } last = LEX; firstvar += p->nvars; nrows += p->nvars; break; case MO_GREVLEX: case MO_GREVLEX2: case MO_GREVLEX4: // printf("grevlex %d %ld\n", p->nvars, p->wts); write_weights(mat, nvars, firstvar, p->wts, p->nvars); last_element = mat.size(); for (int j = p->nvars - 1; j >= 1; --j) { write_row(mat, nvars, firstvar + j, -1); } last = REVLEX; firstvar += p->nvars; nrows += p->nvars; break; case MO_GREVLEX_WTS: case MO_GREVLEX2_WTS: case MO_GREVLEX4_WTS: // printf("grevlex_wts %d %ld\n", p->nvars, p->wts); write_weights(mat, nvars, firstvar, p->wts, p->nvars); last_element = mat.size(); for (int j = p->nvars - 1; j >= 1; --j) { write_row(mat, nvars, firstvar + j, -1); } last = REVLEX; firstvar += p->nvars; nrows += p->nvars; break; case MO_REVLEX: // printf("revlex %d\n", p->nvars); last_element = mat.size(); for (int j = p->nvars - 1; j >= 0; --j) { write_row(mat, nvars, firstvar + j, -1); } last = REVLEX; firstvar += p->nvars; nrows += p->nvars; break; case MO_WEIGHTS: // printf("matsize= %d weights %d p->wts=%lu\n", mat.size(), // p->nvars, p->wts); nwts = (p->nvars > nvars ? nvars : p->nvars); write_weights(mat, nvars, 0, p->wts, nwts); nrows++; last_element = mat.size(); last = WEIGHTS; break; case MO_LAURENT: case MO_LAURENT_REVLEX: case MO_NC_LEX: return false; break; case MO_POSITION_UP: component_direction = 1; component_is_before_row = nrows; break; case MO_POSITION_DOWN: component_direction = -1; component_is_before_row = nrows; break; default: // DO nothing break; } } if (last == LEX) { // last block was lex, so use lex tie-breaker mat.resize(last_element); if (nrows == component_is_before_row) component_is_before_row = -1; base_is_revlex = false; } else if (last == REVLEX) { // last block was revlex, so use revlex tie-breaker if (nrows == component_is_before_row) component_is_before_row = -1; mat.resize(last_element); } else { // last block is a weight vector, so use revlex as the tie-breaker. // nothing to change here. } return true; }
int main(int argc, char *argv[]) { /* Initialize variables used by the Agent thread */ int p; int a, aprime; float s[DECAY_COUNT], sprime[DECAY_COUNT]; int reward; int tile_array[feature_count]; unsigned int i; double delta; double Q[ACTION_COUNT]; // Learning parameters double stepsize = 0.1 / (float) num_tilings; double lambda = 0.9; double gamma = 0.9; struct sigaction act; struct sigaction oldact; act.sa_handler = endProgram; sigemptyset(&act.sa_mask); act.sa_flags = 0; sigaction(SIGINT, &act, &oldact); srand(0); pthread_mutex_init(&pktNumMutex, NULL); pthread_mutex_init(&actionMutex, NULL); pthread_mutex_init(&rewardMusicMutex, NULL); trajectoryFile.open("trajectory.txt"); if(!trajectoryFile.is_open()) printf("Trajectory file could not be opened.\n"); /* Set up variables used by individual policy components */ // --- begin initialize variables for Tians code timeStep = 0; leftCount=0, rightCount =0; diff = 0; actionToTake = 1; count = 0; alignPhase = 1; notInRightMode = false; // --- end initialize variables for Tians code // --- begin initialize variables for Amirs code cwTurn = 1; // --- end initialize variables for Amirs code // initialize weights // first try to read the weight file and if there is no file, then initialize randomly if(!read_weights(weights)){ for (i = 0; i < memory_size; i++) { weights[i] = -100.0/num_tilings; } } for (i = 0; i < memory_size; i++) { e[i] = 0; } // Set up timing + packet number p = pktNum; // State based on IR byte s[0] = redDecay; s[1] = greenDecay; s[2] = bumpDecay; s[3] = leftDecay; s[4] = rightDecay; s[5] = forwardDecay; s[6] = backwardDecay; s[7] = stopDecay; s[8] = chargeDecay; a = sAPrime[p];//epsilonGreedy(weights, s, epsilon); // Use a lock to ensure action is changed separately pthread_mutex_lock( &actionMutex ); action = a; // sets up action to be taken by csp thread pthread_mutex_unlock( &actionMutex ); prevPktNum = myPktNum; // Main agent loop while (TRUE) { int rval = getNextPacket(); if (rval == -1) { write_weights(weights); printf("Complete! Weights saved to weights.txt. Ran %d episodes.", episode + 1); break; } else if (rval == 1) { // Episode complete for (i = 0; i < memory_size; i++) { e[i] = 0; } episode++; } // Get the packet number p = pktNum; // Update decays updateDecay(p, prevPktNum, myPktNum); //printf("ir: %d\n", sIRbyte[p]); // Reward of -1 per step reward = -1; // Determine the next observation // TODO: Change this to new state representation sprime[0] = redDecay; sprime[1] = greenDecay; sprime[2] = bumpDecay; sprime[3] = leftDecay; sprime[4] = rightDecay; sprime[5] = forwardDecay; sprime[6] = backwardDecay; sprime[7] = stopDecay; sprime[8] = chargeDecay; aprime = sAPrime[p];//epsilonGreedy(weights, sprime, epsilon); // Set action variable pthread_mutex_lock( &actionMutex ); action = aprime; // sets up action to be taken by csp thread pthread_mutex_unlock( &actionMutex ); // Get Q values getQ(Q, s, weights, num_tilings, memory_size); delta = reward - Q[a]; getQ(Q, sprime, weights, num_tilings, memory_size); delta += gamma * Q[aprime]; // Update weights get_tiles(s, a, tile_array, num_tilings, memory_size); for (i = 0; i < feature_count; i++) { e[tile_array[i]] = 1; } //printf("Docking: s a r s' a':%d %d %d %d %d\n", s, a, reward, sprime, aprime); for (i = 0; i < memory_size; i++ ) { weights[i] += stepsize * delta * e[i]; e[i] *= lambda; } // Decay sensor traces performDecay(); for (i = 0; i < DECAY_COUNT; i++) { s[i] = sprime[i]; } a = aprime; prevPktNum = myPktNum; } return 0; }