void set_up_initial_allocations(void) { current_mns = INITIAL_MNS; firstst = allocate_integer_array( current_mns ); lastst = allocate_integer_array( current_mns ); finalst = allocate_integer_array( current_mns ); transchar = allocate_integer_array( current_mns ); trans1 = allocate_integer_array( current_mns ); trans2 = allocate_integer_array( current_mns ); accptnum = allocate_integer_array( current_mns ); assoc_rule = allocate_integer_array( current_mns ); state_type = allocate_integer_array( current_mns ); current_max_rules = INITIAL_MAX_RULES; rule_type = allocate_integer_array( current_max_rules ); rule_linenum = allocate_integer_array( current_max_rules ); rule_useful = allocate_integer_array( current_max_rules ); current_max_scs = INITIAL_MAX_SCS; scset = allocate_integer_array( current_max_scs ); scbol = allocate_integer_array( current_max_scs ); scxclu = allocate_integer_array( current_max_scs ); sceof = allocate_integer_array( current_max_scs ); scname = allocate_char_ptr_array( current_max_scs ); current_maxccls = INITIAL_MAX_CCLS; cclmap = allocate_integer_array( current_maxccls ); ccllen = allocate_integer_array( current_maxccls ); cclng = allocate_integer_array( current_maxccls ); current_max_ccl_tbl_size = INITIAL_MAX_CCL_TBL_SIZE; ccltbl = allocate_Character_array( current_max_ccl_tbl_size ); current_max_dfa_size = INITIAL_MAX_DFA_SIZE; current_max_xpairs = INITIAL_MAX_XPAIRS; nxt = allocate_integer_array( current_max_xpairs ); chk = allocate_integer_array( current_max_xpairs ); current_max_template_xpairs = INITIAL_MAX_TEMPLATE_XPAIRS; tnxt = allocate_integer_array( current_max_template_xpairs ); current_max_dfas = INITIAL_MAX_DFAS; base = allocate_integer_array( current_max_dfas ); def = allocate_integer_array( current_max_dfas ); dfasiz = allocate_integer_array( current_max_dfas ); accsiz = allocate_integer_array( current_max_dfas ); dhash = allocate_integer_array( current_max_dfas ); dss = allocate_int_ptr_array( current_max_dfas ); dfaacc = allocate_dfaacc_union( current_max_dfas ); nultrans = NULL; }
int snstods (int sns[], int numstates, int accset[], int nacc, int hashval, int *newds_addr) { int didsort = 0; int i, j; int newds, *oldsns; for (i = 1; i <= lastdfa; ++i) if (hashval == dhash[i]) { if (numstates == dfasiz[i]) { oldsns = dss[i]; if (!didsort) { /* We sort the states in sns so we * can compare it to oldsns quickly. */ qsort (&sns [1], (size_t) numstates, sizeof (sns [1]), intcmp); didsort = 1; } for (j = 1; j <= numstates; ++j) if (sns[j] != oldsns[j]) break; if (j > numstates) { ++dfaeql; *newds_addr = i; return 0; } ++hshcol; } else ++hshsave; } /* Make a new dfa. */ if (++lastdfa >= current_max_dfas) increase_max_dfas (); newds = lastdfa; dss[newds] = allocate_integer_array (numstates + 1); /* If we haven't already sorted the states in sns, we do so now, * so that future comparisons with it can be made quickly. */ if (!didsort) qsort (&sns [1], (size_t) numstates, sizeof (sns [1]), intcmp); for (i = 1; i <= numstates; ++i) dss[newds][i] = sns[i]; dfasiz[newds] = numstates; dhash[newds] = hashval; if (nacc == 0) { if (reject) dfaacc[newds].dfaacc_set = NULL; else dfaacc[newds].dfaacc_state = 0; accsiz[newds] = 0; } else if (reject) { /* We sort the accepting set in increasing order so the * disambiguating rule that the first rule listed is considered * match in the event of ties will work. */ qsort (&accset [1], (size_t) nacc, sizeof (accset [1]), intcmp); dfaacc[newds].dfaacc_set = allocate_integer_array (nacc + 1); /* Save the accepting set for later */ for (i = 1; i <= nacc; ++i) { dfaacc[newds].dfaacc_set[i] = accset[i]; if (accset[i] <= num_rules) /* Who knows, perhaps a REJECT can yield * this rule. */ rule_useful[accset[i]] = true; } accsiz[newds] = nacc; } else { /* Find lowest numbered rule so the disambiguating rule * will work. */ j = num_rules + 1; for (i = 1; i <= nacc; ++i) if (accset[i] < j) j = accset[i]; dfaacc[newds].dfaacc_state = j; if (j <= num_rules) rule_useful[j] = true; } *newds_addr = newds; return 1; }
int *epsclosure (int *t, int *ns_addr, int accset[], int *nacc_addr, int *hv_addr) { int stkpos, ns, tsp; int numstates = *ns_addr, nacc, hashval, transsym, nfaccnum; int stkend, nstate; static int did_stk_init = false, *stk; #define MARK_STATE(state) \ do{ trans1[state] = trans1[state] - MARKER_DIFFERENCE;} while(0) #define IS_MARKED(state) (trans1[state] < 0) #define UNMARK_STATE(state) \ do{ trans1[state] = trans1[state] + MARKER_DIFFERENCE;} while(0) #define CHECK_ACCEPT(state) \ do{ \ nfaccnum = accptnum[state]; \ if ( nfaccnum != NIL ) \ accset[++nacc] = nfaccnum; \ }while(0) #define DO_REALLOCATION() \ do { \ current_max_dfa_size += MAX_DFA_SIZE_INCREMENT; \ ++num_reallocs; \ t = reallocate_integer_array( t, current_max_dfa_size ); \ stk = reallocate_integer_array( stk, current_max_dfa_size ); \ }while(0) \ #define PUT_ON_STACK(state) \ do { \ if ( ++stkend >= current_max_dfa_size ) \ DO_REALLOCATION(); \ stk[stkend] = state; \ MARK_STATE(state); \ }while(0) #define ADD_STATE(state) \ do { \ if ( ++numstates >= current_max_dfa_size ) \ DO_REALLOCATION(); \ t[numstates] = state; \ hashval += state; \ }while(0) #define STACK_STATE(state) \ do { \ PUT_ON_STACK(state); \ CHECK_ACCEPT(state); \ if ( nfaccnum != NIL || transchar[state] != SYM_EPSILON ) \ ADD_STATE(state); \ }while(0) if (!did_stk_init) { stk = allocate_integer_array (current_max_dfa_size); did_stk_init = true; } nacc = stkend = hashval = 0; for (nstate = 1; nstate <= numstates; ++nstate) { ns = t[nstate]; /* The state could be marked if we've already pushed it onto * the stack. */ if (!IS_MARKED (ns)) { PUT_ON_STACK (ns); CHECK_ACCEPT (ns); hashval += ns; } } for (stkpos = 1; stkpos <= stkend; ++stkpos) { ns = stk[stkpos]; transsym = transchar[ns]; if (transsym == SYM_EPSILON) { tsp = trans1[ns] + MARKER_DIFFERENCE; if (tsp != NO_TRANSITION) { if (!IS_MARKED (tsp)) STACK_STATE (tsp); tsp = trans2[ns]; if (tsp != NO_TRANSITION && !IS_MARKED (tsp)) STACK_STATE (tsp); } } } /* Clear out "visit" markers. */ for (stkpos = 1; stkpos <= stkend; ++stkpos) { if (IS_MARKED (stk[stkpos])) UNMARK_STATE (stk[stkpos]); else flexfatal (_ ("consistency check failed in epsclosure()")); } *ns_addr = numstates; *hv_addr = hashval; *nacc_addr = nacc; return t; }
void ntod (void) { int *accset, ds, nacc, newds; int sym, hashval, numstates, dsize; int num_full_table_rows=0; /* used only for -f */ int *nset, *dset; int targptr, totaltrans, i, comstate, comfreq, targ; int symlist[CSIZE + 1]; int num_start_states; int todo_head, todo_next; struct yytbl_data *yynxt_tbl = 0; flex_int32_t *yynxt_data = 0, yynxt_curr = 0; /* Note that the following are indexed by *equivalence classes* * and not by characters. Since equivalence classes are indexed * beginning with 1, even if the scanner accepts NUL's, this * means that (since every character is potentially in its own * equivalence class) these arrays must have room for indices * from 1 to CSIZE, so their size must be CSIZE + 1. */ int duplist[CSIZE + 1], state[CSIZE + 1]; int targfreq[CSIZE + 1] = {0}, targstate[CSIZE + 1]; /* accset needs to be large enough to hold all of the rules present * in the input, *plus* their YY_TRAILING_HEAD_MASK variants. */ accset = allocate_integer_array ((num_rules + 1) * 2); nset = allocate_integer_array (current_max_dfa_size); /* The "todo" queue is represented by the head, which is the DFA * state currently being processed, and the "next", which is the * next DFA state number available (not in use). We depend on the * fact that snstods() returns DFA's \in increasing order/, and thus * need only know the bounds of the dfas to be processed. */ todo_head = todo_next = 0; for (i = 0; i <= csize; ++i) { duplist[i] = NIL; symlist[i] = false; } for (i = 0; i <= num_rules; ++i) accset[i] = NIL; if (trace) { dumpnfa (scset[1]); fputs (_("\n\nDFA Dump:\n\n"), stderr); } inittbl (); /* Check to see whether we should build a separate table for * transitions on NUL characters. We don't do this for full-speed * (-F) scanners, since for them we don't have a simple state * number lying around with which to index the table. We also * don't bother doing it for scanners unless (1) NUL is in its own * equivalence class (indicated by a positive value of * ecgroup[NUL]), (2) NUL's equivalence class is the last * equivalence class, and (3) the number of equivalence classes is * the same as the number of characters. This latter case comes * about when useecs is false or when it's true but every character * still manages to land in its own class (unlikely, but it's * cheap to check for). If all these things are true then the * character code needed to represent NUL's equivalence class for * indexing the tables is going to take one more bit than the * number of characters, and therefore we won't be assured of * being able to fit it into a YY_CHAR variable. This rules out * storing the transitions in a compressed table, since the code * for interpreting them uses a YY_CHAR variable (perhaps it * should just use an integer, though; this is worth pondering ... * ###). * * Finally, for full tables, we want the number of entries in the * table to be a power of two so the array references go fast (it * will just take a shift to compute the major index). If * encoding NUL's transitions in the table will spoil this, we * give it its own table (note that this will be the case if we're * not using equivalence classes). */ /* Note that the test for ecgroup[0] == numecs below accomplishes * both (1) and (2) above */ if (!fullspd && ecgroup[0] == numecs) { /* NUL is alone in its equivalence class, which is the * last one. */ int use_NUL_table = (numecs == csize); if (fulltbl && !use_NUL_table) { /* We still may want to use the table if numecs * is a power of 2. */ int power_of_two; for (power_of_two = 1; power_of_two <= csize; power_of_two *= 2) if (numecs == power_of_two) { use_NUL_table = true; break; } } if (use_NUL_table) nultrans = allocate_integer_array (current_max_dfas); /* From now on, nultrans != nil indicates that we're * saving null transitions for later, separate encoding. */ } if (fullspd) { for (i = 0; i <= numecs; ++i) state[i] = 0; place_state (state, 0, 0); dfaacc[0].dfaacc_state = 0; } else if (fulltbl) { if (nultrans) /* We won't be including NUL's transitions in the * table, so build it for entries from 0 .. numecs - 1. */ num_full_table_rows = numecs; else /* Take into account the fact that we'll be including * the NUL entries in the transition table. Build it * from 0 .. numecs. */ num_full_table_rows = numecs + 1; /* Begin generating yy_nxt[][] * This spans the entire LONG function. * This table is tricky because we don't know how big it will be. * So we'll have to realloc() on the way... * we'll wait until we can calculate yynxt_tbl->td_hilen. */ yynxt_tbl = calloc(1, sizeof (struct yytbl_data)); yytbl_data_init (yynxt_tbl, YYTD_ID_NXT); yynxt_tbl->td_hilen = 1; yynxt_tbl->td_lolen = (flex_uint32_t) num_full_table_rows; yynxt_tbl->td_data = yynxt_data = calloc(yynxt_tbl->td_lolen * yynxt_tbl->td_hilen, sizeof (flex_int32_t)); yynxt_curr = 0; buf_prints (&yydmap_buf, "\t{YYTD_ID_NXT, (void**)&yy_nxt, sizeof(%s)},\n", long_align ? "flex_int32_t" : "flex_int16_t"); /* Unless -Ca, declare it "short" because it's a real * long-shot that that won't be large enough. */ if (gentables) out_str_dec ("static const %s yy_nxt[][%d] =\n {\n", long_align ? "flex_int32_t" : "flex_int16_t", num_full_table_rows); else { out_dec ("#undef YY_NXT_LOLEN\n#define YY_NXT_LOLEN (%d)\n", num_full_table_rows); out_str ("static const %s *yy_nxt =0;\n", long_align ? "flex_int32_t" : "flex_int16_t"); } if (gentables) outn (" {"); /* Generate 0 entries for state #0. */ for (i = 0; i < num_full_table_rows; ++i) { mk2data (0); yynxt_data[yynxt_curr++] = 0; } dataflush (); if (gentables) outn (" },\n"); } /* Create the first states. */ num_start_states = lastsc * 2; for (i = 1; i <= num_start_states; ++i) { numstates = 1; /* For each start condition, make one state for the case when * we're at the beginning of the line (the '^' operator) and * one for the case when we're not. */ if (i % 2 == 1) nset[numstates] = scset[(i / 2) + 1]; else nset[numstates] = mkbranch (scbol[i / 2], scset[i / 2]); nset = epsclosure (nset, &numstates, accset, &nacc, &hashval); if (snstods (nset, numstates, accset, nacc, hashval, &ds)) { numas += nacc; totnst += numstates; ++todo_next; if (variable_trailing_context_rules && nacc > 0) check_trailing_context (nset, numstates, accset, nacc); } } if (!fullspd) { if (!snstods (nset, 0, accset, 0, 0, &end_of_buffer_state)) flexfatal (_ ("could not create unique end-of-buffer state")); ++numas; ++num_start_states; ++todo_next; } while (todo_head < todo_next) { targptr = 0; totaltrans = 0; for (i = 1; i <= numecs; ++i) state[i] = 0; ds = ++todo_head; dset = dss[ds]; dsize = dfasiz[ds]; if (trace) fprintf (stderr, _("state # %d:\n"), ds); sympartition (dset, dsize, symlist, duplist); for (sym = 1; sym <= numecs; ++sym) { if (symlist[sym]) { symlist[sym] = 0; if (duplist[sym] == NIL) { /* Symbol has unique out-transitions. */ numstates = symfollowset (dset, dsize, sym, nset); nset = epsclosure (nset, &numstates, accset, &nacc, &hashval); if (snstods (nset, numstates, accset, nacc, hashval, &newds)) { totnst = totnst + numstates; ++todo_next; numas += nacc; if (variable_trailing_context_rules && nacc > 0) check_trailing_context (nset, numstates, accset, nacc); } state[sym] = newds; if (trace) fprintf (stderr, "\t%d\t%d\n", sym, newds); targfreq[++targptr] = 1; targstate[targptr] = newds; ++numuniq; } else { /* sym's equivalence class has the same * transitions as duplist(sym)'s * equivalence class. */ targ = state[duplist[sym]]; state[sym] = targ; if (trace) fprintf (stderr, "\t%d\t%d\n", sym, targ); /* Update frequency count for * destination state. */ i = 0; while (targstate[++i] != targ) ; ++targfreq[i]; ++numdup; } ++totaltrans; duplist[sym] = NIL; } } numsnpairs += totaltrans; if (ds > num_start_states) check_for_backing_up (ds, state); if (nultrans) { nultrans[ds] = state[NUL_ec]; state[NUL_ec] = 0; /* remove transition */ } if (fulltbl) { /* Each time we hit here, it's another td_hilen, so we realloc. */ yynxt_tbl->td_hilen++; yynxt_tbl->td_data = yynxt_data = realloc (yynxt_data, yynxt_tbl->td_hilen * yynxt_tbl->td_lolen * sizeof (flex_int32_t)); if (gentables) outn (" {"); /* Supply array's 0-element. */ if (ds == end_of_buffer_state) { mk2data (-end_of_buffer_state); yynxt_data[yynxt_curr++] = -end_of_buffer_state; } else { mk2data (end_of_buffer_state); yynxt_data[yynxt_curr++] = end_of_buffer_state; } for (i = 1; i < num_full_table_rows; ++i) { /* Jams are marked by negative of state * number. */ mk2data (state[i] ? state[i] : -ds); yynxt_data[yynxt_curr++] = state[i] ? state[i] : -ds; } dataflush (); if (gentables) outn (" },\n"); } else if (fullspd) place_state (state, ds, totaltrans); else if (ds == end_of_buffer_state) /* Special case this state to make sure it does what * it's supposed to, i.e., jam on end-of-buffer. */ stack1 (ds, 0, 0, JAMSTATE); else { /* normal, compressed state */ /* Determine which destination state is the most * common, and how many transitions to it there are. */ comfreq = 0; comstate = 0; for (i = 1; i <= targptr; ++i) if (targfreq[i] > comfreq) { comfreq = targfreq[i]; comstate = targstate[i]; } bldtbl (state, ds, totaltrans, comstate, comfreq); } } if (fulltbl) { dataend (); if (tablesext) { yytbl_data_compress (yynxt_tbl); if (yytbl_data_fwrite (&tableswr, yynxt_tbl) < 0) flexerror (_ ("Could not write yynxt_tbl[][]")); } if (yynxt_tbl) { yytbl_data_destroy (yynxt_tbl); yynxt_tbl = 0; } } else if (!fullspd) { cmptmps (); /* create compressed template entries */ /* Create tables for all the states with only one * out-transition. */ while (onesp > 0) { mk1tbl (onestate[onesp], onesym[onesp], onenext[onesp], onedef[onesp]); --onesp; } mkdeftbl (); } free(accset); free(nset); }
void gentabs(void) { int i, j, k, *accset, nacc, *acc_array, total_states; int end_of_buffer_action = num_rules + 1; acc_array = allocate_integer_array( current_max_dfas ); nummt = 0; /* The compressed table format jams by entering the "jam state", * losing information about the previous state in the process. * In order to recover the previous state, we effectively need * to keep backing-up information. */ ++num_backing_up; if ( reject ) { /* Write out accepting list and pointer list. * * First we generate the "yy_acclist" array. In the process, * we compute the indices that will go into the "yy_accept" * array, and save the indices in the dfaacc array. */ int EOB_accepting_list[2]; /* Set up accepting structures for the End Of Buffer state. */ EOB_accepting_list[0] = 0; EOB_accepting_list[1] = end_of_buffer_action; accsiz[end_of_buffer_state] = 1; dfaacc[end_of_buffer_state].dfaacc_set = EOB_accepting_list; out_str_dec( long_align ? C_long_decl : C_short_decl, "yy_acclist", MAX( numas, 1 ) + 1 ); j = 1; /* index into "yy_acclist" array */ for ( i = 1; i <= lastdfa; ++i ) { acc_array[i] = j; if ( accsiz[i] != 0 ) { accset = dfaacc[i].dfaacc_set; nacc = accsiz[i]; if ( trace ) fprintf( stderr, _( "state # %d accepts: " ), i ); for ( k = 1; k <= nacc; ++k ) { int accnum = accset[k]; ++j; if ( variable_trailing_context_rules && ! (accnum & YY_TRAILING_HEAD_MASK) && accnum > 0 && accnum <= num_rules && rule_type[accnum] == RULE_VARIABLE ) { /* Special hack to flag * accepting number as part * of trailing context rule. */ accnum |= YY_TRAILING_MASK; } mkdata( accnum ); if ( trace ) { fprintf( stderr, "[%d]", accset[k] ); if ( k < nacc ) fputs( ", ", stderr ); else putc( '\n', stderr ); } } } } /* add accepting number for the "jam" state */ acc_array[i] = j; dataend(); } else { dfaacc[end_of_buffer_state].dfaacc_state = end_of_buffer_action; for ( i = 1; i <= lastdfa; ++i ) acc_array[i] = dfaacc[i].dfaacc_state; /* add accepting number for jam state */ acc_array[i] = 0; } /* Spit out "yy_accept" array. If we're doing "reject", it'll be * pointers into the "yy_acclist" array. Otherwise it's actual * accepting numbers. In either case, we just dump the numbers. */ /* "lastdfa + 2" is the size of "yy_accept"; includes room for C arrays * beginning at 0 and for "jam" state. */ k = lastdfa + 2; if ( reject ) /* We put a "cap" on the table associating lists of accepting * numbers with state numbers. This is needed because we tell * where the end of an accepting list is by looking at where * the list for the next state starts. */ ++k; out_str_dec( long_align ? C_long_decl : C_short_decl, "yy_accept", k ); for ( i = 1; i <= lastdfa; ++i ) { mkdata( acc_array[i] ); if ( ! reject && trace && acc_array[i] ) fprintf( stderr, _( "state # %d accepts: [%d]\n" ), i, acc_array[i] ); } /* Add entry for "jam" state. */ mkdata( acc_array[i] ); if ( reject ) /* Add "cap" for the list. */ mkdata( acc_array[i] ); dataend(); if ( useecs ) genecs(); if ( usemecs ) { /* Write out meta-equivalence classes (used to index * templates with). */ if ( trace ) fputs( _( "\n\nMeta-Equivalence Classes:\n" ), stderr ); out_str_dec( C_int_decl, "yy_meta", numecs + 1 ); for ( i = 1; i <= numecs; ++i ) { if ( trace ) fprintf( stderr, "%d = %d\n", i, ABS( tecbck[i] ) ); mkdata( ABS( tecbck[i] ) ); } dataend(); } total_states = lastdfa + numtemps; out_str_dec( (tblend >= MAX_SHORT || long_align) ? C_long_decl : C_short_decl, "yy_base", total_states + 1 ); for ( i = 1; i <= lastdfa; ++i ) { int d = def[i]; if ( base[i] == JAMSTATE ) base[i] = jambase; if ( d == JAMSTATE ) def[i] = jamstate; else if ( d < 0 ) { /* Template reference. */ ++tmpuses; def[i] = lastdfa - d + 1; } mkdata( base[i] ); } /* Generate jam state's base index. */ mkdata( base[i] ); for ( ++i /* skip jam state */; i <= total_states; ++i ) { mkdata( base[i] ); def[i] = jamstate; } dataend(); out_str_dec( (total_states >= MAX_SHORT || long_align) ? C_long_decl : C_short_decl, "yy_def", total_states + 1 ); for ( i = 1; i <= total_states; ++i ) mkdata( def[i] ); dataend(); out_str_dec( (total_states >= MAX_SHORT || long_align) ? C_long_decl : C_short_decl, "yy_nxt", tblend + 1 ); for ( i = 1; i <= tblend; ++i ) { /* Note, the order of the following test is important. * If chk[i] is 0, then nxt[i] is undefined. */ if ( chk[i] == 0 || nxt[i] == 0 ) nxt[i] = jamstate; /* new state is the JAM state */ mkdata( nxt[i] ); } dataend(); out_str_dec( (total_states >= MAX_SHORT || long_align) ? C_long_decl : C_short_decl, "yy_chk", tblend + 1 ); for ( i = 1; i <= tblend; ++i ) { if ( chk[i] == 0 ) ++nummt; mkdata( chk[i] ); } dataend(); }