static void generate_token_text(void) { u_short lowest_id; u_short highest_id; u_short id_count; u_short id; u_short i; /* sort ntp_keywords in token ID order */ qsort(ntp_keywords, COUNTOF(ntp_keywords), sizeof(ntp_keywords[0]), compare_key_tok_id); lowest_id = ntp_keywords[0].token; highest_id = ntp_keywords[COUNTOF(ntp_keywords) - 1].token; id_count = highest_id - lowest_id + 1; printf("#define LOWEST_KEYWORD_ID %d\n\n", lowest_id); printf("const char * const keyword_text[%d] = {", id_count); id = lowest_id; i = 0; while (i < COUNTOF(ntp_keywords)) { while (id < ntp_keywords[i].token) { printf(",\n\t/* %-5d %5d %20s */\tNULL", id - lowest_id, id, symbname(id)); id++; } if (i > 0) printf(","); /* coverity[leaked_storage] */ printf("\n\t/* %-5d %5d %20s */\t\"%s\"", id - lowest_id, id, symbname(id), ntp_keywords[i].key); i++; id++; } printf("\n};\n\n"); }
/*
 * create_scan_states - add the scanner states for one keyword suffix.
 *
 * Used by create_keyword_scanner.  Each call handles text[0] and
 * recurses on &text[1] until the final character of the keyword has
 * been placed.
 *
 * text		remaining suffix of the keyword being added
 * token	token to associate with the keyword's final state
 * followedby	follby value stored in the keyword's final state
 * prev_state	first state of the existing chain (linked through
 *		other_next_s) for this character position, or 0 if
 *		there is none yet
 *
 * Returns the first state of that chain after the insertion: this is
 * prev_state unless the state for text[0] ended up at the front.
 *
 * Exits the program on a duplicate keyword (2), when sst[] is full
 * (3), or when sst[token] was not reserved for token (6).
 */
static u_short
create_scan_states(
	char *	text,
	u_short	token,
	follby	followedby,
	u_short	prev_state
	)
{
	u_short	head_s;		/* chain head handed back to the caller */
	u_short	before_s;	/* chain entry preceding cursor_s, 0 if none */
	u_short	cursor_s;	/* chain entry being examined */
	u_short	node_s;		/* state representing text[0] */

	head_s = prev_state;
	before_s = 0;

	/*
	 * Locate the insertion point: step past every chain entry whose
	 * character is greater than text[0], so the chain stays ordered
	 * by character.
	 */
	for (cursor_s = prev_state;
	     cursor_s && (text[0] < sst[cursor_s].ch);
	     cursor_s = sst[cursor_s].other_next_s) {
		before_s = cursor_s;
	}

	if (cursor_s && (text[0] == sst[cursor_s].ch)) {
		/*
		 * An earlier keyword shares this prefix.  Reaching the
		 * end of the current keyword on an existing state is
		 * reported as a duplicate entry.
		 */
		if ('\0' == text[1]) {
			fprintf(stderr,
				"Duplicate entries for keyword '%s' in"
				" keyword_gen.c ntp_keywords[].\n",
				current_keyword);
			exit(2);
		}
		node_s = cursor_s;
	} else {
		/*
		 * No state for this character yet: take the next entry
		 * at the high-water mark, stepping over entries marked
		 * with a nonzero finishes_token.
		 */
		do {
			node_s = sst_highwater++;
		} while (node_s < COUNTOF(sst)
			 && sst[node_s].finishes_token);

		if (node_s >= COUNTOF(sst)) {
			fprintf(stderr,
				"fatal, keyword scanner state array "
				"sst[%d] is too small, modify\n"
				"keyword-gen.c to increase.\n",
				(int)COUNTOF(sst));
			exit(3);
		}

		/* record the character and splice into the chain */
		sst[node_s].ch = text[0];
		sst[node_s].other_next_s = cursor_s;
		sst[node_s].followedby = FOLLBY_NON_ACCEPTING;

		if (before_s) {
			sst[before_s].other_next_s = node_s;
		} else {
			head_s = node_s;
		}
	}

	/* More characters follow: continue with the rest of the keyword. */
	if ('\0' != text[1]) {
		sst[node_s].match_next_s =
		    create_scan_states(
			&text[1],
			token,
			followedby,
			sst[node_s].match_next_s);
		return head_s;
	}

	/* text[0] is the last character, so this is an accepting state. */
	sst[node_s].finishes_token = (u_short)token;
	sst[node_s].followedby = (char)followedby;

	if (sst[token].finishes_token != (u_short)token) {
		fprintf(stderr,
			"fatal, sst[%d] not reserved for %s.\n",
			token, symbname(token));
		exit(6);
	}

	/* relocate so the token id is the sst[] index */
	if (node_s != token) {
		sst[token] = sst[node_s];
		ZERO(sst[node_s]);

		/* pull the high-water mark back to an unmarked entry */
		do {
			sst_highwater--;
		} while (sst[sst_highwater].finishes_token);

		/* repoint whatever referred to the old location */
		if (before_s) {
			sst[before_s].other_next_s = token;
		} else {
			head_s = token;
		}
	}

	return head_s;
}
static void generate_fsm(void) { char rprefix[MAX_TOK_LEN + 1]; char prefix[MAX_TOK_LEN + 1]; char token_id_comment[16 + MAX_TOK_LEN + 1]; size_t prefix_len; char *p; char *r; u_short initial_state; u_short this_state; u_short prev_state; u_short state; u_short i; u_short token; /* * Sort ntp_keywords in alphabetical keyword order. This is * not necessary, but minimizes nonfunctional changes in the * generated finite state machine when keywords are modified. */ qsort(ntp_keywords, COUNTOF(ntp_keywords), sizeof(ntp_keywords[0]), compare_key_tok_text); /* * To save space, reserve the state array entry matching each * token number for its terminal state, so the token identifier * does not need to be stored in each state, but can be * recovered trivially. To mark the entry reserved, * finishes_token is nonzero. */ for (i = 0; i < COUNTOF(ntp_keywords); i++) { token = ntp_keywords[i].token; if (1 > token || token >= COUNTOF(sst)) { fprintf(stderr, "keyword-gen sst[%u] too small " "for keyword '%s' id %d\n", (int)COUNTOF(sst), ntp_keywords[i].key, token); exit(4); } sst[token].finishes_token = token; } initial_state = create_keyword_scanner(); fprintf(stderr, "%d keywords consumed %d states of %d max.\n", (int)COUNTOF(ntp_keywords), sst_highwater - 1, (int)COUNTOF(sst) - 1); printf("#define SCANNER_INIT_S %d\n\n", initial_state); printf("const scan_state sst[%d] = {\n" "/*SS_T( ch,\tf-by, match, other ),\t\t\t\t */\n" " 0,\t\t\t\t /* %5d %-17s */\n", sst_highwater, 0, ""); for (i = 1; i < sst_highwater; i++) { /* verify fields will fit */ if (sst[i].followedby & ~0x3) { fprintf(stderr, "keyword-gen internal error " "sst[%d].followedby %d too big\n", i, sst[i].followedby); exit(7); } if (sst_highwater <= sst[i].match_next_s || sst[i].match_next_s & ~0x7ff) { fprintf(stderr, "keyword-gen internal error " "sst[%d].match_next_s %d too big\n", i, sst[i].match_next_s); exit(8); } if (sst_highwater <= sst[i].other_next_s || sst[i].other_next_s & ~0x7ff) { fprintf(stderr, 
"keyword-gen internal error " "sst[%d].other_next_s %d too big\n", i, sst[i].other_next_s); exit(9); } if (sst[i].finishes_token) { snprintf(token_id_comment, sizeof(token_id_comment), "%5d %-17s", i, symbname(sst[i].finishes_token)); if (i != sst[i].finishes_token) { fprintf(stderr, "keyword-gen internal error " "entry %d finishes token %d\n", i, sst[i].finishes_token); exit(5); } } else { /* * Determine the keyword prefix that leads to this * state. This is expensive but keyword-gen is run * only when it changes. Distributing keyword-gen-utd * achieves that, which is why it must be committed * at the same time as keyword-gen.c and ntp_keyword.h. * * Scan the state array iteratively looking for a state * which leads to the current one, collecting matching * characters along the way. There is only one such * path back to the starting state given the way our * scanner state machine is built and the practice of * using the spelling of the keyword as its T_* token * identifier, which results in never having two * spellings result in the same T_* value. */ prefix_len = 0; prev_state = 0; this_state = i; do { for (state = 1; state < sst_highwater; state++) if (sst[state].other_next_s == this_state) { this_state = state; break; } else if (sst[state].match_next_s == this_state) { this_state = state; rprefix[prefix_len] = sst[state].ch; prefix_len++; break; } } while (this_state != initial_state); if (prefix_len) { /* reverse rprefix into prefix */ p = prefix + prefix_len; r = rprefix; while (r < rprefix + prefix_len) *--p = *r++; } prefix[prefix_len] = '\0'; snprintf(token_id_comment, sizeof(token_id_comment), "%5d %-17s", i, (initial_state == i) ? "[initial state]" : prefix); } printf(" S_ST( '%c',\t%d, %5u, %5u )%s /* %s */\n", sst[i].ch, sst[i].followedby, sst[i].match_next_s, sst[i].other_next_s, (i + 1 < sst_highwater) ? "," : " ", token_id_comment); } printf("};\n\n"); }