/*
 * Print x to stdout as a right-aligned decimal number followed by its
 * binary digits, most significant bit first:
 *
 *     "%4d_{(10)} = <bits>_{(2)}\n"
 *
 * The number of binary digits printed is BITSOF(x).
 */
void rep(uint8_t x)
{
	int bit = BITSOF(x);

	printf("%4d_{(10)} = ", x);

	/* Walk from the highest bit index down to bit 0. */
	while (bit-- > 0) {
		printf("%d", (x >> bit) & 1);
	}

	printf("_{(2)}\n");
}
/* TODO */ u32 run_testfile(struct Lexer *lexer, struct Printer *p) { u32 err = 0; u32 lineno = 1; /* TODO Arena */ static u32 c2s[256]; static u32 eqc_bitmat[DIV_CEIL(8 * ARRAY_COUNT(c2s), BITSOF(u64))]; struct regex_eqc eqc; static struct regex_nfaparser_stackpost nfaparser_stack[4096]; static struct NFA_state nfa_states[4*4096]; struct NFA nfa; u32 nfa_fin_state; struct regex_nfaparser parser; u32 regex_lex_state = REGEX_DFA_START_STATE; static u32 dfa_acc[POW2(20)]; static u32 dfa_mat[ARRAY_COUNT(dfa_acc) * ARRAY_COUNT(c2s)]; struct DFA_mat dfa; struct nfa2dfa_data nfa2dfa_dat; static u32 nfa2dfa_dat_epsstack[ARRAY_COUNT(nfa_states)]; static u32 nfa2dfa_dat_visited[DIV_CEIL(ARRAY_COUNT(nfa_states), BITSOF(u32))]; static u32 nfa2dfa_dat_setoff[ARRAY_COUNT(dfa_acc)]; static u32 nfa2dfa_dat_hashtable[ARRAY_COUNT(dfa_acc)]; static u32 nfa2dfa_dat_stack[ARRAY_COUNT(dfa_acc)]; static u32 nfa2dfa_dat_setdata[16 * ARRAY_COUNT(dfa_acc)]; { nfa.states = nfa_states; nfa.cap = ARRAY_COUNT(nfa_states); nfa.len = 0; nfa.start_state = MAX_U32; eqc.bitmat = eqc_bitmat; eqc.width = ARRAY_COUNT(c2s); eqc.cap = 8; eqc.len = 0; parser.nfa = &nfa; parser.stack = nfaparser_stack; parser.stack_cap = ARRAY_COUNT(nfaparser_stack); parser.stack_len = 0; parser.eqc = &eqc; assert(nfa.len < nfa.cap); nfa_fin_state = nfa.len++; nfa.states[nfa_fin_state].c = NFA_FIN; nfa.states[nfa_fin_state].val.fin = 0; parser.stack[parser.stack_len].end = nfa_fin_state; parser.stack[parser.stack_len].start = MAX_U32; parser.stack[parser.stack_len].seq_first = MAX_U32; parser.stack[parser.stack_len].seq_last = MAX_U32; parser.stack[parser.stack_len].unary_first = MAX_U32; parser.stack[parser.stack_len].unary_last = MAX_U32; parser.stack[parser.stack_len].eqc = MAX_U32; /* no need to set eqc_* if eqc == MAX_U32 */ ++parser.stack_len; } while (1) { u32 acc_val; u32 c = lexer->c; switch (c) { case '\n': c = READER_EOF; break; case READER_EOF: return PUTERR("error: unexpected EOF"), 1; case READER_ERROR: 
return PUTERRNO("read"), 1; } regex_lex_state = regex_dfa_delta(regex_lex_state, c); acc_val = regex_dfa_acc(regex_lex_state); if (REGEX_DFA_ERROR_STATE(regex_lex_state)) { PUTERR("error: regex_lex"); return 1; } if (acc_val) { struct regex_token tok; if (acc_val - 1 < REGEX_TOKEN_TYPE_COUNT) { tok.type = acc_val - 1; tok.c = 0; /* tok.c may be whatever */ assert(tok.type != REGEX_TOK_C); } else { tok.type = REGEX_TOK_C; tok.c = (acc_val - 1) - REGEX_TOKEN_TYPE_COUNT; } regex_lex_state = REGEX_DFA_START_STATE; if (regex_nfaparse(&parser, tok)) { (void)writer_puterr_prefix(stde(), __FILE__, __LINE__); (void)writer_puts(stde(), "error: "); (void)writer_puts(stde(), regex_nfaparse_status2string(parser.error)); (void)writer_putc(stde(), '\n'); (void)writer_flush(stde()); return 1; } } if (lexer->c == '\n') { lexer_next(lexer); break; } lexer_next(lexer); } ++lineno; if (parser.stack_len) { PUTERR("error: regex_parse unexpected end of regex"); return 1; } nfa.start_state = parser.start; if (0&&DEBUG) { pr_str(p, "nfalen = "); pr_u32(p, nfa.len); pr_str(p, "\n"); } if (0&&DEBUG) { p->err = p->err || nfa_print_dot(&nfa, p->writer, nfa2dfa_dat_epsstack, nfa2dfa_dat_visited); } { dfa.mat = dfa_mat; dfa.width = ARRAY_COUNT(c2s); dfa.tot_elems = ARRAY_COUNT(dfa_mat); dfa.len = 0; dfa.acc = dfa_acc; dfa.acc_cap = ARRAY_COUNT(dfa_acc); STRUCT_ZERO(nfa2dfa_dat); nfa2dfa_dat.nfa = &nfa; nfa2dfa_dat.dfa = &dfa; nfa2dfa_dat.eqc = &eqc; nfa2dfa_dat.epsstack = nfa2dfa_dat_epsstack; nfa2dfa_dat.visited = nfa2dfa_dat_visited; nfa2dfa_dat.setoff = nfa2dfa_dat_setoff; nfa2dfa_dat.hashtable = nfa2dfa_dat_hashtable; nfa2dfa_dat.stack = nfa2dfa_dat_stack; nfa2dfa_dat.stack_cap = ARRAY_COUNT(nfa2dfa_dat_stack); nfa2dfa_dat.setdata = nfa2dfa_dat_setdata; nfa2dfa_dat.setdata_cap = ARRAY_COUNT(nfa2dfa_dat_setdata); } switch (nfa2dfa(&nfa2dfa_dat)) { case NFA2DFA_STATUS_OK: break; case NFA2DFA_STATUS_OUT_OF_SETDATA_MEM: return PUTERR("error: nfa2dfa: out of setdata mem"), 1; case 
NFA2DFA_STATUS_OUT_OF_STACK_MEM: return PUTERR("error: nfa2dfa: out of stack mem"), 1; case NFA2DFA_STATUS_OUT_OF_DFA_MEM: return PUTERR("error: nfa2dfa: out of dfa mem"), 1; case NFA2DFA_STATUS_BROKEN: return assert(0), PUTERR("error: nfa2dfa: broken"), 1; } if (1) { STATIC_ASSERT_LOCAL(ARRAY_COUNT(nfa2dfa_dat_epsstack) >= ARRAY_COUNT(c2s), can_reuse_epsstack_in_c2s); dfa_c2s(&dfa, nfa2dfa_dat_epsstack, c2s); } else { u32 i; for (i = 0; i < ARRAY_COUNT(c2s); ++i) { c2s[i] = i; } } if (1&&DEBUG) { pr_str(p, "dfalen = "); pr_u32(p, dfa.len); pr_str(p, "\n"); pr_str(p, "dfawidth = "); pr_u32(p, dfa.width); pr_str(p, "\n"); } if (1&&DEBUG) { p->err = p->err || dfa_print_dot(&dfa, p->writer, NULL); } if (1) { dfa_minimize(&dfa, nfa2dfa_dat_setoff, nfa2dfa_dat_stack, nfa2dfa_dat_hashtable); /* do c2s again */ if (0) { u32 i; u32 *extra_c2s = nfa2dfa_dat_setoff; STATIC_ASSERT_LOCAL(ARRAY_COUNT(nfa2dfa_dat_epsstack) >= ARRAY_COUNT(c2s), can_reuse_epsstack_in_min_c2s); STATIC_ASSERT_LOCAL(ARRAY_COUNT(nfa2dfa_dat_setoff) >= ARRAY_COUNT(c2s), can_reuse_setoff_in_min_c2s); dfa_c2s(&dfa, nfa2dfa_dat_epsstack, extra_c2s); for (i = 0; i < ARRAY_COUNT(c2s); ++i) { c2s[i] = extra_c2s[c2s[i]]; } } if (1&&DEBUG) { pr_str(p, "mindfalen = "); pr_u32(p, dfa.len); pr_str(p, "\n"); pr_str(p, "mindfawidth = "); pr_u32(p, dfa.width); pr_str(p, "\n"); } if (1&&DEBUG) { p->err = p->err || dfa_print_dot(&dfa, p->writer, NULL); } } lexer_skip_space(lexer); switch (lexer->c) { case '%': lexer_next(lexer); break; case READER_EOF: return PUTERR("error: unexpected EOF"), 1; case READER_ERROR: return PUTERRNO("read"), 1; default: return PUTERR("error: expected '%'"), 1; } while (lexer->c != '\n') { if (lexer->c == READER_EOF) { return PUTERR("error: unexpected EOF"), 1; } else if (lexer->c == READER_ERROR) { return PUTERRNO("read"), 1; } else if (!lexer_is_space(lexer->c)) { return PUTERR("error: expected whitespace or EOL"), 1; } lexer_next(lexer); } lexer_next(lexer); ++lineno; { /* strings 
expected to be accepted */ while (lexer->c != '%') { u32 state = dfa.start_state; while (lexer->c != '\n') { if (lexer->c == READER_EOF) { return PUTERR("error: unexpected EOF"), 1; } else if (lexer->c == READER_ERROR) { return PUTERRNO("read"), 1; } state = dfa_delta(&dfa, state, c2s[lexer->c]); lexer_next(lexer); } if (!(state < dfa.len && dfa.acc[state])) { err = 1; pr_str(p, "fail: line "); pr_u32(p, lineno); pr_str(p, ": expected ACC got REJ\n"); } ++lineno; lexer_next(lexer); } lexer_next(lexer); } while (lexer->c != '\n') { if (lexer->c == READER_EOF) { return PUTERR("error: unexpected EOF"), 1; } else if (lexer->c == READER_ERROR) { return PUTERRNO("read"), 1; } else if (!lexer_is_space(lexer->c)) { return PUTERR("error: expected whitespace or EOL"), 1; } lexer_next(lexer); } lexer_next(lexer); ++lineno; { /* strings expected to be rejected */ while (lexer->c != READER_EOF) { u32 state = dfa.start_state; while (lexer->c != '\n' && lexer->c != READER_EOF) { if (lexer->c == READER_ERROR) { return PUTERRNO("read"), 1; } if (lexer->c == '%') { return PUTERR("error: % in pattern"), 1; } state = dfa_delta(&dfa, state, c2s[lexer->c]); lexer_next(lexer); } if (state < dfa.len && dfa.acc[state]) { err = 1; pr_str(p, "fail: line "); pr_u32(p, lineno); pr_str(p, ": expected REJ got ACC\n"); } if (lexer->c != READER_EOF) { lexer_next(lexer); ++lineno; } } } return err; }