/*
 * Prints x as "<decimal>_{(10)} = <bits>_{(2)}" followed by a newline.
 *
 * The decimal value is right-aligned in a field of width 4.  The binary
 * digits are emitted one at a time, starting at bit position
 * BITSOF( x ) - 1 and ending at bit position 0.
 */
void rep( uint8_t x ) {
        printf( "%4d_{(10)} = ", x );

        int pos = BITSOF( x );

        /* pos is decremented before use, so it runs BITSOF( x ) - 1 .. 0 */
        while( pos-- > 0 ) {
                printf( "%d", ( x >> pos ) & 1 );
        }

        printf( "_{(2)}\n" );
}
Beispiel #2
0
/* TODO */
u32 run_testfile(struct Lexer *lexer, struct Printer *p)
{
    u32 err = 0;
    u32 lineno = 1;

    /* TODO Arena */

    static u32 c2s[256];

    static u32 eqc_bitmat[DIV_CEIL(8 * ARRAY_COUNT(c2s), BITSOF(u64))];

    struct regex_eqc eqc;

    static struct regex_nfaparser_stackpost nfaparser_stack[4096];

    static struct NFA_state nfa_states[4*4096];
    struct NFA nfa;
    u32 nfa_fin_state;

    struct regex_nfaparser parser;

    u32 regex_lex_state = REGEX_DFA_START_STATE;

    static u32 dfa_acc[POW2(20)];
    static u32 dfa_mat[ARRAY_COUNT(dfa_acc) * ARRAY_COUNT(c2s)];

    struct DFA_mat dfa;

    struct nfa2dfa_data nfa2dfa_dat;

    static u32 nfa2dfa_dat_epsstack[ARRAY_COUNT(nfa_states)];
    static u32 nfa2dfa_dat_visited[DIV_CEIL(ARRAY_COUNT(nfa_states),
                                            BITSOF(u32))];

    static u32 nfa2dfa_dat_setoff[ARRAY_COUNT(dfa_acc)];
    static u32 nfa2dfa_dat_hashtable[ARRAY_COUNT(dfa_acc)];
    static u32 nfa2dfa_dat_stack[ARRAY_COUNT(dfa_acc)];

    static u32 nfa2dfa_dat_setdata[16 * ARRAY_COUNT(dfa_acc)];

    {
        nfa.states      = nfa_states;
        nfa.cap         = ARRAY_COUNT(nfa_states);
        nfa.len         = 0;
        nfa.start_state = MAX_U32;

        eqc.bitmat = eqc_bitmat;
        eqc.width  = ARRAY_COUNT(c2s);
        eqc.cap    = 8;
        eqc.len    = 0;

        parser.nfa       = &nfa;
        parser.stack     = nfaparser_stack;
        parser.stack_cap = ARRAY_COUNT(nfaparser_stack);
        parser.stack_len = 0;

        parser.eqc       = &eqc;

        assert(nfa.len < nfa.cap);
        nfa_fin_state = nfa.len++;

        nfa.states[nfa_fin_state].c       = NFA_FIN;
        nfa.states[nfa_fin_state].val.fin = 0;

        parser.stack[parser.stack_len].end         = nfa_fin_state;
        parser.stack[parser.stack_len].start       = MAX_U32;
        parser.stack[parser.stack_len].seq_first   = MAX_U32;
        parser.stack[parser.stack_len].seq_last    = MAX_U32;
        parser.stack[parser.stack_len].unary_first = MAX_U32;
        parser.stack[parser.stack_len].unary_last  = MAX_U32;
        parser.stack[parser.stack_len].eqc         = MAX_U32;
        /* no need to set eqc_* if eqc == MAX_U32 */

        ++parser.stack_len;
    }


    while (1) {
        u32 acc_val;
        u32 c = lexer->c;
        switch (c) {
            case '\n':         c = READER_EOF; break;
            case READER_EOF:   return PUTERR("error: unexpected EOF"), 1;
            case READER_ERROR: return PUTERRNO("read"), 1;
        }
        regex_lex_state   = regex_dfa_delta(regex_lex_state, c);
        acc_val = regex_dfa_acc(regex_lex_state);
        if (REGEX_DFA_ERROR_STATE(regex_lex_state)) {
            PUTERR("error: regex_lex");
            return 1;
        }
        if (acc_val) {
            struct regex_token tok;
            if (acc_val - 1 < REGEX_TOKEN_TYPE_COUNT) {
                tok.type = acc_val - 1;
                tok.c    = 0; /* tok.c may be whatever */
                assert(tok.type != REGEX_TOK_C);
            } else {
                tok.type = REGEX_TOK_C;
                tok.c    = (acc_val - 1) - REGEX_TOKEN_TYPE_COUNT;
            }
            regex_lex_state = REGEX_DFA_START_STATE;

            if (regex_nfaparse(&parser, tok)) {
                (void)writer_puterr_prefix(stde(), __FILE__, __LINE__);
                (void)writer_puts(stde(), "error: ");
                (void)writer_puts(stde(),
                            regex_nfaparse_status2string(parser.error));
                (void)writer_putc(stde(), '\n');
                (void)writer_flush(stde());
                return 1;
            }
        }

        if (lexer->c == '\n') {
            lexer_next(lexer);
            break;
        }
        lexer_next(lexer);
    }

    ++lineno;

    if (parser.stack_len) {
        PUTERR("error: regex_parse unexpected end of regex");
        return 1;
    }

    nfa.start_state = parser.start;

    if (0&&DEBUG) {
        pr_str(p, "nfalen = ");
        pr_u32(p, nfa.len);
        pr_str(p, "\n");
    }

    if (0&&DEBUG) {
        p->err = p->err || nfa_print_dot(&nfa, p->writer, nfa2dfa_dat_epsstack,
                                         nfa2dfa_dat_visited);
    }

    {
        dfa.mat       = dfa_mat;
        dfa.width     = ARRAY_COUNT(c2s);
        dfa.tot_elems = ARRAY_COUNT(dfa_mat);
        dfa.len       = 0;
        dfa.acc       = dfa_acc;
        dfa.acc_cap   = ARRAY_COUNT(dfa_acc);

        STRUCT_ZERO(nfa2dfa_dat);
        nfa2dfa_dat.nfa         = &nfa;
        nfa2dfa_dat.dfa         = &dfa;
        nfa2dfa_dat.eqc         = &eqc;

        nfa2dfa_dat.epsstack    = nfa2dfa_dat_epsstack;
        nfa2dfa_dat.visited     = nfa2dfa_dat_visited;

        nfa2dfa_dat.setoff      = nfa2dfa_dat_setoff;
        nfa2dfa_dat.hashtable   = nfa2dfa_dat_hashtable;

        nfa2dfa_dat.stack       = nfa2dfa_dat_stack;
        nfa2dfa_dat.stack_cap   = ARRAY_COUNT(nfa2dfa_dat_stack);

        nfa2dfa_dat.setdata     = nfa2dfa_dat_setdata;
        nfa2dfa_dat.setdata_cap = ARRAY_COUNT(nfa2dfa_dat_setdata);
    }

    switch (nfa2dfa(&nfa2dfa_dat)) {
        case NFA2DFA_STATUS_OK:
            break;
        case NFA2DFA_STATUS_OUT_OF_SETDATA_MEM:
            return PUTERR("error: nfa2dfa: out of setdata mem"), 1;
        case NFA2DFA_STATUS_OUT_OF_STACK_MEM:
            return PUTERR("error: nfa2dfa: out of stack mem"), 1;
        case NFA2DFA_STATUS_OUT_OF_DFA_MEM:
            return PUTERR("error: nfa2dfa: out of dfa mem"), 1;
        case NFA2DFA_STATUS_BROKEN:
            return assert(0), PUTERR("error: nfa2dfa: broken"), 1;
    }

    if (1) {
        STATIC_ASSERT_LOCAL(ARRAY_COUNT(nfa2dfa_dat_epsstack)
                            >= ARRAY_COUNT(c2s), can_reuse_epsstack_in_c2s);
        dfa_c2s(&dfa, nfa2dfa_dat_epsstack, c2s);
    } else {
        u32 i;
        for (i = 0; i < ARRAY_COUNT(c2s); ++i) {
            c2s[i] = i;
        }
    }

    if (1&&DEBUG) {
        pr_str(p, "dfalen = ");
        pr_u32(p, dfa.len);
        pr_str(p, "\n");

        pr_str(p, "dfawidth = ");
        pr_u32(p, dfa.width);
        pr_str(p, "\n");
    }

    if (1&&DEBUG) {
        p->err = p->err || dfa_print_dot(&dfa, p->writer, NULL);
    }




    if (1) {
        dfa_minimize(&dfa, nfa2dfa_dat_setoff, nfa2dfa_dat_stack,
                     nfa2dfa_dat_hashtable);

        /* do c2s again */
        if (0) {
            u32 i;
            u32 *extra_c2s = nfa2dfa_dat_setoff;
            STATIC_ASSERT_LOCAL(ARRAY_COUNT(nfa2dfa_dat_epsstack)
                                >= ARRAY_COUNT(c2s),
                                can_reuse_epsstack_in_min_c2s);
            STATIC_ASSERT_LOCAL(ARRAY_COUNT(nfa2dfa_dat_setoff)
                                >= ARRAY_COUNT(c2s),
                                can_reuse_setoff_in_min_c2s);

            dfa_c2s(&dfa, nfa2dfa_dat_epsstack, extra_c2s);
            for (i = 0; i < ARRAY_COUNT(c2s); ++i) {
                c2s[i] = extra_c2s[c2s[i]];
            }
        }

        if (1&&DEBUG) {
            pr_str(p, "mindfalen = ");
            pr_u32(p, dfa.len);
            pr_str(p, "\n");

            pr_str(p, "mindfawidth = ");
            pr_u32(p, dfa.width);
            pr_str(p, "\n");
        }

        if (1&&DEBUG) {
            p->err = p->err || dfa_print_dot(&dfa, p->writer, NULL);
        }
    }

    lexer_skip_space(lexer);

    switch (lexer->c) {
        case '%': lexer_next(lexer); break;
        case READER_EOF:     return PUTERR("error: unexpected EOF"), 1;
        case READER_ERROR:   return PUTERRNO("read"), 1;
        default:             return PUTERR("error: expected '%'"), 1;
    }

    while (lexer->c != '\n') {
        if (lexer->c == READER_EOF) {
            return PUTERR("error: unexpected EOF"), 1;
        } else if (lexer->c == READER_ERROR) {
            return PUTERRNO("read"), 1;
        } else if (!lexer_is_space(lexer->c)) {
            return PUTERR("error: expected whitespace or EOL"), 1;
        }
        lexer_next(lexer);
    }
    lexer_next(lexer);
    ++lineno;

    { /* strings expected to be accepted */
        while (lexer->c != '%') {
            u32 state = dfa.start_state;
            while (lexer->c != '\n') {
                if (lexer->c == READER_EOF) {
                    return PUTERR("error: unexpected EOF"), 1;
                } else if (lexer->c == READER_ERROR) {
                    return PUTERRNO("read"), 1;
                }

                state = dfa_delta(&dfa, state, c2s[lexer->c]);

                lexer_next(lexer);
            }

            if (!(state < dfa.len && dfa.acc[state])) {
                err = 1;
                pr_str(p, "fail: line ");
                pr_u32(p, lineno);
                pr_str(p, ": expected ACC got REJ\n");
            }

            ++lineno;
            lexer_next(lexer);
        }
        lexer_next(lexer);
    }

    while (lexer->c != '\n') {
        if (lexer->c == READER_EOF) {
            return PUTERR("error: unexpected EOF"), 1;
        } else if (lexer->c == READER_ERROR) {
            return PUTERRNO("read"), 1;
        } else if (!lexer_is_space(lexer->c)) {
            return PUTERR("error: expected whitespace or EOL"), 1;
        }
        lexer_next(lexer);
    }
    lexer_next(lexer);
    ++lineno;


    { /* strings expected to be rejected */
        while (lexer->c != READER_EOF) {
            u32 state = dfa.start_state;
            while (lexer->c != '\n' && lexer->c != READER_EOF) {
                if (lexer->c == READER_ERROR) {
                    return PUTERRNO("read"), 1;
                }

                if (lexer->c == '%') {
                    return PUTERR("error: % in pattern"), 1;
                }

                state = dfa_delta(&dfa, state, c2s[lexer->c]);

                lexer_next(lexer);
            }

            if (state < dfa.len && dfa.acc[state]) {
                err = 1;
                pr_str(p, "fail: line ");
                pr_u32(p, lineno);
                pr_str(p, ": expected REJ got ACC\n");
            }

            if (lexer->c != READER_EOF) {
                lexer_next(lexer);
                ++lineno;
            }
        }
    }

    return err;
}