Ejemplo n.º 1
0
/* Builds a McClellan engine from the raw DFA described by raw/strat.
 *
 * Steps, in order:
 *  - select the 8-bit layout when cc.grey.allowMcClellan8 is set and the DFA
 *    has at most 256 states, otherwise the 16-bit layout;
 *  - outside streaming mode, call raw.stripExtraEodReports();
 *  - run find_better_daddy() on every state;
 *  - compile with mcclellanCompile8() or mcclellanCompile16().
 *
 * accel_states is an optional out-parameter: when it is non-null and an engine
 * was produced, it is populated through fillAccelOut().
 *
 * Returns the compiled engine. The returned pointer is whatever the 8/16-bit
 * compile helper produced, so callers must be prepared for it to be null. */
aligned_unique_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, dfa_build_strat &strat,
                                           const CompileContext &cc,
                                           set<dstate_id_t> *accel_states) {
    /* Debug statistic only (consumed by the DEBUG_PRINTF below); being a u16
     * it wraps silently if the sum exceeds 65535. */
    u16 total_daddy = 0;
    dfa_info info(strat);
    bool using8bit = cc.grey.allowMcClellan8 && info.size() <= 256;

    if (!cc.streaming) { /* TODO: work out if we can do the strip in streaming
                          * mode with our semantics */
        raw.stripExtraEodReports();
    }

    /* Sampled after the optional strip above so that the engine flag reflects
     * the reports that actually remain. */
    bool has_eod_reports = raw.hasEodReports();
    bool any_cyclic_near_anchored_state = is_cyclic_near(raw,
                                                         raw.start_anchored);

    for (u32 i = 0; i < info.size(); i++) {
        find_better_daddy(info, i, using8bit, any_cyclic_near_anchored_state,
                          cc.grey);
        total_daddy += info.extra[i].daddytaken;
    }

    DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy,
                 info.size() * info.impl_alpha_size, info.size(),
                 info.impl_alpha_size);

    aligned_unique_ptr<NFA> nfa;
    if (!using8bit) {
        nfa = mcclellanCompile16(info, cc);
    } else {
        nfa = mcclellanCompile8(info, cc);
    }

    /* The compile helpers may hand back a null engine (the accel_states check
     * below already allows for that), so never touch nfa->flags without
     * checking the pointer first. */
    if (nfa && has_eod_reports) {
        nfa->flags |= NFA_ACCEPTS_EOD;
    }

    if (accel_states && nfa) {
        fillAccelOut(info, accel_states);
    }

    DEBUG_PRINTF("compile done\n");
    return nfa;
}
Ejemplo n.º 2
0
/* Counts the run of leading "dot" states that begins at INIT_STATE.
 *
 * A state belongs to the run when every implementation-alphabet transition out
 * of it leads to the state with the next id, and that successor carries no
 * reports and no EOD reports. The run also ends on reaching
 * raw.start_floating or the end of the state table.
 *
 * Returns the length of the run, or 0 if any state after the run has a
 * transition back into it. Expects raw.start_anchored == INIT_STATE. */
static
u32 count_dots(const raw_dfa &raw) {
    assert(raw.start_anchored == INIT_STATE);

    const auto alpha_size = raw.getImplAlphaSize();

    u32 idx = INIT_STATE;
    while (idx < raw.states.size() && idx != raw.start_floating) {
        DEBUG_PRINTF("checking %u\n", idx);
        const auto &cur = raw.states[idx];
        assert(cur.reports.empty());
        assert(cur.reports_eod.empty());

        /* every symbol must step to the immediately following state */
        bool steps_forward = true;
        for (symbol_t sym = 0; sym < alpha_size; sym++) {
            DEBUG_PRINTF("%hu -> %hu\n", sym, cur.next[sym]);
            if (cur.next[sym] != idx + 1) {
                steps_forward = false;
                break;
            }
        }
        if (!steps_forward) {
            break;
        }

        /* a successor that reports terminates the run */
        const auto &succ = raw.states[cur.next[0]];
        if (!(succ.reports.empty() && succ.reports_eod.empty())) {
            break;
        }

        DEBUG_PRINTF("got dot\n");
        idx++;
    }

    const u32 dot_count = idx - INIT_STATE;

    /* the remaining states must not have any transition that lands back in
     * the leading dots */
    while (idx < raw.states.size()) {
        const auto &later = raw.states[idx];
        for (symbol_t sym = 0; sym < alpha_size; sym++) {
            DEBUG_PRINTF("%hu -> %hu\n", sym, later.next[sym]);
            const dstate_id_t target = later.next[sym];
            if (target == DEAD_STATE) {
                continue;
            }
            if (target <= dot_count) {
                return 0;
            }
        }
        idx++;
    }

    return dot_count;
}
Ejemplo n.º 3
0
static
bool is_cyclic_near(const raw_dfa &raw, dstate_id_t root) {
    symbol_t alphasize = raw.getImplAlphaSize();
    for (symbol_t s = 0; s < alphasize; s++) {
        dstate_id_t succ_id = raw.states[root].next[s];
        if (succ_id == DEAD_STATE) {
            continue;
        }

        const dstate &succ = raw.states[succ_id];
        for (symbol_t t = 0; t < alphasize; t++) {
            if (succ.next[t] == root || succ.next[t] == succ_id) {
                return true;
            }
        }
    }
    return false;
}
Ejemplo n.º 4
0
static
bool is_slow(const raw_dfa &rdfa, const set<dstate_id_t> &accel,
             u32 roseQuality) {
    /* we consider a dfa as slow if there is no way to quickly get into an accel
     * state/dead state. In these cases, it is more likely that we will be
     * running at our unaccelerated dfa speeds so the small write engine is only
     * competitive over a small region where start up costs are dominant. */

    if (roseQuality) {
        return true;
    }

    set<dstate_id_t> visited;
    set<dstate_id_t> next;
    set<dstate_id_t> curr;
    curr.insert(rdfa.start_anchored);

    u32 ialpha_size = rdfa.getImplAlphaSize();

    for (u32 i = 0; i < MAX_GOOD_ACCEL_DEPTH; i++) {
        next.clear();
        for (dstate_id_t s : curr) {
            if (contains(visited, s)) {
                continue;
            }
            visited.insert(s);
            if (s == DEAD_STATE || contains(accel, s)) {
                return false;
            }

            for (size_t j = 0; j < ialpha_size; j++) {
                next.insert(rdfa.states[s].next[j]);
            }
        }
        curr.swap(next);
    }

    return true;
}