/**
 * Core McClellan DFA compile entry point: picks the 8-bit or 16-bit
 * implementation, patches EOD accept flags, and optionally reports the set of
 * accelerable states.
 *
 * \param raw           the determinised DFA to compile (may be modified:
 *                      extra EOD reports are stripped in block mode).
 * \param strat         build strategy used to construct the dfa_info.
 * \param cc            compile context (grey box switches, streaming flag).
 * \param accel_states  if non-null, filled with the ids of accelerable states.
 * \return the built NFA engine, or nullptr on failure (e.g. the 16-bit
 *         implementation cannot represent the state space).
 */
aligned_unique_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, dfa_build_strat &strat,
                                           const CompileContext &cc,
                                           set<dstate_id_t> *accel_states) {
    u16 total_daddy = 0;
    dfa_info info(strat);
    bool using8bit = cc.grey.allowMcClellan8 && info.size() <= 256;

    if (!cc.streaming) { /* TODO: work out if we can do the strip in streaming
                          * mode with our semantics */
        raw.stripExtraEodReports();
    }

    bool has_eod_reports = raw.hasEodReports();
    bool any_cyclic_near_anchored_state
        = is_cyclic_near(raw, raw.start_anchored);

    /* Pick a "daddy" (default-transition parent) for each state so that the
     * runtime can share transition data between similar states. */
    for (u32 i = 0; i < info.size(); i++) {
        find_better_daddy(info, i, using8bit, any_cyclic_near_anchored_state,
                          cc.grey);
        total_daddy += info.extra[i].daddytaken;
    }

    DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy,
                 info.size() * info.impl_alpha_size, info.size(),
                 info.impl_alpha_size);

    aligned_unique_ptr<NFA> nfa;
    if (!using8bit) {
        nfa = mcclellanCompile16(info, cc);
    } else {
        nfa = mcclellanCompile8(info, cc);
    }

    /* BUG FIX: compilation may fail and return nullptr (the later
     * `accel_states && nfa` guard shows callers expect this); previously we
     * dereferenced nfa for the EOD flag without checking, risking a null
     * pointer dereference. */
    if (!nfa) {
        DEBUG_PRINTF("compile failed\n");
        return nfa;
    }

    if (has_eod_reports) {
        nfa->flags |= NFA_ACCEPTS_EOD;
    }

    if (accel_states) { /* nfa is known non-null here */
        fillAccelOut(info, accel_states);
    }

    DEBUG_PRINTF("compile done\n");
    return nfa;
}
/* Counts the number of leading "dot" states: a chain of states starting at the
 * anchored start, each of which transitions to the next state in the chain on
 * every symbol (i.e. the prefix /./ repeated). Returns 0 if any later state
 * can jump back into the chain, since that would make stripping the dots
 * unsafe. */
static u32 count_dots(const raw_dfa &raw) {
    assert(raw.start_anchored == INIT_STATE);
    u32 i = INIT_STATE;
    /* Walk forward while each state sends every symbol to state i + 1. Stop at
     * the floating start state, which must not be absorbed into the chain. */
    for (; i < raw.states.size() && i != raw.start_floating; i++) {
        DEBUG_PRINTF("checking %u\n", i);
        /* dot states must be pure pass-through: no reports of any kind */
        assert(raw.states[i].reports.empty());
        assert(raw.states[i].reports_eod.empty());
        for (symbol_t s = 0; s < raw.getImplAlphaSize(); s++) {
            DEBUG_PRINTF("%hu -> %hu\n", s, raw.states[i].next[s]);
            if (raw.states[i].next[s] != i + 1) {
                goto validate; /* not a dot: some symbol escapes the chain */
            }
        }
        /* All symbols go to i + 1, so next[0] is the chain successor; if the
         * successor itself reports, it cannot be stripped as a dot. */
        if (!raw.states[raw.states[i].next[0]].reports.empty()
            || !raw.states[raw.states[i].next[0]].reports_eod.empty()) {
            goto validate;
        }
        DEBUG_PRINTF("got dot\n");
    }
validate:
    u32 dot_count = i - INIT_STATE;
    /* we need to check that no later state has a transition into these leading
     * dots */
    for (; i < raw.states.size(); i++) {
        for (symbol_t s = 0; s < raw.getImplAlphaSize(); s++) {
            DEBUG_PRINTF("%hu -> %hu\n", s, raw.states[i].next[s]);
            dstate_id_t n = raw.states[i].next[s];
            if (n != DEAD_STATE && n <= dot_count) {
                return 0; /* re-entry into the dot chain: cannot strip */
            }
        }
    }
    return dot_count;
}
static bool is_cyclic_near(const raw_dfa &raw, dstate_id_t root) { symbol_t alphasize = raw.getImplAlphaSize(); for (symbol_t s = 0; s < alphasize; s++) { dstate_id_t succ_id = raw.states[root].next[s]; if (succ_id == DEAD_STATE) { continue; } const dstate &succ = raw.states[succ_id]; for (symbol_t t = 0; t < alphasize; t++) { if (succ.next[t] == root || succ.next[t] == succ_id) { return true; } } } return false; }
static bool is_slow(const raw_dfa &rdfa, const set<dstate_id_t> &accel, u32 roseQuality) { /* we consider a dfa as slow if there is no way to quickly get into an accel * state/dead state. In these cases, it is more likely that we will be * running at our unaccelerated dfa speeds so the small write engine is only * competitive over a small region where start up costs are dominant. */ if (roseQuality) { return true; } set<dstate_id_t> visited; set<dstate_id_t> next; set<dstate_id_t> curr; curr.insert(rdfa.start_anchored); u32 ialpha_size = rdfa.getImplAlphaSize(); for (u32 i = 0; i < MAX_GOOD_ACCEL_DEPTH; i++) { next.clear(); for (dstate_id_t s : curr) { if (contains(visited, s)) { continue; } visited.insert(s); if (s == DEAD_STATE || contains(accel, s)) { return false; } for (size_t j = 0; j < ialpha_size; j++) { next.insert(rdfa.states[s].next[j]); } } curr.swap(next); } return true; }