void describeEdge(FILE *f, const u16 *t, u16 i) { for (u16 s = 0; s < N_CHARS; s++) { if (!t[s]) { continue; } u16 ss; for (ss = 0; ss < s; ss++) { if (t[s] == t[ss]) { break; } } if (ss != s) { continue; } CharReach reach; for (ss = s; ss < 256; ss++) { if (t[s] == t[ss]) { reach.set(ss); } } fprintf(f, "%u -> %u [ label = \"", i, t[s]); describeClass(f, reach, 5, CC_OUT_DOT); fprintf(f, "\" ];\n"); } }
static void execute_graph_i(const NGHolder &g, const vector<StateInfo> &info, const inputT &input, dynamic_bitset<> *states, bool kill_sds) { dynamic_bitset<> &curr = *states; dynamic_bitset<> next(curr.size()); DEBUG_PRINTF("%zu states in\n", states->count()); for (const auto &e : input) { DEBUG_PRINTF("processing %s\n", describeClass(e).c_str()); step(g, info, curr, &next); if (kill_sds) { next.reset(NODE_START_DOTSTAR); } filter_by_reach(info, &next, e); next.swap(curr); if (curr.empty()) { DEBUG_PRINTF("went dead\n"); break; } } DEBUG_PRINTF("%zu states out\n", states->size()); }
static void describeEdge(FILE *f, const mcsheng *m, const u16 *t, u16 i) { for (u16 s = 0; s < N_CHARS; s++) { if (!t[s]) { continue; } u16 ss; for (ss = 0; ss < s; ss++) { if (t[s] == t[ss]) { break; } } if (ss != s) { continue; } CharReach reach; for (ss = s; ss < 256; ss++) { if (t[s] == t[ss]) { reach.set(ss); } } fprintf(f, "%u -> %u [ ", i, t[s]); if (i < m->sheng_end && t[s] < m->sheng_end) { fprintf(f, "color = red, fontcolor = red "); } fprintf(f, "label = \""); describeClass(f, reach, 5, CC_OUT_DOT); fprintf(f, "\" ];\n"); } }
/**
 * Build a string for a trigger by concatenating the describeClass() text of
 * each of its reach entries, in order.
 */
static UNUSED
string dumpTrigger(const vector<CharReach> &trigger) {
    string out;
    for (auto it = trigger.begin(); it != trigger.end(); ++it) {
        out += describeClass(*it);
    }
    return out;
}
/**
 * Render a lookaround map as text: a comma-separated list of
 * "{offset: class}" entries in map order.
 */
static UNUSED
string dump(const map<s32, CharReach> &look) {
    ostringstream oss;
    bool first = true;
    for (const auto &entry : look) {
        if (!first) {
            oss << ", ";
        }
        first = false;
        oss << "{" << entry.first << ": " << describeClass(entry.second)
            << "}";
    }
    return oss.str();
}
/**
 * Returns true if every entry in look has a reach that overlaps flood_cr
 * (which is trivially the case for an empty map); returns false as soon as
 * one entry does not overlap.
 */
static
bool isFloodProne(const map<s32, CharReach> &look, const CharReach &flood_cr) {
    for (auto it = look.begin(); it != look.end(); ++it) {
        if (!overlaps(it->second, flood_cr)) {
            // This entry does not overlap the flood character class.
            return false;
        }
    }

    DEBUG_PRINTF("look can't escape flood on %s\n",
                 describeClass(flood_cr).c_str());
    return true;
}
/**
 * For each literal attached to vertex v that is non-empty and for which
 * is_flood() holds, insert the reach of its first element into flood_reach.
 */
static
void findFloodReach(const RoseBuildImpl &tbi, const RoseVertex v,
                    set<CharReach> &flood_reach) {
    for (u32 lit_id : tbi.g[v].literals) {
        const ue2_literal &lit = tbi.literals.right.at(lit_id).s;

        // Empty literals are skipped before is_flood() is consulted.
        if (lit.empty() || !is_flood(lit)) {
            continue;
        }

        CharReach cr(*lit.begin());
        DEBUG_PRINTF("flood-prone with reach: %s\n",
                     describeClass(cr).c_str());
        flood_reach.insert(cr);
    }
}
void nfaExecLbrShuf_dump(const NFA *nfa, const string &base) { assert(nfa); assert(nfa->type == LBR_NFA_SHUF); StdioFile f(base + ".txt", "w"); const lbr_shuf *ls = (const lbr_shuf *)getImplNfa(nfa); lbrDumpCommon(&ls->common, f); CharReach cr = shufti2cr((const u8 *)&ls->mask_lo, (const u8 *)&ls->mask_hi); fprintf(f, "SHUF model, scanning for: %s (%zu chars)\n", describeClass(cr, 20, CC_OUT_TEXT).c_str(), cr.count()); fprintf(f, "\n"); dumpTextReverse(nfa, f); }
void nfaExecLbrTruf_dump(const NFA *nfa, const string &base) { assert(nfa); assert(nfa->type == LBR_NFA_TRUF); StdioFile f(base + ".txt", "w"); const lbr_truf *lt = (const lbr_truf *)getImplNfa(nfa); lbrDumpCommon(<->common, f); CharReach cr = truffle2cr((const u8 *)<->mask1, (const u8 *)<->mask2); fprintf(f, "TRUFFLE model, scanning for: %s (%zu chars)\n", describeClass(cr, 20, CC_OUT_TEXT).c_str(), cr.count()); fprintf(f, "\n"); dumpTextReverse(nfa, f); }
/**
 * Calculate the stop alphabet for each depth from 0 to MAX_STOP_DEPTH. Then
 * build an eight-bit mask per character C, with each bit representing the
 * depth before the location of character C (if encountered) that the NFA
 * would be in a predictable start state.
 *
 * The returned vector has N_CHARS entries; bit i of entry C is set when C is
 * outside the union of reaches accumulated for depth i.
 */
vector<u8> findLeftOffsetStopAlphabet(const NGHolder &g, som_type som) {
    const depth max_depth(MAX_STOP_DEPTH);
    const InitDepths depths(g);
    const map<NFAVertex, BoundedRepeatSummary> no_vertices;

    // reach[i] accumulates the reach of every non-special vertex whose
    // (capped) maximum distance is greater than i.
    vector<CharReach> reach(MAX_STOP_DEPTH);

    for (auto v : vertices_range(g)) {
        if (is_special(v, g)) {
            continue;
        }

        // Without SOM the reduced reach is used; otherwise the vertex's own.
        const CharReach v_cr = (som == SOM_NONE)
                                   ? reduced_cr(v, g, no_vertices)
                                   : g[v].char_reach;

        u32 d = min(max_depth, depths.maxDist(g, v));
        for (u32 level = 0; level < d; level++) {
            reach[level] |= v_cr;
        }
    }

#ifdef DEBUG
    for (u32 i = 0; i < MAX_STOP_DEPTH; i++) {
        DEBUG_PRINTF("depth %u, stop chars: ", i);
        describeClass(stdout, ~reach[i], 20, CC_OUT_TEXT);
        printf("\n");
    }
#endif

    vector<u8> stop(N_CHARS, 0);

    for (u32 level = 0; level < MAX_STOP_DEPTH; level++) {
        // Stop characters are those NOT reachable at this depth.
        CharReach stop_chars = ~reach[level];
        const u8 mask = 1U << level;

        size_t c = stop_chars.find_first();
        while (c != stop_chars.npos) {
            stop[c] |= mask;
            c = stop_chars.find_next(c);
        }
    }

    return stop;
}
void describeAlphabet(FILE *f, const mcclellan *m) { map<u8, CharReach> rev; for (u16 i = 0; i < N_CHARS; i++) { rev[m->remap[i]].clear(); } for (u16 i = 0; i < N_CHARS; i++) { rev[m->remap[i]].set(i); } map<u8, CharReach>::const_iterator it; fprintf(f, "\nAlphabet\n"); for (it = rev.begin(); it != rev.end(); ++it) { fprintf(f, "%3hhu: ", it->first); describeClass(f, it->second, 10240, CC_OUT_TEXT); fprintf(f, "\n"); } fprintf(f, "\n"); }
/**
 * Build the stop alphabet for a castle.
 *
 * Every character outside the castle's reach receives a mask with the low d
 * bits set, where d is the smaller of the castle's maximum width and
 * MAX_STOP_DEPTH. All other characters get zero. The som argument is unused.
 */
vector<u8> findLeftOffsetStopAlphabet(const CastleProto &castle,
                                      UNUSED som_type som) {
    const depth max_width = findMaxWidth(castle);
    DEBUG_PRINTF("castle has reach %s and max width %s\n",
                 describeClass(castle.reach()).c_str(),
                 max_width.str().c_str());

    // Stop characters are the complement of the castle's reach.
    const CharReach escape = ~castle.reach();

    u32 d = min(max_width, depth(MAX_STOP_DEPTH));
    const u8 mask = verify_u8((1U << d) - 1);

    vector<u8> stop(N_CHARS, 0);
    size_t c = escape.find_first();
    while (c != escape.npos) {
        stop[c] |= mask;
        c = escape.find_next(c);
    }

    return stop;
}
/**
 * Construct an LBR engine for the given reach and repeat bounds.
 *
 * The builders are tried in a fixed order (Dot, Verm, NVerm, Shuf, Truf) and
 * the first non-null result is returned. If every builder fails the function
 * asserts and returns nullptr.
 */
static
aligned_unique_ptr<NFA> constructLBR(const CharReach &cr,
                                     const depth &repeatMin,
                                     const depth &repeatMax, u32 minPeriod,
                                     bool is_reset, ReportID report) {
    DEBUG_PRINTF("bounds={%s,%s}, cr=%s (count %zu), report=%u\n",
                 repeatMin.str().c_str(), repeatMax.str().c_str(),
                 describeClass(cr, 20, CC_OUT_TEXT).c_str(), cr.count(),
                 report);
    assert(repeatMin <= repeatMax);
    assert(repeatMax.is_reachable());

    aligned_unique_ptr<NFA> nfa =
        buildLbrDot(cr, repeatMin, repeatMax, minPeriod, is_reset, report);
    if (nfa) {
        return nfa;
    }

    nfa = buildLbrVerm(cr, repeatMin, repeatMax, minPeriod, is_reset, report);
    if (nfa) {
        return nfa;
    }

    nfa = buildLbrNVerm(cr, repeatMin, repeatMax, minPeriod, is_reset, report);
    if (nfa) {
        return nfa;
    }

    nfa = buildLbrShuf(cr, repeatMin, repeatMax, minPeriod, is_reset, report);
    if (nfa) {
        return nfa;
    }

    nfa = buildLbrTruf(cr, repeatMin, repeatMax, minPeriod, is_reset, report);
    if (nfa) {
        return nfa;
    }

    // No builder produced an engine.
    assert(0);
    return nullptr;
}
/**
 * Add entries from more_lookaround to lookaround, in LookPriority order,
 * until lookaround holds MAX_LOOKAROUND_ENTRIES entries or candidates run
 * out. Offsets already present in lookaround are never added again. On
 * return (unless lookaround was already full) it is sorted by offset.
 */
void mergeLookaround(vector<LookEntry> &lookaround,
                     const vector<LookEntry> &more_lookaround) {
    if (lookaround.size() >= MAX_LOOKAROUND_ENTRIES) {
        DEBUG_PRINTF("big enough!\n");
        return;
    }

    // Offsets we already have entries for are not merged.
    ue2::flat_set<s8> known_offsets;
    for (auto it = lookaround.begin(); it != lookaround.end(); ++it) {
        known_offsets.insert(it->offset);
    }

    // The comparator is built over the candidate map before that map is
    // populated; the queue then orders offsets through it.
    map<s32, CharReach> more;
    LookPriority cmp(more);
    priority_queue<s32, vector<s32>, LookPriority> pq(cmp);

    for (const auto &cand : more_lookaround) {
        if (contains(known_offsets, cand.offset)) {
            continue;
        }
        more.emplace(cand.offset, cand.reach);
        pq.push(cand.offset);
    }

    while (!pq.empty() && lookaround.size() < MAX_LOOKAROUND_ENTRIES) {
        const s32 offset = pq.top();
        pq.pop();
        const auto &cr = more.at(offset);
        DEBUG_PRINTF("added {%d,%s}\n", offset, describeClass(cr).c_str());
        lookaround.emplace_back(verify_s8(offset), cr);
    }

    // Order by offset.
    sort(begin(lookaround), end(lookaround),
         [](const LookEntry &lhs, const LookEntry &rhs) {
             return lhs.offset < rhs.offset;
         });
}
/**
 * Compute, over all literals in lits, the largest per-literal count derived
 * from how much of the literal's tail lies within the reach of the castle's
 * single repeat.
 *
 * Returns NO_MATCH_LIMIT when the castle has more than one repeat, when any
 * literal is empty, when a whole literal fits inside an unbounded repeat, or
 * (after asserting) when the computed maximum exceeds NO_MATCH_LIMIT.
 */
static
u32 findMaxInfixMatches(const CastleProto &castle,
                        const set<ue2_literal> &lits) {
    DEBUG_PRINTF("castle=%p, %zu literals\n", &castle, lits.size());

    if (castle.repeats.size() > 1) {
        DEBUG_PRINTF("more than one top!\n");
        return NO_MATCH_LIMIT;
    }

    assert(!castle.repeats.empty());

    const PureRepeat &pr = castle.repeats.begin()->second;
    DEBUG_PRINTF("repeat=%s reach=%s\n", pr.bounds.str().c_str(),
                 describeClass(pr.reach).c_str());

    size_t max_count = 0;

    for (const auto &s : lits) {
        DEBUG_PRINTF("lit s='%s'\n", escapeString(s).c_str());
        if (s.empty()) {
            // Likely an anchored case, be conservative here.
            return NO_MATCH_LIMIT;
        }

        size_t count = 0;

        // Scan backwards from the end of the literal for the first element
        // that ReachMismatch flags against the repeat's reach.
        auto f = find_if(s.rbegin(), s.rend(), ReachMismatch(pr.reach));

        if (f == s.rbegin()) {
            DEBUG_PRINTF("lit can't terminate inside infix\n");
            count = 0;
        } else if (f == s.rend()) {
            DEBUG_PRINTF("whole lit can match inside infix (repeatedly)\n");
            if (!pr.bounds.max.is_finite()) {
                DEBUG_PRINTF("inf bound\n");
                return NO_MATCH_LIMIT;
            }
            count = pr.bounds.max;
        } else {
            size_t suffix_len = distance(s.rbegin(), f);
            DEBUG_PRINTF("suffix of len %zu matches at start\n", suffix_len);
            if (pr.bounds.max.is_finite()) {
                count = min(suffix_len, (size_t)pr.bounds.max);
            } else {
                count = suffix_len;
            }
        }

        DEBUG_PRINTF("count=%zu\n", count);
        max_count = max(max_count, count);
    }

    DEBUG_PRINTF("max_count %zu\n", max_count);

    if (max_count > NO_MATCH_LIMIT) {
        assert(0); // This would be a surprise.
        return NO_MATCH_LIMIT;
    }

    return (u32)max_count;
}
static void reduce(map<s32, CharReach> &look, set<CharReach> &flood_reach) { if (look.size() <= MAX_LOOKAROUND_ENTRIES) { return; } DEBUG_PRINTF("before reduce: %s\n", dump(look).c_str()); // First, remove floods that we already can't escape; they shouldn't affect // the analysis below. for (auto it = flood_reach.begin(); it != flood_reach.end();) { if (isFloodProne(look, *it)) { DEBUG_PRINTF("removing inescapable flood on %s from analysis\n", describeClass(*it).c_str()); flood_reach.erase(it++); } else { ++it; } } LookPriority cmp(look); priority_queue<s32, vector<s32>, LookPriority> pq(cmp); for (const auto &m : look) { pq.push(m.first); } while (!pq.empty() && look.size() > MAX_LOOKAROUND_ENTRIES) { s32 d = pq.top(); assert(contains(look, d)); const CharReach cr(look[d]); // copy pq.pop(); DEBUG_PRINTF("erasing {%d: %s}\n", d, describeClass(cr).c_str()); look.erase(d); // If removing this entry would result in us becoming flood_prone on a // particular flood_reach case, reinstate it and move on. if (isFloodProne(look, flood_reach)) { DEBUG_PRINTF("reinstating {%d: %s} due to flood-prone check\n", d, describeClass(cr).c_str()); look.insert(make_pair(d, cr)); } } while (!pq.empty()) { s32 d = pq.top(); assert(contains(look, d)); const CharReach cr(look[d]); // copy pq.pop(); if (cr.count() < LOOKAROUND_WIDE_REACH) { continue; } DEBUG_PRINTF("erasing {%d: %s}\n", d, describeClass(cr).c_str()); look.erase(d); // If removing this entry would result in us becoming flood_prone on a // particular flood_reach case, reinstate it and move on. if (isFloodProne(look, flood_reach)) { DEBUG_PRINTF("reinstating {%d: %s} due to flood-prone check\n", d, describeClass(cr).c_str()); look.insert(make_pair(d, cr)); } } DEBUG_PRINTF("after reduce: %s\n", dump(look).c_str()); }
static void findForwardAccelScheme(const vector<hwlmLiteral> &lits, hwlm_group_t expected_groups, AccelAux *aux) { DEBUG_PRINTF("building accel expected=%016llx\n", expected_groups); u32 min_len = MAX_ACCEL_OFFSET; vector<const hwlmLiteral *> filtered_lits; filterLits(lits, expected_groups, &filtered_lits, &min_len); if (filtered_lits.empty()) { return; } if (findDVerm(filtered_lits, aux) || findSVerm(filtered_lits, aux)) { return; } vector<CharReach> reach(MAX_ACCEL_OFFSET, CharReach()); for (const auto &lit : lits) { if (!(lit.groups & expected_groups)) { continue; } for (u32 i = 0; i < MAX_ACCEL_OFFSET && i < lit.s.length(); i++) { unsigned char c = lit.s[i]; if (lit.nocase) { DEBUG_PRINTF("adding %02hhx to %u\n", mytoupper(c), i); DEBUG_PRINTF("adding %02hhx to %u\n", mytolower(c), i); reach[i].set(mytoupper(c)); reach[i].set(mytolower(c)); } else { DEBUG_PRINTF("adding %02hhx to %u\n", c, i); reach[i].set(c); } } } u32 min_count = ~0U; u32 min_offset = ~0U; for (u32 i = 0; i < min_len; i++) { size_t count = reach[i].count(); DEBUG_PRINTF("offset %u is %s (reach %zu)\n", i, describeClass(reach[i]).c_str(), count); if (count < min_count) { min_count = (u32)count; min_offset = i; } } assert(min_offset <= min_len); if (min_count > MAX_SHUFTI_WIDTH) { DEBUG_PRINTF("min shufti with %u chars is too wide\n", min_count); return; } const CharReach &cr = reach[min_offset]; if (shuftiBuildMasks(cr, &aux->shufti.lo, &aux->shufti.hi) != -1) { DEBUG_PRINTF("built shufti for %s (%zu chars, offset %u)\n", describeClass(cr).c_str(), cr.count(), min_offset); aux->shufti.accel_type = ACCEL_SHUFTI; aux->shufti.offset = verify_u8(min_offset); return; } DEBUG_PRINTF("fail\n"); }