/** Some squash states are clearly not advantageous in the NFA, as they do * incur the cost of an exception: * -# acyclic states * -# squash only a few acyclic states */ void filterSquashers(const NGHolder &g, map<NFAVertex, NFAStateSet> &squash) { DEBUG_PRINTF("filtering\n"); map<u32, NFAVertex> rev; /* vertex_index -> vertex */ for (auto v : vertices_range(g)) { rev[g[v].index] = v; } for (auto v : vertices_range(g)) { if (!contains(squash, v)) { continue; } DEBUG_PRINTF("looking at squash set for vertex %u\n", g[v].index); if (!hasSelfLoop(v, g)) { DEBUG_PRINTF("acyclic\n"); squash.erase(v); continue; } NFAStateSet squashed = squash[v]; squashed.flip(); /* default sense for mask of survivors */ for (NFAStateSet::size_type sq = squashed.find_first(); sq != squashed.npos; sq = squashed.find_next(sq)) { NFAVertex u = rev[sq]; if (hasSelfLoop(u, g)) { DEBUG_PRINTF("squashing a cyclic (%zu) is always good\n", sq); goto next_vertex; } } if (squashed.count() < MIN_PURE_ACYCLIC_SQUASH) { DEBUG_PRINTF("squash set too small\n"); squash.erase(v); continue; } next_vertex:; DEBUG_PRINTF("squash set ok\n"); } }
/** * Builds a squash mask based on the pdom tree of v and the given char reach. * The built squash mask is a bit conservative for non-dot cases and could * be improved with a bit of thought. */ static void buildSquashMask(NFAStateSet &mask, const NGHolder &g, NFAVertex v, const CharReach &cr, const NFAStateSet &init, const vector<NFAVertex> &vByIndex, const PostDomTree &tree, som_type som, const vector<DepthMinMax> &som_depths, const ue2::unordered_map<NFAVertex, u32> ®ion_map, smgb_cache &cache) { DEBUG_PRINTF("build base squash mask for vertex %u)\n", g[v].index); vector<NFAVertex> q; PostDomTree::const_iterator it = tree.find(v); if (it != tree.end()) { q.insert(q.end(), it->second.begin(), it->second.end()); } const u32 v_index = g[v].index; while (!q.empty()) { NFAVertex u = q.back(); q.pop_back(); const CharReach &cru = g[u].char_reach; if ((cru & ~cr).any()) { /* bail: bad cr on vertex u */ /* TODO: this could be better * * we still need to ensure that we record any paths leading to u. * Hence all vertices R which can reach u must be excluded from the * squash mask. Note: R != pdom(u) and there may exist an x in (R - * pdom(u)) which is in pdom(y) where y is in q. Clear ? */ mask.set(); return; } const u32 u_index = g[u].index; if (som) { /* We cannot add a state u to the squash mask of v if it may have an * earlier start of match offset. ie for us to add a state u to v * maxSomDist(u) <= minSomDist(v) */ const depth &max_som_dist_u = som_depths[u_index].max; const depth &min_som_dist_v = som_depths[v_index].min; if (max_som_dist_u.is_infinite()) { /* it is hard to tell due to the INF if u can actually store an * earlier SOM than w (state we are building the squash mask * for) - need to think more deeply */ if (mustBeSetBefore(u, v, g, cache) && !somMayGoBackwards(u, g, region_map, cache)) { DEBUG_PRINTF("u %u v %u\n", u_index, v_index); goto squash_ok; } } if (max_som_dist_u > min_som_dist_v) { /* u can't be squashed as it may be storing an earlier SOM */ goto add_children_to_queue; } } squash_ok: mask.set(u_index); DEBUG_PRINTF("pdom'ed %u\n", u_index); add_children_to_queue: it = tree.find(u); if (it != tree.end()) { q.insert(q.end(), it->second.begin(), it->second.end()); } } if (cr.all()) { /* the init states aren't in the pdom tree. If all their succ states * are set (or v), we can consider them post dominated */ /* Note: init states will always result in a later som */ for (size_t i = init.find_first(); i != init.npos; i = init.find_next(i)) { /* Yes vacuous patterns do exist */ NFAVertex iv = vByIndex[i]; for (auto w : adjacent_vertices_range(iv, g)) { if (w == g.accept || w == g.acceptEod) { DEBUG_PRINTF("skipping %zu due to vacuous accept\n", i); goto next_init_state; } u32 vert_id = g[w].index; if (w != iv && w != v && !mask.test(vert_id)) { DEBUG_PRINTF("skipping %zu due to %u\n", i, vert_id); goto next_init_state; } } DEBUG_PRINTF("pdom'ed %zu\n", i); mask.set(i); next_init_state:; } } mask.flip(); }