/**
 * Decide whether vertex \p va of \p ga and vertex \p vb of \p gb match.
 *
 * Two vertices match when:
 *  - their character reachability is equal;
 *  - if either of them is a start vertex, both have the same index; and
 *  - they agree on having an edge to accept and on having an edge to
 *    acceptEod in their respective graphs.
 *
 * \return true if all of the above hold, false otherwise.
 */
static never_inline
bool cplVerticesMatch(const NGHolder &ga, NFAVertex va,
                      const NGHolder &gb, NFAVertex vb) {
    // Reachability has to be identical.
    if (ga[va].char_reach != gb[vb].char_reach) {
        return false;
    }

    // As soon as a start vertex is involved on either side, the two vertices
    // must be the very same start, which is checked via their indices.
    const bool involves_start = is_any_start(va, ga) || is_any_start(vb, gb);
    if (involves_start && ga[va].index != gb[vb].index) {
        return false;
    }

    // An edge to accept (resp. acceptEod) must exist on both sides or on
    // neither side.
    const bool to_accept_a = edge(va, ga.accept, ga).second;
    const bool to_accept_b = edge(vb, gb.accept, gb).second;
    const bool to_eod_a = edge(va, ga.acceptEod, ga).second;
    const bool to_eod_b = edge(vb, gb.acceptEod, gb).second;

    return to_accept_a == to_accept_b && to_eod_a == to_eod_b;
}
/**
 * Prune vacuous edges from graphs whose min_offset or min_length constraints
 * mean those edges can never produce a match.
 *
 * Nothing is done when neither constraint is set. If any edge is removed,
 * pruneUseless() is run on the graph afterwards.
 */
static
void pruneVacuousEdges(NGWrapper &g) {
    if (!(g.min_length || g.min_offset)) {
        return; // no constraint that could make an edge vacuous
    }

    // Collect first, remove afterwards, so that the edge range is not
    // modified while it is being walked.
    vector<NFAEdge> doomed;

    for (const auto &e : edges_range(g)) {
        const NFAVertex from = source(e, g);
        const NFAVertex to = target(e, g);

        if (g.min_offset && from == g.start && is_any_accept(to, g)) {
            // Crude special case: with a min_offset, an edge straight from
            // start to an accept is vacuous.
            DEBUG_PRINTF("vacuous edge in graph with min_offset!\n");
            doomed.push_back(e);
        } else if (g.min_length && is_any_start(from, g) &&
                   is_any_accept(to, g)) {
            // With a min_length, any start-to-accept edge can go.
            DEBUG_PRINTF("vacuous edge in graph with min_length!\n");
            doomed.push_back(e);
        }
    }

    if (!doomed.empty()) {
        remove_edges(doomed, g);
        pruneUseless(g);
    }
}
static bool isExclusive(const NGHolder &h, const u32 num, unordered_set<u32> &tailId, map<u32, unordered_set<u32>> &skipList, const RoleInfo<role_id> &role1, const RoleInfo<role_id> &role2) { const u32 id1 = role1.id; const u32 id2 = role2.id; if (contains(skipList, id1) && contains(skipList[id1], id2)) { return false; } const auto &triggers1 = role1.literals; const auto &triggers2 = role2.literals; if (isSuffix(triggers1, triggers2)) { skipList[id2].insert(id1); return false; } DEBUG_PRINTF("role id2:%u\n", id2); const auto &cr1 = role1.cr; if (overlaps(cr1, role2.last_cr)) { CharReach cr = cr1 | role1.prefix_cr; flat_set<NFAVertex> states; for (const auto &lit : triggers2) { auto lit1 = findStartPos(cr, lit); if (lit1.empty()) { continue; } states.clear(); if (lit1.size() < lit.size()) { // Only starts. states.insert(h.start); states.insert(h.startDs); } else { // All vertices. insert(&states, vertices(h)); } auto activeStates = execute_graph(h, lit1, states); // Check if only literal states are on for (const auto &s : activeStates) { if ((!is_any_start(s, h) && h[s].index <= num) || contains(tailId, h[s].index)) { skipList[id2].insert(id1); return false; } } } } return true; }
/** One final, FINAL optimisation. Drop either start or startDs if it's unused * in this graph. We leave this until this late because having both vertices in * the graph, with fixed state indices, is useful for merging and other * analyses. */ void dropUnusedStarts(NGHolder &g, ue2::unordered_map<NFAVertex, u32> &states) { u32 adj = 0; if (startIsRedundant(g)) { DEBUG_PRINTF("dropping unused start\n"); states[g.start] = NO_STATE; adj++; } if (proper_out_degree(g.startDs, g) == 0) { DEBUG_PRINTF("dropping unused startDs\n"); states[g.startDs] = NO_STATE; adj++; } if (!adj) { DEBUG_PRINTF("both start and startDs must remain\n"); return; } // We have removed one or both of the starts. Walk the non-special vertices // in the graph with state indices assigned to them and subtract // adj from all of them. for (auto v : vertices_range(g)) { u32 &state = states[v]; // note ref if (state == NO_STATE) { continue; } if (is_any_start(v, g)) { assert(state <= 1); state = 0; // one start remains } else { assert(!is_special(v, g)); assert(state >= adj); state -= adj; } } }
/**
 * Build the squash masks for graph \p g.
 *
 * Every mask is numStates bits wide and is indexed by g[v].index. For each
 * vertex that has a self-loop and is not an init state, a mask is built with
 * buildSquashMask() and then narrowed (by AND-ing in further masks) using
 * qualifying successors and predecessors. The vertex's own bit is always
 * set; a mask is recorded only when at least one bit ends up unset (the debug
 * output counts the unset bits as the "squashed" states).
 *
 * The per-vertex results are then passed through findDerivedSquashers() and
 * clearMutualSquashers() before being returned.
 *
 * \param g    graph to analyse.
 * \param som  when it tests true, region and SOM-distance information is
 *             computed and an additional SOM-distance restriction is applied
 *             to predecessors.
 * \return map from vertex to its squash mask.
 */
map<NFAVertex, NFAStateSet> findSquashers(const NGHolder &g, som_type som) {
    map<NFAVertex, NFAStateSet> squash;

    // Number of bits to use for all our masks. If we're a triggered graph,
    // tops have already been assigned, so we don't have to account for them.
    const u32 numStates = num_vertices(g);

    // Build post-dominator tree.
    PostDomTree pdom_tree;
    buildPDomTree(g, pdom_tree);

    // Build list of vertices by state ID and a set of init states.
    vector<NFAVertex> vByIndex(numStates, NFAGraph::null_vertex());
    NFAStateSet initStates(numStates);
    // Shared by every buildSquashMask() call below and by
    // findDerivedSquashers().
    smgb_cache cache(g);

    // Mappings used for SOM mode calculations, otherwise left empty.
    unordered_map<NFAVertex, u32> region_map;
    vector<DepthMinMax> som_depths;
    if (som) {
        region_map = assignRegions(g);
        som_depths = getDistancesFromSOM(g);
    }

    // Index every vertex by g[v].index and mark the init states: any start
    // vertex, plus any vertex with no in-edges.
    for (auto v : vertices_range(g)) {
        const u32 vert_id = g[v].index;
        DEBUG_PRINTF("vertex %u/%u\n", vert_id, numStates);
        assert(vert_id < numStates);
        vByIndex[vert_id] = v;

        if (is_any_start(v, g) || !in_degree(v, g)) {
            initStates.set(vert_id);
        }
    }

    for (u32 i = 0; i < numStates; i++) {
        NFAVertex v = vByIndex[i];
        // Every index in [0, numStates) is expected to have been filled in by
        // the loop above.
        assert(v != NFAGraph::null_vertex());
        const CharReach &cr = g[v].char_reach;

        /* only non-init cyclics can be squashers */
        if (!hasSelfLoop(v, g) || initStates.test(i)) {
            continue;
        }

        DEBUG_PRINTF("state %u is cyclic\n", i);

        // mask: squash mask under construction for v.
        // succ / pred: filled in by buildSucc() / buildPred() for v.
        NFAStateSet mask(numStates), succ(numStates), pred(numStates);
        buildSquashMask(mask, g, v, cr, initStates, vByIndex, pdom_tree, som,
                        som_depths, region_map, cache);
        buildSucc(succ, g, v);
        buildPred(pred, g, v);
        const auto &reports = g[v].reports;

        // Pass 1: for each state j in succ whose pred set equals v's, AND
        // the mask built for j into v's mask.
        for (size_t j = succ.find_first(); j != succ.npos;
             j = succ.find_next(j)) {
            NFAVertex vj = vByIndex[j];
            NFAStateSet pred2(numStates);
            buildPred(pred2, g, vj);
            if (pred2 == pred) {
                DEBUG_PRINTF("adding the sm from %zu to %u's sm\n", j, i);
                NFAStateSet tmp(numStates);
                // Note: built with v's reach (cr), not vj's own.
                buildSquashMask(tmp, g, vj, cr, initStates, vByIndex, pdom_tree,
                                som, som_depths, region_map, cache);
                mask &= tmp;
            }
        }

        // Pass 2: consider each state j in pred as a further basis for
        // squashing. Each check below that fails moves on to the next j.
        for (size_t j = pred.find_first(); j != pred.npos;
             j = pred.find_next(j)) {
            NFAVertex vj = vByIndex[j];
            NFAStateSet succ2(numStates);
            buildSucc(succ2, g, vj);
            /* we can use j as a basis for squashing if its succs are a subset
             * of ours */
            if ((succ2 & ~succ).any()) {
                continue;
            }

            if (som) {
                /* We cannot use j to add to the squash mask of v if it may
                 * have an earlier start of match offset. ie for us j as a
                 * basis for the squash mask of v we require:
                 * maxSomDist(j) <= minSomDist(v)
                 */

                /* ** TODO ** */

                const depth &max_som_dist_j = som_depths[g[vj].index].max;
                const depth &min_som_dist_v = som_depths[g[v].index].min;
                if (max_som_dist_j > min_som_dist_v ||
                    max_som_dist_j.is_infinite()) {
                    /* j can't be used as it may be storing an earlier SOM */
                    continue;
                }
            }

            const CharReach &crv = g[vj].char_reach;

            /* we also require that j's report information be a subset of ours */
            // Every special vertex adjacent to vj must also be reachable by
            // an edge from v; otherwise j is skipped entirely.
            bool seen_special = false;
            for (auto w : adjacent_vertices_range(vj, g)) {
                if (is_special(w, g)) {
                    if (!edge(v, w, g).second) {
                        goto next_j; // acts as a 'continue' of the j loop
                    }
                    seen_special = true;
                }
            }

            // FIXME: should be subset check?
            // As written, this requires the report sets to be exactly equal
            // whenever vj has an edge to a special vertex.
            if (seen_special && g[vj].reports != reports) {
                continue;
            }

            /* ok we can use j */
            // ... but only if vj's reach is contained in v's reach.
            if ((crv & ~cr).none()) {
                NFAStateSet tmp(numStates);
                // Note: built with v's reach (cr), not vj's own (crv).
                buildSquashMask(tmp, g, vj, cr, initStates, vByIndex, pdom_tree,
                                som, som_depths, region_map, cache);
                mask &= tmp;
                mask.reset(j); // j's own bit is cleared in v's mask as well
            }

        next_j:;
        }

        mask.set(i); /* never clear ourselves */

        if ((~mask).any()) { // i.e. some bits unset in mask
            DEBUG_PRINTF("%u squashes %zu other states\n", i, (~mask).count());
            squash.emplace(v, mask);
        }
    }

    // Post-processing of the collected masks; squash is passed by pointer to
    // the first helper and by name to the second.
    findDerivedSquashers(g, vByIndex, pdom_tree, initStates, &squash, som,
                         som_depths, region_map, cache);

    clearMutualSquashers(g, vByIndex, squash);

    return squash;
}