/**
 * Drop "vacuous" edges -- edges leading straight from a start vertex to an
 * accept vertex -- from graphs whose min_offset or min_length constraints
 * mean such an edge can never produce a match.
 *
 * All offending edges are collected first and removed in a single batch,
 * after which pruneUseless() is run. The graph is left untouched when neither
 * constraint is set or when no vacuous edge is found.
 */
static
void pruneVacuousEdges(NGWrapper &g) {
    if (!(g.min_length || g.min_offset)) {
        return; // neither constraint is set
    }

    vector<NFAEdge> doomed;

    for (const auto &edge : edges_range(g)) {
        const NFAVertex from = source(edge, g);
        const NFAVertex to = target(edge, g);

        if (g.min_offset && from == g.start && is_any_accept(to, g)) {
            // Special case: with a min_offset, crudely drop vacuous edges
            // that leave the (anchored) start vertex.
            DEBUG_PRINTF("vacuous edge in graph with min_offset!\n");
            doomed.push_back(edge);
        } else if (g.min_length && is_any_start(from, g) &&
                   is_any_accept(to, g)) {
            // With a min_length, a vacuous edge from any start can go.
            DEBUG_PRINTF("vacuous edge in graph with min_length!\n");
            doomed.push_back(edge);
        }
    }

    if (!doomed.empty()) {
        remove_edges(doomed, g);
        pruneUseless(g);
    }
}
/**
 * Remove edges to accepts that can never produce a match long enough to
 * satisfy the graph's length/offset constraints.
 *
 * Only acts when a min_length is set. Distances from start-of-match are
 * computed once with getDistancesFromSOM() and handed to the per-accept
 * overload of pruneUnmatchable(), first for accept and then for acceptEod;
 * pruneUseless() is run afterwards.
 */
static
void pruneUnmatchable(NGWrapper &g, const ReportManager &rm) {
    if (g.min_length) {
        vector<DepthMinMax> som_distances = getDistancesFromSOM(g);

        pruneUnmatchable(g, som_distances, rm, g.accept);
        pruneUnmatchable(g, som_distances, rm, g.acceptEod);

        pruneUseless(g);
    }
}
/**
 * \brief Contract cycles of UTF-8 code points down to a single cyclic vertex
 * where possible, on the assumption that input is always well-formed.
 *
 * Seed vertices are gathered with findSeeds() and each one is passed to
 * expandCyclic(). If any call reports a change, pruneUseless() is run once at
 * the end.
 *
 * \param h   graph to transform in place
 * \param som forwarded unchanged to findSeeds()
 */
void utf8DotRestoration(NGHolder &h, bool som) {
    vector<NFAVertex> seeds; /* cyclic ascii vertices */
    findSeeds(h, som, &seeds);

    bool graph_changed = false;
    for (auto seed : seeds) {
        // Every seed must be processed, even once a change has been seen.
        if (expandCyclic(h, seed)) {
            graph_changed = true;
        }
    }

    if (!graph_changed) {
        return;
    }
    pruneUseless(h);
}
/**
 * Remove every edge that isEdgePrunable() flags, given the bidirectional
 * vertex depths produced by calcDepths().
 *
 * Edges are collected first and removed in one batch; pruneUseless() is run
 * only if at least one edge was removed.
 */
static
void pruneExtUnreachable(NGWrapper &g) {
    vector<NFAVertexBidiDepth> depths;
    calcDepths(g, depths);

    vector<NFAEdge> prunable;
    for (const auto &edge : edges_range(g)) {
        if (!isEdgePrunable(g, depths, edge)) {
            continue;
        }
        DEBUG_PRINTF("pruning\n");
        prunable.push_back(edge);
    }

    if (!prunable.empty()) {
        remove_edges(prunable, g);
        pruneUseless(g);
    }
}
/**
 * \brief Convert the temporary assert vertices left behind by graph
 * construction into edge-based flags.
 *
 * After this pass the rest of the code base need not know that assert
 * vertices ever existed. Steps, in order:
 *  - bail out if the graph has no assert vertices;
 *  - cache every (source, target) vertex pair against its edge descriptor;
 *  - call replaceAssertVertex() on each vertex carrying WORDBOUNDARY_FLAGS;
 *  - call checkForMultilineStart();
 *  - if any vertex was replaced, prune and renumber the graph.
 *
 * \param rm report manager, forwarded to checkForMultilineStart()
 * \param g  graph to rewrite in place
 */
void removeAssertVertices(ReportManager &rm, NGWrapper &g) {
    DEBUG_PRINTF("before: graph has %zu vertices\n", num_vertices(g));

    // Nothing to do unless some vertex actually has assertion flags set.
    if (!hasAssertVertices(g)) {
        DEBUG_PRINTF("no assert vertices, done\n");
        return;
    }

    // Cache of (u, v) vertex pairs to edge descriptors.
    edge_cache_t edge_cache;
    for (const auto &edge : edges_range(g)) {
        const auto endpoints = make_pair(source(edge, g), target(edge, g));
        edge_cache[endpoints] = edge;
    }

    u32 assert_edge_count = 0;
    size_t resolved = 0;
    for (auto v : vertices_range(g)) {
        if (!(g[v].assert_flags & WORDBOUNDARY_FLAGS)) {
            continue;
        }
        replaceAssertVertex(g, v, edge_cache, assert_edge_count);
        ++resolved;
    }

    checkForMultilineStart(rm, g);

    if (resolved != 0) {
        DEBUG_PRINTF("resolved %zu assert vertices\n", resolved);
        pruneUseless(g);
        pruneEmptyVertices(g);
        g.renumberVertices();
        g.renumberEdges();
    }

    DEBUG_PRINTF("after: graph has %zu vertices\n", num_vertices(g));
    assert(!hasAssertVertices(g));
}
/**
 * Remove every non-special vertex whose character reachability is empty,
 * i.e. which cannot accept any symbol. If anything was removed, pruneUseless()
 * is run afterwards so that vertices no longer on a start-to-accept path are
 * pruned too.
 */
void pruneEmptyVertices(NGHolder &g) {
    DEBUG_PRINTF("pruning empty vertices\n");

    vector<NFAVertex> unreachable;
    for (auto v : vertices_range(g)) {
        if (!is_special(v, g)) {
            const CharReach &reach = g[v].char_reach;
            if (reach.none()) {
                DEBUG_PRINTF("empty: %u\n", g[v].index);
                unreachable.push_back(v);
            }
        }
    }

    if (!unreachable.empty()) {
        remove_vertices(unreachable, g);
        pruneUseless(g);
    }
}
/**
 * For Highlander (single-match) graphs: remove the non-accept out-edges of
 * vertices that lead to accept.
 *
 * The pass is abandoned without touching the graph unless every report in it
 * is an external report with a valid exhaustion key and no bounds. Only
 * non-special predecessors of g.accept are considered. pruneUseless() is run
 * if any edge was removed.
 *
 * \param g  graph to prune in place
 * \param rm report manager used to look up each report
 */
void pruneHighlanderAccepts(NGHolder &g, const ReportManager &rm) {
    // Safety check: all reports must be simple exhaustible reports, or this
    // is not safe. This optimisation should be called early enough that no
    // internal reports have been added.
    for (auto report_id : all_reports(g)) {
        const Report &report = rm.getReport(report_id);
        const bool safe = report.ekey != INVALID_EKEY &&
                          !report.hasBounds() && isExternalReport(report);
        if (!safe) {
            DEBUG_PRINTF("report %u is not external highlander with "
                         "no bounds\n", report_id);
            return;
        }
    }

    vector<NFAEdge> removable;
    for (auto pred : inv_adjacent_vertices_range(g.accept, g)) {
        if (!is_special(pred, g)) {
            // Every out-edge that does not lead to an accept can go.
            for (const auto &edge : out_edges_range(pred, g)) {
                if (is_any_accept(target(edge, g), g)) {
                    continue;
                }
                removable.push_back(edge);
            }
        }
    }

    if (removable.empty()) {
        return;
    }

    DEBUG_PRINTF("found %zu removable edges due to single match\n",
                 removable.size());
    remove_edges(removable, g);
    pruneUseless(g);
}
bool somMayGoBackwards(NFAVertex u, const NGHolder &g, const ue2::unordered_map<NFAVertex, u32> ®ion_map, smgb_cache &cache) { /* Need to ensure all matches of the graph g up to u contain no infixes * which are also matches of the graph to u. * * This is basically the same as firstMatchIsFirst except we g is not * always a dag. As we haven't gotten around to writing an execute_graph * that operates on general graphs, we take some (hopefully) conservative * short cuts. * * Note: if the u can be jumped we will take jump edges * into account as a possibility of som going backwards * * TODO: write a generalised ng_execute_graph/make this less hacky */ assert(&g == &cache.g); if (contains(cache.smgb, u)) { return cache.smgb[u]; } DEBUG_PRINTF("checking if som can go backwards on %u\n", g[u].index); set<NFAEdge> be; BackEdges<set<NFAEdge>> backEdgeVisitor(be); depth_first_search( g.g, visitor(backEdgeVisitor) .root_vertex(g.start) .vertex_index_map(get(&NFAGraphVertexProps::index, g.g))); bool rv; if (0) { exit: DEBUG_PRINTF("using cached result\n"); cache.smgb[u] = rv; return rv; } assert(contains(region_map, u)); const u32 u_region = region_map.at(u); for (const auto &e : be) { NFAVertex s = source(e, g); NFAVertex t = target(e, g); /* only need to worry about big cycles including/before u */ DEBUG_PRINTF("back edge %u %u\n", g[s].index, g[t].index); if (s != t && region_map.at(s) <= u_region) { DEBUG_PRINTF("eek big cycle\n"); rv = true; /* big cycle -> eek */ goto exit; } } ue2::unordered_map<NFAVertex, NFAVertex> orig_to_copy; NGHolder c_g; cloneHolder(c_g, g, &orig_to_copy); for (NFAVertex v : vertices_range(g)) { if (!is_virtual_start(v, g)) { continue; } NFAVertex c_v = orig_to_copy[v]; orig_to_copy[v] = c_g.startDs; for (NFAVertex c_w : adjacent_vertices_range(c_v, c_g)) { add_edge_if_not_present(c_g.startDs, c_w, c_g); } clear_vertex(c_v, c_g); } NFAVertex c_u = orig_to_copy[u]; clear_in_edges(c_g.acceptEod, c_g); add_edge(c_g.accept, c_g.acceptEod, c_g); 
clear_in_edges(c_g.accept, c_g); clear_out_edges(c_u, c_g); if (hasSelfLoop(u, g)) { add_edge(c_u, c_u, c_g); } add_edge(c_u, c_g.accept, c_g); set<NFAVertex> u_succ; insert(&u_succ, adjacent_vertices(u, g)); u_succ.erase(u); for (auto t : inv_adjacent_vertices_range(u, g)) { if (t == u) { continue; } for (auto v : adjacent_vertices_range(t, g)) { if (contains(u_succ, v)) { add_edge(orig_to_copy[t], c_g.accept, c_g); break; } } } pruneUseless(c_g); be.clear(); depth_first_search(c_g.g, visitor(backEdgeVisitor).root_vertex(c_g.start). vertex_index_map(get(&NFAGraphVertexProps::index, c_g.g))); for (const auto &e : be) { NFAVertex s = source(e, c_g); NFAVertex t = target(e, c_g); DEBUG_PRINTF("back edge %u %u\n", c_g[s].index, c_g[t].index); if (s != t) { assert(0); DEBUG_PRINTF("eek big cycle\n"); rv = true; /* big cycle -> eek */ goto exit; } } DEBUG_PRINTF("checking acyclic+selfloop graph\n"); rv = !firstMatchIsFirst(c_g); DEBUG_PRINTF("som may regress? %d\n", (int)rv); goto exit; }
/**
 * Prune reports and edges made redundant by simple exhaustible reports.
 *
 * Vertices leading to accept or acceptEod that carry at least one simple
 * exhaustible report are collected ("reporters"). Then:
 *  - a simple exhaustible report on a reporter is erased when
 *    isDominatedByReporter() says so; a reporter left with no reports loses
 *    its edges to accept and acceptEod;
 *  - a reporter satisfying hasOnlySelfLoopAndExhaustibleAccepts() loses its
 *    self-loop.
 * If anything changed, pruneUseless() is run and edges are renumbered.
 *
 * \param g  graph to prune in place
 * \param rm report manager used to look up each report
 */
void pruneHighlanderDominated(NGHolder &g, const ReportManager &rm) {
    vector<NFAVertex> reporters;

    // Append each predecessor of `sink` that has at least one simple
    // exhaustible report.
    const auto gather_reporters = [&](NFAVertex sink) {
        for (auto v : inv_adjacent_vertices_range(sink, g)) {
            for (const auto &id : g[v].reports) {
                if (isSimpleExhaustible(rm.getReport(id))) {
                    reporters.push_back(v);
                    break;
                }
            }
        }
    };
    gather_reporters(g.accept);
    gather_reporters(g.acceptEod);

    if (reporters.empty()) {
        return;
    }

    // A vertex may lead to both accepts: order by index and deduplicate.
    sort(begin(reporters), end(reporters), make_index_ordering(g));
    reporters.erase(unique(begin(reporters), end(reporters)),
                    end(reporters));

    DEBUG_PRINTF("%zu vertices have simple exhaustible reports\n",
                 reporters.size());

    const auto &dominators = findDominators(g);

    bool changed = false;

    // If a reporter vertex is dominated by another with the same report, we
    // can remove that report; if all reports are removed, we can remove the
    // vertex entirely.
    for (const auto v : reporters) {
        // Iterate over a snapshot, as the live set is mutated below.
        const auto snapshot = g[v].reports;
        for (const auto &id : snapshot) {
            if (isSimpleExhaustible(rm.getReport(id)) &&
                isDominatedByReporter(g, dominators, v, id)) {
                DEBUG_PRINTF("removed dominated report %u from vertex %u\n",
                             id, g[v].index);
                g[v].reports.erase(id);
            }
        }

        if (!g[v].reports.empty()) {
            continue;
        }
        DEBUG_PRINTF("removed edges to accepts from %u, no reports left\n",
                     g[v].index);
        remove_edge(v, g.accept, g);
        remove_edge(v, g.acceptEod, g);
        changed = true;
    }

    // If a reporter vertex has a self-loop, but otherwise only leads to accept
    // (note: NOT acceptEod) and has simple exhaustible reports, we can delete
    // the self-loop.
    for (const auto v : reporters) {
        if (!hasOnlySelfLoopAndExhaustibleAccepts(g, rm, v)) {
            continue;
        }
        remove_edge(v, v, g);
        changed = true;
        DEBUG_PRINTF("removed self-loop on %u\n", g[v].index);
    }

    if (changed) {
        pruneUseless(g);

        // We may have only removed self-loops, in which case pruneUseless
        // wouldn't renumber, so we do edge renumbering explicitly here.
        g.renumberEdges();
    }
}