/**
 * Compute, for every vertex of \p g_orig, the min/max depth from g.start in a
 * modified clone of the graph.
 *
 * In the clone, the successors of each virtual start and of startDs are wired
 * to start, and the in-edges of those vertices are removed before depths are
 * calculated. The returned vector is indexed by vertex index in \p g_orig;
 * startDs and virtual starts are always given a depth of (0, 0).
 */
vector<DepthMinMax> getDistancesFromSOM(const NGHolder &g_orig) {
    // All surgery happens on a private clone, so the caller's graph is never
    // mutated.
    NGHolder g;
    ue2::unordered_map<NFAVertex, NFAVertex> vmap; // g_orig vertex -> g vertex
    cloneHolder(g, g_orig, &vmap);

    // Gather the virtual starts of the clone, then startDs.
    vector<NFAVertex> vstarts;
    for (auto v : vertices_range(g)) {
        if (!is_virtual_start(v, g)) {
            continue;
        }
        vstarts.push_back(v);
    }
    vstarts.push_back(g.startDs);

    // First pass: hook the successors of each of these vertices up to g.start.
    for (auto vs : vstarts) {
        wireSuccessorsToStart(g, vs);
    }

    // Second pass: strip their in-edges so they play no part in the depth
    // calculation.
    for (auto vs : vstarts) {
        clear_in_edges(vs, g);
    }

    //dumpGraph("som_depth.dot", g.g);

    // Depths in the clone, numbered by vertex index in g.
    vector<DepthMinMax> temp_depths;
    calcDepthsFrom(g, g.start, temp_depths);

    // Copy the results across, re-indexing by vertex index in g_orig.
    vector<DepthMinMax> depths(num_vertices(g_orig));

    for (auto v_orig : vertices_range(g_orig)) {
        assert(contains(vmap, v_orig));
        NFAVertex v_new = vmap[v_orig];

        u32 orig_idx = g_orig[v_orig].index;
        DepthMinMax &d = depths.at(orig_idx);

        const bool zero_depth =
            v_orig == g_orig.startDs || is_virtual_start(v_orig, g_orig);
        if (!zero_depth) {
            u32 new_idx = g[v_new].index;
            d = temp_depths.at(new_idx);
        } else {
            // StartDs and virtual starts always have zero depth.
            d = DepthMinMax(0, 0);
        }
    }

    return depths;
}
static void gather_vars(const GoughGraph &g, vector<const GoughSSAVar *> *vars, map<const GoughSSAVar *, string> *names, map<const GoughSSAVar *, string> *src_label, set<const GoughSSAVar *> *reporters) { for (auto v : vertices_range(g)) { for (const auto &r : g[v].reports) { reporters->insert(r.second); } for (const auto &r : g[v].reports_eod) { reporters->insert(r.second); } for (u32 i = 0; i < g[v].vars.size(); i++) { const GoughSSAVar *vp = g[v].vars[i].get(); stringstream ss; ss << dump_name(g[v]) << "_" << i; vars->push_back(vp); names->insert(make_pair(vp, ss.str())); src_label->insert(make_pair(vp, dump_name(g[v]))); } } for (const auto &e : edges_range(g)) { for (u32 i = 0; i < g[e].vars.size(); i++) { const GoughSSAVar *vp = g[e].vars[i].get(); stringstream ss; ss << dump_name(g, e) << "_" << i; vars->push_back(vp); names->insert(make_pair(vp, ss.str())); src_label->insert(make_pair(vp, dump_name(g, e))); } } }
/**
 * Return the vertices of \p g that have a state assigned (anything other than
 * NO_STATE in \p state_ids), sorted by ascending state index.
 *
 * In debug builds, asserts that every vertex has an entry in \p state_ids and
 * that each returned vertex's state index equals its position in the result.
 */
static
vector<NFAVertex> getSortedVA(const NGHolder &g,
        const ue2::unordered_map<NFAVertex, u32> &state_ids) {
    vector<NFAVertex> ordered;
    ordered.reserve(num_vertices(g));

    for (auto v : vertices_range(g)) {
        assert(contains(state_ids, v));
        if (state_ids.at(v) != NO_STATE) {
            ordered.push_back(v);
        }
    }

    // Arrange the vertices by their state indices.
    sort(ordered.begin(), ordered.end(),
         [&state_ids](NFAVertex lhs, NFAVertex rhs) {
             return state_ids.at(lhs) < state_ids.at(rhs);
         });

#ifndef NDEBUG
    // Each vertex should sit at the position given by its state index.
    for (u32 pos = 0; pos < ordered.size(); pos++) {
        assert(state_ids.at(ordered.at(pos)) == pos);
    }
#endif

    return ordered;
}
static void dump_graph(const GoughGraph &g, const string &base, const Grey &grey) { stringstream ss; ss << grey.dumpPath << "gough_" << base << ".dot"; FILE *f = fopen(ss.str().c_str(), "w"); fprintf(f, "digraph NFA {\n"); fprintf(f, "rankdir=LR;\n"); fprintf(f, "size=\"11.5,8\"\n"); fprintf(f, "node [ shape = circle ];\n"); fprintf(f, "START [style=invis];\n"); for (auto v : vertices_range(g)) { fprintf(f, "%s [ width = 1, fixedsize = true, fontsize = 12, ", dump_name(g[v]).c_str()); if (!g[v].reports.empty() || !g[v].reports_eod.empty()) { fprintf(f, "shape = doublecircle "); } fprintf(f, "label = \"%u\"];\n", g[v].state_id); } for (const auto &e : edges_range(g)) { GoughVertex s = source(e, g); GoughVertex t = target(e, g); fprintf(f, "%s -> %s\n", dump_name(g[s]).c_str(), dump_name(g[t]).c_str()); } fprintf(f, "}\n"); fclose(f); }
/** Find the set of characters that are not present in the reachability of
 * graph \p g at or beyond a certain depth (MAX_STOP_DEPTH). If a character in
 * this set is encountered, it means that the NFA is either dead or has not
 * progressed more than that many characters from its start states.
 *
 * When \p som is SOM_NONE the per-vertex reach is taken from reduced_cr();
 * otherwise the vertex's own char_reach is used. */
CharReach findStopAlphabet(const NGHolder &g, som_type som) {
    const depth max_depth(MAX_STOP_DEPTH);
    const InitDepths depths(g);
    const map<NFAVertex, BoundedRepeatSummary> no_vertices;

    // Accumulates the reach of every sufficiently deep non-special vertex.
    CharReach deep_reach;

    for (auto v : vertices_range(g)) {
        if (is_special(v, g) || !(depths.maxDist(g, v) >= max_depth)) {
            continue;
        }

        if (som != SOM_NONE) {
            deep_reach |= g[v].char_reach;
        } else {
            deep_reach |= reduced_cr(v, g, no_vertices);
        }
    }

    // The stop alphabet is everything that is NOT reachable at depth.
    deep_reach.flip();
    return deep_reach;
}
static bool pruneForwardUseless(NGHolder &h, const nfag_t &g, NFAVertex s, vector<default_color_type> &vertexColor) { // Begin with all vertices set to white, as DFV only marks visited // vertices. fill(vertexColor.begin(), vertexColor.end(), boost::white_color); auto index_map = get(&NFAGraphVertexProps::index, g); depth_first_visit(g, s, make_dfs_visitor(boost::null_visitor()), make_iterator_property_map(vertexColor.begin(), index_map)); vector<NFAVertex> dead; // All non-special vertices that are still white can be removed. for (auto v : vertices_range(g)) { u32 idx = g[v].index; if (!is_special(v, g) && vertexColor[idx] == boost::white_color) { DEBUG_PRINTF("vertex %u is unreachable from %u\n", g[v].index, g[s].index); dead.push_back(v); } } if (dead.empty()) { return false; } DEBUG_PRINTF("removing %zu vertices\n", dead.size()); remove_vertices(dead, h, false); return true; }
bool firstMatchIsFirst(const NGHolder &p) { /* If the first match (by end offset) is not the first match (by start * offset) then we can't create a lock after it. * * Consider: 4009:/(foobar|ob).*bugger/s * * We don't care about races on the last byte as they can be resolved easily * at runtime /(foobar|obar).*hi/ * * It should be obvious we don't care about one match being a prefix * of another as they share the same start offset. * * Therefore, the case were we cannot establish that the som does not * regress is when there exists s1 and s2 in the language of p and s2 is a * proper infix of s1. * * It is tempting to add the further restriction that there does not exist a * prefix of s1 that is in the language of p (as in which case we would * presume, the lock has already been set). However, we have no way of * knowing if the lock can be cleared by some characters, and if so, if it * is still set. TODO: if we knew the lock's escapes where we could verify * that the rest of s1 does not clear the lock. (1) */ DEBUG_PRINTF("entry\n"); /* If there are any big cycles throw up our hands in despair */ if (hasBigCycles(p)) { DEBUG_PRINTF("fail, big cycles\n"); return false; } set<NFAVertex> states; /* turn on all states (except starts - avoid suffix matches) */ /* If we were doing (1) we would also except states leading to accepts - avoid prefix matches */ for (auto v : vertices_range(p)) { assert(!is_virtual_start(v, p)); if (!is_special(v, p)) { DEBUG_PRINTF("turning on %u\n", p[v].index); states.insert(v); } } /* run the prefix the main graph */ execute_graph(p, p, &states); for (auto v : states) { /* need to check if this vertex may represent an infix match - ie * it does not have an edge to accept. */ DEBUG_PRINTF("check %u\n", p[v].index); if (!edge(v, p.accept, p).second) { DEBUG_PRINTF("fail %u\n", p[v].index); return false; } } DEBUG_PRINTF("done first is first check\n"); return true; }
/** Remove any non-special vertices that can't be reached by traversing the
 * graph in reverse from acceptEod. */
void pruneUnreachable(NGHolder &g) {
    deque<NFAVertex> dead;

    // Trivial case: the accepts have no in-edges other than
    // accept->acceptEod, so every non-special vertex is unreachable.
    const bool accepts_unused = !hasGreaterInDegree(1, g.acceptEod, g) &&
                                !hasGreaterInDegree(0, g.accept, g) &&
                                edge(g.accept, g.acceptEod, g).second;

    if (!accepts_unused) {
        // General case: run Boost's depth_first_visit over a reversed view of
        // the graph, rooted at acceptEod.
        typedef reverse_graph<NFAGraph, NFAGraph&> RevNFAGraph;
        RevNFAGraph revg(g.g);

        map<NFAVertex, default_color_type> colours;

        depth_first_visit(revg, g.acceptEod,
                          make_dfs_visitor(boost::null_visitor()),
                          make_assoc_property_map(colours));

        DEBUG_PRINTF("color map has %zu entries after DFV\n", colours.size());

        // A non-special vertex with no entry in the colour map was never
        // reached by the visit, so it can be removed.
        for (auto v : vertices_range(revg)) {
            if (!is_special(v, revg) && !contains(colours, v)) {
                dead.push_back(v);
            }
        }
    } else {
        for (auto v : vertices_range(g)) {
            if (is_special(v, g)) {
                continue;
            }
            dead.push_back(v);
        }
    }

    if (dead.empty()) {
        DEBUG_PRINTF("no unreachable vertices\n");
        return;
    }

    remove_vertices(dead, g, false);
    DEBUG_PRINTF("removed %zu unreachable vertices\n", dead.size());
}
/**
 * Append raw pointers to every SSA variable in \p g to \p out: first those
 * held on vertices, then those held on edges.
 */
static
void all_vars(const GoughGraph &g, vector<GoughSSAVar *> *out) {
    // Vertex-owned variables come first...
    for (auto vert : vertices_range(g)) {
        push_back_all_raw(out, g[vert].vars);
    }

    // ...followed by the edge-owned ones.
    for (const auto &edge_desc : edges_range(g)) {
        push_back_all_raw(out, g[edge_desc].vars);
    }
}
/**
 * Returns true if any vertex in \p g has one of the WORDBOUNDARY_FLAGS bits
 * set in its assert_flags.
 */
static
bool hasAssertVertices(const NGHolder &g) {
    bool found = false;
    for (auto v : vertices_range(g)) {
        int flags = g[v].assert_flags;
        if ((flags & WORDBOUNDARY_FLAGS) != 0) {
            found = true;
            break;
        }
    }
    return found;
}
/**
 * Returns the union of char_reach over all non-special vertices of \p h.
 */
static
CharReach getReachability(const NGHolder &h) {
    CharReach combined;

    for (const auto &v : vertices_range(h)) {
        if (is_special(v, h)) {
            continue; // specials do not contribute
        }
        combined |= h[v].char_reach;
    }

    return combined;
}
/** Some squash states are clearly not advantageous in the NFA, as they do * incur the cost of an exception: * -# acyclic states * -# squash only a few acyclic states */ void filterSquashers(const NGHolder &g, map<NFAVertex, NFAStateSet> &squash) { DEBUG_PRINTF("filtering\n"); map<u32, NFAVertex> rev; /* vertex_index -> vertex */ for (auto v : vertices_range(g)) { rev[g[v].index] = v; } for (auto v : vertices_range(g)) { if (!contains(squash, v)) { continue; } DEBUG_PRINTF("looking at squash set for vertex %u\n", g[v].index); if (!hasSelfLoop(v, g)) { DEBUG_PRINTF("acyclic\n"); squash.erase(v); continue; } NFAStateSet squashed = squash[v]; squashed.flip(); /* default sense for mask of survivors */ for (NFAStateSet::size_type sq = squashed.find_first(); sq != squashed.npos; sq = squashed.find_next(sq)) { NFAVertex u = rev[sq]; if (hasSelfLoop(u, g)) { DEBUG_PRINTF("squashing a cyclic (%zu) is always good\n", sq); goto next_vertex; } } if (squashed.count() < MIN_PURE_ACYCLIC_SQUASH) { DEBUG_PRINTF("squash set too small\n"); squash.erase(v); continue; } next_vertex:; DEBUG_PRINTF("squash set ok\n"); } }
/**
 * Build a table with one StateInfo per vertex of \p g, positioned by vertex
 * index and constructed from the vertex and its char_reach.
 */
static
vector<StateInfo> makeInfoTable(const NGHolder &g) {
    vector<StateInfo> table(num_vertices(g));

    for (auto v : vertices_range(g)) {
        u32 slot = g[v].index;
        assert(slot < table.size());

        const CharReach &reach = g[v].char_reach;
        table[slot] = StateInfo(v, reach);
    }

    return table;
}
/* Crude, deterministic assignment of symbolic register slots: vertex
 * variables are handled first, then edge variables, with a single running
 * counter shared between them.
 * Returns the number of slots given out. */
static
u32 initial_slots(const GoughGraph &g) {
    u32 slots_used = 0;

    for (auto vert : vertices_range(g)) {
        set_initial_slots(g[vert].vars, &slots_used);
    }

    for (const auto &edge_desc : edges_range(g)) {
        set_initial_slots(g[edge_desc].vars, &slots_used);
    }

    return slots_used;
}
static void dump_var_mapping(const GoughGraph &g, const string &base, const Grey &grey) { stringstream ss; ss << grey.dumpPath << "gough_" << base << "_vars.txt"; FILE *f = fopen(ss.str().c_str(), "w"); for (auto v : vertices_range(g)) { set<const GoughSSAVar *> used = uses(g[v]); if (g[v].vars.empty() && used.empty()) { continue; } fprintf(f, "%s\n", dump_name(g[v]).c_str()); for (u32 i = 0; i < g[v].vars.size(); i++) { const GoughSSAVar *vp = g[v].vars[i].get(); fprintf(f, "\t%u: slot %u\n", i, vp->slot); } if (!used.empty()) { fprintf(f, "\tuses:"); vector<u32> used_id; for (const GoughSSAVar *var : used) { used_id.push_back(var->slot); } for (const u32 &id : used_id) { fprintf(f, " %u", id); } fprintf(f, "\n"); } } for (const auto &e : edges_range(g)) { set<const GoughSSAVar *> used = uses(g[e]); if (g[e].vars.empty() && used.empty()) { continue; } fprintf(f, "%s\n", dump_name(g, e).c_str()); for (u32 i = 0; i < g[e].vars.size(); i++) { const GoughSSAVar *vp = g[e].vars[i].get(); fprintf(f, "\t%u: slot %u\n", i, vp->slot); } if (!used.empty()) { fprintf(f, "\tuses:"); vector<u32> used_id; for (const GoughSSAVar *var : used) { used_id.push_back(var->slot); } for (const u32 &id : used_id) { fprintf(f, " %u", id); } fprintf(f, "\n"); } } fclose(f); }
/** \brief Loose hash of an NGHolder; intended to be equal whenever is_equal
 * would return true.
 *
 * Combines, for each vertex in iteration order, its index, its char_reach and
 * the indices of its adjacent (successor) vertices. */
u64a hash_holder(const NGHolder &g) {
    size_t seed = 0;

    for (auto v : vertices_range(g)) {
        const auto &props = g[v];
        boost::hash_combine(seed, props.index);
        boost::hash_combine(seed, props.char_reach);

        for (auto succ : adjacent_vertices_range(v, g)) {
            boost::hash_combine(seed, g[succ].index);
        }
    }

    return seed;
}
void fct(const LCC& lcc) { vertex_range vr(vertices(lcc)); std::cout << "new for loop" << std::endl; for(vertex_descriptor vd : vr){ std::cout << vd->point() << std::endl; } std::cout << "boost::tie + std::for_each" << std::endl; vertex_iterator vb, ve; boost::tie(vb,ve) = vertices_range(lcc); std::for_each(vb,ve, Fct()); }
// Returns the number of states. static ue2::unordered_map<NFAVertex, u32> getStateIndices(const NGHolder &h, const vector<NFAVertex> &ordering) { ue2::unordered_map<NFAVertex, u32> states; for (const auto &v : vertices_range(h)) { states[v] = NO_STATE; } u32 stateNum = 0; for (auto v : ordering) { DEBUG_PRINTF("assigning state num %u to vertex %u\n", stateNum, h[v].index); states[v] = stateNum++; } return states; }
// populate VertexInfo table static ptr_vector<VertexInfo> getVertexInfos(const NGHolder &g) { const size_t num_verts = num_vertices(g); ptr_vector<VertexInfo> infos; infos.reserve(num_verts * 2); vector<VertexInfo *> vertex_map; // indexed by vertex_index property vertex_map.resize(num_verts); for (auto v : vertices_range(g)) { VertexInfo *vi = new VertexInfo(v, g); // insert our new shiny VertexInfo into the info map infos.push_back(vi); vertex_map[g[v].index] = vi; } // now, go through each vertex and populate its predecessor and successor lists for (VertexInfo &cur_vi : infos) { // find predecessors for (const auto &e : in_edges_range(cur_vi.v, g)) { NFAVertex u = source(e, g); VertexInfo *vmi = vertex_map[g[u].index]; cur_vi.pred_cr |= vmi->cr; cur_vi.pred.insert(vmi); // also set up edge tops if (is_triggered(g) && u == g.start) { cur_vi.edge_top = g[e].top; } } // find successors for (auto w : adjacent_vertices_range(cur_vi.v, g)) { VertexInfo *vmi = vertex_map[g[w].index]; cur_vi.succ_cr |= vmi->cr; cur_vi.succ.insert(vmi); } assert(!hasEdgeAsserts(cur_vi.v, g)); } return infos; }
/**
 * Fill \p tree so that each post-dominator maps to the set of non-special
 * vertices it post-dominates, using the result of findPostDominators(). A
 * vertex whose post-dominator entry is null is left out.
 */
static
void buildPDomTree(const NGHolder &g, PostDomTree &tree) {
    ue2::unordered_map<NFAVertex, NFAVertex> postdominators =
        findPostDominators(g);

    for (auto v : vertices_range(g)) {
        if (!is_special(v, g)) {
            NFAVertex pdom = postdominators[v];
            if (pdom) {
                DEBUG_PRINTF("vertex %u -> %u\n", g[pdom].index, g[v].index);
                tree[pdom].insert(v);
            }
        }
    }
}
/** Calculate the stop alphabet for each depth from 0 to MAX_STOP_DEPTH. Then
 * build a mask per character C, with bit i set when C is a stop character at
 * depth i, i.e. each bit represents a depth before the location of character
 * C (if encountered) at which the NFA would be in a predictable start state.
 *
 * When \p som is SOM_NONE the per-vertex reach is taken from reduced_cr();
 * otherwise the vertex's own char_reach is used. The result has N_CHARS
 * entries. */
vector<u8> findLeftOffsetStopAlphabet(const NGHolder &g, som_type som) {
    const depth max_depth(MAX_STOP_DEPTH);
    const InitDepths depths(g);
    const map<NFAVertex, BoundedRepeatSummary> no_vertices;

    // reach[i] accumulates the reach of every vertex whose (capped) maximum
    // depth is greater than i.
    vector<CharReach> reach(MAX_STOP_DEPTH);

    for (auto v : vertices_range(g)) {
        if (is_special(v, g)) {
            continue;
        }

        CharReach v_cr;
        if (som != SOM_NONE) {
            v_cr = g[v].char_reach;
        } else {
            v_cr = reduced_cr(v, g, no_vertices);
        }

        u32 d = min(max_depth, depths.maxDist(g, v));
        for (u32 level = 0; level < d; level++) {
            reach[level] |= v_cr;
        }
    }

#ifdef DEBUG
    for (u32 i = 0; i < MAX_STOP_DEPTH; i++) {
        DEBUG_PRINTF("depth %u, stop chars: ", i);
        describeClass(stdout, ~reach[i], 20, CC_OUT_TEXT);
        printf("\n");
    }
#endif

    vector<u8> stop(N_CHARS, 0);

    for (u32 level = 0; level < MAX_STOP_DEPTH; level++) {
        // Stop characters are those outside the reach at this depth.
        CharReach stop_cr = ~reach[level];
        const u8 mask = 1U << level;

        size_t c = stop_cr.find_first();
        while (c != stop_cr.npos) {
            stop[c] |= mask;
            c = stop_cr.find_next(c);
        }
    }

    return stop;
}
/** \brief Relax forbidden UTF-8 sequences.
 *
 * Some byte sequences can not appear in valid UTF-8 as they encode code points
 * above \\x{10ffff} or they represent overlong encodings. As we require valid
 * UTF-8 input, we have no defined behaviour in these cases; as a result we can
 * accept them if it simplifies the graph.
 *
 * Does nothing unless \p expr is marked as UTF-8. Otherwise, for each vertex
 * whose reach is exactly 0xe0, 0xf0 or 0xf4, allowIllegal() is applied to
 * every one of its successors.
 */
void relaxForbiddenUtf8(NGHolder &g, const ExpressionInfo &expr) {
    if (!expr.utf8) {
        return;
    }

    const CharReach e0(0xe0);
    const CharReach f0(0xf0);
    const CharReach f4(0xf4);

    for (auto v : vertices_range(g)) {
        const CharReach &cr = g[v].char_reach;
        const bool is_lead_byte = (cr == e0 || cr == f0 || cr == f4);
        if (!is_lead_byte) {
            continue;
        }

        // The reach is a single byte, so its first set bit is that byte.
        u8 pred_char = cr.find_first();
        for (auto succ : adjacent_vertices_range(v, g)) {
            allowIllegal(g, succ, pred_char);
        }
    }
}
/**
 * Returns the vertices of \p g that a depth-first visit over the reversed
 * graph, rooted at acceptEod, does not reach.
 */
static
vector<NFAVertex> findUnreachable(const NGHolder &g) {
    const boost::reverse_graph<NFAGraph, const NFAGraph &> revg(g.g);

    ue2::unordered_map<NFAVertex, boost::default_color_type> colours;
    colours.reserve(num_vertices(g));

    depth_first_visit(revg, g.acceptEod,
                      make_dfs_visitor(boost::null_visitor()),
                      make_assoc_property_map(colours));

    // Only visited vertices get an entry in the colour map; the rest are
    // unreachable.
    vector<NFAVertex> unreach;
    for (auto v : vertices_range(revg)) {
        if (contains(colours, v)) {
            continue;
        }
        unreach.push_back(v);
    }

    return unreach;
}
/** \brief Convert temporary assert vertices (from construction method) to
 * edge-based flags.
 *
 * The temporary assert vertices are an artefact of our construction method.
 * Removing them here lets the rest of the code base ignore their existence.
 *
 * Each vertex with a WORDBOUNDARY_FLAGS bit set is passed to
 * replaceAssertVertex(); afterwards checkForMultilineStart() is run and, if
 * any vertex was resolved, the graph is pruned and renumbered. */
void removeAssertVertices(ReportManager &rm, NGWrapper &g) {
    size_t num = 0;

    DEBUG_PRINTF("before: graph has %zu vertices\n", num_vertices(g));

    // Quick sweep first: if no vertex carries assertion flags there is
    // nothing to do.
    if (!hasAssertVertices(g)) {
        DEBUG_PRINTF("no assert vertices, done\n");
        return;
    }

    u32 assert_edge_count = 0;

    // Cache mapping (u, v) vertex pairs to their edge descriptors.
    edge_cache_t edge_cache;
    for (const auto &e : edges_range(g)) {
        edge_cache[make_pair(source(e, g), target(e, g))] = e;
    }

    for (auto v : vertices_range(g)) {
        if (!(g[v].assert_flags & WORDBOUNDARY_FLAGS)) {
            continue;
        }
        replaceAssertVertex(g, v, edge_cache, assert_edge_count);
        num++;
    }

    checkForMultilineStart(rm, g);

    if (num) {
        DEBUG_PRINTF("resolved %zu assert vertices\n", num);
        pruneUseless(g);
        pruneEmptyVertices(g);
        g.renumberVertices();
        g.renumberEdges();
    }

    DEBUG_PRINTF("after: graph has %zu vertices\n", num_vertices(g));
    assert(!hasAssertVertices(g));
}
/** Cheaply check whether this graph can't be reduced at all, because it is * just a chain of vertices with no other edges. */ static bool isIrreducible(const NGHolder &g) { for (auto v : vertices_range(g)) { // skip specials if (is_special(v, g)) { continue; } // we want meaningful in_degree to be 1. we also want to make sure we // don't count self-loop + 1 incoming edge as not irreducible if (in_degree(v, g) != 1 && !inIsIrreducible(v, g)) { return false; } // we want meaningful out_degree to be 1. we also want to make sure we // don't count self-loop + 1 outgoing edge as not irreducible if (out_degree(v, g) != 1 && !outIsIrreducible(v, g)) { return false; } } return true; }
/** Remove every non-special vertex whose reach is empty (it accepts no
 * symbols). If anything was removed, pruneUseless() is then run so that
 * vertices which no longer lie on a path from a start to an accept are pruned
 * as well. */
void pruneEmptyVertices(NGHolder &g) {
    DEBUG_PRINTF("pruning empty vertices\n");

    vector<NFAVertex> dead;
    for (auto v : vertices_range(g)) {
        if (is_special(v, g)) {
            continue;
        }

        const CharReach &cr = g[v].char_reach;
        if (!cr.none()) {
            continue;
        }

        DEBUG_PRINTF("empty: %u\n", g[v].index);
        dead.push_back(v);
    }

    if (!dead.empty()) {
        remove_vertices(dead, g);
        pruneUseless(g);
    }
}
/** One final, FINAL optimisation. Drop either start or startDs if it's unused * in this graph. We leave this until this late because having both vertices in * the graph, with fixed state indices, is useful for merging and other * analyses. */ void dropUnusedStarts(NGHolder &g, ue2::unordered_map<NFAVertex, u32> &states) { u32 adj = 0; if (startIsRedundant(g)) { DEBUG_PRINTF("dropping unused start\n"); states[g.start] = NO_STATE; adj++; } if (proper_out_degree(g.startDs, g) == 0) { DEBUG_PRINTF("dropping unused startDs\n"); states[g.startDs] = NO_STATE; adj++; } if (!adj) { DEBUG_PRINTF("both start and startDs must remain\n"); return; } // We have removed one or both of the starts. Walk the non-special vertices // in the graph with state indices assigned to them and subtract // adj from all of them. for (auto v : vertices_range(g)) { u32 &state = states[v]; // note ref if (state == NO_STATE) { continue; } if (is_any_start(v, g)) { assert(state <= 1); state = 0; // one start remains } else { assert(!is_special(v, g)); assert(state >= adj); state -= adj; } } }
/**
 * Populate \p aux from the Gough graph:
 *  - containing_v / containing_e map each variable to the vertex / edge that
 *    holds it;
 *  - reporters maps each variable named in a vertex's reports or reports_eod
 *    to the set of vertices that report it.
 */
static never_inline
void fill_aux(const GoughGraph &g, GoughGraphAux *aux) {
    for (auto v : vertices_range(g)) {
        const auto &vert = g[v];

        for (const auto &var : vert.vars) {
            aux->containing_v[var.get()] = v;
            DEBUG_PRINTF("%u is on vertex %u\n", var->slot, g[v].state_id);
        }

        for (GoughSSAVar *reported : vert.reports | map_values) {
            aux->reporters[reported].insert(v);
        }

        for (GoughSSAVar *reported : vert.reports_eod | map_values) {
            aux->reporters[reported].insert(v);
        }
    }

    for (const auto &e : edges_range(g)) {
        for (const auto &var : g[e].vars) {
            aux->containing_e[var.get()] = e;
            DEBUG_PRINTF("%u is on edge %u->%u\n", var->slot,
                         g[source(e, g)].state_id, g[target(e, g)].state_id);
        }
    }
}
bool somMayGoBackwards(NFAVertex u, const NGHolder &g, const ue2::unordered_map<NFAVertex, u32> ®ion_map, smgb_cache &cache) { /* Need to ensure all matches of the graph g up to u contain no infixes * which are also matches of the graph to u. * * This is basically the same as firstMatchIsFirst except we g is not * always a dag. As we haven't gotten around to writing an execute_graph * that operates on general graphs, we take some (hopefully) conservative * short cuts. * * Note: if the u can be jumped we will take jump edges * into account as a possibility of som going backwards * * TODO: write a generalised ng_execute_graph/make this less hacky */ assert(&g == &cache.g); if (contains(cache.smgb, u)) { return cache.smgb[u]; } DEBUG_PRINTF("checking if som can go backwards on %u\n", g[u].index); set<NFAEdge> be; BackEdges<set<NFAEdge>> backEdgeVisitor(be); depth_first_search( g.g, visitor(backEdgeVisitor) .root_vertex(g.start) .vertex_index_map(get(&NFAGraphVertexProps::index, g.g))); bool rv; if (0) { exit: DEBUG_PRINTF("using cached result\n"); cache.smgb[u] = rv; return rv; } assert(contains(region_map, u)); const u32 u_region = region_map.at(u); for (const auto &e : be) { NFAVertex s = source(e, g); NFAVertex t = target(e, g); /* only need to worry about big cycles including/before u */ DEBUG_PRINTF("back edge %u %u\n", g[s].index, g[t].index); if (s != t && region_map.at(s) <= u_region) { DEBUG_PRINTF("eek big cycle\n"); rv = true; /* big cycle -> eek */ goto exit; } } ue2::unordered_map<NFAVertex, NFAVertex> orig_to_copy; NGHolder c_g; cloneHolder(c_g, g, &orig_to_copy); for (NFAVertex v : vertices_range(g)) { if (!is_virtual_start(v, g)) { continue; } NFAVertex c_v = orig_to_copy[v]; orig_to_copy[v] = c_g.startDs; for (NFAVertex c_w : adjacent_vertices_range(c_v, c_g)) { add_edge_if_not_present(c_g.startDs, c_w, c_g); } clear_vertex(c_v, c_g); } NFAVertex c_u = orig_to_copy[u]; clear_in_edges(c_g.acceptEod, c_g); add_edge(c_g.accept, c_g.acceptEod, c_g); 
clear_in_edges(c_g.accept, c_g); clear_out_edges(c_u, c_g); if (hasSelfLoop(u, g)) { add_edge(c_u, c_u, c_g); } add_edge(c_u, c_g.accept, c_g); set<NFAVertex> u_succ; insert(&u_succ, adjacent_vertices(u, g)); u_succ.erase(u); for (auto t : inv_adjacent_vertices_range(u, g)) { if (t == u) { continue; } for (auto v : adjacent_vertices_range(t, g)) { if (contains(u_succ, v)) { add_edge(orig_to_copy[t], c_g.accept, c_g); break; } } } pruneUseless(c_g); be.clear(); depth_first_search(c_g.g, visitor(backEdgeVisitor).root_vertex(c_g.start). vertex_index_map(get(&NFAGraphVertexProps::index, c_g.g))); for (const auto &e : be) { NFAVertex s = source(e, c_g); NFAVertex t = target(e, c_g); DEBUG_PRINTF("back edge %u %u\n", c_g[s].index, c_g[t].index); if (s != t) { assert(0); DEBUG_PRINTF("eek big cycle\n"); rv = true; /* big cycle -> eek */ goto exit; } } DEBUG_PRINTF("checking acyclic+selfloop graph\n"); rv = !firstMatchIsFirst(c_g); DEBUG_PRINTF("som may regress? %d\n", (int)rv); goto exit; }
bool sentClearsTail(const NGHolder &g, const ue2::unordered_map<NFAVertex, u32> ®ion_map, const NGHolder &sent, u32 last_head_region, u32 *bad_region) { /* if a subsequent match from the prefix clears the rest of the pattern * we can just keep track of the last match of the prefix. * To see if this property holds, we could: * * 1A: turn on all states in the tail and run all strings that may * match the prefix past the tail, if we are still in any states then * this property does not hold. * * 1B: we turn on the initial states of the tail and run any strings which * may finish any partial matches in the prefix and see if we end up with * anything which would also imply that this property does not hold. * * OR * * 2: we just turn everything and run the prefix inputs past it and see what * we are left with. I think that is equivalent to scheme 1 and is easier to * implement. TODO: ponder * * Anyway, we are going with scheme 2 until further notice. */ u32 first_bad_region = ~0U; set<NFAVertex> states; /* turn on all states */ DEBUG_PRINTF("region %u is cutover\n", last_head_region); for (auto v : vertices_range(g)) { if (v != g.accept && v != g.acceptEod) { states.insert(v); } } for (UNUSED auto v : states) { DEBUG_PRINTF("start state: %u\n", g[v].index); } /* run the prefix the main graph */ execute_graph(g, sent, &states); /* .. and check if we are left with anything in the tail region */ for (auto v : states) { if (v == g.start || v == g.startDs) { continue; /* not in tail */ } DEBUG_PRINTF("v %u is still on\n", g[v].index); assert(v != g.accept && v != g.acceptEod); /* no cr */ assert(contains(region_map, v)); const u32 v_region = region_map.at(v); if (v_region > last_head_region) { DEBUG_PRINTF("bailing, %u > %u\n", v_region, last_head_region); first_bad_region = min(first_bad_region, v_region); } } if (first_bad_region != ~0U) { DEBUG_PRINTF("first bad region is %u\n", first_bad_region); *bad_region = first_bad_region; return false; } return true; }