Esempio n. 1
/**
 * \brief Returns true if graphs \p ha and \p hb can (and should) be merged.
 *
 * The combined state count is estimated as the sum of the two graphs' state
 * counts minus \p cpl. If that estimate is within FAST_STATE_LIMIT the merge
 * is accepted straight away. Otherwise a trial merge is built on a temporary
 * copy of \p ha and checked with isImplementableNFA().
 *
 * \param ha           First graph; only cloned here, not modified.
 * \param a_state_ids  State ids for the vertices of \p ha.
 * \param hb           Second graph, merged into the temporary copy of \p ha.
 * \param b_state_ids  State ids for the vertices of \p hb.
 * \param cpl          Number of states shared by the two graphs (presumably
 *                     the common prefix length -- confirm against callers).
 * \param rm           Report manager passed through to the implementability
 *                     checks.
 * \param cc           Compile context passed through to the implementability
 *                     checks.
 * \return false if the trial merge is not implementable or needs more than
 *         FAST_STATE_LIMIT states; true otherwise.
 */
bool shouldMerge(NGHolder &ha,
                 const ue2::unordered_map<NFAVertex, u32> &a_state_ids,
                 NGHolder &hb,
                 const ue2::unordered_map<NFAVertex, u32> &b_state_ids,
                 size_t cpl, const ReportManager *rm,
                 const CompileContext &cc) {
    size_t combinedStateCount =
        countStates(ha, a_state_ids) + countStates(hb, b_state_ids) - cpl;

    if (combinedStateCount > FAST_STATE_LIMIT) {
        // More complex implementability check: perform the merge on a
        // scratch copy so that neither input graph is disturbed.
        NGHolder h_temp;
        cloneHolder(h_temp, ha);
        assert(h_temp.kind == hb.kind);
        mergeNfaComponent(h_temp, hb, cpl);
        reduceImplementableGraph(h_temp, SOM_NONE, rm, cc);
        u32 numStates = isImplementableNFA(h_temp, rm, cc);
        DEBUG_PRINTF("isImplementableNFA returned %u states\n", numStates);
        if (!numStates) {
            // A zero state count is treated as "cannot be implemented".
            DEBUG_PRINTF("not implementable\n");
            return false;
        } else if (numStates > FAST_STATE_LIMIT) {
            DEBUG_PRINTF("too many states to merge\n");
            return false;
        }
    }

    return true;
}
Esempio n. 2
/**
 * \brief Adds the pattern graph \p w to the small-write DFA being built.
 *
 * The graph is copied, reduced and determinised into a McClellan DFA, which
 * is then either stored as the running DFA (rdfa) or merged into the existing
 * one. If the graph cannot be supported, determinisation fails, or the merge
 * fails, the builder is marked as poisoned and every later call returns
 * immediately.
 *
 * \param w Graph to add; it is cloned, never modified.
 */
void SmallWriteBuildImpl::add(const NGWrapper &w) {
    // If the graph is poisoned (i.e. we can't build a SmallWrite version),
    // we don't even try.
    if (poisoned) {
        return;
    }

    if (w.som || w.min_length || isVacuous(w)) { /* cannot support in smwr */
        poisoned = true;
        return;
    }

    DEBUG_PRINTF("w=%p\n", &w);

    // make a copy of the graph so that we can modify it for our purposes
    unique_ptr<NGHolder> h = cloneHolder(w);

    reduceGraph(*h, SOM_NONE, w.utf8, cc);

    // If the earliest match location is outside the small write region,
    // then we don't need to build a SmallWrite version.
    // However, we don't poison this case either, since it is simply a case,
    // where we know the resulting graph won't match.
    if (findMinWidth(*h) > depth(cc.grey.smallWriteLargestBuffer)) {
        return;
    }

    // Now we can actually build the McClellan DFA
    assert(h->kind == NFA_OUTFIX);
    auto r = buildMcClellan(*h, &rm, cc.grey);

    // If we couldn't build a McClellan DFA for this portion, we won't be able
    // build a smwr which represents the pattern set
    if (!r) {
        DEBUG_PRINTF("failed to determinise\n");
        poisoned = true;
        return;
    }

    prune_overlong(*r, cc.grey.smallWriteLargestBuffer);

    if (rdfa) {
        // do a merge of the new dfa with the existing dfa
        auto merged = mergeTwoDfas(rdfa.get(), r.get(), DFA_MERGE_MAX_STATES,
                                   &rm, cc.grey);
        if (!merged) {
            DEBUG_PRINTF("merge failed\n");
            poisoned = true;
            return;
        }
        DEBUG_PRINTF("merge succeeded, built %p\n", merged.get());
        rdfa = move(merged);
    } else {
        // First DFA seen: it becomes the running DFA as-is.
        rdfa = move(r);
    }
}
Esempio n. 3
/**
 * Computes a DepthMinMax for every vertex of \p g_orig, measured on a
 * modified copy of the graph in which the vertices collected in vstarts have
 * had their successors wired to start and their in-edges removed.
 *
 * The returned vector has num_vertices(g_orig) entries and is indexed by
 * vertex index in g_orig. startDs and virtual starts are given
 * DepthMinMax(0, 0).
 *
 * NOTE(review): this excerpt is truncated -- several closing braces, the body
 * of the first loop and the right-hand side of two assignments are missing.
 * The notes below mark each gap; restore the lines from the upstream source
 * before compiling.
 */
vector<DepthMinMax> getDistancesFromSOM(const NGHolder &g_orig) {
    // We operate on a temporary copy of the original graph here, so we don't
    // have to mutate the original.
    NGHolder g;
    ue2::unordered_map<NFAVertex, NFAVertex> vmap; // vertex in g_orig to vertex in g
    cloneHolder(g, g_orig, &vmap);

    // Collect the vertices of the copy whose depth contribution must be
    // neutralised.
    vector<NFAVertex> vstarts;
    for (auto v : vertices_range(g)) {
        if (is_virtual_start(v, g)) {
            // NOTE(review): branch body and closing braces missing here;
            // presumably v is appended to vstarts -- confirm. The comment
            // below also mentions startDs, which is not visibly added.

    // wire the successors of every virtual start or startDs to g.start.
    for (auto v : vstarts) {
        wireSuccessorsToStart(g, v);

    // drop the in-edges of every virtual start so that they don't participate
    // in the depth calculation.
    for (auto v : vstarts) {
        clear_in_edges(v, g);

    //dumpGraph("", g.g);

    vector<DepthMinMax> temp_depths; // numbered by vertex index in g
    calcDepthsFrom(g, g.start, temp_depths);

    // Transfer depths, indexed by vertex index in g_orig.
    vector<DepthMinMax> depths(num_vertices(g_orig));

    for (auto v_orig : vertices_range(g_orig)) {
        assert(contains(vmap, v_orig));
        NFAVertex v_new = vmap[v_orig];

        u32 orig_idx = g_orig[v_orig].index;

        // NOTE(review): initialiser missing in this excerpt; presumably the
        // entry of depths for orig_idx -- confirm.
        DepthMinMax &d =;

        if (v_orig == g_orig.startDs || is_virtual_start(v_orig, g_orig)) {
            // StartDs and virtual starts always have zero depth.
            d = DepthMinMax(0, 0);
        } else {
            u32 new_idx = g[v_new].index;
            // NOTE(review): right-hand side missing in this excerpt;
            // presumably the entry of temp_depths for new_idx -- confirm.
            d =;

    return depths;
Esempio n. 4
/**
 * Populates squash masks for states that can be switched off by highlander
 * (single match) reporters.
 *
 * Highlander reporters are collected from the predecessors of both accept and
 * acceptEod via getHighlanderReporters(). For each such vertex a copy of the
 * graph is made, removeEdgesToAccept() is applied to the copied vertex, and
 * the vertices that findUnreachable() then reports are the squash candidates.
 *
 * \param g   Graph to analyse; it is only cloned, never modified.
 * \param rm  Report manager used to identify highlander reports.
 * \return Map from reporter vertex to its squash mask; empty when the graph
 *         has no highlander reporters.
 *
 * NOTE(review): this excerpt is truncated -- closing braces, the second
 * DEBUG_PRINTF argument, the body of the `unreach.empty()` branch and the
 * statements that actually update the mask are missing. Restore them from the
 * upstream source before compiling.
 */
map<NFAVertex, NFAStateSet>
findHighlanderSquashers(const NGHolder &g, const ReportManager &rm) {
    map<NFAVertex, NFAStateSet> squash;

    set<NFAVertex> verts;
    getHighlanderReporters(g, g.accept, rm, verts);
    getHighlanderReporters(g, g.acceptEod, rm, verts);
    if (verts.empty()) {
        DEBUG_PRINTF("no highlander reports\n");
        return squash;

    // Every mask is sized to the number of vertices in g.
    const u32 numStates = num_vertices(g);

    for (auto v : verts) {
        // NOTE(review): the trailing argument(s) of this call are missing.
        DEBUG_PRINTF("vertex %u with %zu reports\n", g[v].index,

        // Find the set of vertices that lead to v or any other reporter with a
        // subset of v's reports. We do this by creating a copy of the graph,
        // cutting the appropriate out-edges to accept and seeing which
        // vertices become unreachable.

        ue2::unordered_map<NFAVertex, NFAVertex> orig_to_copy;
        NGHolder h;
        cloneHolder(h, g, &orig_to_copy);
        removeEdgesToAccept(h, orig_to_copy[v]);

        vector<NFAVertex> unreach = findUnreachable(h);
        DEBUG_PRINTF("can squash %zu vertices\n", unreach.size());
        if (unreach.empty()) {
            // NOTE(review): branch body missing; presumably skips to the next
            // reporter -- confirm.

        // Create the mask for v on first use.
        if (!contains(squash, v)) {
            squash[v] = NFAStateSet(numStates);

        NFAStateSet &mask = squash[v];

        for (auto uv : unreach) {
            DEBUG_PRINTF("squashes index %u\n", h[uv].index);
            // NOTE(review): the statement updating `mask` for h[uv].index is
            // missing from this excerpt.

    return squash;
/**
 * \brief Builds the graph \p h that represents role \p s1 and returns a
 * vertex count for it.
 *
 * - Castle role: a single vertex carrying the castle's reach is added to
 *   \p h, fed from startDs and given a self-loop. The value returned is
 *   num_vertices(h) as sampled before that vertex is added.
 * - Graph role: the role's graph is cloned into \p h and the value returned
 *   is num_vertices(h) after cloning.
 * - Any other role: \p h is left untouched and 0 is returned.
 *
 * \param h   Output graph, modified in place.
 * \param s1  Role to represent.
 * \return The vertex count described above, or 0 for unsupported roles.
 */
u32 prepareRoleGraph(NGHolder &h, const role_id &s1) {
    u32 num = 0;
    if (s1.castle()) {
        num = num_vertices(h);
        NFAVertex u = add_vertex(h);
        h[u].char_reach = s1.castle()->reach();
        add_edge(h.startDs, u, h);
        // add self loop to repeat characters
        add_edge(u, u, h);
    } else if (s1.graph()) {
        const NGHolder &g = *s1.graph();
        cloneHolder(h, g);
        num = num_vertices(h);
    } else {
        // only infixes and suffixes with graph properties are possible
        // candidates, already filtered out other cases before
        // exclusive analysis
    }

    return num;
}
Esempio n. 6
/**
 * Decides whether the start of match may move backwards for matches that end
 * at vertex \p u of graph \p g, caching the answer in \p cache.
 *
 * Visible outline: return the cached answer for \p u if there is one; treat a
 * non-self-loop back edge in g as a positive result (subject to a region test
 * against u's region); otherwise build a scratch copy of g that terminates at
 * u, treat any non-self-loop back edge in that copy as a positive result, and
 * finally return the negation of firstMatchIsFirst() on the copy.
 *
 * \param u           Vertex of \p g being examined.
 * \param g           Graph being analysed; must be the graph the cache was
 *                    created for (asserted).
 * \param region_map  Region id for each vertex of \p g.
 * \param cache       Memoised results, keyed by vertex.
 * \return true if SOM may go backwards at \p u.
 *
 * NOTE(review): this excerpt is heavily truncated. The explanatory block
 * comment at the top of the body has lost its terminator, so everything up to
 * the end of the next block comment is lexically commented out. The `exit`
 * label targeted by the goto statements, the head of the first
 * depth_first_search call, several right-hand sides, branch bodies and most
 * closing braces are also missing. Restore the function from the upstream
 * source before compiling.
 */
bool somMayGoBackwards(NFAVertex u, const NGHolder &g,
                       const ue2::unordered_map<NFAVertex, u32> &region_map,
                       smgb_cache &cache) {
    /* Need to ensure all matches of the graph g up to u contain no infixes
     * which are also matches of the graph to u.
     * This is basically the same as firstMatchIsFirst except we g is not
     * always a dag. As we haven't gotten around to writing an execute_graph
     * that operates on general graphs, we take some (hopefully) conservative
     * short cuts.
     * Note: if the u can be jumped we will take jump edges
     * into account as a possibility of som going backwards
     * TODO: write a generalised ng_execute_graph/make this less hacky
    assert(&g == &cache.g);
    if (contains(cache.smgb, u)) {
        return cache.smgb[u];

    DEBUG_PRINTF("checking if som can go backwards on %u\n",

    set<NFAEdge> be;
    BackEdges<set<NFAEdge>> backEdgeVisitor(be);
        g.g, visitor(backEdgeVisitor)
                 .vertex_index_map(get(&NFAGraphVertexProps::index, g.g)));

    bool rv;
    if (0) {
        DEBUG_PRINTF("using cached result\n");
        cache.smgb[u] = rv;
        return rv;

    assert(contains(region_map, u));
    const u32 u_region =;

    for (const auto &e : be) {
        NFAVertex s = source(e, g);
        NFAVertex t = target(e, g);
        /* only need to worry about big cycles including/before u */
        // NOTE(review): the trailing argument of this call is missing.
        DEBUG_PRINTF("back edge %u %u\n", g[s].index,
        // NOTE(review): the left operand of `<=` is missing here.
        if (s != t && <= u_region) {
            DEBUG_PRINTF("eek big cycle\n");
            rv = true; /* big cycle -> eek */
            goto exit;

    // Build a scratch copy of g; orig_to_copy maps each vertex of g to its
    // counterpart in the copy.
    ue2::unordered_map<NFAVertex, NFAVertex> orig_to_copy;
    NGHolder c_g;
    cloneHolder(c_g, g, &orig_to_copy);

    // Remap a vertex to the copy's startDs: its successors are handed to
    // startDs and the copied vertex is disconnected.
    for (NFAVertex v : vertices_range(g)) {
        // NOTE(review): branch body missing; presumably vertices that are not
        // virtual starts are skipped -- confirm.
        if (!is_virtual_start(v, g)) {
        NFAVertex c_v = orig_to_copy[v];
        orig_to_copy[v] = c_g.startDs;
        for (NFAVertex c_w : adjacent_vertices_range(c_v, c_g)) {
            add_edge_if_not_present(c_g.startDs, c_w, c_g);
        clear_vertex(c_v, c_g);

    // Make the copy of u the sole reporter: drop all existing edges into
    // accept/acceptEod, keep only accept -> acceptEod, strip u's out-edges
    // (re-adding its self-loop if it had one) and connect u to accept.
    NFAVertex c_u = orig_to_copy[u];
    clear_in_edges(c_g.acceptEod, c_g);
    add_edge(c_g.accept, c_g.acceptEod, c_g);
    clear_in_edges(c_g.accept, c_g);
    clear_out_edges(c_u, c_g);
    if (hasSelfLoop(u, g)) {
        add_edge(c_u, c_u, c_g);
    add_edge(c_u, c_g.accept, c_g);

    set<NFAVertex> u_succ;
    insert(&u_succ, adjacent_vertices(u, g));

    // A predecessor of u that shares a successor with u is also connected to
    // accept in the copy.
    for (auto t : inv_adjacent_vertices_range(u, g)) {
        // NOTE(review): branch body missing; presumably u itself is skipped.
        if (t == u) {
        for (auto v : adjacent_vertices_range(t, g)) {
            if (contains(u_succ, v)) {
                add_edge(orig_to_copy[t], c_g.accept, c_g);


    // Second back-edge search, this time over the copy.
    depth_first_search(c_g.g, visitor(backEdgeVisitor).root_vertex(c_g.start).
                       vertex_index_map(get(&NFAGraphVertexProps::index, c_g.g)));

    for (const auto &e : be) {
        NFAVertex s = source(e, c_g);
        NFAVertex t = target(e, c_g);
        DEBUG_PRINTF("back edge %u %u\n", c_g[s].index, c_g[t].index);
        if (s != t) {
            DEBUG_PRINTF("eek big cycle\n");
            rv = true; /* big cycle -> eek */
            goto exit;

    DEBUG_PRINTF("checking acyclic+selfloop graph\n");

    // Only self-loop back edges remain, so firstMatchIsFirst() can decide.
    rv = !firstMatchIsFirst(c_g);
    DEBUG_PRINTF("som may regress? %d\n", (int)rv);
    goto exit;
Esempio n. 7
/**
 * Computes an upper bound on the number of matches of the literals in
 * \p lits that can occur inside infix graph \p h.
 *
 * \param h     Infix graph; it is only cloned, never modified.
 * \param lits  Literals whose matches are being bounded.
 * \return NO_MATCH_LIMIT when no bound can be given (more than one top, an
 *         empty literal, or an infinite max width after contraction); 0 when
 *         no vertex could terminate any literal; otherwise the max width of
 *         the contracted copy of \p h.
 *
 * NOTE(review): this excerpt is truncated -- closing braces, the bodies of
 * the skip branches and the statements that fill `terms` and `dead` are
 * missing. Restore them from the upstream source before compiling.
 */
u32 findMaxInfixMatches(const NGHolder &h, const set<ue2_literal> &lits) {
    DEBUG_PRINTF("h=%p, %zu literals\n", &h, lits.size());
    //dumpGraph("", h.g);

    if (!onlyOneTop(h)) {
        // NOTE(review): "!n" in the message looks like a typo for "!\n".
        DEBUG_PRINTF("more than one top!n");
        return NO_MATCH_LIMIT;

    // Indices of vertices that could terminate any of the literals in 'lits'.
    set<u32> terms;

    for (const auto &s : lits) {
        DEBUG_PRINTF("lit s='%s'\n", escapeString(s).c_str());
        if (s.empty()) {
            // Likely an anchored case, be conservative here.
            return NO_MATCH_LIMIT;

        for (auto v : vertices_range(h)) {
            // NOTE(review): branch body missing; presumably special vertices
            // are skipped -- confirm.
            if (is_special(v, h)) {

            if (couldEndLiteral(s, v, h)) {
                u32 idx = h[v].index;
                DEBUG_PRINTF("vertex %u could terminate lit\n", idx);
                // NOTE(review): idx is presumably recorded in `terms` here;
                // that statement is missing from this excerpt.

    if (terms.empty()) {
        DEBUG_PRINTF("literals cannot match inside infix\n");
        return 0;

    // Work on a copy so that vertices can be contracted and removed.
    NGHolder g;
    cloneHolder(g, h);
    vector<NFAVertex> dead;

    // The set of all edges in the graph is used for existence checks in contractVertex.
    ue2::unordered_set<pair<NFAVertex, NFAVertex>> all_edges;
    for (const auto &e : edges_range(g)) {
        all_edges.emplace(source(e, g), target(e, g));

    for (auto v : vertices_range(g)) {
        // NOTE(review): both branch bodies below are missing; presumably
        // special vertices and literal-terminating vertices are left alone.
        if (is_special(v, g)) {
        if (contains(terms, g[v].index)) {

        contractVertex(g, v, all_edges);
        // NOTE(review): nothing visible adds v to `dead` before it is passed
        // to remove_vertices() below; that statement is presumably missing.

    remove_vertices(dead, g);
    //dumpGraph("", g.g);

    depth maxWidth = findMaxWidth(g);
    DEBUG_PRINTF("maxWidth=%s\n", maxWidth.str().c_str());

    if (maxWidth.is_infinite()) {
        // Cycle detected, so we can likely squeeze an unlimited number of
        // matches into this graph.
        return NO_MATCH_LIMIT;

    // The bound cannot exceed the number of terminating vertices.
    assert(terms.size() >= maxWidth);
    return maxWidth;