Ejemplo n.º 1
0
/** Remove vacuous edges in graphs where the min_offset or min_length
 * constraints dictate that they can never produce a match. */
static
void pruneVacuousEdges(NGWrapper &g) {
    if (!g.min_length && !g.min_offset) {
        return;
    }

    vector<NFAEdge> dead;

    for (const auto &e : edges_range(g)) {
        const NFAVertex u = source(e, g);
        const NFAVertex v = target(e, g);

        // Special case: Crudely remove vacuous edges from start in graphs with a
        // min_offset.
        if (g.min_offset && u == g.start && is_any_accept(v, g)) {
            DEBUG_PRINTF("vacuous edge in graph with min_offset!\n");
            dead.push_back(e);
            continue;
        }

        // If a min_length is set, vacuous edges can be removed.
        if (g.min_length && is_any_start(u, g) && is_any_accept(v, g)) {
            DEBUG_PRINTF("vacuous edge in graph with min_length!\n");
            dead.push_back(e);
            continue;
        }
    }

    if (dead.empty()) {
        return;
    }

    remove_edges(dead, g);
    pruneUseless(g);
}
Ejemplo n.º 2
0
/** Remove edges to accepts that can never produce a match long enough to
 * satisfy our min_length and max_offset constraints. */
static
void pruneUnmatchable(NGWrapper &g, const ReportManager &rm) {
    if (!g.min_length) {
        return;
    }

    vector<DepthMinMax> depths = getDistancesFromSOM(g);

    pruneUnmatchable(g, depths, rm, g.accept);
    pruneUnmatchable(g, depths, rm, g.acceptEod);

    pruneUseless(g);
}
Ejemplo n.º 3
0
/** \brief Contract cycles of UTF-8 code points down to a single cyclic vertex
 * where possible, based on the assumption that we will always be matching
 * against well-formed input. */
void utf8DotRestoration(NGHolder &h, bool som) {
    vector<NFAVertex> seeds; /* cyclic ascii vertices */
    findSeeds(h, som, &seeds);

    bool changes = false;
    for (auto v : seeds) {
        changes |= expandCyclic(h, v);
    }

    if (changes) {
        pruneUseless(h);
    }
}
Ejemplo n.º 4
0
static
void pruneExtUnreachable(NGWrapper &g) {
    vector<NFAVertexBidiDepth> depths;
    calcDepths(g, depths);

    vector<NFAEdge> dead;

    for (const auto &e : edges_range(g)) {
        if (isEdgePrunable(g, depths, e)) {
            DEBUG_PRINTF("pruning\n");
            dead.push_back(e);
        }
    }

    if (dead.empty()) {
        return;
    }

    remove_edges(dead, g);
    pruneUseless(g);
}
Ejemplo n.º 5
0
/** \brief Convert temporary assert vertices (from construction method) to
 * edge-based flags.
 *
 * Remove the horrors that are the temporary assert vertices which arise from
 * our construction method. Allows the rest of our code base to live in
 * blissful ignorance of their existence. */
void removeAssertVertices(ReportManager &rm, NGWrapper &g) {
    size_t num = 0;

    DEBUG_PRINTF("before: graph has %zu vertices\n", num_vertices(g));

    // Sweep over the graph and ascertain that we do actually have vertices
    // with assertion flags set. Otherwise, we're done.
    if (!hasAssertVertices(g)) {
        DEBUG_PRINTF("no assert vertices, done\n");
        return;
    }

    u32 assert_edge_count = 0;

    // Build a cache of (u, v) vertex pairs to edge descriptors.
    edge_cache_t edge_cache;
    for (const auto &e : edges_range(g)) {
        edge_cache[make_pair(source(e, g), target(e, g))] = e;
    }

    for (auto v : vertices_range(g)) {
        if (g[v].assert_flags & WORDBOUNDARY_FLAGS) {
            replaceAssertVertex(g, v, edge_cache, assert_edge_count);
            num++;
        }
    }

    checkForMultilineStart(rm, g);

    if (num) {
        DEBUG_PRINTF("resolved %zu assert vertices\n", num);
        pruneUseless(g);
        pruneEmptyVertices(g);
        g.renumberVertices();
        g.renumberEdges();
    }

    DEBUG_PRINTF("after: graph has %zu vertices\n", num_vertices(g));
    assert(!hasAssertVertices(g));
}
Ejemplo n.º 6
0
/** This code removes any vertices which do not accept any symbols. Any
 * vertices which no longer lie on a path from a start to an accept are also
 * pruned. */
void pruneEmptyVertices(NGHolder &g) {
    DEBUG_PRINTF("pruning empty vertices\n");
    vector<NFAVertex> dead;
    for (auto v : vertices_range(g)) {
        if (is_special(v, g)) {
            continue;
        }

        const CharReach &cr = g[v].char_reach;
        if (cr.none()) {
            DEBUG_PRINTF("empty: %u\n", g[v].index);
            dead.push_back(v);
        }
    }

    if (dead.empty()) {
        return;
    }

    remove_vertices(dead, g);
    pruneUseless(g);
}
Ejemplo n.º 7
0
/** Remove any edges from vertices that generate accepts (for Highlander
 * graphs). */
void pruneHighlanderAccepts(NGHolder &g, const ReportManager &rm) {
    // Safety check: all reports must be simple exhaustible reports, or this is
    // not safe. This optimisation should be called early enough that no
    // internal reports have been added.
    for (auto report_id : all_reports(g)) {
        const Report &ir = rm.getReport(report_id);

        if (ir.ekey == INVALID_EKEY || ir.hasBounds() ||
            !isExternalReport(ir)) {
            DEBUG_PRINTF("report %u is not external highlander with "
                         "no bounds\n", report_id);
            return;
        }
    }

    vector<NFAEdge> dead;
    for (auto u : inv_adjacent_vertices_range(g.accept, g)) {
        if (is_special(u, g)) {
            continue;
        }

        // We can prune any out-edges that aren't accepts
        for (const auto &e : out_edges_range(u, g)) {
            if (!is_any_accept(target(e, g), g)) {
                dead.push_back(e);
            }
        }
    }

    if (dead.empty()) {
        return;
    }

    DEBUG_PRINTF("found %zu removable edges due to single match\n", dead.size());
    remove_edges(dead, g);
    pruneUseless(g);
}
Ejemplo n.º 8
0
bool somMayGoBackwards(NFAVertex u, const NGHolder &g,
                       const ue2::unordered_map<NFAVertex, u32> &region_map,
                       smgb_cache &cache) {
    /* Need to ensure all matches of the graph g up to u contain no infixes
     * which are also matches of the graph to u.
     *
     * This is basically the same as firstMatchIsFirst except we g is not
     * always a dag. As we haven't gotten around to writing an execute_graph
     * that operates on general graphs, we take some (hopefully) conservative
     * short cuts.
     *
     * Note: if the u can be jumped we will take jump edges
     * into account as a possibility of som going backwards
     *
     * TODO: write a generalised ng_execute_graph/make this less hacky
     */
    assert(&g == &cache.g);
    if (contains(cache.smgb, u)) {
        return cache.smgb[u];
    }

    DEBUG_PRINTF("checking if som can go backwards on %u\n",
                  g[u].index);

    set<NFAEdge> be;
    BackEdges<set<NFAEdge>> backEdgeVisitor(be);
    depth_first_search(
        g.g, visitor(backEdgeVisitor)
                 .root_vertex(g.start)
                 .vertex_index_map(get(&NFAGraphVertexProps::index, g.g)));

    bool rv;
    if (0) {
    exit:
        DEBUG_PRINTF("using cached result\n");
        cache.smgb[u] = rv;
        return rv;
    }

    assert(contains(region_map, u));
    const u32 u_region = region_map.at(u);

    for (const auto &e : be) {
        NFAVertex s = source(e, g);
        NFAVertex t = target(e, g);
        /* only need to worry about big cycles including/before u */
        DEBUG_PRINTF("back edge %u %u\n", g[s].index,
                      g[t].index);
        if (s != t && region_map.at(s) <= u_region) {
            DEBUG_PRINTF("eek big cycle\n");
            rv = true; /* big cycle -> eek */
            goto exit;
        }
    }

    ue2::unordered_map<NFAVertex, NFAVertex> orig_to_copy;
    NGHolder c_g;
    cloneHolder(c_g, g, &orig_to_copy);

    for (NFAVertex v : vertices_range(g)) {
        if (!is_virtual_start(v, g)) {
            continue;
        }
        NFAVertex c_v = orig_to_copy[v];
        orig_to_copy[v] = c_g.startDs;
        for (NFAVertex c_w : adjacent_vertices_range(c_v, c_g)) {
            add_edge_if_not_present(c_g.startDs, c_w, c_g);
        }
        clear_vertex(c_v, c_g);
    }

    NFAVertex c_u = orig_to_copy[u];
    clear_in_edges(c_g.acceptEod, c_g);
    add_edge(c_g.accept, c_g.acceptEod, c_g);
    clear_in_edges(c_g.accept, c_g);
    clear_out_edges(c_u, c_g);
    if (hasSelfLoop(u, g)) {
        add_edge(c_u, c_u, c_g);
    }
    add_edge(c_u, c_g.accept, c_g);

    set<NFAVertex> u_succ;
    insert(&u_succ, adjacent_vertices(u, g));
    u_succ.erase(u);

    for (auto t : inv_adjacent_vertices_range(u, g)) {
        if (t == u) {
            continue;
        }
        for (auto v : adjacent_vertices_range(t, g)) {
            if (contains(u_succ, v)) {
                add_edge(orig_to_copy[t], c_g.accept, c_g);
                break;
            }
        }
    }

    pruneUseless(c_g);

    be.clear();
    depth_first_search(c_g.g, visitor(backEdgeVisitor).root_vertex(c_g.start).
                       vertex_index_map(get(&NFAGraphVertexProps::index, c_g.g)));

    for (const auto &e : be) {
        NFAVertex s = source(e, c_g);
        NFAVertex t = target(e, c_g);
        DEBUG_PRINTF("back edge %u %u\n", c_g[s].index, c_g[t].index);
        if (s != t) {
            assert(0);
            DEBUG_PRINTF("eek big cycle\n");
            rv = true; /* big cycle -> eek */
            goto exit;
        }
    }

    DEBUG_PRINTF("checking acyclic+selfloop graph\n");

    rv = !firstMatchIsFirst(c_g);
    DEBUG_PRINTF("som may regress? %d\n", (int)rv);
    goto exit;
}
Ejemplo n.º 9
0
void pruneHighlanderDominated(NGHolder &g, const ReportManager &rm) {
    vector<NFAVertex> reporters;
    for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
        for (const auto &report_id : g[v].reports) {
            const Report &r = rm.getReport(report_id);
            if (isSimpleExhaustible(r)) {
                reporters.push_back(v);
                break;
            }
        }
    }
    for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
        for (const auto &report_id : g[v].reports) {
            const Report &r = rm.getReport(report_id);
            if (isSimpleExhaustible(r)) {
                reporters.push_back(v);
                break;
            }
        }
    }

    if (reporters.empty()) {
        return;
    }


    sort(begin(reporters), end(reporters), make_index_ordering(g));
    reporters.erase(unique(begin(reporters), end(reporters)), end(reporters));

    DEBUG_PRINTF("%zu vertices have simple exhaustible reports\n",
                 reporters.size());

    const auto &dom = findDominators(g);
    bool modified = false;

    // If a reporter vertex is dominated by another with the same report, we
    // can remove that report; if all reports are removed, we can remove the
    // vertex entirely.
    for (const auto v : reporters) {
        const auto reports = g[v].reports; // copy, as we're going to mutate
        for (const auto &report_id : reports) {
            if (!isSimpleExhaustible(rm.getReport(report_id))) {
                continue;
            }
            if (isDominatedByReporter(g, dom, v, report_id)) {
                DEBUG_PRINTF("removed dominated report %u from vertex %u\n",
                             report_id, g[v].index);
                g[v].reports.erase(report_id);
            }
        }

        if (g[v].reports.empty()) {
            DEBUG_PRINTF("removed edges to accepts from %u, no reports left\n",
                          g[v].index);
            remove_edge(v, g.accept, g);
            remove_edge(v, g.acceptEod, g);
            modified = true;
        }
    }

    // If a reporter vertex has a self-loop, but otherwise only leads to accept
    // (note: NOT acceptEod) and has simple exhaustible reports, we can delete
    // the self-loop.
    for (const auto v : reporters) {
        if (hasOnlySelfLoopAndExhaustibleAccepts(g, rm, v)) {
            remove_edge(v, v, g);
            modified = true;
            DEBUG_PRINTF("removed self-loop on %u\n", g[v].index);
        }
    }

    if (!modified) {
        return;
    }

    pruneUseless(g);

    // We may have only removed self-loops, in which case pruneUseless wouldn't
    // renumber, so we do edge renumbering explicitly here.
    g.renumberEdges();
}