Пример #1
0
static
void getHighlanderReporters(const NGHolder &g, const NFAVertex accept,
                            const ReportManager &rm,
                            set<NFAVertex> &verts) {
    for (auto v : inv_adjacent_vertices_range(accept, g)) {
        if (v == g.accept) {
            continue;
        }

        const auto &reports = g[v].reports;
        if (reports.empty()) {
            assert(0);
            continue;
        }

        // Must be _all_ highlander callback reports.
        for (auto report : reports) {
            const Report &ir = rm.getReport(report);
            if (ir.ekey == INVALID_EKEY || ir.type != EXTERNAL_CALLBACK) {
                goto next_vertex;
            }

            // If there's any bounds, these are handled outside the NFA and
            // probably shouldn't be pre-empted.
            if (ir.hasBounds()) {
                goto next_vertex;
            }
        }

        verts.insert(v);
    next_vertex:
        continue;
    }
}
Пример #2
0
static
void getBackwardReach(const NGHolder &g, ReportID report, u32 lag,
                      map<s32, CharReach> &look) {
    ue2::flat_set<NFAVertex> curr, next;

    for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
        if (contains(g[v].reports, report)) {
            curr.insert(v);
        }
    }

    for (u32 i = lag + 1; i <= MAX_BACK_LEN; i++) {
        if (curr.empty() || contains(curr, g.start) ||
            contains(curr, g.startDs)) {
            break;
        }

        next.clear();
        CharReach cr;

        for (auto v : curr) {
            assert(!is_special(v, g));
            cr |= g[v].char_reach;
            insert(&next, inv_adjacent_vertices(v, g));
        }

        assert(cr.any());
        look[0 - i] |= cr;
        curr.swap(next);
    }
}
Пример #3
0
/** Set in \a pred the index bit of every non-special predecessor of \a v. */
static
void buildPred(NFAStateSet &pred, const NGHolder &g, NFAVertex v) {
    for (auto u : inv_adjacent_vertices_range(v, g)) {
        if (is_special(u, g)) {
            continue; // special vertices are not recorded
        }
        pred.set(g[u].index);
    }
}
Пример #4
0
/** Returns true if at least one predecessor of \a v is a member of \a s. */
static
bool hasPredInSet(const NGHolder &g, NFAVertex v, const set<NFAVertex> &s) {
    bool found = false;
    for (auto u : inv_adjacent_vertices_range(v, g)) {
        if (s.find(u) != s.end()) {
            found = true;
            break;
        }
    }
    return found;
}
Пример #5
0
/**
 * Returns true if \a v has exactly one in-neighbour that is neither a special
 * vertex nor \a v itself (i.e. self-loops are ignored).
 */
static
bool inIsIrreducible(NFAVertex &v, const NGHolder &g) {
    unsigned ordinary_preds = 0;
    for (auto u : inv_adjacent_vertices_range(v, g)) {
        if (is_special(u, g) || u == v) {
            continue;
        }
        ++ordinary_preds;
    }
    return ordinary_preds == 1;
}
Пример #6
0
static
bool getTransientPrefixReach(const NGHolder &g, u32 lag,
                             map<s32, CharReach> &look) {
    if (in_degree(g.accept, g) != 1) {
        DEBUG_PRINTF("more than one accept\n");
        return false;
    }

    // Must be a floating chain wired to startDs.
    if (!hasSingleFloatingStart(g)) {
        DEBUG_PRINTF("not a single floating start\n");
        return false;
    }

    NFAVertex v = *(inv_adjacent_vertices(g.accept, g).first);
    u32 i = lag + 1;
    while (v != g.startDs) {
        DEBUG_PRINTF("i=%u, v=%u\n", i, g[v].index);
        if (is_special(v, g)) {
            DEBUG_PRINTF("special\n");
            return false;
        }

        look[0 - i] = g[v].char_reach;

        NFAVertex next = NGHolder::null_vertex();
        for (auto u : inv_adjacent_vertices_range(v, g)) {
            if (u == g.start) {
                continue; // Benign, checked by hasSingleFloatingStart
            }
            if (next == NGHolder::null_vertex()) {
                next = u;
                continue;
            }
            DEBUG_PRINTF("branch\n");
            return false;
        }

        if (next == NGHolder::null_vertex() || next == v) {
            DEBUG_PRINTF("no predecessor or only self-loop\n");
            // This graph is malformed -- all vertices in a graph that makes it
            // to this analysis should have predecessors.
            assert(0);
            return false;
        }

        v = next;
        i++;
    }

    DEBUG_PRINTF("done\n");
    return true;
}
Пример #7
0
static
bool requiresDedupe(const NGHolder &h, const flat_set<ReportID> &reports,
                    const Grey &grey) {
    /* TODO: tighten */
    NFAVertex seen_vert = NGHolder::null_vertex();

    for (auto v : inv_adjacent_vertices_range(h.accept, h)) {
        if (has_intersection(h[v].reports, reports)) {
            if (seen_vert != NGHolder::null_vertex()) {
                return true;
            }
            seen_vert = v;
        }
    }

    for (auto v : inv_adjacent_vertices_range(h.acceptEod, h)) {
        if (has_intersection(h[v].reports, reports)) {
            if (seen_vert != NGHolder::null_vertex()) {
                return true;
            }
            seen_vert = v;
        }
    }

    if (seen_vert) {
        /* if the reporting vertex is part of of a terminal repeat, the
         * construction process may reform the graph splitting it into two
         * vertices (pos, cyclic) and hence require dedupe */
        vector<GraphRepeatInfo> repeats;
        findRepeats(h, grey.minExtBoundedRepeatSize, &repeats);
        for (const auto &repeat : repeats) {
            if (find(repeat.vertices.begin(), repeat.vertices.end(),
                     seen_vert) != repeat.vertices.end()) {
                return true;
            }
        }
    }

    return false;
}
Пример #8
0
/**
 * \brief Compute the (min, max) match length over all reporting vertices.
 *
 * Every predecessor of accept and of acceptEod contributes its distance from
 * SOM, corrected by the offset adjustment of its reports; the results are
 * unioned together.
 */
static
DepthMinMax findMatchLengths(const ReportManager &rm, const NGHolder &g) {
    DepthMinMax match_depths;

    vector<DepthMinMax> depths = getDistancesFromSOM(g);

    const NFAVertex sinks[] = {g.accept, g.acceptEod};
    for (const NFAVertex sink : sinks) {
        for (auto v : inv_adjacent_vertices_range(sink, g)) {
            // The accept -> acceptEod edge is not a reporting vertex.
            if (sink == g.acceptEod && v == g.accept) {
                continue;
            }

            u32 idx = g[v].index;
            DepthMinMax d = depths[idx]; // copy, adjusted below
            pair<s32, s32> adj = getMinMaxOffsetAdjust(rm, g, v);
            DEBUG_PRINTF("vertex %u: depths=%s, adj=[%d,%d]\n", idx,
                         d.str().c_str(), adj.first, adj.second);
            d.min += adj.first;
            d.max += adj.second;
            match_depths = unionDepthMinMax(match_depths, d);
        }
    }

    DEBUG_PRINTF("match_depths=%s\n", match_depths.str().c_str());

    assert(match_depths.min.is_reachable());
    assert(match_depths.max.is_reachable());
    return match_depths;
}
Пример #9
0
/**
 * \brief Swap the reports on each predecessor of \a accept for copies that
 * carry the graph's min/max offset and min length bounds.
 *
 * Vertices are recorded in \a done so that a vertex reachable from both accept
 * vertices is only rewritten once.
 */
static
void updateReportBounds(ReportManager &rm, NGWrapper &g, NFAVertex accept,
                        set<NFAVertex> &done) {
    for (auto v : inv_adjacent_vertices_range(accept, g)) {
        if (v == g.accept) {
            // g.accept can only show up here as a predecessor of acceptEod;
            // it carries no reports of its own to rewrite.
            assert(accept == g.acceptEod);
            continue;
        }

        // insert() reports whether v was new; skip it if already processed.
        if (!done.insert(v).second) {
            continue;
        }

        auto &reports = g[v].reports;
        flat_set<ReportID> bounded;

        for (auto id : reports) {
            Report ir = rm.getReport(id); // work on a private copy
            assert(!ir.hasBounds());

            // Offsets are stored relative to the report's offset adjustment.
            ir.minOffset = g.min_offset - ir.offsetAdjust;
            if (g.max_offset != MAX_OFFSET) {
                ir.maxOffset = g.max_offset - ir.offsetAdjust;
            } else {
                ir.maxOffset = MAX_OFFSET;
            }
            assert(ir.maxOffset >= ir.minOffset);

            ir.minLength = g.min_length;
            if (g.min_length && !g.som) {
                ir.quashSom = true;
            }

            DEBUG_PRINTF("id %u -> min_offset=%llu, max_offset=%llu, "
                         "min_length=%llu\n",
                         id, ir.minOffset, ir.maxOffset, ir.minLength);
            bounded.insert(rm.getInternalId(ir));
        }

        DEBUG_PRINTF("swapping reports on vertex %u\n",
                     g[v].index);
        reports.swap(bounded);
    }
}
Пример #10
0
static
void findDerivedSquashers(const NGHolder &g, const vector<NFAVertex> &vByIndex,
                          const PostDomTree &pdom_tree, const NFAStateSet &init,
                          map<NFAVertex, NFAStateSet> *squash, som_type som,
                          const vector<DepthMinMax> &som_depths,
                          const ue2::unordered_map<NFAVertex, u32> &region_map,
                          smgb_cache &cache) {
    deque<NFAVertex> remaining;
    for (const auto &m : *squash) {
        remaining.push_back(m.first);
    }

    while (!remaining.empty()) {
        NFAVertex v = remaining.back();
        remaining.pop_back();

        for (auto u : inv_adjacent_vertices_range(v, g)) {
            if (is_special(u, g)) {
                continue;
            }

            if (g[v].char_reach != g[u].char_reach) {
                continue;
            }

            if (out_degree(u, g) != 1) {
                continue;
            }

            NFAStateSet u_squash(init.size());
            u32 u_index = g[u].index;

            buildSquashMask(u_squash, g, u, g[u].char_reach, init, vByIndex,
                            pdom_tree, som, som_depths, region_map, cache);

            u_squash.set(u_index); /* never clear ourselves */

            if ((~u_squash).any()) { // i.e. some bits unset in mask
                DEBUG_PRINTF("%u is an upstream squasher of %u\n", u_index,
                             g[v].index);
                (*squash)[u] = u_squash;
                remaining.push_back(u);
            }
        }
    }
}
Пример #11
0
/**
 * For Highlander graphs, drop every non-accept out-edge of the vertices that
 * lead to accept: once such a vertex has fired there is nothing more to find.
 *
 * The transformation is skipped unless every report in the graph is an
 * external, exhaustible report without bounds. It is meant to run early,
 * before any internal reports have been added.
 */
void pruneHighlanderAccepts(NGHolder &g, const ReportManager &rm) {
    for (auto report_id : all_reports(g)) {
        const Report &ir = rm.getReport(report_id);

        const bool simple_highlander = ir.ekey != INVALID_EKEY &&
                                       !ir.hasBounds() && isExternalReport(ir);
        if (!simple_highlander) {
            DEBUG_PRINTF("report %u is not external highlander with "
                         "no bounds\n", report_id);
            return;
        }
    }

    // Gather first, remove afterwards, so the graph is not modified while it
    // is being iterated.
    vector<NFAEdge> dead;
    for (auto u : inv_adjacent_vertices_range(g.accept, g)) {
        if (is_special(u, g)) {
            continue;
        }

        for (const auto &e : out_edges_range(u, g)) {
            if (is_any_accept(target(e, g), g)) {
                continue; // edges into accepts are kept
            }
            dead.push_back(e);
        }
    }

    if (!dead.empty()) {
        DEBUG_PRINTF("found %zu removable edges due to single match\n", dead.size());
        remove_edges(dead, g);
        pruneUseless(g);
    }
}
Пример #12
0
/**
 * Bypass vertex \a v: connect each of its predecessors to each of its
 * successors (self-loops on v excluded) and then detach v from the graph.
 *
 * \a all_edges tracks the (source, target) pairs known to exist, so that
 * duplicate edges are not created.
 */
static
void contractVertex(NGHolder &g, NFAVertex v,
                    ue2::unordered_set<pair<NFAVertex, NFAVertex>> &all_edges) {
    for (auto u : inv_adjacent_vertices_range(v, g)) {
        if (u != v) { // skip self-edge
            for (auto w : adjacent_vertices_range(v, g)) {
                // Skip the self-edge, then add edge (u, w) only if it is new.
                // Existence is checked through all_edges because querying the
                // graph is expensive when u or w have large degree.
                if (w != v && all_edges.emplace(u, w).second) {
                    add_edge(u, w, g);
                }
            }
        }
    }

    // Edges to/from v are deliberately left behind in all_edges.
    clear_vertex(v, g);
}
Пример #13
0
static
void findSeeds(const NGHolder &h, const bool som, vector<NFAVertex> *seeds) {
    set<NFAVertex> bad; /* from zero-width asserts near accepts, etc */
    for (auto v : inv_adjacent_vertices_range(h.accept, h)) {
        const CharReach &cr = h[v].char_reach;
        if (!isutf8ascii(cr) && !isutf8start(cr)) {
            bad.insert(v);
        }
    }

    for (auto v : inv_adjacent_vertices_range(h.acceptEod, h)) {
        const CharReach &cr = h[v].char_reach;
        if (!isutf8ascii(cr) && !isutf8start(cr)) {
            bad.insert(v);
        }
    }

    // we want to be careful with asserts connected to starts
    // as well as they may not finish a code point
    for (auto v : vertices_range(h)) {
        if (is_virtual_start(v, h)) {
            bad.insert(v);
            insert(&bad, adjacent_vertices(v, h));
        }
    }

    /* we cannot handle vertices connected to accept as would report matches in
     * the middle of codepoints. acceptEod is not a problem as the input must
     * end at a codepoint boundary */
    bad.insert(h.accept);

    // If we're in SOM mode, we don't want to mess with vertices that have a
    // direct edge from startDs.
    if (som) {
        insert(&bad, adjacent_vertices(h.startDs, h));
    }

    set<NFAVertex> already_seeds; /* already marked as seeds */
    for (auto v : vertices_range(h)) {
        const CharReach &cr = h[v].char_reach;

        if (!isutf8ascii(cr) || !hasSelfLoop(v, h)) {
            continue;
        }

        if (hasSuccInSet(h, v, bad)) {
            continue;
        }

        // Skip vertices that are directly connected to other vertices already
        // in the seeds list: we can't collapse two of these directly next to
        // each other.
        if (hasPredInSet(h, v, already_seeds) ||
            hasSuccInSet(h, v, already_seeds)) {
            continue;
        }

        DEBUG_PRINTF("%zu is a seed\n", h[v].index);
        seeds->push_back(v);
        already_seeds.insert(v);
    }
}
Пример #14
0
/**
 * \brief Conservatively decide whether the start of match may go backwards
 * for matches ending at vertex \a u of graph \a g.
 *
 * The answer is memoised per vertex in \a cache.smgb; \a cache must have been
 * built for \a g (asserted). \a region_map must contain \a u (asserted).
 *
 * Returns true if a non-self-loop back edge is found whose source lies in
 * u's region or an earlier one, or if firstMatchIsFirst() fails on a modified
 * copy of the graph that is truncated at \a u.
 */
bool somMayGoBackwards(NFAVertex u, const NGHolder &g,
                       const ue2::unordered_map<NFAVertex, u32> &region_map,
                       smgb_cache &cache) {
    /* Need to ensure all matches of the graph g up to u contain no infixes
     * which are also matches of the graph to u.
     *
     * This is basically the same as firstMatchIsFirst except that g is not
     * always a dag. As we haven't gotten around to writing an execute_graph
     * that operates on general graphs, we take some (hopefully) conservative
     * short cuts.
     *
     * Note: if the u can be jumped we will take jump edges
     * into account as a possibility of som going backwards
     *
     * TODO: write a generalised ng_execute_graph/make this less hacky
     */
    assert(&g == &cache.g);
    // Fast path: a previous call already answered the question for u.
    if (contains(cache.smgb, u)) {
        return cache.smgb[u];
    }

    DEBUG_PRINTF("checking if som can go backwards on %u\n",
                  g[u].index);

    // Collect the back edges of g via a DFS rooted at start.
    set<NFAEdge> be;
    BackEdges<set<NFAEdge>> backEdgeVisitor(be);
    depth_first_search(
        g.g, visitor(backEdgeVisitor)
                 .root_vertex(g.start)
                 .vertex_index_map(get(&NFAGraphVertexProps::index, g.g)));

    // Shared exit path. The block is never entered by fallthrough, only via
    // "goto exit" once rv has been assigned; it stores rv in the cache and
    // returns it. Note that the debug message below is therefore emitted for
    // freshly computed results, not for the cache hit handled above.
    bool rv;
    if (0) {
    exit:
        DEBUG_PRINTF("using cached result\n");
        cache.smgb[u] = rv;
        return rv;
    }

    assert(contains(region_map, u));
    const u32 u_region = region_map.at(u);

    for (const auto &e : be) {
        NFAVertex s = source(e, g);
        NFAVertex t = target(e, g);
        /* only need to worry about big cycles including/before u */
        DEBUG_PRINTF("back edge %u %u\n", g[s].index,
                      g[t].index);
        if (s != t && region_map.at(s) <= u_region) {
            DEBUG_PRINTF("eek big cycle\n");
            rv = true; /* big cycle -> eek */
            goto exit;
        }
    }

    // Work on a copy from here on; g itself is never modified.
    ue2::unordered_map<NFAVertex, NFAVertex> orig_to_copy;
    NGHolder c_g;
    cloneHolder(c_g, g, &orig_to_copy);

    // Fold each virtual start into startDs in the copy: its successors are
    // rewired to startDs and the vertex itself is disconnected.
    for (NFAVertex v : vertices_range(g)) {
        if (!is_virtual_start(v, g)) {
            continue;
        }
        NFAVertex c_v = orig_to_copy[v];
        orig_to_copy[v] = c_g.startDs;
        for (NFAVertex c_w : adjacent_vertices_range(c_v, c_g)) {
            add_edge_if_not_present(c_g.startDs, c_w, c_g);
        }
        clear_vertex(c_v, c_g);
    }

    // Truncate the copy at u: drop all existing accept edges and u's
    // out-edges, then make u (keeping its self-loop, if any) lead to accept.
    NFAVertex c_u = orig_to_copy[u];
    clear_in_edges(c_g.acceptEod, c_g);
    add_edge(c_g.accept, c_g.acceptEod, c_g);
    clear_in_edges(c_g.accept, c_g);
    clear_out_edges(c_u, c_g);
    if (hasSelfLoop(u, g)) {
        add_edge(c_u, c_u, c_g);
    }
    add_edge(c_u, c_g.accept, c_g);

    // Successors of u in the original graph, excluding u itself.
    set<NFAVertex> u_succ;
    insert(&u_succ, adjacent_vertices(u, g));
    u_succ.erase(u);

    // A predecessor t of u that also has an edge to one of u's successors can
    // jump over u; such vertices are made accepting in the copy as well.
    for (auto t : inv_adjacent_vertices_range(u, g)) {
        if (t == u) {
            continue;
        }
        for (auto v : adjacent_vertices_range(t, g)) {
            if (contains(u_succ, v)) {
                add_edge(orig_to_copy[t], c_g.accept, c_g);
                break;
            }
        }
    }

    pruneUseless(c_g);

    // Recompute back edges on the pruned copy; only self-loops are expected
    // to remain at this point (anything else trips the assert below).
    be.clear();
    depth_first_search(c_g.g, visitor(backEdgeVisitor).root_vertex(c_g.start).
                       vertex_index_map(get(&NFAGraphVertexProps::index, c_g.g)));

    for (const auto &e : be) {
        NFAVertex s = source(e, c_g);
        NFAVertex t = target(e, c_g);
        DEBUG_PRINTF("back edge %u %u\n", c_g[s].index, c_g[t].index);
        if (s != t) {
            assert(0);
            DEBUG_PRINTF("eek big cycle\n");
            rv = true; /* big cycle -> eek */
            goto exit;
        }
    }

    DEBUG_PRINTF("checking acyclic+selfloop graph\n");

    rv = !firstMatchIsFirst(c_g);
    DEBUG_PRINTF("som may regress? %d\n", (int)rv);
    goto exit;
}
Пример #15
0
/** If the pattern has a min_length and is of "ratchet" form with one unbounded
 * repeat, that repeat can become a bounded repeat.
 *
 *     /foo.*bar/{min_length=100} --> /foo.{94,}bar/
 *
 * Returns false (leaving \a g untouched) when the graph does not have the
 * required shape or g.min_length exceeds MAX_MINLENGTH_TO_CONVERT. Returns
 * true once g.min_length has been cleared: either because the graph's width
 * already guarantees it, or after a chain of extra vertices has been inserted
 * in front of the cyclic vertex. In the latter case vertices and edges are
 * renumbered and clearReports() is called on the graph.
 */
static
bool transformMinLengthToRepeat(const ReportManager &rm, NGWrapper &g) {
    assert(g.min_length);

    if (g.min_length > MAX_MINLENGTH_TO_CONVERT) {
        return false;
    }

    // If the pattern has virtual starts, we probably don't want to touch it.
    if (hasVirtualStarts(g)) {
        DEBUG_PRINTF("virtual starts, bailing\n");
        return false;
    }

    // The graph must contain a single cyclic vertex (other than startDs), and
    // that vertex can have one pred and one successor.
    NFAVertex cyclic = findSingleCyclic(g);
    if (cyclic == NGHolder::null_vertex()) {
        return false;
    }

    // Pick the single initial vertex: the successor of start other than
    // startDs.
    // NOTE(review): startDs is only skipped when it is the first successor
    // returned, and *ai is dereferenced without comparing against ae, so this
    // assumes start always has a non-startDs successor -- confirm.
    NGHolder::adjacency_iterator ai, ae;
    tie(ai, ae) = adjacent_vertices(g.start, g);
    if (*ai == g.startDs) {
        ++ai;
    }
    NFAVertex v = *ai;
    if (++ai != ae) {
        DEBUG_PRINTF("more than one initial vertex\n");
        return false;
    }

    // Number of non-cyclic vertices on the path from start to accept.
    u32 width = 0;


    // Walk from the start vertex to the cyclic state and ensure we have a
    // chain of vertices.
    while (v != cyclic) {
        DEBUG_PRINTF("vertex %u\n", g[v].index);
        width++;
        tie(ai, ae) = adjacent_vertices(v, g);
        set<NFAVertex> succ(ai, ae);
        if (contains(succ, cyclic)) {
            if (succ.size() == 1) {
                v = cyclic;
            } else if (succ.size() == 2) {
                // Cyclic and jump edge.
                // The other successor must also be a successor of the cyclic
                // vertex, i.e. the edge merely skips over the cyclic vertex.
                succ.erase(cyclic);
                NFAVertex v2 = *succ.begin();
                if (!edge(cyclic, v2, g).second) {
                    DEBUG_PRINTF("bad form\n");
                    return false;
                }
                v = cyclic;
            } else {
                DEBUG_PRINTF("bad form\n");
                return false;
            }
        } else {
            if (succ.size() != 1) {
                DEBUG_PRINTF("bad form\n");
                return false;
            }
            v = *succ.begin();
        }
    }

    // Check the cyclic state is A-OK.
    v = getSoleDestVertex(g, cyclic);
    if (v == NGHolder::null_vertex()) {
        DEBUG_PRINTF("cyclic has more than one successor\n");
        return false;
    }

    // Walk from the cyclic state to an accept and ensure we have a chain of
    // vertices.
    while (!is_any_accept(v, g)) {
        DEBUG_PRINTF("vertex %u\n", g[v].index);
        width++;
        tie(ai, ae) = adjacent_vertices(v, g);
        set<NFAVertex> succ(ai, ae);
        if (succ.size() != 1) {
            DEBUG_PRINTF("bad form\n");
            return false;
        }
        v = *succ.begin();
    }

    // Fold the reports' offset adjustment into the width.
    int offsetAdjust = 0;
    if (!hasOffsetAdjust(rm, g, &offsetAdjust)) {
        return false;
    }
    DEBUG_PRINTF("adjusting width by %d\n", offsetAdjust);
    width += offsetAdjust;

    DEBUG_PRINTF("width=%u, vertex %u is cyclic\n", width,
                  g[cyclic].index);

    // The literal part alone is already long enough: min_length is redundant.
    if (width >= g.min_length) {
        DEBUG_PRINTF("min_length=%llu is guaranteed, as width=%u\n",
                      g.min_length, width);
        g.min_length = 0;
        return true;
    }

    // Detach the cyclic vertex from its predecessors; the new chain built
    // below is spliced in between them.
    vector<NFAVertex> preds;
    vector<NFAEdge> dead;
    for (auto u : inv_adjacent_vertices_range(cyclic, g)) {
        DEBUG_PRINTF("pred %u\n", g[u].index);
        if (u == cyclic) {
            continue;
        }
        preds.push_back(u);

        // We want to delete the out-edges of each predecessor, but need to
        // make sure we don't delete the startDs self loop.
        for (const auto &e : out_edges_range(u, g)) {
            if (target(e, g) != g.startDs) {
                dead.push_back(e);
            }
        }
    }

    remove_edges(dead, g);

    assert(!preds.empty());

    const CharReach &cr = g[cyclic].char_reach;

    // Insert (min_length - width - 1) vertices with the cyclic vertex's reach
    // as a chain; the loop bound cannot underflow because width < min_length
    // was established above.
    for (u32 i = 0; i < g.min_length - width - 1; ++i) {
        v = add_vertex(g);
        g[v].char_reach = cr;

        for (auto u : preds) {
            add_edge(u, v, g);
        }
        preds.clear();
        preds.push_back(v);
    }
    assert(!preds.empty());
    // Reattach the end of the chain (or the original preds, if no vertices
    // were added) to the cyclic vertex.
    for (auto u : preds) {
        add_edge(u, cyclic, g);
    }

    g.renumberVertices();
    g.renumberEdges();
    clearReports(g);

    g.min_length = 0;
    return true;
}
Пример #16
0
/**
 * \brief Compute the size of the common prefix region of graphs \a ga and
 * \a gb, given their vertex-to-state-id maps.
 *
 * The vertex lists a and b are indexed by state id below (see b[sid]), so
 * getSortedVA() presumably orders vertices by state id -- confirm. Starting
 * from the upper bound given by cplCommonReachAndSimple(), the candidate
 * region [0, max) is repeatedly shrunk until it passes all checks.
 *
 * Returns the number of leading states common to both graphs, or 0 if there
 * is no common region.
 */
u32 commonPrefixLength(const NGHolder &ga,
                       const ue2::unordered_map<NFAVertex, u32> &a_state_ids,
                       const NGHolder &gb,
                       const ue2::unordered_map<NFAVertex, u32> &b_state_ids) {
    vector<NFAVertex> a = getSortedVA(ga, a_state_ids);
    vector<NFAVertex> b = getSortedVA(gb, b_state_ids);

    /* upper bound on the common region based on local properties */
    u32 max = cplCommonReachAndSimple(ga, a, gb, b);
    DEBUG_PRINTF("cpl upper bound %u\n", max);

    // Each pass either returns max or strictly lowers it, so the loop
    // terminates.
    while (max > 0) {
        bool ok = true;

        /* shrink max region based on in-edges from outside the region */
        // Note that max may be lowered while j is still counting down; later
        // iterations then test against the new, smaller bound.
        for (size_t j = max; j > 0; j--) {
            for (auto u : inv_adjacent_vertices_range(a[j - 1], ga)) {
                u32 state_id = a_state_ids.at(u);
                if (state_id != NO_STATE && state_id >= max) {
                    max = j - 1;
                    DEBUG_PRINTF("lowering max to %u\n", max);
                    goto next_vertex;
                }
            }

            for (auto u : inv_adjacent_vertices_range(b[j - 1], gb)) {
                u32 state_id = b_state_ids.at(u);
                if (state_id != NO_STATE && state_id >= max) {
                    max = j - 1;
                    DEBUG_PRINTF("lowering max to %u\n", max);
                    goto next_vertex;
                }
            }

        next_vertex:;
        }

        /* Ensure that every pair of vertices has same out-edges to vertices in
           the region. */
        for (size_t i = 0; ok && i < max; i++) {
            // Number of out-edges of a[i] / b[i] that stay inside the region.
            size_t a_count = 0;
            size_t b_count = 0;

            NFAGraph::out_edge_iterator ei, ee;
            for (tie(ei, ee) = out_edges(a[i], ga); ok && ei != ee; ++ei) {
                u32 sid = a_state_ids.at(target(*ei, ga));
                if (sid == NO_STATE || sid >= max) {
                    continue;
                }

                a_count++;

                // The matching edge must exist in gb ...
                NFAEdge b_edge;
                bool has_b_edge;
                tie(b_edge, has_b_edge) = edge(b[i], b[sid], gb);

                if (!has_b_edge) {
                    max = i;
                    ok = false;
                    DEBUG_PRINTF("lowering max to %u due to edge %zu->%u\n",
                                 max, i, sid);
                    break;
                }

                // ... and carry the same top; clearing ok ends this loop.
                if (ga[*ei].top != gb[b_edge].top) {
                    max = i;
                    ok = false;
                    DEBUG_PRINTF("tops don't match on edge %zu->%u\n",
                                 i, sid);
                }
            }

            NFAGraph::adjacency_iterator ai, ae;
            for (tie(ai, ae) = adjacent_vertices(b[i], gb); ok && ai != ae;
                 ++ai) {
                u32 sid = b_state_ids.at(*ai);
                if (sid == NO_STATE || sid >= max) {
                    continue;
                }

                b_count++;
            }

            // Equal counts, combined with the per-edge check above, mean b[i]
            // has no in-region out-edges that a[i] lacks.
            if (a_count != b_count) {
                max = i;
                DEBUG_PRINTF("lowering max to %u due to a,b count "
                             "(a_count=%zu, b_count=%zu)\n", max, a_count,
                             b_count);
                ok = false;
            }
        }

        if (ok) {
            DEBUG_PRINTF("survived checks, returning cpl %u\n", max);
            return max;
        }
    }

    DEBUG_PRINTF("failed to find any common region\n");
    return 0;
}
Пример #17
0
/**
 * Try to absorb complete multi-byte UTF-8 sequences that run alongside the
 * cyclic vertex \a v into v itself.
 *
 * A sequence qualifies when its start vertex shares all of v's predecessors,
 * its continuation vertices form a simple chain with full continuation reach,
 * and the chain ends in a vertex that shares all of v's successors. For each
 * such sequence the start vertex's reach is added to v and the start vertex
 * is disconnected. Returns true if anything was changed.
 */
static
bool expandCyclic(NGHolder &h, NFAVertex v) {
    DEBUG_PRINTF("inspecting %zu\n", h[v].index);

    auto v_preds = preds(v, h);
    auto v_succs = succs(v, h);

    /* Start vertices which have all of our preds. Since v is expected to have
     * a self loop, any such vertex must be one of our succs. */
    set<NFAVertex> start_siblings;
    for (auto a : adjacent_vertices_range(v, h)) {
        if (preds(a, h) == v_preds && isutf8start(h[a].char_reach)) {
            DEBUG_PRINTF("%zu is a start v\n", h[a].index);
            start_siblings.insert(a);
        }
    }

    /* Full continuation vertices which have all of our succs. Again, because
     * of the self loop, any such vertex must be one of our preds. */
    set<NFAVertex> end_siblings;
    for (auto a : inv_adjacent_vertices_range(v, h)) {
        if (succs(a, h) == v_succs && h[a].char_reach == UTF_CONT_CR) {
            DEBUG_PRINTF("%zu is a full tail cont\n", h[a].index);
            end_siblings.insert(a);
        }
    }

    CharReach &v_cr = h[v].char_reach;
    bool changes = false;

    for (auto s : start_siblings) {
        if (out_degree(s, h) != 1) {
            continue;
        }

        const CharReach &cr = h[s].char_reach;

        // Number of continuation vertices expected between the start vertex
        // and the final (end sibling) vertex of the sequence.
        unsigned middle_count;
        if (cr.isSubsetOf(UTF_TWO_START_CR)) {
            middle_count = 0;
        } else if (cr.isSubsetOf(UTF_THREE_START_CR)) {
            middle_count = 1;
        } else if (cr.isSubsetOf(UTF_FOUR_START_CR)) {
            middle_count = 2;
        } else {
            DEBUG_PRINTF("%zu is bad\n", h[s].index);
            continue;
        }

        // Follow the chain: each middle vertex must have full continuation
        // reach and a single successor.
        NFAVertex last = s;
        bool simple_chain = true;
        for (unsigned k = 0; k < middle_count; ++k) {
            NFAVertex m = *adjacent_vertices(last, h).first;
            if (h[m].char_reach != UTF_CONT_CR || out_degree(m, h) != 1) {
                simple_chain = false;
                break;
            }
            last = m;
        }
        if (!simple_chain) {
            continue;
        }

        // The chain has to finish in one of the end siblings.
        if (end_siblings.find(*adjacent_vertices(last, h).first)
            == end_siblings.end()) {
            DEBUG_PRINTF("%zu is odd\n", h[s].index);
            continue;
        }

        v_cr |= cr;
        clear_vertex(s, h);
        changes = true;
    }

    if (!changes) {
        return false;
    }

    v_cr |= UTF_CONT_CR; /* we need to add in cont reach */
    /* The forbidden bytes can be added as well, as valid unicode data is
     * required. */
    v_cr.set(0xc0);
    v_cr.set(0xc1);
    v_cr |= CharReach(0xf5, 0xff);

    return true;
}
Пример #18
0
/**
 * \brief Decide whether the given set of reports needs runtime dedupe support.
 *
 * Only reports present in live_reports are considered. Returns true as soon
 * as any situation is found in which the same report could be raised more
 * than once: unsafe multi-reports, a literal shared by several reporting
 * roles, literals that could race, more than one kind of report source
 * (role, suffix, outfix, puffette, EOD boundary report), or a single engine
 * that may itself raise duplicates. Returns false otherwise.
 */
bool RoseDedupeAuxImpl::requiresDedupeSupport(
    const flat_set<ReportID> &reports_in) const {
    /* TODO: this could be expanded to check for offset or character
       constraints */

    // We don't want to consider dead reports (tracked by ReportManager but no
    // longer used) for the purposes of assigning dupe keys.
    flat_set<ReportID> reports;
    for (auto id : reports_in) {
        if (contains(live_reports, id)) {
            reports.insert(id);
        }
    }

    DEBUG_PRINTF("live reports: %s\n", as_string_list(reports).c_str());

    const RoseGraph &g = build.g;

    // Set once a source of the given kind has been seen; a second source of
    // any kind then forces dedupe. has_outfix also covers puffettes below.
    bool has_suffix = false;
    bool has_outfix = false;

    if (!hasSafeMultiReports(reports)) {
        DEBUG_PRINTF("multiple reports not safe\n");
        return true;
    }

    // Gather everything that can raise one of the live reports.
    set<RoseVertex> roles;
    set<suffix_id> suffixes;
    set<const OutfixInfo *> outfixes;
    set<const raw_puff *> puffettes;
    for (ReportID r : reports) {
        if (contains(vert_map, r)) {
            insert(&roles, vert_map.at(r));
        }
        if (contains(suffix_map, r)) {
            insert(&suffixes, suffix_map.at(r));
        }

        if (contains(outfix_map, r)) {
            insert(&outfixes, outfix_map.at(r));
        }

        if (contains(puff_map, r)) {
            insert(&puffettes, puff_map.at(r));
        }
    }

    /* roles */

    map<u32, u32> lits; // Literal ID -> count of occurrences.

    const bool has_role = !roles.empty();
    for (auto v : roles) {
        for (const auto &lit : g[v].literals) {
            lits[lit]++;
        }
        if (g[v].eod_accept) {
            // Literals plugged into this EOD accept must be taken into account
            // as well.
            for (auto u : inv_adjacent_vertices_range(v, g)) {
                for (const auto &lit : g[u].literals) {
                    lits[lit]++;
                }
            }
        }
    }

    /* literals */

    for (const auto &m : lits) {
        if (m.second > 1) {
            DEBUG_PRINTF("lit %u used by >1 reporting roles\n", m.first);
            return true;
        }
    }

    // Pairwise check of all distinct literals involved.
    for (auto it = begin(lits); it != end(lits); ++it) {
        const auto &lit1 = build.literals.at(it->first);
        for (auto jt = next(it); jt != end(lits); ++jt) {
            const auto &lit2 = build.literals.at(jt->first);
            if (literalsCouldRace(lit1, lit2)) {
                DEBUG_PRINTF("literals could race\n");
                return true;
            }
        }
    }

    /* suffixes */

    for (const auto &suffix : suffixes) {
        // A second suffix, or a suffix alongside a role, is enough.
        if (has_suffix || has_role) {
            return true; /* scope for badness */
        }

        has_suffix = true;

        /* some lesser suffix engines (nfas, haig, castle) can raise multiple
         * matches for a report id at the same offset if there are multiple
         * report states live. */
        if (suffix.haig()) {
            return true;
        }
        if (suffix.graph() &&
            requiresDedupe(*suffix.graph(), reports, build.cc.grey)) {
            return true;
        }
        if (suffix.castle() && requiresDedupe(*suffix.castle(), reports)) {
            return true;
        }
    }

    /* outfixes */

    for (const auto &outfix_ptr : outfixes) {
        assert(outfix_ptr);
        const OutfixInfo &out = *outfix_ptr;

        if (has_outfix || has_role || has_suffix) {
            return true;
        }
        has_outfix = true;

        if (out.haig()) {
            return true; /* haig may report matches with different SOM at the
                            same offset */
        }

        if (out.holder() &&
            requiresDedupe(*out.holder(), reports, build.cc.grey)) {
            return true;
        }
    }

    /* mpv */
    // The loop variable is unused: only the number of puffettes matters. The
    // first one is fine on its own; a second one (or any earlier source)
    // returns true because has_outfix is set on the first iteration.
    for (UNUSED const auto &puff : puffettes) {
        if (has_outfix || has_role || has_suffix) {
            return true;
        }
        has_outfix = true;
    }

    /* boundary */
    // EOD boundary reports only matter if some other source exists as well.
    if (has_intersection(build.boundary.report_at_eod, reports)) {
        if (has_outfix || has_role || has_suffix) {
            return true;
        }
    }

    return false;
}
Пример #19
0
/**
 * Prune reporting vertices that are redundant for simple exhaustible
 * (highlander) reports:
 *  - a report is removed from a vertex when isDominatedByReporter() holds for
 *    it; a vertex left without reports loses its edges to the accepts;
 *  - a self-loop is removed from a vertex for which
 *    hasOnlySelfLoopAndExhaustibleAccepts() holds.
 * If anything changed, useless vertices are pruned and edges renumbered.
 */
void pruneHighlanderDominated(NGHolder &g, const ReportManager &rm) {
    // Collect the predecessors of either accept that carry at least one
    // simple exhaustible report. A vertex may be collected twice here;
    // duplicates are removed below.
    vector<NFAVertex> reporters;
    const NFAVertex sinks[] = {g.accept, g.acceptEod};
    for (const NFAVertex sink : sinks) {
        for (auto v : inv_adjacent_vertices_range(sink, g)) {
            for (const auto &report_id : g[v].reports) {
                if (isSimpleExhaustible(rm.getReport(report_id))) {
                    reporters.push_back(v);
                    break;
                }
            }
        }
    }

    if (reporters.empty()) {
        return;
    }

    sort(begin(reporters), end(reporters), make_index_ordering(g));
    reporters.erase(unique(begin(reporters), end(reporters)), end(reporters));

    DEBUG_PRINTF("%zu vertices have simple exhaustible reports\n",
                 reporters.size());

    const auto &dom = findDominators(g);
    bool modified = false;

    // A report that is dominated by another reporter with the same report is
    // redundant. When a vertex ends up with no reports at all, its accept
    // edges go too.
    for (const auto v : reporters) {
        // Iterate over a snapshot, since the live set is mutated in the loop.
        const auto snapshot = g[v].reports;
        for (const auto &report_id : snapshot) {
            if (isSimpleExhaustible(rm.getReport(report_id)) &&
                isDominatedByReporter(g, dom, v, report_id)) {
                DEBUG_PRINTF("removed dominated report %u from vertex %u\n",
                             report_id, g[v].index);
                g[v].reports.erase(report_id);
            }
        }

        if (!g[v].reports.empty()) {
            continue;
        }

        DEBUG_PRINTF("removed edges to accepts from %u, no reports left\n",
                      g[v].index);
        remove_edge(v, g.accept, g);
        remove_edge(v, g.acceptEod, g);
        modified = true;
    }

    // A reporter with a self-loop that otherwise only leads to accept (NOT
    // acceptEod) and has simple exhaustible reports does not need the loop.
    for (const auto v : reporters) {
        if (!hasOnlySelfLoopAndExhaustibleAccepts(g, rm, v)) {
            continue;
        }
        remove_edge(v, v, g);
        modified = true;
        DEBUG_PRINTF("removed self-loop on %u\n", g[v].index);
    }

    if (modified) {
        pruneUseless(g);

        // If only self-loops were removed, pruneUseless would not have
        // renumbered anything, so edges are renumbered explicitly.
        g.renumberEdges();
    }
}