Esempio n. 1
0
static
vector<NFAVertex> getSortedVA(const NGHolder &g,
            const ue2::unordered_map<NFAVertex, u32> &state_ids) {
    vector<NFAVertex> out;
    out.reserve(num_vertices(g));

    for (auto v : vertices_range(g)) {
        assert(contains(state_ids, v));
        if (state_ids.at(v) == NO_STATE) {
            continue;
        }
        out.push_back(v);
    }

    // Order vertices by their state indices.
    sort(begin(out), end(out), [&state_ids](NFAVertex a, NFAVertex b) {
        return state_ids.at(a) < state_ids.at(b);
    });

#ifndef NDEBUG
    // State indices should match vector indices.
    for (u32 i = 0; i < out.size(); i++) {
        assert(state_ids.at(out.at(i)) == i);
    }
#endif

    return out;
}
Esempio n. 2
0
u32 countStates(const NGHolder &g,
                const ue2::unordered_map<NFAVertex, u32> &state_ids,
                bool addTops) {
    if (state_ids.empty()) {
        return 0;
    }

    u32 max_state = 0;
    for (const auto &m : state_ids) {
        if (m.second != NO_STATE) {
            max_state = max(m.second, max_state);
        }
    }

    u32 num_states = max_state + 1;

    assert(contains(state_ids, g.start));
    if (addTops && state_ids.at(g.start) != NO_STATE) {
        num_states--;
        set<u32> tops;
        for (auto e : out_edges_range(g.start, g)) {
            tops.insert(g[e].top);
        }
        num_states += tops.size();
    }

    return num_states;
}
Esempio n. 3
0
static
bool isDominatedByReporter(const NGHolder &g,
                           const ue2::unordered_map<NFAVertex, NFAVertex> &dom,
                           NFAVertex v, ReportID report_id) {
    for (auto it = dom.find(v); it != end(dom); it = dom.find(v)) {
        NFAVertex u = it->second;
        // Note: reporters with edges only to acceptEod are not considered to
        // dominate.
        if (edge(u, g.accept, g).second && contains(g[u].reports, report_id)) {
            DEBUG_PRINTF("%u is dominated by %u, and both report %u\n",
                          g[v].index, g[u].index, report_id);
            return true;
        }
        v = u;
    }
    return false;
}
Esempio n. 4
0
bool sentClearsTail(const NGHolder &g,
                    const ue2::unordered_map<NFAVertex, u32> &region_map,
                    const NGHolder &sent, u32 last_head_region,
                    u32 *bad_region) {
    /* if a subsequent match from the prefix clears the rest of the pattern
     * we can just keep track of the last match of the prefix.
     * To see if this property holds, we could:
     *
     * 1A: turn on all states in the tail and run all strings that may
     *    match the prefix past the tail, if we are still in any states then
     *    this property does not hold.
     *
     * 1B: we turn on the initial states of the tail and run any strings which
     *   may finish any partial matches in the prefix and see if we end up with
     *   anything which would also imply that this property does not hold.
     *
     * OR
     *
     * 2: we just turn everything and run the prefix inputs past it and see what
     * we are left with. I think that is equivalent to scheme 1 and is easier to
     * implement. TODO: ponder
     *
     * Anyway, we are going with scheme 2 until further notice.
     */

    u32 first_bad_region = ~0U;
    set<NFAVertex> states;
    /* turn on all states */
    DEBUG_PRINTF("region %u is cutover\n", last_head_region);
    for (auto v : vertices_range(g)) {
        if (v != g.accept && v != g.acceptEod) {
            states.insert(v);
        }
    }

    for (UNUSED auto v : states) {
        DEBUG_PRINTF("start state: %u\n", g[v].index);
    }

    /* run the prefix the main graph */
    execute_graph(g, sent, &states);

    /* .. and check if we are left with anything in the tail region */
    for (auto v : states) {
        if (v == g.start || v == g.startDs) {
            continue; /* not in tail */
        }

        DEBUG_PRINTF("v %u is still on\n", g[v].index);
        assert(v != g.accept && v != g.acceptEod); /* no cr */

        assert(contains(region_map, v));
        const u32 v_region = region_map.at(v);
        if (v_region > last_head_region) {
            DEBUG_PRINTF("bailing, %u > %u\n", v_region, last_head_region);
            first_bad_region = min(first_bad_region, v_region);
        }
    }

    if (first_bad_region != ~0U) {
        DEBUG_PRINTF("first bad region is %u\n", first_bad_region);
        *bad_region = first_bad_region;
        return false;
    }

    return true;
}
Esempio n. 5
0
bool somMayGoBackwards(NFAVertex u, const NGHolder &g,
                       const ue2::unordered_map<NFAVertex, u32> &region_map,
                       smgb_cache &cache) {
    /* Need to ensure all matches of the graph g up to u contain no infixes
     * which are also matches of the graph to u.
     *
     * This is basically the same as firstMatchIsFirst except we g is not
     * always a dag. As we haven't gotten around to writing an execute_graph
     * that operates on general graphs, we take some (hopefully) conservative
     * short cuts.
     *
     * Note: if the u can be jumped we will take jump edges
     * into account as a possibility of som going backwards
     *
     * TODO: write a generalised ng_execute_graph/make this less hacky
     */
    assert(&g == &cache.g);
    if (contains(cache.smgb, u)) {
        return cache.smgb[u];
    }

    DEBUG_PRINTF("checking if som can go backwards on %u\n",
                  g[u].index);

    set<NFAEdge> be;
    BackEdges<set<NFAEdge>> backEdgeVisitor(be);
    depth_first_search(
        g.g, visitor(backEdgeVisitor)
                 .root_vertex(g.start)
                 .vertex_index_map(get(&NFAGraphVertexProps::index, g.g)));

    bool rv;
    if (0) {
    exit:
        DEBUG_PRINTF("using cached result\n");
        cache.smgb[u] = rv;
        return rv;
    }

    assert(contains(region_map, u));
    const u32 u_region = region_map.at(u);

    for (const auto &e : be) {
        NFAVertex s = source(e, g);
        NFAVertex t = target(e, g);
        /* only need to worry about big cycles including/before u */
        DEBUG_PRINTF("back edge %u %u\n", g[s].index,
                      g[t].index);
        if (s != t && region_map.at(s) <= u_region) {
            DEBUG_PRINTF("eek big cycle\n");
            rv = true; /* big cycle -> eek */
            goto exit;
        }
    }

    ue2::unordered_map<NFAVertex, NFAVertex> orig_to_copy;
    NGHolder c_g;
    cloneHolder(c_g, g, &orig_to_copy);

    for (NFAVertex v : vertices_range(g)) {
        if (!is_virtual_start(v, g)) {
            continue;
        }
        NFAVertex c_v = orig_to_copy[v];
        orig_to_copy[v] = c_g.startDs;
        for (NFAVertex c_w : adjacent_vertices_range(c_v, c_g)) {
            add_edge_if_not_present(c_g.startDs, c_w, c_g);
        }
        clear_vertex(c_v, c_g);
    }

    NFAVertex c_u = orig_to_copy[u];
    clear_in_edges(c_g.acceptEod, c_g);
    add_edge(c_g.accept, c_g.acceptEod, c_g);
    clear_in_edges(c_g.accept, c_g);
    clear_out_edges(c_u, c_g);
    if (hasSelfLoop(u, g)) {
        add_edge(c_u, c_u, c_g);
    }
    add_edge(c_u, c_g.accept, c_g);

    set<NFAVertex> u_succ;
    insert(&u_succ, adjacent_vertices(u, g));
    u_succ.erase(u);

    for (auto t : inv_adjacent_vertices_range(u, g)) {
        if (t == u) {
            continue;
        }
        for (auto v : adjacent_vertices_range(t, g)) {
            if (contains(u_succ, v)) {
                add_edge(orig_to_copy[t], c_g.accept, c_g);
                break;
            }
        }
    }

    pruneUseless(c_g);

    be.clear();
    depth_first_search(c_g.g, visitor(backEdgeVisitor).root_vertex(c_g.start).
                       vertex_index_map(get(&NFAGraphVertexProps::index, c_g.g)));

    for (const auto &e : be) {
        NFAVertex s = source(e, c_g);
        NFAVertex t = target(e, c_g);
        DEBUG_PRINTF("back edge %u %u\n", c_g[s].index, c_g[t].index);
        if (s != t) {
            assert(0);
            DEBUG_PRINTF("eek big cycle\n");
            rv = true; /* big cycle -> eek */
            goto exit;
        }
    }

    DEBUG_PRINTF("checking acyclic+selfloop graph\n");

    rv = !firstMatchIsFirst(c_g);
    DEBUG_PRINTF("som may regress? %d\n", (int)rv);
    goto exit;
}
Esempio n. 6
0
static never_inline
void mergeNfa(NGHolder &dest, vector<NFAVertex> &destStateMap,
              ue2::unordered_map<NFAVertex, u32> &dest_state_ids,
              NGHolder &vic, vector<NFAVertex> &vicStateMap,
              size_t common_len) {
    map<NFAVertex, NFAVertex> vmap; // vic -> dest

    vmap[vic.start]     = dest.start;
    vmap[vic.startDs]   = dest.startDs;
    vmap[vic.accept]    = dest.accept;
    vmap[vic.acceptEod] = dest.acceptEod;
    vmap[nullptr] = nullptr;

    u32 stateNum = countStates(dest, dest_state_ids);

    // For vertices in the common len, add to vmap and merge in the reports, if
    // any.
    for (u32 i = 0; i < common_len; i++) {
        NFAVertex v_old = vicStateMap[i], v = destStateMap[i];
        vmap[v_old] = v;

        const auto &reports = vic[v_old].reports;
        dest[v].reports.insert(reports.begin(), reports.end());
    }

    // Add in vertices beyond the common len, giving them state numbers
    // starting at stateNum.
    for (u32 i = common_len; i < vicStateMap.size(); i++) {
        NFAVertex v_old = vicStateMap[i];

        if (is_special(v_old, vic)) {
            // Dest already has start vertices, just merge the reports.
            u32 idx = vic[v_old].index;
            NFAVertex v = dest.getSpecialVertex(idx);
            const auto &reports = vic[v_old].reports;
            dest[v].reports.insert(reports.begin(), reports.end());
            continue;
        }

        NFAVertex v = add_vertex(vic[v_old], dest);
        dest_state_ids[v] = stateNum++;
        vmap[v_old] = v;
    }

    /* add edges */
    DEBUG_PRINTF("common_len=%zu\n", common_len);
    for (const auto &e : edges_range(vic)) {
        NFAVertex u_old = source(e, vic), v_old = target(e, vic);
        NFAVertex u = vmap[u_old], v = vmap[v_old];
        bool uspecial = is_special(u, dest);
        bool vspecial = is_special(v, dest);

        // Skip stylised edges that are already present.
        if (uspecial && vspecial && edge(u, v, dest).second) {
            continue;
        }

        // We're in the common region if v's state ID is low enough, unless v
        // is a special (an accept), in which case we use u's state ID.
        assert(contains(dest_state_ids, v));
        bool in_common_region = dest_state_ids.at(v) < common_len;
        if (vspecial && dest_state_ids.at(u) < common_len) {
            in_common_region = true;
        }

        DEBUG_PRINTF("adding idx=%u (state %u) -> idx=%u (state %u)%s\n",
                     dest[u].index, dest_state_ids.at(u),
                     dest[v].index, dest_state_ids.at(v),
                     in_common_region ? " [common]" : "");

        if (in_common_region) {
            if (!is_special(v, dest)) {
                DEBUG_PRINTF("skipping common edge\n");
                assert(edge(u, v, dest).second);
                // Should never merge edges with different top values.
                assert(vic[e].top == dest[edge(u, v, dest).first].top);
                continue;
            } else {
                assert(is_any_accept(v, dest));
                // If the edge exists in both graphs, skip it.
                if (edge(u, v, dest).second) {
                    DEBUG_PRINTF("skipping common edge to accept\n");
                    continue;
                }
            }
        }

        assert(!edge(u, v, dest).second);
        add_edge(u, v, vic[e], dest);
    }

    dest.renumberEdges();
    dest.renumberVertices();
}
Esempio n. 7
0
u32 commonPrefixLength(const NGHolder &ga,
                       const ue2::unordered_map<NFAVertex, u32> &a_state_ids,
                       const NGHolder &gb,
                       const ue2::unordered_map<NFAVertex, u32> &b_state_ids) {
    vector<NFAVertex> a = getSortedVA(ga, a_state_ids);
    vector<NFAVertex> b = getSortedVA(gb, b_state_ids);

    /* upper bound on the common region based on local properties */
    u32 max = cplCommonReachAndSimple(ga, a, gb, b);
    DEBUG_PRINTF("cpl upper bound %u\n", max);

    while (max > 0) {
        bool ok = true;

        /* shrink max region based on in-edges from outside the region */
        for (size_t j = max; j > 0; j--) {
            for (auto u : inv_adjacent_vertices_range(a[j - 1], ga)) {
                u32 state_id = a_state_ids.at(u);
                if (state_id != NO_STATE && state_id >= max) {
                    max = j - 1;
                    DEBUG_PRINTF("lowering max to %u\n", max);
                    goto next_vertex;
                }
            }

            for (auto u : inv_adjacent_vertices_range(b[j - 1], gb)) {
                u32 state_id = b_state_ids.at(u);
                if (state_id != NO_STATE && state_id >= max) {
                    max = j - 1;
                    DEBUG_PRINTF("lowering max to %u\n", max);
                    goto next_vertex;
                }
            }

        next_vertex:;
        }

        /* Ensure that every pair of vertices has same out-edges to vertices in
           the region. */
        for (size_t i = 0; ok && i < max; i++) {
            size_t a_count = 0;
            size_t b_count = 0;

            NFAGraph::out_edge_iterator ei, ee;
            for (tie(ei, ee) = out_edges(a[i], ga); ok && ei != ee; ++ei) {
                u32 sid = a_state_ids.at(target(*ei, ga));
                if (sid == NO_STATE || sid >= max) {
                    continue;
                }

                a_count++;

                NFAEdge b_edge;
                bool has_b_edge;
                tie(b_edge, has_b_edge) = edge(b[i], b[sid], gb);

                if (!has_b_edge) {
                    max = i;
                    ok = false;
                    DEBUG_PRINTF("lowering max to %u due to edge %zu->%u\n",
                                 max, i, sid);
                    break;
                }

                if (ga[*ei].top != gb[b_edge].top) {
                    max = i;
                    ok = false;
                    DEBUG_PRINTF("tops don't match on edge %zu->%u\n",
                                 i, sid);
                }
            }

            NFAGraph::adjacency_iterator ai, ae;
            for (tie(ai, ae) = adjacent_vertices(b[i], gb); ok && ai != ae;
                 ++ai) {
                u32 sid = b_state_ids.at(*ai);
                if (sid == NO_STATE || sid >= max) {
                    continue;
                }

                b_count++;
            }

            if (a_count != b_count) {
                max = i;
                DEBUG_PRINTF("lowering max to %u due to a,b count "
                             "(a_count=%zu, b_count=%zu)\n", max, a_count,
                             b_count);
                ok = false;
            }
        }

        if (ok) {
            DEBUG_PRINTF("survived checks, returning cpl %u\n", max);
            return max;
        }
    }

    DEBUG_PRINTF("failed to find any common region\n");
    return 0;
}