Esempio n. 1
0
/**
 * Decide whether vertex va of graph ga and vertex vb of graph gb are
 * compatible. They are when they have equal reach, equal index if either of
 * them is a start vertex, and agree on the presence of edges to accept and to
 * acceptEod.
 */
static never_inline
bool cplVerticesMatch(const NGHolder &ga, NFAVertex va,
                      const NGHolder &gb, NFAVertex vb) {
    // Differing reachability rules the pair out immediately.
    if (ga[va].char_reach != gb[vb].char_reach) {
        return false;
    }

    // As soon as a start vertex is involved, both sides must carry the same
    // index.
    const bool involves_start = is_any_start(va, ga) || is_any_start(vb, gb);
    if (involves_start && ga[va].index != gb[vb].index) {
        return false;
    }

    // Finally, the edges into accept and acceptEod have to agree.
    const bool same_accept =
        edge(va, ga.accept, ga).second == edge(vb, gb.accept, gb).second;
    const bool same_accept_eod =
        edge(va, ga.acceptEod, ga).second == edge(vb, gb.acceptEod, gb).second;

    return same_accept && same_accept_eod;
}
Esempio n. 2
0
/** Strip start-to-accept ("vacuous") edges from graphs whose min_offset or
 * min_length constraint is set, then prune whatever became useless. Graphs
 * with neither constraint are left untouched. */
static
void pruneVacuousEdges(NGWrapper &g) {
    if (!(g.min_length || g.min_offset)) {
        return;
    }

    vector<NFAEdge> vacuous;

    for (const auto &e : edges_range(g)) {
        const NFAVertex from = source(e, g);
        const NFAVertex to = target(e, g);

        // Only edges that land on an accept vertex can be vacuous.
        if (!is_any_accept(to, g)) {
            continue;
        }

        if (g.min_offset && from == g.start) {
            // Crude special case: with a min_offset, only edges leaving
            // start itself are dropped.
            DEBUG_PRINTF("vacuous edge in graph with min_offset!\n");
            vacuous.push_back(e);
        } else if (g.min_length && is_any_start(from, g)) {
            // With a min_length, edges from any start vertex can go.
            DEBUG_PRINTF("vacuous edge in graph with min_length!\n");
            vacuous.push_back(e);
        }
    }

    if (!vacuous.empty()) {
        remove_edges(vacuous, g);
        pruneUseless(g);
    }
}
/**
 * Test whether role1 and role2 can be treated as exclusive with respect to
 * graph h.
 *
 * Returns false when the pair is already recorded in skipList, when
 * isSuffix() holds for their literal sets, or when running one of role2's
 * literals (trimmed by findStartPos()) through h leaves on a non-start state
 * with index <= num or a state whose index is in tailId. In the latter two
 * cases id1 is added to skipList[id2]. Returns true otherwise.
 */
static
bool isExclusive(const NGHolder &h,
                 const u32 num, unordered_set<u32> &tailId,
                 map<u32, unordered_set<u32>> &skipList,
                 const RoleInfo<role_id> &role1,
                 const RoleInfo<role_id> &role2) {
    const u32 id1 = role1.id;
    const u32 id2 = role2.id;

    // Pair already recorded as non-exclusive: nothing more to do.
    const auto known = skipList.find(id1);
    if (known != skipList.end() && contains(known->second, id2)) {
        return false;
    }

    if (isSuffix(role1.literals, role2.literals)) {
        skipList[id2].insert(id1);
        return false;
    }

    DEBUG_PRINTF("role id2:%u\n", id2);

    // Without any overlap between role1's reach and role2's last reach there
    // is no need to run the graph.
    if (!overlaps(role1.cr, role2.last_cr)) {
        return true;
    }

    CharReach cr = role1.cr | role1.prefix_cr;
    flat_set<NFAVertex> states;

    for (const auto &lit : role2.literals) {
        auto lit1 = findStartPos(cr, lit);
        if (lit1.empty()) {
            continue;
        }

        // Seed the run: just the two starts when the literal was shortened,
        // every vertex of h otherwise.
        states.clear();
        if (lit1.size() < lit.size()) {
            states.insert(h.start);
            states.insert(h.startDs);
        } else {
            insert(&states, vertices(h));
        }

        auto activeStates = execute_graph(h, lit1, states);

        // Any surviving state that is not purely a literal state makes the
        // pair non-exclusive.
        for (const auto &s : activeStates) {
            const bool low_non_start =
                !is_any_start(s, h) && h[s].index <= num;
            if (low_non_start || contains(tailId, h[s].index)) {
                skipList[id2].insert(id1);
                return false;
            }
        }
    }

    return true;
}
Esempio n. 4
0
/** Last optimisation pass: mark start and/or startDs as NO_STATE in the state
 * map when the graph does not need them, and shift the remaining state
 * indices down to fill the gap. This is deferred until late because keeping
 * both starts around, with fixed state indices, helps merging and other
 * analyses. */
void dropUnusedStarts(NGHolder &g, ue2::unordered_map<NFAVertex, u32> &states) {
    u32 dropped = 0;

    if (startIsRedundant(g)) {
        DEBUG_PRINTF("dropping unused start\n");
        states[g.start] = NO_STATE;
        ++dropped;
    }

    if (proper_out_degree(g.startDs, g) == 0) {
        DEBUG_PRINTF("dropping unused startDs\n");
        states[g.startDs] = NO_STATE;
        ++dropped;
    }

    if (dropped == 0) {
        DEBUG_PRINTF("both start and startDs must remain\n");
        return;
    }

    // At least one start is gone. Renumber every vertex that still owns a
    // state: a surviving start becomes state 0, everything else moves down
    // by the number of starts dropped.
    for (auto v : vertices_range(g)) {
        u32 &idx = states[v]; // reference: updated in place
        if (idx == NO_STATE) {
            continue;
        }

        if (!is_any_start(v, g)) {
            assert(!is_special(v, g));
            assert(idx >= dropped);
            idx -= dropped;
            continue;
        }

        assert(idx <= 1);
        idx = 0; // the single remaining start
    }
}
Esempio n. 5
0
/**
 * Build the squash masks for graph g.
 *
 * Only vertices that have a self-loop and are not init states (start vertices
 * or vertices without in-edges) are considered as squashers. For each such
 * vertex a mask over state indices is built; a cleared bit marks a state that
 * the vertex squashes. Vertices whose mask clears nothing are not added.
 *
 * g:   graph to analyse; every vertex index must be below num_vertices(g)
 *      (asserted).
 * som: when set, regions and SOM distances are computed and used to reject
 *      predecessors that could hold an earlier start of match.
 *
 * Returns the map from squasher vertex to mask, after it has been passed
 * through findDerivedSquashers() and clearMutualSquashers().
 */
map<NFAVertex, NFAStateSet> findSquashers(const NGHolder &g, som_type som) {
    map<NFAVertex, NFAStateSet> squash;

    // Number of bits to use for all our masks. If we're a triggered graph,
    // tops have already been assigned, so we don't have to account for them.
    const u32 numStates = num_vertices(g);

    // Build post-dominator tree.
    PostDomTree pdom_tree;
    buildPDomTree(g, pdom_tree);

    // Build list of vertices by state ID and a set of init states.
    vector<NFAVertex> vByIndex(numStates, NFAGraph::null_vertex());
    NFAStateSet initStates(numStates);
    smgb_cache cache(g);

    // Mappings used for SOM mode calculations, otherwise left empty.
    unordered_map<NFAVertex, u32> region_map;
    vector<DepthMinMax> som_depths;
    if (som) {
        region_map = assignRegions(g);
        som_depths = getDistancesFromSOM(g);
    }

    // Fill vByIndex and flag init states: starts and vertices with no
    // in-edges.
    for (auto v : vertices_range(g)) {
        const u32 vert_id = g[v].index;
        DEBUG_PRINTF("vertex %u/%u\n", vert_id, numStates);
        assert(vert_id < numStates);
        vByIndex[vert_id] = v;

        if (is_any_start(v, g) || !in_degree(v, g)) {
            initStates.set(vert_id);
        }
    }

    // Consider each state in index order as a potential squasher.
    for (u32 i = 0; i < numStates; i++) {
        NFAVertex v = vByIndex[i];
        assert(v != NFAGraph::null_vertex());
        const CharReach &cr = g[v].char_reach;

        /* only non-init cyclics can be squashers */
        if (!hasSelfLoop(v, g) || initStates.test(i)) {
            continue;
        }

        DEBUG_PRINTF("state %u is cyclic\n", i);

        // mask starts as v's own squash mask; succ/pred are filled in by
        // buildSucc()/buildPred() for v.
        NFAStateSet mask(numStates), succ(numStates), pred(numStates);
        buildSquashMask(mask, g, v, cr, initStates, vByIndex, pdom_tree, som,
                        som_depths, region_map, cache);
        buildSucc(succ, g, v);
        buildPred(pred, g, v);
        const auto &reports = g[v].reports;

        // Successors whose predecessor set is identical to v's: intersect
        // their squash mask (built with v's reach, cr) into ours.
        for (size_t j = succ.find_first(); j != succ.npos;
             j = succ.find_next(j)) {
            NFAVertex vj = vByIndex[j];
            NFAStateSet pred2(numStates);
            buildPred(pred2, g, vj);
            if (pred2 == pred) {
                DEBUG_PRINTF("adding the sm from %zu to %u's sm\n", j, i);
                NFAStateSet tmp(numStates);
                buildSquashMask(tmp, g, vj, cr, initStates, vByIndex, pdom_tree,
                                som, som_depths, region_map, cache);
                mask &= tmp;
            }
        }

        // Predecessors of v: each one that passes all the checks below also
        // contributes its mask and is itself marked as squashed.
        for (size_t j = pred.find_first(); j != pred.npos;
             j = pred.find_next(j)) {
            NFAVertex vj = vByIndex[j];
            NFAStateSet succ2(numStates);
            buildSucc(succ2, g, vj);
            /* we can use j as a basis for squashing if its succs are a subset
             * of ours */
            if ((succ2 & ~succ).any()) {
                continue;
            }

            if (som) {
                /* We cannot use j to add to the squash mask of v if it may
                 * have an earlier start of match offset. i.e. to use j as a
                 * basis for the squash mask of v we require:
                 * maxSomDist(j) <= minSomDist(v)
                 */

                /* ** TODO ** */

                const depth &max_som_dist_j =
                    som_depths[g[vj].index].max;
                const depth &min_som_dist_v =
                    som_depths[g[v].index].min;
                if (max_som_dist_j > min_som_dist_v ||
                    max_som_dist_j.is_infinite()) {
                    /* j can't be used as it may be storing an earlier SOM */
                    continue;
                }
            }

            const CharReach &crv = g[vj].char_reach;

            /* we also require that j's report information be a subset of ours
             */
            // Every special vertex that j has an edge to must also be a
            // successor of v; otherwise j is skipped entirely (next_j).
            bool seen_special = false;
            for (auto w : adjacent_vertices_range(vj, g)) {
                if (is_special(w, g)) {
                    if (!edge(v, w, g).second) {
                        goto next_j;
                    }
                    seen_special = true;
                }
            }

            // If j leads to a special vertex, its reports must be exactly
            // equal to v's.
            // FIXME: should be subset check?
            if (seen_special && g[vj].reports != reports) {
                continue;
            }

            /* ok we can use j */
            // ...provided j's reach is contained in v's reach.
            if ((crv & ~cr).none()) {
                NFAStateSet tmp(numStates);
                buildSquashMask(tmp, g, vj, cr, initStates, vByIndex, pdom_tree,
                                som, som_depths, region_map, cache);
                mask &= tmp;
                mask.reset(j); // j itself gets squashed by v
            }

        next_j:;
        }

        mask.set(i); /* never clear ourselves */

        if ((~mask).any()) { // i.e. some bits unset in mask
            DEBUG_PRINTF("%u squashes %zu other states\n", i, (~mask).count());
            squash.emplace(v, mask);
        }
    }

    // Post-processing passes over the collected masks (implemented
    // elsewhere).
    findDerivedSquashers(g, vByIndex, pdom_tree, initStates, &squash, som,
                         som_depths, region_map, cache);

    clearMutualSquashers(g, vByIndex, squash);

    return squash;
}