示例#1
0
/** Some squash states are clearly not advantageous in the NFA, as they do
 * incur the cost of an exception:
 * -# acyclic states
 * -# squash only a few acyclic states
 */
void filterSquashers(const NGHolder &g,
                     map<NFAVertex, NFAStateSet> &squash) {
    DEBUG_PRINTF("filtering\n");
    map<u32, NFAVertex> rev; /* vertex_index -> vertex */
    for (auto v : vertices_range(g)) {
        rev[g[v].index] = v;
    }

    for (auto v : vertices_range(g)) {
        if (!contains(squash, v)) {
            continue;
        }
        DEBUG_PRINTF("looking at squash set for vertex %u\n",
                     g[v].index);

        if (!hasSelfLoop(v, g)) {
            DEBUG_PRINTF("acyclic\n");
            squash.erase(v);
            continue;
        }

        NFAStateSet squashed = squash[v];
        squashed.flip(); /* default sense for mask of survivors */
        for (NFAStateSet::size_type sq = squashed.find_first();
             sq != squashed.npos; sq = squashed.find_next(sq)) {
            NFAVertex u = rev[sq];
            if (hasSelfLoop(u, g)) {
                DEBUG_PRINTF("squashing a cyclic (%zu) is always good\n", sq);
                goto next_vertex;
            }
        }

        if (squashed.count() < MIN_PURE_ACYCLIC_SQUASH) {
            DEBUG_PRINTF("squash set too small\n");
            squash.erase(v);
            continue;
        }

    next_vertex:;
        DEBUG_PRINTF("squash set ok\n");
    }
}
示例#2
0
/**
 * Builds a squash mask based on the pdom tree of v and the given char reach.
 * The built squash mask is a bit conservative for non-dot cases and could
 * be improved with a bit of thought.
 */
static
void buildSquashMask(NFAStateSet &mask, const NGHolder &g, NFAVertex v,
                     const CharReach &cr, const NFAStateSet &init,
                     const vector<NFAVertex> &vByIndex, const PostDomTree &tree,
                     som_type som, const vector<DepthMinMax> &som_depths,
                     const ue2::unordered_map<NFAVertex, u32> &region_map,
                     smgb_cache &cache) {
    DEBUG_PRINTF("build base squash mask for vertex %u)\n",
                 g[v].index);

    vector<NFAVertex> q;

    PostDomTree::const_iterator it = tree.find(v);
    if (it != tree.end()) {
        q.insert(q.end(), it->second.begin(), it->second.end());
    }

    const u32 v_index = g[v].index;

    while (!q.empty()) {
        NFAVertex u = q.back();
        q.pop_back();
        const CharReach &cru = g[u].char_reach;

        if ((cru & ~cr).any()) {
            /* bail: bad cr on vertex u */
            /* TODO: this could be better
             *
             * we still need to ensure that we record any paths leading to u.
             * Hence all vertices R which can reach u must be excluded from the
             * squash mask. Note: R != pdom(u) and there may exist an x in (R -
             * pdom(u)) which is in pdom(y) where y is in q. Clear ?
             */
            mask.set();
            return;
        }

        const u32 u_index = g[u].index;

        if (som) {
            /* We cannot add a state u to the squash mask of v if it may have an
             * earlier start of match offset. ie for us to add a state u to v
             * maxSomDist(u) <= minSomDist(v)
             */
            const depth &max_som_dist_u = som_depths[u_index].max;
            const depth &min_som_dist_v = som_depths[v_index].min;

            if (max_som_dist_u.is_infinite()) {
                /* it is hard to tell due to the INF if u can actually store an
                 * earlier SOM than w (state we are building the squash mask
                 * for) - need to think more deeply
                 */

                if (mustBeSetBefore(u, v, g, cache)
                    && !somMayGoBackwards(u, g, region_map, cache)) {
                    DEBUG_PRINTF("u %u v %u\n", u_index, v_index);
                    goto squash_ok;
                }
            }

           if (max_som_dist_u > min_som_dist_v) {
                /* u can't be squashed as it may be storing an earlier SOM */
                goto add_children_to_queue;
            }

        }

    squash_ok:
        mask.set(u_index);
        DEBUG_PRINTF("pdom'ed %u\n", u_index);
    add_children_to_queue:
        it = tree.find(u);
        if (it != tree.end()) {
            q.insert(q.end(), it->second.begin(), it->second.end());
        }
    }

    if (cr.all()) {
        /* the init states aren't in the pdom tree. If all their succ states
         * are set (or v), we can consider them post dominated */

        /* Note: init states will always result in a later som */
        for (size_t i = init.find_first(); i != init.npos;
             i = init.find_next(i)) {
            /* Yes vacuous patterns do exist */
            NFAVertex iv = vByIndex[i];
            for (auto w : adjacent_vertices_range(iv, g)) {
                if (w == g.accept || w == g.acceptEod) {
                    DEBUG_PRINTF("skipping %zu due to vacuous accept\n", i);
                    goto next_init_state;
                }

                u32 vert_id = g[w].index;
                if (w != iv && w != v && !mask.test(vert_id)) {
                    DEBUG_PRINTF("skipping %zu due to %u\n", i, vert_id);
                    goto next_init_state;
                }
            }
            DEBUG_PRINTF("pdom'ed %zu\n", i);
            mask.set(i);
        next_init_state:;
        }
    }

    mask.flip();
}