コード例 #1
0
static
bool hasSingleFloatingStart(const NGHolder &g) {
    NFAVertex initial = NGHolder::null_vertex();
    for (auto v : adjacent_vertices_range(g.startDs, g)) {
        if (v == g.startDs) {
            continue;
        }
        if (initial != NGHolder::null_vertex()) {
            DEBUG_PRINTF("more than one start\n");
            return false;
        }
        initial = v;
    }

    if (initial == NGHolder::null_vertex()) {
        DEBUG_PRINTF("no floating starts\n");
        return false;
    }

    // Anchored start must have no successors other than startDs and initial.
    for (auto v : adjacent_vertices_range(g.start, g)) {
        if (v != initial && v != g.startDs) {
            DEBUG_PRINTF("anchored start\n");
            return false;
        }
    }

    return true;
}
コード例 #2
0
ファイル: asserts.cpp プロジェクト: 0x4e38/hyperscan
/**
 * Resolves POS_FLAG_MULTILINE_START on the successors of start.
 *
 * Every edge from such a vertex into accept is rerouted through a fresh
 * all-character vertex, so that ^ doesn't match a trailing \n. The flag is
 * cleared on each vertex once its accept edges have been collected.
 */
static
void checkForMultilineStart(ReportManager &rm, NGWrapper &g) {
    vector<NFAEdge> to_accept; // edges to reroute, removed at the end

    for (auto v : adjacent_vertices_range(g.start, g)) {
        if (g[v].assert_flags & POS_FLAG_MULTILINE_START) {
            /* a multi-line start; there may be more than one */
            DEBUG_PRINTF("mls %u %08x\n", g[v].index,
                         g[v].assert_flags);

            for (const auto &e : out_edges_range(v, g)) {
                if (target(e, g) != g.accept) {
                    continue;
                }
                to_accept.push_back(e);
            }

            /* the assert is now handled, so drop the flag */
            g[v].assert_flags &= ~POS_FLAG_MULTILINE_START;
        }
    }

    /* interpose a dot vertex on each collected edge */
    for (const auto &e : to_accept) {
        NFAVertex dot = add_vertex(g);
        g[dot].char_reach.setall();
        setReportId(rm, g, dot, -1);
        add_edge(source(e, g), dot, g[e], g);
        add_edge(dot, g.accept, g);
    }

    remove_edges(to_accept, g);
}
コード例 #3
0
ファイル: ng_squash.cpp プロジェクト: 210230/hyperscan
/** Sets in succ the index bit of every non-special successor of v. */
static
void buildSucc(NFAStateSet &succ, const NGHolder &g, NFAVertex v) {
    for (auto next : adjacent_vertices_range(v, g)) {
        if (is_special(next, g)) {
            continue; // special vertices are not recorded
        }
        succ.set(g[next].index);
    }
}
コード例 #4
0
ファイル: ng_extparam.cpp プロジェクト: 01org/hyperscan
/** True if any successor of start carries POS_FLAG_VIRTUAL_START. */
static
bool hasVirtualStarts(const NGHolder &g) {
    bool found = false;
    for (auto succ : adjacent_vertices_range(g.start, g)) {
        if (g[succ].assert_flags & POS_FLAG_VIRTUAL_START) {
            found = true;
            break;
        }
    }
    return found;
}
コード例 #5
0
ファイル: ng_utf8.cpp プロジェクト: tomzhang/hyperscan
/** True if at least one successor of v is a member of s. */
static
bool hasSuccInSet(const NGHolder &g, NFAVertex v, const set<NFAVertex> &s) {
    bool hit = false;
    for (auto succ : adjacent_vertices_range(v, g)) {
        if (contains(s, succ)) {
            hit = true;
            break;
        }
    }
    return hit;
}
コード例 #6
0
ファイル: ng_equivalence.cpp プロジェクト: 01org/hyperscan
/**
 * True if v has exactly one out-edge leading to a non-special vertex other
 * than v itself.
 */
static
bool outIsIrreducible(NFAVertex &v, const NGHolder &g) {
    unsigned counted = 0;
    for (auto succ : adjacent_vertices_range(v, g)) {
        if (is_special(succ, g) || succ == v) {
            continue; // specials and the self-loop don't count
        }
        ++counted;
    }
    return counted == 1;
}
コード例 #7
0
ファイル: ng_extparam.cpp プロジェクト: 01org/hyperscan
/** True if every successor of start also has an in-edge from startDs. */
static
bool isUnanchored(const NGHolder &g) {
    for (auto succ : adjacent_vertices_range(g.start, g)) {
        const bool from_start_ds = edge(g.startDs, succ, g).second;
        if (from_start_ds) {
            continue;
        }
        DEBUG_PRINTF("fail, %u is anchored vertex\n",
                     g[succ].index);
        return false;
    }
    return true;
}
コード例 #8
0
ファイル: ng_execute.cpp プロジェクト: starius/hyperscan
static
void step(const NGHolder &g, const vector<StateInfo> &info,
          const dynamic_bitset<> &in, dynamic_bitset<> *out) {
    out->reset();
    for (size_t i = in.find_first(); i != in.npos; i = in.find_next(i)) {
        NFAVertex u = info[i].vertex;
        for (auto v : adjacent_vertices_range(u, g)) {
            out->set(g[v].index);
        }
    }
}
コード例 #9
0
ファイル: ng_is_equal.cpp プロジェクト: 0x4e38/hyperscan
/** \brief loose hash of an NGHolder; equal if is_equal would return true.
 *
 * Folds in, for each vertex in iteration order, its index, its character
 * reach and the index of each of its successors. */
u64a hash_holder(const NGHolder &g) {
    size_t seed = 0;

    for (auto v : vertices_range(g)) {
        boost::hash_combine(seed, g[v].index);
        boost::hash_combine(seed, g[v].char_reach);

        for (auto succ : adjacent_vertices_range(v, g)) {
            boost::hash_combine(seed, g[succ].index);
        }
    }

    return seed;
}
コード例 #10
0
/* Add prefix literals to the engine graph.
 *
 * Each trigger literal becomes a chain of new vertices, one per CharReach.
 * The last vertex of every chain is wired to each successor of start other
 * than startDs; start's out-edges are then replaced by a self-loop plus an
 * edge to the first vertex of every chain. The index of each chain's last
 * vertex is inserted into tailId.
 *
 * Returns false as soon as an empty literal is met (chains built for earlier
 * literals are left in the graph), true otherwise. */
static
bool addPrefixLiterals(NGHolder &h, unordered_set<u32> &tailId,
                       const vector<vector<CharReach>> &triggers) {
    DEBUG_PRINTF("add literals to graph\n");

    NFAVertex start = h.start;
    vector<NFAVertex> heads;
    vector<NFAVertex> tails;

    for (const auto &lit : triggers) {
        if (lit.empty()) {
            return false;
        }

        NFAVertex prev = start;
        bool at_head = true;
        for (const auto &c : lit) {
            DEBUG_PRINTF("lit:%s \n", c.to_string().c_str());
            NFAVertex cur = add_vertex(h);
            h[cur].char_reach = c;
            if (at_head) {
                // first vertex of the chain: linked from start further down
                heads.push_back(cur);
                at_head = false;
            } else {
                add_edge(prev, cur, h);
            }
            prev = cur;
        }
        tails.push_back(prev);
        tailId.insert(h[prev].index);
    }

    // Every chain tail leads to the original successors of start.
    for (auto succ : adjacent_vertices_range(start, h)) {
        if (succ == h.startDs) {
            continue;
        }
        for (auto &tail : tails) {
            add_edge(tail, succ, h);
        }
    }

    // Start now only loops on itself and feeds the chain heads.
    clear_out_edges(start, h);
    add_edge(h.start, h.start, h);
    for (auto &head : heads) {
        add_edge(start, head, h);
    }

    DEBUG_PRINTF("literals addition done\n");
    return true;
}
コード例 #11
0
ファイル: ng_equivalence.cpp プロジェクト: 01org/hyperscan
// Build one VertexInfo per vertex of g, then fill in each one's predecessor
// and successor sets together with the merged reach of those neighbours.
static
ptr_vector<VertexInfo> getVertexInfos(const NGHolder &g) {
    const size_t vertex_count = num_vertices(g);

    ptr_vector<VertexInfo> infos;
    infos.reserve(vertex_count * 2);

    // lookup from a vertex's index property to its VertexInfo
    vector<VertexInfo *> by_index(vertex_count);

    for (auto v : vertices_range(g)) {
        VertexInfo *created = new VertexInfo(v, g);
        infos.push_back(created);
        by_index[g[v].index] = created;
    }

    // second pass: link every entry to its neighbours
    for (VertexInfo &info : infos) {
        // predecessors
        for (const auto &e : in_edges_range(info.v, g)) {
            NFAVertex from = source(e, g);
            VertexInfo *from_info = by_index[g[from].index];

            info.pred_cr |= from_info->cr;
            info.pred.insert(from_info);

            // in a triggered graph, remember the top on edges out of start
            if (is_triggered(g) && from == g.start) {
                info.edge_top = g[e].top;
            }
        }

        // successors
        for (auto to : adjacent_vertices_range(info.v, g)) {
            VertexInfo *to_info = by_index[g[to].index];
            info.succ_cr |= to_info->cr;
            info.succ.insert(to_info);
        }

        assert(!hasEdgeAsserts(info.v, g));
    }

    return infos;
}
コード例 #12
0
ファイル: ng_utf8.cpp プロジェクト: tomzhang/hyperscan
/** \brief Relax forbidden UTF-8 sequences.
 *
 * Certain byte sequences never occur in valid UTF-8, either because they
 * encode code points above \\x{10ffff} or because they are overlong
 * encodings. Input is required to be valid UTF-8, so behaviour on such
 * sequences is undefined and we may accept them when that simplifies the
 * graph.
 *
 * Does nothing unless expr.utf8 is set. Otherwise, for every vertex whose
 * reach is exactly 0xe0, 0xf0 or 0xf4, allowIllegal is called on each of its
 * successors with that byte. */
void relaxForbiddenUtf8(NGHolder &g, const ExpressionInfo &expr) {
    if (!expr.utf8) {
        return;
    }

    const CharReach e0(0xe0);
    const CharReach f0(0xf0);
    const CharReach f4(0xf4);

    for (auto v : vertices_range(g)) {
        const CharReach &cr = g[v].char_reach;
        const bool is_lead = (cr == e0) || (cr == f0) || (cr == f4);
        if (!is_lead) {
            continue;
        }

        u8 lead_byte = cr.find_first();
        for (auto succ : adjacent_vertices_range(v, g)) {
            allowIllegal(g, succ, lead_byte);
        }
    }
}
コード例 #13
0
ファイル: ng_prune.cpp プロジェクト: 0x4e38/hyperscan
/**
 * True when all three hold for v: it has a self-loop, each of its out-edges
 * goes either to itself or to accept, and every one of its reports is a
 * simple exhaustible report.
 */
static
bool hasOnlySelfLoopAndExhaustibleAccepts(const NGHolder &g,
                                          const ReportManager &rm,
                                          NFAVertex v) {
    const bool self_loop = edge(v, v, g).second;
    if (!self_loop) {
        return false;
    }

    for (auto succ : adjacent_vertices_range(v, g)) {
        const bool permitted = (succ == v) || (succ == g.accept);
        if (!permitted) {
            return false;
        }
    }

    for (const auto &id : g[v].reports) {
        if (isSimpleExhaustible(rm.getReport(id))) {
            continue;
        }
        return false;
    }

    return true;
}
コード例 #14
0
ファイル: rose_build_infix.cpp プロジェクト: 0x4e38/hyperscan
/**
 * Bypasses v: connects each predecessor of v directly to each successor of v
 * (self-loops on v excluded), then removes all edges to and from v.
 *
 * all_edges records the (source, target) pairs known to exist; it is updated
 * with each pair added here.
 */
static
void contractVertex(NGHolder &g, NFAVertex v,
                    ue2::unordered_set<pair<NFAVertex, NFAVertex>> &all_edges) {
    for (auto pred : inv_adjacent_vertices_range(v, g)) {
        if (pred != v) { // skip the self-edge
            for (auto succ : adjacent_vertices_range(v, g)) {
                // Skip the self-edge, and add (pred, succ) only when it is not
                // already recorded. The all_edges container is consulted
                // rather than the graph, as an existence check inside the
                // graph is expensive for vertices of large degree.
                if (succ != v && all_edges.emplace(pred, succ).second) {
                    add_edge(pred, succ, g);
                }
            }
        }
    }

    // Entries for edges to/from v are deliberately left in all_edges.
    clear_vertex(v, g);
}
コード例 #15
0
ファイル: ng_squash.cpp プロジェクト: 210230/hyperscan
/**
 * Builds the squash masks for graph g.
 *
 * Every vertex that has a self-loop and is not an init state is considered.
 * Its base mask from buildSquashMask is intersected with the masks of
 * qualifying successors and predecessors, and the vertex is recorded only if
 * the resulting mask has at least one bit unset. The map is then passed to
 * findDerivedSquashers and clearMutualSquashers before being returned.
 *
 * \param g   graph to analyse.
 * \param som when set, region and SOM-depth information is computed and used
 *            to reject candidates that may hold an earlier start of match.
 * \return map from squashing vertex to its mask (indexed by vertex index).
 */
map<NFAVertex, NFAStateSet> findSquashers(const NGHolder &g, som_type som) {
    map<NFAVertex, NFAStateSet> squash;

    // Number of bits to use for all our masks. If we're a triggered graph,
    // tops have already been assigned, so we don't have to account for them.
    const u32 numStates = num_vertices(g);

    // Build post-dominator tree.
    PostDomTree pdom_tree;
    buildPDomTree(g, pdom_tree);

    // Build list of vertices by state ID and a set of init states.
    vector<NFAVertex> vByIndex(numStates, NFAGraph::null_vertex());
    NFAStateSet initStates(numStates);
    smgb_cache cache(g);

    // Mappings used for SOM mode calculations, otherwise left empty.
    unordered_map<NFAVertex, u32> region_map;
    vector<DepthMinMax> som_depths;
    if (som) {
        region_map = assignRegions(g);
        som_depths = getDistancesFromSOM(g);
    }

    for (auto v : vertices_range(g)) {
        const u32 vert_id = g[v].index;
        DEBUG_PRINTF("vertex %u/%u\n", vert_id, numStates);
        assert(vert_id < numStates);
        vByIndex[vert_id] = v;

        // Start vertices and vertices with no in-edges count as init states.
        if (is_any_start(v, g) || !in_degree(v, g)) {
            initStates.set(vert_id);
        }
    }

    for (u32 i = 0; i < numStates; i++) {
        NFAVertex v = vByIndex[i];
        assert(v != NFAGraph::null_vertex());
        const CharReach &cr = g[v].char_reach;

        /* only non-init cyclics can be squashers */
        if (!hasSelfLoop(v, g) || initStates.test(i)) {
            continue;
        }

        DEBUG_PRINTF("state %u is cyclic\n", i);

        NFAStateSet mask(numStates), succ(numStates), pred(numStates);
        buildSquashMask(mask, g, v, cr, initStates, vByIndex, pdom_tree, som,
                        som_depths, region_map, cache);
        buildSucc(succ, g, v);
        buildPred(pred, g, v);
        const auto &reports = g[v].reports;

        // Successors whose predecessor set is identical to v's contribute
        // their own mask (built with v's reach) to v's mask.
        for (size_t j = succ.find_first(); j != succ.npos;
             j = succ.find_next(j)) {
            NFAVertex vj = vByIndex[j];
            NFAStateSet pred2(numStates);
            buildPred(pred2, g, vj);
            if (pred2 == pred) {
                DEBUG_PRINTF("adding the sm from %zu to %u's sm\n", j, i);
                NFAStateSet tmp(numStates);
                buildSquashMask(tmp, g, vj, cr, initStates, vByIndex, pdom_tree,
                                som, som_depths, region_map, cache);
                mask &= tmp;
            }
        }

        // Predecessors are considered next; each must pass the successor,
        // SOM, special-edge, report and reach checks below to be used.
        for (size_t j = pred.find_first(); j != pred.npos;
             j = pred.find_next(j)) {
            NFAVertex vj = vByIndex[j];
            NFAStateSet succ2(numStates);
            buildSucc(succ2, g, vj);
            /* we can use j as a basis for squashing if its succs are a subset
             * of ours */
            if ((succ2 & ~succ).any()) {
                continue;
            }

            if (som) {
                /* We cannot use j to add to the squash mask of v if it may
                 * have an earlier start of match offset. ie for us j as a
                 * basis for the squash mask of v we require:
                 * maxSomDist(j) <= minSomDist(v)
                 */

                /* ** TODO ** */

                const depth &max_som_dist_j =
                    som_depths[g[vj].index].max;
                const depth &min_som_dist_v =
                    som_depths[g[v].index].min;
                if (max_som_dist_j > min_som_dist_v ||
                    max_som_dist_j.is_infinite()) {
                    /* j can't be used as it may be storing an earlier SOM */
                    continue;
                }
            }

            const CharReach &crv = g[vj].char_reach;

            /* we also require that j's report information be a subset of ours
             */
            bool seen_special = false;
            for (auto w : adjacent_vertices_range(vj, g)) {
                if (is_special(w, g)) {
                    // vj reaches a special vertex that v does not: reject vj.
                    if (!edge(v, w, g).second) {
                        goto next_j;
                    }
                    seen_special = true;
                }
            }

            // FIXME: should be subset check?
            if (seen_special && g[vj].reports != reports) {
                continue;
            }

            /* ok we can use j */
            // ... provided vj's reach is contained in v's reach; bit j itself
            // is also cleared from the mask in that case.
            if ((crv & ~cr).none()) {
                NFAStateSet tmp(numStates);
                buildSquashMask(tmp, g, vj, cr, initStates, vByIndex, pdom_tree,
                                som, som_depths, region_map, cache);
                mask &= tmp;
                mask.reset(j);
            }

        next_j:;
        }

        mask.set(i); /* never clear ourselves */

        if ((~mask).any()) { // i.e. some bits unset in mask
            DEBUG_PRINTF("%u squashes %zu other states\n", i, (~mask).count());
            squash.emplace(v, mask);
        }
    }

    findDerivedSquashers(g, vByIndex, pdom_tree, initStates, &squash, som,
                         som_depths, region_map, cache);

    clearMutualSquashers(g, vByIndex, squash);

    return squash;
}
コード例 #16
0
ファイル: ng_utf8.cpp プロジェクト: tomzhang/hyperscan
/**
 * Tries to absorb complete multi-byte UTF-8 sequences into the vertex v.
 *
 * A "start sibling" is a successor of v with the same predecessors as v and a
 * UTF-8 start-byte reach; an "end sibling" is a predecessor of v with the
 * same successors as v and reach exactly UTF_CONT_CR. A start sibling is
 * absorbed when it heads a linear chain (each vertex having a single
 * out-edge) of the length implied by its start byte, whose intermediate
 * vertices have reach UTF_CONT_CR and whose last vertex is an end sibling.
 * Absorbing adds the sibling's reach to v and clears the sibling's edges.
 *
 * If anything was absorbed, v's reach is also widened with the continuation
 * bytes and the bytes 0xc0, 0xc1 and 0xf5-0xff.
 *
 * Returns true if the graph was changed.
 */
static
bool expandCyclic(NGHolder &h, NFAVertex v) {
    DEBUG_PRINTF("inspecting %zu\n", h[v].index);
    bool changes = false;

    auto v_preds = preds(v, h);
    auto v_succs = succs(v, h);

    set<NFAVertex> start_siblings;
    set<NFAVertex> end_siblings;

    CharReach &v_cr = h[v].char_reach;

    /* Start vertices sharing all of our preds: since we have a self loop,
     * any such vertex is among our succs. */
    for (auto cand : adjacent_vertices_range(v, h)) {
        auto cand_preds = preds(cand, h);

        if (cand_preds == v_preds && isutf8start(h[cand].char_reach)) {
            DEBUG_PRINTF("%zu is a start v\n", h[cand].index);
            start_siblings.insert(cand);
        }
    }

    /* Full cont vertices sharing all of our succs: since we have a self
     * loop, any such vertex is among our preds. */
    for (auto cand : inv_adjacent_vertices_range(v, h)) {
        auto cand_succs = succs(cand, h);

        if (cand_succs == v_succs && h[cand].char_reach == UTF_CONT_CR) {
            DEBUG_PRINTF("%zu is a full tail cont\n", h[cand].index);
            end_siblings.insert(cand);
        }
    }

    for (auto s : start_siblings) {
        if (out_degree(s, h) != 1) {
            continue;
        }

        const CharReach &cr = h[s].char_reach;

        /* number of pure continuation vertices expected between s and the
         * end sibling */
        unsigned mid_count = 0;
        if (cr.isSubsetOf(UTF_TWO_START_CR)) {
            mid_count = 0;
        } else if (cr.isSubsetOf(UTF_THREE_START_CR)) {
            mid_count = 1;
        } else if (cr.isSubsetOf(UTF_FOUR_START_CR)) {
            mid_count = 2;
        } else {
            DEBUG_PRINTF("%zu is bad\n", h[s].index);
            continue;
        }

        /* follow the chain of single out-edges */
        NFAVertex cur = *adjacent_vertices(s, h).first;
        bool chain_ok = true;
        for (unsigned k = 0; k < mid_count; k++) {
            if (h[cur].char_reach != UTF_CONT_CR
                || out_degree(cur, h) != 1) {
                chain_ok = false;
                break;
            }
            cur = *adjacent_vertices(cur, h).first;
        }
        if (!chain_ok) {
            continue;
        }

        if (end_siblings.find(cur) == end_siblings.end()) {
            DEBUG_PRINTF("%zu is odd\n", h[s].index);
            continue;
        }

        v_cr |= cr;
        clear_vertex(s, h);
        changes = true;
    }

    if (changes) {
        v_cr |= UTF_CONT_CR; /* cont reach has to be included too */
        /* the forbidden bytes can be added as well, since valid unicode
         * data is required */
        v_cr.set(0xc0);
        v_cr.set(0xc1);
        v_cr |= CharReach(0xf5, 0xff);
    }

    return changes;
}
コード例 #17
0
ファイル: ng_squash.cpp プロジェクト: 210230/hyperscan
/**
 * Builds a squash mask based on the pdom tree of v and the given char reach.
 * The built squash mask is a bit conservative for non-dot cases and could
 * be improved with a bit of thought.
 *
 * While working, a set bit in mask marks a state selected via the pdom tree;
 * the mask is flipped just before a normal return. On the early bail-out
 * (a visited vertex whose reach is not contained in cr) the mask is returned
 * with every bit set and no flip.
 *
 * \param mask       output; presumably passed in with no bits set - TODO
 *                   confirm against callers.
 * \param g          graph being analysed.
 * \param v          vertex the mask is built for.
 * \param cr         reach that every visited vertex's reach must lie within.
 * \param init       init states, indexed by vertex index.
 * \param vByIndex   vertex lookup by index.
 * \param tree       post-dominator tree; the entry for a vertex lists the
 *                   vertices queued after it.
 * \param som        when set, SOM distance checks are applied.
 * \param som_depths min/max SOM distances by vertex index (read if som).
 * \param region_map passed through to somMayGoBackwards.
 * \param cache      passed through to mustBeSetBefore / somMayGoBackwards.
 */
static
void buildSquashMask(NFAStateSet &mask, const NGHolder &g, NFAVertex v,
                     const CharReach &cr, const NFAStateSet &init,
                     const vector<NFAVertex> &vByIndex, const PostDomTree &tree,
                     som_type som, const vector<DepthMinMax> &som_depths,
                     const ue2::unordered_map<NFAVertex, u32> &region_map,
                     smgb_cache &cache) {
    DEBUG_PRINTF("build base squash mask for vertex %u)\n",
                 g[v].index);

    // Work list, seeded with the tree entries recorded for v.
    vector<NFAVertex> q;

    PostDomTree::const_iterator it = tree.find(v);
    if (it != tree.end()) {
        q.insert(q.end(), it->second.begin(), it->second.end());
    }

    const u32 v_index = g[v].index;

    while (!q.empty()) {
        NFAVertex u = q.back();
        q.pop_back();
        const CharReach &cru = g[u].char_reach;

        if ((cru & ~cr).any()) {
            /* bail: bad cr on vertex u */
            /* TODO: this could be better
             *
             * we still need to ensure that we record any paths leading to u.
             * Hence all vertices R which can reach u must be excluded from the
             * squash mask. Note: R != pdom(u) and there may exist an x in (R -
             * pdom(u)) which is in pdom(y) where y is in q. Clear ?
             */
            // Returned with all bits set and without the final flip.
            mask.set();
            return;
        }

        const u32 u_index = g[u].index;

        if (som) {
            /* We cannot add a state u to the squash mask of v if it may have an
             * earlier start of match offset. ie for us to add a state u to v
             * maxSomDist(u) <= minSomDist(v)
             */
            const depth &max_som_dist_u = som_depths[u_index].max;
            const depth &min_som_dist_v = som_depths[v_index].min;

            if (max_som_dist_u.is_infinite()) {
                /* it is hard to tell due to the INF if u can actually store an
                 * earlier SOM than v (state we are building the squash mask
                 * for) - need to think more deeply
                 */

                if (mustBeSetBefore(u, v, g, cache)
                    && !somMayGoBackwards(u, g, region_map, cache)) {
                    DEBUG_PRINTF("u %u v %u\n", u_index, v_index);
                    goto squash_ok;
                }
            }

           if (max_som_dist_u > min_som_dist_v) {
                /* u can't be squashed as it may be storing an earlier SOM */
                goto add_children_to_queue;
            }

        }

    squash_ok:
        mask.set(u_index);
        DEBUG_PRINTF("pdom'ed %u\n", u_index);
    add_children_to_queue:
        // Continue the walk with the tree entries recorded for u, whether or
        // not u itself was added to the mask.
        it = tree.find(u);
        if (it != tree.end()) {
            q.insert(q.end(), it->second.begin(), it->second.end());
        }
    }

    if (cr.all()) {
        /* the init states aren't in the pdom tree. If all their succ states
         * are set (or v), we can consider them post dominated */

        /* Note: init states will always result in a later som */
        for (size_t i = init.find_first(); i != init.npos;
             i = init.find_next(i)) {
            /* Yes vacuous patterns do exist */
            NFAVertex iv = vByIndex[i];
            for (auto w : adjacent_vertices_range(iv, g)) {
                if (w == g.accept || w == g.acceptEod) {
                    DEBUG_PRINTF("skipping %zu due to vacuous accept\n", i);
                    goto next_init_state;
                }

                // A successor other than iv itself or v that is not yet in
                // the mask rules this init state out.
                u32 vert_id = g[w].index;
                if (w != iv && w != v && !mask.test(vert_id)) {
                    DEBUG_PRINTF("skipping %zu due to %u\n", i, vert_id);
                    goto next_init_state;
                }
            }
            DEBUG_PRINTF("pdom'ed %zu\n", i);
            mask.set(i);
        next_init_state:;
        }
    }

    // Invert: the states selected above become the unset bits of the result.
    mask.flip();
}
コード例 #18
0
ファイル: ng_som_util.cpp プロジェクト: chris1201/hyperscan
/** Gives start an edge to every successor of u that it lacks one to. */
static
void wireSuccessorsToStart(NGHolder &g, NFAVertex u) {
    for (auto succ : adjacent_vertices_range(u, g)) {
        add_edge_if_not_present(g.start, succ, g);
    }
}
コード例 #19
0
ファイル: ng_som_util.cpp プロジェクト: chris1201/hyperscan
/**
 * Conservatively decides whether the start of match may go backwards at
 * vertex u of graph g. Results are memoised per vertex in cache.smgb.
 *
 * Returns true if g has a back edge other than a self-loop whose source lies
 * in a region at or before u's region. Otherwise a copy of g is cut down so
 * that it accepts at u (and at predecessors of u that share a successor with
 * u), and the result is the negation of firstMatchIsFirst() on that copy.
 *
 * \param u          vertex under test; must be present in region_map.
 * \param g          graph; must be the graph the cache was built for.
 * \param region_map region number for each vertex of g.
 * \param cache      memoisation table, read and updated.
 */
bool somMayGoBackwards(NFAVertex u, const NGHolder &g,
                       const ue2::unordered_map<NFAVertex, u32> &region_map,
                       smgb_cache &cache) {
    /* Need to ensure all matches of the graph g up to u contain no infixes
     * which are also matches of the graph to u.
     *
     * This is basically the same as firstMatchIsFirst except we g is not
     * always a dag. As we haven't gotten around to writing an execute_graph
     * that operates on general graphs, we take some (hopefully) conservative
     * short cuts.
     *
     * Note: if the u can be jumped we will take jump edges
     * into account as a possibility of som going backwards
     *
     * TODO: write a generalised ng_execute_graph/make this less hacky
     */
    assert(&g == &cache.g);
    if (contains(cache.smgb, u)) {
        return cache.smgb[u];
    }

    DEBUG_PRINTF("checking if som can go backwards on %u\n",
                  g[u].index);

    // Collect the back edges of g found by a DFS rooted at start.
    set<NFAEdge> be;
    BackEdges<set<NFAEdge>> backEdgeVisitor(be);
    depth_first_search(
        g.g, visitor(backEdgeVisitor)
                 .root_vertex(g.start)
                 .vertex_index_map(get(&NFAGraphVertexProps::index, g.g)));

    bool rv;
    // Shared exit path: never entered by fall-through, only via "goto exit".
    // It stores rv in the cache and returns it.
    // NOTE(review): the debug message says "using cached result" although
    // this path records a freshly computed value - confirm intent.
    if (0) {
    exit:
        DEBUG_PRINTF("using cached result\n");
        cache.smgb[u] = rv;
        return rv;
    }

    assert(contains(region_map, u));
    const u32 u_region = region_map.at(u);

    for (const auto &e : be) {
        NFAVertex s = source(e, g);
        NFAVertex t = target(e, g);
        /* only need to worry about big cycles including/before u */
        DEBUG_PRINTF("back edge %u %u\n", g[s].index,
                      g[t].index);
        if (s != t && region_map.at(s) <= u_region) {
            DEBUG_PRINTF("eek big cycle\n");
            rv = true; /* big cycle -> eek */
            goto exit;
        }
    }

    // Work on a copy of g from here on; orig_to_copy maps g's vertices to
    // their counterparts in c_g.
    ue2::unordered_map<NFAVertex, NFAVertex> orig_to_copy;
    NGHolder c_g;
    cloneHolder(c_g, g, &orig_to_copy);

    // Fold each virtual start into startDs: its successors are wired from
    // startDs, its own edges are cleared, and the mapping is redirected.
    for (NFAVertex v : vertices_range(g)) {
        if (!is_virtual_start(v, g)) {
            continue;
        }
        NFAVertex c_v = orig_to_copy[v];
        orig_to_copy[v] = c_g.startDs;
        for (NFAVertex c_w : adjacent_vertices_range(c_v, c_g)) {
            add_edge_if_not_present(c_g.startDs, c_w, c_g);
        }
        clear_vertex(c_v, c_g);
    }

    // Rewire the accepts of the copy: accept -> acceptEod is the only edge
    // into acceptEod, and u's copy keeps only its self-loop (if u has one)
    // plus an edge to accept.
    NFAVertex c_u = orig_to_copy[u];
    clear_in_edges(c_g.acceptEod, c_g);
    add_edge(c_g.accept, c_g.acceptEod, c_g);
    clear_in_edges(c_g.accept, c_g);
    clear_out_edges(c_u, c_g);
    if (hasSelfLoop(u, g)) {
        add_edge(c_u, c_u, c_g);
    }
    add_edge(c_u, c_g.accept, c_g);

    // Successors of u in the original graph, excluding u itself.
    set<NFAVertex> u_succ;
    insert(&u_succ, adjacent_vertices(u, g));
    u_succ.erase(u);

    // A predecessor of u that shares a successor with u also gets an edge to
    // accept in the copy.
    for (auto t : inv_adjacent_vertices_range(u, g)) {
        if (t == u) {
            continue;
        }
        for (auto v : adjacent_vertices_range(t, g)) {
            if (contains(u_succ, v)) {
                add_edge(orig_to_copy[t], c_g.accept, c_g);
                break;
            }
        }
    }

    pruneUseless(c_g);

    // Recompute back edges on the copy; anything other than a self-loop is
    // unexpected here (asserted) and is handled as a big cycle.
    be.clear();
    depth_first_search(c_g.g, visitor(backEdgeVisitor).root_vertex(c_g.start).
                       vertex_index_map(get(&NFAGraphVertexProps::index, c_g.g)));

    for (const auto &e : be) {
        NFAVertex s = source(e, c_g);
        NFAVertex t = target(e, c_g);
        DEBUG_PRINTF("back edge %u %u\n", c_g[s].index, c_g[t].index);
        if (s != t) {
            assert(0);
            DEBUG_PRINTF("eek big cycle\n");
            rv = true; /* big cycle -> eek */
            goto exit;
        }
    }

    DEBUG_PRINTF("checking acyclic+selfloop graph\n");

    rv = !firstMatchIsFirst(c_g);
    DEBUG_PRINTF("som may regress? %d\n", (int)rv);
    goto exit;
}
コード例 #20
0
ファイル: ng_extparam.cpp プロジェクト: 01org/hyperscan
/** An unanchored pattern with a max_offset that has not asked for SOM can be
 * anchored using that knowledge, which limits its lifespan. The
 * transformation can't be used when there is a min_length, as that is
 * currently handled using "sly SOM".
 *
 * Graphs mixing anchored and unanchored paths could in principle be handled,
 * but that is too tricky for the moment.
 *
 * The successors of startDs (other than startDs itself) are detached from
 * both starts and re-attached behind a chain of all-character vertices hung
 * off the anchored start, equivalent to prepending ^.{min_bound,max_bound}.
 *
 * Returns false, leaving the graph untouched, if max_offset is above
 * MAX_MAXOFFSET_TO_ANCHOR or below minWidth, if the graph has virtual starts
 * or is vacuous, or if startDs has no successor besides itself. Returns true
 * once the graph has been rewritten and renumbered.
 */
static
bool anchorPatternWithBoundedRepeat(NGWrapper &g, const depth &minWidth,
                                    const depth &maxWidth) {
    assert(!g.som);
    assert(g.max_offset != MAX_OFFSET);
    assert(minWidth <= maxWidth);
    assert(maxWidth.is_reachable());

    DEBUG_PRINTF("widths=[%s,%s], min/max offsets=[%llu,%llu]\n",
                 minWidth.str().c_str(), maxWidth.str().c_str(), g.min_offset,
                 g.max_offset);

    if (g.max_offset > MAX_MAXOFFSET_TO_ANCHOR) {
        return false;
    }

    if (g.max_offset < minWidth) {
        assert(0);
        return false;
    }

    // Patterns with virtual starts are probably best left alone.
    if (hasVirtualStarts(g)) {
        DEBUG_PRINTF("virtual starts, bailing\n");
        return false;
    }

    // Vacuous patterns are skipped as well. TODO: this could be done, we
    // would just need to be a little careful with reports.
    if (isVacuous(g)) {
        DEBUG_PRINTF("vacuous, bailing\n");
        return false;
    }

    // The lower bound stays zero unless maxWidth is finite and smaller than
    // min_offset; the upper bound is the same in both cases.
    u32 min_bound = 0;
    if (!maxWidth.is_infinite() && g.min_offset > maxWidth) {
        min_bound = g.min_offset - maxWidth;
    }
    const u32 max_bound = g.max_offset - minWidth;

    DEBUG_PRINTF("prepending ^.{%u,%u}\n", min_bound, max_bound);

    // Floating initial vertices: successors of startDs other than itself.
    vector<NFAVertex> initials;
    for (auto succ : adjacent_vertices_range(g.startDs, g)) {
        if (succ != g.startDs) {
            initials.push_back(succ);
        }
    }
    if (initials.empty()) {
        DEBUG_PRINTF("no initial vertices\n");
        return false;
    }

    // Chain of min_bound mandatory dots hanging off the anchored start.
    NFAVertex tail = g.start;
    for (u32 n = 0; n < min_bound; n++) {
        NFAVertex dot = add_vertex(g);
        g[dot].char_reach.setall();
        add_edge(tail, dot, g);
        tail = dot;
    }

    // End of the mandatory part; each optional dot can be entered from here.
    const NFAVertex head = tail;

    // Chain of (max_bound - min_bound) optional dots.
    const u32 optional_dots = max_bound - min_bound;
    for (u32 n = 0; n < optional_dots; n++) {
        NFAVertex dot = add_vertex(g);
        g[dot].char_reach.setall();
        if (head != tail) {
            add_edge(head, dot, g);
        }
        add_edge(tail, dot, g);
        tail = dot;
    }

    // Detach the initials from both starts, then feed them from head and
    // from the last dot.
    for (auto init : initials) {
        remove_edge(g.startDs, init, g);
        remove_edge(g.start, init, g);

        if (head != tail) {
            add_edge(head, init, g);
        }
        add_edge(tail, init, g);
    }

    g.renumberVertices();
    g.renumberEdges();

    return true;
}