Example #1
0
/** Compute a min/max depth for every vertex of \p g_orig.
 *
 * Depths are calculated from the start vertex of a private clone in which the
 * successors of startDs and of every virtual start have been wired to start
 * and the in-edges of those vertices removed. The returned vector is indexed
 * by vertex index in \p g_orig; startDs and virtual starts are always given
 * depth (0, 0). */
vector<DepthMinMax> getDistancesFromSOM(const NGHolder &g_orig) {
    // All rewiring below happens on a clone, so the caller's graph is never
    // mutated.
    NGHolder work;
    ue2::unordered_map<NFAVertex, NFAVertex> orig_to_work; // g_orig -> work
    cloneHolder(work, g_orig, &orig_to_work);

    // Gather the clone's virtual starts, followed by its startDs.
    vector<NFAVertex> som_starts;
    for (auto v : vertices_range(work)) {
        if (is_virtual_start(v, work)) {
            som_starts.push_back(v);
        }
    }
    som_starts.push_back(work.startDs);

    // Pass one: wire the successors of each of these vertices to work.start.
    for (auto v : som_starts) {
        wireSuccessorsToStart(work, v);
    }

    // Pass two: strip their in-edges so that they don't participate in the
    // depth calculation. This is kept as a separate pass over the list.
    for (auto v : som_starts) {
        clear_in_edges(v, work);
    }

    //dumpGraph("som_depth.dot", work.g);

    vector<DepthMinMax> work_depths; // numbered by vertex index in the clone
    calcDepthsFrom(work, work.start, work_depths);

    // Copy the depths across, re-indexed by vertex index in g_orig.
    vector<DepthMinMax> depths(num_vertices(g_orig));

    for (auto v_orig : vertices_range(g_orig)) {
        assert(contains(orig_to_work, v_orig));
        NFAVertex v_work = orig_to_work[v_orig];

        DepthMinMax &out = depths.at(g_orig[v_orig].index);

        const bool zero_depth =
            v_orig == g_orig.startDs || is_virtual_start(v_orig, g_orig);
        if (zero_depth) {
            // StartDs and virtual starts always have zero depth.
            out = DepthMinMax(0, 0);
            continue;
        }

        out = work_depths.at(work[v_work].index);
    }

    return depths;
}
Example #2
0
static
void gather_vars(const GoughGraph &g, vector<const GoughSSAVar *> *vars,
                 map<const GoughSSAVar *, string> *names,
                 map<const GoughSSAVar *, string> *src_label,
                 set<const GoughSSAVar *> *reporters) {
    for (auto v : vertices_range(g)) {
        for (const auto &r : g[v].reports) {
            reporters->insert(r.second);
        }
        for (const auto &r : g[v].reports_eod) {
            reporters->insert(r.second);
        }

        for (u32 i = 0; i < g[v].vars.size(); i++) {
            const GoughSSAVar *vp = g[v].vars[i].get();
            stringstream ss;
            ss << dump_name(g[v]) << "_" << i;
            vars->push_back(vp);
            names->insert(make_pair(vp, ss.str()));
            src_label->insert(make_pair(vp, dump_name(g[v])));
        }
    }

    for (const auto &e : edges_range(g)) {
        for (u32 i = 0; i < g[e].vars.size(); i++) {
            const GoughSSAVar *vp = g[e].vars[i].get();
            stringstream ss;
            ss << dump_name(g, e) << "_" << i;
            vars->push_back(vp);
            names->insert(make_pair(vp, ss.str()));
            src_label->insert(make_pair(vp, dump_name(g, e)));
        }
    }
}
/** Return the vertices of \p g that have a state index other than NO_STATE in
 * \p state_ids, sorted by ascending state index. Debug builds assert that
 * each vertex's state index equals its position in the returned vector. */
static
vector<NFAVertex> getSortedVA(const NGHolder &g,
            const ue2::unordered_map<NFAVertex, u32> &state_ids) {
    vector<NFAVertex> ordered;
    ordered.reserve(num_vertices(g));

    // Keep only the vertices that were actually assigned a state.
    for (auto v : vertices_range(g)) {
        assert(contains(state_ids, v));
        if (state_ids.at(v) != NO_STATE) {
            ordered.push_back(v);
        }
    }

    // Order vertices by their state indices.
    const auto by_state = [&state_ids](NFAVertex lhs, NFAVertex rhs) {
        return state_ids.at(lhs) < state_ids.at(rhs);
    };
    sort(begin(ordered), end(ordered), by_state);

#ifndef NDEBUG
    // State indices should match vector indices.
    u32 expected = 0;
    for (auto v : ordered) {
        assert(state_ids.at(v) == expected);
        expected++;
    }
#endif

    return ordered;
}
Example #4
0
/** Write a Graphviz description of \p g to the file
 * "<grey.dumpPath>gough_<base>.dot".
 *
 * Each vertex is emitted as a node labelled with its state_id (drawn as a
 * double circle when it has reports or reports_eod), followed by one line per
 * edge. If the output file cannot be opened, nothing is written and the
 * function returns silently. */
static
void dump_graph(const GoughGraph &g, const string &base, const Grey &grey) {
    stringstream ss;
    ss << grey.dumpPath << "gough_" << base << ".dot";

    FILE *f = fopen(ss.str().c_str(), "w");
    if (!f) {
        // fopen() returns a null pointer on failure; passing that to
        // fprintf()/fclose() is undefined behaviour, so give up on the dump.
        return;
    }

    fprintf(f, "digraph NFA {\n");
    fprintf(f, "rankdir=LR;\n");
    fprintf(f, "size=\"11.5,8\"\n");
    fprintf(f, "node [ shape = circle ];\n");
    fprintf(f, "START [style=invis];\n");

    // Nodes.
    for (auto v : vertices_range(g)) {
        fprintf(f, "%s [ width = 1, fixedsize = true, fontsize = 12, ",
                dump_name(g[v]).c_str());
        if (!g[v].reports.empty() || !g[v].reports_eod.empty()) {
            fprintf(f, "shape = doublecircle ");
        }

        fprintf(f, "label = \"%u\"];\n", g[v].state_id);
    }

    // Edges.
    for (const auto &e : edges_range(g)) {
        GoughVertex s = source(e, g);
        GoughVertex t = target(e, g);

        fprintf(f, "%s -> %s\n",
                dump_name(g[s]).c_str(), dump_name(g[t]).c_str());
    }
    fprintf(f, "}\n");

    fclose(f);
}
Example #5
0
/** Compute the stop alphabet of graph \p g: the characters that do not appear
 * in the reachability of any non-special vertex whose maximum depth is at
 * least MAX_STOP_DEPTH.
 *
 * When \p som is SOM_NONE the reduced reachability of each vertex is used;
 * otherwise its raw char_reach is used. Seeing one of the returned characters
 * means the NFA is either dead or has not progressed beyond that depth from
 * its start states. */
CharReach findStopAlphabet(const NGHolder &g, som_type som) {
    const depth max_depth(MAX_STOP_DEPTH);
    const InitDepths depths(g);
    const map<NFAVertex, BoundedRepeatSummary> no_vertices;

    // Accumulates the characters that are still live at or past max_depth.
    CharReach cr;

    for (auto v : vertices_range(g)) {
        if (is_special(v, g) || !(depths.maxDist(g, v) >= max_depth)) {
            continue;
        }

        if (som != SOM_NONE) {
            cr |= g[v].char_reach;
        } else {
            cr |= reduced_cr(v, g, no_vertices);
        }
    }

    // Everything that is not live is a stop character.
    cr.flip();

    return cr;
}
Example #6
0
/** Remove from \p h every non-special vertex that a depth-first visit of \p g
 * starting at \p s does not reach.
 *
 * \p vertexColor is scratch storage indexed by vertex index; it is reset to
 * white on entry. Returns true if any vertices were removed. */
static
bool pruneForwardUseless(NGHolder &h, const nfag_t &g, NFAVertex s,
                         vector<default_color_type> &vertexColor) {
    // The visit only marks the vertices it touches, so start from all-white.
    fill(vertexColor.begin(), vertexColor.end(), boost::white_color);

    depth_first_visit(
        g, s, make_dfs_visitor(boost::null_visitor()),
        make_iterator_property_map(vertexColor.begin(),
                                   get(&NFAGraphVertexProps::index, g)));

    // Any non-special vertex left white was never reached.
    vector<NFAVertex> unreached;
    for (auto v : vertices_range(g)) {
        if (is_special(v, g)) {
            continue;
        }
        if (vertexColor[g[v].index] != boost::white_color) {
            continue;
        }
        DEBUG_PRINTF("vertex %u is unreachable from %u\n",
                     g[v].index, g[s].index);
        unreached.push_back(v);
    }

    const bool changed = !unreached.empty();
    if (changed) {
        DEBUG_PRINTF("removing %zu vertices\n", unreached.size());
        remove_vertices(unreached, h, false);
    }
    return changed;
}
Example #7
0
bool firstMatchIsFirst(const NGHolder &p) {
    /* If the first match (by end offset) is not the first match (by start
     * offset) then we can't create a lock after it.
     *
     * Consider: 4009:/(foobar|ob).*bugger/s
     *
     * We don't care about races on the last byte as they can be resolved easily
     * at runtime /(foobar|obar).*hi/
     *
     * It should be obvious we don't care about one match being a prefix
     * of another as they share the same start offset.
     *
     * Therefore, the case were we cannot establish that the som does not
     * regress is when there exists s1 and s2 in the language of p and s2 is a
     * proper infix of s1.
     *
     * It is tempting to add the further restriction that there does not exist a
     * prefix of s1 that is in the language of p (as in which case we would
     * presume, the lock has already been set). However, we have no way of
     * knowing if the lock can be cleared by some characters, and if so, if it
     * is still set. TODO: if we knew the lock's escapes where we could verify
     * that the rest of s1 does not clear the lock. (1)
     */

    DEBUG_PRINTF("entry\n");

    /* If there are any big cycles throw up our hands in despair */
    if (hasBigCycles(p)) {
        DEBUG_PRINTF("fail, big cycles\n");
        return false;
    }

    set<NFAVertex> states;
    /* turn on all states (except starts - avoid suffix matches) */
    /* If we were doing (1) we would also except states leading to accepts -
       avoid prefix matches */
    for (auto v : vertices_range(p)) {
        assert(!is_virtual_start(v, p));
        if (!is_special(v, p)) {
            DEBUG_PRINTF("turning on %u\n", p[v].index);
            states.insert(v);
        }
    }

    /* run the prefix the main graph */
    execute_graph(p, p, &states);

    for (auto v : states) {
        /* need to check if this vertex may represent an infix match - ie
         * it does not have an edge to accept. */
        DEBUG_PRINTF("check %u\n", p[v].index);
        if (!edge(v, p.accept, p).second) {
            DEBUG_PRINTF("fail %u\n", p[v].index);
            return false;
        }
    }

    DEBUG_PRINTF("done first is first check\n");
    return true;
}
Example #8
0
/** Remove every non-special vertex of \p g that cannot be reached by walking
 * the graph backwards from acceptEod. */
void pruneUnreachable(NGHolder &g) {
    deque<NFAVertex> dead;

    // True when the accepts have no in-edges other than accept->acceptEod.
    const bool accepts_unused = !hasGreaterInDegree(1, g.acceptEod, g) &&
                                !hasGreaterInDegree(0, g.accept, g) &&
                                edge(g.accept, g.acceptEod, g).second;

    if (accepts_unused) {
        // Trivial case: nothing leads to an accept, so every non-special
        // vertex is unreachable.
        for (auto v : vertices_range(g)) {
            if (is_special(v, g)) {
                continue;
            }
            dead.push_back(v);
        }
    } else {
        // General case: depth-first visit of the reversed graph, rooted at
        // acceptEod, using Boost's depth_first_visit.
        using RevNFAGraph = reverse_graph<NFAGraph, NFAGraph&>;
        RevNFAGraph revg(g.g);

        map<NFAVertex, default_color_type> colours;

        depth_first_visit(revg, g.acceptEod,
                          make_dfs_visitor(boost::null_visitor()),
                          make_assoc_property_map(colours));

        DEBUG_PRINTF("color map has %zu entries after DFV\n", colours.size());

        // A non-special vertex absent from the colour map was never reached
        // and can be removed.
        for (auto v : vertices_range(revg)) {
            if (!is_special(v, revg) && !contains(colours, v)) {
                dead.push_back(v);
            }
        }
    }

    if (dead.empty()) {
        DEBUG_PRINTF("no unreachable vertices\n");
        return;
    }

    remove_vertices(dead, g, false);
    DEBUG_PRINTF("removed %zu unreachable vertices\n", dead.size());
}
/** Append the variables held on every vertex of \p g, followed by those held
 * on every edge, to \p out (via push_back_all_raw). */
static
void all_vars(const GoughGraph &g, vector<GoughSSAVar *> *out) {
    // Vertex variables first...
    for (auto v : vertices_range(g)) {
        const auto &vertex_vars = g[v].vars;
        push_back_all_raw(out, vertex_vars);
    }

    // ...then edge variables.
    for (const auto &e : edges_range(g)) {
        const auto &edge_vars = g[e].vars;
        push_back_all_raw(out, edge_vars);
    }
}
Example #10
0
/** Returns true if any vertex of \p g has one of the WORDBOUNDARY_FLAGS bits
 * set in its assert_flags. */
static
bool hasAssertVertices(const NGHolder &g) {
    bool found = false;
    for (auto v : vertices_range(g)) {
        const int vertex_flags = g[v].assert_flags;
        if ((vertex_flags & WORDBOUNDARY_FLAGS) != 0) {
            found = true;
            break;
        }
    }
    return found;
}
/** Returns the union of the char_reach of every non-special vertex in \p h. */
static
CharReach getReachability(const NGHolder &h) {
    CharReach combined;
    for (const auto &v : vertices_range(h)) {
        if (is_special(v, h)) {
            continue; // specials do not contribute
        }
        combined |= h[v].char_reach;
    }
    return combined;
}
Example #12
0
/** Some squash states are clearly not advantageous in the NFA, as they do
 * incur the cost of an exception:
 * -# acyclic states
 * -# squash only a few acyclic states
 */
void filterSquashers(const NGHolder &g,
                     map<NFAVertex, NFAStateSet> &squash) {
    DEBUG_PRINTF("filtering\n");
    map<u32, NFAVertex> rev; /* vertex_index -> vertex */
    for (auto v : vertices_range(g)) {
        rev[g[v].index] = v;
    }

    for (auto v : vertices_range(g)) {
        if (!contains(squash, v)) {
            continue;
        }
        DEBUG_PRINTF("looking at squash set for vertex %u\n",
                     g[v].index);

        if (!hasSelfLoop(v, g)) {
            DEBUG_PRINTF("acyclic\n");
            squash.erase(v);
            continue;
        }

        NFAStateSet squashed = squash[v];
        squashed.flip(); /* default sense for mask of survivors */
        for (NFAStateSet::size_type sq = squashed.find_first();
             sq != squashed.npos; sq = squashed.find_next(sq)) {
            NFAVertex u = rev[sq];
            if (hasSelfLoop(u, g)) {
                DEBUG_PRINTF("squashing a cyclic (%zu) is always good\n", sq);
                goto next_vertex;
            }
        }

        if (squashed.count() < MIN_PURE_ACYCLIC_SQUASH) {
            DEBUG_PRINTF("squash set too small\n");
            squash.erase(v);
            continue;
        }

    next_vertex:;
        DEBUG_PRINTF("squash set ok\n");
    }
}
Example #13
0
/** Build a table with one StateInfo per vertex of \p g, indexed by vertex
 * index; each entry is constructed from the vertex and its char_reach. */
static
vector<StateInfo> makeInfoTable(const NGHolder &g) {
    vector<StateInfo> table(num_vertices(g));
    for (auto v : vertices_range(g)) {
        const u32 slot = g[v].index;
        assert(slot < table.size());
        table[slot] = StateInfo(v, g[v].char_reach);
    }
    return table;
}
/* Crude, deterministic assignment of symbolic register slots: the variables
 * on each vertex are handled first, then those on each edge, all sharing one
 * running slot counter.
 * Returns the number of slots given out.
 */
static
u32 initial_slots(const GoughGraph &g) {
    u32 slots_used = 0;

    for (auto v : vertices_range(g)) {
        set_initial_slots(g[v].vars, &slots_used);
    }

    for (const auto &e : edges_range(g)) {
        set_initial_slots(g[e].vars, &slots_used);
    }

    return slots_used;
}
Example #15
0
/** Write a textual listing of the SSA variables of \p g to the file
 * "<grey.dumpPath>gough_<base>_vars.txt".
 *
 * For every vertex, and then every edge, that defines or uses variables, the
 * listing gives its dump name, the slot of each variable it holds and the
 * slots of the variables it uses. If the output file cannot be opened,
 * nothing is written and the function returns silently. */
static
void dump_var_mapping(const GoughGraph &g, const string &base,
                      const Grey &grey) {
    stringstream ss;
    ss << grey.dumpPath << "gough_" << base << "_vars.txt";
    FILE *f = fopen(ss.str().c_str(), "w");
    if (!f) {
        // fopen() returns a null pointer on failure; passing that to
        // fprintf()/fclose() is undefined behaviour, so give up on the dump.
        return;
    }

    // Vertices.
    for (auto v : vertices_range(g)) {
        set<const GoughSSAVar *> used = uses(g[v]);
        if (g[v].vars.empty() && used.empty()) {
            continue; // nothing defined or used here
        }
        fprintf(f, "%s\n", dump_name(g[v]).c_str());
        for (u32 i = 0; i < g[v].vars.size(); i++) {
            const GoughSSAVar *vp = g[v].vars[i].get();
            fprintf(f, "\t%u: slot %u\n", i, vp->slot);
        }
        if (!used.empty()) {
            fprintf(f, "\tuses:");
            // Slots are printed in the iteration order of the 'used' set.
            for (const GoughSSAVar *var : used) {
                fprintf(f, " %u", var->slot);
            }
            fprintf(f, "\n");
        }
    }

    // Edges.
    for (const auto &e : edges_range(g)) {
        set<const GoughSSAVar *> used = uses(g[e]);
        if (g[e].vars.empty() && used.empty()) {
            continue; // nothing defined or used here
        }
        fprintf(f, "%s\n", dump_name(g, e).c_str());
        for (u32 i = 0; i < g[e].vars.size(); i++) {
            const GoughSSAVar *vp = g[e].vars[i].get();
            fprintf(f, "\t%u: slot %u\n", i, vp->slot);
        }
        if (!used.empty()) {
            fprintf(f, "\tuses:");
            // Slots are printed in the iteration order of the 'used' set.
            for (const GoughSSAVar *var : used) {
                fprintf(f, " %u", var->slot);
            }
            fprintf(f, "\n");
        }
    }
    fclose(f);
}
Example #16
0
/** \brief Loose hash of an NGHolder; two holders hash equal if is_equal would
 * return true for them.
 *
 * For each vertex, folds in its index, its char_reach and the index of each
 * of its successors. */
u64a hash_holder(const NGHolder &g) {
    size_t seed = 0;

    for (auto v : vertices_range(g)) {
        const auto &props = g[v];
        boost::hash_combine(seed, props.index);
        boost::hash_combine(seed, props.char_reach);

        for (auto succ : adjacent_vertices_range(v, g)) {
            boost::hash_combine(seed, g[succ].index);
        }
    }

    return seed;
}
Example #17
0
void fct(const LCC& lcc)
{
  vertex_range vr(vertices(lcc));
  
  std::cout << "new for loop" << std::endl;
  for(vertex_descriptor vd : vr){
    std::cout << vd->point() << std::endl;
  }
  
  std::cout << "boost::tie + std::for_each" << std::endl;
  vertex_iterator vb, ve;
  
  boost::tie(vb,ve) = vertices_range(lcc);
  std::for_each(vb,ve, Fct());
}
Example #18
0
// Builds the vertex -> state index map for h. Every vertex of h starts out as
// NO_STATE; the vertices listed in ordering are then numbered consecutively
// from zero, in the order given. Returns the map.
static
ue2::unordered_map<NFAVertex, u32>
getStateIndices(const NGHolder &h, const vector<NFAVertex> &ordering) {
    ue2::unordered_map<NFAVertex, u32> state_of;

    // Default: no state assigned.
    for (const auto &v : vertices_range(h)) {
        state_of[v] = NO_STATE;
    }

    u32 next_state = 0;
    for (auto v : ordering) {
        DEBUG_PRINTF("assigning state num %u to vertex %u\n", next_state,
                     h[v].index);
        state_of[v] = next_state;
        next_state++;
    }

    return state_of;
}
Example #19
0
// Build the VertexInfo table for g: one VertexInfo per vertex, each with its
// predecessor/successor sets and combined predecessor/successor reach filled
// in. For triggered graphs, edge_top is taken from in-edges that come from
// g.start.
static
ptr_vector<VertexInfo> getVertexInfos(const NGHolder &g) {
    const size_t num_verts = num_vertices(g);

    ptr_vector<VertexInfo> infos;
    infos.reserve(num_verts * 2);

    // Lookup from vertex_index property to the VertexInfo for that vertex.
    vector<VertexInfo *> info_by_index;
    info_by_index.resize(num_verts);

    // Pass one: create a VertexInfo for each vertex. The ptr_vector takes
    // ownership of the new object.
    for (auto v : vertices_range(g)) {
        VertexInfo *created = new VertexInfo(v, g);
        infos.push_back(created);
        info_by_index[g[v].index] = created;
    }

    // Pass two: populate each entry's predecessor and successor lists.
    for (VertexInfo &info : infos) {
        // predecessors
        for (const auto &e : in_edges_range(info.v, g)) {
            NFAVertex pred_v = source(e, g);
            VertexInfo *pred_info = info_by_index[g[pred_v].index];

            info.pred_cr |= pred_info->cr;
            info.pred.insert(pred_info);

            // also set up edge tops
            if (is_triggered(g) && pred_v == g.start) {
                info.edge_top = g[e].top;
            }
        }

        // successors
        for (auto succ_v : adjacent_vertices_range(info.v, g)) {
            VertexInfo *succ_info = info_by_index[g[succ_v].index];
            info.succ_cr |= succ_info->cr;
            info.succ.insert(succ_info);
        }

        assert(!hasEdgeAsserts(info.v, g));
    }

    return infos;
}
Example #20
0
static
void buildPDomTree(const NGHolder &g, PostDomTree &tree) {
    ue2::unordered_map<NFAVertex, NFAVertex> postdominators =
        findPostDominators(g);

    for (auto v : vertices_range(g)) {
        if (is_special(v, g)) {
            continue;
        }
        NFAVertex pdom = postdominators[v];
        if (pdom) {
            DEBUG_PRINTF("vertex %u -> %u\n", g[pdom].index,
                         g[v].index);
            tree[pdom].insert(v);
        }
    }
}
Example #21
0
/** Calculate the stop alphabet for each depth from 0 to MAX_STOP_DEPTH, then
 * build a mask per character C in which bit i is set when C is a stop
 * character at depth i. Each bit represents the depth before the location of
 * character C (if encountered) at which the NFA would be in a predictable
 * start state.
 *
 * When \p som is SOM_NONE the reduced reachability of each vertex is used;
 * otherwise its raw char_reach is used. The result has N_CHARS entries. */
vector<u8> findLeftOffsetStopAlphabet(const NGHolder &g, som_type som) {
    const depth max_depth(MAX_STOP_DEPTH);
    const InitDepths depths(g);
    const map<NFAVertex, BoundedRepeatSummary> no_vertices;

    // live_at[i]: characters accepted by some vertex whose (capped) maximum
    // depth is greater than i.
    vector<CharReach> live_at(MAX_STOP_DEPTH);

    for (auto v : vertices_range(g)) {
        if (is_special(v, g)) {
            continue;
        }

        CharReach vertex_reach;
        if (som != SOM_NONE) {
            vertex_reach = g[v].char_reach;
        } else {
            vertex_reach = reduced_cr(v, g, no_vertices);
        }

        u32 limit = min(max_depth, depths.maxDist(g, v));
        for (u32 level = 0; level < limit; level++) {
            live_at[level] |= vertex_reach;
        }
    }

#ifdef DEBUG
    for (u32 level = 0; level < MAX_STOP_DEPTH; level++) {
        DEBUG_PRINTF("depth %u, stop chars: ", level);
        describeClass(stdout, ~live_at[level], 20, CC_OUT_TEXT);
        printf("\n");
    }
#endif

    vector<u8> stop(N_CHARS, 0);

    for (u32 level = 0; level < MAX_STOP_DEPTH; level++) {
        // The stop characters are whatever is not live at this depth.
        CharReach stop_chars = ~live_at[level];
        const u8 bit = 1U << level;
        size_t c = stop_chars.find_first();
        while (c != stop_chars.npos) {
            stop[c] |= bit;
            c = stop_chars.find_next(c);
        }
    }

    return stop;
}
Example #22
0
/** \brief Relax forbidden UTF-8 sequences.
 *
 * Certain byte sequences cannot occur in valid UTF-8, either because they
 * encode code points above \\x{10ffff} or because they are overlong
 * encodings. Since valid UTF-8 input is required, behaviour on such input is
 * undefined, so we are free to accept these sequences where doing so
 * simplifies the graph.
 *
 * Does nothing unless \p expr is a UTF-8 expression. Otherwise, for every
 * vertex whose reach is exactly 0xe0, 0xf0 or 0xf4, allowIllegal is applied
 * to each of its successors with that byte. */
void relaxForbiddenUtf8(NGHolder &g, const ExpressionInfo &expr) {
    if (!expr.utf8) {
        return;
    }

    const CharReach lead_e0(0xe0);
    const CharReach lead_f0(0xf0);
    const CharReach lead_f4(0xf4);

    for (auto v : vertices_range(g)) {
        const CharReach &cr = g[v].char_reach;
        const bool is_lead = cr == lead_e0 || cr == lead_f0 || cr == lead_f4;
        if (!is_lead) {
            continue;
        }

        u8 pred_char = cr.find_first();
        for (auto succ : adjacent_vertices_range(v, g)) {
            allowIllegal(g, succ, pred_char);
        }
    }
}
Example #23
0
/** Returns the vertices of \p g that a depth-first visit of the reversed
 * graph, rooted at acceptEod, does not reach. */
static
vector<NFAVertex> findUnreachable(const NGHolder &g) {
    const boost::reverse_graph<NFAGraph, const NFAGraph &> revg(g.g);

    ue2::unordered_map<NFAVertex, boost::default_color_type> colours;
    colours.reserve(num_vertices(g));

    depth_first_visit(revg, g.acceptEod,
                      make_dfs_visitor(boost::null_visitor()),
                      make_assoc_property_map(colours));

    // A vertex the visit never touched has no entry in the colour map.
    vector<NFAVertex> unreached;
    for (auto v : vertices_range(revg)) {
        if (contains(colours, v)) {
            continue;
        }
        unreached.push_back(v);
    }
    return unreached;
}
Example #24
0
/** \brief Convert temporary assert vertices (from construction method) to
 * edge-based flags.
 *
 * The construction method leaves temporary assert vertices in the graph;
 * this pass replaces them so that the rest of the code base never has to
 * know they existed. If any were replaced, the graph is pruned and its
 * vertices and edges renumbered. */
void removeAssertVertices(ReportManager &rm, NGWrapper &g) {
    DEBUG_PRINTF("before: graph has %zu vertices\n", num_vertices(g));

    // Nothing to do unless some vertex actually carries assertion flags.
    if (!hasAssertVertices(g)) {
        DEBUG_PRINTF("no assert vertices, done\n");
        return;
    }

    // Build a cache of (u, v) vertex pairs to edge descriptors.
    edge_cache_t edge_cache;
    for (const auto &e : edges_range(g)) {
        edge_cache[make_pair(source(e, g), target(e, g))] = e;
    }

    size_t resolved = 0;
    u32 assert_edge_count = 0;

    for (auto v : vertices_range(g)) {
        if (!(g[v].assert_flags & WORDBOUNDARY_FLAGS)) {
            continue;
        }
        replaceAssertVertex(g, v, edge_cache, assert_edge_count);
        resolved++;
    }

    checkForMultilineStart(rm, g);

    if (resolved) {
        DEBUG_PRINTF("resolved %zu assert vertices\n", resolved);
        pruneUseless(g);
        pruneEmptyVertices(g);
        g.renumberVertices();
        g.renumberEdges();
    }

    DEBUG_PRINTF("after: graph has %zu vertices\n", num_vertices(g));
    assert(!hasAssertVertices(g));
}
Example #25
0
/** Cheap test for a graph that cannot be reduced at all because it is just a
 * chain of vertices with no other edges. Special vertices are ignored. */
static
bool isIrreducible(const NGHolder &g) {
    for (auto v : vertices_range(g)) {
        if (is_special(v, g)) {
            continue; // skip specials
        }

        // The meaningful in-degree must be 1; inIsIrreducible() covers the
        // case of a self-loop plus one incoming edge, which must not count
        // as reducible.
        const bool in_ok = in_degree(v, g) == 1 || inIsIrreducible(v, g);
        if (!in_ok) {
            return false;
        }

        // Likewise for the meaningful out-degree, with outIsIrreducible()
        // covering a self-loop plus one outgoing edge.
        const bool out_ok = out_degree(v, g) == 1 || outIsIrreducible(v, g);
        if (!out_ok) {
            return false;
        }
    }

    return true;
}
Example #26
0
/** Remove every non-special vertex whose reach is empty, i.e. which accepts
 * no symbols. If anything was removed, pruneUseless is then run so that
 * vertices no longer on a path from a start to an accept are pruned too. */
void pruneEmptyVertices(NGHolder &g) {
    DEBUG_PRINTF("pruning empty vertices\n");

    vector<NFAVertex> empties;
    for (auto v : vertices_range(g)) {
        if (is_special(v, g) || !g[v].char_reach.none()) {
            continue;
        }
        DEBUG_PRINTF("empty: %u\n", g[v].index);
        empties.push_back(v);
    }

    if (!empties.empty()) {
        remove_vertices(empties, g);
        pruneUseless(g);
    }
}
Example #27
0
/** One final, FINAL optimisation: drop start and/or startDs from the state
 * assignment if unused in this graph. This is left until late because having
 * both vertices in the graph, with fixed state indices, is useful for merging
 * and other analyses.
 *
 * A dropped start has its entry in \p states set to NO_STATE; the remaining
 * state indices are then shifted down to close the gap. */
void dropUnusedStarts(NGHolder &g, ue2::unordered_map<NFAVertex, u32> &states) {
    u32 dropped = 0;

    if (startIsRedundant(g)) {
        DEBUG_PRINTF("dropping unused start\n");
        states[g.start] = NO_STATE;
        dropped++;
    }

    if (proper_out_degree(g.startDs, g) == 0) {
        DEBUG_PRINTF("dropping unused startDs\n");
        states[g.startDs] = NO_STATE;
        dropped++;
    }

    if (dropped == 0) {
        DEBUG_PRINTF("both start and startDs must remain\n");
        return;
    }

    // One or both starts are gone. Every vertex that still has a state index
    // is renumbered: a surviving start takes index 0 and all other vertices
    // move down by the number of starts dropped.
    for (auto v : vertices_range(g)) {
        u32 &state = states[v]; // reference: updated in place
        if (state == NO_STATE) {
            continue;
        }

        if (!is_any_start(v, g)) {
            assert(!is_special(v, g));
            assert(state >= dropped);
            state -= dropped;
            continue;
        }

        assert(state <= 1);
        state = 0; // one start remains
    }
}
/** Populate \p aux from \p g: record the vertex or edge that holds each
 * variable (containing_v / containing_e) and, for every variable referenced
 * by a vertex's reports or reports_eod, add that vertex to the variable's
 * reporters entry. */
static never_inline
void fill_aux(const GoughGraph &g, GoughGraphAux *aux) {
    for (auto v : vertices_range(g)) {
        const auto &props = g[v];

        for (const auto &var : props.vars) {
            aux->containing_v[var.get()] = v;
            DEBUG_PRINTF("%u is on vertex %u\n", var->slot, props.state_id);
        }

        for (const auto &rep : props.reports) {
            aux->reporters[rep.second].insert(v);
        }

        for (const auto &rep : props.reports_eod) {
            aux->reporters[rep.second].insert(v);
        }
    }

    for (const auto &e : edges_range(g)) {
        for (const auto &var : g[e].vars) {
            aux->containing_e[var.get()] = e;
            DEBUG_PRINTF("%u is on edge %u->%u\n", var->slot,
                         g[source(e, g)].state_id, g[target(e, g)].state_id);
        }
    }
}
Example #29
0
/** Conservatively determine whether the start of match may go backwards for
 * matches of \p g up to vertex \p u.
 *
 * Results are memoised per vertex in \p cache (which must have been built for
 * \p g). \p region_map supplies the region of each vertex. Returns true if
 * som may regress, which includes the case where the analysis gives up. */
bool somMayGoBackwards(NFAVertex u, const NGHolder &g,
                       const ue2::unordered_map<NFAVertex, u32> &region_map,
                       smgb_cache &cache) {
    /* Need to ensure all matches of the graph g up to u contain no infixes
     * which are also matches of the graph to u.
     *
     * This is basically the same as firstMatchIsFirst except that g is not
     * always a dag. As we haven't gotten around to writing an execute_graph
     * that operates on general graphs, we take some (hopefully) conservative
     * short cuts.
     *
     * Note: if u can be jumped, we will take jump edges into account as a
     * possibility of som going backwards.
     *
     * TODO: write a generalised ng_execute_graph/make this less hacky
     */
    assert(&g == &cache.g);
    if (contains(cache.smgb, u)) {
        /* already answered for this vertex */
        return cache.smgb[u];
    }

    DEBUG_PRINTF("checking if som can go backwards on %u\n",
                  g[u].index);

    /* collect the back edges found by a depth-first search from g.start */
    set<NFAEdge> be;
    BackEdges<set<NFAEdge>> backEdgeVisitor(be);
    depth_first_search(
        g.g, visitor(backEdgeVisitor)
                 .root_vertex(g.start)
                 .vertex_index_map(get(&NFAGraphVertexProps::index, g.g)));

    /* Common exit path: this block is never entered by falling through (the
     * condition is a constant 0) and is only reached via 'goto exit' once rv
     * has been set. It records rv in the cache for u and returns it. */
    bool rv;
    if (0) {
    exit:
        DEBUG_PRINTF("using cached result\n");
        cache.smgb[u] = rv;
        return rv;
    }

    assert(contains(region_map, u));
    const u32 u_region = region_map.at(u);

    for (const auto &e : be) {
        NFAVertex s = source(e, g);
        NFAVertex t = target(e, g);
        /* only need to worry about big cycles including/before u */
        DEBUG_PRINTF("back edge %u %u\n", g[s].index,
                      g[t].index);
        /* self-loops (s == t) are tolerated; any other back edge whose source
         * lies in u's region or an earlier one makes us give up */
        if (s != t && region_map.at(s) <= u_region) {
            DEBUG_PRINTF("eek big cycle\n");
            rv = true; /* big cycle -> eek */
            goto exit;
        }
    }

    /* work on a copy of g from here on */
    ue2::unordered_map<NFAVertex, NFAVertex> orig_to_copy;
    NGHolder c_g;
    cloneHolder(c_g, g, &orig_to_copy);

    /* fold each virtual start into startDs in the copy: startDs gains its
     * successors, the mapping is redirected to startDs and the copy of the
     * virtual start is disconnected */
    for (NFAVertex v : vertices_range(g)) {
        if (!is_virtual_start(v, g)) {
            continue;
        }
        NFAVertex c_v = orig_to_copy[v];
        orig_to_copy[v] = c_g.startDs;
        for (NFAVertex c_w : adjacent_vertices_range(c_v, c_g)) {
            add_edge_if_not_present(c_g.startDs, c_w, c_g);
        }
        clear_vertex(c_v, c_g);
    }

    /* rewire the accepts of the copy so that the only in-edge of acceptEod
     * is from accept, and the copy of u (keeping any self-loop it has in g)
     * leads only to accept */
    NFAVertex c_u = orig_to_copy[u];
    clear_in_edges(c_g.acceptEod, c_g);
    add_edge(c_g.accept, c_g.acceptEod, c_g);
    clear_in_edges(c_g.accept, c_g);
    clear_out_edges(c_u, c_g);
    if (hasSelfLoop(u, g)) {
        add_edge(c_u, c_u, c_g);
    }
    add_edge(c_u, c_g.accept, c_g);

    /* successors of u in g, other than u itself */
    set<NFAVertex> u_succ;
    insert(&u_succ, adjacent_vertices(u, g));
    u_succ.erase(u);

    /* a predecessor t of u that also has an edge straight to one of u's
     * successors can jump over u: make the copy of t lead to accept as well */
    for (auto t : inv_adjacent_vertices_range(u, g)) {
        if (t == u) {
            continue;
        }
        for (auto v : adjacent_vertices_range(t, g)) {
            if (contains(u_succ, v)) {
                add_edge(orig_to_copy[t], c_g.accept, c_g);
                break;
            }
        }
    }

    pruneUseless(c_g);

    /* recompute the back edges, this time on the pruned copy */
    be.clear();
    depth_first_search(c_g.g, visitor(backEdgeVisitor).root_vertex(c_g.start).
                       vertex_index_map(get(&NFAGraphVertexProps::index, c_g.g)));

    for (const auto &e : be) {
        NFAVertex s = source(e, c_g);
        NFAVertex t = target(e, c_g);
        DEBUG_PRINTF("back edge %u %u\n", c_g[s].index, c_g[t].index);
        if (s != t) {
            /* a non-self-loop back edge is not expected here: assert in debug
             * builds, answer conservatively otherwise */
            assert(0);
            DEBUG_PRINTF("eek big cycle\n");
            rv = true; /* big cycle -> eek */
            goto exit;
        }
    }

    DEBUG_PRINTF("checking acyclic+selfloop graph\n");

    /* som may regress exactly when the first-match-is-first check fails on
     * the copy */
    rv = !firstMatchIsFirst(c_g);
    DEBUG_PRINTF("som may regress? %d\n", (int)rv);
    goto exit;
}
Example #30
0
/** Check whether a subsequent match of the prefix \p sent clears the rest of
 * the pattern \p g.
 *
 * Returns true if, after turning on every non-accept state of \p g and
 * running \p sent through it, no surviving state (other than start and
 * startDs) lies in a region after \p last_head_region. Otherwise returns
 * false and writes the lowest such region to \p bad_region. */
bool sentClearsTail(const NGHolder &g,
                    const ue2::unordered_map<NFAVertex, u32> &region_map,
                    const NGHolder &sent, u32 last_head_region,
                    u32 *bad_region) {
    /* If a subsequent match from the prefix clears the rest of the pattern,
     * we can just keep track of the last match of the prefix. To see whether
     * this property holds, we could:
     *
     * 1A: turn on all states in the tail and run all strings that may match
     *    the prefix past the tail; if we are still in any states then this
     *    property does not hold.
     *
     * 1B: turn on the initial states of the tail and run any strings which
     *    may finish any partial matches in the prefix, and see if we end up
     *    with anything, which would also imply that this property does not
     *    hold.
     *
     * OR
     *
     * 2: just turn everything on, run the prefix inputs past it and see what
     * we are left with. I think that is equivalent to scheme 1 and is easier
     * to implement. TODO: ponder
     *
     * Anyway, we are going with scheme 2 until further notice.
     */

    const u32 no_bad_region = ~0U;
    u32 lowest_bad_region = no_bad_region;

    /* turn on all states, other than the accepts */
    DEBUG_PRINTF("region %u is cutover\n", last_head_region);
    set<NFAVertex> states;
    for (auto v : vertices_range(g)) {
        if (v == g.accept || v == g.acceptEod) {
            continue;
        }
        states.insert(v);
    }

    for (UNUSED auto v : states) {
        DEBUG_PRINTF("start state: %u\n", g[v].index);
    }

    /* run the prefix through the main graph */
    execute_graph(g, sent, &states);

    /* .. and check if we are left with anything in the tail region */
    for (auto v : states) {
        const bool is_start = v == g.start || v == g.startDs;
        if (is_start) {
            continue; /* not in tail */
        }

        DEBUG_PRINTF("v %u is still on\n", g[v].index);
        assert(v != g.accept && v != g.acceptEod); /* no cr */

        assert(contains(region_map, v));
        const u32 v_region = region_map.at(v);
        if (v_region <= last_head_region) {
            continue;
        }

        DEBUG_PRINTF("bailing, %u > %u\n", v_region, last_head_region);
        lowest_bad_region = min(lowest_bad_region, v_region);
    }

    if (lowest_bad_region == no_bad_region) {
        return true;
    }

    DEBUG_PRINTF("first bad region is %u\n", lowest_bad_region);
    *bad_region = lowest_bad_region;
    return false;
}