/**
 * Prune edges into the accept vertices that can never yield a match long
 * enough to satisfy the graph's min_length constraint (the per-accept
 * overload is presumably also where max_offset is taken into account --
 * confirm against that overload).
 *
 * This is a no-op when no minimum length is set on \p g. Otherwise the
 * per-accept overload is run for both g.accept and g.acceptEod using the
 * distances reported by getDistancesFromSOM(), and pruneUseless() is then
 * called on the graph.
 *
 * \param g   Graph to prune; modified in place.
 * \param rm  Report manager, forwarded to the per-accept overload.
 */
static
void pruneUnmatchable(NGWrapper &g, const ReportManager &rm) {
    // Only graphs with a minimum match length need any work.
    if (g.min_length) {
        vector<DepthMinMax> som_distances = getDistancesFromSOM(g);

        pruneUnmatchable(g, som_distances, rm, g.accept);
        pruneUnmatchable(g, som_distances, rm, g.acceptEod);

        pruneUseless(g);
    }
}
/**
 * \brief Find the (min, max) length of any match for the given holder.
 *
 * Every predecessor of g.accept, and every predecessor of g.acceptEod other
 * than g.accept itself, contributes its distance range as reported by
 * getDistancesFromSOM(), shifted by the (min, max) pair that
 * getMinMaxOffsetAdjust() returns for that vertex. The contributions are
 * combined with unionDepthMinMax().
 *
 * \param rm  Report manager, forwarded to getMinMaxOffsetAdjust().
 * \param g   Graph to inspect.
 * \return The combined range; both bounds are asserted to be reachable.
 */
static
DepthMinMax findMatchLengths(const ReportManager &rm, const NGHolder &g) {
    vector<DepthMinMax> som_distances = getDistancesFromSOM(g);
    DepthMinMax combined;

    // Vertices that feed the ordinary accept.
    for (auto u : inv_adjacent_vertices_range(g.accept, g)) {
        const u32 index = g[u].index;
        DepthMinMax range = som_distances[index]; // deliberate copy
        const pair<s32, s32> shift = getMinMaxOffsetAdjust(rm, g, u);
        DEBUG_PRINTF("vertex %u: depths=%s, adj=[%d,%d]\n", index,
                     range.str().c_str(), shift.first, shift.second);
        range.min += shift.first;
        range.max += shift.second;
        combined = unionDepthMinMax(combined, range);
    }

    // Vertices that feed the end-of-data accept; g.accept itself is skipped.
    for (auto u : inv_adjacent_vertices_range(g.acceptEod, g)) {
        if (u != g.accept) {
            const u32 index = g[u].index;
            DepthMinMax range = som_distances[index]; // deliberate copy
            const pair<s32, s32> shift = getMinMaxOffsetAdjust(rm, g, u);
            DEBUG_PRINTF("vertex %u: depths=%s, adj=[%d,%d]\n", index,
                         range.str().c_str(), shift.first, shift.second);
            range.min += shift.first;
            range.max += shift.second;
            combined = unionDepthMinMax(combined, range);
        }
    }

    DEBUG_PRINTF("match_depths=%s\n", combined.str().c_str());

    assert(combined.min.is_reachable());
    assert(combined.max.is_reachable());
    return combined;
}
/**
 * \brief Build the squash mask for every squasher vertex in the graph.
 *
 * Only vertices that have a self-loop and are not init states (a start
 * vertex, or a vertex with no in-edges) are considered. For each such vertex
 * a mask is built with buildSquashMask() and then narrowed (AND-ed) with the
 * masks of qualifying successors and predecessors. The vertex's own bit is
 * always left set. A vertex is recorded in the result only if its mask has at
 * least one unset bit; unset bits are the states it squashes.
 *
 * The collected map is then handed to findDerivedSquashers() and
 * clearMutualSquashers() before being returned.
 *
 * \param g    Graph to analyse.
 * \param som  When non-zero, region and SOM-distance information is computed
 *             and used to restrict which predecessors may contribute.
 * \return Map from squasher vertex to its state mask.
 */
map<NFAVertex, NFAStateSet> findSquashers(const NGHolder &g, som_type som) {
    map<NFAVertex, NFAStateSet> result;

    // Width of every state mask used below. If we're a triggered graph, tops
    // have already been assigned, so we don't have to account for them.
    const u32 state_count = num_vertices(g);

    // Post-dominator tree, consumed by the mask builders.
    PostDomTree pdom_tree;
    buildPDomTree(g, pdom_tree);

    // Vertex lookup by state index, plus the set of init states.
    vector<NFAVertex> by_index(state_count, NFAGraph::null_vertex());
    NFAStateSet init_states(state_count);
    smgb_cache cache(g);

    // Only populated in SOM mode; left empty otherwise.
    unordered_map<NFAVertex, u32> region_map;
    vector<DepthMinMax> som_depths;
    if (som) {
        region_map = assignRegions(g);
        som_depths = getDistancesFromSOM(g);
    }

    for (auto u : vertices_range(g)) {
        const u32 index = g[u].index;
        DEBUG_PRINTF("vertex %u/%u\n", index, state_count);
        assert(index < state_count);
        by_index[index] = u;

        const bool is_init = is_any_start(u, g) || !in_degree(u, g);
        if (is_init) {
            init_states.set(index);
        }
    }

    for (u32 i = 0; i < state_count; ++i) {
        NFAVertex self = by_index[i];
        assert(self != NFAGraph::null_vertex());
        const CharReach &self_reach = g[self].char_reach;

        /* only non-init cyclics can be squashers */
        const bool candidate = hasSelfLoop(self, g) && !init_states.test(i);
        if (!candidate) {
            continue;
        }

        DEBUG_PRINTF("state %u is cyclic\n", i);

        NFAStateSet mask(state_count);
        NFAStateSet self_succ(state_count);
        NFAStateSet self_pred(state_count);
        buildSquashMask(mask, g, self, self_reach, init_states, by_index,
                        pdom_tree, som, som_depths, region_map, cache);
        buildSucc(self_succ, g, self);
        buildPred(self_pred, g, self);
        const auto &self_reports = g[self].reports;

        // Successors that share exactly our predecessor set narrow the mask.
        for (size_t j = self_succ.find_first(); j != self_succ.npos;
             j = self_succ.find_next(j)) {
            NFAVertex other = by_index[j];
            NFAStateSet other_pred(state_count);
            buildPred(other_pred, g, other);
            if (!(other_pred == self_pred)) {
                continue;
            }

            DEBUG_PRINTF("adding the sm from %zu to %u's sm\n", j, i);
            NFAStateSet other_mask(state_count);
            buildSquashMask(other_mask, g, other, self_reach, init_states,
                            by_index, pdom_tree, som, som_depths, region_map,
                            cache);
            mask &= other_mask;
        }

        // Predecessors may also serve as a basis, subject to the checks below.
        for (size_t j = self_pred.find_first(); j != self_pred.npos;
             j = self_pred.find_next(j)) {
            NFAVertex other = by_index[j];
            NFAStateSet other_succ(state_count);
            buildSucc(other_succ, g, other);

            /* we can use j as a basis for squashing if its succs are a subset
             * of ours */
            if ((other_succ & ~self_succ).any()) {
                continue;
            }

            if (som) {
                /* We cannot use j to add to the squash mask of v if it may
                 * have an earlier start of match offset. ie for us j as a
                 * basis for the squash mask of v we require:
                 * maxSomDist(j) <= minSomDist(v)
                 */
                /* ** TODO ** */
                const depth &other_max_som = som_depths[g[other].index].max;
                const depth &self_min_som = som_depths[g[self].index].min;
                const bool may_store_earlier_som =
                    other_max_som > self_min_som || other_max_som.is_infinite();
                if (may_store_earlier_som) {
                    /* j can't be used as it may be storing an earlier SOM */
                    continue;
                }
            }

            /* we also require that j's report information be a subset of
             * ours: every special vertex j leads to must also be reachable
             * directly from us. */
            bool touches_special = false;
            bool special_mismatch = false;
            for (auto w : adjacent_vertices_range(other, g)) {
                if (!is_special(w, g)) {
                    continue;
                }
                if (!edge(self, w, g).second) {
                    special_mismatch = true;
                    break;
                }
                touches_special = true;
            }
            if (special_mismatch) {
                continue;
            }

            // FIXME: should be subset check?
            if (touches_special && g[other].reports != self_reports) {
                continue;
            }

            /* ok we can use j, provided its reach is contained in ours */
            const CharReach &other_reach = g[other].char_reach;
            if ((other_reach & ~self_reach).any()) {
                continue;
            }

            NFAStateSet other_mask(state_count);
            buildSquashMask(other_mask, g, other, self_reach, init_states,
                            by_index, pdom_tree, som, som_depths, region_map,
                            cache);
            mask &= other_mask;
            mask.reset(j);
        }

        mask.set(i); /* never clear ourselves */

        if ((~mask).any()) { // i.e. some bits unset in mask
            DEBUG_PRINTF("%u squashes %zu other states\n", i, (~mask).count());
            result.emplace(self, mask);
        }
    }

    findDerivedSquashers(g, by_index, pdom_tree, init_states, &result, som,
                         som_depths, region_map, cache);

    clearMutualSquashers(g, by_index, result);

    return result;
}