// Ugly but simple. string make_pattern() { std::ostringstream oss; oss << "^["; for (size_t i = reach.find_first(); i != CharReach::npos; i = reach.find_next(i)) { oss << "\\x" << std::hex << std::setw(2) << std::setfill('0') << (unsigned)(i & 0xff) << std::dec; } oss << "]{" << min << "," << max << "}"; return oss.str(); }
/**
 * Calculate the stop alphabet for each depth from 0 to MAX_STOP_DEPTH, then
 * build an eight-bit mask per character C, with each bit representing the
 * depth before the location of character C (if encountered) that the NFA
 * would be in a predictable start state.
 *
 * \param g   Graph whose non-special vertices contribute reach.
 * \param som When SOM_NONE, a vertex's reach is taken from reduced_cr();
 *            otherwise the vertex's own char_reach is used as-is.
 * \return    Vector of N_CHARS bytes; bit i of entry c is set when c is NOT
 *            in the reach accumulated for depth i.
 */
vector<u8> findLeftOffsetStopAlphabet(const NGHolder &g, som_type som) {
    const depth depth_cap(MAX_STOP_DEPTH);
    const InitDepths init_depths(g);
    // reduced_cr() is handed an empty bounded-repeat map.
    const map<NFAVertex, BoundedRepeatSummary> no_vertices;

    // reach_at[i] is the union of the reach of every vertex whose (capped)
    // max distance is greater than i.
    vector<CharReach> reach_at(MAX_STOP_DEPTH);

    for (auto v : vertices_range(g)) {
        if (is_special(v, g)) {
            continue;
        }

        CharReach v_cr = (som == SOM_NONE) ? reduced_cr(v, g, no_vertices)
                                           : g[v].char_reach;

        // Never look deeper than MAX_STOP_DEPTH.
        const u32 limit = min(depth_cap, init_depths.maxDist(g, v));
        u32 level = 0;
        while (level < limit) {
            reach_at[level] |= v_cr;
            ++level;
        }
    }

#ifdef DEBUG
    for (u32 level = 0; level < MAX_STOP_DEPTH; ++level) {
        DEBUG_PRINTF("depth %u, stop chars: ", level);
        describeClass(stdout, ~reach_at[level], 20, CC_OUT_TEXT);
        printf("\n");
    }
#endif

    vector<u8> stop(N_CHARS, 0);

    for (u32 level = 0; level < MAX_STOP_DEPTH; ++level) {
        // Stop characters are exactly those outside the reach at this depth.
        const CharReach stop_chars = ~reach_at[level];
        const u8 mask = 1U << level;

        size_t c = stop_chars.find_first();
        while (c != stop_chars.npos) {
            stop[c] |= mask;
            c = stop_chars.find_next(c);
        }
    }

    return stop;
}
/**
 * Castle flavour of the stop alphabet calculation.
 *
 * Every character outside castle.reach() receives the same mask: the low d
 * bits set, where d is the castle's max width capped at MAX_STOP_DEPTH.
 * Characters inside the reach keep a zero mask.
 *
 * \param castle Castle prototype supplying the reach and max width.
 * \param som    Unused.
 * \return       Vector of N_CHARS per-character masks.
 */
vector<u8> findLeftOffsetStopAlphabet(const CastleProto &castle,
                                      UNUSED som_type som) {
    const depth widest = findMaxWidth(castle);
    DEBUG_PRINTF("castle has reach %s and max width %s\n",
                 describeClass(castle.reach()).c_str(), widest.str().c_str());

    // Stop characters are the complement of the castle's reach.
    const CharReach stop_chars = ~castle.reach();

    // One bit per depth below the capped width.
    // NOTE(review): verify_u8 is presumably a checked narrowing to u8 -- confirm.
    const u32 capped_width = min(widest, depth(MAX_STOP_DEPTH));
    const u8 depth_bits = verify_u8((1U << capped_width) - 1);

    vector<u8> stop(N_CHARS, 0);
    size_t c = stop_chars.find_first();
    while (c != stop_chars.npos) {
        stop[c] |= depth_bits;
        c = stop_chars.find_next(c);
    }
    return stop;
}
// Walking a CharReach with find_first()/find_next() must visit exactly as
// many positions as count() reports.
TEST(ng_charreach, count) {
    CharReach cr;
    const unsigned members[] = {1, 2, 'a', 'Z', 'm', '~', 210};
    for (const unsigned member : members) {
        cr.set(member);
    }

    const size_t first = cr.find_first();
    ASSERT_FALSE(first == CharReach::npos);

    unsigned int visited = 0;
    for (size_t pos = first; pos != CharReach::npos; pos = cr.find_next(pos)) {
        ++visited;
    }

    ASSERT_EQ(visited, cr.count());
}