// Ugly but simple. string make_pattern() { std::ostringstream oss; oss << "^["; for (size_t i = reach.find_first(); i != CharReach::npos; i = reach.find_next(i)) { oss << "\\x" << std::hex << std::setw(2) << std::setfill('0') << (unsigned)(i & 0xff) << std::dec; } oss << "]{" << min << "," << max << "}"; return oss.str(); }
// Exhaustively checks CharReach::setRange() on every contiguous span that fits
// in 0..255: after setting [lo, hi] on a fresh CharReach, the first and last
// members must be the endpoints and the population must equal the span length.
TEST(ng_charreach, setRange) {
    // `width` is (hi - lo); spans are visited grouped by width.
    for (unsigned width = 0; width < 256; ++width) {
        // Number of valid starting points for a span of this width.
        const unsigned lo_limit = 256 - width;
        for (unsigned lo = 0; lo < lo_limit; ++lo) {
            const unsigned hi = lo + width;

            CharReach cr;
            cr.setRange(lo, hi);

            ASSERT_EQ(lo, cr.find_first());
            ASSERT_EQ(hi, cr.find_last());
            ASSERT_EQ(width + 1, cr.count());
        }
    }
}
/**
 * Compute the stop alphabet for each depth below MAX_STOP_DEPTH and fold the
 * results into one mask per character.
 *
 * For every non-special vertex, its reach is merged into each depth slot below
 * min(MAX_STOP_DEPTH, maxDist of that vertex). With SOM_NONE the reach comes
 * from reduced_cr() (called with an empty repeat-summary map); otherwise the
 * vertex's raw char_reach is used.
 *
 * In the returned table (N_CHARS entries), bit i of entry C is set when C is
 * absent from the reach accumulated for depth i. The intent is that each bit
 * marks a depth before the location of C (if encountered) at which the NFA
 * would be in a predictable start state.
 */
vector<u8> findLeftOffsetStopAlphabet(const NGHolder &g, som_type som) {
    const depth depth_cap(MAX_STOP_DEPTH);
    const InitDepths init_depths(g);
    const map<NFAVertex, BoundedRepeatSummary> no_repeats;

    // live[k]: union of the reach of every vertex whose capped max distance
    // is greater than k.
    vector<CharReach> live(MAX_STOP_DEPTH);

    for (auto v : vertices_range(g)) {
        if (is_special(v, g)) {
            continue;
        }

        CharReach v_reach;
        if (som != SOM_NONE) {
            v_reach = g[v].char_reach;
        } else {
            v_reach = reduced_cr(v, g, no_repeats);
        }

        // Capping at MAX_STOP_DEPTH keeps the index below inside `live`.
        const u32 limit = min(depth_cap, init_depths.maxDist(g, v));
        for (u32 k = 0; k != limit; ++k) {
            live[k] |= v_reach;
        }
    }

#ifdef DEBUG
    for (u32 k = 0; k < MAX_STOP_DEPTH; k++) {
        DEBUG_PRINTF("depth %u, stop chars: ", k);
        describeClass(stdout, ~live[k], 20, CC_OUT_TEXT);
        printf("\n");
    }
#endif

    vector<u8> stop(N_CHARS, 0);

    for (u32 k = 0; k < MAX_STOP_DEPTH; ++k) {
        // Stop characters at this depth are exactly those NOT in the reach.
        const CharReach stoppers = ~live[k];
        const u8 depth_bit = static_cast<u8>(1U << k);

        size_t c = stoppers.find_first();
        while (c != CharReach::npos) {
            stop[c] |= depth_bit;
            c = stoppers.find_next(c);
        }
    }

    return stop;
}
/**
 * Stop-alphabet table for a castle.
 *
 * Every character outside castle.reach() receives a mask with the low d bits
 * set, where d = min(findMaxWidth(castle), MAX_STOP_DEPTH). Characters inside
 * the reach keep a zero entry. The \p som argument is not used.
 *
 * Returns a table of N_CHARS entries.
 */
vector<u8> findLeftOffsetStopAlphabet(const CastleProto &castle,
                                      UNUSED som_type som) {
    const depth widest = findMaxWidth(castle);
    DEBUG_PRINTF("castle has reach %s and max width %s\n",
                 describeClass(castle.reach()).c_str(), widest.str().c_str());

    // Clamp to MAX_STOP_DEPTH, then build a mask of that many low bits;
    // verify_u8 checks that it fits in a byte.
    const u32 capped = min(widest, depth(MAX_STOP_DEPTH));
    const u8 low_bits = verify_u8((1U << capped) - 1);

    // Stop characters are the complement of the castle's reach.
    const CharReach escape = ~castle.reach();

    vector<u8> stop(N_CHARS, 0);
    size_t c = escape.find_first();
    while (c != CharReach::npos) {
        stop[c] |= low_bits;
        c = escape.find_next(c);
    }

    return stop;
}
// Checks that CharReach::count() agrees with the number of members reached by
// walking the set with find_first()/find_next().
TEST(ng_charreach, count) {
    // A handful of scattered members: control codes, letters, punctuation and
    // one value above 127.
    static const size_t members[] = {1, 2, 'a', 'Z', 'm', '~', 210};

    CharReach cr;
    for (const size_t member : members) {
        cr.set(member);
    }

    size_t pos = cr.find_first();
    ASSERT_FALSE(pos == CharReach::npos);

    unsigned int seen = 0;
    for (; pos != CharReach::npos; pos = cr.find_next(pos)) {
        ++seen;
    }

    ASSERT_EQ(seen, cr.count());
}
/**
 * Try to build an LBR_NFA_Verm engine for a repeat over \p cr.
 *
 * This only applies when the complement of \p cr holds exactly one character;
 * that character is stored in the engine's `c` field. In every other case
 * nullptr is returned and nothing is built.
 *
 * The repeat type is picked by chooseRepeatType() from the bounds, \p minPeriod
 * and \p is_reset; the common part of the engine is populated by fillNfa().
 */
static aligned_unique_ptr<NFA> buildLbrVerm(const CharReach &cr,
                                            const depth &repeatMin,
                                            const depth &repeatMax,
                                            u32 minPeriod, bool is_reset,
                                            ReportID report) {
    const CharReach escapes = ~cr;
    if (escapes.count() != 1) {
        return nullptr;
    }

    const RepeatType rtype =
        chooseRepeatType(repeatMin, repeatMax, minPeriod, is_reset);

    aligned_unique_ptr<NFA> nfa =
        makeLbrNfa<lbr_verm>(LBR_NFA_Verm, rtype, repeatMax);

    // The implementation-specific structure is reached through the NFA.
    auto *impl = static_cast<lbr_verm *>(getMutableImplNfa(nfa.get()));
    impl->c = escapes.find_first(); // the single escape character

    fillNfa<lbr_verm>(nfa.get(), &impl->common, report, repeatMin, repeatMax,
                      minPeriod, rtype);

    DEBUG_PRINTF("built verm lbr\n");
    return nfa;
}