/**
 * Compute the "stop alphabet" of graph \p g: the complement of the union of
 * the character reachability of every non-special vertex whose maximum
 * distance (per InitDepths) is at least MAX_STOP_DEPTH.
 *
 * Seeing a character from the returned set means the NFA is either dead or
 * has not progressed more than MAX_STOP_DEPTH characters past its start
 * states.
 *
 * \param g   graph to analyse.
 * \param som when SOM_NONE, each vertex contributes reduced_cr(); for any
 *            other value the vertex's raw char_reach is used.
 * \return the set of characters NOT reachable at or beyond the depth limit.
 */
CharReach findStopAlphabet(const NGHolder &g, som_type som) {
    const depth depth_limit(MAX_STOP_DEPTH);
    const InitDepths init_depths(g);
    // Deliberately empty: reduced_cr() is called with no bounded-repeat
    // summaries.
    const map<NFAVertex, BoundedRepeatSummary> no_repeats;

    CharReach alphabet;
    for (auto v : vertices_range(g)) {
        if (is_special(v, g)) {
            continue;
        }

        const bool deep_enough = init_depths.maxDist(g, v) >= depth_limit;
        if (!deep_enough) {
            continue;
        }

        alphabet |= (som == SOM_NONE) ? reduced_cr(v, g, no_repeats)
                                      : g[v].char_reach;
    }

    // Everything outside the collected alphabet is a stop character.
    alphabet.flip();
    return alphabet;
}
// Checks CharReach::flip() in both forms: the no-argument overload that
// inverts every bit, and the single-index overload that toggles one bit.
TEST(ng_charreach, flip) {
    CharReach cr;

    // A default-constructed reach is empty.
    ASSERT_EQ(0U, cr.count());
    ASSERT_TRUE(cr.none());

    // Flipping the empty set yields the full set.
    cr.flip();
    ASSERT_EQ(cr.size(), cr.count());
    ASSERT_TRUE(cr.all());

    // Flipping again brings us back to empty.
    cr.flip();
    ASSERT_EQ(0U, cr.count());
    ASSERT_TRUE(cr.none());

    // Toggle a single position: exactly one bit is now set.
    cr.flip(25);
    ASSERT_FALSE(cr.none());
    ASSERT_FALSE(cr.all());
    ASSERT_EQ(1U, cr.count());

    // A whole-set flip now leaves every bit set except that one.
    cr.flip();
    ASSERT_EQ(cr.size() - 1, cr.count());
}
// Checks isAlpha() and to_string(): a reach holding a mix of letters and
// other values is not alpha; once the other values are toggled off it is,
// and its string form is "Zam".
TEST(ng_charreach, string) {
    CharReach cr;

    // Seed the reach with three letters plus four other entries.
    const int seeded[] = {1, 2, 'a', 'Z', 'm', '~', 210};
    for (const int value : seeded) {
        cr.set(value);
    }
    ASSERT_FALSE(cr.isAlpha());

    // Toggle off everything that is not one of the letters.
    const int toggled[] = {1, 2, '~', 210};
    for (const int value : toggled) {
        cr.flip(value);
    }
    ASSERT_TRUE(cr.isAlpha());
    ASSERT_EQ("Zam", cr.to_string());
}
/**
 * Add a predefined character class to the class under construction.
 *
 * \param c        the predefined class to add; translated via
 *                 translateForUcpMode() first when mode.ucp is set.
 * \param negative if true, the complement of the class is added instead.
 *
 * Throws LocatedParseError if a candidate range is currently open, since a
 * predefined class cannot be a range endpoint. On success the range
 * tracking state is reset.
 */
void AsciiComponentClass::add(PredefinedClass c, bool negative) {
    // A predefined class cannot serve as the end of a range.
    if (in_cand_range) {
        throw LocatedParseError("Invalid range in character class");
    }

    DEBUG_PRINTF("getting %u %s\n", static_cast<u32>(c), negative ? "^" : "");

    const PredefinedClass effective =
        mode.ucp ? translateForUcpMode(c, mode) : c;

    // Note: caselessness is handled by getPredefinedCharReach.
    CharReach class_reach = getPredefinedCharReach(effective, mode);
    if (negative) {
        class_reach.flip();
    }
    cr |= class_reach;

    // Reset range tracking.
    in_cand_range = false;
    range_start = INVALID_UNICODE;
}