// Exercises CharReach::find_nth(n): as the cases below pin down, it yields the
// n-th (zero-based) set member in ascending order, or CharReach::npos when
// fewer than n + 1 members are set.
TEST(ng_charreach, find_nth) {
    const size_t npos = CharReach::npos;

    // One bit cases.
    for (size_t i = 0; i < 256; i++) {
        CharReach cr((unsigned char)i);
        ASSERT_EQ(i, cr.find_nth(0));
        ASSERT_EQ(npos, cr.find_nth(1));
    }

    // All bits set.
    CharReach dot = CharReach::dot();
    for (size_t i = 0; i < 256; i++) {
        ASSERT_EQ(i, dot.find_nth(i));
    }

    // Trivial two bit cases: pair every low value i with its mirror 255 - i.
    // (Using 256 - i would yield 256 for i == 0, which is outside the valid
    // 0..255 member range, so that iteration would not test two members.)
    for (size_t i = 0; i < 128; i++) {
        const size_t mirror = 255 - i;
        CharReach cr;
        cr.set(i);
        cr.set(mirror);
        ASSERT_EQ(i, cr.find_nth(0));
        ASSERT_EQ(mirror, cr.find_nth(1));
        // Only two members exist, so every further index must miss.
        ASSERT_EQ(npos, cr.find_nth(2));
        ASSERT_EQ(npos, cr.find_nth(3));
    }

    // More complex case: the string lists its members in ascending order, so
    // the i-th character must be the i-th member.
    const std::string str("\x01\x02\x03\x05\x06\x20!#$%&./0123568:;ABCDEFMNOPUYZbcdefwxyz");
    CharReach cr(str);
    for (size_t i = 0; i < str.length(); i++) {
        // Compare as unsigned values to avoid a char/size_t mixed comparison.
        ASSERT_EQ((size_t)(unsigned char)str[i], cr.find_nth(i));
    }
    ASSERT_EQ(npos, cr.find_nth(str.length()));
}
// Checks isCaselessChar() while the class grows one member at a time:
// it is expected to hold only for exactly {'a', 'A'}.
TEST(ng_charreach, caseless) {
    CharReach reach;

    // A single lower-case letter is not a caseless pair.
    reach.set('a');
    ASSERT_FALSE(reach.isCaselessChar());

    // Adding its upper-case twin makes it one.
    reach.set('A');
    ASSERT_TRUE(reach.isCaselessChar());

    // Any further member breaks the property ...
    reach.set('b');
    ASSERT_FALSE(reach.isCaselessChar());

    // ... even when that member's own twin is added too.
    reach.set('B');
    ASSERT_FALSE(reach.isCaselessChar());
}
// Copy-assignment must produce an object equal to its source.
TEST(ng_charreach, assignment) {
    CharReach source;
    source.set('f');
    source.set('l');
    source.set('y');

    // Default-construct first so that operator= (not the copy ctor) is used.
    CharReach target;
    target = source;

    ASSERT_EQ(source.count(), target.count());
    ASSERT_TRUE(source == target);
}
// Checks isAlpha() on an empty class, on letter-only classes, and after a
// non-letter member has been added.
TEST(ng_charreach, alpha) {
    CharReach reach;

    // Empty class: nothing set, not alpha.
    ASSERT_EQ(0U, reach.count());
    ASSERT_FALSE(reach.isAlpha());

    // One letter.
    reach.set('a');
    ASSERT_FALSE(0 == reach.count());
    ASSERT_TRUE(reach.isAlpha());

    // Several letters of mixed case.
    reach.set('A');
    reach.set('b');
    reach.set('z');
    ASSERT_TRUE(reach.isAlpha());

    // A control character spoils it.
    reach.set(1);
    ASSERT_FALSE(reach.isAlpha());
}
// Reverse scan for the class {'a', 'B'}: the last member before the scan end
// must be reported, for sixteen different end offsets.
TEST(ReverseTruffle, ExecMatch3) {
    m128 mask1, mask2;

    CharReach chars;
    chars.set('a');
    chars.set('B');
    truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2);

    /*           0123456789012345678901234567890 */
    char t1[] = "bbbbbbbbbbbbbbbbbaaaaaaaaaaaaaaaBbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
    size_t len = strlen(t1);
    u8 *buf = (u8 *)t1;

    // First pass: the 'B' at offset 32 is the last member in the buffer.
    for (size_t trim = 0; trim < 16; trim++) {
        const u8 *hit = rtruffleExec(mask1, mask2, buf, buf + len - trim);

        ASSERT_NE((const u8 *)t1 - 1, hit); // not found
        EXPECT_EQ('B', (char)*hit);
        ASSERT_EQ((const u8 *)t1 + 32, hit);
    }

    // check that we match the 'a' bytes as well.
    ASSERT_EQ('B', t1[32]);
    t1[32] = 'b';

    // Second pass: with the 'B' gone, the final 'a' at offset 31 is expected.
    for (size_t trim = 0; trim < 16; trim++) {
        const u8 *hit = rtruffleExec(mask1, mask2, buf, buf + len - trim);

        ASSERT_NE((const u8 *)t1 - 1, hit); // not found
        EXPECT_EQ('a', (char)*hit);
        ASSERT_EQ((const u8 *)t1 + 31, hit);
    }
}
// Exercises the |, &, |=, &= and < operators on small classes.
TEST(ng_charreach, bitwise) {
    CharReach only_a;
    CharReach only_z;
    CharReach both;
    CharReach accum;

    only_a.set('a');
    only_z.set('z');
    both.set('a');
    both.set('z');

    ASSERT_TRUE(only_a < both);

    // Build {'a', 'z'} incrementally with |=.
    accum |= only_a;
    accum |= only_z;

    // Union results agree with the directly built class.
    ASSERT_TRUE(both == accum);
    ASSERT_TRUE(both == (only_a | only_z));
    ASSERT_TRUE(accum == (only_a | only_z));

    // Intersection with the union gives back each operand.
    ASSERT_TRUE(only_a == (only_a & both));
    ASSERT_TRUE(only_z == (only_z & both));

    // In-place intersection drops 'z'.
    both &= only_a;
    ASSERT_FALSE(both.test('z'));
}
void describeEdge(FILE *f, const u16 *t, u16 i) { for (u16 s = 0; s < N_CHARS; s++) { if (!t[s]) { continue; } u16 ss; for (ss = 0; ss < s; ss++) { if (t[s] == t[ss]) { break; } } if (ss != s) { continue; } CharReach reach; for (ss = s; ss < 256; ss++) { if (t[s] == t[ss]) { reach.set(ss); } } fprintf(f, "%u -> %u [ label = \"", i, t[s]); describeClass(f, reach, 5, CC_OUT_DOT); fprintf(f, "\" ];\n"); } }
static void getForwardReach(const raw_dfa &rdfa, map<s32, CharReach> &look) { if (rdfa.states.size() < 2) { return; } ue2::flat_set<dstate_id_t> curr, next; curr.insert(rdfa.start_anchored); for (u32 i = 0; i < MAX_FWD_LEN && !curr.empty(); i++) { next.clear(); CharReach cr; for (const auto state_id : curr) { const dstate &ds = rdfa.states[state_id]; if (!ds.reports.empty() || !ds.reports_eod.empty()) { return; } for (unsigned c = 0; c < N_CHARS; c++) { dstate_id_t succ = ds.next[rdfa.alpha_remap[c]]; if (succ != DEAD_STATE) { cr.set(c); next.insert(succ); } } } assert(cr.any()); look[i] |= cr; curr.swap(next); } }
static void describeEdge(FILE *f, const mcsheng *m, const u16 *t, u16 i) { for (u16 s = 0; s < N_CHARS; s++) { if (!t[s]) { continue; } u16 ss; for (ss = 0; ss < s; ss++) { if (t[s] == t[ss]) { break; } } if (ss != s) { continue; } CharReach reach; for (ss = s; ss < 256; ss++) { if (t[s] == t[ss]) { reach.set(ss); } } fprintf(f, "%u -> %u [ ", i, t[s]); if (i < m->sheng_end && t[s] < m->sheng_end) { fprintf(f, "color = red, fontcolor = red "); } fprintf(f, "label = \""); describeClass(f, reach, 5, CC_OUT_DOT); fprintf(f, "\" ];\n"); } }
// Copy-construction must produce an object equal to its source.
TEST(ng_charreach, copy) {
    CharReach source;
    source.set('a');
    source.set('z');

    CharReach duplicate(source);

    ASSERT_EQ(source.count(), duplicate.count());
    ASSERT_TRUE(source == duplicate);
}
// Builds a class with letters and non-letters, flips the non-letters back out
// and checks that the remaining letters stringify as "Zam".
TEST(ng_charreach, string) {
    CharReach reach;

    // Non-letter members.
    reach.set(1);
    reach.set(2);
    // Letter members.
    reach.set('a');
    reach.set('Z');
    reach.set('m');
    // More non-letter members.
    reach.set('~');
    reach.set(210);

    ASSERT_FALSE(reach.isAlpha());

    // Flipping a set member removes it again.
    reach.flip(1);
    reach.flip(2);
    reach.flip('~');
    reach.flip(210);

    ASSERT_TRUE(reach.isAlpha());
    ASSERT_EQ("Zam", reach.to_string());
}
// Checks count()/none()/all() on an empty class, after one set() and after
// setall().
TEST(ng_charreach, set) {
    CharReach reach;

    // Freshly constructed: empty.
    ASSERT_EQ(0U, reach.count());
    ASSERT_TRUE(reach.none());
    ASSERT_FALSE(reach.all());

    // Exactly one member.
    reach.set('q');
    ASSERT_EQ(1U, reach.count());

    // Everything.
    reach.setall();
    ASSERT_EQ(reach.size(), reach.count());
    ASSERT_TRUE(reach.all());
}
// Checks that clear(c) removes exactly one member, both from a small class
// and from the full class.
TEST(ng_charreach, clear) {
    CharReach reach;

    // Freshly constructed: empty.
    ASSERT_EQ(0U, reach.count());
    ASSERT_TRUE(reach.none());
    ASSERT_FALSE(reach.all());

    // Five distinct members.
    reach.set('q');
    reach.set('u');
    reach.set('a');
    reach.set('r');
    reach.set('k');
    ASSERT_EQ(5U, reach.count());

    // Remove one of them.
    reach.clear('r');
    ASSERT_EQ(4U, reach.count());
    ASSERT_FALSE(reach.test('r'));

    // Fill completely, then knock out the top value.
    reach.setall();
    ASSERT_EQ(reach.size(), reach.count());
    ASSERT_TRUE(reach.all());

    reach.clear(0xff);
    ASSERT_FALSE(reach.all());
}
// count() must agree with the number of members visited by a
// find_first()/find_next() walk.
TEST(ng_charreach, count) {
    CharReach reach;
    reach.set(1);
    reach.set(2);
    reach.set('a');
    reach.set('Z');
    reach.set('m');
    reach.set('~');
    reach.set(210);

    size_t pos = reach.find_first();
    ASSERT_FALSE(pos == CharReach::npos);

    // Walk every member, counting as we go.
    unsigned int visited = 0;
    for (; pos != CharReach::npos; pos = reach.find_next(pos)) {
        visited++;
    }

    ASSERT_EQ(visited, reach.count());
}
// Reverse scan over a one-byte buffer that consists solely of the member 'a':
// the match must be the buffer start.
TEST(ReverseTruffle, ExecMiniMatch0) {
    m128 lo, hi;

    CharReach chars;
    chars.set('a');
    truffleBuildMasks(chars, (u8 *)&lo, (u8 *)&hi);

    char t1[] = "a";
    u8 *begin = (u8 *)t1;
    u8 *end = (u8 *)t1 + strlen(t1);

    const u8 *hit = rtruffleExec(lo, hi, begin, end);

    ASSERT_EQ((size_t)t1, (size_t)hit);
}
// Forward scan over a short buffer of NUL bytes with a single 'a' at
// offset 7: the match must land exactly on that byte.
TEST(Truffle, ExecMiniMatch3) {
    m128 lo, hi;

    CharReach chars;
    chars.set('a');
    truffleBuildMasks(chars, (u8 *)&lo, (u8 *)&hi);

    // Eleven payload bytes; strlen() cannot be used because of the NULs.
    char t1[] = "\0\0\0\0\0\0\0a\0\0\0";
    u8 *begin = (u8 *)t1;
    u8 *end = (u8 *)t1 + 11;

    const u8 *hit = truffleExec(lo, hi, begin, end);

    ASSERT_EQ((size_t)t1 + 7, (size_t)hit);
}
// Round-trips classes through truffleBuildMasks() and truffle2cr(): the class
// decoded from the masks must equal the class the masks were built from.
TEST(Truffle, CompileChars) {
    m128 mask1, mask2;
    CharReach chars;

    // test one char at a time
    for (u32 value = 0; value < 256; ++value) {
        mask1 = zeroes128();
        mask2 = zeroes128();

        chars.clear();
        chars.set((u8)value);

        truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2);
        CharReach decoded = truffle2cr((u8 *)&mask1, (u8 *)&mask2);
        ASSERT_EQ(decoded, chars);
    }

    // set all chars up to dot
    // (`chars` still holds 255 from the loop above and grows from there)
    for (u32 value = 0; value < 256; ++value) {
        mask1 = zeroes128();
        mask2 = zeroes128();

        chars.set((u8)value);

        truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2);
        CharReach decoded = truffle2cr((u8 *)&mask1, (u8 *)&mask2);
        ASSERT_EQ(decoded, chars);
    }

    // unset all chars from dot
    for (u32 value = 0; value < 256; ++value) {
        mask1 = zeroes128();
        mask2 = zeroes128();

        chars.clear((u8)value);

        truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2);
        CharReach decoded = truffle2cr((u8 *)&mask1, (u8 *)&mask2);
        ASSERT_EQ(decoded, chars);
    }
}
// Forward scan for the class {'a', 'A', 'c', 'C'}: in each of four buffers the
// first member sits at offset 17 (a different member per buffer), and it must
// be found for sixteen different start offsets.
TEST(Truffle, ExecMatch4) {
    m128 mask1, mask2;

    CharReach chars;
    chars.set('a');
    chars.set('C');
    chars.set('A');
    chars.set('c');
    truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2);

    /*           0123456789012345678901234567890 */
    char t1[] = "bbbbbbbbbbbbbbbbbAaaaaaaaaaaaaaaabbbbbbbbbbbbbbbabbbbbbbbbbbb";
    char t2[] = "bbbbbbbbbbbbbbbbbCaaaaaaaaaaaaaaabbbbbbbbbbbbbbbabbbbbbbbbbbb";
    char t3[] = "bbbbbbbbbbbbbbbbbcaaaaaaaaaaaaaaabbbbbbbbbbbbbbbabbbbbbbbbbbb";
    char t4[] = "bbbbbbbbbbbbbbbbbaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbabbbbbbbbbbbb";

    for (size_t i = 0; i < 16; i++) {
        const u8 *rv = truffleExec(mask1, mask2, (u8 *)t1 + i,
                                   (u8 *)t1 + strlen(t1));
        ASSERT_EQ((size_t)t1 + 17, (size_t)rv);

        // Each buffer's end is derived from that buffer's own length, so the
        // bounds stay right even if one of the strings is edited later.
        rv = truffleExec(mask1, mask2, (u8 *)t2 + i, (u8 *)t2 + strlen(t2));
        ASSERT_EQ((size_t)t2 + 17, (size_t)rv);

        rv = truffleExec(mask1, mask2, (u8 *)t3 + i, (u8 *)t3 + strlen(t3));
        ASSERT_EQ((size_t)t3 + 17, (size_t)rv);

        rv = truffleExec(mask1, mask2, (u8 *)t4 + i, (u8 *)t4 + strlen(t4));
        ASSERT_EQ((size_t)t4 + 17, (size_t)rv);
    }
}
// Reverse scan for the class {'a', 'A', 'c', 'C'}: in each of four buffers the
// last member sits at offset 32 (a different member per buffer), and it must
// be found for sixteen different end offsets.
//
// A miss is reported as (buffer start - 1), so every result is checked
// against that sentinel with a fatal assertion before it is dereferenced;
// otherwise a regression would read outside the buffer instead of failing
// cleanly.
TEST(ReverseTruffle, ExecMatch4) {
    m128 mask1, mask2;

    CharReach chars;
    chars.set('a');
    chars.set('C');
    chars.set('A');
    chars.set('c');
    truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2);

    /*           0123456789012345678901234567890 */
    char t1[] = "bbbbbbbbbbbbbbbbbaaaaaaaaaaaaaaaAbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
    char t2[] = "bbbbbbbbbbbbbbbbbaaaaaaaaaaaaaaaCbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
    char t3[] = "bbbbbbbbbbbbbbbbbaaaaaaaaaaaaaaacbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
    char t4[] = "bbbbbbbbbbbbbbbbbaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbb";

    // All four buffers have the same length.
    size_t len = strlen(t1);

    for (size_t i = 0; i < 16; i++) {
        const u8 *rv = rtruffleExec(mask1, mask2, (u8 *)t1, (u8 *)t1 + len - i);
        ASSERT_NE((const u8 *)t1 - 1, rv); // not found
        EXPECT_EQ('A', (char)*rv);
        ASSERT_EQ((const u8 *)t1 + 32, rv);

        rv = rtruffleExec(mask1, mask2, (u8 *)t2, (u8 *)t2 + len - i);
        ASSERT_NE((const u8 *)t2 - 1, rv); // not found
        EXPECT_EQ('C', (char)*rv);
        ASSERT_EQ((const u8 *)t2 + 32, rv);

        rv = rtruffleExec(mask1, mask2, (u8 *)t3, (u8 *)t3 + len - i);
        ASSERT_NE((const u8 *)t3 - 1, rv); // not found
        EXPECT_EQ('c', (char)*rv);
        ASSERT_EQ((const u8 *)t3 + 32, rv);

        rv = rtruffleExec(mask1, mask2, (u8 *)t4, (u8 *)t4 + len - i);
        ASSERT_NE((const u8 *)t4 - 1, rv); // not found
        EXPECT_EQ('a', (char)*rv);
        ASSERT_EQ((const u8 *)t4 + 32, rv);
    }
}
// Returns how many input byte values lead from the DFA's anchored start state
// to a successor other than DEAD_STATE. A DFA with fewer than two states
// yields 0.
u32 mcclellanStartReachSize(const raw_dfa *raw) {
    if (raw->states.size() < 2) {
        return 0;
    }

    const dstate &start = raw->states[raw->start_anchored];

    CharReach alive;
    for (unsigned c = 0; c < N_CHARS; c++) {
        // Bytes are first mapped to their alphabet class, then looked up.
        const bool dead = start.next[raw->alpha_remap[c]] == DEAD_STATE;
        if (!dead) {
            alive.set(c);
        }
    }

    return alive.count();
}
// Reverse scan for 'V' over a buffer of 'e' bytes: nothing can match, so the
// result must be the byte just before the scanned region. The region starts
// one byte into the array so that this position is still inside the array.
TEST(ReverseTruffle, ExecNoMatch3) {
    m128 mask1, mask2;

    CharReach chars;
    chars.set('V'); /* V = 0x56, e = 0x65 */
    truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2);

    char t[] = "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee";
    char *t1 = t + 1;
    size_t len = strlen(t1);
    u8 *begin = (u8 *)t1;

    for (size_t trim = 0; trim < 16; trim++) {
        const u8 *result = rtruffleExec(mask1, mask2, begin, begin + len - trim);

        ASSERT_EQ((const u8 *)t, result);
    }
}
// Reverse scan for 'a' over a short buffer with two occurrences: the later
// one (offset 7) must be reported.
TEST(ReverseTruffle, ExecMiniMatch2) {
    m128 mask1, mask2;

    CharReach chars;
    chars.set('a');
    truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2);

    /*           0123456789012345678901234567890 */
    char t1[] = "babbbbbabbbb";
    size_t len = strlen(t1);
    u8 *begin = (u8 *)t1;

    const u8 *hit = rtruffleExec(mask1, mask2, begin, begin + len);

    ASSERT_NE((const u8 *)t1 - 1, hit); // not found
    EXPECT_EQ('a', (char)*hit);
    ASSERT_EQ((const u8 *)t1 + 7, hit);
}
// Forward scan for 'V' over a buffer of 'e' bytes: nothing can match, so the
// result must be the end of the buffer for every start offset.
TEST(Truffle, ExecNoMatch3) {
    m128 mask1, mask2;

    CharReach chars;
    chars.set('V'); /* V = 0x56, e = 0x65 */
    truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2);

    char t1[] = "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee";
    const size_t len = strlen(t1);

    for (size_t skip = 0; skip < 16; skip++) {
        const u8 *result =
            truffleExec(mask1, mask2, (u8 *)t1 + skip, (u8 *)t1 + len);

        ASSERT_EQ((size_t)t1 + len, (size_t)result);
    }
}
// Forward scan for 'a' over a 400-byte buffer of 'b' with one 'a' at offset
// 120. The result is only required to be no earlier than the 16-byte-aligned
// address at or below the 'a'.
TEST(Truffle, ExecMatchBig) {
    m128 lo, hi;

    CharReach chars;
    chars.set('a');
    truffleBuildMasks(chars, (u8 *)&lo, (u8 *)&hi);

    std::array<u8, 400> haystack;
    haystack.fill('b');
    haystack[120] = 'a';

    // Address of the 'a', rounded down to a multiple of 16.
    const size_t lower_bound = ((size_t)haystack.data() + 120) & ~0xf;

    for (size_t skip = 0; skip < 16; skip++) {
        const u8 *result = truffleExec(lo, hi, (u8 *)haystack.data() + skip,
                                       (u8 *)haystack.data() + 399);

        ASSERT_LE(lower_bound, (size_t)result);
    }
}
// Forward scan for 'a': the first 'a' sits at offset 17 and must be found for
// sixteen different start offsets.
TEST(Truffle, ExecMatch2) {
    m128 mask1, mask2;

    CharReach chars;
    chars.set('a');
    truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2);

    /*           0123456789012345678901234567890 */
    char t1[] = "bbbbbbbbbbbbbbbbbaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbabbbbbbbbbbbb";
    const size_t len = strlen(t1);

    for (size_t skip = 0; skip < 16; skip++) {
        const u8 *hit =
            truffleExec(mask1, mask2, (u8 *)t1 + skip, (u8 *)t1 + len);

        ASSERT_EQ((size_t)t1 + 17, (size_t)hit);
    }
}
// Forward scan for 'a' while 'a' bytes are planted at ever earlier offsets
// (48, 47, ... 18): each scan must report the most recently planted, i.e.
// earliest, one.
TEST(Truffle, ExecMatch5) {
    m128 mask1, mask2;

    CharReach chars;
    chars.set('a');
    truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2);

    char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";

    for (size_t step = 0; step < 31; step++) {
        const size_t pos = 48 - step;
        t1[pos] = 'a';

        // The buffer still contains no NUL before its end, so strlen() keeps
        // returning the full length.
        const u8 *hit =
            truffleExec(mask1, mask2, (u8 *)t1, (u8 *)t1 + strlen(t1));

        ASSERT_EQ((size_t)&t1[pos], (size_t)hit);
    }
}
// Exhaustively checks isCaselessChar(): it must be false for every single
// value, and true for a two-value class exactly when the values are the
// upper- and lower-case forms of the same ASCII letter.
TEST(ng_charreach, caseless2) {
    // Test every pair of characters.
    for (size_t first = 0; first < 256; first++) {
        ASSERT_FALSE(CharReach((unsigned char)first).isCaselessChar());

        for (size_t second = 0; second < 256; second++) {
            CharReach pair;
            pair.set(first);
            pair.set(second);

            // ASCII lower case is upper case + 0x20.
            const bool first_is_upper = first >= 'A' && first <= 'Z';
            const bool first_is_lower = first >= 'a' && first <= 'z';
            const bool upper_then_lower =
                first_is_upper && second == first + 0x20;
            const bool lower_then_upper =
                first_is_lower && first == second + 0x20;
            const bool expected = upper_then_lower || lower_then_upper;

            ASSERT_EQ(expected, pair.isCaselessChar())
                << "Failed for i=" << first << ", j=" << second;
        }
    }
}
// Reverse scan for 'a' while the buffer is turned into 'a' bytes from the
// front: after each write, the last 'a' is the one just written and must be
// the reported match.
TEST(ReverseTruffle, ExecMatch5) {
    m128 mask1, mask2;

    CharReach chars;
    chars.set('a');
    truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2);

    char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
    size_t len = strlen(t1);
    u8 *begin = (u8 *)t1;

    for (size_t pos = 0; pos < len; pos++) {
        t1[pos] = 'a';

        const u8 *hit = rtruffleExec(mask1, mask2, begin, begin + len);

        ASSERT_EQ((const u8 *)t1 + pos, hit);
    }
}
// Forward scan for 'a' over a buffer of 'b' bytes that ends in 0xff: nothing
// can match, so the result must be the end of the buffer for every start
// offset.
TEST(Truffle, ExecNoMatch1) {
    m128 mask1, mask2;
    // Start from all-zero masks before they are built.
    memset(&mask1, 0, sizeof(mask1));
    memset(&mask2, 0, sizeof(mask2));

    CharReach chars;
    chars.set('a');
    truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2);

    char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\xff";
    const size_t len = strlen(t1);

    for (size_t skip = 0; skip < 16; skip++) {
        const u8 *result =
            truffleExec(mask1, mask2, (u8 *)t1 + skip, (u8 *)t1 + len);

        ASSERT_EQ((size_t)t1 + len, (size_t)result);
    }
}
// find_last() must track the highest member as larger values are added, and
// must equal size() for an empty class.
TEST(ng_charreach, find_last) {
    CharReach reach;

    // Each value is larger than all previous ones, so it becomes the last.
    const unsigned char ascending[] = {'a', 'b', 192, 207, 223, 255};
    for (const unsigned char value : ascending) {
        reach.set(value);
        ASSERT_EQ(reach.find_last(), (size_t)value);
    }

    // Nothing set: no last member.
    reach.clear();
    ASSERT_EQ(reach.find_last(), reach.size());

    // Same again from the bottom of the range.
    const unsigned char low_values[] = {0, 1};
    for (const unsigned char value : low_values) {
        reach.set(value);
        ASSERT_EQ(reach.find_last(), (size_t)value);
    }
}