/**
 * \brief Find the offset of the first adjacent byte pair in a literal whose
 * two bytes differ.
 *
 * Pairs (lit[i], lit[i + 1]) are examined from the front. When \p nocase is
 * set and the first byte of the pair is alphabetic, the pair is compared after
 * upper-casing both bytes; otherwise the bytes are compared as-is.
 *
 * \param lit    Literal bytes.
 * \param len    Number of bytes in \p lit.
 * \param nocase Whether the literal is matched case-insensitively.
 * \return Index of the first differing pair. If no pair differs, the index of
 *         the last pair examined (len - 2); 0 when fewer than two bytes exist.
 */
static size_t findNoodFragOffset(const u8 *lit, size_t len, bool nocase) {
    size_t last_pair = 0;

    for (size_t pos = 0; pos + 1 < len; ++pos) {
        const char cur = lit[pos];
        const char next = lit[pos + 1];

        const bool differs = (nocase && ourisalpha(cur))
                                 ? (mytoupper(cur) != mytoupper(next))
                                 : (cur != next);
        if (differs) {
            return pos;
        }

        // Remember how far we got in case the whole literal is one run.
        last_pair = pos;
    }

    return last_pair;
}
bool maskIsConsistent(const std::string &s, bool nocase, const vector<u8> &msk, const vector<u8> &cmp) { string::const_reverse_iterator si = s.rbegin(); vector<u8>::const_reverse_iterator mi = msk.rbegin(), ci = cmp.rbegin(); for (; si != s.rend() && mi != msk.rend(); ++si, ++mi, ++ci) { u8 c = *si, m = *mi, v = *ci; if (nocase && ourisalpha(c)) { m &= ~CASE_BIT; v &= ~CASE_BIT; } assert(ci != cmp.rend()); if ((c & m) != v) { DEBUG_PRINTF("c = %02hhx; *ci = %02hhx m =%02hhx\n", c, *ci, m); DEBUG_PRINTF("s = %s; dist = %zd\n", s.c_str(), si - s.rbegin()); return false; } } return true; }
/**
 * \brief Find the offset of the first adjacent character pair in a literal
 * whose two characters differ.
 *
 * Pairs (s[i], s[i + 1]) of \c lit.s are examined from the front. When
 * \c lit.nocase is set and the first character of the pair is alphabetic, the
 * pair is compared after upper-casing both characters; otherwise they are
 * compared as-is.
 *
 * \param lit Literal to inspect.
 * \return Index of the first differing pair. If no pair differs, the index of
 *         the last pair (length - 2); 0 when the literal has fewer than two
 *         characters.
 */
static size_t findNoodFragOffset(const hwlmLiteral &lit) {
    const auto &str = lit.s;
    const size_t n = lit.s.length();

    // No pair to look at.
    if (n < 2) {
        return 0;
    }

    size_t idx = 0;
    for (;;) {
        const char a = str[idx];
        const char b = str[idx + 1];

        bool same;
        if (lit.nocase && ourisalpha(a)) {
            same = (mytoupper(a) == mytoupper(b));
        } else {
            same = (a == b);
        }

        // Stop at the first differing pair, or at the final pair if the
        // literal is a single run.
        if (!same || idx + 2 >= n) {
            return idx;
        }
        ++idx;
    }
}
/**
 * \brief Build the flood-control table for a set of literals.
 *
 * For every literal, the trailing run of its final character (checked against
 * both the literal text and its msk/cmp vectors) is measured. If the run
 * covers the whole literal/mask span, the literal is registered via
 * addFlood() for that character; otherwise the character's suffix length is
 * updated via updateFloodSuffix(). For case-insensitive alphabetic final
 * characters, the upper- and lower-case variants are tracked separately.
 *
 * The per-character entries are then deduplicated and laid out in a single
 * buffer obtained from aligned_zmalloc(): a header of N_CHARS u32 indices
 * followed by the distinct FDRFlood structures they refer to.
 *
 * \param lits Literals to analyse.
 * \param eng  Engine description; supplies the default flood suffix length.
 * \return Pair of (buffer, total size in bytes, rounded up with ROUNDUP_16).
 *         NOTE(review): the caller presumably owns and must release the
 *         buffer -- confirm against the call sites.
 */
pair<u8 *, size_t> setupFDRFloodControl(const vector<hwlmLiteral> &lits,
                                        const EngineDescription &eng) {
    vector<FDRFlood> tmpFlood(N_CHARS);
    const u32 default_suffix = eng.getDefaultFloodSuffixLength();

    // Zero the raw storage of every entry so that the comparisons used for
    // deduplication below do not see spurious distinctions.
    memset(tmpFlood.data(), 0, N_CHARS * sizeof(FDRFlood));
    for (auto &entry : tmpFlood) {
        entry.suffix = default_suffix;
    }

    for (const auto &lit : lits) {
        DEBUG_PRINTF("lit: '%s'%s\n", escapeString(lit.s).c_str(),
                     lit.nocase ? " (nocase)" : "");
        const u32 litLen = verify_u32(lit.s.size());
        const u32 mskLen = (u32)lit.msk.size();

        // The candidate flood character is the literal's final character.
        u8 tail = lit.s[litLen - 1];
        bool nocase = ourisalpha(tail) && lit.nocase;

        // If the mask pins the case bit of the final byte, the case is fixed
        // by cmp and the character is treated as case-sensitive from here on.
        if (nocase && mskLen && (lit.msk[mskLen - 1] & CASE_BIT)) {
            if (lit.cmp[mskLen - 1] & CASE_BIT) {
                tail = mytolower(tail);
            } else {
                tail = mytoupper(tail);
            }
            nocase = false;
        }

        const u32 iEnd = MAX(litLen, mskLen);
        // upSuffix: upper-case suffix length when case-less, or the plain
        // suffix length when case-sensitive.
        u32 upSuffix = iEnd;
        // loSuffix: lower-case suffix length; only meaningful when case-less.
        u32 loSuffix = iEnd;

        for (u32 i = 0; i < iEnd; i++) {
            if (i < litLen &&
                isDifferent(tail, lit.s[litLen - i - 1], lit.nocase)) {
                DEBUG_PRINTF("non-flood char in literal[%u] %c != %c\n", i,
                             tail, lit.s[litLen - i - 1]);
                upSuffix = MIN(upSuffix, i);
                loSuffix = MIN(loSuffix, i); // makes sense only for case-less
                break;
            }

            if (i >= mskLen) {
                continue;
            }

            const u8 m = lit.msk[mskLen - i - 1];
            const u8 cm = lit.cmp[mskLen - i - 1] & m;

            if (!nocase) {
                if ((tail & m) != cm) {
                    DEBUG_PRINTF("non-flood char in mask[%u] %c != %c\n", i,
                                 tail, cm);
                    upSuffix = MIN(upSuffix, i);
                    break;
                }
                continue;
            }

            // Case-less: track the upper and lower variants independently and
            // stop only once both have been cut short.
            if ((mytoupper(tail) & m) != cm) {
                DEBUG_PRINTF("non-flood char in mask[%u] %c != %c\n", i,
                             mytoupper(tail), cm);
                upSuffix = MIN(upSuffix, i);
            }
            if ((mytolower(tail) & m) != cm) {
                DEBUG_PRINTF("non-flood char in mask[%u] %c != %c\n", i,
                             mytolower(tail), cm);
                loSuffix = MIN(loSuffix, i);
            }
            if (loSuffix != iEnd && upSuffix != iEnd) {
                break;
            }
        }

        if (upSuffix == iEnd) {
            addFlood(tmpFlood, nocase ? mytoupper(tail) : tail, lit, upSuffix);
        } else {
            updateFloodSuffix(tmpFlood, nocase ? mytoupper(tail) : tail,
                              upSuffix);
        }

        if (nocase) {
            if (loSuffix == iEnd) {
                addFlood(tmpFlood, mytolower(tail), lit, loSuffix);
            } else {
                updateFloodSuffix(tmpFlood, mytolower(tail), loSuffix);
            }
        }
    }

#ifdef DEBUG
    for (u32 i = 0; i < N_CHARS; i++) {
        const FDRFlood &fl = tmpFlood[i];
        if (fl.idCount) {
            printf("i is %02x fl->idCount is %hd fl->suffix is %d fl->allGroups is "
                   "%016llx\n",
                   i, fl.idCount, fl.suffix, fl.allGroups);
            for (u32 j = 0; j < fl.idCount; j++) {
                printf("j is %d fl.groups[j] %016llx fl.len[j] %d \n", j,
                       fl.groups[j], fl.len[j]);
            }
        }
    }
#endif

    // Group characters that share an identical flood description.
    map<FDRFlood, CharReach, FloodComparator> flood2chars;
    for (u32 i = 0; i < N_CHARS; i++) {
        flood2chars[tmpFlood[i]].set(i);
    }

    const u32 nDistinctFloods = flood2chars.size();
    const size_t floodHeaderSize = sizeof(u32) * N_CHARS;
    const size_t floodStructSize = sizeof(FDRFlood) * nDistinctFloods;
    const size_t totalSize = ROUNDUP_16(floodHeaderSize + floodStructSize);

    u8 *buf = (u8 *)aligned_zmalloc(totalSize);
    assert(buf); // otherwise would have thrown std::bad_alloc

    // Layout: [u32 index per character][distinct FDRFlood structures].
    u32 *floodHeader = (u32 *)buf;
    FDRFlood *layoutFlood = (FDRFlood *)(buf + floodHeaderSize);

    u32 floodIdx = 0;
    for (const auto &group : flood2chars) {
        const CharReach &chars = group.second;
        layoutFlood[floodIdx] = group.first;
        for (size_t ch = chars.find_first(); ch != chars.npos;
             ch = chars.find_next(ch)) {
            floodHeader[ch] = floodIdx;
        }
        floodIdx++;
    }

    DEBUG_PRINTF("made a flood structure with %zu + %zu = %zu\n",
                 floodHeaderSize, floodStructSize, totalSize);

    return make_pair(buf, totalSize);
}
static bool findDVerm(const vector<const hwlmLiteral *> &lits, AccelAux *aux) { const hwlmLiteral &first = *lits.front(); struct candidate { candidate(void) : c1(0), c2(0), max_offset(0), b5insens(false), valid(false) {} candidate(const hwlmLiteral &base, u32 offset) : c1(base.s[offset]), c2(base.s[offset + 1]), max_offset(0), b5insens(false), valid(true) {} char c1; char c2; u32 max_offset; bool b5insens; bool valid; bool operator>(const candidate &other) const { if (!valid) { return false; } if (!other.valid) { return true; } if (other.cdiffers() && !cdiffers()) { return false; } if (!other.cdiffers() && cdiffers()) { return true; } if (!other.b5insens && b5insens) { return false; } if (other.b5insens && !b5insens) { return true; } if (max_offset > other.max_offset) { return false; } return true; } bool cdiffers(void) const { if (!b5insens) { return c1 != c2; } return (c1 & CASE_CLEAR) != (c2 & CASE_CLEAR); } }; candidate best; for (u32 i = 0; i < MIN(MAX_ACCEL_OFFSET, first.s.length()) - 1; i++) { candidate curr(first, i); /* check to see if this pair appears in each string */ for (const auto &lit_ptr : lits) { const hwlmLiteral &lit = *lit_ptr; if (lit.nocase && (ourisalpha(curr.c1) || ourisalpha(curr.c2))) { curr.b5insens = true; /* no choice but to be case insensitive */ } bool found = false; bool found_nc = false; for (u32 j = 0; !found && j < MIN(MAX_ACCEL_OFFSET, lit.s.length()) - 1; j++) { found |= curr.c1 == lit.s[j] && curr.c2 == lit.s[j + 1]; found_nc |= (curr.c1 & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR) && (curr.c2 & CASE_CLEAR) == (lit.s[j + 1] & CASE_CLEAR); if (curr.b5insens) { found = found_nc; } } if (!curr.b5insens && !found && found_nc) { curr.b5insens = true; found = true; } if (!found) { goto next_candidate; } } /* check to find the max offset where this appears */ for (const auto &lit_ptr : lits) { const hwlmLiteral &lit = *lit_ptr; for (u32 j = 0; j < MIN(MAX_ACCEL_OFFSET, lit.s.length()) - 1; j++) { bool found = false; if (curr.b5insens) { 
found = (curr.c1 & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR) && (curr.c2 & CASE_CLEAR) == (lit.s[j + 1] & CASE_CLEAR); } else { found = curr.c1 == lit.s[j] && curr.c2 == lit.s[j + 1]; } if (found) { curr.max_offset = MAX(curr.max_offset, j); break; } } } if (curr > best) { best = curr; } next_candidate:; } if (!best.valid) { return false; } aux->dverm.offset = verify_u8(best.max_offset); if (!best.b5insens) { aux->dverm.accel_type = ACCEL_DVERM; aux->dverm.c1 = best.c1; aux->dverm.c2 = best.c2; DEBUG_PRINTF("built dverm for %02hhx%02hhx\n", aux->dverm.c1, aux->dverm.c2); } else { aux->dverm.accel_type = ACCEL_DVERM_NOCASE; aux->dverm.c1 = best.c1 & CASE_CLEAR; aux->dverm.c2 = best.c2 & CASE_CLEAR; DEBUG_PRINTF("built dverm nc for %02hhx%02hhx\n", aux->dverm.c1, aux->dverm.c2); } return true; }