/**
 * \brief Build a Tamarama container engine from a set of exclusive
 * subengines.
 *
 * Takes in a collection of exclusive subengines and produces a tamarama. Also
 * returns, via \p out_top_remap, a mapping indicating how tops in the
 * subengines relate to the tamarama's tops.
 *
 * Layout of the bytecode written by this function, in order:
 *   - the NFA header;
 *   - the Tamarama header;
 *   - the contents of the top_base vector filled in by remapTops() (sized
 *     here as one u32 per subengine);
 *   - one u32 offset per subengine;
 *   - space for the subengines themselves, filled in by copyInSubnfas().
 *
 * \param tamaInfo      Description of the subengines to pack.
 * \param queue         Queue index stored in the resulting NFA.
 * \param out_top_remap Output: (subengine, top) -> tamarama top mapping, as
 *                      populated by remapTops().
 * \return The newly allocated, zero-initialised and populated NFA.
 */
aligned_unique_ptr<NFA> buildTamarama(const TamaInfo &tamaInfo, const u32 queue,
                  map<pair<const NFA *, u32>, u32> &out_top_remap) {
    vector<u32> top_base;
    remapTops(tamaInfo, top_base, out_top_remap);

    size_t subSize = tamaInfo.subengines.size();
    // subSize is a size_t, so it must be printed with %zu (not %lu, which is
    // the wrong width on LLP64 platforms).
    DEBUG_PRINTF("subSize:%zu\n", subSize);

    size_t total_size =
        sizeof(NFA) +            // initial NFA structure
        sizeof(Tamarama) +       // Tamarama structure
        sizeof(u32) * subSize +  // base top event value for subengines,
                                 // used for top remapping at runtime
        sizeof(u32) * subSize + 64; // offsets to subengines in bytecode and
                                    // padding for subengines

    // Each subengine is budgeted a cache-line-rounded amount of space.
    for (const auto &sub : tamaInfo.subengines) {
        total_size += ROUNDUP_CL(sub->length);
    }

    // use subSize as a sentinel value for no active subengines,
    // so add one to subSize here
    u32 activeIdxSize = calcPackedBytes(subSize + 1);

    aligned_unique_ptr<NFA> nfa = aligned_zmalloc_unique<NFA>(total_size);
    nfa->type = verify_u8(TAMARAMA_NFA_0);
    nfa->length = verify_u32(total_size);
    nfa->queueIndex = queue;

    // The Tamarama header sits directly after the NFA header; base_offset
    // remembers that position for copyInSubnfas().
    char *ptr = (char *)nfa.get() + sizeof(NFA);
    char *base_offset = ptr;
    Tamarama *t = (Tamarama *)ptr;
    t->numSubEngines = verify_u32(subSize);
    t->activeIdxSize = verify_u8(activeIdxSize);

    // Top base table follows the Tamarama header.
    ptr += sizeof(Tamarama);
    copy_bytes(ptr, top_base);
    ptr += byte_length(top_base);

    // Then the per-subengine offset table, then the subengines themselves.
    u32 *offsets = (u32*)ptr;
    char *sub_nfa_offset = ptr + sizeof(u32) * subSize;
    copyInSubnfas(base_offset, *nfa, tamaInfo, offsets, sub_nfa_offset,
                  activeIdxSize);
    assert((size_t)(sub_nfa_offset - (char *)nfa.get()) <= total_size);
    return nfa;
}
template <class LbrStruct> static void fillNfa(NFA *nfa, lbr_common *c, ReportID report, const depth &repeatMin, const depth &repeatMax, u32 minPeriod, enum RepeatType rtype) { assert(nfa); RepeatStateInfo rsi(rtype, repeatMin, repeatMax, minPeriod); DEBUG_PRINTF("selected %s model for {%s,%s} repeat\n", repeatTypeName(rtype), repeatMin.str().c_str(), repeatMax.str().c_str()); // Fill the lbr_common structure first. Note that the RepeatInfo structure // directly follows the LbrStruct. const u32 info_offset = sizeof(LbrStruct); c->repeatInfoOffset = info_offset; c->report = report; RepeatInfo *info = (RepeatInfo *)((char *)c + info_offset); info->type = verify_u8(rtype); info->repeatMin = depth_to_u32(repeatMin); info->repeatMax = depth_to_u32(repeatMax); info->stateSize = rsi.stateSize; info->packedCtrlSize = rsi.packedCtrlSize; info->horizon = rsi.horizon; info->minPeriod = minPeriod; copy_bytes(&info->packedFieldSizes, rsi.packedFieldSizes); info->patchCount = rsi.patchCount; info->patchSize = rsi.patchSize; info->encodingSize = rsi.encodingSize; info->patchesOffset = rsi.patchesOffset; // Fill the NFA structure. nfa->nPositions = repeatMin; nfa->streamStateSize = verify_u32(rsi.packedCtrlSize + rsi.stateSize); nfa->scratchStateSize = (u32)sizeof(lbr_state); nfa->minWidth = verify_u32(repeatMin); nfa->maxWidth = repeatMax.is_finite() ? verify_u32(repeatMax) : 0; // Fill the lbr table for sparse lbr model. if (rtype == REPEAT_SPARSE_OPTIMAL_P) { u64a *table = getTable<LbrStruct>(nfa); // Adjust table length according to the optimal patch length. size_t len = nfa->length; assert((u32)repeatMax >= rsi.patchSize); len -= sizeof(u64a) * ((u32)repeatMax - rsi.patchSize); nfa->length = verify_u32(len); info->length = verify_u32(sizeof(RepeatInfo) + sizeof(u64a) * (rsi.patchSize + 1)); copy_bytes(table, rsi.table); } }
/**
 * \brief Allocate a zeroed NFA large enough for an LBR engine of the given
 * kind, and set its type and length.
 *
 * The allocation covers the NFA header, the LbrStruct, a RepeatInfo, one
 * extra u64a, and - for the REPEAT_SPARSE_OPTIMAL_P model only - a table of
 * (repeatMax + 1) u64a entries.
 *
 * \tparam LbrStruct Concrete LBR implementation structure.
 * \param nfa_type   Engine type written to the NFA header.
 * \param rtype      Repeat model; decides whether table space is reserved.
 * \param repeatMax  Upper repeat bound, used to size the table.
 * \return The allocated engine with only type and length filled in.
 */
template <class LbrStruct>
static
aligned_unique_ptr<NFA> makeLbrNfa(NFAEngineType nfa_type,
                                   enum RepeatType rtype,
                                   const depth &repeatMax) {
    // Table space is only needed by the sparse optimal-P repeat model.
    const size_t tableBytes = (rtype == REPEAT_SPARSE_OPTIMAL_P)
                                  ? sizeof(u64a) * (repeatMax + 1)
                                  : 0;

    const size_t totalBytes = sizeof(NFA) + sizeof(LbrStruct) +
                              sizeof(RepeatInfo) + tableBytes + sizeof(u64a);

    aligned_unique_ptr<NFA> engine = aligned_zmalloc_unique<NFA>(totalBytes);
    engine->type = verify_u8(nfa_type);
    engine->length = verify_u32(totalBytes);
    return engine;
}
/**
 * \brief Build the per-character stop table for a castle.
 *
 * Every character outside the castle's reach gets the low d bits set in its
 * table entry, where d is the castle's maximum width capped at
 * MAX_STOP_DEPTH. Characters inside the reach keep a zero entry.
 *
 * \param castle Castle prototype whose reach and max width are examined.
 * \param som    Unused.
 * \return A table of N_CHARS entries, indexed by character value.
 */
vector<u8> findLeftOffsetStopAlphabet(const CastleProto &castle,
                                      UNUSED som_type som) {
    const depth max_width = findMaxWidth(castle);
    DEBUG_PRINTF("castle has reach %s and max width %s\n",
                 describeClass(castle.reach()).c_str(),
                 max_width.str().c_str());

    // Stop characters are exactly those the castle cannot consume.
    const CharReach stopChars = ~castle.reach();

    u32 cappedDepth = min(max_width, depth(MAX_STOP_DEPTH));
    const u8 depthMask = verify_u8((1U << cappedDepth) - 1);

    vector<u8> table(N_CHARS, 0);
    size_t ch = stopChars.find_first();
    while (ch != stopChars.npos) {
        table[ch] |= depthMask;
        ch = stopChars.find_next(ch);
    }

    return table;
}
static bool findDVerm(const vector<const hwlmLiteral *> &lits, AccelAux *aux) { const hwlmLiteral &first = *lits.front(); struct candidate { candidate(void) : c1(0), c2(0), max_offset(0), b5insens(false), valid(false) {} candidate(const hwlmLiteral &base, u32 offset) : c1(base.s[offset]), c2(base.s[offset + 1]), max_offset(0), b5insens(false), valid(true) {} char c1; char c2; u32 max_offset; bool b5insens; bool valid; bool operator>(const candidate &other) const { if (!valid) { return false; } if (!other.valid) { return true; } if (other.cdiffers() && !cdiffers()) { return false; } if (!other.cdiffers() && cdiffers()) { return true; } if (!other.b5insens && b5insens) { return false; } if (other.b5insens && !b5insens) { return true; } if (max_offset > other.max_offset) { return false; } return true; } bool cdiffers(void) const { if (!b5insens) { return c1 != c2; } return (c1 & CASE_CLEAR) != (c2 & CASE_CLEAR); } }; candidate best; for (u32 i = 0; i < MIN(MAX_ACCEL_OFFSET, first.s.length()) - 1; i++) { candidate curr(first, i); /* check to see if this pair appears in each string */ for (const auto &lit_ptr : lits) { const hwlmLiteral &lit = *lit_ptr; if (lit.nocase && (ourisalpha(curr.c1) || ourisalpha(curr.c2))) { curr.b5insens = true; /* no choice but to be case insensitive */ } bool found = false; bool found_nc = false; for (u32 j = 0; !found && j < MIN(MAX_ACCEL_OFFSET, lit.s.length()) - 1; j++) { found |= curr.c1 == lit.s[j] && curr.c2 == lit.s[j + 1]; found_nc |= (curr.c1 & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR) && (curr.c2 & CASE_CLEAR) == (lit.s[j + 1] & CASE_CLEAR); if (curr.b5insens) { found = found_nc; } } if (!curr.b5insens && !found && found_nc) { curr.b5insens = true; found = true; } if (!found) { goto next_candidate; } } /* check to find the max offset where this appears */ for (const auto &lit_ptr : lits) { const hwlmLiteral &lit = *lit_ptr; for (u32 j = 0; j < MIN(MAX_ACCEL_OFFSET, lit.s.length()) - 1; j++) { bool found = false; if (curr.b5insens) { 
found = (curr.c1 & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR) && (curr.c2 & CASE_CLEAR) == (lit.s[j + 1] & CASE_CLEAR); } else { found = curr.c1 == lit.s[j] && curr.c2 == lit.s[j + 1]; } if (found) { curr.max_offset = MAX(curr.max_offset, j); break; } } } if (curr > best) { best = curr; } next_candidate:; } if (!best.valid) { return false; } aux->dverm.offset = verify_u8(best.max_offset); if (!best.b5insens) { aux->dverm.accel_type = ACCEL_DVERM; aux->dverm.c1 = best.c1; aux->dverm.c2 = best.c2; DEBUG_PRINTF("built dverm for %02hhx%02hhx\n", aux->dverm.c1, aux->dverm.c2); } else { aux->dverm.accel_type = ACCEL_DVERM_NOCASE; aux->dverm.c1 = best.c1 & CASE_CLEAR; aux->dverm.c2 = best.c2 & CASE_CLEAR; DEBUG_PRINTF("built dverm nc for %02hhx%02hhx\n", aux->dverm.c1, aux->dverm.c2); } return true; }
static void findForwardAccelScheme(const vector<hwlmLiteral> &lits, hwlm_group_t expected_groups, AccelAux *aux) { DEBUG_PRINTF("building accel expected=%016llx\n", expected_groups); u32 min_len = MAX_ACCEL_OFFSET; vector<const hwlmLiteral *> filtered_lits; filterLits(lits, expected_groups, &filtered_lits, &min_len); if (filtered_lits.empty()) { return; } if (findDVerm(filtered_lits, aux) || findSVerm(filtered_lits, aux)) { return; } vector<CharReach> reach(MAX_ACCEL_OFFSET, CharReach()); for (const auto &lit : lits) { if (!(lit.groups & expected_groups)) { continue; } for (u32 i = 0; i < MAX_ACCEL_OFFSET && i < lit.s.length(); i++) { unsigned char c = lit.s[i]; if (lit.nocase) { DEBUG_PRINTF("adding %02hhx to %u\n", mytoupper(c), i); DEBUG_PRINTF("adding %02hhx to %u\n", mytolower(c), i); reach[i].set(mytoupper(c)); reach[i].set(mytolower(c)); } else { DEBUG_PRINTF("adding %02hhx to %u\n", c, i); reach[i].set(c); } } } u32 min_count = ~0U; u32 min_offset = ~0U; for (u32 i = 0; i < min_len; i++) { size_t count = reach[i].count(); DEBUG_PRINTF("offset %u is %s (reach %zu)\n", i, describeClass(reach[i]).c_str(), count); if (count < min_count) { min_count = (u32)count; min_offset = i; } } assert(min_offset <= min_len); if (min_count > MAX_SHUFTI_WIDTH) { DEBUG_PRINTF("min shufti with %u chars is too wide\n", min_count); return; } const CharReach &cr = reach[min_offset]; if (shuftiBuildMasks(cr, &aux->shufti.lo, &aux->shufti.hi) != -1) { DEBUG_PRINTF("built shufti for %s (%zu chars, offset %u)\n", describeClass(cr).c_str(), cr.count(), min_offset); aux->shufti.accel_type = ACCEL_SHUFTI; aux->shufti.offset = verify_u8(min_offset); return; } DEBUG_PRINTF("fail\n"); }