/**
 * Total bytecode size in bytes of the given HWLM matcher.
 *
 * The size is the cache-line-rounded HWLM header plus the size reported by
 * the embedded engine (noodle or FDR). Returns 0 if the engine reports no
 * size.
 */
size_t hwlmSize(const HWLM *h) {
    size_t subSize = 0;

    if (h->type == HWLM_ENGINE_NOOD) {
        subSize = noodSize((const noodTable *)HWLM_C_DATA(h));
    } else if (h->type == HWLM_ENGINE_FDR) {
        subSize = fdrSize((const FDR *)HWLM_C_DATA(h));
    }

    if (!subSize) {
        return 0;
    }

    return subSize + ROUNDUP_CL(sizeof(*h));
}
/**
 * Take in a collection of exclusive sub engines and produces a tamarama, also
 * returns via out_top_remap, a mapping indicating how tops in the subengines in
 * relate to the tamarama's tops.
 *
 * Layout of the produced bytecode:
 *   NFA header | Tamarama header | base-top table | subengine offset table |
 *   (cache-line-aligned) subengine copies.
 */
aligned_unique_ptr<NFA> buildTamarama(const TamaInfo &tamaInfo, const u32 queue,
                                      map<pair<const NFA *, u32>, u32> &out_top_remap) {
    vector<u32> top_base;
    remapTops(tamaInfo, top_base, out_top_remap);

    size_t subSize = tamaInfo.subengines.size();
    // Fix: %zu is the correct conversion for size_t; %lu is wrong on LLP64
    // targets (and inconsistent with the rest of the file).
    DEBUG_PRINTF("subSize:%zu\n", subSize);
    size_t total_size =
        sizeof(NFA) +           // initial NFA structure
        sizeof(Tamarama) +      // Tamarama structure
        sizeof(u32) * subSize + // base top event value for subengines,
                                // used for top remapping at runtime
        sizeof(u32) * subSize + // offsets to subengines in bytecode and
        64;                     // padding for subengines
    for (const auto &sub : tamaInfo.subengines) {
        total_size += ROUNDUP_CL(sub->length);
    }

    // use subSize as a sentinel value for no active subengines,
    // so add one to subSize here
    u32 activeIdxSize = calcPackedBytes(subSize + 1);
    aligned_unique_ptr<NFA> nfa = aligned_zmalloc_unique<NFA>(total_size);
    nfa->type = verify_u8(TAMARAMA_NFA_0);
    nfa->length = verify_u32(total_size);
    nfa->queueIndex = queue;

    char *ptr = (char *)nfa.get() + sizeof(NFA);
    char *base_offset = ptr; // subengine offsets are relative to this point
    Tamarama *t = (Tamarama *)ptr;
    t->numSubEngines = verify_u32(subSize);
    t->activeIdxSize = verify_u8(activeIdxSize);

    ptr += sizeof(Tamarama);
    copy_bytes(ptr, top_base);
    ptr += byte_length(top_base);

    u32 *offsets = (u32 *)ptr;
    char *sub_nfa_offset = ptr + sizeof(u32) * subSize;
    copyInSubnfas(base_offset, *nfa, tamaInfo, offsets, sub_nfa_offset,
                  activeIdxSize);
    assert((size_t)(sub_nfa_offset - (char *)nfa.get()) <= total_size);
    return nfa;
}
/**
 * Build an HWLM literal matcher from the given prototype.
 *
 * Constructs the underlying engine (noodle when the prototype selected it,
 * FDR otherwise) and then wraps it in a cache-line-aligned HWLM header.
 *
 * Returns nullptr if the engine could not be built; throws
 * ResourceLimitError if the engine exceeds the configured size limit.
 */
bytecode_ptr<HWLM> hwlmBuild(const HWLMProto &proto, const CompileContext &cc,
                             UNUSED hwlm_group_t expected_groups) {
    const auto &lits = proto.lits;
    DEBUG_PRINTF("building table with %zu strings\n", lits.size());

    shared_ptr<void> eng;
    size_t engSize = 0;

    if (proto.engType == HWLM_ENGINE_NOOD) {
        DEBUG_PRINTF("build noodle table\n");
        auto noodle = noodBuildTable(lits.front());
        if (noodle) {
            engSize = noodle.size();
        }
        eng = move(noodle);
    } else {
        DEBUG_PRINTF("building a new deal\n");
        auto fdr = fdrBuildTable(proto, cc.grey);
        if (fdr) {
            engSize = fdr.size();
        }
        eng = move(fdr);
    }

    if (!eng) {
        return nullptr;
    }

    assert(engSize);
    if (engSize > cc.grey.limitLiteralMatcherSize) {
        throw ResourceLimitError();
    }

    // Engine bytecode is laid down after the cache-line-rounded HWLM header.
    const size_t hwlm_len = ROUNDUP_CL(sizeof(HWLM)) + engSize;
    auto h = make_zeroed_bytecode_ptr<HWLM>(hwlm_len, 64);

    h->type = proto.engType;
    memcpy(HWLM_DATA(h.get()), eng.get(), engSize);
    return h;
}
/**
 * Update stream state and scratch state sizes and copy in
 * subengines in Tamarama.
 *
 * Writes one u32 offset per subengine into `offsets` (relative to
 * base_offset) and copies each subengine's bytecode to a cache-line-aligned
 * position starting at sub_nfa_offset. Also folds subengine properties
 * (flags, maxWidth, state sizes) into the container NFA header.
 */
static void copyInSubnfas(const char *base_offset, NFA &nfa,
                          const TamaInfo &tamaInfo, u32 *offsets,
                          char *sub_nfa_offset, const u32 activeIdxSize) {
    u32 maxStreamStateSize = 0;
    u32 maxScratchStateSize = 0;
    // Subengine copies start at a cache-line boundary.
    sub_nfa_offset = ROUNDUP_PTR(sub_nfa_offset, 64);
    bool infinite_max_width = false;
    for (auto &sub : tamaInfo.subengines) {
        u32 streamStateSize = verify_u32(sub->streamStateSize);
        u32 scratchStateSize = verify_u32(sub->scratchStateSize);
        maxStreamStateSize = max(maxStreamStateSize, streamStateSize);
        maxScratchStateSize = max(maxScratchStateSize, scratchStateSize);
        // Set the container's queue index on the source engine *before* the
        // memcpy so the copied bytecode carries it too.
        sub->queueIndex = nfa.queueIndex;
        memcpy(sub_nfa_offset, sub, sub->length);
        *offsets = verify_u32(sub_nfa_offset - base_offset);
        DEBUG_PRINTF("type:%u offsets:%u\n", sub->type, *offsets);
        ++offsets;
        sub_nfa_offset += ROUNDUP_CL(sub->length);

        // update nfa properties
        nfa.flags |= sub->flags;
        // maxWidth of 0 is treated as unbounded here: once any subengine has
        // it, the container's maxWidth is forced to 0 below.
        if (!sub->maxWidth) {
            infinite_max_width = true;
        } else if (!infinite_max_width) {
            nfa.maxWidth = max(nfa.maxWidth, sub->maxWidth);
        }
    }

    if (infinite_max_width) {
        nfa.maxWidth = 0;
    }
    nfa.maxBiAnchoredWidth = 0;
    // Stream state: packed active-subengine index followed by space for the
    // largest subengine stream state.
    nfa.streamStateSize = activeIdxSize + maxStreamStateSize;
    nfa.scratchStateSize = maxScratchStateSize;
}
/**
 * Build an HWLM literal matcher from a raw literal list (legacy interface).
 *
 * Validates the literals against grey-box resource limits, selects the engine
 * (noodle for a single "noodleable" literal, FDR otherwise), and wraps the
 * built engine in a cache-line-aligned HWLM header. If stream_control is
 * non-null, streaming history/state requirements are written back through it.
 *
 * Returns nullptr if the engine could not be built; throws
 * ResourceLimitError or CompileError on limit violations.
 */
aligned_unique_ptr<HWLM> hwlmBuild(const vector<hwlmLiteral> &lits,
                                   hwlmStreamingControl *stream_control,
                                   bool make_small, const CompileContext &cc,
                                   hwlm_group_t expected_groups) {
    assert(!lits.empty());
    dumpLits(lits);

    if (stream_control) {
        assert(stream_control->history_min <= stream_control->history_max);
    }

    // Check that we haven't exceeded the maximum number of literals.
    if (lits.size() > cc.grey.limitLiteralCount) {
        throw ResourceLimitError();
    }

    // Safety and resource limit checks.
    u64a total_chars = 0;
    for (const auto &lit : lits) {
        assert(!lit.s.empty());

        if (lit.s.length() > cc.grey.limitLiteralLength) {
            throw ResourceLimitError();
        }
        // Checked inside the loop so we bail out as early as possible.
        total_chars += lit.s.length();
        if (total_chars > cc.grey.limitLiteralMatcherChars) {
            throw ResourceLimitError();
        }

        // We do not allow the all-ones ID, as we reserve that for internal use
        // within literal matchers.
        if (lit.id == 0xffffffffu) {
            assert(!"reserved id 0xffffffff used");
            throw CompileError("Internal error.");
        }
    }

    u8 engType = 0;
    size_t engSize = 0;
    shared_ptr<void> eng; // type-erased engine blob (noodle or FDR)

    DEBUG_PRINTF("building table with %zu strings\n", lits.size());

    assert(everyoneHasGroups(lits));

    if (isNoodleable(lits, stream_control, cc)) {
        DEBUG_PRINTF("build noodle table\n");
        engType = HWLM_ENGINE_NOOD;
        const hwlmLiteral &lit = lits.front();
        auto noodle = noodBuildTable((const u8 *)lit.s.c_str(), lit.s.length(),
                                     lit.nocase, lit.id);
        if (noodle) {
            engSize = noodSize(noodle.get());
        }
        if (stream_control) {
            // For now, a single literal still goes to noodle and asks
            // for a great big history
            stream_control->literal_history_required = lit.s.length() - 1;
            assert(stream_control->literal_history_required
                   <= stream_control->history_max);
            stream_control->literal_stream_state_required = 0;
        }
        eng = move(noodle);
    } else {
        DEBUG_PRINTF("building a new deal\n");
        engType = HWLM_ENGINE_FDR;
        auto fdr = fdrBuildTable(lits, make_small, cc.target_info, cc.grey,
                                 stream_control);
        if (fdr) {
            engSize = fdrSize(fdr.get());
        }
        eng = move(fdr);
    }

    if (!eng) {
        return nullptr;
    }

    assert(engSize);
    if (engSize > cc.grey.limitLiteralMatcherSize) {
        throw ResourceLimitError();
    }

    // Engine bytecode is laid down after the cache-line-rounded HWLM header.
    auto h = aligned_zmalloc_unique<HWLM>(ROUNDUP_CL(sizeof(HWLM)) + engSize);

    h->type = engType;
    memcpy(HWLM_DATA(h.get()), eng.get(), engSize);

    if (engType == HWLM_ENGINE_FDR && cc.grey.hamsterAccelForward) {
        buildForwardAccel(h.get(), lits, expected_groups);
    }

    if (stream_control) {
        DEBUG_PRINTF("requires %zu (of max %zu) bytes of history\n",
                     stream_control->literal_history_required,
                     stream_control->history_max);
        assert(stream_control->literal_history_required
               <= stream_control->history_max);
    }

    return h;
}