/** \brief Dump a sparse iterator's keys to stdout. */
void mmbit_sparse_iter_dump(const struct mmbit_sparse_iter *it,
                            u32 total_bits) {
    // Expediency and future-proofing: create a temporary multibit of the
    // right size with all the bits on, then walk it with this sparse
    // iterator.
    size_t bytes = mmbit_size(total_bits);
    u8 *bits = malloc(bytes);
    if (!bits) {
        printf("Failed to alloc %zu bytes for temp multibit", bytes);
        return;
    }
    for (u32 i = 0; i < total_bits; i++) {
        mmbit_set_i(bits, total_bits, i);
    }

    struct mmbit_sparse_state s[MAX_SPARSE_ITER_STATES];
    u32 idx = 0;
    for (u32 i = mmbit_sparse_iter_begin(bits, total_bits, &idx, it, s);
         i != MMB_INVALID;
         i = mmbit_sparse_iter_next(bits, total_bits, i, &idx, it, s)) {
        printf("%u ", i);
    }

    printf("(%u keys)", idx + 1);

    free(bits);
}
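/* For example, dumping an iterator built over the (hypothetical) keys
 * {1, 5, 27} of a 64-key multibit would print "1 5 27 (3 keys)": since all
 * bits of the temporary multibit are on, the walk visits exactly the
 * iterator's keys, and idx finishes as the zero-based list index of the last
 * key visited, hence the "+ 1" in the key count. */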
u32 fatbit_size(u32 total_bits) {
    return max(u32{sizeof(struct fatbit)}, mmbit_size(total_bits));
}
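/* Usage sketch, assuming the usual fatbit_clear/fatbit_set/fatbit_isset
 * helpers from fatbit.h; the function name is illustrative. The max() above
 * guarantees the region is large enough to be treated as a struct fatbit
 * even for tiny bit counts. */
static void fatbit_usage_example(void) {
    const u32 total_bits = 100; /* illustrative key count */
    struct fatbit *fb = (struct fatbit *)malloc(fatbit_size(total_bits));
    if (!fb) {
        return;
    }
    fatbit_clear(fb);
    fatbit_set(fb, total_bits, 42);
    assert(fatbit_isset(fb, total_bits, 42));
    free(fb);
}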
/** Used by hs_alloc_scratch and hs_clone_scratch to allocate a complete
 * scratch region from a prototype structure. */
static
hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) {
    u32 queueCount = proto->queueCount;
    u32 deduperCount = proto->deduper.log_size;
    u32 bStateSize = proto->bStateSize;
    u32 tStateSize = proto->tStateSize;
    u32 fullStateSize = proto->fullStateSize;
    u32 anchored_region_len = proto->anchored_region_len;
    u32 anchored_region_width = proto->anchored_region_width;
    u32 anchored_literal_region_len = proto->anchored_literal_region_len;
    u32 anchored_literal_region_width = proto->anchored_literal_count;

    u32 som_store_size = proto->som_store_count * sizeof(u64a);
    u32 som_attempted_store_size = proto->som_store_count * sizeof(u64a);
    u32 som_now_size = fatbit_size(proto->som_store_count);
    u32 som_attempted_size = fatbit_size(proto->som_store_count);

    struct hs_scratch *s;
    struct hs_scratch *s_tmp;
    size_t queue_size = queueCount * sizeof(struct mq);
    size_t qmpq_size = queueCount * sizeof(struct queue_match);

    assert(anchored_region_len < 8 * sizeof(s->am_log_sum));
    assert(anchored_literal_region_len < 8 * sizeof(s->al_log_sum));

    size_t anchored_region_size = anchored_region_len
        * (mmbit_size(anchored_region_width) + sizeof(u8 *));
    anchored_region_size = ROUNDUP_N(anchored_region_size, 8);

    size_t anchored_literal_region_size = anchored_literal_region_len
        * (mmbit_size(anchored_literal_region_width) + sizeof(u8 *));
    anchored_literal_region_size = ROUNDUP_N(anchored_literal_region_size, 8);

    size_t delay_size = mmbit_size(proto->delay_count) * DELAY_SLOT_COUNT;

    size_t nfa_context_size = 2 * sizeof(struct NFAContext512) + 127;

    // the size is all the allocated stuff, not including the struct itself
    size_t size = queue_size + 63
                  + bStateSize + tStateSize
                  + fullStateSize + 63 /* cacheline padding */
                  + nfa_context_size
                  + fatbit_size(proto->roleCount) /* handled roles */
                  + fatbit_size(queueCount) /* active queue array */
                  + 2 * fatbit_size(deduperCount) /* need odd and even logs */
                  + 2 * fatbit_size(deduperCount) /* ditto som logs */
                  + 2 * sizeof(u64a) * deduperCount /* start offsets for som */
                  + anchored_region_size
                  + anchored_literal_region_size
                  + qmpq_size
                  + delay_size
                  + som_store_size
                  + som_now_size
                  + som_attempted_size
                  + som_attempted_store_size
                  + proto->sideScratchSize + 15;

    /* the struct plus the allocated stuff plus padding for cacheline
     * alignment */
    const size_t alloc_size = sizeof(struct hs_scratch) + size + 256;
    s_tmp = hs_scratch_alloc(alloc_size);
    hs_error_t err = hs_check_alloc(s_tmp);
    if (err != HS_SUCCESS) {
        hs_scratch_free(s_tmp);
        *scratch = NULL;
        return err;
    }

    memset(s_tmp, 0, alloc_size);
    s = ROUNDUP_PTR(s_tmp, 64);
    DEBUG_PRINTF("allocated %zu bytes at %p but realigning to %p\n",
                 alloc_size, s_tmp, s);
    DEBUG_PRINTF("sizeof %zu\n", sizeof(struct hs_scratch));
    *s = *proto;
    s->magic = SCRATCH_MAGIC;
    s->scratchSize = alloc_size;
    s->scratch_alloc = (char *)s_tmp;

    // each of these is at an offset from the previous
    char *current = (char *)s + sizeof(*s);

    // align current so that the following arrays are naturally aligned: this
    // is accounted for in the padding allocated
    current = ROUNDUP_PTR(current, 8);

    s->queues = (struct mq *)current;
    current += queue_size;

    assert(ISALIGNED_N(current, 8));
    s->som_store = (u64a *)current;
    current += som_store_size;

    s->som_attempted_store = (u64a *)current;
    current += som_attempted_store_size;

    s->delay_slots = (u8 *)current;
    current += delay_size;

    current = ROUNDUP_PTR(current, 8);
    s->am_log = (u8 **)current;
    current += sizeof(u8 *) * anchored_region_len;
    for (u32 i = 0; i < anchored_region_len; i++) {
        s->am_log[i] = (u8 *)current;
        current += mmbit_size(anchored_region_width);
    }

    current = ROUNDUP_PTR(current, 8);
    s->al_log = (u8 **)current;
    current += sizeof(u8 *) * anchored_literal_region_len;
    for (u32 i = 0; i < anchored_literal_region_len; i++) {
        s->al_log[i] = (u8 *)current;
        current += mmbit_size(anchored_literal_region_width);
    }

    current = ROUNDUP_PTR(current, 8);
    s->catchup_pq.qm = (struct queue_match *)current;
    current += qmpq_size;

    s->bstate = (char *)current;
    s->bStateSize = bStateSize;
    current += bStateSize;

    s->tstate = (char *)current;
    s->tStateSize = tStateSize;
    current += tStateSize;

    current = ROUNDUP_PTR(current, 64);
    assert(ISALIGNED_CL(current));
    s->nfaContext = current;
    current += sizeof(struct NFAContext512);

    current = ROUNDUP_PTR(current, 64);
    assert(ISALIGNED_CL(current));
    s->nfaContextSom = current;
    current += sizeof(struct NFAContext512);

    assert(ISALIGNED_N(current, 8));
    s->deduper.som_start_log[0] = (u64a *)current;
    current += sizeof(u64a) * deduperCount;

    s->deduper.som_start_log[1] = (u64a *)current;
    current += sizeof(u64a) * deduperCount;

    assert(ISALIGNED_N(current, 8));
    s->aqa = (struct fatbit *)current;
    current += fatbit_size(queueCount);

    s->handled_roles = (struct fatbit *)current;
    current += fatbit_size(proto->roleCount);

    s->deduper.log[0] = (struct fatbit *)current;
    current += fatbit_size(deduperCount);

    s->deduper.log[1] = (struct fatbit *)current;
    current += fatbit_size(deduperCount);

    s->deduper.som_log[0] = (struct fatbit *)current;
    current += fatbit_size(deduperCount);

    s->deduper.som_log[1] = (struct fatbit *)current;
    current += fatbit_size(deduperCount);

    s->som_set_now = (struct fatbit *)current;
    current += som_now_size;

    s->som_attempted_set = (struct fatbit *)current;
    current += som_attempted_size;

    current = ROUNDUP_PTR(current, 16);
    s->side_scratch = (void *)current;
    current += proto->sideScratchSize;

    current = ROUNDUP_PTR(current, 64);
    assert(ISALIGNED_CL(current));
    s->fullState = (char *)current;
    s->fullStateSize = fullStateSize;
    current += fullStateSize;

    *scratch = s;

    // Don't get too big for your boots
    assert((size_t)(current - (char *)s) <= alloc_size);

    // Init q->scratch ptr for every queue.
    for (struct mq *qi = s->queues; qi != s->queues + queueCount; ++qi) {
        qi->scratch = s;
    }

    return HS_SUCCESS;
}
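/* The layout logic above is a bump-pointer arena: one oversized allocation
 * is carved into sub-arrays by advancing a cursor and re-aligning it before
 * each type that needs stricter alignment. A minimal self-contained sketch
 * of the same pattern follows; the names here are illustrative, not part of
 * the scratch API. */
#include <stdint.h>
#include <stdlib.h>

struct arena_layout {
    uint64_t *counters; /* needs 8-byte alignment */
    uint8_t *bytes;     /* plain byte array, any alignment */
};

static char *roundup_ptr(char *p, size_t align) {
    return (char *)(((uintptr_t)p + (align - 1)) & ~(uintptr_t)(align - 1));
}

static void *arena_alloc(struct arena_layout *out, size_t ncounters,
                         size_t nbytes) {
    /* Over-allocate by the worst-case alignment padding, mirroring the
     * "+ 63" / "+ 15" fudge terms used by alloc_scratch above. */
    size_t size = sizeof(uint64_t) * ncounters + 7 + nbytes;
    char *base = malloc(size);
    if (!base) {
        return NULL;
    }
    char *current = roundup_ptr(base, 8);
    out->counters = (uint64_t *)current;
    current += sizeof(uint64_t) * ncounters;
    out->bytes = (uint8_t *)current;
    current += nbytes;
    return base; /* caller frees this pointer, not the aligned members */
}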
RepeatStateInfo::RepeatStateInfo(enum RepeatType type, const depth &repeatMin,
                                 const depth &repeatMax, u32 minPeriod)
    : stateSize(0), packedCtrlSize(0), horizon(0), patchCount(0),
      patchSize(0), encodingSize(0), patchesOffset(0) {
    assert(repeatMin <= repeatMax);
    assert(repeatMax.is_reachable());
    assert(minPeriod || type != REPEAT_SPARSE_OPTIMAL_P);

    switch (type) {
    case REPEAT_FIRST:
        assert(repeatMin.is_finite());
        stateSize = 0; // everything is in the control block.
        horizon = repeatMin;
        packedCtrlSize = calcPackedBytes(horizon + 1);
        break;
    case REPEAT_LAST:
        assert(repeatMax.is_finite());
        stateSize = 0; // everything is in the control block.
        horizon = repeatMax + 1;
        packedCtrlSize = calcPackedBytes(horizon + 1);
        break;
    case REPEAT_RING:
        assert(repeatMax.is_finite());
        stateSize = mmbit_size(repeatMax + 1);
        horizon = repeatMax * 2 + 1; /* TODO: investigate tightening */
        // Packed offset member, plus two bytes for each ring index, reduced
        // to one byte each if they'll fit in eight bits.
        {
            u32 offset_len = calcPackedBytes(horizon + 1);
            u32 ring_indices_len = repeatMax < depth(254) ? 2 : 4;
            packedCtrlSize = offset_len + ring_indices_len;
        }
        break;
    case REPEAT_RANGE:
        assert(repeatMax.is_finite());
        assert(repeatMin < repeatMax);
        stateSize = numRangeSlots(repeatMin, repeatMax) * sizeof(u16);
        horizon = repeatMax * 2 + 1;
        // Packed offset member, plus one byte for the number of range
        // elements.
        packedCtrlSize = calcPackedBytes(horizon + 1) + 1;
        break;
    case REPEAT_BITMAP:
        stateSize = 0; // everything is in the control block.
        horizon = 0;   // unused
        packedCtrlSize = ROUNDUP_N(repeatMax + 1, 8) / 8;
        break;
    case REPEAT_SPARSE_OPTIMAL_P:
        assert(minPeriod);
        assert(repeatMax.is_finite());
        {
            u32 rv = repeatRecurTable(this, repeatMax, minPeriod);
            u32 repeatTmp = 0;
            if ((u32)repeatMax < minPeriod) {
                repeatTmp = repeatMax;
                patchCount = 1;
            } else {
                // find optimal patch size
                repeatTmp =
                    findOptimalPatchSize(this, repeatMax, minPeriod, rv);
                assert(patchCount < 65536);
            }
            DEBUG_PRINTF("repeat[%u %u], period=%u\n", (u32)repeatMin,
                         (u32)repeatMax, minPeriod);
            u64a maxVal = table[repeatTmp];
            encodingSize = calcPackedBytes(maxVal);
            patchSize = repeatTmp;
            assert(encodingSize <= 64);
            patchesOffset = mmbit_size(patchCount);
            stateSize = patchesOffset + encodingSize * patchCount;
            horizon = (repeatTmp * patchCount) * 2 + 1;
            u32 ring_indices_len = patchCount < depth(254) ? 2 : 4;
            packedCtrlSize = calcPackedBytes(horizon + 1) + ring_indices_len;
        }
        break;
    case REPEAT_TRAILER:
        assert(repeatMax.is_finite());
        assert(repeatMin <= depth(64));
        stateSize = 0; // everything is in the control block.
        horizon = repeatMax + 1;
        packedFieldSizes.resize(2);
        packedFieldSizes[0] = calcPackedBits(horizon + 1);
        packedFieldSizes[1] = repeatMin;
        packedCtrlSize = (packedFieldSizes[0] + packedFieldSizes[1] + 7U) / 8U;
        break;
    }
    DEBUG_PRINTF("stateSize=%u, packedCtrlSize=%u, horizon=%u\n", stateSize,
                 packedCtrlSize, horizon);

    assert(packedCtrlSize <= sizeof(RepeatControl));
}
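/* A worked instance of the REPEAT_RING arithmetic above, as a sketch: the
 * function name is illustrative, and it assumes calcPackedBytes(v) yields
 * the byte width needed to store the value v. */
static void repeat_ring_example() {
    const depth repeatMin(0), repeatMax(100);
    RepeatStateInfo info(REPEAT_RING, repeatMin, repeatMax, 0 /* minPeriod */);
    assert(info.stateSize == mmbit_size(101)); // ring occupancy bitmap
    assert(info.horizon == 201);               // repeatMax * 2 + 1
    // one packed-offset byte (values <= 202) plus two one-byte ring indices,
    // since repeatMax < 254
    assert(info.packedCtrlSize == 3);
}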