/**
 * Check whether the sheng state described by \a aux can raise the given
 * report.
 *
 * Walks the state's report list (obtained via get_rl) and returns 1 as soon
 * as \a report is found, 0 if the list is exhausted without a match.
 */
static really_inline
char shengHasAccept(const struct sheng *sh, const struct sstate_aux *aux,
                    ReportID report) {
    assert(sh && aux);

    const struct report_list *rl = get_rl(sh, aux);
    assert(ISALIGNED_N(rl, 4)); // report list is laid out 4-byte aligned
    DEBUG_PRINTF("report list has %u entries\n", rl->count);

    u32 idx = 0;
    while (idx < rl->count) {
        if (rl->report[idx] == report) {
            DEBUG_PRINTF("reporting %u\n", rl->report[idx]);
            return 1;
        }
        idx++;
    }

    return 0;
}
/** Used by hs_alloc_scratch and hs_clone_scratch to allocate a complete
 * scratch region from a prototype structure.
 *
 * The layout is a single allocation: the hs_scratch struct itself (aligned
 * up to a cacheline), followed by every per-scan array, carved out by
 * bumping a single `current` pointer. The `size` computation below must
 * stay in lockstep with the carve-out sequence further down; the loose
 * constants (63, 127, 15, 256) pay for the ROUNDUP_PTR alignment steps.
 *
 * On success writes the aligned scratch pointer to *scratch and returns
 * HS_SUCCESS; on allocator failure frees any partial allocation, sets
 * *scratch to NULL and returns the error from hs_check_alloc. */
static hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) {
    u32 queueCount = proto->queueCount;
    u32 deduperCount = proto->deduper.log_size;
    u32 bStateSize = proto->bStateSize;
    u32 tStateSize = proto->tStateSize;
    u32 fullStateSize = proto->fullStateSize;
    u32 anchored_region_len = proto->anchored_region_len;
    u32 anchored_region_width = proto->anchored_region_width;
    u32 anchored_literal_region_len = proto->anchored_literal_region_len;
    // NOTE(review): width taken from anchored_literal_count, not a
    // *_region_width field — presumably one mmbit row per literal; confirm
    // against the compile-side writer.
    u32 anchored_literal_region_width = proto->anchored_literal_count;

    // SOM (start-of-match) bookkeeping: one u64a slot per SOM store, plus
    // fatbits tracking which slots are valid / attempted.
    u32 som_store_size = proto->som_store_count * sizeof(u64a);
    u32 som_attempted_store_size = proto->som_store_count * sizeof(u64a);
    u32 som_now_size = fatbit_size(proto->som_store_count);
    u32 som_attempted_size = fatbit_size(proto->som_store_count);

    struct hs_scratch *s;
    struct hs_scratch *s_tmp;
    size_t queue_size = queueCount * sizeof(struct mq);
    size_t qmpq_size = queueCount * sizeof(struct queue_match);

    // the am/al log summaries are bitfields; region lengths must fit
    assert(anchored_region_len < 8 * sizeof(s->am_log_sum));
    assert(anchored_literal_region_len < 8 * sizeof(s->am_log_sum));

    // each anchored log row needs a pointer in the am_log array plus its
    // mmbit storage
    size_t anchored_region_size = anchored_region_len
        * (mmbit_size(anchored_region_width) + sizeof(u8 *));
    anchored_region_size = ROUNDUP_N(anchored_region_size, 8);

    size_t anchored_literal_region_size = anchored_literal_region_len
        * (mmbit_size(anchored_literal_region_width) + sizeof(u8 *));
    anchored_literal_region_size = ROUNDUP_N(anchored_literal_region_size, 8);

    size_t delay_size = mmbit_size(proto->delay_count) * DELAY_SLOT_COUNT;

    // two NFA contexts, each cacheline-aligned below (hence +127 slack)
    size_t nfa_context_size = 2 * sizeof(struct NFAContext512) + 127;

    // the size is all the allocated stuff, not including the struct itself
    size_t size = queue_size + 63 + bStateSize + tStateSize
                  + fullStateSize + 63 /* cacheline padding */
                  + nfa_context_size
                  + fatbit_size(proto->roleCount) /* handled roles */
                  + fatbit_size(queueCount) /* active queue array */
                  + 2 * fatbit_size(deduperCount) /* need odd and even logs */
                  + 2 * fatbit_size(deduperCount) /* ditto som logs */
                  + 2 * sizeof(u64a) * deduperCount /* start offsets for som */
                  + anchored_region_size
                  + anchored_literal_region_size + qmpq_size + delay_size
                  + som_store_size
                  + som_now_size
                  + som_attempted_size + som_attempted_store_size
                  + proto->sideScratchSize + 15;

    /* the struct plus the allocated stuff plus padding for cacheline
     * alignment */
    const size_t alloc_size = sizeof(struct hs_scratch) + size + 256;
    s_tmp = hs_scratch_alloc(alloc_size);
    hs_error_t err = hs_check_alloc(s_tmp);
    if (err != HS_SUCCESS) {
        hs_scratch_free(s_tmp); // free(NULL)-safe per allocator contract
        *scratch = NULL;
        return err;
    }

    memset(s_tmp, 0, alloc_size);
    // realign the struct itself to a cacheline; s_tmp is kept for free()
    s = ROUNDUP_PTR(s_tmp, 64);
    DEBUG_PRINTF("allocated %zu bytes at %p but realigning to %p\n",
                 alloc_size, s_tmp, s);
    DEBUG_PRINTF("sizeof %zu\n", sizeof(struct hs_scratch));
    // copy prototype contents, then fix up the fields that must differ
    *s = *proto;

    s->magic = SCRATCH_MAGIC;
    s->scratchSize = alloc_size;
    s->scratch_alloc = (char *)s_tmp; // original (unaligned) base, for free

    // each of these is at an offset from the previous
    char *current = (char *)s + sizeof(*s);

    // align current so that the following arrays are naturally aligned: this
    // is accounted for in the padding allocated
    current = ROUNDUP_PTR(current, 8);

    s->queues = (struct mq *)current;
    current += queue_size;

    assert(ISALIGNED_N(current, 8));
    s->som_store = (u64a *)current;
    current += som_store_size;

    s->som_attempted_store = (u64a *)current;
    current += som_attempted_store_size;

    s->delay_slots = (u8 *)current;
    current += delay_size;

    // anchored matcher log: pointer array followed by the mmbit rows it
    // points into
    current = ROUNDUP_PTR(current, 8);
    s->am_log = (u8 **)current;
    current += sizeof(u8 *) * anchored_region_len;
    for (u32 i = 0; i < anchored_region_len; i++) {
        s->am_log[i] = (u8 *)current;
        current += mmbit_size(anchored_region_width);
    }

    // anchored literal log, same layout as am_log
    current = ROUNDUP_PTR(current, 8);
    s->al_log = (u8 **)current;
    current += sizeof(u8 *) * anchored_literal_region_len;
    for (u32 i = 0; i < anchored_literal_region_len; i++) {
        s->al_log[i] = (u8 *)current;
        current += mmbit_size(anchored_literal_region_width);
    }

    current = ROUNDUP_PTR(current, 8);
    s->catchup_pq.qm = (struct queue_match *)current;
    current += qmpq_size;

    s->bstate = (char *)current;
    s->bStateSize = bStateSize;
    current += bStateSize;

    s->tstate = (char *)current;
    s->tStateSize = tStateSize;
    current += tStateSize;

    // both NFA contexts get their own cacheline
    current = ROUNDUP_PTR(current, 64);
    assert(ISALIGNED_CL(current));
    s->nfaContext = current;
    current += sizeof(struct NFAContext512);
    current = ROUNDUP_PTR(current, 64);
    assert(ISALIGNED_CL(current));
    s->nfaContextSom = current;
    current += sizeof(struct NFAContext512);

    // deduper: odd/even SOM start-offset logs, then the fatbit logs
    assert(ISALIGNED_N(current, 8));
    s->deduper.som_start_log[0] = (u64a *)current;
    current += sizeof(u64a) * deduperCount;

    s->deduper.som_start_log[1] = (u64a *)current;
    current += sizeof(u64a) * deduperCount;

    assert(ISALIGNED_N(current, 8));
    s->aqa = (struct fatbit *)current; // active queue array
    current += fatbit_size(queueCount);

    s->handled_roles = (struct fatbit *)current;
    current += fatbit_size(proto->roleCount);

    s->deduper.log[0] = (struct fatbit *)current;
    current += fatbit_size(deduperCount);

    s->deduper.log[1] = (struct fatbit *)current;
    current += fatbit_size(deduperCount);

    s->deduper.som_log[0] = (struct fatbit *)current;
    current += fatbit_size(deduperCount);

    s->deduper.som_log[1] = (struct fatbit *)current;
    current += fatbit_size(deduperCount);

    s->som_set_now = (struct fatbit *)current;
    current += som_now_size;

    s->som_attempted_set = (struct fatbit *)current;
    current += som_attempted_size;

    current = ROUNDUP_PTR(current, 16);
    s->side_scratch = (void *)current;
    current += proto->sideScratchSize;

    // fullState last, on its own cacheline
    current = ROUNDUP_PTR(current, 64);
    assert(ISALIGNED_CL(current));
    s->fullState = (char *)current;
    s->fullStateSize = fullStateSize;
    current += fullStateSize;

    *scratch = s;

    // Don't get too big for your boots
    assert((size_t)(current - (char *)s) <= alloc_size);

    // Init q->scratch ptr for every queue.
    for (struct mq *qi = s->queues; qi != s->queues + queueCount; ++qi) {
        qi->scratch = s;
    }

    return HS_SUCCESS;
}
/**
 * Run the acceleration scheme described by \a accel over [c, c_end).
 *
 * Dispatches on accel->accel_type to the matching vermicelli / shufti /
 * truffle variant (including the multibyte "long"/"shift"/"doubleshift"
 * and grab flavours). Each case bails out early, returning \a c unchanged,
 * when the remaining buffer is below that scheme's minimum scan length
 * (the SIMD matchers need a minimum run; the guard distances differ per
 * scheme). The double-byte variants scan to c_end - 1 so the final state
 * stays accurate.
 *
 * After the scan, the result is clamped so we never report a position
 * before c + accel->generic.offset, then rebased by subtracting the offset.
 * Returns the adjusted position reached (>= c, < c_end in general).
 */
const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) {
    assert(ISALIGNED_N(accel, alignof(union AccelAux)));
    const u8 *rv;

    switch (accel->accel_type) {
    case ACCEL_NONE:
        DEBUG_PRINTF("accel none %p %p\n", c, c_end);
        return c;

    case ACCEL_VERM:
        DEBUG_PRINTF("accel verm %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }

        rv = vermicelliExec(accel->verm.c, 0, c, c_end);
        break;

    case ACCEL_VERM_NOCASE:
        DEBUG_PRINTF("accel verm nc %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }

        rv = vermicelliExec(accel->verm.c, 1, c, c_end);
        break;

    case ACCEL_DVERM:
        DEBUG_PRINTF("accel dverm %p %p\n", c, c_end);
        if (c + 16 + 1 >= c_end) {
            return c;
        }

        /* need to stop one early to get an accurate end state */
        rv = vermicelliDoubleExec(accel->dverm.c1, accel->dverm.c2, 0, c,
                                  c_end - 1);
        break;

    case ACCEL_DVERM_NOCASE:
        DEBUG_PRINTF("accel dverm nc %p %p\n", c, c_end);
        if (c + 16 + 1 >= c_end) {
            return c;
        }

        /* need to stop one early to get an accurate end state */
        rv = vermicelliDoubleExec(accel->dverm.c1, accel->dverm.c2, 1, c,
                                  c_end - 1);
        break;

    case ACCEL_DVERM_MASKED:
        DEBUG_PRINTF("accel dverm masked %p %p\n", c, c_end);
        if (c + 16 + 1 >= c_end) {
            return c;
        }

        /* need to stop one early to get an accurate end state */
        rv = vermicelliDoubleMaskedExec(accel->dverm.c1, accel->dverm.c2,
                                        accel->dverm.m1, accel->dverm.m2,
                                        c, c_end - 1);
        break;

    case ACCEL_SHUFTI:
        DEBUG_PRINTF("accel shufti %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }

        rv = shuftiExec(accel->shufti.lo, accel->shufti.hi, c, c_end);
        break;

    case ACCEL_TRUFFLE:
        DEBUG_PRINTF("accel Truffle %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }

        rv = truffleExec(accel->truffle.mask1, accel->truffle.mask2, c, c_end);
        break;

    case ACCEL_DSHUFTI:
        DEBUG_PRINTF("accel dshufti %p %p\n", c, c_end);
        if (c + 15 + 1 >= c_end) {
            return c;
        }

        /* need to stop one early to get an accurate end state */
        rv = shuftiDoubleExec(accel->dshufti.lo1, accel->dshufti.hi1,
                              accel->dshufti.lo2, accel->dshufti.hi2,
                              c, c_end - 1);
        break;

    case ACCEL_RED_TAPE:
        // "red tape": skip the whole buffer unconditionally
        DEBUG_PRINTF("accel red tape %p %p\n", c, c_end);
        rv = c_end;
        break;

    /* multibyte matchers */
    case ACCEL_MLVERM:
        DEBUG_PRINTF("accel mlverm %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }
        rv = long_vermicelliExec(accel->mverm.c, 0, c, c_end, accel->mverm.len);
        break;
    case ACCEL_MLVERM_NOCASE:
        DEBUG_PRINTF("accel mlverm nc %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }
        rv = long_vermicelliExec(accel->mverm.c, 1, c, c_end, accel->mverm.len);
        break;
    case ACCEL_MLGVERM:
        DEBUG_PRINTF("accel mlgverm %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }
        rv = longgrab_vermicelliExec(accel->mverm.c, 0, c, c_end,
                                     accel->mverm.len);
        break;
    case ACCEL_MLGVERM_NOCASE:
        DEBUG_PRINTF("accel mlgverm nc %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }
        rv = longgrab_vermicelliExec(accel->mverm.c, 1, c, c_end,
                                     accel->mverm.len);
        break;
    case ACCEL_MSVERM:
        DEBUG_PRINTF("accel msverm %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }
        rv = shift_vermicelliExec(accel->mverm.c, 0, c, c_end,
                                  accel->mverm.len);
        break;
    case ACCEL_MSVERM_NOCASE:
        DEBUG_PRINTF("accel msverm nc %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }
        rv = shift_vermicelliExec(accel->mverm.c, 1, c, c_end,
                                  accel->mverm.len);
        break;
    case ACCEL_MSGVERM:
        DEBUG_PRINTF("accel msgverm %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }
        rv = shiftgrab_vermicelliExec(accel->mverm.c, 0, c, c_end,
                                      accel->mverm.len);
        break;
    case ACCEL_MSGVERM_NOCASE:
        DEBUG_PRINTF("accel msgverm nc %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }
        rv = shiftgrab_vermicelliExec(accel->mverm.c, 1, c, c_end,
                                      accel->mverm.len);
        break;
    case ACCEL_MDSVERM:
        DEBUG_PRINTF("accel mdsverm %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }
        rv = doubleshift_vermicelliExec(accel->mdverm.c, 0, c, c_end,
                                        accel->mdverm.len1,
                                        accel->mdverm.len2);
        break;
    case ACCEL_MDSVERM_NOCASE:
        DEBUG_PRINTF("accel mdsverm nc %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }
        rv = doubleshift_vermicelliExec(accel->mdverm.c, 1, c, c_end,
                                        accel->mdverm.len1,
                                        accel->mdverm.len2);
        break;
    case ACCEL_MDSGVERM:
        DEBUG_PRINTF("accel mdsgverm %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }
        rv = doubleshiftgrab_vermicelliExec(accel->mdverm.c, 0, c, c_end,
                                            accel->mdverm.len1,
                                            accel->mdverm.len2);
        break;
    case ACCEL_MDSGVERM_NOCASE:
        DEBUG_PRINTF("accel mdsgverm nc %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }
        rv = doubleshiftgrab_vermicelliExec(accel->mdverm.c, 1, c, c_end,
                                            accel->mdverm.len1,
                                            accel->mdverm.len2);
        break;
    case ACCEL_MLSHUFTI:
        DEBUG_PRINTF("accel mlshufti %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }
        rv = long_shuftiExec(accel->mshufti.lo, accel->mshufti.hi, c, c_end,
                             accel->mshufti.len);
        break;
    case ACCEL_MLGSHUFTI:
        DEBUG_PRINTF("accel mlgshufti %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }
        rv = longgrab_shuftiExec(accel->mshufti.lo, accel->mshufti.hi, c,
                                 c_end, accel->mshufti.len);
        break;
    case ACCEL_MSSHUFTI:
        DEBUG_PRINTF("accel msshufti %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }
        rv = shift_shuftiExec(accel->mshufti.lo, accel->mshufti.hi, c, c_end,
                              accel->mshufti.len);
        break;
    case ACCEL_MSGSHUFTI:
        DEBUG_PRINTF("accel msgshufti %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }
        rv = shiftgrab_shuftiExec(accel->mshufti.lo, accel->mshufti.hi, c,
                                  c_end, accel->mshufti.len);
        break;
    case ACCEL_MDSSHUFTI:
        DEBUG_PRINTF("accel mdsshufti %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }
        rv = doubleshift_shuftiExec(accel->mdshufti.lo, accel->mdshufti.hi,
                                    c, c_end, accel->mdshufti.len1,
                                    accel->mdshufti.len2);
        break;
    case ACCEL_MDSGSHUFTI:
        /* NOTE(review): debug string says "msgshufti"; presumably a typo
         * for "mdsgshufti" (debug-only output, behavior unaffected). */
        DEBUG_PRINTF("accel msgshufti %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }
        rv = doubleshiftgrab_shuftiExec(accel->mdshufti.lo, accel->mdshufti.hi,
                                        c, c_end, accel->mdshufti.len1,
                                        accel->mdshufti.len2);
        break;
    case ACCEL_MLTRUFFLE:
        DEBUG_PRINTF("accel mltruffle %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }
        rv = long_truffleExec(accel->mtruffle.mask1, accel->mtruffle.mask2,
                              c, c_end, accel->mtruffle.len);
        break;
    case ACCEL_MLGTRUFFLE:
        DEBUG_PRINTF("accel mlgtruffle %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }
        rv = longgrab_truffleExec(accel->mtruffle.mask1, accel->mtruffle.mask2,
                                  c, c_end, accel->mtruffle.len);
        break;
    case ACCEL_MSTRUFFLE:
        DEBUG_PRINTF("accel mstruffle %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }
        rv = shift_truffleExec(accel->mtruffle.mask1, accel->mtruffle.mask2,
                               c, c_end, accel->mtruffle.len);
        break;
    case ACCEL_MSGTRUFFLE:
        DEBUG_PRINTF("accel msgtruffle %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }
        rv = shiftgrab_truffleExec(accel->mtruffle.mask1,
                                   accel->mtruffle.mask2,
                                   c, c_end, accel->mtruffle.len);
        break;
    case ACCEL_MDSTRUFFLE:
        DEBUG_PRINTF("accel mdstruffle %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }
        rv = doubleshift_truffleExec(accel->mdtruffle.mask1,
                                     accel->mdtruffle.mask2,
                                     c, c_end, accel->mdtruffle.len1,
                                     accel->mdtruffle.len2);
        break;
    case ACCEL_MDSGTRUFFLE:
        DEBUG_PRINTF("accel mdsgtruffle %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }
        rv = doubleshiftgrab_truffleExec(accel->mdtruffle.mask1,
                                         accel->mdtruffle.mask2,
                                         c, c_end, accel->mdtruffle.len1,
                                         accel->mdtruffle.len2);
        break;

    default:
        assert(!"not here"); // unknown accel type: compiler bug
        return c;
    }

    DEBUG_PRINTF("adjusting for offset %u\n", accel->generic.offset);
    /* adjust offset to take into account the offset */
    // clamp so we never land before c + offset, then rebase by the offset
    rv = MAX(c + accel->generic.offset, rv);
    rv -= accel->generic.offset;

    DEBUG_PRINTF("advanced %zd\n", rv - c);

    return rv;
}