// Saves out stream state for all our active suffix NFAs. static rose_inline void roseSaveNfaStreamState(const struct RoseEngine *t, char *state, struct hs_scratch *scratch) { struct mq *queues = scratch->queues; u8 *aa = getActiveLeafArray(t, state); u32 aaCount = t->activeArrayCount; if (scratch->tctxt.mpv_inactive) { DEBUG_PRINTF("mpv is dead as a doornail\n"); /* mpv if it exists is queue 0 */ mmbit_unset(aa, aaCount, 0); } for (u32 qi = mmbit_iterate(aa, aaCount, MMB_INVALID); qi != MMB_INVALID; qi = mmbit_iterate(aa, aaCount, qi)) { DEBUG_PRINTF("saving stream state for qi=%u\n", qi); struct mq *q = queues + qi; // If it's active, it should have an active queue (as we should have // done some work!) assert(fatbit_isset(scratch->aqa, t->queueCount, qi)); const struct NFA *nfa = getNfaByQueue(t, qi); saveStreamState(nfa, q, q_cur_loc(q)); } }
// Runs the Tamarama container in stop-at-match mode: repeatedly hand the
// current subengine's events to it until the queue is drained, we pass
// `end`, or the subengine reports pending matches.
char nfaExecTamarama0_Q2(const struct NFA *n, struct mq *q, s64a end) {
    DEBUG_PRINTF("exec to match\n");
    const struct Tamarama *t = getImplNfa(n);
    struct mq subq;
    char ret = 0;
    char ran = 0; // did we ever populate subq?

    for (;;) {
        // Same guard as the original while-condition, negated via De Morgan.
        if (q->cur >= q->end || q_cur_loc(q) > end ||
            ret == MO_MATCHES_PENDING) {
            break;
        }
        updateQueues(t, q, &subq);
        ret = nfaQueueExec2_raw(subq.nfa, &subq, end);
        q->report_current = subq.report_current;
        ran = 1;
    }

    // Propagate subengine queue/state back into the container queue.
    if (ran) {
        copyBack(t, q, &subq);
    }
    return ret;
}
// Runs the Tamarama container in callback mode: dispatch events to the
// active subengine until the queue is drained, we pass `end`, or the
// callback asks us to stop matching.
char nfaExecTamarama0_Q(const struct NFA *n, struct mq *q, s64a end) {
    DEBUG_PRINTF("exec\n");
    const struct Tamarama *t = getImplNfa(n);
    struct mq subq;
    char ret = MO_ALIVE;
    char ran = 0; // did we ever populate subq?

    while (q->cur < q->end && q_cur_loc(q) <= end) {
        updateQueues(t, q, &subq);
        ret = nfaQueueExec_raw(subq.nfa, &subq, end);
        q->report_current = subq.report_current;
        ran = 1;
        if (can_stop_matching(q->scratch)) {
            break;
        }
    }

    // Propagate subengine queue/state back into the container queue.
    if (ran) {
        copyBack(t, q, &subq);
    }
    return ret;
}
/**
 * \brief Drives the Sheng DFA over the events in queue \a q, scanning up to
 * location \a b_end.
 *
 * Processes queue events (MQE_START/MQE_TOP/MQE_END) in order. Negative
 * locations are scanned out of the history buffer, non-negative ones out of
 * the main buffer, with a crossover handled inside the main loop. The scan
 * itself is dispatched to runShengNm / runShengCb / runShengSam depending on
 * \a mode.
 *
 * Returns MO_ALIVE if the DFA may still match, MO_DEAD if it can never match
 * again, or (STOP_AT_MATCH path) MO_MATCHES_PENDING after rewriting the
 * current queue event so the caller can resume at the match location.
 */
static never_inline
char runSheng(const struct sheng *sh, struct mq *q, s64a b_end,
              enum MatchMode mode) {
    u8 state = *(u8 *)q->state; // DFA state is stored as a single byte
    u8 can_die = sh->flags & SHENG_FLAG_CAN_DIE;
    u8 has_accel = sh->flags & SHENG_FLAG_HAS_ACCEL;
    u8 single = sh->flags & SHENG_FLAG_SINGLE_REPORT;
    u8 cached_accept_state = 0;
    ReportID cached_accept_id = 0;

    DEBUG_PRINTF("starting Sheng execution in state %u\n",
                 state & SHENG_STATE_MASK);

    // Flush matches deferred from a previous MO_MATCHES_PENDING exit.
    if (q->report_current) {
        DEBUG_PRINTF("reporting current pending matches\n");
        assert(sh);

        q->report_current = 0;

        int rv;
        if (single) {
            rv = fireSingleReport(q->cb, q->context, sh->report,
                                  q_cur_offset(q));
        } else {
            rv = fireReports(sh, q->cb, q->context, state, q_cur_offset(q),
                             &cached_accept_state, &cached_accept_id, 0);
        }
        if (rv == MO_HALT_MATCHING) {
            DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK);
            return MO_DEAD;
        }

        DEBUG_PRINTF("proceeding with matching\n");
    }

    assert(q_cur_type(q) == MQE_START);
    s64a start = q_cur_loc(q);

    DEBUG_PRINTF("offset: %lli, location: %lli, mode: %s\n", q->offset, start,
                 mode == CALLBACK_OUTPUT ? "CALLBACK OUTPUT" :
                 mode == NO_MATCHES ? "NO MATCHES" :
                 mode == STOP_AT_MATCH ? "STOP AT MATCH" : "???");

    DEBUG_PRINTF("processing event @ %lli: %s\n", q->offset + q_cur_loc(q),
                 q_cur_type(q) == MQE_START ? "START" :
                 q_cur_type(q) == MQE_TOP ? "TOP" :
                 q_cur_type(q) == MQE_END ? "END" : "???");

    // Negative locations index into the history buffer (relative to its
    // end); non-negative locations index into the current buffer.
    const u8* cur_buf;
    if (start < 0) {
        DEBUG_PRINTF("negative location, scanning history\n");
        DEBUG_PRINTF("min location: %zd\n", -q->hlength);
        cur_buf = q->history + q->hlength;
    } else {
        DEBUG_PRINTF("positive location, scanning buffer\n");
        DEBUG_PRINTF("max location: %lli\n", b_end);
        cur_buf = q->buffer;
    }

    /* if our queue event is past our end */
    if (mode != NO_MATCHES && q_cur_loc(q) > b_end) {
        DEBUG_PRINTF("current location past buffer end\n");
        DEBUG_PRINTF("setting q location to %llu\n", b_end);
        DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK);
        // Rewrite the START event in place so the caller resumes at b_end.
        q->items[q->cur].location = b_end;
        return MO_ALIVE;
    }

    q->cur++;

    s64a cur_start = start;

    while (1) {
        DEBUG_PRINTF("processing event @ %lli: %s\n", q->offset + q_cur_loc(q),
                     q_cur_type(q) == MQE_START ? "START" :
                     q_cur_type(q) == MQE_TOP ? "TOP" :
                     q_cur_type(q) == MQE_END ? "END" : "???");
        s64a end = q_cur_loc(q);
        if (mode != NO_MATCHES) {
            end = MIN(end, b_end);
        }
        assert(end <= (s64a) q->length);
        s64a cur_end = end;

        /* we may cross the border between history and current buffer */
        if (cur_start < 0) {
            cur_end = MIN(0, cur_end); // clamp: finish history chunk first
        }

        DEBUG_PRINTF("start: %lli end: %lli\n", start, end);

        /* don't scan zero length buffer */
        if (cur_start != cur_end) {
            const u8 * scanned = cur_buf;
            char rv;

            /* if we're in nomatch mode or if we're scanning history buffer */
            if (mode == NO_MATCHES ||
                (cur_start < 0 && mode == CALLBACK_OUTPUT)) {
                // No-match variant: no reports fired, return value unused.
                runShengNm(sh, q->cb, q->context, q->offset,
                           &cached_accept_state, &cached_accept_id, cur_buf,
                           cur_buf + cur_start, cur_buf + cur_end, can_die,
                           has_accel, single, &scanned, &state);
            } else if (mode == CALLBACK_OUTPUT) {
                rv = runShengCb(sh, q->cb, q->context, q->offset,
                                &cached_accept_state, &cached_accept_id,
                                cur_buf, cur_buf + cur_start,
                                cur_buf + cur_end, can_die, has_accel,
                                single, &scanned, &state);
                if (rv == MO_DEAD) {
                    DEBUG_PRINTF("exiting in state %u\n",
                                 state & SHENG_STATE_MASK);
                    return MO_DEAD;
                }
            } else if (mode == STOP_AT_MATCH) {
                rv = runShengSam(sh, q->cb, q->context, q->offset,
                                 &cached_accept_state, &cached_accept_id,
                                 cur_buf, cur_buf + cur_start,
                                 cur_buf + cur_end, can_die, has_accel,
                                 single, &scanned, &state);
                if (rv == MO_DEAD) {
                    DEBUG_PRINTF("exiting in state %u\n",
                                 state & SHENG_STATE_MASK);
                    return rv;
                } else if (rv == MO_MATCHES_PENDING) {
                    assert(q->cur);
                    DEBUG_PRINTF("found a match, setting q location to %zd\n",
                                 scanned - cur_buf + 1);
                    // Rewind the queue and replace the event with a START at
                    // the byte after the match, so resumption is seamless.
                    q->cur--;
                    q->items[q->cur].type = MQE_START;
                    q->items[q->cur].location =
                            scanned - cur_buf + 1; /* due to exiting early */
                    *(u8 *)q->state = state;
                    DEBUG_PRINTF("exiting in state %u\n",
                                 state & SHENG_STATE_MASK);
                    return rv;
                }
            } else {
                assert(!"invalid scanning mode!");
            }
            assert(scanned == cur_buf + cur_end);

            cur_start = cur_end;
        }

        /* if our queue event is past our end */
        if (mode != NO_MATCHES && q_cur_loc(q) > b_end) {
            DEBUG_PRINTF("current location past buffer end\n");
            DEBUG_PRINTF("setting q location to %llu\n", b_end);
            DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK);
            // Save state and park a START event at b_end for the caller.
            q->cur--;
            q->items[q->cur].type = MQE_START;
            q->items[q->cur].location = b_end;
            *(u8 *)q->state = state;
            return MO_ALIVE;
        }

        /* crossing over into actual buffer */
        if (cur_start == 0) {
            DEBUG_PRINTF("positive location, scanning buffer\n");
            DEBUG_PRINTF("max offset: %lli\n", b_end);
            cur_buf = q->buffer;
        }

        /* continue scanning the same buffer */
        if (end != cur_end) {
            continue;
        }

        // Current event fully consumed: apply its semantics before moving on.
        switch (q_cur_type(q)) {
        case MQE_END:
            *(u8 *)q->state = state;
            q->cur++;
            DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK);
            if (can_die) {
                return (state & SHENG_STATE_DEAD) ? MO_DEAD : MO_ALIVE;
            }
            return MO_ALIVE;
        case MQE_TOP:
            if (q->offset + cur_start == 0) {
                DEBUG_PRINTF("Anchored start, going to state %u\n",
                             sh->anchored);
                state = sh->anchored;
            } else {
                u8 new_state = get_aux(sh, state)->top;
                DEBUG_PRINTF("Top event %u->%u\n", state & SHENG_STATE_MASK,
                             new_state & SHENG_STATE_MASK);
                state = new_state;
            }
            break;
        default:
            assert(!"invalid queue event");
            break;
        }
        q->cur++;
    }
}
/**
 * \brief Catches up an active (non-transient) leftfix NFA for queue \a qi to
 * the current stream boundary, saving its stream state.
 *
 * May skip work entirely for zombie prefixes, short-circuit via miracle
 * scanning when a stop table is present, and avoids running the NFA when the
 * scan start would land safely inside the history region of the next stream
 * write (storing a rose delay instead).
 *
 * Returns 1 if the leftfix is still alive (state saved or deferred), 0 if it
 * has died.
 */
static really_inline
char roseCatchUpLeftfix(const struct RoseEngine *t, char *state,
                        struct hs_scratch *scratch, u32 qi,
                        const struct LeftNfaInfo *left) {
    assert(!left->transient); // active roses only

    struct core_info *ci = &scratch->core_info;
    const u32 qCount = t->queueCount;
    struct mq *q = scratch->queues + qi;
    const struct NFA *nfa = getNfaByQueue(t, qi);

    // A zombie prefix with no live queue needs no catch-up at all.
    if (nfaSupportsZombie(nfa)
        && ci->buf_offset /* prefix can be alive with no q */
        && !fatbit_isset(scratch->aqa, qCount, qi)
        && isZombie(t, state, left)) {
        DEBUG_PRINTF("yawn - zombie\n");
        return 1;
    }

    // Miracle scan may kill the leftfix, fully handle the save, or tell us
    // to continue with the normal catch-up path.
    if (left->stopTable) {
        enum MiracleAction mrv =
            roseScanForMiracles(t, state, scratch, qi, left, nfa);
        switch (mrv) {
        case MIRACLE_DEAD:
            return 0;
        case MIRACLE_SAVED:
            return 1;
        default:
            assert(mrv == MIRACLE_CONTINUE);
            break;
        }
    }

    if (!fatbit_set(scratch->aqa, qCount, qi)) {
        // Queue was not active this scan: initialise it and work out the
        // scan start point (negative = inside history).
        initRoseQueue(t, qi, left, scratch);

        s32 sp;
        if (ci->buf_offset) {
            sp = -(s32)loadRoseDelay(t, state, left);
        } else {
            sp = 0;
        }

        DEBUG_PRINTF("ci->len=%zu, sp=%d, historyRequired=%u\n", ci->len, sp,
                     t->historyRequired);

        if ( ci->len - sp + 1 < t->historyRequired) {
            // we'll end up safely in the history region.
            DEBUG_PRINTF("safely in history, skipping\n");
            storeRoseDelay(t, state, left, (s64a)ci->len - sp);
            return 1;
        }

        pushQueueAt(q, 0, MQE_START, sp);
        if (left->infix || ci->buf_offset + sp > 0) {
            loadStreamState(nfa, q, sp);
        } else {
            // Prefix starting at stream offset 0: fresh state plus a TOP.
            pushQueueAt(q, 1, MQE_TOP, sp);
            nfaQueueInitState(nfa, q);
        }
    } else {
        DEBUG_PRINTF("queue already active\n");
        if (q->end - q->cur == 1 && q_cur_type(q) == MQE_START) {
            // Only a START event queued: nothing to execute yet.
            DEBUG_PRINTF("empty queue, start loc=%lld\n", q_cur_loc(q));
            s64a last_loc = q_cur_loc(q);
            if (ci->len - last_loc + 1 < t->historyRequired) {
                // we'll end up safely in the history region.
                DEBUG_PRINTF("safely in history, saving state and skipping\n");
                saveStreamState(nfa, q, last_loc);
                storeRoseDelay(t, state, left, (s64a)ci->len - last_loc);
                return 1;
            }
        }
    }

    // Determine whether the byte before last_loc will be in the history
    // buffer on the next stream write.
    s64a last_loc = q_last_loc(q);
    s64a leftovers = ci->len - last_loc;
    if (leftovers + 1 >= t->historyRequired) {
        // It won't be; pull the catch-up point forward by the leftfix lag.
        u32 catchup_offset = left->maxLag ? left->maxLag - 1 : 0;
        last_loc = (s64a)ci->len - catchup_offset;
    }

    if (left->infix) {
        if (infixTooOld(q, last_loc)) {
            DEBUG_PRINTF("infix died of old age\n");
            return 0;
        }
        // Trim stale events so the queue stays within its configured bound.
        reduceInfixQueue(q, last_loc, left->maxQueueLen, q->nfa->maxWidth);
    }

    DEBUG_PRINTF("end scan at %lld\n", last_loc);
    pushQueueNoMerge(q, MQE_END, last_loc);

#ifdef DEBUG
    debugQueue(q);
#endif

    char rv = nfaQueueExecRose(nfa, q, MO_INVALID_IDX);
    if (!rv) { /* nfa is dead */
        DEBUG_PRINTF("died catching up to stream boundary\n");
        return 0;
    } else {
        DEBUG_PRINTF("alive, saving stream state\n");
        if (nfaSupportsZombie(nfa)
            && nfaGetZombieStatus(nfa, q, last_loc) == NFA_ZOMBIE_ALWAYS_YES) {
            // Mark as zombie instead of saving: no stream state required.
            DEBUG_PRINTF("not so fast - zombie\n");
            setAsZombie(t, state, left);
        } else {
            saveStreamState(nfa, q, last_loc);
            storeRoseDelay(t, state, left, (s64a)ci->len - last_loc);
        }
    }

    return 1;
}
/**
 * \brief Scans for "miracles" (via roseMiracleOccurs /
 * roseCountingMiracleOccurs) between the queue's current location and the
 * end of the buffer for leftfix queue \a qi.
 *
 * On a miracle: an infix either dies or has its queue skipped forward and
 * its state re-initialised; a prefix is either re-initialised from a
 * compressed state (when the miracle lands within historyRequired of the
 * buffer end — returning MIRACLE_SAVED) or has its queue rebuilt with
 * START/TOP events at the miracle location.
 *
 * Returns MIRACLE_DEAD, MIRACLE_SAVED, or MIRACLE_CONTINUE (no miracle, or
 * caller should continue normal catch-up).
 */
static really_inline
enum MiracleAction roseScanForMiracles(const struct RoseEngine *t, char *state,
                                       struct hs_scratch *scratch, u32 qi,
                                       const struct LeftNfaInfo *left,
                                       const struct NFA *nfa) {
    struct core_info *ci = &scratch->core_info;
    const u32 qCount = t->queueCount;
    struct mq *q = scratch->queues + qi;

    const char q_active = fatbit_isset(scratch->aqa, qCount, qi);
    DEBUG_PRINTF("q_active=%d\n", q_active);

    // Inactive queues scan from the buffer start.
    const s64a begin_loc = q_active ? q_cur_loc(q) : 0;
    const s64a end_loc = ci->len;

    s64a miracle_loc;
    if (roseMiracleOccurs(t, left, ci, begin_loc, end_loc, &miracle_loc)) {
        goto found_miracle;
    }

    if (roseCountingMiracleOccurs(t, left, ci, begin_loc, end_loc,
                                  &miracle_loc)) {
        goto found_miracle;
    }

    DEBUG_PRINTF("no miracle\n");
    return MIRACLE_CONTINUE;

found_miracle:
    DEBUG_PRINTF("miracle at %lld\n", miracle_loc);

    if (left->infix) {
        if (!q_active) {
            DEBUG_PRINTF("killing infix\n");
            return MIRACLE_DEAD;
        }

        DEBUG_PRINTF("skip q forward, %lld to %lld\n", begin_loc, miracle_loc);
        q_skip_forward_to(q, miracle_loc);
        if (q_last_type(q) == MQE_START) {
            // No TOP events remain after the skip: the infix cannot revive.
            DEBUG_PRINTF("miracle caused infix to die\n");
            return MIRACLE_DEAD;
        }

        DEBUG_PRINTF("re-init infix state\n");
        assert(q->items[q->cur].type == MQE_START);
        q->items[q->cur].location = miracle_loc;
        nfaQueueInitState(q->nfa, q);
    } else {
        if (miracle_loc > end_loc - t->historyRequired) {
            // Close enough to the boundary: write a compressed initial state
            // directly into the stream state and skip queue execution.
            char *streamState = state + getNfaInfoByQueue(t, qi)->stateOffset;
            u64a offset = ci->buf_offset + miracle_loc;
            u8 key = offset ? getByteBefore(ci, miracle_loc) : 0;
            DEBUG_PRINTF("init state, key=0x%02x, offset=%llu\n", key, offset);
            if (!nfaInitCompressedState(nfa, offset, streamState, key)) {
                return MIRACLE_DEAD;
            }
            storeRoseDelay(t, state, left, (s64a)ci->len - miracle_loc);
            return MIRACLE_SAVED;
        }

        DEBUG_PRINTF("re-init prefix (skip %lld->%lld)\n", begin_loc,
                     miracle_loc);
        if (!q_active) {
            fatbit_set(scratch->aqa, qCount, qi);
            initRoseQueue(t, qi, left, scratch);
        }
        // Rebuild the queue from scratch at the miracle location.
        q->cur = q->end = 0;
        pushQueueAt(q, 0, MQE_START, miracle_loc);
        pushQueueAt(q, 1, MQE_TOP, miracle_loc);
        nfaQueueInitState(q->nfa, q);
    }

    return MIRACLE_CONTINUE;
}
/**
 * \brief Enqueues a chained (MPV) top \a event at offset \a end on queue 0,
 * activating or flushing the MPV queue as required.
 *
 * Handles four activation states: newly active (init + fresh NFA state),
 * no-retrigger (nothing to do), active-array-set-but-queue-idle (load stored
 * stream state), and full queue (flush via ensureMpvQueueFlushed, which may
 * terminate matching). Adjacent squashable tops within
 * \a top_squash_distance are merged into the previous event.
 *
 * Returns HWLM_TERMINATE_MATCHING if the flush was told to stop, otherwise
 * HWLM_CONTINUE_MATCHING.
 */
hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t,
                                 struct hs_scratch *scratch, u32 event,
                                 u64a top_squash_distance, u64a end,
                                 char in_catchup) {
    assert(event == MQE_TOP || event >= MQE_TOP_FIRST);

    struct core_info *ci = &scratch->core_info;

    u8 *aa = getActiveLeafArray(t, scratch->core_info.state);
    u32 aaCount = t->activeArrayCount;
    struct fatbit *activeQueues = scratch->aqa;
    u32 qCount = t->queueCount;

    const u32 qi = 0; /* MPV is always queue 0 if it exists */
    struct mq *q = &scratch->queues[qi];
    const struct NfaInfo *info = getNfaInfoByQueue(t, qi);

    s64a loc = (s64a)end - ci->buf_offset; // absolute offset -> buffer-relative
    assert(loc <= (s64a)ci->len && loc >= -(s64a)ci->hlen);

    if (!mmbit_set(aa, aaCount, qi)) {
        // MPV was not active at all: fresh queue and fresh NFA state.
        initQueue(q, qi, t, scratch);
        nfaQueueInitState(q->nfa, q);
        pushQueueAt(q, 0, MQE_START, loc);
        fatbit_set(activeQueues, qCount, qi);
    } else if (info->no_retrigger) {
        DEBUG_PRINTF("yawn\n");
        /* nfa only needs one top; we can go home now */
        return HWLM_CONTINUE_MATCHING;
    } else if (!fatbit_set(activeQueues, qCount, qi)) {
        // Active in the leaf array but queue not yet set up this scan:
        // restore stream state saved at the last boundary.
        initQueue(q, qi, t, scratch);
        loadStreamState(q->nfa, q, 0);
        pushQueueAt(q, 0, MQE_START, 0);
    } else if (isQueueFull(q)) {
        DEBUG_PRINTF("queue %u full -> catching up nfas\n", qi);
        /* we know it is a chained nfa and the suffixes/outfixes must already
         * be known to be consistent */
        if (ensureMpvQueueFlushed(t, scratch, qi, loc, in_catchup)
            == HWLM_TERMINATE_MATCHING) {
            DEBUG_PRINTF("terminating...\n");
            return HWLM_TERMINATE_MATCHING;
        }
    }

    // Merge with the previous event if it is the same top within the squash
    // window, rather than growing the queue.
    if (top_squash_distance) {
        assert(q->cur != q->end);
        struct mq_item *last = &q->items[q->end - 1];
        if (last->type == event
            && last->location >= loc - (s64a)top_squash_distance) {
            last->location = loc;
            goto event_enqueued;
        }
    }
    pushQueue(q, event, loc);

event_enqueued:
    if (q_cur_loc(q) == (s64a)ci->len) {
        /* we may not run the nfa; need to ensure state is fine */
        DEBUG_PRINTF("empty run\n");
        pushQueueNoMerge(q, MQE_END, loc);
        char alive = nfaQueueExec(q->nfa, q, loc);
        if (alive) {
            scratch->tctxt.mpv_inactive = 0;
            q->cur = q->end = 0;
            pushQueueAt(q, 0, MQE_START, loc);
        } else {
            // MPV died during the run: deactivate it everywhere.
            mmbit_unset(aa, aaCount, qi);
            fatbit_unset(scratch->aqa, qCount, qi);
        }
    }

    DEBUG_PRINTF("added mpv event at %lld\n", loc);
    scratch->tctxt.next_mpv_offset = 0; /* the top event may result in matches
                                         * earlier than expected */
    return HWLM_CONTINUE_MATCHING;
}