char nfaExecTamarama0_queueInitState(const struct NFA *n, struct mq *q) { DEBUG_PRINTF("init state\n"); const struct Tamarama *t = getImplNfa(n); char *ptr = q->streamState; // Use activeIdxSize as a sentinel value and initialize the state to // an invalid engine as nothing has been triggered yet storeActiveIdx(t, ptr, t->numSubEngines); return 0; }
/**
 * Return the aux record of state i.
 *
 * The aux array starts aux_offset bytes from the beginning of the NFA
 * structure; the assert checks that the selected record starts before the
 * end of the engine (length bytes from n).
 */
static
const mstate_aux *getAux(const NFA *n, dstate_id_t i) {
    const char *nfa_bytes = (const char *)n;
    const mcsheng *impl = (const mcsheng *)getImplNfa(n);

    const mstate_aux *first = (const mstate_aux *)(nfa_bytes + impl->aux_offset);
    const mstate_aux *entry = first + i;

    assert((const char *)entry < nfa_bytes + impl->length);
    return entry;
}
void nfaExecLbrNVerm_dump(const NFA *nfa, const string &base) { assert(nfa); assert(nfa->type == LBR_NFA_NVERM); const lbr_verm *lv = (const lbr_verm *)getImplNfa(nfa); StdioFile f(base + ".txt", "w"); lbrDumpCommon(&lv->common, f); fprintf(f, "NEGATED VERM model, scanning for 0x%02x\n", lv->c); fprintf(f, "\n"); dumpTextReverse(nfa, f); }
void nfaExecLbrDot_dump(const NFA *nfa, const string &base) { assert(nfa); assert(nfa->type == LBR_NFA_DOT); const lbr_dot *ld = (const lbr_dot *)getImplNfa(nfa); StdioFile f(base + ".txt", "w"); lbrDumpCommon(&ld->common, f); fprintf(f, "DOT model\n"); fprintf(f, "\n"); dumpTextReverse(nfa, f); }
char nfaExecTamarama0_inAnyAccept(const struct NFA *n, struct mq *q) { const struct Tamarama *t = getImplNfa(n); u32 activeIdx = loadActiveIdx(q->streamState, t->activeIdxSize); if (activeIdx == t->numSubEngines) { return 0; } const struct NFA *sub = getSubEngine(t, activeIdx); struct mq q1; copyQueue(t, sub, q, &q1, activeIdx); return nfaInAnyAcceptState(sub, &q1); }
/**
 * Return the aux record of state i of a DFA-type engine.
 *
 * The aux array starts aux_offset bytes from the beginning of the NFA
 * structure; the trailing assert checks that the selected record starts
 * before the end of the engine (length bytes from n).
 */
const mstate_aux *getAux(const NFA *n, dstate_id_t i) {
    assert(n && isDfaType(n->type));

    const char *nfa_bytes = (const char *)n;
    const mcclellan *impl = (const mcclellan *)getImplNfa(n);

    const mstate_aux *first = (const mstate_aux *)(nfa_bytes + impl->aux_offset);
    const mstate_aux *entry = first + i;

    assert((const char *)entry < nfa_bytes + impl->length);
    return entry;
}
char nfaExecTamarama0_reportCurrent(const struct NFA *n, struct mq *q) { const struct Tamarama *t = getImplNfa(n); u32 activeIdx = loadActiveIdx(q->streamState, t->activeIdxSize); if (activeIdx == t->numSubEngines) { return 1; } const struct NFA *sub = getSubEngine(t, activeIdx); struct mq q1; copyQueue(t, sub, q, &q1, activeIdx); return nfaReportCurrentMatches(sub, &q1); }
static void mcclellanGetTransitions(const NFA *n, u16 s, u16 *t) { assert(isMcClellanType(n->type)); const mcclellan *m = (const mcclellan *)getImplNfa(n); const mstate_aux *aux = getAux(n, s); const u32 as = m->alphaShift; if (n->type == MCCLELLAN_NFA_8) { const u8 *succ_table = (const u8 *)((const char *)m + sizeof(mcclellan)); for (u16 c = 0; c < N_CHARS; c++) { t[c] = succ_table[((u32)s << as) + m->remap[c]]; } } else { u16 base_s = s; const char *winfo_base = (const char *)n + m->sherman_offset; const char *state_base = winfo_base + SHERMAN_FIXED_SIZE * (s - m->sherman_limit); if (s >= m->sherman_limit) { base_s = unaligned_load_u16(state_base + SHERMAN_DADDY_OFFSET); } const u16 *succ_table = (const u16 *)((const char *)m + sizeof(mcclellan)); for (u16 c = 0; c < N_CHARS; c++) { const u8 *addr = (const u8*)(succ_table + (((u32)base_s << as) + m->remap[c])); t[c] = unaligned_load_u16(addr); t[c] &= STATE_MASK; } if (s >= m->sherman_limit) { UNUSED char type = *(state_base + SHERMAN_TYPE_OFFSET); assert(type == SHERMAN_STATE); u8 len = *(const u8 *)(SHERMAN_LEN_OFFSET + state_base); const char *chars = state_base + SHERMAN_CHARS_OFFSET; const u16 *states = (const u16 *)(state_base + SHERMAN_STATES_OFFSET(len)); for (u8 i = 0; i < len; i++) { for (u16 c = 0; c < N_CHARS; c++) { if (m->remap[c] == chars[i]) { t[c] = unaligned_load_u16((const u8*)&states[i]) & STATE_MASK; } } } } } t[TOP] = aux->top & STATE_MASK; }
/**
 * Expand compressed stream state for the active sub-engine.
 *
 * The active-engine index is read from the start of src. When no sub-engine
 * is active (index == numSubEngines) nothing is expanded and 0 is returned.
 * Otherwise the sub-engine's own stream state, which follows the index
 * (activeIdxSize bytes into src), is passed to nfaExpandState().
 */
char nfaExecTamarama0_expandState(const struct NFA *n, void *dest,
                                  const void *src, u64a offset, u8 key) {
    const struct Tamarama *tam = getImplNfa(n);
    const u32 active = loadActiveIdx(src, tam->activeIdxSize);

    if (active != tam->numSubEngines) {
        const struct NFA *engine = getSubEngine(tam, active);
        /* Skip over the stored index to reach the sub-engine's state. */
        const char *engine_stream = (const char *)src + tam->activeIdxSize;
        return nfaExpandState(engine, dest, engine_stream, offset, key);
    }

    return 0;
}
/**
 * Query the zombie status of the active sub-engine at location loc.
 *
 * Returns NFA_ZOMBIE_NO when no sub-engine is active (stored index ==
 * numSubEngines); otherwise forwards to nfaGetZombieStatus() on a queue
 * copied for the active sub-engine.
 */
enum nfa_zombie_status nfaExecTamarama0_zombie_status(const struct NFA *n,
                                                      struct mq *q, s64a loc) {
    const struct Tamarama *tam = getImplNfa(n);
    const u32 active = loadActiveIdx(q->streamState, tam->activeIdxSize);

    if (active != tam->numSubEngines) {
        const struct NFA *engine = getSubEngine(tam, active);
        struct mq sub_q;
        copyQueue(tam, engine, q, &sub_q, active);
        return nfaGetZombieStatus(engine, &sub_q, loc);
    }

    return NFA_ZOMBIE_NO;
}
/**
 * Compress the state of the active sub-engine at location loc.
 *
 * Returns 0 without doing anything when no sub-engine is active (stored
 * index == numSubEngines). Otherwise a temporary queue is populated with
 * copyQueueProperties() (given activeIdxSize) and handed to
 * nfaQueueCompressState() for the active sub-engine.
 */
char nfaExecTamarama0_queueCompressState(const struct NFA *n,
                                         const struct mq *q, s64a loc) {
    const struct Tamarama *tam = getImplNfa(n);
    const u32 active = loadActiveIdx(q->streamState, tam->activeIdxSize);

    if (active != tam->numSubEngines) {
        const struct NFA *engine = getSubEngine(tam, active);
        struct mq sub_q;
        copyQueueProperties(q, &sub_q, tam->activeIdxSize);
        return nfaQueueCompressState(engine, &sub_q, loc);
    }

    return 0;
}
void nfaExecLbrShuf_dump(const NFA *nfa, const string &base) { assert(nfa); assert(nfa->type == LBR_NFA_SHUF); StdioFile f(base + ".txt", "w"); const lbr_shuf *ls = (const lbr_shuf *)getImplNfa(nfa); lbrDumpCommon(&ls->common, f); CharReach cr = shufti2cr((const u8 *)&ls->mask_lo, (const u8 *)&ls->mask_hi); fprintf(f, "SHUF model, scanning for: %s (%zu chars)\n", describeClass(cr, 20, CC_OUT_TEXT).c_str(), cr.count()); fprintf(f, "\n"); dumpTextReverse(nfa, f); }
void nfaExecLbrTruf_dump(const NFA *nfa, const string &base) { assert(nfa); assert(nfa->type == LBR_NFA_TRUF); StdioFile f(base + ".txt", "w"); const lbr_truf *lt = (const lbr_truf *)getImplNfa(nfa); lbrDumpCommon(<->common, f); CharReach cr = truffle2cr((const u8 *)<->mask1, (const u8 *)<->mask2); fprintf(f, "TRUFFLE model, scanning for: %s (%zu chars)\n", describeClass(cr, 20, CC_OUT_TEXT).c_str(), cr.count()); fprintf(f, "\n"); dumpTextReverse(nfa, f); }
static void dump_text_8(const NFA *nfa, FILE *f) { auto m = (const mcsheng *)getImplNfa(nfa); auto aux = (const mstate_aux *)((const char *)nfa + m->aux_offset); fprintf(f, "mcsheng 8\n"); dumpCommonHeader(f, m); fprintf(f, "accel_limit: %hu, accept_limit %hu\n", m->accel_limit_8, m->accept_limit_8); fprintf(f, "\n"); describeAlphabet(f, m); dumpAccelMasks(f, m, aux); fprintf(f, "\n"); dumpTextReverse(nfa, f); }
static void dump_text_16(const NFA *nfa, FILE *f) { auto *m = (const mcsheng *)getImplNfa(nfa); auto *aux = (const mstate_aux *)((const char *)nfa + m->aux_offset); fprintf(f, "mcsheng 16\n"); dumpCommonHeader(f, m); fprintf(f, "sherman_limit: %d, sherman_end: %d\n", (int)m->sherman_limit, (int)m->sherman_end); fprintf(f, "\n"); describeAlphabet(f, m); dumpAccelMasks(f, m, aux); fprintf(f, "\n"); dumpTextReverse(nfa, f); }
static void dump_dot_8(const NFA *nfa, FILE *f) { auto m = (const mcsheng *)getImplNfa(nfa); dumpDotPreambleDfa(f); for (u16 i = 1; i < m->state_count; i++) { describeNode(nfa, m, i, f); u16 t[ALPHABET_SIZE]; next_states(nfa, i, t); describeEdge(f, m, t, i); } fprintf(f, "}\n"); }
/**
 * Drain the container queue and run the resulting sub-engine queue via
 * nfaQueueExecRose().
 *
 * The temporary queue starts empty; updateQueues() is called until every
 * event of q has been consumed. If that left events in the temporary queue,
 * the engine recorded in it is executed with the given report and its result
 * returned; otherwise 0 is returned.
 */
char nfaExecTamarama0_QR(const struct NFA *n, struct mq *q, ReportID report) {
    DEBUG_PRINTF("exec rose\n");

    const struct Tamarama *tam = getImplNfa(n);
    char rv = 0;

    struct mq sub_q;
    sub_q.end = 0;
    sub_q.cur = 0;

    while (q->cur < q->end) {
        updateQueues(tam, q, &sub_q);
    }

    if (sub_q.cur < sub_q.end) {
        rv = nfaQueueExecRose(sub_q.nfa, &sub_q, report);
    }

    DEBUG_PRINTF("exec rose rv:%u\n", rv);
    return rv;
}
static void nfaExecMcClellan8_dumpDot(const NFA *nfa, FILE *f) { assert(nfa->type == MCCLELLAN_NFA_8); const mcclellan *m = (const mcclellan *)getImplNfa(nfa); dumpDotPreambleDfa(f); for (u16 i = 1; i < m->state_count; i++) { describeNode(nfa, m, i, f); u16 t[ALPHABET_SIZE]; mcclellanGetTransitions(nfa, i, t); describeEdge(f, t, i); } fprintf(f, "}\n"); }
/**
 * Execute the container queue up to location end, stopping early once a
 * sub-engine reports MO_MATCHES_PENDING.
 *
 * Each iteration moves events into the temporary sub-engine queue with
 * updateQueues(), runs it with nfaQueueExec2_raw() and mirrors the
 * sub-queue's report_current flag back into q. If at least one iteration
 * ran, copyBack() is called before returning. Returns the result of the last
 * sub-engine execution, or 0 if none ran.
 *
 * NOTE(review): unlike nfaExecTamarama0_Q, this loop does not consult
 * can_stop_matching(q->scratch) -- confirm that is intentional.
 */
char nfaExecTamarama0_Q2(const struct NFA *n, struct mq *q, s64a end) {
    DEBUG_PRINTF("exec to match\n");

    const struct Tamarama *tam = getImplNfa(n);
    struct mq sub_q;
    char rv = 0;
    char ran = 0;

    for (;;) {
        if (q->cur >= q->end || q_cur_loc(q) > end ||
            rv == MO_MATCHES_PENDING) {
            break;
        }

        updateQueues(tam, q, &sub_q);
        rv = nfaQueueExec2_raw(sub_q.nfa, &sub_q, end);
        q->report_current = sub_q.report_current;
        ran = 1;
    }

    if (ran) {
        copyBack(tam, q, &sub_q);
    }

    return rv;
}
/**
 * Check the active sub-engine for end-of-data matches.
 *
 * Returns MO_CONTINUE_MATCHING when no sub-engine is active (stored index ==
 * numSubEngines) or when the active sub-engine does not accept at EOD.
 * Otherwise returns the result of nfaCheckFinalState() on the sub-engine,
 * whose stream state follows the stored index (activeIdxSize bytes into
 * streamState).
 */
char nfaExecTamarama0_testEOD(const struct NFA *n, const char *state,
                              const char *streamState, u64a offset,
                              NfaCallback callback, void *context) {
    const struct Tamarama *tam = getImplNfa(n);
    const u32 active = loadActiveIdx(streamState, tam->activeIdxSize);

    if (active == tam->numSubEngines) {
        return MO_CONTINUE_MATCHING;
    }

    const struct NFA *engine = getSubEngine(tam, active);
    if (!nfaAcceptsEod(engine)) {
        return MO_CONTINUE_MATCHING;
    }

    assert(!isContainerType(engine->type));
    /* Skip over the stored index to reach the sub-engine's state. */
    const char *engine_stream = streamState + tam->activeIdxSize;
    return nfaCheckFinalState(engine, state, engine_stream, offset, callback,
                              context);
}
/**
 * Execute the container queue up to location end.
 *
 * Each iteration moves events into the temporary sub-engine queue with
 * updateQueues(), runs it with nfaQueueExec_raw() and mirrors the
 * sub-queue's report_current flag back into q. The loop ends when q is
 * exhausted, when its current location passes end, or as soon as
 * can_stop_matching(q->scratch) is true after an execution. If at least one
 * iteration ran, copyBack() is called before returning. Returns the result
 * of the last sub-engine execution, or MO_ALIVE if none ran.
 */
char nfaExecTamarama0_Q(const struct NFA *n, struct mq *q, s64a end) {
    DEBUG_PRINTF("exec\n");

    const struct Tamarama *tam = getImplNfa(n);
    struct mq sub_q;
    char rv = MO_ALIVE;
    char ran = 0;

    for (;;) {
        if (q->cur >= q->end || q_cur_loc(q) > end) {
            break;
        }

        updateQueues(tam, q, &sub_q);
        rv = nfaQueueExec_raw(sub_q.nfa, &sub_q, end);
        q->report_current = sub_q.report_current;
        ran = 1;

        if (can_stop_matching(q->scratch)) {
            break;
        }
    }

    if (ran) {
        copyBack(tam, q, &sub_q);
    }

    return rv;
}
static void nfaExecMcClellan16_dumpText(const NFA *nfa, FILE *f) { assert(nfa->type == MCCLELLAN_NFA_16); const mcclellan *m = (const mcclellan *)getImplNfa(nfa); const mstate_aux *aux = (const mstate_aux *)((const char *)nfa + m->aux_offset); fprintf(f, "mcclellan 16\n"); dumpCommonHeader(f, m); fprintf(f, "sherman_limit: %d, sherman_end: %d\n", (int)m->sherman_limit, (int)m->sherman_end); fprintf(f, "\n"); describeAlphabet(f, m); dumpTransitions(f, nfa, m, aux); dumpAccelMasks(f, m, aux); fprintf(f, "\n"); dumpTextReverse(nfa, f); }
static void nfaExecMcClellan8_dumpText(const NFA *nfa, FILE *f) { assert(nfa->type == MCCLELLAN_NFA_8); const mcclellan *m = (const mcclellan *)getImplNfa(nfa); const mstate_aux *aux = (const mstate_aux *)((const char *)nfa + m->aux_offset); fprintf(f, "mcclellan 8\n"); dumpCommonHeader(f, m); fprintf(f, "accel_limit: %hu, accept_limit %hu\n", m->accel_limit_8, m->accept_limit_8); fprintf(f, "\n"); describeAlphabet(f, m); dumpTransitions(f, nfa, m, aux); dumpAccelMasks(f, m, aux); fprintf(f, "\n"); dumpTextReverse(nfa, f); }
char nfaExecSheng0_reportCurrent(const struct NFA *n, struct mq *q) { const struct sheng *sh = (const struct sheng *)getImplNfa(n); NfaCallback cb = q->cb; void *ctxt = q->context; u8 s = *(u8 *)q->state; const struct sstate_aux *aux = get_aux(sh, s); u64a offset = q_cur_offset(q); u8 cached_state_id = 0; ReportID cached_report_id = 0; assert(q_cur_type(q) == MQE_START); if (aux->accept) { if (sh->flags & SHENG_FLAG_SINGLE_REPORT) { fireSingleReport(cb, ctxt, sh->report, offset); } else { fireReports(sh, cb, ctxt, s, offset, &cached_state_id, &cached_report_id, 1); } } return 0; }
/**
 * Run a Sheng engine over a single buffer, starting from its anchored state.
 *
 * runShengCb() scans buffer[0, length) and raises matches through cb. If it
 * returns MO_DEAD, MO_DEAD is returned immediately. Otherwise, when the
 * final state's aux record has accept_eod set, its reports are fired at
 * offset + length. The return value is MO_DEAD if the final state has
 * SHENG_STATE_DEAD set and MO_ALIVE otherwise.
 */
char nfaExecSheng0_B(const struct NFA *n, u64a offset, const u8 *buffer,
                     size_t length, NfaCallback cb, void *context) {
    DEBUG_PRINTF("smallwrite Sheng\n");
    assert(n->type == SHENG_NFA_0);

    const struct sheng *sh = getImplNfa(n);
    u8 state = sh->anchored;

    /* Behaviour switches derived from the engine flags. */
    u8 can_die = sh->flags & SHENG_FLAG_CAN_DIE;
    u8 has_accel = sh->flags & SHENG_FLAG_HAS_ACCEL;
    u8 single = sh->flags & SHENG_FLAG_SINGLE_REPORT;

    u8 cached_accept_state = 0;
    ReportID cached_accept_id = 0;

    /* scan and report all matches */
    s64a end = length;
    const u8 *scanned;
    int rv = runShengCb(sh, cb, context, offset, &cached_accept_state,
                        &cached_accept_id, buffer, buffer, buffer + end,
                        can_die, has_accel, single, &scanned, &state);
    if (rv == MO_DEAD) {
        DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK);
        return MO_DEAD;
    }

    DEBUG_PRINTF("%u\n", state & SHENG_STATE_MASK);

    const struct sstate_aux *final_aux = get_aux(sh, state);
    if (final_aux->accept_eod) {
        DEBUG_PRINTF("Reporting EOD matches\n");
        fireReports(sh, cb, context, state, end + offset,
                    &cached_accept_state, &cached_accept_id, 1);
    }

    if (state & SHENG_STATE_DEAD) {
        return MO_DEAD;
    }
    return MO_ALIVE;
}
/** Return the implementation structure of n viewed as a struct sheng. */
static really_inline
const struct sheng *get_sheng(const struct NFA *n) {
    const void *impl = getImplNfa(n);
    return (const struct sheng *)impl;
}
static void next_states(const NFA *n, u16 s, u16 *t) { const mcsheng *m = (const mcsheng *)getImplNfa(n); const mstate_aux *aux = getAux(n, s); const u32 as = m->alphaShift; assert(s != DEAD_STATE); if (s < m->sheng_end) { for (u16 c = 0; c < N_CHARS; c++) { u8 sheng_s = s - 1; auto trans_for_c = (const char *)&m->sheng_masks[c]; assert(sheng_s < sizeof(m128)); u8 raw_succ = trans_for_c[sheng_s]; if (raw_succ == m->sheng_end - 1) { t[c] = DEAD_STATE; } else if (raw_succ < m->sheng_end) { t[c] = raw_succ + 1; } else { t[c] = raw_succ; } } } else if (n->type == MCSHENG_NFA_8) { const u8 *succ_table = (const u8 *)((const char *)m + sizeof(mcsheng)); for (u16 c = 0; c < N_CHARS; c++) { u32 normal_id = s - m->sheng_end; t[c] = succ_table[(normal_id << as) + m->remap[c]]; } } else { u16 base_s = s; const char *winfo_base = (const char *)n + m->sherman_offset; const char *state_base = winfo_base + SHERMAN_FIXED_SIZE * (s - m->sherman_limit); if (s >= m->sherman_limit) { base_s = unaligned_load_u16(state_base + SHERMAN_DADDY_OFFSET); assert(base_s >= m->sheng_end); } const u16 *succ_table = (const u16 *)((const char *)m + sizeof(mcsheng)); for (u16 c = 0; c < N_CHARS; c++) { u32 normal_id = base_s - m->sheng_end; t[c] = succ_table[(normal_id << as) + m->remap[c]]; } if (s >= m->sherman_limit) { UNUSED char type = *(state_base + SHERMAN_TYPE_OFFSET); assert(type == SHERMAN_STATE); u8 len = *(const u8 *)(SHERMAN_LEN_OFFSET + state_base); const char *chars = state_base + SHERMAN_CHARS_OFFSET; const u16 *states = (const u16 *)(state_base + SHERMAN_STATES_OFFSET(len)); for (u8 i = 0; i < len; i++) { for (u16 c = 0; c < N_CHARS; c++) { if (m->remap[c] == chars[i]) { t[c] = unaligned_load_u16((const u8*)&states[i]); } } } } for (u16 c = 0; c < N_CHARS; c++) { t[c] &= STATE_MASK; } } t[TOP] = aux->top & STATE_MASK; }