Пример #1
0
/*
 * Queue-init entry point for the Tamarama container.
 *
 * Resets the active sub-engine index kept at the start of the queue's stream
 * state. Always returns 0.
 */
char nfaExecTamarama0_queueInitState(const struct NFA *n, struct mq *q) {
    DEBUG_PRINTF("init state\n");

    const struct Tamarama *tam = getImplNfa(n);
    char *stream = q->streamState;

    /* Nothing has been triggered yet: store numSubEngines, the sentinel
     * value that stands for "no valid engine is active". */
    storeActiveIdx(tam, stream, tam->numSubEngines);
    return 0;
}
Пример #2
0
// Returns a pointer to the aux record of state i. The aux array starts
// m->aux_offset bytes after the start of the NFA header n.
static
const mstate_aux *getAux(const NFA *n, dstate_id_t i) {
    auto *m = (const mcsheng *)getImplNfa(n);
    const char *aux_region = (const char *)n + m->aux_offset;
    auto *aux = &((const mstate_aux *)aux_region)[i];

    // The record must start inside the engine (m->length bytes from n).
    assert((const char *)aux < (const char *)n + m->length);
    return aux;
}
Пример #3
0
void nfaExecLbrNVerm_dump(const NFA *nfa, const string &base) {
    assert(nfa);
    assert(nfa->type == LBR_NFA_NVERM);
    const lbr_verm *lv = (const lbr_verm *)getImplNfa(nfa);
    StdioFile f(base + ".txt", "w");
    lbrDumpCommon(&lv->common, f);
    fprintf(f, "NEGATED VERM model, scanning for 0x%02x\n", lv->c);
    fprintf(f, "\n");
    dumpTextReverse(nfa, f);
}
Пример #4
0
void nfaExecLbrDot_dump(const NFA *nfa, const string &base) {
    assert(nfa);
    assert(nfa->type == LBR_NFA_DOT);
    const lbr_dot *ld = (const lbr_dot *)getImplNfa(nfa);
    StdioFile f(base + ".txt", "w");
    lbrDumpCommon(&ld->common, f);
    fprintf(f, "DOT model\n");
    fprintf(f, "\n");
    dumpTextReverse(nfa, f);
}
Пример #5
0
char nfaExecTamarama0_inAnyAccept(const struct NFA *n, struct mq *q) {
    const struct Tamarama *t = getImplNfa(n);
    u32 activeIdx = loadActiveIdx(q->streamState, t->activeIdxSize);
    if (activeIdx == t->numSubEngines) {
        return 0;
    }
    const struct NFA *sub = getSubEngine(t, activeIdx);

    struct mq q1;
    copyQueue(t, sub, q, &q1, activeIdx);
    return nfaInAnyAcceptState(sub, &q1);
}
Пример #6
0
// Returns a pointer to the aux record of state i. The aux array starts
// m->aux_offset bytes after the start of the NFA header n.
const mstate_aux *getAux(const NFA *n, dstate_id_t i) {
    assert(n && isDfaType(n->type));

    const mcclellan *m = (const mcclellan *)getImplNfa(n);
    const char *aux_region = (const char *)n + m->aux_offset;
    const mstate_aux *aux = &((const mstate_aux *)aux_region)[i];

    // The record must start inside the engine (m->length bytes from n).
    assert((const char *)aux < (const char *)n + m->length);
    return aux;
}
Пример #7
0
char nfaExecTamarama0_reportCurrent(const struct NFA *n, struct mq *q) {
    const struct Tamarama *t = getImplNfa(n);
    u32 activeIdx = loadActiveIdx(q->streamState, t->activeIdxSize);
    if (activeIdx == t->numSubEngines) {
        return 1;
    }

    const struct NFA *sub = getSubEngine(t, activeIdx);
    struct mq q1;
    copyQueue(t, sub, q, &q1, activeIdx);
    return nfaReportCurrentMatches(sub, &q1);
}
Пример #8
0
static
void mcclellanGetTransitions(const NFA *n, u16 s, u16 *t) {
    assert(isMcClellanType(n->type));
    const mcclellan *m = (const mcclellan *)getImplNfa(n);
    const mstate_aux *aux = getAux(n, s);
    const u32 as = m->alphaShift;

    if (n->type == MCCLELLAN_NFA_8) {
        const u8 *succ_table = (const u8 *)((const char *)m
                                            + sizeof(mcclellan));
        for (u16 c = 0; c < N_CHARS; c++) {
            t[c] = succ_table[((u32)s << as) + m->remap[c]];
        }
    } else {
        u16 base_s = s;
        const char *winfo_base = (const char *)n + m->sherman_offset;
        const char *state_base
                = winfo_base + SHERMAN_FIXED_SIZE * (s - m->sherman_limit);

        if (s >= m->sherman_limit) {
            base_s = unaligned_load_u16(state_base + SHERMAN_DADDY_OFFSET);
        }

        const u16 *succ_table = (const u16 *)((const char *)m
                                              + sizeof(mcclellan));
        for (u16 c = 0; c < N_CHARS; c++) {
            const u8 *addr = (const u8*)(succ_table + (((u32)base_s << as)
                                                       + m->remap[c]));
            t[c] = unaligned_load_u16(addr);
            t[c] &= STATE_MASK;
        }

        if (s >= m->sherman_limit) {
            UNUSED char type = *(state_base + SHERMAN_TYPE_OFFSET);
            assert(type == SHERMAN_STATE);
            u8 len = *(const u8 *)(SHERMAN_LEN_OFFSET + state_base);
            const char *chars = state_base + SHERMAN_CHARS_OFFSET;
            const u16 *states = (const u16 *)(state_base
                                              + SHERMAN_STATES_OFFSET(len));

            for (u8 i = 0; i < len; i++) {
                for (u16 c = 0; c < N_CHARS; c++) {
                    if (m->remap[c] == chars[i]) {
                        t[c] = unaligned_load_u16((const u8*)&states[i]) & STATE_MASK;
                    }
                }
            }
        }
    }

    t[TOP] = aux->top & STATE_MASK;
}
Пример #9
0
/*
 * Expands compressed stream state for the active sub-engine.
 *
 * src begins with the stored active index (activeIdxSize bytes); the
 * sub-engine's own stream state follows immediately after. Returns 0 when
 * no engine is active, otherwise the result of nfaExpandState() on the
 * active sub-engine.
 */
char nfaExecTamarama0_expandState(const struct NFA *n, void *dest,
                                  const void *src, u64a offset, u8 key) {
    const struct Tamarama *tam = getImplNfa(n);
    const u32 active = loadActiveIdx(src, tam->activeIdxSize);

    if (active != tam->numSubEngines) {
        const struct NFA *engine = getSubEngine(tam, active);
        const char *engine_stream = (const char *)src + tam->activeIdxSize;
        return nfaExpandState(engine, dest, engine_stream, offset, key);
    }

    return 0;
}
Пример #10
0
/*
 * Returns the zombie status of the active sub-engine at location loc.
 *
 * Yields NFA_ZOMBIE_NO when the stored active index equals numSubEngines
 * (no engine active); otherwise forwards nfaGetZombieStatus() on the active
 * sub-engine.
 */
enum nfa_zombie_status nfaExecTamarama0_zombie_status(const struct NFA *n,
                                                      struct mq *q, s64a loc) {
    const struct Tamarama *tam = getImplNfa(n);
    const u32 active = loadActiveIdx(q->streamState, tam->activeIdxSize);

    if (active != tam->numSubEngines) {
        const struct NFA *engine = getSubEngine(tam, active);

        /* The sub-engine is queried through its own local queue. */
        struct mq sub_q;
        copyQueue(tam, engine, q, &sub_q, active);
        return nfaGetZombieStatus(engine, &sub_q, loc);
    }

    return NFA_ZOMBIE_NO;
}
Пример #11
0
/*
 * Compresses the state of the active sub-engine at location loc.
 *
 * Returns 0 when the stored active index equals numSubEngines (no engine
 * active); otherwise forwards nfaQueueCompressState() on the active
 * sub-engine, using a local queue set up by copyQueueProperties().
 */
char nfaExecTamarama0_queueCompressState(const struct NFA *n,
                                         const struct mq *q, s64a loc) {
    const struct Tamarama *tam = getImplNfa(n);
    const u32 active = loadActiveIdx(q->streamState, tam->activeIdxSize);

    if (active != tam->numSubEngines) {
        const struct NFA *engine = getSubEngine(tam, active);

        struct mq sub_q;
        copyQueueProperties(q, &sub_q, tam->activeIdxSize);
        return nfaQueueCompressState(engine, &sub_q, loc);
    }

    return 0;
}
Пример #12
0
void nfaExecLbrShuf_dump(const NFA *nfa, const string &base) {
    assert(nfa);
    assert(nfa->type == LBR_NFA_SHUF);

    StdioFile f(base + ".txt", "w");

    const lbr_shuf *ls = (const lbr_shuf *)getImplNfa(nfa);
    lbrDumpCommon(&ls->common, f);

    CharReach cr = shufti2cr((const u8 *)&ls->mask_lo,
                             (const u8 *)&ls->mask_hi);
    fprintf(f, "SHUF model, scanning for: %s (%zu chars)\n",
            describeClass(cr, 20, CC_OUT_TEXT).c_str(), cr.count());
    fprintf(f, "\n");
    dumpTextReverse(nfa, f);
}
Пример #13
0
void nfaExecLbrTruf_dump(const NFA *nfa, const string &base) {
    assert(nfa);
    assert(nfa->type == LBR_NFA_TRUF);

    StdioFile f(base + ".txt", "w");

    const lbr_truf *lt = (const lbr_truf *)getImplNfa(nfa);
    lbrDumpCommon(&lt->common, f);

    CharReach cr = truffle2cr((const u8 *)&lt->mask1,
                              (const u8 *)&lt->mask2);
    fprintf(f, "TRUFFLE model, scanning for: %s (%zu chars)\n",
            describeClass(cr, 20, CC_OUT_TEXT).c_str(), cr.count());
    fprintf(f, "\n");
    dumpTextReverse(nfa, f);
}
Пример #14
0
// Text dump of a McSheng engine, 8-bit form: header, accel/accept limits,
// alphabet, accel masks, then the reverse dump.
static
void dump_text_8(const NFA *nfa, FILE *f) {
    auto impl = (const mcsheng *)getImplNfa(nfa);
    // The aux array lives impl->aux_offset bytes from the start of the NFA.
    const char *aux_region = (const char *)nfa + impl->aux_offset;
    auto aux = (const mstate_aux *)aux_region;

    fprintf(f, "mcsheng 8\n");
    dumpCommonHeader(f, impl);
    fprintf(f, "accel_limit: %hu, accept_limit %hu\n", impl->accel_limit_8,
            impl->accept_limit_8);
    fprintf(f, "\n");

    describeAlphabet(f, impl);
    dumpAccelMasks(f, impl, aux);

    fprintf(f, "\n");
    dumpTextReverse(nfa, f);
}
Пример #15
0
// Text dump of a McSheng engine, 16-bit form: header, Sherman limits,
// alphabet, accel masks, then the reverse dump.
static
void dump_text_16(const NFA *nfa, FILE *f) {
    auto *impl = (const mcsheng *)getImplNfa(nfa);
    // The aux array lives impl->aux_offset bytes from the start of the NFA.
    const char *aux_region = (const char *)nfa + impl->aux_offset;
    auto *aux = (const mstate_aux *)aux_region;

    fprintf(f, "mcsheng 16\n");
    dumpCommonHeader(f, impl);
    fprintf(f, "sherman_limit: %d, sherman_end: %d\n",
            (int)impl->sherman_limit, (int)impl->sherman_end);
    fprintf(f, "\n");

    describeAlphabet(f, impl);
    dumpAccelMasks(f, impl, aux);

    fprintf(f, "\n");
    dumpTextReverse(nfa, f);
}
Пример #16
0
static
void dump_dot_8(const NFA *nfa, FILE *f) {
    auto m = (const mcsheng *)getImplNfa(nfa);

    dumpDotPreambleDfa(f);

    for (u16 i = 1; i < m->state_count; i++) {
        describeNode(nfa, m, i, f);

        u16 t[ALPHABET_SIZE];

        next_states(nfa, i, t);

        describeEdge(f, m, t, i);
    }

    fprintf(f, "}\n");
}
Пример #17
0
/*
 * Rose-style execution for the Tamarama container.
 *
 * Drains the container queue q through updateQueues() into a local
 * sub-engine queue, then runs nfaQueueExecRose() on that queue if it holds
 * any events. Returns the sub-engine's result, or 0 if nothing was run.
 */
char nfaExecTamarama0_QR(const struct NFA *n, struct mq *q,
                         ReportID report) {
    DEBUG_PRINTF("exec rose\n");

    const struct Tamarama *tam = getImplNfa(n);

    /* Start with an empty local queue so the emptiness test below is valid
     * even if the loop never runs. */
    struct mq sub_q;
    sub_q.end = 0;
    sub_q.cur = 0;

    while (q->cur < q->end) {
        updateQueues(tam, q, &sub_q);
    }

    char rv = 0;
    if (sub_q.cur < sub_q.end) {
        rv = nfaQueueExecRose(sub_q.nfa, &sub_q, report);
    }

    DEBUG_PRINTF("exec rose rv:%u\n", rv);
    return rv;
}
Пример #18
0
static
void nfaExecMcClellan8_dumpDot(const NFA *nfa, FILE *f) {
    assert(nfa->type == MCCLELLAN_NFA_8);
    const mcclellan *m = (const mcclellan *)getImplNfa(nfa);

    dumpDotPreambleDfa(f);

    for (u16 i = 1; i < m->state_count; i++) {
        describeNode(nfa, m, i, f);

        u16 t[ALPHABET_SIZE];

        mcclellanGetTransitions(nfa, i, t);

        describeEdge(f, t, i);
    }

    fprintf(f, "}\n");
}
Пример #19
0
/*
 * Runs the container queue up to location end, stopping early as soon as a
 * sub-engine returns MO_MATCHES_PENDING.
 *
 * Each iteration hands pending events to a local sub-engine queue via
 * updateQueues() and executes it with nfaQueueExec2_raw(). If at least one
 * iteration ran, the local queue is copied back into q. Returns the last
 * sub-engine result, or 0 if the loop never executed.
 */
char nfaExecTamarama0_Q2(const struct NFA *n,
                         struct mq *q, s64a end) {
    DEBUG_PRINTF("exec to match\n");

    const struct Tamarama *tam = getImplNfa(n);
    struct mq sub_q;
    char result = 0;
    char ran = 0; /* set once sub_q has been populated and executed */

    while (result != MO_MATCHES_PENDING && q->cur < q->end &&
           q_cur_loc(q) <= end) {
        updateQueues(tam, q, &sub_q);
        result = nfaQueueExec2_raw(sub_q.nfa, &sub_q, end);
        q->report_current = sub_q.report_current;
        ran = 1;
    }

    if (ran) {
        copyBack(tam, q, &sub_q);
    }
    return result;
}
Пример #20
0
/*
 * End-of-data check for the Tamarama container.
 *
 * Returns MO_CONTINUE_MATCHING when no sub-engine is active or the active
 * sub-engine does not accept at EOD; otherwise returns the result of
 * nfaCheckFinalState() on the active sub-engine. The sub-engine's stream
 * state starts activeIdxSize bytes into streamState.
 */
char nfaExecTamarama0_testEOD(const struct NFA *n, const char *state,
                              const char *streamState, u64a offset,
                              NfaCallback callback, void *context) {
    const struct Tamarama *tam = getImplNfa(n);
    const u32 active = loadActiveIdx(streamState, tam->activeIdxSize);

    if (active == tam->numSubEngines) {
        return MO_CONTINUE_MATCHING;
    }

    const struct NFA *sub = getSubEngine(tam, active);
    if (!nfaAcceptsEod(sub)) {
        return MO_CONTINUE_MATCHING;
    }

    assert(!isContainerType(sub->type));
    return nfaCheckFinalState(sub, state, streamState + tam->activeIdxSize,
                              offset, callback, context);
}
Пример #21
0
/*
 * Runs the container queue up to location end.
 *
 * Each iteration hands pending events to a local sub-engine queue via
 * updateQueues() and executes it with nfaQueueExec_raw(); the loop ends
 * early once can_stop_matching() is true for the scratch. If at least one
 * iteration ran, the local queue is copied back into q. Returns the last
 * sub-engine result, or MO_ALIVE if the loop never executed.
 */
char nfaExecTamarama0_Q(const struct NFA *n, struct mq *q, s64a end) {
    DEBUG_PRINTF("exec\n");

    const struct Tamarama *tam = getImplNfa(n);
    struct mq sub_q;
    char result = MO_ALIVE;
    char ran = 0; /* set once sub_q has been populated and executed */

    while (q->cur < q->end && q_cur_loc(q) <= end) {
        updateQueues(tam, q, &sub_q);
        result = nfaQueueExec_raw(sub_q.nfa, &sub_q, end);
        q->report_current = sub_q.report_current;
        ran = 1;

        if (can_stop_matching(q->scratch)) {
            break;
        }
    }

    if (ran) {
        copyBack(tam, q, &sub_q);
    }
    return result;
}
Пример #22
0
static
void nfaExecMcClellan16_dumpText(const NFA *nfa, FILE *f) {
    assert(nfa->type == MCCLELLAN_NFA_16);
    const mcclellan *m = (const mcclellan *)getImplNfa(nfa);
    const mstate_aux *aux =
        (const mstate_aux *)((const char *)nfa + m->aux_offset);

    fprintf(f, "mcclellan 16\n");
    dumpCommonHeader(f, m);
    fprintf(f, "sherman_limit: %d, sherman_end: %d\n", (int)m->sherman_limit,
            (int)m->sherman_end);
    fprintf(f, "\n");

    describeAlphabet(f, m);
    dumpTransitions(f, nfa, m, aux);
    dumpAccelMasks(f, m, aux);

    fprintf(f, "\n");
    dumpTextReverse(nfa, f);
}
Пример #23
0
static
void nfaExecMcClellan8_dumpText(const NFA *nfa, FILE *f) {
    assert(nfa->type == MCCLELLAN_NFA_8);
    const mcclellan *m = (const mcclellan *)getImplNfa(nfa);
    const mstate_aux *aux =
        (const mstate_aux *)((const char *)nfa + m->aux_offset);

    fprintf(f, "mcclellan 8\n");
    dumpCommonHeader(f, m);
    fprintf(f, "accel_limit: %hu, accept_limit %hu\n", m->accel_limit_8,
            m->accept_limit_8);
    fprintf(f, "\n");

    describeAlphabet(f, m);
    dumpTransitions(f, nfa, m, aux);
    dumpAccelMasks(f, m, aux);

    fprintf(f, "\n");
    dumpTextReverse(nfa, f);
}
Пример #24
0
/*
 * Fires the reports of the Sheng state currently stored in q->state, at the
 * queue's current offset, if that state's aux record marks it as accepting.
 *
 * Uses the single-report path when SHENG_FLAG_SINGLE_REPORT is set and
 * fireReports() otherwise. Always returns 0.
 */
char nfaExecSheng0_reportCurrent(const struct NFA *n, struct mq *q) {
    const struct sheng *sh = (const struct sheng *)getImplNfa(n);
    const u8 cur_state = *(u8 *)q->state;
    const struct sstate_aux *aux = get_aux(sh, cur_state);
    const u64a offset = q_cur_offset(q);
    assert(q_cur_type(q) == MQE_START);

    if (!aux->accept) {
        return 0;
    }

    if (sh->flags & SHENG_FLAG_SINGLE_REPORT) {
        fireSingleReport(q->cb, q->context, sh->report, offset);
        return 0;
    }

    /* Scratch cache slots required by fireReports(); nothing is cached
     * across calls here. */
    u8 cached_state_id = 0;
    ReportID cached_report_id = 0;
    fireReports(sh, q->cb, q->context, cur_state, offset, &cached_state_id,
                &cached_report_id, 1);
    return 0;
}
Пример #25
0
/*
 * Block-mode scan of buffer[0..length) with a Sheng engine, starting from
 * the engine's anchored state.
 *
 * Matches are delivered through cb by runShengCb(). If the scan returns
 * MO_DEAD, that is returned immediately. Otherwise, if the final state's
 * aux record has accept_eod set, its reports are fired at offset + length.
 * The return value is MO_DEAD when the final state carries
 * SHENG_STATE_DEAD, MO_ALIVE otherwise.
 */
char nfaExecSheng0_B(const struct NFA *n, u64a offset, const u8 *buffer,
                     size_t length, NfaCallback cb, void *context) {
    DEBUG_PRINTF("smallwrite Sheng\n");
    assert(n->type == SHENG_NFA_0);

    const struct sheng *sh = getImplNfa(n);
    const u8 can_die = sh->flags & SHENG_FLAG_CAN_DIE;
    const u8 has_accel = sh->flags & SHENG_FLAG_HAS_ACCEL;
    const u8 single = sh->flags & SHENG_FLAG_SINGLE_REPORT;

    u8 state = sh->anchored;
    u8 cached_accept_state = 0;
    ReportID cached_accept_id = 0;
    const u8 *scanned;
    s64a end = length;

    /* scan and report all matches */
    int rv = runShengCb(sh, cb, context, offset, &cached_accept_state,
                        &cached_accept_id, buffer, buffer, buffer + end,
                        can_die, has_accel, single, &scanned, &state);
    if (rv == MO_DEAD) {
        DEBUG_PRINTF("exiting in state %u\n",
                     state & SHENG_STATE_MASK);
        return MO_DEAD;
    }

    DEBUG_PRINTF("%u\n", state & SHENG_STATE_MASK);

    const struct sstate_aux *aux = get_aux(sh, state);
    if (aux->accept_eod) {
        DEBUG_PRINTF("Reporting EOD matches\n");
        fireReports(sh, cb, context, state, end + offset, &cached_accept_state,
                    &cached_accept_id, 1);
    }

    if (state & SHENG_STATE_DEAD) {
        return MO_DEAD;
    }
    return MO_ALIVE;
}
Пример #26
0
/* Returns the Sheng implementation structure that getImplNfa() yields for
 * the NFA header n. */
static really_inline
const struct sheng *get_sheng(const struct NFA *n) {
    const void *impl = getImplNfa(n);
    return (const struct sheng *)impl;
}
Пример #27
0
static
void next_states(const NFA *n, u16 s, u16 *t) {
    const mcsheng *m = (const mcsheng *)getImplNfa(n);
    const mstate_aux *aux = getAux(n, s);
    const u32 as = m->alphaShift;
    assert(s != DEAD_STATE);

    if (s < m->sheng_end) {
        for (u16 c = 0; c < N_CHARS; c++) {
            u8 sheng_s = s - 1;
            auto trans_for_c = (const char *)&m->sheng_masks[c];
            assert(sheng_s < sizeof(m128));
            u8 raw_succ = trans_for_c[sheng_s];
            if (raw_succ == m->sheng_end - 1) {
                t[c] = DEAD_STATE;
            } else if (raw_succ < m->sheng_end) {
                t[c] = raw_succ + 1;
            } else {
                t[c] = raw_succ;
            }
        }
    } else  if (n->type == MCSHENG_NFA_8) {
        const u8 *succ_table = (const u8 *)((const char *)m + sizeof(mcsheng));
        for (u16 c = 0; c < N_CHARS; c++) {
            u32 normal_id = s - m->sheng_end;
            t[c] = succ_table[(normal_id << as) + m->remap[c]];
        }
    } else {
        u16 base_s = s;
        const char *winfo_base = (const char *)n + m->sherman_offset;
        const char *state_base
                = winfo_base + SHERMAN_FIXED_SIZE * (s - m->sherman_limit);

        if (s >= m->sherman_limit) {
            base_s = unaligned_load_u16(state_base + SHERMAN_DADDY_OFFSET);
            assert(base_s >= m->sheng_end);
        }

        const u16 *succ_table = (const u16 *)((const char *)m
                                              + sizeof(mcsheng));
        for (u16 c = 0; c < N_CHARS; c++) {
            u32 normal_id = base_s - m->sheng_end;
            t[c] = succ_table[(normal_id << as) + m->remap[c]];
        }

        if (s >= m->sherman_limit) {
            UNUSED char type = *(state_base + SHERMAN_TYPE_OFFSET);
            assert(type == SHERMAN_STATE);
            u8 len = *(const u8 *)(SHERMAN_LEN_OFFSET + state_base);
            const char *chars = state_base + SHERMAN_CHARS_OFFSET;
            const u16 *states = (const u16 *)(state_base
                                              + SHERMAN_STATES_OFFSET(len));

            for (u8 i = 0; i < len; i++) {
                for (u16 c = 0; c < N_CHARS; c++) {
                    if (m->remap[c] == chars[i]) {
                        t[c] = unaligned_load_u16((const u8*)&states[i]);
                    }
                }
            }
        }

        for (u16 c = 0; c < N_CHARS; c++) {
            t[c] &= STATE_MASK;
        }

    }

    t[TOP] = aux->top & STATE_MASK;
}