Пример #1
0
/**
 * Fill \p t with the successor of McClellan DFA state \p s for every input
 * byte value, plus the TOP event.
 *
 * \param n  Engine header; its type must satisfy isMcClellanType().
 * \param s  State whose outgoing transitions are wanted.
 * \param t  Output table. Entries [0, N_CHARS) and t[TOP] are written.
 *
 * 8-bit engines store one byte per successor and are copied out unmodified.
 * 16-bit engines mask every successor with STATE_MASK. States at or above
 * m->sherman_limit are stored as a record holding a base ("daddy") state id
 * plus a list of (symbol, successor) overrides applied on top of the base
 * state's row.
 */
static
void mcclellanGetTransitions(const NFA *n, u16 s, u16 *t) {
    assert(isMcClellanType(n->type));
    const mcclellan *m = (const mcclellan *)getImplNfa(n);
    const mstate_aux *aux = getAux(n, s);
    const u32 as = m->alphaShift;

    if (n->type == MCCLELLAN_NFA_8) {
        /* The byte-wide successor table directly follows the header. */
        const u8 *succ_table = (const u8 *)((const char *)m
                                            + sizeof(mcclellan));
        for (u16 c = 0; c < N_CHARS; c++) {
            t[c] = succ_table[((u32)s << as) + m->remap[c]];
        }
    } else {
        const int is_sherman = s >= m->sherman_limit;
        u16 base_s = s;

        /* Starts at the beginning of the sherman region and is only moved to
         * this state's record when s really is a sherman state, so no
         * out-of-range pointer is ever formed for ordinary states. */
        const char *state_base = (const char *)n + m->sherman_offset;

        if (is_sherman) {
            state_base += SHERMAN_FIXED_SIZE * (s - m->sherman_limit);
            base_s = unaligned_load_u16(state_base + SHERMAN_DADDY_OFFSET);
        }

        /* Row of the base state in the 16-bit successor table. */
        const u16 *succ_table = (const u16 *)((const char *)m
                                              + sizeof(mcclellan));
        for (u16 c = 0; c < N_CHARS; c++) {
            const u8 *addr = (const u8 *)(succ_table + (((u32)base_s << as)
                                                        + m->remap[c]));
            t[c] = unaligned_load_u16(addr) & STATE_MASK;
        }

        if (is_sherman) {
            UNUSED char type = *(state_base + SHERMAN_TYPE_OFFSET);
            assert(type == SHERMAN_STATE);
            const u8 len = *(const u8 *)(SHERMAN_LEN_OFFSET + state_base);
            /* Symbols are read as unsigned bytes: going through plain char
             * would sign-extend ids >= 0x80 where char is signed and they
             * would never compare equal to a remap entry. */
            const u8 *chars = (const u8 *)(state_base + SHERMAN_CHARS_OFFSET);
            const u16 *states = (const u16 *)(state_base
                                              + SHERMAN_STATES_OFFSET(len));

            /* Overrides are applied in record order; a later entry wins. */
            for (u8 i = 0; i < len; i++) {
                const u16 succ
                    = unaligned_load_u16((const u8 *)&states[i]) & STATE_MASK;
                for (u16 c = 0; c < N_CHARS; c++) {
                    if ((u8)m->remap[c] == chars[i]) {
                        t[c] = succ;
                    }
                }
            }
        }
    }

    t[TOP] = aux->top & STATE_MASK;
}
Пример #2
0
/**
 * Run each DFA in the anchored table over the current stream buffer.
 *
 * \param t        Engine; supplies the offset of the anchored stream state.
 * \param atable   First anchored_matcher_info record of the chain.
 * \param alen     Passed to the DFA executor as the scan length; a DFA whose
 *                 start adjustment is >= alen is skipped.
 * \param offset   Compared against each record's anchoredMinDistance.
 * \param scratch  Scratch space; provides the stream state, the scan buffer
 *                 and the callback context.
 *
 * Records are chained through next_offset; a next_offset of zero ends the
 * walk. Matches are reported through roseAnchoredCallback.
 */
static rose_inline
void runAnchoredTableStream(const struct RoseEngine *t, const void *atable,
                            size_t alen, u64a offset,
                            struct hs_scratch *scratch) {
    char *const anchor_state
        = scratch->core_info.state + t->stateOffsets.anchorState;
    const struct anchored_matcher_info *entry = atable;

    for (;;) {
        DEBUG_PRINTF("--anchored nfa (+%u) no %u so %u\n",
                     entry->anchoredMinDistance, entry->next_offset,
                     entry->state_offset);

        /* The engine is laid out immediately after its info record. */
        const struct NFA *nfa = (const struct NFA *)(entry + 1);
        assert(ISALIGNED_CL(nfa));
        assert(isMcClellanType(nfa->type));

        const int is_8bit = nfa->type == MCCLELLAN_NFA_8;
        char *state = anchor_state + entry->state_offset;
        char start = 0;
        size_t adj = 0;
        int runnable;

        if (offset > entry->anchoredMinDistance) {
            /* (No state decompress necessary.) A zero stored state means
             * there is nothing to run for this DFA. */
            if (is_8bit) {
                runnable = *(u8 *)state != 0;
            } else {
                runnable = unaligned_load_u16(state) != 0;
            }
        } else {
            /* Distance still to skip before this DFA's minimum offset. */
            adj = entry->anchoredMinDistance - offset;
            runnable = adj < alen;
            if (runnable) {
                start = 1;
            }
        }

        if (runnable) {
            if (is_8bit) {
                nfaExecMcClellan8_SimpStream(nfa, state,
                                             scratch->core_info.buf, start,
                                             adj, alen, roseAnchoredCallback,
                                             scratch);
            } else {
                nfaExecMcClellan16_SimpStream(nfa, state,
                                              scratch->core_info.buf, start,
                                              adj, alen, roseAnchoredCallback,
                                              scratch);
            }
        }

        if (!entry->next_offset) {
            break;
        }

        entry = (const struct anchored_matcher_info *)((const char *)entry
                                                       + entry->next_offset);
    }
}
Пример #3
0
/**
 * Fill \p t with the successor of McSheng state \p s for every input byte
 * value, plus the TOP event.
 *
 * \param n  Engine header.
 * \param s  State whose outgoing transitions are wanted; must not be
 *           DEAD_STATE.
 * \param t  Output table. Entries [0, N_CHARS) and t[TOP] are written.
 *
 * Three storage forms are handled:
 *  - states below m->sheng_end are looked up in the per-byte sheng masks;
 *  - other states of 8-bit engines use a byte-wide successor table;
 *  - other states of 16-bit engines use a 16-bit table, with states at or
 *    above m->sherman_limit stored as a base ("daddy") state plus a list of
 *    (symbol, successor) overrides. These results are masked with STATE_MASK.
 */
static
void next_states(const NFA *n, u16 s, u16 *t) {
    const mcsheng *m = (const mcsheng *)getImplNfa(n);
    const mstate_aux *aux = getAux(n, s);
    const u32 as = m->alphaShift;
    assert(s != DEAD_STATE);

    if (s < m->sheng_end) {
        /* Sheng masks are indexed by state id minus one. */
        const u8 sheng_s = s - 1;
        assert(sheng_s < sizeof(m128));

        for (u16 c = 0; c < N_CHARS; c++) {
            const u8 *trans_for_c = (const u8 *)&m->sheng_masks[c];
            const u8 raw_succ = trans_for_c[sheng_s];

            /* Stored successors below sheng_end are offset by one from state
             * ids, with sheng_end - 1 standing for the dead state; anything
             * larger is used as-is. */
            if (raw_succ == m->sheng_end - 1) {
                t[c] = DEAD_STATE;
            } else if (raw_succ < m->sheng_end) {
                t[c] = raw_succ + 1;
            } else {
                t[c] = raw_succ;
            }
        }
    } else if (n->type == MCSHENG_NFA_8) {
        /* The table that follows the header has no rows for sheng states. */
        const u8 *succ_table = (const u8 *)((const char *)m + sizeof(mcsheng));
        const u32 normal_id = s - m->sheng_end;
        for (u16 c = 0; c < N_CHARS; c++) {
            t[c] = succ_table[(normal_id << as) + m->remap[c]];
        }
    } else {
        const int is_sherman = s >= m->sherman_limit;
        u16 base_s = s;

        /* Starts at the beginning of the sherman region and is only moved to
         * this state's record when s really is a sherman state, so no
         * out-of-range pointer is ever formed for ordinary states. */
        const char *state_base = (const char *)n + m->sherman_offset;

        if (is_sherman) {
            state_base += SHERMAN_FIXED_SIZE * (s - m->sherman_limit);
            base_s = unaligned_load_u16(state_base + SHERMAN_DADDY_OFFSET);
            assert(base_s >= m->sheng_end);
        }

        /* Row of the base state in the 16-bit successor table. */
        const u16 *succ_table = (const u16 *)((const char *)m
                                              + sizeof(mcsheng));
        const u32 normal_id = base_s - m->sheng_end;
        for (u16 c = 0; c < N_CHARS; c++) {
            t[c] = succ_table[(normal_id << as) + m->remap[c]];
        }

        if (is_sherman) {
            UNUSED char type = *(state_base + SHERMAN_TYPE_OFFSET);
            assert(type == SHERMAN_STATE);
            const u8 len = *(const u8 *)(SHERMAN_LEN_OFFSET + state_base);
            /* Symbols are read as unsigned bytes: going through plain char
             * would sign-extend ids >= 0x80 where char is signed and they
             * would never compare equal to a remap entry. */
            const u8 *chars = (const u8 *)(state_base + SHERMAN_CHARS_OFFSET);
            const u16 *states = (const u16 *)(state_base
                                              + SHERMAN_STATES_OFFSET(len));

            /* Overrides are applied in record order; a later entry wins. */
            for (u8 i = 0; i < len; i++) {
                const u16 succ = unaligned_load_u16((const u8 *)&states[i]);
                for (u16 c = 0; c < N_CHARS; c++) {
                    if ((u8)m->remap[c] == chars[i]) {
                        t[c] = succ;
                    }
                }
            }
        }

        /* Keep only the bits selected by STATE_MASK in every entry. */
        for (u16 c = 0; c < N_CHARS; c++) {
            t[c] &= STATE_MASK;
        }
    }

    t[TOP] = aux->top & STATE_MASK;
}