Example #1
0
/* Saves out the stream state of every (suffix) engine whose queue is flagged
 * in the active leaf array. If the MPV has been marked inactive in the scratch
 * context, its bit (queue 0) is cleared first so that it is not saved. */
static rose_inline
void roseSaveNfaStreamState(const struct RoseEngine *t, char *state,
                            struct hs_scratch *scratch) {
    u8 *leaves = getActiveLeafArray(t, state);
    const u32 leafCount = t->activeArrayCount;

    if (scratch->tctxt.mpv_inactive) {
        DEBUG_PRINTF("mpv is dead as a doornail\n");
        /* queue 0 is the mpv, when there is one */
        mmbit_unset(leaves, leafCount, 0);
    }

    u32 qi = mmbit_iterate(leaves, leafCount, MMB_INVALID);
    while (qi != MMB_INVALID) {
        DEBUG_PRINTF("saving stream state for qi=%u\n", qi);

        /* An engine that is still flagged as active should have done some
         * work, so its queue is expected to be in the active queue set. */
        assert(fatbit_isset(scratch->aqa, t->queueCount, qi));

        struct mq *q = &scratch->queues[qi];
        const struct NFA *nfa = getNfaByQueue(t, qi);
        saveStreamState(nfa, q, q_cur_loc(q));

        qi = mmbit_iterate(leaves, leafCount, qi);
    }
}
Example #2
0
/* Queue execution entry point for the Tamarama engine using the
 * nfaQueueExec2_raw() runner.
 *
 * While the queue still holds items, the current item location is not past
 * `end`, and the previous run did not return MO_MATCHES_PENDING, a temporary
 * queue is prepared with updateQueues(), the engine stored in that temporary
 * queue is run up to `end`, and its report_current flag is copied back to q.
 * If at least one run happened, copyBack() is called once at the end.
 *
 * Returns the result of the last run, or 0 if nothing was run. */
char nfaExecTamarama0_Q2(const struct NFA *n,
                         struct mq *q, s64a end) {
    DEBUG_PRINTF("exec to match\n");

    const struct Tamarama *tam = getImplNfa(n);
    struct mq sub;
    char result = 0;
    char sub_used = 0;

    for (;;) {
        /* guards are tested in the same order as a single && chain */
        if (q->cur >= q->end) {
            break;
        }
        if (q_cur_loc(q) > end) {
            break;
        }
        if (result == MO_MATCHES_PENDING) {
            break;
        }

        updateQueues(tam, q, &sub);
        result = nfaQueueExec2_raw(sub.nfa, &sub, end);
        q->report_current = sub.report_current;
        sub_used = 1;
    }

    /* sub is only initialised once updateQueues() has been called */
    if (sub_used) {
        copyBack(tam, q, &sub);
    }

    return result;
}
Example #3
0
/* Queue execution entry point for the Tamarama engine using the
 * nfaQueueExec_raw() runner.
 *
 * While the queue still holds items and the current item location is not past
 * `end`, a temporary queue is prepared with updateQueues(), the engine stored
 * in that temporary queue is run up to `end`, and its report_current flag is
 * copied back to q. The loop also stops as soon as can_stop_matching()
 * reports true for the queue's scratch. If at least one run happened,
 * copyBack() is called once at the end.
 *
 * Returns the result of the last run, or MO_ALIVE if nothing was run. */
char nfaExecTamarama0_Q(const struct NFA *n, struct mq *q, s64a end) {
    DEBUG_PRINTF("exec\n");

    const struct Tamarama *tam = getImplNfa(n);
    struct mq sub;
    char status = MO_ALIVE;
    char sub_used = 0;

    for (;;) {
        if (q->cur >= q->end || q_cur_loc(q) > end) {
            break;
        }

        updateQueues(tam, q, &sub);
        status = nfaQueueExec_raw(sub.nfa, &sub, end);
        q->report_current = sub.report_current;
        sub_used = 1;

        if (can_stop_matching(q->scratch)) {
            break;
        }
    }

    /* sub is only initialised once updateQueues() has been called */
    if (sub_used) {
        copyBack(tam, q, &sub);
    }

    return status;
}
Example #4
0
/* Main queue-driven execution loop for a Sheng engine.
 *
 * Parameters:
 *   sh    - the Sheng engine to run.
 *   q     - queue of events to process; the item at q->cur must be an
 *           MQE_START. The engine state is read from (and, on most exits,
 *           written back to) the byte at q->state.
 *   b_end - location at which scanning stops early; only honoured when
 *           mode != NO_MATCHES.
 *   mode  - CALLBACK_OUTPUT, NO_MATCHES or STOP_AT_MATCH; selects which
 *           scanning routine is used for each stretch of data.
 *
 * Returns:
 *   MO_DEAD            - a report callback asked to halt, a scanning routine
 *                        returned MO_DEAD, or (when the engine can die) the
 *                        state has SHENG_STATE_DEAD set when MQE_END is hit.
 *   MO_MATCHES_PENDING - STOP_AT_MATCH mode only: the scan stopped early; the
 *                        queue is rewound to an MQE_START at the stop point.
 *   MO_ALIVE           - otherwise (MQE_END processed, or b_end was reached
 *                        and the queue was rewound to an MQE_START at b_end).
 */
static never_inline
char runSheng(const struct sheng *sh, struct mq *q, s64a b_end,
              enum MatchMode mode) {
    u8 state = *(u8 *)q->state;
    u8 can_die = sh->flags & SHENG_FLAG_CAN_DIE;
    u8 has_accel = sh->flags & SHENG_FLAG_HAS_ACCEL;
    u8 single = sh->flags & SHENG_FLAG_SINGLE_REPORT;

    /* accept cache handed by pointer to the report and scan routines */
    u8 cached_accept_state = 0;
    ReportID cached_accept_id = 0;

    DEBUG_PRINTF("starting Sheng execution in state %u\n",
                 state & SHENG_STATE_MASK);

    /* Reports for the current state were deferred by an earlier call: fire
     * them now, at the current queue offset, before consuming any input. */
    if (q->report_current) {
        DEBUG_PRINTF("reporting current pending matches\n");
        assert(sh);

        q->report_current = 0;

        int rv;
        if (single) {
            rv = fireSingleReport(q->cb, q->context, sh->report,
                                  q_cur_offset(q));
        } else {
            rv = fireReports(sh, q->cb, q->context, state, q_cur_offset(q),
                             &cached_accept_state, &cached_accept_id, 0);
        }
        if (rv == MO_HALT_MATCHING) {
            DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK);
            return MO_DEAD;
        }

        DEBUG_PRINTF("proceeding with matching\n");
    }

    assert(q_cur_type(q) == MQE_START);
    s64a start = q_cur_loc(q);

    DEBUG_PRINTF("offset: %lli, location: %lli, mode: %s\n", q->offset, start,
                 mode == CALLBACK_OUTPUT ? "CALLBACK OUTPUT" :
                     mode == NO_MATCHES ? "NO MATCHES" :
                         mode == STOP_AT_MATCH ? "STOP AT MATCH" : "???");

    DEBUG_PRINTF("processing event @ %lli: %s\n", q->offset + q_cur_loc(q),
                 q_cur_type(q) == MQE_START ? "START" :
                     q_cur_type(q) == MQE_TOP ? "TOP" :
                         q_cur_type(q) == MQE_END ? "END" : "???");

    /* cur_buf is the base pointer that locations are added to: for negative
     * locations it is the end of the history buffer, otherwise it is the
     * start of the main buffer. */
    const u8* cur_buf;
    if (start < 0) {
        DEBUG_PRINTF("negative location, scanning history\n");
        DEBUG_PRINTF("min location: %zd\n", -q->hlength);
        cur_buf = q->history + q->hlength;
    } else {
        DEBUG_PRINTF("positive location, scanning buffer\n");
        DEBUG_PRINTF("max location: %lli\n", b_end);
        cur_buf = q->buffer;
    }

    /* if our queue event is past our end: nothing to scan; the START item is
     * left in place with its location moved to b_end */
    if (mode != NO_MATCHES && q_cur_loc(q) > b_end) {
        DEBUG_PRINTF("current location past buffer end\n");
        DEBUG_PRINTF("setting q location to %llu\n", b_end);
        DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK);
        q->items[q->cur].location = b_end;
        return MO_ALIVE;
    }

    /* consume the START item */
    q->cur++;

    /* cur_start tracks how far we have scanned so far */
    s64a cur_start = start;

    while (1) {
        DEBUG_PRINTF("processing event @ %lli: %s\n", q->offset + q_cur_loc(q),
                     q_cur_type(q) == MQE_START ? "START" :
                             q_cur_type(q) == MQE_TOP ? "TOP" :
                                     q_cur_type(q) == MQE_END ? "END" : "???");
        /* scan up to the location of the next event, capped at b_end unless
         * we are in NO_MATCHES mode */
        s64a end = q_cur_loc(q);
        if (mode != NO_MATCHES) {
            end = MIN(end, b_end);
        }
        assert(end <= (s64a) q->length);
        s64a cur_end = end;

        /* we may cross the border between history and current buffer: in that
         * case this pass stops at location 0 and the remainder is scanned on
         * the next pass through the loop */
        if (cur_start < 0) {
            cur_end = MIN(0, cur_end);
        }

        DEBUG_PRINTF("start: %lli end: %lli\n", start, end);

        /* don't scan zero length buffer */
        if (cur_start != cur_end) {
            /* the scan routines receive &scanned; on a full scan it is
             * expected to end up at cur_buf + cur_end (asserted below) */
            const u8 * scanned = cur_buf;
            char rv;

            /* if we're in nomatch mode or if we're scanning history buffer */
            if (mode == NO_MATCHES ||
                (cur_start < 0 && mode == CALLBACK_OUTPUT)) {
                runShengNm(sh, q->cb, q->context, q->offset,
                           &cached_accept_state, &cached_accept_id, cur_buf,
                           cur_buf + cur_start, cur_buf + cur_end, can_die,
                           has_accel, single, &scanned, &state);
            } else if (mode == CALLBACK_OUTPUT) {
                rv = runShengCb(sh, q->cb, q->context, q->offset,
                                &cached_accept_state, &cached_accept_id,
                                cur_buf, cur_buf + cur_start, cur_buf + cur_end,
                                can_die, has_accel, single, &scanned, &state);
                if (rv == MO_DEAD) {
                    DEBUG_PRINTF("exiting in state %u\n",
                                 state & SHENG_STATE_MASK);
                    return MO_DEAD;
                }
            } else if (mode == STOP_AT_MATCH) {
                rv = runShengSam(sh, q->cb, q->context, q->offset,
                                 &cached_accept_state, &cached_accept_id,
                                 cur_buf, cur_buf + cur_start,
                                 cur_buf + cur_end, can_die, has_accel, single,
                                 &scanned, &state);
                if (rv == MO_DEAD) {
                    DEBUG_PRINTF("exiting in state %u\n",
                                 state & SHENG_STATE_MASK);
                    return rv;
                } else if (rv == MO_MATCHES_PENDING) {
                    /* Stopped early: step back one queue slot and turn it
                     * into a START item just past the stop point, then save
                     * the state so that a later call can resume from there. */
                    assert(q->cur);
                    DEBUG_PRINTF("found a match, setting q location to %zd\n",
                                 scanned - cur_buf + 1);
                    q->cur--;
                    q->items[q->cur].type = MQE_START;
                    q->items[q->cur].location =
                            scanned - cur_buf + 1; /* due to exiting early */
                    *(u8 *)q->state = state;
                    DEBUG_PRINTF("exiting in state %u\n",
                                 state & SHENG_STATE_MASK);
                    return rv;
                }
            } else {
                assert(!"invalid scanning mode!");
            }
            assert(scanned == cur_buf + cur_end);

            cur_start = cur_end;
        }

        /* if our queue event is past our end: we have scanned up to b_end but
         * the event itself lies beyond it, so step back one queue slot, turn
         * it into a START item at b_end and save the state */
        if (mode != NO_MATCHES && q_cur_loc(q) > b_end) {
            DEBUG_PRINTF("current location past buffer end\n");
            DEBUG_PRINTF("setting q location to %llu\n", b_end);
            DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK);
            q->cur--;
            q->items[q->cur].type = MQE_START;
            q->items[q->cur].location = b_end;
            *(u8 *)q->state = state;
            return MO_ALIVE;
        }

        /* crossing over into actual buffer */
        if (cur_start == 0) {
            DEBUG_PRINTF("positive location, scanning buffer\n");
            DEBUG_PRINTF("max offset: %lli\n", b_end);
            cur_buf = q->buffer;
        }

        /* continue scanning the same buffer: the pass above was cut short at
         * the history/buffer border, so the event has not been reached yet */
        if (end != cur_end) {
            continue;
        }

        /* we have scanned up to the event's location: now apply the event */
        switch (q_cur_type(q)) {
        case MQE_END:
            /* end of queue: save the state and report liveness */
            *(u8 *)q->state = state;
            q->cur++;
            DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK);
            if (can_die) {
                return (state & SHENG_STATE_DEAD) ? MO_DEAD : MO_ALIVE;
            }
            return MO_ALIVE;
        case MQE_TOP:
            /* a top at absolute stream offset 0 goes to the anchored start
             * state; any other top follows the current state's top
             * transition */
            if (q->offset + cur_start == 0) {
                DEBUG_PRINTF("Anchored start, going to state %u\n",
                             sh->anchored);
                state = sh->anchored;
            } else {
                u8 new_state = get_aux(sh, state)->top;
                DEBUG_PRINTF("Top event %u->%u\n", state & SHENG_STATE_MASK,
                             new_state & SHENG_STATE_MASK);
                state = new_state;
            }
            break;
        default:
            assert(!"invalid queue event");
            break;
        }
        /* move on to the next queue item */
        q->cur++;
    }
}
Example #5
0
/* Catches a non-transient leftfix engine (queue qi) up to the end of the
 * current buffer and writes out whatever is needed to resume it later: either
 * its stream state plus a stored delay, or a zombie marker.
 *
 * Parameters:
 *   t       - the Rose engine.
 *   state   - stream state base pointer.
 *   scratch - scratch space holding the queues, the active queue set (aqa)
 *             and core_info for the current scan.
 *   qi      - queue index of the leftfix.
 *   left    - leftfix info for this queue; must not be transient.
 *
 * Returns 1 if the leftfix is still alive (or has been recorded as a zombie),
 * and 0 if it is dead.
 */
static really_inline
char roseCatchUpLeftfix(const struct RoseEngine *t, char *state,
                        struct hs_scratch *scratch, u32 qi,
                        const struct LeftNfaInfo *left) {
    assert(!left->transient); // active roses only

    struct core_info *ci = &scratch->core_info;
    const u32 qCount = t->queueCount;
    struct mq *q = scratch->queues + qi;
    const struct NFA *nfa = getNfaByQueue(t, qi);

    // An engine that was already a zombie and has not had its queue activated
    // during this scan has nothing to catch up.
    if (nfaSupportsZombie(nfa)
        && ci->buf_offset /* prefix can be alive with no q */
        && !fatbit_isset(scratch->aqa, qCount, qi)
        && isZombie(t, state, left)) {
        DEBUG_PRINTF("yawn - zombie\n");
        return 1;
    }

    // If this leftfix has a stop table, let the miracle scan try to kill it
    // or save its state outright; otherwise fall through and catch up
    // normally.
    if (left->stopTable) {
        enum MiracleAction mrv =
            roseScanForMiracles(t, state, scratch, qi, left, nfa);
        switch (mrv) {
        case MIRACLE_DEAD:
            return 0;
        case MIRACLE_SAVED:
            return 1;
        default:
            assert(mrv == MIRACLE_CONTINUE);
            break;
        }
    }

    // NOTE(review): fatbit_set() presumably returns the previous value of the
    // bit, so this branch is taken when the queue was not yet active --
    // confirm against the fatbit API.
    if (!fatbit_set(scratch->aqa, qCount, qi)) {
        initRoseQueue(t, qi, left, scratch);

        // sp is the (non-positive) location the engine's stored state
        // corresponds to: minus the stored delay, or 0 at the very start of
        // the stream.
        s32 sp;
        if (ci->buf_offset) {
            sp = -(s32)loadRoseDelay(t, state, left);
        } else {
            sp = 0;
        }

        DEBUG_PRINTF("ci->len=%zu, sp=%d, historyRequired=%u\n", ci->len, sp,
                     t->historyRequired);

        if ( ci->len - sp + 1 < t->historyRequired) {
            // we'll end up safely in the history region.
            // Only the delay needs updating; the engine is not run.
            DEBUG_PRINTF("safely in history, skipping\n");
            storeRoseDelay(t, state, left, (s64a)ci->len - sp);
            return 1;
        }

        pushQueueAt(q, 0, MQE_START, sp);
        if (left->infix || ci->buf_offset + sp > 0) {
            // resume from the stored stream state
            loadStreamState(nfa, q, sp);
        } else {
            // prefix at the start of the stream: start from the initial
            // state with a TOP event
            pushQueueAt(q, 1, MQE_TOP, sp);
            nfaQueueInitState(nfa, q);
        }
    } else {
        DEBUG_PRINTF("queue already active\n");
        // A queue holding only a START item has no pending events.
        if (q->end - q->cur == 1 && q_cur_type(q) == MQE_START) {
            DEBUG_PRINTF("empty queue, start loc=%lld\n", q_cur_loc(q));
            s64a last_loc = q_cur_loc(q);
            if (ci->len - last_loc + 1 < t->historyRequired) {
                // we'll end up safely in the history region.
                DEBUG_PRINTF("safely in history, saving state and skipping\n");
                saveStreamState(nfa, q, last_loc);
                storeRoseDelay(t, state, left, (s64a)ci->len - last_loc);
                return 1;
            }
        }
    }

    // Determine whether the byte before last_loc will be in the history
    // buffer on the next stream write.
    // If it will not be, move last_loc forward so that the engine ends at
    // most (maxLag - 1) bytes before the end of the buffer.
    s64a last_loc = q_last_loc(q);
    s64a leftovers = ci->len - last_loc;
    if (leftovers + 1 >= t->historyRequired) {
        u32 catchup_offset = left->maxLag ? left->maxLag - 1 : 0;
        last_loc = (s64a)ci->len - catchup_offset;
    }

    if (left->infix) {
        if (infixTooOld(q, last_loc)) {
            DEBUG_PRINTF("infix died of old age\n");
            return 0;
        }
        reduceInfixQueue(q, last_loc, left->maxQueueLen, q->nfa->maxWidth);
    }

    DEBUG_PRINTF("end scan at %lld\n", last_loc);
    pushQueueNoMerge(q, MQE_END, last_loc);

#ifdef DEBUG
    debugQueue(q);
#endif

    // Run the engine over the queued events up to last_loc.
    char rv = nfaQueueExecRose(nfa, q, MO_INVALID_IDX);
    if (!rv) { /* nfa is dead */
        DEBUG_PRINTF("died catching up to stream boundary\n");
        return 0;
    } else {
        DEBUG_PRINTF("alive, saving stream state\n");
        // An engine reporting NFA_ZOMBIE_ALWAYS_YES is recorded as a zombie
        // instead of having its stream state and delay written out.
        if (nfaSupportsZombie(nfa) &&
            nfaGetZombieStatus(nfa, q, last_loc) == NFA_ZOMBIE_ALWAYS_YES) {
            DEBUG_PRINTF("not so fast - zombie\n");
            setAsZombie(t, state, left);
        } else {
            saveStreamState(nfa, q, last_loc);
            storeRoseDelay(t, state, left, (s64a)ci->len - last_loc);
        }
    }

    return 1;
}
Example #6
0
/* Looks for a "miracle" for leftfix queue qi between the current queue
 * location (or 0 if the queue is not active) and the end of the buffer, using
 * roseMiracleOccurs() and then roseCountingMiracleOccurs().
 *
 * Returns:
 *   MIRACLE_CONTINUE - no miracle was found, or the queue has been re-seeded
 *                      at the miracle location and the caller should carry
 *                      on catching the engine up.
 *   MIRACLE_DEAD     - the engine is dead: an inactive infix, an infix whose
 *                      queue ends in MQE_START after skipping forward, or a
 *                      prefix whose compressed state initialisation failed.
 *   MIRACLE_SAVED    - prefix only: the miracle was close enough to the end
 *                      of the buffer that an initial compressed state and the
 *                      matching delay were written straight to stream state.
 */
static really_inline
enum MiracleAction roseScanForMiracles(const struct RoseEngine *t, char *state,
                                       struct hs_scratch *scratch, u32 qi,
                                       const struct LeftNfaInfo *left,
                                       const struct NFA *nfa) {
    struct core_info *ci = &scratch->core_info;
    struct mq *q = scratch->queues + qi;
    const u32 qCount = t->queueCount;

    const char active = fatbit_isset(scratch->aqa, qCount, qi);
    DEBUG_PRINTF("q_active=%d\n", active);

    const s64a scan_from = active ? q_cur_loc(q) : 0;
    const s64a scan_to = ci->len;

    /* The plain miracle check runs first; the counting check only runs if
     * the plain one found nothing. */
    s64a miracle_loc;
    if (!roseMiracleOccurs(t, left, ci, scan_from, scan_to, &miracle_loc)
        && !roseCountingMiracleOccurs(t, left, ci, scan_from, scan_to,
                                      &miracle_loc)) {
        DEBUG_PRINTF("no miracle\n");
        return MIRACLE_CONTINUE;
    }

    DEBUG_PRINTF("miracle at %lld\n", miracle_loc);

    if (left->infix) {
        if (!active) {
            DEBUG_PRINTF("killing infix\n");
            return MIRACLE_DEAD;
        }

        DEBUG_PRINTF("skip q forward, %lld to %lld\n", scan_from, miracle_loc);
        q_skip_forward_to(q, miracle_loc);
        if (q_last_type(q) == MQE_START) {
            DEBUG_PRINTF("miracle caused infix to die\n");
            return MIRACLE_DEAD;
        }

        /* restart the infix from its initial state at the miracle */
        DEBUG_PRINTF("re-init infix state\n");
        assert(q->items[q->cur].type == MQE_START);
        q->items[q->cur].location = miracle_loc;
        nfaQueueInitState(q->nfa, q);
        return MIRACLE_CONTINUE;
    }

    /* Prefix: a miracle within historyRequired bytes of the end of the buffer
     * lets us write an initial compressed state straight into stream state
     * without running the engine. */
    const s64a direct_save_after = scan_to - t->historyRequired;
    if (miracle_loc > direct_save_after) {
        char *streamState = state + getNfaInfoByQueue(t, qi)->stateOffset;
        u64a offset = ci->buf_offset + miracle_loc;
        u8 key = offset ? getByteBefore(ci, miracle_loc) : 0;
        DEBUG_PRINTF("init state, key=0x%02x, offset=%llu\n", key, offset);
        if (!nfaInitCompressedState(nfa, offset, streamState, key)) {
            return MIRACLE_DEAD;
        }
        storeRoseDelay(t, state, left, (s64a)ci->len - miracle_loc);
        return MIRACLE_SAVED;
    }

    /* Otherwise restart the prefix at the miracle with a fresh queue holding
     * a START and a TOP event. */
    DEBUG_PRINTF("re-init prefix (skip %lld->%lld)\n", scan_from,
                 miracle_loc);
    if (!active) {
        fatbit_set(scratch->aqa, qCount, qi);
        initRoseQueue(t, qi, left, scratch);
    }
    q->end = 0;
    q->cur = 0;
    pushQueueAt(q, 0, MQE_START, miracle_loc);
    pushQueueAt(q, 1, MQE_TOP, miracle_loc);
    nfaQueueInitState(q->nfa, q);

    return MIRACLE_CONTINUE;
}
Example #7
0
/* Enqueues a top event for the MPV engine (queue 0) in response to a chained
 * match.
 *
 * Parameters:
 *   t                   - the Rose engine.
 *   scratch             - scratch space holding the queues, the active queue
 *                         set and core_info for the current scan.
 *   event               - the top event to enqueue (MQE_TOP or a value
 *                         >= MQE_TOP_FIRST).
 *   top_squash_distance - if non-zero, a new event of the same type as the
 *                         last queued item, and no more than this far past
 *                         it, replaces that item's location instead of being
 *                         pushed as a new item.
 *   end                 - absolute stream offset of the match; converted to a
 *                         buffer-relative location by subtracting
 *                         ci->buf_offset.
 *   in_catchup          - passed through to ensureMpvQueueFlushed().
 *
 * Returns HWLM_TERMINATE_MATCHING if flushing a full queue asked for
 * termination, and HWLM_CONTINUE_MATCHING otherwise.
 */
hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t,
                                 struct hs_scratch *scratch, u32 event,
                                 u64a top_squash_distance, u64a end,
                                 char in_catchup) {
    assert(event == MQE_TOP || event >= MQE_TOP_FIRST);
    struct core_info *ci = &scratch->core_info;

    u8 *aa = getActiveLeafArray(t, scratch->core_info.state);
    u32 aaCount = t->activeArrayCount;
    struct fatbit *activeQueues = scratch->aqa;
    u32 qCount = t->queueCount;

    const u32 qi = 0; /* MPV is always queue 0 if it exists */
    struct mq *q = &scratch->queues[qi];
    const struct NfaInfo *info = getNfaInfoByQueue(t, qi);

    /* location of the match relative to the start of the current buffer;
     * negative values lie in the history */
    s64a loc = (s64a)end - ci->buf_offset;
    assert(loc <= (s64a)ci->len && loc >= -(s64a)ci->hlen);

    /* NOTE(review): mmbit_set()/fatbit_set() presumably return the previous
     * value of the bit, so the first and third branches below are taken when
     * the bit was not yet set -- confirm against those APIs. */
    if (!mmbit_set(aa, aaCount, qi)) {
        /* the mpv was not in the active leaf array: start it from its
         * initial state at loc */
        initQueue(q, qi, t, scratch);
        nfaQueueInitState(q->nfa, q);
        pushQueueAt(q, 0, MQE_START, loc);
        fatbit_set(activeQueues, qCount, qi);
    } else if (info->no_retrigger) {
        DEBUG_PRINTF("yawn\n");
        /* nfa only needs one top; we can go home now */
        return HWLM_CONTINUE_MATCHING;
    } else if (!fatbit_set(activeQueues, qCount, qi)) {
        /* in the leaf array but with no queue set up yet: build the queue
         * from the stored stream state, starting at location 0 */
        initQueue(q, qi, t, scratch);
        loadStreamState(q->nfa, q, 0);
        pushQueueAt(q, 0, MQE_START, 0);
    } else if (isQueueFull(q)) {
        DEBUG_PRINTF("queue %u full -> catching up nfas\n", qi);
        /* we know it is a chained nfa and the suffixes/outfixes must already
         * be known to be consistent */
        if (ensureMpvQueueFlushed(t, scratch, qi, loc, in_catchup)
            == HWLM_TERMINATE_MATCHING) {
            DEBUG_PRINTF("terminating...\n");
            return HWLM_TERMINATE_MATCHING;
        }
    }

    /* Top squashing: if the last queued item is the same event and lies
     * within top_squash_distance of loc, move it to loc instead of pushing a
     * new item. */
    if (top_squash_distance) {
        assert(q->cur != q->end);
        struct mq_item *last = &q->items[q->end - 1];
        if (last->type == event
            && last->location >= loc - (s64a)top_squash_distance) {
            last->location = loc;
            goto event_enqueued;
        }
    }

    pushQueue(q, event, loc);

event_enqueued:
    /* If the queue's current location is already at the end of the buffer,
     * run the engine over the queue now and then either reset the queue to a
     * single START at loc (still alive) or remove the mpv from both the
     * active leaf array and the active queue set (dead). */
    if (q_cur_loc(q) == (s64a)ci->len) {
        /* we may not run the nfa; need to ensure state is fine  */
        DEBUG_PRINTF("empty run\n");
        pushQueueNoMerge(q, MQE_END, loc);
        char alive = nfaQueueExec(q->nfa, q, loc);
        if (alive) {
            scratch->tctxt.mpv_inactive = 0;
            q->cur = q->end = 0;
            pushQueueAt(q, 0, MQE_START, loc);
        } else {
            mmbit_unset(aa, aaCount, qi);
            fatbit_unset(scratch->aqa, qCount, qi);
        }
    }

    DEBUG_PRINTF("added mpv event at %lld\n", loc);
    scratch->tctxt.next_mpv_offset = 0; /* the top event may result in matches
                                         * earlier than expected */
    return HWLM_CONTINUE_MATCHING;
}