Beispiel #1
0
/**
 * \brief End-of-data (EOD) processing sequence.
 *
 * In order, this:
 *  -# fires the special EOD event literal, if the engine has one;
 *  -# calls roseCheckNfaEod();
 *  -# runs the EOD iterator to handle pending EOD reports;
 *  -# if the engine has an EOD-anchored matcher, clears the role state and
 *     active leaf array, runs that matcher, cleans up, runs the EOD iterator
 *     again and finally checks EOD suffixes.
 *
 * Processing stops as soon as any stage reports that matching should halt.
 *
 * \param t Rose engine.
 * \param state Engine state bytes passed to the EOD stages.
 * \param offset Offset passed to each EOD stage (presumably the end-of-data
 *        offset; confirm against callers).
 * \param scratch Scratch space; exactly one of core_info.buf and
 *        core_info.hbuf must be set.
 * \param is_streaming Forwarded to roseCheckNfaEod() and roseEodRunMatcher().
 */
static really_inline
void roseEodExec_i(const struct RoseEngine *t, u8 *state, u64a offset,
                   struct hs_scratch *scratch, const char is_streaming) {
    assert(t);
    assert(scratch->core_info.buf || scratch->core_info.hbuf);
    assert(!scratch->core_info.buf || !scratch->core_info.hbuf);
    assert(!can_stop_matching(scratch));

    // Fire the special EOD event literal.
    if (t->hasEodEventLiteral) {
        DEBUG_PRINTF("firing eod event id %u at offset %llu\n",
                     t->eodLiteralId, offset);
        const struct core_info *ci = &scratch->core_info;
        size_t len = ci->buf ? ci->len : ci->hlen;
        assert(len || !ci->buf); /* len may be 0 if no history is required
                                  * (bounds checks only can lead to this) */

        roseRunEvent(len, t->eodLiteralId, &scratch->tctxt);
        if (can_stop_matching(scratch)) {
            DEBUG_PRINTF("user told us to stop\n");
            return;
        }
    }

    roseCheckNfaEod(t, state, scratch, offset, is_streaming);

    if (!t->eodIterOffset && !t->ematcherOffset) {
        DEBUG_PRINTF("no eod accepts\n");
        return;
    }

    // Handle pending EOD reports.
    int itrv = roseEodRunIterator(t, state, offset, scratch);
    if (itrv == MO_HALT_MATCHING) {
        return;
    }

    // Run the EOD anchored matcher if there is one.
    if (t->ematcherOffset) {
        assert(t->ematcherRegionSize);
        // Unset the reports we just fired so we don't fire them again below.
        mmbit_clear(getRoleState(state), t->rolesWithStateCount);
        mmbit_clear(getActiveLeafArray(t, state), t->activeArrayCount);
        sidecar_enabled_populate(t, scratch, state);

        hwlmcb_rv_t rv = roseEodRunMatcher(t, offset, scratch, is_streaming);
        if (rv == HWLM_TERMINATE_MATCHING) {
            return;
        }

        cleanupAfterEodMatcher(t, state, offset, scratch);

        // Fire any new EOD reports. As with the first iterator pass above, a
        // halt result means we must not go on to the suffix checks, which
        // could otherwise raise further matches after a halt was requested.
        if (roseEodRunIterator(t, state, offset, scratch)
            == MO_HALT_MATCHING) {
            DEBUG_PRINTF("user told us to stop\n");
            return;
        }

        roseCheckEodSuffixes(t, state, offset, scratch);
    }
}
Beispiel #2
0
static really_inline
void do_rebuild(const struct RoseEngine *t, const struct HWLM *ftable,
                struct hs_scratch *scratch) {
    assert(!can_stop_matching(scratch));
    size_t len = MIN(scratch->core_info.hlen, t->delayRebuildLength);
    const u8 *buf = scratch->core_info.hbuf + scratch->core_info.hlen - len;
    DEBUG_PRINTF("BEGIN FLOATING REBUILD over %zu bytes\n", len);

    scratch->core_info.status &= ~STATUS_DELAY_DIRTY;

    hwlmExec(ftable, buf, len, 0, roseDelayRebuildCallback, scratch,
             scratch->tctxt.groups);
    assert(!can_stop_matching(scratch));
}
Beispiel #3
0
/**
 * \brief Match callback that runs the literal program for \p id with the
 * ROSE_PROG_FLAG_IN_ANCHORED flag.
 *
 * \param start Passed through to roseRunProgram() as the start offset.
 * \param end Match end relative to the current buffer; core_info.buf_offset
 *        is added to obtain the offset used for the program.
 * \param id Literal id; indexes the literal program table.
 * \param ctx The hs_scratch for this scan.
 * \return MO_HALT_MATCHING if matching can already stop or the program
 *         requested termination; MO_CONTINUE_MATCHING otherwise.
 */
int roseAnchoredCallback(u64a start, u64a end, u32 id, void *ctx) {
    struct hs_scratch *scratch = ctx;
    assert(scratch && scratch->magic == SCRATCH_MAGIC);

    struct core_info *ci = &scratch->core_info;
    struct RoseContext *tctxt = &scratch->tctxt;
    const struct RoseEngine *t = ci->rose;

    // Index after the last byte of the match.
    u64a real_end = ci->buf_offset + end;

    DEBUG_PRINTF("MATCH id=%u offsets=[???,%llu]\n", id, real_end);
    DEBUG_PRINTF("STATE groups=0x%016llx\n", tctxt->groups);

    if (can_stop_matching(scratch)) {
        DEBUG_PRINTF("received a match when we're already dead!\n");
        return MO_HALT_MATCHING;
    }

    /* Delayed literals must be delivered ahead of real literals, but they
     * only originate from the floating table; any literal delivered here is
     * therefore too early for a delayed literal to be pending.
     *
     * There are also no history checks from the anchored region, and we are
     * before the flush boundary. */

    if (real_end <= t->floatingMinLiteralMatchOffset) {
        roseFlushLastByteHistory(t, scratch, real_end);
        tctxt->lastEndOffset = real_end;
    }

    const u32 *lit_programs = getByOffset(t, t->litProgramOffset);
    assert(id < t->literalCount);

    const size_t match_len = 0;
    const u8 prog_flags = ROSE_PROG_FLAG_IN_ANCHORED;
    hwlmcb_rv_t prog_rv = roseRunProgram(t, scratch, lit_programs[id], start,
                                         real_end, match_len, prog_flags);
    if (prog_rv == HWLM_TERMINATE_MATCHING) {
        assert(can_stop_matching(scratch));
        DEBUG_PRINTF("caller requested termination\n");
        return MO_HALT_MATCHING;
    }

    DEBUG_PRINTF("DONE groups=0x%016llx\n", tctxt->groups);

    // Matches past the floating minimum offset are recorded as well.
    if (real_end > t->floatingMinLiteralMatchOffset) {
        recordAnchoredLiteralMatch(t, scratch, id, real_end);
    }

    return MO_CONTINUE_MATCHING;
}
Beispiel #4
0
/**
 * \brief Literal match callback body: flushes queued literals up to the
 * match end, then processes the match for literal \p id.
 *
 * \param start Match start, relative to the scanned buffer.
 * \param end Match end, relative to the scanned buffer;
 *        tctxt.lit_offset_adjust is added to obtain the offset used.
 * \param id Literal id.
 * \param ctxt The hs_scratch for this scan.
 * \return HWLM_TERMINATE_MATCHING if matching should stop, otherwise the
 *         current value of tctxt.groups.
 */
static really_inline
hwlmcb_rv_t roseCallback_i(size_t start, size_t end, u32 id, void *ctxt) {
    struct hs_scratch *scratch = ctxt;
    const struct RoseEngine *t = scratch->core_info.rose;
    struct RoseContext *tctx = &scratch->tctxt;

    u64a real_end = end + tctx->lit_offset_adjust;

#if defined(DEBUG)
    DEBUG_PRINTF("MATCH id=%u offsets=[%llu,%llu]: ", id,
                 start + tctx->lit_offset_adjust, real_end);
    printMatch(&scratch->core_info, start + tctx->lit_offset_adjust, real_end);
    printf("\n");
#endif
    DEBUG_PRINTF("last end %llu\n", tctx->lastEndOffset);

    DEBUG_PRINTF("STATE groups=0x%016llx\n", tctx->groups);

    if (can_stop_matching(scratch)) {
        DEBUG_PRINTF("received a match when we're already dead!\n");
        return HWLM_TERMINATE_MATCHING;
    }

    /* Note: flushing may have advanced tctx->lastEndOffset. */
    hwlmcb_rv_t flush_rv = flushQueuedLiterals(t, scratch, real_end);

    // The history flush happens before the flush result is examined.
    if (real_end >= t->floatingMinLiteralMatchOffset) {
        roseFlushLastByteHistory(t, scratch, real_end);
        tctx->lastEndOffset = real_end;
    }

    if (flush_rv == HWLM_TERMINATE_MATCHING) {
        return HWLM_TERMINATE_MATCHING;
    }

    hwlmcb_rv_t match_rv = roseProcessMatchInline(t, scratch, real_end,
                                                  end - start + 1, id);

    DEBUG_PRINTF("DONE groups=0x%016llx\n", tctx->groups);

    if (match_rv == HWLM_TERMINATE_MATCHING) {
        assert(can_stop_matching(scratch));
        DEBUG_PRINTF("user requested halt\n");
        return HWLM_TERMINATE_MATCHING;
    }

    return tctx->groups;
}
Beispiel #5
0
static never_inline
void soleOutfixStreamExec(struct hs_stream *stream_state,
                          struct hs_scratch *scratch) {
    assert(stream_state);
    assert(scratch);
    assert(!can_stop_matching(scratch));

    const struct RoseEngine *t = stream_state->rose;
    assert(t->outfixEndQueue == 1);
    assert(!t->amatcherOffset);
    assert(!t->ematcherOffset);
    assert(!t->fmatcherOffset);

    const struct NFA *nfa = getNfaByQueue(t, 0);

    struct mq *q = scratch->queues;
    initOutfixQueue(q, 0, t, scratch);
    if (!scratch->core_info.buf_offset) {
        nfaQueueInitState(nfa, q);
        pushQueueAt(q, 0, MQE_START, 0);
        pushQueueAt(q, 1, MQE_TOP, 0);
        pushQueueAt(q, 2, MQE_END, scratch->core_info.len);
    } else {
        nfaExpandState(nfa, q->state, q->streamState, q->offset,
                       queue_prev_byte(q, 0));
        pushQueueAt(q, 0, MQE_START, 0);
        pushQueueAt(q, 1, MQE_END, scratch->core_info.len);
    }

    if (nfaQueueExec(q->nfa, q, scratch->core_info.len)) {
        nfaQueueCompressState(nfa, q, scratch->core_info.len);
    } else if (!told_to_stop_matching(scratch)) {
        scratch->core_info.status |= STATUS_EXHAUSTED;
    }
}
Beispiel #6
0
/**
 * \brief End-of-data handling for an engine that consists of a single outfix
 * NFA (queue 0).
 *
 * Does nothing if matching can already stop, if all reports are exhausted,
 * or if no data has been scanned (buf_offset == 0). Otherwise expands the
 * stored stream state and calls nfaCheckFinalState() on it.
 */
static never_inline
void soleOutfixEodExec(hs_stream_t *id, hs_scratch_t *scratch) {
    const struct RoseEngine *t = id->rose;

    if (can_stop_matching(scratch)) {
        DEBUG_PRINTF("stream already broken\n");
        return;
    }

    if (isAllExhausted(t, scratch->core_info.exhaustionVector)) {
        DEBUG_PRINTF("stream exhausted\n");
        return;
    }

    assert(t->outfixEndQueue == 1);
    assert(!t->amatcherOffset);
    assert(!t->ematcherOffset);
    assert(!t->fmatcherOffset);

    const struct NFA *outfix_nfa = getNfaByQueue(t, 0);

    struct mq *outfix_q = scratch->queues;
    initOutfixQueue(outfix_q, 0, t, scratch);

    /* no vacuous engines: nothing to check if no data was ever scanned */
    if (!scratch->core_info.buf_offset) {
        DEBUG_PRINTF("buf_offset is zero\n");
        return;
    }

    nfaExpandState(outfix_nfa, outfix_q->state, outfix_q->streamState,
                   outfix_q->offset, queue_prev_byte(outfix_q, 0));

    assert(nfaAcceptsEod(outfix_nfa));
    nfaCheckFinalState(outfix_nfa, outfix_q->state, outfix_q->streamState,
                       outfix_q->offset, outfix_q->cb, outfix_q->som_cb,
                       scratch);
}
Beispiel #7
0
/**
 * \brief Block-mode entry point for EOD processing.
 *
 * Prepares history via prepForEod() and then runs roseEodExec_i() with
 * is_streaming set to 0.
 *
 * \param t Rose engine; must have requiresEodCheck set.
 * \param offset Offset passed to roseEodExec_i(); must not exceed
 *        t->maxBiAnchoredWidth unless that is ROSE_BOUND_INF.
 * \param scratch Scratch space for this scan.
 */
void roseBlockEodExec(const struct RoseEngine *t, u64a offset,
                      struct hs_scratch *scratch) {
    assert(t->requiresEodCheck);
    assert(t->maxBiAnchoredWidth == ROSE_BOUND_INF
           || offset <= t->maxBiAnchoredWidth);

    assert(!can_stop_matching(scratch));

    struct core_info *ci = &scratch->core_info;
    u8 *block_state = (u8 *)ci->state;

    // History must be correct before any EOD matches are looked for.
    prepForEod(t, block_state, ci->len, &scratch->tctxt);

    roseEodExec_i(t, block_state, offset, scratch, 0);
}
Beispiel #8
0
/**
 * \brief Run roseEodExec() for stream \p id unless matching can already stop
 * or every report is already exhausted.
 */
static really_inline
void rawEodExec(hs_stream_t *id, hs_scratch_t *scratch) {
    const struct RoseEngine *t = id->rose;

    // Nothing to do for a stream that has already been halted.
    if (can_stop_matching(scratch)) {
        DEBUG_PRINTF("stream already broken\n");
        return;
    }

    // Nothing to do if no report can fire any more.
    if (isAllExhausted(t, scratch->core_info.exhaustionVector)) {
        DEBUG_PRINTF("stream exhausted\n");
        return;
    }

    roseEodExec(t, id->offset, scratch);
}
Beispiel #9
0
/**
 * \brief Run roseStreamExec() for one stream write, then flag the stream as
 * STATUS_EXHAUSTED if all reports are exhausted and we were not told to stop
 * matching.
 */
static really_inline
void rawStreamExec(struct hs_stream *stream_state, struct hs_scratch *scratch) {
    assert(stream_state);
    assert(scratch);
    assert(!can_stop_matching(scratch));

    DEBUG_PRINTF("::: streaming rose ::: offset = %llu len = %zu\n",
                 stream_state->offset, scratch->core_info.len);

    const struct RoseEngine *t = stream_state->rose;
    assert(t);
    roseStreamExec(t, scratch);

    // An explicit stop takes precedence over marking exhaustion.
    if (told_to_stop_matching(scratch)) {
        return;
    }

    if (isAllExhausted(t, scratch->core_info.exhaustionVector)) {
        DEBUG_PRINTF("stream exhausted\n");
        scratch->core_info.status |= STATUS_EXHAUSTED;
    }
}
Beispiel #10
0
/**
 * \brief Queue execution for the Tamarama engine.
 *
 * While the queue \p q has events at or before \p end, a temporary queue is
 * set up with updateQueues() and executed with nfaQueueExec_raw(). The loop
 * ends early once can_stop_matching() holds. If at least one iteration ran,
 * copyBack() is called with the temporary queue.
 *
 * \return The result of the last nfaQueueExec_raw() call, or MO_ALIVE if
 *         none was made.
 */
char nfaExecTamarama0_Q(const struct NFA *n, struct mq *q, s64a end) {
    DEBUG_PRINTF("exec\n");
    const struct Tamarama *t = getImplNfa(n);

    struct mq sub_q;
    char result = MO_ALIVE;
    char ran_sub_queue = 0;

    while (q->cur < q->end && q_cur_loc(q) <= end) {
        updateQueues(t, q, &sub_q);
        result = nfaQueueExec_raw(sub_q.nfa, &sub_q, end);
        q->report_current = sub_q.report_current;
        ran_sub_queue = 1;

        if (can_stop_matching(q->scratch)) {
            break;
        }
    }

    // sub_q is only initialised if the loop body ran at least once.
    if (ran_sub_queue) {
        copyBack(t, q, &sub_q);
    }

    return result;
}
Beispiel #11
0
/**
 * \brief Report callback that treats \p id as a Rose program offset and runs
 * that program with ROSE_PROG_FLAG_SKIP_MPV_CATCHUP.
 *
 * \param start Start offset passed to the program.
 * \param end End offset passed to the program.
 * \param id Program offset to run.
 * \param context The hs_scratch for this scan.
 * \return MO_HALT_MATCHING if the program requested termination or matching
 *         can stop afterwards; MO_CONTINUE_MATCHING otherwise.
 */
int roseReportAdaptor(u64a start, u64a end, ReportID id, void *context) {
    struct hs_scratch *scratch = context;
    assert(scratch && scratch->magic == SCRATCH_MAGIC);

    DEBUG_PRINTF("id=%u matched at [%llu,%llu]\n", id, start, end);

    const struct RoseEngine *t = scratch->core_info.rose;

    const u32 program = id;     // The match ID doubles as the program offset.
    const size_t match_len = 0; // Not used on this path.
    const u8 prog_flags = ROSE_PROG_FLAG_SKIP_MPV_CATCHUP;

    if (roseRunProgram(t, scratch, program, start, end, match_len, prog_flags)
        == HWLM_TERMINATE_MATCHING) {
        return MO_HALT_MATCHING;
    }

    if (can_stop_matching(scratch)) {
        return MO_HALT_MATCHING;
    }

    return MO_CONTINUE_MATCHING;
}
Beispiel #12
0
/**
 * \brief Streaming scan for an engine handled purely by the floating literal
 * matcher.
 *
 * Runs hwlmExecStreaming() over the whole write with
 * rosePureLiteralCallback and the engine's initial groups, then flags the
 * stream as STATUS_EXHAUSTED if all reports are exhausted and we were not
 * told to stop matching.
 */
static really_inline
void pureLiteralStreamExec(struct hs_stream *stream_state,
                           struct hs_scratch *scratch) {
    assert(stream_state);
    assert(scratch);
    assert(!can_stop_matching(scratch));

    char *state = getMultiState(stream_state);

    const struct RoseEngine *t = stream_state->rose;
    const struct HWLM *ftable = getFLiteralMatcher(t);

    size_t scan_len = scratch->core_info.len;

    // The literal matcher only has stream state if the engine says so.
    u8 *matcher_state =
        t->floatingStreamState ? getFloatingMatcherState(t, state) : NULL;

    DEBUG_PRINTF("::: streaming rose ::: offset = %llu len = %zu\n",
                 stream_state->offset, scratch->core_info.len);

    // floatingMinDistance is not set for pure literal cases, so the match
    // region always begins at zero.
    const size_t scan_start = 0;

    hwlmExecStreaming(ftable, scratch, scan_len, scan_start,
                      rosePureLiteralCallback, scratch, t->initialGroups,
                      matcher_state);

    if (told_to_stop_matching(scratch)) {
        return;
    }

    if (isAllExhausted(t, scratch->core_info.exhaustionVector)) {
        DEBUG_PRINTF("stream exhausted\n");
        scratch->core_info.status |= STATUS_EXHAUSTED;
    }
}
Beispiel #13
0
/**
 * \brief Run a boundary report program at the given stream offset.
 *
 * \param rose Rose engine.
 * \param program Offset of the program to run.
 * \param stream_offset Offset at which reports are raised; also written to
 *        tctxt.minMatchOffset before the program runs.
 * \param scratch Scratch space for this scan.
 * \return MO_HALT_MATCHING if matching can already stop on entry or the
 *         program returns HWLM_TERMINATE_MATCHING; MO_CONTINUE_MATCHING
 *         otherwise.
 */
int roseRunBoundaryProgram(const struct RoseEngine *rose, u32 program,
                           u64a stream_offset, struct hs_scratch *scratch) {
    DEBUG_PRINTF("running boundary program at offset %u\n", program);

    if (can_stop_matching(scratch)) {
        DEBUG_PRINTF("can stop matching\n");
        return MO_HALT_MATCHING;
    }

    /* The som deduper logs are normally initialised lazily. We are about to
     * report matches, so that can be put off no longer. It is done here
     * explicitly because the som handling is shortcut for vacuous repeats,
     * which are known to come only from non-som patterns. */
    if (rose->hasSom && scratch->deduper.current_report_offset == ~0ULL) {
        fatbit_clear(scratch->deduper.som_log[0]);
        fatbit_clear(scratch->deduper.som_log[1]);
        scratch->deduper.som_log_dirty = 0;
    }

    // Satisfy the assertions in the program report path. There can be no
    // earlier reports at offset zero, and at EOD every earlier report should
    // already have been handled, with catch-up to the stream offset complete
    // before boundary report programs run.
    scratch->tctxt.minMatchOffset = stream_offset;

    const u64a som = 0;
    const size_t match_len = 0;
    const u8 prog_flags = 0;
    hwlmcb_rv_t prog_rv = roseRunProgram(rose, scratch, program, som,
                                         stream_offset, match_len, prog_flags);

    return prog_rv == HWLM_TERMINATE_MATCHING ? MO_HALT_MATCHING
                                              : MO_CONTINUE_MATCHING;
}
Beispiel #14
0
/**
 * \brief Scan one stream write (scratch->core_info.buf, of core_info.len
 * bytes at stream offset core_info.buf_offset) with Rose engine \p t.
 *
 * Steps, in order:
 *  - return immediately if the write would extend past
 *    t->maxBiAnchoredWidth (when that bound is finite);
 *  - reset the per-scan RoseContext fields and catch-up structures;
 *  - initialise the suffix priority queue if the engine has outfixes, and
 *    run eager prefixes;
 *  - run the anchored literal table over the part of the write that lies
 *    within t->anchoredDistance;
 *  - run the floating literal table (rebuilding delayed-match state from
 *    history first when required);
 *  - flush delayed matches and, unless matching can stop, tidy up the stream
 *    state.
 *
 * Note that if cleanUpDelayed() returns HWLM_TERMINATE_MATCHING the function
 * returns without reaching the final tidy-up step.
 *
 * \param t Rose engine; must be non-NULL.
 * \param scratch Scratch space; core_info.buf and core_info.hbuf must both
 *        be set.
 */
void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) {
    DEBUG_PRINTF("OH HAI [%llu, %llu)\n", scratch->core_info.buf_offset,
                 scratch->core_info.buf_offset + (u64a)scratch->core_info.len);
    assert(t);
    assert(scratch->core_info.hbuf);
    assert(scratch->core_info.buf);

    // We should not have been called if we've already been told to terminate
    // matching.
    assert(!told_to_stop_matching(scratch));

    assert(mmbit_sparse_iter_state_size(t->rolesWithStateCount)
           < MAX_SPARSE_ITER_STATES);

    size_t length = scratch->core_info.len;
    u64a offset = scratch->core_info.buf_offset;

    // We may have a maximum width (for engines constructed entirely
    // of bi-anchored patterns). If this write would result in us progressing
    // beyond this point, we cannot possibly match.
    if (t->maxBiAnchoredWidth != ROSE_BOUND_INF
        && offset + length > t->maxBiAnchoredWidth) {
        DEBUG_PRINTF("bailing, write would progress beyond maxBAWidth\n");
        return;
    }

    char *state = scratch->core_info.state;

    // Reset the per-scan context; offsets start at the beginning of this
    // write and groups are reloaded from the stream state.
    struct RoseContext *tctxt = &scratch->tctxt;
    tctxt->mpv_inactive = 0;
    tctxt->groups = loadGroups(t, state);
    tctxt->lit_offset_adjust = offset + 1; // index after last byte
    tctxt->delayLastEndOffset = offset;
    tctxt->lastEndOffset = offset;
    tctxt->filledDelayedSlots = 0;
    tctxt->lastMatchOffset = 0;
    tctxt->minMatchOffset = offset;
    tctxt->minNonMpvMatchOffset = offset;
    tctxt->next_mpv_offset = 0;
    DEBUG_PRINTF("BEGIN: history len=%zu, buffer len=%zu groups=%016llx\n",
                 scratch->core_info.hlen, scratch->core_info.len, tctxt->groups);

    fatbit_clear(scratch->aqa);
    scratch->al_log_sum = 0;
    scratch->catchup_pq.qm_size = 0;

    // Only engines with at least one outfix queue need the suffix PQ set up.
    if (t->outfixBeginQueue != t->outfixEndQueue) {
        streamInitSufPQ(t, state, scratch);
    }

    runEagerPrefixesStream(t, scratch);

    // Number of bytes of this write that lie before anchoredDistance; zero
    // if the write starts at or beyond it.
    u32 alen = t->anchoredDistance > offset ?
        MIN(length + offset, t->anchoredDistance) - offset : 0;

    const struct anchored_matcher_info *atable = getALiteralMatcher(t);
    if (atable && alen) {
        DEBUG_PRINTF("BEGIN ANCHORED %zu/%u\n", scratch->core_info.hlen, alen);
        runAnchoredTableStream(t, atable, alen, offset, scratch);

        // A halt during the anchored scan skips the floating scan and the
        // delayed-match flush entirely.
        if (can_stop_matching(scratch)) {
            goto exit;
        }
    }

    const struct HWLM *ftable = getFLiteralMatcher(t);
    if (ftable) {
        if (t->noFloatingRoots && !roseHasInFlightMatches(t, state, scratch)) {
            DEBUG_PRINTF("skip FLOATING: no inflight matches\n");
            goto flush_delay_and_exit;
        }

        // Number of bytes of this write to give to the floating matcher:
        // the whole write, unless floatingDistance is finite, in which case
        // only the part before it.
        size_t flen = length;
        if (t->floatingDistance != ROSE_BOUND_INF) {
            flen = t->floatingDistance > offset ?
                MIN(t->floatingDistance, length + offset) - offset : 0;
        }

        size_t hlength = scratch->core_info.hlen;

        // A rebuild is needed when there is history, the delay state is
        // flagged dirty, and we are still before maxFloatingDelayedMatch
        // (or that bound is infinite).
        char rebuild = hlength &&
                       (scratch->core_info.status & STATUS_DELAY_DIRTY) &&
                       (t->maxFloatingDelayedMatch == ROSE_BOUND_INF ||
                        offset < t->maxFloatingDelayedMatch);
        DEBUG_PRINTF("**rebuild %hhd status %hhu mfdm %u, offset %llu\n",
                     rebuild, scratch->core_info.status,
                     t->maxFloatingDelayedMatch, offset);

        // Nothing for the floating matcher to scan in this write; still
        // perform the rebuild if one is due before flushing.
        if (!flen) {
            if (rebuild) { /* rebuild floating delayed match stuff */
                do_rebuild(t, ftable, scratch);
            }
            goto flush_delay_and_exit;
        }

        if (rebuild) { /* rebuild floating delayed match stuff */
            do_rebuild(t, ftable, scratch);
        }

        if (flen + offset <= t->floatingMinDistance) {
            DEBUG_PRINTF("skip FLOATING: before floating min\n");
            goto flush_delay_and_exit;
        }

        size_t start = 0;
        if (offset < t->floatingMinDistance) {
            // This scan crosses the floating min distance, so we can use that
            // to set HWLM's "start" offset.
            start = t->floatingMinDistance - offset;
        }
        DEBUG_PRINTF("start=%zu\n", start);

        // The floating matcher only has stream state if the engine says so.
        u8 *stream_state;
        if (t->floatingStreamState) {
            stream_state = getFloatingMatcherState(t, state);
        } else {
            stream_state = NULL;
        }

        DEBUG_PRINTF("BEGIN FLOATING (over %zu/%zu)\n", flen, length);
        hwlmExecStreaming(ftable, scratch, flen, start, roseFloatingCallback,
                          scratch, tctxt->groups & t->floating_group_mask,
                          stream_state);
    }

flush_delay_and_exit:
    DEBUG_PRINTF("flushing floating\n");
    // Note: a terminate result here returns directly, bypassing the tidy-up
    // under the exit label.
    if (cleanUpDelayed(t, scratch, length, offset) == HWLM_TERMINATE_MATCHING) {
        return;
    }

exit:
    DEBUG_PRINTF("CLEAN UP TIME\n");
    if (!can_stop_matching(scratch)) {
        ensureStreamNeatAndTidy(t, state, scratch, length, offset);
    }
    DEBUG_PRINTF("DONE STREAMING SCAN, status = %u\n",
                 scratch->core_info.status);
    return;
}
Beispiel #15
0
/**
 * \brief Deliver a start-of-match (SOM) report to the user callback.
 *
 * Looks up the internal report for \p id, applies its bounds, exhaustion,
 * offset-adjust and minimum-length checks (the bounds, exhaustion and
 * min-length checks are skipped when \p is_simple is set), optionally
 * dedupes (only when built with DEDUPE_MATCHES), and then invokes
 * core_info.userCallback.
 *
 * \param from_offset Start of match; may be HS_OFFSET_PAST_HORIZON.
 * \param to_offset End of match; ri->offsetAdjust is added before reporting.
 * \param id Internal report id; must not be MO_INVALID_IDX.
 * \param context The hs_scratch for this scan.
 * \param is_simple If non-zero, skip the bounds, exhaustion and min-length
 *        handling and do not mark the report's ekey as matched.
 * \return MO_HALT_MATCHING if matching can already stop, or if the user
 *         callback (or, with DEDUPE_MATCHES, flushStoredSomMatches())
 *         requested a halt; MO_CONTINUE_MATCHING otherwise.
 */
static really_inline
int roseSomAdaptor_i(u64a from_offset, u64a to_offset, ReportID id,
                     void *context, char is_simple) {
    assert(id != MO_INVALID_IDX); // Should never get an invalid ID.

    u32 flags = 0;

    struct hs_scratch *scratch = (struct hs_scratch *)context;
    struct core_info *ci = &scratch->core_info;
    const struct RoseEngine *rose = ci->rose;
    const struct internal_report *ri = getInternalReport(rose, id);

    /* internal events should be handled by rose directly */
    assert(ri->type == EXTERNAL_CALLBACK);

    DEBUG_PRINTF("internal match at %llu: IID=%u type=%hhu RID=%u "
                 "offsetAdj=%d\n", to_offset, id, ri->type, ri->onmatch,
                 ri->offsetAdjust);

    if (unlikely(can_stop_matching(scratch))) {
        DEBUG_PRINTF("pre broken - halting\n");
        return MO_HALT_MATCHING;
    }

    // Drop matches whose (unadjusted) end offset lies outside the report's
    // [minOffset, maxOffset] range.
    if (!is_simple && ri->hasBounds) {
        assert(ri->minOffset || ri->minLength || ri->maxOffset < MAX_OFFSET);
        if (to_offset < ri->minOffset || to_offset > ri->maxOffset) {
            DEBUG_PRINTF("match fell outside valid range %llu !: [%llu,%llu]\n",
                         to_offset, ri->minOffset, ri->maxOffset);
            return MO_CONTINUE_MATCHING;
        }
    }

    int halt = 0;

    // halt is still 0 here, so this results in MO_CONTINUE_MATCHING.
    if (!is_simple && unlikely(isExhausted(ci->exhaustionVector, ri->ekey))) {
        DEBUG_PRINTF("ate exhausted match\n");
        goto do_return;
    }

#ifdef DEDUPE_MATCHES
    // Remember the unadjusted end offset; the dedupe logic below keys its
    // current_report_offset tracking on it.
    u64a offset = to_offset;
#endif

    to_offset += ri->offsetAdjust;
    assert(from_offset == HS_OFFSET_PAST_HORIZON || from_offset <= to_offset);

    if (!is_simple && ri->minLength) {
        // Matches shorter than minLength are silently dropped, unless the
        // start offset is unknown (past horizon).
        if (from_offset != HS_OFFSET_PAST_HORIZON &&
                (to_offset - from_offset < ri->minLength)) {
            return MO_CONTINUE_MATCHING;
        }
        if (ri->quashSom) {
            from_offset = 0;
        }
    }

    DEBUG_PRINTF(">> reporting match @[%llu,%llu] for sig %u ctxt %p <<\n",
                 from_offset, to_offset, ri->onmatch, ci->userContext);

#ifndef RELEASE_BUILD
    if (ri->offsetAdjust != 0) {
        // alert testing tools that we've got adjusted matches
        flags |= HS_MATCH_FLAG_ADJUSTED;
    }
#endif

#ifdef DEDUPE_MATCHES
    u32 dkeyCount = rose->dkeyCount;

    // Moving to a new report offset: reset the dedupe log(s) and flush any
    // SOM matches stored for earlier offsets.
    if (offset != scratch->deduper.current_report_offset) {

        assert(scratch->deduper.current_report_offset == ~0ULL
               || scratch->deduper.current_report_offset < offset);
        // The logs are indexed by offset parity: when advancing by exactly
        // one, only the log for the new offset is cleared; otherwise both.
        if (offset == scratch->deduper.current_report_offset + 1) {
            fatbit_clear(scratch->deduper.log[offset % 2]);
        } else {
            fatbit_clear(scratch->deduper.log[0]);
            fatbit_clear(scratch->deduper.log[1]);
        }

        halt = flushStoredSomMatches(scratch, offset);
        if (halt) {
            goto do_return;
        }

        scratch->deduper.current_report_offset = offset;
    }

    u32 dkey = ri->dkey;
    if (dkey != MO_INVALID_IDX) {
        if (ri->quashSom) {
            DEBUG_PRINTF("checking dkey %u at offset %llu\n", dkey, to_offset);
            assert(ri->offsetAdjust == 0 || ri->offsetAdjust == -1);
            if (fatbit_set(scratch->deduper.log[to_offset % 2], dkeyCount,
                           dkey)) {
                /* we have already raised this report at this offset, squash
                 * dupe match. */
                DEBUG_PRINTF("dedupe\n");
                goto do_return;
            }
        } else {
            /* SOM external event */
            DEBUG_PRINTF("checking dkey %u at offset %llu\n", dkey, to_offset);
            assert(ri->offsetAdjust == 0 || ri->offsetAdjust == -1);
            // Record the start offset in the SOM log rather than reporting
            // now; if the dkey was already set, keep the smaller start.
            u64a *starts = scratch->deduper.som_start_log[to_offset % 2];
            if (fatbit_set(scratch->deduper.som_log[to_offset % 2], dkeyCount,
                           dkey)) {
                starts[dkey] = MIN(starts[dkey], from_offset);
            } else {
                starts[dkey] = from_offset;
            }

            // Bit 0 is set for adjusted reports, bit 1 for unadjusted ones.
            if (ri->offsetAdjust) {
                scratch->deduper.som_log_dirty |= 1;
            } else {
                scratch->deduper.som_log_dirty |= 2;
            }

            goto do_return;
        }
    }
#endif

    // A non-zero return from the user callback is treated as a halt request.
    halt = ci->userCallback((unsigned int)ri->onmatch, from_offset, to_offset,
                            flags, ci->userContext);

    if (!is_simple) {
        markAsMatched(ci->exhaustionVector, ri->ekey);
    }

do_return:
    if (halt) {
        DEBUG_PRINTF("callback requested to terminate matches\n");

        // Record the halt both in the stored state and in the core_info.
        setBroken(ci->state, BROKEN_FROM_USER);
        ci->broken = BROKEN_FROM_USER;

        return MO_HALT_MATCHING;
    }

    return MO_CONTINUE_MATCHING;
}
Beispiel #16
0
/**
 * \brief Scan one write of \p length bytes at \p data on stream \p id.
 *
 * Validates arguments, short-circuits on an already-broken stream or a
 * zero-length write, populates the scratch core_info, fires zero-offset
 * boundary reports on the first write, dispatches to the runtime
 * implementation selected by rose->runtimeImpl, flushes stored SOM matches
 * and finally either advances the stream (history, offset, SOM slots) or
 * records why it stopped.
 *
 * \param id Stream handle.
 * \param data Bytes to scan.
 * \param length Number of bytes at \p data.
 * \param flags Passed through to populateCoreInfo().
 * \param scratch Scratch space; checked with validScratch() against the
 *        stream's engine.
 * \param onEvent User match callback, passed to populateCoreInfo().
 * \param context User context pointer, passed to populateCoreInfo().
 * \return HS_INVALID on bad arguments; HS_SCAN_TERMINATED if the stream was
 *         (or now is) halted by the user callback; HS_SUCCESS otherwise,
 *         including for exhausted streams and zero-length writes.
 */
static inline
hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data,
                                   unsigned length, UNUSED unsigned flags,
                                   hs_scratch_t *scratch,
                                   match_event_handler onEvent, void *context) {
    if (unlikely(!id || !scratch || !data || !validScratch(id->rose, scratch))) {
        return HS_INVALID;
    }

    const struct RoseEngine *rose = id->rose;
    char *state = getMultiState(id);

    // A stream broken by an earlier call does no further work: a user halt
    // keeps reporting termination, exhaustion reports success.
    u8 broken = getBroken(state);
    if (broken) {
        DEBUG_PRINTF("stream is broken, halting scan\n");
        if (broken == BROKEN_FROM_USER) {
            return HS_SCAN_TERMINATED;
        } else {
            assert(broken == BROKEN_EXHAUSTED);
            return HS_SUCCESS;
        }
    }

    // We avoid doing any work if the user has given us zero bytes of data to
    // scan. Arguably we should define some semantics for how we treat vacuous
    // cases here.
    if (unlikely(length == 0)) {
        DEBUG_PRINTF("zero length block\n");
        assert(getBroken(state) != BROKEN_FROM_USER);
        return HS_SUCCESS;
    }

    u32 historyAmount = getHistoryAmount(rose, id->offset);
    populateCoreInfo(scratch, rose, state, onEvent, context, data, length,
                     getHistory(state, rose, id->offset), historyAmount,
                     id->offset, flags);
    assert(scratch->core_info.hlen <= id->offset
           && scratch->core_info.hlen <= rose->historyRequired);

    prefetch_data(data, length);

    if (rose->somLocationCount) {
        loadSomFromStream(scratch, id->offset);
    }

    // Reports at offset zero are only raised on the first write to a stream.
    if (!id->offset && rose->boundary.reportZeroOffset) {
        DEBUG_PRINTF("zero reports\n");
        processReportList(rose, rose->boundary.reportZeroOffset, 0, scratch);
    }

    switch (rose->runtimeImpl) {
    default:
        // Unknown runtime implementation: asserts, and (with no break here)
        // falls through to the full Rose path when assertions are disabled.
        assert(0);
    case ROSE_RUNTIME_FULL_ROSE:
        rawStreamExec(id, scratch);
        break;
    case ROSE_RUNTIME_PURE_LITERAL:
        pureLiteralStreamExec(id, scratch);
        break;
    case ROSE_RUNTIME_SINGLE_OUTFIX:
        soleOutfixStreamExec(id, scratch);
    }

    // Flush all stored SOM matches; a non-zero result is recorded as a user
    // halt in both the stream state and the core_info.
    if (rose->hasSom && !told_to_stop_matching(scratch)) {
        int halt = flushStoredSomMatches(scratch, ~0ULL);
        if (halt) {
            setBroken(state, BROKEN_FROM_USER);
            scratch->core_info.broken = BROKEN_FROM_USER;
        }
    }

    // The stream only advances (history, offset, SOM slots) if matching can
    // continue; otherwise the reason for stopping decides the outcome.
    if (likely(!can_stop_matching(scratch))) {
        maintainHistoryBuffer(id->rose, getMultiState(id), data, length);
        id->offset += length; /* maintain offset */

        if (rose->somLocationCount) {
            storeSomToStream(scratch, id->offset);
        }
    } else if (told_to_stop_matching(scratch)) {
        return HS_SCAN_TERMINATED;
    } else { /* exhausted */
        setBroken(state, BROKEN_EXHAUSTED);
    }

    return HS_SUCCESS;
}