示例#1
0
/**
 * Open a new stream on a streaming-mode database.
 *
 * @param db      Database to open the stream against; checked with
 *                validDatabase().
 * @param flags   Unused.
 * @param stream  Out-parameter. Cleared to NULL as soon as it is known to be
 *                non-NULL, and set to the new stream only on success.
 * @return HS_SUCCESS on success; HS_INVALID if @p stream is NULL or the
 *         bytecode is not 16-byte aligned; the validDatabase() error if the
 *         database is rejected; HS_DB_MODE_ERROR if the database was not
 *         built for HS_MODE_STREAM; HS_NOMEM if the stream allocation fails.
 */
HS_PUBLIC_API
hs_error_t hs_open_stream(const hs_database_t *db, UNUSED unsigned flags,
                          hs_stream_t **stream) {
    if (unlikely(stream == NULL)) {
        return HS_INVALID;
    }

    /* Make sure the caller never sees a stale handle on any error path. */
    *stream = NULL;

    const hs_error_t db_status = validDatabase(db);
    if (unlikely(db_status != HS_SUCCESS)) {
        return db_status;
    }

    const struct RoseEngine *engine = hs_get_bytecode(db);
    if (unlikely(!ISALIGNED_16(engine))) {
        return HS_INVALID;
    }
    if (unlikely(engine->mode != HS_MODE_STREAM)) {
        return HS_DB_MODE_ERROR;
    }

    /* One allocation holds the stream header followed by the engine state. */
    const size_t state_bytes = engine->stateOffsets.end;
    const size_t alloc_bytes = sizeof(struct hs_stream) + state_bytes;

    struct hs_stream *opened = hs_stream_alloc(alloc_bytes);
    if (unlikely(opened == NULL)) {
        return HS_NOMEM;
    }

    init_stream(opened, engine);
    *stream = opened;

    return HS_SUCCESS;
}
示例#2
0
/**
 * Report how many bytes a stream opened on this database occupies.
 *
 * @param db           Database to query; checked with validDatabase().
 * @param stream_size  Out-parameter receiving the size of the stream state
 *                     plus the hs_stream header. Written only on success.
 * @return HS_SUCCESS on success; HS_INVALID if @p stream_size is NULL or the
 *         bytecode is not 16-byte aligned; the validDatabase() error if the
 *         database is rejected; HS_DB_MODE_ERROR if the database was not
 *         built for HS_MODE_STREAM.
 */
HS_PUBLIC_API
hs_error_t hs_stream_size(const hs_database_t *db, size_t *stream_size) {
    if (stream_size == NULL) {
        return HS_INVALID;
    }

    const hs_error_t db_status = validDatabase(db);
    if (db_status != HS_SUCCESS) {
        return db_status;
    }

    const struct RoseEngine *engine = hs_get_bytecode(db);
    if (!ISALIGNED_16(engine)) {
        return HS_INVALID;
    }
    if (engine->mode != HS_MODE_STREAM) {
        return HS_DB_MODE_ERROR;
    }

    /* Header first, then the engine's stream state. */
    const u32 state_bytes = engine->stateOffsets.end;
    *stream_size = sizeof(struct hs_stream) + state_bytes;

    return HS_SUCCESS;
}
示例#3
0
/**
 * Scan an array of data blocks against a vectored-mode database.
 *
 * A temporary stream is set up inside the scratch's bstate region, each block
 * is fed through hs_scan_stream_internal() in order, and finally (when a
 * callback was supplied) report_eod_matches() is run.
 *
 * @param db       Database to scan with; checked with validDatabase().
 * @param data     Array of @p count block pointers.
 * @param length   Array of @p count block lengths.
 * @param count    Number of blocks.
 * @param flags    Unused.
 * @param scratch  Scratch space; checked with validScratch().
 * @param onEvent  Match callback; may be NULL.
 * @param context  Opaque pointer handed through to the scanning routines.
 * @return HS_SUCCESS on success; HS_INVALID for NULL @p scratch, @p data or
 *         @p length, misaligned bytecode, or scratch rejected by
 *         validScratch(); the validDatabase() error if the database is
 *         rejected; HS_DB_MODE_ERROR if the database was not built for
 *         HS_MODE_VECTORED; the first non-success result from
 *         hs_scan_stream_internal(); HS_SCAN_TERMINATED if matching was told
 *         to stop during the end-of-data reports.
 */
HS_PUBLIC_API
hs_error_t hs_scan_vector(const hs_database_t *db, const char * const * data,
                          const unsigned int *length, unsigned int count,
                          UNUSED unsigned int flags, hs_scratch_t *scratch,
                          match_event_handler onEvent, void *context) {
    if (unlikely(!scratch || !data || !length)) {
        return HS_INVALID;
    }

    const hs_error_t db_status = validDatabase(db);
    if (unlikely(db_status != HS_SUCCESS)) {
        return db_status;
    }

    const struct RoseEngine *engine = hs_get_bytecode(db);
    if (unlikely(!ISALIGNED_16(engine))) {
        return HS_INVALID;
    }
    if (unlikely(engine->mode != HS_MODE_VECTORED)) {
        return HS_DB_MODE_ERROR;
    }
    if (unlikely(!validScratch(engine, scratch))) {
        return HS_INVALID;
    }

    /* Open a stream that lives in the scratch's bstate region. */
    hs_stream_t *vstream = (hs_stream_t *)(scratch->bstate);
    init_stream(vstream, engine);

    /* Feed the blocks through in order, bailing out on the first failure. */
    u32 blk = 0;
    while (blk < count) {
        DEBUG_PRINTF("block %u/%u offset=%llu len=%u\n", blk, count,
                     vstream->offset, length[blk]);
#ifdef DEBUG
        dumpData(data[blk], length[blk]);
#endif
        const hs_error_t scan_status
            = hs_scan_stream_internal(vstream, data[blk], length[blk], 0,
                                      scratch, onEvent, context);
        if (scan_status != HS_SUCCESS) {
            return scan_status;
        }
        blk++;
    }

    /* Close the stream: without a callback there is nothing to report. */
    if (!onEvent) {
        return HS_SUCCESS;
    }

    report_eod_matches(vstream, scratch, onEvent, context);

    return told_to_stop_matching(scratch) ? HS_SCAN_TERMINATED : HS_SUCCESS;
}
示例#4
0
/**
 * Allocate a scratch region for a database, or grow an existing one so that
 * it is also large enough for this database.
 *
 * A temporary prototype header is built first: it starts as a copy of the
 * existing scratch header (or all zeroes when *scratch is NULL), and every
 * size field is raised to at least what this database needs. Only when some
 * field had to grow, or no scratch existed yet, is the old scratch released
 * and a replacement built by alloc_scratch().
 *
 * @param db       Database to size the scratch for; checked with dbIsValid().
 * @param scratch  In/out. Points to NULL to request a fresh scratch, or to an
 *                 existing scratch to be reused or grown. When an allocation
 *                 fails, any scratch passed in has been freed and *scratch is
 *                 set to NULL. It is left untouched on the early validation
 *                 failures.
 * @return HS_SUCCESS on success; HS_INVALID if either argument is NULL or an
 *         existing scratch is misaligned or has the wrong magic value;
 *         otherwise the error reported by dbIsValid(), hs_check_alloc() or
 *         alloc_scratch().
 */
HS_PUBLIC_API
hs_error_t hs_alloc_scratch(const hs_database_t *db, hs_scratch_t **scratch) {
    if (!db || !scratch) {
        return HS_INVALID;
    }

    /* We need to do some real sanity checks on the database as some users mmap
     * in old deserialised databases, so this is the first real opportunity we
     * have to make sure it is sane.
     */
    hs_error_t rv = dbIsValid(db);
    if (rv != HS_SUCCESS) {
        return rv;
    }

    /* We can also sanity-check the scratch parameter: if it points to an
     * existing scratch area, that scratch should have valid magic bits. */
    if (*scratch != NULL) {
        /* has to be aligned before we can do anything with it */
        if (!ISALIGNED_CL(*scratch)) {
            return HS_INVALID;
        }
        if ((*scratch)->magic != SCRATCH_MAGIC) {
            return HS_INVALID;
        }
    }

    const struct RoseEngine *rose = hs_get_bytecode(db);
    int resize = 0;

    /* Temporary header used only for working out the required sizes. It is
     * over-allocated so that an aligned header fits inside it. */
    hs_scratch_t *proto;
    hs_scratch_t *proto_tmp = hs_scratch_alloc(sizeof(struct hs_scratch) + 256);
    hs_error_t proto_ret = hs_check_alloc(proto_tmp);
    if (proto_ret != HS_SUCCESS) {
        hs_scratch_free(proto_tmp);
        /* A scratch handle is an aligned pointer that need not be the address
         * the allocator returned; the base of the allocation is recorded in
         * scratch_alloc, and that is what must be handed back to the
         * allocator (the resize path below does the same). */
        if (*scratch) {
            hs_scratch_free((*scratch)->scratch_alloc);
        }
        *scratch = NULL;
        return proto_ret;
    }

    proto = ROUNDUP_PTR(proto_tmp, 64);

    if (*scratch) {
        /* start from the sizes the existing scratch already provides */
        *proto = **scratch;
    } else {
        /* nothing to reuse: a fresh scratch must always be built */
        memset(proto, 0, sizeof(*proto));
        resize = 1;
    }
    proto->scratch_alloc = (char *)proto_tmp;

    /* Each check below raises one prototype field to this database's
     * requirement and flags a resize if the existing value was too small. */

    u32 max_anchored_match = rose->anchoredDistance;
    if (max_anchored_match > rose->maxSafeAnchoredDROffset) {
        u32 anchored_region_len = max_anchored_match
            - rose->maxSafeAnchoredDROffset;
        if (anchored_region_len > proto->anchored_region_len) {
            resize = 1;
            proto->anchored_region_len = anchored_region_len;
        }
    }

    u32 anchored_region_width = rose->anchoredMatches;
    if (anchored_region_width > proto->anchored_region_width) {
        resize = 1;
        proto->anchored_region_width = anchored_region_width;
    }

    if (rose->anchoredDistance > proto->anchored_literal_region_len) {
        resize = 1;
        proto->anchored_literal_region_len = rose->anchoredDistance;
    }

    if (rose->anchored_count > proto->anchored_literal_count) {
        resize = 1;
        proto->anchored_literal_count = rose->anchored_count;
    }

    if (rose->delay_count > proto->delay_count) {
        resize = 1;
        proto->delay_count = rose->delay_count;
    }

    if (rose->roleCount > proto->roleCount) {
        resize = 1;
        proto->roleCount = rose->roleCount;
    }

    if (rose->tStateSize > proto->tStateSize) {
        resize = 1;
        proto->tStateSize = rose->tStateSize;
    }

    /* the sidecar is optional: only size for it when the engine has one */
    const struct sidecar *side = getSLiteralMatcher(rose);
    if (side && sidecarScratchSize(side) > proto->sideScratchSize) {
        resize = 1;
        proto->sideScratchSize = sidecarScratchSize(side);
    }

    u32 som_store_count = rose->somLocationCount;
    if (som_store_count > proto->som_store_count) {
        resize = 1;
        proto->som_store_count = som_store_count;
    }

    u32 queueCount = rose->queueCount;
    if (queueCount > proto->queueCount) {
        resize = 1;
        proto->queueCount = queueCount;
    }

    /* bstate is only sized for block and vectored databases; any other mode
     * contributes nothing here. */
    u32 bStateSize = 0;
    if (rose->mode == HS_MODE_BLOCK) {
        bStateSize = rose->stateOffsets.end;
    } else if (rose->mode == HS_MODE_VECTORED) {
        /* vectoring database require a full stream state (inc header) */
        bStateSize = sizeof(struct hs_stream) + rose->stateOffsets.end;
    }

    if (bStateSize > proto->bStateSize) {
        resize = 1;
        proto->bStateSize = bStateSize;
    }

    u32 fullStateSize = rose->scratchStateSize;
    if (fullStateSize > proto->fullStateSize) {
        resize = 1;
        proto->fullStateSize = fullStateSize;
    }

    if (rose->dkeyCount > proto->deduper.log_size) {
        resize = 1;
        proto->deduper.log_size = rose->dkeyCount;
    }

    if (resize) {
        /* The old scratch is released before its replacement is built, so a
         * failure below leaves the caller with no scratch at all. */
        if (*scratch) {
            hs_scratch_free((*scratch)->scratch_alloc);
        }

        hs_error_t alloc_ret = alloc_scratch(proto, scratch);
        hs_scratch_free(proto_tmp); /* kill off temp used for sizing */
        if (alloc_ret != HS_SUCCESS) {
            *scratch = NULL;
            return alloc_ret;
        }
    } else {
        hs_scratch_free(proto_tmp); /* kill off temp used for sizing */
    }

    return HS_SUCCESS;
}
示例#5
0
/**
 * Scan a single block of data against a block-mode database.
 *
 * @param db       Database to scan with; checked with validDatabase().
 * @param data     Buffer to scan; must not be NULL.
 * @param length   Number of bytes in @p data.
 * @param flags    Passed through to populateCoreInfo().
 * @param scratch  Scratch space; must not be NULL and must pass
 *                 validScratch() for this database.
 * @param onEvent  Match callback, stored via populateCoreInfo().
 * @param userCtx  Opaque pointer stored alongside the callback.
 * @return HS_SUCCESS when the scan completes (including the early exit for
 *         input shorter than the database's minimum width); HS_INVALID for
 *         NULL @p scratch or @p data, misaligned bytecode, or scratch
 *         rejected by validScratch(); the validDatabase() error if the
 *         database is rejected; HS_DB_MODE_ERROR if the database was not
 *         built for HS_MODE_BLOCK; HS_SCAN_TERMINATED if matching was told to
 *         stop.
 */
HS_PUBLIC_API
hs_error_t hs_scan(const hs_database_t *db, const char *data, unsigned length,
                   unsigned flags, hs_scratch_t *scratch,
                   match_event_handler onEvent, void *userCtx) {
    if (unlikely(!scratch || !data)) {
        return HS_INVALID;
    }

    hs_error_t err = validDatabase(db);
    if (unlikely(err != HS_SUCCESS)) {
        return err;
    }

    const struct RoseEngine *rose = hs_get_bytecode(db);
    if (unlikely(!ISALIGNED_16(rose))) {
        return HS_INVALID;
    }

    if (unlikely(rose->mode != HS_MODE_BLOCK)) {
        return HS_DB_MODE_ERROR;
    }

    if (unlikely(!validScratch(rose, scratch))) {
        return HS_INVALID;
    }

    /* Input shorter than the database's minimum width: return success
     * immediately, before the scratch is touched. */
    if (rose->minWidth > length) {
        DEBUG_PRINTF("minwidth=%u > length=%u\n", rose->minWidth, length);
        return HS_SUCCESS;
    }

    prefetch_data(data, length);

    /* populate core info in scratch */
    populateCoreInfo(scratch, rose, scratch->bstate, onEvent, userCtx, data,
                     length, NULL, 0, 0, flags);

    clearEvec(scratch->core_info.exhaustionVector, rose);

    /* Empty buffer: only the reportZeroEodOffset list (if any) is processed,
     * then control jumps straight to the return-value computation, skipping
     * the engines and the done_scan handling. */
    if (!length) {
        if (rose->boundary.reportZeroEodOffset) {
            processReportList(rose, rose->boundary.reportZeroEodOffset, 0,
                              scratch);
        }
        goto set_retval;
    }

    /* Non-empty buffer: process the reportZeroOffset list at offset 0. */
    if (rose->boundary.reportZeroOffset) {
        processReportList(rose, rose->boundary.reportZeroOffset, 0, scratch);
    }

    /* Buffer is too short for the minWidthExcludingBoundaries bound: skip the
     * engines but still run the done_scan handling. */
    if (rose->minWidthExcludingBoundaries > length) {
        DEBUG_PRINTF("minWidthExcludingBoundaries=%u > length=%u\n",
                     rose->minWidthExcludingBoundaries, length);
        goto done_scan;
    }

    // Similarly, we may have a maximum width (for engines constructed entirely
    // of bi-anchored patterns).
    if (rose->maxBiAnchoredWidth != ROSE_BOUND_INF
        && length > rose->maxBiAnchoredWidth) {
        DEBUG_PRINTF("block len=%u longer than maxBAWidth=%u\n", length,
                     rose->maxBiAnchoredWidth);
        goto done_scan;
    }

    // Is this a small write case?
    if (rose->smallWriteOffset) {
        const struct SmallWriteEngine *smwr = getSmallWrite(rose);
        assert(smwr);

        // Apply the small write engine if and only if the block (buffer) is
        // small enough. Otherwise, we allow rose &co to deal with it.
        if (length < smwr->largestBuffer) {
            DEBUG_PRINTF("Attempting small write of block %u bytes long.\n",
                         length);
            runSmallWriteEngine(smwr, scratch);
            goto done_scan;
        }
    }

    /* Dispatch on the runtime implementation recorded in the database. */
    switch (rose->runtimeImpl) {
    default:
        /* Unknown implementation: asserts, and if the assert does not abort
         * (e.g. it is compiled out) control falls through to the full Rose
         * path below. */
        assert(0);
    case ROSE_RUNTIME_FULL_ROSE:
        rawBlockExec(rose, scratch);
        break;
    case ROSE_RUNTIME_PURE_LITERAL:
        pureLiteralBlockExec(rose, scratch);
        break;
    case ROSE_RUNTIME_SINGLE_OUTFIX:
        soleOutfixBlockExec(rose, scratch);
        break;
    }

done_scan:
    /* If matching was told to stop during the scan, return before the SOM
     * flush and the reportEodOffset list below. */
    if (told_to_stop_matching(scratch)) {
        return HS_SCAN_TERMINATED;
    }

    /* For databases with hasSom set, flush stored SOM matches; a non-zero
     * result is treated as a request to terminate. */
    if (rose->hasSom) {
        int halt = flushStoredSomMatches(scratch, ~0ULL);
        if (halt) {
            return HS_SCAN_TERMINATED;
        }
    }

    /* Process the reportEodOffset list at the end of the buffer. */
    if (rose->boundary.reportEodOffset) {
        processReportList(rose, rose->boundary.reportEodOffset, length, scratch);
    }

set_retval:
    /* The report processing above may itself have requested a stop, so the
     * flag is consulted once more to choose the return code. */
    DEBUG_PRINTF("done. told_to_stop_matching=%d\n",
                 told_to_stop_matching(scratch));
    return told_to_stop_matching(scratch) ? HS_SCAN_TERMINATED : HS_SUCCESS;
}