HS_PUBLIC_API
hs_error_t hs_open_stream(const hs_database_t *db, UNUSED unsigned flags,
                          hs_stream_t **stream) {
    if (unlikely(!stream)) {
        return HS_INVALID;
    }

    *stream = NULL;

    hs_error_t err = validDatabase(db);
    if (unlikely(err != HS_SUCCESS)) {
        return err;
    }

    const struct RoseEngine *rose = hs_get_bytecode(db);
    if (unlikely(!ISALIGNED_16(rose))) {
        return HS_INVALID;
    }

    if (unlikely(rose->mode != HS_MODE_STREAM)) {
        return HS_DB_MODE_ERROR;
    }

    size_t stateSize = rose->stateOffsets.end;
    struct hs_stream *s = hs_stream_alloc(sizeof(struct hs_stream) + stateSize);
    if (unlikely(!s)) {
        return HS_NOMEM;
    }

    init_stream(s, rose);

    *stream = s;
    return HS_SUCCESS;
}
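/*
 * Caller-side usage sketch (illustrative only, not part of the runtime):
 * hs_open_stream() is typically paired with hs_scan_stream() and
 * hs_close_stream(), with a scratch region reused across calls. The pattern,
 * buffers and handler names below are hypothetical.
 *
 *     #include <stdio.h>
 *     #include <hs.h>
 *
 *     static int on_match(unsigned int id, unsigned long long from,
 *                         unsigned long long to, unsigned int flags,
 *                         void *ctx) {
 *         printf("pattern %u matched, ending at offset %llu\n", id, to);
 *         return 0; // non-zero would terminate scanning
 *     }
 *
 *     static void scan_two_chunks(const char *chunk1, unsigned len1,
 *                                 const char *chunk2, unsigned len2) {
 *         hs_database_t *db = NULL;
 *         hs_compile_error_t *err = NULL;
 *         hs_scratch_t *scratch = NULL;
 *         hs_stream_t *stream = NULL;
 *
 *         if (hs_compile("foo.*bar", HS_FLAG_DOTALL, HS_MODE_STREAM, NULL,
 *                        &db, &err) != HS_SUCCESS) {
 *             hs_free_compile_error(err);
 *             return;
 *         }
 *         if (hs_alloc_scratch(db, &scratch) != HS_SUCCESS ||
 *             hs_open_stream(db, 0, &stream) != HS_SUCCESS) {
 *             hs_free_scratch(scratch);
 *             hs_free_database(db);
 *             return;
 *         }
 *
 *         // matches may span the boundary between the two writes
 *         hs_scan_stream(stream, chunk1, len1, 0, scratch, on_match, NULL);
 *         hs_scan_stream(stream, chunk2, len2, 0, scratch, on_match, NULL);
 *
 *         // closing the stream reports any end-of-data matches
 *         hs_close_stream(stream, scratch, on_match, NULL);
 *         hs_free_scratch(scratch);
 *         hs_free_database(db);
 *     }
 */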
HS_PUBLIC_API
hs_error_t hs_stream_size(const hs_database_t *db, size_t *stream_size) {
    if (!stream_size) {
        return HS_INVALID;
    }

    hs_error_t ret = validDatabase(db);
    if (ret != HS_SUCCESS) {
        return ret;
    }

    const struct RoseEngine *rose = hs_get_bytecode(db);
    if (!ISALIGNED_16(rose)) {
        return HS_INVALID;
    }

    if (rose->mode != HS_MODE_STREAM) {
        return HS_DB_MODE_ERROR;
    }

    u32 base_stream_size = rose->stateOffsets.end;

    // stream state plus the hs_stream struct itself
    *stream_size = base_stream_size + sizeof(struct hs_stream);

    return HS_SUCCESS;
}
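/*
 * Illustrative sketch (not part of the runtime): a caller can use
 * hs_stream_size() to budget memory before opening many concurrent streams.
 * The stream count below is a hypothetical example.
 *
 *     size_t per_stream = 0;
 *     if (hs_stream_size(db, &per_stream) == HS_SUCCESS) {
 *         size_t n_streams = 10000; // hypothetical concurrency target
 *         printf("streaming state: %zu bytes/stream, %zu bytes total\n",
 *                per_stream, per_stream * n_streams);
 *     }
 */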
HS_PUBLIC_API
hs_error_t hs_scan_vector(const hs_database_t *db, const char * const * data,
                          const unsigned int *length, unsigned int count,
                          UNUSED unsigned int flags, hs_scratch_t *scratch,
                          match_event_handler onEvent, void *context) {
    if (unlikely(!scratch || !data || !length)) {
        return HS_INVALID;
    }

    hs_error_t err = validDatabase(db);
    if (unlikely(err != HS_SUCCESS)) {
        return err;
    }

    const struct RoseEngine *rose = hs_get_bytecode(db);
    if (unlikely(!ISALIGNED_16(rose))) {
        return HS_INVALID;
    }

    if (unlikely(rose->mode != HS_MODE_VECTORED)) {
        return HS_DB_MODE_ERROR;
    }

    if (unlikely(!validScratch(rose, scratch))) {
        return HS_INVALID;
    }

    hs_stream_t *id = (hs_stream_t *)(scratch->bstate);

    init_stream(id, rose); /* open stream */

    for (u32 i = 0; i < count; i++) {
        DEBUG_PRINTF("block %u/%u offset=%llu len=%u\n", i, count, id->offset,
                     length[i]);
#ifdef DEBUG
        dumpData(data[i], length[i]);
#endif
        hs_error_t ret = hs_scan_stream_internal(id, data[i], length[i], 0,
                                                 scratch, onEvent, context);
        if (ret != HS_SUCCESS) {
            return ret;
        }
    }

    /* close stream */
    if (onEvent) {
        report_eod_matches(id, scratch, onEvent, context);

        if (told_to_stop_matching(scratch)) {
            return HS_SCAN_TERMINATED;
        }
    }

    return HS_SUCCESS;
}
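/*
 * Caller-side sketch (illustrative only): vectored mode scans an array of
 * blocks as if they were concatenated, so a match may start in data[0] and
 * end in data[1]. The database is assumed to have been compiled with
 * HS_MODE_VECTORED; the buffers and handler are hypothetical.
 *
 *     const char *blocks[] = { "foo", "obar" };
 *     unsigned int lens[] = { 3, 4 };
 *
 *     hs_error_t ret = hs_scan_vector(db, blocks, lens, 2, 0, scratch,
 *                                     on_match, NULL);
 *     if (ret == HS_SCAN_TERMINATED) {
 *         // the handler returned non-zero and halted the scan
 *     }
 */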
HS_PUBLIC_API
hs_error_t hs_alloc_scratch(const hs_database_t *db, hs_scratch_t **scratch) {
    if (!db || !scratch) {
        return HS_INVALID;
    }

    /* We need to do some real sanity checks on the database as some users
     * mmap in old deserialised databases, so this is the first real
     * opportunity we have to make sure it is sane. */
    hs_error_t rv = dbIsValid(db);
    if (rv != HS_SUCCESS) {
        return rv;
    }

    /* We can also sanity-check the scratch parameter: if it points to an
     * existing scratch area, that scratch should have valid magic bits. */
    if (*scratch != NULL) {
        /* has to be aligned before we can do anything with it */
        if (!ISALIGNED_CL(*scratch)) {
            return HS_INVALID;
        }
        if ((*scratch)->magic != SCRATCH_MAGIC) {
            return HS_INVALID;
        }
    }

    const struct RoseEngine *rose = hs_get_bytecode(db);
    int resize = 0;

    hs_scratch_t *proto;
    hs_scratch_t *proto_tmp = hs_scratch_alloc(sizeof(struct hs_scratch) + 256);
    hs_error_t proto_ret = hs_check_alloc(proto_tmp);
    if (proto_ret != HS_SUCCESS) {
        hs_scratch_free(proto_tmp);
        hs_scratch_free(*scratch);
        *scratch = NULL;
        return proto_ret;
    }

    proto = ROUNDUP_PTR(proto_tmp, 64);

    if (*scratch) {
        *proto = **scratch;
    } else {
        memset(proto, 0, sizeof(*proto));
        resize = 1;
    }
    proto->scratch_alloc = (char *)proto_tmp;

    u32 max_anchored_match = rose->anchoredDistance;
    if (max_anchored_match > rose->maxSafeAnchoredDROffset) {
        u32 anchored_region_len = max_anchored_match
            - rose->maxSafeAnchoredDROffset;
        if (anchored_region_len > proto->anchored_region_len) {
            resize = 1;
            proto->anchored_region_len = anchored_region_len;
        }
    }

    u32 anchored_region_width = rose->anchoredMatches;
    if (anchored_region_width > proto->anchored_region_width) {
        resize = 1;
        proto->anchored_region_width = anchored_region_width;
    }

    if (rose->anchoredDistance > proto->anchored_literal_region_len) {
        resize = 1;
        proto->anchored_literal_region_len = rose->anchoredDistance;
    }

    if (rose->anchored_count > proto->anchored_literal_count) {
        resize = 1;
        proto->anchored_literal_count = rose->anchored_count;
    }

    if (rose->delay_count > proto->delay_count) {
        resize = 1;
        proto->delay_count = rose->delay_count;
    }

    if (rose->roleCount > proto->roleCount) {
        resize = 1;
        proto->roleCount = rose->roleCount;
    }

    if (rose->tStateSize > proto->tStateSize) {
        resize = 1;
        proto->tStateSize = rose->tStateSize;
    }

    const struct sidecar *side = getSLiteralMatcher(rose);
    if (side && sidecarScratchSize(side) > proto->sideScratchSize) {
        resize = 1;
        proto->sideScratchSize = sidecarScratchSize(side);
    }

    u32 som_store_count = rose->somLocationCount;
    if (som_store_count > proto->som_store_count) {
        resize = 1;
        proto->som_store_count = som_store_count;
    }

    u32 queueCount = rose->queueCount;
    if (queueCount > proto->queueCount) {
        resize = 1;
        proto->queueCount = queueCount;
    }

    u32 bStateSize = 0;
    if (rose->mode == HS_MODE_BLOCK) {
        bStateSize = rose->stateOffsets.end;
    } else if (rose->mode == HS_MODE_VECTORED) {
        /* vectored databases require a full stream state (inc header) */
        bStateSize = sizeof(struct hs_stream) + rose->stateOffsets.end;
    }

    if (bStateSize > proto->bStateSize) {
        resize = 1;
        proto->bStateSize = bStateSize;
    }

    u32 fullStateSize = rose->scratchStateSize;
    if (fullStateSize > proto->fullStateSize) {
        resize = 1;
        proto->fullStateSize = fullStateSize;
    }

    if (rose->dkeyCount > proto->deduper.log_size) {
        resize = 1;
        proto->deduper.log_size = rose->dkeyCount;
    }

    if (resize) {
        if (*scratch) {
            hs_scratch_free((*scratch)->scratch_alloc);
        }

        hs_error_t alloc_ret = alloc_scratch(proto, scratch);
        hs_scratch_free(proto_tmp); /* kill off temp used for sizing */
        if (alloc_ret != HS_SUCCESS) {
            *scratch = NULL;
            return alloc_ret;
        }
    } else {
        hs_scratch_free(proto_tmp); /* kill off temp used for sizing */
    }

    return HS_SUCCESS;
}
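/*
 * Caller-side sketch (illustrative only): passing the same scratch pointer to
 * hs_alloc_scratch() for several databases grows it to the largest
 * requirement, after which it is valid for all of them; hs_clone_scratch()
 * gives each worker thread its own copy. db_block and db_stream are
 * hypothetical databases compiled elsewhere.
 *
 *     hs_scratch_t *scratch = NULL;
 *     if (hs_alloc_scratch(db_block, &scratch) != HS_SUCCESS ||
 *         hs_alloc_scratch(db_stream, &scratch) != HS_SUCCESS) {
 *         hs_free_scratch(scratch);
 *         return;
 *     }
 *
 *     hs_scratch_t *per_thread = NULL;
 *     if (hs_clone_scratch(scratch, &per_thread) == HS_SUCCESS) {
 *         // hand per_thread to a worker; a scratch region must not be
 *         // shared between concurrent scans
 *     }
 */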
HS_PUBLIC_API
hs_error_t hs_scan(const hs_database_t *db, const char *data, unsigned length,
                   unsigned flags, hs_scratch_t *scratch,
                   match_event_handler onEvent, void *userCtx) {
    if (unlikely(!scratch || !data)) {
        return HS_INVALID;
    }

    hs_error_t err = validDatabase(db);
    if (unlikely(err != HS_SUCCESS)) {
        return err;
    }

    const struct RoseEngine *rose = hs_get_bytecode(db);
    if (unlikely(!ISALIGNED_16(rose))) {
        return HS_INVALID;
    }

    if (unlikely(rose->mode != HS_MODE_BLOCK)) {
        return HS_DB_MODE_ERROR;
    }

    if (unlikely(!validScratch(rose, scratch))) {
        return HS_INVALID;
    }

    if (rose->minWidth > length) {
        DEBUG_PRINTF("minwidth=%u > length=%u\n", rose->minWidth, length);
        return HS_SUCCESS;
    }

    prefetch_data(data, length);

    /* populate core info in scratch */
    populateCoreInfo(scratch, rose, scratch->bstate, onEvent, userCtx, data,
                     length, NULL, 0, 0, flags);

    clearEvec(scratch->core_info.exhaustionVector, rose);

    if (!length) {
        if (rose->boundary.reportZeroEodOffset) {
            processReportList(rose, rose->boundary.reportZeroEodOffset, 0,
                              scratch);
        }
        goto set_retval;
    }

    if (rose->boundary.reportZeroOffset) {
        processReportList(rose, rose->boundary.reportZeroOffset, 0, scratch);
    }

    if (rose->minWidthExcludingBoundaries > length) {
        DEBUG_PRINTF("minWidthExcludingBoundaries=%u > length=%u\n",
                     rose->minWidthExcludingBoundaries, length);
        goto done_scan;
    }

    // Similarly, we may have a maximum width (for engines constructed
    // entirely of bi-anchored patterns).
    if (rose->maxBiAnchoredWidth != ROSE_BOUND_INF
        && length > rose->maxBiAnchoredWidth) {
        DEBUG_PRINTF("block len=%u longer than maxBAWidth=%u\n", length,
                     rose->maxBiAnchoredWidth);
        goto done_scan;
    }

    // Is this a small write case?
    if (rose->smallWriteOffset) {
        const struct SmallWriteEngine *smwr = getSmallWrite(rose);
        assert(smwr);

        // Apply the small write engine if and only if the block (buffer) is
        // small enough. Otherwise, we allow rose &co to deal with it.
        if (length < smwr->largestBuffer) {
            DEBUG_PRINTF("Attempting small write of block %u bytes long.\n",
                         length);
            runSmallWriteEngine(smwr, scratch);
            goto done_scan;
        }
    }

    switch (rose->runtimeImpl) {
    default:
        assert(0);
    case ROSE_RUNTIME_FULL_ROSE:
        rawBlockExec(rose, scratch);
        break;
    case ROSE_RUNTIME_PURE_LITERAL:
        pureLiteralBlockExec(rose, scratch);
        break;
    case ROSE_RUNTIME_SINGLE_OUTFIX:
        soleOutfixBlockExec(rose, scratch);
        break;
    }

done_scan:
    if (told_to_stop_matching(scratch)) {
        return HS_SCAN_TERMINATED;
    }

    if (rose->hasSom) {
        int halt = flushStoredSomMatches(scratch, ~0ULL);
        if (halt) {
            return HS_SCAN_TERMINATED;
        }
    }

    if (rose->boundary.reportEodOffset) {
        processReportList(rose, rose->boundary.reportEodOffset, length,
                          scratch);
    }

set_retval:
    DEBUG_PRINTF("done. told_to_stop_matching=%d\n",
                 told_to_stop_matching(scratch));
    return told_to_stop_matching(scratch) ? HS_SCAN_TERMINATED : HS_SUCCESS;
}
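/*
 * Caller-side sketch (illustrative only): block-mode scanning of a single
 * in-memory buffer with a database compiled with HS_MODE_BLOCK. The buffer
 * and handler below are hypothetical.
 *
 *     const char *buf = "xxxfooyyybarzzz";
 *     hs_error_t ret = hs_scan(db, buf, (unsigned)strlen(buf), 0, scratch,
 *                              on_match, NULL);
 *     if (ret != HS_SUCCESS && ret != HS_SCAN_TERMINATED) {
 *         fprintf(stderr, "hs_scan failed: %d\n", ret);
 *     }
 */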