HS_PUBLIC_API
hs_error_t hs_scan_stream(hs_stream_t *id, const char *data, unsigned length,
                          unsigned flags, hs_scratch_t *scratch,
                          match_event_handler onEvent, void *context) {
    /* Reject null arguments and scratch regions that are not valid for the
     * database this stream was opened against. */
    if (unlikely(!id || !scratch || !data ||
                 !validScratch(id->rose, scratch))) {
        return HS_INVALID;
    }

    /* Claim exclusive use of the scratch for the duration of the scan. */
    if (unlikely(markScratchInUse(scratch))) {
        return HS_SCRATCH_IN_USE;
    }

    hs_error_t ret = hs_scan_stream_internal(id, data, length, flags, scratch,
                                             onEvent, context);
    unmarkScratchInUse(scratch);
    return ret;
}
HS_PUBLIC_API
hs_error_t hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch,
                           match_event_handler onEvent, void *context) {
    if (!id) {
        return HS_INVALID;
    }

    /* End-of-data reporting is only performed when the caller supplied a
     * handler; it requires a scratch valid for this stream's database. */
    if (onEvent) {
        if (!scratch || !validScratch(id->rose, scratch)) {
            return HS_INVALID;
        }
        if (unlikely(markScratchInUse(scratch))) {
            return HS_SCRATCH_IN_USE;
        }
        report_eod_matches(id, scratch, onEvent, context);
        unmarkScratchInUse(scratch);
    }

    /* Release the stream's state regardless of whether EOD ran. */
    hs_stream_free(id);
    return HS_SUCCESS;
}
HS_PUBLIC_API
hs_error_t hs_free_scratch(hs_scratch_t *scratch) {
    /* Freeing a null scratch is a harmless no-op. */
    if (!scratch) {
        return HS_SUCCESS;
    }

    /* has to be aligned before we can do anything with it */
    if (!ISALIGNED_CL(scratch)) {
        return HS_INVALID;
    }
    if (scratch->magic != SCRATCH_MAGIC) {
        return HS_INVALID;
    }
    if (markScratchInUse(scratch)) {
        return HS_SCRATCH_IN_USE;
    }

    /* Poison the magic so a stale pointer fails the check above. */
    scratch->magic = 0;
    assert(scratch->scratch_alloc);
    DEBUG_PRINTF("scratch %p is really at %p : freeing\n", scratch,
                 scratch->scratch_alloc);
    /* The allocation handle is the unaligned base, not the scratch pointer. */
    hs_scratch_free(scratch->scratch_alloc);

    return HS_SUCCESS;
}
HS_PUBLIC_API
hs_error_t hs_reset_and_copy_stream(hs_stream_t *to_id,
                                    const hs_stream_t *from_id,
                                    hs_scratch_t *scratch,
                                    match_event_handler onEvent,
                                    void *context) {
    /* Both streams must exist, belong to the same database, and be distinct. */
    if (!from_id || !from_id->rose) {
        return HS_INVALID;
    }
    if (!to_id || to_id->rose != from_id->rose) {
        return HS_INVALID;
    }
    if (to_id == from_id) {
        return HS_INVALID;
    }

    /* If a handler was given, flush any pending EOD matches on the stream
     * about to be overwritten. */
    if (onEvent) {
        if (!scratch || !validScratch(to_id->rose, scratch)) {
            return HS_INVALID;
        }
        if (unlikely(markScratchInUse(scratch))) {
            return HS_SCRATCH_IN_USE;
        }
        report_eod_matches(to_id, scratch, onEvent, context);
        unmarkScratchInUse(scratch);
    }

    /* Clone header plus engine stream state in a single copy. */
    size_t copy_len = sizeof(struct hs_stream) + from_id->rose->stateOffsets.end;
    memcpy(to_id, from_id, copy_len);

    return HS_SUCCESS;
}
/* Allocate (or grow) a scratch region sized for database db. On entry,
 * *scratch may be NULL (fresh allocation) or an existing scratch, which is
 * grown in place conceptually: its sizing fields are merged with the
 * database's requirements and it is reallocated only if anything grew. */
HS_PUBLIC_API hs_error_t hs_alloc_scratch(const hs_database_t *db, hs_scratch_t **scratch) { if (!db || !scratch) { return HS_INVALID; } /* We need to do some real sanity checks on the database as some users mmap * in old deserialised databases, so this is the first real opportunity we * have to make sure it is sane. */ hs_error_t rv = dbIsValid(db); if (rv != HS_SUCCESS) { return rv; } /* We can also sanity-check the scratch parameter: if it points to an * existing scratch area, that scratch should have valid magic bits. */ if (*scratch != NULL) { /* has to be aligned before we can do anything with it */ if (!ISALIGNED_CL(*scratch)) { return HS_INVALID; } if ((*scratch)->magic != SCRATCH_MAGIC) { return HS_INVALID; } if (markScratchInUse(*scratch)) { return HS_SCRATCH_IN_USE; } } const struct RoseEngine *rose = hs_get_bytecode(db); int resize = 0; hs_scratch_t *proto; /* Over-allocate by 256 bytes so proto can be rounded up to a 64-byte
 * boundary below. */ hs_scratch_t *proto_tmp = hs_scratch_alloc(sizeof(struct hs_scratch) + 256); hs_error_t proto_ret = hs_check_alloc(proto_tmp); if (proto_ret != HS_SUCCESS) { hs_scratch_free(proto_tmp); /* NOTE(review): this frees the aligned pointer *scratch directly, while the
 * resize path below frees (*scratch)->scratch_alloc — confirm the two paths
 * are intentionally different. */ hs_scratch_free(*scratch); *scratch = NULL; return proto_ret; } proto = ROUNDUP_PTR(proto_tmp, 64); /* Seed the prototype from the existing scratch (so existing capacity is
 * kept), or zero it and force allocation when starting fresh. */ if (*scratch) { *proto = **scratch; } else { memset(proto, 0, sizeof(*proto)); resize = 1; } proto->scratch_alloc = (char *)proto_tmp; /* Grow each sizing field to this database's requirement; any growth forces
 * a reallocation. */ if (rose->anchoredDistance > proto->anchored_literal_region_len) { resize = 1; proto->anchored_literal_region_len = rose->anchoredDistance; } if (rose->anchored_count > proto->anchored_literal_count) { resize = 1; proto->anchored_literal_count = rose->anchored_count; } if (rose->delay_count > proto->delay_count) { resize = 1; proto->delay_count = rose->delay_count; } if (rose->handledKeyCount > proto->handledKeyCount) { resize = 1; proto->handledKeyCount = rose->handledKeyCount; } if (rose->tStateSize > proto->tStateSize) { resize = 1; proto->tStateSize = rose->tStateSize; } u32 som_store_count = rose->somLocationCount; if (som_store_count > proto->som_store_count) { resize = 1; 
proto->som_store_count = som_store_count; } u32 queueCount = rose->queueCount; if (queueCount > proto->queueCount) { resize = 1; proto->queueCount = queueCount; } /* Block-mode state lives in scratch (bstate); size it by mode. */ u32 bStateSize = 0; if (rose->mode == HS_MODE_BLOCK) { bStateSize = rose->stateOffsets.end; } else if (rose->mode == HS_MODE_VECTORED) { /* vectoring database require a full stream state (inc header) */ bStateSize = sizeof(struct hs_stream) + rose->stateOffsets.end; } if (bStateSize > proto->bStateSize) { resize = 1; proto->bStateSize = bStateSize; } u32 fullStateSize = rose->scratchStateSize; if (fullStateSize > proto->fullStateSize) { resize = 1; proto->fullStateSize = fullStateSize; } if (rose->dkeyCount > proto->deduper.log_size) { resize = 1; proto->deduper.log_size = rose->dkeyCount; } if (resize) { /* Something grew: discard the old backing store (if any) and allocate a
 * scratch matching the merged prototype sizing. */ if (*scratch) { hs_scratch_free((*scratch)->scratch_alloc); } hs_error_t alloc_ret = alloc_scratch(proto, scratch); hs_scratch_free(proto_tmp); /* kill off temp used for sizing */ if (alloc_ret != HS_SUCCESS) { *scratch = NULL; return alloc_ret; } } else { /* Existing scratch is already big enough; just release the in-use mark
 * taken at the top of the function. */ hs_scratch_free(proto_tmp); /* kill off temp used for sizing */ unmarkScratchInUse(*scratch); } assert(!(*scratch)->in_use); return HS_SUCCESS; }
/* Block-mode scan: run the database over a single in-memory buffer. Returns
 * HS_SCAN_TERMINATED if the match callback asked to stop, HS_SUCCESS
 * otherwise. The scratch is marked in-use for the duration and released on
 * every exit path. */
HS_PUBLIC_API hs_error_t hs_scan(const hs_database_t *db, const char *data, unsigned length, unsigned flags, hs_scratch_t *scratch, match_event_handler onEvent, void *userCtx) { if (unlikely(!scratch || !data)) { return HS_INVALID; } hs_error_t err = validDatabase(db); if (unlikely(err != HS_SUCCESS)) { return err; } const struct RoseEngine *rose = hs_get_bytecode(db); if (unlikely(!ISALIGNED_16(rose))) { return HS_INVALID; } if (unlikely(rose->mode != HS_MODE_BLOCK)) { return HS_DB_MODE_ERROR; } if (unlikely(!validScratch(rose, scratch))) { return HS_INVALID; } if (unlikely(markScratchInUse(scratch))) { return HS_SCRATCH_IN_USE; } /* Buffer is shorter than any possible match: nothing to scan. */ if (rose->minWidth > length) { DEBUG_PRINTF("minwidth=%u > length=%u\n", rose->minWidth, length); unmarkScratchInUse(scratch); return HS_SUCCESS; } prefetch_data(data, length); /* populate core info in scratch */ populateCoreInfo(scratch, rose, scratch->bstate, onEvent, userCtx, data, length, NULL, 0, 0, 0, flags); clearEvec(rose, scratch->core_info.exhaustionVector); /* Empty buffer: only the zero/EOD boundary program can fire. */ if (!length) { if (rose->boundary.reportZeroEodOffset) { roseRunBoundaryProgram(rose, rose->boundary.reportZeroEodOffset, 0, scratch); } goto set_retval; } if (rose->boundary.reportZeroOffset) { int rv = roseRunBoundaryProgram(rose, rose->boundary.reportZeroOffset, 0, scratch); if (rv == MO_HALT_MATCHING) { goto set_retval; } } /* Too short for any non-boundary match: skip the engines but still run the
 * EOD boundary program via done_scan. */ if (rose->minWidthExcludingBoundaries > length) { DEBUG_PRINTF("minWidthExcludingBoundaries=%u > length=%u\n", rose->minWidthExcludingBoundaries, length); goto done_scan; } // Similarly, we may have a maximum width (for engines constructed entirely // of bi-anchored patterns). if (rose->maxBiAnchoredWidth != ROSE_BOUND_INF && length > rose->maxBiAnchoredWidth) { DEBUG_PRINTF("block len=%u longer than maxBAWidth=%u\n", length, rose->maxBiAnchoredWidth); goto done_scan; } // Is this a small write case? 
if (rose->smallWriteOffset) { const struct SmallWriteEngine *smwr = getSmallWrite(rose); assert(smwr); // Apply the small write engine if and only if the block (buffer) is // small enough. Otherwise, we allow rose &co to deal with it. if (length < smwr->largestBuffer) { DEBUG_PRINTF("Attempting small write of block %u bytes long.\n", length); runSmallWriteEngine(smwr, scratch); goto done_scan; } } /* Dispatch on the runtime implementation baked into the database. */ switch (rose->runtimeImpl) { default: assert(0); case ROSE_RUNTIME_FULL_ROSE: rawBlockExec(rose, scratch); break; case ROSE_RUNTIME_PURE_LITERAL: pureLiteralBlockExec(rose, scratch); break; case ROSE_RUNTIME_SINGLE_OUTFIX: soleOutfixBlockExec(rose, scratch); break; } done_scan: if (told_to_stop_matching(scratch)) { unmarkScratchInUse(scratch); return HS_SCAN_TERMINATED; } /* Deliver any deferred start-of-match results before EOD reporting. */ if (rose->hasSom) { int halt = flushStoredSomMatches(scratch, ~0ULL); if (halt) { unmarkScratchInUse(scratch); return HS_SCAN_TERMINATED; } } if (rose->boundary.reportEodOffset) { roseRunBoundaryProgram(rose, rose->boundary.reportEodOffset, length, scratch); } set_retval: DEBUG_PRINTF("done. told_to_stop_matching=%d\n", told_to_stop_matching(scratch)); hs_error_t rv = told_to_stop_matching(scratch) ? HS_SCAN_TERMINATED : HS_SUCCESS; unmarkScratchInUse(scratch); return rv; }
HS_PUBLIC_API
hs_error_t hs_scan_vector(const hs_database_t *db, const char * const * data,
                          const unsigned int *length, unsigned int count,
                          UNUSED unsigned int flags, hs_scratch_t *scratch,
                          match_event_handler onEvent, void *context) {
    if (unlikely(!scratch || !data || !length)) {
        return HS_INVALID;
    }

    hs_error_t err = validDatabase(db);
    if (unlikely(err != HS_SUCCESS)) {
        return err;
    }

    const struct RoseEngine *rose = hs_get_bytecode(db);
    if (unlikely(!ISALIGNED_16(rose))) {
        return HS_INVALID;
    }
    if (unlikely(rose->mode != HS_MODE_VECTORED)) {
        return HS_DB_MODE_ERROR;
    }
    if (unlikely(!validScratch(rose, scratch))) {
        return HS_INVALID;
    }
    if (unlikely(markScratchInUse(scratch))) {
        return HS_SCRATCH_IN_USE;
    }

    /* A vectored scan runs as an ephemeral stream whose state lives in the
     * scratch's block-state area. */
    hs_stream_t *stream = (hs_stream_t *)(scratch->bstate);
    init_stream(stream, rose); /* open stream */

    /* Feed each buffer to the stream in order. */
    for (u32 block = 0; block < count; block++) {
        DEBUG_PRINTF("block %u/%u offset=%llu len=%u\n", block, count,
                     stream->offset, length[block]);
#ifdef DEBUG
        dumpData(data[block], length[block]);
#endif
        hs_error_t ret = hs_scan_stream_internal(stream, data[block],
                                                 length[block], 0, scratch,
                                                 onEvent, context);
        if (ret != HS_SUCCESS) {
            unmarkScratchInUse(scratch);
            return ret;
        }
    }

    /* close stream */
    if (onEvent) {
        report_eod_matches(stream, scratch, onEvent, context);
        if (told_to_stop_matching(scratch)) {
            unmarkScratchInUse(scratch);
            return HS_SCAN_TERMINATED;
        }
    }

    unmarkScratchInUse(scratch);
    return HS_SUCCESS;
}