/**
 * Reset the stream `to_id` and overwrite it with a copy of `from_id`.
 *
 * If `onEvent` is non-NULL, end-of-data matches for the current contents of
 * `to_id` are reported through it before the copy takes place. The scratch is
 * marked in use for the duration of that reporting so that a concurrent or
 * re-entrant use of the same scratch is rejected rather than corrupting it.
 *
 * @param to_id    Destination stream; must share its database with `from_id`
 *                 and must not be the same stream.
 * @param from_id  Source stream.
 * @param scratch  Scratch space; must be valid for the stream's database.
 * @param onEvent  Optional match callback used for end-of-data reports.
 * @param context  Opaque pointer handed to `onEvent`.
 *
 * @return HS_SUCCESS on success; HS_INVALID if an argument fails validation;
 *         HS_SCRATCH_IN_USE if `onEvent` is set and the scratch is already
 *         marked in use.
 */
HS_PUBLIC_API
hs_error_t hs_reset_and_copy_stream(hs_stream_t *to_id,
                                    const hs_stream_t *from_id,
                                    hs_scratch_t *scratch,
                                    match_event_handler onEvent,
                                    void *context) {
    if (!from_id || !from_id->rose) {
        return HS_INVALID;
    }

    if (!to_id || to_id->rose != from_id->rose) {
        return HS_INVALID;
    }

    /* memcpy below requires distinct source and destination objects */
    if (to_id == from_id) {
        return HS_INVALID;
    }

    if (!scratch || !validScratch(to_id->rose, scratch)) {
        return HS_INVALID;
    }

    if (onEvent) {
        /* user callbacks run on this scratch: hold the in-use mark */
        if (unlikely(markScratchInUse(scratch))) {
            return HS_SCRATCH_IN_USE;
        }
        report_eod_matches(to_id, scratch, onEvent, context);
        unmarkScratchInUse(scratch);
    }

    /* stream header followed by the engine's stream state region */
    size_t stateSize
        = sizeof(struct hs_stream) + from_id->rose->stateOffsets.end;

    memcpy(to_id, from_id, stateSize);

    return HS_SUCCESS;
}
/**
 * Scan `count` data blocks, in order, as if they formed one stream.
 *
 * A temporary stream is built inside the scratch (`scratch->bstate`), each
 * block is fed through hs_scan_stream_internal(), and finally end-of-data
 * matches are reported if a callback was supplied. Because the scratch holds
 * the stream state for the whole call, it is marked in use from the point the
 * stream is opened until the function returns.
 *
 * @param db       Database; must have been compiled in vectored mode.
 * @param data     Array of `count` block pointers.
 * @param length   Array of `count` block lengths.
 * @param count    Number of blocks.
 * @param flags    Unused.
 * @param scratch  Scratch space valid for `db`.
 * @param onEvent  Optional match callback.
 * @param context  Opaque pointer handed to `onEvent`.
 *
 * @return HS_SUCCESS on completion; HS_INVALID for bad arguments, a
 *         misaligned bytecode or an unsuitable scratch; the error from
 *         validDatabase() if the database is rejected; HS_DB_MODE_ERROR if
 *         the database is not vectored; HS_SCRATCH_IN_USE if the scratch is
 *         already marked in use; HS_SCAN_TERMINATED if matching was halted;
 *         otherwise the first non-success result of a block scan.
 */
HS_PUBLIC_API
hs_error_t hs_scan_vector(const hs_database_t *db, const char * const * data,
                          const unsigned int *length, unsigned int count,
                          UNUSED unsigned int flags, hs_scratch_t *scratch,
                          match_event_handler onEvent, void *context) {
    if (unlikely(!scratch || !data || !length)) {
        return HS_INVALID;
    }

    hs_error_t err = validDatabase(db);
    if (unlikely(err != HS_SUCCESS)) {
        return err;
    }

    const struct RoseEngine *rose = hs_get_bytecode(db);
    if (unlikely(!ISALIGNED_16(rose))) {
        return HS_INVALID;
    }

    if (unlikely(rose->mode != HS_MODE_VECTORED)) {
        return HS_DB_MODE_ERROR;
    }

    if (unlikely(!validScratch(rose, scratch))) {
        return HS_INVALID;
    }

    /* the scratch carries the stream state below: refuse shared use */
    if (unlikely(markScratchInUse(scratch))) {
        return HS_SCRATCH_IN_USE;
    }

    hs_stream_t *id = (hs_stream_t *)(scratch->bstate);

    init_stream(id, rose); /* open stream */

    hs_error_t rv = HS_SUCCESS;

    for (u32 i = 0; i < count; i++) {
        DEBUG_PRINTF("block %u/%u offset=%llu len=%u\n", i, count, id->offset,
                     length[i]);
#ifdef DEBUG
        dumpData(data[i], length[i]);
#endif
        rv = hs_scan_stream_internal(id, data[i], length[i], 0, scratch,
                                     onEvent, context);
        if (rv != HS_SUCCESS) {
            break; /* propagate the first failure, skipping EOD reports */
        }
    }

    /* close stream */
    if (rv == HS_SUCCESS && onEvent) {
        report_eod_matches(id, scratch, onEvent, context);

        if (told_to_stop_matching(scratch)) {
            rv = HS_SCAN_TERMINATED;
        }
    }

    unmarkScratchInUse(scratch);

    return rv;
}
/**
 * Return a stream to its initial state.
 *
 * If `onEvent` is non-NULL, end-of-data matches for the stream's current
 * contents are reported through it first; the scratch is marked in use while
 * those callbacks run so that shared or re-entrant use of the same scratch is
 * rejected.
 *
 * @param id       Stream to reset.
 * @param flags    Unused.
 * @param scratch  Scratch space; must be valid for the stream's database.
 * @param onEvent  Optional match callback used for end-of-data reports.
 * @param context  Opaque pointer handed to `onEvent`.
 *
 * @return HS_SUCCESS on success; HS_INVALID if `id` or `scratch` is NULL or
 *         the scratch fails validation; HS_SCRATCH_IN_USE if `onEvent` is set
 *         and the scratch is already marked in use.
 */
HS_PUBLIC_API
hs_error_t hs_reset_stream(hs_stream_t *id, UNUSED unsigned int flags,
                           hs_scratch_t *scratch, match_event_handler onEvent,
                           void *context) {
    if (!id || !scratch || !validScratch(id->rose, scratch)) {
        return HS_INVALID;
    }

    /* user wants eod matches */
    if (onEvent) {
        if (unlikely(markScratchInUse(scratch))) {
            return HS_SCRATCH_IN_USE;
        }
        report_eod_matches(id, scratch, onEvent, context);
        unmarkScratchInUse(scratch);
    }

    init_stream(id, id->rose);

    return HS_SUCCESS;
}
/**
 * Public entry point for scanning one block of data on an open stream.
 *
 * Validates the arguments, takes the scratch in-use mark, delegates the scan
 * to hs_scan_stream_internal() and releases the mark before returning.
 *
 * @return HS_INVALID if `id`, `scratch` or `data` is NULL or the scratch
 *         fails validation; HS_SCRATCH_IN_USE if the scratch is already
 *         marked in use; otherwise the result of the internal scan.
 */
HS_PUBLIC_API
hs_error_t hs_scan_stream(hs_stream_t *id, const char *data, unsigned length,
                          unsigned flags, hs_scratch_t *scratch,
                          match_event_handler onEvent, void *context) {
    /* NULL checks come first so id->rose is only read from a real stream */
    if (unlikely(!id || !scratch || !data)) {
        return HS_INVALID;
    }

    if (unlikely(!validScratch(id->rose, scratch))) {
        return HS_INVALID;
    }

    if (unlikely(markScratchInUse(scratch))) {
        return HS_SCRATCH_IN_USE;
    }

    const hs_error_t status = hs_scan_stream_internal(id, data, length, flags,
                                                      scratch, onEvent,
                                                      context);

    unmarkScratchInUse(scratch);

    return status;
}
/**
 * Close a stream and release it with hs_stream_free().
 *
 * If `onEvent` is non-NULL, a valid scratch is required and end-of-data
 * matches are reported through the callback before the stream is freed. The
 * scratch is marked in use while those callbacks run; if it is already in use
 * the call fails and the stream is left untouched so the caller can retry.
 *
 * @param id       Stream to close.
 * @param scratch  Scratch space; only consulted when `onEvent` is set.
 * @param onEvent  Optional match callback used for end-of-data reports.
 * @param context  Opaque pointer handed to `onEvent`.
 *
 * @return HS_SUCCESS on success; HS_INVALID if `id` is NULL, or if `onEvent`
 *         is set and the scratch is NULL or fails validation;
 *         HS_SCRATCH_IN_USE if `onEvent` is set and the scratch is already
 *         marked in use.
 */
HS_PUBLIC_API
hs_error_t hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch,
                           match_event_handler onEvent, void *context) {
    if (!id) {
        return HS_INVALID;
    }

    if (onEvent) {
        if (!scratch || !validScratch(id->rose, scratch)) {
            return HS_INVALID;
        }
        /* user callbacks run on this scratch: hold the in-use mark */
        if (unlikely(markScratchInUse(scratch))) {
            return HS_SCRATCH_IN_USE;
        }
        report_eod_matches(id, scratch, onEvent, context);
        unmarkScratchInUse(scratch);
    }

    hs_stream_free(id);

    return HS_SUCCESS;
}
/**
 * Put a stream back into its freshly-initialised state.
 *
 * When a callback is supplied, end-of-data matches are delivered first; that
 * path needs a valid scratch and holds the scratch in-use mark while the
 * callbacks run. Without a callback the scratch is not examined at all.
 *
 * @return HS_SUCCESS on success; HS_INVALID if `id` is NULL, or if `onEvent`
 *         is set and the scratch is NULL or fails validation;
 *         HS_SCRATCH_IN_USE if `onEvent` is set and the scratch is already
 *         marked in use.
 */
HS_PUBLIC_API
hs_error_t hs_reset_stream(hs_stream_t *id, UNUSED unsigned int flags,
                           hs_scratch_t *scratch, match_event_handler onEvent,
                           void *context) {
    if (!id) {
        return HS_INVALID;
    }

    if (onEvent) {
        if (!scratch) {
            return HS_INVALID;
        }

        if (!validScratch(id->rose, scratch)) {
            return HS_INVALID;
        }

        if (unlikely(markScratchInUse(scratch))) {
            return HS_SCRATCH_IN_USE;
        }

        /* flush pending end-of-data reports before the state is wiped */
        report_eod_matches(id, scratch, onEvent, context);

        unmarkScratchInUse(scratch);
    }

    init_stream(id, id->rose);

    return HS_SUCCESS;
}
HS_PUBLIC_API hs_error_t hs_scan(const hs_database_t *db, const char *data, unsigned length, unsigned flags, hs_scratch_t *scratch, match_event_handler onEvent, void *userCtx) { if (unlikely(!scratch || !data)) { return HS_INVALID; } hs_error_t err = validDatabase(db); if (unlikely(err != HS_SUCCESS)) { return err; } const struct RoseEngine *rose = hs_get_bytecode(db); if (unlikely(!ISALIGNED_16(rose))) { return HS_INVALID; } if (unlikely(rose->mode != HS_MODE_BLOCK)) { return HS_DB_MODE_ERROR; } if (unlikely(!validScratch(rose, scratch))) { return HS_INVALID; } if (rose->minWidth > length) { DEBUG_PRINTF("minwidth=%u > length=%u\n", rose->minWidth, length); return HS_SUCCESS; } prefetch_data(data, length); /* populate core info in scratch */ populateCoreInfo(scratch, rose, scratch->bstate, onEvent, userCtx, data, length, NULL, 0, 0, flags); clearEvec(scratch->core_info.exhaustionVector, rose); if (!length) { if (rose->boundary.reportZeroEodOffset) { processReportList(rose, rose->boundary.reportZeroEodOffset, 0, scratch); } goto set_retval; } if (rose->boundary.reportZeroOffset) { processReportList(rose, rose->boundary.reportZeroOffset, 0, scratch); } if (rose->minWidthExcludingBoundaries > length) { DEBUG_PRINTF("minWidthExcludingBoundaries=%u > length=%u\n", rose->minWidthExcludingBoundaries, length); goto done_scan; } // Similarly, we may have a maximum width (for engines constructed entirely // of bi-anchored patterns). if (rose->maxBiAnchoredWidth != ROSE_BOUND_INF && length > rose->maxBiAnchoredWidth) { DEBUG_PRINTF("block len=%u longer than maxBAWidth=%u\n", length, rose->maxBiAnchoredWidth); goto done_scan; } // Is this a small write case? if (rose->smallWriteOffset) { const struct SmallWriteEngine *smwr = getSmallWrite(rose); assert(smwr); // Apply the small write engine if and only if the block (buffer) is // small enough. Otherwise, we allow rose &co to deal with it. 
if (length < smwr->largestBuffer) { DEBUG_PRINTF("Attempting small write of block %u bytes long.\n", length); runSmallWriteEngine(smwr, scratch); goto done_scan; } } switch (rose->runtimeImpl) { default: assert(0); case ROSE_RUNTIME_FULL_ROSE: rawBlockExec(rose, scratch); break; case ROSE_RUNTIME_PURE_LITERAL: pureLiteralBlockExec(rose, scratch); break; case ROSE_RUNTIME_SINGLE_OUTFIX: soleOutfixBlockExec(rose, scratch); break; } done_scan: if (told_to_stop_matching(scratch)) { return HS_SCAN_TERMINATED; } if (rose->hasSom) { int halt = flushStoredSomMatches(scratch, ~0ULL); if (halt) { return HS_SCAN_TERMINATED; } } if (rose->boundary.reportEodOffset) { processReportList(rose, rose->boundary.reportEodOffset, length, scratch); } set_retval: DEBUG_PRINTF("done. told_to_stop_matching=%d\n", told_to_stop_matching(scratch)); return told_to_stop_matching(scratch) ? HS_SCAN_TERMINATED : HS_SUCCESS; }
static inline hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data, unsigned length, UNUSED unsigned flags, hs_scratch_t *scratch, match_event_handler onEvent, void *context) { if (unlikely(!id || !scratch || !data || !validScratch(id->rose, scratch))) { return HS_INVALID; } const struct RoseEngine *rose = id->rose; char *state = getMultiState(id); u8 broken = getBroken(state); if (broken) { DEBUG_PRINTF("stream is broken, halting scan\n"); if (broken == BROKEN_FROM_USER) { return HS_SCAN_TERMINATED; } else { assert(broken == BROKEN_EXHAUSTED); return HS_SUCCESS; } } // We avoid doing any work if the user has given us zero bytes of data to // scan. Arguably we should define some semantics for how we treat vacuous // cases here. if (unlikely(length == 0)) { DEBUG_PRINTF("zero length block\n"); assert(getBroken(state) != BROKEN_FROM_USER); return HS_SUCCESS; } u32 historyAmount = getHistoryAmount(rose, id->offset); populateCoreInfo(scratch, rose, state, onEvent, context, data, length, getHistory(state, rose, id->offset), historyAmount, id->offset, flags); assert(scratch->core_info.hlen <= id->offset && scratch->core_info.hlen <= rose->historyRequired); prefetch_data(data, length); if (rose->somLocationCount) { loadSomFromStream(scratch, id->offset); } if (!id->offset && rose->boundary.reportZeroOffset) { DEBUG_PRINTF("zero reports\n"); processReportList(rose, rose->boundary.reportZeroOffset, 0, scratch); } switch (rose->runtimeImpl) { default: assert(0); case ROSE_RUNTIME_FULL_ROSE: rawStreamExec(id, scratch); break; case ROSE_RUNTIME_PURE_LITERAL: pureLiteralStreamExec(id, scratch); break; case ROSE_RUNTIME_SINGLE_OUTFIX: soleOutfixStreamExec(id, scratch); } if (rose->hasSom && !told_to_stop_matching(scratch)) { int halt = flushStoredSomMatches(scratch, ~0ULL); if (halt) { setBroken(state, BROKEN_FROM_USER); scratch->core_info.broken = BROKEN_FROM_USER; } } if (likely(!can_stop_matching(scratch))) { maintainHistoryBuffer(id->rose, getMultiState(id), 
data, length); id->offset += length; /* maintain offset */ if (rose->somLocationCount) { storeSomToStream(scratch, id->offset); } } else if (told_to_stop_matching(scratch)) { return HS_SCAN_TERMINATED; } else { /* exhausted */ setBroken(state, BROKEN_EXHAUSTED); } return HS_SUCCESS; }