/**
 * Find the first occurrence of the code unit c within the first count
 * units of s.
 *
 * @param s     buffer to scan
 * @param c     code unit to look for
 * @param count number of units of s to examine; values <= 0 yield NULL
 * @return pointer to the first matching unit, or NULL when none is found
 */
U_CAPI UChar* U_EXPORT2
u_memchr(const UChar* s, UChar c, int32_t count) {
    int32_t i;

    if (count <= 0) {
        return NULL; /* no string */
    }
    if (U16_IS_SURROGATE(c)) {
        /* delegate to the substring search so that half of a surrogate pair is never reported */
        return u_strFindFirst(s, count, &c, 1);
    }
    /* trivial search for a BMP code point */
    for (i = 0; i < count; ++i) {
        if (s[i] == c) {
            return (UChar*) (s + i);
        }
    }
    return NULL;
}
/**
 * Find the first occurrence of the code unit c in the NUL-terminated
 * string s.
 *
 * @param s NUL-terminated string to scan
 * @param c code unit to look for
 * @return pointer to the first matching unit, or NULL when the terminator
 *         is reached first
 */
U_CAPI UChar* U_EXPORT2
u_strchr(const UChar* s, UChar c) {
    if (U16_IS_SURROGATE(c)) {
        /* delegate to the substring search so that half of a surrogate pair is never reported */
        return u_strFindFirst(s, -1, &c, 1);
    }
    /*
     * trivial search for a BMP code point; equality is tested before the
     * terminator, so searching for 0 returns the terminator itself
     */
    while (*s != c) {
        if (*s == 0) {
            return NULL;
        }
        ++s;
    }
    return (UChar*) s;
}
/**
 * Step over the next n accepted occurrences of pattern in subject,
 * starting at offset *r.
 *
 * An occurrence is accepted when ubrk is NULL, or when ubrk_isBoundary()
 * holds for both its start and its end offset.
 *
 * @param ubrk    optional break iterator used to validate occurrences
 * @param pattern code units to look for
 * @param subject text to search
 * @param array   when non-NULL, receives (through add_match) the text lying
 *                between the current *r and each accepted occurrence;
 *                NULL to skip n matches
 * @param n       number of accepted occurrences to consume
 * @param r       in: offset to resume from; out: offset just past the last
 *                accepted occurrence, or USEARCH_DONE on failure
 * @return TRUE when exactly n occurrences were consumed. Otherwise the
 *         remainder [*r, subject->len) is added to array (if non-NULL),
 *         *r is set to USEARCH_DONE and FALSE is returned.
 */
UBool binary_fwd_n(
    UBreakIterator *ubrk,
    const UString *pattern, const UString *subject,
    DArray *array, /* NULL to skip n matches */
    int32_t n,
    int32_t *r
) {
    int32_t cursor = *r;

    while (n > 0) {
        int32_t start, end;
        UChar *hit = u_strFindFirst(subject->ptr + cursor, subject->len - cursor, pattern->ptr, pattern->len);

        if (NULL == hit) {
            break;
        }
        start = hit - subject->ptr;
        /* computed once: both the piece start (*r) and the search cursor resume here */
        end = start + pattern->len;
        if (NULL == ubrk || (ubrk_isBoundary(ubrk, start) && ubrk_isBoundary(ubrk, end))) {
            --n;
            if (NULL != array) {
                add_match(array, subject, *r, start);
            }
            *r = end;
        }
        cursor = end;
    }
    if (0 != n) {
        /* ran out of occurrences: hand over what is left of the subject */
        if (NULL != array) {
            add_match(array, subject, *r, subject->len);
        }
        *r = USEARCH_DONE;
        return FALSE;
    }
    *r = cursor;
    return TRUE;
}
/**
 * Find the first occurrence of substring in s.
 *
 * Thin wrapper over u_strFindFirst() with both lengths passed as -1
 * (ICU's convention for NUL-terminated input).
 *
 * @param s         string to search
 * @param substring string to look for
 * @return whatever u_strFindFirst() returns for these arguments
 */
U_CAPI UChar* U_EXPORT2
u_strstr(const UChar* s, const UChar* substring) {
    UChar* found = u_strFindFirst(s, -1, substring, -1);

    return found;
}
static engine_return_t engine_fixed_match(error_t **error, void *data, const UString *subject) { int32_t ret; UErrorCode status; FETCH_DATA(data, p, fixed_pattern_t); status = U_ZERO_ERROR; if (ustring_empty(p->pattern)) { if (IS_WORD_BOUNDED(p->flags)) { if (ustring_empty(subject)) { return ENGINE_MATCH_FOUND; } else { int32_t l, u, lastState, state; ubrk_setText(p->ubrk, subject->ptr, subject->len, &status); if (U_FAILURE(status)) { icu_error_set(error, FATAL, status, "ubrk_setText"); return ENGINE_FAILURE; } if (UBRK_DONE != (l = ubrk_first(p->ubrk))) { lastState = ubrk_getRuleStatus(p->ubrk); while (UBRK_DONE != (u = ubrk_next(p->ubrk))) { state = ubrk_getRuleStatus(p->ubrk); if (UBRK_WORD_NONE == lastState && lastState == state) { return ENGINE_MATCH_FOUND; } lastState = state; l = u; } } return ENGINE_NO_MATCH; } } else { return ENGINE_MATCH_FOUND; } } else if (NULL != p->usearch) { if (subject->len > 0) { usearch_setText(p->usearch, subject->ptr, subject->len, &status); if (U_FAILURE(status)) { icu_error_set(error, FATAL, status, "usearch_setText"); return ENGINE_FAILURE; } ret = usearch_first(p->usearch, &status); if (U_FAILURE(status)) { icu_error_set(error, FATAL, status, "usearch_first"); return ENGINE_FAILURE; } usearch_unbindText(p->usearch); return (ret != USEARCH_DONE ? ENGINE_MATCH_FOUND : ENGINE_NO_MATCH); } else { return ENGINE_NO_MATCH; } } else { UChar *m; int32_t pos; pos = 0; ret = ENGINE_NO_MATCH; if (NULL != p->ubrk) { ubrk_setText(p->ubrk, subject->ptr, subject->len, &status); if (U_FAILURE(status)) { icu_error_set(error, FATAL, status, "ubrk_setText"); return ENGINE_FAILURE; } } while (NULL != (m = u_strFindFirst(subject->ptr + pos, subject->len - pos, p->pattern->ptr, p->pattern->len))) { pos = m - subject->ptr; if (NULL == p->ubrk || (ubrk_isBoundary(p->ubrk, pos) && ubrk_isBoundary(p->ubrk, pos + p->pattern->len))) { ret = ENGINE_MATCH_FOUND; } pos += p->pattern->len; } ubrk_unbindText(p->ubrk); return ret; } }
static UBool engine_fixed_split(error_t **error, void *data, const UString *subject, DArray *array, interval_list_t *intervals) { UErrorCode status; int32_t l, lastU; dlist_element_t *el; FETCH_DATA(data, p, fixed_pattern_t); lastU = l = 0; status = U_ZERO_ERROR; if (NULL != p->usearch) { usearch_setText(p->usearch, subject->ptr, subject->len, &status); if (U_FAILURE(status)) { icu_error_set(error, FATAL, status, "usearch_setText"); return FALSE; } /* <X> */ if (NULL == intervals) { int32_t u; while (U_SUCCESS(status) && USEARCH_DONE != (u = usearch_next(p->usearch, &status))) { add_match(array, subject, l, u); l = u += usearch_getMatchedLength(p->usearch); } add_match(array, subject, l, subject->len); } else { /* </X> */ for (el = intervals->head; NULL != el; el = el->next) { FETCH_DATA(el->data, i, interval_t); if (i->lower_limit > 0) { if (!usearch_fwd_n(p->usearch, subject, NULL, i->lower_limit - lastU, &l, &status)) { break; } } if (!usearch_fwd_n(p->usearch, subject, array, i->upper_limit - i->lower_limit, &l, &status)) { break; } lastU = i->upper_limit; } /* <X> */ } /* </X> */ usearch_unbindText(p->usearch); if (U_FAILURE(status)) { icu_error_set(error, FATAL, status, "usearch_next"); return FALSE; } } else { if (NULL != p->ubrk) { ubrk_setText(p->ubrk, subject->ptr, subject->len, &status); if (U_FAILURE(status)) { icu_error_set(error, FATAL, status, "ubrk_setText"); return FALSE; } } /* <X> */ if (NULL == intervals) { UChar *m; int32_t u; u = 0; while (NULL != (m = u_strFindFirst(subject->ptr + u, subject->len - u, p->pattern->ptr, p->pattern->len))) { u = m - subject->ptr; if (NULL == p->ubrk || (ubrk_isBoundary(p->ubrk, u) && ubrk_isBoundary(p->ubrk, u + p->pattern->len))) { add_match(array, subject, l, u); } l = u = u + p->pattern->len; } add_match(array, subject, l, subject->len); } else { /* </X> */ for (el = intervals->head; NULL != el; el = el->next) { FETCH_DATA(el->data, i, interval_t); if (i->lower_limit > 0) { if (!binary_fwd_n(p->ubrk, 
p->pattern, subject, NULL, i->lower_limit - lastU, &l)) { break; } } if (!binary_fwd_n(p->ubrk, p->pattern, subject, array, i->upper_limit - i->lower_limit, &l)) { break; } lastU = i->upper_limit; } /* <X> */ } /* </X> */ ubrk_unbindText(p->ubrk); } return TRUE; }
static engine_return_t engine_fixed_match_all(error_t **error, void *data, const UString *subject, interval_list_t *intervals) { int32_t matches; UErrorCode status; FETCH_DATA(data, p, fixed_pattern_t); matches = 0; status = U_ZERO_ERROR; if (ustring_empty(p->pattern)) { if (IS_WORD_BOUNDED(p->flags)) { if (ustring_empty(subject)) { return ENGINE_MATCH_FOUND; } else { int32_t l, u, lastState, state; ubrk_setText(p->ubrk, subject->ptr, subject->len, &status); if (U_FAILURE(status)) { icu_error_set(error, FATAL, status, "ubrk_setText"); return ENGINE_FAILURE; } if (UBRK_DONE != (l = ubrk_first(p->ubrk))) { lastState = ubrk_getRuleStatus(p->ubrk); while (UBRK_DONE != (u = ubrk_next(p->ubrk))) { state = ubrk_getRuleStatus(p->ubrk); if (UBRK_WORD_NONE == lastState && lastState == state) { return ENGINE_MATCH_FOUND; } lastState = state; l = u; } } return ENGINE_NO_MATCH; } } else { return ENGINE_MATCH_FOUND; } } else if (NULL != p->usearch) { int32_t l, u; if (subject->len > 0) { usearch_setText(p->usearch, subject->ptr, subject->len, &status); if (U_FAILURE(status)) { icu_error_set(error, FATAL, status, "usearch_setText"); return ENGINE_FAILURE; } for (l = usearch_first(p->usearch, &status); U_SUCCESS(status) && USEARCH_DONE != l; l = usearch_next(p->usearch, &status)) { matches++; u = l + usearch_getMatchedLength(p->usearch); if (interval_list_add(intervals, subject->len, l, u)) { return ENGINE_WHOLE_LINE_MATCH; } } if (U_FAILURE(status)) { icu_error_set(error, FATAL, status, "usearch_[first|next]"); return ENGINE_FAILURE; } usearch_unbindText(p->usearch); return (matches ? 
ENGINE_MATCH_FOUND : ENGINE_NO_MATCH); } else { return ENGINE_NO_MATCH; } } else { UChar *m; int32_t pos; pos = 0; if (NULL != p->ubrk) { ubrk_setText(p->ubrk, subject->ptr, subject->len, &status); if (U_FAILURE(status)) { icu_error_set(error, FATAL, status, "ubrk_setText"); return ENGINE_FAILURE; } } while (NULL != (m = u_strFindFirst(subject->ptr + pos, subject->len - pos, p->pattern->ptr, p->pattern->len))) { pos = m - subject->ptr; if (NULL == p->ubrk || (ubrk_isBoundary(p->ubrk, pos) && ubrk_isBoundary(p->ubrk, pos + p->pattern->len))) { matches++; if (interval_list_add(intervals, subject->len, pos, pos + p->pattern->len)) { return ENGINE_WHOLE_LINE_MATCH; } } pos += p->pattern->len; } ubrk_unbindText(p->ubrk); return (matches ? ENGINE_MATCH_FOUND : ENGINE_NO_MATCH); } }