// Collator.contains {{{ static PyObject * icu_Collator_contains(icu_Collator *self, PyObject *args, PyObject *kwargs) { PyObject *a_ = NULL, *b_ = NULL; UChar *a = NULL, *b = NULL; int32_t asz = 0, bsz = 0, pos = -1; uint8_t found = 0; UErrorCode status = U_ZERO_ERROR; UStringSearch *search = NULL; if (!PyArg_ParseTuple(args, "OO", &a_, &b_)) return NULL; a = python_to_icu(a_, &asz, 1); if (a == NULL) goto end; if (asz == 0) { found = TRUE; goto end; } b = python_to_icu(b_, &bsz, 1); if (b == NULL) goto end; search = usearch_openFromCollator(a, asz, b, bsz, self->collator, NULL, &status); if (U_SUCCESS(status)) { pos = usearch_first(search, &status); if (pos != USEARCH_DONE) found = TRUE; } end: if (search != NULL) usearch_close(search); if (a != NULL) free(a); if (b != NULL) free(b); if (PyErr_Occurred()) return NULL; if (found) Py_RETURN_TRUE; Py_RETURN_FALSE; } // }}}
/* Return value is a "Win32 BOOL" (1 = true, 0 = false) */ extern "C" int32_t StartsWith( const char* lpLocaleName, const UChar* lpTarget, int32_t cwTargetLength, const UChar* lpSource, int32_t cwSourceLength, int32_t options) { int32_t result = FALSE; UErrorCode err = U_ZERO_ERROR; UCollator* pColl = GetCollatorForLocaleAndOptions(lpLocaleName, options, &err); if (U_SUCCESS(err)) { UStringSearch* pSearch = usearch_openFromCollator(lpTarget, cwTargetLength, lpSource, cwSourceLength, pColl, nullptr, &err); int32_t idx = USEARCH_DONE; if (U_SUCCESS(err)) { idx = usearch_first(pSearch, &err); if (idx != USEARCH_DONE) { if (idx == 0) { result = TRUE; } else { UCollationElements* pCollElem = ucol_openElements(pColl, lpSource, idx, &err); if (U_SUCCESS(err)) { int32_t curCollElem = UCOL_NULLORDER; result = TRUE; while ((curCollElem = ucol_next(pCollElem, &err)) != UCOL_NULLORDER) { if (curCollElem != 0) { // Non ignorable collation element found between start of the // string and the first match for lpTarget. result = FALSE; break; } } if (U_FAILURE(err)) { result = FALSE; } ucol_closeElements(pCollElem); } } } usearch_close(pSearch); } ucol_close(pColl); } return result; }
static engine_return_t engine_fixed_whole_line_match(error_t **error, void *data, const UString *subject) { FETCH_DATA(data, p, fixed_pattern_t); if (ustring_empty(p->pattern)) { return ustring_empty(subject) ? ENGINE_WHOLE_LINE_MATCH : ENGINE_NO_MATCH; } else if (NULL != p->usearch) { int32_t ret; UErrorCode status; status = U_ZERO_ERROR; usearch_setText(p->usearch, subject->ptr, subject->len, &status); if (U_FAILURE(status)) { icu_error_set(error, FATAL, status, "usearch_setText"); return ENGINE_FAILURE; } ret = usearch_first(p->usearch, &status); if (U_FAILURE(status)) { icu_error_set(error, FATAL, status, "usearch_first"); return ENGINE_FAILURE; } usearch_unbindText(p->usearch); return (ret != USEARCH_DONE && ((size_t) usearch_getMatchedLength(p->usearch)) == subject->len ? ENGINE_WHOLE_LINE_MATCH : ENGINE_NO_MATCH); } else { if (IS_CASE_INSENSITIVE(p->flags)) { return (0 == u_strcasecmp(p->pattern->ptr, subject->ptr, 0) ? ENGINE_WHOLE_LINE_MATCH : ENGINE_NO_MATCH); } else { return (0 == u_strcmp(p->pattern->ptr, subject->ptr) ? ENGINE_WHOLE_LINE_MATCH : ENGINE_NO_MATCH); } } }
// Collator.find {{{ static PyObject * icu_Collator_find(icu_Collator *self, PyObject *args, PyObject *kwargs) { PyObject *a_, *b_; int32_t asz, bsz; UChar *a, *b; wchar_t *aw, *bw; UErrorCode status = U_ZERO_ERROR; UStringSearch *search = NULL; int32_t pos = -1, length = -1; if (!PyArg_ParseTuple(args, "UU", &a_, &b_)) return NULL; asz = (int32_t)PyUnicode_GetSize(a_); bsz = (int32_t)PyUnicode_GetSize(b_); a = (UChar*)calloc(asz*4 + 2, sizeof(UChar)); b = (UChar*)calloc(bsz*4 + 2, sizeof(UChar)); aw = (wchar_t*)calloc(asz*4 + 2, sizeof(wchar_t)); bw = (wchar_t*)calloc(bsz*4 + 2, sizeof(wchar_t)); if (a == NULL || b == NULL || aw == NULL || bw == NULL) return PyErr_NoMemory(); PyUnicode_AsWideChar((PyUnicodeObject*)a_, aw, asz*4+1); PyUnicode_AsWideChar((PyUnicodeObject*)b_, bw, bsz*4+1); u_strFromWCS(a, asz*4 + 1, NULL, aw, -1, &status); u_strFromWCS(b, bsz*4 + 1, NULL, bw, -1, &status); if (U_SUCCESS(status)) { search = usearch_openFromCollator(a, -1, b, -1, self->collator, NULL, &status); if (U_SUCCESS(status)) { pos = usearch_first(search, &status); if (pos != USEARCH_DONE) length = usearch_getMatchedLength(search); else pos = -1; } if (search != NULL) usearch_close(search); } free(a); free(b); free(aw); free(bw); return Py_BuildValue("ii", pos, length); } // }}}
// Collator.find {{{ static PyObject * icu_Collator_find(icu_Collator *self, PyObject *args, PyObject *kwargs) { #if PY_VERSION_HEX >= 0x03030000 #error Not implemented for python >= 3.3 #endif PyObject *a_ = NULL, *b_ = NULL; UChar *a = NULL, *b = NULL; int32_t asz = 0, bsz = 0, pos = -1, length = -1; UErrorCode status = U_ZERO_ERROR; UStringSearch *search = NULL; if (!PyArg_ParseTuple(args, "OO", &a_, &b_)) return NULL; a = python_to_icu(a_, &asz, 1); if (a == NULL) goto end; b = python_to_icu(b_, &bsz, 1); if (b == NULL) goto end; search = usearch_openFromCollator(a, asz, b, bsz, self->collator, NULL, &status); if (U_SUCCESS(status)) { pos = usearch_first(search, &status); if (pos != USEARCH_DONE) { length = usearch_getMatchedLength(search); #ifdef Py_UNICODE_WIDE // We have to return number of unicode characters since the string // could contain surrogate pairs which are represented as a single // character in python wide builds length = u_countChar32(b + pos, length); pos = u_countChar32(b, pos); #endif } else pos = -1; } end: if (search != NULL) usearch_close(search); if (a != NULL) free(a); if (b != NULL) free(b); return (PyErr_Occurred()) ? NULL : Py_BuildValue("ii", pos, length); } // }}}
/*
Function:
IndexOf

Returns the offset of the first collation match of lpTarget inside lpSource,
using the collator obtained from pSortHandle for the given options.
Returns USEARCH_DONE (-1) when there is no match, or when the collator or the
string search could not be obtained.
*/
extern "C" int32_t IndexOf(
    SortHandle* pSortHandle,
    const UChar* lpTarget,
    int32_t cwTargetLength,
    const UChar* lpSource,
    int32_t cwSourceLength,
    int32_t options)
{
    static_assert(USEARCH_DONE == -1, "managed side requires -1 for not found");

    UErrorCode err = U_ZERO_ERROR;

    const UCollator* pColl = GetCollatorFromSortHandle(pSortHandle, options, &err);
    if (U_FAILURE(err))
    {
        return USEARCH_DONE;
    }

    UStringSearch* pSearch = usearch_openFromCollator(
        lpTarget, cwTargetLength, lpSource, cwSourceLength, pColl, nullptr, &err);
    if (U_FAILURE(err))
    {
        return USEARCH_DONE;
    }

    // The collator is not closed here; only the search handle is released.
    const int32_t firstMatch = usearch_first(pSearch, &err);
    usearch_close(pSearch);
    return firstMatch;
}
Boolean CFStringFindWithOptionsAndLocale (CFStringRef str, CFStringRef stringToFind, CFRange rangeToSearch, CFStringCompareFlags searchOptions, CFLocaleRef locale, CFRange *result) { UniChar *pattern; UniChar *text; CFIndex patternLength; CFIndex textLength; CFIndex start; CFIndex end; CFAllocatorRef alloc; UCollator *ucol; UStringSearch *usrch; UErrorCode err = U_ZERO_ERROR; if (rangeToSearch.length == 0) return false; alloc = CFAllocatorGetDefault (); textLength = CFStringGetLength (stringToFind); if (textLength == 0) return false; patternLength = rangeToSearch.length; pattern = CFAllocatorAllocate (alloc, patternLength * sizeof(UniChar), 0); CFStringGetCharacters (str, rangeToSearch, pattern); text = CFAllocatorAllocate (alloc, textLength * sizeof(UniChar), 0); CFStringGetCharacters (stringToFind, CFRangeMake(0, textLength), text); ucol = CFStringICUCollatorOpen (searchOptions, locale); usrch = usearch_openFromCollator (text, textLength, pattern, patternLength, ucol, NULL, &err); if (U_FAILURE(err)) return false; /* FIXME: need to handle kCFCompareAnchored */ if (searchOptions & kCFCompareBackwards) { start = usearch_last (usrch, &err); } else { start = usearch_first (usrch, &err); } if (start == USEARCH_DONE) { CFAllocatorDeallocate (alloc, pattern); CFAllocatorDeallocate (alloc, text); return false; } end = usearch_getMatchedLength (usrch); usearch_close (usrch); CFStringICUCollatorClose (ucol); if (result) *result = CFRangeMake (start + rangeToSearch.location, end); CFAllocatorDeallocate (alloc, pattern); CFAllocatorDeallocate (alloc, text); return true; }
static engine_return_t engine_fixed_match(error_t **error, void *data, const UString *subject) { int32_t ret; UErrorCode status; FETCH_DATA(data, p, fixed_pattern_t); status = U_ZERO_ERROR; if (ustring_empty(p->pattern)) { if (IS_WORD_BOUNDED(p->flags)) { if (ustring_empty(subject)) { return ENGINE_MATCH_FOUND; } else { int32_t l, u, lastState, state; ubrk_setText(p->ubrk, subject->ptr, subject->len, &status); if (U_FAILURE(status)) { icu_error_set(error, FATAL, status, "ubrk_setText"); return ENGINE_FAILURE; } if (UBRK_DONE != (l = ubrk_first(p->ubrk))) { lastState = ubrk_getRuleStatus(p->ubrk); while (UBRK_DONE != (u = ubrk_next(p->ubrk))) { state = ubrk_getRuleStatus(p->ubrk); if (UBRK_WORD_NONE == lastState && lastState == state) { return ENGINE_MATCH_FOUND; } lastState = state; l = u; } } return ENGINE_NO_MATCH; } } else { return ENGINE_MATCH_FOUND; } } else if (NULL != p->usearch) { if (subject->len > 0) { usearch_setText(p->usearch, subject->ptr, subject->len, &status); if (U_FAILURE(status)) { icu_error_set(error, FATAL, status, "usearch_setText"); return ENGINE_FAILURE; } ret = usearch_first(p->usearch, &status); if (U_FAILURE(status)) { icu_error_set(error, FATAL, status, "usearch_first"); return ENGINE_FAILURE; } usearch_unbindText(p->usearch); return (ret != USEARCH_DONE ? ENGINE_MATCH_FOUND : ENGINE_NO_MATCH); } else { return ENGINE_NO_MATCH; } } else { UChar *m; int32_t pos; pos = 0; ret = ENGINE_NO_MATCH; if (NULL != p->ubrk) { ubrk_setText(p->ubrk, subject->ptr, subject->len, &status); if (U_FAILURE(status)) { icu_error_set(error, FATAL, status, "ubrk_setText"); return ENGINE_FAILURE; } } while (NULL != (m = u_strFindFirst(subject->ptr + pos, subject->len - pos, p->pattern->ptr, p->pattern->len))) { pos = m - subject->ptr; if (NULL == p->ubrk || (ubrk_isBoundary(p->ubrk, pos) && ubrk_isBoundary(p->ubrk, pos + p->pattern->len))) { ret = ENGINE_MATCH_FOUND; } pos += p->pattern->len; } ubrk_unbindText(p->ubrk); return ret; } }
static engine_return_t engine_fixed_match_all(error_t **error, void *data, const UString *subject, interval_list_t *intervals) { int32_t matches; UErrorCode status; FETCH_DATA(data, p, fixed_pattern_t); matches = 0; status = U_ZERO_ERROR; if (ustring_empty(p->pattern)) { if (IS_WORD_BOUNDED(p->flags)) { if (ustring_empty(subject)) { return ENGINE_MATCH_FOUND; } else { int32_t l, u, lastState, state; ubrk_setText(p->ubrk, subject->ptr, subject->len, &status); if (U_FAILURE(status)) { icu_error_set(error, FATAL, status, "ubrk_setText"); return ENGINE_FAILURE; } if (UBRK_DONE != (l = ubrk_first(p->ubrk))) { lastState = ubrk_getRuleStatus(p->ubrk); while (UBRK_DONE != (u = ubrk_next(p->ubrk))) { state = ubrk_getRuleStatus(p->ubrk); if (UBRK_WORD_NONE == lastState && lastState == state) { return ENGINE_MATCH_FOUND; } lastState = state; l = u; } } return ENGINE_NO_MATCH; } } else { return ENGINE_MATCH_FOUND; } } else if (NULL != p->usearch) { int32_t l, u; if (subject->len > 0) { usearch_setText(p->usearch, subject->ptr, subject->len, &status); if (U_FAILURE(status)) { icu_error_set(error, FATAL, status, "usearch_setText"); return ENGINE_FAILURE; } for (l = usearch_first(p->usearch, &status); U_SUCCESS(status) && USEARCH_DONE != l; l = usearch_next(p->usearch, &status)) { matches++; u = l + usearch_getMatchedLength(p->usearch); if (interval_list_add(intervals, subject->len, l, u)) { return ENGINE_WHOLE_LINE_MATCH; } } if (U_FAILURE(status)) { icu_error_set(error, FATAL, status, "usearch_[first|next]"); return ENGINE_FAILURE; } usearch_unbindText(p->usearch); return (matches ? 
ENGINE_MATCH_FOUND : ENGINE_NO_MATCH); } else { return ENGINE_NO_MATCH; } } else { UChar *m; int32_t pos; pos = 0; if (NULL != p->ubrk) { ubrk_setText(p->ubrk, subject->ptr, subject->len, &status); if (U_FAILURE(status)) { icu_error_set(error, FATAL, status, "ubrk_setText"); return ENGINE_FAILURE; } } while (NULL != (m = u_strFindFirst(subject->ptr + pos, subject->len - pos, p->pattern->ptr, p->pattern->len))) { pos = m - subject->ptr; if (NULL == p->ubrk || (ubrk_isBoundary(p->ubrk, pos) && ubrk_isBoundary(p->ubrk, pos + p->pattern->len))) { matches++; if (interval_list_add(intervals, subject->len, pos, pos + p->pattern->len)) { return ENGINE_WHOLE_LINE_MATCH; } } pos += p->pattern->len; } ubrk_unbindText(p->ubrk); return (matches ? ENGINE_MATCH_FOUND : ENGINE_NO_MATCH); } }