/**
 * Destructor.
 * Closes the underlying UStringSearch handle when one exists and then clears
 * m_search_ so it no longer refers to anything after the handle is gone.
 */
StringSearch::~StringSearch()
{
    if (m_strsrch_ == NULL) {
        // No handle was ever opened; nothing to release here.
        return;
    }

    usearch_close(m_strsrch_);
    m_search_ = NULL;
}
/** * Main -- process command line, read in and pre-process the test file, * call other functions to do the actual tests. */ int main(int argc, const char** argv) { if (processOptions(argc, argv, opts) != TRUE || opt_help) { printf(gHelpString); return -1; } if (processCollator() != TRUE) { fprintf(stderr, "Error creating collator\n"); return -1; } if (processStringSearch() != TRUE) { fprintf(stderr, "Error creating string search\n"); return -1; } fprintf(stdout, "Finding pattern %s in source %s\n", opt_pattern, opt_source); findPattern(); ucol_close(collator); usearch_close(search); return 0; }
// Collator.contains {{{
/*
 * Collator.contains(a, b)
 *
 * Converts both arguments with python_to_icu() and reports whether a
 * collation-aware search for `a` inside `b` yields at least one match.
 * An empty `a` is reported as contained without running a search.
 * Returns NULL when a Python error is pending, otherwise True or False.
 */
static PyObject *
icu_Collator_contains(icu_Collator *self, PyObject *args, PyObject *kwargs) {
    PyObject *pattern_obj = NULL, *text_obj = NULL;
    UChar *pattern = NULL, *text = NULL;
    int32_t pattern_len = 0, text_len = 0, pos = -1;
    uint8_t found = 0;
    UErrorCode status = U_ZERO_ERROR;
    UStringSearch *search = NULL;

    if (!PyArg_ParseTuple(args, "OO", &pattern_obj, &text_obj)) return NULL;

    pattern = python_to_icu(pattern_obj, &pattern_len, 1);
    if (pattern != NULL) {
        if (pattern_len == 0) {
            /* The empty pattern counts as found; the text is never converted. */
            found = TRUE;
        } else {
            text = python_to_icu(text_obj, &text_len, 1);
            if (text != NULL) {
                search = usearch_openFromCollator(pattern, pattern_len, text, text_len,
                                                  self->collator, NULL, &status);
                if (U_SUCCESS(status)) {
                    pos = usearch_first(search, &status);
                    if (pos != USEARCH_DONE) found = TRUE;
                }
            }
        }
    }

    /* Release everything that was acquired above, whichever path was taken. */
    if (search != NULL) usearch_close(search);
    if (pattern != NULL) free(pattern);
    if (text != NULL) free(text);

    if (PyErr_Occurred()) return NULL;
    if (found) Py_RETURN_TRUE;
    Py_RETURN_FALSE;
}
// }}}
/* Return value is a "Win32 BOOL" (1 = true, 0 = false) */ extern "C" int32_t EndsWith( SortHandle* pSortHandle, const UChar* lpTarget, int32_t cwTargetLength, const UChar* lpSource, int32_t cwSourceLength, int32_t options) { int32_t result = FALSE; UErrorCode err = U_ZERO_ERROR; const UCollator* pColl = GetCollatorFromSortHandle(pSortHandle, options, &err); if (U_SUCCESS(err)) { UStringSearch* pSearch = usearch_openFromCollator(lpTarget, cwTargetLength, lpSource, cwSourceLength, pColl, nullptr, &err); int32_t idx = USEARCH_DONE; if (U_SUCCESS(err)) { idx = usearch_last(pSearch, &err); if (idx != USEARCH_DONE) { if ((idx + usearch_getMatchedLength(pSearch)) == cwSourceLength) { result = TRUE; } // TODO (dotnet/corefx#3467): We should do something similar to what // StartsWith does where we can ignore // some collation elements at the end of the string if they are zero. } usearch_close(pSearch); } } return result; }
/*
 * Close every non-NULL search handle in searches[0 .. count-1] and reset
 * each of those slots to NULL.
 */
static void free_searches(UStringSearch **searches, int32_t count) {
    int32_t idx = 0;

    while (idx < count) {
        UStringSearch **slot = &searches[idx];

        if (*slot != NULL) usearch_close(*slot);
        *slot = NULL;
        ++idx;
    }
}
/* Return value is a "Win32 BOOL" (1 = true, 0 = false) */ extern "C" int32_t StartsWith( const char* lpLocaleName, const UChar* lpTarget, int32_t cwTargetLength, const UChar* lpSource, int32_t cwSourceLength, int32_t options) { int32_t result = FALSE; UErrorCode err = U_ZERO_ERROR; UCollator* pColl = GetCollatorForLocaleAndOptions(lpLocaleName, options, &err); if (U_SUCCESS(err)) { UStringSearch* pSearch = usearch_openFromCollator(lpTarget, cwTargetLength, lpSource, cwSourceLength, pColl, nullptr, &err); int32_t idx = USEARCH_DONE; if (U_SUCCESS(err)) { idx = usearch_first(pSearch, &err); if (idx != USEARCH_DONE) { if (idx == 0) { result = TRUE; } else { UCollationElements* pCollElem = ucol_openElements(pColl, lpSource, idx, &err); if (U_SUCCESS(err)) { int32_t curCollElem = UCOL_NULLORDER; result = TRUE; while ((curCollElem = ucol_next(pCollElem, &err)) != UCOL_NULLORDER) { if (curCollElem != 0) { // Non ignorable collation element found between start of the // string and the first match for lpTarget. result = FALSE; break; } } if (U_FAILURE(err)) { result = FALSE; } ucol_closeElements(pCollElem); } } } usearch_close(pSearch); } ucol_close(pColl); } return result; }
/**
 * Destructor.
 *
 * Closes the cached matcher, if there is one. The collator pointer is only
 * reset, never closed here.
 */
StriContainerUStringSearch::~StriContainerUStringSearch()
{
   col = NULL; // col is owned by the caller

   if (lastMatcher == NULL)
      return;

   usearch_close(lastMatcher);
   lastMatcher = NULL;
}
/** the returned matcher shall not be deleted by the user * * it is assumed that \code{vectorize_next()} is used: * for \code{i >= this->n} the last matcher is returned * * * @param i index * @param searchStr string to search in * @param searchStr_len string length in UChars */ UStringSearch* StriContainerUStringSearch::getMatcher(R_len_t i, const UChar* searchStr, int32_t searchStr_len) { if (!lastMatcher) { this->lastMatcherIndex = (i % n); UErrorCode status = U_ZERO_ERROR; lastMatcher = usearch_openFromCollator(this->get(i).getBuffer(), this->get(i).length(), searchStr, searchStr_len, col, NULL, &status); STRI__CHECKICUSTATUS_THROW(status, {usearch_close(lastMatcher); lastMatcher = NULL;}) return lastMatcher;
/*
 * Tear down a fixed_pattern_t: close its search handle and break iterator
 * when they are set, destroy the pattern string, then free the struct itself.
 */
static void fixed_pattern_destroy(fixed_pattern_t *p)
{
    if (p->usearch != NULL)
        usearch_close(p->usearch);

    if (p->ubrk != NULL)
        ubrk_close(p->ubrk);

    ustring_destroy(p->pattern);
    free(p);
}
/**
 * Destructor.
 *
 * The collation data and the collator are fetched through bms before bms is
 * deleted, and are released afterwards. The pattern buffer is freed, and the
 * usearch handle is closed unless TEST_BOYER_MOORE_SEARCH is defined.
 */
StringSearchPerformanceTest::~StringSearchPerformanceTest()
{
    // Grab everything reachable only through bms while bms is still alive.
    CollData  *collData = bms->getData();
    UCollator *collator = collData->getCollator();

    delete bms;
    delete targetString;

    CollData::close(collData);
    ucol_close(collator);

    if (pttrn) {
        free(pttrn);
    }

#ifndef TEST_BOYER_MOORE_SEARCH
    if (srch) {
        usearch_close(srch);
    }
#endif
}
// Collator.find {{{
/*
 * Collator.find(a, b) -> (pos, length)
 *
 * Converts both unicode arguments to UTF-16 (via wchar_t) and runs a
 * collation-aware search for `a` inside `b`. Returns the index of the first
 * match and its matched length; (-1, -1) when there is no match or when an
 * ICU call fails.
 *
 * Returns NULL with MemoryError set when a work buffer cannot be allocated.
 */
static PyObject *
icu_Collator_find(icu_Collator *self, PyObject *args, PyObject *kwargs) {
    PyObject *a_, *b_;
    int32_t asz, bsz;
    UChar *a, *b;
    wchar_t *aw, *bw;
    UErrorCode status = U_ZERO_ERROR;
    UStringSearch *search = NULL;
    int32_t pos = -1, length = -1;

    if (!PyArg_ParseTuple(args, "UU", &a_, &b_)) return NULL;
    asz = (int32_t)PyUnicode_GetSize(a_);
    bsz = (int32_t)PyUnicode_GetSize(b_);

    a = (UChar*)calloc(asz*4 + 2, sizeof(UChar));
    b = (UChar*)calloc(bsz*4 + 2, sizeof(UChar));
    aw = (wchar_t*)calloc(asz*4 + 2, sizeof(wchar_t));
    bw = (wchar_t*)calloc(bsz*4 + 2, sizeof(wchar_t));

    if (a == NULL || b == NULL || aw == NULL || bw == NULL) {
        /* Some of the four allocations may have succeeded; release them so a
         * partial failure does not leak. free(NULL) is a no-op. */
        free(a); free(b); free(aw); free(bw);
        return PyErr_NoMemory();
    }

    PyUnicode_AsWideChar((PyUnicodeObject*)a_, aw, asz*4+1);
    PyUnicode_AsWideChar((PyUnicodeObject*)b_, bw, bsz*4+1);
    u_strFromWCS(a, asz*4 + 1, NULL, aw, -1, &status);
    u_strFromWCS(b, bsz*4 + 1, NULL, bw, -1, &status);

    if (U_SUCCESS(status)) {
        /* Length -1: both buffers are passed as NUL-terminated strings. */
        search = usearch_openFromCollator(a, -1, b, -1, self->collator, NULL, &status);
        if (U_SUCCESS(status)) {
            pos = usearch_first(search, &status);
            if (pos != USEARCH_DONE)
                length = usearch_getMatchedLength(search);
            else
                pos = -1;
        }
        if (search != NULL) usearch_close(search);
    }

    free(a); free(b); free(aw); free(bw);

    return Py_BuildValue("ii", pos, length);
}
// }}}
// operator overloading --------------------------------------------- StringSearch & StringSearch::operator=(const StringSearch &that) { if ((*this) != that) { UErrorCode status = U_ZERO_ERROR; m_text_ = that.m_text_; m_breakiterator_ = that.m_breakiterator_; m_pattern_ = that.m_pattern_; // all m_search_ in the parent class is linked up with m_strsrch_ usearch_close(m_strsrch_); m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), m_pattern_.length(), m_text_.getBuffer(), m_text_.length(), that.m_strsrch_->collator, NULL, &status); // Alias the collator m_collator_.setUCollator((UCollator *)m_strsrch_->collator); m_search_ = m_strsrch_->search; } return *this; }
// Collator.find {{{
/*
 * Collator.find(a, b) -> (pos, length)
 *
 * Converts both arguments with python_to_icu() and runs a collation-aware
 * search for `a` inside `b`. Returns the position of the first match and the
 * matched length, or (-1, -1) when nothing matches or the search could not be
 * opened. Returns NULL when a Python error is pending.
 */
static PyObject *
icu_Collator_find(icu_Collator *self, PyObject *args, PyObject *kwargs) {
#if PY_VERSION_HEX >= 0x03030000
#error Not implemented for python >= 3.3
#endif
    PyObject *pattern_obj = NULL, *text_obj = NULL;
    UChar *pattern = NULL, *text = NULL;
    int32_t pattern_len = 0, text_len = 0, pos = -1, length = -1;
    UErrorCode status = U_ZERO_ERROR;
    UStringSearch *search = NULL;

    if (!PyArg_ParseTuple(args, "OO", &pattern_obj, &text_obj)) return NULL;

    pattern = python_to_icu(pattern_obj, &pattern_len, 1);
    /* The text is only converted once the pattern conversion has succeeded. */
    if (pattern != NULL) text = python_to_icu(text_obj, &text_len, 1);

    if (pattern != NULL && text != NULL) {
        search = usearch_openFromCollator(pattern, pattern_len, text, text_len,
                                          self->collator, NULL, &status);
        if (U_SUCCESS(status)) {
            pos = usearch_first(search, &status);
            if (pos == USEARCH_DONE) {
                pos = -1;
            } else {
                length = usearch_getMatchedLength(search);
#ifdef Py_UNICODE_WIDE
                // We have to return number of unicode characters since the string
                // could contain surrogate pairs which are represented as a single
                // character in python wide builds
                length = u_countChar32(text + pos, length);
                pos = u_countChar32(text, pos);
#endif
            }
        }
    }

    if (search != NULL) usearch_close(search);
    if (pattern != NULL) free(pattern);
    if (text != NULL) free(text);

    if (PyErr_Occurred()) return NULL;
    return Py_BuildValue("ii", pos, length);
}
// }}}
/*
Function:
LastIndexOf

Returns the index reported by usearch_last() for lpTarget within lpSource,
or USEARCH_DONE (-1) when there is no match or when the collator or the
search object cannot be obtained.
*/
extern "C" int32_t LastIndexOf(
    SortHandle* pSortHandle,
    const UChar* lpTarget,
    int32_t cwTargetLength,
    const UChar* lpSource,
    int32_t cwSourceLength,
    int32_t options)
{
    static_assert(USEARCH_DONE == -1, "managed side requires -1 for not found");

    UErrorCode status = U_ZERO_ERROR;

    const UCollator* collator = GetCollatorFromSortHandle(pSortHandle, options, &status);
    if (U_FAILURE(status))
    {
        return USEARCH_DONE;
    }

    UStringSearch* search = usearch_openFromCollator(
        lpTarget, cwTargetLength, lpSource, cwSourceLength, collator, nullptr, &status);
    if (U_FAILURE(status))
    {
        return USEARCH_DONE;
    }

    const int32_t lastMatch = usearch_last(search, &status);
    usearch_close(search);
    return lastMatch;
}
// operator overloading --------------------------------------------- StringSearch & StringSearch::operator=(const StringSearch &that) { if ((*this) != that) { UErrorCode status = U_ZERO_ERROR; m_text_ = that.m_text_; m_breakiterator_ = that.m_breakiterator_; m_pattern_ = that.m_pattern_; // all m_search_ in the parent class is linked up with m_strsrch_ usearch_close(m_strsrch_); m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), m_pattern_.length(), m_text_.getBuffer(), m_text_.length(), that.m_strsrch_->collator, NULL, &status); int32_t length; const UChar *rules = ucol_getRules(m_strsrch_->collator, &length); m_collation_rules_.setTo(rules, length); m_collator_.setUCollator((UCollator *)m_strsrch_->collator, &m_collation_rules_); m_search_ = m_strsrch_->search; } return *this; }
/*
 * Searches the characters of `str` inside `rangeToSearch` for `stringToFind`
 * using an ICU collator built from `searchOptions` and `locale`.
 *
 * With kCFCompareBackwards the last match is reported, otherwise the first.
 * On success, and when `result` is not NULL, *result receives the match
 * range with its location expressed relative to `str` (the match index plus
 * rangeToSearch.location).
 *
 * Returns true when a match is found. Returns false when the range or
 * stringToFind is empty, when a buffer cannot be allocated, when the search
 * object cannot be opened, or when nothing matches. Every buffer and ICU
 * object acquired here is released on all return paths.
 */
Boolean
CFStringFindWithOptionsAndLocale (CFStringRef str,
  CFStringRef stringToFind, CFRange rangeToSearch,
  CFStringCompareFlags searchOptions, CFLocaleRef locale, CFRange *result)
{
  UniChar *haystack = NULL;   /* characters of str within rangeToSearch */
  UniChar *needle = NULL;     /* characters of stringToFind */
  CFIndex haystackLength;
  CFIndex needleLength;
  CFIndex start;
  CFIndex matchLength;
  CFAllocatorRef alloc;
  UCollator *ucol = NULL;
  UStringSearch *usrch = NULL;
  UErrorCode err = U_ZERO_ERROR;
  Boolean found = false;

  if (rangeToSearch.length == 0)
    return false;

  alloc = CFAllocatorGetDefault ();
  needleLength = CFStringGetLength (stringToFind);
  if (needleLength == 0)
    return false;
  haystackLength = rangeToSearch.length;

  haystack = (UniChar *) CFAllocatorAllocate (alloc,
    haystackLength * sizeof(UniChar), 0);
  needle = (UniChar *) CFAllocatorAllocate (alloc,
    needleLength * sizeof(UniChar), 0);

  if (haystack != NULL && needle != NULL)
    {
      CFStringGetCharacters (str, rangeToSearch, haystack);
      CFStringGetCharacters (stringToFind, CFRangeMake(0, needleLength),
        needle);

      ucol = CFStringICUCollatorOpen (searchOptions, locale);
      /* ICU argument order: pattern first, then the text to search in. */
      usrch = usearch_openFromCollator (needle, needleLength, haystack,
        haystackLength, ucol, NULL, &err);

      if (U_SUCCESS(err))
        {
          /* FIXME: need to handle kCFCompareAnchored */
          if (searchOptions & kCFCompareBackwards)
            start = usearch_last (usrch, &err);
          else
            start = usearch_first (usrch, &err);

          if (start != USEARCH_DONE)
            {
              matchLength = usearch_getMatchedLength (usrch);
              if (result)
                *result = CFRangeMake (start + rangeToSearch.location,
                  matchLength);
              found = true;
            }
        }

      /* Release the ICU objects whether or not a match was found. */
      if (usrch != NULL)
        usearch_close (usrch);
      if (ucol != NULL)
        CFStringICUCollatorClose (ucol);
    }

  if (haystack != NULL)
    CFAllocatorDeallocate (alloc, haystack);
  if (needle != NULL)
    CFAllocatorDeallocate (alloc, needle);

  return found;
}
/* {{{ grapheme_strpos_utf16 - locate needle in haystack via UTF-16 and an ICU string search
 *
 * Converts both UTF-8 inputs to UTF-16, opens a usearch over the haystack
 * with a grapheme break iterator attached, and looks up either the last
 * match (when `last` is non-zero) or the next match from the start offset.
 *
 * haystack / haystack_len  UTF-8 text to search in
 * needle / needle_len      UTF-8 text to search for
 * offset                   start offset, translated by grapheme_get_haystack_offset()
 *                          into a UTF-16 position (presumably counted in graphemes -
 *                          confirm against that helper)
 * puchar_pos               optional out parameter; set to -1 up front and to the
 *                          UTF-16 match position when a match is accepted
 * f_ignore_case            when non-zero the search collator strength is set to
 *                          UCOL_SECONDARY
 * last                     non-zero selects usearch_last(), zero selects usearch_next()
 *
 * Returns the value of grapheme_count_graphemes() for the match position, or
 * -1 when there is no match or the match does not start on a break-iterator
 * boundary.
 *
 * NOTE(review): STRPOS_CHECK_STATUS is a macro defined outside this block; it
 * presumably records the error, releases the locals and returns early when
 * `status` is a failure - confirm against its definition.
 */
int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle, size_t needle_len, int32_t offset, int32_t *puchar_pos, int f_ignore_case, int last)
{
	UChar *uhaystack = NULL, *uneedle = NULL;
	int32_t uhaystack_len = 0, uneedle_len = 0, char_pos, ret_pos, offset_pos = 0;
	unsigned char u_break_iterator_buffer[U_BRK_SAFECLONE_BUFFERSIZE];
	UBreakIterator* bi = NULL;
	UErrorCode status;
	UStringSearch* src = NULL;
	UCollator *coll;

	/* Default the optional out parameter to "not found". */
	if(puchar_pos) {
		*puchar_pos = -1;
	}

	/* Convert both strings to UTF-16. */

	status = U_ZERO_ERROR;
	intl_convert_utf8_to_utf16(&uhaystack, &uhaystack_len, haystack, haystack_len, &status );
	STRPOS_CHECK_STATUS(status, "Error converting input string to UTF-16");

	status = U_ZERO_ERROR;
	intl_convert_utf8_to_utf16(&uneedle, &uneedle_len, needle, needle_len, &status );
	STRPOS_CHECK_STATUS(status, "Error converting needle string to UTF-16");

	/* Set up a break iterator over the UTF-16 haystack; it is used below for
	 * the search, the offset translation and the boundary check. */
	status = U_ZERO_ERROR;
	bi = grapheme_get_break_iterator(u_break_iterator_buffer, &status );
	STRPOS_CHECK_STATUS(status, "Failed to get iterator");
	status = U_ZERO_ERROR;
	ubrk_setText(bi, uhaystack, uhaystack_len, &status);
	STRPOS_CHECK_STATUS(status, "Failed to set up iterator");

	/* Open the search with an empty locale string and the break iterator attached. */
	status = U_ZERO_ERROR;
	src = usearch_open(uneedle, uneedle_len, uhaystack, uhaystack_len, "", bi, &status);
	STRPOS_CHECK_STATUS(status, "Error creating search object");

	if(f_ignore_case) {
		/* Lower the collator strength to secondary, then reset the search
		 * after changing the collator attribute. */
		coll = usearch_getCollator(src);
		status = U_ZERO_ERROR;
		ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_SECONDARY, &status);
		STRPOS_CHECK_STATUS(status, "Error setting collation strength");
		usearch_reset(src);
	}

	if(offset != 0) {
		offset_pos = grapheme_get_haystack_offset(bi, offset);
		if(offset_pos == -1) {
			/* -1 marks an offset the helper could not translate; report it
			 * through the same error path as an ICU failure. */
			status = U_ILLEGAL_ARGUMENT_ERROR;
			STRPOS_CHECK_STATUS(status, "Invalid search offset");
		}
		status = U_ZERO_ERROR;
		usearch_setOffset(src, offset_pos, &status);
		STRPOS_CHECK_STATUS(status, "Invalid search offset");
	}

	if(last) {
		char_pos = usearch_last(src, &status);
		if(char_pos < offset_pos) {
			/* the last match lies before the requested start offset: treat as not found */
			char_pos = USEARCH_DONE;
		}
	} else {
		char_pos = usearch_next(src, &status);
	}
	STRPOS_CHECK_STATUS(status, "Error looking up string");

	/* Accept the match only when it starts on a break-iterator boundary. */
	if(char_pos != USEARCH_DONE && ubrk_isBoundary(bi, char_pos)) {
		ret_pos = grapheme_count_graphemes(bi, uhaystack,char_pos);
		if(puchar_pos) {
			*puchar_pos = char_pos;
		}
	} else {
		ret_pos = -1;
	}

	/* Release the converted strings and the ICU objects. */
	if (uhaystack) {
		efree( uhaystack );
	}
	if (uneedle) {
		efree( uneedle );
	}
	ubrk_close (bi);
	usearch_close (src);

	return ret_pos;
}