/**
 * Constructs a StringSearch that looks for `pattern` in the text supplied
 * through a CharacterIterator, using the collation of `locale`.
 *
 * @param pattern   text to search for; copied into m_pattern_
 * @param text      source of the text to be searched (handed to the base class)
 * @param locale    locale whose name is passed to usearch_open
 * @param breakiter optional break iterator, forwarded to the base class and to
 *                  usearch_open
 * @param status    in/out error code; if it already signals failure nothing is
 *                  opened and m_strsrch_ is set to NULL
 */
StringSearch::StringSearch(const UnicodeString &pattern,
                           CharacterIterator   &text,
                           const Locale        &locale,
                           BreakIterator       *breakiter,
                           UErrorCode          &status)
    : SearchIterator(text, breakiter),
      m_collator_(),
      m_pattern_(pattern)
{
    // An incoming error means no C search object is created at all.
    if (U_FAILURE(status)) {
        m_strsrch_ = NULL;
        return;
    }

    m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
                              m_text_.getBuffer(), m_text_.length(),
                              locale.getName(),
                              (UBreakIterator *)breakiter,
                              &status);

    // m_search_ has been created by the base SearchIterator class; release it
    // here so that it can be redirected to the structure inside m_strsrch_.
    uprv_free(m_search_);
    m_search_ = NULL;

    if (U_FAILURE(status)) {
        return;
    }

    // Expose the collator held by the C search object through m_collator_,
    // together with a copy of that collator's rule string.
    int32_t ruleLength;
    const UChar *ruleText = ucol_getRules(m_strsrch_->collator, &ruleLength);
    m_collation_rules_.setTo(ruleText, ruleLength);
    m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
                             &m_collation_rules_);

    m_search_ = m_strsrch_->search;
}
/**
 * Constructs a StringSearch that looks for `pattern` inside `text`, using the
 * collation of `locale`.
 *
 * @param pattern   text to search for; copied into m_pattern_
 * @param text      text to be searched (handed to the base class)
 * @param locale    locale whose name is passed to usearch_open
 * @param breakiter optional break iterator, forwarded to the base class and to
 *                  usearch_open
 * @param status    in/out error code; if it already signals failure nothing is
 *                  opened and m_strsrch_ is set to NULL
 */
StringSearch::StringSearch(const UnicodeString &pattern,
                           const UnicodeString &text,
                           const Locale        &locale,
                           BreakIterator       *breakiter,
                           UErrorCode          &status)
    : SearchIterator(text, breakiter),
      m_pattern_(pattern)
{
    // An incoming error means no C search object is created at all.
    if (U_FAILURE(status)) {
        m_strsrch_ = NULL;
        return;
    }

    m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
                              m_text_.getBuffer(), m_text_.length(),
                              locale.getName(),
                              (UBreakIterator *)breakiter,
                              &status);

    // m_search_ has been created by the base SearchIterator class; free that
    // allocation, then either adopt the structure inside m_strsrch_ (success)
    // or leave the pointer NULL (failure).
    uprv_free(m_search_);
    m_search_ = U_SUCCESS(status) ? m_strsrch_->search : NULL;
}
// Opens an ICU string searcher for the current search locale, requesting the
// "search" collation via the "@collation=search" locale keyword.
static UStringSearch* createSearcher()
{
    // usearch_open needs a non-empty pattern and a non-empty text in order not
    // to fail. The actual contents are irrelevant because no search is ever
    // performed before both the pattern and the text have been set, so a single
    // newline character serves as placeholder for both.
    String collatorLocale = currentSearchLocaleID() + String("@collation=search");

    UErrorCode openStatus = U_ZERO_ERROR;
    UStringSearch* result = usearch_open(&newlineCharacter, 1,
                                         &newlineCharacter, 1,
                                         collatorLocale.utf8().data(),
                                         0,
                                         &openStatus);

    // Only plain success or one of the two fallback warnings is expected.
    ASSERT(openStatus == U_ZERO_ERROR
        || openStatus == U_USING_FALLBACK_WARNING
        || openStatus == U_USING_DEFAULT_WARNING);
    return result;
}
static void *engine_fixed_compile(error_t **error, UString *ustr, uint32_t flags) { UErrorCode status; fixed_pattern_t *p; p = mem_new(*p); p->pattern = ustr; // not needed with usearch ? p->flags = flags; p->ubrk = NULL; p->usearch = NULL; status = U_ZERO_ERROR; if (ustring_empty(ustr)) { if (IS_WORD_BOUNDED(flags)) { p->ubrk = ubrk_open(UBRK_WORD, NULL, NULL, 0, &status); } } else { if (!IS_WHOLE_LINE(flags)) { if (IS_WORD_BOUNDED(flags)) { p->ubrk = ubrk_open(UBRK_WORD, NULL, NULL, 0, &status); } else if (WITH_GRAPHEME()) { p->ubrk = ubrk_open(UBRK_CHARACTER, NULL, NULL, 0, &status); } if (U_FAILURE(status)) { fixed_pattern_destroy(p); icu_error_set(error, FATAL, status, "ubrk_open"); return NULL; } } if (IS_WORD_BOUNDED(flags) || (IS_CASE_INSENSITIVE(flags) && !IS_WHOLE_LINE(flags))) { p->usearch = usearch_open(ustr->ptr, ustr->len, USEARCH_FAKE_USTR, uloc_getDefault(), p->ubrk, &status); if (U_FAILURE(status)) { if (NULL != p->ubrk) { ubrk_close(p->ubrk); } fixed_pattern_destroy(p); icu_error_set(error, FATAL, status, "usearch_open"); return NULL; } if (IS_CASE_INSENSITIVE(flags)) { UCollator *ucol; ucol = usearch_getCollator(p->usearch); ucol_setStrength(ucol, (flags & ~OPT_MASK) > 1 ? UCOL_SECONDARY : UCOL_PRIMARY); } } } return p; }
/**
 * Constructs a StringSearch that looks for `pattern` inside `text`, using the
 * collation of `locale`.
 *
 * @param pattern   text to search for; copied into m_pattern_
 * @param text      text to be searched (handed to the base class)
 * @param locale    locale whose name is passed to usearch_open
 * @param breakiter optional break iterator, forwarded to the base class and to
 *                  usearch_open
 * @param status    in/out error code; if it already signals failure nothing is
 *                  opened and m_strsrch_ is set to NULL
 */
StringSearch::StringSearch(const UnicodeString &pattern,
                           const UnicodeString &text,
                           const Locale        &locale,
                           BreakIterator       *breakiter,
                           UErrorCode          &status)
    : SearchIterator(text, breakiter),
      m_collator_(),
      m_pattern_(pattern)
{
    // An incoming error means no C search object is created at all.
    if (U_FAILURE(status)) {
        m_strsrch_ = NULL;
        return;
    }

    m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
                              m_text_.getBuffer(), m_text_.length(),
                              locale.getName(),
                              (UBreakIterator *)breakiter,
                              &status);

    // m_search_ has been created by the base SearchIterator class; release it
    // here so that it can be redirected to the structure inside m_strsrch_.
    uprv_free(m_search_);
    m_search_ = NULL;

    // !!! dlf: m_collator_ is an odd beast. It is essentially an aliasing
    // wrapper around the internal collator and rules, which (here) are owned
    // by this StringSearch object. Consequently:
    //   1) its destructor _should not_ delete the UCollator or the rules, and
    //   2) changes made through the exposed collator (setStrength etc.)
    //      _should_ modify the underlying UCollator.
    // The collator is therefore not a copy-on-write alias, and it has to tell
    // itself apart not merely from 'stand alone' collators but also from
    // copy-on-write ones. That requires additional state, which setUCollator
    // should set.
    if (U_FAILURE(status)) {
        return;
    }

    int32_t ruleLength;
    const UChar *ruleText = ucol_getRules(m_strsrch_->collator, &ruleLength);
    m_collation_rules_.setTo(ruleText, ruleLength);
    m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
                             &m_collation_rules_);

    m_search_ = m_strsrch_->search;
}
StringSearchPerformanceTest::StringSearchPerformanceTest(int32_t argc, const char *argv[], UErrorCode &status) :UPerfTest(argc,argv,status){ int32_t start, end; #ifdef TEST_BOYER_MOORE_SEARCH bms = NULL; #else srch = NULL; #endif pttrn = NULL; if(status== U_ILLEGAL_ARGUMENT_ERROR || line_mode){ fprintf(stderr,gUsageString, "strsrchperf"); return; } /* Get the Text */ src = getBuffer(srcLen, status); #if 0 /* Get a word to find. Do this by selecting a random word with a word breakiterator. */ UBreakIterator* brk = ubrk_open(UBRK_WORD, locale, src, srcLen, &status); if(U_FAILURE(status)){ fprintf(stderr, "FAILED to create pattern for searching. Error: %s\n", u_errorName(status)); return; } start = ubrk_preceding(brk, 1000); end = ubrk_following(brk, start); pttrnLen = end - start; UChar* temp = (UChar*)malloc(sizeof(UChar)*(pttrnLen)); for (int i = 0; i < pttrnLen; i++) { temp[i] = src[start++]; } pttrn = temp; /* store word in pttrn */ ubrk_close(brk); #else /* The first line of the file contains the pattern */ start = 0; for(end = start; ; end += 1) { UChar ch = src[end]; if (ch == 0x000A || ch == 0x000D || ch == 0x2028) { break; } } pttrnLen = end - start; UChar* temp = (UChar*)malloc(sizeof(UChar)*(pttrnLen)); for (int i = 0; i < pttrnLen; i++) { temp[i] = src[start++]; } pttrn = temp; /* store word in pttrn */ #endif #ifdef TEST_BOYER_MOORE_SEARCH UnicodeString patternString(pttrn, pttrnLen); UCollator *coll = ucol_open(locale, &status); CollData *data = CollData::open(coll, status); targetString = new UnicodeString(src, srcLen); bms = new BoyerMooreSearch(data, patternString, targetString, status); #else /* Create the StringSearch object to be use in performance test. */ srch = usearch_open(pttrn, pttrnLen, src, srcLen, locale, NULL, &status); #endif if(U_FAILURE(status)){ fprintf(stderr, "FAILED to create UPerfTest object. Error: %s\n", u_errorName(status)); return; } }
/* {{{ grapheme_strpos_utf16 - strpos/strrpos implemented on top of UTF-16
 *
 * Converts haystack and needle from UTF-8 to UTF-16, then uses an ICU string
 * search to locate the needle.
 *
 * haystack, haystack_len  UTF-8 text to search in
 * needle, needle_len      UTF-8 text to search for
 * offset                  position to start from; 0 means no offset is applied
 * puchar_pos              optional out parameter: preset to -1, and set to the
 *                         position reported by the search when a match is
 *                         accepted
 * f_ignore_case           non-zero: collator strength is set to UCOL_SECONDARY
 * last                    non-zero: look for the last occurrence, else the next
 *
 * Returns the value of grapheme_count_graphemes() for the match position, or
 * -1 when nothing is found or the match does not start on a boundary of the
 * break iterator. Failures go through STRPOS_CHECK_STATUS.
 * NOTE(review): that macro is defined elsewhere; it presumably records the
 * error, releases the locals and returns - confirm before renaming any local.
 */
int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle, size_t needle_len, int32_t offset, int32_t *puchar_pos, int f_ignore_case, int last)
{
	UChar *uhaystack = NULL, *uneedle = NULL;
	int32_t uhaystack_len = 0, uneedle_len = 0;
	int32_t char_pos, ret_pos, offset_pos = 0;
	unsigned char u_break_iterator_buffer[U_BRK_SAFECLONE_BUFFERSIZE];
	UBreakIterator* bi = NULL;
	UErrorCode status;
	UStringSearch* src = NULL;
	UCollator *coll;

	if (NULL != puchar_pos) {
		*puchar_pos = -1;
	}

	/* Both inputs are needed as UTF-16 */
	status = U_ZERO_ERROR;
	intl_convert_utf8_to_utf16(&uhaystack, &uhaystack_len, haystack, haystack_len, &status);
	STRPOS_CHECK_STATUS(status, "Error converting input string to UTF-16");

	status = U_ZERO_ERROR;
	intl_convert_utf8_to_utf16(&uneedle, &uneedle_len, needle, needle_len, &status);
	STRPOS_CHECK_STATUS(status, "Error converting needle string to UTF-16");

	/* Break iterator over the haystack; used for the offset and the result */
	status = U_ZERO_ERROR;
	bi = grapheme_get_break_iterator(u_break_iterator_buffer, &status);
	STRPOS_CHECK_STATUS(status, "Failed to get iterator");

	status = U_ZERO_ERROR;
	ubrk_setText(bi, uhaystack, uhaystack_len, &status);
	STRPOS_CHECK_STATUS(status, "Failed to set up iterator");

	status = U_ZERO_ERROR;
	src = usearch_open(uneedle, uneedle_len, uhaystack, uhaystack_len, "", bi, &status);
	STRPOS_CHECK_STATUS(status, "Error creating search object");

	if (f_ignore_case) {
		/* Lower the collation strength, then reset the search object */
		coll = usearch_getCollator(src);
		status = U_ZERO_ERROR;
		ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_SECONDARY, &status);
		STRPOS_CHECK_STATUS(status, "Error setting collation strength");
		usearch_reset(src);
	}

	if (0 != offset) {
		/* Translate the caller's offset into a position within the haystack */
		offset_pos = grapheme_get_haystack_offset(bi, offset);
		if (-1 == offset_pos) {
			status = U_ILLEGAL_ARGUMENT_ERROR;
			STRPOS_CHECK_STATUS(status, "Invalid search offset");
		}
		status = U_ZERO_ERROR;
		usearch_setOffset(src, offset_pos, &status);
		STRPOS_CHECK_STATUS(status, "Invalid search offset");
	}

	if (!last) {
		char_pos = usearch_next(src, &status);
	} else {
		char_pos = usearch_last(src, &status);
		if (char_pos < offset_pos) {
			/* the last match lies before our start offset: ignore it */
			char_pos = USEARCH_DONE;
		}
	}
	STRPOS_CHECK_STATUS(status, "Error looking up string");

	/* Only a match that begins on an iterator boundary is reported */
	ret_pos = -1;
	if (USEARCH_DONE != char_pos && ubrk_isBoundary(bi, char_pos)) {
		ret_pos = grapheme_count_graphemes(bi, uhaystack, char_pos);
		if (NULL != puchar_pos) {
			*puchar_pos = char_pos;
		}
	}

	if (uhaystack) {
		efree(uhaystack);
	}
	if (uneedle) {
		efree(uneedle);
	}
	ubrk_close(bi);
	usearch_close(src);

	return ret_pos;
}