/**
 * Replaces the pattern this StringSearch looks for.
 *
 * When status already holds a failure code on entry, nothing is modified.
 * Otherwise the pattern is copied into m_pattern_, and the buffer and length
 * of that stored copy (not of the caller's argument) are handed to
 * usearch_setPattern() together with status.
 *
 * @param pattern  the new search pattern
 * @param status   in/out error code; passed on to usearch_setPattern()
 */
void StringSearch::setPattern(const UnicodeString &pattern, UErrorCode &status)
{
    if (!U_SUCCESS(status)) {
        return;
    }

    m_pattern_ = pattern;
    usearch_setPattern(m_strsrch_,
                       m_pattern_.getBuffer(),
                       m_pattern_.length(),
                       &status);
}
// Re-points the shared searcher at a standalone string, then unlocks it.
inline SearchBuffer::~SearchBuffer()
{
    // The searcher is a static object whose pattern refers to the target and
    // whose text refers to the buffer. Leave it pointing at valid strings (the
    // newline character for both); otherwise a later usearch_reset() would
    // result in a use-after-free error.
    UStringSearch* const sharedSearcher = blink::searcher();

    UErrorCode status = U_ZERO_ERROR;
    usearch_setPattern(sharedSearcher, &newlineCharacter, 1, &status);
    usearch_setText(sharedSearcher, &newlineCharacter, 1, &status);
    ASSERT(status == U_ZERO_ERROR);

    unlockSearcher();
}
// Sets up a search for |target| under |options|: stores a folded copy of the
// target, reserves the text buffer, and configures the single global ICU
// searcher (strength and pattern). lockSearcher() is called here and is not
// undone inside this constructor.
inline SearchBuffer::SearchBuffer(const String& target, FindOptions options)
    : m_options(options)
    , m_prefixLength(0)
    , m_numberOfCharactersJustAppended(0)
    , m_atBreak(true)
    , m_needsMoreContext(options & AtWordStarts)
    , m_targetRequiresKanaWorkaround(containsKanaLetters(target))
{
    ASSERT(!target.isEmpty());
    target.appendTo(m_target);

    // FIXME: Ideally the searcher itself would be tailored to fold quote marks,
    // rather than running a separate replacement pass here. As of this writing
    // ICU offers no way to layer extra tailoring on top of the locale-specific
    // tailoring.
    foldQuoteMarksAndSoftHyphens(m_target.data(), m_target.size());

    const size_t patternLength = m_target.size();
    m_buffer.reserveInitialCapacity(std::max(patternLength * 8, minimumSearchBufferSize));
    m_overlap = m_buffer.capacity() / 4;

    if ((m_options & AtWordStarts) && patternLength) {
        UChar32 leadingCodePoint;
        U16_GET(m_target.data(), 0, 0, patternLength, leadingCodePoint);
        // A character in the separator category never really begins a word, so
        // a target that starts with one simply drops the AtWordStarts option.
        if (isSeparator(leadingCodePoint)) {
            m_options &= ~AtWordStarts;
            m_needsMoreContext = false;
        }
    }

    // Take the single global searcher. If more than one search buffer ever
    // needs to be live at a time, this has to move to multiple searchers.
    lockSearcher();

    UStringSearch* const icuSearcher = blink::searcher();
    UCollator* const collator = usearch_getCollator(icuSearcher);

    // Case-insensitive matching uses primary strength, everything else tertiary.
    // The searcher is reset only when the collator strength actually changes.
    const UCollationStrength wantedStrength = (m_options & CaseInsensitive) ? UCOL_PRIMARY : UCOL_TERTIARY;
    if (ucol_getStrength(collator) != wantedStrength) {
        ucol_setStrength(collator, wantedStrength);
        usearch_reset(icuSearcher);
    }

    UErrorCode status = U_ZERO_ERROR;
    usearch_setPattern(icuSearcher, m_target.data(), patternLength, &status);
    ASSERT(status == U_ZERO_ERROR);

    // The kana workaround needs an NFC-normalized copy of the target string.
    if (m_targetRequiresKanaWorkaround)
        normalizeCharactersIntoNFCForm(m_target.data(), m_target.size(), m_normalizedTarget);
}
int32_t SSearchTest::monkeyTestCase(UCollator *coll, const UnicodeString &testCase, const UnicodeString &pattern, const UnicodeString &altPattern, const char *name, const char *strength, uint32_t seed) { UErrorCode status = U_ZERO_ERROR; int32_t actualStart = -1, actualEnd = -1; //int32_t expectedStart = prefix.length(), expectedEnd = prefix.length() + altPattern.length(); int32_t expectedStart = -1, expectedEnd = -1; int32_t notFoundCount = 0; LocalUStringSearchPointer uss(usearch_openFromCollator(pattern.getBuffer(), pattern.length(), testCase.getBuffer(), testCase.length(), coll, NULL, // the break iterator &status)); // **** TODO: find *all* matches, not just first one **** simpleSearch(coll, testCase, 0, pattern, expectedStart, expectedEnd); usearch_search(uss.getAlias(), 0, &actualStart, &actualEnd, &status); if (expectedStart >= 0 && (actualStart != expectedStart || actualEnd != expectedEnd)) { errln("Search for <pattern> in <%s> failed: expected [%d, %d], got [%d, %d]\n" " strength=%s seed=%d", name, expectedStart, expectedEnd, actualStart, actualEnd, strength, seed); } if (expectedStart == -1 && actualStart == -1) { notFoundCount += 1; } // **** TODO: find *all* matches, not just first one **** simpleSearch(coll, testCase, 0, altPattern, expectedStart, expectedEnd); usearch_setPattern(uss.getAlias(), altPattern.getBuffer(), altPattern.length(), &status); usearch_search(uss.getAlias(), 0, &actualStart, &actualEnd, &status); if (expectedStart >= 0 && (actualStart != expectedStart || actualEnd != expectedEnd)) { errln("Search for <alt_pattern> in <%s> failed: expected [%d, %d], got [%d, %d]\n" " strength=%s seed=%d", name, expectedStart, expectedEnd, actualStart, actualEnd, strength, seed); } if (expectedStart == -1 && actualStart == -1) { notFoundCount += 1; } return notFoundCount; }
// Installs |pattern| (|length| UChars) as the pattern of m_searcher.
// DCHECK_EQ verifies that the ICU status is still U_ZERO_ERROR afterwards.
void TextSearcherICU::setPattern(const UChar* pattern, size_t length)
{
    UErrorCode status = U_ZERO_ERROR;
    // ICU takes the length as int32_t; the narrowing is spelled out here.
    usearch_setPattern(m_searcher, pattern, static_cast<int32_t>(length), &status);
    DCHECK_EQ(status, U_ZERO_ERROR);
}