inline SearchBuffer::SearchBuffer(const String& target, FindOptions options) : m_options(options) , m_prefixLength(0) , m_numberOfCharactersJustAppended(0) , m_atBreak(true) , m_needsMoreContext(options & AtWordStarts) , m_targetRequiresKanaWorkaround(containsKanaLetters(target)) { ASSERT(!target.isEmpty()); target.appendTo(m_target); // FIXME: We'd like to tailor the searcher to fold quote marks for us instead // of doing it in a separate replacement pass here, but ICU doesn't offer a way // to add tailoring on top of the locale-specific tailoring as of this writing. foldQuoteMarksAndSoftHyphens(m_target.data(), m_target.size()); size_t targetLength = m_target.size(); m_buffer.reserveInitialCapacity(std::max(targetLength * 8, minimumSearchBufferSize)); m_overlap = m_buffer.capacity() / 4; if ((m_options & AtWordStarts) && targetLength) { UChar32 targetFirstCharacter; U16_GET(m_target.data(), 0, 0, targetLength, targetFirstCharacter); // Characters in the separator category never really occur at the beginning of a word, // so if the target begins with such a character, we just ignore the AtWordStart option. if (isSeparator(targetFirstCharacter)) { m_options &= ~AtWordStarts; m_needsMoreContext = false; } } // Grab the single global searcher. // If we ever have a reason to do more than once search buffer at once, we'll have // to move to multiple searchers. lockSearcher(); UStringSearch* searcher = blink::searcher(); UCollator* collator = usearch_getCollator(searcher); UCollationStrength strength = m_options & CaseInsensitive ? UCOL_PRIMARY : UCOL_TERTIARY; if (ucol_getStrength(collator) != strength) { ucol_setStrength(collator, strength); usearch_reset(searcher); } UErrorCode status = U_ZERO_ERROR; usearch_setPattern(searcher, m_target.data(), targetLength, &status); ASSERT(status == U_ZERO_ERROR); // The kana workaround requires a normalized copy of the target string. if (m_targetRequiresKanaWorkaround) normalizeCharactersIntoNFCForm(m_target.data(), m_target.size(), m_normalizedTarget); }
inline SearchBuffer::SearchBuffer(const String& target, FindOptions options) : m_options(options), m_prefixLength(0), m_numberOfCharactersJustAppended(0), m_atBreak(true), m_needsMoreContext(options & AtWordStarts), m_targetRequiresKanaWorkaround(containsKanaLetters(target)) { DCHECK(!target.isEmpty()) << target; target.appendTo(m_target); // FIXME: We'd like to tailor the searcher to fold quote marks for us instead // of doing it in a separate replacement pass here, but ICU doesn't offer a // way to add tailoring on top of the locale-specific tailoring as of this // writing. foldQuoteMarksAndSoftHyphens(m_target.data(), m_target.size()); size_t targetLength = m_target.size(); m_buffer.reserveInitialCapacity( std::max(targetLength * 8, kMinimumSearchBufferSize)); m_overlap = m_buffer.capacity() / 4; if ((m_options & AtWordStarts) && targetLength) { const UChar32 targetFirstCharacter = getCodePointAt(m_target.data(), 0, targetLength); // Characters in the separator category never really occur at the beginning // of a word, so if the target begins with such a character, we just ignore // the AtWordStart option. if (isSeparator(targetFirstCharacter)) { m_options &= ~AtWordStarts; m_needsMoreContext = false; } } m_textSearcher = WTF::makeUnique<TextSearcherICU>(); m_textSearcher->setPattern(StringView(m_target.data(), m_target.size()), !(m_options & CaseInsensitive)); // The kana workaround requires a normalized copy of the target string. if (m_targetRequiresKanaWorkaround) normalizeCharactersIntoNFCForm(m_target.data(), m_target.size(), m_normalizedTarget); }