// Constructs a search buffer for |target|, honoring |options|.
//
// The target is copied into m_target, has its quote marks and soft hyphens
// folded, and is installed as the pattern of the shared ICU string searcher.
// |target| must not be empty.
inline SearchBuffer::SearchBuffer(const String& target, FindOptions options)
    : m_options(options)
    , m_prefixLength(0)
    , m_numberOfCharactersJustAppended(0)
    , m_atBreak(true)
    , m_needsMoreContext(options & AtWordStarts)
    , m_targetRequiresKanaWorkaround(containsKanaLetters(target))
{
    ASSERT(!target.isEmpty());
    target.appendTo(m_target);

    // FIXME: Ideally the searcher itself would be tailored to fold quote marks,
    // rather than us running a separate replacement pass here. As of this
    // writing ICU offers no way to layer extra tailoring on top of the
    // locale-specific tailoring.
    foldQuoteMarksAndSoftHyphens(m_target.data(), m_target.size());

    // The buffer holds at least eight times the pattern length (or the
    // minimum buffer size, whichever is larger); a quarter of that capacity
    // is used as the overlap.
    const size_t patternLength = m_target.size();
    m_buffer.reserveInitialCapacity(std::max(patternLength * 8, minimumSearchBufferSize));
    m_overlap = m_buffer.capacity() / 4;

    if (patternLength && (m_options & AtWordStarts)) {
        UChar32 leadingCodePoint;
        U16_GET(m_target.data(), 0, 0, patternLength, leadingCodePoint);
        // A separator-category character never really begins a word, so a
        // pattern that starts with one simply drops the AtWordStarts option.
        if (isSeparator(leadingCodePoint)) {
            m_options &= ~AtWordStarts;
            m_needsMoreContext = false;
        }
    }

    // Take the single global searcher. Should we ever need more than one
    // search buffer alive at once, this will have to move to multiple
    // searchers.
    lockSearcher();
    UStringSearch* const icuSearcher = blink::searcher();
    UCollator* const collator = usearch_getCollator(icuSearcher);

    // Case-insensitive searches use primary strength; all others tertiary.
    UCollationStrength wantedStrength = UCOL_TERTIARY;
    if (m_options & CaseInsensitive)
        wantedStrength = UCOL_PRIMARY;

    // Only touch the collator (and reset the searcher) when the strength
    // actually needs to change.
    if (ucol_getStrength(collator) != wantedStrength) {
        ucol_setStrength(collator, wantedStrength);
        usearch_reset(icuSearcher);
    }

    UErrorCode status = U_ZERO_ERROR;
    usearch_setPattern(icuSearcher, m_target.data(), patternLength, &status);
    ASSERT(status == U_ZERO_ERROR);

    // The kana workaround compares against an NFC-normalized copy of the target.
    if (m_targetRequiresKanaWorkaround)
        normalizeCharactersIntoNFCForm(m_target.data(), m_target.size(), m_normalizedTarget);
}
// Constructs a search buffer for |target|, honoring |options|.
//
// The target is copied into m_target, has its quote marks and soft hyphens
// folded, and is handed to a freshly created TextSearcherICU as the pattern.
// |target| must not be empty.
inline SearchBuffer::SearchBuffer(const String& target, FindOptions options)
    : m_options(options),
      m_prefixLength(0),
      m_numberOfCharactersJustAppended(0),
      m_atBreak(true),
      m_needsMoreContext(options & AtWordStarts),
      m_targetRequiresKanaWorkaround(containsKanaLetters(target)) {
  DCHECK(!target.isEmpty()) << target;
  target.appendTo(m_target);

  // FIXME: Ideally the searcher itself would be tailored to fold quote marks,
  // rather than us running a separate replacement pass here. As of this
  // writing ICU offers no way to layer extra tailoring on top of the
  // locale-specific tailoring.
  foldQuoteMarksAndSoftHyphens(m_target.data(), m_target.size());

  // The buffer holds at least eight times the pattern length (or the minimum
  // buffer size, whichever is larger); a quarter of that capacity is used as
  // the overlap.
  const size_t patternLength = m_target.size();
  m_buffer.reserveInitialCapacity(
      std::max(patternLength * 8, kMinimumSearchBufferSize));
  m_overlap = m_buffer.capacity() / 4;

  // A separator-category character never really begins a word, so a pattern
  // that starts with one simply drops the AtWordStarts option. The code point
  // is only read when the option is set and the pattern is non-empty.
  if ((m_options & AtWordStarts) && patternLength &&
      isSeparator(getCodePointAt(m_target.data(), 0, patternLength))) {
    m_options &= ~AtWordStarts;
    m_needsMoreContext = false;
  }

  const bool caseSensitive = !(m_options & CaseInsensitive);
  m_textSearcher = WTF::makeUnique<TextSearcherICU>();
  m_textSearcher->setPattern(StringView(m_target.data(), m_target.size()),
                             caseSensitive);

  // The kana workaround compares against an NFC-normalized copy of the target.
  if (m_targetRequiresKanaWorkaround) {
    normalizeCharactersIntoNFCForm(m_target.data(), m_target.size(),
                                   m_normalizedTarget);
  }
}
inline void SearchBuffer::append(const CharType* characters, size_t length) { DCHECK(length); if (m_atBreak) { m_buffer.shrink(0); m_prefixLength = 0; m_atBreak = false; } else if (m_buffer.size() == m_buffer.capacity()) { memcpy(m_buffer.data(), m_buffer.data() + m_buffer.size() - m_overlap, m_overlap * sizeof(UChar)); m_prefixLength -= std::min(m_prefixLength, m_buffer.size() - m_overlap); m_buffer.shrink(m_overlap); } size_t oldLength = m_buffer.size(); size_t usableLength = std::min(m_buffer.capacity() - oldLength, length); DCHECK(usableLength); m_buffer.resize(oldLength + usableLength); UChar* destination = m_buffer.data() + oldLength; StringImpl::copyChars(destination, characters, usableLength); foldQuoteMarksAndSoftHyphens(destination, usableLength); m_numberOfCharactersJustAppended = usableLength; }