Exemplo n.º 1
0
// Constructs a search buffer for |target| using the matching rules in |options|.
//
// The target is copied into m_target, folded with foldQuoteMarksAndSoftHyphens(),
// and installed as the pattern of the shared ICU string searcher. The text buffer
// is given an initial capacity of max(8 * target length, minimumSearchBufferSize),
// and m_overlap is set to a quarter of the resulting capacity.
//
// |target| must not be empty (asserted).
inline SearchBuffer::SearchBuffer(const String& target, FindOptions options)
    : m_options(options)
    , m_prefixLength(0)
    , m_numberOfCharactersJustAppended(0)
    , m_atBreak(true)
    , m_needsMoreContext(options & AtWordStarts)
    , m_targetRequiresKanaWorkaround(containsKanaLetters(target))
{
    ASSERT(!target.isEmpty());
    target.appendTo(m_target);

    // FIXME: Ideally the searcher itself would be tailored to fold quote marks,
    // rather than running a separate replacement pass here, but as of this
    // writing ICU offers no way to layer extra tailoring on top of the
    // locale-specific tailoring.
    foldQuoteMarksAndSoftHyphens(m_target.data(), m_target.size());

    const size_t patternLength = m_target.size();
    m_buffer.reserveInitialCapacity(std::max(patternLength * 8, minimumSearchBufferSize));
    m_overlap = m_buffer.capacity() / 4;

    if (patternLength && (m_options & AtWordStarts)) {
        // Decode the first code point of the (folded) target.
        UChar32 leadingCodePoint;
        U16_GET(m_target.data(), 0, 0, patternLength, leadingCodePoint);
        // A separator-category character never really begins a word, so a target
        // that starts with one cannot honor AtWordStarts; drop the option.
        if (isSeparator(leadingCodePoint)) {
            m_options &= ~AtWordStarts;
            m_needsMoreContext = false;
        }
    }

    // Take the single global searcher. If more than one search buffer ever needs
    // to be live at the same time, this will have to move to multiple searchers.
    // NOTE(review): the matching unlock is not visible in this block; presumably
    // it happens when the buffer is destroyed -- confirm.
    lockSearcher();

    UStringSearch* icuSearcher = blink::searcher();
    UCollator* icuCollator = usearch_getCollator(icuSearcher);

    // Case-insensitive searches compare at primary strength, all others at tertiary.
    UCollationStrength desiredStrength = UCOL_TERTIARY;
    if (m_options & CaseInsensitive)
        desiredStrength = UCOL_PRIMARY;

    // Only touch the collator (and reset the searcher) when the strength changes.
    if (ucol_getStrength(icuCollator) != desiredStrength) {
        ucol_setStrength(icuCollator, desiredStrength);
        usearch_reset(icuSearcher);
    }

    UErrorCode status = U_ZERO_ERROR;
    usearch_setPattern(icuSearcher, m_target.data(), patternLength, &status);
    ASSERT(status == U_ZERO_ERROR);

    // The kana workaround compares against an NFC-normalized copy of the target.
    if (m_targetRequiresKanaWorkaround)
        normalizeCharactersIntoNFCForm(m_target.data(), m_target.size(), m_normalizedTarget);
}
Exemplo n.º 2
0
// Constructs a search buffer for |target| using the matching rules in |options|.
//
// The target is copied into m_target, folded with
// foldQuoteMarksAndSoftHyphens(), and handed to a freshly created
// TextSearcherICU as its pattern. The text buffer is given an initial capacity
// of max(8 * target length, kMinimumSearchBufferSize), and m_overlap is set to
// a quarter of the resulting capacity.
//
// |target| must not be empty (DCHECKed).
inline SearchBuffer::SearchBuffer(const String& target, FindOptions options)
    : m_options(options),
      m_prefixLength(0),
      m_numberOfCharactersJustAppended(0),
      m_atBreak(true),
      m_needsMoreContext(options & AtWordStarts),
      m_targetRequiresKanaWorkaround(containsKanaLetters(target)) {
  DCHECK(!target.isEmpty()) << target;
  target.appendTo(m_target);

  // FIXME: Ideally the searcher itself would be tailored to fold quote marks,
  // rather than running a separate replacement pass here, but as of this
  // writing ICU offers no way to layer extra tailoring on top of the
  // locale-specific tailoring.
  foldQuoteMarksAndSoftHyphens(m_target.data(), m_target.size());

  const size_t patternLength = m_target.size();
  m_buffer.reserveInitialCapacity(
      std::max(patternLength * 8, kMinimumSearchBufferSize));
  m_overlap = m_buffer.capacity() / 4;

  if (patternLength && (m_options & AtWordStarts)) {
    // A separator-category character never really begins a word, so a target
    // that starts with one cannot honor AtWordStarts; drop the option.
    const UChar32 leadingCodePoint =
        getCodePointAt(m_target.data(), 0, patternLength);
    if (isSeparator(leadingCodePoint)) {
      m_options &= ~AtWordStarts;
      m_needsMoreContext = false;
    }
  }

  // Hand the folded target to a new searcher; the flag passed alongside it is
  // true exactly when the CaseInsensitive option is not set.
  const bool isCaseSensitive = !(m_options & CaseInsensitive);
  const StringView pattern(m_target.data(), m_target.size());
  m_textSearcher = WTF::makeUnique<TextSearcherICU>();
  m_textSearcher->setPattern(pattern, isCaseSensitive);

  // The kana workaround compares against an NFC-normalized copy of the target.
  if (m_targetRequiresKanaWorkaround) {
    normalizeCharactersIntoNFCForm(m_target.data(), m_target.size(),
                                   m_normalizedTarget);
  }
}
Exemplo n.º 3
0
inline void SearchBuffer::append(const CharType* characters, size_t length) {
  DCHECK(length);

  if (m_atBreak) {
    m_buffer.shrink(0);
    m_prefixLength = 0;
    m_atBreak = false;
  } else if (m_buffer.size() == m_buffer.capacity()) {
    memcpy(m_buffer.data(), m_buffer.data() + m_buffer.size() - m_overlap,
           m_overlap * sizeof(UChar));
    m_prefixLength -= std::min(m_prefixLength, m_buffer.size() - m_overlap);
    m_buffer.shrink(m_overlap);
  }

  size_t oldLength = m_buffer.size();
  size_t usableLength = std::min(m_buffer.capacity() - oldLength, length);
  DCHECK(usableLength);
  m_buffer.resize(oldLength + usableLength);
  UChar* destination = m_buffer.data() + oldLength;
  StringImpl::copyChars(destination, characters, usableLength);
  foldQuoteMarksAndSoftHyphens(destination, usableLength);
  m_numberOfCharactersJustAppended = usableLength;
}