inline size_t SearchBuffer::search(size_t& start) { size_t size = m_buffer.size(); if (m_atBreak) { if (!size) return 0; } else { if (size != m_buffer.capacity()) return 0; } m_textSearcher->setText(m_buffer.data(), size); m_textSearcher->setOffset(m_prefixLength); MatchResult match; nextMatch: if (!m_textSearcher->nextMatchResult(match)) return 0; // Matches that start in the overlap area are only tentative. // The same match may appear later, matching more characters, // possibly including a combining character that's not yet in the buffer. if (!m_atBreak && match.start >= size - m_overlap) { size_t overlap = m_overlap; if (m_options & AtWordStarts) { // Ensure that there is sufficient context before matchStart the next time // around for determining if it is at a word boundary. int wordBoundaryContextStart = match.start; U16_BACK_1(m_buffer.data(), 0, wordBoundaryContextStart); wordBoundaryContextStart = startOfLastWordBoundaryContext( m_buffer.data(), wordBoundaryContextStart); overlap = std::min(size - 1, std::max(overlap, size - wordBoundaryContextStart)); } memcpy(m_buffer.data(), m_buffer.data() + size - overlap, overlap * sizeof(UChar)); m_prefixLength -= std::min(m_prefixLength, size - overlap); m_buffer.shrink(overlap); return 0; } SECURITY_DCHECK(match.start + match.length <= size); // If this match is "bad", move on to the next match. if (isBadMatch(m_buffer.data() + match.start, match.length) || ((m_options & AtWordStarts) && !isWordStartMatch(match.start, match.length))) { goto nextMatch; } size_t newSize = size - (match.start + 1); memmove(m_buffer.data(), m_buffer.data() + match.start + 1, newSize * sizeof(UChar)); m_prefixLength -= std::min<size_t>(m_prefixLength, match.start + 1); m_buffer.shrink(newSize); start = size - match.start; return match.length; }
inline size_t SearchBuffer::search(size_t& start) { size_t size = m_buffer.size(); if (m_atBreak) { if (!size) return 0; } else { if (size != m_buffer.capacity()) return 0; } UStringSearch* searcher = blink::searcher(); UErrorCode status = U_ZERO_ERROR; usearch_setText(searcher, m_buffer.data(), size, &status); ASSERT(status == U_ZERO_ERROR); usearch_setOffset(searcher, m_prefixLength, &status); ASSERT(status == U_ZERO_ERROR); int matchStart = usearch_next(searcher, &status); ASSERT(status == U_ZERO_ERROR); nextMatch: if (!(matchStart >= 0 && static_cast<size_t>(matchStart) < size)) { ASSERT(matchStart == USEARCH_DONE); return 0; } // Matches that start in the overlap area are only tentative. // The same match may appear later, matching more characters, // possibly including a combining character that's not yet in the buffer. if (!m_atBreak && static_cast<size_t>(matchStart) >= size - m_overlap) { size_t overlap = m_overlap; if (m_options & AtWordStarts) { // Ensure that there is sufficient context before matchStart the next time around for // determining if it is at a word boundary. int wordBoundaryContextStart = matchStart; U16_BACK_1(m_buffer.data(), 0, wordBoundaryContextStart); wordBoundaryContextStart = startOfLastWordBoundaryContext(m_buffer.data(), wordBoundaryContextStart); overlap = std::min(size - 1, std::max(overlap, size - wordBoundaryContextStart)); } memcpy(m_buffer.data(), m_buffer.data() + size - overlap, overlap * sizeof(UChar)); m_prefixLength -= std::min(m_prefixLength, size - overlap); m_buffer.shrink(overlap); return 0; } size_t matchedLength = usearch_getMatchedLength(searcher); ASSERT_WITH_SECURITY_IMPLICATION(matchStart + matchedLength <= size); // If this match is "bad", move on to the next match. if (isBadMatch(m_buffer.data() + matchStart, matchedLength) || ((m_options & AtWordStarts) && !isWordStartMatch(matchStart, matchedLength))) { matchStart = usearch_next(searcher, &status); ASSERT(status == U_ZERO_ERROR); goto nextMatch; } size_t newSize = size - (matchStart + 1); memmove(m_buffer.data(), m_buffer.data() + matchStart + 1, newSize * sizeof(UChar)); m_prefixLength -= std::min<size_t>(m_prefixLength, matchStart + 1); m_buffer.shrink(newSize); start = size - matchStart; return matchedLength; }