int findNextWordFromIndex(const UChar* chars, int len, int position, bool forward) { TextBreakIterator* it = wordBreakIterator(chars, len); if (forward) { position = textBreakFollowing(it, position); while (position != TextBreakDone) { // We stop searching when the character preceeding the break // is alphanumeric. if (position < len && isAlphanumeric(chars[position - 1])) return position; position = textBreakFollowing(it, position); } return len; } else { position = textBreakPreceding(it, position); while (position != TextBreakDone) { // We stop searching when the character following the break // is alphanumeric. if (position > 0 && isAlphanumeric(chars[position])) return position; position = textBreakPreceding(it, position); } return 0; } }
// Returns the offset of the next word boundary after |position| (when |forward|
// is true) or before it (when |forward| is false) that is adjacent to an
// alphanumeric character, as classified by u_isalnum. Boundaries are obtained
// from a word break iterator over |text|.
//
// Forward searches fall back to text.length() and backward searches fall back
// to 0 when no qualifying boundary is found.
int findNextWordFromIndex(StringView text, int position, bool forward)
{
    TextBreakIterator* iterator = wordBreakIterator(text);

    if (!forward) {
        // Going backwards, accept the first non-zero boundary whose following
        // character is alphanumeric.
        for (int boundary = textBreakPreceding(iterator, position); boundary != TextBreakDone; boundary = textBreakPreceding(iterator, boundary)) {
            if (boundary && u_isalnum(text[boundary]))
                return boundary;
        }
        return 0;
    }

    // Going forwards, accept the first boundary before the end of the text
    // whose preceding character is alphanumeric.
    for (int boundary = textBreakFollowing(iterator, position); boundary != TextBreakDone; boundary = textBreakFollowing(iterator, boundary)) {
        if (static_cast<unsigned>(boundary) < text.length() && u_isalnum(text[boundary - 1]))
            return boundary;
    }
    return text.length();
}
unsigned CharacterData::parserAppendData(const UChar* data, unsigned dataLength, unsigned lengthLimit) { unsigned oldLength = m_data.length(); unsigned end = min(dataLength, lengthLimit - oldLength); // Check that we are not on an unbreakable boundary. // Some text break iterator implementations work best if the passed buffer is as small as possible, // see <https://bugs.webkit.org/show_bug.cgi?id=29092>. // We need at least two characters look-ahead to account for UTF-16 surrogates. if (end < dataLength) { NonSharedCharacterBreakIterator it(data, (end + 2 > dataLength) ? dataLength : end + 2); if (!isTextBreak(it, end)) end = textBreakPreceding(it, end); } if (!end) return 0; m_data.append(data, end); updateRenderer(oldLength, 0); document()->incDOMTreeVersion(); // We don't call dispatchModifiedEvent here because we don't want the // parser to dispatch DOM mutation events. if (parentNode()) parentNode()->childrenChanged(); return end; }
// Creates a Text node holding the next piece of |data|, at most |maxChars| long
// unless the piece would otherwise be empty (see the FIXME below).
//
// |charsLeft| is in/out: on entry it is the number of trailing characters of
// |data| that have not been consumed yet; on return it is the number that
// remain after this node.
PassRefPtr<Text> Text::createWithLengthLimit(Document* document, const String& data, unsigned& charsLeft, unsigned maxChars)
{
    const unsigned dataLength = data.length();

    // Fast path: nothing has been consumed yet and everything fits in one node.
    if (charsLeft == dataLength && charsLeft <= maxChars) {
        charsLeft = 0;
        return create(document, data);
    }

    const unsigned start = dataLength - charsLeft;
    unsigned end = start + min(charsLeft, maxChars);

    if (end < dataLength) {
        // The piece ends inside the data, so make sure the cut is on a character break.
        // Some text break iterator implementations work best if the passed buffer is
        // as small as possible, see <https://bugs.webkit.org/show_bug.cgi?id=29092>.
        // Two characters of look-ahead are needed to account for UTF-16 surrogates.
        const unsigned chunkLength = end - start;
        const unsigned windowLength = (end + 2 > dataLength) ? dataLength - start : chunkLength + 2;
        TextBreakIterator* it = characterBreakIterator(data.characters() + start, windowLength);
        if (!isTextBreak(it, chunkLength))
            end = textBreakPreceding(it, chunkLength) + start;
    }

    // With maxChars of unbreakable characters the adjustment above could leave an
    // empty piece, and the caller would then loop forever.
    // FIXME: It would be better to just have the old value of end before calling
    // textBreakPreceding rather than this, because this exceeds the length limit.
    if (end <= start)
        end = dataLength;

    charsLeft = dataLength - end;
    return create(document, data.substring(start, end - start));
}
// Returns |offset| itself when it is a text break; otherwise returns the break
// preceding it, or 0 when the iterator reports TextBreakDone.
static inline int textBreakAtOrPreceding(TextBreakIterator* it, int offset)
{
    if (!isTextBreak(it, offset)) {
        int preceding = textBreakPreceding(it, offset);
        return preceding != TextBreakDone ? preceding : 0;
    }
    return offset;
}
unsigned CharacterData::parserAppendData(const String& string, unsigned offset, unsigned lengthLimit) { unsigned oldLength = m_data.length(); ASSERT(lengthLimit >= oldLength); unsigned characterLength = string.length() - offset; unsigned characterLengthLimit = std::min(characterLength, lengthLimit - oldLength); // Check that we are not on an unbreakable boundary. // Some text break iterator implementations work best if the passed buffer is as small as possible, // see <https://bugs.webkit.org/show_bug.cgi?id=29092>. // We need at least two characters look-ahead to account for UTF-16 surrogates. if (characterLengthLimit < characterLength) { NonSharedCharacterBreakIterator it(StringView(string).substring(offset, (characterLengthLimit + 2 > characterLength) ? characterLength : characterLengthLimit + 2)); if (!isTextBreak(it, characterLengthLimit)) characterLengthLimit = textBreakPreceding(it, characterLengthLimit); } if (!characterLengthLimit) return 0; if (string.is8Bit()) m_data.append(string.characters8() + offset, characterLengthLimit); else m_data.append(string.characters16() + offset, characterLengthLimit); ASSERT(!renderer() || is<Text>(*this)); if (is<Text>(*this) && parentNode()) downcast<Text>(*this).updateRendererAfterContentChange(oldLength, 0); document().incDOMTreeVersion(); // We don't call dispatchModifiedEvent here because we don't want the // parser to dispatch DOM mutation events. if (parentNode()) { ContainerNode::ChildChange change = { ContainerNode::TextChanged, ElementTraversal::previousSibling(*this), ElementTraversal::nextSibling(*this), ContainerNode::ChildChangeSourceParser }; parentNode()->childrenChanged(change); } return characterLengthLimit; }
unsigned CharacterData::parserAppendData(const String& string, unsigned offset, unsigned lengthLimit) { unsigned oldLength = m_data.length(); ASSERT(lengthLimit >= oldLength); unsigned characterLength = string.length() - offset; unsigned characterLengthLimit = min(characterLength, lengthLimit - oldLength); // Check that we are not on an unbreakable boundary. // Some text break iterator implementations work best if the passed buffer is as small as possible, // see <https://bugs.webkit.org/show_bug.cgi?id=29092>. // We need at least two characters look-ahead to account for UTF-16 surrogates. ASSERT(!string.is8Bit() || string.containsOnlyLatin1()); // Latin-1 doesn't have unbreakable boundaries. if (characterLengthLimit < characterLength && !string.is8Bit()) { NonSharedCharacterBreakIterator it(string.characters16() + offset, (characterLengthLimit + 2 > characterLength) ? characterLength : characterLengthLimit + 2); if (!isTextBreak(it, characterLengthLimit)) characterLengthLimit = textBreakPreceding(it, characterLengthLimit); } if (!characterLengthLimit) return 0; if (string.is8Bit()) m_data.append(string.characters8() + offset, characterLengthLimit); else m_data.append(string.characters16() + offset, characterLengthLimit); ASSERT(!renderer() || isTextNode()); if (isTextNode()) toText(this)->updateTextRenderer(oldLength, 0); document()->incDOMTreeVersion(); // We don't call dispatchModifiedEvent here because we don't want the // parser to dispatch DOM mutation events. if (parentNode()) parentNode()->childrenChanged(); return characterLengthLimit; }
// Creates a Text node holding the next piece of |text|, at most |maxChars| long
// unless the piece would otherwise be empty, in which case the rest of the text
// is taken.
//
// |charsLeft| is in/out: on entry it is the number of trailing characters of
// |text| that have not been consumed yet; on return it is the number that
// remain after this node.
PassRefPtr<Text> Text::createWithLengthLimit(Document* doc, const String& text, unsigned& charsLeft, unsigned maxChars)
{
    const unsigned textLength = text.length();

    // Fast path: nothing has been consumed yet and everything fits in one node.
    if (charsLeft == textLength && charsLeft <= maxChars) {
        charsLeft = 0;
        return new Text(doc, text);
    }

    const unsigned start = textLength - charsLeft;
    unsigned end = start + std::min(charsLeft, maxChars);

    // Make sure the piece does not end on an unbreakable boundary.
    TextBreakIterator* it = characterBreakIterator(text.characters(), textLength);
    if (end < textLength) {
        if (!isTextBreak(it, end))
            end = textBreakPreceding(it, end);
    }

    // maxChars of unbreakable characters would leave an empty piece and could
    // lead to an infinite loop, so take everything that is left instead.
    if (end <= start)
        end = textLength;

    String nodeText = text.substring(start, end - start);
    charsLeft = textLength - end;
    return new Text(doc, nodeText);
}
int CoreTextController::offsetForPosition(int h, bool includePartialGlyphs) { // FIXME: For positions occurring within a ligature, we should return the closest "ligature caret" or // approximate it by dividing the width of the ligature by the number of characters it encompasses. // However, Core Text does not expose a low-level API for directly finding // out how many characters a ligature encompasses (the "attachment count"). if (h >= m_totalWidth) return m_run.ltr() ? m_end : 0; if (h < 0) return m_run.ltr() ? 0 : m_end; CGFloat x = h; size_t runCount = m_coreTextRuns.size(); size_t offsetIntoAdjustedGlyphs = 0; for (size_t r = 0; r < runCount; ++r) { const CoreTextRun& coreTextRun = m_coreTextRuns[r]; for (unsigned j = 0; j < coreTextRun.glyphCount(); ++j) { CGFloat adjustedAdvance = m_adjustedAdvances[offsetIntoAdjustedGlyphs + j].width; if (x <= adjustedAdvance) { CFIndex hitIndex = coreTextRun.indexAt(j); int stringLength = coreTextRun.stringLength(); TextBreakIterator* characterIterator = characterBreakIterator(coreTextRun.characters(), stringLength); int clusterStart; if (isTextBreak(characterIterator, hitIndex)) clusterStart = hitIndex; else { clusterStart = textBreakPreceding(characterIterator, hitIndex); if (clusterStart == TextBreakDone) clusterStart = 0; } if (!includePartialGlyphs) return coreTextRun.stringLocation() + clusterStart; int clusterEnd = textBreakFollowing(characterIterator, hitIndex); if (clusterEnd == TextBreakDone) clusterEnd = stringLength; CGFloat clusterWidth = adjustedAdvance; // FIXME: The search stops at the boundaries of coreTextRun. In theory, it should go on into neighboring CoreTextRuns // derived from the same CTLine. In practice, we do not expect there to be more than one CTRun in a CTLine, as no // reordering and on font fallback should occur within a CTLine. 
if (clusterEnd - clusterStart > 1) { int firstGlyphBeforeCluster = j - 1; while (firstGlyphBeforeCluster && coreTextRun.indexAt(firstGlyphBeforeCluster) >= clusterStart && coreTextRun.indexAt(firstGlyphBeforeCluster) < clusterEnd) { CGFloat width = m_adjustedAdvances[offsetIntoAdjustedGlyphs + firstGlyphBeforeCluster].width; clusterWidth += width; x += width; firstGlyphBeforeCluster--; } unsigned firstGlyphAfterCluster = j + 1; while (firstGlyphAfterCluster < coreTextRun.glyphCount() && coreTextRun.indexAt(firstGlyphAfterCluster) >= clusterStart && coreTextRun.indexAt(firstGlyphAfterCluster) < clusterEnd) { clusterWidth += m_adjustedAdvances[offsetIntoAdjustedGlyphs + firstGlyphAfterCluster].width; firstGlyphAfterCluster++; } } if (x <= clusterWidth / 2) return coreTextRun.stringLocation() + (m_run.ltr() ? clusterStart : clusterEnd); else return coreTextRun.stringLocation() + (m_run.ltr() ? clusterEnd : clusterStart); } x -= adjustedAdvance; } offsetIntoAdjustedGlyphs += coreTextRun.glyphCount(); } ASSERT_NOT_REACHED(); return 0; }