unsigned CharacterData::parserAppendData(const UChar* data, unsigned dataLength, unsigned lengthLimit) { unsigned oldLength = m_data->length(); unsigned end = min(dataLength, lengthLimit - oldLength); // Check that we are not on an unbreakable boundary. // Some text break iterator implementations work best if the passed buffer is as small as possible, // see <https://bugs.webkit.org/show_bug.cgi?id=29092>. // We need at least two characters look-ahead to account for UTF-16 surrogates. if (end < dataLength) { TextBreakIterator* it = characterBreakIterator(data, (end + 2 > dataLength) ? dataLength : end + 2); if (!isTextBreak(it, end)) end = textBreakPreceding(it, end); } if (!end) return 0; String newStr = m_data; newStr.append(data, end); m_data = newStr.impl(); updateRenderer(oldLength, 0); // We don't call dispatchModifiedEvent here because we don't want the // parser to dispatch DOM mutation events. if (parentNode()) parentNode()->childrenChanged(); return end; }
// Starting at |currentOffset|, skips over consecutive positions of |text| that
// the word break iterator reports as breaks and returns the resulting offset.
// Returns |currentOffset| unchanged when no iterator is available.
static unsigned nextWordOffset(StringView text, unsigned currentOffset)
{
    // FIXME: avoid creating the iterator object here, it could be passed as a parameter.
    // isTextBreak() leaves the iterator pointing to the first boundary position at
    // or after "offset" (ubrk_isBoundary side effect).
    // For many word separators, the method doesn't properly determine the boundaries
    // without resetting the iterator.
    TextBreakIterator* wordIterator = wordBreakIterator(text);
    if (!wordIterator)
        return currentOffset;

    unsigned candidate = currentOffset;
    for (; candidate < text.length(); ++candidate) {
        if (!isTextBreak(wordIterator, candidate))
            break;
    }

    // Do not treat the word's boundary as a separator.
    if (!currentOffset && candidate == 1)
        return currentOffset;

    // Omit multiple separators: step back by one when more than one position was skipped.
    return (candidate - currentOffset) > 1 ? candidate - 1 : candidate;
}
// Returns |offset| itself when isTextBreak() reports a break there; otherwise
// returns the result of textBreakPreceding(), with TextBreakDone mapped to 0.
static inline int textBreakAtOrPreceding(TextBreakIterator* it, int offset)
{
    if (!isTextBreak(it, offset)) {
        int preceding = textBreakPreceding(it, offset);
        if (preceding == TextBreakDone)
            return 0;
        return preceding;
    }
    return offset;
}
unsigned CharacterData::parserAppendData(const String& string, unsigned offset, unsigned lengthLimit) { unsigned oldLength = m_data.length(); ASSERT(lengthLimit >= oldLength); unsigned characterLength = string.length() - offset; unsigned characterLengthLimit = std::min(characterLength, lengthLimit - oldLength); // Check that we are not on an unbreakable boundary. // Some text break iterator implementations work best if the passed buffer is as small as possible, // see <https://bugs.webkit.org/show_bug.cgi?id=29092>. // We need at least two characters look-ahead to account for UTF-16 surrogates. if (characterLengthLimit < characterLength) { NonSharedCharacterBreakIterator it(StringView(string).substring(offset, (characterLengthLimit + 2 > characterLength) ? characterLength : characterLengthLimit + 2)); if (!isTextBreak(it, characterLengthLimit)) characterLengthLimit = textBreakPreceding(it, characterLengthLimit); } if (!characterLengthLimit) return 0; if (string.is8Bit()) m_data.append(string.characters8() + offset, characterLengthLimit); else m_data.append(string.characters16() + offset, characterLengthLimit); ASSERT(!renderer() || is<Text>(*this)); if (is<Text>(*this) && parentNode()) downcast<Text>(*this).updateRendererAfterContentChange(oldLength, 0); document().incDOMTreeVersion(); // We don't call dispatchModifiedEvent here because we don't want the // parser to dispatch DOM mutation events. if (parentNode()) { ContainerNode::ChildChange change = { ContainerNode::TextChanged, ElementTraversal::previousSibling(*this), ElementTraversal::nextSibling(*this), ContainerNode::ChildChangeSourceParser }; parentNode()->childrenChanged(change); } return characterLengthLimit; }
Vector<TextCheckingResult> TextChecker::checkTextOfParagraph(int64_t spellDocumentTag, StringView text, int32_t insertionPoint, uint64_t checkingTypes, bool) { UNUSED_PARAM(insertionPoint); Vector<TextCheckingResult> paragraphCheckingResult; #if ENABLE(SPELLCHECK) if (checkingTypes & TextCheckingTypeSpelling) { TextBreakIterator* textIterator = wordBreakIterator(text); if (!textIterator) return paragraphCheckingResult; // Omit the word separators at the beginning/end of the text to don't unnecessarily // involve the client to check spelling for them. unsigned offset = nextWordOffset(text, 0); unsigned lengthStrip = text.length(); while (lengthStrip > 0 && isTextBreak(textIterator, lengthStrip - 1)) --lengthStrip; while (offset < lengthStrip) { int32_t misspellingLocation = -1; int32_t misspellingLength = 0; checkSpellingOfString(spellDocumentTag, text.substring(offset, lengthStrip - offset), misspellingLocation, misspellingLength); if (!misspellingLength) break; TextCheckingResult misspellingResult; misspellingResult.type = TextCheckingTypeSpelling; misspellingResult.location = offset + misspellingLocation; misspellingResult.length = misspellingLength; paragraphCheckingResult.append(misspellingResult); offset += misspellingLocation + misspellingLength; // Generally, we end up checking at the word separator, move to the adjacent word. offset = nextWordOffset(text.substring(0, lengthStrip), offset); } } #else UNUSED_PARAM(spellDocumentTag); UNUSED_PARAM(text); UNUSED_PARAM(insertionPoint); UNUSED_PARAM(checkingTypes); #endif return paragraphCheckingResult; }
unsigned CharacterData::parserAppendData(const String& string, unsigned offset, unsigned lengthLimit) { unsigned oldLength = m_data.length(); ASSERT(lengthLimit >= oldLength); unsigned characterLength = string.length() - offset; unsigned characterLengthLimit = min(characterLength, lengthLimit - oldLength); // Check that we are not on an unbreakable boundary. // Some text break iterator implementations work best if the passed buffer is as small as possible, // see <https://bugs.webkit.org/show_bug.cgi?id=29092>. // We need at least two characters look-ahead to account for UTF-16 surrogates. ASSERT(!string.is8Bit() || string.containsOnlyLatin1()); // Latin-1 doesn't have unbreakable boundaries. if (characterLengthLimit < characterLength && !string.is8Bit()) { NonSharedCharacterBreakIterator it(string.characters16() + offset, (characterLengthLimit + 2 > characterLength) ? characterLength : characterLengthLimit + 2); if (!isTextBreak(it, characterLengthLimit)) characterLengthLimit = textBreakPreceding(it, characterLengthLimit); } if (!characterLengthLimit) return 0; if (string.is8Bit()) m_data.append(string.characters8() + offset, characterLengthLimit); else m_data.append(string.characters16() + offset, characterLengthLimit); ASSERT(!renderer() || isTextNode()); if (isTextNode()) toText(this)->updateTextRenderer(oldLength, 0); document()->incDOMTreeVersion(); // We don't call dispatchModifiedEvent here because we don't want the // parser to dispatch DOM mutation events. if (parentNode()) parentNode()->childrenChanged(); return characterLengthLimit; }
// Creates a Text node from the not-yet-consumed tail of |text|, using at most
// |maxChars| characters. |charsLeft| is the number of trailing characters of
// |text| still to be consumed; it is updated to the count remaining after the
// returned node's content has been taken.
PassRefPtr<Text> Text::createWithLengthLimit(Document* doc, const String& text, unsigned& charsLeft, unsigned maxChars)
{
    const unsigned textLength = text.length();

    // Nothing consumed yet and everything fits: use the whole string as is.
    if (charsLeft == textLength && charsLeft <= maxChars) {
        charsLeft = 0;
        return new Text(doc, text);
    }

    unsigned chunkStart = textLength - charsLeft;
    unsigned chunkEnd = chunkStart + std::min(charsLeft, maxChars);

    // Check that the cut is not on an unbreakable boundary.
    TextBreakIterator* breakIterator = characterBreakIterator(text.characters(), textLength);
    if (chunkEnd < textLength) {
        if (!isTextBreak(breakIterator, chunkEnd))
            chunkEnd = textBreakPreceding(breakIterator, chunkEnd);
    }

    // maxChars of unbreakable characters could lead to an infinite loop,
    // so take everything that is left when no progress would be made.
    if (chunkEnd <= chunkStart)
        chunkEnd = textLength;

    String chunk = text.substring(chunkStart, chunkEnd - chunkStart);
    charsLeft = textLength - chunkEnd;

    return new Text(doc, chunk);
}
int CoreTextController::offsetForPosition(int h, bool includePartialGlyphs) { // FIXME: For positions occurring within a ligature, we should return the closest "ligature caret" or // approximate it by dividing the width of the ligature by the number of characters it encompasses. // However, Core Text does not expose a low-level API for directly finding // out how many characters a ligature encompasses (the "attachment count"). if (h >= m_totalWidth) return m_run.ltr() ? m_end : 0; if (h < 0) return m_run.ltr() ? 0 : m_end; CGFloat x = h; size_t runCount = m_coreTextRuns.size(); size_t offsetIntoAdjustedGlyphs = 0; for (size_t r = 0; r < runCount; ++r) { const CoreTextRun& coreTextRun = m_coreTextRuns[r]; for (unsigned j = 0; j < coreTextRun.glyphCount(); ++j) { CGFloat adjustedAdvance = m_adjustedAdvances[offsetIntoAdjustedGlyphs + j].width; if (x <= adjustedAdvance) { CFIndex hitIndex = coreTextRun.indexAt(j); int stringLength = coreTextRun.stringLength(); TextBreakIterator* characterIterator = characterBreakIterator(coreTextRun.characters(), stringLength); int clusterStart; if (isTextBreak(characterIterator, hitIndex)) clusterStart = hitIndex; else { clusterStart = textBreakPreceding(characterIterator, hitIndex); if (clusterStart == TextBreakDone) clusterStart = 0; } if (!includePartialGlyphs) return coreTextRun.stringLocation() + clusterStart; int clusterEnd = textBreakFollowing(characterIterator, hitIndex); if (clusterEnd == TextBreakDone) clusterEnd = stringLength; CGFloat clusterWidth = adjustedAdvance; // FIXME: The search stops at the boundaries of coreTextRun. In theory, it should go on into neighboring CoreTextRuns // derived from the same CTLine. In practice, we do not expect there to be more than one CTRun in a CTLine, as no // reordering and on font fallback should occur within a CTLine. 
if (clusterEnd - clusterStart > 1) { int firstGlyphBeforeCluster = j - 1; while (firstGlyphBeforeCluster && coreTextRun.indexAt(firstGlyphBeforeCluster) >= clusterStart && coreTextRun.indexAt(firstGlyphBeforeCluster) < clusterEnd) { CGFloat width = m_adjustedAdvances[offsetIntoAdjustedGlyphs + firstGlyphBeforeCluster].width; clusterWidth += width; x += width; firstGlyphBeforeCluster--; } unsigned firstGlyphAfterCluster = j + 1; while (firstGlyphAfterCluster < coreTextRun.glyphCount() && coreTextRun.indexAt(firstGlyphAfterCluster) >= clusterStart && coreTextRun.indexAt(firstGlyphAfterCluster) < clusterEnd) { clusterWidth += m_adjustedAdvances[offsetIntoAdjustedGlyphs + firstGlyphAfterCluster].width; firstGlyphAfterCluster++; } } if (x <= clusterWidth / 2) return coreTextRun.stringLocation() + (m_run.ltr() ? clusterStart : clusterEnd); else return coreTextRun.stringLocation() + (m_run.ltr() ? clusterEnd : clusterStart); } x -= adjustedAdvance; } offsetIntoAdjustedGlyphs += coreTextRun.glyphCount(); } ASSERT_NOT_REACHED(); return 0; }