Example #1
0
unsigned CharacterData::parserAppendData(const UChar* data, unsigned dataLength, unsigned lengthLimit)
{
    unsigned oldLength = m_data->length();

    unsigned end = min(dataLength, lengthLimit - oldLength);

    // Check that we are not on an unbreakable boundary.
    // Some text break iterator implementations work best if the passed buffer is as small as possible, 
    // see <https://bugs.webkit.org/show_bug.cgi?id=29092>. 
    // We need at least two characters look-ahead to account for UTF-16 surrogates.
    if (end < dataLength) {
        TextBreakIterator* it = characterBreakIterator(data, (end + 2 > dataLength) ? dataLength : end + 2);
        if (!isTextBreak(it, end))
            end = textBreakPreceding(it, end);
    }
    
    if (!end)
        return 0;

    String newStr = m_data;
    newStr.append(data, end);
    m_data = newStr.impl();

    updateRenderer(oldLength, 0);
    // We don't call dispatchModifiedEvent here because we don't want the
    // parser to dispatch DOM mutation events.
    if (parentNode())
        parentNode()->childrenChanged();
    
    return end;
}
// Starting at |currentOffset|, skips over consecutive positions that the word
// break iterator reports as breaks and returns the resulting offset in |text|.
// Returns |currentOffset| unchanged when no iterator is available.
static unsigned nextWordOffset(StringView text, unsigned currentOffset)
{
    // FIXME: avoid creating textIterator object here, it could be passed as a parameter.
    //        isTextBreak() leaves the iterator pointing to the first boundary position at
    //        or after "offset" (ubrk_isBoundary side effect).
    //        For many word separators, the method doesn't properly determine the boundaries
    //        without resetting the iterator.
    TextBreakIterator* wordIterator = wordBreakIterator(text);
    if (!wordIterator)
        return currentOffset;

    // Advance while we are still inside the text and sitting on a break position.
    unsigned candidate = currentOffset;
    for (; candidate < text.length(); ++candidate) {
        if (!isTextBreak(wordIterator, candidate))
            break;
    }

    // The boundary at the very start of the text belongs to the first word; it is not a separator.
    if (!currentOffset && candidate == 1)
        return currentOffset;

    // When more than one separator position was skipped, step back by one.
    const unsigned skipped = candidate - currentOffset;
    return skipped > 1 ? candidate - 1 : candidate;
}
// Returns |offset| when it is itself a break position; otherwise returns the
// break preceding it, with TextBreakDone mapped to 0.
static inline int textBreakAtOrPreceding(TextBreakIterator* it, int offset)
{
    if (!isTextBreak(it, offset)) {
        int preceding = textBreakPreceding(it, offset);
        if (preceding == TextBreakDone)
            return 0;
        return preceding;
    }

    return offset;
}
unsigned CharacterData::parserAppendData(const String& string, unsigned offset, unsigned lengthLimit)
{
    unsigned oldLength = m_data.length();

    ASSERT(lengthLimit >= oldLength);

    unsigned characterLength = string.length() - offset;
    unsigned characterLengthLimit = std::min(characterLength, lengthLimit - oldLength);

    // Check that we are not on an unbreakable boundary.
    // Some text break iterator implementations work best if the passed buffer is as small as possible,
    // see <https://bugs.webkit.org/show_bug.cgi?id=29092>.
    // We need at least two characters look-ahead to account for UTF-16 surrogates.
    if (characterLengthLimit < characterLength) {
        NonSharedCharacterBreakIterator it(StringView(string).substring(offset, (characterLengthLimit + 2 > characterLength) ? characterLength : characterLengthLimit + 2));
        if (!isTextBreak(it, characterLengthLimit))
            characterLengthLimit = textBreakPreceding(it, characterLengthLimit);
    }

    if (!characterLengthLimit)
        return 0;

    if (string.is8Bit())
        m_data.append(string.characters8() + offset, characterLengthLimit);
    else
        m_data.append(string.characters16() + offset, characterLengthLimit);

    ASSERT(!renderer() || is<Text>(*this));
    if (is<Text>(*this) && parentNode())
        downcast<Text>(*this).updateRendererAfterContentChange(oldLength, 0);

    document().incDOMTreeVersion();
    // We don't call dispatchModifiedEvent here because we don't want the
    // parser to dispatch DOM mutation events.
    if (parentNode()) {
        ContainerNode::ChildChange change = {
            ContainerNode::TextChanged,
            ElementTraversal::previousSibling(*this),
            ElementTraversal::nextSibling(*this),
            ContainerNode::ChildChangeSourceParser
        };
        parentNode()->childrenChanged(change);
    }

    return characterLengthLimit;
}
Example #5
0
// Spell-checks |text| and returns one TextCheckingResult per misspelling found.
//
// Only TextCheckingTypeSpelling is handled, and only when SPELLCHECK is
// enabled; otherwise the returned vector is empty. |insertionPoint| is unused.
Vector<TextCheckingResult> TextChecker::checkTextOfParagraph(int64_t spellDocumentTag, StringView text, int32_t insertionPoint, uint64_t checkingTypes, bool)
{
    UNUSED_PARAM(insertionPoint);

    Vector<TextCheckingResult> results;
#if ENABLE(SPELLCHECK)
    if (!(checkingTypes & TextCheckingTypeSpelling))
        return results;

    TextBreakIterator* wordIterator = wordBreakIterator(text);
    if (!wordIterator)
        return results;

    // Trim word separators from both ends of the text so the client is not
    // asked to check spelling for them.
    unsigned cursor = nextWordOffset(text, 0);
    unsigned trimmedLength = text.length();
    for (; trimmedLength > 0; --trimmedLength) {
        if (!isTextBreak(wordIterator, trimmedLength - 1))
            break;
    }

    while (cursor < trimmedLength) {
        int32_t misspellingLocation = -1;
        int32_t misspellingLength = 0;
        checkSpellingOfString(spellDocumentTag, text.substring(cursor, trimmedLength - cursor), misspellingLocation, misspellingLength);

        // A zero length means no further misspelling was reported.
        if (!misspellingLength)
            break;

        // The reported location is relative to the substring that was checked.
        TextCheckingResult misspelling;
        misspelling.type = TextCheckingTypeSpelling;
        misspelling.location = cursor + misspellingLocation;
        misspelling.length = misspellingLength;
        results.append(misspelling);

        // Continue after the misspelled word. That generally lands on a word
        // separator, so move on to the adjacent word.
        cursor += misspellingLocation + misspellingLength;
        cursor = nextWordOffset(text.substring(0, trimmedLength), cursor);
    }
#else
    UNUSED_PARAM(spellDocumentTag);
    UNUSED_PARAM(text);
    UNUSED_PARAM(insertionPoint);
    UNUSED_PARAM(checkingTypes);
#endif
    return results;
}
Example #6
0
// Appends characters of |string|, starting at |offset|, to this node on behalf
// of the parser, without dispatching DOM mutation events.
//
// The node's data is not grown beyond |lengthLimit| characters. For 16-bit
// strings that have to be truncated, the cut is moved back to a character
// break so an unbreakable sequence is not split. Returns the number of
// characters appended (0 when nothing could be appended).
unsigned CharacterData::parserAppendData(const String& string, unsigned offset, unsigned lengthLimit)
{
    const unsigned oldLength = m_data.length();

    ASSERT(lengthLimit >= oldLength);

    const unsigned remainingLength = string.length() - offset;
    unsigned appendLength = min(remainingLength, lengthLimit - oldLength);

    // The boundary check below is skipped for 8-bit strings.
    ASSERT(!string.is8Bit() || string.containsOnlyLatin1()); // Latin-1 doesn't have unbreakable boundaries.
    if (appendLength < remainingLength && !string.is8Bit()) {
        // Check that we are not on an unbreakable boundary.
        // Some text break iterator implementations work best if the passed buffer is as small
        // as possible, see <https://bugs.webkit.org/show_bug.cgi?id=29092>, so only the candidate
        // prefix plus at most two look-ahead characters (for UTF-16 surrogates) is handed over.
        const unsigned windowLength = min(remainingLength, appendLength + 2);
        NonSharedCharacterBreakIterator it(string.characters16() + offset, windowLength);
        if (!isTextBreak(it, appendLength))
            appendLength = textBreakPreceding(it, appendLength);
    }

    if (!appendLength)
        return 0;

    if (string.is8Bit())
        m_data.append(string.characters8() + offset, appendLength);
    else
        m_data.append(string.characters16() + offset, appendLength);

    ASSERT(!renderer() || isTextNode());
    if (isTextNode())
        toText(this)->updateTextRenderer(oldLength, 0);

    document()->incDOMTreeVersion();

    // dispatchModifiedEvent is intentionally not called here: the parser must
    // not dispatch DOM mutation events.
    if (parentNode())
        parentNode()->childrenChanged();

    return appendLength;
}
Example #7
0
// Creates a Text node holding the next piece of |text|, at most |maxChars| long.
//
// |charsLeft| is in/out: on entry it is the number of trailing characters of
// |text| not yet consumed; on return it is the number still left after the
// piece handed to the new node. The piece ends on a character break unless
// that would make it empty, in which case the whole remainder is taken.
PassRefPtr<Text> Text::createWithLengthLimit(Document* doc, const String& text, unsigned& charsLeft, unsigned maxChars)
{
    const unsigned totalLength = text.length();

    // Fast path: nothing has been consumed yet and everything fits in one node.
    if (charsLeft == totalLength && charsLeft <= maxChars) {
        charsLeft = 0;
        return new Text(doc, text);
    }

    const unsigned start = totalLength - charsLeft;
    unsigned end = start + std::min(charsLeft, maxChars);

    // Make sure the cut does not land on an unbreakable boundary.
    TextBreakIterator* it = characterBreakIterator(text.characters(), totalLength);
    if (end < totalLength && !isTextBreak(it, end))
        end = textBreakPreceding(it, end);

    // maxChars of unbreakable characters could otherwise lead to an infinite
    // loop (no progress), so take everything that is left instead.
    if (end <= start)
        end = totalLength;

    charsLeft = totalLength - end;
    return new Text(doc, text.substring(start, end - start));
}
// Maps the horizontal position |h| to a string offset.
//
// Positions at or beyond m_totalWidth, and negative positions, are clamped to
// an end of the run (0 or m_end, depending on direction). Otherwise the glyph
// advances are walked in order until the glyph containing |h| is found, and the
// hit is widened to the character cluster that contains it.
//
// When |includePartialGlyphs| is false, the offset of the start of that cluster
// is returned. When it is true, the cluster edge on the side of the cluster's
// midpoint where the position falls is returned (which edge depends on direction).
int CoreTextController::offsetForPosition(int h, bool includePartialGlyphs)
{
    // FIXME: For positions occurring within a ligature, we should return the closest "ligature caret" or
    // approximate it by dividing the width of the ligature by the number of characters it encompasses.
    // However, Core Text does not expose a low-level API for directly finding
    // out how many characters a ligature encompasses (the "attachment count").
    if (h >= m_totalWidth)
        return m_run.ltr() ? m_end : 0;
    if (h < 0)
        return m_run.ltr() ? 0 : m_end;

    // Remaining distance from the start of the glyph currently being examined.
    CGFloat x = h;

    size_t runCount = m_coreTextRuns.size();
    size_t offsetIntoAdjustedGlyphs = 0;

    for (size_t r = 0; r < runCount; ++r) {
        const CoreTextRun& coreTextRun = m_coreTextRuns[r];
        for (unsigned j = 0; j < coreTextRun.glyphCount(); ++j) {
            CGFloat adjustedAdvance = m_adjustedAdvances[offsetIntoAdjustedGlyphs + j].width;
            if (x <= adjustedAdvance) {
                CFIndex hitIndex = coreTextRun.indexAt(j);
                int stringLength = coreTextRun.stringLength();
                TextBreakIterator* characterIterator = characterBreakIterator(coreTextRun.characters(), stringLength);

                // Find the start of the character cluster containing the hit character.
                int clusterStart;
                if (isTextBreak(characterIterator, hitIndex))
                    clusterStart = hitIndex;
                else {
                    clusterStart = textBreakPreceding(characterIterator, hitIndex);
                    if (clusterStart == TextBreakDone)
                        clusterStart = 0;
                }

                if (!includePartialGlyphs)
                    return coreTextRun.stringLocation() + clusterStart;

                int clusterEnd = textBreakFollowing(characterIterator, hitIndex);
                if (clusterEnd == TextBreakDone)
                    clusterEnd = stringLength;

                CGFloat clusterWidth = adjustedAdvance;
                // FIXME: The search stops at the boundaries of coreTextRun. In theory, it should go on into neighboring CoreTextRuns
                // derived from the same CTLine. In practice, we do not expect there to be more than one CTRun in a CTLine, as no
                // reordering and no font fallback should occur within a CTLine.
                if (clusterEnd - clusterStart > 1) {
                    // Walk backwards over earlier glyphs of the same cluster. The index is signed and
                    // tested with >= 0 so that glyph 0 is examined too, and so that no glyph at index -1
                    // is read when the hit glyph is the first one in the run.
                    int firstGlyphBeforeCluster = static_cast<int>(j) - 1;
                    while (firstGlyphBeforeCluster >= 0 && coreTextRun.indexAt(firstGlyphBeforeCluster) >= clusterStart && coreTextRun.indexAt(firstGlyphBeforeCluster) < clusterEnd) {
                        CGFloat width = m_adjustedAdvances[offsetIntoAdjustedGlyphs + firstGlyphBeforeCluster].width;
                        clusterWidth += width;
                        // x becomes relative to the start of the cluster rather than of the hit glyph.
                        x += width;
                        firstGlyphBeforeCluster--;
                    }
                    // Walk forwards over later glyphs of the same cluster.
                    unsigned firstGlyphAfterCluster = j + 1;
                    while (firstGlyphAfterCluster < coreTextRun.glyphCount() && coreTextRun.indexAt(firstGlyphAfterCluster) >= clusterStart && coreTextRun.indexAt(firstGlyphAfterCluster) < clusterEnd) {
                        clusterWidth += m_adjustedAdvances[offsetIntoAdjustedGlyphs + firstGlyphAfterCluster].width;
                        firstGlyphAfterCluster++;
                    }
                }

                // Pick the cluster edge according to which half of the cluster the position falls in.
                if (x <= clusterWidth / 2)
                    return coreTextRun.stringLocation() + (m_run.ltr() ? clusterStart : clusterEnd);
                else
                    return coreTextRun.stringLocation() + (m_run.ltr() ? clusterEnd : clusterStart);
            }
            x -= adjustedAdvance;
        }
        offsetIntoAdjustedGlyphs += coreTextRun.glyphCount();
    }

    // Not expected to be reached: a position inside [0, m_totalWidth) should fall within some glyph above.
    ASSERT_NOT_REACHED();
    return 0;
}