Exemple #1
0
bool HarfBuzzShaper::collectHarfBuzzRuns()
{
    const UChar* normalizedBufferEnd = m_normalizedBuffer.get() + m_normalizedBufferLength;
    SurrogatePairAwareTextIterator iterator(m_normalizedBuffer.get(), 0, m_normalizedBufferLength, m_normalizedBufferLength);
    UChar32 character;
    unsigned clusterLength = 0;
    unsigned startIndexOfCurrentRun = 0;
    if (!iterator.consume(character, clusterLength))
        return false;

    const SimpleFontData* nextFontData = m_font->glyphDataForCharacter(character, false).fontData;
    UErrorCode errorCode = U_ZERO_ERROR;
    UScriptCode nextScript = uscript_getScript(character, &errorCode);
    if (U_FAILURE(errorCode))
        return false;

    do {
        const UChar* currentCharacterPosition = iterator.characters();
        const SimpleFontData* currentFontData = nextFontData;
        UScriptCode currentScript = nextScript;

        for (iterator.advance(clusterLength); iterator.consume(character, clusterLength); iterator.advance(clusterLength)) {
            if (Font::treatAsZeroWidthSpace(character))
                continue;
            if (U_GET_GC_MASK(character) & U_GC_M_MASK) {
                int markLength = clusterLength;
                const UChar* markCharactersEnd = iterator.characters() + clusterLength;
                while (markCharactersEnd < normalizedBufferEnd) {
                    UChar32 nextCharacter;
                    int nextCharacterLength = 0;
                    U16_NEXT(markCharactersEnd, nextCharacterLength, normalizedBufferEnd - markCharactersEnd, nextCharacter);
                    if (!(U_GET_GC_MASK(nextCharacter) & U_GC_M_MASK))
                        break;
                    markLength += nextCharacterLength;
                    markCharactersEnd += nextCharacterLength;
                }
                nextFontData = fontDataForCombiningCharacterSequence(m_font, currentCharacterPosition, markCharactersEnd - currentCharacterPosition);
                if (nextFontData)
                    clusterLength = markLength;
                else
                    nextFontData = m_font->glyphDataForCharacter(character, false).fontData;
            } else
                nextFontData = m_font->glyphDataForCharacter(character, false).fontData;

            nextScript = uscript_getScript(character, &errorCode);
            if (U_FAILURE(errorCode))
                return false;
            if ((nextFontData != currentFontData) || ((currentScript != nextScript) && (nextScript != USCRIPT_INHERITED)))
                break;
            if (nextScript == USCRIPT_INHERITED)
                nextScript = currentScript;
        }
        unsigned numCharactersOfCurrentRun = iterator.currentCharacter() - startIndexOfCurrentRun;
        m_harfbuzzRuns.append(HarfBuzzRun::create(currentFontData, startIndexOfCurrentRun, numCharactersOfCurrentRun, m_run.direction()));
        currentFontData = nextFontData;
        startIndexOfCurrentRun = iterator.currentCharacter();
    } while (iterator.consume(character, clusterLength));

    return !m_harfbuzzRuns.isEmpty();
}
        // We actually don't care about the legacy behavior on Linux since no one
        // has a dependency on it. So this actually is different between
        // Windows and Linux
        CharacterClassificationType GetLegacyCharacterClassificationType(char16 character)
        {
#ifdef HAS_REAL_ICU
            auto charTypeMask = U_GET_GC_MASK(character);

            if ((charTypeMask & U_GC_L_MASK) != 0)
            {
                return CharacterClassificationType::Letter;
            }

            if ((charTypeMask & (U_GC_ND_MASK | U_GC_P_MASK)) != 0)
            {
                return CharacterClassificationType::DigitOrPunct;
            }

            // As per http://archives.miloush.net/michkap/archive/2007/06/11/3230072.html
            //  * C1_SPACE corresponds to the Unicode Zs category.
            //  * C1_BLANK corresponds to a hardcoded list thats ill-defined.
            // We'll skip that compatibility here and just check for Zs.
            // We explicitly check for 0xFEFF to satisfy the unit test in es5/Lex_u3.js
            if ((charTypeMask & U_GC_ZS_MASK) != 0 ||
                character == 0xFEFF ||
                character == 0xFFFE)
            {
                return CharacterClassificationType::Whitespace;
            }
#endif
            return CharacterClassificationType::Invalid;
        }
Exemple #3
0
static XMLCat charCat(UChar character)
{
    if (character == '_')
        return NameStart;

    if (character == '.' || character == '-')
        return NameCont;
    unsigned characterTypeMask = U_GET_GC_MASK(character);
    if (characterTypeMask & (U_GC_LU_MASK | U_GC_LL_MASK | U_GC_LO_MASK | U_GC_LT_MASK | U_GC_NL_MASK))
        return NameStart;
    if (characterTypeMask & (U_GC_M_MASK | U_GC_LM_MASK | U_GC_ND_MASK))
        return NameCont;
    return NotPartOfName;
}
void UTF16TextIterator::consumeMultipleUChar()
{
    const UChar* markCharactersEnd = m_characters + m_currentGlyphLength;
    int markLength = m_currentGlyphLength;
    while (markCharactersEnd < m_charactersEnd) {
        UChar32 nextCharacter;
        int nextCharacterLength = 0;
        U16_NEXT(markCharactersEnd, nextCharacterLength,
            m_charactersEnd - markCharactersEnd, nextCharacter);
        if (!(U_GET_GC_MASK(nextCharacter) & U_GC_M_MASK))
            break;
        markLength += nextCharacterLength;
        markCharactersEnd += nextCharacterLength;
    }
    m_currentGlyphLength = markLength;
}
bool HTMLLexer::IsWhitespace( UniChar inChar )
{
	// I'm just going to go with whatever JavaScript did, because the HTML spec doesn't
	// have a strict definition of whitespace
	switch (inChar) {
		case CHAR_SPACE:			// Space
		case CHAR_CONTROL_0009:		// Tab
		case CHAR_CONTROL_000B:		// Vertical Tab
		case CHAR_CONTROL_000C:		// Form Feed
		case CHAR_NO_BREAK_SPACE:	// Non-breaking space
			return true;
	}

	// We also want to check the Zs "whitespace" category
	if (U_GET_GC_MASK( inChar ) & (U_GC_ZS_MASK))	// Letter number
		return true;

	return false;
}
Exemple #6
0
UVector *AlphabeticIndex::firstStringsInScript(UErrorCode &status) {
    if (U_FAILURE(status)) {
        return NULL;
    }
    LocalPointer<UVector> dest(new UVector(status), status);
    if (U_FAILURE(status)) {
        return NULL;
    }
    dest->setDeleter(uprv_deleteUObject);
    // Fetch the script-first-primary contractions which are defined in the root collator.
    // They all start with U+FDD1.
    UnicodeSet set;
    collatorPrimaryOnly_->internalAddContractions(0xFDD1, set, status);
    if (U_FAILURE(status)) {
        return NULL;
    }
    if (set.isEmpty()) {
        status = U_UNSUPPORTED_ERROR;
        return NULL;
    }
    UnicodeSetIterator iter(set);
    while (iter.next()) {
        const UnicodeString &boundary = iter.getString();
        uint32_t gcMask = U_GET_GC_MASK(boundary.char32At(1));
        if ((gcMask & (U_GC_L_MASK | U_GC_CN_MASK)) == 0) {
            // Ignore boundaries for the special reordering groups.
            // Take only those for "real scripts" (where the sample character is a Letter,
            // and the one for unassigned implicit weights (Cn).
            continue;
        }
        UnicodeString *s = new UnicodeString(boundary);
        if (s == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return NULL;
        }
        dest->addElement(s, status);
    }
    return dest.orphan();
}
void UniscribeController::advance(unsigned offset, GlyphBuffer* glyphBuffer)
{
    // FIXME: We really want to be using a newer version of Uniscribe that supports the new OpenType
    // functions.  Those functions would allow us to turn off kerning and ligatures.  Without being able
    // to do that, we will have buggy line breaking and metrics when simple and complex text are close
    // together (the complex code path will narrow the text because of kerning and ligatures and then
    // when bidi processing splits into multiple runs, the simple portions will get wider and cause us to
    // spill off the edge of a line).
    if (static_cast<int>(offset) > m_end)
        offset = m_end;

    int length = offset - m_currentCharacter;
    if (length <= 0)
        return;

    // Itemize the string.
    const UChar* cp = m_run.data(m_currentCharacter);
    unsigned baseCharacter = m_currentCharacter;

    // We break up itemization of the string by fontData and (if needed) the use of small caps.

    // FIXME: It's inconsistent that we use logical order when itemizing, since this
    // does not match normal RTL.

    // FIXME: This function should decode surrogate pairs. Currently it makes little difference that
    // it does not because the font cache on Windows does not support non-BMP characters.
    Vector<UChar, 256> smallCapsBuffer;
    if (m_font.isSmallCaps())
        smallCapsBuffer.resize(length);

    unsigned indexOfFontTransition = m_run.rtl() ? length - 1 : 0;
    const UChar* curr = m_run.rtl() ? cp + length  - 1 : cp;
    const UChar* end = m_run.rtl() ? cp - 1 : cp + length;

    const SimpleFontData* fontData;
    const SimpleFontData* nextFontData = m_font.glyphDataForCharacter(*curr, false).fontData;

    UChar newC = 0;

    bool isSmallCaps;
    bool nextIsSmallCaps = m_font.isSmallCaps() && !(U_GET_GC_MASK(*curr) & U_GC_M_MASK) && (newC = u_toupper(*curr)) != *curr;

    if (nextIsSmallCaps)
        smallCapsBuffer[curr - cp] = newC;

    while (true) {
        curr = m_run.rtl() ? curr - 1 : curr + 1;
        if (curr == end)
            break;

        fontData = nextFontData;
        isSmallCaps = nextIsSmallCaps;
        int index = curr - cp;
        UChar c = *curr;

        bool forceSmallCaps = isSmallCaps && (U_GET_GC_MASK(c) & U_GC_M_MASK);
        nextFontData = m_font.glyphDataForCharacter(*curr, false, forceSmallCaps ? SmallCapsVariant : AutoVariant).fontData;
        if (m_font.isSmallCaps()) {
            nextIsSmallCaps = forceSmallCaps || (newC = u_toupper(c)) != c;
            if (nextIsSmallCaps)
                smallCapsBuffer[index] = forceSmallCaps ? c : newC;
        }

        if (m_fallbackFonts && nextFontData != fontData && fontData != m_font.primaryFont())
            m_fallbackFonts->add(fontData);

        if (nextFontData != fontData || nextIsSmallCaps != isSmallCaps) {
            int itemStart = m_run.rtl() ? index + 1 : indexOfFontTransition;
            int itemLength = m_run.rtl() ? indexOfFontTransition - index : index - indexOfFontTransition;
            m_currentCharacter = baseCharacter + itemStart;
            itemizeShapeAndPlace((isSmallCaps ? smallCapsBuffer.data() : cp) + itemStart, itemLength, fontData, glyphBuffer);
            indexOfFontTransition = index;
        }
    }
    
    int itemLength = m_run.rtl() ? indexOfFontTransition + 1 : length - indexOfFontTransition;
    if (itemLength) {
        if (m_fallbackFonts && nextFontData != m_font.primaryFont())
            m_fallbackFonts->add(nextFontData);

        int itemStart = m_run.rtl() ? 0 : indexOfFontTransition;
        m_currentCharacter = baseCharacter + itemStart;
        itemizeShapeAndPlace((nextIsSmallCaps ? smallCapsBuffer.data() : cp) + itemStart, itemLength, nextFontData, glyphBuffer);
    }

    m_currentCharacter = baseCharacter + length;
}
static bool isNonLatin1Separator(UChar32 character)
{
    ASSERT_ARG(character, character >= 256);

    return U_GET_GC_MASK(character) & (U_GC_S_MASK | U_GC_P_MASK | U_GC_Z_MASK | U_GC_CF_MASK);
}
Exemple #9
0
static int generateComponents(TextRunComponents* components, const Font &font, const TextRun &run)
{
    int letterSpacing = font.letterSpacing();
    int wordSpacing = font.wordSpacing();
    int padding = run.expansion();
    int numSpaces = 0;
    if (padding) {
        for (int i = 0; i < run.length(); i++)
            if (Font::treatAsSpace(run[i]))
                ++numSpaces;
    }

    int offset = 0;
    if (letterSpacing) {
        // need to draw every letter on it's own
        int start = 0;
        if (Font::treatAsSpace(run[0])) {
            int add = 0;
            if (numSpaces) {
                add = padding/numSpaces;
                padding -= add;
                --numSpaces;
            }
            components->append(TextRunComponent(1, font, offset));
            offset += add + letterSpacing + components->last().m_width;
            start = 1;
        }
        for (int i = 1; i < run.length(); ++i) {
            UChar ch = run[i];
            if (U16_IS_LEAD(ch) && U16_IS_TRAIL(run[i-1]))
                ch = U16_GET_SUPPLEMENTARY(ch, run[i-1]);
            if (U16_IS_TRAIL(ch) || U_GET_GC_MASK(ch) & U_GC_MN_MASK)
                continue;
            if (Font::treatAsSpace(run[i])) {
                int add = 0;
                if (i - start > 0) {
                    components->append(TextRunComponent(run.characters16() + start, i - start,
                                                        run, font, offset));
                    offset += components->last().m_width + letterSpacing;
                }
                if (numSpaces) {
                    add = padding/numSpaces;
                    padding -= add;
                    --numSpaces;
                }
                components->append(TextRunComponent(1, font, offset));
                offset += wordSpacing + add + components->last().m_width + letterSpacing;
                start = i + 1;
                continue;
            }
            if (i - start > 0) {
                components->append(TextRunComponent(run.characters16() + start, i - start,
                                                    run,
                                                    font, offset));
                offset += components->last().m_width + letterSpacing;
            }
            start = i;
        }
        if (run.length() - start > 0) {
            components->append(TextRunComponent(run.characters16() + start, run.length() - start,
                                                run,
                                                font, offset));
            offset += components->last().m_width;
        }
        offset += letterSpacing;
    } else {
        int start = 0;
        for (int i = 0; i < run.length(); ++i) {
            if (Font::treatAsSpace(run[i])) {
                if (i - start > 0) {
                    components->append(TextRunComponent(run.characters16() + start, i - start,
                                                        run,
                                                        font, offset));
                    offset += components->last().m_width;
                }
                int add = 0;
                if (numSpaces) {
                    add = padding/numSpaces;
                    padding -= add;
                    --numSpaces;
                }
                components->append(TextRunComponent(1, font, offset));
                offset += add + components->last().m_width;
                if (i)
                    offset += wordSpacing;
                start = i + 1;
            }
        }
        if (run.length() - start > 0) {
            components->append(TextRunComponent(run.characters16() + start, run.length() - start,
                                                run,
                                                font, offset));
            offset += components->last().m_width;
        }
    }
    return offset;
}
Exemple #10
0
static UBool generalCategoryMaskFilter(UChar32 ch, void* context) {
    int32_t value = *(int32_t*)context;
    return (U_GET_GC_MASK((UChar32) ch) & value) != 0;
}
void CoreTextController::collectCoreTextRuns()
{
    if (!m_end)
        return;

    // We break up glyph run generation for the string by FontData and (if needed) the use of small caps.
    const UChar* cp = m_run.characters();
    bool hasTrailingSoftHyphen = m_run[m_end - 1] == softHyphen;

    if (m_font.isSmallCaps() || hasTrailingSoftHyphen)
        m_smallCapsBuffer.resize(m_end);

    unsigned indexOfFontTransition = m_run.rtl() ? m_end - 1 : 0;
    const UChar* curr = m_run.rtl() ? cp + m_end  - 1 : cp;
    const UChar* end = m_run.rtl() ? cp - 1 : cp + m_end;

    // FIXME: Using HYPHEN-MINUS rather than HYPHEN because Times has a HYPHEN-MINUS glyph that looks like its
    // SOFT-HYPHEN glyph, and has no HYPHEN glyph.
    static const UChar hyphen = '-';

    if (hasTrailingSoftHyphen && m_run.rtl()) {
        collectCoreTextRunsForCharacters(&hyphen, 1, m_end - 1, m_font.glyphDataForCharacter(hyphen, false).fontData);
        indexOfFontTransition--;
        curr--;
    }

    GlyphData glyphData;
    GlyphData nextGlyphData;

    bool isSurrogate = U16_IS_SURROGATE(*curr);
    if (isSurrogate) {
        if (m_run.ltr()) {
            if (!U16_IS_SURROGATE_LEAD(curr[0]) || curr + 1 == end || !U16_IS_TRAIL(curr[1]))
                return;
            nextGlyphData = m_font.glyphDataForCharacter(U16_GET_SUPPLEMENTARY(curr[0], curr[1]), false);
        } else {
            if (!U16_IS_TRAIL(curr[0]) || curr -1 == end || !U16_IS_SURROGATE_LEAD(curr[-1]))
                return;
            nextGlyphData = m_font.glyphDataForCharacter(U16_GET_SUPPLEMENTARY(curr[-1], curr[0]), false);
        }
    } else
        nextGlyphData = m_font.glyphDataForCharacter(*curr, false);

    UChar newC = 0;

    bool isSmallCaps;
    bool nextIsSmallCaps = !isSurrogate && m_font.isSmallCaps() && !(U_GET_GC_MASK(*curr) & U_GC_M_MASK) && (newC = u_toupper(*curr)) != *curr;

    if (nextIsSmallCaps)
        m_smallCapsBuffer[curr - cp] = newC;

    while (true) {
        curr = m_run.rtl() ? curr - (isSurrogate ? 2 : 1) : curr + (isSurrogate ? 2 : 1);
        if (curr == end)
            break;

        glyphData = nextGlyphData;
        isSmallCaps = nextIsSmallCaps;
        int index = curr - cp;
        isSurrogate = U16_IS_SURROGATE(*curr);
        UChar c = *curr;
        bool forceSmallCaps = !isSurrogate && isSmallCaps && (U_GET_GC_MASK(c) & U_GC_M_MASK);
        if (isSurrogate) {
            if (m_run.ltr()) {
                if (!U16_IS_SURROGATE_LEAD(curr[0]) || curr + 1 == end || !U16_IS_TRAIL(curr[1]))
                    return;
                nextGlyphData = m_font.glyphDataForCharacter(U16_GET_SUPPLEMENTARY(curr[0], curr[1]), false);
            } else {
                if (!U16_IS_TRAIL(curr[0]) || curr -1 == end || !U16_IS_SURROGATE_LEAD(curr[-1]))
                    return;
                nextGlyphData = m_font.glyphDataForCharacter(U16_GET_SUPPLEMENTARY(curr[-1], curr[0]), false);
            }
        } else
            nextGlyphData = m_font.glyphDataForCharacter(*curr, false, forceSmallCaps);

        if (!isSurrogate && m_font.isSmallCaps()) {
            nextIsSmallCaps = forceSmallCaps || (newC = u_toupper(c)) != c;
            if (nextIsSmallCaps)
                m_smallCapsBuffer[index] = forceSmallCaps ? c : newC;
        }

        if (nextGlyphData.fontData != glyphData.fontData || nextIsSmallCaps != isSmallCaps || !nextGlyphData.glyph != !glyphData.glyph) {
            int itemStart = m_run.rtl() ? index + 1 : indexOfFontTransition;
            int itemLength = m_run.rtl() ? indexOfFontTransition - index : index - indexOfFontTransition;
            collectCoreTextRunsForCharacters((isSmallCaps ? m_smallCapsBuffer.data() : cp) + itemStart, itemLength, itemStart, glyphData.glyph ? glyphData.fontData : 0);
            indexOfFontTransition = index;
        }
    }

    int itemLength = m_run.rtl() ? indexOfFontTransition + 1 : m_end - indexOfFontTransition - (hasTrailingSoftHyphen ? 1 : 0);
    if (itemLength) {
        int itemStart = m_run.rtl() ? 0 : indexOfFontTransition;
        collectCoreTextRunsForCharacters((nextIsSmallCaps ? m_smallCapsBuffer.data() : cp) + itemStart, itemLength, itemStart, nextGlyphData.glyph ? nextGlyphData.fontData : 0);
    }

    if (hasTrailingSoftHyphen && m_run.ltr())
        collectCoreTextRunsForCharacters(&hyphen, 1, m_end - 1, m_font.glyphDataForCharacter(hyphen, false).fontData);
}
Exemple #12
0
static bool isNonLatin1Separator(UChar32 character) {
  DCHECK_GE(character, 256);
  return U_GET_GC_MASK(character) &
         (U_GC_S_MASK | U_GC_P_MASK | U_GC_Z_MASK | U_GC_CF_MASK);
}