// Splits the normalized text buffer into HarfBuzzRun objects and appends them to m_harfbuzzRuns.
// A run ends when the font data chosen for the next character differs from the current run's
// font data, or when the script changes to something other than USCRIPT_INHERITED.
// Returns false if the buffer yields no character, if uscript_getScript() reports an error,
// or if no run was produced; returns true otherwise.
bool HarfBuzzShaper::collectHarfBuzzRuns() {
    const UChar* normalizedBufferEnd = m_normalizedBuffer.get() + m_normalizedBufferLength;
    SurrogatePairAwareTextIterator iterator(m_normalizedBuffer.get(), 0, m_normalizedBufferLength, m_normalizedBufferLength);
    UChar32 character;
    unsigned clusterLength = 0;
    unsigned startIndexOfCurrentRun = 0;
    // Nothing to shape if the iterator cannot produce a first character.
    if (!iterator.consume(character, clusterLength)) return false;
    // Seed the "next" state with the first character; each pass of the do-loop starts by
    // promoting the "next" font data and script to "current".
    const SimpleFontData* nextFontData = m_font->glyphDataForCharacter(character, false).fontData;
    UErrorCode errorCode = U_ZERO_ERROR;
    UScriptCode nextScript = uscript_getScript(character, &errorCode);
    if (U_FAILURE(errorCode)) return false;
    do {
        // Position of the first character of the run being built.
        const UChar* currentCharacterPosition = iterator.characters();
        const SimpleFontData* currentFontData = nextFontData;
        UScriptCode currentScript = nextScript;
        // Extend the run one cluster at a time until the font data or the script changes.
        for (iterator.advance(clusterLength); iterator.consume(character, clusterLength); iterator.advance(clusterLength)) {
            // Characters treated as zero-width spaces never end a run.
            if (Font::treatAsZeroWidthSpace(character)) continue;
            if (U_GET_GC_MASK(character) & U_GC_M_MASK) {
                // The character is a mark (general category M*): scan forward over all
                // immediately following marks so the whole sequence can be looked up together.
                int markLength = clusterLength;
                const UChar* markCharactersEnd = iterator.characters() + clusterLength;
                while (markCharactersEnd < normalizedBufferEnd) {
                    UChar32 nextCharacter;
                    int nextCharacterLength = 0;
                    U16_NEXT(markCharactersEnd, nextCharacterLength, normalizedBufferEnd - markCharactersEnd, nextCharacter);
                    if (!(U_GET_GC_MASK(nextCharacter) & U_GC_M_MASK)) break;
                    markLength += nextCharacterLength;
                    markCharactersEnd += nextCharacterLength;
                }
                // Look up font data for the text from the start of the current run through the
                // last mark. On success the whole mark sequence is consumed as one cluster;
                // otherwise fall back to the per-character lookup.
                nextFontData = fontDataForCombiningCharacterSequence(m_font, currentCharacterPosition, markCharactersEnd - currentCharacterPosition);
                if (nextFontData) clusterLength = markLength;
                else nextFontData = m_font->glyphDataForCharacter(character, false).fontData;
            } else nextFontData = m_font->glyphDataForCharacter(character, false).fontData;
            nextScript = uscript_getScript(character, &errorCode);
            if (U_FAILURE(errorCode)) return false;
            // A different font, or a different script that is not USCRIPT_INHERITED, ends the run.
            if ((nextFontData != currentFontData) || ((currentScript != nextScript) && (nextScript != USCRIPT_INHERITED))) break;
            // An inherited-script character takes on the script of the run it belongs to.
            if (nextScript == USCRIPT_INHERITED) nextScript = currentScript;
        }
        unsigned numCharactersOfCurrentRun = iterator.currentCharacter() - startIndexOfCurrentRun;
        m_harfbuzzRuns.append(HarfBuzzRun::create(currentFontData, startIndexOfCurrentRun, numCharactersOfCurrentRun, m_run.direction()));
        // NOTE(review): currentFontData is declared inside this loop body and re-initialized
        // from nextFontData at the top of every pass, so this assignment looks like a dead store.
        currentFontData = nextFontData;
        startIndexOfCurrentRun = iterator.currentCharacter();
    } while (iterator.consume(character, clusterLength));
    return !m_harfbuzzRuns.isEmpty();
}
// We actually don't care about the legacy behavior on Linux since no one // has a dependency on it. So this actually is different between // Windows and Linux CharacterClassificationType GetLegacyCharacterClassificationType(char16 character) { #ifdef HAS_REAL_ICU auto charTypeMask = U_GET_GC_MASK(character); if ((charTypeMask & U_GC_L_MASK) != 0) { return CharacterClassificationType::Letter; } if ((charTypeMask & (U_GC_ND_MASK | U_GC_P_MASK)) != 0) { return CharacterClassificationType::DigitOrPunct; } // As per http://archives.miloush.net/michkap/archive/2007/06/11/3230072.html // * C1_SPACE corresponds to the Unicode Zs category. // * C1_BLANK corresponds to a hardcoded list thats ill-defined. // We'll skip that compatibility here and just check for Zs. // We explicitly check for 0xFEFF to satisfy the unit test in es5/Lex_u3.js if ((charTypeMask & U_GC_ZS_MASK) != 0 || character == 0xFEFF || character == 0xFFFE) { return CharacterClassificationType::Whitespace; } #endif return CharacterClassificationType::Invalid; }
// Classifies a UTF-16 code unit for name parsing:
//  - NameStart: '_' or general category Lu, Ll, Lo, Lt, Nl
//  - NameCont:  '.', '-' or general category M*, Lm, Nd
//  - NotPartOfName: everything else
static XMLCat charCat(UChar character)
{
    // Punctuation that is handled explicitly, before consulting Unicode categories.
    switch (character) {
    case '_':
        return NameStart;
    case '.':
    case '-':
        return NameCont;
    default:
        break;
    }

    const unsigned nameStartCategories = U_GC_LU_MASK | U_GC_LL_MASK | U_GC_LO_MASK | U_GC_LT_MASK | U_GC_NL_MASK;
    const unsigned nameContCategories = U_GC_M_MASK | U_GC_LM_MASK | U_GC_ND_MASK;

    const unsigned categoryMask = U_GET_GC_MASK(character);
    if ((categoryMask & nameStartCategories) != 0)
        return NameStart;
    if ((categoryMask & nameContCategories) != 0)
        return NameCont;
    return NotPartOfName;
}
void UTF16TextIterator::consumeMultipleUChar() { const UChar* markCharactersEnd = m_characters + m_currentGlyphLength; int markLength = m_currentGlyphLength; while (markCharactersEnd < m_charactersEnd) { UChar32 nextCharacter; int nextCharacterLength = 0; U16_NEXT(markCharactersEnd, nextCharacterLength, m_charactersEnd - markCharactersEnd, nextCharacter); if (!(U_GET_GC_MASK(nextCharacter) & U_GC_M_MASK)) break; markLength += nextCharacterLength; markCharactersEnd += nextCharacterLength; } m_currentGlyphLength = markLength; }
// Returns true when inChar counts as whitespace for the lexer.
// The HTML spec has no strict definition of whitespace, so this follows what the
// JavaScript lexer did: a fixed set of characters plus the Unicode Zs category.
bool HTMLLexer::IsWhitespace( UniChar inChar )
{
	// Space, tab, vertical tab, form feed and non-breaking space.
	const bool isListedWhitespace =
		inChar == CHAR_SPACE ||
		inChar == CHAR_CONTROL_0009 ||
		inChar == CHAR_CONTROL_000B ||
		inChar == CHAR_CONTROL_000C ||
		inChar == CHAR_NO_BREAK_SPACE;
	if (isListedWhitespace)
		return true;

	// Anything in the Zs (space separator) category is whitespace as well.
	return (U_GET_GC_MASK( inChar ) & U_GC_ZS_MASK) != 0;
}
// Builds the list of script-boundary strings taken from the root collator's
// script-first-primary contractions.
//
// @param status  ICU error code; the function does nothing if it already indicates failure.
// @return a new UVector that owns UnicodeString elements (caller takes ownership of the
//         vector), or NULL with status set on any failure. U_UNSUPPORTED_ERROR is reported
//         when the collator provides no contractions.
UVector *AlphabeticIndex::firstStringsInScript(UErrorCode &status) {
    if (U_FAILURE(status)) {
        return NULL;
    }
    LocalPointer<UVector> dest(new UVector(status), status);
    if (U_FAILURE(status)) {
        return NULL;
    }
    // The vector owns its elements and deletes them when it is destroyed.
    dest->setDeleter(uprv_deleteUObject);
    // Fetch the script-first-primary contractions which are defined in the root collator.
    // They all start with U+FDD1.
    UnicodeSet set;
    collatorPrimaryOnly_->internalAddContractions(0xFDD1, set, status);
    if (U_FAILURE(status)) {
        return NULL;
    }
    if (set.isEmpty()) {
        status = U_UNSUPPORTED_ERROR;
        return NULL;
    }
    UnicodeSetIterator iter(set);
    while (iter.next()) {
        const UnicodeString &boundary = iter.getString();
        // The sample character follows the U+FDD1 prefix.
        uint32_t gcMask = U_GET_GC_MASK(boundary.char32At(1));
        if ((gcMask & (U_GC_L_MASK | U_GC_CN_MASK)) == 0) {
            // Ignore boundaries for the special reordering groups.
            // Take only those for "real scripts" (where the sample character is a Letter),
            // and the one for unassigned implicit weights (Cn).
            continue;
        }
        // Hold the copy in a LocalPointer so that it is released if it cannot be stored.
        LocalPointer<UnicodeString> s(new UnicodeString(boundary), status);
        if (U_FAILURE(status)) {
            return NULL;
        }
        dest->addElement(s.getAlias(), status);
        if (U_FAILURE(status)) {
            // The element was not stored: stop here instead of continuing to iterate and
            // returning a partially filled vector together with a failure status.
            return NULL;
        }
        // The vector now owns the string.
        s.orphan();
    }
    return dest.orphan();
}
// Processes the characters from m_currentCharacter up to (but not including) offset, clamped
// to m_end. The range is split into items wherever the font data or the small-caps state
// changes, and each item is handed to itemizeShapeAndPlace(). On return m_currentCharacter
// has been moved forward by the number of characters processed. Does nothing when the
// clamped range is empty.
void UniscribeController::advance(unsigned offset, GlyphBuffer* glyphBuffer) {
    // FIXME: We really want to be using a newer version of Uniscribe that supports the new OpenType
    // functions. Those functions would allow us to turn off kerning and ligatures. Without being able
    // to do that, we will have buggy line breaking and metrics when simple and complex text are close
    // together (the complex code path will narrow the text because of kerning and ligatures and then
    // when bidi processing splits into multiple runs, the simple portions will get wider and cause us to
    // spill off the edge of a line).
    if (static_cast<int>(offset) > m_end) offset = m_end;
    int length = offset - m_currentCharacter;
    if (length <= 0) return;
    // Itemize the string.
    const UChar* cp = m_run.data(m_currentCharacter);
    unsigned baseCharacter = m_currentCharacter;
    // We break up itemization of the string by fontData and (if needed) the use of small caps.
    // FIXME: It's inconsistent that we use logical order when itemizing, since this
    // does not match normal RTL.
    // FIXME: This function should decode surrogate pairs. Currently it makes little difference that
    // it does not because the font cache on Windows does not support non-BMP characters.
    // Receives the upper-cased replacement for every character rendered as small caps.
    Vector<UChar, 256> smallCapsBuffer;
    if (m_font.isSmallCaps()) smallCapsBuffer.resize(length);
    // For RTL runs the range is walked from its last character towards its first;
    // "end" is the one-past position in the direction of travel.
    unsigned indexOfFontTransition = m_run.rtl() ? length - 1 : 0;
    const UChar* curr = m_run.rtl() ? cp + length - 1 : cp;
    const UChar* end = m_run.rtl() ? cp - 1 : cp + length;
    const SimpleFontData* fontData;
    const SimpleFontData* nextFontData = m_font.glyphDataForCharacter(*curr, false).fontData;
    UChar newC = 0;
    bool isSmallCaps;
    // The first character is small caps when the font asks for small caps, the character is
    // not a mark (general category M*), and upper-casing actually changes it.
    bool nextIsSmallCaps = m_font.isSmallCaps() && !(U_GET_GC_MASK(*curr) & U_GC_M_MASK) && (newC = u_toupper(*curr)) != *curr;
    if (nextIsSmallCaps) smallCapsBuffer[curr - cp] = newC;
    while (true) {
        curr = m_run.rtl() ? curr - 1 : curr + 1;
        if (curr == end) break;
        // The previous character's state becomes the state of the item being accumulated.
        fontData = nextFontData;
        isSmallCaps = nextIsSmallCaps;
        int index = curr - cp;
        UChar c = *curr;
        // A mark that follows a small-caps character stays in small caps and is copied unchanged.
        bool forceSmallCaps = isSmallCaps && (U_GET_GC_MASK(c) & U_GC_M_MASK);
        nextFontData = m_font.glyphDataForCharacter(*curr, false, forceSmallCaps ? SmallCapsVariant : AutoVariant).fontData;
        if (m_font.isSmallCaps()) {
            nextIsSmallCaps = forceSmallCaps || (newC = u_toupper(c)) != c;
            if (nextIsSmallCaps) smallCapsBuffer[index] = forceSmallCaps ? c : newC;
        }
        // Record the font of the item that is ending if it is not the primary font.
        if (m_fallbackFonts && nextFontData != fontData && fontData != m_font.primaryFont()) m_fallbackFonts->add(fontData);
        if (nextFontData != fontData || nextIsSmallCaps != isSmallCaps) {
            // The font or the small-caps state changed: emit the characters accumulated since
            // the previous transition as one item.
            int itemStart = m_run.rtl() ? index + 1 : indexOfFontTransition;
            int itemLength = m_run.rtl() ? indexOfFontTransition - index : index - indexOfFontTransition;
            m_currentCharacter = baseCharacter + itemStart;
            itemizeShapeAndPlace((isSmallCaps ? smallCapsBuffer.data() : cp) + itemStart, itemLength, fontData, glyphBuffer);
            indexOfFontTransition = index;
        }
    }
    // Emit whatever remains after the last transition.
    int itemLength = m_run.rtl() ? indexOfFontTransition + 1 : length - indexOfFontTransition;
    if (itemLength) {
        if (m_fallbackFonts && nextFontData != m_font.primaryFont()) m_fallbackFonts->add(nextFontData);
        int itemStart = m_run.rtl() ? 0 : indexOfFontTransition;
        m_currentCharacter = baseCharacter + itemStart;
        itemizeShapeAndPlace((nextIsSmallCaps ? smallCapsBuffer.data() : cp) + itemStart, itemLength, nextFontData, glyphBuffer);
    }
    m_currentCharacter = baseCharacter + length;
}
// Returns true when a character outside Latin-1 (asserted to be >= 256) belongs to one of
// the general categories S* (symbol), P* (punctuation), Z* (separator) or Cf (format).
static bool isNonLatin1Separator(UChar32 character)
{
    ASSERT_ARG(character, character >= 256);

    const unsigned separatorCategories = U_GC_S_MASK | U_GC_P_MASK | U_GC_Z_MASK | U_GC_CF_MASK;
    const unsigned categoryMask = U_GET_GC_MASK(character);
    return (categoryMask & separatorCategories) != 0;
}
static int generateComponents(TextRunComponents* components, const Font &font, const TextRun &run) { int letterSpacing = font.letterSpacing(); int wordSpacing = font.wordSpacing(); int padding = run.expansion(); int numSpaces = 0; if (padding) { for (int i = 0; i < run.length(); i++) if (Font::treatAsSpace(run[i])) ++numSpaces; } int offset = 0; if (letterSpacing) { // need to draw every letter on it's own int start = 0; if (Font::treatAsSpace(run[0])) { int add = 0; if (numSpaces) { add = padding/numSpaces; padding -= add; --numSpaces; } components->append(TextRunComponent(1, font, offset)); offset += add + letterSpacing + components->last().m_width; start = 1; } for (int i = 1; i < run.length(); ++i) { UChar ch = run[i]; if (U16_IS_LEAD(ch) && U16_IS_TRAIL(run[i-1])) ch = U16_GET_SUPPLEMENTARY(ch, run[i-1]); if (U16_IS_TRAIL(ch) || U_GET_GC_MASK(ch) & U_GC_MN_MASK) continue; if (Font::treatAsSpace(run[i])) { int add = 0; if (i - start > 0) { components->append(TextRunComponent(run.characters16() + start, i - start, run, font, offset)); offset += components->last().m_width + letterSpacing; } if (numSpaces) { add = padding/numSpaces; padding -= add; --numSpaces; } components->append(TextRunComponent(1, font, offset)); offset += wordSpacing + add + components->last().m_width + letterSpacing; start = i + 1; continue; } if (i - start > 0) { components->append(TextRunComponent(run.characters16() + start, i - start, run, font, offset)); offset += components->last().m_width + letterSpacing; } start = i; } if (run.length() - start > 0) { components->append(TextRunComponent(run.characters16() + start, run.length() - start, run, font, offset)); offset += components->last().m_width; } offset += letterSpacing; } else { int start = 0; for (int i = 0; i < run.length(); ++i) { if (Font::treatAsSpace(run[i])) { if (i - start > 0) { components->append(TextRunComponent(run.characters16() + start, i - start, run, font, offset)); offset += components->last().m_width; } int add = 0; 
if (numSpaces) { add = padding/numSpaces; padding -= add; --numSpaces; } components->append(TextRunComponent(1, font, offset)); offset += add + components->last().m_width; if (i) offset += wordSpacing; start = i + 1; } } if (run.length() - start > 0) { components->append(TextRunComponent(run.characters16() + start, run.length() - start, run, font, offset)); offset += components->last().m_width; } } return offset; }
// Filter callback: context points to an int32_t bit mask of general categories.
// Returns true when the general-category mask of ch shares at least one bit with it.
static UBool generalCategoryMaskFilter(UChar32 ch, void* context) {
    const int32_t wantedCategories = *static_cast<const int32_t*>(context);
    const uint32_t categoryOfCh = U_GET_GC_MASK(ch);
    return (categoryOfCh & wantedCategories) != 0;
}
void CoreTextController::collectCoreTextRuns() { if (!m_end) return; // We break up glyph run generation for the string by FontData and (if needed) the use of small caps. const UChar* cp = m_run.characters(); bool hasTrailingSoftHyphen = m_run[m_end - 1] == softHyphen; if (m_font.isSmallCaps() || hasTrailingSoftHyphen) m_smallCapsBuffer.resize(m_end); unsigned indexOfFontTransition = m_run.rtl() ? m_end - 1 : 0; const UChar* curr = m_run.rtl() ? cp + m_end - 1 : cp; const UChar* end = m_run.rtl() ? cp - 1 : cp + m_end; // FIXME: Using HYPHEN-MINUS rather than HYPHEN because Times has a HYPHEN-MINUS glyph that looks like its // SOFT-HYPHEN glyph, and has no HYPHEN glyph. static const UChar hyphen = '-'; if (hasTrailingSoftHyphen && m_run.rtl()) { collectCoreTextRunsForCharacters(&hyphen, 1, m_end - 1, m_font.glyphDataForCharacter(hyphen, false).fontData); indexOfFontTransition--; curr--; } GlyphData glyphData; GlyphData nextGlyphData; bool isSurrogate = U16_IS_SURROGATE(*curr); if (isSurrogate) { if (m_run.ltr()) { if (!U16_IS_SURROGATE_LEAD(curr[0]) || curr + 1 == end || !U16_IS_TRAIL(curr[1])) return; nextGlyphData = m_font.glyphDataForCharacter(U16_GET_SUPPLEMENTARY(curr[0], curr[1]), false); } else { if (!U16_IS_TRAIL(curr[0]) || curr -1 == end || !U16_IS_SURROGATE_LEAD(curr[-1])) return; nextGlyphData = m_font.glyphDataForCharacter(U16_GET_SUPPLEMENTARY(curr[-1], curr[0]), false); } } else nextGlyphData = m_font.glyphDataForCharacter(*curr, false); UChar newC = 0; bool isSmallCaps; bool nextIsSmallCaps = !isSurrogate && m_font.isSmallCaps() && !(U_GET_GC_MASK(*curr) & U_GC_M_MASK) && (newC = u_toupper(*curr)) != *curr; if (nextIsSmallCaps) m_smallCapsBuffer[curr - cp] = newC; while (true) { curr = m_run.rtl() ? curr - (isSurrogate ? 2 : 1) : curr + (isSurrogate ? 
2 : 1); if (curr == end) break; glyphData = nextGlyphData; isSmallCaps = nextIsSmallCaps; int index = curr - cp; isSurrogate = U16_IS_SURROGATE(*curr); UChar c = *curr; bool forceSmallCaps = !isSurrogate && isSmallCaps && (U_GET_GC_MASK(c) & U_GC_M_MASK); if (isSurrogate) { if (m_run.ltr()) { if (!U16_IS_SURROGATE_LEAD(curr[0]) || curr + 1 == end || !U16_IS_TRAIL(curr[1])) return; nextGlyphData = m_font.glyphDataForCharacter(U16_GET_SUPPLEMENTARY(curr[0], curr[1]), false); } else { if (!U16_IS_TRAIL(curr[0]) || curr -1 == end || !U16_IS_SURROGATE_LEAD(curr[-1])) return; nextGlyphData = m_font.glyphDataForCharacter(U16_GET_SUPPLEMENTARY(curr[-1], curr[0]), false); } } else nextGlyphData = m_font.glyphDataForCharacter(*curr, false, forceSmallCaps); if (!isSurrogate && m_font.isSmallCaps()) { nextIsSmallCaps = forceSmallCaps || (newC = u_toupper(c)) != c; if (nextIsSmallCaps) m_smallCapsBuffer[index] = forceSmallCaps ? c : newC; } if (nextGlyphData.fontData != glyphData.fontData || nextIsSmallCaps != isSmallCaps || !nextGlyphData.glyph != !glyphData.glyph) { int itemStart = m_run.rtl() ? index + 1 : indexOfFontTransition; int itemLength = m_run.rtl() ? indexOfFontTransition - index : index - indexOfFontTransition; collectCoreTextRunsForCharacters((isSmallCaps ? m_smallCapsBuffer.data() : cp) + itemStart, itemLength, itemStart, glyphData.glyph ? glyphData.fontData : 0); indexOfFontTransition = index; } } int itemLength = m_run.rtl() ? indexOfFontTransition + 1 : m_end - indexOfFontTransition - (hasTrailingSoftHyphen ? 1 : 0); if (itemLength) { int itemStart = m_run.rtl() ? 0 : indexOfFontTransition; collectCoreTextRunsForCharacters((nextIsSmallCaps ? m_smallCapsBuffer.data() : cp) + itemStart, itemLength, itemStart, nextGlyphData.glyph ? nextGlyphData.fontData : 0); } if (hasTrailingSoftHyphen && m_run.ltr()) collectCoreTextRunsForCharacters(&hyphen, 1, m_end - 1, m_font.glyphDataForCharacter(hyphen, false).fontData); }
// Returns true when a character outside Latin-1 (checked to be >= 256) belongs to one of
// the general categories S* (symbol), P* (punctuation), Z* (separator) or Cf (format).
static bool isNonLatin1Separator(UChar32 character)
{
    DCHECK_GE(character, 256);

    const unsigned separatorCategories = U_GC_S_MASK | U_GC_P_MASK | U_GC_Z_MASK | U_GC_CF_MASK;
    const unsigned categoryMask = U_GET_GC_MASK(character);
    return (categoryMask & separatorCategories) != 0;
}