/**
 * Hashes a UTF-8 buffer using the UTF-16 code units of its decoded content.
 *
 * Each code point decoded from the input is fed to AddToHash either as a
 * single value (when it is below PLANE1_BASE) or as its high/low surrogate
 * pair (otherwise).
 *
 * @param aUTF8   start of the UTF-8 bytes to hash
 * @param aLength number of bytes available at aUTF8
 * @param aErr    out-parameter; cleared on entry. If the decoder flags an
 *                error through it, hashing stops and 0 is returned.
 * @return the accumulated hash, or 0 when a decoding error was reported
 */
uint32_t HashUTF8AsUTF16(const char* aUTF8, uint32_t aLength, bool* aErr) {
  const char* cursor = aUTF8;
  const char* const limit = aUTF8 + aLength;
  uint32_t result = 0;

  *aErr = false;

  while (cursor < limit) {
    const uint32_t codePoint =
        UTF8CharEnumerator::NextChar(&cursor, limit, aErr);
    if (*aErr) {
      return 0;
    }

    if (codePoint >= PLANE1_BASE) {
      // Needs two UTF-16 code units: hash both halves of the pair.
      result = AddToHash(result, H_SURROGATE(codePoint),
                         L_SURROGATE(codePoint));
    } else {
      result = AddToHash(result, codePoint);
    }
  }

  return result;
}
bool TestUnspecifiedCodepoint(uint32_t codepoint) { bool rv = true; PRUnichar unicharArray[3]; nsAutoString X, normalized; char description[9]; if (IS_IN_BMP(codepoint)) { unicharArray[0] = codepoint; unicharArray[1] = 0; X = nsDependentString(unicharArray); } else { unicharArray[0] = H_SURROGATE(codepoint); unicharArray[1] = L_SURROGATE(codepoint); unicharArray[2] = 0; X = nsDependentString(unicharArray); } /* 2. For every code point X assigned in this version of Unicode that is not specifically listed in Part 1, the following invariants must be true for all conformant implementations: X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X) */ DEBUG_TESTCASE(X); sprintf(description, "U+%04X", codepoint); NORMALIZE_AND_COMPARE(X, X, NFC, description); NORMALIZE_AND_COMPARE(X, X, NFD, description); NORMALIZE_AND_COMPARE(X, X, NFKC, description); NORMALIZE_AND_COMPARE(X, X, NFKD, description); return rv; }
/**
 * Hashes UTF-8 input using the UTF-16 code units of its decoded content.
 *
 * Every decoded code point below PLANE1_BASE contributes one value to the
 * hash; any other code point contributes its high surrogate followed by its
 * low surrogate.
 *
 * @param start  first byte of the UTF-8 data
 * @param length number of bytes to read
 * @param err    out-parameter; set to PR_FALSE on entry. If the decoder
 *               flags an error through it, 0 is returned immediately.
 * @return the hash value, or 0 on a decoding error
 */
PRUint32 nsCRT::HashCodeAsUTF16(const char* start, PRUint32 length,
                                PRBool* err)
{
  const char* pos = start;
  const char* const stop = start + length;
  PRUint32 hashValue = 0;

  *err = PR_FALSE;

  while (pos < stop) {
    const PRUint32 codePoint = UTF8CharEnumerator::NextChar(&pos, stop, err);
    if (*err) {
      return 0;
    }

    if (codePoint >= PLANE1_BASE) {
      ADD_TO_HASHVAL(hashValue, H_SURROGATE(codePoint));
      ADD_TO_HASHVAL(hashValue, L_SURROGATE(codePoint));
    } else {
      ADD_TO_HASHVAL(hashValue, codePoint);
    }
  }

  return hashValue;
}
/**
 * Appends one UCS4 code point to a UTF-16 string.
 *
 * A code point for which IS_IN_BMP() holds is appended as a single char16_t;
 * anything else is appended as a high surrogate followed by a low surrogate.
 * Asserts (debug only) that the value satisfies IS_VALID_CHAR().
 *
 * @param aSource the code point to append
 * @param aDest   the string that receives the UTF-16 code unit(s)
 */
void AppendUCS4ToUTF16(const uint32_t aSource, nsAString& aDest) {
  NS_ASSERTION(IS_VALID_CHAR(aSource), "Invalid UCS4 char");

  if (!IS_IN_BMP(aSource)) {
    aDest.Append(H_SURROGATE(aSource));
    aDest.Append(L_SURROGATE(aSource));
    return;
  }

  aDest.Append(char16_t(aSource));
}
/**
 * Appends a zero-terminated array of UCS4 code points to a UTF-16 string.
 *
 * Values for which IS_IN_BMP() holds are appended as one code unit; all
 * others are appended as a high/low surrogate pair.
 *
 * @param in  pointer to the first code point; reading stops at the first 0
 * @param out string that receives the converted code units
 */
static void ucs4toUtf16(const PRUint32 *in, nsAString& out)
{
  for (; *in; in++) {
    if (IS_IN_BMP(*in)) {
      out.Append((PRUnichar) *in);
      continue;
    }
    out.Append((PRUnichar) H_SURROGATE(*in));
    out.Append((PRUnichar) L_SURROGATE(*in));
  }
}
void ToUpperCase(const char16_t *aIn, char16_t *aOut, uint32_t aLen) { for (uint32_t i = 0; i < aLen; i++) { uint32_t ch = aIn[i]; if (NS_IS_HIGH_SURROGATE(ch) && i < aLen - 1 && NS_IS_LOW_SURROGATE(aIn[i + 1])) { ch = mozilla::unicode::GetUppercase(SURROGATE_TO_UCS4(ch, aIn[i + 1])); NS_ASSERTION(!IS_IN_BMP(ch), "case mapping crossed BMP/SMP boundary!"); aOut[i++] = H_SURROGATE(ch); aOut[i] = L_SURROGATE(ch); continue; } aOut[i] = ToUpperCase(ch); } }
void ToLowerCase(const PRUnichar *aIn, PRUnichar *aOut, PRUint32 aLen) { for (PRUint32 i = 0; i < aLen; i++) { PRUint32 ch = aIn[i]; if (NS_IS_HIGH_SURROGATE(ch) && i < aLen - 1 && NS_IS_LOW_SURROGATE(aIn[i + 1])) { ch = mozilla::unicode::GetLowercase(SURROGATE_TO_UCS4(ch, aIn[i + 1])); NS_ASSERTION(!IS_IN_BMP(ch), "case mapping crossed BMP/SMP boundary!"); aOut[i++] = H_SURROGATE(ch); aOut[i] = L_SURROGATE(ch); continue; } aOut[i] = ToLowerCase(ch); } }
/**
 * Appends the work-buffer entries that precede the current position to
 * aToStr as UTF-16, then shifts those entries out of the buffer.
 *
 * Entries for which IS_IN_BMP() holds are appended as one code unit; all
 * others are appended as a high/low surrogate pair.
 *
 * @param wb     work buffer; entries wb->ucs[0 .. wb->cur) are flushed and
 *               then removed via workbuf_shift(wb, wb->cur)
 * @param aToStr string receiving the flushed characters
 * @return always NS_OK
 */
static nsresult flush_before_cur(workbuf_t *wb, nsAString& aToStr)
{
  for (PRInt32 idx = 0; idx < wb->cur; ++idx) {
    if (IS_IN_BMP(wb->ucs[idx])) {
      aToStr.Append((PRUnichar)(wb->ucs[idx]));
      continue;
    }
    aToStr.Append((PRUnichar)H_SURROGATE(wb->ucs[idx]));
    aToStr.Append((PRUnichar)L_SURROGATE(wb->ucs[idx]));
  }

  workbuf_shift(wb, wb->cur);
  return NS_OK;
}
// Applies a text-transform (lowercase / uppercase / capitalize / full-width)
// to aString, appending the result to aConvertedString.
//
// Parameters:
//   aString              - UTF-16 input; surrogate pairs are combined and
//                          transformed as a single code point.
//   aConvertedString     - receives the transformed text (appended to).
//   aAllUppercase        - when true, every character is uppercased no matter
//                          what the per-character style says.
//   aLanguage            - initial language used to pick language-specific
//                          casing via GetCasingFor(); when aTextRun is given
//                          the language is re-read from each character's
//                          style context and the Greek/Irish state is reset
//                          whenever it changes.
//   aCharsToMergeArray   - gets one entry per code unit written to the output:
//                          false for the first unit of a character, true for
//                          each additional unit (extra mapped characters and
//                          the low half of a surrogate pair).
//   aDeletedCharsArray   - gets one entry per input code unit: true when the
//                          transform produced uint32_t(-1) for it, and true
//                          for the skipped trailing surrogate of a pair.
//   aTextRun             - optional; supplies per-character styles, the
//                          mCapitalize flags and CanBreakLineBefore().
//                          Capitalization is only performed when it is
//                          non-null.
//   aCanBreakBeforeArray,
//   aStyleArray          - optional, only written when aTextRun is non-null;
//                          kept in lockstep with aCharsToMergeArray. The
//                          precondition requires these and aTextRun to be
//                          passed all together or not at all.
//
// Returns true when merging is needed: a character was deleted, extra output
// units were appended for one input character, or an Irish prefix hyphen was
// removed.
//
// NOTE(review): the Irish "case 3" path indexes aDeletedCharsArray with
// irishMark, which is recorded as a position in aConvertedString rather than
// in aString - confirm the two indices are guaranteed to coincide there.
bool nsCaseTransformTextRunFactory::TransformString( const nsAString& aString, nsString& aConvertedString, bool aAllUppercase, const nsIAtom* aLanguage, nsTArray<bool>& aCharsToMergeArray, nsTArray<bool>& aDeletedCharsArray, nsTransformedTextRun* aTextRun, nsTArray<uint8_t>* aCanBreakBeforeArray, nsTArray<nsStyleContext*>* aStyleArray) { NS_PRECONDITION(!aTextRun || (aCanBreakBeforeArray && aStyleArray), "either none or all three optional parameters required"); uint32_t length = aString.Length(); const char16_t* str = aString.BeginReading(); bool mergeNeeded = false; bool capitalizeDutchIJ = false; bool prevIsLetter = false; bool ntPrefix = false; // true immediately after a word-initial 'n' or 't' // when doing Irish lowercasing uint32_t sigmaIndex = uint32_t(-1); nsIUGenCategory::nsUGenCategory cat; uint8_t style = aAllUppercase ? NS_STYLE_TEXT_TRANSFORM_UPPERCASE : 0; const nsIAtom* lang = aLanguage; LanguageSpecificCasingBehavior languageSpecificCasing = GetCasingFor(lang); mozilla::GreekCasing::State greekState; mozilla::IrishCasing::State irishState; uint32_t irishMark = uint32_t(-1); // location of possible prefix letter(s) for (uint32_t i = 0; i < length; ++i) { uint32_t ch = str[i]; nsStyleContext* styleContext; if (aTextRun) { styleContext = aTextRun->mStyles[i]; style = aAllUppercase ? NS_STYLE_TEXT_TRANSFORM_UPPERCASE : styleContext->StyleText()->mTextTransform; if (lang != styleContext->StyleFont()->mLanguage) { lang = styleContext->StyleFont()->mLanguage; languageSpecificCasing = GetCasingFor(lang); greekState.Reset(); irishState.Reset(); irishMark = uint32_t(-1); } } int extraChars = 0; const mozilla::unicode::MultiCharMapping *mcm; bool inhibitBreakBefore = false; // have we just deleted preceding hyphen? 
if (NS_IS_HIGH_SURROGATE(ch) && i < length - 1 && NS_IS_LOW_SURROGATE(str[i + 1])) { ch = SURROGATE_TO_UCS4(ch, str[i + 1]); } switch (style) { case NS_STYLE_TEXT_TRANSFORM_LOWERCASE: if (languageSpecificCasing == eLSCB_Turkish) { if (ch == 'I') { ch = LATIN_SMALL_LETTER_DOTLESS_I; prevIsLetter = true; sigmaIndex = uint32_t(-1); break; } if (ch == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) { ch = 'i'; prevIsLetter = true; sigmaIndex = uint32_t(-1); break; } } cat = mozilla::unicode::GetGenCategory(ch); if (languageSpecificCasing == eLSCB_Irish && cat == nsIUGenCategory::kLetter) { // See bug 1018805 for Irish lowercasing requirements if (!prevIsLetter && (ch == 'n' || ch == 't')) { ntPrefix = true; } else { if (ntPrefix && mozilla::IrishCasing::IsUpperVowel(ch)) { aConvertedString.Append('-'); ++extraChars; } ntPrefix = false; } } else { ntPrefix = false; } // Special lowercasing behavior for Greek Sigma: note that this is listed // as context-sensitive in Unicode's SpecialCasing.txt, but is *not* a // language-specific mapping; it applies regardless of the language of // the element. // // The lowercase mapping for CAPITAL SIGMA should be to SMALL SIGMA (i.e. // the non-final form) whenever there is a following letter, or when the // CAPITAL SIGMA occurs in isolation (neither preceded nor followed by a // LETTER); and to FINAL SIGMA when it is preceded by another letter but // not followed by one. // // To implement the context-sensitive nature of this mapping, we keep // track of whether the previous character was a letter. If not, CAPITAL // SIGMA will map directly to SMALL SIGMA. If the previous character // was a letter, CAPITAL SIGMA maps to FINAL SIGMA and we record the // position in the converted string; if we then encounter another letter, // that FINAL SIGMA is replaced with a standard SMALL SIGMA. 
// If sigmaIndex is not -1, it marks where we have provisionally mapped // a CAPITAL SIGMA to FINAL SIGMA; if we now find another letter, we // need to change it to SMALL SIGMA. if (sigmaIndex != uint32_t(-1)) { if (cat == nsIUGenCategory::kLetter) { aConvertedString.SetCharAt(GREEK_SMALL_LETTER_SIGMA, sigmaIndex); } } if (ch == GREEK_CAPITAL_LETTER_SIGMA) { // If preceding char was a letter, map to FINAL instead of SMALL, // and note where it occurred by setting sigmaIndex; we'll change it // to standard SMALL SIGMA later if another letter follows if (prevIsLetter) { ch = GREEK_SMALL_LETTER_FINAL_SIGMA; sigmaIndex = aConvertedString.Length(); } else { // CAPITAL SIGMA not preceded by a letter is unconditionally mapped // to SMALL SIGMA ch = GREEK_SMALL_LETTER_SIGMA; sigmaIndex = uint32_t(-1); } prevIsLetter = true; break; } // ignore diacritics for the purpose of contextual sigma mapping; // otherwise, reset prevIsLetter appropriately and clear the // sigmaIndex marker if (cat != nsIUGenCategory::kMark) { prevIsLetter = (cat == nsIUGenCategory::kLetter); sigmaIndex = uint32_t(-1); } mcm = mozilla::unicode::SpecialLower(ch); if (mcm) { int j = 0; while (j < 2 && mcm->mMappedChars[j + 1]) { aConvertedString.Append(mcm->mMappedChars[j]); ++extraChars; ++j; } ch = mcm->mMappedChars[j]; break; } ch = ToLowerCase(ch); break; case NS_STYLE_TEXT_TRANSFORM_UPPERCASE: if (languageSpecificCasing == eLSCB_Turkish && ch == 'i') { ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE; break; } if (languageSpecificCasing == eLSCB_Greek) { ch = mozilla::GreekCasing::UpperCase(ch, greekState); break; } if (languageSpecificCasing == eLSCB_Irish) { bool mark; uint8_t action; ch = mozilla::IrishCasing::UpperCase(ch, irishState, mark, action); if (mark) { irishMark = aConvertedString.Length(); break; } else if (action) { nsString& str = aConvertedString; // shorthand switch (action) { case 1: // lowercase a single prefix letter NS_ASSERTION(str.Length() > 0 && irishMark < str.Length(), "bad 
irishMark!"); str.SetCharAt(ToLowerCase(str[irishMark]), irishMark); irishMark = uint32_t(-1); break; case 2: // lowercase two prefix letters (immediately before current pos) NS_ASSERTION(str.Length() >= 2 && irishMark == str.Length() - 2, "bad irishMark!"); str.SetCharAt(ToLowerCase(str[irishMark]), irishMark); str.SetCharAt(ToLowerCase(str[irishMark + 1]), irishMark + 1); irishMark = uint32_t(-1); break; case 3: // lowercase one prefix letter, and delete following hyphen // (which must be the immediately-preceding char) NS_ASSERTION(str.Length() >= 2 && irishMark == str.Length() - 2, "bad irishMark!"); str.Replace(irishMark, 2, ToLowerCase(str[irishMark])); aDeletedCharsArray[irishMark + 1] = true; // Remove the trailing entries (corresponding to the deleted hyphen) // from the auxiliary arrays. aCharsToMergeArray.SetLength(aCharsToMergeArray.Length() - 1); if (aTextRun) { aStyleArray->SetLength(aStyleArray->Length() - 1); aCanBreakBeforeArray->SetLength(aCanBreakBeforeArray->Length() - 1); inhibitBreakBefore = true; } mergeNeeded = true; irishMark = uint32_t(-1); break; } // ch has been set to the uppercase for current char; // No need to check for SpecialUpper here as none of the characters // that could trigger an Irish casing action have special mappings. 
break; } // If we didn't have any special action to perform, fall through // to check for special uppercase (ß) } mcm = mozilla::unicode::SpecialUpper(ch); if (mcm) { int j = 0; while (j < 2 && mcm->mMappedChars[j + 1]) { aConvertedString.Append(mcm->mMappedChars[j]); ++extraChars; ++j; } ch = mcm->mMappedChars[j]; break; } ch = ToUpperCase(ch); break; case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE: if (aTextRun) { if (capitalizeDutchIJ && ch == 'j') { ch = 'J'; capitalizeDutchIJ = false; break; } capitalizeDutchIJ = false; if (i < aTextRun->mCapitalize.Length() && aTextRun->mCapitalize[i]) { if (languageSpecificCasing == eLSCB_Turkish && ch == 'i') { ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE; break; } if (languageSpecificCasing == eLSCB_Dutch && ch == 'i') { ch = 'I'; capitalizeDutchIJ = true; break; } mcm = mozilla::unicode::SpecialTitle(ch); if (mcm) { int j = 0; while (j < 2 && mcm->mMappedChars[j + 1]) { aConvertedString.Append(mcm->mMappedChars[j]); ++extraChars; ++j; } ch = mcm->mMappedChars[j]; break; } ch = ToTitleCase(ch); } } break; case NS_STYLE_TEXT_TRANSFORM_FULLWIDTH: ch = mozilla::unicode::GetFullWidth(ch); break; default: break; } if (ch == uint32_t(-1)) { aDeletedCharsArray.AppendElement(true); mergeNeeded = true; } else { aDeletedCharsArray.AppendElement(false); aCharsToMergeArray.AppendElement(false); if (aTextRun) { aStyleArray->AppendElement(styleContext); aCanBreakBeforeArray->AppendElement(inhibitBreakBefore ? false : aTextRun->CanBreakLineBefore(i)); } if (IS_IN_BMP(ch)) { aConvertedString.Append(ch); } else { aConvertedString.Append(H_SURROGATE(ch)); aConvertedString.Append(L_SURROGATE(ch)); ++i; aDeletedCharsArray.AppendElement(true); // not exactly deleted, but the // trailing surrogate is skipped ++extraChars; } while (extraChars-- > 0) { mergeNeeded = true; aCharsToMergeArray.AppendElement(true); if (aTextRun) { aStyleArray->AppendElement(styleContext); aCanBreakBeforeArray->AppendElement(false); } } } } return mergeNeeded; }
bool gfxUniscribeShaper::ShapeText(gfxContext *aContext, const PRUnichar *aText, uint32_t aOffset, uint32_t aLength, int32_t aScript, gfxShapedText *aShapedText) { DCFromContext aDC(aContext); bool result = true; HRESULT rv; Uniscribe us(aText, aShapedText, aOffset, aLength); /* itemize the string */ int numItems = us.Itemize(); uint32_t length = aLength; SaveDC(aDC); uint32_t ivs = 0; for (int i = 0; i < numItems; ++i) { int iCharPos = us.ScriptItem(i)->iCharPos; int iCharPosNext = us.ScriptItem(i+1)->iCharPos; if (ivs) { iCharPos += 2; if (iCharPos >= iCharPosNext) { ivs = 0; continue; } } if (i+1 < numItems && iCharPosNext <= length - 2 && aText[iCharPosNext] == H_SURROGATE(kUnicodeVS17) && uint32_t(aText[iCharPosNext + 1]) - L_SURROGATE(kUnicodeVS17) <= L_SURROGATE(kUnicodeVS256) - L_SURROGATE(kUnicodeVS17)) { ivs = SURROGATE_TO_UCS4(aText[iCharPosNext], aText[iCharPosNext + 1]); } else { ivs = 0; } UniscribeItem item(aContext, aDC, this, aText + iCharPos, iCharPosNext - iCharPos, us.ScriptItem(i), ivs); if (!item.AllocateBuffers()) { result = false; break; } if (!item.ShapingEnabled()) { item.EnableShaping(); } rv = item.Shape(); if (FAILED(rv)) { // we know we have the glyphs to display this font already // so Uniscribe just doesn't know how to shape the script. // Render the glyphs without shaping. item.DisableShaping(); rv = item.Shape(); } #ifdef DEBUG if (FAILED(rv)) { NS_WARNING("Uniscribe failed to shape with font"); } #endif if (SUCCEEDED(rv)) { rv = item.Place(); #ifdef DEBUG if (FAILED(rv)) { // crap fonts may fail when placing (e.g. funky free fonts) NS_WARNING("Uniscribe failed to place with font"); } #endif } if (FAILED(rv)) { // Uniscribe doesn't like this font for some reason. // Returning FALSE will make the gfxGDIFont retry with the // "dumb" GDI shaper, unless useUniscribeOnly was set. result = false; break; } item.SaveGlyphs(aShapedText, aOffset); } RestoreDC(aDC, -1); return result; }
// Rebuilds aTextRun by applying the text-transform of each character's style
// (or uppercase for every character when mAllUppercase is set) to the run's
// string, shaping the converted string as a child text run, and then moving
// the child's glyph data back into aTextRun.
//
// Per-character handling visible in the loop:
//   - a high surrogate followed by a low surrogate is combined, transformed
//     as one code point, and written back as a pair (the input index is
//     advanced past the low surrogate);
//   - the casing mode is recomputed whenever the style's font language
//     changes: tr/az/ba/crh/tt select the Turkish behavior, nl the Dutch one;
//   - Turkish: 'I' lowercases to dotless i, 'i' uppercases/capitalizes to
//     I-with-dot-above;
//   - Dutch: when a capitalized 'i' is followed by 'j', the 'j' is
//     capitalized as well;
//   - SZLIG becomes "SS" when uppercasing or capitalizing; the extra 'S' is
//     flagged in charsToMergeArray and counted in extraCharsCount.
//
// charsToMergeArray, styleArray and canBreakBeforeArray get one entry per
// code unit of the converted string. If the child run cannot be created the
// function returns without touching aTextRun's glyphs. When at least one
// extra character was produced the child is merged through
// MergeCharactersInTextRun(); otherwise its glyph data is copied directly.
//
// aRefContext is forwarded to GetParametersForInner, SetPotentialLineBreaks
// and FinishSettingProperties.
void nsCaseTransformTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun, gfxContext* aRefContext) { PRUint32 length = aTextRun->GetLength(); const PRUnichar* str = aTextRun->mString.BeginReading(); nsRefPtr<nsStyleContext>* styles = aTextRun->mStyles.Elements(); nsAutoString convertedString; nsAutoTArray<bool,50> charsToMergeArray; nsAutoTArray<nsStyleContext*,50> styleArray; nsAutoTArray<PRUint8,50> canBreakBeforeArray; PRUint32 extraCharsCount = 0; // Some languages have special casing conventions that differ from the // default Unicode mappings. // The enum values here are named for well-known exemplar languages that // exhibit the behavior in question; multiple lang tags may map to the // same setting here, if the behavior is shared by other languages. enum { eNone, // default non-lang-specific behavior eTurkish, // preserve dotted/dotless-i distinction in uppercase eDutch // treat "ij" digraph as a unit for capitalization } languageSpecificCasing = eNone; const nsIAtom* lang = nsnull; bool capitalizeDutchIJ = false; PRUint32 i; for (i = 0; i < length; ++i) { PRUint32 ch = str[i]; nsStyleContext* styleContext = styles[i]; charsToMergeArray.AppendElement(false); styleArray.AppendElement(styleContext); canBreakBeforeArray.AppendElement(aTextRun->CanBreakLineBefore(i)); PRUint8 style = mAllUppercase ? 
NS_STYLE_TEXT_TRANSFORM_UPPERCASE : styleContext->GetStyleText()->mTextTransform; bool extraChar = false; if (NS_IS_HIGH_SURROGATE(ch) && i < length - 1 && NS_IS_LOW_SURROGATE(str[i + 1])) { ch = SURROGATE_TO_UCS4(ch, str[i + 1]); } if (lang != styleContext->GetStyleFont()->mLanguage) { lang = styleContext->GetStyleFont()->mLanguage; if (lang == nsGkAtoms::tr || lang == nsGkAtoms::az || lang == nsGkAtoms::ba || lang == nsGkAtoms::crh || lang == nsGkAtoms::tt) { languageSpecificCasing = eTurkish; } else if (lang == nsGkAtoms::nl) { languageSpecificCasing = eDutch; } else { languageSpecificCasing = eNone; } } switch (style) { case NS_STYLE_TEXT_TRANSFORM_LOWERCASE: if (languageSpecificCasing == eTurkish && ch == 'I') { ch = LATIN_SMALL_LETTER_DOTLESS_I; } else { ch = ToLowerCase(ch); } break; case NS_STYLE_TEXT_TRANSFORM_UPPERCASE: if (ch == SZLIG) { convertedString.Append('S'); extraChar = true; ch = 'S'; } else if (languageSpecificCasing == eTurkish && ch == 'i') { ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE; } else { ch = ToUpperCase(ch); } break; case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE: if (capitalizeDutchIJ && ch == 'j') { ch = 'J'; capitalizeDutchIJ = false; break; } capitalizeDutchIJ = false; if (i < aTextRun->mCapitalize.Length() && aTextRun->mCapitalize[i]) { if (ch == SZLIG) { convertedString.Append('S'); extraChar = true; ch = 'S'; } else if (languageSpecificCasing == eTurkish && ch == 'i') { ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE; } else if (languageSpecificCasing == eDutch && ch == 'i') { ch = 'I'; capitalizeDutchIJ = true; } else { ch = ToTitleCase(ch); } } break; default: break; } if (IS_IN_BMP(ch)) { convertedString.Append(ch); } else { convertedString.Append(H_SURROGATE(ch)); convertedString.Append(L_SURROGATE(ch)); i++; charsToMergeArray.AppendElement(false); styleArray.AppendElement(styleContext); canBreakBeforeArray.AppendElement(false); } if (extraChar) { ++extraCharsCount; charsToMergeArray.AppendElement(true); 
styleArray.AppendElement(styleContext); canBreakBeforeArray.AppendElement(false); } } PRUint32 flags; gfxTextRunFactory::Parameters innerParams = GetParametersForInner(aTextRun, &flags, aRefContext); gfxFontGroup* fontGroup = aTextRun->GetFontGroup(); nsAutoPtr<nsTransformedTextRun> transformedChild; nsAutoPtr<gfxTextRun> cachedChild; gfxTextRun* child; if (mInnerTransformingTextRunFactory) { transformedChild = mInnerTransformingTextRunFactory->MakeTextRun( convertedString.BeginReading(), convertedString.Length(), &innerParams, fontGroup, flags, styleArray.Elements(), false); child = transformedChild.get(); } else { cachedChild = fontGroup->MakeTextRun( convertedString.BeginReading(), convertedString.Length(), &innerParams, flags); child = cachedChild.get(); } if (!child) return; // Copy potential linebreaks into child so they're preserved // (and also child will be shaped appropriately) NS_ASSERTION(convertedString.Length() == canBreakBeforeArray.Length(), "Dropped characters or break-before values somewhere!"); child->SetPotentialLineBreaks(0, canBreakBeforeArray.Length(), canBreakBeforeArray.Elements(), aRefContext); if (transformedChild) { transformedChild->FinishSettingProperties(aRefContext); } if (extraCharsCount > 0) { // Now merge multiple characters into one multi-glyph character as required MergeCharactersInTextRun(aTextRun, child, charsToMergeArray.Elements()); } else { // No merging to do, so just copy; this produces a more optimized textrun. // We can't steal the data because the child may be cached and stealing // the data would break the cache. aTextRun->ResetGlyphRuns(); aTextRun->CopyGlyphDataFrom(child, 0, child->GetLength(), 0); } }
// Rebuilds aTextRun by applying the text-transform of each character's style
// (or uppercase for every character when mAllUppercase is set) to the run's
// string, shaping the converted string as a child text run, and then moving
// the child's glyph data back into aTextRun.
//
// Per-character handling visible in the loop:
//   - a high surrogate followed by a low surrogate is combined, transformed
//     as one code point, and written back as a pair; the skipped low
//     surrogate is recorded as "deleted" and as an extra (merged) unit;
//   - the casing mode is recomputed whenever the style's font language
//     changes: tr/az/ba/crh/tt select Turkish, nl Dutch, el Greek (which also
//     resets greekState);
//   - lowercase: Turkish I / I-with-dot-above handling, contextual Greek
//     final-sigma handling (see the inline comments), SpecialLower
//     multi-character mappings, then ToLowerCase;
//   - uppercase: Turkish 'i', GreekUpperCase for Greek, SpecialUpper
//     multi-character mappings, then ToUpperCase;
//   - capitalize: Dutch "ij" handling, Turkish 'i', SpecialTitle mappings,
//     then ToTitleCase, applied only where aTextRun->mCapitalize[i] is set;
//   - full-width: mozilla::unicode::GetFullWidth.
//
// A transformed value of uint32_t(-1) means the input character is dropped:
// it is marked true in deletedCharsArray and produces no output.
//
// deletedCharsArray gets one entry per input code unit; charsToMergeArray,
// styleArray and canBreakBeforeArray get one entry per code unit of the
// converted string. If the child run cannot be created the function returns
// without touching aTextRun's glyphs. When mergeNeeded is set (a character
// was deleted or extra units were emitted) the child is merged through
// MergeCharactersInTextRun(); otherwise its glyph data is copied directly.
//
// aRefContext is forwarded to GetParametersForInner, SetPotentialLineBreaks
// and FinishSettingProperties.
void nsCaseTransformTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun, gfxContext* aRefContext) { uint32_t length = aTextRun->GetLength(); const PRUnichar* str = aTextRun->mString.BeginReading(); nsRefPtr<nsStyleContext>* styles = aTextRun->mStyles.Elements(); nsAutoString convertedString; nsAutoTArray<bool,50> charsToMergeArray; nsAutoTArray<bool,50> deletedCharsArray; nsAutoTArray<nsStyleContext*,50> styleArray; nsAutoTArray<uint8_t,50> canBreakBeforeArray; bool mergeNeeded = false; // Some languages have special casing conventions that differ from the // default Unicode mappings. // The enum values here are named for well-known exemplar languages that // exhibit the behavior in question; multiple lang tags may map to the // same setting here, if the behavior is shared by other languages. enum { eNone, // default non-lang-specific behavior eTurkish, // preserve dotted/dotless-i distinction in uppercase eDutch, // treat "ij" digraph as a unit for capitalization eGreek // strip accent when uppercasing Greek vowels } languageSpecificCasing = eNone; const nsIAtom* lang = nullptr; bool capitalizeDutchIJ = false; bool prevIsLetter = false; uint32_t sigmaIndex = uint32_t(-1); nsIUGenCategory::nsUGenCategory cat; GreekCasingState greekState = kStart; uint32_t i; for (i = 0; i < length; ++i) { uint32_t ch = str[i]; nsStyleContext* styleContext = styles[i]; uint8_t style = mAllUppercase ? 
NS_STYLE_TEXT_TRANSFORM_UPPERCASE : styleContext->StyleText()->mTextTransform; int extraChars = 0; const mozilla::unicode::MultiCharMapping *mcm; if (NS_IS_HIGH_SURROGATE(ch) && i < length - 1 && NS_IS_LOW_SURROGATE(str[i + 1])) { ch = SURROGATE_TO_UCS4(ch, str[i + 1]); } if (lang != styleContext->StyleFont()->mLanguage) { lang = styleContext->StyleFont()->mLanguage; if (lang == nsGkAtoms::tr || lang == nsGkAtoms::az || lang == nsGkAtoms::ba || lang == nsGkAtoms::crh || lang == nsGkAtoms::tt) { languageSpecificCasing = eTurkish; } else if (lang == nsGkAtoms::nl) { languageSpecificCasing = eDutch; } else if (lang == nsGkAtoms::el) { languageSpecificCasing = eGreek; greekState = kStart; } else { languageSpecificCasing = eNone; } } switch (style) { case NS_STYLE_TEXT_TRANSFORM_LOWERCASE: if (languageSpecificCasing == eTurkish) { if (ch == 'I') { ch = LATIN_SMALL_LETTER_DOTLESS_I; prevIsLetter = true; sigmaIndex = uint32_t(-1); break; } if (ch == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) { ch = 'i'; prevIsLetter = true; sigmaIndex = uint32_t(-1); break; } } // Special lowercasing behavior for Greek Sigma: note that this is listed // as context-sensitive in Unicode's SpecialCasing.txt, but is *not* a // language-specific mapping; it applies regardless of the language of // the element. // // The lowercase mapping for CAPITAL SIGMA should be to SMALL SIGMA (i.e. // the non-final form) whenever there is a following letter, or when the // CAPITAL SIGMA occurs in isolation (neither preceded nor followed by a // LETTER); and to FINAL SIGMA when it is preceded by another letter but // not followed by one. // // To implement the context-sensitive nature of this mapping, we keep // track of whether the previous character was a letter. If not, CAPITAL // SIGMA will map directly to SMALL SIGMA. 
If the previous character // was a letter, CAPITAL SIGMA maps to FINAL SIGMA and we record the // position in the converted string; if we then encounter another letter, // that FINAL SIGMA is replaced with a standard SMALL SIGMA. cat = mozilla::unicode::GetGenCategory(ch); // If sigmaIndex is not -1, it marks where we have provisionally mapped // a CAPITAL SIGMA to FINAL SIGMA; if we now find another letter, we // need to change it to SMALL SIGMA. if (sigmaIndex != uint32_t(-1)) { if (cat == nsIUGenCategory::kLetter) { convertedString.SetCharAt(GREEK_SMALL_LETTER_SIGMA, sigmaIndex); } } if (ch == GREEK_CAPITAL_LETTER_SIGMA) { // If preceding char was a letter, map to FINAL instead of SMALL, // and note where it occurred by setting sigmaIndex; we'll change it // to standard SMALL SIGMA later if another letter follows if (prevIsLetter) { ch = GREEK_SMALL_LETTER_FINAL_SIGMA; sigmaIndex = convertedString.Length(); } else { // CAPITAL SIGMA not preceded by a letter is unconditionally mapped // to SMALL SIGMA ch = GREEK_SMALL_LETTER_SIGMA; sigmaIndex = uint32_t(-1); } prevIsLetter = true; break; } // ignore diacritics for the purpose of contextual sigma mapping; // otherwise, reset prevIsLetter appropriately and clear the // sigmaIndex marker if (cat != nsIUGenCategory::kMark) { prevIsLetter = (cat == nsIUGenCategory::kLetter); sigmaIndex = uint32_t(-1); } mcm = mozilla::unicode::SpecialLower(ch); if (mcm) { int j = 0; while (j < 2 && mcm->mMappedChars[j + 1]) { convertedString.Append(mcm->mMappedChars[j]); ++extraChars; ++j; } ch = mcm->mMappedChars[j]; break; } ch = ToLowerCase(ch); break; case NS_STYLE_TEXT_TRANSFORM_UPPERCASE: if (languageSpecificCasing == eTurkish && ch == 'i') { ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE; break; } if (languageSpecificCasing == eGreek) { ch = GreekUpperCase(ch, &greekState); break; } mcm = mozilla::unicode::SpecialUpper(ch); if (mcm) { int j = 0; while (j < 2 && mcm->mMappedChars[j + 1]) { 
convertedString.Append(mcm->mMappedChars[j]); ++extraChars; ++j; } ch = mcm->mMappedChars[j]; break; } ch = ToUpperCase(ch); break; case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE: if (capitalizeDutchIJ && ch == 'j') { ch = 'J'; capitalizeDutchIJ = false; break; } capitalizeDutchIJ = false; if (i < aTextRun->mCapitalize.Length() && aTextRun->mCapitalize[i]) { if (languageSpecificCasing == eTurkish && ch == 'i') { ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE; break; } if (languageSpecificCasing == eDutch && ch == 'i') { ch = 'I'; capitalizeDutchIJ = true; break; } mcm = mozilla::unicode::SpecialTitle(ch); if (mcm) { int j = 0; while (j < 2 && mcm->mMappedChars[j + 1]) { convertedString.Append(mcm->mMappedChars[j]); ++extraChars; ++j; } ch = mcm->mMappedChars[j]; break; } ch = ToTitleCase(ch); } break; case NS_STYLE_TEXT_TRANSFORM_FULLWIDTH: ch = mozilla::unicode::GetFullWidth(ch); break; default: break; } if (ch == uint32_t(-1)) { deletedCharsArray.AppendElement(true); mergeNeeded = true; } else { deletedCharsArray.AppendElement(false); charsToMergeArray.AppendElement(false); styleArray.AppendElement(styleContext); canBreakBeforeArray.AppendElement(aTextRun->CanBreakLineBefore(i)); if (IS_IN_BMP(ch)) { convertedString.Append(ch); } else { convertedString.Append(H_SURROGATE(ch)); convertedString.Append(L_SURROGATE(ch)); ++i; deletedCharsArray.AppendElement(true); // not exactly deleted, but the // trailing surrogate is skipped ++extraChars; } while (extraChars-- > 0) { mergeNeeded = true; charsToMergeArray.AppendElement(true); styleArray.AppendElement(styleContext); canBreakBeforeArray.AppendElement(false); } } } uint32_t flags; gfxTextRunFactory::Parameters innerParams = GetParametersForInner(aTextRun, &flags, aRefContext); gfxFontGroup* fontGroup = aTextRun->GetFontGroup(); nsAutoPtr<nsTransformedTextRun> transformedChild; nsAutoPtr<gfxTextRun> cachedChild; gfxTextRun* child; if (mInnerTransformingTextRunFactory) { transformedChild = 
mInnerTransformingTextRunFactory->MakeTextRun( convertedString.BeginReading(), convertedString.Length(), &innerParams, fontGroup, flags, styleArray.Elements(), false); child = transformedChild.get(); } else { cachedChild = fontGroup->MakeTextRun( convertedString.BeginReading(), convertedString.Length(), &innerParams, flags); child = cachedChild.get(); } if (!child) return; // Copy potential linebreaks into child so they're preserved // (and also child will be shaped appropriately) NS_ASSERTION(convertedString.Length() == canBreakBeforeArray.Length(), "Dropped characters or break-before values somewhere!"); child->SetPotentialLineBreaks(0, canBreakBeforeArray.Length(), canBreakBeforeArray.Elements(), aRefContext); if (transformedChild) { transformedChild->FinishSettingProperties(aRefContext); } if (mergeNeeded) { // Now merge multiple characters into one multi-glyph character as required // and deal with skipping deleted accent chars NS_ASSERTION(charsToMergeArray.Length() == child->GetLength(), "source length mismatch"); NS_ASSERTION(deletedCharsArray.Length() == aTextRun->GetLength(), "destination length mismatch"); MergeCharactersInTextRun(aTextRun, child, charsToMergeArray.Elements(), deletedCharsArray.Elements()); } else { // No merging to do, so just copy; this produces a more optimized textrun. // We can't steal the data because the child may be cached and stealing // the data would break the cache. aTextRun->ResetGlyphRuns(); aTextRun->CopyGlyphDataFrom(child, 0, child->GetLength(), 0); } }