static bool CharsCaseInsensitiveEqual(uint32_t aChar1, uint32_t aChar2) { return aChar1 == aChar2 || (IS_IN_BMP(aChar1) && IS_IN_BMP(aChar2) && ToLowerCase(static_cast<char16_t>(aChar1)) == ToLowerCase(static_cast<char16_t>(aChar2))); }
PRBool nsXBLPrototypeHandler::KeyEventMatched(nsIDOMKeyEvent* aKeyEvent, PRUint32 aCharCode, PRBool aIgnoreShiftKey) { if (mDetail != -1) { // Get the keycode or charcode of the key event. PRUint32 code; if (mMisc) { if (aCharCode) code = aCharCode; else aKeyEvent->GetCharCode(&code); if (IS_IN_BMP(code)) code = ToLowerCase(PRUnichar(code)); } else aKeyEvent->GetKeyCode(&code); if (code != PRUint32(mDetail)) return PR_FALSE; } return ModifiersMatchMask(aKeyEvent, aIgnoreShiftKey); }
static void SetupCapitalization(const PRUnichar* aWord, uint32_t aLength, bool* aCapitalization) { // Capitalize the first alphanumeric character after a space or start // of the word. // The only space character a word can contain is NBSP. bool capitalizeNextChar = true; for (uint32_t i = 0; i < aLength; ++i) { uint32_t ch = aWord[i]; if (capitalizeNextChar) { if (NS_IS_HIGH_SURROGATE(ch) && i + 1 < aLength && NS_IS_LOW_SURROGATE(aWord[i + 1])) { ch = SURROGATE_TO_UCS4(ch, aWord[i + 1]); } if (nsContentUtils::IsAlphanumeric(ch)) { aCapitalization[i] = true; capitalizeNextChar = false; } if (!IS_IN_BMP(ch)) { ++i; } } if (ch == 0xA0 /*NBSP*/) { capitalizeNextChar = true; } } }
bool TestUnspecifiedCodepoint(uint32_t codepoint) { bool rv = true; PRUnichar unicharArray[3]; nsAutoString X, normalized; char description[9]; if (IS_IN_BMP(codepoint)) { unicharArray[0] = codepoint; unicharArray[1] = 0; X = nsDependentString(unicharArray); } else { unicharArray[0] = H_SURROGATE(codepoint); unicharArray[1] = L_SURROGATE(codepoint); unicharArray[2] = 0; X = nsDependentString(unicharArray); } /* 2. For every code point X assigned in this version of Unicode that is not specifically listed in Part 1, the following invariants must be true for all conformant implementations: X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X) */ DEBUG_TESTCASE(X); sprintf(description, "U+%04X", codepoint); NORMALIZE_AND_COMPARE(X, X, NFC, description); NORMALIZE_AND_COMPARE(X, X, NFD, description); NORMALIZE_AND_COMPARE(X, X, NFKC, description); NORMALIZE_AND_COMPARE(X, X, NFKD, description); return rv; }
static bool IsCaseChangeableChar(uint32_t aChar) { return IS_IN_BMP(aChar) && ToLowerCase(static_cast<char16_t>(aChar)) != ToUpperCase(static_cast<char16_t>(aChar)); }
void WidgetKeyboardEvent::GetAccessKeyCandidates(nsTArray<uint32_t>& aCandidates) { MOZ_ASSERT(aCandidates.IsEmpty(), "aCandidates must be empty"); // return the lower cased charCode candidates for access keys. // the priority of the charCodes are: // 0: charCode, 1: unshiftedCharCodes[0], 2: shiftedCharCodes[0] // 3: unshiftedCharCodes[1], 4: shiftedCharCodes[1],... if (charCode) { uint32_t ch = charCode; if (IS_IN_BMP(ch)) { ch = ToLowerCase(static_cast<char16_t>(ch)); } aCandidates.AppendElement(ch); } for (uint32_t i = 0; i < alternativeCharCodes.Length(); ++i) { uint32_t ch[2] = { alternativeCharCodes[i].mUnshiftedCharCode, alternativeCharCodes[i].mShiftedCharCode }; for (uint32_t j = 0; j < 2; ++j) { if (!ch[j]) { continue; } if (IS_IN_BMP(ch[j])) { ch[j] = ToLowerCase(static_cast<char16_t>(ch[j])); } // Don't append the charCode that was already appended. if (aCandidates.IndexOf(ch[j]) == aCandidates.NoIndex) { aCandidates.AppendElement(ch[j]); } } } // Special case for "Space" key. With some keyboard layouts, "Space" with // or without Shift key causes non-ASCII space. For such keyboard layouts, // we should guarantee that the key press works as an ASCII white space key // press. However, if the space key is assigned to a function key, it // shouldn't work as a space key. if (mKeyNameIndex == KEY_NAME_INDEX_USE_STRING && mCodeNameIndex == CODE_NAME_INDEX_Space && charCode != static_cast<uint32_t>(' ')) { aCandidates.AppendElement(static_cast<uint32_t>(' ')); } return; }
void AppendUCS4ToUTF16(const uint32_t aSource, nsAString& aDest) { NS_ASSERTION(IS_VALID_CHAR(aSource), "Invalid UCS4 char"); if (IS_IN_BMP(aSource)) { aDest.Append(char16_t(aSource)); } else { aDest.Append(H_SURROGATE(aSource)); aDest.Append(L_SURROGATE(aSource)); } }
static void ucs4toUtf16(const PRUint32 *in, nsAString& out) { while (*in) { if (!IS_IN_BMP(*in)) { out.Append((PRUnichar) H_SURROGATE(*in)); out.Append((PRUnichar) L_SURROGATE(*in)); } else out.Append((PRUnichar) *in); in++; } }
void ToUpperCase(const char16_t *aIn, char16_t *aOut, uint32_t aLen) { for (uint32_t i = 0; i < aLen; i++) { uint32_t ch = aIn[i]; if (NS_IS_HIGH_SURROGATE(ch) && i < aLen - 1 && NS_IS_LOW_SURROGATE(aIn[i + 1])) { ch = mozilla::unicode::GetUppercase(SURROGATE_TO_UCS4(ch, aIn[i + 1])); NS_ASSERTION(!IS_IN_BMP(ch), "case mapping crossed BMP/SMP boundary!"); aOut[i++] = H_SURROGATE(ch); aOut[i] = L_SURROGATE(ch); continue; } aOut[i] = ToUpperCase(ch); } }
void ToLowerCase(const PRUnichar *aIn, PRUnichar *aOut, PRUint32 aLen) { for (PRUint32 i = 0; i < aLen; i++) { PRUint32 ch = aIn[i]; if (NS_IS_HIGH_SURROGATE(ch) && i < aLen - 1 && NS_IS_LOW_SURROGATE(aIn[i + 1])) { ch = mozilla::unicode::GetLowercase(SURROGATE_TO_UCS4(ch, aIn[i + 1])); NS_ASSERTION(!IS_IN_BMP(ch), "case mapping crossed BMP/SMP boundary!"); aOut[i++] = H_SURROGATE(ch); aOut[i] = L_SURROGATE(ch); continue; } aOut[i] = ToLowerCase(ch); } }
static nsresult flush_before_cur(workbuf_t *wb, nsAString& aToStr) { PRInt32 i; for (i = 0; i < wb->cur; i++) { if (!IS_IN_BMP(wb->ucs[i])) { aToStr.Append((PRUnichar)H_SURROGATE(wb->ucs[i])); aToStr.Append((PRUnichar)L_SURROGATE(wb->ucs[i])); } else { aToStr.Append((PRUnichar)(wb->ucs[i])); } } workbuf_shift(wb, wb->cur); return (NS_OK); }
bool nsCaseTransformTextRunFactory::TransformString( const nsAString& aString, nsString& aConvertedString, bool aAllUppercase, const nsIAtom* aLanguage, nsTArray<bool>& aCharsToMergeArray, nsTArray<bool>& aDeletedCharsArray, nsTransformedTextRun* aTextRun, nsTArray<uint8_t>* aCanBreakBeforeArray, nsTArray<nsStyleContext*>* aStyleArray) { NS_PRECONDITION(!aTextRun || (aCanBreakBeforeArray && aStyleArray), "either none or all three optional parameters required"); uint32_t length = aString.Length(); const char16_t* str = aString.BeginReading(); bool mergeNeeded = false; bool capitalizeDutchIJ = false; bool prevIsLetter = false; bool ntPrefix = false; // true immediately after a word-initial 'n' or 't' // when doing Irish lowercasing uint32_t sigmaIndex = uint32_t(-1); nsIUGenCategory::nsUGenCategory cat; uint8_t style = aAllUppercase ? NS_STYLE_TEXT_TRANSFORM_UPPERCASE : 0; const nsIAtom* lang = aLanguage; LanguageSpecificCasingBehavior languageSpecificCasing = GetCasingFor(lang); mozilla::GreekCasing::State greekState; mozilla::IrishCasing::State irishState; uint32_t irishMark = uint32_t(-1); // location of possible prefix letter(s) for (uint32_t i = 0; i < length; ++i) { uint32_t ch = str[i]; nsStyleContext* styleContext; if (aTextRun) { styleContext = aTextRun->mStyles[i]; style = aAllUppercase ? NS_STYLE_TEXT_TRANSFORM_UPPERCASE : styleContext->StyleText()->mTextTransform; if (lang != styleContext->StyleFont()->mLanguage) { lang = styleContext->StyleFont()->mLanguage; languageSpecificCasing = GetCasingFor(lang); greekState.Reset(); irishState.Reset(); irishMark = uint32_t(-1); } } int extraChars = 0; const mozilla::unicode::MultiCharMapping *mcm; bool inhibitBreakBefore = false; // have we just deleted preceding hyphen? if (NS_IS_HIGH_SURROGATE(ch) && i < length - 1 && NS_IS_LOW_SURROGATE(str[i + 1])) { ch = SURROGATE_TO_UCS4(ch, str[i + 1]); } switch (style) { case NS_STYLE_TEXT_TRANSFORM_LOWERCASE: if (languageSpecificCasing == eLSCB_Turkish) { if (ch == 'I') { ch = LATIN_SMALL_LETTER_DOTLESS_I; prevIsLetter = true; sigmaIndex = uint32_t(-1); break; } if (ch == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) { ch = 'i'; prevIsLetter = true; sigmaIndex = uint32_t(-1); break; } } cat = mozilla::unicode::GetGenCategory(ch); if (languageSpecificCasing == eLSCB_Irish && cat == nsIUGenCategory::kLetter) { // See bug 1018805 for Irish lowercasing requirements if (!prevIsLetter && (ch == 'n' || ch == 't')) { ntPrefix = true; } else { if (ntPrefix && mozilla::IrishCasing::IsUpperVowel(ch)) { aConvertedString.Append('-'); ++extraChars; } ntPrefix = false; } } else { ntPrefix = false; } // Special lowercasing behavior for Greek Sigma: note that this is listed // as context-sensitive in Unicode's SpecialCasing.txt, but is *not* a // language-specific mapping; it applies regardless of the language of // the element. // // The lowercase mapping for CAPITAL SIGMA should be to SMALL SIGMA (i.e. // the non-final form) whenever there is a following letter, or when the // CAPITAL SIGMA occurs in isolation (neither preceded nor followed by a // LETTER); and to FINAL SIGMA when it is preceded by another letter but // not followed by one. // // To implement the context-sensitive nature of this mapping, we keep // track of whether the previous character was a letter. If not, CAPITAL // SIGMA will map directly to SMALL SIGMA. If the previous character // was a letter, CAPITAL SIGMA maps to FINAL SIGMA and we record the // position in the converted string; if we then encounter another letter, // that FINAL SIGMA is replaced with a standard SMALL SIGMA. // If sigmaIndex is not -1, it marks where we have provisionally mapped // a CAPITAL SIGMA to FINAL SIGMA; if we now find another letter, we // need to change it to SMALL SIGMA. if (sigmaIndex != uint32_t(-1)) { if (cat == nsIUGenCategory::kLetter) { aConvertedString.SetCharAt(GREEK_SMALL_LETTER_SIGMA, sigmaIndex); } } if (ch == GREEK_CAPITAL_LETTER_SIGMA) { // If preceding char was a letter, map to FINAL instead of SMALL, // and note where it occurred by setting sigmaIndex; we'll change it // to standard SMALL SIGMA later if another letter follows if (prevIsLetter) { ch = GREEK_SMALL_LETTER_FINAL_SIGMA; sigmaIndex = aConvertedString.Length(); } else { // CAPITAL SIGMA not preceded by a letter is unconditionally mapped // to SMALL SIGMA ch = GREEK_SMALL_LETTER_SIGMA; sigmaIndex = uint32_t(-1); } prevIsLetter = true; break; } // ignore diacritics for the purpose of contextual sigma mapping; // otherwise, reset prevIsLetter appropriately and clear the // sigmaIndex marker if (cat != nsIUGenCategory::kMark) { prevIsLetter = (cat == nsIUGenCategory::kLetter); sigmaIndex = uint32_t(-1); } mcm = mozilla::unicode::SpecialLower(ch); if (mcm) { int j = 0; while (j < 2 && mcm->mMappedChars[j + 1]) { aConvertedString.Append(mcm->mMappedChars[j]); ++extraChars; ++j; } ch = mcm->mMappedChars[j]; break; } ch = ToLowerCase(ch); break; case NS_STYLE_TEXT_TRANSFORM_UPPERCASE: if (languageSpecificCasing == eLSCB_Turkish && ch == 'i') { ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE; break; } if (languageSpecificCasing == eLSCB_Greek) { ch = mozilla::GreekCasing::UpperCase(ch, greekState); break; } if (languageSpecificCasing == eLSCB_Irish) { bool mark; uint8_t action; ch = mozilla::IrishCasing::UpperCase(ch, irishState, mark, action); if (mark) { irishMark = aConvertedString.Length(); break; } else if (action) { nsString& str = aConvertedString; // shorthand switch (action) { case 1: // lowercase a single prefix letter NS_ASSERTION(str.Length() > 0 && irishMark < str.Length(), "bad irishMark!"); str.SetCharAt(ToLowerCase(str[irishMark]), irishMark); irishMark = uint32_t(-1); break; case 2: // lowercase two prefix letters (immediately before current pos) NS_ASSERTION(str.Length() >= 2 && irishMark == str.Length() - 2, "bad irishMark!"); str.SetCharAt(ToLowerCase(str[irishMark]), irishMark); str.SetCharAt(ToLowerCase(str[irishMark + 1]), irishMark + 1); irishMark = uint32_t(-1); break; case 3: // lowercase one prefix letter, and delete following hyphen // (which must be the immediately-preceding char) NS_ASSERTION(str.Length() >= 2 && irishMark == str.Length() - 2, "bad irishMark!"); str.Replace(irishMark, 2, ToLowerCase(str[irishMark])); aDeletedCharsArray[irishMark + 1] = true; // Remove the trailing entries (corresponding to the deleted hyphen) // from the auxiliary arrays. aCharsToMergeArray.SetLength(aCharsToMergeArray.Length() - 1); if (aTextRun) { aStyleArray->SetLength(aStyleArray->Length() - 1); aCanBreakBeforeArray->SetLength(aCanBreakBeforeArray->Length() - 1); inhibitBreakBefore = true; } mergeNeeded = true; irishMark = uint32_t(-1); break; } // ch has been set to the uppercase for current char; // No need to check for SpecialUpper here as none of the characters // that could trigger an Irish casing action have special mappings. break; } // If we didn't have any special action to perform, fall through // to check for special uppercase (ß) } mcm = mozilla::unicode::SpecialUpper(ch); if (mcm) { int j = 0; while (j < 2 && mcm->mMappedChars[j + 1]) { aConvertedString.Append(mcm->mMappedChars[j]); ++extraChars; ++j; } ch = mcm->mMappedChars[j]; break; } ch = ToUpperCase(ch); break; case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE: if (aTextRun) { if (capitalizeDutchIJ && ch == 'j') { ch = 'J'; capitalizeDutchIJ = false; break; } capitalizeDutchIJ = false; if (i < aTextRun->mCapitalize.Length() && aTextRun->mCapitalize[i]) { if (languageSpecificCasing == eLSCB_Turkish && ch == 'i') { ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE; break; } if (languageSpecificCasing == eLSCB_Dutch && ch == 'i') { ch = 'I'; capitalizeDutchIJ = true; break; } mcm = mozilla::unicode::SpecialTitle(ch); if (mcm) { int j = 0; while (j < 2 && mcm->mMappedChars[j + 1]) { aConvertedString.Append(mcm->mMappedChars[j]); ++extraChars; ++j; } ch = mcm->mMappedChars[j]; break; } ch = ToTitleCase(ch); } } break; case NS_STYLE_TEXT_TRANSFORM_FULLWIDTH: ch = mozilla::unicode::GetFullWidth(ch); break; default: break; } if (ch == uint32_t(-1)) { aDeletedCharsArray.AppendElement(true); mergeNeeded = true; } else { aDeletedCharsArray.AppendElement(false); aCharsToMergeArray.AppendElement(false); if (aTextRun) { aStyleArray->AppendElement(styleContext); aCanBreakBeforeArray->AppendElement(inhibitBreakBefore ? false : aTextRun->CanBreakLineBefore(i)); } if (IS_IN_BMP(ch)) { aConvertedString.Append(ch); } else { aConvertedString.Append(H_SURROGATE(ch)); aConvertedString.Append(L_SURROGATE(ch)); ++i; aDeletedCharsArray.AppendElement(true); // not exactly deleted, but the // trailing surrogate is skipped ++extraChars; } while (extraChars-- > 0) { mergeNeeded = true; aCharsToMergeArray.AppendElement(true); if (aTextRun) { aStyleArray->AppendElement(styleContext); aCanBreakBeforeArray->AppendElement(false); } } } } return mergeNeeded; }
void nsCaseTransformTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun, gfxContext* aRefContext) { PRUint32 length = aTextRun->GetLength(); const PRUnichar* str = aTextRun->mString.BeginReading(); nsRefPtr<nsStyleContext>* styles = aTextRun->mStyles.Elements(); nsAutoString convertedString; nsAutoTArray<bool,50> charsToMergeArray; nsAutoTArray<nsStyleContext*,50> styleArray; nsAutoTArray<PRUint8,50> canBreakBeforeArray; PRUint32 extraCharsCount = 0; // Some languages have special casing conventions that differ from the // default Unicode mappings. // The enum values here are named for well-known exemplar languages that // exhibit the behavior in question; multiple lang tags may map to the // same setting here, if the behavior is shared by other languages. enum { eNone, // default non-lang-specific behavior eTurkish, // preserve dotted/dotless-i distinction in uppercase eDutch // treat "ij" digraph as a unit for capitalization } languageSpecificCasing = eNone; const nsIAtom* lang = nsnull; bool capitalizeDutchIJ = false; PRUint32 i; for (i = 0; i < length; ++i) { PRUint32 ch = str[i]; nsStyleContext* styleContext = styles[i]; charsToMergeArray.AppendElement(false); styleArray.AppendElement(styleContext); canBreakBeforeArray.AppendElement(aTextRun->CanBreakLineBefore(i)); PRUint8 style = mAllUppercase ? NS_STYLE_TEXT_TRANSFORM_UPPERCASE : styleContext->GetStyleText()->mTextTransform; bool extraChar = false; if (NS_IS_HIGH_SURROGATE(ch) && i < length - 1 && NS_IS_LOW_SURROGATE(str[i + 1])) { ch = SURROGATE_TO_UCS4(ch, str[i + 1]); } if (lang != styleContext->GetStyleFont()->mLanguage) { lang = styleContext->GetStyleFont()->mLanguage; if (lang == nsGkAtoms::tr || lang == nsGkAtoms::az || lang == nsGkAtoms::ba || lang == nsGkAtoms::crh || lang == nsGkAtoms::tt) { languageSpecificCasing = eTurkish; } else if (lang == nsGkAtoms::nl) { languageSpecificCasing = eDutch; } else { languageSpecificCasing = eNone; } } switch (style) { case NS_STYLE_TEXT_TRANSFORM_LOWERCASE: if (languageSpecificCasing == eTurkish && ch == 'I') { ch = LATIN_SMALL_LETTER_DOTLESS_I; } else { ch = ToLowerCase(ch); } break; case NS_STYLE_TEXT_TRANSFORM_UPPERCASE: if (ch == SZLIG) { convertedString.Append('S'); extraChar = true; ch = 'S'; } else if (languageSpecificCasing == eTurkish && ch == 'i') { ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE; } else { ch = ToUpperCase(ch); } break; case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE: if (capitalizeDutchIJ && ch == 'j') { ch = 'J'; capitalizeDutchIJ = false; break; } capitalizeDutchIJ = false; if (i < aTextRun->mCapitalize.Length() && aTextRun->mCapitalize[i]) { if (ch == SZLIG) { convertedString.Append('S'); extraChar = true; ch = 'S'; } else if (languageSpecificCasing == eTurkish && ch == 'i') { ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE; } else if (languageSpecificCasing == eDutch && ch == 'i') { ch = 'I'; capitalizeDutchIJ = true; } else { ch = ToTitleCase(ch); } } break; default: break; } if (IS_IN_BMP(ch)) { convertedString.Append(ch); } else { convertedString.Append(H_SURROGATE(ch)); convertedString.Append(L_SURROGATE(ch)); i++; charsToMergeArray.AppendElement(false); styleArray.AppendElement(styleContext); canBreakBeforeArray.AppendElement(false); } if (extraChar) { ++extraCharsCount; charsToMergeArray.AppendElement(true); styleArray.AppendElement(styleContext); canBreakBeforeArray.AppendElement(false); } } PRUint32 flags; gfxTextRunFactory::Parameters innerParams = GetParametersForInner(aTextRun, &flags, aRefContext); gfxFontGroup* fontGroup = aTextRun->GetFontGroup(); nsAutoPtr<nsTransformedTextRun> transformedChild; nsAutoPtr<gfxTextRun> cachedChild; gfxTextRun* child; if (mInnerTransformingTextRunFactory) { transformedChild = mInnerTransformingTextRunFactory->MakeTextRun( convertedString.BeginReading(), convertedString.Length(), &innerParams, fontGroup, flags, styleArray.Elements(), false); child = transformedChild.get(); } else { cachedChild = fontGroup->MakeTextRun( convertedString.BeginReading(), convertedString.Length(), &innerParams, flags); child = cachedChild.get(); } if (!child) return; // Copy potential linebreaks into child so they're preserved // (and also child will be shaped appropriately) NS_ASSERTION(convertedString.Length() == canBreakBeforeArray.Length(), "Dropped characters or break-before values somewhere!"); child->SetPotentialLineBreaks(0, canBreakBeforeArray.Length(), canBreakBeforeArray.Elements(), aRefContext); if (transformedChild) { transformedChild->FinishSettingProperties(aRefContext); } if (extraCharsCount > 0) { // Now merge multiple characters into one multi-glyph character as required MergeCharactersInTextRun(aTextRun, child, charsToMergeArray.Elements()); } else { // No merging to do, so just copy; this produces a more optimized textrun. // We can't steal the data because the child may be cached and stealing // the data would break the cache. aTextRun->ResetGlyphRuns(); aTextRun->CopyGlyphDataFrom(child, 0, child->GetLength(), 0); } }
void nsCaseTransformTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun, gfxContext* aRefContext) { uint32_t length = aTextRun->GetLength(); const PRUnichar* str = aTextRun->mString.BeginReading(); nsRefPtr<nsStyleContext>* styles = aTextRun->mStyles.Elements(); nsAutoString convertedString; nsAutoTArray<bool,50> charsToMergeArray; nsAutoTArray<bool,50> deletedCharsArray; nsAutoTArray<nsStyleContext*,50> styleArray; nsAutoTArray<uint8_t,50> canBreakBeforeArray; bool mergeNeeded = false; // Some languages have special casing conventions that differ from the // default Unicode mappings. // The enum values here are named for well-known exemplar languages that // exhibit the behavior in question; multiple lang tags may map to the // same setting here, if the behavior is shared by other languages. enum { eNone, // default non-lang-specific behavior eTurkish, // preserve dotted/dotless-i distinction in uppercase eDutch, // treat "ij" digraph as a unit for capitalization eGreek // strip accent when uppercasing Greek vowels } languageSpecificCasing = eNone; const nsIAtom* lang = nullptr; bool capitalizeDutchIJ = false; bool prevIsLetter = false; uint32_t sigmaIndex = uint32_t(-1); nsIUGenCategory::nsUGenCategory cat; GreekCasingState greekState = kStart; uint32_t i; for (i = 0; i < length; ++i) { uint32_t ch = str[i]; nsStyleContext* styleContext = styles[i]; uint8_t style = mAllUppercase ? NS_STYLE_TEXT_TRANSFORM_UPPERCASE : styleContext->StyleText()->mTextTransform; int extraChars = 0; const mozilla::unicode::MultiCharMapping *mcm; if (NS_IS_HIGH_SURROGATE(ch) && i < length - 1 && NS_IS_LOW_SURROGATE(str[i + 1])) { ch = SURROGATE_TO_UCS4(ch, str[i + 1]); } if (lang != styleContext->StyleFont()->mLanguage) { lang = styleContext->StyleFont()->mLanguage; if (lang == nsGkAtoms::tr || lang == nsGkAtoms::az || lang == nsGkAtoms::ba || lang == nsGkAtoms::crh || lang == nsGkAtoms::tt) { languageSpecificCasing = eTurkish; } else if (lang == nsGkAtoms::nl) { languageSpecificCasing = eDutch; } else if (lang == nsGkAtoms::el) { languageSpecificCasing = eGreek; greekState = kStart; } else { languageSpecificCasing = eNone; } } switch (style) { case NS_STYLE_TEXT_TRANSFORM_LOWERCASE: if (languageSpecificCasing == eTurkish) { if (ch == 'I') { ch = LATIN_SMALL_LETTER_DOTLESS_I; prevIsLetter = true; sigmaIndex = uint32_t(-1); break; } if (ch == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) { ch = 'i'; prevIsLetter = true; sigmaIndex = uint32_t(-1); break; } } // Special lowercasing behavior for Greek Sigma: note that this is listed // as context-sensitive in Unicode's SpecialCasing.txt, but is *not* a // language-specific mapping; it applies regardless of the language of // the element. // // The lowercase mapping for CAPITAL SIGMA should be to SMALL SIGMA (i.e. // the non-final form) whenever there is a following letter, or when the // CAPITAL SIGMA occurs in isolation (neither preceded nor followed by a // LETTER); and to FINAL SIGMA when it is preceded by another letter but // not followed by one. // // To implement the context-sensitive nature of this mapping, we keep // track of whether the previous character was a letter. If not, CAPITAL // SIGMA will map directly to SMALL SIGMA. If the previous character // was a letter, CAPITAL SIGMA maps to FINAL SIGMA and we record the // position in the converted string; if we then encounter another letter, // that FINAL SIGMA is replaced with a standard SMALL SIGMA. cat = mozilla::unicode::GetGenCategory(ch); // If sigmaIndex is not -1, it marks where we have provisionally mapped // a CAPITAL SIGMA to FINAL SIGMA; if we now find another letter, we // need to change it to SMALL SIGMA. if (sigmaIndex != uint32_t(-1)) { if (cat == nsIUGenCategory::kLetter) { convertedString.SetCharAt(GREEK_SMALL_LETTER_SIGMA, sigmaIndex); } } if (ch == GREEK_CAPITAL_LETTER_SIGMA) { // If preceding char was a letter, map to FINAL instead of SMALL, // and note where it occurred by setting sigmaIndex; we'll change it // to standard SMALL SIGMA later if another letter follows if (prevIsLetter) { ch = GREEK_SMALL_LETTER_FINAL_SIGMA; sigmaIndex = convertedString.Length(); } else { // CAPITAL SIGMA not preceded by a letter is unconditionally mapped // to SMALL SIGMA ch = GREEK_SMALL_LETTER_SIGMA; sigmaIndex = uint32_t(-1); } prevIsLetter = true; break; } // ignore diacritics for the purpose of contextual sigma mapping; // otherwise, reset prevIsLetter appropriately and clear the // sigmaIndex marker if (cat != nsIUGenCategory::kMark) { prevIsLetter = (cat == nsIUGenCategory::kLetter); sigmaIndex = uint32_t(-1); } mcm = mozilla::unicode::SpecialLower(ch); if (mcm) { int j = 0; while (j < 2 && mcm->mMappedChars[j + 1]) { convertedString.Append(mcm->mMappedChars[j]); ++extraChars; ++j; } ch = mcm->mMappedChars[j]; break; } ch = ToLowerCase(ch); break; case NS_STYLE_TEXT_TRANSFORM_UPPERCASE: if (languageSpecificCasing == eTurkish && ch == 'i') { ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE; break; } if (languageSpecificCasing == eGreek) { ch = GreekUpperCase(ch, &greekState); break; } mcm = mozilla::unicode::SpecialUpper(ch); if (mcm) { int j = 0; while (j < 2 && mcm->mMappedChars[j + 1]) { convertedString.Append(mcm->mMappedChars[j]); ++extraChars; ++j; } ch = mcm->mMappedChars[j]; break; } ch = ToUpperCase(ch); break; case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE: if (capitalizeDutchIJ && ch == 'j') { ch = 'J'; capitalizeDutchIJ = false; break; } capitalizeDutchIJ = false; if (i < aTextRun->mCapitalize.Length() && aTextRun->mCapitalize[i]) { if (languageSpecificCasing == eTurkish && ch == 'i') { ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE; break; } if (languageSpecificCasing == eDutch && ch == 'i') { ch = 'I'; capitalizeDutchIJ = true; break; } mcm = mozilla::unicode::SpecialTitle(ch); if (mcm) { int j = 0; while (j < 2 && mcm->mMappedChars[j + 1]) { convertedString.Append(mcm->mMappedChars[j]); ++extraChars; ++j; } ch = mcm->mMappedChars[j]; break; } ch = ToTitleCase(ch); } break; case NS_STYLE_TEXT_TRANSFORM_FULLWIDTH: ch = mozilla::unicode::GetFullWidth(ch); break; default: break; } if (ch == uint32_t(-1)) { deletedCharsArray.AppendElement(true); mergeNeeded = true; } else { deletedCharsArray.AppendElement(false); charsToMergeArray.AppendElement(false); styleArray.AppendElement(styleContext); canBreakBeforeArray.AppendElement(aTextRun->CanBreakLineBefore(i)); if (IS_IN_BMP(ch)) { convertedString.Append(ch); } else { convertedString.Append(H_SURROGATE(ch)); convertedString.Append(L_SURROGATE(ch)); ++i; deletedCharsArray.AppendElement(true); // not exactly deleted, but the // trailing surrogate is skipped ++extraChars; } while (extraChars-- > 0) { mergeNeeded = true; charsToMergeArray.AppendElement(true); styleArray.AppendElement(styleContext); canBreakBeforeArray.AppendElement(false); } } } uint32_t flags; gfxTextRunFactory::Parameters innerParams = GetParametersForInner(aTextRun, &flags, aRefContext); gfxFontGroup* fontGroup = aTextRun->GetFontGroup(); nsAutoPtr<nsTransformedTextRun> transformedChild; nsAutoPtr<gfxTextRun> cachedChild; gfxTextRun* child; if (mInnerTransformingTextRunFactory) { transformedChild = mInnerTransformingTextRunFactory->MakeTextRun( convertedString.BeginReading(), convertedString.Length(), &innerParams, fontGroup, flags, styleArray.Elements(), false); child = transformedChild.get(); } else { cachedChild = fontGroup->MakeTextRun( convertedString.BeginReading(), convertedString.Length(), &innerParams, flags); child = cachedChild.get(); } if (!child) return; // Copy potential linebreaks into child so they're preserved // (and also child will be shaped appropriately) NS_ASSERTION(convertedString.Length() == canBreakBeforeArray.Length(), "Dropped characters or break-before values somewhere!"); child->SetPotentialLineBreaks(0, canBreakBeforeArray.Length(), canBreakBeforeArray.Elements(), aRefContext); if (transformedChild) { transformedChild->FinishSettingProperties(aRefContext); } if (mergeNeeded) { // Now merge multiple characters into one multi-glyph character as required // and deal with skipping deleted accent chars NS_ASSERTION(charsToMergeArray.Length() == child->GetLength(), "source length mismatch"); NS_ASSERTION(deletedCharsArray.Length() == aTextRun->GetLength(), "destination length mismatch"); MergeCharactersInTextRun(aTextRun, child, charsToMergeArray.Elements(), deletedCharsArray.Elements()); } else { // No merging to do, so just copy; this produces a more optimized textrun. // We can't steal the data because the child may be cached and stealing // the data would break the cache. aTextRun->ResetGlyphRuns(); aTextRun->CopyGlyphDataFrom(child, 0, child->GetLength(), 0); } }