bool nsCaseTransformTextRunFactory::TransformString( const nsAString& aString, nsString& aConvertedString, bool aAllUppercase, const nsIAtom* aLanguage, nsTArray<bool>& aCharsToMergeArray, nsTArray<bool>& aDeletedCharsArray, nsTransformedTextRun* aTextRun, nsTArray<uint8_t>* aCanBreakBeforeArray, nsTArray<nsStyleContext*>* aStyleArray) { NS_PRECONDITION(!aTextRun || (aCanBreakBeforeArray && aStyleArray), "either none or all three optional parameters required"); uint32_t length = aString.Length(); const char16_t* str = aString.BeginReading(); bool mergeNeeded = false; bool capitalizeDutchIJ = false; bool prevIsLetter = false; bool ntPrefix = false; // true immediately after a word-initial 'n' or 't' // when doing Irish lowercasing uint32_t sigmaIndex = uint32_t(-1); nsIUGenCategory::nsUGenCategory cat; uint8_t style = aAllUppercase ? NS_STYLE_TEXT_TRANSFORM_UPPERCASE : 0; const nsIAtom* lang = aLanguage; LanguageSpecificCasingBehavior languageSpecificCasing = GetCasingFor(lang); mozilla::GreekCasing::State greekState; mozilla::IrishCasing::State irishState; uint32_t irishMark = uint32_t(-1); // location of possible prefix letter(s) for (uint32_t i = 0; i < length; ++i) { uint32_t ch = str[i]; nsStyleContext* styleContext; if (aTextRun) { styleContext = aTextRun->mStyles[i]; style = aAllUppercase ? NS_STYLE_TEXT_TRANSFORM_UPPERCASE : styleContext->StyleText()->mTextTransform; if (lang != styleContext->StyleFont()->mLanguage) { lang = styleContext->StyleFont()->mLanguage; languageSpecificCasing = GetCasingFor(lang); greekState.Reset(); irishState.Reset(); irishMark = uint32_t(-1); } } int extraChars = 0; const mozilla::unicode::MultiCharMapping *mcm; bool inhibitBreakBefore = false; // have we just deleted preceding hyphen? 
if (NS_IS_HIGH_SURROGATE(ch) && i < length - 1 && NS_IS_LOW_SURROGATE(str[i + 1])) { ch = SURROGATE_TO_UCS4(ch, str[i + 1]); } switch (style) { case NS_STYLE_TEXT_TRANSFORM_LOWERCASE: if (languageSpecificCasing == eLSCB_Turkish) { if (ch == 'I') { ch = LATIN_SMALL_LETTER_DOTLESS_I; prevIsLetter = true; sigmaIndex = uint32_t(-1); break; } if (ch == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) { ch = 'i'; prevIsLetter = true; sigmaIndex = uint32_t(-1); break; } } cat = mozilla::unicode::GetGenCategory(ch); if (languageSpecificCasing == eLSCB_Irish && cat == nsIUGenCategory::kLetter) { // See bug 1018805 for Irish lowercasing requirements if (!prevIsLetter && (ch == 'n' || ch == 't')) { ntPrefix = true; } else { if (ntPrefix && mozilla::IrishCasing::IsUpperVowel(ch)) { aConvertedString.Append('-'); ++extraChars; } ntPrefix = false; } } else { ntPrefix = false; } // Special lowercasing behavior for Greek Sigma: note that this is listed // as context-sensitive in Unicode's SpecialCasing.txt, but is *not* a // language-specific mapping; it applies regardless of the language of // the element. // // The lowercase mapping for CAPITAL SIGMA should be to SMALL SIGMA (i.e. // the non-final form) whenever there is a following letter, or when the // CAPITAL SIGMA occurs in isolation (neither preceded nor followed by a // LETTER); and to FINAL SIGMA when it is preceded by another letter but // not followed by one. // // To implement the context-sensitive nature of this mapping, we keep // track of whether the previous character was a letter. If not, CAPITAL // SIGMA will map directly to SMALL SIGMA. If the previous character // was a letter, CAPITAL SIGMA maps to FINAL SIGMA and we record the // position in the converted string; if we then encounter another letter, // that FINAL SIGMA is replaced with a standard SMALL SIGMA. 
// If sigmaIndex is not -1, it marks where we have provisionally mapped // a CAPITAL SIGMA to FINAL SIGMA; if we now find another letter, we // need to change it to SMALL SIGMA. if (sigmaIndex != uint32_t(-1)) { if (cat == nsIUGenCategory::kLetter) { aConvertedString.SetCharAt(GREEK_SMALL_LETTER_SIGMA, sigmaIndex); } } if (ch == GREEK_CAPITAL_LETTER_SIGMA) { // If preceding char was a letter, map to FINAL instead of SMALL, // and note where it occurred by setting sigmaIndex; we'll change it // to standard SMALL SIGMA later if another letter follows if (prevIsLetter) { ch = GREEK_SMALL_LETTER_FINAL_SIGMA; sigmaIndex = aConvertedString.Length(); } else { // CAPITAL SIGMA not preceded by a letter is unconditionally mapped // to SMALL SIGMA ch = GREEK_SMALL_LETTER_SIGMA; sigmaIndex = uint32_t(-1); } prevIsLetter = true; break; } // ignore diacritics for the purpose of contextual sigma mapping; // otherwise, reset prevIsLetter appropriately and clear the // sigmaIndex marker if (cat != nsIUGenCategory::kMark) { prevIsLetter = (cat == nsIUGenCategory::kLetter); sigmaIndex = uint32_t(-1); } mcm = mozilla::unicode::SpecialLower(ch); if (mcm) { int j = 0; while (j < 2 && mcm->mMappedChars[j + 1]) { aConvertedString.Append(mcm->mMappedChars[j]); ++extraChars; ++j; } ch = mcm->mMappedChars[j]; break; } ch = ToLowerCase(ch); break; case NS_STYLE_TEXT_TRANSFORM_UPPERCASE: if (languageSpecificCasing == eLSCB_Turkish && ch == 'i') { ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE; break; } if (languageSpecificCasing == eLSCB_Greek) { ch = mozilla::GreekCasing::UpperCase(ch, greekState); break; } if (languageSpecificCasing == eLSCB_Irish) { bool mark; uint8_t action; ch = mozilla::IrishCasing::UpperCase(ch, irishState, mark, action); if (mark) { irishMark = aConvertedString.Length(); break; } else if (action) { nsString& str = aConvertedString; // shorthand switch (action) { case 1: // lowercase a single prefix letter NS_ASSERTION(str.Length() > 0 && irishMark < str.Length(), "bad 
irishMark!"); str.SetCharAt(ToLowerCase(str[irishMark]), irishMark); irishMark = uint32_t(-1); break; case 2: // lowercase two prefix letters (immediately before current pos) NS_ASSERTION(str.Length() >= 2 && irishMark == str.Length() - 2, "bad irishMark!"); str.SetCharAt(ToLowerCase(str[irishMark]), irishMark); str.SetCharAt(ToLowerCase(str[irishMark + 1]), irishMark + 1); irishMark = uint32_t(-1); break; case 3: // lowercase one prefix letter, and delete following hyphen // (which must be the immediately-preceding char) NS_ASSERTION(str.Length() >= 2 && irishMark == str.Length() - 2, "bad irishMark!"); str.Replace(irishMark, 2, ToLowerCase(str[irishMark])); aDeletedCharsArray[irishMark + 1] = true; // Remove the trailing entries (corresponding to the deleted hyphen) // from the auxiliary arrays. aCharsToMergeArray.SetLength(aCharsToMergeArray.Length() - 1); if (aTextRun) { aStyleArray->SetLength(aStyleArray->Length() - 1); aCanBreakBeforeArray->SetLength(aCanBreakBeforeArray->Length() - 1); inhibitBreakBefore = true; } mergeNeeded = true; irishMark = uint32_t(-1); break; } // ch has been set to the uppercase for current char; // No need to check for SpecialUpper here as none of the characters // that could trigger an Irish casing action have special mappings. 
break; } // If we didn't have any special action to perform, fall through // to check for special uppercase (ß) } mcm = mozilla::unicode::SpecialUpper(ch); if (mcm) { int j = 0; while (j < 2 && mcm->mMappedChars[j + 1]) { aConvertedString.Append(mcm->mMappedChars[j]); ++extraChars; ++j; } ch = mcm->mMappedChars[j]; break; } ch = ToUpperCase(ch); break; case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE: if (aTextRun) { if (capitalizeDutchIJ && ch == 'j') { ch = 'J'; capitalizeDutchIJ = false; break; } capitalizeDutchIJ = false; if (i < aTextRun->mCapitalize.Length() && aTextRun->mCapitalize[i]) { if (languageSpecificCasing == eLSCB_Turkish && ch == 'i') { ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE; break; } if (languageSpecificCasing == eLSCB_Dutch && ch == 'i') { ch = 'I'; capitalizeDutchIJ = true; break; } mcm = mozilla::unicode::SpecialTitle(ch); if (mcm) { int j = 0; while (j < 2 && mcm->mMappedChars[j + 1]) { aConvertedString.Append(mcm->mMappedChars[j]); ++extraChars; ++j; } ch = mcm->mMappedChars[j]; break; } ch = ToTitleCase(ch); } } break; case NS_STYLE_TEXT_TRANSFORM_FULLWIDTH: ch = mozilla::unicode::GetFullWidth(ch); break; default: break; } if (ch == uint32_t(-1)) { aDeletedCharsArray.AppendElement(true); mergeNeeded = true; } else { aDeletedCharsArray.AppendElement(false); aCharsToMergeArray.AppendElement(false); if (aTextRun) { aStyleArray->AppendElement(styleContext); aCanBreakBeforeArray->AppendElement(inhibitBreakBefore ? false : aTextRun->CanBreakLineBefore(i)); } if (IS_IN_BMP(ch)) { aConvertedString.Append(ch); } else { aConvertedString.Append(H_SURROGATE(ch)); aConvertedString.Append(L_SURROGATE(ch)); ++i; aDeletedCharsArray.AppendElement(true); // not exactly deleted, but the // trailing surrogate is skipped ++extraChars; } while (extraChars-- > 0) { mergeNeeded = true; aCharsToMergeArray.AppendElement(true); if (aTextRun) { aStyleArray->AppendElement(styleContext); aCanBreakBeforeArray->AppendElement(false); } } } } return mergeNeeded; }
void nsCaseTransformTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun, gfxContext* aRefContext) { PRUint32 length = aTextRun->GetLength(); const PRUnichar* str = aTextRun->mString.BeginReading(); nsRefPtr<nsStyleContext>* styles = aTextRun->mStyles.Elements(); nsAutoString convertedString; nsAutoTArray<bool,50> charsToMergeArray; nsAutoTArray<nsStyleContext*,50> styleArray; nsAutoTArray<PRUint8,50> canBreakBeforeArray; PRUint32 extraCharsCount = 0; PRUint32 i; for (i = 0; i < length; ++i) { PRUnichar ch = str[i]; charsToMergeArray.AppendElement(false); styleArray.AppendElement(styles[i]); canBreakBeforeArray.AppendElement(aTextRun->CanBreakLineBefore(i)); PRUint8 style = mAllUppercase ? NS_STYLE_TEXT_TRANSFORM_UPPERCASE : styles[i]->GetStyleText()->mTextTransform; bool extraChar = false; switch (style) { case NS_STYLE_TEXT_TRANSFORM_LOWERCASE: ch = ToLowerCase(ch); break; case NS_STYLE_TEXT_TRANSFORM_UPPERCASE: if (ch == SZLIG) { convertedString.Append('S'); extraChar = true; ch = 'S'; } else { ch = ToUpperCase(ch); } break; case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE: if (i < aTextRun->mCapitalize.Length() && aTextRun->mCapitalize[i]) { if (ch == SZLIG) { convertedString.Append('S'); extraChar = true; ch = 'S'; } else { ch = ToTitleCase(ch); } } break; default: break; } convertedString.Append(ch); if (extraChar) { ++extraCharsCount; charsToMergeArray.AppendElement(true); styleArray.AppendElement(styles[i]); canBreakBeforeArray.AppendElement(false); } } PRUint32 flags; gfxTextRunFactory::Parameters innerParams = GetParametersForInner(aTextRun, &flags, aRefContext); gfxFontGroup* fontGroup = aTextRun->GetFontGroup(); nsAutoPtr<nsTransformedTextRun> transformedChild; nsAutoPtr<gfxTextRun> cachedChild; gfxTextRun* child; if (mInnerTransformingTextRunFactory) { transformedChild = mInnerTransformingTextRunFactory->MakeTextRun( convertedString.BeginReading(), convertedString.Length(), &innerParams, fontGroup, flags, styleArray.Elements(), false); child = 
transformedChild.get(); } else { cachedChild = fontGroup->MakeTextRun( convertedString.BeginReading(), convertedString.Length(), &innerParams, flags); child = cachedChild.get(); } if (!child) return; // Copy potential linebreaks into child so they're preserved // (and also child will be shaped appropriately) NS_ASSERTION(convertedString.Length() == canBreakBeforeArray.Length(), "Dropped characters or break-before values somewhere!"); child->SetPotentialLineBreaks(0, canBreakBeforeArray.Length(), canBreakBeforeArray.Elements(), aRefContext); if (transformedChild) { transformedChild->FinishSettingProperties(aRefContext); } if (extraCharsCount > 0) { // Now merge multiple characters into one multi-glyph character as required MergeCharactersInTextRun(aTextRun, child, charsToMergeArray.Elements()); } else { // No merging to do, so just copy; this produces a more optimized textrun. // We can't steal the data because the child may be cached and stealing // the data would break the cache. aTextRun->ResetGlyphRuns(); aTextRun->CopyGlyphDataFrom(child, 0, child->GetLength(), 0); } }
/**
 * Rebuild aTextRun's glyph data from a case-transformed copy of its text.
 *
 * Each source character is mapped according to mAllUppercase or its
 * text-transform style, with language-specific handling (Turkish-style
 * dotted/dotless i, the Dutch "ij" digraph, Greek uppercasing) and the
 * context-sensitive lowercase mapping of GREEK CAPITAL SIGMA. A child text
 * run is created for the transformed text and its glyphs are merged or copied
 * back into aTextRun.
 */
void
nsCaseTransformTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun,
                                              gfxContext* aRefContext)
{
  const uint32_t srcLength = aTextRun->GetLength();
  const PRUnichar* srcText = aTextRun->mString.BeginReading();
  nsRefPtr<nsStyleContext>* srcStyles = aTextRun->mStyles.Elements();

  nsAutoString childText;
  nsAutoTArray<bool,50> mergeFlags;          // one entry per output code unit
  nsAutoTArray<bool,50> deletedFlags;        // one entry per source code unit
  nsAutoTArray<nsStyleContext*,50> childStyles;
  nsAutoTArray<uint8_t,50> breakBefore;
  bool needsMerge = false;

  // Language-specific casing conventions that deviate from the default
  // Unicode mappings. Each value is named after a well-known language showing
  // the behavior; several lang tags may select the same value.
  enum {
    eNone,    // default non-lang-specific behavior
    eTurkish, // preserve dotted/dotless-i distinction in uppercase
    eDutch,   // treat "ij" digraph as a unit for capitalization
    eGreek    // strip accent when uppercasing Greek vowels
  } casing = eNone;

  const nsIAtom* lang = nullptr;
  bool afterDutchI = false;              // just capitalized a Dutch 'i'
  bool prevIsLetter = false;             // for contextual sigma mapping
  uint32_t sigmaIndex = uint32_t(-1);    // offset of a provisional FINAL SIGMA
  GreekCasingState greekState = kStart;

  for (uint32_t pos = 0; pos < srcLength; ++pos) {
    uint32_t c = srcText[pos];
    nsStyleContext* sc = srcStyles[pos];
    uint8_t transform = mAllUppercase ? NS_STYLE_TEXT_TRANSFORM_UPPERCASE
                                      : sc->StyleText()->mTextTransform;

    int extras = 0;   // output code units beyond the first for this character
    const mozilla::unicode::MultiCharMapping *mapping;

    // Join a valid surrogate pair into one UCS4 value.
    if (NS_IS_HIGH_SURROGATE(c) && pos < srcLength - 1 &&
        NS_IS_LOW_SURROGATE(srcText[pos + 1])) {
      c = SURROGATE_TO_UCS4(c, srcText[pos + 1]);
    }

    // Re-derive the casing behavior whenever the language changes.
    if (lang != sc->StyleFont()->mLanguage) {
      lang = sc->StyleFont()->mLanguage;
      if (lang == nsGkAtoms::nl) {
        casing = eDutch;
      } else if (lang == nsGkAtoms::el) {
        casing = eGreek;
        greekState = kStart;
      } else if (lang == nsGkAtoms::tr || lang == nsGkAtoms::az ||
                 lang == nsGkAtoms::ba || lang == nsGkAtoms::crh ||
                 lang == nsGkAtoms::tt) {
        casing = eTurkish;
      } else {
        casing = eNone;
      }
    }

    switch (transform) {
    case NS_STYLE_TEXT_TRANSFORM_LOWERCASE: {
      if (casing == eTurkish &&
          (c == 'I' || c == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE)) {
        if (c == 'I') {
          c = LATIN_SMALL_LETTER_DOTLESS_I;
        } else {
          c = 'i';
        }
        prevIsLetter = true;
        sigmaIndex = uint32_t(-1);
        break;
      }

      // Greek sigma lowercasing is context-sensitive but not
      // language-specific, so it is applied whatever the element's language.
      // CAPITAL SIGMA becomes FINAL SIGMA only when preceded by a letter and
      // not followed by one; in every other case it becomes SMALL SIGMA.
      // Since the following character is not known yet, a sigma preceded by
      // a letter is provisionally written as FINAL SIGMA and its offset is
      // remembered; if a letter then follows, it is patched to SMALL SIGMA.
      nsIUGenCategory::nsUGenCategory category =
        mozilla::unicode::GetGenCategory(c);

      if (sigmaIndex != uint32_t(-1) &&
          category == nsIUGenCategory::kLetter) {
        // A letter follows the provisional FINAL SIGMA: make it non-final.
        childText.SetCharAt(GREEK_SMALL_LETTER_SIGMA, sigmaIndex);
      }

      if (c == GREEK_CAPITAL_LETTER_SIGMA) {
        if (!prevIsLetter) {
          // Not preceded by a letter: always the non-final form.
          c = GREEK_SMALL_LETTER_SIGMA;
          sigmaIndex = uint32_t(-1);
        } else {
          // Provisionally final; remember where it will land in the output.
          c = GREEK_SMALL_LETTER_FINAL_SIGMA;
          sigmaIndex = childText.Length();
        }
        prevIsLetter = true;
        break;
      }

      // Diacritics are transparent for the sigma context; anything else
      // updates the letter flag and drops the provisional marker.
      if (category != nsIUGenCategory::kMark) {
        prevIsLetter = (category == nsIUGenCategory::kLetter);
        sigmaIndex = uint32_t(-1);
      }

      mapping = mozilla::unicode::SpecialLower(c);
      if (mapping) {
        // Emit every mapped char but the last; the last replaces c.
        int k;
        for (k = 0; k < 2 && mapping->mMappedChars[k + 1]; ++k) {
          childText.Append(mapping->mMappedChars[k]);
          ++extras;
        }
        c = mapping->mMappedChars[k];
      } else {
        c = ToLowerCase(c);
      }
      break;
    }

    case NS_STYLE_TEXT_TRANSFORM_UPPERCASE:
      if (casing == eTurkish && c == 'i') {
        c = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
      } else if (casing == eGreek) {
        c = GreekUpperCase(c, &greekState);
      } else {
        mapping = mozilla::unicode::SpecialUpper(c);
        if (mapping) {
          int k;
          for (k = 0; k < 2 && mapping->mMappedChars[k + 1]; ++k) {
            childText.Append(mapping->mMappedChars[k]);
            ++extras;
          }
          c = mapping->mMappedChars[k];
        } else {
          c = ToUpperCase(c);
        }
      }
      break;

    case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE: {
      // A 'j' directly after a capitalized Dutch 'i' completes the digraph.
      const bool completesIJ = afterDutchI && c == 'j';
      afterDutchI = false;
      if (completesIJ) {
        c = 'J';
      } else if (pos < aTextRun->mCapitalize.Length() &&
                 aTextRun->mCapitalize[pos]) {
        if (casing == eTurkish && c == 'i') {
          c = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
        } else if (casing == eDutch && c == 'i') {
          c = 'I';
          afterDutchI = true;
        } else {
          mapping = mozilla::unicode::SpecialTitle(c);
          if (mapping) {
            int k;
            for (k = 0; k < 2 && mapping->mMappedChars[k + 1]; ++k) {
              childText.Append(mapping->mMappedChars[k]);
              ++extras;
            }
            c = mapping->mMappedChars[k];
          } else {
            c = ToTitleCase(c);
          }
        }
      }
      break;
    }

    case NS_STYLE_TEXT_TRANSFORM_FULLWIDTH:
      c = mozilla::unicode::GetFullWidth(c);
      break;

    default:
      break;
    }

    if (c == uint32_t(-1)) {
      // The mapping asked for this character to be removed.
      deletedFlags.AppendElement(true);
      needsMerge = true;
      continue;
    }

    deletedFlags.AppendElement(false);
    mergeFlags.AppendElement(false);
    childStyles.AppendElement(sc);
    breakBefore.AppendElement(aTextRun->CanBreakLineBefore(pos));

    if (!IS_IN_BMP(c)) {
      childText.Append(H_SURROGATE(c));
      childText.Append(L_SURROGATE(c));
      ++pos;
      // Not really deleted, but the trailing surrogate of the source is
      // skipped and so has no output entry of its own.
      deletedFlags.AppendElement(true);
      ++extras;
    } else {
      childText.Append(c);
    }

    // Every further output unit merges into the first one.
    for (; extras > 0; --extras) {
      needsMerge = true;
      mergeFlags.AppendElement(true);
      childStyles.AppendElement(sc);
      breakBefore.AppendElement(false);
    }
  }

  uint32_t flags;
  gfxTextRunFactory::Parameters innerParams =
      GetParametersForInner(aTextRun, &flags, aRefContext);
  gfxFontGroup* fontGroup = aTextRun->GetFontGroup();

  nsAutoPtr<nsTransformedTextRun> transformedChild;
  nsAutoPtr<gfxTextRun> cachedChild;
  gfxTextRun* child;

  if (!mInnerTransformingTextRunFactory) {
    cachedChild = fontGroup->MakeTextRun(
        childText.BeginReading(), childText.Length(), &innerParams,
        flags);
    child = cachedChild.get();
  } else {
    transformedChild = mInnerTransformingTextRunFactory->MakeTextRun(
        childText.BeginReading(), childText.Length(), &innerParams,
        fontGroup, flags, childStyles.Elements(), false);
    child = transformedChild.get();
  }
  if (!child)
    return;

  // Hand the potential line breaks to the child so that they survive (this
  // also ensures the child is shaped appropriately).
  NS_ASSERTION(childText.Length() == breakBefore.Length(),
               "Dropped characters or break-before values somewhere!");
  child->SetPotentialLineBreaks(0, breakBefore.Length(),
      breakBefore.Elements(), aRefContext);
  if (transformedChild) {
    transformedChild->FinishSettingProperties(aRefContext);
  }

  if (!needsMerge) {
    // Nothing to merge: a plain copy gives a more optimized text run. The
    // data is copied rather than stolen because the child may be cached and
    // stealing from it would break the cache.
    aTextRun->ResetGlyphRuns();
    aTextRun->CopyGlyphDataFrom(child, 0, child->GetLength(), 0);
    return;
  }

  // Fold expanded characters back into single multi-glyph characters and
  // account for the source characters that were dropped.
  NS_ASSERTION(mergeFlags.Length() == child->GetLength(),
               "source length mismatch");
  NS_ASSERTION(deletedFlags.Length() == aTextRun->GetLength(),
               "destination length mismatch");
  MergeCharactersInTextRun(aTextRun, child, mergeFlags.Elements(),
                           deletedFlags.Elements());
}
/**
 * Rebuild aTextRun's glyph data from a case-transformed copy of its text.
 *
 * Each source character is mapped according to mAllUppercase or its
 * text-transform style, honoring Turkish-style dotted/dotless i and the Dutch
 * "ij" digraph; SZLIG expands to "SS" when uppercased or capitalized. A child
 * text run is created for the transformed text and its glyphs are merged or
 * copied back into aTextRun.
 */
void
nsCaseTransformTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun,
                                              gfxContext* aRefContext)
{
  const PRUint32 srcLength = aTextRun->GetLength();
  const PRUnichar* srcText = aTextRun->mString.BeginReading();
  nsRefPtr<nsStyleContext>* srcStyles = aTextRun->mStyles.Elements();

  nsAutoString childText;
  nsAutoTArray<bool,50> mergeFlags;
  nsAutoTArray<nsStyleContext*,50> childStyles;
  nsAutoTArray<PRUint8,50> breakBefore;
  PRUint32 expansions = 0;

  // Language-specific casing conventions that deviate from the default
  // Unicode mappings. Each value is named after a well-known language showing
  // the behavior; several lang tags may select the same value.
  enum {
    eNone,    // default non-lang-specific behavior
    eTurkish, // preserve dotted/dotless-i distinction in uppercase
    eDutch    // treat "ij" digraph as a unit for capitalization
  } casing = eNone;

  const nsIAtom* lang = nsnull;
  bool afterDutchI = false;   // just capitalized a Dutch 'i'

  for (PRUint32 pos = 0; pos < srcLength; ++pos) {
    PRUint32 c = srcText[pos];
    nsStyleContext* sc = srcStyles[pos];

    // Entries for the (first) output code unit of this source character.
    mergeFlags.AppendElement(false);
    childStyles.AppendElement(sc);
    breakBefore.AppendElement(aTextRun->CanBreakLineBefore(pos));

    PRUint8 transform = mAllUppercase ? NS_STYLE_TEXT_TRANSFORM_UPPERCASE
                                      : sc->GetStyleText()->mTextTransform;

    bool expanded = false;

    // Join a valid surrogate pair into one UCS4 value.
    if (NS_IS_HIGH_SURROGATE(c) && pos < srcLength - 1 &&
        NS_IS_LOW_SURROGATE(srcText[pos + 1])) {
      c = SURROGATE_TO_UCS4(c, srcText[pos + 1]);
    }

    // Re-derive the casing behavior whenever the language changes.
    if (lang != sc->GetStyleFont()->mLanguage) {
      lang = sc->GetStyleFont()->mLanguage;
      if (lang == nsGkAtoms::nl) {
        casing = eDutch;
      } else if (lang == nsGkAtoms::tr || lang == nsGkAtoms::az ||
                 lang == nsGkAtoms::ba || lang == nsGkAtoms::crh ||
                 lang == nsGkAtoms::tt) {
        casing = eTurkish;
      } else {
        casing = eNone;
      }
    }

    switch (transform) {
    case NS_STYLE_TEXT_TRANSFORM_LOWERCASE:
      if (casing != eTurkish || c != 'I') {
        c = ToLowerCase(c);
      } else {
        c = LATIN_SMALL_LETTER_DOTLESS_I;
      }
      break;

    case NS_STYLE_TEXT_TRANSFORM_UPPERCASE:
      if (casing == eTurkish && c == 'i') {
        c = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
      } else if (c == SZLIG) {
        // Emit the first 'S' now; the second goes out with the common append.
        childText.Append('S');
        c = 'S';
        expanded = true;
      } else {
        c = ToUpperCase(c);
      }
      break;

    case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE: {
      // A 'j' directly after a capitalized Dutch 'i' completes the digraph.
      const bool completesIJ = afterDutchI && c == 'j';
      afterDutchI = false;
      if (completesIJ) {
        c = 'J';
      } else if (pos < aTextRun->mCapitalize.Length() &&
                 aTextRun->mCapitalize[pos]) {
        if (c == 'i' && casing == eTurkish) {
          c = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
        } else if (c == 'i' && casing == eDutch) {
          c = 'I';
          afterDutchI = true;
        } else if (c == SZLIG) {
          childText.Append('S');
          c = 'S';
          expanded = true;
        } else {
          c = ToTitleCase(c);
        }
      }
      break;
    }

    default:
      break;
    }

    if (!IS_IN_BMP(c)) {
      // Written back as a surrogate pair; skip the source's trailing
      // surrogate and add (unmerged) entries for the second code unit.
      childText.Append(H_SURROGATE(c));
      childText.Append(L_SURROGATE(c));
      pos++;
      mergeFlags.AppendElement(false);
      childStyles.AppendElement(sc);
      breakBefore.AppendElement(false);
    } else {
      childText.Append(c);
    }

    if (expanded) {
      // The extra output character merges into the one before it.
      ++expansions;
      mergeFlags.AppendElement(true);
      childStyles.AppendElement(sc);
      breakBefore.AppendElement(false);
    }
  }

  PRUint32 flags;
  gfxTextRunFactory::Parameters innerParams =
      GetParametersForInner(aTextRun, &flags, aRefContext);
  gfxFontGroup* fontGroup = aTextRun->GetFontGroup();

  nsAutoPtr<nsTransformedTextRun> transformedChild;
  nsAutoPtr<gfxTextRun> cachedChild;
  gfxTextRun* child;

  if (!mInnerTransformingTextRunFactory) {
    cachedChild = fontGroup->MakeTextRun(
        childText.BeginReading(), childText.Length(), &innerParams,
        flags);
    child = cachedChild.get();
  } else {
    transformedChild = mInnerTransformingTextRunFactory->MakeTextRun(
        childText.BeginReading(), childText.Length(), &innerParams,
        fontGroup, flags, childStyles.Elements(), false);
    child = transformedChild.get();
  }
  if (!child)
    return;

  // Hand the potential line breaks to the child so that they survive (this
  // also ensures the child is shaped appropriately).
  NS_ASSERTION(childText.Length() == breakBefore.Length(),
               "Dropped characters or break-before values somewhere!");
  child->SetPotentialLineBreaks(0, breakBefore.Length(),
      breakBefore.Elements(), aRefContext);
  if (transformedChild) {
    transformedChild->FinishSettingProperties(aRefContext);
  }

  if (expansions == 0) {
    // Nothing to merge: a plain copy gives a more optimized text run. The
    // data is copied rather than stolen because the child may be cached and
    // stealing from it would break the cache.
    aTextRun->ResetGlyphRuns();
    aTextRun->CopyGlyphDataFrom(child, 0, child->GetLength(), 0);
    return;
  }

  // Fold the expanded characters back into single multi-glyph characters.
  MergeCharactersInTextRun(aTextRun, child, mergeFlags.Elements());
}
void TestCaseConversion() { printf("==========================\n"); printf("Start case conversion test\n"); printf("==========================\n"); int i; PRUnichar buf[256]; printf("Test 1 - ToUpper(PRUnichar, PRUnichar*):\n"); for(i=0;i < T2LEN ; i++) { PRUnichar ch = ToUpperCase(t2data[i]); if(ch != t2result[i]) printf("\tFailed!! result unexpected %d\n", i); } printf("Test 2 - ToLower(PRUnichar, PRUnichar*):\n"); for(i=0;i < T3LEN; i++) { PRUnichar ch = ToLowerCase(t3data[i]); if(ch != t3result[i]) printf("\tFailed!! result unexpected %d\n", i); } printf("Test 3 - ToTitle(PRUnichar, PRUnichar*):\n"); for(i=0;i < T4LEN; i++) { PRUnichar ch = ToTitleCase(t4data[i]); if(ch != t4result[i]) printf("\tFailed!! result unexpected %d\n", i); } printf("Test 4 - ToUpper(PRUnichar*, PRUnichar*, uint32_t):\n"); ToUpperCase(t2data, buf, T2LEN); for(i = 0; i < T2LEN; i++) { if(buf[i] != t2result[i]) { printf("\tFailed!! result unexpected %d\n", i); break; } } printf("Test 5 - ToLower(PRUnichar*, PRUnichar*, uint32_t):\n"); ToLowerCase(t3data, buf, T3LEN); for(i = 0; i < T3LEN; i++) { if(buf[i] != t3result[i]) { printf("\tFailed!! 
result unexpected %d\n", i); break; } } printf("Test 6 - CaseInsensitiveCompare UTF-8 (1):\n"); if (CaseInsensitiveCompare((char*)t6lhs, (char*)t6rhs, sizeof(t6lhs), sizeof(t6rhs))) printf("\tFailed!\n"); if (!CharByCharCompareEqual((char*)t6lhs, (char*)t6rhs, sizeof(t6lhs), sizeof(t6rhs))) printf("\tFailed character-by-character comparison!\n"); printf("Test 7 - CaseInsensitiveCompare UTF-8 (2):\n"); if (CaseInsensitiveCompare(t7lhs, t7rhs, strlen(t7lhs), strlen(t7rhs))) printf("\tFailed!\n"); if (!CharByCharCompareEqual(t7lhs, t7rhs, sizeof(t7lhs), sizeof(t7rhs))) printf("\tFailed character-by-character comparison!\n"); printf("Test 8a - CaseInsensitiveCompare UTF-8 (3):\n"); if (CaseInsensitiveCompare(t8lhs, t8rhs, strlen(t8lhs), strlen(t8rhs)) != -1) printf("\tFailed!\n"); if (CharByCharCompareEqual(t8lhs, t8rhs, strlen(t8lhs), strlen(t8rhs))) printf("\tFailed character-by-character comparison!\n"); printf("Test 8b - CaseInsensitiveCompare UTF-8 (4):\n"); if (CaseInsensitiveCompare(t8rhs, t8lhs, strlen(t8rhs), strlen(t8lhs)) != 1) printf("\tFailed!\n"); // This test may seem a bit strange. But it's actually an easy bug to make // if we tried to be clever and say that two ASCII characters x and y are // case-insensitively equal if (x & ~0x20) == (y & ~0x20). printf("Test 9 - CaseInsensitiveCompare UTF-8 (5):\n"); if (CaseInsensitiveCompare(t9rhs, t9lhs, strlen(t9lhs), strlen(t9rhs)) != 1) printf("\tFailed!\n"); if (CharByCharCompareEqual(t9lhs, t9rhs, strlen(t9lhs), strlen(t9rhs))) printf("\tFailed character-by-character comparison!\n"); printf("===========================\n"); printf("Finish case conversion test\n"); printf("===========================\n"); }