static void utf16ToUcs4(const nsAString& in, PRUint32 *out, PRUint32 outBufLen, PRUint32 *outLen) { PRUint32 i = 0; nsAString::const_iterator start, end; in.BeginReading(start); in.EndReading(end); while (start != end) { PRUnichar curChar; curChar= *start++; if (start != end && NS_IS_HIGH_SURROGATE(curChar) && NS_IS_LOW_SURROGATE(*start)) { out[i] = SURROGATE_TO_UCS4(curChar, *start); ++start; } else out[i] = curChar; i++; if (i >= outBufLen) { NS_ERROR("input too big, the result truncated"); out[outBufLen-1] = (PRUint32)'\0'; *outLen = outBufLen-1; return; } } out[i] = (PRUint32)'\0'; *outLen = i; }
static void SetupCapitalization(const PRUnichar* aWord, uint32_t aLength, bool* aCapitalization) { // Capitalize the first alphanumeric character after a space or start // of the word. // The only space character a word can contain is NBSP. bool capitalizeNextChar = true; for (uint32_t i = 0; i < aLength; ++i) { uint32_t ch = aWord[i]; if (capitalizeNextChar) { if (NS_IS_HIGH_SURROGATE(ch) && i + 1 < aLength && NS_IS_LOW_SURROGATE(aWord[i + 1])) { ch = SURROGATE_TO_UCS4(ch, aWord[i + 1]); } if (nsContentUtils::IsAlphanumeric(ch)) { aCapitalization[i] = true; capitalizeNextChar = false; } if (!IS_IN_BMP(ch)) { ++i; } } if (ch == 0xA0 /*NBSP*/) { capitalizeNextChar = true; } } }
/**
 * Determines the directionality of a UTF-16 buffer from the first character
 * for which GetDirectionFromChar reports a direction (the first-strong
 * algorithm defined in http://unicode.org/reports/tr9/#P2).
 *
 * @param aText    the UTF-16 text to scan
 * @param aLength  number of code units in aText
 * @param[out] aFirstStrong  if non-null, receives the offset of the first
 *             character with strong directionality, or UINT32_MAX if there
 *             is none (in which case the return value is eDir_NotSet)
 * @return the directionality of the string
 */
static Directionality
GetDirectionFromText(const PRUnichar* aText, const uint32_t aLength,
                     uint32_t* aFirstStrong = nullptr)
{
  uint32_t offset = 0;

  while (offset < aLength) {
    // Remember where this character began; a surrogate pair advances the
    // offset by two code units.
    const uint32_t charStart = offset;
    uint32_t ch = aText[offset++];

    if (NS_IS_HIGH_SURROGATE(ch) &&
        offset < aLength &&
        NS_IS_LOW_SURROGATE(aText[offset])) {
      ch = SURROGATE_TO_UCS4(ch, aText[offset]);
      ++offset;
    }

    const Directionality dir = GetDirectionFromChar(ch);
    if (dir == eDir_NotSet) {
      continue;
    }

    if (aFirstStrong) {
      *aFirstStrong = charStart;
    }
    return dir;
  }

  if (aFirstStrong) {
    *aFirstStrong = UINT32_MAX;
  }
  return eDir_NotSet;
}
/**
 * Replaces each character of inString that has an entity in one of the
 * selected entity bundles with that entity.
 *
 * For every character (a surrogate pair counts as one character) the key
 * "entity.<decimal code point>" is looked up in the bundle of each bit set
 * in entityVersion, lowest bit first; the first successful lookup wins.
 * Characters without an entity are copied to the output unchanged.
 *
 * @param inString       zero-terminated UTF-16 input
 * @param entityVersion  bit mask; each set bit is passed to
 *                       GetVersionBundleInstance to select a bundle
 * @param _retval        receives the converted string produced by
 *                       ToNewUnicode
 * @return NS_ERROR_NULL_POINTER if either pointer is null,
 *         NS_ERROR_OUT_OF_MEMORY if the result could not be allocated,
 *         NS_OK otherwise
 */
NS_IMETHODIMP
nsEntityConverter::ConvertToEntities(const PRUnichar *inString,
                                     PRUint32 entityVersion,
                                     PRUnichar **_retval)
{
  NS_ASSERTION(inString, "null ptr- inString");
  NS_ASSERTION(_retval, "null ptr- _retval");
  if ((nsnull == inString) || (nsnull == _retval))
    return NS_ERROR_NULL_POINTER;
  *_retval = NULL;

  const PRUnichar *entity = NULL;
  nsString outString;

  // per character look for the entity
  PRUint32 len = NS_strlen(inString);
  for (PRUint32 i = 0; i < len; i++) {
    nsAutoString key(NS_LITERAL_STRING("entity."));

    // First code unit of the current character. It is needed so that both
    // halves of a surrogate pair are copied when no entity is found.
    const PRUint32 charStart = i;

    // i + 1 < len is all that is required for inString[i + 1] to be part of
    // the string; a stricter bound would miss a pair at the very end.
    if (NS_IS_HIGH_SURROGATE(inString[i]) &&
        i + 1 < len &&
        NS_IS_LOW_SURROGATE(inString[i + 1])) {
      key.AppendInt(SURROGATE_TO_UCS4(inString[i], inString[i + 1]), 10);
      ++i;
    } else {
      key.AppendInt(inString[i], 10);
    }

    nsXPIDLString value;
    entity = NULL;

    // mask walks the bits from the lowest upwards; mask2 covers the current
    // bit and everything above it, so the loop stops once no selected bit
    // remains.
    for (PRUint32 mask = 1, mask2 = 0xFFFFFFFFL;
         (0 != (entityVersion & mask2));
         mask <<= 1, mask2 <<= 1) {
      if (0 == (entityVersion & mask))
        continue;
      nsIStringBundle* entities = GetVersionBundleInstance(entityVersion & mask);
      NS_ASSERTION(entities, "Cannot get the property file");

      if (NULL == entities)
        continue;

      nsresult rv = entities->GetStringFromName(key.get(), getter_Copies(value));
      if (NS_SUCCEEDED(rv)) {
        entity = value.get();
        break;
      }
    }

    if (NULL != entity) {
      outString.Append(entity);
    } else {
      // Copy the whole character: one code unit, or two for a pair.
      outString.Append(&inString[charStart], i - charStart + 1);
    }
  }

  *_retval = ToNewUnicode(outString);
  if (NULL == *_retval)
    return NS_ERROR_OUT_OF_MEMORY;

  return NS_OK;
}
int32_t CaseInsensitiveCompare(const char16_t *a, const char16_t *b, uint32_t len) { NS_ASSERTION(a && b, "Do not pass in invalid pointers!"); if (len) { do { uint32_t c1 = *a++; uint32_t c2 = *b++; // Unfortunately, we need to check for surrogates BEFORE we check // for equality, because we could have identical high surrogates // but non-identical characters, so we can't just skip them // If c1 isn't a surrogate, we don't bother to check c2; // in the case where it _is_ a surrogate, we're definitely going to get // a mismatch, and don't need to interpret and lowercase it if (NS_IS_HIGH_SURROGATE(c1) && len > 1 && NS_IS_LOW_SURROGATE(*a)) { c1 = SURROGATE_TO_UCS4(c1, *a++); if (NS_IS_HIGH_SURROGATE(c2) && NS_IS_LOW_SURROGATE(*b)) { c2 = SURROGATE_TO_UCS4(c2, *b++); } // If c2 wasn't a surrogate, decrementing len means we'd stop // short of the end of string b, but that doesn't actually matter // because we're going to find a mismatch and return early --len; } if (c1 != c2) { c1 = ToLowerCase_inline(c1); c2 = ToLowerCase_inline(c2); if (c1 != c2) { if (c1 < c2) { return -1; } return 1; } } } while (--len != 0); } return 0; }
/**
 * Returns the code point stored at the start of a test case's c1 field.
 *
 * If c1[0] is not a surrogate it is returned as is. Otherwise c1[0] and
 * c1[1] are combined as a surrogate pair; an assertion fires when they do
 * not form a legal pair, but the combined value is still returned.
 */
uint32_t
UTF32CodepointFromTestcase(testcaseLine* testLine)
{
  if (IS_SURROGATE(testLine->c1[0])) {
    NS_ASSERTION(NS_IS_HIGH_SURROGATE(testLine->c1[0]) &&
                 NS_IS_LOW_SURROGATE(testLine->c1[1]),
                 "Test data neither in BMP nor legal surrogate pair");
    return SURROGATE_TO_UCS4(testLine->c1[0], testLine->c1[1]);
  }

  return testLine->c1[0];
}
/**
 * Replaces each character of inString that has an entity in one of the
 * selected entity bundles with that entity.
 *
 * For every character (a surrogate pair counts as one character) the key
 * "entity.<decimal code point>" is looked up in the bundle of each bit set
 * in entityVersion, lowest bit first; the first successful lookup wins.
 * Characters without an entity are copied to the output unchanged.
 *
 * @param inString       zero-terminated UTF-16 input; must not be null
 * @param entityVersion  bit mask; each set bit is passed to
 *                       GetVersionBundleInstance to select a bundle
 * @param _retval        receives the converted string produced by
 *                       ToNewUnicode; must not be null
 * @return NS_OK on success; null arguments are rejected by
 *         NS_ENSURE_ARG_POINTER
 */
NS_IMETHODIMP
nsEntityConverter::ConvertToEntities(const char16_t *inString,
                                     uint32_t entityVersion,
                                     char16_t **_retval)
{
  NS_ENSURE_ARG_POINTER(inString);
  NS_ENSURE_ARG_POINTER(_retval);

  *_retval = nullptr;

  nsString outString;

  // per character look for the entity
  uint32_t len = NS_strlen(inString);
  for (uint32_t i = 0; i < len; i++) {
    nsAutoString key(NS_LITERAL_STRING("entity."));

    // First code unit of the current character. It is needed so that both
    // halves of a surrogate pair are copied when no entity is found.
    const uint32_t charStart = i;

    // i + 1 < len is all that is required for inString[i + 1] to be part of
    // the string; a stricter bound would miss a pair at the very end.
    if (NS_IS_HIGH_SURROGATE(inString[i]) &&
        i + 1 < len &&
        NS_IS_LOW_SURROGATE(inString[i + 1])) {
      key.AppendInt(SURROGATE_TO_UCS4(inString[i], inString[i + 1]), 10);
      ++i;
    } else {
      key.AppendInt(inString[i], 10);
    }

    nsXPIDLString value;
    const char16_t *entity = nullptr;

    // mask walks the bits from the lowest upwards; mask2 covers the current
    // bit and everything above it, so the loop stops once no selected bit
    // remains.
    for (uint32_t mask = 1, mask2 = 0xFFFFFFFFL;
         (0 != (entityVersion & mask2));
         mask <<= 1, mask2 <<= 1) {
      if (0 == (entityVersion & mask)) {
        continue;
      }
      nsIStringBundle* entities = GetVersionBundleInstance(entityVersion & mask);
      NS_ASSERTION(entities, "Cannot get the property file");

      if (!entities) {
        continue;
      }

      nsresult rv = entities->GetStringFromName(key.get(), getter_Copies(value));
      if (NS_SUCCEEDED(rv)) {
        entity = value.get();
        break;
      }
    }

    if (entity) {
      outString.Append(entity);
    } else {
      // Copy the whole character: one code unit, or two for a pair.
      outString.Append(&inString[charStart], i - charStart + 1);
    }
  }

  *_retval = ToNewUnicode(outString);

  return NS_OK;
}
/**
 * Reports whether a UTF-16 string contains a character that the bidi
 * macros classify as right-to-left.
 *
 * Every code unit is tested with UCS2_CHAR_IS_BIDI. A high surrogate that
 * is followed by a low surrogate is additionally decoded and tested with
 * UTF32_CHAR_IS_BIDI.
 *
 * @param aString  the string to scan
 * @return PR_TRUE as soon as a matching character is found, PR_FALSE if
 *         the whole string was scanned without a match
 */
PRBool
HasRTLChars(nsAString& aString)
{
  PRInt32 length = aString.Length();

  for (PRInt32 i = 0; i < length; i++) {
    if (UCS2_CHAR_IS_BIDI(aString.CharAt(i))) {
      return PR_TRUE;
    }

    // Only step over the next code unit once it is known to be the low
    // half of a pair. Advancing unconditionally would skip the code unit
    // that follows an unpaired high surrogate, and that unit might itself
    // be an RTL character.
    if (NS_IS_HIGH_SURROGATE(aString.CharAt(i)) &&
        i + 1 < length &&
        NS_IS_LOW_SURROGATE(aString.CharAt(i + 1))) {
      if (UTF32_CHAR_IS_BIDI(SURROGATE_TO_UCS4(aString.CharAt(i),
                                               aString.CharAt(i + 1)))) {
        return PR_TRUE;
      }
      ++i;
    }
  }

  return PR_FALSE;
}
void ToLowerCase(const PRUnichar *aIn, PRUnichar *aOut, PRUint32 aLen) { for (PRUint32 i = 0; i < aLen; i++) { PRUint32 ch = aIn[i]; if (NS_IS_HIGH_SURROGATE(ch) && i < aLen - 1 && NS_IS_LOW_SURROGATE(aIn[i + 1])) { ch = mozilla::unicode::GetLowercase(SURROGATE_TO_UCS4(ch, aIn[i + 1])); NS_ASSERTION(!IS_IN_BMP(ch), "case mapping crossed BMP/SMP boundary!"); aOut[i++] = H_SURROGATE(ch); aOut[i] = L_SURROGATE(ch); continue; } aOut[i] = ToLowerCase(ch); } }
void ToUpperCase(const char16_t *aIn, char16_t *aOut, uint32_t aLen) { for (uint32_t i = 0; i < aLen; i++) { uint32_t ch = aIn[i]; if (NS_IS_HIGH_SURROGATE(ch) && i < aLen - 1 && NS_IS_LOW_SURROGATE(aIn[i + 1])) { ch = mozilla::unicode::GetUppercase(SURROGATE_TO_UCS4(ch, aIn[i + 1])); NS_ASSERTION(!IS_IN_BMP(ch), "case mapping crossed BMP/SMP boundary!"); aOut[i++] = H_SURROGATE(ch); aOut[i] = L_SURROGATE(ch); continue; } aOut[i] = ToUpperCase(ch); } }
// Get the Unicode character at index aPos in the string, and update aPos to // point to the next char (i.e. advance by one or two, depending whether we // found a surrogate pair). // This will assert (and return junk) if the string is not well-formed UTF16. // However, this is only used to process an attribute that comes from the // SVG-glyph XML document, and is not exposed to modification via the DOM, // so it must be well-formed UTF16 data (no unpaired surrogate codepoints) // unless our Unicode handling is seriously broken. static uint32_t NextUSV(const nsAString& aString, uint32_t& aPos) { mozilla::DebugOnly<uint32_t> len = aString.Length(); NS_ASSERTION(aPos < len, "already at end of string"); uint32_t c1 = aString[aPos++]; if (NS_IS_HIGH_SURROGATE(c1)) { NS_ASSERTION(aPos < len, "trailing high surrogate"); uint32_t c2 = aString[aPos++]; NS_ASSERTION(NS_IS_LOW_SURROGATE(c2), "isolated high surrogate"); return SURROGATE_TO_UCS4(c1, c2); } NS_ASSERTION(!NS_IS_LOW_SURROGATE(c1), "isolated low surrogate"); return c1; }
/**
 * Sums the horizontal advances of the glyphs for a UTF-16 string.
 *
 * @param aString  UTF-16 code units to measure
 * @param aLength  number of code units in aString
 * @return the accumulated advance, or 0 when the face or the image cache
 *         is unavailable
 */
gint
nsFreeTypeFont::GetWidth(const PRUnichar* aString, PRUint32 aLength)
{
  FT_UInt glyph_index;
  FT_Glyph glyph;

  // get the face/size from the FreeType cache
  FT_Face face = getFTFace();
  NS_ASSERTION(face, "failed to get face/size");
  if (!face)
    return 0;

  FTC_Image_Cache icache;
  mFt2->GetImageCache(&icache);
  if (!icache)
    return 0;

  FT_Pos width = 0;
  PRUint32 pos = 0;
  while (pos < aLength) {
    FT_ULong code_point = aString[pos];
    PRUint32 unitsUsed = 1;

    if (pos < aLength - 1 &&
        IS_HIGH_SURROGATE(code_point) &&
        IS_LOW_SURROGATE(aString[pos + 1])) {
      // Build the UCS4 code point from the high surrogate at pos and the
      // low surrogate at pos + 1, and consume both.
      code_point = SURROGATE_TO_UCS4(code_point, aString[pos + 1]);
      unitsUsed = 2;
    }
    pos += unitsUsed;

    mFt2->GetCharIndex((FT_Face)face, code_point, &glyph_index);

    nsresult rv;
    rv = mFt2->ImageCacheLookup(icache, &mImageDesc, glyph_index, &glyph);
    NS_ASSERTION(NS_SUCCEEDED(rv), "error loading glyph");
    if (NS_FAILED(rv)) {
      // Fallback advance used when the glyph could not be loaded.
      width += face->size->metrics.x_ppem/2 + 2;
      continue;
    }

    width += FT_16_16_TO_REG(glyph->advance.x);
  }

  return width;
}
// To save time we only do this when we really want to know, not during // every allocation void nsTextFragment::UpdateBidiFlag(const char16_t* aBuffer, uint32_t aLength) { if (mState.mIs2b && !mState.mIsBidi) { const char16_t* cp = aBuffer; const char16_t* end = cp + aLength; while (cp < end) { char16_t ch1 = *cp++; uint32_t utf32Char = ch1; if (NS_IS_HIGH_SURROGATE(ch1) && cp < end && NS_IS_LOW_SURROGATE(*cp)) { char16_t ch2 = *cp++; utf32Char = SURROGATE_TO_UCS4(ch1, ch2); } if (UTF32_CHAR_IS_BIDI(utf32Char) || IsBidiControl(utf32Char)) { mState.mIsBidi = true; break; } } } }
// To save time we only do this when we really want to know, not during // every allocation void nsTextFragment::UpdateBidiFlag(const PRUnichar* aBuffer, PRUint32 aLength) { if (mState.mIs2b && !mState.mIsBidi) { const PRUnichar* cp = aBuffer; const PRUnichar* end = cp + aLength; while (cp < end) { PRUnichar ch1 = *cp++; PRUint32 utf32Char = ch1; if (NS_IS_HIGH_SURROGATE(ch1) && cp < end && NS_IS_LOW_SURROGATE(*cp)) { PRUnichar ch2 = *cp++; utf32Char = SURROGATE_TO_UCS4(ch1, ch2); } if (UTF32_CHAR_IS_BIDI(utf32Char) || IS_BIDI_CONTROL_CHAR(utf32Char)) { mState.mIsBidi = PR_TRUE; break; } } } }
/**
 * Flags the positions of aTextRun that do not start a new cluster.
 *
 * Two kinds of positions receive a CompressedGlyph built with
 * SetComplex(PR_FALSE, PR_TRUE, 0): every character after the first whose
 * general category is kMark, and the low-surrogate half of every surrogate
 * pair.
 * NOTE(review): the first SetComplex argument is presumably "is cluster
 * start" - confirm against gfxTextRun::CompressedGlyph.
 *
 * @param aTextRun  text run to update; its length bounds the scan
 * @param aString   UTF-16 text of the run (not read for 8-bit runs)
 */
void
gfxPlatform::SetupClusterBoundaries(gfxTextRun *aTextRun,
                                    const PRUnichar *aString)
{
  if (aTextRun->GetFlags() & gfxTextRunFactory::TEXT_IS_8BIT) {
    // 8-bit text doesn't have clusters.
    // XXX is this true in all languages???
    // behdad: don't think so.  Czech for example IIRC has a
    // 'ch' grapheme.
    return;
  }

  nsIUGenCategory* categories = GetGenCategory();
  if (!categories) {
    NS_WARNING("No Unicode category service: cannot determine clusters");
    return;
  }

  const PRUint32 length = aTextRun->GetLength();
  PRUint32 pos = 0;

  while (pos < length) {
    PRUint32 ch = aString[pos];

    const PRBool isPair = NS_IS_HIGH_SURROGATE(ch) &&
                          pos < length - 1 &&
                          NS_IS_LOW_SURROGATE(aString[pos + 1]);
    if (isPair) {
      ch = SURROGATE_TO_UCS4(ch, aString[pos + 1]);
    }

    // A mark is never flagged at position 0.
    if (pos > 0 && categories->Get(ch) == nsIUGenCategory::kMark) {
      gfxTextRun::CompressedGlyph markGlyph;
      aTextRun->SetGlyphs(pos, markGlyph.SetComplex(PR_FALSE, PR_TRUE, 0), nsnull);
    }

    if (isPair) {
      // Move on to the low surrogate and flag it as well.
      ++pos;
      gfxTextRun::CompressedGlyph trailGlyph;
      aTextRun->SetGlyphs(pos, trailGlyph.SetComplex(PR_FALSE, PR_TRUE, 0), nsnull);
    }

    ++pos;
  }
}
/**
 * Applies a text-transform (lowercase, uppercase, capitalize, full-width)
 * to aString and appends the result to aConvertedString.
 *
 * The input is processed one character at a time; a well-formed surrogate
 * pair is decoded and handled as a single character. Language-specific
 * rules are selected through GetCasingFor (Turkish, Greek, Irish and Dutch
 * behaviors are handled below).
 *
 * @param aString               the UTF-16 text to transform
 * @param aConvertedString      output; transformed code units are appended
 * @param aAllUppercase         when true the uppercase transform is used
 *                              regardless of the per-character style
 * @param aLanguage             language used for casing until a style
 *                              context supplies a different one
 * @param aCharsToMergeArray    output; gets a false entry for each kept
 *                              character and a true entry for each extra
 *                              code unit that character produced
 * @param aDeletedCharsArray    output; gets a true entry for each deleted
 *                              input character and for the low surrogate of
 *                              each re-encoded pair, false otherwise
 * @param aTextRun              optional; supplies per-character styles,
 *                              capitalization flags and line-break data
 * @param aCanBreakBeforeArray  optional output, required with aTextRun
 * @param aStyleArray           optional output, required with aTextRun
 * @return true if any character was deleted or expanded, i.e. if the
 *         output does not map one-to-one onto the input
 */
bool
nsCaseTransformTextRunFactory::TransformString(
    const nsAString& aString,
    nsString& aConvertedString,
    bool aAllUppercase,
    const nsIAtom* aLanguage,
    nsTArray<bool>& aCharsToMergeArray,
    nsTArray<bool>& aDeletedCharsArray,
    nsTransformedTextRun* aTextRun,
    nsTArray<uint8_t>* aCanBreakBeforeArray,
    nsTArray<nsStyleContext*>* aStyleArray)
{
  NS_PRECONDITION(!aTextRun || (aCanBreakBeforeArray && aStyleArray),
                  "either none or all three optional parameters required");

  uint32_t length = aString.Length();
  const char16_t* str = aString.BeginReading();

  bool mergeNeeded = false;

  bool capitalizeDutchIJ = false;
  bool prevIsLetter = false;
  bool ntPrefix = false; // true immediately after a word-initial 'n' or 't'
                         // when doing Irish lowercasing
  uint32_t sigmaIndex = uint32_t(-1);
  nsIUGenCategory::nsUGenCategory cat;

  // Without a text run the style is fixed for the whole string.
  uint8_t style = aAllUppercase ? NS_STYLE_TEXT_TRANSFORM_UPPERCASE : 0;
  const nsIAtom* lang = aLanguage;

  LanguageSpecificCasingBehavior languageSpecificCasing = GetCasingFor(lang);
  mozilla::GreekCasing::State greekState;
  mozilla::IrishCasing::State irishState;
  uint32_t irishMark = uint32_t(-1); // location of possible prefix letter(s)

  for (uint32_t i = 0; i < length; ++i) {
    uint32_t ch = str[i];

    // Only assigned, and only read, when aTextRun is non-null.
    nsStyleContext* styleContext;
    if (aTextRun) {
      styleContext = aTextRun->mStyles[i];
      style = aAllUppercase ? NS_STYLE_TEXT_TRANSFORM_UPPERCASE :
        styleContext->StyleText()->mTextTransform;

      // A language change resets all language-specific casing state.
      if (lang != styleContext->StyleFont()->mLanguage) {
        lang = styleContext->StyleFont()->mLanguage;
        languageSpecificCasing = GetCasingFor(lang);
        greekState.Reset();
        irishState.Reset();
        irishMark = uint32_t(-1);
      }
    }

    // Number of code units appended for this character beyond the first.
    int extraChars = 0;
    const mozilla::unicode::MultiCharMapping *mcm;
    bool inhibitBreakBefore = false; // have we just deleted preceding hyphen?

    if (NS_IS_HIGH_SURROGATE(ch) && i < length - 1 &&
        NS_IS_LOW_SURROGATE(str[i + 1])) {
      ch = SURROGATE_TO_UCS4(ch, str[i + 1]);
    }

    switch (style) {
    case NS_STYLE_TEXT_TRANSFORM_LOWERCASE:
      if (languageSpecificCasing == eLSCB_Turkish) {
        if (ch == 'I') {
          ch = LATIN_SMALL_LETTER_DOTLESS_I;
          prevIsLetter = true;
          sigmaIndex = uint32_t(-1);
          break;
        }
        if (ch == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
          ch = 'i';
          prevIsLetter = true;
          sigmaIndex = uint32_t(-1);
          break;
        }
      }

      cat = mozilla::unicode::GetGenCategory(ch);

      if (languageSpecificCasing == eLSCB_Irish &&
          cat == nsIUGenCategory::kLetter) {
        // See bug 1018805 for Irish lowercasing requirements
        if (!prevIsLetter && (ch == 'n' || ch == 't')) {
          ntPrefix = true;
        } else {
          // An upper-case vowel right after the n/t prefix gets a hyphen
          // inserted before it.
          if (ntPrefix && mozilla::IrishCasing::IsUpperVowel(ch)) {
            aConvertedString.Append('-');
            ++extraChars;
          }
          ntPrefix = false;
        }
      } else {
        ntPrefix = false;
      }

      // Special lowercasing behavior for Greek Sigma: note that this is listed
      // as context-sensitive in Unicode's SpecialCasing.txt, but is *not* a
      // language-specific mapping; it applies regardless of the language of
      // the element.
      //
      // The lowercase mapping for CAPITAL SIGMA should be to SMALL SIGMA (i.e.
      // the non-final form) whenever there is a following letter, or when the
      // CAPITAL SIGMA occurs in isolation (neither preceded nor followed by a
      // LETTER); and to FINAL SIGMA when it is preceded by another letter but
      // not followed by one.
      //
      // To implement the context-sensitive nature of this mapping, we keep
      // track of whether the previous character was a letter. If not, CAPITAL
      // SIGMA will map directly to SMALL SIGMA. If the previous character
      // was a letter, CAPITAL SIGMA maps to FINAL SIGMA and we record the
      // position in the converted string; if we then encounter another letter,
      // that FINAL SIGMA is replaced with a standard SMALL SIGMA.

      // If sigmaIndex is not -1, it marks where we have provisionally mapped
      // a CAPITAL SIGMA to FINAL SIGMA; if we now find another letter, we
      // need to change it to SMALL SIGMA.
      if (sigmaIndex != uint32_t(-1)) {
        if (cat == nsIUGenCategory::kLetter) {
          aConvertedString.SetCharAt(GREEK_SMALL_LETTER_SIGMA, sigmaIndex);
        }
      }

      if (ch == GREEK_CAPITAL_LETTER_SIGMA) {
        // If preceding char was a letter, map to FINAL instead of SMALL,
        // and note where it occurred by setting sigmaIndex; we'll change it
        // to standard SMALL SIGMA later if another letter follows
        if (prevIsLetter) {
          ch = GREEK_SMALL_LETTER_FINAL_SIGMA;
          sigmaIndex = aConvertedString.Length();
        } else {
          // CAPITAL SIGMA not preceded by a letter is unconditionally mapped
          // to SMALL SIGMA
          ch = GREEK_SMALL_LETTER_SIGMA;
          sigmaIndex = uint32_t(-1);
        }
        prevIsLetter = true;
        break;
      }

      // ignore diacritics for the purpose of contextual sigma mapping;
      // otherwise, reset prevIsLetter appropriately and clear the
      // sigmaIndex marker
      if (cat != nsIUGenCategory::kMark) {
        prevIsLetter = (cat == nsIUGenCategory::kLetter);
        sigmaIndex = uint32_t(-1);
      }

      // A multi-character mapping holds up to three code units: all but the
      // last are appended here, the last one is left in ch for the common
      // output code after the switch.
      mcm = mozilla::unicode::SpecialLower(ch);
      if (mcm) {
        int j = 0;
        while (j < 2 && mcm->mMappedChars[j + 1]) {
          aConvertedString.Append(mcm->mMappedChars[j]);
          ++extraChars;
          ++j;
        }
        ch = mcm->mMappedChars[j];
        break;
      }

      ch = ToLowerCase(ch);
      break;

    case NS_STYLE_TEXT_TRANSFORM_UPPERCASE:
      if (languageSpecificCasing == eLSCB_Turkish && ch == 'i') {
        ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
        break;
      }

      if (languageSpecificCasing == eLSCB_Greek) {
        ch = mozilla::GreekCasing::UpperCase(ch, greekState);
        break;
      }

      if (languageSpecificCasing == eLSCB_Irish) {
        bool mark;
        uint8_t action;
        ch = mozilla::IrishCasing::UpperCase(ch, irishState, mark, action);
        if (mark) {
          // Remember where the character about to be appended will land.
          irishMark = aConvertedString.Length();
          break;
        } else if (action) {
          nsString& str = aConvertedString; // shorthand
          switch (action) {
          case 1:
            // lowercase a single prefix letter
            NS_ASSERTION(str.Length() > 0 && irishMark < str.Length(),
                         "bad irishMark!");
            str.SetCharAt(ToLowerCase(str[irishMark]), irishMark);
            irishMark = uint32_t(-1);
            break;
          case 2:
            // lowercase two prefix letters (immediately before current pos)
            NS_ASSERTION(str.Length() >= 2 && irishMark == str.Length() - 2,
                         "bad irishMark!");
            str.SetCharAt(ToLowerCase(str[irishMark]), irishMark);
            str.SetCharAt(ToLowerCase(str[irishMark + 1]), irishMark + 1);
            irishMark = uint32_t(-1);
            break;
          case 3:
            // lowercase one prefix letter, and delete following hyphen
            // (which must be the immediately-preceding char)
            NS_ASSERTION(str.Length() >= 2 && irishMark == str.Length() - 2,
                         "bad irishMark!");
            str.Replace(irishMark, 2, ToLowerCase(str[irishMark]));
            // NOTE(review): irishMark is an offset into the converted
            // string, while aDeletedCharsArray otherwise gets one entry per
            // input code unit - confirm the two indices always agree here.
            aDeletedCharsArray[irishMark + 1] = true;
            // Remove the trailing entries (corresponding to the deleted hyphen)
            // from the auxiliary arrays.
            aCharsToMergeArray.SetLength(aCharsToMergeArray.Length() - 1);
            if (aTextRun) {
              aStyleArray->SetLength(aStyleArray->Length() - 1);
              aCanBreakBeforeArray->SetLength(aCanBreakBeforeArray->Length() - 1);
              inhibitBreakBefore = true;
            }
            mergeNeeded = true;
            irishMark = uint32_t(-1);
            break;
          }
          // ch has been set to the uppercase for current char;
          // No need to check for SpecialUpper here as none of the characters
          // that could trigger an Irish casing action have special mappings.
          break;
        }
        // If we didn't have any special action to perform, fall through
        // to check for special uppercase (ß)
      }

      // Same multi-character handling as in the lowercase case.
      mcm = mozilla::unicode::SpecialUpper(ch);
      if (mcm) {
        int j = 0;
        while (j < 2 && mcm->mMappedChars[j + 1]) {
          aConvertedString.Append(mcm->mMappedChars[j]);
          ++extraChars;
          ++j;
        }
        ch = mcm->mMappedChars[j];
        break;
      }

      ch = ToUpperCase(ch);
      break;

    case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE:
      // Capitalization needs the per-character flags of the text run;
      // without one the character is left unchanged.
      if (aTextRun) {
        // A 'j' directly after a capitalized Dutch 'i' is capitalized too.
        if (capitalizeDutchIJ && ch == 'j') {
          ch = 'J';
          capitalizeDutchIJ = false;
          break;
        }
        capitalizeDutchIJ = false;
        if (i < aTextRun->mCapitalize.Length() && aTextRun->mCapitalize[i]) {
          if (languageSpecificCasing == eLSCB_Turkish && ch == 'i') {
            ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
            break;
          }
          if (languageSpecificCasing == eLSCB_Dutch && ch == 'i') {
            ch = 'I';
            capitalizeDutchIJ = true;
            break;
          }

          mcm = mozilla::unicode::SpecialTitle(ch);
          if (mcm) {
            int j = 0;
            while (j < 2 && mcm->mMappedChars[j + 1]) {
              aConvertedString.Append(mcm->mMappedChars[j]);
              ++extraChars;
              ++j;
            }
            ch = mcm->mMappedChars[j];
            break;
          }

          ch = ToTitleCase(ch);
        }
      }
      break;

    case NS_STYLE_TEXT_TRANSFORM_FULLWIDTH:
      ch = mozilla::unicode::GetFullWidth(ch);
      break;

    default:
      break;
    }

    // A result of uint32_t(-1) means the character is dropped from the
    // output.
    if (ch == uint32_t(-1)) {
      aDeletedCharsArray.AppendElement(true);
      mergeNeeded = true;
    } else {
      aDeletedCharsArray.AppendElement(false);
      aCharsToMergeArray.AppendElement(false);
      if (aTextRun) {
        aStyleArray->AppendElement(styleContext);
        aCanBreakBeforeArray->AppendElement(inhibitBreakBefore ? false :
                                            aTextRun->CanBreakLineBefore(i));
      }

      if (IS_IN_BMP(ch)) {
        aConvertedString.Append(ch);
      } else {
        aConvertedString.Append(H_SURROGATE(ch));
        aConvertedString.Append(L_SURROGATE(ch));
        // Skip the low surrogate in the input as well.
        ++i;
        aDeletedCharsArray.AppendElement(true); // not exactly deleted, but the
                                                // trailing surrogate is skipped
        ++extraChars;
      }

      // Every extra code unit is merged with the character that produced it.
      while (extraChars-- > 0) {
        mergeNeeded = true;
        aCharsToMergeArray.AppendElement(true);
        if (aTextRun) {
          aStyleArray->AppendElement(styleContext);
          aCanBreakBeforeArray->AppendElement(false);
        }
      }
    }
  }

  return mergeNeeded;
}
bool gfxScriptItemizer::Next(PRUint32& aRunStart, PRUint32& aRunLimit, PRInt32& aRunScript) { /* if we've fallen off the end of the text, we're done */ if (scriptLimit >= textLength) { return PR_FALSE; } SYNC_FIXUP(); scriptCode = HB_SCRIPT_COMMON; for (scriptStart = scriptLimit; scriptLimit < textLength; scriptLimit += 1) { PRUint32 ch; PRInt32 sc; PRInt32 pairIndex; PRUint32 startOfChar = scriptLimit; ch = textPtr[scriptLimit]; /* * MODIFICATION for Gecko - clear the paired-character stack * when we see a space character, because we cannot trust * context outside the current "word" when doing textrun * construction */ if (ch == 0x20) { while (STACK_IS_NOT_EMPTY()) { pop(); } sc = HB_SCRIPT_COMMON; pairIndex = -1; } else { /* decode UTF-16 (may be surrogate pair) */ if (NS_IS_HIGH_SURROGATE(ch) && scriptLimit < textLength - 1) { PRUint32 low = textPtr[scriptLimit + 1]; if (NS_IS_LOW_SURROGATE(low)) { ch = SURROGATE_TO_UCS4(ch, low); scriptLimit += 1; } } sc = gfxUnicodeProperties::GetScriptCode(ch); pairIndex = getPairIndex(ch); /* * Paired character handling: * * if it's an open character, push it onto the stack. * if it's a close character, find the matching open on the * stack, and use that script code. Any non-matching open * characters above it on the stack will be poped. 
*/ if (pairIndex >= 0) { if ((pairIndex & 1) == 0) { push(pairIndex, scriptCode); } else { PRInt32 pi = pairIndex & ~1; while (STACK_IS_NOT_EMPTY() && TOP().pairIndex != pi) { pop(); } if (STACK_IS_NOT_EMPTY()) { sc = TOP().scriptCode; } } } } if (sameScript(scriptCode, sc)) { if (scriptCode <= HB_SCRIPT_INHERITED && sc > HB_SCRIPT_INHERITED) { scriptCode = sc; fixup(scriptCode); } /* * if this character is a close paired character, * pop the matching open character from the stack */ if (pairIndex >= 0 && (pairIndex & 1) != 0) { pop(); } } else { /* * reset scriptLimit in case it was advanced during reading a * multiple-code-unit character */ scriptLimit = startOfChar; break; } } aRunStart = scriptStart; aRunLimit = scriptLimit; aRunScript = scriptCode; return PR_TRUE; }
/**
 * Rebuilds the glyph data of aTextRun from a case-transformed copy of its
 * text.
 *
 * The text of aTextRun is transformed character by character according to
 * each character's text-transform style (or to uppercase when
 * mAllUppercase is set). A child text run is then created for the
 * converted string, and its glyph data is merged or copied back into
 * aTextRun.
 *
 * @param aTextRun     the run to rebuild; supplies the text, the styles,
 *                     the capitalization flags and the line-break data
 * @param aRefContext  passed through to the child text run calls
 */
void
nsCaseTransformTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun,
    gfxContext* aRefContext)
{
  uint32_t length = aTextRun->GetLength();
  const PRUnichar* str = aTextRun->mString.BeginReading();
  nsRefPtr<nsStyleContext>* styles = aTextRun->mStyles.Elements();

  nsAutoString convertedString;
  // charsToMergeArray, styleArray and canBreakBeforeArray get one entry per
  // code unit of convertedString; deletedCharsArray gets one entry per code
  // unit of the source text.
  nsAutoTArray<bool,50> charsToMergeArray;
  nsAutoTArray<bool,50> deletedCharsArray;
  nsAutoTArray<nsStyleContext*,50> styleArray;
  nsAutoTArray<uint8_t,50> canBreakBeforeArray;
  bool mergeNeeded = false;

  // Some languages have special casing conventions that differ from the
  // default Unicode mappings.
  // The enum values here are named for well-known exemplar languages that
  // exhibit the behavior in question; multiple lang tags may map to the
  // same setting here, if the behavior is shared by other languages.
  enum {
    eNone,    // default non-lang-specific behavior
    eTurkish, // preserve dotted/dotless-i distinction in uppercase
    eDutch,   // treat "ij" digraph as a unit for capitalization
    eGreek    // strip accent when uppercasing Greek vowels
  } languageSpecificCasing = eNone;

  const nsIAtom* lang = nullptr;
  bool capitalizeDutchIJ = false;
  bool prevIsLetter = false;
  uint32_t sigmaIndex = uint32_t(-1);
  nsIUGenCategory::nsUGenCategory cat;
  GreekCasingState greekState = kStart;
  uint32_t i;
  for (i = 0; i < length; ++i) {
    uint32_t ch = str[i];
    nsStyleContext* styleContext = styles[i];

    uint8_t style = mAllUppercase ? NS_STYLE_TEXT_TRANSFORM_UPPERCASE
      : styleContext->StyleText()->mTextTransform;
    // Number of code units appended for this character beyond the first.
    int extraChars = 0;
    const mozilla::unicode::MultiCharMapping *mcm;

    if (NS_IS_HIGH_SURROGATE(ch) && i < length - 1 && NS_IS_LOW_SURROGATE(str[i + 1])) {
      ch = SURROGATE_TO_UCS4(ch, str[i + 1]);
    }

    // Re-evaluate the language-specific behavior whenever the language of
    // the style context changes.
    if (lang != styleContext->StyleFont()->mLanguage) {
      lang = styleContext->StyleFont()->mLanguage;
      if (lang == nsGkAtoms::tr || lang == nsGkAtoms::az ||
          lang == nsGkAtoms::ba || lang == nsGkAtoms::crh ||
          lang == nsGkAtoms::tt) {
        languageSpecificCasing = eTurkish;
      } else if (lang == nsGkAtoms::nl) {
        languageSpecificCasing = eDutch;
      } else if (lang == nsGkAtoms::el) {
        languageSpecificCasing = eGreek;
        greekState = kStart;
      } else {
        languageSpecificCasing = eNone;
      }
    }

    switch (style) {
    case NS_STYLE_TEXT_TRANSFORM_LOWERCASE:
      if (languageSpecificCasing == eTurkish) {
        if (ch == 'I') {
          ch = LATIN_SMALL_LETTER_DOTLESS_I;
          prevIsLetter = true;
          sigmaIndex = uint32_t(-1);
          break;
        }
        if (ch == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
          ch = 'i';
          prevIsLetter = true;
          sigmaIndex = uint32_t(-1);
          break;
        }
      }

      // Special lowercasing behavior for Greek Sigma: note that this is listed
      // as context-sensitive in Unicode's SpecialCasing.txt, but is *not* a
      // language-specific mapping; it applies regardless of the language of
      // the element.
      //
      // The lowercase mapping for CAPITAL SIGMA should be to SMALL SIGMA (i.e.
      // the non-final form) whenever there is a following letter, or when the
      // CAPITAL SIGMA occurs in isolation (neither preceded nor followed by a
      // LETTER); and to FINAL SIGMA when it is preceded by another letter but
      // not followed by one.
      //
      // To implement the context-sensitive nature of this mapping, we keep
      // track of whether the previous character was a letter. If not, CAPITAL
      // SIGMA will map directly to SMALL SIGMA. If the previous character
      // was a letter, CAPITAL SIGMA maps to FINAL SIGMA and we record the
      // position in the converted string; if we then encounter another letter,
      // that FINAL SIGMA is replaced with a standard SMALL SIGMA.

      cat = mozilla::unicode::GetGenCategory(ch);

      // If sigmaIndex is not -1, it marks where we have provisionally mapped
      // a CAPITAL SIGMA to FINAL SIGMA; if we now find another letter, we
      // need to change it to SMALL SIGMA.
      if (sigmaIndex != uint32_t(-1)) {
        if (cat == nsIUGenCategory::kLetter) {
          convertedString.SetCharAt(GREEK_SMALL_LETTER_SIGMA, sigmaIndex);
        }
      }

      if (ch == GREEK_CAPITAL_LETTER_SIGMA) {
        // If preceding char was a letter, map to FINAL instead of SMALL,
        // and note where it occurred by setting sigmaIndex; we'll change it
        // to standard SMALL SIGMA later if another letter follows
        if (prevIsLetter) {
          ch = GREEK_SMALL_LETTER_FINAL_SIGMA;
          sigmaIndex = convertedString.Length();
        } else {
          // CAPITAL SIGMA not preceded by a letter is unconditionally mapped
          // to SMALL SIGMA
          ch = GREEK_SMALL_LETTER_SIGMA;
          sigmaIndex = uint32_t(-1);
        }
        prevIsLetter = true;
        break;
      }

      // ignore diacritics for the purpose of contextual sigma mapping;
      // otherwise, reset prevIsLetter appropriately and clear the
      // sigmaIndex marker
      if (cat != nsIUGenCategory::kMark) {
        prevIsLetter = (cat == nsIUGenCategory::kLetter);
        sigmaIndex = uint32_t(-1);
      }

      // A multi-character mapping holds up to three code units: all but the
      // last are appended here, the last one is left in ch for the common
      // output code after the switch.
      mcm = mozilla::unicode::SpecialLower(ch);
      if (mcm) {
        int j = 0;
        while (j < 2 && mcm->mMappedChars[j + 1]) {
          convertedString.Append(mcm->mMappedChars[j]);
          ++extraChars;
          ++j;
        }
        ch = mcm->mMappedChars[j];
        break;
      }

      ch = ToLowerCase(ch);
      break;

    case NS_STYLE_TEXT_TRANSFORM_UPPERCASE:
      if (languageSpecificCasing == eTurkish && ch == 'i') {
        ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
        break;
      }

      if (languageSpecificCasing == eGreek) {
        ch = GreekUpperCase(ch, &greekState);
        break;
      }

      // Same multi-character handling as in the lowercase case.
      mcm = mozilla::unicode::SpecialUpper(ch);
      if (mcm) {
        int j = 0;
        while (j < 2 && mcm->mMappedChars[j + 1]) {
          convertedString.Append(mcm->mMappedChars[j]);
          ++extraChars;
          ++j;
        }
        ch = mcm->mMappedChars[j];
        break;
      }

      ch = ToUpperCase(ch);
      break;

    case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE:
      // A 'j' directly after a capitalized Dutch 'i' is capitalized too.
      if (capitalizeDutchIJ && ch == 'j') {
        ch = 'J';
        capitalizeDutchIJ = false;
        break;
      }
      capitalizeDutchIJ = false;
      if (i < aTextRun->mCapitalize.Length() && aTextRun->mCapitalize[i]) {
        if (languageSpecificCasing == eTurkish && ch == 'i') {
          ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
          break;
        }
        if (languageSpecificCasing == eDutch && ch == 'i') {
          ch = 'I';
          capitalizeDutchIJ = true;
          break;
        }

        mcm = mozilla::unicode::SpecialTitle(ch);
        if (mcm) {
          int j = 0;
          while (j < 2 && mcm->mMappedChars[j + 1]) {
            convertedString.Append(mcm->mMappedChars[j]);
            ++extraChars;
            ++j;
          }
          ch = mcm->mMappedChars[j];
          break;
        }

        ch = ToTitleCase(ch);
      }
      break;

    case NS_STYLE_TEXT_TRANSFORM_FULLWIDTH:
      ch = mozilla::unicode::GetFullWidth(ch);
      break;

    default:
      break;
    }

    // A result of uint32_t(-1) means the character is dropped from the
    // output.
    if (ch == uint32_t(-1)) {
      deletedCharsArray.AppendElement(true);
      mergeNeeded = true;
    } else {
      deletedCharsArray.AppendElement(false);
      charsToMergeArray.AppendElement(false);
      styleArray.AppendElement(styleContext);
      canBreakBeforeArray.AppendElement(aTextRun->CanBreakLineBefore(i));

      if (IS_IN_BMP(ch)) {
        convertedString.Append(ch);
      } else {
        convertedString.Append(H_SURROGATE(ch));
        convertedString.Append(L_SURROGATE(ch));
        // Skip the low surrogate in the input as well.
        ++i;
        deletedCharsArray.AppendElement(true); // not exactly deleted, but the
                                               // trailing surrogate is skipped
        ++extraChars;
      }

      // Every extra code unit is merged with the character that produced it.
      while (extraChars-- > 0) {
        mergeNeeded = true;
        charsToMergeArray.AppendElement(true);
        styleArray.AppendElement(styleContext);
        canBreakBeforeArray.AppendElement(false);
      }
    }
  }

  uint32_t flags;
  gfxTextRunFactory::Parameters innerParams =
      GetParametersForInner(aTextRun, &flags, aRefContext);
  gfxFontGroup* fontGroup = aTextRun->GetFontGroup();

  // Exactly one of these owns the child run, depending on whether another
  // transforming factory is chained inside this one.
  nsAutoPtr<nsTransformedTextRun> transformedChild;
  nsAutoPtr<gfxTextRun> cachedChild;
  gfxTextRun* child;

  if (mInnerTransformingTextRunFactory) {
    transformedChild = mInnerTransformingTextRunFactory->MakeTextRun(
        convertedString.BeginReading(), convertedString.Length(),
        &innerParams, fontGroup, flags, styleArray.Elements(), false);
    child = transformedChild.get();
  } else {
    cachedChild = fontGroup->MakeTextRun(
        convertedString.BeginReading(), convertedString.Length(),
        &innerParams, flags);
    child = cachedChild.get();
  }
  if (!child)
    return;
  // Copy potential linebreaks into child so they're preserved
  // (and also child will be shaped appropriately)
  NS_ASSERTION(convertedString.Length() == canBreakBeforeArray.Length(),
               "Dropped characters or break-before values somewhere!");
  child->SetPotentialLineBreaks(0, canBreakBeforeArray.Length(),
      canBreakBeforeArray.Elements(), aRefContext);
  if (transformedChild) {
    transformedChild->FinishSettingProperties(aRefContext);
  }

  if (mergeNeeded) {
    // Now merge multiple characters into one multi-glyph character as required
    // and deal with skipping deleted accent chars
    NS_ASSERTION(charsToMergeArray.Length() == child->GetLength(),
                 "source length mismatch");
    NS_ASSERTION(deletedCharsArray.Length() == aTextRun->GetLength(),
                 "destination length mismatch");
    MergeCharactersInTextRun(aTextRun, child, charsToMergeArray.Elements(),
                             deletedCharsArray.Elements());
  } else {
    // No merging to do, so just copy; this produces a more optimized textrun.
    // We can't steal the data because the child may be cached and stealing
    // the data would break the cache.
    aTextRun->ResetGlyphRuns();
    aTextRun->CopyGlyphDataFrom(child, 0, child->GetLength(), 0);
  }
}
/**
 * Normalize aSrcStr through the decompose / reorder / compose pipeline.
 *
 * Each UTF-16 code unit of the input (or surrogate pair, combined into one
 * code point) is fed to decompose(); the working buffer is then reordered,
 * optionally composed, and flushed to the output with flush_before_cur().
 *
 * @param do_composition  when true, compose() is applied to each finished
 *                        sequence before it is flushed.
 * @param compat          forwarded unchanged to decompose().
 * @param aSrcStr         text to normalize.
 * @param aToStr          receives the output through flush_before_cur().
 * @return NS_OK on success, otherwise the first failure reported by
 *         decompose() or flush_before_cur().
 */
static nsresult
mdn_normalize(bool do_composition, bool compat,
              const nsAString& aSrcStr, nsAString& aToStr)
{
  workbuf_t wb;
  nsresult r = NS_OK;

  /*
   * Initialize working buffer.
   */
  workbuf_init(&wb);

  nsAString::const_iterator start, end;
  aSrcStr.BeginReading(start);
  aSrcStr.EndReading(end);

  while (start != end) {
    PRUint32 c;
    PRUnichar curChar;

    //assert(wb.cur == wb.last);

    /*
     * Get one character from the source, combining a valid surrogate
     * pair into a single code point.
     */
    curChar = *start++;

    if (NS_IS_HIGH_SURROGATE(curChar) && start != end &&
        NS_IS_LOW_SURROGATE(*(start))) {
      c = SURROGATE_TO_UCS4(curChar, *start);
      ++start;
    } else {
      c = curChar;
    }

    /*
     * Decompose it.
     */
    if ((r = decompose(&wb, c, compat)) != NS_OK)
      break;

    /*
     * Get canonical class.
     */
    get_class(&wb);

    /*
     * Reorder & compose.
     */
    for (; wb.cur < wb.last; wb.cur++) {
      if (wb.cur == 0) {
        continue;
      } else if (wb.cclass[wb.cur] > 0) {
        /*
         * This is not a starter. Try reordering.
         * Note that characters up to it are
         * already in canonical order.
         */
        reorder(&wb);
        continue;
      }

      /*
       * This is a starter character, and there are
       * some characters before it.  Those characters
       * have been reordered properly, and
       * ready for composition.
       */
      if (do_composition && wb.cclass[0] == 0)
        compose(&wb);

      /*
       * If CUR points to a starter character,
       * then process of characters before CUR are
       * already finished, because any further
       * reordering/composition for them are blocked
       * by the starter CUR points.
       */
      if (wb.cur > 0 && wb.cclass[wb.cur] == 0) {
        /* Flush everything before CUR. */
        r = flush_before_cur(&wb, aToStr);
        if (r != NS_OK)
          break;
      }
    }

    /*
     * The break above only leaves the inner loop. Stop consuming input
     * as well, otherwise the next decompose() call would overwrite the
     * failure code and the error would be silently dropped.
     */
    if (r != NS_OK)
      break;
  }

  if (r == NS_OK) {
    if (do_composition && wb.cur > 0 && wb.cclass[0] == 0) {
      /*
       * There is some characters left in WB.
       * They are ordered, but not composed yet.
       * Now CUR points just after the last character in WB,
       * and since compose() tries to compose characters
       * between top and CUR inclusive, we must make CUR
       * one character back during compose().
       */
      wb.cur--;
      compose(&wb);
      wb.cur++;
    }
    /*
     * Call this even when WB.CUR == 0, to make TO
     * NUL-terminated.
     */
    r = flush_before_cur(&wb, aToStr);
  }

  workbuf_free(&wb);

  return (r);
}
/**
 * Draw aString anti-aliased by blending FreeType glyph images onto a copy
 * of the window background and putting the result back with XPutImage.
 *
 * Glyphs that cannot be found or loaded are drawn as an empty box with a
 * diagonal stroke, sized by GetFallbackGlyphMetrics().
 *
 * @param aContext  rendering context supplying the GC (and thereby the
 *                  foreground pixel used as the text colour).
 * @param aSurface  drawing surface supplying the target drawable.
 * @param aX, aY    origin of the text in drawable coordinates.
 * @param aString   UTF-16 text; valid surrogate pairs are combined.
 * @param aLength   number of UTF-16 code units in aString.
 * @return the width computed by doGetBoundingMetrics() when the text was
 *         drawn (or had no pixels to draw), 0 on any failure.
 */
gint
nsFreeTypeXImage::DrawString(nsRenderingContextGTK* aContext,
                             nsDrawingSurfaceGTK* aSurface, nscoord aX,
                             nscoord aY, const PRUnichar* aString,
                             PRUint32 aLength)
{
#if DEBUG_SHOW_GLYPH_BOX
  PRUint32 x, y;
  // grey shows image size
  // red shows character cells
  // green box shows text ink
#endif

  if (aLength < 1) {
    return 0;
  }

  // get the face/size from the FreeType cache
  FT_Face face = getFTFace();
  NS_ASSERTION(face, "failed to get face/size");
  if (!face)
    return 0;

  nsresult rslt;
  PRInt32 leftBearing, rightBearing, ascent, descent, width;
  rslt = doGetBoundingMetrics(aString, aLength, &leftBearing, &rightBearing,
                              &ascent, &descent, &width);
  if (NS_FAILED(rslt))
    return 0;

  // make sure we bring down enough background for blending
  rightBearing = PR_MAX(rightBearing, width+1);

  // offset in the ximage to the x origin
  PRInt32 x_origin = PR_MAX(0, -leftBearing);
  // offset in the ximage to the y origin
  PRInt32 y_origin = ascent;
  PRInt32 x_pos = x_origin;

  int image_width  = x_origin + rightBearing;
  int image_height = y_origin + PR_MAX(descent, 0);
  if ((image_width<=0) || (image_height<=0)) {
    // if we do not have any pixels then no point in trying to draw
    // eg: the space char has 0 height
    NS_ASSERTION(width>=0, "Negative width");
    return width;
  }

  Display *dpy = GDK_DISPLAY();
  Drawable win = GDK_WINDOW_XWINDOW(aSurface->GetDrawable());
  GC gc = GDK_GC_XGC(aContext->GetGC());
  XGCValues values;
  if (!XGetGCValues(dpy, gc, GCForeground, &values)) {
    NS_ERROR("failed to get foreground pixel");
    return 0;
  }
  nscolor color = nsX11AlphaBlend::PixelToNSColor(values.foreground);

#if DEBUG_SHOW_GLYPH_BOX
  // show X/Y origin
  XDrawLine(dpy, win, DefaultGC(dpy, 0), aX-2, aY, aX+2, aY);
  XDrawLine(dpy, win, DefaultGC(dpy, 0), aX, aY-2, aX, aY+2);
  // show width
  XDrawLine(dpy, win, DefaultGC(dpy, 0), aX-x_origin,  aY-y_origin-2,
                                         aX+rightBearing, aY-y_origin-2);
#endif

  //
  // Get the background
  //
  XImage *sub_image = nsX11AlphaBlend::GetBackground(dpy, DefaultScreen(dpy),
                                 win, aX-x_origin, aY-y_origin,
                                 image_width, image_height);
  if (sub_image==nsnull) {
#ifdef DEBUG
    int screen = DefaultScreen(dpy);
    // complain if the requested area is not completely off screen
    int win_width = DisplayWidth(dpy, screen);
    int win_height = DisplayHeight(dpy, screen);
    if (((int)(aX-leftBearing+image_width) > 0)  // not hidden to left
        && ((int)(aX-leftBearing) < win_width)   // not hidden to right
        && ((int)(aY-ascent+image_height) > 0)// not hidden to top
        && ((int)(aY-ascent) < win_height))   // not hidden to bottom
    {
      NS_ASSERTION(sub_image, "failed to get the image");
    }
#endif
    return 0;
  }

#if DEBUG_SHOW_GLYPH_BOX
  DEBUG_AADRAWBOX(sub_image,0,0,image_width,image_height,0,0,0,255/4);
  nscolor black = NS_RGB(0,255,0);
  blendPixel blendPixelFunc = nsX11AlphaBlend::GetBlendPixel();
  // x origin
  for (x=0; x<(unsigned int)image_height; x++)
    if (x%4==0) (*blendPixelFunc)(sub_image, x_origin, x, black, 255/2);
  // y origin
  for (y=0; y<(unsigned int)image_width; y++)
    if (y%4==0) (*blendPixelFunc)(sub_image, y, ascent-1, black, 255/2);
#endif

  FTC_Image_Cache icache;
  mFt2->GetImageCache(&icache);
  if (!icache) {
    // sub_image is already owned by us at this point; release it before
    // bailing out so the background copy is not leaked.
    XDestroyImage(sub_image);
    return 0;
  }

  //
  // Get aa glyphs and blend with background
  //
  blendGlyph blendGlyph = nsX11AlphaBlend::GetBlendGlyph();
  PRUint32 i, extraSurrogateLength;
  for (i=0; i<aLength; i+=1+extraSurrogateLength) {
    FT_UInt glyph_index;
    FT_Glyph glyph;
    nsresult rv;
    FT_BBox glyph_bbox;
    FT_ULong code_point = aString[i];
    extraSurrogateLength = 0;

    if (i<aLength-1 && IS_HIGH_SURROGATE(code_point) &&
        IS_LOW_SURROGATE(aString[i+1])) {
      // if surrogate, make UCS4 code point from high aString[i] surrogate and
      // low surrogate aString[i+1]
      code_point = SURROGATE_TO_UCS4(code_point, aString[i+1]);

      // skip aString[i+1], it is already used as low surrogate
      extraSurrogateLength = 1;
    }

    mFt2->GetCharIndex(face, code_point, &glyph_index);
    if (glyph_index) {
      rv = mFt2->ImageCacheLookup(icache, &mImageDesc, glyph_index, &glyph);
    }
    // rv is only read when glyph_index is non-zero, i.e. after the lookup
    // above has assigned it.
    if ((glyph_index) && (NS_SUCCEEDED(rv))) {
      mFt2->GlyphGetCBox(glyph, ft_glyph_bbox_pixels, &glyph_bbox);
    }
    else {
      // draw an empty box for the missing glyphs
      GetFallbackGlyphMetrics(&glyph_bbox, face);
      int x, y, w = glyph_bbox.xMax, h = glyph_bbox.yMax;
      for (x=1; x<w; x++) {
        XPutPixel(sub_image, x_pos+x, ascent-1, values.foreground);
        XPutPixel(sub_image, x_pos+x, ascent-h, values.foreground);
      }
      for (y=1; y<h; y++) {
        XPutPixel(sub_image, x_pos+1, ascent-y, values.foreground);
        XPutPixel(sub_image, x_pos+w-1, ascent-y, values.foreground);
        // diagonal stroke through the box
        x = (y*(w-2))/h;
        XPutPixel(sub_image, x_pos+x+1, ascent-y, values.foreground);
      }
      x_pos += w + 1;
      continue;
    }

    FT_BitmapGlyph slot = (FT_BitmapGlyph)glyph;
    nsAntiAliasedGlyph aaglyph(glyph_bbox.xMax-glyph_bbox.xMin,
                               glyph_bbox.yMax-glyph_bbox.yMin, 0);
    PRUint8 buf[IMAGE_BUFFER_SIZE]; // try to use the stack for data
    if (!aaglyph.WrapFreeType(&glyph_bbox, slot, buf, IMAGE_BUFFER_SIZE)) {
      NS_ERROR("failed to wrap freetype image");
      XDestroyImage(sub_image);
      return 0;
    }

    //
    // blend the aa-glyph onto the background
    //
    NS_ASSERTION(ascent>=glyph_bbox.yMax,"glyph too tall");
    NS_ASSERTION(x_pos>=-aaglyph.GetLBearing(),"glyph extends too far to left");

#if DEBUG_SHOW_GLYPH_BOX
    // draw box around part of glyph that extends to the left
    // of the main area (negative LBearing)
    if (aaglyph.GetLBearing() < 0) {
      DEBUG_AADRAWBOX(sub_image, x_pos + aaglyph.GetLBearing(),
                      ascent-glyph_bbox.yMax,
                      -aaglyph.GetLBearing(), glyph_bbox.yMax, 255,0,0, 255/4);
    }
    // draw box around main glyph area
    DEBUG_AADRAWBOX(sub_image, x_pos, ascent-glyph_bbox.yMax,
                    aaglyph.GetAdvance(), glyph_bbox.yMax, 0,255,0, 255/4);
    // draw box around part of glyph that extends to the right
    // of the main area (RBearing greater than the advance)
    if (aaglyph.GetRBearing() > (int)aaglyph.GetAdvance()) {
      DEBUG_AADRAWBOX(sub_image, x_pos + aaglyph.GetAdvance(),
                      ascent-glyph_bbox.yMax,
                      aaglyph.GetRBearing()-aaglyph.GetAdvance(),
                      glyph_bbox.yMax, 0,0,255, 255/4);
    }
#endif
    (*blendGlyph)(sub_image, &aaglyph, sLinearWeightTable, color,
                  x_pos + aaglyph.GetLBearing(), ascent-glyph_bbox.yMax);

    x_pos += aaglyph.GetAdvance();
  }

  //
  // Send it to the display
  //
  XPutImage(dpy, win, gc, sub_image, 0, 0, aX-x_origin , aY-ascent,
            image_width, image_height);
  XDestroyImage(sub_image);

  return width;
}
bool gfxScriptItemizer::Next(uint32_t& aRunStart, uint32_t& aRunLimit, int32_t& aRunScript) { /* if we've fallen off the end of the text, we're done */ if (scriptLimit >= textLength) { return false; } SYNC_FIXUP(); scriptCode = MOZ_SCRIPT_COMMON; for (scriptStart = scriptLimit; scriptLimit < textLength; scriptLimit += 1) { uint32_t ch; int32_t sc; uint32_t startOfChar = scriptLimit; ch = textPtr[scriptLimit]; /* decode UTF-16 (may be surrogate pair) */ if (NS_IS_HIGH_SURROGATE(ch) && scriptLimit < textLength - 1) { uint32_t low = textPtr[scriptLimit + 1]; if (NS_IS_LOW_SURROGATE(low)) { ch = SURROGATE_TO_UCS4(ch, low); scriptLimit += 1; } } // Get the nsCharProps2 record for the current character, // so we can read the script and (if needed) the gen category // without needing to do two multi-level lookups. // NOTE that this means we're relying on an implementation detail // of the nsUnicodeProperties tables, and might have to revise this // if the nsCharProps records used there are modified in future. const nsCharProps2& charProps = GetCharProps2(ch); // Initialize gc to UNASSIGNED; we'll only set it to the true GC // if the character has script=COMMON, otherwise we don't care. uint8_t gc = HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED; sc = charProps.mScriptCode; if (sc == MOZ_SCRIPT_COMMON) { /* * Paired character handling: * * if it's an open character, push it onto the stack. * if it's a close character, find the matching open on the * stack, and use that script code. Any non-matching open * characters above it on the stack will be popped. * * We only do this if the script is COMMON; for chars with * specific script assignments, we just use them as-is. 
*/ gc = charProps.mCategory; if (gc == HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION) { uint32_t endPairChar = mozilla::unicode::GetMirroredChar(ch); if (endPairChar != ch) { push(endPairChar, scriptCode); } } else if (gc == HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION && HasMirroredChar(ch)) { while (STACK_IS_NOT_EMPTY() && TOP().endPairChar != ch) { pop(); } if (STACK_IS_NOT_EMPTY()) { sc = TOP().scriptCode; } } } if (SameScript(scriptCode, sc)) { if (scriptCode <= MOZ_SCRIPT_INHERITED && sc > MOZ_SCRIPT_INHERITED) { scriptCode = sc; fixup(scriptCode); } /* * if this character is a close paired character, * pop the matching open character from the stack */ if (gc == HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION && HasMirroredChar(ch)) { pop(); } } else { /* * reset scriptLimit in case it was advanced during reading a * multiple-code-unit character */ scriptLimit = startOfChar; break; } } aRunStart = scriptStart; aRunLimit = scriptLimit; aRunScript = scriptCode; return true; }
/**
 * Measure aString with this font.
 *
 * Walks the text one code point at a time (valid surrogate pairs are
 * combined), accumulating the union of the glyph boxes and the pen
 * advance. Glyphs that are missing or fail to load are measured with
 * GetFallbackGlyphMetrics() and advance by that box's xMax + 1.
 *
 * All out-params are zeroed before anything else happens.
 *
 * @param aString        UTF-16 text to measure.
 * @param aLength        number of UTF-16 code units in aString.
 * @param aLeftBearing   receives the minimum x of the accumulated box.
 * @param aRightBearing  receives the maximum x of the accumulated box.
 * @param aAscent        receives the maximum y of the accumulated box.
 * @param aDescent       receives the negated minimum y of the box.
 * @param aWidth         receives the total advance.
 * @return NS_OK on success; NS_ERROR_FAILURE when aLength is 0 or the
 *         face or image cache cannot be obtained.
 */
nsresult
nsFreeTypeFont::doGetBoundingMetrics(const PRUnichar* aString,
                                     PRUint32 aLength,
                                     PRInt32* aLeftBearing,
                                     PRInt32* aRightBearing,
                                     PRInt32* aAscent,
                                     PRInt32* aDescent,
                                     PRInt32* aWidth)
{
  *aLeftBearing = 0;
  *aRightBearing = 0;
  *aAscent = 0;
  *aDescent = 0;
  *aWidth = 0;

  if (aLength < 1) {
    return NS_ERROR_FAILURE;
  }

  FT_Pos penX = 0;

  // start from an "inverted" box so the first glyph always replaces it
  FT_BBox total;
  total.xMin = total.yMin = 32000;
  total.xMax = total.yMax = -32000;

  // get the face/size from the FreeType cache
  FT_Face face = getFTFace();
  NS_ASSERTION(face, "failed to get face/size");
  if (!face)
    return NS_ERROR_FAILURE;

  FTC_Image_Cache icache;
  mFt2->GetImageCache(&icache);
  if (!icache)
    return NS_ERROR_FAILURE;

  // accumulate the extents of every code point
  PRUint32 index = 0;
  while (index < aLength) {
    FT_UInt glyph_index;
    FT_Glyph glyph;
    FT_BBox glyph_bbox;
    FT_Pos advance;

    // number of UTF-16 code units this code point occupies
    PRUint32 unitsConsumed = 1;
    FT_ULong code_point = aString[index];

    if (index < aLength-1 && IS_HIGH_SURROGATE(code_point) &&
        IS_LOW_SURROGATE(aString[index+1])) {
      // build the UCS4 code point from the high/low surrogate pair and
      // step over the low surrogate as well
      code_point = SURROGATE_TO_UCS4(code_point, aString[index+1]);
      unitsConsumed = 2;
    }

    mFt2->GetCharIndex(face, code_point, &glyph_index);
    //NS_ASSERTION(glyph_index,"failed to get glyph");

    bool glyphLoaded = false;
    if (glyph_index) {
      nsresult rv = mFt2->ImageCacheLookup(icache, &mImageDesc,
                                           glyph_index, &glyph);
      NS_ASSERTION(NS_SUCCEEDED(rv),"error loading glyph");
      glyphLoaded = NS_SUCCEEDED(rv);
    }

    if (glyphLoaded) {
      mFt2->GlyphGetCBox(glyph, ft_glyph_bbox_pixels, &glyph_bbox);
      advance = FT_16_16_TO_REG(glyph->advance.x);
    }
    else {
      // allocate space to draw an empty box in
      GetFallbackGlyphMetrics(&glyph_bbox, face);
      advance = glyph_bbox.xMax + 1;
    }

    total.xMin = PR_MIN(penX+glyph_bbox.xMin, total.xMin);
    total.xMax = PR_MAX(penX+glyph_bbox.xMax, total.xMax);
    total.yMin = PR_MIN(glyph_bbox.yMin, total.yMin);
    total.yMax = PR_MAX(glyph_bbox.yMax, total.yMax);
    penX += advance;

    index += unitsConsumed;
  }

  // check we got at least one size
  if (total.xMin > total.xMax)
    total.xMin = total.xMax = total.yMin = total.yMax = 0;

  *aLeftBearing  = total.xMin;
  *aRightBearing = total.xMax;
  *aAscent       = total.yMax;
  *aDescent      = -total.yMin;
  *aWidth        = penX;
  return NS_OK;
}
/**
 * Shape this item with ScriptShape(), retrying as long as the failure is
 * one we know how to react to:
 *  - E_OUTOFMEMORY: double the glyph buffers and retry;
 *  - E_PENDING: select the font into the DC and retry once with that DC;
 *  - USP_E_SCRIPT_NOT_IN_FONT: retry with the script set to
 *    SCRIPT_UNDEFINED.
 *
 * After a successful shape, if an Ideographic Variation Selector is
 * attached to this item (mIVS), the last glyph is replaced by the font's
 * UVS glyph for the item's last character, when the font has one.
 *
 * @return the ScriptShape() result, E_OUTOFMEMORY if the buffers cannot be
 *         grown, E_PENDING if it persists after selecting the font, or
 *         GDI_ERROR when Uniscribe sets fNoGlyphIndex.
 */
HRESULT Shape() {
    HRESULT rv;
    HDC shapeDC = nullptr;

    const PRUnichar *str = mAlternativeString ? mAlternativeString
                                              : mItemString;

    mScriptItem->a.fLogicalOrder = true;
    SCRIPT_ANALYSIS sa = mScriptItem->a;

    while (true) {
        rv = ScriptShape(shapeDC, mShaper->ScriptCache(),
                         str, mItemLength,
                         mMaxGlyphs, &sa,
                         mGlyphs.Elements(), mClusters.Elements(),
                         mAttr.Elements(), &mNumGlyphs);

        if (rv == E_OUTOFMEMORY) {
            mMaxGlyphs *= 2;
            if (!mGlyphs.SetLength(mMaxGlyphs) ||
                !mAttr.SetLength(mMaxGlyphs)) {
                return E_OUTOFMEMORY;
            }
            continue;
        }

        // Uniscribe can't do shaping with some fonts, so it sets the
        // fNoGlyphIndex flag in the SCRIPT_ANALYSIS structure to indicate
        // this.  This occurs with CFF fonts loaded with
        // AddFontMemResourceEx but it's not clear what the other cases
        // are. We return an error so our caller can try fallback shaping.
        // see http://msdn.microsoft.com/en-us/library/ms776520(VS.85).aspx
        if (sa.fNoGlyphIndex) {
            return GDI_ERROR;
        }

        if (rv == E_PENDING) {
            if (shapeDC == mDC) {
                // we already tried this once, something failed, give up
                return E_PENDING;
            }

            SelectFont();

            shapeDC = mDC;
            continue;
        }

        // http://msdn.microsoft.com/en-us/library/dd368564(VS.85).aspx:
        // Uniscribe will return this if "the font corresponding to the
        // DC does not support the script required by the run...".
        // In this case, we'll set the script code to SCRIPT_UNDEFINED
        // and try again, so that we'll at least get glyphs even though
        // they won't necessarily have proper shaping.
        // (We probably shouldn't have selected this font at all,
        // but it's too late to fix that here.)
        if (rv == USP_E_SCRIPT_NOT_IN_FONT) {
            sa.eScript = SCRIPT_UNDEFINED;
            NS_WARNING("Uniscribe says font does not support script needed");
            continue;
        }

        // Prior to Windows 7, Uniscribe didn't support Ideographic Variation
        // Selectors. Replace the UVS glyph manually.
        // Only do so when shaping actually produced glyphs for a non-empty
        // item; otherwise str[mItemLength - 1] and mGlyphs[mNumGlyphs - 1]
        // would index outside their buffers.
        if (mIVS && SUCCEEDED(rv) && mItemLength > 0 && mNumGlyphs > 0) {
            uint32_t lastChar = str[mItemLength - 1];
            // A trailing low surrogate is only combined when there really
            // is a preceding code unit to look at.
            if (mItemLength >= 2 &&
                NS_IS_LOW_SURROGATE(lastChar) &&
                NS_IS_HIGH_SURROGATE(str[mItemLength - 2])) {
                lastChar = SURROGATE_TO_UCS4(str[mItemLength - 2], lastChar);
            }
            uint16_t glyphId = mShaper->GetFont()->GetUVSGlyph(lastChar, mIVS);
            if (glyphId) {
                mGlyphs[mNumGlyphs - 1] = glyphId;
            }
        }

        return rv;
    }
}
bool gfxUniscribeShaper::ShapeText(gfxContext *aContext, const PRUnichar *aText, uint32_t aOffset, uint32_t aLength, int32_t aScript, gfxShapedText *aShapedText) { DCFromContext aDC(aContext); bool result = true; HRESULT rv; Uniscribe us(aText, aShapedText, aOffset, aLength); /* itemize the string */ int numItems = us.Itemize(); uint32_t length = aLength; SaveDC(aDC); uint32_t ivs = 0; for (int i = 0; i < numItems; ++i) { int iCharPos = us.ScriptItem(i)->iCharPos; int iCharPosNext = us.ScriptItem(i+1)->iCharPos; if (ivs) { iCharPos += 2; if (iCharPos >= iCharPosNext) { ivs = 0; continue; } } if (i+1 < numItems && iCharPosNext <= length - 2 && aText[iCharPosNext] == H_SURROGATE(kUnicodeVS17) && uint32_t(aText[iCharPosNext + 1]) - L_SURROGATE(kUnicodeVS17) <= L_SURROGATE(kUnicodeVS256) - L_SURROGATE(kUnicodeVS17)) { ivs = SURROGATE_TO_UCS4(aText[iCharPosNext], aText[iCharPosNext + 1]); } else { ivs = 0; } UniscribeItem item(aContext, aDC, this, aText + iCharPos, iCharPosNext - iCharPos, us.ScriptItem(i), ivs); if (!item.AllocateBuffers()) { result = false; break; } if (!item.ShapingEnabled()) { item.EnableShaping(); } rv = item.Shape(); if (FAILED(rv)) { // we know we have the glyphs to display this font already // so Uniscribe just doesn't know how to shape the script. // Render the glyphs without shaping. item.DisableShaping(); rv = item.Shape(); } #ifdef DEBUG if (FAILED(rv)) { NS_WARNING("Uniscribe failed to shape with font"); } #endif if (SUCCEEDED(rv)) { rv = item.Place(); #ifdef DEBUG if (FAILED(rv)) { // crap fonts may fail when placing (e.g. funky free fonts) NS_WARNING("Uniscribe failed to place with font"); } #endif } if (FAILED(rv)) { // Uniscribe doesn't like this font for some reason. // Returning FALSE will make the gfxGDIFont retry with the // "dumb" GDI shaper, unless useUniscribeOnly was set. result = false; break; } item.SaveGlyphs(aShapedText, aOffset); } RestoreDC(aDC, -1); return result; }
/**
 * Copy this item's shaping results (mGlyphs / mAdvances / mOffsets /
 * mClusters) into aShapedText.
 *
 * For every character of the item one of four things is recorded:
 *  - a cluster continuation (same cluster index as the previous char),
 *  - a missing glyph (any glyph of the cluster is reported missing),
 *  - a simple glyph (single glyph, no offsets, simple advance and ID,
 *    at a cluster start), or
 *  - a set of detailed glyphs.
 *
 * Returns early, without finishing the item, if the detailed-glyph
 * scratch array cannot be grown.
 *
 * @param aShapedText  destination for the glyph data.
 * @param aOffset      added to the item's iCharPos and the offset within
 *                     the item to form each index written in aShapedText.
 */
void SaveGlyphs(gfxShapedText *aShapedText, uint32_t aOffset) {
    const uint32_t itemStartInRun = mScriptItem->iCharPos;

    // XXX We should store this in the item and only fetch it once
    SCRIPT_FONTPROPERTIES sfp;
    ScriptFontProperties(&sfp);

    nsAutoTArray<gfxShapedText::DetailedGlyph,1> detailedGlyphs;
    gfxShapedText::CompressedGlyph g;
    gfxShapedText::CompressedGlyph *charGlyphs =
        aShapedText->GetCharacterGlyphs();
    const uint32_t appUnitsPerDevUnit = aShapedText->GetAppUnitsPerDevUnit();

    for (uint32_t offset = 0; offset < mItemLength; ++offset) {
        const uint32_t runOffset = aOffset + itemStartInRun + offset;
        const bool atClusterStart = charGlyphs[runOffset].IsClusterStart();

        if (offset > 0 && mClusters[offset] == mClusters[offset - 1]) {
            // same cluster as the previous character: mark as continuation
            gfxShapedText::CompressedGlyph &continuation =
                charGlyphs[runOffset];
            NS_ASSERTION(!continuation.IsSimpleGlyph(),
                         "overwriting a simple glyph");
            continuation.SetComplex(atClusterStart, false, 0);
            continue;
        }

        // Count glyphs for this character: everything from its first
        // glyph up to the first glyph of the next cluster, or up to the
        // end of the glyph array when there is no later cluster.
        const uint32_t k = mClusters[offset];
        uint32_t glyphCount = mNumGlyphs - k;
        bool missing = IsGlyphMissing(&sfp, k);
        for (uint32_t next = offset + 1; next < mItemLength; ++next) {
            if (mClusters[next] > k) {
                glyphCount = mClusters[next] - k;
                break;
            }
        }
        for (uint32_t j = 1; j < glyphCount; ++j) {
            if (IsGlyphMissing(&sfp, k + j)) {
                missing = true;
            }
        }

        int32_t advance = mAdvances[k]*appUnitsPerDevUnit;
        WORD glyph = mGlyphs[k];
        NS_ASSERTION(!gfxFontGroup::IsInvalidChar(mItemString[offset]),
                     "invalid character detected");

        if (missing) {
            // report the full code point when this is a surrogate pair
            uint32_t missingChar = mItemString[offset];
            if (NS_IS_HIGH_SURROGATE(mItemString[offset]) &&
                offset + 1 < mItemLength &&
                NS_IS_LOW_SURROGATE(mItemString[offset + 1])) {
                missingChar = SURROGATE_TO_UCS4(mItemString[offset],
                                                mItemString[offset + 1]);
            }
            aShapedText->SetMissingGlyph(runOffset, missingChar,
                                         mShaper->GetFont());
            continue;
        }

        const bool isSimple =
            glyphCount == 1 && advance >= 0 &&
            mOffsets[k].dv == 0 && mOffsets[k].du == 0 &&
            gfxShapedText::CompressedGlyph::IsSimpleAdvance(advance) &&
            gfxShapedText::CompressedGlyph::IsSimpleGlyphID(glyph) &&
            atClusterStart;
        if (isSimple) {
            charGlyphs[runOffset].SetSimpleGlyph(advance, glyph);
            continue;
        }

        // grow the scratch array if this cluster needs more entries
        if (detailedGlyphs.Length() < glyphCount) {
            if (!detailedGlyphs.AppendElements(glyphCount -
                                               detailedGlyphs.Length()))
                return;
        }
        for (uint32_t i = 0; i < glyphCount; ++i) {
            gfxTextRun::DetailedGlyph *details = &detailedGlyphs[i];
            details->mGlyphID = mGlyphs[k + i];
            details->mAdvance = mAdvances[k + i] * appUnitsPerDevUnit;
            details->mXOffset = float(mOffsets[k + i].du) *
                appUnitsPerDevUnit * aShapedText->GetDirection();
            details->mYOffset = - float(mOffsets[k + i].dv) *
                appUnitsPerDevUnit;
        }
        aShapedText->SetGlyphs(runOffset,
                               g.SetComplex(atClusterStart, true,
                                            glyphCount),
                               detailedGlyphs.Elements());
    }
}
/**
 * Encode inString with mEncoder into a newly PR_Malloc'ed, NUL-terminated
 * byte string.
 *
 * Whenever the encoder reports NS_ERROR_UENC_NOMAPPING the encoder is
 * finished, the unmappable character (a full code point if it is a valid
 * surrogate pair) is passed to HandleFallBack() unless fallback is
 * disabled by mAttribute, and encoding resumes after that character.
 *
 * @param inString   NUL-terminated UTF-16 input.
 * @param outString  receives the allocated result, or stays null on
 *                   failure. Ownership passes to the caller.
 * @return a failure code if encoding failed (the buffer is freed);
 *         NS_ERROR_UENC_NOMAPPING if at least one character had no
 *         mapping (outString is still set); otherwise the encoder's
 *         final result.
 */
NS_IMETHODIMP
nsSaveAsCharset::DoCharsetConversion(const char16_t *inString, char **outString)
{
  NS_ENSURE_ARG_POINTER(outString);

  *outString = nullptr;

  nsresult rv;
  int32_t inStringLength = NS_strlen(inString);   // original input string length
  int32_t bufferLength;                           // allocated buffer length
  int32_t srcLength = inStringLength;
  int32_t dstLength;
  int32_t pos1, pos2;
  nsresult saveResult = NS_OK;                    // to remember NS_ERROR_UENC_NOMAPPING

  // estimate and allocate the target buffer (reserve extra memory for fallback)
  rv = mEncoder->GetMaxLength(inString, inStringLength, &dstLength);
  if (NS_FAILED(rv)) return rv;

  bufferLength = dstLength + RESERVE_FALLBACK_BYTES; // extra bytes for fallback
  // + 1 is for the terminating NUL -- we don't add that to bufferLength so that
  // we can always write dstPtr[pos2] = '\0' even when the encoder filled the
  // buffer.
  char *dstPtr = (char *) PR_Malloc(bufferLength + 1);
  if (!dstPtr) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  for (pos1 = 0, pos2 = 0; pos1 < inStringLength;) {
    // convert from unicode
    dstLength = bufferLength - pos2;
    NS_ASSERTION(dstLength >= 0, "out of bounds write");
    rv = mEncoder->Convert(&inString[pos1], &srcLength, &dstPtr[pos2], &dstLength);

    // always make progress, even if the encoder consumed nothing
    pos1 += srcLength ? srcLength : 1;
    pos2 += dstLength;
    dstPtr[pos2] = '\0';

    // break: this is usually the case (no error) OR unrecoverable error
    if (NS_ERROR_UENC_NOMAPPING != rv) break;

    // remember this happened
    saveResult = rv;

    // finish encoder, give it a chance to write extra data like escape sequences
    dstLength = bufferLength - pos2;
    rv = mEncoder->Finish(&dstPtr[pos2], &dstLength);
    if (NS_SUCCEEDED(rv)) {
      pos2 += dstLength;
      dstPtr[pos2] = '\0';
    }

    // do the fallback
    if (!ATTR_NO_FALLBACK(mAttribute)) {
      uint32_t unMappedChar;
      if (NS_IS_HIGH_SURROGATE(inString[pos1-1]) &&
          inStringLength > pos1 && NS_IS_LOW_SURROGATE(inString[pos1])) {
        unMappedChar = SURROGATE_TO_UCS4(inString[pos1-1], inString[pos1]);
        pos1++;
      } else {
        unMappedChar = inString[pos1-1];
      }

      rv = mEncoder->GetMaxLength(inString+pos1, inStringLength-pos1, &dstLength);
      if (NS_FAILED(rv))
        break;

      rv = HandleFallBack(unMappedChar, &dstPtr, &bufferLength, &pos2, dstLength);
      if (NS_FAILED(rv))
        break;
      dstPtr[pos2] = '\0';
    }

    // Remaining input for the next Convert() call. This has to be computed
    // after the fallback above, which may have advanced pos1 past a low
    // surrogate; computing it earlier would tell the encoder that one more
    // code unit is available than really is.
    srcLength = inStringLength - pos1;
  }

  if (NS_SUCCEEDED(rv)) {
    // finish encoder, give it a chance to write extra data like escape sequences
    dstLength = bufferLength - pos2;
    rv = mEncoder->Finish(&dstPtr[pos2], &dstLength);
    if (NS_SUCCEEDED(rv)) {
      pos2 += dstLength;
      dstPtr[pos2] = '\0';
    }
  }

  if (NS_FAILED(rv)) {
    PR_FREEIF(dstPtr);
    return rv;
  }

  *outString = dstPtr;      // set the result string

  // set error code so that the caller can do own fall back
  if (NS_ERROR_UENC_NOMAPPING == saveResult) {
    rv = NS_ERROR_UENC_NOMAPPING;
  }

  return rv;
}
/**
 * Rebuild aTextRun's glyph data from a case-transformed copy of its text.
 *
 * Every character is transformed according to its style's text-transform
 * (or unconditionally uppercased when mAllUppercase is set), with
 * language-specific handling for Turkish-style dotted/dotless i and the
 * Dutch "ij" digraph. Sharp s expands to "SS"; the extra character is
 * flagged so it can be merged back into a single character of aTextRun.
 * The transformed string is shaped into a child text run whose glyphs are
 * then merged or copied into aTextRun.
 *
 * @param aTextRun     the transformed text run to (re)populate.
 * @param aRefContext  reference context passed on to the child run.
 */
void
nsCaseTransformTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun,
                                              gfxContext* aRefContext)
{
  const PRUint32 length = aTextRun->GetLength();
  const PRUnichar* str = aTextRun->mString.BeginReading();
  nsRefPtr<nsStyleContext>* styles = aTextRun->mStyles.Elements();

  // Output text plus three per-output-character side arrays.
  nsAutoString convertedString;
  nsAutoTArray<bool,50> charsToMergeArray;
  nsAutoTArray<nsStyleContext*,50> styleArray;
  nsAutoTArray<PRUint8,50> canBreakBeforeArray;
  PRUint32 mergedCharCount = 0;

  // Some languages have special casing conventions that differ from the
  // default Unicode mappings.
  // The enum values here are named for well-known exemplar languages that
  // exhibit the behavior in question; multiple lang tags may map to the
  // same setting here, if the behavior is shared by other languages.
  enum {
    eNone,    // default non-lang-specific behavior
    eTurkish, // preserve dotted/dotless-i distinction in uppercase
    eDutch    // treat "ij" digraph as a unit for capitalization
  } languageSpecificCasing = eNone;

  const nsIAtom* lang = nsnull;
  bool capitalizeDutchIJ = false;

  for (PRUint32 i = 0; i < length; ++i) {
    PRUint32 ch = str[i];
    nsStyleContext* styleContext = styles[i];

    charsToMergeArray.AppendElement(false);
    styleArray.AppendElement(styleContext);
    canBreakBeforeArray.AppendElement(aTextRun->CanBreakLineBefore(i));

    PRUint8 style;
    if (mAllUppercase) {
      style = NS_STYLE_TEXT_TRANSFORM_UPPERCASE;
    } else {
      style = styleContext->GetStyleText()->mTextTransform;
    }

    // set when this character produced an additional output character
    bool emittedExtraChar = false;

    if (NS_IS_HIGH_SURROGATE(ch) && i < length - 1 &&
        NS_IS_LOW_SURROGATE(str[i + 1])) {
      ch = SURROGATE_TO_UCS4(ch, str[i + 1]);
    }

    // Re-evaluate the casing rules whenever the language changes.
    const nsIAtom* currentLang = styleContext->GetStyleFont()->mLanguage;
    if (lang != currentLang) {
      lang = currentLang;
      const bool dottedIDistinction =
        lang == nsGkAtoms::tr || lang == nsGkAtoms::az ||
        lang == nsGkAtoms::ba || lang == nsGkAtoms::crh ||
        lang == nsGkAtoms::tt;
      if (dottedIDistinction) {
        languageSpecificCasing = eTurkish;
      } else if (lang == nsGkAtoms::nl) {
        languageSpecificCasing = eDutch;
      } else {
        languageSpecificCasing = eNone;
      }
    }

    switch (style) {
    case NS_STYLE_TEXT_TRANSFORM_LOWERCASE:
      ch = (languageSpecificCasing == eTurkish && ch == 'I')
             ? LATIN_SMALL_LETTER_DOTLESS_I
             : ToLowerCase(ch);
      break;

    case NS_STYLE_TEXT_TRANSFORM_UPPERCASE:
      if (ch == SZLIG) {
        // sharp s becomes "SS": first S here, second S via ch below
        convertedString.Append('S');
        emittedExtraChar = true;
        ch = 'S';
      } else if (languageSpecificCasing == eTurkish && ch == 'i') {
        ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
      } else {
        ch = ToUpperCase(ch);
      }
      break;

    case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE:
      if (capitalizeDutchIJ && ch == 'j') {
        // second half of a Dutch "ij" whose 'i' was just capitalized
        ch = 'J';
        capitalizeDutchIJ = false;
      } else {
        capitalizeDutchIJ = false;
        if (i < aTextRun->mCapitalize.Length() && aTextRun->mCapitalize[i]) {
          if (ch == SZLIG) {
            convertedString.Append('S');
            emittedExtraChar = true;
            ch = 'S';
          } else if (languageSpecificCasing == eTurkish && ch == 'i') {
            ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
          } else if (languageSpecificCasing == eDutch && ch == 'i') {
            ch = 'I';
            capitalizeDutchIJ = true;
          } else {
            ch = ToTitleCase(ch);
          }
        }
      }
      break;

    default:
      break;
    }

    if (!IS_IN_BMP(ch)) {
      // Emit both halves, skip the source's trailing surrogate, and add
      // the side-array entries for the second code unit.
      convertedString.Append(H_SURROGATE(ch));
      convertedString.Append(L_SURROGATE(ch));
      i++;
      charsToMergeArray.AppendElement(false);
      styleArray.AppendElement(styleContext);
      canBreakBeforeArray.AppendElement(false);
    } else {
      convertedString.Append(ch);
    }

    if (emittedExtraChar) {
      ++mergedCharCount;
      charsToMergeArray.AppendElement(true);
      styleArray.AppendElement(styleContext);
      canBreakBeforeArray.AppendElement(false);
    }
  }

  PRUint32 flags;
  gfxTextRunFactory::Parameters innerParams =
    GetParametersForInner(aTextRun, &flags, aRefContext);
  gfxFontGroup* fontGroup = aTextRun->GetFontGroup();

  nsAutoPtr<nsTransformedTextRun> transformedChild;
  nsAutoPtr<gfxTextRun> cachedChild;
  gfxTextRun* child;

  if (!mInnerTransformingTextRunFactory) {
    cachedChild = fontGroup->MakeTextRun(
        convertedString.BeginReading(), convertedString.Length(),
        &innerParams, flags);
    child = cachedChild.get();
  } else {
    transformedChild = mInnerTransformingTextRunFactory->MakeTextRun(
        convertedString.BeginReading(), convertedString.Length(),
        &innerParams, fontGroup, flags, styleArray.Elements(), false);
    child = transformedChild.get();
  }
  if (!child)
    return;

  // Copy potential linebreaks into child so they're preserved
  // (and also child will be shaped appropriately)
  NS_ASSERTION(convertedString.Length() == canBreakBeforeArray.Length(),
               "Dropped characters or break-before values somewhere!");
  child->SetPotentialLineBreaks(0, canBreakBeforeArray.Length(),
      canBreakBeforeArray.Elements(), aRefContext);
  if (transformedChild) {
    transformedChild->FinishSettingProperties(aRefContext);
  }

  if (mergedCharCount > 0) {
    // Now merge multiple characters into one multi-glyph character as required
    MergeCharactersInTextRun(aTextRun, child, charsToMergeArray.Elements());
  } else {
    // No merging to do, so just copy; this produces a more optimized textrun.
    // We can't steal the data because the child may be cached and stealing
    // the data would break the cache.
    aTextRun->ResetGlyphRuns();
    aTextRun->CopyGlyphDataFrom(child, 0, child->GetLength(), 0);
  }
}
void nsFontVariantTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun, gfxContext* aRefContext) { gfxFontGroup* fontGroup = aTextRun->GetFontGroup(); gfxFontStyle fontStyle = *fontGroup->GetStyle(); fontStyle.size *= 0.8; nsRefPtr<gfxFontGroup> smallFont = fontGroup->Copy(&fontStyle); if (!smallFont) return; PRUint32 flags; gfxTextRunFactory::Parameters innerParams = GetParametersForInner(aTextRun, &flags, aRefContext); PRUint32 length = aTextRun->GetLength(); const PRUnichar* str = aTextRun->mString.BeginReading(); nsRefPtr<nsStyleContext>* styles = aTextRun->mStyles.Elements(); // Create a textrun so we can check cluster-start properties nsAutoPtr<gfxTextRun> inner(fontGroup->MakeTextRun(str, length, &innerParams, flags)); if (!inner.get()) return; nsCaseTransformTextRunFactory uppercaseFactory(nsnull, true); aTextRun->ResetGlyphRuns(); PRUint32 runStart = 0; bool runIsLowercase = false; nsAutoTArray<nsStyleContext*,50> styleArray; nsAutoTArray<PRUint8,50> canBreakBeforeArray; PRUint32 i; for (i = 0; i <= length; ++i) { bool isLowercase = false; if (i < length) { // Characters that aren't the start of a cluster are ignored here. They // get added to whatever lowercase/non-lowercase run we're in. if (!inner->IsClusterStart(i)) { isLowercase = runIsLowercase; } else { if (styles[i]->GetStyleFont()->mFont.variant == NS_STYLE_FONT_VARIANT_SMALL_CAPS) { PRUint32 ch = str[i]; if (NS_IS_HIGH_SURROGATE(ch) && i < length - 1 && NS_IS_LOW_SURROGATE(str[i + 1])) { ch = SURROGATE_TO_UCS4(ch, str[i + 1]); } PRUint32 ch2 = ToUpperCase(ch); isLowercase = ch != ch2 || ch == SZLIG; } else { // Don't transform the character! 
I.e., pretend that it's not lowercase } } } if ((i == length || runIsLowercase != isLowercase) && runStart < i) { nsAutoPtr<nsTransformedTextRun> transformedChild; nsAutoPtr<gfxTextRun> cachedChild; gfxTextRun* child; if (runIsLowercase) { transformedChild = uppercaseFactory.MakeTextRun(str + runStart, i - runStart, &innerParams, smallFont, flags, styleArray.Elements(), false); child = transformedChild; } else { cachedChild = fontGroup->MakeTextRun(str + runStart, i - runStart, &innerParams, flags); child = cachedChild.get(); } if (!child) return; // Copy potential linebreaks into child so they're preserved // (and also child will be shaped appropriately) NS_ASSERTION(canBreakBeforeArray.Length() == i - runStart, "lost some break-before values?"); child->SetPotentialLineBreaks(0, canBreakBeforeArray.Length(), canBreakBeforeArray.Elements(), aRefContext); if (transformedChild) { transformedChild->FinishSettingProperties(aRefContext); } aTextRun->CopyGlyphDataFrom(child, 0, child->GetLength(), runStart); runStart = i; styleArray.Clear(); canBreakBeforeArray.Clear(); } if (i < length) { runIsLowercase = isLowercase; styleArray.AppendElement(styles[i]); canBreakBeforeArray.AppendElement(aTextRun->CanBreakLineBefore(i)); } } }
void nsHTMLContentSerializer::AppendAndTranslateEntities(const nsAString& aStr, nsAString& aOutputStr) { if (mBodyOnly && !mInBody) { return; } if (mDisableEntityEncoding) { aOutputStr.Append(aStr); return; } if (mFlags & (nsIDocumentEncoder::OutputEncodeBasicEntities | nsIDocumentEncoder::OutputEncodeLatin1Entities | nsIDocumentEncoder::OutputEncodeHTMLEntities | nsIDocumentEncoder::OutputEncodeW3CEntities)) { nsIParserService* parserService = nsContentUtils::GetParserService(); if (!parserService) { NS_ERROR("Can't get parser service"); return; } nsReadingIterator<PRUnichar> done_reading; aStr.EndReading(done_reading); // for each chunk of |aString|... PRUint32 advanceLength = 0; nsReadingIterator<PRUnichar> iter; const char **entityTable = mInAttribute ? kAttrEntities : kEntities; for (aStr.BeginReading(iter); iter != done_reading; iter.advance(PRInt32(advanceLength))) { PRUint32 fragmentLength = iter.size_forward(); PRUint32 lengthReplaced = 0; // the number of UTF-16 codepoints // replaced by a particular entity const PRUnichar* c = iter.get(); const PRUnichar* fragmentStart = c; const PRUnichar* fragmentEnd = c + fragmentLength; const char* entityText = nsnull; nsCAutoString entityReplacement; char* fullEntityText = nsnull; advanceLength = 0; // for each character in this chunk, check if it // needs to be replaced for (; c < fragmentEnd; c++, advanceLength++) { PRUnichar val = *c; if (val == kValNBSP) { entityText = kEntityNBSP; break; } else if ((val <= kGTVal) && (entityTable[val][0] != 0)) { entityText = entityTable[val]; break; } else if (val > 127 && ((val < 256 && mFlags & nsIDocumentEncoder::OutputEncodeLatin1Entities) || mFlags & nsIDocumentEncoder::OutputEncodeHTMLEntities)) { parserService->HTMLConvertUnicodeToEntity(val, entityReplacement); if (!entityReplacement.IsEmpty()) { entityText = entityReplacement.get(); break; } } else if (val > 127 && mFlags & nsIDocumentEncoder::OutputEncodeW3CEntities && mEntityConverter) { if 
(NS_IS_HIGH_SURROGATE(val) && c + 1 < fragmentEnd && NS_IS_LOW_SURROGATE(*(c + 1))) { PRUint32 valUTF32 = SURROGATE_TO_UCS4(val, *(++c)); if (NS_SUCCEEDED(mEntityConverter->ConvertUTF32ToEntity(valUTF32, nsIEntityConverter::entityW3C, &fullEntityText))) { lengthReplaced = 2; break; } else { advanceLength++; } } else if (NS_SUCCEEDED(mEntityConverter->ConvertToEntity(val, nsIEntityConverter::entityW3C, &fullEntityText))) { lengthReplaced = 1; break; } } } aOutputStr.Append(fragmentStart, advanceLength); if (entityText) { aOutputStr.Append(PRUnichar('&')); AppendASCIItoUTF16(entityText, aOutputStr); aOutputStr.Append(PRUnichar(';')); advanceLength++; } // if it comes from nsIEntityConverter, it already has '&' and ';' else if (fullEntityText) { AppendASCIItoUTF16(fullEntityText, aOutputStr); nsMemory::Free(fullEntityText); advanceLength += lengthReplaced; } } } else { nsXMLContentSerializer::AppendAndTranslateEntities(aStr, aOutputStr); } }
/**
 * Computes potential hyphenation points for aString.
 *
 * The string is scanned for "words" (maximal sequences of characters whose
 * general category is Letter or Mark); each word is converted to UTF-8 and
 * handed to hnj_hyphen_hyphenate2 with the dictionary in mDict.
 *
 * @param aString  the UTF-16 text to hyphenate
 * @param aHyphens resized to aString.Length(); on return, aHyphens[i] is
 *                 true when a hyphenation point was reported for the
 *                 character starting at UTF-16 offset i, false otherwise
 * @return NS_ERROR_OUT_OF_MEMORY if aHyphens cannot be resized,
 *         NS_OK otherwise (a failure of the hyphenation library for a word
 *         just leaves that word without hyphenation points)
 */
nsresult
nsHyphenator::Hyphenate(const nsAString& aString,
                        nsTArray<bool>& aHyphens)
{
  if (!aHyphens.SetLength(aString.Length())) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  // memset takes a byte count, not an element count; scale by sizeof(bool)
  // so the whole array is cleared even where bool is wider than one byte.
  memset(aHyphens.Elements(), false, aHyphens.Length() * sizeof(bool));

  bool inWord = false;
  uint32_t wordStart = 0, wordLimit = 0;
  uint32_t chLen;
  for (uint32_t i = 0; i < aString.Length(); i += chLen) {
    // Decode one code point (1 or 2 UTF-16 code units).
    uint32_t ch = aString[i];
    chLen = 1;

    if (NS_IS_HIGH_SURROGATE(ch)) {
      if (i + 1 < aString.Length() && NS_IS_LOW_SURROGATE(aString[i+1])) {
        ch = SURROGATE_TO_UCS4(ch, aString[i+1]);
        chLen = 2;
      } else {
        NS_WARNING("unpaired surrogate found during hyphenation");
      }
    }

    nsIUGenCategory::nsUGenCategory cat = mozilla::unicode::GetGenCategory(ch);
    if (cat == nsIUGenCategory::kLetter || cat == nsIUGenCategory::kMark) {
      if (!inWord) {
        inWord = true;
        wordStart = i;
      }
      wordLimit = i + chLen;
      // Keep extending the word unless this was the last character of the
      // string, in which case fall through and process the word now.
      if (i + chLen < aString.Length()) {
        continue;
      }
    }

    if (inWord) {
      const PRUnichar *begin = aString.BeginReading();
      NS_ConvertUTF16toUTF8 utf8(begin + wordStart,
                                 wordLimit - wordStart);
      // NOTE(review): the 5 extra bytes are presumably slack required by
      // the hyphenation library for its output buffer - confirm.
      nsAutoTArray<char,200> utf8hyphens;
      utf8hyphens.SetLength(utf8.Length() + 5);
      // NOTE(review): rep/pos/cut are output parameters that are never
      // inspected or released here; verify whether the library can allocate
      // them for the dictionaries in use.
      char **rep = nullptr;
      int *pos = nullptr;
      int *cut = nullptr;
      int err = hnj_hyphen_hyphenate2((HyphenDict*)mDict,
                                      utf8.BeginReading(), utf8.Length(),
                                      utf8hyphens.Elements(), nullptr,
                                      &rep, &pos, &cut);
      if (!err) {
        // Surprisingly, hnj_hyphen_hyphenate2 converts the 'hyphens' buffer
        // from utf8 code unit indexing (which would match the utf8 input
        // string directly) to Unicode character indexing.
        // We then need to convert this to utf16 code unit offsets for Gecko.
        const char *hyphPtr = utf8hyphens.Elements();
        const PRUnichar *cur = begin + wordStart;
        const PRUnichar *end = begin + wordLimit;
        while (cur < end) {
          // An odd value marks a hyphenation point at this character.
          if (*hyphPtr & 0x01) {
            aHyphens[cur - begin] = true;
          }
          cur++;
          // Step over the trailing half of a surrogate pair so that one
          // entry of the hyphens buffer corresponds to one character.
          if (cur < end && NS_IS_LOW_SURROGATE(*cur) &&
              NS_IS_HIGH_SURROGATE(*(cur-1)))
          {
            cur++;
          }
          hyphPtr++;
        }
      }
    }

    inWord = false;
  }

  return NS_OK;
}