/**
 * Converts a NUL-terminated UTF-16 string to the target charset via mEncoder.
 *
 * The converted text is written to a PR_Malloc'ed, NUL-terminated buffer that
 * is handed to the caller through |outString|.  Each time the encoder reports
 * NS_ERROR_UENC_NOMAPPING, the encoder is finished, the offending character is
 * passed to HandleFallBack() (unless ATTR_NO_FALLBACK(mAttribute) is set) and
 * conversion resumes with the rest of the input.
 *
 * @param inString   source text; its length is measured with NS_strlen.
 * @param outString  receives the result buffer; left null on failure.
 * @return a failure code (and no buffer) if conversion failed;
 *         NS_ERROR_UENC_NOMAPPING (with a buffer) if at least one character
 *         had no mapping; otherwise the result of the final encoder call.
 */
NS_IMETHODIMP
nsSaveAsCharset::DoCharsetConversion(const PRUnichar *inString, char **outString)
{
  NS_ENSURE_ARG_POINTER(outString);
  *outString = nullptr;

  const int32_t inStringLength = NS_strlen(inString); // original input string length
  nsresult saveResult = NS_OK;                        // to remember NS_ERROR_UENC_NOMAPPING

  // estimate and allocate the target buffer (reserve extra memory for fallback)
  int32_t dstLength;
  nsresult rv = mEncoder->GetMaxLength(inString, inStringLength, &dstLength);
  if (NS_FAILED(rv))
    return rv;

  int32_t bufferLength = dstLength + RESERVE_FALLBACK_BYTES; // allocated buffer length

  // + 1 is for the terminating NUL -- it is not added to bufferLength so that
  // dstPtr[pos2] = '\0' is always in bounds, even when the encoder filled the
  // whole buffer.
  char *dstPtr = (char *) PR_Malloc(bufferLength + 1);
  if (!dstPtr) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  int32_t srcLength = inStringLength; // UTF-16 units offered to the next Convert()
  int32_t pos1 = 0;                   // read position in inString
  int32_t pos2 = 0;                   // write position in dstPtr

  while (pos1 < inStringLength) {
    // convert from unicode
    dstLength = bufferLength - pos2;
    NS_ASSERTION(dstLength >= 0, "out of bounds write");
    rv = mEncoder->Convert(&inString[pos1], &srcLength, &dstPtr[pos2], &dstLength);

    // Always make progress, even if the encoder reports nothing consumed.
    pos1 += srcLength ? srcLength : 1;
    pos2 += dstLength;
    dstPtr[pos2] = '\0';

    // break: this is usually the case (no error) OR unrecoverable error
    if (NS_ERROR_UENC_NOMAPPING != rv)
      break;

    // remember this happened
    saveResult = rv;

    // finish encoder, give it a chance to write extra data like escape sequences
    dstLength = bufferLength - pos2;
    rv = mEncoder->Finish(&dstPtr[pos2], &dstLength);
    if (NS_SUCCEEDED(rv)) {
      pos2 += dstLength;
      dstPtr[pos2] = '\0';
    }

    // do the fallback
    if (!ATTR_NO_FALLBACK(mAttribute)) {
      uint32_t unMappedChar;
      if (NS_IS_HIGH_SURROGATE(inString[pos1 - 1]) &&
          inStringLength > pos1 &&
          NS_IS_LOW_SURROGATE(inString[pos1])) {
        // The unmapped character is a surrogate pair: consume its low half too.
        unMappedChar = SURROGATE_TO_UCS4(inString[pos1 - 1], inString[pos1]);
        pos1++;
      } else {
        unMappedChar = inString[pos1 - 1];
      }

      rv = mEncoder->GetMaxLength(inString + pos1, inStringLength - pos1, &dstLength);
      if (NS_FAILED(rv))
        break;

      rv = HandleFallBack(unMappedChar, &dstPtr, &bufferLength, &pos2, dstLength);
      if (NS_FAILED(rv))
        break;
      dstPtr[pos2] = '\0';
    }

    // Compute the remaining input only once pos1 is final.  Doing this before
    // the surrogate handling above (as the code used to) left srcLength one
    // unit too large after a pair was consumed, so the next Convert() call
    // also read and encoded the string's terminating NUL.
    srcLength = inStringLength - pos1;
  }

  if (NS_SUCCEEDED(rv)) {
    // finish encoder, give it a chance to write extra data like escape sequences
    dstLength = bufferLength - pos2;
    rv = mEncoder->Finish(&dstPtr[pos2], &dstLength);
    if (NS_SUCCEEDED(rv)) {
      pos2 += dstLength;
      dstPtr[pos2] = '\0';
    }
  }

  if (NS_FAILED(rv)) {
    PR_FREEIF(dstPtr);
    return rv;
  }

  *outString = dstPtr; // set the result string

  // set error code so that the caller can do own fall back
  if (NS_ERROR_UENC_NOMAPPING == saveResult) {
    rv = NS_ERROR_UENC_NOMAPPING;
  }

  return rv;
}
/**
 * Encodes UTF-16 code units from |aSrc| into |aDest| as 1-, 2- or 4-byte
 * sequences.
 *
 * For each unit the encoders are tried in order: plain ASCII, the common GBK
 * table (mUtil), the 2-byte extension encoder, and finally the 4-byte encoder
 * (including surrogate pairs).  A high surrogate that is the last unit of the
 * input is stored in mSurrogateHigh and not counted as consumed.
 *
 * @param aSrc         input UTF-16 code units.
 * @param aSrcLength   in: number of units available; out: units consumed
 *                     (includes the offending unit on NS_ERROR_UENC_NOMAPPING).
 * @param aDest        output buffer.
 * @param aDestLength  in: capacity of |aDest| in bytes; out: bytes written.
 * @return NS_OK, NS_OK_UENC_MOREOUTPUT when |aDest| has no room for the next
 *         character, or NS_ERROR_UENC_NOMAPPING when a character cannot be
 *         encoded.
 */
NS_IMETHODIMP
nsUnicodeToGBK::ConvertNoBuff(const PRUnichar * aSrc,
                              PRInt32 * aSrcLength,
                              char * aDest,
                              PRInt32 * aDestLength)
{
  PRInt32 iSrcLength = 0;
  PRInt32 iDestLength = 0;
  PRUnichar unicode;
  nsresult res = NS_OK;

  while (iSrcLength < *aSrcLength) {
    unicode = *aSrc;
    // if unicode's hi byte has something, it is not ASCII, must be a GB
    if (IS_ASCII(unicode)) {
      // Make sure we still have 1 byte for output first.  The check at the
      // bottom of the loop covers later iterations, but nothing guarded the
      // very first write when the caller passes an empty output buffer.
      if (iDestLength >= *aDestLength) {
        res = NS_OK_UENC_MOREOUTPUT;
        break;
      }
      // this is an ASCII
      *aDest = CAST_UNICHAR_TO_CHAR(*aSrc);
      aDest++; // increment 1 byte
      iDestLength += 1;
    } else {
      char byte1, byte2;
      if (mUtil.UnicodeToGBKChar(unicode, PR_FALSE, &byte1, &byte2)) {
        // make sure we still have 2 bytes for output first
        if (iDestLength + 2 > *aDestLength) {
          res = NS_OK_UENC_MOREOUTPUT;
          break;
        }
        aDest[0] = byte1;
        aDest[1] = byte2;
        aDest += 2; // increment 2 bytes
        iDestLength += 2;
      } else {
        PRInt32 aOutLen = 2;
        // make sure we still have 2 bytes for output first
        if (iDestLength + 2 > *aDestLength) {
          res = NS_OK_UENC_MOREOUTPUT;
          break;
        }
        // we cannot map in the common mapping. Let's try to
        // call the delegated 2 byte converter for the gbk or gb18030
        // unique 2 byte mapping
        if (TryExtensionEncoder(unicode, aDest, &aOutLen)) {
          iDestLength += aOutLen;
          aDest += aOutLen;
        } else {
          // make sure we still have 4 bytes for output first
          if (iDestLength + 4 > *aDestLength) {
            res = NS_OK_UENC_MOREOUTPUT;
            break;
          }
          // we still cannot map. Let's try to
          // call the delegated GB18030 4 byte converter
          aOutLen = 4;
          if (NS_IS_HIGH_SURROGATE(unicode)) {
            if ((iSrcLength + 1) < *aSrcLength) {
              if (EncodeSurrogate(aSrc[0], aSrc[1], aDest)) {
                // since we got a surrogate pair, we need to increment src.
                iSrcLength++;
                aSrc++;
                iDestLength += aOutLen;
                aDest += aOutLen;
              } else {
                // only get a high surrogate, but not a low surrogate
                res = NS_ERROR_UENC_NOMAPPING;
                iSrcLength++; // include length of the unmapped character
                break;
              }
            } else {
              // The pair is split across calls: remember the high half and
              // stop without counting it as consumed.
              mSurrogateHigh = aSrc[0];
              break; // this will go to afterwhileloop
            }
          } else if (NS_IS_LOW_SURROGATE(unicode)) {
            if (NS_IS_HIGH_SURROGATE(mSurrogateHigh)) {
              // Complete a pair whose high half was saved by an earlier call.
              if (EncodeSurrogate(mSurrogateHigh, aSrc[0], aDest)) {
                iDestLength += aOutLen;
                aDest += aOutLen;
              } else {
                // the saved high surrogate and this low surrogate do not
                // form an encodable pair
                res = NS_ERROR_UENC_NOMAPPING;
                iSrcLength++; // include length of the unmapped character
                break;
              }
            } else {
              // only get a low surrogate, but not a high surrogate
              res = NS_ERROR_UENC_NOMAPPING;
              iSrcLength++; // include length of the unmapped character
              break;
            }
          } else {
            if (Try4BytesEncoder(unicode, aDest, &aOutLen)) {
              NS_ASSERTION((aOutLen == 4), "we should always generate 4 bytes here");
              iDestLength += aOutLen;
              aDest += aOutLen;
            } else {
              res = NS_ERROR_UENC_NOMAPPING;
              iSrcLength++; // include length of the unmapped character
              break;
            }
          }
        }
      }
    }
    iSrcLength++; // Each unicode char just count as one in PRUnichar string;
    mSurrogateHigh = 0;
    aSrc++;
    if (iDestLength >= (*aDestLength) && (iSrcLength < *aSrcLength)) {
      res = NS_OK_UENC_MOREOUTPUT;
      break;
    }
  }
//afterwhileloop:
  *aDestLength = iDestLength;
  *aSrcLength = iSrcLength;
  return res;
}
/**
 * Computes hyphenation opportunities for every word in |aString|.
 *
 * A word is a maximal run of characters whose general category is kLetter or
 * kMark.  Each word is converted to UTF-8 and passed to
 * hnj_hyphen_hyphenate2(); the resulting per-character flags are mapped back
 * to UTF-16 code unit offsets.
 *
 * @param aString   text to analyse.
 * @param aHyphens  resized to aString.Length(); an element is set to true
 *                  where the hyphenator set bit 0 for that character.
 * @return NS_OK, or NS_ERROR_OUT_OF_MEMORY if a buffer could not be sized.
 */
nsresult
nsHyphenator::Hyphenate(const nsAString& aString,
                        nsTArray<bool>& aHyphens)
{
  if (!aHyphens.SetLength(aString.Length())) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  memset(aHyphens.Elements(), false, aHyphens.Length());

  bool inWord = false;
  PRUint32 wordStart = 0, wordLimit = 0;
  PRUint32 chLen;
  for (PRUint32 i = 0; i < aString.Length(); i += chLen) {
    PRUint32 ch = aString[i];
    chLen = 1;

    if (NS_IS_HIGH_SURROGATE(ch)) {
      if (i + 1 < aString.Length() && NS_IS_LOW_SURROGATE(aString[i+1])) {
        ch = SURROGATE_TO_UCS4(ch, aString[i+1]);
        chLen = 2;
      } else {
        NS_WARNING("unpaired surrogate found during hyphenation");
      }
    }

    nsIUGenCategory::nsUGenCategory cat = mCategories->Get(ch);
    if (cat == nsIUGenCategory::kLetter || cat == nsIUGenCategory::kMark) {
      if (!inWord) {
        inWord = true;
        wordStart = i;
      }
      wordLimit = i + chLen;
      // Keep scanning unless this character ends the string, in which case
      // we fall through to process the final word.
      if (i + chLen < aString.Length()) {
        continue;
      }
    }

    if (inWord) {
      const PRUnichar *begin = aString.BeginReading();
      NS_ConvertUTF16toUTF8 utf8(begin + wordStart,
                                 wordLimit - wordStart);
      nsAutoTArray<char,200> utf8hyphens;
      // The hyphenator writes into this buffer, so a failed resize must not
      // be ignored (the aHyphens resize above is checked the same way).
      if (!utf8hyphens.SetLength(utf8.Length() + 5)) {
        return NS_ERROR_OUT_OF_MEMORY;
      }
      // NOTE(review): rep/pos/cut are output parameters that are never
      // released here -- confirm whether hnj_hyphen_hyphenate2 can allocate
      // them for the dictionaries in use.
      char **rep = nsnull;
      int *pos = nsnull;
      int *cut = nsnull;
      int err = hnj_hyphen_hyphenate2((HyphenDict*)mDict,
                                      utf8.BeginReading(), utf8.Length(),
                                      utf8hyphens.Elements(), nsnull,
                                      &rep, &pos, &cut);
      if (!err) {
        // Surprisingly, hnj_hyphen_hyphenate2 converts the 'hyphens' buffer
        // from utf8 code unit indexing (which would match the utf8 input
        // string directly) to Unicode character indexing.
        // We then need to convert this to utf16 code unit offsets for Gecko.
        const char *hyphPtr = utf8hyphens.Elements();
        const PRUnichar *cur = begin + wordStart;
        const PRUnichar *end = begin + wordLimit;
        while (cur < end) {
          if (*hyphPtr & 0x01) {
            aHyphens[cur - begin] = true;
          }
          cur++;
          // A surrogate pair is one character for the hyphenator: skip its
          // low half so cur and hyphPtr stay in step.
          if (cur < end && NS_IS_LOW_SURROGATE(*cur) &&
              NS_IS_HIGH_SURROGATE(*(cur-1)))
          {
            cur++;
          }
          hyphPtr++;
        }
      }
    }

    inWord = false;
  }

  return NS_OK;
}
/**
 * Converts a NUL-terminated UTF-16 string to the target charset via mEncoder.
 *
 * The converted text is written to a PR_Malloc'ed, NUL-terminated buffer that
 * is handed to the caller through |outString|.  Each time the encoder reports
 * NS_ERROR_UENC_NOMAPPING, the encoder is finished, the offending character is
 * passed to HandleFallBack() (unless ATTR_NO_FALLBACK(mAttribute) is set) and
 * conversion resumes with the rest of the input.
 *
 * @param inString   source text; its length is measured with NS_strlen.
 * @param outString  receives the result buffer; left null on failure.
 * @return NS_ERROR_NULL_POINTER if |outString| is null; a failure code (and no
 *         buffer) if conversion failed; NS_ERROR_UENC_NOMAPPING (with a
 *         buffer) if at least one character had no mapping; otherwise the
 *         result of the final encoder call.
 */
NS_IMETHODIMP
nsSaveAsCharset::DoCharsetConversion(const PRUnichar *inString, char **outString)
{
  if (nullptr == outString)
    return NS_ERROR_NULL_POINTER;

  *outString = nullptr;

  const PRInt32 inStringLength = NS_strlen(inString); // original input string length
  nsresult saveResult = NS_OK;                        // to remember NS_ERROR_UENC_NOMAPPING

  // estimate and allocate the target buffer (reserve extra memory for fallback)
  PRInt32 dstLength;
  nsresult rv = mEncoder->GetMaxLength(inString, inStringLength, &dstLength);
  if (NS_FAILED(rv))
    return rv;

  PRInt32 bufferLength = dstLength + 512; // reserve 512 byte for fallback.

  // + 1 is for the terminating NUL.  It is deliberately not added to
  // bufferLength, so dstPtr[pos2] = '\0' stays in bounds even when the encoder
  // fills all bufferLength bytes; without it that store ran one byte past the
  // allocation.
  // NOTE(review): HandleFallBack() may reallocate this buffer -- confirm that
  // it also keeps one spare byte for the terminator.
  char *dstPtr = (char *) PR_Malloc(bufferLength + 1);
  if (nullptr == dstPtr)
    return NS_ERROR_OUT_OF_MEMORY;

  PRInt32 srcLength = inStringLength; // UTF-16 units offered to the next Convert()
  PRInt32 pos1 = 0;                   // read position in inString
  PRInt32 pos2 = 0;                   // write position in dstPtr

  while (pos1 < inStringLength) {
    // convert from unicode
    dstLength = bufferLength - pos2;
    rv = mEncoder->Convert(&inString[pos1], &srcLength, &dstPtr[pos2], &dstLength);

    // Always make progress, even if the encoder reports nothing consumed.
    pos1 += srcLength ? srcLength : 1;
    pos2 += dstLength;
    dstPtr[pos2] = '\0';

    // break: this is usually the case (no error) OR unrecoverable error
    if (NS_ERROR_UENC_NOMAPPING != rv)
      break;

    // remember this happened
    saveResult = rv;

    // finish encoder, give it a chance to write extra data like escape sequences
    dstLength = bufferLength - pos2;
    rv = mEncoder->Finish(&dstPtr[pos2], &dstLength);
    if (NS_SUCCEEDED(rv)) {
      pos2 += dstLength;
      dstPtr[pos2] = '\0';
    }

    // do the fallback
    if (!ATTR_NO_FALLBACK(mAttribute)) {
      PRUint32 unMappedChar;
      if (NS_IS_HIGH_SURROGATE(inString[pos1 - 1]) &&
          inStringLength > pos1 &&
          NS_IS_LOW_SURROGATE(inString[pos1])) {
        // The unmapped character is a surrogate pair: consume its low half too.
        unMappedChar = SURROGATE_TO_UCS4(inString[pos1 - 1], inString[pos1]);
        pos1++;
      } else {
        unMappedChar = inString[pos1 - 1];
      }

      rv = mEncoder->GetMaxLength(inString + pos1, inStringLength - pos1, &dstLength);
      if (NS_FAILED(rv))
        break;

      rv = HandleFallBack(unMappedChar, &dstPtr, &bufferLength, &pos2, dstLength);
      if (NS_FAILED(rv))
        break;
      dstPtr[pos2] = '\0';
    }

    // Compute the remaining input only once pos1 is final.  Doing this before
    // the surrogate handling above (as the code used to) left srcLength one
    // unit too large after a pair was consumed, so the next Convert() call
    // also read and encoded the string's terminating NUL.
    srcLength = inStringLength - pos1;
  }

  if (NS_SUCCEEDED(rv)) {
    // finish encoder, give it a chance to write extra data like escape sequences
    dstLength = bufferLength - pos2;
    rv = mEncoder->Finish(&dstPtr[pos2], &dstLength);
    if (NS_SUCCEEDED(rv)) {
      pos2 += dstLength;
      dstPtr[pos2] = '\0';
    }
  }

  if (NS_FAILED(rv)) {
    PR_FREEIF(dstPtr);
    return rv;
  }

  *outString = dstPtr; // set the result string

  // set error code so that the caller can do own fall back
  if (NS_ERROR_UENC_NOMAPPING == saveResult) {
    rv = NS_ERROR_UENC_NOMAPPING;
  }

  return rv;
}
bool gfxScriptItemizer::Next(PRUint32& aRunStart, PRUint32& aRunLimit, PRInt32& aRunScript) { /* if we've fallen off the end of the text, we're done */ if (scriptLimit >= textLength) { return false; } SYNC_FIXUP(); scriptCode = MOZ_SCRIPT_COMMON; for (scriptStart = scriptLimit; scriptLimit < textLength; scriptLimit += 1) { PRUint32 ch; PRInt32 sc; PRInt32 pairIndex; PRUint32 startOfChar = scriptLimit; ch = textPtr[scriptLimit]; /* * MODIFICATION for Gecko - clear the paired-character stack * when we see a space character, because we cannot trust * context outside the current "word" when doing textrun * construction */ if (ch == 0x20) { while (STACK_IS_NOT_EMPTY()) { pop(); } sc = MOZ_SCRIPT_COMMON; pairIndex = -1; } else { /* decode UTF-16 (may be surrogate pair) */ if (NS_IS_HIGH_SURROGATE(ch) && scriptLimit < textLength - 1) { PRUint32 low = textPtr[scriptLimit + 1]; if (NS_IS_LOW_SURROGATE(low)) { ch = SURROGATE_TO_UCS4(ch, low); scriptLimit += 1; } } sc = mozilla::unicode::GetScriptCode(ch); pairIndex = getPairIndex(ch); /* * Paired character handling: * * if it's an open character, push it onto the stack. * if it's a close character, find the matching open on the * stack, and use that script code. Any non-matching open * characters above it on the stack will be poped. 
*/ if (pairIndex >= 0) { if ((pairIndex & 1) == 0) { push(pairIndex, scriptCode); } else { PRInt32 pi = pairIndex & ~1; while (STACK_IS_NOT_EMPTY() && TOP().pairIndex != pi) { pop(); } if (STACK_IS_NOT_EMPTY()) { sc = TOP().scriptCode; } } } } if (sameScript(scriptCode, sc)) { if (scriptCode <= MOZ_SCRIPT_INHERITED && sc > MOZ_SCRIPT_INHERITED) { scriptCode = sc; fixup(scriptCode); } /* * if this character is a close paired character, * pop the matching open character from the stack */ if (pairIndex >= 0 && (pairIndex & 1) != 0) { pop(); } } else { /* * reset scriptLimit in case it was advanced during reading a * multiple-code-unit character */ scriptLimit = startOfChar; break; } } aRunStart = scriptStart; aRunLimit = scriptLimit; aRunScript = scriptCode; return true; }
void nsHTMLContentSerializer::AppendToString(const nsAString& aStr, nsAString& aOutputStr, PRBool aTranslateEntities, PRBool aIncrColumn) { if (mBodyOnly && !mInBody) { return; } if (aIncrColumn) { mColPos += aStr.Length(); } if (aTranslateEntities && !mInCDATA) { if (mFlags & (nsIDocumentEncoder::OutputEncodeBasicEntities | nsIDocumentEncoder::OutputEncodeLatin1Entities | nsIDocumentEncoder::OutputEncodeHTMLEntities | nsIDocumentEncoder::OutputEncodeW3CEntities)) { nsIParserService* parserService = nsContentUtils::GetParserService(); if (!parserService) { NS_ERROR("Can't get parser service"); return; } nsReadingIterator<PRUnichar> done_reading; aStr.EndReading(done_reading); // for each chunk of |aString|... PRUint32 advanceLength = 0; nsReadingIterator<PRUnichar> iter; const char **entityTable = mInAttribute ? kAttrEntities : kEntities; for (aStr.BeginReading(iter); iter != done_reading; iter.advance(PRInt32(advanceLength))) { PRUint32 fragmentLength = iter.size_forward(); PRUint32 lengthReplaced = 0; // the number of UTF-16 codepoints // replaced by a particular entity const PRUnichar* c = iter.get(); const PRUnichar* fragmentStart = c; const PRUnichar* fragmentEnd = c + fragmentLength; const char* entityText = nsnull; nsCAutoString entityReplacement; char* fullEntityText = nsnull; advanceLength = 0; // for each character in this chunk, check if it // needs to be replaced for (; c < fragmentEnd; c++, advanceLength++) { PRUnichar val = *c; if (val == kValNBSP) { entityText = kEntityNBSP; break; } else if ((val <= kGTVal) && (entityTable[val][0] != 0)) { entityText = entityTable[val]; break; } else if (val > 127 && ((val < 256 && mFlags & nsIDocumentEncoder::OutputEncodeLatin1Entities) || mFlags & nsIDocumentEncoder::OutputEncodeHTMLEntities)) { parserService->HTMLConvertUnicodeToEntity(val, entityReplacement); if (!entityReplacement.IsEmpty()) { entityText = entityReplacement.get(); break; } } else if (val > 127 && mFlags & 
nsIDocumentEncoder::OutputEncodeW3CEntities && mEntityConverter) { if (NS_IS_HIGH_SURROGATE(val) && c + 1 < fragmentEnd && NS_IS_LOW_SURROGATE(*(c + 1))) { PRUint32 valUTF32 = SURROGATE_TO_UCS4(val, *(++c)); if (NS_SUCCEEDED(mEntityConverter->ConvertUTF32ToEntity(valUTF32, nsIEntityConverter::entityW3C, &fullEntityText))) { lengthReplaced = 2; break; } else { advanceLength++; } } else if (NS_SUCCEEDED(mEntityConverter->ConvertToEntity(val, nsIEntityConverter::entityW3C, &fullEntityText))) { lengthReplaced = 1; break; } } } aOutputStr.Append(fragmentStart, advanceLength); if (entityText) { aOutputStr.Append(PRUnichar('&')); AppendASCIItoUTF16(entityText, aOutputStr); aOutputStr.Append(PRUnichar(';')); advanceLength++; } // if it comes from nsIEntityConverter, it already has '&' and ';' else if (fullEntityText) { AppendASCIItoUTF16(fullEntityText, aOutputStr); nsMemory::Free(fullEntityText); advanceLength += lengthReplaced; } } } else { nsXMLContentSerializer::AppendToString(aStr, aOutputStr, aTranslateEntities, aIncrColumn); } return; } aOutputStr.Append(aStr); }
bool nsMathMLTokenFrame::SetTextStyle() { if (mContent->Tag() != nsGkAtoms::mi_) return false; if (!mFrames.FirstChild()) return false; // Get the text content that we enclose and its length nsAutoString data; nsContentUtils::GetNodeTextContent(mContent, false, data); PRInt32 length = data.Length(); if (!length) return false; nsAutoString fontstyle; bool isSingleCharacter = length == 1 || (length == 2 && NS_IS_HIGH_SURROGATE(data[0])); if (isSingleCharacter && nsMathMLOperators::LookupInvariantChar(data) != eMATHVARIANT_NONE) { // bug 65951 - a non-stylable character has its own intrinsic appearance fontstyle.AssignLiteral("invariant"); } else { // Attributes override the default behavior. nsAutoString value; if (!(GetAttribute(mContent, mPresentationData.mstyle, nsGkAtoms::mathvariant_, value) || GetAttribute(mContent, mPresentationData.mstyle, nsGkAtoms::fontstyle_, value))) { if (!isSingleCharacter) { fontstyle.AssignLiteral("normal"); } else if (length == 1 && // BMP !nsMathMLOperators:: TransformVariantChar(data[0], eMATHVARIANT_italic). Equals(data)) { // Transformation exists. Try to make the BMP character look like the // styled character using the style system until bug 114365 is resolved. fontstyle.AssignLiteral("italic"); } // else single character but there is no corresponding Math Alphanumeric // Symbol character: "ignore the value of the [default] mathvariant // attribute". } } // set the _moz-math-font-style attribute without notifying that we want a reflow if (fontstyle.IsEmpty()) { if (mContent->HasAttr(kNameSpaceID_None, nsGkAtoms::_moz_math_fontstyle_)) { mContent->UnsetAttr(kNameSpaceID_None, nsGkAtoms::_moz_math_fontstyle_, false); return true; } } else if (!mContent->AttrValueIs(kNameSpaceID_None, nsGkAtoms::_moz_math_fontstyle_, fontstyle, eCaseMatters)) { mContent->SetAttr(kNameSpaceID_None, nsGkAtoms::_moz_math_fontstyle_, fontstyle, false); return true; } return false; }
/**
 * Rebuilds |aTextRun| from a case-transformed copy of its text.
 *
 * Every source character is mapped according to its text-transform style (or
 * uppercased unconditionally when mAllUppercase is set).  The converted string
 * is shaped as a child text run whose glyph data is then copied or merged
 * back into |aTextRun|.  Bookkeeping arrays record which converted characters
 * must be merged into their predecessor and which source characters have no
 * counterpart in the converted string.
 *
 * @param aTextRun     the transformed text run to rebuild.
 * @param aRefContext  reference context passed on to the child run.
 */
void
nsCaseTransformTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun,
                                              gfxContext* aRefContext)
{
  const PRUint32 length = aTextRun->GetLength();
  const PRUnichar* str = aTextRun->mString.BeginReading();
  nsRefPtr<nsStyleContext>* styles = aTextRun->mStyles.Elements();

  nsAutoString convertedString;
  // Parallel to convertedString:
  nsAutoTArray<bool,50> charsToMergeArray;
  nsAutoTArray<nsStyleContext*,50> styleArray;
  nsAutoTArray<PRUint8,50> canBreakBeforeArray;
  // Parallel to the source string:
  nsAutoTArray<bool,50> deletedCharsArray;
  bool mergeNeeded = false;

  // Some languages have special casing conventions that differ from the
  // default Unicode mappings.
  // The enum values here are named for well-known exemplar languages that
  // exhibit the behavior in question; multiple lang tags may map to the
  // same setting here, if the behavior is shared by other languages.
  enum {
    eNone,    // default non-lang-specific behavior
    eTurkish, // preserve dotted/dotless-i distinction in uppercase
    eDutch,   // treat "ij" digraph as a unit for capitalization
    eGreek    // strip accent when uppercasing Greek vowels
  } languageSpecificCasing = eNone;

  const nsIAtom* lang = nsnull;

  bool capitalizeDutchIJ = false;
  bool prevIsLetter = false;
  PRUint32 sigmaIndex = PRUint32(-1);
  nsIUGenCategory::nsUGenCategory category;
  GreekCasingState greekState = kStart;

  for (PRUint32 i = 0; i < length; ++i) {
    PRUint32 ch = str[i];
    nsStyleContext* styleContext = styles[i];

    PRUint8 style = mAllUppercase ? NS_STYLE_TEXT_TRANSFORM_UPPERCASE
      : styleContext->GetStyleText()->mTextTransform;
    // Converted characters produced for this source character beyond the
    // first one.
    int extraChars = 0;
    const mozilla::unicode::MultiCharMapping *mapping;

    if (NS_IS_HIGH_SURROGATE(ch) && i < length - 1 &&
        NS_IS_LOW_SURROGATE(str[i + 1])) {
      ch = SURROGATE_TO_UCS4(ch, str[i + 1]);
    }

    // Re-evaluate the casing rules whenever the language changes.
    if (lang != styleContext->GetStyleFont()->mLanguage) {
      lang = styleContext->GetStyleFont()->mLanguage;
      const bool turkic = lang == nsGkAtoms::tr || lang == nsGkAtoms::az ||
                          lang == nsGkAtoms::ba || lang == nsGkAtoms::crh ||
                          lang == nsGkAtoms::tt;
      if (turkic) {
        languageSpecificCasing = eTurkish;
      } else if (lang == nsGkAtoms::nl) {
        languageSpecificCasing = eDutch;
      } else if (lang == nsGkAtoms::el) {
        languageSpecificCasing = eGreek;
        greekState = kStart;
      } else {
        languageSpecificCasing = eNone;
      }
    }

    switch (style) {
    case NS_STYLE_TEXT_TRANSFORM_LOWERCASE:
      if (languageSpecificCasing == eTurkish &&
          (ch == 'I' || ch == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE)) {
        ch = (ch == 'I') ? PRUint32(LATIN_SMALL_LETTER_DOTLESS_I)
                         : PRUint32('i');
        prevIsLetter = true;
        sigmaIndex = PRUint32(-1);
        break;
      }

      // Special lowercasing behavior for Greek Sigma: note that this is listed
      // as context-sensitive in Unicode's SpecialCasing.txt, but is *not* a
      // language-specific mapping; it applies regardless of the language of
      // the element.
      //
      // CAPITAL SIGMA lowercases to SMALL SIGMA (the non-final form) whenever
      // a letter follows, or when it stands in isolation (neither preceded
      // nor followed by a LETTER); it lowercases to FINAL SIGMA when it is
      // preceded by a letter but not followed by one.
      //
      // To implement this we track whether the previous character was a
      // letter.  If it was not, CAPITAL SIGMA maps straight to SMALL SIGMA.
      // If it was, CAPITAL SIGMA maps provisionally to FINAL SIGMA and its
      // position in the converted string is recorded in sigmaIndex; if
      // another letter then follows, that FINAL SIGMA is rewritten as a
      // standard SMALL SIGMA.
      category = mozilla::unicode::GetGenCategory(ch);

      // A letter after a provisional FINAL SIGMA turns it into SMALL SIGMA.
      if (sigmaIndex != PRUint32(-1) &&
          category == nsIUGenCategory::kLetter) {
        convertedString.SetCharAt(GREEK_SMALL_LETTER_SIGMA, sigmaIndex);
      }

      if (ch == GREEK_CAPITAL_LETTER_SIGMA) {
        if (prevIsLetter) {
          // Preceded by a letter: provisionally FINAL, remember where.
          ch = GREEK_SMALL_LETTER_FINAL_SIGMA;
          sigmaIndex = convertedString.Length();
        } else {
          // Not preceded by a letter: unconditionally SMALL SIGMA.
          ch = GREEK_SMALL_LETTER_SIGMA;
          sigmaIndex = PRUint32(-1);
        }
        prevIsLetter = true;
        break;
      }

      // ignore diacritics for the purpose of contextual sigma mapping;
      // otherwise, reset prevIsLetter appropriately and clear the
      // sigmaIndex marker
      if (category != nsIUGenCategory::kMark) {
        prevIsLetter = (category == nsIUGenCategory::kLetter);
        sigmaIndex = PRUint32(-1);
      }

      mapping = mozilla::unicode::SpecialLower(ch);
      if (mapping) {
        // Emit all mapped characters but the last; the last one becomes ch.
        int k;
        for (k = 0; k < 2 && mapping->mMappedChars[k + 1]; ++k) {
          convertedString.Append(mapping->mMappedChars[k]);
          ++extraChars;
        }
        ch = mapping->mMappedChars[k];
        break;
      }

      ch = ToLowerCase(ch);
      break;

    case NS_STYLE_TEXT_TRANSFORM_UPPERCASE:
      if (languageSpecificCasing == eTurkish && ch == 'i') {
        ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
        break;
      }

      if (languageSpecificCasing == eGreek) {
        ch = GreekUpperCase(ch, &greekState);
        break;
      }

      mapping = mozilla::unicode::SpecialUpper(ch);
      if (mapping) {
        // Emit all mapped characters but the last; the last one becomes ch.
        int k;
        for (k = 0; k < 2 && mapping->mMappedChars[k + 1]; ++k) {
          convertedString.Append(mapping->mMappedChars[k]);
          ++extraChars;
        }
        ch = mapping->mMappedChars[k];
        break;
      }

      ch = ToUpperCase(ch);
      break;

    case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE:
      // Second half of a capitalized Dutch "ij" digraph.
      if (capitalizeDutchIJ && ch == 'j') {
        ch = 'J';
        capitalizeDutchIJ = false;
        break;
      }
      capitalizeDutchIJ = false;
      if (i < aTextRun->mCapitalize.Length() && aTextRun->mCapitalize[i]) {
        if (languageSpecificCasing == eTurkish && ch == 'i') {
          ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
          break;
        }
        if (languageSpecificCasing == eDutch && ch == 'i') {
          ch = 'I';
          capitalizeDutchIJ = true;
          break;
        }

        mapping = mozilla::unicode::SpecialTitle(ch);
        if (mapping) {
          // Emit all mapped characters but the last; the last one becomes ch.
          int k;
          for (k = 0; k < 2 && mapping->mMappedChars[k + 1]; ++k) {
            convertedString.Append(mapping->mMappedChars[k]);
            ++extraChars;
          }
          ch = mapping->mMappedChars[k];
          break;
        }

        ch = ToTitleCase(ch);
      }
      break;

    default:
      break;
    }

    if (ch == PRUint32(-1)) {
      // The mapping removed this character altogether.
      deletedCharsArray.AppendElement(true);
      mergeNeeded = true;
      continue;
    }

    deletedCharsArray.AppendElement(false);
    charsToMergeArray.AppendElement(false);
    styleArray.AppendElement(styleContext);
    canBreakBeforeArray.AppendElement(aTextRun->CanBreakLineBefore(i));

    if (IS_IN_BMP(ch)) {
      convertedString.Append(ch);
    } else {
      convertedString.Append(H_SURROGATE(ch));
      convertedString.Append(L_SURROGATE(ch));
      // Step over the trailing surrogate in the source.
      ++i;
      deletedCharsArray.AppendElement(true); // not exactly deleted, but the
                                             // trailing surrogate is skipped
      ++extraChars;
    }

    // Each additional converted character is merged into the first one.
    for (; extraChars > 0; --extraChars) {
      mergeNeeded = true;
      charsToMergeArray.AppendElement(true);
      styleArray.AppendElement(styleContext);
      canBreakBeforeArray.AppendElement(false);
    }
  }

  PRUint32 flags;
  gfxTextRunFactory::Parameters innerParams =
      GetParametersForInner(aTextRun, &flags, aRefContext);
  gfxFontGroup* fontGroup = aTextRun->GetFontGroup();

  // Shape the converted string, through the inner transforming factory when
  // there is one and directly through the font group otherwise.
  nsAutoPtr<nsTransformedTextRun> transformedChild;
  nsAutoPtr<gfxTextRun> cachedChild;
  gfxTextRun* child;

  if (mInnerTransformingTextRunFactory) {
    transformedChild = mInnerTransformingTextRunFactory->MakeTextRun(
        convertedString.BeginReading(), convertedString.Length(),
        &innerParams, fontGroup, flags, styleArray.Elements(), false);
    child = transformedChild.get();
  } else {
    cachedChild = fontGroup->MakeTextRun(
        convertedString.BeginReading(), convertedString.Length(),
        &innerParams, flags);
    child = cachedChild.get();
  }
  if (!child)
    return;

  // Copy potential linebreaks into child so they're preserved
  // (and also child will be shaped appropriately)
  NS_ASSERTION(convertedString.Length() == canBreakBeforeArray.Length(),
               "Dropped characters or break-before values somewhere!");
  child->SetPotentialLineBreaks(0, canBreakBeforeArray.Length(),
      canBreakBeforeArray.Elements(), aRefContext);
  if (transformedChild) {
    transformedChild->FinishSettingProperties(aRefContext);
  }

  if (!mergeNeeded) {
    // No merging to do, so just copy; this produces a more optimized textrun.
    // We can't steal the data because the child may be cached and stealing
    // the data would break the cache.
    aTextRun->ResetGlyphRuns();
    aTextRun->CopyGlyphDataFrom(child, 0, child->GetLength(), 0);
    return;
  }

  // Now merge multiple characters into one multi-glyph character as required
  // and deal with skipping deleted accent chars
  NS_ASSERTION(charsToMergeArray.Length() == child->GetLength(),
               "source length mismatch");
  NS_ASSERTION(deletedCharsArray.Length() == aTextRun->GetLength(),
               "destination length mismatch");
  MergeCharactersInTextRun(aTextRun, child, charsToMergeArray.Elements(),
                           deletedCharsArray.Elements());
}
void nsFontVariantTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun, gfxContext* aRefContext) { gfxFontGroup* fontGroup = aTextRun->GetFontGroup(); gfxFontStyle fontStyle = *fontGroup->GetStyle(); fontStyle.size *= 0.8; nsRefPtr<gfxFontGroup> smallFont = fontGroup->Copy(&fontStyle); if (!smallFont) return; PRUint32 flags; gfxTextRunFactory::Parameters innerParams = GetParametersForInner(aTextRun, &flags, aRefContext); PRUint32 length = aTextRun->GetLength(); const PRUnichar* str = aTextRun->mString.BeginReading(); nsRefPtr<nsStyleContext>* styles = aTextRun->mStyles.Elements(); // Create a textrun so we can check cluster-start properties nsAutoPtr<gfxTextRun> inner(fontGroup->MakeTextRun(str, length, &innerParams, flags)); if (!inner.get()) return; nsCaseTransformTextRunFactory uppercaseFactory(nsnull, true); aTextRun->ResetGlyphRuns(); PRUint32 runStart = 0; nsAutoTArray<nsStyleContext*,50> styleArray; nsAutoTArray<PRUint8,50> canBreakBeforeArray; enum RunCaseState { kUpperOrCaseless, // will be untouched by font-variant:small-caps kLowercase, // will be uppercased and reduced kSpecialUpper // specials: don't shrink, but apply uppercase mapping }; RunCaseState runCase = kUpperOrCaseless; // Note that this loop runs from 0 to length *inclusive*, so the last // iteration is in effect beyond the end of the input text, to give a // chance to finish the last casing run we've found. // The last iteration, when i==length, must not attempt to look at the // character position [i] or the style data for styles[i], as this would // be beyond the valid length of the textrun or its style array. for (PRUint32 i = 0; i <= length; ++i) { RunCaseState chCase = kUpperOrCaseless; // Unless we're at the end, figure out what treatment the current // character will need. if (i < length) { nsStyleContext* styleContext = styles[i]; // Characters that aren't the start of a cluster are ignored here. They // get added to whatever lowercase/non-lowercase run we're in. 
if (!inner->IsClusterStart(i)) { chCase = runCase; } else { if (styleContext->GetStyleFont()->mFont.variant == NS_STYLE_FONT_VARIANT_SMALL_CAPS) { PRUint32 ch = str[i]; if (NS_IS_HIGH_SURROGATE(ch) && i < length - 1 && NS_IS_LOW_SURROGATE(str[i + 1])) { ch = SURROGATE_TO_UCS4(ch, str[i + 1]); } PRUint32 ch2 = ToUpperCase(ch); if (ch != ch2 || mozilla::unicode::SpecialUpper(ch)) { chCase = kLowercase; } else if (styleContext->GetStyleFont()->mLanguage == nsGkAtoms::el) { // In Greek, check for characters that will be modified by the // GreekUpperCase mapping - this catches accented capitals where // the accent is to be removed (bug 307039). These are handled by // a transformed child run using the full-size font. GreekCasingState state = kStart; // don't need exact context here ch2 = GreekUpperCase(ch, &state); if (ch != ch2) { chCase = kSpecialUpper; } } } else { // Don't transform the character! I.e., pretend that it's not lowercase } } } // At the end of the text, or when the current character needs different // casing treatment from the current run, finish the run-in-progress // and prepare to accumulate a new run. // Note that we do not look at any source data for offset [i] here, // as that would be invalid in the case where i==length. 
if ((i == length || runCase != chCase) && runStart < i) { nsAutoPtr<nsTransformedTextRun> transformedChild; nsAutoPtr<gfxTextRun> cachedChild; gfxTextRun* child; switch (runCase) { case kUpperOrCaseless: cachedChild = fontGroup->MakeTextRun(str + runStart, i - runStart, &innerParams, flags); child = cachedChild.get(); break; case kLowercase: transformedChild = uppercaseFactory.MakeTextRun(str + runStart, i - runStart, &innerParams, smallFont, flags, styleArray.Elements(), false); child = transformedChild; break; case kSpecialUpper: transformedChild = uppercaseFactory.MakeTextRun(str + runStart, i - runStart, &innerParams, fontGroup, flags, styleArray.Elements(), false); child = transformedChild; break; } if (!child) return; // Copy potential linebreaks into child so they're preserved // (and also child will be shaped appropriately) NS_ASSERTION(canBreakBeforeArray.Length() == i - runStart, "lost some break-before values?"); child->SetPotentialLineBreaks(0, canBreakBeforeArray.Length(), canBreakBeforeArray.Elements(), aRefContext); if (transformedChild) { transformedChild->FinishSettingProperties(aRefContext); } aTextRun->CopyGlyphDataFrom(child, 0, child->GetLength(), runStart); runStart = i; styleArray.Clear(); canBreakBeforeArray.Clear(); } if (i < length) { runCase = chCase; styleArray.AppendElement(styles[i]); canBreakBeforeArray.AppendElement(aTextRun->CanBreakLineBefore(i)); } } }