nsresult TextEditRules::TruncateInsertionIfNeeded(Selection* aSelection, const nsAString* aInString, nsAString* aOutString, int32_t aMaxLength, bool* aTruncated) { if (!aSelection || !aInString || !aOutString) { return NS_ERROR_NULL_POINTER; } if (!aOutString->Assign(*aInString, mozilla::fallible)) { return NS_ERROR_OUT_OF_MEMORY; } if (aTruncated) { *aTruncated = false; } NS_ENSURE_STATE(mTextEditor); if (-1 != aMaxLength && IsPlaintextEditor() && !mTextEditor->IsIMEComposing()) { // Get the current text length. // Get the length of inString. // Get the length of the selection. // If selection is collapsed, it is length 0. // Subtract the length of the selection from the len(doc) // since we'll delete the selection on insert. // This is resultingDocLength. // Get old length of IME composing string // which will be replaced by new one. // If (resultingDocLength) is at or over max, cancel the insert // If (resultingDocLength) + (length of input) > max, // set aOutString to subset of inString so length = max int32_t docLength; nsresult rv = mTextEditor->GetTextLength(&docLength); if (NS_FAILED(rv)) { return rv; } int32_t start, end; nsContentUtils::GetSelectionInTextControl(aSelection, mTextEditor->GetRoot(), start, end); TextComposition* composition = mTextEditor->GetComposition(); int32_t oldCompStrLength = composition ? composition->String().Length() : 0; const int32_t selectionLength = end - start; const int32_t resultingDocLength = docLength - selectionLength - oldCompStrLength; if (resultingDocLength >= aMaxLength) { // This call is guaranteed to reduce the capacity of the string, so it // cannot cause an OOM. aOutString->Truncate(); if (aTruncated) { *aTruncated = true; } } else { int32_t oldLength = aOutString->Length(); if (oldLength + resultingDocLength > aMaxLength) { int32_t newLength = aMaxLength - resultingDocLength; MOZ_ASSERT(newLength > 0); char16_t newLastChar = aOutString->CharAt(newLength - 1); char16_t removingFirstChar = aOutString->CharAt(newLength); // Don't separate the string between a surrogate pair. if (NS_IS_HIGH_SURROGATE(newLastChar) && NS_IS_LOW_SURROGATE(removingFirstChar)) { newLength--; } // XXX What should we do if we're removing IVS and its preceding // character won't be removed? // This call is guaranteed to reduce the capacity of the string, so it // cannot cause an OOM. aOutString->Truncate(newLength); if (aTruncated) { *aTruncated = true; } } } } return NS_OK; }
void nsCaseTransformTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun, gfxContext* aRefContext) { PRUint32 length = aTextRun->GetLength(); const PRUnichar* str = aTextRun->mString.BeginReading(); nsRefPtr<nsStyleContext>* styles = aTextRun->mStyles.Elements(); nsAutoString convertedString; nsAutoTArray<bool,50> charsToMergeArray; nsAutoTArray<bool,50> deletedCharsArray; nsAutoTArray<nsStyleContext*,50> styleArray; nsAutoTArray<PRUint8,50> canBreakBeforeArray; bool mergeNeeded = false; // Some languages have special casing conventions that differ from the // default Unicode mappings. // The enum values here are named for well-known exemplar languages that // exhibit the behavior in question; multiple lang tags may map to the // same setting here, if the behavior is shared by other languages. enum { eNone, // default non-lang-specific behavior eTurkish, // preserve dotted/dotless-i distinction in uppercase eDutch, // treat "ij" digraph as a unit for capitalization eGreek // strip accent when uppercasing Greek vowels } languageSpecificCasing = eNone; const nsIAtom* lang = nullptr; bool capitalizeDutchIJ = false; bool prevIsLetter = false; PRUint32 sigmaIndex = PRUint32(-1); nsIUGenCategory::nsUGenCategory cat; GreekCasingState greekState = kStart; PRUint32 i; for (i = 0; i < length; ++i) { PRUint32 ch = str[i]; nsStyleContext* styleContext = styles[i]; PRUint8 style = mAllUppercase ? NS_STYLE_TEXT_TRANSFORM_UPPERCASE : styleContext->GetStyleText()->mTextTransform; int extraChars = 0; const mozilla::unicode::MultiCharMapping *mcm; if (NS_IS_HIGH_SURROGATE(ch) && i < length - 1 && NS_IS_LOW_SURROGATE(str[i + 1])) { ch = SURROGATE_TO_UCS4(ch, str[i + 1]); } if (lang != styleContext->GetStyleFont()->mLanguage) { lang = styleContext->GetStyleFont()->mLanguage; if (lang == nsGkAtoms::tr || lang == nsGkAtoms::az || lang == nsGkAtoms::ba || lang == nsGkAtoms::crh || lang == nsGkAtoms::tt) { languageSpecificCasing = eTurkish; } else if (lang == nsGkAtoms::nl) { languageSpecificCasing = eDutch; } else if (lang == nsGkAtoms::el) { languageSpecificCasing = eGreek; greekState = kStart; } else { languageSpecificCasing = eNone; } } switch (style) { case NS_STYLE_TEXT_TRANSFORM_LOWERCASE: if (languageSpecificCasing == eTurkish) { if (ch == 'I') { ch = LATIN_SMALL_LETTER_DOTLESS_I; prevIsLetter = true; sigmaIndex = PRUint32(-1); break; } if (ch == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) { ch = 'i'; prevIsLetter = true; sigmaIndex = PRUint32(-1); break; } } // Special lowercasing behavior for Greek Sigma: note that this is listed // as context-sensitive in Unicode's SpecialCasing.txt, but is *not* a // language-specific mapping; it applies regardless of the language of // the element. // // The lowercase mapping for CAPITAL SIGMA should be to SMALL SIGMA (i.e. // the non-final form) whenever there is a following letter, or when the // CAPITAL SIGMA occurs in isolation (neither preceded nor followed by a // LETTER); and to FINAL SIGMA when it is preceded by another letter but // not followed by one. // // To implement the context-sensitive nature of this mapping, we keep // track of whether the previous character was a letter. If not, CAPITAL // SIGMA will map directly to SMALL SIGMA. If the previous character // was a letter, CAPITAL SIGMA maps to FINAL SIGMA and we record the // position in the converted string; if we then encounter another letter, // that FINAL SIGMA is replaced with a standard SMALL SIGMA. cat = mozilla::unicode::GetGenCategory(ch); // If sigmaIndex is not -1, it marks where we have provisionally mapped // a CAPITAL SIGMA to FINAL SIGMA; if we now find another letter, we // need to change it to SMALL SIGMA. if (sigmaIndex != PRUint32(-1)) { if (cat == nsIUGenCategory::kLetter) { convertedString.SetCharAt(GREEK_SMALL_LETTER_SIGMA, sigmaIndex); } } if (ch == GREEK_CAPITAL_LETTER_SIGMA) { // If preceding char was a letter, map to FINAL instead of SMALL, // and note where it occurred by setting sigmaIndex; we'll change it // to standard SMALL SIGMA later if another letter follows if (prevIsLetter) { ch = GREEK_SMALL_LETTER_FINAL_SIGMA; sigmaIndex = convertedString.Length(); } else { // CAPITAL SIGMA not preceded by a letter is unconditionally mapped // to SMALL SIGMA ch = GREEK_SMALL_LETTER_SIGMA; sigmaIndex = PRUint32(-1); } prevIsLetter = true; break; } // ignore diacritics for the purpose of contextual sigma mapping; // otherwise, reset prevIsLetter appropriately and clear the // sigmaIndex marker if (cat != nsIUGenCategory::kMark) { prevIsLetter = (cat == nsIUGenCategory::kLetter); sigmaIndex = PRUint32(-1); } mcm = mozilla::unicode::SpecialLower(ch); if (mcm) { int j = 0; while (j < 2 && mcm->mMappedChars[j + 1]) { convertedString.Append(mcm->mMappedChars[j]); ++extraChars; ++j; } ch = mcm->mMappedChars[j]; break; } ch = ToLowerCase(ch); break; case NS_STYLE_TEXT_TRANSFORM_UPPERCASE: if (languageSpecificCasing == eTurkish && ch == 'i') { ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE; break; } if (languageSpecificCasing == eGreek) { ch = GreekUpperCase(ch, &greekState); break; } mcm = mozilla::unicode::SpecialUpper(ch); if (mcm) { int j = 0; while (j < 2 && mcm->mMappedChars[j + 1]) { convertedString.Append(mcm->mMappedChars[j]); ++extraChars; ++j; } ch = mcm->mMappedChars[j]; break; } ch = ToUpperCase(ch); break; case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE: if (capitalizeDutchIJ && ch == 'j') { ch = 'J'; capitalizeDutchIJ = false; break; } capitalizeDutchIJ = false; if (i < aTextRun->mCapitalize.Length() && aTextRun->mCapitalize[i]) { if (languageSpecificCasing == eTurkish && ch == 'i') { ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE; break; } if (languageSpecificCasing == eDutch && ch == 'i') { ch = 'I'; capitalizeDutchIJ = true; break; } mcm = mozilla::unicode::SpecialTitle(ch); if (mcm) { int j = 0; while (j < 2 && mcm->mMappedChars[j + 1]) { convertedString.Append(mcm->mMappedChars[j]); ++extraChars; ++j; } ch = mcm->mMappedChars[j]; break; } ch = ToTitleCase(ch); } break; default: break; } if (ch == PRUint32(-1)) { deletedCharsArray.AppendElement(true); mergeNeeded = true; } else { deletedCharsArray.AppendElement(false); charsToMergeArray.AppendElement(false); styleArray.AppendElement(styleContext); canBreakBeforeArray.AppendElement(aTextRun->CanBreakLineBefore(i)); if (IS_IN_BMP(ch)) { convertedString.Append(ch); } else { convertedString.Append(H_SURROGATE(ch)); convertedString.Append(L_SURROGATE(ch)); ++i; deletedCharsArray.AppendElement(true); // not exactly deleted, but the // trailing surrogate is skipped ++extraChars; } while (extraChars-- > 0) { mergeNeeded = true; charsToMergeArray.AppendElement(true); styleArray.AppendElement(styleContext); canBreakBeforeArray.AppendElement(false); } } } PRUint32 flags; gfxTextRunFactory::Parameters innerParams = GetParametersForInner(aTextRun, &flags, aRefContext); gfxFontGroup* fontGroup = aTextRun->GetFontGroup(); nsAutoPtr<nsTransformedTextRun> transformedChild; nsAutoPtr<gfxTextRun> cachedChild; gfxTextRun* child; if (mInnerTransformingTextRunFactory) { transformedChild = mInnerTransformingTextRunFactory->MakeTextRun( convertedString.BeginReading(), convertedString.Length(), &innerParams, fontGroup, flags, styleArray.Elements(), false); child = transformedChild.get(); } else { cachedChild = fontGroup->MakeTextRun( convertedString.BeginReading(), convertedString.Length(), &innerParams, flags); child = cachedChild.get(); } if (!child) return; // Copy potential linebreaks into child so they're preserved // (and also child will be shaped appropriately) NS_ASSERTION(convertedString.Length() == canBreakBeforeArray.Length(), "Dropped characters or break-before values somewhere!"); child->SetPotentialLineBreaks(0, canBreakBeforeArray.Length(), canBreakBeforeArray.Elements(), aRefContext); if (transformedChild) { transformedChild->FinishSettingProperties(aRefContext); } if (mergeNeeded) { // Now merge multiple characters into one multi-glyph character as required // and deal with skipping deleted accent chars NS_ASSERTION(charsToMergeArray.Length() == child->GetLength(), "source length mismatch"); NS_ASSERTION(deletedCharsArray.Length() == aTextRun->GetLength(), "destination length mismatch"); MergeCharactersInTextRun(aTextRun, child, charsToMergeArray.Elements(), deletedCharsArray.Elements()); } else { // No merging to do, so just copy; this produces a more optimized textrun. // We can't steal the data because the child may be cached and stealing // the data would break the cache. aTextRun->ResetGlyphRuns(); aTextRun->CopyGlyphDataFrom(child, 0, child->GetLength(), 0); } }
static nsresult mdn_normalize(bool do_composition, bool compat, const nsAString& aSrcStr, nsAString& aToStr) { workbuf_t wb; nsresult r = NS_OK; /* * Initialize working buffer. */ workbuf_init(&wb); nsAString::const_iterator start, end; aSrcStr.BeginReading(start); aSrcStr.EndReading(end); while (start != end) { PRUint32 c; PRUnichar curChar; //assert(wb.cur == wb.last); /* * Get one character from 'from'. */ curChar= *start++; if (NS_IS_HIGH_SURROGATE(curChar) && start != end && NS_IS_LOW_SURROGATE(*(start)) ) { c = SURROGATE_TO_UCS4(curChar, *start); ++start; } else { c = curChar; } /* * Decompose it. */ if ((r = decompose(&wb, c, compat)) != NS_OK) break; /* * Get canonical class. */ get_class(&wb); /* * Reorder & compose. */ for (; wb.cur < wb.last; wb.cur++) { if (wb.cur == 0) { continue; } else if (wb.cclass[wb.cur] > 0) { /* * This is not a starter. Try reordering. * Note that characters up to it are * already in canonical order. */ reorder(&wb); continue; } /* * This is a starter character, and there are * some characters before it. Those characters * have been reordered properly, and * ready for composition. */ if (do_composition && wb.cclass[0] == 0) compose(&wb); /* * If CUR points to a starter character, * then process of characters before CUR are * already finished, because any further * reordering/composition for them are blocked * by the starter CUR points. */ if (wb.cur > 0 && wb.cclass[wb.cur] == 0) { /* Flush everything before CUR. */ r = flush_before_cur(&wb, aToStr); if (r != NS_OK) break; } } } if (r == NS_OK) { if (do_composition && wb.cur > 0 && wb.cclass[0] == 0) { /* * There is some characters left in WB. * They are ordered, but not composed yet. * Now CUR points just after the last character in WB, * and since compose() tries to compose characters * between top and CUR inclusive, we must make CUR * one character back during compose(). */ wb.cur--; compose(&wb); wb.cur++; } /* * Call this even when WB.CUR == 0, to make TO * NUL-terminated. */ r = flush_before_cur(&wb, aToStr); } workbuf_free(&wb); return (r); }
// default SetupClusterBoundaries, based on Unicode properties; // platform subclasses may override if they wish void gfxPlatform::SetupClusterBoundaries(gfxTextRun *aTextRun, const PRUnichar *aString) { if (aTextRun->GetFlags() & gfxTextRunFactory::TEXT_IS_8BIT) { // 8-bit text doesn't have clusters. // XXX is this true in all languages??? // behdad: don't think so. Czech for example IIRC has a // 'ch' grapheme. // jfkthame: but that's not expected to behave as a grapheme cluster // for selection/editing/etc. return; } gfxTextRun::CompressedGlyph extendCluster; extendCluster.SetComplex(PR_FALSE, PR_TRUE, 0); PRUint32 i, length = aTextRun->GetLength(); gfxUnicodeProperties::HSType hangulState = gfxUnicodeProperties::HST_NONE; for (i = 0; i < length; ++i) { PRBool surrogatePair = PR_FALSE; PRUint32 ch = aString[i]; if (NS_IS_HIGH_SURROGATE(ch) && i < length - 1 && NS_IS_LOW_SURROGATE(aString[i+1])) { ch = SURROGATE_TO_UCS4(ch, aString[i+1]); surrogatePair = PR_TRUE; } PRUint8 category = gfxUnicodeProperties::GetGeneralCategory(ch); gfxUnicodeProperties::HSType hangulType = gfxUnicodeProperties::HST_NONE; // combining marks extend the cluster if ((category >= HB_CATEGORY_COMBINING_MARK && category <= HB_CATEGORY_NON_SPACING_MARK) || (ch >= 0x200c && ch <= 0x200d) || // ZWJ, ZWNJ (ch >= 0xff9e && ch <= 0xff9f)) // katakana sound marks { if (i > 0) { aTextRun->SetGlyphs(i, extendCluster, nsnull); } } else if (category == HB_CATEGORY_OTHER_LETTER) { // handle special cases in Letter_Other category #if 0 // Currently disabled. This would follow the UAX#29 specification // for extended grapheme clusters, but this is not favored by // Thai users, at least for editing behavior. // See discussion of equivalent Pango issue in bug 474068 and // upstream at https://bugzilla.gnome.org/show_bug.cgi?id=576156. if ((ch & ~0xff) == 0x0e00) { // specific Thai & Lao (U+0Exx) chars that extend the cluster if ( ch == 0x0e30 || (ch >= 0x0e32 && ch <= 0x0e33) || ch == 0x0e45 || ch == 0x0eb0 || (ch >= 0x0eb2 && ch <= 0x0eb3)) { if (i > 0) { aTextRun->SetGlyphs(i, extendCluster, nsnull); } } else if ((ch >= 0x0e40 && ch <= 0x0e44) || (ch >= 0x0ec0 && ch <= 0x0ec4)) { // characters that are prepended to the following cluster if (i < length - 1) { aTextRun->SetGlyphs(i+1, extendCluster, nsnull); } } } else #endif if ((ch & ~0xff) == 0x1100 || (ch >= 0xa960 && ch <= 0xa97f) || (ch >= 0xac00 && ch <= 0xd7ff)) { // no break within Hangul syllables hangulType = gfxUnicodeProperties::GetHangulSyllableType(ch); switch (hangulType) { case gfxUnicodeProperties::HST_L: case gfxUnicodeProperties::HST_LV: case gfxUnicodeProperties::HST_LVT: if (hangulState == gfxUnicodeProperties::HST_L) { aTextRun->SetGlyphs(i, extendCluster, nsnull); } break; case gfxUnicodeProperties::HST_V: if ( (hangulState != gfxUnicodeProperties::HST_NONE) && !(hangulState & gfxUnicodeProperties::HST_T)) { aTextRun->SetGlyphs(i, extendCluster, nsnull); } break; case gfxUnicodeProperties::HST_T: if (hangulState & (gfxUnicodeProperties::HST_V | gfxUnicodeProperties::HST_T)) { aTextRun->SetGlyphs(i, extendCluster, nsnull); } break; default: break; } } } if (surrogatePair) { ++i; aTextRun->SetGlyphs(i, extendCluster, nsnull); } hangulState = hangulType; } }
void nsFontVariantTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun, gfxContext* aRefContext) { gfxFontGroup* fontGroup = aTextRun->GetFontGroup(); gfxFontStyle fontStyle = *fontGroup->GetStyle(); fontStyle.size *= 0.8; nsRefPtr<gfxFontGroup> smallFont = fontGroup->Copy(&fontStyle); if (!smallFont) return; PRUint32 flags; gfxTextRunFactory::Parameters innerParams = GetParametersForInner(aTextRun, &flags, aRefContext); PRUint32 length = aTextRun->GetLength(); const PRUnichar* str = aTextRun->mString.BeginReading(); nsRefPtr<nsStyleContext>* styles = aTextRun->mStyles.Elements(); // Create a textrun so we can check cluster-start properties nsAutoPtr<gfxTextRun> inner(fontGroup->MakeTextRun(str, length, &innerParams, flags)); if (!inner.get()) return; nsCaseTransformTextRunFactory uppercaseFactory(nullptr, true); aTextRun->ResetGlyphRuns(); PRUint32 runStart = 0; nsAutoTArray<nsStyleContext*,50> styleArray; nsAutoTArray<PRUint8,50> canBreakBeforeArray; enum RunCaseState { kUpperOrCaseless, // will be untouched by font-variant:small-caps kLowercase, // will be uppercased and reduced kSpecialUpper // specials: don't shrink, but apply uppercase mapping }; RunCaseState runCase = kUpperOrCaseless; // Note that this loop runs from 0 to length *inclusive*, so the last // iteration is in effect beyond the end of the input text, to give a // chance to finish the last casing run we've found. // The last iteration, when i==length, must not attempt to look at the // character position [i] or the style data for styles[i], as this would // be beyond the valid length of the textrun or its style array. for (PRUint32 i = 0; i <= length; ++i) { RunCaseState chCase = kUpperOrCaseless; // Unless we're at the end, figure out what treatment the current // character will need. if (i < length) { nsStyleContext* styleContext = styles[i]; // Characters that aren't the start of a cluster are ignored here. They // get added to whatever lowercase/non-lowercase run we're in. if (!inner->IsClusterStart(i)) { chCase = runCase; } else { if (styleContext->GetStyleFont()->mFont.variant == NS_STYLE_FONT_VARIANT_SMALL_CAPS) { PRUint32 ch = str[i]; if (NS_IS_HIGH_SURROGATE(ch) && i < length - 1 && NS_IS_LOW_SURROGATE(str[i + 1])) { ch = SURROGATE_TO_UCS4(ch, str[i + 1]); } PRUint32 ch2 = ToUpperCase(ch); if (ch != ch2 || mozilla::unicode::SpecialUpper(ch)) { chCase = kLowercase; } else if (styleContext->GetStyleFont()->mLanguage == nsGkAtoms::el) { // In Greek, check for characters that will be modified by the // GreekUpperCase mapping - this catches accented capitals where // the accent is to be removed (bug 307039). These are handled by // a transformed child run using the full-size font. GreekCasingState state = kStart; // don't need exact context here ch2 = GreekUpperCase(ch, &state); if (ch != ch2) { chCase = kSpecialUpper; } } } else { // Don't transform the character! I.e., pretend that it's not lowercase } } } // At the end of the text, or when the current character needs different // casing treatment from the current run, finish the run-in-progress // and prepare to accumulate a new run. // Note that we do not look at any source data for offset [i] here, // as that would be invalid in the case where i==length. if ((i == length || runCase != chCase) && runStart < i) { nsAutoPtr<nsTransformedTextRun> transformedChild; nsAutoPtr<gfxTextRun> cachedChild; gfxTextRun* child; switch (runCase) { case kUpperOrCaseless: cachedChild = fontGroup->MakeTextRun(str + runStart, i - runStart, &innerParams, flags); child = cachedChild.get(); break; case kLowercase: transformedChild = uppercaseFactory.MakeTextRun(str + runStart, i - runStart, &innerParams, smallFont, flags, styleArray.Elements(), false); child = transformedChild; break; case kSpecialUpper: transformedChild = uppercaseFactory.MakeTextRun(str + runStart, i - runStart, &innerParams, fontGroup, flags, styleArray.Elements(), false); child = transformedChild; break; } if (!child) return; // Copy potential linebreaks into child so they're preserved // (and also child will be shaped appropriately) NS_ASSERTION(canBreakBeforeArray.Length() == i - runStart, "lost some break-before values?"); child->SetPotentialLineBreaks(0, canBreakBeforeArray.Length(), canBreakBeforeArray.Elements(), aRefContext); if (transformedChild) { transformedChild->FinishSettingProperties(aRefContext); } aTextRun->CopyGlyphDataFrom(child, 0, child->GetLength(), runStart); runStart = i; styleArray.Clear(); canBreakBeforeArray.Clear(); } if (i < length) { runCase = chCase; styleArray.AppendElement(styles[i]); canBreakBeforeArray.AppendElement(aTextRun->CanBreakLineBefore(i)); } } }
NS_IMETHODIMP nsIndexedToHTML::OnIndexAvailable(nsIRequest *aRequest, nsISupports *aCtxt, nsIDirIndex *aIndex) { nsresult rv; if (!aIndex) return NS_ERROR_NULL_POINTER; nsString pushBuffer; pushBuffer.AppendLiteral("<tr"); nsXPIDLString description; aIndex->GetDescription(getter_Copies(description)); if (description.First() == char16_t('.')) pushBuffer.AppendLiteral(" class=\"hidden-object\""); pushBuffer.AppendLiteral(">\n <td sortable-data=\""); // The sort key is the name of the item, prepended by either 0, 1 or 2 // in order to group items. uint32_t type; aIndex->GetType(&type); switch (type) { case nsIDirIndex::TYPE_SYMLINK: pushBuffer.AppendInt(0); break; case nsIDirIndex::TYPE_DIRECTORY: pushBuffer.AppendInt(1); break; case nsIDirIndex::TYPE_FILE: case nsIDirIndex::TYPE_UNKNOWN: pushBuffer.AppendInt(2); break; } char16_t* escaped = nsEscapeHTML2(description.get(), description.Length()); pushBuffer.Append(escaped); pushBuffer.AppendLiteral("\"><a class=\""); switch (type) { case nsIDirIndex::TYPE_DIRECTORY: pushBuffer.AppendLiteral("dir"); break; case nsIDirIndex::TYPE_SYMLINK: pushBuffer.AppendLiteral("symlink"); break; case nsIDirIndex::TYPE_FILE: case nsIDirIndex::TYPE_UNKNOWN: pushBuffer.AppendLiteral("file"); break; } pushBuffer.AppendLiteral("\""); // Truncate long names to not stretch the table //XXX this should be left to the stylesheet (bug 391471) nsString escapedShort; if (description.Length() > 71) { nsCOMPtr<nsIChannel> channel = do_QueryInterface(aRequest); nsCOMPtr<nsIURI> uri; rv = channel->GetURI(getter_AddRefs(uri)); if (NS_FAILED(rv)) return rv; //XXX this potentially truncates after a combining char (bug 391472) nsXPIDLString descriptionAffix; descriptionAffix.Assign(description); descriptionAffix.Cut(0, descriptionAffix.Length() - 25); if (NS_IS_LOW_SURROGATE(descriptionAffix.First())) descriptionAffix.Cut(0, 1); description.Truncate(std::min<uint32_t>(71, description.Length() - 28)); if (NS_IS_HIGH_SURROGATE(description.Last())) description.Truncate(description.Length() - 1); escapedShort.Adopt(nsEscapeHTML2(description.get(), description.Length())); escapedShort.Append(mEscapedEllipsis); // add ZERO WIDTH SPACE (U+200B) for wrapping escapedShort.AppendLiteral("​"); nsString tmp; tmp.Adopt(nsEscapeHTML2(descriptionAffix.get(), descriptionAffix.Length())); escapedShort.Append(tmp); pushBuffer.AppendLiteral(" title=\""); pushBuffer.Append(escaped); pushBuffer.AppendLiteral("\""); } if (escapedShort.IsEmpty()) escapedShort.Assign(escaped); nsMemory::Free(escaped); pushBuffer.AppendLiteral(" href=\""); nsXPIDLCString loc; aIndex->GetLocation(getter_Copies(loc)); nsXPIDLCString encoding; rv = mParser->GetEncoding(getter_Copies(encoding)); if (NS_FAILED(rv)) return rv; // Don't byte-to-Unicode conversion here, it is lossy. loc.SetLength(nsUnescapeCount(loc.BeginWriting())); // need to escape links nsAutoCString locEscaped; // Adding trailing slash helps to recognize whether the URL points to a file // or a directory (bug #214405). if ((type == nsIDirIndex::TYPE_DIRECTORY) && (loc.Last() != '/')) { loc.Append('/'); } // now minimally re-escape the location... uint32_t escFlags; // for some protocols, we expect the location to be absolute. // if so, and if the location indeed appears to be a valid URI, then go // ahead and treat it like one. if (mExpectAbsLoc && NS_SUCCEEDED(net_ExtractURLScheme(loc, nullptr, nullptr, nullptr))) { // escape as absolute escFlags = esc_Forced | esc_OnlyASCII | esc_AlwaysCopy | esc_Minimal; } else { // escape as relative // esc_Directory is needed because directories have a trailing slash. // Without it, the trailing '/' will be escaped, and links from within // that directory will be incorrect escFlags = esc_Forced | esc_OnlyASCII | esc_AlwaysCopy | esc_FileBaseName | esc_Colon | esc_Directory; } NS_EscapeURL(loc.get(), loc.Length(), escFlags, locEscaped); // esc_Directory does not escape the semicolons, so if a filename // contains semicolons we need to manually escape them. // This replacement should be removed in bug #473280 locEscaped.ReplaceSubstring(";", "%3b"); nsAutoString utf16URI; if (encoding.EqualsLiteral("UTF-8")) { // Try to convert non-ASCII bytes to Unicode using UTF-8 decoder. nsCOMPtr<nsIUnicodeDecoder> decoder = mozilla::dom::EncodingUtils::DecoderForEncoding("UTF-8"); decoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal); int32_t len = locEscaped.Length(); int32_t outlen = 0; rv = decoder->GetMaxLength(locEscaped.get(), len, &outlen); if (NS_FAILED(rv)) { return rv; } nsAutoArrayPtr<char16_t> outbuf(new char16_t[outlen]); rv = decoder->Convert(locEscaped.get(), &len, outbuf, &outlen); // Use the result only if the sequence is valid as UTF-8. if (rv == NS_OK) { utf16URI.Append(outbuf, outlen); } } if (utf16URI.IsEmpty()) { // Escape all non-ASCII bytes to preserve the raw value. nsAutoCString outstr; NS_EscapeURL(locEscaped, esc_AlwaysCopy | esc_OnlyNonASCII, outstr); CopyASCIItoUTF16(outstr, utf16URI); } nsString htmlEscapedURL; htmlEscapedURL.Adopt(nsEscapeHTML2(utf16URI.get(), utf16URI.Length())); pushBuffer.Append(htmlEscapedURL); pushBuffer.AppendLiteral("\">"); if (type == nsIDirIndex::TYPE_FILE || type == nsIDirIndex::TYPE_UNKNOWN) { pushBuffer.AppendLiteral("<img src=\"moz-icon://"); int32_t lastDot = locEscaped.RFindChar('.'); if (lastDot != kNotFound) { locEscaped.Cut(0, lastDot); NS_ConvertUTF8toUTF16 utf16LocEscaped(locEscaped); nsString htmlFileExt; htmlFileExt.Adopt(nsEscapeHTML2(utf16LocEscaped.get(), utf16LocEscaped.Length())); pushBuffer.Append(htmlFileExt); } else { pushBuffer.AppendLiteral("unknown"); } pushBuffer.AppendLiteral("?size=16\" alt=\""); nsXPIDLString altText; rv = mBundle->GetStringFromName(MOZ_UTF16("DirFileLabel"), getter_Copies(altText)); if (NS_FAILED(rv)) return rv; AppendNonAsciiToNCR(altText, pushBuffer); pushBuffer.AppendLiteral("\">"); } pushBuffer.Append(escapedShort); pushBuffer.AppendLiteral("</a></td>\n <td"); if (type == nsIDirIndex::TYPE_DIRECTORY || type == nsIDirIndex::TYPE_SYMLINK) { pushBuffer.AppendLiteral(">"); } else { int64_t size; aIndex->GetSize(&size); if (uint64_t(size) != UINT64_MAX) { pushBuffer.AppendLiteral(" sortable-data=\""); pushBuffer.AppendInt(size); pushBuffer.AppendLiteral("\">"); nsAutoString sizeString; FormatSizeString(size, sizeString); pushBuffer.Append(sizeString); } else { pushBuffer.AppendLiteral(">"); } } pushBuffer.AppendLiteral("</td>\n <td"); PRTime t; aIndex->GetLastModified(&t); if (t == -1) { pushBuffer.AppendLiteral("></td>\n <td>"); } else { pushBuffer.AppendLiteral(" sortable-data=\""); pushBuffer.AppendInt(static_cast<int64_t>(t)); pushBuffer.AppendLiteral("\">"); nsAutoString formatted; mDateTime->FormatPRTime(nullptr, kDateFormatShort, kTimeFormatNone, t, formatted); AppendNonAsciiToNCR(formatted, pushBuffer); pushBuffer.AppendLiteral("</td>\n <td>"); mDateTime->FormatPRTime(nullptr, kDateFormatNone, kTimeFormatSeconds, t, formatted); // use NCR to show date in any doc charset AppendNonAsciiToNCR(formatted, pushBuffer); } pushBuffer.AppendLiteral("</td>\n</tr>"); return FormatInputStream(aRequest, aCtxt, pushBuffer); }
NS_IMETHODIMP nsIndexedToHTML::OnIndexAvailable(nsIRequest *aRequest, nsISupports *aCtxt, nsIDirIndex *aIndex) { nsresult rv; if (!aIndex) return NS_ERROR_NULL_POINTER; nsString pushBuffer; pushBuffer.AppendLiteral("<tr"); nsXPIDLString description; aIndex->GetDescription(getter_Copies(description)); if (description.First() == PRUnichar('.')) pushBuffer.AppendLiteral(" class=\"hidden-object\""); pushBuffer.AppendLiteral(">\n <td sortable-data=\""); // The sort key is the name of the item, prepended by either 0, 1 or 2 // in order to group items. uint32_t type; aIndex->GetType(&type); switch (type) { case nsIDirIndex::TYPE_SYMLINK: pushBuffer.AppendInt(0); break; case nsIDirIndex::TYPE_DIRECTORY: pushBuffer.AppendInt(1); break; case nsIDirIndex::TYPE_FILE: case nsIDirIndex::TYPE_UNKNOWN: pushBuffer.AppendInt(2); break; } PRUnichar* escaped = nsEscapeHTML2(description.get(), description.Length()); pushBuffer.Append(escaped); pushBuffer.AppendLiteral("\"><a class=\""); switch (type) { case nsIDirIndex::TYPE_DIRECTORY: pushBuffer.AppendLiteral("dir"); break; case nsIDirIndex::TYPE_SYMLINK: pushBuffer.AppendLiteral("symlink"); break; case nsIDirIndex::TYPE_FILE: case nsIDirIndex::TYPE_UNKNOWN: pushBuffer.AppendLiteral("file"); break; } pushBuffer.AppendLiteral("\""); // Truncate long names to not stretch the table //XXX this should be left to the stylesheet (bug 391471) nsString escapedShort; if (description.Length() > 71) { nsCOMPtr<nsIChannel> channel = do_QueryInterface(aRequest); nsCOMPtr<nsIURI> uri; rv = channel->GetURI(getter_AddRefs(uri)); if (NS_FAILED(rv)) return rv; //XXX this potentially truncates after a combining char (bug 391472) nsXPIDLString descriptionAffix; descriptionAffix.Assign(description); descriptionAffix.Cut(0, descriptionAffix.Length() - 25); if (NS_IS_LOW_SURROGATE(descriptionAffix.First())) descriptionAffix.Cut(0, 1); description.Truncate(NS_MIN<uint32_t>(71, description.Length() - 28)); if (NS_IS_HIGH_SURROGATE(description.Last())) description.Truncate(description.Length() - 1); escapedShort.Adopt(nsEscapeHTML2(description.get(), description.Length())); escapedShort.Append(mEscapedEllipsis); // add ZERO WIDTH SPACE (U+200B) for wrapping escapedShort.AppendLiteral("​"); nsString tmp; tmp.Adopt(nsEscapeHTML2(descriptionAffix.get(), descriptionAffix.Length())); escapedShort.Append(tmp); pushBuffer.AppendLiteral(" title=\""); pushBuffer.Append(escaped); pushBuffer.AppendLiteral("\""); } if (escapedShort.IsEmpty()) escapedShort.Assign(escaped); nsMemory::Free(escaped); pushBuffer.AppendLiteral(" href=\""); nsXPIDLCString loc; aIndex->GetLocation(getter_Copies(loc)); if (!mTextToSubURI) { mTextToSubURI = do_GetService(NS_ITEXTTOSUBURI_CONTRACTID, &rv); if (NS_FAILED(rv)) return rv; } nsXPIDLCString encoding; rv = mParser->GetEncoding(getter_Copies(encoding)); if (NS_FAILED(rv)) return rv; nsXPIDLString unEscapeSpec; rv = mTextToSubURI->UnEscapeAndConvert(encoding, loc, getter_Copies(unEscapeSpec)); if (NS_FAILED(rv)) return rv; // need to escape links nsAutoCString escapeBuf; NS_ConvertUTF16toUTF8 utf8UnEscapeSpec(unEscapeSpec); // Adding trailing slash helps to recognize whether the URL points to a file // or a directory (bug #214405). if ((type == nsIDirIndex::TYPE_DIRECTORY) && (utf8UnEscapeSpec.Last() != '/')) { utf8UnEscapeSpec.Append('/'); } // now minimally re-escape the location... uint32_t escFlags; // for some protocols, we expect the location to be absolute. // if so, and if the location indeed appears to be a valid URI, then go // ahead and treat it like one. if (mExpectAbsLoc && NS_SUCCEEDED(net_ExtractURLScheme(utf8UnEscapeSpec, nullptr, nullptr, nullptr))) { // escape as absolute escFlags = esc_Forced | esc_OnlyASCII | esc_AlwaysCopy | esc_Minimal; } else { // escape as relative // esc_Directory is needed because directories have a trailing slash. // Without it, the trailing '/' will be escaped, and links from within // that directory will be incorrect escFlags = esc_Forced | esc_OnlyASCII | esc_AlwaysCopy | esc_FileBaseName | esc_Colon | esc_Directory; } NS_EscapeURL(utf8UnEscapeSpec.get(), utf8UnEscapeSpec.Length(), escFlags, escapeBuf); // esc_Directory does not escape the semicolons, so if a filename // contains semicolons we need to manually escape them. // This replacement should be removed in bug #473280 escapeBuf.ReplaceSubstring(";", "%3b"); NS_ConvertUTF8toUTF16 utf16URI(escapeBuf); nsString htmlEscapedURL; htmlEscapedURL.Adopt(nsEscapeHTML2(utf16URI.get(), utf16URI.Length())); pushBuffer.Append(htmlEscapedURL); pushBuffer.AppendLiteral("\">"); if (type == nsIDirIndex::TYPE_FILE || type == nsIDirIndex::TYPE_UNKNOWN) { pushBuffer.AppendLiteral("<img src=\"moz-icon://"); int32_t lastDot = escapeBuf.RFindChar('.'); if (lastDot != kNotFound) { escapeBuf.Cut(0, lastDot); NS_ConvertUTF8toUTF16 utf16EscapeBuf(escapeBuf); nsString htmlFileExt; htmlFileExt.Adopt(nsEscapeHTML2(utf16EscapeBuf.get(), utf16EscapeBuf.Length())); pushBuffer.Append(htmlFileExt); } else { pushBuffer.AppendLiteral("unknown"); } pushBuffer.AppendLiteral("?size=16\" alt=\""); nsXPIDLString altText; rv = mBundle->GetStringFromName(NS_LITERAL_STRING("DirFileLabel").get(), getter_Copies(altText)); if (NS_FAILED(rv)) return rv; AppendNonAsciiToNCR(altText, pushBuffer); pushBuffer.AppendLiteral("\">"); } pushBuffer.Append(escapedShort); pushBuffer.AppendLiteral("</a></td>\n <td"); if (type == nsIDirIndex::TYPE_DIRECTORY || type == nsIDirIndex::TYPE_SYMLINK) { pushBuffer.AppendLiteral(">"); } else { int64_t size; aIndex->GetSize(&size); if (uint64_t(size) != LL_MAXUINT) { pushBuffer.AppendLiteral(" sortable-data=\""); pushBuffer.AppendInt(size); pushBuffer.AppendLiteral("\">"); nsAutoString sizeString; FormatSizeString(size, sizeString); pushBuffer.Append(sizeString); } else { pushBuffer.AppendLiteral(">"); } } pushBuffer.AppendLiteral("</td>\n <td"); PRTime t; aIndex->GetLastModified(&t); if (t == -1) { pushBuffer.AppendLiteral("></td>\n <td>"); } else { pushBuffer.AppendLiteral(" sortable-data=\""); pushBuffer.AppendInt(static_cast<int64_t>(t)); pushBuffer.AppendLiteral("\">"); nsAutoString formatted; mDateTime->FormatPRTime(nullptr, kDateFormatShort, kTimeFormatNone, t, formatted); AppendNonAsciiToNCR(formatted, pushBuffer); pushBuffer.AppendLiteral("</td>\n <td>"); mDateTime->FormatPRTime(nullptr, kDateFormatNone, kTimeFormatSeconds, t, formatted); // use NCR to show date in any doc charset AppendNonAsciiToNCR(formatted, pushBuffer); } pushBuffer.AppendLiteral("</td>\n</tr>"); return FormatInputStream(aRequest, aCtxt, pushBuffer); }
NS_IMETHODIMP nsLocalFile::CreateUnique(uint32_t aType, uint32_t aAttributes) { nsresult rv; bool longName; #ifdef XP_WIN nsAutoString pathName, leafName, rootName, suffix; rv = GetPath(pathName); #else nsAutoCString pathName, leafName, rootName, suffix; rv = GetNativePath(pathName); #endif if (NS_FAILED(rv)) { return rv; } longName = (pathName.Length() + kMaxSequenceNumberLength > kMaxFilenameLength); if (!longName) { rv = Create(aType, aAttributes); if (rv != NS_ERROR_FILE_ALREADY_EXISTS) { return rv; } } #ifdef XP_WIN rv = GetLeafName(leafName); if (NS_FAILED(rv)) { return rv; } const int32_t lastDot = leafName.RFindChar(char16_t('.')); #else rv = GetNativeLeafName(leafName); if (NS_FAILED(rv)) { return rv; } const int32_t lastDot = leafName.RFindChar('.'); #endif if (lastDot == kNotFound) { rootName = leafName; } else { suffix = Substring(leafName, lastDot); // include '.' rootName = Substring(leafName, 0, lastDot); // strip suffix and dot } if (longName) { int32_t maxRootLength = (kMaxFilenameLength - (pathName.Length() - leafName.Length()) - suffix.Length() - kMaxSequenceNumberLength); // We cannot create an item inside a directory whose name is too long. // Also, ensure that at least one character remains after we truncate // the root name, as we don't want to end up with an empty leaf name. if (maxRootLength < 2) { return NS_ERROR_FILE_UNRECOGNIZED_PATH; } #ifdef XP_WIN // ensure that we don't cut the name in mid-UTF16-character rootName.SetLength(NS_IS_LOW_SURROGATE(rootName[maxRootLength]) ? maxRootLength - 1 : maxRootLength); SetLeafName(rootName + suffix); #else if (NS_IsNativeUTF8()) { // ensure that we don't cut the name in mid-UTF8-character // (assume the name is valid UTF8 to begin with) while (UTF8traits::isInSeq(rootName[maxRootLength])) { --maxRootLength; } // Another check to avoid ending up with an empty leaf name. if (maxRootLength == 0 && suffix.IsEmpty()) { return NS_ERROR_FILE_UNRECOGNIZED_PATH; } } rootName.SetLength(maxRootLength); SetNativeLeafName(rootName + suffix); #endif nsresult rvCreate = Create(aType, aAttributes); if (rvCreate != NS_ERROR_FILE_ALREADY_EXISTS) { return rvCreate; } } for (int indx = 1; indx < 10000; ++indx) { // start with "Picture-1.jpg" after "Picture.jpg" exists #ifdef XP_WIN SetLeafName(rootName + NS_ConvertASCIItoUTF16(nsPrintfCString("-%d", indx)) + suffix); #else SetNativeLeafName(rootName + nsPrintfCString("-%d", indx) + suffix); #endif rv = Create(aType, aAttributes); if (NS_SUCCEEDED(rv) || rv != NS_ERROR_FILE_ALREADY_EXISTS) { return rv; } } // The disk is full, sort of return NS_ERROR_FILE_TOO_BIG; }
void nsXHTMLContentSerializer::AppendAndTranslateEntities(const nsAString& aStr, nsAString& aOutputStr) { if (mBodyOnly && !mInBody) { return; } if (mDisableEntityEncoding) { aOutputStr.Append(aStr); return; } if (mFlags & (nsIDocumentEncoder::OutputEncodeBasicEntities | nsIDocumentEncoder::OutputEncodeLatin1Entities | nsIDocumentEncoder::OutputEncodeHTMLEntities | nsIDocumentEncoder::OutputEncodeW3CEntities)) { nsIParserService* parserService = nsContentUtils::GetParserService(); if (!parserService) { NS_ERROR("Can't get parser service"); return; } nsReadingIterator<PRUnichar> done_reading; aStr.EndReading(done_reading); // for each chunk of |aString|... PRUint32 advanceLength = 0; nsReadingIterator<PRUnichar> iter; const char **entityTable = mInAttribute ? kAttrEntities : kEntities; for (aStr.BeginReading(iter); iter != done_reading; iter.advance(PRInt32(advanceLength))) { PRUint32 fragmentLength = iter.size_forward(); PRUint32 lengthReplaced = 0; // the number of UTF-16 codepoints // replaced by a particular entity const PRUnichar* c = iter.get(); const PRUnichar* fragmentStart = c; const PRUnichar* fragmentEnd = c + fragmentLength; const char* entityText = nsnull; nsCAutoString entityReplacement; char* fullEntityText = nsnull; advanceLength = 0; // for each character in this chunk, check if it // needs to be replaced for (; c < fragmentEnd; c++, advanceLength++) { PRUnichar val = *c; if (val == kValNBSP) { entityText = kEntityNBSP; break; } else if ((val <= kGTVal) && (entityTable[val][0] != 0)) { entityText = entityTable[val]; break; } else if (val > 127 && ((val < 256 && mFlags & nsIDocumentEncoder::OutputEncodeLatin1Entities) || mFlags & nsIDocumentEncoder::OutputEncodeHTMLEntities)) { parserService->HTMLConvertUnicodeToEntity(val, entityReplacement); if (!entityReplacement.IsEmpty()) { entityText = entityReplacement.get(); break; } } else if (val > 127 && mFlags & nsIDocumentEncoder::OutputEncodeW3CEntities && mEntityConverter) { if (NS_IS_HIGH_SURROGATE(val) && c + 1 < fragmentEnd && NS_IS_LOW_SURROGATE(*(c + 1))) { PRUint32 valUTF32 = SURROGATE_TO_UCS4(val, *(++c)); if (NS_SUCCEEDED(mEntityConverter->ConvertUTF32ToEntity(valUTF32, nsIEntityConverter::entityW3C, &fullEntityText))) { lengthReplaced = 2; break; } else { advanceLength++; } } else if (NS_SUCCEEDED(mEntityConverter->ConvertToEntity(val, nsIEntityConverter::entityW3C, &fullEntityText))) { lengthReplaced = 1; break; } } } aOutputStr.Append(fragmentStart, advanceLength); if (entityText) { aOutputStr.Append(PRUnichar('&')); AppendASCIItoUTF16(entityText, aOutputStr); aOutputStr.Append(PRUnichar(';')); advanceLength++; } // if it comes from nsIEntityConverter, it already has '&' and ';' else if (fullEntityText) { AppendASCIItoUTF16(fullEntityText, aOutputStr); nsMemory::Free(fullEntityText); advanceLength += lengthReplaced; } } } else { nsXMLContentSerializer::AppendAndTranslateEntities(aStr, aOutputStr); } }
nsresult nsHyphenator::Hyphenate(const nsAString& aString, nsTArray<bool>& aHyphens) { if (!aHyphens.SetLength(aString.Length())) { return NS_ERROR_OUT_OF_MEMORY; } memset(aHyphens.Elements(), PR_FALSE, aHyphens.Length()); bool inWord = false; PRUint32 wordStart = 0, wordLimit = 0; PRUint32 chLen; for (PRUint32 i = 0; i < aString.Length(); i += chLen) { PRUint32 ch = aString[i]; chLen = 1; if (NS_IS_HIGH_SURROGATE(ch)) { if (i + 1 < aString.Length() && NS_IS_LOW_SURROGATE(aString[i+1])) { ch = SURROGATE_TO_UCS4(ch, aString[i+1]); chLen = 2; } else { NS_WARNING("unpaired surrogate found during hyphenation"); } } nsIUGenCategory::nsUGenCategory cat = mCategories->Get(ch); if (cat == nsIUGenCategory::kLetter || cat == nsIUGenCategory::kMark) { if (!inWord) { inWord = PR_TRUE; wordStart = i; } wordLimit = i + chLen; if (i + chLen < aString.Length()) { continue; } } if (inWord) { const PRUnichar *begin = aString.BeginReading(); NS_ConvertUTF16toUTF8 utf8(begin + wordStart, wordLimit - wordStart); nsAutoTArray<char,200> utf8hyphens; utf8hyphens.SetLength(utf8.Length() + 5); char **rep = nsnull; int *pos = nsnull; int *cut = nsnull; int err = hnj_hyphen_hyphenate2((HyphenDict*)mDict, utf8.BeginReading(), utf8.Length(), utf8hyphens.Elements(), nsnull, &rep, &pos, &cut); if (!err) { // Surprisingly, hnj_hyphen_hyphenate2 converts the 'hyphens' buffer // from utf8 code unit indexing (which would match the utf8 input // string directly) to Unicode character indexing. // We then need to convert this to utf16 code unit offsets for Gecko. const char *hyphPtr = utf8hyphens.Elements(); const PRUnichar *cur = begin + wordStart; const PRUnichar *end = begin + wordLimit; while (cur < end) { if (*hyphPtr & 0x01) { aHyphens[cur - begin] = PR_TRUE; } cur++; if (cur < end && NS_IS_LOW_SURROGATE(*cur) && NS_IS_HIGH_SURROGATE(*(cur-1))) { cur++; } hyphPtr++; } } } inWord = PR_FALSE; } return NS_OK; }
NS_IMETHODIMP nsUnicodeToGBK::ConvertNoBuff( const PRUnichar * aSrc, PRInt32 * aSrcLength, char * aDest, PRInt32 * aDestLength) { PRInt32 iSrcLength = 0; PRInt32 iDestLength = 0; PRUnichar unicode; nsresult res = NS_OK; while (iSrcLength < *aSrcLength ) { unicode = *aSrc; //if unicode's hi byte has something, it is not ASCII, must be a GB if(IS_ASCII(unicode)) { // this is an ASCII *aDest = CAST_UNICHAR_TO_CHAR(*aSrc); aDest++; // increment 1 byte iDestLength +=1; } else { char byte1, byte2; if(mUtil.UnicodeToGBKChar( unicode, PR_FALSE, &byte1, &byte2)) { // make sure we still have 2 bytes for output first if(iDestLength+2 > *aDestLength) { res = NS_OK_UENC_MOREOUTPUT; break; } aDest[0] = byte1; aDest[1] = byte2; aDest += 2; // increment 2 bytes iDestLength +=2; } else { PRInt32 aOutLen = 2; // make sure we still have 2 bytes for output first if(iDestLength+2 > *aDestLength) { res = NS_OK_UENC_MOREOUTPUT; break; } // we cannot map in the common mapping. Let's try to // call the delegated 2 byte converter for the gbk or gb18030 // unique 2 byte mapping if(TryExtensionEncoder(unicode, aDest, &aOutLen)) { iDestLength += aOutLen; aDest += aOutLen; } else { // make sure we still have 4 bytes for output first if(iDestLength+4 > *aDestLength) { res = NS_OK_UENC_MOREOUTPUT; break; } // we still cannot map. Let's try to // call the delegated GB18030 4 byte converter aOutLen = 4; if( NS_IS_HIGH_SURROGATE(unicode) ) { if((iSrcLength+1) < *aSrcLength ) { if(EncodeSurrogate(aSrc[0],aSrc[1], aDest)) { // since we got a surrogate pair, we need to increment src. iSrcLength++ ; aSrc++; iDestLength += aOutLen; aDest += aOutLen; } else { // only get a high surrogate, but not a low surrogate res = NS_ERROR_UENC_NOMAPPING; iSrcLength++; // include length of the unmapped character break; } } else { mSurrogateHigh = aSrc[0]; break; // this will go to afterwhileloop } } else { if( NS_IS_LOW_SURROGATE(unicode) ) { if(NS_IS_HIGH_SURROGATE(mSurrogateHigh)) { if(EncodeSurrogate(mSurrogateHigh, aSrc[0], aDest)) { iDestLength += aOutLen; aDest += aOutLen; } else { // only get a high surrogate, but not a low surrogate res = NS_ERROR_UENC_NOMAPPING; iSrcLength++; // include length of the unmapped character break; } } else { // only get a low surrogate, but not a low surrogate res = NS_ERROR_UENC_NOMAPPING; iSrcLength++; // include length of the unmapped character break; } } else { if(Try4BytesEncoder(unicode, aDest, &aOutLen)) { NS_ASSERTION((aOutLen == 4), "we should always generate 4 bytes here"); iDestLength += aOutLen; aDest += aOutLen; } else { res = NS_ERROR_UENC_NOMAPPING; iSrcLength++; // include length of the unmapped character break; } } } } } } iSrcLength++ ; // Each unicode char just count as one in PRUnichar string; mSurrogateHigh = 0; aSrc++; if ( iDestLength >= (*aDestLength) && (iSrcLength < *aSrcLength) ) { res = NS_OK_UENC_MOREOUTPUT; break; } } //afterwhileloop: *aDestLength = iDestLength; *aSrcLength = iSrcLength; return res; }
NS_IMETHODIMP nsSaveAsCharset::DoCharsetConversion(const PRUnichar *inString, char **outString) { NS_ENSURE_ARG_POINTER(outString); *outString = nullptr; nsresult rv; int32_t inStringLength = NS_strlen(inString); // original input string length int32_t bufferLength; // allocated buffer length int32_t srcLength = inStringLength; int32_t dstLength; int32_t pos1, pos2; nsresult saveResult = NS_OK; // to remember NS_ERROR_UENC_NOMAPPING // estimate and allocate the target buffer (reserve extra memory for fallback) rv = mEncoder->GetMaxLength(inString, inStringLength, &dstLength); if (NS_FAILED(rv)) return rv; bufferLength = dstLength + RESERVE_FALLBACK_BYTES; // extra bytes for fallback // + 1 is for the terminating NUL -- we don't add that to bufferLength so that // we can always write dstPtr[pos2] = '\0' even when the encoder filled the // buffer. char *dstPtr = (char *) PR_Malloc(bufferLength + 1); if (!dstPtr) { return NS_ERROR_OUT_OF_MEMORY; } for (pos1 = 0, pos2 = 0; pos1 < inStringLength;) { // convert from unicode dstLength = bufferLength - pos2; NS_ASSERTION(dstLength >= 0, "out of bounds write"); rv = mEncoder->Convert(&inString[pos1], &srcLength, &dstPtr[pos2], &dstLength); pos1 += srcLength ? srcLength : 1; pos2 += dstLength; dstPtr[pos2] = '\0'; // break: this is usually the case (no error) OR unrecoverable error if (NS_ERROR_UENC_NOMAPPING != rv) break; // remember this happened and reset the result saveResult = rv; rv = NS_OK; // finish encoder, give it a chance to write extra data like escape sequences dstLength = bufferLength - pos2; rv = mEncoder->Finish(&dstPtr[pos2], &dstLength); if (NS_SUCCEEDED(rv)) { pos2 += dstLength; dstPtr[pos2] = '\0'; } srcLength = inStringLength - pos1; // do the fallback if (!ATTR_NO_FALLBACK(mAttribute)) { uint32_t unMappedChar; if (NS_IS_HIGH_SURROGATE(inString[pos1-1]) && inStringLength > pos1 && NS_IS_LOW_SURROGATE(inString[pos1])) { unMappedChar = SURROGATE_TO_UCS4(inString[pos1-1], inString[pos1]); pos1++; } else { unMappedChar = inString[pos1-1]; } rv = mEncoder->GetMaxLength(inString+pos1, inStringLength-pos1, &dstLength); if (NS_FAILED(rv)) break; rv = HandleFallBack(unMappedChar, &dstPtr, &bufferLength, &pos2, dstLength); if (NS_FAILED(rv)) break; dstPtr[pos2] = '\0'; } } if (NS_SUCCEEDED(rv)) { // finish encoder, give it a chance to write extra data like escape sequences dstLength = bufferLength - pos2; rv = mEncoder->Finish(&dstPtr[pos2], &dstLength); if (NS_SUCCEEDED(rv)) { pos2 += dstLength; dstPtr[pos2] = '\0'; } } if (NS_FAILED(rv)) { PR_FREEIF(dstPtr); return rv; } *outString = dstPtr; // set the result string // set error code so that the caller can do own fall back if (NS_ERROR_UENC_NOMAPPING == saveResult) { rv = NS_ERROR_UENC_NOMAPPING; } return rv; }
NS_IMETHODIMP nsSaveAsCharset::DoCharsetConversion(const PRUnichar *inString, char **outString) { if(nullptr == outString ) return NS_ERROR_NULL_POINTER; NS_ASSERTION(outString, "invalid input"); *outString = NULL; nsresult rv; PRInt32 inStringLength = NS_strlen(inString); // original input string length PRInt32 bufferLength; // allocated buffer length PRInt32 srcLength = inStringLength; PRInt32 dstLength; char *dstPtr = NULL; PRInt32 pos1, pos2; nsresult saveResult = NS_OK; // to remember NS_ERROR_UENC_NOMAPPING // estimate and allocate the target buffer (reserve extra memory for fallback) rv = mEncoder->GetMaxLength(inString, inStringLength, &dstLength); if (NS_FAILED(rv)) return rv; bufferLength = dstLength + 512; // reserve 512 byte for fallback. dstPtr = (char *) PR_Malloc(bufferLength); if (NULL == dstPtr) return NS_ERROR_OUT_OF_MEMORY; for (pos1 = 0, pos2 = 0; pos1 < inStringLength;) { // convert from unicode dstLength = bufferLength - pos2; rv = mEncoder->Convert(&inString[pos1], &srcLength, &dstPtr[pos2], &dstLength); pos1 += srcLength ? srcLength : 1; pos2 += dstLength; dstPtr[pos2] = '\0'; // break: this is usually the case (no error) OR unrecoverable error if (NS_ERROR_UENC_NOMAPPING != rv) break; // remember this happened and reset the result saveResult = rv; rv = NS_OK; // finish encoder, give it a chance to write extra data like escape sequences dstLength = bufferLength - pos2; rv = mEncoder->Finish(&dstPtr[pos2], &dstLength); if (NS_SUCCEEDED(rv)) { pos2 += dstLength; dstPtr[pos2] = '\0'; } srcLength = inStringLength - pos1; // do the fallback if (!ATTR_NO_FALLBACK(mAttribute)) { PRUint32 unMappedChar; if (NS_IS_HIGH_SURROGATE(inString[pos1-1]) && inStringLength > pos1 && NS_IS_LOW_SURROGATE(inString[pos1])) { unMappedChar = SURROGATE_TO_UCS4(inString[pos1-1], inString[pos1]); pos1++; } else { unMappedChar = inString[pos1-1]; } rv = mEncoder->GetMaxLength(inString+pos1, inStringLength-pos1, &dstLength); if (NS_FAILED(rv)) break; rv = HandleFallBack(unMappedChar, &dstPtr, &bufferLength, &pos2, dstLength); if (NS_FAILED(rv)) break; dstPtr[pos2] = '\0'; } } if (NS_SUCCEEDED(rv)) { // finish encoder, give it a chance to write extra data like escape sequences dstLength = bufferLength - pos2; rv = mEncoder->Finish(&dstPtr[pos2], &dstLength); if (NS_SUCCEEDED(rv)) { pos2 += dstLength; dstPtr[pos2] = '\0'; } } if (NS_FAILED(rv)) { PR_FREEIF(dstPtr); return rv; } *outString = dstPtr; // set the result string // set error code so that the caller can do own fall back if (NS_ERROR_UENC_NOMAPPING == saveResult) { rv = NS_ERROR_UENC_NOMAPPING; } return rv; }
bool gfxScriptItemizer::Next(PRUint32& aRunStart, PRUint32& aRunLimit, PRInt32& aRunScript) { /* if we've fallen off the end of the text, we're done */ if (scriptLimit >= textLength) { return false; } SYNC_FIXUP(); scriptCode = MOZ_SCRIPT_COMMON; for (scriptStart = scriptLimit; scriptLimit < textLength; scriptLimit += 1) { PRUint32 ch; PRInt32 sc; PRInt32 pairIndex; PRUint32 startOfChar = scriptLimit; ch = textPtr[scriptLimit]; /* * MODIFICATION for Gecko - clear the paired-character stack * when we see a space character, because we cannot trust * context outside the current "word" when doing textrun * construction */ if (ch == 0x20) { while (STACK_IS_NOT_EMPTY()) { pop(); } sc = MOZ_SCRIPT_COMMON; pairIndex = -1; } else { /* decode UTF-16 (may be surrogate pair) */ if (NS_IS_HIGH_SURROGATE(ch) && scriptLimit < textLength - 1) { PRUint32 low = textPtr[scriptLimit + 1]; if (NS_IS_LOW_SURROGATE(low)) { ch = SURROGATE_TO_UCS4(ch, low); scriptLimit += 1; } } sc = mozilla::unicode::GetScriptCode(ch); pairIndex = getPairIndex(ch); /* * Paired character handling: * * if it's an open character, push it onto the stack. * if it's a close character, find the matching open on the * stack, and use that script code. Any non-matching open * characters above it on the stack will be poped. */ if (pairIndex >= 0) { if ((pairIndex & 1) == 0) { push(pairIndex, scriptCode); } else { PRInt32 pi = pairIndex & ~1; while (STACK_IS_NOT_EMPTY() && TOP().pairIndex != pi) { pop(); } if (STACK_IS_NOT_EMPTY()) { sc = TOP().scriptCode; } } } } if (sameScript(scriptCode, sc)) { if (scriptCode <= MOZ_SCRIPT_INHERITED && sc > MOZ_SCRIPT_INHERITED) { scriptCode = sc; fixup(scriptCode); } /* * if this character is a close paired character, * pop the matching open character from the stack */ if (pairIndex >= 0 && (pairIndex & 1) != 0) { pop(); } } else { /* * reset scriptLimit in case it was advanced during reading a * multiple-code-unit character */ scriptLimit = startOfChar; break; } } aRunStart = scriptStart; aRunLimit = scriptLimit; aRunScript = scriptCode; return true; }