Beispiel #1
0
// Copies *aInString into *aOutString and, when a maximum length applies,
// shortens the copy so that inserting it does not push the text past
// aMaxLength.
//
// @param aSelection  Current selection; its extent within the text control
//                    is subtracted from the text length.
// @param aInString   Text the caller wants to insert.
// @param aOutString  Receives the (possibly shortened) text to insert.
// @param aMaxLength  Maximum allowed length; -1 disables the check.
// @param aTruncated  Optional; set to true when aOutString was shortened
//                    (including shortened to empty), false otherwise.
// @return NS_ERROR_NULL_POINTER if a required pointer is null,
//         NS_ERROR_OUT_OF_MEMORY if the copy fails, the failure code of
//         GetTextLength() if that fails, NS_OK otherwise.
nsresult
TextEditRules::TruncateInsertionIfNeeded(Selection* aSelection,
                                         const nsAString* aInString,
                                         nsAString* aOutString,
                                         int32_t aMaxLength,
                                         bool* aTruncated)
{
  // aTruncated is the only optional pointer.
  if (!aSelection || !aInString || !aOutString) {
    return NS_ERROR_NULL_POINTER;
  }

  // Start from an unmodified copy and assume nothing will be cut off.
  if (!aOutString->Assign(*aInString, mozilla::fallible)) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  if (aTruncated) {
    *aTruncated = false;
  }

  NS_ENSURE_STATE(mTextEditor);

  // The limit is only enforced when one is set, the editor is a plaintext
  // editor, and no IME composition is in progress.
  if (aMaxLength == -1 || !IsPlaintextEditor() ||
      mTextEditor->IsIMEComposing()) {
    return NS_OK;
  }

  // Length of the text currently in the editor.
  int32_t currentLength;
  nsresult rv = mTextEditor->GetTextLength(&currentLength);
  if (NS_FAILED(rv)) {
    return rv;
  }

  // The selected range is deleted by the insertion, so it does not count
  // against the limit. A collapsed selection contributes 0.
  int32_t selStart, selEnd;
  nsContentUtils::GetSelectionInTextControl(aSelection,
                                            mTextEditor->GetRoot(),
                                            selStart, selEnd);

  // An existing composition string is replaced by the new one, so it does
  // not count against the limit either.
  int32_t replacedCompositionLength = 0;
  if (TextComposition* composition = mTextEditor->GetComposition()) {
    replacedCompositionLength = composition->String().Length();
  }

  const int32_t selectedLength = selEnd - selStart;
  const int32_t keptLength =
    currentLength - selectedLength - replacedCompositionLength;

  // The text that stays already fills (or exceeds) the limit: insert nothing.
  if (keptLength >= aMaxLength) {
    // Truncating only shrinks the string, so it cannot run out of memory.
    aOutString->Truncate();
    if (aTruncated) {
      *aTruncated = true;
    }
    return NS_OK;
  }

  // Everything fits: leave the copy untouched.
  const int32_t insertLength = aOutString->Length();
  if (insertLength + keptLength <= aMaxLength) {
    return NS_OK;
  }

  // Keep only as many code units as there is room for.
  int32_t allowedLength = aMaxLength - keptLength;
  MOZ_ASSERT(allowedLength > 0);
  const char16_t lastKept = aOutString->CharAt(allowedLength - 1);
  const char16_t firstDropped = aOutString->CharAt(allowedLength);
  // Never cut between the two halves of a surrogate pair; drop the whole
  // pair instead.
  if (NS_IS_HIGH_SURROGATE(lastKept) && NS_IS_LOW_SURROGATE(firstDropped)) {
    --allowedLength;
  }
  // XXX What should we do if we're removing IVS and its preceding
  //     character won't be removed?
  // Truncating only shrinks the string, so it cannot run out of memory.
  aOutString->Truncate(allowedLength);
  if (aTruncated) {
    *aTruncated = true;
  }
  return NS_OK;
}
// Rebuilds the glyph data of aTextRun after applying a case transformation
// to its text.
//
// For every UTF-16 code unit of aTextRun->mString the transform selected by
// mAllUppercase, or otherwise by the character's style (mTextTransform), is
// applied and the result is appended to convertedString. While doing so the
// function maintains several parallel arrays:
//   - deletedCharsArray:   one entry per *input* code unit; true if that
//                          unit has no counterpart of its own in the output.
//   - charsToMergeArray,
//     styleArray,
//     canBreakBeforeArray: one entry per *output* code unit.
// A child text run is then created from convertedString. If any character
// was deleted or expanded (mergeNeeded), its glyphs are merged back into
// aTextRun via MergeCharactersInTextRun(); otherwise the glyph data is
// copied across directly.
//
// @param aTextRun    The transformed text run to rebuild (source text and
//                    styles are read from it, glyphs are written to it).
// @param aRefContext Reference context forwarded to the inner text run
//                    creation and line-break/shaping calls.
void
nsCaseTransformTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun,
    gfxContext* aRefContext)
{
  PRUint32 length = aTextRun->GetLength();
  const PRUnichar* str = aTextRun->mString.BeginReading();
  nsRefPtr<nsStyleContext>* styles = aTextRun->mStyles.Elements();

  nsAutoString convertedString;
  nsAutoTArray<bool,50> charsToMergeArray;
  nsAutoTArray<bool,50> deletedCharsArray;
  nsAutoTArray<nsStyleContext*,50> styleArray;
  nsAutoTArray<PRUint8,50> canBreakBeforeArray;
  bool mergeNeeded = false;

  // Some languages have special casing conventions that differ from the
  // default Unicode mappings.
  // The enum values here are named for well-known exemplar languages that
  // exhibit the behavior in question; multiple lang tags may map to the
  // same setting here, if the behavior is shared by other languages.
  enum {
    eNone,    // default non-lang-specific behavior
    eTurkish, // preserve dotted/dotless-i distinction in uppercase
    eDutch,   // treat "ij" digraph as a unit for capitalization
    eGreek    // strip accent when uppercasing Greek vowels
  } languageSpecificCasing = eNone;

  // State carried from one character to the next:
  //   lang              - language of the previous character's style
  //   capitalizeDutchIJ - previous character was a capitalized Dutch 'i'
  //   prevIsLetter /
  //   sigmaIndex        - context for the final-sigma lowercasing rule
  //   greekState        - state passed to GreekUpperCase()
  const nsIAtom* lang = nullptr;
  bool capitalizeDutchIJ = false;
  bool prevIsLetter = false;
  PRUint32 sigmaIndex = PRUint32(-1);
  nsIUGenCategory::nsUGenCategory cat;
  GreekCasingState greekState = kStart;
  PRUint32 i;
  for (i = 0; i < length; ++i) {
    PRUint32 ch = str[i];
    nsStyleContext* styleContext = styles[i];

    // mAllUppercase overrides whatever text-transform the style specifies.
    PRUint8 style = mAllUppercase ? NS_STYLE_TEXT_TRANSFORM_UPPERCASE
      : styleContext->GetStyleText()->mTextTransform;
    // Number of additional output code units produced for this character
    // beyond the first one; each is later flagged in charsToMergeArray.
    int extraChars = 0;
    const mozilla::unicode::MultiCharMapping *mcm;

    // Combine a well-formed surrogate pair into a single code point.
    if (NS_IS_HIGH_SURROGATE(ch) && i < length - 1 && NS_IS_LOW_SURROGATE(str[i + 1])) {
      ch = SURROGATE_TO_UCS4(ch, str[i + 1]);
    }

    // Re-evaluate the language-specific casing mode only when the language
    // differs from that of the previous character.
    if (lang != styleContext->GetStyleFont()->mLanguage) {
      lang = styleContext->GetStyleFont()->mLanguage;
      if (lang == nsGkAtoms::tr || lang == nsGkAtoms::az ||
          lang == nsGkAtoms::ba || lang == nsGkAtoms::crh ||
          lang == nsGkAtoms::tt) {
        languageSpecificCasing = eTurkish;
      } else if (lang == nsGkAtoms::nl) {
        languageSpecificCasing = eDutch;
      } else if (lang == nsGkAtoms::el) {
        languageSpecificCasing = eGreek;
        greekState = kStart;
      } else {
        languageSpecificCasing = eNone;
      }
    }

    switch (style) {
    case NS_STYLE_TEXT_TRANSFORM_LOWERCASE:
      // Turkish-style lowercasing: 'I' -> dotless i, dotted capital I -> 'i'.
      // Both results are letters and cancel any pending final-sigma marker.
      if (languageSpecificCasing == eTurkish) {
        if (ch == 'I') {
          ch = LATIN_SMALL_LETTER_DOTLESS_I;
          prevIsLetter = true;
          sigmaIndex = PRUint32(-1);
          break;
        }
        if (ch == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
          ch = 'i';
          prevIsLetter = true;
          sigmaIndex = PRUint32(-1);
          break;
        }
      }

      // Special lowercasing behavior for Greek Sigma: note that this is listed
      // as context-sensitive in Unicode's SpecialCasing.txt, but is *not* a
      // language-specific mapping; it applies regardless of the language of
      // the element.
      //
      // The lowercase mapping for CAPITAL SIGMA should be to SMALL SIGMA (i.e.
      // the non-final form) whenever there is a following letter, or when the
      // CAPITAL SIGMA occurs in isolation (neither preceded nor followed by a
      // LETTER); and to FINAL SIGMA when it is preceded by another letter but
      // not followed by one.
      //
      // To implement the context-sensitive nature of this mapping, we keep
      // track of whether the previous character was a letter. If not, CAPITAL
      // SIGMA will map directly to SMALL SIGMA. If the previous character
      // was a letter, CAPITAL SIGMA maps to FINAL SIGMA and we record the
      // position in the converted string; if we then encounter another letter,
      // that FINAL SIGMA is replaced with a standard SMALL SIGMA.

      cat = mozilla::unicode::GetGenCategory(ch);

      // If sigmaIndex is not -1, it marks where we have provisionally mapped
      // a CAPITAL SIGMA to FINAL SIGMA; if we now find another letter, we
      // need to change it to SMALL SIGMA.
      if (sigmaIndex != PRUint32(-1)) {
        if (cat == nsIUGenCategory::kLetter) {
          convertedString.SetCharAt(GREEK_SMALL_LETTER_SIGMA, sigmaIndex);
        }
      }

      if (ch == GREEK_CAPITAL_LETTER_SIGMA) {
        // If preceding char was a letter, map to FINAL instead of SMALL,
        // and note where it occurred by setting sigmaIndex; we'll change it
        // to standard SMALL SIGMA later if another letter follows
        if (prevIsLetter) {
          ch = GREEK_SMALL_LETTER_FINAL_SIGMA;
          sigmaIndex = convertedString.Length();
        } else {
          // CAPITAL SIGMA not preceded by a letter is unconditionally mapped
          // to SMALL SIGMA
          ch = GREEK_SMALL_LETTER_SIGMA;
          sigmaIndex = PRUint32(-1);
        }
        prevIsLetter = true;
        break;
      }

      // ignore diacritics for the purpose of contextual sigma mapping;
      // otherwise, reset prevIsLetter appropriately and clear the
      // sigmaIndex marker
      if (cat != nsIUGenCategory::kMark) {
        prevIsLetter = (cat == nsIUGenCategory::kLetter);
        sigmaIndex = PRUint32(-1);
      }

      // Multi-character lowercase mapping: append every mapped character
      // except the last one here (counting each in extraChars); the last one
      // is left in ch and appended by the common code after the switch.
      mcm = mozilla::unicode::SpecialLower(ch);
      if (mcm) {
        int j = 0;
        while (j < 2 && mcm->mMappedChars[j + 1]) {
          convertedString.Append(mcm->mMappedChars[j]);
          ++extraChars;
          ++j;
        }
        ch = mcm->mMappedChars[j];
        break;
      }

      ch = ToLowerCase(ch);
      break;

    case NS_STYLE_TEXT_TRANSFORM_UPPERCASE:
      // Turkish-style uppercasing keeps the dot on 'i'.
      if (languageSpecificCasing == eTurkish && ch == 'i') {
        ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
        break;
      }

      // Greek uppercasing is delegated entirely to GreekUpperCase(), which
      // is given the running greekState.
      // NOTE(review): presumably this is what can yield PRUint32(-1) for a
      // character that should be dropped (see the check after the switch) —
      // confirm against GreekUpperCase().
      if (languageSpecificCasing == eGreek) {
        ch = GreekUpperCase(ch, &greekState);
        break;
      }

      // Multi-character uppercase mapping; same scheme as for SpecialLower.
      mcm = mozilla::unicode::SpecialUpper(ch);
      if (mcm) {
        int j = 0;
        while (j < 2 && mcm->mMappedChars[j + 1]) {
          convertedString.Append(mcm->mMappedChars[j]);
          ++extraChars;
          ++j;
        }
        ch = mcm->mMappedChars[j];
        break;
      }

      ch = ToUpperCase(ch);
      break;

    case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE:
      // A 'j' directly following a capitalized Dutch 'i' is capitalized too.
      if (capitalizeDutchIJ && ch == 'j') {
        ch = 'J';
        capitalizeDutchIJ = false;
        break;
      }
      capitalizeDutchIJ = false;
      // Only positions flagged in aTextRun->mCapitalize are title-cased;
      // every other character passes through unchanged.
      if (i < aTextRun->mCapitalize.Length() && aTextRun->mCapitalize[i]) {
        if (languageSpecificCasing == eTurkish && ch == 'i') {
          ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
          break;
        }
        if (languageSpecificCasing == eDutch && ch == 'i') {
          ch = 'I';
          capitalizeDutchIJ = true;
          break;
        }

        // Multi-character titlecase mapping; same scheme as for SpecialLower.
        mcm = mozilla::unicode::SpecialTitle(ch);
        if (mcm) {
          int j = 0;
          while (j < 2 && mcm->mMappedChars[j + 1]) {
            convertedString.Append(mcm->mMappedChars[j]);
            ++extraChars;
            ++j;
          }
          ch = mcm->mMappedChars[j];
          break;
        }

        ch = ToTitleCase(ch);
      }
      break;

    default:
      break;
    }

    // A result of PRUint32(-1) means the character produces no output: mark
    // the input position as deleted and append nothing.
    if (ch == PRUint32(-1)) {
      deletedCharsArray.AppendElement(true);
      mergeNeeded = true;
    } else {
      // Bookkeeping for the first output code unit of this character.
      deletedCharsArray.AppendElement(false);
      charsToMergeArray.AppendElement(false);
      styleArray.AppendElement(styleContext);
      canBreakBeforeArray.AppendElement(aTextRun->CanBreakLineBefore(i));

      if (IS_IN_BMP(ch)) {
        convertedString.Append(ch);
      } else {
        // Non-BMP result: emit a surrogate pair and step over the low
        // surrogate of the input as well.
        convertedString.Append(H_SURROGATE(ch));
        convertedString.Append(L_SURROGATE(ch));
        ++i;
        deletedCharsArray.AppendElement(true); // not exactly deleted, but the
                                               // trailing surrogate is skipped
        ++extraChars;
      }

      // Every additional output code unit is flagged to be merged and gets
      // the same style with no break opportunity before it.
      while (extraChars-- > 0) {
        mergeNeeded = true;
        charsToMergeArray.AppendElement(true);
        styleArray.AppendElement(styleContext);
        canBreakBeforeArray.AppendElement(false);
      }
    }
  }

  PRUint32 flags;
  gfxTextRunFactory::Parameters innerParams =
      GetParametersForInner(aTextRun, &flags, aRefContext);
  gfxFontGroup* fontGroup = aTextRun->GetFontGroup();

  // Exactly one of these owns the child run, depending on whether an inner
  // transforming factory is chained; 'child' is a non-owning alias.
  nsAutoPtr<nsTransformedTextRun> transformedChild;
  nsAutoPtr<gfxTextRun> cachedChild;
  gfxTextRun* child;

  if (mInnerTransformingTextRunFactory) {
    transformedChild = mInnerTransformingTextRunFactory->MakeTextRun(
        convertedString.BeginReading(), convertedString.Length(),
        &innerParams, fontGroup, flags, styleArray.Elements(), false);
    child = transformedChild.get();
  } else {
    cachedChild = fontGroup->MakeTextRun(
        convertedString.BeginReading(), convertedString.Length(),
        &innerParams, flags);
    child = cachedChild.get();
  }
  if (!child)
    return;
  // Copy potential linebreaks into child so they're preserved
  // (and also child will be shaped appropriately)
  NS_ASSERTION(convertedString.Length() == canBreakBeforeArray.Length(),
               "Dropped characters or break-before values somewhere!");
  child->SetPotentialLineBreaks(0, canBreakBeforeArray.Length(),
      canBreakBeforeArray.Elements(), aRefContext);
  if (transformedChild) {
    transformedChild->FinishSettingProperties(aRefContext);
  }

  if (mergeNeeded) {
    // Now merge multiple characters into one multi-glyph character as required
    // and deal with skipping deleted accent chars
    NS_ASSERTION(charsToMergeArray.Length() == child->GetLength(),
                 "source length mismatch");
    NS_ASSERTION(deletedCharsArray.Length() == aTextRun->GetLength(),
                 "destination length mismatch");
    MergeCharactersInTextRun(aTextRun, child, charsToMergeArray.Elements(),
                             deletedCharsArray.Elements());
  } else {
    // No merging to do, so just copy; this produces a more optimized textrun.
    // We can't steal the data because the child may be cached and stealing
    // the data would break the cache.
    aTextRun->ResetGlyphRuns();
    aTextRun->CopyGlyphDataFrom(child, 0, child->GetLength(), 0);
  }
}
/*
 * Normalize the UTF-16 string aSrcStr and emit the result into aToStr
 * through flush_before_cur().
 *
 * Each code point of the input (surrogate pairs are combined first) is
 * decomposed into a working buffer, its combining classes are looked up,
 * non-starters are reordered, and -- if do_composition is true --
 * sequences that begin with a starter are composed.  Finished portions of
 * the working buffer are flushed to aToStr as soon as a new starter
 * blocks any further reordering/composition.
 *
 *   do_composition  when true, compose() is applied to reordered sequences.
 *   compat          forwarded to decompose() (presumably selects
 *                   compatibility decomposition -- confirm in decompose()).
 *   aSrcStr         text to normalize.
 *   aToStr          receives the normalized text.
 *
 * Returns NS_OK on success, otherwise the first error reported by
 * decompose() or flush_before_cur().
 */
static nsresult
mdn_normalize(bool do_composition, bool compat,
	  const nsAString& aSrcStr, nsAString& aToStr)
{
	workbuf_t wb;
	nsresult r = NS_OK;
	/*
	 * Initialize working buffer.
	 */
	workbuf_init(&wb);

	nsAString::const_iterator start, end;
	aSrcStr.BeginReading(start);
	aSrcStr.EndReading(end);

	while (start != end) {
		PRUint32 c;
		PRUnichar curChar;

		//assert(wb.cur == wb.last);

		/*
		 * Get one character from 'from'.
		 */
		curChar= *start++;

		/*
		 * Combine a well-formed surrogate pair into one code point;
		 * an unpaired surrogate is passed through as is.
		 */
		if (NS_IS_HIGH_SURROGATE(curChar) && start != end && NS_IS_LOW_SURROGATE(*(start)) ) {
			c = SURROGATE_TO_UCS4(curChar, *start);
			++start;
		} else {
			c = curChar;
		}

		/*
		 * Decompose it.
		 */
		if ((r = decompose(&wb, c, compat)) != NS_OK)
			break;

		/*
		 * Get canonical class.
		 */
		get_class(&wb);

		/*
		 * Reorder & compose.
		 */
		for (; wb.cur < wb.last; wb.cur++) {
			if (wb.cur == 0) {
				continue;
			} else if (wb.cclass[wb.cur] > 0) {
				/*
				 * This is not a starter. Try reordering.
				 * Note that characters up to it are
				 * already in canonical order.
				 */
				reorder(&wb);
				continue;
			}

			/*
			 * This is a starter character, and there are
			 * some characters before it.  Those characters
			 * have been reordered properly, and
			 * ready for composition.
			 */
			if (do_composition && wb.cclass[0] == 0)
				compose(&wb);

			/*
			 * If CUR points to a starter character,
			 * then process of characters before CUR are
			 * already finished, because any further
			 * reordering/composition for them are blocked
			 * by the starter CUR points.
			 */
			if (wb.cur > 0 && wb.cclass[wb.cur] == 0) {
				/* Flush everything before CUR. */
				r = flush_before_cur(&wb, aToStr);
				if (r != NS_OK)
					break;
			}
		}

		/*
		 * The break above only leaves the inner loop.  Stop reading
		 * input as well; otherwise the next decompose() call would
		 * overwrite the error code and the failure would go
		 * unreported.
		 */
		if (r != NS_OK)
			break;
	}

	if (r == NS_OK) {
		if (do_composition && wb.cur > 0 && wb.cclass[0] == 0) {
			/*
			 * There are some characters left in WB.
			 * They are ordered, but not composed yet.
			 * Now CUR points just after the last character in WB,
			 * and since compose() tries to compose characters
			 * between top and CUR inclusive, we must make CUR
			 * one character back during compose().
			 */
			wb.cur--;
			compose(&wb);
			wb.cur++;
		}
		/*
		 * Call this even when WB.CUR == 0, to make TO
		 * NUL-terminated.
		 */
		r = flush_before_cur(&wb, aToStr);
	}

	/* The working buffer is released on both success and failure. */
	workbuf_free(&wb);

	return (r);
}
// Default implementation of SetupClusterBoundaries, driven purely by Unicode
// character properties; platform subclasses may override it.
//
// Walks the UTF-16 text in aString (aTextRun->GetLength() code units) and,
// for every offset that must not begin a new cluster, stores the
// "extendCluster" glyph record in aTextRun at that offset. This happens for:
//   - combining marks, ZWNJ/ZWJ and the halfwidth katakana sound marks
//     (unless they are the very first character),
//   - Hangul jamo/syllables that continue the preceding Hangul sequence,
//   - the low surrogate of every surrogate pair.
void
gfxPlatform::SetupClusterBoundaries(gfxTextRun *aTextRun, const PRUnichar *aString)
{
    if (aTextRun->GetFlags() & gfxTextRunFactory::TEXT_IS_8BIT) {
        // 8-bit text doesn't have clusters.
        // XXX is this true in all languages???
        // behdad: don't think so.  Czech for example IIRC has a
        // 'ch' grapheme.
        // jfkthame: but that's not expected to behave as a grapheme cluster
        // for selection/editing/etc.
        return;
    }

    // Glyph record written at each offset that continues a cluster.
    gfxTextRun::CompressedGlyph extendCluster;
    extendCluster.SetComplex(PR_FALSE, PR_TRUE, 0);

    const PRUint32 length = aTextRun->GetLength();
    // Hangul syllable type of the previous character (HST_NONE if it was
    // not Hangul).
    gfxUnicodeProperties::HSType hangulState = gfxUnicodeProperties::HST_NONE;

    for (PRUint32 i = 0; i < length; ++i) {
        // Decode the code point at offset i, combining a surrogate pair.
        PRUint32 ch = aString[i];
        const PRBool surrogatePair =
            NS_IS_HIGH_SURROGATE(ch) &&
            i < length - 1 && NS_IS_LOW_SURROGATE(aString[i+1]);
        if (surrogatePair) {
            ch = SURROGATE_TO_UCS4(ch, aString[i+1]);
        }

        const PRUint8 category = gfxUnicodeProperties::GetGeneralCategory(ch);
        gfxUnicodeProperties::HSType hangulType = gfxUnicodeProperties::HST_NONE;

        // Set when offset i has to be attached to the preceding cluster.
        PRBool extendsPrevious = PR_FALSE;

        const PRBool isMark = category >= HB_CATEGORY_COMBINING_MARK &&
                              category <= HB_CATEGORY_NON_SPACING_MARK;
        const PRBool isJoiner = ch >= 0x200c && ch <= 0x200d;   // ZWNJ, ZWJ
        const PRBool isKanaMark = ch >= 0xff9e && ch <= 0xff9f; // katakana sound marks

        if (isMark || isJoiner || isKanaMark) {
            // combining marks extend the cluster, but there is nothing to
            // extend at the very start of the text
            extendsPrevious = (i > 0);
        } else if (category == HB_CATEGORY_OTHER_LETTER) {
            // handle special cases in Letter_Other category
#if 0
            // Currently disabled. This would follow the UAX#29 specification
            // for extended grapheme clusters, but this is not favored by
            // Thai users, at least for editing behavior.
            // See discussion of equivalent Pango issue in bug 474068 and
            // upstream at https://bugzilla.gnome.org/show_bug.cgi?id=576156.

            if ((ch & ~0xff) == 0x0e00) {
                // specific Thai & Lao (U+0Exx) chars that extend the cluster
                if ( ch == 0x0e30 ||
                    (ch >= 0x0e32 && ch <= 0x0e33) ||
                     ch == 0x0e45 ||
                     ch == 0x0eb0 ||
                    (ch >= 0x0eb2 && ch <= 0x0eb3))
                {
                    if (i > 0) {
                        aTextRun->SetGlyphs(i, extendCluster, nsnull);
                    }
                }
                else if ((ch >= 0x0e40 && ch <= 0x0e44) ||
                         (ch >= 0x0ec0 && ch <= 0x0ec4))
                {
                    // characters that are prepended to the following cluster
                    if (i < length - 1) {
                        aTextRun->SetGlyphs(i+1, extendCluster, nsnull);
                    }
                }
            } else
#endif
            if ((ch & ~0xff) == 0x1100 ||
                (ch >= 0xa960 && ch <= 0xa97f) ||
                (ch >= 0xac00 && ch <= 0xd7ff))
            {
                // no break within Hangul syllables: whether this character
                // continues the cluster depends on its own syllable type and
                // on that of the previous character
                hangulType = gfxUnicodeProperties::GetHangulSyllableType(ch);
                switch (hangulType) {
                case gfxUnicodeProperties::HST_L:
                case gfxUnicodeProperties::HST_LV:
                case gfxUnicodeProperties::HST_LVT:
                    // only continues a preceding L
                    extendsPrevious =
                        (hangulState == gfxUnicodeProperties::HST_L);
                    break;
                case gfxUnicodeProperties::HST_V:
                    // continues any preceding Hangul whose type has no T bit
                    extendsPrevious =
                        (hangulState != gfxUnicodeProperties::HST_NONE) &&
                        !(hangulState & gfxUnicodeProperties::HST_T);
                    break;
                case gfxUnicodeProperties::HST_T:
                    // continues preceding Hangul whose type has a V or T bit
                    extendsPrevious =
                        (hangulState & (gfxUnicodeProperties::HST_V |
                                        gfxUnicodeProperties::HST_T)) != 0;
                    break;
                default:
                    break;
                }
            }
        }

        if (extendsPrevious) {
            aTextRun->SetGlyphs(i, extendCluster, nsnull);
        }

        // The low surrogate always belongs to the cluster of its high
        // surrogate; step over it so it is not examined on its own.
        if (surrogatePair) {
            ++i;
            aTextRun->SetGlyphs(i, extendCluster, nsnull);
        }

        hangulState = hangulType;
    }
}
// Rebuilds aTextRun for font-variant: small-caps.
//
// The text is split into consecutive runs that need the same treatment:
//   - characters left untouched (uppercase, caseless, or not small-caps),
//   - lowercase characters, which are uppercased and drawn with a copy of
//     the font group scaled to 80% size,
//   - Greek characters changed only by GreekUpperCase(), which are uppercased
//     but drawn at full size.
// A child text run is created for each run and its glyph data is copied into
// aTextRun at the run's start offset.
//
// @param aTextRun    The transformed text run to rebuild.
// @param aRefContext Reference context forwarded to the text-run helpers.
void
nsFontVariantTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun,
    gfxContext* aRefContext)
{
  gfxFontGroup* fontGroup = aTextRun->GetFontGroup();

  // Font group used for the reduced-size capitals.
  gfxFontStyle reducedStyle = *fontGroup->GetStyle();
  reducedStyle.size *= 0.8;
  nsRefPtr<gfxFontGroup> smallFont = fontGroup->Copy(&reducedStyle);
  if (!smallFont) {
    return;
  }

  PRUint32 flags;
  gfxTextRunFactory::Parameters innerParams =
      GetParametersForInner(aTextRun, &flags, aRefContext);

  const PRUint32 length = aTextRun->GetLength();
  const PRUnichar* text = aTextRun->mString.BeginReading();
  nsRefPtr<nsStyleContext>* styles = aTextRun->mStyles.Elements();

  // Untransformed run over the whole text; only used to ask where clusters
  // start.
  nsAutoPtr<gfxTextRun> inner(fontGroup->MakeTextRun(text, length, &innerParams, flags));
  if (!inner.get()) {
    return;
  }

  nsCaseTransformTextRunFactory uppercaseFactory(nullptr, true);

  aTextRun->ResetGlyphRuns();

  // Data accumulated for the run currently being collected.
  PRUint32 runStart = 0;
  nsAutoTArray<nsStyleContext*,50> styleArray;
  nsAutoTArray<PRUint8,50> canBreakBeforeArray;

  enum RunCaseState {
    kUpperOrCaseless, // will be untouched by font-variant:small-caps
    kLowercase,       // will be uppercased and reduced
    kSpecialUpper     // specials: don't shrink, but apply uppercase mapping
  };
  RunCaseState runCase = kUpperOrCaseless;

  // The loop deliberately runs one step past the last character (i == length)
  // so that the final run gets flushed. In that extra iteration neither
  // text[i] nor styles[i] may be touched, as both would be out of bounds.
  for (PRUint32 i = 0; i <= length; ++i) {
    const bool atEnd = (i == length);

    // Work out the treatment the current character needs.
    RunCaseState chCase = kUpperOrCaseless;
    if (!atEnd) {
      if (!inner->IsClusterStart(i)) {
        // Code units inside a cluster simply join whatever run is in
        // progress.
        chCase = runCase;
      } else {
        nsStyleContext* styleContext = styles[i];
        // Characters whose style is not small-caps keep the default
        // (untouched) treatment.
        if (styleContext->GetStyleFont()->mFont.variant ==
            NS_STYLE_FONT_VARIANT_SMALL_CAPS) {
          PRUint32 ch = text[i];
          if (NS_IS_HIGH_SURROGATE(ch) && i < length - 1 &&
              NS_IS_LOW_SURROGATE(text[i + 1])) {
            ch = SURROGATE_TO_UCS4(ch, text[i + 1]);
          }
          if (ToUpperCase(ch) != ch || mozilla::unicode::SpecialUpper(ch)) {
            // Uppercasing changes the character: treat it as lowercase.
            chCase = kLowercase;
          } else if (styleContext->GetStyleFont()->mLanguage == nsGkAtoms::el) {
            // In Greek, also catch characters that only the GreekUpperCase
            // mapping modifies, such as accented capitals that lose their
            // accent (bug 307039). They go through a transformed child run
            // that uses the full-size font. Exact casing context is not
            // needed for this test, so a fresh state is used.
            GreekCasingState state = kStart;
            if (GreekUpperCase(ch, &state) != ch) {
              chCase = kSpecialUpper;
            }
          }
        }
      }
    }

    // Flush the run collected so far when the text ends or the treatment
    // changes. Nothing at offset [i] is read here, because i may equal
    // length.
    if (runStart < i && (atEnd || runCase != chCase)) {
      const PRUnichar* runText = text + runStart;
      const PRUint32 runLength = i - runStart;

      // Whichever smart pointer is set owns the child run; 'child' is a
      // non-owning alias.
      nsAutoPtr<nsTransformedTextRun> transformedChild;
      nsAutoPtr<gfxTextRun> cachedChild;
      gfxTextRun* child;

      switch (runCase) {
      case kUpperOrCaseless:
        // Plain run in the normal font.
        cachedChild =
          fontGroup->MakeTextRun(runText, runLength, &innerParams, flags);
        child = cachedChild.get();
        break;
      case kLowercase:
        // Uppercased run in the reduced-size font.
        transformedChild =
          uppercaseFactory.MakeTextRun(runText, runLength,
                                       &innerParams, smallFont, flags,
                                       styleArray.Elements(), false);
        child = transformedChild.get();
        break;
      case kSpecialUpper:
        // Uppercased run in the normal-size font.
        transformedChild =
          uppercaseFactory.MakeTextRun(runText, runLength,
                                       &innerParams, fontGroup, flags,
                                       styleArray.Elements(), false);
        child = transformedChild.get();
        break;
      }
      if (!child) {
        return;
      }

      // Hand the potential line breaks to the child so they are preserved
      // (this also gets the child shaped appropriately).
      NS_ASSERTION(canBreakBeforeArray.Length() == i - runStart,
                   "lost some break-before values?");
      child->SetPotentialLineBreaks(0, canBreakBeforeArray.Length(),
          canBreakBeforeArray.Elements(), aRefContext);
      if (transformedChild) {
        transformedChild->FinishSettingProperties(aRefContext);
      }
      aTextRun->CopyGlyphDataFrom(child, 0, child->GetLength(), runStart);

      // Start collecting the next run at the current offset.
      runStart = i;
      styleArray.Clear();
      canBreakBeforeArray.Clear();
    }

    // Add the current character to the run in progress.
    if (!atEnd) {
      runCase = chCase;
      styleArray.AppendElement(styles[i]);
      canBreakBeforeArray.AppendElement(aTextRun->CanBreakLineBefore(i));
    }
  }
}
NS_IMETHODIMP
nsIndexedToHTML::OnIndexAvailable(nsIRequest *aRequest,
                                  nsISupports *aCtxt,
                                  nsIDirIndex *aIndex) {
    nsresult rv;
    if (!aIndex)
        return NS_ERROR_NULL_POINTER;

    nsString pushBuffer;
    pushBuffer.AppendLiteral("<tr");

    nsXPIDLString description;
    aIndex->GetDescription(getter_Copies(description));
    if (description.First() == char16_t('.'))
        pushBuffer.AppendLiteral(" class=\"hidden-object\"");

    pushBuffer.AppendLiteral(">\n <td sortable-data=\"");

    // The sort key is the name of the item, prepended by either 0, 1 or 2
    // in order to group items.
    uint32_t type;
    aIndex->GetType(&type);
    switch (type) {
        case nsIDirIndex::TYPE_SYMLINK:
            pushBuffer.AppendInt(0);
            break;
        case nsIDirIndex::TYPE_DIRECTORY:
            pushBuffer.AppendInt(1);
            break;
        case nsIDirIndex::TYPE_FILE:
        case nsIDirIndex::TYPE_UNKNOWN:
            pushBuffer.AppendInt(2);
            break;
    }
    char16_t* escaped = nsEscapeHTML2(description.get(), description.Length());
    pushBuffer.Append(escaped);

    pushBuffer.AppendLiteral("\"><a class=\"");
    switch (type) {
        case nsIDirIndex::TYPE_DIRECTORY:
            pushBuffer.AppendLiteral("dir");
            break;
        case nsIDirIndex::TYPE_SYMLINK:
            pushBuffer.AppendLiteral("symlink");
            break;
        case nsIDirIndex::TYPE_FILE:
        case nsIDirIndex::TYPE_UNKNOWN:
            pushBuffer.AppendLiteral("file");
            break;
    }
    pushBuffer.AppendLiteral("\"");

    // Truncate long names to not stretch the table
    //XXX this should be left to the stylesheet (bug 391471)
    nsString escapedShort;
    if (description.Length() > 71) {
        nsCOMPtr<nsIChannel> channel = do_QueryInterface(aRequest);
        nsCOMPtr<nsIURI> uri;
        rv = channel->GetURI(getter_AddRefs(uri));
        if (NS_FAILED(rv)) return rv;

        //XXX this potentially truncates after a combining char (bug 391472)
        nsXPIDLString descriptionAffix;
        descriptionAffix.Assign(description);
        descriptionAffix.Cut(0, descriptionAffix.Length() - 25);
        if (NS_IS_LOW_SURROGATE(descriptionAffix.First()))
            descriptionAffix.Cut(0, 1);
        description.Truncate(std::min<uint32_t>(71, description.Length() - 28));
        if (NS_IS_HIGH_SURROGATE(description.Last()))
            description.Truncate(description.Length() - 1);

        escapedShort.Adopt(nsEscapeHTML2(description.get(), description.Length()));

        escapedShort.Append(mEscapedEllipsis);
        // add ZERO WIDTH SPACE (U+200B) for wrapping
        escapedShort.AppendLiteral("&#8203;");
        nsString tmp;
        tmp.Adopt(nsEscapeHTML2(descriptionAffix.get(), descriptionAffix.Length()));
        escapedShort.Append(tmp);

        pushBuffer.AppendLiteral(" title=\"");
        pushBuffer.Append(escaped);
        pushBuffer.AppendLiteral("\"");
    }
    if (escapedShort.IsEmpty())
        escapedShort.Assign(escaped);
    nsMemory::Free(escaped);

    pushBuffer.AppendLiteral(" href=\"");
    nsXPIDLCString loc;
    aIndex->GetLocation(getter_Copies(loc));

    nsXPIDLCString encoding;
    rv = mParser->GetEncoding(getter_Copies(encoding));
    if (NS_FAILED(rv)) return rv;

    // Don't byte-to-Unicode conversion here, it is lossy.
    loc.SetLength(nsUnescapeCount(loc.BeginWriting()));

    // need to escape links
    nsAutoCString locEscaped;

    // Adding trailing slash helps to recognize whether the URL points to a file
    // or a directory (bug #214405).
    if ((type == nsIDirIndex::TYPE_DIRECTORY) && (loc.Last() != '/')) {
        loc.Append('/');
    }

    // now minimally re-escape the location...
    uint32_t escFlags;
    // for some protocols, we expect the location to be absolute.
    // if so, and if the location indeed appears to be a valid URI, then go
    // ahead and treat it like one.
    if (mExpectAbsLoc &&
        NS_SUCCEEDED(net_ExtractURLScheme(loc, nullptr, nullptr, nullptr))) {
        // escape as absolute 
        escFlags = esc_Forced | esc_OnlyASCII | esc_AlwaysCopy | esc_Minimal;
    }
    else {
        // escape as relative
        // esc_Directory is needed because directories have a trailing slash.
        // Without it, the trailing '/' will be escaped, and links from within
        // that directory will be incorrect
        escFlags = esc_Forced | esc_OnlyASCII | esc_AlwaysCopy | esc_FileBaseName | esc_Colon | esc_Directory;
    }
    NS_EscapeURL(loc.get(), loc.Length(), escFlags, locEscaped);
    // esc_Directory does not escape the semicolons, so if a filename
    // contains semicolons we need to manually escape them.
    // This replacement should be removed in bug #473280
    locEscaped.ReplaceSubstring(";", "%3b");
    nsAutoString utf16URI;
    if (encoding.EqualsLiteral("UTF-8")) {
        // Try to convert non-ASCII bytes to Unicode using UTF-8 decoder.
        nsCOMPtr<nsIUnicodeDecoder> decoder =
            mozilla::dom::EncodingUtils::DecoderForEncoding("UTF-8");
        decoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal);

        int32_t len = locEscaped.Length();
        int32_t outlen = 0;
        rv = decoder->GetMaxLength(locEscaped.get(), len, &outlen);
        if (NS_FAILED(rv)) {
            return rv;
        }
        nsAutoArrayPtr<char16_t> outbuf(new char16_t[outlen]);
        rv = decoder->Convert(locEscaped.get(), &len, outbuf, &outlen);
        // Use the result only if the sequence is valid as UTF-8.
        if (rv == NS_OK) {
            utf16URI.Append(outbuf, outlen);
        }
    }
    if (utf16URI.IsEmpty()) {
        // Escape all non-ASCII bytes to preserve the raw value.
        nsAutoCString outstr;
        NS_EscapeURL(locEscaped, esc_AlwaysCopy | esc_OnlyNonASCII, outstr);
        CopyASCIItoUTF16(outstr, utf16URI);
    }
    nsString htmlEscapedURL;
    htmlEscapedURL.Adopt(nsEscapeHTML2(utf16URI.get(), utf16URI.Length()));
    pushBuffer.Append(htmlEscapedURL);

    pushBuffer.AppendLiteral("\">");

    if (type == nsIDirIndex::TYPE_FILE || type == nsIDirIndex::TYPE_UNKNOWN) {
        pushBuffer.AppendLiteral("<img src=\"moz-icon://");
        int32_t lastDot = locEscaped.RFindChar('.');
        if (lastDot != kNotFound) {
            locEscaped.Cut(0, lastDot);
            NS_ConvertUTF8toUTF16 utf16LocEscaped(locEscaped);
            nsString htmlFileExt;
            htmlFileExt.Adopt(nsEscapeHTML2(utf16LocEscaped.get(), utf16LocEscaped.Length()));
            pushBuffer.Append(htmlFileExt);
        } else {
            pushBuffer.AppendLiteral("unknown");
        }
        pushBuffer.AppendLiteral("?size=16\" alt=\"");

        nsXPIDLString altText;
        rv = mBundle->GetStringFromName(MOZ_UTF16("DirFileLabel"),
                                        getter_Copies(altText));
        if (NS_FAILED(rv)) return rv;
        AppendNonAsciiToNCR(altText, pushBuffer);
        pushBuffer.AppendLiteral("\">");
    }

    pushBuffer.Append(escapedShort);
    pushBuffer.AppendLiteral("</a></td>\n <td");

    if (type == nsIDirIndex::TYPE_DIRECTORY || type == nsIDirIndex::TYPE_SYMLINK) {
        pushBuffer.AppendLiteral(">");
    } else {
        int64_t size;
        aIndex->GetSize(&size);

        if (uint64_t(size) != UINT64_MAX) {
            pushBuffer.AppendLiteral(" sortable-data=\"");
            pushBuffer.AppendInt(size);
            pushBuffer.AppendLiteral("\">");
            nsAutoString  sizeString;
            FormatSizeString(size, sizeString);
            pushBuffer.Append(sizeString);
        } else {
            pushBuffer.AppendLiteral(">");
        }
    }
    pushBuffer.AppendLiteral("</td>\n <td");

    PRTime t;
    aIndex->GetLastModified(&t);

    if (t == -1) {
        pushBuffer.AppendLiteral("></td>\n <td>");
    } else {
        pushBuffer.AppendLiteral(" sortable-data=\"");
        pushBuffer.AppendInt(static_cast<int64_t>(t));
        pushBuffer.AppendLiteral("\">");
        nsAutoString formatted;
        mDateTime->FormatPRTime(nullptr,
                                kDateFormatShort,
                                kTimeFormatNone,
                                t,
                                formatted);
        AppendNonAsciiToNCR(formatted, pushBuffer);
        pushBuffer.AppendLiteral("</td>\n <td>");
        mDateTime->FormatPRTime(nullptr,
                                kDateFormatNone,
                                kTimeFormatSeconds,
                                t,
                                formatted);
        // use NCR to show date in any doc charset
        AppendNonAsciiToNCR(formatted, pushBuffer);
    }

    pushBuffer.AppendLiteral("</td>\n</tr>");

    return FormatInputStream(aRequest, aCtxt, pushBuffer);
}
// Formats one directory-index entry (aIndex) as an HTML table row and hands
// it to FormatInputStream().
//
// The row contains, in order: a name cell (with a sort key, a link to the
// entry and, for files, a moz-icon image), a size cell and two date/time
// cells.
//
// @param aRequest  forwarded unchanged to FormatInputStream().
// @param aCtxt     forwarded unchanged to FormatInputStream().
// @param aIndex    the entry to format; must not be null.
// @return NS_ERROR_NULL_POINTER if aIndex is null, the failure code of any
//         service call that fails, otherwise the result of
//         FormatInputStream().
NS_IMETHODIMP
nsIndexedToHTML::OnIndexAvailable(nsIRequest *aRequest,
                                  nsISupports *aCtxt,
                                  nsIDirIndex *aIndex) {
    nsresult rv;
    if (!aIndex)
        return NS_ERROR_NULL_POINTER;

    nsString pushBuffer;
    pushBuffer.AppendLiteral("<tr");

    nsXPIDLString description;
    aIndex->GetDescription(getter_Copies(description));
    // Only peek at the first character when there is one.
    if (!description.IsEmpty() && description.First() == PRUnichar('.'))
        pushBuffer.AppendLiteral(" class=\"hidden-object\"");

    pushBuffer.AppendLiteral(">\n <td sortable-data=\"");

    // The sort key is the name of the item, prepended by either 0, 1 or 2
    // in order to group items.
    uint32_t type;
    aIndex->GetType(&type);
    switch (type) {
        case nsIDirIndex::TYPE_SYMLINK:
            pushBuffer.AppendInt(0);
            break;
        case nsIDirIndex::TYPE_DIRECTORY:
            pushBuffer.AppendInt(1);
            break;
        case nsIDirIndex::TYPE_FILE:
        case nsIDirIndex::TYPE_UNKNOWN:
            pushBuffer.AppendInt(2);
            break;
    }
    // Let a string object own the escaped buffer so that it is released on
    // every exit path instead of relying on a manual free further down.
    nsString escapedDescription;
    escapedDescription.Adopt(nsEscapeHTML2(description.get(),
                                           description.Length()));
    pushBuffer.Append(escapedDescription);

    pushBuffer.AppendLiteral("\"><a class=\"");
    switch (type) {
        case nsIDirIndex::TYPE_DIRECTORY:
            pushBuffer.AppendLiteral("dir");
            break;
        case nsIDirIndex::TYPE_SYMLINK:
            pushBuffer.AppendLiteral("symlink");
            break;
        case nsIDirIndex::TYPE_FILE:
        case nsIDirIndex::TYPE_UNKNOWN:
            pushBuffer.AppendLiteral("file");
            break;
    }
    pushBuffer.AppendLiteral("\"");

    // Truncate long names to not stretch the table
    //XXX this should be left to the stylesheet (bug 391471)
    nsString escapedShort;
    if (description.Length() > 71) {
        // Keep the last 25 code units as the visible tail of the name...
        //XXX this potentially truncates after a combining char (bug 391472)
        nsXPIDLString descriptionAffix;
        descriptionAffix.Assign(description);
        descriptionAffix.Cut(0, descriptionAffix.Length() - 25);
        // ...without starting in the middle of a surrogate pair.
        if (NS_IS_LOW_SURROGATE(descriptionAffix.First()))
            descriptionAffix.Cut(0, 1);
        // Keep the head of the name, again without splitting a pair.
        description.Truncate(NS_MIN<uint32_t>(71, description.Length() - 28));
        if (NS_IS_HIGH_SURROGATE(description.Last()))
            description.Truncate(description.Length() - 1);

        escapedShort.Adopt(nsEscapeHTML2(description.get(), description.Length()));

        escapedShort.Append(mEscapedEllipsis);
        // add ZERO WIDTH SPACE (U+200B) for wrapping
        escapedShort.AppendLiteral("&#8203;");
        nsString tmp;
        tmp.Adopt(nsEscapeHTML2(descriptionAffix.get(), descriptionAffix.Length()));
        escapedShort.Append(tmp);

        // The full (escaped) name goes into the tooltip.
        pushBuffer.AppendLiteral(" title=\"");
        pushBuffer.Append(escapedDescription);
        pushBuffer.AppendLiteral("\"");
    }
    if (escapedShort.IsEmpty())
        escapedShort.Assign(escapedDescription);

    pushBuffer.AppendLiteral(" href=\"");
    nsXPIDLCString loc;
    aIndex->GetLocation(getter_Copies(loc));

    // Lazily acquire the text-to-sub-URI service.
    if (!mTextToSubURI) {
        mTextToSubURI = do_GetService(NS_ITEXTTOSUBURI_CONTRACTID, &rv);
        if (NS_FAILED(rv)) return rv;
    }

    nsXPIDLCString encoding;
    rv = mParser->GetEncoding(getter_Copies(encoding));
    if (NS_FAILED(rv)) return rv;

    nsXPIDLString unEscapeSpec;
    rv = mTextToSubURI->UnEscapeAndConvert(encoding, loc,
                                           getter_Copies(unEscapeSpec));
    if (NS_FAILED(rv)) return rv;

    // need to escape links
    nsAutoCString escapeBuf;

    NS_ConvertUTF16toUTF8 utf8UnEscapeSpec(unEscapeSpec);

    // Adding trailing slash helps to recognize whether the URL points to a file
    // or a directory (bug #214405).
    // Last() must not be called on an empty string, so test for that first.
    if ((type == nsIDirIndex::TYPE_DIRECTORY) &&
        (utf8UnEscapeSpec.IsEmpty() || utf8UnEscapeSpec.Last() != '/')) {
        utf8UnEscapeSpec.Append('/');
    }

    // now minimally re-escape the location...
    uint32_t escFlags;
    // for some protocols, we expect the location to be absolute.
    // if so, and if the location indeed appears to be a valid URI, then go
    // ahead and treat it like one.
    if (mExpectAbsLoc &&
        NS_SUCCEEDED(net_ExtractURLScheme(utf8UnEscapeSpec, nullptr, nullptr, nullptr))) {
        // escape as absolute 
        escFlags = esc_Forced | esc_OnlyASCII | esc_AlwaysCopy | esc_Minimal;
    }
    else {
        // escape as relative
        // esc_Directory is needed because directories have a trailing slash.
        // Without it, the trailing '/' will be escaped, and links from within
        // that directory will be incorrect
        escFlags = esc_Forced | esc_OnlyASCII | esc_AlwaysCopy | esc_FileBaseName | esc_Colon | esc_Directory;
    }
    NS_EscapeURL(utf8UnEscapeSpec.get(), utf8UnEscapeSpec.Length(), escFlags, escapeBuf);
    // esc_Directory does not escape the semicolons, so if a filename
    // contains semicolons we need to manually escape them.
    // This replacement should be removed in bug #473280
    escapeBuf.ReplaceSubstring(";", "%3b");
    NS_ConvertUTF8toUTF16 utf16URI(escapeBuf);
    nsString htmlEscapedURL;
    htmlEscapedURL.Adopt(nsEscapeHTML2(utf16URI.get(), utf16URI.Length()));
    pushBuffer.Append(htmlEscapedURL);

    pushBuffer.AppendLiteral("\">");

    if (type == nsIDirIndex::TYPE_FILE || type == nsIDirIndex::TYPE_UNKNOWN) {
        pushBuffer.AppendLiteral("<img src=\"moz-icon://");
        // Everything from the last '.' of the escaped location onwards is
        // used as the icon's file extension.
        int32_t lastDot = escapeBuf.RFindChar('.');
        if (lastDot != kNotFound) {
            escapeBuf.Cut(0, lastDot);
            NS_ConvertUTF8toUTF16 utf16EscapeBuf(escapeBuf);
            nsString htmlFileExt;
            htmlFileExt.Adopt(nsEscapeHTML2(utf16EscapeBuf.get(), utf16EscapeBuf.Length()));
            pushBuffer.Append(htmlFileExt);
        } else {
            pushBuffer.AppendLiteral("unknown");
        }
        pushBuffer.AppendLiteral("?size=16\" alt=\"");

        nsXPIDLString altText;
        rv = mBundle->GetStringFromName(NS_LITERAL_STRING("DirFileLabel").get(),
                                        getter_Copies(altText));
        if (NS_FAILED(rv)) return rv;
        AppendNonAsciiToNCR(altText, pushBuffer);
        pushBuffer.AppendLiteral("\">");
    }

    pushBuffer.Append(escapedShort);
    pushBuffer.AppendLiteral("</a></td>\n <td");

    // Size cell: left empty for directories and symlinks, and for entries
    // whose size equals the all-ones sentinel.
    if (type == nsIDirIndex::TYPE_DIRECTORY || type == nsIDirIndex::TYPE_SYMLINK) {
        pushBuffer.AppendLiteral(">");
    } else {
        int64_t size;
        aIndex->GetSize(&size);

        if (uint64_t(size) != LL_MAXUINT) {
            pushBuffer.AppendLiteral(" sortable-data=\"");
            pushBuffer.AppendInt(size);
            pushBuffer.AppendLiteral("\">");
            nsAutoString  sizeString;
            FormatSizeString(size, sizeString);
            pushBuffer.Append(sizeString);
        } else {
            pushBuffer.AppendLiteral(">");
        }
    }
    pushBuffer.AppendLiteral("</td>\n <td");

    // Date and time cells: both left empty when the timestamp is -1.
    PRTime t;
    aIndex->GetLastModified(&t);

    if (t == -1) {
        pushBuffer.AppendLiteral("></td>\n <td>");
    } else {
        pushBuffer.AppendLiteral(" sortable-data=\"");
        pushBuffer.AppendInt(static_cast<int64_t>(t));
        pushBuffer.AppendLiteral("\">");
        nsAutoString formatted;
        mDateTime->FormatPRTime(nullptr,
                                kDateFormatShort,
                                kTimeFormatNone,
                                t,
                                formatted);
        AppendNonAsciiToNCR(formatted, pushBuffer);
        pushBuffer.AppendLiteral("</td>\n <td>");
        mDateTime->FormatPRTime(nullptr,
                                kDateFormatNone,
                                kTimeFormatSeconds,
                                t,
                                formatted);
        // use NCR to show date in any doc charset
        AppendNonAsciiToNCR(formatted, pushBuffer);
    }

    pushBuffer.AppendLiteral("</td>\n</tr>");

    return FormatInputStream(aRequest, aCtxt, pushBuffer);
}
// Creates this file under a name that is not yet taken.
//
// First tries the current name. If that already exists, tries
// "<root>-<N><suffix>" for N = 1..9999, where <suffix> is the part of the
// leaf name starting at its last '.'. When the full path is too long to
// leave room for the sequence number, the root of the leaf name is
// shortened first (without splitting a UTF-16 surrogate pair on Windows or
// a UTF-8 sequence elsewhere).
//
// @param aType        passed through to Create().
// @param aAttributes  passed through to Create().
// @return the result of the first Create() call that does not report
//         NS_ERROR_FILE_ALREADY_EXISTS; NS_ERROR_FILE_UNRECOGNIZED_PATH when
//         the name cannot be shortened enough; NS_ERROR_FILE_TOO_BIG when
//         all sequence numbers are taken.
NS_IMETHODIMP
nsLocalFile::CreateUnique(uint32_t aType, uint32_t aAttributes)
{
  nsresult rv;
  bool longName;

#ifdef XP_WIN
  nsAutoString pathName, leafName, rootName, suffix;
  rv = GetPath(pathName);
#else
  nsAutoCString pathName, leafName, rootName, suffix;
  rv = GetNativePath(pathName);
#endif
  if (NS_FAILED(rv)) {
    return rv;
  }

  // A "long" name leaves no room for appending a sequence number.
  longName = (pathName.Length() + kMaxSequenceNumberLength >
              kMaxFilenameLength);
  if (!longName) {
    rv = Create(aType, aAttributes);
    if (rv != NS_ERROR_FILE_ALREADY_EXISTS) {
      return rv;
    }
  }

#ifdef XP_WIN
  rv = GetLeafName(leafName);
  if (NS_FAILED(rv)) {
    return rv;
  }

  const int32_t lastDot = leafName.RFindChar(char16_t('.'));
#else
  rv = GetNativeLeafName(leafName);
  if (NS_FAILED(rv)) {
    return rv;
  }

  const int32_t lastDot = leafName.RFindChar('.');
#endif

  if (lastDot == kNotFound) {
    rootName = leafName;
  } else {
    suffix = Substring(leafName, lastDot);      // include '.'
    rootName = Substring(leafName, 0, lastDot); // strip suffix and dot
  }

  if (longName) {
    // Room left for the root once the directory part, the suffix and the
    // sequence number are accounted for.
    int32_t maxRootLength = (kMaxFilenameLength -
                             (pathName.Length() - leafName.Length()) -
                             suffix.Length() - kMaxSequenceNumberLength);

    // We cannot create an item inside a directory whose name is too long.
    // Also, ensure that at least one character remains after we truncate
    // the root name, as we don't want to end up with an empty leaf name.
    if (maxRootLength < 2) {
      return NS_ERROR_FILE_UNRECOGNIZED_PATH;
    }

#ifdef XP_WIN
    // ensure that we don't cut the name in mid-UTF16-character
    rootName.SetLength(NS_IS_LOW_SURROGATE(rootName[maxRootLength]) ?
                       maxRootLength - 1 : maxRootLength);
    SetLeafName(rootName + suffix);
#else
    if (NS_IsNativeUTF8()) {
      // ensure that we don't cut the name in mid-UTF8-character
      // (assume the name is valid UTF8 to begin with); stop at index 0 so
      // that a malformed name made only of continuation bytes cannot drive
      // the index below the start of the buffer.
      while (maxRootLength > 0 &&
             UTF8traits::isInSeq(rootName[maxRootLength])) {
        --maxRootLength;
      }

      // Another check to avoid ending up with an empty leaf name.
      if (maxRootLength == 0 && suffix.IsEmpty()) {
        return NS_ERROR_FILE_UNRECOGNIZED_PATH;
      }
    }

    rootName.SetLength(maxRootLength);
    SetNativeLeafName(rootName + suffix);
#endif
    nsresult rvCreate = Create(aType, aAttributes);
    if (rvCreate != NS_ERROR_FILE_ALREADY_EXISTS) {
      return rvCreate;
    }
  }

  for (int indx = 1; indx < 10000; ++indx) {
    // start with "Picture-1.jpg" after "Picture.jpg" exists
#ifdef XP_WIN
    SetLeafName(rootName +
                NS_ConvertASCIItoUTF16(nsPrintfCString("-%d", indx)) +
                suffix);
#else
    SetNativeLeafName(rootName + nsPrintfCString("-%d", indx) + suffix);
#endif
    rv = Create(aType, aAttributes);
    // Anything other than "already exists" (success included) ends the
    // search.
    if (rv != NS_ERROR_FILE_ALREADY_EXISTS) {
      return rv;
    }
  }

  // The disk is full, sort of
  return NS_ERROR_FILE_TOO_BIG;
}
// Appends aStr to aOutputStr, replacing individual characters with entity
// references as selected by mFlags.
//
// - Does nothing while mBodyOnly is set and mInBody is not.
// - Appends aStr verbatim when mDisableEntityEncoding is set.
// - When none of the four OutputEncode*Entities flags is set, defers to
//   nsXMLContentSerializer::AppendAndTranslateEntities().
//
// @param aStr        text to serialize.
// @param aOutputStr  string the result is appended to.
void
nsXHTMLContentSerializer::AppendAndTranslateEntities(const nsAString& aStr,
                                                     nsAString& aOutputStr)
{
  if (mBodyOnly && !mInBody) {
    return;
  }

  if (mDisableEntityEncoding) {
    aOutputStr.Append(aStr);
    return;
  }

  if (mFlags & (nsIDocumentEncoder::OutputEncodeBasicEntities  |
                nsIDocumentEncoder::OutputEncodeLatin1Entities |
                nsIDocumentEncoder::OutputEncodeHTMLEntities   |
                nsIDocumentEncoder::OutputEncodeW3CEntities)) {
    nsIParserService* parserService = nsContentUtils::GetParserService();

    if (!parserService) {
      NS_ERROR("Can't get parser service");
      return;
    }

    nsReadingIterator<PRUnichar> done_reading;
    aStr.EndReading(done_reading);

    // for each chunk of |aString|...
    // advanceLength is how far the iterator moves after each pass: the
    // characters copied verbatim plus the ones replaced by an entity.
    PRUint32 advanceLength = 0;
    nsReadingIterator<PRUnichar> iter;

    // Attribute values use a different entity table than other text.
    const char **entityTable = mInAttribute ? kAttrEntities : kEntities;

    for (aStr.BeginReading(iter);
          iter != done_reading;
          iter.advance(PRInt32(advanceLength))) {
      PRUint32 fragmentLength = iter.size_forward();
      PRUint32 lengthReplaced = 0; // the number of UTF-16 code units
                                    //  replaced by a particular entity
      const PRUnichar* c = iter.get();
      const PRUnichar* fragmentStart = c;
      const PRUnichar* fragmentEnd = c + fragmentLength;
      // Entity name without the surrounding '&' and ';' (table lookup or
      // parser service), if one was found.
      const char* entityText = nsnull;
      nsCAutoString entityReplacement;
      // Complete entity text produced by mEntityConverter; freed below.
      char* fullEntityText = nsnull;

      advanceLength = 0;
      // for each character in this chunk, check if it
      // needs to be replaced
      // The scan stops (break) at the first character that has a
      // replacement; advanceLength then counts the characters before it.
      for (; c < fragmentEnd; c++, advanceLength++) {
        PRUnichar val = *c;
        if (val == kValNBSP) {
          entityText = kEntityNBSP;
          break;
        }
        else if ((val <= kGTVal) && (entityTable[val][0] != 0)) {
          // The table has a non-empty entry for this character.
          entityText = entityTable[val];
          break;
        } else if (val > 127 &&
                  ((val < 256 &&
                    mFlags & nsIDocumentEncoder::OutputEncodeLatin1Entities) ||
                    mFlags & nsIDocumentEncoder::OutputEncodeHTMLEntities)) {
          parserService->HTMLConvertUnicodeToEntity(val, entityReplacement);

          // An empty result means no entity exists; the character is then
          // left as it is and scanning continues.
          if (!entityReplacement.IsEmpty()) {
            entityText = entityReplacement.get();
            break;
          }
        }
        else if (val > 127 &&
                  mFlags & nsIDocumentEncoder::OutputEncodeW3CEntities &&
                  mEntityConverter) {
          if (NS_IS_HIGH_SURROGATE(val) &&
              c + 1 < fragmentEnd &&
              NS_IS_LOW_SURROGATE(*(c + 1))) {
            // Note that ++c consumes the low surrogate here.
            PRUint32 valUTF32 = SURROGATE_TO_UCS4(val, *(++c));
            if (NS_SUCCEEDED(mEntityConverter->ConvertUTF32ToEntity(valUTF32,
                              nsIEntityConverter::entityW3C, &fullEntityText))) {
              lengthReplaced = 2;
              break;
            }
            else {
              // No entity for the pair: count the low surrogate that ++c
              // already stepped over so that it is copied verbatim too.
              advanceLength++;
            }
          }
          else if (NS_SUCCEEDED(mEntityConverter->ConvertToEntity(val,
                                nsIEntityConverter::entityW3C, 
                                &fullEntityText))) {
            lengthReplaced = 1;
            break;
          }
        }
      }

      // Copy the characters that needed no replacement, then the entity.
      aOutputStr.Append(fragmentStart, advanceLength);
      if (entityText) {
        aOutputStr.Append(PRUnichar('&'));
        AppendASCIItoUTF16(entityText, aOutputStr);
        aOutputStr.Append(PRUnichar(';'));
        // Step over the single character that was replaced.
        advanceLength++;
      }
      // if it comes from nsIEntityConverter, it already has '&' and ';'
      else if (fullEntityText) {
        AppendASCIItoUTF16(fullEntityText, aOutputStr);
        nsMemory::Free(fullEntityText);
        // Step over the one or two code units that were replaced.
        advanceLength += lengthReplaced;
      }
    }
  } else {
    nsXMLContentSerializer::AppendAndTranslateEntities(aStr, aOutputStr);
  }
}
Beispiel #10
0
// Computes hyphenation opportunities for aString.
//
// aHyphens is resized to aString.Length() and cleared; an element is then
// set to true for each UTF-16 code unit whose corresponding entry in the
// hyphenation library's output has its low bit set. The string is split
// into words (runs of characters whose general category is letter or mark)
// and each word is passed separately to hnj_hyphen_hyphenate2().
//
// @param aString   text to hyphenate.
// @param aHyphens  receives one flag per UTF-16 code unit of aString.
// @return NS_OK, or NS_ERROR_OUT_OF_MEMORY if a buffer cannot be sized.
nsresult
nsHyphenator::Hyphenate(const nsAString& aString,
                        nsTArray<bool>& aHyphens)
{
  if (!aHyphens.SetLength(aString.Length())) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  // Clear every flag. The fill size is given in bytes, so scale by the
  // element size instead of assuming sizeof(bool) == 1.
  memset(aHyphens.Elements(), 0, aHyphens.Length() * sizeof(bool));

  bool inWord = false;
  PRUint32 wordStart = 0, wordLimit = 0;
  PRUint32 chLen;
  for (PRUint32 i = 0; i < aString.Length(); i += chLen) {
    PRUint32 ch = aString[i];
    chLen = 1;

    // Combine a surrogate pair into one code point (two code units).
    if (NS_IS_HIGH_SURROGATE(ch)) {
      if (i + 1 < aString.Length() && NS_IS_LOW_SURROGATE(aString[i+1])) {
        ch = SURROGATE_TO_UCS4(ch, aString[i+1]);
        chLen = 2;
      } else {
        NS_WARNING("unpaired surrogate found during hyphenation");
      }
    }

    nsIUGenCategory::nsUGenCategory cat = mCategories->Get(ch);
    if (cat == nsIUGenCategory::kLetter || cat == nsIUGenCategory::kMark) {
      if (!inWord) {
        inWord = true;
        wordStart = i;
      }
      wordLimit = i + chLen;
      // Keep accumulating unless this was the last character of the
      // string, in which case the word has to be processed right now.
      if (i + chLen < aString.Length()) {
        continue;
      }
    }

    if (inWord) {
      const PRUnichar *begin = aString.BeginReading();
      NS_ConvertUTF16toUTF8 utf8(begin + wordStart,
                                 wordLimit - wordStart);
      nsAutoTArray<char,200> utf8hyphens;
      // The library writes into this buffer, so it must really have been
      // grown before it is handed over.
      if (!utf8hyphens.SetLength(utf8.Length() + 5)) {
        return NS_ERROR_OUT_OF_MEMORY;
      }
      char **rep = nsnull;
      int *pos = nsnull;
      int *cut = nsnull;
      int err = hnj_hyphen_hyphenate2((HyphenDict*)mDict,
                                      utf8.BeginReading(), utf8.Length(),
                                      utf8hyphens.Elements(), nsnull,
                                      &rep, &pos, &cut);
      if (!err) {
        // Surprisingly, hnj_hyphen_hyphenate2 converts the 'hyphens' buffer
        // from utf8 code unit indexing (which would match the utf8 input
        // string directly) to Unicode character indexing.
        // We then need to convert this to utf16 code unit offsets for Gecko.
        const char *hyphPtr = utf8hyphens.Elements();
        const PRUnichar *cur = begin + wordStart;
        const PRUnichar *end = begin + wordLimit;
        while (cur < end) {
          if (*hyphPtr & 0x01) {
            aHyphens[cur - begin] = true;
          }
          cur++;
          // Step over the second half of a surrogate pair so that one
          // hyphens entry is consumed per character, not per code unit.
          if (cur < end && NS_IS_LOW_SURROGATE(*cur) &&
              NS_IS_HIGH_SURROGATE(*(cur-1)))
          {
            cur++;
          }
          hyphPtr++;
        }
      }
    }
    
    inWord = false;
  }

  return NS_OK;
}
// Converts UTF-16 code units from aSrc into bytes written to aDest.
//
// ASCII is copied as one byte. Everything else is tried, in order, with
// mUtil.UnicodeToGBKChar() (2 bytes), TryExtensionEncoder() (2 bytes) and
// finally the 4-byte path (EncodeSurrogate() for surrogates,
// Try4BytesEncoder() otherwise).
//
// @param aSrc         input code units.
// @param aSrcLength   in: number of code units available;
//                     out: value of the internal source counter when the
//                     loop ended.
// @param aDest        output buffer.
// @param aDestLength  in: capacity of aDest in bytes;
//                     out: number of bytes written.
// @return NS_OK, NS_OK_UENC_MOREOUTPUT when aDest is too small, or
//         NS_ERROR_UENC_NOMAPPING when a character cannot be encoded.
NS_IMETHODIMP nsUnicodeToGBK::ConvertNoBuff(
  const PRUnichar * aSrc, 
  PRInt32 * aSrcLength, 
  char * aDest, 
  PRInt32 * aDestLength)
{
  PRInt32 iSrcLength = 0;
  PRInt32 iDestLength = 0;
  PRUnichar unicode;
  nsresult res = NS_OK;
  while (iSrcLength < *aSrcLength )
  {
    unicode = *aSrc;
    //if unicode's hi byte has something, it is not ASCII, must be a GB
    if(IS_ASCII(unicode))
    {
      // this is an ASCII
      // NOTE(review): there is no capacity check before this write; it
      // relies on the check at the bottom of the loop, which does not cover
      // *aDestLength == 0 on entry. Confirm callers never pass that.
      *aDest = CAST_UNICHAR_TO_CHAR(*aSrc);
      aDest++; // increment 1 byte
      iDestLength +=1;
    } else {
      char byte1, byte2;
      if(mUtil.UnicodeToGBKChar( unicode, PR_FALSE, &byte1, &byte2))
      {
        // make sure we still have 2 bytes for output first
        if(iDestLength+2 > *aDestLength)
        {
          res = NS_OK_UENC_MOREOUTPUT;
          break;
        }
        aDest[0] = byte1;
        aDest[1] = byte2;
        aDest += 2;	// increment 2 bytes
        iDestLength +=2;
      } else {
        PRInt32 aOutLen = 2;
        // make sure we still have 2 bytes for output first
        if(iDestLength+2 > *aDestLength)
        {
          res = NS_OK_UENC_MOREOUTPUT;
          break;
        }
        // we cannot map in the common mapping. Let's try to
        // call the delegated 2 byte converter for the gbk or gb18030
        // unique 2 byte mapping
        if(TryExtensionEncoder(unicode, aDest, &aOutLen))
        {
          iDestLength += aOutLen;
          aDest += aOutLen;
        } else {
          // make sure we still have 4 bytes for output first
          if(iDestLength+4 > *aDestLength)
          {
            res = NS_OK_UENC_MOREOUTPUT;
            break;
          }
          // we still cannot map. Let's try to
          // call the delegated GB18030 4 byte converter 
          aOutLen = 4;
          if( NS_IS_HIGH_SURROGATE(unicode) )
          {
            if((iSrcLength+1) < *aSrcLength ) {
              // The next code unit is available in this buffer; try to
              // encode the two together.
              if(EncodeSurrogate(aSrc[0],aSrc[1], aDest)) {
                // since we got a surrogate pair, we need to increment src.
                iSrcLength++ ; 
                aSrc++;
                iDestLength += aOutLen;
                aDest += aOutLen;
              } else {
                // EncodeSurrogate() rejected the two code units
                // (presumably the second one is not a low surrogate)
                res = NS_ERROR_UENC_NOMAPPING;
                iSrcLength++;   // include length of the unmapped character
                break;
              }
            } else {
              // The high surrogate is the last code unit of this buffer:
              // remember it in mSurrogateHigh and stop. It is not counted
              // in iSrcLength.
              mSurrogateHigh = aSrc[0];
              break; // this will go to afterwhileloop
            }
          } else {
            if( NS_IS_LOW_SURROGATE(unicode) )
            {
              // A low surrogate at this point can only be paired with a
              // high surrogate remembered in mSurrogateHigh.
              if(NS_IS_HIGH_SURROGATE(mSurrogateHigh)) {
                if(EncodeSurrogate(mSurrogateHigh, aSrc[0], aDest)) {
                  iDestLength += aOutLen;
                  aDest += aOutLen;
                } else {
                  // the remembered high surrogate and this low surrogate
                  // could not be encoded together
                  res = NS_ERROR_UENC_NOMAPPING;
                  iSrcLength++;   // include length of the unmapped character
                  break;
                }
              } else {
                // only got a low surrogate, without a remembered high
                // surrogate to pair it with
                res = NS_ERROR_UENC_NOMAPPING;
                iSrcLength++;   // include length of the unmapped character
                break;
              }
            } else {
              if(Try4BytesEncoder(unicode, aDest, &aOutLen))
              {
                NS_ASSERTION((aOutLen == 4), "we should always generate 4 bytes here");
                iDestLength += aOutLen;
                aDest += aOutLen;
              } else {
                res = NS_ERROR_UENC_NOMAPPING;
                iSrcLength++;   // include length of the unmapped character
                break;
              }
            }
          }
        }
      } 
    }
    iSrcLength++ ; // Each unicode char just count as one in PRUnichar string;  	  
    // A remembered high surrogate is only valid for the code unit that
    // immediately follows it.
    mSurrogateHigh = 0;
    aSrc++;
    // Output is full but input remains: ask the caller for more room.
    if ( iDestLength >= (*aDestLength) && (iSrcLength < *aSrcLength) )
    {
      res = NS_OK_UENC_MOREOUTPUT;
      break;
    }
  }
//afterwhileloop:
  // Report how much was written and the final source count.
  *aDestLength = iDestLength;
  *aSrcLength = iSrcLength;
  return res;
}
Beispiel #12
0
// Converts the NUL-terminated UTF-16 string inString with mEncoder and
// returns the result as a newly allocated, NUL-terminated buffer.
//
// Whenever the encoder reports NS_ERROR_UENC_NOMAPPING, the encoder is
// finished, the offending character (or surrogate pair) is handed to
// HandleFallBack() unless mAttribute disables fallback, and conversion
// resumes with the rest of the input.
//
// @param inString   NUL-terminated input; must not be null.
// @param outString  receives the PR_Malloc'ed result; set to nullptr on
//                   failure. Must not be null.
// @return NS_ERROR_UENC_NOMAPPING if any character had no mapping (the
//         output is still returned), another failure code on error,
//         otherwise the result of the final encoder call.
NS_IMETHODIMP
nsSaveAsCharset::DoCharsetConversion(const PRUnichar *inString, char **outString)
{
  NS_ENSURE_ARG_POINTER(outString);

  *outString = nullptr;

  // NS_strlen() below would dereference a null input.
  NS_ENSURE_ARG_POINTER(inString);

  nsresult rv;
  int32_t inStringLength = NS_strlen(inString);       // original input string length
  int32_t bufferLength;                               // allocated buffer length
  int32_t srcLength = inStringLength;
  int32_t dstLength;
  int32_t pos1, pos2;
  nsresult saveResult = NS_OK;                         // to remember NS_ERROR_UENC_NOMAPPING

  // estimate and allocate the target buffer (reserve extra memory for fallback)
  rv = mEncoder->GetMaxLength(inString, inStringLength, &dstLength);
  if (NS_FAILED(rv)) return rv;

  bufferLength = dstLength + RESERVE_FALLBACK_BYTES; // extra bytes for fallback
  // + 1 is for the terminating NUL -- we don't add that to bufferLength so that
  // we can always write dstPtr[pos2] = '\0' even when the encoder filled the
  // buffer.
  char *dstPtr = (char *) PR_Malloc(bufferLength + 1);
  if (!dstPtr) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  
  // pos1 indexes the input (UTF-16 code units), pos2 the output (bytes).
  for (pos1 = 0, pos2 = 0; pos1 < inStringLength;) {
    // convert from unicode
    dstLength = bufferLength - pos2;
    NS_ASSERTION(dstLength >= 0, "out of bounds write");
    rv = mEncoder->Convert(&inString[pos1], &srcLength, &dstPtr[pos2], &dstLength);

    // Always make progress, even if the encoder reports nothing consumed.
    pos1 += srcLength ? srcLength : 1;
    pos2 += dstLength;
    dstPtr[pos2] = '\0';

    // break: this is usually the case (no error) OR unrecoverable error
    if (NS_ERROR_UENC_NOMAPPING != rv) break;

    // remember this happened
    saveResult = rv;

    // finish encoder, give it a chance to write extra data like escape sequences
    dstLength = bufferLength - pos2;
    rv = mEncoder->Finish(&dstPtr[pos2], &dstLength);
    if (NS_SUCCEEDED(rv)) {
      pos2 += dstLength;
      dstPtr[pos2] = '\0';
    }

    // do the fallback
    if (!ATTR_NO_FALLBACK(mAttribute)) {
      uint32_t unMappedChar;
      // The unmapped character is the one just before pos1; if it is the
      // first half of a surrogate pair, take the second half as well.
      if (NS_IS_HIGH_SURROGATE(inString[pos1-1]) && 
          inStringLength > pos1 && NS_IS_LOW_SURROGATE(inString[pos1])) {
        unMappedChar = SURROGATE_TO_UCS4(inString[pos1-1], inString[pos1]);
        pos1++;
      } else {
        unMappedChar = inString[pos1-1];
      }

      rv = mEncoder->GetMaxLength(inString+pos1, inStringLength-pos1, &dstLength);
      if (NS_FAILED(rv)) 
        break;

      rv = HandleFallBack(unMappedChar, &dstPtr, &bufferLength, &pos2, dstLength);
      if (NS_FAILED(rv)) 
        break;
      dstPtr[pos2] = '\0';
    }

    // Remaining input for the next Convert() call. This is computed after
    // the surrogate-pair skip above so that the encoder is never told
    // about one more code unit than is actually left.
    srcLength = inStringLength - pos1;
  }

  if (NS_SUCCEEDED(rv)) {
    // finish encoder, give it a chance to write extra data like escape sequences
    dstLength = bufferLength - pos2;
    rv = mEncoder->Finish(&dstPtr[pos2], &dstLength);
    if (NS_SUCCEEDED(rv)) {
      pos2 += dstLength;
      dstPtr[pos2] = '\0';
    }
  }

  if (NS_FAILED(rv)) {
    PR_FREEIF(dstPtr);
    return rv;
  }

  *outString = dstPtr;      // set the result string

  // set error code so that the caller can do own fall back
  if (NS_ERROR_UENC_NOMAPPING == saveResult) {
    rv = NS_ERROR_UENC_NOMAPPING;
  }

  return rv;
}
// Converts the NUL-terminated UTF-16 string inString with mEncoder and
// returns the result as a newly allocated, NUL-terminated buffer.
//
// Whenever the encoder reports NS_ERROR_UENC_NOMAPPING, the encoder is
// finished, the offending character (or surrogate pair) is handed to
// HandleFallBack() unless mAttribute disables fallback, and conversion
// resumes with the rest of the input.
//
// @param inString   NUL-terminated input; must not be null.
// @param outString  receives the PR_Malloc'ed result; set to null on
//                   failure. Must not be null.
// @return NS_ERROR_NULL_POINTER for a null argument,
//         NS_ERROR_UENC_NOMAPPING if any character had no mapping (the
//         output is still returned), another failure code on error,
//         otherwise the result of the final encoder call.
NS_IMETHODIMP
nsSaveAsCharset::DoCharsetConversion(const PRUnichar *inString, char **outString)
{
  if(nullptr == outString )
    return NS_ERROR_NULL_POINTER;

  *outString = nullptr;

  // NS_strlen() below would dereference a null input.
  if(nullptr == inString )
    return NS_ERROR_NULL_POINTER;

  nsresult rv;
  PRInt32 inStringLength = NS_strlen(inString);       // original input string length
  PRInt32 bufferLength;                               // allocated buffer length
  PRInt32 srcLength = inStringLength;
  PRInt32 dstLength;
  char *dstPtr = nullptr;
  PRInt32 pos1, pos2;
  nsresult saveResult = NS_OK;                         // to remember NS_ERROR_UENC_NOMAPPING

  // estimate and allocate the target buffer (reserve extra memory for fallback)
  rv = mEncoder->GetMaxLength(inString, inStringLength, &dstLength);
  if (NS_FAILED(rv)) return rv;

  bufferLength = dstLength + 512; // reserve 512 byte for fallback.
  // Allocate one byte more than bufferLength: the code below writes
  // dstPtr[pos2] = '\0' and pos2 can reach bufferLength when the encoder
  // fills the buffer completely.
  // NOTE(review): HandleFallBack() may reallocate dstPtr and is not visible
  // here; confirm it keeps the same "+ 1 for the terminator" invariant.
  dstPtr = (char *) PR_Malloc(bufferLength + 1);
  if (nullptr == dstPtr) return NS_ERROR_OUT_OF_MEMORY;

  
  // pos1 indexes the input (UTF-16 code units), pos2 the output (bytes).
  for (pos1 = 0, pos2 = 0; pos1 < inStringLength;) {
    // convert from unicode
    dstLength = bufferLength - pos2;
    rv = mEncoder->Convert(&inString[pos1], &srcLength, &dstPtr[pos2], &dstLength);

    // Always make progress, even if the encoder reports nothing consumed.
    pos1 += srcLength ? srcLength : 1;
    pos2 += dstLength;
    dstPtr[pos2] = '\0';

    // break: this is usually the case (no error) OR unrecoverable error
    if (NS_ERROR_UENC_NOMAPPING != rv) break;

    // remember this happened
    saveResult = rv;

    // finish encoder, give it a chance to write extra data like escape sequences
    dstLength = bufferLength - pos2;
    rv = mEncoder->Finish(&dstPtr[pos2], &dstLength);
    if (NS_SUCCEEDED(rv)) {
      pos2 += dstLength;
      dstPtr[pos2] = '\0';
    }

    // do the fallback
    if (!ATTR_NO_FALLBACK(mAttribute)) {
      PRUint32 unMappedChar;
      // The unmapped character is the one just before pos1; if it is the
      // first half of a surrogate pair, take the second half as well.
      if (NS_IS_HIGH_SURROGATE(inString[pos1-1]) && 
          inStringLength > pos1 && NS_IS_LOW_SURROGATE(inString[pos1])) {
        unMappedChar = SURROGATE_TO_UCS4(inString[pos1-1], inString[pos1]);
        pos1++;
      } else {
        unMappedChar = inString[pos1-1];
      }

      rv = mEncoder->GetMaxLength(inString+pos1, inStringLength-pos1, &dstLength);
      if (NS_FAILED(rv)) 
        break;

      rv = HandleFallBack(unMappedChar, &dstPtr, &bufferLength, &pos2, dstLength);
      if (NS_FAILED(rv)) 
        break;
      dstPtr[pos2] = '\0';
    }

    // Remaining input for the next Convert() call. This is computed after
    // the surrogate-pair skip above so that the encoder is never told
    // about one more code unit than is actually left.
    srcLength = inStringLength - pos1;
  }

  if (NS_SUCCEEDED(rv)) {
    // finish encoder, give it a chance to write extra data like escape sequences
    dstLength = bufferLength - pos2;
    rv = mEncoder->Finish(&dstPtr[pos2], &dstLength);
    if (NS_SUCCEEDED(rv)) {
      pos2 += dstLength;
      dstPtr[pos2] = '\0';
    }
  }

  if (NS_FAILED(rv)) {
    PR_FREEIF(dstPtr);
    return rv;
  }

  *outString = dstPtr;      // set the result string

  // set error code so that the caller can do own fall back
  if (NS_ERROR_UENC_NOMAPPING == saveResult) {
    rv = NS_ERROR_UENC_NOMAPPING;
  }

  return rv;
}
// Finds the next run of text that shares one script.
//
// Scanning starts where the previous run ended (scriptLimit). On success
// the run's bounds and script code are stored in the out-parameters.
//
// @param aRunStart   receives the index of the first code unit of the run.
// @param aRunLimit   receives the index one past the last code unit.
// @param aRunScript  receives the script code of the run.
// @return false when the previous run already reached the end of the text,
//         true otherwise.
bool
gfxScriptItemizer::Next(PRUint32& aRunStart, PRUint32& aRunLimit,
                        PRInt32& aRunScript)
{
    /* nothing left once the previous run reached the end of the text */
    if (!(scriptLimit < textLength)) {
        return false;
    }

    SYNC_FIXUP();
    scriptCode = MOZ_SCRIPT_COMMON;
    scriptStart = scriptLimit;

    while (scriptLimit < textLength) {
        /* where this character begins, in case we have to back up */
        const PRUint32 charStart = scriptLimit;
        PRUint32 codepoint = textPtr[scriptLimit];
        PRInt32 charScript;
        PRInt32 pairing;

        if (codepoint == 0x20) {
            /*
             * MODIFICATION for Gecko - a space empties the paired-character
             * stack, because context outside the current "word" cannot be
             * trusted when doing textrun construction
             */
            while (STACK_IS_NOT_EMPTY()) {
                pop();
            }
            charScript = MOZ_SCRIPT_COMMON;
            pairing = -1;
        } else {
            /* combine a surrogate pair into a single code point */
            if (NS_IS_HIGH_SURROGATE(codepoint) &&
                scriptLimit < textLength - 1) {
                const PRUint32 trail = textPtr[scriptLimit + 1];
                if (NS_IS_LOW_SURROGATE(trail)) {
                    codepoint = SURROGATE_TO_UCS4(codepoint, trail);
                    scriptLimit += 1;
                }
            }

            charScript = mozilla::unicode::GetScriptCode(codepoint);
            pairing = getPairIndex(codepoint);

            /*
             * Paired characters: an even index is an opener and is pushed
             * together with the current script; an odd index is a closer,
             * which takes the script of its matching opener after any
             * non-matching openers above it have been popped.
             */
            if (pairing >= 0) {
                const bool isOpener = (pairing & 1) == 0;
                if (isOpener) {
                    push(pairing, scriptCode);
                } else {
                    const PRInt32 openerIndex = pairing & ~1;

                    while (STACK_IS_NOT_EMPTY() &&
                           TOP().pairIndex != openerIndex) {
                        pop();
                    }

                    if (STACK_IS_NOT_EMPTY()) {
                        charScript = TOP().scriptCode;
                    }
                }
            }
        }

        if (!sameScript(scriptCode, charScript)) {
            /*
             * The run ends before this character; undo any advance made
             * while reading a multiple-code-unit character.
             */
            scriptLimit = charStart;
            break;
        }

        /* the first "real" script seen decides the script of the run */
        if (scriptCode <= MOZ_SCRIPT_INHERITED &&
            charScript > MOZ_SCRIPT_INHERITED)
        {
            scriptCode = charScript;
            fixup(scriptCode);
        }

        /* a closer removes its matching opener from the stack */
        if (pairing >= 0 && (pairing & 1) != 0) {
            pop();
        }

        scriptLimit += 1;
    }

    aRunStart = scriptStart;
    aRunLimit = scriptLimit;
    aRunScript = scriptCode;

    return true;
}