bool
nsCaseTransformTextRunFactory::TransformString(
    const nsAString& aString,
    nsString& aConvertedString,
    bool aAllUppercase,
    const nsIAtom* aLanguage,
    nsTArray<bool>& aCharsToMergeArray,
    nsTArray<bool>& aDeletedCharsArray,
    nsTransformedTextRun* aTextRun,
    nsTArray<uint8_t>* aCanBreakBeforeArray,
    nsTArray<nsStyleContext*>* aStyleArray)
{
  NS_PRECONDITION(!aTextRun || (aCanBreakBeforeArray && aStyleArray),
                  "either none or all three optional parameters required");

  uint32_t length = aString.Length();
  const char16_t* str = aString.BeginReading();

  bool mergeNeeded = false;

  bool capitalizeDutchIJ = false;
  bool prevIsLetter = false;
  bool ntPrefix = false; // true immediately after a word-initial 'n' or 't'
                         // when doing Irish lowercasing
  uint32_t sigmaIndex = uint32_t(-1);
  nsIUGenCategory::nsUGenCategory cat;

  uint8_t style = aAllUppercase ? NS_STYLE_TEXT_TRANSFORM_UPPERCASE : 0;
  const nsIAtom* lang = aLanguage;

  LanguageSpecificCasingBehavior languageSpecificCasing = GetCasingFor(lang);
  mozilla::GreekCasing::State greekState;
  mozilla::IrishCasing::State irishState;
  uint32_t irishMark = uint32_t(-1); // location of possible prefix letter(s)

  for (uint32_t i = 0; i < length; ++i) {
    uint32_t ch = str[i];

    nsStyleContext* styleContext;
    if (aTextRun) {
      styleContext = aTextRun->mStyles[i];
      style = aAllUppercase ? NS_STYLE_TEXT_TRANSFORM_UPPERCASE :
        styleContext->StyleText()->mTextTransform;

      if (lang != styleContext->StyleFont()->mLanguage) {
        lang = styleContext->StyleFont()->mLanguage;
        languageSpecificCasing = GetCasingFor(lang);
        greekState.Reset();
        irishState.Reset();
        irishMark = uint32_t(-1);
      }
    }

    int extraChars = 0;
    const mozilla::unicode::MultiCharMapping *mcm;
    bool inhibitBreakBefore = false; // have we just deleted preceding hyphen?

    if (NS_IS_HIGH_SURROGATE(ch) && i < length - 1 &&
        NS_IS_LOW_SURROGATE(str[i + 1])) {
      ch = SURROGATE_TO_UCS4(ch, str[i + 1]);
    }

    switch (style) {
    case NS_STYLE_TEXT_TRANSFORM_LOWERCASE:
      if (languageSpecificCasing == eLSCB_Turkish) {
        if (ch == 'I') {
          ch = LATIN_SMALL_LETTER_DOTLESS_I;
          prevIsLetter = true;
          sigmaIndex = uint32_t(-1);
          break;
        }
        if (ch == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
          ch = 'i';
          prevIsLetter = true;
          sigmaIndex = uint32_t(-1);
          break;
        }
      }

      cat = mozilla::unicode::GetGenCategory(ch);

      if (languageSpecificCasing == eLSCB_Irish &&
          cat == nsIUGenCategory::kLetter) {
        // See bug 1018805 for Irish lowercasing requirements
        if (!prevIsLetter && (ch == 'n' || ch == 't')) {
          ntPrefix = true;
        } else {
          if (ntPrefix && mozilla::IrishCasing::IsUpperVowel(ch)) {
            aConvertedString.Append('-');
            ++extraChars;
          }
          ntPrefix = false;
        }
      } else {
        ntPrefix = false;
      }

      // Special lowercasing behavior for Greek Sigma: note that this is listed
      // as context-sensitive in Unicode's SpecialCasing.txt, but is *not* a
      // language-specific mapping; it applies regardless of the language of
      // the element.
      //
      // The lowercase mapping for CAPITAL SIGMA should be to SMALL SIGMA (i.e.
      // the non-final form) whenever there is a following letter, or when the
      // CAPITAL SIGMA occurs in isolation (neither preceded nor followed by a
      // LETTER); and to FINAL SIGMA when it is preceded by another letter but
      // not followed by one.
      //
      // To implement the context-sensitive nature of this mapping, we keep
      // track of whether the previous character was a letter. If not, CAPITAL
      // SIGMA will map directly to SMALL SIGMA. If the previous character
      // was a letter, CAPITAL SIGMA maps to FINAL SIGMA and we record the
      // position in the converted string; if we then encounter another letter,
      // that FINAL SIGMA is replaced with a standard SMALL SIGMA.

      // If sigmaIndex is not -1, it marks where we have provisionally mapped
      // a CAPITAL SIGMA to FINAL SIGMA; if we now find another letter, we
      // need to change it to SMALL SIGMA.
      if (sigmaIndex != uint32_t(-1)) {
        if (cat == nsIUGenCategory::kLetter) {
          aConvertedString.SetCharAt(GREEK_SMALL_LETTER_SIGMA, sigmaIndex);
        }
      }

      if (ch == GREEK_CAPITAL_LETTER_SIGMA) {
        // If preceding char was a letter, map to FINAL instead of SMALL,
        // and note where it occurred by setting sigmaIndex; we'll change it
        // to standard SMALL SIGMA later if another letter follows
        if (prevIsLetter) {
          ch = GREEK_SMALL_LETTER_FINAL_SIGMA;
          sigmaIndex = aConvertedString.Length();
        } else {
          // CAPITAL SIGMA not preceded by a letter is unconditionally mapped
          // to SMALL SIGMA
          ch = GREEK_SMALL_LETTER_SIGMA;
          sigmaIndex = uint32_t(-1);
        }
        prevIsLetter = true;
        break;
      }

      // ignore diacritics for the purpose of contextual sigma mapping;
      // otherwise, reset prevIsLetter appropriately and clear the
      // sigmaIndex marker
      if (cat != nsIUGenCategory::kMark) {
        prevIsLetter = (cat == nsIUGenCategory::kLetter);
        sigmaIndex = uint32_t(-1);
      }

      mcm = mozilla::unicode::SpecialLower(ch);
      if (mcm) {
        int j = 0;
        while (j < 2 && mcm->mMappedChars[j + 1]) {
          aConvertedString.Append(mcm->mMappedChars[j]);
          ++extraChars;
          ++j;
        }
        ch = mcm->mMappedChars[j];
        break;
      }

      ch = ToLowerCase(ch);
      break;

    case NS_STYLE_TEXT_TRANSFORM_UPPERCASE:
      if (languageSpecificCasing == eLSCB_Turkish && ch == 'i') {
        ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
        break;
      }

      if (languageSpecificCasing == eLSCB_Greek) {
        ch = mozilla::GreekCasing::UpperCase(ch, greekState);
        break;
      }

      if (languageSpecificCasing == eLSCB_Irish) {
        bool mark;
        uint8_t action;
        ch = mozilla::IrishCasing::UpperCase(ch, irishState, mark, action);
        if (mark) {
          irishMark = aConvertedString.Length();
          break;
        } else if (action) {
          nsString& str = aConvertedString; // shorthand
          switch (action) {
          case 1:
            // lowercase a single prefix letter
            NS_ASSERTION(str.Length() > 0 && irishMark < str.Length(),
                         "bad irishMark!");
            str.SetCharAt(ToLowerCase(str[irishMark]), irishMark);
            irishMark = uint32_t(-1);
            break;
          case 2:
            // lowercase two prefix letters (immediately before current pos)
            NS_ASSERTION(str.Length() >= 2 && irishMark == str.Length() - 2,
                         "bad irishMark!");
            str.SetCharAt(ToLowerCase(str[irishMark]), irishMark);
            str.SetCharAt(ToLowerCase(str[irishMark + 1]), irishMark + 1);
            irishMark = uint32_t(-1);
            break;
          case 3:
            // lowercase one prefix letter, and delete following hyphen
            // (which must be the immediately-preceding char)
            NS_ASSERTION(str.Length() >= 2 && irishMark == str.Length() - 2,
                         "bad irishMark!");
            str.Replace(irishMark, 2, ToLowerCase(str[irishMark]));
            aDeletedCharsArray[irishMark + 1] = true;
            // Remove the trailing entries (corresponding to the deleted hyphen)
            // from the auxiliary arrays.
            aCharsToMergeArray.SetLength(aCharsToMergeArray.Length() - 1);
            if (aTextRun) {
              aStyleArray->SetLength(aStyleArray->Length() - 1);
              aCanBreakBeforeArray->SetLength(aCanBreakBeforeArray->Length() - 1);
              inhibitBreakBefore = true;
            }
            mergeNeeded = true;
            irishMark = uint32_t(-1);
            break;
          }
          // ch has been set to the uppercase for current char;
          // No need to check for SpecialUpper here as none of the characters
          // that could trigger an Irish casing action have special mappings.
          break;
        }
        // If we didn't have any special action to perform, fall through
        // to check for special uppercase (ß)
      }

      mcm = mozilla::unicode::SpecialUpper(ch);
      if (mcm) {
        int j = 0;
        while (j < 2 && mcm->mMappedChars[j + 1]) {
          aConvertedString.Append(mcm->mMappedChars[j]);
          ++extraChars;
          ++j;
        }
        ch = mcm->mMappedChars[j];
        break;
      }

      ch = ToUpperCase(ch);
      break;

    case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE:
      if (aTextRun) {
        if (capitalizeDutchIJ && ch == 'j') {
          ch = 'J';
          capitalizeDutchIJ = false;
          break;
        }
        capitalizeDutchIJ = false;
        if (i < aTextRun->mCapitalize.Length() && aTextRun->mCapitalize[i]) {
          if (languageSpecificCasing == eLSCB_Turkish && ch == 'i') {
            ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
            break;
          }
          if (languageSpecificCasing == eLSCB_Dutch && ch == 'i') {
            ch = 'I';
            capitalizeDutchIJ = true;
            break;
          }

          mcm = mozilla::unicode::SpecialTitle(ch);
          if (mcm) {
            int j = 0;
            while (j < 2 && mcm->mMappedChars[j + 1]) {
              aConvertedString.Append(mcm->mMappedChars[j]);
              ++extraChars;
              ++j;
            }
            ch = mcm->mMappedChars[j];
            break;
          }

          ch = ToTitleCase(ch);
        }
      }
      break;

    case NS_STYLE_TEXT_TRANSFORM_FULLWIDTH:
      ch = mozilla::unicode::GetFullWidth(ch);
      break;

    default:
      break;
    }

    if (ch == uint32_t(-1)) {
      aDeletedCharsArray.AppendElement(true);
      mergeNeeded = true;
    } else {
      aDeletedCharsArray.AppendElement(false);
      aCharsToMergeArray.AppendElement(false);
      if (aTextRun) {
        aStyleArray->AppendElement(styleContext);
        aCanBreakBeforeArray->AppendElement(inhibitBreakBefore ? false :
                                            aTextRun->CanBreakLineBefore(i));
      }

      if (IS_IN_BMP(ch)) {
        aConvertedString.Append(ch);
      } else {
        aConvertedString.Append(H_SURROGATE(ch));
        aConvertedString.Append(L_SURROGATE(ch));
        ++i;
        aDeletedCharsArray.AppendElement(true); // not exactly deleted, but the
                                                // trailing surrogate is skipped
        ++extraChars;
      }

      while (extraChars-- > 0) {
        mergeNeeded = true;
        aCharsToMergeArray.AppendElement(true);
        if (aTextRun) {
          aStyleArray->AppendElement(styleContext);
          aCanBreakBeforeArray->AppendElement(false);
        }
      }
    }
  }

  return mergeNeeded;
}
void
nsCaseTransformTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun,
    gfxContext* aRefContext)
{
  PRUint32 length = aTextRun->GetLength();
  const PRUnichar* str = aTextRun->mString.BeginReading();
  nsRefPtr<nsStyleContext>* styles = aTextRun->mStyles.Elements();

  nsAutoString convertedString;
  nsAutoTArray<bool,50> charsToMergeArray;
  nsAutoTArray<nsStyleContext*,50> styleArray;
  nsAutoTArray<PRUint8,50> canBreakBeforeArray;
  PRUint32 extraCharsCount = 0;

  PRUint32 i;
  for (i = 0; i < length; ++i) {
    PRUnichar ch = str[i];

    charsToMergeArray.AppendElement(false);
    styleArray.AppendElement(styles[i]);
    canBreakBeforeArray.AppendElement(aTextRun->CanBreakLineBefore(i));

    PRUint8 style = mAllUppercase ? NS_STYLE_TEXT_TRANSFORM_UPPERCASE
      : styles[i]->GetStyleText()->mTextTransform;
    bool extraChar = false;

    switch (style) {
    case NS_STYLE_TEXT_TRANSFORM_LOWERCASE:
      ch = ToLowerCase(ch);
      break;
    case NS_STYLE_TEXT_TRANSFORM_UPPERCASE:
      if (ch == SZLIG) {
        convertedString.Append('S');
        extraChar = true;
        ch = 'S';
      } else {
        ch = ToUpperCase(ch);
      }
      break;
    case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE:
      if (i < aTextRun->mCapitalize.Length() && aTextRun->mCapitalize[i]) {
        if (ch == SZLIG) {
          convertedString.Append('S');
          extraChar = true;
          ch = 'S';
        } else {
          ch = ToTitleCase(ch);
        }
      }
      break;
    default:
      break;
    }

    convertedString.Append(ch);
    if (extraChar) {
      ++extraCharsCount;
      charsToMergeArray.AppendElement(true);
      styleArray.AppendElement(styles[i]);
      canBreakBeforeArray.AppendElement(false);
    }
  }

  PRUint32 flags;
  gfxTextRunFactory::Parameters innerParams =
      GetParametersForInner(aTextRun, &flags, aRefContext);
  gfxFontGroup* fontGroup = aTextRun->GetFontGroup();

  nsAutoPtr<nsTransformedTextRun> transformedChild;
  nsAutoPtr<gfxTextRun> cachedChild;
  gfxTextRun* child;

  if (mInnerTransformingTextRunFactory) {
    transformedChild = mInnerTransformingTextRunFactory->MakeTextRun(
        convertedString.BeginReading(), convertedString.Length(),
        &innerParams, fontGroup, flags, styleArray.Elements(), false);
    child = transformedChild.get();
  } else {
    cachedChild = fontGroup->MakeTextRun(
        convertedString.BeginReading(), convertedString.Length(),
        &innerParams, flags);
    child = cachedChild.get();
  }
  if (!child)
    return;
  // Copy potential linebreaks into child so they're preserved
  // (and also child will be shaped appropriately)
  NS_ASSERTION(convertedString.Length() == canBreakBeforeArray.Length(),
               "Dropped characters or break-before values somewhere!");
  child->SetPotentialLineBreaks(0, canBreakBeforeArray.Length(),
      canBreakBeforeArray.Elements(), aRefContext);
  if (transformedChild) {
    transformedChild->FinishSettingProperties(aRefContext);
  }

  if (extraCharsCount > 0) {
    // Now merge multiple characters into one multi-glyph character as required
    MergeCharactersInTextRun(aTextRun, child, charsToMergeArray.Elements());
  } else {
    // No merging to do, so just copy; this produces a more optimized textrun.
    // We can't steal the data because the child may be cached and stealing
    // the data would break the cache.
    aTextRun->ResetGlyphRuns();
    aTextRun->CopyGlyphDataFrom(child, 0, child->GetLength(), 0);
  }
}
void
nsCaseTransformTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun,
    gfxContext* aRefContext)
{
  uint32_t length = aTextRun->GetLength();
  const PRUnichar* str = aTextRun->mString.BeginReading();
  nsRefPtr<nsStyleContext>* styles = aTextRun->mStyles.Elements();

  nsAutoString convertedString;
  nsAutoTArray<bool,50> charsToMergeArray;
  nsAutoTArray<bool,50> deletedCharsArray;
  nsAutoTArray<nsStyleContext*,50> styleArray;
  nsAutoTArray<uint8_t,50> canBreakBeforeArray;
  bool mergeNeeded = false;

  // Some languages have special casing conventions that differ from the
  // default Unicode mappings.
  // The enum values here are named for well-known exemplar languages that
  // exhibit the behavior in question; multiple lang tags may map to the
  // same setting here, if the behavior is shared by other languages.
  enum {
    eNone,    // default non-lang-specific behavior
    eTurkish, // preserve dotted/dotless-i distinction in uppercase
    eDutch,   // treat "ij" digraph as a unit for capitalization
    eGreek    // strip accent when uppercasing Greek vowels
  } languageSpecificCasing = eNone;

  const nsIAtom* lang = nullptr;
  bool capitalizeDutchIJ = false;
  bool prevIsLetter = false;
  uint32_t sigmaIndex = uint32_t(-1);
  nsIUGenCategory::nsUGenCategory cat;
  GreekCasingState greekState = kStart;
  uint32_t i;
  for (i = 0; i < length; ++i) {
    uint32_t ch = str[i];
    nsStyleContext* styleContext = styles[i];

    uint8_t style = mAllUppercase ? NS_STYLE_TEXT_TRANSFORM_UPPERCASE
      : styleContext->StyleText()->mTextTransform;
    int extraChars = 0;
    const mozilla::unicode::MultiCharMapping *mcm;

    if (NS_IS_HIGH_SURROGATE(ch) && i < length - 1 && NS_IS_LOW_SURROGATE(str[i + 1])) {
      ch = SURROGATE_TO_UCS4(ch, str[i + 1]);
    }

    if (lang != styleContext->StyleFont()->mLanguage) {
      lang = styleContext->StyleFont()->mLanguage;
      if (lang == nsGkAtoms::tr || lang == nsGkAtoms::az ||
          lang == nsGkAtoms::ba || lang == nsGkAtoms::crh ||
          lang == nsGkAtoms::tt) {
        languageSpecificCasing = eTurkish;
      } else if (lang == nsGkAtoms::nl) {
        languageSpecificCasing = eDutch;
      } else if (lang == nsGkAtoms::el) {
        languageSpecificCasing = eGreek;
        greekState = kStart;
      } else {
        languageSpecificCasing = eNone;
      }
    }

    switch (style) {
    case NS_STYLE_TEXT_TRANSFORM_LOWERCASE:
      if (languageSpecificCasing == eTurkish) {
        if (ch == 'I') {
          ch = LATIN_SMALL_LETTER_DOTLESS_I;
          prevIsLetter = true;
          sigmaIndex = uint32_t(-1);
          break;
        }
        if (ch == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
          ch = 'i';
          prevIsLetter = true;
          sigmaIndex = uint32_t(-1);
          break;
        }
      }

      // Special lowercasing behavior for Greek Sigma: note that this is listed
      // as context-sensitive in Unicode's SpecialCasing.txt, but is *not* a
      // language-specific mapping; it applies regardless of the language of
      // the element.
      //
      // The lowercase mapping for CAPITAL SIGMA should be to SMALL SIGMA (i.e.
      // the non-final form) whenever there is a following letter, or when the
      // CAPITAL SIGMA occurs in isolation (neither preceded nor followed by a
      // LETTER); and to FINAL SIGMA when it is preceded by another letter but
      // not followed by one.
      //
      // To implement the context-sensitive nature of this mapping, we keep
      // track of whether the previous character was a letter. If not, CAPITAL
      // SIGMA will map directly to SMALL SIGMA. If the previous character
      // was a letter, CAPITAL SIGMA maps to FINAL SIGMA and we record the
      // position in the converted string; if we then encounter another letter,
      // that FINAL SIGMA is replaced with a standard SMALL SIGMA.

      cat = mozilla::unicode::GetGenCategory(ch);

      // If sigmaIndex is not -1, it marks where we have provisionally mapped
      // a CAPITAL SIGMA to FINAL SIGMA; if we now find another letter, we
      // need to change it to SMALL SIGMA.
      if (sigmaIndex != uint32_t(-1)) {
        if (cat == nsIUGenCategory::kLetter) {
          convertedString.SetCharAt(GREEK_SMALL_LETTER_SIGMA, sigmaIndex);
        }
      }

      if (ch == GREEK_CAPITAL_LETTER_SIGMA) {
        // If preceding char was a letter, map to FINAL instead of SMALL,
        // and note where it occurred by setting sigmaIndex; we'll change it
        // to standard SMALL SIGMA later if another letter follows
        if (prevIsLetter) {
          ch = GREEK_SMALL_LETTER_FINAL_SIGMA;
          sigmaIndex = convertedString.Length();
        } else {
          // CAPITAL SIGMA not preceded by a letter is unconditionally mapped
          // to SMALL SIGMA
          ch = GREEK_SMALL_LETTER_SIGMA;
          sigmaIndex = uint32_t(-1);
        }
        prevIsLetter = true;
        break;
      }

      // ignore diacritics for the purpose of contextual sigma mapping;
      // otherwise, reset prevIsLetter appropriately and clear the
      // sigmaIndex marker
      if (cat != nsIUGenCategory::kMark) {
        prevIsLetter = (cat == nsIUGenCategory::kLetter);
        sigmaIndex = uint32_t(-1);
      }

      mcm = mozilla::unicode::SpecialLower(ch);
      if (mcm) {
        int j = 0;
        while (j < 2 && mcm->mMappedChars[j + 1]) {
          convertedString.Append(mcm->mMappedChars[j]);
          ++extraChars;
          ++j;
        }
        ch = mcm->mMappedChars[j];
        break;
      }

      ch = ToLowerCase(ch);
      break;

    case NS_STYLE_TEXT_TRANSFORM_UPPERCASE:
      if (languageSpecificCasing == eTurkish && ch == 'i') {
        ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
        break;
      }

      if (languageSpecificCasing == eGreek) {
        ch = GreekUpperCase(ch, &greekState);
        break;
      }

      mcm = mozilla::unicode::SpecialUpper(ch);
      if (mcm) {
        int j = 0;
        while (j < 2 && mcm->mMappedChars[j + 1]) {
          convertedString.Append(mcm->mMappedChars[j]);
          ++extraChars;
          ++j;
        }
        ch = mcm->mMappedChars[j];
        break;
      }

      ch = ToUpperCase(ch);
      break;

    case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE:
      if (capitalizeDutchIJ && ch == 'j') {
        ch = 'J';
        capitalizeDutchIJ = false;
        break;
      }
      capitalizeDutchIJ = false;
      if (i < aTextRun->mCapitalize.Length() && aTextRun->mCapitalize[i]) {
        if (languageSpecificCasing == eTurkish && ch == 'i') {
          ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
          break;
        }
        if (languageSpecificCasing == eDutch && ch == 'i') {
          ch = 'I';
          capitalizeDutchIJ = true;
          break;
        }

        mcm = mozilla::unicode::SpecialTitle(ch);
        if (mcm) {
          int j = 0;
          while (j < 2 && mcm->mMappedChars[j + 1]) {
            convertedString.Append(mcm->mMappedChars[j]);
            ++extraChars;
            ++j;
          }
          ch = mcm->mMappedChars[j];
          break;
        }

        ch = ToTitleCase(ch);
      }
      break;

    case NS_STYLE_TEXT_TRANSFORM_FULLWIDTH:
      ch = mozilla::unicode::GetFullWidth(ch);
      break;

    default:
      break;
    }

    if (ch == uint32_t(-1)) {
      deletedCharsArray.AppendElement(true);
      mergeNeeded = true;
    } else {
      deletedCharsArray.AppendElement(false);
      charsToMergeArray.AppendElement(false);
      styleArray.AppendElement(styleContext);
      canBreakBeforeArray.AppendElement(aTextRun->CanBreakLineBefore(i));

      if (IS_IN_BMP(ch)) {
        convertedString.Append(ch);
      } else {
        convertedString.Append(H_SURROGATE(ch));
        convertedString.Append(L_SURROGATE(ch));
        ++i;
        deletedCharsArray.AppendElement(true); // not exactly deleted, but the
                                               // trailing surrogate is skipped
        ++extraChars;
      }

      while (extraChars-- > 0) {
        mergeNeeded = true;
        charsToMergeArray.AppendElement(true);
        styleArray.AppendElement(styleContext);
        canBreakBeforeArray.AppendElement(false);
      }
    }
  }

  uint32_t flags;
  gfxTextRunFactory::Parameters innerParams =
      GetParametersForInner(aTextRun, &flags, aRefContext);
  gfxFontGroup* fontGroup = aTextRun->GetFontGroup();

  nsAutoPtr<nsTransformedTextRun> transformedChild;
  nsAutoPtr<gfxTextRun> cachedChild;
  gfxTextRun* child;

  if (mInnerTransformingTextRunFactory) {
    transformedChild = mInnerTransformingTextRunFactory->MakeTextRun(
        convertedString.BeginReading(), convertedString.Length(),
        &innerParams, fontGroup, flags, styleArray.Elements(), false);
    child = transformedChild.get();
  } else {
    cachedChild = fontGroup->MakeTextRun(
        convertedString.BeginReading(), convertedString.Length(),
        &innerParams, flags);
    child = cachedChild.get();
  }
  if (!child)
    return;
  // Copy potential linebreaks into child so they're preserved
  // (and also child will be shaped appropriately)
  NS_ASSERTION(convertedString.Length() == canBreakBeforeArray.Length(),
               "Dropped characters or break-before values somewhere!");
  child->SetPotentialLineBreaks(0, canBreakBeforeArray.Length(),
      canBreakBeforeArray.Elements(), aRefContext);
  if (transformedChild) {
    transformedChild->FinishSettingProperties(aRefContext);
  }

  if (mergeNeeded) {
    // Now merge multiple characters into one multi-glyph character as required
    // and deal with skipping deleted accent chars
    NS_ASSERTION(charsToMergeArray.Length() == child->GetLength(),
                 "source length mismatch");
    NS_ASSERTION(deletedCharsArray.Length() == aTextRun->GetLength(),
                 "destination length mismatch");
    MergeCharactersInTextRun(aTextRun, child, charsToMergeArray.Elements(),
                             deletedCharsArray.Elements());
  } else {
    // No merging to do, so just copy; this produces a more optimized textrun.
    // We can't steal the data because the child may be cached and stealing
    // the data would break the cache.
    aTextRun->ResetGlyphRuns();
    aTextRun->CopyGlyphDataFrom(child, 0, child->GetLength(), 0);
  }
}
Example #4
0
void
nsCaseTransformTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun,
    gfxContext* aRefContext)
{
  PRUint32 length = aTextRun->GetLength();
  const PRUnichar* str = aTextRun->mString.BeginReading();
  nsRefPtr<nsStyleContext>* styles = aTextRun->mStyles.Elements();

  nsAutoString convertedString;
  nsAutoTArray<bool,50> charsToMergeArray;
  nsAutoTArray<nsStyleContext*,50> styleArray;
  nsAutoTArray<PRUint8,50> canBreakBeforeArray;
  PRUint32 extraCharsCount = 0;

  // Some languages have special casing conventions that differ from the
  // default Unicode mappings.
  // The enum values here are named for well-known exemplar languages that
  // exhibit the behavior in question; multiple lang tags may map to the
  // same setting here, if the behavior is shared by other languages.
  enum {
    eNone,    // default non-lang-specific behavior
    eTurkish, // preserve dotted/dotless-i distinction in uppercase
    eDutch    // treat "ij" digraph as a unit for capitalization
  } languageSpecificCasing = eNone;

  const nsIAtom* lang = nsnull;
  bool capitalizeDutchIJ = false;  
  PRUint32 i;
  for (i = 0; i < length; ++i) {
    PRUint32 ch = str[i];
    nsStyleContext* styleContext = styles[i];

    charsToMergeArray.AppendElement(false);
    styleArray.AppendElement(styleContext);
    canBreakBeforeArray.AppendElement(aTextRun->CanBreakLineBefore(i));

    PRUint8 style = mAllUppercase ? NS_STYLE_TEXT_TRANSFORM_UPPERCASE
      : styleContext->GetStyleText()->mTextTransform;
    bool extraChar = false;

    if (NS_IS_HIGH_SURROGATE(ch) && i < length - 1 && NS_IS_LOW_SURROGATE(str[i + 1])) {
      ch = SURROGATE_TO_UCS4(ch, str[i + 1]);
    }

    if (lang != styleContext->GetStyleFont()->mLanguage) {
      lang = styleContext->GetStyleFont()->mLanguage;
      if (lang == nsGkAtoms::tr || lang == nsGkAtoms::az ||
          lang == nsGkAtoms::ba || lang == nsGkAtoms::crh ||
          lang == nsGkAtoms::tt) {
        languageSpecificCasing = eTurkish;
      } else if (lang == nsGkAtoms::nl) {
        languageSpecificCasing = eDutch;
      } else {
        languageSpecificCasing = eNone;
      }
    }

    switch (style) {
    case NS_STYLE_TEXT_TRANSFORM_LOWERCASE:
      if (languageSpecificCasing == eTurkish && ch == 'I') {
        ch = LATIN_SMALL_LETTER_DOTLESS_I;
      } else {
        ch = ToLowerCase(ch);
      }
      break;
    case NS_STYLE_TEXT_TRANSFORM_UPPERCASE:
      if (ch == SZLIG) {
        convertedString.Append('S');
        extraChar = true;
        ch = 'S';
      } else if (languageSpecificCasing == eTurkish && ch == 'i') {
        ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
      } else {
        ch = ToUpperCase(ch);
      }
      break;
    case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE:
      if (capitalizeDutchIJ && ch == 'j') {
        ch = 'J';
        capitalizeDutchIJ = false;
        break;
      }
      capitalizeDutchIJ = false;
      if (i < aTextRun->mCapitalize.Length() && aTextRun->mCapitalize[i]) {
        if (ch == SZLIG) {
          convertedString.Append('S');
          extraChar = true;
          ch = 'S';
        } else if (languageSpecificCasing == eTurkish && ch == 'i') {
          ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
        } else if (languageSpecificCasing == eDutch && ch == 'i') {
          ch = 'I';
          capitalizeDutchIJ = true;
        } else {
          ch = ToTitleCase(ch);
        }
      }
      break;
    default:
      break;
    }

    if (IS_IN_BMP(ch)) {
      convertedString.Append(ch);
    } else {
      convertedString.Append(H_SURROGATE(ch));
      convertedString.Append(L_SURROGATE(ch));
      i++;
      charsToMergeArray.AppendElement(false);
      styleArray.AppendElement(styleContext);
      canBreakBeforeArray.AppendElement(false);
    }

    if (extraChar) {
      ++extraCharsCount;
      charsToMergeArray.AppendElement(true);
      styleArray.AppendElement(styleContext);
      canBreakBeforeArray.AppendElement(false);
    }
  }

  PRUint32 flags;
  gfxTextRunFactory::Parameters innerParams =
      GetParametersForInner(aTextRun, &flags, aRefContext);
  gfxFontGroup* fontGroup = aTextRun->GetFontGroup();

  nsAutoPtr<nsTransformedTextRun> transformedChild;
  nsAutoPtr<gfxTextRun> cachedChild;
  gfxTextRun* child;

  if (mInnerTransformingTextRunFactory) {
    transformedChild = mInnerTransformingTextRunFactory->MakeTextRun(
        convertedString.BeginReading(), convertedString.Length(),
        &innerParams, fontGroup, flags, styleArray.Elements(), false);
    child = transformedChild.get();
  } else {
    cachedChild = fontGroup->MakeTextRun(
        convertedString.BeginReading(), convertedString.Length(),
        &innerParams, flags);
    child = cachedChild.get();
  }
  if (!child)
    return;
  // Copy potential linebreaks into child so they're preserved
  // (and also child will be shaped appropriately)
  NS_ASSERTION(convertedString.Length() == canBreakBeforeArray.Length(),
               "Dropped characters or break-before values somewhere!");
  child->SetPotentialLineBreaks(0, canBreakBeforeArray.Length(),
      canBreakBeforeArray.Elements(), aRefContext);
  if (transformedChild) {
    transformedChild->FinishSettingProperties(aRefContext);
  }

  if (extraCharsCount > 0) {
    // Now merge multiple characters into one multi-glyph character as required
    MergeCharactersInTextRun(aTextRun, child, charsToMergeArray.Elements());
  } else {
    // No merging to do, so just copy; this produces a more optimized textrun.
    // We can't steal the data because the child may be cached and stealing
    // the data would break the cache.
    aTextRun->ResetGlyphRuns();
    aTextRun->CopyGlyphDataFrom(child, 0, child->GetLength(), 0);
  }
}
void TestCaseConversion()
{
  printf("==========================\n");
  printf("Start case conversion test\n");
  printf("==========================\n");

  int i;
  PRUnichar buf[256];

  printf("Test 1 - ToUpper(PRUnichar, PRUnichar*):\n");
  for(i=0;i < T2LEN ; i++)
  {
    PRUnichar ch = ToUpperCase(t2data[i]);
    if(ch != t2result[i])
      printf("\tFailed!! result unexpected %d\n", i);
  }


  printf("Test 2 - ToLower(PRUnichar, PRUnichar*):\n");
  for(i=0;i < T3LEN; i++)
  {
    PRUnichar ch = ToLowerCase(t3data[i]);
    if(ch != t3result[i])
      printf("\tFailed!! result unexpected %d\n", i);
  }

  printf("Test 3 - ToTitle(PRUnichar, PRUnichar*):\n");
  for(i=0;i < T4LEN; i++)
  {
    PRUnichar ch = ToTitleCase(t4data[i]);
    if(ch != t4result[i])
      printf("\tFailed!! result unexpected %d\n", i);
  }

  printf("Test 4 - ToUpper(PRUnichar*, PRUnichar*, uint32_t):\n");
  ToUpperCase(t2data, buf, T2LEN);
  for(i = 0; i < T2LEN; i++)
  {
     if(buf[i] != t2result[i])
     {
       printf("\tFailed!! result unexpected %d\n", i);
       break;
     }
  }

  printf("Test 5 - ToLower(PRUnichar*, PRUnichar*, uint32_t):\n");
  ToLowerCase(t3data, buf, T3LEN);
  for(i = 0; i < T3LEN; i++)
  {
     if(buf[i] != t3result[i])
     {
       printf("\tFailed!! result unexpected %d\n", i);
       break;
     }
  }

  printf("Test 6 - CaseInsensitiveCompare UTF-8 (1):\n");
  if (CaseInsensitiveCompare((char*)t6lhs, (char*)t6rhs, sizeof(t6lhs), sizeof(t6rhs)))
    printf("\tFailed!\n");
  if (!CharByCharCompareEqual((char*)t6lhs, (char*)t6rhs, sizeof(t6lhs), sizeof(t6rhs)))
    printf("\tFailed character-by-character comparison!\n");

  printf("Test 7 - CaseInsensitiveCompare UTF-8 (2):\n");
  if (CaseInsensitiveCompare(t7lhs, t7rhs, strlen(t7lhs), strlen(t7rhs)))
    printf("\tFailed!\n");
  if (!CharByCharCompareEqual(t7lhs, t7rhs, sizeof(t7lhs), sizeof(t7rhs)))
    printf("\tFailed character-by-character comparison!\n");

  printf("Test 8a - CaseInsensitiveCompare UTF-8 (3):\n");
  if (CaseInsensitiveCompare(t8lhs, t8rhs, strlen(t8lhs), strlen(t8rhs)) != -1)
    printf("\tFailed!\n");
  if (CharByCharCompareEqual(t8lhs, t8rhs, strlen(t8lhs), strlen(t8rhs)))
    printf("\tFailed character-by-character comparison!\n");

  printf("Test 8b - CaseInsensitiveCompare UTF-8 (4):\n");
  if (CaseInsensitiveCompare(t8rhs, t8lhs, strlen(t8rhs), strlen(t8lhs)) != 1)
    printf("\tFailed!\n");

  // This test may seem a bit strange.  But it's actually an easy bug to make
  // if we tried to be clever and say that two ASCII characters x and y are
  // case-insensitively equal if (x & ~0x20) == (y & ~0x20).
  printf("Test 9 - CaseInsensitiveCompare UTF-8 (5):\n");
  if (CaseInsensitiveCompare(t9rhs, t9lhs, strlen(t9lhs), strlen(t9rhs)) != 1)
    printf("\tFailed!\n");
  if (CharByCharCompareEqual(t9lhs, t9rhs, strlen(t9lhs), strlen(t9rhs)))
    printf("\tFailed character-by-character comparison!\n");

  printf("===========================\n");
  printf("Finish case conversion test\n");
  printf("===========================\n");
}