Beispiel #1
0
NS_IMETHODIMP
nsSaveAsCharset::DoCharsetConversion(const PRUnichar *inString, char **outString)
{
  NS_ENSURE_ARG_POINTER(outString);

  *outString = nullptr;

  nsresult rv;
  int32_t inStringLength = NS_strlen(inString);       // original input string length
  int32_t bufferLength;                               // allocated buffer length
  int32_t srcLength = inStringLength;
  int32_t dstLength;
  int32_t pos1, pos2;
  nsresult saveResult = NS_OK;                         // to remember NS_ERROR_UENC_NOMAPPING

  // estimate and allocate the target buffer (reserve extra memory for fallback)
  rv = mEncoder->GetMaxLength(inString, inStringLength, &dstLength);
  if (NS_FAILED(rv)) return rv;

  bufferLength = dstLength + RESERVE_FALLBACK_BYTES; // extra bytes for fallback
  // + 1 is for the terminating NUL -- we don't add that to bufferLength so that
  // we can always write dstPtr[pos2] = '\0' even when the encoder filled the
  // buffer.
  char *dstPtr = (char *) PR_Malloc(bufferLength + 1);
  if (!dstPtr) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  
  for (pos1 = 0, pos2 = 0; pos1 < inStringLength;) {
    // convert from unicode
    dstLength = bufferLength - pos2;
    NS_ASSERTION(dstLength >= 0, "out of bounds write");
    rv = mEncoder->Convert(&inString[pos1], &srcLength, &dstPtr[pos2], &dstLength);

    pos1 += srcLength ? srcLength : 1;
    pos2 += dstLength;
    dstPtr[pos2] = '\0';

    // break: this is usually the case (no error) OR unrecoverable error
    if (NS_ERROR_UENC_NOMAPPING != rv) break;

    // remember this happened and reset the result
    saveResult = rv;
    rv = NS_OK;

    // finish encoder, give it a chance to write extra data like escape sequences
    dstLength = bufferLength - pos2;
    rv = mEncoder->Finish(&dstPtr[pos2], &dstLength);
    if (NS_SUCCEEDED(rv)) {
      pos2 += dstLength;
      dstPtr[pos2] = '\0';
    }

    srcLength = inStringLength - pos1;

    // do the fallback
    if (!ATTR_NO_FALLBACK(mAttribute)) {
      uint32_t unMappedChar;
      if (NS_IS_HIGH_SURROGATE(inString[pos1-1]) && 
          inStringLength > pos1 && NS_IS_LOW_SURROGATE(inString[pos1])) {
        unMappedChar = SURROGATE_TO_UCS4(inString[pos1-1], inString[pos1]);
        pos1++;
      } else {
        unMappedChar = inString[pos1-1];
      }

      rv = mEncoder->GetMaxLength(inString+pos1, inStringLength-pos1, &dstLength);
      if (NS_FAILED(rv)) 
        break;

      rv = HandleFallBack(unMappedChar, &dstPtr, &bufferLength, &pos2, dstLength);
      if (NS_FAILED(rv)) 
        break;
      dstPtr[pos2] = '\0';
    }
  }

  if (NS_SUCCEEDED(rv)) {
    // finish encoder, give it a chance to write extra data like escape sequences
    dstLength = bufferLength - pos2;
    rv = mEncoder->Finish(&dstPtr[pos2], &dstLength);
    if (NS_SUCCEEDED(rv)) {
      pos2 += dstLength;
      dstPtr[pos2] = '\0';
    }
  }

  if (NS_FAILED(rv)) {
    PR_FREEIF(dstPtr);
    return rv;
  }

  *outString = dstPtr;      // set the result string

  // set error code so that the caller can do own fall back
  if (NS_ERROR_UENC_NOMAPPING == saveResult) {
    rv = NS_ERROR_UENC_NOMAPPING;
  }

  return rv;
}
NS_IMETHODIMP nsUnicodeToGBK::ConvertNoBuff(
  const PRUnichar * aSrc, 
  PRInt32 * aSrcLength, 
  char * aDest, 
  PRInt32 * aDestLength)
{
  PRInt32 iSrcLength = 0;
  PRInt32 iDestLength = 0;
  PRUnichar unicode;
  nsresult res = NS_OK;
  while (iSrcLength < *aSrcLength )
  {
    unicode = *aSrc;
    //if unicode's hi byte has something, it is not ASCII, must be a GB
    if(IS_ASCII(unicode))
    {
      // this is an ASCII
      *aDest = CAST_UNICHAR_TO_CHAR(*aSrc);
      aDest++; // increment 1 byte
      iDestLength +=1;
    } else {
      char byte1, byte2;
      if(mUtil.UnicodeToGBKChar( unicode, PR_FALSE, &byte1, &byte2))
      {
        // make sure we still have 2 bytes for output first
        if(iDestLength+2 > *aDestLength)
        {
          res = NS_OK_UENC_MOREOUTPUT;
          break;
        }
        aDest[0] = byte1;
        aDest[1] = byte2;
        aDest += 2;	// increment 2 bytes
        iDestLength +=2;
      } else {
        PRInt32 aOutLen = 2;
        // make sure we still have 2 bytes for output first
        if(iDestLength+2 > *aDestLength)
        {
          res = NS_OK_UENC_MOREOUTPUT;
          break;
        }
        // we cannot map in the common mapping. Let's try to
        // call the delegated 2 byte converter for the gbk or gb18030
        // unique 2 byte mapping
        if(TryExtensionEncoder(unicode, aDest, &aOutLen))
        {
          iDestLength += aOutLen;
          aDest += aOutLen;
        } else {
          // make sure we still have 4 bytes for output first
          if(iDestLength+4 > *aDestLength)
          {
            res = NS_OK_UENC_MOREOUTPUT;
            break;
          }
          // we still cannot map. Let's try to
          // call the delegated GB18030 4 byte converter 
          aOutLen = 4;
          if( NS_IS_HIGH_SURROGATE(unicode) )
          {
            if((iSrcLength+1) < *aSrcLength ) {
              if(EncodeSurrogate(aSrc[0],aSrc[1], aDest)) {
                // since we got a surrogate pair, we need to increment src.
                iSrcLength++ ; 
                aSrc++;
                iDestLength += aOutLen;
                aDest += aOutLen;
              } else {
                // only get a high surrogate, but not a low surrogate
                res = NS_ERROR_UENC_NOMAPPING;
                iSrcLength++;   // include length of the unmapped character
                break;
              }
            } else {
              mSurrogateHigh = aSrc[0];
              break; // this will go to afterwhileloop
            }
          } else {
            if( NS_IS_LOW_SURROGATE(unicode) )
            {
              if(NS_IS_HIGH_SURROGATE(mSurrogateHigh)) {
                if(EncodeSurrogate(mSurrogateHigh, aSrc[0], aDest)) {
                  iDestLength += aOutLen;
                  aDest += aOutLen;
                } else {
                  // only get a high surrogate, but not a low surrogate
                  res = NS_ERROR_UENC_NOMAPPING;
                  iSrcLength++;   // include length of the unmapped character
                  break;
                }
              } else {
                // only get a low surrogate, but not a low surrogate
                res = NS_ERROR_UENC_NOMAPPING;
                iSrcLength++;   // include length of the unmapped character
                break;
              }
            } else {
              if(Try4BytesEncoder(unicode, aDest, &aOutLen))
              {
                NS_ASSERTION((aOutLen == 4), "we should always generate 4 bytes here");
                iDestLength += aOutLen;
                aDest += aOutLen;
              } else {
                res = NS_ERROR_UENC_NOMAPPING;
                iSrcLength++;   // include length of the unmapped character
                break;
              }
            }
          }
        }
      } 
    }
    iSrcLength++ ; // Each unicode char just count as one in PRUnichar string;  	  
    mSurrogateHigh = 0;
    aSrc++;
    if ( iDestLength >= (*aDestLength) && (iSrcLength < *aSrcLength) )
    {
      res = NS_OK_UENC_MOREOUTPUT;
      break;
    }
  }
//afterwhileloop:
  *aDestLength = iDestLength;
  *aSrcLength = iSrcLength;
  return res;
}
Beispiel #3
0
nsresult
nsHyphenator::Hyphenate(const nsAString& aString,
                        nsTArray<bool>& aHyphens)
{
  if (!aHyphens.SetLength(aString.Length())) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  memset(aHyphens.Elements(), false, aHyphens.Length());

  bool inWord = false;
  PRUint32 wordStart = 0, wordLimit = 0;
  PRUint32 chLen;
  for (PRUint32 i = 0; i < aString.Length(); i += chLen) {
    PRUint32 ch = aString[i];
    chLen = 1;

    if (NS_IS_HIGH_SURROGATE(ch)) {
      if (i + 1 < aString.Length() && NS_IS_LOW_SURROGATE(aString[i+1])) {
        ch = SURROGATE_TO_UCS4(ch, aString[i+1]);
        chLen = 2;
      } else {
        NS_WARNING("unpaired surrogate found during hyphenation");
      }
    }

    nsIUGenCategory::nsUGenCategory cat = mCategories->Get(ch);
    if (cat == nsIUGenCategory::kLetter || cat == nsIUGenCategory::kMark) {
      if (!inWord) {
        inWord = true;
        wordStart = i;
      }
      wordLimit = i + chLen;
      if (i + chLen < aString.Length()) {
        continue;
      }
    }

    if (inWord) {
      const PRUnichar *begin = aString.BeginReading();
      NS_ConvertUTF16toUTF8 utf8(begin + wordStart,
                                 wordLimit - wordStart);
      nsAutoTArray<char,200> utf8hyphens;
      utf8hyphens.SetLength(utf8.Length() + 5);
      char **rep = nsnull;
      int *pos = nsnull;
      int *cut = nsnull;
      int err = hnj_hyphen_hyphenate2((HyphenDict*)mDict,
                                      utf8.BeginReading(), utf8.Length(),
                                      utf8hyphens.Elements(), nsnull,
                                      &rep, &pos, &cut);
      if (!err) {
        // Surprisingly, hnj_hyphen_hyphenate2 converts the 'hyphens' buffer
        // from utf8 code unit indexing (which would match the utf8 input
        // string directly) to Unicode character indexing.
        // We then need to convert this to utf16 code unit offsets for Gecko.
        const char *hyphPtr = utf8hyphens.Elements();
        const PRUnichar *cur = begin + wordStart;
        const PRUnichar *end = begin + wordLimit;
        while (cur < end) {
          if (*hyphPtr & 0x01) {
            aHyphens[cur - begin] = true;
          }
          cur++;
          if (cur < end && NS_IS_LOW_SURROGATE(*cur) &&
              NS_IS_HIGH_SURROGATE(*(cur-1)))
          {
            cur++;
          }
          hyphPtr++;
        }
      }
    }
    
    inWord = false;
  }

  return NS_OK;
}
NS_IMETHODIMP
nsSaveAsCharset::DoCharsetConversion(const PRUnichar *inString, char **outString)
{
  if(nullptr == outString )
    return NS_ERROR_NULL_POINTER;
  NS_ASSERTION(outString, "invalid input");

  *outString = NULL;

  nsresult rv;
  PRInt32 inStringLength = NS_strlen(inString);       // original input string length
  PRInt32 bufferLength;                               // allocated buffer length
  PRInt32 srcLength = inStringLength;
  PRInt32 dstLength;
  char *dstPtr = NULL;
  PRInt32 pos1, pos2;
  nsresult saveResult = NS_OK;                         // to remember NS_ERROR_UENC_NOMAPPING

  // estimate and allocate the target buffer (reserve extra memory for fallback)
  rv = mEncoder->GetMaxLength(inString, inStringLength, &dstLength);
  if (NS_FAILED(rv)) return rv;

  bufferLength = dstLength + 512; // reserve 512 byte for fallback.
  dstPtr = (char *) PR_Malloc(bufferLength);
  if (NULL == dstPtr) return NS_ERROR_OUT_OF_MEMORY;

  
  for (pos1 = 0, pos2 = 0; pos1 < inStringLength;) {
    // convert from unicode
    dstLength = bufferLength - pos2;
    rv = mEncoder->Convert(&inString[pos1], &srcLength, &dstPtr[pos2], &dstLength);

    pos1 += srcLength ? srcLength : 1;
    pos2 += dstLength;
    dstPtr[pos2] = '\0';

    // break: this is usually the case (no error) OR unrecoverable error
    if (NS_ERROR_UENC_NOMAPPING != rv) break;

    // remember this happened and reset the result
    saveResult = rv;
    rv = NS_OK;

    // finish encoder, give it a chance to write extra data like escape sequences
    dstLength = bufferLength - pos2;
    rv = mEncoder->Finish(&dstPtr[pos2], &dstLength);
    if (NS_SUCCEEDED(rv)) {
      pos2 += dstLength;
      dstPtr[pos2] = '\0';
    }

    srcLength = inStringLength - pos1;

    // do the fallback
    if (!ATTR_NO_FALLBACK(mAttribute)) {
      PRUint32 unMappedChar;
      if (NS_IS_HIGH_SURROGATE(inString[pos1-1]) && 
          inStringLength > pos1 && NS_IS_LOW_SURROGATE(inString[pos1])) {
        unMappedChar = SURROGATE_TO_UCS4(inString[pos1-1], inString[pos1]);
        pos1++;
      } else {
        unMappedChar = inString[pos1-1];
      }

      rv = mEncoder->GetMaxLength(inString+pos1, inStringLength-pos1, &dstLength);
      if (NS_FAILED(rv)) 
        break;

      rv = HandleFallBack(unMappedChar, &dstPtr, &bufferLength, &pos2, dstLength);
      if (NS_FAILED(rv)) 
        break;
      dstPtr[pos2] = '\0';
    }
  }

  if (NS_SUCCEEDED(rv)) {
    // finish encoder, give it a chance to write extra data like escape sequences
    dstLength = bufferLength - pos2;
    rv = mEncoder->Finish(&dstPtr[pos2], &dstLength);
    if (NS_SUCCEEDED(rv)) {
      pos2 += dstLength;
      dstPtr[pos2] = '\0';
    }
  }

  if (NS_FAILED(rv)) {
    PR_FREEIF(dstPtr);
    return rv;
  }

  *outString = dstPtr;      // set the result string

  // set error code so that the caller can do own fall back
  if (NS_ERROR_UENC_NOMAPPING == saveResult) {
    rv = NS_ERROR_UENC_NOMAPPING;
  }

  return rv;
}
bool
gfxScriptItemizer::Next(PRUint32& aRunStart, PRUint32& aRunLimit,
                        PRInt32& aRunScript)
{
    /* if we've fallen off the end of the text, we're done */
    if (scriptLimit >= textLength) {
        return false;
    }

    SYNC_FIXUP();
    scriptCode = MOZ_SCRIPT_COMMON;

    for (scriptStart = scriptLimit; scriptLimit < textLength; scriptLimit += 1) {
        PRUint32 ch;
        PRInt32 sc;
        PRInt32 pairIndex;
        PRUint32 startOfChar = scriptLimit;

        ch = textPtr[scriptLimit];

        /*
         * MODIFICATION for Gecko - clear the paired-character stack
         * when we see a space character, because we cannot trust
         * context outside the current "word" when doing textrun
         * construction
         */
        if (ch == 0x20) {
            while (STACK_IS_NOT_EMPTY()) {
                pop();
            }
            sc = MOZ_SCRIPT_COMMON;
            pairIndex = -1;
        } else {
            /* decode UTF-16 (may be surrogate pair) */
            if (NS_IS_HIGH_SURROGATE(ch) && scriptLimit < textLength - 1) {
                PRUint32 low = textPtr[scriptLimit + 1];
                if (NS_IS_LOW_SURROGATE(low)) {
                    ch = SURROGATE_TO_UCS4(ch, low);
                    scriptLimit += 1;
                }
            }

            sc = mozilla::unicode::GetScriptCode(ch);

            pairIndex = getPairIndex(ch);

            /*
             * Paired character handling:
             *
             * if it's an open character, push it onto the stack.
             * if it's a close character, find the matching open on the
             * stack, and use that script code. Any non-matching open
             * characters above it on the stack will be poped.
             */
            if (pairIndex >= 0) {
                if ((pairIndex & 1) == 0) {
                    push(pairIndex, scriptCode);
                } else {
                    PRInt32 pi = pairIndex & ~1;

                    while (STACK_IS_NOT_EMPTY() && TOP().pairIndex != pi) {
                        pop();
                    }

                    if (STACK_IS_NOT_EMPTY()) {
                        sc = TOP().scriptCode;
                    }
                }
            }
        }

        if (sameScript(scriptCode, sc)) {
            if (scriptCode <= MOZ_SCRIPT_INHERITED &&
                sc > MOZ_SCRIPT_INHERITED)
            {
                scriptCode = sc;
                fixup(scriptCode);
            }

            /*
             * if this character is a close paired character,
             * pop the matching open character from the stack
             */
            if (pairIndex >= 0 && (pairIndex & 1) != 0) {
                pop();
            }
        } else {
            /*
             * reset scriptLimit in case it was advanced during reading a
             * multiple-code-unit character
             */
            scriptLimit = startOfChar;

            break;
        }
    }

    aRunStart = scriptStart;
    aRunLimit = scriptLimit;
    aRunScript = scriptCode;

    return true;
}
void
nsHTMLContentSerializer::AppendToString(const nsAString& aStr,
                                        nsAString& aOutputStr,
                                        PRBool aTranslateEntities,
                                        PRBool aIncrColumn)
{
    if (mBodyOnly && !mInBody) {
        return;
    }

    if (aIncrColumn) {
        mColPos += aStr.Length();
    }

    if (aTranslateEntities && !mInCDATA) {
        if (mFlags & (nsIDocumentEncoder::OutputEncodeBasicEntities  |
                      nsIDocumentEncoder::OutputEncodeLatin1Entities |
                      nsIDocumentEncoder::OutputEncodeHTMLEntities   |
                      nsIDocumentEncoder::OutputEncodeW3CEntities)) {
            nsIParserService* parserService = nsContentUtils::GetParserService();

            if (!parserService) {
                NS_ERROR("Can't get parser service");
                return;
            }

            nsReadingIterator<PRUnichar> done_reading;
            aStr.EndReading(done_reading);

            // for each chunk of |aString|...
            PRUint32 advanceLength = 0;
            nsReadingIterator<PRUnichar> iter;

            const char **entityTable = mInAttribute ? kAttrEntities : kEntities;

            for (aStr.BeginReading(iter);
                    iter != done_reading;
                    iter.advance(PRInt32(advanceLength))) {
                PRUint32 fragmentLength = iter.size_forward();
                PRUint32 lengthReplaced = 0; // the number of UTF-16 codepoints
                //  replaced by a particular entity
                const PRUnichar* c = iter.get();
                const PRUnichar* fragmentStart = c;
                const PRUnichar* fragmentEnd = c + fragmentLength;
                const char* entityText = nsnull;
                nsCAutoString entityReplacement;
                char* fullEntityText = nsnull;

                advanceLength = 0;
                // for each character in this chunk, check if it
                // needs to be replaced
                for (; c < fragmentEnd; c++, advanceLength++) {
                    PRUnichar val = *c;
                    if (val == kValNBSP) {
                        entityText = kEntityNBSP;
                        break;
                    }
                    else if ((val <= kGTVal) && (entityTable[val][0] != 0)) {
                        entityText = entityTable[val];
                        break;
                    } else if (val > 127 &&
                               ((val < 256 &&
                                 mFlags & nsIDocumentEncoder::OutputEncodeLatin1Entities) ||
                                mFlags & nsIDocumentEncoder::OutputEncodeHTMLEntities)) {
                        parserService->HTMLConvertUnicodeToEntity(val, entityReplacement);

                        if (!entityReplacement.IsEmpty()) {
                            entityText = entityReplacement.get();
                            break;
                        }
                    }
                    else if (val > 127 &&
                             mFlags & nsIDocumentEncoder::OutputEncodeW3CEntities &&
                             mEntityConverter) {
                        if (NS_IS_HIGH_SURROGATE(val) &&
                                c + 1 < fragmentEnd &&
                                NS_IS_LOW_SURROGATE(*(c + 1))) {
                            PRUint32 valUTF32 = SURROGATE_TO_UCS4(val, *(++c));
                            if (NS_SUCCEEDED(mEntityConverter->ConvertUTF32ToEntity(valUTF32,
                                             nsIEntityConverter::entityW3C, &fullEntityText))) {
                                lengthReplaced = 2;
                                break;
                            }
                            else {
                                advanceLength++;
                            }
                        }
                        else if (NS_SUCCEEDED(mEntityConverter->ConvertToEntity(val,
                                              nsIEntityConverter::entityW3C,
                                              &fullEntityText))) {
                            lengthReplaced = 1;
                            break;
                        }
                    }
                }

                aOutputStr.Append(fragmentStart, advanceLength);
                if (entityText) {
                    aOutputStr.Append(PRUnichar('&'));
                    AppendASCIItoUTF16(entityText, aOutputStr);
                    aOutputStr.Append(PRUnichar(';'));
                    advanceLength++;
                }
                // if it comes from nsIEntityConverter, it already has '&' and ';'
                else if (fullEntityText) {
                    AppendASCIItoUTF16(fullEntityText, aOutputStr);
                    nsMemory::Free(fullEntityText);
                    advanceLength += lengthReplaced;
                }
            }
        } else {
            nsXMLContentSerializer::AppendToString(aStr, aOutputStr, aTranslateEntities, aIncrColumn);
        }

        return;
    }

    aOutputStr.Append(aStr);
}
Beispiel #7
0
bool
nsMathMLTokenFrame::SetTextStyle()
{
    if (mContent->Tag() != nsGkAtoms::mi_)
        return false;

    if (!mFrames.FirstChild())
        return false;

    // Get the text content that we enclose and its length
    nsAutoString data;
    nsContentUtils::GetNodeTextContent(mContent, false, data);
    PRInt32 length = data.Length();
    if (!length)
        return false;

    nsAutoString fontstyle;
    bool isSingleCharacter =
        length == 1 ||
        (length == 2 && NS_IS_HIGH_SURROGATE(data[0]));
    if (isSingleCharacter &&
            nsMathMLOperators::LookupInvariantChar(data) != eMATHVARIANT_NONE) {
        // bug 65951 - a non-stylable character has its own intrinsic appearance
        fontstyle.AssignLiteral("invariant");
    }
    else {
        // Attributes override the default behavior.
        nsAutoString value;
        if (!(GetAttribute(mContent, mPresentationData.mstyle,
                           nsGkAtoms::mathvariant_, value) ||
                GetAttribute(mContent, mPresentationData.mstyle,
                             nsGkAtoms::fontstyle_, value))) {
            if (!isSingleCharacter) {
                fontstyle.AssignLiteral("normal");
            }
            else if (length == 1 && // BMP
                     !nsMathMLOperators::
                     TransformVariantChar(data[0], eMATHVARIANT_italic).
                     Equals(data)) {
                // Transformation exists.  Try to make the BMP character look like the
                // styled character using the style system until bug 114365 is resolved.
                fontstyle.AssignLiteral("italic");
            }
            // else single character but there is no corresponding Math Alphanumeric
            // Symbol character: "ignore the value of the [default] mathvariant
            // attribute".
        }
    }

    // set the _moz-math-font-style attribute without notifying that we want a reflow
    if (fontstyle.IsEmpty()) {
        if (mContent->HasAttr(kNameSpaceID_None, nsGkAtoms::_moz_math_fontstyle_)) {
            mContent->UnsetAttr(kNameSpaceID_None, nsGkAtoms::_moz_math_fontstyle_,
                                false);
            return true;
        }
    }
    else if (!mContent->AttrValueIs(kNameSpaceID_None,
                                    nsGkAtoms::_moz_math_fontstyle_,
                                    fontstyle, eCaseMatters)) {
        mContent->SetAttr(kNameSpaceID_None, nsGkAtoms::_moz_math_fontstyle_,
                          fontstyle, false);
        return true;
    }

    return false;
}
void
nsCaseTransformTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun,
    gfxContext* aRefContext)
{
  PRUint32 length = aTextRun->GetLength();
  const PRUnichar* str = aTextRun->mString.BeginReading();
  nsRefPtr<nsStyleContext>* styles = aTextRun->mStyles.Elements();

  nsAutoString convertedString;
  nsAutoTArray<bool,50> charsToMergeArray;
  nsAutoTArray<bool,50> deletedCharsArray;
  nsAutoTArray<nsStyleContext*,50> styleArray;
  nsAutoTArray<PRUint8,50> canBreakBeforeArray;
  bool mergeNeeded = false;

  // Some languages have special casing conventions that differ from the
  // default Unicode mappings.
  // The enum values here are named for well-known exemplar languages that
  // exhibit the behavior in question; multiple lang tags may map to the
  // same setting here, if the behavior is shared by other languages.
  enum {
    eNone,    // default non-lang-specific behavior
    eTurkish, // preserve dotted/dotless-i distinction in uppercase
    eDutch,   // treat "ij" digraph as a unit for capitalization
    eGreek    // strip accent when uppercasing Greek vowels
  } languageSpecificCasing = eNone;

  const nsIAtom* lang = nsnull;
  bool capitalizeDutchIJ = false;
  bool prevIsLetter = false;
  PRUint32 sigmaIndex = PRUint32(-1);
  nsIUGenCategory::nsUGenCategory cat;
  GreekCasingState greekState = kStart;
  PRUint32 i;
  for (i = 0; i < length; ++i) {
    PRUint32 ch = str[i];
    nsStyleContext* styleContext = styles[i];

    PRUint8 style = mAllUppercase ? NS_STYLE_TEXT_TRANSFORM_UPPERCASE
      : styleContext->GetStyleText()->mTextTransform;
    int extraChars = 0;
    const mozilla::unicode::MultiCharMapping *mcm;

    if (NS_IS_HIGH_SURROGATE(ch) && i < length - 1 && NS_IS_LOW_SURROGATE(str[i + 1])) {
      ch = SURROGATE_TO_UCS4(ch, str[i + 1]);
    }

    if (lang != styleContext->GetStyleFont()->mLanguage) {
      lang = styleContext->GetStyleFont()->mLanguage;
      if (lang == nsGkAtoms::tr || lang == nsGkAtoms::az ||
          lang == nsGkAtoms::ba || lang == nsGkAtoms::crh ||
          lang == nsGkAtoms::tt) {
        languageSpecificCasing = eTurkish;
      } else if (lang == nsGkAtoms::nl) {
        languageSpecificCasing = eDutch;
      } else if (lang == nsGkAtoms::el) {
        languageSpecificCasing = eGreek;
        greekState = kStart;
      } else {
        languageSpecificCasing = eNone;
      }
    }

    switch (style) {
    case NS_STYLE_TEXT_TRANSFORM_LOWERCASE:
      if (languageSpecificCasing == eTurkish) {
        if (ch == 'I') {
          ch = LATIN_SMALL_LETTER_DOTLESS_I;
          prevIsLetter = true;
          sigmaIndex = PRUint32(-1);
          break;
        }
        if (ch == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
          ch = 'i';
          prevIsLetter = true;
          sigmaIndex = PRUint32(-1);
          break;
        }
      }

      // Special lowercasing behavior for Greek Sigma: note that this is listed
      // as context-sensitive in Unicode's SpecialCasing.txt, but is *not* a
      // language-specific mapping; it applies regardless of the language of
      // the element.
      //
      // The lowercase mapping for CAPITAL SIGMA should be to SMALL SIGMA (i.e.
      // the non-final form) whenever there is a following letter, or when the
      // CAPITAL SIGMA occurs in isolation (neither preceded nor followed by a
      // LETTER); and to FINAL SIGMA when it is preceded by another letter but
      // not followed by one.
      //
      // To implement the context-sensitive nature of this mapping, we keep
      // track of whether the previous character was a letter. If not, CAPITAL
      // SIGMA will map directly to SMALL SIGMA. If the previous character
      // was a letter, CAPITAL SIGMA maps to FINAL SIGMA and we record the
      // position in the converted string; if we then encounter another letter,
      // that FINAL SIGMA is replaced with a standard SMALL SIGMA.

      cat = mozilla::unicode::GetGenCategory(ch);

      // If sigmaIndex is not -1, it marks where we have provisionally mapped
      // a CAPITAL SIGMA to FINAL SIGMA; if we now find another letter, we
      // need to change it to SMALL SIGMA.
      if (sigmaIndex != PRUint32(-1)) {
        if (cat == nsIUGenCategory::kLetter) {
          convertedString.SetCharAt(GREEK_SMALL_LETTER_SIGMA, sigmaIndex);
        }
      }

      if (ch == GREEK_CAPITAL_LETTER_SIGMA) {
        // If preceding char was a letter, map to FINAL instead of SMALL,
        // and note where it occurred by setting sigmaIndex; we'll change it
        // to standard SMALL SIGMA later if another letter follows
        if (prevIsLetter) {
          ch = GREEK_SMALL_LETTER_FINAL_SIGMA;
          sigmaIndex = convertedString.Length();
        } else {
          // CAPITAL SIGMA not preceded by a letter is unconditionally mapped
          // to SMALL SIGMA
          ch = GREEK_SMALL_LETTER_SIGMA;
          sigmaIndex = PRUint32(-1);
        }
        prevIsLetter = true;
        break;
      }

      // ignore diacritics for the purpose of contextual sigma mapping;
      // otherwise, reset prevIsLetter appropriately and clear the
      // sigmaIndex marker
      if (cat != nsIUGenCategory::kMark) {
        prevIsLetter = (cat == nsIUGenCategory::kLetter);
        sigmaIndex = PRUint32(-1);
      }

      mcm = mozilla::unicode::SpecialLower(ch);
      if (mcm) {
        int j = 0;
        while (j < 2 && mcm->mMappedChars[j + 1]) {
          convertedString.Append(mcm->mMappedChars[j]);
          ++extraChars;
          ++j;
        }
        ch = mcm->mMappedChars[j];
        break;
      }

      ch = ToLowerCase(ch);
      break;

    case NS_STYLE_TEXT_TRANSFORM_UPPERCASE:
      if (languageSpecificCasing == eTurkish && ch == 'i') {
        ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
        break;
      }

      if (languageSpecificCasing == eGreek) {
        ch = GreekUpperCase(ch, &greekState);
        break;
      }

      mcm = mozilla::unicode::SpecialUpper(ch);
      if (mcm) {
        int j = 0;
        while (j < 2 && mcm->mMappedChars[j + 1]) {
          convertedString.Append(mcm->mMappedChars[j]);
          ++extraChars;
          ++j;
        }
        ch = mcm->mMappedChars[j];
        break;
      }

      ch = ToUpperCase(ch);
      break;

    case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE:
      if (capitalizeDutchIJ && ch == 'j') {
        ch = 'J';
        capitalizeDutchIJ = false;
        break;
      }
      capitalizeDutchIJ = false;
      if (i < aTextRun->mCapitalize.Length() && aTextRun->mCapitalize[i]) {
        if (languageSpecificCasing == eTurkish && ch == 'i') {
          ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
          break;
        }
        if (languageSpecificCasing == eDutch && ch == 'i') {
          ch = 'I';
          capitalizeDutchIJ = true;
          break;
        }

        mcm = mozilla::unicode::SpecialTitle(ch);
        if (mcm) {
          int j = 0;
          while (j < 2 && mcm->mMappedChars[j + 1]) {
            convertedString.Append(mcm->mMappedChars[j]);
            ++extraChars;
            ++j;
          }
          ch = mcm->mMappedChars[j];
          break;
        }

        ch = ToTitleCase(ch);
      }
      break;

    default:
      break;
    }

    if (ch == PRUint32(-1)) {
      deletedCharsArray.AppendElement(true);
      mergeNeeded = true;
    } else {
      deletedCharsArray.AppendElement(false);
      charsToMergeArray.AppendElement(false);
      styleArray.AppendElement(styleContext);
      canBreakBeforeArray.AppendElement(aTextRun->CanBreakLineBefore(i));

      if (IS_IN_BMP(ch)) {
        convertedString.Append(ch);
      } else {
        convertedString.Append(H_SURROGATE(ch));
        convertedString.Append(L_SURROGATE(ch));
        ++i;
        deletedCharsArray.AppendElement(true); // not exactly deleted, but the
                                               // trailing surrogate is skipped
        ++extraChars;
      }

      while (extraChars-- > 0) {
        mergeNeeded = true;
        charsToMergeArray.AppendElement(true);
        styleArray.AppendElement(styleContext);
        canBreakBeforeArray.AppendElement(false);
      }
    }
  }

  PRUint32 flags;
  gfxTextRunFactory::Parameters innerParams =
      GetParametersForInner(aTextRun, &flags, aRefContext);
  gfxFontGroup* fontGroup = aTextRun->GetFontGroup();

  nsAutoPtr<nsTransformedTextRun> transformedChild;
  nsAutoPtr<gfxTextRun> cachedChild;
  gfxTextRun* child;

  if (mInnerTransformingTextRunFactory) {
    transformedChild = mInnerTransformingTextRunFactory->MakeTextRun(
        convertedString.BeginReading(), convertedString.Length(),
        &innerParams, fontGroup, flags, styleArray.Elements(), false);
    child = transformedChild.get();
  } else {
    cachedChild = fontGroup->MakeTextRun(
        convertedString.BeginReading(), convertedString.Length(),
        &innerParams, flags);
    child = cachedChild.get();
  }
  if (!child)
    return;
  // Copy potential linebreaks into child so they're preserved
  // (and also child will be shaped appropriately)
  NS_ASSERTION(convertedString.Length() == canBreakBeforeArray.Length(),
               "Dropped characters or break-before values somewhere!");
  child->SetPotentialLineBreaks(0, canBreakBeforeArray.Length(),
      canBreakBeforeArray.Elements(), aRefContext);
  if (transformedChild) {
    transformedChild->FinishSettingProperties(aRefContext);
  }

  if (mergeNeeded) {
    // Now merge multiple characters into one multi-glyph character as required
    // and deal with skipping deleted accent chars
    NS_ASSERTION(charsToMergeArray.Length() == child->GetLength(),
                 "source length mismatch");
    NS_ASSERTION(deletedCharsArray.Length() == aTextRun->GetLength(),
                 "destination length mismatch");
    MergeCharactersInTextRun(aTextRun, child, charsToMergeArray.Elements(),
                             deletedCharsArray.Elements());
  } else {
    // No merging to do, so just copy; this produces a more optimized textrun.
    // We can't steal the data because the child may be cached and stealing
    // the data would break the cache.
    aTextRun->ResetGlyphRuns();
    aTextRun->CopyGlyphDataFrom(child, 0, child->GetLength(), 0);
  }
}
void
nsFontVariantTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun,
    gfxContext* aRefContext)
{
  gfxFontGroup* fontGroup = aTextRun->GetFontGroup();
  gfxFontStyle fontStyle = *fontGroup->GetStyle();
  fontStyle.size *= 0.8;
  nsRefPtr<gfxFontGroup> smallFont = fontGroup->Copy(&fontStyle);
  if (!smallFont)
    return;

  PRUint32 flags;
  gfxTextRunFactory::Parameters innerParams =
      GetParametersForInner(aTextRun, &flags, aRefContext);

  PRUint32 length = aTextRun->GetLength();
  const PRUnichar* str = aTextRun->mString.BeginReading();
  nsRefPtr<nsStyleContext>* styles = aTextRun->mStyles.Elements();
  // Create a textrun so we can check cluster-start properties
  nsAutoPtr<gfxTextRun> inner(fontGroup->MakeTextRun(str, length, &innerParams, flags));
  if (!inner.get())
    return;

  nsCaseTransformTextRunFactory uppercaseFactory(nsnull, true);

  aTextRun->ResetGlyphRuns();

  PRUint32 runStart = 0;
  nsAutoTArray<nsStyleContext*,50> styleArray;
  nsAutoTArray<PRUint8,50> canBreakBeforeArray;

  enum RunCaseState {
    kUpperOrCaseless, // will be untouched by font-variant:small-caps
    kLowercase,       // will be uppercased and reduced
    kSpecialUpper     // specials: don't shrink, but apply uppercase mapping
  };
  RunCaseState runCase = kUpperOrCaseless;

  // Note that this loop runs from 0 to length *inclusive*, so the last
  // iteration is in effect beyond the end of the input text, to give a
  // chance to finish the last casing run we've found.
  // The last iteration, when i==length, must not attempt to look at the
  // character position [i] or the style data for styles[i], as this would
  // be beyond the valid length of the textrun or its style array.
  for (PRUint32 i = 0; i <= length; ++i) {
    RunCaseState chCase = kUpperOrCaseless;
    // Unless we're at the end, figure out what treatment the current
    // character will need.
    if (i < length) {
      nsStyleContext* styleContext = styles[i];
      // Characters that aren't the start of a cluster are ignored here. They
      // get added to whatever lowercase/non-lowercase run we're in.
      if (!inner->IsClusterStart(i)) {
        chCase = runCase;
      } else {
        if (styleContext->GetStyleFont()->mFont.variant == NS_STYLE_FONT_VARIANT_SMALL_CAPS) {
          PRUint32 ch = str[i];
          if (NS_IS_HIGH_SURROGATE(ch) && i < length - 1 && NS_IS_LOW_SURROGATE(str[i + 1])) {
            ch = SURROGATE_TO_UCS4(ch, str[i + 1]);
          }
          PRUint32 ch2 = ToUpperCase(ch);
          if (ch != ch2 || mozilla::unicode::SpecialUpper(ch)) {
            chCase = kLowercase;
          } else if (styleContext->GetStyleFont()->mLanguage == nsGkAtoms::el) {
            // In Greek, check for characters that will be modified by the
            // GreekUpperCase mapping - this catches accented capitals where
            // the accent is to be removed (bug 307039). These are handled by
            // a transformed child run using the full-size font.
            GreekCasingState state = kStart; // don't need exact context here
            ch2 = GreekUpperCase(ch, &state);
            if (ch != ch2) {
              chCase = kSpecialUpper;
            }
          }
        } else {
          // Don't transform the character! I.e., pretend that it's not lowercase
        }
      }
    }

    // At the end of the text, or when the current character needs different
    // casing treatment from the current run, finish the run-in-progress
    // and prepare to accumulate a new run.
    // Note that we do not look at any source data for offset [i] here,
    // as that would be invalid in the case where i==length.
    if ((i == length || runCase != chCase) && runStart < i) {
      nsAutoPtr<nsTransformedTextRun> transformedChild;
      nsAutoPtr<gfxTextRun> cachedChild;
      gfxTextRun* child;

      switch (runCase) {
      case kUpperOrCaseless:
        cachedChild =
          fontGroup->MakeTextRun(str + runStart, i - runStart, &innerParams,
                                 flags);
        child = cachedChild.get();
        break;
      case kLowercase:
        transformedChild =
          uppercaseFactory.MakeTextRun(str + runStart, i - runStart,
                                       &innerParams, smallFont, flags,
                                       styleArray.Elements(), false);
        child = transformedChild;
        break;
      case kSpecialUpper:
        transformedChild =
          uppercaseFactory.MakeTextRun(str + runStart, i - runStart,
                                       &innerParams, fontGroup, flags,
                                       styleArray.Elements(), false);
        child = transformedChild;
        break;
      }
      if (!child)
        return;
      // Copy potential linebreaks into child so they're preserved
      // (and also child will be shaped appropriately)
      NS_ASSERTION(canBreakBeforeArray.Length() == i - runStart,
                   "lost some break-before values?");
      child->SetPotentialLineBreaks(0, canBreakBeforeArray.Length(),
          canBreakBeforeArray.Elements(), aRefContext);
      if (transformedChild) {
        transformedChild->FinishSettingProperties(aRefContext);
      }
      aTextRun->CopyGlyphDataFrom(child, 0, child->GetLength(), runStart);

      runStart = i;
      styleArray.Clear();
      canBreakBeforeArray.Clear();
    }

    if (i < length) {
      runCase = chCase;
      styleArray.AppendElement(styles[i]);
      canBreakBeforeArray.AppendElement(aTextRun->CanBreakLineBefore(i));
    }
  }
}