void nsHTMLContentSerializer::AppendToString(const nsAString& aStr, nsAString& aOutputStr, PRBool aTranslateEntities, PRBool aIncrColumn) { if (mBodyOnly && !mInBody) { return; } if (aIncrColumn) { mColPos += aStr.Length(); } if (aTranslateEntities && !mInCDATA) { if (mFlags & (nsIDocumentEncoder::OutputEncodeBasicEntities | nsIDocumentEncoder::OutputEncodeLatin1Entities | nsIDocumentEncoder::OutputEncodeHTMLEntities | nsIDocumentEncoder::OutputEncodeW3CEntities)) { nsIParserService* parserService = nsContentUtils::GetParserService(); if (!parserService) { NS_ERROR("Can't get parser service"); return; } nsReadingIterator<PRUnichar> done_reading; aStr.EndReading(done_reading); // for each chunk of |aString|... PRUint32 advanceLength = 0; nsReadingIterator<PRUnichar> iter; const char **entityTable = mInAttribute ? kAttrEntities : kEntities; for (aStr.BeginReading(iter); iter != done_reading; iter.advance(PRInt32(advanceLength))) { PRUint32 fragmentLength = iter.size_forward(); PRUint32 lengthReplaced = 0; // the number of UTF-16 codepoints // replaced by a particular entity const PRUnichar* c = iter.get(); const PRUnichar* fragmentStart = c; const PRUnichar* fragmentEnd = c + fragmentLength; const char* entityText = nsnull; nsCAutoString entityReplacement; char* fullEntityText = nsnull; advanceLength = 0; // for each character in this chunk, check if it // needs to be replaced for (; c < fragmentEnd; c++, advanceLength++) { PRUnichar val = *c; if (val == kValNBSP) { entityText = kEntityNBSP; break; } else if ((val <= kGTVal) && (entityTable[val][0] != 0)) { entityText = entityTable[val]; break; } else if (val > 127 && ((val < 256 && mFlags & nsIDocumentEncoder::OutputEncodeLatin1Entities) || mFlags & nsIDocumentEncoder::OutputEncodeHTMLEntities)) { parserService->HTMLConvertUnicodeToEntity(val, entityReplacement); if (!entityReplacement.IsEmpty()) { entityText = entityReplacement.get(); break; } } else if (val > 127 && mFlags & nsIDocumentEncoder::OutputEncodeW3CEntities && mEntityConverter) { if (NS_IS_HIGH_SURROGATE(val) && c + 1 < fragmentEnd && NS_IS_LOW_SURROGATE(*(c + 1))) { PRUint32 valUTF32 = SURROGATE_TO_UCS4(val, *(++c)); if (NS_SUCCEEDED(mEntityConverter->ConvertUTF32ToEntity(valUTF32, nsIEntityConverter::entityW3C, &fullEntityText))) { lengthReplaced = 2; break; } else { advanceLength++; } } else if (NS_SUCCEEDED(mEntityConverter->ConvertToEntity(val, nsIEntityConverter::entityW3C, &fullEntityText))) { lengthReplaced = 1; break; } } } aOutputStr.Append(fragmentStart, advanceLength); if (entityText) { aOutputStr.Append(PRUnichar('&')); AppendASCIItoUTF16(entityText, aOutputStr); aOutputStr.Append(PRUnichar(';')); advanceLength++; } // if it comes from nsIEntityConverter, it already has '&' and ';' else if (fullEntityText) { AppendASCIItoUTF16(fullEntityText, aOutputStr); nsMemory::Free(fullEntityText); advanceLength += lengthReplaced; } } } else { nsXMLContentSerializer::AppendToString(aStr, aOutputStr, aTranslateEntities, aIncrColumn); } return; } aOutputStr.Append(aStr); }
void CopyASCIItoUTF16(const nsACString& aSource, nsAString& aDest) { aDest.Truncate(); AppendASCIItoUTF16(aSource, aDest); }
nsresult nsHTMLContentSerializer::EscapeURI(const nsAString& aURI, nsAString& aEscapedURI) { // URL escape %xx cannot be used in JS. // No escaping if the scheme is 'javascript'. if (IsJavaScript(nsGkAtoms::href, aURI)) { aEscapedURI = aURI; return NS_OK; } // nsITextToSubURI does charset convert plus uri escape // This is needed to convert to a document charset which is needed to support existing browsers. // But we eventually want to use UTF-8 instead of a document charset, then the code would be much simpler. // See HTML 4.01 spec, "Appendix B.2.1 Non-ASCII characters in URI attribute values" nsCOMPtr<nsITextToSubURI> textToSubURI; nsAutoString uri(aURI); // in order to use FindCharInSet() nsresult rv = NS_OK; if (!mCharset.IsEmpty() && !IsASCII(uri)) { textToSubURI = do_GetService(NS_ITEXTTOSUBURI_CONTRACTID, &rv); NS_ENSURE_SUCCESS(rv, rv); } PRInt32 start = 0; PRInt32 end; nsAutoString part; nsXPIDLCString escapedURI; aEscapedURI.Truncate(0); // Loop and escape parts by avoiding escaping reserved characters (and '%', '#' ). while ((end = uri.FindCharInSet("%#;/?:@&=+$,", start)) != -1) { part = Substring(aURI, start, (end-start)); if (textToSubURI && !IsASCII(part)) { rv = textToSubURI->ConvertAndEscape(mCharset.get(), part.get(), getter_Copies(escapedURI)); NS_ENSURE_SUCCESS(rv, rv); } else { escapedURI.Adopt(nsEscape(NS_ConvertUTF16toUTF8(part).get(), url_Path)); } AppendASCIItoUTF16(escapedURI, aEscapedURI); // Append a reserved character without escaping. part = Substring(aURI, end, 1); aEscapedURI.Append(part); start = end + 1; } if (start < (PRInt32) aURI.Length()) { // Escape the remaining part. part = Substring(aURI, start, aURI.Length()-start); if (textToSubURI) { rv = textToSubURI->ConvertAndEscape(mCharset.get(), part.get(), getter_Copies(escapedURI)); NS_ENSURE_SUCCESS(rv, rv); } else { escapedURI.Adopt(nsEscape(NS_ConvertUTF16toUTF8(part).get(), url_Path)); } AppendASCIItoUTF16(escapedURI, aEscapedURI); } return rv; }
void nsString::AppendWithConversion( const nsACString& aData ) { AppendASCIItoUTF16(aData, *this); }