コード例 #1
0
/**
 * Appends aStr to aOutputStr, optionally replacing characters with entity
 * references.
 *
 * Nothing is appended while only the body is being serialized (mBodyOnly)
 * and the serializer is not inside the body (!mInBody).
 *
 * When aTranslateEntities is set and the serializer is not inside CDATA:
 *  - if any of the OutputEncode*Entities flags is set in mFlags, the string
 *    is scanned here and characters are replaced using kEntities /
 *    kAttrEntities, the parser service, or mEntityConverter;
 *  - otherwise the call is forwarded to nsXMLContentSerializer.
 * In every other case aStr is appended unchanged.
 *
 * @param aStr               text to append
 * @param aOutputStr         string that receives the (possibly escaped) text
 * @param aTranslateEntities whether entity replacement is requested
 * @param aIncrColumn        whether mColPos is advanced by aStr.Length()
 */
void
nsHTMLContentSerializer::AppendToString(const nsAString& aStr,
                                        nsAString& aOutputStr,
                                        PRBool aTranslateEntities,
                                        PRBool aIncrColumn)
{
    // Body-only output: drop anything encountered outside the body.
    if (mBodyOnly && !mInBody) {
        return;
    }

    // The column position is advanced by the length of the *input* string,
    // not by the length of the text that ends up in aOutputStr.
    if (aIncrColumn) {
        mColPos += aStr.Length();
    }

    if (aTranslateEntities && !mInCDATA) {
        if (mFlags & (nsIDocumentEncoder::OutputEncodeBasicEntities  |
                      nsIDocumentEncoder::OutputEncodeLatin1Entities |
                      nsIDocumentEncoder::OutputEncodeHTMLEntities   |
                      nsIDocumentEncoder::OutputEncodeW3CEntities)) {
            nsIParserService* parserService = nsContentUtils::GetParserService();

            // Without the parser service nothing is appended at all; note
            // that mColPos has already been advanced above at this point.
            if (!parserService) {
                NS_ERROR("Can't get parser service");
                return;
            }

            nsReadingIterator<PRUnichar> done_reading;
            aStr.EndReading(done_reading);

            // for each chunk of |aString|...
            // advanceLength is recomputed inside the loop body and then
            // consumed by the for-statement's increment expression.
            PRUint32 advanceLength = 0;
            nsReadingIterator<PRUnichar> iter;

            // Attribute values use a different replacement table than
            // other text.
            const char **entityTable = mInAttribute ? kAttrEntities : kEntities;

            for (aStr.BeginReading(iter);
                    iter != done_reading;
                    iter.advance(PRInt32(advanceLength))) {
                PRUint32 fragmentLength = iter.size_forward();
                PRUint32 lengthReplaced = 0; // the number of UTF-16 code units
                //  replaced by a particular entity
                const PRUnichar* c = iter.get();
                const PRUnichar* fragmentStart = c;
                const PRUnichar* fragmentEnd = c + fragmentLength;
                // entityText: bare entity name, '&' and ';' are added below.
                const char* entityText = nsnull;
                // Backing storage for entityText when the name comes from
                // the parser service.
                nsCAutoString entityReplacement;
                // fullEntityText: complete entity string from
                // mEntityConverter; released with nsMemory::Free below.
                char* fullEntityText = nsnull;

                advanceLength = 0;
                // for each character in this chunk, check if it
                // needs to be replaced
                // The scan stops (break) at the first character that gets a
                // replacement; advanceLength then counts the unmodified code
                // units that precede it.
                for (; c < fragmentEnd; c++, advanceLength++) {
                    PRUnichar val = *c;
                    if (val == kValNBSP) {
                        entityText = kEntityNBSP;
                        break;
                    }
                    // Table lookup for values up to kGTVal; an empty table
                    // entry means "no replacement".
                    else if ((val <= kGTVal) && (entityTable[val][0] != 0)) {
                        entityText = entityTable[val];
                        break;
                    } else if (val > 127 &&
                               ((val < 256 &&
                                 mFlags & nsIDocumentEncoder::OutputEncodeLatin1Entities) ||
                                mFlags & nsIDocumentEncoder::OutputEncodeHTMLEntities)) {
                        // Ask the parser service for an entity name. If it
                        // has none, the character is left as is: because
                        // this is an else-if chain, the W3C branch below is
                        // not tried for this character.
                        parserService->HTMLConvertUnicodeToEntity(val, entityReplacement);

                        if (!entityReplacement.IsEmpty()) {
                            entityText = entityReplacement.get();
                            break;
                        }
                    }
                    else if (val > 127 &&
                             mFlags & nsIDocumentEncoder::OutputEncodeW3CEntities &&
                             mEntityConverter) {
                        if (NS_IS_HIGH_SURROGATE(val) &&
                                c + 1 < fragmentEnd &&
                                NS_IS_LOW_SURROGATE(*(c + 1))) {
                            // ++c consumes the low surrogate of the pair.
                            PRUint32 valUTF32 = SURROGATE_TO_UCS4(val, *(++c));
                            if (NS_SUCCEEDED(mEntityConverter->ConvertUTF32ToEntity(valUTF32,
                                             nsIEntityConverter::entityW3C, &fullEntityText))) {
                                lengthReplaced = 2;
                                break;
                            }
                            else {
                                // No entity for this pair: account for the
                                // low surrogate consumed by ++c so that
                                // advanceLength stays in step with c and the
                                // pair is copied through unchanged.
                                advanceLength++;
                            }
                        }
                        else if (NS_SUCCEEDED(mEntityConverter->ConvertToEntity(val,
                                              nsIEntityConverter::entityW3C,
                                              &fullEntityText))) {
                            lengthReplaced = 1;
                            break;
                        }
                    }
                }

                // Copy the run of characters that needed no replacement.
                aOutputStr.Append(fragmentStart, advanceLength);
                if (entityText) {
                    aOutputStr.Append(PRUnichar('&'));
                    AppendASCIItoUTF16(entityText, aOutputStr);
                    aOutputStr.Append(PRUnichar(';'));
                    // Skip the single code unit that was replaced.
                    advanceLength++;
                }
                // if it comes from nsIEntityConverter, it already has '&' and ';'
                else if (fullEntityText) {
                    AppendASCIItoUTF16(fullEntityText, aOutputStr);
                    nsMemory::Free(fullEntityText);
                    // Skip the one or two code units that were replaced.
                    advanceLength += lengthReplaced;
                }
            }
        } else {
            // No entity-encoding flag is set: use the base-class escaping.
            // NOTE(review): aIncrColumn is forwarded although mColPos was
            // already advanced above — confirm the base class does not
            // advance it a second time.
            nsXMLContentSerializer::AppendToString(aStr, aOutputStr, aTranslateEntities, aIncrColumn);
        }

        return;
    }

    // No translation requested, or inside CDATA: append verbatim.
    aOutputStr.Append(aStr);
}
コード例 #2
0
/**
 * Replaces the contents of aDest with aSource.
 *
 * aDest is truncated first and aSource is then appended through
 * AppendASCIItoUTF16, so any previous contents of aDest are discarded.
 * NOTE(review): presumably aSource must contain only ASCII — confirm
 * against AppendASCIItoUTF16.
 *
 * @param aSource narrow source string
 * @param aDest   wide string that receives the result
 */
void
CopyASCIItoUTF16(const nsACString& aSource, nsAString& aDest)
{
  aDest.Truncate();
  AppendASCIItoUTF16(aSource, aDest);
}
コード例 #3
0
/**
 * Builds an escaped copy of aURI in aEscapedURI.
 *
 * A URI that IsJavaScript() reports as a javascript: URI is copied through
 * unchanged. Any other URI is split at the characters "%#;/?:@&=+$,": those
 * characters are copied through as they are, and each segment between them
 * is escaped. A segment that contains non-ASCII characters is converted to
 * mCharset and escaped through nsITextToSubURI (when mCharset is set); every
 * other segment is escaped with nsEscape(..., url_Path) on its UTF-8 form.
 *
 * @param aURI        the URI to escape
 * @param aEscapedURI receives the result; its previous contents are
 *                    discarded. It may hold a partial result on failure.
 * @return NS_OK on success, otherwise the failure code from obtaining the
 *         nsITextToSubURI service or from ConvertAndEscape.
 */
nsresult
nsHTMLContentSerializer::EscapeURI(const nsAString& aURI, nsAString& aEscapedURI)
{
    // URL escape %xx cannot be used in JS.
    // No escaping if the scheme is 'javascript'.
    if (IsJavaScript(nsGkAtoms::href, aURI)) {
        aEscapedURI = aURI;
        return NS_OK;
    }

    // nsITextToSubURI does charset convert plus uri escape
    // This is needed to convert to a document charset which is needed to support existing browsers.
    // But we eventually want to use UTF-8 instead of a document charset, then the code would be much simpler.
    // See HTML 4.01 spec, "Appendix B.2.1 Non-ASCII characters in URI attribute values"
    nsCOMPtr<nsITextToSubURI> textToSubURI;
    nsAutoString uri(aURI); // in order to use FindCharInSet()
    nsresult rv = NS_OK;

    // The conversion service is only fetched when there is a document
    // charset and the URI actually contains non-ASCII characters.
    if (!mCharset.IsEmpty() && !IsASCII(uri)) {
        textToSubURI = do_GetService(NS_ITEXTTOSUBURI_CONTRACTID, &rv);
        NS_ENSURE_SUCCESS(rv, rv);
    }

    PRInt32 start = 0;
    PRInt32 end;
    nsAutoString part;
    nsXPIDLCString escapedURI;
    aEscapedURI.Truncate(0);

    // Loop and escape parts by avoiding escaping reserved characters (and '%', '#' ).
    while ((end = uri.FindCharInSet("%#;/?:@&=+$,", start)) != -1) {
        part = Substring(aURI, start, (end-start));
        if (textToSubURI && !IsASCII(part)) {
            rv = textToSubURI->ConvertAndEscape(mCharset.get(), part.get(), getter_Copies(escapedURI));
            NS_ENSURE_SUCCESS(rv, rv);
        }
        else {
            escapedURI.Adopt(nsEscape(NS_ConvertUTF16toUTF8(part).get(), url_Path));
        }
        AppendASCIItoUTF16(escapedURI, aEscapedURI);

        // Append a reserved character without escaping.
        part = Substring(aURI, end, 1);
        aEscapedURI.Append(part);
        start = end + 1;
    }

    if (start < (PRInt32) aURI.Length()) {
        // Escape the remaining part.
        part = Substring(aURI, start, aURI.Length()-start);
        // Use the same rule as inside the loop: only a segment that really
        // contains non-ASCII characters goes through the charset converter.
        // This keeps a pure-ASCII tail escaped exactly like a pure-ASCII
        // segment that is followed by a reserved character.
        if (textToSubURI && !IsASCII(part)) {
            rv = textToSubURI->ConvertAndEscape(mCharset.get(), part.get(), getter_Copies(escapedURI));
            NS_ENSURE_SUCCESS(rv, rv);
        }
        else {
            escapedURI.Adopt(nsEscape(NS_ConvertUTF16toUTF8(part).get(), url_Path));
        }
        AppendASCIItoUTF16(escapedURI, aEscapedURI);
    }

    return rv;
}
コード例 #4
0
/**
 * Appends the narrow string aData to this string by forwarding to
 * AppendASCIItoUTF16.
 * NOTE(review): presumably aData is expected to contain only ASCII —
 * confirm against AppendASCIItoUTF16.
 *
 * @param aData narrow string to append
 */
void
nsString::AppendWithConversion( const nsACString& aData )
  {
    AppendASCIItoUTF16(aData, *this);
  }