PassRefPtrWillBeRawPtr<ArchiveResource> MHTMLParser::parseNextPart(const MIMEHeader& mimeHeader, const String& endOfPartBoundary, const String& endOfDocumentBoundary, bool& endOfArchiveReached)
{
    ASSERT(endOfPartBoundary.isEmpty() == endOfDocumentBoundary.isEmpty());

    // If no content transfer encoding is specified, default to binary encoding.
    MIMEHeader::Encoding contentTransferEncoding = mimeHeader.contentTransferEncoding();
    if (contentTransferEncoding == MIMEHeader::Unknown)
        contentTransferEncoding = MIMEHeader::Binary;

    RefPtr<SharedBuffer> content = SharedBuffer::create();
    const bool checkBoundary = !endOfPartBoundary.isEmpty();
    bool endOfPartReached = false;
    if (contentTransferEncoding == MIMEHeader::Binary) {
        if (!checkBoundary) {
            WTF_LOG_ERROR("Binary contents requires end of part");
            return nullptr;
        }
        m_lineReader.setSeparator(endOfPartBoundary.utf8().data());
        Vector<char> part;
        if (!m_lineReader.nextChunk(part)) {
            WTF_LOG_ERROR("Binary contents requires end of part");
            return nullptr;
        }
        content->append(part);
        m_lineReader.setSeparator("\r\n");
        Vector<char> nextChars;
        if (m_lineReader.peek(nextChars, 2) != 2) {
            WTF_LOG_ERROR("Invalid seperator.");
            return nullptr;
        }
        endOfPartReached = true;
        ASSERT(nextChars.size() == 2);
        endOfArchiveReached = (nextChars[0] == '-' && nextChars[1] == '-');
        if (!endOfArchiveReached) {
            String line = m_lineReader.nextChunkAsUTF8StringWithLatin1Fallback();
            if (!line.isEmpty()) {
                WTF_LOG_ERROR("No CRLF at end of binary section.");
                return nullptr;
            }
        }
    } else {
        String line;
        while (!(line = m_lineReader.nextChunkAsUTF8StringWithLatin1Fallback()).isNull()) {
            endOfArchiveReached = (line == endOfDocumentBoundary);
            if (checkBoundary && (line == endOfPartBoundary || endOfArchiveReached)) {
                endOfPartReached = true;
                break;
            }
            // Note that we use line.utf8() and not line.ascii() as ascii turns special characters (such as tab, line-feed...) into '?'.
            content->append(line.utf8().data(), line.length());
            if (contentTransferEncoding == MIMEHeader::QuotedPrintable) {
                // The line reader removes the \r\n, but we need them for the content in this case as the QuotedPrintable decoder expects CR-LF terminated lines.
                content->append("\r\n", 2u);
            }
        }
    }
    if (!endOfPartReached && checkBoundary) {
        WTF_LOG_ERROR("No bounday found for MHTML part.");
        return nullptr;
    }

    Vector<char> data;
    switch (contentTransferEncoding) {
    case MIMEHeader::Base64:
        if (!base64Decode(content->data(), content->size(), data)) {
            WTF_LOG_ERROR("Invalid base64 content for MHTML part.");
            return nullptr;
        }
        break;
    case MIMEHeader::QuotedPrintable:
        quotedPrintableDecode(content->data(), content->size(), data);
        break;
    case MIMEHeader::EightBit:
    case MIMEHeader::SevenBit:
    case MIMEHeader::Binary:
        data.append(content->data(), content->size());
        break;
    default:
        WTF_LOG_ERROR("Invalid encoding for MHTML part.");
        return nullptr;
    }
    RefPtr<SharedBuffer> contentBuffer = SharedBuffer::adoptVector(data);
    // FIXME: the URL in the MIME header could be relative, we should resolve it if it is.
    // The specs mentions 5 ways to resolve a URL: http://tools.ietf.org/html/rfc2557#section-5
    // IE and Firefox (UNMht) seem to generate only absolute URLs.
    KURL location = KURL(KURL(), mimeHeader.contentLocation());
    return ArchiveResource::create(
        contentBuffer, location, mimeHeader.contentID(), AtomicString(mimeHeader.contentType()), AtomicString(mimeHeader.charset()));
}
Example #2
0
PassRefPtr<ArchiveResource> MHTMLParser::parseNextPart(const MIMEHeader& mimeHeader, const String& endOfPartBoundary, const String& endOfDocumentBoundary, bool& endOfArchiveReached)
{
    ASSERT(endOfPartBoundary.isEmpty() == endOfDocumentBoundary.isEmpty());

    RefPtr<SharedBuffer> content = SharedBuffer::create();
    const bool checkBoundary = !endOfPartBoundary.isEmpty();
    bool endOfPartReached = false;
    String line;
    while (!(line = m_lineReader.nextLine()).isNull()) {
        if (checkBoundary && (line == endOfPartBoundary || line == endOfDocumentBoundary)) {
            endOfArchiveReached = (line == endOfDocumentBoundary);
            endOfPartReached = true;
            break;
        }
        // Note that we use line.utf8() and not line.ascii() as ascii turns special characters (such as tab, line-feed...) into '?'.
        content->append(line.utf8().data(), line.length());
        if (mimeHeader.contentTransferEncoding() == MIMEHeader::QuotedPrintable) {
            // The line reader removes the \r\n, but we need them for the content in this case as the QuotedPrintable decoder expects CR-LF terminated lines.
            content->append("\r\n", 2);
        }
    }
    if (!endOfPartReached && checkBoundary) {
        LOG_ERROR("No bounday found for MHTML part.");
        return 0;
    }

    Vector<char> data;
    switch (mimeHeader.contentTransferEncoding()) {
    case MIMEHeader::Base64:
        if (!base64Decode(content->data(), content->size(), data)) {
            LOG_ERROR("Invalid base64 content for MHTML part.");
            return 0;
        }
        break;
    case MIMEHeader::QuotedPrintable:
        quotedPrintableDecode(content->data(), content->size(), data);
        break;
    case MIMEHeader::SevenBit:
        data.append(content->data(), content->size());
        break;
    default:
        LOG_ERROR("Invalid encoding for MHTML part.");
        return 0;
    }
    RefPtr<SharedBuffer> contentBuffer = SharedBuffer::adoptVector(data);
    // FIXME: the URL in the MIME header could be relative, we should resolve it if it is.
    // The specs mentions 5 ways to resolve a URL: http://tools.ietf.org/html/rfc2557#section-5
    // IE and Firefox (UNMht) seem to generate only absolute URLs.
    KURL location = KURL(KURL(), mimeHeader.contentLocation());
    return ArchiveResource::create(contentBuffer, location, mimeHeader.contentType(), mimeHeader.charset(), String());
}