Example #1
0
PassRefPtrWillBeRawPtr<MHTMLArchive> MHTMLParser::parseArchiveWithHeader(MIMEHeader* header)
{
    if (!header) {
        WTF_LOG_ERROR("Failed to parse MHTML part: no header.");
        return nullptr;
    }

    RefPtrWillBeRawPtr<MHTMLArchive> archive = MHTMLArchive::create();
    if (!header->isMultipart()) {
        // With IE a page with no resource is not multi-part.
        bool endOfArchiveReached = false;
        RefPtrWillBeRawPtr<ArchiveResource> resource = parseNextPart(*header, String(), String(), endOfArchiveReached);
        if (!resource)
            return nullptr;
        archive->setMainResource(resource);
        return archive;
    }

    // Skip the message content (it's a generic browser specific message).
    skipLinesUntilBoundaryFound(m_lineReader, header->endOfPartBoundary());

    bool endOfArchive = false;
    while (!endOfArchive) {
        RefPtrWillBeRawPtr<MIMEHeader> resourceHeader = MIMEHeader::parseHeader(&m_lineReader);
        if (!resourceHeader) {
            WTF_LOG_ERROR("Failed to parse MHTML, invalid MIME header.");
            return nullptr;
        }
        if (resourceHeader->contentType() == "multipart/alternative") {
            // Ignore IE nesting which makes little sense (IE seems to nest only some of the frames).
            RefPtrWillBeRawPtr<MHTMLArchive> subframeArchive = parseArchiveWithHeader(resourceHeader.get());
            if (!subframeArchive) {
                WTF_LOG_ERROR("Failed to parse MHTML subframe.");
                return nullptr;
            }
            bool endOfPartReached = skipLinesUntilBoundaryFound(m_lineReader, header->endOfPartBoundary());
            ASSERT_UNUSED(endOfPartReached, endOfPartReached);
            // The top-frame is the first frame found, regardless of the nesting level.
            if (subframeArchive->mainResource())
                addResourceToArchive(subframeArchive->mainResource(), archive.get());
            archive->addSubframeArchive(subframeArchive);
            continue;
        }

        RefPtrWillBeRawPtr<ArchiveResource> resource = parseNextPart(*resourceHeader, header->endOfPartBoundary(), header->endOfDocumentBoundary(), endOfArchive);
        if (!resource) {
            WTF_LOG_ERROR("Failed to parse MHTML part.");
            return nullptr;
        }
        addResourceToArchive(resource.get(), archive.get());
    }

    return archive.release();
}
PassRefPtrWillBeRawPtr<MHTMLArchive> MHTMLArchive::create(const KURL& url, SharedBuffer* data)
{
    // For security reasons we only load MHTML pages from local URLs.
    if (!SchemeRegistry::shouldTreatURLSchemeAsLocal(url.protocol()))
        return nullptr;

    MHTMLParser parser(data);
    RefPtrWillBeRawPtr<MHTMLArchive> mainArchive = parser.parseArchive();
    if (!mainArchive)
        return nullptr; // Invalid MHTML file.

    // Since MHTML is a flat format, we need to make all frames aware of all resources.
    for (size_t i = 0; i < parser.frameCount(); ++i) {
        RefPtrWillBeRawPtr<MHTMLArchive> archive = parser.frameAt(i);
        for (size_t j = 1; j < parser.frameCount(); ++j) {
            if (i != j)
                archive->addSubframeArchive(parser.frameAt(j));
        }
        for (size_t j = 0; j < parser.subResourceCount(); ++j)
            archive->addSubresource(parser.subResourceAt(j));
    }
    return mainArchive.release();
}