// Serializes the document of |frame| into m_resources, then the sub-resources
// referenced by the serialized nodes (inline style properties, images, linked
// and inline style sheets), and finally recurses into the child frames.
//
// Nothing is emitted for the frame (and its children are not visited) when a
// resource with the same URL is already recorded in m_resourceURLs, or when the
// document's charset does not yield a valid TextEncoding.
void PageSerializer::serializeFrame(Frame* frame)
{
    Document* document = frame->document();
    URL url = document->url();
    if (!url.isValid() || url.isBlankURL()) {
        // For blank frames we generate a fake URL so they can be referenced by their containing frame.
        url = urlForBlankFrame(frame);
    }

    if (m_resourceURLs.contains(url)) {
        // FIXME: We could have 2 frame with the same URL but which were dynamically changed and have now
        // different content. So we should serialize both and somehow rename the frame src in the containing
        // frame. Arg!
        return;
    }

    Vector<Node*> nodes;
    SerializerMarkupAccumulator accumulator(*this, *document, &nodes);
    TextEncoding textEncoding(document->charset());
    if (!textEncoding.isValid()) {
        // FIXME: iframes used as images trigger this. We should deal with them correctly.
        return;
    }

    // documentElement() is a pointer and may be null (a document without a root
    // element). Dereferencing it unconditionally would be undefined behavior, so
    // such a frame is recorded as an empty resource instead; the containing
    // frame can then still reference it by URL.
    String text;
    if (Element* rootElement = document->documentElement())
        text = accumulator.serializeNodes(*rootElement, 0, IncludeNode);

    CString frameHTML = textEncoding.encode(text, EntitiesForUnencodables);
    m_resources->append(Resource(url, document->suggestedMIMEType(), SharedBuffer::create(frameHTML.data(), frameHTML.length())));
    m_resourceURLs.add(url);

    // |nodes| was filled by the accumulator while serializing; walk it to pick up
    // the resources those nodes refer to.
    for (Node* node : nodes) {
        if (!is<Element>(*node))
            continue;

        Element& element = downcast<Element>(*node);
        // We have to process in-line style as it might contain some resources (typically background images).
        if (is<StyledElement>(element))
            retrieveResourcesForProperties(downcast<StyledElement>(element).inlineStyle(), document);

        if (is<HTMLImageElement>(element)) {
            HTMLImageElement& imageElement = downcast<HTMLImageElement>(element);
            URL imageURL = document->completeURL(imageElement.fastGetAttribute(HTMLNames::srcAttr));
            CachedImage* cachedImage = imageElement.cachedImage();
            addImageToResources(cachedImage, imageElement.renderer(), imageURL);
        } else if (is<HTMLLinkElement>(element)) {
            HTMLLinkElement& linkElement = downcast<HTMLLinkElement>(element);
            if (CSSStyleSheet* sheet = linkElement.sheet()) {
                URL sheetURL = document->completeURL(linkElement.getAttribute(HTMLNames::hrefAttr));
                serializeCSSStyleSheet(sheet, sheetURL);
                ASSERT(m_resourceURLs.contains(sheetURL));
            }
        } else if (is<HTMLStyleElement>(element)) {
            // Inline sheets have no URL of their own: an empty URL is passed so only
            // the resources they reference are collected.
            if (CSSStyleSheet* sheet = downcast<HTMLStyleElement>(element).sheet())
                serializeCSSStyleSheet(sheet, URL());
        }
    }

    for (Frame* childFrame = frame->tree().firstChild(); childFrame; childFrame = childFrame->tree().nextSibling())
        serializeFrame(childFrame);
}
Example #2
0
// Builds the CSS text of |styleSheet| from its items and collects the resources
// those items reference (imported style sheets, style-rule resources).
//
// When |url| is valid and not yet present in m_resourceURLs, the encoded CSS text
// is appended to m_resources as a "text/css" resource under that URL. With an
// invalid |url| only the referenced resources are collected.
void PageSerializer::serializeCSSStyleSheet(CSSStyleSheet* styleSheet, const KURL& url)
{
    StringBuilder cssText;
    const unsigned itemCount = styleSheet->length();
    for (unsigned i = 0; i < itemCount; ++i) {
        StyleBase* item = styleSheet->item(i);
        String itemText = item->cssText();
        if (!itemText.isEmpty()) {
            cssText.append(itemText);
            // Separate consecutive items with a blank line, except after the last one.
            if (i < itemCount - 1)
                cssText.append("\n\n");
        }
        // Some rules have resources associated with them that we need to retrieve.
        if (item->isImportRule()) {
            CSSImportRule* importRule = static_cast<CSSImportRule*>(item);
            KURL importURL = styleSheet->document()->completeURL(importRule->href());
            if (m_resourceURLs.contains(importURL))
                continue;
            // An import rule may have no style sheet attached (presumably when the
            // import has not loaded or failed to load); recursing with a null sheet
            // would dereference it, so such rules are skipped.
            if (CSSStyleSheet* importedSheet = importRule->styleSheet())
                serializeCSSStyleSheet(importedSheet, importURL);
        } else if (item->isFontFaceRule()) {
            // FIXME: Add support for font face rule. It is not clear to me at this point if the actual otf/eot file can
            // be retrieved from the CSSFontFaceRule object.
        } else if (item->isStyleRule())
            retrieveResourcesForCSSRule(static_cast<CSSStyleRule*>(item));
    }

    if (url.isValid() && !m_resourceURLs.contains(url)) {
        // FIXME: We should check whether a charset has been specified and if none was found add one.
        TextEncoding textEncoding(styleSheet->charset());
        ASSERT(textEncoding.isValid());
        String textString = cssText.toString();
        CString text = textEncoding.encode(textString.characters(), textString.length(), EntitiesForUnencodables);
        m_resources->append(Resource(url, String("text/css"), SharedBuffer::create(text.data(), text.length())));
        m_resourceURLs.add(url);
    }
}
Example #3
0
bool WebPageSerializerImpl::serialize()
{
    // Collect target frames.
    if (!m_framesCollected)
        collectTargetFrames();
    bool didSerialization = false;
    // Get KURL for main frame.
    KURL mainPageURL = m_specifiedWebFrameImpl->frame()->loader()->url();

    // Go through all frames for serializing DOM for whole page, include
    // sub-frames.
    for (int i = 0; i < static_cast<int>(m_frames.size()); ++i) {
        // Get current serializing frame.
        WebFrameImpl* currentFrame = m_frames[i];
        // Get current using document.
        Document* currentDoc = currentFrame->frame()->document();
        // Get current frame's URL.
        const KURL& currentFrameURL = currentFrame->frame()->loader()->url();

        // Check whether we have done this document.
        if (currentFrameURL.isValid() && m_localLinks.contains(currentFrameURL.string())) {
            // A new document, we will serialize it.
            didSerialization = true;
            // Get target encoding for current document.
            String encoding = currentFrame->frame()->loader()->writer()->encoding();
            // Create the text encoding object with target encoding.
            TextEncoding textEncoding(encoding);
            // Construct serialize parameter for late processing document.
            SerializeDomParam param(currentFrameURL,
                                    encoding.length() ? textEncoding : UTF8Encoding(),
                                    currentDoc,
                                    currentFrameURL == mainPageURL ? m_localDirectoryName : "");

            // Process current document.
            Element* rootElement = currentDoc->documentElement();
            if (rootElement)
                buildContentForNode(rootElement, &param);

            // Flush the remainder data and finish serializing current frame.
            encodeAndFlushBuffer(WebPageSerializerClient::CurrentFrameIsFinished,
                                 &param,
                                 1);
        }
    }

    // We have done call frames, so we send message to embedder to tell it that
    // frames are finished serializing.
    ASSERT(m_dataBuffer.isEmpty());
    m_client->didSerializeDataForFrame(KURL(),
                                       WebCString("", 0),
                                       WebPageSerializerClient::AllFramesAreFinished);
    return didSerialization;
}
Example #4
0
bool isASCIICompatibleEncoding(const char* encoding)
{
    TextEncoding textEncoding(encoding);
    if (!textEncoding.isValid())
        return false;

    // Check the most common encodings first
    if (textEncoding == WebCore::UTF8Encoding() || textEncoding == WebCore::Latin1Encoding() || textEncoding == WebCore::ASCIIEncoding())
        return true;

    String lowercasedEncoding = String(encoding).lower();
    // This is slow and could easily be optimized by directly inspecting encoding[i].
    if (lowercasedEncoding.startsWith("iso-8859")
        || lowercasedEncoding.startsWith("windows")
        || lowercasedEncoding.startsWith("euc-jp")
        || lowercasedEncoding.startsWith("euc-kr"))
        return true;

    return false;
}
// Builds the CSS text of |styleSheet| from its rules and collects the resources
// those rules reference (imported style sheets, font-face and style-rule
// properties).
//
// When |url| is valid and not yet present in m_resourceURLs, the encoded CSS text
// is appended to m_resources as a "text/css" resource under that URL. With an
// invalid |url| only the referenced resources are collected.
void PageSerializer::serializeCSSStyleSheet(CSSStyleSheet* styleSheet, const KURL& url)
{
    StringBuilder cssText;
    // The owner document does not depend on the rule, so look it up once.
    Document* document = styleSheet->ownerDocument();
    const unsigned ruleCount = styleSheet->length();
    for (unsigned i = 0; i < ruleCount; ++i) {
        CSSRule* rule = styleSheet->item(i);
        String itemText = rule->cssText();
        if (!itemText.isEmpty()) {
            cssText.append(itemText);
            // Separate consecutive rules with a blank line, except after the last one.
            if (i < ruleCount - 1)
                cssText.append("\n\n");
        }
        // Some rules have resources associated with them that we need to retrieve.
        if (rule->type() == CSSRule::IMPORT_RULE) {
            CSSImportRule* importRule = toCSSImportRule(rule);
            KURL importURL = document->completeURL(importRule->href());
            if (m_resourceURLs.contains(importURL))
                continue;
            // An import rule may have no style sheet attached (presumably when the
            // import has not loaded or failed to load); recursing with a null sheet
            // would dereference it, so such rules are skipped.
            if (CSSStyleSheet* importedSheet = importRule->styleSheet())
                serializeCSSStyleSheet(importedSheet, importURL);
        } else if (rule->type() == CSSRule::FONT_FACE_RULE) {
            retrieveResourcesForProperties(toCSSFontFaceRule(rule)->styleRule()->properties(), document);
        } else if (rule->type() == CSSRule::STYLE_RULE) {
            retrieveResourcesForProperties(toCSSStyleRule(rule)->styleRule()->properties(), document);
        }
    }

    if (url.isValid() && !m_resourceURLs.contains(url)) {
        // FIXME: We should check whether a charset has been specified and if none was found add one.
        WTF::TextEncoding textEncoding(styleSheet->contents()->charset());
        ASSERT(textEncoding.isValid());
        String textString = cssText.toString();
        CString text = textEncoding.normalizeAndEncode(textString, WTF::EntitiesForUnencodables);
        m_resources->append(SerializedResource(url, String("text/css"), SharedBuffer::create(text.data(), text.length())));
        m_resourceURLs.add(url);
    }
}
Example #6
0
// Handles a new chunk of data appended to |resource| (asserted to be m_resource,
// and asserted to run on the main thread).
//
// Once the resource buffer has reached kSmallScriptThreshold bytes, this decides
// a single time whether the script can be streamed. If so, it creates the
// stream/source pair and posts a ScriptStreamingTask to the shared
// ScriptStreamerThread; otherwise it calls suppressStreaming(). Whenever a
// stream exists, it is told that data has arrived.
//
// The histogram named by startedStreamingHistogramName(m_scriptType) is recorded
// with value 0 on every bail-out path and with value 1 when the task is posted.
void ScriptStreamer::notifyAppendData(ScriptResource* resource)
{
    ASSERT(isMainThread());
    ASSERT(m_resource == resource);
    {
        // m_streamingSuppressed is read while holding m_mutex; the lock is
        // released at the end of this scope, before any of the work below.
        MutexLocker locker(m_mutex);
        if (m_streamingSuppressed)
            return;
    }
    if (!m_haveEnoughDataForStreaming) {
        // Even if the first data chunk is small, the script can still be big
        // enough - wait until the next data chunk comes before deciding whether
        // to start the streaming.
        if (resource->resourceBuffer()->size() < kSmallScriptThreshold) {
            return;
        }
        // From here on this setup block is not entered again, whatever the outcome.
        m_haveEnoughDataForStreaming = true;
        const char* histogramName = startedStreamingHistogramName(m_scriptType);

        // Encoding should be detected only when we have some data. It's
        // possible that resource->encoding() returns a different encoding
        // before the loading has started and after we got some data.
        WTF::TextEncoding textEncoding(resource->encoding());
        const char* encodingName = textEncoding.name();

        // Here's a list of encodings we can use for streaming. These are
        // the canonical names.
        v8::ScriptCompiler::StreamedSource::Encoding encoding;
        if (strcmp(encodingName, "windows-1252") == 0
            || strcmp(encodingName, "ISO-8859-1") == 0
            || strcmp(encodingName, "US-ASCII") == 0) {
            encoding = v8::ScriptCompiler::StreamedSource::ONE_BYTE;
        } else if (strcmp(encodingName, "UTF-8") == 0) {
            encoding = v8::ScriptCompiler::StreamedSource::UTF8;
        } else {
            // We don't stream other encodings; especially we don't stream two
            // byte scripts to avoid the handling of byte order marks. Most
            // scripts are Latin1 or UTF-8 anyway, so this should be enough for
            // most real world purposes.
            suppressStreaming();
            blink::Platform::current()->histogramEnumeration(histogramName, 0, 2);
            return;
        }
        if (ScriptStreamerThread::shared()->isRunningTask()) {
            // At the moment we only have one thread for running the tasks. A
            // new task shouldn't be queued before the running task completes,
            // because the running task can block and wait for data from the
            // network.
            suppressStreaming();
            blink::Platform::current()->histogramEnumeration(histogramName, 0, 2);
            return;
        }

        // Streaming is also given up when the script state's context is no
        // longer valid.
        if (!m_scriptState->contextIsValid()) {
            suppressStreaming();
            blink::Platform::current()->histogramEnumeration(histogramName, 0, 2);
            return;
        }

        ASSERT(!m_stream);
        ASSERT(!m_source);
        m_stream = new SourceStream(this);
        // m_source takes ownership of m_stream.
        m_source = adoptPtr(new v8::ScriptCompiler::StreamedSource(m_stream, encoding));

        ScriptState::Scope scope(m_scriptState.get());
        WTF::OwnPtr<v8::ScriptCompiler::ScriptStreamingTask> scriptStreamingTask(adoptPtr(v8::ScriptCompiler::StartStreamingScript(m_scriptState->isolate(), m_source.get(), m_compileOptions)));
        if (!scriptStreamingTask) {
            // V8 cannot stream the script.
            suppressStreaming();
            // m_stream is only reset, not deleted: it is owned by m_source, which
            // is cleared on the next line.
            m_stream = 0;
            m_source.clear();
            blink::Platform::current()->histogramEnumeration(histogramName, 0, 2);
            return;
        }

        // ScriptStreamer needs to stay alive as long as the background task is
        // running. This is taken care of with a manual ref() & deref() pair;
        // the corresponding deref() is in streamingComplete or in
        // notifyFinished.
        ref();
        ScriptStreamingTask* task = new ScriptStreamingTask(scriptStreamingTask.release(), this);
        ScriptStreamerThread::shared()->postTask(task);
        blink::Platform::current()->histogramEnumeration(histogramName, 1, 2);
    }
    // Reached both right after streaming was started above and on every later
    // call while a stream exists.
    if (m_stream)
        m_stream->didReceiveData();
}
Example #7
0
// Serializes the document of |frame| into m_resources, then the sub-resources
// referenced by the serialized nodes (inline style declarations, images, linked
// and inline style sheets), and finally recurses into the child frames.
//
// Nothing is emitted for the frame (and its children are not visited) when a
// resource with the same URL is already recorded in m_resourceURLs, or when the
// document's charset does not yield a valid TextEncoding.
void PageSerializer::serializeFrame(Frame* frame)
{
    Document* document = frame->document();
    KURL url = document->url();
    if (!url.isValid() || url.protocolIs("about")) {
        // For blank frames we generate a fake URL so they can be referenced by their containing frame.
        url = urlForBlankFrame(frame);
    }

    if (m_resourceURLs.contains(url)) {
        // FIXME: We could have 2 frame with the same URL but which were dynamically changed and have now
        // different content. So we should serialize both and somehow rename the frame src in the containing
        // frame. Arg!
        return;
    }

    Vector<Node*> nodes;
    SerializerMarkupAccumulator accumulator(this, document, &nodes);
    TextEncoding textEncoding(document->charset());
    if (!textEncoding.isValid()) {
        // FIXME: iframes used as images trigger this. We should deal with them correctly.
        return;
    }
    String text = accumulator.serializeNodes(document->documentElement(), 0, IncludeNode);
    CString frameHTML = textEncoding.encode(text.characters(), text.length(), EntitiesForUnencodables);
    m_resources->append(Resource(url, document->suggestedMIMEType(), SharedBuffer::create(frameHTML.data(), frameHTML.length())));
    m_resourceURLs.add(url);

    // |nodes| was filled by the accumulator while serializing; walk it to pick up
    // the resources those nodes refer to.
    for (Vector<Node*>::iterator iter = nodes.begin(); iter != nodes.end(); ++iter) {
        Node* node = *iter;
        if (!node->isElementNode())
            continue;

        Element* element = toElement(node);
        // We have to process in-line style as it might contain some resources (typically background images).
        retrieveResourcesForCSSDeclaration(element->style());

        if (element->hasTagName(HTMLNames::imgTag)) {
            HTMLImageElement* imageElement = static_cast<HTMLImageElement*>(element);
            KURL imageURL = document->completeURL(imageElement->getAttribute(HTMLNames::srcAttr));
            CachedImage* cachedImage = imageElement->cachedImage();
            addImageToResources(cachedImage, imageURL);
        } else if (element->hasTagName(HTMLNames::linkTag)) {
            HTMLLinkElement* linkElement = static_cast<HTMLLinkElement*>(element);
            StyleSheet* sheet = linkElement->sheet();
            if (sheet && sheet->isCSSStyleSheet()) {
                KURL sheetURL = document->completeURL(linkElement->getAttribute(HTMLNames::hrefAttr));
                serializeCSSStyleSheet(static_cast<CSSStyleSheet*>(sheet), sheetURL);
                ASSERT(m_resourceURLs.contains(sheetURL));
            }
        } else if (element->hasTagName(HTMLNames::styleTag)) {
            HTMLStyleElement* styleElement = static_cast<HTMLStyleElement*>(element);
            StyleSheet* sheet = styleElement->sheet();
            // Inline sheets have no URL of their own: an empty URL is passed so only
            // the resources they reference are collected.
            if (sheet && sheet->isCSSStyleSheet())
                serializeCSSStyleSheet(static_cast<CSSStyleSheet*>(sheet), KURL());
        }
    }

    for (Frame* childFrame = frame->tree()->firstChild(); childFrame; childFrame = childFrame->tree()->nextSibling())
        serializeFrame(childFrame);
}