// Serializes |frame|'s document and appends it to m_resources, then walks the
// nodes recorded during serialization to pull in the resources they reference
// (inline-style resources, images, linked and inline style sheets), and
// finally recurses into every child frame.
//
// The frame (and, because of the early return, all of its child frames) is
// skipped when its URL is already present in m_resourceURLs or when the
// document charset does not yield a valid TextEncoding.
void PageSerializer::serializeFrame(Frame* frame)
{
    Document* document = frame->document();
    URL url = document->url();
    if (!url.isValid() || url.isBlankURL()) {
        // For blank frames we generate a fake URL so they can be referenced by their containing frame.
        url = urlForBlankFrame(frame);
    }

    if (m_resourceURLs.contains(url)) {
        // FIXME: We could have 2 frames with the same URL but which were dynamically changed and now have
        // different content. So we should serialize both and somehow rename the frame src in the containing
        // frame. Arg!
        return;
    }

    // |nodes| is handed to the accumulator by pointer; presumably it is filled
    // with the serialized nodes during serializeNodes() -- confirm against
    // SerializerMarkupAccumulator.
    Vector<Node*> nodes;
    SerializerMarkupAccumulator accumulator(*this, *document, &nodes);
    TextEncoding textEncoding(document->charset());
    if (!textEncoding.isValid()) {
        // FIXME: iframes used as images trigger this. We should deal with them correctly.
        return;
    }

    String text = accumulator.serializeNodes(*document->documentElement(), 0, IncludeNode);
    CString frameHTML = textEncoding.encode(text, EntitiesForUnencodables);
    m_resources->append(Resource(url, document->suggestedMIMEType(), SharedBuffer::create(frameHTML.data(), frameHTML.length())));
    m_resourceURLs.add(url);

    for (Node* node : nodes) {
        if (!is<Element>(*node))
            continue;

        Element& element = downcast<Element>(*node);
        // We have to process in-line style as it might contain some resources (typically background images).
        if (is<StyledElement>(element))
            retrieveResourcesForProperties(downcast<StyledElement>(element).inlineStyle(), document);

        if (is<HTMLImageElement>(element)) {
            HTMLImageElement& imageElement = downcast<HTMLImageElement>(element);
            // Named distinctly so it does not shadow the frame's own |url|.
            URL imageURL = document->completeURL(imageElement.fastGetAttribute(HTMLNames::srcAttr));
            CachedImage* cachedImage = imageElement.cachedImage();
            addImageToResources(cachedImage, imageElement.renderer(), imageURL);
        } else if (is<HTMLLinkElement>(element)) {
            HTMLLinkElement& linkElement = downcast<HTMLLinkElement>(element);
            if (CSSStyleSheet* sheet = linkElement.sheet()) {
                URL sheetURL = document->completeURL(linkElement.getAttribute(HTMLNames::hrefAttr));
                serializeCSSStyleSheet(sheet, sheetURL);
                ASSERT(m_resourceURLs.contains(sheetURL));
            }
        } else if (is<HTMLStyleElement>(element)) {
            // Inline sheets have no URL of their own: pass an empty URL so
            // only the resources they reference are collected.
            if (CSSStyleSheet* sheet = downcast<HTMLStyleElement>(element).sheet())
                serializeCSSStyleSheet(sheet, URL());
        }
    }

    for (Frame* childFrame = frame->tree().firstChild(); childFrame; childFrame = childFrame->tree().nextSibling())
        serializeFrame(childFrame);
}
// Concatenates the text of every item in |styleSheet| and retrieves the
// resources those items refer to: imported sheets are serialized recursively
// and style rules are handed to retrieveResourcesForCSSRule().
//
// When |url| is valid and not yet in m_resourceURLs, the concatenated CSS text
// is encoded with the sheet's charset and appended to m_resources as a
// "text/css" resource. Callers pass an invalid (empty) URL for sheets that
// should only contribute their referenced resources.
void PageSerializer::serializeCSSStyleSheet(CSSStyleSheet* styleSheet, const KURL& url)
{
    StringBuilder cssText;
    for (unsigned i = 0; i < styleSheet->length(); ++i) {
        StyleBase* item = styleSheet->item(i);
        String itemText = item->cssText();
        if (!itemText.isEmpty()) {
            cssText.append(itemText);
            // Separate items with a blank line, except after the last one.
            if (i < styleSheet->length() - 1)
                cssText.append("\n\n");
        }

        // Some rules have resources associated with them that we need to retrieve.
        if (item->isImportRule()) {
            CSSImportRule* importRule = static_cast<CSSImportRule*>(item);
            KURL importURL = styleSheet->document()->completeURL(importRule->href());
            if (m_resourceURLs.contains(importURL))
                continue;
            // An @import rule may have no associated sheet; the recursive call
            // dereferences its argument immediately, so skip such rules.
            if (CSSStyleSheet* importedSheet = importRule->styleSheet())
                serializeCSSStyleSheet(importedSheet, importURL);
        } else if (item->isFontFaceRule()) {
            // FIXME: Add support for font face rule. It is not clear to me at this point if the actual otf/eot file can
            // be retrieved from the CSSFontFaceRule object.
        } else if (item->isStyleRule())
            retrieveResourcesForCSSRule(static_cast<CSSStyleRule*>(item));
    }

    if (url.isValid() && !m_resourceURLs.contains(url)) {
        // FIXME: We should check whether a charset has been specified and if none was found add one.
        TextEncoding textEncoding(styleSheet->charset());
        ASSERT(textEncoding.isValid());
        String textString = cssText.toString();
        CString text = textEncoding.encode(textString.characters(), textString.length(), EntitiesForUnencodables);
        m_resources->append(Resource(url, String("text/css"), SharedBuffer::create(text.data(), text.length())));
        m_resourceURLs.add(url);
    }
}
bool WebPageSerializerImpl::serialize() { // Collect target frames. if (!m_framesCollected) collectTargetFrames(); bool didSerialization = false; // Get KURL for main frame. KURL mainPageURL = m_specifiedWebFrameImpl->frame()->loader()->url(); // Go through all frames for serializing DOM for whole page, include // sub-frames. for (int i = 0; i < static_cast<int>(m_frames.size()); ++i) { // Get current serializing frame. WebFrameImpl* currentFrame = m_frames[i]; // Get current using document. Document* currentDoc = currentFrame->frame()->document(); // Get current frame's URL. const KURL& currentFrameURL = currentFrame->frame()->loader()->url(); // Check whether we have done this document. if (currentFrameURL.isValid() && m_localLinks.contains(currentFrameURL.string())) { // A new document, we will serialize it. didSerialization = true; // Get target encoding for current document. String encoding = currentFrame->frame()->loader()->writer()->encoding(); // Create the text encoding object with target encoding. TextEncoding textEncoding(encoding); // Construct serialize parameter for late processing document. SerializeDomParam param(currentFrameURL, encoding.length() ? textEncoding : UTF8Encoding(), currentDoc, currentFrameURL == mainPageURL ? m_localDirectoryName : ""); // Process current document. Element* rootElement = currentDoc->documentElement(); if (rootElement) buildContentForNode(rootElement, ¶m); // Flush the remainder data and finish serializing current frame. encodeAndFlushBuffer(WebPageSerializerClient::CurrentFrameIsFinished, ¶m, 1); } } // We have done call frames, so we send message to embedder to tell it that // frames are finished serializing. ASSERT(m_dataBuffer.isEmpty()); m_client->didSerializeDataForFrame(KURL(), WebCString("", 0), WebPageSerializerClient::AllFramesAreFinished); return didSerialization; }
bool isASCIICompatibleEncoding(const char* encoding) { TextEncoding textEncoding(encoding); if (!textEncoding.isValid()) return false; // Check the most common encodings first if (textEncoding == WebCore::UTF8Encoding() || textEncoding == WebCore::Latin1Encoding() || textEncoding == WebCore::ASCIIEncoding()) return true; String lowercasedEncoding = String(encoding).lower(); // This is slow and could easily be optimized by directly inspecting encoding[i]. if (lowercasedEncoding.startsWith("iso-8859") || lowercasedEncoding.startsWith("windows") || lowercasedEncoding.startsWith("euc-jp") || lowercasedEncoding.startsWith("euc-kr")) return true; return false; }
// Concatenates the text of every rule in |styleSheet| and retrieves the
// resources the rules refer to: imported sheets are serialized recursively,
// and the property sets of @font-face and style rules are passed to
// retrieveResourcesForProperties().
//
// When |url| is valid and not yet in m_resourceURLs, the concatenated CSS text
// is encoded with the sheet's charset and appended to m_resources as a
// "text/css" resource. Callers pass an invalid (empty) URL for sheets that
// should only contribute their referenced resources.
void PageSerializer::serializeCSSStyleSheet(CSSStyleSheet* styleSheet, const KURL& url)
{
    // The owner document does not depend on the rule being visited, so look it
    // up once instead of once per rule.
    Document* document = styleSheet->ownerDocument();

    StringBuilder cssText;
    for (unsigned i = 0; i < styleSheet->length(); ++i) {
        CSSRule* rule = styleSheet->item(i);
        String itemText = rule->cssText();
        if (!itemText.isEmpty()) {
            cssText.append(itemText);
            // Separate rules with a blank line, except after the last one.
            if (i < styleSheet->length() - 1)
                cssText.append("\n\n");
        }

        // Some rules have resources associated with them that we need to retrieve.
        if (rule->type() == CSSRule::IMPORT_RULE) {
            CSSImportRule* importRule = toCSSImportRule(rule);
            KURL importURL = document->completeURL(importRule->href());
            if (m_resourceURLs.contains(importURL))
                continue;
            // An @import rule may have no associated sheet; the recursive call
            // dereferences its argument immediately, so skip such rules.
            if (CSSStyleSheet* importedSheet = importRule->styleSheet())
                serializeCSSStyleSheet(importedSheet, importURL);
        } else if (rule->type() == CSSRule::FONT_FACE_RULE) {
            retrieveResourcesForProperties(toCSSFontFaceRule(rule)->styleRule()->properties(), document);
        } else if (rule->type() == CSSRule::STYLE_RULE) {
            retrieveResourcesForProperties(toCSSStyleRule(rule)->styleRule()->properties(), document);
        }
    }

    if (url.isValid() && !m_resourceURLs.contains(url)) {
        // FIXME: We should check whether a charset has been specified and if none was found add one.
        WTF::TextEncoding textEncoding(styleSheet->contents()->charset());
        ASSERT(textEncoding.isValid());
        String textString = cssText.toString();
        CString text = textEncoding.normalizeAndEncode(textString, WTF::EntitiesForUnencodables);
        m_resources->append(SerializedResource(url, String("text/css"), SharedBuffer::create(text.data(), text.length())));
        m_resourceURLs.add(url);
    }
}
// Called on the main thread when more data has been appended to |resource|.
//
// Until enough data has arrived, this decides whether the script is streamed
// at all: the buffered size must reach kSmallScriptThreshold, the encoding
// must be one of the supported ones, the streamer thread must be idle, the
// script state's context must be valid and V8 must accept the source. Any
// failed check after the size threshold suppresses streaming and records
// bucket 0 in the "started streaming" histogram; success posts the background
// task and records bucket 1.
//
// Whenever a stream exists, it is told that data has been received.
void ScriptStreamer::notifyAppendData(ScriptResource* resource)
{
    ASSERT(isMainThread());
    ASSERT(m_resource == resource);
    {
        // The suppression flag is read under m_mutex.
        MutexLocker locker(m_mutex);
        if (m_streamingSuppressed)
            return;
    }

    if (!m_haveEnoughDataForStreaming) {
        // A small first chunk does not mean a small script: keep waiting for
        // further chunks before deciding whether to start streaming.
        if (resource->resourceBuffer()->size() < kSmallScriptThreshold)
            return;
        m_haveEnoughDataForStreaming = true;
        const char* histogramName = startedStreamingHistogramName(m_scriptType);

        // The encoding is looked up only now that some data is available,
        // because resource->encoding() may report a different encoding before
        // loading has started than after data has been received.
        WTF::TextEncoding resourceEncoding(resource->encoding());
        const char* canonicalName = resourceEncoding.name();

        // Encodings usable for streaming, compared by canonical name.
        const bool isOneByte = !strcmp(canonicalName, "windows-1252")
            || !strcmp(canonicalName, "ISO-8859-1")
            || !strcmp(canonicalName, "US-ASCII");

        v8::ScriptCompiler::StreamedSource::Encoding encoding;
        if (isOneByte) {
            encoding = v8::ScriptCompiler::StreamedSource::ONE_BYTE;
        } else if (!strcmp(canonicalName, "UTF-8")) {
            encoding = v8::ScriptCompiler::StreamedSource::UTF8;
        } else {
            // Other encodings are not streamed; in particular two-byte scripts
            // are left out to avoid handling byte order marks. Most scripts
            // are Latin1 or UTF-8 anyway, so this covers most real-world use.
            suppressStreaming();
            blink::Platform::current()->histogramEnumeration(histogramName, 0, 2);
            return;
        }

        if (ScriptStreamerThread::shared()->isRunningTask()) {
            // There is currently a single thread for running the tasks. A new
            // task must not be queued behind a running one, because the
            // running task can block while waiting for data from the network.
            suppressStreaming();
            blink::Platform::current()->histogramEnumeration(histogramName, 0, 2);
            return;
        }

        if (!m_scriptState->contextIsValid()) {
            suppressStreaming();
            blink::Platform::current()->histogramEnumeration(histogramName, 0, 2);
            return;
        }

        ASSERT(!m_stream);
        ASSERT(!m_source);
        m_stream = new SourceStream(this);
        // m_source takes ownership of m_stream.
        m_source = adoptPtr(new v8::ScriptCompiler::StreamedSource(m_stream, encoding));

        ScriptState::Scope scope(m_scriptState.get());
        WTF::OwnPtr<v8::ScriptCompiler::ScriptStreamingTask> v8Task(
            adoptPtr(v8::ScriptCompiler::StartStreamingScript(m_scriptState->isolate(), m_source.get(), m_compileOptions)));
        if (!v8Task) {
            // V8 cannot stream the script.
            suppressStreaming();
            m_stream = 0;
            m_source.clear();
            blink::Platform::current()->histogramEnumeration(histogramName, 0, 2);
            return;
        }

        // ScriptStreamer needs to stay alive as long as the background task is
        // running. This is taken care of with a manual ref() & deref() pair;
        // the corresponding deref() is in streamingComplete or in
        // notifyFinished.
        ref();
        ScriptStreamingTask* backgroundTask = new ScriptStreamingTask(v8Task.release(), this);
        ScriptStreamerThread::shared()->postTask(backgroundTask);
        blink::Platform::current()->histogramEnumeration(histogramName, 1, 2);
    }

    if (m_stream)
        m_stream->didReceiveData();
}
// Serializes |frame|'s document and appends it to m_resources, then walks the
// nodes recorded during serialization to pull in the resources they reference
// (inline-style resources, images, linked and inline style sheets), and
// finally recurses into every child frame.
//
// The frame (and, because of the early return, all of its child frames) is
// skipped when its URL is already present in m_resourceURLs or when the
// document charset does not yield a valid TextEncoding.
void PageSerializer::serializeFrame(Frame* frame)
{
    Document* document = frame->document();
    KURL url = document->url();
    if (!url.isValid() || url.protocolIs("about")) {
        // For blank frames we generate a fake URL so they can be referenced by their containing frame.
        url = urlForBlankFrame(frame);
    }

    if (m_resourceURLs.contains(url)) {
        // FIXME: We could have 2 frames with the same URL but which were dynamically changed and now have
        // different content. So we should serialize both and somehow rename the frame src in the containing
        // frame. Arg!
        return;
    }

    // |nodes| is handed to the accumulator by pointer; presumably it is filled
    // with the serialized nodes during serializeNodes() -- confirm against
    // SerializerMarkupAccumulator.
    Vector<Node*> nodes;
    SerializerMarkupAccumulator accumulator(this, document, &nodes);
    TextEncoding textEncoding(document->charset());
    if (!textEncoding.isValid()) {
        // FIXME: iframes used as images trigger this. We should deal with them correctly.
        return;
    }

    String text = accumulator.serializeNodes(document->documentElement(), 0, IncludeNode);
    CString frameHTML = textEncoding.encode(text.characters(), text.length(), EntitiesForUnencodables);
    m_resources->append(Resource(url, document->suggestedMIMEType(), SharedBuffer::create(frameHTML.data(), frameHTML.length())));
    m_resourceURLs.add(url);

    for (Vector<Node*>::iterator iter = nodes.begin(); iter != nodes.end(); ++iter) {
        Node* node = *iter;
        if (!node->isElementNode())
            continue;

        Element* element = toElement(node);
        // We have to process in-line style as it might contain some resources (typically background images).
        retrieveResourcesForCSSDeclaration(element->style());

        if (element->hasTagName(HTMLNames::imgTag)) {
            HTMLImageElement* imageElement = static_cast<HTMLImageElement*>(element);
            // Named distinctly so it does not shadow the frame's own |url|.
            KURL imageURL = document->completeURL(imageElement->getAttribute(HTMLNames::srcAttr));
            CachedImage* cachedImage = imageElement->cachedImage();
            addImageToResources(cachedImage, imageURL);
        } else if (element->hasTagName(HTMLNames::linkTag)) {
            HTMLLinkElement* linkElement = static_cast<HTMLLinkElement*>(element);
            StyleSheet* sheet = linkElement->sheet();
            if (sheet && sheet->isCSSStyleSheet()) {
                KURL sheetURL = document->completeURL(linkElement->getAttribute(HTMLNames::hrefAttr));
                serializeCSSStyleSheet(static_cast<CSSStyleSheet*>(sheet), sheetURL);
                ASSERT(m_resourceURLs.contains(sheetURL));
            }
        } else if (element->hasTagName(HTMLNames::styleTag)) {
            HTMLStyleElement* styleElement = static_cast<HTMLStyleElement*>(element);
            StyleSheet* sheet = styleElement->sheet();
            // Inline sheets have no URL of their own: pass an empty URL so
            // only the resources they reference are collected.
            if (sheet && sheet->isCSSStyleSheet())
                serializeCSSStyleSheet(static_cast<CSSStyleSheet*>(sheet), KURL());
        }
    }

    for (Frame* childFrame = frame->tree()->firstChild(); childFrame; childFrame = childFrame->tree()->nextSibling())
        serializeFrame(childFrame);
}