// Builds a compact copy of |token|. Which members get populated depends on the
// token type:
//   - DOCTYPE: name plus public and system identifiers.
//   - StartTag: attributes, the self-closing flag and the data.
//   - EndTag: the self-closing flag and the data.
//   - Comment / Character: the data only.
//   - EndOfFile: nothing beyond the type.
// Uninitialized and unknown types hit ASSERT_NOT_REACHED().
CompactHTMLToken::CompactHTMLToken(const HTMLToken& token)
    : m_type(token.type())
{
    switch (m_type) {
    case HTMLTokenTypes::Uninitialized:
        ASSERT_NOT_REACHED();
        break;

    case HTMLTokenTypes::DOCTYPE:
        m_data = String(token.name().data(), token.name().size());
        m_publicIdentifier = String(token.publicIdentifier().data(), token.publicIdentifier().size());
        m_systemIdentifier = String(token.systemIdentifier().data(), token.systemIdentifier().size());
        break;

    case HTMLTokenTypes::EndOfFile:
        break;

    case HTMLTokenTypes::StartTag:
        // Copy every attribute as a (name, value) pair of Strings.
        m_attributes.reserveInitialCapacity(token.attributes().size());
        for (Vector<AttributeBase>::const_iterator attribute = token.attributes().begin(), end = token.attributes().end(); attribute != end; ++attribute) {
            m_attributes.append(CompactAttribute(
                String(attribute->m_name.data(), attribute->m_name.size()),
                String(attribute->m_value.data(), attribute->m_value.size())));
        }
        // Intentional fall through: start tags also record the self-closing flag and data.
    case HTMLTokenTypes::EndTag:
        m_selfClosing = token.selfClosing();
        // Intentional fall through: tags also record their data.
    case HTMLTokenTypes::Comment:
    case HTMLTokenTypes::Character:
        // Use the 8-bit conversion when the token reports that all of its data is 8-bit.
        m_data = token.isAll8BitData()
            ? String::make8BitFrom16BitSource(token.data().data(), token.data().size())
            : String(token.data().data(), token.data().size());
        break;

    default:
        ASSERT_NOT_REACHED();
        break;
    }
}
void HTMLViewSourceDocument::processTagToken(const String& source, HTMLToken& token) { m_current = addSpanWithClassName("webkit-html-tag"); AtomicString tagName(token.name().data(), token.name().size()); unsigned index = 0; HTMLToken::AttributeList::const_iterator iter = token.attributes().begin(); while (index < source.length()) { if (iter == token.attributes().end()) { // We want to show the remaining characters in the token. index = addRange(source, index, source.length(), ""); ASSERT(index == source.length()); break; } AtomicString name(iter->m_name.data(), iter->m_name.size()); String value(iter->m_value.data(), iter->m_value.size()); index = addRange(source, index, iter->m_nameRange.m_start - token.startIndex(), ""); index = addRange(source, index, iter->m_nameRange.m_end - token.startIndex(), "webkit-html-attribute-name"); if (tagName == baseTag && name == hrefAttr) m_current = addBase(value); index = addRange(source, index, iter->m_valueRange.m_start - token.startIndex(), ""); bool isLink = name == srcAttr || name == hrefAttr; index = addRange(source, index, iter->m_valueRange.m_end - token.startIndex(), "webkit-html-attribute-value", isLink, tagName == aTag); ++iter; } m_current = m_td; }
void HTMLViewSourceDocument::processTagToken(const String& source, HTMLToken& token, SourceAnnotation annotation) { maybeAddSpanForAnnotation(annotation); m_current = addSpanWithClassName("html-tag"); AtomicString tagName(token.name()); unsigned index = 0; HTMLToken::AttributeList::const_iterator iter = token.attributes().begin(); while (index < source.length()) { if (iter == token.attributes().end()) { // We want to show the remaining characters in the token. index = addRange(source, index, source.length(), emptyAtom); ASSERT(index == source.length()); break; } AtomicString name(iter->name); AtomicString value(StringImpl::create8BitIfPossible(iter->value)); index = addRange(source, index, iter->nameRange.start - token.startIndex(), emptyAtom); index = addRange(source, index, iter->nameRange.end - token.startIndex(), "html-attribute-name"); if (tagName == baseTag && name == hrefAttr) addBase(value); index = addRange(source, index, iter->valueRange.start - token.startIndex(), emptyAtom); bool isLink = name == srcAttr || name == hrefAttr; index = addRange(source, index, iter->valueRange.end - token.startIndex(), "html-attribute-value", isLink, tagName == aTag, value); ++iter; } m_current = m_td; }
// Looks for the first attribute of |token| whose name equals the local name of
// |name| (compared with equalIgnoringNullity). On success stores its position
// in |indexOfMatchingAttribute| and returns true; otherwise returns false and
// leaves |indexOfMatchingAttribute| untouched.
static bool findAttributeWithName(const HTMLToken& token, const QualifiedName& name, size_t& indexOfMatchingAttribute)
{
    for (size_t i = 0; i < token.attributes().size(); ++i) {
        if (!equalIgnoringNullity(token.attributes().at(i).m_name, name.localName()))
            continue;
        indexOfMatchingAttribute = i;
        return true;
    }
    return false;
}
static bool findAttributeWithName(const HTMLToken& token, const QualifiedName& name, size_t& indexOfMatchingAttribute) { // Notice that we're careful not to ref the StringImpl here because we might be on a background thread. const String& attrName = name.namespaceURI() == XLinkNames::xlinkNamespaceURI ? "xlink:" + name.localName().string() : name.localName().string(); for (size_t i = 0; i < token.attributes().size(); ++i) { if (equalIgnoringNullity(token.attributes().at(i).name, attrName)) { indexOfMatchingAttribute = i; return true; } } return false; }
// Captures the tag name of |token|, sets the link/input flags to their initial
// values (link is not a style sheet, link media is "screen", input is not an
// image) and then hands the token's attributes to processAttributes().
explicit PreloadTask(const HTMLToken& token)
    : m_tagName(token.name().data(), token.name().size())
    , m_linkIsStyleSheet(false)
    , m_linkMediaAttributeIsScreen(true)
    , m_inputIsImage(false)
{
    processAttributes(token.attributes());
}
// Scans every attribute of |token| for inline event handlers and values that
// contain a javascript: URL. When the (truncated) decoded snippet of such an
// attribute is contained in the request, the attribute's value is erased;
// values that held a javascript: URL are replaced with "javascript:void(0)".
// Returns true if at least one attribute was erased.
bool XSSAuditor::eraseDangerousAttributesIfInjected(HTMLToken& token)
{
    DEFINE_STATIC_LOCAL(String, safeJavaScriptURL, ("javascript:void(0)"));

    bool didBlockScript = false;
    for (size_t i = 0; i < token.attributes().size(); ++i) {
        const HTMLToken::Attribute& attribute = token.attributes().at(i);
        bool isInlineEventHandler = isNameOfInlineEventHandler(attribute.m_name);
        bool valueContainsJavaScriptURL = !isInlineEventHandler && containsJavaScriptURL(attribute.m_value);
        if (!(isInlineEventHandler || valueContainsJavaScriptURL))
            continue;

        // Beware of trailing characters which came from the page itself, not
        // the injected vector. Excluding the terminating character covers the
        // common cases where the page immediately ends the attribute, but it
        // doesn't cover more complex cases where other page data follows the
        // injection. Generally, that data won't parse as JavaScript, so the
        // injected vector typically excludes it from consideration via a
        // single-line comment or by enclosing it in a string literal that is
        // terminated later by the page's own closing punctuation. Since the
        // snippet has not been parsed, the vector may also try to introduce
        // these via entities. As a result, we'd like to stop before the first
        // "//", the first <!--, the first entity, or the first quote not
        // immediately following the first equals sign (taking whitespace into
        // consideration). To keep things simpler, we don't try to distinguish
        // between entity-introducing ampersands and other uses, nor do we
        // bother to check for a second slash for a comment, nor do we bother
        // to check for !-- following a less-than sign. We stop instead on any
        // ampersand, slash, or less-than sign.
        String decodedSnippet = decodedSnippetForAttribute(token, attribute);

        // Step 1: find the first equals sign.
        size_t position = decodedSnippet.find("=");
        // Step 2: find the first non-space character after it.
        if (position != notFound)
            position = decodedSnippet.find(isNotHTMLSpace, position + 1);
        // Step 3: find the first terminating character, skipping over an
        // opening quote if the value starts with one.
        if (position != notFound)
            position = decodedSnippet.find(isTerminatingCharacter, isHTMLQuote(decodedSnippet[position]) ? position + 1 : position);
        // Only truncate when all three searches succeeded.
        if (position != notFound)
            decodedSnippet.truncate(position);

        if (!isContainedInRequest(decodedSnippet))
            continue;

        token.eraseValueOfAttribute(i);
        if (valueContainsJavaScriptURL)
            token.appendToAttributeValue(i, safeJavaScriptURL);
        didBlockScript = true;
    }
    return didBlockScript;
}
// Scans every attribute of |token| for inline event handlers and values whose
// protocol (after stripping leading/trailing HTML spaces) is javascript:.
// When the decoded snippet of such an attribute (treated as a
// ScriptLikeAttribute) is contained in the request, the attribute's value is
// erased; values that held a javascript: URL are replaced with
// "javascript:void(0)". Returns true if at least one attribute was erased.
bool XSSAuditor::eraseDangerousAttributesIfInjected(HTMLToken& token)
{
    DEFINE_STATIC_LOCAL(String, safeJavaScriptURL, (ASCIILiteral("javascript:void(0)")));

    bool didBlockScript = false;
    for (size_t i = 0; i < token.attributes().size(); ++i) {
        const HTMLToken::Attribute& attribute = token.attributes().at(i);
        bool isInlineEventHandler = isNameOfInlineEventHandler(attribute.m_name);

        // The value is only inspected when the name is not an inline event handler.
        bool valueContainsJavaScriptURL = false;
        if (!isInlineEventHandler)
            valueContainsJavaScriptURL = protocolIsJavaScript(stripLeadingAndTrailingHTMLSpaces(String(attribute.m_value.data(), attribute.m_value.size())));

        if (!(isInlineEventHandler || valueContainsJavaScriptURL))
            continue;
        if (!isContainedInRequest(decodedSnippetForAttribute(token, attribute, ScriptLikeAttribute)))
            continue;

        token.eraseValueOfAttribute(i);
        if (valueContainsJavaScriptURL)
            token.appendToAttributeValue(i, safeJavaScriptURL);
        didBlockScript = true;
    }
    return didBlockScript;
}
// Scans every attribute of |token| for inline event handlers and values that
// contain a javascript: URL. When the snippet of such an attribute is
// contained in the request, the attribute's value is erased; values that held
// a javascript: URL are replaced with "javascript:void(0)". Returns true if
// at least one attribute was erased.
bool XSSFilter::eraseDangerousAttributesIfInjected(HTMLToken& token)
{
    DEFINE_STATIC_LOCAL(String, safeJavaScriptURL, ("javascript:void(0)"));

    bool didBlockScript = false;
    for (size_t i = 0; i < token.attributes().size(); ++i) {
        const HTMLToken::Attribute& attribute = token.attributes().at(i);
        bool isInlineEventHandler = isNameOfInlineEventHandler(attribute.m_name);
        // The value is only inspected when the name is not an inline event handler.
        bool valueContainsJavaScriptURL = !isInlineEventHandler && containsJavaScriptURL(attribute.m_value);

        if (!(isInlineEventHandler || valueContainsJavaScriptURL))
            continue;
        if (!isContainedInRequest(snippetForAttribute(token, attribute)))
            continue;

        token.eraseValueOfAttribute(i);
        if (valueContainsJavaScriptURL)
            token.appendToAttributeValue(i, safeJavaScriptURL);
        didBlockScript = true;
    }
    return didBlockScript;
}
// Collects the (name, value) pairs of every attribute on |token|, derives
// m_encoding from them via encodingFromMetaAttributes(), and reports whether
// the resulting encoding is valid.
bool HTMLMetaCharsetParser::processMeta(HTMLToken& token)
{
    AttributeList metaAttributes;
    for (auto& tokenAttribute : token.attributes()) {
        String name = StringImpl::create8BitIfPossible(tokenAttribute.name);
        String value = StringImpl::create8BitIfPossible(tokenAttribute.value);
        metaAttributes.append(std::make_pair(name, value));
    }

    m_encoding = encodingFromMetaAttributes(metaAttributes);
    return m_encoding.isValid();
}
// If |token| has an attribute named |attributeName| whose snippet is contained
// in the request, erases that attribute's value (substituting
// |replacementValue| when it is non-empty) and returns true. A src attribute
// that refers to a same-origin resource is left alone. Returns false whenever
// nothing was erased.
bool XSSFilter::eraseAttributeIfInjected(HTMLToken& token, const QualifiedName& attributeName, const String& replacementValue)
{
    size_t indexOfAttribute;
    if (!findAttributeWithName(token, attributeName, indexOfAttribute))
        return false;

    const HTMLToken::Attribute& attribute = token.attributes().at(indexOfAttribute);
    if (!isContainedInRequest(snippetForAttribute(token, attribute)))
        return false;

    if (attributeName == srcAttr && isSameOriginResource(String(attribute.m_value.data(), attribute.m_value.size())))
        return false;

    token.eraseValueOfAttribute(indexOfAttribute);
    if (!replacementValue.isEmpty())
        token.appendToAttributeValue(indexOfAttribute, replacementValue);
    return true;
}
// Filters a <param> start tag. If the tag has a name attribute whose value is
// a URL parameter according to HTMLParamElement::isURLParameter(), the value
// attribute is erased when injected (replaced with the blank URL, treated as
// a SrcLikeAttribute). Returns the result of that erase attempt, or false
// when there is no name attribute or the name is not a URL parameter.
bool XSSAuditor::filterParamToken(HTMLToken& token)
{
    ASSERT(token.type() == HTMLTokenTypes::StartTag);
    ASSERT(hasName(token, paramTag));

    size_t indexOfNameAttribute;
    if (!findAttributeWithName(token, nameAttr, indexOfNameAttribute))
        return false;

    const HTMLToken::Attribute& nameAttribute = token.attributes().at(indexOfNameAttribute);
    String name(nameAttribute.m_value.data(), nameAttribute.m_value.size());
    if (HTMLParamElement::isURLParameter(name))
        return eraseAttributeIfInjected(token, valueAttr, blankURL().string(), SrcLikeAttribute);

    return false;
}
// If |token| has an attribute named |attributeName| whose decoded snippet
// (computed according to |treatment|) is contained in the request, erases that
// attribute's value (substituting |replacementValue| when it is non-empty) and
// returns true. Two exceptions leave the attribute alone: a src attribute
// whose value is a likely-safe resource, and an http-equiv attribute whose
// value is not a dangerous one. Returns false whenever nothing was erased.
bool XSSAuditor::eraseAttributeIfInjected(HTMLToken& token, const QualifiedName& attributeName, const String& replacementValue, AttributeKind treatment)
{
    size_t indexOfAttribute = 0;
    if (!findAttributeWithName(token, attributeName, indexOfAttribute))
        return false;

    const HTMLToken::Attribute& attribute = token.attributes().at(indexOfAttribute);
    if (!isContainedInRequest(decodedSnippetForAttribute(token, attribute, treatment)))
        return false;

    if (attributeName == srcAttr && isLikelySafeResource(String(attribute.m_value.data(), attribute.m_value.size())))
        return false;
    if (attributeName == http_equivAttr && !isDangerousHTTPEquiv(String(attribute.m_value.data(), attribute.m_value.size())))
        return false;

    token.eraseValueOfAttribute(indexOfAttribute);
    if (!replacementValue.isEmpty())
        token.appendToAttributeValue(indexOfAttribute, replacementValue);
    return true;
}
// Sets m_predictedBaseElementURL from the href attribute of |token|, if it has
// one: the value is stripped of leading/trailing HTML spaces, resolved against
// m_documentURL, and stored as an isolated copy. Expects the predicted base
// URL to still be empty on entry (asserted).
void TokenPreloadScanner::updatePredictedBaseURL(const HTMLToken& token)
{
    ASSERT(m_predictedBaseElementURL.isEmpty());

    auto* hrefAttribute = findAttribute(token.attributes(), hrefAttr.localName().string());
    if (!hrefAttribute)
        return;

    m_predictedBaseElementURL = URL(m_documentURL, stripLeadingAndTrailingHTMLSpaces(StringImpl::create8BitIfPossible(hrefAttribute->value))).isolatedCopy();
}
void TokenPreloadScanner::scan(const HTMLToken& token, Vector<std::unique_ptr<PreloadRequest>>& requests, Document& document) { switch (token.type()) { case HTMLToken::Character: if (!m_inStyle) return; m_cssScanner.scan(token.characters(), requests); return; case HTMLToken::EndTag: { TagId tagId = tagIdFor(token.name()); #if ENABLE(TEMPLATE_ELEMENT) if (tagId == TagId::Template) { if (m_templateCount) --m_templateCount; return; } #endif if (tagId == TagId::Style) { if (m_inStyle) m_cssScanner.reset(); m_inStyle = false; } else if (tagId == TagId::Picture && !m_pictureSourceState.isEmpty()) m_pictureSourceState.removeLast(); return; } case HTMLToken::StartTag: { #if ENABLE(TEMPLATE_ELEMENT) if (m_templateCount) return; #endif TagId tagId = tagIdFor(token.name()); #if ENABLE(TEMPLATE_ELEMENT) if (tagId == TagId::Template) { ++m_templateCount; return; } #endif if (tagId == TagId::Style) { m_inStyle = true; return; } if (tagId == TagId::Base) { // The first <base> element is the one that wins. if (!m_predictedBaseElementURL.isEmpty()) return; updatePredictedBaseURL(token); return; } if (tagId == TagId::Picture) { m_pictureSourceState.append(false); return; } StartTagScanner scanner(tagId, m_deviceScaleFactor); scanner.processAttributes(token.attributes(), document, m_pictureSourceState); if (auto request = scanner.createPreloadRequest(m_predictedBaseElementURL)) requests.append(WTFMove(request)); return; } default: return; } }