// Converts |rawToken| into an AtomicHTMLToken, hands it to the tree builder,
// and leaves |rawToken| cleared on return.
void HTMLDocumentParser::constructTreeFromHTMLToken(HTMLToken& rawToken)
{
    RefPtr<AtomicHTMLToken> token = AtomicHTMLToken::create(rawToken);

    // The raw token is cleared up front in case tree construction
    // synchronously re-enters the parser. Character tokens are the exception:
    // the AtomicHTMLToken avoids copying their characters by pointing into
    // the HTMLToken's buffer, so they cannot be cleared immediately.
    // Fortunately, Character tokens can't cause us to re-enter the parser.
    //
    // FIXME: Stop clearing the rawToken once we start running the parser off
    // the main thread or once we stop allowing synchronous JavaScript
    // execution from parseAttribute.
    const bool isCharacterToken = rawToken.type() == HTMLTokenTypes::Character;
    if (!isCharacterToken)
        rawToken.clear();

    m_treeBuilder->constructTree(token.get());

    // Drop the borrowed character pointer before the raw token is cleared
    // below, so the atomic token is never left with a dangling pointer.
    token->clearExternalCharacters();

    if (rawToken.isUninitialized())
        return;

    // Only a deferred Character token can still be live at this point.
    ASSERT(rawToken.type() == HTMLTokenTypes::Character);
    rawToken.clear();
}
// Entry point of the auditor for one token. Initializes lazily, dispatches the
// token to the matching filter and, when a filter reports a blocked script,
// logs to the console, optionally stops the page load, notifies the client
// once, sends the violation report once, and (in block mode) schedules a
// navigation to the blank URL.
void XSSAuditor::filterToken(HTMLToken& token)
{
    if (m_state == Uninitialized)
        init();
    ASSERT(m_state == Initialized);

    if (!m_isEnabled || m_xssProtection == XSSProtectionDisabled)
        return;

    // Start tags are always filtered; character and end-tag tokens are only
    // looked at while the script nesting level is non-zero.
    bool didBlockScript = false;
    if (token.type() == HTMLTokenTypes::StartTag)
        didBlockScript = filterStartToken(token);
    else if (m_scriptTagNestingLevel && token.type() == HTMLTokenTypes::Character)
        didBlockScript = filterCharacterToken(token);
    else if (m_scriptTagNestingLevel && token.type() == HTMLTokenTypes::EndTag)
        filterEndToken(token);

    if (!didBlockScript)
        return;

    // FIXME: Consider using a more helpful console message.
    DEFINE_STATIC_LOCAL(String, consoleMessage, (ASCIILiteral("Refused to execute a JavaScript script. Source code of script found within request.\n")));
    m_parser->document()->addConsoleMessage(JSMessageSource, LogMessageType, ErrorMessageLevel, consoleMessage);

    const bool didBlockEntirePage = (m_xssProtection == XSSProtectionBlockEnabled);
    if (didBlockEntirePage)
        m_parser->document()->frame()->loader()->stopAllLoaders();

    // The client is told about the first detection only.
    if (m_notifyClient) {
        m_parser->document()->frame()->loader()->client()->didDetectXSS(m_parser->document()->url(), didBlockEntirePage);
        m_notifyClient = false;
    }

    // The report is sent at most once: the URL and the captured request data
    // are reset right after sending.
    if (!m_reportURL.isEmpty()) {
        RefPtr<InspectorObject> details = InspectorObject::create();
        details->setString("request-url", m_originalURL);
        details->setString("request-body", m_originalHTTPBody);

        RefPtr<InspectorObject> envelope = InspectorObject::create();
        envelope->setObject("xss-report", details.release());

        RefPtr<FormData> payload = FormData::create(envelope->toJSONString().utf8().data());
        PingLoader::sendViolationReport(m_parser->document()->frame(), m_reportURL, payload);

        m_reportURL = KURL();
        m_originalURL = String();
        m_originalHTTPBody = String();
    }

    if (didBlockEntirePage)
        m_parser->document()->frame()->navigationScheduler()->scheduleLocationChange(m_parser->document()->securityOrigin(), blankURL(), String());
}
// Builds a self-contained copy of |token|: every piece of data that applies to
// the token's type is copied into Strings owned by this object.
// NOTE(review): m_selfClosing is only assigned here for StartTag/EndTag
// tokens; confirm it is initialized elsewhere for the other token types.
CompactHTMLToken::CompactHTMLToken(const HTMLToken& token)
    : m_type(token.type())
{
    switch (m_type) {
    case HTMLTokenTypes::Uninitialized:
        ASSERT_NOT_REACHED();
        break;

    case HTMLTokenTypes::DOCTYPE:
        m_data = String(token.name().data(), token.name().size());
        m_publicIdentifier = String(token.publicIdentifier().data(), token.publicIdentifier().size());
        m_systemIdentifier = String(token.systemIdentifier().data(), token.systemIdentifier().size());
        break;

    case HTMLTokenTypes::EndOfFile:
        // Nothing to copy.
        break;

    case HTMLTokenTypes::StartTag:
        m_attributes.reserveInitialCapacity(token.attributes().size());
        for (Vector<AttributeBase>::const_iterator it = token.attributes().begin(), end = token.attributes().end(); it != end; ++it) {
            m_attributes.append(CompactAttribute(
                String(it->m_name.data(), it->m_name.size()),
                String(it->m_value.data(), it->m_value.size())));
        }
        // Fall through!
    case HTMLTokenTypes::EndTag:
        m_selfClosing = token.selfClosing();
        // Fall through!
    case HTMLTokenTypes::Comment:
    case HTMLTokenTypes::Character:
        // Use the 8-bit representation when the token says all of its data fits.
        m_data = token.isAll8BitData()
            ? String::make8BitFrom16BitSource(token.data().data(), token.data().size())
            : String(token.data().data(), token.data().size());
        break;

    default:
        ASSERT_NOT_REACHED();
        break;
    }
}
void HTMLViewSourceDocument::addSource(const String& source, HTMLToken& token) { if (!m_current) createContainingTable(); switch (token.type()) { case HTMLToken::Uninitialized: ASSERT_NOT_REACHED(); break; case HTMLToken::DOCTYPE: processDoctypeToken(source, token); break; case HTMLToken::EndOfFile: break; case HTMLToken::StartTag: case HTMLToken::EndTag: processTagToken(source, token); break; case HTMLToken::Comment: processCommentToken(source, token); break; case HTMLToken::Character: processCharacterToken(source, token); break; } }
// Returns the source text of |token|, building it on first use and serving the
// cached copy afterwards. The text is consumed from m_previousSource first and
// then from m_currentSource. End-of-file tokens yield a null String.
String HTMLSourceTracker::sourceForToken(const HTMLToken& token)
{
    if (token.type() == HTMLToken::EndOfFile)
        return String(); // Hides the null character we use to mark the end of file.

    if (!m_cachedSourceForToken.isEmpty())
        return m_cachedSourceForToken;

    ASSERT(!token.startIndex());
    const size_t length = static_cast<size_t>(token.endIndex() - token.startIndex());

    StringBuilder source;
    source.reserveCapacity(length);

    // Drain the carried-over source first.
    size_t consumed = 0;
    while (consumed < length && !m_previousSource.isEmpty()) {
        source.append(m_previousSource.currentChar());
        m_previousSource.advance();
        ++consumed;
    }

    // Whatever remains of the token must come from the current source.
    while (consumed < length) {
        ASSERT(!m_currentSource.isEmpty());
        source.append(m_currentSource.currentChar());
        m_currentSource.advance();
        ++consumed;
    }

    m_cachedSourceForToken = source.toString();
    return m_cachedSourceForToken;
}
// Returns the source text of |token|, building it on first use and serving the
// cached copy afterwards. The text is consumed from m_previousSource first and
// then from m_currentSource.
String HTMLSourceTracker::sourceForToken(const HTMLToken& token)
{
    if (!m_cachedSourceForToken.isEmpty())
        return m_cachedSourceForToken;

    size_t length;
    if (token.type() != HTMLToken::EndOfFile) {
        ASSERT(!token.startIndex());
        length = static_cast<size_t>(token.endIndex() - token.startIndex());
    } else {
        // Consume the remainder of the input, omitting the null character we use to mark the end of the file.
        // NOTE(review): the "- 1" presumes at least one character is still
        // buffered across both sources; confirm against callers.
        length = m_previousSource.length() + m_currentSource.length() - 1;
    }

    StringBuilder source;
    source.reserveCapacity(length);

    // Drain the carried-over source first.
    size_t consumed = 0;
    while (consumed < length && !m_previousSource.isEmpty()) {
        source.append(m_previousSource.currentChar());
        m_previousSource.advance();
        ++consumed;
    }

    // Whatever remains must come from the current source.
    while (consumed < length) {
        ASSERT(!m_currentSource.isEmpty());
        source.append(m_currentSource.currentChar());
        m_currentSource.advance();
        ++consumed;
    }

    m_cachedSourceForToken = source.toString();
    return m_cachedSourceForToken;
}
// Start-tag filter for <form>. Asks eraseAttributeIfInjected to handle the
// action attribute, supplying the blank URL string as the replacement value,
// and returns that call's result.
bool XSSAuditor::filterFormToken(HTMLToken& token)
{
    ASSERT(token.type() == HTMLTokenTypes::StartTag);
    ASSERT(hasName(token, formTag));

    const bool didBlockScript = eraseAttributeIfInjected(token, actionAttr, blankURL().string());
    return didBlockScript;
}
// Filter used while the auditor is in the Initial state. Tokens other than
// start tags are ignored. For a start tag, the generic dangerous-attribute
// pass runs first, followed by at most one tag-specific filter. Returns true
// if any of those passes reported a block.
bool XSSAuditor::filterTokenInitial(HTMLToken& token)
{
    ASSERT(m_state == Initial);

    if (token.type() != HTMLTokenTypes::StartTag)
        return false;

    bool blocked = eraseDangerousAttributesIfInjected(token);

    // Tag-specific filtering: the first matching tag name wins.
    if (hasName(token, scriptTag))
        blocked |= filterScriptToken(token);
    else if (hasName(token, objectTag))
        blocked |= filterObjectToken(token);
    else if (hasName(token, paramTag))
        blocked |= filterParamToken(token);
    else if (hasName(token, embedTag))
        blocked |= filterEmbedToken(token);
    else if (hasName(token, appletTag))
        blocked |= filterAppletToken(token);
    else if (hasName(token, iframeTag))
        blocked |= filterIframeToken(token);
    else if (hasName(token, metaTag))
        blocked |= filterMetaToken(token);
    else if (hasName(token, baseTag))
        blocked |= filterBaseToken(token);
    else if (hasName(token, formTag))
        blocked |= filterFormToken(token);

    return blocked;
}
// Start-tag filter for <base>. Delegates the href attribute to
// eraseAttributeIfInjected and returns that call's result.
bool XSSAuditor::filterBaseToken(HTMLToken& token)
{
    ASSERT(token.type() == HTMLTokenTypes::StartTag);
    ASSERT(hasName(token, baseTag));

    const bool didBlockScript = eraseAttributeIfInjected(token, hrefAttr);
    return didBlockScript;
}
// Start-tag filter for <meta>. Delegates the http-equiv attribute to
// eraseAttributeIfInjected and returns that call's result.
bool XSSAuditor::filterMetaToken(HTMLToken& token)
{
    ASSERT(token.type() == HTMLTokenTypes::StartTag);
    ASSERT(hasName(token, metaTag));

    const bool didBlockScript = eraseAttributeIfInjected(token, http_equivAttr);
    return didBlockScript;
}
// Start-tag filter for <base>, valid only in the Initial state. Delegates the
// href attribute to eraseAttributeIfInjected and returns that call's result.
bool XSSFilter::filterBaseToken(HTMLToken& token)
{
    ASSERT(m_state == Initial);
    ASSERT(token.type() == HTMLToken::StartTag);
    ASSERT(hasName(token, baseTag));

    const bool didBlockScript = eraseAttributeIfInjected(token, hrefAttr);
    return didBlockScript;
}
// Start-tag filter for <meta>, valid only in the Initial state. Delegates the
// http-equiv attribute to eraseAttributeIfInjected and returns that call's
// result.
bool XSSFilter::filterMetaToken(HTMLToken& token)
{
    ASSERT(m_state == Initial);
    ASSERT(token.type() == HTMLToken::StartTag);
    ASSERT(hasName(token, metaTag));

    const bool didBlockScript = eraseAttributeIfInjected(token, http_equivAttr);
    return didBlockScript;
}
// Start-tag filter for <iframe>, valid only in the Initial state. Delegates
// the src attribute to eraseAttributeIfInjected, treating it as a src-like
// attribute with a null replacement value, and returns that call's result.
bool XSSAuditor::filterIframeToken(HTMLToken& token)
{
    ASSERT(m_state == Initial);
    ASSERT(token.type() == HTMLTokenTypes::StartTag);
    ASSERT(hasName(token, iframeTag));

    const bool didBlockScript = eraseAttributeIfInjected(token, srcAttr, String(), SrcLikeAttribute);
    return didBlockScript;
}
// Start-tag filter for <form>, valid only in the Initial state. Delegates the
// action attribute to eraseAttributeIfInjected and returns that call's result.
bool XSSAuditor::filterFormToken(HTMLToken& token)
{
    ASSERT(m_state == Initial);
    ASSERT(token.type() == HTMLTokenTypes::StartTag);
    ASSERT(hasName(token, formTag));

    const bool didBlockScript = eraseAttributeIfInjected(token, actionAttr);
    return didBlockScript;
}
// Handles the token that follows a <script> start tag. The state always goes
// back to Initial. If the token is a Character token and the cached snippet
// plus the token's own snippet is contained in the request, the characters are
// replaced by a single space and true is returned; otherwise false.
bool XSSFilter::filterTokenAfterScriptStartTag(HTMLToken& token)
{
    ASSERT(m_state == AfterScriptStartTag);
    m_state = Initial;

    if (token.type() != HTMLToken::Character) {
        ASSERT(token.type() == HTMLToken::EndTag || token.type() == HTMLToken::EndOfFile);
        return false;
    }

    // FIXME: We probably want to grab only the first few characters of the
    // contents of the script element.
    const int start = 0;
    const int end = token.endIndex() - token.startIndex();

    if (!isContainedInRequest(m_cachedSnippet + snippetForRange(token, start, end)))
        return false;

    token.eraseCharacters();
    token.appendToCharacter(' '); // Technically, character tokens can't be empty.
    return true;
}
// Start-tag filter for <iframe>. Only when the decoded snippet for the tag
// name is contained in the request are the src (src-like) and srcdoc
// (script-like) attributes passed to eraseAttributeIfInjected. Returns true if
// either call reported a block.
bool XSSAuditor::filterIframeToken(HTMLToken& token)
{
    ASSERT(token.type() == HTMLTokenTypes::StartTag);
    ASSERT(hasName(token, iframeTag));

    if (!isContainedInRequest(decodedSnippetForName(token)))
        return false;

    // Both attributes are always checked; neither call is short-circuited.
    const bool erasedSrc = eraseAttributeIfInjected(token, srcAttr, String(), SrcLikeAttribute);
    const bool erasedSrcdoc = eraseAttributeIfInjected(token, srcdocAttr, String(), ScriptLikeAttribute);
    return erasedSrc || erasedSrcdoc;
}
void HTMLSourceTracker::start(const HTMLInputStream& input, HTMLTokenizer* tokenizer, HTMLToken& token) { if (token.type() == HTMLTokenTypes::Uninitialized) { m_previousSource.clear(); if (tokenizer->numberOfBufferedCharacters()) m_previousSource = tokenizer->bufferedCharacters(); } else m_previousSource.append(m_currentSource); m_currentSource = input.current(); token.setBaseOffset(m_currentSource.numberOfCharactersConsumed() - m_previousSource.length()); }
// Records where the source of |token| begins. If the token is already in
// progress, the current source is appended to the previous source. Otherwise
// the previous source is reset to the characters the tokenizer has buffered,
// if any. The token's base offset is then set from the new current source.
void HTMLSourceTracker::start(SegmentedString& currentInput, HTMLTokenizer* tokenizer, HTMLToken& token)
{
    if (token.type() != HTMLToken::Uninitialized)
        m_previousSource.append(m_currentSource);
    else {
        m_previousSource.clear();
        if (tokenizer->numberOfBufferedCharacters())
            m_previousSource = tokenizer->bufferedCharacters();
    }

    m_currentSource = currentInput;

    // Shift the base offset back by the length of the carried-over source.
    token.setBaseOffset(m_currentSource.numberOfCharactersConsumed() - m_previousSource.length());
}
// Start-tag filter for <script>, valid only in the Initial state. If
// eraseAttributeIfInjected reports a block for the src attribute, returns true
// and leaves the state alone. Otherwise caches the token's source, moves to
// AfterScriptStartTag, and returns false.
bool XSSAuditor::filterScriptToken(HTMLToken& token)
{
    ASSERT(m_state == Initial);
    ASSERT(token.type() == HTMLTokenTypes::StartTag);
    ASSERT(hasName(token, scriptTag));

    const bool erasedSrc = eraseAttributeIfInjected(token, srcAttr, blankURL().string(), SrcLikeAttribute);
    if (!erasedSrc) {
        m_state = AfterScriptStartTag;
        m_cachedSnippet = m_parser->sourceForToken(token);
    }
    return erasedSrc;
}
// Start-tag filter for <object>. Only when the decoded snippet for the tag
// name is contained in the request are the data (src-like, replaced with the
// blank URL string), type and classid attributes passed to
// eraseAttributeIfInjected. Returns true if any call reported a block.
bool XSSAuditor::filterObjectToken(HTMLToken& token)
{
    ASSERT(token.type() == HTMLTokenTypes::StartTag);
    ASSERT(hasName(token, objectTag));

    if (!isContainedInRequest(decodedSnippetForName(token)))
        return false;

    // All three attributes are always checked; no call is short-circuited.
    const bool erasedData = eraseAttributeIfInjected(token, dataAttr, blankURL().string(), SrcLikeAttribute);
    const bool erasedType = eraseAttributeIfInjected(token, typeAttr);
    const bool erasedClassid = eraseAttributeIfInjected(token, classidAttr);
    return erasedData || erasedType || erasedClassid;
}
// Start-tag filter for <embed>, valid only in the Initial state. Passes the
// src attribute (replaced with the blank URL string) and the type attribute to
// eraseAttributeIfInjected. Returns true if either call reported a block.
bool XSSFilter::filterEmbedToken(HTMLToken& token)
{
    ASSERT(m_state == Initial);
    ASSERT(token.type() == HTMLToken::StartTag);
    ASSERT(hasName(token, embedTag));

    // Both attributes are always checked; neither call is short-circuited.
    const bool erasedSrc = eraseAttributeIfInjected(token, srcAttr, blankURL().string());
    const bool erasedType = eraseAttributeIfInjected(token, typeAttr);
    return erasedSrc || erasedType;
}
// Start-tag filter for <applet>, valid only in the Initial state. Passes the
// code attribute (src-like, null replacement) and the object attribute to
// eraseAttributeIfInjected. Returns true if either call reported a block.
bool XSSAuditor::filterAppletToken(HTMLToken& token)
{
    ASSERT(m_state == Initial);
    ASSERT(token.type() == HTMLTokenTypes::StartTag);
    ASSERT(hasName(token, appletTag));

    // Both attributes are always checked; neither call is short-circuited.
    const bool erasedCode = eraseAttributeIfInjected(token, codeAttr, String(), SrcLikeAttribute);
    const bool erasedObject = eraseAttributeIfInjected(token, objectAttr);
    return erasedCode || erasedObject;
}
// Start-tag filter for <script>. Caches the decoded snippet for the tag name
// and the tokenizer's current shouldAllowCDATA() value. If the cached snippet
// is contained in the request, passes the src attribute (src-like, replaced
// with the blank URL string) to eraseAttributeIfInjected and returns its
// result; otherwise returns false.
bool XSSAuditor::filterScriptToken(HTMLToken& token)
{
    ASSERT(token.type() == HTMLTokenTypes::StartTag);
    ASSERT(hasName(token, scriptTag));

    m_cachedDecodedSnippet = decodedSnippetForName(token);
    m_shouldAllowCDATA = m_parser->tokenizer()->shouldAllowCDATA();

    // Reuse the snippet cached above rather than decoding the same token a
    // second time.
    if (!isContainedInRequest(m_cachedDecodedSnippet))
        return false;

    return eraseAttributeIfInjected(token, srcAttr, blankURL().string(), SrcLikeAttribute);
}
// Handles the token that follows a <script> start tag. The state always goes
// back to Initial. A Character token is replaced by a single space (and true
// is returned) only when both the fully decoded cached snippet and the fully
// decoded JavaScript snippet of the token are contained in the request.
bool XSSAuditor::filterTokenAfterScriptStartTag(HTMLToken& token)
{
    ASSERT(m_state == AfterScriptStartTag);
    m_state = Initial;

    if (token.type() != HTMLTokenTypes::Character) {
        ASSERT(token.type() == HTMLTokenTypes::EndTag || token.type() == HTMLTokenTypes::EndOfFile);
        return false;
    }

    TextResourceDecoder* decoder = m_parser->document()->decoder();

    // First gate: the cached snippet must be in the request.
    if (!isContainedInRequest(fullyDecodeString(m_cachedSnippet, decoder)))
        return false;

    // Second gate: the script body itself must be in the request.
    const int start = 0;
    const int end = token.endIndex() - token.startIndex();
    String scriptSnippet = snippetForJavaScript(snippetForRange(token, start, end));
    if (!isContainedInRequest(fullyDecodeString(scriptSnippet, decoder)))
        return false;

    token.eraseCharacters();
    token.appendToCharacter(' '); // Technically, character tokens can't be empty.
    return true;
}
// Start-tag filter for <object>, valid only in the Initial state. Passes the
// data (src-like, replaced with the blank URL string), type and classid
// attributes to eraseAttributeIfInjected. Returns true if any call reported a
// block.
bool XSSAuditor::filterObjectToken(HTMLToken& token)
{
    ASSERT(m_state == Initial);
    ASSERT(token.type() == HTMLTokenTypes::StartTag);
    ASSERT(hasName(token, objectTag));

    // All three attributes are always checked; no call is short-circuited.
    const bool erasedData = eraseAttributeIfInjected(token, dataAttr, blankURL().string(), SrcLikeAttribute);
    const bool erasedType = eraseAttributeIfInjected(token, typeAttr);
    const bool erasedClassid = eraseAttributeIfInjected(token, classidAttr);
    return erasedData || erasedType || erasedClassid;
}
// This is a regression test for crbug.com/619141 TEST(HTMLTokenizerTest, ZeroOffsetAttributeNameRange) { HTMLParserOptions options; std::unique_ptr<HTMLTokenizer> tokenizer = HTMLTokenizer::create(options); HTMLToken token; SegmentedString input("<script "); EXPECT_FALSE(tokenizer->nextToken(input, token)); EXPECT_EQ(HTMLToken::StartTag, token.type()); SegmentedString input2("type='javascript'"); // Below should not fail ASSERT EXPECT_FALSE(tokenizer->nextToken(input2, token)); }
// Entry point of the auditor for one token. Initializes lazily, dispatches the
// token to the matching filter and, when a filter reports a blocked script,
// logs to the console, optionally stops the page load, notifies the client
// once, and (in block mode) schedules a navigation to the blank URL.
void XSSAuditor::filterToken(HTMLToken& token)
{
    if (m_state == Uninitialized)
        init();
    ASSERT(m_state == Initialized);

    if (!m_isEnabled || m_xssProtection == XSSProtectionDisabled)
        return;

    // Start tags are always filtered; character and end-tag tokens are only
    // looked at while the script nesting level is non-zero.
    bool didBlockScript = false;
    if (token.type() == HTMLTokenTypes::StartTag)
        didBlockScript = filterStartToken(token);
    else if (m_scriptTagNestingLevel && token.type() == HTMLTokenTypes::Character)
        didBlockScript = filterCharacterToken(token);
    else if (m_scriptTagNestingLevel && token.type() == HTMLTokenTypes::EndTag)
        filterEndToken(token);

    if (!didBlockScript)
        return;

    // FIXME: Consider using a more helpful console message.
    DEFINE_STATIC_LOCAL(String, consoleMessage, ("Refused to execute a JavaScript script. Source code of script found within request.\n"));
    m_parser->document()->addConsoleMessage(JSMessageSource, LogMessageType, ErrorMessageLevel, consoleMessage);

    const bool didBlockEntirePage = (m_xssProtection == XSSProtectionBlockEnabled);
    if (didBlockEntirePage)
        m_parser->document()->frame()->loader()->stopAllLoaders();

    // The client is told about the first detection only.
    if (!m_notifiedClient) {
        m_parser->document()->frame()->loader()->client()->didDetectXSS(m_parser->document()->url(), didBlockEntirePage);
        m_notifiedClient = true;
    }

    if (didBlockEntirePage)
        m_parser->document()->frame()->navigationScheduler()->scheduleLocationChange(m_parser->document()->securityOrigin(), blankURL(), String());
}
// Converts |rawToken| into an AtomicHTMLToken, hands it to the tree builder,
// and leaves |rawToken| cleared on return.
void HTMLDocumentParser::constructTreeFromHTMLToken(HTMLToken& rawToken)
{
    AtomicHTMLToken token(rawToken);

    // The raw token is cleared up front in case tree construction
    // synchronously re-enters the parser. Character tokens are the exception:
    // the AtomicHTMLToken avoids copying their characters by pointing into
    // the HTMLToken's buffer, so they cannot be cleared immediately.
    // Fortunately, Character tokens can't cause us to re-enter the parser.
    //
    // FIXME: Stop clearing the rawToken once we start running the parser off
    // the main thread or once we stop allowing synchronous JavaScript
    // execution from parseAttribute.
    const bool isCharacterToken = rawToken.type() == HTMLToken::Character;
    if (!isCharacterToken)
        rawToken.clear();

    m_treeBuilder->constructTree(&token);

    if (rawToken.isUninitialized())
        return;

    // Only a deferred Character token can still be live at this point.
    ASSERT(rawToken.type() == HTMLToken::Character);
    rawToken.clear();
}
// Start-tag filter for <param>. Returns false when the token has no name
// attribute or when HTMLParamElement::isURLParameter rejects that attribute's
// value. Otherwise passes the value attribute (src-like, replaced with the
// blank URL string) to eraseAttributeIfInjected and returns its result.
bool XSSAuditor::filterParamToken(HTMLToken& token)
{
    ASSERT(token.type() == HTMLTokenTypes::StartTag);
    ASSERT(hasName(token, paramTag));

    size_t nameIndex;
    if (!findAttributeWithName(token, nameAttr, nameIndex))
        return false;

    const HTMLToken::Attribute& nameAttribute = token.attributes().at(nameIndex);
    String parameterName(nameAttribute.m_value.data(), nameAttribute.m_value.size());

    if (HTMLParamElement::isURLParameter(parameterName))
        return eraseAttributeIfInjected(token, valueAttr, blankURL().string(), SrcLikeAttribute);

    return false;
}
void TokenPreloadScanner::scan(const HTMLToken& token, Vector<std::unique_ptr<PreloadRequest>>& requests, Document& document) { switch (token.type()) { case HTMLToken::Character: if (!m_inStyle) return; m_cssScanner.scan(token.characters(), requests); return; case HTMLToken::EndTag: { TagId tagId = tagIdFor(token.name()); #if ENABLE(TEMPLATE_ELEMENT) if (tagId == TagId::Template) { if (m_templateCount) --m_templateCount; return; } #endif if (tagId == TagId::Style) { if (m_inStyle) m_cssScanner.reset(); m_inStyle = false; } else if (tagId == TagId::Picture && !m_pictureSourceState.isEmpty()) m_pictureSourceState.removeLast(); return; } case HTMLToken::StartTag: { #if ENABLE(TEMPLATE_ELEMENT) if (m_templateCount) return; #endif TagId tagId = tagIdFor(token.name()); #if ENABLE(TEMPLATE_ELEMENT) if (tagId == TagId::Template) { ++m_templateCount; return; } #endif if (tagId == TagId::Style) { m_inStyle = true; return; } if (tagId == TagId::Base) { // The first <base> element is the one that wins. if (!m_predictedBaseElementURL.isEmpty()) return; updatePredictedBaseURL(token); return; } if (tagId == TagId::Picture) { m_pictureSourceState.append(false); return; } StartTagScanner scanner(tagId, m_deviceScaleFactor); scanner.processAttributes(token.attributes(), document, m_pictureSourceState); if (auto request = scanner.createPreloadRequest(m_predictedBaseElementURL)) requests.append(WTFMove(request)); return; } default: return; } }