// Returns the source text that produced |token|.
//
// The result is memoized in m_cachedSourceForToken: when that cache is
// non-empty it is returned as-is and no input is consumed. Otherwise the
// required number of characters is consumed, first from m_previousSource and
// then from m_currentSource, and the concatenation is cached and returned.
String HTMLSourceTracker::sourceForToken(const HTMLToken& token)
{
    if (!m_cachedSourceForToken.isEmpty())
        return m_cachedSourceForToken;

    size_t length;
    if (token.type() == HTMLToken::EndOfFile) {
        // Consume the remainder of the input, omitting the null character we
        // use to mark the end of the file. The subtraction is guarded: with no
        // buffered input left, "0 - 1" would wrap around to a huge length
        // instead of yielding an empty result.
        const size_t remaining = m_previousSource.length() + m_currentSource.length();
        length = remaining ? remaining - 1 : 0;
    } else {
        ASSERT(!token.startIndex());
        length = static_cast<size_t>(token.endIndex() - token.startIndex());
    }

    StringBuilder source;
    source.reserveCapacity(length);

    // Drain the previous segment first; it may run out before |length| is reached.
    size_t i = 0;
    for ( ; i < length && !m_previousSource.isEmpty(); ++i) {
        source.append(m_previousSource.currentChar());
        m_previousSource.advance();
    }

    // Whatever is still missing must come from the current segment.
    for ( ; i < length; ++i) {
        ASSERT(!m_currentSource.isEmpty());
        source.append(m_currentSource.currentChar());
        m_currentSource.advance();
    }

    m_cachedSourceForToken = source.toString();
    return m_cachedSourceForToken;
}
void HTMLViewSourceDocument::processTagToken(const String& source, HTMLToken& token) { m_current = addSpanWithClassName("webkit-html-tag"); AtomicString tagName(token.name().data(), token.name().size()); unsigned index = 0; HTMLToken::AttributeList::const_iterator iter = token.attributes().begin(); while (index < source.length()) { if (iter == token.attributes().end()) { // We want to show the remaining characters in the token. index = addRange(source, index, source.length(), ""); ASSERT(index == source.length()); break; } AtomicString name(iter->m_name.data(), iter->m_name.size()); String value(iter->m_value.data(), iter->m_value.size()); index = addRange(source, index, iter->m_nameRange.m_start - token.startIndex(), ""); index = addRange(source, index, iter->m_nameRange.m_end - token.startIndex(), "webkit-html-attribute-name"); if (tagName == baseTag && name == hrefAttr) m_current = addBase(value); index = addRange(source, index, iter->m_valueRange.m_start - token.startIndex(), ""); bool isLink = name == srcAttr || name == hrefAttr; index = addRange(source, index, iter->m_valueRange.m_end - token.startIndex(), "webkit-html-attribute-value", isLink, tagName == aTag); ++iter; } m_current = m_td; }
// Returns the source text that produced |token|.
//
// End-of-file tokens always yield an empty String. For any other token the
// result is memoized in m_cachedSourceForToken; on a cache miss the token's
// length worth of characters is consumed from m_previousSource and then from
// m_currentSource.
String HTMLSourceTracker::sourceForToken(const HTMLToken& token)
{
    // Hides the null character we use to mark the end of file.
    if (token.type() == HTMLToken::EndOfFile)
        return String();

    if (!m_cachedSourceForToken.isEmpty())
        return m_cachedSourceForToken;

    ASSERT(!token.startIndex());
    size_t length = static_cast<size_t>(token.endIndex() - token.startIndex());

    StringBuilder builder;
    builder.reserveCapacity(length);

    // Count down the characters still owed; the previous segment is drained
    // first and may be exhausted before the count reaches zero.
    size_t remaining = length;
    while (remaining && !m_previousSource.isEmpty()) {
        builder.append(m_previousSource.currentChar());
        m_previousSource.advance();
        --remaining;
    }

    // The rest has to be available in the current segment.
    while (remaining) {
        ASSERT(!m_currentSource.isEmpty());
        builder.append(m_currentSource.currentChar());
        m_currentSource.advance();
        --remaining;
    }

    m_cachedSourceForToken = builder.toString();
    return m_cachedSourceForToken;
}
void HTMLViewSourceDocument::processTagToken(const String& source, HTMLToken& token, SourceAnnotation annotation) { maybeAddSpanForAnnotation(annotation); m_current = addSpanWithClassName("html-tag"); AtomicString tagName(token.name()); unsigned index = 0; HTMLToken::AttributeList::const_iterator iter = token.attributes().begin(); while (index < source.length()) { if (iter == token.attributes().end()) { // We want to show the remaining characters in the token. index = addRange(source, index, source.length(), emptyAtom); ASSERT(index == source.length()); break; } AtomicString name(iter->name); AtomicString value(StringImpl::create8BitIfPossible(iter->value)); index = addRange(source, index, iter->nameRange.start - token.startIndex(), emptyAtom); index = addRange(source, index, iter->nameRange.end - token.startIndex(), "html-attribute-name"); if (tagName == baseTag && name == hrefAttr) addBase(value); index = addRange(source, index, iter->valueRange.start - token.startIndex(), emptyAtom); bool isLink = name == srcAttr || name == hrefAttr; index = addRange(source, index, iter->valueRange.end - token.startIndex(), "html-attribute-value", isLink, tagName == aTag, value); ++iter; } m_current = m_td; }
// Returns the fully decoded source snippet for |attribute| of |token|,
// truncated to at most kMaximumSnippetLength characters. For
// SrcLikeAttribute the snippet is cut further at the first '?', the first
// '#', or the third slash/backslash, whichever comes first.
String XSSAuditor::decodedSnippetForAttribute(const HTMLToken& token, const HTMLToken::Attribute& attribute, AttributeKind treatment)
{
    const size_t kMaximumSnippetLength = 100;

    // The range doesn't include the character which terminates the value. So,
    // for an input of |name="value"|, the snippet is |name="value|. For an
    // unquoted input of |name=value |, the snippet is |name=value|.
    // FIXME: We should grab one character before the name also.
    int snippetStart = attribute.m_nameRange.m_start - token.startIndex();
    int snippetEnd = attribute.m_valueRange.m_end - token.startIndex();

    String decodedSnippet = fullyDecodeString(snippetForRange(token, snippetStart, snippetEnd), m_parser->document()->decoder());
    decodedSnippet.truncate(kMaximumSnippetLength);

    if (treatment != SrcLikeAttribute)
        return decodedSnippet;

    // Characters following the first ?, #, or third slash may come from
    // the page itself and can be merely ignored by an attacker's server
    // when a remote script or script-like resource is requested.
    int slashCount = 0;
    for (size_t position = 0; position < decodedSnippet.length(); ++position) {
        bool shouldStop = false;
        switch (decodedSnippet[position]) {
        case '?':
        case '#':
            shouldStop = true;
            break;
        case '/':
        case '\\':
            // Only the third (back)slash ends the snippet.
            shouldStop = ++slashCount > 2;
            break;
        default:
            break;
        }

        if (shouldStop) {
            decodedSnippet.truncate(position);
            break;
        }
    }

    return decodedSnippet;
}
// Returns the snippet of |token| that spans from the start of |attribute|'s
// name to the end of its value. Both bounds are made relative to the token by
// subtracting token.startIndex().
String XSSFilter::snippetForAttribute(const HTMLToken& token, const HTMLToken::Attribute& attribute)
{
    // FIXME: We should grab one character before the name also.
    int snippetStart = attribute.m_nameRange.m_start - token.startIndex();

    // FIXME: We probably want to grab only the first few characters of the attribute value.
    int snippetEnd = attribute.m_valueRange.m_end - token.startIndex();

    return snippetForRange(token, snippetStart, snippetEnd);
}
// Returns the fully decoded source snippet for |attribute| of |token|,
// truncated to kMaximumFragmentLengthTarget characters and then shortened
// further according to |treatment| (see the per-branch comments below).
String XSSAuditor::decodedSnippetForAttribute(const HTMLToken& token, const HTMLToken::Attribute& attribute, AttributeKind treatment)
{
    // The range doesn't include the character which terminates the value. So,
    // for an input of |name="value"|, the snippet is |name="value|. For an
    // unquoted input of |name=value |, the snippet is |name=value|.
    // FIXME: We should grab one character before the name also.
    int snippetStart = attribute.m_nameRange.m_start - token.startIndex();
    int snippetEnd = attribute.m_valueRange.m_end - token.startIndex();

    String decodedSnippet = fullyDecodeString(m_parser->sourceForToken(token).substring(snippetStart, snippetEnd - snippetStart), m_parser->document()->decoder());
    decodedSnippet.truncate(kMaximumFragmentLengthTarget);

    if (treatment == SrcLikeAttribute) {
        // In HTTP URLs, characters following the first ?, #, or third slash may come from
        // the page itself and can be merely ignored by an attacker's server when a remote
        // script or script-like resource is requested. In DATA URLs, the payload starts at
        // the first comma, and the first /*, //, or <!-- may introduce a comment. Characters
        // following this may come from the page itself and may be ignored when the script is
        // executed. For simplicity, we don't differentiate based on URL scheme, and stop at
        // the first # or ?, the third slash, or the first slash or < once a comma is seen.
        int slashCount = 0;
        bool commaSeen = false;
        for (size_t position = 0; position < decodedSnippet.length(); ++position) {
            const UChar currentChar = decodedSnippet[position];

            bool shouldStop = false;
            switch (currentChar) {
            case '?':
            case '#':
                shouldStop = true;
                break;
            case '/':
            case '\\':
                // Slashes are only counted while no comma has been seen.
                shouldStop = commaSeen || ++slashCount > 2;
                break;
            case '<':
                shouldStop = commaSeen;
                break;
            case ',':
                commaSeen = true;
                break;
            default:
                break;
            }

            if (shouldStop) {
                decodedSnippet.truncate(position);
                break;
            }
        }
    } else if (treatment == ScriptLikeAttribute) {
        // Beware of trailing characters which came from the page itself, not the
        // injected vector. Excluding the terminating character covers common cases
        // where the page immediately ends the attribute, but doesn't cover more
        // complex cases where there is other page data following the injection.
        // Generally, these won't parse as javascript, so the injected vector
        // typically excludes them from consideration via a single-line comment or
        // by enclosing them in a string literal terminated later by the page's own
        // closing punctuation. Since the snippet has not been parsed, the vector
        // may also try to introduce these via entities. As a result, we'd like to
        // stop before the first "//", the first <!--, the first entity, or the first
        // quote not immediately following the first equals sign (taking whitespace
        // into consideration). To keep things simpler, we don't try to distinguish
        // between entity-introducing ampersands vs. other uses, nor do we bother to
        // check for a second slash for a comment, nor do we bother to check for
        // !-- following a less-than sign. We stop instead on any ampersand,
        // slash, or less-than sign.
        const size_t equalsPosition = decodedSnippet.find("=");
        if (equalsPosition != notFound) {
            // First non-space character after the equals sign.
            const size_t valueStart = decodedSnippet.find(isNotHTMLSpace, equalsPosition + 1);
            if (valueStart != notFound) {
                // An opening quote right after the equals sign is skipped, so
                // that it is not itself treated as a terminator.
                const size_t searchStart = isHTMLQuote(decodedSnippet[valueStart]) ? valueStart + 1 : valueStart;
                const size_t terminatorPosition = decodedSnippet.find(isTerminatingCharacter, searchStart);
                if (terminatorPosition != notFound)
                    decodedSnippet.truncate(terminatorPosition);
            }
        }
    }

    return decodedSnippet;
}
// Handles the token that follows a script start tag.
//
// Must be called in the AfterScriptStartTag state (asserted); the state is
// reset to Initial unconditionally. Only Character tokens are examined: when
// m_cachedSnippet concatenated with the token's snippet is contained in the
// request, the token's characters are replaced by a single space and true is
// returned. In every other case the token is left alone and false is returned.
bool XSSFilter::filterTokenAfterScriptStartTag(HTMLToken& token)
{
    ASSERT(m_state == AfterScriptStartTag);
    m_state = Initial;

    if (token.type() != HTMLToken::Character) {
        ASSERT(token.type() == HTMLToken::EndTag || token.type() == HTMLToken::EndOfFile);
        return false;
    }

    // FIXME: We probably want to grab only the first few characters of the
    //        contents of the script element.
    int snippetStart = 0;
    int snippetEnd = token.endIndex() - token.startIndex();

    if (!isContainedInRequest(m_cachedSnippet + snippetForRange(token, snippetStart, snippetEnd)))
        return false;

    token.eraseCharacters();
    token.appendToCharacter(' '); // Technically, character tokens can't be empty.
    return true;
}
// Handles the token that follows a script start tag.
//
// Must be called in the AfterScriptStartTag state (asserted); the state is
// reset to Initial unconditionally. Only Character tokens are examined. The
// token's characters are replaced by a single space, and true is returned,
// only when both the decoded m_cachedSnippet and the decoded JavaScript
// snippet taken from the token are contained in the request. Otherwise the
// token is left alone and false is returned.
bool XSSAuditor::filterTokenAfterScriptStartTag(HTMLToken& token)
{
    ASSERT(m_state == AfterScriptStartTag);
    m_state = Initial;

    if (token.type() != HTMLTokenTypes::Character) {
        ASSERT(token.type() == HTMLTokenTypes::EndTag || token.type() == HTMLTokenTypes::EndOfFile);
        return false;
    }

    TextResourceDecoder* decoder = m_parser->document()->decoder();

    // First gate: the cached snippet has to appear in the request.
    if (!isContainedInRequest(fullyDecodeString(m_cachedSnippet, decoder)))
        return false;

    // Second gate: so does the JavaScript snippet extracted from the token's own source.
    int snippetStart = 0;
    int snippetEnd = token.endIndex() - token.startIndex();
    String scriptSnippet = snippetForJavaScript(snippetForRange(token, snippetStart, snippetEnd));
    if (!isContainedInRequest(fullyDecodeString(scriptSnippet, decoder)))
        return false;

    token.eraseCharacters();
    token.appendToCharacter(' '); // Technically, character tokens can't be empty.
    return true;
}