// Returns the source text corresponding to |token|, pulling characters first from
// m_previousSource and then from m_currentSource. The result is stored in
// m_cachedSourceForToken, and a non-empty cached value is returned directly.
String HTMLSourceTracker::sourceForToken(const HTMLToken& token)
{
    // A non-empty cache means the source was already extracted; reuse it.
    if (!m_cachedSourceForToken.isEmpty())
        return m_cachedSourceForToken;

    size_t charactersToConsume;
    if (token.type() != HTMLToken::EndOfFile) {
        ASSERT(!token.startIndex());
        charactersToConsume = static_cast<size_t>(token.endIndex() - token.startIndex());
    } else {
        // At end of file, take everything that is left except the trailing null
        // character that marks the end of the file.
        charactersToConsume = m_previousSource.length() + m_currentSource.length() - 1;
    }

    StringBuilder builder;
    builder.reserveCapacity(charactersToConsume);

    size_t consumed = 0;

    // Drain the previous source first, for as long as it still has characters.
    while (consumed < charactersToConsume && !m_previousSource.isEmpty()) {
        builder.append(m_previousSource.currentChar());
        m_previousSource.advance();
        ++consumed;
    }

    // Whatever is still needed has to come from the current source.
    while (consumed < charactersToConsume) {
        ASSERT(!m_currentSource.isEmpty());
        builder.append(m_currentSource.currentChar());
        m_currentSource.advance();
        ++consumed;
    }

    m_cachedSourceForToken = builder.toString();
    return m_cachedSourceForToken;
}
Exemple #2
0
void HTMLViewSourceDocument::processTagToken(const String& source, HTMLToken& token)
{
    m_current = addSpanWithClassName("webkit-html-tag");

    AtomicString tagName(token.name().data(), token.name().size());

    unsigned index = 0;
    HTMLToken::AttributeList::const_iterator iter = token.attributes().begin();
    while (index < source.length()) {
        if (iter == token.attributes().end()) {
            // We want to show the remaining characters in the token.
            index = addRange(source, index, source.length(), "");
            ASSERT(index == source.length());
            break;
        }

        AtomicString name(iter->m_name.data(), iter->m_name.size());
        String value(iter->m_value.data(), iter->m_value.size());

        index = addRange(source, index, iter->m_nameRange.m_start - token.startIndex(), "");
        index = addRange(source, index, iter->m_nameRange.m_end - token.startIndex(), "webkit-html-attribute-name");

        if (tagName == baseTag && name == hrefAttr)
            m_current = addBase(value);

        index = addRange(source, index, iter->m_valueRange.m_start - token.startIndex(), "");

        bool isLink = name == srcAttr || name == hrefAttr;
        index = addRange(source, index, iter->m_valueRange.m_end - token.startIndex(), "webkit-html-attribute-value", isLink, tagName == aTag);

        ++iter;
    }
    m_current = m_td;
}
// Returns the source text corresponding to |token|. An end-of-file token always yields
// an empty String. Otherwise characters are taken from m_previousSource and then from
// m_currentSource, and the result is stored in m_cachedSourceForToken; a non-empty
// cached value is returned directly.
String HTMLSourceTracker::sourceForToken(const HTMLToken& token)
{
    // Returning nothing for end of file hides the null character used as the end-of-file marker.
    if (token.type() == HTMLToken::EndOfFile)
        return String();

    if (!m_cachedSourceForToken.isEmpty())
        return m_cachedSourceForToken;

    ASSERT(!token.startIndex());
    const size_t tokenLength = static_cast<size_t>(token.endIndex() - token.startIndex());

    StringBuilder builder;
    builder.reserveCapacity(tokenLength);

    // Count down the characters still owed to the caller.
    size_t remaining = tokenLength;

    // The previous source is drained first, for as long as it has characters.
    for ( ; remaining && !m_previousSource.isEmpty(); --remaining) {
        builder.append(m_previousSource.currentChar());
        m_previousSource.advance();
    }

    // The rest must be supplied by the current source.
    for ( ; remaining; --remaining) {
        ASSERT(!m_currentSource.isEmpty());
        builder.append(m_currentSource.currentChar());
        m_currentSource.advance();
    }

    m_cachedSourceForToken = builder.toString();
    return m_cachedSourceForToken;
}
void HTMLViewSourceDocument::processTagToken(const String& source, HTMLToken& token, SourceAnnotation annotation)
{
    maybeAddSpanForAnnotation(annotation);
    m_current = addSpanWithClassName("html-tag");

    AtomicString tagName(token.name());

    unsigned index = 0;
    HTMLToken::AttributeList::const_iterator iter = token.attributes().begin();
    while (index < source.length()) {
        if (iter == token.attributes().end()) {
            // We want to show the remaining characters in the token.
            index = addRange(source, index, source.length(), emptyAtom);
            ASSERT(index == source.length());
            break;
        }

        AtomicString name(iter->name);
        AtomicString value(StringImpl::create8BitIfPossible(iter->value));

        index = addRange(source, index, iter->nameRange.start - token.startIndex(), emptyAtom);
        index = addRange(source, index, iter->nameRange.end - token.startIndex(), "html-attribute-name");

        if (tagName == baseTag && name == hrefAttr)
            addBase(value);

        index = addRange(source, index, iter->valueRange.start - token.startIndex(), emptyAtom);

        bool isLink = name == srcAttr || name == hrefAttr;
        index = addRange(source, index, iter->valueRange.end - token.startIndex(), "html-attribute-value", isLink, tagName == aTag, value);

        ++iter;
    }
    m_current = m_td;
}
Exemple #5
0
// Builds the decoded snippet for |attribute| of |token|: the text from the start of the
// attribute name to the end of its value is fully decoded and capped at
// kMaximumSnippetLength characters. For SrcLikeAttribute the snippet is additionally cut
// at the first '?', the first '#', or the third slash/backslash.
String XSSAuditor::decodedSnippetForAttribute(const HTMLToken& token, const HTMLToken::Attribute& attribute, AttributeKind treatment)
{
    const size_t kMaximumSnippetLength = 100;

    // The range doesn't include the character which terminates the value. So,
    // for an input of |name="value"|, the snippet is |name="value|. For an
    // unquoted input of |name=value |, the snippet is |name=value|.
    // FIXME: We should grab one character before the name also.
    int start = attribute.m_nameRange.m_start - token.startIndex();
    int end = attribute.m_valueRange.m_end - token.startIndex();
    String decodedSnippet = fullyDecodeString(snippetForRange(token, start, end), m_parser->document()->decoder());
    decodedSnippet.truncate(kMaximumSnippetLength);

    if (treatment != SrcLikeAttribute)
        return decodedSnippet;

    // Characters following the first ?, #, or third slash may come from
    // the page itself and can be merely ignored by an attacker's server
    // when a remote script or script-like resource is requested.
    int slashesSeen = 0;
    for (size_t position = 0; position < decodedSnippet.length(); ++position) {
        bool reachedCutoff = false;
        switch (decodedSnippet[position]) {
        case '?':
        case '#':
            reachedCutoff = true;
            break;
        case '/':
        case '\\':
            // Only the third (or later) slash ends the snippet.
            reachedCutoff = ++slashesSeen > 2;
            break;
        default:
            break;
        }
        if (reachedCutoff) {
            decodedSnippet.truncate(position);
            break;
        }
    }
    return decodedSnippet;
}
Exemple #6
0
// Returns the snippet of |token| spanning from the start of |attribute|'s name to the
// end of its value. Both bounds are made relative to the token by subtracting
// token.startIndex().
String XSSFilter::snippetForAttribute(const HTMLToken& token, const HTMLToken::Attribute& attribute)
{
    // FIXME: We should grab one character before the name also.
    const int snippetStart = attribute.m_nameRange.m_start - token.startIndex();

    // FIXME: We probably want to grab only the first few characters of the attribute value.
    const int snippetEnd = attribute.m_valueRange.m_end - token.startIndex();

    return snippetForRange(token, snippetStart, snippetEnd);
}
// Builds the decoded snippet for |attribute| of |token|: the text from the start of the
// attribute name to the end of its value is taken from the parser's source for the token,
// fully decoded, and capped at kMaximumFragmentLengthTarget characters. Depending on
// |treatment| the snippet is then shortened further (see the comments in each branch).
String XSSAuditor::decodedSnippetForAttribute(const HTMLToken& token, const HTMLToken::Attribute& attribute, AttributeKind treatment)
{
    // The range doesn't include the character which terminates the value. So,
    // for an input of |name="value"|, the snippet is |name="value|. For an
    // unquoted input of |name=value |, the snippet is |name=value|.
    // FIXME: We should grab one character before the name also.
    int start = attribute.m_nameRange.m_start - token.startIndex();
    int end = attribute.m_valueRange.m_end - token.startIndex();
    String decodedSnippet = fullyDecodeString(m_parser->sourceForToken(token).substring(start, end - start), m_parser->document()->decoder());
    decodedSnippet.truncate(kMaximumFragmentLengthTarget);

    if (treatment == SrcLikeAttribute) {
        // In HTTP URLs, characters following the first ?, #, or third slash may come from
        // the page itself and can be merely ignored by an attacker's server when a remote
        // script or script-like resource is requested. In data URLs, the payload starts at
        // the first comma, and the first /*, //, or <!-- may introduce a comment. Characters
        // following this may come from the page itself and may be ignored when the script is
        // executed. For simplicity, we don't differentiate based on URL scheme, and stop at
        // the first # or ?, the third slash, or the first slash or < once a comma is seen.
        int slashesSeen = 0;
        bool sawComma = false;
        for (size_t position = 0; position < decodedSnippet.length(); ++position) {
            bool reachedCutoff = false;
            UChar character = decodedSnippet[position];
            switch (character) {
            case '?':
            case '#':
                reachedCutoff = true;
                break;
            case '/':
            case '\\':
                // After a comma any slash ends the snippet; before one, only the third does.
                // Slashes are not counted once a comma has been seen.
                reachedCutoff = sawComma || ++slashesSeen > 2;
                break;
            case '<':
                reachedCutoff = sawComma;
                break;
            case ',':
                sawComma = true;
                break;
            default:
                break;
            }
            if (reachedCutoff) {
                decodedSnippet.truncate(position);
                break;
            }
        }
        return decodedSnippet;
    }

    if (treatment == ScriptLikeAttribute) {
        // Beware of trailing characters which came from the page itself, not the
        // injected vector. Excluding the terminating character covers common cases
        // where the page immediately ends the attribute, but doesn't cover more
        // complex cases where there is other page data following the injection.
        // Generally, these won't parse as javascript, so the injected vector
        // typically excludes them from consideration via a single-line comment or
        // by enclosing them in a string literal terminated later by the page's own
        // closing punctuation. Since the snippet has not been parsed, the vector
        // may also try to introduce these via entities. As a result, we'd like to
        // stop before the first "//", the first <!--, the first entity, or the first
        // quote not immediately following the first equals sign (taking whitespace
        // into consideration). To keep things simpler, we don't try to distinguish
        // between entity-introducing ampersands vs. other uses, nor do we bother to
        // check for a second slash for a comment, nor do we bother to check for
        // !-- following a less-than sign. We stop instead on any ampersand,
        // slash, or less-than sign.
        size_t position = decodedSnippet.find("=");
        if (position != notFound) {
            // Skip whitespace after the equals sign.
            position = decodedSnippet.find(isNotHTMLSpace, position + 1);
            if (position != notFound) {
                // A quote directly after the equals sign (modulo whitespace) is skipped
                // so that it is not itself treated as a terminating character.
                position = decodedSnippet.find(isTerminatingCharacter, isHTMLQuote(decodedSnippet[position]) ? position + 1 : position);
                if (position != notFound)
                    decodedSnippet.truncate(position);
            }
        }
    }
    return decodedSnippet;
}
Exemple #8
0
// Handles the token that follows a script start tag. The state is always reset to
// Initial. For a character token, if isContainedInRequest() accepts m_cachedSnippet
// concatenated with the token's snippet, the token's characters are replaced by a single
// space and true is returned. In every other case the token is left alone and false is
// returned.
bool XSSFilter::filterTokenAfterScriptStartTag(HTMLToken& token)
{
    ASSERT(m_state == AfterScriptStartTag);
    m_state = Initial;

    if (token.type() != HTMLToken::Character) {
        // Only an end tag or end of file is expected here besides character data.
        ASSERT(token.type() == HTMLToken::EndTag || token.type() == HTMLToken::EndOfFile);
        return false;
    }

    // FIXME: We probably want to grab only the first few characters of the
    //        contents of the script element.
    const int snippetStart = 0;
    const int snippetEnd = token.endIndex() - token.startIndex();
    if (!isContainedInRequest(m_cachedSnippet + snippetForRange(token, snippetStart, snippetEnd)))
        return false;

    token.eraseCharacters();
    token.appendToCharacter(' '); // Technically, character tokens can't be empty.
    return true;
}
Exemple #9
0
// Handles the token that follows a script start tag. The state is always reset to
// Initial. For a character token, the token's characters are replaced by a single space
// and true is returned only when isContainedInRequest() accepts both the decoded
// m_cachedSnippet and the decoded JavaScript snippet of the token. In every other case
// the token is left alone and false is returned.
bool XSSAuditor::filterTokenAfterScriptStartTag(HTMLToken& token)
{
    ASSERT(m_state == AfterScriptStartTag);
    m_state = Initial;

    if (token.type() != HTMLTokenTypes::Character) {
        // Only an end tag or end of file is expected here besides character data.
        ASSERT(token.type() == HTMLTokenTypes::EndTag || token.type() == HTMLTokenTypes::EndOfFile);
        return false;
    }

    TextResourceDecoder* decoder = m_parser->document()->decoder();

    // The cached snippet must match before the script body is examined at all.
    if (!isContainedInRequest(fullyDecodeString(m_cachedSnippet, decoder)))
        return false;

    const int snippetStart = 0;
    const int snippetEnd = token.endIndex() - token.startIndex();
    String scriptSnippet = snippetForJavaScript(snippetForRange(token, snippetStart, snippetEnd));
    if (!isContainedInRequest(fullyDecodeString(scriptSnippet, decoder)))
        return false;

    token.eraseCharacters();
    token.appendToCharacter(' '); // Technically, character tokens can't be empty.
    return true;
}