// http://www.w3.org/TR/css3-syntax/#consume-a-name
CSSParserString CSSTokenizer::consumeName()
{
    // Names without escapes get handled without allocations
    for (unsigned size = 0; ; ++size) {
        UChar cc = m_input.peekWithoutReplacement(size);
        if (cc == '\0' || cc == '\\')
            break;
        if (!isNameChar(cc)) {
            unsigned startOffset = m_input.offset();
            m_input.advance(size);
            return m_input.rangeAsCSSParserString(startOffset, size);
        }
    }

    StringBuilder result;
    while (true) {
        UChar cc = consume();
        if (isNameChar(cc)) {
            result.append(cc);
            continue;
        }
        if (twoCharsAreValidEscape(cc, m_input.nextInputChar())) {
            result.append(consumeEscape());
            continue;
        }
        reconsume(cc);
        return registerString(result.toString());
    }
}
Example #2
0
	TokenPtr Tokenizer::consumeString(char32_t end_codepoint)
	{
		std::string res;
		advance();
		while(la0_ != end_codepoint) {
			if(la0_ == LF) {
				return std::make_shared<Token>(TokenId::BAD_STRING);
			} else if(eof(la0_)) {
				return std::make_shared<StringToken>(res);
			} else if(la0_ == '\\') {
				if(eof(next())) {
					// does nothing.
				} else if(next() == LF) {
					advance();
					continue;
				} else {
					res += consumeEscape();
				}
			}
			res += utils::codepoint_to_utf8(la0_);
			advance();
		}
		advance();
		return std::make_shared<StringToken>(res);
	}
Example #3
0
// http://dev.w3.org/csswg/css-syntax/#consume-a-string-token
MediaQueryToken MediaQueryTokenizer::consumeStringTokenUntil(UChar endingCodePoint)
{
    StringBuilder output;
    while (true) {
        UChar cc = consume();
        if (cc == endingCodePoint || cc == kEndOfFileMarker) {
            // The "reconsume" here deviates from the spec, but is required to avoid consuming past the EOF
            if (cc == kEndOfFileMarker)
                reconsume(cc);
            return MediaQueryToken(StringToken, output.toString());
        }
        if (isNewLine(cc)) {
            reconsume(cc);
            return MediaQueryToken(BadStringToken);
        }
        if (cc == '\\') {
            if (m_input.nextInputChar() == kEndOfFileMarker)
                continue;
            if (isNewLine(m_input.nextInputChar()))
                consume();
            else
                output.append(consumeEscape());
        } else {
            output.append(cc);
        }
    }
}
// http://dev.w3.org/csswg/css-syntax/#consume-the-remnants-of-a-bad-url
void CSSTokenizer::consumeBadUrlRemnants()
{
    while (true) {
        UChar cc = consume();
        if (cc == ')' || cc == kEndOfFileMarker)
            return;
        if (twoCharsAreValidEscape(cc, m_input.nextInputChar()))
            consumeEscape();
    }
}
// http://dev.w3.org/csswg/css-syntax/#consume-url-token
CSSParserToken CSSTokenizer::consumeUrlToken()
{
    consumeUntilNonWhitespace();

    // URL tokens without escapes get handled without allocations
    for (unsigned size = 0; ; size++) {
        UChar cc = m_input.peekWithoutReplacement(size);
        if (cc == ')') {
            unsigned startOffset = m_input.offset();
            m_input.advance(size + 1);
            return CSSParserToken(UrlToken, m_input.rangeAsCSSParserString(startOffset, size));
        }
        if (cc <= ' ' || cc == '\\' || cc == '"' || cc == '\'' || cc == '(' || cc == '\x7f')
            break;
    }

    StringBuilder result;
    while (true) {
        UChar cc = consume();
        if (cc == ')' || cc == kEndOfFileMarker)
            return CSSParserToken(UrlToken, registerString(result.toString()));

        if (isHTMLSpace(cc)) {
            consumeUntilNonWhitespace();
            if (consumeIfNext(')') || m_input.nextInputChar() == kEndOfFileMarker)
                return CSSParserToken(UrlToken, registerString(result.toString()));
            break;
        }

        if (cc == '"' || cc == '\'' || cc == '(' || isNonPrintableCodePoint(cc))
            break;

        if (cc == '\\') {
            if (twoCharsAreValidEscape(cc, m_input.nextInputChar())) {
                result.append(consumeEscape());
                continue;
            }
            break;
        }

        result.append(cc);
    }

    consumeBadUrlRemnants();
    return CSSParserToken(BadUrlToken);
}
Example #6
0
// http://www.w3.org/TR/css3-syntax/#consume-a-name
String MediaQueryTokenizer::consumeName()
{
    // FIXME: Is this as efficient as it can be?
    // The possibility of escape chars mandates a copy AFAICT.
    StringBuilder result;
    while (true) {
        UChar cc = consume();
        if (isNameChar(cc)) {
            result.append(cc);
            continue;
        }
        if (twoCharsAreValidEscape(cc, m_input.nextInputChar())) {
            result.append(consumeEscape());
            continue;
        }
        reconsume(cc);
        return result.toString();
    }
}
// http://dev.w3.org/csswg/css-syntax/#consume-a-string-token
CSSParserToken CSSTokenizer::consumeStringTokenUntil(UChar endingCodePoint)
{
    // Strings without escapes get handled without allocations
    for (unsigned size = 0; ; size++) {
        UChar cc = m_input.peekWithoutReplacement(size);
        if (cc == endingCodePoint) {
            unsigned startOffset = m_input.offset();
            m_input.advance(size + 1);
            return CSSParserToken(StringToken, m_input.rangeAsCSSParserString(startOffset, size));
        }
        if (isNewLine(cc)) {
            m_input.advance(size);
            return CSSParserToken(BadStringToken);
        }
        if (cc == '\0' || cc == '\\')
            break;
    }

    StringBuilder output;
    while (true) {
        UChar cc = consume();
        if (cc == endingCodePoint || cc == kEndOfFileMarker)
            return CSSParserToken(StringToken, registerString(output.toString()));
        if (isNewLine(cc)) {
            reconsume(cc);
            return CSSParserToken(BadStringToken);
        }
        if (cc == '\\') {
            if (m_input.nextInputChar() == kEndOfFileMarker)
                continue;
            if (isNewLine(m_input.nextInputChar()))
                consumeSingleWhitespaceIfNext(); // This handles \r\n for us
            else
                output.append(consumeEscape());
        } else {
            output.append(cc);
        }
    }
}
Example #8
0
// http://dev.w3.org/csswg/css-syntax/#consume-url-token
CSSParserToken CSSTokenizer::consumeUrlToken()
{
    consumeUntilNonWhitespace();
    StringBuilder result;
    while (true) {
        UChar cc = consume();
        if (cc == ')' || cc == kEndOfFileMarker) {
            // The "reconsume" here deviates from the spec, but is required to avoid consuming past the EOF
            if (cc == kEndOfFileMarker)
                reconsume(cc);
            return CSSParserToken(UrlToken, result.toString());
        }

        if (isHTMLSpace(cc)) {
            consumeUntilNonWhitespace();
            if (consumeIfNext(')') || m_input.nextInputChar() == kEndOfFileMarker)
                return CSSParserToken(UrlToken, result.toString());
            break;
        }

        if (cc == '"' || cc == '\'' || cc == '(' || isNonPrintableCodePoint(cc))
            break;

        if (cc == '\\') {
            if (twoCharsAreValidEscape(cc, m_input.nextInputChar())) {
                result.append(consumeEscape());
                continue;
            }
            break;
        }

        result.append(cc);
    }

    consumeBadUrlRemnants();
    return CSSParserToken(BadUrlToken);
}
Example #9
0
void Parser::parseAlternative(JumpList& failures)
{
    PatternCharacterSequence sequence(m_generator, failures);

    while (1) {
        switch (peek()) {
        case EndOfPattern:
        case '|':
        case ')':
            sequence.flush();
            return;

        case '*':
        case '+':
        case '?':
        case '{': {
            Quantifier q = consumeQuantifier();

            if (q.type == Quantifier::None) {
                sequence.append(consume());
                continue;
            }

            if (q.type == Quantifier::Error)
                return;

            if (!sequence.size()) {
                setError(QuantifierWithoutAtom);
                return;
            }

            sequence.flush(q);
            continue;
        }

        case '^':
            consume();

            sequence.flush();
            m_generator.generateAssertionBOL(failures);
            continue;

        case '$':
            consume();

            sequence.flush();
            m_generator.generateAssertionEOL(failures);
            continue;

        case '.':
            consume();

            sequence.flush();
            if (!parseCharacterClassQuantifier(failures, CharacterClass::newline(), true))
                return;
            continue;

        case '[':
            consume();

            sequence.flush();
            if (!parseCharacterClass(failures))
                return;
            continue;

        case '(':
            consume();

            sequence.flush();
            if (!parseParentheses(failures))
                return;
            continue;

        case '\\': {
            consume();

            Escape escape = consumeEscape(false);
            if (escape.type() == Escape::PatternCharacter) {
                sequence.append(PatternCharacterEscape::cast(escape).character());
                continue;
            }

            sequence.flush();
            if (!parseNonCharacterEscape(failures, escape))
                return;
            continue;
        }

        default:
            sequence.append(consume());
            continue;
        }
    }
}
Example #10
0
bool Parser::parseCharacterClass(JumpList& failures)
{
    bool invert = false;
    if (peek() == '^') {
        consume();
        invert = true;
    }

    CharacterClassConstructor constructor(m_ignoreCase);

    int ch;
    while ((ch = peek()) != ']') {
        switch (ch) {
        case EndOfPattern:
            setError(CharacterClassUnmatched);
            return false;

        case '\\': {
            consume();
            Escape escape = consumeEscape(true);

            switch (escape.type()) {
                case Escape::PatternCharacter: {
                    int character = PatternCharacterEscape::cast(escape).character();
                    if (character == '-')
                        constructor.flushBeforeEscapedHyphen();
                    constructor.put(character);
                    break;
                }
                case Escape::CharacterClass: {
                    const CharacterClassEscape& characterClassEscape = CharacterClassEscape::cast(escape);
                    ASSERT(!characterClassEscape.invert());
                    constructor.append(characterClassEscape.characterClass());
                    break;
                }
                case Escape::Error:
                    return false;
                case Escape::Backreference:
                case Escape::WordBoundaryAssertion: {
                    ASSERT_NOT_REACHED();
                    break;
                }
            }
            break;
        }

        default:
            consume();
            constructor.put(ch);
        }
    }
    consume();

    // lazily catch reversed ranges ([z-a])in character classes
    if (constructor.isUpsideDown()) {
        setError(CharacterClassOutOfOrder);
        return false;
    }

    constructor.flush();
    CharacterClass charClass = constructor.charClass();
    return parseCharacterClassQuantifier(failures, charClass, invert);
}