// Handles a consumed '$': "$=" becomes a SuffixMatchToken, anything else
// leaves the following input untouched and yields a '$' delimiter.
CSSParserToken CSSTokenizer::dollarSign(UChar cc)
{
    ASSERT(cc == '$');
    const bool isSuffixMatch = consumeIfNext('=');
    if (!isSuffixMatch)
        return CSSParserToken(DelimiterToken, '$');
    return CSSParserToken(SuffixMatchToken);
}
// Consumes the body of a unicode-range: up to six hex digits, then either
// '?' wildcards (sharing the same six-character budget) or "-" followed by
// up to six more hex digits. With neither suffix, the range is a single value.
CSSParserToken CSSTokenizer::consumeUnicodeRange()
{
    ASSERT(isASCIIHexDigit(m_input.nextInputChar()) || m_input.nextInputChar() == '?');

    int charsLeft = 6;
    UChar32 rangeStart = 0;
    for (; charsLeft && isASCIIHexDigit(m_input.nextInputChar()); --charsLeft)
        rangeStart = rangeStart * 16 + toASCIIHexValue(consume());

    UChar32 rangeEnd = rangeStart;
    if (charsLeft && consumeIfNext('?')) {
        // Each wildcard contributes 0 to the start and 0xF to the end.
        do {
            rangeStart *= 16;
            rangeEnd = rangeEnd * 16 + 0xF;
            --charsLeft;
        } while (charsLeft && consumeIfNext('?'));
    } else if (m_input.nextInputChar() == '-' && isASCIIHexDigit(m_input.peek(1))) {
        consume(); // Skip the '-'.
        int endCharsLeft = 6;
        rangeEnd = 0;
        do {
            rangeEnd = rangeEnd * 16 + toASCIIHexValue(consume());
            --endCharsLeft;
        } while (endCharsLeft && isASCIIHexDigit(m_input.nextInputChar()));
    }

    return CSSParserToken(UnicodeRangeToken, rangeStart, rangeEnd);
}
// Handles a consumed '~': "~=" becomes an IncludeMatchToken, otherwise a
// '~' delimiter is produced.
CSSParserToken CSSTokenizer::tilde(UChar cc)
{
    ASSERT(cc == '~');
    const bool isIncludeMatch = consumeIfNext('=');
    if (!isIncludeMatch)
        return CSSParserToken(DelimiterToken, '~');
    return CSSParserToken(IncludeMatchToken);
}
// http://dev.w3.org/csswg/css-syntax/#consume-a-string-token CSSParserToken CSSTokenizer::consumeStringTokenUntil(UChar endingCodePoint) { StringBuilder output; while (true) { UChar cc = consume(); if (cc == endingCodePoint || cc == kEndOfFileMarker) { // The "reconsume" here deviates from the spec, but is required to avoid consuming past the EOF if (cc == kEndOfFileMarker) reconsume(cc); return CSSParserToken(StringToken, output.toString()); } if (isNewLine(cc)) { reconsume(cc); return CSSParserToken(BadStringToken); } if (cc == '\\') { if (m_input.nextInputChar() == kEndOfFileMarker) continue; if (isNewLine(m_input.nextInputChar())) consumeSingleWhitespaceIfNext(); // This handles \r\n for us else output.append(consumeEscape()); } else { output.append(cc); } } }
// Handles a consumed '@': when the upcoming characters start an identifier
// the name is consumed into an AtKeywordToken; otherwise '@' is a delimiter.
CSSParserToken CSSTokenizer::commercialAt(UChar cc)
{
    ASSERT(cc == '@');
    if (!nextCharsAreIdentifier())
        return CSSParserToken(DelimiterToken, '@');
    return CSSParserToken(AtKeywordToken, consumeName());
}
// Handles a consumed '*': "*=" becomes a SubstringMatchToken, otherwise a
// '*' delimiter is produced.
CSSParserToken CSSTokenizer::asterisk(UChar cc)
{
    ASSERT(cc == '*');
    const bool isSubstringMatch = consumeIfNext('=');
    if (!isSubstringMatch)
        return CSSParserToken(DelimiterToken, '*');
    return CSSParserToken(SubstringMatchToken);
}
// Handles a consumed '^': "^=" becomes a PrefixMatchToken, otherwise a
// '^' delimiter is produced.
CSSParserToken CSSTokenizer::circumflexAccent(UChar cc)
{
    ASSERT(cc == '^');
    const bool isPrefixMatch = consumeIfNext('=');
    if (!isPrefixMatch)
        return CSSParserToken(DelimiterToken, '^');
    return CSSParserToken(PrefixMatchToken);
}
// Handles a consumed '/': "/*" opens a comment, anything else is a delimiter.
CSSParserToken CSSTokenizer::solidus(UChar cc)
{
    if (!consumeIfNext('*'))
        return CSSParserToken(DelimiterToken, cc);

    // Intentional deviation from the spec: CSS comments produce tokens.
    // When consumeUntilCommentEndFound() reports failure an EOFToken is
    // returned instead of a CommentToken.
    if (consumeUntilCommentEndFound())
        return CSSParserToken(CommentToken);
    return CSSParserToken(EOFToken);
}
// Handles a consumed '<': when followed by "!--" the three characters are
// consumed and a CDOToken is produced; otherwise '<' is a delimiter.
CSSParserToken CSSTokenizer::lessThan(UChar cc)
{
    ASSERT(cc == '<');
    const bool opensCDO = m_input.peek(0) == '!'
        && m_input.peek(1) == '-'
        && m_input.peek(2) == '-';
    if (!opensCDO)
        return CSSParserToken(DelimiterToken, '<');

    consume(3);
    return CSSParserToken(CDOToken);
}
// Handles a consumed '|': "|=" becomes a DashMatchToken, "||" a ColumnToken,
// and a lone '|' a delimiter. The '=' case is tried first.
CSSParserToken CSSTokenizer::verticalLine(UChar cc)
{
    ASSERT(cc == '|');
    if (consumeIfNext('=')) {
        return CSSParserToken(DashMatchToken);
    } else if (consumeIfNext('|')) {
        return CSSParserToken(ColumnToken);
    } else {
        return CSSParserToken(DelimiterToken, '|');
    }
}
// Handles a consumed '#': when a name character or a valid escape follows,
// the name is consumed into a hash token whose type records whether the
// upcoming characters form an identifier; otherwise '#' is a delimiter.
CSSParserToken CSSTokenizer::hash(UChar cc)
{
    const UChar upcoming = m_input.nextInputChar();
    const bool beginsName = isNameChar(upcoming) || twoCharsAreValidEscape(upcoming, m_input.peek(1));
    if (!beginsName)
        return CSSParserToken(DelimiterToken, cc);

    // The type must be decided before the name is consumed.
    HashTokenType type = HashTokenUnrestricted;
    if (nextCharsAreIdentifier())
        type = HashTokenId;
    return CSSParserToken(type, consumeName());
}
// Handles a consumed '/': "/*" skips to the end of the comment and reports a
// CommentToken; anything else is a delimiter.
CSSParserToken CSSTokenizer::solidus(UChar cc)
{
    if (!consumeIfNext('*'))
        return CSSParserToken(DelimiterToken, cc);

    // Comments get ignored, but some token still has to be returned.
    consumeUntilCommentEndFound();
    return CSSParserToken(CommentToken);
}
// Collapses a run of whitespace into a single WhitespaceToken.
CSSParserToken CSSTokenizer::whiteSpace(UChar cc)
{
    // Tokenization is lossy at this point: the exact whitespace is discarded
    // rather than recorded, although it could be kept here if ever needed.
    consumeUntilNonWhitespace();

    return CSSParserToken(WhitespaceToken);
}
// Handles a consumed '-'. Tried in order: a number, the "-->" CDC marker,
// an identifier-like token, and finally a plain delimiter.
CSSParserToken CSSTokenizer::hyphenMinus(UChar cc)
{
    if (nextCharsAreNumber(cc)) {
        reconsume(cc);
        return consumeNumericToken();
    }

    const bool closesCDC = m_input.peek(0) == '-' && m_input.peek(1) == '>';
    if (closesCDC) {
        consume(2);
        return CSSParserToken(CDCToken);
    }

    if (!nextCharsAreIdentifier(cc))
        return CSSParserToken(DelimiterToken, cc);

    reconsume(cc);
    return consumeIdentLikeToken();
}
// Handles a consumed '\': when it starts a valid escape it is put back and
// an identifier-like token is consumed; otherwise it is a delimiter.
CSSParserToken CSSTokenizer::reverseSolidus(UChar cc)
{
    const bool startsEscape = twoCharsAreValidEscape(cc, m_input.nextInputChar());
    if (!startsEscape)
        return CSSParserToken(DelimiterToken, cc);

    reconsume(cc);
    return consumeIdentLikeToken();
}
// http://dev.w3.org/csswg/css-syntax/#consume-url-token CSSParserToken CSSTokenizer::consumeUrlToken() { consumeUntilNonWhitespace(); // URL tokens without escapes get handled without allocations for (unsigned size = 0; ; size++) { UChar cc = m_input.peekWithoutReplacement(size); if (cc == ')') { unsigned startOffset = m_input.offset(); m_input.advance(size + 1); return CSSParserToken(UrlToken, m_input.rangeAsCSSParserString(startOffset, size)); } if (cc <= ' ' || cc == '\\' || cc == '"' || cc == '\'' || cc == '(' || cc == '\x7f') break; } StringBuilder result; while (true) { UChar cc = consume(); if (cc == ')' || cc == kEndOfFileMarker) return CSSParserToken(UrlToken, registerString(result.toString())); if (isHTMLSpace(cc)) { consumeUntilNonWhitespace(); if (consumeIfNext(')') || m_input.nextInputChar() == kEndOfFileMarker) return CSSParserToken(UrlToken, registerString(result.toString())); break; } if (cc == '"' || cc == '\'' || cc == '(' || isNonPrintableCodePoint(cc)) break; if (cc == '\\') { if (twoCharsAreValidEscape(cc, m_input.nextInputChar())) { result.append(consumeEscape()); continue; } break; } result.append(cc); } consumeBadUrlRemnants(); return CSSParserToken(BadUrlToken); }
// Handles a consumed '+' or '.': when it begins a number it is put back and
// a numeric token is consumed; otherwise it is a delimiter.
CSSParserToken CSSTokenizer::plusOrFullStop(UChar cc)
{
    if (!nextCharsAreNumber(cc))
        return CSSParserToken(DelimiterToken, cc);

    reconsume(cc);
    return consumeNumericToken();
}
// Handles a consumed '-'. Tried in order: a number, an identifier-like
// token, and finally a plain delimiter.
CSSParserToken CSSTokenizer::hyphenMinus(UChar cc)
{
    if (nextCharsAreNumber(cc)) {
        reconsume(cc);
        return consumeNumericToken();
    }

    if (!nextCharsAreIdentifier(cc))
        return CSSParserToken(DelimiterToken, cc);

    reconsume(cc);
    return consumeIdentLikeToken();
}
// http://dev.w3.org/csswg/css-syntax/#consume-a-string-token CSSParserToken CSSTokenizer::consumeStringTokenUntil(UChar endingCodePoint) { // Strings without escapes get handled without allocations for (unsigned size = 0; ; size++) { UChar cc = m_input.peekWithoutReplacement(size); if (cc == endingCodePoint) { unsigned startOffset = m_input.offset(); m_input.advance(size + 1); return CSSParserToken(StringToken, m_input.rangeAsCSSParserString(startOffset, size)); } if (isNewLine(cc)) { m_input.advance(size); return CSSParserToken(BadStringToken); } if (cc == '\0' || cc == '\\') break; } StringBuilder output; while (true) { UChar cc = consume(); if (cc == endingCodePoint || cc == kEndOfFileMarker) return CSSParserToken(StringToken, registerString(output.toString())); if (isNewLine(cc)) { reconsume(cc); return CSSParserToken(BadStringToken); } if (cc == '\\') { if (m_input.nextInputChar() == kEndOfFileMarker) continue; if (isNewLine(m_input.nextInputChar())) consumeSingleWhitespaceIfNext(); // This handles \r\n for us else output.append(consumeEscape()); } else { output.append(cc); } } }
// http://dev.w3.org/csswg/css-syntax/#consume-url-token CSSParserToken CSSTokenizer::consumeUrlToken() { consumeUntilNonWhitespace(); StringBuilder result; while (true) { UChar cc = consume(); if (cc == ')' || cc == kEndOfFileMarker) { // The "reconsume" here deviates from the spec, but is required to avoid consuming past the EOF if (cc == kEndOfFileMarker) reconsume(cc); return CSSParserToken(UrlToken, result.toString()); } if (isHTMLSpace(cc)) { consumeUntilNonWhitespace(); if (consumeIfNext(')') || m_input.nextInputChar() == kEndOfFileMarker) return CSSParserToken(UrlToken, result.toString()); break; } if (cc == '"' || cc == '\'' || cc == '(' || isNonPrintableCodePoint(cc)) break; if (cc == '\\') { if (twoCharsAreValidEscape(cc, m_input.nextInputChar())) { result.append(consumeEscape()); continue; } break; } result.append(cc); } consumeBadUrlRemnants(); return CSSParserToken(BadUrlToken); }
// http://dev.w3.org/csswg/css-syntax/#consume-ident-like-token CSSParserToken CSSTokenizer::consumeIdentLikeToken() { CSSParserString name = consumeName(); if (consumeIfNext('(')) { if (name.equalIgnoringASCIICase("url")) { // The spec is slightly different so as to avoid dropping whitespace // tokens, but they wouldn't be used and this is easier. consumeUntilNonWhitespace(); UChar next = m_input.nextInputChar(); if (next != '"' && next != '\'') return consumeUrlToken(); } return blockStart(LeftParenthesisToken, FunctionToken, name); } return CSSParserToken(IdentToken, name); }
// The state machine loop PassRefPtrWillBeRawPtr<MediaQuerySet> MediaQueryParser::parseImpl(CSSParserTokenRange range) { while (!range.atEnd()) processToken(range.consume()); // FIXME: Can we get rid of this special case? if (m_parserType == MediaQuerySetParser) processToken(CSSParserToken(EOFToken)); if (m_state != ReadAnd && m_state != ReadRestrictor && m_state != Done && m_state != ReadMediaNot) m_querySet->addMediaQuery(MediaQuery::createNotAll()); else if (m_mediaQueryData.currentMediaQueryChanged()) m_querySet->addMediaQuery(m_mediaQueryData.takeMediaQuery()); return m_querySet; }
// The state machine loop RefPtr<MediaQuerySet> MediaQueryParser::parseInternal(CSSParserTokenRange range) { while (!range.atEnd()) processToken(range.consume()); // FIXME: Can we get rid of this special case? if (m_parserType == MediaQuerySetParser) processToken(CSSParserToken(EOFToken)); if (m_state != ReadAnd && m_state != ReadRestrictor && m_state != Done && m_state != ReadMediaNot) { MediaQuery query = MediaQuery(MediaQuery::Not, "all", Vector<MediaQueryExpression>()); m_querySet->addMediaQuery(WTFMove(query)); } else if (m_mediaQueryData.currentMediaQueryChanged()) commitMediaQuery(); return m_querySet; }
// This method merges the following spec sections for efficiency // http://www.w3.org/TR/css3-syntax/#consume-a-number // http://www.w3.org/TR/css3-syntax/#convert-a-string-to-a-number CSSParserToken CSSTokenizer::consumeNumber() { ASSERT(nextCharsAreNumber()); NumericValueType type = IntegerValueType; double value = 0; unsigned offset = 0; int exponentSign = 1; int sign = getSign(m_input, offset); unsigned long long integerPart = getInteger(m_input, offset); unsigned integerPartEndOffset = offset; double fractionPart = getFraction(m_input, offset); unsigned long long exponentPart = getExponent(m_input, offset, exponentSign); double exponent = pow(10, (float)exponentSign * (double)exponentPart); value = (double)sign * ((double)integerPart + fractionPart) * exponent; m_input.advance(offset); if (offset != integerPartEndOffset) type = NumberValueType; return CSSParserToken(NumberToken, value, type); }
// Consumes one code point and dispatches to its handler.
//
// Unlike the HTMLTokenizer, the CSS Syntax spec describes a stateless
// tokenizer with fixed-size look-ahead. A stateful model with a state per
// "next 3 codepoints are X" case would be possible, and state-machine
// tokenizers make incremental tokenization of partial sources easier to
// write, but for now the spec is followed exactly.
CSSParserToken CSSTokenizer::nextToken()
{
    const UChar cc = consume();

    // Non-ASCII code points are handled as name starts; ASCII ones are
    // looked up in the per-code-point table.
    CodePoint handler = &CSSTokenizer::nameStart;
    if (isASCII(cc)) {
        ASSERT_WITH_SECURITY_IMPLICATION(cc < codePointsNumber);
        handler = codePoints[cc];
    }

    // A null table entry means the code point is a plain delimiter.
    if (!handler)
        return CSSParserToken(DelimiterToken, cc);
    return (this->*handler)(cc);
}
// Builds a UnicodeRangeToken from the given start and end values.
static CSSParserToken unicodeRange(UChar32 start, UChar32 end)
{
    CSSParserToken token(UnicodeRangeToken, start, end);
    return token;
}
// Builds a DelimiterToken for the character |c|.
static CSSParserToken delim(char c)
{
    CSSParserToken token(DelimiterToken, c);
    return token;
}
// Builds a hash token of the given |type| whose value is |string|.
static CSSParserToken hash(const String& string, HashTokenType type)
{
    CSSParserToken token(type, toParserString(string));
    return token;
}
// Builds a UrlToken whose value is |string|.
static CSSParserToken url(const String& string)
{
    CSSParserToken token(UrlToken, toParserString(string));
    return token;
}
// Builds a FunctionToken whose name is |string|.
static CSSParserToken function(const String& string)
{
    CSSParserToken token(FunctionToken, toParserString(string));
    return token;
}