// http://www.w3.org/TR/css3-syntax/#consume-a-name CSSParserString CSSTokenizer::consumeName() { // Names without escapes get handled without allocations for (unsigned size = 0; ; ++size) { UChar cc = m_input.peekWithoutReplacement(size); if (cc == '\0' || cc == '\\') break; if (!isNameChar(cc)) { unsigned startOffset = m_input.offset(); m_input.advance(size); return m_input.rangeAsCSSParserString(startOffset, size); } } StringBuilder result; while (true) { UChar cc = consume(); if (isNameChar(cc)) { result.append(cc); continue; } if (twoCharsAreValidEscape(cc, m_input.nextInputChar())) { result.append(consumeEscape()); continue; } reconsume(cc); return registerString(result.toString()); } }
// Consumes a string token terminated by end_codepoint.
//
// Returns a StringToken holding the UTF-8 text collected so far when the
// terminator or EOF is reached, or a BAD_STRING token when an unescaped
// line feed is found inside the string.
//
// The leading advance() presumably steps over the opening quote held in
// la0_ -- TODO confirm against the caller.
TokenPtr Tokenizer::consumeString(char32_t end_codepoint)
{
    std::string res;
    advance();
    while(la0_ != end_codepoint) {
        if(la0_ == LF) {
            // An unescaped newline makes the whole string invalid.
            return std::make_shared<Token>(TokenId::BAD_STRING);
        } else if(eof(la0_)) {
            // Unterminated string: return what was collected.
            return std::make_shared<StringToken>(res);
        } else if(la0_ == '\\') {
            if(eof(next())) {
                // Backslash directly before EOF: do nothing. Skip the
                // backslash itself so it is not appended to the result;
                // the loop then sees EOF and returns the string.
                advance();
                continue;
            } else if(next() == LF) {
                // Escaped newline (line continuation): skip both the
                // backslash and the line feed. Advancing only once would
                // leave LF in la0_ and wrongly produce BAD_STRING.
                advance();
                advance();
                continue;
            } else {
                res += consumeEscape();
                // NOTE(review): control falls through and appends la0_ as
                // well. Whether that is right depends on where
                // consumeEscape() leaves la0_ -- confirm against its
                // definition.
            }
        }
        res += utils::codepoint_to_utf8(la0_);
        advance();
    }
    // Step over the closing terminator.
    advance();
    return std::make_shared<StringToken>(res);
}
// http://dev.w3.org/csswg/css-syntax/#consume-a-string-token MediaQueryToken MediaQueryTokenizer::consumeStringTokenUntil(UChar endingCodePoint) { StringBuilder output; while (true) { UChar cc = consume(); if (cc == endingCodePoint || cc == kEndOfFileMarker) { // The "reconsume" here deviates from the spec, but is required to avoid consuming past the EOF if (cc == kEndOfFileMarker) reconsume(cc); return MediaQueryToken(StringToken, output.toString()); } if (isNewLine(cc)) { reconsume(cc); return MediaQueryToken(BadStringToken); } if (cc == '\\') { if (m_input.nextInputChar() == kEndOfFileMarker) continue; if (isNewLine(m_input.nextInputChar())) consume(); else output.append(consumeEscape()); } else { output.append(cc); } } }
// http://dev.w3.org/csswg/css-syntax/#consume-the-remnants-of-a-bad-url void CSSTokenizer::consumeBadUrlRemnants() { while (true) { UChar cc = consume(); if (cc == ')' || cc == kEndOfFileMarker) return; if (twoCharsAreValidEscape(cc, m_input.nextInputChar())) consumeEscape(); } }
// http://dev.w3.org/csswg/css-syntax/#consume-url-token CSSParserToken CSSTokenizer::consumeUrlToken() { consumeUntilNonWhitespace(); // URL tokens without escapes get handled without allocations for (unsigned size = 0; ; size++) { UChar cc = m_input.peekWithoutReplacement(size); if (cc == ')') { unsigned startOffset = m_input.offset(); m_input.advance(size + 1); return CSSParserToken(UrlToken, m_input.rangeAsCSSParserString(startOffset, size)); } if (cc <= ' ' || cc == '\\' || cc == '"' || cc == '\'' || cc == '(' || cc == '\x7f') break; } StringBuilder result; while (true) { UChar cc = consume(); if (cc == ')' || cc == kEndOfFileMarker) return CSSParserToken(UrlToken, registerString(result.toString())); if (isHTMLSpace(cc)) { consumeUntilNonWhitespace(); if (consumeIfNext(')') || m_input.nextInputChar() == kEndOfFileMarker) return CSSParserToken(UrlToken, registerString(result.toString())); break; } if (cc == '"' || cc == '\'' || cc == '(' || isNonPrintableCodePoint(cc)) break; if (cc == '\\') { if (twoCharsAreValidEscape(cc, m_input.nextInputChar())) { result.append(consumeEscape()); continue; } break; } result.append(cc); } consumeBadUrlRemnants(); return CSSParserToken(BadUrlToken); }
// http://www.w3.org/TR/css3-syntax/#consume-a-name String MediaQueryTokenizer::consumeName() { // FIXME: Is this as efficient as it can be? // The possibility of escape chars mandates a copy AFAICT. StringBuilder result; while (true) { UChar cc = consume(); if (isNameChar(cc)) { result.append(cc); continue; } if (twoCharsAreValidEscape(cc, m_input.nextInputChar())) { result.append(consumeEscape()); continue; } reconsume(cc); return result.toString(); } }
// http://dev.w3.org/csswg/css-syntax/#consume-a-string-token CSSParserToken CSSTokenizer::consumeStringTokenUntil(UChar endingCodePoint) { // Strings without escapes get handled without allocations for (unsigned size = 0; ; size++) { UChar cc = m_input.peekWithoutReplacement(size); if (cc == endingCodePoint) { unsigned startOffset = m_input.offset(); m_input.advance(size + 1); return CSSParserToken(StringToken, m_input.rangeAsCSSParserString(startOffset, size)); } if (isNewLine(cc)) { m_input.advance(size); return CSSParserToken(BadStringToken); } if (cc == '\0' || cc == '\\') break; } StringBuilder output; while (true) { UChar cc = consume(); if (cc == endingCodePoint || cc == kEndOfFileMarker) return CSSParserToken(StringToken, registerString(output.toString())); if (isNewLine(cc)) { reconsume(cc); return CSSParserToken(BadStringToken); } if (cc == '\\') { if (m_input.nextInputChar() == kEndOfFileMarker) continue; if (isNewLine(m_input.nextInputChar())) consumeSingleWhitespaceIfNext(); // This handles \r\n for us else output.append(consumeEscape()); } else { output.append(cc); } } }
// http://dev.w3.org/csswg/css-syntax/#consume-url-token CSSParserToken CSSTokenizer::consumeUrlToken() { consumeUntilNonWhitespace(); StringBuilder result; while (true) { UChar cc = consume(); if (cc == ')' || cc == kEndOfFileMarker) { // The "reconsume" here deviates from the spec, but is required to avoid consuming past the EOF if (cc == kEndOfFileMarker) reconsume(cc); return CSSParserToken(UrlToken, result.toString()); } if (isHTMLSpace(cc)) { consumeUntilNonWhitespace(); if (consumeIfNext(')') || m_input.nextInputChar() == kEndOfFileMarker) return CSSParserToken(UrlToken, result.toString()); break; } if (cc == '"' || cc == '\'' || cc == '(' || isNonPrintableCodePoint(cc)) break; if (cc == '\\') { if (twoCharsAreValidEscape(cc, m_input.nextInputChar())) { result.append(consumeEscape()); continue; } break; } result.append(cc); } consumeBadUrlRemnants(); return CSSParserToken(BadUrlToken); }
// Parses one alternative of the pattern: everything up to the next '|',
// ')' or the end of the pattern. Plain pattern characters are accumulated
// in a PatternCharacterSequence, which is flushed before any other
// construct is handled. Returns early, without flushing, when a nested
// parse step fails or a quantifier error is detected.
void Parser::parseAlternative(JumpList& failures)
{
    PatternCharacterSequence sequence(m_generator, failures);

    // Every case either returns or breaks out of the switch, which starts
    // the next loop iteration.
    for (;;) {
        switch (peek()) {
        case EndOfPattern:
        case '|':
        case ')':
            sequence.flush();
            return;

        case '*':
        case '+':
        case '?':
        case '{': {
            Quantifier quantifier = consumeQuantifier();
            if (quantifier.type == Quantifier::None) {
                // No quantifier was consumed: treat the character literally.
                sequence.append(consume());
            } else if (quantifier.type == Quantifier::Error) {
                return;
            } else if (!sequence.size()) {
                // There is no pending character for the quantifier to apply to.
                setError(QuantifierWithoutAtom);
                return;
            } else {
                sequence.flush(quantifier);
            }
            break;
        }

        case '^':
            consume();
            sequence.flush();
            m_generator.generateAssertionBOL(failures);
            break;

        case '$':
            consume();
            sequence.flush();
            m_generator.generateAssertionEOL(failures);
            break;

        case '.':
            consume();
            sequence.flush();
            if (!parseCharacterClassQuantifier(failures, CharacterClass::newline(), true))
                return;
            break;

        case '[':
            consume();
            sequence.flush();
            if (!parseCharacterClass(failures))
                return;
            break;

        case '(':
            consume();
            sequence.flush();
            if (!parseParentheses(failures))
                return;
            break;

        case '\\': {
            consume();
            Escape escape = consumeEscape(false);
            if (escape.type() != Escape::PatternCharacter) {
                sequence.flush();
                if (!parseNonCharacterEscape(failures, escape))
                    return;
                break;
            }
            // An escaped literal joins the pending character sequence.
            sequence.append(PatternCharacterEscape::cast(escape).character());
            break;
        }

        default:
            sequence.append(consume());
            break;
        }
    }
}
// Parses the body of a character class up to and including the closing
// ']', followed by any quantifier. A leading '^' marks the class as
// inverted. Returns false when the pattern ends before ']', when an escape
// is invalid, or when a range is reversed; otherwise returns the result of
// parseCharacterClassQuantifier().
bool Parser::parseCharacterClass(JumpList& failures)
{
    const bool invert = peek() == '^';
    if (invert)
        consume();

    CharacterClassConstructor constructor(m_ignoreCase);

    for (int ch = peek(); ch != ']'; ch = peek()) {
        if (ch == EndOfPattern) {
            setError(CharacterClassUnmatched);
            return false;
        }

        if (ch != '\\') {
            // Ordinary character: add it to the class as-is.
            consume();
            constructor.put(ch);
            continue;
        }

        consume();
        Escape escape = consumeEscape(true);
        switch (escape.type()) {
        case Escape::PatternCharacter: {
            int character = PatternCharacterEscape::cast(escape).character();
            if (character == '-')
                constructor.flushBeforeEscapedHyphen();
            constructor.put(character);
            break;
        }
        case Escape::CharacterClass: {
            const CharacterClassEscape& characterClassEscape = CharacterClassEscape::cast(escape);
            ASSERT(!characterClassEscape.invert());
            constructor.append(characterClassEscape.characterClass());
            break;
        }
        case Escape::Error:
            return false;
        case Escape::Backreference:
        case Escape::WordBoundaryAssertion: {
            ASSERT_NOT_REACHED();
            break;
        }
        }
    }

    // Step over the closing ']'.
    consume();

    // lazily catch reversed ranges ([z-a])in character classes
    if (constructor.isUpsideDown()) {
        setError(CharacterClassOutOfOrder);
        return false;
    }

    constructor.flush();
    CharacterClass charClass = constructor.charClass();
    return parseCharacterClassQuantifier(failures, charClass, invert);
}