// http://dev.w3.org/csswg/css-syntax/#consume-an-escaped-code-point UChar32 CSSTokenizer::consumeEscape() { UChar cc = consume(); ASSERT(!isNewLine(cc)); if (isASCIIHexDigit(cc)) { unsigned consumedHexDigits = 1; StringBuilder hexChars; hexChars.append(cc); while (consumedHexDigits < 6 && isASCIIHexDigit(m_input.nextInputChar())) { cc = consume(); hexChars.append(cc); consumedHexDigits++; }; consumeSingleWhitespaceIfNext(); bool ok = false; UChar32 codePoint = hexChars.toString().toUIntStrict(&ok, 16); ASSERT(ok); if (codePoint == 0 || (0xD800 <= codePoint && codePoint <= 0xDFFF) || codePoint > 0x10FFFF) return replacementCharacter; return codePoint; } if (cc == kEndOfFileMarker) return replacementCharacter; return cc; }
// http://dev.w3.org/csswg/css-syntax/#consume-an-escaped-code-point UChar CSSTokenizer::consumeEscape() { UChar cc = consume(); ASSERT(!isNewLine(cc)); if (isASCIIHexDigit(cc)) { unsigned consumedHexDigits = 1; StringBuilder hexChars; hexChars.append(cc); while (consumedHexDigits < 6 && isASCIIHexDigit(m_input.nextInputChar())) { cc = consume(); hexChars.append(cc); consumedHexDigits++; }; consumeSingleWhitespaceIfNext(); bool ok = false; UChar codePoint = hexChars.toString().toUIntStrict(&ok, 16); if (!ok) return WTF::Unicode::replacementCharacter; return codePoint; } // Replaces NULLs with replacement characters, since we do not perform preprocessing if (cc == kEndOfFileMarker) return WTF::Unicode::replacementCharacter; return cc; }
// http://dev.w3.org/csswg/css-syntax/#consume-a-string-token CSSParserToken CSSTokenizer::consumeStringTokenUntil(UChar endingCodePoint) { StringBuilder output; while (true) { UChar cc = consume(); if (cc == endingCodePoint || cc == kEndOfFileMarker) { // The "reconsume" here deviates from the spec, but is required to avoid consuming past the EOF if (cc == kEndOfFileMarker) reconsume(cc); return CSSParserToken(StringToken, output.toString()); } if (isNewLine(cc)) { reconsume(cc); return CSSParserToken(BadStringToken); } if (cc == '\\') { if (m_input.nextInputChar() == kEndOfFileMarker) continue; if (isNewLine(m_input.nextInputChar())) consumeSingleWhitespaceIfNext(); // This handles \r\n for us else output.append(consumeEscape()); } else { output.append(cc); } } }
// http://dev.w3.org/csswg/css-syntax/#consume-a-string-token CSSParserToken CSSTokenizer::consumeStringTokenUntil(UChar endingCodePoint) { // Strings without escapes get handled without allocations for (unsigned size = 0; ; size++) { UChar cc = m_input.peekWithoutReplacement(size); if (cc == endingCodePoint) { unsigned startOffset = m_input.offset(); m_input.advance(size + 1); return CSSParserToken(StringToken, m_input.rangeAsCSSParserString(startOffset, size)); } if (isNewLine(cc)) { m_input.advance(size); return CSSParserToken(BadStringToken); } if (cc == '\0' || cc == '\\') break; } StringBuilder output; while (true) { UChar cc = consume(); if (cc == endingCodePoint || cc == kEndOfFileMarker) return CSSParserToken(StringToken, registerString(output.toString())); if (isNewLine(cc)) { reconsume(cc); return CSSParserToken(BadStringToken); } if (cc == '\\') { if (m_input.nextInputChar() == kEndOfFileMarker) continue; if (isNewLine(m_input.nextInputChar())) consumeSingleWhitespaceIfNext(); // This handles \r\n for us else output.append(consumeEscape()); } else { output.append(cc); } } }