// Consumes the body of a unicode-range token and returns a UnicodeRangeToken
// carrying its bounds. The next input character must be a hex digit or '?'.
//
// Accepted shapes (at most six digit positions per number):
//   - hex digits, optionally followed by '?' wildcards; each wildcard
//     contributes a 0 nibble to the start and an F nibble to the end;
//   - hex digits, '-', hex digits: an explicit start/end pair.
CSSParserToken CSSTokenizer::consumeUnicodeRange()
{
    ASSERT(isASCIIHexDigit(m_input.nextInputChar()) || m_input.nextInputChar() == '?');
    int digitsLeft = 6;
    UChar32 rangeStart = 0;

    // Leading hex digits of the start value.
    for (; digitsLeft && isASCIIHexDigit(m_input.nextInputChar()); --digitsLeft)
        rangeStart = rangeStart * 16 + toASCIIHexValue(consume());

    UChar32 rangeEnd = rangeStart;

    // Trailing '?' wildcards widen the range one nibble at a time.
    bool sawWildcard = false;
    while (digitsLeft && consumeIfNext('?')) {
        sawWildcard = true;
        rangeStart *= 16;
        rangeEnd = rangeEnd * 16 + 0xF;
        --digitsLeft;
    }

    // Without wildcards, a '-' immediately followed by a hex digit introduces
    // an explicit end value.
    if (!sawWildcard && m_input.nextInputChar() == '-' && isASCIIHexDigit(m_input.peek(1))) {
        consume();
        digitsLeft = 6;
        rangeEnd = 0;
        do {
            rangeEnd = rangeEnd * 16 + toASCIIHexValue(consume());
            --digitsLeft;
        } while (digitsLeft && isASCIIHexDigit(m_input.nextInputChar()));
    }

    return CSSParserToken(UnicodeRangeToken, rangeStart, rangeEnd);
}
// http://dev.w3.org/csswg/css-syntax/#consume-an-escaped-code-point
// Consumes the character(s) following a backslash and returns the code point
// the escape stands for. A hex escape is one to six hex digits followed by an
// optional single whitespace; zero, surrogates and values above U+10FFFF
// yield the replacement character. Any other character escapes itself, except
// the end-of-file marker, which also yields the replacement character.
UChar32 CSSTokenizer::consumeEscape()
{
    UChar cc = consume();
    ASSERT(!isNewLine(cc));

    if (!isASCIIHexDigit(cc)) {
        if (cc == kEndOfFileMarker)
            return replacementCharacter;
        return cc;
    }

    // Gather up to six hex digits, the first of which is already in cc.
    StringBuilder hexDigits;
    hexDigits.append(cc);
    for (unsigned digitCount = 1; digitCount < 6 && isASCIIHexDigit(m_input.nextInputChar()); ++digitCount) {
        cc = consume();
        hexDigits.append(cc);
    }
    consumeSingleWhitespaceIfNext();

    bool ok = false;
    UChar32 codePoint = hexDigits.toString().toUIntStrict(&ok, 16);
    ASSERT(ok);

    const bool isNull = codePoint == 0;
    const bool isSurrogate = 0xD800 <= codePoint && codePoint <= 0xDFFF;
    const bool isOutOfRange = codePoint > 0x10FFFF;
    if (isNull || isSurrogate || isOutOfRange)
        return replacementCharacter;
    return codePoint;
}
void quotedPrintableDecode(const char* data, size_t dataLength, Vector<char>& out)
{
    out.clear();
    if (!dataLength)
        return;

    for (size_t i = 0; i < dataLength; ++i) {
        char currentCharacter = data[i];
        if (currentCharacter != '=') {
            out.append(currentCharacter);
            continue;
        }
        // We are dealing with a '=xx' sequence.
        if (dataLength - i < 3) {
            // Unfinished = sequence, append as is.
            out.append(currentCharacter);
            continue;
        }
        char upperCharacter = data[++i];
        char lowerCharacter = data[++i];
        if (upperCharacter == '\r' && lowerCharacter == '\n')
            continue;

        if (!isASCIIHexDigit(upperCharacter) || !isASCIIHexDigit(lowerCharacter)) {
            // Invalid sequence, = followed by non hex digits, just insert the characters as is.
            out.append('=');
            out.append(upperCharacter);
            out.append(lowerCharacter);
            continue;
        }
        out.append(static_cast<char>(toASCIIHexValue(upperCharacter, lowerCharacter)));
    }
}
Exemple #4
0
// http://dev.w3.org/csswg/css-syntax/#consume-an-escaped-code-point
// Consumes the character(s) following a backslash and returns the UTF-16 code
// unit the escape stands for.
//
// A hex escape is one to six hex digits. Because the return type is a single
// UChar, only values in the Basic Multilingual Plane can be returned; the
// replacement character is returned instead when the escape denotes
//   - zero,
//   - a surrogate (U+D800..U+DFFF), or
//   - a value above U+FFFF (which would otherwise be silently truncated to
//     its low 16 bits and turn into an unrelated character).
// Any other character escapes itself, except the end-of-file marker, which
// yields the replacement character.
UChar MediaQueryTokenizer::consumeEscape()
{
    UChar cc = consume();
    ASSERT(cc != '\n');
    if (isASCIIHexDigit(cc)) {
        unsigned consumedHexDigits = 1;
        StringBuilder hexChars;
        hexChars.append(cc);
        while (consumedHexDigits < 6 && isASCIIHexDigit(m_input.nextInputChar())) {
            cc = consume();
            hexChars.append(cc);
            consumedHexDigits++;
        }
        bool ok = false;
        // Keep the full parsed value: six hex digits can reach 0xFFFFFF, which
        // does not fit in a 16-bit UChar.
        unsigned codePoint = hexChars.toString().toUIntStrict(&ok, 16);
        if (!ok)
            return WTF::Unicode::replacementCharacter;
        if (!codePoint || (0xD800 <= codePoint && codePoint <= 0xDFFF) || codePoint > 0xFFFF)
            return WTF::Unicode::replacementCharacter;
        return static_cast<UChar>(codePoint);
    }

    // Replaces NULLs with replacement characters, since we do not perform preprocessing
    if (cc == kEndOfFileMarker)
        return WTF::Unicode::replacementCharacter;
    return cc;
}
// Fills |hash| by decoding consecutive pairs of hex characters from
// |characters|, one pair per hash byte. Reads 2 * sizeof(hash) characters; the
// caller must guarantee that many are available. Returns false at the first
// pair containing a non-hex character (bytes already written to |hash| are
// left in place), true once every byte has been decoded.
template <typename CharType> bool hexDigitsToHash(CharType* characters, NetworkCacheKey::HashType& hash)
{
    CharType* pair = characters;
    for (unsigned byteIndex = 0; byteIndex < sizeof(hash); ++byteIndex, pair += 2) {
        auto high = pair[0];
        auto low = pair[1];
        if (!(isASCIIHexDigit(high) && isASCIIHexDigit(low)))
            return false;
        hash[byteIndex] = toASCIIHexValue(high, low);
    }
    return true;
}
static inline String quoteCSSStringInternal(const CharacterType* characters, unsigned length)
{
    // Returns |characters| wrapped in single quotes and escaped for use as a CSS string:
    //   - backslash and single quote are prefixed with a backslash;
    //   - control characters (below 0x20, and 0x7F) become a backslash followed by their
    //     lowercase hex value in as few digits as possible;
    //   - a hex digit or space that directly follows such a hex escape gets a separating
    //     space so it is not read as part of the escape.
    //
    // Two passes: the first computes the exact output length so the buffer can be
    // allocated once, the second writes the characters. Both must classify every
    // character identically.
    const CharacterType* const inputEnd = characters + length;

    unsigned quotedLength = 2; // Opening and closing quote.
    bool previousWasHexEscape = false;
    for (const CharacterType* it = characters; it != inputEnd; ++it) {
        CharacterType c = *it;
        if (c == '\\' || c == '\'') {
            quotedLength += 2;
            previousWasHexEscape = false;
            continue;
        }
        if (c < 0x20 || c == 0x7F) {
            // Backslash plus one hex digit, or two when the value is 0x10 or more.
            quotedLength += (c >= 0x10) ? 3 : 2;
            previousWasHexEscape = true;
            continue;
        }
        const bool needsSeparator = previousWasHexEscape && (isASCIIHexDigit(c) || c == ' ');
        quotedLength += needsSeparator ? 2 : 1;
        previousWasHexEscape = false;
    }

    StringBuffer<CharacterType> buffer(quotedLength);
    unsigned writeIndex = 0;
    buffer[writeIndex++] = '\'';
    previousWasHexEscape = false;
    for (const CharacterType* it = characters; it != inputEnd; ++it) {
        CharacterType c = *it;
        if (c == '\\' || c == '\'') {
            buffer[writeIndex++] = '\\';
            buffer[writeIndex++] = c;
            previousWasHexEscape = false;
            continue;
        }
        if (c < 0x20 || c == 0x7F) { // Control characters.
            buffer[writeIndex++] = '\\';
            placeByteAsHexCompressIfPossible(c, buffer, writeIndex, Lowercase);
            previousWasHexEscape = true;
            continue;
        }
        // Space character may be required to separate backslash-escape sequence and normal characters.
        if (previousWasHexEscape && (isASCIIHexDigit(c) || c == ' '))
            buffer[writeIndex++] = ' ';
        buffer[writeIndex++] = c;
        previousWasHexEscape = false;
    }
    buffer[writeIndex++] = '\'';

    ASSERT(quotedLength == writeIndex);
    return String::adopt(buffer);
}
// Handles a token that starts with the letter |cc|. If the upcoming input is
// '+' followed by a hex digit or '?', the '+' is consumed and a unicode-range
// token is produced; otherwise |cc| is pushed back and the input is consumed
// as an ident-like token.
CSSParserToken CSSTokenizer::letterU(UChar cc)
{
    const bool startsUnicodeRange = m_input.nextInputChar() == '+'
        && (isASCIIHexDigit(m_input.peek(1)) || m_input.peek(1) == '?');

    if (!startsUnicodeRange) {
        reconsume(cc);
        return consumeIdentLikeToken();
    }

    consume();
    return consumeUnicodeRange();
}
// Converts an arbitrary string into a color using the legacy rules:
// non-hex characters count as '0', the digits are split into three equal
// components, and two digits are taken from each component. An input with no
// usable characters yields black.
static RGBA32 parseColorStringWithCrazyLegacyRules(const String& colorString)
{
    // Per spec, only look at the first 128 digits of the string.
    const size_t maxColorLength = 128;
    // Two padding zeros are appended further down, so reserve room for them as well.
    Vector<char, maxColorLength + 2> hexDigits;

    // Skip a leading #.
    size_t sourceIndex = colorString[0] == '#' ? 1 : 0;

    // Grab the first 128 characters, replacing non-hex characters with 0.
    // Non-BMP characters are replaced with "00" due to them appearing as two "characters" in the String.
    while (sourceIndex < colorString.length() && hexDigits.size() < maxColorLength) {
        if (isASCIIHexDigit(colorString[sourceIndex]))
            hexDigits.append(colorString[sourceIndex]);
        else
            hexDigits.append('0');
        sourceIndex++;
    }

    if (!hexDigits.size())
        return Color::black;

    // Pad the buffer out to at least the next multiple of three in size.
    hexDigits.append('0');
    hexDigits.append('0');

    // Fewer than six digits: each of the first three digits is one component.
    if (hexDigits.size() < 6)
        return makeRGB(toASCIIHexValue(hexDigits[0]), toASCIIHexValue(hexDigits[1]), toASCIIHexValue(hexDigits[2]));

    // Split the digits into three components, then search the last 8 digits of each component.
    ASSERT(hexDigits.size() >= 6);
    const size_t componentLength = hexDigits.size() / 3;
    const size_t searchWindowLength = min<size_t>(componentLength, 8);
    size_t redIndex = componentLength - searchWindowLength;
    size_t greenIndex = redIndex + componentLength;
    size_t blueIndex = greenIndex + componentLength;

    // Skip digits until one of them is non-zero, or we've only got two digits left in the component.
    while (hexDigits[redIndex] == '0' && hexDigits[greenIndex] == '0' && hexDigits[blueIndex] == '0' && (componentLength - redIndex) > 2) {
        ++redIndex;
        ++greenIndex;
        ++blueIndex;
    }
    ASSERT(redIndex + 1 < componentLength);
    ASSERT(greenIndex >= componentLength);
    ASSERT(greenIndex + 1 < componentLength * 2);
    ASSERT(blueIndex >= componentLength * 2);
    ASSERT_WITH_SECURITY_IMPLICATION(blueIndex + 1 < hexDigits.size());

    // Each component is the two digits at its (possibly advanced) index.
    int red = toASCIIHexValue(hexDigits[redIndex], hexDigits[redIndex + 1]);
    int green = toASCIIHexValue(hexDigits[greenIndex], hexDigits[greenIndex + 1]);
    int blue = toASCIIHexValue(hexDigits[blueIndex], hexDigits[blueIndex + 1]);
    return makeRGB(red, green, blue);
}
Exemple #9
0
static inline bool parseHexColorInternal(const CharacterType* name, unsigned length, RGBA32& rgb)
{
    if (length != 3 && length != 6)
        return false;
    unsigned value = 0;
    for (unsigned i = 0; i < length; ++i) {
        if (!isASCIIHexDigit(name[i]))
            return false;
        value <<= 4;
        value |= toASCIIHexValue(name[i]);
    }
    if (length == 6) {
        rgb = 0xFF000000 | value;
        return true;
    }
    // #abc converts to #aabbcc
    rgb = 0xFF000000
        | (value & 0xF00) << 12 | (value & 0xF00) << 8
        | (value & 0xF0) << 8 | (value & 0xF0) << 4
        | (value & 0xF) << 4 | (value & 0xF);
    return true;
}
Exemple #10
0
// originally moved here from the CSS parser
// Parses |name| as exactly 3 or 6 hex digits (no leading '#') and stores the
// resulting fully opaque color in |rgb|. Returns false, leaving |rgb|
// untouched, when the length is wrong or a character is not a hex digit.
bool Color::parseHexColor(const String& name, RGBA32& rgb)
{
    const unsigned digitCount = name.length();
    if (!(digitCount == 3 || digitCount == 6))
        return false;

    // Accumulate the digits most-significant first.
    unsigned packed = 0;
    unsigned position = 0;
    while (position < digitCount) {
        if (!isASCIIHexDigit(name[position]))
            return false;
        packed = (packed << 4) | toASCIIHexValue(name[position]);
        ++position;
    }

    if (digitCount == 3) {
        // #abc converts to #aabbcc: duplicate each nibble into a full byte.
        const unsigned red = (packed >> 8) & 0xF;
        const unsigned green = (packed >> 4) & 0xF;
        const unsigned blue = packed & 0xF;
        rgb = 0xFF000000 | ((red * 0x11) << 16) | ((green * 0x11) << 8) | (blue * 0x11);
        return true;
    }

    rgb = 0xFF000000 | packed;
    return true;
}
// "inline" is required here to help WINSCW compiler resolve specialized argument in templated functions.
// Lexes a string literal. m_ptr is advanced past its first character on entry
// (presumably the opening quote) and the literal must end with '"'.
//
// Runs of characters accepted by isSafeStringCharacter<mode>() are copied in
// bulk. In StrictJSON mode, backslash escapes (\" \\ \/ \b \f \n \r \t and
// \uNNNN) are decoded and scanning continues; in any other mode only a single
// run is accepted.
//
// On success fills in |token| (string value, type, end pointer just past the
// closing quote) and returns TokString; otherwise returns TokError.
template <LiteralParser::ParserMode mode> inline LiteralParser::TokenType LiteralParser::Lexer::lexString(LiteralParserToken& token)
{
    ++m_ptr;
    const UChar* segmentStart;
    StringBuilder builder;
    do {
        // Copy the longest run of characters that need no special handling.
        segmentStart = m_ptr;
        while (m_ptr < m_end && isSafeStringCharacter<mode>(*m_ptr))
            ++m_ptr;
        if (segmentStart < m_ptr)
            builder.append(segmentStart, m_ptr - segmentStart);

        if ((mode == StrictJSON) && m_ptr < m_end && *m_ptr == '\\') {
            ++m_ptr;
            if (m_ptr >= m_end)
                return TokError;

            if (*m_ptr == 'u') {
                if ((m_end - m_ptr) < 5) // uNNNN == 5 characters
                    return TokError;
                for (int offset = 1; offset <= 4; ++offset) {
                    if (!isASCIIHexDigit(m_ptr[offset]))
                        return TokError;
                }
                builder.append(AJ::Lexer::convertUnicode(m_ptr[1], m_ptr[2], m_ptr[3], m_ptr[4]));
                m_ptr += 5;
            } else {
                // Single-character escapes map to exactly one output character.
                char unescaped = 0;
                switch (*m_ptr) {
                    case '"':
                        unescaped = '"';
                        break;
                    case '\\':
                        unescaped = '\\';
                        break;
                    case '/':
                        unescaped = '/';
                        break;
                    case 'b':
                        unescaped = '\b';
                        break;
                    case 'f':
                        unescaped = '\f';
                        break;
                    case 'n':
                        unescaped = '\n';
                        break;
                    case 'r':
                        unescaped = '\r';
                        break;
                    case 't':
                        unescaped = '\t';
                        break;
                    default:
                        return TokError;
                }
                builder.append(unescaped);
                m_ptr++;
            }
        }
        // Keep going only in strict mode, while progress was made and the
        // closing quote has not been reached.
    } while ((mode == StrictJSON) && m_ptr != segmentStart && (m_ptr < m_end) && *m_ptr != '"');

    if (m_ptr >= m_end || *m_ptr != '"')
        return TokError;

    token.stringToken = builder.build();
    token.type = TokString;
    token.end = ++m_ptr;
    return TokString;
}
Exemple #12
0
static bool parseUnicodeRange(const CharType* characters, unsigned length, UnicodeRange& range)
{
    if (length < 2 || characters[0] != 'U' || characters[1] != '+')
        return false;

    // Parse the starting hex number (or its prefix).
    unsigned startRange = 0;
    unsigned startLength = 0;

    const CharType* ptr = characters + 2;
    const CharType* end = characters + length;
    while (ptr < end) {
        if (!isASCIIHexDigit(*ptr))
            break;
        ++startLength;
        if (startLength > 6)
            return false;
        startRange = (startRange << 4) | toASCIIHexValue(*ptr);
        ++ptr;
    }

    // Handle the case of ranges separated by "-" sign.
    if (2 + startLength < length && *ptr == '-') {
        if (!startLength)
            return false;

        // Parse the ending hex number (or its prefix).
        unsigned endRange = 0;
        unsigned endLength = 0;
        ++ptr;
        while (ptr < end) {
            if (!isASCIIHexDigit(*ptr))
                break;
            ++endLength;
            if (endLength > 6)
                return false;
            endRange = (endRange << 4) | toASCIIHexValue(*ptr);
            ++ptr;
        }

        if (!endLength)
            return false;

        range.first = startRange;
        range.second = endRange;
        return true;
    }

    // Handle the case of a number with some optional trailing question marks.
    unsigned endRange = startRange;
    while (ptr < end) {
        if (*ptr != '?')
            break;
        ++startLength;
        if (startLength > 6)
            return false;
        startRange <<= 4;
        endRange = (endRange << 4) | 0xF;
        ++ptr;
    }

    if (!startLength)
        return false;

    range.first = startRange;
    range.second = endRange;
    return true;
}
Exemple #13
0
// Scans the input from the current position and returns the next token.
//
// p1 is treated as a YYSTYPE* that receives the token's semantic value (an
// identifier, a double, or a brace offset, depending on the token); p2 is
// treated as a YYLTYPE* that receives the token's line and column span.
//
// Returns the token code, 0 when m_current is -1 and no automatic semicolon is
// inserted (-1 appears to be the end-of-input sentinel), or -1 after setting
// m_error when the input cannot be tokenized.
//
// The function is a goto-driven state machine: the switch handles punctuators
// inline and jumps to labeled sections for strings, identifiers, numbers and
// comments. All paths that produce a token converge on returnToken.
int Lexer::lex(void* p1, void* p2)
{
    ASSERT(!m_error);
    ASSERT(m_buffer8.isEmpty());
    ASSERT(m_buffer16.isEmpty());

    YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1);
    YYLTYPE* llocp = static_cast<YYLTYPE*>(p2);
    int token = 0;
    m_terminator = false;

// Re-entered after a comment or a line terminator has been skipped.
start:
    while (isWhiteSpace(m_current))
        shift1();

    int startOffset = currentOffset();

    if (m_current == -1) {
        if (!m_terminator && !m_delimited && !m_isReparsing) {
            // automatic semicolon insertion if program incomplete
            token = ';';
            goto doneSemicolon;
        }
        return 0;
    }

    m_delimited = false;
    // Punctuators: the longest operator is matched first using the lookahead
    // characters m_next1..m_next3.
    switch (m_current) {
        case '>':
            if (m_next1 == '>' && m_next2 == '>') {
                if (m_next3 == '=') {
                    shift4();
                    token = URSHIFTEQUAL;
                    break;
                }
                shift3();
                token = URSHIFT;
                break;
            }
            if (m_next1 == '>') {
                if (m_next2 == '=') {
                    shift3();
                    token = RSHIFTEQUAL;
                    break;
                }
                shift2();
                token = RSHIFT;
                break;
            }
            if (m_next1 == '=') {
                shift2();
                token = GE;
                break;
            }
            shift1();
            token = '>';
            break;
        case '=':
            if (m_next1 == '=') {
                if (m_next2 == '=') {
                    shift3();
                    token = STREQ;
                    break;
                }
                shift2();
                token = EQEQ;
                break;
            }
            shift1();
            token = '=';
            break;
        case '!':
            if (m_next1 == '=') {
                if (m_next2 == '=') {
                    shift3();
                    token = STRNEQ;
                    break;
                }
                shift2();
                token = NE;
                break;
            }
            shift1();
            token = '!';
            break;
        case '<':
            if (m_next1 == '!' && m_next2 == '-' && m_next3 == '-') {
                // <!-- marks the beginning of a line comment (for www usage)
                shift4();
                goto inSingleLineComment;
            }
            if (m_next1 == '<') {
                if (m_next2 == '=') {
                    shift3();
                    token = LSHIFTEQUAL;
                    break;
                }
                shift2();
                token = LSHIFT;
                break;
            }
            if (m_next1 == '=') {
                shift2();
                token = LE;
                break;
            }
            shift1();
            token = '<';
            break;
        case '+':
            if (m_next1 == '+') {
                shift2();
                // A distinct token is produced when a line terminator was seen
                // before the operator during this call.
                if (m_terminator) {
                    token = AUTOPLUSPLUS;
                    break;
                }
                token = PLUSPLUS;
                break;
            }
            if (m_next1 == '=') {
                shift2();
                token = PLUSEQUAL;
                break;
            }
            shift1();
            token = '+';
            break;
        case '-':
            if (m_next1 == '-') {
                // "-->" at the start of a line is treated as a single-line comment.
                if (m_atLineStart && m_next2 == '>') {
                    shift3();
                    goto inSingleLineComment;
                }
                shift2();
                if (m_terminator) {
                    token = AUTOMINUSMINUS;
                    break;
                }
                token = MINUSMINUS;
                break;
            }
            if (m_next1 == '=') {
                shift2();
                token = MINUSEQUAL;
                break;
            }
            shift1();
            token = '-';
            break;
        case '*':
            if (m_next1 == '=') {
                shift2();
                token = MULTEQUAL;
                break;
            }
            shift1();
            token = '*';
            break;
        case '/':
            if (m_next1 == '/') {
                shift2();
                goto inSingleLineComment;
            }
            if (m_next1 == '*')
                goto inMultiLineComment;
            if (m_next1 == '=') {
                shift2();
                token = DIVEQUAL;
                break;
            }
            shift1();
            token = '/';
            break;
        case '&':
            if (m_next1 == '&') {
                shift2();
                token = AND;
                break;
            }
            if (m_next1 == '=') {
                shift2();
                token = ANDEQUAL;
                break;
            }
            shift1();
            token = '&';
            break;
        case '^':
            if (m_next1 == '=') {
                shift2();
                token = XOREQUAL;
                break;
            }
            shift1();
            token = '^';
            break;
        case '%':
            if (m_next1 == '=') {
                shift2();
                token = MODEQUAL;
                break;
            }
            shift1();
            token = '%';
            break;
        case '|':
            if (m_next1 == '=') {
                shift2();
                token = OREQUAL;
                break;
            }
            if (m_next1 == '|') {
                shift2();
                token = OR;
                break;
            }
            shift1();
            token = '|';
            break;
        case '.':
            // ".5" style number: a dot directly followed by a digit.
            if (isASCIIDigit(m_next1)) {
                record8('.');
                shift1();
                goto inNumberAfterDecimalPoint;
            }
            token = '.';
            shift1();
            break;
        case ',':
        case '~':
        case '?':
        case ':':
        case '(':
        case ')':
        case '[':
        case ']':
            // Single-character tokens whose token code is the character itself.
            token = m_current;
            shift1();
            break;
        case ';':
            shift1();
            m_delimited = true;
            token = ';';
            break;
        case '{':
            lvalp->intValue = currentOffset();
            shift1();
            token = OPENBRACE;
            break;
        case '}':
            lvalp->intValue = currentOffset();
            shift1();
            m_delimited = true;
            token = CLOSEBRACE;
            break;
        case '\\':
            goto startIdentifierWithBackslash;
        case '0':
            goto startNumberWithZeroDigit;
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
            goto startNumber;
        case '"':
        case '\'':
            goto startString;
        default:
            if (isIdentStart(m_current))
                goto startIdentifierOrKeyword;
            if (isLineTerminator(m_current)) {
                shiftLineTerminator();
                m_atLineStart = true;
                m_terminator = true;
                // After certain keywords (see lastTokenWasRestrKeyword) a line
                // terminator produces a semicolon token.
                if (lastTokenWasRestrKeyword()) {
                    token = ';';
                    goto doneSemicolon;
                }
                goto start;
            }
            goto returnError;
    }

    // Reached by every punctuator case that left the switch via break.
    m_atLineStart = false;
    goto returnToken;

// String literal. The first loop is a fast path that scans without copying; as
// soon as a character needing special handling is found, the characters seen
// so far are copied to m_buffer16 and scanning continues at inString.
startString: {
    int stringQuoteCharacter = m_current;
    shift1();

    const UChar* stringStart = currentCharacter();
    while (m_current != stringQuoteCharacter) {
        // Fast check for characters that require special handling.
        // Catches -1, \n, \r, \, 0x2028, and 0x2029 as efficiently
        // as possible, and lets through all common ASCII characters.
        if (UNLIKELY(m_current == '\\') || UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
            m_buffer16.append(stringStart, currentCharacter() - stringStart);
            goto inString;
        }
        shift1();
    }
    lvalp->ident = makeIdentifier(stringStart, currentCharacter() - stringStart);
    shift1();
    m_atLineStart = false;
    m_delimited = false;
    token = STRING;
    goto returnToken;

// Slow path: characters are recorded into m_buffer16 one at a time. An
// unescaped line terminator or the end of input inside a string is an error.
inString:
    while (m_current != stringQuoteCharacter) {
        if (m_current == '\\')
            goto inStringEscapeSequence;
        if (UNLIKELY(isLineTerminator(m_current)))
            goto returnError;
        if (UNLIKELY(m_current == -1))
            goto returnError;
        record16(m_current);
        shift1();
    }
    goto doneString;

// Decodes what follows a backslash inside a string: \xHH, \uHHHH, octal
// escapes, an escaped line terminator (skipped), or a single-character escape.
inStringEscapeSequence:
    shift1();
    if (m_current == 'x') {
        shift1();
        if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1)) {
            record16(convertHex(m_current, m_next1));
            shift2();
            goto inString;
        }
        // Not followed by two hex digits: the 'x' is kept literally.
        record16('x');
        if (m_current == stringQuoteCharacter)
            goto doneString;
        goto inString;
    }
    if (m_current == 'u') {
        shift1();
        if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1) && isASCIIHexDigit(m_next2) && isASCIIHexDigit(m_next3)) {
            record16(convertUnicode(m_current, m_next1, m_next2, m_next3));
            shift4();
            goto inString;
        }
        // "\u" directly before the closing quote is kept as a literal 'u';
        // any other malformed \u escape is an error.
        if (m_current == stringQuoteCharacter) {
            record16('u');
            goto doneString;
        }
        goto returnError;
    }
    if (isASCIIOctalDigit(m_current)) {
        // Three-digit octal escapes are only taken when the first digit is 0-3.
        if (m_current >= '0' && m_current <= '3' && isASCIIOctalDigit(m_next1) && isASCIIOctalDigit(m_next2)) {
            record16((m_current - '0') * 64 + (m_next1 - '0') * 8 + m_next2 - '0');
            shift3();
            goto inString;
        }
        if (isASCIIOctalDigit(m_next1)) {
            record16((m_current - '0') * 8 + m_next1 - '0');
            shift2();
            goto inString;
        }
        record16(m_current - '0');
        shift1();
        goto inString;
    }
    if (isLineTerminator(m_current)) {
        shiftLineTerminator();
        goto inString;
    }
    record16(singleEscape(m_current));
    shift1();
    goto inString;
}

// Identifier whose first character is written as a \uHHHH escape. |token| is
// used as scratch storage for the decoded character before jumping into the
// identifier loop below.
startIdentifierWithBackslash:
    shift1();
    if (UNLIKELY(m_current != 'u'))
        goto returnError;
    shift1();
    if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)))
        goto returnError;
    token = convertUnicode(m_current, m_next1, m_next2, m_next3);
    if (UNLIKELY(!isIdentStart(token)))
        goto returnError;
    goto inIdentifierAfterCharacterCheck;

// Identifier or keyword. With no backslash escape the identifier is built
// directly from the source characters; otherwise the prefix is copied to
// m_buffer16 and the escape loop below takes over.
startIdentifierOrKeyword: {
    const UChar* identifierStart = currentCharacter();
    shift1();
    while (isIdentPart(m_current))
        shift1();
    if (LIKELY(m_current != '\\')) {
        lvalp->ident = makeIdentifier(identifierStart, currentCharacter() - identifierStart);
        goto doneIdentifierOrKeyword;
    }
    m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
}

    // Each iteration handles one \uHHHH escape followed by a run of ordinary
    // identifier characters.
    do {
        shift1();
        if (UNLIKELY(m_current != 'u'))
            goto returnError;
        shift1();
        if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)))
            goto returnError;
        token = convertUnicode(m_current, m_next1, m_next2, m_next3);
        if (UNLIKELY(!isIdentPart(token)))
            goto returnError;
inIdentifierAfterCharacterCheck:
        record16(token);
        shift4();

        while (isIdentPart(m_current)) {
            record16(m_current);
            shift1();
        }
    } while (UNLIKELY(m_current == '\\'));
    goto doneIdentifier;

// Skips to the end of the line. Reaching the end of input inside the comment
// returns 0.
inSingleLineComment:
    while (!isLineTerminator(m_current)) {
        if (UNLIKELY(m_current == -1))
            return 0;
        shift1();
    }
    shiftLineTerminator();
    m_atLineStart = true;
    m_terminator = true;
    if (lastTokenWasRestrKeyword())
        goto doneSemicolon;
    goto start;

// Skips a /* ... */ comment; an unterminated comment is an error.
inMultiLineComment:
    shift2();
    while (m_current != '*' || m_next1 != '/') {
        if (isLineTerminator(m_current))
            shiftLineTerminator();
        else {
            shift1();
            if (UNLIKELY(m_current == -1))
                goto returnError;
        }
    }
    shift2();
    m_atLineStart = false;
    goto start;

// Number starting with '0': hex ("0x"), fraction ("0."), exponent ("0e"),
// octal, a decimal with a leading zero, or plain zero.
startNumberWithZeroDigit:
    shift1();
    // (c | 0x20) folds an ASCII letter to lower case, so this matches x and X.
    if ((m_current | 0x20) == 'x' && isASCIIHexDigit(m_next1)) {
        shift1();
        goto inHex;
    }
    if (m_current == '.') {
        record8('0');
        record8('.');
        shift1();
        goto inNumberAfterDecimalPoint;
    }
    if ((m_current | 0x20) == 'e') {
        record8('0');
        record8('e');
        shift1();
        goto inExponentIndicator;
    }
    if (isASCIIOctalDigit(m_current))
        goto inOctal;
    if (isASCIIDigit(m_current))
        goto startNumber;
    lvalp->doubleValue = 0;
    goto doneNumeric;

// Fraction digits; the text collected in m_buffer8 is converted in doneNumber.
inNumberAfterDecimalPoint:
    while (isASCIIDigit(m_current)) {
        record8(m_current);
        shift1();
    }
    if ((m_current | 0x20) == 'e') {
        record8('e');
        shift1();
        goto inExponentIndicator;
    }
    goto doneNumber;

// Optional sign followed by at least one digit.
inExponentIndicator:
    if (m_current == '+' || m_current == '-') {
        record8(m_current);
        shift1();
    }
    if (!isASCIIDigit(m_current))
        goto returnError;
    do {
        record8(m_current);
        shift1();
    } while (isASCIIDigit(m_current));
    goto doneNumber;

// Octal literal. If a non-octal decimal digit follows, the digits collected so
// far stay in m_buffer8 and the literal continues as a decimal number.
inOctal: {
    do {
        record8(m_current);
        shift1();
    } while (isASCIIOctalDigit(m_current));
    if (isASCIIDigit(m_current))
        goto startNumber;

    double dval = 0;

    const char* end = m_buffer8.end();
    for (const char* p = m_buffer8.data(); p < end; ++p) {
        dval *= 8;
        dval += *p - '0';
    }
    // Large values are recomputed by parseIntOverflow.
    if (dval >= mantissaOverflowLowerBound)
        dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 8);

    m_buffer8.resize(0);

    lvalp->doubleValue = dval;
    goto doneNumeric;
}

// Hex literal; the "0x" prefix has already been skipped and is not recorded.
inHex: {
    do {
        record8(m_current);
        shift1();
    } while (isASCIIHexDigit(m_current));

    double dval = 0;

    const char* end = m_buffer8.end();
    for (const char* p = m_buffer8.data(); p < end; ++p) {
        dval *= 16;
        dval += toASCIIHexValue(*p);
    }
    // Large values are recomputed by parseIntOverflow.
    if (dval >= mantissaOverflowLowerBound)
        dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 16);

    m_buffer8.resize(0);

    lvalp->doubleValue = dval;
    goto doneNumeric;
}

// Decimal literal: integer digits, then an optional fraction or exponent.
startNumber:
    record8(m_current);
    shift1();
    while (isASCIIDigit(m_current)) {
        record8(m_current);
        shift1();
    }
    if (m_current == '.') {
        record8('.');
        shift1();
        goto inNumberAfterDecimalPoint;
    }
    if ((m_current | 0x20) == 'e') {
        record8('e');
        shift1();
        goto inExponentIndicator;
    }

    // Fall through into doneNumber.

doneNumber:
    // Null-terminate string for strtod.
    m_buffer8.append('\0');
    lvalp->doubleValue = WTF::strtod(m_buffer8.data(), 0);
    m_buffer8.resize(0);

    // Fall through into doneNumeric.

doneNumeric:
    // No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
    if (UNLIKELY(isIdentStart(m_current)))
        goto returnError;

    m_atLineStart = false;
    m_delimited = false;
    token = NUMBER;
    goto returnToken;

// Emits a ';' token; used for automatically inserted semicolons.
doneSemicolon:
    token = ';';
    m_delimited = true;
    goto returnToken;

// Identifier that contained an escape: built from m_buffer16 and always
// reported as IDENT, without consulting the keyword table.
doneIdentifier:
    m_atLineStart = false;
    m_delimited = false;
    lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
    m_buffer16.resize(0);
    token = IDENT;
    goto returnToken;

// Identifier without escapes: reported with the keyword table's lexer value
// when an entry exists, as IDENT otherwise.
doneIdentifierOrKeyword: {
    m_atLineStart = false;
    m_delimited = false;
    m_buffer16.resize(0);
    const HashEntry* entry = m_keywordTable.entry(m_globalData, *lvalp->ident);
    token = entry ? entry->lexerValue() : IDENT;
    goto returnToken;
}

doneString:
    // Atomize constant strings in case they're later used in property lookup.
    // The shift1() below steps over the closing quote.
    shift1();
    m_atLineStart = false;
    m_delimited = false;
    lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
    m_buffer16.resize(0);
    token = STRING;

    // Fall through into returnToken.

// Common exit: records the token's location and remembers the token in
// m_lastToken.
returnToken: {
    int lineNumber = m_lineNumber;
    llocp->first_line = lineNumber;
    llocp->last_line = lineNumber;
    llocp->first_column = startOffset;
    llocp->last_column = currentOffset();

    m_lastToken = token;
    return token;
}

// Error exit: flags the lexer as failed. The location in |llocp| is not updated.
returnError:
    m_error = true;
    return -1;
}
Exemple #14
0
// Returns the byte value (0-255) written as two hex digit characters:
// |upperValue| supplies the high nibble, |lowerValue| the low nibble. Both
// must be ASCII hex digits.
template<typename CharType> inline int toASCIIHexValue(CharType upperValue, CharType lowerValue)
{
    DCHECK(isASCIIHexDigit(upperValue) && isASCIIHexDigit(lowerValue));
    const int highNibble = (toASCIIHexValue(upperValue) << 4) & 0xF0;
    const int lowNibble = toASCIIHexValue(lowerValue);
    return highNibble | lowNibble;
}
Exemple #15
0
// Returns the numeric value (0-15) of the ASCII hex digit |c|, which must be
// one of 0-9, A-F or a-f.
template<typename CharType> inline int toASCIIHexValue(CharType c)
{
    DCHECK(isASCIIHexDigit(c));
    // Decimal digits sort before 'A'.
    if (c < 'A')
        return c - '0';
    // Masking to four bits makes the same expression work for both letter
    // cases: 'a'-'f' sit 0x20 above 'A'-'F', and that bit is discarded.
    return (c - 'A' + 10) & 0xF;
}