int QScript::Lexer::lex() { int token = 0; state = Start; ushort stringType = 0; // either single or double quotes pos8 = pos16 = 0; done = false; terminator = false; // did we push a token on the stack previously ? // (after an automatic semicolon insertion) if (stackToken >= 0) { setDone(Other); token = stackToken; stackToken = -1; } while (!done) { switch (state) { case Start: if (isWhiteSpace()) { // do nothing } else if (current == '/' && next1 == '/') { recordStartPos(); shift(1); state = InSingleLineComment; } else if (current == '/' && next1 == '*') { recordStartPos(); shift(1); state = InMultiLineComment; } else if (current == 0) { syncProhibitAutomaticSemicolon(); if (!terminator && !delimited && !prohibitAutomaticSemicolon) { // automatic semicolon insertion if program incomplete token = QScriptGrammar::T_SEMICOLON; stackToken = 0; setDone(Other); } else { setDone(Eof); } } else if (isLineTerminator()) { shiftWindowsLineBreak(); yylineno++; yycolumn = 0; bol = true; terminator = true; syncProhibitAutomaticSemicolon(); if (restrKeyword) { token = QScriptGrammar::T_SEMICOLON; setDone(Other); } } else if (current == '"' || current == '\'') { recordStartPos(); state = InString; stringType = current; } else if (isIdentLetter(current)) { recordStartPos(); record16(current); state = InIdentifier; } else if (current == '0') { recordStartPos(); record8(current); state = InNum0; } else if (isDecimalDigit(current)) { recordStartPos(); record8(current); state = InNum; } else if (current == '.' 
&& isDecimalDigit(next1)) { recordStartPos(); record8(current); state = InDecimal; } else { recordStartPos(); token = matchPunctuator(current, next1, next2, next3); if (token != -1) { if (terminator && !delimited && !prohibitAutomaticSemicolon && (token == QScriptGrammar::T_PLUS_PLUS || token == QScriptGrammar::T_MINUS_MINUS)) { // automatic semicolon insertion stackToken = token; token = QScriptGrammar::T_SEMICOLON; } setDone(Other); } else { setDone(Bad); err = IllegalCharacter; errmsg = QLatin1String("Illegal character"); } } break; case InString: if (current == stringType) { shift(1); setDone(String); } else if (current == 0 || isLineTerminator()) { setDone(Bad); err = UnclosedStringLiteral; errmsg = QLatin1String("Unclosed string at end of line"); } else if (current == '\\') { state = InEscapeSequence; } else { record16(current); } break; // Escape Sequences inside of strings case InEscapeSequence: if (isOctalDigit(current)) { if (current >= '0' && current <= '3' && isOctalDigit(next1) && isOctalDigit(next2)) { record16(convertOctal(current, next1, next2)); shift(2); state = InString; } else if (isOctalDigit(current) && isOctalDigit(next1)) { record16(convertOctal('0', current, next1)); shift(1); state = InString; } else if (isOctalDigit(current)) { record16(convertOctal('0', '0', current)); state = InString; } else { setDone(Bad); err = IllegalEscapeSequence; errmsg = QLatin1String("Illegal escape squence"); } } else if (current == 'x') state = InHexEscape; else if (current == 'u') state = InUnicodeEscape; else { if (isLineTerminator()) { shiftWindowsLineBreak(); yylineno++; yycolumn = 0; bol = true; } else { record16(singleEscape(current)); } state = InString; } break; case InHexEscape: if (isHexDigit(current) && isHexDigit(next1)) { state = InString; record16(QLatin1Char(convertHex(current, next1))); shift(1); } else if (current == stringType) { record16(QLatin1Char('x')); shift(1); setDone(String); } else { record16(QLatin1Char('x')); record16(current); 
state = InString; } break; case InUnicodeEscape: if (isHexDigit(current) && isHexDigit(next1) && isHexDigit(next2) && isHexDigit(next3)) { record16(convertUnicode(current, next1, next2, next3)); shift(3); state = InString; } else if (current == stringType) { record16(QLatin1Char('u')); shift(1); setDone(String); } else { setDone(Bad); err = IllegalUnicodeEscapeSequence; errmsg = QLatin1String("Illegal unicode escape sequence"); } break; case InSingleLineComment: if (isLineTerminator()) { shiftWindowsLineBreak(); yylineno++; yycolumn = 0; terminator = true; bol = true; if (restrKeyword) { token = QScriptGrammar::T_SEMICOLON; setDone(Other); } else state = Start; } else if (current == 0) { setDone(Eof); } break; case InMultiLineComment: if (current == 0) { setDone(Bad); err = UnclosedComment; errmsg = QLatin1String("Unclosed comment at end of file"); } else if (isLineTerminator()) { shiftWindowsLineBreak(); yylineno++; } else if (current == '*' && next1 == '/') { state = Start; shift(1); } break; case InIdentifier: if (isIdentLetter(current) || isDecimalDigit(current)) { record16(current); break; } setDone(Identifier); break; case InNum0: if (current == 'x' || current == 'X') { record8(current); state = InHex; } else if (current == '.') { record8(current); state = InDecimal; } else if (current == 'e' || current == 'E') { record8(current); state = InExponentIndicator; } else if (isOctalDigit(current)) { record8(current); state = InOctal; } else if (isDecimalDigit(current)) { record8(current); state = InDecimal; } else { setDone(Number); } break; case InHex: if (isHexDigit(current)) record8(current); else setDone(Hex); break; case InOctal: if (isOctalDigit(current)) { record8(current); } else if (isDecimalDigit(current)) { record8(current); state = InDecimal; } else { setDone(Octal); } break; case InNum: if (isDecimalDigit(current)) { record8(current); } else if (current == '.') { record8(current); state = InDecimal; } else if (current == 'e' || current == 'E') { 
record8(current); state = InExponentIndicator; } else { setDone(Number); } break; case InDecimal: if (isDecimalDigit(current)) { record8(current); } else if (current == 'e' || current == 'E') { record8(current); state = InExponentIndicator; } else { setDone(Number); } break; case InExponentIndicator: if (current == '+' || current == '-') { record8(current); } else if (isDecimalDigit(current)) { record8(current); state = InExponent; } else { setDone(Bad); err = IllegalExponentIndicator; errmsg = QLatin1String("Illegal syntax for exponential number"); } break; case InExponent: if (isDecimalDigit(current)) { record8(current); } else { setDone(Number); } break; default: Q_ASSERT_X(0, "Lexer::lex", "Unhandled state in switch statement"); } // move on to the next character if (!done) shift(1); if (state != Start && state != InSingleLineComment) bol = false; } // no identifiers allowed directly after numeric literal, e.g. "3in" is bad if ((state == Number || state == Octal || state == Hex) && isIdentLetter(current)) { state = Bad; err = IllegalIdentifier; errmsg = QLatin1String("Identifier cannot start with numeric literal"); } // terminate string buffer8[pos8] = '\0'; double dval = 0; if (state == Number) { dval = qstrtod(buffer8, 0, 0); } else if (state == Hex) { // scan hex numbers dval = QScript::integerFromString(buffer8, pos8, 16); state = Number; } else if (state == Octal) { // scan octal number dval = QScript::integerFromString(buffer8, pos8, 8); state = Number; } restrKeyword = false; delimited = false; switch (parenthesesState) { case IgnoreParentheses: break; case CountParentheses: if (token == QScriptGrammar::T_RPAREN) { --parenthesesCount; if (parenthesesCount == 0) parenthesesState = BalancedParentheses; } else if (token == QScriptGrammar::T_LPAREN) { ++parenthesesCount; } break; case BalancedParentheses: parenthesesState = IgnoreParentheses; break; } switch (state) { case Eof: return 0; case Other: if(token == QScriptGrammar::T_RBRACE || token == 
QScriptGrammar::T_SEMICOLON) delimited = true; return token; case Identifier: if ((token = findReservedWord(buffer16, pos16)) < 0) { /* TODO: close leak on parse error. same holds true for String */ if (driver) { Q_ASSERT_X(false, Q_FUNC_INFO, "not implemented"); qsyylval.ustr = 0; // driver->intern(buffer16, pos16); } else qsyylval.ustr = 0; return QScriptGrammar::T_IDENTIFIER; } if (token == QScriptGrammar::T_CONTINUE || token == QScriptGrammar::T_BREAK || token == QScriptGrammar::T_RETURN || token == QScriptGrammar::T_THROW) { restrKeyword = true; } else if (token == QScriptGrammar::T_IF || token == QScriptGrammar::T_FOR || token == QScriptGrammar::T_WHILE || token == QScriptGrammar::T_WITH) { parenthesesState = CountParentheses; parenthesesCount = 0; } else if (token == QScriptGrammar::T_DO) { parenthesesState = BalancedParentheses; } return token; case String: if (driver) { Q_ASSERT_X(false, Q_FUNC_INFO, "not implemented"); qsyylval.ustr = 0; // driver->intern(buffer16, pos16); } else qsyylval.ustr = 0; return QScriptGrammar::T_STRING_LITERAL; case Number: qsyylval.dval = dval; return QScriptGrammar::T_NUMERIC_LITERAL; case Bad: return -1; default: Q_ASSERT(!"unhandled numeration value in switch"); return -1; } }
// Scans the next token from the input and returns its token code.
//
// p1 is cast to YYSTYPE* and receives the token's semantic value (ident,
// doubleValue or intValue, depending on the token). p2 is cast to YYLTYPE*
// and receives the token's line and start/end offsets.
//
// Returns 0 at end of input, -1 on a lexical error (m_error is set), and a
// token code otherwise. Single-character punctuators are returned as their
// character value.
//
// The scanner is written as a set of labelled sections connected by goto;
// the labels named done* finish a token and funnel into returnToken.
int Lexer::lex(void* p1, void* p2)
{
    // The scratch buffers must have been left empty by the previous call, and
    // the lexer must not be used again after an error.
    ASSERT(!m_error);
    ASSERT(m_buffer8.isEmpty());
    ASSERT(m_buffer16.isEmpty());

    YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1);
    YYLTYPE* llocp = static_cast<YYLTYPE*>(p2);
    int token = 0;
    m_terminator = false;

start:
    while (isWhiteSpace(m_current))
        shift1();

    int startOffset = currentOffset();

    // m_current == -1 is handled as end of input.
    if (m_current == -1) {
        if (!m_terminator && !m_delimited && !m_isReparsing) {
            // automatic semicolon insertion if program incomplete
            token = ';';
            goto doneSemicolon;
        }
        return 0;
    }

    m_delimited = false;
    switch (m_current) {
    // Punctuators: the longest match is tried first, using up to three
    // characters of look-ahead.
    case '>':
        if (m_next1 == '>' && m_next2 == '>') {
            if (m_next3 == '=') {
                shift4();
                token = URSHIFTEQUAL;
                break;
            }
            shift3();
            token = URSHIFT;
            break;
        }
        if (m_next1 == '>') {
            if (m_next2 == '=') {
                shift3();
                token = RSHIFTEQUAL;
                break;
            }
            shift2();
            token = RSHIFT;
            break;
        }
        if (m_next1 == '=') {
            shift2();
            token = GE;
            break;
        }
        shift1();
        token = '>';
        break;
    case '=':
        if (m_next1 == '=') {
            if (m_next2 == '=') {
                shift3();
                token = STREQ;
                break;
            }
            shift2();
            token = EQEQ;
            break;
        }
        shift1();
        token = '=';
        break;
    case '!':
        if (m_next1 == '=') {
            if (m_next2 == '=') {
                shift3();
                token = STRNEQ;
                break;
            }
            shift2();
            token = NE;
            break;
        }
        shift1();
        token = '!';
        break;
    case '<':
        if (m_next1 == '!' && m_next2 == '-' && m_next3 == '-') {
            // <!-- marks the beginning of a line comment (for www usage)
            shift4();
            goto inSingleLineComment;
        }
        if (m_next1 == '<') {
            if (m_next2 == '=') {
                shift3();
                token = LSHIFTEQUAL;
                break;
            }
            shift2();
            token = LSHIFT;
            break;
        }
        if (m_next1 == '=') {
            shift2();
            token = LE;
            break;
        }
        shift1();
        token = '<';
        break;
    case '+':
        if (m_next1 == '+') {
            shift2();
            // A distinct token is produced when a line terminator was seen
            // before the ++ (presumably so the grammar can apply automatic
            // semicolon insertion).
            if (m_terminator) {
                token = AUTOPLUSPLUS;
                break;
            }
            token = PLUSPLUS;
            break;
        }
        if (m_next1 == '=') {
            shift2();
            token = PLUSEQUAL;
            break;
        }
        shift1();
        token = '+';
        break;
    case '-':
        if (m_next1 == '-') {
            // "-->" at the start of a line is treated as a line comment.
            if (m_atLineStart && m_next2 == '>') {
                shift3();
                goto inSingleLineComment;
            }
            shift2();
            // Same distinction as for ++ above.
            if (m_terminator) {
                token = AUTOMINUSMINUS;
                break;
            }
            token = MINUSMINUS;
            break;
        }
        if (m_next1 == '=') {
            shift2();
            token = MINUSEQUAL;
            break;
        }
        shift1();
        token = '-';
        break;
    case '*':
        if (m_next1 == '=') {
            shift2();
            token = MULTEQUAL;
            break;
        }
        shift1();
        token = '*';
        break;
    case '/':
        if (m_next1 == '/') {
            shift2();
            goto inSingleLineComment;
        }
        // The "/*" itself is consumed at the inMultiLineComment label.
        if (m_next1 == '*')
            goto inMultiLineComment;
        if (m_next1 == '=') {
            shift2();
            token = DIVEQUAL;
            break;
        }
        shift1();
        token = '/';
        break;
    case '&':
        if (m_next1 == '&') {
            shift2();
            token = AND;
            break;
        }
        if (m_next1 == '=') {
            shift2();
            token = ANDEQUAL;
            break;
        }
        shift1();
        token = '&';
        break;
    case '^':
        if (m_next1 == '=') {
            shift2();
            token = XOREQUAL;
            break;
        }
        shift1();
        token = '^';
        break;
    case '%':
        if (m_next1 == '=') {
            shift2();
            token = MODEQUAL;
            break;
        }
        shift1();
        token = '%';
        break;
    case '|':
        if (m_next1 == '=') {
            shift2();
            token = OREQUAL;
            break;
        }
        if (m_next1 == '|') {
            shift2();
            token = OR;
            break;
        }
        shift1();
        token = '|';
        break;
    case '.':
        // ".5" style literal: a dot followed by a digit starts a number.
        if (isASCIIDigit(m_next1)) {
            record8('.');
            shift1();
            goto inNumberAfterDecimalPoint;
        }
        token = '.';
        shift1();
        break;
    case ',':
    case '~':
    case '?':
    case ':':
    case '(':
    case ')':
    case '[':
    case ']':
        token = m_current;
        shift1();
        break;
    case ';':
        shift1();
        m_delimited = true;
        token = ';';
        break;
    case '{':
        // Braces carry their source offset as the semantic value.
        lvalp->intValue = currentOffset();
        shift1();
        token = OPENBRACE;
        break;
    case '}':
        lvalp->intValue = currentOffset();
        shift1();
        m_delimited = true;
        token = CLOSEBRACE;
        break;
    case '\\':
        goto startIdentifierWithBackslash;
    case '0':
        goto startNumberWithZeroDigit;
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9':
        goto startNumber;
    case '"':
    case '\'':
        goto startString;
    default:
        if (isIdentStart(m_current))
            goto startIdentifierOrKeyword;
        if (isLineTerminator(m_current)) {
            shiftLineTerminator();
            m_atLineStart = true;
            m_terminator = true;
            // A line break after a restricted keyword yields a semicolon.
            if (lastTokenWasRestrKeyword()) {
                token = ';';
                goto doneSemicolon;
            }
            goto start;
        }
        goto returnError;
    }

    // Every punctuator case that leaves the switch via break ends up here.
    m_atLineStart = false;
    goto returnToken;

startString: {
    int stringQuoteCharacter = m_current;
    shift1();

    // Fast path: as long as no character needs special handling, the string
    // is taken directly from the source range without copying into
    // m_buffer16.
    const UChar* stringStart = currentCharacter();
    while (m_current != stringQuoteCharacter) {
        // Fast check for characters that require special handling.
        // Catches -1, \n, \r, \, 0x2028, and 0x2029 as efficiently
        // as possible, and lets through all common ASCII characters.
        if (UNLIKELY(m_current == '\\') || UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
            // Switch to the slow path, carrying over what was scanned so far.
            m_buffer16.append(stringStart, currentCharacter() - stringStart);
            goto inString;
        }
        shift1();
    }
    lvalp->ident = makeIdentifier(stringStart, currentCharacter() - stringStart);
    shift1();
    m_atLineStart = false;
    m_delimited = false;
    token = STRING;
    goto returnToken;

inString:
    // Slow path: characters are accumulated in m_buffer16.
    while (m_current != stringQuoteCharacter) {
        if (m_current == '\\')
            goto inStringEscapeSequence;
        // An unescaped line terminator or end of input inside a string is an
        // error.
        if (UNLIKELY(isLineTerminator(m_current)))
            goto returnError;
        if (UNLIKELY(m_current == -1))
            goto returnError;
        record16(m_current);
        shift1();
    }
    goto doneString;

inStringEscapeSequence:
    shift1();
    if (m_current == 'x') {
        shift1();
        if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1)) {
            record16(convertHex(m_current, m_next1));
            shift2();
            goto inString;
        }
        // "\x" without two hex digits is recorded as a plain 'x'.
        record16('x');
        if (m_current == stringQuoteCharacter)
            goto doneString;
        goto inString;
    }
    if (m_current == 'u') {
        shift1();
        if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1) && isASCIIHexDigit(m_next2) && isASCIIHexDigit(m_next3)) {
            record16(convertUnicode(m_current, m_next1, m_next2, m_next3));
            shift4();
            goto inString;
        }
        // "\u" directly before the closing quote is recorded as a plain 'u';
        // any other malformed \u escape is an error.
        if (m_current == stringQuoteCharacter) {
            record16('u');
            goto doneString;
        }
        goto returnError;
    }
    if (isASCIIOctalDigit(m_current)) {
        // Octal escape of one to three digits; the three-digit form is only
        // taken when the first digit is in '0'..'3'.
        if (m_current >= '0' && m_current <= '3' && isASCIIOctalDigit(m_next1) && isASCIIOctalDigit(m_next2)) {
            record16((m_current - '0') * 64 + (m_next1 - '0') * 8 + m_next2 - '0');
            shift3();
            goto inString;
        }
        if (isASCIIOctalDigit(m_next1)) {
            record16((m_current - '0') * 8 + m_next1 - '0');
            shift2();
            goto inString;
        }
        record16(m_current - '0');
        shift1();
        goto inString;
    }
    // Backslash followed by a line terminator: the line break is skipped and
    // nothing is recorded.
    if (isLineTerminator(m_current)) {
        shiftLineTerminator();
        goto inString;
    }
    record16(singleEscape(m_current));
    shift1();
    goto inString;
}

startIdentifierWithBackslash:
    // The identifier begins with a \uXXXX escape; the decoded character must
    // be a valid identifier start.
    shift1();
    if (UNLIKELY(m_current != 'u'))
        goto returnError;
    shift1();
    if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)))
        goto returnError;
    // token is used here as scratch storage for the decoded character.
    token = convertUnicode(m_current, m_next1, m_next2, m_next3);
    if (UNLIKELY(!isIdentStart(token)))
        goto returnError;
    goto inIdentifierAfterCharacterCheck;

startIdentifierOrKeyword: {
    // Fast path: without a backslash, the identifier is taken directly from
    // the source range and then looked up in the keyword table.
    const UChar* identifierStart = currentCharacter();
    shift1();
    while (isIdentPart(m_current))
        shift1();
    if (LIKELY(m_current != '\\')) {
        lvalp->ident = makeIdentifier(identifierStart, currentCharacter() - identifierStart);
        goto doneIdentifierOrKeyword;
    }
    // An escape follows: continue on the slow path with what was scanned so
    // far copied into m_buffer16.
    m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
}

    // Slow path: each iteration consumes one \uXXXX escape plus the plain
    // identifier characters that follow it. Identifiers finished here go to
    // doneIdentifier, which does not consult the keyword table.
    do {
        shift1();
        if (UNLIKELY(m_current != 'u'))
            goto returnError;
        shift1();
        if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)))
            goto returnError;
        token = convertUnicode(m_current, m_next1, m_next2, m_next3);
        if (UNLIKELY(!isIdentPart(token)))
            goto returnError;
inIdentifierAfterCharacterCheck:
        // Also entered from startIdentifierWithBackslash, with the decoded
        // character already validated and held in token.
        record16(token);
        shift4();

        while (isIdentPart(m_current)) {
            record16(m_current);
            shift1();
        }
    } while (UNLIKELY(m_current == '\\'));
    goto doneIdentifier;

inSingleLineComment:
    while (!isLineTerminator(m_current)) {
        // End of input inside a line comment ends the token stream directly.
        if (UNLIKELY(m_current == -1))
            return 0;
        shift1();
    }
    shiftLineTerminator();
    m_atLineStart = true;
    m_terminator = true;
    if (lastTokenWasRestrKeyword())
        goto doneSemicolon;
    goto start;

inMultiLineComment:
    // Skip the opening "/*", then scan for the closing "*/".
    shift2();
    while (m_current != '*' || m_next1 != '/') {
        if (isLineTerminator(m_current))
            shiftLineTerminator();
        else {
            shift1();
            // Unterminated comment.
            if (UNLIKELY(m_current == -1))
                goto returnError;
        }
    }
    shift2();
    m_atLineStart = false;
    goto start;

startNumberWithZeroDigit:
    // The leading '0' is consumed without being recorded; the branches that
    // need it for strtod record it explicitly.
    shift1();
    // (c | 0x20) folds an ASCII upper-case letter onto its lower-case form.
    if ((m_current | 0x20) == 'x' && isASCIIHexDigit(m_next1)) {
        shift1();
        goto inHex;
    }
    if (m_current == '.') {
        record8('0');
        record8('.');
        shift1();
        goto inNumberAfterDecimalPoint;
    }
    if ((m_current | 0x20) == 'e') {
        record8('0');
        record8('e');
        shift1();
        goto inExponentIndicator;
    }
    if (isASCIIOctalDigit(m_current))
        goto inOctal;
    if (isASCIIDigit(m_current))
        goto startNumber;
    // A lone zero.
    lvalp->doubleValue = 0;
    goto doneNumeric;

inNumberAfterDecimalPoint:
    while (isASCIIDigit(m_current)) {
        record8(m_current);
        shift1();
    }
    if ((m_current | 0x20) == 'e') {
        record8('e');
        shift1();
        goto inExponentIndicator;
    }
    goto doneNumber;

inExponentIndicator:
    // Optional single sign, then at least one digit is required.
    if (m_current == '+' || m_current == '-') {
        record8(m_current);
        shift1();
    }
    if (!isASCIIDigit(m_current))
        goto returnError;
    do {
        record8(m_current);
        shift1();
    } while (isASCIIDigit(m_current));
    goto doneNumber;

inOctal: {
    do {
        record8(m_current);
        shift1();
    } while (isASCIIOctalDigit(m_current));
    // An 8 or 9 after the octal digits: continue scanning as a decimal
    // number, keeping the digits recorded so far.
    if (isASCIIDigit(m_current))
        goto startNumber;

    double dval = 0;

    // Accumulate the value digit by digit in base 8.
    const char* end = m_buffer8.end();
    for (const char* p = m_buffer8.data(); p < end; ++p) {
        dval *= 8;
        dval += *p - '0';
    }
    // At or above this bound the value is recomputed by parseIntOverflow
    // (presumably because the simple accumulation is no longer exact).
    if (dval >= mantissaOverflowLowerBound)
        dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 8);

    m_buffer8.resize(0);

    lvalp->doubleValue = dval;
    goto doneNumeric;
}

inHex: {
    do {
        record8(m_current);
        shift1();
    } while (isASCIIHexDigit(m_current));

    double dval = 0;

    // Accumulate the value digit by digit in base 16.
    const char* end = m_buffer8.end();
    for (const char* p = m_buffer8.data(); p < end; ++p) {
        dval *= 16;
        dval += toASCIIHexValue(*p);
    }
    // Same overflow handling as in inOctal.
    if (dval >= mantissaOverflowLowerBound)
        dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 16);

    m_buffer8.resize(0);

    lvalp->doubleValue = dval;
    goto doneNumeric;
}

startNumber:
    record8(m_current);
    shift1();
    while (isASCIIDigit(m_current)) {
        record8(m_current);
        shift1();
    }
    if (m_current == '.') {
        record8('.');
        shift1();
        goto inNumberAfterDecimalPoint;
    }
    if ((m_current | 0x20) == 'e') {
        record8('e');
        shift1();
        goto inExponentIndicator;
    }

    // Fall through into doneNumber.

doneNumber:
    // Null-terminate string for strtod.
    m_buffer8.append('\0');
    lvalp->doubleValue = WTF::strtod(m_buffer8.data(), 0);
    m_buffer8.resize(0);

    // Fall through into doneNumeric.

doneNumeric:
    // No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
    if (UNLIKELY(isIdentStart(m_current)))
        goto returnError;

    m_atLineStart = false;
    m_delimited = false;
    token = NUMBER;
    goto returnToken;

doneSemicolon:
    // Reached both for real line-break-induced semicolons and for automatic
    // insertion at end of input.
    token = ';';
    m_delimited = true;
    goto returnToken;

doneIdentifier:
    // Identifier assembled in m_buffer16 (it contained at least one escape).
    m_atLineStart = false;
    m_delimited = false;
    lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
    m_buffer16.resize(0);
    token = IDENT;
    goto returnToken;

doneIdentifierOrKeyword: {
    // lvalp->ident was already set on the fast path; decide whether it is a
    // keyword by looking it up in the keyword table.
    m_atLineStart = false;
    m_delimited = false;
    m_buffer16.resize(0);
    const HashEntry* entry = m_keywordTable.entry(m_globalData, *lvalp->ident);
    token = entry ? entry->lexerValue() : IDENT;
    goto returnToken;
}

doneString:
    // Atomize constant strings in case they're later used in property lookup.
    // The shift1() below consumes the closing quote.
    shift1();
    m_atLineStart = false;
    m_delimited = false;
    lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
    m_buffer16.resize(0);
    token = STRING;

    // Fall through into returnToken.

returnToken: {
    // Report the token's location: the current line for both first and last
    // line, and the start/end offsets in the column fields.
    int lineNumber = m_lineNumber;
    llocp->first_line = lineNumber;
    llocp->last_line = lineNumber;
    llocp->first_column = startOffset;
    llocp->last_column = currentOffset();

    // Remember the token for the next call.
    m_lastToken = token;
    return token;
}

returnError:
    m_error = true;
    return -1;
}