/** * Check if the character falls into IdentifierPart group (ECMA-262 v5, 7.6) * * @return true / false */ static bool lexer_is_char_can_be_identifier_part (ecma_char_t c) /**< a character */ { return (lexer_is_char_can_be_identifier_start (c) || lit_char_is_unicode_combining_mark (c) || lit_char_is_unicode_digit (c) || lit_char_is_unicode_connector_punctuation (c)); } /* lexer_is_char_can_be_identifier_part */
/** * Parse numeric literal (ECMA-262, v5, 7.8.3) * * @return token of TOK_SMALL_INT or TOK_NUMBER types */ static token lexer_parse_number (void) { ecma_char_t c = LA (0); bool is_hex = false; bool is_fp = false; ecma_number_t fp_res = .0; size_t tok_length = 0, i; token known_token; JERRY_ASSERT (lit_char_is_decimal_digit (c) || c == LIT_CHAR_DOT); if (c == LIT_CHAR_0) { if (LA (1) == LIT_CHAR_LOWERCASE_X || LA (1) == LIT_CHAR_UPPERCASE_X) { is_hex = true; } } else if (c == LIT_CHAR_DOT) { JERRY_ASSERT (lit_char_is_decimal_digit (LA (1))); is_fp = true; } if (is_hex) { // Eat up '0x' consume_char (); consume_char (); new_token (); c = LA (0); if (!lit_char_is_hex_digit (c)) { PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX, "Invalid HexIntegerLiteral", lit_utf8_iterator_get_pos (&src_iter)); } do { consume_char (); c = LA (0); } while (lit_char_is_hex_digit (c)); if (lexer_is_char_can_be_identifier_start (c)) { PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX, "Identifier just after integer literal", lit_utf8_iterator_get_pos (&src_iter)); } tok_length = (size_t) (TOK_SIZE ()); const lit_utf8_byte_t *fp_buf_p = TOK_START (); /* token is constructed at end of function */ for (i = 0; i < tok_length; i++) { fp_res = fp_res * 16 + (ecma_number_t) lit_char_hex_to_int (fp_buf_p[i]); } } else { bool is_exp = false; new_token (); // Eat up '.' if (is_fp) { consume_char (); } while (true) { c = LA (0); if (c == LIT_CHAR_DOT) { if (is_fp) { /* token is constructed at end of function */ break; } else { is_fp = true; consume_char (); continue; } } else if (c == LIT_CHAR_LOWERCASE_E || c == LIT_CHAR_UPPERCASE_E) { if (is_exp) { PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX, "Numeric literal shall not contain more than exponential marker ('e' or 'E')", lit_utf8_iterator_get_pos (&src_iter)); } else { is_exp = true; consume_char (); if (LA (0) == LIT_CHAR_MINUS || LA (0) == LIT_CHAR_PLUS) { consume_char (); } continue; } } else if (!lit_char_is_decimal_digit (c)) { if (lexer_is_char_can_be_identifier_start (c)) { PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX, "Numeric literal shall not contain non-numeric characters", lit_utf8_iterator_get_pos (&src_iter)); } /* token is constructed at end of function */ break; } consume_char (); } tok_length = (size_t) (TOK_SIZE ()); if (is_fp || is_exp) { ecma_number_t res = ecma_utf8_string_to_number (TOK_START (), (jerry_api_size_t) tok_length); JERRY_ASSERT (!ecma_number_is_nan (res)); known_token = convert_seen_num_to_token (res); is_token_parse_in_progress = NULL; return known_token; } else if (*TOK_START () == LIT_CHAR_0 && tok_length != 1) { /* Octal integer literals */ if (strict_mode) { PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX, "Octal integer literals are not allowed in strict mode", token_start_pos); } else { /* token is constructed at end of function */ const lit_utf8_byte_t *fp_buf_p = TOK_START (); for (i = 0; i < tok_length; i++) { fp_res = fp_res * 8 + (ecma_number_t) lit_char_hex_to_int (fp_buf_p[i]); } } } else { const lit_utf8_byte_t *fp_buf_p = TOK_START (); /* token is constructed at end of function */ ecma_number_t mult = 1.0f; for (i = tok_length; i > 0; i--, mult *= 10) { fp_res += (ecma_number_t) lit_char_hex_to_int (fp_buf_p[i - 1]) * mult; } } } if (fp_res >= 0 && fp_res <= 255 && (uint8_t) fp_res == fp_res) { known_token = create_token (TOK_SMALL_INT, (uint8_t) fp_res); is_token_parse_in_progress = NULL; return known_token; } else { known_token = convert_seen_num_to_token (fp_res); is_token_parse_in_progress = NULL; return known_token; } } /* lexer_parse_number */
/** * Parse Identifier (ECMA-262 v5, 7.6) or ReservedWord (7.6.1; 7.8.1; 7.8.2). * * @return TOK_NAME - for Identifier, * TOK_KEYWORD - for Keyword or FutureReservedWord, * TOK_NULL - for NullLiteral, * TOK_BOOL - for BooleanLiteral */ static token lexer_parse_identifier_or_keyword (void) { ecma_char_t c = LA (0); JERRY_ASSERT (lexer_is_char_can_be_identifier_start (c)); new_token (); bool is_correct_identifier_name = true; bool is_escape_sequence_occured = false; bool is_all_chars_were_lowercase_ascii = true; while (true) { c = LA (0); if (c == LIT_CHAR_BACKSLASH) { consume_char (); is_escape_sequence_occured = true; bool is_unicode_escape_sequence = (LA (0) == LIT_CHAR_LOWERCASE_U); consume_char (); if (is_unicode_escape_sequence) { /* UnicodeEscapeSequence */ if (!lexer_convert_escape_sequence_digits_to_char (&src_iter, true, &c)) { is_correct_identifier_name = false; break; } else { /* c now contains character, encoded in the UnicodeEscapeSequence */ // Check character, converted from UnicodeEscapeSequence if (!lexer_is_char_can_be_identifier_part (c)) { is_correct_identifier_name = false; break; } } } else { is_correct_identifier_name = false; break; } } else if (!lexer_is_char_can_be_identifier_part (c)) { break; } else { if (!(c >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_BEGIN && c <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_END)) { is_all_chars_were_lowercase_ascii = false; } consume_char (); } } if (!is_correct_identifier_name) { PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX, "Illegal identifier name", lit_utf8_iterator_get_pos (&src_iter)); } const lit_utf8_size_t charset_size = TOK_SIZE (); token ret = empty_token; if (!is_escape_sequence_occured && is_all_chars_were_lowercase_ascii) { /* Keyword or FutureReservedWord (TOK_KEYWORD), or boolean literal (TOK_BOOL), or null literal (TOK_NULL) */ ret = lexer_parse_reserved_word (TOK_START (), charset_size); } if (is_empty (ret)) { /* Identifier (TOK_NAME) */ if (!is_escape_sequence_occured) { ret = lexer_create_token_for_charset (TOK_NAME, TOK_START (), charset_size); } else { ret = lexer_create_token_for_charset_transform_escape_sequences (TOK_NAME, TOK_START (), charset_size); } } is_token_parse_in_progress = false; return ret; } /* lexer_parse_identifier_or_keyword */
/** * Parse and construct lexer token * * Note: * Currently, lexer token doesn't fully correspond to Token, defined in ECMA-262, v5, 7.5. * For example, there is no new-line token type in the token definition of ECMA-262 v5. * * @return constructed token */ static token lexer_parse_token (void) { ecma_char_t c = LA (0); if (lit_char_is_white_space (c)) { while (lit_char_is_white_space (c)) { consume_char (); c = LA (0); } } if (lit_char_is_line_terminator (c)) { while (lit_char_is_line_terminator (c)) { consume_char (); c = LA (0); } return create_token (TOK_NEWLINE, 0); } JERRY_ASSERT (is_token_parse_in_progress == false); /* ECMA-262 v5, 7.6, Identifier */ if (lexer_is_char_can_be_identifier_start (c)) { return lexer_parse_identifier_or_keyword (); } /* ECMA-262 v5, 7.8.3, Numeric literal */ if (lit_char_is_decimal_digit (c) || (c == LIT_CHAR_DOT && lit_char_is_decimal_digit (LA (1)))) { return lexer_parse_number (); } if (c == LIT_CHAR_LF) { consume_char (); return create_token (TOK_NEWLINE, 0); } if (c == LIT_CHAR_NULL) { return create_token (TOK_EOF, 0); } if (c == LIT_CHAR_SINGLE_QUOTE || c == LIT_CHAR_DOUBLE_QUOTE) { return lexer_parse_string (); } /* ECMA-262 v5, 7.4, SingleLineComment or MultiLineComment */ if (c == LIT_CHAR_SLASH && (LA (1) == LIT_CHAR_SLASH || LA (1) == LIT_CHAR_ASTERISK)) { if (lexer_parse_comment ()) { return create_token (TOK_NEWLINE, 0); } else { return lexer_parse_token (); } } if (c == LIT_CHAR_SLASH && !(prev_non_lf_token.type == TOK_NAME || prev_non_lf_token.type == TOK_NULL || prev_non_lf_token.type == TOK_BOOL || prev_non_lf_token.type == TOK_CLOSE_BRACE || prev_non_lf_token.type == TOK_CLOSE_SQUARE || prev_non_lf_token.type == TOK_CLOSE_PAREN || prev_non_lf_token.type == TOK_SMALL_INT || prev_non_lf_token.type == TOK_NUMBER || prev_non_lf_token.type == TOK_STRING || prev_non_lf_token.type == TOK_REGEXP)) { return lexer_parse_regexp (); } /* ECMA-262 v5, 7.7, Punctuator */ switch (c) { case LIT_CHAR_LEFT_BRACE: { RETURN_PUNC (TOK_OPEN_BRACE); break; } case LIT_CHAR_RIGHT_BRACE: { RETURN_PUNC (TOK_CLOSE_BRACE); break; } case LIT_CHAR_LEFT_PAREN: { RETURN_PUNC (TOK_OPEN_PAREN); break; } case LIT_CHAR_RIGHT_PAREN: { RETURN_PUNC (TOK_CLOSE_PAREN); break; } case LIT_CHAR_LEFT_SQUARE: { RETURN_PUNC (TOK_OPEN_SQUARE); break; } case LIT_CHAR_RIGHT_SQUARE: { RETURN_PUNC (TOK_CLOSE_SQUARE); break; } case LIT_CHAR_DOT: { RETURN_PUNC (TOK_DOT); break; } case LIT_CHAR_SEMICOLON: { RETURN_PUNC (TOK_SEMICOLON); break; } case LIT_CHAR_COMMA: { RETURN_PUNC (TOK_COMMA); break; } case LIT_CHAR_TILDE: { RETURN_PUNC (TOK_COMPL); break; } case LIT_CHAR_COLON: { RETURN_PUNC (TOK_COLON); break; } case LIT_CHAR_QUESTION: { RETURN_PUNC (TOK_QUERY); break; } case LIT_CHAR_ASTERISK: { IF_LA_IS (LIT_CHAR_EQUALS, TOK_MULT_EQ, TOK_MULT); break; } case LIT_CHAR_SLASH: { IF_LA_IS (LIT_CHAR_EQUALS, TOK_DIV_EQ, TOK_DIV); break; } case LIT_CHAR_CIRCUMFLEX: { IF_LA_IS (LIT_CHAR_EQUALS, TOK_XOR_EQ, TOK_XOR); break; } case LIT_CHAR_PERCENT: { IF_LA_IS (LIT_CHAR_EQUALS, TOK_MOD_EQ, TOK_MOD); break; } case LIT_CHAR_PLUS: { IF_LA_IS_OR (LIT_CHAR_PLUS, TOK_DOUBLE_PLUS, LIT_CHAR_EQUALS, TOK_PLUS_EQ, TOK_PLUS); break; } case LIT_CHAR_MINUS: { IF_LA_IS_OR (LIT_CHAR_MINUS, TOK_DOUBLE_MINUS, LIT_CHAR_EQUALS, TOK_MINUS_EQ, TOK_MINUS); break; } case LIT_CHAR_AMPERSAND: { IF_LA_IS_OR (LIT_CHAR_AMPERSAND, TOK_DOUBLE_AND, LIT_CHAR_EQUALS, TOK_AND_EQ, TOK_AND); break; } case LIT_CHAR_VLINE: { IF_LA_IS_OR (LIT_CHAR_VLINE, TOK_DOUBLE_OR, LIT_CHAR_EQUALS, TOK_OR_EQ, TOK_OR); break; } case LIT_CHAR_LESS_THAN: { switch (LA (1)) { case LIT_CHAR_LESS_THAN: IF_LA_N_IS (LIT_CHAR_EQUALS, TOK_LSHIFT_EQ, TOK_LSHIFT, 2); break; case LIT_CHAR_EQUALS: RETURN_PUNC_EX (TOK_LESS_EQ, 2); break; default: RETURN_PUNC (TOK_LESS); } break; } case LIT_CHAR_GREATER_THAN: { switch (LA (1)) { case LIT_CHAR_GREATER_THAN: { switch (LA (2)) { case LIT_CHAR_GREATER_THAN: IF_LA_N_IS (LIT_CHAR_EQUALS, TOK_RSHIFT_EX_EQ, TOK_RSHIFT_EX, 3); break; case LIT_CHAR_EQUALS: RETURN_PUNC_EX (TOK_RSHIFT_EQ, 3); break; default: RETURN_PUNC_EX (TOK_RSHIFT, 2); } break; } case LIT_CHAR_EQUALS: RETURN_PUNC_EX (TOK_GREATER_EQ, 2); break; default: RETURN_PUNC (TOK_GREATER); } break; } case LIT_CHAR_EQUALS: { if (LA (1) == LIT_CHAR_EQUALS) { IF_LA_N_IS (LIT_CHAR_EQUALS, TOK_TRIPLE_EQ, TOK_DOUBLE_EQ, 2); } else { RETURN_PUNC (TOK_EQ); } break; } case LIT_CHAR_EXCLAMATION: { if (LA (1) == LIT_CHAR_EQUALS) { IF_LA_N_IS (LIT_CHAR_EQUALS, TOK_NOT_DOUBLE_EQ, TOK_NOT_EQ, 2); } else { RETURN_PUNC (TOK_NOT); } break; } } PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX, "Illegal character", lit_utf8_iterator_get_pos (&src_iter)); } /* lexer_parse_token */
/** * Parse and construct lexer token * * Note: * Currently, lexer token doesn't fully correspond to Token, defined in ECMA-262, v5, 7.5. * For example, there is no new-line token type in the token definition of ECMA-262 v5. * * Note: * For Lexer alone, it is hard to find out a token is whether a regexp or a division. * Parser must set maybe_regexp to true if a regexp is expected. * Otherwise, a division is expected. * * @return constructed token */ static token lexer_parse_token (bool maybe_regexp, /**< read '/' as regexp? */ bool *out_is_preceed_by_new_lines_p, /**< out: is constructed token preceded by newlines? */ bool is_strict) /**< flag, indicating whether current code is in strict mode code */ { JERRY_ASSERT (is_token_parse_in_progress == false); *out_is_preceed_by_new_lines_p = lexer_skip_whitespace_and_comments (); ecma_char_t c = LA (0); /* ECMA-262 v5, 7.6, Identifier */ if (lexer_is_char_can_be_identifier_start (c)) { return lexer_parse_identifier_or_keyword (is_strict); } /* ECMA-262 v5, 7.8.3, Numeric literal */ if (lit_char_is_decimal_digit (c) || (c == LIT_CHAR_DOT && lit_char_is_decimal_digit (LA (1)))) { return lexer_parse_number (is_strict); } if (c == LIT_CHAR_NULL) { return create_token (TOK_EOF, 0); } if (c == LIT_CHAR_SINGLE_QUOTE || c == LIT_CHAR_DOUBLE_QUOTE) { return lexer_parse_string (); } if (c == LIT_CHAR_SLASH && maybe_regexp) { return lexer_parse_regexp (); } /* ECMA-262 v5, 7.7, Punctuator */ switch (c) { case LIT_CHAR_LEFT_BRACE: { RETURN_PUNC (TOK_OPEN_BRACE); break; } case LIT_CHAR_RIGHT_BRACE: { RETURN_PUNC (TOK_CLOSE_BRACE); break; } case LIT_CHAR_LEFT_PAREN: { RETURN_PUNC (TOK_OPEN_PAREN); break; } case LIT_CHAR_RIGHT_PAREN: { RETURN_PUNC (TOK_CLOSE_PAREN); break; } case LIT_CHAR_LEFT_SQUARE: { RETURN_PUNC (TOK_OPEN_SQUARE); break; } case LIT_CHAR_RIGHT_SQUARE: { RETURN_PUNC (TOK_CLOSE_SQUARE); break; } case LIT_CHAR_DOT: { RETURN_PUNC (TOK_DOT); break; } case LIT_CHAR_SEMICOLON: { RETURN_PUNC (TOK_SEMICOLON); break; } case LIT_CHAR_COMMA: { RETURN_PUNC (TOK_COMMA); break; } case LIT_CHAR_TILDE: { RETURN_PUNC (TOK_COMPL); break; } case LIT_CHAR_COLON: { RETURN_PUNC (TOK_COLON); break; } case LIT_CHAR_QUESTION: { RETURN_PUNC (TOK_QUERY); break; } case LIT_CHAR_ASTERISK: { IF_LA_IS (LIT_CHAR_EQUALS, TOK_MULT_EQ, TOK_MULT); break; } case LIT_CHAR_SLASH: { IF_LA_IS (LIT_CHAR_EQUALS, TOK_DIV_EQ, TOK_DIV); break; } case LIT_CHAR_CIRCUMFLEX: { IF_LA_IS (LIT_CHAR_EQUALS, TOK_XOR_EQ, TOK_XOR); break; } case LIT_CHAR_PERCENT: { IF_LA_IS (LIT_CHAR_EQUALS, TOK_MOD_EQ, TOK_MOD); break; } case LIT_CHAR_PLUS: { IF_LA_IS_OR (LIT_CHAR_PLUS, TOK_DOUBLE_PLUS, LIT_CHAR_EQUALS, TOK_PLUS_EQ, TOK_PLUS); break; } case LIT_CHAR_MINUS: { IF_LA_IS_OR (LIT_CHAR_MINUS, TOK_DOUBLE_MINUS, LIT_CHAR_EQUALS, TOK_MINUS_EQ, TOK_MINUS); break; } case LIT_CHAR_AMPERSAND: { IF_LA_IS_OR (LIT_CHAR_AMPERSAND, TOK_DOUBLE_AND, LIT_CHAR_EQUALS, TOK_AND_EQ, TOK_AND); break; } case LIT_CHAR_VLINE: { IF_LA_IS_OR (LIT_CHAR_VLINE, TOK_DOUBLE_OR, LIT_CHAR_EQUALS, TOK_OR_EQ, TOK_OR); break; } case LIT_CHAR_LESS_THAN: { switch (LA (1)) { case LIT_CHAR_LESS_THAN: IF_LA_N_IS (LIT_CHAR_EQUALS, TOK_LSHIFT_EQ, TOK_LSHIFT, 2); break; case LIT_CHAR_EQUALS: RETURN_PUNC_EX (TOK_LESS_EQ, 2); break; default: RETURN_PUNC (TOK_LESS); } break; } case LIT_CHAR_GREATER_THAN: { switch (LA (1)) { case LIT_CHAR_GREATER_THAN: { switch (LA (2)) { case LIT_CHAR_GREATER_THAN: IF_LA_N_IS (LIT_CHAR_EQUALS, TOK_RSHIFT_EX_EQ, TOK_RSHIFT_EX, 3); break; case LIT_CHAR_EQUALS: RETURN_PUNC_EX (TOK_RSHIFT_EQ, 3); break; default: RETURN_PUNC_EX (TOK_RSHIFT, 2); } break; } case LIT_CHAR_EQUALS: RETURN_PUNC_EX (TOK_GREATER_EQ, 2); break; default: RETURN_PUNC (TOK_GREATER); } break; } case LIT_CHAR_EQUALS: { if (LA (1) == LIT_CHAR_EQUALS) { IF_LA_N_IS (LIT_CHAR_EQUALS, TOK_TRIPLE_EQ, TOK_DOUBLE_EQ, 2); } else { RETURN_PUNC (TOK_EQ); } break; } case LIT_CHAR_EXCLAMATION: { if (LA (1) == LIT_CHAR_EQUALS) { IF_LA_N_IS (LIT_CHAR_EQUALS, TOK_NOT_DOUBLE_EQ, TOK_NOT_EQ, 2); } else { RETURN_PUNC (TOK_NOT); } break; } } PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX, "Illegal character", lit_utf8_iterator_get_pos (&src_iter)); } /* lexer_parse_token */