/**
 * Mark the beginning of a new token at the current source position
 *
 * Stores the current iterator position in token_start_pos and raises
 * the is_token_parse_in_progress flag.
 */
static void
new_token (void)
{
  token_start_pos = lit_utf8_iterator_get_pos (&src_iter);

  /* The recorded start position must not be flagged as a non-BMP middle position. */
  JERRY_ASSERT (!token_start_pos.is_non_bmp_middle);

  is_token_parse_in_progress = true;
} /* new_token */
/**
 * Construct next token from current source code position and increment the position
 *
 * A token stashed in saved_token is handed out first; otherwise a fresh
 * token is parsed from the source.
 *
 * @return the constructed token (also stored in sent_token)
 */
token
lexer_next_token (bool maybe_regexp, /**< read '/' as regexp? */
                  bool is_strict) /**< strict mode is on (true) / off (false) */
{
  const lit_utf8_iterator_pos_t cur_pos = lit_utf8_iterator_get_pos (&src_iter);

  /* Dump the current line when the iterator is still at the very beginning of the source. */
  if (cur_pos.offset == 0 && !cur_pos.is_non_bmp_middle)
  {
    dump_current_line ();
  }

  /* A saved token is consumed before anything new is parsed. */
  if (!is_empty (saved_token))
  {
    sent_token = saved_token;
    saved_token = empty_token;

    return sent_token;
  }

  /**
   * FIXME:
   *       The way to raise syntax errors for unexpected EOF
   *       should be reworked so that EOF would be checked by
   *       caller of the routine, and the following condition
   *       would be checked as assertion in the routine.
   */
  if (lexer_get_token_type (prev_token) == TOK_EOF
      && lexer_get_token_type (sent_token) == TOK_EOF)
  {
    PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX, "Unexpected EOF", lit_utf8_iterator_get_pos (&src_iter));
  }

  prev_token = sent_token;

  bool has_leading_newlines;
  sent_token = lexer_parse_token (maybe_regexp, &has_leading_newlines, is_strict);

  /* Flags are rebuilt from scratch for every freshly parsed token. */
  jsp_token_flag_t token_flags = JSP_TOKEN_FLAG__NO_FLAGS;

  if (has_leading_newlines)
  {
    token_flags = (jsp_token_flag_t) (token_flags | JSP_TOKEN_FLAG_PRECEDED_BY_NEWLINES);
  }

  sent_token.flags = token_flags;

  return sent_token;
} /* lexer_next_token */
token lexer_next_token (void) { lit_utf8_iterator_pos_t src_pos = lit_utf8_iterator_get_pos (&src_iter); if (src_pos.offset == 0 && !src_pos.is_non_bmp_middle) { dump_current_line (); } if (!is_empty (saved_token)) { sent_token = saved_token; saved_token = empty_token; goto end; } /** * FIXME: * The way to raise syntax errors for unexpected EOF * should be reworked so that EOF would be checked by * caller of the routine, and the following condition * would be checked as assertion in the routine. */ if (prev_token.type == TOK_EOF && sent_token.type == TOK_EOF) { PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX, "Unexpected EOF", lit_utf8_iterator_get_pos (&src_iter)); } prev_token = sent_token; sent_token = lexer_parse_token (); if (sent_token.type == TOK_NEWLINE) { dump_current_line (); } else { prev_non_lf_token = sent_token; } end: return sent_token; }
/**
 * Convert locus to line and column
 *
 * The source buffer is scanned from its beginning up to the locus.
 * Every line terminator starts a new line and resets the column;
 * a CR immediately followed by LF is counted as a single line terminator.
 * Both results are zero-based counters; either output pointer may be NULL.
 */
void
lexer_locus_to_line_and_column (lit_utf8_iterator_pos_t locus, /**< iterator position in the source script */
                                size_t *line, /**< @out: line number */
                                size_t *column) /**< @out: column number */
{
  JERRY_ASSERT ((lit_utf8_size_t) (locus.offset + locus.is_non_bmp_middle) <= buffer_size);

  lit_utf8_iterator_t iter = lit_utf8_iterator_create (buffer_start, (lit_utf8_size_t) buffer_size);
  lit_utf8_iterator_pos_t iter_pos = lit_utf8_iterator_get_pos (&iter);

  size_t l = 0, c = 0;

  while (!lit_utf8_iterator_is_eos (&iter)
         && lit_utf8_iterator_pos_cmp (iter_pos, locus) < 0)
  {
    ecma_char_t code_unit = lit_utf8_iterator_read_next (&iter);

    if (lit_char_is_line_terminator (code_unit))
    {
      /* Treat CR LF as one line terminator */
      if (code_unit == LIT_CHAR_CR
          && !lit_utf8_iterator_is_eos (&iter)
          && lit_utf8_iterator_peek_next (&iter) == LIT_CHAR_LF)
      {
        lit_utf8_iterator_incr (&iter);
      }

      c = 0;
      l++;
    }
    else
    {
      c++;
    }

    /*
     * Refresh the position only after a possible LF skip. Otherwise the loop
     * condition compares a stale position (just after the CR) and a locus at
     * the start of the line following CR LF is reported with column 1, not 0.
     */
    iter_pos = lit_utf8_iterator_get_pos (&iter);
  }

  if (line)
  {
    *line = l;
  }

  if (column)
  {
    *column = c;
  }
} /* lexer_locus_to_line_and_column */
/**
 * Get the locus that describes the lexer's current position
 *
 * @return start position of the token being parsed, if a token parse is in progress,
 *         current position of the source iterator - otherwise
 */
static locus
current_locus (void)
{
  if (!is_token_parse_in_progress)
  {
    return lit_utf8_iterator_get_pos (&src_iter);
  }

  return token_start_pos;
} /* current_locus */
/** * Parse a comment * * @return true if newline was met during parsing * false - otherwise */ static bool lexer_parse_comment (void) { ecma_char_t c = LA (0); bool multiline; bool was_newlines = false; JERRY_ASSERT (LA (0) == LIT_CHAR_SLASH); JERRY_ASSERT (LA (1) == LIT_CHAR_SLASH || LA (1) == LIT_CHAR_ASTERISK); multiline = (LA (1) == LIT_CHAR_ASTERISK); consume_char (); consume_char (); while (!lit_utf8_iterator_is_eos (&src_iter)) { c = LA (0); if (!multiline) { if (lit_char_is_line_terminator (c)) { return true; } } else { if (c == LIT_CHAR_ASTERISK && LA (1) == LIT_CHAR_SLASH) { consume_char (); consume_char (); return was_newlines; } else if (lit_char_is_line_terminator (c)) { was_newlines = true; } } consume_char (); } if (multiline) { PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX, "Unclosed multiline comment", lit_utf8_iterator_get_pos (&src_iter)); } return false; } /* lexer_parse_comment */
/**
 * The String.prototype object's 'indexOf' routine
 *
 * See also:
 *          ECMA-262 v5, 15.5.4.7
 *
 * The result is the smallest index, not smaller than the normalized start
 * position, at which the search string occurs in the this-string, or -1 if
 * there is no such index. An empty search string matches at the start position.
 *
 * @return completion value
 *         Returned value must be freed with ecma_free_completion_value.
 */
static ecma_completion_value_t
ecma_builtin_string_prototype_object_index_of (ecma_value_t this_arg, /**< this argument */
                                               ecma_value_t arg1, /**< routine's first argument */
                                               ecma_value_t arg2) /**< routine's second argument */
{
  ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();

  /* 1 */
  ECMA_TRY_CATCH (check_coercible_val,
                  ecma_op_check_object_coercible (this_arg),
                  ret_value);

  /* 2 */
  ECMA_TRY_CATCH (to_str_val,
                  ecma_op_to_string (this_arg),
                  ret_value);

  /* 3 */
  ECMA_TRY_CATCH (search_str_val,
                  ecma_op_to_string (arg1),
                  ret_value);

  /* 4 */
  ECMA_OP_TO_NUMBER_TRY_CATCH (pos_num,
                               arg2,
                               ret_value);

  /* 5 */
  ecma_string_t *original_str_p = ecma_get_string_from_value (to_str_val);
  const ecma_length_t original_len = ecma_string_get_length (original_str_p);
  const lit_utf8_size_t original_size = ecma_string_get_size (original_str_p);

  /* 4b, 6 */
  /* NOTE(review): the helper is expected to clamp the position into [0, original_len] - confirm */
  ecma_length_t start = ecma_builtin_helper_string_index_normalize (pos_num, original_len);

  /* 7 */
  ecma_string_t *search_str_p = ecma_get_string_from_value (search_str_val);
  const ecma_length_t search_len = ecma_string_get_length (search_str_p);
  const lit_utf8_size_t search_size = ecma_string_get_size (search_str_p);

  /* The result defaults to -1 ("not found") */
  ecma_number_t *ret_num_p = ecma_alloc_number ();
  *ret_num_p = ecma_int32_to_number (-1);

  /* 8 */
  if (search_len <= original_len)
  {
    if (!search_len)
    {
      /*
       * An empty search string matches at every position, so the smallest
       * index not smaller than the start position is the start position itself
       * (e.g. "abc".indexOf ("", 2) is 2, not 0).
       */
      *ret_num_p = ecma_uint32_to_number (start);
    }
    else
    {
      /* create utf8 string from original string and advance to start position */
      MEM_DEFINE_LOCAL_ARRAY (original_str_utf8_p,
                              original_size,
                              lit_utf8_byte_t);

      ecma_string_to_utf8_string (original_str_p,
                                  original_str_utf8_p,
                                  (ssize_t) (original_size));

      lit_utf8_iterator_t original_it = lit_utf8_iterator_create (original_str_utf8_p, original_size);

      ecma_length_t index = start;

      lit_utf8_iterator_advance (&original_it, index);

      /* create utf8 string from search string */
      MEM_DEFINE_LOCAL_ARRAY (search_str_utf8_p,
                              search_size,
                              lit_utf8_byte_t);

      ecma_string_to_utf8_string (search_str_p,
                                  search_str_utf8_p,
                                  (ssize_t) (search_size));

      lit_utf8_iterator_t search_it = lit_utf8_iterator_create (search_str_utf8_p, search_size);

      /* iterate original string and try to match at each position */
      bool found = false;

      /* search_len <= original_len here, so the subtraction cannot wrap around */
      while (!found && index <= original_len - search_len)
      {
        ecma_length_t match_len = 0;
        lit_utf8_iterator_pos_t stored_original_pos = lit_utf8_iterator_get_pos (&original_it);

        /* Compare code unit by code unit; the length check goes first so that nothing is read past the search string */
        while (match_len < search_len
               && lit_utf8_iterator_read_next (&original_it) == lit_utf8_iterator_read_next (&search_it))
        {
          match_len++;
        }

        /* Check for match */
        if (match_len == search_len)
        {
          *ret_num_p = ecma_uint32_to_number (index);
          found = true;
        }
        else
        {
          /* reset iterators: rewind the search string and retry one code unit further in the original */
          lit_utf8_iterator_seek_bos (&search_it);
          lit_utf8_iterator_seek (&original_it, stored_original_pos);
          lit_utf8_iterator_incr (&original_it);
        }

        index++;
      }

      MEM_FINALIZE_LOCAL_ARRAY (search_str_utf8_p);
      MEM_FINALIZE_LOCAL_ARRAY (original_str_utf8_p);
    }
  }

  ecma_value_t new_value = ecma_make_number_value (ret_num_p);
  ret_value = ecma_make_normal_completion_value (new_value);

  ECMA_OP_TO_NUMBER_FINALIZE (pos_num);
  ECMA_FINALIZE (search_str_val);
  ECMA_FINALIZE (to_str_val);
  ECMA_FINALIZE (check_coercible_val);

  return ret_value;
} /* ecma_builtin_string_prototype_object_index_of */
/** * Parse numeric literal (ECMA-262, v5, 7.8.3) * * @return token of TOK_SMALL_INT or TOK_NUMBER types */ static token lexer_parse_number (void) { ecma_char_t c = LA (0); bool is_hex = false; bool is_fp = false; ecma_number_t fp_res = .0; size_t tok_length = 0, i; token known_token; JERRY_ASSERT (lit_char_is_decimal_digit (c) || c == LIT_CHAR_DOT); if (c == LIT_CHAR_0) { if (LA (1) == LIT_CHAR_LOWERCASE_X || LA (1) == LIT_CHAR_UPPERCASE_X) { is_hex = true; } } else if (c == LIT_CHAR_DOT) { JERRY_ASSERT (lit_char_is_decimal_digit (LA (1))); is_fp = true; } if (is_hex) { // Eat up '0x' consume_char (); consume_char (); new_token (); c = LA (0); if (!lit_char_is_hex_digit (c)) { PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX, "Invalid HexIntegerLiteral", lit_utf8_iterator_get_pos (&src_iter)); } do { consume_char (); c = LA (0); } while (lit_char_is_hex_digit (c)); if (lexer_is_char_can_be_identifier_start (c)) { PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX, "Identifier just after integer literal", lit_utf8_iterator_get_pos (&src_iter)); } tok_length = (size_t) (TOK_SIZE ()); const lit_utf8_byte_t *fp_buf_p = TOK_START (); /* token is constructed at end of function */ for (i = 0; i < tok_length; i++) { fp_res = fp_res * 16 + (ecma_number_t) lit_char_hex_to_int (fp_buf_p[i]); } } else { bool is_exp = false; new_token (); // Eat up '.' 
if (is_fp) { consume_char (); } while (true) { c = LA (0); if (c == LIT_CHAR_DOT) { if (is_fp) { /* token is constructed at end of function */ break; } else { is_fp = true; consume_char (); continue; } } else if (c == LIT_CHAR_LOWERCASE_E || c == LIT_CHAR_UPPERCASE_E) { if (is_exp) { PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX, "Numeric literal shall not contain more than exponential marker ('e' or 'E')", lit_utf8_iterator_get_pos (&src_iter)); } else { is_exp = true; consume_char (); if (LA (0) == LIT_CHAR_MINUS || LA (0) == LIT_CHAR_PLUS) { consume_char (); } continue; } } else if (!lit_char_is_decimal_digit (c)) { if (lexer_is_char_can_be_identifier_start (c)) { PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX, "Numeric literal shall not contain non-numeric characters", lit_utf8_iterator_get_pos (&src_iter)); } /* token is constructed at end of function */ break; } consume_char (); } tok_length = (size_t) (TOK_SIZE ()); if (is_fp || is_exp) { ecma_number_t res = ecma_utf8_string_to_number (TOK_START (), (jerry_api_size_t) tok_length); JERRY_ASSERT (!ecma_number_is_nan (res)); known_token = convert_seen_num_to_token (res); is_token_parse_in_progress = NULL; return known_token; } else if (*TOK_START () == LIT_CHAR_0 && tok_length != 1) { /* Octal integer literals */ if (strict_mode) { PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX, "Octal integer literals are not allowed in strict mode", token_start_pos); } else { /* token is constructed at end of function */ const lit_utf8_byte_t *fp_buf_p = TOK_START (); for (i = 0; i < tok_length; i++) { fp_res = fp_res * 8 + (ecma_number_t) lit_char_hex_to_int (fp_buf_p[i]); } } } else { const lit_utf8_byte_t *fp_buf_p = TOK_START (); /* token is constructed at end of function */ ecma_number_t mult = 1.0f; for (i = tok_length; i > 0; i--, mult *= 10) { fp_res += (ecma_number_t) lit_char_hex_to_int (fp_buf_p[i - 1]) * mult; } } } if (fp_res >= 0 && fp_res <= 255 && (uint8_t) fp_res == fp_res) { known_token = create_token (TOK_SMALL_INT, (uint8_t) fp_res); 
is_token_parse_in_progress = NULL; return known_token; } else { known_token = convert_seen_num_to_token (fp_res); is_token_parse_in_progress = NULL; return known_token; } } /* lexer_parse_number */
/** * Parse Identifier (ECMA-262 v5, 7.6) or ReservedWord (7.6.1; 7.8.1; 7.8.2). * * @return TOK_NAME - for Identifier, * TOK_KEYWORD - for Keyword or FutureReservedWord, * TOK_NULL - for NullLiteral, * TOK_BOOL - for BooleanLiteral */ static token lexer_parse_identifier_or_keyword (void) { ecma_char_t c = LA (0); JERRY_ASSERT (lexer_is_char_can_be_identifier_start (c)); new_token (); bool is_correct_identifier_name = true; bool is_escape_sequence_occured = false; bool is_all_chars_were_lowercase_ascii = true; while (true) { c = LA (0); if (c == LIT_CHAR_BACKSLASH) { consume_char (); is_escape_sequence_occured = true; bool is_unicode_escape_sequence = (LA (0) == LIT_CHAR_LOWERCASE_U); consume_char (); if (is_unicode_escape_sequence) { /* UnicodeEscapeSequence */ if (!lexer_convert_escape_sequence_digits_to_char (&src_iter, true, &c)) { is_correct_identifier_name = false; break; } else { /* c now contains character, encoded in the UnicodeEscapeSequence */ // Check character, converted from UnicodeEscapeSequence if (!lexer_is_char_can_be_identifier_part (c)) { is_correct_identifier_name = false; break; } } } else { is_correct_identifier_name = false; break; } } else if (!lexer_is_char_can_be_identifier_part (c)) { break; } else { if (!(c >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_BEGIN && c <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_END)) { is_all_chars_were_lowercase_ascii = false; } consume_char (); } } if (!is_correct_identifier_name) { PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX, "Illegal identifier name", lit_utf8_iterator_get_pos (&src_iter)); } const lit_utf8_size_t charset_size = TOK_SIZE (); token ret = empty_token; if (!is_escape_sequence_occured && is_all_chars_were_lowercase_ascii) { /* Keyword or FutureReservedWord (TOK_KEYWORD), or boolean literal (TOK_BOOL), or null literal (TOK_NULL) */ ret = lexer_parse_reserved_word (TOK_START (), charset_size); } if (is_empty (ret)) { /* Identifier (TOK_NAME) */ if (!is_escape_sequence_occured) { ret = lexer_create_token_for_charset 
(TOK_NAME, TOK_START (), charset_size); } else { ret = lexer_create_token_for_charset_transform_escape_sequences (TOK_NAME, TOK_START (), charset_size); } } is_token_parse_in_progress = false; return ret; } /* lexer_parse_identifier_or_keyword */
/**
 * Parse and construct lexer token
 *
 * Leading white space is skipped first. A run of line terminators is collapsed into
 * a single TOK_NEWLINE token. After that the token kind is chosen by the first
 * character: identifier / reserved word, numeric literal, end of source, string literal,
 * comment, regular expression literal or punctuator. Any other character raises
 * a syntax error.
 *
 * Note:
 *      Currently, lexer token doesn't fully correspond to Token, defined in ECMA-262, v5, 7.5.
 *      For example, there is no new-line token type in the token definition of ECMA-262 v5.
 *
 * Note:
 *      RETURN_PUNC, RETURN_PUNC_EX, IF_LA_IS, IF_LA_IS_OR and IF_LA_N_IS are macros defined
 *      outside of this routine; presumably each of them consumes the punctuator and returns
 *      the corresponding token - TODO confirm against the macro definitions.
 *
 * @return constructed token
 */
static token
lexer_parse_token (void)
{
  ecma_char_t c = LA (0);

  /* Skip white space preceding the token */
  if (lit_char_is_white_space (c))
  {
    while (lit_char_is_white_space (c))
    {
      consume_char ();

      c = LA (0);
    }
  }

  /* Consecutive line terminators produce one TOK_NEWLINE token */
  if (lit_char_is_line_terminator (c))
  {
    while (lit_char_is_line_terminator (c))
    {
      consume_char ();
      c = LA (0);
    }

    return create_token (TOK_NEWLINE, 0);
  }

  JERRY_ASSERT (is_token_parse_in_progress == false);

  /* ECMA-262 v5, 7.6, Identifier */
  if (lexer_is_char_can_be_identifier_start (c))
  {
    return lexer_parse_identifier_or_keyword ();
  }

  /* ECMA-262 v5, 7.8.3, Numeric literal */
  if (lit_char_is_decimal_digit (c)
      || (c == LIT_CHAR_DOT && lit_char_is_decimal_digit (LA (1))))
  {
    return lexer_parse_number ();
  }

  /* NOTE(review): looks unreachable if lit_char_is_line_terminator () accepts LF (handled above) - confirm */
  if (c == LIT_CHAR_LF)
  {
    consume_char ();
    return create_token (TOK_NEWLINE, 0);
  }

  /* A null character is reported as end of source */
  if (c == LIT_CHAR_NULL)
  {
    return create_token (TOK_EOF, 0);
  }

  /* String literal, opened by a single or a double quote */
  if (c == LIT_CHAR_SINGLE_QUOTE
      || c == LIT_CHAR_DOUBLE_QUOTE)
  {
    return lexer_parse_string ();
  }

  /* ECMA-262 v5, 7.4, SingleLineComment or MultiLineComment */
  if (c == LIT_CHAR_SLASH
      && (LA (1) == LIT_CHAR_SLASH
          || LA (1) == LIT_CHAR_ASTERISK))
  {
    if (lexer_parse_comment ())
    {
      /* The comment contained or was ended by a line terminator */
      return create_token (TOK_NEWLINE, 0);
    }
    else
    {
      /* Nothing to report for the comment: parse whatever follows it */
      return lexer_parse_token ();
    }
  }

  /*
   * A slash starts a regular expression literal unless the previous non-newline token
   * is one of the token types listed below; otherwise the slash falls through to the
   * punctuator switch.
   */
  if (c == LIT_CHAR_SLASH
      && !(prev_non_lf_token.type == TOK_NAME
           || prev_non_lf_token.type == TOK_NULL
           || prev_non_lf_token.type == TOK_BOOL
           || prev_non_lf_token.type == TOK_CLOSE_BRACE
           || prev_non_lf_token.type == TOK_CLOSE_SQUARE
           || prev_non_lf_token.type == TOK_CLOSE_PAREN
           || prev_non_lf_token.type == TOK_SMALL_INT
           || prev_non_lf_token.type == TOK_NUMBER
           || prev_non_lf_token.type == TOK_STRING
           || prev_non_lf_token.type == TOK_REGEXP))
  {
    return lexer_parse_regexp ();
  }

  /* ECMA-262 v5, 7.7, Punctuator */
  switch (c)
  {
    case LIT_CHAR_LEFT_BRACE:
    {
      RETURN_PUNC (TOK_OPEN_BRACE);
      break;
    }
    case LIT_CHAR_RIGHT_BRACE:
    {
      RETURN_PUNC (TOK_CLOSE_BRACE);
      break;
    }
    case LIT_CHAR_LEFT_PAREN:
    {
      RETURN_PUNC (TOK_OPEN_PAREN);
      break;
    }
    case LIT_CHAR_RIGHT_PAREN:
    {
      RETURN_PUNC (TOK_CLOSE_PAREN);
      break;
    }
    case LIT_CHAR_LEFT_SQUARE:
    {
      RETURN_PUNC (TOK_OPEN_SQUARE);
      break;
    }
    case LIT_CHAR_RIGHT_SQUARE:
    {
      RETURN_PUNC (TOK_CLOSE_SQUARE);
      break;
    }
    case LIT_CHAR_DOT:
    {
      RETURN_PUNC (TOK_DOT);
      break;
    }
    case LIT_CHAR_SEMICOLON:
    {
      RETURN_PUNC (TOK_SEMICOLON);
      break;
    }
    case LIT_CHAR_COMMA:
    {
      RETURN_PUNC (TOK_COMMA);
      break;
    }
    case LIT_CHAR_TILDE:
    {
      RETURN_PUNC (TOK_COMPL);
      break;
    }
    case LIT_CHAR_COLON:
    {
      RETURN_PUNC (TOK_COLON);
      break;
    }
    case LIT_CHAR_QUESTION:
    {
      RETURN_PUNC (TOK_QUERY);
      break;
    }

    /* Punctuators that may be followed by '=' */
    case LIT_CHAR_ASTERISK:
    {
      IF_LA_IS (LIT_CHAR_EQUALS, TOK_MULT_EQ, TOK_MULT);
      break;
    }
    case LIT_CHAR_SLASH:
    {
      IF_LA_IS (LIT_CHAR_EQUALS, TOK_DIV_EQ, TOK_DIV);
      break;
    }
    case LIT_CHAR_CIRCUMFLEX:
    {
      IF_LA_IS (LIT_CHAR_EQUALS, TOK_XOR_EQ, TOK_XOR);
      break;
    }
    case LIT_CHAR_PERCENT:
    {
      IF_LA_IS (LIT_CHAR_EQUALS, TOK_MOD_EQ, TOK_MOD);
      break;
    }

    /* Punctuators that may be doubled or followed by '=' */
    case LIT_CHAR_PLUS:
    {
      IF_LA_IS_OR (LIT_CHAR_PLUS, TOK_DOUBLE_PLUS, LIT_CHAR_EQUALS, TOK_PLUS_EQ, TOK_PLUS);
      break;
    }
    case LIT_CHAR_MINUS:
    {
      IF_LA_IS_OR (LIT_CHAR_MINUS, TOK_DOUBLE_MINUS, LIT_CHAR_EQUALS, TOK_MINUS_EQ, TOK_MINUS);
      break;
    }
    case LIT_CHAR_AMPERSAND:
    {
      IF_LA_IS_OR (LIT_CHAR_AMPERSAND, TOK_DOUBLE_AND, LIT_CHAR_EQUALS, TOK_AND_EQ, TOK_AND);
      break;
    }
    case LIT_CHAR_VLINE:
    {
      IF_LA_IS_OR (LIT_CHAR_VLINE, TOK_DOUBLE_OR, LIT_CHAR_EQUALS, TOK_OR_EQ, TOK_OR);
      break;
    }

    /* Punctuators starting with '<': further look-ahead selects the token */
    case LIT_CHAR_LESS_THAN:
    {
      switch (LA (1))
      {
        case LIT_CHAR_LESS_THAN: IF_LA_N_IS (LIT_CHAR_EQUALS, TOK_LSHIFT_EQ, TOK_LSHIFT, 2); break;
        case LIT_CHAR_EQUALS: RETURN_PUNC_EX (TOK_LESS_EQ, 2); break;
        default: RETURN_PUNC (TOK_LESS);
      }
      break;
    }

    /* Punctuators starting with '>': further look-ahead selects the token */
    case LIT_CHAR_GREATER_THAN:
    {
      switch (LA (1))
      {
        case LIT_CHAR_GREATER_THAN:
        {
          switch (LA (2))
          {
            case LIT_CHAR_GREATER_THAN: IF_LA_N_IS (LIT_CHAR_EQUALS, TOK_RSHIFT_EX_EQ, TOK_RSHIFT_EX, 3); break;
            case LIT_CHAR_EQUALS: RETURN_PUNC_EX (TOK_RSHIFT_EQ, 3); break;
            default: RETURN_PUNC_EX (TOK_RSHIFT, 2);
          }
          break;
        }
        case LIT_CHAR_EQUALS: RETURN_PUNC_EX (TOK_GREATER_EQ, 2); break;
        default: RETURN_PUNC (TOK_GREATER);
      }
      break;
    }

    /* Punctuators starting with '=' */
    case LIT_CHAR_EQUALS:
    {
      if (LA (1) == LIT_CHAR_EQUALS)
      {
        IF_LA_N_IS (LIT_CHAR_EQUALS, TOK_TRIPLE_EQ, TOK_DOUBLE_EQ, 2);
      }
      else
      {
        RETURN_PUNC (TOK_EQ);
      }
      break;
    }

    /* Punctuators starting with '!' */
    case LIT_CHAR_EXCLAMATION:
    {
      if (LA (1) == LIT_CHAR_EQUALS)
      {
        IF_LA_N_IS (LIT_CHAR_EQUALS, TOK_NOT_DOUBLE_EQ, TOK_NOT_EQ, 2);
      }
      else
      {
        RETURN_PUNC (TOK_NOT);
      }
      break;
    }
  }

  /* No token can start with the current character */
  PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX, "Illegal character", lit_utf8_iterator_get_pos (&src_iter));
} /* lexer_parse_token */
/**
 * Parse and construct lexer token
 *
 * White space and comments preceding the token are skipped by
 * lexer_skip_whitespace_and_comments (), whose result is reported through
 * out_is_preceed_by_new_lines_p. After that the token kind is chosen by the first
 * character: identifier / reserved word, numeric literal, end of source, string literal,
 * regular expression literal (only if maybe_regexp is set) or punctuator.
 * Any other character raises a syntax error.
 *
 * Note:
 *      Currently, lexer token doesn't fully correspond to Token, defined in ECMA-262, v5, 7.5.
 *      For example, there is no new-line token type in the token definition of ECMA-262 v5.
 *
 * Note:
 *      For Lexer alone, it is hard to find out whether a token is a regexp or a division.
 *      Parser must set maybe_regexp to true if a regexp is expected.
 *      Otherwise, a division is expected.
 *
 * Note:
 *      RETURN_PUNC, RETURN_PUNC_EX, IF_LA_IS, IF_LA_IS_OR and IF_LA_N_IS are macros defined
 *      outside of this routine; presumably each of them consumes the punctuator and returns
 *      the corresponding token - TODO confirm against the macro definitions.
 *
 * @return constructed token
 */
static token
lexer_parse_token (bool maybe_regexp, /**< read '/' as regexp? */
                   bool *out_is_preceed_by_new_lines_p, /**< out: is constructed token preceded by newlines? */
                   bool is_strict) /**< flag, indicating whether current code is in strict mode code */
{
  JERRY_ASSERT (is_token_parse_in_progress == false);

  *out_is_preceed_by_new_lines_p = lexer_skip_whitespace_and_comments ();

  ecma_char_t c = LA (0);

  /* ECMA-262 v5, 7.6, Identifier */
  if (lexer_is_char_can_be_identifier_start (c))
  {
    return lexer_parse_identifier_or_keyword (is_strict);
  }

  /* ECMA-262 v5, 7.8.3, Numeric literal */
  if (lit_char_is_decimal_digit (c)
      || (c == LIT_CHAR_DOT && lit_char_is_decimal_digit (LA (1))))
  {
    return lexer_parse_number (is_strict);
  }

  /* A null character is reported as end of source */
  if (c == LIT_CHAR_NULL)
  {
    return create_token (TOK_EOF, 0);
  }

  /* String literal, opened by a single or a double quote */
  if (c == LIT_CHAR_SINGLE_QUOTE
      || c == LIT_CHAR_DOUBLE_QUOTE)
  {
    return lexer_parse_string ();
  }

  /* The caller decides whether a slash starts a regexp; otherwise it is handled as a punctuator below */
  if (c == LIT_CHAR_SLASH && maybe_regexp)
  {
    return lexer_parse_regexp ();
  }

  /* ECMA-262 v5, 7.7, Punctuator */
  switch (c)
  {
    case LIT_CHAR_LEFT_BRACE:
    {
      RETURN_PUNC (TOK_OPEN_BRACE);
      break;
    }
    case LIT_CHAR_RIGHT_BRACE:
    {
      RETURN_PUNC (TOK_CLOSE_BRACE);
      break;
    }
    case LIT_CHAR_LEFT_PAREN:
    {
      RETURN_PUNC (TOK_OPEN_PAREN);
      break;
    }
    case LIT_CHAR_RIGHT_PAREN:
    {
      RETURN_PUNC (TOK_CLOSE_PAREN);
      break;
    }
    case LIT_CHAR_LEFT_SQUARE:
    {
      RETURN_PUNC (TOK_OPEN_SQUARE);
      break;
    }
    case LIT_CHAR_RIGHT_SQUARE:
    {
      RETURN_PUNC (TOK_CLOSE_SQUARE);
      break;
    }
    case LIT_CHAR_DOT:
    {
      RETURN_PUNC (TOK_DOT);
      break;
    }
    case LIT_CHAR_SEMICOLON:
    {
      RETURN_PUNC (TOK_SEMICOLON);
      break;
    }
    case LIT_CHAR_COMMA:
    {
      RETURN_PUNC (TOK_COMMA);
      break;
    }
    case LIT_CHAR_TILDE:
    {
      RETURN_PUNC (TOK_COMPL);
      break;
    }
    case LIT_CHAR_COLON:
    {
      RETURN_PUNC (TOK_COLON);
      break;
    }
    case LIT_CHAR_QUESTION:
    {
      RETURN_PUNC (TOK_QUERY);
      break;
    }

    /* Punctuators that may be followed by '=' */
    case LIT_CHAR_ASTERISK:
    {
      IF_LA_IS (LIT_CHAR_EQUALS, TOK_MULT_EQ, TOK_MULT);
      break;
    }
    case LIT_CHAR_SLASH:
    {
      IF_LA_IS (LIT_CHAR_EQUALS, TOK_DIV_EQ, TOK_DIV);
      break;
    }
    case LIT_CHAR_CIRCUMFLEX:
    {
      IF_LA_IS (LIT_CHAR_EQUALS, TOK_XOR_EQ, TOK_XOR);
      break;
    }
    case LIT_CHAR_PERCENT:
    {
      IF_LA_IS (LIT_CHAR_EQUALS, TOK_MOD_EQ, TOK_MOD);
      break;
    }

    /* Punctuators that may be doubled or followed by '=' */
    case LIT_CHAR_PLUS:
    {
      IF_LA_IS_OR (LIT_CHAR_PLUS, TOK_DOUBLE_PLUS, LIT_CHAR_EQUALS, TOK_PLUS_EQ, TOK_PLUS);
      break;
    }
    case LIT_CHAR_MINUS:
    {
      IF_LA_IS_OR (LIT_CHAR_MINUS, TOK_DOUBLE_MINUS, LIT_CHAR_EQUALS, TOK_MINUS_EQ, TOK_MINUS);
      break;
    }
    case LIT_CHAR_AMPERSAND:
    {
      IF_LA_IS_OR (LIT_CHAR_AMPERSAND, TOK_DOUBLE_AND, LIT_CHAR_EQUALS, TOK_AND_EQ, TOK_AND);
      break;
    }
    case LIT_CHAR_VLINE:
    {
      IF_LA_IS_OR (LIT_CHAR_VLINE, TOK_DOUBLE_OR, LIT_CHAR_EQUALS, TOK_OR_EQ, TOK_OR);
      break;
    }

    /* Punctuators starting with '<': further look-ahead selects the token */
    case LIT_CHAR_LESS_THAN:
    {
      switch (LA (1))
      {
        case LIT_CHAR_LESS_THAN: IF_LA_N_IS (LIT_CHAR_EQUALS, TOK_LSHIFT_EQ, TOK_LSHIFT, 2); break;
        case LIT_CHAR_EQUALS: RETURN_PUNC_EX (TOK_LESS_EQ, 2); break;
        default: RETURN_PUNC (TOK_LESS);
      }
      break;
    }

    /* Punctuators starting with '>': further look-ahead selects the token */
    case LIT_CHAR_GREATER_THAN:
    {
      switch (LA (1))
      {
        case LIT_CHAR_GREATER_THAN:
        {
          switch (LA (2))
          {
            case LIT_CHAR_GREATER_THAN: IF_LA_N_IS (LIT_CHAR_EQUALS, TOK_RSHIFT_EX_EQ, TOK_RSHIFT_EX, 3); break;
            case LIT_CHAR_EQUALS: RETURN_PUNC_EX (TOK_RSHIFT_EQ, 3); break;
            default: RETURN_PUNC_EX (TOK_RSHIFT, 2);
          }
          break;
        }
        case LIT_CHAR_EQUALS: RETURN_PUNC_EX (TOK_GREATER_EQ, 2); break;
        default: RETURN_PUNC (TOK_GREATER);
      }
      break;
    }

    /* Punctuators starting with '=' */
    case LIT_CHAR_EQUALS:
    {
      if (LA (1) == LIT_CHAR_EQUALS)
      {
        IF_LA_N_IS (LIT_CHAR_EQUALS, TOK_TRIPLE_EQ, TOK_DOUBLE_EQ, 2);
      }
      else
      {
        RETURN_PUNC (TOK_EQ);
      }
      break;
    }

    /* Punctuators starting with '!' */
    case LIT_CHAR_EXCLAMATION:
    {
      if (LA (1) == LIT_CHAR_EQUALS)
      {
        IF_LA_N_IS (LIT_CHAR_EQUALS, TOK_NOT_DOUBLE_EQ, TOK_NOT_EQ, 2);
      }
      else
      {
        RETURN_PUNC (TOK_NOT);
      }
      break;
    }
  }

  /* No token can start with the current character */
  PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX, "Illegal character", lit_utf8_iterator_get_pos (&src_iter));
} /* lexer_parse_token */
/**
 * Unit test of the utf-8 string helpers and of the utf-8 iterator
 *
 * The first part generates strings (the very first one is empty, the others have
 * a random byte size) and walks each of them with the iterator forwards, backwards
 * and by seeking to saved positions, checking that the same code units are observed
 * every time. The second part checks string validation, code point reading, code unit
 * encoding and the iteration over a 4-byte sequence with fixed byte patterns.
 *
 * @return 0 (failed checks are reported through JERRY_ASSERT)
 */
int
main (int __attr_unused___ argc,
      char __attr_unused___ **argv)
{
  TEST_INIT ();

  mem_init ();

  lit_utf8_byte_t utf8_string[max_bytes_in_string];
  ecma_char_t code_units[max_code_units_in_string];
  lit_utf8_iterator_pos_t saved_positions[max_code_units_in_string];

  for (int i = 0; i < test_iters; i++)
  {
    /* The first iteration always checks the empty string */
    lit_utf8_size_t utf8_string_size = (i == 0) ? 0 : (lit_utf8_size_t) (rand () % max_bytes_in_string);
    ecma_length_t length = generate_utf8_string (utf8_string, utf8_string_size);

    JERRY_ASSERT (lit_utf8_string_length (utf8_string, utf8_string_size) == length);

    lit_utf8_iterator_t iter = lit_utf8_iterator_create (utf8_string, utf8_string_size);
    ecma_length_t calculated_length = 0;

    ecma_length_t code_units_count = 0;

    /* Forward pass with peek + incr: remember every code unit and the position it was read from */
    while (!lit_utf8_iterator_is_eos (&iter))
    {
      code_units[code_units_count] = lit_utf8_iterator_peek_next (&iter);
      saved_positions[code_units_count] = lit_utf8_iterator_get_pos (&iter);
      code_units_count++;
      calculated_length++;

      lit_utf8_iterator_incr (&iter);
    }

    JERRY_ASSERT (length == calculated_length);

    /* Seeking to a saved position must give back the code unit and the index recorded there */
    if (code_units_count > 0)
    {
      for (int j = 0; j < test_subiters; j++)
      {
        ecma_length_t index = (ecma_length_t) rand () % code_units_count;
        lit_utf8_iterator_seek (&iter, saved_positions[index]);
        JERRY_ASSERT (lit_utf8_iterator_peek_next (&iter) == code_units[index]);
        JERRY_ASSERT (lit_utf8_iterator_get_index (&iter) == index);
      }
    }

    lit_utf8_iterator_seek_eos (&iter);

    /* Backward pass with peek + decr */
    while (!lit_utf8_iterator_is_bos (&iter))
    {
      JERRY_ASSERT (code_units_count > 0);
      calculated_length--;
      JERRY_ASSERT (code_units[calculated_length] == lit_utf8_iterator_peek_prev (&iter));
      lit_utf8_iterator_decr (&iter);
    }

    JERRY_ASSERT (calculated_length == 0);

    /* Forward pass with read_next */
    while (!lit_utf8_iterator_is_eos (&iter))
    {
      ecma_char_t code_unit = lit_utf8_iterator_read_next (&iter);
      JERRY_ASSERT (code_unit == code_units[calculated_length]);
      calculated_length++;
    }

    JERRY_ASSERT (length == calculated_length);

    /* Backward pass with read_prev */
    while (!lit_utf8_iterator_is_bos (&iter))
    {
      JERRY_ASSERT (code_units_count > 0);
      calculated_length--;

      /*
       * The read moves the iterator, so it is kept out of the assertion:
       * the loop must terminate even if assertions are compiled out.
       */
      ecma_char_t prev_code_unit = lit_utf8_iterator_read_prev (&iter);
      JERRY_ASSERT (code_units[calculated_length] == prev_code_unit);
    }

    JERRY_ASSERT (calculated_length == 0);
  }

  /* Overlong-encoded code point */
  lit_utf8_byte_t invalid_utf8_string_1[] = {0xC0, 0x82};
  JERRY_ASSERT (!lit_is_utf8_string_valid (invalid_utf8_string_1, sizeof (invalid_utf8_string_1)));

  /* Overlong-encoded code point */
  lit_utf8_byte_t invalid_utf8_string_2[] = {0xE0, 0x80, 0x81};
  JERRY_ASSERT (!lit_is_utf8_string_valid (invalid_utf8_string_2, sizeof (invalid_utf8_string_2)));

  /* Pair of surrogates: 0xD901 0xDFF0 which encode Unicode character 0x507F0 */
  lit_utf8_byte_t invalid_utf8_string_3[] = {0xED, 0xA4, 0x81, 0xED, 0xBF, 0xB0};
  JERRY_ASSERT (!lit_is_utf8_string_valid (invalid_utf8_string_3, sizeof (invalid_utf8_string_3)));

  /* Isolated high surrogate 0xD901 */
  lit_utf8_byte_t valid_utf8_string_1[] = {0xED, 0xA4, 0x81};
  JERRY_ASSERT (lit_is_utf8_string_valid (valid_utf8_string_1, sizeof (valid_utf8_string_1)));

  /* 4-byte long utf-8 character - Unicode character 0x507F0 */
  lit_utf8_byte_t valid_utf8_string_2[] = {0xF1, 0x90, 0x9F, 0xB0};
  JERRY_ASSERT (lit_is_utf8_string_valid (valid_utf8_string_2, sizeof (valid_utf8_string_2)));

  /* Reading a 4-byte sequence yields the whole code point and consumes 4 bytes */
  lit_utf8_byte_t buf[] = {0xF0, 0x90, 0x8D, 0x88};
  lit_code_point_t code_point;
  lit_utf8_size_t bytes_count = lit_read_code_point_from_utf8 (buf, sizeof (buf), &code_point);
  JERRY_ASSERT (bytes_count == 4);
  JERRY_ASSERT (code_point == 0x10348);

  /* Encoding of code units into 1, 2 and 3 bytes */
  lit_utf8_byte_t res_buf[3];
  lit_utf8_size_t res_size;

  res_size = lit_code_unit_to_utf8 (0x73, res_buf);
  JERRY_ASSERT (res_size == 1);
  JERRY_ASSERT (res_buf[0] == 0x73);

  res_size = lit_code_unit_to_utf8 (0x41A, res_buf);
  JERRY_ASSERT (res_size == 2);
  JERRY_ASSERT (res_buf[0] == 0xD0);
  JERRY_ASSERT (res_buf[1] == 0x9A);

  res_size = lit_code_unit_to_utf8 (0xD7FF, res_buf);
  JERRY_ASSERT (res_size == 3);
  JERRY_ASSERT (res_buf[0] == 0xED);
  JERRY_ASSERT (res_buf[1] == 0x9F);
  JERRY_ASSERT (res_buf[2] == 0xBF);

  /* The iterator splits a 4-byte sequence into two code units: 0xD800 and 0xDF48 */
  lit_utf8_byte_t bytes[] = {0xF0, 0x90, 0x8D, 0x88};
  lit_utf8_iterator_t iter = lit_utf8_iterator_create (bytes, sizeof (bytes));
  ecma_char_t code_unit = lit_utf8_iterator_read_next (&iter);
  JERRY_ASSERT (!lit_utf8_iterator_is_eos (&iter));
  JERRY_ASSERT (code_unit == 0xD800);
  code_unit = lit_utf8_iterator_read_next (&iter);
  JERRY_ASSERT (lit_utf8_iterator_is_eos (&iter));
  JERRY_ASSERT (code_unit == 0xDF48);

  mem_finalize (true);

  return 0;
} /* main */