/** * Dump current line */ static void dump_current_line (void) { if (!allow_dump_lines) { return; } printf ("// "); lit_utf8_iterator_t iter = src_iter; while (!lit_utf8_iterator_is_eos (&iter)) { ecma_char_t code_unit = lit_utf8_iterator_read_next (&iter); if (lit_char_is_line_terminator (code_unit)) { if (code_unit == LIT_CHAR_CR && !lit_utf8_iterator_is_eos (&iter) && lit_utf8_iterator_peek_next (&iter) == LIT_CHAR_LF) { lit_utf8_iterator_incr (&iter); } break; } lit_put_ecma_char (code_unit); } lit_put_ecma_char (LIT_CHAR_LF); } /* dump_current_line */
/**
 * Parse RegExp flags (global, ignoreCase, multiline)
 *
 * Every code unit of the flags string must be one of 'g', 'i' or 'm', and
 * none of them may select a bit that is already set in *flags_p; otherwise
 * a syntax error is raised and parsing stops.
 *
 * See also: ECMA-262 v5, 15.10.4.1
 *
 * @return completion value
 *         Returned value must be freed with ecma_free_completion_value
 */
ecma_completion_value_t
re_parse_regexp_flags (ecma_string_t *flags_str_p, /**< Input string with flags */
                       uint8_t *flags_p) /**< Output: parsed flag bits */
{
  ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();

  lit_utf8_size_t flags_str_size = ecma_string_get_size (flags_str_p);
  MEM_DEFINE_LOCAL_ARRAY (flags_start_p, flags_str_size, lit_utf8_byte_t);

  ecma_string_to_utf8_string (flags_str_p, flags_start_p, (ssize_t) flags_str_size);

  lit_utf8_iterator_t iter = lit_utf8_iterator_create (flags_start_p, flags_str_size);

  while (!lit_utf8_iterator_is_eos (&iter)
         && ecma_is_completion_value_empty (ret_value))
  {
    const ecma_char_t flag_char = lit_utf8_iterator_read_next (&iter);
    uint8_t flag_bit = 0;
    bool is_known_flag = true;

    if (flag_char == 'g')
    {
      flag_bit = RE_FLAG_GLOBAL;
    }
    else if (flag_char == 'i')
    {
      flag_bit = RE_FLAG_IGNORE_CASE;
    }
    else if (flag_char == 'm')
    {
      flag_bit = RE_FLAG_MULTILINE;
    }
    else
    {
      is_known_flag = false;
    }

    /* Unknown flag characters and repeated flags are both rejected. */
    if (!is_known_flag || (*flags_p & flag_bit) != 0)
    {
      ret_value = ecma_raise_syntax_error ("Invalid RegExp flags.");
    }

    /* The bit is recorded even when the flag was a duplicate (no-op for unknown flags). */
    *flags_p = (uint8_t) (*flags_p | flag_bit);
  }

  MEM_FINALIZE_LOCAL_ARRAY (flags_start_p);

  return ret_value;
} /* re_parse_regexp_flags */
/** * Dump specified line of the source script */ void lexer_dump_line (size_t line) /**< line number */ { size_t l = 0; lit_utf8_iterator_t iter = src_iter; lit_utf8_iterator_seek_bos (&iter); while (!lit_utf8_iterator_is_eos (&iter)) { ecma_char_t code_unit; if (l == line) { while (!lit_utf8_iterator_is_eos (&iter)) { code_unit = lit_utf8_iterator_read_next (&iter); if (lit_char_is_line_terminator (code_unit)) { break; } lit_put_ecma_char (code_unit); } return; } code_unit = lit_utf8_iterator_read_next (&iter); if (lit_char_is_line_terminator (code_unit)) { l++; if (code_unit == LIT_CHAR_CR && !lit_utf8_iterator_is_eos (&iter) && lit_utf8_iterator_peek_next (&iter) == LIT_CHAR_LF) { lit_utf8_iterator_incr (&iter); } } } } /* lexer_dump_line */
/**
 * Look ahead in the source without moving src_iter.
 *
 * @return the code unit located i code units after the position held by src_iter,
 *         LIT_CHAR_NULL - if the end of the source is reached first.
 */
static ecma_char_t
get_char (size_t i) /**< look-ahead distance in code units */
{
  lit_utf8_iterator_t lookahead_iter = src_iter;

  for (;;)
  {
    if (lit_utf8_iterator_is_eos (&lookahead_iter))
    {
      return LIT_CHAR_NULL;
    }

    const ecma_char_t current = lit_utf8_iterator_read_next (&lookahead_iter);

    if (i == 0)
    {
      return current;
    }

    i--;
  }
} /* get_char */
/**
 * Convert locus to line and column
 *
 * The source buffer is scanned from its beginning up to the locus. Every line
 * terminator (CR LF being counted as a single one) increments the line number
 * and resets the column; every other code unit increments the column.
 * Both results are zero-based. Either output pointer may be NULL.
 */
void
lexer_locus_to_line_and_column (lit_utf8_iterator_pos_t locus, /**< iterator position in the source script */
                                size_t *line, /**< @out: line number */
                                size_t *column) /**< @out: column number */
{
  JERRY_ASSERT ((lit_utf8_size_t) (locus.offset + locus.is_non_bmp_middle) <= buffer_size);

  lit_utf8_iterator_t iter = lit_utf8_iterator_create (buffer_start, (lit_utf8_size_t) buffer_size);
  lit_utf8_iterator_pos_t iter_pos = lit_utf8_iterator_get_pos (&iter);

  size_t l = 0, c = 0;

  while (!lit_utf8_iterator_is_eos (&iter) && lit_utf8_iterator_pos_cmp (iter_pos, locus) < 0)
  {
    ecma_char_t code_unit = lit_utf8_iterator_read_next (&iter);

    if (lit_char_is_line_terminator (code_unit))
    {
      if (code_unit == LIT_CHAR_CR
          && !lit_utf8_iterator_is_eos (&iter)
          && lit_utf8_iterator_peek_next (&iter) == LIT_CHAR_LF)
      {
        lit_utf8_iterator_incr (&iter);
      }

      c = 0;
      l++;
    }
    else
    {
      c++;
    }

    /*
     * The position must be refreshed only after the optional LF of a CR LF
     * pair has been skipped. Otherwise it would still point at the LF, and a
     * locus at the first character of the following line would cause one
     * extra code unit to be consumed and counted as a column.
     */
    iter_pos = lit_utf8_iterator_get_pos (&iter);
  }

  if (line)
  {
    *line = l;
  }

  if (column)
  {
    *column = c;
  }
} /* lexer_locus_to_line_and_column */
/**
 * Read a fixed number of hexadecimal digits from the iterator and combine them into a character code.
 *
 * Four digits are consumed for a UnicodeEscapeSequence, two for a HexEscapeSequence.
 * The iterator is advanced past every code unit that was read, including
 * the offending one if the conversion fails on a non-hexadecimal character.
 *
 * @return true - upon successful conversion,
 *         false - otherwise (characters, pointed by iterator, are not hexadecimal digits,
 *                 or number of characters until end of string is less than specified).
 */
static bool
lexer_convert_escape_sequence_digits_to_char (lit_utf8_iterator_t *src_iter_p, /**< string iterator */
                                              bool is_unicode_escape_sequence, /**< UnicodeEscapeSequence (true)
                                                                                *   or HexEscapeSequence (false) */
                                              ecma_char_t *out_converted_char_p) /**< out: converted character */
{
  uint32_t digits_left = is_unicode_escape_sequence ? 4 : 2;
  uint16_t accumulated = 0;

  while (digits_left > 0)
  {
    if (lit_utf8_iterator_is_eos (src_iter_p))
    {
      return false;
    }

    const ecma_char_t digit_char = lit_utf8_iterator_read_next (src_iter_p);

    if (!lit_char_is_hex_digit (digit_char))
    {
      return false;
    }

    /* The top nibble must be free, so shifting in one more digit cannot overflow. */
    JERRY_ASSERT ((accumulated & 0xF000u) == 0);

    const uint16_t shifted = (uint16_t) (accumulated << 4u);
    accumulated = (uint16_t) (shifted + lit_char_hex_to_int (digit_char));

    digits_left--;
  }

  *out_converted_char_p = (ecma_char_t) accumulated;

  return true;
} /* lexer_convert_escape_sequence_digits_to_char */
/**
 * Helper function for string indexOf and lastIndexOf functions
 *
 * This function implements string indexOf and lastIndexOf with required checks and conversions.
 * The search begins at the normalized start position and proceeds forwards
 * (firstIndex == true) or backwards (firstIndex == false) one code unit at a time.
 *
 * See also:
 *          ECMA-262 v5, 15.5.4.7
 *          ECMA-262 v5, 15.5.4.8
 *
 * Used by:
 *         - The String.prototype.indexOf routine.
 *         - The String.prototype.lastIndexOf routine.
 *
 * @return completion value holding a number: the (last) index of the search string,
 *         or -1 if it was not found
 *         Returned value must be freed with ecma_free_completion_value.
 */
ecma_completion_value_t
ecma_builtin_helper_string_prototype_object_index_of (ecma_value_t this_arg, /**< this argument */
                                                      ecma_value_t arg1, /**< routine's first argument */
                                                      ecma_value_t arg2, /**< routine's second argument */
                                                      bool firstIndex) /**< true - search forwards (indexOf),
                                                                        *   false - search backwards (lastIndexOf) */
{
  ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();

  /* 1 */
  ECMA_TRY_CATCH (check_coercible_val,
                  ecma_op_check_object_coercible (this_arg),
                  ret_value);

  /* 2 */
  ECMA_TRY_CATCH (to_str_val,
                  ecma_op_to_string (this_arg),
                  ret_value);

  /* 3 */
  ECMA_TRY_CATCH (search_str_val,
                  ecma_op_to_string (arg1),
                  ret_value);

  /* 4 */
  ECMA_OP_TO_NUMBER_TRY_CATCH (pos_num,
                               arg2,
                               ret_value);

  /* 6 */
  ecma_string_t *original_str_p = ecma_get_string_from_value (to_str_val);
  const ecma_length_t original_len = ecma_string_get_length (original_str_p);
  const lit_utf8_size_t original_size = ecma_string_get_size (original_str_p);

  /* 4b, 5, 7 */
  ecma_length_t start = ecma_builtin_helper_string_index_normalize (pos_num, original_len, firstIndex);

  /* 8 */
  ecma_string_t *search_str_p = ecma_get_string_from_value (search_str_val);
  const ecma_length_t search_len = ecma_string_get_length (search_str_p);
  const lit_utf8_size_t search_size = ecma_string_get_size (search_str_p);

  /* The result defaults to -1 (not found). */
  ecma_number_t *ret_num_p = ecma_alloc_number ();
  *ret_num_p = ecma_int32_to_number (-1);

  /* 9 */
  if (search_len <= original_len)
  {
    if (!search_len)
    {
      /*
       * The empty string matches at every position, so the answer is the
       * (normalized) start position itself: it is both the smallest index not
       * smaller than start and the largest index not larger than start.
       */
      *ret_num_p = ecma_uint32_to_number (start);
    }
    else
    {
      /* create utf8 string from original string and advance to position */
      MEM_DEFINE_LOCAL_ARRAY (original_str_utf8_p,
                              original_size,
                              lit_utf8_byte_t);

      ecma_string_to_utf8_string (original_str_p,
                                  original_str_utf8_p,
                                  (ssize_t) (original_size));

      lit_utf8_iterator_t original_it = lit_utf8_iterator_create (original_str_utf8_p, original_size);

      ecma_length_t index = start;
      lit_utf8_iterator_advance (&original_it, index);

      /* create utf8 string from search string */
      MEM_DEFINE_LOCAL_ARRAY (search_str_utf8_p,
                              search_size,
                              lit_utf8_byte_t);

      ecma_string_to_utf8_string (search_str_p,
                                  search_str_utf8_p,
                                  (ssize_t) (search_size));

      lit_utf8_iterator_t search_it = lit_utf8_iterator_create (search_str_utf8_p, search_size);

      /* iterate original string and try to match at each position */
      bool searching = true;

      while (searching)
      {
        /* match as long as possible */
        ecma_length_t match_len = 0;
        lit_utf8_iterator_t stored_original_it = original_it;

        /* The bounds checks come first, so neither iterator is read past its end. */
        while (match_len < search_len
               && index + match_len < original_len
               && lit_utf8_iterator_read_next (&original_it) == lit_utf8_iterator_read_next (&search_it))
        {
          match_len++;
        }

        /* check for match */
        if (match_len == search_len)
        {
          *ret_num_p = ecma_uint32_to_number (index);
          break;
        }
        else
        {
          /* inc/dec index and update iterators and search condition */
          lit_utf8_iterator_seek_bos (&search_it);
          original_it = stored_original_it;

          if (firstIndex)
          {
            if ((searching = (index <= original_len - search_len)))
            {
              lit_utf8_iterator_incr (&original_it);
              index++;
            }
          }
          else
          {
            if ((searching = (index > 0)))
            {
              lit_utf8_iterator_decr (&original_it);
              index--;
            }
          }
        }
      }

      MEM_FINALIZE_LOCAL_ARRAY (search_str_utf8_p);
      MEM_FINALIZE_LOCAL_ARRAY (original_str_utf8_p);
    }
  }

  ecma_value_t new_value = ecma_make_number_value (ret_num_p);
  ret_value = ecma_make_normal_completion_value (new_value);

  ECMA_OP_TO_NUMBER_FINALIZE (pos_num);
  ECMA_FINALIZE (search_str_val);
  ECMA_FINALIZE (to_str_val);
  ECMA_FINALIZE (check_coercible_val);

  return ret_value;
} /* ecma_builtin_helper_string_prototype_object_index_of */
/**
 * Recursive function for RegExp matching. Tests for a regular expression
 * match and returns a MatchResult value.
 *
 * The bytecode is interpreted opcode by opcode. Opcodes that match a single
 * position or character continue with the next opcode ("tail merge"); opcodes
 * that introduce a choice (alternatives, groups, iterators, lookaheads) call
 * this function recursively for each candidate continuation and return its result.
 * The iterator is passed by value, so a failed branch leaves the caller's
 * input position unchanged.
 *
 * See also:
 *          ECMA-262 v5, 15.10.2.1
 *
 * @return completion value: simple value 'true' on match (with *out_iter_p set to the
 *         position reached), simple value 'false' on failure, or a throw completion
 *         Returned value must be freed with ecma_free_completion_value
 */
static ecma_completion_value_t
re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
                 re_bytecode_t *bc_p, /**< pointer to the current RegExp bytecode */
                 lit_utf8_iterator_t iter, /**< input string iterator */
                 lit_utf8_iterator_t *out_iter_p) /**< Output: matching substring iterator */
{
  ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();
  re_opcode_t op;

  while ((op = re_get_opcode (&bc_p)))
  {
    switch (op)
    {
      case RE_OP_MATCH:
      {
        JERRY_DDLOG ("Execute RE_OP_MATCH: match\n");
        *out_iter_p = iter;
        ret_value = ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_TRUE);
        return ret_value; /* match */
      }
      case RE_OP_CHAR:
      {
        if (lit_utf8_iterator_is_eos (&iter))
        {
          return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
        }

        bool is_ignorecase = re_ctx_p->flags & RE_FLAG_IGNORE_CASE;
        ecma_char_t ch1 = (ecma_char_t) re_get_value (&bc_p); /* Already canonicalized. */
        ecma_char_t ch2 = re_canonicalize (lit_utf8_iterator_read_next (&iter), is_ignorecase);
        JERRY_DDLOG ("Character matching %d to %d: ", ch1, ch2);

        if (ch1 != ch2)
        {
          JERRY_DDLOG ("fail\n");
          return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
        }

        JERRY_DDLOG ("match\n");
        break; /* tail merge */
      }
      case RE_OP_PERIOD:
      {
        if (lit_utf8_iterator_is_eos (&iter))
        {
          return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
        }

        ecma_char_t ch = lit_utf8_iterator_read_next (&iter);
        JERRY_DDLOG ("Period matching '.' to %d: ", (uint32_t) ch);

        if (lit_char_is_line_terminator (ch))
        {
          JERRY_DDLOG ("fail\n");
          return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
        }

        JERRY_DDLOG ("match\n");
        break; /* tail merge */
      }
      case RE_OP_ASSERT_START:
      {
        JERRY_DDLOG ("Execute RE_OP_ASSERT_START: ");

        if ((iter.buf_p + iter.buf_pos.offset) <= re_ctx_p->input_start_p)
        {
          JERRY_DDLOG ("match\n");
          break;
        }

        if (!(re_ctx_p->flags & RE_FLAG_MULTILINE))
        {
          JERRY_DDLOG ("fail\n");
          return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
        }

        /* In multiline mode the position right after a line terminator also matches. */
        if (lit_char_is_line_terminator (lit_utf8_iterator_peek_prev (&iter)))
        {
          JERRY_DDLOG ("match\n");
          break;
        }

        JERRY_DDLOG ("fail\n");
        return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
      }
      case RE_OP_ASSERT_END:
      {
        JERRY_DDLOG ("Execute RE_OP_ASSERT_END: ");

        if ((iter.buf_p + iter.buf_pos.offset) >= re_ctx_p->input_end_p)
        {
          JERRY_DDLOG ("match\n");
          break; /* tail merge */
        }

        if (!(re_ctx_p->flags & RE_FLAG_MULTILINE))
        {
          JERRY_DDLOG ("fail\n");
          return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
        }

        /* In multiline mode the position right before a line terminator also matches. */
        if (lit_char_is_line_terminator (lit_utf8_iterator_peek_next (&iter)))
        {
          JERRY_DDLOG ("match\n");
          break; /* tail merge */
        }

        JERRY_DDLOG ("fail\n");
        return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
      }
      case RE_OP_ASSERT_WORD_BOUNDARY:
      case RE_OP_ASSERT_NOT_WORD_BOUNDARY:
      {
        bool is_wordchar_left, is_wordchar_right;

        if ((iter.buf_p + iter.buf_pos.offset) <= re_ctx_p->input_start_p)
        {
          is_wordchar_left = false; /* not a wordchar */
        }
        else
        {
          is_wordchar_left = lit_char_is_word_char (lit_utf8_iterator_peek_prev (&iter));
        }

        if ((iter.buf_p + iter.buf_pos.offset) >= re_ctx_p->input_end_p)
        {
          is_wordchar_right = false; /* not a wordchar */
        }
        else
        {
          is_wordchar_right = lit_char_is_word_char (lit_utf8_iterator_peek_next (&iter));
        }

        if (op == RE_OP_ASSERT_WORD_BOUNDARY)
        {
          JERRY_DDLOG ("Execute RE_OP_ASSERT_WORD_BOUNDARY: ");
          if (is_wordchar_left == is_wordchar_right)
          {
            JERRY_DDLOG ("fail\n");
            return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
          }
        }
        else
        {
          JERRY_ASSERT (op == RE_OP_ASSERT_NOT_WORD_BOUNDARY);
          JERRY_DDLOG ("Execute RE_OP_ASSERT_NOT_WORD_BOUNDARY: ");

          if (is_wordchar_left != is_wordchar_right)
          {
            JERRY_DDLOG ("fail\n");
            return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
          }
        }

        JERRY_DDLOG ("match\n");
        break; /* tail merge */
      }
      case RE_OP_LOOKAHEAD_POS:
      case RE_OP_LOOKAHEAD_NEG:
      {
        ecma_completion_value_t match_value = ecma_make_empty_completion_value ();
        /* A non-NULL sub_iter.buf_p doubles as the "some alternative matched" flag. */
        lit_utf8_iterator_t sub_iter = lit_utf8_iterator_create (NULL, 0);

        /* Back up the saved positions, so they can be restored if the match fails. */
        uint32_t array_size = re_ctx_p->num_of_captures + re_ctx_p->num_of_non_captures;
        MEM_DEFINE_LOCAL_ARRAY (saved_bck_p, array_size, lit_utf8_iterator_t);
        size_t size = (size_t) (array_size) * sizeof (lit_utf8_iterator_t);
        memcpy (saved_bck_p, re_ctx_p->saved_p, size);

        do
        {
          uint32_t offset = re_get_value (&bc_p);

          if (!sub_iter.buf_p)
          {
            match_value = re_match_regexp (re_ctx_p, bc_p, iter, &sub_iter);
            if (ecma_is_completion_value_throw (match_value))
            {
              break;
            }
          }

          bc_p += offset;
        }
        while (re_get_opcode (&bc_p) == RE_OP_ALTERNATIVE);

        if (!ecma_is_completion_value_throw (match_value))
        {
          JERRY_DDLOG ("Execute RE_OP_LOOKAHEAD_POS/NEG: ");
          ecma_free_completion_value (match_value);

          if ((op == RE_OP_LOOKAHEAD_POS && sub_iter.buf_p)
              || (op == RE_OP_LOOKAHEAD_NEG && !sub_iter.buf_p))
          {
            JERRY_DDLOG ("match\n");
            /* The lookahead consumes no input: continue from the original position. */
            match_value = re_match_regexp (re_ctx_p, bc_p, iter, &sub_iter);
          }
          else
          {
            JERRY_DDLOG ("fail\n");
            match_value = ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
          }
        }

        if (!ecma_is_completion_value_throw (match_value))
        {
          if (ecma_is_value_true (match_value))
          {
            *out_iter_p = sub_iter;
          }
          else
          {
            JERRY_ASSERT (ecma_is_value_boolean (match_value));
            /* restore saved */
            memcpy (re_ctx_p->saved_p, saved_bck_p, size);
          }
        }

        MEM_FINALIZE_LOCAL_ARRAY (saved_bck_p);
        return match_value;
      }
      case RE_OP_CHAR_CLASS:
      case RE_OP_INV_CHAR_CLASS:
      {
        uint32_t num_of_ranges;
        bool is_match;

        JERRY_DDLOG ("Execute RE_OP_CHAR_CLASS/RE_OP_INV_CHAR_CLASS, ");

        if (lit_utf8_iterator_is_eos (&iter))
        {
          JERRY_DDLOG ("fail\n");
          return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
        }

        bool is_ignorecase = re_ctx_p->flags & RE_FLAG_IGNORE_CASE;
        ecma_char_t curr_ch = re_canonicalize (lit_utf8_iterator_read_next (&iter), is_ignorecase);

        num_of_ranges = re_get_value (&bc_p);
        is_match = false;

        while (num_of_ranges)
        {
          ecma_char_t ch1 = re_canonicalize ((ecma_char_t) re_get_value (&bc_p), is_ignorecase);
          ecma_char_t ch2 = re_canonicalize ((ecma_char_t) re_get_value (&bc_p), is_ignorecase);
          JERRY_DDLOG ("num_of_ranges=%d, ch1=%d, ch2=%d, curr_ch=%d; ",
                       num_of_ranges, ch1, ch2, curr_ch);

          if (curr_ch >= ch1 && curr_ch <= ch2)
          {
            /* We must read all the ranges from bytecode. */
            is_match = true;
          }

          num_of_ranges--;
        }

        if (op == RE_OP_CHAR_CLASS)
        {
          if (!is_match)
          {
            JERRY_DDLOG ("fail\n");
            return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
          }
        }
        else
        {
          JERRY_ASSERT (op == RE_OP_INV_CHAR_CLASS);

          if (is_match)
          {
            JERRY_DDLOG ("fail\n");
            return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
          }
        }

        JERRY_DDLOG ("match\n");
        break; /* tail merge */
      }
      case RE_OP_BACKREFERENCE:
      {
        uint32_t backref_idx;

        backref_idx = re_get_value (&bc_p);
        JERRY_DDLOG ("Execute RE_OP_BACKREFERENCE (idx: %d): ", backref_idx);
        backref_idx *= 2; /* backref n -> saved indices [n*2, n*2+1] */
        JERRY_ASSERT (backref_idx >= 2 && backref_idx + 1 < re_ctx_p->num_of_captures);

        if (!re_ctx_p->saved_p[backref_idx].buf_p || !re_ctx_p->saved_p[backref_idx + 1].buf_p)
        {
          JERRY_DDLOG ("match\n");
          break; /* capture is 'undefined', always matches! */
        }

        /* Compare the captured substring with the input code unit by code unit. */
        lit_utf8_iterator_t sub_iter = re_ctx_p->saved_p[backref_idx];

        while (sub_iter.buf_pos.offset < re_ctx_p->saved_p[backref_idx + 1].buf_pos.offset)
        {
          ecma_char_t ch1, ch2;

          if ((iter.buf_p + iter.buf_pos.offset) >= re_ctx_p->input_end_p)
          {
            JERRY_DDLOG ("fail\n");
            return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
          }

          ch1 = lit_utf8_iterator_read_next (&sub_iter);
          ch2 = lit_utf8_iterator_read_next (&iter);

          if (ch1 != ch2)
          {
            JERRY_DDLOG ("fail\n");
            return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
          }
        }

        JERRY_DDLOG ("match\n");
        break; /* tail merge */
      }
      case RE_OP_SAVE_AT_START:
      {
        re_bytecode_t *old_bc_p;

        JERRY_DDLOG ("Execute RE_OP_SAVE_AT_START\n");
        lit_utf8_iterator_t old_start_p = re_ctx_p->saved_p[RE_GLOBAL_START_IDX];
        re_ctx_p->saved_p[RE_GLOBAL_START_IDX] = iter;

        do
        {
          uint32_t offset = re_get_value (&bc_p);
          lit_utf8_iterator_t sub_iter = lit_utf8_iterator_create (NULL, 0);
          ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p, iter, &sub_iter);

          if (ecma_is_value_true (match_value))
          {
            *out_iter_p = sub_iter;
            return match_value; /* match */
          }
          else if (ecma_is_completion_value_throw (match_value))
          {
            return match_value;
          }

          bc_p += offset;
          old_bc_p = bc_p;
        }
        while (re_get_opcode (&bc_p) == RE_OP_ALTERNATIVE);

        bc_p = old_bc_p;

        /* No alternative matched: restore the previous global start position. */
        re_ctx_p->saved_p[RE_GLOBAL_START_IDX] = old_start_p;
        return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
      }
      case RE_OP_SAVE_AND_MATCH:
      {
        JERRY_DDLOG ("End of pattern is reached: match\n");
        re_ctx_p->saved_p[RE_GLOBAL_END_IDX] = iter;
        *out_iter_p = iter;
        return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_TRUE); /* match */
      }
      case RE_OP_ALTERNATIVE:
      {
        /*
         * Alternatives should be jump over, when alternative opcode appears.
         */
        uint32_t offset = re_get_value (&bc_p);
        JERRY_DDLOG ("Execute RE_OP_ALTERNATIVE");
        bc_p += offset;

        while (*bc_p == RE_OP_ALTERNATIVE)
        {
          bc_p++;
          offset = re_get_value (&bc_p);
          /* Logged only after the offset is known, so that '%d' has a matching argument. */
          JERRY_DDLOG (", jump: %d", offset);
          bc_p += offset;
        }

        JERRY_DDLOG ("\n");
        break; /* tail merge */
      }
      case RE_OP_CAPTURE_NON_GREEDY_ZERO_GROUP_START:
      case RE_OP_NON_CAPTURE_NON_GREEDY_ZERO_GROUP_START:
      {
        /*
         * On non-greedy iterations we have to execute the bytecode
         * after the group first, if zero iteration is allowed.
         */
        uint32_t start_idx, iter_idx, offset;
        lit_utf8_iterator_t old_start = lit_utf8_iterator_create (NULL, 0);
        lit_utf8_iterator_t sub_iter = lit_utf8_iterator_create (NULL, 0);
        re_bytecode_t *old_bc_p;

        old_bc_p = bc_p; /* save the bytecode start position of the group start */
        start_idx = re_get_value (&bc_p);
        offset = re_get_value (&bc_p);

        if (RE_IS_CAPTURE_GROUP (op))
        {
          JERRY_ASSERT (start_idx <= re_ctx_p->num_of_captures / 2);
          iter_idx = start_idx - 1;
          start_idx *= 2;

          old_start = re_ctx_p->saved_p[start_idx];
          re_ctx_p->saved_p[start_idx] = iter;
        }
        else
        {
          JERRY_ASSERT (start_idx < re_ctx_p->num_of_non_captures);
          iter_idx = start_idx + (re_ctx_p->num_of_captures / 2) - 1;
          start_idx += re_ctx_p->num_of_captures;
        }

        re_ctx_p->num_of_iterations_p[iter_idx] = 0;

        /* Jump all over to the end of the END opcode. */
        bc_p += offset;

        /* Try to match after the close paren if zero is allowed */
        ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p, iter, &sub_iter);

        if (ecma_is_value_true (match_value))
        {
          *out_iter_p = sub_iter;
          return match_value; /* match */
        }
        else if (ecma_is_completion_value_throw (match_value))
        {
          return match_value;
        }

        if (RE_IS_CAPTURE_GROUP (op))
        {
          re_ctx_p->saved_p[start_idx] = old_start;
        }

        /* Rewind to the group start operands, which are re-read by the case below. */
        bc_p = old_bc_p;
        /* FALLTHRU */
      }
      case RE_OP_CAPTURE_GROUP_START:
      case RE_OP_CAPTURE_GREEDY_ZERO_GROUP_START:
      case RE_OP_NON_CAPTURE_GROUP_START:
      case RE_OP_NON_CAPTURE_GREEDY_ZERO_GROUP_START:
      {
        uint32_t start_idx, iter_idx, old_iteration_cnt, offset;
        lit_utf8_iterator_t sub_iter = lit_utf8_iterator_create (NULL, 0);
        re_bytecode_t *old_bc_p;
        re_bytecode_t *end_bc_p = NULL;
        start_idx = re_get_value (&bc_p);

        /* Only the "zero iteration allowed" variants carry an offset to the bytecode after the group. */
        if (op != RE_OP_CAPTURE_GROUP_START
            && op != RE_OP_NON_CAPTURE_GROUP_START)
        {
          offset = re_get_value (&bc_p);
          end_bc_p = bc_p + offset;
        }

        if (RE_IS_CAPTURE_GROUP (op))
        {
          JERRY_ASSERT (start_idx <= re_ctx_p->num_of_captures / 2);
          iter_idx = start_idx - 1;
          start_idx *= 2;
        }
        else
        {
          JERRY_ASSERT (start_idx < re_ctx_p->num_of_non_captures);
          iter_idx = start_idx + (re_ctx_p->num_of_captures / 2) - 1;
          start_idx += re_ctx_p->num_of_captures;
        }

        lit_utf8_iterator_t old_start = re_ctx_p->saved_p[start_idx];
        old_iteration_cnt = re_ctx_p->num_of_iterations_p[iter_idx];
        re_ctx_p->saved_p[start_idx] = iter;
        re_ctx_p->num_of_iterations_p[iter_idx] = 0;

        do
        {
          offset = re_get_value (&bc_p);
          ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p, iter, &sub_iter);

          if (ecma_is_value_true (match_value))
          {
            *out_iter_p = sub_iter;
            return match_value; /* match */
          }
          else if (ecma_is_completion_value_throw (match_value))
          {
            return match_value;
          }

          bc_p += offset;
          old_bc_p = bc_p;
        }
        while (re_get_opcode (&bc_p) == RE_OP_ALTERNATIVE);

        bc_p = old_bc_p;
        re_ctx_p->num_of_iterations_p[iter_idx] = old_iteration_cnt;

        /* Try to match after the close paren if zero is allowed. */
        if (op == RE_OP_CAPTURE_GREEDY_ZERO_GROUP_START
            || op == RE_OP_NON_CAPTURE_GREEDY_ZERO_GROUP_START)
        {
          JERRY_ASSERT (end_bc_p);
          ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, end_bc_p, iter, &sub_iter);

          if (ecma_is_value_true (match_value))
          {
            *out_iter_p = sub_iter;
            return match_value; /* match */
          }
          else if (ecma_is_completion_value_throw (match_value))
          {
            return match_value;
          }
        }

        re_ctx_p->saved_p[start_idx] = old_start;
        return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
      }
      case RE_OP_CAPTURE_NON_GREEDY_GROUP_END:
      case RE_OP_NON_CAPTURE_NON_GREEDY_GROUP_END:
      {
        uint32_t end_idx, iter_idx, min, max;
        re_bytecode_t *old_bc_p;

        /*
         * On non-greedy iterations we have to execute the bytecode
         * after the group first. Try to iterate only if it fails.
         */
        old_bc_p = bc_p; /* save the bytecode start position of the group end */
        end_idx = re_get_value (&bc_p);
        min = re_get_value (&bc_p);
        max = re_get_value (&bc_p);
        re_get_value (&bc_p); /* start offset */

        if (RE_IS_CAPTURE_GROUP (op))
        {
          JERRY_ASSERT (end_idx <= re_ctx_p->num_of_captures / 2);
          iter_idx = end_idx - 1;
          end_idx = (end_idx * 2) + 1;
        }
        else
        {
          JERRY_ASSERT (end_idx <= re_ctx_p->num_of_non_captures);
          iter_idx = end_idx + (re_ctx_p->num_of_captures / 2) - 1;
          end_idx += re_ctx_p->num_of_captures;
        }

        re_ctx_p->num_of_iterations_p[iter_idx]++;

        if (re_ctx_p->num_of_iterations_p[iter_idx] >= min
            && re_ctx_p->num_of_iterations_p[iter_idx] <= max)
        {
          lit_utf8_iterator_t old_end = re_ctx_p->saved_p[end_idx];
          re_ctx_p->saved_p[end_idx] = iter;

          lit_utf8_iterator_t sub_iter = lit_utf8_iterator_create (NULL, 0);
          ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p, iter, &sub_iter);

          if (ecma_is_value_true (match_value))
          {
            *out_iter_p = sub_iter;
            return match_value; /* match */
          }
          else if (ecma_is_completion_value_throw (match_value))
          {
            return match_value;
          }

          re_ctx_p->saved_p[end_idx] = old_end;
        }

        /* Undo the increment; the greedy case below counts this iteration again. */
        re_ctx_p->num_of_iterations_p[iter_idx]--;
        bc_p = old_bc_p;

        /* If non-greedy fails and try to iterate... */
        /* FALLTHRU */
      }
      case RE_OP_CAPTURE_GREEDY_GROUP_END:
      case RE_OP_NON_CAPTURE_GREEDY_GROUP_END:
      {
        uint32_t start_idx, end_idx, iter_idx, min, max, offset;
        lit_utf8_iterator_t old_start = lit_utf8_iterator_create (NULL, 0);
        lit_utf8_iterator_t old_end = lit_utf8_iterator_create (NULL, 0);
        lit_utf8_iterator_t sub_iter = lit_utf8_iterator_create (NULL, 0);
        re_bytecode_t *old_bc_p;

        end_idx = re_get_value (&bc_p);
        min = re_get_value (&bc_p);
        max = re_get_value (&bc_p);
        offset = re_get_value (&bc_p);

        if (RE_IS_CAPTURE_GROUP (op))
        {
          JERRY_ASSERT (end_idx <= re_ctx_p->num_of_captures / 2);
          iter_idx = end_idx - 1;
          start_idx = end_idx * 2;
          end_idx = start_idx + 1;
        }
        else
        {
          JERRY_ASSERT (end_idx <= re_ctx_p->num_of_non_captures);
          iter_idx = end_idx + (re_ctx_p->num_of_captures / 2) - 1;
          end_idx += re_ctx_p->num_of_captures;
          start_idx = end_idx;
        }

        /* Check the empty iteration if the minimum number of iterations is reached. */
        if (re_ctx_p->num_of_iterations_p[iter_idx] >= min
            && iter.buf_p == re_ctx_p->saved_p[start_idx].buf_p
            && iter.buf_pos.offset == re_ctx_p->saved_p[start_idx].buf_pos.offset)
        {
          return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
        }

        re_ctx_p->num_of_iterations_p[iter_idx]++;

        old_bc_p = bc_p; /* Save the bytecode end position of the END opcodes for matching after it. */
        old_end = re_ctx_p->saved_p[end_idx];
        re_ctx_p->saved_p[end_idx] = iter;

        if (re_ctx_p->num_of_iterations_p[iter_idx] < max)
        {
          /* Jump back into the group body and try one more iteration. */
          bc_p -= offset;
          offset = re_get_value (&bc_p);

          old_start = re_ctx_p->saved_p[start_idx];
          re_ctx_p->saved_p[start_idx] = iter;
          ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p, iter, &sub_iter);

          if (ecma_is_value_true (match_value))
          {
            *out_iter_p = sub_iter;
            return match_value; /* match */
          }
          else if (ecma_is_completion_value_throw (match_value))
          {
            return match_value;
          }

          re_ctx_p->saved_p[start_idx] = old_start;

          /* Try to match alternatives if any. */
          bc_p += offset;
          while (*bc_p == RE_OP_ALTERNATIVE)
          {
            bc_p++; /* RE_OP_ALTERNATIVE */
            offset = re_get_value (&bc_p);

            old_start = re_ctx_p->saved_p[start_idx];
            re_ctx_p->saved_p[start_idx] = iter;

            ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p, iter, &sub_iter);

            if (ecma_is_value_true (match_value))
            {
              *out_iter_p = sub_iter;
              return match_value; /* match */
            }
            else if (ecma_is_completion_value_throw (match_value))
            {
              return match_value;
            }

            re_ctx_p->saved_p[start_idx] = old_start;
            bc_p += offset;
          }
        }

        if (re_ctx_p->num_of_iterations_p[iter_idx] >= min
            && re_ctx_p->num_of_iterations_p[iter_idx] <= max)
        {
          /* Try to match the rest of the bytecode. */
          ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, old_bc_p, iter, &sub_iter);

          if (ecma_is_value_true (match_value))
          {
            *out_iter_p = sub_iter;
            return match_value; /* match */
          }
          else if (ecma_is_completion_value_throw (match_value))
          {
            return match_value;
          }
        }

        /* restore if fails */
        re_ctx_p->saved_p[end_idx] = old_end;
        re_ctx_p->num_of_iterations_p[iter_idx]--;
        return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
      }
      case RE_OP_NON_GREEDY_ITERATOR:
      {
        uint32_t min, max, offset, num_of_iter;
        lit_utf8_iterator_t sub_iter = lit_utf8_iterator_create (NULL, 0);

        min = re_get_value (&bc_p);
        max = re_get_value (&bc_p);

        offset = re_get_value (&bc_p);
        JERRY_DDLOG ("Non-greedy iterator, min=%lu, max=%lu, offset=%ld\n",
                     (unsigned long) min, (unsigned long) max, (long) offset);

        num_of_iter = 0;
        while (num_of_iter <= max)
        {
          /* Once the minimum is reached, the continuation is tried before another iteration. */
          if (num_of_iter >= min)
          {
            ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p + offset, iter, &sub_iter);

            if (ecma_is_value_true (match_value))
            {
              *out_iter_p = sub_iter;
              return match_value; /* match */
            }
            else if (ecma_is_completion_value_throw (match_value))
            {
              return match_value;
            }
          }

          ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p, iter, &sub_iter);

          if (!ecma_is_value_true (match_value))
          {
            if (ecma_is_completion_value_throw (match_value))
            {
              return match_value;
            }

            break;
          }

          iter = sub_iter;
          num_of_iter++;
        }
        return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
      }
      case RE_OP_GREEDY_ITERATOR:
      {
        uint32_t min, max, offset, num_of_iter;
        lit_utf8_iterator_t sub_iter = lit_utf8_iterator_create (NULL, 0);

        min = re_get_value (&bc_p);
        max = re_get_value (&bc_p);

        offset = re_get_value (&bc_p);
        JERRY_DDLOG ("Greedy iterator, min=%lu, max=%lu, offset=%ld\n",
                     (unsigned long) min, (unsigned long) max, (long) offset);

        /* First consume as many iterations as possible (at most max). */
        num_of_iter = 0;

        while (num_of_iter < max)
        {
          ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p, iter, &sub_iter);

          if (!ecma_is_value_true (match_value))
          {
            if (ecma_is_completion_value_throw (match_value))
            {
              return match_value;
            }

            break;
          }

          iter = sub_iter;
          num_of_iter++;
        }

        /* Then try the continuation, giving back one code unit at a time down to min iterations. */
        while (num_of_iter >= min)
        {
          ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p + offset, iter, &sub_iter);

          if (ecma_is_value_true (match_value))
          {
            *out_iter_p = sub_iter;
            return match_value; /* match */
          }
          else if (ecma_is_completion_value_throw (match_value))
          {
            return match_value;
          }

          if (num_of_iter == min)
          {
            break;
          }

          lit_utf8_iterator_read_prev (&iter);
          num_of_iter--;
        }
        return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
      }
      default:
      {
        JERRY_DDLOG ("UNKNOWN opcode (%d)!\n", (uint32_t) op);
        return ecma_make_throw_obj_completion_value (ecma_new_standard_error (ECMA_ERROR_COMMON));
      }
    }
  }

  JERRY_UNREACHABLE ();
  return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
} /* re_match_regexp */
/**
 * Helper function to convert a string to upper or lower case.
 *
 * The input is walked twice: the first pass only computes the size of the
 * converted string, the second pass writes the converted characters into
 * a buffer of exactly that size.
 *
 * @return completion value
 *         Returned value must be freed with ecma_free_completion_value.
 */
static ecma_completion_value_t
ecma_builtin_string_prototype_object_conversion_helper (ecma_value_t this_arg, /**< this argument */
                                                        bool lower_case) /**< convert to lower (true)
                                                                          *   or upper (false) case */
{
  ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();

  /* 1. */
  ECMA_TRY_CATCH (check_coercible_val,
                  ecma_op_check_object_coercible (this_arg),
                  ret_value);

  /* 2. */
  ECMA_TRY_CATCH (to_string_val,
                  ecma_op_to_string (this_arg),
                  ret_value);

  /* 3. */
  ecma_string_t *input_string_p = ecma_get_string_from_value (to_string_val);
  lit_utf8_size_t input_size = ecma_string_get_size (input_string_p);

  MEM_DEFINE_LOCAL_ARRAY (input_start_p,
                          input_size,
                          lit_utf8_byte_t);

  ecma_string_to_utf8_string (input_string_p,
                              input_start_p,
                              (ssize_t) (input_size));

  /*
   * The case conversion has two major phases: first we compute
   * the length of the converted string, then we encode it.
   */

  lit_utf8_size_t output_length = 0;
  lit_utf8_iterator_t input_iterator = lit_utf8_iterator_create (input_start_p, input_size);

  while (!lit_utf8_iterator_is_eos (&input_iterator))
  {
    ecma_char_t character = lit_utf8_iterator_read_next (&input_iterator);
    ecma_char_t character_buffer[LIT_MAXIMUM_OTHER_CASE_LENGTH];
    lit_utf8_byte_t utf8_byte_buffer[LIT_UTF8_MAX_BYTES_IN_CODE_POINT];
    lit_utf8_size_t character_length;

    /*
     * We need to keep surrogate pairs. Surrogates are never converted,
     * regardless they form a valid pair or not.
     *
     * The end-of-string check prevents peeking past the buffer
     * when the string ends with a lone high surrogate.
     */
    if (lit_is_code_unit_high_surrogate (character)
        && !lit_utf8_iterator_is_eos (&input_iterator))
    {
      ecma_char_t next_character = lit_utf8_iterator_peek_next (&input_iterator);

      if (lit_is_code_unit_low_surrogate (next_character))
      {
        lit_code_point_t surrogate_code_point = lit_convert_surrogate_pair_to_code_point (character, next_character);
        output_length += lit_code_point_to_utf8 (surrogate_code_point, utf8_byte_buffer);
        lit_utf8_iterator_incr (&input_iterator);
        continue;
      }
    }

    if (lower_case)
    {
      character_length = lit_char_to_lower_case (character,
                                                 character_buffer,
                                                 LIT_MAXIMUM_OTHER_CASE_LENGTH);
    }
    else
    {
      character_length = lit_char_to_upper_case (character,
                                                 character_buffer,
                                                 LIT_MAXIMUM_OTHER_CASE_LENGTH);
    }

    JERRY_ASSERT (character_length >= 1 && character_length <= LIT_MAXIMUM_OTHER_CASE_LENGTH);

    /* Only the encoded size matters here; utf8_byte_buffer is a scratch area. */
    for (lit_utf8_size_t i = 0; i < character_length; i++)
    {
      output_length += lit_code_unit_to_utf8 (character_buffer[i], utf8_byte_buffer);
    }
  }

  /* Second phase. */

  MEM_DEFINE_LOCAL_ARRAY (output_start_p,
                          output_length,
                          lit_utf8_byte_t);

  lit_utf8_byte_t *output_char_p = output_start_p;

  /* Encoding the output. */
  lit_utf8_iterator_seek_bos (&input_iterator);

  while (!lit_utf8_iterator_is_eos (&input_iterator))
  {
    ecma_char_t character = lit_utf8_iterator_read_next (&input_iterator);
    ecma_char_t character_buffer[LIT_MAXIMUM_OTHER_CASE_LENGTH];
    lit_utf8_size_t character_length;

    /*
     * We need to keep surrogate pairs. Surrogates are never converted,
     * regardless they form a valid pair or not.
     *
     * The end-of-string check must mirror the one of the first phase,
     * so that both phases agree on the output size.
     */
    if (lit_is_code_unit_high_surrogate (character)
        && !lit_utf8_iterator_is_eos (&input_iterator))
    {
      ecma_char_t next_character = lit_utf8_iterator_peek_next (&input_iterator);

      if (lit_is_code_unit_low_surrogate (next_character))
      {
        lit_code_point_t surrogate_code_point = lit_convert_surrogate_pair_to_code_point (character, next_character);
        output_char_p += lit_code_point_to_utf8 (surrogate_code_point, output_char_p);
        lit_utf8_iterator_incr (&input_iterator);
        continue;
      }
    }

    if (lower_case)
    {
      character_length = lit_char_to_lower_case (character,
                                                 character_buffer,
                                                 LIT_MAXIMUM_OTHER_CASE_LENGTH);
    }
    else
    {
      character_length = lit_char_to_upper_case (character,
                                                 character_buffer,
                                                 LIT_MAXIMUM_OTHER_CASE_LENGTH);
    }

    JERRY_ASSERT (character_length >= 1 && character_length <= LIT_MAXIMUM_OTHER_CASE_LENGTH);

    for (lit_utf8_size_t i = 0; i < character_length; i++)
    {
      output_char_p += lit_code_point_to_utf8 (character_buffer[i], output_char_p);
    }
  }

  /* Both phases must have produced exactly the same number of bytes. */
  JERRY_ASSERT (output_start_p + output_length == output_char_p);

  ecma_string_t *output_string_p = ecma_new_ecma_string_from_utf8 (output_start_p, output_length);

  ret_value = ecma_make_normal_completion_value (ecma_make_string_value (output_string_p));

  MEM_FINALIZE_LOCAL_ARRAY (output_start_p);
  MEM_FINALIZE_LOCAL_ARRAY (input_start_p);

  ECMA_FINALIZE (to_string_val);
  ECMA_FINALIZE (check_coercible_val);

  return ret_value;
} /* ecma_builtin_string_prototype_object_conversion_helper */
/**
 * The String.prototype object's 'indexOf' routine
 *
 * See also:
 *          ECMA-262 v5, 15.5.4.7
 *
 * The result is the smallest index, not smaller than the normalized start
 * position, at which the search string occurs in the original string, or -1
 * if there is no such index. An empty search string matches at the start
 * position itself.
 *
 * @return completion value
 *         Returned value must be freed with ecma_free_completion_value.
 */
static ecma_completion_value_t
ecma_builtin_string_prototype_object_index_of (ecma_value_t this_arg, /**< this argument */
                                               ecma_value_t arg1, /**< routine's first argument */
                                               ecma_value_t arg2) /**< routine's second argument */
{
  ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();

  /* 1 */
  ECMA_TRY_CATCH (check_coercible_val,
                  ecma_op_check_object_coercible (this_arg),
                  ret_value);

  /* 2 */
  ECMA_TRY_CATCH (to_str_val,
                  ecma_op_to_string (this_arg),
                  ret_value);

  /* 3 */
  ECMA_TRY_CATCH (search_str_val,
                  ecma_op_to_string (arg1),
                  ret_value);

  /* 4 */
  ECMA_OP_TO_NUMBER_TRY_CATCH (pos_num,
                               arg2,
                               ret_value);

  /* 5 */
  ecma_string_t *original_str_p = ecma_get_string_from_value (to_str_val);
  const ecma_length_t original_len = ecma_string_get_length (original_str_p);
  const lit_utf8_size_t original_size = ecma_string_get_size (original_str_p);

  /* 4b, 6 */
  /* presumably clamps the position into [0, original_len] - verify against the helper */
  ecma_length_t start = ecma_builtin_helper_string_index_normalize (pos_num, original_len);

  /* 7 */
  ecma_string_t *search_str_p = ecma_get_string_from_value (search_str_val);
  const ecma_length_t search_len = ecma_string_get_length (search_str_p);
  const lit_utf8_size_t search_size = ecma_string_get_size (search_str_p);

  /* The default result is "not found". */
  ecma_number_t *ret_num_p = ecma_alloc_number ();
  *ret_num_p = ecma_int32_to_number (-1);

  /* 8 */
  if (search_len <= original_len)
  {
    if (!search_len)
    {
      /*
       * An empty search string matches at the first candidate position,
       * which is the start position and not necessarily index 0.
       */
      *ret_num_p = ecma_uint32_to_number (start);
    }
    else
    {
      /* create utf8 string from original string and advance to start position */
      MEM_DEFINE_LOCAL_ARRAY (original_str_utf8_p,
                              original_size,
                              lit_utf8_byte_t);

      ecma_string_to_utf8_string (original_str_p,
                                  original_str_utf8_p,
                                  (ssize_t) (original_size));

      lit_utf8_iterator_t original_it = lit_utf8_iterator_create (original_str_utf8_p, original_size);

      ecma_length_t index = start;
      lit_utf8_iterator_advance (&original_it, index);

      /* create utf8 string from search string */
      MEM_DEFINE_LOCAL_ARRAY (search_str_utf8_p,
                              search_size,
                              lit_utf8_byte_t);

      ecma_string_to_utf8_string (search_str_p,
                                  search_str_utf8_p,
                                  (ssize_t) (search_size));

      lit_utf8_iterator_t search_it = lit_utf8_iterator_create (search_str_utf8_p, search_size);

      /*
       * iterate original string and try to match at each position;
       * the subtraction cannot wrap because search_len <= original_len here
       */
      bool found = false;

      while (!found && index <= original_len - search_len)
      {
        ecma_length_t match_len = 0;
        lit_utf8_iterator_pos_t stored_original_pos = lit_utf8_iterator_get_pos (&original_it);

        while (match_len < search_len
               && lit_utf8_iterator_read_next (&original_it) == lit_utf8_iterator_read_next (&search_it))
        {
          match_len++;
        }

        /* Check for match */
        if (match_len == search_len)
        {
          *ret_num_p = ecma_uint32_to_number (index);
          found = true;
        }
        else
        {
          /* reset iterators: restart the pattern and move one code unit forward in the original */
          lit_utf8_iterator_seek_bos (&search_it);
          lit_utf8_iterator_seek (&original_it, stored_original_pos);
          lit_utf8_iterator_incr (&original_it);
        }

        index++;
      }

      MEM_FINALIZE_LOCAL_ARRAY (search_str_utf8_p);
      MEM_FINALIZE_LOCAL_ARRAY (original_str_utf8_p);
    }
  }

  ecma_value_t new_value = ecma_make_number_value (ret_num_p);
  ret_value = ecma_make_normal_completion_value (new_value);

  ECMA_OP_TO_NUMBER_FINALIZE (pos_num);
  ECMA_FINALIZE (search_str_val);
  ECMA_FINALIZE (to_str_val);
  ECMA_FINALIZE (check_coercible_val);

  return ret_value;
} /* ecma_builtin_string_prototype_object_index_of */
/**
 * Replace the escape sequences of a charset with the characters they denote,
 * writing the result to the specified buffer
 *
 * Note:
 *      The converted string never needs more space than the source string,
 *      so the output buffer is expected to have the same size as the source.
 *
 * Note:
 *      A backslash followed by a line terminator sequence produces no output.
 *      A high surrogate directly followed by a low surrogate is re-encoded
 *      as a single code point.
 *
 * @return size of converted string
 */
static lit_utf8_size_t
lexer_transform_escape_sequences (const jerry_api_char_t *source_str_p, /**< string to convert,
                                                                         *   located in source buffer */
                                  lit_utf8_size_t source_str_size, /**< size of the string and of the output buffer */
                                  jerry_api_char_t *output_str_buf_p) /**< output buffer for converted string */
{
  /* Nothing to convert. */
  if (source_str_size == 0)
  {
    return 0;
  }

  JERRY_ASSERT (source_str_p != NULL);

  const size_t out_capacity = source_str_size;
  lit_utf8_byte_t *out_p = output_str_buf_p;
  ecma_char_t last_unit = LIT_CHAR_NULL;
  bool sequence_ok = true;

  lit_utf8_iterator_t reader = lit_utf8_iterator_create (source_str_p, source_str_size);

  while (!lit_utf8_iterator_is_eos (&reader))
  {
    ecma_char_t unit;
    const ecma_char_t first = lit_utf8_iterator_read_next (&reader);

    if (first != LIT_CHAR_BACKSLASH)
    {
      /* Ordinary character: copied through unchanged. */
      unit = first;
    }
    else
    {
      /* A backslash must be followed by something. */
      if (lit_utf8_iterator_is_eos (&reader))
      {
        sequence_ok = false;
        break;
      }

      const ecma_char_t escaped = lit_utf8_iterator_read_next (&reader);

      if (lit_char_is_decimal_digit (escaped))
      {
        if (!lit_char_is_octal_digit (escaped))
        {
          /* Non-octal decimal digit: the digit itself is taken. */
          unit = escaped;
        }
        else if (escaped == LIT_CHAR_0
                 && (lit_utf8_iterator_is_eos (&reader)
                     || !lit_char_is_octal_digit (lit_utf8_iterator_peek_next (&reader))))
        {
          /* A zero escape not followed by another octal digit is the null character. */
          unit = LIT_CHAR_NULL;
        }
        else
        {
          /* Implementation-defined (ECMA-262 v5, B.1.2): octal escape sequences are not implemented */
          sequence_ok = false;
          break;
        }
      }
      else if (escaped == LIT_CHAR_LOWERCASE_U || escaped == LIT_CHAR_LOWERCASE_X)
      {
        const bool is_unicode_escape = (escaped == LIT_CHAR_LOWERCASE_U);

        if (!lexer_convert_escape_sequence_digits_to_char (&reader, is_unicode_escape, &unit))
        {
          sequence_ok = false;
          break;
        }
      }
      else if (lit_char_is_line_terminator (escaped))
      {
        /* Skip \, followed by a LineTerminatorSequence (ECMA-262, v5, 7.3) */
        if (escaped == LIT_CHAR_CR
            && !lit_utf8_iterator_is_eos (&reader)
            && lit_utf8_iterator_peek_next (&reader) == LIT_CHAR_LF)
        {
          /* CR LF counts as one line terminator sequence. */
          lit_utf8_iterator_incr (&reader);
        }

        continue;
      }
      else
      {
        lexer_convert_single_escape_character (escaped, &unit);
      }
    }

    const bool completes_pair = (lit_is_code_unit_high_surrogate (last_unit)
                                 && lit_is_code_unit_low_surrogate (unit));

    if (!completes_pair)
    {
      out_p += lit_code_unit_to_utf8 (unit, out_p);
      JERRY_ASSERT (out_p <= output_str_buf_p + out_capacity);
    }
    else
    {
      /*
       * Step back over the previously written high surrogate (assumed to occupy
       * LIT_UTF8_MAX_BYTES_IN_CODE_UNIT bytes) and write the combined code point instead.
       */
      out_p -= LIT_UTF8_MAX_BYTES_IN_CODE_UNIT;

      lit_code_point_t combined = lit_convert_surrogate_pair_to_code_point (last_unit, unit);
      out_p += lit_code_point_to_utf8 (combined, out_p);
    }

    last_unit = unit;
  }

  if (!sequence_ok)
  {
    /* presumably does not return to this function - confirm against the PARSE_ERROR definition */
    PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX, "Illegal escape sequence", token_start_pos);
  }
  else
  {
    return (lit_utf8_size_t) (out_p - output_str_buf_p);
  }
} /* lexer_transform_escape_sequences */
/**
 * Create token of specified type from charset
 *
 * The literal storage is searched for the charset first; a new literal
 * is created only if no matching one exists. If the iterator flags a
 * position in the middle of a non-BMP character, the charset is first
 * re-encoded code unit by code unit into a temporary buffer.
 *
 * @return token descriptor
 */
static token
lexer_create_token_for_charset (jsp_token_type_t tt, /**< token type */
                                const lit_utf8_byte_t *charset_p, /**< charset buffer */
                                lit_utf8_size_t size) /**< size of the charset */
{
  JERRY_ASSERT (charset_p != NULL);

  lit_utf8_iterator_t iter = lit_utf8_iterator_create (charset_p, (lit_utf8_size_t) size);

  /* First scan: decide whether re-encoding is needed and compute an upper bound for its size. */
  bool needs_reencoding = false;
  lit_utf8_size_t buffer_size = 0;

  for (; !lit_utf8_iterator_is_eos (&iter); lit_utf8_iterator_incr (&iter))
  {
    if (iter.buf_pos.is_non_bmp_middle)
    {
      needs_reencoding = true;
    }

    buffer_size += LIT_CESU8_MAX_BYTES_IN_CODE_UNIT;
  }

  lit_utf8_byte_t *literal_str_p;
  lit_utf8_size_t literal_size = 0;

  if (likely (!needs_reencoding))
  {
    /* The charset can be used directly, without a temporary copy. */
    literal_str_p = (lit_utf8_byte_t *) charset_p;
    literal_size = size;

    JERRY_ASSERT (lit_is_cesu8_string_valid (literal_str_p, literal_size));
  }
  else
  {
    /* Second scan: write every code unit separately into a freshly allocated buffer. */
    lit_utf8_iterator_seek_bos (&iter);
    literal_str_p = (lit_utf8_byte_t *) jsp_mm_alloc (buffer_size);

    while (!lit_utf8_iterator_is_eos (&iter))
    {
      ecma_char_t code_unit = lit_utf8_iterator_read_next (&iter);
      literal_size += lit_code_unit_to_utf8 (code_unit, literal_str_p + literal_size);
    }
  }

  /* Reuse an existing literal if there is one, otherwise register a new literal. */
  lit_literal_t lit = lit_find_literal_by_utf8_string (literal_str_p, literal_size);

  if (lit == NULL)
  {
    lit = lit_create_literal_from_utf8_string (literal_str_p, literal_size);

    rcs_record_type_t type = rcs_record_get_type (lit);

    JERRY_ASSERT (RCS_RECORD_TYPE_IS_CHARSET (type)
                  || RCS_RECORD_TYPE_IS_MAGIC_STR (type)
                  || RCS_RECORD_TYPE_IS_MAGIC_STR_EX (type));
  }

  /* The temporary buffer exists only if the charset was re-encoded. */
  if (unlikely (needs_reencoding))
  {
    jsp_mm_free (literal_str_p);
  }

  return create_token_from_lit (tt, lit);
} /* lexer_create_token_for_charset */
/**
 * Unit test entry point: exercises the UTF-8 iterator on randomly generated
 * strings, then checks UTF-8 validation, code point decoding and code unit
 * encoding on fixed byte sequences.
 *
 * @return 0 - reached only if execution gets past all JERRY_ASSERT checks
 */
int
main (int __attr_unused___ argc,
      char __attr_unused___ **argv)
{
  TEST_INIT ();

  mem_init ();

  lit_utf8_byte_t utf8_string[max_bytes_in_string];
  ecma_char_t code_units[max_code_units_in_string];
  lit_utf8_iterator_pos_t saved_positions[max_code_units_in_string];

  for (int i = 0; i < test_iters; i++)
  {
    /* The first iteration always tests the empty string. */
    lit_utf8_size_t utf8_string_size = (i == 0) ? 0 : (lit_utf8_size_t) (rand () % max_bytes_in_string);
    ecma_length_t length = generate_utf8_string (utf8_string, utf8_string_size);

    JERRY_ASSERT (lit_utf8_string_length (utf8_string, utf8_string_size) == length);

    lit_utf8_iterator_t iter = lit_utf8_iterator_create (utf8_string, utf8_string_size);
    ecma_length_t calculated_length = 0;

    ecma_length_t code_units_count = 0;

    /* Forward walk with peek + incr: record every code unit and its iterator position. */
    while (!lit_utf8_iterator_is_eos (&iter))
    {
      code_units[code_units_count] = lit_utf8_iterator_peek_next (&iter);
      saved_positions[code_units_count] = lit_utf8_iterator_get_pos (&iter);
      code_units_count++;
      calculated_length++;

      lit_utf8_iterator_incr (&iter);
    }

    JERRY_ASSERT (length == calculated_length);

    /* Random access: seeking to a saved position must yield the recorded code unit and index. */
    if (code_units_count > 0)
    {
      for (int j = 0; j < test_subiters; j++)
      {
        ecma_length_t index = (ecma_length_t) rand () % code_units_count;
        lit_utf8_iterator_seek (&iter, saved_positions[index]);
        JERRY_ASSERT (lit_utf8_iterator_peek_next (&iter) == code_units[index]);
        JERRY_ASSERT (lit_utf8_iterator_get_index (&iter) == index);
      }
    }

    /* Backward walk with peek_prev + decr, starting from the end of the string. */
    lit_utf8_iterator_seek_eos (&iter);
    while (!lit_utf8_iterator_is_bos (&iter))
    {
      JERRY_ASSERT (code_units_count > 0);
      calculated_length--;
      JERRY_ASSERT (code_units[calculated_length] == lit_utf8_iterator_peek_prev (&iter));
      lit_utf8_iterator_decr (&iter);
    }

    JERRY_ASSERT (calculated_length == 0);

    /* Forward walk with read_next. */
    while (!lit_utf8_iterator_is_eos (&iter))
    {
      ecma_char_t code_unit = lit_utf8_iterator_read_next (&iter);
      JERRY_ASSERT (code_unit == code_units[calculated_length]);
      calculated_length++;
    }

    JERRY_ASSERT (length == calculated_length);

    /* Backward walk with read_prev. */
    while (!lit_utf8_iterator_is_bos (&iter))
    {
      JERRY_ASSERT (code_units_count > 0);
      calculated_length--;

      /*
       * The read is kept outside of the assertion (as in the read_next loop above):
       * it is what moves the iterator, so the loop must not depend on the assertion
       * expression being evaluated (JERRY_ASSERT's definition is not visible here;
       * assertion macros are commonly compiled out in release builds).
       */
      ecma_char_t code_unit = lit_utf8_iterator_read_prev (&iter);
      JERRY_ASSERT (code_units[calculated_length] == code_unit);
    }

    JERRY_ASSERT (calculated_length == 0);
  }

  /* Overlong-encoded code point */
  lit_utf8_byte_t invalid_utf8_string_1[] = {0xC0, 0x82};
  JERRY_ASSERT (!lit_is_utf8_string_valid (invalid_utf8_string_1, sizeof (invalid_utf8_string_1)));

  /* Overlong-encoded code point */
  lit_utf8_byte_t invalid_utf8_string_2[] = {0xE0, 0x80, 0x81};
  JERRY_ASSERT (!lit_is_utf8_string_valid (invalid_utf8_string_2, sizeof (invalid_utf8_string_2)));

  /* Pair of surrogates: 0xD901 0xDFF0 which encode Unicode character 0x507F0 */
  lit_utf8_byte_t invalid_utf8_string_3[] = {0xED, 0xA4, 0x81, 0xED, 0xBF, 0xB0};
  JERRY_ASSERT (!lit_is_utf8_string_valid (invalid_utf8_string_3, sizeof (invalid_utf8_string_3)));

  /* Isolated high surrogate 0xD901 */
  lit_utf8_byte_t valid_utf8_string_1[] = {0xED, 0xA4, 0x81};
  JERRY_ASSERT (lit_is_utf8_string_valid (valid_utf8_string_1, sizeof (valid_utf8_string_1)));

  /* 4-byte long utf-8 character - Unicode character 0x507F0 */
  lit_utf8_byte_t valid_utf8_string_2[] = {0xF1, 0x90, 0x9F, 0xB0};
  JERRY_ASSERT (lit_is_utf8_string_valid (valid_utf8_string_2, sizeof (valid_utf8_string_2)));

  /* Decoding a 4-byte sequence yields one code point (U+10348) and consumes all 4 bytes. */
  lit_utf8_byte_t buf[] = {0xF0, 0x90, 0x8D, 0x88};
  lit_code_point_t code_point;
  lit_utf8_size_t bytes_count = lit_read_code_point_from_utf8 (buf, sizeof (buf), &code_point);
  JERRY_ASSERT (bytes_count == 4);
  JERRY_ASSERT (code_point == 0x10348);

  /* Encoding single code units: 1-, 2- and 3-byte results. */
  lit_utf8_byte_t res_buf[3];
  lit_utf8_size_t res_size;

  res_size = lit_code_unit_to_utf8 (0x73, res_buf);
  JERRY_ASSERT (res_size == 1);
  JERRY_ASSERT (res_buf[0] == 0x73);

  res_size = lit_code_unit_to_utf8 (0x41A, res_buf);
  JERRY_ASSERT (res_size == 2);
  JERRY_ASSERT (res_buf[0] == 0xD0);
  JERRY_ASSERT (res_buf[1] == 0x9A);

  res_size = lit_code_unit_to_utf8 (0xD7FF, res_buf);
  JERRY_ASSERT (res_size == 3);
  JERRY_ASSERT (res_buf[0] == 0xED);
  JERRY_ASSERT (res_buf[1] == 0x9F);
  JERRY_ASSERT (res_buf[2] == 0xBF);

  /* The iterator reports a 4-byte character (U+10348) as two code units: a surrogate pair. */
  lit_utf8_byte_t bytes[] = {0xF0, 0x90, 0x8D, 0x88};
  lit_utf8_iterator_t iter = lit_utf8_iterator_create (bytes, sizeof (bytes));
  ecma_char_t code_unit = lit_utf8_iterator_read_next (&iter);
  JERRY_ASSERT (!lit_utf8_iterator_is_eos (&iter));
  JERRY_ASSERT (code_unit == 0xD800);
  code_unit = lit_utf8_iterator_read_next (&iter);
  JERRY_ASSERT (lit_utf8_iterator_is_eos (&iter));
  JERRY_ASSERT (code_unit == 0xDF48);

  mem_finalize (true);

  return 0;
} /* main */