/**
 * Encode a pseudo-random code point into the supplied buffer.
 *
 * A char_size of 1, 2, 3 or 4 restricts the code point to the range bounded by
 * the matching LIT_UTF8_<n>_BYTE_CODE_POINT_MIN/MAX constants; any other value
 * draws from the whole range below LIT_UTF8_4_BYTE_CODE_POINT_MAX.
 * A code point that lands inside the UTF-16 surrogate range is replaced by the
 * last code point preceding that range.
 *
 * @return value returned by lit_code_point_to_utf8 for the generated code point
 */
static lit_utf8_size_t
generate_utf8_char (utf8_char_size char_size, /**< requested size class of the character */
                    lit_utf8_byte_t *buf) /**< buffer receiving the encoded character */
{
  JERRY_ASSERT (char_size >= 0 && char_size <= LIT_UTF8_MAX_BYTES_IN_CODE_POINT);

  const lit_code_point_t random_value = (lit_code_point_t) rand ();
  lit_code_point_t generated;

  if (char_size == 4)
  {
    generated = LIT_UTF8_4_BYTE_CODE_POINT_MIN
                + random_value % (LIT_UTF8_4_BYTE_CODE_POINT_MAX - LIT_UTF8_4_BYTE_CODE_POINT_MIN);
  }
  else if (char_size == 3)
  {
    generated = LIT_UTF8_3_BYTE_CODE_POINT_MIN
                + random_value % (LIT_UTF8_3_BYTE_CODE_POINT_MAX - LIT_UTF8_3_BYTE_CODE_POINT_MIN);
  }
  else if (char_size == 2)
  {
    generated = LIT_UTF8_2_BYTE_CODE_POINT_MIN
                + random_value % (LIT_UTF8_2_BYTE_CODE_POINT_MAX - LIT_UTF8_2_BYTE_CODE_POINT_MIN);
  }
  else if (char_size == 1)
  {
    generated = random_value % LIT_UTF8_1_BYTE_CODE_POINT_MAX;
  }
  else
  {
    /* No particular size requested: any code point below the 4-byte maximum. */
    generated = random_value % LIT_UTF8_4_BYTE_CODE_POINT_MAX;
  }

  /* Surrogate code points are not emitted; substitute the code point just below the range. */
  const bool in_surrogate_range = (generated >= LIT_UTF16_HIGH_SURROGATE_MIN
                                   && generated <= LIT_UTF16_LOW_SURROGATE_MAX);

  if (in_surrogate_range)
  {
    generated = LIT_UTF16_HIGH_SURROGATE_MIN - 1;
  }

  return lit_code_point_to_utf8 (generated, buf);
} /* generate_utf8_char */
/**
 * Helper function to convert a string to upper or lower case.
 *
 * The conversion walks the input twice: the first pass only measures the
 * size of the converted string, the second pass writes the converted
 * characters into a buffer of exactly that size.
 *
 * @return completion value
 *         Returned value must be freed with ecma_free_completion_value.
 */
static ecma_completion_value_t
ecma_builtin_string_prototype_object_conversion_helper (ecma_value_t this_arg, /**< this argument */
                                                        bool lower_case) /**< convert to lower (true)
                                                                          *   or upper (false) case */
{
  ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();

  /* 1. */
  ECMA_TRY_CATCH (check_coercible_val,
                  ecma_op_check_object_coercible (this_arg),
                  ret_value);

  /* 2. */
  ECMA_TRY_CATCH (to_string_val,
                  ecma_op_to_string (this_arg),
                  ret_value);

  /* 3. */
  ecma_string_t *input_string_p = ecma_get_string_from_value (to_string_val);
  lit_utf8_size_t input_size = ecma_string_get_size (input_string_p);

  MEM_DEFINE_LOCAL_ARRAY (input_start_p,
                          input_size,
                          lit_utf8_byte_t);

  ecma_string_to_utf8_string (input_string_p,
                              input_start_p,
                              (ssize_t) (input_size));

  /*
   * The case conversion has two major phases: first we compute
   * the size of the converted string, then we encode it.
   */

  lit_utf8_size_t output_length = 0;
  lit_utf8_iterator_t input_iterator = lit_utf8_iterator_create (input_start_p, input_size);

  /* First phase: measure the output. */
  while (!lit_utf8_iterator_is_eos (&input_iterator))
  {
    ecma_char_t character = lit_utf8_iterator_read_next (&input_iterator);
    ecma_char_t character_buffer[LIT_MAXIMUM_OTHER_CASE_LENGTH];
    /* Scratch space: only the size of each encoded character is needed in this phase. */
    lit_utf8_byte_t utf8_byte_buffer[LIT_UTF8_MAX_BYTES_IN_CODE_POINT];
    lit_utf8_size_t character_length;

    /*
     * We need to keep surrogate pairs. Surrogates are never converted,
     * regardless of whether they form a valid pair or not.
     *
     * Peek only when input remains, so that a lone high surrogate at the
     * very end of the string is handled as an ordinary code unit below.
     */
    if (lit_is_code_unit_high_surrogate (character)
        && !lit_utf8_iterator_is_eos (&input_iterator))
    {
      ecma_char_t next_character = lit_utf8_iterator_peek_next (&input_iterator);

      if (lit_is_code_unit_low_surrogate (next_character))
      {
        lit_code_point_t surrogate_code_point = lit_convert_surrogate_pair_to_code_point (character,
                                                                                          next_character);

        output_length += lit_code_point_to_utf8 (surrogate_code_point, utf8_byte_buffer);

        /* Consume the low surrogate that was only peeked at. */
        lit_utf8_iterator_incr (&input_iterator);
        continue;
      }
    }

    if (lower_case)
    {
      character_length = lit_char_to_lower_case (character,
                                                 character_buffer,
                                                 LIT_MAXIMUM_OTHER_CASE_LENGTH);
    }
    else
    {
      character_length = lit_char_to_upper_case (character,
                                                 character_buffer,
                                                 LIT_MAXIMUM_OTHER_CASE_LENGTH);
    }

    JERRY_ASSERT (character_length >= 1 && character_length <= LIT_MAXIMUM_OTHER_CASE_LENGTH);

    for (lit_utf8_size_t i = 0; i < character_length; i++)
    {
      output_length += lit_code_unit_to_utf8 (character_buffer[i], utf8_byte_buffer);
    }
  }

  /* Second phase. */

  MEM_DEFINE_LOCAL_ARRAY (output_start_p,
                          output_length,
                          lit_utf8_byte_t);

  lit_utf8_byte_t *output_char_p = output_start_p;

  /* Encoding the output. */
  lit_utf8_iterator_seek_bos (&input_iterator);

  while (!lit_utf8_iterator_is_eos (&input_iterator))
  {
    ecma_char_t character = lit_utf8_iterator_read_next (&input_iterator);
    ecma_char_t character_buffer[LIT_MAXIMUM_OTHER_CASE_LENGTH];
    lit_utf8_size_t character_length;

    /*
     * Same surrogate handling as in the first phase; the two phases must
     * make identical decisions, otherwise the measured size would not
     * match the number of bytes written here.
     */
    if (lit_is_code_unit_high_surrogate (character)
        && !lit_utf8_iterator_is_eos (&input_iterator))
    {
      ecma_char_t next_character = lit_utf8_iterator_peek_next (&input_iterator);

      if (lit_is_code_unit_low_surrogate (next_character))
      {
        lit_code_point_t surrogate_code_point = lit_convert_surrogate_pair_to_code_point (character,
                                                                                          next_character);

        output_char_p += lit_code_point_to_utf8 (surrogate_code_point, output_char_p);

        /* Consume the low surrogate that was only peeked at. */
        lit_utf8_iterator_incr (&input_iterator);
        continue;
      }
    }

    if (lower_case)
    {
      character_length = lit_char_to_lower_case (character,
                                                 character_buffer,
                                                 LIT_MAXIMUM_OTHER_CASE_LENGTH);
    }
    else
    {
      character_length = lit_char_to_upper_case (character,
                                                 character_buffer,
                                                 LIT_MAXIMUM_OTHER_CASE_LENGTH);
    }

    JERRY_ASSERT (character_length >= 1 && character_length <= LIT_MAXIMUM_OTHER_CASE_LENGTH);

    for (lit_utf8_size_t i = 0; i < character_length; i++)
    {
      /* Use the same code unit encoder as the measuring phase so that both sizes agree. */
      output_char_p += lit_code_unit_to_utf8 (character_buffer[i], output_char_p);
    }
  }

  /* Every byte reserved by the first phase must have been written, and no more. */
  JERRY_ASSERT (output_start_p + output_length == output_char_p);

  ecma_string_t *output_string_p = ecma_new_ecma_string_from_utf8 (output_start_p, output_length);

  ret_value = ecma_make_normal_completion_value (ecma_make_string_value (output_string_p));

  MEM_FINALIZE_LOCAL_ARRAY (output_start_p);
  MEM_FINALIZE_LOCAL_ARRAY (input_start_p);

  ECMA_FINALIZE (to_string_val);
  ECMA_FINALIZE (check_coercible_val);

  return ret_value;
} /* ecma_builtin_string_prototype_object_conversion_helper */
/**
 * Resolve the escape sequences of a string and store the resulting string
 * in the supplied buffer.
 *
 * Note:
 *      The converted string never takes more bytes than the source string,
 *      so the output buffer is treated as being source_str_size bytes long.
 *
 * @return size of the converted string
 */
static lit_utf8_size_t
lexer_transform_escape_sequences (const jerry_api_char_t *source_str_p, /**< string to convert,
                                                                         *   located in source buffer */
                                  lit_utf8_size_t source_str_size, /**< size of the string and of the output buffer */
                                  jerry_api_char_t *output_str_buf_p) /**< output buffer for converted string */
{
  if (source_str_size == 0)
  {
    return 0;
  }

  JERRY_ASSERT (source_str_p != NULL);

  const size_t out_capacity = source_str_size;
  lit_utf8_byte_t *out_p = output_str_buf_p;
  bool sequence_ok = true;

  lit_utf8_iterator_t reader = lit_utf8_iterator_create (source_str_p, source_str_size);

  /* Last code unit written to the output, used to merge surrogate pairs. */
  ecma_char_t last_unit = LIT_CHAR_NULL;

  while (!lit_utf8_iterator_is_eos (&reader))
  {
    const ecma_char_t first = lit_utf8_iterator_read_next (&reader);
    ecma_char_t unit;

    if (first != LIT_CHAR_BACKSLASH)
    {
      /* Ordinary character: copied unchanged. */
      unit = first;
    }
    else
    {
      /* A backslash must be followed by at least one more character. */
      if (lit_utf8_iterator_is_eos (&reader))
      {
        sequence_ok = false;
        break;
      }

      const ecma_char_t escaped = lit_utf8_iterator_read_next (&reader);

      if (lit_char_is_decimal_digit (escaped))
      {
        if (!lit_char_is_octal_digit (escaped))
        {
          /* Decimal digit that is not an octal digit: taken literally. */
          unit = escaped;
        }
        else if (escaped != LIT_CHAR_0
                 || (!lit_utf8_iterator_is_eos (&reader)
                     && lit_char_is_octal_digit (lit_utf8_iterator_peek_next (&reader))))
        {
          /* Implementation-defined (ECMA-262 v5, B.1.2): octal escape sequences are not implemented */
          sequence_ok = false;
          break;
        }
        else
        {
          /* '\0' not followed by another octal digit is the null character. */
          unit = LIT_CHAR_NULL;
        }
      }
      else if (escaped == LIT_CHAR_LOWERCASE_U
               || escaped == LIT_CHAR_LOWERCASE_X)
      {
        const bool is_unicode_escape = (escaped == LIT_CHAR_LOWERCASE_U);

        if (!lexer_convert_escape_sequence_digits_to_char (&reader, is_unicode_escape, &unit))
        {
          sequence_ok = false;
          break;
        }
      }
      else if (lit_char_is_line_terminator (escaped))
      {
        /* Skip \, followed by a LineTerminatorSequence (ECMA-262, v5, 7.3) */
        if (escaped == LIT_CHAR_CR
            && !lit_utf8_iterator_is_eos (&reader)
            && lit_utf8_iterator_peek_next (&reader) == LIT_CHAR_LF)
        {
          /* CR LF counts as a single line terminator sequence. */
          lit_utf8_iterator_incr (&reader);
        }

        /* Nothing is emitted for a line continuation. */
        continue;
      }
      else
      {
        lexer_convert_single_escape_character (escaped, &unit);
      }
    }

    const bool completes_surrogate_pair = (lit_is_code_unit_high_surrogate (last_unit)
                                           && lit_is_code_unit_low_surrogate (unit));

    if (!completes_surrogate_pair)
    {
      out_p += lit_code_unit_to_utf8 (unit, out_p);
      JERRY_ASSERT (out_p <= output_str_buf_p + out_capacity);
    }
    else
    {
      /* Step back over the previously written high surrogate and store the combined code point. */
      out_p -= LIT_UTF8_MAX_BYTES_IN_CODE_UNIT;

      const lit_code_point_t combined = lit_convert_surrogate_pair_to_code_point (last_unit, unit);
      out_p += lit_code_point_to_utf8 (combined, out_p);
    }

    last_unit = unit;
  }

  if (!sequence_ok)
  {
    PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX, "Illegal escape sequence", token_start_pos);
  }
  else
  {
    return (lit_utf8_size_t) (out_p - output_str_buf_p);
  }
} /* lexer_transform_escape_sequences */