Пример #1
0
/*
 * Helper function for string indexOf and lastIndexOf functions
 *
 * This function implements string indexOf and lastIndexOf with required checks and conversions.
 *
 * See also:
 *          ECMA-262 v5, 15.5.4.7
 *          ECMA-262 v5, 15.5.4.8
 *
 * Used by:
 *         - The String.prototype.indexOf routine.
 *         - The String.prototype.lastIndexOf routine.
 *
 * @return uint32_t - (last)index of search string
 */
ecma_completion_value_t
ecma_builtin_helper_string_prototype_object_index_of (ecma_value_t this_arg, /**< this argument */
                                                      ecma_value_t arg1, /**< routine's first argument */
                                                      ecma_value_t arg2, /**< routine's second argument */
                                                      bool firstIndex) /**< routine's third argument */
{
  ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();

  /* 1 */
  ECMA_TRY_CATCH (check_coercible_val,
                  ecma_op_check_object_coercible (this_arg),
                  ret_value);

  /* 2 */
  ECMA_TRY_CATCH (to_str_val,
                  ecma_op_to_string (this_arg),
                  ret_value);

  /* 3 */
  ECMA_TRY_CATCH (search_str_val,
                  ecma_op_to_string (arg1),
                  ret_value);

  /* 4 */
  ECMA_OP_TO_NUMBER_TRY_CATCH (pos_num,
                               arg2,
                               ret_value);

  /* 6 */
  ecma_string_t *original_str_p = ecma_get_string_from_value (to_str_val);
  const ecma_length_t original_len = ecma_string_get_length (original_str_p);
  const lit_utf8_size_t original_size = ecma_string_get_size (original_str_p);

  /* 4b, 5, 7 */
  ecma_length_t start = ecma_builtin_helper_string_index_normalize (pos_num, original_len, firstIndex);

  /* 8 */
  ecma_string_t *search_str_p = ecma_get_string_from_value (search_str_val);
  const ecma_length_t search_len = ecma_string_get_length (search_str_p);
  const lit_utf8_size_t search_size = ecma_string_get_size (search_str_p);

  ecma_number_t *ret_num_p = ecma_alloc_number ();
  *ret_num_p = ecma_int32_to_number (-1);

  /* 9 */
  if (search_len <= original_len)
  {
    if (!search_len)
    {
      *ret_num_p = ecma_uint32_to_number (firstIndex ? 0 : original_len);
    }
    else
    {
      /* create utf8 string from original string and advance to position */
      MEM_DEFINE_LOCAL_ARRAY (original_str_utf8_p,
                              original_size,
                              lit_utf8_byte_t);

      ecma_string_to_utf8_string (original_str_p,
                                  original_str_utf8_p,
                                  (ssize_t) (original_size));

      lit_utf8_iterator_t original_it = lit_utf8_iterator_create (original_str_utf8_p, original_size);

      ecma_length_t index = start;
      lit_utf8_iterator_advance (&original_it, index);

      /* create utf8 string from search string */
      MEM_DEFINE_LOCAL_ARRAY (search_str_utf8_p,
                              search_size,
                              lit_utf8_byte_t);

      ecma_string_to_utf8_string (search_str_p,
                                  search_str_utf8_p,
                                  (ssize_t) (search_size));

      lit_utf8_iterator_t search_it = lit_utf8_iterator_create (search_str_utf8_p, search_size);

      /* iterate original string and try to match at each position */
      bool searching = true;

      while (searching)
      {
        /* match as long as possible */
        ecma_length_t match_len = 0;
        lit_utf8_iterator_t stored_original_it = original_it;

        while (match_len < search_len &&
               index + match_len < original_len &&
               lit_utf8_iterator_read_next (&original_it) == lit_utf8_iterator_read_next (&search_it))
        {
          match_len++;
        }

        /* check for match */
        if (match_len == search_len)
        {
          *ret_num_p = ecma_uint32_to_number (index);
          break;
        }
        else
        {
          /* inc/dec index and update iterators and search condition */
          lit_utf8_iterator_seek_bos (&search_it);
          original_it = stored_original_it;

          if (firstIndex)
          {
            if ((searching = (index <= original_len - search_len)))
            {
              lit_utf8_iterator_incr (&original_it);
              index++;
            }
          }
          else
          {
            if ((searching = (index > 0)))
            {
              lit_utf8_iterator_decr (&original_it);
              index--;
            }
          }
        }
      }

      MEM_FINALIZE_LOCAL_ARRAY (search_str_utf8_p);
      MEM_FINALIZE_LOCAL_ARRAY (original_str_utf8_p);
    }
  }

  ecma_value_t new_value = ecma_make_number_value (ret_num_p);
  ret_value = ecma_make_normal_completion_value (new_value);

  ECMA_OP_TO_NUMBER_FINALIZE (pos_num);
  ECMA_FINALIZE (search_str_val);
  ECMA_FINALIZE (to_str_val);
  ECMA_FINALIZE (check_coercible_val);

  return ret_value;
} /* ecma_builtin_helper_string_index_normalize */
Пример #2
0
int
main (int __attr_unused___ argc,
      char __attr_unused___ **argv)
{
  TEST_INIT ();

  mem_init ();

  lit_utf8_byte_t utf8_string[max_bytes_in_string];
  ecma_char_t code_units[max_code_units_in_string];
  lit_utf8_iterator_pos_t saved_positions[max_code_units_in_string];

  for (int i = 0; i < test_iters; i++)
  {
    lit_utf8_size_t utf8_string_size = (i == 0) ? 0 : (lit_utf8_size_t) (rand () % max_bytes_in_string);
    ecma_length_t length = generate_utf8_string (utf8_string, utf8_string_size);

    JERRY_ASSERT (lit_utf8_string_length (utf8_string, utf8_string_size) == length);

    lit_utf8_iterator_t iter = lit_utf8_iterator_create (utf8_string, utf8_string_size);
    ecma_length_t calculated_length = 0;

    ecma_length_t code_units_count = 0;
    while (!lit_utf8_iterator_is_eos (&iter))
    {
      code_units[code_units_count] = lit_utf8_iterator_peek_next (&iter);
      saved_positions[code_units_count] = lit_utf8_iterator_get_pos (&iter);
      code_units_count++;
      calculated_length++;

      lit_utf8_iterator_incr (&iter);
    }

    JERRY_ASSERT (length == calculated_length);

    if (code_units_count > 0)
    {
      for (int j = 0; j < test_subiters; j++)
      {
        ecma_length_t index = (ecma_length_t) rand () % code_units_count;
        lit_utf8_iterator_seek (&iter, saved_positions[index]);
        JERRY_ASSERT (lit_utf8_iterator_peek_next (&iter) == code_units[index]);
        JERRY_ASSERT (lit_utf8_iterator_get_index (&iter) == index);
      }
    }

    lit_utf8_iterator_seek_eos (&iter);
    while (!lit_utf8_iterator_is_bos (&iter))
    {
      JERRY_ASSERT (code_units_count > 0);
      calculated_length--;
      JERRY_ASSERT (code_units[calculated_length] == lit_utf8_iterator_peek_prev (&iter));
      lit_utf8_iterator_decr (&iter);
    }

    JERRY_ASSERT (calculated_length == 0);

    while (!lit_utf8_iterator_is_eos (&iter))
    {
      ecma_char_t code_unit = lit_utf8_iterator_read_next (&iter);
      JERRY_ASSERT (code_unit == code_units[calculated_length]);
      calculated_length++;
    }

    JERRY_ASSERT (length == calculated_length);

    while (!lit_utf8_iterator_is_bos (&iter))
    {
      JERRY_ASSERT (code_units_count > 0);
      calculated_length--;
      JERRY_ASSERT (code_units[calculated_length] == lit_utf8_iterator_read_prev (&iter));
    }

    JERRY_ASSERT (calculated_length == 0);
  }

  /* Overlong-encoded code point */
  lit_utf8_byte_t invalid_utf8_string_1[] = {0xC0, 0x82};
  JERRY_ASSERT (!lit_is_utf8_string_valid (invalid_utf8_string_1, sizeof (invalid_utf8_string_1)));

  /* Overlong-encoded code point */
  lit_utf8_byte_t invalid_utf8_string_2[] = {0xE0, 0x80, 0x81};
  JERRY_ASSERT (!lit_is_utf8_string_valid (invalid_utf8_string_2, sizeof (invalid_utf8_string_2)));

  /* Pair of surrogates: 0xD901 0xDFF0 which encode Unicode character 0x507F0 */
  lit_utf8_byte_t invalid_utf8_string_3[] = {0xED, 0xA4, 0x81, 0xED, 0xBF, 0xB0};
  JERRY_ASSERT (!lit_is_utf8_string_valid (invalid_utf8_string_3, sizeof (invalid_utf8_string_3)));

  /* Isolated high surrogate 0xD901 */
  lit_utf8_byte_t valid_utf8_string_1[] = {0xED, 0xA4, 0x81};
  JERRY_ASSERT (lit_is_utf8_string_valid (valid_utf8_string_1, sizeof (valid_utf8_string_1)));

  /* 4-byte long utf-8 character - Unicode character 0x507F0 */
  lit_utf8_byte_t valid_utf8_string_2[] = {0xF1, 0x90, 0x9F, 0xB0};
  JERRY_ASSERT (lit_is_utf8_string_valid (valid_utf8_string_2, sizeof (valid_utf8_string_2)));

  lit_utf8_byte_t buf[] = {0xF0, 0x90, 0x8D, 0x88};
  lit_code_point_t code_point;
  lit_utf8_size_t bytes_count = lit_read_code_point_from_utf8 (buf, sizeof (buf), &code_point);
  JERRY_ASSERT (bytes_count == 4);
  JERRY_ASSERT (code_point == 0x10348);

  lit_utf8_byte_t res_buf[3];
  lit_utf8_size_t res_size;

  res_size = lit_code_unit_to_utf8 (0x73, res_buf);
  JERRY_ASSERT (res_size == 1);
  JERRY_ASSERT (res_buf[0] == 0x73);

  res_size = lit_code_unit_to_utf8 (0x41A, res_buf);
  JERRY_ASSERT (res_size == 2);
  JERRY_ASSERT (res_buf[0] == 0xD0);
  JERRY_ASSERT (res_buf[1] == 0x9A);

  res_size = lit_code_unit_to_utf8 (0xD7FF, res_buf);
  JERRY_ASSERT (res_size == 3);
  JERRY_ASSERT (res_buf[0] == 0xED);
  JERRY_ASSERT (res_buf[1] == 0x9F);
  JERRY_ASSERT (res_buf[2] == 0xBF);

  lit_utf8_byte_t bytes[] = {0xF0, 0x90, 0x8D, 0x88};
  lit_utf8_iterator_t iter = lit_utf8_iterator_create (bytes, sizeof (bytes));
  ecma_char_t code_unit = lit_utf8_iterator_read_next (&iter);
  JERRY_ASSERT (!lit_utf8_iterator_is_eos (&iter));
  JERRY_ASSERT (code_unit == 0xD800);
  code_unit = lit_utf8_iterator_read_next (&iter);
  JERRY_ASSERT (lit_utf8_iterator_is_eos (&iter));
  JERRY_ASSERT (code_unit == 0xDF48);

  mem_finalize (true);
  return 0;
}