Beispiel #1
0
/**
 * Dump the specified line of the source script
 *
 * The source is scanned from its beginning and line terminators are counted
 * (a CR immediately followed by an LF counts as a single terminator) until the
 * requested line is reached. Every code unit of that line, excluding its line
 * terminator, is then passed to lit_put_ecma_char.
 *
 * Nothing is dumped if the source ends before the requested line starts.
 */
void
lexer_dump_line (size_t line) /**< line number (0 is the first line) */
{
  lit_utf8_iterator_t iter = src_iter;
  lit_utf8_iterator_seek_bos (&iter);

  /* Phase 1: skip the lines that precede the requested one. */
  size_t current_line = 0;

  while (current_line != line)
  {
    if (lit_utf8_iterator_is_eos (&iter))
    {
      /* The source has fewer lines than requested. */
      return;
    }

    const ecma_char_t skipped_unit = lit_utf8_iterator_read_next (&iter);

    if (!lit_char_is_line_terminator (skipped_unit))
    {
      continue;
    }

    current_line++;

    /* CR LF is a single line break: consume the LF as well. */
    if (skipped_unit == LIT_CHAR_CR
        && !lit_utf8_iterator_is_eos (&iter)
        && lit_utf8_iterator_peek_next (&iter) == LIT_CHAR_LF)
    {
      lit_utf8_iterator_incr (&iter);
    }
  }

  /* Phase 2: output code units up to the next line terminator or the end of the source. */
  while (!lit_utf8_iterator_is_eos (&iter))
  {
    const ecma_char_t line_unit = lit_utf8_iterator_read_next (&iter);

    if (lit_char_is_line_terminator (line_unit))
    {
      return;
    }

    lit_put_ecma_char (line_unit);
  }
} /* lexer_dump_line */
/**
 * Helper function for string indexOf and lastIndexOf functions
 *
 * This function implements string indexOf and lastIndexOf with required checks and conversions.
 *
 * See also:
 *          ECMA-262 v5, 15.5.4.7
 *          ECMA-262 v5, 15.5.4.8
 *
 * Used by:
 *         - The String.prototype.indexOf routine.
 *         - The String.prototype.lastIndexOf routine.
 *
 * @return completion value
 *         On normal completion the value is a number: the (last) index of the search string
 *         or -1 if the search string was not found.
 *         Returned value must be freed with ecma_free_completion_value.
 */
ecma_completion_value_t
ecma_builtin_helper_string_prototype_object_index_of (ecma_value_t this_arg, /**< this argument */
                                                      ecma_value_t arg1, /**< routine's first argument */
                                                      ecma_value_t arg2, /**< routine's second argument */
                                                      bool firstIndex) /**< true - indexOf (search forward),
                                                                        *   false - lastIndexOf (search backward) */
{
  ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();

  /* 1 */
  ECMA_TRY_CATCH (check_coercible_val,
                  ecma_op_check_object_coercible (this_arg),
                  ret_value);

  /* 2 */
  ECMA_TRY_CATCH (to_str_val,
                  ecma_op_to_string (this_arg),
                  ret_value);

  /* 3 */
  ECMA_TRY_CATCH (search_str_val,
                  ecma_op_to_string (arg1),
                  ret_value);

  /* 4 */
  ECMA_OP_TO_NUMBER_TRY_CATCH (pos_num,
                               arg2,
                               ret_value);

  /* 6 */
  ecma_string_t *original_str_p = ecma_get_string_from_value (to_str_val);
  const ecma_length_t original_len = ecma_string_get_length (original_str_p);
  const lit_utf8_size_t original_size = ecma_string_get_size (original_str_p);

  /* 4b, 5, 7 */
  /* NOTE(review): start is assumed to be clamped to [0, original_len] by the normalize helper,
   * since it is used below as an iterator offset - confirm against the helper. */
  ecma_length_t start = ecma_builtin_helper_string_index_normalize (pos_num, original_len, firstIndex);

  /* 8 */
  ecma_string_t *search_str_p = ecma_get_string_from_value (search_str_val);
  const ecma_length_t search_len = ecma_string_get_length (search_str_p);
  const lit_utf8_size_t search_size = ecma_string_get_size (search_str_p);

  /* The result stays -1 unless a match is found. */
  ecma_number_t *ret_num_p = ecma_alloc_number ();
  *ret_num_p = ecma_int32_to_number (-1);

  /* 9 */
  if (search_len <= original_len)
  {
    if (!search_len)
    {
      /*
       * The empty string matches at every position, so the answer is the start position itself:
       * it is both the smallest index not smaller than start (indexOf) and the largest index
       * not larger than start (lastIndexOf).
       */
      *ret_num_p = ecma_uint32_to_number (start);
    }
    else
    {
      /* create utf8 string from original string and advance to position */
      MEM_DEFINE_LOCAL_ARRAY (original_str_utf8_p,
                              original_size,
                              lit_utf8_byte_t);

      ecma_string_to_utf8_string (original_str_p,
                                  original_str_utf8_p,
                                  (ssize_t) (original_size));

      lit_utf8_iterator_t original_it = lit_utf8_iterator_create (original_str_utf8_p, original_size);

      ecma_length_t index = start;
      lit_utf8_iterator_advance (&original_it, index);

      /* create utf8 string from search string */
      MEM_DEFINE_LOCAL_ARRAY (search_str_utf8_p,
                              search_size,
                              lit_utf8_byte_t);

      ecma_string_to_utf8_string (search_str_p,
                                  search_str_utf8_p,
                                  (ssize_t) (search_size));

      lit_utf8_iterator_t search_it = lit_utf8_iterator_create (search_str_utf8_p, search_size);

      /* iterate original string and try to match at each position */
      bool searching = true;

      while (searching)
      {
        /* match as long as possible */
        ecma_length_t match_len = 0;
        lit_utf8_iterator_t stored_original_it = original_it;

        /*
         * The bound on index + match_len keeps the original iterator from being read past
         * its end when the candidate position is closer to the end than search_len.
         */
        while (match_len < search_len &&
               index + match_len < original_len &&
               lit_utf8_iterator_read_next (&original_it) == lit_utf8_iterator_read_next (&search_it))
        {
          match_len++;
        }

        /* check for match */
        if (match_len == search_len)
        {
          *ret_num_p = ecma_uint32_to_number (index);
          break;
        }
        else
        {
          /* inc/dec index and update iterators and search condition */
          lit_utf8_iterator_seek_bos (&search_it);
          original_it = stored_original_it;

          if (firstIndex)
          {
            /*
             * A full match needs index + search_len <= original_len, so there is nothing
             * left to try once index has reached original_len - search_len.
             * The subtraction cannot wrap because search_len <= original_len here.
             */
            if ((searching = (index < original_len - search_len)))
            {
              lit_utf8_iterator_incr (&original_it);
              index++;
            }
          }
          else
          {
            if ((searching = (index > 0)))
            {
              lit_utf8_iterator_decr (&original_it);
              index--;
            }
          }
        }
      }

      MEM_FINALIZE_LOCAL_ARRAY (search_str_utf8_p);
      MEM_FINALIZE_LOCAL_ARRAY (original_str_utf8_p);
    }
  }

  ecma_value_t new_value = ecma_make_number_value (ret_num_p);
  ret_value = ecma_make_normal_completion_value (new_value);

  ECMA_OP_TO_NUMBER_FINALIZE (pos_num);
  ECMA_FINALIZE (search_str_val);
  ECMA_FINALIZE (to_str_val);
  ECMA_FINALIZE (check_coercible_val);

  return ret_value;
} /* ecma_builtin_helper_string_prototype_object_index_of */
/**
 * Helper function to convert a string to upper or lower case.
 *
 * The this argument is checked for coercibility and converted to a string, then every
 * code unit is replaced by its lower or upper case mapping (which may be longer than one
 * code unit). Surrogate code units are copied unchanged; a high surrogate followed by
 * a low surrogate is re-encoded as a single code point.
 *
 * @return completion value
 *         Returned value must be freed with ecma_free_completion_value.
 */
static ecma_completion_value_t
ecma_builtin_string_prototype_object_conversion_helper (ecma_value_t this_arg, /**< this argument */
                                                        bool lower_case) /**< convert to lower (true)
                                                                          *   or upper (false) case */
{
  ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();

  /* 1. */
  ECMA_TRY_CATCH (check_coercible_val,
                  ecma_op_check_object_coercible (this_arg),
                  ret_value);

  /* 2. */
  ECMA_TRY_CATCH (to_string_val,
                  ecma_op_to_string (this_arg),
                  ret_value);

  /* 3. */
  ecma_string_t *input_string_p = ecma_get_string_from_value (to_string_val);
  lit_utf8_size_t input_size = ecma_string_get_size (input_string_p);

  MEM_DEFINE_LOCAL_ARRAY (input_start_p,
                          input_size,
                          lit_utf8_byte_t);

  ecma_string_to_utf8_string (input_string_p,
                              input_start_p,
                              (ssize_t) (input_size));

  /*
   * The case conversion has two major phases: first we compute
   * the size in bytes of the converted string, then we encode it
   * into a buffer of exactly that size.
   */

  lit_utf8_size_t output_length = 0;
  lit_utf8_iterator_t input_iterator = lit_utf8_iterator_create (input_start_p, input_size);

  while (!lit_utf8_iterator_is_eos (&input_iterator))
  {
    ecma_char_t character = lit_utf8_iterator_read_next (&input_iterator);
    ecma_char_t character_buffer[LIT_MAXIMUM_OTHER_CASE_LENGTH];
    lit_utf8_byte_t utf8_byte_buffer[LIT_UTF8_MAX_BYTES_IN_CODE_POINT];
    lit_utf8_size_t character_length;

    /*
     * We need to keep surrogate pairs. Surrogates are never converted,
     * regardless they form a valid pair or not.
     *
     * A high surrogate may be the very last code unit of the string,
     * so only peek at the next one if the iterator is not at the end.
     */
    if (lit_is_code_unit_high_surrogate (character)
        && !lit_utf8_iterator_is_eos (&input_iterator))
    {
      ecma_char_t next_character = lit_utf8_iterator_peek_next (&input_iterator);

      if (lit_is_code_unit_low_surrogate (next_character))
      {
        lit_code_point_t surrogate_code_point = lit_convert_surrogate_pair_to_code_point (character, next_character);
        output_length += lit_code_point_to_utf8 (surrogate_code_point, utf8_byte_buffer);
        lit_utf8_iterator_incr (&input_iterator);
        continue;
      }
    }

    if (lower_case)
    {
      character_length = lit_char_to_lower_case (character,
                                                 character_buffer,
                                                 LIT_MAXIMUM_OTHER_CASE_LENGTH);
    }
    else
    {
      character_length = lit_char_to_upper_case (character,
                                                 character_buffer,
                                                 LIT_MAXIMUM_OTHER_CASE_LENGTH);
    }

    JERRY_ASSERT (character_length >= 1 && character_length <= LIT_MAXIMUM_OTHER_CASE_LENGTH);

    for (lit_utf8_size_t i = 0; i < character_length; i++)
    {
      /*
       * Measure with the same encoder that the second phase writes with,
       * so the computed size always equals the number of bytes written.
       */
      output_length += lit_code_point_to_utf8 (character_buffer[i], utf8_byte_buffer);
    }
  }

  /* Second phase. */

  MEM_DEFINE_LOCAL_ARRAY (output_start_p,
                          output_length,
                          lit_utf8_byte_t);

  lit_utf8_byte_t *output_char_p = output_start_p;

  /* Encoding the output. */
  lit_utf8_iterator_seek_bos (&input_iterator);

  while (!lit_utf8_iterator_is_eos (&input_iterator))
  {
    ecma_char_t character = lit_utf8_iterator_read_next (&input_iterator);
    ecma_char_t character_buffer[LIT_MAXIMUM_OTHER_CASE_LENGTH];
    lit_utf8_size_t character_length;

    /*
     * We need to keep surrogate pairs. Surrogates are never converted,
     * regardless they form a valid pair or not.
     *
     * The end-of-string check must mirror the first phase exactly.
     */
    if (lit_is_code_unit_high_surrogate (character)
        && !lit_utf8_iterator_is_eos (&input_iterator))
    {
      ecma_char_t next_character = lit_utf8_iterator_peek_next (&input_iterator);

      if (lit_is_code_unit_low_surrogate (next_character))
      {
        lit_code_point_t surrogate_code_point = lit_convert_surrogate_pair_to_code_point (character, next_character);
        output_char_p += lit_code_point_to_utf8 (surrogate_code_point, output_char_p);
        lit_utf8_iterator_incr (&input_iterator);
        continue;
      }
    }

    if (lower_case)
    {
      character_length = lit_char_to_lower_case (character,
                                                 character_buffer,
                                                 LIT_MAXIMUM_OTHER_CASE_LENGTH);
    }
    else
    {
      character_length = lit_char_to_upper_case (character,
                                                 character_buffer,
                                                 LIT_MAXIMUM_OTHER_CASE_LENGTH);
    }

    JERRY_ASSERT (character_length >= 1 && character_length <= LIT_MAXIMUM_OTHER_CASE_LENGTH);

    for (lit_utf8_size_t i = 0; i < character_length; i++)
    {
      output_char_p += lit_code_point_to_utf8 (character_buffer[i], output_char_p);
    }
  }

  /* Both phases must agree on the size of the output. */
  JERRY_ASSERT (output_start_p + output_length == output_char_p);

  ecma_string_t *output_string_p = ecma_new_ecma_string_from_utf8 (output_start_p, output_length);

  ret_value = ecma_make_normal_completion_value (ecma_make_string_value (output_string_p));

  MEM_FINALIZE_LOCAL_ARRAY (output_start_p);
  MEM_FINALIZE_LOCAL_ARRAY (input_start_p);

  ECMA_FINALIZE (to_string_val);
  ECMA_FINALIZE (check_coercible_val);

  return ret_value;
} /* ecma_builtin_string_prototype_object_conversion_helper */
/**
 * The String.prototype object's 'indexOf' routine
 *
 * Searches the string value of the this argument, starting at the normalized position
 * derived from the second argument, for the first occurrence of the string value
 * of the first argument.
 *
 * See also:
 *          ECMA-262 v5, 15.5.4.7
 *
 * @return completion value
 *         On normal completion the value is a number: the index of the first match
 *         or -1 if there is no match.
 *         Returned value must be freed with ecma_free_completion_value.
 */
static ecma_completion_value_t
ecma_builtin_string_prototype_object_index_of (ecma_value_t this_arg, /**< this argument */
                                               ecma_value_t arg1, /**< routine's first argument */
                                               ecma_value_t arg2) /**< routine's second argument */
{
  ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();

  /* 1 */
  ECMA_TRY_CATCH (check_coercible_val,
                  ecma_op_check_object_coercible (this_arg),
                  ret_value);

  /* 2 */
  ECMA_TRY_CATCH (to_str_val,
                  ecma_op_to_string (this_arg),
                  ret_value);

  /* 3 */
  ECMA_TRY_CATCH (search_str_val,
                  ecma_op_to_string (arg1),
                  ret_value);

  /* 4 */
  ECMA_OP_TO_NUMBER_TRY_CATCH (pos_num,
                               arg2,
                               ret_value);

  /* 5 */
  ecma_string_t *original_str_p = ecma_get_string_from_value (to_str_val);
  const ecma_length_t original_len = ecma_string_get_length (original_str_p);
  const lit_utf8_size_t original_size = ecma_string_get_size (original_str_p);

  /* 4b, 6 */
  /* NOTE(review): start is assumed to be clamped to [0, original_len] by the normalize helper,
   * since it is used below as an iterator offset - confirm against the helper. */
  ecma_length_t start = ecma_builtin_helper_string_index_normalize (pos_num, original_len);

  /* 7 */
  ecma_string_t *search_str_p = ecma_get_string_from_value (search_str_val);
  const ecma_length_t search_len = ecma_string_get_length (search_str_p);
  const lit_utf8_size_t search_size = ecma_string_get_size (search_str_p);

  /* The result stays -1 unless a match is found. */
  ecma_number_t *ret_num_p = ecma_alloc_number ();
  *ret_num_p = ecma_int32_to_number (-1);

  /* 8 */
  if (search_len <= original_len)
  {
    if (!search_len)
    {
      /*
       * The empty string matches at every position, so the smallest index
       * that is not smaller than start is start itself.
       */
      *ret_num_p = ecma_uint32_to_number (start);
    }
    else
    {
      /* create utf8 string from original string and advance to start position */
      MEM_DEFINE_LOCAL_ARRAY (original_str_utf8_p,
                              original_size,
                              lit_utf8_byte_t);

      ecma_string_to_utf8_string (original_str_p,
                                  original_str_utf8_p,
                                  (ssize_t) (original_size));

      lit_utf8_iterator_t original_it = lit_utf8_iterator_create (original_str_utf8_p, original_size);

      ecma_length_t index = start;
      lit_utf8_iterator_advance (&original_it, index);

      /* create utf8 string from search string */
      MEM_DEFINE_LOCAL_ARRAY (search_str_utf8_p,
                              search_size,
                              lit_utf8_byte_t);

      ecma_string_to_utf8_string (search_str_p,
                                  search_str_utf8_p,
                                  (ssize_t) (search_size));

      lit_utf8_iterator_t search_it = lit_utf8_iterator_create (search_str_utf8_p, search_size);

      /* iterate original string and try to match at each position */
      bool found = false;

      /*
       * Only positions with at least search_len code units remaining are tried, so the
       * inner loop never reads past the end of the original string. The subtraction
       * cannot wrap because search_len <= original_len in this branch.
       */
      while (!found && index <= original_len - search_len)
      {
        ecma_length_t match_len = 0;
        lit_utf8_iterator_pos_t stored_original_pos = lit_utf8_iterator_get_pos (&original_it);

        while (match_len < search_len &&
               lit_utf8_iterator_read_next (&original_it) == lit_utf8_iterator_read_next (&search_it))
        {
          match_len++;
        }

        /* Check for match */
        if (match_len == search_len)
        {
          *ret_num_p = ecma_uint32_to_number (index);
          found = true;
        }
        else
        {
          /* reset iterators: restart the pattern and move one code unit past the failed position */
          lit_utf8_iterator_seek_bos (&search_it);
          lit_utf8_iterator_seek (&original_it, stored_original_pos);
          lit_utf8_iterator_incr (&original_it);
        }
        index++;
      }

      MEM_FINALIZE_LOCAL_ARRAY (search_str_utf8_p);
      MEM_FINALIZE_LOCAL_ARRAY (original_str_utf8_p);
    }
  }

  ecma_value_t new_value = ecma_make_number_value (ret_num_p);
  ret_value = ecma_make_normal_completion_value (new_value);

  ECMA_OP_TO_NUMBER_FINALIZE (pos_num);
  ECMA_FINALIZE (search_str_val);
  ECMA_FINALIZE (to_str_val);
  ECMA_FINALIZE (check_coercible_val);

  return ret_value;
} /* ecma_builtin_string_prototype_object_index_of */
Beispiel #5
0
/**
 * Create token of specified type from charset
 *
 * The charset is first scanned code unit by code unit. If any position is flagged
 * is_non_bmp_middle by the iterator (presumably a non-BMP character stored as a single
 * multi-byte sequence - confirm against the iterator implementation), the charset is
 * re-encoded one code unit at a time into a temporary buffer; otherwise the input buffer
 * is used directly. The literal matching the resulting bytes is looked up and created
 * if it does not exist yet, and a token referring to it is returned.
 *
 * @return token descriptor
 */
static token
lexer_create_token_for_charset (jsp_token_type_t tt, /**< token type */
                                const lit_utf8_byte_t *charset_p, /**< charset buffer */
                                lit_utf8_size_t size) /**< size of the charset */
{
  JERRY_ASSERT (charset_p != NULL);

  lit_utf8_iterator_t iter = lit_utf8_iterator_create (charset_p, (lit_utf8_size_t) size);

  /* First pass: decide whether conversion is needed and compute an upper bound for its size. */
  bool needs_conversion = false;
  lit_utf8_size_t worst_case_size = 0;

  for (; !lit_utf8_iterator_is_eos (&iter); lit_utf8_iterator_incr (&iter))
  {
    if (iter.buf_pos.is_non_bmp_middle)
    {
      needs_conversion = true;
    }

    /* Reserve the maximum number of bytes for every code unit. */
    worst_case_size += LIT_CESU8_MAX_BYTES_IN_CODE_UNIT;
  }

  lit_utf8_byte_t *literal_bytes_p;
  lit_utf8_size_t literal_size = 0;

  if (unlikely (needs_conversion))
  {
    /* Second pass: re-encode each code unit separately into a temporary buffer. */
    literal_bytes_p = (lit_utf8_byte_t *) jsp_mm_alloc (worst_case_size);

    lit_utf8_iterator_seek_bos (&iter);

    while (!lit_utf8_iterator_is_eos (&iter))
    {
      const ecma_char_t code_unit = lit_utf8_iterator_read_next (&iter);
      literal_size += lit_code_unit_to_utf8 (code_unit, literal_bytes_p + literal_size);
    }
  }
  else
  {
    /* No conversion needed: the caller's buffer is used as it is. */
    literal_bytes_p = (lit_utf8_byte_t *) charset_p;
    literal_size = size;
    JERRY_ASSERT (lit_is_cesu8_string_valid (literal_bytes_p, literal_size));
  }

  /* Reuse an existing literal if there is one, otherwise register a new one. */
  lit_literal_t lit = lit_find_literal_by_utf8_string (literal_bytes_p, literal_size);

  if (lit == NULL)
  {
    lit = lit_create_literal_from_utf8_string (literal_bytes_p, literal_size);
    rcs_record_type_t type = rcs_record_get_type (lit);

    JERRY_ASSERT (RCS_RECORD_TYPE_IS_CHARSET (type)
                  || RCS_RECORD_TYPE_IS_MAGIC_STR (type)
                  || RCS_RECORD_TYPE_IS_MAGIC_STR_EX (type));
  }

  /* The temporary buffer exists only if the charset was converted. */
  if (unlikely (needs_conversion))
  {
    jsp_mm_free (literal_bytes_p);
  }

  return create_token_from_lit (tt, lit);
} /* lexer_create_token_for_charset */