Exemplo n.º 1
0
/**
 * Initialize lexer to start parsing of a new source
 */
void
lexer_init (const jerry_api_char_t *source, /**< script source */
            size_t source_size, /**< script source size in bytes */
            bool is_print_source_code) /**< flag indicating whether to dump
                                        *   processed source code */
{
  empty_token.type = TOK_EMPTY;
  empty_token.uid = 0;
  empty_token.loc = LIT_ITERATOR_POS_ZERO;

  saved_token = prev_token = sent_token = prev_non_lf_token = empty_token;

  if (!lit_is_utf8_string_valid (source, (lit_utf8_size_t) source_size))
  {
    PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX, "Invalid source encoding", LIT_ITERATOR_POS_ZERO);
  }

  src_iter = lit_utf8_iterator_create (source, (lit_utf8_size_t) source_size);

  buffer_size = source_size;
  buffer_start = source;
  is_token_parse_in_progress = false;

  lexer_set_strict_mode (false);

#ifndef JERRY_NDEBUG
  allow_dump_lines = is_print_source_code;
#else /* JERRY_NDEBUG */
  (void) is_print_source_code;
  allow_dump_lines = false;
#endif /* JERRY_NDEBUG */
} /* lexer_init */
Exemplo n.º 2
0
/**
 * Parse RegExp flags (global, ignoreCase, multiline)
 *
 * See also: ECMA-262 v5, 15.10.4.1
 *
 * @return completion value
 *         Returned value must be freed with ecma_free_completion_value
 */
ecma_completion_value_t
re_parse_regexp_flags (ecma_string_t *flags_str_p, /**< Input string with flags */
                       uint8_t *flags_p) /**< Output: parsed flag bits */
{
  ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();

  lit_utf8_size_t flags_str_size = ecma_string_get_size (flags_str_p);
  MEM_DEFINE_LOCAL_ARRAY (flags_start_p, flags_str_size, lit_utf8_byte_t);

  ecma_string_to_utf8_string (flags_str_p, flags_start_p, (ssize_t) flags_str_size);
  lit_utf8_iterator_t iter = lit_utf8_iterator_create (flags_start_p, flags_str_size);

  while (!lit_utf8_iterator_is_eos (&iter)
         && ecma_is_completion_value_empty (ret_value))
  {
    switch (lit_utf8_iterator_read_next (&iter))
    {
      case 'g':
      {
        if (*flags_p & RE_FLAG_GLOBAL)
        {
          ret_value = ecma_raise_syntax_error ("Invalid RegExp flags.");
        }
        *flags_p |= RE_FLAG_GLOBAL;
        break;
      }
      case 'i':
      {
        if (*flags_p & RE_FLAG_IGNORE_CASE)
        {
          ret_value = ecma_raise_syntax_error ("Invalid RegExp flags.");
        }
        *flags_p |= RE_FLAG_IGNORE_CASE;
        break;
      }
      case 'm':
      {
        if (*flags_p & RE_FLAG_MULTILINE)
        {
          ret_value = ecma_raise_syntax_error ("Invalid RegExp flags.");
        }
        *flags_p |= RE_FLAG_MULTILINE;
        break;
      }
      default:
      {
        ret_value = ecma_raise_syntax_error ("Invalid RegExp flags.");
        break;
      }
    }
  }

  MEM_FINALIZE_LOCAL_ARRAY (flags_start_p);

  return ret_value;
} /* re_parse_regexp_flags  */
Exemplo n.º 3
0
/**
 * Convert locus to line and column
 */
void
lexer_locus_to_line_and_column (lit_utf8_iterator_pos_t locus, /**< iterator position in the source script */
                                size_t *line, /**< @out: line number */
                                size_t *column) /**< @out: column number */
{
  JERRY_ASSERT ((lit_utf8_size_t) (locus.offset + locus.is_non_bmp_middle) <= buffer_size);

  lit_utf8_iterator_t iter = lit_utf8_iterator_create (buffer_start, (lit_utf8_size_t) buffer_size);
  lit_utf8_iterator_pos_t iter_pos = lit_utf8_iterator_get_pos (&iter);

  size_t l = 0, c = 0;
  while (!lit_utf8_iterator_is_eos (&iter) && lit_utf8_iterator_pos_cmp (iter_pos, locus) < 0)
  {
    ecma_char_t code_unit = lit_utf8_iterator_read_next (&iter);
    iter_pos = lit_utf8_iterator_get_pos (&iter);

    if (lit_char_is_line_terminator (code_unit))
    {
      if (code_unit == LIT_CHAR_CR
          && !lit_utf8_iterator_is_eos (&iter)
          && lit_utf8_iterator_peek_next (&iter) == LIT_CHAR_LF)
      {
        lit_utf8_iterator_incr (&iter);
      }
      c = 0;
      l++;
      continue;
    }

    c++;
  }

  if (line)
  {
    *line = l;
  }
  if (column)
  {
    *column = c;
  }
} /* lexer_locus_to_line_and_column */
Exemplo n.º 4
0
/*
 * Helper function for string indexOf and lastIndexOf functions
 *
 * This function implements string indexOf and lastIndexOf with required checks and conversions.
 *
 * See also:
 *          ECMA-262 v5, 15.5.4.7
 *          ECMA-262 v5, 15.5.4.8
 *
 * Used by:
 *         - The String.prototype.indexOf routine.
 *         - The String.prototype.lastIndexOf routine.
 *
 * @return uint32_t - (last)index of search string
 */
ecma_completion_value_t
ecma_builtin_helper_string_prototype_object_index_of (ecma_value_t this_arg, /**< this argument */
                                                      ecma_value_t arg1, /**< routine's first argument */
                                                      ecma_value_t arg2, /**< routine's second argument */
                                                      bool firstIndex) /**< routine's third argument */
{
  ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();

  /* 1 */
  ECMA_TRY_CATCH (check_coercible_val,
                  ecma_op_check_object_coercible (this_arg),
                  ret_value);

  /* 2 */
  ECMA_TRY_CATCH (to_str_val,
                  ecma_op_to_string (this_arg),
                  ret_value);

  /* 3 */
  ECMA_TRY_CATCH (search_str_val,
                  ecma_op_to_string (arg1),
                  ret_value);

  /* 4 */
  ECMA_OP_TO_NUMBER_TRY_CATCH (pos_num,
                               arg2,
                               ret_value);

  /* 6 */
  ecma_string_t *original_str_p = ecma_get_string_from_value (to_str_val);
  const ecma_length_t original_len = ecma_string_get_length (original_str_p);
  const lit_utf8_size_t original_size = ecma_string_get_size (original_str_p);

  /* 4b, 5, 7 */
  ecma_length_t start = ecma_builtin_helper_string_index_normalize (pos_num, original_len, firstIndex);

  /* 8 */
  ecma_string_t *search_str_p = ecma_get_string_from_value (search_str_val);
  const ecma_length_t search_len = ecma_string_get_length (search_str_p);
  const lit_utf8_size_t search_size = ecma_string_get_size (search_str_p);

  ecma_number_t *ret_num_p = ecma_alloc_number ();
  *ret_num_p = ecma_int32_to_number (-1);

  /* 9 */
  if (search_len <= original_len)
  {
    if (!search_len)
    {
      *ret_num_p = ecma_uint32_to_number (firstIndex ? 0 : original_len);
    }
    else
    {
      /* create utf8 string from original string and advance to position */
      MEM_DEFINE_LOCAL_ARRAY (original_str_utf8_p,
                              original_size,
                              lit_utf8_byte_t);

      ecma_string_to_utf8_string (original_str_p,
                                  original_str_utf8_p,
                                  (ssize_t) (original_size));

      lit_utf8_iterator_t original_it = lit_utf8_iterator_create (original_str_utf8_p, original_size);

      ecma_length_t index = start;
      lit_utf8_iterator_advance (&original_it, index);

      /* create utf8 string from search string */
      MEM_DEFINE_LOCAL_ARRAY (search_str_utf8_p,
                              search_size,
                              lit_utf8_byte_t);

      ecma_string_to_utf8_string (search_str_p,
                                  search_str_utf8_p,
                                  (ssize_t) (search_size));

      lit_utf8_iterator_t search_it = lit_utf8_iterator_create (search_str_utf8_p, search_size);

      /* iterate original string and try to match at each position */
      bool searching = true;

      while (searching)
      {
        /* match as long as possible */
        ecma_length_t match_len = 0;
        lit_utf8_iterator_t stored_original_it = original_it;

        while (match_len < search_len &&
               index + match_len < original_len &&
               lit_utf8_iterator_read_next (&original_it) == lit_utf8_iterator_read_next (&search_it))
        {
          match_len++;
        }

        /* check for match */
        if (match_len == search_len)
        {
          *ret_num_p = ecma_uint32_to_number (index);
          break;
        }
        else
        {
          /* inc/dec index and update iterators and search condition */
          lit_utf8_iterator_seek_bos (&search_it);
          original_it = stored_original_it;

          if (firstIndex)
          {
            if ((searching = (index <= original_len - search_len)))
            {
              lit_utf8_iterator_incr (&original_it);
              index++;
            }
          }
          else
          {
            if ((searching = (index > 0)))
            {
              lit_utf8_iterator_decr (&original_it);
              index--;
            }
          }
        }
      }

      MEM_FINALIZE_LOCAL_ARRAY (search_str_utf8_p);
      MEM_FINALIZE_LOCAL_ARRAY (original_str_utf8_p);
    }
  }

  ecma_value_t new_value = ecma_make_number_value (ret_num_p);
  ret_value = ecma_make_normal_completion_value (new_value);

  ECMA_OP_TO_NUMBER_FINALIZE (pos_num);
  ECMA_FINALIZE (search_str_val);
  ECMA_FINALIZE (to_str_val);
  ECMA_FINALIZE (check_coercible_val);

  return ret_value;
} /* ecma_builtin_helper_string_index_normalize */
Exemplo n.º 5
0
/**
 * Recursive function for RegExp matching. Tests for a regular expression
 * match and returns a MatchResult value.
 *
 * See also:
 *          ECMA-262 v5, 15.10.2.1
 *
 * @return completion value
 *         Returned value must be freed with ecma_free_completion_value
 */
static ecma_completion_value_t
re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
                 re_bytecode_t *bc_p, /**< pointer to the current RegExp bytecode */
                 lit_utf8_iterator_t iter, /**< input string iterator */
                 lit_utf8_iterator_t *out_iter_p) /**< Output: matching substring iterator */
{
  ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();
  re_opcode_t op;

  while ((op = re_get_opcode (&bc_p)))
  {
    switch (op)
    {
      case RE_OP_MATCH:
      {
        JERRY_DDLOG ("Execute RE_OP_MATCH: match\n");
        *out_iter_p = iter;
        ret_value = ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_TRUE);
        return ret_value; /* match */
      }
      case RE_OP_CHAR:
      {
        if (lit_utf8_iterator_is_eos (&iter))
        {
          return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
        }

        bool is_ignorecase = re_ctx_p->flags & RE_FLAG_IGNORE_CASE;
        ecma_char_t ch1 = (ecma_char_t) re_get_value (&bc_p); /* Already canonicalized. */
        ecma_char_t ch2 = re_canonicalize (lit_utf8_iterator_read_next (&iter), is_ignorecase);
        JERRY_DDLOG ("Character matching %d to %d: ", ch1, ch2);

        if (ch1 != ch2)
        {
          JERRY_DDLOG ("fail\n");
          return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
        }

        JERRY_DDLOG ("match\n");

        break; /* tail merge */
      }
      case RE_OP_PERIOD:
      {
        if (lit_utf8_iterator_is_eos (&iter))
        {
          return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
        }

        ecma_char_t ch = lit_utf8_iterator_read_next (&iter);
        JERRY_DDLOG ("Period matching '.' to %d: ", (uint32_t) ch);

        if (lit_char_is_line_terminator (ch))
        {
          JERRY_DDLOG ("fail\n");
          return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
        }

        JERRY_DDLOG ("match\n");
        break; /* tail merge */
      }
      case RE_OP_ASSERT_START:
      {
        JERRY_DDLOG ("Execute RE_OP_ASSERT_START: ");

        if ((iter.buf_p + iter.buf_pos.offset) <= re_ctx_p->input_start_p)
        {
          JERRY_DDLOG ("match\n");
          break;
        }

        if (!(re_ctx_p->flags & RE_FLAG_MULTILINE))
        {
          JERRY_DDLOG ("fail\n");
          return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
        }

        if (lit_char_is_line_terminator (lit_utf8_iterator_peek_prev (&iter)))
        {
          JERRY_DDLOG ("match\n");
          break;
        }

        JERRY_DDLOG ("fail\n");
        return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
      }
      case RE_OP_ASSERT_END:
      {
        JERRY_DDLOG ("Execute RE_OP_ASSERT_END: ");

        if ((iter.buf_p + iter.buf_pos.offset) >= re_ctx_p->input_end_p)
        {
          JERRY_DDLOG ("match\n");
          break; /* tail merge */
        }

        if (!(re_ctx_p->flags & RE_FLAG_MULTILINE))
        {
          JERRY_DDLOG ("fail\n");
          return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
        }

        if (lit_char_is_line_terminator (lit_utf8_iterator_peek_next (&iter)))
        {
          JERRY_DDLOG ("match\n");
          break; /* tail merge */
        }

        JERRY_DDLOG ("fail\n");
        return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
      }
      case RE_OP_ASSERT_WORD_BOUNDARY:
      case RE_OP_ASSERT_NOT_WORD_BOUNDARY:
      {
        bool is_wordchar_left, is_wordchar_right;

        if ((iter.buf_p + iter.buf_pos.offset) <= re_ctx_p->input_start_p)
        {
          is_wordchar_left = false;  /* not a wordchar */
        }
        else
        {
          is_wordchar_left = lit_char_is_word_char (lit_utf8_iterator_peek_prev (&iter));
        }

        if ((iter.buf_p + iter.buf_pos.offset) >= re_ctx_p->input_end_p)
        {
          is_wordchar_right = false;  /* not a wordchar */
        }
        else
        {
          is_wordchar_right = lit_char_is_word_char (lit_utf8_iterator_peek_next (&iter));
        }

        if (op == RE_OP_ASSERT_WORD_BOUNDARY)
        {
          JERRY_DDLOG ("Execute RE_OP_ASSERT_WORD_BOUNDARY: ");
          if (is_wordchar_left == is_wordchar_right)
          {
            JERRY_DDLOG ("fail\n");
            return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
          }
        }
        else
        {
          JERRY_ASSERT (op == RE_OP_ASSERT_NOT_WORD_BOUNDARY);
          JERRY_DDLOG ("Execute RE_OP_ASSERT_NOT_WORD_BOUNDARY: ");

          if (is_wordchar_left != is_wordchar_right)
          {
            JERRY_DDLOG ("fail\n");
            return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
          }
        }

        JERRY_DDLOG ("match\n");
        break; /* tail merge */
      }
      case RE_OP_LOOKAHEAD_POS:
      case RE_OP_LOOKAHEAD_NEG:
      {
        ecma_completion_value_t match_value = ecma_make_empty_completion_value ();
        lit_utf8_iterator_t sub_iter = lit_utf8_iterator_create (NULL, 0);

        uint32_t array_size = re_ctx_p->num_of_captures + re_ctx_p->num_of_non_captures;
        MEM_DEFINE_LOCAL_ARRAY (saved_bck_p, array_size, lit_utf8_iterator_t);

        size_t size = (size_t) (array_size) * sizeof (lit_utf8_iterator_t);
        memcpy (saved_bck_p, re_ctx_p->saved_p, size);

        do
        {
          uint32_t offset = re_get_value (&bc_p);

          if (!sub_iter.buf_p)
          {
            match_value = re_match_regexp (re_ctx_p, bc_p, iter, &sub_iter);
            if (ecma_is_completion_value_throw (match_value))
            {
              break;
            }
          }
          bc_p += offset;
        }
        while (re_get_opcode (&bc_p) == RE_OP_ALTERNATIVE);

        if (!ecma_is_completion_value_throw (match_value))
        {
          JERRY_DDLOG ("Execute RE_OP_LOOKAHEAD_POS/NEG: ");
          ecma_free_completion_value (match_value);
          if ((op == RE_OP_LOOKAHEAD_POS && sub_iter.buf_p)
              || (op == RE_OP_LOOKAHEAD_NEG && !sub_iter.buf_p))
          {
            JERRY_DDLOG ("match\n");
            match_value = re_match_regexp (re_ctx_p, bc_p, iter, &sub_iter);
          }
          else
          {
            JERRY_DDLOG ("fail\n");
            match_value = ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
          }
        }

        if (!ecma_is_completion_value_throw (match_value))
        {
          if (ecma_is_value_true (match_value))
          {
            *out_iter_p = sub_iter;
          }
          else
          {
            JERRY_ASSERT (ecma_is_value_boolean (match_value));
            /* restore saved */
            memcpy (re_ctx_p->saved_p, saved_bck_p, size);
          }
        }

        MEM_FINALIZE_LOCAL_ARRAY (saved_bck_p);
        return match_value;
      }
      case RE_OP_CHAR_CLASS:
      case RE_OP_INV_CHAR_CLASS:
      {
        uint32_t num_of_ranges;
        bool is_match;

        JERRY_DDLOG ("Execute RE_OP_CHAR_CLASS/RE_OP_INV_CHAR_CLASS, ");
        if (lit_utf8_iterator_is_eos (&iter))
        {
          JERRY_DDLOG ("fail\n");
          return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
        }

        bool is_ignorecase = re_ctx_p->flags & RE_FLAG_IGNORE_CASE;
        ecma_char_t curr_ch = re_canonicalize (lit_utf8_iterator_read_next (&iter), is_ignorecase);

        num_of_ranges = re_get_value (&bc_p);
        is_match = false;

        while (num_of_ranges)
        {
          ecma_char_t ch1 = re_canonicalize ((ecma_char_t) re_get_value (&bc_p), is_ignorecase);
          ecma_char_t ch2 = re_canonicalize ((ecma_char_t) re_get_value (&bc_p), is_ignorecase);
          JERRY_DDLOG ("num_of_ranges=%d, ch1=%d, ch2=%d, curr_ch=%d; ",
                       num_of_ranges, ch1, ch2, curr_ch);

          if (curr_ch >= ch1 && curr_ch <= ch2)
          {
            /* We must read all the ranges from bytecode. */
            is_match = true;
          }
          num_of_ranges--;
        }

        if (op == RE_OP_CHAR_CLASS)
        {
          if (!is_match)
          {
            JERRY_DDLOG ("fail\n");
            return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
          }
        }
        else
        {
          JERRY_ASSERT (op == RE_OP_INV_CHAR_CLASS);
          if (is_match)
          {
            JERRY_DDLOG ("fail\n");
            return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
          }
        }
        JERRY_DDLOG ("match\n");
        break; /* tail merge */
      }
      case RE_OP_BACKREFERENCE:
      {
        uint32_t backref_idx;

        backref_idx = re_get_value (&bc_p);
        JERRY_DDLOG ("Execute RE_OP_BACKREFERENCE (idx: %d): ", backref_idx);
        backref_idx *= 2;  /* backref n -> saved indices [n*2, n*2+1] */
        JERRY_ASSERT (backref_idx >= 2 && backref_idx + 1 < re_ctx_p->num_of_captures);

        if (!re_ctx_p->saved_p[backref_idx].buf_p || !re_ctx_p->saved_p[backref_idx + 1].buf_p)
        {
          JERRY_DDLOG ("match\n");
          break; /* capture is 'undefined', always matches! */
        }

        lit_utf8_iterator_t sub_iter = re_ctx_p->saved_p[backref_idx];

        while (sub_iter.buf_pos.offset < re_ctx_p->saved_p[backref_idx + 1].buf_pos.offset)
        {
          ecma_char_t ch1, ch2;

          if ((iter.buf_p + iter.buf_pos.offset) >= re_ctx_p->input_end_p)
          {
            JERRY_DDLOG ("fail\n");
            return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
          }

          ch1 = lit_utf8_iterator_read_next (&sub_iter);
          ch2 = lit_utf8_iterator_read_next (&iter);

          if (ch1 != ch2)
          {
            JERRY_DDLOG ("fail\n");
            return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
          }
        }
        JERRY_DDLOG ("match\n");
        break; /* tail merge */
      }
      case RE_OP_SAVE_AT_START:
      {
        re_bytecode_t *old_bc_p;

        JERRY_DDLOG ("Execute RE_OP_SAVE_AT_START\n");
        lit_utf8_iterator_t old_start_p = re_ctx_p->saved_p[RE_GLOBAL_START_IDX];
        re_ctx_p->saved_p[RE_GLOBAL_START_IDX] = iter;

        do
        {
          uint32_t offset = re_get_value (&bc_p);
          lit_utf8_iterator_t sub_iter = lit_utf8_iterator_create (NULL, 0);
          ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p, iter, &sub_iter);
          if (ecma_is_value_true (match_value))
          {
            *out_iter_p = sub_iter;
            return match_value; /* match */
          }
          else if (ecma_is_completion_value_throw (match_value))
          {
            return match_value;
          }
          bc_p += offset;
          old_bc_p = bc_p;
        }
        while (re_get_opcode (&bc_p) == RE_OP_ALTERNATIVE);
        bc_p = old_bc_p;

        re_ctx_p->saved_p[RE_GLOBAL_START_IDX] = old_start_p;
        return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
      }
      case RE_OP_SAVE_AND_MATCH:
      {
        JERRY_DDLOG ("End of pattern is reached: match\n");
        re_ctx_p->saved_p[RE_GLOBAL_END_IDX] = iter;
        *out_iter_p = iter;
        return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_TRUE); /* match */
      }
      case RE_OP_ALTERNATIVE:
      {
        /*
        *  Alternatives should be jump over, when alternative opcode appears.
        */
        uint32_t offset = re_get_value (&bc_p);
        JERRY_DDLOG ("Execute RE_OP_ALTERNATIVE");
        bc_p += offset;

        while (*bc_p == RE_OP_ALTERNATIVE)
        {
          JERRY_DDLOG (", jump: %d");
          bc_p++;
          offset = re_get_value (&bc_p);
          bc_p += offset;
        }

        JERRY_DDLOG ("\n");
        break; /* tail merge */
      }
      case RE_OP_CAPTURE_NON_GREEDY_ZERO_GROUP_START:
      case RE_OP_NON_CAPTURE_NON_GREEDY_ZERO_GROUP_START:
      {
        /*
        *  On non-greedy iterations we have to execute the bytecode
        *  after the group first, if zero iteration is allowed.
        */
        uint32_t start_idx, iter_idx, offset;
        lit_utf8_iterator_t old_start = lit_utf8_iterator_create (NULL, 0);
        lit_utf8_iterator_t sub_iter = lit_utf8_iterator_create (NULL, 0);
        re_bytecode_t *old_bc_p;

        old_bc_p = bc_p; /* save the bytecode start position of the group start */
        start_idx = re_get_value (&bc_p);
        offset = re_get_value (&bc_p);

        if (RE_IS_CAPTURE_GROUP (op))
        {
          JERRY_ASSERT (start_idx <= re_ctx_p->num_of_captures / 2);
          iter_idx = start_idx - 1;
          start_idx *= 2;

          old_start = re_ctx_p->saved_p[start_idx];
          re_ctx_p->saved_p[start_idx] = iter;
        }
        else
        {
          JERRY_ASSERT (start_idx < re_ctx_p->num_of_non_captures);
          iter_idx = start_idx + (re_ctx_p->num_of_captures / 2) - 1;
          start_idx += re_ctx_p->num_of_captures;
        }
        re_ctx_p->num_of_iterations_p[iter_idx] = 0;

        /* Jump all over to the end of the END opcode. */
        bc_p += offset;

        /* Try to match after the close paren if zero is allowed */
        ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p, iter, &sub_iter);

        if (ecma_is_value_true (match_value))
        {
          *out_iter_p = sub_iter;
          return match_value; /* match */
        }
        else if (ecma_is_completion_value_throw (match_value))
        {
          return match_value;
        }
        if (RE_IS_CAPTURE_GROUP (op))
        {
          re_ctx_p->saved_p[start_idx] = old_start;
        }

        bc_p = old_bc_p;
        /* FALLTHRU */
      }
      case RE_OP_CAPTURE_GROUP_START:
      case RE_OP_CAPTURE_GREEDY_ZERO_GROUP_START:
      case RE_OP_NON_CAPTURE_GROUP_START:
      case RE_OP_NON_CAPTURE_GREEDY_ZERO_GROUP_START:
      {
        uint32_t start_idx, iter_idx, old_iteration_cnt, offset;
        lit_utf8_iterator_t sub_iter = lit_utf8_iterator_create (NULL, 0);
        re_bytecode_t *old_bc_p;
        re_bytecode_t *end_bc_p = NULL;
        start_idx = re_get_value (&bc_p);

        if (op != RE_OP_CAPTURE_GROUP_START
            && op != RE_OP_NON_CAPTURE_GROUP_START)
        {
          offset = re_get_value (&bc_p);
          end_bc_p = bc_p + offset;
        }

        if (RE_IS_CAPTURE_GROUP (op))
        {
          JERRY_ASSERT (start_idx <= re_ctx_p->num_of_captures / 2);
          iter_idx = start_idx - 1;
          start_idx *= 2;
        }
        else
        {
          JERRY_ASSERT (start_idx < re_ctx_p->num_of_non_captures);
          iter_idx = start_idx + (re_ctx_p->num_of_captures / 2) - 1;
          start_idx += re_ctx_p->num_of_captures;
        }

        lit_utf8_iterator_t old_start = re_ctx_p->saved_p[start_idx];
        old_iteration_cnt = re_ctx_p->num_of_iterations_p[iter_idx];
        re_ctx_p->saved_p[start_idx] = iter;
        re_ctx_p->num_of_iterations_p[iter_idx] = 0;

        do
        {
          offset = re_get_value (&bc_p);
          ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p, iter, &sub_iter);
          if (ecma_is_value_true (match_value))
          {
            *out_iter_p = sub_iter;
            return match_value; /* match */
          }
          else if (ecma_is_completion_value_throw (match_value))
          {
            return match_value;
          }
          bc_p += offset;
          old_bc_p = bc_p;
        }
        while (re_get_opcode (&bc_p) == RE_OP_ALTERNATIVE);
        bc_p = old_bc_p;
        re_ctx_p->num_of_iterations_p[iter_idx] = old_iteration_cnt;

        /* Try to match after the close paren if zero is allowed. */
        if (op == RE_OP_CAPTURE_GREEDY_ZERO_GROUP_START
            || op == RE_OP_NON_CAPTURE_GREEDY_ZERO_GROUP_START)
        {
          JERRY_ASSERT (end_bc_p);
          ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, end_bc_p, iter, &sub_iter);

          if (ecma_is_value_true (match_value))
          {
            *out_iter_p = sub_iter;
            return match_value; /* match */
          }
          else if (ecma_is_completion_value_throw (match_value))
          {
            return match_value;
          }
        }

        re_ctx_p->saved_p[start_idx] = old_start;
        return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
      }
      case RE_OP_CAPTURE_NON_GREEDY_GROUP_END:
      case RE_OP_NON_CAPTURE_NON_GREEDY_GROUP_END:
      {
        uint32_t end_idx, iter_idx, min, max;
        re_bytecode_t *old_bc_p;

        /*
        *  On non-greedy iterations we have to execute the bytecode
        *  after the group first. Try to iterate only if it fails.
        */
        old_bc_p = bc_p; /* save the bytecode start position of the group end */
        end_idx = re_get_value (&bc_p);
        min = re_get_value (&bc_p);
        max = re_get_value (&bc_p);
        re_get_value (&bc_p); /* start offset */

        if (RE_IS_CAPTURE_GROUP (op))
        {
          JERRY_ASSERT (end_idx <= re_ctx_p->num_of_captures / 2);
          iter_idx = end_idx - 1;
          end_idx = (end_idx * 2) + 1;
        }
        else
        {
          JERRY_ASSERT (end_idx <= re_ctx_p->num_of_non_captures);
          iter_idx = end_idx + (re_ctx_p->num_of_captures / 2) - 1;
          end_idx += re_ctx_p->num_of_captures;
        }

        re_ctx_p->num_of_iterations_p[iter_idx]++;

        if (re_ctx_p->num_of_iterations_p[iter_idx] >= min
            && re_ctx_p->num_of_iterations_p[iter_idx] <= max)
        {
          lit_utf8_iterator_t old_end = re_ctx_p->saved_p[end_idx];
          re_ctx_p->saved_p[end_idx] = iter;

          lit_utf8_iterator_t sub_iter = lit_utf8_iterator_create (NULL, 0);
          ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p, iter, &sub_iter);
          if (ecma_is_value_true (match_value))
          {
            *out_iter_p = sub_iter;
            return match_value; /* match */
          }
          else if (ecma_is_completion_value_throw (match_value))
          {
            return match_value;
          }

          re_ctx_p->saved_p[end_idx] = old_end;
        }
        re_ctx_p->num_of_iterations_p[iter_idx]--;
        bc_p = old_bc_p;

        /* If non-greedy fails and try to iterate... */
        /* FALLTHRU */
      }
      case RE_OP_CAPTURE_GREEDY_GROUP_END:
      case RE_OP_NON_CAPTURE_GREEDY_GROUP_END:
      {
        uint32_t start_idx, end_idx, iter_idx, min, max, offset;
        lit_utf8_iterator_t old_start = lit_utf8_iterator_create (NULL, 0);
        lit_utf8_iterator_t old_end = lit_utf8_iterator_create (NULL, 0);
        lit_utf8_iterator_t sub_iter = lit_utf8_iterator_create (NULL, 0);
        re_bytecode_t *old_bc_p;

        end_idx = re_get_value (&bc_p);
        min = re_get_value (&bc_p);
        max = re_get_value (&bc_p);
        offset = re_get_value (&bc_p);

        if (RE_IS_CAPTURE_GROUP (op))
        {
          JERRY_ASSERT (end_idx <= re_ctx_p->num_of_captures / 2);
          iter_idx = end_idx - 1;
          start_idx = end_idx * 2;
          end_idx = start_idx + 1;
        }
        else
        {
          JERRY_ASSERT (end_idx <= re_ctx_p->num_of_non_captures);
          iter_idx = end_idx + (re_ctx_p->num_of_captures / 2) - 1;
          end_idx += re_ctx_p->num_of_captures;
          start_idx = end_idx;
        }

        /* Check the empty iteration if the minimum number of iterations is reached. */
        if (re_ctx_p->num_of_iterations_p[iter_idx] >= min
            && iter.buf_p == re_ctx_p->saved_p[start_idx].buf_p
            && iter.buf_pos.offset == re_ctx_p->saved_p[start_idx].buf_pos.offset)
        {
          return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
        }

        re_ctx_p->num_of_iterations_p[iter_idx]++;

        old_bc_p = bc_p; /* Save the bytecode end position of the END opcodes for matching after it. */
        old_end = re_ctx_p->saved_p[end_idx];
        re_ctx_p->saved_p[end_idx] = iter;

        if (re_ctx_p->num_of_iterations_p[iter_idx] < max)
        {
          bc_p -= offset;
          offset = re_get_value (&bc_p);

          old_start = re_ctx_p->saved_p[start_idx];
          re_ctx_p->saved_p[start_idx] = iter;
          ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p, iter, &sub_iter);

          if (ecma_is_value_true (match_value))
          {
            *out_iter_p = sub_iter;
            return match_value; /* match */
          }
          else if (ecma_is_completion_value_throw (match_value))
          {
            return match_value;
          }

          re_ctx_p->saved_p[start_idx] = old_start;

          /* Try to match alternatives if any. */
          bc_p += offset;
          while (*bc_p == RE_OP_ALTERNATIVE)
          {
            bc_p++; /* RE_OP_ALTERNATIVE */
            offset = re_get_value (&bc_p);

            old_start = re_ctx_p->saved_p[start_idx];
            re_ctx_p->saved_p[start_idx] = iter;

            ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p, iter, &sub_iter);

            if (ecma_is_value_true (match_value))
            {
              *out_iter_p = sub_iter;
              return match_value; /* match */
            }
            else if (ecma_is_completion_value_throw (match_value))
            {
              return match_value;
            }

            re_ctx_p->saved_p[start_idx] = old_start;
            bc_p += offset;
          }
        }

        if (re_ctx_p->num_of_iterations_p[iter_idx] >= min
            && re_ctx_p->num_of_iterations_p[iter_idx] <= max)
        {
          /* Try to match the rest of the bytecode. */
          ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, old_bc_p, iter, &sub_iter);

          if (ecma_is_value_true (match_value))
          {
            *out_iter_p = sub_iter;
            return match_value; /* match */
          }
          else if (ecma_is_completion_value_throw (match_value))
          {
            return match_value;
          }
        }

        /* restore if fails */
        re_ctx_p->saved_p[end_idx] = old_end;
        re_ctx_p->num_of_iterations_p[iter_idx]--;
        return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
      }
      case RE_OP_NON_GREEDY_ITERATOR:
      {
        uint32_t min, max, offset, num_of_iter;
        lit_utf8_iterator_t sub_iter = lit_utf8_iterator_create (NULL, 0);

        min = re_get_value (&bc_p);
        max = re_get_value (&bc_p);

        offset = re_get_value (&bc_p);
        JERRY_DDLOG ("Non-greedy iterator, min=%lu, max=%lu, offset=%ld\n",
                     (unsigned long) min, (unsigned long) max, (long) offset);

        num_of_iter = 0;
        while (num_of_iter <= max)
        {
          if (num_of_iter >= min)
          {
            ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p + offset, iter, &sub_iter);

            if (ecma_is_value_true (match_value))
            {
              *out_iter_p = sub_iter;
              return match_value; /* match */
            }
            else if (ecma_is_completion_value_throw (match_value))
            {
              return match_value;
            }
          }

          ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p, iter, &sub_iter);

          if (!ecma_is_value_true (match_value))
          {
            if (ecma_is_completion_value_throw (match_value))
            {
              return match_value;
            }

            break;
          }

          iter = sub_iter;
          num_of_iter++;
        }
        return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
      }
      case RE_OP_GREEDY_ITERATOR:
      {
        uint32_t min, max, offset, num_of_iter;
        lit_utf8_iterator_t sub_iter = lit_utf8_iterator_create (NULL, 0);

        min = re_get_value (&bc_p);
        max = re_get_value (&bc_p);

        offset = re_get_value (&bc_p);
        JERRY_DDLOG ("Greedy iterator, min=%lu, max=%lu, offset=%ld\n",
                     (unsigned long) min, (unsigned long) max, (long) offset);

        num_of_iter = 0;

        while (num_of_iter < max)
        {
          ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p, iter, &sub_iter);

          if (!ecma_is_value_true (match_value))
          {
            if (ecma_is_completion_value_throw (match_value))
            {
              return match_value;
            }

            break;
          }

          iter = sub_iter;
          num_of_iter++;
        }

        while (num_of_iter >= min)
        {
          ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p + offset, iter, &sub_iter);

          if (ecma_is_value_true (match_value))
          {
            *out_iter_p = sub_iter;
            return match_value; /* match */
          }
          else if (ecma_is_completion_value_throw (match_value))
          {
            return match_value;
          }

          if (num_of_iter == min)
          {
            break;
          }

          lit_utf8_iterator_read_prev (&iter);
          num_of_iter--;
        }
        return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
      }
      default:
      {
        JERRY_DDLOG ("UNKNOWN opcode (%d)!\n", (uint32_t) op);
        return ecma_make_throw_obj_completion_value (ecma_new_standard_error (ECMA_ERROR_COMMON));
      }
    }
  }

  JERRY_UNREACHABLE ();
  return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
} /* regexp_match */
Exemplo n.º 6
0
/**
 * RegExp helper function to start the recursive matching algorithm
 * and create the result Array object
 *
 * @return completion value
 *         Returned value must be freed with ecma_free_completion_value
 */
ecma_completion_value_t
ecma_regexp_exec_helper (ecma_value_t regexp_value, /**< RegExp object */
                         ecma_value_t input_string, /**< input string */
                         bool ignore_global) /**< ignore global flag */
{
  ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();

  JERRY_ASSERT (ecma_is_value_object (regexp_value));
  JERRY_ASSERT (ecma_is_value_string (input_string));

  ecma_object_t *regexp_object_p = ecma_get_object_from_value (regexp_value);

  JERRY_ASSERT (ecma_object_get_class_name (regexp_object_p) == LIT_MAGIC_STRING_REGEXP_UL);

  ecma_property_t *bytecode_prop_p = ecma_get_internal_property (regexp_object_p,
                                                                 ECMA_INTERNAL_PROPERTY_REGEXP_BYTECODE);
  re_bytecode_t *bc_p = ECMA_GET_POINTER (re_bytecode_t, bytecode_prop_p->u.internal_property.value);

  ecma_string_t *input_string_p = ecma_get_string_from_value (input_string);
  lit_utf8_size_t input_string_size = ecma_string_get_size (input_string_p);

  MEM_DEFINE_LOCAL_ARRAY (input_utf8_buffer_p, input_string_size, lit_utf8_byte_t);

  ecma_string_to_utf8_string (input_string_p, input_utf8_buffer_p, (ssize_t) input_string_size);
  lit_utf8_iterator_t iterator = lit_utf8_iterator_create (input_utf8_buffer_p, input_string_size);

  re_matcher_ctx_t re_ctx;
  re_ctx.input_start_p = iterator.buf_p;
  re_ctx.input_end_p = iterator.buf_p + iterator.buf_size;

  /* 1. Read bytecode header and init regexp matcher context. */
  re_ctx.flags = (uint8_t) re_get_value (&bc_p);

  if (ignore_global)
  {
    re_ctx.flags &= (uint8_t) ~RE_FLAG_GLOBAL;
  }

  JERRY_DDLOG ("Exec with flags [global: %d, ignoreCase: %d, multiline: %d]\n",
               re_ctx.flags & RE_FLAG_GLOBAL,
               re_ctx.flags & RE_FLAG_IGNORE_CASE,
               re_ctx.flags & RE_FLAG_MULTILINE);

  re_ctx.num_of_captures = re_get_value (&bc_p);
  JERRY_ASSERT (re_ctx.num_of_captures % 2 == 0);
  re_ctx.num_of_non_captures = re_get_value (&bc_p);

  /* We create an invalid iterator, that will be used to identify unused result values. */
  lit_utf8_iterator_t unused_iter = lit_utf8_iterator_create (NULL, 0);
  unused_iter.buf_p = (lit_utf8_byte_t *) 1;

  MEM_DEFINE_LOCAL_ARRAY (saved_p, re_ctx.num_of_captures + re_ctx.num_of_non_captures, lit_utf8_iterator_t);

  for (uint32_t i = 0; i < re_ctx.num_of_captures + re_ctx.num_of_non_captures; i++)
  {
    saved_p[i] = unused_iter;
  }
  re_ctx.saved_p = saved_p;

  uint32_t num_of_iter_length = (re_ctx.num_of_captures / 2) + (re_ctx.num_of_non_captures - 1);
  MEM_DEFINE_LOCAL_ARRAY (num_of_iter_p, num_of_iter_length, uint32_t);

  for (uint32_t i = 0; i < num_of_iter_length; i++)
  {
    num_of_iter_p[i] = 0u;
  }

  bool is_match = false;
  re_ctx.num_of_iterations_p = num_of_iter_p;
  int32_t index = 0;
  ecma_length_t input_str_len = lit_utf8_string_length (iterator.buf_p, iterator.buf_size);

  if (iterator.buf_p && (re_ctx.flags & RE_FLAG_GLOBAL))
  {
    ecma_string_t *magic_str_p = ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL);
    ecma_property_t *lastindex_prop_p = ecma_op_object_get_property (regexp_object_p, magic_str_p);

    ECMA_OP_TO_NUMBER_TRY_CATCH (lastindex_num, lastindex_prop_p->u.named_data_property.value, ret_value)
    index = ecma_number_to_int32 (lastindex_num);

    JERRY_ASSERT (iterator.buf_pos.offset == 0 && !iterator.buf_pos.is_non_bmp_middle);
    if (!lit_utf8_iterator_is_eos (&iterator)
        && index <= (int32_t) input_str_len
        && index > 0)
    {
      lit_utf8_iterator_advance (&iterator, (ecma_length_t) index);
    }
    ECMA_OP_TO_NUMBER_FINALIZE (lastindex_num);
    ecma_deref_ecma_string (magic_str_p);
  }

  /* 2. Try to match */
  lit_utf8_iterator_t sub_iter = lit_utf8_iterator_create (NULL, 0);

  while (ecma_is_completion_value_empty (ret_value))
  {
    if (index < 0 || index > (int32_t) input_str_len)
    {
      if (re_ctx.flags & RE_FLAG_GLOBAL)
      {
        ecma_string_t *magic_str_p = ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL);
        ecma_number_t *lastindex_num_p = ecma_alloc_number ();
        *lastindex_num_p = ECMA_NUMBER_ZERO;
        ecma_op_object_put (regexp_object_p, magic_str_p, ecma_make_number_value (lastindex_num_p), true);
        ecma_dealloc_number (lastindex_num_p);
        ecma_deref_ecma_string (magic_str_p);
      }

      is_match = false;
      break;
    }
    else
    {
      ECMA_TRY_CATCH (match_value, re_match_regexp (&re_ctx, bc_p, iterator, &sub_iter), ret_value);

      if (ecma_is_value_true (match_value))
      {
        is_match = true;
        break;
      }

      if (!lit_utf8_iterator_is_eos (&iterator))
      {
        lit_utf8_iterator_advance (&iterator, 1);
      }
      index++;

      ECMA_FINALIZE (match_value);
    }
  }

  if (iterator.buf_p && (re_ctx.flags & RE_FLAG_GLOBAL))
  {
    ecma_string_t *magic_str_p = ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL);
    ecma_number_t *lastindex_num_p = ecma_alloc_number ();
    *lastindex_num_p = sub_iter.buf_pos.offset;
    ecma_op_object_put (regexp_object_p, magic_str_p, ecma_make_number_value (lastindex_num_p), true);
    ecma_dealloc_number (lastindex_num_p);
    ecma_deref_ecma_string (magic_str_p);
  }

  /* 3. Fill the result array or return with 'undefiend' */
  if (ecma_is_completion_value_empty (ret_value))
  {
    if (is_match)
    {
      ecma_completion_value_t result_array = ecma_op_create_array_object (0, 0, false);
      ecma_object_t *result_array_obj_p = ecma_get_object_from_completion_value (result_array);

      ecma_string_t *input_str_p = ecma_new_ecma_string_from_utf8 (iterator.buf_p, iterator.buf_size);
      re_set_result_array_properties (result_array_obj_p, input_str_p, re_ctx.num_of_captures / 2, index);
      ecma_deref_ecma_string (input_str_p);

      for (uint32_t i = 0; i < re_ctx.num_of_captures; i += 2)
      {
        ecma_string_t *index_str_p = ecma_new_ecma_string_from_uint32 (i / 2);

        /* Note: 'iter_p->buf_p == NULL' means the input is empty string */
        if ((re_ctx.saved_p[i].buf_p != unused_iter.buf_p && re_ctx.saved_p[i + 1].buf_p != unused_iter.buf_p)
            && re_ctx.saved_p[i + 1].buf_pos.offset >= re_ctx.saved_p[i].buf_pos.offset)
        {
          ecma_length_t capture_str_len;
          capture_str_len = (ecma_length_t) re_ctx.saved_p[i + 1].buf_pos.offset - re_ctx.saved_p[i].buf_pos.offset;
          ecma_string_t *capture_str_p;

          if (capture_str_len > 0)
          {
            const lit_utf8_byte_t *utf8_str_p = re_ctx.saved_p[i].buf_p + re_ctx.saved_p[i].buf_pos.offset;
            capture_str_p = ecma_new_ecma_string_from_utf8 (utf8_str_p, capture_str_len);
          }
          else
          {
            capture_str_p = ecma_get_magic_string (LIT_MAGIC_STRING__EMPTY);
          }
          ecma_op_object_put (result_array_obj_p, index_str_p, ecma_make_string_value (capture_str_p), true);
          ecma_deref_ecma_string (capture_str_p);
        }
        else
        {
          ecma_op_object_put (result_array_obj_p,
                              index_str_p,
                              ecma_make_simple_value (ECMA_SIMPLE_VALUE_UNDEFINED),
                              true);
        }
        ecma_deref_ecma_string (index_str_p);
      }
      ret_value = result_array;
    }
    else
    {
      ret_value = ecma_make_normal_completion_value (ecma_make_simple_value (ECMA_SIMPLE_VALUE_NULL));
    }
  }

  MEM_FINALIZE_LOCAL_ARRAY (num_of_iter_p);
  MEM_FINALIZE_LOCAL_ARRAY (saved_p);
  MEM_FINALIZE_LOCAL_ARRAY (input_utf8_buffer_p);

  return ret_value;
} /* ecma_regexp_exec_helper */
Exemplo n.º 7
0
/**
 * The Date object's 'parse' routine
 *
 * See also:
 *          ECMA-262 v5, 15.9.4.2
 *          ECMA-262 v5, 15.9.1.15
 *
 * @return completion value
 *         Returned value must be freed with ecma_free_completion_value.
 */
static ecma_completion_value_t
ecma_builtin_date_parse (ecma_value_t this_arg __attr_unused___, /**< this argument */
                         ecma_value_t arg) /**< string */
{
  ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();
  ecma_number_t *date_num_p = ecma_alloc_number ();
  *date_num_p = ecma_number_make_nan ();

  /* Date Time String fromat (ECMA-262 v5, 15.9.1.15) */
  ECMA_TRY_CATCH (date_str_value,
                  ecma_op_to_string (arg),
                  ret_value);

  ecma_string_t *date_str_p = ecma_get_string_from_value (date_str_value);

  lit_utf8_size_t date_str_size = ecma_string_get_size (date_str_p);
  MEM_DEFINE_LOCAL_ARRAY (date_start_p, date_str_size, lit_utf8_byte_t);

  ecma_string_to_utf8_string (date_str_p, date_start_p, (ssize_t) date_str_size);
  lit_utf8_iterator_t iter = lit_utf8_iterator_create (date_start_p, date_str_size);

  /* 1. read year */
  ecma_number_t year = ecma_date_parse_date_chars (&iter, 4);

  if (!ecma_number_is_nan (year)
      && year >= 0)
  {
    ecma_number_t month = ECMA_NUMBER_ONE;
    ecma_number_t day = ECMA_NUMBER_ONE;
    ecma_number_t time = ECMA_NUMBER_ZERO;

    /* 2. read month if any */
    if (!lit_utf8_iterator_is_eos (&iter)
        && lit_utf8_iterator_peek_next (&iter) == '-')
    {
      /* eat up '-' */
      lit_utf8_iterator_incr (&iter);
      month = ecma_date_parse_date_chars (&iter, 2);

      if (month > 12 || month < 1)
      {
        month = ecma_number_make_nan ();
      }
    }

    /* 3. read day if any */
    if (!lit_utf8_iterator_is_eos (&iter)
        && lit_utf8_iterator_peek_next (&iter) == '-')
    {
      /* eat up '-' */
      lit_utf8_iterator_incr (&iter);
      day = ecma_date_parse_date_chars (&iter, 2);

      if (day < 1 || day > 31)
      {
        day = ecma_number_make_nan ();
      }
    }

    /* 4. read time if any */
    if (!lit_utf8_iterator_is_eos (&iter)
        && lit_utf8_iterator_peek_next (&iter) == 'T')
    {
      ecma_number_t hours = ECMA_NUMBER_ZERO;
      ecma_number_t minutes = ECMA_NUMBER_ZERO;
      ecma_number_t seconds = ECMA_NUMBER_ZERO;
      ecma_number_t milliseconds = ECMA_NUMBER_ZERO;

      ecma_length_t num_of_visited_chars = lit_utf8_iterator_get_index (&iter);
      ecma_length_t date_str_len = lit_utf8_string_length (iter.buf_p, iter.buf_size) - 1;

      if ((date_str_len - num_of_visited_chars) >= 5)
      {
        /* eat up 'T' */
        lit_utf8_iterator_incr (&iter);

        /* 4.1 read hours and minutes */
        hours = ecma_date_parse_date_chars (&iter, 2);

        if (hours < 0 || hours > 24)
        {
          hours = ecma_number_make_nan ();
        }
        else if (hours == 24)
        {
          hours = ECMA_NUMBER_ZERO;
        }

        /* eat up ':' */
        lit_utf8_iterator_incr (&iter);

        minutes = ecma_date_parse_date_chars (&iter, 2);

        if (minutes < 0 || minutes > 59)
        {
          minutes = ecma_number_make_nan ();
        }

        /* 4.2 read seconds if any */
        if (!lit_utf8_iterator_is_eos (&iter) && lit_utf8_iterator_peek_next (&iter) == ':')
        {
          /* eat up ':' */
          lit_utf8_iterator_incr (&iter);
          seconds = ecma_date_parse_date_chars (&iter, 2);

          if (seconds < 0 || seconds > 59)
          {
            seconds = ecma_number_make_nan ();
          }

          /* 4.3 read milliseconds if any */
          if (!lit_utf8_iterator_is_eos (&iter) && lit_utf8_iterator_peek_next (&iter) == '.')
          {
            /* eat up '.' */
            lit_utf8_iterator_incr (&iter);
            milliseconds = ecma_date_parse_date_chars (&iter, 3);

            if (milliseconds < 0)
            {
              milliseconds = ecma_number_make_nan ();
            }
          }
        }

        time = ecma_date_make_time (hours, minutes, seconds, milliseconds);
      }
      else
      {
        time = ecma_number_make_nan ();
      }

      /* 4.4 read timezone if any */
      if (!lit_utf8_iterator_is_eos (&iter)
          && lit_utf8_iterator_peek_next (&iter) == 'Z'
          && !ecma_number_is_nan (time))
      {
        lit_utf8_iterator_incr (&iter);
        time = ecma_date_utc (ecma_date_make_time (hours,
                                                   minutes,
                                                   seconds,
                                                   milliseconds));
      }
      else if (!lit_utf8_iterator_is_eos (&iter)
               && (lit_utf8_iterator_peek_next (&iter) == '+'
                   || lit_utf8_iterator_peek_next (&iter) == '-'))
      {
        ecma_length_t num_of_visited_chars = lit_utf8_iterator_get_index (&iter);
        ecma_length_t date_str_len = lit_utf8_string_length (iter.buf_p, iter.buf_size) - 1;

        if ((date_str_len - num_of_visited_chars) == 5)
        {
          bool is_negative = false;

          if (lit_utf8_iterator_peek_next (&iter) == '-')
          {
            is_negative = true;
          }

          /* eat up '+/-' */
          lit_utf8_iterator_incr (&iter);

          /* read hours and minutes */
          hours = ecma_date_parse_date_chars (&iter, 2);

          if (hours < 0 || hours > 24)
          {
            hours = ecma_number_make_nan ();
          }
          else if (hours == 24)
          {
            hours = ECMA_NUMBER_ZERO;
          }

          /* eat up ':' */
          lit_utf8_iterator_incr (&iter);

          minutes = ecma_date_parse_date_chars (&iter, 2);

          if (minutes < 0 || minutes > 59)
          {
            minutes = ecma_number_make_nan ();
          }

          if (is_negative)
          {
            time += ecma_date_make_time (hours, minutes, ECMA_NUMBER_ZERO, ECMA_NUMBER_ZERO);
          }
          else
          {
            time -= ecma_date_make_time (hours, minutes, ECMA_NUMBER_ZERO, ECMA_NUMBER_ZERO);
          }
        }
      }
    }

    if (lit_utf8_iterator_is_eos (&iter))
    {
      ecma_number_t date = ecma_date_make_day (year, month - 1, day);
      *date_num_p = ecma_date_make_date (date, time);
    }
  }

  ret_value = ecma_make_normal_completion_value (ecma_make_number_value (date_num_p));

  MEM_FINALIZE_LOCAL_ARRAY (date_start_p);
  ECMA_FINALIZE (date_str_value);

  return ret_value;
} /* ecma_builtin_date_parse */
/**
 * Helper function to convert a string to upper or lower case.
 *
 * @return completion value
 *         Returned value must be freed with ecma_free_completion_value.
 */
static ecma_completion_value_t
ecma_builtin_string_prototype_object_conversion_helper (ecma_value_t this_arg, /**< this argument */
                                                        bool lower_case) /**< convert to lower (true)
                                                                          *   or upper (false) case */
{
  ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();

  /* 1. */
  ECMA_TRY_CATCH (check_coercible_val,
                  ecma_op_check_object_coercible (this_arg),
                  ret_value);

  /* 2. */
  ECMA_TRY_CATCH (to_string_val,
                  ecma_op_to_string (this_arg),
                  ret_value);

  /* 3. */
  ecma_string_t *input_string_p = ecma_get_string_from_value (to_string_val);
  lit_utf8_size_t input_size = ecma_string_get_size (input_string_p);

  MEM_DEFINE_LOCAL_ARRAY (input_start_p,
                          input_size,
                          lit_utf8_byte_t);

  ecma_string_to_utf8_string (input_string_p,
                              input_start_p,
                              (ssize_t) (input_size));

  /*
   * The URI encoding has two major phases: first we compute
   * the length of the lower case string, then we encode it.
   */

  lit_utf8_size_t output_length = 0;
  lit_utf8_iterator_t input_iterator = lit_utf8_iterator_create (input_start_p, input_size);

  while (!lit_utf8_iterator_is_eos (&input_iterator))
  {
    ecma_char_t character = lit_utf8_iterator_read_next (&input_iterator);
    ecma_char_t character_buffer[LIT_MAXIMUM_OTHER_CASE_LENGTH];
    lit_utf8_byte_t utf8_byte_buffer[LIT_UTF8_MAX_BYTES_IN_CODE_POINT];
    lit_utf8_size_t character_length;

    /*
     * We need to keep surrogate pairs. Surrogates are never converted,
     * regardless they form a valid pair or not.
     */
    if (lit_is_code_unit_high_surrogate (character))
    {
      ecma_char_t next_character = lit_utf8_iterator_peek_next (&input_iterator);

      if (lit_is_code_unit_low_surrogate (next_character))
      {
        lit_code_point_t surrogate_code_point = lit_convert_surrogate_pair_to_code_point (character, next_character);
        output_length += lit_code_point_to_utf8 (surrogate_code_point, utf8_byte_buffer);
        lit_utf8_iterator_incr (&input_iterator);
        continue;
      }
    }

    if (lower_case)
    {
      character_length = lit_char_to_lower_case (character,
                                                 character_buffer,
                                                 LIT_MAXIMUM_OTHER_CASE_LENGTH);
    }
    else
    {
      character_length = lit_char_to_upper_case (character,
                                                 character_buffer,
                                                 LIT_MAXIMUM_OTHER_CASE_LENGTH);
    }

    JERRY_ASSERT (character_length >= 1 && character_length <= LIT_MAXIMUM_OTHER_CASE_LENGTH);

    for (lit_utf8_size_t i = 0; i < character_length; i++)
    {
      output_length += lit_code_unit_to_utf8 (character_buffer[i], utf8_byte_buffer);
    }
  }

  /* Second phase. */

  MEM_DEFINE_LOCAL_ARRAY (output_start_p,
                          output_length,
                          lit_utf8_byte_t);

  lit_utf8_byte_t *output_char_p = output_start_p;

  /* Encoding the output. */
  lit_utf8_iterator_seek_bos (&input_iterator);

  while (!lit_utf8_iterator_is_eos (&input_iterator))
  {
    ecma_char_t character = lit_utf8_iterator_read_next (&input_iterator);
    ecma_char_t character_buffer[LIT_MAXIMUM_OTHER_CASE_LENGTH];
    lit_utf8_size_t character_length;

    /*
     * We need to keep surrogate pairs. Surrogates are never converted,
     * regardless they form a valid pair or not.
     */
    if (lit_is_code_unit_high_surrogate (character))
    {
      ecma_char_t next_character = lit_utf8_iterator_peek_next (&input_iterator);

      if (lit_is_code_unit_low_surrogate (next_character))
      {
        lit_code_point_t surrogate_code_point = lit_convert_surrogate_pair_to_code_point (character, next_character);
        output_char_p += lit_code_point_to_utf8 (surrogate_code_point, output_char_p);
        lit_utf8_iterator_incr (&input_iterator);
        continue;
      }
    }

    if (lower_case)
    {
      character_length = lit_char_to_lower_case (character,
                                                 character_buffer,
                                                 LIT_MAXIMUM_OTHER_CASE_LENGTH);
    }
    else
    {
      character_length = lit_char_to_upper_case (character,
                                                 character_buffer,
                                                 LIT_MAXIMUM_OTHER_CASE_LENGTH);
    }

    JERRY_ASSERT (character_length >= 1 && character_length <= LIT_MAXIMUM_OTHER_CASE_LENGTH);

    for (lit_utf8_size_t i = 0; i < character_length; i++)
    {
      output_char_p += lit_code_point_to_utf8 (character_buffer[i], output_char_p);
    }
  }

  JERRY_ASSERT (output_start_p + output_length == output_char_p);

  ecma_string_t *output_string_p = ecma_new_ecma_string_from_utf8 (output_start_p, output_length);

  ret_value = ecma_make_normal_completion_value (ecma_make_string_value (output_string_p));

  MEM_FINALIZE_LOCAL_ARRAY (output_start_p);
  MEM_FINALIZE_LOCAL_ARRAY (input_start_p);

  ECMA_FINALIZE (to_string_val);
  ECMA_FINALIZE (check_coercible_val);

  return ret_value;
} /* ecma_builtin_string_prototype_object_conversion_helper */
/**
 * The String.prototype object's 'indexOf' routine
 *
 * See also:
 *          ECMA-262 v5, 15.5.4.7
 *
 * @return completion value
 *         Returned value must be freed with ecma_free_completion_value.
 */
static ecma_completion_value_t
ecma_builtin_string_prototype_object_index_of (ecma_value_t this_arg, /**< this argument */
                                               ecma_value_t arg1, /**< routine's first argument */
                                               ecma_value_t arg2) /**< routine's second argument */
{
  ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();

  /* 1 */
  ECMA_TRY_CATCH (check_coercible_val,
                  ecma_op_check_object_coercible (this_arg),
                  ret_value);

  /* 2 */
  ECMA_TRY_CATCH (to_str_val,
                  ecma_op_to_string (this_arg),
                  ret_value);

  /* 3 */
  ECMA_TRY_CATCH (search_str_val,
                  ecma_op_to_string (arg1),
                  ret_value);

  /* 4 */
  ECMA_OP_TO_NUMBER_TRY_CATCH (pos_num,
                               arg2,
                               ret_value);

  /* 5 */
  ecma_string_t *original_str_p = ecma_get_string_from_value (to_str_val);
  const ecma_length_t original_len = ecma_string_get_length (original_str_p);
  const lit_utf8_size_t original_size = ecma_string_get_size (original_str_p);

  /* 4b, 6 */
  ecma_length_t start = ecma_builtin_helper_string_index_normalize (pos_num, original_len);

  /* 7 */
  ecma_string_t *search_str_p = ecma_get_string_from_value (search_str_val);
  const ecma_length_t search_len = ecma_string_get_length (search_str_p);
  const lit_utf8_size_t search_size = ecma_string_get_size (search_str_p);

  ecma_number_t *ret_num_p = ecma_alloc_number ();
  *ret_num_p = ecma_int32_to_number (-1);

  /* 8 */
  if (search_len <= original_len)
  {
    if (!search_len)
    {
      *ret_num_p = ecma_uint32_to_number (0);
    }
    else
    {
      /* create utf8 string from original string and advance to start position */
      MEM_DEFINE_LOCAL_ARRAY (original_str_utf8_p,
                              original_size,
                              lit_utf8_byte_t);

      ecma_string_to_utf8_string (original_str_p,
                                  original_str_utf8_p,
                                  (ssize_t) (original_size));

      lit_utf8_iterator_t original_it = lit_utf8_iterator_create (original_str_utf8_p, original_size);

      ecma_length_t index = start;
      lit_utf8_iterator_advance (&original_it, index);

      /* create utf8 string from search string */
      MEM_DEFINE_LOCAL_ARRAY (search_str_utf8_p,
                              search_size,
                              lit_utf8_byte_t);

      ecma_string_to_utf8_string (search_str_p,
                                  search_str_utf8_p,
                                  (ssize_t) (search_size));

      lit_utf8_iterator_t search_it = lit_utf8_iterator_create (search_str_utf8_p, search_size);

      /* iterate original string and try to match at each position */
      bool found = false;

      while (!found && index <= original_len - search_len)
      {
        ecma_length_t match_len = 0;
        lit_utf8_iterator_pos_t stored_original_pos = lit_utf8_iterator_get_pos (&original_it);

        while (match_len < search_len &&
               lit_utf8_iterator_read_next (&original_it) == lit_utf8_iterator_read_next (&search_it))
        {
          match_len++;
        }

        /* Check for match */
        if (match_len == search_len)
        {
          *ret_num_p = ecma_uint32_to_number (index);
          found = true;
        }
        else
        {
          /* reset iterators */
          lit_utf8_iterator_seek_bos (&search_it);
          lit_utf8_iterator_seek (&original_it, stored_original_pos);
          lit_utf8_iterator_incr (&original_it);
        }
        index++;
      }

      MEM_FINALIZE_LOCAL_ARRAY (search_str_utf8_p);
      MEM_FINALIZE_LOCAL_ARRAY (original_str_utf8_p);
    }
  }

  ecma_value_t new_value = ecma_make_number_value (ret_num_p);
  ret_value = ecma_make_normal_completion_value (new_value);

  ECMA_OP_TO_NUMBER_FINALIZE (pos_num);
  ECMA_FINALIZE (search_str_val);
  ECMA_FINALIZE (to_str_val);
  ECMA_FINALIZE (check_coercible_val);

  return ret_value;
} /* ecma_builtin_string_prototype_object_index_of */
Exemplo n.º 10
0
/**
 * Transforming escape sequences in the charset, outputting converted string to specified buffer
 *
 * Note:
 *      Size of string with transformed escape sequences is always
 *      less or equal to size of corresponding source string.
 *
 * @return size of converted string
 */
static lit_utf8_size_t
lexer_transform_escape_sequences (const jerry_api_char_t *source_str_p, /**< string to convert,
                                                                         *   located in source buffer */
                                  lit_utf8_size_t source_str_size, /**< size of the string and of the output buffer */
                                  jerry_api_char_t *output_str_buf_p) /**< output buffer for converted string */
{
  if (source_str_size == 0)
  {
    return 0;
  }
  else
  {
    JERRY_ASSERT (source_str_p != NULL);
  }

  lit_utf8_byte_t *output_str_buf_iter_p = output_str_buf_p;
  const size_t output_str_buf_size = source_str_size;
  bool is_correct_sequence = true;

  lit_utf8_iterator_t source_str_iter = lit_utf8_iterator_create (source_str_p,
                                                                  source_str_size);

  ecma_char_t prev_converted_char = LIT_CHAR_NULL;

  while (!lit_utf8_iterator_is_eos (&source_str_iter))
  {
    ecma_char_t converted_char;

    const ecma_char_t next_char = lit_utf8_iterator_read_next (&source_str_iter);

    if (next_char == LIT_CHAR_BACKSLASH)
    {
      if (lit_utf8_iterator_is_eos (&source_str_iter))
      {
        is_correct_sequence = false;
        break;
      }

      const ecma_char_t char_after_next = lit_utf8_iterator_read_next (&source_str_iter);

      if (lit_char_is_decimal_digit (char_after_next))
      {
        if (lit_char_is_octal_digit (char_after_next))
        {
          if (char_after_next == LIT_CHAR_0
              && (lit_utf8_iterator_is_eos (&source_str_iter)
                  || !lit_char_is_octal_digit (lit_utf8_iterator_peek_next (&source_str_iter))))
          {
            converted_char = LIT_CHAR_NULL;
          }
          else
          {
            /* Implementation-defined (ECMA-262 v5, B.1.2): octal escape sequences are not implemented */
            is_correct_sequence = false;
            break;
          }
        }
        else
        {
          converted_char = char_after_next;
        }
      }
      else if (char_after_next == LIT_CHAR_LOWERCASE_U
               || char_after_next == LIT_CHAR_LOWERCASE_X)
      {
        if (!lexer_convert_escape_sequence_digits_to_char (&source_str_iter,
                                                           char_after_next == LIT_CHAR_LOWERCASE_U,
                                                           &converted_char))
        {
          is_correct_sequence = false;
          break;
        }
      }
      else if (lit_char_is_line_terminator (char_after_next))
      {
        /* Skip \, followed by a LineTerminatorSequence (ECMA-262, v5, 7.3) */
        if (char_after_next == LIT_CHAR_CR
            && !lit_utf8_iterator_is_eos (&source_str_iter)
            && lit_utf8_iterator_peek_next (&source_str_iter) == LIT_CHAR_LF)
        {
          lit_utf8_iterator_incr (&source_str_iter);
        }

        continue;
      }
      else
      {
        lexer_convert_single_escape_character (char_after_next, &converted_char);
      }
    }
    else
    {
      converted_char = next_char;
    }

    if (lit_is_code_unit_high_surrogate (prev_converted_char)
        && lit_is_code_unit_low_surrogate (converted_char))
    {
      output_str_buf_iter_p -= LIT_UTF8_MAX_BYTES_IN_CODE_UNIT;

      lit_code_point_t code_point = lit_convert_surrogate_pair_to_code_point (prev_converted_char,
                                                                              converted_char);
      output_str_buf_iter_p += lit_code_point_to_utf8 (code_point, output_str_buf_iter_p);
    }
    else
    {
      output_str_buf_iter_p += lit_code_unit_to_utf8 (converted_char, output_str_buf_iter_p);
      JERRY_ASSERT (output_str_buf_iter_p <= output_str_buf_p + output_str_buf_size);
    }

    prev_converted_char = converted_char;
  }

  if (is_correct_sequence)
  {
    return (lit_utf8_size_t) (output_str_buf_iter_p - output_str_buf_p);
  }
  else
  {
    PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX, "Illegal escape sequence", token_start_pos);
  }
} /* lexer_transform_escape_sequences */
Exemplo n.º 11
0
/**
 * Create token of specified type from charset
 *
 * @return token descriptor
 */
static token
lexer_create_token_for_charset (jsp_token_type_t tt, /**< token type */
                                const lit_utf8_byte_t *charset_p, /**< charset buffer */
                                lit_utf8_size_t size) /**< size of the charset */
{
  JERRY_ASSERT (charset_p != NULL);

  lit_utf8_iterator_t iter = lit_utf8_iterator_create (charset_p, (lit_utf8_size_t) size);
  lit_utf8_size_t new_size = 0;
  lit_utf8_size_t new_length = 0;
  bool should_convert = false;

  while (!lit_utf8_iterator_is_eos (&iter))
  {
    if (iter.buf_pos.is_non_bmp_middle)
    {
      should_convert = true;
    }
    lit_utf8_iterator_incr (&iter);
    new_size += LIT_CESU8_MAX_BYTES_IN_CODE_UNIT;
  }

  lit_utf8_byte_t *converted_str_p;

  if (unlikely (should_convert))
  {
    lit_utf8_iterator_seek_bos (&iter);
    converted_str_p = (lit_utf8_byte_t *) jsp_mm_alloc (new_size);

    while (!lit_utf8_iterator_is_eos (&iter))
    {
      ecma_char_t ch = lit_utf8_iterator_read_next (&iter);
      new_length += lit_code_unit_to_utf8 (ch, converted_str_p + new_length);
    }
  }
  else
  {
    converted_str_p = (lit_utf8_byte_t *) charset_p;
    new_length = size;
    JERRY_ASSERT (lit_is_cesu8_string_valid (converted_str_p, new_length));
  }

  lit_literal_t lit = lit_find_literal_by_utf8_string (converted_str_p, new_length);
  if (lit != NULL)
  {
    if (unlikely (should_convert))
    {
      jsp_mm_free (converted_str_p);
    }

    return create_token_from_lit (tt, lit);
  }
  lit = lit_create_literal_from_utf8_string (converted_str_p, new_length);
  rcs_record_type_t type = rcs_record_get_type (lit);

  JERRY_ASSERT (RCS_RECORD_TYPE_IS_CHARSET (type)
                || RCS_RECORD_TYPE_IS_MAGIC_STR (type)
                || RCS_RECORD_TYPE_IS_MAGIC_STR_EX (type));

  if (unlikely (should_convert))
  {
    jsp_mm_free (converted_str_p);
  }

  return create_token_from_lit (tt, lit);
} /* lexer_create_token_for_charset */
Exemplo n.º 12
0
int
main (int __attr_unused___ argc,
      char __attr_unused___ **argv)
{
  TEST_INIT ();

  mem_init ();

  lit_utf8_byte_t utf8_string[max_bytes_in_string];
  ecma_char_t code_units[max_code_units_in_string];
  lit_utf8_iterator_pos_t saved_positions[max_code_units_in_string];

  for (int i = 0; i < test_iters; i++)
  {
    lit_utf8_size_t utf8_string_size = (i == 0) ? 0 : (lit_utf8_size_t) (rand () % max_bytes_in_string);
    ecma_length_t length = generate_utf8_string (utf8_string, utf8_string_size);

    JERRY_ASSERT (lit_utf8_string_length (utf8_string, utf8_string_size) == length);

    lit_utf8_iterator_t iter = lit_utf8_iterator_create (utf8_string, utf8_string_size);
    ecma_length_t calculated_length = 0;

    ecma_length_t code_units_count = 0;
    while (!lit_utf8_iterator_is_eos (&iter))
    {
      code_units[code_units_count] = lit_utf8_iterator_peek_next (&iter);
      saved_positions[code_units_count] = lit_utf8_iterator_get_pos (&iter);
      code_units_count++;
      calculated_length++;

      lit_utf8_iterator_incr (&iter);
    }

    JERRY_ASSERT (length == calculated_length);

    if (code_units_count > 0)
    {
      for (int j = 0; j < test_subiters; j++)
      {
        ecma_length_t index = (ecma_length_t) rand () % code_units_count;
        lit_utf8_iterator_seek (&iter, saved_positions[index]);
        JERRY_ASSERT (lit_utf8_iterator_peek_next (&iter) == code_units[index]);
        JERRY_ASSERT (lit_utf8_iterator_get_index (&iter) == index);
      }
    }

    lit_utf8_iterator_seek_eos (&iter);
    while (!lit_utf8_iterator_is_bos (&iter))
    {
      JERRY_ASSERT (code_units_count > 0);
      calculated_length--;
      JERRY_ASSERT (code_units[calculated_length] == lit_utf8_iterator_peek_prev (&iter));
      lit_utf8_iterator_decr (&iter);
    }

    JERRY_ASSERT (calculated_length == 0);

    while (!lit_utf8_iterator_is_eos (&iter))
    {
      ecma_char_t code_unit = lit_utf8_iterator_read_next (&iter);
      JERRY_ASSERT (code_unit == code_units[calculated_length]);
      calculated_length++;
    }

    JERRY_ASSERT (length == calculated_length);

    while (!lit_utf8_iterator_is_bos (&iter))
    {
      JERRY_ASSERT (code_units_count > 0);
      calculated_length--;
      JERRY_ASSERT (code_units[calculated_length] == lit_utf8_iterator_read_prev (&iter));
    }

    JERRY_ASSERT (calculated_length == 0);
  }

  /* Overlong-encoded code point */
  lit_utf8_byte_t invalid_utf8_string_1[] = {0xC0, 0x82};
  JERRY_ASSERT (!lit_is_utf8_string_valid (invalid_utf8_string_1, sizeof (invalid_utf8_string_1)));

  /* Overlong-encoded code point */
  lit_utf8_byte_t invalid_utf8_string_2[] = {0xE0, 0x80, 0x81};
  JERRY_ASSERT (!lit_is_utf8_string_valid (invalid_utf8_string_2, sizeof (invalid_utf8_string_2)));

  /* Pair of surrogates: 0xD901 0xDFF0 which encode Unicode character 0x507F0 */
  lit_utf8_byte_t invalid_utf8_string_3[] = {0xED, 0xA4, 0x81, 0xED, 0xBF, 0xB0};
  JERRY_ASSERT (!lit_is_utf8_string_valid (invalid_utf8_string_3, sizeof (invalid_utf8_string_3)));

  /* Isolated high surrogate 0xD901 */
  lit_utf8_byte_t valid_utf8_string_1[] = {0xED, 0xA4, 0x81};
  JERRY_ASSERT (lit_is_utf8_string_valid (valid_utf8_string_1, sizeof (valid_utf8_string_1)));

  /* 4-byte long utf-8 character - Unicode character 0x507F0 */
  lit_utf8_byte_t valid_utf8_string_2[] = {0xF1, 0x90, 0x9F, 0xB0};
  JERRY_ASSERT (lit_is_utf8_string_valid (valid_utf8_string_2, sizeof (valid_utf8_string_2)));

  lit_utf8_byte_t buf[] = {0xF0, 0x90, 0x8D, 0x88};
  lit_code_point_t code_point;
  lit_utf8_size_t bytes_count = lit_read_code_point_from_utf8 (buf, sizeof (buf), &code_point);
  JERRY_ASSERT (bytes_count == 4);
  JERRY_ASSERT (code_point == 0x10348);

  lit_utf8_byte_t res_buf[3];
  lit_utf8_size_t res_size;

  res_size = lit_code_unit_to_utf8 (0x73, res_buf);
  JERRY_ASSERT (res_size == 1);
  JERRY_ASSERT (res_buf[0] == 0x73);

  res_size = lit_code_unit_to_utf8 (0x41A, res_buf);
  JERRY_ASSERT (res_size == 2);
  JERRY_ASSERT (res_buf[0] == 0xD0);
  JERRY_ASSERT (res_buf[1] == 0x9A);

  res_size = lit_code_unit_to_utf8 (0xD7FF, res_buf);
  JERRY_ASSERT (res_size == 3);
  JERRY_ASSERT (res_buf[0] == 0xED);
  JERRY_ASSERT (res_buf[1] == 0x9F);
  JERRY_ASSERT (res_buf[2] == 0xBF);

  lit_utf8_byte_t bytes[] = {0xF0, 0x90, 0x8D, 0x88};
  lit_utf8_iterator_t iter = lit_utf8_iterator_create (bytes, sizeof (bytes));
  ecma_char_t code_unit = lit_utf8_iterator_read_next (&iter);
  JERRY_ASSERT (!lit_utf8_iterator_is_eos (&iter));
  JERRY_ASSERT (code_unit == 0xD800);
  code_unit = lit_utf8_iterator_read_next (&iter);
  JERRY_ASSERT (lit_utf8_iterator_is_eos (&iter));
  JERRY_ASSERT (code_unit == 0xDF48);

  mem_finalize (true);
  return 0;
}
Exemplo n.º 13
0
/**
 * Compilation of RegExp bytecode
 *
 * @return completion value
 *         Returned value must be freed with ecma_free_completion_value
 */
ecma_completion_value_t
re_compile_bytecode (re_bytecode_t **out_bytecode_p, /**< out:pointer to bytecode */
                     ecma_string_t *pattern_str_p, /**< pattern */
                     uint8_t flags) /**< flags */
{
  ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();
  re_compiler_ctx_t re_ctx;
  re_ctx.flags = flags;
  re_ctx.highest_backref = 0;
  re_ctx.num_of_non_captures = 0;

  re_bytecode_ctx_t bc_ctx;
  bc_ctx.block_start_p = NULL;
  bc_ctx.block_end_p = NULL;
  bc_ctx.current_p = NULL;

  re_ctx.bytecode_ctx_p = &bc_ctx;

  lit_utf8_size_t pattern_str_size = ecma_string_get_size (pattern_str_p);
  MEM_DEFINE_LOCAL_ARRAY (pattern_start_p, pattern_str_size, lit_utf8_byte_t);

  ecma_string_to_utf8_string (pattern_str_p, pattern_start_p, (ssize_t) pattern_str_size);
  lit_utf8_iterator_t iter = lit_utf8_iterator_create (pattern_start_p, pattern_str_size);

  re_parser_ctx_t parser_ctx;
  parser_ctx.iter = iter;
  parser_ctx.num_of_groups = -1;
  re_ctx.parser_ctx_p = &parser_ctx;

  /* 1. Parse RegExp pattern */
  re_ctx.num_of_captures = 1;
  re_append_opcode (&bc_ctx, RE_OP_SAVE_AT_START);

  ECMA_TRY_CATCH (empty, re_parse_alternative (&re_ctx, true), ret_value);

  /* 2. Check for invalid backreference */
  if (re_ctx.highest_backref >= re_ctx.num_of_captures)
  {
    ret_value = ecma_raise_syntax_error ("Invalid backreference.\n");
  }
  else
  {
    re_append_opcode (&bc_ctx, RE_OP_SAVE_AND_MATCH);
    re_append_opcode (&bc_ctx, RE_OP_EOF);

    /* 3. Insert extra informations for bytecode header */
    re_insert_u32 (&bc_ctx, 0, (uint32_t) re_ctx.num_of_non_captures);
    re_insert_u32 (&bc_ctx, 0, (uint32_t) re_ctx.num_of_captures * 2);
    re_insert_u32 (&bc_ctx, 0, (uint32_t) re_ctx.flags);
  }
  ECMA_FINALIZE (empty);

  MEM_FINALIZE_LOCAL_ARRAY (pattern_start_p);

  if (!ecma_is_completion_value_empty (ret_value))
  {
    /* Compilation failed, free bytecode. */
    mem_heap_free_block (bc_ctx.block_start_p);
    *out_bytecode_p = NULL;
  }
  else
  {
    /* The RegExp bytecode contains at least a RE_OP_SAVE_AT_START opdoce, so it cannot be NULL. */
    JERRY_ASSERT (bc_ctx.block_start_p != NULL);
    *out_bytecode_p = bc_ctx.block_start_p;
  }

#ifdef JERRY_ENABLE_LOG
  re_dump_bytecode (&bc_ctx);
#endif

  return ret_value;
} /* re_compile_bytecode */