Exemplo n.º 1
0
/**
 * Parse a comment
 *
 * @return true if newline was met during parsing
 *         false - otherwise
 */
static bool
lexer_parse_comment (void)
{
  ecma_char_t c = LA (0);
  bool multiline;
  bool was_newlines = false;

  JERRY_ASSERT (LA (0) == LIT_CHAR_SLASH);
  JERRY_ASSERT (LA (1) == LIT_CHAR_SLASH || LA (1) == LIT_CHAR_ASTERISK);

  multiline = (LA (1) == LIT_CHAR_ASTERISK);

  consume_char ();
  consume_char ();

  while (!lit_utf8_iterator_is_eos (&src_iter))
  {
    c = LA (0);

    if (!multiline)
    {
      if (lit_char_is_line_terminator (c))
      {
        return true;
      }
    }
    else
    {
      if (c == LIT_CHAR_ASTERISK
          && LA (1) == LIT_CHAR_SLASH)
      {
        consume_char ();
        consume_char ();

        return was_newlines;
      }
      else if (lit_char_is_line_terminator (c))
      {
        was_newlines = true;
      }
    }

    consume_char ();
  }

  if (multiline)
  {
    PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX, "Unclosed multiline comment", lit_utf8_iterator_get_pos (&src_iter));
  }

  return false;
} /* lexer_parse_comment */
Exemplo n.º 2
0
/**
 * Skip any whitespace and comment tokens
 *
 * @return true - if a newline token was skipped,
 *         false - otherwise
 */
static bool
lexer_skip_whitespace_and_comments (void)
{
  bool new_lines_occurred = false;

  while (true)
  {
    ecma_char_t c = LA (0);

    if (lit_char_is_white_space (c))
    {
      do
      {
        consume_char ();

        c = LA (0);
      }
      while (lit_char_is_white_space (c));
    }
    else if (lit_char_is_line_terminator (c))
    {
      dump_current_line ();

      new_lines_occurred = true;

      do
      {
        consume_char ();

        c = LA (0);
      }
      while (lit_char_is_line_terminator (c));
    }
    else if (c == LIT_CHAR_SLASH
             && (LA (1) == LIT_CHAR_SLASH
                 || LA (1) == LIT_CHAR_ASTERISK))
    {
      /* ECMA-262 v5, 7.4, SingleLineComment or MultiLineComment */

      if (lexer_parse_comment ())
      {
        new_lines_occurred = true;
      }
    }
    else
    {
      break;
    }
  }

  return new_lines_occurred;
} /* lexer_skip_whitespace_and_comments */
Exemplo n.º 3
0
/**
 * Dump current line
 */
static void
dump_current_line (void)
{
  if (!allow_dump_lines)
  {
    return;
  }

  printf ("// ");

  lit_utf8_iterator_t iter = src_iter;

  while (!lit_utf8_iterator_is_eos (&iter))
  {
    ecma_char_t code_unit = lit_utf8_iterator_read_next (&iter);
    if (lit_char_is_line_terminator (code_unit))
    {
      if (code_unit == LIT_CHAR_CR
          && !lit_utf8_iterator_is_eos (&iter)
          && lit_utf8_iterator_peek_next (&iter) == LIT_CHAR_LF)
      {
        lit_utf8_iterator_incr (&iter);
      }
      break;
    }

    lit_put_ecma_char (code_unit);
  }

  lit_put_ecma_char (LIT_CHAR_LF);
} /* dump_current_line */
Exemplo n.º 4
0
/**
 * Dump specified line of the source script
 */
void
lexer_dump_line (size_t line) /**< line number */
{
  size_t l = 0;
  lit_utf8_iterator_t iter = src_iter;

  lit_utf8_iterator_seek_bos (&iter);

  while (!lit_utf8_iterator_is_eos (&iter))
  {
    ecma_char_t code_unit;

    if (l == line)
    {
      while (!lit_utf8_iterator_is_eos (&iter))
      {
        code_unit = lit_utf8_iterator_read_next (&iter);
        if (lit_char_is_line_terminator (code_unit))
        {
          break;
        }
        lit_put_ecma_char (code_unit);
      }
      return;
    }

    code_unit = lit_utf8_iterator_read_next (&iter);

    if (lit_char_is_line_terminator (code_unit))
    {
      l++;
      if (code_unit == LIT_CHAR_CR
          && !lit_utf8_iterator_is_eos (&iter)
          && lit_utf8_iterator_peek_next (&iter) == LIT_CHAR_LF)
      {
        lit_utf8_iterator_incr (&iter);
      }
    }
  }
} /* lexer_dump_line */
Exemplo n.º 5
0
/**
 * Convert locus to line and column
 */
void
lexer_locus_to_line_and_column (lit_utf8_iterator_pos_t locus, /**< iterator position in the source script */
                                size_t *line, /**< @out: line number */
                                size_t *column) /**< @out: column number */
{
  JERRY_ASSERT ((lit_utf8_size_t) (locus.offset + locus.is_non_bmp_middle) <= buffer_size);

  lit_utf8_iterator_t iter = lit_utf8_iterator_create (buffer_start, (lit_utf8_size_t) buffer_size);
  lit_utf8_iterator_pos_t iter_pos = lit_utf8_iterator_get_pos (&iter);

  size_t l = 0, c = 0;
  while (!lit_utf8_iterator_is_eos (&iter) && lit_utf8_iterator_pos_cmp (iter_pos, locus) < 0)
  {
    ecma_char_t code_unit = lit_utf8_iterator_read_next (&iter);
    iter_pos = lit_utf8_iterator_get_pos (&iter);

    if (lit_char_is_line_terminator (code_unit))
    {
      if (code_unit == LIT_CHAR_CR
          && !lit_utf8_iterator_is_eos (&iter)
          && lit_utf8_iterator_peek_next (&iter) == LIT_CHAR_LF)
      {
        lit_utf8_iterator_incr (&iter);
      }
      c = 0;
      l++;
      continue;
    }

    c++;
  }

  if (line)
  {
    *line = l;
  }
  if (column)
  {
    *column = c;
  }
} /* lexer_locus_to_line_and_column */
Exemplo n.º 6
0
/**
 * Recursive function for RegExp matching. Tests for a regular expression
 * match and returns a MatchResult value.
 *
 * See also:
 *          ECMA-262 v5, 15.10.2.1
 *
 * @return completion value
 *         Returned value must be freed with ecma_free_completion_value
 */
static ecma_completion_value_t
re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
                 re_bytecode_t *bc_p, /**< pointer to the current RegExp bytecode */
                 lit_utf8_iterator_t iter, /**< input string iterator */
                 lit_utf8_iterator_t *out_iter_p) /**< Output: matching substring iterator */
{
  ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();
  re_opcode_t op;

  while ((op = re_get_opcode (&bc_p)))
  {
    switch (op)
    {
      case RE_OP_MATCH:
      {
        JERRY_DDLOG ("Execute RE_OP_MATCH: match\n");
        *out_iter_p = iter;
        ret_value = ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_TRUE);
        return ret_value; /* match */
      }
      case RE_OP_CHAR:
      {
        if (lit_utf8_iterator_is_eos (&iter))
        {
          return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
        }

        bool is_ignorecase = re_ctx_p->flags & RE_FLAG_IGNORE_CASE;
        ecma_char_t ch1 = (ecma_char_t) re_get_value (&bc_p); /* Already canonicalized. */
        ecma_char_t ch2 = re_canonicalize (lit_utf8_iterator_read_next (&iter), is_ignorecase);
        JERRY_DDLOG ("Character matching %d to %d: ", ch1, ch2);

        if (ch1 != ch2)
        {
          JERRY_DDLOG ("fail\n");
          return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
        }

        JERRY_DDLOG ("match\n");

        break; /* tail merge */
      }
      case RE_OP_PERIOD:
      {
        if (lit_utf8_iterator_is_eos (&iter))
        {
          return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
        }

        ecma_char_t ch = lit_utf8_iterator_read_next (&iter);
        JERRY_DDLOG ("Period matching '.' to %d: ", (uint32_t) ch);

        if (lit_char_is_line_terminator (ch))
        {
          JERRY_DDLOG ("fail\n");
          return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
        }

        JERRY_DDLOG ("match\n");
        break; /* tail merge */
      }
      case RE_OP_ASSERT_START:
      {
        JERRY_DDLOG ("Execute RE_OP_ASSERT_START: ");

        if ((iter.buf_p + iter.buf_pos.offset) <= re_ctx_p->input_start_p)
        {
          JERRY_DDLOG ("match\n");
          break;
        }

        if (!(re_ctx_p->flags & RE_FLAG_MULTILINE))
        {
          JERRY_DDLOG ("fail\n");
          return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
        }

        if (lit_char_is_line_terminator (lit_utf8_iterator_peek_prev (&iter)))
        {
          JERRY_DDLOG ("match\n");
          break;
        }

        JERRY_DDLOG ("fail\n");
        return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
      }
      case RE_OP_ASSERT_END:
      {
        JERRY_DDLOG ("Execute RE_OP_ASSERT_END: ");

        if ((iter.buf_p + iter.buf_pos.offset) >= re_ctx_p->input_end_p)
        {
          JERRY_DDLOG ("match\n");
          break; /* tail merge */
        }

        if (!(re_ctx_p->flags & RE_FLAG_MULTILINE))
        {
          JERRY_DDLOG ("fail\n");
          return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
        }

        if (lit_char_is_line_terminator (lit_utf8_iterator_peek_next (&iter)))
        {
          JERRY_DDLOG ("match\n");
          break; /* tail merge */
        }

        JERRY_DDLOG ("fail\n");
        return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
      }
      case RE_OP_ASSERT_WORD_BOUNDARY:
      case RE_OP_ASSERT_NOT_WORD_BOUNDARY:
      {
        bool is_wordchar_left, is_wordchar_right;

        if ((iter.buf_p + iter.buf_pos.offset) <= re_ctx_p->input_start_p)
        {
          is_wordchar_left = false;  /* not a wordchar */
        }
        else
        {
          is_wordchar_left = lit_char_is_word_char (lit_utf8_iterator_peek_prev (&iter));
        }

        if ((iter.buf_p + iter.buf_pos.offset) >= re_ctx_p->input_end_p)
        {
          is_wordchar_right = false;  /* not a wordchar */
        }
        else
        {
          is_wordchar_right = lit_char_is_word_char (lit_utf8_iterator_peek_next (&iter));
        }

        if (op == RE_OP_ASSERT_WORD_BOUNDARY)
        {
          JERRY_DDLOG ("Execute RE_OP_ASSERT_WORD_BOUNDARY: ");
          if (is_wordchar_left == is_wordchar_right)
          {
            JERRY_DDLOG ("fail\n");
            return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
          }
        }
        else
        {
          JERRY_ASSERT (op == RE_OP_ASSERT_NOT_WORD_BOUNDARY);
          JERRY_DDLOG ("Execute RE_OP_ASSERT_NOT_WORD_BOUNDARY: ");

          if (is_wordchar_left != is_wordchar_right)
          {
            JERRY_DDLOG ("fail\n");
            return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
          }
        }

        JERRY_DDLOG ("match\n");
        break; /* tail merge */
      }
      case RE_OP_LOOKAHEAD_POS:
      case RE_OP_LOOKAHEAD_NEG:
      {
        ecma_completion_value_t match_value = ecma_make_empty_completion_value ();
        lit_utf8_iterator_t sub_iter = lit_utf8_iterator_create (NULL, 0);

        uint32_t array_size = re_ctx_p->num_of_captures + re_ctx_p->num_of_non_captures;
        MEM_DEFINE_LOCAL_ARRAY (saved_bck_p, array_size, lit_utf8_iterator_t);

        size_t size = (size_t) (array_size) * sizeof (lit_utf8_iterator_t);
        memcpy (saved_bck_p, re_ctx_p->saved_p, size);

        do
        {
          uint32_t offset = re_get_value (&bc_p);

          if (!sub_iter.buf_p)
          {
            match_value = re_match_regexp (re_ctx_p, bc_p, iter, &sub_iter);
            if (ecma_is_completion_value_throw (match_value))
            {
              break;
            }
          }
          bc_p += offset;
        }
        while (re_get_opcode (&bc_p) == RE_OP_ALTERNATIVE);

        if (!ecma_is_completion_value_throw (match_value))
        {
          JERRY_DDLOG ("Execute RE_OP_LOOKAHEAD_POS/NEG: ");
          ecma_free_completion_value (match_value);
          if ((op == RE_OP_LOOKAHEAD_POS && sub_iter.buf_p)
              || (op == RE_OP_LOOKAHEAD_NEG && !sub_iter.buf_p))
          {
            JERRY_DDLOG ("match\n");
            match_value = re_match_regexp (re_ctx_p, bc_p, iter, &sub_iter);
          }
          else
          {
            JERRY_DDLOG ("fail\n");
            match_value = ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
          }
        }

        if (!ecma_is_completion_value_throw (match_value))
        {
          if (ecma_is_value_true (match_value))
          {
            *out_iter_p = sub_iter;
          }
          else
          {
            JERRY_ASSERT (ecma_is_value_boolean (match_value));
            /* restore saved */
            memcpy (re_ctx_p->saved_p, saved_bck_p, size);
          }
        }

        MEM_FINALIZE_LOCAL_ARRAY (saved_bck_p);
        return match_value;
      }
      case RE_OP_CHAR_CLASS:
      case RE_OP_INV_CHAR_CLASS:
      {
        uint32_t num_of_ranges;
        bool is_match;

        JERRY_DDLOG ("Execute RE_OP_CHAR_CLASS/RE_OP_INV_CHAR_CLASS, ");
        if (lit_utf8_iterator_is_eos (&iter))
        {
          JERRY_DDLOG ("fail\n");
          return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
        }

        bool is_ignorecase = re_ctx_p->flags & RE_FLAG_IGNORE_CASE;
        ecma_char_t curr_ch = re_canonicalize (lit_utf8_iterator_read_next (&iter), is_ignorecase);

        num_of_ranges = re_get_value (&bc_p);
        is_match = false;

        while (num_of_ranges)
        {
          ecma_char_t ch1 = re_canonicalize ((ecma_char_t) re_get_value (&bc_p), is_ignorecase);
          ecma_char_t ch2 = re_canonicalize ((ecma_char_t) re_get_value (&bc_p), is_ignorecase);
          JERRY_DDLOG ("num_of_ranges=%d, ch1=%d, ch2=%d, curr_ch=%d; ",
                       num_of_ranges, ch1, ch2, curr_ch);

          if (curr_ch >= ch1 && curr_ch <= ch2)
          {
            /* We must read all the ranges from bytecode. */
            is_match = true;
          }
          num_of_ranges--;
        }

        if (op == RE_OP_CHAR_CLASS)
        {
          if (!is_match)
          {
            JERRY_DDLOG ("fail\n");
            return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
          }
        }
        else
        {
          JERRY_ASSERT (op == RE_OP_INV_CHAR_CLASS);
          if (is_match)
          {
            JERRY_DDLOG ("fail\n");
            return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
          }
        }
        JERRY_DDLOG ("match\n");
        break; /* tail merge */
      }
      case RE_OP_BACKREFERENCE:
      {
        uint32_t backref_idx;

        backref_idx = re_get_value (&bc_p);
        JERRY_DDLOG ("Execute RE_OP_BACKREFERENCE (idx: %d): ", backref_idx);
        backref_idx *= 2;  /* backref n -> saved indices [n*2, n*2+1] */
        JERRY_ASSERT (backref_idx >= 2 && backref_idx + 1 < re_ctx_p->num_of_captures);

        if (!re_ctx_p->saved_p[backref_idx].buf_p || !re_ctx_p->saved_p[backref_idx + 1].buf_p)
        {
          JERRY_DDLOG ("match\n");
          break; /* capture is 'undefined', always matches! */
        }

        lit_utf8_iterator_t sub_iter = re_ctx_p->saved_p[backref_idx];

        while (sub_iter.buf_pos.offset < re_ctx_p->saved_p[backref_idx + 1].buf_pos.offset)
        {
          ecma_char_t ch1, ch2;

          if ((iter.buf_p + iter.buf_pos.offset) >= re_ctx_p->input_end_p)
          {
            JERRY_DDLOG ("fail\n");
            return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
          }

          ch1 = lit_utf8_iterator_read_next (&sub_iter);
          ch2 = lit_utf8_iterator_read_next (&iter);

          if (ch1 != ch2)
          {
            JERRY_DDLOG ("fail\n");
            return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
          }
        }
        JERRY_DDLOG ("match\n");
        break; /* tail merge */
      }
      case RE_OP_SAVE_AT_START:
      {
        re_bytecode_t *old_bc_p;

        JERRY_DDLOG ("Execute RE_OP_SAVE_AT_START\n");
        lit_utf8_iterator_t old_start_p = re_ctx_p->saved_p[RE_GLOBAL_START_IDX];
        re_ctx_p->saved_p[RE_GLOBAL_START_IDX] = iter;

        do
        {
          uint32_t offset = re_get_value (&bc_p);
          lit_utf8_iterator_t sub_iter = lit_utf8_iterator_create (NULL, 0);
          ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p, iter, &sub_iter);
          if (ecma_is_value_true (match_value))
          {
            *out_iter_p = sub_iter;
            return match_value; /* match */
          }
          else if (ecma_is_completion_value_throw (match_value))
          {
            return match_value;
          }
          bc_p += offset;
          old_bc_p = bc_p;
        }
        while (re_get_opcode (&bc_p) == RE_OP_ALTERNATIVE);
        bc_p = old_bc_p;

        re_ctx_p->saved_p[RE_GLOBAL_START_IDX] = old_start_p;
        return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
      }
      case RE_OP_SAVE_AND_MATCH:
      {
        JERRY_DDLOG ("End of pattern is reached: match\n");
        re_ctx_p->saved_p[RE_GLOBAL_END_IDX] = iter;
        *out_iter_p = iter;
        return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_TRUE); /* match */
      }
      case RE_OP_ALTERNATIVE:
      {
        /*
        *  Alternatives should be jump over, when alternative opcode appears.
        */
        uint32_t offset = re_get_value (&bc_p);
        JERRY_DDLOG ("Execute RE_OP_ALTERNATIVE");
        bc_p += offset;

        while (*bc_p == RE_OP_ALTERNATIVE)
        {
          JERRY_DDLOG (", jump: %d");
          bc_p++;
          offset = re_get_value (&bc_p);
          bc_p += offset;
        }

        JERRY_DDLOG ("\n");
        break; /* tail merge */
      }
      case RE_OP_CAPTURE_NON_GREEDY_ZERO_GROUP_START:
      case RE_OP_NON_CAPTURE_NON_GREEDY_ZERO_GROUP_START:
      {
        /*
        *  On non-greedy iterations we have to execute the bytecode
        *  after the group first, if zero iteration is allowed.
        */
        uint32_t start_idx, iter_idx, offset;
        lit_utf8_iterator_t old_start = lit_utf8_iterator_create (NULL, 0);
        lit_utf8_iterator_t sub_iter = lit_utf8_iterator_create (NULL, 0);
        re_bytecode_t *old_bc_p;

        old_bc_p = bc_p; /* save the bytecode start position of the group start */
        start_idx = re_get_value (&bc_p);
        offset = re_get_value (&bc_p);

        if (RE_IS_CAPTURE_GROUP (op))
        {
          JERRY_ASSERT (start_idx <= re_ctx_p->num_of_captures / 2);
          iter_idx = start_idx - 1;
          start_idx *= 2;

          old_start = re_ctx_p->saved_p[start_idx];
          re_ctx_p->saved_p[start_idx] = iter;
        }
        else
        {
          JERRY_ASSERT (start_idx < re_ctx_p->num_of_non_captures);
          iter_idx = start_idx + (re_ctx_p->num_of_captures / 2) - 1;
          start_idx += re_ctx_p->num_of_captures;
        }
        re_ctx_p->num_of_iterations_p[iter_idx] = 0;

        /* Jump all over to the end of the END opcode. */
        bc_p += offset;

        /* Try to match after the close paren if zero is allowed */
        ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p, iter, &sub_iter);

        if (ecma_is_value_true (match_value))
        {
          *out_iter_p = sub_iter;
          return match_value; /* match */
        }
        else if (ecma_is_completion_value_throw (match_value))
        {
          return match_value;
        }
        if (RE_IS_CAPTURE_GROUP (op))
        {
          re_ctx_p->saved_p[start_idx] = old_start;
        }

        bc_p = old_bc_p;
        /* FALLTHRU */
      }
      case RE_OP_CAPTURE_GROUP_START:
      case RE_OP_CAPTURE_GREEDY_ZERO_GROUP_START:
      case RE_OP_NON_CAPTURE_GROUP_START:
      case RE_OP_NON_CAPTURE_GREEDY_ZERO_GROUP_START:
      {
        uint32_t start_idx, iter_idx, old_iteration_cnt, offset;
        lit_utf8_iterator_t sub_iter = lit_utf8_iterator_create (NULL, 0);
        re_bytecode_t *old_bc_p;
        re_bytecode_t *end_bc_p = NULL;
        start_idx = re_get_value (&bc_p);

        if (op != RE_OP_CAPTURE_GROUP_START
            && op != RE_OP_NON_CAPTURE_GROUP_START)
        {
          offset = re_get_value (&bc_p);
          end_bc_p = bc_p + offset;
        }

        if (RE_IS_CAPTURE_GROUP (op))
        {
          JERRY_ASSERT (start_idx <= re_ctx_p->num_of_captures / 2);
          iter_idx = start_idx - 1;
          start_idx *= 2;
        }
        else
        {
          JERRY_ASSERT (start_idx < re_ctx_p->num_of_non_captures);
          iter_idx = start_idx + (re_ctx_p->num_of_captures / 2) - 1;
          start_idx += re_ctx_p->num_of_captures;
        }

        lit_utf8_iterator_t old_start = re_ctx_p->saved_p[start_idx];
        old_iteration_cnt = re_ctx_p->num_of_iterations_p[iter_idx];
        re_ctx_p->saved_p[start_idx] = iter;
        re_ctx_p->num_of_iterations_p[iter_idx] = 0;

        do
        {
          offset = re_get_value (&bc_p);
          ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p, iter, &sub_iter);
          if (ecma_is_value_true (match_value))
          {
            *out_iter_p = sub_iter;
            return match_value; /* match */
          }
          else if (ecma_is_completion_value_throw (match_value))
          {
            return match_value;
          }
          bc_p += offset;
          old_bc_p = bc_p;
        }
        while (re_get_opcode (&bc_p) == RE_OP_ALTERNATIVE);
        bc_p = old_bc_p;
        re_ctx_p->num_of_iterations_p[iter_idx] = old_iteration_cnt;

        /* Try to match after the close paren if zero is allowed. */
        if (op == RE_OP_CAPTURE_GREEDY_ZERO_GROUP_START
            || op == RE_OP_NON_CAPTURE_GREEDY_ZERO_GROUP_START)
        {
          JERRY_ASSERT (end_bc_p);
          ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, end_bc_p, iter, &sub_iter);

          if (ecma_is_value_true (match_value))
          {
            *out_iter_p = sub_iter;
            return match_value; /* match */
          }
          else if (ecma_is_completion_value_throw (match_value))
          {
            return match_value;
          }
        }

        re_ctx_p->saved_p[start_idx] = old_start;
        return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
      }
      case RE_OP_CAPTURE_NON_GREEDY_GROUP_END:
      case RE_OP_NON_CAPTURE_NON_GREEDY_GROUP_END:
      {
        uint32_t end_idx, iter_idx, min, max;
        re_bytecode_t *old_bc_p;

        /*
        *  On non-greedy iterations we have to execute the bytecode
        *  after the group first. Try to iterate only if it fails.
        */
        old_bc_p = bc_p; /* save the bytecode start position of the group end */
        end_idx = re_get_value (&bc_p);
        min = re_get_value (&bc_p);
        max = re_get_value (&bc_p);
        re_get_value (&bc_p); /* start offset */

        if (RE_IS_CAPTURE_GROUP (op))
        {
          JERRY_ASSERT (end_idx <= re_ctx_p->num_of_captures / 2);
          iter_idx = end_idx - 1;
          end_idx = (end_idx * 2) + 1;
        }
        else
        {
          JERRY_ASSERT (end_idx <= re_ctx_p->num_of_non_captures);
          iter_idx = end_idx + (re_ctx_p->num_of_captures / 2) - 1;
          end_idx += re_ctx_p->num_of_captures;
        }

        re_ctx_p->num_of_iterations_p[iter_idx]++;

        if (re_ctx_p->num_of_iterations_p[iter_idx] >= min
            && re_ctx_p->num_of_iterations_p[iter_idx] <= max)
        {
          lit_utf8_iterator_t old_end = re_ctx_p->saved_p[end_idx];
          re_ctx_p->saved_p[end_idx] = iter;

          lit_utf8_iterator_t sub_iter = lit_utf8_iterator_create (NULL, 0);
          ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p, iter, &sub_iter);
          if (ecma_is_value_true (match_value))
          {
            *out_iter_p = sub_iter;
            return match_value; /* match */
          }
          else if (ecma_is_completion_value_throw (match_value))
          {
            return match_value;
          }

          re_ctx_p->saved_p[end_idx] = old_end;
        }
        re_ctx_p->num_of_iterations_p[iter_idx]--;
        bc_p = old_bc_p;

        /* If non-greedy fails and try to iterate... */
        /* FALLTHRU */
      }
      case RE_OP_CAPTURE_GREEDY_GROUP_END:
      case RE_OP_NON_CAPTURE_GREEDY_GROUP_END:
      {
        uint32_t start_idx, end_idx, iter_idx, min, max, offset;
        lit_utf8_iterator_t old_start = lit_utf8_iterator_create (NULL, 0);
        lit_utf8_iterator_t old_end = lit_utf8_iterator_create (NULL, 0);
        lit_utf8_iterator_t sub_iter = lit_utf8_iterator_create (NULL, 0);
        re_bytecode_t *old_bc_p;

        end_idx = re_get_value (&bc_p);
        min = re_get_value (&bc_p);
        max = re_get_value (&bc_p);
        offset = re_get_value (&bc_p);

        if (RE_IS_CAPTURE_GROUP (op))
        {
          JERRY_ASSERT (end_idx <= re_ctx_p->num_of_captures / 2);
          iter_idx = end_idx - 1;
          start_idx = end_idx * 2;
          end_idx = start_idx + 1;
        }
        else
        {
          JERRY_ASSERT (end_idx <= re_ctx_p->num_of_non_captures);
          iter_idx = end_idx + (re_ctx_p->num_of_captures / 2) - 1;
          end_idx += re_ctx_p->num_of_captures;
          start_idx = end_idx;
        }

        /* Check the empty iteration if the minimum number of iterations is reached. */
        if (re_ctx_p->num_of_iterations_p[iter_idx] >= min
            && iter.buf_p == re_ctx_p->saved_p[start_idx].buf_p
            && iter.buf_pos.offset == re_ctx_p->saved_p[start_idx].buf_pos.offset)
        {
          return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
        }

        re_ctx_p->num_of_iterations_p[iter_idx]++;

        old_bc_p = bc_p; /* Save the bytecode end position of the END opcodes for matching after it. */
        old_end = re_ctx_p->saved_p[end_idx];
        re_ctx_p->saved_p[end_idx] = iter;

        if (re_ctx_p->num_of_iterations_p[iter_idx] < max)
        {
          bc_p -= offset;
          offset = re_get_value (&bc_p);

          old_start = re_ctx_p->saved_p[start_idx];
          re_ctx_p->saved_p[start_idx] = iter;
          ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p, iter, &sub_iter);

          if (ecma_is_value_true (match_value))
          {
            *out_iter_p = sub_iter;
            return match_value; /* match */
          }
          else if (ecma_is_completion_value_throw (match_value))
          {
            return match_value;
          }

          re_ctx_p->saved_p[start_idx] = old_start;

          /* Try to match alternatives if any. */
          bc_p += offset;
          while (*bc_p == RE_OP_ALTERNATIVE)
          {
            bc_p++; /* RE_OP_ALTERNATIVE */
            offset = re_get_value (&bc_p);

            old_start = re_ctx_p->saved_p[start_idx];
            re_ctx_p->saved_p[start_idx] = iter;

            ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p, iter, &sub_iter);

            if (ecma_is_value_true (match_value))
            {
              *out_iter_p = sub_iter;
              return match_value; /* match */
            }
            else if (ecma_is_completion_value_throw (match_value))
            {
              return match_value;
            }

            re_ctx_p->saved_p[start_idx] = old_start;
            bc_p += offset;
          }
        }

        if (re_ctx_p->num_of_iterations_p[iter_idx] >= min
            && re_ctx_p->num_of_iterations_p[iter_idx] <= max)
        {
          /* Try to match the rest of the bytecode. */
          ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, old_bc_p, iter, &sub_iter);

          if (ecma_is_value_true (match_value))
          {
            *out_iter_p = sub_iter;
            return match_value; /* match */
          }
          else if (ecma_is_completion_value_throw (match_value))
          {
            return match_value;
          }
        }

        /* restore if fails */
        re_ctx_p->saved_p[end_idx] = old_end;
        re_ctx_p->num_of_iterations_p[iter_idx]--;
        return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
      }
      case RE_OP_NON_GREEDY_ITERATOR:
      {
        uint32_t min, max, offset, num_of_iter;
        lit_utf8_iterator_t sub_iter = lit_utf8_iterator_create (NULL, 0);

        min = re_get_value (&bc_p);
        max = re_get_value (&bc_p);

        offset = re_get_value (&bc_p);
        JERRY_DDLOG ("Non-greedy iterator, min=%lu, max=%lu, offset=%ld\n",
                     (unsigned long) min, (unsigned long) max, (long) offset);

        num_of_iter = 0;
        while (num_of_iter <= max)
        {
          if (num_of_iter >= min)
          {
            ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p + offset, iter, &sub_iter);

            if (ecma_is_value_true (match_value))
            {
              *out_iter_p = sub_iter;
              return match_value; /* match */
            }
            else if (ecma_is_completion_value_throw (match_value))
            {
              return match_value;
            }
          }

          ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p, iter, &sub_iter);

          if (!ecma_is_value_true (match_value))
          {
            if (ecma_is_completion_value_throw (match_value))
            {
              return match_value;
            }

            break;
          }

          iter = sub_iter;
          num_of_iter++;
        }
        return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
      }
      case RE_OP_GREEDY_ITERATOR:
      {
        uint32_t min, max, offset, num_of_iter;
        lit_utf8_iterator_t sub_iter = lit_utf8_iterator_create (NULL, 0);

        min = re_get_value (&bc_p);
        max = re_get_value (&bc_p);

        offset = re_get_value (&bc_p);
        JERRY_DDLOG ("Greedy iterator, min=%lu, max=%lu, offset=%ld\n",
                     (unsigned long) min, (unsigned long) max, (long) offset);

        num_of_iter = 0;

        while (num_of_iter < max)
        {
          ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p, iter, &sub_iter);

          if (!ecma_is_value_true (match_value))
          {
            if (ecma_is_completion_value_throw (match_value))
            {
              return match_value;
            }

            break;
          }

          iter = sub_iter;
          num_of_iter++;
        }

        while (num_of_iter >= min)
        {
          ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p + offset, iter, &sub_iter);

          if (ecma_is_value_true (match_value))
          {
            *out_iter_p = sub_iter;
            return match_value; /* match */
          }
          else if (ecma_is_completion_value_throw (match_value))
          {
            return match_value;
          }

          if (num_of_iter == min)
          {
            break;
          }

          lit_utf8_iterator_read_prev (&iter);
          num_of_iter--;
        }
        return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
      }
      default:
      {
        JERRY_DDLOG ("UNKNOWN opcode (%d)!\n", (uint32_t) op);
        return ecma_make_throw_obj_completion_value (ecma_new_standard_error (ECMA_ERROR_COMMON));
      }
    }
  }

  JERRY_UNREACHABLE ();
  return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
} /* regexp_match */
/**
 * The String.prototype object's 'trim' routine
 *
 * See also:
 *          ECMA-262 v5, 15.5.4.20
 *
 * @return completion value
 *         Returned value must be freed with ecma_free_completion_value.
 */
static ecma_completion_value_t
ecma_builtin_string_prototype_object_trim (ecma_value_t this_arg) /**< this argument */
{
  ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();

  /* 1 */
  ECMA_TRY_CATCH (check_coercible_val,
                  ecma_op_check_object_coercible (this_arg),
                  ret_value);

  /* 2 */
  ECMA_TRY_CATCH (to_string_val,
                  ecma_op_to_string (this_arg),
                  ret_value);

  ecma_string_t *original_string_p = ecma_get_string_from_value (to_string_val);

  /* 3 */
  const lit_utf8_size_t size = ecma_string_get_size (original_string_p);
  const ecma_length_t length = ecma_string_get_size (original_string_p);

  /* Workaround: avoid repeated call of ecma_string_get_char_at_pos() because its overhead */
  lit_utf8_byte_t *original_utf8_str_p = (lit_utf8_byte_t *) mem_heap_alloc_block (size + 1,
                                                                                   MEM_HEAP_ALLOC_SHORT_TERM);
  ecma_string_to_utf8_string (original_string_p, original_utf8_str_p, (ssize_t) size);

  uint32_t prefix = 0, postfix = 0;
  uint32_t new_len = 0;

  while (prefix < length)
  {
    ecma_char_t next_char = lit_utf8_string_code_unit_at (original_utf8_str_p,
                                                          size,
                                                          prefix);

    if (lit_char_is_white_space (next_char)
        || lit_char_is_line_terminator (next_char))
    {
      prefix++;
    }
    else
    {
      break;
    }
  }

  while (postfix < length - prefix)
  {
    ecma_char_t next_char = lit_utf8_string_code_unit_at (original_utf8_str_p,
                                                          size,
                                                          length - postfix - 1);
    if (lit_char_is_white_space (next_char)
        || lit_char_is_line_terminator (next_char))
    {
      postfix++;
    }
    else
    {
      break;
    }
  }

  new_len = prefix < size ? size - prefix - postfix : 0;

  ecma_string_t *new_str_p = ecma_string_substr (original_string_p, prefix, prefix + new_len);

  /* 4 */
  ret_value = ecma_make_normal_completion_value (ecma_make_string_value (new_str_p));

  mem_heap_free_block (original_utf8_str_p);

  ECMA_FINALIZE (to_string_val);
  ECMA_FINALIZE (check_coercible_val);

  return ret_value;
} /* ecma_builtin_string_prototype_object_trim */
Exemplo n.º 8
0
/**
 * Transforming escape sequences in the charset, outputting converted string to specified buffer
 *
 * Note:
 *      Size of string with transformed escape sequences is always
 *      less or equal to size of corresponding source string.
 *
 * @return size of converted string
 */
static lit_utf8_size_t
lexer_transform_escape_sequences (const jerry_api_char_t *source_str_p, /**< string to convert,
                                                                         *   located in source buffer */
                                  lit_utf8_size_t source_str_size, /**< size of the string and of the output buffer */
                                  jerry_api_char_t *output_str_buf_p) /**< output buffer for converted string */
{
  if (source_str_size == 0)
  {
    return 0;
  }
  else
  {
    JERRY_ASSERT (source_str_p != NULL);
  }

  lit_utf8_byte_t *output_str_buf_iter_p = output_str_buf_p;
  const size_t output_str_buf_size = source_str_size;
  bool is_correct_sequence = true;

  lit_utf8_iterator_t source_str_iter = lit_utf8_iterator_create (source_str_p,
                                                                  source_str_size);

  ecma_char_t prev_converted_char = LIT_CHAR_NULL;

  while (!lit_utf8_iterator_is_eos (&source_str_iter))
  {
    ecma_char_t converted_char;

    const ecma_char_t next_char = lit_utf8_iterator_read_next (&source_str_iter);

    if (next_char == LIT_CHAR_BACKSLASH)
    {
      if (lit_utf8_iterator_is_eos (&source_str_iter))
      {
        is_correct_sequence = false;
        break;
      }

      const ecma_char_t char_after_next = lit_utf8_iterator_read_next (&source_str_iter);

      if (lit_char_is_decimal_digit (char_after_next))
      {
        if (lit_char_is_octal_digit (char_after_next))
        {
          if (char_after_next == LIT_CHAR_0
              && (lit_utf8_iterator_is_eos (&source_str_iter)
                  || !lit_char_is_octal_digit (lit_utf8_iterator_peek_next (&source_str_iter))))
          {
            converted_char = LIT_CHAR_NULL;
          }
          else
          {
            /* Implementation-defined (ECMA-262 v5, B.1.2): octal escape sequences are not implemented */
            is_correct_sequence = false;
            break;
          }
        }
        else
        {
          converted_char = char_after_next;
        }
      }
      else if (char_after_next == LIT_CHAR_LOWERCASE_U
               || char_after_next == LIT_CHAR_LOWERCASE_X)
      {
        if (!lexer_convert_escape_sequence_digits_to_char (&source_str_iter,
                                                           char_after_next == LIT_CHAR_LOWERCASE_U,
                                                           &converted_char))
        {
          is_correct_sequence = false;
          break;
        }
      }
      else if (lit_char_is_line_terminator (char_after_next))
      {
        /* Skip \, followed by a LineTerminatorSequence (ECMA-262, v5, 7.3) */
        if (char_after_next == LIT_CHAR_CR
            && !lit_utf8_iterator_is_eos (&source_str_iter)
            && lit_utf8_iterator_peek_next (&source_str_iter) == LIT_CHAR_LF)
        {
          lit_utf8_iterator_incr (&source_str_iter);
        }

        continue;
      }
      else
      {
        lexer_convert_single_escape_character (char_after_next, &converted_char);
      }
    }
    else
    {
      converted_char = next_char;
    }

    if (lit_is_code_unit_high_surrogate (prev_converted_char)
        && lit_is_code_unit_low_surrogate (converted_char))
    {
      output_str_buf_iter_p -= LIT_UTF8_MAX_BYTES_IN_CODE_UNIT;

      lit_code_point_t code_point = lit_convert_surrogate_pair_to_code_point (prev_converted_char,
                                                                              converted_char);
      output_str_buf_iter_p += lit_code_point_to_utf8 (code_point, output_str_buf_iter_p);
    }
    else
    {
      output_str_buf_iter_p += lit_code_unit_to_utf8 (converted_char, output_str_buf_iter_p);
      JERRY_ASSERT (output_str_buf_iter_p <= output_str_buf_p + output_str_buf_size);
    }

    prev_converted_char = converted_char;
  }

  if (is_correct_sequence)
  {
    return (lit_utf8_size_t) (output_str_buf_iter_p - output_str_buf_p);
  }
  else
  {
    PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX, "Illegal escape sequence", token_start_pos);
  }
} /* lexer_transform_escape_sequences */
Exemplo n.º 9
0
/**
 * Parse and construct lexer token
 *
 * Note:
 *      Currently, lexer token doesn't fully correspond to Token, defined in ECMA-262, v5, 7.5.
 *      For example, there is no new-line token type in the token definition of ECMA-262 v5.
 *
 * @return constructed token
 */
static token
lexer_parse_token (void)
{
  ecma_char_t c = LA (0);

  if (lit_char_is_white_space (c))
  {
    while (lit_char_is_white_space (c))
    {
      consume_char ();

      c = LA (0);
    }
  }

  if (lit_char_is_line_terminator (c))
  {
    while (lit_char_is_line_terminator (c))
    {
      consume_char ();

      c = LA (0);
    }

    return create_token (TOK_NEWLINE, 0);
  }

  JERRY_ASSERT (is_token_parse_in_progress == false);

  /* ECMA-262 v5, 7.6, Identifier */
  if (lexer_is_char_can_be_identifier_start (c))
  {
    return lexer_parse_identifier_or_keyword ();
  }

  /* ECMA-262 v5, 7.8.3, Numeric literal */
  if (lit_char_is_decimal_digit (c)
      || (c == LIT_CHAR_DOT
          && lit_char_is_decimal_digit (LA (1))))
  {
    return lexer_parse_number ();
  }

  if (c == LIT_CHAR_LF)
  {
    consume_char ();
    return create_token (TOK_NEWLINE, 0);
  }

  if (c == LIT_CHAR_NULL)
  {
    return create_token (TOK_EOF, 0);
  }

  if (c == LIT_CHAR_SINGLE_QUOTE
      || c == LIT_CHAR_DOUBLE_QUOTE)
  {
    return lexer_parse_string ();
  }

  /* ECMA-262 v5, 7.4, SingleLineComment or MultiLineComment */
  if (c == LIT_CHAR_SLASH
      && (LA (1) == LIT_CHAR_SLASH
          || LA (1) == LIT_CHAR_ASTERISK))
  {
    if (lexer_parse_comment ())
    {
      return create_token (TOK_NEWLINE, 0);
    }
    else
    {
      return lexer_parse_token ();
    }
  }

  if (c == LIT_CHAR_SLASH
      && !(prev_non_lf_token.type == TOK_NAME
           || prev_non_lf_token.type == TOK_NULL
           || prev_non_lf_token.type == TOK_BOOL
           || prev_non_lf_token.type == TOK_CLOSE_BRACE
           || prev_non_lf_token.type == TOK_CLOSE_SQUARE
           || prev_non_lf_token.type == TOK_CLOSE_PAREN
           || prev_non_lf_token.type == TOK_SMALL_INT
           || prev_non_lf_token.type == TOK_NUMBER
           || prev_non_lf_token.type == TOK_STRING
           || prev_non_lf_token.type == TOK_REGEXP))
  {
    return lexer_parse_regexp ();
  }

  /* ECMA-262 v5, 7.7, Punctuator */
  switch (c)
  {
    case LIT_CHAR_LEFT_BRACE:
    {
      RETURN_PUNC (TOK_OPEN_BRACE);
      break;
    }
    case LIT_CHAR_RIGHT_BRACE:
    {
      RETURN_PUNC (TOK_CLOSE_BRACE);
      break;
    }
    case LIT_CHAR_LEFT_PAREN:
    {
      RETURN_PUNC (TOK_OPEN_PAREN);
      break;
    }
    case LIT_CHAR_RIGHT_PAREN:
    {
      RETURN_PUNC (TOK_CLOSE_PAREN);
      break;
    }
    case LIT_CHAR_LEFT_SQUARE:
    {
      RETURN_PUNC (TOK_OPEN_SQUARE);
      break;
    }
    case LIT_CHAR_RIGHT_SQUARE:
    {
      RETURN_PUNC (TOK_CLOSE_SQUARE);
      break;
    }
    case LIT_CHAR_DOT:
    {
      RETURN_PUNC (TOK_DOT);
      break;
    }
    case LIT_CHAR_SEMICOLON:
    {
      RETURN_PUNC (TOK_SEMICOLON);
      break;
    }
    case LIT_CHAR_COMMA:
    {
      RETURN_PUNC (TOK_COMMA);
      break;
    }
    case LIT_CHAR_TILDE:
    {
      RETURN_PUNC (TOK_COMPL);
      break;
    }
    case LIT_CHAR_COLON:
    {
      RETURN_PUNC (TOK_COLON);
      break;
    }
    case LIT_CHAR_QUESTION:
    {
      RETURN_PUNC (TOK_QUERY);
      break;
    }

    case LIT_CHAR_ASTERISK:
    {
      IF_LA_IS (LIT_CHAR_EQUALS, TOK_MULT_EQ, TOK_MULT);
      break;
    }
    case LIT_CHAR_SLASH:
    {
      IF_LA_IS (LIT_CHAR_EQUALS, TOK_DIV_EQ, TOK_DIV);
      break;
    }
    case LIT_CHAR_CIRCUMFLEX:
    {
      IF_LA_IS (LIT_CHAR_EQUALS, TOK_XOR_EQ, TOK_XOR);
      break;
    }
    case LIT_CHAR_PERCENT:
    {
      IF_LA_IS (LIT_CHAR_EQUALS, TOK_MOD_EQ, TOK_MOD);
      break;
    }
    case LIT_CHAR_PLUS:
    {
      IF_LA_IS_OR (LIT_CHAR_PLUS, TOK_DOUBLE_PLUS, LIT_CHAR_EQUALS, TOK_PLUS_EQ, TOK_PLUS);
      break;
    }
    case LIT_CHAR_MINUS:
    {
      IF_LA_IS_OR (LIT_CHAR_MINUS, TOK_DOUBLE_MINUS, LIT_CHAR_EQUALS, TOK_MINUS_EQ, TOK_MINUS);
      break;
    }
    case LIT_CHAR_AMPERSAND:
    {
      IF_LA_IS_OR (LIT_CHAR_AMPERSAND, TOK_DOUBLE_AND, LIT_CHAR_EQUALS, TOK_AND_EQ, TOK_AND);
      break;
    }
    case LIT_CHAR_VLINE:
    {
      IF_LA_IS_OR (LIT_CHAR_VLINE, TOK_DOUBLE_OR, LIT_CHAR_EQUALS, TOK_OR_EQ, TOK_OR);
      break;
    }
    case LIT_CHAR_LESS_THAN:
    {
      switch (LA (1))
      {
        case LIT_CHAR_LESS_THAN: IF_LA_N_IS (LIT_CHAR_EQUALS, TOK_LSHIFT_EQ, TOK_LSHIFT, 2); break;
        case LIT_CHAR_EQUALS: RETURN_PUNC_EX (TOK_LESS_EQ, 2); break;
        default: RETURN_PUNC (TOK_LESS);
      }
      break;
    }
    case LIT_CHAR_GREATER_THAN:
    {
      switch (LA (1))
      {
        case LIT_CHAR_GREATER_THAN:
        {
          switch (LA (2))
          {
            case LIT_CHAR_GREATER_THAN: IF_LA_N_IS (LIT_CHAR_EQUALS, TOK_RSHIFT_EX_EQ, TOK_RSHIFT_EX, 3); break;
            case LIT_CHAR_EQUALS: RETURN_PUNC_EX (TOK_RSHIFT_EQ, 3); break;
            default: RETURN_PUNC_EX (TOK_RSHIFT, 2);
          }
          break;
        }
        case LIT_CHAR_EQUALS: RETURN_PUNC_EX (TOK_GREATER_EQ, 2); break;
        default: RETURN_PUNC (TOK_GREATER);
      }
      break;
    }
    case LIT_CHAR_EQUALS:
    {
      if (LA (1) == LIT_CHAR_EQUALS)
      {
        IF_LA_N_IS (LIT_CHAR_EQUALS, TOK_TRIPLE_EQ, TOK_DOUBLE_EQ, 2);
      }
      else
      {
        RETURN_PUNC (TOK_EQ);
      }
      break;
    }
    case LIT_CHAR_EXCLAMATION:
    {
      if (LA (1) == LIT_CHAR_EQUALS)
      {
        IF_LA_N_IS (LIT_CHAR_EQUALS, TOK_NOT_DOUBLE_EQ, TOK_NOT_EQ, 2);
      }
      else
      {
        RETURN_PUNC (TOK_NOT);
      }
      break;
    }
  }

  PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX, "Illegal character", lit_utf8_iterator_get_pos (&src_iter));
} /* lexer_parse_token */
Exemplo n.º 10
0
/**
 * Parse string literal (ECMA-262 v5, 7.8.5)
 */
static token
lexer_parse_regexp (void)
{
  token result;
  bool is_char_class = false;

  JERRY_ASSERT (LA (0) == LIT_CHAR_SLASH);
  new_token ();

  /* Eat up '/' */
  consume_char ();

  while (true)
  {
    if (lit_utf8_iterator_is_eos (&src_iter))
    {
      PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX, "Unterminated RegExp literal", token_start_pos);
    }

    ecma_char_t c = (ecma_char_t) LA (0);
    if (lit_char_is_line_terminator (c))
    {
      PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX, "RegExp literal should not contain newline character", token_start_pos);
    }
    else if (c == LIT_CHAR_BACKSLASH)
    {
      consume_char ();
      if (lit_char_is_line_terminator (LA (0)))
      {
        PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX,
                     "RegExp literal backslash sequence should not contain newline character",
                     token_start_pos);
      }
    }
    else if (c == LIT_CHAR_LEFT_SQUARE)
    {
      is_char_class = true;
    }
    else if (c == LIT_CHAR_RIGHT_SQUARE)
    {
      is_char_class = false;
    }
    else if (c == LIT_CHAR_SLASH && !is_char_class)
    {
      /* Eat up '/' */
      consume_char ();
      break;
    }

    consume_char ();
  }

  /* Try to parse RegExp flags */
  while (true)
  {
    ecma_char_t c = (ecma_char_t) LA (0);

    if (!lit_char_is_word_char (c) || lit_char_is_line_terminator (c))
    {
      break;
    }

    consume_char ();
  }

  result = lexer_create_token_for_charset (TOK_REGEXP, TOK_START () + 1, TOK_SIZE () - 1);

  is_token_parse_in_progress = false;
  return result;
} /* lexer_parse_regexp */
Exemplo n.º 11
0
/**
 * Parse string literal (ECMA-262 v5, 7.8.4)
 */
static token
lexer_parse_string (void)
{
  ecma_char_t c = (ecma_char_t) LA (0);
  JERRY_ASSERT (c == LIT_CHAR_SINGLE_QUOTE
                || c == LIT_CHAR_DOUBLE_QUOTE);


  new_token ();

  /* Consume quote character */
  consume_char ();

  const ecma_char_t end_char = c;

  bool is_escape_sequence_occured = false;

  do
  {
    c = LA (0);
    consume_char ();

    if (lit_char_is_line_terminator (c))
    {
      PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX, "String literal shall not contain newline character", token_start_pos);
    }
    else if (c == LIT_CHAR_BACKSLASH)
    {
      is_escape_sequence_occured = true;

      ecma_char_t nc = (ecma_char_t) LA (0);
      consume_char ();

      if (nc == LIT_CHAR_CR)
      {
        nc = (ecma_char_t) LA (0);

        if (nc == LIT_CHAR_LF)
        {
          consume_char ();
        }
      }
    }
  }
  while (c != end_char && !lit_utf8_iterator_is_eos (&src_iter));

  if (c == LIT_CHAR_NULL)
  {
    PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX, "Unclosed string", token_start_pos);
  }

  const lit_utf8_size_t charset_size = TOK_SIZE () - 2;

  token ret;

  if (!is_escape_sequence_occured)
  {
    ret = lexer_create_token_for_charset (TOK_STRING,
                                   TOK_START () + 1,
                                   charset_size);
  }
  else
  {
    ret = lexer_create_token_for_charset_transform_escape_sequences (TOK_STRING,
                                                                     TOK_START () + 1,
                                                                     charset_size);
  }

  is_token_parse_in_progress = false;

  return ret;
} /* lexer_parse_string */
Exemplo n.º 12
0
/**
 * Trim leading and trailing whitespace characters from string.
 *
 * @return trimmed ecma string
 */
ecma_string_t *
ecma_string_trim (const ecma_string_t *string_p) /**< pointer to an ecma string */
{
  ecma_string_t *ret_string_p;

  ECMA_STRING_TO_UTF8_STRING (string_p, utf8_str_p, utf8_str_size);

  if (utf8_str_size > 0)
  {
    ecma_char_t ch;
    lit_utf8_size_t read_size;
    const lit_utf8_byte_t *nonws_start_p = utf8_str_p + utf8_str_size;
    const lit_utf8_byte_t *current_p = utf8_str_p;

    /* Trim front. */
    while (current_p < nonws_start_p)
    {
      read_size = lit_read_code_unit_from_utf8 (current_p, &ch);

      if (!lit_char_is_white_space (ch)
          && !lit_char_is_line_terminator (ch))
      {
        nonws_start_p = current_p;
        break;
      }

      current_p += read_size;
    }

    current_p = utf8_str_p + utf8_str_size;

    /* Trim back. */
    while (current_p > utf8_str_p)
    {
      read_size = lit_read_prev_code_unit_from_utf8 (current_p, &ch);

      if (!lit_char_is_white_space (ch)
          && !lit_char_is_line_terminator (ch))
      {
        break;
      }

      current_p -= read_size;
    }

    /* Construct new string. */
    if (current_p > nonws_start_p)
    {
      ret_string_p = ecma_new_ecma_string_from_utf8 (nonws_start_p,
                                                     (lit_utf8_size_t) (current_p - nonws_start_p));
    }
    else
    {
      ret_string_p = ecma_get_magic_string (LIT_MAGIC_STRING__EMPTY);
    }
  }
  else
  {
    ret_string_p = ecma_get_magic_string (LIT_MAGIC_STRING__EMPTY);
  }

  ECMA_FINALIZE_UTF8_STRING (utf8_str_p, utf8_str_size);

  return ret_string_p;
} /* ecma_string_trim */