Exemple #1
0
static void
new_token (void)
{
  token_start_pos = lit_utf8_iterator_get_pos (&src_iter);
  JERRY_ASSERT (!token_start_pos.is_non_bmp_middle);
  is_token_parse_in_progress = true;
}
Exemple #2
0
/**
 * Construct next token from current source code position and increment the position
 *
 * @return the constructed token
 */
token
lexer_next_token (bool maybe_regexp, /**< read '/' as regexp? */
                  bool is_strict) /**< strict mode is on (true) / off (false) */
{
  lit_utf8_iterator_pos_t src_pos = lit_utf8_iterator_get_pos (&src_iter);
  if (src_pos.offset == 0 && !src_pos.is_non_bmp_middle)
  {
    dump_current_line ();
  }

  if (!is_empty (saved_token))
  {
    sent_token = saved_token;
    saved_token = empty_token;
  }
  else
  {
    /**
     * FIXME:
     *       The way to raise syntax errors for unexpected EOF
     *       should be reworked so that EOF would be checked by
     *       caller of the routine, and the following condition
     *       would be checked as assertion in the routine.
     */
    if (lexer_get_token_type (prev_token) == TOK_EOF
        && lexer_get_token_type (sent_token) == TOK_EOF)
    {
      PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX, "Unexpected EOF", lit_utf8_iterator_get_pos (&src_iter));
    }

    prev_token = sent_token;

    jsp_token_flag_t flags = JSP_TOKEN_FLAG__NO_FLAGS;

    bool is_preceded_by_new_lines;
    sent_token = lexer_parse_token (maybe_regexp, &is_preceded_by_new_lines, is_strict);

    if (is_preceded_by_new_lines)
    {
      flags = (jsp_token_flag_t) (flags | JSP_TOKEN_FLAG_PRECEDED_BY_NEWLINES);
    }

    sent_token.flags = flags;
  }

  return sent_token;
} /* lexer_next_token */
Exemple #3
0
token
lexer_next_token (void)
{
  lit_utf8_iterator_pos_t src_pos = lit_utf8_iterator_get_pos (&src_iter);
  if (src_pos.offset == 0 && !src_pos.is_non_bmp_middle)
  {
    dump_current_line ();
  }

  if (!is_empty (saved_token))
  {
    sent_token = saved_token;
    saved_token = empty_token;
    goto end;
  }

  /**
   * FIXME:
   *       The way to raise syntax errors for unexpected EOF
   *       should be reworked so that EOF would be checked by
   *       caller of the routine, and the following condition
   *       would be checked as assertion in the routine.
   */
  if (prev_token.type == TOK_EOF
      && sent_token.type == TOK_EOF)
  {
    PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX, "Unexpected EOF", lit_utf8_iterator_get_pos (&src_iter));
  }

  prev_token = sent_token;
  sent_token = lexer_parse_token ();

  if (sent_token.type == TOK_NEWLINE)
  {
    dump_current_line ();
  }
  else
  {
    prev_non_lf_token = sent_token;
  }

end:
  return sent_token;
}
Exemple #4
0
/**
 * Convert locus to line and column
 */
void
lexer_locus_to_line_and_column (lit_utf8_iterator_pos_t locus, /**< iterator position in the source script */
                                size_t *line, /**< @out: line number */
                                size_t *column) /**< @out: column number */
{
  JERRY_ASSERT ((lit_utf8_size_t) (locus.offset + locus.is_non_bmp_middle) <= buffer_size);

  lit_utf8_iterator_t iter = lit_utf8_iterator_create (buffer_start, (lit_utf8_size_t) buffer_size);
  lit_utf8_iterator_pos_t iter_pos = lit_utf8_iterator_get_pos (&iter);

  size_t l = 0, c = 0;
  while (!lit_utf8_iterator_is_eos (&iter) && lit_utf8_iterator_pos_cmp (iter_pos, locus) < 0)
  {
    ecma_char_t code_unit = lit_utf8_iterator_read_next (&iter);
    iter_pos = lit_utf8_iterator_get_pos (&iter);

    if (lit_char_is_line_terminator (code_unit))
    {
      if (code_unit == LIT_CHAR_CR
          && !lit_utf8_iterator_is_eos (&iter)
          && lit_utf8_iterator_peek_next (&iter) == LIT_CHAR_LF)
      {
        lit_utf8_iterator_incr (&iter);
      }
      c = 0;
      l++;
      continue;
    }

    c++;
  }

  if (line)
  {
    *line = l;
  }
  if (column)
  {
    *column = c;
  }
} /* lexer_locus_to_line_and_column */
Exemple #5
0
static locus
current_locus (void)
{
  if (is_token_parse_in_progress)
  {
    return token_start_pos;
  }
  else
  {
    return lit_utf8_iterator_get_pos (&src_iter);
  }
}
Exemple #6
0
/**
 * Parse a comment
 *
 * @return true if newline was met during parsing
 *         false - otherwise
 */
static bool
lexer_parse_comment (void)
{
  ecma_char_t c = LA (0);
  bool multiline;
  bool was_newlines = false;

  JERRY_ASSERT (LA (0) == LIT_CHAR_SLASH);
  JERRY_ASSERT (LA (1) == LIT_CHAR_SLASH || LA (1) == LIT_CHAR_ASTERISK);

  multiline = (LA (1) == LIT_CHAR_ASTERISK);

  consume_char ();
  consume_char ();

  while (!lit_utf8_iterator_is_eos (&src_iter))
  {
    c = LA (0);

    if (!multiline)
    {
      if (lit_char_is_line_terminator (c))
      {
        return true;
      }
    }
    else
    {
      if (c == LIT_CHAR_ASTERISK
          && LA (1) == LIT_CHAR_SLASH)
      {
        consume_char ();
        consume_char ();

        return was_newlines;
      }
      else if (lit_char_is_line_terminator (c))
      {
        was_newlines = true;
      }
    }

    consume_char ();
  }

  if (multiline)
  {
    PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX, "Unclosed multiline comment", lit_utf8_iterator_get_pos (&src_iter));
  }

  return false;
} /* lexer_parse_comment */
/**
 * The String.prototype object's 'indexOf' routine
 *
 * See also:
 *          ECMA-262 v5, 15.5.4.7
 *
 * @return completion value
 *         Returned value must be freed with ecma_free_completion_value.
 */
static ecma_completion_value_t
ecma_builtin_string_prototype_object_index_of (ecma_value_t this_arg, /**< this argument */
                                               ecma_value_t arg1, /**< routine's first argument */
                                               ecma_value_t arg2) /**< routine's second argument */
{
  ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();

  /* 1 */
  ECMA_TRY_CATCH (check_coercible_val,
                  ecma_op_check_object_coercible (this_arg),
                  ret_value);

  /* 2 */
  ECMA_TRY_CATCH (to_str_val,
                  ecma_op_to_string (this_arg),
                  ret_value);

  /* 3 */
  ECMA_TRY_CATCH (search_str_val,
                  ecma_op_to_string (arg1),
                  ret_value);

  /* 4 */
  ECMA_OP_TO_NUMBER_TRY_CATCH (pos_num,
                               arg2,
                               ret_value);

  /* 5 */
  ecma_string_t *original_str_p = ecma_get_string_from_value (to_str_val);
  const ecma_length_t original_len = ecma_string_get_length (original_str_p);
  const lit_utf8_size_t original_size = ecma_string_get_size (original_str_p);

  /* 4b, 6 */
  ecma_length_t start = ecma_builtin_helper_string_index_normalize (pos_num, original_len);

  /* 7 */
  ecma_string_t *search_str_p = ecma_get_string_from_value (search_str_val);
  const ecma_length_t search_len = ecma_string_get_length (search_str_p);
  const lit_utf8_size_t search_size = ecma_string_get_size (search_str_p);

  ecma_number_t *ret_num_p = ecma_alloc_number ();
  *ret_num_p = ecma_int32_to_number (-1);

  /* 8 */
  if (search_len <= original_len)
  {
    if (!search_len)
    {
      *ret_num_p = ecma_uint32_to_number (0);
    }
    else
    {
      /* create utf8 string from original string and advance to start position */
      MEM_DEFINE_LOCAL_ARRAY (original_str_utf8_p,
                              original_size,
                              lit_utf8_byte_t);

      ecma_string_to_utf8_string (original_str_p,
                                  original_str_utf8_p,
                                  (ssize_t) (original_size));

      lit_utf8_iterator_t original_it = lit_utf8_iterator_create (original_str_utf8_p, original_size);

      ecma_length_t index = start;
      lit_utf8_iterator_advance (&original_it, index);

      /* create utf8 string from search string */
      MEM_DEFINE_LOCAL_ARRAY (search_str_utf8_p,
                              search_size,
                              lit_utf8_byte_t);

      ecma_string_to_utf8_string (search_str_p,
                                  search_str_utf8_p,
                                  (ssize_t) (search_size));

      lit_utf8_iterator_t search_it = lit_utf8_iterator_create (search_str_utf8_p, search_size);

      /* iterate original string and try to match at each position */
      bool found = false;

      while (!found && index <= original_len - search_len)
      {
        ecma_length_t match_len = 0;
        lit_utf8_iterator_pos_t stored_original_pos = lit_utf8_iterator_get_pos (&original_it);

        while (match_len < search_len &&
               lit_utf8_iterator_read_next (&original_it) == lit_utf8_iterator_read_next (&search_it))
        {
          match_len++;
        }

        /* Check for match */
        if (match_len == search_len)
        {
          *ret_num_p = ecma_uint32_to_number (index);
          found = true;
        }
        else
        {
          /* reset iterators */
          lit_utf8_iterator_seek_bos (&search_it);
          lit_utf8_iterator_seek (&original_it, stored_original_pos);
          lit_utf8_iterator_incr (&original_it);
        }
        index++;
      }

      MEM_FINALIZE_LOCAL_ARRAY (search_str_utf8_p);
      MEM_FINALIZE_LOCAL_ARRAY (original_str_utf8_p);
    }
  }

  ecma_value_t new_value = ecma_make_number_value (ret_num_p);
  ret_value = ecma_make_normal_completion_value (new_value);

  ECMA_OP_TO_NUMBER_FINALIZE (pos_num);
  ECMA_FINALIZE (search_str_val);
  ECMA_FINALIZE (to_str_val);
  ECMA_FINALIZE (check_coercible_val);

  return ret_value;
} /* ecma_builtin_string_prototype_object_index_of */
Exemple #8
0
/**
 * Parse numeric literal (ECMA-262, v5, 7.8.3)
 *
 * @return token of TOK_SMALL_INT or TOK_NUMBER types
 */
static token
lexer_parse_number (void)
{
  ecma_char_t c = LA (0);
  bool is_hex = false;
  bool is_fp = false;
  ecma_number_t fp_res = .0;
  size_t tok_length = 0, i;
  token known_token;

  JERRY_ASSERT (lit_char_is_decimal_digit (c)
                || c == LIT_CHAR_DOT);

  if (c == LIT_CHAR_0)
  {
    if (LA (1) == LIT_CHAR_LOWERCASE_X
        || LA (1) == LIT_CHAR_UPPERCASE_X)
    {
      is_hex = true;
    }
  }
  else if (c == LIT_CHAR_DOT)
  {
    JERRY_ASSERT (lit_char_is_decimal_digit (LA (1)));
    is_fp = true;
  }

  if (is_hex)
  {
    // Eat up '0x'
    consume_char ();
    consume_char ();
    new_token ();

    c = LA (0);
    if (!lit_char_is_hex_digit (c))
    {
      PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX, "Invalid HexIntegerLiteral", lit_utf8_iterator_get_pos (&src_iter));
    }

    do
    {
      consume_char ();
      c = LA (0);
    }
    while (lit_char_is_hex_digit (c));

    if (lexer_is_char_can_be_identifier_start (c))
    {
      PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX,
                   "Identifier just after integer literal",
                   lit_utf8_iterator_get_pos (&src_iter));
    }

    tok_length = (size_t) (TOK_SIZE ());

    const lit_utf8_byte_t *fp_buf_p = TOK_START ();
    /* token is constructed at end of function */
    for (i = 0; i < tok_length; i++)
    {
      fp_res = fp_res * 16 + (ecma_number_t) lit_char_hex_to_int (fp_buf_p[i]);
    }
  }
  else
  {
    bool is_exp = false;

    new_token ();

    // Eat up '.'
    if (is_fp)
    {
      consume_char ();
    }

    while (true)
    {
      c = LA (0);

      if (c == LIT_CHAR_DOT)
      {
        if (is_fp)
        {
          /* token is constructed at end of function */
          break;
        }
        else
        {
          is_fp = true;
          consume_char ();

          continue;
        }
      }
      else if (c == LIT_CHAR_LOWERCASE_E
               || c == LIT_CHAR_UPPERCASE_E)
      {
        if (is_exp)
        {
          PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX,
                       "Numeric literal shall not contain more than exponential marker ('e' or 'E')",
                       lit_utf8_iterator_get_pos (&src_iter));
        }
        else
        {
          is_exp = true;
          consume_char ();

          if (LA (0) == LIT_CHAR_MINUS
              || LA (0) == LIT_CHAR_PLUS)
          {
            consume_char ();
          }

          continue;
        }
      }
      else if (!lit_char_is_decimal_digit (c))
      {
        if (lexer_is_char_can_be_identifier_start (c))
        {
          PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX,
                       "Numeric literal shall not contain non-numeric characters",
                       lit_utf8_iterator_get_pos (&src_iter));
        }

        /* token is constructed at end of function */
        break;
      }

      consume_char ();
    }

    tok_length = (size_t) (TOK_SIZE ());

    if (is_fp || is_exp)
    {
      ecma_number_t res = ecma_utf8_string_to_number (TOK_START (), (jerry_api_size_t) tok_length);
      JERRY_ASSERT (!ecma_number_is_nan (res));

      known_token = convert_seen_num_to_token (res);
      is_token_parse_in_progress = NULL;

      return known_token;
    }
    else if (*TOK_START () == LIT_CHAR_0
             && tok_length != 1)
    {
      /* Octal integer literals */
      if (strict_mode)
      {
        PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX, "Octal integer literals are not allowed in strict mode", token_start_pos);
      }
      else
      {
        /* token is constructed at end of function */
        const lit_utf8_byte_t *fp_buf_p = TOK_START ();

        for (i = 0; i < tok_length; i++)
        {
          fp_res = fp_res * 8 + (ecma_number_t) lit_char_hex_to_int (fp_buf_p[i]);
        }
      }
    }
    else
    {
      const lit_utf8_byte_t *fp_buf_p = TOK_START ();
      /* token is constructed at end of function */

      ecma_number_t mult = 1.0f;
      for (i = tok_length; i > 0; i--, mult *= 10)
      {
        fp_res += (ecma_number_t) lit_char_hex_to_int (fp_buf_p[i - 1]) * mult;
      }
    }
  }

  if (fp_res >= 0 && fp_res <= 255 && (uint8_t) fp_res == fp_res)
  {
    known_token = create_token (TOK_SMALL_INT, (uint8_t) fp_res);
    is_token_parse_in_progress = NULL;
    return known_token;
  }
  else
  {
    known_token = convert_seen_num_to_token (fp_res);
    is_token_parse_in_progress = NULL;
    return known_token;
  }
} /* lexer_parse_number */
Exemple #9
0
/**
 * Parse Identifier (ECMA-262 v5, 7.6) or ReservedWord (7.6.1; 7.8.1; 7.8.2).
 *
 * @return TOK_NAME - for Identifier,
 *         TOK_KEYWORD - for Keyword or FutureReservedWord,
 *         TOK_NULL - for NullLiteral,
 *         TOK_BOOL - for BooleanLiteral
 */
static token
lexer_parse_identifier_or_keyword (void)
{
  ecma_char_t c = LA (0);

  JERRY_ASSERT (lexer_is_char_can_be_identifier_start (c));

  new_token ();

  bool is_correct_identifier_name = true;
  bool is_escape_sequence_occured = false;
  bool is_all_chars_were_lowercase_ascii = true;

  while (true)
  {
    c = LA (0);

    if (c == LIT_CHAR_BACKSLASH)
    {
      consume_char ();

      is_escape_sequence_occured = true;

      bool is_unicode_escape_sequence = (LA (0) == LIT_CHAR_LOWERCASE_U);
      consume_char ();

      if (is_unicode_escape_sequence)
      {
        /* UnicodeEscapeSequence */

        if (!lexer_convert_escape_sequence_digits_to_char (&src_iter,
                                                           true,
                                                           &c))
        {
          is_correct_identifier_name = false;
          break;
        }
        else
        {
          /* c now contains character, encoded in the UnicodeEscapeSequence */

          // Check character, converted from UnicodeEscapeSequence
          if (!lexer_is_char_can_be_identifier_part (c))
          {
            is_correct_identifier_name = false;
            break;
          }
        }
      }
      else
      {
        is_correct_identifier_name = false;
        break;
      }
    }
    else if (!lexer_is_char_can_be_identifier_part (c))
    {
      break;
    }
    else
    {
      if (!(c >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_BEGIN
            && c <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_END))
      {
        is_all_chars_were_lowercase_ascii = false;
      }

      consume_char ();
    }
  }

  if (!is_correct_identifier_name)
  {
    PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX, "Illegal identifier name", lit_utf8_iterator_get_pos (&src_iter));
  }

  const lit_utf8_size_t charset_size = TOK_SIZE ();

  token ret = empty_token;

  if (!is_escape_sequence_occured
      && is_all_chars_were_lowercase_ascii)
  {
    /* Keyword or FutureReservedWord (TOK_KEYWORD), or boolean literal (TOK_BOOL), or null literal (TOK_NULL) */
    ret = lexer_parse_reserved_word (TOK_START (), charset_size);
  }

  if (is_empty (ret))
  {
    /* Identifier (TOK_NAME) */

    if (!is_escape_sequence_occured)
    {
      ret = lexer_create_token_for_charset (TOK_NAME,
                                            TOK_START (),
                                            charset_size);
    }
    else
    {
      ret = lexer_create_token_for_charset_transform_escape_sequences (TOK_NAME,
                                                                       TOK_START (),
                                                                       charset_size);
    }
  }

  is_token_parse_in_progress = false;

  return ret;
} /* lexer_parse_identifier_or_keyword */
Exemple #10
0
/**
 * Parse and construct lexer token
 *
 * Note:
 *      Currently, lexer token doesn't fully correspond to Token, defined in ECMA-262, v5, 7.5.
 *      For example, there is no new-line token type in the token definition of ECMA-262 v5.
 *
 * @return constructed token
 */
static token
lexer_parse_token (void)
{
  ecma_char_t c = LA (0);

  if (lit_char_is_white_space (c))
  {
    while (lit_char_is_white_space (c))
    {
      consume_char ();

      c = LA (0);
    }
  }

  if (lit_char_is_line_terminator (c))
  {
    while (lit_char_is_line_terminator (c))
    {
      consume_char ();

      c = LA (0);
    }

    return create_token (TOK_NEWLINE, 0);
  }

  JERRY_ASSERT (is_token_parse_in_progress == false);

  /* ECMA-262 v5, 7.6, Identifier */
  if (lexer_is_char_can_be_identifier_start (c))
  {
    return lexer_parse_identifier_or_keyword ();
  }

  /* ECMA-262 v5, 7.8.3, Numeric literal */
  if (lit_char_is_decimal_digit (c)
      || (c == LIT_CHAR_DOT
          && lit_char_is_decimal_digit (LA (1))))
  {
    return lexer_parse_number ();
  }

  if (c == LIT_CHAR_LF)
  {
    consume_char ();
    return create_token (TOK_NEWLINE, 0);
  }

  if (c == LIT_CHAR_NULL)
  {
    return create_token (TOK_EOF, 0);
  }

  if (c == LIT_CHAR_SINGLE_QUOTE
      || c == LIT_CHAR_DOUBLE_QUOTE)
  {
    return lexer_parse_string ();
  }

  /* ECMA-262 v5, 7.4, SingleLineComment or MultiLineComment */
  if (c == LIT_CHAR_SLASH
      && (LA (1) == LIT_CHAR_SLASH
          || LA (1) == LIT_CHAR_ASTERISK))
  {
    if (lexer_parse_comment ())
    {
      return create_token (TOK_NEWLINE, 0);
    }
    else
    {
      return lexer_parse_token ();
    }
  }

  if (c == LIT_CHAR_SLASH
      && !(prev_non_lf_token.type == TOK_NAME
           || prev_non_lf_token.type == TOK_NULL
           || prev_non_lf_token.type == TOK_BOOL
           || prev_non_lf_token.type == TOK_CLOSE_BRACE
           || prev_non_lf_token.type == TOK_CLOSE_SQUARE
           || prev_non_lf_token.type == TOK_CLOSE_PAREN
           || prev_non_lf_token.type == TOK_SMALL_INT
           || prev_non_lf_token.type == TOK_NUMBER
           || prev_non_lf_token.type == TOK_STRING
           || prev_non_lf_token.type == TOK_REGEXP))
  {
    return lexer_parse_regexp ();
  }

  /* ECMA-262 v5, 7.7, Punctuator */
  switch (c)
  {
    case LIT_CHAR_LEFT_BRACE:
    {
      RETURN_PUNC (TOK_OPEN_BRACE);
      break;
    }
    case LIT_CHAR_RIGHT_BRACE:
    {
      RETURN_PUNC (TOK_CLOSE_BRACE);
      break;
    }
    case LIT_CHAR_LEFT_PAREN:
    {
      RETURN_PUNC (TOK_OPEN_PAREN);
      break;
    }
    case LIT_CHAR_RIGHT_PAREN:
    {
      RETURN_PUNC (TOK_CLOSE_PAREN);
      break;
    }
    case LIT_CHAR_LEFT_SQUARE:
    {
      RETURN_PUNC (TOK_OPEN_SQUARE);
      break;
    }
    case LIT_CHAR_RIGHT_SQUARE:
    {
      RETURN_PUNC (TOK_CLOSE_SQUARE);
      break;
    }
    case LIT_CHAR_DOT:
    {
      RETURN_PUNC (TOK_DOT);
      break;
    }
    case LIT_CHAR_SEMICOLON:
    {
      RETURN_PUNC (TOK_SEMICOLON);
      break;
    }
    case LIT_CHAR_COMMA:
    {
      RETURN_PUNC (TOK_COMMA);
      break;
    }
    case LIT_CHAR_TILDE:
    {
      RETURN_PUNC (TOK_COMPL);
      break;
    }
    case LIT_CHAR_COLON:
    {
      RETURN_PUNC (TOK_COLON);
      break;
    }
    case LIT_CHAR_QUESTION:
    {
      RETURN_PUNC (TOK_QUERY);
      break;
    }

    case LIT_CHAR_ASTERISK:
    {
      IF_LA_IS (LIT_CHAR_EQUALS, TOK_MULT_EQ, TOK_MULT);
      break;
    }
    case LIT_CHAR_SLASH:
    {
      IF_LA_IS (LIT_CHAR_EQUALS, TOK_DIV_EQ, TOK_DIV);
      break;
    }
    case LIT_CHAR_CIRCUMFLEX:
    {
      IF_LA_IS (LIT_CHAR_EQUALS, TOK_XOR_EQ, TOK_XOR);
      break;
    }
    case LIT_CHAR_PERCENT:
    {
      IF_LA_IS (LIT_CHAR_EQUALS, TOK_MOD_EQ, TOK_MOD);
      break;
    }
    case LIT_CHAR_PLUS:
    {
      IF_LA_IS_OR (LIT_CHAR_PLUS, TOK_DOUBLE_PLUS, LIT_CHAR_EQUALS, TOK_PLUS_EQ, TOK_PLUS);
      break;
    }
    case LIT_CHAR_MINUS:
    {
      IF_LA_IS_OR (LIT_CHAR_MINUS, TOK_DOUBLE_MINUS, LIT_CHAR_EQUALS, TOK_MINUS_EQ, TOK_MINUS);
      break;
    }
    case LIT_CHAR_AMPERSAND:
    {
      IF_LA_IS_OR (LIT_CHAR_AMPERSAND, TOK_DOUBLE_AND, LIT_CHAR_EQUALS, TOK_AND_EQ, TOK_AND);
      break;
    }
    case LIT_CHAR_VLINE:
    {
      IF_LA_IS_OR (LIT_CHAR_VLINE, TOK_DOUBLE_OR, LIT_CHAR_EQUALS, TOK_OR_EQ, TOK_OR);
      break;
    }
    case LIT_CHAR_LESS_THAN:
    {
      switch (LA (1))
      {
        case LIT_CHAR_LESS_THAN: IF_LA_N_IS (LIT_CHAR_EQUALS, TOK_LSHIFT_EQ, TOK_LSHIFT, 2); break;
        case LIT_CHAR_EQUALS: RETURN_PUNC_EX (TOK_LESS_EQ, 2); break;
        default: RETURN_PUNC (TOK_LESS);
      }
      break;
    }
    case LIT_CHAR_GREATER_THAN:
    {
      switch (LA (1))
      {
        case LIT_CHAR_GREATER_THAN:
        {
          switch (LA (2))
          {
            case LIT_CHAR_GREATER_THAN: IF_LA_N_IS (LIT_CHAR_EQUALS, TOK_RSHIFT_EX_EQ, TOK_RSHIFT_EX, 3); break;
            case LIT_CHAR_EQUALS: RETURN_PUNC_EX (TOK_RSHIFT_EQ, 3); break;
            default: RETURN_PUNC_EX (TOK_RSHIFT, 2);
          }
          break;
        }
        case LIT_CHAR_EQUALS: RETURN_PUNC_EX (TOK_GREATER_EQ, 2); break;
        default: RETURN_PUNC (TOK_GREATER);
      }
      break;
    }
    case LIT_CHAR_EQUALS:
    {
      if (LA (1) == LIT_CHAR_EQUALS)
      {
        IF_LA_N_IS (LIT_CHAR_EQUALS, TOK_TRIPLE_EQ, TOK_DOUBLE_EQ, 2);
      }
      else
      {
        RETURN_PUNC (TOK_EQ);
      }
      break;
    }
    case LIT_CHAR_EXCLAMATION:
    {
      if (LA (1) == LIT_CHAR_EQUALS)
      {
        IF_LA_N_IS (LIT_CHAR_EQUALS, TOK_NOT_DOUBLE_EQ, TOK_NOT_EQ, 2);
      }
      else
      {
        RETURN_PUNC (TOK_NOT);
      }
      break;
    }
  }

  PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX, "Illegal character", lit_utf8_iterator_get_pos (&src_iter));
} /* lexer_parse_token */
Exemple #11
0
/**
 * Parse and construct lexer token
 *
 * Note:
 *      Currently, lexer token doesn't fully correspond to Token, defined in ECMA-262, v5, 7.5.
 *      For example, there is no new-line token type in the token definition of ECMA-262 v5.
 *
 * Note:
 *      For Lexer alone, it is hard to find out a token is whether a regexp or a division.
 *      Parser must set maybe_regexp to true if a regexp is expected.
 *      Otherwise, a division is expected.
 *
 * @return constructed token
 */
static token
lexer_parse_token (bool maybe_regexp, /**< read '/' as regexp? */
                   bool *out_is_preceed_by_new_lines_p, /**< out: is constructed token preceded by newlines? */
                   bool is_strict) /**< flag, indicating whether current code is in strict mode code */

{
  JERRY_ASSERT (is_token_parse_in_progress == false);

  *out_is_preceed_by_new_lines_p = lexer_skip_whitespace_and_comments ();

  ecma_char_t c = LA (0);

  /* ECMA-262 v5, 7.6, Identifier */
  if (lexer_is_char_can_be_identifier_start (c))
  {
    return lexer_parse_identifier_or_keyword (is_strict);
  }

  /* ECMA-262 v5, 7.8.3, Numeric literal */
  if (lit_char_is_decimal_digit (c)
      || (c == LIT_CHAR_DOT
          && lit_char_is_decimal_digit (LA (1))))
  {
    return lexer_parse_number (is_strict);
  }

  if (c == LIT_CHAR_NULL)
  {
    return create_token (TOK_EOF, 0);
  }

  if (c == LIT_CHAR_SINGLE_QUOTE
      || c == LIT_CHAR_DOUBLE_QUOTE)
  {
    return lexer_parse_string ();
  }

  if (c == LIT_CHAR_SLASH && maybe_regexp)
  {
    return lexer_parse_regexp ();
  }

  /* ECMA-262 v5, 7.7, Punctuator */
  switch (c)
  {
    case LIT_CHAR_LEFT_BRACE:
    {
      RETURN_PUNC (TOK_OPEN_BRACE);
      break;
    }
    case LIT_CHAR_RIGHT_BRACE:
    {
      RETURN_PUNC (TOK_CLOSE_BRACE);
      break;
    }
    case LIT_CHAR_LEFT_PAREN:
    {
      RETURN_PUNC (TOK_OPEN_PAREN);
      break;
    }
    case LIT_CHAR_RIGHT_PAREN:
    {
      RETURN_PUNC (TOK_CLOSE_PAREN);
      break;
    }
    case LIT_CHAR_LEFT_SQUARE:
    {
      RETURN_PUNC (TOK_OPEN_SQUARE);
      break;
    }
    case LIT_CHAR_RIGHT_SQUARE:
    {
      RETURN_PUNC (TOK_CLOSE_SQUARE);
      break;
    }
    case LIT_CHAR_DOT:
    {
      RETURN_PUNC (TOK_DOT);
      break;
    }
    case LIT_CHAR_SEMICOLON:
    {
      RETURN_PUNC (TOK_SEMICOLON);
      break;
    }
    case LIT_CHAR_COMMA:
    {
      RETURN_PUNC (TOK_COMMA);
      break;
    }
    case LIT_CHAR_TILDE:
    {
      RETURN_PUNC (TOK_COMPL);
      break;
    }
    case LIT_CHAR_COLON:
    {
      RETURN_PUNC (TOK_COLON);
      break;
    }
    case LIT_CHAR_QUESTION:
    {
      RETURN_PUNC (TOK_QUERY);
      break;
    }

    case LIT_CHAR_ASTERISK:
    {
      IF_LA_IS (LIT_CHAR_EQUALS, TOK_MULT_EQ, TOK_MULT);
      break;
    }
    case LIT_CHAR_SLASH:
    {
      IF_LA_IS (LIT_CHAR_EQUALS, TOK_DIV_EQ, TOK_DIV);
      break;
    }
    case LIT_CHAR_CIRCUMFLEX:
    {
      IF_LA_IS (LIT_CHAR_EQUALS, TOK_XOR_EQ, TOK_XOR);
      break;
    }
    case LIT_CHAR_PERCENT:
    {
      IF_LA_IS (LIT_CHAR_EQUALS, TOK_MOD_EQ, TOK_MOD);
      break;
    }
    case LIT_CHAR_PLUS:
    {
      IF_LA_IS_OR (LIT_CHAR_PLUS, TOK_DOUBLE_PLUS, LIT_CHAR_EQUALS, TOK_PLUS_EQ, TOK_PLUS);
      break;
    }
    case LIT_CHAR_MINUS:
    {
      IF_LA_IS_OR (LIT_CHAR_MINUS, TOK_DOUBLE_MINUS, LIT_CHAR_EQUALS, TOK_MINUS_EQ, TOK_MINUS);
      break;
    }
    case LIT_CHAR_AMPERSAND:
    {
      IF_LA_IS_OR (LIT_CHAR_AMPERSAND, TOK_DOUBLE_AND, LIT_CHAR_EQUALS, TOK_AND_EQ, TOK_AND);
      break;
    }
    case LIT_CHAR_VLINE:
    {
      IF_LA_IS_OR (LIT_CHAR_VLINE, TOK_DOUBLE_OR, LIT_CHAR_EQUALS, TOK_OR_EQ, TOK_OR);
      break;
    }
    case LIT_CHAR_LESS_THAN:
    {
      switch (LA (1))
      {
        case LIT_CHAR_LESS_THAN: IF_LA_N_IS (LIT_CHAR_EQUALS, TOK_LSHIFT_EQ, TOK_LSHIFT, 2); break;
        case LIT_CHAR_EQUALS: RETURN_PUNC_EX (TOK_LESS_EQ, 2); break;
        default: RETURN_PUNC (TOK_LESS);
      }
      break;
    }
    case LIT_CHAR_GREATER_THAN:
    {
      switch (LA (1))
      {
        case LIT_CHAR_GREATER_THAN:
        {
          switch (LA (2))
          {
            case LIT_CHAR_GREATER_THAN: IF_LA_N_IS (LIT_CHAR_EQUALS, TOK_RSHIFT_EX_EQ, TOK_RSHIFT_EX, 3); break;
            case LIT_CHAR_EQUALS: RETURN_PUNC_EX (TOK_RSHIFT_EQ, 3); break;
            default: RETURN_PUNC_EX (TOK_RSHIFT, 2);
          }
          break;
        }
        case LIT_CHAR_EQUALS: RETURN_PUNC_EX (TOK_GREATER_EQ, 2); break;
        default: RETURN_PUNC (TOK_GREATER);
      }
      break;
    }
    case LIT_CHAR_EQUALS:
    {
      if (LA (1) == LIT_CHAR_EQUALS)
      {
        IF_LA_N_IS (LIT_CHAR_EQUALS, TOK_TRIPLE_EQ, TOK_DOUBLE_EQ, 2);
      }
      else
      {
        RETURN_PUNC (TOK_EQ);
      }
      break;
    }
    case LIT_CHAR_EXCLAMATION:
    {
      if (LA (1) == LIT_CHAR_EQUALS)
      {
        IF_LA_N_IS (LIT_CHAR_EQUALS, TOK_NOT_DOUBLE_EQ, TOK_NOT_EQ, 2);
      }
      else
      {
        RETURN_PUNC (TOK_NOT);
      }
      break;
    }
  }

  PARSE_ERROR (JSP_EARLY_ERROR_SYNTAX, "Illegal character", lit_utf8_iterator_get_pos (&src_iter));
} /* lexer_parse_token */
Exemple #12
0
int
main (int __attr_unused___ argc,
      char __attr_unused___ **argv)
{
  TEST_INIT ();

  mem_init ();

  lit_utf8_byte_t utf8_string[max_bytes_in_string];
  ecma_char_t code_units[max_code_units_in_string];
  lit_utf8_iterator_pos_t saved_positions[max_code_units_in_string];

  for (int i = 0; i < test_iters; i++)
  {
    lit_utf8_size_t utf8_string_size = (i == 0) ? 0 : (lit_utf8_size_t) (rand () % max_bytes_in_string);
    ecma_length_t length = generate_utf8_string (utf8_string, utf8_string_size);

    JERRY_ASSERT (lit_utf8_string_length (utf8_string, utf8_string_size) == length);

    lit_utf8_iterator_t iter = lit_utf8_iterator_create (utf8_string, utf8_string_size);
    ecma_length_t calculated_length = 0;

    ecma_length_t code_units_count = 0;
    while (!lit_utf8_iterator_is_eos (&iter))
    {
      code_units[code_units_count] = lit_utf8_iterator_peek_next (&iter);
      saved_positions[code_units_count] = lit_utf8_iterator_get_pos (&iter);
      code_units_count++;
      calculated_length++;

      lit_utf8_iterator_incr (&iter);
    }

    JERRY_ASSERT (length == calculated_length);

    if (code_units_count > 0)
    {
      for (int j = 0; j < test_subiters; j++)
      {
        ecma_length_t index = (ecma_length_t) rand () % code_units_count;
        lit_utf8_iterator_seek (&iter, saved_positions[index]);
        JERRY_ASSERT (lit_utf8_iterator_peek_next (&iter) == code_units[index]);
        JERRY_ASSERT (lit_utf8_iterator_get_index (&iter) == index);
      }
    }

    lit_utf8_iterator_seek_eos (&iter);
    while (!lit_utf8_iterator_is_bos (&iter))
    {
      JERRY_ASSERT (code_units_count > 0);
      calculated_length--;
      JERRY_ASSERT (code_units[calculated_length] == lit_utf8_iterator_peek_prev (&iter));
      lit_utf8_iterator_decr (&iter);
    }

    JERRY_ASSERT (calculated_length == 0);

    while (!lit_utf8_iterator_is_eos (&iter))
    {
      ecma_char_t code_unit = lit_utf8_iterator_read_next (&iter);
      JERRY_ASSERT (code_unit == code_units[calculated_length]);
      calculated_length++;
    }

    JERRY_ASSERT (length == calculated_length);

    while (!lit_utf8_iterator_is_bos (&iter))
    {
      JERRY_ASSERT (code_units_count > 0);
      calculated_length--;
      JERRY_ASSERT (code_units[calculated_length] == lit_utf8_iterator_read_prev (&iter));
    }

    JERRY_ASSERT (calculated_length == 0);
  }

  /* Overlong-encoded code point */
  lit_utf8_byte_t invalid_utf8_string_1[] = {0xC0, 0x82};
  JERRY_ASSERT (!lit_is_utf8_string_valid (invalid_utf8_string_1, sizeof (invalid_utf8_string_1)));

  /* Overlong-encoded code point */
  lit_utf8_byte_t invalid_utf8_string_2[] = {0xE0, 0x80, 0x81};
  JERRY_ASSERT (!lit_is_utf8_string_valid (invalid_utf8_string_2, sizeof (invalid_utf8_string_2)));

  /* Pair of surrogates: 0xD901 0xDFF0 which encode Unicode character 0x507F0 */
  lit_utf8_byte_t invalid_utf8_string_3[] = {0xED, 0xA4, 0x81, 0xED, 0xBF, 0xB0};
  JERRY_ASSERT (!lit_is_utf8_string_valid (invalid_utf8_string_3, sizeof (invalid_utf8_string_3)));

  /* Isolated high surrogate 0xD901 */
  lit_utf8_byte_t valid_utf8_string_1[] = {0xED, 0xA4, 0x81};
  JERRY_ASSERT (lit_is_utf8_string_valid (valid_utf8_string_1, sizeof (valid_utf8_string_1)));

  /* 4-byte long utf-8 character - Unicode character 0x507F0 */
  lit_utf8_byte_t valid_utf8_string_2[] = {0xF1, 0x90, 0x9F, 0xB0};
  JERRY_ASSERT (lit_is_utf8_string_valid (valid_utf8_string_2, sizeof (valid_utf8_string_2)));

  lit_utf8_byte_t buf[] = {0xF0, 0x90, 0x8D, 0x88};
  lit_code_point_t code_point;
  lit_utf8_size_t bytes_count = lit_read_code_point_from_utf8 (buf, sizeof (buf), &code_point);
  JERRY_ASSERT (bytes_count == 4);
  JERRY_ASSERT (code_point == 0x10348);

  lit_utf8_byte_t res_buf[3];
  lit_utf8_size_t res_size;

  res_size = lit_code_unit_to_utf8 (0x73, res_buf);
  JERRY_ASSERT (res_size == 1);
  JERRY_ASSERT (res_buf[0] == 0x73);

  res_size = lit_code_unit_to_utf8 (0x41A, res_buf);
  JERRY_ASSERT (res_size == 2);
  JERRY_ASSERT (res_buf[0] == 0xD0);
  JERRY_ASSERT (res_buf[1] == 0x9A);

  res_size = lit_code_unit_to_utf8 (0xD7FF, res_buf);
  JERRY_ASSERT (res_size == 3);
  JERRY_ASSERT (res_buf[0] == 0xED);
  JERRY_ASSERT (res_buf[1] == 0x9F);
  JERRY_ASSERT (res_buf[2] == 0xBF);

  lit_utf8_byte_t bytes[] = {0xF0, 0x90, 0x8D, 0x88};
  lit_utf8_iterator_t iter = lit_utf8_iterator_create (bytes, sizeof (bytes));
  ecma_char_t code_unit = lit_utf8_iterator_read_next (&iter);
  JERRY_ASSERT (!lit_utf8_iterator_is_eos (&iter));
  JERRY_ASSERT (code_unit == 0xD800);
  code_unit = lit_utf8_iterator_read_next (&iter);
  JERRY_ASSERT (lit_utf8_iterator_is_eos (&iter));
  JERRY_ASSERT (code_unit == 0xDF48);

  mem_finalize (true);
  return 0;
}