Ejemplo n.º 1
0
/**
 * Parse RegExp flags (global, ignoreCase, multiline)
 *
 * Every character of the flags string has to be one of 'g', 'i' or 'm'.
 * A flag whose bit is already set in *flags_p (either on entry or because
 * the same letter occurred earlier in the string) and any other character
 * raise a SyntaxError. Parsing stops at the first error.
 *
 * See also: ECMA-262 v5, 15.10.4.1
 *
 * @return completion value
 *         Returned value must be freed with ecma_free_completion_value
 */
ecma_completion_value_t
re_parse_regexp_flags (ecma_string_t *flags_str_p, /**< Input string with flags */
                       uint8_t *flags_p) /**< Output: parsed flag bits */
{
  ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();

  lit_utf8_size_t flags_str_size = ecma_string_get_size (flags_str_p);
  MEM_DEFINE_LOCAL_ARRAY (flags_start_p, flags_str_size, lit_utf8_byte_t);

  /*
   * The buffer is sized from the string itself, so the conversion is expected
   * to succeed; check the result instead of silently dropping it.
   * NOTE(review): a negative result is assumed to signal failure - confirm.
   */
  ssize_t sz = ecma_string_to_utf8_string (flags_str_p, flags_start_p, (ssize_t) flags_str_size);
  JERRY_ASSERT (sz >= 0);

  lit_utf8_iterator_t iter = lit_utf8_iterator_create (flags_start_p, flags_str_size);

  /* Consume the flags one by one until the end of the string or the first error. */
  while (!lit_utf8_iterator_is_eos (&iter)
         && ecma_is_completion_value_empty (ret_value))
  {
    switch (lit_utf8_iterator_read_next (&iter))
    {
      case 'g':
      {
        if (*flags_p & RE_FLAG_GLOBAL)
        {
          /* Duplicated flag. */
          ret_value = ecma_raise_syntax_error ("Invalid RegExp flags.");
        }
        else
        {
          *flags_p |= RE_FLAG_GLOBAL;
        }
        break;
      }
      case 'i':
      {
        if (*flags_p & RE_FLAG_IGNORE_CASE)
        {
          /* Duplicated flag. */
          ret_value = ecma_raise_syntax_error ("Invalid RegExp flags.");
        }
        else
        {
          *flags_p |= RE_FLAG_IGNORE_CASE;
        }
        break;
      }
      case 'm':
      {
        if (*flags_p & RE_FLAG_MULTILINE)
        {
          /* Duplicated flag. */
          ret_value = ecma_raise_syntax_error ("Invalid RegExp flags.");
        }
        else
        {
          *flags_p |= RE_FLAG_MULTILINE;
        }
        break;
      }
      default:
      {
        /* Unknown flag character. */
        ret_value = ecma_raise_syntax_error ("Invalid RegExp flags.");
        break;
      }
    }
  }

  MEM_FINALIZE_LOCAL_ARRAY (flags_start_p);

  return ret_value;
} /* re_parse_regexp_flags  */
Ejemplo n.º 2
0
/**
 * Read the input pattern and parse the next token for the RegExp compiler
 *
 * The token type (and, for character-like tokens, the character value) is
 * stored in out_token_p. For tokens that may be followed by a quantifier the
 * quantifier is parsed right away through re_parse_iterator. When the input
 * is exhausted a RE_TOK_EOF token is produced.
 *
 * @return completion value
 *         Returned value must be freed with ecma_free_completion_value
 */
ecma_completion_value_t
re_parse_next_token (re_parser_ctx_t *parser_ctx_p, /**< RegExp parser context */
                     re_token_t *out_token_p) /**< out: output token */
{
  ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();

  /* Nothing left to read: report the end of the pattern. */
  if (parser_ctx_p->input_curr_p >= parser_ctx_p->input_end_p)
  {
    out_token_p->type = RE_TOK_EOF;
    return ret_value;
  }

  ecma_char_t ch = lit_utf8_read_next (&parser_ctx_p->input_curr_p);

  switch (ch)
  {
    case LIT_CHAR_VLINE:
    {
      out_token_p->type = RE_TOK_ALTERNATIVE;
      break;
    }
    case LIT_CHAR_CIRCUMFLEX:
    {
      out_token_p->type = RE_TOK_ASSERT_START;
      break;
    }
    case LIT_CHAR_DOLLAR_SIGN:
    {
      out_token_p->type = RE_TOK_ASSERT_END;
      break;
    }
    case LIT_CHAR_DOT:
    {
      out_token_p->type = RE_TOK_PERIOD;
      ret_value = re_parse_iterator (parser_ctx_p, out_token_p);
      break;
    }
    case LIT_CHAR_BACKSLASH:
    {
      /* A backslash must be followed by at least one more character. */
      if (parser_ctx_p->input_curr_p >= parser_ctx_p->input_end_p)
      {
        return ecma_raise_syntax_error ("invalid regular expression");
      }

      /* Unless a branch below says otherwise, an escape yields a plain character token. */
      out_token_p->type = RE_TOK_CHAR;
      ch = lit_utf8_read_next (&parser_ctx_p->input_curr_p);

      if (ch == LIT_CHAR_LOWERCASE_B)
      {
        out_token_p->type = RE_TOK_ASSERT_WORD_BOUNDARY;
      }
      else if (ch == LIT_CHAR_UPPERCASE_B)
      {
        out_token_p->type = RE_TOK_ASSERT_NOT_WORD_BOUNDARY;
      }
      else if (ch == LIT_CHAR_LOWERCASE_F)
      {
        out_token_p->value = LIT_CHAR_FF;
      }
      else if (ch == LIT_CHAR_LOWERCASE_N)
      {
        out_token_p->value = LIT_CHAR_LF;
      }
      else if (ch == LIT_CHAR_LOWERCASE_T)
      {
        out_token_p->value = LIT_CHAR_TAB;
      }
      else if (ch == LIT_CHAR_LOWERCASE_R)
      {
        out_token_p->value = LIT_CHAR_CR;
      }
      else if (ch == LIT_CHAR_LOWERCASE_V)
      {
        out_token_p->value = LIT_CHAR_VTAB;
      }
      else if (ch == LIT_CHAR_LOWERCASE_C)
      {
        /* Control escape: "\c" followed by an ASCII letter. */
        if (parser_ctx_p->input_curr_p < parser_ctx_p->input_end_p)
        {
          ch = *parser_ctx_p->input_curr_p;

          if ((ch >= LIT_CHAR_ASCII_UPPERCASE_LETTERS_BEGIN && ch <= LIT_CHAR_ASCII_UPPERCASE_LETTERS_END)
              || (ch >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_BEGIN && ch <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_END))
          {
            out_token_p->value = (ch % 32);
            parser_ctx_p->input_curr_p++;
          }
          else
          {
            /* Not a control letter: emit the backslash itself and step back onto the 'c',
             * so that it is read again by the next call. */
            out_token_p->value = LIT_CHAR_BACKSLASH;
            parser_ctx_p->input_curr_p--;
          }
        }
        else
        {
          /* "\c" at the very end of the pattern: same fallback as above. */
          out_token_p->value = LIT_CHAR_BACKSLASH;
          parser_ctx_p->input_curr_p--;
        }
      }
      else if (ch == LIT_CHAR_LOWERCASE_X
               && re_hex_lookup (parser_ctx_p, 2))
      {
        /* "\xHH" - re_hex_lookup presumably verifies that two hex digits follow (defined elsewhere). */
        lit_code_point_t code_point;

        if (!lit_read_code_point_from_hex (parser_ctx_p->input_curr_p, 2, &code_point))
        {
          return ecma_raise_syntax_error ("decode error");
        }

        parser_ctx_p->input_curr_p += 2;
        out_token_p->value = code_point;
      }
      else if (ch == LIT_CHAR_LOWERCASE_U
               && re_hex_lookup (parser_ctx_p, 4))
      {
        /* "\uHHHH" - handled like "\xHH", but with four digits. */
        lit_code_point_t code_point;

        if (!lit_read_code_point_from_hex (parser_ctx_p->input_curr_p, 4, &code_point))
        {
          return ecma_raise_syntax_error ("decode error");
        }

        parser_ctx_p->input_curr_p += 4;
        out_token_p->value = code_point;
      }
      /*
       * Character class escapes: each 'break' below leaves the enclosing switch,
       * so no iterator is parsed here for these tokens.
       */
      else if (ch == LIT_CHAR_LOWERCASE_D)
      {
        out_token_p->type = RE_TOK_DIGIT;
        break;
      }
      else if (ch == LIT_CHAR_UPPERCASE_D)
      {
        out_token_p->type = RE_TOK_NOT_DIGIT;
        break;
      }
      else if (ch == LIT_CHAR_LOWERCASE_S)
      {
        out_token_p->type = RE_TOK_WHITE;
        break;
      }
      else if (ch == LIT_CHAR_UPPERCASE_S)
      {
        out_token_p->type = RE_TOK_NOT_WHITE;
        break;
      }
      else if (ch == LIT_CHAR_LOWERCASE_W)
      {
        out_token_p->type = RE_TOK_WORD_CHAR;
        break;
      }
      else if (ch == LIT_CHAR_UPPERCASE_W)
      {
        out_token_p->type = RE_TOK_NOT_WORD_CHAR;
        break;
      }
      else if (lit_char_is_decimal_digit (ch))
      {
        if (ch == LIT_CHAR_0)
        {
          /* "\0" denotes the NUL character, but only when no further digit follows. */
          if (parser_ctx_p->input_curr_p < parser_ctx_p->input_end_p
              && lit_char_is_decimal_digit (*parser_ctx_p->input_curr_p))
          {
            return ecma_raise_syntax_error ("RegExp escape pattern error.");
          }

          out_token_p->value = LIT_UNICODE_CODE_POINT_NULL;
        }
        else
        {
          /* A group count of -1 is treated as "not counted yet". */
          if (parser_ctx_p->num_of_groups == -1)
          {
            re_count_num_of_groups (parser_ctx_p);
          }

          if (parser_ctx_p->num_of_groups)
          {
            /* Step back onto the first digit and read the whole decimal number. */
            parser_ctx_p->input_curr_p--;
            uint32_t number = 0;
            int index = 0;

            do
            {
              if (index >= RE_MAX_RE_DECESC_DIGITS)
              {
                ret_value = ecma_raise_syntax_error ("RegExp escape pattern error: decimal escape too long.");
                return ret_value;
              }
              if (parser_ctx_p->input_curr_p >= parser_ctx_p->input_end_p)
              {
                break;
              }

              ecma_char_t digit = *parser_ctx_p->input_curr_p++;

              if (!lit_char_is_decimal_digit (digit))
              {
                /* Put back the character that ended the number. */
                parser_ctx_p->input_curr_p--;
                break;
              }
              number = number * 10 + lit_char_hex_to_int (digit);
              index++;
            }
            while (true);

            if ((int) number <= parser_ctx_p->num_of_groups)
            {
              out_token_p->type = RE_TOK_BACKREFERENCE;
            }
            else
            /* Invalid backreference, fallback to octal */
            {
              /* Rewind to start of number. */
              parser_ctx_p->input_curr_p -= index;

              /* Try to reparse as octal. */
              ecma_char_t digit = *parser_ctx_p->input_curr_p;

              if (!lit_char_is_octal_digit (digit))
              {
                /* Not octal, keep digit character value. */
                number = digit;
                parser_ctx_p->input_curr_p++;
              }
              else
              {
                number = re_parse_octal (parser_ctx_p);
              }
            }
            out_token_p->value = number;
          }
          else
          /* Invalid backreference, fallback to octal if possible */
          {
            if (!lit_char_is_octal_digit (ch))
            {
              /* Not octal, keep character value. */
              out_token_p->value = ch;
            }
            else
            {
              parser_ctx_p->input_curr_p--;
              out_token_p->value = re_parse_octal (parser_ctx_p);
            }
          }
        }
      }
      else
      {
        /* Any other escaped character stands for itself. */
        out_token_p->value = ch;
      }

      ret_value = re_parse_iterator (parser_ctx_p, out_token_p);
      break;
    }
    case LIT_CHAR_LEFT_PAREN:
    {
      if (parser_ctx_p->input_curr_p >= parser_ctx_p->input_end_p)
      {
        return ecma_raise_syntax_error ("Unterminated group");
      }

      if (*parser_ctx_p->input_curr_p == LIT_CHAR_QUESTION)
      {
        /* "(?" must be followed by '=', '!' or ':'. */
        parser_ctx_p->input_curr_p++;
        if (parser_ctx_p->input_curr_p >= parser_ctx_p->input_end_p)
        {
          return ecma_raise_syntax_error ("Invalid group");
        }

        ch = *parser_ctx_p->input_curr_p++;

        if (ch == LIT_CHAR_EQUALS)
        {
          /* (?= */
          out_token_p->type = RE_TOK_ASSERT_START_POS_LOOKAHEAD;
        }
        else if (ch == LIT_CHAR_EXCLAMATION)
        {
          /* (?! */
          out_token_p->type = RE_TOK_ASSERT_START_NEG_LOOKAHEAD;
        }
        else if (ch == LIT_CHAR_COLON)
        {
          /* (?: */
          out_token_p->type = RE_TOK_START_NON_CAPTURE_GROUP;
        }
        else
        {
          return ecma_raise_syntax_error ("Invalid group");
        }
      }
      else
      {
        /* ( */
        out_token_p->type = RE_TOK_START_CAPTURE_GROUP;
      }
      break;
    }
    case LIT_CHAR_RIGHT_PAREN:
    {
      out_token_p->type = RE_TOK_END_GROUP;
      ret_value = re_parse_iterator (parser_ctx_p, out_token_p);
      break;
    }
    case LIT_CHAR_LEFT_SQUARE:
    {
      out_token_p->type = RE_TOK_START_CHAR_CLASS;

      if (parser_ctx_p->input_curr_p >= parser_ctx_p->input_end_p)
      {
        return ecma_raise_syntax_error ("invalid character class");
      }

      /* "[^" starts an inverted class; the '^' is consumed here. */
      if (*parser_ctx_p->input_curr_p == LIT_CHAR_CIRCUMFLEX)
      {
        out_token_p->type = RE_TOK_START_INV_CHAR_CLASS;
        parser_ctx_p->input_curr_p++;
      }

      break;
    }
    case LIT_CHAR_QUESTION:
    case LIT_CHAR_ASTERISK:
    case LIT_CHAR_PLUS:
    case LIT_CHAR_LEFT_BRACE:
    {
      /* A quantifier character in token position has nothing to apply to. */
      return ecma_raise_syntax_error ("Invalid RegExp token.");
    }
    case LIT_CHAR_NULL:
    {
      out_token_p->type = RE_TOK_EOF;
      break;
    }
    default:
    {
      /* Ordinary pattern character. */
      out_token_p->type = RE_TOK_CHAR;
      out_token_p->value = ch;
      ret_value = re_parse_iterator (parser_ctx_p, out_token_p);
      break;
    }
  }

  return ret_value;
} /* re_parse_next_token */
Ejemplo n.º 3
0
/**
 * Read the input pattern and parse the range of character class
 *
 * Every single character or character range that belongs to the class is
 * reported through append_char_class (re_ctx_p, first, last). When the current
 * token of the compiler context is not a character class start token, the loop
 * body is executed only once. Finally the iterator that follows the class is
 * parsed into out_token_p.
 *
 * @return completion value
 *         Returned value must be freed with ecma_free_completion_value
 */
ecma_completion_value_t
re_parse_char_class (re_parser_ctx_t *parser_ctx_p, /**< RegExp parser context */
                     re_char_class_callback append_char_class, /**< callback function,
                                                                *   which adds the char-ranges
                                                                *   to the bytecode */
                     void *re_ctx_p, /**< regexp compiler context */
                     re_token_t *out_token_p) /**< out: output token */
{
  re_token_type_t token_type = ((re_compiler_ctx_t *) re_ctx_p)->current_token.type;
  out_token_p->qmax = out_token_p->qmin = 1;
  uint32_t start = RE_CHAR_UNDEF; /* pending character that may still become the start of a range */
  bool is_range = false; /* true after "<start>-" has been seen */
  parser_ctx_p->num_of_classes = 0;

  /*
   * When the previous character is not '[', step back by two characters so that
   * the loop below reads them again.
   * NOTE(review): after "[^" the previous character is '^', so this rewind also
   * appears to trigger for inverted classes - confirm that this is intended.
   */
  if (lit_utf8_peek_prev (parser_ctx_p->input_curr_p) != LIT_CHAR_LEFT_SQUARE)
  {
    lit_utf8_decr (&parser_ctx_p->input_curr_p);
    lit_utf8_decr (&parser_ctx_p->input_curr_p);
  }

  do
  {
    if (parser_ctx_p->input_curr_p >= parser_ctx_p->input_end_p)
    {
      return ecma_raise_syntax_error ("invalid character class, end of string");
    }

    uint32_t ch = lit_utf8_read_next (&parser_ctx_p->input_curr_p);

    if (ch == LIT_CHAR_RIGHT_SQUARE)
    {
      /* End of the class: flush the pending character, if any. */
      if (start != RE_CHAR_UNDEF)
      {
        append_char_class (re_ctx_p, start, start);
      }
      break;
    }
    else if (ch == LIT_CHAR_MINUS)
    {
      if (parser_ctx_p->input_curr_p >= parser_ctx_p->input_end_p)
      {
        return ecma_raise_syntax_error ("invalid character class, end of string after '-'");
      }

      /* '-' is a range operator only between two class atoms; otherwise it is a literal. */
      if (start != RE_CHAR_UNDEF
          && !is_range
          && *parser_ctx_p->input_curr_p != LIT_CHAR_RIGHT_SQUARE)
      {
        is_range = true;
        continue;
      }
    }
    else if (ch == LIT_CHAR_BACKSLASH)
    {
      if (parser_ctx_p->input_curr_p >= parser_ctx_p->input_end_p)
      {
        return ecma_raise_syntax_error ("invalid character class, end of string after '\\'");
      }

      ch = *parser_ctx_p->input_curr_p++;

      if (ch == LIT_CHAR_LOWERCASE_B)
      {
        ch = LIT_CHAR_BS;
      }
      else if (ch == LIT_CHAR_LOWERCASE_F)
      {
        ch = LIT_CHAR_FF;
      }
      else if (ch == LIT_CHAR_LOWERCASE_N)
      {
        ch = LIT_CHAR_LF;
      }
      else if (ch == LIT_CHAR_LOWERCASE_T)
      {
        ch = LIT_CHAR_TAB;
      }
      else if (ch == LIT_CHAR_LOWERCASE_R)
      {
        ch = LIT_CHAR_CR;
      }
      else if (ch == LIT_CHAR_LOWERCASE_V)
      {
        ch = LIT_CHAR_VTAB;
      }
      else if (ch == LIT_CHAR_LOWERCASE_C)
      {
        /* When "\c" is the last thing in the input, ch simply stays 'c'. */
        if (parser_ctx_p->input_curr_p < parser_ctx_p->input_end_p)
        {
          ch = *parser_ctx_p->input_curr_p;

          if ((ch >= LIT_CHAR_ASCII_UPPERCASE_LETTERS_BEGIN && ch <= LIT_CHAR_ASCII_UPPERCASE_LETTERS_END)
              || (ch >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_BEGIN && ch <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_END)
              || (ch >= LIT_CHAR_0 && ch <= LIT_CHAR_9))
          {
            /* See ECMA-262 v5, 15.10.2.10 (Point 3) */
            ch = (ch % 32);
            parser_ctx_p->input_curr_p++;
          }
          else
          {
            ch = LIT_CHAR_LOWERCASE_C;
          }
        }
      }
      else if (ch == LIT_CHAR_LOWERCASE_X)
      {
        lit_code_point_t code_point;

        /* Both hex digits must lie inside the pattern before they are decoded. */
        if (parser_ctx_p->input_end_p - parser_ctx_p->input_curr_p < 2
            || !lit_read_code_point_from_hex (parser_ctx_p->input_curr_p, 2, &code_point))
        {
          return ecma_raise_syntax_error ("invalid character class, end of string after '\\x'");
        }

        parser_ctx_p->input_curr_p += 2;

        /*
         * Continue with the decoded character: the common logic below adds it to
         * the class (or uses it as a range bound). Leaving 'x' in ch would add the
         * letter 'x' to the class as well.
         */
        ch = code_point;
      }
      else if (ch == LIT_CHAR_LOWERCASE_U)
      {
        lit_code_point_t code_point;

        /* All four hex digits must lie inside the pattern before they are decoded. */
        if (parser_ctx_p->input_end_p - parser_ctx_p->input_curr_p < 4
            || !lit_read_code_point_from_hex (parser_ctx_p->input_curr_p, 4, &code_point))
        {
          return ecma_raise_syntax_error ("invalid character class, end of string after '\\u'");
        }

        parser_ctx_p->input_curr_p += 4;

        /* Same as for "\x": the decoded character replaces the escape letter. */
        ch = code_point;
      }
      else if (ch == LIT_CHAR_LOWERCASE_D)
      {
        /* See ECMA-262 v5, 15.10.2.12 */
        append_char_class (re_ctx_p, LIT_CHAR_ASCII_DIGITS_BEGIN, LIT_CHAR_ASCII_DIGITS_END);
        ch = RE_CHAR_UNDEF;
      }
      else if (ch == LIT_CHAR_UPPERCASE_D)
      {
        /* See ECMA-262 v5, 15.10.2.12 */
        append_char_class (re_ctx_p, LIT_CHAR_NULL, LIT_CHAR_ASCII_DIGITS_BEGIN - 1);
        append_char_class (re_ctx_p, LIT_CHAR_ASCII_DIGITS_END + 1, LIT_UTF16_CODE_UNIT_MAX);
        ch = RE_CHAR_UNDEF;
      }
      else if (ch == LIT_CHAR_LOWERCASE_S)
      {
        /* See ECMA-262 v5, 15.10.2.12 */
        append_char_class (re_ctx_p, LIT_CHAR_TAB, LIT_CHAR_CR);
        append_char_class (re_ctx_p, LIT_CHAR_SP, LIT_CHAR_SP);
        append_char_class (re_ctx_p, LIT_CHAR_NBSP, LIT_CHAR_NBSP);
        append_char_class (re_ctx_p, 0x1680UL, 0x1680UL); /* Ogham Space Mark */
        append_char_class (re_ctx_p, 0x180EUL, 0x180EUL); /* Mongolian Vowel Separator */
        append_char_class (re_ctx_p, 0x2000UL, 0x200AUL); /* En Quad - Hair Space */
        append_char_class (re_ctx_p, LIT_CHAR_LS, LIT_CHAR_PS);
        append_char_class (re_ctx_p, 0x202FUL, 0x202FUL); /* Narrow No-Break Space */
        append_char_class (re_ctx_p, 0x205FUL, 0x205FUL); /* Medium Mathematical Space */
        append_char_class (re_ctx_p, 0x3000UL, 0x3000UL); /* Ideographic Space */
        append_char_class (re_ctx_p, LIT_CHAR_BOM, LIT_CHAR_BOM);
        ch = RE_CHAR_UNDEF;
      }
      else if (ch == LIT_CHAR_UPPERCASE_S)
      {
        /* See ECMA-262 v5, 15.10.2.12 */
        append_char_class (re_ctx_p, LIT_CHAR_NULL, LIT_CHAR_TAB - 1);
        append_char_class (re_ctx_p, LIT_CHAR_CR + 1, LIT_CHAR_SP - 1);
        append_char_class (re_ctx_p, LIT_CHAR_SP + 1, LIT_CHAR_NBSP - 1);
        append_char_class (re_ctx_p, LIT_CHAR_NBSP + 1, 0x167FUL);
        append_char_class (re_ctx_p, 0x1681UL, 0x180DUL);
        append_char_class (re_ctx_p, 0x180FUL, 0x1FFFUL);
        append_char_class (re_ctx_p, 0x200BUL, LIT_CHAR_LS - 1);
        append_char_class (re_ctx_p, LIT_CHAR_PS + 1, 0x202EUL);
        append_char_class (re_ctx_p, 0x2030UL, 0x205EUL);
        append_char_class (re_ctx_p, 0x2060UL, 0x2FFFUL);
        append_char_class (re_ctx_p, 0x3001UL, LIT_CHAR_BOM - 1);
        append_char_class (re_ctx_p, LIT_CHAR_BOM + 1, LIT_UTF16_CODE_UNIT_MAX);
        ch = RE_CHAR_UNDEF;
      }
      else if (ch == LIT_CHAR_LOWERCASE_W)
      {
        /* See ECMA-262 v5, 15.10.2.12 */
        append_char_class (re_ctx_p, LIT_CHAR_0, LIT_CHAR_9);
        append_char_class (re_ctx_p, LIT_CHAR_UPPERCASE_A, LIT_CHAR_UPPERCASE_Z);
        append_char_class (re_ctx_p, LIT_CHAR_UNDERSCORE, LIT_CHAR_UNDERSCORE);
        append_char_class (re_ctx_p, LIT_CHAR_LOWERCASE_A, LIT_CHAR_LOWERCASE_Z);
        ch = RE_CHAR_UNDEF;
      }
      else if (ch == LIT_CHAR_UPPERCASE_W)
      {
        /* See ECMA-262 v5, 15.10.2.12 */
        append_char_class (re_ctx_p, LIT_CHAR_NULL, LIT_CHAR_0 - 1);
        append_char_class (re_ctx_p, LIT_CHAR_9 + 1, LIT_CHAR_UPPERCASE_A - 1);
        append_char_class (re_ctx_p, LIT_CHAR_UPPERCASE_Z + 1, LIT_CHAR_UNDERSCORE - 1);
        append_char_class (re_ctx_p, LIT_CHAR_UNDERSCORE + 1, LIT_CHAR_LOWERCASE_A - 1);
        append_char_class (re_ctx_p, LIT_CHAR_LOWERCASE_Z + 1, LIT_UTF16_CODE_UNIT_MAX);
        ch = RE_CHAR_UNDEF;
      }
      else if (ch <= LIT_UTF16_CODE_UNIT_MAX
               && lit_char_is_octal_digit ((ecma_char_t) ch)
               && ch != LIT_CHAR_0)
      {
        /*
         * Octal escape: step back onto the digit and let re_parse_octal read it.
         * NOTE(review): "\0" is excluded here and therefore stays the character '0' - confirm.
         */
        parser_ctx_p->input_curr_p--;
        ch = re_parse_octal (parser_ctx_p);
      }
    } /* ch == LIT_CHAR_BACKSLASH */

    if (ch == RE_CHAR_UNDEF)
    {
      /* A whole class escape (e.g. \d) was appended above; it cannot be a range bound. */
      if (start != RE_CHAR_UNDEF)
      {
        if (is_range)
        {
          return ecma_raise_syntax_error ("invalid character class, invalid range");
        }
        else
        {
          append_char_class (re_ctx_p, start, start);
          start = RE_CHAR_UNDEF;
        }
      }
    }
    else
    {
      if (start != RE_CHAR_UNDEF)
      {
        if (is_range)
        {
          /* "<start>-<ch>": the range must be in ascending order. */
          if (start > ch)
          {
            return ecma_raise_syntax_error ("invalid character class, wrong order");
          }
          else
          {
            append_char_class (re_ctx_p, start, ch);
            start = RE_CHAR_UNDEF;
            is_range = false;
          }
        }
        else
        {
          /* Flush the previous single character and keep the new one pending. */
          append_char_class (re_ctx_p, start, start);
          start = ch;
        }
      }
      else
      {
        start = ch;
      }
    }
  }
  while (token_type == RE_TOK_START_CHAR_CLASS || token_type == RE_TOK_START_INV_CHAR_CLASS);

  return re_parse_iterator (parser_ctx_p, out_token_p);
} /* re_parse_char_class */
Ejemplo n.º 4
0
/**
 * Parse the quantifier (iterator) that may follow a RegExp atom
 *
 * Handles '?', '*', '+' and the brace form; re_parse_non_greedy_char is
 * consulted after each of them to set the greedy flag. Without a quantifier
 * the token keeps the default of exactly one greedy repetition.
 *
 * @return completion value
 *         Returned value must be freed with ecma_free_completion_value
 */
static ecma_completion_value_t
re_parse_iterator (re_parser_ctx_t *parser_ctx_p, /**< RegExp parser context */
                   re_token_t *re_token_p) /**< out: output token */
{
  ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();

  /* Defaults: exactly one repetition, greedy. */
  re_token_p->qmin = 1;
  re_token_p->qmax = 1;
  re_token_p->greedy = true;

  if (parser_ctx_p->input_curr_p >= parser_ctx_p->input_end_p)
  {
    return ret_value;
  }

  const ecma_char_t lookahead = *parser_ctx_p->input_curr_p;

  if (lookahead == LIT_CHAR_QUESTION
      || lookahead == LIT_CHAR_ASTERISK
      || lookahead == LIT_CHAR_PLUS)
  {
    /* Single character quantifiers: '?' is {0,1}, '*' is {0,inf}, '+' is {1,inf}. */
    parser_ctx_p->input_curr_p++;

    if (lookahead == LIT_CHAR_PLUS)
    {
      re_token_p->qmin = 1;
    }
    else
    {
      re_token_p->qmin = 0;
    }

    if (lookahead == LIT_CHAR_QUESTION)
    {
      re_token_p->qmax = 1;
    }
    else
    {
      re_token_p->qmax = RE_ITERATOR_INFINITE;
    }

    re_token_p->greedy = !re_parse_non_greedy_char (parser_ctx_p);
  }
  else if (lookahead == LIT_CHAR_LEFT_BRACE)
  {
    parser_ctx_p->input_curr_p++;

    uint32_t number = 0; /* the number that is being read */
    uint32_t first_bound = RE_ITERATOR_INFINITE; /* the number before the comma, once a comma was seen */
    uint32_t digit_count = 0; /* digits of the number that is being read */
    bool is_closed = false;

    while (!is_closed)
    {
      if (parser_ctx_p->input_curr_p >= parser_ctx_p->input_end_p)
      {
        return ecma_raise_syntax_error ("invalid quantifier");
      }

      const ecma_char_t next_ch = *parser_ctx_p->input_curr_p++;

      if (lit_char_is_decimal_digit (next_ch))
      {
        if (digit_count >= ECMA_NUMBER_MAX_DIGITS)
        {
          return ecma_raise_syntax_error ("RegExp quantifier error: too many digits.");
        }

        number = number * 10 + lit_char_hex_to_int (next_ch);
        digit_count++;
      }
      else if (next_ch == LIT_CHAR_COMMA)
      {
        if (first_bound != RE_ITERATOR_INFINITE)
        {
          return ecma_raise_syntax_error ("RegExp quantifier error: double comma.");
        }

        if (parser_ctx_p->input_curr_p >= parser_ctx_p->input_end_p)
        {
          return ecma_raise_syntax_error ("invalid quantifier");
        }

        if (*parser_ctx_p->input_curr_p == LIT_CHAR_RIGHT_BRACE)
        {
          /* "{n,}" - open ended upper bound. */
          if (digit_count == 0)
          {
            return ecma_raise_syntax_error ("RegExp quantifier error: missing digits.");
          }

          parser_ctx_p->input_curr_p++;
          re_token_p->qmin = number;
          re_token_p->qmax = RE_ITERATOR_INFINITE;
          is_closed = true;
        }
        else
        {
          /* Remember the first number and start reading the second one. */
          first_bound = number;
          number = 0;
          digit_count = 0;
        }
      }
      else if (next_ch == LIT_CHAR_RIGHT_BRACE)
      {
        if (digit_count == 0)
        {
          return ecma_raise_syntax_error ("RegExp quantifier error: missing digits.");
        }

        if (first_bound == RE_ITERATOR_INFINITE)
        {
          /* "{n}" - exact repetition count. */
          re_token_p->qmin = number;
          re_token_p->qmax = number;
        }
        else
        {
          /* "{n,m}" */
          re_token_p->qmin = first_bound;
          re_token_p->qmax = number;
        }

        is_closed = true;
      }
      else
      {
        return ecma_raise_syntax_error ("RegExp quantifier error: unknown char.");
      }
    }

    re_token_p->greedy = !re_parse_non_greedy_char (parser_ctx_p);
  }

  JERRY_ASSERT (ecma_is_completion_value_empty (ret_value));

  /* The lower bound must not exceed the upper bound. */
  if (re_token_p->qmin > re_token_p->qmax)
  {
    ret_value = ecma_raise_syntax_error ("RegExp quantifier error: qmin > qmax.");
  }

  return ret_value;
} /* re_parse_iterator */
Ejemplo n.º 5
0
/**
 * The JSON object's 'parse' routine
 *
 * The first argument is converted to a string and parsed as a single JSON
 * value; anything other than the end token after that value makes the parse
 * fail with a SyntaxError. When the second argument is callable, the parsed
 * value is stored under the empty-string key of a fresh holder object and the
 * result of ecma_builtin_json_walk on that holder is returned instead.
 *
 * See also:
 *          ECMA-262 v5, 15.12.2
 *
 * @return ecma value
 *         Returned value must be freed with ecma_free_value.
 */
static ecma_value_t
ecma_builtin_json_parse (ecma_value_t this_arg __attr_unused___, /**< 'this' argument */
                         ecma_value_t arg1, /**< string argument */
                         ecma_value_t arg2) /**< reviver argument */
{
  ecma_value_t ret_value = ecma_make_simple_value (ECMA_SIMPLE_VALUE_EMPTY);

  ECMA_TRY_CATCH (string,
                  ecma_op_to_string (arg1),
                  ret_value);

  ecma_string_t *string_p = ecma_get_string_from_value (string);
  ecma_length_t string_size = (uint32_t) ecma_string_get_size (string_p);

  /* One extra byte is reserved for the terminating zero byte written below. */
  size_t buffer_size = sizeof (lit_utf8_byte_t) * (string_size + 1);

  MEM_DEFINE_LOCAL_ARRAY (str_start_p, buffer_size, lit_utf8_byte_t);

  ssize_t sz = ecma_string_to_utf8_string (string_p, str_start_p, (ssize_t) buffer_size);
  JERRY_ASSERT (sz == (ssize_t) string_size);

  str_start_p[string_size] = LIT_BYTE_NULL;

  ecma_json_token_t token;
  token.current_p = str_start_p;
  token.end_p = str_start_p + string_size;

  /* An undefined result is used as the "parse failed" marker from here on. */
  ecma_value_t final_result = ecma_builtin_json_parse_value (&token);

  if (!ecma_is_value_undefined (final_result))
  {
    /* Nothing but the end token may follow the parsed value. */
    ecma_builtin_json_parse_next_token (&token);

    if (token.type != end_token)
    {
      ecma_free_value (final_result);
      final_result = ecma_make_simple_value (ECMA_SIMPLE_VALUE_UNDEFINED);
    }
  }

  if (ecma_is_value_undefined (final_result))
  {
    /* Give the SyntaxError a message instead of an empty string. */
    ret_value = ecma_raise_syntax_error ("JSON string parse error.");
  }
  else if (ecma_op_is_callable (arg2))
  {
    /* Reviver given: wrap the result into a holder object under the empty key and walk it. */
    ecma_object_t *object_p = ecma_op_create_object_object_noarg ();
    ecma_string_t *name_p = ecma_get_magic_string (LIT_MAGIC_STRING__EMPTY);
    ecma_property_t *property_p = ecma_create_named_data_property (object_p,
                                                                   name_p,
                                                                   true,
                                                                   true,
                                                                   true);

    /* The local reference is released right after the assignment
     * (presumably the property keeps its own copy of the value). */
    ecma_named_data_property_assign_value (object_p, property_p, final_result);
    ecma_free_value (final_result);

    ret_value = ecma_builtin_json_walk (ecma_get_object_from_value (arg2),
                                        object_p,
                                        name_p);
    ecma_deref_object (object_p);
    ecma_deref_ecma_string (name_p);
  }
  else
  {
    /* No reviver: the parsed value is the result, ownership goes to the caller. */
    ret_value = final_result;
  }

  MEM_FINALIZE_LOCAL_ARRAY (str_start_p);

  ECMA_FINALIZE (string);
  return ret_value;
} /* ecma_builtin_json_parse */
Ejemplo n.º 6
0
/**
 * Compilation of RegExp bytecode
 *
 * The pattern is parsed and compiled into a bytecode block. On success
 * *out_bytecode_p points to that block; on failure the block is freed and
 * *out_bytecode_p is set to NULL.
 *
 * @return completion value
 *         Returned value must be freed with ecma_free_completion_value
 */
ecma_completion_value_t
re_compile_bytecode (re_bytecode_t **out_bytecode_p, /**< out:pointer to bytecode */
                     ecma_string_t *pattern_str_p, /**< pattern */
                     uint8_t flags) /**< flags */
{
  ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();
  re_compiler_ctx_t re_ctx;
  re_ctx.flags = flags;
  re_ctx.highest_backref = 0;
  re_ctx.num_of_non_captures = 0;

  re_bytecode_ctx_t bc_ctx;
  bc_ctx.block_start_p = NULL;
  bc_ctx.block_end_p = NULL;
  bc_ctx.current_p = NULL;

  re_ctx.bytecode_ctx_p = &bc_ctx;

  lit_utf8_size_t pattern_str_size = ecma_string_get_size (pattern_str_p);
  MEM_DEFINE_LOCAL_ARRAY (pattern_start_p, pattern_str_size, lit_utf8_byte_t);

  /*
   * The buffer is sized from the string itself, so the conversion is expected
   * to succeed; check the result instead of silently dropping it.
   * NOTE(review): a negative result is assumed to signal failure - confirm.
   */
  ssize_t sz = ecma_string_to_utf8_string (pattern_str_p, pattern_start_p, (ssize_t) pattern_str_size);
  JERRY_ASSERT (sz >= 0);

  lit_utf8_iterator_t iter = lit_utf8_iterator_create (pattern_start_p, pattern_str_size);

  re_parser_ctx_t parser_ctx;
  parser_ctx.iter = iter;
  parser_ctx.num_of_groups = -1;
  re_ctx.parser_ctx_p = &parser_ctx;

  /* 1. Parse RegExp pattern */
  re_ctx.num_of_captures = 1;
  re_append_opcode (&bc_ctx, RE_OP_SAVE_AT_START);

  ECMA_TRY_CATCH (empty, re_parse_alternative (&re_ctx, true), ret_value);

  /* 2. Check for invalid backreference */
  if (re_ctx.highest_backref >= re_ctx.num_of_captures)
  {
    ret_value = ecma_raise_syntax_error ("Invalid backreference.\n");
  }
  else
  {
    re_append_opcode (&bc_ctx, RE_OP_SAVE_AND_MATCH);
    re_append_opcode (&bc_ctx, RE_OP_EOF);

    /* 3. Insert extra information for the bytecode header (each value is inserted at offset 0) */
    re_insert_u32 (&bc_ctx, 0, (uint32_t) re_ctx.num_of_non_captures);
    re_insert_u32 (&bc_ctx, 0, (uint32_t) re_ctx.num_of_captures * 2);
    re_insert_u32 (&bc_ctx, 0, (uint32_t) re_ctx.flags);
  }
  ECMA_FINALIZE (empty);

  MEM_FINALIZE_LOCAL_ARRAY (pattern_start_p);

  if (!ecma_is_completion_value_empty (ret_value))
  {
    /* Compilation failed, free bytecode. */
    mem_heap_free_block (bc_ctx.block_start_p);
    *out_bytecode_p = NULL;
  }
  else
  {
    /* The RegExp bytecode contains at least a RE_OP_SAVE_AT_START opcode, so it cannot be NULL. */
    JERRY_ASSERT (bc_ctx.block_start_p != NULL);
    *out_bytecode_p = bc_ctx.block_start_p;

    /*
     * Dump the bytecode only when compilation succeeded: on the failure path
     * the block has already been freed and must not be read any more.
     */
#ifdef JERRY_ENABLE_LOG
    re_dump_bytecode (&bc_ctx);
#endif
  }

  return ret_value;
} /* re_compile_bytecode */
Ejemplo n.º 7
0
/**
 * Compile a list of alternatives into RegExp bytecode
 *
 * Tokens are read one by one from the parser context and translated into
 * bytecode until a group end or the end of the pattern is reached. Groups and
 * lookahead assertions are compiled by calling this function recursively with
 * expect_eof set to false.
 *
 * @return completion value - empty on success, otherwise the error produced
 *         by the tokenizer, by a nested call or by ecma_raise_syntax_error
 *         Returned value must be freed with ecma_free_completion_value
 */
static ecma_completion_value_t
re_parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */
                      bool expect_eof) /**< true: the list must be closed by the end of the pattern,
                                        *   false: the list must be closed by a group end */
{
  re_bytecode_ctx_t *bc_ctx_p = re_ctx_p->bytecode_ctx_p;
  ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();

  /* Bytecode offset at which the branch currently being compiled begins. */
  uint32_t alternative_offset = re_get_bytecode_length (re_ctx_p->bytecode_ctx_p);

  for (;;)
  {
    ECMA_TRY_CATCH (empty,
                    re_parse_next_token (re_ctx_p->parser_ctx_p, &(re_ctx_p->current_token)),
                    ret_value);
    ECMA_FINALIZE (empty);

    if (!ecma_is_completion_value_empty (ret_value))
    {
      return ret_value; /* tokenizer error */
    }

    /* Bytecode offset at which the atom compiled in this round begins. */
    const uint32_t atom_offset = re_get_bytecode_length (re_ctx_p->bytecode_ctx_p);

    switch (re_ctx_p->current_token.type)
    {
      case RE_TOK_START_CAPTURE_GROUP:
      {
        const uint32_t capture_idx = re_ctx_p->num_of_captures++;
        JERRY_DDLOG ("Compile a capture group start (idx: %d)\n", capture_idx);

        ret_value = re_parse_alternative (re_ctx_p, false);

        if (!ecma_is_completion_value_empty (ret_value))
        {
          return ret_value; /* error inside the group */
        }

        re_insert_into_group (re_ctx_p, atom_offset, capture_idx, true);
        break;
      }
      case RE_TOK_START_NON_CAPTURE_GROUP:
      {
        const uint32_t non_capture_idx = re_ctx_p->num_of_non_captures++;
        JERRY_DDLOG ("Compile a non-capture group start (idx: %d)\n", non_capture_idx);

        ret_value = re_parse_alternative (re_ctx_p, false);

        if (!ecma_is_completion_value_empty (ret_value))
        {
          return ret_value; /* error inside the group */
        }

        re_insert_into_group (re_ctx_p, atom_offset, non_capture_idx, false);
        break;
      }
      case RE_TOK_CHAR:
      {
        JERRY_DDLOG ("Compile character token: %c, qmin: %d, qmax: %d\n",
                     re_ctx_p->current_token.value,
                     re_ctx_p->current_token.qmin,
                     re_ctx_p->current_token.qmax);

        re_append_opcode (bc_ctx_p, RE_OP_CHAR);
        re_append_u32 (bc_ctx_p,
                       re_canonicalize ((ecma_char_t) re_ctx_p->current_token.value,
                                        re_ctx_p->flags & RE_FLAG_IGNORE_CASE));

        /* Only a quantifier other than {1,1} needs an iterator. */
        if (!((re_ctx_p->current_token.qmin == 1) && (re_ctx_p->current_token.qmax == 1)))
        {
          re_insert_simple_iterator (re_ctx_p, atom_offset);
        }
        break;
      }
      case RE_TOK_PERIOD:
      {
        JERRY_DDLOG ("Compile a period\n");
        re_append_opcode (bc_ctx_p, RE_OP_PERIOD);

        /* Only a quantifier other than {1,1} needs an iterator. */
        if (!((re_ctx_p->current_token.qmin == 1) && (re_ctx_p->current_token.qmax == 1)))
        {
          re_insert_simple_iterator (re_ctx_p, atom_offset);
        }
        break;
      }
      case RE_TOK_ALTERNATIVE:
      {
        JERRY_DDLOG ("Compile an alternative\n");

        /* Record the size of the finished branch at the offset where it began. */
        const uint32_t branch_length = re_get_bytecode_length (bc_ctx_p) - alternative_offset;
        re_insert_u32 (bc_ctx_p, alternative_offset, branch_length);

        re_append_opcode (bc_ctx_p, RE_OP_ALTERNATIVE);

        /* The next branch begins right after the opcode appended above. */
        alternative_offset = re_get_bytecode_length (re_ctx_p->bytecode_ctx_p);
        break;
      }
      case RE_TOK_ASSERT_START:
      {
        JERRY_DDLOG ("Compile a start assertion\n");
        re_append_opcode (bc_ctx_p, RE_OP_ASSERT_START);
        break;
      }
      case RE_TOK_ASSERT_END:
      {
        JERRY_DDLOG ("Compile an end assertion\n");
        re_append_opcode (bc_ctx_p, RE_OP_ASSERT_END);
        break;
      }
      case RE_TOK_ASSERT_WORD_BOUNDARY:
      {
        JERRY_DDLOG ("Compile a word boundary assertion\n");
        re_append_opcode (bc_ctx_p, RE_OP_ASSERT_WORD_BOUNDARY);
        break;
      }
      case RE_TOK_ASSERT_NOT_WORD_BOUNDARY:
      {
        JERRY_DDLOG ("Compile a not word boundary assertion\n");
        re_append_opcode (bc_ctx_p, RE_OP_ASSERT_NOT_WORD_BOUNDARY);
        break;
      }
      case RE_TOK_ASSERT_START_POS_LOOKAHEAD:
      {
        JERRY_DDLOG ("Compile a positive lookahead assertion\n");
        const uint32_t lookahead_idx = re_ctx_p->num_of_non_captures++;
        re_append_opcode (bc_ctx_p, RE_OP_LOOKAHEAD_POS);

        ret_value = re_parse_alternative (re_ctx_p, false);

        if (!ecma_is_completion_value_empty (ret_value))
        {
          return ret_value; /* error inside the lookahead */
        }

        re_append_opcode (bc_ctx_p, RE_OP_MATCH);
        re_insert_into_group_with_jump (re_ctx_p, atom_offset, lookahead_idx, false);
        break;
      }
      case RE_TOK_ASSERT_START_NEG_LOOKAHEAD:
      {
        JERRY_DDLOG ("Compile a negative lookahead assertion\n");
        const uint32_t lookahead_idx = re_ctx_p->num_of_non_captures++;
        re_append_opcode (bc_ctx_p, RE_OP_LOOKAHEAD_NEG);

        ret_value = re_parse_alternative (re_ctx_p, false);

        if (!ecma_is_completion_value_empty (ret_value))
        {
          return ret_value; /* error inside the lookahead */
        }

        re_append_opcode (bc_ctx_p, RE_OP_MATCH);
        re_insert_into_group_with_jump (re_ctx_p, atom_offset, lookahead_idx, false);
        break;
      }
      case RE_TOK_BACKREFERENCE:
      {
        const uint32_t backref = (uint32_t) re_ctx_p->current_token.value;
        const uint32_t wrapper_idx = re_ctx_p->num_of_non_captures++;

        /* Track the largest referenced index seen so far in the compiler context. */
        if (re_ctx_p->highest_backref < backref)
        {
          re_ctx_p->highest_backref = backref;
        }

        JERRY_DDLOG ("Compile a backreference: %d\n", backref);
        re_append_opcode (bc_ctx_p, RE_OP_BACKREFERENCE);
        re_append_u32 (bc_ctx_p, backref);

        re_insert_into_group_with_jump (re_ctx_p, atom_offset, wrapper_idx, false);
        break;
      }
      case RE_TOK_DIGIT:
      case RE_TOK_NOT_DIGIT:
      case RE_TOK_WHITE:
      case RE_TOK_NOT_WHITE:
      case RE_TOK_WORD_CHAR:
      case RE_TOK_NOT_WORD_CHAR:
      case RE_TOK_START_CHAR_CLASS:
      case RE_TOK_START_INV_CHAR_CLASS:
      {
        JERRY_DDLOG ("Compile a character class\n");

        /* Only an explicitly inverted class gets the inverted opcode. */
        if (re_ctx_p->current_token.type == RE_TOK_START_INV_CHAR_CLASS)
        {
          re_append_opcode (bc_ctx_p, RE_OP_INV_CHAR_CLASS);
        }
        else
        {
          re_append_opcode (bc_ctx_p, RE_OP_CHAR_CLASS);
        }

        /* The number of classes is inserted here once the class has been parsed. */
        const uint32_t class_count_offset = re_get_bytecode_length (re_ctx_p->bytecode_ctx_p);

        ECMA_TRY_CATCH (empty,
                        re_parse_char_class (re_ctx_p->parser_ctx_p,
                                             re_append_char_class,
                                             re_ctx_p,
                                             &(re_ctx_p->current_token)),
                        ret_value);

        re_insert_u32 (bc_ctx_p, class_count_offset, re_ctx_p->parser_ctx_p->num_of_classes);

        /* Only a quantifier other than {1,1} needs an iterator. */
        if (!((re_ctx_p->current_token.qmin == 1) && (re_ctx_p->current_token.qmax == 1)))
        {
          re_insert_simple_iterator (re_ctx_p, atom_offset);
        }

        ECMA_FINALIZE (empty);

        if (ecma_is_completion_value_throw (ret_value))
        {
          return ret_value; /* error inside the character class */
        }
        break;
      }
      case RE_TOK_END_GROUP:
      {
        JERRY_DDLOG ("Compile a group end\n");

        /* A group end is only legal inside a nested (group or lookahead) call. */
        if (expect_eof)
        {
          return ecma_raise_syntax_error ("Unexpected end of paren.");
        }

        /* Record the size of the last branch at the offset where it began. */
        const uint32_t branch_length = re_get_bytecode_length (bc_ctx_p) - alternative_offset;
        re_insert_u32 (bc_ctx_p, alternative_offset, branch_length);

        return ret_value;
      }
      case RE_TOK_EOF:
      {
        /* The pattern may only end in the call made with expect_eof set. */
        if (!expect_eof)
        {
          return ecma_raise_syntax_error ("Unexpected end of pattern.");
        }

        /* Record the size of the last branch at the offset where it began. */
        const uint32_t branch_length = re_get_bytecode_length (bc_ctx_p) - alternative_offset;
        re_insert_u32 (bc_ctx_p, alternative_offset, branch_length);

        return ret_value;
      }
      default:
      {
        return ecma_raise_syntax_error ("Unexpected RegExp token.");
      }
    }
  }

  JERRY_UNREACHABLE ();
  return ret_value;
} /* re_parse_alternative */