Example #1
0
/**
 * Insert a new bytecode to the bytecode container
 */
void
re_bytecode_list_insert (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */
                         size_t offset, /**< distance from the start of the container */
                         uint8_t *bytecode_p, /**< input bytecode */
                         size_t length) /**< length of input */
{
    JERRY_ASSERT (length <= REGEXP_BYTECODE_BLOCK_SIZE);

    uint8_t *current_p = bc_ctx_p->current_p;
    if (current_p + length > bc_ctx_p->block_end_p)
    {
        re_realloc_regexp_bytecode_block (bc_ctx_p);
    }

    uint8_t *src_p = bc_ctx_p->block_start_p + offset;
    if ((re_get_bytecode_length (bc_ctx_p) - offset) > 0)
    {
        uint8_t *dest_p = src_p + length;
        uint8_t *tmp_block_start_p;
        tmp_block_start_p = (uint8_t *) jmem_heap_alloc_block (re_get_bytecode_length (bc_ctx_p) - offset);
        memcpy (tmp_block_start_p, src_p, (size_t) (re_get_bytecode_length (bc_ctx_p) - offset));
        memcpy (dest_p, tmp_block_start_p, (size_t) (re_get_bytecode_length (bc_ctx_p) - offset));
        jmem_heap_free_block (tmp_block_start_p, re_get_bytecode_length (bc_ctx_p) - offset);
    }
    memcpy (src_p, bytecode_p, length);

    bc_ctx_p->current_p += length;
} /* re_bytecode_list_insert */
Example #2
0
/**
 * Enclose the given bytecode to a group
 */
static void
re_insert_into_group (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */
                      uint32_t group_start_offset, /**< offset of group start */
                      uint32_t idx, /**< index of group */
                      bool is_capturable) /**< is capturable group */
{
  uint32_t qmin, qmax;
  re_opcode_t start_opcode = re_get_start_opcode_type (re_ctx_p, is_capturable);
  re_opcode_t end_opcode = re_get_end_opcode_type (re_ctx_p, is_capturable);
  uint32_t start_head_offset_len;

  qmin = re_ctx_p->current_token.qmin;
  qmax = re_ctx_p->current_token.qmax;
  JERRY_ASSERT (qmin <= qmax);

  start_head_offset_len = re_get_bytecode_length (re_ctx_p->bytecode_ctx_p);
  re_insert_u32 (re_ctx_p->bytecode_ctx_p, group_start_offset, idx);
  re_insert_opcode (re_ctx_p->bytecode_ctx_p, group_start_offset, start_opcode);
  start_head_offset_len = re_get_bytecode_length (re_ctx_p->bytecode_ctx_p) - start_head_offset_len;
  re_append_opcode (re_ctx_p->bytecode_ctx_p, end_opcode);
  re_append_u32 (re_ctx_p->bytecode_ctx_p, idx);
  re_append_u32 (re_ctx_p->bytecode_ctx_p, qmin);
  re_append_u32 (re_ctx_p->bytecode_ctx_p, qmax);

  group_start_offset += start_head_offset_len;
  re_append_jump_offset (re_ctx_p->bytecode_ctx_p,
                         re_get_bytecode_length (re_ctx_p->bytecode_ctx_p) - group_start_offset);

  if (start_opcode != RE_OP_CAPTURE_GROUP_START && start_opcode != RE_OP_NON_CAPTURE_GROUP_START)
  {
    re_insert_u32 (re_ctx_p->bytecode_ctx_p,
                   group_start_offset,
                   re_get_bytecode_length (re_ctx_p->bytecode_ctx_p) - group_start_offset);
  }
} /* re_insert_into_group */
Example #3
0
/**
 * Insert simple atom iterator
 */
static void
re_insert_simple_iterator (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */
                           uint32_t new_atom_start_offset) /**< atom start offset */
{
  uint32_t atom_code_length;
  uint32_t offset;
  uint32_t qmin, qmax;

  qmin = re_ctx_p->current_token.qmin;
  qmax = re_ctx_p->current_token.qmax;
  JERRY_ASSERT (qmin <= qmax);

  /* FIXME: optimize bytecode length. Store 0 rather than INF */

  re_append_opcode (re_ctx_p->bytecode_ctx_p, RE_OP_MATCH);   /* complete 'sub atom' */
  uint32_t bytecode_length = re_get_bytecode_length (re_ctx_p->bytecode_ctx_p);
  atom_code_length = (uint32_t) (bytecode_length - new_atom_start_offset);

  offset = new_atom_start_offset;
  re_insert_u32 (re_ctx_p->bytecode_ctx_p, offset, atom_code_length);
  re_insert_u32 (re_ctx_p->bytecode_ctx_p, offset, qmax);
  re_insert_u32 (re_ctx_p->bytecode_ctx_p, offset, qmin);
  if (re_ctx_p->current_token.greedy)
  {
    re_insert_opcode (re_ctx_p->bytecode_ctx_p, offset, RE_OP_GREEDY_ITERATOR);
  }
  else
  {
    re_insert_opcode (re_ctx_p->bytecode_ctx_p, offset, RE_OP_NON_GREEDY_ITERATOR);
  }
} /* re_insert_simple_iterator */
Example #4
0
/**
 * Enclose the given bytecode to a group and inster jump value
 */
static void
re_insert_into_group_with_jump (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */
                                uint32_t group_start_offset, /**< offset of group start */
                                uint32_t idx, /**< index of group */
                                bool is_capturable) /**< is capturable group */
{
  re_insert_u32 (re_ctx_p->bytecode_ctx_p,
                 group_start_offset,
                 re_get_bytecode_length (re_ctx_p->bytecode_ctx_p) - group_start_offset);
  re_insert_into_group (re_ctx_p, group_start_offset, idx, is_capturable);
} /* re_insert_into_group_with_jump */
Example #5
0
/**
 * Parse alternatives
 *
 * @return completion value
 *         Returned value must be freed with ecma_free_completion_value
 */
static ecma_completion_value_t
re_parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */
                      bool expect_eof) /**< expect end of file */
{
  uint32_t idx;
  re_bytecode_ctx_t *bc_ctx_p = re_ctx_p->bytecode_ctx_p;
  ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();

  uint32_t alterantive_offset = re_get_bytecode_length (re_ctx_p->bytecode_ctx_p);

  while (true)
  {
    ECMA_TRY_CATCH (empty,
                    re_parse_next_token (re_ctx_p->parser_ctx_p,
                                         &(re_ctx_p->current_token)),
                    ret_value);
    ECMA_FINALIZE (empty);

    if (!ecma_is_completion_value_empty (ret_value))
    {
      return ret_value; /* error */
    }
    uint32_t new_atom_start_offset = re_get_bytecode_length (re_ctx_p->bytecode_ctx_p);

    switch (re_ctx_p->current_token.type)
    {
      case RE_TOK_START_CAPTURE_GROUP:
      {
        idx = re_ctx_p->num_of_captures++;
        JERRY_DDLOG ("Compile a capture group start (idx: %d)\n", idx);

        ret_value = re_parse_alternative (re_ctx_p, false);

        if (ecma_is_completion_value_empty (ret_value))
        {
          re_insert_into_group (re_ctx_p, new_atom_start_offset, idx, true);
        }
        else
        {
          return ret_value; /* error */
        }
        break;
      }
      case RE_TOK_START_NON_CAPTURE_GROUP:
      {
        idx = re_ctx_p->num_of_non_captures++;
        JERRY_DDLOG ("Compile a non-capture group start (idx: %d)\n", idx);

        ret_value = re_parse_alternative (re_ctx_p, false);

        if (ecma_is_completion_value_empty (ret_value))
        {
          re_insert_into_group (re_ctx_p, new_atom_start_offset, idx, false);
        }
        else
        {
          return ret_value; /* error */
        }
        break;
      }
      case RE_TOK_CHAR:
      {
        JERRY_DDLOG ("Compile character token: %c, qmin: %d, qmax: %d\n",
                     re_ctx_p->current_token.value, re_ctx_p->current_token.qmin, re_ctx_p->current_token.qmax);

        re_append_opcode (bc_ctx_p, RE_OP_CHAR);
        re_append_u32 (bc_ctx_p, re_canonicalize ((ecma_char_t) re_ctx_p->current_token.value,
                                                   re_ctx_p->flags & RE_FLAG_IGNORE_CASE));

        if ((re_ctx_p->current_token.qmin != 1) || (re_ctx_p->current_token.qmax != 1))
        {
          re_insert_simple_iterator (re_ctx_p, new_atom_start_offset);
        }
        break;
      }
      case RE_TOK_PERIOD:
      {
        JERRY_DDLOG ("Compile a period\n");
        re_append_opcode (bc_ctx_p, RE_OP_PERIOD);

        if ((re_ctx_p->current_token.qmin != 1) || (re_ctx_p->current_token.qmax != 1))
        {
          re_insert_simple_iterator (re_ctx_p, new_atom_start_offset);
        }
        break;
      }
      case RE_TOK_ALTERNATIVE:
      {
        JERRY_DDLOG ("Compile an alternative\n");
        re_insert_u32 (bc_ctx_p, alterantive_offset, re_get_bytecode_length (bc_ctx_p) - alterantive_offset);
        re_append_opcode (bc_ctx_p, RE_OP_ALTERNATIVE);
        alterantive_offset = re_get_bytecode_length (re_ctx_p->bytecode_ctx_p);
        break;
      }
      case RE_TOK_ASSERT_START:
      {
        JERRY_DDLOG ("Compile a start assertion\n");
        re_append_opcode (bc_ctx_p, RE_OP_ASSERT_START);
        break;
      }
      case RE_TOK_ASSERT_END:
      {
        JERRY_DDLOG ("Compile an end assertion\n");
        re_append_opcode (bc_ctx_p, RE_OP_ASSERT_END);
        break;
      }
      case RE_TOK_ASSERT_WORD_BOUNDARY:
      {
        JERRY_DDLOG ("Compile a word boundary assertion\n");
        re_append_opcode (bc_ctx_p, RE_OP_ASSERT_WORD_BOUNDARY);
        break;
      }
      case RE_TOK_ASSERT_NOT_WORD_BOUNDARY:
      {
        JERRY_DDLOG ("Compile a not word boundary assertion\n");
        re_append_opcode (bc_ctx_p, RE_OP_ASSERT_NOT_WORD_BOUNDARY);
        break;
      }
      case RE_TOK_ASSERT_START_POS_LOOKAHEAD:
      {
        JERRY_DDLOG ("Compile a positive lookahead assertion\n");
        idx = re_ctx_p->num_of_non_captures++;
        re_append_opcode (bc_ctx_p, RE_OP_LOOKAHEAD_POS);

        ret_value = re_parse_alternative (re_ctx_p, false);

        if (ecma_is_completion_value_empty (ret_value))
        {
          re_append_opcode (bc_ctx_p, RE_OP_MATCH);

          re_insert_into_group_with_jump (re_ctx_p, new_atom_start_offset, idx, false);
        }
        else
        {
          return ret_value; /* error */
        }
        break;
      }
      case RE_TOK_ASSERT_START_NEG_LOOKAHEAD:
      {
        JERRY_DDLOG ("Compile a negative lookahead assertion\n");
        idx = re_ctx_p->num_of_non_captures++;
        re_append_opcode (bc_ctx_p, RE_OP_LOOKAHEAD_NEG);

        ret_value = re_parse_alternative (re_ctx_p, false);

        if (ecma_is_completion_value_empty (ret_value))
        {
          re_append_opcode (bc_ctx_p, RE_OP_MATCH);

          re_insert_into_group_with_jump (re_ctx_p, new_atom_start_offset, idx, false);
        }
        else
        {
          return ret_value; /* error */
        }
        break;
      }
      case RE_TOK_BACKREFERENCE:
      {
        uint32_t backref = (uint32_t) re_ctx_p->current_token.value;
        idx = re_ctx_p->num_of_non_captures++;

        if (backref > re_ctx_p->highest_backref)
        {
          re_ctx_p->highest_backref = backref;
        }

        JERRY_DDLOG ("Compile a backreference: %d\n", backref);
        re_append_opcode (bc_ctx_p, RE_OP_BACKREFERENCE);
        re_append_u32 (bc_ctx_p, backref);

        re_insert_into_group_with_jump (re_ctx_p, new_atom_start_offset, idx, false);
        break;
      }
      case RE_TOK_DIGIT:
      case RE_TOK_NOT_DIGIT:
      case RE_TOK_WHITE:
      case RE_TOK_NOT_WHITE:
      case RE_TOK_WORD_CHAR:
      case RE_TOK_NOT_WORD_CHAR:
      case RE_TOK_START_CHAR_CLASS:
      case RE_TOK_START_INV_CHAR_CLASS:
      {
        JERRY_DDLOG ("Compile a character class\n");
        re_append_opcode (bc_ctx_p,
                          re_ctx_p->current_token.type == RE_TOK_START_INV_CHAR_CLASS
                                                       ? RE_OP_INV_CHAR_CLASS
                                                       : RE_OP_CHAR_CLASS);
        uint32_t offset = re_get_bytecode_length (re_ctx_p->bytecode_ctx_p);

        ECMA_TRY_CATCH (empty,
                        re_parse_char_class (re_ctx_p->parser_ctx_p,
                                             re_append_char_class,
                                             re_ctx_p,
                                             &(re_ctx_p->current_token)),
                        ret_value);
        re_insert_u32 (bc_ctx_p, offset, re_ctx_p->parser_ctx_p->num_of_classes);

        if ((re_ctx_p->current_token.qmin != 1) || (re_ctx_p->current_token.qmax != 1))
        {
          re_insert_simple_iterator (re_ctx_p, new_atom_start_offset);
        }

        ECMA_FINALIZE (empty);

        if (ecma_is_completion_value_throw (ret_value))
        {
          return ret_value; /* error */
        }
        break;
      }
      case RE_TOK_END_GROUP:
      {
        JERRY_DDLOG ("Compile a group end\n");

        if (expect_eof)
        {
          ret_value = ecma_raise_syntax_error ("Unexpected end of paren.");
        }
        else
        {
          re_insert_u32 (bc_ctx_p, alterantive_offset, re_get_bytecode_length (bc_ctx_p) - alterantive_offset);
        }

        return ret_value;
      }
      case RE_TOK_EOF:
      {
        if (!expect_eof)
        {
          ret_value = ecma_raise_syntax_error ("Unexpected end of pattern.");
        }
        else
        {
          re_insert_u32 (bc_ctx_p, alterantive_offset, re_get_bytecode_length (bc_ctx_p) - alterantive_offset);
        }

        return ret_value;
      }
      default:
      {
        ret_value = ecma_raise_syntax_error ("Unexpected RegExp token.");
        return ret_value;
      }
    }
  }

  JERRY_UNREACHABLE ();
  return ret_value;
} /* re_parse_alternative */