/**
 * Recursive function for RegExp matching. Tests for a regular expression
 * match and returns a MatchResult value.
 *
 * The function interprets the bytecode starting at bc_p against the input
 * position described by iter. Simple opcodes (characters, assertions,
 * character classes, backreferences) are checked in the main loop; opcodes
 * that need backtracking (groups, alternatives, iterators, lookaheads)
 * recurse into this function for the rest of the pattern and return
 * the result of that recursion.
 *
 * Result:
 *   - simple completion value TRUE: the pattern matched, *out_iter_p holds
 *     the input position at the end of the match;
 *   - simple completion value FALSE: the pattern did not match from this position;
 *   - throw completion value: an unknown opcode was found, or a recursive
 *     call returned a throw completion value (it is propagated unchanged).
 *
 * Side effects: re_ctx_p->saved_p (capture positions) and
 * re_ctx_p->num_of_iterations_p (group iteration counters) are updated while
 * matching; the failure paths below restore the entries they changed.
 *
 * See also:
 *          ECMA-262 v5, 15.10.2.1
 *
 * @return completion value
 *         Returned value must be freed with ecma_free_completion_value
 */
static ecma_completion_value_t
re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
                 re_bytecode_t *bc_p, /**< pointer to the current RegExp bytecode */
                 lit_utf8_iterator_t iter, /**< input string iterator */
                 lit_utf8_iterator_t *out_iter_p) /**< Output: matching substring iterator */
{
  ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();
  re_opcode_t op;

  /* The loop ends when re_get_opcode returns a zero opcode. */
  while ((op = re_get_opcode (&bc_p)))
  {
    switch (op)
    {
      case RE_OP_MATCH:
      {
        JERRY_DDLOG ("Execute RE_OP_MATCH: match\n");
        *out_iter_p = iter;
        ret_value = ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_TRUE);
        return ret_value; /* match */
      }
      case RE_OP_CHAR:
      {
        if (lit_utf8_iterator_is_eos (&iter))
        {
          return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
        }

        bool is_ignorecase = re_ctx_p->flags & RE_FLAG_IGNORE_CASE;
        ecma_char_t ch1 = (ecma_char_t) re_get_value (&bc_p); /* Already canonicalized. */
        ecma_char_t ch2 = re_canonicalize (lit_utf8_iterator_read_next (&iter), is_ignorecase);
        JERRY_DDLOG ("Character matching %d to %d: ", ch1, ch2);

        if (ch1 != ch2)
        {
          JERRY_DDLOG ("fail\n");
          return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
        }

        JERRY_DDLOG ("match\n");
        break; /* tail merge */
      }
      case RE_OP_PERIOD:
      {
        if (lit_utf8_iterator_is_eos (&iter))
        {
          return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
        }

        ecma_char_t ch = lit_utf8_iterator_read_next (&iter);
        JERRY_DDLOG ("Period matching '.' to %d: ", (uint32_t) ch);

        /* '.' matches any character except a line terminator. */
        if (lit_char_is_line_terminator (ch))
        {
          JERRY_DDLOG ("fail\n");
          return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
        }

        JERRY_DDLOG ("match\n");
        break; /* tail merge */
      }
      case RE_OP_ASSERT_START:
      {
        JERRY_DDLOG ("Execute RE_OP_ASSERT_START: ");

        if ((iter.buf_p + iter.buf_pos.offset) <= re_ctx_p->input_start_p)
        {
          JERRY_DDLOG ("match\n");
          break;
        }

        if (!(re_ctx_p->flags & RE_FLAG_MULTILINE))
        {
          JERRY_DDLOG ("fail\n");
          return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
        }

        /* Multiline mode: the position right after a line terminator also matches. */
        if (lit_char_is_line_terminator (lit_utf8_iterator_peek_prev (&iter)))
        {
          JERRY_DDLOG ("match\n");
          break;
        }

        JERRY_DDLOG ("fail\n");
        return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
      }
      case RE_OP_ASSERT_END:
      {
        JERRY_DDLOG ("Execute RE_OP_ASSERT_END: ");

        if ((iter.buf_p + iter.buf_pos.offset) >= re_ctx_p->input_end_p)
        {
          JERRY_DDLOG ("match\n");
          break; /* tail merge */
        }

        if (!(re_ctx_p->flags & RE_FLAG_MULTILINE))
        {
          JERRY_DDLOG ("fail\n");
          return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
        }

        /* Multiline mode: the position right before a line terminator also matches. */
        if (lit_char_is_line_terminator (lit_utf8_iterator_peek_next (&iter)))
        {
          JERRY_DDLOG ("match\n");
          break; /* tail merge */
        }

        JERRY_DDLOG ("fail\n");
        return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
      }
      case RE_OP_ASSERT_WORD_BOUNDARY:
      case RE_OP_ASSERT_NOT_WORD_BOUNDARY:
      {
        bool is_wordchar_left, is_wordchar_right;

        if ((iter.buf_p + iter.buf_pos.offset) <= re_ctx_p->input_start_p)
        {
          is_wordchar_left = false; /* not a wordchar */
        }
        else
        {
          is_wordchar_left = lit_char_is_word_char (lit_utf8_iterator_peek_prev (&iter));
        }

        if ((iter.buf_p + iter.buf_pos.offset) >= re_ctx_p->input_end_p)
        {
          is_wordchar_right = false; /* not a wordchar */
        }
        else
        {
          is_wordchar_right = lit_char_is_word_char (lit_utf8_iterator_peek_next (&iter));
        }

        if (op == RE_OP_ASSERT_WORD_BOUNDARY)
        {
          JERRY_DDLOG ("Execute RE_OP_ASSERT_WORD_BOUNDARY: ");
          if (is_wordchar_left == is_wordchar_right)
          {
            JERRY_DDLOG ("fail\n");
            return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
          }
        }
        else
        {
          JERRY_ASSERT (op == RE_OP_ASSERT_NOT_WORD_BOUNDARY);
          JERRY_DDLOG ("Execute RE_OP_ASSERT_NOT_WORD_BOUNDARY: ");

          if (is_wordchar_left != is_wordchar_right)
          {
            JERRY_DDLOG ("fail\n");
            return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
          }
        }

        JERRY_DDLOG ("match\n");
        break; /* tail merge */
      }
      case RE_OP_LOOKAHEAD_POS:
      case RE_OP_LOOKAHEAD_NEG:
      {
        ecma_completion_value_t match_value = ecma_make_empty_completion_value ();
        lit_utf8_iterator_t sub_iter = lit_utf8_iterator_create (NULL, 0);

        /* Back up every saved position so that a failed lookahead can be undone. */
        uint32_t array_size = re_ctx_p->num_of_captures + re_ctx_p->num_of_non_captures;
        MEM_DEFINE_LOCAL_ARRAY (saved_bck_p, array_size, lit_utf8_iterator_t);

        size_t size = (size_t) (array_size) * sizeof (lit_utf8_iterator_t);
        memcpy (saved_bck_p, re_ctx_p->saved_p, size);

        do
        {
          uint32_t offset = re_get_value (&bc_p);

          /*
           * sub_iter.buf_p starts as NULL; the alternatives are tried only
           * while it is still NULL, the remaining ones are just skipped.
           */
          if (!sub_iter.buf_p)
          {
            match_value = re_match_regexp (re_ctx_p, bc_p, iter, &sub_iter);
            if (ecma_is_completion_value_throw (match_value))
            {
              break;
            }
          }

          bc_p += offset;
        }
        while (re_get_opcode (&bc_p) == RE_OP_ALTERNATIVE);

        if (!ecma_is_completion_value_throw (match_value))
        {
          JERRY_DDLOG ("Execute RE_OP_LOOKAHEAD_POS/NEG: ");
          ecma_free_completion_value (match_value);
          if ((op == RE_OP_LOOKAHEAD_POS && sub_iter.buf_p)
              || (op == RE_OP_LOOKAHEAD_NEG && !sub_iter.buf_p))
          {
            JERRY_DDLOG ("match\n");
            /* The lookahead consumes no input: continue from the original position. */
            match_value = re_match_regexp (re_ctx_p, bc_p, iter, &sub_iter);
          }
          else
          {
            JERRY_DDLOG ("fail\n");
            match_value = ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
          }
        }

        if (!ecma_is_completion_value_throw (match_value))
        {
          if (ecma_is_value_true (match_value))
          {
            *out_iter_p = sub_iter;
          }
          else
          {
            JERRY_ASSERT (ecma_is_value_boolean (match_value));
            /* restore saved */
            memcpy (re_ctx_p->saved_p, saved_bck_p, size);
          }
        }

        MEM_FINALIZE_LOCAL_ARRAY (saved_bck_p);
        return match_value;
      }
      case RE_OP_CHAR_CLASS:
      case RE_OP_INV_CHAR_CLASS:
      {
        uint32_t num_of_ranges;
        bool is_match;

        JERRY_DDLOG ("Execute RE_OP_CHAR_CLASS/RE_OP_INV_CHAR_CLASS, ");

        if (lit_utf8_iterator_is_eos (&iter))
        {
          JERRY_DDLOG ("fail\n");
          return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
        }

        bool is_ignorecase = re_ctx_p->flags & RE_FLAG_IGNORE_CASE;
        ecma_char_t curr_ch = re_canonicalize (lit_utf8_iterator_read_next (&iter), is_ignorecase);

        num_of_ranges = re_get_value (&bc_p);
        is_match = false;

        while (num_of_ranges)
        {
          ecma_char_t ch1 = re_canonicalize ((ecma_char_t) re_get_value (&bc_p), is_ignorecase);
          ecma_char_t ch2 = re_canonicalize ((ecma_char_t) re_get_value (&bc_p), is_ignorecase);
          JERRY_DDLOG ("num_of_ranges=%d, ch1=%d, ch2=%d, curr_ch=%d; ",
                       num_of_ranges, ch1, ch2, curr_ch);

          if (curr_ch >= ch1 && curr_ch <= ch2)
          {
            /* We must read all the ranges from bytecode. */
            is_match = true;
          }

          num_of_ranges--;
        }

        if (op == RE_OP_CHAR_CLASS)
        {
          if (!is_match)
          {
            JERRY_DDLOG ("fail\n");
            return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
          }
        }
        else
        {
          JERRY_ASSERT (op == RE_OP_INV_CHAR_CLASS);

          if (is_match)
          {
            JERRY_DDLOG ("fail\n");
            return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
          }
        }

        JERRY_DDLOG ("match\n");
        break; /* tail merge */
      }
      case RE_OP_BACKREFERENCE:
      {
        uint32_t backref_idx;

        backref_idx = re_get_value (&bc_p);
        JERRY_DDLOG ("Execute RE_OP_BACKREFERENCE (idx: %d): ", backref_idx);
        backref_idx *= 2; /* backref n -> saved indices [n*2, n*2+1] */
        JERRY_ASSERT (backref_idx >= 2 && backref_idx + 1 < re_ctx_p->num_of_captures);

        if (!re_ctx_p->saved_p[backref_idx].buf_p || !re_ctx_p->saved_p[backref_idx + 1].buf_p)
        {
          JERRY_DDLOG ("match\n");
          break; /* capture is 'undefined', always matches! */
        }

        /*
         * ECMA-262 v5, 15.10.2.9: both the captured characters and the input
         * characters are compared after Canonicalize, so a backreference is
         * case-insensitive when the IgnoreCase flag is set.
         */
        bool is_ignorecase = re_ctx_p->flags & RE_FLAG_IGNORE_CASE;
        lit_utf8_iterator_t sub_iter = re_ctx_p->saved_p[backref_idx];

        while (sub_iter.buf_pos.offset < re_ctx_p->saved_p[backref_idx + 1].buf_pos.offset)
        {
          ecma_char_t ch1, ch2;

          if ((iter.buf_p + iter.buf_pos.offset) >= re_ctx_p->input_end_p)
          {
            JERRY_DDLOG ("fail\n");
            return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
          }

          ch1 = re_canonicalize (lit_utf8_iterator_read_next (&sub_iter), is_ignorecase);
          ch2 = re_canonicalize (lit_utf8_iterator_read_next (&iter), is_ignorecase);

          if (ch1 != ch2)
          {
            JERRY_DDLOG ("fail\n");
            return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
          }
        }

        JERRY_DDLOG ("match\n");
        break; /* tail merge */
      }
      case RE_OP_SAVE_AT_START:
      {
        re_bytecode_t *old_bc_p;

        JERRY_DDLOG ("Execute RE_OP_SAVE_AT_START\n");
        lit_utf8_iterator_t old_start_p = re_ctx_p->saved_p[RE_GLOBAL_START_IDX];
        re_ctx_p->saved_p[RE_GLOBAL_START_IDX] = iter;

        /* Try each top-level alternative in turn. */
        do
        {
          uint32_t offset = re_get_value (&bc_p);
          lit_utf8_iterator_t sub_iter = lit_utf8_iterator_create (NULL, 0);
          ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p, iter, &sub_iter);

          if (ecma_is_value_true (match_value))
          {
            *out_iter_p = sub_iter;
            return match_value; /* match */
          }
          else if (ecma_is_completion_value_throw (match_value))
          {
            return match_value;
          }

          bc_p += offset;
          old_bc_p = bc_p;
        }
        while (re_get_opcode (&bc_p) == RE_OP_ALTERNATIVE);

        bc_p = old_bc_p;
        re_ctx_p->saved_p[RE_GLOBAL_START_IDX] = old_start_p;
        return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
      }
      case RE_OP_SAVE_AND_MATCH:
      {
        JERRY_DDLOG ("End of pattern is reached: match\n");
        re_ctx_p->saved_p[RE_GLOBAL_END_IDX] = iter;
        *out_iter_p = iter;
        return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_TRUE); /* match */
      }
      case RE_OP_ALTERNATIVE:
      {
        /*
         * Alternatives should be jump over, when alternative opcode appears.
         */
        uint32_t offset = re_get_value (&bc_p);
        JERRY_DDLOG ("Execute RE_OP_ALTERNATIVE");
        bc_p += offset;

        while (*bc_p == RE_OP_ALTERNATIVE)
        {
          bc_p++;
          offset = re_get_value (&bc_p);
          /* Log after the offset is read, so the conversion has a matching argument. */
          JERRY_DDLOG (", jump: %lu", (unsigned long) offset);
          bc_p += offset;
        }

        JERRY_DDLOG ("\n");
        break; /* tail merge */
      }
      case RE_OP_CAPTURE_NON_GREEDY_ZERO_GROUP_START:
      case RE_OP_NON_CAPTURE_NON_GREEDY_ZERO_GROUP_START:
      {
        /*
         * On non-greedy iterations we have to execute the bytecode
         * after the group first, if zero iteration is allowed.
         */
        uint32_t start_idx, iter_idx, offset;
        lit_utf8_iterator_t old_start = lit_utf8_iterator_create (NULL, 0);
        lit_utf8_iterator_t sub_iter = lit_utf8_iterator_create (NULL, 0);
        re_bytecode_t *old_bc_p;

        old_bc_p = bc_p; /* save the bytecode start position of the group start */
        start_idx = re_get_value (&bc_p);
        offset = re_get_value (&bc_p);

        if (RE_IS_CAPTURE_GROUP (op))
        {
          JERRY_ASSERT (start_idx <= re_ctx_p->num_of_captures / 2);
          iter_idx = start_idx - 1;
          start_idx *= 2;

          old_start = re_ctx_p->saved_p[start_idx];
          re_ctx_p->saved_p[start_idx] = iter;
        }
        else
        {
          JERRY_ASSERT (start_idx < re_ctx_p->num_of_non_captures);
          iter_idx = start_idx + (re_ctx_p->num_of_captures / 2) - 1;
          start_idx += re_ctx_p->num_of_captures;
        }

        re_ctx_p->num_of_iterations_p[iter_idx] = 0;

        /* Jump all over to the end of the END opcode. */
        bc_p += offset;

        /* Try to match after the close paren if zero is allowed */
        ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p, iter, &sub_iter);

        if (ecma_is_value_true (match_value))
        {
          *out_iter_p = sub_iter;
          return match_value; /* match */
        }
        else if (ecma_is_completion_value_throw (match_value))
        {
          return match_value;
        }

        if (RE_IS_CAPTURE_GROUP (op))
        {
          re_ctx_p->saved_p[start_idx] = old_start;
        }

        /* Rewind to the group start, so the next case re-reads its operands. */
        bc_p = old_bc_p;
        /* FALLTHRU */
      }
      case RE_OP_CAPTURE_GROUP_START:
      case RE_OP_CAPTURE_GREEDY_ZERO_GROUP_START:
      case RE_OP_NON_CAPTURE_GROUP_START:
      case RE_OP_NON_CAPTURE_GREEDY_ZERO_GROUP_START:
      {
        uint32_t start_idx, iter_idx, old_iteration_cnt, offset;
        lit_utf8_iterator_t sub_iter = lit_utf8_iterator_create (NULL, 0);
        re_bytecode_t *old_bc_p;
        re_bytecode_t *end_bc_p = NULL;

        start_idx = re_get_value (&bc_p);

        /* Only the group starts that allow zero iterations carry an offset to the group end. */
        if (op != RE_OP_CAPTURE_GROUP_START
            && op != RE_OP_NON_CAPTURE_GROUP_START)
        {
          offset = re_get_value (&bc_p);
          end_bc_p = bc_p + offset;
        }

        if (RE_IS_CAPTURE_GROUP (op))
        {
          JERRY_ASSERT (start_idx <= re_ctx_p->num_of_captures / 2);
          iter_idx = start_idx - 1;
          start_idx *= 2;
        }
        else
        {
          JERRY_ASSERT (start_idx < re_ctx_p->num_of_non_captures);
          iter_idx = start_idx + (re_ctx_p->num_of_captures / 2) - 1;
          start_idx += re_ctx_p->num_of_captures;
        }

        lit_utf8_iterator_t old_start = re_ctx_p->saved_p[start_idx];
        old_iteration_cnt = re_ctx_p->num_of_iterations_p[iter_idx];
        re_ctx_p->saved_p[start_idx] = iter;
        re_ctx_p->num_of_iterations_p[iter_idx] = 0;

        /* Try each alternative of the group in turn. */
        do
        {
          offset = re_get_value (&bc_p);
          ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p, iter, &sub_iter);

          if (ecma_is_value_true (match_value))
          {
            *out_iter_p = sub_iter;
            return match_value; /* match */
          }
          else if (ecma_is_completion_value_throw (match_value))
          {
            return match_value;
          }

          bc_p += offset;
          old_bc_p = bc_p;
        }
        while (re_get_opcode (&bc_p) == RE_OP_ALTERNATIVE);

        bc_p = old_bc_p;
        re_ctx_p->num_of_iterations_p[iter_idx] = old_iteration_cnt;

        /* Try to match after the close paren if zero is allowed. */
        if (op == RE_OP_CAPTURE_GREEDY_ZERO_GROUP_START
            || op == RE_OP_NON_CAPTURE_GREEDY_ZERO_GROUP_START)
        {
          JERRY_ASSERT (end_bc_p);
          ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, end_bc_p, iter, &sub_iter);

          if (ecma_is_value_true (match_value))
          {
            *out_iter_p = sub_iter;
            return match_value; /* match */
          }
          else if (ecma_is_completion_value_throw (match_value))
          {
            return match_value;
          }
        }

        re_ctx_p->saved_p[start_idx] = old_start;
        return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
      }
      case RE_OP_CAPTURE_NON_GREEDY_GROUP_END:
      case RE_OP_NON_CAPTURE_NON_GREEDY_GROUP_END:
      {
        uint32_t end_idx, iter_idx, min, max;
        re_bytecode_t *old_bc_p;

        /*
         * On non-greedy iterations we have to execute the bytecode
         * after the group first. Try to iterate only if it fails.
         */
        old_bc_p = bc_p; /* save the bytecode start position of the group end */
        end_idx = re_get_value (&bc_p);
        min = re_get_value (&bc_p);
        max = re_get_value (&bc_p);
        re_get_value (&bc_p); /* start offset */

        if (RE_IS_CAPTURE_GROUP (op))
        {
          JERRY_ASSERT (end_idx <= re_ctx_p->num_of_captures / 2);
          iter_idx = end_idx - 1;
          end_idx = (end_idx * 2) + 1;
        }
        else
        {
          JERRY_ASSERT (end_idx <= re_ctx_p->num_of_non_captures);
          iter_idx = end_idx + (re_ctx_p->num_of_captures / 2) - 1;
          end_idx += re_ctx_p->num_of_captures;
        }

        re_ctx_p->num_of_iterations_p[iter_idx]++;

        if (re_ctx_p->num_of_iterations_p[iter_idx] >= min
            && re_ctx_p->num_of_iterations_p[iter_idx] <= max)
        {
          lit_utf8_iterator_t old_end = re_ctx_p->saved_p[end_idx];
          re_ctx_p->saved_p[end_idx] = iter;

          lit_utf8_iterator_t sub_iter = lit_utf8_iterator_create (NULL, 0);
          ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p, iter, &sub_iter);

          if (ecma_is_value_true (match_value))
          {
            *out_iter_p = sub_iter;
            return match_value; /* match */
          }
          else if (ecma_is_completion_value_throw (match_value))
          {
            return match_value;
          }

          re_ctx_p->saved_p[end_idx] = old_end;
        }

        /* Undo the increment and rewind: the next case counts this iteration again. */
        re_ctx_p->num_of_iterations_p[iter_idx]--;
        bc_p = old_bc_p;

        /* If non-greedy fails and try to iterate... */
        /* FALLTHRU */
      }
      case RE_OP_CAPTURE_GREEDY_GROUP_END:
      case RE_OP_NON_CAPTURE_GREEDY_GROUP_END:
      {
        uint32_t start_idx, end_idx, iter_idx, min, max, offset;
        lit_utf8_iterator_t old_start = lit_utf8_iterator_create (NULL, 0);
        lit_utf8_iterator_t old_end = lit_utf8_iterator_create (NULL, 0);
        lit_utf8_iterator_t sub_iter = lit_utf8_iterator_create (NULL, 0);
        re_bytecode_t *old_bc_p;

        end_idx = re_get_value (&bc_p);
        min = re_get_value (&bc_p);
        max = re_get_value (&bc_p);
        offset = re_get_value (&bc_p);

        if (RE_IS_CAPTURE_GROUP (op))
        {
          JERRY_ASSERT (end_idx <= re_ctx_p->num_of_captures / 2);
          iter_idx = end_idx - 1;
          start_idx = end_idx * 2;
          end_idx = start_idx + 1;
        }
        else
        {
          JERRY_ASSERT (end_idx <= re_ctx_p->num_of_non_captures);
          iter_idx = end_idx + (re_ctx_p->num_of_captures / 2) - 1;
          end_idx += re_ctx_p->num_of_captures;
          start_idx = end_idx;
        }

        /* Check the empty iteration if the minimum number of iterations is reached. */
        if (re_ctx_p->num_of_iterations_p[iter_idx] >= min
            && iter.buf_p == re_ctx_p->saved_p[start_idx].buf_p
            && iter.buf_pos.offset == re_ctx_p->saved_p[start_idx].buf_pos.offset)
        {
          return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
        }

        re_ctx_p->num_of_iterations_p[iter_idx]++;

        old_bc_p = bc_p; /* Save the bytecode end position of the END opcodes for matching after it. */
        old_end = re_ctx_p->saved_p[end_idx];
        re_ctx_p->saved_p[end_idx] = iter;

        if (re_ctx_p->num_of_iterations_p[iter_idx] < max)
        {
          /* Jump back by the stored start offset and run the group body once more. */
          bc_p -= offset;
          offset = re_get_value (&bc_p);

          old_start = re_ctx_p->saved_p[start_idx];
          re_ctx_p->saved_p[start_idx] = iter;
          ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p, iter, &sub_iter);

          if (ecma_is_value_true (match_value))
          {
            *out_iter_p = sub_iter;
            return match_value; /* match */
          }
          else if (ecma_is_completion_value_throw (match_value))
          {
            return match_value;
          }

          re_ctx_p->saved_p[start_idx] = old_start;

          /* Try to match alternatives if any. */
          bc_p += offset;
          while (*bc_p == RE_OP_ALTERNATIVE)
          {
            bc_p++; /* RE_OP_ALTERNATIVE */
            offset = re_get_value (&bc_p);

            old_start = re_ctx_p->saved_p[start_idx];
            re_ctx_p->saved_p[start_idx] = iter;

            ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p, iter, &sub_iter);

            if (ecma_is_value_true (match_value))
            {
              *out_iter_p = sub_iter;
              return match_value; /* match */
            }
            else if (ecma_is_completion_value_throw (match_value))
            {
              return match_value;
            }

            re_ctx_p->saved_p[start_idx] = old_start;
            bc_p += offset;
          }
        }

        if (re_ctx_p->num_of_iterations_p[iter_idx] >= min
            && re_ctx_p->num_of_iterations_p[iter_idx] <= max)
        {
          /* Try to match the rest of the bytecode. */
          ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, old_bc_p, iter, &sub_iter);

          if (ecma_is_value_true (match_value))
          {
            *out_iter_p = sub_iter;
            return match_value; /* match */
          }
          else if (ecma_is_completion_value_throw (match_value))
          {
            return match_value;
          }
        }

        /* restore if fails */
        re_ctx_p->saved_p[end_idx] = old_end;
        re_ctx_p->num_of_iterations_p[iter_idx]--;
        return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
      }
      case RE_OP_NON_GREEDY_ITERATOR:
      {
        uint32_t min, max, offset, num_of_iter;
        lit_utf8_iterator_t sub_iter = lit_utf8_iterator_create (NULL, 0);

        min = re_get_value (&bc_p);
        max = re_get_value (&bc_p);

        offset = re_get_value (&bc_p);
        JERRY_DDLOG ("Non-greedy iterator, min=%lu, max=%lu, offset=%ld\n",
                     (unsigned long) min, (unsigned long) max, (long) offset);

        num_of_iter = 0;
        while (num_of_iter <= max)
        {
          /* Prefer the shortest match: try the bytecode after the iterated atom first. */
          if (num_of_iter >= min)
          {
            ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p + offset, iter, &sub_iter);

            if (ecma_is_value_true (match_value))
            {
              *out_iter_p = sub_iter;
              return match_value; /* match */
            }
            else if (ecma_is_completion_value_throw (match_value))
            {
              return match_value;
            }
          }

          /* Otherwise consume one more iteration of the atom. */
          ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p, iter, &sub_iter);

          if (!ecma_is_value_true (match_value))
          {
            if (ecma_is_completion_value_throw (match_value))
            {
              return match_value;
            }

            break;
          }

          iter = sub_iter;
          num_of_iter++;
        }

        return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
      }
      case RE_OP_GREEDY_ITERATOR:
      {
        uint32_t min, max, offset, num_of_iter;
        lit_utf8_iterator_t sub_iter = lit_utf8_iterator_create (NULL, 0);

        min = re_get_value (&bc_p);
        max = re_get_value (&bc_p);

        offset = re_get_value (&bc_p);
        JERRY_DDLOG ("Greedy iterator, min=%lu, max=%lu, offset=%ld\n",
                     (unsigned long) min, (unsigned long) max, (long) offset);

        /* First consume as many iterations of the atom as possible (at most max). */
        num_of_iter = 0;
        while (num_of_iter < max)
        {
          ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p, iter, &sub_iter);

          if (!ecma_is_value_true (match_value))
          {
            if (ecma_is_completion_value_throw (match_value))
            {
              return match_value;
            }

            break;
          }

          iter = sub_iter;
          num_of_iter++;
        }

        /* Then try the rest of the pattern, stepping back one character per retry. */
        while (num_of_iter >= min)
        {
          ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p + offset, iter, &sub_iter);

          if (ecma_is_value_true (match_value))
          {
            *out_iter_p = sub_iter;
            return match_value; /* match */
          }
          else if (ecma_is_completion_value_throw (match_value))
          {
            return match_value;
          }

          if (num_of_iter == min)
          {
            break;
          }

          lit_utf8_iterator_read_prev (&iter);
          num_of_iter--;
        }

        return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
      }
      default:
      {
        JERRY_DDLOG ("UNKNOWN opcode (%d)!\n", (uint32_t) op);
        return ecma_make_throw_obj_completion_value (ecma_new_standard_error (ECMA_ERROR_COMMON));
      }
    }
  }

  JERRY_UNREACHABLE ();
  return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
} /* re_match_regexp */
/**
 * RegExp bytecode dumper
 *
 * Prints the three header fields of the compiled code (status flags, number
 * of captures, number of non-captures), then one mnemonic per instruction
 * followed by the operands read for it. Dumping stops at the first zero
 * opcode and the listing is closed with "EOF".
 */
void
re_dump_bytecode (re_bytecode_ctx_t *bc_ctx_p) /**< RegExp bytecode context */
{
  re_compiled_code_t *code_header_p = (re_compiled_code_t *) bc_ctx_p->block_start_p;

  JERRY_DEBUG_MSG ("%d ", code_header_p->header.status_flags);
  JERRY_DEBUG_MSG ("%d ", code_header_p->num_of_captures);
  JERRY_DEBUG_MSG ("%d | ", code_header_p->num_of_non_captures);

  /* The instruction stream is read from right behind the header structure. */
  uint8_t *cursor_p = (uint8_t *) (code_header_p + 1);
  re_opcode_t opcode;

  for (opcode = re_get_opcode (&cursor_p); opcode; opcode = re_get_opcode (&cursor_p))
  {
    switch (opcode)
    {
      /* Opcodes without operands. */
      case RE_OP_MATCH:
      {
        JERRY_DEBUG_MSG ("MATCH, ");
        break;
      }
      case RE_OP_SAVE_AND_MATCH:
      {
        JERRY_DEBUG_MSG ("RE_END, ");
        break;
      }
      case RE_OP_PERIOD:
      {
        JERRY_DEBUG_MSG ("PERIOD ");
        break;
      }
      case RE_OP_ASSERT_START:
      {
        JERRY_DEBUG_MSG ("ASSERT_START ");
        break;
      }
      case RE_OP_ASSERT_END:
      {
        JERRY_DEBUG_MSG ("ASSERT_END ");
        break;
      }
      case RE_OP_ASSERT_WORD_BOUNDARY:
      {
        JERRY_DEBUG_MSG ("ASSERT_WORD_BOUNDARY ");
        break;
      }
      case RE_OP_ASSERT_NOT_WORD_BOUNDARY:
      {
        JERRY_DEBUG_MSG ("ASSERT_NOT_WORD_BOUNDARY ");
        break;
      }

      /* Opcodes with a single operand. */
      case RE_OP_CHAR:
      {
        JERRY_DEBUG_MSG ("CHAR ");
        JERRY_DEBUG_MSG ("%c, ", (char) re_get_char (&cursor_p));
        break;
      }
      case RE_OP_SAVE_AT_START:
      {
        JERRY_DEBUG_MSG ("RE_START ");
        JERRY_DEBUG_MSG ("%d, ", re_get_value (&cursor_p));
        break;
      }
      case RE_OP_ALTERNATIVE:
      {
        JERRY_DEBUG_MSG ("ALTERNATIVE ");
        JERRY_DEBUG_MSG ("%d, ", re_get_value (&cursor_p));
        break;
      }
      case RE_OP_LOOKAHEAD_POS:
      {
        JERRY_DEBUG_MSG ("LOOKAHEAD_POS ");
        JERRY_DEBUG_MSG ("%d, ", re_get_value (&cursor_p));
        break;
      }
      case RE_OP_LOOKAHEAD_NEG:
      {
        JERRY_DEBUG_MSG ("LOOKAHEAD_NEG ");
        JERRY_DEBUG_MSG ("%d, ", re_get_value (&cursor_p));
        break;
      }
      case RE_OP_BACKREFERENCE:
      {
        JERRY_DEBUG_MSG ("BACKREFERENCE ");
        JERRY_DEBUG_MSG ("%d, ", re_get_value (&cursor_p));
        break;
      }

      /* Group starts. */
      case RE_OP_CAPTURE_GROUP_START:
      {
        JERRY_DEBUG_MSG ("START ");
        JERRY_DEBUG_MSG ("%d ", re_get_value (&cursor_p));
        JERRY_DEBUG_MSG ("%d, ", re_get_value (&cursor_p));
        break;
      }
      case RE_OP_NON_CAPTURE_GROUP_START:
      {
        JERRY_DEBUG_MSG ("NC_START ");
        JERRY_DEBUG_MSG ("%d ", re_get_value (&cursor_p));
        JERRY_DEBUG_MSG ("%d, ", re_get_value (&cursor_p));
        break;
      }
      case RE_OP_CAPTURE_NON_GREEDY_ZERO_GROUP_START:
      case RE_OP_CAPTURE_GREEDY_ZERO_GROUP_START:
      {
        /* The non-greedy variant is the greedy mnemonic with an "N" prefix. */
        if (opcode == RE_OP_CAPTURE_NON_GREEDY_ZERO_GROUP_START)
        {
          JERRY_DEBUG_MSG ("N");
        }

        JERRY_DEBUG_MSG ("GZ_START ");
        JERRY_DEBUG_MSG ("%d ", re_get_value (&cursor_p));
        JERRY_DEBUG_MSG ("%d ", re_get_value (&cursor_p));
        JERRY_DEBUG_MSG ("%d, ", re_get_value (&cursor_p));
        break;
      }
      case RE_OP_NON_CAPTURE_NON_GREEDY_ZERO_GROUP_START:
      case RE_OP_NON_CAPTURE_GREEDY_ZERO_GROUP_START:
      {
        if (opcode == RE_OP_NON_CAPTURE_NON_GREEDY_ZERO_GROUP_START)
        {
          JERRY_DEBUG_MSG ("N");
        }

        JERRY_DEBUG_MSG ("GZ_NC_START ");
        JERRY_DEBUG_MSG ("%d ", re_get_value (&cursor_p));
        JERRY_DEBUG_MSG ("%d ", re_get_value (&cursor_p));
        JERRY_DEBUG_MSG ("%d, ", re_get_value (&cursor_p));
        break;
      }

      /* Group ends. */
      case RE_OP_CAPTURE_NON_GREEDY_GROUP_END:
      case RE_OP_CAPTURE_GREEDY_GROUP_END:
      {
        if (opcode == RE_OP_CAPTURE_NON_GREEDY_GROUP_END)
        {
          JERRY_DEBUG_MSG ("N");
        }

        JERRY_DEBUG_MSG ("G_END ");
        JERRY_DEBUG_MSG ("%d ", re_get_value (&cursor_p));
        JERRY_DEBUG_MSG ("%d ", re_get_value (&cursor_p));
        JERRY_DEBUG_MSG ("%d ", re_get_value (&cursor_p));
        JERRY_DEBUG_MSG ("%d, ", re_get_value (&cursor_p));
        break;
      }
      case RE_OP_NON_CAPTURE_NON_GREEDY_GROUP_END:
      case RE_OP_NON_CAPTURE_GREEDY_GROUP_END:
      {
        if (opcode == RE_OP_NON_CAPTURE_NON_GREEDY_GROUP_END)
        {
          JERRY_DEBUG_MSG ("N");
        }

        JERRY_DEBUG_MSG ("G_NC_END ");
        JERRY_DEBUG_MSG ("%d ", re_get_value (&cursor_p));
        JERRY_DEBUG_MSG ("%d ", re_get_value (&cursor_p));
        JERRY_DEBUG_MSG ("%d ", re_get_value (&cursor_p));
        JERRY_DEBUG_MSG ("%d, ", re_get_value (&cursor_p));
        break;
      }

      /* Iterators. */
      case RE_OP_GREEDY_ITERATOR:
      {
        JERRY_DEBUG_MSG ("GREEDY_ITERATOR ");
        JERRY_DEBUG_MSG ("%d ", re_get_value (&cursor_p));
        JERRY_DEBUG_MSG ("%d ", re_get_value (&cursor_p));
        JERRY_DEBUG_MSG ("%d, ", re_get_value (&cursor_p));
        break;
      }
      case RE_OP_NON_GREEDY_ITERATOR:
      {
        JERRY_DEBUG_MSG ("NON_GREEDY_ITERATOR ");
        JERRY_DEBUG_MSG ("%d, ", re_get_value (&cursor_p));
        JERRY_DEBUG_MSG ("%d, ", re_get_value (&cursor_p));
        JERRY_DEBUG_MSG ("%d, ", re_get_value (&cursor_p));
        break;
      }

      /* Character classes: a range count followed by that many start-end pairs. */
      case RE_OP_INV_CHAR_CLASS:
      case RE_OP_CHAR_CLASS:
      {
        if (opcode == RE_OP_INV_CHAR_CLASS)
        {
          JERRY_DEBUG_MSG ("INV_");
        }

        JERRY_DEBUG_MSG ("CHAR_CLASS ");

        uint32_t ranges_left = re_get_value (&cursor_p);
        JERRY_DEBUG_MSG ("%d", ranges_left);

        for (; ranges_left > 0; ranges_left--)
        {
          JERRY_DEBUG_MSG (" %d", re_get_char (&cursor_p));
          JERRY_DEBUG_MSG ("-%d", re_get_char (&cursor_p));
        }

        JERRY_DEBUG_MSG (", ");
        break;
      }

      default:
      {
        JERRY_DEBUG_MSG ("UNKNOWN(%d), ", (uint32_t) opcode);
        break;
      }
    }
  }

  JERRY_DEBUG_MSG ("EOF\n");
} /* re_dump_bytecode */