/*
 * Lex a regular expression literal and return it as an LT_REGEXP literal.
 *
 * ctx->ptr is first moved backwards until it points at a '/', which is taken
 * as the opening delimiter.  The pattern body is then scanned forward up to
 * the closing '/', followed by a run of alphanumeric flag characters.  On
 * success ctx->ptr is left just past the flags.
 *
 * The returned literal does not copy the pattern: u.regexp.str points into
 * the buffer being lexed and u.regexp.str_len gives its length in WCHARs.
 *
 * Returns NULL when the literal is unterminated, when the flags are rejected
 * by parse_regexp_flags, or when the literal cannot be allocated.
 */
literal_t *parse_regexp(parser_ctx_t *ctx)
{
    const WCHAR *re, *flags_ptr;
    BOOL in_class = FALSE;
    DWORD re_len, flags;
    literal_t *ret;
    HRESULT hres;

    TRACE("\n");

    /* Step back to the opening '/'.  There is no lower bound on this scan;
     * NOTE(review): presumably the caller has just consumed that '/' - confirm. */
    while(*--ctx->ptr != '/');

    /* Simple regexp pre-parser; '/' if used in char class does not terminate regexp literal */
    re = ++ctx->ptr;
    while(ctx->ptr < ctx->end) {
        if(*ctx->ptr == '\\') {
            /* Skip the escaped character; a trailing backslash ends the scan. */
            if(++ctx->ptr == ctx->end)
                break;
        }else if(in_class) {
            /* A newline inside [...] aborts the scan; the check below then fails. */
            if(*ctx->ptr == '\n')
                break;
            if(*ctx->ptr == ']')
                in_class = FALSE;
        }else {
            if(*ctx->ptr == '/')
                break;
            if(*ctx->ptr == '[')
                in_class = TRUE;
        }
        ctx->ptr++;
    }

    if(ctx->ptr == ctx->end || *ctx->ptr != '/') {
        WARN("pre-parsing failed\n");
        return NULL;
    }

    re_len = ctx->ptr-re;

    /* Flags are whatever alphanumeric characters directly follow the closing '/'. */
    flags_ptr = ++ctx->ptr;
    while(ctx->ptr < ctx->end && isalnumW(*ctx->ptr))
        ctx->ptr++;

    hres = parse_regexp_flags(flags_ptr, ctx->ptr-flags_ptr, &flags);
    if(FAILED(hres))
        return NULL;

    ret = parser_alloc(ctx, sizeof(literal_t));
    /* Do not write through a failed allocation; report it like any other failure. */
    if(!ret)
        return NULL;

    ret->type = LT_REGEXP;
    ret->u.regexp.str = re;
    ret->u.regexp.str_len = re_len;
    ret->u.regexp.flags = flags;
    return ret;
}
/*
 * Create a RegExp object from script values.
 *
 * src_arg   - either an existing RegExp object, whose source string and flags
 *             are reused (flags_arg is not consulted in that case), or a
 *             string holding the pattern.
 * flags_arg - optional; when non-NULL it must hold a string of flag characters.
 * ret       - receives the new object through create_regexp.
 *
 * Returns E_NOTIMPL for argument types that are not handled, E_OUTOFMEMORY
 * when the flags string cannot be flattened, the failure code of
 * parse_regexp_flags, or otherwise the result of create_regexp.
 */
HRESULT create_regexp_var(script_ctx_t *ctx, jsval_t src_arg, jsval_t *flags_arg, jsdisp_t **ret)
{
    const WCHAR *flag_chars = NULL;
    unsigned flag_count = 0;
    unsigned flags;
    jsstr_t *pattern;
    HRESULT hres;

    if(is_object_instance(src_arg)) {
        jsdisp_t *src_obj = iface_to_jsdisp((IUnknown*)get_object(src_arg));

        if(src_obj) {
            if(!is_class(src_obj, JSCLASS_REGEXP)) {
                /* Not a RegExp: drop the reference and fall through to the string check. */
                jsdisp_release(src_obj);
            }else {
                RegExpInstance *source_regexp = (RegExpInstance*)src_obj;

                hres = create_regexp(ctx, source_regexp->str, source_regexp->jsregexp->flags, ret);
                jsdisp_release(src_obj);
                return hres;
            }
        }
    }

    if(!is_string(src_arg)) {
        FIXME("src_arg = %s\n", debugstr_jsval(src_arg));
        return E_NOTIMPL;
    }

    pattern = get_string(src_arg);

    if(flags_arg) {
        jsstr_t *flags_str;

        if(!is_string(*flags_arg)) {
            FIXME("unimplemented for %s\n", debugstr_jsval(*flags_arg));
            return E_NOTIMPL;
        }

        flags_str = get_string(*flags_arg);

        flag_chars = jsstr_flatten(flags_str);
        if(!flag_chars)
            return E_OUTOFMEMORY;

        flag_count = jsstr_length(flags_str);
    }

    /* With no flags argument this is called with a NULL pointer and zero length. */
    hres = parse_regexp_flags(flag_chars, flag_count, &flags);

    return FAILED(hres) ? hres : create_regexp(ctx, pattern, flags, ret);
}
/*
 * Lex a regular expression literal and return it as an LT_REGEXP literal.
 *
 * ctx->ptr is moved backwards (starting with its current position) until it
 * points at a '/', which is taken as the opening delimiter.  The pattern runs
 * up to the next '/' that is not preceded by a backslash, and is followed by
 * a run of alphanumeric flag characters.  On success ctx->ptr is left just
 * past the flags.
 *
 * The returned literal does not copy the pattern: u.regexp.str points into
 * the buffer being lexed and u.regexp.str_len gives its length in WCHARs.
 *
 * Returns NULL when the input ends before the closing '/', when the flags are
 * rejected by parse_regexp_flags, or when the literal cannot be allocated.
 */
literal_t *parse_regexp(parser_ctx_t *ctx)
{
    const WCHAR *re, *flags_ptr;
    DWORD re_len, flags;
    literal_t *ret;
    HRESULT hres;

    TRACE("\n");

    /* Step back to the opening '/'.  There is no lower bound on this scan;
     * NOTE(review): presumably the caller guarantees a '/' at or before ptr - confirm. */
    while(*ctx->ptr != '/')
        ctx->ptr--;

    re = ++ctx->ptr;
    while(ctx->ptr < ctx->end && *ctx->ptr != '/') {
        BOOL is_escape = *ctx->ptr == '\\';

        ctx->ptr++;
        /* A backslash also consumes the following character, so that an
         * escaped '/' does not terminate the literal. */
        if(is_escape && ctx->ptr < ctx->end)
            ctx->ptr++;
    }

    if(ctx->ptr == ctx->end) {
        WARN("unexpected end of file\n");
        return NULL;
    }

    re_len = ctx->ptr-re;

    /* Flags are whatever alphanumeric characters directly follow the closing '/'. */
    flags_ptr = ++ctx->ptr;
    while(ctx->ptr < ctx->end && isalnumW(*ctx->ptr))
        ctx->ptr++;

    hres = parse_regexp_flags(flags_ptr, ctx->ptr-flags_ptr, &flags);
    if(FAILED(hres))
        return NULL;

    ret = parser_alloc(ctx, sizeof(literal_t));
    /* Do not write through a failed allocation; report it like any other failure. */
    if(!ret)
        return NULL;

    ret->type = LT_REGEXP;
    ret->u.regexp.str = re;
    ret->u.regexp.str_len = re_len;
    ret->u.regexp.flags = flags;
    return ret;
}
/*
 *  Compile a regexp pattern into bytecode.
 *
 *  Value stack on entry:  [ ... pattern flags ]  (both are required to be strings)
 *  Value stack on exit:   [ ... escaped_source bytecode ]
 *
 *  The bytecode is accumulated in a dynamic buffer and coerced to a string
 *  at the end.  A SyntaxError is raised through DUK_ERROR if the pattern
 *  contains a backreference higher than the number of capture groups.
 */
void duk_regexp_compile(duk_hthread *thr) {
	duk_context *ctx = (duk_context *) thr;
	duk_re_compiler_ctx re_ctx;
	duk_lexer_point lex_point;
	duk_hstring *h_pattern;
	duk_hstring *h_flags;
	duk_hbuffer_dynamic *h_buffer;

	DUK_ASSERT(thr != NULL);
	DUK_ASSERT(ctx != NULL);

	/*
	 *  Args validation
	 */

	/* TypeError if fails */
	h_pattern = duk_require_hstring(ctx, -2);
	h_flags = duk_require_hstring(ctx, -1);

	/*
	 *  Create normalized 'source' property (E5 Section 15.10.3).
	 */

	/* [ ... pattern flags ] */

	create_escaped_source(thr, -2);

	/* [ ... pattern flags escaped_source ] */

	/*
	 *  Init compilation context
	 */

	/* The output buffer starts empty (size 0) and is kept on the value stack. */
	duk_push_dynamic_buffer(ctx, 0);
	h_buffer = (duk_hbuffer_dynamic *) duk_require_hbuffer(ctx, -1);
	DUK_ASSERT(DUK_HBUFFER_HAS_DYNAMIC(h_buffer));

	/* [ ... pattern flags escaped_source buffer ] */

	DUK_MEMSET(&re_ctx, 0, sizeof(re_ctx));
	DUK_LEXER_INITCTX(&re_ctx.lex);  /* duplicate zeroing, except for (possible) NULL inits */
	re_ctx.thr = thr;
	re_ctx.lex.thr = thr;
	/* The lexer reads the pattern string's data directly, by byte length. */
	re_ctx.lex.input = DUK_HSTRING_GET_DATA(h_pattern);
	re_ctx.lex.input_length = DUK_HSTRING_GET_BYTELEN(h_pattern);
	re_ctx.buf = h_buffer;
	re_ctx.recursion_limit = DUK_RE_COMPILE_RECURSION_LIMIT;
	re_ctx.re_flags = parse_regexp_flags(thr, h_flags);

	DUK_DDPRINT("regexp compiler ctx initialized, flags=0x%08x, recursion_limit=%d",
	            (unsigned int) re_ctx.re_flags, (int) re_ctx.recursion_limit);

	/*
	 *  Init lexer
	 */

	lex_point.offset = 0;  /* expensive init, just want to fill window */
	lex_point.line = 1;
	DUK_LEXER_SETPOINT(&re_ctx.lex, &lex_point);

	/*
	 *  Compilation
	 */

	DUK_DPRINT("starting regexp compilation");

	/* Emitted sequence: SAVE 0, <pattern disjunction>, SAVE 1, MATCH. */
	append_u32(&re_ctx, DUK_REOP_SAVE);
	append_u32(&re_ctx, 0);
	(void) parse_disjunction(&re_ctx, 1);  /* 1 = expect eof */
	append_u32(&re_ctx, DUK_REOP_SAVE);
	append_u32(&re_ctx, 1);
	append_u32(&re_ctx, DUK_REOP_MATCH);

	DUK_DPRINT("regexp bytecode size (before header) is %d bytes",
	           (int) DUK_HBUFFER_GET_SIZE(re_ctx.buf));

	/*
	 *  Check for invalid backreferences; note that it is NOT an error
	 *  to back-reference a capture group which has not yet been introduced
	 *  in the pattern (as in /\1(foo)/); in fact, the backreference will
	 *  always match!  It IS an error to back-reference a capture group
	 *  which will never be introduced in the pattern.  Thus, we can check
	 *  for such references only after parsing is complete.
	 */

	if (re_ctx.highest_backref > re_ctx.captures) {
		DUK_ERROR(thr, DUK_ERR_SYNTAX_ERROR, "invalid backreference(s)");
	}

	/*
	 *  Emit compiled regexp header: flags, ncaptures
	 *  (insertion order inverted on purpose)
	 */

	/* Both values are inserted with the same position argument (0), and the
	 * flags go in last.  NOTE(review): the count written is
	 * (captures + 1) * 2 - presumably two saved offsets per group including
	 * the whole match; confirm against the bytecode consumer.
	 */
	insert_u32(&re_ctx, 0, (re_ctx.captures + 1) * 2);
	insert_u32(&re_ctx, 0, re_ctx.re_flags);

	DUK_DPRINT("regexp bytecode size (after header) is %d bytes",
	           (int) DUK_HBUFFER_GET_SIZE(re_ctx.buf));
	DUK_DDDPRINT("compiled regexp: %!xO", re_ctx.buf);

	/* [ ... pattern flags escaped_source buffer ] */

	duk_to_string(ctx, -1);  /* coerce to string */

	/* [ ... pattern flags escaped_source bytecode ] */

	/*
	 *  Finalize stack
	 */

	/* Drop the two inputs; the indices shift after the first removal. */
	duk_remove(ctx, -4);     /* -> [ ... flags escaped_source bytecode ] */
	duk_remove(ctx, -3);     /* -> [ ... escaped_source bytecode ] */

	DUK_DPRINT("regexp compilation successful, bytecode: %!T, escaped source: %!T",
	           duk_get_tval(ctx, -1), duk_get_tval(ctx, -2));
}