/* Overflow, relevant mainly when listlen is 16 bits. */ return 1; /* fail */ } new_lst = (duk_hstring **) DUK_REALLOC(heap, e->u.strlist, sizeof(duk_hstring *) * (e->listlen + 1)); if (new_lst == NULL) { return 1; /* fail */ } new_lst[e->listlen++] = h; e->u.strlist = new_lst; } return 0; } #endif /* DUK_USE_HEAPPTR16 */ #if defined(DUK_USE_HEAPPTR16) DUK_LOCAL duk_hstring *duk__find_matching_string_chain(duk_heap *heap, const duk_uint8_t *str, duk_uint32_t blen, duk_uint32_t strhash) { duk_small_uint_t slotidx; duk_strtab_entry *e; duk_uint16_t *lst; duk_size_t i, n; duk_uint16_t null16 = heap->heapptr_null16; DUK_ASSERT(heap != NULL); slotidx = strhash % DUK_STRTAB_CHAIN_SIZE; DUK_ASSERT(slotidx < DUK_STRTAB_CHAIN_SIZE); e = heap->strtable + slotidx; if (e->listlen == 0) { if (e->u.str16 != null16) { duk_hstring *h = (duk_hstring *) DUK_USE_HEAPPTR_DEC16(heap->heap_udata, e->u.str16); DUK_ASSERT(h != NULL); if (DUK_HSTRING_GET_BYTELEN(h) == blen && DUK_MEMCMP((const void *) str, (const void *) DUK_HSTRING_GET_DATA(h), (size_t) blen) == 0) { return h; } } } else { DUK_ASSERT(e->u.strlist16 != null16); lst = (duk_uint16_t *) DUK_USE_HEAPPTR_DEC16(heap->heap_udata, e->u.strlist16); DUK_ASSERT(lst != NULL); for (i = 0, n = e->listlen; i < n; i++) { if (lst[i] != null16) { duk_hstring *h = (duk_hstring *) DUK_USE_HEAPPTR_DEC16(heap->heap_udata, lst[i]); DUK_ASSERT(h != NULL); if (DUK_HSTRING_GET_BYTELEN(h) == blen && DUK_MEMCMP((const void *) str, (const void *) DUK_HSTRING_GET_DATA(h), (size_t) blen) == 0) { return h; } } } } return NULL; } #else /* DUK_USE_HEAPPTR16 */ DUK_LOCAL duk_hstring *duk__find_matching_string_chain(duk_heap *heap, const duk_uint8_t *str, duk_uint32_t blen, duk_uint32_t strhash) { duk_small_uint_t slotidx; duk_strtab_entry *e; duk_hstring **lst; duk_size_t i, n; DUK_ASSERT(heap != NULL); slotidx = strhash % DUK_STRTAB_CHAIN_SIZE; DUK_ASSERT(slotidx < DUK_STRTAB_CHAIN_SIZE); e = heap->strtable + slotidx; if (e->listlen == 0) { if (e->u.str != NULL && DUK_HSTRING_GET_BYTELEN(e->u.str) == blen && DUK_MEMCMP((const void *) str, (const void *) DUK_HSTRING_GET_DATA(e->u.str), (size_t) blen) == 0) { return e->u.str; } } else { DUK_ASSERT(e->u.strlist != NULL); lst = e->u.strlist; for (i = 0, n = e->listlen; i < n; i++) { if (lst[i] != NULL && DUK_HSTRING_GET_BYTELEN(lst[i]) == blen && DUK_MEMCMP((const void *) str, (const void *) DUK_HSTRING_GET_DATA(lst[i]), (size_t) blen) == 0) { return lst[i]; } } } return NULL; }
DUK_INTERNAL duk_ret_t duk_bi_string_prototype_locale_compare(duk_context *ctx) { duk_hstring *h1; duk_hstring *h2; duk_size_t h1_len, h2_len, prefix_len; duk_small_int_t ret = 0; duk_small_int_t rc; /* The current implementation of localeCompare() is simply a codepoint * by codepoint comparison, implemented with a simple string compare * because UTF-8 should preserve codepoint ordering (assuming valid * shortest UTF-8 encoding). * * The specification requires that the return value must be related * to the sort order: e.g. negative means that 'this' comes before * 'that' in sort order. We assume an ascending sort order. */ /* XXX: could share code with duk_js_ops.c, duk_js_compare_helper */ h1 = duk_push_this_coercible_to_string(ctx); DUK_ASSERT(h1 != NULL); h2 = duk_to_hstring(ctx, 0); DUK_ASSERT(h2 != NULL); h1_len = (duk_size_t) DUK_HSTRING_GET_BYTELEN(h1); h2_len = (duk_size_t) DUK_HSTRING_GET_BYTELEN(h2); prefix_len = (h1_len <= h2_len ? h1_len : h2_len); /* Zero size compare not an issue with DUK_MEMCMP. */ rc = (duk_small_int_t) DUK_MEMCMP((const char *) DUK_HSTRING_GET_DATA(h1), (const char *) DUK_HSTRING_GET_DATA(h2), prefix_len); if (rc < 0) { ret = -1; goto done; } else if (rc > 0) { ret = 1; goto done; } /* prefix matches, lengths matter now */ if (h1_len > h2_len) { ret = 1; goto done; } else if (h1_len == h2_len) { DUK_ASSERT(ret == 0); goto done; } ret = -1; goto done; done: duk_push_int(ctx, (duk_int_t) ret); return 1; }
DUK_INTERNAL duk_small_int_t duk_js_string_compare(duk_hstring *h1, duk_hstring *h2) { /* * String comparison (E5 Section 11.8.5, step 4), which * needs to compare codepoint by codepoint. * * However, UTF-8 allows us to use strcmp directly: the shared * prefix will be encoded identically (UTF-8 has unique encoding) * and the first differing character can be compared with a simple * unsigned byte comparison (which strcmp does). * * This will not work properly for non-xutf-8 strings, but this * is not an issue for compliance. */ duk_size_t h1_len, h2_len, prefix_len; duk_small_int_t rc; DUK_ASSERT(h1 != NULL); DUK_ASSERT(h2 != NULL); h1_len = DUK_HSTRING_GET_BYTELEN(h1); h2_len = DUK_HSTRING_GET_BYTELEN(h2); prefix_len = (h1_len <= h2_len ? h1_len : h2_len); /* XXX: this special case can now be removed with DUK_MEMCMP */ /* memcmp() should return zero (equal) for zero length, but avoid * it because there are some platform specific bugs. Don't use * strncmp() because it stops comparing at a NUL. */ if (prefix_len == 0) { rc = 0; } else { rc = DUK_MEMCMP((const char *) DUK_HSTRING_GET_DATA(h1), (const char *) DUK_HSTRING_GET_DATA(h2), prefix_len); } if (rc < 0) { return -1; } else if (rc > 0) { return 1; } /* prefix matches, lengths matter now */ if (h1_len < h2_len) { /* e.g. "x" < "xx" */ return -1; } else if (h1_len > h2_len) { return 1; } return 0; }
DUK_INTERNAL void duk_regexp_create_instance(duk_hthread *thr) { duk_context *ctx = (duk_context *) thr; duk_hobject *h; duk_hstring *h_bc; duk_small_int_t re_flags; /* [ ... escape_source bytecode ] */ h_bc = duk_get_hstring(ctx, -1); DUK_ASSERT(h_bc != NULL); DUK_ASSERT(DUK_HSTRING_GET_BYTELEN(h_bc) >= 1); /* always at least the header */ DUK_ASSERT(DUK_HSTRING_GET_CHARLEN(h_bc) >= 1); DUK_ASSERT((duk_small_int_t) DUK_HSTRING_GET_DATA(h_bc)[0] < 0x80); /* flags always encodes to 1 byte */ re_flags = (duk_small_int_t) DUK_HSTRING_GET_DATA(h_bc)[0]; /* [ ... escaped_source bytecode ] */ duk_push_object(ctx); h = duk_get_hobject(ctx, -1); DUK_ASSERT(h != NULL); duk_insert(ctx, -3); /* [ ... regexp_object escaped_source bytecode ] */ DUK_HOBJECT_SET_CLASS_NUMBER(h, DUK_HOBJECT_CLASS_REGEXP); DUK_HOBJECT_SET_PROTOTYPE_UPDREF(thr, h, thr->builtins[DUK_BIDX_REGEXP_PROTOTYPE]); duk_xdef_prop_stridx(ctx, -3, DUK_STRIDX_INT_BYTECODE, DUK_PROPDESC_FLAGS_NONE); /* [ ... regexp_object escaped_source ] */ duk_xdef_prop_stridx(ctx, -2, DUK_STRIDX_SOURCE, DUK_PROPDESC_FLAGS_NONE); /* [ ... regexp_object ] */ duk_push_boolean(ctx, (re_flags & DUK_RE_FLAG_GLOBAL)); duk_xdef_prop_stridx(ctx, -2, DUK_STRIDX_GLOBAL, DUK_PROPDESC_FLAGS_NONE); duk_push_boolean(ctx, (re_flags & DUK_RE_FLAG_IGNORE_CASE)); duk_xdef_prop_stridx(ctx, -2, DUK_STRIDX_IGNORE_CASE, DUK_PROPDESC_FLAGS_NONE); duk_push_boolean(ctx, (re_flags & DUK_RE_FLAG_MULTILINE)); duk_xdef_prop_stridx(ctx, -2, DUK_STRIDX_MULTILINE, DUK_PROPDESC_FLAGS_NONE); duk_push_int(ctx, 0); duk_xdef_prop_stridx(ctx, -2, DUK_STRIDX_LAST_INDEX, DUK_PROPDESC_FLAGS_W); /* [ ... regexp_object ] */ }
static duk_hstring *duk__find_matching_string(duk_heap *heap, duk_hstring **entries, duk_uint32_t size, duk_uint8_t *str, duk_uint32_t blen, duk_uint32_t strhash) { duk_uint32_t i; duk_uint32_t step; DUK_ASSERT(size > 0); i = DUK__HASH_INITIAL(strhash, size); step = DUK__HASH_PROBE_STEP(strhash); for (;;) { duk_hstring *e; e = entries[i]; if (!e) { return NULL; } if (e != DUK__DELETED_MARKER(heap) && DUK_HSTRING_GET_BYTELEN(e) == blen) { if (DUK_MEMCMP(str, DUK_HSTRING_GET_DATA(e), blen) == 0) { DUK_DDD(DUK_DDDPRINT("find matching hit: %d (step %d, size %d)", i, step, size)); return e; } } DUK_DDD(DUK_DDDPRINT("find matching miss: %d (step %d, size %d)", i, step, size)); i = (i + step) % size; /* looping should never happen */ DUK_ASSERT(i != DUK__HASH_INITIAL(strhash, size)); } DUK_UNREACHABLE(); }
DUK_INTERNAL duk_ucodepoint_t duk_hstring_char_code_at_raw(duk_hthread *thr, duk_hstring *h, duk_uint_t pos) { duk_uint32_t boff; const duk_uint8_t *p, *p_start, *p_end; duk_ucodepoint_t cp; /* Caller must check character offset to be inside the string. */ DUK_ASSERT(thr != NULL); DUK_ASSERT(h != NULL); DUK_ASSERT_DISABLE(pos >= 0); /* unsigned */ DUK_ASSERT(pos < (duk_uint_t) DUK_HSTRING_GET_CHARLEN(h)); boff = duk_heap_strcache_offset_char2byte(thr, h, (duk_uint32_t) pos); DUK_DDD(DUK_DDDPRINT("charCodeAt: pos=%ld -> boff=%ld, str=%!O", (long) pos, (long) boff, (duk_heaphdr *) h)); DUK_ASSERT_DISABLE(boff >= 0); DUK_ASSERT(boff < DUK_HSTRING_GET_BYTELEN(h)); p_start = DUK_HSTRING_GET_DATA(h); p_end = p_start + DUK_HSTRING_GET_BYTELEN(h); p = p_start + boff; DUK_DDD(DUK_DDDPRINT("p_start=%p, p_end=%p, p=%p", (void *) p_start, (void *) p_end, (void *) p)); /* This may throw an error though not for valid E5 strings. */ cp = duk_unicode_decode_xutf8_checked(thr, &p, p_start, p_end); return cp; }
DUK_LOCAL void duk__print_hstring(duk__dprint_state *st, duk_hstring *h, duk_bool_t quotes) { duk_fixedbuffer *fb = st->fb; const duk_uint8_t *p; const duk_uint8_t *p_end; /* terminal type: no depth check */ if (duk_fb_is_full(fb)) { return; } duk__print_shared_heaphdr_string(st, &h->hdr); if (!h) { duk_fb_put_cstring(fb, "NULL"); return; } p = DUK_HSTRING_GET_DATA(h); p_end = p + DUK_HSTRING_GET_BYTELEN(h); if (p_end > p && p[0] == DUK_ASC_UNDERSCORE) { /* if property key begins with underscore, encode it with * forced quotes (e.g. "_Foo") to distinguish it from encoded * internal properties (e.g. \xffBar -> _Bar). */ quotes = 1; } if (quotes) { duk_fb_put_byte(fb, (duk_uint8_t) DUK_ASC_DOUBLEQUOTE); } while (p < p_end) { duk_uint8_t ch = *p++; /* two special escapes: '\' and '"', other printables as is */ if (ch == '\\') { duk_fb_sprintf(fb, "\\\\"); } else if (ch == '"') { duk_fb_sprintf(fb, "\\\""); } else if (ch >= 0x20 && ch <= 0x7e) { duk_fb_put_byte(fb, ch); } else if (ch == 0xff && !quotes) { /* encode \xffBar as _Bar if no quotes are applied, this is for * readable internal keys. */ duk_fb_put_byte(fb, (duk_uint8_t) DUK_ASC_UNDERSCORE); } else { duk_fb_sprintf(fb, "\\x%02lx", (unsigned long) ch); } } if (quotes) { duk_fb_put_byte(fb, (duk_uint8_t) DUK_ASC_DOUBLEQUOTE); } #if defined(DUK_USE_REFERENCE_COUNTING) /* XXX: limit to quoted strings only, to save keys from being cluttered? */ duk_fb_sprintf(fb, "/%lu", (unsigned long) DUK_HEAPHDR_GET_REFCOUNT(&h->hdr)); #endif }
size_t duk_hbuffer_append_hstring(duk_hthread *thr, duk_hbuffer_dynamic *buf, duk_hstring *str) { size_t len; DUK_ASSERT(thr != NULL); DUK_ASSERT(buf != NULL); DUK_ASSERT(str != NULL); DUK_ASSERT(DUK_HBUFFER_HAS_DYNAMIC(buf)); len = DUK_HSTRING_GET_BYTELEN(str); duk_hbuffer_insert_bytes(thr, buf, DUK_HBUFFER_GET_SIZE(buf), (duk_uint8_t *) DUK_HSTRING_GET_DATA(str), len); return len; }
DUK_LOCAL duk_hstring *duk__find_matching_string_chain(duk_heap *heap, const duk_uint8_t *str, duk_uint32_t blen, duk_uint32_t strhash) { duk_small_uint_t slotidx; duk_strtab_entry *e; duk_uint16_t *lst; duk_size_t i, n; duk_uint16_t null16 = heap->heapptr_null16; DUK_ASSERT(heap != NULL); slotidx = strhash % DUK_STRTAB_CHAIN_SIZE; DUK_ASSERT(slotidx < DUK_STRTAB_CHAIN_SIZE); e = heap->strtable + slotidx; if (e->listlen == 0) { if (e->u.str16 != null16) { duk_hstring *h = (duk_hstring *) DUK_USE_HEAPPTR_DEC16(heap->heap_udata, e->u.str16); DUK_ASSERT(h != NULL); if (DUK_HSTRING_GET_BYTELEN(h) == blen && DUK_MEMCMP((const void *) str, (const void *) DUK_HSTRING_GET_DATA(h), (size_t) blen) == 0) { return h; } } } else { DUK_ASSERT(e->u.strlist16 != null16); lst = (duk_uint16_t *) DUK_USE_HEAPPTR_DEC16(heap->heap_udata, e->u.strlist16); DUK_ASSERT(lst != NULL); for (i = 0, n = e->listlen; i < n; i++) { if (lst[i] != null16) { duk_hstring *h = (duk_hstring *) DUK_USE_HEAPPTR_DEC16(heap->heap_udata, lst[i]); DUK_ASSERT(h != NULL); if (DUK_HSTRING_GET_BYTELEN(h) == blen && DUK_MEMCMP((const void *) str, (const void *) DUK_HSTRING_GET_DATA(h), (size_t) blen) == 0) { return h; } } } } return NULL; }
DUK_INTERNAL duk_small_int_t duk_js_string_compare(duk_hstring *h1, duk_hstring *h2) { /* * String comparison (E5 Section 11.8.5, step 4), which * needs to compare codepoint by codepoint. * * However, UTF-8 allows us to use strcmp directly: the shared * prefix will be encoded identically (UTF-8 has unique encoding) * and the first differing character can be compared with a simple * unsigned byte comparison (which strcmp does). * * This will not work properly for non-xutf-8 strings, but this * is not an issue for compliance. */ DUK_ASSERT(h1 != NULL); DUK_ASSERT(h2 != NULL); return duk_js_data_compare((const duk_uint8_t *) DUK_HSTRING_GET_DATA(h1), (const duk_uint8_t *) DUK_HSTRING_GET_DATA(h2), (duk_size_t) DUK_HSTRING_GET_BYTELEN(h1), (duk_size_t) DUK_HSTRING_GET_BYTELEN(h2)); }
/* Called by duk_hstring.h macros */ DUK_INTERNAL duk_uarridx_t duk_js_to_arrayindex_string_helper(duk_hstring *h) { duk_uarridx_t res; duk_small_int_t rc; if (!DUK_HSTRING_HAS_ARRIDX(h)) { return DUK_HSTRING_NO_ARRAY_INDEX; } rc = duk_js_to_arrayindex_raw_string(DUK_HSTRING_GET_DATA(h), DUK_HSTRING_GET_BYTELEN(h), &res); DUK_UNREF(rc); DUK_ASSERT(rc != 0); return res; }
DUK_LOCAL duk_uint8_t *duk__dump_hstring_raw(duk_uint8_t *p, duk_hstring *h) { duk_size_t len; duk_uint32_t tmp32; DUK_ASSERT(h != NULL); len = DUK_HSTRING_GET_BYTELEN(h); DUK_ASSERT(len <= 0xffffffffUL); /* string limits */ tmp32 = (duk_uint32_t) len; DUK_RAW_WRITE_U32_BE(p, tmp32); DUK_MEMCPY((void *) p, (const void *) DUK_HSTRING_GET_DATA(h), len); p += len; return p; }
DUK_LOCAL duk_hstring *duk__do_lookup(duk_heap *heap, const duk_uint8_t *str, duk_uint32_t blen, duk_uint32_t *out_strhash) { duk_hstring *res; DUK_ASSERT(out_strhash); *out_strhash = duk_heap_hashstring(heap, str, (duk_size_t) blen); #if defined(DUK_USE_ROM_STRINGS) { duk_small_uint_t i; /* XXX: This is VERY inefficient now, and should be e.g. a * binary search or perfect hash, to be fixed. */ for (i = 0; i < (duk_small_uint_t) (sizeof(duk_rom_strings) / sizeof(duk_hstring *)); i++) { duk_hstring *romstr; romstr = (duk_hstring *) duk_rom_strings[i]; if (blen == DUK_HSTRING_GET_BYTELEN(romstr) && DUK_MEMCMP(str, (void *) DUK_HSTRING_GET_DATA(romstr), blen) == 0) { DUK_DD(DUK_DDPRINT("intern check: rom string: %!O, computed hash 0x%08lx, rom hash 0x%08lx", romstr, (unsigned long) *out_strhash, (unsigned long) DUK_HSTRING_GET_HASH(romstr))); DUK_ASSERT(*out_strhash == DUK_HSTRING_GET_HASH(romstr)); *out_strhash = DUK_HSTRING_GET_HASH(romstr); return romstr; } } } #endif /* DUK_USE_ROM_STRINGS */ #if defined(DUK_USE_STRTAB_CHAIN) res = duk__find_matching_string_chain(heap, str, blen, *out_strhash); #elif defined(DUK_USE_STRTAB_PROBE) res = duk__find_matching_string_probe(heap, #if defined(DUK_USE_HEAPPTR16) heap->strtable16, #else heap->strtable, #endif heap->st_size, str, blen, *out_strhash); #else #error internal error, invalid strtab options #endif return res; }
static duk_uint32_t parse_regexp_flags(duk_hthread *thr, duk_hstring *h) { duk_uint8_t *p; duk_uint8_t *p_end; duk_uint32_t flags = 0; p = DUK_HSTRING_GET_DATA(h); p_end = p + DUK_HSTRING_GET_BYTELEN(h); /* Note: can be safely scanned as bytes (undecoded) */ while (p < p_end) { duk_uint8_t c = *p++; switch ((int) c) { case (int) 'g': { if (flags & DUK_RE_FLAG_GLOBAL) { goto error; } flags |= DUK_RE_FLAG_GLOBAL; break; } case (int) 'i': { if (flags & DUK_RE_FLAG_IGNORE_CASE) { goto error; } flags |= DUK_RE_FLAG_IGNORE_CASE; break; } case (int) 'm': { if (flags & DUK_RE_FLAG_MULTILINE) { goto error; } flags |= DUK_RE_FLAG_MULTILINE; break; } default: { goto error; } } } return flags; error: DUK_ERROR(thr, DUK_ERR_SYNTAX_ERROR, "invalid regexp flags"); return 0; /* never here */ }
DUK_LOCAL duk_uint32_t duk__parse_regexp_flags(duk_hthread *thr, duk_hstring *h) { const duk_uint8_t *p; const duk_uint8_t *p_end; duk_uint32_t flags = 0; p = DUK_HSTRING_GET_DATA(h); p_end = p + DUK_HSTRING_GET_BYTELEN(h); /* Note: can be safely scanned as bytes (undecoded) */ while (p < p_end) { duk_uint8_t c = *p++; switch (c) { case (duk_uint8_t) 'g': { if (flags & DUK_RE_FLAG_GLOBAL) { goto error; } flags |= DUK_RE_FLAG_GLOBAL; break; } case (duk_uint8_t) 'i': { if (flags & DUK_RE_FLAG_IGNORE_CASE) { goto error; } flags |= DUK_RE_FLAG_IGNORE_CASE; break; } case (duk_uint8_t) 'm': { if (flags & DUK_RE_FLAG_MULTILINE) { goto error; } flags |= DUK_RE_FLAG_MULTILINE; break; } default: { goto error; } } } return flags; error: DUK_ERROR_SYNTAX(thr, DUK_STR_INVALID_REGEXP_FLAGS); return 0; /* never here */ }
DUK_LOCAL void duk__create_escaped_source(duk_hthread *thr, int idx_pattern) { duk_context *ctx = (duk_context *) thr; duk_hstring *h; const duk_uint8_t *p; duk_bufwriter_ctx bw_alloc; duk_bufwriter_ctx *bw; duk_uint8_t *q; duk_size_t i, n; duk_uint_fast8_t c_prev, c; h = duk_get_hstring(ctx, idx_pattern); DUK_ASSERT(h != NULL); p = (const duk_uint8_t *) DUK_HSTRING_GET_DATA(h); n = (duk_size_t) DUK_HSTRING_GET_BYTELEN(h); if (n == 0) { /* return '(?:)' */ duk_push_hstring_stridx(ctx, DUK_STRIDX_ESCAPED_EMPTY_REGEXP); return; } bw = &bw_alloc; DUK_BW_INIT_PUSHBUF(thr, bw, n); q = DUK_BW_GET_PTR(thr, bw); c_prev = (duk_uint_fast8_t) 0; for (i = 0; i < n; i++) { c = p[i]; q = DUK_BW_ENSURE_RAW(thr, bw, 2, q); if (c == (duk_uint_fast8_t) '/' && c_prev != (duk_uint_fast8_t) '\\') { /* Unescaped '/' ANYWHERE in the regexp (in disjunction, * inside a character class, ...) => same escape works. */ *q++ = DUK_ASC_BACKSLASH; } *q++ = (duk_uint8_t) c; c_prev = c; } DUK_BW_SETPTR_AND_COMPACT(thr, bw, q); duk_to_string(ctx, -1); /* -> [ ... escaped_source ] */ }
static void sanitize_snippet(char *buf, int buf_size, duk_hstring *str) { int i; int nchars; int maxchars; duk_uint8_t *data; DUK_MEMSET(buf, 0, buf_size); maxchars = buf_size - 1; data = DUK_HSTRING_GET_DATA(str); nchars = (str->blen < maxchars ? str->blen : maxchars); for (i = 0; i < nchars; i++) { char c = (char) data[i]; if (c < 0x20 || c > 0x7e) { c = '.'; } buf[i] = c; } }
static void duk__sanitize_snippet(char *buf, duk_size_t buf_size, duk_hstring *str) { duk_size_t i; duk_size_t nchars; duk_size_t maxchars; duk_uint8_t *data; DUK_MEMZERO(buf, buf_size); maxchars = (duk_size_t) (buf_size - 1); data = DUK_HSTRING_GET_DATA(str); nchars = ((duk_size_t) str->blen < maxchars ? (duk_size_t) str->blen : maxchars); for (i = 0; i < nchars; i++) { duk_small_int_t c = (duk_small_int_t) data[i]; if (c < 0x20 || c > 0x7e) { c = '.'; } buf[i] = (char) c; } }
static void create_escaped_source(duk_hthread *thr, int idx_pattern) { duk_context *ctx = (duk_context *) thr; duk_hstring *h; duk_hbuffer_dynamic *buf; const duk_uint8_t *p; duk_size_t i, n; duk_uint_fast8_t c_prev, c; h = duk_get_hstring(ctx, idx_pattern); DUK_ASSERT(h != NULL); p = (const duk_uint8_t *) DUK_HSTRING_GET_DATA(h); n = (duk_size_t) DUK_HSTRING_GET_BYTELEN(h); if (n == 0) { /* return '(?:)' */ duk_push_hstring_stridx(ctx, DUK_STRIDX_ESCAPED_EMPTY_REGEXP); return; } duk_push_dynamic_buffer(ctx, 0); buf = (duk_hbuffer_dynamic *) duk_get_hbuffer(ctx, -1); DUK_ASSERT(buf != NULL); c_prev = (duk_uint_fast8_t) 0; for (i = 0; i < n; i++) { c = p[i]; if (c == (duk_uint_fast8_t) '/' && c_prev != (duk_uint_fast8_t) '\\') { /* Unescaped '/' ANYWHERE in the regexp (in disjunction, * inside a character class, ...) => same escape works. */ duk_hbuffer_append_byte(thr, buf, (duk_uint8_t) '\\'); } duk_hbuffer_append_byte(thr, buf, (duk_uint8_t) c); c_prev = c; } duk_to_string(ctx, -1); /* -> [ ... escaped_source ] */ }
DUK_LOCAL duk_bool_t duk__init_heap_strings(duk_heap *heap) { #if defined(DUK_USE_ASSERTIONS) duk_small_uint_t i; #endif /* With ROM-based strings, heap->strs[] and thr->strs[] are omitted * so nothing to initialize for strs[]. */ #if defined(DUK_USE_ASSERTIONS) for (i = 0; i < sizeof(duk_rom_strings) / sizeof(const duk_hstring *); i++) { duk_uint32_t hash; const duk_hstring *h; h = duk_rom_strings[i]; DUK_ASSERT(h != NULL); hash = duk_heap_hashstring(heap, (const duk_uint8_t *) DUK_HSTRING_GET_DATA(h), DUK_HSTRING_GET_BYTELEN(h)); DUK_DD(DUK_DDPRINT("duk_rom_strings[%d] -> hash 0x%08lx, computed 0x%08lx", (int) i, (unsigned long) DUK_HSTRING_GET_HASH(h), (unsigned long) hash)); DUK_ASSERT(hash == (duk_uint32_t) DUK_HSTRING_GET_HASH(h)); } #endif return 1; }
/* Helper which can be called both directly and with duk_safe_call(). */ DUK_LOCAL duk_ret_t duk__do_compile(duk_context *ctx) { duk_hthread *thr = (duk_hthread *) ctx; duk__compile_raw_args *comp_args; duk_uint_t flags; duk_small_uint_t comp_flags; duk_hcompiledfunction *h_templ; /* Note: strictness is not inherited from the current Duktape/C * context. Otherwise it would not be possible to compile * non-strict code inside a Duktape/C activation (which is * always strict now). See api-testcases/test-eval-strictness.c * for discussion. */ /* [ ... source? filename &comp_args ] (depends on flags) */ comp_args = (duk__compile_raw_args *) duk_require_pointer(ctx, -1); flags = comp_args->flags; duk_pop(ctx); /* [ ... source? filename ] */ if (!comp_args->src_buffer) { duk_hstring *h_sourcecode; h_sourcecode = duk_get_hstring(ctx, -2); if ((flags & DUK_COMPILE_NOSOURCE) || /* args incorrect */ (h_sourcecode == NULL)) { /* e.g. duk_push_file_string_raw() pushed undefined */ /* XXX: when this error is caused by a nonexistent * file given to duk_peval_file() or similar, the * error message is not the best possible. */ DUK_ERROR(thr, DUK_ERR_API_ERROR, DUK_STR_NO_SOURCECODE); } DUK_ASSERT(h_sourcecode != NULL); comp_args->src_buffer = (const duk_uint8_t *) DUK_HSTRING_GET_DATA(h_sourcecode); comp_args->src_length = (duk_size_t) DUK_HSTRING_GET_BYTELEN(h_sourcecode); } DUK_ASSERT(comp_args->src_buffer != NULL); /* XXX: unnecessary translation of flags */ comp_flags = 0; if (flags & DUK_COMPILE_EVAL) { comp_flags |= DUK_JS_COMPILE_FLAG_EVAL; } if (flags & DUK_COMPILE_FUNCTION) { comp_flags |= DUK_JS_COMPILE_FLAG_EVAL | DUK_JS_COMPILE_FLAG_FUNCEXPR; } if (flags & DUK_COMPILE_STRICT) { comp_flags |= DUK_JS_COMPILE_FLAG_STRICT; } /* [ ... source? filename ] */ duk_js_compile(thr, comp_args->src_buffer, comp_args->src_length, comp_flags); /* [ ... source? func_template ] */ if (flags & DUK_COMPILE_NOSOURCE) { ; } else { duk_remove(ctx, -2); } /* [ ... func_template ] */ h_templ = (duk_hcompiledfunction *) duk_get_hobject(ctx, -1); DUK_ASSERT(h_templ != NULL); duk_js_push_closure(thr, h_templ, thr->builtins[DUK_BIDX_GLOBAL_ENV], thr->builtins[DUK_BIDX_GLOBAL_ENV]); duk_remove(ctx, -2); /* -> [ ... closure ] */ /* [ ... closure ] */ return 1; }
DUK_INTERNAL void duk_heap_dump_strtab(duk_heap *heap) { duk_strtab_entry *e; duk_small_uint_t i; duk_size_t j, n, used; #if defined(DUK_USE_HEAPPTR16) duk_uint16_t *lst; duk_uint16_t null16 = heap->heapptr_null16; #else duk_hstring **lst; #endif DUK_ASSERT(heap != NULL); for (i = 0; i < DUK_STRTAB_CHAIN_SIZE; i++) { e = heap->strtable + i; if (e->listlen == 0) { #if defined(DUK_USE_HEAPPTR16) DUK_DD(DUK_DDPRINT("[%03d] -> plain %d", (int) i, (int) (e->u.str16 != null16 ? 1 : 0))); #else DUK_DD(DUK_DDPRINT("[%03d] -> plain %d", (int) i, (int) (e->u.str ? 1 : 0))); #endif } else { used = 0; #if defined(DUK_USE_HEAPPTR16) lst = (duk_uint16_t *) DUK_USE_HEAPPTR_DEC16(heap->heap_udata, e->u.strlist16); #else lst = e->u.strlist; #endif DUK_ASSERT(lst != NULL); for (j = 0, n = e->listlen; j < n; j++) { #if defined(DUK_USE_HEAPPTR16) if (lst[j] != null16) { #else if (lst[j] != NULL) { #endif used++; } } DUK_DD(DUK_DDPRINT("[%03d] -> array %d/%d", (int) i, (int) used, (int) e->listlen)); } } } #endif /* DUK_USE_DEBUG */ #endif /* DUK_USE_STRTAB_CHAIN */ /* * String table algorithm: closed hashing with a probe sequence * * This is the default algorithm and works fine for environments with * minimal memory constraints. */ #if defined(DUK_USE_STRTAB_PROBE) /* Count actually used (non-NULL, non-DELETED) entries. */ DUK_LOCAL duk_int_t duk__count_used_probe(duk_heap *heap) { duk_int_t res = 0; duk_uint_fast32_t i, n; #if defined(DUK_USE_HEAPPTR16) duk_uint16_t null16 = heap->heapptr_null16; duk_uint16_t deleted16 = heap->heapptr_deleted16; #endif n = (duk_uint_fast32_t) heap->st_size; for (i = 0; i < n; i++) { #if defined(DUK_USE_HEAPPTR16) if (heap->strtable16[i] != null16 && heap->strtable16[i] != deleted16) { #else if (heap->strtable[i] != NULL && heap->strtable[i] != DUK__DELETED_MARKER(heap)) { #endif res++; } } return res; } #if defined(DUK_USE_HEAPPTR16) DUK_LOCAL void duk__insert_hstring_probe(duk_heap *heap, duk_uint16_t *entries16, duk_uint32_t size, duk_uint32_t *p_used, duk_hstring *h) { #else DUK_LOCAL void duk__insert_hstring_probe(duk_heap *heap, duk_hstring **entries, duk_uint32_t size, duk_uint32_t *p_used, duk_hstring *h) { #endif duk_uint32_t i; duk_uint32_t step; #if defined(DUK_USE_HEAPPTR16) duk_uint16_t null16 = heap->heapptr_null16; duk_uint16_t deleted16 = heap->heapptr_deleted16; #endif DUK_ASSERT(size > 0); i = DUK__HASH_INITIAL(DUK_HSTRING_GET_HASH(h), size); step = DUK__HASH_PROBE_STEP(DUK_HSTRING_GET_HASH(h)); for (;;) { #if defined(DUK_USE_HEAPPTR16) duk_uint16_t e16 = entries16[i]; #else duk_hstring *e = entries[i]; #endif #if defined(DUK_USE_HEAPPTR16) /* XXX: could check for e16 == 0 because NULL is guaranteed to * encode to zero. */ if (e16 == null16) { #else if (e == NULL) { #endif DUK_DDD(DUK_DDDPRINT("insert hit (null): %ld", (long) i)); #if defined(DUK_USE_HEAPPTR16) entries16[i] = DUK_USE_HEAPPTR_ENC16(heap->heap_udata, (void *) h); #else entries[i] = h; #endif (*p_used)++; break; #if defined(DUK_USE_HEAPPTR16) } else if (e16 == deleted16) { #else } else if (e == DUK__DELETED_MARKER(heap)) { #endif /* st_used remains the same, DELETED is counted as used */ DUK_DDD(DUK_DDDPRINT("insert hit (deleted): %ld", (long) i)); #if defined(DUK_USE_HEAPPTR16) entries16[i] = DUK_USE_HEAPPTR_ENC16(heap->heap_udata, (void *) h); #else entries[i] = h; #endif break; } DUK_DDD(DUK_DDDPRINT("insert miss: %ld", (long) i)); i = (i + step) % size; /* looping should never happen */ DUK_ASSERT(i != DUK__HASH_INITIAL(DUK_HSTRING_GET_HASH(h), size)); } } #if defined(DUK_USE_HEAPPTR16) DUK_LOCAL duk_hstring *duk__find_matching_string_probe(duk_heap *heap, duk_uint16_t *entries16, duk_uint32_t size, const duk_uint8_t *str, duk_uint32_t blen, duk_uint32_t strhash) { #else DUK_LOCAL duk_hstring *duk__find_matching_string_probe(duk_heap *heap, duk_hstring **entries, duk_uint32_t size, const duk_uint8_t *str, duk_uint32_t blen, duk_uint32_t strhash) { #endif duk_uint32_t i; duk_uint32_t step; DUK_ASSERT(size > 0); i = DUK__HASH_INITIAL(strhash, size); step = DUK__HASH_PROBE_STEP(strhash); for (;;) { duk_hstring *e; #if defined(DUK_USE_HEAPPTR16) e = (duk_hstring *) DUK_USE_HEAPPTR_DEC16(heap->heap_udata, entries16[i]); #else e = entries[i]; #endif if (!e) { return NULL; } if (e != DUK__DELETED_MARKER(heap) && DUK_HSTRING_GET_BYTELEN(e) == blen) { if (DUK_MEMCMP((const void *) str, (const void *) DUK_HSTRING_GET_DATA(e), (size_t) blen) == 0) { DUK_DDD(DUK_DDDPRINT("find matching hit: %ld (step %ld, size %ld)", (long) i, (long) step, (long) size)); return e; } } DUK_DDD(DUK_DDDPRINT("find matching miss: %ld (step %ld, size %ld)", (long) i, (long) step, (long) size)); i = (i + step) % size; /* looping should never happen */ DUK_ASSERT(i != DUK__HASH_INITIAL(strhash, size)); } DUK_UNREACHABLE(); }
DUK_INTERNAL duk_bool_t duk_js_equals_helper(duk_hthread *thr, duk_tval *tv_x, duk_tval *tv_y, duk_small_int_t flags) { duk_context *ctx = (duk_context *) thr; duk_tval *tv_tmp; /* If flags != 0 (strict or SameValue), thr can be NULL. For loose * equals comparison it must be != NULL. */ DUK_ASSERT(flags != 0 || thr != NULL); /* * Same type? * * Note: since number values have no explicit tag in the 8-byte * representation, need the awkward if + switch. */ #if defined(DUK_USE_FASTINT) if (DUK_TVAL_IS_FASTINT(tv_x) && DUK_TVAL_IS_FASTINT(tv_y)) { if (DUK_TVAL_GET_FASTINT(tv_x) == DUK_TVAL_GET_FASTINT(tv_y)) { return 1; } else { return 0; } } else #endif if (DUK_TVAL_IS_NUMBER(tv_x) && DUK_TVAL_IS_NUMBER(tv_y)) { /* Catches both doubles and cases where only one argument is a fastint */ if (DUK_UNLIKELY((flags & DUK_EQUALS_FLAG_SAMEVALUE) != 0)) { /* SameValue */ return duk__js_samevalue_number(DUK_TVAL_GET_NUMBER(tv_x), DUK_TVAL_GET_NUMBER(tv_y)); } else { /* equals and strict equals */ return duk__js_equals_number(DUK_TVAL_GET_NUMBER(tv_x), DUK_TVAL_GET_NUMBER(tv_y)); } } else if (DUK_TVAL_GET_TAG(tv_x) == DUK_TVAL_GET_TAG(tv_y)) { switch (DUK_TVAL_GET_TAG(tv_x)) { case DUK_TAG_UNDEFINED: case DUK_TAG_NULL: { return 1; } case DUK_TAG_BOOLEAN: { return DUK_TVAL_GET_BOOLEAN(tv_x) == DUK_TVAL_GET_BOOLEAN(tv_y); } case DUK_TAG_POINTER: { return DUK_TVAL_GET_POINTER(tv_x) == DUK_TVAL_GET_POINTER(tv_y); } case DUK_TAG_STRING: case DUK_TAG_OBJECT: { /* heap pointer comparison suffices */ return DUK_TVAL_GET_HEAPHDR(tv_x) == DUK_TVAL_GET_HEAPHDR(tv_y); } case DUK_TAG_BUFFER: { if ((flags & (DUK_EQUALS_FLAG_STRICT | DUK_EQUALS_FLAG_SAMEVALUE)) != 0) { /* heap pointer comparison suffices */ return DUK_TVAL_GET_HEAPHDR(tv_x) == DUK_TVAL_GET_HEAPHDR(tv_y); } else { /* non-strict equality for buffers compares contents */ duk_hbuffer *h_x = DUK_TVAL_GET_BUFFER(tv_x); duk_hbuffer *h_y = DUK_TVAL_GET_BUFFER(tv_y); duk_size_t len_x = DUK_HBUFFER_GET_SIZE(h_x); duk_size_t len_y = DUK_HBUFFER_GET_SIZE(h_y); void *buf_x; void *buf_y; if (len_x != len_y) { return 0; } buf_x = (void *) DUK_HBUFFER_GET_DATA_PTR(thr->heap, h_x); buf_y = (void *) DUK_HBUFFER_GET_DATA_PTR(thr->heap, h_y); /* if len_x == len_y == 0, buf_x and/or buf_y may * be NULL, but that's OK. */ DUK_ASSERT(len_x == len_y); DUK_ASSERT(len_x == 0 || buf_x != NULL); DUK_ASSERT(len_y == 0 || buf_y != NULL); return (DUK_MEMCMP(buf_x, buf_y, len_x) == 0) ? 1 : 0; } } case DUK_TAG_LIGHTFUNC: { /* At least 'magic' has a significant impact on function * identity. */ duk_small_uint_t lf_flags_x; duk_small_uint_t lf_flags_y; duk_c_function func_x; duk_c_function func_y; DUK_TVAL_GET_LIGHTFUNC(tv_x, func_x, lf_flags_x); DUK_TVAL_GET_LIGHTFUNC(tv_y, func_y, lf_flags_y); return ((func_x == func_y) && (lf_flags_x == lf_flags_y)) ? 1 : 0; } #if defined(DUK_USE_FASTINT) case DUK_TAG_FASTINT: #endif default: { DUK_ASSERT(DUK_TVAL_IS_NUMBER(tv_x)); DUK_ASSERT(DUK_TVAL_IS_NUMBER(tv_y)); DUK_UNREACHABLE(); return 0; } } } if ((flags & (DUK_EQUALS_FLAG_STRICT | DUK_EQUALS_FLAG_SAMEVALUE)) != 0) { return 0; } DUK_ASSERT(flags == 0); /* non-strict equality from here on */ /* * Types are different; various cases for non-strict comparison * * Since comparison is symmetric, we use a "swap trick" to reduce * code size. */ /* Undefined/null are considered equal (e.g. "null == undefined" -> true). */ if ((DUK_TVAL_IS_UNDEFINED(tv_x) && DUK_TVAL_IS_NULL(tv_y)) || (DUK_TVAL_IS_NULL(tv_x) && DUK_TVAL_IS_UNDEFINED(tv_y))) { return 1; } /* Number/string-or-buffer -> coerce string to number (e.g. "'1.5' == 1.5" -> true). */ if (DUK_TVAL_IS_NUMBER(tv_x) && (DUK_TVAL_IS_STRING(tv_y) || DUK_TVAL_IS_BUFFER(tv_y))) { /* the next 'if' is guaranteed to match after swap */ tv_tmp = tv_x; tv_x = tv_y; tv_y = tv_tmp; } if ((DUK_TVAL_IS_STRING(tv_x) || DUK_TVAL_IS_BUFFER(tv_x)) && DUK_TVAL_IS_NUMBER(tv_y)) { /* XXX: this is possible without resorting to the value stack */ duk_double_t d1, d2; d2 = DUK_TVAL_GET_NUMBER(tv_y); duk_push_tval(ctx, tv_x); duk_to_string(ctx, -1); /* buffer values are coerced first to string here */ duk_to_number(ctx, -1); d1 = duk_require_number(ctx, -1); duk_pop(ctx); return duk__js_equals_number(d1, d2); } /* Buffer/string -> compare contents. */ if (DUK_TVAL_IS_BUFFER(tv_x) && DUK_TVAL_IS_STRING(tv_y)) { tv_tmp = tv_x; tv_x = tv_y; tv_y = tv_tmp; } if (DUK_TVAL_IS_STRING(tv_x) && DUK_TVAL_IS_BUFFER(tv_y)) { duk_hstring *h_x = DUK_TVAL_GET_STRING(tv_x); duk_hbuffer *h_y = DUK_TVAL_GET_BUFFER(tv_y); duk_size_t len_x = DUK_HSTRING_GET_BYTELEN(h_x); duk_size_t len_y = DUK_HBUFFER_GET_SIZE(h_y); void *buf_x; void *buf_y; if (len_x != len_y) { return 0; } buf_x = (void *) DUK_HSTRING_GET_DATA(h_x); buf_y = (void *) DUK_HBUFFER_GET_DATA_PTR(thr->heap, h_y); /* if len_x == len_y == 0, buf_x and/or buf_y may * be NULL, but that's OK. */ DUK_ASSERT(len_x == len_y); DUK_ASSERT(len_x == 0 || buf_x != NULL); DUK_ASSERT(len_y == 0 || buf_y != NULL); return (DUK_MEMCMP(buf_x, buf_y, len_x) == 0) ? 1 : 0; } /* Boolean/any -> coerce boolean to number and try again. If boolean is * compared to a pointer, the final comparison after coercion now always * yields false (as pointer vs. number compares to false), but this is * not special cased. */ if (DUK_TVAL_IS_BOOLEAN(tv_x)) { tv_tmp = tv_x; tv_x = tv_y; tv_y = tv_tmp; } if (DUK_TVAL_IS_BOOLEAN(tv_y)) { /* ToNumber(bool) is +1.0 or 0.0. Tagged boolean value is always 0 or 1. */ duk_bool_t rc; DUK_ASSERT(DUK_TVAL_GET_BOOLEAN(tv_y) == 0 || DUK_TVAL_GET_BOOLEAN(tv_y) == 1); duk_push_tval(ctx, tv_x); duk_push_int(ctx, DUK_TVAL_GET_BOOLEAN(tv_y)); rc = duk_js_equals_helper(thr, duk_get_tval(ctx, -2), duk_get_tval(ctx, -1), 0 /*flags:nonstrict*/); duk_pop_2(ctx); return rc; } /* String-number-buffer/object -> coerce object to primitive (apparently without hint), then try again. */ if ((DUK_TVAL_IS_STRING(tv_x) || DUK_TVAL_IS_NUMBER(tv_x) || DUK_TVAL_IS_BUFFER(tv_x)) && DUK_TVAL_IS_OBJECT(tv_y)) { tv_tmp = tv_x; tv_x = tv_y; tv_y = tv_tmp; } if (DUK_TVAL_IS_OBJECT(tv_x) && (DUK_TVAL_IS_STRING(tv_y) || DUK_TVAL_IS_NUMBER(tv_y) || DUK_TVAL_IS_BUFFER(tv_y))) { duk_bool_t rc; duk_push_tval(ctx, tv_x); duk_push_tval(ctx, tv_y); duk_to_primitive(ctx, -2, DUK_HINT_NONE); /* apparently no hint? */ rc = duk_js_equals_helper(thr, duk_get_tval(ctx, -2), duk_get_tval(ctx, -1), 0 /*flags:nonstrict*/); duk_pop_2(ctx); return rc; } /* Nothing worked -> not equal. */ return 0; }
DUK_INTERNAL void duk_regexp_compile(duk_hthread *thr) { duk_context *ctx = (duk_context *) thr; duk_re_compiler_ctx re_ctx; duk_lexer_point lex_point; duk_hstring *h_pattern; duk_hstring *h_flags; duk__re_disjunction_info ign_disj; DUK_ASSERT(thr != NULL); DUK_ASSERT(ctx != NULL); /* * Args validation */ /* TypeError if fails */ h_pattern = duk_require_hstring(ctx, -2); h_flags = duk_require_hstring(ctx, -1); /* * Create normalized 'source' property (E5 Section 15.10.3). */ /* [ ... pattern flags ] */ duk__create_escaped_source(thr, -2); /* [ ... pattern flags escaped_source ] */ /* * Init compilation context */ /* [ ... pattern flags escaped_source buffer ] */ DUK_MEMZERO(&re_ctx, sizeof(re_ctx)); DUK_LEXER_INITCTX(&re_ctx.lex); /* duplicate zeroing, expect for (possible) NULL inits */ re_ctx.thr = thr; re_ctx.lex.thr = thr; re_ctx.lex.input = DUK_HSTRING_GET_DATA(h_pattern); re_ctx.lex.input_length = DUK_HSTRING_GET_BYTELEN(h_pattern); re_ctx.lex.token_limit = DUK_RE_COMPILE_TOKEN_LIMIT; re_ctx.recursion_limit = DUK_USE_REGEXP_COMPILER_RECLIMIT; re_ctx.re_flags = duk__parse_regexp_flags(thr, h_flags); DUK_BW_INIT_PUSHBUF(thr, &re_ctx.bw, DUK__RE_INITIAL_BUFSIZE); DUK_DD(DUK_DDPRINT("regexp compiler ctx initialized, flags=0x%08lx, recursion_limit=%ld", (unsigned long) re_ctx.re_flags, (long) re_ctx.recursion_limit)); /* * Init lexer */ lex_point.offset = 0; /* expensive init, just want to fill window */ lex_point.line = 1; DUK_LEXER_SETPOINT(&re_ctx.lex, &lex_point); /* * Compilation */ DUK_DD(DUK_DDPRINT("starting regexp compilation")); duk__append_u32(&re_ctx, DUK_REOP_SAVE); duk__append_u32(&re_ctx, 0); duk__parse_disjunction(&re_ctx, 1 /*expect_eof*/, &ign_disj); duk__append_u32(&re_ctx, DUK_REOP_SAVE); duk__append_u32(&re_ctx, 1); duk__append_u32(&re_ctx, DUK_REOP_MATCH); /* * Check for invalid backreferences; note that it is NOT an error * to back-reference a capture group which has not yet been introduced * in the pattern (as in /\1(foo)/); in fact, the backreference will * always match! It IS an error to back-reference a capture group * which will never be introduced in the pattern. Thus, we can check * for such references only after parsing is complete. */ if (re_ctx.highest_backref > re_ctx.captures) { DUK_ERROR_SYNTAX(thr, DUK_STR_INVALID_BACKREFS); } /* * Emit compiled regexp header: flags, ncaptures * (insertion order inverted on purpose) */ duk__insert_u32(&re_ctx, 0, (re_ctx.captures + 1) * 2); duk__insert_u32(&re_ctx, 0, re_ctx.re_flags); /* [ ... pattern flags escaped_source buffer ] */ DUK_BW_COMPACT(thr, &re_ctx.bw); duk_to_string(ctx, -1); /* coerce to string */ /* [ ... pattern flags escaped_source bytecode ] */ /* * Finalize stack */ duk_remove(ctx, -4); /* -> [ ... flags escaped_source bytecode ] */ duk_remove(ctx, -3); /* -> [ ... escaped_source bytecode ] */ DUK_DD(DUK_DDPRINT("regexp compilation successful, bytecode: %!T, escaped source: %!T", (duk_tval *) duk_get_tval(ctx, -1), (duk_tval *) duk_get_tval(ctx, -2))); }
DUK_INTERNAL duk_ret_t duk_bi_string_prototype_indexof_shared(duk_context *ctx) { duk_hthread *thr = (duk_hthread *) ctx; duk_hstring *h_this; duk_hstring *h_search; duk_int_t clen_this; duk_int_t cpos; duk_int_t bpos; const duk_uint8_t *p_start, *p_end, *p; const duk_uint8_t *q_start; duk_int_t q_blen; duk_uint8_t firstbyte; duk_uint8_t t; duk_small_int_t is_lastindexof = duk_get_current_magic(ctx); /* 0=indexOf, 1=lastIndexOf */ h_this = duk_push_this_coercible_to_string(ctx); DUK_ASSERT(h_this != NULL); clen_this = (duk_int_t) DUK_HSTRING_GET_CHARLEN(h_this); h_search = duk_to_hstring(ctx, 0); DUK_ASSERT(h_search != NULL); q_start = DUK_HSTRING_GET_DATA(h_search); q_blen = (duk_int_t) DUK_HSTRING_GET_BYTELEN(h_search); duk_to_number(ctx, 1); if (duk_is_nan(ctx, 1) && is_lastindexof) { /* indexOf: NaN should cause pos to be zero. * lastIndexOf: NaN should cause pos to be +Infinity * (and later be clamped to len). */ cpos = clen_this; } else { cpos = duk_to_int_clamped(ctx, 1, 0, clen_this); } /* Empty searchstring always matches; cpos must be clamped here. * (If q_blen were < 0 due to clamped coercion, it would also be * caught here.) */ if (q_blen <= 0) { duk_push_int(ctx, cpos); return 1; } DUK_ASSERT(q_blen > 0); bpos = (duk_int_t) duk_heap_strcache_offset_char2byte(thr, h_this, (duk_uint32_t) cpos); p_start = DUK_HSTRING_GET_DATA(h_this); p_end = p_start + DUK_HSTRING_GET_BYTELEN(h_this); p = p_start + bpos; /* This loop is optimized for size. For speed, there should be * two separate loops, and we should ensure that memcmp() can be * used without an extra "will searchstring fit" check. Doing * the preconditioning for 'p' and 'p_end' is easy but cpos * must be updated if 'p' is wound back (backward scanning). */ firstbyte = q_start[0]; /* leading byte of match string */ while (p <= p_end && p >= p_start) { t = *p; /* For Ecmascript strings, this check can only match for * initial UTF-8 bytes (not continuation bytes). For other * strings all bets are off. */ if ((t == firstbyte) && ((duk_size_t) (p_end - p) >= (duk_size_t) q_blen)) { DUK_ASSERT(q_blen > 0); /* no issues with memcmp() zero size, even if broken */ if (DUK_MEMCMP(p, q_start, (duk_size_t) q_blen) == 0) { duk_push_int(ctx, cpos); return 1; } } /* track cpos while scanning */ if (is_lastindexof) { /* when going backwards, we decrement cpos 'early'; * 'p' may point to a continuation byte of the char * at offset 'cpos', but that's OK because we'll * backtrack all the way to the initial byte. */ if ((t & 0xc0) != 0x80) { cpos--; } p--; } else { if ((t & 0xc0) != 0x80) { cpos++; } p++; } } /* Not found. Empty string case is handled specially above. */ duk_push_int(ctx, -1); return 1; }
DUK_INTERNAL duk_ret_t duk_bi_string_prototype_replace(duk_context *ctx) { duk_hthread *thr = (duk_hthread *) ctx; duk_hstring *h_input; duk_hstring *h_match; duk_hstring *h_search; duk_hobject *h_re; duk_bufwriter_ctx bw_alloc; duk_bufwriter_ctx *bw; #ifdef DUK_USE_REGEXP_SUPPORT duk_bool_t is_regexp; duk_bool_t is_global; #endif duk_bool_t is_repl_func; duk_uint32_t match_start_coff, match_start_boff; #ifdef DUK_USE_REGEXP_SUPPORT duk_int_t match_caps; #endif duk_uint32_t prev_match_end_boff; const duk_uint8_t *r_start, *r_end, *r; /* repl string scan */ duk_size_t tmp_sz; DUK_ASSERT_TOP(ctx, 2); h_input = duk_push_this_coercible_to_string(ctx); DUK_ASSERT(h_input != NULL); bw = &bw_alloc; DUK_BW_INIT_PUSHBUF(thr, bw, DUK_HSTRING_GET_BYTELEN(h_input)); /* input size is good output starting point */ DUK_ASSERT_TOP(ctx, 4); /* stack[0] = search value * stack[1] = replace value * stack[2] = input string * stack[3] = result buffer */ h_re = duk_get_hobject_with_class(ctx, 0, DUK_HOBJECT_CLASS_REGEXP); if (h_re) { #ifdef DUK_USE_REGEXP_SUPPORT is_regexp = 1; is_global = duk_get_prop_stridx_boolean(ctx, 0, DUK_STRIDX_GLOBAL, NULL); if (is_global) { /* start match from beginning */ duk_push_int(ctx, 0); duk_put_prop_stridx(ctx, 0, DUK_STRIDX_LAST_INDEX); } #else /* DUK_USE_REGEXP_SUPPORT */ return DUK_RET_UNSUPPORTED_ERROR; #endif /* DUK_USE_REGEXP_SUPPORT */ } else { duk_to_string(ctx, 0); #ifdef DUK_USE_REGEXP_SUPPORT is_regexp = 0; is_global = 0; #endif } if (duk_is_function(ctx, 1)) { is_repl_func = 1; r_start = NULL; r_end = NULL; } else { duk_hstring *h_repl; is_repl_func = 0; h_repl = duk_to_hstring(ctx, 1); DUK_ASSERT(h_repl != NULL); r_start = DUK_HSTRING_GET_DATA(h_repl); r_end = r_start + DUK_HSTRING_GET_BYTELEN(h_repl); } prev_match_end_boff = 0; for (;;) { /* * If matching with a regexp: * - non-global RegExp: lastIndex not touched on a match, zeroed * on a non-match * - global RegExp: on match, lastIndex will be updated by regexp * executor to point to next char after the matching part (so that * characters in the matching part are not matched again) * * If matching with a string: * - always non-global match, find first occurrence * * We need: * - The character offset of start-of-match for the replacer function * - The byte offsets for start-of-match and end-of-match to implement * the replacement values $&, $`, and $', and to copy non-matching * input string portions (including header and trailer) verbatim. * * NOTE: the E5.1 specification is a bit vague how the RegExp should * behave in the replacement process; e.g. is matching done first for * all matches (in the global RegExp case) before any replacer calls * are made? See: test-bi-string-proto-replace.js for discussion. */ DUK_ASSERT_TOP(ctx, 4); #ifdef DUK_USE_REGEXP_SUPPORT if (is_regexp) { duk_dup(ctx, 0); duk_dup(ctx, 2); duk_regexp_match(thr); /* [ ... regexp input ] -> [ res_obj ] */ if (!duk_is_object(ctx, -1)) { duk_pop(ctx); break; } duk_get_prop_stridx(ctx, -1, DUK_STRIDX_INDEX); DUK_ASSERT(duk_is_number(ctx, -1)); match_start_coff = duk_get_int(ctx, -1); duk_pop(ctx); duk_get_prop_index(ctx, -1, 0); DUK_ASSERT(duk_is_string(ctx, -1)); h_match = duk_get_hstring(ctx, -1); DUK_ASSERT(h_match != NULL); duk_pop(ctx); /* h_match is borrowed, remains reachable through match_obj */ if (DUK_HSTRING_GET_BYTELEN(h_match) == 0) { /* This should be equivalent to match() algorithm step 8.f.iii.2: * detect an empty match and allow it, but don't allow it twice. */ duk_uint32_t last_index; duk_get_prop_stridx(ctx, 0, DUK_STRIDX_LAST_INDEX); last_index = (duk_uint32_t) duk_get_uint(ctx, -1); DUK_DDD(DUK_DDDPRINT("empty match, bump lastIndex: %ld -> %ld", (long) last_index, (long) (last_index + 1))); duk_pop(ctx); duk_push_int(ctx, last_index + 1); duk_put_prop_stridx(ctx, 0, DUK_STRIDX_LAST_INDEX); } DUK_ASSERT(duk_get_length(ctx, -1) <= DUK_INT_MAX); /* string limits */ match_caps = (duk_int_t) duk_get_length(ctx, -1); } else { #else /* DUK_USE_REGEXP_SUPPORT */ { /* unconditionally */ #endif /* DUK_USE_REGEXP_SUPPORT */ const duk_uint8_t *p_start, *p_end, *p; /* input string scan */ const duk_uint8_t *q_start; /* match string */ duk_size_t q_blen; #ifdef DUK_USE_REGEXP_SUPPORT DUK_ASSERT(!is_global); /* single match always */ #endif p_start = DUK_HSTRING_GET_DATA(h_input); p_end = p_start + DUK_HSTRING_GET_BYTELEN(h_input); p = p_start; h_search = duk_get_hstring(ctx, 0); DUK_ASSERT(h_search != NULL); q_start = DUK_HSTRING_GET_DATA(h_search); q_blen = (duk_size_t) DUK_HSTRING_GET_BYTELEN(h_search); p_end -= q_blen; /* ensure full memcmp() fits in while */ match_start_coff = 0; while (p <= p_end) { DUK_ASSERT(p + q_blen <= DUK_HSTRING_GET_DATA(h_input) + DUK_HSTRING_GET_BYTELEN(h_input)); if (DUK_MEMCMP((void *) p, (void *) q_start, (size_t) q_blen) == 0) { duk_dup(ctx, 0); h_match = duk_get_hstring(ctx, -1); DUK_ASSERT(h_match != NULL); #ifdef DUK_USE_REGEXP_SUPPORT match_caps = 0; #endif goto found; } /* track utf-8 non-continuation bytes */ if ((p[0] & 0xc0) != 0x80) { match_start_coff++; } p++; } /* not found */ break; } found: /* stack[0] = search value * stack[1] = replace value * stack[2] = input string * stack[3] = result buffer * stack[4] = regexp match OR match string */ match_start_boff = duk_heap_strcache_offset_char2byte(thr, h_input, match_start_coff); tmp_sz = (duk_size_t) (match_start_boff - prev_match_end_boff); DUK_BW_WRITE_ENSURE_BYTES(thr, bw, DUK_HSTRING_GET_DATA(h_input) + prev_match_end_boff, tmp_sz); prev_match_end_boff = match_start_boff + DUK_HSTRING_GET_BYTELEN(h_match); if (is_repl_func) { duk_idx_t idx_args; duk_hstring *h_repl; /* regexp res_obj is at index 4 */ duk_dup(ctx, 1); idx_args = duk_get_top(ctx); #ifdef DUK_USE_REGEXP_SUPPORT if (is_regexp) { duk_int_t idx; duk_require_stack(ctx, match_caps + 2); for (idx = 0; idx < match_caps; idx++) { /* match followed by capture(s) */ duk_get_prop_index(ctx, 4, idx); } } else { #else /* DUK_USE_REGEXP_SUPPORT */ { /* unconditionally */ #endif /* DUK_USE_REGEXP_SUPPORT */ /* match == search string, by definition */ duk_dup(ctx, 0); } duk_push_int(ctx, match_start_coff); duk_dup(ctx, 2); /* [ ... replacer match [captures] match_char_offset input ] */ duk_call(ctx, duk_get_top(ctx) - idx_args); h_repl = duk_to_hstring(ctx, -1); /* -> [ ... repl_value ] */ DUK_ASSERT(h_repl != NULL); DUK_BW_WRITE_ENSURE_HSTRING(thr, bw, h_repl); duk_pop(ctx); /* repl_value */ } else { r = r_start; while (r < r_end) { duk_int_t ch1; duk_int_t ch2; #ifdef DUK_USE_REGEXP_SUPPORT duk_int_t ch3; #endif duk_size_t left; ch1 = *r++; if (ch1 != DUK_ASC_DOLLAR) { goto repl_write; } left = r_end - r; if (left <= 0) { goto repl_write; } ch2 = r[0]; switch ((int) ch2) { case DUK_ASC_DOLLAR: { ch1 = (1 << 8) + DUK_ASC_DOLLAR; goto repl_write; } case DUK_ASC_AMP: { DUK_BW_WRITE_ENSURE_HSTRING(thr, bw, h_match); r++; continue; } case DUK_ASC_GRAVE: { tmp_sz = (duk_size_t) match_start_boff; DUK_BW_WRITE_ENSURE_BYTES(thr, bw, DUK_HSTRING_GET_DATA(h_input), tmp_sz); r++; continue; } case DUK_ASC_SINGLEQUOTE: { duk_uint32_t match_end_boff; /* Use match charlen instead of bytelen, just in case the input and * match codepoint encodings would have different lengths. */ match_end_boff = duk_heap_strcache_offset_char2byte(thr, h_input, match_start_coff + DUK_HSTRING_GET_CHARLEN(h_match)); tmp_sz = (duk_size_t) (DUK_HSTRING_GET_BYTELEN(h_input) - match_end_boff); DUK_BW_WRITE_ENSURE_BYTES(thr, bw, DUK_HSTRING_GET_DATA(h_input) + match_end_boff, tmp_sz); r++; continue; } default: { #ifdef DUK_USE_REGEXP_SUPPORT duk_int_t capnum, captmp, capadv; /* XXX: optional check, match_caps is zero if no regexp, * so dollar will be interpreted literally anyway. */ if (!is_regexp) { goto repl_write; } if (!(ch2 >= DUK_ASC_0 && ch2 <= DUK_ASC_9)) { goto repl_write; } capnum = ch2 - DUK_ASC_0; capadv = 1; if (left >= 2) { ch3 = r[1]; if (ch3 >= DUK_ASC_0 && ch3 <= DUK_ASC_9) { captmp = capnum * 10 + (ch3 - DUK_ASC_0); if (captmp < match_caps) { capnum = captmp; capadv = 2; } } } if (capnum > 0 && capnum < match_caps) { DUK_ASSERT(is_regexp != 0); /* match_caps == 0 without regexps */ /* regexp res_obj is at offset 4 */ duk_get_prop_index(ctx, 4, (duk_uarridx_t) capnum); if (duk_is_string(ctx, -1)) { duk_hstring *h_tmp_str; h_tmp_str = duk_get_hstring(ctx, -1); DUK_ASSERT(h_tmp_str != NULL); DUK_BW_WRITE_ENSURE_HSTRING(thr, bw, h_tmp_str); } else { /* undefined -> skip (replaced with empty) */ } duk_pop(ctx); r += capadv; continue; } else { goto repl_write; } #else /* DUK_USE_REGEXP_SUPPORT */ goto repl_write; /* unconditionally */ #endif /* DUK_USE_REGEXP_SUPPORT */ } /* default case */ } /* switch (ch2) */ repl_write: /* ch1 = (r_increment << 8) + byte */ DUK_BW_WRITE_ENSURE_U8(thr, bw, (duk_uint8_t) (ch1 & 0xff)); r += ch1 >> 8; } /* while repl */ } /* if (is_repl_func) */ duk_pop(ctx); /* pop regexp res_obj or match string */ #ifdef DUK_USE_REGEXP_SUPPORT if (!is_global) { #else { /* unconditionally; is_global==0 */ #endif break; } } /* trailer */ tmp_sz = (duk_size_t) (DUK_HSTRING_GET_BYTELEN(h_input) - prev_match_end_boff); DUK_BW_WRITE_ENSURE_BYTES(thr, bw, DUK_HSTRING_GET_DATA(h_input) + prev_match_end_boff, tmp_sz); DUK_ASSERT_TOP(ctx, 4); DUK_BW_COMPACT(thr, bw); duk_to_string(ctx, -1); return 1; } /* * split() */ /* XXX: very messy now, but works; clean up, remove unused variables (nomimally * used so compiler doesn't complain). */ DUK_INTERNAL duk_ret_t duk_bi_string_prototype_split(duk_context *ctx) { duk_hthread *thr = (duk_hthread *) ctx; duk_hstring *h_input; duk_hstring *h_sep; duk_uint32_t limit; duk_uint32_t arr_idx; #ifdef DUK_USE_REGEXP_SUPPORT duk_bool_t is_regexp; #endif duk_bool_t matched; /* set to 1 if any match exists (needed for empty input special case) */ duk_uint32_t prev_match_end_coff, prev_match_end_boff; duk_uint32_t match_start_boff, match_start_coff; duk_uint32_t match_end_boff, match_end_coff; DUK_UNREF(thr); h_input = duk_push_this_coercible_to_string(ctx); DUK_ASSERT(h_input != NULL); duk_push_array(ctx); if (duk_is_undefined(ctx, 1)) { limit = 0xffffffffUL; } else { limit = duk_to_uint32(ctx, 1); } if (limit == 0) { return 1; } /* If the separator is a RegExp, make a "clone" of it. The specification * algorithm calls [[Match]] directly for specific indices; we emulate this * by tweaking lastIndex and using a "force global" variant of duk_regexp_match() * which will use global-style matching even when the RegExp itself is non-global. */ if (duk_is_undefined(ctx, 0)) { /* The spec algorithm first does "R = ToString(separator)" before checking * whether separator is undefined. Since this is side effect free, we can * skip the ToString() here. */ duk_dup(ctx, 2); duk_put_prop_index(ctx, 3, 0); return 1; } else if (duk_get_hobject_with_class(ctx, 0, DUK_HOBJECT_CLASS_REGEXP) != NULL) { #ifdef DUK_USE_REGEXP_SUPPORT duk_push_hobject_bidx(ctx, DUK_BIDX_REGEXP_CONSTRUCTOR); duk_dup(ctx, 0); duk_new(ctx, 1); /* [ ... RegExp val ] -> [ ... res ] */ duk_replace(ctx, 0); /* lastIndex is initialized to zero by new RegExp() */ is_regexp = 1; #else return DUK_RET_UNSUPPORTED_ERROR; #endif } else { duk_to_string(ctx, 0); #ifdef DUK_USE_REGEXP_SUPPORT is_regexp = 0; #endif } /* stack[0] = separator (string or regexp) * stack[1] = limit * stack[2] = input string * stack[3] = result array */ prev_match_end_boff = 0; prev_match_end_coff = 0; arr_idx = 0; matched = 0; for (;;) { /* * The specification uses RegExp [[Match]] to attempt match at specific * offsets. We don't have such a primitive, so we use an actual RegExp * and tweak lastIndex. Since the RegExp may be non-global, we use a * special variant which forces global-like behavior for matching. */ DUK_ASSERT_TOP(ctx, 4); #ifdef DUK_USE_REGEXP_SUPPORT if (is_regexp) { duk_dup(ctx, 0); duk_dup(ctx, 2); duk_regexp_match_force_global(thr); /* [ ... regexp input ] -> [ res_obj ] */ if (!duk_is_object(ctx, -1)) { duk_pop(ctx); break; } matched = 1; duk_get_prop_stridx(ctx, -1, DUK_STRIDX_INDEX); DUK_ASSERT(duk_is_number(ctx, -1)); match_start_coff = duk_get_int(ctx, -1); match_start_boff = duk_heap_strcache_offset_char2byte(thr, h_input, match_start_coff); duk_pop(ctx); if (match_start_coff == DUK_HSTRING_GET_CHARLEN(h_input)) { /* don't allow an empty match at the end of the string */ duk_pop(ctx); break; } duk_get_prop_stridx(ctx, 0, DUK_STRIDX_LAST_INDEX); DUK_ASSERT(duk_is_number(ctx, -1)); match_end_coff = duk_get_int(ctx, -1); match_end_boff = duk_heap_strcache_offset_char2byte(thr, h_input, match_end_coff); duk_pop(ctx); /* empty match -> bump and continue */ if (prev_match_end_boff == match_end_boff) { duk_push_int(ctx, match_end_coff + 1); duk_put_prop_stridx(ctx, 0, DUK_STRIDX_LAST_INDEX); duk_pop(ctx); continue; } } else { #else /* DUK_USE_REGEXP_SUPPORT */ { /* unconditionally */ #endif /* DUK_USE_REGEXP_SUPPORT */ const duk_uint8_t *p_start, *p_end, *p; /* input string scan */ const duk_uint8_t *q_start; /* match string */ duk_size_t q_blen, q_clen; p_start = DUK_HSTRING_GET_DATA(h_input); p_end = p_start + DUK_HSTRING_GET_BYTELEN(h_input); p = p_start + prev_match_end_boff; h_sep = duk_get_hstring(ctx, 0); DUK_ASSERT(h_sep != NULL); q_start = DUK_HSTRING_GET_DATA(h_sep); q_blen = (duk_size_t) DUK_HSTRING_GET_BYTELEN(h_sep); q_clen = (duk_size_t) DUK_HSTRING_GET_CHARLEN(h_sep); p_end -= q_blen; /* ensure full memcmp() fits in while */ match_start_coff = prev_match_end_coff; if (q_blen == 0) { /* Handle empty separator case: it will always match, and always * triggers the check in step 13.c.iii initially. Note that we * must skip to either end of string or start of first codepoint, * skipping over any continuation bytes! * * Don't allow an empty string to match at the end of the input. */ matched = 1; /* empty separator can always match */ match_start_coff++; p++; while (p < p_end) { if ((p[0] & 0xc0) != 0x80) { goto found; } p++; } goto not_found; } DUK_ASSERT(q_blen > 0 && q_clen > 0); while (p <= p_end) { DUK_ASSERT(p + q_blen <= DUK_HSTRING_GET_DATA(h_input) + DUK_HSTRING_GET_BYTELEN(h_input)); DUK_ASSERT(q_blen > 0); /* no issues with empty memcmp() */ if (DUK_MEMCMP((void *) p, (void *) q_start, (duk_size_t) q_blen) == 0) { /* never an empty match, so step 13.c.iii can't be triggered */ goto found; } /* track utf-8 non-continuation bytes */ if ((p[0] & 0xc0) != 0x80) { match_start_coff++; } p++; } not_found: /* not found */ break; found: matched = 1; match_start_boff = (duk_uint32_t) (p - p_start); match_end_coff = (duk_uint32_t) (match_start_coff + q_clen); /* constrained by string length */ match_end_boff = (duk_uint32_t) (match_start_boff + q_blen); /* ditto */ /* empty match (may happen with empty separator) -> bump and continue */ if (prev_match_end_boff == match_end_boff) { prev_match_end_boff++; prev_match_end_coff++; continue; } } /* if (is_regexp) */ /* stack[0] = separator (string or regexp) * stack[1] = limit * stack[2] = input string * stack[3] = result array * stack[4] = regexp res_obj (if is_regexp) */ DUK_DDD(DUK_DDDPRINT("split; match_start b=%ld,c=%ld, match_end b=%ld,c=%ld, prev_end b=%ld,c=%ld", (long) match_start_boff, (long) match_start_coff, (long) match_end_boff, (long) match_end_coff, (long) prev_match_end_boff, (long) prev_match_end_coff)); duk_push_lstring(ctx, (const char *) (DUK_HSTRING_GET_DATA(h_input) + prev_match_end_boff), (duk_size_t) (match_start_boff - prev_match_end_boff)); duk_put_prop_index(ctx, 3, arr_idx); arr_idx++; if (arr_idx >= limit) { goto hit_limit; } #ifdef DUK_USE_REGEXP_SUPPORT if (is_regexp) { duk_size_t i, len; len = duk_get_length(ctx, 4); for (i = 1; i < len; i++) { DUK_ASSERT(i <= DUK_UARRIDX_MAX); /* cannot have >4G captures */ duk_get_prop_index(ctx, 4, (duk_uarridx_t) i); duk_put_prop_index(ctx, 3, arr_idx); arr_idx++; if (arr_idx >= limit) { goto hit_limit; } } duk_pop(ctx); /* lastIndex already set up for next match */ } else { #else /* DUK_USE_REGEXP_SUPPORT */ { /* unconditionally */ #endif /* DUK_USE_REGEXP_SUPPORT */ /* no action */ } prev_match_end_boff = match_end_boff; prev_match_end_coff = match_end_coff; continue; } /* for */ /* Combined step 11 (empty string special case) and 14-15. */ DUK_DDD(DUK_DDDPRINT("split trailer; prev_end b=%ld,c=%ld", (long) prev_match_end_boff, (long) prev_match_end_coff)); if (DUK_HSTRING_GET_CHARLEN(h_input) > 0 || !matched) { /* Add trailer if: * a) non-empty input * b) empty input and no (zero size) match found (step 11) */ duk_push_lstring(ctx, (const char *) DUK_HSTRING_GET_DATA(h_input) + prev_match_end_boff, (duk_size_t) (DUK_HSTRING_GET_BYTELEN(h_input) - prev_match_end_boff)); duk_put_prop_index(ctx, 3, arr_idx); /* No arr_idx update or limit check */ } return 1; hit_limit: #ifdef DUK_USE_REGEXP_SUPPORT if (is_regexp) { duk_pop(ctx); } #endif return 1; } /* * Various */ #ifdef DUK_USE_REGEXP_SUPPORT DUK_LOCAL void duk__to_regexp_helper(duk_context *ctx, duk_idx_t index, duk_bool_t force_new) { duk_hobject *h; /* Shared helper for match() steps 3-4, search() steps 3-4. */ DUK_ASSERT(index >= 0); if (force_new) { goto do_new; } h = duk_get_hobject_with_class(ctx, index, DUK_HOBJECT_CLASS_REGEXP); if (!h) { goto do_new; } return; do_new: duk_push_hobject_bidx(ctx, DUK_BIDX_REGEXP_CONSTRUCTOR); duk_dup(ctx, index); duk_new(ctx, 1); /* [ ... RegExp val ] -> [ ... res ] */ duk_replace(ctx, index); } #endif /* DUK_USE_REGEXP_SUPPORT */ #ifdef DUK_USE_REGEXP_SUPPORT DUK_INTERNAL duk_ret_t duk_bi_string_prototype_search(duk_context *ctx) { duk_hthread *thr = (duk_hthread *) ctx; /* Easiest way to implement the search required by the specification * is to do a RegExp test() with lastIndex forced to zero. To avoid * side effects on the argument, "clone" the RegExp if a RegExp was * given as input. * * The global flag of the RegExp should be ignored; setting lastIndex * to zero (which happens when "cloning" the RegExp) should have an * equivalent effect. */ DUK_ASSERT_TOP(ctx, 1); (void) duk_push_this_coercible_to_string(ctx); /* at index 1 */ duk__to_regexp_helper(ctx, 0 /*index*/, 1 /*force_new*/); /* stack[0] = regexp * stack[1] = string */ /* Avoid using RegExp.prototype methods, as they're writable and * configurable and may have been changed. */ duk_dup(ctx, 0); duk_dup(ctx, 1); /* [ ... re_obj input ] */ duk_regexp_match(thr); /* -> [ ... res_obj ] */ if (!duk_is_object(ctx, -1)) { duk_push_int(ctx, -1); return 1; } duk_get_prop_stridx(ctx, -1, DUK_STRIDX_INDEX); DUK_ASSERT(duk_is_number(ctx, -1)); return 1; } #else /* DUK_USE_REGEXP_SUPPORT */ DUK_INTERNAL duk_ret_t duk_bi_string_prototype_search(duk_context *ctx) { DUK_UNREF(ctx); return DUK_RET_UNSUPPORTED_ERROR; }
DUK_LOCAL duk_ret_t duk__error_getter_helper(duk_context *ctx, duk_small_int_t output_type) { duk_hthread *thr = (duk_hthread *) ctx; duk_idx_t idx_td; duk_small_int_t i; /* traceback depth fits into 16 bits */ duk_small_int_t t; /* stack type fits into 16 bits */ duk_small_int_t count_func = 0; /* traceback depth ensures fits into 16 bits */ const char *str_tailcall = " tailcall"; const char *str_strict = " strict"; const char *str_construct = " construct"; const char *str_prevyield = " preventsyield"; const char *str_directeval = " directeval"; const char *str_empty = ""; DUK_ASSERT_TOP(ctx, 0); /* fixed arg count */ DUK_UNREF(thr); duk_push_this(ctx); duk_get_prop_stridx(ctx, -1, DUK_STRIDX_INT_TRACEDATA); idx_td = duk_get_top_index(ctx); duk_push_hstring_stridx(ctx, DUK_STRIDX_NEWLINE_4SPACE); duk_push_this(ctx); /* [ ... this tracedata sep this ] */ /* XXX: skip null filename? */ if (duk_check_type(ctx, idx_td, DUK_TYPE_OBJECT)) { /* Current tracedata contains 2 entries per callstack entry. */ for (i = 0; ; i += 2) { duk_int_t pc; duk_int_t line; duk_int_t flags; duk_double_t d; const char *funcname; const char *filename; duk_hobject *h_func; duk_hstring *h_name; duk_require_stack(ctx, 5); duk_get_prop_index(ctx, idx_td, i); duk_get_prop_index(ctx, idx_td, i + 1); d = duk_to_number(ctx, -1); pc = (duk_int_t) DUK_FMOD(d, DUK_DOUBLE_2TO32); flags = (duk_int_t) DUK_FLOOR(d / DUK_DOUBLE_2TO32); t = (duk_small_int_t) duk_get_type(ctx, -2); if (t == DUK_TYPE_OBJECT || t == DUK_TYPE_LIGHTFUNC) { /* * Ecmascript/native function call or lightfunc call */ count_func++; /* [ ... v1(func) v2(pc+flags) ] */ h_func = duk_get_hobject(ctx, -2); /* NULL for lightfunc */ duk_get_prop_stridx(ctx, -2, DUK_STRIDX_NAME); duk_get_prop_stridx(ctx, -3, DUK_STRIDX_FILE_NAME); #if defined(DUK_USE_PC2LINE) line = duk_hobject_pc2line_query(ctx, -4, (duk_uint_fast32_t) pc); #else line = 0; #endif /* [ ... v1 v2 name filename ] */ /* When looking for .fileName/.lineNumber, blame first * function which has a .fileName. */ if (duk_is_string(ctx, -1)) { if (output_type == DUK__OUTPUT_TYPE_FILENAME) { return 1; } else if (output_type == DUK__OUTPUT_TYPE_LINENUMBER) { duk_push_int(ctx, line); return 1; } } /* XXX: Change 'anon' handling here too, to use empty string for anonymous functions? */ /* XXX: Could be improved by coercing to a readable duk_tval (especially string escaping) */ h_name = duk_get_hstring(ctx, -2); /* may be NULL */ funcname = (h_name == NULL || h_name == DUK_HTHREAD_STRING_EMPTY_STRING(thr)) ? "[anon]" : (const char *) DUK_HSTRING_GET_DATA(h_name); filename = duk_get_string(ctx, -1); filename = filename ? filename : ""; DUK_ASSERT(funcname != NULL); DUK_ASSERT(filename != NULL); if (h_func == NULL) { duk_push_sprintf(ctx, "at %s light%s%s%s%s%s", (const char *) funcname, (const char *) ((flags & DUK_ACT_FLAG_STRICT) ? str_strict : str_empty), (const char *) ((flags & DUK_ACT_FLAG_TAILCALLED) ? str_tailcall : str_empty), (const char *) ((flags & DUK_ACT_FLAG_CONSTRUCT) ? str_construct : str_empty), (const char *) ((flags & DUK_ACT_FLAG_DIRECT_EVAL) ? str_directeval : str_empty), (const char *) ((flags & DUK_ACT_FLAG_PREVENT_YIELD) ? str_prevyield : str_empty)); } else if (DUK_HOBJECT_HAS_NATFUNC(h_func)) { duk_push_sprintf(ctx, "at %s (%s) native%s%s%s%s%s", (const char *) funcname, (const char *) filename, (const char *) ((flags & DUK_ACT_FLAG_STRICT) ? str_strict : str_empty), (const char *) ((flags & DUK_ACT_FLAG_TAILCALLED) ? str_tailcall : str_empty), (const char *) ((flags & DUK_ACT_FLAG_CONSTRUCT) ? str_construct : str_empty), (const char *) ((flags & DUK_ACT_FLAG_DIRECT_EVAL) ? str_directeval : str_empty), (const char *) ((flags & DUK_ACT_FLAG_PREVENT_YIELD) ? str_prevyield : str_empty)); } else { duk_push_sprintf(ctx, "at %s (%s:%ld)%s%s%s%s%s", (const char *) funcname, (const char *) filename, (long) line, (const char *) ((flags & DUK_ACT_FLAG_STRICT) ? str_strict : str_empty), (const char *) ((flags & DUK_ACT_FLAG_TAILCALLED) ? str_tailcall : str_empty), (const char *) ((flags & DUK_ACT_FLAG_CONSTRUCT) ? str_construct : str_empty), (const char *) ((flags & DUK_ACT_FLAG_DIRECT_EVAL) ? str_directeval : str_empty), (const char *) ((flags & DUK_ACT_FLAG_PREVENT_YIELD) ? str_prevyield : str_empty)); } duk_replace(ctx, -5); /* [ ... v1 v2 name filename str ] -> [ ... str v2 name filename ] */ duk_pop_n(ctx, 3); /* -> [ ... str ] */ } else if (t == DUK_TYPE_STRING) { /* * __FILE__ / __LINE__ entry, here 'pc' is line number directly. * Sometimes __FILE__ / __LINE__ is reported as the source for * the error (fileName, lineNumber), sometimes not. */ /* [ ... v1(filename) v2(line+flags) ] */ /* When looking for .fileName/.lineNumber, blame compilation * or C call site unless flagged not to do so. */ if (!(flags & DUK_TB_FLAG_NOBLAME_FILELINE)) { if (output_type == DUK__OUTPUT_TYPE_FILENAME) { duk_pop(ctx); return 1; } else if (output_type == DUK__OUTPUT_TYPE_LINENUMBER) { duk_push_int(ctx, pc); return 1; } } duk_push_sprintf(ctx, "at [anon] (%s:%ld) internal", (const char *) duk_get_string(ctx, -2), (long) pc); duk_replace(ctx, -3); /* [ ... v1 v2 str ] -> [ ... str v2 ] */ duk_pop(ctx); /* -> [ ... str ] */ } else { /* unknown, ignore */ duk_pop_2(ctx); break; } } if (count_func >= DUK_USE_TRACEBACK_DEPTH) { /* Possibly truncated; there is no explicit truncation * marker so this is the best we can do. */ duk_push_hstring_stridx(ctx, DUK_STRIDX_BRACKETED_ELLIPSIS); } } /* [ ... this tracedata sep this str1 ... strN ] */ if (output_type != DUK__OUTPUT_TYPE_TRACEBACK) { return 0; } else { /* The 'this' after 'sep' will get ToString() coerced by * duk_join() automatically. We don't want to do that * coercion when providing .fileName or .lineNumber (GH-254). */ duk_join(ctx, duk_get_top(ctx) - (idx_td + 2) /*count, not including sep*/); return 1; } }
DUK_INTERNAL duk_uint_fast32_t duk_heap_strcache_offset_char2byte(duk_hthread *thr, duk_hstring *h, duk_uint_fast32_t char_offset) { duk_heap *heap; duk_strcache *sce; duk_uint_fast32_t byte_offset; duk_small_int_t i; duk_bool_t use_cache; duk_uint_fast32_t dist_start, dist_end, dist_sce; duk_uint8_t *p_start; duk_uint8_t *p_end; duk_uint8_t *p_found; if (char_offset > DUK_HSTRING_GET_CHARLEN(h)) { goto error; } /* * For ASCII strings, the answer is simple. */ if (DUK_HSTRING_IS_ASCII(h)) { /* clen == blen -> pure ascii */ return char_offset; } /* * For non-ASCII strings, we need to scan forwards or backwards * from some starting point. The starting point may be the start * or end of the string, or some cached midpoint in the string * cache. * * For "short" strings we simply scan without checking or updating * the cache. For longer strings we check and update the cache as * necessary, inserting a new cache entry if none exists. */ DUK_DDD(DUK_DDDPRINT("non-ascii string %p, char_offset=%ld, clen=%ld, blen=%ld", (void *) h, (long) char_offset, (long) DUK_HSTRING_GET_CHARLEN(h), (long) DUK_HSTRING_GET_BYTELEN(h))); heap = thr->heap; sce = NULL; use_cache = (DUK_HSTRING_GET_CHARLEN(h) > DUK_HEAP_STRINGCACHE_NOCACHE_LIMIT); if (use_cache) { #ifdef DUK_USE_DDDPRINT DUK_DDD(DUK_DDDPRINT("stringcache before char2byte (using cache):")); for (i = 0; i < DUK_HEAP_STRCACHE_SIZE; i++) { duk_strcache *c = heap->strcache + i; DUK_DDD(DUK_DDDPRINT(" [%ld] -> h=%p, cidx=%ld, bidx=%ld", (long) i, (void *) c->h, (long) c->cidx, (long) c->bidx)); } #endif for (i = 0; i < DUK_HEAP_STRCACHE_SIZE; i++) { duk_strcache *c = heap->strcache + i; if (c->h == h) { sce = c; break; } } } /* * Scan from shortest distance: * - start of string * - end of string * - cache entry (if exists) */ DUK_ASSERT(DUK_HSTRING_GET_CHARLEN(h) >= char_offset); dist_start = char_offset; dist_end = DUK_HSTRING_GET_CHARLEN(h) - char_offset; dist_sce = 0; DUK_UNREF(dist_sce); /* initialize for debug prints, needed if sce==NULL */ p_start = (duk_uint8_t *) DUK_HSTRING_GET_DATA(h); p_end = (duk_uint8_t *) (p_start + DUK_HSTRING_GET_BYTELEN(h)); p_found = NULL; if (sce) { if (char_offset >= sce->cidx) { dist_sce = char_offset - sce->cidx; if ((dist_sce <= dist_start) && (dist_sce <= dist_end)) { DUK_DDD(DUK_DDDPRINT("non-ascii string, use_cache=%ld, sce=%p:%ld:%ld, " "dist_start=%ld, dist_end=%ld, dist_sce=%ld => " "scan forwards from sce", (long) use_cache, (void *) (sce ? sce->h : NULL), (sce ? (long) sce->cidx : (long) -1), (sce ? (long) sce->bidx : (long) -1), (long) dist_start, (long) dist_end, (long) dist_sce)); p_found = duk__scan_forwards(p_start + sce->bidx, p_end, dist_sce); goto scan_done; } } else { dist_sce = sce->cidx - char_offset; if ((dist_sce <= dist_start) && (dist_sce <= dist_end)) { DUK_DDD(DUK_DDDPRINT("non-ascii string, use_cache=%ld, sce=%p:%ld:%ld, " "dist_start=%ld, dist_end=%ld, dist_sce=%ld => " "scan backwards from sce", (long) use_cache, (void *) (sce ? sce->h : NULL), (sce ? (long) sce->cidx : (long) -1), (sce ? (long) sce->bidx : (long) -1), (long) dist_start, (long) dist_end, (long) dist_sce)); p_found = duk__scan_backwards(p_start + sce->bidx, p_start, dist_sce); goto scan_done; } } } /* no sce, or sce scan not best */ if (dist_start <= dist_end) { DUK_DDD(DUK_DDDPRINT("non-ascii string, use_cache=%ld, sce=%p:%ld:%ld, " "dist_start=%ld, dist_end=%ld, dist_sce=%ld => " "scan forwards from string start", (long) use_cache, (void *) (sce ? sce->h : NULL), (sce ? (long) sce->cidx : (long) -1), (sce ? (long) sce->bidx : (long) -1), (long) dist_start, (long) dist_end, (long) dist_sce)); p_found = duk__scan_forwards(p_start, p_end, dist_start); } else { DUK_DDD(DUK_DDDPRINT("non-ascii string, use_cache=%ld, sce=%p:%ld:%ld, " "dist_start=%ld, dist_end=%ld, dist_sce=%ld => " "scan backwards from string end", (long) use_cache, (void *) (sce ? sce->h : NULL), (sce ? (long) sce->cidx : (long) -1), (sce ? (long) sce->bidx : (long) -1), (long) dist_start, (long) dist_end, (long) dist_sce)); p_found = duk__scan_backwards(p_end, p_start, dist_end); } scan_done: if (!p_found) { /* Scan error: this shouldn't normally happen; it could happen if * string is not valid UTF-8 data, and clen/blen are not consistent * with the scanning algorithm. */ goto error; } DUK_ASSERT(p_found >= p_start); DUK_ASSERT(p_found <= p_end); /* may be equal */ byte_offset = (duk_uint32_t) (p_found - p_start); DUK_DDD(DUK_DDDPRINT("-> string %p, cidx %ld -> bidx %ld", (void *) h, (long) char_offset, (long) byte_offset)); /* * Update cache entry (allocating if necessary), and move the * cache entry to the first place (in an "LRU" policy). */ if (use_cache) { /* update entry, allocating if necessary */ if (!sce) { sce = heap->strcache + DUK_HEAP_STRCACHE_SIZE - 1; /* take last entry */ sce->h = h; } DUK_ASSERT(sce != NULL); sce->bidx = (duk_uint32_t) (p_found - p_start); sce->cidx = (duk_uint32_t) char_offset; /* LRU: move our entry to first */ if (sce > &heap->strcache[0]) { /* * A C * B A * C <- sce ==> B * D D */ duk_strcache tmp; tmp = *sce; DUK_MEMMOVE((void *) (&heap->strcache[1]), (void *) (&heap->strcache[0]), (size_t) (((char *) sce) - ((char *) &heap->strcache[0]))); heap->strcache[0] = tmp; /* 'sce' points to the wrong entry here, but is no longer used */ } #ifdef DUK_USE_DDDPRINT DUK_DDD(DUK_DDDPRINT("stringcache after char2byte (using cache):")); for (i = 0; i < DUK_HEAP_STRCACHE_SIZE; i++) { duk_strcache *c = heap->strcache + i; DUK_DDD(DUK_DDDPRINT(" [%ld] -> h=%p, cidx=%ld, bidx=%ld", (long) i, (void *) c->h, (long) c->cidx, (long) c->bidx)); } #endif } return byte_offset; error: DUK_ERROR(thr, DUK_ERR_INTERNAL_ERROR, "string scan error"); return 0; }
DUK_INTERNAL duk_ret_t duk_bi_function_constructor(duk_context *ctx) { duk_hthread *thr = (duk_hthread *) ctx; duk_hstring *h_sourcecode; duk_idx_t nargs; duk_idx_t i; duk_small_uint_t comp_flags; duk_hcompiledfunction *func; duk_hobject *outer_lex_env; duk_hobject *outer_var_env; /* normal and constructor calls have identical semantics */ nargs = duk_get_top(ctx); for (i = 0; i < nargs; i++) { duk_to_string(ctx, i); } if (nargs == 0) { duk_push_string(ctx, ""); duk_push_string(ctx, ""); } else if (nargs == 1) { /* XXX: cover this with the generic >1 case? */ duk_push_string(ctx, ""); } else { duk_insert(ctx, 0); /* [ arg1 ... argN-1 body] -> [body arg1 ... argN-1] */ duk_push_string(ctx, ","); duk_insert(ctx, 1); duk_join(ctx, nargs - 1); } /* [ body formals ], formals is comma separated list that needs to be parsed */ DUK_ASSERT_TOP(ctx, 2); /* XXX: this placeholder is not always correct, but use for now. * It will fail in corner cases; see test-dev-func-cons-args.js. */ duk_push_string(ctx, "function("); duk_dup(ctx, 1); duk_push_string(ctx, "){"); duk_dup(ctx, 0); duk_push_string(ctx, "}"); duk_concat(ctx, 5); /* [ body formals source ] */ DUK_ASSERT_TOP(ctx, 3); /* strictness is not inherited, intentional */ comp_flags = DUK_JS_COMPILE_FLAG_FUNCEXPR; duk_push_hstring_stridx(ctx, DUK_STRIDX_COMPILE); /* XXX: copy from caller? */ /* XXX: ignored now */ h_sourcecode = duk_require_hstring(ctx, -2); duk_js_compile(thr, (const duk_uint8_t *) DUK_HSTRING_GET_DATA(h_sourcecode), (duk_size_t) DUK_HSTRING_GET_BYTELEN(h_sourcecode), comp_flags); func = (duk_hcompiledfunction *) duk_get_hobject(ctx, -1); DUK_ASSERT(func != NULL); DUK_ASSERT(DUK_HOBJECT_IS_COMPILEDFUNCTION((duk_hobject *) func)); /* [ body formals source template ] */ /* only outer_lex_env matters, as functions always get a new * variable declaration environment. */ outer_lex_env = thr->builtins[DUK_BIDX_GLOBAL_ENV]; outer_var_env = thr->builtins[DUK_BIDX_GLOBAL_ENV]; duk_js_push_closure(thr, func, outer_var_env, outer_lex_env, 1 /*add_auto_proto*/); /* [ body formals source template closure ] */ return 1; }
/* Shared helper for providing .source, .global, .multiline, etc getters. */ DUK_INTERNAL duk_ret_t duk_bi_regexp_prototype_shared_getter(duk_hthread *thr) { duk_hstring *h_bc; duk_small_uint_t re_flags; duk_hobject *h; duk_int_t magic; DUK_ASSERT_TOP(thr, 0); duk_push_this(thr); h = duk_require_hobject(thr, -1); magic = duk_get_current_magic(thr); if (DUK_HOBJECT_GET_CLASS_NUMBER(h) == DUK_HOBJECT_CLASS_REGEXP) { duk_get_prop_stridx_short(thr, 0, DUK_STRIDX_INT_SOURCE); duk_get_prop_stridx_short(thr, 0, DUK_STRIDX_INT_BYTECODE); h_bc = duk_require_hstring(thr, -1); re_flags = (duk_small_uint_t) DUK_HSTRING_GET_DATA(h_bc)[0]; /* Safe even if h_bc length is 0 (= NUL) */ duk_pop(thr); } else if (h == thr->builtins[DUK_BIDX_REGEXP_PROTOTYPE]) { /* In ES2015 and ES2016 a TypeError would be thrown here. * However, this had real world issues so ES2017 draft * allows RegExp.prototype specifically, returning '(?:)' * for .source and undefined for all flags. */ if (magic != 16 /* .source */) { return 0; } duk_push_literal(thr, "(?:)"); /* .source handled by switch-case */ re_flags = 0; } else { DUK_DCERROR_TYPE_INVALID_ARGS(thr); } /* [ regexp source ] */ switch (magic) { case 0: { /* global */ duk_push_boolean(thr, (re_flags & DUK_RE_FLAG_GLOBAL)); break; } case 1: { /* ignoreCase */ duk_push_boolean(thr, (re_flags & DUK_RE_FLAG_IGNORE_CASE)); break; } case 2: { /* multiline */ duk_push_boolean(thr, (re_flags & DUK_RE_FLAG_MULTILINE)); break; } #if 0 /* Don't provide until implemented to avoid interfering with feature * detection in user code. */ case 3: /* sticky */ case 4: { /* unicode */ duk_push_false(thr); break; } #endif default: { /* source */ /* leave 'source' on top */ break; } } return 1; }