DUK_INTERNAL duk_ret_t duk_bi_string_prototype_slice(duk_context *ctx) { duk_hstring *h; duk_int_t start_pos, end_pos; duk_int_t len; h = duk_push_this_coercible_to_string(ctx); DUK_ASSERT(h != NULL); len = (duk_int_t) DUK_HSTRING_GET_CHARLEN(h); /* [ start end str ] */ start_pos = duk_to_int_clamped(ctx, 0, -len, len); if (start_pos < 0) { start_pos = len + start_pos; } if (duk_is_undefined(ctx, 1)) { end_pos = len; } else { end_pos = duk_to_int_clamped(ctx, 1, -len, len); if (end_pos < 0) { end_pos = len + end_pos; } } DUK_ASSERT(start_pos >= 0 && start_pos <= len); DUK_ASSERT(end_pos >= 0 && end_pos <= len); if (end_pos < start_pos) { end_pos = start_pos; } DUK_ASSERT(end_pos >= start_pos); duk_substring(ctx, -1, (duk_size_t) start_pos, (duk_size_t) end_pos); return 1; }
DUK_INTERNAL duk_ret_t duk_bi_string_prototype_char_code_at(duk_context *ctx) { duk_hthread *thr = (duk_hthread *) ctx; duk_int_t pos; duk_hstring *h; duk_bool_t clamped; /* XXX: faster implementation */ DUK_DDD(DUK_DDDPRINT("arg=%!T", (duk_tval *) duk_get_tval(ctx, 0))); h = duk_push_this_coercible_to_string(ctx); DUK_ASSERT(h != NULL); pos = duk_to_int_clamped_raw(ctx, 0 /*index*/, 0 /*min(incl)*/, DUK_HSTRING_GET_CHARLEN(h) - 1 /*max(incl)*/, &clamped /*out_clamped*/); if (clamped) { duk_push_number(ctx, DUK_DOUBLE_NAN); return 1; } duk_push_u32(ctx, (duk_uint32_t) duk_hstring_char_code_at_raw(thr, h, pos)); return 1; }
DUK_INTERNAL duk_ucodepoint_t duk_hstring_char_code_at_raw(duk_hthread *thr, duk_hstring *h, duk_uint_t pos) { duk_uint32_t boff; const duk_uint8_t *p, *p_start, *p_end; duk_ucodepoint_t cp; /* Caller must check character offset to be inside the string. */ DUK_ASSERT(thr != NULL); DUK_ASSERT(h != NULL); DUK_ASSERT_DISABLE(pos >= 0); /* unsigned */ DUK_ASSERT(pos < (duk_uint_t) DUK_HSTRING_GET_CHARLEN(h)); boff = duk_heap_strcache_offset_char2byte(thr, h, (duk_uint32_t) pos); DUK_DDD(DUK_DDDPRINT("charCodeAt: pos=%ld -> boff=%ld, str=%!O", (long) pos, (long) boff, (duk_heaphdr *) h)); DUK_ASSERT_DISABLE(boff >= 0); DUK_ASSERT(boff < DUK_HSTRING_GET_BYTELEN(h)); p_start = DUK_HSTRING_GET_DATA(h); p_end = p_start + DUK_HSTRING_GET_BYTELEN(h); p = p_start + boff; DUK_DDD(DUK_DDDPRINT("p_start=%p, p_end=%p, p=%p", (void *) p_start, (void *) p_end, (void *) p)); /* This may throw an error though not for valid E5 strings. */ cp = duk_unicode_decode_xutf8_checked(thr, &p, p_start, p_end); return cp; }
static duk_hstring *duk__alloc_init_hstring(duk_heap *heap, duk_uint8_t *str, duk_uint32_t blen, duk_uint32_t strhash) { duk_hstring *res = NULL; duk_uint8_t *data; duk_size_t alloc_size; duk_uarridx_t dummy; /* NUL terminate for convenient C access */ alloc_size = (duk_size_t) (sizeof(duk_hstring) + blen + 1); res = (duk_hstring *) DUK_ALLOC(heap, alloc_size); if (!res) { goto error; } DUK_MEMZERO(res, sizeof(duk_hstring)); #ifdef DUK_USE_EXPLICIT_NULL_INIT DUK_HEAPHDR_STRING_INIT_NULLS(&res->hdr); #endif DUK_HEAPHDR_SET_TYPE_AND_FLAGS(&res->hdr, DUK_HTYPE_STRING, 0); if (duk_js_to_arrayindex_raw_string(str, blen, &dummy)) { DUK_HSTRING_SET_ARRIDX(res); } /* All strings beginning with 0xff are treated as "internal", * even strings interned by the user. This allows user code to * create internal properties too, and makes behavior consistent * in case user code happens to use a string also used by Duktape * (such as string has already been interned and has the 'internal' * flag set). */ if (blen > 0 && str[0] == (duk_uint8_t) 0xff) { DUK_HSTRING_SET_INTERNAL(res); } res->hash = strhash; res->blen = blen; res->clen = (duk_uint32_t) duk_unicode_unvalidated_utf8_length(str, (duk_size_t) blen); /* clen <= blen */ data = (duk_uint8_t *) (res + 1); DUK_MEMCPY(data, str, blen); data[blen] = (duk_uint8_t) 0; DUK_DDD(DUK_DDDPRINT("interned string, hash=0x%08lx, blen=%ld, clen=%ld, has_arridx=%ld", (unsigned long) DUK_HSTRING_GET_HASH(res), (long) DUK_HSTRING_GET_BYTELEN(res), (long) DUK_HSTRING_GET_CHARLEN(res), (long) DUK_HSTRING_HAS_ARRIDX(res) ? 1 : 0)); return res; error: DUK_FREE(heap, res); return NULL; }
DUK_INTERNAL void duk_regexp_create_instance(duk_hthread *thr) { duk_context *ctx = (duk_context *) thr; duk_hobject *h; duk_hstring *h_bc; duk_small_int_t re_flags; /* [ ... escape_source bytecode ] */ h_bc = duk_get_hstring(ctx, -1); DUK_ASSERT(h_bc != NULL); DUK_ASSERT(DUK_HSTRING_GET_BYTELEN(h_bc) >= 1); /* always at least the header */ DUK_ASSERT(DUK_HSTRING_GET_CHARLEN(h_bc) >= 1); DUK_ASSERT((duk_small_int_t) DUK_HSTRING_GET_DATA(h_bc)[0] < 0x80); /* flags always encodes to 1 byte */ re_flags = (duk_small_int_t) DUK_HSTRING_GET_DATA(h_bc)[0]; /* [ ... escaped_source bytecode ] */ duk_push_object(ctx); h = duk_get_hobject(ctx, -1); DUK_ASSERT(h != NULL); duk_insert(ctx, -3); /* [ ... regexp_object escaped_source bytecode ] */ DUK_HOBJECT_SET_CLASS_NUMBER(h, DUK_HOBJECT_CLASS_REGEXP); DUK_HOBJECT_SET_PROTOTYPE_UPDREF(thr, h, thr->builtins[DUK_BIDX_REGEXP_PROTOTYPE]); duk_xdef_prop_stridx(ctx, -3, DUK_STRIDX_INT_BYTECODE, DUK_PROPDESC_FLAGS_NONE); /* [ ... regexp_object escaped_source ] */ duk_xdef_prop_stridx(ctx, -2, DUK_STRIDX_SOURCE, DUK_PROPDESC_FLAGS_NONE); /* [ ... regexp_object ] */ duk_push_boolean(ctx, (re_flags & DUK_RE_FLAG_GLOBAL)); duk_xdef_prop_stridx(ctx, -2, DUK_STRIDX_GLOBAL, DUK_PROPDESC_FLAGS_NONE); duk_push_boolean(ctx, (re_flags & DUK_RE_FLAG_IGNORE_CASE)); duk_xdef_prop_stridx(ctx, -2, DUK_STRIDX_IGNORE_CASE, DUK_PROPDESC_FLAGS_NONE); duk_push_boolean(ctx, (re_flags & DUK_RE_FLAG_MULTILINE)); duk_xdef_prop_stridx(ctx, -2, DUK_STRIDX_MULTILINE, DUK_PROPDESC_FLAGS_NONE); duk_push_int(ctx, 0); duk_xdef_prop_stridx(ctx, -2, DUK_STRIDX_LAST_INDEX, DUK_PROPDESC_FLAGS_W); /* [ ... regexp_object ] */ }
static duk_hstring *duk__alloc_init_hstring(duk_heap *heap, duk_uint8_t *str, duk_uint32_t blen, duk_uint32_t strhash) { duk_hstring *res = NULL; duk_uint8_t *data; duk_uint32_t alloc_size; duk_uint32_t dummy; /* NUL terminate for convenient C access */ alloc_size = sizeof(duk_hstring) + blen + 1; res = (duk_hstring *) DUK_ALLOC(heap, alloc_size); if (!res) { goto error; } DUK_MEMZERO(res, sizeof(duk_hstring)); #ifdef DUK_USE_EXPLICIT_NULL_INIT DUK_HEAPHDR_STRING_INIT_NULLS(&res->hdr); #endif DUK_HEAPHDR_SET_TYPE_AND_FLAGS(&res->hdr, DUK_HTYPE_STRING, 0); if (duk_js_to_arrayindex_raw_string(str, blen, &dummy)) { DUK_HSTRING_SET_ARRIDX(res); } res->hash = strhash; res->blen = blen; res->clen = (duk_uint32_t) duk_unicode_unvalidated_utf8_length(str, (duk_size_t) blen); /* clen <= blen */ data = (duk_uint8_t *) (res + 1); DUK_MEMCPY(data, str, blen); data[blen] = (duk_uint8_t) 0; DUK_DDD(DUK_DDDPRINT("interned string, hash=0x%08x, blen=%d, clen=%d, arridx=%d", DUK_HSTRING_GET_HASH(res), DUK_HSTRING_GET_BYTELEN(res), DUK_HSTRING_GET_CHARLEN(res), DUK_HSTRING_HAS_ARRIDX(res) ? 1 : 0)); return res; error: DUK_FREE(heap, res); return NULL; }
DUK_INTERNAL duk_ret_t duk_bi_string_prototype_substr(duk_context *ctx) { duk_hstring *h; duk_int_t start_pos, end_pos; duk_int_t len; /* Unlike non-obsolete String calls, substr() algorithm in E5.1 * specification will happily coerce undefined and null to strings * ("undefined" and "null"). */ duk_push_this(ctx); h = duk_to_hstring(ctx, -1); DUK_ASSERT(h != NULL); len = (duk_int_t) DUK_HSTRING_GET_CHARLEN(h); /* [ start length str ] */ /* The implementation for computing of start_pos and end_pos differs * from the standard algorithm, but is intended to result in the exactly * same behavior. This is not always obvious. */ /* combines steps 2 and 5; -len ensures max() not needed for step 5 */ start_pos = duk_to_int_clamped(ctx, 0, -len, len); if (start_pos < 0) { start_pos = len + start_pos; } DUK_ASSERT(start_pos >= 0 && start_pos <= len); /* combines steps 3, 6; step 7 is not needed */ if (duk_is_undefined(ctx, 1)) { end_pos = len; } else { DUK_ASSERT(start_pos <= len); end_pos = start_pos + duk_to_int_clamped(ctx, 1, 0, len - start_pos); } DUK_ASSERT(start_pos >= 0 && start_pos <= len); DUK_ASSERT(end_pos >= 0 && end_pos <= len); DUK_ASSERT(end_pos >= start_pos); duk_substring(ctx, -1, (duk_size_t) start_pos, (duk_size_t) end_pos); return 1; }
/* Magic: 0=charCodeAt, 1=codePointAt */ DUK_INTERNAL duk_ret_t duk_bi_string_prototype_char_code_at(duk_context *ctx) { duk_hthread *thr = (duk_hthread *) ctx; duk_int_t pos; duk_hstring *h; duk_bool_t clamped; duk_uint32_t cp; duk_int_t magic; /* XXX: faster implementation */ DUK_DDD(DUK_DDDPRINT("arg=%!T", (duk_tval *) duk_get_tval(ctx, 0))); h = duk_push_this_coercible_to_string(ctx); DUK_ASSERT(h != NULL); pos = duk_to_int_clamped_raw(ctx, 0 /*index*/, 0 /*min(incl)*/, DUK_HSTRING_GET_CHARLEN(h) - 1 /*max(incl)*/, &clamped /*out_clamped*/); #if defined(DUK_USE_ES6) magic = duk_get_current_magic(ctx); #else DUK_ASSERT(duk_get_current_magic(ctx) == 0); magic = 0; #endif if (clamped) { /* For out-of-bounds indices .charCodeAt() returns NaN and * .codePointAt() returns undefined. */ if (magic != 0) { return 0; } duk_push_nan(ctx); } else { cp = (duk_uint32_t) duk_hstring_char_code_at_raw(thr, h, pos, (duk_bool_t) magic /*surrogate_aware*/); duk_push_u32(ctx, cp); } return 1; }
void duk_hobject_enumerator_create(duk_context *ctx, duk_small_uint_t enum_flags) { duk_hthread *thr = (duk_hthread *) ctx; duk_hobject *enum_target; duk_hobject *curr; duk_hobject *res; #if defined(DUK_USE_ES6_PROXY) duk_hobject *h_proxy_target; duk_hobject *h_proxy_handler; duk_hobject *h_trap_result; #endif duk_uint_fast32_t i, len; /* used for array, stack, and entry indices */ DUK_ASSERT(ctx != NULL); DUK_DDD(DUK_DDDPRINT("create enumerator, stack top: %ld", (long) duk_get_top(ctx))); enum_target = duk_require_hobject(ctx, -1); DUK_ASSERT(enum_target != NULL); duk_push_object_internal(ctx); res = duk_require_hobject(ctx, -1); DUK_DDD(DUK_DDDPRINT("created internal object")); /* [enum_target res] */ /* Target must be stored so that we can recheck whether or not * keys still exist when we enumerate. This is not done if the * enumeration result comes from a proxy trap as there is no * real object to check against. */ duk_push_hobject(ctx, enum_target); duk_put_prop_stridx(ctx, -2, DUK_STRIDX_INT_TARGET); /* Initialize index so that we skip internal control keys. */ duk_push_int(ctx, DUK__ENUM_START_INDEX); duk_put_prop_stridx(ctx, -2, DUK_STRIDX_INT_NEXT); /* * Proxy object handling */ #if defined(DUK_USE_ES6_PROXY) if (DUK_LIKELY((enum_flags & DUK_ENUM_NO_PROXY_BEHAVIOR) != 0)) { goto skip_proxy; } if (DUK_LIKELY(!duk_hobject_proxy_check(thr, enum_target, &h_proxy_target, &h_proxy_handler))) { goto skip_proxy; } DUK_DDD(DUK_DDDPRINT("proxy enumeration")); duk_push_hobject(ctx, h_proxy_handler); if (!duk_get_prop_stridx(ctx, -1, DUK_STRIDX_ENUMERATE)) { /* No need to replace the 'enum_target' value in stack, only the * enum_target reference. This also ensures that the original * enum target is reachable, which keeps the proxy and the proxy * target reachable. We do need to replace the internal _target. */ DUK_DDD(DUK_DDDPRINT("no enumerate trap, enumerate proxy target instead")); DUK_DDD(DUK_DDDPRINT("h_proxy_target=%!O", (duk_heaphdr *) h_proxy_target)); enum_target = h_proxy_target; duk_push_hobject(ctx, enum_target); /* -> [ ... enum_target res handler undefined target ] */ duk_put_prop_stridx(ctx, -4, DUK_STRIDX_INT_TARGET); duk_pop_2(ctx); /* -> [ ... enum_target res ] */ goto skip_proxy; } /* [ ... enum_target res handler trap ] */ duk_insert(ctx, -2); duk_push_hobject(ctx, h_proxy_target); /* -> [ ... enum_target res trap handler target ] */ duk_call_method(ctx, 1 /*nargs*/); /* -> [ ... enum_target res trap_result ] */ h_trap_result = duk_require_hobject(ctx, -1); DUK_UNREF(h_trap_result); /* Copy trap result keys into the enumerator object. */ len = (duk_uint_fast32_t) duk_get_length(ctx, -1); for (i = 0; i < len; i++) { /* XXX: not sure what the correct semantic details are here, * e.g. handling of missing values (gaps), handling of non-array * trap results, etc. * * For keys, we simply skip non-string keys which seems to be * consistent with how e.g. Object.keys() will process proxy trap * results (ES6 draft, Section 19.1.2.14). */ if (duk_get_prop_index(ctx, -1, i) && duk_is_string(ctx, -1)) { /* [ ... enum_target res trap_result val ] */ duk_push_true(ctx); /* [ ... enum_target res trap_result val true ] */ duk_put_prop(ctx, -4); } else { duk_pop(ctx); } } /* [ ... enum_target res trap_result ] */ duk_pop(ctx); duk_remove(ctx, -2); /* [ ... res ] */ /* The internal _target property is kept pointing to the original * enumeration target (the proxy object), so that the enumerator * 'next' operation can read property values if so requested. The * fact that the _target is a proxy disables key existence check * during enumeration. */ DUK_DDD(DUK_DDDPRINT("proxy enumeration, final res: %!O", (duk_heaphdr *) res)); goto compact_and_return; skip_proxy: #endif /* DUK_USE_ES6_PROXY */ curr = enum_target; while (curr) { /* * Virtual properties. * * String and buffer indices are virtual and always enumerable, * 'length' is virtual and non-enumerable. Array and arguments * object props have special behavior but are concrete. */ if (DUK_HOBJECT_HAS_EXOTIC_STRINGOBJ(curr) || DUK_HOBJECT_HAS_EXOTIC_BUFFEROBJ(curr)) { /* String and buffer enumeration behavior is identical now, * so use shared handler. */ if (DUK_HOBJECT_HAS_EXOTIC_STRINGOBJ(curr)) { duk_hstring *h_val; h_val = duk_hobject_get_internal_value_string(thr->heap, curr); DUK_ASSERT(h_val != NULL); /* string objects must not created without internal value */ len = (duk_uint_fast32_t) DUK_HSTRING_GET_CHARLEN(h_val); } else { duk_hbuffer *h_val; DUK_ASSERT(DUK_HOBJECT_HAS_EXOTIC_BUFFEROBJ(curr)); h_val = duk_hobject_get_internal_value_buffer(thr->heap, curr); DUK_ASSERT(h_val != NULL); /* buffer objects must not created without internal value */ len = (duk_uint_fast32_t) DUK_HBUFFER_GET_SIZE(h_val); } for (i = 0; i < len; i++) { duk_hstring *k; k = duk_heap_string_intern_u32_checked(thr, i); DUK_ASSERT(k); duk_push_hstring(ctx, k); duk_push_true(ctx); /* [enum_target res key true] */ duk_put_prop(ctx, -3); /* [enum_target res] */ } /* 'length' property is not enumerable, but is included if * non-enumerable properties are requested. */ if (enum_flags & DUK_ENUM_INCLUDE_NONENUMERABLE) { duk_push_hstring_stridx(ctx, DUK_STRIDX_LENGTH); duk_push_true(ctx); duk_put_prop(ctx, -3); } } else if (DUK_HOBJECT_HAS_EXOTIC_DUKFUNC(curr)) { if (enum_flags & DUK_ENUM_INCLUDE_NONENUMERABLE) { duk_push_hstring_stridx(ctx, DUK_STRIDX_LENGTH); duk_push_true(ctx); duk_put_prop(ctx, -3); } } /* * Array part * * Note: ordering between array and entry part must match 'abandon array' * behavior in duk_hobject_props.c: key order after an array is abandoned * must be the same. */ for (i = 0; i < (duk_uint_fast32_t) curr->a_size; i++) { duk_hstring *k; duk_tval *tv; tv = DUK_HOBJECT_A_GET_VALUE_PTR(curr, i); if (DUK_TVAL_IS_UNDEFINED_UNUSED(tv)) { continue; } k = duk_heap_string_intern_u32_checked(thr, i); DUK_ASSERT(k); duk_push_hstring(ctx, k); duk_push_true(ctx); /* [enum_target res key true] */ duk_put_prop(ctx, -3); /* [enum_target res] */ } /* * Entries part */ for (i = 0; i < (duk_uint_fast32_t) curr->e_next; i++) { duk_hstring *k; k = DUK_HOBJECT_E_GET_KEY(curr, i); if (!k) { continue; } if (!DUK_HOBJECT_E_SLOT_IS_ENUMERABLE(curr, i) && !(enum_flags & DUK_ENUM_INCLUDE_NONENUMERABLE)) { continue; } if (DUK_HSTRING_HAS_INTERNAL(k) && !(enum_flags & DUK_ENUM_INCLUDE_INTERNAL)) { continue; } if ((enum_flags & DUK_ENUM_ARRAY_INDICES_ONLY) && (DUK_HSTRING_GET_ARRIDX_SLOW(k) == DUK_HSTRING_NO_ARRAY_INDEX)) { continue; } DUK_ASSERT(DUK_HOBJECT_E_SLOT_IS_ACCESSOR(curr, i) || !DUK_TVAL_IS_UNDEFINED_UNUSED(&DUK_HOBJECT_E_GET_VALUE_PTR(curr, i)->v)); duk_push_hstring(ctx, k); duk_push_true(ctx); /* [enum_target res key true] */ duk_put_prop(ctx, -3); /* [enum_target res] */ } if (enum_flags & DUK_ENUM_OWN_PROPERTIES_ONLY) { break; } curr = curr->prototype; } /* [enum_target res] */ duk_remove(ctx, -2); /* [res] */ if ((enum_flags & (DUK_ENUM_ARRAY_INDICES_ONLY | DUK_ENUM_SORT_ARRAY_INDICES)) == (DUK_ENUM_ARRAY_INDICES_ONLY | DUK_ENUM_SORT_ARRAY_INDICES)) { /* * Some E5/E5.1 algorithms require that array indices are iterated * in a strictly ascending order. This is the case for e.g. * Array.prototype.forEach() and JSON.stringify() PropertyList * handling. * * To ensure this property for arrays with an array part (and * arbitrary objects too, since e.g. forEach() can be applied * to an array), the caller can request that we sort the keys * here. */ /* XXX: avoid this at least when enum_target is an Array, it has an * array part, and no ancestor properties were included? Not worth * it for JSON, but maybe worth it for forEach(). */ /* XXX: may need a 'length' filter for forEach() */ DUK_DDD(DUK_DDDPRINT("sort array indices by caller request")); duk__sort_array_indices(res); } #if defined(DUK_USE_ES6_PROXY) compact_and_return: #endif /* compact; no need to seal because object is internal */ duk_hobject_compact_props(thr, res); DUK_DDD(DUK_DDDPRINT("created enumerator object: %!iT", (duk_tval *) duk_get_tval(ctx, -1))); }
DUK_LOCAL duk_hstring *duk__alloc_init_hstring(duk_heap *heap, const duk_uint8_t *str, duk_uint32_t blen, duk_uint32_t strhash, const duk_uint8_t *extdata) { duk_hstring *res = NULL; duk_uint8_t *data; duk_size_t alloc_size; duk_uarridx_t dummy; duk_uint32_t clen; #if defined(DUK_USE_STRLEN16) /* If blen <= 0xffffUL, clen is also guaranteed to be <= 0xffffUL. */ if (blen > 0xffffUL) { DUK_D(DUK_DPRINT("16-bit string blen/clen active and blen over 16 bits, reject intern")); return NULL; } #endif if (extdata) { alloc_size = (duk_size_t) sizeof(duk_hstring_external); res = (duk_hstring *) DUK_ALLOC(heap, alloc_size); if (!res) { goto alloc_error; } DUK_MEMZERO(res, sizeof(duk_hstring_external)); #ifdef DUK_USE_EXPLICIT_NULL_INIT DUK_HEAPHDR_STRING_INIT_NULLS(&res->hdr); #endif DUK_HEAPHDR_SET_TYPE_AND_FLAGS(&res->hdr, DUK_HTYPE_STRING, DUK_HSTRING_FLAG_EXTDATA); ((duk_hstring_external *) res)->extdata = extdata; } else { /* NUL terminate for convenient C access */ alloc_size = (duk_size_t) (sizeof(duk_hstring) + blen + 1); res = (duk_hstring *) DUK_ALLOC(heap, alloc_size); if (!res) { goto alloc_error; } DUK_MEMZERO(res, sizeof(duk_hstring)); #ifdef DUK_USE_EXPLICIT_NULL_INIT DUK_HEAPHDR_STRING_INIT_NULLS(&res->hdr); #endif DUK_HEAPHDR_SET_TYPE_AND_FLAGS(&res->hdr, DUK_HTYPE_STRING, 0); data = (duk_uint8_t *) (res + 1); DUK_MEMCPY(data, str, blen); data[blen] = (duk_uint8_t) 0; } if (duk_js_to_arrayindex_raw_string(str, blen, &dummy)) { DUK_HSTRING_SET_ARRIDX(res); } /* All strings beginning with 0xff are treated as "internal", * even strings interned by the user. This allows user code to * create internal properties too, and makes behavior consistent * in case user code happens to use a string also used by Duktape * (such as string has already been interned and has the 'internal' * flag set). */ if (blen > 0 && str[0] == (duk_uint8_t) 0xff) { DUK_HSTRING_SET_INTERNAL(res); } DUK_HSTRING_SET_HASH(res, strhash); DUK_HSTRING_SET_BYTELEN(res, blen); clen = (duk_uint32_t) duk_unicode_unvalidated_utf8_length(str, (duk_size_t) blen); DUK_ASSERT(clen <= blen); DUK_HSTRING_SET_CHARLEN(res, clen); DUK_DDD(DUK_DDDPRINT("interned string, hash=0x%08lx, blen=%ld, clen=%ld, has_arridx=%ld, has_extdata=%ld", (unsigned long) DUK_HSTRING_GET_HASH(res), (long) DUK_HSTRING_GET_BYTELEN(res), (long) DUK_HSTRING_GET_CHARLEN(res), (long) (DUK_HSTRING_HAS_ARRIDX(res) ? 1 : 0), (long) (DUK_HSTRING_HAS_EXTDATA(res) ? 1 : 0))); return res; alloc_error: DUK_FREE(heap, res); return NULL; }
DUK_INTERNAL duk_uint_fast32_t duk_heap_strcache_offset_char2byte(duk_hthread *thr, duk_hstring *h, duk_uint_fast32_t char_offset) { duk_heap *heap; duk_strcache *sce; duk_uint_fast32_t byte_offset; duk_small_int_t i; duk_bool_t use_cache; duk_uint_fast32_t dist_start, dist_end, dist_sce; duk_uint8_t *p_start; duk_uint8_t *p_end; duk_uint8_t *p_found; if (char_offset > DUK_HSTRING_GET_CHARLEN(h)) { goto error; } /* * For ASCII strings, the answer is simple. */ if (DUK_HSTRING_IS_ASCII(h)) { /* clen == blen -> pure ascii */ return char_offset; } /* * For non-ASCII strings, we need to scan forwards or backwards * from some starting point. The starting point may be the start * or end of the string, or some cached midpoint in the string * cache. * * For "short" strings we simply scan without checking or updating * the cache. For longer strings we check and update the cache as * necessary, inserting a new cache entry if none exists. */ DUK_DDD(DUK_DDDPRINT("non-ascii string %p, char_offset=%ld, clen=%ld, blen=%ld", (void *) h, (long) char_offset, (long) DUK_HSTRING_GET_CHARLEN(h), (long) DUK_HSTRING_GET_BYTELEN(h))); heap = thr->heap; sce = NULL; use_cache = (DUK_HSTRING_GET_CHARLEN(h) > DUK_HEAP_STRINGCACHE_NOCACHE_LIMIT); if (use_cache) { #ifdef DUK_USE_DDDPRINT DUK_DDD(DUK_DDDPRINT("stringcache before char2byte (using cache):")); for (i = 0; i < DUK_HEAP_STRCACHE_SIZE; i++) { duk_strcache *c = heap->strcache + i; DUK_DDD(DUK_DDDPRINT(" [%ld] -> h=%p, cidx=%ld, bidx=%ld", (long) i, (void *) c->h, (long) c->cidx, (long) c->bidx)); } #endif for (i = 0; i < DUK_HEAP_STRCACHE_SIZE; i++) { duk_strcache *c = heap->strcache + i; if (c->h == h) { sce = c; break; } } } /* * Scan from shortest distance: * - start of string * - end of string * - cache entry (if exists) */ DUK_ASSERT(DUK_HSTRING_GET_CHARLEN(h) >= char_offset); dist_start = char_offset; dist_end = DUK_HSTRING_GET_CHARLEN(h) - char_offset; dist_sce = 0; DUK_UNREF(dist_sce); /* initialize for debug prints, needed if sce==NULL */ p_start = (duk_uint8_t *) DUK_HSTRING_GET_DATA(h); p_end = (duk_uint8_t *) (p_start + DUK_HSTRING_GET_BYTELEN(h)); p_found = NULL; if (sce) { if (char_offset >= sce->cidx) { dist_sce = char_offset - sce->cidx; if ((dist_sce <= dist_start) && (dist_sce <= dist_end)) { DUK_DDD(DUK_DDDPRINT("non-ascii string, use_cache=%ld, sce=%p:%ld:%ld, " "dist_start=%ld, dist_end=%ld, dist_sce=%ld => " "scan forwards from sce", (long) use_cache, (void *) (sce ? sce->h : NULL), (sce ? (long) sce->cidx : (long) -1), (sce ? (long) sce->bidx : (long) -1), (long) dist_start, (long) dist_end, (long) dist_sce)); p_found = duk__scan_forwards(p_start + sce->bidx, p_end, dist_sce); goto scan_done; } } else { dist_sce = sce->cidx - char_offset; if ((dist_sce <= dist_start) && (dist_sce <= dist_end)) { DUK_DDD(DUK_DDDPRINT("non-ascii string, use_cache=%ld, sce=%p:%ld:%ld, " "dist_start=%ld, dist_end=%ld, dist_sce=%ld => " "scan backwards from sce", (long) use_cache, (void *) (sce ? sce->h : NULL), (sce ? (long) sce->cidx : (long) -1), (sce ? (long) sce->bidx : (long) -1), (long) dist_start, (long) dist_end, (long) dist_sce)); p_found = duk__scan_backwards(p_start + sce->bidx, p_start, dist_sce); goto scan_done; } } } /* no sce, or sce scan not best */ if (dist_start <= dist_end) { DUK_DDD(DUK_DDDPRINT("non-ascii string, use_cache=%ld, sce=%p:%ld:%ld, " "dist_start=%ld, dist_end=%ld, dist_sce=%ld => " "scan forwards from string start", (long) use_cache, (void *) (sce ? sce->h : NULL), (sce ? (long) sce->cidx : (long) -1), (sce ? (long) sce->bidx : (long) -1), (long) dist_start, (long) dist_end, (long) dist_sce)); p_found = duk__scan_forwards(p_start, p_end, dist_start); } else { DUK_DDD(DUK_DDDPRINT("non-ascii string, use_cache=%ld, sce=%p:%ld:%ld, " "dist_start=%ld, dist_end=%ld, dist_sce=%ld => " "scan backwards from string end", (long) use_cache, (void *) (sce ? sce->h : NULL), (sce ? (long) sce->cidx : (long) -1), (sce ? (long) sce->bidx : (long) -1), (long) dist_start, (long) dist_end, (long) dist_sce)); p_found = duk__scan_backwards(p_end, p_start, dist_end); } scan_done: if (!p_found) { /* Scan error: this shouldn't normally happen; it could happen if * string is not valid UTF-8 data, and clen/blen are not consistent * with the scanning algorithm. */ goto error; } DUK_ASSERT(p_found >= p_start); DUK_ASSERT(p_found <= p_end); /* may be equal */ byte_offset = (duk_uint32_t) (p_found - p_start); DUK_DDD(DUK_DDDPRINT("-> string %p, cidx %ld -> bidx %ld", (void *) h, (long) char_offset, (long) byte_offset)); /* * Update cache entry (allocating if necessary), and move the * cache entry to the first place (in an "LRU" policy). */ if (use_cache) { /* update entry, allocating if necessary */ if (!sce) { sce = heap->strcache + DUK_HEAP_STRCACHE_SIZE - 1; /* take last entry */ sce->h = h; } DUK_ASSERT(sce != NULL); sce->bidx = (duk_uint32_t) (p_found - p_start); sce->cidx = (duk_uint32_t) char_offset; /* LRU: move our entry to first */ if (sce > &heap->strcache[0]) { /* * A C * B A * C <- sce ==> B * D D */ duk_strcache tmp; tmp = *sce; DUK_MEMMOVE((void *) (&heap->strcache[1]), (void *) (&heap->strcache[0]), (size_t) (((char *) sce) - ((char *) &heap->strcache[0]))); heap->strcache[0] = tmp; /* 'sce' points to the wrong entry here, but is no longer used */ } #ifdef DUK_USE_DDDPRINT DUK_DDD(DUK_DDDPRINT("stringcache after char2byte (using cache):")); for (i = 0; i < DUK_HEAP_STRCACHE_SIZE; i++) { duk_strcache *c = heap->strcache + i; DUK_DDD(DUK_DDDPRINT(" [%ld] -> h=%p, cidx=%ld, bidx=%ld", (long) i, (void *) c->h, (long) c->cidx, (long) c->bidx)); } #endif } return byte_offset; error: DUK_ERROR(thr, DUK_ERR_INTERNAL_ERROR, "string scan error"); return 0; }
DUK_INTERNAL duk_ret_t duk_bi_string_prototype_replace(duk_context *ctx) { duk_hthread *thr = (duk_hthread *) ctx; duk_hstring *h_input; duk_hstring *h_match; duk_hstring *h_search; duk_hobject *h_re; duk_bufwriter_ctx bw_alloc; duk_bufwriter_ctx *bw; #ifdef DUK_USE_REGEXP_SUPPORT duk_bool_t is_regexp; duk_bool_t is_global; #endif duk_bool_t is_repl_func; duk_uint32_t match_start_coff, match_start_boff; #ifdef DUK_USE_REGEXP_SUPPORT duk_int_t match_caps; #endif duk_uint32_t prev_match_end_boff; const duk_uint8_t *r_start, *r_end, *r; /* repl string scan */ duk_size_t tmp_sz; DUK_ASSERT_TOP(ctx, 2); h_input = duk_push_this_coercible_to_string(ctx); DUK_ASSERT(h_input != NULL); bw = &bw_alloc; DUK_BW_INIT_PUSHBUF(thr, bw, DUK_HSTRING_GET_BYTELEN(h_input)); /* input size is good output starting point */ DUK_ASSERT_TOP(ctx, 4); /* stack[0] = search value * stack[1] = replace value * stack[2] = input string * stack[3] = result buffer */ h_re = duk_get_hobject_with_class(ctx, 0, DUK_HOBJECT_CLASS_REGEXP); if (h_re) { #ifdef DUK_USE_REGEXP_SUPPORT is_regexp = 1; is_global = duk_get_prop_stridx_boolean(ctx, 0, DUK_STRIDX_GLOBAL, NULL); if (is_global) { /* start match from beginning */ duk_push_int(ctx, 0); duk_put_prop_stridx(ctx, 0, DUK_STRIDX_LAST_INDEX); } #else /* DUK_USE_REGEXP_SUPPORT */ return DUK_RET_UNSUPPORTED_ERROR; #endif /* DUK_USE_REGEXP_SUPPORT */ } else { duk_to_string(ctx, 0); #ifdef DUK_USE_REGEXP_SUPPORT is_regexp = 0; is_global = 0; #endif } if (duk_is_function(ctx, 1)) { is_repl_func = 1; r_start = NULL; r_end = NULL; } else { duk_hstring *h_repl; is_repl_func = 0; h_repl = duk_to_hstring(ctx, 1); DUK_ASSERT(h_repl != NULL); r_start = DUK_HSTRING_GET_DATA(h_repl); r_end = r_start + DUK_HSTRING_GET_BYTELEN(h_repl); } prev_match_end_boff = 0; for (;;) { /* * If matching with a regexp: * - non-global RegExp: lastIndex not touched on a match, zeroed * on a non-match * - global RegExp: on match, lastIndex will be updated by regexp * executor to point to next char after the matching part (so that * characters in the matching part are not matched again) * * If matching with a string: * - always non-global match, find first occurrence * * We need: * - The character offset of start-of-match for the replacer function * - The byte offsets for start-of-match and end-of-match to implement * the replacement values $&, $`, and $', and to copy non-matching * input string portions (including header and trailer) verbatim. * * NOTE: the E5.1 specification is a bit vague how the RegExp should * behave in the replacement process; e.g. is matching done first for * all matches (in the global RegExp case) before any replacer calls * are made? See: test-bi-string-proto-replace.js for discussion. */ DUK_ASSERT_TOP(ctx, 4); #ifdef DUK_USE_REGEXP_SUPPORT if (is_regexp) { duk_dup(ctx, 0); duk_dup(ctx, 2); duk_regexp_match(thr); /* [ ... regexp input ] -> [ res_obj ] */ if (!duk_is_object(ctx, -1)) { duk_pop(ctx); break; } duk_get_prop_stridx(ctx, -1, DUK_STRIDX_INDEX); DUK_ASSERT(duk_is_number(ctx, -1)); match_start_coff = duk_get_int(ctx, -1); duk_pop(ctx); duk_get_prop_index(ctx, -1, 0); DUK_ASSERT(duk_is_string(ctx, -1)); h_match = duk_get_hstring(ctx, -1); DUK_ASSERT(h_match != NULL); duk_pop(ctx); /* h_match is borrowed, remains reachable through match_obj */ if (DUK_HSTRING_GET_BYTELEN(h_match) == 0) { /* This should be equivalent to match() algorithm step 8.f.iii.2: * detect an empty match and allow it, but don't allow it twice. */ duk_uint32_t last_index; duk_get_prop_stridx(ctx, 0, DUK_STRIDX_LAST_INDEX); last_index = (duk_uint32_t) duk_get_uint(ctx, -1); DUK_DDD(DUK_DDDPRINT("empty match, bump lastIndex: %ld -> %ld", (long) last_index, (long) (last_index + 1))); duk_pop(ctx); duk_push_int(ctx, last_index + 1); duk_put_prop_stridx(ctx, 0, DUK_STRIDX_LAST_INDEX); } DUK_ASSERT(duk_get_length(ctx, -1) <= DUK_INT_MAX); /* string limits */ match_caps = (duk_int_t) duk_get_length(ctx, -1); } else { #else /* DUK_USE_REGEXP_SUPPORT */ { /* unconditionally */ #endif /* DUK_USE_REGEXP_SUPPORT */ const duk_uint8_t *p_start, *p_end, *p; /* input string scan */ const duk_uint8_t *q_start; /* match string */ duk_size_t q_blen; #ifdef DUK_USE_REGEXP_SUPPORT DUK_ASSERT(!is_global); /* single match always */ #endif p_start = DUK_HSTRING_GET_DATA(h_input); p_end = p_start + DUK_HSTRING_GET_BYTELEN(h_input); p = p_start; h_search = duk_get_hstring(ctx, 0); DUK_ASSERT(h_search != NULL); q_start = DUK_HSTRING_GET_DATA(h_search); q_blen = (duk_size_t) DUK_HSTRING_GET_BYTELEN(h_search); p_end -= q_blen; /* ensure full memcmp() fits in while */ match_start_coff = 0; while (p <= p_end) { DUK_ASSERT(p + q_blen <= DUK_HSTRING_GET_DATA(h_input) + DUK_HSTRING_GET_BYTELEN(h_input)); if (DUK_MEMCMP((void *) p, (void *) q_start, (size_t) q_blen) == 0) { duk_dup(ctx, 0); h_match = duk_get_hstring(ctx, -1); DUK_ASSERT(h_match != NULL); #ifdef DUK_USE_REGEXP_SUPPORT match_caps = 0; #endif goto found; } /* track utf-8 non-continuation bytes */ if ((p[0] & 0xc0) != 0x80) { match_start_coff++; } p++; } /* not found */ break; } found: /* stack[0] = search value * stack[1] = replace value * stack[2] = input string * stack[3] = result buffer * stack[4] = regexp match OR match string */ match_start_boff = duk_heap_strcache_offset_char2byte(thr, h_input, match_start_coff); tmp_sz = (duk_size_t) (match_start_boff - prev_match_end_boff); DUK_BW_WRITE_ENSURE_BYTES(thr, bw, DUK_HSTRING_GET_DATA(h_input) + prev_match_end_boff, tmp_sz); prev_match_end_boff = match_start_boff + DUK_HSTRING_GET_BYTELEN(h_match); if (is_repl_func) { duk_idx_t idx_args; duk_hstring *h_repl; /* regexp res_obj is at index 4 */ duk_dup(ctx, 1); idx_args = duk_get_top(ctx); #ifdef DUK_USE_REGEXP_SUPPORT if (is_regexp) { duk_int_t idx; duk_require_stack(ctx, match_caps + 2); for (idx = 0; idx < match_caps; idx++) { /* match followed by capture(s) */ duk_get_prop_index(ctx, 4, idx); } } else { #else /* DUK_USE_REGEXP_SUPPORT */ { /* unconditionally */ #endif /* DUK_USE_REGEXP_SUPPORT */ /* match == search string, by definition */ duk_dup(ctx, 0); } duk_push_int(ctx, match_start_coff); duk_dup(ctx, 2); /* [ ... replacer match [captures] match_char_offset input ] */ duk_call(ctx, duk_get_top(ctx) - idx_args); h_repl = duk_to_hstring(ctx, -1); /* -> [ ... repl_value ] */ DUK_ASSERT(h_repl != NULL); DUK_BW_WRITE_ENSURE_HSTRING(thr, bw, h_repl); duk_pop(ctx); /* repl_value */ } else { r = r_start; while (r < r_end) { duk_int_t ch1; duk_int_t ch2; #ifdef DUK_USE_REGEXP_SUPPORT duk_int_t ch3; #endif duk_size_t left; ch1 = *r++; if (ch1 != DUK_ASC_DOLLAR) { goto repl_write; } left = r_end - r; if (left <= 0) { goto repl_write; } ch2 = r[0]; switch ((int) ch2) { case DUK_ASC_DOLLAR: { ch1 = (1 << 8) + DUK_ASC_DOLLAR; goto repl_write; } case DUK_ASC_AMP: { DUK_BW_WRITE_ENSURE_HSTRING(thr, bw, h_match); r++; continue; } case DUK_ASC_GRAVE: { tmp_sz = (duk_size_t) match_start_boff; DUK_BW_WRITE_ENSURE_BYTES(thr, bw, DUK_HSTRING_GET_DATA(h_input), tmp_sz); r++; continue; } case DUK_ASC_SINGLEQUOTE: { duk_uint32_t match_end_boff; /* Use match charlen instead of bytelen, just in case the input and * match codepoint encodings would have different lengths. */ match_end_boff = duk_heap_strcache_offset_char2byte(thr, h_input, match_start_coff + DUK_HSTRING_GET_CHARLEN(h_match)); tmp_sz = (duk_size_t) (DUK_HSTRING_GET_BYTELEN(h_input) - match_end_boff); DUK_BW_WRITE_ENSURE_BYTES(thr, bw, DUK_HSTRING_GET_DATA(h_input) + match_end_boff, tmp_sz); r++; continue; } default: { #ifdef DUK_USE_REGEXP_SUPPORT duk_int_t capnum, captmp, capadv; /* XXX: optional check, match_caps is zero if no regexp, * so dollar will be interpreted literally anyway. */ if (!is_regexp) { goto repl_write; } if (!(ch2 >= DUK_ASC_0 && ch2 <= DUK_ASC_9)) { goto repl_write; } capnum = ch2 - DUK_ASC_0; capadv = 1; if (left >= 2) { ch3 = r[1]; if (ch3 >= DUK_ASC_0 && ch3 <= DUK_ASC_9) { captmp = capnum * 10 + (ch3 - DUK_ASC_0); if (captmp < match_caps) { capnum = captmp; capadv = 2; } } } if (capnum > 0 && capnum < match_caps) { DUK_ASSERT(is_regexp != 0); /* match_caps == 0 without regexps */ /* regexp res_obj is at offset 4 */ duk_get_prop_index(ctx, 4, (duk_uarridx_t) capnum); if (duk_is_string(ctx, -1)) { duk_hstring *h_tmp_str; h_tmp_str = duk_get_hstring(ctx, -1); DUK_ASSERT(h_tmp_str != NULL); DUK_BW_WRITE_ENSURE_HSTRING(thr, bw, h_tmp_str); } else { /* undefined -> skip (replaced with empty) */ } duk_pop(ctx); r += capadv; continue; } else { goto repl_write; } #else /* DUK_USE_REGEXP_SUPPORT */ goto repl_write; /* unconditionally */ #endif /* DUK_USE_REGEXP_SUPPORT */ } /* default case */ } /* switch (ch2) */ repl_write: /* ch1 = (r_increment << 8) + byte */ DUK_BW_WRITE_ENSURE_U8(thr, bw, (duk_uint8_t) (ch1 & 0xff)); r += ch1 >> 8; } /* while repl */ } /* if (is_repl_func) */ duk_pop(ctx); /* pop regexp res_obj or match string */ #ifdef DUK_USE_REGEXP_SUPPORT if (!is_global) { #else { /* unconditionally; is_global==0 */ #endif break; } } /* trailer */ tmp_sz = (duk_size_t) (DUK_HSTRING_GET_BYTELEN(h_input) - prev_match_end_boff); DUK_BW_WRITE_ENSURE_BYTES(thr, bw, DUK_HSTRING_GET_DATA(h_input) + prev_match_end_boff, tmp_sz); DUK_ASSERT_TOP(ctx, 4); DUK_BW_COMPACT(thr, bw); duk_to_string(ctx, -1); return 1; } /* * split() */ /* XXX: very messy now, but works; clean up, remove unused variables (nomimally * used so compiler doesn't complain). */ DUK_INTERNAL duk_ret_t duk_bi_string_prototype_split(duk_context *ctx) { duk_hthread *thr = (duk_hthread *) ctx; duk_hstring *h_input; duk_hstring *h_sep; duk_uint32_t limit; duk_uint32_t arr_idx; #ifdef DUK_USE_REGEXP_SUPPORT duk_bool_t is_regexp; #endif duk_bool_t matched; /* set to 1 if any match exists (needed for empty input special case) */ duk_uint32_t prev_match_end_coff, prev_match_end_boff; duk_uint32_t match_start_boff, match_start_coff; duk_uint32_t match_end_boff, match_end_coff; DUK_UNREF(thr); h_input = duk_push_this_coercible_to_string(ctx); DUK_ASSERT(h_input != NULL); duk_push_array(ctx); if (duk_is_undefined(ctx, 1)) { limit = 0xffffffffUL; } else { limit = duk_to_uint32(ctx, 1); } if (limit == 0) { return 1; } /* If the separator is a RegExp, make a "clone" of it. The specification * algorithm calls [[Match]] directly for specific indices; we emulate this * by tweaking lastIndex and using a "force global" variant of duk_regexp_match() * which will use global-style matching even when the RegExp itself is non-global. */ if (duk_is_undefined(ctx, 0)) { /* The spec algorithm first does "R = ToString(separator)" before checking * whether separator is undefined. Since this is side effect free, we can * skip the ToString() here. */ duk_dup(ctx, 2); duk_put_prop_index(ctx, 3, 0); return 1; } else if (duk_get_hobject_with_class(ctx, 0, DUK_HOBJECT_CLASS_REGEXP) != NULL) { #ifdef DUK_USE_REGEXP_SUPPORT duk_push_hobject_bidx(ctx, DUK_BIDX_REGEXP_CONSTRUCTOR); duk_dup(ctx, 0); duk_new(ctx, 1); /* [ ... RegExp val ] -> [ ... res ] */ duk_replace(ctx, 0); /* lastIndex is initialized to zero by new RegExp() */ is_regexp = 1; #else return DUK_RET_UNSUPPORTED_ERROR; #endif } else { duk_to_string(ctx, 0); #ifdef DUK_USE_REGEXP_SUPPORT is_regexp = 0; #endif } /* stack[0] = separator (string or regexp) * stack[1] = limit * stack[2] = input string * stack[3] = result array */ prev_match_end_boff = 0; prev_match_end_coff = 0; arr_idx = 0; matched = 0; for (;;) { /* * The specification uses RegExp [[Match]] to attempt match at specific * offsets. We don't have such a primitive, so we use an actual RegExp * and tweak lastIndex. Since the RegExp may be non-global, we use a * special variant which forces global-like behavior for matching. */ DUK_ASSERT_TOP(ctx, 4); #ifdef DUK_USE_REGEXP_SUPPORT if (is_regexp) { duk_dup(ctx, 0); duk_dup(ctx, 2); duk_regexp_match_force_global(thr); /* [ ... regexp input ] -> [ res_obj ] */ if (!duk_is_object(ctx, -1)) { duk_pop(ctx); break; } matched = 1; duk_get_prop_stridx(ctx, -1, DUK_STRIDX_INDEX); DUK_ASSERT(duk_is_number(ctx, -1)); match_start_coff = duk_get_int(ctx, -1); match_start_boff = duk_heap_strcache_offset_char2byte(thr, h_input, match_start_coff); duk_pop(ctx); if (match_start_coff == DUK_HSTRING_GET_CHARLEN(h_input)) { /* don't allow an empty match at the end of the string */ duk_pop(ctx); break; } duk_get_prop_stridx(ctx, 0, DUK_STRIDX_LAST_INDEX); DUK_ASSERT(duk_is_number(ctx, -1)); match_end_coff = duk_get_int(ctx, -1); match_end_boff = duk_heap_strcache_offset_char2byte(thr, h_input, match_end_coff); duk_pop(ctx); /* empty match -> bump and continue */ if (prev_match_end_boff == match_end_boff) { duk_push_int(ctx, match_end_coff + 1); duk_put_prop_stridx(ctx, 0, DUK_STRIDX_LAST_INDEX); duk_pop(ctx); continue; } } else { #else /* DUK_USE_REGEXP_SUPPORT */ { /* unconditionally */ #endif /* DUK_USE_REGEXP_SUPPORT */ const duk_uint8_t *p_start, *p_end, *p; /* input string scan */ const duk_uint8_t *q_start; /* match string */ duk_size_t q_blen, q_clen; p_start = DUK_HSTRING_GET_DATA(h_input); p_end = p_start + DUK_HSTRING_GET_BYTELEN(h_input); p = p_start + prev_match_end_boff; h_sep = duk_get_hstring(ctx, 0); DUK_ASSERT(h_sep != NULL); q_start = DUK_HSTRING_GET_DATA(h_sep); q_blen = (duk_size_t) DUK_HSTRING_GET_BYTELEN(h_sep); q_clen = (duk_size_t) DUK_HSTRING_GET_CHARLEN(h_sep); p_end -= q_blen; /* ensure full memcmp() fits in while */ match_start_coff = prev_match_end_coff; if (q_blen == 0) { /* Handle empty separator case: it will always match, and always * triggers the check in step 13.c.iii initially. Note that we * must skip to either end of string or start of first codepoint, * skipping over any continuation bytes! * * Don't allow an empty string to match at the end of the input. */ matched = 1; /* empty separator can always match */ match_start_coff++; p++; while (p < p_end) { if ((p[0] & 0xc0) != 0x80) { goto found; } p++; } goto not_found; } DUK_ASSERT(q_blen > 0 && q_clen > 0); while (p <= p_end) { DUK_ASSERT(p + q_blen <= DUK_HSTRING_GET_DATA(h_input) + DUK_HSTRING_GET_BYTELEN(h_input)); DUK_ASSERT(q_blen > 0); /* no issues with empty memcmp() */ if (DUK_MEMCMP((void *) p, (void *) q_start, (duk_size_t) q_blen) == 0) { /* never an empty match, so step 13.c.iii can't be triggered */ goto found; } /* track utf-8 non-continuation bytes */ if ((p[0] & 0xc0) != 0x80) { match_start_coff++; } p++; } not_found: /* not found */ break; found: matched = 1; match_start_boff = (duk_uint32_t) (p - p_start); match_end_coff = (duk_uint32_t) (match_start_coff + q_clen); /* constrained by string length */ match_end_boff = (duk_uint32_t) (match_start_boff + q_blen); /* ditto */ /* empty match (may happen with empty separator) -> bump and continue */ if (prev_match_end_boff == match_end_boff) { prev_match_end_boff++; prev_match_end_coff++; continue; } } /* if (is_regexp) */ /* stack[0] = separator (string or regexp) * stack[1] = limit * stack[2] = input string * stack[3] = result array * stack[4] = regexp res_obj (if is_regexp) */ DUK_DDD(DUK_DDDPRINT("split; match_start b=%ld,c=%ld, match_end b=%ld,c=%ld, prev_end b=%ld,c=%ld", (long) match_start_boff, (long) match_start_coff, (long) match_end_boff, (long) match_end_coff, (long) prev_match_end_boff, (long) prev_match_end_coff)); duk_push_lstring(ctx, (const char *) (DUK_HSTRING_GET_DATA(h_input) + prev_match_end_boff), (duk_size_t) (match_start_boff - prev_match_end_boff)); duk_put_prop_index(ctx, 3, arr_idx); arr_idx++; if (arr_idx >= limit) { goto hit_limit; } #ifdef DUK_USE_REGEXP_SUPPORT if (is_regexp) { duk_size_t i, len; len = duk_get_length(ctx, 4); for (i = 1; i < len; i++) { DUK_ASSERT(i <= DUK_UARRIDX_MAX); /* cannot have >4G captures */ duk_get_prop_index(ctx, 4, (duk_uarridx_t) i); duk_put_prop_index(ctx, 3, arr_idx); arr_idx++; if (arr_idx >= limit) { goto hit_limit; } } duk_pop(ctx); /* lastIndex already set up for next match */ } else { #else /* DUK_USE_REGEXP_SUPPORT */ { /* unconditionally */ #endif /* DUK_USE_REGEXP_SUPPORT */ /* no action */ } prev_match_end_boff = match_end_boff; prev_match_end_coff = match_end_coff; continue; } /* for */ /* Combined step 11 (empty string special case) and 14-15. */ DUK_DDD(DUK_DDDPRINT("split trailer; prev_end b=%ld,c=%ld", (long) prev_match_end_boff, (long) prev_match_end_coff)); if (DUK_HSTRING_GET_CHARLEN(h_input) > 0 || !matched) { /* Add trailer if: * a) non-empty input * b) empty input and no (zero size) match found (step 11) */ duk_push_lstring(ctx, (const char *) DUK_HSTRING_GET_DATA(h_input) + prev_match_end_boff, (duk_size_t) (DUK_HSTRING_GET_BYTELEN(h_input) - prev_match_end_boff)); duk_put_prop_index(ctx, 3, arr_idx); /* No arr_idx update or limit check */ } return 1; hit_limit: #ifdef DUK_USE_REGEXP_SUPPORT if (is_regexp) { duk_pop(ctx); } #endif return 1; } /* * Various */ #ifdef DUK_USE_REGEXP_SUPPORT DUK_LOCAL void duk__to_regexp_helper(duk_context *ctx, duk_idx_t index, duk_bool_t force_new) { duk_hobject *h; /* Shared helper for match() steps 3-4, search() steps 3-4. */ DUK_ASSERT(index >= 0); if (force_new) { goto do_new; } h = duk_get_hobject_with_class(ctx, index, DUK_HOBJECT_CLASS_REGEXP); if (!h) { goto do_new; } return; do_new: duk_push_hobject_bidx(ctx, DUK_BIDX_REGEXP_CONSTRUCTOR); duk_dup(ctx, index); duk_new(ctx, 1); /* [ ... RegExp val ] -> [ ... res ] */ duk_replace(ctx, index); } #endif /* DUK_USE_REGEXP_SUPPORT */ #ifdef DUK_USE_REGEXP_SUPPORT DUK_INTERNAL duk_ret_t duk_bi_string_prototype_search(duk_context *ctx) { duk_hthread *thr = (duk_hthread *) ctx; /* Easiest way to implement the search required by the specification * is to do a RegExp test() with lastIndex forced to zero. To avoid * side effects on the argument, "clone" the RegExp if a RegExp was * given as input. * * The global flag of the RegExp should be ignored; setting lastIndex * to zero (which happens when "cloning" the RegExp) should have an * equivalent effect. */ DUK_ASSERT_TOP(ctx, 1); (void) duk_push_this_coercible_to_string(ctx); /* at index 1 */ duk__to_regexp_helper(ctx, 0 /*index*/, 1 /*force_new*/); /* stack[0] = regexp * stack[1] = string */ /* Avoid using RegExp.prototype methods, as they're writable and * configurable and may have been changed. */ duk_dup(ctx, 0); duk_dup(ctx, 1); /* [ ... re_obj input ] */ duk_regexp_match(thr); /* -> [ ... res_obj ] */ if (!duk_is_object(ctx, -1)) { duk_push_int(ctx, -1); return 1; } duk_get_prop_stridx(ctx, -1, DUK_STRIDX_INDEX); DUK_ASSERT(duk_is_number(ctx, -1)); return 1; } #else /* DUK_USE_REGEXP_SUPPORT */ DUK_INTERNAL duk_ret_t duk_bi_string_prototype_search(duk_context *ctx) { DUK_UNREF(ctx); return DUK_RET_UNSUPPORTED_ERROR; }
DUK_INTERNAL duk_ret_t duk_bi_string_prototype_indexof_shared(duk_context *ctx) { duk_hthread *thr = (duk_hthread *) ctx; duk_hstring *h_this; duk_hstring *h_search; duk_int_t clen_this; duk_int_t cpos; duk_int_t bpos; const duk_uint8_t *p_start, *p_end, *p; const duk_uint8_t *q_start; duk_int_t q_blen; duk_uint8_t firstbyte; duk_uint8_t t; duk_small_int_t is_lastindexof = duk_get_current_magic(ctx); /* 0=indexOf, 1=lastIndexOf */ h_this = duk_push_this_coercible_to_string(ctx); DUK_ASSERT(h_this != NULL); clen_this = (duk_int_t) DUK_HSTRING_GET_CHARLEN(h_this); h_search = duk_to_hstring(ctx, 0); DUK_ASSERT(h_search != NULL); q_start = DUK_HSTRING_GET_DATA(h_search); q_blen = (duk_int_t) DUK_HSTRING_GET_BYTELEN(h_search); duk_to_number(ctx, 1); if (duk_is_nan(ctx, 1) && is_lastindexof) { /* indexOf: NaN should cause pos to be zero. * lastIndexOf: NaN should cause pos to be +Infinity * (and later be clamped to len). */ cpos = clen_this; } else { cpos = duk_to_int_clamped(ctx, 1, 0, clen_this); } /* Empty searchstring always matches; cpos must be clamped here. * (If q_blen were < 0 due to clamped coercion, it would also be * caught here.) */ if (q_blen <= 0) { duk_push_int(ctx, cpos); return 1; } DUK_ASSERT(q_blen > 0); bpos = (duk_int_t) duk_heap_strcache_offset_char2byte(thr, h_this, (duk_uint32_t) cpos); p_start = DUK_HSTRING_GET_DATA(h_this); p_end = p_start + DUK_HSTRING_GET_BYTELEN(h_this); p = p_start + bpos; /* This loop is optimized for size. For speed, there should be * two separate loops, and we should ensure that memcmp() can be * used without an extra "will searchstring fit" check. Doing * the preconditioning for 'p' and 'p_end' is easy but cpos * must be updated if 'p' is wound back (backward scanning). */ firstbyte = q_start[0]; /* leading byte of match string */ while (p <= p_end && p >= p_start) { t = *p; /* For Ecmascript strings, this check can only match for * initial UTF-8 bytes (not continuation bytes). For other * strings all bets are off. */ if ((t == firstbyte) && ((duk_size_t) (p_end - p) >= (duk_size_t) q_blen)) { DUK_ASSERT(q_blen > 0); /* no issues with memcmp() zero size, even if broken */ if (DUK_MEMCMP(p, q_start, (duk_size_t) q_blen) == 0) { duk_push_int(ctx, cpos); return 1; } } /* track cpos while scanning */ if (is_lastindexof) { /* when going backwards, we decrement cpos 'early'; * 'p' may point to a continuation byte of the char * at offset 'cpos', but that's OK because we'll * backtrack all the way to the initial byte. */ if ((t & 0xc0) != 0x80) { cpos--; } p--; } else { if ((t & 0xc0) != 0x80) { cpos++; } p++; } } /* Not found. Empty string case is handled specially above. */ duk_push_int(ctx, -1); return 1; }
DUK_INTERNAL duk_ret_t duk_bi_regexp_prototype_to_string(duk_context *ctx) { duk_hstring *h_bc; duk_small_int_t re_flags; #if 0 /* A little tricky string approach to provide the flags string. * This depends on the specific flag values in duk_regexp.h, * which needs to be asserted for. In practice this doesn't * produce more compact code than the easier approach in use. */ const char *flag_strings = "gim\0gi\0gm\0g\0"; duk_uint8_t flag_offsets[8] = { (duk_uint8_t) 3, /* flags: "" */ (duk_uint8_t) 10, /* flags: "g" */ (duk_uint8_t) 5, /* flags: "i" */ (duk_uint8_t) 4, /* flags: "gi" */ (duk_uint8_t) 2, /* flags: "m" */ (duk_uint8_t) 7, /* flags: "gm" */ (duk_uint8_t) 1, /* flags: "im" */ (duk_uint8_t) 0, /* flags: "gim" */ }; DUK_ASSERT(DUK_RE_FLAG_GLOBAL == 1); DUK_ASSERT(DUK_RE_FLAG_IGNORE_CASE == 2); DUK_ASSERT(DUK_RE_FLAG_MULTILINE == 4); #endif duk__get_this_regexp(ctx); /* [ regexp ] */ duk_get_prop_stridx(ctx, 0, DUK_STRIDX_SOURCE); duk_get_prop_stridx(ctx, 0, DUK_STRIDX_INT_BYTECODE); h_bc = duk_get_hstring(ctx, -1); DUK_ASSERT(h_bc != NULL); DUK_ASSERT(DUK_HSTRING_GET_BYTELEN(h_bc) >= 1); DUK_ASSERT(DUK_HSTRING_GET_CHARLEN(h_bc) >= 1); DUK_ASSERT(DUK_HSTRING_GET_DATA(h_bc)[0] < 0x80); re_flags = (duk_small_int_t) DUK_HSTRING_GET_DATA(h_bc)[0]; /* [ regexp source bytecode ] */ #if 1 /* This is a cleaner approach and also produces smaller code than * the other alternative. Use duk_require_string() for format * safety (although the source property should always exist). */ duk_push_sprintf(ctx, "/%s/%s%s%s", (const char *) duk_require_string(ctx, -2), /* require to be safe */ (re_flags & DUK_RE_FLAG_GLOBAL) ? "g" : "", (re_flags & DUK_RE_FLAG_IGNORE_CASE) ? "i" : "", (re_flags & DUK_RE_FLAG_MULTILINE) ? "m" : ""); #else /* This should not be necessary because no-one should tamper with the * regexp bytecode, but is prudent to avoid potential segfaults if that * were to happen for some reason. */ re_flags &= 0x07; DUK_ASSERT(re_flags >= 0 && re_flags <= 7); /* three flags */ duk_push_sprintf(ctx, "/%s/%s", (const char *) duk_require_string(ctx, -2), (const char *) (flag_strings + flag_offsets[re_flags])); #endif return 1; }
void duk_hobject_enumerator_create(duk_context *ctx, int enum_flags) { duk_hthread *thr = (duk_hthread *) ctx; duk_hobject *target; duk_hobject *curr; duk_hobject *res; DUK_ASSERT(ctx != NULL); DUK_DDDPRINT("create enumerator, stack top: %d", duk_get_top(ctx)); target = duk_require_hobject(ctx, -1); DUK_ASSERT(target != NULL); duk_push_object_internal(ctx); DUK_DDDPRINT("created internal object"); /* [target res] */ duk_push_hstring_stridx(ctx, DUK_STRIDX_INT_TARGET); duk_push_hobject(ctx, target); duk_put_prop(ctx, -3); /* initialize index so that we skip internal control keys */ duk_push_hstring_stridx(ctx, DUK_STRIDX_INT_NEXT); duk_push_int(ctx, ENUM_START_INDEX); duk_put_prop(ctx, -3); curr = target; while (curr) { duk_uint32_t i; /* * Virtual properties. * * String indices are virtual and always enumerable. String 'length' * is virtual and non-enumerable. Array and arguments object props * have special behavior but are concrete. */ if (DUK_HOBJECT_HAS_SPECIAL_STRINGOBJ(curr)) { duk_hstring *h_val; h_val = duk_hobject_get_internal_value_string(thr->heap, curr); DUK_ASSERT(h_val != NULL); /* string objects must not created without internal value */ /* FIXME: type for 'i' to match string max len (duk_uint32_t) */ for (i = 0; i < DUK_HSTRING_GET_CHARLEN(h_val); i++) { duk_hstring *k; k = duk_heap_string_intern_u32_checked(thr, i); DUK_ASSERT(k); duk_push_hstring(ctx, k); duk_push_true(ctx); /* [target res key true] */ duk_put_prop(ctx, -3); /* [target res] */ } /* 'length' property is not enumerable, but is included if * non-enumerable properties are requested. */ if (enum_flags & DUK_ENUM_INCLUDE_NONENUMERABLE) { duk_push_hstring_stridx(ctx, DUK_STRIDX_LENGTH); duk_push_true(ctx); duk_put_prop(ctx, -3); } } /* * Array part * * Note: ordering between array and entry part must match 'abandon array' * behavior in duk_hobject_props.c: key order after an array is abandoned * must be the same. */ for (i = 0; i < curr->a_size; i++) { duk_hstring *k; duk_tval *tv; tv = DUK_HOBJECT_A_GET_VALUE_PTR(curr, i); if (DUK_TVAL_IS_UNDEFINED_UNUSED(tv)) { continue; } k = duk_heap_string_intern_u32_checked(thr, i); DUK_ASSERT(k); duk_push_hstring(ctx, k); duk_push_true(ctx); /* [target res key true] */ duk_put_prop(ctx, -3); /* [target res] */ } /* * Entries part */ for (i = 0; i < curr->e_used; i++) { duk_hstring *k; k = DUK_HOBJECT_E_GET_KEY(curr, i); if (!k) { continue; } if (!DUK_HOBJECT_E_SLOT_IS_ENUMERABLE(curr, i) && !(enum_flags & DUK_ENUM_INCLUDE_NONENUMERABLE)) { continue; } if (DUK_HSTRING_HAS_INTERNAL(k) && !(enum_flags & DUK_ENUM_INCLUDE_INTERNAL)) { continue; } if ((enum_flags & DUK_ENUM_ARRAY_INDICES_ONLY) && (DUK_HSTRING_GET_ARRIDX_SLOW(k) == DUK_HSTRING_NO_ARRAY_INDEX)) { continue; } DUK_ASSERT(DUK_HOBJECT_E_SLOT_IS_ACCESSOR(curr, i) || !DUK_TVAL_IS_UNDEFINED_UNUSED(&DUK_HOBJECT_E_GET_VALUE_PTR(curr, i)->v)); duk_push_hstring(ctx, k); duk_push_true(ctx); /* [target res key true] */ duk_put_prop(ctx, -3); /* [target res] */ } if (enum_flags & DUK_ENUM_OWN_PROPERTIES_ONLY) { break; } curr = curr->prototype; } /* [target res] */ duk_remove(ctx, -2); res = duk_require_hobject(ctx, -1); /* [res] */ if ((enum_flags & (DUK_ENUM_ARRAY_INDICES_ONLY | DUK_ENUM_SORT_ARRAY_INDICES)) == (DUK_ENUM_ARRAY_INDICES_ONLY | DUK_ENUM_SORT_ARRAY_INDICES)) { /* * Some E5/E5.1 algorithms require that array indices are iterated * in a strictly ascending order. This is the case for e.g. * Array.prototype.forEach() and JSON.stringify() PropertyList * handling. * * To ensure this property for arrays with an array part (and * arbitrary objects too, since e.g. forEach() can be applied * to an array), the caller can request that we sort the keys * here. */ /* FIXME: avoid this at least when target is an Array, it has an * array part, and no ancestor properties were included? Not worth * it for JSON, but maybe worth it for forEach(). */ /* FIXME: may need a 'length' filter for forEach() */ DUK_DDDPRINT("sort array indices by caller request"); sort_array_indices(res); } /* compact; no need to seal because object is internal */ duk_hobject_compact_props(thr, res); DUK_DDDPRINT("created enumerator object: %!iT", duk_get_tval(ctx, -1)); }