/*
 *  Complete an instruction by inserting its jump offset at 'offset' (the
 *  jump offset is always the final component of an instruction).  The
 *  caller computes 'skip' relative to 'offset' and must NOT account for
 *  the skip field that is about to be inserted.
 *
 *      ... A B C ins X Y Z ...   (ins may be a JUMP, SPLIT1/SPLIT2, etc)
 *  =>  ... A B C ins SKIP X Y Z
 *
 *  The final (adjusted) skip value is relative to the first byte of the
 *  next instruction.  Working it out is slightly tricky because the field
 *  uses a variable length UTF-8 encoding; see doc/regexp.rst for discussion.
 *
 *  Returns whatever duk__insert_i32() returns for the adjusted skip.
 */
DUK_LOCAL duk_uint32_t duk__insert_jump_offset(duk_re_compiler_ctx *re_ctx, duk_uint32_t offset, duk_int32_t skip) {
	duk_small_int_t guess_len; /* encoded length of the unadjusted skip */
	duk_small_int_t field_len; /* encoded length once the first guess is applied */

	/* XXX: solve into closed form (smaller code) */

	/* Only negative skips are adjusted; non-negative ones are inserted as is. */
	if (skip < 0) {
		/* Subtracting the field's own length may push the value into a
		 * longer encoding, so the length is estimated twice; two encoding
		 * attempts suffice.
		 */
		guess_len = duk_unicode_get_xutf8_length((duk_codepoint_t) duk__encode_i32(skip));
		field_len = duk_unicode_get_xutf8_length((duk_codepoint_t) duk__encode_i32(skip - (duk_int32_t) guess_len));

		/* A third attempt must not change the length any more. */
		DUK_ASSERT(duk_unicode_get_xutf8_length(duk__encode_i32(skip - (duk_int32_t) field_len)) == field_len);

		skip -= (duk_int32_t) field_len;
	}

	return duk__insert_i32(re_ctx, offset, skip);
}
/* Encode to extended UTF-8; 'out' must have space for at least * DUK_UNICODE_MAX_XUTF8_LENGTH bytes. Allows encoding of any * 32-bit (unsigned) codepoint. */ duk_small_int_t duk_unicode_encode_xutf8(duk_ucodepoint_t cp, duk_uint8_t *out) { duk_uint_fast32_t x = (duk_uint_fast32_t) cp; duk_small_int_t len; duk_uint8_t marker; duk_small_int_t i; len = duk_unicode_get_xutf8_length(cp); DUK_ASSERT(len > 0); marker = duk_unicode_xutf8_markers[len - 1]; /* 64-bit OK because always >= 0 */ i = len; DUK_ASSERT(i > 0); do { i--; if (i > 0) { out[i] = (duk_uint8_t) (0x80 + (x & 0x3f)); x >>= 6; } else { /* Note: masking of 'x' is not necessary because of * range check and shifting -> no bits overlapping * the marker should be set. */ out[0] = (duk_uint8_t) (marker + x); } } while(i > 0);
/*
 *  Complete an instruction by inserting its jump offset at 'offset' (the
 *  jump offset is always the final component of an instruction).  The
 *  caller computes 'skip' relative to 'offset' and must NOT account for
 *  the skip field that is about to be inserted.
 *
 *      ... A B C ins X Y Z ...   (ins may be a JUMP, SPLIT1/SPLIT2, etc)
 *  =>  ... A B C ins SKIP X Y Z
 *
 *  The final (adjusted) skip value is relative to the first byte of the
 *  next instruction.  Working it out is slightly tricky because the field
 *  uses a variable length UTF-8 encoding; see doc/regexp.rst for discussion.
 *
 *  Only negative skips are adjusted: they are decreased by 1..7 depending
 *  on their magnitude.  The reference (iterative) formulation of the same
 *  adjustment is:
 *
 *      len = xutf8_length(encode_i32(skip));
 *      len = xutf8_length(encode_i32(skip - len));   (two attempts suffice)
 *      skip -= len;
 *
 *  The closed forms below replace that iteration with fixed thresholds;
 *  see re_neg_jump_offset for how the thresholds were derived.
 *
 *  Returns whatever duk__insert_i32() returns for the adjusted skip.
 */
DUK_LOCAL duk_uint32_t duk__insert_jump_offset(duk_re_compiler_ctx *re_ctx, duk_uint32_t offset, duk_int32_t skip) {
#if !defined(DUK_USE_PREFER_SIZE)
	/* Closed form solution, this produces fastest code.
	 * See re_neg_jump_offset (closed1).
	 */
	if (skip < 0) {
		duk_int32_t adjust;

		/* Thresholds are tested against the unadjusted skip, smallest
		 * magnitudes first; the first matching range decides the total
		 * adjustment.
		 */
		if (skip >= -0x3eL) {
			adjust = 1;
		} else if (skip >= -0x3fdL) {
			adjust = 2;
		} else if (skip >= -0x7ffcL) {
			adjust = 3;
		} else if (skip >= -0xffffbL) {
			adjust = 4;
		} else if (skip >= -0x1fffffaL) {
			adjust = 5;
		} else if (skip >= -0x3ffffff9L) {
			adjust = 6;
		} else {
			adjust = 7;
		}
		skip -= adjust;
	}
#else /* !DUK_USE_PREFER_SIZE */
	/* Closed form solution, this produces smallest code.
	 * See re_neg_jump_offset (closed2).
	 */
	if (skip < 0) {
		/* Each comparison sees the skip as already decremented by the
		 * preceding steps, so the statement order here is significant.
		 */
		skip--;
		if (skip < -0x3fL) {
			skip--;
		}
		if (skip < -0x3ffL) {
			skip--;
		}
		if (skip < -0x7fffL) {
			skip--;
		}
		if (skip < -0xfffffL) {
			skip--;
		}
		if (skip < -0x1ffffffL) {
			skip--;
		}
		if (skip < -0x3fffffffL) {
			skip--;
		}
	}
#endif /* !DUK_USE_PREFER_SIZE */

	return duk__insert_i32(re_ctx, offset, skip);
}