/* Limit of MCode reservation reached. */ void lj_mcode_limiterr(jit_State *J, size_t need) { size_t sizemcode, maxmcode; lj_mcode_abort(J); sizemcode = (size_t)J->param[JIT_P_sizemcode] << 10; sizemcode = (sizemcode + LJ_PAGESIZE-1) & ~(size_t)(LJ_PAGESIZE - 1); maxmcode = (size_t)J->param[JIT_P_maxmcode] << 10; if ((size_t)need > sizemcode) lj_trace_err(J, LJ_TRERR_MCODEOV); /* Too long for any area. */ if (J->szallmcarea + sizemcode > maxmcode) lj_trace_err(J, LJ_TRERR_MCODEAL); mcode_allocarea(J); lj_trace_err(J, LJ_TRERR_MCODELM); /* Retry with new area. */ }
/* Narrowing of power operator or math.pow. */ TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc) { lua_Number n; if (tvisstr(vc) && !lj_str_tonum(strV(vc), vc)) lj_trace_err(J, LJ_TRERR_BADTYPE); n = numV(vc); /* Limit narrowing for pow to small exponents (or for two constants). */ if ((tref_isk(rc) && tref_isint(rc) && tref_isk(rb)) || ((J->flags & JIT_F_OPT_NARROW) && (numisint(n) && n >= -65536.0 && n <= 65536.0))) { TRef tmp; if (!tref_isinteger(rc)) { if (tref_isstr(rc)) rc = emitir(IRTG(IR_STRTO, IRT_NUM), rc, 0); rc = emitir(IRTGI(IR_TOINT), rc, IRTOINT_CHECK); /* Guarded TOINT! */ } if (!tref_isk(rc)) { /* Range guard: -65536 <= i <= 65536 */ tmp = emitir(IRTI(IR_ADD), rc, lj_ir_kint(J, 65536-2147483647-1)); emitir(IRTGI(IR_LE), tmp, lj_ir_kint(J, 2*65536-2147483647-1)); } return emitir(IRTN(IR_POWI), rb, rc); } /* FOLD covers most cases, but some are easier to do here. */ if (tref_isk(rb) && tvispone(ir_knum(IR(tref_ref(rb))))) return rb; /* 1 ^ x ==> 1 */ rc = lj_ir_tonum(J, rc); if (tref_isk(rc) && ir_knum(IR(tref_ref(rc)))->n == 0.5) return emitir(IRTN(IR_FPMATH), rb, IRFPM_SQRT); /* x ^ 0.5 ==> sqrt(x) */ /* Split up b^c into exp2(c*log2(b)). Assembler may rejoin later. */ rb = emitir(IRTN(IR_FPMATH), rb, IRFPM_LOG2); rc = emitir(IRTN(IR_MUL), rb, rc); return emitir(IRTN(IR_FPMATH), rc, IRFPM_EXP2); }
static LJ_AINLINE void *mcode_alloc(jit_State *J, size_t sz, int prot) { void *p = mmap(NULL, sz, prot, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); if (p == MAP_FAILED) lj_trace_err(J, LJ_TRERR_MCODEAL); return p; }
static LJ_AINLINE void *mcode_alloc(jit_State *J, size_t sz, DWORD prot) { void *p = VirtualAlloc(NULL, sz, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot); if (!p) lj_trace_err(J, LJ_TRERR_MCODEAL); return p; }
static void LJ_FASTCALL recff_math_pow(jit_State *J, RecordFFData *rd) { TRef tr = lj_ir_tonum(J, J->base[0]); if (!tref_isnumber_str(J->base[1])) lj_trace_err(J, LJ_TRERR_BADTYPE); J->base[0] = lj_opt_narrow_pow(J, tr, J->base[1], &rd->argv[1]); UNUSED(rd); }
static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, DWORD prot) { void *p = VirtualAlloc((void *)hint, sz, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot); if (!p && !hint) lj_trace_err(J, LJ_TRERR_MCODEAL); return p; }
/* Grow snapshot buffer. */ void lj_snap_grow_buf_(jit_State *J, MSize need) { MSize maxsnap = (MSize)J->param[JIT_P_maxsnap]; if (need > maxsnap) lj_trace_err(J, LJ_TRERR_SNAPOV); lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot); J->cur.snap = J->snapbuf; }
/* Emit or eliminate collected PHIs. */ static void loop_emit_phi(jit_State *J, IRRef1 *subst, IRRef1 *phi, IRRef nphi) { int pass2 = 0; IRRef i, nslots; IRRef invar = J->chain[IR_LOOP]; /* Pass #1: mark redundant and potentially redundant PHIs. */ for (i = 0; i < nphi; i++) { IRRef lref = phi[i]; IRRef rref = subst[lref]; if (lref == rref || rref == REF_DROP) { /* Invariants are redundant. */ irt_setmark(IR(lref)->t); } else if (!(IR(rref)->op1 == lref || IR(rref)->op2 == lref)) { /* Quick check for simple recurrences failed, need pass2. */ irt_setmark(IR(lref)->t); pass2 = 1; } } /* Pass #2: traverse variant part and clear marks of non-redundant PHIs. */ if (pass2) { for (i = J->cur.nins-1; i > invar; i--) { IRIns *ir = IR(i); if (!irref_isk(ir->op1)) irt_clearmark(IR(ir->op1)->t); if (!irref_isk(ir->op2)) irt_clearmark(IR(ir->op2)->t); } } /* Pass #3: add PHIs for variant slots without a corresponding SLOAD. */ nslots = J->baseslot+J->maxslot; for (i = 1; i < nslots; i++) { IRRef ref = tref_ref(J->slot[i]); while (!irref_isk(ref) && ref != subst[ref]) { IRIns *ir = IR(ref); irt_clearmark(ir->t); /* Unmark potential uses, too. */ if (irt_isphi(ir->t) || irt_ispri(ir->t)) break; irt_setphi(ir->t); if (nphi >= LJ_MAX_PHI) lj_trace_err(J, LJ_TRERR_PHIOV); phi[nphi++] = (IRRef1)ref; ref = subst[ref]; if (ref > invar) break; } } /* Pass #4: emit PHI instructions or eliminate PHIs. */ for (i = 0; i < nphi; i++) { IRRef lref = phi[i]; IRIns *ir = IR(lref); if (!irt_ismarked(ir->t)) { /* Emit PHI if not marked. */ IRRef rref = subst[lref]; if (rref > invar) irt_setphi(IR(rref)->t); emitir_raw(IRT(IR_PHI, irt_type(ir->t)), lref, rref); } else { /* Otherwise eliminate PHI. */ irt_clearmark(ir->t); irt_clearphi(ir->t); } } }
static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, int prot) { void *p = mmap((void *)hint, sz, prot, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); if (p == MAP_FAILED) { if (!hint) lj_trace_err(J, LJ_TRERR_MCODEAL); p = NULL; } return p; }
static GCcdata *argv2cdata(jit_State *J, TRef tr, cTValue *o) { GCcdata *cd; TRef trtypeid; if (!tref_iscdata(tr)) lj_trace_err(J, LJ_TRERR_BADTYPE); cd = cdataV(o); /* Specialize to the CTypeID. */ trtypeid = emitir(IRT(IR_FLOAD, IRT_U16), tr, IRFL_CDATA_TYPEID); emitir(IRTG(IR_EQ, IRT_INT), trtypeid, lj_ir_kint(J, (int32_t)cd->typeid)); return cd; }
/* Get runtime value of string argument. */ static GCstr *argv2str(jit_State *J, TValue *o) { if (LJ_LIKELY(tvisstr(o))) { return strV(o); } else { GCstr *s; if (!tvisnumber(o)) lj_trace_err(J, LJ_TRERR_BADTYPE); if (tvisint(o)) s = lj_str_fromint(J->L, intV(o)); else s = lj_str_fromnum(J->L, &o->n); setstrV(J->L, o, s); return s; } }
static void LJ_FASTCALL recff_ipairs_aux(jit_State *J, RecordFFData *rd) { RecordIndex ix; ix.tab = J->base[0]; if (tref_istab(ix.tab)) { if (!tvisnumber(&rd->argv[1])) /* No support for string coercion. */ lj_trace_err(J, LJ_TRERR_BADTYPE); setintV(&ix.keyv, numberVint(&rd->argv[1])+1); settabV(J->L, &ix.tabv, tabV(&rd->argv[0])); ix.val = 0; ix.idxchain = 0; ix.key = lj_opt_narrow_toint(J, J->base[1]); J->base[0] = ix.key = emitir(IRTI(IR_ADD), ix.key, lj_ir_kint(J, 1)); J->base[1] = lj_record_idx(J, &ix); rd->nres = tref_isnil(J->base[1]) ? 0 : 2; } /* else: Interpreter will throw. */ }
/* Get FILE* for I/O function. Any I/O error aborts recording, so there's ** no need to encode the alternate cases for any of the guards. */ static TRef recff_io_fp(jit_State *J, uint32_t id) { TRef tr, ud, fp; if (id) { /* io.func() */ tr = lj_ir_kptr(J, &J2G(J)->gcroot[id]); ud = emitir(IRT(IR_XLOAD, IRT_UDATA), tr, 0); } else { /* fp:method() */ ud = J->base[0]; if (!tref_isudata(ud)) lj_trace_err(J, LJ_TRERR_BADTYPE); tr = emitir(IRT(IR_FLOAD, IRT_U8), ud, IRFL_UDATA_UDTYPE); emitir(IRTGI(IR_EQ), tr, lj_ir_kint(J, UDTYPE_IO_FILE)); } fp = emitir(IRT(IR_FLOAD, IRT_PTR), ud, IRFL_UDATA_FILE); emitir(IRTG(IR_NE, IRT_PTR), fp, lj_ir_knull(J, IRT_PTR)); return fp; }
/* Determine mode of select() call. */ int32_t lj_ffrecord_select_mode(jit_State *J, TRef tr, TValue *tv) { if (tref_isstr(tr) && *strVdata(tv) == '#') { /* select('#', ...) */ if (strV(tv)->len == 1) { emitir(IRTG(IR_EQ, IRT_STR), tr, lj_ir_kstr(J, strV(tv))); } else { TRef trptr = emitir(IRT(IR_STRREF, IRT_P32), tr, lj_ir_kint(J, 0)); TRef trchar = emitir(IRT(IR_XLOAD, IRT_U8), trptr, IRXLOAD_READONLY); emitir(IRTG(IR_EQ, IRT_INT), trchar, lj_ir_kint(J, '#')); } return 0; } else { /* select(n, ...) */ int32_t start = argv2int(J, tv); if (start == 0) lj_trace_err(J, LJ_TRERR_BADTYPE); /* A bit misleading. */ return start; } }
static CTypeID argv2ctype(jit_State *J, TRef tr, cTValue *o) { if (tref_isstr(tr)) { GCstr *s = strV(o); CPState cp; CTypeID oldtop; /* Specialize to the string containing the C type declaration. */ emitir(IRTG(IR_EQ, IRT_STR), tr, lj_ir_kstr(J, s)); cp.L = J->L; cp.cts = ctype_ctsG(J2G(J)); oldtop = cp.cts->top; cp.srcname = strdata(s); cp.p = strdata(s); cp.mode = CPARSE_MODE_ABSTRACT|CPARSE_MODE_NOIMPLICIT; if (lj_cparse(&cp) || cp.cts->top > oldtop) /* Avoid new struct defs. */ lj_trace_err(J, LJ_TRERR_BADTYPE); return cp.val.id; } else { GCcdata *cd = argv2cdata(J, tr, o); return cd->typeid == CTID_CTYPEID ? *(CTypeID *)cdataptr(cd) : cd->typeid; } }
/* Add or merge a snapshot. */ void lj_snap_add(jit_State *J) { MSize nsnap = J->cur.nsnap; MSize nsnapmap = J->cur.nsnapmap; /* Merge if no ins. inbetween or if requested and no guard inbetween. */ if (J->mergesnap ? !irt_isguard(J->guardemit) : (nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins)) { nsnapmap = J->cur.snap[--nsnap].mapofs; } else { /* Need to grow snapshot buffer? */ if (LJ_UNLIKELY(nsnap >= J->sizesnap)) { MSize maxsnap = (MSize)J->param[JIT_P_maxsnap]; if (nsnap >= maxsnap) lj_trace_err(J, LJ_TRERR_SNAPOV); lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot); J->cur.snap = J->snapbuf; } J->cur.nsnap = (uint16_t)(nsnap+1); } J->mergesnap = 0; J->guardemit.irt = 0; snapshot_stack(J, &J->cur.snap[nsnap], nsnapmap); }
/* Get memory within relative jump distance of our code in 64 bit mode. */ static void *mcode_alloc(jit_State *J, size_t sz) { /* Target an address in the static assembler code (64K aligned). ** Try addresses within a distance of target-range/2+1MB..target+range/2-1MB. ** Use half the jump range so every address in the range can reach any other. */ #if LJ_TARGET_MIPS /* Use the middle of the 256MB-aligned region. */ uintptr_t target = ((uintptr_t)(void *)lj_vm_exit_handler & 0xf0000000u) + 0x08000000u; #else uintptr_t target = (uintptr_t)(void *)lj_vm_exit_handler & ~(uintptr_t)0xffff; #endif const uintptr_t range = (1u << (LJ_TARGET_JUMPRANGE-1)) - (1u << 21); /* First try a contiguous area below the last one. */ uintptr_t hint = J->mcarea ? (uintptr_t)J->mcarea - sz : 0; int i; /* Limit probing iterations, depending on the available pool size. */ for (i = 0; i < LJ_TARGET_JUMPRANGE; i++) { if (mcode_validptr(hint)) { void *p = mcode_alloc_at(J, hint, sz, MCPROT_GEN); if (mcode_validptr(p) && ((uintptr_t)p + sz - target < range || target - (uintptr_t)p < range)) return p; if (p) mcode_free(J, p, sz); /* Free badly placed area. */ } /* Next try probing 64K-aligned pseudo-random addresses. */ do { hint = LJ_PRNG_BITS(J, LJ_TARGET_JUMPRANGE-16) << 16; } while (!(hint + sz < range+range)); hint = target + hint - range; } lj_trace_err(J, LJ_TRERR_MCODEAL); /* Give up. OS probably ignores hints? */ return NULL; }
static void crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp, void *svisnz) { CTSize dsize = d->size, ssize = s->size; CTInfo dinfo = d->info, sinfo = s->info; IRType dt = crec_ct2irt(d); IRType st = crec_ct2irt(s); if (ctype_type(dinfo) > CT_MAYCONVERT || ctype_type(sinfo) > CT_MAYCONVERT) goto err_conv; /* ** Note: Unlike lj_cconv_ct_ct(), sp holds the _value_ of pointers and ** numbers up to 8 bytes. Otherwise sp holds a pointer. */ switch (cconv_idx2(dinfo, sinfo)) { /* Destination is a bool. */ case CCX(B, B): goto xstore; /* Source operand is already normalized. */ case CCX(B, I): case CCX(B, F): if (st != IRT_CDATA) { /* Specialize to the result of a comparison against 0. */ TRef zero = (st == IRT_NUM || st == IRT_FLOAT) ? lj_ir_knum(J, 0) : (st == IRT_I64 || st == IRT_U64) ? lj_ir_kint64(J, 0) : lj_ir_kint(J, 0); int isnz = crec_isnonzero(s, svisnz); emitir(IRTG(isnz ? IR_NE : IR_EQ, st), sp, zero); sp = lj_ir_kint(J, isnz); goto xstore; } goto err_nyi; /* Destination is an integer. */ case CCX(I, B): case CCX(I, I): conv_I_I: if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi; #if LJ_64 /* Sign-extend 32 to 64 bit integer. */ if (dsize == 8 && ssize < 8 && !(sinfo & CTF_UNSIGNED)) sp = emitconv(sp, dt, IRT_INT, IRCONV_SEXT); /* All other conversions are no-ops on x64. */ #else if (dsize == 8 && ssize < 8) /* Extend to 64 bit integer. */ sp = emitconv(sp, dt, ssize < 4 ? IRT_INT : st, (sinfo & CTF_UNSIGNED) ? 0 : IRCONV_SEXT); else if (dsize < 8 && ssize == 8) /* Truncate from 64 bit integer. */ sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, 0); #endif xstore: emitir(IRT(IR_XSTORE, dt), dp, sp); break; case CCX(I, C): sp = emitir(IRT(IR_XLOAD, st), sp, 0); /* Load re. */ /* fallthrough */ case CCX(I, F): if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi; sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, IRCONV_TRUNC|IRCONV_ANY); goto xstore; case CCX(I, P): case CCX(I, A): sinfo = CTINFO(CT_NUM, CTF_UNSIGNED); ssize = CTSIZE_PTR; st = IRT_UINTP; goto conv_I_I; /* Destination is a floating-point number. */ case CCX(F, B): case CCX(F, I): conv_F_I: if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi; sp = emitconv(sp, dt, ssize < 4 ? IRT_INT : st, 0); goto xstore; case CCX(F, C): sp = emitir(IRT(IR_XLOAD, st), sp, 0); /* Load re. */ /* fallthrough */ case CCX(F, F): conv_F_F: if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi; if (dt != st) sp = emitconv(sp, dt, st, 0); goto xstore; /* Destination is a complex number. */ case CCX(C, I): case CCX(C, F): { /* Clear im. */ TRef ptr = emitir(IRT(IR_ADD, IRT_PTR), dp, lj_ir_kintp(J, (dsize >> 1))); emitir(IRT(IR_XSTORE, dt), ptr, lj_ir_knum(J, 0)); } /* Convert to re. */ if ((sinfo & CTF_FP)) goto conv_F_F; else goto conv_F_I; case CCX(C, C): if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi; { TRef re, im, ptr; re = emitir(IRT(IR_XLOAD, st), sp, 0); ptr = emitir(IRT(IR_ADD, IRT_PTR), sp, lj_ir_kintp(J, (ssize >> 1))); im = emitir(IRT(IR_XLOAD, st), ptr, 0); if (dt != st) { re = emitconv(re, dt, st, 0); im = emitconv(im, dt, st, 0); } emitir(IRT(IR_XSTORE, dt), dp, re); ptr = emitir(IRT(IR_ADD, IRT_PTR), dp, lj_ir_kintp(J, (dsize >> 1))); emitir(IRT(IR_XSTORE, dt), ptr, im); } break; /* Destination is a vector. */ case CCX(V, I): case CCX(V, F): case CCX(V, C): case CCX(V, V): goto err_nyi; /* Destination is a pointer. */ case CCX(P, P): case CCX(P, A): case CCX(P, S): /* There are only 32 bit pointers/addresses on 32 bit machines. ** Also ok on x64, since all 32 bit ops clear the upper part of the reg. */ goto xstore; case CCX(P, I): if (st == IRT_CDATA) goto err_nyi; if (!LJ_64 && ssize == 8) /* Truncate from 64 bit integer. */ sp = emitconv(sp, IRT_U32, st, 0); goto xstore; case CCX(P, F): if (st == IRT_CDATA) goto err_nyi; /* The signed conversion is cheaper. x64 really has 47 bit pointers. */ sp = emitconv(sp, (LJ_64 && dsize == 8) ? IRT_I64 : IRT_U32, st, IRCONV_TRUNC|IRCONV_ANY); goto xstore; /* Destination is an array. */ case CCX(A, A): goto err_nyi; /* Destination is a struct/union. */ case CCX(S, S): goto err_nyi; default: err_conv: err_nyi: lj_trace_err(J, LJ_TRERR_NYICONV); break; } }
/* Get runtime value of int argument. */ static int32_t argv2int(jit_State *J, TValue *o) { if (!tvisnumber(o) && !(tvisstr(o) && lj_str_tonumber(strV(o), o))) lj_trace_err(J, LJ_TRERR_BADTYPE); return tvisint(o) ? intV(o) : lj_num2int(numV(o)); }
/* Emit or eliminate collected PHIs. */ static void loop_emit_phi(jit_State *J, IRRef1 *subst, IRRef1 *phi, IRRef nphi, SnapNo onsnap) { int passx = 0; IRRef i, j, nslots; IRRef invar = J->chain[IR_LOOP]; /* Pass #1: mark redundant and potentially redundant PHIs. */ for (i = 0, j = 0; i < nphi; i++) { IRRef lref = phi[i]; IRRef rref = subst[lref]; if (lref == rref || rref == REF_DROP) { /* Invariants are redundant. */ irt_clearphi(IR(lref)->t); } else { phi[j++] = (IRRef1)lref; if (!(IR(rref)->op1 == lref || IR(rref)->op2 == lref)) { /* Quick check for simple recurrences failed, need pass2. */ irt_setmark(IR(lref)->t); passx = 1; } } } nphi = j; /* Pass #2: traverse variant part and clear marks of non-redundant PHIs. */ if (passx) { SnapNo s; for (i = J->cur.nins-1; i > invar; i--) { IRIns *ir = IR(i); if (!irref_isk(ir->op2)) irt_clearmark(IR(ir->op2)->t); if (!irref_isk(ir->op1)) { irt_clearmark(IR(ir->op1)->t); if (ir->op1 < invar && ir->o >= IR_CALLN && ir->o <= IR_CARG) { /* ORDER IR */ ir = IR(ir->op1); while (ir->o == IR_CARG) { if (!irref_isk(ir->op2)) irt_clearmark(IR(ir->op2)->t); if (irref_isk(ir->op1)) break; ir = IR(ir->op1); irt_clearmark(ir->t); } } } } for (s = J->cur.nsnap-1; s >= onsnap; s--) { SnapShot *snap = &J->cur.snap[s]; SnapEntry *map = &J->cur.snapmap[snap->mapofs]; MSize n, nent = snap->nent; for (n = 0; n < nent; n++) { IRRef ref = snap_ref(map[n]); if (!irref_isk(ref)) irt_clearmark(IR(ref)->t); } } } /* Pass #3: add PHIs for variant slots without a corresponding SLOAD. */ nslots = J->baseslot+J->maxslot; for (i = 1; i < nslots; i++) { IRRef ref = tref_ref(J->slot[i]); while (!irref_isk(ref) && ref != subst[ref]) { IRIns *ir = IR(ref); irt_clearmark(ir->t); /* Unmark potential uses, too. */ if (irt_isphi(ir->t) || irt_ispri(ir->t)) break; irt_setphi(ir->t); if (nphi >= LJ_MAX_PHI) lj_trace_err(J, LJ_TRERR_PHIOV); phi[nphi++] = (IRRef1)ref; ref = subst[ref]; if (ref > invar) break; } } /* Pass #4: propagate non-redundant PHIs. */ while (passx) { passx = 0; for (i = 0; i < nphi; i++) { IRRef lref = phi[i]; IRIns *ir = IR(lref); if (!irt_ismarked(ir->t)) { /* Propagate only from unmarked PHIs. */ IRIns *irr = IR(subst[lref]); if (irt_ismarked(irr->t)) { /* Right ref points to other PHI? */ irt_clearmark(irr->t); /* Mark that PHI as non-redundant. */ passx = 1; /* Retry. */ } } } } /* Pass #5: emit PHI instructions or eliminate PHIs. */ for (i = 0; i < nphi; i++) { IRRef lref = phi[i]; IRIns *ir = IR(lref); if (!irt_ismarked(ir->t)) { /* Emit PHI if not marked. */ IRRef rref = subst[lref]; if (rref > invar) irt_setphi(IR(rref)->t); emitir_raw(IRT(IR_PHI, irt_type(ir->t)), lref, rref); } else { /* Otherwise eliminate PHI. */ irt_clearmark(ir->t); irt_clearphi(ir->t); } } }
/* Unroll loop. */ static void loop_unroll(jit_State *J) { IRRef1 phi[LJ_MAX_PHI]; uint32_t nphi = 0; IRRef1 *subst; SnapNo onsnap; SnapShot *osnap, *loopsnap; SnapEntry *loopmap, *psentinel; IRRef ins, invar; /* Use temp buffer for substitution table. ** Only non-constant refs in [REF_BIAS,invar) are valid indexes. ** Caveat: don't call into the VM or run the GC or the buffer may be gone. */ invar = J->cur.nins; subst = (IRRef1 *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, (invar-REF_BIAS)*sizeof(IRRef1)) - REF_BIAS; subst[REF_BASE] = REF_BASE; /* LOOP separates the pre-roll from the loop body. */ emitir_raw(IRTG(IR_LOOP, IRT_NIL), 0, 0); /* Grow snapshot buffer and map for copy-substituted snapshots. ** Need up to twice the number of snapshots minus #0 and loop snapshot. ** Need up to twice the number of entries plus fallback substitutions ** from the loop snapshot entries for each new snapshot. ** Caveat: both calls may reallocate J->cur.snap and J->cur.snapmap! */ onsnap = J->cur.nsnap; lj_snap_grow_buf(J, 2*onsnap-2); lj_snap_grow_map(J, J->cur.nsnapmap*2+(onsnap-2)*J->cur.snap[onsnap-1].nent); /* The loop snapshot is used for fallback substitutions. */ loopsnap = &J->cur.snap[onsnap-1]; loopmap = &J->cur.snapmap[loopsnap->mapofs]; /* The PC of snapshot #0 and the loop snapshot must match. */ psentinel = &loopmap[loopsnap->nent]; lua_assert(*psentinel == J->cur.snapmap[J->cur.snap[0].nent]); *psentinel = SNAP(255, 0, 0); /* Replace PC with temporary sentinel. */ /* Start substitution with snapshot #1 (#0 is empty for root traces). */ osnap = &J->cur.snap[1]; /* Copy and substitute all recorded instructions and snapshots. */ for (ins = REF_FIRST; ins < invar; ins++) { IRIns *ir; IRRef op1, op2; if (ins >= osnap->ref) /* Instruction belongs to next snapshot? */ loop_subst_snap(J, osnap++, loopmap, subst); /* Copy-substitute it. */ /* Substitute instruction operands. */ ir = IR(ins); op1 = ir->op1; if (!irref_isk(op1)) op1 = subst[op1]; op2 = ir->op2; if (!irref_isk(op2)) op2 = subst[op2]; if (irm_kind(lj_ir_mode[ir->o]) == IRM_N && op1 == ir->op1 && op2 == ir->op2) { /* Regular invariant ins? */ subst[ins] = (IRRef1)ins; /* Shortcut. */ } else { /* Re-emit substituted instruction to the FOLD/CSE/etc. pipeline. */ IRType1 t = ir->t; /* Get this first, since emitir may invalidate ir. */ IRRef ref = tref_ref(emitir(ir->ot & ~IRT_ISPHI, op1, op2)); subst[ins] = (IRRef1)ref; if (ref != ins) { IRIns *irr = IR(ref); if (ref < invar) { /* Loop-carried dependency? */ /* Potential PHI? */ if (!irref_isk(ref) && !irt_isphi(irr->t) && !irt_ispri(irr->t)) { irt_setphi(irr->t); if (nphi >= LJ_MAX_PHI) lj_trace_err(J, LJ_TRERR_PHIOV); phi[nphi++] = (IRRef1)ref; } /* Check all loop-carried dependencies for type instability. */ if (!irt_sametype(t, irr->t)) { if (irt_isinteger(t) && irt_isinteger(irr->t)) continue; else if (irt_isnum(t) && irt_isinteger(irr->t)) /* Fix int->num. */ ref = tref_ref(emitir(IRTN(IR_CONV), ref, IRCONV_NUM_INT)); else if (irt_isnum(irr->t) && irt_isinteger(t)) /* Fix num->int. */ ref = tref_ref(emitir(IRTGI(IR_CONV), ref, IRCONV_INT_NUM|IRCONV_CHECK)); else lj_trace_err(J, LJ_TRERR_TYPEINS); subst[ins] = (IRRef1)ref; irr = IR(ref); goto phiconv; } } else if (ref != REF_DROP && irr->o == IR_CONV && ref > invar && irr->op1 < invar) { /* May need an extra PHI for a CONV. */ ref = irr->op1; irr = IR(ref); phiconv: if (ref < invar && !irref_isk(ref) && !irt_isphi(irr->t)) { irt_setphi(irr->t); if (nphi >= LJ_MAX_PHI) lj_trace_err(J, LJ_TRERR_PHIOV); phi[nphi++] = (IRRef1)ref; } } } } } if (!irt_isguard(J->guardemit)) /* Drop redundant snapshot. */ J->cur.nsnapmap = (uint16_t)J->cur.snap[--J->cur.nsnap].mapofs; lua_assert(J->cur.nsnapmap <= J->sizesnapmap); *psentinel = J->cur.snapmap[J->cur.snap[0].nent]; /* Restore PC. */ loop_emit_phi(J, subst, phi, nphi, onsnap); }
/* Get runtime value of int argument. */ static int32_t argv2int(jit_State *J, TValue *o) { if (!lj_strscan_numberobj(o)) lj_trace_err(J, LJ_TRERR_BADTYPE); return tvisint(o) ? intV(o) : lj_num2int(numV(o)); }