/* Restore a value from the trace exit state. */
static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
                            SnapNo snapno, BloomFilter rfilt,
                            IRRef ref, TValue *o)
{
  IRIns *ir = &T->ir[ref];
  IRType1 t = ir->t;
  RegSP rs = ir->prev;
  if (irref_isk(ref)) {  /* Restore constant slot. */
    lj_ir_kvalue(J->L, o, ir);
    return;
  }
  if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
    rs = snap_renameref(T, snapno, ref, rs);
  lua_assert(!LJ_GC64);  /* TODO_GC64: handle 64 bit references. */
  if (ra_hasspill(regsp_spill(rs))) {  /* Restore from spill slot. */
    int32_t *sps = &ex->spill[regsp_spill(rs)];
    if (irt_isinteger(t)) {
      setintV(o, *sps);
#if !LJ_SOFTFP
    } else if (irt_isnum(t)) {
      o->u64 = *(uint64_t *)sps;
#endif
    } else if (LJ_64 && irt_islightud(t)) {
      /* 64 bit lightuserdata which may escape already has the tag bits. */
      o->u64 = *(uint64_t *)sps;
    } else {
      lua_assert(!irt_ispri(t));  /* PRI refs never have a spill slot. */
      setgcV(J->L, o, (GCobj *)(uintptr_t)*(GCSize *)sps, irt_toitype(t));
    }
  } else {  /* Restore from register. */
    Reg r = regsp_reg(rs);
    if (ra_noreg(r)) {
      lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
      snap_restoreval(J, T, ex, snapno, rfilt, ir->op1, o);
      if (LJ_DUALNUM) setnumV(o, (lua_Number)intV(o));
      return;
    } else if (irt_isinteger(t)) {
      setintV(o, (int32_t)ex->gpr[r-RID_MIN_GPR]);
#if !LJ_SOFTFP
    } else if (irt_isnum(t)) {
      setnumV(o, ex->fpr[r-RID_MIN_FPR]);
#endif
    } else if (LJ_64 && irt_is64(t)) {
      /* 64 bit values that already have the tag bits. */
      o->u64 = ex->gpr[r-RID_MIN_GPR];
    } else if (irt_ispri(t)) {
      setpriV(o, irt_toitype(t));
    } else {
      setgcV(J->L, o, (GCobj *)ex->gpr[r-RID_MIN_GPR], irt_toitype(t));
    }
  }
}
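/* Illustrative sketch (not part of the build): the general idea behind the
** rfilt test above. A single machine word acts as a tiny Bloom filter over
** renamed IR references: a zero test proves "this ref was never renamed"
** and skips the snap_renameref() walk, while a set bit only means "maybe
** renamed". All names below are hypothetical demo code.
*/
#if 0
#include <stdint.h>

typedef uint64_t DemoBloom;

static void demo_bloomset(DemoBloom *bf, uint32_t ref)
{
  *bf |= (DemoBloom)1 << (ref & 63);  /* Hash: low 6 bits of the ref. */
}

static int demo_bloomtest(DemoBloom bf, uint32_t ref)
{
  return (int)((bf >> (ref & 63)) & 1);  /* 0 = definitely not, 1 = maybe. */
}
#endif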
/* Add all modified slots to the snapshot. */
static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
{
  IRRef retf = J->chain[IR_RETF];  /* Limits SLOAD restore elimination. */
  BCReg s;
  MSize n = 0;
  for (s = 0; s < nslots; s++) {
    TRef tr = J->slot[s];
    IRRef ref = tref_ref(tr);
    if (ref) {
      SnapEntry sn = SNAP_TR(s, tr);
      IRIns *ir = &J->cur.ir[ref];
      if (!(sn & (SNAP_CONT|SNAP_FRAME)) &&
          ir->o == IR_SLOAD && ir->op1 == s && ref > retf) {
        /* No need to snapshot unmodified non-inherited slots. */
        if (!(ir->op2 & IRSLOAD_INHERIT))
          continue;
        /* No need to restore readonly slots and unmodified non-parent slots. */
        if (!(LJ_DUALNUM && (ir->op2 & IRSLOAD_CONVERT)) &&
            (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT)
          sn |= SNAP_NORESTORE;
      }
      if (LJ_SOFTFP && irt_isnum(ir->t))
        sn |= SNAP_SOFTFPNUM;
      map[n++] = sn;
    }
  }
  return n;
}
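/* Illustrative sketch (not part of the build): a SnapEntry packs the stack
** slot number, flag bits and the IR reference into one 32 bit word, which is
** why the flags tested above (SNAP_CONT/SNAP_FRAME/SNAP_NORESTORE/
** SNAP_SOFTFPNUM) can simply be OR'ed into the entry. The field positions
** below are a hypothetical example of such a layout, not authoritative.
*/
#if 0
#include <stdint.h>

typedef uint32_t DemoSnapEntry;
#define DEMO_SNAP(slot, flags, ref) \
  (((DemoSnapEntry)(slot) << 24) + (flags) + (ref))
#define DEMO_SNAP_NORESTORE 0x040000u           /* Example flag bit. */
#define demo_snap_slot(sn)  ((uint32_t)((sn) >> 24))
#define demo_snap_ref(sn)   ((sn) & 0xffffu)    /* Low 16 bits: IR ref. */
#endif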
/* Check whether HREF of TNEW/TDUP can be folded to niltv. */
int LJ_FASTCALL lj_opt_fwd_href_nokey(jit_State *J)
{
  IRRef lim = fins->op1;  /* Search limit. */
  IRRef ref;

  /* The key for an ASTORE may end up in the hash part after a NEWREF. */
  if (irt_isnum(fright->t) && J->chain[IR_NEWREF] > lim) {
    ref = J->chain[IR_ASTORE];
    while (ref > lim) {
      if (ref < J->chain[IR_NEWREF])
        return 0;  /* Conflict. */
      ref = IR(ref)->prev;
    }
  }

  /* Search for conflicting stores. */
  ref = J->chain[IR_HSTORE];
  while (ref > lim) {
    IRIns *store = IR(ref);
    if (aa_ahref(J, fins, IR(store->op1)) != ALIAS_NO)
      return 0;  /* Conflict. */
    ref = store->prev;
  }

  return 1;  /* No conflict. Can fold to niltv. */
}
/* This is rather simplistic right now, but better than nothing. */
TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J)
{
  IRRef tab = fins->op1;  /* Table reference. */
  IRRef lim = tab;  /* Search limit. */
  IRRef ref;

  /* Any ASTORE is a conflict and limits the search. */
  if (J->chain[IR_ASTORE] > lim) lim = J->chain[IR_ASTORE];

  /* Search for conflicting HSTORE with numeric key. */
  ref = J->chain[IR_HSTORE];
  while (ref > lim) {
    IRIns *store = IR(ref);
    IRIns *href = IR(store->op1);
    IRIns *key = IR(href->op2);
    if (irt_isnum(key->o == IR_KSLOT ? IR(key->op1)->t : key->t)) {
      lim = ref;  /* Conflicting store found, limits search for TLEN. */
      break;
    }
    ref = store->prev;
  }

  /* Try to find a matching load. Below the conflicting store, if any. */
  return lj_opt_cselim(J, lim);
}
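/* Illustrative sketch (not part of the build): the backward search idiom
** used by the two functions above. J->chain[op] holds the most recent
** instruction with that opcode; each instruction's prev field links to the
** previous one, and the walk stops at a search limit. All names below are
** hypothetical demo code.
*/
#if 0
#include <stdint.h>

typedef struct DemoIns { uint16_t op1, op2, prev; } DemoIns;
typedef struct DemoState { uint32_t chain[256]; DemoIns ir[65536]; } DemoState;

static uint32_t demo_find_store(DemoState *J, int op, uint32_t key,
                                uint32_t lim)
{
  uint32_t ref = J->chain[op];  /* Newest instruction with this opcode. */
  while (ref > lim) {           /* Higher refs are more recent. */
    if (J->ir[ref].op1 == key)
      return ref;               /* Newest store to the same location. */
    ref = J->ir[ref].prev;      /* Per-opcode chain links backwards. */
  }
  return 0;                     /* Nothing found above the limit. */
}
#endif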
/* Emit a CALLN with two split 64 bit arguments. */
static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir,
                           IRIns *ir, IRCallID id)
{
  IRRef tmp, op1 = ir->op1, op2 = ir->op2;
  J->cur.nins--;
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, hisubst[op2]);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, hisubst[op2]);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
#endif
  ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
  return split_emit(J,
                    IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ?
                        IRT_SOFTFP : IRT_INT),
                    tmp, tmp);
}
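/* Illustrative sketch (not part of the build): why the CARG emission order
** flips between LJ_LE and LJ_BE above. A 64 bit argument is passed as two
** 32 bit words whose memory order depends on host endianness. Hypothetical
** demo code.
*/
#if 0
#include <stdint.h>
#include <string.h>

/* Extract the two 32 bit halves of a 64 bit value in *memory* order.
** On little-endian hosts the loword comes first, on big-endian hosts the
** hiword does, matching the argument order chosen above. */
static void demo_memhalves(uint64_t v, uint32_t out[2])
{
  memcpy(out, &v, sizeof(v));  /* out[0]/out[1] follow host byte order. */
}
#endif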
/* Unroll loop. */
static void loop_unroll(jit_State *J)
{
  IRRef1 phi[LJ_MAX_PHI];
  uint32_t nphi = 0;
  IRRef1 *subst;
  SnapNo onsnap;
  SnapShot *osnap, *loopsnap;
  SnapEntry *loopmap, *psentinel;
  IRRef ins, invar;

  /* Use temp buffer for substitution table.
  ** Only non-constant refs in [REF_BIAS,invar) are valid indexes.
  ** Caveat: don't call into the VM or run the GC or the buffer may be gone.
  */
  invar = J->cur.nins;
  subst = (IRRef1 *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf,
                                   (invar-REF_BIAS)*sizeof(IRRef1)) - REF_BIAS;
  subst[REF_BASE] = REF_BASE;

  /* LOOP separates the pre-roll from the loop body. */
  emitir_raw(IRTG(IR_LOOP, IRT_NIL), 0, 0);

  /* Grow snapshot buffer and map for copy-substituted snapshots.
  ** Need up to twice the number of snapshots minus #0 and loop snapshot.
  ** Need up to twice the number of entries plus fallback substitutions
  ** from the loop snapshot entries for each new snapshot.
  ** Caveat: both calls may reallocate J->cur.snap and J->cur.snapmap!
  */
  onsnap = J->cur.nsnap;
  lj_snap_grow_buf(J, 2*onsnap-2);
  lj_snap_grow_map(J, J->cur.nsnapmap*2+(onsnap-2)*J->cur.snap[onsnap-1].nent);

  /* The loop snapshot is used for fallback substitutions. */
  loopsnap = &J->cur.snap[onsnap-1];
  loopmap = &J->cur.snapmap[loopsnap->mapofs];
  /* The PC of snapshot #0 and the loop snapshot must match. */
  psentinel = &loopmap[loopsnap->nent];
  lua_assert(*psentinel == J->cur.snapmap[J->cur.snap[0].nent]);
  *psentinel = SNAP(255, 0, 0);  /* Replace PC with temporary sentinel. */

  /* Start substitution with snapshot #1 (#0 is empty for root traces). */
  osnap = &J->cur.snap[1];

  /* Copy and substitute all recorded instructions and snapshots. */
  for (ins = REF_FIRST; ins < invar; ins++) {
    IRIns *ir;
    IRRef op1, op2;

    if (ins >= osnap->ref)  /* Instruction belongs to next snapshot? */
      loop_subst_snap(J, osnap++, loopmap, subst);  /* Copy-substitute it. */

    /* Substitute instruction operands. */
    ir = IR(ins);
    op1 = ir->op1;
    if (!irref_isk(op1)) op1 = subst[op1];
    op2 = ir->op2;
    if (!irref_isk(op2)) op2 = subst[op2];
    if (irm_kind(lj_ir_mode[ir->o]) == IRM_N &&
        op1 == ir->op1 && op2 == ir->op2) {  /* Regular invariant ins? */
      subst[ins] = (IRRef1)ins;  /* Shortcut. */
    } else {
      /* Re-emit substituted instruction to the FOLD/CSE/etc. pipeline. */
      IRType1 t = ir->t;  /* Get this first, since emitir may invalidate ir. */
      IRRef ref = tref_ref(emitir(ir->ot & ~IRT_ISPHI, op1, op2));
      subst[ins] = (IRRef1)ref;
      if (ref != ins) {
        IRIns *irr = IR(ref);
        if (ref < invar) {  /* Loop-carried dependency? */
          /* Potential PHI? */
          if (!irref_isk(ref) && !irt_isphi(irr->t) && !irt_ispri(irr->t)) {
            irt_setphi(irr->t);
            if (nphi >= LJ_MAX_PHI)
              lj_trace_err(J, LJ_TRERR_PHIOV);
            phi[nphi++] = (IRRef1)ref;
          }
          /* Check all loop-carried dependencies for type instability. */
          if (!irt_sametype(t, irr->t)) {
            if (irt_isinteger(t) && irt_isinteger(irr->t))
              continue;
            else if (irt_isnum(t) && irt_isinteger(irr->t))  /* Fix int->num. */
              ref = tref_ref(emitir(IRTN(IR_CONV), ref, IRCONV_NUM_INT));
            else if (irt_isnum(irr->t) && irt_isinteger(t))  /* Fix num->int. */
              ref = tref_ref(emitir(IRTGI(IR_CONV), ref,
                                    IRCONV_INT_NUM|IRCONV_CHECK));
            else
              lj_trace_err(J, LJ_TRERR_TYPEINS);
            subst[ins] = (IRRef1)ref;
            irr = IR(ref);
            goto phiconv;
          }
        } else if (ref != REF_DROP && irr->o == IR_CONV &&
                   ref > invar && irr->op1 < invar) {
          /* May need an extra PHI for a CONV. */
          ref = irr->op1;
          irr = IR(ref);
        phiconv:
          if (ref < invar && !irref_isk(ref) && !irt_isphi(irr->t)) {
            irt_setphi(irr->t);
            if (nphi >= LJ_MAX_PHI)
              lj_trace_err(J, LJ_TRERR_PHIOV);
            phi[nphi++] = (IRRef1)ref;
          }
        }
      }
    }
  }
  if (!irt_isguard(J->guardemit))  /* Drop redundant snapshot. */
    J->cur.nsnapmap = (uint16_t)J->cur.snap[--J->cur.nsnap].mapofs;
  lua_assert(J->cur.nsnapmap <= J->sizesnapmap);
  *psentinel = J->cur.snapmap[J->cur.snap[0].nent];  /* Restore PC. */

  loop_emit_phi(J, subst, phi, nphi, onsnap);
}
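/* Illustrative sketch (not part of the build): the essence of the subst[]
** copy-substitution above. Each invariant instruction maps to itself; each
** variant instruction is re-emitted as a peeled copy and later operands are
** rewritten through the table. Hypothetical, simplified demo code (constants
** and snapshots are ignored here).
*/
#if 0
#include <stdint.h>

typedef struct UnrollIns { uint8_t op; uint16_t op1, op2; } UnrollIns;

/* Hypothetical re-emit hook: returns the ref of the peeled copy. */
extern uint16_t demo_emit(uint8_t op, uint16_t op1, uint16_t op2);

static void demo_unroll(UnrollIns *ir, uint16_t *subst, uint16_t first,
                        uint16_t invar)
{
  uint16_t ins;
  for (ins = first; ins < invar; ins++) {
    uint16_t op1 = subst[ir[ins].op1];  /* Rewrite operands via the table. */
    uint16_t op2 = subst[ir[ins].op2];
    if (op1 == ir[ins].op1 && op2 == ir[ins].op2)
      subst[ins] = ins;  /* Invariant instruction: maps to itself. */
    else
      subst[ins] = demo_emit(ir[ins].op, op1, op2);  /* Peeled copy. */
  }
}
#endif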
/* Array and hash load forwarding. */
static TRef fwd_ahload(jit_State *J, IRRef xref)
{
  IRIns *xr = IR(xref);
  IRRef lim = xref;  /* Search limit. */
  IRRef ref;

  /* Search for conflicting stores. */
  ref = J->chain[fins->o+IRDELTA_L2S];
  while (ref > xref) {
    IRIns *store = IR(ref);
    switch (aa_ahref(J, xr, IR(store->op1))) {
    case ALIAS_NO:   break;  /* Continue searching. */
    case ALIAS_MAY:  lim = ref; goto cselim;  /* Limit search for load. */
    case ALIAS_MUST: return store->op2;  /* Store forwarding. */
    }
    ref = store->prev;
  }

  /* No conflicting store (yet): const-fold loads from allocations. */
  {
    IRIns *ir = (xr->o == IR_HREFK || xr->o == IR_AREF) ? IR(xr->op1) : xr;
    IRRef tab = ir->op1;
    ir = IR(tab);
    if (ir->o == IR_TNEW || (ir->o == IR_TDUP && irref_isk(xr->op2))) {
      /* A NEWREF with a number key may end up pointing to the array part.
      ** But it's referenced from HSTORE and not found in the ASTORE chain.
      ** For now simply consider this a conflict without forwarding anything.
      */
      if (xr->o == IR_AREF) {
        IRRef ref2 = J->chain[IR_NEWREF];
        while (ref2 > tab) {
          IRIns *newref = IR(ref2);
          if (irt_isnum(IR(newref->op2)->t))
            goto cselim;
          ref2 = newref->prev;
        }
      }
      /* NEWREF inhibits CSE for HREF, and dependent FLOADs from HREFK/AREF.
      ** But the above search for conflicting stores was limited by xref.
      ** So continue searching, limited by the TNEW/TDUP. Store forwarding
      ** is ok, too. A conflict does NOT limit the search for a matching load.
      */
      while (ref > tab) {
        IRIns *store = IR(ref);
        switch (aa_ahref(J, xr, IR(store->op1))) {
        case ALIAS_NO:   break;  /* Continue searching. */
        case ALIAS_MAY:  goto cselim;  /* Conflicting store. */
        case ALIAS_MUST: return store->op2;  /* Store forwarding. */
        }
        ref = store->prev;
      }
      lua_assert(ir->o != IR_TNEW || irt_isnil(fins->t));
      if (irt_ispri(fins->t)) {
        return TREF_PRI(irt_type(fins->t));
      } else if (irt_isnum(fins->t) || (LJ_DUALNUM && irt_isint(fins->t)) ||
                 irt_isstr(fins->t)) {
        TValue keyv;
        cTValue *tv;
        IRIns *key = IR(xr->op2);
        if (key->o == IR_KSLOT) key = IR(key->op1);
        lj_ir_kvalue(J->L, &keyv, key);
        tv = lj_tab_get(J->L, ir_ktab(IR(ir->op1)), &keyv);
        lua_assert(itype2irt(tv) == irt_type(fins->t));
        if (irt_isnum(fins->t))
          return lj_ir_knum_u64(J, tv->u64);
        else if (LJ_DUALNUM && irt_isint(fins->t))
          return lj_ir_kint(J, intV(tv));
        else
          return lj_ir_kstr(J, strV(tv));
      }
      /* Otherwise: don't intern as a constant. */
    }
  }

cselim:
  /* Try to find a matching load. Below the conflicting store, if any. */
  ref = J->chain[fins->o];
  while (ref > lim) {
    IRIns *load = IR(ref);
    if (load->op1 == xref)
      return ref;  /* Load forwarding. */
    ref = load->prev;
  }
  return 0;  /* Conflict or no match. */
}
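/* Illustrative sketch (not part of the build): the three-way alias result
** driving the search above. ALIAS_MUST forwards the stored value directly,
** ALIAS_MAY only limits how far back a matching load may be found, and
** ALIAS_NO keeps searching. Hypothetical, simplified demo code with a
** placeholder alias analysis.
*/
#if 0
#include <stdint.h>

enum { DEMO_ALIAS_NO, DEMO_ALIAS_MAY, DEMO_ALIAS_MUST };
typedef struct FwdIns { uint32_t op1, op2, prev; } FwdIns;
typedef struct FwdState {
  uint32_t store_chain, load_chain;
  FwdIns ir[65536];
} FwdState;

/* Placeholder alias analysis: same ref = MUST, otherwise NO. */
static int demo_alias(FwdState *J, uint32_t xref, uint32_t sref)
{
  return J->ir[sref].op1 == xref ? DEMO_ALIAS_MUST : DEMO_ALIAS_NO;
}

static uint32_t demo_fwd_load(FwdState *J, uint32_t xref)
{
  uint32_t lim = xref, ref = J->store_chain;
  while (ref > xref) {
    switch (demo_alias(J, xref, ref)) {
    case DEMO_ALIAS_NO: break;                    /* Keep searching. */
    case DEMO_ALIAS_MAY: lim = ref; goto cselim;  /* Limit the load search. */
    case DEMO_ALIAS_MUST: return J->ir[ref].op2;  /* Forward stored value. */
    }
    ref = J->ir[ref].prev;
  }
cselim:
  for (ref = J->load_chain; ref > lim; ref = J->ir[ref].prev)
    if (J->ir[ref].op1 == xref) return ref;  /* Reuse an earlier load. */
  return 0;  /* No forwarding possible. */
}
#endif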
/* Transform the old IR to the new IR. */
static void split_ir(jit_State *J)
{
  IRRef nins = J->cur.nins, nk = J->cur.nk;
  MSize irlen = nins - nk;
  MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1));
  IRIns *oir = (IRIns *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, need);
  IRRef1 *hisubst;
  IRRef ref, snref;
  SnapShot *snap;

  /* Copy old IR to buffer. */
  memcpy(oir, IR(nk), irlen*sizeof(IRIns));
  /* Bias hiword substitution table and old IR. Loword kept in field prev. */
  hisubst = (IRRef1 *)&oir[irlen] - nk;
  oir -= nk;

  /* Remove all IR instructions, but retain IR constants. */
  J->cur.nins = REF_FIRST;
  J->loopref = 0;

  /* Process constants and fixed references. */
  for (ref = nk; ref <= REF_BASE; ref++) {
    IRIns *ir = &oir[ref];
    if ((LJ_SOFTFP && ir->o == IR_KNUM) || ir->o == IR_KINT64) {
      /* Split up 64 bit constant. */
      TValue tv = *ir_k64(ir);
      ir->prev = lj_ir_kint(J, (int32_t)tv.u32.lo);
      hisubst[ref] = lj_ir_kint(J, (int32_t)tv.u32.hi);
    } else {
      ir->prev = ref;  /* Identity substitution for loword. */
      hisubst[ref] = 0;
    }
  }

  /* Process old IR instructions. */
  snap = J->cur.snap;
  snref = snap->ref;
  for (ref = REF_FIRST; ref < nins; ref++) {
    IRIns *ir = &oir[ref];
    IRRef nref = lj_ir_nextins(J);
    IRIns *nir = IR(nref);
    IRRef hi = 0;

    if (ref >= snref) {
      snap->ref = nref;
      split_subst_snap(J, snap++, oir);
      snref = snap < &J->cur.snap[J->cur.nsnap] ? snap->ref : ~(IRRef)0;
    }

    /* Copy-substitute old instruction to new instruction. */
    nir->op1 = ir->op1 < nk ? ir->op1 : oir[ir->op1].prev;
    nir->op2 = ir->op2 < nk ? ir->op2 : oir[ir->op2].prev;
    ir->prev = nref;  /* Loword substitution. */
    nir->o = ir->o;
    nir->t.irt = ir->t.irt & ~(IRT_MARK|IRT_ISPHI);
    hisubst[ref] = 0;

    /* Split 64 bit instructions. */
#if LJ_SOFTFP
    if (irt_isnum(ir->t)) {
      nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD);  /* Turn into INT op. */
      /* Note: hi ref = lo ref + 1! Required for SNAP_SOFTFPNUM logic. */
      switch (ir->o) {
      case IR_ADD:
        hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_add);
        break;
      case IR_SUB:
        hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_sub);
        break;
      case IR_MUL:
        hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_mul);
        break;
      case IR_DIV:
        hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_div);
        break;
      case IR_POW:
        hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi);
        break;
      case IR_FPMATH:
        /* Try to rejoin pow from EXP2, MUL and LOG2. */
        if (nir->op2 == IRFPM_EXP2 && nir->op1 > J->loopref) {
          IRIns *irp = IR(nir->op1);
          if (irp->o == IR_CALLN && irp->op2 == IRCALL_softfp_mul) {
            IRIns *irm4 = IR(irp->op1);
            IRIns *irm3 = IR(irm4->op1);
            IRIns *irm12 = IR(irm3->op1);
            IRIns *irl1 = IR(irm12->op1);
            if (irm12->op1 > J->loopref && irl1->o == IR_CALLN &&
                irl1->op2 == IRCALL_lj_vm_log2) {
              IRRef tmp = irl1->op1;  /* Recycle first two args from LOG2. */
              IRRef arg3 = irm3->op2, arg4 = irm4->op2;
              J->cur.nins--;
              tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg3);
              tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg4);
              ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_pow);
              hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
              break;
            }
          }
        }
        hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2);
        break;
      case IR_ATAN2:
        hi = split_call_ll(J, hisubst, oir, ir, IRCALL_atan2);
        break;
      case IR_LDEXP:
        hi = split_call_li(J, hisubst, oir, ir, IRCALL_ldexp);
        break;
      case IR_NEG: case IR_ABS:
        nir->o = IR_CONV;  /* Pass through loword. */
        nir->op2 = (IRT_INT << 5) | IRT_INT;
        hi = split_emit(J,
                        IRT(ir->o == IR_NEG ? IR_BXOR : IR_BAND, IRT_SOFTFP),
                        hisubst[ir->op1], hisubst[ir->op2]);
        break;
      case IR_SLOAD:
        if ((nir->op2 & IRSLOAD_CONVERT)) {  /* Convert from int to number. */
          nir->op2 &= ~IRSLOAD_CONVERT;
          ir->prev = nref = split_emit(J, IRTI(IR_CALLN), nref,
                                       IRCALL_softfp_i2d);
          hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
          break;
        }
        /* fallthrough */
      case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
      case IR_STRTO:
        hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
        break;
      case IR_XLOAD: {
        IRIns inslo = *nir;  /* Save/undo the emit of the lo XLOAD. */
        J->cur.nins--;
        hi = split_ptr(J, oir, ir->op1);  /* Insert the hiref ADD. */
        nref = lj_ir_nextins(J);
        nir = IR(nref);
        *nir = inslo;  /* Re-emit lo XLOAD immediately before hi XLOAD. */
        hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP), hi, ir->op2);
#if LJ_LE
        ir->prev = nref;
#else
        ir->prev = hi; hi = nref;
#endif
        break;
        }
      case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_XSTORE:
        split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nir->op1, hisubst[ir->op2]);
        break;
      case IR_CONV: {  /* Conversion to number. Others handled below. */
        IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
        UNUSED(st);
#if LJ_32 && LJ_HASFFI
        if (st == IRT_I64 || st == IRT_U64) {
          hi = split_call_l(J, hisubst, oir, ir,
                 st == IRT_I64 ? IRCALL_fp64_l2d : IRCALL_fp64_ul2d);
          break;
        }
#endif
        lua_assert(st == IRT_INT ||
                   (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT)));
        nir->o = IR_CALLN;
#if LJ_32 && LJ_HASFFI
        nir->op2 = st == IRT_INT ? IRCALL_softfp_i2d :
                   st == IRT_FLOAT ? IRCALL_softfp_f2d :
                   IRCALL_softfp_ui2d;
#else
        nir->op2 = IRCALL_softfp_i2d;
#endif
        hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
        break;
        }
      case IR_CALLN:
      case IR_CALLL:
      case IR_CALLS:
      case IR_CALLXS:
        goto split_call;
      case IR_PHI:
        if (nir->op1 == nir->op2)
          J->cur.nins--;  /* Drop useless PHIs. */
        if (hisubst[ir->op1] != hisubst[ir->op2])
          split_emit(J, IRT(IR_PHI, IRT_SOFTFP),
                     hisubst[ir->op1], hisubst[ir->op2]);
        break;
      case IR_HIOP:
        J->cur.nins--;  /* Drop joining HIOP. */
        ir->prev = nir->op1;
        hi = nir->op2;
        break;
      default:
        lua_assert(ir->o <= IR_NE || ir->o == IR_MIN || ir->o == IR_MAX);
        hi = split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP),
                        hisubst[ir->op1], hisubst[ir->op2]);
        break;
      }
    } else
#endif
#if LJ_32 && LJ_HASFFI
    if (irt_isint64(ir->t)) {
      IRRef hiref = hisubst[ir->op1];
      nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD);  /* Turn into INT op. */
      switch (ir->o) {
      case IR_ADD:
      case IR_SUB:
        /* Use plain op for hiword if loword cannot produce a carry/borrow. */
        if (irref_isk(nir->op2) && IR(nir->op2)->i == 0) {
          ir->prev = nir->op1;  /* Pass through loword. */
          nir->op1 = hiref; nir->op2 = hisubst[ir->op2];
          hi = nref;
          break;
        }
        /* fallthrough */
      case IR_NEG:
        hi = split_emit(J, IRTI(IR_HIOP), hiref, hisubst[ir->op2]);
        break;
      case IR_MUL:
        hi = split_call_ll(J, hisubst, oir, ir, IRCALL_lj_carith_mul64);
        break;
      case IR_DIV:
        hi = split_call_ll(J, hisubst, oir, ir,
                           irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
                                              IRCALL_lj_carith_divu64);
        break;
      case IR_MOD:
        hi = split_call_ll(J, hisubst, oir, ir,
                           irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
                                              IRCALL_lj_carith_modu64);
        break;
      case IR_POW:
        hi = split_call_ll(J, hisubst, oir, ir,
                           irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
                                              IRCALL_lj_carith_powu64);
        break;
      case IR_FLOAD:
        lua_assert(ir->op2 == IRFL_CDATA_INT64);
        hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4);
#if LJ_BE
        ir->prev = hi; hi = nref;
#endif
        break;
      case IR_XLOAD:
        hi = split_emit(J, IRTI(IR_XLOAD), split_ptr(J, oir, ir->op1),
                        ir->op2);
#if LJ_BE
        ir->prev = hi; hi = nref;
#endif
        break;
      case IR_XSTORE:
        split_emit(J, IRTI(IR_HIOP), nir->op1, hisubst[ir->op2]);
        break;
      case IR_CONV: {  /* Conversion to 64 bit integer. Others handled below. */
        IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
#if LJ_SOFTFP
        if (st == IRT_NUM) {  /* NUM to 64 bit int conv. */
          hi = split_call_l(J, hisubst, oir, ir,
                 irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul);
        } else if (st == IRT_FLOAT) {  /* FLOAT to 64 bit int conv. */
          nir->o = IR_CALLN;
          nir->op2 = irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul;
          hi = split_emit(J, IRTI(IR_HIOP), nref, nref);
        }
#else
        if (st == IRT_NUM || st == IRT_FLOAT) {  /* FP to 64 bit int conv. */
          hi = split_emit(J, IRTI(IR_HIOP), nir->op1, nref);
        }
#endif
        else if (st == IRT_I64 || st == IRT_U64) {  /* 64/64 bit cast. */
          /* Drop cast, since assembler doesn't care. But fwd both parts. */
          hi = hiref;
          goto fwdlo;
        } else if ((ir->op2 & IRCONV_SEXT)) {  /* Sign-extend to 64 bit. */
          IRRef k31 = lj_ir_kint(J, 31);
          nir = IR(nref);  /* May have been reallocated. */
          ir->prev = nir->op1;  /* Pass through loword. */
          nir->o = IR_BSAR;  /* hi = bsar(lo, 31). */
          nir->op2 = k31;
          hi = nref;
        } else {  /* Zero-extend to 64 bit. */
          hi = lj_ir_kint(J, 0);
          goto fwdlo;
        }
        break;
        }
      case IR_CALLXS:
        goto split_call;
      case IR_PHI: {
        IRRef hiref2;
        if ((irref_isk(nir->op1) && irref_isk(nir->op2)) ||
            nir->op1 == nir->op2)
          J->cur.nins--;  /* Drop useless PHIs. */
        hiref2 = hisubst[ir->op2];
        if (!((irref_isk(hiref) && irref_isk(hiref2)) || hiref == hiref2))
          split_emit(J, IRTI(IR_PHI), hiref, hiref2);
        break;
        }
      case IR_HIOP:
        J->cur.nins--;  /* Drop joining HIOP. */
        ir->prev = nir->op1;
        hi = nir->op2;
        break;
      default:
        lua_assert(ir->o <= IR_NE);  /* Comparisons. */
        split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]);
        break;
      }
    } else
#endif
#if LJ_SOFTFP
    if (ir->o == IR_SLOAD) {
      if ((nir->op2 & IRSLOAD_CONVERT)) {  /* Convert from number to int. */
        nir->op2 &= ~IRSLOAD_CONVERT;
        if (!(nir->op2 & IRSLOAD_TYPECHECK))
          nir->t.irt = IRT_INT;  /* Drop guard. */
        split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
        ir->prev = split_num2int(J, nref, nref+1, irt_isguard(ir->t));
      }
    } else if (ir->o == IR_TOBIT) {
      IRRef tmp, op1 = ir->op1;
      J->cur.nins--;
#if LJ_LE
      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
      ir->prev = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_lj_vm_tobit);
    } else if (ir->o == IR_TOSTR) {
      if (hisubst[ir->op1]) {
        if (irref_isk(ir->op1))
          nir->op1 = ir->op1;
        else
          split_emit(J, IRT(IR_HIOP, IRT_NIL), hisubst[ir->op1], nref);
      }
    } else if (ir->o == IR_HREF || ir->o == IR_NEWREF) {
      if (irref_isk(ir->op2) && hisubst[ir->op2])
        nir->op2 = ir->op2;
    } else
#endif
    if (ir->o == IR_CONV) {  /* See above, too. */
      IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
#if LJ_32 && LJ_HASFFI
      if (st == IRT_I64 || st == IRT_U64) {  /* Conversion from 64 bit int. */
#if LJ_SOFTFP
        if (irt_isfloat(ir->t)) {
          split_call_l(J, hisubst, oir, ir,
                       st == IRT_I64 ? IRCALL_fp64_l2f : IRCALL_fp64_ul2f);
          J->cur.nins--;  /* Drop unused HIOP. */
        }
#else
        if (irt_isfp(ir->t)) {  /* 64 bit integer to FP conversion. */
          ir->prev = split_emit(J, IRT(IR_HIOP, irt_type(ir->t)),
                                hisubst[ir->op1], nref);
        }
#endif
        else {  /* Truncate to lower 32 bits. */
        fwdlo:
          ir->prev = nir->op1;  /* Forward loword. */
          /* Replace with NOP to avoid messing up the snapshot logic. */
          nir->ot = IRT(IR_NOP, IRT_NIL);
          nir->op1 = nir->op2 = 0;
        }
      }
#endif
#if LJ_SOFTFP && LJ_32 && LJ_HASFFI
      else if (irt_isfloat(ir->t)) {
        if (st == IRT_NUM) {
          split_call_l(J, hisubst, oir, ir, IRCALL_softfp_d2f);
          J->cur.nins--;  /* Drop unused HIOP. */
        } else {
          nir->o = IR_CALLN;
          nir->op2 = st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f;
        }
      } else if (st == IRT_FLOAT) {
        nir->o = IR_CALLN;
        nir->op2 = irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui;
      } else
#endif
#if LJ_SOFTFP
      if (st == IRT_NUM || (LJ_32 && LJ_HASFFI && st == IRT_FLOAT)) {
        if (irt_isguard(ir->t)) {
          lua_assert(st == IRT_NUM && irt_isint(ir->t));
          J->cur.nins--;
          ir->prev = split_num2int(J, nir->op1, hisubst[ir->op1], 1);
        } else {
          split_call_l(J, hisubst, oir, ir,
#if LJ_32 && LJ_HASFFI
            st == IRT_NUM ?
              (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) :
              (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui)
#else
            IRCALL_softfp_d2i
#endif
          );
          J->cur.nins--;  /* Drop unused HIOP. */
        }
      }
#endif
    } else if (ir->o == IR_CALLXS) {
      IRRef hiref;
    split_call:
      hiref = hisubst[ir->op1];
      if (hiref) {
        IROpT ot = nir->ot;
        IRRef op2 = nir->op2;
        nir->ot = IRT(IR_CARG, IRT_NIL);
#if LJ_LE
        nir->op2 = hiref;
#else
        nir->op2 = nir->op1; nir->op1 = hiref;
#endif
        ir->prev = nref = split_emit(J, ot, nref, op2);
      }
      if (LJ_SOFTFP ? irt_is64(ir->t) : irt_isint64(ir->t))
        hi = split_emit(J,
          IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT),
          nref, nref);
    } else if (ir->o == IR_CARG) {
      IRRef hiref = hisubst[ir->op1];
      if (hiref) {
        IRRef op2 = nir->op2;
#if LJ_LE
        nir->op2 = hiref;
#else
        nir->op2 = nir->op1; nir->op1 = hiref;
#endif
        ir->prev = nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2);
        nir = IR(nref);
      }
      hiref = hisubst[ir->op2];
      if (hiref) {
#if !LJ_TARGET_X86
        int carg = 0;
        IRIns *cir;
        for (cir = IR(nir->op1); cir->o == IR_CARG; cir = IR(cir->op1))
          carg++;
        if ((carg & 1) == 0) {  /* Align 64 bit arguments. */
          IRRef op2 = nir->op2;
          nir->op2 = REF_NIL;
          nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2);
          nir = IR(nref);
        }
#endif
#if LJ_BE
        { IRRef tmp = nir->op2; nir->op2 = hiref; hiref = tmp; }
#endif
        ir->prev = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, hiref);
      }
    } else if (ir->o == IR_CNEWI) {
      if (hisubst[ir->op2])
        split_emit(J, IRT(IR_HIOP, IRT_NIL), nref, hisubst[ir->op2]);
    } else if (ir->o == IR_LOOP) {
      J->loopref = nref;  /* Needed by assembler. */
    }
    hisubst[ref] = hi;  /* Store hiword substitution. */
  }
  if (snref == nins) {  /* Substitution for last snapshot. */
    snap->ref = J->cur.nins;
    split_subst_snap(J, snap, oir);
  }

  /* Add PHI marks. */
  for (ref = J->cur.nins-1; ref >= REF_FIRST; ref--) {
    IRIns *ir = IR(ref);
    if (ir->o != IR_PHI) break;
    if (!irref_isk(ir->op1)) irt_setphi(IR(ir->op1)->t);
    if (ir->op2 > J->loopref) irt_setphi(IR(ir->op2)->t);
  }
}
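/* Illustrative sketch (not part of the build): the scalar equivalent of the
** IRCONV_SEXT split above. Widening a 32 bit integer to a 64 bit lo/hi pair
** needs only an arithmetic shift for the hiword (or the constant 0 when
** zero-extending). Hypothetical demo code.
*/
#if 0
#include <stdint.h>

static void demo_extend(int32_t lo, int sext, int32_t *hi)
{
  /* hi = bsar(lo, 31): replicates the sign bit into all hiword bits.
  ** The shift is arithmetic on all targets LuaJIT supports. */
  *hi = sext ? (lo >> 31) : 0;
}
#endif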
/* Restore interpreter state from exit state with the help of a snapshot. */
const BCIns *lj_snap_restore(jit_State *J, void *exptr)
{
  ExitState *ex = (ExitState *)exptr;
  SnapNo snapno = J->exitno;  /* For now, snapno == exitno. */
  GCtrace *T = traceref(J, J->parent);
  SnapShot *snap = &T->snap[snapno];
  MSize n, nent = snap->nent;
  SnapEntry *map = &T->snapmap[snap->mapofs];
  SnapEntry *flinks = map + nent + snap->depth;
  int32_t ftsz0;
  BCReg nslots = snap->nslots;
  TValue *frame;
  BloomFilter rfilt = snap_renamefilter(T, snapno);
  const BCIns *pc = snap_pc(map[nent]);
  lua_State *L = J->L;

  /* Set interpreter PC to the next PC to get correct error messages. */
  setcframe_pc(cframe_raw(L->cframe), pc+1);

  /* Make sure the stack is big enough for the slots from the snapshot. */
  if (LJ_UNLIKELY(L->base + nslots > tvref(L->maxstack))) {
    L->top = curr_topL(L);
    lj_state_growstack(L, nslots - curr_proto(L)->framesize);
  }

  /* Fill stack slots with data from the registers and spill slots. */
  frame = L->base-1;
  ftsz0 = frame_ftsz(frame);  /* Preserve link to previous frame in slot #0. */
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    IRRef ref = snap_ref(sn);
    BCReg s = snap_slot(sn);
    TValue *o = &frame[s];  /* Stack slots are relative to start frame. */
    IRIns *ir = &T->ir[ref];
    if (irref_isk(ref)) {  /* Restore constant slot. */
      lj_ir_kvalue(L, o, ir);
      if ((sn & (SNAP_CONT|SNAP_FRAME))) {
        /* Overwrite tag with frame link. */
        o->fr.tp.ftsz = s != 0 ? (int32_t)*flinks-- : ftsz0;
        if ((sn & SNAP_FRAME)) {
          GCfunc *fn = ir_kfunc(ir);
          if (isluafunc(fn)) {
            MSize framesize = funcproto(fn)->framesize;
            L->base = ++o;
            if (LJ_UNLIKELY(o + framesize > tvref(L->maxstack))) {
              ptrdiff_t fsave = savestack(L, frame);
              L->top = o;
              lj_state_growstack(L, framesize);  /* Grow again. */
              frame = restorestack(L, fsave);
            }
          }
        }
      }
    } else if (!(sn & SNAP_NORESTORE)) {
      IRType1 t = ir->t;
      RegSP rs = ir->prev;
      lua_assert(!(sn & (SNAP_CONT|SNAP_FRAME)));
      if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
        rs = snap_renameref(T, snapno, ref, rs);
      if (ra_hasspill(regsp_spill(rs))) {  /* Restore from spill slot. */
        int32_t *sps = &ex->spill[regsp_spill(rs)];
        if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) {
          o->u32.lo = (uint32_t)*sps;
        } else if (irt_isinteger(t)) {
          setintV(o, *sps);
#if !LJ_SOFTFP
        } else if (irt_isnum(t)) {
          o->u64 = *(uint64_t *)sps;
#endif
#if LJ_64
        } else if (irt_islightud(t)) {
          /* 64 bit lightuserdata which may escape already has the tag bits. */
          o->u64 = *(uint64_t *)sps;
#endif
        } else {
          lua_assert(!irt_ispri(t));  /* PRI refs never have a spill slot. */
          setgcrefi(o->gcr, *sps);
          setitype(o, irt_toitype(t));
        }
      } else {  /* Restore from register. */
        Reg r = regsp_reg(rs);
        lua_assert(ra_hasreg(r));
        if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) {
          o->u32.lo = (uint32_t)ex->gpr[r-RID_MIN_GPR];
        } else if (irt_isinteger(t)) {
          setintV(o, (int32_t)ex->gpr[r-RID_MIN_GPR]);
#if !LJ_SOFTFP
        } else if (irt_isnum(t)) {
          setnumV(o, ex->fpr[r-RID_MIN_FPR]);
#endif
#if LJ_64
        } else if (irt_islightud(t)) {
          /* 64 bit lightuserdata which may escape already has the tag bits. */
          o->u64 = ex->gpr[r-RID_MIN_GPR];
#endif
        } else {
          if (!irt_ispri(t))
            setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]);
          setitype(o, irt_toitype(t));
        }
      }
      if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) {
        rs = (ir+1)->prev;
        if (LJ_UNLIKELY(bloomtest(rfilt, ref+1)))
          rs = snap_renameref(T, snapno, ref+1, rs);
        o->u32.hi = (ra_hasspill(regsp_spill(rs))) ?
            (uint32_t)*&ex->spill[regsp_spill(rs)] :
            (uint32_t)ex->gpr[regsp_reg(rs)-RID_MIN_GPR];
      }
    }
  }
  switch (bc_op(*pc)) {
  case BC_CALLM: case BC_CALLMT: case BC_RETM: case BC_TSETM:
    L->top = frame + nslots;
    break;
  default:
    L->top = curr_topL(L);
    break;
  }
  lua_assert(map + nent == flinks);
  return pc;
}
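/* Illustrative sketch (not part of the build): what the SNAP_SOFTFPNUM path
** above reconstructs. On soft-float targets a number lives in two GPRs or
** spill slots; writing the two 32 bit halves into the lo/hi words of a
** TValue yields the IEEE-754 double. Hypothetical demo code assuming a
** little-endian TValue layout.
*/
#if 0
#include <stdint.h>

union DemoTValue { double n; struct { uint32_t lo, hi; } u32; };

static double demo_join_num(uint32_t lo, uint32_t hi)
{
  union DemoTValue o;
  o.u32.lo = lo;  /* Loword restored first (hi ref = lo ref + 1). */
  o.u32.hi = hi;  /* Hiword fetched from the adjacent split ref. */
  return o.n;
}
#endif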
/* Restore interpreter state from exit state with the help of a snapshot. */
void lj_snap_restore(jit_State *J, void *exptr)
{
  ExitState *ex = (ExitState *)exptr;
  SnapNo snapno = J->exitno;  /* For now, snapno == exitno. */
  Trace *T = J->trace[J->parent];
  SnapShot *snap = &T->snap[snapno];
  BCReg s, nslots = snap->nslots;
  IRRef2 *map = &T->snapmap[snap->mapofs];
  IRRef2 *flinks = map + nslots + snap->nframelinks;
  TValue *o, *newbase, *ntop;
  BloomFilter rfilt = snap_renamefilter(T, snapno);
  lua_State *L = J->L;

  /* Make sure the stack is big enough for the slots from the snapshot. */
  if (L->base + nslots >= L->maxstack) {
    L->top = curr_topL(L);
    lj_state_growstack(L, nslots - curr_proto(L)->framesize);
  }

  /* Fill stack slots with data from the registers and spill slots. */
  newbase = NULL;
  ntop = L->base;
  for (s = 0, o = L->base-1; s < nslots; s++, o++) {
    IRRef ref = snap_ref(map[s]);
    if (ref) {
      IRIns *ir = &T->ir[ref];
      if (irref_isk(ref)) {  /* Restore constant slot. */
        lj_ir_kvalue(L, o, ir);
      } else {
        IRType1 t = ir->t;
        RegSP rs = ir->prev;
        if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
          rs = snap_renameref(T, snapno, ref, rs);
        if (ra_hasspill(regsp_spill(rs))) {  /* Restore from spill slot. */
          int32_t *sps = &ex->spill[regsp_spill(rs)];
          if (irt_isinteger(t)) {
            setintV(o, *sps);
          } else if (irt_isnum(t)) {
            o->u64 = *(uint64_t *)sps;
          } else {
            lua_assert(!irt_ispri(t));  /* PRI refs never have a spill slot. */
            setgcrefi(o->gcr, *sps);
            setitype(o, irt_toitype(t));
          }
        } else if (ra_hasreg(regsp_reg(rs))) {  /* Restore from register. */
          Reg r = regsp_reg(rs);
          if (irt_isinteger(t)) {
            setintV(o, ex->gpr[r-RID_MIN_GPR]);
          } else if (irt_isnum(t)) {
            setnumV(o, ex->fpr[r-RID_MIN_FPR]);
          } else {
            if (!irt_ispri(t))
              setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]);
            setitype(o, irt_toitype(t));
          }
        } else {  /* Restore frame slot. */
          lua_assert(ir->o == IR_FRAME);
          /* This works for both PTR and FUNC IR_FRAME. */
          setgcrefp(o->fr.func, mref(T->ir[ir->op2].ptr, void));
          if (s != 0)  /* Do not overwrite link to previous frame. */
            o->fr.tp.ftsz = (int32_t)*--flinks;
          if (irt_isfunc(ir->t)) {
            GCfunc *fn = gco2func(gcref(T->ir[ir->op2].gcr));
            if (isluafunc(fn)) {
              TValue *fs;
              newbase = o+1;
              fs = newbase + funcproto(fn)->framesize;
              if (fs > ntop) ntop = fs;  /* Update top for newly added frames. */
            }
          }
        }
      }
    } else if (newbase) {
      setnilV(o);  /* Clear unreferenced slots of newly added frames. */
    }
  }
}