/* Substitute references of a snapshot. */ static void split_subst_snap(jit_State *J, SnapShot *snap, IRIns *oir) { SnapEntry *map = &J->cur.snapmap[snap->mapofs]; MSize n, nent = snap->nent; for (n = 0; n < nent; n++) { SnapEntry sn = map[n]; IRIns *ir = &oir[snap_ref(sn)]; if (!(LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && irref_isk(snap_ref(sn)))) map[n] = ((sn & 0xffff0000) | ir->prev); } }
/* Copy-substitute snapshot. */ static void loop_subst_snap(jit_State *J, SnapShot *osnap, SnapEntry *loopmap, IRRef1 *subst) { SnapEntry *nmap, *omap = &J->cur.snapmap[osnap->mapofs]; SnapEntry *nextmap = &J->cur.snapmap[snap_nextofs(&J->cur, osnap)]; MSize nmapofs; MSize on, ln, nn, onent = osnap->nent; BCReg nslots = osnap->nslots; SnapShot *snap = &J->cur.snap[J->cur.nsnap]; if (irt_isguard(J->guardemit)) { /* Guard inbetween? */ nmapofs = J->cur.nsnapmap; J->cur.nsnap++; /* Add new snapshot. */ } else { /* Otherwise overwrite previous snapshot. */ snap--; nmapofs = snap->mapofs; } J->guardemit.irt = 0; /* Setup new snapshot. */ snap->mapofs = (uint16_t)nmapofs; snap->ref = (IRRef1)J->cur.nins; snap->nslots = nslots; snap->topslot = osnap->topslot; snap->count = 0; nmap = &J->cur.snapmap[nmapofs]; /* Substitute snapshot slots. */ on = ln = nn = 0; while (on < onent) { SnapEntry osn = omap[on], lsn = loopmap[ln]; if (snap_slot(lsn) < snap_slot(osn)) { /* Copy slot from loop map. */ nmap[nn++] = lsn; ln++; } else { /* Copy substituted slot from snapshot map. */ if (snap_slot(lsn) == snap_slot(osn)) ln++; /* Shadowed loop slot. */ if (!irref_isk(snap_ref(osn))) osn = snap_setref(osn, subst[snap_ref(osn)]); nmap[nn++] = osn; on++; } } while (snap_slot(loopmap[ln]) < nslots) /* Copy remaining loop slots. */ nmap[nn++] = loopmap[ln++]; snap->nent = (uint8_t)nn; omap += onent; nmap += nn; while (omap < nextmap) /* Copy PC + frame links. */ *nmap++ = *omap++; J->cur.nsnapmap = (uint16_t)(nmap - J->cur.snapmap); }
/* Copy RegSP from parent snapshot to the parent links of the IR. */ IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir) { SnapShot *snap = &T->snap[snapno]; SnapEntry *map = &T->snapmap[snap->mapofs]; BloomFilter rfilt = snap_renamefilter(T, snapno); MSize n = 0; IRRef ref = 0; for ( ; ; ir++) { uint32_t rs; if (ir->o == IR_SLOAD) { if (!(ir->op2 & IRSLOAD_PARENT)) break; for ( ; ; n++) { lua_assert(n < snap->nent); if (snap_slot(map[n]) == ir->op1) { ref = snap_ref(map[n++]); break; } } } else if (LJ_SOFTFP && ir->o == IR_HIOP) { ref++; } else if (ir->o == IR_PVAL) { ref = ir->op1 + REF_BIAS; } else { break; } rs = T->ir[ref].prev; if (bloomtest(rfilt, ref)) rs = snap_renameref(T, snapno, ref, rs); ir->prev = (uint16_t)rs; lua_assert(regsp_used(rs)); } return ir; }
/* De-duplicate parent reference. */ static TRef snap_dedup(jit_State *J, SnapEntry *map, MSize nmax, IRRef ref) { MSize j; for (j = 0; j < nmax; j++) if (snap_ref(map[j]) == ref) return J->slot[snap_slot(map[j])] & ~(SNAP_CONT|SNAP_FRAME); return 0; }
/* Mark all instructions referenced by a snapshot. */ static void sink_mark_snap(jit_State *J, SnapShot *snap) { SnapEntry *map = &J->cur.snapmap[snap->mapofs]; MSize n, nent = snap->nent; for (n = 0; n < nent; n++) { IRRef ref = snap_ref(map[n]); if (!irref_isk(ref)) irt_setmark(IR(ref)->t); } }
/* Scan through all snapshots and mark all referenced instructions. */ static void dce_marksnap(jit_State *J) { SnapNo i, nsnap = J->cur.nsnap; for (i = 0; i < nsnap; i++) { SnapShot *snap = &J->cur.snap[i]; SnapEntry *map = &J->cur.snapmap[snap->mapofs]; MSize n, nent = snap->nent; for (n = 0; n < nent; n++) { IRRef ref = snap_ref(map[n]); if (ref >= REF_FIRST) irt_setmark(IR(ref)->t); } } }
/* Scan through all snapshots and mark all referenced instructions. */ static void dce_marksnap(jit_State *J) { SnapNo i, nsnap = J->cur.nsnap; for (i = 0; i < nsnap; i++) { SnapShot *snap = &J->cur.snap[i]; IRRef2 *map = &J->cur.snapmap[snap->mapofs]; BCReg s, nslots = snap->nslots; for (s = 0; s < nslots; s++) { IRRef ref = snap_ref(map[s]); if (!irref_isk(ref)) irt_setmark(IR(ref)->t); } } }
/* Convert a snapshot into a linear slot -> RegSP map. */ void lj_snap_regspmap(uint16_t *rsmap, Trace *T, SnapNo snapno) { SnapShot *snap = &T->snap[snapno]; BCReg s, nslots = snap->nslots; IRRef2 *map = &T->snapmap[snap->mapofs]; BloomFilter rfilt = snap_renamefilter(T, snapno); for (s = 0; s < nslots; s++) { IRRef ref = snap_ref(map[s]); if (!irref_isk(ref)) { IRIns *ir = &T->ir[ref]; uint32_t rs = ir->prev; if (bloomtest(rfilt, ref)) rs = snap_renameref(T, snapno, ref, rs); rsmap[s] = (uint16_t)rs; } } }
/* Convert a snapshot into a linear slot -> RegSP map. ** Note: unused slots are not initialized! */ void lj_snap_regspmap(uint16_t *rsmap, GCtrace *T, SnapNo snapno) { SnapShot *snap = &T->snap[snapno]; MSize n, nent = snap->nent; SnapEntry *map = &T->snapmap[snap->mapofs]; BloomFilter rfilt = snap_renamefilter(T, snapno); for (n = 0; n < nent; n++) { SnapEntry sn = map[n]; IRRef ref = snap_ref(sn); if (!irref_isk(ref)) { IRIns *ir = &T->ir[ref]; uint32_t rs = ir->prev; if (bloomtest(rfilt, ref)) rs = snap_renameref(T, snapno, ref, rs); rsmap[snap_slot(sn)] = (uint16_t)rs; } } }
/* Emit or eliminate collected PHIs. */ static void loop_emit_phi(jit_State *J, IRRef1 *subst, IRRef1 *phi, IRRef nphi, SnapNo onsnap) { int passx = 0; IRRef i, j, nslots; IRRef invar = J->chain[IR_LOOP]; /* Pass #1: mark redundant and potentially redundant PHIs. */ for (i = 0, j = 0; i < nphi; i++) { IRRef lref = phi[i]; IRRef rref = subst[lref]; if (lref == rref || rref == REF_DROP) { /* Invariants are redundant. */ irt_clearphi(IR(lref)->t); } else { phi[j++] = (IRRef1)lref; if (!(IR(rref)->op1 == lref || IR(rref)->op2 == lref)) { /* Quick check for simple recurrences failed, need pass2. */ irt_setmark(IR(lref)->t); passx = 1; } } } nphi = j; /* Pass #2: traverse variant part and clear marks of non-redundant PHIs. */ if (passx) { SnapNo s; for (i = J->cur.nins-1; i > invar; i--) { IRIns *ir = IR(i); if (!irref_isk(ir->op2)) irt_clearmark(IR(ir->op2)->t); if (!irref_isk(ir->op1)) { irt_clearmark(IR(ir->op1)->t); if (ir->op1 < invar && ir->o >= IR_CALLN && ir->o <= IR_CARG) { /* ORDER IR */ ir = IR(ir->op1); while (ir->o == IR_CARG) { if (!irref_isk(ir->op2)) irt_clearmark(IR(ir->op2)->t); if (irref_isk(ir->op1)) break; ir = IR(ir->op1); irt_clearmark(ir->t); } } } } for (s = J->cur.nsnap-1; s >= onsnap; s--) { SnapShot *snap = &J->cur.snap[s]; SnapEntry *map = &J->cur.snapmap[snap->mapofs]; MSize n, nent = snap->nent; for (n = 0; n < nent; n++) { IRRef ref = snap_ref(map[n]); if (!irref_isk(ref)) irt_clearmark(IR(ref)->t); } } } /* Pass #3: add PHIs for variant slots without a corresponding SLOAD. */ nslots = J->baseslot+J->maxslot; for (i = 1; i < nslots; i++) { IRRef ref = tref_ref(J->slot[i]); while (!irref_isk(ref) && ref != subst[ref]) { IRIns *ir = IR(ref); irt_clearmark(ir->t); /* Unmark potential uses, too. */ if (irt_isphi(ir->t) || irt_ispri(ir->t)) break; irt_setphi(ir->t); if (nphi >= LJ_MAX_PHI) lj_trace_err(J, LJ_TRERR_PHIOV); phi[nphi++] = (IRRef1)ref; ref = subst[ref]; if (ref > invar) break; } } /* Pass #4: propagate non-redundant PHIs. */ while (passx) { passx = 0; for (i = 0; i < nphi; i++) { IRRef lref = phi[i]; IRIns *ir = IR(lref); if (!irt_ismarked(ir->t)) { /* Propagate only from unmarked PHIs. */ IRIns *irr = IR(subst[lref]); if (irt_ismarked(irr->t)) { /* Right ref points to other PHI? */ irt_clearmark(irr->t); /* Mark that PHI as non-redundant. */ passx = 1; /* Retry. */ } } } } /* Pass #5: emit PHI instructions or eliminate PHIs. */ for (i = 0; i < nphi; i++) { IRRef lref = phi[i]; IRIns *ir = IR(lref); if (!irt_ismarked(ir->t)) { /* Emit PHI if not marked. */ IRRef rref = subst[lref]; if (rref > invar) irt_setphi(IR(rref)->t); emitir_raw(IRT(IR_PHI, irt_type(ir->t)), lref, rref); } else { /* Otherwise eliminate PHI. */ irt_clearmark(ir->t); irt_clearphi(ir->t); } } }
/* Restore interpreter state from exit state with the help of a snapshot. */ const BCIns *lj_snap_restore(jit_State *J, void *exptr) { ExitState *ex = (ExitState *)exptr; SnapNo snapno = J->exitno; /* For now, snapno == exitno. */ GCtrace *T = traceref(J, J->parent); SnapShot *snap = &T->snap[snapno]; MSize n, nent = snap->nent; SnapEntry *map = &T->snapmap[snap->mapofs]; SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1]; ptrdiff_t ftsz0; TValue *frame; BloomFilter rfilt = snap_renamefilter(T, snapno); const BCIns *pc = snap_pc(map[nent]); lua_State *L = J->L; /* Set interpreter PC to the next PC to get correct error messages. */ setcframe_pc(cframe_raw(L->cframe), pc+1); /* Make sure the stack is big enough for the slots from the snapshot. */ if (LJ_UNLIKELY(L->base + snap->topslot >= tvref(L->maxstack))) { L->top = curr_topL(L); lj_state_growstack(L, snap->topslot - curr_proto(L)->framesize); } /* Fill stack slots with data from the registers and spill slots. */ frame = L->base-1; ftsz0 = frame_ftsz(frame); /* Preserve link to previous frame in slot #0. */ for (n = 0; n < nent; n++) { SnapEntry sn = map[n]; if (!(sn & SNAP_NORESTORE)) { TValue *o = &frame[snap_slot(sn)]; IRRef ref = snap_ref(sn); IRIns *ir = &T->ir[ref]; if (ir->r == RID_SUNK) { MSize j; for (j = 0; j < n; j++) if (snap_ref(map[j]) == ref) { /* De-duplicate sunk allocations. */ copyTV(L, o, &frame[snap_slot(map[j])]); goto dupslot; } snap_unsink(J, T, ex, snapno, rfilt, ir, o); dupslot: continue; } snap_restoreval(J, T, ex, snapno, rfilt, ref, o); if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && tvisint(o)) { TValue tmp; snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp); o->u32.hi = tmp.u32.lo; } else if ((sn & (SNAP_CONT|SNAP_FRAME))) { lua_assert(!LJ_FR2); /* TODO_FR2: store 64 bit PCs. */ /* Overwrite tag with frame link. */ setframe_ftsz(o, snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0); L->base = o+1; } } } lua_assert(map + nent == flinks); /* Compute current stack top. */ switch (bc_op(*pc)) { default: if (bc_op(*pc) < BC_FUNCF) { L->top = curr_topL(L); break; } /* fallthrough */ case BC_CALLM: case BC_CALLMT: case BC_RETM: case BC_TSETM: L->top = frame + snap->nslots; break; } return pc; }
/* Replay snapshot state to setup side trace. */ void lj_snap_replay(jit_State *J, GCtrace *T) { SnapShot *snap = &T->snap[J->exitno]; SnapEntry *map = &T->snapmap[snap->mapofs]; MSize n, nent = snap->nent; BloomFilter seen = 0; int pass23 = 0; J->framedepth = 0; /* Emit IR for slots inherited from parent snapshot. */ for (n = 0; n < nent; n++) { SnapEntry sn = map[n]; BCReg s = snap_slot(sn); IRRef ref = snap_ref(sn); IRIns *ir = &T->ir[ref]; TRef tr; /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */ if (bloomtest(seen, ref) && (tr = snap_dedup(J, map, n, ref)) != 0) goto setslot; bloomset(seen, ref); if (irref_isk(ref)) { tr = snap_replay_const(J, ir); } else if (!regsp_used(ir->prev)) { pass23 = 1; lua_assert(s != 0); tr = s; } else { IRType t = irt_type(ir->t); uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT; if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM; if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY); tr = emitir_raw(IRT(IR_SLOAD, t), s, mode); } setslot: J->slot[s] = tr | (sn&(SNAP_CONT|SNAP_FRAME)); /* Same as TREF_* flags. */ J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && s); if ((sn & SNAP_FRAME)) J->baseslot = s+1; } if (pass23) { IRIns *irlast = &T->ir[snap->ref]; pass23 = 0; /* Emit dependent PVALs. */ for (n = 0; n < nent; n++) { SnapEntry sn = map[n]; IRRef refp = snap_ref(sn); IRIns *ir = &T->ir[refp]; if (regsp_reg(ir->r) == RID_SUNK) { if (J->slot[snap_slot(sn)] != snap_slot(sn)) continue; pass23 = 1; lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW || ir->o == IR_CNEWI); if (ir->op1 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op1); if (ir->op2 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op2); if (LJ_HASFFI && ir->o == IR_CNEWI) { if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP) snap_pref(J, T, map, nent, seen, (ir+1)->op2); } else { IRIns *irs; for (irs = ir+1; irs < irlast; irs++) if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) { if (snap_pref(J, T, map, nent, seen, irs->op2) == 0) snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1); else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) && irs+1 < irlast && (irs+1)->o == IR_HIOP) snap_pref(J, T, map, nent, seen, (irs+1)->op2); } } } else if (!irref_isk(refp) && !regsp_used(ir->prev)) { lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT); J->slot[snap_slot(sn)] = snap_pref(J, T, map, nent, seen, ir->op1); } } /* Replay sunk instructions. */ for (n = 0; pass23 && n < nent; n++) { SnapEntry sn = map[n]; IRRef refp = snap_ref(sn); IRIns *ir = &T->ir[refp]; if (regsp_reg(ir->r) == RID_SUNK) { TRef op1, op2; if (J->slot[snap_slot(sn)] != snap_slot(sn)) { /* De-dup allocs. */ J->slot[snap_slot(sn)] = J->slot[J->slot[snap_slot(sn)]]; continue; } op1 = ir->op1; if (op1 >= T->nk) op1 = snap_pref(J, T, map, nent, seen, op1); op2 = ir->op2; if (op2 >= T->nk) op2 = snap_pref(J, T, map, nent, seen, op2); if (LJ_HASFFI && ir->o == IR_CNEWI) { if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP) { lj_needsplit(J); /* Emit joining HIOP. */ op2 = emitir_raw(IRT(IR_HIOP, IRT_I64), op2, snap_pref(J, T, map, nent, seen, (ir+1)->op2)); } J->slot[snap_slot(sn)] = emitir(ir->ot & ~(IRT_MARK|IRT_ISPHI), op1, op2); } else { IRIns *irs; TRef tr = emitir(ir->ot, op1, op2); J->slot[snap_slot(sn)] = tr; for (irs = ir+1; irs < irlast; irs++) if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) { IRIns *irr = &T->ir[irs->op1]; TRef val, key = irr->op2, tmp = tr; if (irr->o != IR_FREF) { IRIns *irk = &T->ir[key]; if (irr->o == IR_HREFK) key = lj_ir_kslot(J, snap_replay_const(J, &T->ir[irk->op1]), irk->op2); else key = snap_replay_const(J, irk); if (irr->o == IR_HREFK || irr->o == IR_AREF) { IRIns *irf = &T->ir[irr->op1]; tmp = emitir(irf->ot, tmp, irf->op2); } } tmp = emitir(irr->ot, tmp, key); val = snap_pref(J, T, map, nent, seen, irs->op2); if (val == 0) { IRIns *irc = &T->ir[irs->op2]; lua_assert(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT); val = snap_pref(J, T, map, nent, seen, irc->op1); val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT); } else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) && irs+1 < irlast && (irs+1)->o == IR_HIOP) { IRType t = IRT_I64; if (LJ_SOFTFP && irt_type((irs+1)->t) == IRT_SOFTFP) t = IRT_NUM; lj_needsplit(J); if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) { uint64_t k = (uint32_t)T->ir[irs->op2].i + ((uint64_t)T->ir[(irs+1)->op2].i << 32); val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM, lj_ir_k64_find(J, k)); } else { val = emitir_raw(IRT(IR_HIOP, t), val, snap_pref(J, T, map, nent, seen, (irs+1)->op2)); } tmp = emitir(IRT(irs->o, t), tmp, val); continue; } tmp = emitir(irs->ot, tmp, val); } else if (LJ_HASFFI && irs->o == IR_XBAR && ir->o == IR_CNEW) { emitir(IRT(IR_XBAR, IRT_NIL), 0, 0); } } } } } J->base = J->slot + J->baseslot; J->maxslot = snap->nslots - J->baseslot; lj_snap_add(J); if (pass23) /* Need explicit GC step _after_ initial snapshot. */ emitir_raw(IRTG(IR_GCSTEP, IRT_NIL), 0, 0); }
/* Transform the old IR to the new IR. */ static void split_ir(jit_State *J) { IRRef nins = J->cur.nins, nk = J->cur.nk; MSize irlen = nins - nk; MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1)); IRIns *oir = (IRIns *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, need); IRRef1 *hisubst; IRRef ref; /* Copy old IR to buffer. */ memcpy(oir, IR(nk), irlen*sizeof(IRIns)); /* Bias hiword substitution table and old IR. Loword kept in field prev. */ hisubst = (IRRef1 *)&oir[irlen] - nk; oir -= nk; /* Remove all IR instructions, but retain IR constants. */ J->cur.nins = REF_FIRST; J->loopref = 0; /* Process constants and fixed references. */ for (ref = nk; ref <= REF_BASE; ref++) { IRIns *ir = &oir[ref]; if ((LJ_SOFTFP && ir->o == IR_KNUM) || ir->o == IR_KINT64) { /* Split up 64 bit constant. */ TValue tv = *ir_k64(ir); ir->prev = lj_ir_kint(J, (int32_t)tv.u32.lo); hisubst[ref] = lj_ir_kint(J, (int32_t)tv.u32.hi); } else { ir->prev = ref; /* Identity substitution for loword. */ hisubst[ref] = 0; } } /* Process old IR instructions. */ for (ref = REF_FIRST; ref < nins; ref++) { IRIns *ir = &oir[ref]; IRRef nref = lj_ir_nextins(J); IRIns *nir = IR(nref); IRRef hi = 0; /* Copy-substitute old instruction to new instruction. */ nir->op1 = ir->op1 < nk ? ir->op1 : oir[ir->op1].prev; nir->op2 = ir->op2 < nk ? ir->op2 : oir[ir->op2].prev; ir->prev = nref; /* Loword substitution. */ nir->o = ir->o; nir->t.irt = ir->t.irt & ~(IRT_MARK|IRT_ISPHI); hisubst[ref] = 0; /* Split 64 bit instructions. */ #if LJ_SOFTFP if (irt_isnum(ir->t)) { nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD); /* Turn into INT op. */ /* Note: hi ref = lo ref + 1! Required for SNAP_SOFTFPNUM logic. */ switch (ir->o) { case IR_ADD: hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_add); break; case IR_SUB: hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_sub); break; case IR_MUL: hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_mul); break; case IR_DIV: hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_div); break; case IR_POW: hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi); break; case IR_FPMATH: /* Try to rejoin pow from EXP2, MUL and LOG2. */ if (nir->op2 == IRFPM_EXP2 && nir->op1 > J->loopref) { IRIns *irp = IR(nir->op1); if (irp->o == IR_CALLN && irp->op2 == IRCALL_softfp_mul) { IRIns *irm4 = IR(irp->op1); IRIns *irm3 = IR(irm4->op1); IRIns *irm12 = IR(irm3->op1); IRIns *irl1 = IR(irm12->op1); if (irm12->op1 > J->loopref && irl1->o == IR_CALLN && irl1->op2 == IRCALL_lj_vm_log2) { IRRef tmp = irl1->op1; /* Recycle first two args from LOG2. */ IRRef arg3 = irm3->op2, arg4 = irm4->op2; J->cur.nins--; tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg3); tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg4); ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_pow); hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); break; } } } hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2); break; case IR_ATAN2: hi = split_call_ll(J, hisubst, oir, ir, IRCALL_atan2); break; case IR_LDEXP: hi = split_call_li(J, hisubst, oir, ir, IRCALL_ldexp); break; case IR_NEG: case IR_ABS: nir->o = IR_CONV; /* Pass through loword. */ nir->op2 = (IRT_INT << 5) | IRT_INT; hi = split_emit(J, IRT(ir->o == IR_NEG ? IR_BXOR : IR_BAND, IRT_SOFTFP), hisubst[ir->op1], hisubst[ir->op2]); break; case IR_SLOAD: if ((nir->op2 & IRSLOAD_CONVERT)) { /* Convert from int to number. */ nir->op2 &= ~IRSLOAD_CONVERT; ir->prev = nref = split_emit(J, IRTI(IR_CALLN), nref, IRCALL_softfp_i2d); hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); break; } /* fallthrough */ case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: case IR_STRTO: hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); break; case IR_XLOAD: { IRIns inslo = *nir; /* Save/undo the emit of the lo XLOAD. */ J->cur.nins--; hi = split_ptr(J, oir, ir->op1); /* Insert the hiref ADD. */ nref = lj_ir_nextins(J); nir = IR(nref); *nir = inslo; /* Re-emit lo XLOAD immediately before hi XLOAD. */ hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP), hi, ir->op2); #if LJ_LE ir->prev = nref; #else ir->prev = hi; hi = nref; #endif break; } case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_XSTORE: split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nir->op1, hisubst[ir->op2]); break; case IR_CONV: { /* Conversion to number. Others handled below. */ IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); UNUSED(st); #if LJ_32 && LJ_HASFFI if (st == IRT_I64 || st == IRT_U64) { hi = split_call_l(J, hisubst, oir, ir, st == IRT_I64 ? IRCALL_fp64_l2d : IRCALL_fp64_ul2d); break; } #endif lua_assert(st == IRT_INT || (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT))); nir->o = IR_CALLN; #if LJ_32 && LJ_HASFFI nir->op2 = st == IRT_INT ? IRCALL_softfp_i2d : st == IRT_FLOAT ? IRCALL_softfp_f2d : IRCALL_softfp_ui2d; #else nir->op2 = IRCALL_softfp_i2d; #endif hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); break; } case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS: goto split_call; case IR_PHI: if (nir->op1 == nir->op2) J->cur.nins--; /* Drop useless PHIs. */ if (hisubst[ir->op1] != hisubst[ir->op2]) split_emit(J, IRT(IR_PHI, IRT_SOFTFP), hisubst[ir->op1], hisubst[ir->op2]); break; case IR_HIOP: J->cur.nins--; /* Drop joining HIOP. */ ir->prev = nir->op1; hi = nir->op2; break; default: lua_assert(ir->o <= IR_NE || ir->o == IR_MIN || ir->o == IR_MAX); hi = split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP), hisubst[ir->op1], hisubst[ir->op2]); break; } } else #endif #if LJ_32 && LJ_HASFFI if (irt_isint64(ir->t)) { IRRef hiref = hisubst[ir->op1]; nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD); /* Turn into INT op. */ switch (ir->o) { case IR_ADD: case IR_SUB: /* Use plain op for hiword if loword cannot produce a carry/borrow. */ if (irref_isk(nir->op2) && IR(nir->op2)->i == 0) { ir->prev = nir->op1; /* Pass through loword. */ nir->op1 = hiref; nir->op2 = hisubst[ir->op2]; hi = nref; break; } /* fallthrough */ case IR_NEG: hi = split_emit(J, IRTI(IR_HIOP), hiref, hisubst[ir->op2]); break; case IR_MUL: hi = split_call_ll(J, hisubst, oir, ir, IRCALL_lj_carith_mul64); break; case IR_DIV: hi = split_call_ll(J, hisubst, oir, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : IRCALL_lj_carith_divu64); break; case IR_MOD: hi = split_call_ll(J, hisubst, oir, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : IRCALL_lj_carith_modu64); break; case IR_POW: hi = split_call_ll(J, hisubst, oir, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : IRCALL_lj_carith_powu64); break; case IR_FLOAD: lua_assert(ir->op2 == IRFL_CDATA_INT64); hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4); #if LJ_BE ir->prev = hi; hi = nref; #endif break; case IR_XLOAD: hi = split_emit(J, IRTI(IR_XLOAD), split_ptr(J, oir, ir->op1), ir->op2); #if LJ_BE ir->prev = hi; hi = nref; #endif break; case IR_XSTORE: split_emit(J, IRTI(IR_HIOP), nir->op1, hisubst[ir->op2]); break; case IR_CONV: { /* Conversion to 64 bit integer. Others handled below. */ IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); #if LJ_SOFTFP if (st == IRT_NUM) { /* NUM to 64 bit int conv. */ hi = split_call_l(J, hisubst, oir, ir, irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul); } else if (st == IRT_FLOAT) { /* FLOAT to 64 bit int conv. */ nir->o = IR_CALLN; nir->op2 = irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul; hi = split_emit(J, IRTI(IR_HIOP), nref, nref); } #else if (st == IRT_NUM || st == IRT_FLOAT) { /* FP to 64 bit int conv. */ hi = split_emit(J, IRTI(IR_HIOP), nir->op1, nref); } #endif else if (st == IRT_I64 || st == IRT_U64) { /* 64/64 bit cast. */ /* Drop cast, since assembler doesn't care. */ goto fwdlo; } else if ((ir->op2 & IRCONV_SEXT)) { /* Sign-extend to 64 bit. */ IRRef k31 = lj_ir_kint(J, 31); nir = IR(nref); /* May have been reallocated. */ ir->prev = nir->op1; /* Pass through loword. */ nir->o = IR_BSAR; /* hi = bsar(lo, 31). */ nir->op2 = k31; hi = nref; } else { /* Zero-extend to 64 bit. */ hi = lj_ir_kint(J, 0); goto fwdlo; } break; } case IR_CALLXS: goto split_call; case IR_PHI: { IRRef hiref2; if ((irref_isk(nir->op1) && irref_isk(nir->op2)) || nir->op1 == nir->op2) J->cur.nins--; /* Drop useless PHIs. */ hiref2 = hisubst[ir->op2]; if (!((irref_isk(hiref) && irref_isk(hiref2)) || hiref == hiref2)) split_emit(J, IRTI(IR_PHI), hiref, hiref2); break; } case IR_HIOP: J->cur.nins--; /* Drop joining HIOP. */ ir->prev = nir->op1; hi = nir->op2; break; default: lua_assert(ir->o <= IR_NE); /* Comparisons. */ split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]); break; } } else #endif #if LJ_SOFTFP if (ir->o == IR_SLOAD) { if ((nir->op2 & IRSLOAD_CONVERT)) { /* Convert from number to int. */ nir->op2 &= ~IRSLOAD_CONVERT; if (!(nir->op2 & IRSLOAD_TYPECHECK)) nir->t.irt = IRT_INT; /* Drop guard. */ split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); ir->prev = split_num2int(J, nref, nref+1, irt_isguard(ir->t)); } } else if (ir->o == IR_TOBIT) { IRRef tmp, op1 = ir->op1; J->cur.nins--; #if LJ_LE tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]); #else tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev); #endif ir->prev = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_lj_vm_tobit); } else if (ir->o == IR_TOSTR) { if (hisubst[ir->op1]) { if (irref_isk(ir->op1)) nir->op1 = ir->op1; else split_emit(J, IRT(IR_HIOP, IRT_NIL), hisubst[ir->op1], nref); } } else if (ir->o == IR_HREF || ir->o == IR_NEWREF) { if (irref_isk(ir->op2) && hisubst[ir->op2]) nir->op2 = ir->op2; } else #endif if (ir->o == IR_CONV) { /* See above, too. */ IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); #if LJ_32 && LJ_HASFFI if (st == IRT_I64 || st == IRT_U64) { /* Conversion from 64 bit int. */ #if LJ_SOFTFP if (irt_isfloat(ir->t)) { split_call_l(J, hisubst, oir, ir, st == IRT_I64 ? IRCALL_fp64_l2f : IRCALL_fp64_ul2f); J->cur.nins--; /* Drop unused HIOP. */ } #else if (irt_isfp(ir->t)) { /* 64 bit integer to FP conversion. */ ir->prev = split_emit(J, IRT(IR_HIOP, irt_type(ir->t)), hisubst[ir->op1], nref); } #endif else { /* Truncate to lower 32 bits. */ fwdlo: ir->prev = nir->op1; /* Forward loword. */ /* Replace with NOP to avoid messing up the snapshot logic. */ nir->ot = IRT(IR_NOP, IRT_NIL); nir->op1 = nir->op2 = 0; } } #endif #if LJ_SOFTFP && LJ_32 && LJ_HASFFI else if (irt_isfloat(ir->t)) { if (st == IRT_NUM) { split_call_l(J, hisubst, oir, ir, IRCALL_softfp_d2f); J->cur.nins--; /* Drop unused HIOP. */ } else { nir->o = IR_CALLN; nir->op2 = st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f; } } else if (st == IRT_FLOAT) { nir->o = IR_CALLN; nir->op2 = irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui; } else #endif #if LJ_SOFTFP if (st == IRT_NUM || (LJ_32 && LJ_HASFFI && st == IRT_FLOAT)) { if (irt_isguard(ir->t)) { lua_assert(st == IRT_NUM && irt_isint(ir->t)); J->cur.nins--; ir->prev = split_num2int(J, nir->op1, hisubst[ir->op1], 1); } else { split_call_l(J, hisubst, oir, ir, #if LJ_32 && LJ_HASFFI st == IRT_NUM ? (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) : (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui) #else IRCALL_softfp_d2i #endif ); J->cur.nins--; /* Drop unused HIOP. */ } } #endif } else if (ir->o == IR_CALLXS) { IRRef hiref; split_call: hiref = hisubst[ir->op1]; if (hiref) { IROpT ot = nir->ot; IRRef op2 = nir->op2; nir->ot = IRT(IR_CARG, IRT_NIL); #if LJ_LE nir->op2 = hiref; #else nir->op2 = nir->op1; nir->op1 = hiref; #endif ir->prev = nref = split_emit(J, ot, nref, op2); } if (LJ_SOFTFP ? irt_is64(ir->t) : irt_isint64(ir->t)) hi = split_emit(J, IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT), nref, nref); } else if (ir->o == IR_CARG) { IRRef hiref = hisubst[ir->op1]; if (hiref) { IRRef op2 = nir->op2; #if LJ_LE nir->op2 = hiref; #else nir->op2 = nir->op1; nir->op1 = hiref; #endif ir->prev = nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2); nir = IR(nref); } hiref = hisubst[ir->op2]; if (hiref) { #if !LJ_TARGET_X86 int carg = 0; IRIns *cir; for (cir = IR(nir->op1); cir->o == IR_CARG; cir = IR(cir->op1)) carg++; if ((carg & 1) == 0) { /* Align 64 bit arguments. */ IRRef op2 = nir->op2; nir->op2 = REF_NIL; nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2); nir = IR(nref); } #endif #if LJ_BE { IRRef tmp = nir->op2; nir->op2 = hiref; hiref = tmp; } #endif ir->prev = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, hiref); } } else if (ir->o == IR_CNEWI) { if (hisubst[ir->op2]) split_emit(J, IRT(IR_HIOP, IRT_NIL), nref, hisubst[ir->op2]); } else if (ir->o == IR_LOOP) { J->loopref = nref; /* Needed by assembler. */ } hisubst[ref] = hi; /* Store hiword substitution. */ } /* Add PHI marks. */ for (ref = J->cur.nins-1; ref >= REF_FIRST; ref--) { IRIns *ir = IR(ref); if (ir->o != IR_PHI) break; if (!irref_isk(ir->op1)) irt_setphi(IR(ir->op1)->t); if (ir->op2 > J->loopref) irt_setphi(IR(ir->op2)->t); } /* Substitute snapshot maps. */ oir[nins].prev = J->cur.nins; /* Substitution for last snapshot. */ { SnapNo i, nsnap = J->cur.nsnap; for (i = 0; i < nsnap; i++) { SnapShot *snap = &J->cur.snap[i]; SnapEntry *map = &J->cur.snapmap[snap->mapofs]; MSize n, nent = snap->nent; snap->ref = snap->ref == REF_FIRST ? REF_FIRST : oir[snap->ref].prev; for (n = 0; n < nent; n++) { SnapEntry sn = map[n]; IRIns *ir = &oir[snap_ref(sn)]; if (!(LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && irref_isk(snap_ref(sn)))) map[n] = ((sn & 0xffff0000) | ir->prev); } } } }
/* Restore interpreter state from exit state with the help of a snapshot. */ const BCIns *lj_snap_restore(jit_State *J, void *exptr) { ExitState *ex = (ExitState *)exptr; SnapNo snapno = J->exitno; /* For now, snapno == exitno. */ GCtrace *T = traceref(J, J->parent); SnapShot *snap = &T->snap[snapno]; MSize n, nent = snap->nent; SnapEntry *map = &T->snapmap[snap->mapofs]; SnapEntry *flinks = map + nent + snap->depth; int32_t ftsz0; BCReg nslots = snap->nslots; TValue *frame; BloomFilter rfilt = snap_renamefilter(T, snapno); const BCIns *pc = snap_pc(map[nent]); lua_State *L = J->L; /* Set interpreter PC to the next PC to get correct error messages. */ setcframe_pc(cframe_raw(L->cframe), pc+1); /* Make sure the stack is big enough for the slots from the snapshot. */ if (LJ_UNLIKELY(L->base + nslots > tvref(L->maxstack))) { L->top = curr_topL(L); lj_state_growstack(L, nslots - curr_proto(L)->framesize); } /* Fill stack slots with data from the registers and spill slots. */ frame = L->base-1; ftsz0 = frame_ftsz(frame); /* Preserve link to previous frame in slot #0. */ for (n = 0; n < nent; n++) { SnapEntry sn = map[n]; IRRef ref = snap_ref(sn); BCReg s = snap_slot(sn); TValue *o = &frame[s]; /* Stack slots are relative to start frame. */ IRIns *ir = &T->ir[ref]; if (irref_isk(ref)) { /* Restore constant slot. */ lj_ir_kvalue(L, o, ir); if ((sn & (SNAP_CONT|SNAP_FRAME))) { /* Overwrite tag with frame link. */ o->fr.tp.ftsz = s != 0 ? (int32_t)*flinks-- : ftsz0; if ((sn & SNAP_FRAME)) { GCfunc *fn = ir_kfunc(ir); if (isluafunc(fn)) { MSize framesize = funcproto(fn)->framesize; L->base = ++o; if (LJ_UNLIKELY(o + framesize > tvref(L->maxstack))) { ptrdiff_t fsave = savestack(L, frame); L->top = o; lj_state_growstack(L, framesize); /* Grow again. */ frame = restorestack(L, fsave); } } } } } else if (!(sn & SNAP_NORESTORE)) { IRType1 t = ir->t; RegSP rs = ir->prev; lua_assert(!(sn & (SNAP_CONT|SNAP_FRAME))); if (LJ_UNLIKELY(bloomtest(rfilt, ref))) rs = snap_renameref(T, snapno, ref, rs); if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */ int32_t *sps = &ex->spill[regsp_spill(rs)]; if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) { o->u32.lo = (uint32_t)*sps; } else if (irt_isinteger(t)) { setintV(o, *sps); #if !LJ_SOFTFP } else if (irt_isnum(t)) { o->u64 = *(uint64_t *)sps; #endif #if LJ_64 } else if (irt_islightud(t)) { /* 64 bit lightuserdata which may escape already has the tag bits. */ o->u64 = *(uint64_t *)sps; #endif } else { lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */ setgcrefi(o->gcr, *sps); setitype(o, irt_toitype(t)); } } else { /* Restore from register. */ Reg r = regsp_reg(rs); lua_assert(ra_hasreg(r)); if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) { o->u32.lo = (uint32_t)ex->gpr[r-RID_MIN_GPR]; } else if (irt_isinteger(t)) { setintV(o, (int32_t)ex->gpr[r-RID_MIN_GPR]); #if !LJ_SOFTFP } else if (irt_isnum(t)) { setnumV(o, ex->fpr[r-RID_MIN_FPR]); #endif #if LJ_64 } else if (irt_islightud(t)) { /* 64 bit lightuserdata which may escape already has the tag bits. */ o->u64 = ex->gpr[r-RID_MIN_GPR]; #endif } else { if (!irt_ispri(t)) setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]); setitype(o, irt_toitype(t)); } } if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) { rs = (ir+1)->prev; if (LJ_UNLIKELY(bloomtest(rfilt, ref+1))) rs = snap_renameref(T, snapno, ref+1, rs); o->u32.hi = (ra_hasspill(regsp_spill(rs))) ? (uint32_t)*&ex->spill[regsp_spill(rs)] : (uint32_t)ex->gpr[regsp_reg(rs)-RID_MIN_GPR]; } } } switch (bc_op(*pc)) { case BC_CALLM: case BC_CALLMT: case BC_RETM: case BC_TSETM: L->top = frame + nslots; break; default: L->top = curr_topL(L); break; } lua_assert(map + nent == flinks); return pc; }
/* Restore interpreter state from exit state with the help of a snapshot. */ void lj_snap_restore(jit_State *J, void *exptr) { ExitState *ex = (ExitState *)exptr; SnapNo snapno = J->exitno; /* For now, snapno == exitno. */ Trace *T = J->trace[J->parent]; SnapShot *snap = &T->snap[snapno]; BCReg s, nslots = snap->nslots; IRRef2 *map = &T->snapmap[snap->mapofs]; IRRef2 *flinks = map + nslots + snap->nframelinks; TValue *o, *newbase, *ntop; BloomFilter rfilt = snap_renamefilter(T, snapno); lua_State *L = J->L; /* Make sure the stack is big enough for the slots from the snapshot. */ if (L->base + nslots >= L->maxstack) { L->top = curr_topL(L); lj_state_growstack(L, nslots - curr_proto(L)->framesize); } /* Fill stack slots with data from the registers and spill slots. */ newbase = NULL; ntop = L->base; for (s = 0, o = L->base-1; s < nslots; s++, o++) { IRRef ref = snap_ref(map[s]); if (ref) { IRIns *ir = &T->ir[ref]; if (irref_isk(ref)) { /* Restore constant slot. */ lj_ir_kvalue(L, o, ir); } else { IRType1 t = ir->t; RegSP rs = ir->prev; if (LJ_UNLIKELY(bloomtest(rfilt, ref))) rs = snap_renameref(T, snapno, ref, rs); if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */ int32_t *sps = &ex->spill[regsp_spill(rs)]; if (irt_isinteger(t)) { setintV(o, *sps); } else if (irt_isnum(t)) { o->u64 = *(uint64_t *)sps; } else { lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */ setgcrefi(o->gcr, *sps); setitype(o, irt_toitype(t)); } } else if (ra_hasreg(regsp_reg(rs))) { /* Restore from register. */ Reg r = regsp_reg(rs); if (irt_isinteger(t)) { setintV(o, ex->gpr[r-RID_MIN_GPR]); } else if (irt_isnum(t)) { setnumV(o, ex->fpr[r-RID_MIN_FPR]); } else { if (!irt_ispri(t)) setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]); setitype(o, irt_toitype(t)); } } else { /* Restore frame slot. */ lua_assert(ir->o == IR_FRAME); /* This works for both PTR and FUNC IR_FRAME. */ setgcrefp(o->fr.func, mref(T->ir[ir->op2].ptr, void)); if (s != 0) /* Do not overwrite link to previous frame. */ o->fr.tp.ftsz = (int32_t)*--flinks; if (irt_isfunc(ir->t)) { GCfunc *fn = gco2func(gcref(T->ir[ir->op2].gcr)); if (isluafunc(fn)) { TValue *fs; newbase = o+1; fs = newbase + funcproto(fn)->framesize; if (fs > ntop) ntop = fs; /* Update top for newly added frames. */ } } } } } else if (newbase) { setnilV(o); /* Clear unreferenced slots of newly added frames. */ } }
int irEngine(Capability *cap, Fragment *F) { static Inst disp[] = { #define IRIMPL(name, f, o1, o2) &&op_##name, IRDEF(IRIMPL) #undef IRIMPL &&stop }; IRRef ref; Thread *T = cap->T; Word nphis = F->nphis; Word *base = T->base - 1; Word szins = F->nins - F->nk; Word vals_[szins + nphis]; Word *phibuf = &vals_[szins]; /* For parallel copy of PHI nodes */ Word *vals = vals_ - (int)F->nk; IRIns *pc = F->ir + REF_FIRST; IRRef pcref = REF_FIRST; IRIns *pcmax = F->ir + F->nins; IRIns *pcloop = F->nloop ? F->ir + F->nloop + 1 : pc; //int count = 100; DBG_PR("*** Executing trace.\n" "*** base = %p\n" "*** pc = %p\n" "*** pcmax = %p (%d)\n" "*** loop = %p (%d)\n", base, pc, pcmax, (int)(pcmax - pc), pcloop, (int)(pcloop - pc)); for (ref = F->nk; ref < REF_BIAS; ref++) { switch (IR(ref)->o) { case IR_KINT: vals[ref] = (Word)IR(ref)->i; break; case IR_KBASEO: vals[ref] = (Word)(T->base + IR(ref)->i); break; case IR_KWORD: vals[ref] = (Word)(F->kwords[IR(ref)->u]); break; default: LC_ASSERT(0); break; } DBG_LVL(2, "%d, %" FMT_WordX "\n", ref - REF_BIAS, vals[ref]); } vals[REF_BASE] = (Word)base; goto *disp[pc->o]; # define DISPATCH_NEXT \ if (irt_type(pc->t) != IRT_VOID && pc->o != IR_PHI) { \ if (irt_type(pc->t) == IRT_I32) \ DBG_LVL(2, " ===> %" FMT_Int "\n", vals[pcref]); \ else \ DBG_LVL(2, " ===> 0x%" FMT_WordX "\n", vals[pcref]); } \ ++pc; ++pcref; \ if (LC_UNLIKELY(pc >= pcmax)) { pc = pcloop; pcref = F->nloop + 1; } \ if (pc->o != IR_NOP) { \ DBG_LVL(2, "[%d] ", pcref - REF_BIAS); \ IF_DBG_LVL(2, printIR(F, *pc)); } \ goto *disp[pc->o] op_NOP: op_FRAME: op_RET: op_LOOP: DISPATCH_NEXT; op_PHI: { /* PHI nodes represent parallel assignments, so as soon as we discover the first PHI node, we perform all assignments in parallel. */ LC_ASSERT(nphis > 0); u2 i; DBG_LVL(3, " ( "); for (i = 0; i < nphis; i++) { DBG_LVL(3, "%d ", irref_int(pc[i].op2)); phibuf[i] = vals[pc[i].op2]; } DBG_LVL(3, ") --> ( "); for (i = 0; i < nphis; i++) { DBG_LVL(3, "%d ", irref_int(pc[i].op1)); vals[pc[i].op1] = phibuf[i]; } DBG_LVL(3, ") [%d phis]\n", (int)nphis); pc += nphis - 1; //vals[pc->op1] = vals[pc->op2]; DISPATCH_NEXT; } op_LT: recordEvent(EV_CMP, 0); if (!((WordInt)vals[pc->op1] < (WordInt)vals[pc->op2])) goto guard_failed; DISPATCH_NEXT; op_GE: recordEvent(EV_CMP, 0); if (!((WordInt)vals[pc->op1] >= (WordInt)vals[pc->op2])) goto guard_failed; DISPATCH_NEXT; op_LE: recordEvent(EV_CMP, 0); if (!((WordInt)vals[pc->op1] <= (WordInt)vals[pc->op2])) goto guard_failed; DISPATCH_NEXT; op_GT: recordEvent(EV_CMP, 0); if (!((WordInt)vals[pc->op1] > (WordInt)vals[pc->op2])) goto guard_failed; DISPATCH_NEXT; op_EQ: recordEvent(EV_CMP, 0); if (!((WordInt)vals[pc->op1] == (WordInt)vals[pc->op2])) { goto guard_failed; } DISPATCH_NEXT; op_NE: recordEvent(EV_CMP, 0); if (!((WordInt)vals[pc->op1] != (WordInt)vals[pc->op2])) goto guard_failed; DISPATCH_NEXT; op_ADD: recordEvent(EV_ALU, 0); vals[pcref] = vals[pc->op1] + vals[pc->op2]; DISPATCH_NEXT; op_SUB: recordEvent(EV_ALU, 0); vals[pcref] = vals[pc->op1] - vals[pc->op2]; DISPATCH_NEXT; op_MUL: recordEvent(EV_MUL, 0); vals[pcref] = (WordInt)vals[pc->op1] * (WordInt)vals[pc->op2]; DISPATCH_NEXT; op_DIV: recordEvent(EV_REMDIV, 0); if (LC_LIKELY(vals[pc->op2] != 0)) vals[pcref] = (WordInt)vals[pc->op1] / (WordInt)vals[pc->op2]; else LC_ASSERT(0); DISPATCH_NEXT; op_REM: recordEvent(EV_REMDIV, 0); if (LC_LIKELY(vals[pc->op2] != 0)) vals[pcref] = (WordInt)vals[pc->op1] % (WordInt)vals[pc->op2]; else LC_ASSERT(0); DISPATCH_NEXT; op_FREF: vals[pcref] = (Word)(((Closure*)vals[pc->op1])->payload + (pc->op2 - 1)); DISPATCH_NEXT; op_FLOAD: recordEvent(EV_LOAD, 0); vals[pcref] = *((Word*)vals[pc->op1]); DISPATCH_NEXT; op_SLOAD: recordEvent(EV_LOAD, 0); vals[pcref] = base[pc->op1]; DISPATCH_NEXT; op_ILOAD: recordEvent(EV_LOAD, 0); vals[pcref] = (Word)getInfo(vals[pc->op1]); DISPATCH_NEXT; op_NEW: if (!ir_issunken(pc)) { // do actual allocation on trace HeapInfo *hp = &F->heap[pc->op2]; int j; recordEvent(EV_ALLOC, hp->nfields + 1); Closure *cl = allocClosure(wordsof(ClosureHeader) + hp->nfields); setInfo(cl, (InfoTable*)vals[pc->op1]); for (j = 0; j < hp->nfields; j++) { cl->payload[j] = vals[getHeapInfoField(F, hp, j)]; } vals[pcref] = (Word)cl; } else { vals[pcref] = 0; // to trigger an error if accessed } DISPATCH_NEXT; op_UPDATE: { recordEvent(EV_UPDATE, 0); Closure *oldnode = (Closure *)vals[pc->op1]; Closure *newnode = (Closure *)base[pc->op2]; setInfo(oldnode, (InfoTable*)&stg_IND_info); oldnode->payload[0] = (Word)newnode; DISPATCH_NEXT; } op_RLOAD: op_FSTORE: op_RENAME: op_BNOT: op_BAND: op_BOR: op_BXOR: op_BSHL: op_BSHR: op_BSAR: op_BROL: op_BROR: // These should never be executed. op_BASE: op_KINT: op_KWORD: op_KBASEO: LC_ASSERT(0); guard_failed: DBG_PR("Exiting at %d\n", pcref - REF_BIAS); { int i; SnapShot *snap = 0; SnapEntry *se; for (i = 0; i < F->nsnap; i++) { if (F->snap[i].ref == pcref) { snap = &F->snap[i]; break; } } LC_ASSERT(snap != 0); snap->count++; se = F->snapmap + snap->mapofs; DBG_PR("Snapshot: %d, Snap entries: %d, slots = %d\n", i, snap->nent, snap->nslots); recordEvent(EV_EXIT, snap->nent); for (i = 0; i < snap->nent; i++, se++) { BCReg s = snap_slot(*se); IRRef r = snap_ref(*se); DBG_PR("base[%d] = ", s - 1); base[s] = restoreValue(F, vals, r); IF_DBG_LVL(1, printSlot(stderr, base + s); fprintf(stderr, "\n")); //DBG_PR("0x%" FMT_WordX "\n", base[s]); } DBG_PR("Base slot: %d\n", se[1]); // se[1] = T->pc = (BCIns *)F->startpc + (int)se[0]; T->base = base + se[1]; T->top = base + snap->nslots; //printFrame(T->base, T->top); return 0; } stop: return 1; }
/* Transform the old IR to the new IR. */ static void split_ir(jit_State *J) { IRRef nins = J->cur.nins, nk = J->cur.nk; MSize irlen = nins - nk; MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1)); IRIns *oir = (IRIns *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, need); IRRef1 *hisubst; IRRef ref; /* Copy old IR to buffer. */ memcpy(oir, IR(nk), irlen*sizeof(IRIns)); /* Bias hiword substitution table and old IR. Loword kept in field prev. */ hisubst = (IRRef1 *)&oir[irlen] - nk; oir -= nk; /* Remove all IR instructions, but retain IR constants. */ J->cur.nins = REF_FIRST; /* Process constants and fixed references. */ for (ref = nk; ref <= REF_BASE; ref++) { IRIns *ir = &oir[ref]; if (ir->o == IR_KINT64) { /* Split up 64 bit constant. */ TValue tv = *ir_k64(ir); ir->prev = lj_ir_kint(J, (int32_t)tv.u32.lo); hisubst[ref] = lj_ir_kint(J, (int32_t)tv.u32.hi); } else { ir->prev = ref; /* Identity substitution for loword. */ hisubst[ref] = 0; } } /* Process old IR instructions. */ for (ref = REF_FIRST; ref < nins; ref++) { IRIns *ir = &oir[ref]; IRRef nref = lj_ir_nextins(J); IRIns *nir = IR(nref); IRRef hi = 0; /* Copy-substitute old instruction to new instruction. */ nir->op1 = ir->op1 < nk ? ir->op1 : oir[ir->op1].prev; nir->op2 = ir->op2 < nk ? ir->op2 : oir[ir->op2].prev; ir->prev = nref; /* Loword substitution. */ nir->o = ir->o; nir->t.irt = ir->t.irt & ~(IRT_MARK|IRT_ISPHI); hisubst[ref] = 0; /* Split 64 bit instructions. */ if (irt_isint64(ir->t)) { IRRef hiref = hisubst[ir->op1]; nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD); /* Turn into INT op. */ switch (ir->o) { case IR_ADD: case IR_SUB: /* Use plain op for hiword if loword cannot produce a carry/borrow. */ if (irref_isk(nir->op2) && IR(nir->op2)->i == 0) { ir->prev = nir->op1; /* Pass through loword. */ nir->op1 = hiref; nir->op2 = hisubst[ir->op2]; hi = nref; break; } /* fallthrough */ case IR_NEG: hi = split_emit(J, IRTI(IR_HIOP), hiref, hisubst[ir->op2]); break; case IR_MUL: hi = split_call64(J, hisubst, oir, ir, IRCALL_lj_carith_mul64); break; case IR_DIV: hi = split_call64(J, hisubst, oir, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : IRCALL_lj_carith_divu64); break; case IR_MOD: hi = split_call64(J, hisubst, oir, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : IRCALL_lj_carith_modu64); break; case IR_POW: hi = split_call64(J, hisubst, oir, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : IRCALL_lj_carith_powu64); break; case IR_FLOAD: lua_assert(ir->op2 == IRFL_CDATA_INT64); hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64HI); #if LJ_BE ir->prev = hi; hi = nref; #endif break; case IR_XLOAD: hi = split_emit(J, IRTI(IR_XLOAD), split_ptr(J, nir->op1), ir->op2); #if LJ_BE ir->prev = hi; hi = nref; #endif break; case IR_XSTORE: #if LJ_LE hiref = hisubst[ir->op2]; #else hiref = nir->op2; nir->op2 = hisubst[ir->op2]; #endif split_emit(J, IRTI(IR_XSTORE), split_ptr(J, nir->op1), hiref); break; case IR_CONV: { /* Conversion to 64 bit integer. Others handled below. */ IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); if (st == IRT_NUM || st == IRT_FLOAT) { /* FP to 64 bit int conv. */ hi = split_emit(J, IRTI(IR_HIOP), nir->op1, nref); } else if (st == IRT_I64 || st == IRT_U64) { /* 64/64 bit cast. */ /* Drop cast, since assembler doesn't care. */ goto fwdlo; } else if ((ir->op2 & IRCONV_SEXT)) { /* Sign-extend to 64 bit. */ IRRef k31 = lj_ir_kint(J, 31); nir = IR(nref); /* May have been reallocated. */ ir->prev = nir->op1; /* Pass through loword. */ nir->o = IR_BSAR; /* hi = bsar(lo, 31). */ nir->op2 = k31; hi = nref; } else { /* Zero-extend to 64 bit. */ hi = lj_ir_kint(J, 0); goto fwdlo; } break; } case IR_PHI: { IRRef hiref2; if ((irref_isk(nir->op1) && irref_isk(nir->op2)) || nir->op1 == nir->op2) J->cur.nins--; /* Drop useless PHIs. */ hiref2 = hisubst[ir->op2]; if (!((irref_isk(hiref) && irref_isk(hiref2)) || hiref == hiref2)) split_emit(J, IRTI(IR_PHI), hiref, hiref2); break; } default: lua_assert(ir->o <= IR_NE); /* Comparisons. */ split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]); break; } } else if (ir->o == IR_CONV) { /* See above, too. */ IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); if (st == IRT_I64 || st == IRT_U64) { /* Conversion from 64 bit int. */ if (irt_isfp(ir->t)) { /* 64 bit integer to FP conversion. */ ir->prev = split_emit(J, IRT(IR_HIOP, irt_type(ir->t)), hisubst[ir->op1], nref); } else { /* Truncate to lower 32 bits. */ fwdlo: ir->prev = nir->op1; /* Forward loword. */ /* Replace with NOP to avoid messing up the snapshot logic. */ nir->ot = IRT(IR_NOP, IRT_NIL); nir->op1 = nir->op2 = 0; } } } else if (ir->o == IR_CNEWI) { if (hisubst[ir->op2]) split_emit(J, IRT(IR_HIOP, IRT_NIL), nref, hisubst[ir->op2]); } else if (ir->o == IR_LOOP) { J->loopref = nref; /* Needed by assembler. */ } hisubst[ref] = hi; /* Store hiword substitution. */ } /* Add PHI marks. */ for (ref = J->cur.nins-1; ref >= REF_FIRST; ref--) { IRIns *ir = IR(ref); if (ir->o != IR_PHI) break; if (!irref_isk(ir->op1)) irt_setphi(IR(ir->op1)->t); if (ir->op2 > J->loopref) irt_setphi(IR(ir->op2)->t); } /* Substitute snapshot maps. */ oir[nins].prev = J->cur.nins; /* Substitution for last snapshot. */ { SnapNo i, nsnap = J->cur.nsnap; for (i = 0; i < nsnap; i++) { SnapShot *snap = &J->cur.snap[i]; SnapEntry *map = &J->cur.snapmap[snap->mapofs]; MSize n, nent = snap->nent; snap->ref = oir[snap->ref].prev; for (n = 0; n < nent; n++) { SnapEntry sn = map[n]; map[n] = ((sn & 0xffff0000) | oir[snap_ref(sn)].prev); } } } }