/* Backwards propagate marks. Replace unused instructions with NOPs. */ static void dce_propagate(jit_State *J) { IRRef1 *pchain[IR__MAX]; IRRef ins; uint32_t i; for (i = 0; i < IR__MAX; i++) pchain[i] = &J->chain[i]; for (ins = J->cur.nins-1; ins >= REF_FIRST; ins--) { IRIns *ir = IR(ins); if (irt_ismarked(ir->t)) { irt_clearmark(ir->t); pchain[ir->o] = &ir->prev; } else if (!(irt_isguard(ir->t) || irm_sideeff(lj_ir_mode[ir->o]))) { *pchain[ir->o] = ir->prev; /* Reroute original instruction chain. */ *pchain[IR_NOP] = (IRRef1)ins; ir->t.irt = IRT_NIL; ir->o = IR_NOP; /* Replace instruction with NOP. */ ir->op1 = ir->op2 = 0; pchain[IR_NOP] = &ir->prev; continue; } if (!irref_isk(ir->op1)) irt_setmark(IR(ir->op1)->t); if (!irref_isk(ir->op2)) irt_setmark(IR(ir->op2)->t); } *pchain[IR_NOP] = 0; /* Terminate NOP chain. */ }
/* Check whether the previous value for a table store is non-nil. ** This can be derived either from a previous store or from a previous ** load (because all loads from tables perform a type check). ** ** The result of the analysis can be used to avoid the metatable check ** and the guard against HREF returning niltv. Both of these are cheap, ** so let's not spend too much effort on the analysis. ** ** A result of 1 is exact: previous value CANNOT be nil. ** A result of 0 is inexact: previous value MAY be nil. */ int lj_opt_fwd_wasnonnil(jit_State *J, IROpT loadop, IRRef xref) { /* First check stores. */ IRRef ref = J->chain[loadop+IRDELTA_L2S]; while (ref > xref) { IRIns *store = IR(ref); if (store->op1 == xref) { /* Same xREF. */ /* A nil store MAY alias, but a non-nil store MUST alias. */ return !irt_isnil(store->t); } else if (irt_isnil(store->t)) { /* Must check any nil store. */ IRRef skref = IR(store->op1)->op2; IRRef xkref = IR(xref)->op2; /* Same key type MAY alias. Need ALOAD check due to multiple int types. */ if (loadop == IR_ALOAD || irt_sametype(IR(skref)->t, IR(xkref)->t)) { if (skref == xkref || !irref_isk(skref) || !irref_isk(xkref)) return 0; /* A nil store with same const key or var key MAY alias. */ /* Different const keys CANNOT alias. */ } /* Different key types CANNOT alias. */ } /* Other non-nil stores MAY alias. */ ref = store->prev; } /* Check loads since nothing could be derived from stores. */ ref = J->chain[loadop]; while (ref > xref) { IRIns *load = IR(ref); if (load->op1 == xref) { /* Same xREF. */ /* A nil load MAY alias, but a non-nil load MUST alias. */ return !irt_isnil(load->t); } /* Other non-nil loads MAY alias. */ ref = load->prev; } return 0; /* Nothing derived at all, previous value MAY be nil. */ }
/* Emit or eliminate collected PHIs. */ static void loop_emit_phi(jit_State *J, IRRef1 *subst, IRRef1 *phi, IRRef nphi) { int pass2 = 0; IRRef i, nslots; IRRef invar = J->chain[IR_LOOP]; /* Pass #1: mark redundant and potentially redundant PHIs. */ for (i = 0; i < nphi; i++) { IRRef lref = phi[i]; IRRef rref = subst[lref]; if (lref == rref || rref == REF_DROP) { /* Invariants are redundant. */ irt_setmark(IR(lref)->t); } else if (!(IR(rref)->op1 == lref || IR(rref)->op2 == lref)) { /* Quick check for simple recurrences failed, need pass2. */ irt_setmark(IR(lref)->t); pass2 = 1; } } /* Pass #2: traverse variant part and clear marks of non-redundant PHIs. */ if (pass2) { for (i = J->cur.nins-1; i > invar; i--) { IRIns *ir = IR(i); if (!irref_isk(ir->op1)) irt_clearmark(IR(ir->op1)->t); if (!irref_isk(ir->op2)) irt_clearmark(IR(ir->op2)->t); } } /* Pass #3: add PHIs for variant slots without a corresponding SLOAD. */ nslots = J->baseslot+J->maxslot; for (i = 1; i < nslots; i++) { IRRef ref = tref_ref(J->slot[i]); while (!irref_isk(ref) && ref != subst[ref]) { IRIns *ir = IR(ref); irt_clearmark(ir->t); /* Unmark potential uses, too. */ if (irt_isphi(ir->t) || irt_ispri(ir->t)) break; irt_setphi(ir->t); if (nphi >= LJ_MAX_PHI) lj_trace_err(J, LJ_TRERR_PHIOV); phi[nphi++] = (IRRef1)ref; ref = subst[ref]; if (ref > invar) break; } } /* Pass #4: emit PHI instructions or eliminate PHIs. */ for (i = 0; i < nphi; i++) { IRRef lref = phi[i]; IRIns *ir = IR(lref); if (!irt_ismarked(ir->t)) { /* Emit PHI if not marked. */ IRRef rref = subst[lref]; if (rref > invar) irt_setphi(IR(rref)->t); emitir_raw(IRT(IR_PHI, irt_type(ir->t)), lref, rref); } else { /* Otherwise eliminate PHI. */ irt_clearmark(ir->t); irt_clearphi(ir->t); } } }
/* Alias analysis for array and hash access using key-based disambiguation. */ static AliasRet aa_ahref(jit_State *J, IRIns *refa, IRIns *refb) { IRRef ka = refa->op2; IRRef kb = refb->op2; IRIns *keya, *keyb; if (refa == refb) return ALIAS_MUST; /* Shortcut for same refs. */ keya = IR(ka); if (keya->o == IR_KSLOT) { ka = keya->op1; keya = IR(ka); } keyb = IR(kb); if (keyb->o == IR_KSLOT) { kb = keyb->op1; keyb = IR(kb); } if (ka == kb) { /* Same key. Check for same table with different ref (NEWREF vs. HREF). */ IRIns *ta = refa; IRIns *tb = refb; if (ta->o == IR_HREFK || ta->o == IR_AREF) ta = IR(ta->op1); if (tb->o == IR_HREFK || tb->o == IR_AREF) tb = IR(tb->op1); if (ta->op1 == tb->op1) return ALIAS_MUST; /* Same key, same table. */ else return ALIAS_MAY; /* Same key, possibly different table. */ } if (irref_isk(ka) && irref_isk(kb)) return ALIAS_NO; /* Different constant keys. */ if (refa->o == IR_AREF) { /* Disambiguate array references based on index arithmetic. */ lua_assert(refb->o == IR_AREF); if (refa->op1 == refb->op1) { /* Same table, different non-const array keys. */ int32_t ofsa = 0, ofsb = 0; IRRef basea = ka, baseb = kb; /* Gather base and offset from t[base] or t[base+-ofs]. */ if (keya->o == IR_ADD && irref_isk(keya->op2)) { basea = keya->op1; ofsa = IR(keya->op2)->i; if (basea == kb && ofsa != 0) return ALIAS_NO; /* t[base+-ofs] vs. t[base]. */ } if (keyb->o == IR_ADD && irref_isk(keyb->op2)) { baseb = keyb->op1; ofsb = IR(keyb->op2)->i; if (ka == baseb && ofsb != 0) return ALIAS_NO; /* t[base] vs. t[base+-ofs]. */ } if (basea == baseb && ofsa != ofsb) return ALIAS_NO; /* t[base+-o1] vs. t[base+-o2] and o1 != o2. */ } } else { /* Disambiguate hash references based on the type of their keys. */ lua_assert((refa->o==IR_HREF || refa->o==IR_HREFK || refa->o==IR_NEWREF) && (refb->o==IR_HREF || refb->o==IR_HREFK || refb->o==IR_NEWREF)); if (!irt_sametype(keya->t, keyb->t)) return ALIAS_NO; /* Different key types. */ } return ALIAS_MAY; /* Anything else: we just don't know. */ }
/* Find cdata allocation for a reference (if any). */ static IRIns *aa_findcnew(jit_State *J, IRIns *ir) { while (ir->o == IR_ADD) { if (!irref_isk(ir->op1)) { IRIns *ir1 = aa_findcnew(J, IR(ir->op1)); /* Left-recursion. */ if (ir1) return ir1; } if (irref_isk(ir->op2)) return NULL; ir = IR(ir->op2); /* Flatten right-recursion. */ } return ir->o == IR_CNEW ? ir : NULL; }
/* Alias analysis for XLOAD/XSTORE. */ static AliasRet aa_xref(jit_State *J, IRIns *refa, IRIns *xa, IRIns *xb) { ptrdiff_t ofsa = 0, ofsb = 0; IRIns *refb = IR(xb->op1); IRIns *basea = refa, *baseb = refb; if (refa == refb && irt_sametype(xa->t, xb->t)) return ALIAS_MUST; /* Shortcut for same refs with identical type. */ /* Offset-based disambiguation. */ if (refa->o == IR_ADD && irref_isk(refa->op2)) { IRIns *irk = IR(refa->op2); basea = IR(refa->op1); ofsa = (LJ_64 && irk->o == IR_KINT64) ? (ptrdiff_t)ir_k64(irk)->u64 : (ptrdiff_t)irk->i; } if (refb->o == IR_ADD && irref_isk(refb->op2)) { IRIns *irk = IR(refb->op2); baseb = IR(refb->op1); ofsb = (LJ_64 && irk->o == IR_KINT64) ? (ptrdiff_t)ir_k64(irk)->u64 : (ptrdiff_t)irk->i; } /* Treat constified pointers like base vs. base+offset. */ if (basea->o == IR_KPTR && baseb->o == IR_KPTR) { ofsb += (char *)ir_kptr(baseb) - (char *)ir_kptr(basea); baseb = basea; } /* This implements (very) strict aliasing rules. ** Different types do NOT alias, except for differences in signedness. ** Type punning through unions is allowed (but forces a reload). */ if (basea == baseb) { ptrdiff_t sza = irt_size(xa->t), szb = irt_size(xb->t); if (ofsa == ofsb) { if (sza == szb && irt_isfp(xa->t) == irt_isfp(xb->t)) return ALIAS_MUST; /* Same-sized, same-kind. May need to convert. */ } else if (ofsa + sza <= ofsb || ofsb + szb <= ofsa) { return ALIAS_NO; /* Non-overlapping base+-o1 vs. base+-o2. */ } /* NYI: extract, extend or reinterpret bits (int <-> fp). */ return ALIAS_MAY; /* Overlapping or type punning: force reload. */ } if (!irt_sametype(xa->t, xb->t) && !(irt_typerange(xa->t, IRT_I8, IRT_U64) && ((xa->t.irt - IRT_I8) ^ (xb->t.irt - IRT_I8)) == 1)) return ALIAS_NO; /* NYI: structural disambiguation. */ return aa_cnew(J, basea, baseb); /* Try to disambiguate allocations. */ }
/* Restore raw data from the trace exit state. */ static void snap_restoredata(GCtrace *T, ExitState *ex, SnapNo snapno, BloomFilter rfilt, IRRef ref, void *dst, CTSize sz) { IRIns *ir = &T->ir[ref]; RegSP rs = ir->prev; int32_t *src; uint64_t tmp; if (irref_isk(ref)) { if (ir->o == IR_KNUM || ir->o == IR_KINT64) { src = mref(ir->ptr, int32_t); } else if (sz == 8) { tmp = (uint64_t)(uint32_t)ir->i; src = (int32_t *)&tmp; } else { src = &ir->i; } } else { if (LJ_UNLIKELY(bloomtest(rfilt, ref))) rs = snap_renameref(T, snapno, ref, rs); if (ra_hasspill(regsp_spill(rs))) { src = &ex->spill[regsp_spill(rs)]; if (sz == 8 && !irt_is64(ir->t)) { tmp = (uint64_t)(uint32_t)*src; src = (int32_t *)&tmp; } } else { Reg r = regsp_reg(rs); if (ra_noreg(r)) { /* Note: this assumes CNEWI is never used for SOFTFP split numbers. */ lua_assert(sz == 8 && ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT); snap_restoredata(T, ex, snapno, rfilt, ir->op1, dst, 4); *(lua_Number *)dst = (lua_Number)*(int32_t *)dst; return; } src = (int32_t *)&ex->gpr[r-RID_MIN_GPR]; #if !LJ_SOFTFP if (r >= RID_MAX_GPR) { src = (int32_t *)&ex->fpr[r-RID_MIN_FPR]; #if LJ_TARGET_PPC if (sz == 4) { /* PPC FPRs are always doubles. */ *(float *)dst = (float)*(double *)src; return; } #else if (LJ_BE && sz == 4) src++; #endif } #endif } } lua_assert(sz == 1 || sz == 2 || sz == 4 || sz == 8); if (sz == 4) *(int32_t *)dst = *src; else if (sz == 8) *(int64_t *)dst = *(int64_t *)src; else if (sz == 1) *(int8_t *)dst = (int8_t)*src; else *(int16_t *)dst = (int16_t)*src; }
/* Reassociate index references. */ static IRRef reassoc_xref(jit_State *J, IRIns *ir) { ptrdiff_t ofs = 0; if (ir->o == IR_ADD && irref_isk(ir->op2)) { /* Get constant offset. */ IRIns *irk = IR(ir->op2); ofs = (LJ_64 && irk->o == IR_KINT64) ? (ptrdiff_t)ir_k64(irk)->u64 : (ptrdiff_t)irk->i; ir = IR(ir->op1); } if (ir->o == IR_ADD) { /* Add of base + index. */ /* Index ref > base ref for loop-carried dependences. Only check op1. */ IRIns *ir2, *ir1 = IR(ir->op1); int32_t shift = 0; IRRef idxref; /* Determine index shifts. Don't bother with IR_MUL here. */ if (ir1->o == IR_BSHL && irref_isk(ir1->op2)) shift = IR(ir1->op2)->i; else if (ir1->o == IR_ADD && ir1->op1 == ir1->op2) shift = 1; else ir1 = ir; ir2 = IR(ir1->op1); /* A non-reassociated add. Must be a loop-carried dependence. */ if (ir2->o == IR_ADD && irt_isint(ir2->t) && irref_isk(ir2->op2)) ofs += (ptrdiff_t)IR(ir2->op2)->i << shift; else return 0; idxref = ir2->op1; /* Try to CSE the reassociated chain. Give up if not found. */ if (ir1 != ir && !(idxref = reassoc_trycse(J, ir1->o, idxref, ir1->o == IR_BSHL ? ir1->op2 : idxref))) return 0; if (!(idxref = reassoc_trycse(J, IR_ADD, idxref, ir->op2))) return 0; if (ofs != 0) { IRRef refk = tref_ref(lj_ir_kintp(J, ofs)); if (!(idxref = reassoc_trycse(J, IR_ADD, idxref, refk))) return 0; } return idxref; /* Success, found a reassociated index reference. Phew. */ } return 0; /* Failure. */ }
/* Mark all instructions referenced by a snapshot. */ static void sink_mark_snap(jit_State *J, SnapShot *snap) { SnapEntry *map = &J->cur.snapmap[snap->mapofs]; MSize n, nent = snap->nent; for (n = 0; n < nent; n++) { IRRef ref = snap_ref(map[n]); if (!irref_isk(ref)) irt_setmark(IR(ref)->t); } }
/* Alias analysis for XLOAD/XSTORE. */ static AliasRet aa_xref(jit_State *J, IRIns *refa, IRIns *xa, IRIns *xb) { ptrdiff_t ofsa = 0, ofsb = 0; IRIns *refb = IR(xb->op1); IRIns *basea = refa, *baseb = refb; /* This implements (very) strict aliasing rules. ** Different types do NOT alias, except for differences in signedness. ** NYI: this also prevents type punning through unions. */ if (irt_sametype(xa->t, xb->t)) { if (refa == refb) return ALIAS_MUST; /* Shortcut for same refs with identical type. */ } else if (!(irt_typerange(xa->t, IRT_I8, IRT_U64) && ((xa->t.irt - IRT_I8) ^ (xb->t.irt - IRT_I8)) == 1)) { return ALIAS_NO; } /* Offset-based disambiguation. */ if (refa->o == IR_ADD && irref_isk(refa->op2)) { IRIns *irk = IR(refa->op2); basea = IR(refa->op1); ofsa = (LJ_64 && irk->o == IR_KINT64) ? (ptrdiff_t)ir_k64(irk)->u64 : (ptrdiff_t)irk->i; if (basea == refb && ofsa != 0) return ALIAS_NO; /* base+-ofs vs. base. */ } if (refb->o == IR_ADD && irref_isk(refb->op2)) { IRIns *irk = IR(refb->op2); baseb = IR(refb->op1); ofsb = (LJ_64 && irk->o == IR_KINT64) ? (ptrdiff_t)ir_k64(irk)->u64 : (ptrdiff_t)irk->i; if (refa == baseb && ofsb != 0) return ALIAS_NO; /* base vs. base+-ofs. */ } if (basea == baseb) { /* This assumes strictly-typed, non-overlapping accesses. */ if (ofsa != ofsb) return ALIAS_NO; /* base+-o1 vs. base+-o2 and o1 != o2. */ return ALIAS_MUST; /* Unsigned vs. signed access to the same address. */ } /* NYI: structural disambiguation. */ return aa_cnew(J, basea, baseb); /* Try to disambiguate allocations. */ }
/* Get a pointer to the other 32 bit word (LE: hiword, BE: loword). */ static IRRef split_ptr(jit_State *J, IRRef ref) { IRIns *ir = IR(ref); int32_t ofs = 4; if (ir->o == IR_ADD && irref_isk(ir->op2)) { /* Reassociate address. */ ofs += IR(ir->op2)->i; ref = ir->op1; if (ofs == 0) return ref; } return split_emit(J, IRTI(IR_ADD), ref, lj_ir_kint(J, ofs)); }
/* Substitute references of a snapshot. */ static void split_subst_snap(jit_State *J, SnapShot *snap, IRIns *oir) { SnapEntry *map = &J->cur.snapmap[snap->mapofs]; MSize n, nent = snap->nent; for (n = 0; n < nent; n++) { SnapEntry sn = map[n]; IRIns *ir = &oir[snap_ref(sn)]; if (!(LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && irref_isk(snap_ref(sn)))) map[n] = ((sn & 0xffff0000) | ir->prev); } }
/* Reassociate ALOAD across PHIs to handle t[i-1] forwarding case. */ static TRef fwd_aload_reassoc(jit_State *J) { IRIns *irx = IR(fins->op1); IRIns *key = IR(irx->op2); if (key->o == IR_ADD && irref_isk(key->op2)) { IRIns *add2 = IR(key->op1); if (add2->o == IR_ADD && irref_isk(add2->op2) && IR(key->op2)->i == -IR(add2->op2)->i) { IRRef ref = J->chain[IR_AREF]; IRRef lim = add2->op1; if (irx->op1 > lim) lim = irx->op1; while (ref > lim) { IRIns *ir = IR(ref); if (ir->op1 == irx->op1 && ir->op2 == add2->op1) return fwd_ahload(J, ref); ref = ir->prev; } } } return 0; }
/* Restore a value from the trace exit state. */ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, SnapNo snapno, BloomFilter rfilt, IRRef ref, TValue *o) { IRIns *ir = &T->ir[ref]; IRType1 t = ir->t; RegSP rs = ir->prev; if (irref_isk(ref)) { /* Restore constant slot. */ lj_ir_kvalue(J->L, o, ir); return; } if (LJ_UNLIKELY(bloomtest(rfilt, ref))) rs = snap_renameref(T, snapno, ref, rs); lua_assert(!LJ_GC64); /* TODO_GC64: handle 64 bit references. */ if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */ int32_t *sps = &ex->spill[regsp_spill(rs)]; if (irt_isinteger(t)) { setintV(o, *sps); #if !LJ_SOFTFP } else if (irt_isnum(t)) { o->u64 = *(uint64_t *)sps; #endif } else if (LJ_64 && irt_islightud(t)) { /* 64 bit lightuserdata which may escape already has the tag bits. */ o->u64 = *(uint64_t *)sps; } else { lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */ setgcV(J->L, o, (GCobj *)(uintptr_t)*(GCSize *)sps, irt_toitype(t)); } } else { /* Restore from register. */ Reg r = regsp_reg(rs); if (ra_noreg(r)) { lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT); snap_restoreval(J, T, ex, snapno, rfilt, ir->op1, o); if (LJ_DUALNUM) setnumV(o, (lua_Number)intV(o)); return; } else if (irt_isinteger(t)) { setintV(o, (int32_t)ex->gpr[r-RID_MIN_GPR]); #if !LJ_SOFTFP } else if (irt_isnum(t)) { setnumV(o, ex->fpr[r-RID_MIN_FPR]); #endif } else if (LJ_64 && irt_is64(t)) { /* 64 bit values that already have the tag bits. */ o->u64 = ex->gpr[r-RID_MIN_GPR]; } else if (irt_ispri(t)) { setpriV(o, irt_toitype(t)); } else { setgcV(J->L, o, (GCobj *)ex->gpr[r-RID_MIN_GPR], irt_toitype(t)); } } }
/* Emit parent reference with de-duplication. */ static TRef snap_pref(jit_State *J, GCtrace *T, SnapEntry *map, MSize nmax, BloomFilter seen, IRRef ref) { IRIns *ir = &T->ir[ref]; TRef tr; if (irref_isk(ref)) tr = snap_replay_const(J, ir); else if (!regsp_used(ir->prev)) tr = 0; else if (!bloomtest(seen, ref) || (tr = snap_dedup(J, map, nmax, ref)) == 0) tr = emitir(IRT(IR_PVAL, irt_type(ir->t)), ref - REF_BIAS, 0); return tr; }
/* Get a pointer to the other 32 bit word (LE: hiword, BE: loword). */ static IRRef split_ptr(jit_State *J, IRIns *oir, IRRef ref) { IRRef nref = oir[ref].prev; IRIns *ir = IR(nref); int32_t ofs = 4; if (ir->o == IR_ADD && irref_isk(ir->op2) && !irt_isphi(oir[ref].t)) { /* Reassociate address. */ ofs += IR(ir->op2)->i; nref = ir->op1; if (ofs == 0) return nref; } return split_emit(J, IRTI(IR_ADD), nref, lj_ir_kint(J, ofs)); }
/* Scan through all snapshots and mark all referenced instructions. */ static void dce_marksnap(jit_State *J) { SnapNo i, nsnap = J->cur.nsnap; for (i = 0; i < nsnap; i++) { SnapShot *snap = &J->cur.snap[i]; IRRef2 *map = &J->cur.snapmap[snap->mapofs]; BCReg s, nslots = snap->nslots; for (s = 0; s < nslots; s++) { IRRef ref = snap_ref(map[s]); if (!irref_isk(ref)) irt_setmark(IR(ref)->t); } } }
/* Scan through all snapshots and mark all referenced instructions. */ static void dce_marksnap(jit_State *J) { SnapNo i, nsnap = J->cur.nsnap; for (i = 0; i < nsnap; i++) { SnapShot *snap = &J->cur.snap[i]; SnapEntry *map = &J->cur.snapmap[snap->mapofs]; MSize n, nent = snap->nent; for (n = 0; n < nent; n++) { IRRef ref = snap_ref(map[n]); if (!irref_isk(ref)) irt_setmark(IR(ref)->t); } } }
/* Check whether the store ref points to an eligible allocation. */ static IRIns *sink_checkalloc(jit_State *J, IRIns *irs) { IRIns *ir = IR(irs->op1); if (!irref_isk(ir->op2)) return NULL; /* Non-constant key. */ if (ir->o == IR_HREFK || ir->o == IR_AREF) ir = IR(ir->op1); else if (!(ir->o == IR_HREF || ir->o == IR_NEWREF || ir->o == IR_FREF || ir->o == IR_ADD)) return NULL; /* Unhandled reference type (for XSTORE). */ ir = IR(ir->op1); if (!(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW)) return NULL; /* Not an allocation. */ return ir; /* Return allocation. */ }
/* Copy-substitute snapshot. */ static void loop_subst_snap(jit_State *J, SnapShot *osnap, SnapEntry *loopmap, IRRef1 *subst) { SnapEntry *nmap, *omap = &J->cur.snapmap[osnap->mapofs]; SnapEntry *nextmap = &J->cur.snapmap[snap_nextofs(&J->cur, osnap)]; MSize nmapofs; MSize on, ln, nn, onent = osnap->nent; BCReg nslots = osnap->nslots; SnapShot *snap = &J->cur.snap[J->cur.nsnap]; if (irt_isguard(J->guardemit)) { /* Guard inbetween? */ nmapofs = J->cur.nsnapmap; J->cur.nsnap++; /* Add new snapshot. */ } else { /* Otherwise overwrite previous snapshot. */ snap--; nmapofs = snap->mapofs; } J->guardemit.irt = 0; /* Setup new snapshot. */ snap->mapofs = (uint16_t)nmapofs; snap->ref = (IRRef1)J->cur.nins; snap->nslots = nslots; snap->topslot = osnap->topslot; snap->count = 0; nmap = &J->cur.snapmap[nmapofs]; /* Substitute snapshot slots. */ on = ln = nn = 0; while (on < onent) { SnapEntry osn = omap[on], lsn = loopmap[ln]; if (snap_slot(lsn) < snap_slot(osn)) { /* Copy slot from loop map. */ nmap[nn++] = lsn; ln++; } else { /* Copy substituted slot from snapshot map. */ if (snap_slot(lsn) == snap_slot(osn)) ln++; /* Shadowed loop slot. */ if (!irref_isk(snap_ref(osn))) osn = snap_setref(osn, subst[snap_ref(osn)]); nmap[nn++] = osn; on++; } } while (snap_slot(loopmap[ln]) < nslots) /* Copy remaining loop slots. */ nmap[nn++] = loopmap[ln++]; snap->nent = (uint8_t)nn; omap += onent; nmap += nn; while (omap < nextmap) /* Copy PC + frame links. */ *nmap++ = *omap++; J->cur.nsnapmap = (uint16_t)(nmap - J->cur.snapmap); }
/* Convert a snapshot into a linear slot -> RegSP map. */ void lj_snap_regspmap(uint16_t *rsmap, Trace *T, SnapNo snapno) { SnapShot *snap = &T->snap[snapno]; BCReg s, nslots = snap->nslots; IRRef2 *map = &T->snapmap[snap->mapofs]; BloomFilter rfilt = snap_renamefilter(T, snapno); for (s = 0; s < nslots; s++) { IRRef ref = snap_ref(map[s]); if (!irref_isk(ref)) { IRIns *ir = &T->ir[ref]; uint32_t rs = ir->prev; if (bloomtest(rfilt, ref)) rs = snap_renameref(T, snapno, ref, rs); rsmap[s] = (uint16_t)rs; } } }
/* Emit the conversions collected during backpropagation. */ static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc) { /* The fins fields must be saved now -- emitir() overwrites them. */ IROpT guardot = irt_isguard(fins->t) ? IRTG(IR_ADDOV-IR_ADD, 0) : 0; IROpT convot = fins->ot; IRRef1 convop2 = fins->op2; NarrowIns *next = nc->stack; /* List of instructions from backpropagation. */ NarrowIns *last = nc->sp; NarrowIns *sp = nc->stack; /* Recycle the stack to store operands. */ while (next < last) { /* Simple stack machine to process the ins. list. */ NarrowIns ref = *next++; IROpT op = narrow_op(ref); if (op == NARROW_REF) { *sp++ = ref; } else if (op == NARROW_CONV) { *sp++ = emitir_raw(convot, ref, convop2); /* Raw emit avoids a loop. */ } else if (op == NARROW_SEXT) { lua_assert(sp >= nc->stack+1); sp[-1] = emitir(IRT(IR_CONV, IRT_I64), sp[-1], (IRT_I64<<5)|IRT_INT|IRCONV_SEXT); } else if (op == NARROW_INT) { lua_assert(next < last); *sp++ = nc->t == IRT_I64 ? lj_ir_kint64(J, (int64_t)(int32_t)*next++) : lj_ir_kint(J, *next++); } else { /* Regular IROpT. Pops two operands and pushes one result. */ IRRef mode = nc->mode; lua_assert(sp >= nc->stack+2); sp--; /* Omit some overflow checks for array indexing. See comments above. */ if ((mode & IRCONV_CONVMASK) == IRCONV_INDEX) { if (next == last && irref_isk(narrow_ref(sp[0])) && (uint32_t)IR(narrow_ref(sp[0]))->i + 0x40000000u < 0x80000000u) guardot = 0; else /* Otherwise cache a stronger check. */ mode += IRCONV_CHECK-IRCONV_INDEX; } sp[-1] = emitir(op+guardot, sp[-1], sp[0]); /* Add to cache. */ if (narrow_ref(ref)) narrow_bpc_set(J, narrow_ref(ref), narrow_ref(sp[-1]), mode); } } lua_assert(sp == nc->stack+1); return nc->stack[0]; }
/* Convert a snapshot into a linear slot -> RegSP map. ** Note: unused slots are not initialized! */ void lj_snap_regspmap(uint16_t *rsmap, GCtrace *T, SnapNo snapno) { SnapShot *snap = &T->snap[snapno]; MSize n, nent = snap->nent; SnapEntry *map = &T->snapmap[snap->mapofs]; BloomFilter rfilt = snap_renamefilter(T, snapno); for (n = 0; n < nent; n++) { SnapEntry sn = map[n]; IRRef ref = snap_ref(sn); if (!irref_isk(ref)) { IRIns *ir = &T->ir[ref]; uint32_t rs = ir->prev; if (bloomtest(rfilt, ref)) rs = snap_renameref(T, snapno, ref, rs); rsmap[snap_slot(sn)] = (uint16_t)rs; } } }
/* Array and hash load forwarding. */ static TRef fwd_ahload(jit_State *J, IRRef xref) { IRIns *xr = IR(xref); IRRef lim = xref; /* Search limit. */ IRRef ref; /* Search for conflicting stores. */ ref = J->chain[fins->o+IRDELTA_L2S]; while (ref > xref) { IRIns *store = IR(ref); switch (aa_ahref(J, xr, IR(store->op1))) { case ALIAS_NO: break; /* Continue searching. */ case ALIAS_MAY: lim = ref; goto cselim; /* Limit search for load. */ case ALIAS_MUST: return store->op2; /* Store forwarding. */ } ref = store->prev; } /* No conflicting store (yet): const-fold loads from allocations. */ { IRIns *ir = (xr->o == IR_HREFK || xr->o == IR_AREF) ? IR(xr->op1) : xr; IRRef tab = ir->op1; ir = IR(tab); if (ir->o == IR_TNEW || (ir->o == IR_TDUP && irref_isk(xr->op2))) { /* A NEWREF with a number key may end up pointing to the array part. ** But it's referenced from HSTORE and not found in the ASTORE chain. ** For now simply consider this a conflict without forwarding anything. */ if (xr->o == IR_AREF) { IRRef ref2 = J->chain[IR_NEWREF]; while (ref2 > tab) { IRIns *newref = IR(ref2); if (irt_isnum(IR(newref->op2)->t)) goto cselim; ref2 = newref->prev; } } /* NEWREF inhibits CSE for HREF, and dependent FLOADs from HREFK/AREF. ** But the above search for conflicting stores was limited by xref. ** So continue searching, limited by the TNEW/TDUP. Store forwarding ** is ok, too. A conflict does NOT limit the search for a matching load. */ while (ref > tab) { IRIns *store = IR(ref); switch (aa_ahref(J, xr, IR(store->op1))) { case ALIAS_NO: break; /* Continue searching. */ case ALIAS_MAY: goto cselim; /* Conflicting store. */ case ALIAS_MUST: return store->op2; /* Store forwarding. */ } ref = store->prev; } lua_assert(ir->o != IR_TNEW || irt_isnil(fins->t)); if (irt_ispri(fins->t)) { return TREF_PRI(irt_type(fins->t)); } else if (irt_isnum(fins->t) || (LJ_DUALNUM && irt_isint(fins->t)) || irt_isstr(fins->t)) { TValue keyv; cTValue *tv; IRIns *key = IR(xr->op2); if (key->o == IR_KSLOT) key = IR(key->op1); lj_ir_kvalue(J->L, &keyv, key); tv = lj_tab_get(J->L, ir_ktab(IR(ir->op1)), &keyv); lua_assert(itype2irt(tv) == irt_type(fins->t)); if (irt_isnum(fins->t)) return lj_ir_knum_u64(J, tv->u64); else if (LJ_DUALNUM && irt_isint(fins->t)) return lj_ir_kint(J, intV(tv)); else return lj_ir_kstr(J, strV(tv)); } /* Othwerwise: don't intern as a constant. */ } } cselim: /* Try to find a matching load. Below the conflicting store, if any. */ ref = J->chain[fins->o]; while (ref > lim) { IRIns *load = IR(ref); if (load->op1 == xref) return ref; /* Load forwarding. */ ref = load->prev; } return 0; /* Conflict or no match. */ }
/* Unroll loop. */ static void loop_unroll(jit_State *J) { IRRef1 phi[LJ_MAX_PHI]; uint32_t nphi = 0; IRRef1 *subst; SnapNo onsnap; SnapShot *osnap, *loopsnap; SnapEntry *loopmap, *psentinel; IRRef ins, invar; /* Use temp buffer for substitution table. ** Only non-constant refs in [REF_BIAS,invar) are valid indexes. ** Caveat: don't call into the VM or run the GC or the buffer may be gone. */ invar = J->cur.nins; subst = (IRRef1 *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, (invar-REF_BIAS)*sizeof(IRRef1)) - REF_BIAS; subst[REF_BASE] = REF_BASE; /* LOOP separates the pre-roll from the loop body. */ emitir_raw(IRTG(IR_LOOP, IRT_NIL), 0, 0); /* Grow snapshot buffer and map for copy-substituted snapshots. ** Need up to twice the number of snapshots minus #0 and loop snapshot. ** Need up to twice the number of entries plus fallback substitutions ** from the loop snapshot entries for each new snapshot. ** Caveat: both calls may reallocate J->cur.snap and J->cur.snapmap! */ onsnap = J->cur.nsnap; lj_snap_grow_buf(J, 2*onsnap-2); lj_snap_grow_map(J, J->cur.nsnapmap*2+(onsnap-2)*J->cur.snap[onsnap-1].nent); /* The loop snapshot is used for fallback substitutions. */ loopsnap = &J->cur.snap[onsnap-1]; loopmap = &J->cur.snapmap[loopsnap->mapofs]; /* The PC of snapshot #0 and the loop snapshot must match. */ psentinel = &loopmap[loopsnap->nent]; lua_assert(*psentinel == J->cur.snapmap[J->cur.snap[0].nent]); *psentinel = SNAP(255, 0, 0); /* Replace PC with temporary sentinel. */ /* Start substitution with snapshot #1 (#0 is empty for root traces). */ osnap = &J->cur.snap[1]; /* Copy and substitute all recorded instructions and snapshots. */ for (ins = REF_FIRST; ins < invar; ins++) { IRIns *ir; IRRef op1, op2; if (ins >= osnap->ref) /* Instruction belongs to next snapshot? */ loop_subst_snap(J, osnap++, loopmap, subst); /* Copy-substitute it. */ /* Substitute instruction operands. */ ir = IR(ins); op1 = ir->op1; if (!irref_isk(op1)) op1 = subst[op1]; op2 = ir->op2; if (!irref_isk(op2)) op2 = subst[op2]; if (irm_kind(lj_ir_mode[ir->o]) == IRM_N && op1 == ir->op1 && op2 == ir->op2) { /* Regular invariant ins? */ subst[ins] = (IRRef1)ins; /* Shortcut. */ } else { /* Re-emit substituted instruction to the FOLD/CSE/etc. pipeline. */ IRType1 t = ir->t; /* Get this first, since emitir may invalidate ir. */ IRRef ref = tref_ref(emitir(ir->ot & ~IRT_ISPHI, op1, op2)); subst[ins] = (IRRef1)ref; if (ref != ins) { IRIns *irr = IR(ref); if (ref < invar) { /* Loop-carried dependency? */ /* Potential PHI? */ if (!irref_isk(ref) && !irt_isphi(irr->t) && !irt_ispri(irr->t)) { irt_setphi(irr->t); if (nphi >= LJ_MAX_PHI) lj_trace_err(J, LJ_TRERR_PHIOV); phi[nphi++] = (IRRef1)ref; } /* Check all loop-carried dependencies for type instability. */ if (!irt_sametype(t, irr->t)) { if (irt_isinteger(t) && irt_isinteger(irr->t)) continue; else if (irt_isnum(t) && irt_isinteger(irr->t)) /* Fix int->num. */ ref = tref_ref(emitir(IRTN(IR_CONV), ref, IRCONV_NUM_INT)); else if (irt_isnum(irr->t) && irt_isinteger(t)) /* Fix num->int. */ ref = tref_ref(emitir(IRTGI(IR_CONV), ref, IRCONV_INT_NUM|IRCONV_CHECK)); else lj_trace_err(J, LJ_TRERR_TYPEINS); subst[ins] = (IRRef1)ref; irr = IR(ref); goto phiconv; } } else if (ref != REF_DROP && irr->o == IR_CONV && ref > invar && irr->op1 < invar) { /* May need an extra PHI for a CONV. */ ref = irr->op1; irr = IR(ref); phiconv: if (ref < invar && !irref_isk(ref) && !irt_isphi(irr->t)) { irt_setphi(irr->t); if (nphi >= LJ_MAX_PHI) lj_trace_err(J, LJ_TRERR_PHIOV); phi[nphi++] = (IRRef1)ref; } } } } } if (!irt_isguard(J->guardemit)) /* Drop redundant snapshot. */ J->cur.nsnapmap = (uint16_t)J->cur.snap[--J->cur.nsnap].mapofs; lua_assert(J->cur.nsnapmap <= J->sizesnapmap); *psentinel = J->cur.snapmap[J->cur.snap[0].nent]; /* Restore PC. */ loop_emit_phi(J, subst, phi, nphi, onsnap); }
/* Emit or eliminate collected PHIs. */ static void loop_emit_phi(jit_State *J, IRRef1 *subst, IRRef1 *phi, IRRef nphi, SnapNo onsnap) { int passx = 0; IRRef i, j, nslots; IRRef invar = J->chain[IR_LOOP]; /* Pass #1: mark redundant and potentially redundant PHIs. */ for (i = 0, j = 0; i < nphi; i++) { IRRef lref = phi[i]; IRRef rref = subst[lref]; if (lref == rref || rref == REF_DROP) { /* Invariants are redundant. */ irt_clearphi(IR(lref)->t); } else { phi[j++] = (IRRef1)lref; if (!(IR(rref)->op1 == lref || IR(rref)->op2 == lref)) { /* Quick check for simple recurrences failed, need pass2. */ irt_setmark(IR(lref)->t); passx = 1; } } } nphi = j; /* Pass #2: traverse variant part and clear marks of non-redundant PHIs. */ if (passx) { SnapNo s; for (i = J->cur.nins-1; i > invar; i--) { IRIns *ir = IR(i); if (!irref_isk(ir->op2)) irt_clearmark(IR(ir->op2)->t); if (!irref_isk(ir->op1)) { irt_clearmark(IR(ir->op1)->t); if (ir->op1 < invar && ir->o >= IR_CALLN && ir->o <= IR_CARG) { /* ORDER IR */ ir = IR(ir->op1); while (ir->o == IR_CARG) { if (!irref_isk(ir->op2)) irt_clearmark(IR(ir->op2)->t); if (irref_isk(ir->op1)) break; ir = IR(ir->op1); irt_clearmark(ir->t); } } } } for (s = J->cur.nsnap-1; s >= onsnap; s--) { SnapShot *snap = &J->cur.snap[s]; SnapEntry *map = &J->cur.snapmap[snap->mapofs]; MSize n, nent = snap->nent; for (n = 0; n < nent; n++) { IRRef ref = snap_ref(map[n]); if (!irref_isk(ref)) irt_clearmark(IR(ref)->t); } } } /* Pass #3: add PHIs for variant slots without a corresponding SLOAD. */ nslots = J->baseslot+J->maxslot; for (i = 1; i < nslots; i++) { IRRef ref = tref_ref(J->slot[i]); while (!irref_isk(ref) && ref != subst[ref]) { IRIns *ir = IR(ref); irt_clearmark(ir->t); /* Unmark potential uses, too. */ if (irt_isphi(ir->t) || irt_ispri(ir->t)) break; irt_setphi(ir->t); if (nphi >= LJ_MAX_PHI) lj_trace_err(J, LJ_TRERR_PHIOV); phi[nphi++] = (IRRef1)ref; ref = subst[ref]; if (ref > invar) break; } } /* Pass #4: propagate non-redundant PHIs. */ while (passx) { passx = 0; for (i = 0; i < nphi; i++) { IRRef lref = phi[i]; IRIns *ir = IR(lref); if (!irt_ismarked(ir->t)) { /* Propagate only from unmarked PHIs. */ IRIns *irr = IR(subst[lref]); if (irt_ismarked(irr->t)) { /* Right ref points to other PHI? */ irt_clearmark(irr->t); /* Mark that PHI as non-redundant. */ passx = 1; /* Retry. */ } } } } /* Pass #5: emit PHI instructions or eliminate PHIs. */ for (i = 0; i < nphi; i++) { IRRef lref = phi[i]; IRIns *ir = IR(lref); if (!irt_ismarked(ir->t)) { /* Emit PHI if not marked. */ IRRef rref = subst[lref]; if (rref > invar) irt_setphi(IR(rref)->t); emitir_raw(IRT(IR_PHI, irt_type(ir->t)), lref, rref); } else { /* Otherwise eliminate PHI. */ irt_clearmark(ir->t); irt_clearphi(ir->t); } } }
/* Restore interpreter state from exit state with the help of a snapshot. */ void lj_snap_restore(jit_State *J, void *exptr) { ExitState *ex = (ExitState *)exptr; SnapNo snapno = J->exitno; /* For now, snapno == exitno. */ Trace *T = J->trace[J->parent]; SnapShot *snap = &T->snap[snapno]; BCReg s, nslots = snap->nslots; IRRef2 *map = &T->snapmap[snap->mapofs]; IRRef2 *flinks = map + nslots + snap->nframelinks; TValue *o, *newbase, *ntop; BloomFilter rfilt = snap_renamefilter(T, snapno); lua_State *L = J->L; /* Make sure the stack is big enough for the slots from the snapshot. */ if (L->base + nslots >= L->maxstack) { L->top = curr_topL(L); lj_state_growstack(L, nslots - curr_proto(L)->framesize); } /* Fill stack slots with data from the registers and spill slots. */ newbase = NULL; ntop = L->base; for (s = 0, o = L->base-1; s < nslots; s++, o++) { IRRef ref = snap_ref(map[s]); if (ref) { IRIns *ir = &T->ir[ref]; if (irref_isk(ref)) { /* Restore constant slot. */ lj_ir_kvalue(L, o, ir); } else { IRType1 t = ir->t; RegSP rs = ir->prev; if (LJ_UNLIKELY(bloomtest(rfilt, ref))) rs = snap_renameref(T, snapno, ref, rs); if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */ int32_t *sps = &ex->spill[regsp_spill(rs)]; if (irt_isinteger(t)) { setintV(o, *sps); } else if (irt_isnum(t)) { o->u64 = *(uint64_t *)sps; } else { lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */ setgcrefi(o->gcr, *sps); setitype(o, irt_toitype(t)); } } else if (ra_hasreg(regsp_reg(rs))) { /* Restore from register. */ Reg r = regsp_reg(rs); if (irt_isinteger(t)) { setintV(o, ex->gpr[r-RID_MIN_GPR]); } else if (irt_isnum(t)) { setnumV(o, ex->fpr[r-RID_MIN_FPR]); } else { if (!irt_ispri(t)) setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]); setitype(o, irt_toitype(t)); } } else { /* Restore frame slot. */ lua_assert(ir->o == IR_FRAME); /* This works for both PTR and FUNC IR_FRAME. */ setgcrefp(o->fr.func, mref(T->ir[ir->op2].ptr, void)); if (s != 0) /* Do not overwrite link to previous frame. */ o->fr.tp.ftsz = (int32_t)*--flinks; if (irt_isfunc(ir->t)) { GCfunc *fn = gco2func(gcref(T->ir[ir->op2].gcr)); if (isluafunc(fn)) { TValue *fs; newbase = o+1; fs = newbase + funcproto(fn)->framesize; if (fs > ntop) ntop = fs; /* Update top for newly added frames. */ } } } } } else if (newbase) { setnilV(o); /* Clear unreferenced slots of newly added frames. */ } }
/* Replay snapshot state to setup side trace. */ void lj_snap_replay(jit_State *J, GCtrace *T) { SnapShot *snap = &T->snap[J->exitno]; SnapEntry *map = &T->snapmap[snap->mapofs]; MSize n, nent = snap->nent; BloomFilter seen = 0; int pass23 = 0; J->framedepth = 0; /* Emit IR for slots inherited from parent snapshot. */ for (n = 0; n < nent; n++) { SnapEntry sn = map[n]; BCReg s = snap_slot(sn); IRRef ref = snap_ref(sn); IRIns *ir = &T->ir[ref]; TRef tr; /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */ if (bloomtest(seen, ref) && (tr = snap_dedup(J, map, n, ref)) != 0) goto setslot; bloomset(seen, ref); if (irref_isk(ref)) { tr = snap_replay_const(J, ir); } else if (!regsp_used(ir->prev)) { pass23 = 1; lua_assert(s != 0); tr = s; } else { IRType t = irt_type(ir->t); uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT; if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM; if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY); tr = emitir_raw(IRT(IR_SLOAD, t), s, mode); } setslot: J->slot[s] = tr | (sn&(SNAP_CONT|SNAP_FRAME)); /* Same as TREF_* flags. */ J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && s); if ((sn & SNAP_FRAME)) J->baseslot = s+1; } if (pass23) { IRIns *irlast = &T->ir[snap->ref]; pass23 = 0; /* Emit dependent PVALs. */ for (n = 0; n < nent; n++) { SnapEntry sn = map[n]; IRRef refp = snap_ref(sn); IRIns *ir = &T->ir[refp]; if (regsp_reg(ir->r) == RID_SUNK) { if (J->slot[snap_slot(sn)] != snap_slot(sn)) continue; pass23 = 1; lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW || ir->o == IR_CNEWI); if (ir->op1 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op1); if (ir->op2 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op2); if (LJ_HASFFI && ir->o == IR_CNEWI) { if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP) snap_pref(J, T, map, nent, seen, (ir+1)->op2); } else { IRIns *irs; for (irs = ir+1; irs < irlast; irs++) if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) { if (snap_pref(J, T, map, nent, seen, irs->op2) == 0) snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1); else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) && irs+1 < irlast && (irs+1)->o == IR_HIOP) snap_pref(J, T, map, nent, seen, (irs+1)->op2); } } } else if (!irref_isk(refp) && !regsp_used(ir->prev)) { lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT); J->slot[snap_slot(sn)] = snap_pref(J, T, map, nent, seen, ir->op1); } } /* Replay sunk instructions. */ for (n = 0; pass23 && n < nent; n++) { SnapEntry sn = map[n]; IRRef refp = snap_ref(sn); IRIns *ir = &T->ir[refp]; if (regsp_reg(ir->r) == RID_SUNK) { TRef op1, op2; if (J->slot[snap_slot(sn)] != snap_slot(sn)) { /* De-dup allocs. */ J->slot[snap_slot(sn)] = J->slot[J->slot[snap_slot(sn)]]; continue; } op1 = ir->op1; if (op1 >= T->nk) op1 = snap_pref(J, T, map, nent, seen, op1); op2 = ir->op2; if (op2 >= T->nk) op2 = snap_pref(J, T, map, nent, seen, op2); if (LJ_HASFFI && ir->o == IR_CNEWI) { if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP) { lj_needsplit(J); /* Emit joining HIOP. */ op2 = emitir_raw(IRT(IR_HIOP, IRT_I64), op2, snap_pref(J, T, map, nent, seen, (ir+1)->op2)); } J->slot[snap_slot(sn)] = emitir(ir->ot & ~(IRT_MARK|IRT_ISPHI), op1, op2); } else { IRIns *irs; TRef tr = emitir(ir->ot, op1, op2); J->slot[snap_slot(sn)] = tr; for (irs = ir+1; irs < irlast; irs++) if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) { IRIns *irr = &T->ir[irs->op1]; TRef val, key = irr->op2, tmp = tr; if (irr->o != IR_FREF) { IRIns *irk = &T->ir[key]; if (irr->o == IR_HREFK) key = lj_ir_kslot(J, snap_replay_const(J, &T->ir[irk->op1]), irk->op2); else key = snap_replay_const(J, irk); if (irr->o == IR_HREFK || irr->o == IR_AREF) { IRIns *irf = &T->ir[irr->op1]; tmp = emitir(irf->ot, tmp, irf->op2); } } tmp = emitir(irr->ot, tmp, key); val = snap_pref(J, T, map, nent, seen, irs->op2); if (val == 0) { IRIns *irc = &T->ir[irs->op2]; lua_assert(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT); val = snap_pref(J, T, map, nent, seen, irc->op1); val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT); } else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) && irs+1 < irlast && (irs+1)->o == IR_HIOP) { IRType t = IRT_I64; if (LJ_SOFTFP && irt_type((irs+1)->t) == IRT_SOFTFP) t = IRT_NUM; lj_needsplit(J); if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) { uint64_t k = (uint32_t)T->ir[irs->op2].i + ((uint64_t)T->ir[(irs+1)->op2].i << 32); val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM, lj_ir_k64_find(J, k)); } else { val = emitir_raw(IRT(IR_HIOP, t), val, snap_pref(J, T, map, nent, seen, (irs+1)->op2)); } tmp = emitir(IRT(irs->o, t), tmp, val); continue; } tmp = emitir(irs->ot, tmp, val); } else if (LJ_HASFFI && irs->o == IR_XBAR && ir->o == IR_CNEW) { emitir(IRT(IR_XBAR, IRT_NIL), 0, 0); } } } } } J->base = J->slot + J->baseslot; J->maxslot = snap->nslots - J->baseslot; lj_snap_add(J); if (pass23) /* Need explicit GC step _after_ initial snapshot. */ emitir_raw(IRTG(IR_GCSTEP, IRT_NIL), 0, 0); }
/* Restore interpreter state from exit state with the help of a snapshot. */ const BCIns *lj_snap_restore(jit_State *J, void *exptr) { ExitState *ex = (ExitState *)exptr; SnapNo snapno = J->exitno; /* For now, snapno == exitno. */ GCtrace *T = traceref(J, J->parent); SnapShot *snap = &T->snap[snapno]; MSize n, nent = snap->nent; SnapEntry *map = &T->snapmap[snap->mapofs]; SnapEntry *flinks = map + nent + snap->depth; int32_t ftsz0; BCReg nslots = snap->nslots; TValue *frame; BloomFilter rfilt = snap_renamefilter(T, snapno); const BCIns *pc = snap_pc(map[nent]); lua_State *L = J->L; /* Set interpreter PC to the next PC to get correct error messages. */ setcframe_pc(cframe_raw(L->cframe), pc+1); /* Make sure the stack is big enough for the slots from the snapshot. */ if (LJ_UNLIKELY(L->base + nslots > tvref(L->maxstack))) { L->top = curr_topL(L); lj_state_growstack(L, nslots - curr_proto(L)->framesize); } /* Fill stack slots with data from the registers and spill slots. */ frame = L->base-1; ftsz0 = frame_ftsz(frame); /* Preserve link to previous frame in slot #0. */ for (n = 0; n < nent; n++) { SnapEntry sn = map[n]; IRRef ref = snap_ref(sn); BCReg s = snap_slot(sn); TValue *o = &frame[s]; /* Stack slots are relative to start frame. */ IRIns *ir = &T->ir[ref]; if (irref_isk(ref)) { /* Restore constant slot. */ lj_ir_kvalue(L, o, ir); if ((sn & (SNAP_CONT|SNAP_FRAME))) { /* Overwrite tag with frame link. */ o->fr.tp.ftsz = s != 0 ? (int32_t)*flinks-- : ftsz0; if ((sn & SNAP_FRAME)) { GCfunc *fn = ir_kfunc(ir); if (isluafunc(fn)) { MSize framesize = funcproto(fn)->framesize; L->base = ++o; if (LJ_UNLIKELY(o + framesize > tvref(L->maxstack))) { ptrdiff_t fsave = savestack(L, frame); L->top = o; lj_state_growstack(L, framesize); /* Grow again. */ frame = restorestack(L, fsave); } } } } } else if (!(sn & SNAP_NORESTORE)) { IRType1 t = ir->t; RegSP rs = ir->prev; lua_assert(!(sn & (SNAP_CONT|SNAP_FRAME))); if (LJ_UNLIKELY(bloomtest(rfilt, ref))) rs = snap_renameref(T, snapno, ref, rs); if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */ int32_t *sps = &ex->spill[regsp_spill(rs)]; if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) { o->u32.lo = (uint32_t)*sps; } else if (irt_isinteger(t)) { setintV(o, *sps); #if !LJ_SOFTFP } else if (irt_isnum(t)) { o->u64 = *(uint64_t *)sps; #endif #if LJ_64 } else if (irt_islightud(t)) { /* 64 bit lightuserdata which may escape already has the tag bits. */ o->u64 = *(uint64_t *)sps; #endif } else { lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */ setgcrefi(o->gcr, *sps); setitype(o, irt_toitype(t)); } } else { /* Restore from register. */ Reg r = regsp_reg(rs); lua_assert(ra_hasreg(r)); if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) { o->u32.lo = (uint32_t)ex->gpr[r-RID_MIN_GPR]; } else if (irt_isinteger(t)) { setintV(o, (int32_t)ex->gpr[r-RID_MIN_GPR]); #if !LJ_SOFTFP } else if (irt_isnum(t)) { setnumV(o, ex->fpr[r-RID_MIN_FPR]); #endif #if LJ_64 } else if (irt_islightud(t)) { /* 64 bit lightuserdata which may escape already has the tag bits. */ o->u64 = ex->gpr[r-RID_MIN_GPR]; #endif } else { if (!irt_ispri(t)) setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]); setitype(o, irt_toitype(t)); } } if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) { rs = (ir+1)->prev; if (LJ_UNLIKELY(bloomtest(rfilt, ref+1))) rs = snap_renameref(T, snapno, ref+1, rs); o->u32.hi = (ra_hasspill(regsp_spill(rs))) ? (uint32_t)*&ex->spill[regsp_spill(rs)] : (uint32_t)ex->gpr[regsp_reg(rs)-RID_MIN_GPR]; } } } switch (bc_op(*pc)) { case BC_CALLM: case BC_CALLMT: case BC_RETM: case BC_TSETM: L->top = frame + nslots; break; default: L->top = curr_topL(L); break; } lua_assert(map + nent == flinks); return pc; }
/* Transform the old IR to the new IR. */ static void split_ir(jit_State *J) { IRRef nins = J->cur.nins, nk = J->cur.nk; MSize irlen = nins - nk; MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1)); IRIns *oir = (IRIns *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, need); IRRef1 *hisubst; IRRef ref, snref; SnapShot *snap; /* Copy old IR to buffer. */ memcpy(oir, IR(nk), irlen*sizeof(IRIns)); /* Bias hiword substitution table and old IR. Loword kept in field prev. */ hisubst = (IRRef1 *)&oir[irlen] - nk; oir -= nk; /* Remove all IR instructions, but retain IR constants. */ J->cur.nins = REF_FIRST; J->loopref = 0; /* Process constants and fixed references. */ for (ref = nk; ref <= REF_BASE; ref++) { IRIns *ir = &oir[ref]; if ((LJ_SOFTFP && ir->o == IR_KNUM) || ir->o == IR_KINT64) { /* Split up 64 bit constant. */ TValue tv = *ir_k64(ir); ir->prev = lj_ir_kint(J, (int32_t)tv.u32.lo); hisubst[ref] = lj_ir_kint(J, (int32_t)tv.u32.hi); } else { ir->prev = ref; /* Identity substitution for loword. */ hisubst[ref] = 0; } } /* Process old IR instructions. */ snap = J->cur.snap; snref = snap->ref; for (ref = REF_FIRST; ref < nins; ref++) { IRIns *ir = &oir[ref]; IRRef nref = lj_ir_nextins(J); IRIns *nir = IR(nref); IRRef hi = 0; if (ref >= snref) { snap->ref = nref; split_subst_snap(J, snap++, oir); snref = snap < &J->cur.snap[J->cur.nsnap] ? snap->ref : ~(IRRef)0; } /* Copy-substitute old instruction to new instruction. */ nir->op1 = ir->op1 < nk ? ir->op1 : oir[ir->op1].prev; nir->op2 = ir->op2 < nk ? ir->op2 : oir[ir->op2].prev; ir->prev = nref; /* Loword substitution. */ nir->o = ir->o; nir->t.irt = ir->t.irt & ~(IRT_MARK|IRT_ISPHI); hisubst[ref] = 0; /* Split 64 bit instructions. */ #if LJ_SOFTFP if (irt_isnum(ir->t)) { nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD); /* Turn into INT op. */ /* Note: hi ref = lo ref + 1! Required for SNAP_SOFTFPNUM logic. */ switch (ir->o) { case IR_ADD: hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_add); break; case IR_SUB: hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_sub); break; case IR_MUL: hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_mul); break; case IR_DIV: hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_div); break; case IR_POW: hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi); break; case IR_FPMATH: /* Try to rejoin pow from EXP2, MUL and LOG2. */ if (nir->op2 == IRFPM_EXP2 && nir->op1 > J->loopref) { IRIns *irp = IR(nir->op1); if (irp->o == IR_CALLN && irp->op2 == IRCALL_softfp_mul) { IRIns *irm4 = IR(irp->op1); IRIns *irm3 = IR(irm4->op1); IRIns *irm12 = IR(irm3->op1); IRIns *irl1 = IR(irm12->op1); if (irm12->op1 > J->loopref && irl1->o == IR_CALLN && irl1->op2 == IRCALL_lj_vm_log2) { IRRef tmp = irl1->op1; /* Recycle first two args from LOG2. */ IRRef arg3 = irm3->op2, arg4 = irm4->op2; J->cur.nins--; tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg3); tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg4); ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_pow); hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); break; } } } hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2); break; case IR_ATAN2: hi = split_call_ll(J, hisubst, oir, ir, IRCALL_atan2); break; case IR_LDEXP: hi = split_call_li(J, hisubst, oir, ir, IRCALL_ldexp); break; case IR_NEG: case IR_ABS: nir->o = IR_CONV; /* Pass through loword. */ nir->op2 = (IRT_INT << 5) | IRT_INT; hi = split_emit(J, IRT(ir->o == IR_NEG ? IR_BXOR : IR_BAND, IRT_SOFTFP), hisubst[ir->op1], hisubst[ir->op2]); break; case IR_SLOAD: if ((nir->op2 & IRSLOAD_CONVERT)) { /* Convert from int to number. */ nir->op2 &= ~IRSLOAD_CONVERT; ir->prev = nref = split_emit(J, IRTI(IR_CALLN), nref, IRCALL_softfp_i2d); hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); break; } /* fallthrough */ case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: case IR_STRTO: hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); break; case IR_XLOAD: { IRIns inslo = *nir; /* Save/undo the emit of the lo XLOAD. */ J->cur.nins--; hi = split_ptr(J, oir, ir->op1); /* Insert the hiref ADD. */ nref = lj_ir_nextins(J); nir = IR(nref); *nir = inslo; /* Re-emit lo XLOAD immediately before hi XLOAD. */ hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP), hi, ir->op2); #if LJ_LE ir->prev = nref; #else ir->prev = hi; hi = nref; #endif break; } case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_XSTORE: split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nir->op1, hisubst[ir->op2]); break; case IR_CONV: { /* Conversion to number. Others handled below. */ IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); UNUSED(st); #if LJ_32 && LJ_HASFFI if (st == IRT_I64 || st == IRT_U64) { hi = split_call_l(J, hisubst, oir, ir, st == IRT_I64 ? IRCALL_fp64_l2d : IRCALL_fp64_ul2d); break; } #endif lua_assert(st == IRT_INT || (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT))); nir->o = IR_CALLN; #if LJ_32 && LJ_HASFFI nir->op2 = st == IRT_INT ? IRCALL_softfp_i2d : st == IRT_FLOAT ? IRCALL_softfp_f2d : IRCALL_softfp_ui2d; #else nir->op2 = IRCALL_softfp_i2d; #endif hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); break; } case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS: goto split_call; case IR_PHI: if (nir->op1 == nir->op2) J->cur.nins--; /* Drop useless PHIs. */ if (hisubst[ir->op1] != hisubst[ir->op2]) split_emit(J, IRT(IR_PHI, IRT_SOFTFP), hisubst[ir->op1], hisubst[ir->op2]); break; case IR_HIOP: J->cur.nins--; /* Drop joining HIOP. */ ir->prev = nir->op1; hi = nir->op2; break; default: lua_assert(ir->o <= IR_NE || ir->o == IR_MIN || ir->o == IR_MAX); hi = split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP), hisubst[ir->op1], hisubst[ir->op2]); break; } } else #endif #if LJ_32 && LJ_HASFFI if (irt_isint64(ir->t)) { IRRef hiref = hisubst[ir->op1]; nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD); /* Turn into INT op. */ switch (ir->o) { case IR_ADD: case IR_SUB: /* Use plain op for hiword if loword cannot produce a carry/borrow. */ if (irref_isk(nir->op2) && IR(nir->op2)->i == 0) { ir->prev = nir->op1; /* Pass through loword. */ nir->op1 = hiref; nir->op2 = hisubst[ir->op2]; hi = nref; break; } /* fallthrough */ case IR_NEG: hi = split_emit(J, IRTI(IR_HIOP), hiref, hisubst[ir->op2]); break; case IR_MUL: hi = split_call_ll(J, hisubst, oir, ir, IRCALL_lj_carith_mul64); break; case IR_DIV: hi = split_call_ll(J, hisubst, oir, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : IRCALL_lj_carith_divu64); break; case IR_MOD: hi = split_call_ll(J, hisubst, oir, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : IRCALL_lj_carith_modu64); break; case IR_POW: hi = split_call_ll(J, hisubst, oir, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : IRCALL_lj_carith_powu64); break; case IR_FLOAD: lua_assert(ir->op2 == IRFL_CDATA_INT64); hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4); #if LJ_BE ir->prev = hi; hi = nref; #endif break; case IR_XLOAD: hi = split_emit(J, IRTI(IR_XLOAD), split_ptr(J, oir, ir->op1), ir->op2); #if LJ_BE ir->prev = hi; hi = nref; #endif break; case IR_XSTORE: split_emit(J, IRTI(IR_HIOP), nir->op1, hisubst[ir->op2]); break; case IR_CONV: { /* Conversion to 64 bit integer. Others handled below. */ IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); #if LJ_SOFTFP if (st == IRT_NUM) { /* NUM to 64 bit int conv. */ hi = split_call_l(J, hisubst, oir, ir, irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul); } else if (st == IRT_FLOAT) { /* FLOAT to 64 bit int conv. */ nir->o = IR_CALLN; nir->op2 = irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul; hi = split_emit(J, IRTI(IR_HIOP), nref, nref); } #else if (st == IRT_NUM || st == IRT_FLOAT) { /* FP to 64 bit int conv. */ hi = split_emit(J, IRTI(IR_HIOP), nir->op1, nref); } #endif else if (st == IRT_I64 || st == IRT_U64) { /* 64/64 bit cast. */ /* Drop cast, since assembler doesn't care. But fwd both parts. */ hi = hiref; goto fwdlo; } else if ((ir->op2 & IRCONV_SEXT)) { /* Sign-extend to 64 bit. */ IRRef k31 = lj_ir_kint(J, 31); nir = IR(nref); /* May have been reallocated. */ ir->prev = nir->op1; /* Pass through loword. */ nir->o = IR_BSAR; /* hi = bsar(lo, 31). */ nir->op2 = k31; hi = nref; } else { /* Zero-extend to 64 bit. */ hi = lj_ir_kint(J, 0); goto fwdlo; } break; } case IR_CALLXS: goto split_call; case IR_PHI: { IRRef hiref2; if ((irref_isk(nir->op1) && irref_isk(nir->op2)) || nir->op1 == nir->op2) J->cur.nins--; /* Drop useless PHIs. */ hiref2 = hisubst[ir->op2]; if (!((irref_isk(hiref) && irref_isk(hiref2)) || hiref == hiref2)) split_emit(J, IRTI(IR_PHI), hiref, hiref2); break; } case IR_HIOP: J->cur.nins--; /* Drop joining HIOP. */ ir->prev = nir->op1; hi = nir->op2; break; default: lua_assert(ir->o <= IR_NE); /* Comparisons. */ split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]); break; } } else #endif #if LJ_SOFTFP if (ir->o == IR_SLOAD) { if ((nir->op2 & IRSLOAD_CONVERT)) { /* Convert from number to int. */ nir->op2 &= ~IRSLOAD_CONVERT; if (!(nir->op2 & IRSLOAD_TYPECHECK)) nir->t.irt = IRT_INT; /* Drop guard. */ split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); ir->prev = split_num2int(J, nref, nref+1, irt_isguard(ir->t)); } } else if (ir->o == IR_TOBIT) { IRRef tmp, op1 = ir->op1; J->cur.nins--; #if LJ_LE tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]); #else tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev); #endif ir->prev = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_lj_vm_tobit); } else if (ir->o == IR_TOSTR) { if (hisubst[ir->op1]) { if (irref_isk(ir->op1)) nir->op1 = ir->op1; else split_emit(J, IRT(IR_HIOP, IRT_NIL), hisubst[ir->op1], nref); } } else if (ir->o == IR_HREF || ir->o == IR_NEWREF) { if (irref_isk(ir->op2) && hisubst[ir->op2]) nir->op2 = ir->op2; } else #endif if (ir->o == IR_CONV) { /* See above, too. */ IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); #if LJ_32 && LJ_HASFFI if (st == IRT_I64 || st == IRT_U64) { /* Conversion from 64 bit int. */ #if LJ_SOFTFP if (irt_isfloat(ir->t)) { split_call_l(J, hisubst, oir, ir, st == IRT_I64 ? IRCALL_fp64_l2f : IRCALL_fp64_ul2f); J->cur.nins--; /* Drop unused HIOP. */ } #else if (irt_isfp(ir->t)) { /* 64 bit integer to FP conversion. */ ir->prev = split_emit(J, IRT(IR_HIOP, irt_type(ir->t)), hisubst[ir->op1], nref); } #endif else { /* Truncate to lower 32 bits. */ fwdlo: ir->prev = nir->op1; /* Forward loword. */ /* Replace with NOP to avoid messing up the snapshot logic. */ nir->ot = IRT(IR_NOP, IRT_NIL); nir->op1 = nir->op2 = 0; } } #endif #if LJ_SOFTFP && LJ_32 && LJ_HASFFI else if (irt_isfloat(ir->t)) { if (st == IRT_NUM) { split_call_l(J, hisubst, oir, ir, IRCALL_softfp_d2f); J->cur.nins--; /* Drop unused HIOP. */ } else { nir->o = IR_CALLN; nir->op2 = st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f; } } else if (st == IRT_FLOAT) { nir->o = IR_CALLN; nir->op2 = irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui; } else #endif #if LJ_SOFTFP if (st == IRT_NUM || (LJ_32 && LJ_HASFFI && st == IRT_FLOAT)) { if (irt_isguard(ir->t)) { lua_assert(st == IRT_NUM && irt_isint(ir->t)); J->cur.nins--; ir->prev = split_num2int(J, nir->op1, hisubst[ir->op1], 1); } else { split_call_l(J, hisubst, oir, ir, #if LJ_32 && LJ_HASFFI st == IRT_NUM ? (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) : (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui) #else IRCALL_softfp_d2i #endif ); J->cur.nins--; /* Drop unused HIOP. */ } } #endif } else if (ir->o == IR_CALLXS) { IRRef hiref; split_call: hiref = hisubst[ir->op1]; if (hiref) { IROpT ot = nir->ot; IRRef op2 = nir->op2; nir->ot = IRT(IR_CARG, IRT_NIL); #if LJ_LE nir->op2 = hiref; #else nir->op2 = nir->op1; nir->op1 = hiref; #endif ir->prev = nref = split_emit(J, ot, nref, op2); } if (LJ_SOFTFP ? irt_is64(ir->t) : irt_isint64(ir->t)) hi = split_emit(J, IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT), nref, nref); } else if (ir->o == IR_CARG) { IRRef hiref = hisubst[ir->op1]; if (hiref) { IRRef op2 = nir->op2; #if LJ_LE nir->op2 = hiref; #else nir->op2 = nir->op1; nir->op1 = hiref; #endif ir->prev = nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2); nir = IR(nref); } hiref = hisubst[ir->op2]; if (hiref) { #if !LJ_TARGET_X86 int carg = 0; IRIns *cir; for (cir = IR(nir->op1); cir->o == IR_CARG; cir = IR(cir->op1)) carg++; if ((carg & 1) == 0) { /* Align 64 bit arguments. */ IRRef op2 = nir->op2; nir->op2 = REF_NIL; nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2); nir = IR(nref); } #endif #if LJ_BE { IRRef tmp = nir->op2; nir->op2 = hiref; hiref = tmp; } #endif ir->prev = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, hiref); } } else if (ir->o == IR_CNEWI) { if (hisubst[ir->op2]) split_emit(J, IRT(IR_HIOP, IRT_NIL), nref, hisubst[ir->op2]); } else if (ir->o == IR_LOOP) { J->loopref = nref; /* Needed by assembler. */ } hisubst[ref] = hi; /* Store hiword substitution. */ } if (snref == nins) { /* Substitution for last snapshot. */ snap->ref = J->cur.nins; split_subst_snap(J, snap, oir); } /* Add PHI marks. */ for (ref = J->cur.nins-1; ref >= REF_FIRST; ref--) { IRIns *ir = IR(ref); if (ir->o != IR_PHI) break; if (!irref_isk(ir->op1)) irt_setphi(IR(ir->op1)->t); if (ir->op2 > J->loopref) irt_setphi(IR(ir->op2)->t); } }