/* Alias analysis for XLOAD/XSTORE. */ static AliasRet aa_xref(jit_State *J, IRIns *refa, IRIns *xa, IRIns *xb) { ptrdiff_t ofsa = 0, ofsb = 0; IRIns *refb = IR(xb->op1); IRIns *basea = refa, *baseb = refb; if (refa == refb && irt_sametype(xa->t, xb->t)) return ALIAS_MUST; /* Shortcut for same refs with identical type. */ /* Offset-based disambiguation. */ if (refa->o == IR_ADD && irref_isk(refa->op2)) { IRIns *irk = IR(refa->op2); basea = IR(refa->op1); ofsa = (LJ_64 && irk->o == IR_KINT64) ? (ptrdiff_t)ir_k64(irk)->u64 : (ptrdiff_t)irk->i; } if (refb->o == IR_ADD && irref_isk(refb->op2)) { IRIns *irk = IR(refb->op2); baseb = IR(refb->op1); ofsb = (LJ_64 && irk->o == IR_KINT64) ? (ptrdiff_t)ir_k64(irk)->u64 : (ptrdiff_t)irk->i; } /* Treat constified pointers like base vs. base+offset. */ if (basea->o == IR_KPTR && baseb->o == IR_KPTR) { ofsb += (char *)ir_kptr(baseb) - (char *)ir_kptr(basea); baseb = basea; } /* This implements (very) strict aliasing rules. ** Different types do NOT alias, except for differences in signedness. ** Type punning through unions is allowed (but forces a reload). */ if (basea == baseb) { ptrdiff_t sza = irt_size(xa->t), szb = irt_size(xb->t); if (ofsa == ofsb) { if (sza == szb && irt_isfp(xa->t) == irt_isfp(xb->t)) return ALIAS_MUST; /* Same-sized, same-kind. May need to convert. */ } else if (ofsa + sza <= ofsb || ofsb + szb <= ofsa) { return ALIAS_NO; /* Non-overlapping base+-o1 vs. base+-o2. */ } /* NYI: extract, extend or reinterpret bits (int <-> fp). */ return ALIAS_MAY; /* Overlapping or type punning: force reload. */ } if (!irt_sametype(xa->t, xb->t) && !(irt_typerange(xa->t, IRT_I8, IRT_U64) && ((xa->t.irt - IRT_I8) ^ (xb->t.irt - IRT_I8)) == 1)) return ALIAS_NO; /* NYI: structural disambiguation. */ return aa_cnew(J, basea, baseb); /* Try to disambiguate allocations. */ }
/* Alias analysis for XLOAD/XSTORE. */ static AliasRet aa_xref(jit_State *J, IRIns *refa, IRIns *xa, IRIns *xb) { ptrdiff_t ofsa = 0, ofsb = 0; IRIns *refb = IR(xb->op1); IRIns *basea = refa, *baseb = refb; /* This implements (very) strict aliasing rules. ** Different types do NOT alias, except for differences in signedness. ** NYI: this also prevents type punning through unions. */ if (irt_sametype(xa->t, xb->t)) { if (refa == refb) return ALIAS_MUST; /* Shortcut for same refs with identical type. */ } else if (!(irt_typerange(xa->t, IRT_I8, IRT_U64) && ((xa->t.irt - IRT_I8) ^ (xb->t.irt - IRT_I8)) == 1)) { return ALIAS_NO; } /* Offset-based disambiguation. */ if (refa->o == IR_ADD && irref_isk(refa->op2)) { IRIns *irk = IR(refa->op2); basea = IR(refa->op1); ofsa = (LJ_64 && irk->o == IR_KINT64) ? (ptrdiff_t)ir_k64(irk)->u64 : (ptrdiff_t)irk->i; if (basea == refb && ofsa != 0) return ALIAS_NO; /* base+-ofs vs. base. */ } if (refb->o == IR_ADD && irref_isk(refb->op2)) { IRIns *irk = IR(refb->op2); baseb = IR(refb->op1); ofsb = (LJ_64 && irk->o == IR_KINT64) ? (ptrdiff_t)ir_k64(irk)->u64 : (ptrdiff_t)irk->i; if (refa == baseb && ofsb != 0) return ALIAS_NO; /* base vs. base+-ofs. */ } if (basea == baseb) { /* This assumes strictly-typed, non-overlapping accesses. */ if (ofsa != ofsb) return ALIAS_NO; /* base+-o1 vs. base+-o2 and o1 != o2. */ return ALIAS_MUST; /* Unsigned vs. signed access to the same address. */ } /* NYI: structural disambiguation. */ return aa_cnew(J, basea, baseb); /* Try to disambiguate allocations. */ }
/* Reassociate index references. */ static IRRef reassoc_xref(jit_State *J, IRIns *ir) { ptrdiff_t ofs = 0; if (ir->o == IR_ADD && irref_isk(ir->op2)) { /* Get constant offset. */ IRIns *irk = IR(ir->op2); ofs = (LJ_64 && irk->o == IR_KINT64) ? (ptrdiff_t)ir_k64(irk)->u64 : (ptrdiff_t)irk->i; ir = IR(ir->op1); } if (ir->o == IR_ADD) { /* Add of base + index. */ /* Index ref > base ref for loop-carried dependences. Only check op1. */ IRIns *ir2, *ir1 = IR(ir->op1); int32_t shift = 0; IRRef idxref; /* Determine index shifts. Don't bother with IR_MUL here. */ if (ir1->o == IR_BSHL && irref_isk(ir1->op2)) shift = IR(ir1->op2)->i; else if (ir1->o == IR_ADD && ir1->op1 == ir1->op2) shift = 1; else ir1 = ir; ir2 = IR(ir1->op1); /* A non-reassociated add. Must be a loop-carried dependence. */ if (ir2->o == IR_ADD && irt_isint(ir2->t) && irref_isk(ir2->op2)) ofs += (ptrdiff_t)IR(ir2->op2)->i << shift; else return 0; idxref = ir2->op1; /* Try to CSE the reassociated chain. Give up if not found. */ if (ir1 != ir && !(idxref = reassoc_trycse(J, ir1->o, idxref, ir1->o == IR_BSHL ? ir1->op2 : idxref))) return 0; if (!(idxref = reassoc_trycse(J, IR_ADD, idxref, ir->op2))) return 0; if (ofs != 0) { IRRef refk = tref_ref(lj_ir_kintp(J, ofs)); if (!(idxref = reassoc_trycse(J, IR_ADD, idxref, refk))) return 0; } return idxref; /* Success, found a reassociated index reference. Phew. */ } return 0; /* Failure. */ }
/* Unsink allocation from the trace exit state. Unsink sunk stores. */ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, SnapNo snapno, BloomFilter rfilt, IRIns *ir, TValue *o) { lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW || ir->o == IR_CNEWI); #if LJ_HASFFI if (ir->o == IR_CNEW || ir->o == IR_CNEWI) { CTState *cts = ctype_cts(J->L); CTypeID id = (CTypeID)T->ir[ir->op1].i; CTSize sz; CTInfo info = lj_ctype_info(cts, id, &sz); GCcdata *cd = lj_cdata_newx(cts, id, sz, info); setcdataV(J->L, o, cd); if (ir->o == IR_CNEWI) { uint8_t *p = (uint8_t *)cdataptr(cd); lua_assert(sz == 4 || sz == 8); if (LJ_32 && sz == 8 && ir+1 < T->ir + T->nins && (ir+1)->o == IR_HIOP) { snap_restoredata(T, ex, snapno, rfilt, (ir+1)->op2, LJ_LE?p+4:p, 4); if (LJ_BE) p += 4; sz = 4; } snap_restoredata(T, ex, snapno, rfilt, ir->op2, p, sz); } else { IRIns *irs, *irlast = &T->ir[T->snap[snapno].ref]; for (irs = ir+1; irs < irlast; irs++) if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) { IRIns *iro = &T->ir[T->ir[irs->op1].op2]; uint8_t *p = (uint8_t *)cd; CTSize szs; lua_assert(irs->o == IR_XSTORE && T->ir[irs->op1].o == IR_ADD); lua_assert(iro->o == IR_KINT || iro->o == IR_KINT64); if (irt_is64(irs->t)) szs = 8; else if (irt_isi8(irs->t) || irt_isu8(irs->t)) szs = 1; else if (irt_isi16(irs->t) || irt_isu16(irs->t)) szs = 2; else szs = 4; if (LJ_64 && iro->o == IR_KINT64) p += (int64_t)ir_k64(iro)->u64; else p += iro->i; lua_assert(p >= (uint8_t *)cdataptr(cd) && p + szs <= (uint8_t *)cdataptr(cd) + sz); if (LJ_32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) { lua_assert(szs == 4); snap_restoredata(T, ex, snapno, rfilt, (irs+1)->op2, LJ_LE?p+4:p,4); if (LJ_BE) p += 4; } snap_restoredata(T, ex, snapno, rfilt, irs->op2, p, szs); } } } else #endif { IRIns *irs, *irlast; GCtab *t = ir->o == IR_TNEW ? lj_tab_new(J->L, ir->op1, ir->op2) : lj_tab_dup(J->L, ir_ktab(&T->ir[ir->op1])); settabV(J->L, o, t); irlast = &T->ir[T->snap[snapno].ref]; for (irs = ir+1; irs < irlast; irs++) if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) { IRIns *irk = &T->ir[irs->op1]; TValue tmp, *val; lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE || irs->o == IR_FSTORE); if (irk->o == IR_FREF) { lua_assert(irk->op2 == IRFL_TAB_META); snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, &tmp); /* NOBARRIER: The table is new (marked white). */ setgcref(t->metatable, obj2gco(tabV(&tmp))); } else { irk = &T->ir[irk->op2]; if (irk->o == IR_KSLOT) irk = &T->ir[irk->op1]; lj_ir_kvalue(J->L, &tmp, irk); val = lj_tab_set(J->L, t, &tmp); /* NOBARRIER: The table is new (marked white). */ snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val); if (LJ_SOFTFP && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) { snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp); val->u32.hi = tmp.u32.lo; } } } } }
/* Transform the old IR to the new IR. */ static void split_ir(jit_State *J) { IRRef nins = J->cur.nins, nk = J->cur.nk; MSize irlen = nins - nk; MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1)); IRIns *oir = (IRIns *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, need); IRRef1 *hisubst; IRRef ref, snref; SnapShot *snap; /* Copy old IR to buffer. */ memcpy(oir, IR(nk), irlen*sizeof(IRIns)); /* Bias hiword substitution table and old IR. Loword kept in field prev. */ hisubst = (IRRef1 *)&oir[irlen] - nk; oir -= nk; /* Remove all IR instructions, but retain IR constants. */ J->cur.nins = REF_FIRST; J->loopref = 0; /* Process constants and fixed references. */ for (ref = nk; ref <= REF_BASE; ref++) { IRIns *ir = &oir[ref]; if ((LJ_SOFTFP && ir->o == IR_KNUM) || ir->o == IR_KINT64) { /* Split up 64 bit constant. */ TValue tv = *ir_k64(ir); ir->prev = lj_ir_kint(J, (int32_t)tv.u32.lo); hisubst[ref] = lj_ir_kint(J, (int32_t)tv.u32.hi); } else { ir->prev = ref; /* Identity substitution for loword. */ hisubst[ref] = 0; } } /* Process old IR instructions. */ snap = J->cur.snap; snref = snap->ref; for (ref = REF_FIRST; ref < nins; ref++) { IRIns *ir = &oir[ref]; IRRef nref = lj_ir_nextins(J); IRIns *nir = IR(nref); IRRef hi = 0; if (ref >= snref) { snap->ref = nref; split_subst_snap(J, snap++, oir); snref = snap < &J->cur.snap[J->cur.nsnap] ? snap->ref : ~(IRRef)0; } /* Copy-substitute old instruction to new instruction. */ nir->op1 = ir->op1 < nk ? ir->op1 : oir[ir->op1].prev; nir->op2 = ir->op2 < nk ? ir->op2 : oir[ir->op2].prev; ir->prev = nref; /* Loword substitution. */ nir->o = ir->o; nir->t.irt = ir->t.irt & ~(IRT_MARK|IRT_ISPHI); hisubst[ref] = 0; /* Split 64 bit instructions. */ #if LJ_SOFTFP if (irt_isnum(ir->t)) { nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD); /* Turn into INT op. */ /* Note: hi ref = lo ref + 1! Required for SNAP_SOFTFPNUM logic. */ switch (ir->o) { case IR_ADD: hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_add); break; case IR_SUB: hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_sub); break; case IR_MUL: hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_mul); break; case IR_DIV: hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_div); break; case IR_POW: hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi); break; case IR_FPMATH: /* Try to rejoin pow from EXP2, MUL and LOG2. */ if (nir->op2 == IRFPM_EXP2 && nir->op1 > J->loopref) { IRIns *irp = IR(nir->op1); if (irp->o == IR_CALLN && irp->op2 == IRCALL_softfp_mul) { IRIns *irm4 = IR(irp->op1); IRIns *irm3 = IR(irm4->op1); IRIns *irm12 = IR(irm3->op1); IRIns *irl1 = IR(irm12->op1); if (irm12->op1 > J->loopref && irl1->o == IR_CALLN && irl1->op2 == IRCALL_lj_vm_log2) { IRRef tmp = irl1->op1; /* Recycle first two args from LOG2. */ IRRef arg3 = irm3->op2, arg4 = irm4->op2; J->cur.nins--; tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg3); tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg4); ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_pow); hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); break; } } } hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2); break; case IR_ATAN2: hi = split_call_ll(J, hisubst, oir, ir, IRCALL_atan2); break; case IR_LDEXP: hi = split_call_li(J, hisubst, oir, ir, IRCALL_ldexp); break; case IR_NEG: case IR_ABS: nir->o = IR_CONV; /* Pass through loword. */ nir->op2 = (IRT_INT << 5) | IRT_INT; hi = split_emit(J, IRT(ir->o == IR_NEG ? IR_BXOR : IR_BAND, IRT_SOFTFP), hisubst[ir->op1], hisubst[ir->op2]); break; case IR_SLOAD: if ((nir->op2 & IRSLOAD_CONVERT)) { /* Convert from int to number. */ nir->op2 &= ~IRSLOAD_CONVERT; ir->prev = nref = split_emit(J, IRTI(IR_CALLN), nref, IRCALL_softfp_i2d); hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); break; } /* fallthrough */ case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: case IR_STRTO: hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); break; case IR_XLOAD: { IRIns inslo = *nir; /* Save/undo the emit of the lo XLOAD. */ J->cur.nins--; hi = split_ptr(J, oir, ir->op1); /* Insert the hiref ADD. */ nref = lj_ir_nextins(J); nir = IR(nref); *nir = inslo; /* Re-emit lo XLOAD immediately before hi XLOAD. */ hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP), hi, ir->op2); #if LJ_LE ir->prev = nref; #else ir->prev = hi; hi = nref; #endif break; } case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_XSTORE: split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nir->op1, hisubst[ir->op2]); break; case IR_CONV: { /* Conversion to number. Others handled below. */ IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); UNUSED(st); #if LJ_32 && LJ_HASFFI if (st == IRT_I64 || st == IRT_U64) { hi = split_call_l(J, hisubst, oir, ir, st == IRT_I64 ? IRCALL_fp64_l2d : IRCALL_fp64_ul2d); break; } #endif lua_assert(st == IRT_INT || (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT))); nir->o = IR_CALLN; #if LJ_32 && LJ_HASFFI nir->op2 = st == IRT_INT ? IRCALL_softfp_i2d : st == IRT_FLOAT ? IRCALL_softfp_f2d : IRCALL_softfp_ui2d; #else nir->op2 = IRCALL_softfp_i2d; #endif hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); break; } case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS: goto split_call; case IR_PHI: if (nir->op1 == nir->op2) J->cur.nins--; /* Drop useless PHIs. */ if (hisubst[ir->op1] != hisubst[ir->op2]) split_emit(J, IRT(IR_PHI, IRT_SOFTFP), hisubst[ir->op1], hisubst[ir->op2]); break; case IR_HIOP: J->cur.nins--; /* Drop joining HIOP. */ ir->prev = nir->op1; hi = nir->op2; break; default: lua_assert(ir->o <= IR_NE || ir->o == IR_MIN || ir->o == IR_MAX); hi = split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP), hisubst[ir->op1], hisubst[ir->op2]); break; } } else #endif #if LJ_32 && LJ_HASFFI if (irt_isint64(ir->t)) { IRRef hiref = hisubst[ir->op1]; nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD); /* Turn into INT op. */ switch (ir->o) { case IR_ADD: case IR_SUB: /* Use plain op for hiword if loword cannot produce a carry/borrow. */ if (irref_isk(nir->op2) && IR(nir->op2)->i == 0) { ir->prev = nir->op1; /* Pass through loword. */ nir->op1 = hiref; nir->op2 = hisubst[ir->op2]; hi = nref; break; } /* fallthrough */ case IR_NEG: hi = split_emit(J, IRTI(IR_HIOP), hiref, hisubst[ir->op2]); break; case IR_MUL: hi = split_call_ll(J, hisubst, oir, ir, IRCALL_lj_carith_mul64); break; case IR_DIV: hi = split_call_ll(J, hisubst, oir, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : IRCALL_lj_carith_divu64); break; case IR_MOD: hi = split_call_ll(J, hisubst, oir, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : IRCALL_lj_carith_modu64); break; case IR_POW: hi = split_call_ll(J, hisubst, oir, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : IRCALL_lj_carith_powu64); break; case IR_FLOAD: lua_assert(ir->op2 == IRFL_CDATA_INT64); hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4); #if LJ_BE ir->prev = hi; hi = nref; #endif break; case IR_XLOAD: hi = split_emit(J, IRTI(IR_XLOAD), split_ptr(J, oir, ir->op1), ir->op2); #if LJ_BE ir->prev = hi; hi = nref; #endif break; case IR_XSTORE: split_emit(J, IRTI(IR_HIOP), nir->op1, hisubst[ir->op2]); break; case IR_CONV: { /* Conversion to 64 bit integer. Others handled below. */ IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); #if LJ_SOFTFP if (st == IRT_NUM) { /* NUM to 64 bit int conv. */ hi = split_call_l(J, hisubst, oir, ir, irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul); } else if (st == IRT_FLOAT) { /* FLOAT to 64 bit int conv. */ nir->o = IR_CALLN; nir->op2 = irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul; hi = split_emit(J, IRTI(IR_HIOP), nref, nref); } #else if (st == IRT_NUM || st == IRT_FLOAT) { /* FP to 64 bit int conv. */ hi = split_emit(J, IRTI(IR_HIOP), nir->op1, nref); } #endif else if (st == IRT_I64 || st == IRT_U64) { /* 64/64 bit cast. */ /* Drop cast, since assembler doesn't care. But fwd both parts. */ hi = hiref; goto fwdlo; } else if ((ir->op2 & IRCONV_SEXT)) { /* Sign-extend to 64 bit. */ IRRef k31 = lj_ir_kint(J, 31); nir = IR(nref); /* May have been reallocated. */ ir->prev = nir->op1; /* Pass through loword. */ nir->o = IR_BSAR; /* hi = bsar(lo, 31). */ nir->op2 = k31; hi = nref; } else { /* Zero-extend to 64 bit. */ hi = lj_ir_kint(J, 0); goto fwdlo; } break; } case IR_CALLXS: goto split_call; case IR_PHI: { IRRef hiref2; if ((irref_isk(nir->op1) && irref_isk(nir->op2)) || nir->op1 == nir->op2) J->cur.nins--; /* Drop useless PHIs. */ hiref2 = hisubst[ir->op2]; if (!((irref_isk(hiref) && irref_isk(hiref2)) || hiref == hiref2)) split_emit(J, IRTI(IR_PHI), hiref, hiref2); break; } case IR_HIOP: J->cur.nins--; /* Drop joining HIOP. */ ir->prev = nir->op1; hi = nir->op2; break; default: lua_assert(ir->o <= IR_NE); /* Comparisons. */ split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]); break; } } else #endif #if LJ_SOFTFP if (ir->o == IR_SLOAD) { if ((nir->op2 & IRSLOAD_CONVERT)) { /* Convert from number to int. */ nir->op2 &= ~IRSLOAD_CONVERT; if (!(nir->op2 & IRSLOAD_TYPECHECK)) nir->t.irt = IRT_INT; /* Drop guard. */ split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); ir->prev = split_num2int(J, nref, nref+1, irt_isguard(ir->t)); } } else if (ir->o == IR_TOBIT) { IRRef tmp, op1 = ir->op1; J->cur.nins--; #if LJ_LE tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]); #else tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev); #endif ir->prev = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_lj_vm_tobit); } else if (ir->o == IR_TOSTR) { if (hisubst[ir->op1]) { if (irref_isk(ir->op1)) nir->op1 = ir->op1; else split_emit(J, IRT(IR_HIOP, IRT_NIL), hisubst[ir->op1], nref); } } else if (ir->o == IR_HREF || ir->o == IR_NEWREF) { if (irref_isk(ir->op2) && hisubst[ir->op2]) nir->op2 = ir->op2; } else #endif if (ir->o == IR_CONV) { /* See above, too. */ IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); #if LJ_32 && LJ_HASFFI if (st == IRT_I64 || st == IRT_U64) { /* Conversion from 64 bit int. */ #if LJ_SOFTFP if (irt_isfloat(ir->t)) { split_call_l(J, hisubst, oir, ir, st == IRT_I64 ? IRCALL_fp64_l2f : IRCALL_fp64_ul2f); J->cur.nins--; /* Drop unused HIOP. */ } #else if (irt_isfp(ir->t)) { /* 64 bit integer to FP conversion. */ ir->prev = split_emit(J, IRT(IR_HIOP, irt_type(ir->t)), hisubst[ir->op1], nref); } #endif else { /* Truncate to lower 32 bits. */ fwdlo: ir->prev = nir->op1; /* Forward loword. */ /* Replace with NOP to avoid messing up the snapshot logic. */ nir->ot = IRT(IR_NOP, IRT_NIL); nir->op1 = nir->op2 = 0; } } #endif #if LJ_SOFTFP && LJ_32 && LJ_HASFFI else if (irt_isfloat(ir->t)) { if (st == IRT_NUM) { split_call_l(J, hisubst, oir, ir, IRCALL_softfp_d2f); J->cur.nins--; /* Drop unused HIOP. */ } else { nir->o = IR_CALLN; nir->op2 = st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f; } } else if (st == IRT_FLOAT) { nir->o = IR_CALLN; nir->op2 = irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui; } else #endif #if LJ_SOFTFP if (st == IRT_NUM || (LJ_32 && LJ_HASFFI && st == IRT_FLOAT)) { if (irt_isguard(ir->t)) { lua_assert(st == IRT_NUM && irt_isint(ir->t)); J->cur.nins--; ir->prev = split_num2int(J, nir->op1, hisubst[ir->op1], 1); } else { split_call_l(J, hisubst, oir, ir, #if LJ_32 && LJ_HASFFI st == IRT_NUM ? (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) : (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui) #else IRCALL_softfp_d2i #endif ); J->cur.nins--; /* Drop unused HIOP. */ } } #endif } else if (ir->o == IR_CALLXS) { IRRef hiref; split_call: hiref = hisubst[ir->op1]; if (hiref) { IROpT ot = nir->ot; IRRef op2 = nir->op2; nir->ot = IRT(IR_CARG, IRT_NIL); #if LJ_LE nir->op2 = hiref; #else nir->op2 = nir->op1; nir->op1 = hiref; #endif ir->prev = nref = split_emit(J, ot, nref, op2); } if (LJ_SOFTFP ? irt_is64(ir->t) : irt_isint64(ir->t)) hi = split_emit(J, IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT), nref, nref); } else if (ir->o == IR_CARG) { IRRef hiref = hisubst[ir->op1]; if (hiref) { IRRef op2 = nir->op2; #if LJ_LE nir->op2 = hiref; #else nir->op2 = nir->op1; nir->op1 = hiref; #endif ir->prev = nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2); nir = IR(nref); } hiref = hisubst[ir->op2]; if (hiref) { #if !LJ_TARGET_X86 int carg = 0; IRIns *cir; for (cir = IR(nir->op1); cir->o == IR_CARG; cir = IR(cir->op1)) carg++; if ((carg & 1) == 0) { /* Align 64 bit arguments. */ IRRef op2 = nir->op2; nir->op2 = REF_NIL; nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2); nir = IR(nref); } #endif #if LJ_BE { IRRef tmp = nir->op2; nir->op2 = hiref; hiref = tmp; } #endif ir->prev = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, hiref); } } else if (ir->o == IR_CNEWI) { if (hisubst[ir->op2]) split_emit(J, IRT(IR_HIOP, IRT_NIL), nref, hisubst[ir->op2]); } else if (ir->o == IR_LOOP) { J->loopref = nref; /* Needed by assembler. */ } hisubst[ref] = hi; /* Store hiword substitution. */ } if (snref == nins) { /* Substitution for last snapshot. */ snap->ref = J->cur.nins; split_subst_snap(J, snap, oir); } /* Add PHI marks. */ for (ref = J->cur.nins-1; ref >= REF_FIRST; ref--) { IRIns *ir = IR(ref); if (ir->o != IR_PHI) break; if (!irref_isk(ir->op1)) irt_setphi(IR(ir->op1)->t); if (ir->op2 > J->loopref) irt_setphi(IR(ir->op2)->t); } }
/* Transform the old IR to the new IR. */ static void split_ir(jit_State *J) { IRRef nins = J->cur.nins, nk = J->cur.nk; MSize irlen = nins - nk; MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1)); IRIns *oir = (IRIns *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, need); IRRef1 *hisubst; IRRef ref; /* Copy old IR to buffer. */ memcpy(oir, IR(nk), irlen*sizeof(IRIns)); /* Bias hiword substitution table and old IR. Loword kept in field prev. */ hisubst = (IRRef1 *)&oir[irlen] - nk; oir -= nk; /* Remove all IR instructions, but retain IR constants. */ J->cur.nins = REF_FIRST; /* Process constants and fixed references. */ for (ref = nk; ref <= REF_BASE; ref++) { IRIns *ir = &oir[ref]; if (ir->o == IR_KINT64) { /* Split up 64 bit constant. */ TValue tv = *ir_k64(ir); ir->prev = lj_ir_kint(J, (int32_t)tv.u32.lo); hisubst[ref] = lj_ir_kint(J, (int32_t)tv.u32.hi); } else { ir->prev = ref; /* Identity substitution for loword. */ hisubst[ref] = 0; } } /* Process old IR instructions. */ for (ref = REF_FIRST; ref < nins; ref++) { IRIns *ir = &oir[ref]; IRRef nref = lj_ir_nextins(J); IRIns *nir = IR(nref); IRRef hi = 0; /* Copy-substitute old instruction to new instruction. */ nir->op1 = ir->op1 < nk ? ir->op1 : oir[ir->op1].prev; nir->op2 = ir->op2 < nk ? ir->op2 : oir[ir->op2].prev; ir->prev = nref; /* Loword substitution. */ nir->o = ir->o; nir->t.irt = ir->t.irt & ~(IRT_MARK|IRT_ISPHI); hisubst[ref] = 0; /* Split 64 bit instructions. */ if (irt_isint64(ir->t)) { IRRef hiref = hisubst[ir->op1]; nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD); /* Turn into INT op. */ switch (ir->o) { case IR_ADD: case IR_SUB: /* Use plain op for hiword if loword cannot produce a carry/borrow. */ if (irref_isk(nir->op2) && IR(nir->op2)->i == 0) { ir->prev = nir->op1; /* Pass through loword. */ nir->op1 = hiref; nir->op2 = hisubst[ir->op2]; hi = nref; break; } /* fallthrough */ case IR_NEG: hi = split_emit(J, IRTI(IR_HIOP), hiref, hisubst[ir->op2]); break; case IR_MUL: hi = split_call64(J, hisubst, oir, ir, IRCALL_lj_carith_mul64); break; case IR_DIV: hi = split_call64(J, hisubst, oir, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : IRCALL_lj_carith_divu64); break; case IR_MOD: hi = split_call64(J, hisubst, oir, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : IRCALL_lj_carith_modu64); break; case IR_POW: hi = split_call64(J, hisubst, oir, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : IRCALL_lj_carith_powu64); break; case IR_FLOAD: lua_assert(ir->op2 == IRFL_CDATA_INT64); hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64HI); #if LJ_BE ir->prev = hi; hi = nref; #endif break; case IR_XLOAD: hi = split_emit(J, IRTI(IR_XLOAD), split_ptr(J, nir->op1), ir->op2); #if LJ_BE ir->prev = hi; hi = nref; #endif break; case IR_XSTORE: #if LJ_LE hiref = hisubst[ir->op2]; #else hiref = nir->op2; nir->op2 = hisubst[ir->op2]; #endif split_emit(J, IRTI(IR_XSTORE), split_ptr(J, nir->op1), hiref); break; case IR_CONV: { /* Conversion to 64 bit integer. Others handled below. */ IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); if (st == IRT_NUM || st == IRT_FLOAT) { /* FP to 64 bit int conv. */ hi = split_emit(J, IRTI(IR_HIOP), nir->op1, nref); } else if (st == IRT_I64 || st == IRT_U64) { /* 64/64 bit cast. */ /* Drop cast, since assembler doesn't care. */ goto fwdlo; } else if ((ir->op2 & IRCONV_SEXT)) { /* Sign-extend to 64 bit. */ IRRef k31 = lj_ir_kint(J, 31); nir = IR(nref); /* May have been reallocated. */ ir->prev = nir->op1; /* Pass through loword. */ nir->o = IR_BSAR; /* hi = bsar(lo, 31). */ nir->op2 = k31; hi = nref; } else { /* Zero-extend to 64 bit. */ hi = lj_ir_kint(J, 0); goto fwdlo; } break; } case IR_PHI: { IRRef hiref2; if ((irref_isk(nir->op1) && irref_isk(nir->op2)) || nir->op1 == nir->op2) J->cur.nins--; /* Drop useless PHIs. */ hiref2 = hisubst[ir->op2]; if (!((irref_isk(hiref) && irref_isk(hiref2)) || hiref == hiref2)) split_emit(J, IRTI(IR_PHI), hiref, hiref2); break; } default: lua_assert(ir->o <= IR_NE); /* Comparisons. */ split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]); break; } } else if (ir->o == IR_CONV) { /* See above, too. */ IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); if (st == IRT_I64 || st == IRT_U64) { /* Conversion from 64 bit int. */ if (irt_isfp(ir->t)) { /* 64 bit integer to FP conversion. */ ir->prev = split_emit(J, IRT(IR_HIOP, irt_type(ir->t)), hisubst[ir->op1], nref); } else { /* Truncate to lower 32 bits. */ fwdlo: ir->prev = nir->op1; /* Forward loword. */ /* Replace with NOP to avoid messing up the snapshot logic. */ nir->ot = IRT(IR_NOP, IRT_NIL); nir->op1 = nir->op2 = 0; } } } else if (ir->o == IR_CNEWI) { if (hisubst[ir->op2]) split_emit(J, IRT(IR_HIOP, IRT_NIL), nref, hisubst[ir->op2]); } else if (ir->o == IR_LOOP) { J->loopref = nref; /* Needed by assembler. */ } hisubst[ref] = hi; /* Store hiword substitution. */ } /* Add PHI marks. */ for (ref = J->cur.nins-1; ref >= REF_FIRST; ref--) { IRIns *ir = IR(ref); if (ir->o != IR_PHI) break; if (!irref_isk(ir->op1)) irt_setphi(IR(ir->op1)->t); if (ir->op2 > J->loopref) irt_setphi(IR(ir->op2)->t); } /* Substitute snapshot maps. */ oir[nins].prev = J->cur.nins; /* Substitution for last snapshot. */ { SnapNo i, nsnap = J->cur.nsnap; for (i = 0; i < nsnap; i++) { SnapShot *snap = &J->cur.snap[i]; SnapEntry *map = &J->cur.snapmap[snap->mapofs]; MSize n, nent = snap->nent; snap->ref = oir[snap->ref].prev; for (n = 0; n < nent; n++) { SnapEntry sn = map[n]; map[n] = ((sn & 0xffff0000) | oir[snap_ref(sn)].prev); } } } }