/* Transform the old IR to the new IR. */ static void split_ir(jit_State *J) { IRRef nins = J->cur.nins, nk = J->cur.nk; MSize irlen = nins - nk; MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1)); IRIns *oir = (IRIns *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, need); IRRef1 *hisubst; IRRef ref, snref; SnapShot *snap; /* Copy old IR to buffer. */ memcpy(oir, IR(nk), irlen*sizeof(IRIns)); /* Bias hiword substitution table and old IR. Loword kept in field prev. */ hisubst = (IRRef1 *)&oir[irlen] - nk; oir -= nk; /* Remove all IR instructions, but retain IR constants. */ J->cur.nins = REF_FIRST; J->loopref = 0; /* Process constants and fixed references. */ for (ref = nk; ref <= REF_BASE; ref++) { IRIns *ir = &oir[ref]; if ((LJ_SOFTFP && ir->o == IR_KNUM) || ir->o == IR_KINT64) { /* Split up 64 bit constant. */ TValue tv = *ir_k64(ir); ir->prev = lj_ir_kint(J, (int32_t)tv.u32.lo); hisubst[ref] = lj_ir_kint(J, (int32_t)tv.u32.hi); } else { ir->prev = ref; /* Identity substitution for loword. */ hisubst[ref] = 0; } } /* Process old IR instructions. */ snap = J->cur.snap; snref = snap->ref; for (ref = REF_FIRST; ref < nins; ref++) { IRIns *ir = &oir[ref]; IRRef nref = lj_ir_nextins(J); IRIns *nir = IR(nref); IRRef hi = 0; if (ref >= snref) { snap->ref = nref; split_subst_snap(J, snap++, oir); snref = snap < &J->cur.snap[J->cur.nsnap] ? snap->ref : ~(IRRef)0; } /* Copy-substitute old instruction to new instruction. */ nir->op1 = ir->op1 < nk ? ir->op1 : oir[ir->op1].prev; nir->op2 = ir->op2 < nk ? ir->op2 : oir[ir->op2].prev; ir->prev = nref; /* Loword substitution. */ nir->o = ir->o; nir->t.irt = ir->t.irt & ~(IRT_MARK|IRT_ISPHI); hisubst[ref] = 0; /* Split 64 bit instructions. */ #if LJ_SOFTFP if (irt_isnum(ir->t)) { nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD); /* Turn into INT op. */ /* Note: hi ref = lo ref + 1! Required for SNAP_SOFTFPNUM logic. */ switch (ir->o) { case IR_ADD: hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_add); break; case IR_SUB: hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_sub); break; case IR_MUL: hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_mul); break; case IR_DIV: hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_div); break; case IR_POW: hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi); break; case IR_FPMATH: /* Try to rejoin pow from EXP2, MUL and LOG2. */ if (nir->op2 == IRFPM_EXP2 && nir->op1 > J->loopref) { IRIns *irp = IR(nir->op1); if (irp->o == IR_CALLN && irp->op2 == IRCALL_softfp_mul) { IRIns *irm4 = IR(irp->op1); IRIns *irm3 = IR(irm4->op1); IRIns *irm12 = IR(irm3->op1); IRIns *irl1 = IR(irm12->op1); if (irm12->op1 > J->loopref && irl1->o == IR_CALLN && irl1->op2 == IRCALL_lj_vm_log2) { IRRef tmp = irl1->op1; /* Recycle first two args from LOG2. */ IRRef arg3 = irm3->op2, arg4 = irm4->op2; J->cur.nins--; tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg3); tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg4); ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_pow); hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); break; } } } hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2); break; case IR_ATAN2: hi = split_call_ll(J, hisubst, oir, ir, IRCALL_atan2); break; case IR_LDEXP: hi = split_call_li(J, hisubst, oir, ir, IRCALL_ldexp); break; case IR_NEG: case IR_ABS: nir->o = IR_CONV; /* Pass through loword. */ nir->op2 = (IRT_INT << 5) | IRT_INT; hi = split_emit(J, IRT(ir->o == IR_NEG ? IR_BXOR : IR_BAND, IRT_SOFTFP), hisubst[ir->op1], hisubst[ir->op2]); break; case IR_SLOAD: if ((nir->op2 & IRSLOAD_CONVERT)) { /* Convert from int to number. */ nir->op2 &= ~IRSLOAD_CONVERT; ir->prev = nref = split_emit(J, IRTI(IR_CALLN), nref, IRCALL_softfp_i2d); hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); break; } /* fallthrough */ case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: case IR_STRTO: hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); break; case IR_XLOAD: { IRIns inslo = *nir; /* Save/undo the emit of the lo XLOAD. */ J->cur.nins--; hi = split_ptr(J, oir, ir->op1); /* Insert the hiref ADD. */ nref = lj_ir_nextins(J); nir = IR(nref); *nir = inslo; /* Re-emit lo XLOAD immediately before hi XLOAD. */ hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP), hi, ir->op2); #if LJ_LE ir->prev = nref; #else ir->prev = hi; hi = nref; #endif break; } case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_XSTORE: split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nir->op1, hisubst[ir->op2]); break; case IR_CONV: { /* Conversion to number. Others handled below. */ IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); UNUSED(st); #if LJ_32 && LJ_HASFFI if (st == IRT_I64 || st == IRT_U64) { hi = split_call_l(J, hisubst, oir, ir, st == IRT_I64 ? IRCALL_fp64_l2d : IRCALL_fp64_ul2d); break; } #endif lua_assert(st == IRT_INT || (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT))); nir->o = IR_CALLN; #if LJ_32 && LJ_HASFFI nir->op2 = st == IRT_INT ? IRCALL_softfp_i2d : st == IRT_FLOAT ? IRCALL_softfp_f2d : IRCALL_softfp_ui2d; #else nir->op2 = IRCALL_softfp_i2d; #endif hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); break; } case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS: goto split_call; case IR_PHI: if (nir->op1 == nir->op2) J->cur.nins--; /* Drop useless PHIs. */ if (hisubst[ir->op1] != hisubst[ir->op2]) split_emit(J, IRT(IR_PHI, IRT_SOFTFP), hisubst[ir->op1], hisubst[ir->op2]); break; case IR_HIOP: J->cur.nins--; /* Drop joining HIOP. */ ir->prev = nir->op1; hi = nir->op2; break; default: lua_assert(ir->o <= IR_NE || ir->o == IR_MIN || ir->o == IR_MAX); hi = split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP), hisubst[ir->op1], hisubst[ir->op2]); break; } } else #endif #if LJ_32 && LJ_HASFFI if (irt_isint64(ir->t)) { IRRef hiref = hisubst[ir->op1]; nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD); /* Turn into INT op. */ switch (ir->o) { case IR_ADD: case IR_SUB: /* Use plain op for hiword if loword cannot produce a carry/borrow. */ if (irref_isk(nir->op2) && IR(nir->op2)->i == 0) { ir->prev = nir->op1; /* Pass through loword. */ nir->op1 = hiref; nir->op2 = hisubst[ir->op2]; hi = nref; break; } /* fallthrough */ case IR_NEG: hi = split_emit(J, IRTI(IR_HIOP), hiref, hisubst[ir->op2]); break; case IR_MUL: hi = split_call_ll(J, hisubst, oir, ir, IRCALL_lj_carith_mul64); break; case IR_DIV: hi = split_call_ll(J, hisubst, oir, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : IRCALL_lj_carith_divu64); break; case IR_MOD: hi = split_call_ll(J, hisubst, oir, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : IRCALL_lj_carith_modu64); break; case IR_POW: hi = split_call_ll(J, hisubst, oir, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : IRCALL_lj_carith_powu64); break; case IR_FLOAD: lua_assert(ir->op2 == IRFL_CDATA_INT64); hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4); #if LJ_BE ir->prev = hi; hi = nref; #endif break; case IR_XLOAD: hi = split_emit(J, IRTI(IR_XLOAD), split_ptr(J, oir, ir->op1), ir->op2); #if LJ_BE ir->prev = hi; hi = nref; #endif break; case IR_XSTORE: split_emit(J, IRTI(IR_HIOP), nir->op1, hisubst[ir->op2]); break; case IR_CONV: { /* Conversion to 64 bit integer. Others handled below. */ IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); #if LJ_SOFTFP if (st == IRT_NUM) { /* NUM to 64 bit int conv. */ hi = split_call_l(J, hisubst, oir, ir, irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul); } else if (st == IRT_FLOAT) { /* FLOAT to 64 bit int conv. */ nir->o = IR_CALLN; nir->op2 = irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul; hi = split_emit(J, IRTI(IR_HIOP), nref, nref); } #else if (st == IRT_NUM || st == IRT_FLOAT) { /* FP to 64 bit int conv. */ hi = split_emit(J, IRTI(IR_HIOP), nir->op1, nref); } #endif else if (st == IRT_I64 || st == IRT_U64) { /* 64/64 bit cast. */ /* Drop cast, since assembler doesn't care. But fwd both parts. */ hi = hiref; goto fwdlo; } else if ((ir->op2 & IRCONV_SEXT)) { /* Sign-extend to 64 bit. */ IRRef k31 = lj_ir_kint(J, 31); nir = IR(nref); /* May have been reallocated. */ ir->prev = nir->op1; /* Pass through loword. */ nir->o = IR_BSAR; /* hi = bsar(lo, 31). */ nir->op2 = k31; hi = nref; } else { /* Zero-extend to 64 bit. */ hi = lj_ir_kint(J, 0); goto fwdlo; } break; } case IR_CALLXS: goto split_call; case IR_PHI: { IRRef hiref2; if ((irref_isk(nir->op1) && irref_isk(nir->op2)) || nir->op1 == nir->op2) J->cur.nins--; /* Drop useless PHIs. */ hiref2 = hisubst[ir->op2]; if (!((irref_isk(hiref) && irref_isk(hiref2)) || hiref == hiref2)) split_emit(J, IRTI(IR_PHI), hiref, hiref2); break; } case IR_HIOP: J->cur.nins--; /* Drop joining HIOP. */ ir->prev = nir->op1; hi = nir->op2; break; default: lua_assert(ir->o <= IR_NE); /* Comparisons. */ split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]); break; } } else #endif #if LJ_SOFTFP if (ir->o == IR_SLOAD) { if ((nir->op2 & IRSLOAD_CONVERT)) { /* Convert from number to int. */ nir->op2 &= ~IRSLOAD_CONVERT; if (!(nir->op2 & IRSLOAD_TYPECHECK)) nir->t.irt = IRT_INT; /* Drop guard. */ split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); ir->prev = split_num2int(J, nref, nref+1, irt_isguard(ir->t)); } } else if (ir->o == IR_TOBIT) { IRRef tmp, op1 = ir->op1; J->cur.nins--; #if LJ_LE tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]); #else tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev); #endif ir->prev = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_lj_vm_tobit); } else if (ir->o == IR_TOSTR) { if (hisubst[ir->op1]) { if (irref_isk(ir->op1)) nir->op1 = ir->op1; else split_emit(J, IRT(IR_HIOP, IRT_NIL), hisubst[ir->op1], nref); } } else if (ir->o == IR_HREF || ir->o == IR_NEWREF) { if (irref_isk(ir->op2) && hisubst[ir->op2]) nir->op2 = ir->op2; } else #endif if (ir->o == IR_CONV) { /* See above, too. */ IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); #if LJ_32 && LJ_HASFFI if (st == IRT_I64 || st == IRT_U64) { /* Conversion from 64 bit int. */ #if LJ_SOFTFP if (irt_isfloat(ir->t)) { split_call_l(J, hisubst, oir, ir, st == IRT_I64 ? IRCALL_fp64_l2f : IRCALL_fp64_ul2f); J->cur.nins--; /* Drop unused HIOP. */ } #else if (irt_isfp(ir->t)) { /* 64 bit integer to FP conversion. */ ir->prev = split_emit(J, IRT(IR_HIOP, irt_type(ir->t)), hisubst[ir->op1], nref); } #endif else { /* Truncate to lower 32 bits. */ fwdlo: ir->prev = nir->op1; /* Forward loword. */ /* Replace with NOP to avoid messing up the snapshot logic. */ nir->ot = IRT(IR_NOP, IRT_NIL); nir->op1 = nir->op2 = 0; } } #endif #if LJ_SOFTFP && LJ_32 && LJ_HASFFI else if (irt_isfloat(ir->t)) { if (st == IRT_NUM) { split_call_l(J, hisubst, oir, ir, IRCALL_softfp_d2f); J->cur.nins--; /* Drop unused HIOP. */ } else { nir->o = IR_CALLN; nir->op2 = st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f; } } else if (st == IRT_FLOAT) { nir->o = IR_CALLN; nir->op2 = irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui; } else #endif #if LJ_SOFTFP if (st == IRT_NUM || (LJ_32 && LJ_HASFFI && st == IRT_FLOAT)) { if (irt_isguard(ir->t)) { lua_assert(st == IRT_NUM && irt_isint(ir->t)); J->cur.nins--; ir->prev = split_num2int(J, nir->op1, hisubst[ir->op1], 1); } else { split_call_l(J, hisubst, oir, ir, #if LJ_32 && LJ_HASFFI st == IRT_NUM ? (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) : (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui) #else IRCALL_softfp_d2i #endif ); J->cur.nins--; /* Drop unused HIOP. */ } } #endif } else if (ir->o == IR_CALLXS) { IRRef hiref; split_call: hiref = hisubst[ir->op1]; if (hiref) { IROpT ot = nir->ot; IRRef op2 = nir->op2; nir->ot = IRT(IR_CARG, IRT_NIL); #if LJ_LE nir->op2 = hiref; #else nir->op2 = nir->op1; nir->op1 = hiref; #endif ir->prev = nref = split_emit(J, ot, nref, op2); } if (LJ_SOFTFP ? irt_is64(ir->t) : irt_isint64(ir->t)) hi = split_emit(J, IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT), nref, nref); } else if (ir->o == IR_CARG) { IRRef hiref = hisubst[ir->op1]; if (hiref) { IRRef op2 = nir->op2; #if LJ_LE nir->op2 = hiref; #else nir->op2 = nir->op1; nir->op1 = hiref; #endif ir->prev = nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2); nir = IR(nref); } hiref = hisubst[ir->op2]; if (hiref) { #if !LJ_TARGET_X86 int carg = 0; IRIns *cir; for (cir = IR(nir->op1); cir->o == IR_CARG; cir = IR(cir->op1)) carg++; if ((carg & 1) == 0) { /* Align 64 bit arguments. */ IRRef op2 = nir->op2; nir->op2 = REF_NIL; nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2); nir = IR(nref); } #endif #if LJ_BE { IRRef tmp = nir->op2; nir->op2 = hiref; hiref = tmp; } #endif ir->prev = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, hiref); } } else if (ir->o == IR_CNEWI) { if (hisubst[ir->op2]) split_emit(J, IRT(IR_HIOP, IRT_NIL), nref, hisubst[ir->op2]); } else if (ir->o == IR_LOOP) { J->loopref = nref; /* Needed by assembler. */ } hisubst[ref] = hi; /* Store hiword substitution. */ } if (snref == nins) { /* Substitution for last snapshot. */ snap->ref = J->cur.nins; split_subst_snap(J, snap, oir); } /* Add PHI marks. */ for (ref = J->cur.nins-1; ref >= REF_FIRST; ref--) { IRIns *ir = IR(ref); if (ir->o != IR_PHI) break; if (!irref_isk(ir->op1)) irt_setphi(IR(ir->op1)->t); if (ir->op2 > J->loopref) irt_setphi(IR(ir->op2)->t); } }
static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp, void *svisnz) { CTSize dsize = d->size, ssize = s->size; CTInfo dinfo = d->info, sinfo = s->info; IRType dt = crec_ct2irt(d); IRType st = crec_ct2irt(s); if (ctype_type(dinfo) > CT_MAYCONVERT || ctype_type(sinfo) > CT_MAYCONVERT) goto err_conv; /* ** Note: Unlike lj_cconv_ct_ct(), sp holds the _value_ of pointers and ** numbers up to 8 bytes. Otherwise sp holds a pointer. */ switch (cconv_idx2(dinfo, sinfo)) { /* Destination is a bool. */ case CCX(B, B): goto xstore; /* Source operand is already normalized. */ case CCX(B, I): case CCX(B, F): if (st != IRT_CDATA) { /* Specialize to the result of a comparison against 0. */ TRef zero = (st == IRT_NUM || st == IRT_FLOAT) ? lj_ir_knum(J, 0) : (st == IRT_I64 || st == IRT_U64) ? lj_ir_kint64(J, 0) : lj_ir_kint(J, 0); int isnz = crec_isnonzero(s, svisnz); emitir(IRTG(isnz ? IR_NE : IR_EQ, st), sp, zero); sp = lj_ir_kint(J, isnz); goto xstore; } goto err_nyi; /* Destination is an integer. */ case CCX(I, B): case CCX(I, I): conv_I_I: if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi; /* Extend 32 to 64 bit integer. */ if (dsize == 8 && ssize < 8 && !(LJ_64 && (sinfo & CTF_UNSIGNED))) sp = emitconv(sp, dt, ssize < 4 ? IRT_INT : st, (sinfo & CTF_UNSIGNED) ? 0 : IRCONV_SEXT); else if (dsize < 8 && ssize == 8) /* Truncate from 64 bit integer. */ sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, 0); else if (st == IRT_INT) sp = lj_opt_narrow_toint(J, sp); xstore: if (dt == IRT_I64 || dt == IRT_U64) lj_needsplit(J); if (dp == 0) return sp; emitir(IRT(IR_XSTORE, dt), dp, sp); break; case CCX(I, C): sp = emitir(IRT(IR_XLOAD, st), sp, 0); /* Load re. */ /* fallthrough */ case CCX(I, F): if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi; sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, IRCONV_TRUNC|IRCONV_ANY); goto xstore; case CCX(I, P): case CCX(I, A): sinfo = CTINFO(CT_NUM, CTF_UNSIGNED); ssize = CTSIZE_PTR; st = IRT_UINTP; goto conv_I_I; /* Destination is a floating-point number. */ case CCX(F, B): case CCX(F, I): conv_F_I: if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi; sp = emitconv(sp, dt, ssize < 4 ? IRT_INT : st, 0); goto xstore; case CCX(F, C): sp = emitir(IRT(IR_XLOAD, st), sp, 0); /* Load re. */ /* fallthrough */ case CCX(F, F): conv_F_F: if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi; if (dt != st) sp = emitconv(sp, dt, st, 0); goto xstore; /* Destination is a complex number. */ case CCX(C, I): case CCX(C, F): { /* Clear im. */ TRef ptr = emitir(IRT(IR_ADD, IRT_PTR), dp, lj_ir_kintp(J, (dsize >> 1))); emitir(IRT(IR_XSTORE, dt), ptr, lj_ir_knum(J, 0)); } /* Convert to re. */ if ((sinfo & CTF_FP)) goto conv_F_F; else goto conv_F_I; case CCX(C, C): if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi; { TRef re, im, ptr; re = emitir(IRT(IR_XLOAD, st), sp, 0); ptr = emitir(IRT(IR_ADD, IRT_PTR), sp, lj_ir_kintp(J, (ssize >> 1))); im = emitir(IRT(IR_XLOAD, st), ptr, 0); if (dt != st) { re = emitconv(re, dt, st, 0); im = emitconv(im, dt, st, 0); } emitir(IRT(IR_XSTORE, dt), dp, re); ptr = emitir(IRT(IR_ADD, IRT_PTR), dp, lj_ir_kintp(J, (dsize >> 1))); emitir(IRT(IR_XSTORE, dt), ptr, im); } break; /* Destination is a vector. */ case CCX(V, I): case CCX(V, F): case CCX(V, C): case CCX(V, V): goto err_nyi; /* Destination is a pointer. */ case CCX(P, P): case CCX(P, A): case CCX(P, S): /* There are only 32 bit pointers/addresses on 32 bit machines. ** Also ok on x64, since all 32 bit ops clear the upper part of the reg. */ goto xstore; case CCX(P, I): if (st == IRT_CDATA) goto err_nyi; if (!LJ_64 && ssize == 8) /* Truncate from 64 bit integer. */ sp = emitconv(sp, IRT_U32, st, 0); goto xstore; case CCX(P, F): if (st == IRT_CDATA) goto err_nyi; /* The signed conversion is cheaper. x64 really has 47 bit pointers. */ sp = emitconv(sp, (LJ_64 && dsize == 8) ? IRT_I64 : IRT_U32, st, IRCONV_TRUNC|IRCONV_ANY); goto xstore; /* Destination is an array. */ case CCX(A, A): goto err_nyi; /* Destination is a struct/union. */ case CCX(S, S): goto err_nyi; default: err_conv: err_nyi: lj_trace_err(J, LJ_TRERR_NYICONV); break; } return 0; }
/* Replay snapshot state to setup side trace. */ void lj_snap_replay(jit_State *J, GCtrace *T) { SnapShot *snap = &T->snap[J->exitno]; SnapEntry *map = &T->snapmap[snap->mapofs]; MSize n, nent = snap->nent; BloomFilter seen = 0; int pass23 = 0; J->framedepth = 0; /* Emit IR for slots inherited from parent snapshot. */ for (n = 0; n < nent; n++) { SnapEntry sn = map[n]; BCReg s = snap_slot(sn); IRRef ref = snap_ref(sn); IRIns *ir = &T->ir[ref]; TRef tr; /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */ if (bloomtest(seen, ref) && (tr = snap_dedup(J, map, n, ref)) != 0) goto setslot; bloomset(seen, ref); if (irref_isk(ref)) { tr = snap_replay_const(J, ir); } else if (!regsp_used(ir->prev)) { pass23 = 1; lua_assert(s != 0); tr = s; } else { IRType t = irt_type(ir->t); uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT; if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM; if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY); tr = emitir_raw(IRT(IR_SLOAD, t), s, mode); } setslot: J->slot[s] = tr | (sn&(SNAP_CONT|SNAP_FRAME)); /* Same as TREF_* flags. */ J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && s); if ((sn & SNAP_FRAME)) J->baseslot = s+1; } if (pass23) { IRIns *irlast = &T->ir[snap->ref]; pass23 = 0; /* Emit dependent PVALs. */ for (n = 0; n < nent; n++) { SnapEntry sn = map[n]; IRRef refp = snap_ref(sn); IRIns *ir = &T->ir[refp]; if (regsp_reg(ir->r) == RID_SUNK) { if (J->slot[snap_slot(sn)] != snap_slot(sn)) continue; pass23 = 1; lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW || ir->o == IR_CNEWI); if (ir->op1 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op1); if (ir->op2 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op2); if (LJ_HASFFI && ir->o == IR_CNEWI) { if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP) snap_pref(J, T, map, nent, seen, (ir+1)->op2); } else { IRIns *irs; for (irs = ir+1; irs < irlast; irs++) if (irs->r == RID_SINK && snap_sunk_store(J, ir, irs)) { if (snap_pref(J, T, map, nent, seen, irs->op2) == 0) snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1); else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) && irs+1 < irlast && (irs+1)->o == IR_HIOP) snap_pref(J, T, map, nent, seen, (irs+1)->op2); } } } else if (!irref_isk(refp) && !regsp_used(ir->prev)) { lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT); J->slot[snap_slot(sn)] = snap_pref(J, T, map, nent, seen, ir->op1); } } /* Replay sunk instructions. */ for (n = 0; pass23 && n < nent; n++) { SnapEntry sn = map[n]; IRRef refp = snap_ref(sn); IRIns *ir = &T->ir[refp]; if (regsp_reg(ir->r) == RID_SUNK) { TRef op1, op2; if (J->slot[snap_slot(sn)] != snap_slot(sn)) { /* De-dup allocs. */ J->slot[snap_slot(sn)] = J->slot[J->slot[snap_slot(sn)]]; continue; } op1 = ir->op1; if (op1 >= T->nk) op1 = snap_pref(J, T, map, nent, seen, op1); op2 = ir->op2; if (op2 >= T->nk) op2 = snap_pref(J, T, map, nent, seen, op2); if (LJ_HASFFI && ir->o == IR_CNEWI) { if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP) { lj_needsplit(J); /* Emit joining HIOP. */ op2 = emitir_raw(IRT(IR_HIOP, IRT_I64), op2, snap_pref(J, T, map, nent, seen, (ir+1)->op2)); } J->slot[snap_slot(sn)] = emitir(ir->ot, op1, op2); } else { IRIns *irs; TRef tr = emitir(ir->ot, op1, op2); J->slot[snap_slot(sn)] = tr; for (irs = ir+1; irs < irlast; irs++) if (irs->r == RID_SINK && snap_sunk_store(J, ir, irs)) { IRIns *irr = &T->ir[irs->op1]; TRef val, key = irr->op2, tmp = tr; if (irr->o != IR_FREF) { IRIns *irk = &T->ir[key]; if (irr->o == IR_HREFK) key = lj_ir_kslot(J, snap_replay_const(J, &T->ir[irk->op1]), irk->op2); else key = snap_replay_const(J, irk); if (irr->o == IR_HREFK || irr->o == IR_AREF) { IRIns *irf = &T->ir[irr->op1]; tmp = emitir(irf->ot, tmp, irf->op2); } } tmp = emitir(irr->ot, tmp, key); val = snap_pref(J, T, map, nent, seen, irs->op2); if (val == 0) { IRIns *irc = &T->ir[irs->op2]; lua_assert(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT); val = snap_pref(J, T, map, nent, seen, irc->op1); val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT); } else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) && irs+1 < irlast && (irs+1)->o == IR_HIOP) { IRType t = IRT_I64; if (LJ_SOFTFP && irt_type((irs+1)->t) == IRT_SOFTFP) t = IRT_NUM; lj_needsplit(J); if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) { uint64_t k = (uint32_t)T->ir[irs->op2].i + ((uint64_t)T->ir[(irs+1)->op2].i << 32); val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM, lj_ir_k64_find(J, k)); } else { val = emitir_raw(IRT(IR_HIOP, t), val, snap_pref(J, T, map, nent, seen, (irs+1)->op2)); } tmp = emitir(IRT(irs->o, t), tmp, val); continue; } tmp = emitir(irs->ot, tmp, val); } else if (LJ_HASFFI && irs->o == IR_XBAR && ir->o == IR_CNEW) { emitir(IRT(IR_XBAR, IRT_NIL), 0, 0); } } } } } J->base = J->slot + J->baseslot; J->maxslot = snap->nslots - J->baseslot; lj_snap_add(J); if (pass23) /* Need explicit GC step _after_ initial snapshot. */ emitir_raw(IRTG(IR_GCSTEP, IRT_NIL), 0, 0); }
/* Handle string.byte (rd->data = 0) and string.sub (rd->data = 1). */ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd) { TRef trstr = lj_ir_tostr(J, J->base[0]); TRef trlen = emitir(IRTI(IR_FLOAD), trstr, IRFL_STR_LEN); TRef tr0 = lj_ir_kint(J, 0); TRef trstart, trend; GCstr *str = argv2str(J, &rd->argv[0]); int32_t start, end; if (rd->data) { /* string.sub(str, start [,end]) */ start = argv2int(J, &rd->argv[1]); trstart = lj_opt_narrow_toint(J, J->base[1]); trend = J->base[2]; if (tref_isnil(trend)) { trend = lj_ir_kint(J, -1); end = -1; } else { trend = lj_opt_narrow_toint(J, trend); end = argv2int(J, &rd->argv[2]); } } else { /* string.byte(str, [,start [,end]]) */ if (J->base[1]) { start = argv2int(J, &rd->argv[1]); trstart = lj_opt_narrow_toint(J, J->base[1]); trend = J->base[2]; if (tref_isnil(trend)) { trend = trstart; end = start; } else { trend = lj_opt_narrow_toint(J, trend); end = argv2int(J, &rd->argv[2]); } } else { trend = trstart = lj_ir_kint(J, 1); end = start = 1; } } if (end < 0) { emitir(IRTGI(IR_LT), trend, tr0); trend = emitir(IRTI(IR_ADD), emitir(IRTI(IR_ADD), trlen, trend), lj_ir_kint(J, 1)); end = end+(int32_t)str->len+1; } else if ((MSize)end <= str->len) { emitir(IRTGI(IR_ULE), trend, trlen); } else { emitir(IRTGI(IR_GT), trend, trlen); end = (int32_t)str->len; trend = trlen; } if (start < 0) { emitir(IRTGI(IR_LT), trstart, tr0); trstart = emitir(IRTI(IR_ADD), trlen, trstart); start = start+(int32_t)str->len; emitir(start < 0 ? IRTGI(IR_LT) : IRTGI(IR_GE), trstart, tr0); if (start < 0) { trstart = tr0; start = 0; } } else { if (start == 0) { emitir(IRTGI(IR_EQ), trstart, tr0); trstart = tr0; } else { trstart = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, -1)); emitir(IRTGI(IR_GE), trstart, tr0); start--; } } if (rd->data) { /* Return string.sub result. */ if (end - start >= 0) { /* Also handle empty range here, to avoid extra traces. */ TRef trptr, trslen = emitir(IRTI(IR_SUB), trend, trstart); emitir(IRTGI(IR_GE), trslen, tr0); trptr = emitir(IRT(IR_STRREF, IRT_P32), trstr, trstart); J->base[0] = emitir(IRT(IR_SNEW, IRT_STR), trptr, trslen); } else { /* Range underflow: return empty string. */ emitir(IRTGI(IR_LT), trend, trstart); J->base[0] = lj_ir_kstr(J, lj_str_new(J->L, strdata(str), 0)); } } else { /* Return string.byte result(s). */ ptrdiff_t i, len = end - start; if (len > 0) { TRef trslen = emitir(IRTI(IR_SUB), trend, trstart); emitir(IRTGI(IR_EQ), trslen, lj_ir_kint(J, (int32_t)len)); if (J->baseslot + len > LJ_MAX_JSLOTS) lj_trace_err_info(J, LJ_TRERR_STACKOV); rd->nres = len; for (i = 0; i < len; i++) { TRef tmp = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, (int32_t)i)); tmp = emitir(IRT(IR_STRREF, IRT_P32), trstr, tmp); J->base[i] = emitir(IRT(IR_XLOAD, IRT_U8), tmp, IRXLOAD_READONLY); } } else { /* Empty range or range underflow: return no results. */ emitir(IRTGI(IR_LE), trend, trstart); rd->nres = 0; } } }
/* Backpropagate narrowing conversion. Return number of needed conversions. */ static int narrow_conv_backprop(NarrowConv *nc, IRRef ref, int depth) { jit_State *J = nc->J; IRIns *ir = IR(ref); IRRef cref; /* Check the easy cases first. */ if (ir->o == IR_CONV && (ir->op2 & IRCONV_SRCMASK) == IRT_INT) { if ((nc->mode & IRCONV_CONVMASK) <= IRCONV_ANY) narrow_stripov_backprop(nc, ir->op1, depth+1); else *nc->sp++ = NARROWINS(NARROW_REF, ir->op1); /* Undo conversion. */ if (nc->t == IRT_I64) *nc->sp++ = NARROWINS(NARROW_SEXT, 0); /* Sign-extend integer. */ return 0; } else if (ir->o == IR_KNUM) { /* Narrow FP constant. */ lua_Number n = ir_knum(ir)->n; if ((nc->mode & IRCONV_CONVMASK) == IRCONV_TOBIT) { /* Allows a wider range of constants. */ int64_t k64 = (int64_t)n; if (n == (lua_Number)k64) { /* Only if const doesn't lose precision. */ *nc->sp++ = NARROWINS(NARROW_INT, 0); *nc->sp++ = (NarrowIns)k64; /* But always truncate to 32 bits. */ return 0; } } else { int32_t k = lj_num2int(n); /* Only if constant is a small integer. */ if (checki16(k) && n == (lua_Number)k) { *nc->sp++ = NARROWINS(NARROW_INT, 0); *nc->sp++ = (NarrowIns)k; return 0; } } return 10; /* Never narrow other FP constants (this is rare). */ } /* Try to CSE the conversion. Stronger checks are ok, too. */ cref = J->chain[fins->o]; while (cref > ref) { IRIns *cr = IR(cref); if (cr->op1 == ref && (fins->o == IR_TOBIT || ((cr->op2 & IRCONV_MODEMASK) == (nc->mode & IRCONV_MODEMASK) && irt_isguard(cr->t) >= irt_isguard(fins->t)))) { *nc->sp++ = NARROWINS(NARROW_REF, cref); return 0; /* Already there, no additional conversion needed. */ } cref = cr->prev; } /* Backpropagate across ADD/SUB. */ if (ir->o == IR_ADD || ir->o == IR_SUB) { /* Try cache lookup first. */ IRRef mode = nc->mode; BPropEntry *bp; /* Inner conversions need a stronger check. */ if ((mode & IRCONV_CONVMASK) == IRCONV_INDEX && depth > 0) mode += IRCONV_CHECK-IRCONV_INDEX; bp = narrow_bpc_get(nc->J, (IRRef1)ref, mode); if (bp) { *nc->sp++ = NARROWINS(NARROW_REF, bp->val); return 0; } else if (nc->t == IRT_I64) { /* Try sign-extending from an existing (checked) conversion to int. */ mode = (IRT_INT<<5)|IRT_NUM|IRCONV_INDEX; bp = narrow_bpc_get(nc->J, (IRRef1)ref, mode); if (bp) { *nc->sp++ = NARROWINS(NARROW_REF, bp->val); *nc->sp++ = NARROWINS(NARROW_SEXT, 0); return 0; } } if (++depth < NARROW_MAX_BACKPROP && nc->sp < nc->maxsp) { NarrowIns *savesp = nc->sp; int count = narrow_conv_backprop(nc, ir->op1, depth); count += narrow_conv_backprop(nc, ir->op2, depth); if (count <= nc->lim) { /* Limit total number of conversions. */ *nc->sp++ = NARROWINS(IRT(ir->o, nc->t), ref); return count; } nc->sp = savesp; /* Too many conversions, need to backtrack. */ } } /* Otherwise add a conversion. */ *nc->sp++ = NARROWINS(NARROW_CONV, ref); return 1; }
/* Emit or eliminate collected PHIs. */ static void loop_emit_phi(jit_State *J, IRRef1 *subst, IRRef1 *phi, IRRef nphi, SnapNo onsnap) { int passx = 0; IRRef i, j, nslots; IRRef invar = J->chain[IR_LOOP]; /* Pass #1: mark redundant and potentially redundant PHIs. */ for (i = 0, j = 0; i < nphi; i++) { IRRef lref = phi[i]; IRRef rref = subst[lref]; if (lref == rref || rref == REF_DROP) { /* Invariants are redundant. */ irt_clearphi(IR(lref)->t); } else { phi[j++] = (IRRef1)lref; if (!(IR(rref)->op1 == lref || IR(rref)->op2 == lref)) { /* Quick check for simple recurrences failed, need pass2. */ irt_setmark(IR(lref)->t); passx = 1; } } } nphi = j; /* Pass #2: traverse variant part and clear marks of non-redundant PHIs. */ if (passx) { SnapNo s; for (i = J->cur.nins-1; i > invar; i--) { IRIns *ir = IR(i); if (!irref_isk(ir->op2)) irt_clearmark(IR(ir->op2)->t); if (!irref_isk(ir->op1)) { irt_clearmark(IR(ir->op1)->t); if (ir->op1 < invar && ir->o >= IR_CALLN && ir->o <= IR_CARG) { /* ORDER IR */ ir = IR(ir->op1); while (ir->o == IR_CARG) { if (!irref_isk(ir->op2)) irt_clearmark(IR(ir->op2)->t); if (irref_isk(ir->op1)) break; ir = IR(ir->op1); irt_clearmark(ir->t); } } } } for (s = J->cur.nsnap-1; s >= onsnap; s--) { SnapShot *snap = &J->cur.snap[s]; SnapEntry *map = &J->cur.snapmap[snap->mapofs]; MSize n, nent = snap->nent; for (n = 0; n < nent; n++) { IRRef ref = snap_ref(map[n]); if (!irref_isk(ref)) irt_clearmark(IR(ref)->t); } } } /* Pass #3: add PHIs for variant slots without a corresponding SLOAD. */ nslots = J->baseslot+J->maxslot; for (i = 1; i < nslots; i++) { IRRef ref = tref_ref(J->slot[i]); while (!irref_isk(ref) && ref != subst[ref]) { IRIns *ir = IR(ref); irt_clearmark(ir->t); /* Unmark potential uses, too. */ if (irt_isphi(ir->t) || irt_ispri(ir->t)) break; irt_setphi(ir->t); if (nphi >= LJ_MAX_PHI) lj_trace_err(J, LJ_TRERR_PHIOV); phi[nphi++] = (IRRef1)ref; ref = subst[ref]; if (ref > invar) break; } } /* Pass #4: propagate non-redundant PHIs. */ while (passx) { passx = 0; for (i = 0; i < nphi; i++) { IRRef lref = phi[i]; IRIns *ir = IR(lref); if (!irt_ismarked(ir->t)) { /* Propagate only from unmarked PHIs. */ IRIns *irr = IR(subst[lref]); if (irt_ismarked(irr->t)) { /* Right ref points to other PHI? */ irt_clearmark(irr->t); /* Mark that PHI as non-redundant. */ passx = 1; /* Retry. */ } } } } /* Pass #5: emit PHI instructions or eliminate PHIs. */ for (i = 0; i < nphi; i++) { IRRef lref = phi[i]; IRIns *ir = IR(lref); if (!irt_ismarked(ir->t)) { /* Emit PHI if not marked. */ IRRef rref = subst[lref]; if (rref > invar) irt_setphi(IR(rref)->t); emitir_raw(IRT(IR_PHI, irt_type(ir->t)), lref, rref); } else { /* Otherwise eliminate PHI. */ irt_clearmark(ir->t); irt_clearphi(ir->t); } } }
if (LJ_LIKELY(J->flags & JIT_F_OPT_LOOP) && ofs #if LJ_TARGET_ARM && (sz == 1 || sz == 4) #endif ) { ptr = emitir(IRT(IR_ADD, IRT_PTR), ptr, lj_ir_kintp(J, ofs)); ofs = 0; }
/* XLOAD forwarding. */ TRef LJ_FASTCALL lj_opt_fwd_xload(jit_State *J) { IRRef xref = fins->op1; IRIns *xr = IR(xref); IRRef lim = xref; /* Search limit. */ IRRef ref; if ((fins->op2 & IRXLOAD_READONLY)) goto cselim; if ((fins->op2 & IRXLOAD_VOLATILE)) goto doemit; /* Search for conflicting stores. */ ref = J->chain[IR_XSTORE]; retry: if (J->chain[IR_CALLXS] > lim) lim = J->chain[IR_CALLXS]; if (J->chain[IR_XBAR] > lim) lim = J->chain[IR_XBAR]; while (ref > lim) { IRIns *store = IR(ref); switch (aa_xref(J, xr, fins, store)) { case ALIAS_NO: break; /* Continue searching. */ case ALIAS_MAY: lim = ref; goto cselim; /* Limit search for load. */ case ALIAS_MUST: /* Emit conversion if the loaded type doesn't match the forwarded type. */ if (!irt_sametype(fins->t, IR(store->op2)->t)) { IRType dt = irt_type(fins->t), st = irt_type(IR(store->op2)->t); if (dt == IRT_I8 || dt == IRT_I16) { /* Trunc + sign-extend. */ st = dt | IRCONV_SEXT; dt = IRT_INT; } else if (dt == IRT_U8 || dt == IRT_U16) { /* Trunc + zero-extend. */ st = dt; dt = IRT_INT; } fins->ot = IRT(IR_CONV, dt); fins->op1 = store->op2; fins->op2 = (dt<<5)|st; return RETRYFOLD; } return store->op2; /* Store forwarding. */ } ref = store->prev; } cselim: /* Try to find a matching load. Below the conflicting store, if any. */ ref = J->chain[IR_XLOAD]; while (ref > lim) { /* CSE for XLOAD depends on the type, but not on the IRXLOAD_* flags. */ if (IR(ref)->op1 == xref && irt_sametype(IR(ref)->t, fins->t)) return ref; ref = IR(ref)->prev; } /* Reassociate XLOAD across PHIs to handle a[i-1] forwarding case. */ if (!(fins->op2 & IRXLOAD_READONLY) && J->chain[IR_LOOP] && xref == fins->op1 && (xref = reassoc_xref(J, xr)) != 0) { ref = J->chain[IR_XSTORE]; while (ref > lim) /* Skip stores that have already been checked. */ ref = IR(ref)->prev; lim = xref; xr = IR(xref); goto retry; /* Retry with the reassociated reference. */ } doemit: return EMITFOLD; }
/* Transform the old IR to the new IR. */ static void split_ir(jit_State *J) { IRRef nins = J->cur.nins, nk = J->cur.nk; MSize irlen = nins - nk; MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1)); IRIns *oir = (IRIns *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, need); IRRef1 *hisubst; IRRef ref; /* Copy old IR to buffer. */ memcpy(oir, IR(nk), irlen*sizeof(IRIns)); /* Bias hiword substitution table and old IR. Loword kept in field prev. */ hisubst = (IRRef1 *)&oir[irlen] - nk; oir -= nk; /* Remove all IR instructions, but retain IR constants. */ J->cur.nins = REF_FIRST; /* Process constants and fixed references. */ for (ref = nk; ref <= REF_BASE; ref++) { IRIns *ir = &oir[ref]; if (ir->o == IR_KINT64) { /* Split up 64 bit constant. */ TValue tv = *ir_k64(ir); ir->prev = lj_ir_kint(J, (int32_t)tv.u32.lo); hisubst[ref] = lj_ir_kint(J, (int32_t)tv.u32.hi); } else { ir->prev = ref; /* Identity substitution for loword. */ hisubst[ref] = 0; } } /* Process old IR instructions. */ for (ref = REF_FIRST; ref < nins; ref++) { IRIns *ir = &oir[ref]; IRRef nref = lj_ir_nextins(J); IRIns *nir = IR(nref); IRRef hi = 0; /* Copy-substitute old instruction to new instruction. */ nir->op1 = ir->op1 < nk ? ir->op1 : oir[ir->op1].prev; nir->op2 = ir->op2 < nk ? ir->op2 : oir[ir->op2].prev; ir->prev = nref; /* Loword substitution. */ nir->o = ir->o; nir->t.irt = ir->t.irt & ~(IRT_MARK|IRT_ISPHI); hisubst[ref] = 0; /* Split 64 bit instructions. */ if (irt_isint64(ir->t)) { IRRef hiref = hisubst[ir->op1]; nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD); /* Turn into INT op. */ switch (ir->o) { case IR_ADD: case IR_SUB: /* Use plain op for hiword if loword cannot produce a carry/borrow. */ if (irref_isk(nir->op2) && IR(nir->op2)->i == 0) { ir->prev = nir->op1; /* Pass through loword. */ nir->op1 = hiref; nir->op2 = hisubst[ir->op2]; hi = nref; break; } /* fallthrough */ case IR_NEG: hi = split_emit(J, IRTI(IR_HIOP), hiref, hisubst[ir->op2]); break; case IR_MUL: hi = split_call64(J, hisubst, oir, ir, IRCALL_lj_carith_mul64); break; case IR_DIV: hi = split_call64(J, hisubst, oir, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : IRCALL_lj_carith_divu64); break; case IR_MOD: hi = split_call64(J, hisubst, oir, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : IRCALL_lj_carith_modu64); break; case IR_POW: hi = split_call64(J, hisubst, oir, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : IRCALL_lj_carith_powu64); break; case IR_FLOAD: lua_assert(ir->op2 == IRFL_CDATA_INT64); hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64HI); #if LJ_BE ir->prev = hi; hi = nref; #endif break; case IR_XLOAD: hi = split_emit(J, IRTI(IR_XLOAD), split_ptr(J, nir->op1), ir->op2); #if LJ_BE ir->prev = hi; hi = nref; #endif break; case IR_XSTORE: #if LJ_LE hiref = hisubst[ir->op2]; #else hiref = nir->op2; nir->op2 = hisubst[ir->op2]; #endif split_emit(J, IRTI(IR_XSTORE), split_ptr(J, nir->op1), hiref); break; case IR_CONV: { /* Conversion to 64 bit integer. Others handled below. */ IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); if (st == IRT_NUM || st == IRT_FLOAT) { /* FP to 64 bit int conv. */ hi = split_emit(J, IRTI(IR_HIOP), nir->op1, nref); } else if (st == IRT_I64 || st == IRT_U64) { /* 64/64 bit cast. */ /* Drop cast, since assembler doesn't care. */ goto fwdlo; } else if ((ir->op2 & IRCONV_SEXT)) { /* Sign-extend to 64 bit. */ IRRef k31 = lj_ir_kint(J, 31); nir = IR(nref); /* May have been reallocated. */ ir->prev = nir->op1; /* Pass through loword. */ nir->o = IR_BSAR; /* hi = bsar(lo, 31). */ nir->op2 = k31; hi = nref; } else { /* Zero-extend to 64 bit. */ hi = lj_ir_kint(J, 0); goto fwdlo; } break; } case IR_PHI: { IRRef hiref2; if ((irref_isk(nir->op1) && irref_isk(nir->op2)) || nir->op1 == nir->op2) J->cur.nins--; /* Drop useless PHIs. */ hiref2 = hisubst[ir->op2]; if (!((irref_isk(hiref) && irref_isk(hiref2)) || hiref == hiref2)) split_emit(J, IRTI(IR_PHI), hiref, hiref2); break; } default: lua_assert(ir->o <= IR_NE); /* Comparisons. */ split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]); break; } } else if (ir->o == IR_CONV) { /* See above, too. */ IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); if (st == IRT_I64 || st == IRT_U64) { /* Conversion from 64 bit int. */ if (irt_isfp(ir->t)) { /* 64 bit integer to FP conversion. */ ir->prev = split_emit(J, IRT(IR_HIOP, irt_type(ir->t)), hisubst[ir->op1], nref); } else { /* Truncate to lower 32 bits. */ fwdlo: ir->prev = nir->op1; /* Forward loword. */ /* Replace with NOP to avoid messing up the snapshot logic. */ nir->ot = IRT(IR_NOP, IRT_NIL); nir->op1 = nir->op2 = 0; } } } else if (ir->o == IR_CNEWI) { if (hisubst[ir->op2]) split_emit(J, IRT(IR_HIOP, IRT_NIL), nref, hisubst[ir->op2]); } else if (ir->o == IR_LOOP) { J->loopref = nref; /* Needed by assembler. */ } hisubst[ref] = hi; /* Store hiword substitution. */ } /* Add PHI marks. */ for (ref = J->cur.nins-1; ref >= REF_FIRST; ref--) { IRIns *ir = IR(ref); if (ir->o != IR_PHI) break; if (!irref_isk(ir->op1)) irt_setphi(IR(ir->op1)->t); if (ir->op2 > J->loopref) irt_setphi(IR(ir->op2)->t); } /* Substitute snapshot maps. */ oir[nins].prev = J->cur.nins; /* Substitution for last snapshot. */ { SnapNo i, nsnap = J->cur.nsnap; for (i = 0; i < nsnap; i++) { SnapShot *snap = &J->cur.snap[i]; SnapEntry *map = &J->cur.snapmap[snap->mapofs]; MSize n, nent = snap->nent; snap->ref = oir[snap->ref].prev; for (n = 0; n < nent; n++) { SnapEntry sn = map[n]; map[n] = ((sn & 0xffff0000) | oir[snap_ref(sn)].prev); } } } }