/*
** Constant-fold an integer load of type ir->t from the memory at p.
**
** ir  Instruction whose result type selects the width and signedness of
**     the load (8/16 bit signed or unsigned, otherwise 32 bit int).
** p   Address of the value to read. It need not be aligned for the type.
**
** Returns the loaded value, sign- or zero-extended to 32 bits.
**
** The multi-byte cases copy the bytes out with memcpy() instead of
** dereferencing a casted pointer. This reads the same native-endian value,
** but is well-defined for unaligned addresses and does not violate the
** effective-type (strict aliasing) rules, so no target-specific guard for
** unaligned loads is needed.
*/
static int32_t kfold_xload(IRIns *ir, const void *p)
{
  switch (irt_type(ir->t)) {
  case IRT_I8:
    return (int32_t)*(const int8_t *)p;
  case IRT_U8:
    return (int32_t)*(const uint8_t *)p;
  case IRT_I16: {
    int16_t v;
    memcpy(&v, p, sizeof(v));
    return (int32_t)v;
  }
  case IRT_U16: {
    uint16_t v;
    memcpy(&v, p, sizeof(v));
    return (int32_t)v;
  }
  default: {
    int32_t v;
    lua_assert(irt_isint(ir->t));
    memcpy(&v, p, sizeof(v));
    return v;
  }
  }
}
/*
** Reassociate index references.
**
** Takes the address expression ir of a load and tries to find an already
** existing, equivalent expression in which the constant part of the index
** has been moved into the constant offset. Returns the reference of that
** expression, or 0 if the shape does not match or a CSE lookup fails.
*/
static IRRef reassoc_xref(jit_State *J, IRIns *ir)
{
  ptrdiff_t disp = 0;  /* Accumulated constant displacement. */
  int32_t sh = 0;  /* Scale of the index, as a shift count. */
  IRIns *scaled, *inner;
  IRRef found;

  /* Peel off an outer constant offset, if there is one. */
  if (ir->o == IR_ADD && irref_isk(ir->op2)) {
    IRIns *kins = IR(ir->op2);
    if (LJ_64 && kins->o == IR_KINT64)
      disp = (ptrdiff_t)ir_k64(kins)->u64;
    else
      disp = (ptrdiff_t)kins->i;
    ir = IR(ir->op1);
  }

  /* What remains must be an add of base + index. */
  if (ir->o != IR_ADD)
    return 0;  /* Failure. */

  /* Index ref > base ref for loop-carried dependences. Only check op1. */
  scaled = IR(ir->op1);
  /* Determine index shifts. Don't bother with IR_MUL here. */
  if (scaled->o == IR_BSHL && irref_isk(scaled->op2)) {
    sh = IR(scaled->op2)->i;
  } else if (scaled->o == IR_ADD && scaled->op1 == scaled->op2) {
    sh = 1;
  } else {
    scaled = ir;  /* Unscaled index: the outer add is used directly. */
  }

  /* A non-reassociated add. Must be a loop-carried dependence. */
  inner = IR(scaled->op1);
  if (!(inner->o == IR_ADD && irt_isint(inner->t) && irref_isk(inner->op2)))
    return 0;
  disp += (ptrdiff_t)IR(inner->op2)->i << sh;
  found = inner->op1;

  /* Try to CSE the reassociated chain. Give up if not found. */
  if (scaled != ir) {
    found = reassoc_trycse(J, scaled->o, found,
                           scaled->o == IR_BSHL ? scaled->op2 : found);
    if (!found)
      return 0;
  }
  found = reassoc_trycse(J, IR_ADD, found, ir->op2);
  if (!found)
    return 0;
  if (disp != 0) {
    IRRef kref = tref_ref(lj_ir_kintp(J, disp));
    found = reassoc_trycse(J, IR_ADD, found, kref);
    if (!found)
      return 0;
  }
  return found;  /* Success, found a reassociated index reference. */
}
/*
** Array and hash load forwarding.
**
** xref is the reference operand of the load currently being folded (fins).
** Returns the forwarded value of a must-alias store, a constant for a load
** from a fresh allocation, the reference of an earlier matching load, or 0
** if there is a conflict or nothing matches.
*/
static TRef fwd_ahload(jit_State *J, IRRef xref)
{
  IRIns *xr = IR(xref);
  IRRef lim = xref;  /* Lower bound for the matching-load search. */
  IRRef ref;
  IRRef tab;
  IRIns *alloc;

  /* Walk the store chain for this load kind, looking for conflicts. */
  for (ref = J->chain[fins->o+IRDELTA_L2S]; ref > xref; ref = IR(ref)->prev) {
    IRIns *st = IR(ref);
    switch (aa_ahref(J, xr, IR(st->op1))) {
    case ALIAS_NO:
      break;  /* Keep going. */
    case ALIAS_MAY:
      lim = ref;  /* Only loads above this store may be reused. */
      goto cselim;
    case ALIAS_MUST:
      return st->op2;  /* Store forwarding. */
    }
  }

  /* No conflicting store (yet): const-fold loads from allocations. */
  if (xr->o == IR_HREFK || xr->o == IR_AREF)
    tab = IR(xr->op1)->op1;
  else
    tab = xr->op1;
  alloc = IR(tab);
  if (alloc->o == IR_TNEW || (alloc->o == IR_TDUP && irref_isk(xr->op2))) {
    /* A NEWREF with a number key may end up pointing to the array part.
    ** But it's referenced from HSTORE and not found in the ASTORE chain.
    ** For now simply consider this a conflict without forwarding anything.
    */
    if (xr->o == IR_AREF) {
      IRRef nref;
      for (nref = J->chain[IR_NEWREF]; nref > tab; nref = IR(nref)->prev) {
        if (irt_isnum(IR(IR(nref)->op2)->t))
          goto cselim;
      }
    }
    /* NEWREF inhibits CSE for HREF, and dependent FLOADs from HREFK/AREF.
    ** But the above search for conflicting stores was limited by xref.
    ** So continue searching, limited by the TNEW/TDUP. Store forwarding
    ** is ok, too. A conflict does NOT limit the search for a matching load.
    */
    for (; ref > tab; ref = IR(ref)->prev) {
      IRIns *st = IR(ref);
      switch (aa_ahref(J, xr, IR(st->op1))) {
      case ALIAS_NO:
        break;  /* Keep going. */
      case ALIAS_MAY:
        goto cselim;  /* Conflicting store. */
      case ALIAS_MUST:
        return st->op2;  /* Store forwarding. */
      }
    }
    lua_assert(alloc->o != IR_TNEW || irt_isnil(fins->t));
    if (irt_ispri(fins->t))
      return TREF_PRI(irt_type(fins->t));
    if (irt_isnum(fins->t) || (LJ_DUALNUM && irt_isint(fins->t)) ||
        irt_isstr(fins->t)) {
      /* Look the key up in the table constant and intern the value. */
      TValue keyv;
      cTValue *tv;
      IRIns *key = IR(xr->op2);
      if (key->o == IR_KSLOT)
        key = IR(key->op1);
      lj_ir_kvalue(J->L, &keyv, key);
      tv = lj_tab_get(J->L, ir_ktab(IR(alloc->op1)), &keyv);
      lua_assert(itype2irt(tv) == irt_type(fins->t));
      if (irt_isnum(fins->t))
        return lj_ir_knum_u64(J, tv->u64);
      if (LJ_DUALNUM && irt_isint(fins->t))
        return lj_ir_kint(J, intV(tv));
      return lj_ir_kstr(J, strV(tv));
    }
    /* Otherwise: don't intern as a constant. */
  }

cselim:
  /* Try to find a matching load. Below the conflicting store, if any. */
  for (ref = J->chain[fins->o]; ref > lim; ref = IR(ref)->prev) {
    if (IR(ref)->op1 == xref)
      return ref;  /* Load forwarding. */
  }
  return 0;  /* Conflict or no match. */
}
/*
** Transform the old IR to the new IR.
**
** The current trace IR (J->cur) is copied into a temporary buffer and then
** re-emitted instruction by instruction. For every old reference two
** substitutions are tracked:
**   - the loword: stored in the 'prev' field of the old instruction copy,
**   - the hiword: stored in the hisubst[] table (0 = no hiword).
** Which instructions get split depends on the build configuration
** (LJ_SOFTFP for numbers, LJ_32 && LJ_HASFFI for 64 bit integers).
** Snapshots are re-pointed to the new references as they are passed, and
** PHI marks are re-applied at the end.
*/
static void split_ir(jit_State *J)
{
  IRRef nins = J->cur.nins, nk = J->cur.nk;
  MSize irlen = nins - nk;
  /* One old IRIns plus one hisubst entry per reference (and one spare). */
  MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1));
  IRIns *oir = (IRIns *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, need);
  IRRef1 *hisubst;
  IRRef ref, snref;
  SnapShot *snap;

  /* Copy old IR to buffer. */
  memcpy(oir, IR(nk), irlen*sizeof(IRIns));
  /* Bias hiword substitution table and old IR. Loword kept in field prev. */
  /* After biasing, both oir[] and hisubst[] are indexed directly by IRRef. */
  hisubst = (IRRef1 *)&oir[irlen] - nk;
  oir -= nk;

  /* Remove all IR instructions, but retain IR constants. */
  J->cur.nins = REF_FIRST;
  J->loopref = 0;

  /* Process constants and fixed references. */
  for (ref = nk; ref <= REF_BASE; ref++) {
    IRIns *ir = &oir[ref];
    if ((LJ_SOFTFP && ir->o == IR_KNUM) || ir->o == IR_KINT64) {
      /* Split up 64 bit constant. */
      TValue tv = *ir_k64(ir);
      ir->prev = lj_ir_kint(J, (int32_t)tv.u32.lo);
      hisubst[ref] = lj_ir_kint(J, (int32_t)tv.u32.hi);
    } else {
      ir->prev = ref;  /* Identity substitution for loword. */
      hisubst[ref] = 0;
    }
  }

  /* Process old IR instructions. */
  snap = J->cur.snap;
  snref = snap->ref;
  for (ref = REF_FIRST; ref < nins; ref++) {
    IRIns *ir = &oir[ref];  /* Old instruction (in the buffer copy). */
    IRRef nref = lj_ir_nextins(J);  /* Slot for the new instruction. */
    IRIns *nir = IR(nref);
    IRRef hi = 0;  /* Hiword ref for this instruction, 0 if none. */

    /* Reached the next snapshot: re-point it and substitute its refs. */
    if (ref >= snref) {
      snap->ref = nref;
      split_subst_snap(J, snap++, oir);
      /* No more snapshots: use a ref that is never reached. */
      snref = snap < &J->cur.snap[J->cur.nsnap] ? snap->ref : ~(IRRef)0;
    }

    /* Copy-substitute old instruction to new instruction. */
    /* Operands below nk are taken literally, others use the loword subst. */
    nir->op1 = ir->op1 < nk ? ir->op1 : oir[ir->op1].prev;
    nir->op2 = ir->op2 < nk ? ir->op2 : oir[ir->op2].prev;
    ir->prev = nref;  /* Loword substitution. */
    nir->o = ir->o;
    nir->t.irt = ir->t.irt & ~(IRT_MARK|IRT_ISPHI);
    hisubst[ref] = 0;

    /* Split 64 bit instructions. */
#if LJ_SOFTFP
    if (irt_isnum(ir->t)) {
      nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD);  /* Turn into INT op. */
      /* Note: hi ref = lo ref + 1! Required for SNAP_SOFTFPNUM logic. */
      switch (ir->o) {
      case IR_ADD:
        hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_add);
        break;
      case IR_SUB:
        hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_sub);
        break;
      case IR_MUL:
        hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_mul);
        break;
      case IR_DIV:
        hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_div);
        break;
      case IR_POW:
        hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi);
        break;
      case IR_FPMATH:
        /* Try to rejoin pow from EXP2, MUL and LOG2. */
        if (nir->op2 == IRFPM_EXP2 && nir->op1 > J->loopref) {
          IRIns *irp = IR(nir->op1);
          if (irp->o == IR_CALLN && irp->op2 == IRCALL_softfp_mul) {
            /* Walk down the argument chain of the softfp_mul call. */
            IRIns *irm4 = IR(irp->op1);
            IRIns *irm3 = IR(irm4->op1);
            IRIns *irm12 = IR(irm3->op1);
            IRIns *irl1 = IR(irm12->op1);
            if (irm12->op1 > J->loopref && irl1->o == IR_CALLN &&
                irl1->op2 == IRCALL_lj_vm_log2) {
              IRRef tmp = irl1->op1;  /* Recycle first two args from LOG2. */
              IRRef arg3 = irm3->op2, arg4 = irm4->op2;
              J->cur.nins--;  /* Undo the emit of the copied FPMATH. */
              tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg3);
              tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg4);
              ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_pow);
              hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
              break;
            }
          }
        }
        /* Not a pow pattern: call selected by offset from lj_vm_floor. */
        hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2);
        break;
      case IR_ATAN2:
        hi = split_call_ll(J, hisubst, oir, ir, IRCALL_atan2);
        break;
      case IR_LDEXP:
        hi = split_call_li(J, hisubst, oir, ir, IRCALL_ldexp);
        break;
      case IR_NEG: case IR_ABS:
        nir->o = IR_CONV;  /* Pass through loword. */
        nir->op2 = (IRT_INT << 5) | IRT_INT;
        /* Only the hiword is changed: BXOR for NEG, BAND for ABS. */
        hi = split_emit(J, IRT(ir->o == IR_NEG ? IR_BXOR : IR_BAND, IRT_SOFTFP),
                        hisubst[ir->op1], hisubst[ir->op2]);
        break;
      case IR_SLOAD:
        if ((nir->op2 & IRSLOAD_CONVERT)) {  /* Convert from int to number. */
          nir->op2 &= ~IRSLOAD_CONVERT;
          ir->prev = nref = split_emit(J, IRTI(IR_CALLN), nref,
                                       IRCALL_softfp_i2d);
          hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
          break;
        }
        /* fallthrough */
      case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
      case IR_STRTO:
        hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
        break;
      case IR_XLOAD: {
        IRIns inslo = *nir;  /* Save/undo the emit of the lo XLOAD. */
        J->cur.nins--;
        hi = split_ptr(J, oir, ir->op1);  /* Insert the hiref ADD. */
        nref = lj_ir_nextins(J);
        nir = IR(nref);
        *nir = inslo;  /* Re-emit lo XLOAD immediately before hi XLOAD. */
        hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP), hi, ir->op2);
        /* Which of the two loads is the loword depends on endianness. */
#if LJ_LE
        ir->prev = nref;
#else
        ir->prev = hi; hi = nref;
#endif
        break;
        }
      case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_XSTORE:
        split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nir->op1, hisubst[ir->op2]);
        break;
      case IR_CONV: {  /* Conversion to number. Others handled below. */
        IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
        UNUSED(st);
#if LJ_32 && LJ_HASFFI
        if (st == IRT_I64 || st == IRT_U64) {
          hi = split_call_l(J, hisubst, oir, ir,
                 st == IRT_I64 ? IRCALL_fp64_l2d : IRCALL_fp64_ul2d);
          break;
        }
#endif
        lua_assert(st == IRT_INT ||
                   (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT)));
        nir->o = IR_CALLN;
#if LJ_32 && LJ_HASFFI
        nir->op2 = st == IRT_INT ? IRCALL_softfp_i2d :
                   st == IRT_FLOAT ? IRCALL_softfp_f2d :
                   IRCALL_softfp_ui2d;
#else
        nir->op2 = IRCALL_softfp_i2d;
#endif
        hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
        break;
        }
      case IR_CALLN:
      case IR_CALLL:
      case IR_CALLS:
      case IR_CALLXS:
        goto split_call;  /* Shared call handling further below. */
      case IR_PHI:
        if (nir->op1 == nir->op2)
          J->cur.nins--;  /* Drop useless PHIs. */
        if (hisubst[ir->op1] != hisubst[ir->op2])
          split_emit(J, IRT(IR_PHI, IRT_SOFTFP),
                     hisubst[ir->op1], hisubst[ir->op2]);
        break;
      case IR_HIOP:
        J->cur.nins--;  /* Drop joining HIOP. */
        ir->prev = nir->op1;
        hi = nir->op2;
        break;
      default:
        lua_assert(ir->o <= IR_NE || ir->o == IR_MIN || ir->o == IR_MAX);
        hi = split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP),
                        hisubst[ir->op1], hisubst[ir->op2]);
        break;
      }
    } else
#endif
#if LJ_32 && LJ_HASFFI
    if (irt_isint64(ir->t)) {
      IRRef hiref = hisubst[ir->op1];  /* Hiword of the first operand. */
      nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD);  /* Turn into INT op. */
      switch (ir->o) {
      case IR_ADD:
      case IR_SUB:
        /* Use plain op for hiword if loword cannot produce a carry/borrow. */
        if (irref_isk(nir->op2) && IR(nir->op2)->i == 0) {
          ir->prev = nir->op1;  /* Pass through loword. */
          nir->op1 = hiref; nir->op2 = hisubst[ir->op2];
          hi = nref;
          break;
        }
        /* fallthrough */
      case IR_NEG:
        hi = split_emit(J, IRTI(IR_HIOP), hiref, hisubst[ir->op2]);
        break;
      case IR_MUL:
        hi = split_call_ll(J, hisubst, oir, ir, IRCALL_lj_carith_mul64);
        break;
      case IR_DIV:
        hi = split_call_ll(J, hisubst, oir, ir,
                           irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
                                              IRCALL_lj_carith_divu64);
        break;
      case IR_MOD:
        hi = split_call_ll(J, hisubst, oir, ir,
                           irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
                                              IRCALL_lj_carith_modu64);
        break;
      case IR_POW:
        hi = split_call_ll(J, hisubst, oir, ir,
                           irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
                                              IRCALL_lj_carith_powu64);
        break;
      case IR_FLOAD:
        lua_assert(ir->op2 == IRFL_CDATA_INT64);
        hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4);
        /* On big-endian the second load is the loword: swap the roles. */
#if LJ_BE
        ir->prev = hi; hi = nref;
#endif
        break;
      case IR_XLOAD:
        hi = split_emit(J, IRTI(IR_XLOAD), split_ptr(J, oir, ir->op1), ir->op2);
        /* On big-endian the second load is the loword: swap the roles. */
#if LJ_BE
        ir->prev = hi; hi = nref;
#endif
        break;
      case IR_XSTORE:
        split_emit(J, IRTI(IR_HIOP), nir->op1, hisubst[ir->op2]);
        break;
      case IR_CONV: {  /* Conversion to 64 bit integer. Others handled below. */
        IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
#if LJ_SOFTFP
        if (st == IRT_NUM) {  /* NUM to 64 bit int conv. */
          hi = split_call_l(J, hisubst, oir, ir,
                 irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul);
        } else if (st == IRT_FLOAT) {  /* FLOAT to 64 bit int conv. */
          nir->o = IR_CALLN;
          nir->op2 = irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul;
          hi = split_emit(J, IRTI(IR_HIOP), nref, nref);
        }
#else
        if (st == IRT_NUM || st == IRT_FLOAT) {  /* FP to 64 bit int conv. */
          hi = split_emit(J, IRTI(IR_HIOP), nir->op1, nref);
        }
#endif
        else if (st == IRT_I64 || st == IRT_U64) {  /* 64/64 bit cast. */
          /* Drop cast, since assembler doesn't care. But fwd both parts. */
          hi = hiref;
          goto fwdlo;
        } else if ((ir->op2 & IRCONV_SEXT)) {  /* Sign-extend to 64 bit. */
          IRRef k31 = lj_ir_kint(J, 31);
          nir = IR(nref);  /* May have been reallocated. */
          ir->prev = nir->op1;  /* Pass through loword. */
          nir->o = IR_BSAR;  /* hi = bsar(lo, 31). */
          nir->op2 = k31;
          hi = nref;
        } else {  /* Zero-extend to 64 bit. */
          hi = lj_ir_kint(J, 0);
          goto fwdlo;
        }
        break;
        }
      case IR_CALLXS:
        goto split_call;  /* Shared call handling further below. */
      case IR_PHI: {
        IRRef hiref2;
        if ((irref_isk(nir->op1) && irref_isk(nir->op2)) ||
            nir->op1 == nir->op2)
          J->cur.nins--;  /* Drop useless PHIs. */
        hiref2 = hisubst[ir->op2];
        /* Same test for the hiword PHI: emit it only if it is useful. */
        if (!((irref_isk(hiref) && irref_isk(hiref2)) || hiref == hiref2))
          split_emit(J, IRTI(IR_PHI), hiref, hiref2);
        break;
        }
      case IR_HIOP:
        J->cur.nins--;  /* Drop joining HIOP. */
        ir->prev = nir->op1;
        hi = nir->op2;
        break;
      default:
        lua_assert(ir->o <= IR_NE);  /* Comparisons. */
        split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]);
        break;
      }
    } else
#endif
#if LJ_SOFTFP
    if (ir->o == IR_SLOAD) {
      if ((nir->op2 & IRSLOAD_CONVERT)) {  /* Convert from number to int. */
        nir->op2 &= ~IRSLOAD_CONVERT;
        if (!(nir->op2 & IRSLOAD_TYPECHECK))
          nir->t.irt = IRT_INT;  /* Drop guard. */
        split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
        /* nref+1 is the HIOP emitted just above. */
        ir->prev = split_num2int(J, nref, nref+1, irt_isguard(ir->t));
      }
    } else if (ir->o == IR_TOBIT) {
      IRRef tmp, op1 = ir->op1;
      J->cur.nins--;  /* Undo the emit of the copied TOBIT. */
      /* Pass both halves of the number to lj_vm_tobit, in memory order. */
#if LJ_LE
      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
      ir->prev = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_lj_vm_tobit);
    } else if (ir->o == IR_TOSTR) {
      if (hisubst[ir->op1]) {
        if (irref_isk(ir->op1))
          nir->op1 = ir->op1;  /* Keep the original (unsplit) constant. */
        else
          split_emit(J, IRT(IR_HIOP, IRT_NIL), hisubst[ir->op1], nref);
      }
    } else if (ir->o == IR_HREF || ir->o == IR_NEWREF) {
      if (irref_isk(ir->op2) && hisubst[ir->op2])
        nir->op2 = ir->op2;  /* Keep the original (unsplit) constant key. */
    } else
#endif
    if (ir->o == IR_CONV) {  /* See above, too. */
      IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
#if LJ_32 && LJ_HASFFI
      if (st == IRT_I64 || st == IRT_U64) {  /* Conversion from 64 bit int. */
#if LJ_SOFTFP
        if (irt_isfloat(ir->t)) {
          split_call_l(J, hisubst, oir, ir,
                       st == IRT_I64 ? IRCALL_fp64_l2f : IRCALL_fp64_ul2f);
          J->cur.nins--;  /* Drop unused HIOP. */
        }
#else
        if (irt_isfp(ir->t)) {  /* 64 bit integer to FP conversion. */
          ir->prev = split_emit(J, IRT(IR_HIOP, irt_type(ir->t)),
                                hisubst[ir->op1], nref);
        }
#endif
        else {  /* Truncate to lower 32 bits. */
        fwdlo:  /* Also entered from the 64 bit IR_CONV cases above. */
          ir->prev = nir->op1;  /* Forward loword. */
          /* Replace with NOP to avoid messing up the snapshot logic. */
          nir->ot = IRT(IR_NOP, IRT_NIL);
          nir->op1 = nir->op2 = 0;
        }
      }
#endif
#if LJ_SOFTFP && LJ_32 && LJ_HASFFI
      else if (irt_isfloat(ir->t)) {
        if (st == IRT_NUM) {
          split_call_l(J, hisubst, oir, ir, IRCALL_softfp_d2f);
          J->cur.nins--;  /* Drop unused HIOP. */
        } else {
          nir->o = IR_CALLN;
          nir->op2 = st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f;
        }
      } else if (st == IRT_FLOAT) {
        nir->o = IR_CALLN;
        nir->op2 = irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui;
      } else
#endif
#if LJ_SOFTFP
      if (st == IRT_NUM || (LJ_32 && LJ_HASFFI && st == IRT_FLOAT)) {
        if (irt_isguard(ir->t)) {
          lua_assert(st == IRT_NUM && irt_isint(ir->t));
          J->cur.nins--;  /* Undo the emit of the copied CONV. */
          ir->prev = split_num2int(J, nir->op1, hisubst[ir->op1], 1);
        } else {
          split_call_l(J, hisubst, oir, ir,
#if LJ_32 && LJ_HASFFI
            st == IRT_NUM ?
              (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) :
              (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui)
#else
            IRCALL_softfp_d2i
#endif
          );
          J->cur.nins--;  /* Drop unused HIOP. */
        }
      }
#endif
    } else if (ir->o == IR_CALLXS) {
      IRRef hiref;
    split_call:  /* Also entered from the call cases in the switches above. */
      hiref = hisubst[ir->op1];
      if (hiref) {
        /* op1 has a hiword: turn the copy into a CARG pair, re-emit call. */
        IROpT ot = nir->ot;
        IRRef op2 = nir->op2;
        nir->ot = IRT(IR_CARG, IRT_NIL);
#if LJ_LE
        nir->op2 = hiref;
#else
        nir->op2 = nir->op1; nir->op1 = hiref;
#endif
        ir->prev = nref = split_emit(J, ot, nref, op2);
      }
      /* A 64 bit result needs a HIOP following the call. */
      if (LJ_SOFTFP ? irt_is64(ir->t) : irt_isint64(ir->t))
        hi = split_emit(J,
          IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT),
          nref, nref);
    } else if (ir->o == IR_CARG) {
      IRRef hiref = hisubst[ir->op1];
      if (hiref) {
        /* op1 has a hiword: split it into its own CARG pair first. */
        IRRef op2 = nir->op2;
#if LJ_LE
        nir->op2 = hiref;
#else
        nir->op2 = nir->op1; nir->op1 = hiref;
#endif
        ir->prev = nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2);
        nir = IR(nref);
      }
      hiref = hisubst[ir->op2];
      if (hiref) {
#if !LJ_TARGET_X86
        /* Count the CARGs chained through op1 before this one. */
        int carg = 0;
        IRIns *cir;
        for (cir = IR(nir->op1); cir->o == IR_CARG; cir = IR(cir->op1))
          carg++;
        if ((carg & 1) == 0) {  /* Align 64 bit arguments. */
          IRRef op2 = nir->op2;
          nir->op2 = REF_NIL;  /* Padding argument. */
          nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2);
          nir = IR(nref);
        }
#endif
#if LJ_BE
        { IRRef tmp = nir->op2; nir->op2 = hiref; hiref = tmp; }
#endif
        ir->prev = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, hiref);
      }
    } else if (ir->o == IR_CNEWI) {
      if (hisubst[ir->op2])
        split_emit(J, IRT(IR_HIOP, IRT_NIL), nref, hisubst[ir->op2]);
    } else if (ir->o == IR_LOOP) {
      J->loopref = nref;  /* Needed by assembler. */
    }
    hisubst[ref] = hi;  /* Store hiword substitution. */
  }
  if (snref == nins) {  /* Substitution for last snapshot. */
    snap->ref = J->cur.nins;
    split_subst_snap(J, snap, oir);
  }

  /* Add PHI marks. */
  /* Scan backwards from the last instruction; stop at the first non-PHI. */
  for (ref = J->cur.nins-1; ref >= REF_FIRST; ref--) {
    IRIns *ir = IR(ref);
    if (ir->o != IR_PHI) break;
    if (!irref_isk(ir->op1)) irt_setphi(IR(ir->op1)->t);
    if (ir->op2 > J->loopref) irt_setphi(IR(ir->op2)->t);
  }
}
/*
** XLOAD forwarding.
**
** Operates on the XLOAD currently being folded (fins). Depending on what
** the store and load chains contain, it returns the forwarded store value,
** RETRYFOLD after rewriting fins into a CONV of the stored value, the
** reference of an earlier identical XLOAD, or EMITFOLD.
*/
TRef LJ_FASTCALL lj_opt_fwd_xload(jit_State *J)
{
  IRRef xref = fins->op1;
  IRIns *xr = IR(xref);
  IRRef lim = xref;  /* Lower bound for all chain searches. */
  IRRef ref;

  if ((fins->op2 & IRXLOAD_READONLY))
    goto cselim;  /* No store search needed: go straight to load CSE. */
  if ((fins->op2 & IRXLOAD_VOLATILE))
    goto doemit;  /* Never forwarded or CSEd. */

  /* Search for conflicting stores. */
  ref = J->chain[IR_XSTORE];
retry:
  /* The search must not go below the latest CALLXS or XBAR. */
  if (lim < J->chain[IR_CALLXS])
    lim = J->chain[IR_CALLXS];
  if (lim < J->chain[IR_XBAR])
    lim = J->chain[IR_XBAR];
  for (; ref > lim; ref = IR(ref)->prev) {
    IRIns *store = IR(ref);
    switch (aa_xref(J, xr, fins, store)) {
    case ALIAS_NO:
      break;  /* Continue searching. */
    case ALIAS_MAY:
      lim = ref;  /* Limit search for load. */
      goto cselim;
    case ALIAS_MUST:
      /* Emit conversion if the loaded type doesn't match the forwarded type. */
      if (!irt_sametype(fins->t, IR(store->op2)->t)) {
        IRType st = irt_type(fins->t);
        int needconv = 1;
        if (st == IRT_I8 || st == IRT_I16) {
          st |= IRCONV_SEXT;  /* Trunc + sign-extend. */
        } else if (st == IRT_INT && !irt_isint(IR(store->op2)->t)) {
          st = irt_type(IR(store->op2)->t);  /* Needs dummy CONV.int.*. */
        } else if (!(st == IRT_U8 || st == IRT_U16)) {
          /* I64/U64 are boxed, U32 is hidden behind a CONV.num.u32. */
          needconv = 0;
        }  /* else: U8/U16 only need trunc + zero-extend, st stays as is. */
        if (needconv) {
          fins->ot = IRTI(IR_CONV);
          fins->op1 = store->op2;
          fins->op2 = (IRT_INT<<5)|st;
          return RETRYFOLD;
        }
      }
      return store->op2;  /* Store forwarding. */
    }
  }

cselim:
  /* Try to find a matching load. Below the conflicting store, if any. */
  for (ref = J->chain[IR_XLOAD]; ref > lim; ref = IR(ref)->prev) {
    /* CSE for XLOAD depends on the type, but not on the IRXLOAD_* flags. */
    if (IR(ref)->op1 == xref && irt_sametype(IR(ref)->t, fins->t))
      return ref;
  }

  /* Reassociate XLOAD across PHIs to handle a[i-1] forwarding case. */
  /* xref == fins->op1 only holds before the first reassociation. */
  if (!(fins->op2 & IRXLOAD_READONLY) && J->chain[IR_LOOP] &&
      xref == fins->op1) {
    xref = reassoc_xref(J, xr);
    if (xref != 0) {
      /* Skip stores that have already been checked. */
      for (ref = J->chain[IR_XSTORE]; ref > lim; ref = IR(ref)->prev)
        ;
      lim = xref;
      xr = IR(xref);
      goto retry;  /* Retry with the reassociated reference. */
    }
  }

doemit:
  return EMITFOLD;
}