/* Check whether the previous value for a table store is non-nil. ** This can be derived either from a previous store or from a previous ** load (because all loads from tables perform a type check). ** ** The result of the analysis can be used to avoid the metatable check ** and the guard against HREF returning niltv. Both of these are cheap, ** so let's not spend too much effort on the analysis. ** ** A result of 1 is exact: previous value CANNOT be nil. ** A result of 0 is inexact: previous value MAY be nil. */ int lj_opt_fwd_wasnonnil(jit_State *J, IROpT loadop, IRRef xref) { /* First check stores. */ IRRef ref = J->chain[loadop+IRDELTA_L2S]; while (ref > xref) { IRIns *store = IR(ref); if (store->op1 == xref) { /* Same xREF. */ /* A nil store MAY alias, but a non-nil store MUST alias. */ return !irt_isnil(store->t); } else if (irt_isnil(store->t)) { /* Must check any nil store. */ IRRef skref = IR(store->op1)->op2; IRRef xkref = IR(xref)->op2; /* Same key type MAY alias. Need ALOAD check due to multiple int types. */ if (loadop == IR_ALOAD || irt_sametype(IR(skref)->t, IR(xkref)->t)) { if (skref == xkref || !irref_isk(skref) || !irref_isk(xkref)) return 0; /* A nil store with same const key or var key MAY alias. */ /* Different const keys CANNOT alias. */ } /* Different key types CANNOT alias. */ } /* Other non-nil stores MAY alias. */ ref = store->prev; } /* Check loads since nothing could be derived from stores. */ ref = J->chain[loadop]; while (ref > xref) { IRIns *load = IR(ref); if (load->op1 == xref) { /* Same xREF. */ /* A nil load MAY alias, but a non-nil load MUST alias. */ return !irt_isnil(load->t); } /* Other non-nil loads MAY alias. */ ref = load->prev; } return 0; /* Nothing derived at all, previous value MAY be nil. */ }
/* Alias analysis for XLOAD/XSTORE. */ static AliasRet aa_xref(jit_State *J, IRIns *refa, IRIns *xa, IRIns *xb) { ptrdiff_t ofsa = 0, ofsb = 0; IRIns *refb = IR(xb->op1); IRIns *basea = refa, *baseb = refb; if (refa == refb && irt_sametype(xa->t, xb->t)) return ALIAS_MUST; /* Shortcut for same refs with identical type. */ /* Offset-based disambiguation. */ if (refa->o == IR_ADD && irref_isk(refa->op2)) { IRIns *irk = IR(refa->op2); basea = IR(refa->op1); ofsa = (LJ_64 && irk->o == IR_KINT64) ? (ptrdiff_t)ir_k64(irk)->u64 : (ptrdiff_t)irk->i; } if (refb->o == IR_ADD && irref_isk(refb->op2)) { IRIns *irk = IR(refb->op2); baseb = IR(refb->op1); ofsb = (LJ_64 && irk->o == IR_KINT64) ? (ptrdiff_t)ir_k64(irk)->u64 : (ptrdiff_t)irk->i; } /* Treat constified pointers like base vs. base+offset. */ if (basea->o == IR_KPTR && baseb->o == IR_KPTR) { ofsb += (char *)ir_kptr(baseb) - (char *)ir_kptr(basea); baseb = basea; } /* This implements (very) strict aliasing rules. ** Different types do NOT alias, except for differences in signedness. ** Type punning through unions is allowed (but forces a reload). */ if (basea == baseb) { ptrdiff_t sza = irt_size(xa->t), szb = irt_size(xb->t); if (ofsa == ofsb) { if (sza == szb && irt_isfp(xa->t) == irt_isfp(xb->t)) return ALIAS_MUST; /* Same-sized, same-kind. May need to convert. */ } else if (ofsa + sza <= ofsb || ofsb + szb <= ofsa) { return ALIAS_NO; /* Non-overlapping base+-o1 vs. base+-o2. */ } /* NYI: extract, extend or reinterpret bits (int <-> fp). */ return ALIAS_MAY; /* Overlapping or type punning: force reload. */ } if (!irt_sametype(xa->t, xb->t) && !(irt_typerange(xa->t, IRT_I8, IRT_U64) && ((xa->t.irt - IRT_I8) ^ (xb->t.irt - IRT_I8)) == 1)) return ALIAS_NO; /* NYI: structural disambiguation. */ return aa_cnew(J, basea, baseb); /* Try to disambiguate allocations. */ }
/* Alias analysis for array and hash access using key-based disambiguation. */ static AliasRet aa_ahref(jit_State *J, IRIns *refa, IRIns *refb) { IRRef ka = refa->op2; IRRef kb = refb->op2; IRIns *keya, *keyb; if (refa == refb) return ALIAS_MUST; /* Shortcut for same refs. */ keya = IR(ka); if (keya->o == IR_KSLOT) { ka = keya->op1; keya = IR(ka); } keyb = IR(kb); if (keyb->o == IR_KSLOT) { kb = keyb->op1; keyb = IR(kb); } if (ka == kb) { /* Same key. Check for same table with different ref (NEWREF vs. HREF). */ IRIns *ta = refa; IRIns *tb = refb; if (ta->o == IR_HREFK || ta->o == IR_AREF) ta = IR(ta->op1); if (tb->o == IR_HREFK || tb->o == IR_AREF) tb = IR(tb->op1); if (ta->op1 == tb->op1) return ALIAS_MUST; /* Same key, same table. */ else return ALIAS_MAY; /* Same key, possibly different table. */ } if (irref_isk(ka) && irref_isk(kb)) return ALIAS_NO; /* Different constant keys. */ if (refa->o == IR_AREF) { /* Disambiguate array references based on index arithmetic. */ lua_assert(refb->o == IR_AREF); if (refa->op1 == refb->op1) { /* Same table, different non-const array keys. */ int32_t ofsa = 0, ofsb = 0; IRRef basea = ka, baseb = kb; /* Gather base and offset from t[base] or t[base+-ofs]. */ if (keya->o == IR_ADD && irref_isk(keya->op2)) { basea = keya->op1; ofsa = IR(keya->op2)->i; if (basea == kb && ofsa != 0) return ALIAS_NO; /* t[base+-ofs] vs. t[base]. */ } if (keyb->o == IR_ADD && irref_isk(keyb->op2)) { baseb = keyb->op1; ofsb = IR(keyb->op2)->i; if (ka == baseb && ofsb != 0) return ALIAS_NO; /* t[base] vs. t[base+-ofs]. */ } if (basea == baseb && ofsa != ofsb) return ALIAS_NO; /* t[base+-o1] vs. t[base+-o2] and o1 != o2. */ } } else { /* Disambiguate hash references based on the type of their keys. */ lua_assert((refa->o==IR_HREF || refa->o==IR_HREFK || refa->o==IR_NEWREF) && (refb->o==IR_HREF || refb->o==IR_HREFK || refb->o==IR_NEWREF)); if (!irt_sametype(keya->t, keyb->t)) return ALIAS_NO; /* Different key types. */ } return ALIAS_MAY; /* Anything else: we just don't know. */ }
/* Alias analysis for XLOAD/XSTORE. */ static AliasRet aa_xref(jit_State *J, IRIns *refa, IRIns *xa, IRIns *xb) { ptrdiff_t ofsa = 0, ofsb = 0; IRIns *refb = IR(xb->op1); IRIns *basea = refa, *baseb = refb; /* This implements (very) strict aliasing rules. ** Different types do NOT alias, except for differences in signedness. ** NYI: this also prevents type punning through unions. */ if (irt_sametype(xa->t, xb->t)) { if (refa == refb) return ALIAS_MUST; /* Shortcut for same refs with identical type. */ } else if (!(irt_typerange(xa->t, IRT_I8, IRT_U64) && ((xa->t.irt - IRT_I8) ^ (xb->t.irt - IRT_I8)) == 1)) { return ALIAS_NO; } /* Offset-based disambiguation. */ if (refa->o == IR_ADD && irref_isk(refa->op2)) { IRIns *irk = IR(refa->op2); basea = IR(refa->op1); ofsa = (LJ_64 && irk->o == IR_KINT64) ? (ptrdiff_t)ir_k64(irk)->u64 : (ptrdiff_t)irk->i; if (basea == refb && ofsa != 0) return ALIAS_NO; /* base+-ofs vs. base. */ } if (refb->o == IR_ADD && irref_isk(refb->op2)) { IRIns *irk = IR(refb->op2); baseb = IR(refb->op1); ofsb = (LJ_64 && irk->o == IR_KINT64) ? (ptrdiff_t)ir_k64(irk)->u64 : (ptrdiff_t)irk->i; if (refa == baseb && ofsb != 0) return ALIAS_NO; /* base vs. base+-ofs. */ } if (basea == baseb) { /* This assumes strictly-typed, non-overlapping accesses. */ if (ofsa != ofsb) return ALIAS_NO; /* base+-o1 vs. base+-o2 and o1 != o2. */ return ALIAS_MUST; /* Unsigned vs. signed access to the same address. */ } /* NYI: structural disambiguation. */ return aa_cnew(J, basea, baseb); /* Try to disambiguate allocations. */ }
/* Unroll loop. */ static void loop_unroll(jit_State *J) { IRRef1 phi[LJ_MAX_PHI]; uint32_t nphi = 0; IRRef1 *subst; SnapNo onsnap; SnapShot *osnap, *loopsnap; SnapEntry *loopmap, *psentinel; IRRef ins, invar; /* Use temp buffer for substitution table. ** Only non-constant refs in [REF_BIAS,invar) are valid indexes. ** Caveat: don't call into the VM or run the GC or the buffer may be gone. */ invar = J->cur.nins; subst = (IRRef1 *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, (invar-REF_BIAS)*sizeof(IRRef1)) - REF_BIAS; subst[REF_BASE] = REF_BASE; /* LOOP separates the pre-roll from the loop body. */ emitir_raw(IRTG(IR_LOOP, IRT_NIL), 0, 0); /* Grow snapshot buffer and map for copy-substituted snapshots. ** Need up to twice the number of snapshots minus #0 and loop snapshot. ** Need up to twice the number of entries plus fallback substitutions ** from the loop snapshot entries for each new snapshot. ** Caveat: both calls may reallocate J->cur.snap and J->cur.snapmap! */ onsnap = J->cur.nsnap; lj_snap_grow_buf(J, 2*onsnap-2); lj_snap_grow_map(J, J->cur.nsnapmap*2+(onsnap-2)*J->cur.snap[onsnap-1].nent); /* The loop snapshot is used for fallback substitutions. */ loopsnap = &J->cur.snap[onsnap-1]; loopmap = &J->cur.snapmap[loopsnap->mapofs]; /* The PC of snapshot #0 and the loop snapshot must match. */ psentinel = &loopmap[loopsnap->nent]; lua_assert(*psentinel == J->cur.snapmap[J->cur.snap[0].nent]); *psentinel = SNAP(255, 0, 0); /* Replace PC with temporary sentinel. */ /* Start substitution with snapshot #1 (#0 is empty for root traces). */ osnap = &J->cur.snap[1]; /* Copy and substitute all recorded instructions and snapshots. */ for (ins = REF_FIRST; ins < invar; ins++) { IRIns *ir; IRRef op1, op2; if (ins >= osnap->ref) /* Instruction belongs to next snapshot? */ loop_subst_snap(J, osnap++, loopmap, subst); /* Copy-substitute it. */ /* Substitute instruction operands. */ ir = IR(ins); op1 = ir->op1; if (!irref_isk(op1)) op1 = subst[op1]; op2 = ir->op2; if (!irref_isk(op2)) op2 = subst[op2]; if (irm_kind(lj_ir_mode[ir->o]) == IRM_N && op1 == ir->op1 && op2 == ir->op2) { /* Regular invariant ins? */ subst[ins] = (IRRef1)ins; /* Shortcut. */ } else { /* Re-emit substituted instruction to the FOLD/CSE/etc. pipeline. */ IRType1 t = ir->t; /* Get this first, since emitir may invalidate ir. */ IRRef ref = tref_ref(emitir(ir->ot & ~IRT_ISPHI, op1, op2)); subst[ins] = (IRRef1)ref; if (ref != ins) { IRIns *irr = IR(ref); if (ref < invar) { /* Loop-carried dependency? */ /* Potential PHI? */ if (!irref_isk(ref) && !irt_isphi(irr->t) && !irt_ispri(irr->t)) { irt_setphi(irr->t); if (nphi >= LJ_MAX_PHI) lj_trace_err(J, LJ_TRERR_PHIOV); phi[nphi++] = (IRRef1)ref; } /* Check all loop-carried dependencies for type instability. */ if (!irt_sametype(t, irr->t)) { if (irt_isinteger(t) && irt_isinteger(irr->t)) continue; else if (irt_isnum(t) && irt_isinteger(irr->t)) /* Fix int->num. */ ref = tref_ref(emitir(IRTN(IR_CONV), ref, IRCONV_NUM_INT)); else if (irt_isnum(irr->t) && irt_isinteger(t)) /* Fix num->int. */ ref = tref_ref(emitir(IRTGI(IR_CONV), ref, IRCONV_INT_NUM|IRCONV_CHECK)); else lj_trace_err(J, LJ_TRERR_TYPEINS); subst[ins] = (IRRef1)ref; irr = IR(ref); goto phiconv; } } else if (ref != REF_DROP && irr->o == IR_CONV && ref > invar && irr->op1 < invar) { /* May need an extra PHI for a CONV. */ ref = irr->op1; irr = IR(ref); phiconv: if (ref < invar && !irref_isk(ref) && !irt_isphi(irr->t)) { irt_setphi(irr->t); if (nphi >= LJ_MAX_PHI) lj_trace_err(J, LJ_TRERR_PHIOV); phi[nphi++] = (IRRef1)ref; } } } } } if (!irt_isguard(J->guardemit)) /* Drop redundant snapshot. */ J->cur.nsnapmap = (uint16_t)J->cur.snap[--J->cur.nsnap].mapofs; lua_assert(J->cur.nsnapmap <= J->sizesnapmap); *psentinel = J->cur.snapmap[J->cur.snap[0].nent]; /* Restore PC. */ loop_emit_phi(J, subst, phi, nphi, onsnap); }
/* XLOAD forwarding. */ TRef LJ_FASTCALL lj_opt_fwd_xload(jit_State *J) { IRRef xref = fins->op1; IRIns *xr = IR(xref); IRRef lim = xref; /* Search limit. */ IRRef ref; if ((fins->op2 & IRXLOAD_READONLY)) goto cselim; if ((fins->op2 & IRXLOAD_VOLATILE)) goto doemit; /* Search for conflicting stores. */ ref = J->chain[IR_XSTORE]; retry: if (J->chain[IR_CALLXS] > lim) lim = J->chain[IR_CALLXS]; if (J->chain[IR_XBAR] > lim) lim = J->chain[IR_XBAR]; while (ref > lim) { IRIns *store = IR(ref); switch (aa_xref(J, xr, fins, store)) { case ALIAS_NO: break; /* Continue searching. */ case ALIAS_MAY: lim = ref; goto cselim; /* Limit search for load. */ case ALIAS_MUST: /* Emit conversion if the loaded type doesn't match the forwarded type. */ if (!irt_sametype(fins->t, IR(store->op2)->t)) { IRType st = irt_type(fins->t); if (st == IRT_I8 || st == IRT_I16) { /* Trunc + sign-extend. */ st |= IRCONV_SEXT; } else if (st == IRT_U8 || st == IRT_U16) { /* Trunc + zero-extend. */ } else if (st == IRT_INT) { st = irt_type(IR(store->op2)->t); /* Needs dummy CONV.int.*. */ } else { /* I64/U64 are boxed, U32 is hidden behind a CONV.num.u32. */ goto store_fwd; } fins->ot = IRTI(IR_CONV); fins->op1 = store->op2; fins->op2 = (IRT_INT<<5)|st; return RETRYFOLD; } store_fwd: return store->op2; /* Store forwarding. */ } ref = store->prev; } cselim: /* Try to find a matching load. Below the conflicting store, if any. */ ref = J->chain[IR_XLOAD]; while (ref > lim) { /* CSE for XLOAD depends on the type, but not on the IRXLOAD_* flags. */ if (IR(ref)->op1 == xref && irt_sametype(IR(ref)->t, fins->t)) return ref; ref = IR(ref)->prev; } /* Reassociate XLOAD across PHIs to handle a[i-1] forwarding case. */ if (!(fins->op2 & IRXLOAD_READONLY) && J->chain[IR_LOOP] && xref == fins->op1 && (xref = reassoc_xref(J, xr)) != 0) { ref = J->chain[IR_XSTORE]; while (ref > lim) /* Skip stores that have already been checked. */ ref = IR(ref)->prev; lim = xref; xr = IR(xref); goto retry; /* Retry with the reassociated reference. */ } doemit: return EMITFOLD; }