Example #1
/* Backwards propagate marks. Replace unused instructions with NOPs. */
static void dce_propagate(jit_State *J)
{
  IRRef1 *pchain[IR__MAX];
  IRRef ins;
  uint32_t i;
  for (i = 0; i < IR__MAX; i++) pchain[i] = &J->chain[i];
  for (ins = J->cur.nins-1; ins >= REF_FIRST; ins--) {
    IRIns *ir = IR(ins);
    if (irt_ismarked(ir->t)) {
      irt_clearmark(ir->t);
      pchain[ir->o] = &ir->prev;
    } else if (!(irt_isguard(ir->t) || irm_sideeff(lj_ir_mode[ir->o]))) {
      *pchain[ir->o] = ir->prev;  /* Reroute original instruction chain. */
      *pchain[IR_NOP] = (IRRef1)ins;
      ir->t.irt = IRT_NIL;
      ir->o = IR_NOP;  /* Replace instruction with NOP. */
      ir->op1 = ir->op2 = 0;
      pchain[IR_NOP] = &ir->prev;
      continue;
    }
    if (!irref_isk(ir->op1)) irt_setmark(IR(ir->op1)->t);
    if (!irref_isk(ir->op2)) irt_setmark(IR(ir->op2)->t);
  }
  *pchain[IR_NOP] = 0;  /* Terminate NOP chain. */
}
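A minimal standalone sketch (toy code, not LuaJIT's IR) of the same backward
mark-and-sweep idea: one reverse pass NOPs any unmarked instruction without
side effects and marks the operands of everything it keeps. The instruction
chain rerouting done by dce_propagate above is omitted.

#include <stdio.h>

/* Toy IR: two operands (0 = none), a mark flag and a side-effect flag. */
typedef struct ToyIns { int op1, op2, marked, sideeff; const char *name; } ToyIns;

static void toy_dce(ToyIns *ir, int nins)
{
  int i;
  for (i = nins-1; i >= 1; i--) {
    if (!ir[i].marked && !ir[i].sideeff) {  /* Dead: replace with NOP. */
      ir[i].name = "NOP";
      ir[i].op1 = ir[i].op2 = 0;
      continue;  /* Operands of dead instructions stay unmarked. */
    }
    if (ir[i].op1) ir[ir[i].op1].marked = 1;  /* Live: operands become live. */
    if (ir[i].op2) ir[ir[i].op2].marked = 1;
  }
}

int main(void)
{
  /* 1: a=LOAD; 2: b=ADD a,a; 3: c=MUL a,a (unused); 4: STORE b (side effect). */
  ToyIns ir[5] = {{0,0,0,0,0}, {0,0,0,0,"LOAD"}, {1,1,0,0,"ADD"},
                  {1,1,0,0,"MUL"}, {2,0,0,1,"STORE"}};
  int i;
  toy_dce(ir, 5);
  for (i = 1; i < 5; i++) printf("%d: %s\n", i, ir[i].name);  /* MUL -> NOP. */
  return 0;
}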
Example #2
/* Check whether the previous value for a table store is non-nil.
** This can be derived either from a previous store or from a previous
** load (because all loads from tables perform a type check).
**
** The result of the analysis can be used to avoid the metatable check
** and the guard against HREF returning niltv. Both of these are cheap,
** so let's not spend too much effort on the analysis.
**
** A result of 1 is exact: previous value CANNOT be nil.
** A result of 0 is inexact: previous value MAY be nil.
*/
int lj_opt_fwd_wasnonnil(jit_State *J, IROpT loadop, IRRef xref)
{
  /* First check stores. */
  IRRef ref = J->chain[loadop+IRDELTA_L2S];
  while (ref > xref) {
    IRIns *store = IR(ref);
    if (store->op1 == xref) {  /* Same xREF. */
      /* A nil store MAY alias, but a non-nil store MUST alias. */
      return !irt_isnil(store->t);
    } else if (irt_isnil(store->t)) {  /* Must check any nil store. */
      IRRef skref = IR(store->op1)->op2;
      IRRef xkref = IR(xref)->op2;
      /* Same key type MAY alias. Need ALOAD check due to multiple int types. */
      if (loadop == IR_ALOAD || irt_sametype(IR(skref)->t, IR(xkref)->t)) {
	if (skref == xkref || !irref_isk(skref) || !irref_isk(xkref))
	  return 0;  /* A nil store with same const key or var key MAY alias. */
	/* Different const keys CANNOT alias. */
      }  /* Different key types CANNOT alias. */
    }  /* Other non-nil stores MAY alias. */
    ref = store->prev;
  }

  /* Check loads since nothing could be derived from stores. */
  ref = J->chain[loadop];
  while (ref > xref) {
    IRIns *load = IR(ref);
    if (load->op1 == xref) {  /* Same xREF. */
      /* A nil load MAY alias, but a non-nil load MUST alias. */
      return !irt_isnil(load->t);
    }  /* Other non-nil loads MAY alias. */
    ref = load->prev;
  }
  return 0;  /* Nothing derived at all, previous value MAY be nil. */
}
Example #3
/* Emit or eliminate collected PHIs. */
static void loop_emit_phi(jit_State *J, IRRef1 *subst, IRRef1 *phi, IRRef nphi)
{
  int pass2 = 0;
  IRRef i, nslots;
  IRRef invar = J->chain[IR_LOOP];
  /* Pass #1: mark redundant and potentially redundant PHIs. */
  for (i = 0; i < nphi; i++) {
    IRRef lref = phi[i];
    IRRef rref = subst[lref];
    if (lref == rref || rref == REF_DROP) {  /* Invariants are redundant. */
      irt_setmark(IR(lref)->t);
    } else if (!(IR(rref)->op1 == lref || IR(rref)->op2 == lref)) {
      /* Quick check for simple recurrences failed, need pass2. */
      irt_setmark(IR(lref)->t);
      pass2 = 1;
    }
  }
  /* Pass #2: traverse variant part and clear marks of non-redundant PHIs. */
  if (pass2) {
    for (i = J->cur.nins-1; i > invar; i--) {
      IRIns *ir = IR(i);
      if (!irref_isk(ir->op1)) irt_clearmark(IR(ir->op1)->t);
      if (!irref_isk(ir->op2)) irt_clearmark(IR(ir->op2)->t);
    }
  }
  /* Pass #3: add PHIs for variant slots without a corresponding SLOAD. */
  nslots = J->baseslot+J->maxslot;
  for (i = 1; i < nslots; i++) {
    IRRef ref = tref_ref(J->slot[i]);
    while (!irref_isk(ref) && ref != subst[ref]) {
      IRIns *ir = IR(ref);
      irt_clearmark(ir->t);  /* Unmark potential uses, too. */
      if (irt_isphi(ir->t) || irt_ispri(ir->t))
	break;
      irt_setphi(ir->t);
      if (nphi >= LJ_MAX_PHI)
	lj_trace_err(J, LJ_TRERR_PHIOV);
      phi[nphi++] = (IRRef1)ref;
      ref = subst[ref];
      if (ref > invar)
	break;
    }
  }
  /* Pass #4: emit PHI instructions or eliminate PHIs. */
  for (i = 0; i < nphi; i++) {
    IRRef lref = phi[i];
    IRIns *ir = IR(lref);
    if (!irt_ismarked(ir->t)) {  /* Emit PHI if not marked. */
      IRRef rref = subst[lref];
      if (rref > invar)
	irt_setphi(IR(rref)->t);
      emitir_raw(IRT(IR_PHI, irt_type(ir->t)), lref, rref);
    } else {  /* Otherwise eliminate PHI. */
      irt_clearmark(ir->t);
      irt_clearphi(ir->t);
    }
  }
}
Example #4
/* Alias analysis for array and hash access using key-based disambiguation. */
static AliasRet aa_ahref(jit_State *J, IRIns *refa, IRIns *refb)
{
  IRRef ka = refa->op2;
  IRRef kb = refb->op2;
  IRIns *keya, *keyb;
  if (refa == refb)
    return ALIAS_MUST;  /* Shortcut for same refs. */
  keya = IR(ka);
  if (keya->o == IR_KSLOT) { ka = keya->op1; keya = IR(ka); }
  keyb = IR(kb);
  if (keyb->o == IR_KSLOT) { kb = keyb->op1; keyb = IR(kb); }
  if (ka == kb) {
    /* Same key. Check for same table with different ref (NEWREF vs. HREF). */
    IRIns *ta = refa;
    IRIns *tb = refb;
    if (ta->o == IR_HREFK || ta->o == IR_AREF) ta = IR(ta->op1);
    if (tb->o == IR_HREFK || tb->o == IR_AREF) tb = IR(tb->op1);
    if (ta->op1 == tb->op1)
      return ALIAS_MUST;  /* Same key, same table. */
    else
      return ALIAS_MAY;  /* Same key, possibly different table. */
  }
  if (irref_isk(ka) && irref_isk(kb))
    return ALIAS_NO;  /* Different constant keys. */
  if (refa->o == IR_AREF) {
    /* Disambiguate array references based on index arithmetic. */
    lua_assert(refb->o == IR_AREF);
    if (refa->op1 == refb->op1) {
      /* Same table, different non-const array keys. */
      int32_t ofsa = 0, ofsb = 0;
      IRRef basea = ka, baseb = kb;
      /* Gather base and offset from t[base] or t[base+-ofs]. */
      if (keya->o == IR_ADD && irref_isk(keya->op2)) {
	basea = keya->op1;
	ofsa = IR(keya->op2)->i;
	if (basea == kb && ofsa != 0)
	  return ALIAS_NO;  /* t[base+-ofs] vs. t[base]. */
      }
      if (keyb->o == IR_ADD && irref_isk(keyb->op2)) {
	baseb = keyb->op1;
	ofsb = IR(keyb->op2)->i;
	if (ka == baseb && ofsb != 0)
	  return ALIAS_NO;  /* t[base] vs. t[base+-ofs]. */
      }
      if (basea == baseb && ofsa != ofsb)
	return ALIAS_NO;  /* t[base+-o1] vs. t[base+-o2] and o1 != o2. */
    }
  } else {
    /* Disambiguate hash references based on the type of their keys. */
    lua_assert((refa->o==IR_HREF || refa->o==IR_HREFK || refa->o==IR_NEWREF) &&
	       (refb->o==IR_HREF || refb->o==IR_HREFK || refb->o==IR_NEWREF));
    if (!irt_sametype(keya->t, keyb->t))
      return ALIAS_NO;  /* Different key types. */
  }
  return ALIAS_MAY;  /* Anything else: we just don't know. */
}
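The core AREF rule above can be checked on plain integers. A standalone toy
(hypothetical names, not IR refs), where a non-constant base plus a constant
offset models t[base+-ofs]:

#include <assert.h>

typedef enum { TOY_NO, TOY_MAY, TOY_MUST } ToyAlias;

/* Same non-constant base: different constant offsets can never address the
** same array slot; equal offsets address exactly the same slot.
*/
static ToyAlias toy_aa_index(int basea, int ofsa, int baseb, int ofsb)
{
  if (basea == baseb)
    return ofsa == ofsb ? TOY_MUST : TOY_NO;  /* t[b+o1] vs. t[b+o2]. */
  return TOY_MAY;  /* Different bases: we just don't know. */
}

int main(void)
{
  assert(toy_aa_index(1, 0, 1, 1) == TOY_NO);    /* t[i] vs. t[i+1]. */
  assert(toy_aa_index(1, 2, 1, 2) == TOY_MUST);  /* t[i+2] vs. t[i+2]. */
  assert(toy_aa_index(1, 0, 2, 0) == TOY_MAY);   /* t[i] vs. t[j]. */
  return 0;
}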
Example #5
/* Find cdata allocation for a reference (if any). */
static IRIns *aa_findcnew(jit_State *J, IRIns *ir)
{
  while (ir->o == IR_ADD) {
    if (!irref_isk(ir->op1)) {
      IRIns *ir1 = aa_findcnew(J, IR(ir->op1));  /* Left-recursion. */
      if (ir1) return ir1;
    }
    if (irref_isk(ir->op2)) return NULL;
    ir = IR(ir->op2);  /* Flatten right-recursion. */
  }
  return ir->o == IR_CNEW ? ir : NULL;
}
/* Alias analysis for XLOAD/XSTORE. */
static AliasRet aa_xref(jit_State *J, IRIns *refa, IRIns *xa, IRIns *xb)
{
  ptrdiff_t ofsa = 0, ofsb = 0;
  IRIns *refb = IR(xb->op1);
  IRIns *basea = refa, *baseb = refb;
  if (refa == refb && irt_sametype(xa->t, xb->t))
    return ALIAS_MUST;  /* Shortcut for same refs with identical type. */
  /* Offset-based disambiguation. */
  if (refa->o == IR_ADD && irref_isk(refa->op2)) {
    IRIns *irk = IR(refa->op2);
    basea = IR(refa->op1);
    ofsa = (LJ_64 && irk->o == IR_KINT64) ? (ptrdiff_t)ir_k64(irk)->u64 :
					    (ptrdiff_t)irk->i;
  }
  if (refb->o == IR_ADD && irref_isk(refb->op2)) {
    IRIns *irk = IR(refb->op2);
    baseb = IR(refb->op1);
    ofsb = (LJ_64 && irk->o == IR_KINT64) ? (ptrdiff_t)ir_k64(irk)->u64 :
					    (ptrdiff_t)irk->i;
  }
  /* Treat constified pointers like base vs. base+offset. */
  if (basea->o == IR_KPTR && baseb->o == IR_KPTR) {
    ofsb += (char *)ir_kptr(baseb) - (char *)ir_kptr(basea);
    baseb = basea;
  }
  /* This implements (very) strict aliasing rules.
  ** Different types do NOT alias, except for differences in signedness.
  ** Type punning through unions is allowed (but forces a reload).
  */
  if (basea == baseb) {
    ptrdiff_t sza = irt_size(xa->t), szb = irt_size(xb->t);
    if (ofsa == ofsb) {
      if (sza == szb && irt_isfp(xa->t) == irt_isfp(xb->t))
	return ALIAS_MUST;  /* Same-sized, same-kind. May need to convert. */
    } else if (ofsa + sza <= ofsb || ofsb + szb <= ofsa) {
      return ALIAS_NO;  /* Non-overlapping base+-o1 vs. base+-o2. */
    }
    /* NYI: extract, extend or reinterpret bits (int <-> fp). */
    return ALIAS_MAY;  /* Overlapping or type punning: force reload. */
  }
  if (!irt_sametype(xa->t, xb->t) &&
      !(irt_typerange(xa->t, IRT_I8, IRT_U64) &&
	((xa->t.irt - IRT_I8) ^ (xb->t.irt - IRT_I8)) == 1))
    return ALIAS_NO;
  /* NYI: structural disambiguation. */
  return aa_cnew(J, basea, baseb);  /* Try to disambiguate allocations. */
}
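The final signedness test relies on the IRT_* enum layout. A standalone toy,
assuming (as that check does) that each signed integer type is immediately
followed by its unsigned twin, so the XOR of the enum offsets is 1 exactly
for same-size pairs that differ only in signedness:

#include <assert.h>

enum { T_I8, T_U8, T_I16, T_U16, T_I32, T_U32 };  /* Toy stand-in for IRT_*. */

static int same_size_diff_sign(int a, int b) { return (a ^ b) == 1; }

int main(void)
{
  assert(same_size_diff_sign(T_I8, T_U8));    /* May alias under the rule. */
  assert(same_size_diff_sign(T_I16, T_U16));
  assert(!same_size_diff_sign(T_U8, T_I16));  /* Adjacent, but sizes differ. */
  assert(!same_size_diff_sign(T_I8, T_I8));   /* Same type: handled earlier. */
  return 0;
}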
Example #7
/* Restore raw data from the trace exit state. */
static void snap_restoredata(GCtrace *T, ExitState *ex,
			     SnapNo snapno, BloomFilter rfilt,
			     IRRef ref, void *dst, CTSize sz)
{
  IRIns *ir = &T->ir[ref];
  RegSP rs = ir->prev;
  int32_t *src;
  uint64_t tmp;
  if (irref_isk(ref)) {
    if (ir->o == IR_KNUM || ir->o == IR_KINT64) {
      src = mref(ir->ptr, int32_t);
    } else if (sz == 8) {
      tmp = (uint64_t)(uint32_t)ir->i;
      src = (int32_t *)&tmp;
    } else {
      src = &ir->i;
    }
  } else {
    if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
      rs = snap_renameref(T, snapno, ref, rs);
    if (ra_hasspill(regsp_spill(rs))) {
      src = &ex->spill[regsp_spill(rs)];
      if (sz == 8 && !irt_is64(ir->t)) {
	tmp = (uint64_t)(uint32_t)*src;
	src = (int32_t *)&tmp;
      }
    } else {
      Reg r = regsp_reg(rs);
      if (ra_noreg(r)) {
	/* Note: this assumes CNEWI is never used for SOFTFP split numbers. */
	lua_assert(sz == 8 && ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
	snap_restoredata(T, ex, snapno, rfilt, ir->op1, dst, 4);
	*(lua_Number *)dst = (lua_Number)*(int32_t *)dst;
	return;
      }
      src = (int32_t *)&ex->gpr[r-RID_MIN_GPR];
#if !LJ_SOFTFP
      if (r >= RID_MAX_GPR) {
	src = (int32_t *)&ex->fpr[r-RID_MIN_FPR];
#if LJ_TARGET_PPC
	if (sz == 4) {  /* PPC FPRs are always doubles. */
	  *(float *)dst = (float)*(double *)src;
	  return;
	}
#else
	if (LJ_BE && sz == 4) src++;
#endif
      }
#endif
    }
  }
  lua_assert(sz == 1 || sz == 2 || sz == 4 || sz == 8);
  if (sz == 4) *(int32_t *)dst = *src;
  else if (sz == 8) *(int64_t *)dst = *(int64_t *)src;
  else if (sz == 1) *(int8_t *)dst = (int8_t)*src;
  else *(int16_t *)dst = (int16_t)*src;
}
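A standalone little-endian sketch of the size-dispatched tail above: an
8 byte destination fed from a 32 bit slot goes through a zero-extending
64 bit temporary before the copy. The real code additionally handles
registers, FPRs and big-endian word order.

#include <stdint.h>
#include <string.h>
#include <assert.h>

static void toy_restore(void *dst, int32_t *src32, size_t sz, int is64src)
{
  uint64_t tmp;
  void *src = src32;
  if (sz == 8 && !is64src) {  /* 64 bit dst, 32 bit slot: zero-extend. */
    tmp = (uint64_t)(uint32_t)*src32;
    src = &tmp;
  }
  assert(sz == 1 || sz == 2 || sz == 4 || sz == 8);
  memcpy(dst, src, sz);  /* Copies the low bytes on little-endian hosts. */
}

int main(void)
{
  int32_t spill = -5;  /* 0xfffffffb. */
  int64_t out64 = 0;
  int16_t out16 = 0;
  toy_restore(&out64, &spill, 8, 0);  /* -> 0x00000000fffffffb. */
  toy_restore(&out16, &spill, 2, 0);  /* -> 0xfffb, i.e. -5 as int16_t. */
  assert(out64 == 0xfffffffbLL && out16 == -5);
  return 0;
}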
Example #8
/* Reassociate index references. */
static IRRef reassoc_xref(jit_State *J, IRIns *ir)
{
  ptrdiff_t ofs = 0;
  if (ir->o == IR_ADD && irref_isk(ir->op2)) {  /* Get constant offset. */
    IRIns *irk = IR(ir->op2);
    ofs = (LJ_64 && irk->o == IR_KINT64) ? (ptrdiff_t)ir_k64(irk)->u64 :
					   (ptrdiff_t)irk->i;
    ir = IR(ir->op1);
  }
  if (ir->o == IR_ADD) {  /* Add of base + index. */
    /* Index ref > base ref for loop-carried dependences. Only check op1. */
    IRIns *ir2, *ir1 = IR(ir->op1);
    int32_t shift = 0;
    IRRef idxref;
    /* Determine index shifts. Don't bother with IR_MUL here. */
    if (ir1->o == IR_BSHL && irref_isk(ir1->op2))
      shift = IR(ir1->op2)->i;
    else if (ir1->o == IR_ADD && ir1->op1 == ir1->op2)
      shift = 1;
    else
      ir1 = ir;
    ir2 = IR(ir1->op1);
    /* A non-reassociated add. Must be a loop-carried dependence. */
    if (ir2->o == IR_ADD && irt_isint(ir2->t) && irref_isk(ir2->op2))
      ofs += (ptrdiff_t)IR(ir2->op2)->i << shift;
    else
      return 0;
    idxref = ir2->op1;
    /* Try to CSE the reassociated chain. Give up if not found. */
    if (ir1 != ir &&
	!(idxref = reassoc_trycse(J, ir1->o, idxref,
				  ir1->o == IR_BSHL ? ir1->op2 : idxref)))
      return 0;
    if (!(idxref = reassoc_trycse(J, IR_ADD, idxref, ir->op2)))
      return 0;
    if (ofs != 0) {
      IRRef refk = tref_ref(lj_ir_kintp(J, ofs));
      if (!(idxref = reassoc_trycse(J, IR_ADD, idxref, refk)))
	return 0;
    }
    return idxref;  /* Success, found a reassociated index reference. Phew. */
  }
  return 0;  /* Failure. */
}
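The rewrite is justified by ordinary integer arithmetic (ignoring overflow):
a constant term inside a shifted index can be folded into the displacement,
leaving a shifted chain that reassoc_trycse can CSE. A standalone check of
the identity:

#include <assert.h>
#include <stdint.h>

int main(void)
{
  intptr_t base = 1000;
  int32_t idx0 = 7, k = 3, s = 2, ofs = 16;
  /* (base + ((idx0+k) << s)) + ofs == (base + (idx0 << s)) + (ofs + (k << s)) */
  intptr_t a = (base + ((intptr_t)(idx0 + k) << s)) + ofs;
  intptr_t b = (base + ((intptr_t)idx0 << s)) + (ofs + ((intptr_t)k << s));
  assert(a == b);
  return 0;
}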
Example #9
/* Mark all instructions referenced by a snapshot. */
static void sink_mark_snap(jit_State *J, SnapShot *snap)
{
  SnapEntry *map = &J->cur.snapmap[snap->mapofs];
  MSize n, nent = snap->nent;
  for (n = 0; n < nent; n++) {
    IRRef ref = snap_ref(map[n]);
    if (!irref_isk(ref))
      irt_setmark(IR(ref)->t);
  }
}
Example #10
/* Alias analysis for XLOAD/XSTORE. */
static AliasRet aa_xref(jit_State *J, IRIns *refa, IRIns *xa, IRIns *xb)
{
  ptrdiff_t ofsa = 0, ofsb = 0;
  IRIns *refb = IR(xb->op1);
  IRIns *basea = refa, *baseb = refb;
  /* This implements (very) strict aliasing rules.
  ** Different types do NOT alias, except for differences in signedness.
  ** NYI: this also prevents type punning through unions.
  */
  if (irt_sametype(xa->t, xb->t)) {
    if (refa == refb)
      return ALIAS_MUST;  /* Shortcut for same refs with identical type. */
  } else if (!(irt_typerange(xa->t, IRT_I8, IRT_U64) &&
	       ((xa->t.irt - IRT_I8) ^ (xb->t.irt - IRT_I8)) == 1)) {
    return ALIAS_NO;
  }
  /* Offset-based disambiguation. */
  if (refa->o == IR_ADD && irref_isk(refa->op2)) {
    IRIns *irk = IR(refa->op2);
    basea = IR(refa->op1);
    ofsa = (LJ_64 && irk->o == IR_KINT64) ? (ptrdiff_t)ir_k64(irk)->u64 :
					    (ptrdiff_t)irk->i;
    if (basea == refb && ofsa != 0)
      return ALIAS_NO;  /* base+-ofs vs. base. */
  }
  if (refb->o == IR_ADD && irref_isk(refb->op2)) {
    IRIns *irk = IR(refb->op2);
    baseb = IR(refb->op1);
    ofsb = (LJ_64 && irk->o == IR_KINT64) ? (ptrdiff_t)ir_k64(irk)->u64 :
					    (ptrdiff_t)irk->i;
    if (refa == baseb && ofsb != 0)
      return ALIAS_NO;  /* base vs. base+-ofs. */
  }
  if (basea == baseb) {
    /* This assumes strictly-typed, non-overlapping accesses. */
    if (ofsa != ofsb)
      return ALIAS_NO;  /* base+-o1 vs. base+-o2 and o1 != o2. */
    return ALIAS_MUST;  /* Unsigned vs. signed access to the same address. */
  }
  /* NYI: structural disambiguation. */
  return aa_cnew(J, basea, baseb);  /* Try to disambiguate allocations. */
}
Example #11
/* Get a pointer to the other 32 bit word (LE: hiword, BE: loword). */
static IRRef split_ptr(jit_State *J, IRRef ref)
{
  IRIns *ir = IR(ref);
  int32_t ofs = 4;
  if (ir->o == IR_ADD && irref_isk(ir->op2)) {  /* Reassociate address. */
    ofs += IR(ir->op2)->i;
    ref = ir->op1;
    if (ofs == 0) return ref;
  }
  return split_emit(J, IRTI(IR_ADD), ref, lj_ir_kint(J, ofs));
}
/* Substitute references of a snapshot. */
static void split_subst_snap(jit_State *J, SnapShot *snap, IRIns *oir)
{
  SnapEntry *map = &J->cur.snapmap[snap->mapofs];
  MSize n, nent = snap->nent;
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    IRIns *ir = &oir[snap_ref(sn)];
    if (!(LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && irref_isk(snap_ref(sn))))
      map[n] = ((sn & 0xffff0000) | ir->prev);
  }
}
Example #13
/* Reassociate ALOAD across PHIs to handle t[i-1] forwarding case. */
static TRef fwd_aload_reassoc(jit_State *J)
{
  IRIns *irx = IR(fins->op1);
  IRIns *key = IR(irx->op2);
  if (key->o == IR_ADD && irref_isk(key->op2)) {
    IRIns *add2 = IR(key->op1);
    if (add2->o == IR_ADD && irref_isk(add2->op2) &&
	IR(key->op2)->i == -IR(add2->op2)->i) {
      IRRef ref = J->chain[IR_AREF];
      IRRef lim = add2->op1;
      if (irx->op1 > lim) lim = irx->op1;
      while (ref > lim) {
	IRIns *ir = IR(ref);
	if (ir->op1 == irx->op1 && ir->op2 == add2->op1)
	  return fwd_ahload(J, ref);
	ref = ir->prev;
      }
    }
  }
  return 0;
}
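The shape being matched is a key of the form (i + k) + (-k), which the
comparison of the negated constants reduces back to plain i, so the load can
reuse an earlier AREF of t[i] across the loop. A trivial standalone check:

#include <assert.h>

int main(void)
{
  int i = 42, k = 1;  /* Key (i+k) + (-k) is just i. */
  assert(((i + k) + (-k)) == i);
  return 0;
}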
Example #14
/* Restore a value from the trace exit state. */
static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
			    SnapNo snapno, BloomFilter rfilt,
			    IRRef ref, TValue *o)
{
  IRIns *ir = &T->ir[ref];
  IRType1 t = ir->t;
  RegSP rs = ir->prev;
  if (irref_isk(ref)) {  /* Restore constant slot. */
    lj_ir_kvalue(J->L, o, ir);
    return;
  }
  if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
    rs = snap_renameref(T, snapno, ref, rs);
  lua_assert(!LJ_GC64);  /* TODO_GC64: handle 64 bit references. */
  if (ra_hasspill(regsp_spill(rs))) {  /* Restore from spill slot. */
    int32_t *sps = &ex->spill[regsp_spill(rs)];
    if (irt_isinteger(t)) {
      setintV(o, *sps);
#if !LJ_SOFTFP
    } else if (irt_isnum(t)) {
      o->u64 = *(uint64_t *)sps;
#endif
    } else if (LJ_64 && irt_islightud(t)) {
      /* 64 bit lightuserdata which may escape already has the tag bits. */
      o->u64 = *(uint64_t *)sps;
    } else {
      lua_assert(!irt_ispri(t));  /* PRI refs never have a spill slot. */
      setgcV(J->L, o, (GCobj *)(uintptr_t)*(GCSize *)sps, irt_toitype(t));
    }
  } else {  /* Restore from register. */
    Reg r = regsp_reg(rs);
    if (ra_noreg(r)) {
      lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
      snap_restoreval(J, T, ex, snapno, rfilt, ir->op1, o);
      if (LJ_DUALNUM) setnumV(o, (lua_Number)intV(o));
      return;
    } else if (irt_isinteger(t)) {
      setintV(o, (int32_t)ex->gpr[r-RID_MIN_GPR]);
#if !LJ_SOFTFP
    } else if (irt_isnum(t)) {
      setnumV(o, ex->fpr[r-RID_MIN_FPR]);
#endif
    } else if (LJ_64 && irt_is64(t)) {
      /* 64 bit values that already have the tag bits. */
      o->u64 = ex->gpr[r-RID_MIN_GPR];
    } else if (irt_ispri(t)) {
      setpriV(o, irt_toitype(t));
    } else {
      setgcV(J->L, o, (GCobj *)ex->gpr[r-RID_MIN_GPR], irt_toitype(t));
    }
  }
}
Example #15
/* Emit parent reference with de-duplication. */
static TRef snap_pref(jit_State *J, GCtrace *T, SnapEntry *map, MSize nmax,
		      BloomFilter seen, IRRef ref)
{
  IRIns *ir = &T->ir[ref];
  TRef tr;
  if (irref_isk(ref))
    tr = snap_replay_const(J, ir);
  else if (!regsp_used(ir->prev))
    tr = 0;
  else if (!bloomtest(seen, ref) || (tr = snap_dedup(J, map, nmax, ref)) == 0)
    tr = emitir(IRT(IR_PVAL, irt_type(ir->t)), ref - REF_BIAS, 0);
  return tr;
}
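bloomtest/bloomset act as a one-word Bloom filter: a set bit may be a false
positive but never a false negative, which is why a hit above still has to
be confirmed by snap_dedup. A minimal toy version with the same structure
(not LuaJIT's exact macros):

#include <stdint.h>
#include <assert.h>

typedef uint64_t ToyBloom;
#define toy_bloombit(x)     ((ToyBloom)1 << ((x) & 63))
#define toy_bloomset(b, x)  ((b) |= toy_bloombit(x))
#define toy_bloomtest(b, x) ((b) & toy_bloombit(x))

int main(void)
{
  ToyBloom seen = 0;
  toy_bloomset(seen, 100);
  assert(toy_bloomtest(seen, 100));   /* True hit. */
  assert(toy_bloomtest(seen, 164));   /* False positive: 164 & 63 == 100 & 63. */
  assert(!toy_bloomtest(seen, 101));  /* Definite miss. */
  return 0;
}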
Example #16
/* Get a pointer to the other 32 bit word (LE: hiword, BE: loword). */
static IRRef split_ptr(jit_State *J, IRIns *oir, IRRef ref)
{
  IRRef nref = oir[ref].prev;
  IRIns *ir = IR(nref);
  int32_t ofs = 4;
  if (ir->o == IR_ADD && irref_isk(ir->op2) && !irt_isphi(oir[ref].t)) {
    /* Reassociate address. */
    ofs += IR(ir->op2)->i;
    nref = ir->op1;
    if (ofs == 0) return nref;
  }
  return split_emit(J, IRTI(IR_ADD), nref, lj_ir_kint(J, ofs));
}
/* Scan through all snapshots and mark all referenced instructions. */
static void dce_marksnap(jit_State *J)
{
  SnapNo i, nsnap = J->cur.nsnap;
  for (i = 0; i < nsnap; i++) {
    SnapShot *snap = &J->cur.snap[i];
    IRRef2 *map = &J->cur.snapmap[snap->mapofs];
    BCReg s, nslots = snap->nslots;
    for (s = 0; s < nslots; s++) {
      IRRef ref = snap_ref(map[s]);
      if (!irref_isk(ref))
	irt_setmark(IR(ref)->t);
    }
  }
}
Example #18
/* Scan through all snapshots and mark all referenced instructions. */
static void dce_marksnap(jit_State *J)
{
  SnapNo i, nsnap = J->cur.nsnap;
  for (i = 0; i < nsnap; i++) {
    SnapShot *snap = &J->cur.snap[i];
    SnapEntry *map = &J->cur.snapmap[snap->mapofs];
    MSize n, nent = snap->nent;
    for (n = 0; n < nent; n++) {
      IRRef ref = snap_ref(map[n]);
      if (!irref_isk(ref))
	irt_setmark(IR(ref)->t);
    }
  }
}
Example #19
/* Check whether the store ref points to an eligible allocation. */
static IRIns *sink_checkalloc(jit_State *J, IRIns *irs)
{
  IRIns *ir = IR(irs->op1);
  if (!irref_isk(ir->op2))
    return NULL;  /* Non-constant key. */
  if (ir->o == IR_HREFK || ir->o == IR_AREF)
    ir = IR(ir->op1);
  else if (!(ir->o == IR_HREF || ir->o == IR_NEWREF ||
	     ir->o == IR_FREF || ir->o == IR_ADD))
    return NULL;  /* Unhandled reference type (for XSTORE). */
  ir = IR(ir->op1);
  if (!(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW))
    return NULL;  /* Not an allocation. */
  return ir;  /* Return allocation. */
}
/* Copy-substitute snapshot. */
static void loop_subst_snap(jit_State *J, SnapShot *osnap,
			    SnapEntry *loopmap, IRRef1 *subst)
{
  SnapEntry *nmap, *omap = &J->cur.snapmap[osnap->mapofs];
  SnapEntry *nextmap = &J->cur.snapmap[snap_nextofs(&J->cur, osnap)];
  MSize nmapofs;
  MSize on, ln, nn, onent = osnap->nent;
  BCReg nslots = osnap->nslots;
  SnapShot *snap = &J->cur.snap[J->cur.nsnap];
  if (irt_isguard(J->guardemit)) {  /* Guard in between? */
    nmapofs = J->cur.nsnapmap;
    J->cur.nsnap++;  /* Add new snapshot. */
  } else {  /* Otherwise overwrite previous snapshot. */
    snap--;
    nmapofs = snap->mapofs;
  }
  J->guardemit.irt = 0;
  /* Setup new snapshot. */
  snap->mapofs = (uint16_t)nmapofs;
  snap->ref = (IRRef1)J->cur.nins;
  snap->nslots = nslots;
  snap->topslot = osnap->topslot;
  snap->count = 0;
  nmap = &J->cur.snapmap[nmapofs];
  /* Substitute snapshot slots. */
  on = ln = nn = 0;
  while (on < onent) {
    SnapEntry osn = omap[on], lsn = loopmap[ln];
    if (snap_slot(lsn) < snap_slot(osn)) {  /* Copy slot from loop map. */
      nmap[nn++] = lsn;
      ln++;
    } else {  /* Copy substituted slot from snapshot map. */
      if (snap_slot(lsn) == snap_slot(osn)) ln++;  /* Shadowed loop slot. */
      if (!irref_isk(snap_ref(osn)))
	osn = snap_setref(osn, subst[snap_ref(osn)]);
      nmap[nn++] = osn;
      on++;
    }
  }
  while (snap_slot(loopmap[ln]) < nslots)  /* Copy remaining loop slots. */
    nmap[nn++] = loopmap[ln++];
  snap->nent = (uint8_t)nn;
  omap += onent;
  nmap += nn;
  while (omap < nextmap)  /* Copy PC + frame links. */
    *nmap++ = *omap++;
  J->cur.nsnapmap = (uint16_t)(nmap - J->cur.snapmap);
}
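The slot substitution above is a two-pointer merge of two lists sorted by
slot number, where an equal slot from the old snapshot shadows the loop-map
entry; the real code bounds the loop map with a sentinel entry rather than
an explicit length. A standalone toy on plain ints:

#include <assert.h>

static int merge_slots(const int *loop, int nl, const int *old, int no,
                       int *out)
{
  int ln = 0, on = 0, nn = 0;
  while (on < no) {
    if (ln < nl && loop[ln] < old[on]) {
      out[nn++] = loop[ln++];  /* Copy slot from loop map. */
    } else {
      if (ln < nl && loop[ln] == old[on]) ln++;  /* Shadowed loop slot. */
      out[nn++] = old[on++];  /* Copy (substituted) old-snapshot slot. */
    }
  }
  while (ln < nl) out[nn++] = loop[ln++];  /* Copy remaining loop slots. */
  return nn;
}

int main(void)
{
  int loop[] = {1, 3, 5}, old[] = {2, 3}, out[8];
  int n = merge_slots(loop, 3, old, 2, out);
  assert(n == 4 && out[0] == 1 && out[1] == 2 && out[2] == 3 && out[3] == 5);
  return 0;
}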
Example #21
/* Convert a snapshot into a linear slot -> RegSP map. */
void lj_snap_regspmap(uint16_t *rsmap, Trace *T, SnapNo snapno)
{
  SnapShot *snap = &T->snap[snapno];
  BCReg s, nslots = snap->nslots;
  IRRef2 *map = &T->snapmap[snap->mapofs];
  BloomFilter rfilt = snap_renamefilter(T, snapno);
  for (s = 0; s < nslots; s++) {
    IRRef ref = snap_ref(map[s]);
    if (!irref_isk(ref)) {
      IRIns *ir = &T->ir[ref];
      uint32_t rs = ir->prev;
      if (bloomtest(rfilt, ref))
	rs = snap_renameref(T, snapno, ref, rs);
      rsmap[s] = (uint16_t)rs;
    }
  }
}
Example #22
/* Emit the conversions collected during backpropagation. */
static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc)
{
  /* The fins fields must be saved now -- emitir() overwrites them. */
  IROpT guardot = irt_isguard(fins->t) ? IRTG(IR_ADDOV-IR_ADD, 0) : 0;
  IROpT convot = fins->ot;
  IRRef1 convop2 = fins->op2;
  NarrowIns *next = nc->stack;  /* List of instructions from backpropagation. */
  NarrowIns *last = nc->sp;
  NarrowIns *sp = nc->stack;  /* Recycle the stack to store operands. */
  while (next < last) {  /* Simple stack machine to process the ins. list. */
    NarrowIns ref = *next++;
    IROpT op = narrow_op(ref);
    if (op == NARROW_REF) {
      *sp++ = ref;
    } else if (op == NARROW_CONV) {
      *sp++ = emitir_raw(convot, ref, convop2);  /* Raw emit avoids a loop. */
    } else if (op == NARROW_SEXT) {
      lua_assert(sp >= nc->stack+1);
      sp[-1] = emitir(IRT(IR_CONV, IRT_I64), sp[-1],
		      (IRT_I64<<5)|IRT_INT|IRCONV_SEXT);
    } else if (op == NARROW_INT) {
      lua_assert(next < last);
      *sp++ = nc->t == IRT_I64 ?
	      lj_ir_kint64(J, (int64_t)(int32_t)*next++) :
	      lj_ir_kint(J, *next++);
    } else {  /* Regular IROpT. Pops two operands and pushes one result. */
      IRRef mode = nc->mode;
      lua_assert(sp >= nc->stack+2);
      sp--;
      /* Omit some overflow checks for array indexing. See comments above. */
      if ((mode & IRCONV_CONVMASK) == IRCONV_INDEX) {
	if (next == last && irref_isk(narrow_ref(sp[0])) &&
	  (uint32_t)IR(narrow_ref(sp[0]))->i + 0x40000000u < 0x80000000u)
	  guardot = 0;
	else  /* Otherwise cache a stronger check. */
	  mode += IRCONV_CHECK-IRCONV_INDEX;
      }
      sp[-1] = emitir(op+guardot, sp[-1], sp[0]);
      /* Add to cache. */
      if (narrow_ref(ref))
	narrow_bpc_set(J, narrow_ref(ref), narrow_ref(sp[-1]), mode);
    }
  }
  lua_assert(sp == nc->stack+1);
  return nc->stack[0];
}
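The while loop is a small stack machine over a linear postfix list:
NARROW_REF/NARROW_INT leaves push an operand, regular ops pop two and push
one, and exactly one result remains. A standalone toy evaluator with the
same shape (using a separate stack rather than recycling the input buffer):

#include <assert.h>

enum { OP_PUSH, OP_ADD, OP_MUL };
typedef struct { int op, val; } PostIns;

static int postfix_eval(const PostIns *p, int n)
{
  int stack[32], sp = 0, i;
  for (i = 0; i < n; i++) {
    if (p[i].op == OP_PUSH) {
      stack[sp++] = p[i].val;
    } else {  /* Pops two operands and pushes one result. */
      assert(sp >= 2);
      sp--;
      stack[sp-1] = p[i].op == OP_ADD ? stack[sp-1] + stack[sp]
                                      : stack[sp-1] * stack[sp];
    }
  }
  assert(sp == 1);  /* Same invariant as lua_assert(sp == nc->stack+1). */
  return stack[0];
}

int main(void)
{
  /* (2 + 3) * 4 in postfix: 2 3 + 4 *. */
  PostIns prog[] = {{OP_PUSH,2}, {OP_PUSH,3}, {OP_ADD,0},
                    {OP_PUSH,4}, {OP_MUL,0}};
  assert(postfix_eval(prog, 5) == 20);
  return 0;
}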
Example #23
/* Convert a snapshot into a linear slot -> RegSP map.
** Note: unused slots are not initialized!
*/
void lj_snap_regspmap(uint16_t *rsmap, GCtrace *T, SnapNo snapno)
{
  SnapShot *snap = &T->snap[snapno];
  MSize n, nent = snap->nent;
  SnapEntry *map = &T->snapmap[snap->mapofs];
  BloomFilter rfilt = snap_renamefilter(T, snapno);
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    IRRef ref = snap_ref(sn);
    if (!irref_isk(ref)) {
      IRIns *ir = &T->ir[ref];
      uint32_t rs = ir->prev;
      if (bloomtest(rfilt, ref))
	rs = snap_renameref(T, snapno, ref, rs);
      rsmap[snap_slot(sn)] = (uint16_t)rs;
    }
  }
}
Example #24
/* Array and hash load forwarding. */
static TRef fwd_ahload(jit_State *J, IRRef xref)
{
  IRIns *xr = IR(xref);
  IRRef lim = xref;  /* Search limit. */
  IRRef ref;

  /* Search for conflicting stores. */
  ref = J->chain[fins->o+IRDELTA_L2S];
  while (ref > xref) {
    IRIns *store = IR(ref);
    switch (aa_ahref(J, xr, IR(store->op1))) {
    case ALIAS_NO:   break;  /* Continue searching. */
    case ALIAS_MAY:  lim = ref; goto cselim;  /* Limit search for load. */
    case ALIAS_MUST: return store->op2;  /* Store forwarding. */
    }
    ref = store->prev;
  }

  /* No conflicting store (yet): const-fold loads from allocations. */
  {
    IRIns *ir = (xr->o == IR_HREFK || xr->o == IR_AREF) ? IR(xr->op1) : xr;
    IRRef tab = ir->op1;
    ir = IR(tab);
    if (ir->o == IR_TNEW || (ir->o == IR_TDUP && irref_isk(xr->op2))) {
      /* A NEWREF with a number key may end up pointing to the array part.
      ** But it's referenced from HSTORE and not found in the ASTORE chain.
      ** For now simply consider this a conflict without forwarding anything.
      */
      if (xr->o == IR_AREF) {
	IRRef ref2 = J->chain[IR_NEWREF];
	while (ref2 > tab) {
	  IRIns *newref = IR(ref2);
	  if (irt_isnum(IR(newref->op2)->t))
	    goto cselim;
	  ref2 = newref->prev;
	}
      }
      /* NEWREF inhibits CSE for HREF, and dependent FLOADs from HREFK/AREF.
      ** But the above search for conflicting stores was limited by xref.
      ** So continue searching, limited by the TNEW/TDUP. Store forwarding
      ** is ok, too. A conflict does NOT limit the search for a matching load.
      */
      while (ref > tab) {
	IRIns *store = IR(ref);
	switch (aa_ahref(J, xr, IR(store->op1))) {
	case ALIAS_NO:   break;  /* Continue searching. */
	case ALIAS_MAY:  goto cselim;  /* Conflicting store. */
	case ALIAS_MUST: return store->op2;  /* Store forwarding. */
	}
	ref = store->prev;
      }
      lua_assert(ir->o != IR_TNEW || irt_isnil(fins->t));
      if (irt_ispri(fins->t)) {
	return TREF_PRI(irt_type(fins->t));
      } else if (irt_isnum(fins->t) || (LJ_DUALNUM && irt_isint(fins->t)) ||
		 irt_isstr(fins->t)) {
	TValue keyv;
	cTValue *tv;
	IRIns *key = IR(xr->op2);
	if (key->o == IR_KSLOT) key = IR(key->op1);
	lj_ir_kvalue(J->L, &keyv, key);
	tv = lj_tab_get(J->L, ir_ktab(IR(ir->op1)), &keyv);
	lua_assert(itype2irt(tv) == irt_type(fins->t));
	if (irt_isnum(fins->t))
	  return lj_ir_knum_u64(J, tv->u64);
	else if (LJ_DUALNUM && irt_isint(fins->t))
	  return lj_ir_kint(J, intV(tv));
	else
	  return lj_ir_kstr(J, strV(tv));
      }
      /* Otherwise: don't intern as a constant. */
    }
  }

cselim:
  /* Try to find a matching load. Below the conflicting store, if any. */
  ref = J->chain[fins->o];
  while (ref > lim) {
    IRIns *load = IR(ref);
    if (load->op1 == xref)
      return ref;  /* Load forwarding. */
    ref = load->prev;
  }
  return 0;  /* Conflict or no match. */
}
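A standalone toy of the store-forwarding loop at the top of fwd_ahload: walk
the stores newest-first, skip definite non-aliases, forward the stored value
on a definite match, and give up on the first possible alias (modeled here
as a store to an unknown address):

#include <assert.h>

typedef struct { int known_addr, addr, val; } ToyStore;

static int toy_fwd_load(const ToyStore *s, int n, int addr, int *val)
{
  int i;
  for (i = 0; i < n; i++) {  /* s[0] is the most recent store. */
    if (!s[i].known_addr) return 0;  /* MAY alias: limit the search. */
    if (s[i].addr == addr) { *val = s[i].val; return 1; }  /* MUST alias. */
    /* Different known address: NO alias, keep searching. */
  }
  return 0;  /* No matching store. */
}

int main(void)
{
  ToyStore log[2] = {{1, 8, 70}, {1, 4, 30}};  /* t[4]=30, then t[8]=70. */
  int v;
  assert(toy_fwd_load(log, 2, 4, &v) && v == 30);  /* Forwarded past t[8]. */
  assert(!toy_fwd_load(log, 2, 12, &v));  /* Nothing stored there. */
  return 0;
}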
/* Unroll loop. */
static void loop_unroll(jit_State *J)
{
  IRRef1 phi[LJ_MAX_PHI];
  uint32_t nphi = 0;
  IRRef1 *subst;
  SnapNo onsnap;
  SnapShot *osnap, *loopsnap;
  SnapEntry *loopmap, *psentinel;
  IRRef ins, invar;

  /* Use temp buffer for substitution table.
  ** Only non-constant refs in [REF_BIAS,invar) are valid indexes.
  ** Caveat: don't call into the VM or run the GC or the buffer may be gone.
  */
  invar = J->cur.nins;
  subst = (IRRef1 *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf,
				   (invar-REF_BIAS)*sizeof(IRRef1)) - REF_BIAS;
  subst[REF_BASE] = REF_BASE;

  /* LOOP separates the pre-roll from the loop body. */
  emitir_raw(IRTG(IR_LOOP, IRT_NIL), 0, 0);

  /* Grow snapshot buffer and map for copy-substituted snapshots.
  ** Need up to twice the number of snapshots minus #0 and loop snapshot.
  ** Need up to twice the number of entries plus fallback substitutions
  ** from the loop snapshot entries for each new snapshot.
  ** Caveat: both calls may reallocate J->cur.snap and J->cur.snapmap!
  */
  onsnap = J->cur.nsnap;
  lj_snap_grow_buf(J, 2*onsnap-2);
  lj_snap_grow_map(J, J->cur.nsnapmap*2+(onsnap-2)*J->cur.snap[onsnap-1].nent);

  /* The loop snapshot is used for fallback substitutions. */
  loopsnap = &J->cur.snap[onsnap-1];
  loopmap = &J->cur.snapmap[loopsnap->mapofs];
  /* The PC of snapshot #0 and the loop snapshot must match. */
  psentinel = &loopmap[loopsnap->nent];
  lua_assert(*psentinel == J->cur.snapmap[J->cur.snap[0].nent]);
  *psentinel = SNAP(255, 0, 0);  /* Replace PC with temporary sentinel. */

  /* Start substitution with snapshot #1 (#0 is empty for root traces). */
  osnap = &J->cur.snap[1];

  /* Copy and substitute all recorded instructions and snapshots. */
  for (ins = REF_FIRST; ins < invar; ins++) {
    IRIns *ir;
    IRRef op1, op2;

    if (ins >= osnap->ref)  /* Instruction belongs to next snapshot? */
      loop_subst_snap(J, osnap++, loopmap, subst);  /* Copy-substitute it. */

    /* Substitute instruction operands. */
    ir = IR(ins);
    op1 = ir->op1;
    if (!irref_isk(op1)) op1 = subst[op1];
    op2 = ir->op2;
    if (!irref_isk(op2)) op2 = subst[op2];
    if (irm_kind(lj_ir_mode[ir->o]) == IRM_N &&
	op1 == ir->op1 && op2 == ir->op2) {  /* Regular invariant ins? */
      subst[ins] = (IRRef1)ins;  /* Shortcut. */
    } else {
      /* Re-emit substituted instruction to the FOLD/CSE/etc. pipeline. */
      IRType1 t = ir->t;  /* Get this first, since emitir may invalidate ir. */
      IRRef ref = tref_ref(emitir(ir->ot & ~IRT_ISPHI, op1, op2));
      subst[ins] = (IRRef1)ref;
      if (ref != ins) {
	IRIns *irr = IR(ref);
	if (ref < invar) {  /* Loop-carried dependency? */
	  /* Potential PHI? */
	  if (!irref_isk(ref) && !irt_isphi(irr->t) && !irt_ispri(irr->t)) {
	    irt_setphi(irr->t);
	    if (nphi >= LJ_MAX_PHI)
	      lj_trace_err(J, LJ_TRERR_PHIOV);
	    phi[nphi++] = (IRRef1)ref;
	  }
	  /* Check all loop-carried dependencies for type instability. */
	  if (!irt_sametype(t, irr->t)) {
	    if (irt_isinteger(t) && irt_isinteger(irr->t))
	      continue;
	    else if (irt_isnum(t) && irt_isinteger(irr->t))  /* Fix int->num. */
	      ref = tref_ref(emitir(IRTN(IR_CONV), ref, IRCONV_NUM_INT));
	    else if (irt_isnum(irr->t) && irt_isinteger(t))  /* Fix num->int. */
	      ref = tref_ref(emitir(IRTGI(IR_CONV), ref,
				    IRCONV_INT_NUM|IRCONV_CHECK));
	    else
	      lj_trace_err(J, LJ_TRERR_TYPEINS);
	    subst[ins] = (IRRef1)ref;
	    irr = IR(ref);
	    goto phiconv;
	  }
	} else if (ref != REF_DROP && irr->o == IR_CONV &&
		   ref > invar && irr->op1 < invar) {
	  /* May need an extra PHI for a CONV. */
	  ref = irr->op1;
	  irr = IR(ref);
	phiconv:
	  if (ref < invar && !irref_isk(ref) && !irt_isphi(irr->t)) {
	    irt_setphi(irr->t);
	    if (nphi >= LJ_MAX_PHI)
	      lj_trace_err(J, LJ_TRERR_PHIOV);
	    phi[nphi++] = (IRRef1)ref;
	  }
	}
      }
    }
  }
  if (!irt_isguard(J->guardemit))  /* Drop redundant snapshot. */
    J->cur.nsnapmap = (uint16_t)J->cur.snap[--J->cur.nsnap].mapofs;
  lua_assert(J->cur.nsnapmap <= J->sizesnapmap);
  *psentinel = J->cur.snapmap[J->cur.snap[0].nent];  /* Restore PC. */

  loop_emit_phi(J, subst, phi, nphi, onsnap);
}
/* Emit or eliminate collected PHIs. */
static void loop_emit_phi(jit_State *J, IRRef1 *subst, IRRef1 *phi, IRRef nphi,
			  SnapNo onsnap)
{
  int passx = 0;
  IRRef i, j, nslots;
  IRRef invar = J->chain[IR_LOOP];
  /* Pass #1: mark redundant and potentially redundant PHIs. */
  for (i = 0, j = 0; i < nphi; i++) {
    IRRef lref = phi[i];
    IRRef rref = subst[lref];
    if (lref == rref || rref == REF_DROP) {  /* Invariants are redundant. */
      irt_clearphi(IR(lref)->t);
    } else {
      phi[j++] = (IRRef1)lref;
      if (!(IR(rref)->op1 == lref || IR(rref)->op2 == lref)) {
	/* Quick check for simple recurrences failed, need pass2. */
	irt_setmark(IR(lref)->t);
	passx = 1;
      }
    }
  }
  nphi = j;
  /* Pass #2: traverse variant part and clear marks of non-redundant PHIs. */
  if (passx) {
    SnapNo s;
    for (i = J->cur.nins-1; i > invar; i--) {
      IRIns *ir = IR(i);
      if (!irref_isk(ir->op2)) irt_clearmark(IR(ir->op2)->t);
      if (!irref_isk(ir->op1)) {
	irt_clearmark(IR(ir->op1)->t);
	if (ir->op1 < invar &&
	    ir->o >= IR_CALLN && ir->o <= IR_CARG) {  /* ORDER IR */
	  ir = IR(ir->op1);
	  while (ir->o == IR_CARG) {
	    if (!irref_isk(ir->op2)) irt_clearmark(IR(ir->op2)->t);
	    if (irref_isk(ir->op1)) break;
	    ir = IR(ir->op1);
	    irt_clearmark(ir->t);
	  }
	}
      }
    }
    for (s = J->cur.nsnap-1; s >= onsnap; s--) {
      SnapShot *snap = &J->cur.snap[s];
      SnapEntry *map = &J->cur.snapmap[snap->mapofs];
      MSize n, nent = snap->nent;
      for (n = 0; n < nent; n++) {
	IRRef ref = snap_ref(map[n]);
	if (!irref_isk(ref)) irt_clearmark(IR(ref)->t);
      }
    }
  }
  /* Pass #3: add PHIs for variant slots without a corresponding SLOAD. */
  nslots = J->baseslot+J->maxslot;
  for (i = 1; i < nslots; i++) {
    IRRef ref = tref_ref(J->slot[i]);
    while (!irref_isk(ref) && ref != subst[ref]) {
      IRIns *ir = IR(ref);
      irt_clearmark(ir->t);  /* Unmark potential uses, too. */
      if (irt_isphi(ir->t) || irt_ispri(ir->t))
	break;
      irt_setphi(ir->t);
      if (nphi >= LJ_MAX_PHI)
	lj_trace_err(J, LJ_TRERR_PHIOV);
      phi[nphi++] = (IRRef1)ref;
      ref = subst[ref];
      if (ref > invar)
	break;
    }
  }
  /* Pass #4: propagate non-redundant PHIs. */
  while (passx) {
    passx = 0;
    for (i = 0; i < nphi; i++) {
      IRRef lref = phi[i];
      IRIns *ir = IR(lref);
      if (!irt_ismarked(ir->t)) {  /* Propagate only from unmarked PHIs. */
	IRIns *irr = IR(subst[lref]);
	if (irt_ismarked(irr->t)) {  /* Right ref points to other PHI? */
	  irt_clearmark(irr->t);  /* Mark that PHI as non-redundant. */
	  passx = 1;  /* Retry. */
	}
      }
    }
  }
  /* Pass #5: emit PHI instructions or eliminate PHIs. */
  for (i = 0; i < nphi; i++) {
    IRRef lref = phi[i];
    IRIns *ir = IR(lref);
    if (!irt_ismarked(ir->t)) {  /* Emit PHI if not marked. */
      IRRef rref = subst[lref];
      if (rref > invar)
	irt_setphi(IR(rref)->t);
      emitir_raw(IRT(IR_PHI, irt_type(ir->t)), lref, rref);
    } else {  /* Otherwise eliminate PHI. */
      irt_clearmark(ir->t);
      irt_clearphi(ir->t);
    }
  }
}
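Pass #4 above is a fixpoint computation: liveness has to propagate along
PHI-to-PHI edges until no mark changes. A standalone toy with one right-hand
successor per PHI, where marked means presumed redundant:

#include <assert.h>

static void phi_fixpoint(const int *next, int *marked, int nphi)
{
  int changed = 1;
  while (changed) {
    int i;
    changed = 0;
    for (i = 0; i < nphi; i++) {
      if (!marked[i] && next[i] >= 0 && marked[next[i]]) {
        marked[next[i]] = 0;  /* Fed by a live PHI: not redundant after all. */
        changed = 1;
      }
    }
  }
}

int main(void)
{
  /* Chain 0 -> 1 -> 2; only 0 starts live, liveness ripples to 1 and 2. */
  int next[3] = {1, 2, -1};
  int marked[3] = {0, 1, 1};
  phi_fixpoint(next, marked, 3);
  assert(!marked[1] && !marked[2]);
  return 0;
}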
Example #27
/* Restore interpreter state from exit state with the help of a snapshot. */
void lj_snap_restore(jit_State *J, void *exptr)
{
  ExitState *ex = (ExitState *)exptr;
  SnapNo snapno = J->exitno;  /* For now, snapno == exitno. */
  Trace *T = J->trace[J->parent];
  SnapShot *snap = &T->snap[snapno];
  BCReg s, nslots = snap->nslots;
  IRRef2 *map = &T->snapmap[snap->mapofs];
  IRRef2 *flinks = map + nslots + snap->nframelinks;
  TValue *o, *newbase, *ntop;
  BloomFilter rfilt = snap_renamefilter(T, snapno);
  lua_State *L = J->L;

  /* Make sure the stack is big enough for the slots from the snapshot. */
  if (L->base + nslots >= L->maxstack) {
    L->top = curr_topL(L);
    lj_state_growstack(L, nslots - curr_proto(L)->framesize);
  }

  /* Fill stack slots with data from the registers and spill slots. */
  newbase = NULL;
  ntop = L->base;
  for (s = 0, o = L->base-1; s < nslots; s++, o++) {
    IRRef ref = snap_ref(map[s]);
    if (ref) {
      IRIns *ir = &T->ir[ref];
      if (irref_isk(ref)) {  /* Restore constant slot. */
	lj_ir_kvalue(L, o, ir);
      } else {
	IRType1 t = ir->t;
	RegSP rs = ir->prev;
	if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
	  rs = snap_renameref(T, snapno, ref, rs);
	if (ra_hasspill(regsp_spill(rs))) {  /* Restore from spill slot. */
	  int32_t *sps = &ex->spill[regsp_spill(rs)];
	  if (irt_isinteger(t)) {
	    setintV(o, *sps);
	  } else if (irt_isnum(t)) {
	    o->u64 = *(uint64_t *)sps;
	  } else {
	    lua_assert(!irt_ispri(t));  /* PRI refs never have a spill slot. */
	    setgcrefi(o->gcr, *sps);
	    setitype(o, irt_toitype(t));
	  }
	} else if (ra_hasreg(regsp_reg(rs))) {  /* Restore from register. */
	  Reg r = regsp_reg(rs);
	  if (irt_isinteger(t)) {
	    setintV(o, ex->gpr[r-RID_MIN_GPR]);
	  } else if (irt_isnum(t)) {
	    setnumV(o, ex->fpr[r-RID_MIN_FPR]);
	  } else {
	    if (!irt_ispri(t))
	      setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]);
	    setitype(o, irt_toitype(t));
	  }
	} else {  /* Restore frame slot. */
	  lua_assert(ir->o == IR_FRAME);
	  /* This works for both PTR and FUNC IR_FRAME. */
	  setgcrefp(o->fr.func, mref(T->ir[ir->op2].ptr, void));
	  if (s != 0)  /* Do not overwrite link to previous frame. */
	    o->fr.tp.ftsz = (int32_t)*--flinks;
	  if (irt_isfunc(ir->t)) {
	    GCfunc *fn = gco2func(gcref(T->ir[ir->op2].gcr));
	    if (isluafunc(fn)) {
	      TValue *fs;
	      newbase = o+1;
	      fs = newbase + funcproto(fn)->framesize;
	      if (fs > ntop) ntop = fs; /* Update top for newly added frames. */
	    }
	  }
	}
      }
    } else if (newbase) {
      setnilV(o);  /* Clear unreferenced slots of newly added frames. */
    }
  }
  /* ... (remainder of the function is not part of this excerpt). */
}
Example #28
/* Replay snapshot state to setup side trace. */
void lj_snap_replay(jit_State *J, GCtrace *T)
{
  SnapShot *snap = &T->snap[J->exitno];
  SnapEntry *map = &T->snapmap[snap->mapofs];
  MSize n, nent = snap->nent;
  BloomFilter seen = 0;
  int pass23 = 0;
  J->framedepth = 0;
  /* Emit IR for slots inherited from parent snapshot. */
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    BCReg s = snap_slot(sn);
    IRRef ref = snap_ref(sn);
    IRIns *ir = &T->ir[ref];
    TRef tr;
    /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */
    if (bloomtest(seen, ref) && (tr = snap_dedup(J, map, n, ref)) != 0)
      goto setslot;
    bloomset(seen, ref);
    if (irref_isk(ref)) {
      tr = snap_replay_const(J, ir);
    } else if (!regsp_used(ir->prev)) {
      pass23 = 1;
      lua_assert(s != 0);
      tr = s;
    } else {
      IRType t = irt_type(ir->t);
      uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT;
      if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM;
      if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY);
      tr = emitir_raw(IRT(IR_SLOAD, t), s, mode);
    }
  setslot:
    J->slot[s] = tr | (sn&(SNAP_CONT|SNAP_FRAME));  /* Same as TREF_* flags. */
    J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && s);
    if ((sn & SNAP_FRAME))
      J->baseslot = s+1;
  }
  if (pass23) {
    IRIns *irlast = &T->ir[snap->ref];
    pass23 = 0;
    /* Emit dependent PVALs. */
    for (n = 0; n < nent; n++) {
      SnapEntry sn = map[n];
      IRRef refp = snap_ref(sn);
      IRIns *ir = &T->ir[refp];
      if (regsp_reg(ir->r) == RID_SUNK) {
	if (J->slot[snap_slot(sn)] != snap_slot(sn)) continue;
	pass23 = 1;
	lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP ||
		   ir->o == IR_CNEW || ir->o == IR_CNEWI);
	if (ir->op1 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op1);
	if (ir->op2 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op2);
	if (LJ_HASFFI && ir->o == IR_CNEWI) {
	  if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP)
	    snap_pref(J, T, map, nent, seen, (ir+1)->op2);
	} else {
	  IRIns *irs;
	  for (irs = ir+1; irs < irlast; irs++)
	    if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
	      if (snap_pref(J, T, map, nent, seen, irs->op2) == 0)
		snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1);
	      else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) &&
		       irs+1 < irlast && (irs+1)->o == IR_HIOP)
		snap_pref(J, T, map, nent, seen, (irs+1)->op2);
	    }
	}
      } else if (!irref_isk(refp) && !regsp_used(ir->prev)) {
	lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
	J->slot[snap_slot(sn)] = snap_pref(J, T, map, nent, seen, ir->op1);
      }
    }
    /* Replay sunk instructions. */
    for (n = 0; pass23 && n < nent; n++) {
      SnapEntry sn = map[n];
      IRRef refp = snap_ref(sn);
      IRIns *ir = &T->ir[refp];
      if (regsp_reg(ir->r) == RID_SUNK) {
	TRef op1, op2;
	if (J->slot[snap_slot(sn)] != snap_slot(sn)) {  /* De-dup allocs. */
	  J->slot[snap_slot(sn)] = J->slot[J->slot[snap_slot(sn)]];
	  continue;
	}
	op1 = ir->op1;
	if (op1 >= T->nk) op1 = snap_pref(J, T, map, nent, seen, op1);
	op2 = ir->op2;
	if (op2 >= T->nk) op2 = snap_pref(J, T, map, nent, seen, op2);
	if (LJ_HASFFI && ir->o == IR_CNEWI) {
	  if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP) {
	    lj_needsplit(J);  /* Emit joining HIOP. */
	    op2 = emitir_raw(IRT(IR_HIOP, IRT_I64), op2,
			     snap_pref(J, T, map, nent, seen, (ir+1)->op2));
	  }
	  J->slot[snap_slot(sn)] = emitir(ir->ot & ~(IRT_MARK|IRT_ISPHI), op1, op2);
	} else {
	  IRIns *irs;
	  TRef tr = emitir(ir->ot, op1, op2);
	  J->slot[snap_slot(sn)] = tr;
	  for (irs = ir+1; irs < irlast; irs++)
	    if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
	      IRIns *irr = &T->ir[irs->op1];
	      TRef val, key = irr->op2, tmp = tr;
	      if (irr->o != IR_FREF) {
		IRIns *irk = &T->ir[key];
		if (irr->o == IR_HREFK)
		  key = lj_ir_kslot(J, snap_replay_const(J, &T->ir[irk->op1]),
				    irk->op2);
		else
		  key = snap_replay_const(J, irk);
		if (irr->o == IR_HREFK || irr->o == IR_AREF) {
		  IRIns *irf = &T->ir[irr->op1];
		  tmp = emitir(irf->ot, tmp, irf->op2);
		}
	      }
	      tmp = emitir(irr->ot, tmp, key);
	      val = snap_pref(J, T, map, nent, seen, irs->op2);
	      if (val == 0) {
		IRIns *irc = &T->ir[irs->op2];
		lua_assert(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT);
		val = snap_pref(J, T, map, nent, seen, irc->op1);
		val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT);
	      } else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) &&
			 irs+1 < irlast && (irs+1)->o == IR_HIOP) {
		IRType t = IRT_I64;
		if (LJ_SOFTFP && irt_type((irs+1)->t) == IRT_SOFTFP)
		  t = IRT_NUM;
		lj_needsplit(J);
		if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) {
		  uint64_t k = (uint32_t)T->ir[irs->op2].i +
			       ((uint64_t)T->ir[(irs+1)->op2].i << 32);
		  val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM,
				  lj_ir_k64_find(J, k));
		} else {
		  val = emitir_raw(IRT(IR_HIOP, t), val,
			  snap_pref(J, T, map, nent, seen, (irs+1)->op2));
		}
		tmp = emitir(IRT(irs->o, t), tmp, val);
		continue;
	      }
	      tmp = emitir(irs->ot, tmp, val);
	    } else if (LJ_HASFFI && irs->o == IR_XBAR && ir->o == IR_CNEW) {
	      emitir(IRT(IR_XBAR, IRT_NIL), 0, 0);
	    }
	}
      }
    }
  }
  J->base = J->slot + J->baseslot;
  J->maxslot = snap->nslots - J->baseslot;
  lj_snap_add(J);
  if (pass23)  /* Need explicit GC step _after_ initial snapshot. */
    emitir_raw(IRTG(IR_GCSTEP, IRT_NIL), 0, 0);
}
Example #29
/* Restore interpreter state from exit state with the help of a snapshot. */
const BCIns *lj_snap_restore(jit_State *J, void *exptr)
{
  ExitState *ex = (ExitState *)exptr;
  SnapNo snapno = J->exitno;  /* For now, snapno == exitno. */
  GCtrace *T = traceref(J, J->parent);
  SnapShot *snap = &T->snap[snapno];
  MSize n, nent = snap->nent;
  SnapEntry *map = &T->snapmap[snap->mapofs];
  SnapEntry *flinks = map + nent + snap->depth;
  int32_t ftsz0;
  BCReg nslots = snap->nslots;
  TValue *frame;
  BloomFilter rfilt = snap_renamefilter(T, snapno);
  const BCIns *pc = snap_pc(map[nent]);
  lua_State *L = J->L;

  /* Set interpreter PC to the next PC to get correct error messages. */
  setcframe_pc(cframe_raw(L->cframe), pc+1);

  /* Make sure the stack is big enough for the slots from the snapshot. */
  if (LJ_UNLIKELY(L->base + nslots > tvref(L->maxstack))) {
    L->top = curr_topL(L);
    lj_state_growstack(L, nslots - curr_proto(L)->framesize);
  }

  /* Fill stack slots with data from the registers and spill slots. */
  frame = L->base-1;
  ftsz0 = frame_ftsz(frame);  /* Preserve link to previous frame in slot #0. */
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    IRRef ref = snap_ref(sn);
    BCReg s = snap_slot(sn);
    TValue *o = &frame[s];  /* Stack slots are relative to start frame. */
    IRIns *ir = &T->ir[ref];
    if (irref_isk(ref)) {  /* Restore constant slot. */
      lj_ir_kvalue(L, o, ir);
      if ((sn & (SNAP_CONT|SNAP_FRAME))) {
	/* Overwrite tag with frame link. */
	o->fr.tp.ftsz = s != 0 ? (int32_t)*flinks-- : ftsz0;
	if ((sn & SNAP_FRAME)) {
	  GCfunc *fn = ir_kfunc(ir);
	  if (isluafunc(fn)) {
	    MSize framesize = funcproto(fn)->framesize;
	    L->base = ++o;
	    if (LJ_UNLIKELY(o + framesize > tvref(L->maxstack))) {
	      ptrdiff_t fsave = savestack(L, frame);
	      L->top = o;
	      lj_state_growstack(L, framesize);  /* Grow again. */
	      frame = restorestack(L, fsave);
	    }
	  }
	}
      }
    } else if (!(sn & SNAP_NORESTORE)) {
      IRType1 t = ir->t;
      RegSP rs = ir->prev;
      lua_assert(!(sn & (SNAP_CONT|SNAP_FRAME)));
      if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
	rs = snap_renameref(T, snapno, ref, rs);
      if (ra_hasspill(regsp_spill(rs))) {  /* Restore from spill slot. */
	int32_t *sps = &ex->spill[regsp_spill(rs)];
	if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) {
	  o->u32.lo = (uint32_t)*sps;
	} else if (irt_isinteger(t)) {
	  setintV(o, *sps);
#if !LJ_SOFTFP
	} else if (irt_isnum(t)) {
	  o->u64 = *(uint64_t *)sps;
#endif
#if LJ_64
	} else if (irt_islightud(t)) {
	  /* 64 bit lightuserdata which may escape already has the tag bits. */
	  o->u64 = *(uint64_t *)sps;
#endif
	} else {
	  lua_assert(!irt_ispri(t));  /* PRI refs never have a spill slot. */
	  setgcrefi(o->gcr, *sps);
	  setitype(o, irt_toitype(t));
	}
      } else {  /* Restore from register. */
	Reg r = regsp_reg(rs);
	lua_assert(ra_hasreg(r));
	if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) {
	  o->u32.lo = (uint32_t)ex->gpr[r-RID_MIN_GPR];
	} else if (irt_isinteger(t)) {
	  setintV(o, (int32_t)ex->gpr[r-RID_MIN_GPR]);
#if !LJ_SOFTFP
	} else if (irt_isnum(t)) {
	  setnumV(o, ex->fpr[r-RID_MIN_FPR]);
#endif
#if LJ_64
	} else if (irt_islightud(t)) {
	  /* 64 bit lightuserdata which may escape already has the tag bits. */
	  o->u64 = ex->gpr[r-RID_MIN_GPR];
#endif
	} else {
	  if (!irt_ispri(t))
	    setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]);
	  setitype(o, irt_toitype(t));
	}
      }
      if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) {
	rs = (ir+1)->prev;
	if (LJ_UNLIKELY(bloomtest(rfilt, ref+1)))
	  rs = snap_renameref(T, snapno, ref+1, rs);
	o->u32.hi = (ra_hasspill(regsp_spill(rs))) ?
	    (uint32_t)*&ex->spill[regsp_spill(rs)] :
	    (uint32_t)ex->gpr[regsp_reg(rs)-RID_MIN_GPR];
      }
    }
  }
  switch (bc_op(*pc)) {
  case BC_CALLM: case BC_CALLMT: case BC_RETM: case BC_TSETM:
    L->top = frame + nslots;
    break;
  default:
    L->top = curr_topL(L);
    break;
  }
  lua_assert(map + nent == flinks);
  return pc;
}
/* Transform the old IR to the new IR. */
static void split_ir(jit_State *J)
{
  IRRef nins = J->cur.nins, nk = J->cur.nk;
  MSize irlen = nins - nk;
  MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1));
  IRIns *oir = (IRIns *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, need);
  IRRef1 *hisubst;
  IRRef ref, snref;
  SnapShot *snap;

  /* Copy old IR to buffer. */
  memcpy(oir, IR(nk), irlen*sizeof(IRIns));
  /* Bias hiword substitution table and old IR. Loword kept in field prev. */
  hisubst = (IRRef1 *)&oir[irlen] - nk;
  oir -= nk;

  /* Remove all IR instructions, but retain IR constants. */
  J->cur.nins = REF_FIRST;
  J->loopref = 0;

  /* Process constants and fixed references. */
  for (ref = nk; ref <= REF_BASE; ref++) {
    IRIns *ir = &oir[ref];
    if ((LJ_SOFTFP && ir->o == IR_KNUM) || ir->o == IR_KINT64) {
      /* Split up 64 bit constant. */
      TValue tv = *ir_k64(ir);
      ir->prev = lj_ir_kint(J, (int32_t)tv.u32.lo);
      hisubst[ref] = lj_ir_kint(J, (int32_t)tv.u32.hi);
    } else {
      ir->prev = ref;  /* Identity substitution for loword. */
      hisubst[ref] = 0;
    }
  }

  /* Process old IR instructions. */
  snap = J->cur.snap;
  snref = snap->ref;
  for (ref = REF_FIRST; ref < nins; ref++) {
    IRIns *ir = &oir[ref];
    IRRef nref = lj_ir_nextins(J);
    IRIns *nir = IR(nref);
    IRRef hi = 0;

    if (ref >= snref) {
      snap->ref = nref;
      split_subst_snap(J, snap++, oir);
      snref = snap < &J->cur.snap[J->cur.nsnap] ? snap->ref : ~(IRRef)0;
    }

    /* Copy-substitute old instruction to new instruction. */
    nir->op1 = ir->op1 < nk ? ir->op1 : oir[ir->op1].prev;
    nir->op2 = ir->op2 < nk ? ir->op2 : oir[ir->op2].prev;
    ir->prev = nref;  /* Loword substitution. */
    nir->o = ir->o;
    nir->t.irt = ir->t.irt & ~(IRT_MARK|IRT_ISPHI);
    hisubst[ref] = 0;

    /* Split 64 bit instructions. */
#if LJ_SOFTFP
    if (irt_isnum(ir->t)) {
      nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD);  /* Turn into INT op. */
      /* Note: hi ref = lo ref + 1! Required for SNAP_SOFTFPNUM logic. */
      switch (ir->o) {
      case IR_ADD:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_add);
	break;
      case IR_SUB:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_sub);
	break;
      case IR_MUL:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_mul);
	break;
      case IR_DIV:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_div);
	break;
      case IR_POW:
	hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi);
	break;
      case IR_FPMATH:
	/* Try to rejoin pow from EXP2, MUL and LOG2. */
	if (nir->op2 == IRFPM_EXP2 && nir->op1 > J->loopref) {
	  IRIns *irp = IR(nir->op1);
	  if (irp->o == IR_CALLN && irp->op2 == IRCALL_softfp_mul) {
	    IRIns *irm4 = IR(irp->op1);
	    IRIns *irm3 = IR(irm4->op1);
	    IRIns *irm12 = IR(irm3->op1);
	    IRIns *irl1 = IR(irm12->op1);
	    if (irm12->op1 > J->loopref && irl1->o == IR_CALLN &&
		irl1->op2 == IRCALL_lj_vm_log2) {
	      IRRef tmp = irl1->op1;  /* Recycle first two args from LOG2. */
	      IRRef arg3 = irm3->op2, arg4 = irm4->op2;
	      J->cur.nins--;
	      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg3);
	      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg4);
	      ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_pow);
	      hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
	      break;
	    }
	  }
	}
	hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2);
	break;
      case IR_ATAN2:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_atan2);
	break;
      case IR_LDEXP:
	hi = split_call_li(J, hisubst, oir, ir, IRCALL_ldexp);
	break;
      case IR_NEG: case IR_ABS:
	nir->o = IR_CONV;  /* Pass through loword. */
	nir->op2 = (IRT_INT << 5) | IRT_INT;
	hi = split_emit(J, IRT(ir->o == IR_NEG ? IR_BXOR : IR_BAND, IRT_SOFTFP),
			hisubst[ir->op1], hisubst[ir->op2]);
	break;
      case IR_SLOAD:
	if ((nir->op2 & IRSLOAD_CONVERT)) {  /* Convert from int to number. */
	  nir->op2 &= ~IRSLOAD_CONVERT;
	  ir->prev = nref = split_emit(J, IRTI(IR_CALLN), nref,
				       IRCALL_softfp_i2d);
	  hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
	  break;
	}
	/* fallthrough */
      case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
      case IR_STRTO:
	hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
	break;
      case IR_XLOAD: {
	IRIns inslo = *nir;  /* Save/undo the emit of the lo XLOAD. */
	J->cur.nins--;
	hi = split_ptr(J, oir, ir->op1);  /* Insert the hiref ADD. */
	nref = lj_ir_nextins(J);
	nir = IR(nref);
	*nir = inslo;  /* Re-emit lo XLOAD immediately before hi XLOAD. */
	hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP), hi, ir->op2);
#if LJ_LE
	ir->prev = nref;
#else
	ir->prev = hi; hi = nref;
#endif
	break;
	}
      case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_XSTORE:
	split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nir->op1, hisubst[ir->op2]);
	break;
      case IR_CONV: {  /* Conversion to number. Others handled below. */
	IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
	UNUSED(st);
#if LJ_32 && LJ_HASFFI
	if (st == IRT_I64 || st == IRT_U64) {
	  hi = split_call_l(J, hisubst, oir, ir,
		 st == IRT_I64 ? IRCALL_fp64_l2d : IRCALL_fp64_ul2d);
	  break;
	}
#endif
	lua_assert(st == IRT_INT ||
		   (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT)));
	nir->o = IR_CALLN;
#if LJ_32 && LJ_HASFFI
	nir->op2 = st == IRT_INT ? IRCALL_softfp_i2d :
		   st == IRT_FLOAT ? IRCALL_softfp_f2d :
		   IRCALL_softfp_ui2d;
#else
	nir->op2 = IRCALL_softfp_i2d;
#endif
	hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
	break;
	}
      case IR_CALLN:
      case IR_CALLL:
      case IR_CALLS:
      case IR_CALLXS:
	goto split_call;
      case IR_PHI:
	if (nir->op1 == nir->op2)
	  J->cur.nins--;  /* Drop useless PHIs. */
	if (hisubst[ir->op1] != hisubst[ir->op2])
	  split_emit(J, IRT(IR_PHI, IRT_SOFTFP),
		     hisubst[ir->op1], hisubst[ir->op2]);
	break;
      case IR_HIOP:
	J->cur.nins--;  /* Drop joining HIOP. */
	ir->prev = nir->op1;
	hi = nir->op2;
	break;
      default:
	lua_assert(ir->o <= IR_NE || ir->o == IR_MIN || ir->o == IR_MAX);
	hi = split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP),
			hisubst[ir->op1], hisubst[ir->op2]);
	break;
      }
    } else
#endif
#if LJ_32 && LJ_HASFFI
    if (irt_isint64(ir->t)) {
      IRRef hiref = hisubst[ir->op1];
      nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD);  /* Turn into INT op. */
      switch (ir->o) {
      case IR_ADD:
      case IR_SUB:
	/* Use plain op for hiword if loword cannot produce a carry/borrow. */
	if (irref_isk(nir->op2) && IR(nir->op2)->i == 0) {
	  ir->prev = nir->op1;  /* Pass through loword. */
	  nir->op1 = hiref; nir->op2 = hisubst[ir->op2];
	  hi = nref;
	  break;
	}
	/* fallthrough */
      case IR_NEG:
	hi = split_emit(J, IRTI(IR_HIOP), hiref, hisubst[ir->op2]);
	break;
      case IR_MUL:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_lj_carith_mul64);
	break;
      case IR_DIV:
	hi = split_call_ll(J, hisubst, oir, ir,
			   irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
					      IRCALL_lj_carith_divu64);
	break;
      case IR_MOD:
	hi = split_call_ll(J, hisubst, oir, ir,
			   irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
					      IRCALL_lj_carith_modu64);
	break;
      case IR_POW:
	hi = split_call_ll(J, hisubst, oir, ir,
			   irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
					      IRCALL_lj_carith_powu64);
	break;
      case IR_FLOAD:
	lua_assert(ir->op2 == IRFL_CDATA_INT64);
	hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4);
#if LJ_BE
	ir->prev = hi; hi = nref;
#endif
	break;
      case IR_XLOAD:
	hi = split_emit(J, IRTI(IR_XLOAD), split_ptr(J, oir, ir->op1), ir->op2);
#if LJ_BE
	ir->prev = hi; hi = nref;
#endif
	break;
      case IR_XSTORE:
	split_emit(J, IRTI(IR_HIOP), nir->op1, hisubst[ir->op2]);
	break;
      case IR_CONV: {  /* Conversion to 64 bit integer. Others handled below. */
	IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
#if LJ_SOFTFP
	if (st == IRT_NUM) {  /* NUM to 64 bit int conv. */
	  hi = split_call_l(J, hisubst, oir, ir,
		 irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul);
	} else if (st == IRT_FLOAT) {  /* FLOAT to 64 bit int conv. */
	  nir->o = IR_CALLN;
	  nir->op2 = irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul;
	  hi = split_emit(J, IRTI(IR_HIOP), nref, nref);
	}
#else
	if (st == IRT_NUM || st == IRT_FLOAT) {  /* FP to 64 bit int conv. */
	  hi = split_emit(J, IRTI(IR_HIOP), nir->op1, nref);
	}
#endif
	else if (st == IRT_I64 || st == IRT_U64) {  /* 64/64 bit cast. */
	  /* Drop cast, since assembler doesn't care. But fwd both parts. */
	  hi = hiref;
	  goto fwdlo;
	} else if ((ir->op2 & IRCONV_SEXT)) {  /* Sign-extend to 64 bit. */
	  IRRef k31 = lj_ir_kint(J, 31);
	  nir = IR(nref);  /* May have been reallocated. */
	  ir->prev = nir->op1;  /* Pass through loword. */
	  nir->o = IR_BSAR;  /* hi = bsar(lo, 31). */
	  nir->op2 = k31;
	  hi = nref;
	} else {  /* Zero-extend to 64 bit. */
	  hi = lj_ir_kint(J, 0);
	  goto fwdlo;
	}
	break;
	}
      case IR_CALLXS:
	goto split_call;
      case IR_PHI: {
	IRRef hiref2;
	if ((irref_isk(nir->op1) && irref_isk(nir->op2)) ||
	    nir->op1 == nir->op2)
	  J->cur.nins--;  /* Drop useless PHIs. */
	hiref2 = hisubst[ir->op2];
	if (!((irref_isk(hiref) && irref_isk(hiref2)) || hiref == hiref2))
	  split_emit(J, IRTI(IR_PHI), hiref, hiref2);
	break;
	}
      case IR_HIOP:
	J->cur.nins--;  /* Drop joining HIOP. */
	ir->prev = nir->op1;
	hi = nir->op2;
	break;
      default:
	lua_assert(ir->o <= IR_NE);  /* Comparisons. */
	split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]);
	break;
      }
    } else
#endif
#if LJ_SOFTFP
    if (ir->o == IR_SLOAD) {
      if ((nir->op2 & IRSLOAD_CONVERT)) {  /* Convert from number to int. */
	nir->op2 &= ~IRSLOAD_CONVERT;
	if (!(nir->op2 & IRSLOAD_TYPECHECK))
	  nir->t.irt = IRT_INT;  /* Drop guard. */
	split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
	ir->prev = split_num2int(J, nref, nref+1, irt_isguard(ir->t));
      }
    } else if (ir->o == IR_TOBIT) {
      IRRef tmp, op1 = ir->op1;
      J->cur.nins--;
#if LJ_LE
      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
      ir->prev = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_lj_vm_tobit);
    } else if (ir->o == IR_TOSTR) {
      if (hisubst[ir->op1]) {
	if (irref_isk(ir->op1))
	  nir->op1 = ir->op1;
	else
	  split_emit(J, IRT(IR_HIOP, IRT_NIL), hisubst[ir->op1], nref);
      }
    } else if (ir->o == IR_HREF || ir->o == IR_NEWREF) {
      if (irref_isk(ir->op2) && hisubst[ir->op2])
	nir->op2 = ir->op2;
    } else
#endif
    if (ir->o == IR_CONV) {  /* See above, too. */
      IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
#if LJ_32 && LJ_HASFFI
      if (st == IRT_I64 || st == IRT_U64) {  /* Conversion from 64 bit int. */
#if LJ_SOFTFP
	if (irt_isfloat(ir->t)) {
	  split_call_l(J, hisubst, oir, ir,
		       st == IRT_I64 ? IRCALL_fp64_l2f : IRCALL_fp64_ul2f);
	  J->cur.nins--;  /* Drop unused HIOP. */
	}
#else
	if (irt_isfp(ir->t)) {  /* 64 bit integer to FP conversion. */
	  ir->prev = split_emit(J, IRT(IR_HIOP, irt_type(ir->t)),
				hisubst[ir->op1], nref);
	}
#endif
	else {  /* Truncate to lower 32 bits. */
	fwdlo:
	  ir->prev = nir->op1;  /* Forward loword. */
	  /* Replace with NOP to avoid messing up the snapshot logic. */
	  nir->ot = IRT(IR_NOP, IRT_NIL);
	  nir->op1 = nir->op2 = 0;
	}
      }
#endif
#if LJ_SOFTFP && LJ_32 && LJ_HASFFI
      else if (irt_isfloat(ir->t)) {
	if (st == IRT_NUM) {
	  split_call_l(J, hisubst, oir, ir, IRCALL_softfp_d2f);
	  J->cur.nins--;  /* Drop unused HIOP. */
	} else {
	  nir->o = IR_CALLN;
	  nir->op2 = st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f;
	}
      } else if (st == IRT_FLOAT) {
	nir->o = IR_CALLN;
	nir->op2 = irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui;
      } else
#endif
#if LJ_SOFTFP
      if (st == IRT_NUM || (LJ_32 && LJ_HASFFI && st == IRT_FLOAT)) {
	if (irt_isguard(ir->t)) {
	  lua_assert(st == IRT_NUM && irt_isint(ir->t));
	  J->cur.nins--;
	  ir->prev = split_num2int(J, nir->op1, hisubst[ir->op1], 1);
	} else {
	  split_call_l(J, hisubst, oir, ir,
#if LJ_32 && LJ_HASFFI
	    st == IRT_NUM ?
	      (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) :
	      (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui)
#else
	    IRCALL_softfp_d2i
#endif
	  );
	  J->cur.nins--;  /* Drop unused HIOP. */
	}
      }
#endif
    } else if (ir->o == IR_CALLXS) {
      IRRef hiref;
    split_call:
      hiref = hisubst[ir->op1];
      if (hiref) {
	IROpT ot = nir->ot;
	IRRef op2 = nir->op2;
	nir->ot = IRT(IR_CARG, IRT_NIL);
#if LJ_LE
	nir->op2 = hiref;
#else
	nir->op2 = nir->op1; nir->op1 = hiref;
#endif
	ir->prev = nref = split_emit(J, ot, nref, op2);
      }
      if (LJ_SOFTFP ? irt_is64(ir->t) : irt_isint64(ir->t))
	hi = split_emit(J,
	  IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT),
	  nref, nref);
    } else if (ir->o == IR_CARG) {
      IRRef hiref = hisubst[ir->op1];
      if (hiref) {
	IRRef op2 = nir->op2;
#if LJ_LE
	nir->op2 = hiref;
#else
	nir->op2 = nir->op1; nir->op1 = hiref;
#endif
	ir->prev = nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2);
	nir = IR(nref);
      }
      hiref = hisubst[ir->op2];
      if (hiref) {
#if !LJ_TARGET_X86
	int carg = 0;
	IRIns *cir;
	for (cir = IR(nir->op1); cir->o == IR_CARG; cir = IR(cir->op1))
	  carg++;
	if ((carg & 1) == 0) {  /* Align 64 bit arguments. */
	  IRRef op2 = nir->op2;
	  nir->op2 = REF_NIL;
	  nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2);
	  nir = IR(nref);
	}
#endif
#if LJ_BE
	{ IRRef tmp = nir->op2; nir->op2 = hiref; hiref = tmp; }
#endif
	ir->prev = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, hiref);
      }
    } else if (ir->o == IR_CNEWI) {
      if (hisubst[ir->op2])
	split_emit(J, IRT(IR_HIOP, IRT_NIL), nref, hisubst[ir->op2]);
    } else if (ir->o == IR_LOOP) {
      J->loopref = nref;  /* Needed by assembler. */
    }
    hisubst[ref] = hi;  /* Store hiword substitution. */
  }
  if (snref == nins) {  /* Substitution for last snapshot. */
    snap->ref = J->cur.nins;
    split_subst_snap(J, snap, oir);
  }

  /* Add PHI marks. */
  for (ref = J->cur.nins-1; ref >= REF_FIRST; ref--) {
    IRIns *ir = IR(ref);
    if (ir->o != IR_PHI) break;
    if (!irref_isk(ir->op1)) irt_setphi(IR(ir->op1)->t);
    if (ir->op2 > J->loopref) irt_setphi(IR(ir->op2)->t);
  }
}
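When split_ir turns a 64 bit IR_ADD into a loword ADD plus a joining HIOP,
the intended semantics is add-with-carry on word pairs. A standalone check
of that arithmetic against native 64 bit addition (a sketch of the semantics
only; the IR itself just records the pairing):

#include <stdint.h>
#include <assert.h>

static void add64_split(uint32_t alo, uint32_t ahi, uint32_t blo, uint32_t bhi,
                        uint32_t *rlo, uint32_t *rhi)
{
  uint32_t lo = alo + blo;
  uint32_t carry = lo < alo;  /* Unsigned wraparound detects the carry. */
  *rlo = lo;
  *rhi = ahi + bhi + carry;
}

int main(void)
{
  uint64_t a = 0x00000001ffffffffULL, b = 1;
  uint32_t rlo, rhi;
  add64_split((uint32_t)a, (uint32_t)(a >> 32),
              (uint32_t)b, (uint32_t)(b >> 32), &rlo, &rhi);
  assert((((uint64_t)rhi << 32) | rlo) == a + b);  /* 0x200000000. */
  return 0;
}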