Ejemplo n.º 1
0
/*
** Array and hash load forwarding.
**
** xref is the reference (first operand) of the load held in fins.
** Returns the stored value on a must-alias store, a constant for loads from
** fresh allocations, the ref of an earlier matching load, or 0 when nothing
** can be forwarded.
*/
static TRef fwd_ahload(jit_State *J, IRRef xref)
{
  IRIns *xir = IR(xref);
  IRRef limit = xref;  /* Lower bound for the matching-load search. */
  IRRef cur;
  IRIns *tir;
  IRRef tab;

  /* Walk the matching store chain down to (but excluding) xref. */
  for (cur = J->chain[fins->o+IRDELTA_L2S]; cur > xref; cur = IR(cur)->prev) {
    IRIns *st = IR(cur);
    switch (aa_ahref(J, xir, IR(st->op1))) {
    case ALIAS_MUST:  /* Same location: forward the stored value. */
      return st->op2;
    case ALIAS_MAY:  /* Possible conflict: only loads above it may match. */
      limit = cur;
      goto cselim;
    case ALIAS_NO:  /* Unrelated store: keep looking. */
      break;
    }
  }

  /* No conflicting store so far. Check whether the table is a fresh
  ** allocation, which permits const-folding of the load.
  */
  tir = (xir->o == IR_HREFK || xir->o == IR_AREF) ? IR(xir->op1) : xir;
  tab = tir->op1;
  tir = IR(tab);
  if (tir->o != IR_TNEW && !(tir->o == IR_TDUP && irref_isk(xir->op2)))
    goto cselim;

  /* A NEWREF with a number key may end up in the array part, but it is
  ** referenced from a HSTORE and thus not part of the ASTORE chain.
  ** Treat any such NEWREF above the allocation as a conflict.
  */
  if (xir->o == IR_AREF) {
    IRRef nref;
    for (nref = J->chain[IR_NEWREF]; nref > tab; nref = IR(nref)->prev) {
      if (irt_isnum(IR(IR(nref)->op2)->t))
        goto cselim;
    }
  }

  /* The first store search stopped at xref. Resume it from where it left
  ** off, now bounded by the allocation itself. Store forwarding is still
  ** fine here. A conflict leaves the load search limit untouched.
  */
  for (; cur > tab; cur = IR(cur)->prev) {
    IRIns *st = IR(cur);
    switch (aa_ahref(J, xir, IR(st->op1))) {
    case ALIAS_MUST:  /* Same location: forward the stored value. */
      return st->op2;
    case ALIAS_MAY:  /* Conflicting store. */
      goto cselim;
    case ALIAS_NO:  /* Unrelated store: keep looking. */
      break;
    }
  }

  lua_assert(tir->o != IR_TNEW || irt_isnil(fins->t));
  if (irt_ispri(fins->t))
    return TREF_PRI(irt_type(fins->t));
  if (irt_isnum(fins->t) || irt_isstr(fins->t)) {
    /* Fetch the value for the constant key from the table constant. */
    TValue keyv;
    cTValue *tv;
    IRIns *kir = IR(xir->op2);
    if (kir->o == IR_KSLOT)
      kir = IR(kir->op1);
    lj_ir_kvalue(J->L, &keyv, kir);
    tv = lj_tab_get(J->L, ir_ktab(IR(tir->op1)), &keyv);
    lua_assert(itype2irt(tv) == irt_type(fins->t));
    if (irt_isstr(fins->t))
      return lj_ir_kstr(J, strV(tv));
    return lj_ir_knum_u64(J, tv->u64);
  }
  /* Any other result type is not interned as a constant. */

cselim:
  /* Look for an identical earlier load above the search limit. */
  for (cur = J->chain[fins->o]; cur > limit; cur = IR(cur)->prev) {
    if (IR(cur)->op1 == xref)
      return cur;  /* Load forwarding. */
  }
  return 0;  /* Conflict or no match. */
}
Ejemplo n.º 2
0
/*
** Transform the old IR to the new IR.
**
** The current IR (constants and instructions) is copied into the temporary
** buffer of the global state, followed by a table of hiword substitutions.
** The instruction count of the trace is then reset to REF_FIRST and every
** old instruction is re-emitted, with 64 bit operations split into a loword
** and a hiword part. For each old reference the loword substitute is kept
** in the prev field of the copied instruction and the hiword substitute is
** kept in hisubst[] (0 if none was produced). Snapshot refs are re-targeted
** along the way and PHI marks are re-added at the end.
*/
static void split_ir(jit_State *J)
{
    IRRef nins = J->cur.nins, nk = J->cur.nk;
    MSize irlen = nins - nk;
    /* Room for the IR copy plus one hiword substitution entry per ref. */
    MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1));
    IRIns *oir = (IRIns *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, need);
    IRRef1 *hisubst;
    IRRef ref, snref;
    SnapShot *snap;

    /* Copy old IR to buffer. */
    memcpy(oir, IR(nk), irlen*sizeof(IRIns));
    /* Bias hiword substitution table and old IR. Loword kept in field prev. */
    hisubst = (IRRef1 *)&oir[irlen] - nk;
    oir -= nk;

    /* Remove all IR instructions, but retain IR constants. */
    J->cur.nins = REF_FIRST;
    J->loopref = 0;

    /* Process constants and fixed references. */
    for (ref = nk; ref <= REF_BASE; ref++) {
        IRIns *ir = &oir[ref];
        if ((LJ_SOFTFP && ir->o == IR_KNUM) || ir->o == IR_KINT64) {
            /* Split up 64 bit constant. */
            TValue tv = *ir_k64(ir);
            ir->prev = lj_ir_kint(J, (int32_t)tv.u32.lo);
            hisubst[ref] = lj_ir_kint(J, (int32_t)tv.u32.hi);
        } else {
            ir->prev = ref;  /* Identity substitution for loword. */
            hisubst[ref] = 0;
        }
    }

    /* Process old IR instructions. */
    snap = J->cur.snap;
    snref = snap->ref;
    for (ref = REF_FIRST; ref < nins; ref++) {
        IRIns *ir = &oir[ref];
        IRRef nref = lj_ir_nextins(J);
        IRIns *nir = IR(nref);
        IRRef hi = 0;

        /* Reached the ref of the next snapshot: re-point it at the new ref,
        ** run split_subst_snap() on it and advance to the following snapshot.
        ** After the last snapshot snref is set to the maximum IRRef value.
        */
        if (ref >= snref) {
            snap->ref = nref;
            split_subst_snap(J, snap++, oir);
            snref = snap < &J->cur.snap[J->cur.nsnap] ? snap->ref : ~(IRRef)0;
        }

        /* Copy-substitute old instruction to new instruction. */
        nir->op1 = ir->op1 < nk ? ir->op1 : oir[ir->op1].prev;
        nir->op2 = ir->op2 < nk ? ir->op2 : oir[ir->op2].prev;
        ir->prev = nref;  /* Loword substitution. */
        nir->o = ir->o;
        nir->t.irt = ir->t.irt & ~(IRT_MARK|IRT_ISPHI);
        hisubst[ref] = 0;

        /* Split 64 bit instructions. */
#if LJ_SOFTFP
        /* Soft-float: instructions with a number result type. */
        if (irt_isnum(ir->t)) {
            nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD);  /* Turn into INT op. */
            /* Note: hi ref = lo ref + 1! Required for SNAP_SOFTFPNUM logic. */
            switch (ir->o) {
            case IR_ADD:
                hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_add);
                break;
            case IR_SUB:
                hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_sub);
                break;
            case IR_MUL:
                hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_mul);
                break;
            case IR_DIV:
                hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_div);
                break;
            case IR_POW:
                hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi);
                break;
            case IR_FPMATH:
                /* Try to rejoin pow from EXP2, MUL and LOG2. */
                if (nir->op2 == IRFPM_EXP2 && nir->op1 > J->loopref) {
                    IRIns *irp = IR(nir->op1);
                    if (irp->o == IR_CALLN && irp->op2 == IRCALL_softfp_mul) {
                        IRIns *irm4 = IR(irp->op1);
                        IRIns *irm3 = IR(irm4->op1);
                        IRIns *irm12 = IR(irm3->op1);
                        IRIns *irl1 = IR(irm12->op1);
                        if (irm12->op1 > J->loopref && irl1->o == IR_CALLN &&
                                irl1->op2 == IRCALL_lj_vm_log2) {
                            IRRef tmp = irl1->op1;  /* Recycle first two args from LOG2. */
                            IRRef arg3 = irm3->op2, arg4 = irm4->op2;
                            J->cur.nins--;
                            tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg3);
                            tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg4);
                            ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_pow);
                            hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
                            break;
                        }
                    }
                }
                hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2);
                break;
            case IR_ATAN2:
                hi = split_call_ll(J, hisubst, oir, ir, IRCALL_atan2);
                break;
            case IR_LDEXP:
                hi = split_call_li(J, hisubst, oir, ir, IRCALL_ldexp);
                break;
            case IR_NEG:
            case IR_ABS:
                nir->o = IR_CONV;  /* Pass through loword. */
                nir->op2 = (IRT_INT << 5) | IRT_INT;
                hi = split_emit(J, IRT(ir->o == IR_NEG ? IR_BXOR : IR_BAND, IRT_SOFTFP),
                                hisubst[ir->op1], hisubst[ir->op2]);
                break;
            case IR_SLOAD:
                if ((nir->op2 & IRSLOAD_CONVERT)) {  /* Convert from int to number. */
                    nir->op2 &= ~IRSLOAD_CONVERT;
                    ir->prev = nref = split_emit(J, IRTI(IR_CALLN), nref,
                                                 IRCALL_softfp_i2d);
                    hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
                    break;
                }
            /* fallthrough */
            case IR_ALOAD:
            case IR_HLOAD:
            case IR_ULOAD:
            case IR_VLOAD:
            case IR_STRTO:
                hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
                break;
            case IR_XLOAD: {
                IRIns inslo = *nir;  /* Save/undo the emit of the lo XLOAD. */
                J->cur.nins--;
                hi = split_ptr(J, oir, ir->op1);  /* Insert the hiref ADD. */
                nref = lj_ir_nextins(J);
                nir = IR(nref);
                *nir = inslo;  /* Re-emit lo XLOAD immediately before hi XLOAD. */
                hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP), hi, ir->op2);
#if LJ_LE
                ir->prev = nref;
#else
                ir->prev = hi;
                hi = nref;
#endif
                break;
            }
            case IR_ASTORE:
            case IR_HSTORE:
            case IR_USTORE:
            case IR_XSTORE:
                split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nir->op1, hisubst[ir->op2]);
                break;
            case IR_CONV: {  /* Conversion to number. Others handled below. */
                IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
                UNUSED(st);
#if LJ_32 && LJ_HASFFI
                if (st == IRT_I64 || st == IRT_U64) {
                    hi = split_call_l(J, hisubst, oir, ir,
                                      st == IRT_I64 ? IRCALL_fp64_l2d : IRCALL_fp64_ul2d);
                    break;
                }
#endif
                lua_assert(st == IRT_INT ||
                           (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT)));
                nir->o = IR_CALLN;
#if LJ_32 && LJ_HASFFI
                nir->op2 = st == IRT_INT ? IRCALL_softfp_i2d :
                           st == IRT_FLOAT ? IRCALL_softfp_f2d :
                           IRCALL_softfp_ui2d;
#else
                nir->op2 = IRCALL_softfp_i2d;
#endif
                hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
                break;
            }
            case IR_CALLN:
            case IR_CALLL:
            case IR_CALLS:
            case IR_CALLXS:
                goto split_call;
            case IR_PHI:
                if (nir->op1 == nir->op2)
                    J->cur.nins--;  /* Drop useless PHIs. */
                if (hisubst[ir->op1] != hisubst[ir->op2])
                    split_emit(J, IRT(IR_PHI, IRT_SOFTFP),
                               hisubst[ir->op1], hisubst[ir->op2]);
                break;
            case IR_HIOP:
                J->cur.nins--;  /* Drop joining HIOP. */
                ir->prev = nir->op1;
                hi = nir->op2;
                break;
            default:
                lua_assert(ir->o <= IR_NE || ir->o == IR_MIN || ir->o == IR_MAX);
                hi = split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP),
                                hisubst[ir->op1], hisubst[ir->op2]);
                break;
            }
        } else
#endif
#if LJ_32 && LJ_HASFFI
            /* 32 bit FFI: instructions with a 64 bit integer result type. */
            if (irt_isint64(ir->t)) {
                IRRef hiref = hisubst[ir->op1];
                nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD);  /* Turn into INT op. */
                switch (ir->o) {
                case IR_ADD:
                case IR_SUB:
                    /* Use plain op for hiword if loword cannot produce a carry/borrow. */
                    if (irref_isk(nir->op2) && IR(nir->op2)->i == 0) {
                        ir->prev = nir->op1;  /* Pass through loword. */
                        nir->op1 = hiref;
                        nir->op2 = hisubst[ir->op2];
                        hi = nref;
                        break;
                    }
                /* fallthrough */
                case IR_NEG:
                    hi = split_emit(J, IRTI(IR_HIOP), hiref, hisubst[ir->op2]);
                    break;
                case IR_MUL:
                    hi = split_call_ll(J, hisubst, oir, ir, IRCALL_lj_carith_mul64);
                    break;
                case IR_DIV:
                    hi = split_call_ll(J, hisubst, oir, ir,
                                       irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
                                       IRCALL_lj_carith_divu64);
                    break;
                case IR_MOD:
                    hi = split_call_ll(J, hisubst, oir, ir,
                                       irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
                                       IRCALL_lj_carith_modu64);
                    break;
                case IR_POW:
                    hi = split_call_ll(J, hisubst, oir, ir,
                                       irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
                                       IRCALL_lj_carith_powu64);
                    break;
                case IR_FLOAD:
                    lua_assert(ir->op2 == IRFL_CDATA_INT64);
                    hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4);
#if LJ_BE
                    ir->prev = hi;
                    hi = nref;
#endif
                    break;
                case IR_XLOAD:
                    hi = split_emit(J, IRTI(IR_XLOAD), split_ptr(J, oir, ir->op1), ir->op2);
#if LJ_BE
                    ir->prev = hi;
                    hi = nref;
#endif
                    break;
                case IR_XSTORE:
                    split_emit(J, IRTI(IR_HIOP), nir->op1, hisubst[ir->op2]);
                    break;
                case IR_CONV: {  /* Conversion to 64 bit integer. Others handled below. */
                    IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
#if LJ_SOFTFP
                    if (st == IRT_NUM) {  /* NUM to 64 bit int conv. */
                        hi = split_call_l(J, hisubst, oir, ir,
                                          irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul);
                    } else if (st == IRT_FLOAT) {  /* FLOAT to 64 bit int conv. */
                        nir->o = IR_CALLN;
                        nir->op2 = irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul;
                        hi = split_emit(J, IRTI(IR_HIOP), nref, nref);
                    }
#else
                    if (st == IRT_NUM || st == IRT_FLOAT) {  /* FP to 64 bit int conv. */
                        hi = split_emit(J, IRTI(IR_HIOP), nir->op1, nref);
                    }
#endif
                    else if (st == IRT_I64 || st == IRT_U64) {  /* 64/64 bit cast. */
                        /* Drop cast, since assembler doesn't care. */
                        goto fwdlo;
                    } else if ((ir->op2 & IRCONV_SEXT)) {  /* Sign-extend to 64 bit. */
                        IRRef k31 = lj_ir_kint(J, 31);
                        nir = IR(nref);  /* May have been reallocated. */
                        ir->prev = nir->op1;  /* Pass through loword. */
                        nir->o = IR_BSAR;  /* hi = bsar(lo, 31). */
                        nir->op2 = k31;
                        hi = nref;
                    } else {  /* Zero-extend to 64 bit. */
                        hi = lj_ir_kint(J, 0);
                        goto fwdlo;
                    }
                    break;
                }
                case IR_CALLXS:
                    goto split_call;
                case IR_PHI: {
                    IRRef hiref2;
                    if ((irref_isk(nir->op1) && irref_isk(nir->op2)) ||
                            nir->op1 == nir->op2)
                        J->cur.nins--;  /* Drop useless PHIs. */
                    hiref2 = hisubst[ir->op2];
                    if (!((irref_isk(hiref) && irref_isk(hiref2)) || hiref == hiref2))
                        split_emit(J, IRTI(IR_PHI), hiref, hiref2);
                    break;
                }
                case IR_HIOP:
                    J->cur.nins--;  /* Drop joining HIOP. */
                    ir->prev = nir->op1;
                    hi = nir->op2;
                    break;
                default:
                    lua_assert(ir->o <= IR_NE);  /* Comparisons. */
                    split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]);
                    break;
                }
            } else
#endif
#if LJ_SOFTFP
                /* Soft-float: instructions without a number result type that
                ** still need special handling.
                */
                if (ir->o == IR_SLOAD) {
                    if ((nir->op2 & IRSLOAD_CONVERT)) {  /* Convert from number to int. */
                        nir->op2 &= ~IRSLOAD_CONVERT;
                        if (!(nir->op2 & IRSLOAD_TYPECHECK))
                            nir->t.irt = IRT_INT;  /* Drop guard. */
                        split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
                        ir->prev = split_num2int(J, nref, nref+1, irt_isguard(ir->t));
                    }
                } else if (ir->o == IR_TOBIT) {
                    IRRef tmp, op1 = ir->op1;
                    J->cur.nins--;
#if LJ_LE
                    tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
                    tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
                    ir->prev = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_lj_vm_tobit);
                } else if (ir->o == IR_TOSTR) {
                    if (hisubst[ir->op1]) {
                        if (irref_isk(ir->op1))
                            nir->op1 = ir->op1;
                        else
                            split_emit(J, IRT(IR_HIOP, IRT_NIL), hisubst[ir->op1], nref);
                    }
                } else if (ir->o == IR_HREF || ir->o == IR_NEWREF) {
                    /* Keep the original (unsplit) constant as the key. */
                    if (irref_isk(ir->op2) && hisubst[ir->op2])
                        nir->op2 = ir->op2;
                } else
#endif
                    if (ir->o == IR_CONV) {  /* See above, too. */
                        IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
#if LJ_32 && LJ_HASFFI
                        if (st == IRT_I64 || st == IRT_U64) {  /* Conversion from 64 bit int. */
#if LJ_SOFTFP
                            if (irt_isfloat(ir->t)) {
                                split_call_l(J, hisubst, oir, ir,
                                             st == IRT_I64 ? IRCALL_fp64_l2f : IRCALL_fp64_ul2f);
                                J->cur.nins--;  /* Drop unused HIOP. */
                            }
#else
                            if (irt_isfp(ir->t)) {  /* 64 bit integer to FP conversion. */
                                ir->prev = split_emit(J, IRT(IR_HIOP, irt_type(ir->t)),
                                                      hisubst[ir->op1], nref);
                            }
#endif
                            else {  /* Truncate to lower 32 bits. */
fwdlo:
                                ir->prev = nir->op1;  /* Forward loword. */
                                /* Replace with NOP to avoid messing up the snapshot logic. */
                                nir->ot = IRT(IR_NOP, IRT_NIL);
                                nir->op1 = nir->op2 = 0;
                            }
                        }
#endif
#if LJ_SOFTFP && LJ_32 && LJ_HASFFI
                        else if (irt_isfloat(ir->t)) {
                            if (st == IRT_NUM) {
                                split_call_l(J, hisubst, oir, ir, IRCALL_softfp_d2f);
                                J->cur.nins--;  /* Drop unused HIOP. */
                            } else {
                                nir->o = IR_CALLN;
                                nir->op2 = st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f;
                            }
                        } else if (st == IRT_FLOAT) {
                            nir->o = IR_CALLN;
                            nir->op2 = irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui;
                        } else
#endif
#if LJ_SOFTFP
                            if (st == IRT_NUM || (LJ_32 && LJ_HASFFI && st == IRT_FLOAT)) {
                                if (irt_isguard(ir->t)) {
                                    lua_assert(st == IRT_NUM && irt_isint(ir->t));
                                    J->cur.nins--;
                                    ir->prev = split_num2int(J, nir->op1, hisubst[ir->op1], 1);
                                } else {
                                    split_call_l(J, hisubst, oir, ir,
#if LJ_32 && LJ_HASFFI
                                                 st == IRT_NUM ?
                                                 (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) :
                                                 (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui)
#else
                                                 IRCALL_softfp_d2i
#endif
                                                );
                                    J->cur.nins--;  /* Drop unused HIOP. */
                                }
                            }
#endif
                    } else if (ir->o == IR_CALLXS) {
                        IRRef hiref;
split_call:
                        /* Also reached by goto from the call cases above. */
                        hiref = hisubst[ir->op1];
                        if (hiref) {
                            /* First operand has a hiword: turn the new ins into a
                            ** CARG pairing both words and re-emit the call after it.
                            */
                            IROpT ot = nir->ot;
                            IRRef op2 = nir->op2;
                            nir->ot = IRT(IR_CARG, IRT_NIL);
#if LJ_LE
                            nir->op2 = hiref;
#else
                            nir->op2 = nir->op1;
                            nir->op1 = hiref;
#endif
                            ir->prev = nref = split_emit(J, ot, nref, op2);
                        }
                        if (LJ_SOFTFP ? irt_is64(ir->t) : irt_isint64(ir->t))
                            hi = split_emit(J,
                                            IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT),
                                            nref, nref);
                    } else if (ir->o == IR_CARG) {
                        IRRef hiref = hisubst[ir->op1];
                        if (hiref) {
                            IRRef op2 = nir->op2;
#if LJ_LE
                            nir->op2 = hiref;
#else
                            nir->op2 = nir->op1;
                            nir->op1 = hiref;
#endif
                            ir->prev = nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2);
                            nir = IR(nref);
                        }
                        hiref = hisubst[ir->op2];
                        if (hiref) {
#if !LJ_TARGET_X86
                            /* Count the CARGs preceding this one in the chain. */
                            int carg = 0;
                            IRIns *cir;
                            for (cir = IR(nir->op1); cir->o == IR_CARG; cir = IR(cir->op1))
                                carg++;
                            if ((carg & 1) == 0) {  /* Align 64 bit arguments. */
                                IRRef op2 = nir->op2;
                                nir->op2 = REF_NIL;
                                nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2);
                                nir = IR(nref);
                            }
#endif
#if LJ_BE
                            {
                                IRRef tmp = nir->op2;
                                nir->op2 = hiref;
                                hiref = tmp;
                            }
#endif
                            ir->prev = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, hiref);
                        }
                    } else if (ir->o == IR_CNEWI) {
                        if (hisubst[ir->op2])
                            split_emit(J, IRT(IR_HIOP, IRT_NIL), nref, hisubst[ir->op2]);
                    } else if (ir->o == IR_LOOP) {
                        J->loopref = nref;  /* Needed by assembler. */
                    }
        hisubst[ref] = hi;  /* Store hiword substitution. */
    }
    if (snref == nins) {  /* Substitution for last snapshot. */
        snap->ref = J->cur.nins;
        split_subst_snap(J, snap, oir);
    }

    /* Add PHI marks. */
    /* Walks backwards over the trailing run of PHIs and stops at the first
    ** instruction that is not a PHI.
    */
    for (ref = J->cur.nins-1; ref >= REF_FIRST; ref--) {
        IRIns *ir = IR(ref);
        if (ir->o != IR_PHI) break;
        if (!irref_isk(ir->op1)) irt_setphi(IR(ir->op1)->t);
        if (ir->op2 > J->loopref) irt_setphi(IR(ir->op2)->t);
    }
}
Ejemplo n.º 3
0
/*
** Replay snapshot state to setup side trace.
**
** Uses the snapshot of trace T selected by J->exitno and rebuilds J->slot[],
** J->framedepth and J->baseslot from its entries:
**   Pass 1 visits every entry: constants are replayed, entries for which
**     regsp_used(ir->prev) holds become inherited SLOADs, and all others are
**     deferred by storing the slot number itself as a placeholder.
**   Pass 2 calls snap_pref() for the operands the deferred entries depend on.
**   Pass 3 re-emits the sunk allocations together with their sunk stores.
** Afterwards J->base and J->maxslot are set, the first snapshot is added via
** lj_snap_add() and a GCSTEP is emitted if any sunk allocation was found.
*/
void lj_snap_replay(jit_State *J, GCtrace *T)
{
  SnapShot *snap = &T->snap[J->exitno];
  SnapEntry *map = &T->snapmap[snap->mapofs];
  MSize n, nent = snap->nent;
  BloomFilter seen = 0;
  int pass23 = 0;  /* Set when passes 2 and 3 are needed. */
  J->framedepth = 0;
  /* Emit IR for slots inherited from parent snapshot. */
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    BCReg s = snap_slot(sn);
    IRRef ref = snap_ref(sn);
    IRIns *ir = &T->ir[ref];
    TRef tr;
    /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */
    if (bloomtest(seen, ref) && (tr = snap_dedup(J, map, n, ref)) != 0)
      goto setslot;
    bloomset(seen, ref);
    if (irref_isk(ref)) {
      tr = snap_replay_const(J, ir);
    } else if (!regsp_used(ir->prev)) {
      /* Defer to passes 2/3. The slot number serves as a placeholder. */
      pass23 = 1;
      lua_assert(s != 0);
      tr = s;
    } else {
      IRType t = irt_type(ir->t);
      uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT;
      if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM;
      if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY);
      tr = emitir_raw(IRT(IR_SLOAD, t), s, mode);
    }
  setslot:
    J->slot[s] = tr | (sn&(SNAP_CONT|SNAP_FRAME));  /* Same as TREF_* flags. */
    J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && s);
    if ((sn & SNAP_FRAME))
      J->baseslot = s+1;
  }
  if (pass23) {
    IRIns *irlast = &T->ir[snap->ref];
    /* Re-used below: set again only if a sunk allocation needs a replay. */
    pass23 = 0;
    /* Emit dependent PVALs. */
    for (n = 0; n < nent; n++) {
      SnapEntry sn = map[n];
      IRRef refp = snap_ref(sn);
      IRIns *ir = &T->ir[refp];
      if (regsp_reg(ir->r) == RID_SUNK) {
	/* Skip slots which no longer hold their own placeholder. */
	if (J->slot[snap_slot(sn)] != snap_slot(sn)) continue;
	pass23 = 1;
	lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP ||
		   ir->o == IR_CNEW || ir->o == IR_CNEWI);
	if (ir->op1 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op1);
	if (ir->op2 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op2);
	if (LJ_HASFFI && ir->o == IR_CNEWI) {
	  if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP)
	    snap_pref(J, T, map, nent, seen, (ir+1)->op2);
	} else {
	  IRIns *irs;
	  /* Scan the instructions between the allocation and the snapshot. */
	  for (irs = ir+1; irs < irlast; irs++)
	    if (irs->r == RID_SINK && snap_sunk_store(J, ir, irs)) {
	      if (snap_pref(J, T, map, nent, seen, irs->op2) == 0)
		snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1);
	      else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) &&
		       irs+1 < irlast && (irs+1)->o == IR_HIOP)
		snap_pref(J, T, map, nent, seen, (irs+1)->op2);
	    }
	}
      } else if (!irref_isk(refp) && !regsp_used(ir->prev)) {
	lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
	J->slot[snap_slot(sn)] = snap_pref(J, T, map, nent, seen, ir->op1);
      }
    }
    /* Replay sunk instructions. */
    for (n = 0; pass23 && n < nent; n++) {
      SnapEntry sn = map[n];
      IRRef refp = snap_ref(sn);
      IRIns *ir = &T->ir[refp];
      if (regsp_reg(ir->r) == RID_SUNK) {
	TRef op1, op2;
	if (J->slot[snap_slot(sn)] != snap_slot(sn)) {  /* De-dup allocs. */
	  J->slot[snap_slot(sn)] = J->slot[J->slot[snap_slot(sn)]];
	  continue;
	}
	op1 = ir->op1;
	if (op1 >= T->nk) op1 = snap_pref(J, T, map, nent, seen, op1);
	op2 = ir->op2;
	if (op2 >= T->nk) op2 = snap_pref(J, T, map, nent, seen, op2);
	if (LJ_HASFFI && ir->o == IR_CNEWI) {
	  if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP) {
	    lj_needsplit(J);  /* Emit joining HIOP. */
	    op2 = emitir_raw(IRT(IR_HIOP, IRT_I64), op2,
			     snap_pref(J, T, map, nent, seen, (ir+1)->op2));
	  }
	  J->slot[snap_slot(sn)] = emitir(ir->ot, op1, op2);
	} else {
	  IRIns *irs;
	  /* Re-emit the allocation itself, then its sunk stores. */
	  TRef tr = emitir(ir->ot, op1, op2);
	  J->slot[snap_slot(sn)] = tr;
	  for (irs = ir+1; irs < irlast; irs++)
	    if (irs->r == RID_SINK && snap_sunk_store(J, ir, irs)) {
	      IRIns *irr = &T->ir[irs->op1];
	      TRef val, key = irr->op2, tmp = tr;
	      /* Rebuild the reference the store goes through. */
	      if (irr->o != IR_FREF) {
		IRIns *irk = &T->ir[key];
		if (irr->o == IR_HREFK)
		  key = lj_ir_kslot(J, snap_replay_const(J, &T->ir[irk->op1]),
				    irk->op2);
		else
		  key = snap_replay_const(J, irk);
		if (irr->o == IR_HREFK || irr->o == IR_AREF) {
		  IRIns *irf = &T->ir[irr->op1];
		  tmp = emitir(irf->ot, tmp, irf->op2);
		}
	      }
	      tmp = emitir(irr->ot, tmp, key);
	      val = snap_pref(J, T, map, nent, seen, irs->op2);
	      if (val == 0) {
		/* Stored value is an int to number conversion: redo it. */
		IRIns *irc = &T->ir[irs->op2];
		lua_assert(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT);
		val = snap_pref(J, T, map, nent, seen, irc->op1);
		val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT);
	      } else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) &&
			 irs+1 < irlast && (irs+1)->o == IR_HIOP) {
		/* Split 64 bit store: join lo and hi word into one value. */
		IRType t = IRT_I64;
		if (LJ_SOFTFP && irt_type((irs+1)->t) == IRT_SOFTFP)
		  t = IRT_NUM;
		lj_needsplit(J);
		if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) {
		  uint64_t k = (uint32_t)T->ir[irs->op2].i +
			       ((uint64_t)T->ir[(irs+1)->op2].i << 32);
		  val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM,
				  lj_ir_k64_find(J, k));
		} else {
		  val = emitir_raw(IRT(IR_HIOP, t), val,
			  snap_pref(J, T, map, nent, seen, (irs+1)->op2));
		}
		tmp = emitir(IRT(irs->o, t), tmp, val);
		continue;
	      }
	      tmp = emitir(irs->ot, tmp, val);
	    } else if (LJ_HASFFI && irs->o == IR_XBAR && ir->o == IR_CNEW) {
	      emitir(IRT(IR_XBAR, IRT_NIL), 0, 0);
	    }
	}
      }
    }
  }
  J->base = J->slot + J->baseslot;
  J->maxslot = snap->nslots - J->baseslot;
  lj_snap_add(J);
  if (pass23)  /* Need explicit GC step _after_ initial snapshot. */
    emitir_raw(IRTG(IR_GCSTEP, IRT_NIL), 0, 0);
}
Ejemplo n.º 4
0
/*
** Emit or eliminate collected PHIs.
**
** J       JIT state holding the trace being recorded.
** subst   Substitution table indexed by IR reference. For a PHI candidate,
**         subst[ref] is the reference used as the right operand of the PHI.
** phi     Array of PHI candidates (left references). It is compacted in
**         pass #1 and extended in pass #3.
** nphi    Number of candidates in phi on entry.
** onsnap  Lowest snapshot number whose entries are treated as uses.
**
** The IRT_MARK flag on a candidate means "possibly redundant". Whatever is
** still marked when pass #5 runs gets eliminated instead of emitted.
*/
static void loop_emit_phi(jit_State *J, IRRef1 *subst, IRRef1 *phi, IRRef nphi,
			  SnapNo onsnap)
{
  IRRef loopref = J->chain[IR_LOOP];
  IRRef idx, kept, nslots;
  int again = 0;

  /* Pass #1: drop invariant PHIs, mark the potentially redundant ones. */
  kept = 0;
  for (idx = 0; idx < nphi; idx++) {
    IRRef left = phi[idx];
    IRRef right = subst[left];
    IRIns *irright;
    if (left == right || right == REF_DROP) {  /* Invariants are redundant. */
      irt_clearphi(IR(left)->t);
      continue;
    }
    phi[kept++] = (IRRef1)left;
    irright = IR(right);
    if (irright->op1 != left && irright->op2 != left) {
      /* Not a simple recurrence: pass #2 has to look for other uses. */
      irt_setmark(IR(left)->t);
      again = 1;
    }
  }
  nphi = kept;

  /* Pass #2: every use in the variant part clears the mark of its operand. */
  if (again) {
    IRRef cur = J->cur.nins;
    SnapNo s;
    while (--cur > loopref) {
      IRIns *ir = IR(cur);
      if (!irref_isk(ir->op2))
	irt_clearmark(IR(ir->op2)->t);
      if (irref_isk(ir->op1))
	continue;
      irt_clearmark(IR(ir->op1)->t);
      if (!(ir->op1 < loopref &&
	    ir->o >= IR_CALLN && ir->o <= IR_CARG))  /* ORDER IR */
	continue;
      /* The first operand lies before the LOOP: follow its CARG chain, too. */
      for (ir = IR(ir->op1); ir->o == IR_CARG; ) {
	if (!irref_isk(ir->op2))
	  irt_clearmark(IR(ir->op2)->t);
	if (irref_isk(ir->op1))
	  break;
	ir = IR(ir->op1);
	irt_clearmark(ir->t);
      }
    }
    /* References held by the snapshots from onsnap upwards are uses, too. */
    for (s = J->cur.nsnap-1; s >= onsnap; s--) {
      SnapShot *snap = &J->cur.snap[s];
      SnapEntry *entry = &J->cur.snapmap[snap->mapofs];
      SnapEntry *last = entry + snap->nent;
      for (; entry < last; entry++) {
	IRRef ref = snap_ref(*entry);
	if (!irref_isk(ref))
	  irt_clearmark(IR(ref)->t);
      }
    }
  }

  /* Pass #3: add PHIs for variant slots without a corresponding SLOAD. */
  nslots = J->baseslot+J->maxslot;
  for (idx = 1; idx < nslots; idx++) {
    IRRef ref = tref_ref(J->slot[idx]);
    for (;;) {
      IRIns *ir;
      if (irref_isk(ref) || ref == subst[ref])
	break;
      ir = IR(ref);
      irt_clearmark(ir->t);  /* Unmark potential uses, too. */
      if (irt_isphi(ir->t) || irt_ispri(ir->t))
	break;
      irt_setphi(ir->t);
      if (nphi >= LJ_MAX_PHI)
	lj_trace_err(J, LJ_TRERR_PHIOV);
      phi[nphi++] = (IRRef1)ref;
      ref = subst[ref];
      if (ref > loopref)
	break;
    }
  }

  /* Pass #4: propagate non-redundancy until nothing changes anymore. */
  while (again) {
    again = 0;
    for (idx = 0; idx < nphi; idx++) {
      IRRef left = phi[idx];
      IRIns *irright;
      if (irt_ismarked(IR(left)->t))
	continue;  /* Propagate only from unmarked PHIs. */
      irright = IR(subst[left]);
      if (irt_ismarked(irright->t)) {  /* Right ref points to other PHI? */
	irt_clearmark(irright->t);  /* That PHI is non-redundant as well. */
	again = 1;  /* Retry. */
      }
    }
  }

  /* Pass #5: eliminate the PHIs that are still marked, emit all others. */
  for (idx = 0; idx < nphi; idx++) {
    IRRef left = phi[idx];
    IRIns *ir = IR(left);
    if (irt_ismarked(ir->t)) {  /* Still marked: eliminate PHI. */
      irt_clearmark(ir->t);
      irt_clearphi(ir->t);
    } else {  /* Otherwise emit the PHI instruction. */
      IRRef right = subst[left];
      if (right > loopref)
	irt_setphi(IR(right)->t);
      emitir_raw(IRT(IR_PHI, irt_type(ir->t)), left, right);
    }
  }
}
Ejemplo n.º 5
0
/* XLOAD forwarding.
**
** Works on the XLOAD described by fins: op1 is the address reference and
** op2 holds the IRXLOAD_* flags. Possible results:
** - The value operand (op2) of an earlier XSTORE for which aa_xref() reports
**   ALIAS_MUST. If the loaded type differs from the type of the stored value,
**   fins may instead be rewritten into a CONV of that value and RETRYFOLD is
**   returned.
** - The reference of an earlier XLOAD with the same address and type (CSE).
** - EMITFOLD, if neither applies or the load is flagged IRXLOAD_VOLATILE.
*/
TRef LJ_FASTCALL lj_opt_fwd_xload(jit_State *J)
{
  IRRef xref = fins->op1;
  IRIns *xr = IR(xref);
  IRRef lim = xref;  /* Search limit. */
  IRRef ref;

  /* Read-only loads skip the store search and go straight to CSE. */
  if ((fins->op2 & IRXLOAD_READONLY))
    goto cselim;
  /* Volatile loads are neither forwarded nor CSEd. */
  if ((fins->op2 & IRXLOAD_VOLATILE))
    goto doemit;

  /* Search for conflicting stores. */
  ref = J->chain[IR_XSTORE];
retry:
  /* Raise the limit to the heads of the CALLXS and XBAR chains, if higher. */
  if (J->chain[IR_CALLXS] > lim) lim = J->chain[IR_CALLXS];
  if (J->chain[IR_XBAR] > lim) lim = J->chain[IR_XBAR];
  while (ref > lim) {
    IRIns *store = IR(ref);
    switch (aa_xref(J, xr, fins, store)) {
    case ALIAS_NO:   break;  /* Continue searching. */
    case ALIAS_MAY:  lim = ref; goto cselim;  /* Limit search for load. */
    case ALIAS_MUST:
      /* Emit conversion if the loaded type doesn't match the forwarded type. */
      if (!irt_sametype(fins->t, IR(store->op2)->t)) {
	IRType st = irt_type(fins->t);
	if (st == IRT_I8 || st == IRT_I16) {  /* Trunc + sign-extend. */
	  st |= IRCONV_SEXT;
	} else if (st == IRT_U8 || st == IRT_U16) {  /* Trunc + zero-extend. */
	  /* Nothing to do: st stays the loaded type, without IRCONV_SEXT. */
	} else if (st == IRT_INT && !irt_isint(IR(store->op2)->t)) {
	  st = irt_type(IR(store->op2)->t);  /* Needs dummy CONV.int.*. */
	} else {  /* I64/U64 are boxed, U32 is hidden behind a CONV.num.u32. */
	  goto store_fwd;
	}
	/* Turn fins into a CONV of the stored value and fold it again. */
	fins->ot = IRTI(IR_CONV);
	fins->op1 = store->op2;
	/* st goes into the low bits. NOTE(review): IRT_INT<<5 is presumably the
	** destination type field of the CONV mode -- confirm against IRCONV_*.
	*/
	fins->op2 = (IRT_INT<<5)|st;
	return RETRYFOLD;
      }
    store_fwd:
      return store->op2;  /* Store forwarding. */
    }
    ref = store->prev;
  }

cselim:
  /* Try to find a matching load. Below the conflicting store, if any. */
  ref = J->chain[IR_XLOAD];
  while (ref > lim) {
    /* CSE for XLOAD depends on the type, but not on the IRXLOAD_* flags. */
    if (IR(ref)->op1 == xref && irt_sametype(IR(ref)->t, fins->t))
      return ref;
    ref = IR(ref)->prev;
  }

  /* Reassociate XLOAD across PHIs to handle a[i-1] forwarding case. */
  /* Only attempted for non-read-only loads inside a loop. The test
  ** xref == fins->op1 fails once xref has been replaced by a different
  ** reassociated reference, so that reference is not reassociated again.
  ** If reassoc_xref() returns 0, control falls through to doemit.
  */
  if (!(fins->op2 & IRXLOAD_READONLY) && J->chain[IR_LOOP] &&
      xref == fins->op1 && (xref = reassoc_xref(J, xr)) != 0) {
    ref = J->chain[IR_XSTORE];
    while (ref > lim)  /* Skip stores that have already been checked. */
      ref = IR(ref)->prev;
    lim = xref;
    xr = IR(xref);
    goto retry;  /* Retry with the reassociated reference. */
  }
doemit:
  return EMITFOLD;
}
Ejemplo n.º 6
0
/* Transform the old IR to the new IR.
**
** Copies the IR of the current trace (references nk..nins-1, i.e. constants
** and instructions) into a temporary buffer, resets the instruction count to
** REF_FIRST and re-emits every old instruction with substituted operands.
** An instruction with a 64 bit integer result type is turned into a 32 bit
** INT instruction for the loword. The hiword is produced by an additional
** instruction (HIOP, a second load/store/PHI, or whatever split_call64()
** emits) or by reusing the new instruction itself.
**
** Two substitution tables are maintained, both indexed by OLD reference:
** - oir[ref].prev  New reference of the loword (or of the whole value).
** - hisubst[ref]   New reference of the hiword, or 0 if there is none.
**
** Afterwards the PHI marks are restored and the references stored in all
** snapshots are replaced by their loword substitutions.
*/
static void split_ir(jit_State *J)
{
  IRRef nins = J->cur.nins, nk = J->cur.nk;
  MSize irlen = nins - nk;
  /* Room for irlen+1 old instructions plus irlen+1 hiword entries. */
  MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1));
  IRIns *oir = (IRIns *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, need);
  IRRef1 *hisubst;
  IRRef ref;

  /* Copy old IR to buffer. */
  memcpy(oir, IR(nk), irlen*sizeof(IRIns));
  /* Bias hiword substitution table and old IR. Loword kept in field prev. */
  hisubst = (IRRef1 *)&oir[irlen] - nk;
  oir -= nk;

  /* Remove all IR instructions, but retain IR constants. */
  J->cur.nins = REF_FIRST;

  /* Process constants and fixed references. */
  for (ref = nk; ref <= REF_BASE; ref++) {
    IRIns *ir = &oir[ref];
    if (ir->o == IR_KINT64) {  /* Split up 64 bit constant. */
      TValue tv = *ir_k64(ir);
      ir->prev = lj_ir_kint(J, (int32_t)tv.u32.lo);
      hisubst[ref] = lj_ir_kint(J, (int32_t)tv.u32.hi);
    } else {
      ir->prev = ref;  /* Identity substitution for loword. */
      hisubst[ref] = 0;
    }
  }

  /* Process old IR instructions. */
  for (ref = REF_FIRST; ref < nins; ref++) {
    IRIns *ir = &oir[ref];
    IRRef nref = lj_ir_nextins(J);
    IRIns *nir = IR(nref);
    IRRef hi = 0;  /* Hiword substitution for this instruction; 0 = none. */

    /* Copy-substitute old instruction to new instruction. */
    /* Operand values below nk lie outside the copied IR and are kept as-is. */
    nir->op1 = ir->op1 < nk ? ir->op1 : oir[ir->op1].prev;
    nir->op2 = ir->op2 < nk ? ir->op2 : oir[ir->op2].prev;
    ir->prev = nref;  /* Loword substitution. */
    nir->o = ir->o;
    /* Strip the mark and PHI flags. PHI marks are re-added further below. */
    nir->t.irt = ir->t.irt & ~(IRT_MARK|IRT_ISPHI);
    hisubst[ref] = 0;

    /* Split 64 bit instructions. */
    if (irt_isint64(ir->t)) {
      IRRef hiref = hisubst[ir->op1];  /* Hiword of the first operand. */
      nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD);  /* Turn into INT op. */
      switch (ir->o) {
      case IR_ADD:
      case IR_SUB:
	/* Use plain op for hiword if loword cannot produce a carry/borrow. */
	if (irref_isk(nir->op2) && IR(nir->op2)->i == 0) {
	  ir->prev = nir->op1;  /* Pass through loword. */
	  /* The new instruction itself becomes the hiword operation. */
	  nir->op1 = hiref; nir->op2 = hisubst[ir->op2];
	  hi = nref;
	  break;
	}
	/* fallthrough */
      case IR_NEG:
	hi = split_emit(J, IRTI(IR_HIOP), hiref, hisubst[ir->op2]);
	break;
      case IR_MUL:
	hi = split_call64(J, hisubst, oir, ir, IRCALL_lj_carith_mul64);
	break;
      case IR_DIV:
	hi = split_call64(J, hisubst, oir, ir,
			  irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
					     IRCALL_lj_carith_divu64);
	break;
      case IR_MOD:
	hi = split_call64(J, hisubst, oir, ir,
			  irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
					     IRCALL_lj_carith_modu64);
	break;
      case IR_POW:
	hi = split_call64(J, hisubst, oir, ir,
			  irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
					     IRCALL_lj_carith_powu64);
	break;
      case IR_FLOAD:
	lua_assert(ir->op2 == IRFL_CDATA_INT64);
	hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64HI);
#if LJ_BE
	/* Big-endian: the two loads swap their loword/hiword roles. */
	ir->prev = hi; hi = nref;
#endif
	break;
      case IR_XLOAD:
	hi = split_emit(J, IRTI(IR_XLOAD), split_ptr(J, nir->op1), ir->op2);
#if LJ_BE
	/* Big-endian: the two loads swap their loword/hiword roles. */
	ir->prev = hi; hi = nref;
#endif
	break;
      case IR_XSTORE:
	/* Little-endian: the second store gets the hiword of the value.
	** Otherwise the first store gets the hiword and the second the loword.
	*/
#if LJ_LE
	hiref = hisubst[ir->op2];
#else
	hiref = nir->op2; nir->op2 = hisubst[ir->op2];
#endif
	split_emit(J, IRTI(IR_XSTORE), split_ptr(J, nir->op1), hiref);
	break;
      case IR_CONV: {  /* Conversion to 64 bit integer. Others handled below. */
	IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
	if (st == IRT_NUM || st == IRT_FLOAT) {  /* FP to 64 bit int conv. */
	  hi = split_emit(J, IRTI(IR_HIOP), nir->op1, nref);
	} else if (st == IRT_I64 || st == IRT_U64) {  /* 64/64 bit cast. */
	  /* Drop cast, since assembler doesn't care. But forward both parts:
	  ** without this the hiword substitution of the cast result would be
	  ** stored as 0 and be lost for all later users.
	  */
	  hi = hiref;
	  goto fwdlo;
	} else if ((ir->op2 & IRCONV_SEXT)) {  /* Sign-extend to 64 bit. */
	  IRRef k31 = lj_ir_kint(J, 31);
	  nir = IR(nref);  /* May have been reallocated. */
	  ir->prev = nir->op1;  /* Pass through loword. */
	  nir->o = IR_BSAR;  /* hi = bsar(lo, 31). */
	  nir->op2 = k31;
	  hi = nref;
	} else {  /* Zero-extend to 64 bit. */
	  hi = lj_ir_kint(J, 0);
	  nir = IR(nref);  /* May have been reallocated; fwdlo uses nir. */
	  goto fwdlo;
	}
	break;
	}
      case IR_PHI: {
	IRRef hiref2;
	if ((irref_isk(nir->op1) && irref_isk(nir->op2)) ||
	    nir->op1 == nir->op2)
	  J->cur.nins--;  /* Drop useless PHIs. */
	hiref2 = hisubst[ir->op2];
	/* Emit a hiword PHI only if it wouldn't be useless itself. */
	if (!((irref_isk(hiref) && irref_isk(hiref2)) || hiref == hiref2))
	  split_emit(J, IRTI(IR_PHI), hiref, hiref2);
	break;
	}
      default:
	lua_assert(ir->o <= IR_NE);  /* Comparisons. */
	split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]);
	break;
      }
    } else if (ir->o == IR_CONV) {  /* See above, too. */
      IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
      if (st == IRT_I64 || st == IRT_U64) {  /* Conversion from 64 bit int. */
	if (irt_isfp(ir->t)) {  /* 64 bit integer to FP conversion. */
	  ir->prev = split_emit(J, IRT(IR_HIOP, irt_type(ir->t)),
				hisubst[ir->op1], nref);
	} else {  /* Truncate to lower 32 bits. */
	/* Also the jump target of the 64/64 bit cast and zero-extend cases
	** above. Execution then continues at the hisubst[ref] = hi store.
	*/
	fwdlo:
	  ir->prev = nir->op1;  /* Forward loword. */
	  /* Replace with NOP to avoid messing up the snapshot logic. */
	  nir->ot = IRT(IR_NOP, IRT_NIL);
	  nir->op1 = nir->op2 = 0;
	}
      }
    } else if (ir->o == IR_CNEWI) {
      if (hisubst[ir->op2])
	split_emit(J, IRT(IR_HIOP, IRT_NIL), nref, hisubst[ir->op2]);
    } else if (ir->o == IR_LOOP) {
      J->loopref = nref;  /* Needed by assembler. */
    }
    hisubst[ref] = hi;  /* Store hiword substitution. */
  }

  /* Add PHI marks. */
  /* Scans backwards from the last instruction and stops at the first
  ** instruction that is not a PHI.
  */
  for (ref = J->cur.nins-1; ref >= REF_FIRST; ref--) {
    IRIns *ir = IR(ref);
    if (ir->o != IR_PHI) break;
    if (!irref_isk(ir->op1)) irt_setphi(IR(ir->op1)->t);
    if (ir->op2 > J->loopref) irt_setphi(IR(ir->op2)->t);
  }

  /* Substitute snapshot maps. */
  oir[nins].prev = J->cur.nins;  /* Substitution for last snapshot. */
  {
    SnapNo i, nsnap = J->cur.nsnap;
    for (i = 0; i < nsnap; i++) {
      SnapShot *snap = &J->cur.snap[i];
      SnapEntry *map = &J->cur.snapmap[snap->mapofs];
      MSize n, nent = snap->nent;
      snap->ref = oir[snap->ref].prev;
      for (n = 0; n < nent; n++) {
	SnapEntry sn = map[n];
	/* Keep the upper 16 bits of the entry, replace the reference. */
	map[n] = ((sn & 0xffff0000) | oir[snap_ref(sn)].prev);
      }
    }
  }
}