/*
 * optimizeExits does two conversions to eliminate common branch-to-exit flows.
 *
 * 1. If we see a jcc that leads to two "identical" blocks ending with
 * bindjmp, then copy the identical part of the targets before the jcc,
 * and replace the jcc with a bindjcc1st instruction using the bytecode
 * destinations from the two original bindjmps. For the sake of this pass,
 * "identical" means matching lea & syncvmsp instructions, and both bindjmps
 * are for the same function.
 *
 * This leads to more efficient code because the service request stubs will
 * patch jumps in the main trace instead of off-trace.
 *
 * 2. Otherwise, if we see a jcc but only one of the branches is a normal
 * exit, then convert the jcc to a bindjcc with the jcc's condition and the
 * original bindjmp's dest.
 */
void optimizeExits(Vunit& unit) {
  auto const pred_counts = count_predecessors(unit);

  PostorderWalker{unit}.dfs([&](Vlabel b) {
    auto& code = unit.blocks[b].code;
    assertx(!code.empty());
    if (code.back().op != Vinstr::jcc) return;

    auto const ijcc = code.back().jcc_;
    auto const t0 = ijcc.targets[0];
    auto const t1 = ijcc.targets[1];
    if (t0 == t1) {
      code.back() = jmp{t0};
      return;
    }
    if (pred_counts[t0] != 1 || pred_counts[t1] != 1) return;

    // copy all but the last instruction in blocks[t] to just before
    // the last instruction in code.
    auto hoist_sync = [&](Vlabel t) {
      const auto& tcode = unit.blocks[t].code;
      code.insert(std::prev(code.end()),
                  tcode.begin(), std::prev(tcode.end()));
    };

    if (match_bindjcc1st(unit, t0, t1)) {
      // hoist the sync instructions from t0 to before the jcc,
      // and replace the jcc with bindjcc1st.
      const auto& bj0 = unit.blocks[t0].code.back().bindjmp_;
      const auto& bj1 = unit.blocks[t1].code.back().bindjmp_;
      hoist_sync(t0);
      code.back() = bindjcc1st{ijcc.cc, ijcc.sf, {bj0.target, bj1.target},
                               bj0.args | bj1.args};
      return;
    }

    auto fold_exit = [&](ConditionCode cc, Vlabel exit, Vlabel next) {
      const auto& bj = unit.blocks[exit].code.back().bindjmp_;
      auto origin = code.back().origin;
      hoist_sync(exit);
      code.back() = bindjcc{cc, ijcc.sf, bj.target, bj.trflags, bj.args};
      code.emplace_back(jmp{next});
      code.back().origin = origin;
    };

    // Try to replace a jcc to a normal exit with bindjcc followed by jmp,
    // as long as the sp adjustment is harmless to hoist (disp == 0).
    Vptr sp;
    if (match_bindjmp(unit, t1, &sp) && sp == sp.base[0]) {
      fold_exit(ijcc.cc, t1, t0);
    } else if (match_bindjmp(unit, t0, &sp) && sp == sp.base[0]) {
      fold_exit(ccNegate(ijcc.cc), t0, t1);
    }
  });
  printUnit(kVasmExitsLevel, "after vasm-exits", unit);
}
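/*
 * An illustrative before/after sketch of conversion 2 above, written as
 * hypothetical vasm (the block labels, condition code, and destination are
 * made up, not taken from the source): the taken side of the jcc is a
 * single-predecessor exit block ending in bindjmp, so its lea/syncvmsp code
 * is hoisted above the branch and the jcc/bindjmp pair is folded into a
 * bindjcc followed by a jmp to the surviving target.
 *
 *   B0: ...                               B0: ...
 *       jcc CC_L, %sf -> B1, B2               lea ...       (hoisted from B2)
 *                                             syncvmsp ...  (hoisted from B2)
 *   B2: lea ...                    ==>        bindjcc CC_L, %sf, <dest>
 *       syncvmsp ...                          jmp B1
 *       bindjmp <dest>
 *   B1: <fall-through code>                B1: <fall-through code>
 */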
void CodeGenerator::cgGuardStk(IRInstruction* inst) {
  auto const rSP = x2a(curOpd(inst->src(0)).reg());
  auto const baseOff = cellsToBytes(inst->extra<GuardStk>()->offset);
  emitTypeTest(
    inst->typeParam(),
    rSP[baseOff + TVOFF(m_type)],
    rSP[baseOff + TVOFF(m_data)],
    [&] (ConditionCode cc) {
      auto const destSK = SrcKey(curFunc(), m_unit.bcOff());
      auto const destSR = m_tx64->getSrcRec(destSK);
      destSR->emitFallbackJump(this->m_mainCode, ccNegate(cc));
    });
}
void Vgen::emit(jcc i) {
  assertx(i.cc != CC_None);
  if (i.targets[1] != i.targets[0]) {
    if (next == i.targets[1]) {
      // the taken branch is the fall-through block, invert the branch.
      i = jcc{ccNegate(i.cc), i.sf, {i.targets[1], i.targets[0]}};
    }
    jccs.push_back({a->frontier(), i.targets[1]});
    // B.cond range is +/- 1MB but this uses BR
    emitSmashableJcc(*codeBlock, env.meta, kEndOfTargetChain, i.cc);
  }
  emit(jmp{i.targets[0]});
}
void CodeGenerator::cgGuardLoc(IRInstruction* inst) {
  auto const rFP = x2a(m_regs[inst->src(0)].reg());
  auto const baseOff = localOffset(inst->extra<GuardLoc>()->locId);
  emitTypeTest(
    inst->typeParam(),
    rFP[baseOff + TVOFF(m_type)],
    rFP[baseOff + TVOFF(m_data)],
    [&] (ConditionCode cc) {
      auto const destSK = SrcKey(curFunc(), m_unit.bcOff());
      auto const destSR = m_tx64->getSrcRec(destSK);
      destSR->emitFallbackJump(this->m_mainCode, ccNegate(cc));
    });
}
void CodeGenerator::cgSideExitGuardStk(IRInstruction* inst) {
  auto const sp = x2a(curOpd(inst->src(0)).reg());
  auto const extra = inst->extra<SideExitGuardStk>();
  emitTypeTest(
    inst->typeParam(),
    sp[cellsToBytes(extra->checkedSlot) + TVOFF(m_type)],
    sp[cellsToBytes(extra->checkedSlot) + TVOFF(m_data)],
    [&] (ConditionCode cc) {
      auto const sk = SrcKey(curFunc(), extra->taken);
      emitBindSideExit(this->m_mainCode, this->m_stubsCode, sk, ccNegate(cc));
    }
  );
}
bool simplify(Env& env, const setcc& vsetcc, Vlabel b, size_t i) {
  return if_inst<Vinstr::xorbi>(env, b, i + 1, [&] (const xorbi& vxorbi) {
    // setcc{cc, _, tmp}; xorbi{1, tmp, d, _}; --> setcc{~cc, _, d};
    if (!(env.use_counts[vsetcc.d] == 1 &&
          vxorbi.s0.b() == 1 &&
          vxorbi.s1 == vsetcc.d &&
          env.use_counts[vxorbi.sf] == 0)) return false;

    return simplify_impl(env, b, i, [&] (Vout& v) {
      v << setcc{ccNegate(vsetcc.cc), vsetcc.sf, vxorbi.d};
      return 2;
    });
  });
}
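/*
 * A minimal standalone sketch (not part of the source; the function name is
 * hypothetical) of the identity the rewrite above relies on: a setcc result
 * is always 0 or 1, and xor-ing such a byte with 1 is logical negation, so
 * setcc{cc} followed by xorbi{1} computes the same byte as
 * setcc{ccNegate(cc)}.
 */
#include <cassert>
#include <initializer_list>

static void sketch_setcc_xorbi_identity() {
  for (int b : {0, 1}) {              // the two possible setcc results
    assert((b ^ 1) == (b ? 0 : 1));   // xorbi{1, b} == "negated condition"
  }
}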
/*
 * Branch fusion:
 * Analyze blocks one at a time, looking for the sequence:
 *
 *   setcc cc, f1 => b
 *   ...
 *   testb b, b => f2
 *   ...
 *   jcc E|NE, f2
 *
 * If found, and f2 is only used by the jcc, then change the code to:
 *
 *   setcc cc, f1 => b
 *   ...
 *   nop
 *   ...
 *   jcc !cc|cc, f1
 *
 * Later, vasm-dead will clean up the nop, and the setcc if b became dead.
 *
 * During the search, any other instruction that has a status flag result
 * will reset the pattern matcher. No instruction can "kill" flags,
 * since flags are SSA variables. However the transformation we want to
 * make extends the setcc flags lifetime, and we don't want it to overlap
 * another flag's lifetime.
 */
void fuseBranches(Vunit& unit) {
  auto blocks = sortBlocks(unit);
  jit::vector<unsigned> uses(unit.next_vr);
  for (auto b : blocks) {
    for (auto& inst : unit.blocks[b].code) {
      visitUses(unit, inst, [&](Vreg r) { uses[r]++; });
    }
  }
  bool should_print = false;
  for (auto b : blocks) {
    auto& code = unit.blocks[b].code;
    ConditionCode cc;
    Vreg setcc_flags, setcc_dest, testb_flags;
    unsigned testb_index;
    for (unsigned i = 0, n = code.size(); i < n; ++i) {
      if (code[i].op == Vinstr::setcc) {
        cc = code[i].setcc_.cc;
        setcc_flags = code[i].setcc_.sf;
        setcc_dest = code[i].setcc_.d;
        continue;
      }
      if (setcc_flags.isValid() &&
          match_testb(code[i], setcc_dest) &&
          uses[code[i].testb_.sf] == 1) {
        testb_flags = code[i].testb_.sf;
        testb_index = i;
        continue;
      }
      if (match_jcc(code[i], testb_flags)) {
        code[testb_index] = nop{}; // erase the testb
        auto& jcc = code[i].jcc_;
        jcc.cc = jcc.cc == CC_NE ? cc : ccNegate(cc);
        jcc.sf = setcc_flags;
        should_print = true;
        continue;
      }
      if (setcc_flags.isValid() && sets_flags(code[i])) {
        setcc_flags = testb_flags = Vreg{};
      }
    }
  }
  if (should_print) {
    printUnit(kVasmFusionLevel, "after vasm-fusion", unit);
  }
}
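/*
 * A minimal standalone sketch (not part of the source; the function name is
 * hypothetical) of why the fused jcc above keeps cc for CC_NE and takes
 * ccNegate(cc) for CC_E: testb b, b sets ZF exactly when b is zero, so the
 * original "jcc NE" fires when the setcc condition held and "jcc E" fires
 * when it did not.
 */
#include <cassert>
#include <initializer_list>

static void sketch_testb_jcc_mapping() {
  for (int b : {0, 1}) {            // the two possible setcc results
    bool zf = (b & b) == 0;         // flags after testb b, b
    assert(!zf == (b == 1));        // jcc NE taken  <=>  condition held
    assert( zf == (b == 0));        // jcc E taken   <=>  condition failed
  }
}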
bool cmov_impl(Env& env, const Inst& inst, Vlabel b, size_t i, Extend extend) {
  auto const t_it = env.unit.regToConst.find(inst.t);
  if (t_it == env.unit.regToConst.end()) return false;
  auto const f_it = env.unit.regToConst.find(inst.f);
  if (f_it == env.unit.regToConst.end()) return false;

  auto const check_const = [](Vconst c, bool& val) {
    if (c.isUndef) return false;
    switch (c.kind) {
      case Vconst::Quad:
      case Vconst::Long:
      case Vconst::Byte:
        if (c.val == 0) {
          val = false;
          return true;
        } else if (c.val == 1) {
          val = true;
          return true;
        } else {
          return false;
        }
      case Vconst::Double:
        return false;
    }
    not_reached();
  };

  bool t_val;
  if (!check_const(t_it->second, t_val)) return false;
  bool f_val;
  if (!check_const(f_it->second, f_val)) return false;

  return simplify_impl(env, b, i, [&] (Vout& v) {
    auto const d = env.unit.makeReg();
    if (t_val == f_val) {
      v << copy{env.unit.makeConst(t_val), d};
    } else if (t_val) {
      v << setcc{inst.cc, inst.sf, d};
    } else {
      v << setcc{ccNegate(inst.cc), inst.sf, d};
    }
    extend(v, d, inst.d);
    return 1;
  });
}
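/*
 * A minimal standalone sketch (not part of the source; the function name is
 * hypothetical) of the case split above: a cmov whose two sources are the
 * constants 0/1 is either a plain constant (t == f), the condition itself
 * (t = 1, f = 0), or its negation (t = 0, f = 1).
 */
#include <cassert>
#include <initializer_list>

static void sketch_cmov_of_bool_consts() {
  for (bool cond : {false, true}) {
    for (bool t : {false, true}) {
      for (bool f : {false, true}) {
        bool cmov = cond ? t : f;             // original cmov semantics
        bool rewritten = (t == f) ? t         // copy{makeConst(t)}
                       : t       ? cond       // setcc{cc}
                                 : !cond;     // setcc{ccNegate(cc)}
        assert(cmov == rewritten);
      }
    }
  }
}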
void optimizeJmps(Vunit& unit) {
  auto isEmpty = [&](Vlabel b, Vinstr::Opcode op) {
    auto& code = unit.blocks[b].code;
    return code.size() == 1 && op == code[0].op;
  };
  bool changed = false;
  bool ever_changed = false;
  // The number of incoming edges from (reachable) predecessors for each block.
  // It is maintained as an upper bound of the actual value during the
  // transformation.
  jit::vector<int> npreds(unit.blocks.size(), 0);
  do {
    if (changed) {
      std::fill(begin(npreds), end(npreds), 0);
    }
    changed = false;
    PostorderWalker{unit}.dfs([&](Vlabel b) {
      for (auto s : succs(unit.blocks[b])) {
        npreds[s]++;
      }
    });
    // give entry an extra predecessor to prevent cloning it.
    npreds[unit.entry]++;

    PostorderWalker{unit}.dfs([&](Vlabel b) {
      auto& block = unit.blocks[b];
      auto& code = block.code;
      assertx(!code.empty());
      if (code.back().op == Vinstr::jcc) {
        auto ss = succs(block);
        if (ss[0] == ss[1]) {
          // both edges have same target, change to jmp
          code.back() = jmp{ss[0]};
          --npreds[ss[0]];
          changed = true;
        } else {
          auto jcc_i = code.back().jcc_;
          if (isEmpty(jcc_i.targets[0], Vinstr::fallback)) {
            jcc_i = jcc{ccNegate(jcc_i.cc), jcc_i.sf,
                        {jcc_i.targets[1], jcc_i.targets[0]}};
          }
          if (isEmpty(jcc_i.targets[1], Vinstr::fallback)) {
            // replace jcc with fallbackcc and jmp
            const auto& fb_i = unit.blocks[jcc_i.targets[1]].code[0].fallback_;
            const auto t0 = jcc_i.targets[0];
            const auto jcc_origin = code.back().origin;
            code.pop_back();
            code.emplace_back(
              fallbackcc{jcc_i.cc, jcc_i.sf, fb_i.dest, fb_i.trflags});
            code.back().origin = jcc_origin;
            code.emplace_back(jmp{t0});
            code.back().origin = jcc_origin;
            changed = true;
          }
        }
      }
      for (auto& s : succs(block)) {
        if (isEmpty(s, Vinstr::jmp)) {
          // skip over s
          --npreds[s];
          s = unit.blocks[s].code.back().jmp_.target;
          ++npreds[s];
          changed = true;
        }
      }
      if (code.back().op == Vinstr::jmp) {
        auto s = code.back().jmp_.target;
        if (npreds[s] == 1 || isEmpty(s, Vinstr::jcc)) {
          // overwrite jmp with copy of s
          auto& code2 = unit.blocks[s].code;
          code.pop_back();
          code.insert(code.end(), code2.begin(), code2.end());
          if (--npreds[s]) {
            for (auto ss : succs(block)) {
              ++npreds[ss];
            }
          }
          changed = true;
        }
      }
    });
    ever_changed |= changed;
  } while (changed);
  if (ever_changed) {
    printUnit(kVasmJumpsLevel, "after vasm-jumps", unit);
  }
}
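/*
 * An illustrative before/after sketch of the jcc rewrite above, written as
 * hypothetical vasm (labels, condition code, and destination are made up):
 * when the taken target is a block containing only a fallback, the branch is
 * folded into a fallbackcc and the other target becomes a plain jmp; when
 * only the fall-through target is such a block, the condition is negated
 * first so that the fallback side becomes the taken side.
 *
 *   B0: jcc CC_E, %sf -> B1, B2            B0: fallbackcc CC_E, %sf, <dest>
 *   B2: fallback <dest>             ==>        jmp B1
 *   B1: ...                                B1: ...
 */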
void cgCheckType(IRLS& env, const IRInstruction* inst) {
  // Note: If you add new supported type checks, you should update
  // negativeCheckType() to indicate whether it is precise or not.
  auto const src = inst->src(0);
  auto const dst = inst->dst();

  auto const srcData = srcLoc(env, inst, 0).reg(0);
  auto const srcType = srcLoc(env, inst, 0).reg(1);

  auto& v = vmain(env);

  auto const doJcc = [&] (ConditionCode cc, Vreg sf) {
    fwdJcc(v, env, ccNegate(cc), sf, inst->taken());
  };

  auto const doMov = [&] {
    auto const dstData = dstLoc(env, inst, 0).reg(0);
    auto const dstType = dstLoc(env, inst, 0).reg(1);

    if (dst->isA(TBool) && !src->isA(TBool)) {
      v << movtqb{srcData, dstData};
    } else {
      v << copy{srcData, dstData};
    }
    if (dstType == InvalidReg) return;
    if (srcType != InvalidReg) {
      v << copy{srcType, dstType};
    } else {
      v << ldimmq{src->type().toDataType(), dstType};
    }
  };

  auto const typeParam = inst->typeParam();

  if (src->isA(typeParam)) {
    // src is the target type or better. Just define our dst.
    doMov();
    return;
  }
  if (!src->type().maybe(typeParam)) {
    // src is definitely not the target type. Always jump.
    v << jmp{label(env, inst->taken())};
    return;
  }

  if (srcType != InvalidReg) {
    emitTypeTest(v, env, typeParam, srcType, srcData, v.makeReg(), doJcc);
    doMov();
    return;
  }

  if (src->type() <= TBoxedCell && typeParam <= TBoxedCell) {
    // We should never have specific known Boxed types; those should only be
    // used for hints and predictions.
    always_assert(!(typeParam < TBoxedInitCell));
    doMov();
    return;
  }

  /*
   * See if we're just checking the array kind or object class of a value
   * with a mostly-known type.
   *
   * Important: We don't support typeParam being something like
   * StaticArr=kPackedKind unless the src->type() also already knows its
   * staticness. We do allow things like CheckType<Arr=Packed> t1:StaticArr,
   * though. This is why we have to check that the unspecialized type is at
   * least as big as the src->type().
   */
  if (typeParam.isSpecialized() &&
      typeParam.unspecialize() >= src->type()) {
    detail::emitSpecializedTypeTest(v, env, typeParam, srcData, v.makeReg(),
                                    doJcc);
    doMov();
    return;
  }

  /*
   * Since not all of our unions carry a type register, there are some
   * situations with strings and arrays that are neither constantly-foldable
   * nor in the emitTypeTest() code path.
   *
   * We currently actually check their persistent bit here, which will let
   * both static and uncounted strings through. Also note that
   * CheckType<Uncounted> t1:{Null|Str} doesn't get this treatment currently---
   * the emitTypeTest() path above will only check the type register.
   */
  if (!typeParam.isSpecialized() &&
      typeParam <= TUncounted &&
      src->type().subtypeOfAny(TStr, TArr) &&
      src->type().maybe(typeParam)) {
    assertx(src->type().maybe(TPersistent));

    auto const sf = v.makeReg();
    v << cmplim{0, srcData[FAST_REFCOUNT_OFFSET], sf};
    doJcc(CC_L, sf);
    doMov();
    return;
  }

  always_assert_flog(
    false,
    "Bad src: {} and dst: {} types in '{}'",
    src->type(), typeParam, *inst
  );
}
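/*
 * A minimal standalone sketch (not part of the source; the function name and
 * sentinel values are hypothetical) of the persistent check above: static and
 * uncounted values carry negative refcount sentinels, so the signed compare
 * emitted by cmplim{0, ...} with CC_L as the "pass" condition accepts exactly
 * the persistent values, and doJcc's ccNegate turns that into a side exit on
 * everything else.
 */
#include <cassert>
#include <cstdint>
#include <initializer_list>

static void sketch_persistent_refcount_check() {
  const int32_t kStaticSentinel    = INT32_MIN;      // hypothetical sentinel
  const int32_t kUncountedSentinel = INT32_MIN + 1;  // hypothetical sentinel
  for (int32_t count : {kStaticSentinel, kUncountedSentinel, int32_t{1}}) {
    bool passes = count < 0;                         // CC_L after cmp 0, count
    assert(passes == (count == kStaticSentinel ||
                      count == kUncountedSentinel));
  }
}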