/*
 * For every instruction in trace representing a tracelet guard, call func with
 * its location and type.
 */
void visitGuards(IRUnit& unit, const VisitGuardFn& func) {
  using L = RegionDesc::Location;
  const bool stopAtEndGuards = !RuntimeOption::EvalHHIRConstrictGuards;
  auto blocks = rpoSortCfg(unit);

  for (auto* block : blocks) {
    for (auto const& inst : *block) {
      switch (inst.op()) {
        case EndGuards:
          if (stopAtEndGuards) return;
          break;
        case ExitPlaceholder:
          if (stopAtEndGuards) break;
          return;
        case HintLocInner:
        case CheckLoc:
          func(L::Local{inst.extra<LocalId>()->locId}, inst.typeParam());
          break;
        case HintStkInner:
        case CheckStk:
        {
          auto bcSpOffset = inst.extra<RelOffsetData>()->bcSpOffset;
          auto offsetFromFp = inst.marker().spOff() - bcSpOffset;
          func(L::Stack{offsetFromFp}, inst.typeParam());
          break;
        }
        default: break;
      }
    }
  }
}
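/*
 * A minimal usage sketch (not from the HHVM sources): assuming VisitGuardFn
 * is callable with a (RegionDesc::Location, Type) pair, a caller could
 * collect every guarded location like this.  The helper name and the
 * `collected` vector are illustrative only.
 */
void collectGuardLocations(IRUnit& unit,
                           std::vector<std::pair<RegionDesc::Location, Type>>& collected) {
  visitGuards(unit, [&] (const RegionDesc::Location& loc, Type type) {
    // Record each tracelet guard so it can become a region precondition.
    collected.emplace_back(loc, type);
  });
}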
bool checkRegisters(IRTrace* trace, const IRFactory& factory,
                    const RegAllocInfo& regs) {
  assert(checkCfg(trace, factory));

  auto blocks = rpoSortCfg(trace, factory);
  auto children = findDomChildren(blocks);
  forPreorderDoms(blocks.front(), children, RegState(),
                  [&] (Block* block, RegState& state) {
    for (IRInstruction& inst : *block) {
      for (SSATmp* src : inst.srcs()) {
        auto const &info = regs[src];
        if (!info.spilled() &&
            (info.reg(0) == Transl::rVmSp ||
             info.reg(0) == Transl::rVmFp)) {
          // hack - ignore rbx and rbp
          continue;
        }
        for (unsigned i = 0, n = info.numAllocatedRegs(); i < n; ++i) {
          assert(state.tmp(info, i) == src);
        }
      }
      for (SSATmp& dst : inst.dsts()) {
        auto const &info = regs[dst];
        for (unsigned i = 0, n = info.numAllocatedRegs(); i < n; ++i) {
          state.tmp(info, i) = &dst;
        }
      }
    }
  });

  return true;
}
Block* findMainExitBlock(const IRUnit& unit, SrcKey lastSk) {
  Block* mainExit = nullptr;

  FTRACE(5, "findMainExitBlock: starting on unit:\n{}\n", show(unit));

  for (auto block : rpoSortCfg(unit)) {
    if (endsUnitAtSrcKey(block, lastSk)) {
      if (mainExit == nullptr) {
        mainExit = block;
        continue;
      }

      always_assert_flog(
        mainExit->hint() == Block::Hint::Unlikely ||
        block->hint() == Block::Hint::Unlikely,
        "findMainExit: 2 likely exits found: B{} and B{}\nlastSk = {}",
        mainExit->id(), block->id(), showShort(lastSk));

      if (mainExit->hint() == Block::Hint::Unlikely) mainExit = block;
    }
  }

  always_assert_flog(mainExit,
                     "findMainExit: no exit found for lastSk = {}",
                     showShort(lastSk));

  FTRACE(5, "findMainExitBlock: mainExit = B{}\n", mainExit->id());

  return mainExit;
}
/*
 * For all guard instructions in trace, check to see if we can relax the
 * destination type to something less specific. The GuardConstraints map
 * contains information about what properties of the guarded type matter for
 * each instruction.
 */
bool relaxGuards(IRTrace* trace, const IRFactory& factory,
                 const GuardConstraints& guards) {
  FTRACE(1, "relaxing guards for trace {}\n", trace);
  auto blocks = rpoSortCfg(trace, factory);
  Block* reflowBlock = nullptr;

  for (auto* block : blocks) {
    for (auto& inst : *block) {
      if (!isGuardOp(inst.op())) continue;

      auto it = guards.find(inst.id());
      auto category = it == guards.end() ? DataTypeGeneric : it->second;
      auto const oldType = inst.typeParam();
      auto newType = relaxType(oldType, category);

      if (!oldType.equals(newType)) {
        FTRACE(1, "relaxGuards changing {}'s type to {}\n", inst, newType);
        inst.setTypeParam(newType);
        if (!reflowBlock) reflowBlock = block;
      }
    }
  }

  // TODO(t2598894): For now we require regenerating the IR after guard
  // relaxation, so it's only useful in the tracelet region selector.
  if (false && reflowBlock) reflowTypes(reflowBlock, blocks);

  return (bool)reflowBlock;
}
void optimize(IRUnit& unit, IRBuilder& irBuilder, TransKind kind) {
  auto finishPass = [&](const char* msg) {
    dumpTrace(6, unit, folly::format("after {}", msg).str().c_str());
    assert(checkCfg(unit));
    assert(checkTmpsSpanningCalls(unit));
    if (debug) {
      forEachInst(rpoSortCfg(unit), assertOperandTypes);
    }
  };

  auto doPass = [&](void (*fn)(IRUnit&), const char* msg) {
    fn(unit);
    finishPass(msg);
  };

  auto dce = [&](const char* which) {
    if (!RuntimeOption::EvalHHIRDeadCodeElim) return;
    eliminateDeadCode(unit);
    finishPass(folly::format("{} DCE", which).str().c_str());
  };

  if (RuntimeOption::EvalHHIRRelaxGuards) {
    auto const simpleRelax = kind == TransProfile;
    auto changed = relaxGuards(unit, *irBuilder.guards(), simpleRelax);
    if (changed) finishPass("guard relaxation");
  }

  if (RuntimeOption::EvalHHIRRefcountOpts) {
    optimizeRefcounts(unit);
    finishPass("refcount opts");
  }

  dce("initial");

  if (RuntimeOption::EvalHHIRPredictionOpts) {
    doPass(optimizePredictions, "prediction opts");
  }

  if (RuntimeOption::EvalHHIRExtraOptPass &&
      (RuntimeOption::EvalHHIRCse ||
       RuntimeOption::EvalHHIRSimplification)) {
    irBuilder.reoptimize();
    finishPass("reoptimize");
    // Clean up any dead code left around by CSE/Simplification.
    // Ideally, this would be controlled by a flag returned
    // by optimizeTrace indicating whether DCE is necessary.
    dce("reoptimize");
  }

  if (RuntimeOption::EvalHHIRJumpOpts) {
    doPass(optimizeJumps, "jumpopts");
    dce("jump opts");
  }

  if (RuntimeOption::EvalHHIRGenerateAsserts) {
    doPass(insertAsserts, "RefCnt asserts");
  }
}
BlocksWithIds rpoSortCfgWithIds(const IRUnit& unit) {
  auto ret = BlocksWithIds{rpoSortCfg(unit), {unit, 0xffffffff}};

  auto id = ret.blocks.size();
  for (auto* block : ret.blocks) {
    ret.ids[block] = --id;
  }
  assert(id == 0);

  return ret;
}
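/*
 * Illustrative sketch (this helper is hypothetical, not part of HHVM): since
 * the ids above are assigned so that blocks earlier in the reverse post order
 * get larger numbers, comparing ids is enough to spot an edge that points
 * back toward the start of the order.
 */
bool targetsEarlierInRpo(BlocksWithIds& sorted, Block* from, Block* to) {
  // The entry block gets the largest id, so an edge whose destination id is
  // greater than or equal to the source id leads to a block at or before the
  // source in reverse post order (e.g. a loop back edge).
  return sorted.ids[to] >= sorted.ids[from];
}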
void logTranslation(const TransEnv& env, const TransRange& range) {
  auto nanos = HPHP::Timer::GetThreadCPUTimeNanos() - env.unit->startNanos();
  auto& cols = *env.unit->logEntry();
  auto& context = env.unit->context();
  auto kind = show(context.kind);
  cols.setStr("trans_kind", !debug ? kind : kind + "_debug");
  if (context.func) {
    cols.setStr("func", context.func->fullName()->data());
  }
  cols.setInt("jit_sample_rate", RuntimeOption::EvalJitSampleRate);
  // timing info
  cols.setInt("jit_micros", nanos / 1000);
  // hhir stats
  cols.setInt("max_tmps", env.unit->numTmps());
  cols.setInt("max_blocks", env.unit->numBlocks());
  cols.setInt("max_insts", env.unit->numInsts());
  auto hhir_blocks = rpoSortCfg(*env.unit);
  cols.setInt("num_blocks", hhir_blocks.size());
  size_t num_insts = 0;
  for (auto b : hhir_blocks) num_insts += b->instrs().size();
  cols.setInt("num_insts", num_insts);
  // vasm stats
  if (env.vunit) {
    cols.setInt("max_vreg", env.vunit->next_vr);
    cols.setInt("max_vblocks", env.vunit->blocks.size());
    cols.setInt("max_vcalls", env.vunit->vcallArgs.size());
    size_t max_vinstr = 0;
    for (auto& blk : env.vunit->blocks) max_vinstr += blk.code.size();
    cols.setInt("max_vinstr", max_vinstr);
    cols.setInt("num_vconst", env.vunit->constToReg.size());
    auto vblocks = sortBlocks(*env.vunit);
    size_t num_vinstr[kNumAreas] = {0, 0, 0};
    size_t num_vblocks[kNumAreas] = {0, 0, 0};
    for (auto b : vblocks) {
      const auto& block = env.vunit->blocks[b];
      num_vinstr[(int)block.area_idx] += block.code.size();
      num_vblocks[(int)block.area_idx]++;
    }
    cols.setInt("num_vinstr_main", num_vinstr[(int)AreaIndex::Main]);
    cols.setInt("num_vinstr_cold", num_vinstr[(int)AreaIndex::Cold]);
    cols.setInt("num_vinstr_frozen", num_vinstr[(int)AreaIndex::Frozen]);
    cols.setInt("num_vblocks_main", num_vblocks[(int)AreaIndex::Main]);
    cols.setInt("num_vblocks_cold", num_vblocks[(int)AreaIndex::Cold]);
    cols.setInt("num_vblocks_frozen", num_vblocks[(int)AreaIndex::Frozen]);
  }
  // x64 stats
  cols.setInt("main_size", range.main.size());
  cols.setInt("cold_size", range.cold.size());
  cols.setInt("frozen_size", range.frozen.size());
  // finish & log
  StructuredLog::log("hhvm_jit", cols);
}
/*
 * Currently we have very limited control flow in any given tracelet,
 * so this just selects an appropriate reverse post order on the
 * blocks, and partitions the unlikely ones to astubs.
 */
LayoutInfo layoutBlocks(const IRUnit& unit) {
  LayoutInfo ret;
  ret.blocks = rpoSortCfg(unit);

  // Optionally stress test by randomizing the positions.
  if (RuntimeOption::EvalHHIRStressCodegenBlocks) {
    auto seed = std::chrono::system_clock::now().time_since_epoch().count();
    std::default_random_engine gen(seed);
    std::random_shuffle(ret.blocks.begin() + 1, ret.blocks.end(),
                        [&](int i) { return gen() % i; });
  }

  // Partition into a and astubs, without changing relative order.
  ret.astubsIt = std::stable_partition(
    ret.blocks.begin(), ret.blocks.end(),
    [&] (Block* b) {
      return b->hint() != Block::Hint::Unlikely;
    }
  );

  if (HPHP::Trace::moduleEnabled(HPHP::Trace::hhir, 5)) {
    std::string str = "Layout:";
    auto printRegion = [&] (const char* what,
                            BlockList::iterator& it,
                            BlockList::iterator stop) {
      folly::toAppend(what, &str);
      for (; it != stop; ++it) {
        folly::toAppend((*it)->id(), &str);
        folly::toAppend(" ", &str);
      }
    };
    auto it = ret.blocks.begin();
    printRegion("\n a: ", it, ret.astubsIt);
    printRegion("\n astubs: ", it, ret.blocks.end());
    HPHP::Trace::traceRelease("%s\n", str.c_str());
  }

  /*
   * No matter what happens above, it's going to be very broken if the
   * entry block isn't first, and it's going to perform poorly if the
   * main exit isn't the last block in a. Assert these.
   *
   * Note: this isn't the case if the main exit contains a return, but
   * we can revisit that later.
   */
  if (!RuntimeOption::EvalHHIRStressCodegenBlocks) {
    always_assert(ret.blocks.front()->isEntry());
  }

  return ret;
}
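/*
 * For illustration only: with the hhir trace module enabled at level 5, the
 * release trace built above comes out roughly like this (block ids depend on
 * the unit being laid out):
 *
 *   Layout:
 *    a: 0 1 2
 *    astubs: 3 4
 */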
/*
 * Build the CFG, then the dominator tree, then use it to validate SSA.
 * 1. Each src must be defined by some other instruction, and each dst must
 *    be defined by the current instruction.
 * 2. Each src must be defined earlier in the same block or in a dominator.
 * 3. Each dst must not be previously defined.
 * 4. Treat tmps defined by DefConst as always defined.
 * 5. Each predecessor of a reachable block must be reachable (deleted
 *    blocks must not have out-edges to reachable blocks).
 */
bool checkCfg(IRTrace* trace, const IRFactory& factory) {
  forEachTraceBlock(trace, checkBlock);

  // Check valid successor/predecessor edges.
  auto const blocks = rpoSortCfg(trace, factory);
  std::unordered_set<const Edge*> edges;
  for (Block* b : blocks) {
    auto checkEdge = [&] (const Edge* e) {
      assert(e->from() == b);
      edges.insert(e);
      for (auto& p : e->to()->preds()) if (&p == e) return;
      assert(false); // did not find edge.
    };
    if (auto *e = nextEdge(b))  checkEdge(e);
    if (auto *e = takenEdge(b)) checkEdge(e);
  }
  for (Block* b : blocks) {
    for (DEBUG_ONLY auto const &e : b->preds()) {
      assert(&e == takenEdge(e.from()) || &e == nextEdge(e.from()));
      assert(e.to() == b);
    }
  }

  checkCatchTraces(trace, factory);

  // visit dom tree in preorder, checking all tmps
  auto const children = findDomChildren(blocks);
  StateVector<SSATmp, bool> defined0(&factory, false);
  forPreorderDoms(blocks.front(), children, defined0,
                  [] (Block* block, StateVector<SSATmp, bool>& defined) {
    for (IRInstruction& inst : *block) {
      for (DEBUG_ONLY SSATmp* src : inst.srcs()) {
        assert(src->inst() != &inst);
        assert_log(src->inst()->op() == DefConst || defined[src],
                   [&]{ return folly::format(
                     "src '{}' in '{}' came from '{}', which is not a "
                     "DefConst and is not defined at this use site",
                     src->toString(), inst.toString(),
                     src->inst()->toString()).str();
                   });
      }
      for (SSATmp& dst : inst.dsts()) {
        assert(dst.inst() == &inst && inst.op() != DefConst);
        assert(!defined[dst]);
        defined[dst] = true;
      }
    }
  });
  return true;
}
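/*
 * A hand-written fragment (block numbers, tmp ids and opcodes are made up)
 * that violates rules 2 and 3 above and that the dominator walk would catch,
 * assuming B2 does not dominate B1:
 *
 *   B1:  t1:Int = AddInt t0:Int, t2:Int     -- t2 used before any dominating
 *                                              definition (rule 2)
 *   B2:  t2:Int = LdLoc<Int,0> t3:FramePtr
 *        t2:Int = LdLoc<Int,1> t3:FramePtr  -- t2 defined a second time
 *                                              (rule 3)
 */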
bool checkTmpsSpanningCalls(IRTrace* trace, const IRFactory& irFactory) {
  auto const blocks = rpoSortCfg(trace, irFactory);
  auto const children = findDomChildren(blocks);

  // CallBuiltin is ok because it is not a php-level call. (It will
  // call a C++ helper and we can push/pop around it normally.)
  auto isCall = [&] (Opcode op) {
    return op == Call || op == CallArray;
  };

  typedef StateVector<SSATmp,bool> State;

  bool isValid = true;
  forPreorderDoms(
    blocks.front(), children, State(&irFactory, false),
    [&] (Block* b, State& state) {
      for (auto& inst : *b) {
        for (auto& src : inst.srcs()) {
          if (src->isA(Type::FramePtr)) continue;
          if (src->isConst()) continue;
          if (!state[src]) {
            FTRACE(1, "checkTmpsSpanningCalls failed\n"
                      " instruction: {}\n"
                      " src: {}\n",
                   inst.toString(), src->toString());
            isValid = false;
          }
        }

        /*
         * Php calls kill all live temporaries. We can't keep them
         * alive across the call because we currently have no
         * callee-saved registers in our abi, and all translations
         * share the same spill slots.
         */
        if (isCall(inst.op())) state.reset();

        for (auto& d : inst.dsts()) {
          state[d] = true;
        }
      }
    }
  );

  return isValid;
}
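/*
 * A hand-written, illustrative IR fragment of the situation this check
 * rejects (opcodes, types and tmp ids are schematic, not from a real trace):
 *
 *   t1:Int = LdLoc<Int,0> t0:FramePtr
 *   t2:Gen = Call t3:StkPtr, ...       -- resets the live-tmp state; only
 *                                          FramePtr srcs and constants are
 *                                          exempt at later use sites
 *   StLoc<1> t0:FramePtr, t1:Int       -- t1 is used after the call, so
 *                                          checkTmpsSpanningCalls flags it
 */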
bool checkRegisters(IRTrace* trace, const IRFactory& factory,
                    const RegAllocInfo& regs) {
  assert(checkCfg(trace, factory));

  auto blocks = rpoSortCfg(trace, factory);
  StateVector<Block, RegState> states(&factory, RegState());
  StateVector<Block, bool> reached(&factory, false);
  for (auto* block : blocks) {
    RegState state = states[block];
    for (IRInstruction& inst : *block) {
      for (SSATmp* src : inst.srcs()) {
        auto const &info = regs[src];
        if (!info.spilled() &&
            (info.reg(0) == Transl::rVmSp ||
             info.reg(0) == Transl::rVmFp)) {
          // hack - ignore rbx and rbp
          continue;
        }
        for (unsigned i = 0, n = info.numAllocatedRegs(); i < n; ++i) {
          assert(state.tmp(info, i) == src);
        }
      }
      for (SSATmp& dst : inst.dsts()) {
        auto const &info = regs[dst];
        for (unsigned i = 0, n = info.numAllocatedRegs(); i < n; ++i) {
          state.tmp(info, i) = &dst;
        }
      }
    }
    // State contains register/spill info at current block end.
    auto updateEdge = [&](Block* succ) {
      if (!reached[succ]) {
        states[succ] = state;
      } else {
        states[succ].merge(state);
      }
    };
    if (auto* next = block->next()) updateEdge(next);
    if (auto* taken = block->taken()) updateEdge(taken);
  }

  return true;
}
/*
 * Compute the stack and local type postconditions for a
 * single-entry/single-exit tracelet.
 */
std::vector<RegionDesc::TypePred> IRBuilder::getKnownTypes() {
  // This function is only correct when given a single-exit region, as
  // in TransProfile. Furthermore, its output is only used to guide
  // formation of profile-driven regions.
  assert(tx->mode() == TransProfile);

  // We want the state for the last block on the "main trace". Figure
  // out which that is.
  Block* mainExit = nullptr;
  for (auto* b : rpoSortCfg(m_unit)) {
    if (isMainExit(b)) {
      assert(mainExit == nullptr);
      mainExit = b;
    }
  }
  assert(mainExit != nullptr);

  // Load state for mainExit. This feels hacky.
  FTRACE(1, "mainExit: B{}\n", mainExit->id());
  m_state.startBlock(mainExit);

  // Now use the current state to get all the types.
  std::vector<RegionDesc::TypePred> result;

  auto const curFunc = m_state.func();
  auto const sp = m_state.sp();
  auto const spOffset = m_state.spOffset();

  for (unsigned i = 0; i < curFunc->maxStackCells(); ++i) {
    auto t = getStackValue(sp, i).knownType;
    if (!t.equals(Type::StackElem)) {
      result.push_back({ RegionDesc::Location::Stack{i, spOffset - i}, t });
    }
  }

  for (unsigned i = 0; i < curFunc->numLocals(); ++i) {
    auto t = m_state.localType(i);
    if (!t.equals(Type::Gen)) {
      FTRACE(1, "Local {}: {}\n", i, t.toString());
      result.push_back({ RegionDesc::Location::Local{i}, t });
    }
  }
  return result;
}
void gvn(IRUnit& unit) {
  PassTracer tracer{&unit, Trace::hhir_gvn, "gvn"};
  GVNState state;

  auto const rpoBlocks = rpoSortCfg(unit);
  auto const idoms = findDominators(
    unit,
    rpoBlocks,
    numberBlocks(unit, rpoBlocks)
  );

  ValueNumberTable globalTable(unit, ValueNumberMetadata{});
  state.globalTable = &globalTable;

  // This is an implementation of the RPO version of the global value numbering
  // algorithm presented in the 1996 paper "SCC-based Value Numbering" by
  // Cooper and Simpson.
  runAnalysis(state, unit, rpoBlocks);
  replaceRedundantComputations(unit, idoms, rpoBlocks, globalTable);
  state.globalTable = nullptr;
}
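/*
 * A minimal, self-contained sketch of the core value-numbering idea the pass
 * builds on: two pure instructions with the same opcode and the same
 * value-numbered operands compute the same value, so the later one can reuse
 * the earlier result.  Everything below (Expr, ExprHash, valueNumber) is
 * illustrative and independent of HHVM's actual GVNState/ValueNumberTable.
 */
#include <cstddef>
#include <functional>
#include <unordered_map>
#include <vector>

struct Expr {
  int opcode;               // stand-in for an HHIR Opcode
  std::vector<int> srcVNs;  // value numbers of the operands
  bool operator==(const Expr& o) const {
    return opcode == o.opcode && srcVNs == o.srcVNs;
  }
};

struct ExprHash {
  size_t operator()(const Expr& e) const {
    size_t h = std::hash<int>()(e.opcode);
    for (auto vn : e.srcVNs) h = h * 31 + std::hash<int>()(vn);
    return h;
  }
};

// Returns the canonical value number for an expression, allocating a fresh
// number the first time a given (opcode, operand-numbers) pair is seen.
int valueNumber(std::unordered_map<Expr, int, ExprHash>& table, Expr e) {
  auto it = table.find(e);
  if (it != table.end()) return it->second;   // redundant computation found
  int vn = static_cast<int>(table.size());    // fresh value number
  table.emplace(std::move(e), vn);
  return vn;
}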
/*
 * For all guard instructions in trace, check to see if we can relax the
 * destination type to something less specific. The GuardConstraints map
 * contains information about what properties of the guarded type matter for
 * each instruction. Returns true iff any changes were made to the trace.
 */
bool relaxGuards(const IRUnit& unit, const GuardConstraints& guards) {
  FTRACE(1, "relaxing guards for trace {}\n", unit.main());
  auto blocks = rpoSortCfg(unit);
  Block* reflowBlock = nullptr;

  for (auto* block : blocks) {
    for (auto& inst : *block) {
      if (!isGuardOp(inst.op())) continue;

      auto it = guards.find(&inst);
      auto constraint = it == guards.end() ? TypeConstraint() : it->second;

      // TODO(t2598894): Support relaxing inner types
      auto const oldType = inst.typeParam();
      auto newType = relaxType(oldType, constraint.category);

      if (constraint.knownType <= newType) {
        // If the known type is at least as good as the relaxed type, we can
        // replace the guard with an assert.
        auto newOp = guardToAssert(inst.op());
        FTRACE(1, "relaxGuards changing {}'s type to {}, op to {}\n",
               inst, constraint.knownType, newOp);
        inst.setTypeParam(constraint.knownType);
        inst.setOpcode(newOp);
        inst.setTaken(nullptr);
        if (!reflowBlock) reflowBlock = block;
      } else if (!oldType.equals(newType)) {
        FTRACE(1, "relaxGuards changing {}'s type to {}\n", inst, newType);
        inst.setTypeParam(newType);
        if (!reflowBlock) reflowBlock = block;
      }
    }
  }

  if (reflowBlock) reflowTypes(reflowBlock, blocks);

  return (bool)reflowBlock;
}
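/*
 * Schematic before/after for the guard-to-assert rewrite above (hand-written
 * for illustration; local offsets and tmp ids are made up).  When the
 * constraint already proves the value is an Int, the side exit is dropped:
 *
 *   before:  t5:FramePtr = GuardLoc<Int,4> t4:FramePtr  -> B7 (taken exit)
 *   after:   t5:FramePtr = AssertLoc<Int,4> t4:FramePtr
 */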
void visitGuards(IRUnit& unit, F func) {
  auto blocks = rpoSortCfg(unit);

  for (auto const block : blocks) {
    for (auto const& inst : *block) {
      switch (inst.op()) {
        case EndGuards:
          return;
        case HintLocInner:
        case CheckLoc:
          func(&inst,
               Location::Local{inst.extra<LocalId>()->locId},
               inst.typeParam(),
               inst.is(HintLocInner));
          break;
        case HintStkInner:
        case CheckStk:
        {
          auto const irSPRel = inst.extra<IRSPRelOffsetData>()->offset;

          auto const defSP = inst.src(0)->inst();
          assertx(defSP->is(DefSP));
          auto const irSPOff = defSP->extra<DefSP>()->offset;

          func(&inst,
               Location::Stack{irSPRel.to<FPInvOffset>(irSPOff)},
               inst.typeParam(),
               inst.is(HintStkInner));
          break;
        }
        case HintMBaseInner:
        case CheckMBase:
          func(&inst,
               Location::MBase{},
               inst.typeParam(),
               inst.is(HintMBaseInner));
          break;
        default: break;
      }
    }
  }
}
/*
 * This pass tries to merge blocks and clean up the CFG.
 *
 * In each pass, it visits blocks in reverse post order and tries to
 * (1) convert the conditional branch at the end of the block into a Jmp;
 * (2) merge the block with its unique successor block, if it is the unique
 *     predecessor of its successor;
 * (3) fold Jmp, if it fits the Jmp to Jmp pattern.
 *
 * The reverse post order is not essential to the transformation; in the
 * current implementation it helps skip some blocks after a change happens.
 */
void cleanCfg(IRUnit& unit) {
  PassTracer tracer { &unit, Trace::hhir_cfg, "cleanCfg" };
  Timer timer(Timer::optimize_cleancfg);
  do {
    auto const blocks = rpoSortCfg(unit);
    for (auto block : blocks) {
      // Skip malformed unreachable blocks that can appear transiently.
      if (block->empty()) continue;

      // Keep working on the current block until no further changes are made.
      // Since we are visiting in reverse post order, we are sure that after a
      // block is changed here, no more opportunity is exposed in its upstream
      // blocks.
      while (true) {
        simplify(unit, &(block->back()));
        if (absorbDstBlock(unit, block)) continue;
        if (foldJmp(unit, block)) continue;
        break;
      }
    }
  } while (removeUnreachable(unit));
}
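/*
 * Schematic examples of the three rewrites (block numbers and tmps are made
 * up for illustration):
 *
 *   (1)  B1 ends in "JmpZero t0 -> B2" with fall-through B3; if t0 is known
 *        to be non-zero the branch can never be taken, so simplify() turns
 *        it into "Jmp -> B3".
 *   (2)  B1 ends in "Jmp -> B2" and B2 has no other predecessor, so B2's
 *        instructions are absorbed into B1 (absorbDstBlock).
 *   (3)  B1 ends in "Jmp -> B2" and B2 is just "Jmp -> B3", so B1 is
 *        retargeted to "Jmp -> B3" (foldJmp).
 */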
void visitGuards(IRUnit& unit, F func) {
  using L = RegionDesc::Location;
  auto blocks = rpoSortCfg(unit);

  for (auto* block : blocks) {
    for (auto& inst : *block) {
      switch (inst.op()) {
        case EndGuards:
          return;
        case HintLocInner:
        case CheckLoc:
          func(&inst,
               L::Local{inst.extra<LocalId>()->locId},
               inst.typeParam(),
               inst.is(HintLocInner));
          break;
        case HintStkInner:
        case CheckStk:
        {
          /*
           * BCSPOffset is optional but should --always-- be set for CheckStk
           * instructions that appear within the guards for a translation.
           */
          auto bcSpOffset = inst.extra<RelOffsetData>()->bcSpOffset;
          assertx(inst.extra<RelOffsetData>()->hasBcSpOffset);

          auto offsetFromFp = inst.marker().spOff() - bcSpOffset;
          func(&inst,
               L::Stack{offsetFromFp},
               inst.typeParam(),
               inst.is(HintStkInner));
          break;
        }
        default: break;
      }
    }
  }
}
Block* findMainExitBlock(const IRUnit& unit, SrcKey lastSk) {
  bool unreachable = false;
  Block* mainExit = nullptr;

  FTRACE(5, "findMainExitBlock: looking for exit at {} in unit:\n{}\n",
         showShort(lastSk), show(unit));

  for (auto block : rpoSortCfg(unit)) {
    if (block->back().is(Unreachable)) unreachable = true;

    if (endsUnitAtSrcKey(block, lastSk)) {
      if (mainExit == nullptr) {
        mainExit = block;
        continue;
      }

      always_assert_flog(
        mainExit->hint() == Block::Hint::Unlikely ||
        block->hint() == Block::Hint::Unlikely,
        "findMainExit: 2 likely exits found: B{} and B{}\nlastSk = {}",
        mainExit->id(), block->id(), showShort(lastSk)
      );

      if (mainExit->hint() == Block::Hint::Unlikely) mainExit = block;
    }
  }

  always_assert_flog(
    mainExit || unreachable,
    "findMainExit: no exit found for lastSk = {}",
    showShort(lastSk)
  );

  FTRACE(5, "findMainExitBlock: mainExit = B{}\n", mainExit->id());

  return mainExit;
}
/*
 * reoptimize() runs a trace through a second pass of TraceBuilder
 * optimizations, like this:
 *
 *   reset state.
 *   move all blocks to a temporary list.
 *   compute immediate dominators.
 *   for each block in trace order:
 *     if we have a snapshot state for this block:
 *       clear cse entries that don't dominate this block.
 *       use snapshot state.
 *     move all instructions to a temporary list.
 *     for each instruction:
 *       optimizeWork - do CSE and simplify again
 *       if not simplified:
 *         append existing instruction and update state.
 *       else:
 *         if the instruction has a result, insert a mov from the
 *         simplified tmp to the original tmp and discard the instruction.
 *     if the last conditional branch was turned into a jump, remove the
 *     fall-through edge to the next block.
 */
void TraceBuilder::reoptimize() {
  FTRACE(5, "ReOptimize:vvvvvvvvvvvvvvvvvvvv\n");
  SCOPE_EXIT { FTRACE(5, "ReOptimize:^^^^^^^^^^^^^^^^^^^^\n"); };
  assert(m_curTrace == m_mainTrace.get());
  assert(m_savedTraces.empty());
  assert(m_inlineSavedStates.empty());

  m_enableCse = RuntimeOption::EvalHHIRCse;
  m_enableSimplification = RuntimeOption::EvalHHIRSimplification;
  if (!m_enableCse && !m_enableSimplification) return;
  if (m_mainTrace->blocks().size() >
      RuntimeOption::EvalHHIRSimplificationMaxBlocks) {
    // TODO CSEHash::filter is very slow for large block sizes
    // t2135219 should address that
    return;
  }

  BlockList sortedBlocks = rpoSortCfg(m_mainTrace.get(), m_irFactory);
  auto const idoms = findDominators(sortedBlocks);
  clearTrackedState();

  auto blocks = std::move(m_mainTrace->blocks());
  assert(m_mainTrace->blocks().empty());
  while (!blocks.empty()) {
    Block* block = blocks.front();
    blocks.pop_front();
    assert(block->trace() == m_mainTrace.get());
    FTRACE(5, "Block: {}\n", block->id());

    m_mainTrace->push_back(block);
    if (m_snapshots[block]) {
      useState(block);
    }

    auto instructions = std::move(block->instrs());
    assert(block->empty());
    while (!instructions.empty()) {
      auto *inst = &instructions.front();
      instructions.pop_front();

      // last attempt to elide ActRecs, if we still need the InlineFPAnchor
      // it will be added back to the trace when we re-add instructions that
      // rely on it
      if (inst->op() == InlineFPAnchor) {
        continue;
      }

      // merging state looks at the current marker, and optimizeWork
      // below may create new instructions. Use the marker from this
      // instruction.
      assert(inst->marker().valid());
      setMarker(inst->marker());

      auto const tmp = optimizeWork(inst, idoms); // Can generate new instrs!
      if (!tmp) {
        // Could not optimize; keep the old instruction
        appendInstruction(inst, block);
        updateTrackedState(inst);
        continue;
      }
      SSATmp* dst = inst->dst();
      if (dst->type() != Type::None && dst != tmp) {
        // The result of optimization has a different destination than the inst.
        // Generate a mov(tmp->dst) to get result into dst. If we get here then
        // assume the last instruction in the block isn't a guard. If it was,
        // we would have to insert the mov on the fall-through edge.
        assert(block->empty() || !block->back()->isBlockEnd());
        IRInstruction* mov = m_irFactory.mov(dst, tmp, inst->marker());
        appendInstruction(mov, block);
        updateTrackedState(mov);
      }
      // Not re-adding inst; remove the inst->taken edge
      if (inst->taken()) inst->setTaken(nullptr);
    }

    if (block->back()->isTerminal()) {
      // Could have converted a conditional branch to Jmp; clear next.
      block->setNext(nullptr);
    } else {
      // if the last instruction was a branch, we already saved state
      // for the target in updateTrackedState(). Now save state for
      // the fall-through path.
      saveState(block->next());
    }
  }
}
/*
 * reoptimize() runs a trace through a second pass of TraceBuilder
 * optimizations, like this:
 *
 *   reset state.
 *   move all blocks to a temporary list.
 *   compute immediate dominators.
 *   for each block in trace order:
 *     if we have a snapshot state for this block:
 *       clear cse entries that don't dominate this block.
 *       use snapshot state.
 *     move all instructions to a temporary list.
 *     for each instruction:
 *       optimizeWork - do CSE and simplify again
 *       if not simplified:
 *         append existing instruction and update state.
 *       else:
 *         if the instruction has a result, insert a mov from the
 *         simplified tmp to the original tmp and discard the instruction.
 *     if the last conditional branch was turned into a jump, remove the
 *     fall-through edge to the next block.
 */
void TraceBuilder::reoptimize() {
  FTRACE(5, "ReOptimize:vvvvvvvvvvvvvvvvvvvv\n");
  SCOPE_EXIT { FTRACE(5, "ReOptimize:^^^^^^^^^^^^^^^^^^^^\n"); };
  assert(m_savedBlocks.empty());
  assert(!m_curWhere);

  m_state.setEnableCse(RuntimeOption::EvalHHIRCse);
  m_enableSimplification = RuntimeOption::EvalHHIRSimplification;
  if (!m_state.enableCse() && !m_enableSimplification) return;
  setConstrainGuards(false);

  BlockList sortedBlocks = rpoSortCfg(m_unit);
  auto const idoms = findDominators(m_unit, sortedBlocks);
  m_state.clear();

  for (auto* block : rpoSortCfg(m_unit)) {
    FTRACE(5, "Block: {}\n", block->id());

    m_state.startBlock(block);
    m_curBlock = block;

    auto instructions = std::move(block->instrs());
    assert(block->empty());
    while (!instructions.empty()) {
      auto *inst = &instructions.front();
      instructions.pop_front();

      // merging state looks at the current marker, and optimizeWork
      // below may create new instructions. Use the marker from this
      // instruction.
      assert(inst->marker().valid());
      setMarker(inst->marker());

      auto const tmp = optimizeWork(inst, idoms); // Can generate new instrs!
      if (!tmp) {
        // Could not optimize; keep the old instruction
        appendInstruction(inst);
        continue;
      }
      SSATmp* dst = inst->dst();
      if (dst->type() != Type::None && dst != tmp) {
        // The result of optimization has a different destination than the inst.
        // Generate a mov(tmp->dst) to get result into dst. If we get here then
        // assume the last instruction in the block isn't a guard. If it was,
        // we would have to insert the mov on the fall-through edge.
        assert(block->empty() || !block->back().isBlockEnd());
        IRInstruction* mov = m_unit.mov(dst, tmp, inst->marker());
        appendInstruction(mov);
      }

      if (inst->isBlockEnd()) {
        // Not re-adding inst; replace it with a jump to the next block.
        auto next = inst->next();
        appendInstruction(m_unit.gen(Jmp, inst->marker(), next));
        inst->setTaken(nullptr);
        inst->setNext(nullptr);
      }
    }

    assert(!block->empty());
    m_state.finishBlock(block);
  }
}
/*
 * Unit
 */
void print(std::ostream& os, const IRUnit& unit, const AsmInfo* asmInfo,
           const GuardConstraints* guards) {
  // For nice-looking dumps, we want to remember curMarker between blocks.
  BCMarker curMarker;
  static bool dotBodies = getenv("HHIR_DOT_BODIES");

  auto blocks = rpoSortCfg(unit);
  // Partition into main, cold and frozen, without changing relative order.
  auto cold = std::stable_partition(blocks.begin(), blocks.end(),
    [&] (Block* b) {
      return b->hint() == Block::Hint::Neither ||
             b->hint() == Block::Hint::Likely;
    }
  );
  auto frozen = std::stable_partition(cold, blocks.end(),
    [&] (Block* b) { return b->hint() == Block::Hint::Unlikely; }
  );

  if (dumpIREnabled(kExtraExtraLevel)) printOpcodeStats(os, blocks);

  // Print the block CFG above the actual code.
  auto const retreating_edges = findRetreatingEdges(unit);
  os << "digraph G {\n";
  for (auto block : blocks) {
    if (block->empty()) continue;
    if (dotBodies && block->hint() != Block::Hint::Unlikely &&
        block->hint() != Block::Hint::Unused) {
      // Include the IR in the body of the node
      std::ostringstream out;
      print(out, block, AreaIndex::Main, asmInfo, guards, &curMarker);
      auto bodyRaw = out.str();
      std::string body;
      body.reserve(bodyRaw.size() * 1.25);
      for (auto c : bodyRaw) {
        if (c == '\n')      body += "\\n";
        else if (c == '"')  body += "\\\"";
        else if (c == '\\') body += "\\\\";
        else                body += c;
      }
      os << folly::format("B{} [shape=\"box\" label=\"{}\"]\n",
                          block->id(), body);
    }

    auto next = block->nextEdge();
    auto taken = block->takenEdge();
    if (!next && !taken) continue;
    auto edge_color = [&] (Edge* edge) {
      auto const target = edge->to();
      return
        target->isCatch() ? " [color=blue]" :
        target->isExit() ? " [color=cyan]" :
        retreating_edges.count(edge) ? " [color=red]" :
        target->hint() == Block::Hint::Unlikely ? " [color=green]" : "";
    };
    auto show_edge = [&] (Edge* edge) {
      os << folly::format(
        "B{} -> B{}{}",
        block->id(),
        edge->to()->id(),
        edge_color(edge)
      );
    };
    if (next) {
      show_edge(next);
      if (taken) os << "; ";
    }
    if (taken) show_edge(taken);
    os << "\n";
  }
  os << "}\n";

  AreaIndex currentArea = AreaIndex::Main;
  curMarker = BCMarker();
  for (auto it = blocks.begin(); it != blocks.end(); ++it) {
    if (it == cold) {
      os << folly::format("\n{:-^60}", "cold blocks");
      currentArea = AreaIndex::Cold;
    }
    if (it == frozen) {
      os << folly::format("\n{:-^60}", "frozen blocks");
      currentArea = AreaIndex::Frozen;
    }
    print(os, *it, currentArea, asmInfo, guards, &curMarker);
  }
}
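/*
 * For illustration only, the CFG header emitted above comes out roughly like
 * this for a small unit (block ids and edge colors depend on the unit being
 * printed, and node bodies appear only when HHIR_DOT_BODIES is set):
 *
 *   digraph G {
 *   B0 -> B1; B0 -> B3 [color=green]
 *   B1 -> B2
 *   }
 */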
/*
 * For all guard instructions in unit, check to see if we can relax the
 * destination type to something less specific. The GuardConstraints map
 * contains information about what properties of the guarded type matter for
 * each instruction. If simple is true, guards will not be relaxed past
 * DataTypeSpecific except guards which are relaxed all the way to
 * DataTypeGeneric. Returns true iff any changes were made to the trace.
 */
bool relaxGuards(IRUnit& unit, const GuardConstraints& guards, bool simple) {
  Timer _t("optimize_relaxGuards");

  splitCriticalEdges(unit);
  auto blocks = rpoSortCfg(unit);
  auto changed = false;

  for (auto* block : blocks) {
    for (auto& inst : *block) {
      if (!isGuardOp(inst.op())) continue;

      auto it = guards.find(&inst);
      auto constraint = it == guards.end() ? TypeConstraint() : it->second;

      FTRACE(2, "relaxGuards processing {} with constraint {}\n",
             inst, constraint);

      if (simple && constraint.category > DataTypeGeneric &&
          constraint.category < DataTypeSpecific) {
        constraint.category = DataTypeSpecific;
      }

      auto const oldType = inst.typeParam();
      auto newType = relaxType(oldType, constraint);

      // Sometimes we (legitimately) end up with a guard like this:
      //
      // t4:StkPtr = GuardStk<BoxedArr,0,<DataTypeGeneric,
      //                                  inner:DataTypeSpecific,
      //                                  Type::BoxedCell>> t2:StkPtr
      //
      // The outer category is DataTypeGeneric because we know from eval stack
      // flavors that the top of the stack here is always boxed. The inner
      // category is DataTypeSpecific, indicating we care what the inner type
      // is, even though it's just a hint. If we treated this like any other
      // guard, we would relax the typeParam to Type::Gen and insert an assert
      // to Type::BoxedCell right after it. Unfortunately, this loses the hint
      // that the inner type is Arr. Eventually we should have some side
      // channel for passing around hints for inner ref types, but for now the
      // best we can do is forcibly keep the guard around, preserving the inner
      // type hint.
      if (constraint.assertedType.isBoxed() &&
          oldType < constraint.assertedType) {
        auto relaxedInner = relaxInner(oldType, constraint);
        if (relaxedInner < Type::BoxedCell && newType >= Type::BoxedCell) {
          FTRACE(1, "relaxGuards changing newType to {}\n", newType);
          newType = relaxedInner;
        }
      }

      if (constraint.assertedType < newType) {
        // If the asserted type is more specific than the new guarded type, set
        // the guard to the relaxed type but insert an assert operation between
        // the instruction and its dst. We go from something like this:
        //
        // t5:FramePtr = GuardLoc<Int, 4, <DataTypeGeneric,Int>> t4:FramePtr
        //
        // to this:
        //
        // t6:FramePtr = GuardLoc<Gen, 4> t4:FramePtr
        // t5:FramePtr = AssertLoc<Int, 4> t6:FramePtr
        auto* oldDst = inst.dst();
        auto* newDst = unit.genDst(&inst);
        auto* newAssert = [&] {
          switch (inst.op()) {
            case GuardLoc:
            case CheckLoc:
              return unit.genWithDst(oldDst, guardToAssert(inst.op()),
                                     inst.marker(), *inst.extra<LocalId>(),
                                     constraint.assertedType, newDst);

            case GuardStk:
            case CheckStk:
              return unit.genWithDst(oldDst, guardToAssert(inst.op()),
                                     inst.marker(), *inst.extra<StackOffset>(),
                                     constraint.assertedType, newDst);

            case CheckType:
              return unit.genWithDst(oldDst, guardToAssert(inst.op()),
                                     inst.marker(),
                                     constraint.assertedType, newDst);

            default: always_assert(false);
          }
        }();

        FTRACE(1, "relaxGuards inserting {} between {} and its dst, "
               "changing typeParam to {}\n",
               *newAssert, inst, newType);
        inst.setTypeParam(newType);

        // Now, insert the assert after the guard. For control flow guards,
        // this means inserting it on the next edge.
        if (inst.isControlFlow()) {
          auto* block = inst.next();
          block->insert(block->skipHeader(), newAssert);
        } else {
          auto* block = inst.block();
          auto it = block->iteratorTo(&inst);
          ++it;
          block->insert(it, newAssert);
        }

        changed = true;
      } else if (oldType != newType) {
        FTRACE(1, "relaxGuards changing {}'s type to {}\n", inst, newType);
        inst.setTypeParam(newType);
        changed = true;
      }
    }
  }

  if (!changed) return false;

  // Make a second pass to reflow types, with some special logic for loads.
  FrameState state(unit);
  for (auto* block : blocks) {
    state.startBlock(block);
    for (auto& inst : *block) {
      state.setMarker(inst.marker());
      copyProp(&inst);
      visitLoad(&inst, state);
      if (!removeGuard(unit, &inst, state)) {
        retypeDests(&inst);
        state.update(&inst);
      }
    }

    state.finishBlock(block);
  }

  return true;
}
void optimize(IRUnit& unit, IRBuilder& irBuilder, TransKind kind) {
  Timer _t(Timer::optimize);

  auto finishPass = [&](const char* msg) {
    if (msg) {
      printUnit(6, unit, folly::format("after {}", msg).str().c_str());
    }
    assert(checkCfg(unit));
    assert(checkTmpsSpanningCalls(unit));
    if (debug) {
      forEachInst(rpoSortCfg(unit), [&](IRInstruction* inst) {
        assert(checkOperandTypes(inst, &unit));
      });
    }
  };

  auto doPass = [&](void (*fn)(IRUnit&), const char* msg = nullptr) {
    fn(unit);
    finishPass(msg);
  };

  auto dce = [&](const char* which) {
    if (!RuntimeOption::EvalHHIRDeadCodeElim) return;
    eliminateDeadCode(unit);
    finishPass(folly::format("{} DCE", which).str().c_str());
  };

  auto const doReoptimize = RuntimeOption::EvalHHIRExtraOptPass &&
    (RuntimeOption::EvalHHIRCse || RuntimeOption::EvalHHIRSimplification);

  auto const hasLoop = RuntimeOption::EvalJitLoops && cfgHasLoop(unit);

  // TODO(#5792564): Guard relaxation doesn't work with loops.
  if (shouldHHIRRelaxGuards() && !hasLoop) {
    Timer _t(Timer::optimize_relaxGuards);
    const bool simple = kind == TransKind::Profile &&
                        (RuntimeOption::EvalJitRegionSelector == "tracelet" ||
                         RuntimeOption::EvalJitRegionSelector == "method");
    RelaxGuardsFlags flags = (RelaxGuardsFlags)
      (RelaxReflow | (simple ? RelaxSimple : RelaxNormal));
    auto changed = relaxGuards(unit, *irBuilder.guards(), flags);
    if (changed) finishPass("guard relaxation");

    if (doReoptimize) {
      irBuilder.reoptimize();
      finishPass("guard relaxation reoptimize");
    }
  }

  if (RuntimeOption::EvalHHIRRefcountOpts) {
    optimizeRefcounts(unit, FrameStateMgr{unit.entry()->front().marker()});
    finishPass("refcount opts");
  }

  dce("initial");

  if (RuntimeOption::EvalHHIRPredictionOpts) {
    doPass(optimizePredictions, "prediction opts");
  }

  if (doReoptimize) {
    irBuilder.reoptimize();
    finishPass("reoptimize");
    dce("reoptimize");
  }

  if (RuntimeOption::EvalHHIRGlobalValueNumbering) {
    doPass(gvn);
    dce("gvn");
  }

  if (kind != TransKind::Profile && RuntimeOption::EvalHHIRMemoryOpts) {
    doPass(optimizeLoads);
    dce("loadelim");
  }

  /*
   * Note: doing this pass this late might not be ideal, in particular because
   * we've already turned some StLoc instructions into StLocNT.
   *
   * But right now there are assumptions preventing us from doing it before
   * refcount opts. (Refcount opts needs to see all the StLocs explicitly
   * because it makes assumptions about whether references are consumed based
   * on that.)
   */
  if (kind != TransKind::Profile && RuntimeOption::EvalHHIRMemoryOpts) {
    doPass(optimizeStores);
    dce("storeelim");
  }

  if (RuntimeOption::EvalHHIRGenerateAsserts) {
    doPass(insertAsserts, "RefCnt asserts");
  }
}
bool checkTmpsSpanningCalls(const IRUnit& unit) {
  auto const blocks = rpoSortCfg(unit);
  auto const children = findDomChildren(unit, blocks);

  // CallBuiltin is ok because it is not a php-level call. (It will
  // call a C++ helper and we can push/pop around it normally.)
  auto isCall = [&] (Opcode op) {
    return op == Call || op == CallArray;
  };

  typedef StateVector<SSATmp,bool> State;

  bool isValid = true;
  forPreorderDoms(
    blocks.front(), children, State(unit, false),
    [&] (Block* b, State& state) {
      for (auto& inst : *b) {
        for (auto& src : inst.srcs()) {
          /*
           * These SSATmp's are used only for stack analysis in the
           * simplifier and therefore may live across calls. In particular
           * these instructions are used to bridge the logical stack of the
           * caller when a callee is inlined so that analysis does not scan
           * into the callee stack when searching for a type of value in the
           * caller.
           */
          if (inst.op() == ReDefSP && src->isA(Type::StkPtr)) continue;
          if (inst.op() == ReDefGeneratorSP && src->isA(Type::StkPtr)) {
            continue;
          }

          if (src->isA(Type::FramePtr)) continue;
          if (src->isConst()) continue;
          if (!state[src]) {
            auto msg = folly::format("checkTmpsSpanningCalls failed\n"
                                     " instruction: {}\n"
                                     " src: {}\n",
                                     inst.toString(),
                                     src->toString()).str();
            std::cerr << msg;
            FTRACE(1, "{}", msg);
            isValid = false;
          }
        }

        /*
         * Php calls kill all live temporaries. We can't keep them
         * alive across the call because we currently have no
         * callee-saved registers in our abi, and all translations
         * share the same spill slots.
         */
        if (isCall(inst.op())) state.reset();

        for (auto& d : inst.dsts()) {
          state[d] = true;
        }
      }
    }
  );

  return isValid;
}
void optimize(IRUnit& unit, IRBuilder& irBuilder, TransKind kind) {
  Timer _t(Timer::optimize);

  auto const finishPass = [&] (const char* msg) {
    if (msg) {
      printUnit(6, unit, folly::format("after {}", msg).str().c_str());
    }
    assertx(checkCfg(unit));
    assertx(checkTmpsSpanningCalls(unit));
    if (debug) {
      forEachInst(rpoSortCfg(unit), [&](IRInstruction* inst) {
        assertx(checkOperandTypes(inst, &unit));
      });
    }
  };

  auto const doPass = [&] (void (*fn)(IRUnit&), const char* msg = nullptr) {
    fn(unit);
    finishPass(msg);
  };

  auto const dce = [&] (const char* which) {
    if (!RuntimeOption::EvalHHIRDeadCodeElim) return;
    eliminateDeadCode(unit);
    finishPass(folly::format("{} DCE", which).str().c_str());
  };

  auto const simplifyPass = [] (IRUnit& unit) {
    boost::dynamic_bitset<> reachable(unit.numBlocks());
    reachable.set(unit.entry()->id());

    auto const blocks = rpoSortCfg(unit);

    for (auto block : blocks) {
      // Skip unreachable blocks, or simplify() cries.
      if (!reachable.test(block->id())) continue;

      for (auto& inst : *block) simplify(unit, &inst);

      if (auto const b = block->back().next())  reachable.set(b->id());
      if (auto const b = block->back().taken()) reachable.set(b->id());
    }
  };

  auto const doSimplify = RuntimeOption::EvalHHIRExtraOptPass &&
                          RuntimeOption::EvalHHIRSimplification;
  auto const hasLoop = RuntimeOption::EvalJitLoops && cfgHasLoop(unit);
  auto const traceMode = kind != TransKind::Optimize ||
                         RuntimeOption::EvalJitPGORegionSelector == "hottrace";

  // TODO (#5792564): Guard relaxation doesn't work with loops.
  // TODO (#6599498): Guard relaxation is broken in wholecfg mode.
  if (shouldHHIRRelaxGuards() && !hasLoop && traceMode) {
    Timer _t(Timer::optimize_relaxGuards);
    const bool simple = kind == TransKind::Profile &&
                        (RuntimeOption::EvalJitRegionSelector == "tracelet" ||
                         RuntimeOption::EvalJitRegionSelector == "method");
    RelaxGuardsFlags flags = (RelaxGuardsFlags)
      (RelaxReflow | (simple ? RelaxSimple : RelaxNormal));
    auto changed = relaxGuards(unit, *irBuilder.guards(), flags);
    if (changed) finishPass("guard relaxation");

    if (doSimplify) {
      doPass(simplifyPass, "guard relaxation simplify");
    }
  }

  // This is vestigial (it removes some instructions needed by the old refcount
  // opts pass), and will be removed soon.
  eliminateTakes(unit);

  dce("initial");

  if (RuntimeOption::EvalHHIRPredictionOpts) {
    doPass(optimizePredictions, "prediction opts");
  }

  if (doSimplify) {
    doPass(simplifyPass, "simplify");
    dce("simplify");
  }

  if (RuntimeOption::EvalHHIRGlobalValueNumbering) {
    doPass(gvn);
    dce("gvn");
  }

  if (kind != TransKind::Profile && RuntimeOption::EvalHHIRMemoryOpts) {
    doPass(optimizeLoads);
    dce("loadelim");
  }

  /*
   * Note: doing this pass this late might not be ideal, in particular because
   * we've already turned some StLoc instructions into StLocNT.
   *
   * But right now there are assumptions preventing us from doing it before
   * refcount opts. (Refcount opts needs to see all the StLocs explicitly
   * because it makes assumptions about whether references are consumed based
   * on that.)
   */
  if (kind != TransKind::Profile && RuntimeOption::EvalHHIRMemoryOpts) {
    doPass(optimizeStores);
    dce("storeelim");
  }

  if (kind != TransKind::Profile && RuntimeOption::EvalHHIRRefcountOpts) {
    doPass(optimizeRefcounts2);
    dce("refcount");
  }

  if (RuntimeOption::EvalHHIRGenerateAsserts) {
    doPass(insertAsserts);
  }
}
/*
 * For all guard instructions in trace, check to see if we can relax the
 * destination type to something less specific. The GuardConstraints map
 * contains information about what properties of the guarded type matter for
 * each instruction. If simple is true, guards will not be relaxed past
 * DataTypeSpecific except guards which are relaxed all the way to
 * DataTypeGeneric. Returns true iff any changes were made to the trace.
 */
bool relaxGuards(IRUnit& unit, const GuardConstraints& guards, bool simple) {
  auto blocks = rpoSortCfg(unit);
  auto changed = false;

  for (auto* block : blocks) {
    for (auto& inst : *block) {
      if (!isGuardOp(inst.op())) continue;

      auto it = guards.find(&inst);
      auto constraint = it == guards.end() ? TypeConstraint() : it->second;

      if (simple && constraint.category > DataTypeGeneric &&
          constraint.category < DataTypeSpecific) {
        constraint.category = DataTypeSpecific;
      }

      // TODO(t2598894): Support relaxing inner types
      auto const oldType = inst.typeParam();
      auto newType = relaxType(oldType, constraint.category);

      if (constraint.knownType <= newType) {
        // If the known type is at least as good as the relaxed type, we can
        // replace the guard with an assert.
        auto newOp = guardToAssert(inst.op());
        auto newType = std::min(constraint.knownType, previousGuardType(&inst));
        FTRACE(1, "relaxGuards changing {}'s type to {}, op to {}\n",
               inst, newType, newOp);

        assert(!hasEdges(newOp));
        if (inst.hasEdges()) {
          block->push_back(unit.gen(Jmp, inst.marker(), inst.next()));
        }

        inst.setTypeParam(newType);
        inst.setOpcode(newOp);
        changed = true;
      } else if (!oldType.equals(newType)) {
        FTRACE(1, "relaxGuards changing {}'s type to {}\n", inst, newType);
        inst.setTypeParam(newType);
        changed = true;
      }
    }
  }

  if (!changed) return false;

  // Make a second pass to reflow types, with some special logic for loads.
  FrameState state(unit);
  for (auto* block : blocks) {
    state.startBlock(block);
    for (auto& inst : *block) {
      state.setMarker(inst.marker());
      visitLoad(&inst, state);
      retypeDests(&inst);
      state.update(&inst);
    }

    state.finishBlock(block);
  }

  return true;
}
/*
 * Intended to be called after all optimizations are finished on a
 * single-entry, single-exit tracelet, this collects the types of all stack
 * slots and locals at the end of the main exit.
 */
void IRUnit::collectPostConditions() {
  // This function is only correct when given a single-exit region, as in
  // TransKind::Profile. Furthermore, its output is only used to guide
  // formation of profile-driven regions.
  assert(mcg->tx().mode() == TransKind::Profile);
  assert(m_postConds.empty());
  Timer _t(Timer::collectPostConditions);

  // We want the state for the last block on the "main trace". Figure
  // out which that is.
  Block* mainExit = nullptr;
  Block* lastMainBlock = nullptr;

  FrameStateMgr state{*this, entry()->front().marker()};
  // TODO(#5678127): this code is wrong for HHIRBytecodeControlFlow
  state.setLegacyReoptimize();
  ITRACE(2, "collectPostConditions starting\n");
  Trace::Indent _i;

  for (auto* block : rpoSortCfg(*this)) {
    state.startBlock(block, block->front().marker());

    for (auto& inst : *block) {
      state.update(&inst);
    }

    if (isMainBlock(block)) lastMainBlock = block;

    if (isMainExit(block)) {
      mainExit = block;
      break;
    }

    state.finishBlock(block);
  }

  // If we didn't find an obvious exit, then use the last block in the region.
  always_assert(lastMainBlock != nullptr);
  if (mainExit == nullptr) mainExit = lastMainBlock;

  FTRACE(1, "mainExit: B{}\n", mainExit->id());

  // state currently holds the state at the end of mainExit
  auto const curFunc = state.func();
  auto const sp = state.sp();
  auto const spOffset = state.spOffset();

  for (unsigned i = 0; i < spOffset; ++i) {
    auto t = getStackValue(sp, i).knownType;
    if (!t.equals(Type::StackElem)) {
      m_postConds.push_back({ RegionDesc::Location::Stack{i, spOffset - i},
                              t });
    }
  }

  for (unsigned i = 0; i < curFunc->numLocals(); ++i) {
    auto t = state.localType(i);
    if (!t.equals(Type::Gen)) {
      FTRACE(1, "Local {}: {}\n", i, t.toString());
      m_postConds.push_back({ RegionDesc::Location::Local{i}, t });
    }
  }
}
/*
 * For all guard instructions in unit, check to see if we can relax the
 * destination type to something less specific. The GuardConstraints map
 * contains information about what properties of the guarded type matter for
 * each instruction. If simple is true, guards will not be relaxed past
 * DataTypeSpecific except guards which are relaxed all the way to
 * DataTypeGeneric. Returns true iff any changes were made to the trace.
 */
bool relaxGuards(IRUnit& unit, const GuardConstraints& constraints,
                 RelaxGuardsFlags flags) {
  Timer _t(Timer::optimize_relaxGuards);
  ITRACE(2, "entering relaxGuards\n");
  Indent _i;
  bool simple = flags & RelaxSimple;
  bool reflow = flags & RelaxReflow;
  splitCriticalEdges(unit);
  auto& guards = constraints.guards;
  auto blocks = rpoSortCfg(unit);
  auto changed = false;

  for (auto* block : blocks) {
    for (auto& inst : *block) {
      if (!isGuardOp(inst.op())) continue;

      auto it = guards.find(&inst);
      auto constraint = it == guards.end() ? TypeConstraint() : it->second;
      ITRACE(2, "relaxGuards processing {} with constraint {}\n",
             inst, constraint);

      auto simplifyCategory = [simple](DataTypeCategory& cat) {
        if (simple && cat > DataTypeGeneric && cat < DataTypeSpecific) {
          cat = DataTypeSpecific;
        }
      };
      simplifyCategory(constraint.category);
      simplifyCategory(constraint.innerCat);

      auto const oldType = inst.typeParam();
      auto newType = relaxType(oldType, constraint);

      if (oldType != newType) {
        ITRACE(1, "relaxGuards changing {}'s type to {}\n", inst, newType);
        inst.setTypeParam(newType);
        changed = true;
      }
    }
  }

  if (!changed) return false;
  if (!reflow) return true;

  // Make a second pass to reflow types, with some special logic for loads.
  FrameState state{unit, unit.entry()->front().marker()};
  for (auto* block : blocks) {
    ITRACE(2, "relaxGuards reflow entering B{}\n", block->id());
    Indent _i;
    state.startBlock(block, block->front().marker());

    for (auto& inst : *block) {
      state.setMarker(inst.marker());
      copyProp(&inst);
      visitLoad(&inst, state);
      retypeDests(&inst, &unit);
      state.update(&inst);
    }

    state.finishBlock(block);
  }

  return true;
}
/*
 * reoptimize() runs a trace through a second pass of TraceBuilder
 * optimizations, like this:
 *
 *   reset state.
 *   move all blocks to a temporary list.
 *   compute immediate dominators.
 *   for each block in trace order:
 *     if we have a snapshot state for this block:
 *       clear cse entries that don't dominate this block.
 *       use snapshot state.
 *     move all instructions to a temporary list.
 *     for each instruction:
 *       optimizeWork - do CSE and simplify again
 *       if not simplified:
 *         append existing instruction and update state.
 *       else:
 *         if the instruction has a result, insert a mov from the
 *         simplified tmp to the original tmp and discard the instruction.
 *     if the last conditional branch was turned into a jump, remove the
 *     fall-through edge to the next block.
 */
void TraceBuilder::reoptimize() {
  FTRACE(5, "ReOptimize:vvvvvvvvvvvvvvvvvvvv\n");
  SCOPE_EXIT { FTRACE(5, "ReOptimize:^^^^^^^^^^^^^^^^^^^^\n"); };
  assert(m_curTrace->isMain());
  assert(m_savedTraces.empty());

  m_state.setEnableCse(RuntimeOption::EvalHHIRCse);
  m_enableSimplification = RuntimeOption::EvalHHIRSimplification;
  if (!m_state.enableCse() && !m_enableSimplification) return;
  always_assert(!m_inReoptimize);
  m_inReoptimize = true;

  BlockList sortedBlocks = rpoSortCfg(m_unit);
  auto const idoms = findDominators(m_unit, sortedBlocks);
  m_state.clear();

  auto& traceBlocks = m_curTrace->blocks();
  BlockList blocks(traceBlocks.begin(), traceBlocks.end());
  traceBlocks.clear();
  for (auto* block : blocks) {
    assert(block->trace() == m_curTrace);
    FTRACE(5, "Block: {}\n", block->id());

    assert(m_curTrace->isMain());
    m_state.startBlock(block);
    m_curTrace->push_back(block);

    auto instructions = std::move(block->instrs());
    assert(block->empty());
    while (!instructions.empty()) {
      auto *inst = &instructions.front();
      instructions.pop_front();
      m_state.setMarker(inst->marker());

      // merging state looks at the current marker, and optimizeWork
      // below may create new instructions. Use the marker from this
      // instruction.
      assert(inst->marker().valid());
      setMarker(inst->marker());

      auto const tmp = optimizeWork(inst, idoms); // Can generate new instrs!
      if (!tmp) {
        // Could not optimize; keep the old instruction
        appendInstruction(inst, block);
        m_state.update(inst);
        continue;
      }
      SSATmp* dst = inst->dst();
      if (dst->type() != Type::None && dst != tmp) {
        // The result of optimization has a different destination than the inst.
        // Generate a mov(tmp->dst) to get result into dst. If we get here then
        // assume the last instruction in the block isn't a guard. If it was,
        // we would have to insert the mov on the fall-through edge.
        assert(block->empty() || !block->back().isBlockEnd());
        IRInstruction* mov = m_unit.mov(dst, tmp, inst->marker());
        appendInstruction(mov, block);
        m_state.update(mov);
      }
      // Not re-adding inst; remove the inst->taken edge
      if (inst->taken()) inst->setTaken(nullptr);
    }

    if (block->empty()) {
      // If all the instructions in the block were optimized away, remove it
      // from the trace.
      auto it = traceBlocks.end();
      --it;
      assert(*it == block);
      m_curTrace->unlink(it);
    } else {
      if (block->back().isTerminal()) {
        // Could have converted a conditional branch to Jmp; clear next.
        block->setNext(nullptr);
      }
      m_state.finishBlock(block);
    }
  }
}
bool checkRegisters(const IRUnit& unit, const RegAllocInfo& regs) {
  assert(checkCfg(unit));

  auto blocks = rpoSortCfg(unit);
  StateVector<Block, RegState> states(unit, RegState());
  StateVector<Block, bool> reached(unit, false);
  for (auto* block : blocks) {
    RegState state = states[block];
    for (IRInstruction& inst : *block) {
      if (inst.op() == Jmp) continue; // handled by Shuffle
      auto& inst_regs = regs[inst];
      for (int i = 0, n = inst.numSrcs(); i < n; ++i) {
        auto const &rs = inst_regs.src(i);
        if (!rs.spilled()) {
          // hack - ignore rbx and rbp
          bool ignore_frame_regs;

          switch (arch()) {
            case Arch::X64:
              ignore_frame_regs = (rs.reg(0) == X64::rVmSp ||
                                   rs.reg(0) == X64::rVmFp);
              break;
            case Arch::ARM:
              ignore_frame_regs = (rs.reg(0) == ARM::rVmSp ||
                                   rs.reg(0) == ARM::rVmFp);
              break;
          }
          if (ignore_frame_regs) continue;
        }
        DEBUG_ONLY auto src = inst.src(i);
        assert(rs.numWords() == src->numWords() ||
               (src->isConst() && rs.numWords() == 0));
        DEBUG_ONLY auto allocated = rs.numAllocated();
        if (allocated == 2) {
          if (rs.spilled()) {
            assert(rs.slot(0) != rs.slot(1));
          } else {
            assert(rs.reg(0) != rs.reg(1));
          }
        }
        for (unsigned i = 0, n = rs.numAllocated(); i < n; ++i) {
          assert(state.tmp(rs, i) == src);
        }
      }
      auto update = [&](SSATmp* tmp, const PhysLoc& loc) {
        for (unsigned i = 0, n = loc.numAllocated(); i < n; ++i) {
          state.tmp(loc, i) = tmp;
        }
      };
      if (inst.op() == Shuffle) {
        checkShuffle(inst, regs);
        for (unsigned i = 0; i < inst.numSrcs(); ++i) {
          update(inst.src(i), inst.extra<Shuffle>()->dests[i]);
        }
      } else {
        for (unsigned i = 0; i < inst.numDsts(); ++i) {
          update(inst.dst(i), inst_regs.dst(i));
        }
      }
    }
    // State contains the PhysLoc->SSATmp reverse mappings at block end;
    // propagate the state to succ
    auto updateEdge = [&](Block* succ) {
      if (!reached[succ]) {
        states[succ] = state;
      } else {
        states[succ].merge(state);
      }
    };
    if (auto* next = block->next()) updateEdge(next);
    if (auto* taken = block->taken()) updateEdge(taken);
  }
  return true;
}