/**
 * Called to clear out the tracked local values at a call site.
 * Calls kill all registers, so we don't want to keep locals in
 * registers across calls. We do continue tracking the types in
 * locals, however.
 */
void TraceBuilder::killLocalsForCall() {
  auto doKill = [&](smart::vector<LocalState>& locals) {
    for (auto& loc : locals) {
      SSATmp* t = loc.value;
      // should not kill DefConst, and LdConst should be replaced by DefConst
      if (!t || t->inst()->op() == DefConst) continue;

      if (t->inst()->op() == LdConst) {
        // make the new DefConst instruction
        IRInstruction* clone = t->inst()->clone(&m_irFactory);
        clone->setOpcode(DefConst);
        loc.value = clone->dst();
        continue;
      }
      assert(!t->isConst());
      loc.unsafe = true;
    }
  };

  doKill(m_locals);
  m_callerAvailableValues.clear();

  for (auto& state : m_inlineSavedStates) {
    doKill(state->locals);
    state->callerAvailableValues.clear();
  }
}
SSATmp* TraceBuilder::optimizeWork(IRInstruction* inst,
                                   const folly::Optional<IdomVector>& idoms) {
  // Since some of these optimizations inspect tracked state, we don't
  // perform any of them on non-main traces.
  if (m_savedTraces.size() > 0) return nullptr;

  static DEBUG_ONLY __thread int instNest = 0;
  if (debug) ++instNest;
  SCOPE_EXIT { if (debug) --instNest; };
  DEBUG_ONLY auto indent = [&] { return std::string(instNest * 2, ' '); };

  FTRACE(1, "{}{}\n", indent(), inst->toString());

  // First pass of tracebuilder optimizations tries to replace an
  // instruction based on tracked state before we do anything else.
  // May mutate the IRInstruction in place (and return nullptr) or
  // return an SSATmp*.
  if (SSATmp* preOpt = preOptimize(inst)) {
    FTRACE(1, " {}preOptimize returned: {}\n",
           indent(), preOpt->inst()->toString());
    return preOpt;
  }
  if (inst->op() == Nop) return nullptr;

  // copy propagation on inst source operands
  copyProp(inst);

  SSATmp* result = nullptr;
  if (m_enableCse && inst->canCSE()) {
    result = cseLookup(inst, idoms);
    if (result) {
      // Found a dominating instruction that can be used instead of inst
      FTRACE(1, " {}cse found: {}\n",
             indent(), result->inst()->toString());
      assert(!inst->consumesReferences());
      if (inst->producesReference()) {
        // Replace with an IncRef
        FTRACE(1, " {}cse of refcount-producing instruction\n", indent());
        return gen(IncRef, result);
      } else {
        return result;
      }
    }
  }

  if (m_enableSimplification) {
    result = m_simplifier.simplify(inst);
    if (result) {
      // Found a simpler instruction that can be used instead of inst
      FTRACE(1, " {}simplification returned: {}\n",
             indent(), result->inst()->toString());
      assert(inst->hasDst());
      return result;
    }
  }
  return nullptr;
}
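The cseLookup call above consults a table keyed by an instruction's opcode and source operands; a hit means a dominating instruction already computes the same value. The following is a minimal, self-contained sketch of that kind of value-numbering table, using hypothetical Expr/Value stand-ins rather than the real IRInstruction/SSATmp classes:

#include <cstdint>
#include <map>
#include <tuple>
#include <vector>

struct Value { uint32_t id; };          // stand-in for SSATmp

struct Expr {                           // stand-in for a canonicalized inst
  int opcode;
  std::vector<uint32_t> srcIds;         // value ids of the source operands
  bool operator<(const Expr& o) const {
    return std::tie(opcode, srcIds) < std::tie(o.opcode, o.srcIds);
  }
};

struct CseTable {
  std::map<Expr, Value> table;

  // Return the value defined by an equivalent instruction seen earlier,
  // or nullptr on a miss. A real lookup must additionally check that the
  // hit dominates the current instruction (the idoms argument above).
  const Value* lookup(const Expr& e) const {
    auto const it = table.find(e);
    return it == table.end() ? nullptr : &it->second;
  }

  // emplace keeps the first mapping, so the earliest definition (the
  // dominating one in straight-line code) wins over later duplicates.
  void insert(const Expr& e, Value v) { table.emplace(e, v); }
};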
PhysReg forceAlloc(const SSATmp& tmp) {
  if (tmp.type() <= TBottom) return InvalidReg;

  auto inst = tmp.inst();
  auto opc = inst->op();

  auto const forceStkPtrs = [&] {
    switch (arch()) {
    case Arch::X64:   return false;
    case Arch::ARM:   return true;
    case Arch::PPC64: not_implemented(); break;
    }
    not_reached();
  }();

  if (forceStkPtrs && tmp.isA(TStkPtr)) {
    assert_flog(
      opc == DefSP || opc == Mov,
      "unexpected StkPtr dest from {}",
      opcodeName(opc)
    );
    return rvmsp();
  }

  // LdContActRec and LdAFWHActRec, which load a generator's AR, are the only
  // cases where we have a pointer to an AR that is not in rvmfp().
  if (opc != LdContActRec && opc != LdAFWHActRec && tmp.isA(TFramePtr)) {
    return rvmfp();
  }

  return InvalidReg;
}
/**
 * Called to clear out the tracked local values at a call site.
 * Calls kill all registers, so we don't want to keep locals in
 * registers across calls. We do continue tracking the types in
 * locals, however.
 */
void TraceBuilder::killLocalsForCall() {
  for (auto& loc : m_locals) {
    SSATmp* t = loc.value;
    // should not kill DefConst, and LdConst should be replaced by DefConst
    if (!t || t->inst()->op() == DefConst) continue;

    if (t->inst()->op() == LdConst) {
      // make the new DefConst instruction
      IRInstruction* clone = t->inst()->clone(&m_irFactory);
      clone->setOpcode(DefConst);
      loc.value = clone->dst();
      continue;
    }
    assert(!t->isConst());
    loc.unsafe = true;
  }
}
SSATmp* TraceBuilder::optimizeWork(IRInstruction* inst) {
  static DEBUG_ONLY __thread int instNest = 0;
  if (debug) ++instNest;
  SCOPE_EXIT { if (debug) --instNest; };
  DEBUG_ONLY auto indent = [&] { return std::string(instNest * 2, ' '); };

  FTRACE(1, "{}{}\n", indent(), inst->toString());

  // First pass of tracebuilder optimizations tries to replace an
  // instruction based on tracked state before we do anything else.
  // May mutate the IRInstruction in place (and return nullptr) or
  // return an SSATmp*.
  if (SSATmp* preOpt = preOptimize(inst)) {
    FTRACE(1, " {}preOptimize returned: {}\n",
           indent(), preOpt->inst()->toString());
    return preOpt;
  }
  if (inst->op() == Nop) return nullptr;

  // copy propagation on inst source operands
  copyProp(inst);

  SSATmp* result = nullptr;
  if (m_enableCse && inst->canCSE()) {
    result = cseLookup(inst);
    if (result) {
      // Found a dominating instruction that can be used instead of inst
      FTRACE(1, " {}cse found: {}\n",
             indent(), result->inst()->toString());
      return result;
    }
  }

  if (m_enableSimplification) {
    result = m_simplifier.simplify(inst);
    if (result) {
      // Found a simpler instruction that can be used instead of inst
      FTRACE(1, " {}simplification returned: {}\n",
             indent(), result->inst()->toString());
      assert(inst->hasDst());
      return result;
    }
  }
  return nullptr;
}
/**
 * Called to clear out the tracked local values at a call site.
 * Calls kill all registers, so we don't want to keep locals in
 * registers across calls. We do continue tracking the types in
 * locals, however.
 */
void TraceBuilder::killLocals() {
  for (uint32_t i = 0; i < m_localValues.size(); i++) {
    SSATmp* t = m_localValues[i];
    // should not kill DefConst, and LdConst should be replaced by DefConst
    if (!t || t->inst()->op() == DefConst) {
      continue;
    }
    if (t->inst()->op() == LdConst) {
      // make the new DefConst instruction
      IRInstruction* clone = t->inst()->clone(&m_irFactory);
      clone->setOpcode(DefConst);
      m_localValues[i] = clone->getDst();
      continue;
    }
    assert(!t->isConst());
    m_localValues[i] = nullptr;
  }
}
PhysReg forceAlloc(const SSATmp& tmp) {
  auto inst = tmp.inst();
  auto opc = inst->op();

  // TODO(t5485866) Our manipulations to vmsp must be SSA to play nice with
  // LLVM. In the X64 backend, this causes enough extra reg-reg copies to
  // measurably impact performance, so keep forcing things into rVmSp for
  // now. We should be able to remove this completely once the necessary
  // improvements are made to vxls.
  auto const forceStkPtrs = arch() != Arch::X64 || !RuntimeOption::EvalJitLLVM;

  if (forceStkPtrs && tmp.isA(Type::StkPtr)) {
    assert_flog(
      opc == DefSP || opc == ReDefSP || opc == Call || opc == CallArray ||
      opc == ContEnter || opc == SpillStack || opc == SpillFrame ||
      opc == CufIterSpillFrame || opc == ExceptionBarrier ||
      opc == RetAdjustStack || opc == InterpOne || opc == InterpOneCF ||
      opc == Mov || opc == CheckStk || opc == GuardStk || opc == AssertStk ||
      opc == CastStk || opc == CastStkIntToDbl || opc == CoerceStk ||
      opc == DefLabel || opc == HintStkInner ||
      MInstrEffects::supported(opc),
      "unexpected StkPtr dest from {}",
      opcodeName(opc)
    );
    return mcg->backEnd().rVmSp();
  }

  // LdContActRec and LdAFWHActRec, which load a generator's AR, are the only
  // cases where we have a pointer to an AR that is not in rVmFp.
  if (opc != LdContActRec && opc != LdAFWHActRec && tmp.isA(Type::FramePtr)) {
    return mcg->backEnd().rVmFp();
  }

  if (opc == DefMIStateBase) {
    assert(tmp.isA(Type::PtrToGen));
    return mcg->backEnd().rVmTl();
  }

  return InvalidReg;
}
void TraceBuilder::genDecRefStack(Type type, uint32_t stackOff) {
  bool spansCall = false;
  Type knownType = Type::None;
  SSATmp* tmp = getStackValue(m_spValue, stackOff, spansCall, knownType);
  if (!tmp || (spansCall && tmp->inst()->op() != DefConst)) {
    // We don't want to extend live ranges of tmps across calls, so we
    // don't get the value if spansCall is true; however, we can use
    // any type information known.
    if (knownType != Type::None) {
      type = Type::mostRefined(type, knownType);
    }
    gen(DecRefStack, type, m_spValue, cns(int64_t(stackOff)));
  } else {
    gen(DecRef, tmp);
  }
}
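Type::mostRefined picks whichever of the tracked and statically known types carries more information. If a type is encoded as a bitset (one bit per primitive type, a union as the bit-or of its members), refinement of two overlapping types is simply their intersection. A hedged sketch with hypothetical names, not the real Type class:

#include <cassert>
#include <cstdint>

// Hypothetical bitset-encoded type: one bit per primitive type; a union
// type is the bit-or of its members, and subtyping is bit-subset.
struct Ty {
  uint64_t bits;
};

inline bool subtypeOf(Ty a, Ty b) {
  return (a.bits & b.bits) == a.bits;
}

// The tighter of two overlapping types, conceptually what
// Type::mostRefined(type, knownType) computes above.
inline Ty mostRefined(Ty a, Ty b) {
  assert((a.bits & b.bits) != 0);  // caller guarantees the types overlap
  return Ty{a.bits & b.bits};
}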
PhysReg forceAlloc(const SSATmp& tmp) {
  auto inst = tmp.inst();
  auto opc = inst->op();

  if (tmp.isA(Type::StkPtr)) {
    assert(opc == DefSP || opc == ReDefSP || opc == Call ||
           opc == CallArray || opc == ContEnter || opc == SpillStack ||
           opc == SpillFrame || opc == CufIterSpillFrame ||
           opc == ExceptionBarrier || opc == RetAdjustStack ||
           opc == InterpOne || opc == InterpOneCF || opc == Mov ||
           opc == CheckStk || opc == GuardStk || opc == AssertStk ||
           opc == CastStk || opc == CastStkIntToDbl || opc == CoerceStk ||
           opc == SideExitGuardStk ||
           MInstrEffects::supported(opc));
    return mcg->backEnd().rVmSp();
  }

  // LdContActRec and LdAFWHActRec, which load a generator's AR, are the only
  // cases where we have a pointer to an AR that is not in rVmFp.
  if (opc != LdContActRec && opc != LdAFWHActRec && tmp.isA(Type::FramePtr)) {
    return mcg->backEnd().rVmFp();
  }

  if (opc == DefMIStateBase) {
    assert(tmp.isA(Type::PtrToCell));
    return mcg->backEnd().rSp();
  }

  return InvalidReg;
}
SSATmp* IRBuilder::optimizeWork(IRInstruction* inst,
                                const folly::Optional<IdomVector>& idoms) {
  // Since some of these optimizations inspect tracked state, we don't
  // perform any of them on non-main traces.
  if (m_savedBlocks.size() > 0) return nullptr;

  static DEBUG_ONLY __thread int instNest = 0;
  if (debug) ++instNest;
  SCOPE_EXIT { if (debug) --instNest; };
  DEBUG_ONLY auto indent = [&] { return std::string(instNest * 2, ' '); };

  FTRACE(1, "optimizing {}{}\n", indent(), inst->toString());

  // First pass of IRBuilder optimizations tries to replace an
  // instruction based on tracked state before we do anything else.
  // May mutate the IRInstruction in place (and return nullptr) or
  // return an SSATmp*.
  if (SSATmp* preOpt = preOptimize(inst)) {
    FTRACE(1, " {}preOptimize returned: {}\n",
           indent(), preOpt->inst()->toString());
    return preOpt;
  }
  if (inst->op() == Nop) return nullptr;

  // copy propagation on inst source operands
  copyProp(inst);

  SSATmp* result = nullptr;

  if (m_enableSimplification) {
    result = m_simplifier.simplify(inst);
    if (result) {
      inst = result->inst();
      if (inst->producesReference(0)) {
        // This effectively prevents CSE from kicking in below, which
        // would replace the instruction with an IncRef. That is
        // correct if the simplifier morphed the instruction, but it's
        // incorrect if the simplifier returned one of the original
        // instruction's sources. We currently have no way to
        // distinguish the two cases, so we prevent CSE completely for
        // now.
        return result;
      }
    }
  }

  if (m_state.enableCse() && inst->canCSE()) {
    SSATmp* cseResult = m_state.cseLookup(inst, idoms);
    if (cseResult) {
      // Found a dominating instruction that can be used instead of inst
      FTRACE(1, " {}cse found: {}\n",
             indent(), cseResult->inst()->toString());
      assert(!inst->consumesReferences());
      if (inst->producesReference(0)) {
        // Replace with an IncRef
        FTRACE(1, " {}cse of refcount-producing instruction\n", indent());
        gen(IncRef, cseResult);
      }
      return cseResult;
    }
  }

  return result;
}
SSATmp* TraceBuilder::optimizeWork(IRInstruction* inst,
                                   const folly::Optional<IdomVector>& idoms) {
  // Since some of these optimizations inspect tracked state, we don't
  // perform any of them on non-main traces.
  if (m_savedTraces.size() > 0) return nullptr;

  static DEBUG_ONLY __thread int instNest = 0;
  if (debug) ++instNest;
  SCOPE_EXIT { if (debug) --instNest; };
  DEBUG_ONLY auto indent = [&] { return std::string(instNest * 2, ' '); };

  FTRACE(1, "{}{}\n", indent(), inst->toString());

  // turn off ActRec optimization for instructions that will require a frame
  if (m_state.needsFPAnchor(inst)) {
    m_state.setHasFPAnchor();
    always_assert(m_state.fp() != nullptr);
    gen(InlineFPAnchor, m_state.fp());
    FTRACE(2, "Anchor for: {}\n", inst->toString());
  }

  // First pass of tracebuilder optimizations tries to replace an
  // instruction based on tracked state before we do anything else.
  // May mutate the IRInstruction in place (and return nullptr) or
  // return an SSATmp*.
  if (SSATmp* preOpt = preOptimize(inst)) {
    FTRACE(1, " {}preOptimize returned: {}\n",
           indent(), preOpt->inst()->toString());
    return preOpt;
  }
  if (inst->op() == Nop) return nullptr;

  // copy propagation on inst source operands
  copyProp(inst);

  SSATmp* result = nullptr;
  if (m_state.enableCse() && inst->canCSE()) {
    result = m_state.cseLookup(inst, idoms);
    if (result) {
      // Found a dominating instruction that can be used instead of inst
      FTRACE(1, " {}cse found: {}\n",
             indent(), result->inst()->toString());

      // CheckType and AssertType are special. They're marked as both PRc and
      // CRc to placate our refcounting optimizations, but for the purposes of
      // CSE they're neither.
      if (inst->is(CheckType, AssertType)) {
        return result;
      }

      assert(!inst->consumesReferences());
      if (inst->producesReference()) {
        // Replace with an IncRef
        FTRACE(1, " {}cse of refcount-producing instruction\n", indent());
        return gen(IncRef, result);
      } else {
        return result;
      }
    }
  }

  if (m_enableSimplification) {
    result = m_simplifier.simplify(inst);
    if (result) {
      // Found a simpler instruction that can be used instead of inst
      FTRACE(1, " {}simplification returned: {}\n",
             indent(), result->inst()->toString());
      assert(inst->hasDst());
      return result;
    }
  }
  return nullptr;
}
SSATmp* LinearScan::getSpilledTmp(SSATmp* tmp) {
  assert(tmp->inst()->op() == Reload);
  SSATmp* slot = tmp->inst()->src(0);
  assert(slot->inst()->op() == Spill);
  return slot->inst()->src(0);
}
/*
 * Performs simplification and CSE on the input instruction. If the input
 * instruction has a dest, this will return an SSATmp that represents the same
 * value as dst(0) of the input instruction. If the input instruction has no
 * dest, this will return nullptr.
 *
 * The caller never needs to clone or append; all this has been done.
 */
SSATmp* IRBuilder::optimizeInst(IRInstruction* inst,
                                CloneFlag doClone,
                                Block* srcBlock,
                                const folly::Optional<IdomVector>& idoms) {
  static DEBUG_ONLY __thread int instNest = 0;
  if (debug) ++instNest;
  SCOPE_EXIT { if (debug) --instNest; };
  DEBUG_ONLY auto indent = [&] { return std::string(instNest * 2, ' '); };

  auto doCse = [&] (IRInstruction* cseInput) -> SSATmp* {
    if (m_state.enableCse() && cseInput->canCSE()) {
      SSATmp* cseResult = m_state.cseLookup(cseInput, srcBlock, idoms);
      if (cseResult) {
        // Found a dominating instruction that can be used instead of input
        FTRACE(1, " {}cse found: {}\n",
               indent(), cseResult->inst()->toString());
        assert(!cseInput->consumesReferences());
        if (cseInput->producesReference(0)) {
          // Replace with an IncRef
          FTRACE(1, " {}cse of refcount-producing instruction\n", indent());
          gen(IncRef, cseResult);
        }
        return cseResult;
      }
    }
    return nullptr;
  };

  auto cloneAndAppendOriginal = [&] () -> SSATmp* {
    if (inst->op() == Nop) return nullptr;
    if (auto cseResult = doCse(inst)) {
      return cseResult;
    }
    if (doClone == CloneFlag::Yes) {
      inst = m_unit.cloneInstruction(inst);
    }
    appendInstruction(inst);
    return inst->dst(0);
  };

  // Since some of these optimizations inspect tracked state, we don't
  // perform any of them on non-main traces.
  if (m_savedBlocks.size() > 0) return cloneAndAppendOriginal();

  // copy propagation on inst source operands
  copyProp(inst);

  // First pass of IRBuilder optimizations tries to replace an
  // instruction based on tracked state before we do anything else.
  // May mutate the IRInstruction in place (and return nullptr) or
  // return an SSATmp*.
  if (SSATmp* preOpt = preOptimize(inst)) {
    FTRACE(1, " {}preOptimize returned: {}\n",
           indent(), preOpt->inst()->toString());
    return preOpt;
  }
  if (inst->op() == Nop) return cloneAndAppendOriginal();

  if (!m_enableSimplification) {
    return cloneAndAppendOriginal();
  }

  auto simpResult = m_simplifier.simplify(inst, shouldConstrainGuards());

  // These are the possible outputs:
  //
  // ([], nullptr): no optimization possible. Use original inst.
  //
  // ([], non-nullptr): passing through a src. Don't CSE.
  //
  // ([X, ...], Y): throw away input instruction, append 'X, ...' (CSEing
  //                as we go), return Y.
  if (!simpResult.instrs.empty()) {
    // New instructions were generated. Append the new ones, filtering out
    // Nops.
    for (auto* newInst : simpResult.instrs) {
      assert(!newInst->isTransient());
      if (newInst->op() == Nop) continue;

      auto cseResult = doCse(newInst);
      if (cseResult) {
        appendInstruction(m_unit.mov(newInst->dst(), cseResult,
                                     newInst->marker()));
      } else {
        appendInstruction(newInst);
      }
    }
    return simpResult.dst;
  }

  // No new instructions were generated. Either simplification didn't do
  // anything, or we're using some other instruction's dst instead of our own.

  if (simpResult.dst) {
    // We're using some other instruction's output. Don't append anything, and
    // don't do any CSE.
    assert(simpResult.dst->inst() != inst);
    return simpResult.dst;
  }

  // No simplification happened.
  return cloneAndAppendOriginal();
}
void insertIncRefs(PrcEnv& env) {
  auto antQ =
    dataflow_worklist<uint32_t, std::less<uint32_t>>(env.rpoBlocks.size());
  auto avlQ =
    dataflow_worklist<uint32_t, std::greater<uint32_t>>(env.rpoBlocks.size());

  env.states.resize(env.unit.numBlocks());
  for (uint32_t i = 0; i < env.rpoBlocks.size(); i++) {
    auto blk = env.rpoBlocks[i];
    auto& state = env.states[blk->id()];
    state.rpoId = i;
    if (blk->numSuccs()) state.antOut.set();
    if (blk->numPreds()) state.avlIn.set();
    antQ.push(i);
    avlQ.push(i);
  }

  auto id = 0;
  for (auto& v : env.insertMap) {
    for (auto const tmp : v) {
      auto const blk = tmp->inst()->block();
      auto& state = env.states[blk->id()];
      if (!state.local.test(id)) {
        state.local.set(id);
        continue;
      }
    }
    id++;
  }

  using Bits = PrcState::Bits;

  // compute anticipated
  do {
    auto const blk = env.rpoBlocks[antQ.pop()];
    auto& state = env.states[blk->id()];
    state.antIn = state.antOut | state.local;
    state.pantIn = state.pantOut | state.local;
    blk->forEachPred(
      [&] (Block* b) {
        auto& s = env.states[b->id()];
        auto const antOut = s.antOut & state.antIn;
        auto const pantOut = s.pantOut | state.pantIn;
        if (antOut != s.antOut || pantOut != s.pantOut) {
          s.antOut = antOut;
          s.pantOut = pantOut;
          antQ.push(s.rpoId);
        }
      }
    );
  } while (!antQ.empty());

  // compute available
  do {
    auto const blk = env.rpoBlocks[avlQ.pop()];
    auto& state = env.states[blk->id()];
    state.avlOut = state.avlIn | state.local;
    blk->forEachSucc(
      [&] (Block* b) {
        auto& s = env.states[b->id()];
        auto const avlIn = s.avlIn & state.avlOut;
        if (avlIn != s.avlIn) {
          s.avlIn = avlIn;
          avlQ.push(s.rpoId);
        }
      });
  } while (!avlQ.empty());

  for (auto blk : env.rpoBlocks) {
    auto& state = env.states[blk->id()];
    FTRACE(4,
           "InsertIncDecs: Blk(B{}) <- {}\n"
           "{}"
           " ->{}\n",
           blk->id(),
           [&] {
             std::string ret;
             blk->forEachPred([&] (Block* pred) {
               folly::format(&ret, " B{}", pred->id());
             });
             return ret;
           }(),
           show(state),
           [&] {
             std::string ret;
             blk->forEachSucc([&] (Block* succ) {
               folly::format(&ret, " B{}", succ->id());
             });
             return ret;
           }());

    auto inc = state.local;
    for (auto inc_id = 0; inc.any(); inc >>= 1, inc_id++) {
      if (inc.test(0)) {
        auto const& tmps = env.insertMap[inc_id];
        auto insert = [&] (IRInstruction* inst) {
          FTRACE(3, "Inserting IncRef into B{}\n", blk->id());
          auto const iter = std::next(blk->iteratorTo(inst));
          blk->insert(iter, env.unit.gen(IncRef, inst->bcctx(), tmps[0]));
        };
        SSATmp* last = nullptr;
        // Insert an IncRef after every candidate in this block except
        // the last one (since we know for all but the last that its
        // successor is anticipated). Note that entries in tmps from
        // the same block are guaranteed to be in program order.
        for (auto const tmp : tmps) {
          if (tmp->inst()->block() != blk) continue;
          if (last) insert(last->inst());
          last = tmp;
        }
        // If it's partially anticipated out, insert an inc after the
        // last one too.
        always_assert(last);
        if (state.pantOut.test(inc_id)) insert(last->inst());
      }
    }

    auto dec = state.avlIn & ~state.pantIn;
    if (dec.any()) {
      blk->forEachPred(
        [&] (Block* pred) {
          auto& pstate = env.states[pred->id()];
          dec &= pstate.pantOut;
        });

      for (auto dec_id = 0; dec.any(); dec >>= 1, dec_id++) {
        if (dec.test(0)) {
          FTRACE(3, "Inserting DecRef into B{}\n", blk->id());
          auto const tmp = env.insertMap[dec_id][0];
          blk->prepend(env.unit.gen(DecRef, tmp->inst()->bcctx(),
                                    DecRefData{}, tmp));
        }
      }
    }
  }
}
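The two do/while loops above follow the classic optimistic worklist recipe: seed every block, initialize the in/out sets to the full set wherever a meet will narrow them, and iterate to a fixed point in an order keyed by reverse-post-order id. Below is a self-contained sketch of the backward "anticipated" half of that recipe; Blk, NBITS, and the CFG wiring are hypothetical stand-ins for the real IR types:

#include <bitset>
#include <cstdint>
#include <queue>
#include <vector>

constexpr size_t NBITS = 64;            // max tracked candidates (assumption)
using Bits = std::bitset<NBITS>;

struct Blk {
  uint32_t rpoId;                       // reverse-post-order index
  std::vector<uint32_t> preds, succs;   // CFG edges, stored as rpo ids
  Bits local;                           // candidates generated in this block
  Bits antIn, antOut;                   // anticipated in/out sets
};

// Backward problem: process blocks latest-first (max-heap on rpoId) and
// re-queue any predecessor whose out-set shrinks. The meet is intersection,
// so out-sets only ever lose bits and the iteration must terminate.
// Precondition: blocks is indexed by rpoId, i.e. blocks[i].rpoId == i.
void solveAnticipated(std::vector<Blk>& blocks) {
  std::priority_queue<uint32_t> q;      // pops the largest rpoId first
  for (auto& b : blocks) {
    // Optimistic start: everything anticipated, except past exit blocks.
    if (!b.succs.empty()) b.antOut.set();
    q.push(b.rpoId);
  }
  while (!q.empty()) {
    auto& b = blocks[q.top()];
    q.pop();
    b.antIn = b.antOut | b.local;
    for (auto const p : b.preds) {
      auto const out = blocks[p].antOut & b.antIn;
      if (out != blocks[p].antOut) {
        blocks[p].antOut = out;
        q.push(blocks[p].rpoId);
      }
    }
  }
}

The "available" pass is the mirror image: a min-heap on rpoId, propagation from each block to its successors, and intersection on avlIn, which is why insertIncRefs uses std::greater for avlQ and std::less for antQ.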