void optimize(IRUnit& unit, IRBuilder& irBuilder, TransKind kind) {
  auto finishPass = [&](const char* msg) {
    dumpTrace(6, unit, folly::format("after {}", msg).str().c_str());
    assert(checkCfg(unit));
    assert(checkTmpsSpanningCalls(unit));
    if (debug) {
      forEachInst(rpoSortCfg(unit), assertOperandTypes);
    }
  };

  auto doPass = [&](void (*fn)(IRUnit&), const char* msg) {
    fn(unit);
    finishPass(msg);
  };

  auto dce = [&](const char* which) {
    if (!RuntimeOption::EvalHHIRDeadCodeElim) return;
    eliminateDeadCode(unit);
    finishPass(folly::format("{} DCE", which).str().c_str());
  };

  if (RuntimeOption::EvalHHIRRelaxGuards) {
    auto const simpleRelax = kind == TransProfile;
    auto changed = relaxGuards(unit, *irBuilder.guards(), simpleRelax);
    if (changed) finishPass("guard relaxation");
  }

  if (RuntimeOption::EvalHHIRRefcountOpts) {
    optimizeRefcounts(unit);
    finishPass("refcount opts");
  }

  dce("initial");

  if (RuntimeOption::EvalHHIRPredictionOpts) {
    doPass(optimizePredictions, "prediction opts");
  }

  if (RuntimeOption::EvalHHIRExtraOptPass
      && (RuntimeOption::EvalHHIRCse
          || RuntimeOption::EvalHHIRSimplification)) {
    irBuilder.reoptimize();
    finishPass("reoptimize");
    // Clean up any dead code left around by CSE/Simplification.
    // Ideally, this would be controlled by a flag returned by
    // optimizeTrace indicating whether DCE is necessary.
    dce("reoptimize");
  }

  if (RuntimeOption::EvalHHIRJumpOpts) {
    doPass(optimizeJumps, "jumpopts");
    dce("jump opts");
  }

  if (RuntimeOption::EvalHHIRGenerateAsserts) {
    doPass(insertAsserts, "RefCnt asserts");
  }
}
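// All of the HHVM optimize() variants in this listing share the same
// pass-driver idiom: a finishPass lambda that dumps and validates the IR
// after every pass, a doPass wrapper that runs a pass and then validates,
// and a dce helper that gates dead-code elimination behind a runtime flag.
// What follows is a minimal, self-contained sketch of that idiom; Unit,
// validateUnit, and the passes are hypothetical stand-ins, not HHVM API.

#include <cassert>
#include <cstdio>

struct Unit { bool wellFormed = true; };              // stand-in for IRUnit

static bool validateUnit(const Unit& u) { return u.wellFormed; }  // ~checkCfg
static void simplifyPass(Unit&) { /* ... */ }         // hypothetical pass
static void deadCodeElim(Unit&) { /* ... */ }         // hypothetical pass

void runPipeline(Unit& unit, bool enableDCE) {
  // Dump and validate after each pass, mirroring finishPass above.
  auto finishPass = [&](const char* msg) {
    std::printf("after %s\n", msg);                   // ~dumpTrace/printUnit
    assert(validateUnit(unit));                       // ~checkCfg and friends
  };
  // Run a pass function, then validate, mirroring doPass.
  auto doPass = [&](void (*fn)(Unit&), const char* msg) {
    fn(unit);
    finishPass(msg);
  };
  // DCE gated behind a flag, mirroring the dce helper.
  auto dce = [&](const char* which) {
    if (!enableDCE) return;                           // ~EvalHHIRDeadCodeElim
    deadCodeElim(unit);
    std::printf("after %s DCE\n", which);
    assert(validateUnit(unit));
  };

  dce("initial");
  doPass(simplifyPass, "simplify");
  dce("simplify");
}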
void optimizeTrace(Trace* trace, IRFactory* irFactory) {
  if (RuntimeOption::EvalHHIRMemOpt) {
    optimizeMemoryAccesses(trace, irFactory);
  }
  if (RuntimeOption::EvalDumpIR > 5) {
    std::cout << "----- HHIR before DCE -----\n";
    trace->print(std::cout, false);
    std::cout << "---------------------------\n";
  }
  eliminateDeadCode(trace, irFactory);
}
void optimizeTrace(IRTrace* trace, TraceBuilder& traceBuilder) {
  auto& irFactory = traceBuilder.factory();

  auto finishPass = [&](const char* msg) {
    dumpTrace(6, trace, folly::format("after {}", msg).str().c_str());
    assert(checkCfg(trace, irFactory));
    assert(checkTmpsSpanningCalls(trace, irFactory));
    if (debug) forEachTraceInst(trace, assertOperandTypes);
  };

  auto doPass = [&](void (*fn)(IRTrace*, IRFactory&), const char* msg) {
    fn(trace, irFactory);
    finishPass(msg);
  };

  auto dce = [&](const char* which) {
    if (!RuntimeOption::EvalHHIRDeadCodeElim) return;
    eliminateDeadCode(trace, irFactory);
    finishPass(folly::format("{} DCE", which).str().c_str());
  };

  if (RuntimeOption::EvalHHIRRelaxGuards) {
    auto changed = relaxGuards(trace, irFactory, *traceBuilder.guards());
    if (changed) finishPass("guard relaxation");
  }

  dce("initial");

  if (RuntimeOption::EvalHHIRPredictionOpts) {
    doPass(optimizePredictions, "prediction opts");
  }

  if (RuntimeOption::EvalHHIRExtraOptPass
      && (RuntimeOption::EvalHHIRCse
          || RuntimeOption::EvalHHIRSimplification)) {
    traceBuilder.reoptimize();
    finishPass("reoptimize");
    // Clean up any dead code left around by CSE/Simplification.
    // Ideally, this would be controlled by a flag returned by
    // optimizeTrace indicating whether DCE is necessary.
    dce("reoptimize");
  }

  if (RuntimeOption::EvalHHIRJumpOpts) {
    doPass(optimizeJumps, "jumpopts");
    dce("jump opts");
  }

  if (RuntimeOption::EvalHHIRGenerateAsserts) {
    doPass(insertAsserts, "RefCnt asserts");
  }
}
void optimizeTrace(Trace* trace, IRFactory* irFactory) {
  if (RuntimeOption::EvalHHIRMemOpt) {
    optimizeMemoryAccesses(trace, irFactory);
    if (RuntimeOption::EvalDumpIR > 5) {
      std::cout << "----- HHIR after MemElim -----\n";
      trace->print(std::cout, false);
      std::cout << "---------------------------\n";
    }
  }
  if (RuntimeOption::EvalHHIRDeadCodeElim) {
    eliminateDeadCode(trace, irFactory);
    optimizeJumps(trace, irFactory);
    if (RuntimeOption::EvalDumpIR > 5) {
      std::cout << "----- HHIR after DCE -----\n";
      trace->print(std::cout, false);
      std::cout << "---------------------------\n";
    }
  }
  if (RuntimeOption::EvalHHIRGenerateAsserts) {
    insertRefCountAsserts(trace, irFactory);
  }
}
void optimize(IRUnit& unit, IRBuilder& irBuilder, TransKind kind) {
  Timer _t(Timer::optimize);

  auto const finishPass = [&] (const char* msg) {
    if (msg) {
      printUnit(6, unit, folly::format("after {}", msg).str().c_str());
    }
    assertx(checkCfg(unit));
    assertx(checkTmpsSpanningCalls(unit));
    if (debug) {
      forEachInst(rpoSortCfg(unit), [&](IRInstruction* inst) {
        assertx(checkOperandTypes(inst, &unit));
      });
    }
  };

  auto const doPass = [&] (void (*fn)(IRUnit&), const char* msg = nullptr) {
    fn(unit);
    finishPass(msg);
  };

  auto const dce = [&] (const char* which) {
    if (!RuntimeOption::EvalHHIRDeadCodeElim) return;
    eliminateDeadCode(unit);
    finishPass(folly::format("{} DCE", which).str().c_str());
  };

  auto const simplifyPass = [] (IRUnit& unit) {
    boost::dynamic_bitset<> reachable(unit.numBlocks());
    reachable.set(unit.entry()->id());

    auto const blocks = rpoSortCfg(unit);

    for (auto block : blocks) {
      // Skip unreachable blocks, or simplify() cries.
      if (!reachable.test(block->id())) continue;

      for (auto& inst : *block) simplify(unit, &inst);

      if (auto const b = block->back().next())  reachable.set(b->id());
      if (auto const b = block->back().taken()) reachable.set(b->id());
    }
  };

  auto const doSimplify = RuntimeOption::EvalHHIRExtraOptPass &&
                          RuntimeOption::EvalHHIRSimplification;
  auto const hasLoop = RuntimeOption::EvalJitLoops && cfgHasLoop(unit);
  auto const traceMode = kind != TransKind::Optimize ||
                         RuntimeOption::EvalJitPGORegionSelector == "hottrace";

  // TODO (#5792564): Guard relaxation doesn't work with loops.
  // TODO (#6599498): Guard relaxation is broken in wholecfg mode.
  if (shouldHHIRRelaxGuards() && !hasLoop && traceMode) {
    Timer _t(Timer::optimize_relaxGuards);
    const bool simple = kind == TransKind::Profile &&
                        (RuntimeOption::EvalJitRegionSelector == "tracelet" ||
                         RuntimeOption::EvalJitRegionSelector == "method");
    RelaxGuardsFlags flags = (RelaxGuardsFlags)
      (RelaxReflow | (simple ? RelaxSimple : RelaxNormal));
    auto changed = relaxGuards(unit, *irBuilder.guards(), flags);
    if (changed) finishPass("guard relaxation");

    if (doSimplify) {
      doPass(simplifyPass, "guard relaxation simplify");
    }
  }

  // This is vestigial (it removes some instructions needed by the old
  // refcount opts pass), and will be removed soon.
  eliminateTakes(unit);

  dce("initial");

  if (RuntimeOption::EvalHHIRPredictionOpts) {
    doPass(optimizePredictions, "prediction opts");
  }

  if (doSimplify) {
    doPass(simplifyPass, "simplify");
    dce("simplify");
  }

  if (RuntimeOption::EvalHHIRGlobalValueNumbering) {
    doPass(gvn);
    dce("gvn");
  }

  if (kind != TransKind::Profile && RuntimeOption::EvalHHIRMemoryOpts) {
    doPass(optimizeLoads);
    dce("loadelim");
  }

  /*
   * Note: doing this pass this late might not be ideal, in particular because
   * we've already turned some StLoc instructions into StLocNT.
   *
   * But right now there are assumptions preventing us from doing it before
   * refcount opts.  (Refcount opts needs to see all the StLocs explicitly
   * because it makes assumptions about whether references are consumed based
   * on that.)
   */
  if (kind != TransKind::Profile && RuntimeOption::EvalHHIRMemoryOpts) {
    doPass(optimizeStores);
    dce("storeelim");
  }

  if (kind != TransKind::Profile && RuntimeOption::EvalHHIRRefcountOpts) {
    doPass(optimizeRefcounts2);
    dce("refcount");
  }

  if (RuntimeOption::EvalHHIRGenerateAsserts) {
    doPass(insertAsserts);
  }
}
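// The simplifyPass lambda above walks blocks in reverse post order (RPO) and
// marks successors reachable as it goes. A single pass suffices because
// every reachable block has at least one predecessor that precedes it in
// RPO, so the mark always arrives before the visit. Below is a minimal
// sketch of the same traversal, using a hypothetical Block type rather than
// HHVM's:

#include <vector>

struct Block {
  unsigned id;
  Block* next = nullptr;    // fall-through successor, if any
  Block* taken = nullptr;   // branch successor, if any
};

// Visit exactly the blocks reachable from the entry (rpoBlocks.front()),
// assuming rpoBlocks is a reverse-post-order sort of the CFG.
void forEachReachable(const std::vector<Block*>& rpoBlocks,
                      unsigned numBlocks,
                      void (*visit)(Block*)) {
  std::vector<bool> reachable(numBlocks, false);
  reachable[rpoBlocks.front()->id] = true;   // seed with the entry block
  for (Block* block : rpoBlocks) {
    if (!reachable[block->id]) continue;     // skip unreachable blocks
    visit(block);
    if (block->next)  reachable[block->next->id]  = true;
    if (block->taken) reachable[block->taken->id] = true;
  }
}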
void generate(Code& code, CCallHelpers& jit) {
  TimingScope timingScope("Air::generate");

  // We don't expect the incoming code to have predecessors computed.
  code.resetReachability();

  if (shouldValidateIR())
    validate(code);

  // If we're doing super verbose dumping, the phase scope of any phase will
  // already do a dump.
  if (shouldDumpIR() && !shouldDumpIRAtEachPhase()) {
    dataLog("Initial air:\n");
    dataLog(code);
  }

  // This is where we run our optimizations and transformations.
  // FIXME: Add Air optimizations.
  // https://bugs.webkit.org/show_bug.cgi?id=150456

  eliminateDeadCode(code);

  // This is where we would have a real register allocator. Then, we could use
  // spillEverything() in place of the register allocator only for testing.
  // FIXME: https://bugs.webkit.org/show_bug.cgi?id=150457
  spillEverything(code);

  // Prior to this point the prologue and epilogue are implicit. This makes
  // them explicit. It also does things like identify which callee-saves we're
  // using and saves them.
  handleCalleeSaves(code);

  // This turns all Stack and CallArg Args into Addr args that use the frame
  // pointer. It does this by first-fit allocating stack slots. It should be
  // pretty darn close to optimal, so we shouldn't have to worry about this
  // very much.
  allocateStack(code);

  // If we coalesced moves then we can unbreak critical edges. This is the
  // main reason for this phase.
  simplifyCFG(code);

  // FIXME: We should really have a code layout optimization here.
  // https://bugs.webkit.org/show_bug.cgi?id=150478

  reportUsedRegisters(code);

  if (shouldValidateIR())
    validate(code);

  // Do a final dump of Air. Note that we have to do this even if we are doing
  // per-phase dumping, since the final generation is not a phase.
  if (shouldDumpIR()) {
    dataLog("Air after ", code.lastPhaseName(), ", before generation:\n");
    dataLog(code);
  }

  TimingScope codeGenTimingScope("Air::generate backend");

  // And now, we generate code.
  jit.emitFunctionPrologue();
  jit.addPtr(
    CCallHelpers::TrustedImm32(-code.frameSize()),
    MacroAssembler::stackPointerRegister);

  GenerationContext context;
  context.code = &code;
  IndexMap<BasicBlock, CCallHelpers::Label> blockLabels(code.size());
  IndexMap<BasicBlock, CCallHelpers::JumpList> blockJumps(code.size());

  auto link = [&] (CCallHelpers::Jump jump, BasicBlock* target) {
    if (blockLabels[target].isSet()) {
      jump.linkTo(blockLabels[target], &jit);
      return;
    }
    blockJumps[target].append(jump);
  };

  for (BasicBlock* block : code) {
    blockJumps[block].link(&jit);
    // Bind this block's label so that later (backward) jumps can link to it
    // directly through the link lambda above.
    blockLabels[block] = jit.label();
    ASSERT(block->size() >= 1);
    for (unsigned i = 0; i < block->size() - 1; ++i) {
      CCallHelpers::Jump jump = block->at(i).generate(jit, context);
      ASSERT_UNUSED(jump, !jump.isSet());
    }

    if (block->last().opcode == Jump
      && block->successorBlock(0) == code.findNextBlock(block))
      continue;

    if (block->last().opcode == Ret) {
      // We currently don't represent the full prologue/epilogue in Air, so we
      // need to have this override.
      jit.emitFunctionEpilogue();
      jit.ret();
      continue;
    }

    CCallHelpers::Jump jump = block->last().generate(jit, context);
    switch (block->numSuccessors()) {
    case 0:
      ASSERT(!jump.isSet());
      break;
    case 1:
      link(jump, block->successorBlock(0));
      break;
    case 2:
      link(jump, block->successorBlock(0));
      if (block->successorBlock(1) != code.findNextBlock(block))
        link(jit.jump(), block->successorBlock(1));
      break;
    default:
      RELEASE_ASSERT_NOT_REACHED();
      break;
    }
  }

  for (auto& latePath : context.latePaths)
    latePath->run(jit, context);
}
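// The block-emission loop in generate() resolves control flow with a classic
// one-pass backpatching scheme: a jump to a block whose label is already
// bound links immediately, while a forward jump is queued on its target and
// linked once the target's label is bound. Below is a minimal sketch of that
// scheme with hypothetical Label/Jump types; the real code uses CCallHelpers'
// Label, Jump, and JumpList.

#include <cstddef>
#include <vector>

struct Label {
  std::ptrdiff_t offset = -1;
  bool bound() const { return offset >= 0; }
};
struct Jump { std::ptrdiff_t patchSite = 0; };

struct Backpatcher {
  std::vector<Label> labels;               // one per block, indexed by id
  std::vector<std::vector<Jump>> pending;  // queued forward jumps per block

  explicit Backpatcher(std::size_t numBlocks)
    : labels(numBlocks), pending(numBlocks) {}

  // Mirrors the `link` lambda: link now if the target's label is set,
  // otherwise remember the jump for later.
  void link(Jump jump, std::size_t target) {
    if (labels[target].bound()) {
      patch(jump, labels[target]);
      return;
    }
    pending[target].push_back(jump);
  }

  // Mirrors binding blockLabels[block] and blockJumps[block].link(&jit):
  // record where the block starts and resolve all jumps queued against it.
  void bind(std::size_t block, std::ptrdiff_t here) {
    labels[block].offset = here;
    for (Jump jump : pending[block]) patch(jump, labels[block]);
    pending[block].clear();
  }

  void patch(Jump jump, Label label) {
    // A real assembler rewrites the branch displacement at jump.patchSite;
    // here we just compute it.
    std::ptrdiff_t displacement = label.offset - jump.patchSite;
    (void)displacement;
  }
};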
void prepareForGeneration(Code& code) {
  TimingScope timingScope("Air::prepareForGeneration");

  // We don't expect the incoming code to have predecessors computed.
  code.resetReachability();

  if (shouldValidateIR())
    validate(code);

  // If we're doing super verbose dumping, the phase scope of any phase will
  // already do a dump.
  if (shouldDumpIR() && !shouldDumpIRAtEachPhase()) {
    dataLog("Initial air:\n");
    dataLog(code);
  }

  // This is where we run our optimizations and transformations.
  // FIXME: Add Air optimizations.
  // https://bugs.webkit.org/show_bug.cgi?id=150456

  eliminateDeadCode(code);

  // Register allocation for all the Tmps that do not have a corresponding
  // machine register. After this phase, every Tmp has a reg.
  //
  // For debugging, you can use spillEverything() to put everything to the
  // stack between each Inst.
  if (Options::airSpillsEverything())
    spillEverything(code);
  else
    iteratedRegisterCoalescing(code);

  // Prior to this point the prologue and epilogue are implicit. This makes
  // them explicit. It also does things like identify which callee-saves we're
  // using and saves them.
  handleCalleeSaves(code);

  // This turns all Stack and CallArg Args into Addr args that use the frame
  // pointer. It does this by first-fit allocating stack slots. It should be
  // pretty darn close to optimal, so we shouldn't have to worry about this
  // very much. (A sketch of the first-fit idea follows this function.)
  allocateStack(code);

  // If we coalesced moves then we can unbreak critical edges. This is the
  // main reason for this phase.
  simplifyCFG(code);

  // This sorts the basic blocks in Code to achieve an ordering that maximizes
  // the likelihood that a high frequency successor is also the fall-through
  // target.
  optimizeBlockOrder(code);

  // Attempt to remove false dependencies between instructions created by
  // partial register changes. This must run as late as possible, since it
  // depends on the final instruction order and register use.
  fixPartialRegisterStalls(code);

  // This is needed to satisfy a requirement of B3::StackmapValue.
  reportUsedRegisters(code);

  if (shouldValidateIR())
    validate(code);

  // Do a final dump of Air. Note that we have to do this even if we are doing
  // per-phase dumping, since the final generation is not a phase.
  if (shouldDumpIR()) {
    dataLog("Air after ", code.lastPhaseName(), ", before generation:\n");
    dataLog(code);
  }
}
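// allocateStack's comment describes first-fit stack slot allocation: each
// slot is placed at the lowest frame offset that doesn't overlap any slot
// whose live range interferes with it. Here is a minimal sketch of that
// placement rule, assuming the caller has already computed the interfering,
// already-placed slots (hypothetical types; Air's actual pass works on its
// own StackSlot structures):

#include <cstddef>
#include <vector>

struct Slot {
  std::size_t size = 0;
  std::size_t offset = 0;
};

static bool overlaps(const Slot& a, const Slot& b) {
  return a.offset < b.offset + b.size && b.offset < a.offset + a.size;
}

// Place `slot` at the lowest 8-byte-aligned offset that avoids every slot in
// `conflicts` (the already-placed slots that are live at the same time).
void placeFirstFit(Slot& slot, const std::vector<const Slot*>& conflicts) {
  for (std::size_t candidate = 0; ; candidate += 8) {
    slot.offset = candidate;
    bool fits = true;
    for (const Slot* other : conflicts) {
      if (overlaps(slot, *other)) {
        fits = false;
        break;
      }
    }
    if (fits) return;
  }
}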
void optimize(IRUnit& unit, IRBuilder& irBuilder, TransKind kind) {
  Timer _t(Timer::optimize);

  auto finishPass = [&](const char* msg) {
    if (msg) {
      printUnit(6, unit, folly::format("after {}", msg).str().c_str());
    }
    assert(checkCfg(unit));
    assert(checkTmpsSpanningCalls(unit));
    if (debug) {
      forEachInst(rpoSortCfg(unit), [&](IRInstruction* inst) {
        assert(checkOperandTypes(inst, &unit));
      });
    }
  };

  auto doPass = [&](void (*fn)(IRUnit&), const char* msg = nullptr) {
    fn(unit);
    finishPass(msg);
  };

  auto dce = [&](const char* which) {
    if (!RuntimeOption::EvalHHIRDeadCodeElim) return;
    eliminateDeadCode(unit);
    finishPass(folly::format("{} DCE", which).str().c_str());
  };

  auto const doReoptimize = RuntimeOption::EvalHHIRExtraOptPass &&
    (RuntimeOption::EvalHHIRCse || RuntimeOption::EvalHHIRSimplification);

  auto const hasLoop = RuntimeOption::EvalJitLoops && cfgHasLoop(unit);

  // TODO(#5792564): Guard relaxation doesn't work with loops.
  if (shouldHHIRRelaxGuards() && !hasLoop) {
    Timer _t(Timer::optimize_relaxGuards);
    const bool simple = kind == TransKind::Profile &&
                        (RuntimeOption::EvalJitRegionSelector == "tracelet" ||
                         RuntimeOption::EvalJitRegionSelector == "method");
    RelaxGuardsFlags flags = (RelaxGuardsFlags)
      (RelaxReflow | (simple ? RelaxSimple : RelaxNormal));
    auto changed = relaxGuards(unit, *irBuilder.guards(), flags);
    if (changed) finishPass("guard relaxation");

    if (doReoptimize) {
      irBuilder.reoptimize();
      finishPass("guard relaxation reoptimize");
    }
  }

  if (RuntimeOption::EvalHHIRRefcountOpts) {
    optimizeRefcounts(unit, FrameStateMgr{unit.entry()->front().marker()});
    finishPass("refcount opts");
  }

  dce("initial");

  if (RuntimeOption::EvalHHIRPredictionOpts) {
    doPass(optimizePredictions, "prediction opts");
  }

  if (doReoptimize) {
    irBuilder.reoptimize();
    finishPass("reoptimize");
    dce("reoptimize");
  }

  if (RuntimeOption::EvalHHIRGlobalValueNumbering) {
    doPass(gvn);
    dce("gvn");
  }

  if (kind != TransKind::Profile && RuntimeOption::EvalHHIRMemoryOpts) {
    doPass(optimizeLoads);
    dce("loadelim");
  }

  /*
   * Note: doing this pass this late might not be ideal, in particular because
   * we've already turned some StLoc instructions into StLocNT.
   *
   * But right now there are assumptions preventing us from doing it before
   * refcount opts.  (Refcount opts needs to see all the StLocs explicitly
   * because it makes assumptions about whether references are consumed based
   * on that.)
   */
  if (kind != TransKind::Profile && RuntimeOption::EvalHHIRMemoryOpts) {
    doPass(optimizeStores);
    dce("storeelim");
  }

  if (RuntimeOption::EvalHHIRGenerateAsserts) {
    doPass(insertAsserts, "RefCnt asserts");
  }
}
void optimizeTrace(Trace* trace, TraceBuilder* traceBuilder) {
  IRFactory* irFactory = traceBuilder->getIrFactory();

  if (RuntimeOption::EvalHHIRMemOpt) {
    optimizeMemoryAccesses(trace, irFactory);
    if (RuntimeOption::EvalDumpIR > 5) {
      std::cout << "----- HHIR after MemElim -----\n";
      trace->print(std::cout);
      std::cout << "---------------------------\n";
    }
    assert(JIT::checkCfg(trace, *irFactory));
  }

  if (RuntimeOption::EvalHHIRDeadCodeElim) {
    eliminateDeadCode(trace, irFactory);
    if (RuntimeOption::EvalDumpIR > 5) {
      std::cout << "----- HHIR after DCE -----\n";
      trace->print(std::cout);
      std::cout << "---------------------------\n";
    }
    assert(JIT::checkCfg(trace, *irFactory));
  }

  if (RuntimeOption::EvalHHIRExtraOptPass
      && (RuntimeOption::EvalHHIRCse
          || RuntimeOption::EvalHHIRSimplification)) {
    traceBuilder->optimizeTrace();
    if (RuntimeOption::EvalDumpIR > 5) {
      std::cout << "----- HHIR after CSE/Simplification -----\n";
      trace->print(std::cout);
      std::cout << "---------------------------\n";
    }
    assert(JIT::checkCfg(trace, *irFactory));

    // Clean up any dead code left around by CSE/Simplification.
    // Ideally, this would be controlled by a flag returned by
    // optimizeTrace indicating whether DCE is necessary.
    if (RuntimeOption::EvalHHIRDeadCodeElim) {
      eliminateDeadCode(trace, irFactory);
      if (RuntimeOption::EvalDumpIR > 5) {
        std::cout << "----- HHIR after DCE -----\n";
        trace->print(std::cout);
        std::cout << "---------------------------\n";
      }
      assert(JIT::checkCfg(trace, *irFactory));
    }
  }

  if (RuntimeOption::EvalHHIRJumpOpts) {
    optimizeJumps(trace, irFactory);
    if (RuntimeOption::EvalDumpIR > 5) {
      std::cout << "----- HHIR after jump opts -----\n";
      trace->print(std::cout);
      std::cout << "---------------------------\n";
    }
    assert(JIT::checkCfg(trace, *irFactory));
  }

  if (RuntimeOption::EvalHHIRGenerateAsserts) {
    insertAsserts(trace, irFactory);
    if (RuntimeOption::EvalDumpIR > 5) {
      std::cout << "----- HHIR after inserting RefCnt asserts -----\n";
      trace->print(std::cout);
      std::cout << "---------------------------\n";
    }
    assert(JIT::checkCfg(trace, *irFactory));
  }
}
void optimize(IRUnit& unit, IRBuilder& irBuilder, TransKind kind) {
  Timer _t(Timer::optimize);

  auto finishPass = [&](const char* msg) {
    dumpTrace(6, unit, folly::format("after {}", msg).str().c_str());
    assert(checkCfg(unit));
    assert(checkTmpsSpanningCalls(unit));
    if (debug) {
      forEachInst(rpoSortCfg(unit), assertOperandTypes);
    }
  };

  auto doPass = [&](void (*fn)(IRUnit&), const char* msg) {
    fn(unit);
    finishPass(msg);
  };

  auto dce = [&](const char* which) {
    if (!RuntimeOption::EvalHHIRDeadCodeElim) return;
    eliminateDeadCode(unit);
    finishPass(folly::format("{} DCE", which).str().c_str());
  };

  if (RuntimeOption::EvalHHIRRelaxGuards) {
    /*
     * In TransProfile mode, we can only relax the guards in tracelet
     * region mode.  If the region came from analyze() and we relax the
     * guards here, then the RegionDesc's TypePreds in ProfData won't
     * accurately reflect the generated guards.  This can result in a
     * TransOptimize region being formed with types that are incompatible,
     * e.g.:
     *    B1: TypePred: Loc0: Bool      // but this gets relaxed to Uncounted
     *        PostCond: Loc0: Uncounted // post-conds are accurate
     *    B2: TypePred: Loc0: Int       // this will always fail
     */
    const bool relax = kind != TransProfile ||
                       RuntimeOption::EvalJitRegionSelector == "tracelet";
    if (relax) {
      Timer _t(Timer::optimize_relaxGuards);
      const bool simple = kind == TransProfile &&
                          RuntimeOption::EvalJitRegionSelector == "tracelet";
      auto changed = relaxGuards(unit, *irBuilder.guards(), simple);
      if (changed) finishPass("guard relaxation");
    }
  }

  if (RuntimeOption::EvalHHIRRefcountOpts) {
    optimizeRefcounts(unit, FrameState{unit, unit.entry()->front().marker()});
    finishPass("refcount opts");
  }

  dce("initial");

  if (RuntimeOption::EvalHHIRPredictionOpts) {
    doPass(optimizePredictions, "prediction opts");
  }

  if (RuntimeOption::EvalHHIRExtraOptPass
      && (RuntimeOption::EvalHHIRCse
          || RuntimeOption::EvalHHIRSimplification)) {
    irBuilder.reoptimize();
    finishPass("reoptimize");
    // Clean up any dead code left around by CSE/Simplification.
    // Ideally, this would be controlled by a flag returned by
    // optimizeTrace indicating whether DCE is necessary.
    dce("reoptimize");
  }

  if (RuntimeOption::EvalHHIRJumpOpts) {
    doPass(optimizeJumps, "jumpopts");
    dce("jump opts");
  }

  if (RuntimeOption::EvalHHIRGenerateAsserts) {
    doPass(insertAsserts, "RefCnt asserts");
  }
}