std::string show(const RegionDesc& region) {
  return folly::format(
    "Region ({} blocks):\n{}",
    region.blocks().size(),
    [&]{
      std::string ret;
      std::string arcs;
      for (auto& b : region.blocks()) {
        folly::toAppend(show(*b), &ret);
        if (auto r = region.nextRetrans(b->id())) {
          folly::toAppend(folly::format("{} -R-> {}\n", b->id(), r.value()),
                          &arcs);
        }
        for (auto s : region.succs(b->id())) {
          folly::toAppend(folly::format("{} -> {}\n", b->id(), s), &arcs);
        }
      }
      folly::toAppend("Arcs:\n" + arcs, &ret);
      folly::toAppend("Side-exiting Blocks:\n",
                      folly::join(", ", region.sideExitingBlocks()),
                      "\n", &ret);
      return ret;
    }()
  ).str();
}
std::string show(const RegionDesc& region) {
  std::string ret{folly::sformat("Region ({} blocks):\n",
                                 region.blocks().size())};

  auto profData = mcg->tx().profData();

  auto weight = [&] (RegionDesc::BlockPtr b) -> int64_t {
    if (!profData) return 0;
    auto tid = b->profTransID();
    if (tid == kInvalidTransID) return 0;
    return profData->transCounter(tid);
  };

  uint64_t maxBlockWgt = 1; // avoid div by 0

  // Print contents of all blocks in pure text format.
  for (auto& b : region.blocks()) {
    folly::toAppend(show(*b), &ret);
    auto w = weight(b);
    if (w > maxBlockWgt) maxBlockWgt = w;
  }

  // Print CFG in dot format, coloring the blocks based on hotness.
  // Print all the blocks first.
  folly::toAppend("\ndigraph RegionCFG {\n node[shape=box,style=filled]\n",
                  &ret);
  for (auto& b : region.blocks()) {
    auto const id = b->id();
    auto const& mergedSet = region.merged(id);
    std::string mergedStr = mergedSet.empty()
      ? ""
      : (" (" + folly::join(",", mergedSet) + ")");
    uint32_t coldness = 255 - (255 * weight(b) / maxBlockWgt);
    folly::format(&ret,
                  " \"B{}\" [label=\"B {}{}\\np: {}\","
                  "fillcolor=\"#ff{:02x}{:02x}\"]\n",
                  id, id, mergedStr, weight(b), coldness, coldness);
  }

  // Print arcs in dot format.
  for (auto& b : region.blocks()) {
    if (auto r = region.nextRetrans(b->id())) {
      folly::toAppend(folly::format(" \"B{}\" -> \"B{}\" [label=R,color=red]\n",
                                    b->id(), r.value()), &ret);
    }
    for (auto s : region.succs(b->id())) {
      folly::toAppend(folly::format(" \"B{}\" -> \"B{}\"\n", b->id(), s),
                      &ret);
    }
  }
  ret += "}\n";

  return ret;
}
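// Usage note (an assumption, not from the source): the string built above is
// a plain-text block dump followed by a self-contained "digraph RegionCFG"
// section, so a caller can slice from the "digraph" keyword onward and feed
// that portion to Graphviz.  The helper below is a hypothetical sketch.
#include <fstream>
#include <string>

void dumpRegionDot(const RegionDesc& region, const std::string& path) {
  auto const all = show(region);
  auto const pos = all.find("digraph RegionCFG");
  if (pos == std::string::npos) return;   // no CFG section found
  std::ofstream{path} << all.substr(pos); // render with: dot -Tsvg <path>
}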
void RegionDesc::copyBlocksFrom(const RegionDesc& other,
                                BlockVec::iterator where) {
  auto otherBlocks = other.blocks();
  m_blocks.insert(where, otherBlocks.begin(), otherBlocks.end());
  for (auto b : otherBlocks) {
    m_data[b->id()] = BlockData(b);
  }
}
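// Hedged sketch: copyBlocksFrom() only copies blocks and their per-block
// data, not arcs, so a caller splicing in a whole region presumably pairs it
// with arc copying.  The copyArcsFrom() helper named here is an assumption
// about the surrounding API, shown only to illustrate the intended pairing.
void RegionDesc::append(const RegionDesc& other) {
  copyBlocksFrom(other, m_blocks.end());  // blocks go at the back...
  copyArcsFrom(other);                    // ...and their arcs come with them
}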
/*
 * Checks if the given region is well-formed, which entails the
 * following properties:
 *
 *   1) The region has at least one block.
 *
 *   2) Each block in the region has a different id.
 *
 *   3) All arcs involve blocks within the region.
 *
 *   4) For each arc, the bytecode offset of the dst block must
 *      possibly follow the execution of the src block.
 *
 *   5) Each block contains at most one successor corresponding to a
 *      given SrcKey.
 *
 *   6) The region doesn't contain any loops, unless JitLoops is
 *      enabled.
 *
 *   7) All blocks are reachable from the entry block.
 *
 *   8) For each block, there must be a path from the entry to it that
 *      includes only earlier blocks in the region.
 *
 *   9) The region is topologically sorted unless loops are enabled.
 *
 *  10) The block-retranslation chains cannot have cycles.
 */
bool check(const RegionDesc& region, std::string& error) {

  auto bad = [&](const std::string& errorMsg) {
    error = errorMsg;
    return false;
  };

  // 1) The region has at least one block.
  if (region.empty()) return bad("empty region");

  RegionDesc::BlockIdSet blockSet;
  for (auto b : region.blocks()) {
    auto bid = b->id();
    // 2) Each block in the region has a different id.
    if (blockSet.count(bid)) {
      return bad(folly::sformat("many blocks with id {}", bid));
    }
    blockSet.insert(bid);
  }

  for (auto b : region.blocks()) {
    auto bid = b->id();
    SrcKey lastSk = region.block(bid)->last();
    OffsetSet validSuccOffsets = lastSk.succOffsets();
    OffsetSet succOffsets;

    for (auto succ : region.succs(bid)) {
      SrcKey succSk = region.block(succ)->start();
      Offset succOffset = succSk.offset();

      // 3) All arcs involve blocks within the region.
      if (blockSet.count(succ) == 0) {
        return bad(folly::sformat("arc with dst not in the region: {} -> {}",
                                  bid, succ));
      }

      // Checks 4) and 5) below don't make sense for arcs corresponding
      // to inlined calls and returns, so skip them in such cases.
      // This won't be possible once task #4076399 is done.
      if (lastSk.func() != succSk.func()) continue;

      // 4) For each arc, the bytecode offset of the dst block must
      //    possibly follow the execution of the src block.
      if (validSuccOffsets.count(succOffset) == 0) {
        return bad(folly::sformat("arc with impossible control flow: {} -> {}",
                                  bid, succ));
      }

      // 5) Each block contains at most one successor corresponding to a
      //    given SrcKey.
      if (succOffsets.count(succOffset) > 0) {
        return bad(folly::sformat("block {} has multiple successors with SK {}",
                                  bid, show(succSk)));
      }
      succOffsets.insert(succOffset);
    }
    for (auto pred : region.preds(bid)) {
      if (blockSet.count(pred) == 0) {
        return bad(folly::sformat("arc with src not in the region: {} -> {}",
                                  pred, bid));
      }
    }
  }

  // 6) is checked by dfsCheck.
  DFSChecker dfsCheck(region);
  if (!dfsCheck.check(region.entry()->id())) {
    return bad("region is cyclic");
  }

  // 7) All blocks are reachable from the entry (first) block.
  if (dfsCheck.numVisited() != blockSet.size()) {
    return bad("region has unreachable blocks");
  }

  // 8) and 9) are checked below.
  RegionDesc::BlockIdSet visited;
  auto& blocks = region.blocks();
  for (unsigned i = 0; i < blocks.size(); i++) {
    auto bid = blocks[i]->id();
    unsigned nVisited = 0;
    for (auto pred : region.preds(bid)) {
      nVisited += visited.count(pred);
    }
    // 8) For each block, there must be a path from the entry to it that
    //    includes only earlier blocks in the region.
    if (nVisited == 0 && i != 0) {
      return bad(folly::sformat("block {} appears before all its predecessors",
                                bid));
    }
    // 9) The region is topologically sorted unless loops are enabled.
    if (!RuntimeOption::EvalJitLoops &&
        nVisited != region.preds(bid).size()) {
      return bad(folly::sformat("non-topological order (bid: {})", bid));
    }
    visited.insert(bid);
  }

  // 10) The block-retranslation chains cannot have cycles.
  for (auto b : blocks) {
    auto bid = b->id();
    RegionDesc::BlockIdSet chainSet;
    chainSet.insert(bid);
    while (auto next = region.nextRetrans(bid)) {
      auto nextId = next.value();
      if (chainSet.count(nextId)) {
        return bad(folly::sformat("cyclic retranslation chain for block {}",
                                  bid));
      }
      chainSet.insert(nextId);
      bid = nextId;
    }
  }

  return true;
}
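// check() delegates properties 6) and 7) to DFSChecker, which is not defined
// in this excerpt.  A sketch of the shape its uses imply: a depth-first walk
// that keeps a "currently visiting" set to detect back-edges (cycles are only
// tolerated when EvalJitLoops is on), follows both successor and
// retranslation arcs, and counts visited blocks so the caller can compare
// against the region size for reachability.  This is an assumption about its
// implementation, not the source.
struct DFSChecker {
  explicit DFSChecker(const RegionDesc& region) : m_region(region) {}

  bool check(RegionDesc::BlockId id) {
    if (m_visiting.count(id) > 0) {
      // Found a back-edge, i.e. a loop: only OK when loops are enabled.
      return RuntimeOption::EvalJitLoops;
    }
    if (m_visited.count(id) > 0) return true;
    m_visited.insert(id);
    m_visiting.insert(id);
    if (auto next = m_region.nextRetrans(id)) {
      if (!check(next.value())) return false;
    }
    for (auto succ : m_region.succs(id)) {
      if (!check(succ)) return false;
    }
    m_visiting.erase(id);
    return true;
  }

  size_t numVisited() const { return m_visited.size(); }

 private:
  const RegionDesc&       m_region;
  RegionDesc::BlockIdSet  m_visited;   // every block the walk has reached
  RegionDesc::BlockIdSet  m_visiting;  // blocks on the current DFS stack
};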
bool InliningDecider::shouldInline(SrcKey callerSk,
                                   const Func* callee,
                                   const RegionDesc& region,
                                   uint32_t maxTotalCost) {
  auto sk = region.empty() ? SrcKey() : region.start();
  assertx(callee);
  assertx(sk.func() == callee);

  // Tracing return lambdas.
  auto refuse = [&] (const char* why) {
    FTRACE(2, "shouldInline: rejecting callee region: {}", show(region));
    return traceRefusal(m_topFunc, callee, why);
  };

  auto accept = [&, this] (const char* kind) {
    FTRACE(2, "InliningDecider: inlining {}() <- {}()\t<reason: {}>\n",
           m_topFunc->fullName()->data(), callee->fullName()->data(), kind);
    return true;
  };

  if (m_stackDepth + callee->maxStackCells() >= kStackCheckLeafPadding) {
    return refuse("inlining stack depth limit exceeded");
  }

  // Even if the func contains NativeImpl we may have broken the trace before
  // we hit it.
  auto containsNativeImpl = [&] {
    for (auto block : region.blocks()) {
      if (!block->empty() && block->last().op() == OpNativeImpl) return true;
    }
    return false;
  };

  // Try to inline CPP builtin functions.  The NativeImpl opcode may appear
  // later in the function because of Asserts generated in hhbbc.
  if (callee->isCPPBuiltin() && containsNativeImpl()) {
    if (isInlinableCPPBuiltin(callee)) {
      return accept("inlinable CPP builtin");
    }
    return refuse("non-inlinable CPP builtin");
  }

  // If the function may use a VarEnv (which is stored in the ActRec) or may
  // be variadic, we restrict inlined callees to certain whitelisted
  // instructions which we know won't actually require these features.
  const bool needsCheckVVSafe = callee->attrs() & AttrMayUseVV;

  bool hasRet = false;

  // Iterate through the region, checking its suitability for inlining.
  for (auto const& block : region.blocks()) {
    sk = block->start();

    for (auto i = 0, n = block->length(); i < n; ++i, sk.advance()) {
      auto op = sk.op();

      // We don't allow inlined functions in the region.  The client is
      // expected to disable inlining for the region it gives us to peek.
      if (sk.func() != callee) {
        return refuse("got region with inlined calls");
      }

      // Restrict to VV-safe opcodes if necessary.
      if (needsCheckVVSafe && !isInliningVVSafe(op)) {
        return refuse(folly::format("{} may use dynamic environment",
                                    opcodeToName(op)).str().c_str());
      }

      // Count the returns.
      if (isReturnish(op)) {
        hasRet = true;
      }

      // We can't inline FCallArray.  XXX: Why?
      if (op == Op::FCallArray) {
        return refuse("can't inline FCallArray");
      }
    }
  }

  if (!hasRet) {
    return refuse("region has no returns");
  }

  // Refuse if the cost exceeds our thresholds.  We measure the cost of
  // inlining each callstack and stop when it exceeds a certain threshold.
  // (Note that we do not measure the total cost of all the inlined calls
  // for a given caller---just the cost of each nested stack.)
  const int maxCost = maxTotalCost - m_cost;
  const int cost = computeTranslationCost(callerSk, region);
  if (cost > maxCost) {
    return refuse("too expensive");
  }

  return accept("small region with return");
}
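// computeTranslationCost() is not defined in this excerpt.  A plausible
// stand-in, assuming it mirrors the per-opcode accounting that the older
// shouldInline() further below performs inline: every bytecode costs one
// unit, Assert opcodes are free, and immediate vectors add their size.  The
// real implementation may well be more sophisticated; treat this as a sketch.
int computeTranslationCostSketch(const RegionDesc& region) {
  int cost = 0;
  for (auto const& block : region.blocks()) {
    auto sk = block->start();
    for (auto i = 0, n = block->length(); i < n; ++i, sk.advance()) {
      auto const op = sk.op();
      // Assert opcodes don't contribute to the inlining cost.
      if (op == Op::AssertRATL || op == Op::AssertRATStk) continue;
      cost += 1;
      // Add the size of immediate vectors to the cost.
      auto const pc = reinterpret_cast<const Op*>(sk.pc());
      if (hasMVector(op))        cost += getMVector(pc).size();
      else if (hasImmVector(op)) cost += getImmVector(pc).size();
    }
  }
  return cost;
}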
void region_prune_arcs(RegionDesc& region) {
  FTRACE(4, "region_prune_arcs\n");

  region.sortBlocks();
  auto const sortedBlocks = region.blocks();

  // Maps region block ids to their RPO ids.
  auto blockToRPO = std::unordered_map<RegionDesc::BlockId,uint32_t>{};

  auto blockInfos = std::vector<BlockInfo>(sortedBlocks.size());
  auto workQ = dataflow_worklist<uint32_t>(sortedBlocks.size());
  for (auto rpoID = uint32_t{0}; rpoID < sortedBlocks.size(); ++rpoID) {
    auto const& b = sortedBlocks[rpoID];
    auto& binfo = blockInfos[rpoID];
    binfo.blockID = b->id();
    blockToRPO[binfo.blockID] = rpoID;
  }
  workQ.push(0);
  blockInfos[0].in = entry_state(region);

  FTRACE(4, "Iterating:\n");
  do {
    auto const rpoID = workQ.pop();
    auto& binfo = blockInfos[rpoID];
    FTRACE(4, "B{}\n", binfo.blockID);

    binfo.out = binfo.in;
    apply_transfer_function(
      binfo.out,
      region.block(binfo.blockID)->postConds()
    );

    for (auto& succ : region.succs(binfo.blockID)) {
      auto const succRPO = blockToRPO.find(succ);
      assertx(succRPO != end(blockToRPO));
      auto& succInfo = blockInfos[succRPO->second];
      if (preconds_may_pass(*region.block(succInfo.blockID), binfo.out)) {
        if (merge_into(succInfo.in, binfo.out)) {
          FTRACE(5, "  -> {}\n", succInfo.blockID);
          workQ.push(succRPO->second);
        }
      }
    }
  } while (!workQ.empty());

  FTRACE(2, "\nPostConds fixed point:\n{}\n",
    [&] () -> std::string {
      auto ret = std::string{};
      for (auto& s : blockInfos) {
        folly::format(&ret, "B{}:\n{}", s.blockID, show(s.in));
      }
      return ret;
    }()
  );

  // Now remove any edge that looks like it will unconditionally fail type
  // predictions, and completely remove any block that can't be reached.
  using ArcIDs = std::pair<RegionDesc::BlockId,RegionDesc::BlockId>;
  auto toRemove = std::vector<ArcIDs>{};
  for (auto rpoID = uint32_t{0}; rpoID < sortedBlocks.size(); ++rpoID) {
    auto const& binfo = blockInfos[rpoID];

    for (auto& succ : region.succs(binfo.blockID)) {
      auto const succRPO = blockToRPO.find(succ);
      assertx(succRPO != end(blockToRPO));
      auto const& succInfo = blockInfos[succRPO->second];
      if (!binfo.in.initialized ||
          !succInfo.in.initialized ||
          !preconds_may_pass(*region.block(succInfo.blockID), binfo.out)) {
        FTRACE(2, "Pruning arc: B{} -> B{}\n",
               binfo.blockID, succInfo.blockID);
        toRemove.emplace_back(binfo.blockID, succInfo.blockID);
      }
    }

    for (auto& r : toRemove) region.removeArc(r.first, r.second);
    toRemove.clear();
  }

  // Get rid of the completely unreachable blocks, now that any arcs to/from
  // them are gone.
  for (auto rpoID = uint32_t{0}; rpoID < sortedBlocks.size(); ++rpoID) {
    auto const& binfo = blockInfos[rpoID];
    if (!binfo.in.initialized) {
      FTRACE(2, "Pruning block: B{}\n", binfo.blockID);
      region.deleteBlock(binfo.blockID);
    }
  }
  FTRACE(2, "\n");
}
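// dataflow_worklist is a fixed-point driver from the surrounding codebase;
// this excerpt only shows its push()/pop()/empty() interface.  A minimal
// stand-in with the same behavior: a de-duplicating worklist over a fixed
// universe of RPO ids that pops the smallest pending id first, so earlier
// blocks are re-processed before their successors.  A sketch, not the real
// utility.
#include <cassert>
#include <set>
#include <vector>

struct dataflow_worklist_sketch {
  explicit dataflow_worklist_sketch(uint32_t universeSize)
    : m_inQ(universeSize, false) {}

  void push(uint32_t id) {
    assert(id < m_inQ.size());
    if (m_inQ[id]) return;            // already queued: push is a no-op
    m_inQ[id] = true;
    m_pending.insert(id);
  }

  uint32_t pop() {
    auto const id = *m_pending.begin();  // smallest pending id first
    m_pending.erase(m_pending.begin());
    m_inQ[id] = false;
    return id;
  }

  bool empty() const { return m_pending.empty(); }

 private:
  std::set<uint32_t> m_pending;
  std::vector<bool>  m_inQ;  // membership bitmap to de-duplicate pushes
};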
void region_prune_arcs(RegionDesc& region) {
  FTRACE(4, "region_prune_arcs\n");

  region.sortBlocks();
  auto const sortedBlocks = region.blocks();

  // Maps region block ids to their RPO ids.
  auto blockToRPO = std::unordered_map<RegionDesc::BlockId,uint32_t>{};

  auto blockInfos = std::vector<BlockInfo>(sortedBlocks.size());
  auto workQ = dataflow_worklist<uint32_t>(sortedBlocks.size());
  for (auto rpoID = uint32_t{0}; rpoID < sortedBlocks.size(); ++rpoID) {
    auto const& b = sortedBlocks[rpoID];
    auto& binfo = blockInfos[rpoID];
    binfo.blockID = b->id();
    blockToRPO[binfo.blockID] = rpoID;
  }
  workQ.push(0);
  blockInfos[0].in = entry_state(region);

  FTRACE(4, "Iterating:\n");
  do {
    auto const rpoID = workQ.pop();
    auto& binfo = blockInfos[rpoID];
    FTRACE(4, "B{}\n", binfo.blockID);

    /*
     * This code currently assumes inlined functions were entirely contained
     * within a single profiling translation, and will need updates if we
     * inline bigger things in a way visible to region selection.
     *
     * Note: inlined blocks /may/ have postConditions, if they are the last
     * blocks from profiling translations.  Currently any locations referred
     * to in postconditions for these blocks are for the outermost caller, so
     * this code handles that correctly.
     */
    if (region.block(binfo.blockID)->inlineLevel() != 0) {
      assertx(region.block(binfo.blockID)->typePreConditions().empty());
    }

    binfo.out = binfo.in;
    apply_transfer_function(
      binfo.out,
      region.block(binfo.blockID)->postConds()
    );

    for (auto& succ : region.succs(binfo.blockID)) {
      auto const succRPO = blockToRPO.find(succ);
      assertx(succRPO != end(blockToRPO));
      auto& succInfo = blockInfos[succRPO->second];
      if (preconds_may_pass(*region.block(succInfo.blockID), binfo.out)) {
        if (merge_into(succInfo.in, binfo.out)) {
          FTRACE(5, "  -> {}\n", succInfo.blockID);
          workQ.push(succRPO->second);
        }
      }
    }
  } while (!workQ.empty());

  FTRACE(2, "\nPostConds fixed point:\n{}\n",
    [&] () -> std::string {
      auto ret = std::string{};
      for (auto& s : blockInfos) {
        folly::format(&ret, "B{}:\n{}", s.blockID, show(s.in));
      }
      return ret;
    }()
  );

  // Now remove any edge that looks like it will unconditionally fail type
  // predictions, and completely remove any block that can't be reached.
  using ArcIDs = std::pair<RegionDesc::BlockId,RegionDesc::BlockId>;
  auto toRemove = std::vector<ArcIDs>{};
  for (auto rpoID = uint32_t{0}; rpoID < sortedBlocks.size(); ++rpoID) {
    auto const& binfo = blockInfos[rpoID];

    for (auto& succ : region.succs(binfo.blockID)) {
      auto const succRPO = blockToRPO.find(succ);
      assertx(succRPO != end(blockToRPO));
      auto const& succInfo = blockInfos[succRPO->second];
      if (!binfo.in.initialized ||
          !succInfo.in.initialized ||
          !preconds_may_pass(*region.block(succInfo.blockID), binfo.out)) {
        FTRACE(2, "Pruning arc: B{} -> B{}\n",
               binfo.blockID, succInfo.blockID);
        toRemove.emplace_back(binfo.blockID, succInfo.blockID);
      }
    }

    for (auto& r : toRemove) region.removeArc(r.first, r.second);
    toRemove.clear();
  }

  // Get rid of the completely unreachable blocks, now that any arcs to/from
  // them are gone.
  for (auto rpoID = uint32_t{0}; rpoID < sortedBlocks.size(); ++rpoID) {
    auto const& binfo = blockInfos[rpoID];
    if (!binfo.in.initialized) {
      FTRACE(2, "Pruning block: B{}\n", binfo.blockID);
      region.deleteBlock(binfo.blockID);
    }
  }
  FTRACE(2, "\n");
}
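// The dataflow pieces used above (State/BlockInfo, entry_state, merge_into,
// apply_transfer_function, preconds_may_pass) are not part of this excerpt.
// Below is a hedged sketch of the shapes the algorithm requires: a lattice
// state mapping locations to types, a merge that widens by type union and
// reports whether anything changed (which is what drives the worklist), a
// transfer function that installs a block's postconditions, and a filter
// that rejects a successor whose type preconditions cannot intersect the
// incoming state.  Location/Type/PostConditions fields are assumptions
// inferred from their uses above.
struct State {
  bool initialized{false};        // false until some path reaches the block
  std::map<Location,Type> types;  // best-known type per location
};

struct BlockInfo {
  RegionDesc::BlockId blockID;
  State in;    // state on entry to the block
  State out;   // state after applying the block's postconditions
};

State entry_state(const RegionDesc& /*region*/) {
  auto ret = State{};
  ret.initialized = true;  // the entry is reachable by definition
  return ret;
}

bool merge_into(State& dst, const State& src) {
  if (!dst.initialized) { dst = src; return true; }
  auto changed = false;
  for (auto it = dst.types.begin(); it != dst.types.end(); ) {
    auto const srcIt = src.types.find(it->first);
    if (srcIt == src.types.end()) {
      // Unknown on the other path: drop our knowledge entirely.
      it = dst.types.erase(it);
      changed = true;
      continue;
    }
    auto const merged = it->second | srcIt->second;  // type union widens
    if (merged != it->second) { it->second = merged; changed = true; }
    ++it;
  }
  return changed;
}

void apply_transfer_function(State& dst, const PostConditions& pconds) {
  // Assumed shape: each postcondition names a location and the type the
  // block guarantees for it on exit.
  for (auto const& p : pconds.changed) dst.types[p.location] = p.type;
}

bool preconds_may_pass(const RegionDesc::Block& block, const State& state) {
  for (auto const& p : block.typePreConditions()) {
    auto const it = state.types.find(p.location);
    // A precondition can only be ruled out if we know the location's type
    // and it cannot intersect the required type.
    if (it != state.types.end() && !it->second.maybe(p.type)) return false;
  }
  return true;
}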
bool InliningDecider::shouldInline(const Func* callee,
                                   const RegionDesc& region) {
  auto sk = region.empty() ? SrcKey() : region.start();
  assertx(callee);
  assertx(sk.func() == callee);

  int cost = 0;

  // Tracing return lambdas.
  auto refuse = [&] (const char* why) {
    return traceRefusal(m_topFunc, callee, why);
  };

  auto accept = [&, this] (const char* kind) {
    FTRACE(1, "InliningDecider: inlining {}() <- {}()\t<reason: {}>\n",
           m_topFunc->fullName()->data(), callee->fullName()->data(), kind);

    // Update our context.
    m_costStack.push_back(cost);
    m_cost += cost;
    m_callDepth += 1;
    m_stackDepth += callee->maxStackCells();

    return true;
  };

  // Check inlining depths.
  if (m_callDepth + 1 >= RuntimeOption::EvalHHIRInliningMaxDepth) {
    return refuse("inlining call depth limit exceeded");
  }
  if (m_stackDepth + callee->maxStackCells() >= kStackCheckLeafPadding) {
    return refuse("inlining stack depth limit exceeded");
  }

  // Even if the func contains NativeImpl we may have broken the trace before
  // we hit it.
  auto containsNativeImpl = [&] {
    for (auto block : region.blocks()) {
      if (!block->empty() && block->last().op() == OpNativeImpl) return true;
    }
    return false;
  };

  // Try to inline CPP builtin functions.  The NativeImpl opcode may appear
  // later in the function because of Asserts generated in hhbbc.
  if (callee->isCPPBuiltin() && containsNativeImpl()) {
    if (isInlinableCPPBuiltin(callee)) {
      return accept("inlinable CPP builtin");
    }
    return refuse("non-inlinable CPP builtin");
  }

  // If the function may use a VarEnv (which is stored in the ActRec) or may
  // be variadic, we restrict inlined callees to certain whitelisted
  // instructions which we know won't actually require these features.
  const bool needsCheckVVSafe = callee->attrs() & AttrMayUseVV;

  // We measure the cost of inlining each callstack and stop when it exceeds
  // a certain threshold.  (Note that we do not measure the total cost of all
  // the inlined calls for a given caller---just the cost of each nested
  // stack.)
  const int maxCost = RuntimeOption::EvalHHIRInliningMaxCost - m_cost;

  // We only inline callee regions that have exactly one return.
  //
  // NOTE: Currently, the tracelet selector uses the first Ret in the child's
  // region to determine when to stop inlining.  However, the safety of this
  // behavior should not be considered guaranteed by InliningDecider; the
  // "right" way to decide when inlining ends is to inline all of `region'.
  int numRets = 0;

  // Iterate through the region, checking its suitability for inlining.
  for (auto const& block : region.blocks()) {
    sk = block->start();

    for (auto i = 0, n = block->length(); i < n; ++i, sk.advance()) {
      auto op = sk.op();

      // We don't allow inlined functions in the region.  The client is
      // expected to disable inlining for the region it gives us to peek.
      if (sk.func() != callee) {
        return refuse("got region with inlined calls");
      }

      // Restrict to VV-safe opcodes if necessary.
      if (needsCheckVVSafe && !isInliningVVSafe(op)) {
        return refuse(folly::format("{} may use dynamic environment",
                                    opcodeToName(op)).str().c_str());
      }

      // Count the returns.
      if (isRet(op) || op == Op::NativeImpl) {
        if (++numRets > 1) {
          return refuse("region has too many returns");
        }
        continue;
      }

      // We can't inline FCallArray.  XXX: Why?
      if (op == Op::FCallArray) {
        return refuse("can't inline FCallArray");
      }

      // Assert opcodes don't contribute to the inlining cost.
      if (op == Op::AssertRATL || op == Op::AssertRATStk) continue;

      cost += 1;

      // Add the size of immediate vectors to the cost.
      auto const pc = reinterpret_cast<const Op*>(sk.pc());
      if (hasMVector(op)) {
        cost += getMVector(pc).size();
      } else if (hasImmVector(op)) {
        cost += getImmVector(pc).size();
      }

      // Refuse if the cost exceeds our thresholds.
      if (cost > maxCost) {
        return refuse("too expensive");
      }
    }
  }

  if (numRets != 1) {
    return refuse("region has no returns");
  }

  return accept("small region with single return");
}
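// accept() above charges the callee's cost to m_cost and bumps m_callDepth
// and m_stackDepth, so nested shouldInline() calls see the updated context.
// A matching teardown is therefore needed once an inlined frame is finished;
// a sketch of what it presumably looks like, mirroring accept() in reverse
// (the method name and body are assumptions, not from this excerpt):
void InliningDecider::registerEndInlining(const Func* callee) {
  auto const cost = m_costStack.back();
  m_costStack.pop_back();

  m_cost       -= cost;                      // refund this frame's cost
  m_callDepth  -= 1;                         // pop one inlining level
  m_stackDepth -= callee->maxStackCells();   // release its stack budget
}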