bool InliningDecider::shouldInline(SrcKey callerSk, const Func* callee,
                                   const RegionDesc& region,
                                   uint32_t maxTotalCost) {
  auto sk = region.empty() ? SrcKey() : region.start();
  assertx(callee);
  assertx(sk.func() == callee);

  // Tracing return lambdas.
  auto refuse = [&] (const char* why) {
    FTRACE(2, "shouldInline: rejecting callee region: {}", show(region));
    return traceRefusal(m_topFunc, callee, why);
  };

  auto accept = [&, this] (const char* kind) {
    FTRACE(2, "InliningDecider: inlining {}() <- {}()\t<reason: {}>\n",
           m_topFunc->fullName()->data(), callee->fullName()->data(), kind);
    return true;
  };

  if (m_stackDepth + callee->maxStackCells() >= kStackCheckLeafPadding) {
    return refuse("inlining stack depth limit exceeded");
  }

  // Even if the func contains NativeImpl we may have broken the trace before
  // we hit it.
  auto containsNativeImpl = [&] {
    for (auto block : region.blocks()) {
      if (!block->empty() && block->last().op() == OpNativeImpl) return true;
    }
    return false;
  };

  // Try to inline CPP builtin functions.  The NativeImpl opcode may appear
  // later in the function because of Asserts generated in hhbbc.
  if (callee->isCPPBuiltin() && containsNativeImpl()) {
    if (isInlinableCPPBuiltin(callee)) {
      return accept("inlinable CPP builtin");
    }
    return refuse("non-inlinable CPP builtin");
  }

  // If the function may use a VarEnv (which is stored in the ActRec) or may
  // be variadic, we restrict inlined callees to certain whitelisted
  // instructions which we know won't actually require these features.
  const bool needsCheckVVSafe = callee->attrs() & AttrMayUseVV;

  bool hasRet = false;

  // Iterate through the region, checking its suitability for inlining.
  for (auto const& block : region.blocks()) {
    sk = block->start();

    for (auto i = 0, n = block->length(); i < n; ++i, sk.advance()) {
      auto op = sk.op();

      // We don't allow inlined functions in the region.  The client is
      // expected to disable inlining for the region it gives us to peek.
      if (sk.func() != callee) {
        return refuse("got region with inlined calls");
      }

      // Restrict to VV-safe opcodes if necessary.
      if (needsCheckVVSafe && !isInliningVVSafe(op)) {
        return refuse(folly::format("{} may use dynamic environment",
                                    opcodeToName(op)).str().c_str());
      }

      // Count the returns.
      if (isReturnish(op)) {
        hasRet = true;
      }

      // We can't inline FCallArray.  XXX: Why?
      if (op == Op::FCallArray) {
        return refuse("can't inline FCallArray");
      }
    }
  }

  if (!hasRet) {
    return refuse("region has no returns");
  }

  // Refuse if the cost exceeds our thresholds.
  //
  // We measure the cost of inlining each callstack and stop when it exceeds
  // a certain threshold.  (Note that we do not measure the total cost of all
  // the inlined calls for a given caller---just the cost of each nested
  // stack.)
  const int maxCost = maxTotalCost - m_cost;
  const int cost = computeTranslationCost(callerSk, region);
  if (cost > maxCost) {
    return refuse("too expensive");
  }

  return accept("small region with return");
}
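// [Sketch] The isReturnish() predicate used above isn't shown in this
// section.  As an illustration only -- mirroring the return-counting logic
// of the older shouldInline() overload further below, which tests
// isRet(op) || op == Op::NativeImpl -- it could be defined as:
bool isReturnish(Op op) {
  // Treat ordinary returns, and NativeImpl (which returns on behalf of a
  // CPP builtin), as "returnish".  The real definition may differ.
  return isRet(op) || op == Op::NativeImpl;
}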
/*
 * Checks if the given region is well-formed, which entails the
 * following properties:
 *
 *   1) The region has at least one block.
 *
 *   2) Each block in the region has a different id.
 *
 *   3) All arcs involve blocks within the region.
 *
 *   4) For each arc, the bytecode offset of the dst block must
 *      possibly follow the execution of the src block.
 *
 *   5) Each block contains at most one successor corresponding to a
 *      given SrcKey.
 *
 *   6) The region doesn't contain any loops, unless JitLoops is
 *      enabled.
 *
 *   7) All blocks are reachable from the entry block.
 *
 *   8) For each block, there must be a path from the entry to it that
 *      includes only earlier blocks in the region.
 *
 *   9) The region is topologically sorted unless loops are enabled.
 *
 *  10) The block-retranslation chains cannot have cycles.
 */
bool check(const RegionDesc& region, std::string& error) {
  auto bad = [&](const std::string& errorMsg) {
    error = errorMsg;
    return false;
  };

  // 1) The region has at least one block.
  if (region.empty()) return bad("empty region");

  RegionDesc::BlockIdSet blockSet;
  for (auto b : region.blocks()) {
    auto bid = b->id();
    // 2) Each block in the region has a different id.
    if (blockSet.count(bid)) {
      return bad(folly::sformat("many blocks with id {}", bid));
    }
    blockSet.insert(bid);
  }

  for (auto b : region.blocks()) {
    auto bid = b->id();
    SrcKey lastSk = region.block(bid)->last();
    OffsetSet validSuccOffsets = lastSk.succOffsets();
    OffsetSet succOffsets;

    for (auto succ : region.succs(bid)) {
      SrcKey succSk = region.block(succ)->start();
      Offset succOffset = succSk.offset();

      // 3) All arcs involve blocks within the region.
      if (blockSet.count(succ) == 0) {
        return bad(folly::sformat("arc with dst not in the region: {} -> {}",
                                  bid, succ));
      }

      // Checks 4) and 5) below don't make sense for arcs corresponding
      // to inlined calls and returns, so skip them in such cases.
      // This won't be possible once task #4076399 is done.
      if (lastSk.func() != succSk.func()) continue;

      // 4) For each arc, the bytecode offset of the dst block must
      //    possibly follow the execution of the src block.
      if (validSuccOffsets.count(succOffset) == 0) {
        return bad(folly::sformat("arc with impossible control flow: {} -> {}",
                                  bid, succ));
      }

      // 5) Each block contains at most one successor corresponding to a
      //    given SrcKey.
      if (succOffsets.count(succOffset) > 0) {
        return bad(folly::sformat("block {} has multiple successors with "
                                  "SK {}", bid, show(succSk)));
      }
      succOffsets.insert(succOffset);
    }

    for (auto pred : region.preds(bid)) {
      if (blockSet.count(pred) == 0) {
        return bad(folly::sformat("arc with src not in the region: {} -> {}",
                                  pred, bid));
      }
    }
  }

  // 6) is checked by dfsCheck.
  DFSChecker dfsCheck(region);
  if (!dfsCheck.check(region.entry()->id())) {
    return bad("region is cyclic");
  }

  // 7) All blocks are reachable from the entry (first) block.
  if (dfsCheck.numVisited() != blockSet.size()) {
    return bad("region has unreachable blocks");
  }

  // 8) and 9) are checked below.
  RegionDesc::BlockIdSet visited;
  auto& blocks = region.blocks();
  for (unsigned i = 0; i < blocks.size(); i++) {
    auto bid = blocks[i]->id();
    unsigned nVisited = 0;
    for (auto pred : region.preds(bid)) {
      nVisited += visited.count(pred);
    }
    // 8) For each block, there must be a path from the entry to it that
    //    includes only earlier blocks in the region.
    if (nVisited == 0 && i != 0) {
      return bad(folly::sformat("block {} appears before all its "
                                "predecessors", bid));
    }
    // 9) The region is topologically sorted unless loops are enabled.
    if (!RuntimeOption::EvalJitLoops &&
        nVisited != region.preds(bid).size()) {
      return bad(folly::sformat("non-topological order (bid: {})", bid));
    }
    visited.insert(bid);
  }

  // 10) The block-retranslation chains cannot have cycles.
  for (auto b : blocks) {
    auto bid = b->id();
    RegionDesc::BlockIdSet chainSet;
    chainSet.insert(bid);
    while (auto next = region.nextRetrans(bid)) {
      auto nextId = next.value();
      if (chainSet.count(nextId)) {
        return bad(folly::sformat("cyclic retranslation chain for block {}",
                                  bid));
      }
      chainSet.insert(nextId);
      bid = nextId;
    }
  }

  return true;
}
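// [Sketch] DFSChecker is used by check() above but not shown in this
// section.  A minimal illustration of how such a checker could verify
// properties 6) and 7) -- cycle-freedom and reachability -- using only the
// succs() accessor seen above (the real implementation may differ):
struct DFSChecker {
  explicit DFSChecker(const RegionDesc& region) : m_region(region) {}

  // Depth-first search from `id'.  Returns false iff a cycle is reachable.
  bool check(RegionDesc::BlockId id) {
    if (m_onStack.count(id)) return false;  // back edge => cycle
    if (m_visited.count(id)) return true;   // already fully explored
    m_visited.insert(id);
    m_onStack.insert(id);
    for (auto succ : m_region.succs(id)) {
      if (!check(succ)) return false;
    }
    m_onStack.erase(id);
    return true;
  }

  // Number of blocks reached; check() above compares this against the
  // region's block count to detect unreachable blocks.
  size_t numVisited() const { return m_visited.size(); }

 private:
  const RegionDesc&      m_region;
  RegionDesc::BlockIdSet m_visited;
  RegionDesc::BlockIdSet m_onStack;
};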
bool InliningDecider::shouldInline(const Func* callee,
                                   const RegionDesc& region) {
  auto sk = region.empty() ? SrcKey() : region.start();
  assertx(callee);
  assertx(sk.func() == callee);

  int cost = 0;

  // Tracing return lambdas.
  auto refuse = [&] (const char* why) {
    return traceRefusal(m_topFunc, callee, why);
  };

  auto accept = [&, this] (const char* kind) {
    FTRACE(1, "InliningDecider: inlining {}() <- {}()\t<reason: {}>\n",
           m_topFunc->fullName()->data(), callee->fullName()->data(), kind);

    // Update our context.
    m_costStack.push_back(cost);
    m_cost += cost;
    m_callDepth += 1;
    m_stackDepth += callee->maxStackCells();

    return true;
  };

  // Check inlining depths.
  if (m_callDepth + 1 >= RuntimeOption::EvalHHIRInliningMaxDepth) {
    return refuse("inlining call depth limit exceeded");
  }
  if (m_stackDepth + callee->maxStackCells() >= kStackCheckLeafPadding) {
    return refuse("inlining stack depth limit exceeded");
  }

  // Even if the func contains NativeImpl we may have broken the trace before
  // we hit it.
  auto containsNativeImpl = [&] {
    for (auto block : region.blocks()) {
      if (!block->empty() && block->last().op() == OpNativeImpl) return true;
    }
    return false;
  };

  // Try to inline CPP builtin functions.  The NativeImpl opcode may appear
  // later in the function because of Asserts generated in hhbbc.
  if (callee->isCPPBuiltin() && containsNativeImpl()) {
    if (isInlinableCPPBuiltin(callee)) {
      return accept("inlinable CPP builtin");
    }
    return refuse("non-inlinable CPP builtin");
  }

  // If the function may use a VarEnv (which is stored in the ActRec) or may
  // be variadic, we restrict inlined callees to certain whitelisted
  // instructions which we know won't actually require these features.
  const bool needsCheckVVSafe = callee->attrs() & AttrMayUseVV;

  // We measure the cost of inlining each callstack and stop when it exceeds
  // a certain threshold.  (Note that we do not measure the total cost of all
  // the inlined calls for a given caller---just the cost of each nested
  // stack.)
  const int maxCost = RuntimeOption::EvalHHIRInliningMaxCost - m_cost;

  // We only inline callee regions that have exactly one return.
  //
  // NOTE: Currently, the tracelet selector uses the first Ret in the
  // callee's region to determine when to stop inlining.  However, the
  // safety of this behavior should not be considered guaranteed by
  // InliningDecider; the "right" way to decide when inlining ends is to
  // inline all of `region'.
  int numRets = 0;

  // Iterate through the region, checking its suitability for inlining.
  for (auto const& block : region.blocks()) {
    sk = block->start();

    for (auto i = 0, n = block->length(); i < n; ++i, sk.advance()) {
      auto op = sk.op();

      // We don't allow inlined functions in the region.  The client is
      // expected to disable inlining for the region it gives us to peek.
      if (sk.func() != callee) {
        return refuse("got region with inlined calls");
      }

      // Restrict to VV-safe opcodes if necessary.
      if (needsCheckVVSafe && !isInliningVVSafe(op)) {
        return refuse(folly::format("{} may use dynamic environment",
                                    opcodeToName(op)).str().c_str());
      }

      // Count the returns.
      if (isRet(op) || op == Op::NativeImpl) {
        if (++numRets > 1) {
          return refuse("region has too many returns");
        }
        continue;
      }

      // We can't inline FCallArray.  XXX: Why?
      if (op == Op::FCallArray) {
        return refuse("can't inline FCallArray");
      }

      // Assert opcodes don't contribute to the inlining cost.
      if (op == Op::AssertRATL || op == Op::AssertRATStk) continue;

      cost += 1;

      // Add the size of immediate vectors to the cost.
      auto const pc = reinterpret_cast<const Op*>(sk.pc());
      if (hasMVector(op)) {
        cost += getMVector(pc).size();
      } else if (hasImmVector(op)) {
        cost += getImmVector(pc).size();
      }

      // Refuse if the cost exceeds our thresholds.
      if (cost > maxCost) {
        return refuse("too expensive");
      }
    }
  }

  if (numRets != 1) {
    return refuse("region has no returns");
  }
  return accept("small region with single return");
}
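// [Sketch] accept() above pushes the callee's cost and stack footprint into
// the decider's state, so a matching teardown must pop them once the
// inlined region has been fully translated -- this is what keeps m_cost
// tracking only the current nested callstack rather than the caller's
// lifetime total.  A hedged sketch of that bookkeeping, assuming only the
// members referenced in shouldInline() (the actual method may differ):
void InliningDecider::registerEndInlining(const Func* callee) {
  // Undo exactly what accept() recorded for this callee.
  auto const cost = m_costStack.back();
  m_costStack.pop_back();

  m_cost -= cost;
  m_callDepth -= 1;
  m_stackDepth -= callee->maxStackCells();
}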