bool shouldIRInline(const Func* caller, const Func* callee, RegionIter& iter) {
  if (!RuntimeOption::EvalHHIREnableGenTimeInlining) {
    return false;
  }
  if (arch() == Arch::ARM) {
    // TODO(#3331014): hack until more ARM codegen is working.
    return false;
  }

  auto refuse = [&](const char* why) -> bool {
    FTRACE(1, "shouldIRInline: refusing {} <reason: {}> [NI = {}]\n",
           callee->fullName()->data(), why,
           iter.finished() ? "<end>" : iter.sk().showInst());
    return false;
  };
  auto accept = [&](const char* kind) -> bool {
    FTRACE(1, "shouldIRInline: inlining {} <kind: {}>\n",
           callee->fullName()->data(), kind);
    return true;
  };

  if (callee->numIterators() != 0) {
    return refuse("iterators");
  }
  if (callee->isMagic() || Func::isSpecial(callee->name())) {
    return refuse("special or magic function");
  }
  if (callee->attrs() & AttrMayUseVV) {
    return refuse("may use dynamic environment");
  }
  if (callee->numSlotsInFrame() + callee->maxStackCells() >=
      kStackCheckLeafPadding) {
    return refuse("function stack depth too deep");
  }

  ////////////

  assert(!iter.finished() && "shouldIRInline given empty region");
  bool hotCallingCold = !(callee->attrs() & AttrHot) &&
                         (caller->attrs() & AttrHot);
  uint64_t cost = 0;
  int inlineDepth = 0;
  Op op = OpLowInvalid;
  smart::vector<const Func*> funcs;
  const Func* func = callee;
  funcs.push_back(func);

  for (; !iter.finished(); iter.advance()) {
    // If func has changed after an FCall, we've started an inlined call. This
    // will have to change when we support inlining recursive calls.
    if (func != iter.sk().func()) {
      assert(isRet(op) || op == Op::FCall || op == Op::FCallD);
      if (op == Op::FCall || op == Op::FCallD) {
        funcs.push_back(iter.sk().func());
        int totalDepth = 0;
        for (auto* f : funcs) {
          totalDepth += f->numSlotsInFrame() + f->maxStackCells();
        }
        if (totalDepth >= kStackCheckLeafPadding) {
          return refuse("stack too deep after nested inlining");
        }
        ++inlineDepth;
      }
    }
    op = iter.sk().op();
    func = iter.sk().func();

    // If we hit a RetC/V while inlining, leave that level and
    // continue. Otherwise, accept the tracelet.
    if (isRet(op)) {
      if (inlineDepth > 0) {
        --inlineDepth;
        funcs.pop_back();
        continue;
      } else {
        assert(inlineDepth == 0);
        return accept("entire function fits in one region");
      }
    }

    if (op == Op::FCallArray) return refuse("FCallArray");

    // These opcodes don't indicate any additional work in the callee,
    // so they shouldn't count toward the inlining cost.
    if (op == Op::AssertTL || op == Op::AssertTStk ||
        op == Op::AssertObjL || op == Op::AssertObjStk ||
        op == Op::PredictTL || op == Op::PredictTStk) {
      continue;
    }

    cost += 1;

    // Check for an immediate vector, and if it's present add its size to the
    // cost.
    auto const pc = reinterpret_cast<const Op*>(iter.sk().pc());
    if (hasMVector(op)) {
      cost += getMVector(pc).size();
    } else if (hasImmVector(op)) {
      cost += getImmVector(pc).size();
    }

    if (cost > RuntimeOption::EvalHHIRInliningMaxCost) {
      return refuse("too expensive");
    }

    if (cost > RuntimeOption::EvalHHIRAlwaysInlineMaxCost && hotCallingCold) {
      return refuse("inlining sizeable cold func into hot func");
    }

    if (JIT::opcodeBreaksBB(op)) {
      return refuse("breaks tracelet");
    }
  }

  return refuse("region doesn't end in RetC/RetV");
}
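/*
 * Decide whether the given callee region is worth inlining at the current
 * call site.  This is the RegionDesc-based counterpart to shouldIRInline()
 * above: it charges each non-Assert opcode (plus any immediate vectors)
 * against a per-callstack budget, restricts callees that may use a VarEnv to
 * whitelisted opcodes, and requires the region to contain exactly one return.
 */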
bool InliningDecider::shouldInline(const Func* callee,
                                   const RegionDesc& region) {
  auto sk = region.empty() ? SrcKey() : region.start();
  assertx(callee);
  assertx(sk.func() == callee);

  int cost = 0;

  // Tracing return lambdas.
  auto refuse = [&] (const char* why) {
    return traceRefusal(m_topFunc, callee, why);
  };

  auto accept = [&, this] (const char* kind) {
    FTRACE(1, "InliningDecider: inlining {}() <- {}()\t<reason: {}>\n",
           m_topFunc->fullName()->data(), callee->fullName()->data(), kind);

    // Update our context.
    m_costStack.push_back(cost);
    m_cost += cost;
    m_callDepth += 1;
    m_stackDepth += callee->maxStackCells();

    return true;
  };

  // Check inlining depths.
  if (m_callDepth + 1 >= RuntimeOption::EvalHHIRInliningMaxDepth) {
    return refuse("inlining call depth limit exceeded");
  }
  if (m_stackDepth + callee->maxStackCells() >= kStackCheckLeafPadding) {
    return refuse("inlining stack depth limit exceeded");
  }

  // Even if the func contains NativeImpl we may have broken the trace before
  // we hit it.
  auto containsNativeImpl = [&] {
    for (auto block : region.blocks()) {
      if (!block->empty() && block->last().op() == OpNativeImpl) return true;
    }
    return false;
  };

  // Try to inline CPP builtin functions.  The NativeImpl opcode may appear
  // later in the function because of Asserts generated in hhbbc.
  if (callee->isCPPBuiltin() && containsNativeImpl()) {
    if (isInlinableCPPBuiltin(callee)) {
      return accept("inlinable CPP builtin");
    }
    return refuse("non-inlinable CPP builtin");
  }

  // If the function may use a VarEnv (which is stored in the ActRec) or may
  // be variadic, we restrict inlined callees to certain whitelisted
  // instructions which we know won't actually require these features.
  const bool needsCheckVVSafe = callee->attrs() & AttrMayUseVV;

  // We measure the cost of inlining each callstack and stop when it exceeds a
  // certain threshold.  (Note that we do not measure the total cost of all
  // the inlined calls for a given caller---just the cost of each nested
  // stack.)
  const int maxCost = RuntimeOption::EvalHHIRInliningMaxCost - m_cost;

  // We only inline callee regions that have exactly one return.
  //
  // NOTE: Currently, the tracelet selector uses the first Ret in the child's
  // region to determine when to stop inlining.  However, the safety of this
  // behavior should not be considered guaranteed by InliningDecider; the
  // "right" way to decide when inlining ends is to inline all of `region'.
  int numRets = 0;

  // Iterate through the region, checking its suitability for inlining.
  for (auto const& block : region.blocks()) {
    sk = block->start();

    for (auto i = 0, n = block->length(); i < n; ++i, sk.advance()) {
      auto op = sk.op();

      // We don't allow inlined functions in the region.  The client is
      // expected to disable inlining for the region it gives us to peek.
      if (sk.func() != callee) {
        return refuse("got region with inlined calls");
      }

      // Restrict to VV-safe opcodes if necessary.
      if (needsCheckVVSafe && !isInliningVVSafe(op)) {
        return refuse(folly::format("{} may use dynamic environment",
                                    opcodeToName(op)).str().c_str());
      }

      // Count the returns.
      if (isRet(op) || op == Op::NativeImpl) {
        if (++numRets > 1) {
          return refuse("region has too many returns");
        }
        continue;
      }

      // We can't inline FCallArray.  XXX: Why?
      if (op == Op::FCallArray) {
        return refuse("can't inline FCallArray");
      }

      // Assert opcodes don't contribute to the inlining cost.
      if (op == Op::AssertRATL || op == Op::AssertRATStk) continue;

      cost += 1;

      // Add the size of immediate vectors to the cost.
      auto const pc = reinterpret_cast<const Op*>(sk.pc());
      if (hasMVector(op)) {
        cost += getMVector(pc).size();
      } else if (hasImmVector(op)) {
        cost += getImmVector(pc).size();
      }

      // Refuse if the cost exceeds our threshold.
      if (cost > maxCost) {
        return refuse("too expensive");
      }
    }
  }

  if (numRets != 1) {
    return refuse("region has no returns");
  }
  return accept("small region with single return");
}
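/*
 * A minimal, self-contained sketch of the cost model shared by both deciders
 * above.  The names below (sketch::Instruction, withinInliningBudget,
 * kMaxCost) are hypothetical stand-ins for the real HHVM machinery, not part
 * of it: every non-Assert opcode costs one unit, an immediate vector adds its
 * size, and the callee is refused once the running total passes the budget.
 */
#include <cstdint>
#include <vector>

namespace sketch {

struct Instruction {
  bool isAssert;        // Assert* opcodes are free.
  uint32_t immVecSize;  // 0 when the opcode carries no immediate vector.
};

constexpr uint64_t kMaxCost = 13;  // stand-in for EvalHHIRInliningMaxCost

// Returns true if the instruction stream fits within the inlining budget.
bool withinInliningBudget(const std::vector<Instruction>& callee) {
  uint64_t cost = 0;
  for (auto const& inst : callee) {
    if (inst.isAssert) continue;   // asserts don't count toward the cost
    cost += 1 + inst.immVecSize;   // base cost plus immediate-vector size
    if (cost > kMaxCost) return false;
  }
  return true;
}

// e.g. withinInliningBudget({{false, 0}, {true, 0}, {false, 3}}) == true
//      (cost = 1 + 0 + 4 = 5, within kMaxCost)

}  // namespace sketch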