/** * instrNumPops() returns the number of values consumed from the stack * for a given push/pop instruction. For peek/poke instructions, this * function returns 0. */ int instrNumPops(PC pc) { static const int32_t numberOfPops[] = { #define NOV 0 #define ONE(...) 1 #define TWO(...) 2 #define THREE(...) 3 #define FOUR(...) 4 #define MMANY -1 #define C_MMANY -2 #define V_MMANY -2 #define R_MMANY -2 #define MFINAL -3 #define FMANY -3 #define CVMANY -3 #define CVUMANY -3 #define CMANY -3 #define SMANY -1 #define IDX_A -4 #define O(name, imm, pop, push, flags) pop, OPCODES #undef NOV #undef ONE #undef TWO #undef THREE #undef FOUR #undef MMANY #undef C_MMANY #undef V_MMANY #undef R_MMANY #undef MFINAL #undef FMANY #undef CVMANY #undef CVUMANY #undef CMANY #undef SMANY #undef IDX_A #undef O }; int n = numberOfPops[size_t(peek_op(pc))]; // For most instructions, we know how many values are popped based // solely on the opcode if (n >= 0) return n; // BaseSC and BaseSL remove an A that may be on the top of the stack or one // element below the top, depending on the second immediate. if (n == -4) return getImm(pc, 1).u_IVA + 1; // FCall, NewPackedArray, and final member operations specify how many values // are popped in their first immediate if (n == -3) return getImm(pc, 0).u_IVA; // For instructions with vector immediates, we have to scan the // contents of the vector immediate to determine how many values // are popped assert(n == -1 || n == -2); ImmVector iv = getImmVector(pc); // Count the number of values on the stack accounted for by the // ImmVector's location and members int k = iv.numStackValues(); // If this instruction also takes a RHS, count that too if (n == -2) ++k; return k; }
// Decode the location (base) portion of an instruction's member vector:
// the leading LocationCode, plus its variable-size immediate when the
// location code carries one (0 otherwise).
MInstrLocation getMLocation(const Op* opcode) {
  auto const immVec = getImmVector(opcode);
  auto cursor = immVec.vec();
  auto const lcode = LocationCode(*cursor++);
  auto const locImm =
    numLocationCodeImms(lcode) ? decodeVariableSizeImm(&cursor) : 0;
  return {lcode, locImm};
}
/** * instrNumPops() returns the number of values consumed from the stack * for a given push/pop instruction. For peek/poke instructions, this * function returns 0. */ int instrNumPops(PC pc) { static const int32_t numberOfPops[] = { #define NOV 0 #define ONE(...) 1 #define TWO(...) 2 #define THREE(...) 3 #define FOUR(...) 4 #define MFINAL -3 #define F_MFINAL -6 #define C_MFINAL -5 #define V_MFINAL C_MFINAL #define FMANY -3 #define CVUMANY -3 #define CMANY -3 #define SMANY -1 #define IDX_A -4 #define O(name, imm, pop, push, flags) pop, OPCODES #undef NOV #undef ONE #undef TWO #undef THREE #undef FOUR #undef MFINAL #undef F_MFINAL #undef C_MFINAL #undef V_MFINAL #undef FMANY #undef CVUMANY #undef CMANY #undef SMANY #undef IDX_A #undef O }; auto const op = peek_op(pc); int n = numberOfPops[size_t(op)]; // For most instructions, we know how many values are popped based // solely on the opcode if (n >= 0) return n; // BaseSC and BaseSL remove an A that may be on the top of the stack or one // element below the top, depending on the second immediate. if (n == -4) return getImm(pc, 1).u_IVA + 1; // FCall, NewPackedArray, and some final member operations specify how many // values are popped in their first immediate if (n == -3) return getImm(pc, 0).u_IVA; // FPassM final operations have paramId as imm 0 and stackCount as imm1 if (n == -6) return getImm(pc, 1).u_IVA; // Other final member operations pop their first immediate + 1 if (n == -5) return getImm(pc, 0).u_IVA + 1; // For instructions with vector immediates, we have to scan the contents of // the vector immediate to determine how many values are popped assert(n == -1); ImmVector iv = getImmVector(pc); int k = iv.numStackValues(); return k; }
/** * instrNumPops() returns the number of values consumed from the stack * for a given push/pop instruction. For peek/poke instructions, this * function returns 0. */ int instrNumPops(PC pc) { static const int32_t numberOfPops[] = { #define NOV 0 #define ONE(...) 1 #define TWO(...) 2 #define THREE(...) 3 #define FOUR(...) 4 #define MFINAL -3 #define F_MFINAL -6 #define C_MFINAL -5 #define V_MFINAL C_MFINAL #define FMANY -3 #define UFMANY -4 #define CVUMANY -3 #define CMANY -3 #define SMANY -1 #define O(name, imm, pop, push, flags) pop, OPCODES #undef NOV #undef ONE #undef TWO #undef THREE #undef FOUR #undef MFINAL #undef F_MFINAL #undef C_MFINAL #undef V_MFINAL #undef FMANY #undef UFMANY #undef CVUMANY #undef CMANY #undef SMANY #undef O }; auto const op = peek_op(pc); int n = numberOfPops[size_t(op)]; // For most instructions, we know how many values are popped based // solely on the opcode if (n >= 0) return n; // FCall, NewPackedArray, and some final member operations specify how many // values are popped in their first immediate if (n == -3) return getImm(pc, 0).u_IVA; // FCallM, FCallDM, and FCallUnpackM pop uninit values from the stack and // push multiple returned values. if (n == -4) return getImm(pc, 0).u_IVA + getImm(pc, 1).u_IVA - 1; // FPassM final operations have paramId as imm 0 and stackCount as imm1 if (n == -6) return getImm(pc, 1).u_IVA; // Other final member operations pop their first immediate + 1 if (n == -5) return getImm(pc, 0).u_IVA + 1; // For instructions with vector immediates, we have to scan the contents of // the vector immediate to determine how many values are popped assertx(n == -1); ImmVector iv = getImmVector(pc); int k = iv.numStackValues(); return k; }
/** * instrNumPops() returns the number of values consumed from the stack * for a given push/pop instruction. For peek/poke instructions, this * function returns 0. */ int instrNumPops(PC pc) { static const int32_t numberOfPops[] = { #define NOV 0 #define ONE(...) 1 #define TWO(...) 2 #define THREE(...) 3 #define FOUR(...) 4 #define FIVE(...) 5 #define MFINAL -3 #define C_MFINAL -5 #define V_MFINAL C_MFINAL #define CVMANY -3 #define CVUMANY -3 #define FCALL -4 #define CMANY -3 #define SMANY -1 #define O(name, imm, pop, push, flags) pop, OPCODES #undef NOV #undef ONE #undef TWO #undef THREE #undef FOUR #undef FIVE #undef MFINAL #undef C_MFINAL #undef V_MFINAL #undef CVMANY #undef CVUMANY #undef FCALL #undef CMANY #undef SMANY #undef O }; auto const op = peek_op(pc); int n = numberOfPops[size_t(op)]; // For most instructions, we know how many values are popped based // solely on the opcode if (n >= 0) return n; // FCallAwait, NewPackedArray, and some final member operations specify how // many values are popped in their first immediate if (n == -3) return getImm(pc, 0).u_IVA; // FCall pops numArgs, unpack and (numRets - 1) uninit values if (n == -4) { auto const fca = getImm(pc, 0).u_FCA; return fca.numArgs + (fca.hasUnpack ? 1 : 0) + fca.numRets - 1; } // Other final member operations pop their first immediate + 1 if (n == -5) return getImm(pc, 0).u_IVA + 1; // For instructions with vector immediates, we have to scan the contents of // the vector immediate to determine how many values are popped assertx(n == -1); ImmVector iv = getImmVector(pc); int k = iv.numStackValues(); return k; }
/** * instrNumPops() returns the number of values consumed from the stack * for a given push/pop instruction. For peek/poke instructions, this * function returns 0. */ int instrNumPops(const Op* opcode) { static const int8_t numberOfPops[] = { #define NOV 0 #define ONE(...) 1 #define TWO(...) 2 #define THREE(...) 3 #define FOUR(...) 4 #define MMANY -1 #define C_MMANY -2 #define V_MMANY -2 #define R_MMANY -2 #define FMANY -3 #define CVMANY -3 #define CVUMANY -3 #define CMANY -3 #define SMANY -1 #define O(name, imm, pop, push, flags) pop, OPCODES #undef NOV #undef ONE #undef TWO #undef THREE #undef FOUR #undef MMANY #undef C_MMANY #undef V_MMANY #undef R_MMANY #undef FMANY #undef CVMANY #undef CVUMANY #undef CMANY #undef SMANY #undef O }; int n = numberOfPops[uint8_t(*opcode)]; // For most instructions, we know how many values are popped based // solely on the opcode if (n >= 0) return n; // FCall and NewPackedArray specify how many values are popped in their // first immediate if (n == -3) return getImm(opcode, 0).u_IVA; // For instructions with vector immediates, we have to scan the // contents of the vector immediate to determine how many values // are popped assert(n == -1 || n == -2); ImmVector iv = getImmVector(opcode); // Count the number of values on the stack accounted for by the // ImmVector's location and members int k = iv.numStackValues(); // If this instruction also takes a RHS, count that too if (n == -2) ++k; return k; }
// Total number of stack cells an instruction consumes, including activation
// record cells for call instructions.
int64_t getStackPopped(PC pc) {
  auto const op = peek_op(pc);
  switch (op) {
    // Calls pop their arguments plus the activation record.
    case Op::FCall:
    case Op::FCallD:
    case Op::FCallAwait:
      return getImm(pc, 0).u_IVA + kNumActRecCells;
    case Op::FCallArray:
      return kNumActRecCells + 1;

    // These encode their pop count in the first immediate.
    case Op::QueryM:
    case Op::VGetM:
    case Op::IncDecM:
    case Op::UnsetM:
    case Op::NewPackedArray:
    case Op::NewVecArray:
    case Op::NewKeysetArray:
    case Op::ConcatN:
    case Op::FCallBuiltin:
    case Op::CreateCl:
      return getImm(pc, 0).u_IVA;

    case Op::FPassM:
      // imm[0] is the argument index; imm[1] holds the stack count.
      return getImm(pc, 1).u_IVA;

    // These pop one extra value (the RHS) beyond the first immediate.
    case Op::SetM:
    case Op::SetOpM:
    case Op::BindM:
      return getImm(pc, 0).u_IVA + 1;

    case Op::NewStructArray:
      return getImmVector(pc).size();

    case Op::BaseSC:
    case Op::BaseSL:
      return getImm(pc, 1).u_IVA + 1;

    default:
      break;
  }

  // Everything else: derive the count from the static instruction info.
  uint64_t const mask = getInstrInfo(op).in;
  // All StackN/BStackN instructions were handled in the switch above.
  assertx((mask & (StackN | BStackN)) == 0);
  return countOperands(mask);
}
std::vector<MVectorItem> getMVector(const Op* opcode) { auto immVec = getImmVector(opcode); std::vector<MVectorItem> result; auto it = immVec.vec(); auto end = it + immVec.size(); // Skip the LocationCode and its immediate auto const lcode = LocationCode(*it++); if (numLocationCodeImms(lcode)) decodeVariableSizeImm(&it); while (it < end) { auto const mcode = MemberCode(*it++); auto const imm = memberCodeHasImm(mcode) ? decodeMemberCodeImm(&it, mcode) : 0; result.push_back({mcode, imm}); } return result; }
/** * instrNumPops() returns the number of values consumed from the stack * for a given push/pop instruction. For peek/poke instructions, this * function returns 0. */ int instrNumPops(const Opcode* opcode) { static const int8_t numberOfPops[] = { #define NOV 0 #define ONE(...) 1 #define TWO(...) 2 #define THREE(...) 3 #define LMANY(...) -1 #define C_LMANY(...) -2 #define V_LMANY(...) -2 #define FMANY -3 #define O(name, imm, pop, push, flags) pop, OPCODES #undef NOV #undef ONE #undef TWO #undef THREE #undef LMANY #undef C_LMANY #undef V_LMANY #undef FMANY #undef O }; int n = numberOfPops[*opcode]; // For most instructions, we know how many values are popped based // solely on the opcode if (n >= 0) return n; // FCall specifies how many values are popped in its first immediate if (n == -3) return getImm(opcode, 0).u_IVA; // For instructions with vector immediates, we have to scan the // contents of the vector immediate to determine how many values // are popped ASSERT(n == -1 || n == -2); ImmVector iv = getImmVector(opcode); // Count the number of values on the stack accounted for by the // ImmVector's location and members int k = iv.numStackValues(); // If this instruction also takes a RHS, count that too if (n == -2) ++k; return k; }
/*
 * Decide whether `callee', invoked from `caller', should be inlined at
 * IR-generation time by walking the callee's region in `iter'.
 *
 * Cheap static vetoes come first (runtime flag, architecture, iterator use,
 * magic/special functions, VV usage, frame size); then the region is scanned,
 * accumulating a cost and tracking nested inlined calls, and accepted only if
 * it ends in a return at inline depth zero.
 */
bool shouldIRInline(const Func* caller, const Func* callee, RegionIter& iter) {
  if (!RuntimeOption::EvalHHIREnableGenTimeInlining) {
    return false;
  }
  if (arch() == Arch::ARM) {
    // TODO(#3331014): hack until more ARM codegen is working.
    return false;
  }
  // Tracing helpers; refuse() logs the reason and the next instruction.
  auto refuse = [&](const char* why) -> bool {
    FTRACE(1, "shouldIRInline: refusing {} <reason: {}> [NI = {}]\n",
           callee->fullName()->data(), why,
           iter.finished() ? "<end>" : iter.sk().showInst());
    return false;
  };
  auto accept = [&](const char* kind) -> bool {
    FTRACE(1, "shouldIRInline: inlining {} <kind: {}>\n",
           callee->fullName()->data(), kind);
    return true;
  };
  if (callee->numIterators() != 0) {
    return refuse("iterators");
  }
  if (callee->isMagic() || Func::isSpecial(callee->name())) {
    return refuse("special or magic function");
  }
  if (callee->attrs() & AttrMayUseVV) {
    return refuse("may use dynamic environment");
  }
  // Reject callees whose frame would eat into the leaf-function stack-check
  // padding.
  if (callee->numSlotsInFrame() + callee->maxStackCells() >=
      kStackCheckLeafPadding) {
    return refuse("function stack depth too deep");
  }
  ////////////
  assert(!iter.finished() && "shouldIRInline given empty region");
  // Inlining a cold callee into a hot caller gets a stricter cost threshold
  // below.
  bool hotCallingCold = !(callee->attrs() & AttrHot) &&
                        (caller->attrs() & AttrHot);
  uint64_t cost = 0;
  int inlineDepth = 0;
  // `op' carries the previous iteration's opcode so a function change can be
  // attributed to the FCall/FCallD (or return) that preceded it.
  Op op = OpLowInvalid;
  // Stack of functions currently being inlined, used to bound the combined
  // frame depth of nested inlined calls.
  smart::vector<const Func*> funcs;
  const Func* func = callee;
  funcs.push_back(func);
  for (; !iter.finished(); iter.advance()) {
    // If func has changed after an FCall, we've started an inlined call. This
    // will have to change when we support inlining recursive calls.
    if (func != iter.sk().func()) {
      assert(isRet(op) || op == Op::FCall || op == Op::FCallD);
      if (op == Op::FCall || op == Op::FCallD) {
        funcs.push_back(iter.sk().func());
        // Re-check the stack-depth budget against the whole nested stack.
        int totalDepth = 0;
        for (auto* f : funcs) {
          totalDepth += f->numSlotsInFrame() + f->maxStackCells();
        }
        if (totalDepth >= kStackCheckLeafPadding) {
          return refuse("stack too deep after nested inlining");
        }
        ++inlineDepth;
      }
    }
    op = iter.sk().op();
    func = iter.sk().func();

    // If we hit a RetC/V while inlining, leave that level and
    // continue. Otherwise, accept the tracelet.
    if (isRet(op)) {
      if (inlineDepth > 0) {
        --inlineDepth;
        funcs.pop_back();
        continue;
      } else {
        assert(inlineDepth == 0);
        return accept("entire function fits in one region");
      }
    }

    if (op == Op::FCallArray) return refuse("FCallArray");

    // These opcodes don't indicate any additional work in the callee,
    // so they shouldn't count toward the inlining cost.
    if (op == Op::AssertTL || op == Op::AssertTStk ||
        op == Op::AssertObjL || op == Op::AssertObjStk ||
        op == Op::PredictTL || op == Op::PredictTStk) {
      continue;
    }
    cost += 1;

    // Check for an immediate vector, and if it's present add its size to the
    // cost.
    auto const pc = reinterpret_cast<const Op*>(iter.sk().pc());
    if (hasMVector(op)) {
      cost += getMVector(pc).size();
    } else if (hasImmVector(op)) {
      cost += getImmVector(pc).size();
    }

    if (cost > RuntimeOption::EvalHHIRInliningMaxCost) {
      return refuse("too expensive");
    }

    if (cost > RuntimeOption::EvalHHIRAlwaysInlineMaxCost &&
        hotCallingCold) {
      return refuse("inlining sizeable cold func into hot func");
    }

    if (JIT::opcodeBreaksBB(op)) {
      return refuse("breaks tracelet");
    }
  }

  // Fell off the end of the region without reaching a top-level return.
  return refuse("region doesn't end in RetC/RetV");
}
/*
 * Decide whether `callee' (whose region is `region') should be inlined into
 * the current call stack tracked by this InliningDecider.
 *
 * On acceptance, accept() mutates the decider's state (m_costStack, m_cost,
 * m_callDepth, m_stackDepth) to account for the new inlined frame; refusal
 * leaves the state untouched.
 */
bool InliningDecider::shouldInline(const Func* callee,
                                   const RegionDesc& region) {
  auto sk = region.empty() ? SrcKey() : region.start();
  assertx(callee);
  assertx(sk.func() == callee);

  int cost = 0;

  // Tracing return lambdas.
  auto refuse = [&] (const char* why) {
    return traceRefusal(m_topFunc, callee, why);
  };

  auto accept = [&, this] (const char* kind) {
    FTRACE(1, "InliningDecider: inlining {}() <- {}()\t<reason: {}>\n",
           m_topFunc->fullName()->data(), callee->fullName()->data(), kind);

    // Update our context.
    m_costStack.push_back(cost);
    m_cost += cost;
    m_callDepth += 1;
    m_stackDepth += callee->maxStackCells();

    return true;
  };

  // Check inlining depths.
  if (m_callDepth + 1 >= RuntimeOption::EvalHHIRInliningMaxDepth) {
    return refuse("inlining call depth limit exceeded");
  }
  if (m_stackDepth + callee->maxStackCells() >= kStackCheckLeafPadding) {
    return refuse("inlining stack depth limit exceeded");
  }

  // Even if the func contains NativeImpl we may have broken the trace before
  // we hit it.
  auto containsNativeImpl = [&] {
    for (auto block : region.blocks()) {
      if (!block->empty() && block->last().op() == OpNativeImpl) return true;
    }
    return false;
  };

  // Try to inline CPP builtin functions.
  // The NativeImpl opcode may appear later in the function because of Asserts
  // generated in hhbbc
  if (callee->isCPPBuiltin() && containsNativeImpl()) {
    if (isInlinableCPPBuiltin(callee)) {
      return accept("inlinable CPP builtin");
    }
    return refuse("non-inlinable CPP builtin");
  }

  // If the function may use a VarEnv (which is stored in the ActRec) or may be
  // variadic, we restrict inlined callees to certain whitelisted instructions
  // which we know won't actually require these features.
  const bool needsCheckVVSafe = callee->attrs() & AttrMayUseVV;

  // We measure the cost of inlining each callstack and stop when it exceeds a
  // certain threshold. (Note that we do not measure the total cost of all the
  // inlined calls for a given caller---just the cost of each nested stack.)
  const int maxCost = RuntimeOption::EvalHHIRInliningMaxCost - m_cost;

  // We only inline callee regions that have exactly one return.
  //
  // NOTE: Currently, the tracelet selector uses the first Ret in the child's
  // region to determine when to stop inlining. However, the safety of this
  // behavior should not be considered guaranteed by InliningDecider; the
  // "right" way to decide when inlining ends is to inline all of `region'.
  int numRets = 0;

  // Iterate through the region, checking its suitability for inlining.
  for (auto const& block : region.blocks()) {
    sk = block->start();

    for (auto i = 0, n = block->length(); i < n; ++i, sk.advance()) {
      auto op = sk.op();

      // We don't allow inlined functions in the region. The client is
      // expected to disable inlining for the region it gives us to peek.
      if (sk.func() != callee) {
        return refuse("got region with inlined calls");
      }

      // Restrict to VV-safe opcodes if necessary.
      if (needsCheckVVSafe && !isInliningVVSafe(op)) {
        return refuse(folly::format("{} may use dynamic environment",
                                    opcodeToName(op)).str().c_str());
      }

      // Count the returns.  (NativeImpl terminates a builtin body, so it
      // counts as the region's return here.)
      if (isRet(op) || op == Op::NativeImpl) {
        if (++numRets > 1) {
          return refuse("region has too many returns");
        }
        continue;
      }

      // We can't inline FCallArray. XXX: Why?
      if (op == Op::FCallArray) {
        return refuse("can't inline FCallArray");
      }

      // Assert opcodes don't contribute to the inlining cost.
      if (op == Op::AssertRATL || op == Op::AssertRATStk) continue;

      cost += 1;

      // Add the size of immediate vectors to the cost.
      auto const pc = reinterpret_cast<const Op*>(sk.pc());
      if (hasMVector(op)) {
        cost += getMVector(pc).size();
      } else if (hasImmVector(op)) {
        cost += getImmVector(pc).size();
      }

      // Refuse if the cost exceeds our thresholds.
      if (cost > maxCost) {
        return refuse("too expensive");
      }
    }
  }

  // numRets > 1 was refused above, so reaching here with numRets != 1 means
  // the region contained no return at all.
  if (numRets != 1) {
    return refuse("region has no returns");
  }
  return accept("small region with single return");
}