bool instrBreaksProfileBB(const NormalizedInstruction* inst) { if (instrIsNonCallControlFlow(inst->op()) || inst->op() == OpAwait || // may branch to scheduler and suspend execution inst->op() == OpFCallAwait || // similar to Await inst->op() == OpClsCnsD) { // side exits if misses in the RDS return true; } // In profiling mode, don't trace through a control flow merge point, // however, allow inlining of default parameter funclets assertx(profData()); if (profData()->anyBlockEndsAt(inst->func(), inst->offset()) && !inst->func()->isEntry(inst->nextSk().offset())) { return true; } return false; }
void cgIncProfCounter(IRLS& env, const IRInstruction* inst) {
  // Emit code to bump the profiling counter for this translation.  The
  // counter slot lives in ProfData; we emit a locked (atomic) 64-bit
  // decrement of it, discarding the resulting status flags (fresh unused
  // register from makeReg()).
  auto const transID = inst->extra<TransIDData>()->transId;
  auto const counterAddr = profData()->transCounterAddr(transID);
  auto& v = vmain(env);
  v << decqmlock{v.cns(counterAddr)[0], v.makeReg()};
}
void checkFreeProfData() { // In PGO mode, we free all the profiling data once the main code area reaches // its maximum usage and either the hot area is also full or all the functions // that were profiled have already been optimized. // // However, we keep the data around indefinitely in a few special modes: // * Eval.EnableReusableTC // * TC dumping enabled (Eval.DumpTC/DumpIR/etc.) if (profData() && !RuntimeOption::EvalEnableReusableTC && code().main().used() >= CodeCache::AMaxUsage && (!code().hotEnabled() || profData()->profilingFuncs() == profData()->optimizedFuncs()) && !transdb::enabled()) { discardProfData(); } }
void requestExit() {
  // Per-request JIT teardown: flush and reset stats/timers, let ProfData
  // reset its counters if profiling, then run the ProfData request-exit hook.
  Stats::dump();
  Stats::clear();
  Timer::RequestExit();
  if (profData()) profData()->maybeResetCounters();
  requestExitProfData();

  // Dump per-request perf counters when mcgstats tracing is enabled.
  if (Trace::moduleEnabledRelease(Trace::mcgstats, 1)) {
    Trace::traceRelease("MCGenerator perf counters for %s:\n",
                        g_context->getRequestUrl(50).c_str());
    for (int i = 0; i < tpc_num_counters; i++) {
      Trace::traceRelease("%-20s %10" PRId64 "\n",
                          kPerfCounterNames[i], tl_perf_counters[i]);
    }
    Trace::traceRelease("\n");
  }

  clearDebuggerCatches();
}
void checkFreeProfData() { // In PGO mode, we free all the profiling data once the main code area reaches // its maximum usage and either the hot area is also full or all the functions // that were profiled have already been optimized. // // However, we keep the data around indefinitely in a few special modes: // * Eval.EnableReusableTC // * TC dumping enabled (Eval.DumpTC/DumpIR/etc.) // // Finally, when the RetranslateAll mode is enabled, the ProfData is discarded // via a different mechanism, after all the optimized translations are // generated. if (profData() && !RuntimeOption::EvalEnableReusableTC && code().main().used() >= CodeCache::AMaxUsage && (!code().hotEnabled() || profData()->profilingFuncs() == profData()->optimizedFuncs()) && !transdb::enabled() && !RuntimeOption::EvalJitRetranslateAllRequest) { discardProfData(); } }
// Returns true if `func' should be translated in profiling mode (i.e. we
// should emit or keep emitting Profile translations for it).
bool profileFunc(const Func* func) {
  if (!shouldPGOFunc(func)) return false;

  // If retranslateAll is enabled and we already passed the point that it should
  // be scheduled to execute (via the treadmill), then we can't emit more
  // Profile translations. This is to ensure that, when retranslateAll() runs,
  // no more Profile translations are being added to ProfData.
  if (RuntimeOption::EvalJitRetranslateAllRequest != 0 &&
      hasEnoughProfDataToRetranslateAll()) {
    return false;
  }

  // Already-optimized functions are never re-profiled.
  if (profData()->optimized(func->getFuncId())) return false;

  // If we already started profiling `func', then we return true and skip the
  // other checks below.
  if (profData()->profiling(func->getFuncId())) return true;

  // Don't start profiling new functions if the size of either main or
  // prof is already above Eval.JitAMaxUsage and we already filled hot.
  auto tcUsage = std::max(code().main().used(), code().prof().used());
  if (tcUsage >= CodeCache::AMaxUsage && !code().hotEnabled()) {
    return false;
  }

  // We have two knobs to control the number of functions we're allowed to
  // profile: Eval.JitProfileRequests and Eval.JitProfileBCSize. We profile new
  // functions until either of these limits is exceeded. In practice, we expect
  // to hit the bytecode size limit first, but we keep the request limit around
  // as a safety net.
  if (RuntimeOption::EvalJitProfileBCSize > 0 &&
      profData()->profilingBCSize() >= RuntimeOption::EvalJitProfileBCSize) {
    return false;
  }

  return requestCount() <= RuntimeOption::EvalJitProfileRequests;
}
bool profileSrcKey(SrcKey sk) {
  // Decide whether this SrcKey should execute in a Profile translation.
  if (!shouldPGOFunc(*sk.func())) return false;

  // Already-optimized functions are done; functions we've started profiling
  // keep going.
  auto const fid = sk.funcID();
  if (profData()->optimized(fid)) return false;
  if (profData()->profiling(fid)) return true;

  // Don't start profiling new functions if the size of either main or
  // prof is already above Eval.JitAMaxUsage and we already filled hot.
  auto const mainUsed = code().main().used();
  auto const profUsed = code().prof().used();
  auto const tcFull = std::max(mainUsed, profUsed) >= CodeCache::AMaxUsage;
  if (tcFull && !code().hotEnabled()) return false;

  // We have two knobs to control the number of functions we're allowed to
  // profile: Eval.JitProfileRequests and Eval.JitProfileBCSize. We profile new
  // functions until either of these limits is exceeded. In practice we expect
  // to hit the bytecode size limit first but we keep the request limit around
  // as a safety net.
  auto const bcLimit = RuntimeOption::EvalJitProfileBCSize;
  if (bcLimit > 0 && profData()->profilingBCSize() >= bcLimit) {
    return false;
  }

  return requestCount() <= RuntimeOption::EvalJitProfileRequests;
}
void requestExit() {
  // Per-request JIT teardown.  The current thread must not still hold the
  // write lease at request end.
  always_assert(!GetWriteLease().amOwner());
  TRACE_MOD(Trace::txlease, 2, "%" PRIx64 " write lease stats: %15" PRId64
            " kept, %15" PRId64 " grabbed\n",
            Process::GetThreadIdForTrace(), GetWriteLease().hintKept(),
            GetWriteLease().hintGrabbed());

  // Flush and reset stats/timers, reset profiling counters if profiling, and
  // run the ProfData request-exit hook.
  Stats::dump();
  Stats::clear();
  Timer::RequestExit();
  if (profData()) profData()->maybeResetCounters();
  requestExitProfData();

  // Dump per-request perf counters when mcgstats tracing is enabled.
  if (Trace::moduleEnabledRelease(Trace::mcgstats, 1)) {
    Trace::traceRelease("MCGenerator perf counters for %s:\n",
                        g_context->getRequestUrl(50).c_str());
    for (int i = 0; i < tpc_num_counters; i++) {
      Trace::traceRelease("%-20s %10" PRId64 "\n",
                          kPerfCounterNames[i], tl_perf_counters[i]);
    }
    Trace::traceRelease("\n");
  }

  clearDebuggerCatches();
}
// Select a region for inlining `callee' at call site `sk', or nullptr if no
// profitable region can be found.  Depending on Eval.InlineRegionMode, tries
// a tracelet-based region, a profile-guided CFG region, or both.
RegionDescPtr selectCalleeRegion(const SrcKey& sk,
                                 const Func* callee,
                                 const irgen::IRGS& irgs,
                                 InliningDecider& inl,
                                 int32_t maxBCInstrs) {
  auto const op = sk.pc();
  auto const numArgs = getImm(op, 0).u_IVA;

  // Context type comes from the top of the FPI stack for this call.
  auto const& fpi = irgs.irb->fs().fpiStack();
  assertx(!fpi.empty());
  auto const ctx = fpi.back().ctxType;

  std::vector<Type> argTypes;
  for (int i = numArgs - 1; i >= 0; --i) {
    // DataTypeGeneric is used because we're just passing the locals into the
    // callee. It's up to the callee to constrain further if needed.
    auto type = irgen::publicTopType(irgs, BCSPRelOffset{i});

    // If we don't have sufficient type information to inline the region return
    // early
    if (!(type <= TCell) && !(type <= TBoxedCell) && !(type <= TCls)) {
      return nullptr;
    }
    argTypes.push_back(type);
  }

  const auto mode = RuntimeOption::EvalInlineRegionMode;

  if (mode == "tracelet" || mode == "both") {
    auto region = selectCalleeTracelet(
      callee,
      numArgs,
      ctx,
      argTypes,
      maxBCInstrs
    );
    auto const maxCost = RuntimeOption::EvalHHIRInliningMaxVasmCost;
    if (region && inl.shouldInline(sk, callee, *region, maxCost)) return region;
    // In "tracelet" mode we don't fall back to the CFG selector.
    if (mode == "tracelet") return nullptr;
  }

  // Profile-guided CFG region selection (requires ProfData).
  if (profData()) {
    auto region = selectCalleeCFG(callee, numArgs, ctx, argTypes, maxBCInstrs);
    auto const maxCost = RuntimeOption::EvalHHIRInliningMaxVasmCost;
    if (region && inl.shouldInline(sk, callee, *region, maxCost)) return region;
  }

  return nullptr;
}
void cgCheckCold(IRLS& env, const IRInstruction* inst) {
  auto const transID = inst->extra<CheckCold>()->transId;
  auto const counterAddr = profData()->transCounterAddr(transID);
  auto& v = vmain(env);

  // Atomically decrement the translation's counter; the resulting condition
  // flags (in `sf') drive the cold-check branch below.
  auto const sf = v.makeReg();
  v << decqmlock{v.cns(counterAddr)[0], sf};

  if (RuntimeOption::EvalJitFilterLease) {
    // With lease filtering, fall into an extra block that calls
    // couldAcquireOptimizeLease(); only branch to `taken' when that helper
    // returns nonzero, otherwise continue to `next'.
    auto filter = v.makeBlock();
    v << jcc{CC_LE, sf, {label(env, inst->next()), filter}};
    v = filter;
    auto const res = v.makeReg();
    cgCallHelper(v, env, CallSpec::direct(couldAcquireOptimizeLease),
                 callDest(res), SyncOptions::None,
                 argGroup(env, inst).immPtr(inst->func()));
    auto const sf2 = v.makeReg();
    v << testb{res, res, sf2};
    v << jcc{CC_NZ, sf2, {label(env, inst->next()), label(env, inst->taken())}};
  } else {
    // No filtering: branch straight on the decrement's flags.
    v << jcc{CC_LE, sf, {label(env, inst->next()), label(env, inst->taken())}};
  }
}
// Redirect all recorded callers of a profiled prologue to the new prologue at
// `start'.  If the function has a default-value funclet for this argument
// count, invalidate it and return its SrcKey/TransID so the caller can
// retranslate it; otherwise returns folly::none.
folly::Optional<std::pair<SrcKey,TransID>>
updateFuncPrologue(TCA start, ProfTransRec* rec) {
  auto func = rec->func();
  auto nArgs = rec->prologueArgs();

  auto codeLock = lockCode();

  // Smash callers of the old prologue with the address of the new one.
  for (auto toSmash : rec->mainCallers()) {
    smashCall(toSmash, start);
  }

  // If the prologue has a matching guard, then smash its guard-callers as
  // well.
  auto const guard = funcGuardFromPrologue(start, func);
  if (funcGuardMatches(guard, func)) {
    for (auto toSmash : rec->guardCallers()) {
      smashCall(toSmash, guard);
    }
  }
  rec->clearAllCallers();

  // If this prologue has a DV funclet, then invalidate it and return its SrcKey
  // and TransID
  if (nArgs < func->numNonVariadicParams()) {
    auto paramInfo = func->params()[nArgs];
    if (paramInfo.hasDefaultValue()) {
      SrcKey funcletSK(func, paramInfo.funcletOff, false);
      auto funcletTransId = profData()->dvFuncletTransId(func, nArgs);
      if (funcletTransId != kInvalidTransID) {
        invalidateSrcKey(funcletSK);
        return std::make_pair(funcletSK, funcletTransId);
      }
    }
  }
  return folly::none;
}
// Generate a prologue for `func' taking `argc' arguments, of the given
// TransKind.  Returns the address of the new prologue, or nullptr if no new
// translation may be created.  Takes the code lock, emits the prologue,
// optionally relocates it (reusable TC), records metadata, and publishes it
// via func->setPrologue().
static TCA emitFuncPrologueImpl(Func* func, int argc, TransKind kind) {
  if (!newTranslation()) {
    return nullptr;
  }

  // Prologue slot index: one slot per 0..nparams args, plus one overflow slot.
  const int nparams = func->numNonVariadicParams();
  const int paramIndex = argc <= nparams ? argc : nparams + 1;

  auto const funcBody = SrcKey{func, func->getEntryForNumArgs(argc), false};

  profileSetHotFuncAttr();
  auto codeLock = lockCode();
  auto codeView = code().view(kind);
  TCA mainOrig = codeView.main().frontier();
  CGMeta fixups;

  // If we're close to a cache line boundary, just burn some space to
  // try to keep the func and its body on fewer total lines.
  align(codeView.main(), &fixups, Alignment::CacheLineRoundUp,
        AlignContext::Dead);

  TransLocMaker maker(codeView);
  maker.markStart();

  // Careful: this isn't necessarily the real entry point. For funcIsMagic
  // prologues, this is just a possible prologue.
  TCA aStart = codeView.main().frontier();

  // Give the prologue a TransID if we have profiling data.
  auto const transID = [&]{
    if (kind == TransKind::ProfPrologue) {
      auto const profData = jit::profData();
      auto const id = profData->allocTransID();
      profData->addTransProfPrologue(id, funcBody, paramIndex);
      return id;
    }
    if (profData() && transdb::enabled()) {
      return profData()->allocTransID();
    }
    return kInvalidTransID;
  }();

  TCA start = genFuncPrologue(transID, kind, func, argc, codeView, fixups);

  auto loc = maker.markEnd();
  auto metaLock = lockMetadata();

  if (RuntimeOption::EvalEnableReusableTC) {
    // In reusable-TC mode, the freshly-emitted prologue may be relocated;
    // keep the original bounds around for tracing.
    TCA UNUSED ms = loc.mainStart(), me = loc.mainEnd(),
               cs = loc.coldStart(), ce = loc.coldEnd(),
               fs = loc.frozenStart(), fe = loc.frozenEnd(),
               oldStart = start;

    auto const did_relocate = relocateNewTranslation(loc, codeView, fixups,
                                                     &start);

    if (did_relocate) {
      FTRACE_MOD(Trace::reusetc, 1,
                 "Relocated prologue for func {} (id = {}) "
                 "from M[{}, {}], C[{}, {}], F[{}, {}] to M[{}, {}] "
                 "C[{}, {}] F[{}, {}] orig start @ {} new start @ {}\n",
                 func->fullName()->data(), func->getFuncId(), ms, me, cs, ce,
                 fs, fe, loc.mainStart(), loc.mainEnd(), loc.coldStart(),
                 loc.coldEnd(), loc.frozenStart(), loc.frozenEnd(),
                 oldStart, start);
    } else {
      FTRACE_MOD(Trace::reusetc, 1,
                 "Created prologue for func {} (id = {}) at "
                 "M[{}, {}], C[{}, {}], F[{}, {}] start @ {}\n",
                 func->fullName()->data(), func->getFuncId(), ms, me, cs, ce,
                 fs, fe, oldStart);
    }

    recordFuncPrologue(func, loc);
    if (loc.mainStart() != aStart) {
      codeView.main().setFrontier(mainOrig); // we may have shifted to align
    }
  }

  if (RuntimeOption::EvalPerfRelocate) {
    GrowableVector<IncomingBranch> incomingBranches;
    recordPerfRelocMap(loc.mainStart(), loc.mainEnd(),
                       loc.coldCodeStart(), loc.coldEnd(),
                       funcBody, paramIndex,
                       incomingBranches,
                       fixups);
  }
  fixups.process(nullptr);

  assertx(funcGuardMatches(funcGuardFromPrologue(start, func), func));
  assertx(code().isValidCodeAddress(start));

  TRACE(2, "funcPrologue %s(%d) setting prologue %p\n",
        func->fullName()->data(), argc, start);
  func->setPrologue(paramIndex, start);

  assertx(kind == TransKind::LivePrologue ||
          kind == TransKind::ProfPrologue ||
          kind == TransKind::OptPrologue);

  auto tr = maker.rec(funcBody, transID, kind);
  transdb::addTranslation(tr);
  if (RuntimeOption::EvalJitUseVtuneAPI) {
    reportTraceletToVtune(func->unit(), func, tr);
  }

  recordGdbTranslation(funcBody, func, codeView.main(), loc.mainStart(),
                       false, true);
  recordBCInstr(OpFuncPrologue, loc.mainStart(), loc.mainEnd(), false);

  return start;
}
// Render this TransRec as a human-readable multi-line string for TC dumps.
// `profCount' is the profiling execution count to report for this
// translation.  Invalid records print a placeholder entry.
std::string TransRec::print(uint64_t profCount) const {
  if (!isValid()) return "Translation -1 {\n}\n\n";

  std::string ret;
  std::string funcName = src.func()->fullName()->data();

  // Split up the call to prevent template explosion
  folly::format(
    &ret,
    "Translation {} {{\n"
    "  src.md5 = {}\n"
    "  src.funcId = {}\n"
    "  src.funcName = {}\n"
    "  src.resumed = {}\n"
    "  src.bcStart = {}\n"
    "  src.blocks = {}\n",
    id, md5, src.funcID(),
    funcName.empty() ? "Pseudo-main" : funcName,
    (int32_t)src.resumed(),
    src.offset(),
    blocks.size());

  for (auto const& block : blocks) {
    folly::format(
      &ret,
      "    {} {} {}\n",
      block.md5, block.bcStart, block.bcPast);
  }

  folly::format( &ret, "  src.guards = {}\n", guards.size());

  for (auto const& guard : guards) {
    folly::format( &ret, "    {}\n", guard);
  }

  folly::format(
    &ret,
    "  kind = {} ({})\n"
    "  hasLoop = {:d}\n"
    "  aStart = {}\n"
    "  aLen = {:#x}\n"
    "  coldStart = {}\n"
    "  coldLen = {:#x}\n"
    "  frozenStart = {}\n"
    "  frozenLen = {:#x}\n",
    static_cast<uint32_t>(kind), show(kind), hasLoop,
    aStart, aLen, acoldStart, acoldLen, afrozenStart, afrozenLen);

  // Prepend any target profile data to annotations list.
  if (auto const profD = profData()) {
    auto targetProfs = profD->getTargetProfiles(id);
    folly::format(&ret, "  annotations = {}\n",
                  annotations.size() + targetProfs.size());
    for (auto const& tProf : targetProfs) {
      folly::format(&ret, "     [\"TargetProfile {}: {}\"] = {}\n",
                    tProf.key.bcOff, tProf.key.name->data(),
                    tProf.debugInfo);
    }
  } else {
    folly::format(&ret, "  annotations = {}\n", annotations.size());
  }

  for (auto const& annotation : annotations) {
    folly::format(&ret, "     [\"{}\"] = {}\n",
                  annotation.first, annotation.second);
  }

  folly::format(
    &ret,
    "  profCount = {}\n"
    "  bcMapping = {}\n",
    profCount, bcMapping.size());

  for (auto const& info : bcMapping) {
    folly::format(
      &ret,
      "    {} {} {} {} {}\n",
      info.md5, info.bcStart,
      info.aStart, info.acoldStart,
      info.afrozenStart);
  }

  ret += "}\n\n";
  return ret;
}