Example #1
0
/*
 * Decide whether `inst' has to be the last instruction of the profiling
 * block currently being formed.
 *
 * Returns true for non-call control flow, for the specific opcodes listed
 * below, and for instructions at which ProfData reports that some block ends
 * (unless the following SrcKey is one of the function's entry points).
 */
bool instrBreaksProfileBB(const NormalizedInstruction* inst) {
  auto const opcode = inst->op();

  if (instrIsNonCallControlFlow(opcode)) return true;
  // Await may branch to the scheduler and suspend execution.
  if (opcode == OpAwait) return true;
  // FCallAwait behaves similarly to Await.
  if (opcode == OpFCallAwait) return true;
  // ClsCnsD side exits if it misses in the RDS.
  if (opcode == OpClsCnsD) return true;

  // While profiling we never trace through a control flow merge point.  The
  // one exception is a merge into a function entry, which permits inlining
  // of default parameter funclets.
  assertx(profData());
  auto const mergePoint =
    profData()->anyBlockEndsAt(inst->func(), inst->offset());
  if (!mergePoint) return false;

  return !inst->func()->isEntry(inst->nextSk().offset());
}
Example #2
0
/*
 * Code-generate IncProfCounter: emit a decqmlock on the counter that ProfData
 * holds for the translation named by the instruction's TransIDData.  The
 * second decqmlock operand is a fresh register that is not read here.
 */
void cgIncProfCounter(IRLS& env, const IRInstruction* inst) {
  auto& v = vmain(env);

  auto const profTransId = inst->extra<TransIDData>()->transId;
  auto const counter = profData()->transCounterAddr(profTransId);

  v << decqmlock{v.cns(counter)[0], v.makeReg()};
}
Example #3
0
/*
 * Discard the profiling data once it is no longer needed.
 *
 * In PGO mode the data is freed when the main code area has reached its
 * maximum usage and either the hot area is also full or every profiled
 * function has already been optimized.  It is kept indefinitely when
 * Eval.EnableReusableTC is set or when TC dumping (Eval.DumpTC/DumpIR/etc.)
 * is enabled.
 */
void checkFreeProfData() {
  // Nothing to free.
  if (!profData()) return;

  // Special mode: reusable TC keeps the data around.
  if (RuntimeOption::EvalEnableReusableTC) return;

  // The main code area still has room.
  if (code().main().used() < CodeCache::AMaxUsage) return;

  // Hot is still available and some profiled functions are not optimized yet.
  if (code().hotEnabled() &&
      profData()->profilingFuncs() != profData()->optimizedFuncs()) {
    return;
  }

  // Special mode: TC dumping keeps the data around.
  if (transdb::enabled()) return;

  discardProfData();
}
Example #4
0
/*
 * Per-request teardown: dump and clear stats, notify the timer, let ProfData
 * (if present) reset its counters, run the ProfData request-exit hook,
 * optionally print the perf counters, and clear the debugger catches.
 */
void requestExit() {
  Stats::dump();
  Stats::clear();
  Timer::RequestExit();

  if (auto const pd = profData()) {
    pd->maybeResetCounters();
  }
  requestExitProfData();

  // Only report the counters when the mcgstats trace module is enabled.
  if (Trace::moduleEnabledRelease(Trace::mcgstats, 1)) {
    auto url = g_context->getRequestUrl(50);
    Trace::traceRelease("MCGenerator perf counters for %s:\n", url.c_str());

    for (int counter = 0; counter < tpc_num_counters; ++counter) {
      Trace::traceRelease("%-20s %10" PRId64 "\n",
                          kPerfCounterNames[counter],
                          tl_perf_counters[counter]);
    }
    Trace::traceRelease("\n");
  }

  clearDebuggerCatches();
}
Example #5
0
/*
 * Discard the profiling data once it is no longer needed.
 *
 * In PGO mode the data is freed when the main code area has reached its
 * maximum usage and either the hot area is also full or every profiled
 * function has already been optimized.  It is kept indefinitely when
 * Eval.EnableReusableTC is set or when TC dumping (Eval.DumpTC/DumpIR/etc.)
 * is enabled.
 *
 * When the RetranslateAll mode is enabled this function does nothing: the
 * ProfData is discarded via a different mechanism, after all the optimized
 * translations are generated.
 */
void checkFreeProfData() {
  // Nothing to free.
  if (!profData()) return;

  // Special mode: reusable TC keeps the data around.
  if (RuntimeOption::EvalEnableReusableTC) return;

  // The main code area still has room.
  if (code().main().used() < CodeCache::AMaxUsage) return;

  // Hot is still available and some profiled functions are not optimized yet.
  if (code().hotEnabled() &&
      profData()->profilingFuncs() != profData()->optimizedFuncs()) {
    return;
  }

  // Special mode: TC dumping keeps the data around.
  if (transdb::enabled()) return;

  // RetranslateAll takes care of discarding the data itself.
  if (RuntimeOption::EvalJitRetranslateAllRequest) return;

  discardProfData();
}
Example #6
0
/*
 * Whether a Profile translation may be emitted for `func'.
 */
bool profileFunc(const Func* func) {
  if (!shouldPGOFunc(func)) return false;

  // With retranslateAll enabled, once we are past the point where it should
  // be scheduled (via the treadmill) no further Profile translations may be
  // emitted.  This guarantees that nothing is still being added to ProfData
  // when retranslateAll() runs.
  if (RuntimeOption::EvalJitRetranslateAllRequest != 0) {
    if (hasEnoughProfDataToRetranslateAll()) return false;
  }

  auto const funcId = func->getFuncId();
  if (profData()->optimized(funcId)) return false;

  // A function whose profiling has already started keeps being profiled; the
  // remaining checks only gate *new* functions.
  if (profData()->profiling(funcId)) return true;

  // No new functions once either main or prof is above Eval.JitAMaxUsage and
  // hot has already been filled.
  auto const mainUsed = code().main().used();
  auto const profUsed = code().prof().used();
  auto const tcUsage = std::max(mainUsed, profUsed);
  if (tcUsage >= CodeCache::AMaxUsage) {
    if (!code().hotEnabled()) return false;
  }

  // Two knobs bound how many functions get profiled: Eval.JitProfileRequests
  // and Eval.JitProfileBCSize.  New functions are profiled until either limit
  // is exceeded.  The bytecode size limit is expected to trigger first in
  // practice; the request limit remains as a safety net.
  auto const bcSizeLimit = RuntimeOption::EvalJitProfileBCSize;
  if (bcSizeLimit > 0 && profData()->profilingBCSize() >= bcSizeLimit) {
    return false;
  }

  auto const withinRequestLimit =
    requestCount() <= RuntimeOption::EvalJitProfileRequests;
  return withinRequestLimit;
}
Example #7
0
/*
 * Whether a Profile translation may be emitted for the function that `sk'
 * belongs to.
 */
bool profileSrcKey(SrcKey sk) {
  if (!shouldPGOFunc(*sk.func())) return false;

  auto const funcId = sk.funcID();
  if (profData()->optimized(funcId)) return false;
  // Already being profiled: the limits below only gate new functions.
  if (profData()->profiling(funcId)) return true;

  // No new functions once either main or prof is above Eval.JitAMaxUsage and
  // hot has already been filled.
  auto const mainUsed = code().main().used();
  auto const profUsed = code().prof().used();
  auto const tcUsage = std::max(mainUsed, profUsed);
  if (tcUsage >= CodeCache::AMaxUsage) {
    if (!code().hotEnabled()) return false;
  }

  // Two knobs bound how many functions get profiled: Eval.JitProfileRequests
  // and Eval.JitProfileBCSize.  New functions are profiled until either limit
  // is exceeded.  The bytecode size limit is expected to trigger first in
  // practice; the request limit remains as a safety net.
  auto const bcSizeLimit = RuntimeOption::EvalJitProfileBCSize;
  if (bcSizeLimit > 0 && profData()->profilingBCSize() >= bcSizeLimit) {
    return false;
  }

  auto const withinRequestLimit =
    requestCount() <= RuntimeOption::EvalJitProfileRequests;
  return withinRequestLimit;
}
Example #8
0
/*
 * Per-request teardown.  The calling thread must not own the write lease.
 * Traces the write lease hint statistics, dumps and clears stats, notifies
 * the timer, lets ProfData (if present) reset its counters, runs the ProfData
 * request-exit hook, optionally prints the perf counters, and clears the
 * debugger catches.
 */
void requestExit() {
  always_assert(!GetWriteLease().amOwner());
  TRACE_MOD(Trace::txlease, 2, "%" PRIx64 " write lease stats: %15" PRId64
            " kept, %15" PRId64 " grabbed\n",
            Process::GetThreadIdForTrace(), GetWriteLease().hintKept(),
            GetWriteLease().hintGrabbed());

  Stats::dump();
  Stats::clear();
  Timer::RequestExit();

  if (auto const pd = profData()) {
    pd->maybeResetCounters();
  }
  requestExitProfData();

  // Only report the counters when the mcgstats trace module is enabled.
  if (Trace::moduleEnabledRelease(Trace::mcgstats, 1)) {
    auto url = g_context->getRequestUrl(50);
    Trace::traceRelease("MCGenerator perf counters for %s:\n", url.c_str());

    for (int counter = 0; counter < tpc_num_counters; ++counter) {
      Trace::traceRelease("%-20s %10" PRId64 "\n",
                          kPerfCounterNames[counter],
                          tl_perf_counters[counter]);
    }
    Trace::traceRelease("\n");
  }

  clearDebuggerCatches();
}
Example #9
0
/*
 * Select a region of `callee' to inline at the call at `sk'.
 *
 * Depending on Eval.InlineRegionMode a tracelet region is tried first; unless
 * the mode is exactly "tracelet", a CFG region is tried next when profiling
 * data is available.  A candidate is returned only if `inl' agrees to inline
 * it; otherwise, or if an argument's type is too imprecise, the result is
 * nullptr.
 */
RegionDescPtr selectCalleeRegion(const SrcKey& sk,
                                 const Func* callee,
                                 const irgen::IRGS& irgs,
                                 InliningDecider& inl,
                                 int32_t maxBCInstrs) {
  auto const numArgs = getImm(sk.pc(), 0).u_IVA;

  auto const& fpiStack = irgs.irb->fs().fpiStack();
  assertx(!fpiStack.empty());
  auto const ctx = fpiStack.back().ctxType;

  // Collect the argument types, walking the stack from offset numArgs - 1
  // down to 0.
  std::vector<Type> argTypes;
  for (int i = numArgs - 1; i >= 0; --i) {
    // The values are merely forwarded to the callee, so no type constraint
    // beyond DataTypeGeneric is applied here; the callee constrains further
    // if it needs to.
    auto const argType = irgen::publicTopType(irgs, BCSPRelOffset{i});

    // Give up early when the type information is insufficient for inlining.
    auto const usable =
      argType <= TCell || argType <= TBoxedCell || argType <= TCls;
    if (!usable) return nullptr;

    argTypes.push_back(argType);
  }

  auto const maxCost = RuntimeOption::EvalHHIRInliningMaxVasmCost;
  const auto mode = RuntimeOption::EvalInlineRegionMode;
  auto const traceletOnly = mode == "tracelet";

  if (traceletOnly || mode == "both") {
    auto tracelet =
      selectCalleeTracelet(callee, numArgs, ctx, argTypes, maxBCInstrs);
    if (tracelet && inl.shouldInline(sk, callee, *tracelet, maxCost)) {
      return tracelet;
    }
    if (traceletOnly) return nullptr;
  }

  // The CFG selector is only attempted when profiling data exists.
  if (!profData()) return nullptr;

  auto cfg = selectCalleeCFG(callee, numArgs, ctx, argTypes, maxBCInstrs);
  if (cfg && inl.shouldInline(sk, callee, *cfg, maxCost)) return cfg;

  return nullptr;
}
Example #10
0
/*
 * Emit machine code for the CheckCold IR instruction.
 *
 * A decqmlock is applied to the counter ProfData keeps for the translation
 * named in the instruction's CheckCold extra data, and a jcc on CC_LE is then
 * emitted over the resulting flags.  Ultimately control reaches either
 * inst->next() or inst->taken().
 *
 * NOTE(review): which entry of a jcc target pair is used when the condition
 * holds is defined by jcc itself and is not visible here --- confirm before
 * relying on the polarity of any branch below.
 */
void cgCheckCold(IRLS& env, const IRInstruction* inst) {
  auto const transID = inst->extra<CheckCold>()->transId;
  auto const counterAddr = profData()->transCounterAddr(transID);
  auto& v = vmain(env);

  // `sf' receives the flags produced by the decqmlock.
  auto const sf = v.makeReg();
  v << decqmlock{v.cns(counterAddr)[0], sf};
  if (RuntimeOption::EvalJitFilterLease) {
    // With RuntimeOption::EvalJitFilterLease set, the second target of the
    // counter branch is an intermediate block rather than inst->taken().
    auto filter = v.makeBlock();
    v << jcc{CC_LE, sf, {label(env, inst->next()), filter}};
    // Everything below is emitted into the `filter' block.
    v = filter;
    // Call couldAcquireOptimizeLease(inst->func()); its result lands in `res'.
    auto const res = v.makeReg();
    cgCallHelper(v, env, CallSpec::direct(couldAcquireOptimizeLease),
                 callDest(res), SyncOptions::None,
                 argGroup(env, inst).immPtr(inst->func()));
    // Test the helper's result against itself and branch on CC_NZ between
    // inst->next() and inst->taken().
    auto const sf2 = v.makeReg();
    v << testb{res, res, sf2};
    v << jcc{CC_NZ, sf2, {label(env, inst->next()), label(env, inst->taken())}};
  } else {
    // No lease filtering: branch straight between inst->next() and
    // inst->taken() on the counter flags.
    v << jcc{CC_LE, sf, {label(env, inst->next()), label(env, inst->taken())}};
  }
}
Example #11
0
/*
 * Redirect every recorded caller of the prologue described by `rec' to the
 * new prologue at `start', then drop the recorded callers.
 *
 * If the prologue's argument count selects a parameter with a default value
 * whose DV funclet has a valid TransID in ProfData, that funclet's SrcKey is
 * invalidated and the (SrcKey, TransID) pair is returned.  In every other
 * case the result is folly::none.
 */
folly::Optional<std::pair<SrcKey,TransID>>
updateFuncPrologue(TCA start, ProfTransRec* rec) {
  auto func = rec->func();
  auto nArgs = rec->prologueArgs();

  auto codeLock = lockCode();

  // Point the callers of the old prologue at the new one.
  for (auto caller : rec->mainCallers()) {
    smashCall(caller, start);
  }

  // Callers that went through the guard are only re-smashed when the new
  // prologue has a matching guard.
  auto const guard = funcGuardFromPrologue(start, func);
  if (funcGuardMatches(guard, func)) {
    for (auto caller : rec->guardCallers()) {
      smashCall(caller, guard);
    }
  }
  rec->clearAllCallers();

  // The remainder deals with the DV funclet, if this prologue has one.
  if (!(nArgs < func->numNonVariadicParams())) return folly::none;

  auto paramInfo = func->params()[nArgs];
  if (!paramInfo.hasDefaultValue()) return folly::none;

  SrcKey funcletSK(func, paramInfo.funcletOff, false);
  auto funcletTransId = profData()->dvFuncletTransId(func, nArgs);
  if (funcletTransId == kInvalidTransID) return folly::none;

  // Invalidate the funclet and hand its SrcKey and TransID to the caller.
  invalidateSrcKey(funcletSK);
  return std::make_pair(funcletSK, funcletTransId);
}
Example #12
0
/*
 * Generate a prologue of kind `kind' for `func' being called with `argc'
 * arguments, install it on the function, and record it with the various
 * bookkeeping/debugging facilities.
 *
 * Returns the prologue start address produced by genFuncPrologue() (possibly
 * updated by relocateNewTranslation() when Eval.EnableReusableTC is set), or
 * nullptr if newTranslation() refuses a new translation.
 *
 * `kind' must be one of LivePrologue, ProfPrologue or OptPrologue (asserted
 * below).
 */
static TCA emitFuncPrologueImpl(Func* func, int argc, TransKind kind) {
  if (!newTranslation()) {
    return nullptr;
  }

  // Calls passing more than the non-variadic parameter count all share the
  // single prologue slot nparams + 1.
  const int nparams = func->numNonVariadicParams();
  const int paramIndex = argc <= nparams ? argc : nparams + 1;

  // SrcKey of the function body entry used for `argc' arguments.
  auto const funcBody = SrcKey{func, func->getEntryForNumArgs(argc), false};

  profileSetHotFuncAttr();
  auto codeLock = lockCode();
  auto codeView = code().view(kind);
  // Frontier of main before any alignment padding; it is restored further
  // down in the reusable-TC path.
  TCA mainOrig = codeView.main().frontier();
  CGMeta fixups;

  // If we're close to a cache line boundary, just burn some space to
  // try to keep the func and its body on fewer total lines.
  align(codeView.main(), &fixups, Alignment::CacheLineRoundUp,
        AlignContext::Dead);

  TransLocMaker maker(codeView);
  maker.markStart();

  // Careful: this isn't necessarily the real entry point. For funcIsMagic
  // prologues, this is just a possible prologue.
  TCA aStart = codeView.main().frontier();

  // Give the prologue a TransID if we have profiling data.
  //  - ProfPrologue: allocate an ID and register the prologue with ProfData.
  //  - Otherwise, if ProfData exists and transdb is enabled: allocate an ID
  //    only.
  //  - Otherwise: kInvalidTransID.
  auto const transID = [&]{
    if (kind == TransKind::ProfPrologue) {
      auto const profData = jit::profData();
      auto const id = profData->allocTransID();
      profData->addTransProfPrologue(id, funcBody, paramIndex);
      return id;
    }
    if (profData() && transdb::enabled()) {
      return profData()->allocTransID();
    }
    return kInvalidTransID;
  }();

  TCA start = genFuncPrologue(transID, kind, func, argc, codeView, fixups);

  auto loc = maker.markEnd();
  auto metaLock = lockMetadata();

  if (RuntimeOption::EvalEnableReusableTC) {
    // Snapshot the ranges and start address before relocation; these are only
    // consumed by the trace statements below (presumably why they are marked
    // UNUSED).
    TCA UNUSED ms = loc.mainStart(), me = loc.mainEnd(),
               cs = loc.coldStart(), ce = loc.coldEnd(),
               fs = loc.frozenStart(), fe = loc.frozenEnd(),
               oldStart = start;

    // May rewrite `loc' and `start' if the translation gets relocated.
    auto const did_relocate = relocateNewTranslation(loc, codeView, fixups,
                                                     &start);

    if (did_relocate) {
      FTRACE_MOD(Trace::reusetc, 1,
                 "Relocated prologue for func {} (id = {}) "
                 "from M[{}, {}], C[{}, {}], F[{}, {}] to M[{}, {}] "
                 "C[{}, {}] F[{}, {}] orig start @ {} new start @ {}\n",
                 func->fullName()->data(), func->getFuncId(),
                 ms, me, cs, ce, fs, fe, loc.mainStart(), loc.mainEnd(),
                 loc.coldStart(), loc.coldEnd(), loc.frozenStart(),
                 loc.frozenEnd(), oldStart, start);
    } else {
      FTRACE_MOD(Trace::reusetc, 1,
                 "Created prologue for func {} (id = {}) at "
                 "M[{}, {}], C[{}, {}], F[{}, {}] start @ {}\n",
                 func->fullName()->data(), func->getFuncId(),
                 ms, me, cs, ce, fs, fe, oldStart);
    }

    recordFuncPrologue(func, loc);
    // The prologue no longer begins at the aligned frontier, so move the main
    // frontier back to where it was before aligning.
    if (loc.mainStart() != aStart) {
      codeView.main().setFrontier(mainOrig); // we may have shifted to align
    }
  }
  if (RuntimeOption::EvalPerfRelocate) {
    // A prologue is recorded with an empty set of incoming branches.
    GrowableVector<IncomingBranch> incomingBranches;
    recordPerfRelocMap(loc.mainStart(), loc.mainEnd(),
                       loc.coldCodeStart(), loc.coldEnd(),
                       funcBody, paramIndex,
                       incomingBranches,
                       fixups);
  }
  fixups.process(nullptr);

  assertx(funcGuardMatches(funcGuardFromPrologue(start, func), func));
  assertx(code().isValidCodeAddress(start));

  // Install the new prologue on the function.
  TRACE(2, "funcPrologue %s(%d) setting prologue %p\n",
        func->fullName()->data(), argc, start);
  func->setPrologue(paramIndex, start);

  assertx(kind == TransKind::LivePrologue ||
          kind == TransKind::ProfPrologue ||
          kind == TransKind::OptPrologue);

  // Register the translation record with transdb and, optionally, VTune.
  auto tr = maker.rec(funcBody, transID, kind);
  transdb::addTranslation(tr);
  if (RuntimeOption::EvalJitUseVtuneAPI) {
    reportTraceletToVtune(func->unit(), func, tr);
  }


  // Debug info: GDB translation record and bytecode-instruction range.
  recordGdbTranslation(funcBody, func, codeView.main(), loc.mainStart(),
                       false, true);
  recordBCInstr(OpFuncPrologue, loc.mainStart(), loc.mainEnd(), false);

  return start;
}
Example #13
0
std::string
TransRec::print(uint64_t profCount) const {
  if (!isValid()) return "Translation -1 {\n}\n\n";

  std::string ret;
  std::string funcName = src.func()->fullName()->data();

  // Split up the call to prevent template explosion
  folly::format(
    &ret,
    "Translation {} {{\n"
    "  src.md5 = {}\n"
    "  src.funcId = {}\n"
    "  src.funcName = {}\n"
    "  src.resumed = {}\n"
    "  src.bcStart = {}\n"
    "  src.blocks = {}\n",
    id, md5, src.funcID(),
    funcName.empty() ? "Pseudo-main" : funcName,
    (int32_t)src.resumed(),
    src.offset(),
    blocks.size());

  for (auto const& block : blocks) {
    folly::format(
      &ret,
      "    {} {} {}\n",
      block.md5, block.bcStart, block.bcPast);
  }

  folly::format( &ret, "  src.guards = {}\n", guards.size());

  for (auto const& guard : guards) {
    folly::format( &ret, "    {}\n", guard);
  }

  folly::format(
    &ret,
    "  kind = {} ({})\n"
    "  hasLoop = {:d}\n"
    "  aStart = {}\n"
    "  aLen = {:#x}\n"
    "  coldStart = {}\n"
    "  coldLen = {:#x}\n"
    "  frozenStart = {}\n"
    "  frozenLen = {:#x}\n",
    static_cast<uint32_t>(kind), show(kind),
    hasLoop,
    aStart, aLen,
    acoldStart, acoldLen,
    afrozenStart, afrozenLen);

  // Prepend any target profile data to annotations list.
  if (auto const profD = profData()) {
    auto targetProfs = profD->getTargetProfiles(id);
    folly::format(&ret, "  annotations = {}\n",
                  annotations.size() + targetProfs.size());
    for (auto const& tProf : targetProfs) {
      folly::format(&ret, "     [\"TargetProfile {}: {}\"] = {}\n",
                    tProf.key.bcOff, tProf.key.name->data(), tProf.debugInfo);
    }
  } else {
    folly::format(&ret, "  annotations = {}\n", annotations.size());
  }
  for (auto const& annotation : annotations) {
    folly::format(&ret, "     [\"{}\"] = {}\n",
                  annotation.first, annotation.second);
  }

  folly::format(
    &ret,
    "  profCount = {}\n"
    "  bcMapping = {}\n",
    profCount, bcMapping.size());

  for (auto const& info : bcMapping) {
    folly::format(
      &ret,
      "    {} {} {} {} {}\n",
      info.md5, info.bcStart,
      info.aStart, info.acoldStart, info.afrozenStart);
  }

  ret += "}\n\n";
  return ret;
}