void Clusterizer::splitHotColdClusters() {
  // compute the average weight of each cluster
  jit::vector<uint64_t> clusterAvgWgt(m_clusters.size());
  for (size_t c = 0; c < m_clusters.size(); c++) {
    uint64_t totalWeight = 0;
    uint64_t totalSize   = 0;
    for (auto b : m_clusters[c]) {
      const auto numInsts = m_unit.blocks[b].code.size();
      totalSize   += numInsts;
      totalWeight += numInsts * m_scale.weight(b);
    }
    clusterAvgWgt[c] = totalSize == 0 ? 0 : totalWeight / totalSize;
  }

  const auto entryAvgWgt = clusterAvgWgt[m_blockCluster[m_unit.entry]];
  const uint64_t hotThreshold = entryAvgWgt *
                                RuntimeOption::EvalJitLayoutHotThreshold;
  FTRACE(3, "splitHotColdClusters: entryAvgWgt = {} ; hotThreshold = {}\n",
         entryAvgWgt, hotThreshold);

  for (auto cid : m_clusterOrder) {
    if (m_clusters[cid].size() == 0) continue;
    const AreaIndex area = clusterAvgWgt[cid] >= hotThreshold
      ? AreaIndex::Main
      : AreaIndex::Cold;
    FTRACE(3, " -> C{}: {} (avg wgt = {}): ",
           cid, area_names[unsigned(area)], clusterAvgWgt[cid]);
    for (auto b : m_clusters[cid]) {
      // don't reassign blocks that are in frozen
      if (m_unit.blocks[b].area_idx == AreaIndex::Frozen) continue;
      m_unit.blocks[b].area_idx = area;
      FTRACE(3, "{}, ", b);
    }
    FTRACE(3, "\n");
  }
}
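/*
 * Illustrative sketch (not part of the original source): the same
 * hot/cold-splitting heuristic as splitHotColdClusters() above, but written
 * against plain std:: containers and hypothetical simplified Block/cluster
 * types instead of the real Vunit/Clusterizer state.  Each cluster's
 * "temperature" is its instruction-weighted average block weight; a cluster
 * counts as hot iff that average is at least the entry cluster's average
 * scaled by a tunable factor (playing the role of EvalJitLayoutHotThreshold).
 */
#include <cstdint>
#include <vector>

namespace layout_sketch {

struct Block {
  uint64_t numInsts;   // number of instructions in the block
  uint64_t weight;     // profile weight of the block
};

// For each cluster (a list of block ids), decide whether it belongs in the
// hot area.  entryCluster is the cluster containing the entry block.
std::vector<bool> splitHotCold(const std::vector<Block>& blocks,
                               const std::vector<std::vector<size_t>>& clusters,
                               size_t entryCluster,
                               double hotFactor) {
  std::vector<uint64_t> avgWgt(clusters.size(), 0);
  for (size_t c = 0; c < clusters.size(); ++c) {
    uint64_t totalWeight = 0;
    uint64_t totalSize   = 0;
    for (auto b : clusters[c]) {
      totalSize   += blocks[b].numInsts;
      totalWeight += blocks[b].numInsts * blocks[b].weight;
    }
    avgWgt[c] = totalSize == 0 ? 0 : totalWeight / totalSize;
  }

  const auto hotThreshold =
    static_cast<uint64_t>(avgWgt[entryCluster] * hotFactor);

  std::vector<bool> isHot(clusters.size(), false);
  for (size_t c = 0; c < clusters.size(); ++c) {
    isHot[c] = avgWgt[c] >= hotThreshold;
  }
  return isHot;
}

}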
/*
 * Merge two state-stacks.  The stacks must have the same depth.  Returns
 * whether any states changed.
 */
bool merge_into(jit::vector<FrameState>& dst,
                const jit::vector<FrameState>& src) {
  always_assert(src.size() == dst.size());
  auto changed = false;
  for (auto idx = uint32_t{0}; idx < dst.size(); ++idx) {
    changed |= merge_into(dst[idx], src[idx]);
  }
  return changed;
}
bool merge_memory_stack_into(jit::vector<StackState>& dst,
                             const jit::vector<StackState>& src) {
  auto changed = false;
  // We may need to merge different-sized memory stacks, because a predecessor
  // may not touch some stack memory that another pred did.  We just need to
  // conservatively throw away slots that aren't tracked on all preds.
  auto const result_size = std::min(dst.size(), src.size());
  dst.resize(result_size);
  for (auto i = uint32_t{0}; i < result_size; ++i) {
    changed |= merge_into(dst[i], src[i]);
  }
  return changed;
}
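/*
 * Illustrative sketch (not part of the original source): the two element-wise
 * merge patterns used above, written against a hypothetical State type whose
 * merge_into() returns whether the destination changed.  Frame stacks from
 * different predecessors always have the same depth, so mismatched sizes are
 * a bug (hence the assert); memory stacks may differ, so that merge
 * conservatively keeps only the slots tracked by every predecessor.
 */
#include <algorithm>
#include <cassert>
#include <vector>

namespace merge_sketch {

struct State {
  int value;
  bool known;
};

// Merge one element; returns true if dst changed.  Here "merge" just means
// forgetting the value when the two inputs disagree.
bool merge_into(State& dst, const State& src) {
  if (!dst.known) return false;
  if (!src.known || src.value != dst.value) {
    dst.known = false;
    return true;
  }
  return false;
}

// Same-depth merge: the two stacks must have equal size.
bool merge_into(std::vector<State>& dst, const std::vector<State>& src) {
  assert(dst.size() == src.size());
  bool changed = false;
  for (size_t i = 0; i < dst.size(); ++i) {
    changed |= merge_into(dst[i], src[i]);
  }
  return changed;
}

// Different-depth merge: truncate to the common prefix, then merge that.
// Returns whether any surviving slot changed.
bool merge_truncating(std::vector<State>& dst, const std::vector<State>& src) {
  bool changed = false;
  dst.resize(std::min(dst.size(), src.size()));
  for (size_t i = 0; i < dst.size(); ++i) {
    changed |= merge_into(dst[i], src[i]);
  }
  return changed;
}

}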
// overall emitter
void Vgen::emit(jit::vector<Vlabel>& labels) {
  // Some structures here track where we put things just for debug printing.
  struct Snippet {
    const IRInstruction* origin;
    TcaRange range;
  };
  struct BlockInfo {
    jit::vector<Snippet> snippets;
  };

  // This is under the printir tracemod because it mostly shows you IR and
  // machine code, not vasm and machine code (not implemented).
  bool shouldUpdateAsmInfo = !!m_asmInfo &&
    Trace::moduleEnabledRelease(HPHP::Trace::printir, kCodeGenLevel);

  std::vector<TransBCMapping>* bcmap = nullptr;
  if (mcg->tx().isTransDBEnabled() || RuntimeOption::EvalJitUseVtuneAPI) {
    bcmap = &mcg->cgFixups().m_bcMap;
  }

  jit::vector<jit::vector<BlockInfo>> areaToBlockInfos;
  if (shouldUpdateAsmInfo) {
    areaToBlockInfos.resize(areas.size());
    for (auto& r : areaToBlockInfos) {
      r.resize(unit.blocks.size());
    }
  }

  for (int i = 0, n = labels.size(); i < n; ++i) {
    assertx(checkBlockEnd(unit, labels[i]));

    auto b = labels[i];
    auto& block = unit.blocks[b];

    codeBlock = &area(block.area).code;
    vixl::MacroAssembler as { *codeBlock };
    a = &as;
    auto blockStart = a->frontier();
    addrs[b] = blockStart;

    {
      // Compute the next block we will emit into the current area.
      auto cur_start = start(labels[i]);
      auto j = i + 1;
      while (j < labels.size() && cur_start != start(labels[j])) {
        j++;
      }
      next = j < labels.size() ? labels[j] : Vlabel(unit.blocks.size());
    }

    const IRInstruction* currentOrigin = nullptr;
    auto blockInfo = shouldUpdateAsmInfo
      ? &areaToBlockInfos[unsigned(block.area)][b]
      : nullptr;
    auto start_snippet = [&](Vinstr& inst) {
      if (!shouldUpdateAsmInfo) return;

      blockInfo->snippets.push_back(
        Snippet { inst.origin, TcaRange { codeBlock->frontier(), nullptr } }
      );
    };
    auto finish_snippet = [&] {
      if (!shouldUpdateAsmInfo) return;

      if (!blockInfo->snippets.empty()) {
        auto& snip = blockInfo->snippets.back();
        snip.range = TcaRange { snip.range.start(), codeBlock->frontier() };
      }
    };

    for (auto& inst : block.code) {
      if (currentOrigin != inst.origin) {
        finish_snippet();
        start_snippet(inst);
        currentOrigin = inst.origin;
      }

      if (bcmap && inst.origin) {
        auto sk = inst.origin->marker().sk();
        if (bcmap->empty() ||
            bcmap->back().md5 != sk.unit()->md5() ||
            bcmap->back().bcStart != sk.offset()) {
          bcmap->push_back(TransBCMapping{sk.unit()->md5(), sk.offset(),
                                          main().frontier(), cold().frontier(),
                                          frozen().frontier()});
        }
      }

      switch (inst.op) {
#define O(name, imms, uses, defs) \
        case Vinstr::name: emit(inst.name##_); break;
        VASM_OPCODES
#undef O
      }
    }

    finish_snippet();
  }

  for (auto& p : jccs) {
    assertx(addrs[p.target]);
    backend.smashJcc(p.instr, addrs[p.target]);
  }
  for (auto& p : bccs) {
    assertx(addrs[p.target]);
    auto link = (Instruction*) p.instr;
    link->SetImmPCOffsetTarget(Instruction::Cast(addrs[p.target]));
  }
  for (auto& p : jmps) {
    assertx(addrs[p.target]);
    backend.smashJmp(p.instr, addrs[p.target]);
  }
  for (auto& p : catches) {
    mcg->registerCatchBlock(p.instr, addrs[p.target]);
  }
  for (auto& p : ldpoints) {
    CodeCursor cc(main(), p.instr);
    MacroAssembler a{main()};
    a.Mov(X(p.d), points[p.pos]);
  }

  if (!shouldUpdateAsmInfo) {
    return;
  }

  for (auto i = 0; i < areas.size(); ++i) {
    const IRInstruction* currentOrigin = nullptr;
    auto& blockInfos = areaToBlockInfos[i];

    for (auto const blockID : labels) {
      auto const& blockInfo = blockInfos[static_cast<size_t>(blockID)];
      if (blockInfo.snippets.empty()) continue;

      for (auto const& snip : blockInfo.snippets) {
        if (currentOrigin != snip.origin && snip.origin) {
          currentOrigin = snip.origin;
        }

        m_asmInfo->updateForInstruction(
          currentOrigin,
          static_cast<AreaIndex>(i),
          snip.range.start(),
          snip.range.end());
      }
    }
  }
}
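/*
 * Illustrative sketch (not part of the original source): the snippet-grouping
 * idea used by Vgen::emit() above, reduced to plain std:: types.  Consecutive
 * machine instructions that share the same IR origin are coalesced into one
 * [start, end) address range, which is the unit of annotation handed to the
 * asm-info/printir machinery.  Tca and OriginId are hypothetical stand-ins
 * for the real code-address and IRInstruction pointer types.
 */
#include <cstdint>
#include <vector>

namespace snippet_sketch {

using Tca      = const uint8_t*;   // code address
using OriginId = const void*;      // stands in for const IRInstruction*

struct EmittedInstr {
  OriginId origin;                 // IR instruction this machine code came from
  Tca start;                       // first byte emitted for it
  Tca end;                         // one past the last byte emitted for it
};

struct Snippet {
  OriginId origin;
  Tca start;
  Tca end;
};

// Coalesce a block's emitted instructions into per-origin snippets, in
// emission order.
std::vector<Snippet> groupByOrigin(const std::vector<EmittedInstr>& instrs) {
  std::vector<Snippet> snippets;
  OriginId current = nullptr;
  for (auto const& in : instrs) {
    if (snippets.empty() || in.origin != current) {
      snippets.push_back(Snippet { in.origin, in.start, in.end });
      current = in.origin;
    } else {
      snippets.back().end = in.end;  // extend the open snippet
    }
  }
  return snippets;
}

}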