void Clusterizer::splitHotColdClusters() {
  // compute the average weight of each cluster
  jit::vector<uint64_t> clusterAvgWgt(m_clusters.size());
  for (size_t c = 0; c < m_clusters.size(); c++) {
    uint64_t totalWeight = 0;
    uint64_t totalSize   = 0;
    for (auto b : m_clusters[c]) {
      const auto numInsts = m_unit.blocks[b].code.size();
      totalSize   += numInsts;
      totalWeight += numInsts * m_scale.weight(b);
    }
    clusterAvgWgt[c] = totalSize == 0 ? 0 : totalWeight / totalSize;
  }
  const auto entryAvgWgt = clusterAvgWgt[m_blockCluster[m_unit.entry]];
  const uint64_t hotThreshold = entryAvgWgt *
                                RuntimeOption::EvalJitLayoutHotThreshold;
  FTRACE(3, "splitHotColdClusters: entryAvgWgt = {} ; hotThreshold = {}\n",
         entryAvgWgt, hotThreshold);

  for (auto cid : m_clusterOrder) {
    if (m_clusters[cid].size() == 0) continue;
    const AreaIndex area = clusterAvgWgt[cid] >= hotThreshold
      ? AreaIndex::Main
      : AreaIndex::Cold;
    FTRACE(3, " -> C{}: {} (avg wgt = {}): ",
           cid, area_names[unsigned(area)], clusterAvgWgt[cid]);
    for (auto b : m_clusters[cid]) {
      // don't reassign blocks that are in frozen
      if (m_unit.blocks[b].area_idx == AreaIndex::Frozen) continue;
      m_unit.blocks[b].area_idx = area;
      FTRACE(3, "{}, ", b);
    }
    FTRACE(3, "\n");
  }
}
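The hot/cold split above compares each cluster's instruction-weighted average profile weight against a fraction of the entry cluster's average. The following is a minimal standalone sketch of that classification with toy types and a made-up 0.1 ratio standing in for EvalJitLayoutHotThreshold; it is an illustration, not the Clusterizer's actual data structures.

#include <cstdint>
#include <cstdio>
#include <vector>

// Toy stand-in for a block: an instruction count plus a profile weight.
struct ToyBlock { uint64_t numInsts; uint64_t weight; };

// Instruction-weighted average weight of a cluster, mirroring the loop above.
uint64_t avgWeight(const std::vector<ToyBlock>& cluster) {
  uint64_t totalWeight = 0;
  uint64_t totalSize   = 0;
  for (auto& b : cluster) {
    totalSize   += b.numInsts;
    totalWeight += b.numInsts * b.weight;
  }
  return totalSize == 0 ? 0 : totalWeight / totalSize;
}

int main() {
  std::vector<ToyBlock> entry = { {10, 100} };          // avg weight 100
  std::vector<ToyBlock> other = { {5, 4}, {5, 4} };     // avg weight 4
  double hotThresholdRatio = 0.1;   // hypothetical value for the threshold knob
  auto hot = avgWeight(other) >= avgWeight(entry) * hotThresholdRatio;
  std::printf("other cluster is %s\n", hot ? "hot (Main)" : "cold (Cold)");
}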
/*
 * Merge two state-stacks.  The stacks must have the same depth.  Returns
 * whether any states changed.
 */
bool merge_into(jit::vector<FrameState>& dst,
                const jit::vector<FrameState>& src) {
  always_assert(src.size() == dst.size());
  auto changed = false;
  for (auto idx = uint32_t{0}; idx < dst.size(); ++idx) {
    changed |= merge_into(dst[idx], src[idx]);
  }
  return changed;
}
void Clusterizer::initClusters() {
  m_clusters.resize(m_unit.blocks.size());
  m_blockCluster.resize(m_unit.blocks.size());
  for (auto b : m_blocks) {
    m_clusters[b].push_back(b);
    m_blockCluster[b] = b;
  }
}
bool merge_memory_stack_into(jit::vector<StackState>& dst,
                             const jit::vector<StackState>& src) {
  auto changed = false;
  // We may need to merge different-sized memory stacks, because a predecessor
  // may not touch some stack memory that another pred did.  We just need to
  // conservatively throw away slots that aren't tracked on all preds.
  auto const result_size = std::min(dst.size(), src.size());
  dst.resize(result_size);
  for (auto i = uint32_t{0}; i < result_size; ++i) {
    changed |= merge_into(dst[i], src[i]);
  }
  return changed;
}
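Unlike the frame-state merge above, which requires equal depth, this merge truncates to the depth tracked on both sides and then joins the common slots element-wise. Here is a minimal sketch of the same pattern under an assumed stand-in type: std::optional<int> plays the role of StackState, and mergeSlot is a hypothetical per-slot join, not the real merge_into.

#include <algorithm>
#include <cstddef>
#include <optional>
#include <vector>

// Stand-in for StackState: a slot either has a known value or is unknown.
using Slot = std::optional<int>;

// Join two slots: keep a known value only if both predecessors agree on it.
bool mergeSlot(Slot& dst, const Slot& src) {
  if (dst && (!src || *dst != *src)) {
    dst.reset();      // dst lost information
    return true;
  }
  return false;
}

// Same shape as merge_memory_stack_into: truncate to the common depth, then
// merge the remaining slots element-wise.
bool mergeStacks(std::vector<Slot>& dst, const std::vector<Slot>& src) {
  auto changed = false;
  auto const result_size = std::min(dst.size(), src.size());
  dst.resize(result_size);
  for (size_t i = 0; i < result_size; ++i) {
    changed |= mergeSlot(dst[i], src[i]);
  }
  return changed;
}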
void DFSSortClusters::dfs(uint32_t cid) {
  if (m_visited.test(cid)) return;
  m_visited.set(cid);
  m_list.push_back(Vlabel(cid));

  // find the best successor, which is the one to which cid has the
  // highest weight among the ones that haven't been visited yet
  int64_t  maxWgt   = 0;
  uint32_t bestSucc = uint32_t(-1);
  for (auto& sInfo : m_clusterSuccs[cid]) {
    auto succId = sInfo.first;
    if (m_visited.test(succId)) continue;
    auto wgt = sInfo.second;
    if (wgt >= maxWgt) {
      maxWgt   = wgt;
      bestSucc = succId;
    }
  }

  if (bestSucc == uint32_t(-1)) return;

  // visit bestSucc first
  dfs(bestSucc);

  // now visit the remaining ones
  for (auto& sInfo : m_clusterSuccs[cid]) {
    if (sInfo.first != bestSucc) {
      dfs(sInfo.first);
    }
  }
}
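To make the traversal order concrete, here is a toy, self-contained version of the same greedy DFS: successors carry edge weights, and the heaviest unvisited successor is laid out immediately after its predecessor. The graph, types, and function names are all hypothetical; only the ordering strategy mirrors the code above.

#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

// succs[n] lists (target, weight) pairs; visit the heaviest unvisited
// successor first, then the rest.
void greedyDfs(const std::vector<std::vector<std::pair<uint32_t, int64_t>>>& succs,
               std::vector<bool>& visited, std::vector<uint32_t>& order,
               uint32_t n) {
  if (visited[n]) return;
  visited[n] = true;
  order.push_back(n);

  int64_t maxWgt = 0;
  uint32_t best = UINT32_MAX;
  for (auto& s : succs[n]) {
    if (!visited[s.first] && s.second >= maxWgt) {
      maxWgt = s.second;
      best = s.first;
    }
  }
  if (best == UINT32_MAX) return;

  greedyDfs(succs, visited, order, best);
  for (auto& s : succs[n]) {
    if (s.first != best) greedyDfs(succs, visited, order, s.first);
  }
}

int main() {
  // 0 -> 1 (weight 10), 0 -> 2 (weight 90): the hot edge wins, so the
  // resulting order is C0 C2 C1.
  std::vector<std::vector<std::pair<uint32_t, int64_t>>> succs = {
    {{1, 10}, {2, 90}}, {}, {}
  };
  std::vector<bool> visited(succs.size(), false);
  std::vector<uint32_t> order;
  greedyDfs(succs, visited, order, 0);
  for (auto c : order) std::printf("C%u ", c);
  std::printf("\n");
}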
void natural_loop_dfs(jit::vector<Block*>& out, Visited& visited, Block* blk) {
  if (visited.test(blk->id())) return;
  visited.set(blk->id());
  blk->forEachPred([&] (Block* pred) {
    natural_loop_dfs(out, visited, pred);
  });
  out.push_back(blk);
}
void Vgen::emit(jcc& i) {
  assertx(i.cc != CC_None);
  if (i.targets[1] != i.targets[0]) {
    if (next == i.targets[1]) {
      // the taken branch is the fall-through block, invert the branch.
      i = jcc{ccNegate(i.cc), i.sf, {i.targets[1], i.targets[0]}};
    }
    jccs.push_back({a->frontier(), i.targets[1]});
    // B.cond range is +/- 1MB but this uses BR
    backend.emitSmashableJump(*codeBlock, kEndOfTargetChain, i.cc);
  }
  emit(jmp{i.targets[0]});
}
void Vgen::emit(tbcc& i) {
  assertx(i.cc == vixl::ne || i.cc == vixl::eq);
  if (i.targets[1] != i.targets[0]) {
    if (next == i.targets[1]) {
      // the taken branch is the fall-through block, invert the branch.
      i = tbcc{i.cc == vixl::ne ? vixl::eq : vixl::ne, i.bit, i.s,
               {i.targets[1], i.targets[0]}};
    }
    bccs.push_back({a->frontier(), i.targets[1]});
    // offset range +/- 32KB
    if (i.cc == vixl::ne) {
      a->tbnz(X(i.s), i.bit, 0);
    } else {
      a->tbz(X(i.s), i.bit, 0);
    }
  }
  emit(jmp{i.targets[0]});
}
void Vgen::emit(jmp i) {
  if (next == i.target) return;
  jmps.push_back({a->frontier(), i.target});
  // B range is +/- 128MB but this uses BR
  backend.emitSmashableJump(*codeBlock, kEndOfTargetChain, CC_None);
}
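The range comments in the three emitters above come straight from the AArch64 encodings: tbz/tbnz carry a 14-bit signed immediate, B.cond a 19-bit one, and B a 26-bit one, all counted in 4-byte instruction units. The smashable jumps go through an indirect BR precisely to sidestep these limits. A small standalone check of those numbers (illustrative only, not part of the backend):

#include <cstdint>
#include <cstdio>

// Reach of a PC-relative AArch64 branch whose signed immediate has `immBits`
// bits and is counted in 4-byte instructions.
constexpr int64_t reachBytes(int immBits) {
  return (int64_t{1} << (immBits - 1)) * 4;
}

int main() {
  std::printf("tbz/tbnz (imm14): +/- %lld KB\n",
              static_cast<long long>(reachBytes(14) / 1024));          // 32 KB
  std::printf("B.cond   (imm19): +/- %lld MB\n",
              static_cast<long long>(reachBytes(19) / (1024 * 1024))); // 1 MB
  std::printf("B        (imm26): +/- %lld MB\n",
              static_cast<long long>(reachBytes(26) / (1024 * 1024))); // 128 MB
}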
void Vgen::emit(hcunwind& i) {
  catches.push_back({points[i.call], i.targets[1]});
  emit(jmp{i.targets[0]});
}
// overall emitter
void Vgen::emit(jit::vector<Vlabel>& labels) {
  // Some structures here track where we put things just for debug printing.
  struct Snippet {
    const IRInstruction* origin;
    TcaRange range;
  };
  struct BlockInfo {
    jit::vector<Snippet> snippets;
  };

  // This is under the printir tracemod because it mostly shows you IR and
  // machine code, not vasm and machine code (not implemented).
  bool shouldUpdateAsmInfo = !!m_asmInfo &&
    Trace::moduleEnabledRelease(HPHP::Trace::printir, kCodeGenLevel);

  std::vector<TransBCMapping>* bcmap = nullptr;
  if (mcg->tx().isTransDBEnabled() || RuntimeOption::EvalJitUseVtuneAPI) {
    bcmap = &mcg->cgFixups().m_bcMap;
  }

  jit::vector<jit::vector<BlockInfo>> areaToBlockInfos;
  if (shouldUpdateAsmInfo) {
    areaToBlockInfos.resize(areas.size());
    for (auto& r : areaToBlockInfos) {
      r.resize(unit.blocks.size());
    }
  }

  for (int i = 0, n = labels.size(); i < n; ++i) {
    assertx(checkBlockEnd(unit, labels[i]));

    auto b = labels[i];
    auto& block = unit.blocks[b];

    codeBlock = &area(block.area).code;
    vixl::MacroAssembler as { *codeBlock };
    a = &as;
    auto blockStart = a->frontier();
    addrs[b] = blockStart;

    {
      // Compute the next block we will emit into the current area.
      auto cur_start = start(labels[i]);
      auto j = i + 1;
      while (j < labels.size() && cur_start != start(labels[j])) {
        j++;
      }
      next = j < labels.size() ? labels[j] : Vlabel(unit.blocks.size());
    }

    const IRInstruction* currentOrigin = nullptr;
    auto blockInfo = shouldUpdateAsmInfo
      ? &areaToBlockInfos[unsigned(block.area)][b]
      : nullptr;
    auto start_snippet = [&](Vinstr& inst) {
      if (!shouldUpdateAsmInfo) return;

      blockInfo->snippets.push_back(
        Snippet { inst.origin, TcaRange { codeBlock->frontier(), nullptr } }
      );
    };
    auto finish_snippet = [&] {
      if (!shouldUpdateAsmInfo) return;

      if (!blockInfo->snippets.empty()) {
        auto& snip = blockInfo->snippets.back();
        snip.range = TcaRange { snip.range.start(), codeBlock->frontier() };
      }
    };

    for (auto& inst : block.code) {
      if (currentOrigin != inst.origin) {
        finish_snippet();
        start_snippet(inst);
        currentOrigin = inst.origin;
      }

      if (bcmap && inst.origin) {
        auto sk = inst.origin->marker().sk();
        if (bcmap->empty() ||
            bcmap->back().md5 != sk.unit()->md5() ||
            bcmap->back().bcStart != sk.offset()) {
          bcmap->push_back(TransBCMapping{sk.unit()->md5(), sk.offset(),
                                          main().frontier(), cold().frontier(),
                                          frozen().frontier()});
        }
      }

      switch (inst.op) {
#define O(name, imms, uses, defs) \
        case Vinstr::name: emit(inst.name##_); break;
        VASM_OPCODES
#undef O
      }
    }

    finish_snippet();
  }

  for (auto& p : jccs) {
    assertx(addrs[p.target]);
    backend.smashJcc(p.instr, addrs[p.target]);
  }
  for (auto& p : bccs) {
    assertx(addrs[p.target]);
    auto link = (Instruction*) p.instr;
    link->SetImmPCOffsetTarget(Instruction::Cast(addrs[p.target]));
  }
  for (auto& p : jmps) {
    assertx(addrs[p.target]);
    backend.smashJmp(p.instr, addrs[p.target]);
  }
  for (auto& p : catches) {
    mcg->registerCatchBlock(p.instr, addrs[p.target]);
  }
  for (auto& p : ldpoints) {
    CodeCursor cc(main(), p.instr);
    MacroAssembler a{main()};
    a.Mov(X(p.d), points[p.pos]);
  }

  if (!shouldUpdateAsmInfo) {
    return;
  }

  for (auto i = 0; i < areas.size(); ++i) {
    const IRInstruction* currentOrigin = nullptr;
    auto& blockInfos = areaToBlockInfos[i];
    for (auto const blockID : labels) {
      auto const& blockInfo = blockInfos[static_cast<size_t>(blockID)];
      if (blockInfo.snippets.empty()) continue;

      for (auto const& snip : blockInfo.snippets) {
        if (currentOrigin != snip.origin && snip.origin) {
          currentOrigin = snip.origin;
        }

        m_asmInfo->updateForInstruction(
          currentOrigin,
          static_cast<AreaIndex>(i),
          snip.range.start(),
          snip.range.end());
      }
    }
  }
}
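The overall emitter is a two-pass scheme: while emitting each block it records pending jccs, bccs, jmps, and catches keyed by target label, and only once every block's start address is in addrs does it go back and patch them. A minimal sketch of that emit-then-patch pattern, using hypothetical types rather than the real Vgen members:

#include <cstdint>
#include <vector>

using Addr = unsigned char*;

// A branch emitted before its target's address was known.
struct PendingBranch { Addr instr; uint32_t target; };

// Hypothetical patcher: addrs[] maps block ids to their start addresses, and
// smash() rewrites a placeholder branch to point at its real target.
void patchBranches(const std::vector<PendingBranch>& pending,
                   const std::vector<Addr>& addrs,
                   void (*smash)(Addr instr, Addr target)) {
  for (auto& p : pending) {
    smash(p.instr, addrs[p.target]);
  }
}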