void Vxls::splitCritEdges() { smart::vector<unsigned> preds; preds.resize(unit.blocks.size()); for (auto pred : blocks) { auto succlist = succs(unit.blocks[pred]); for (auto succ : succlist) { preds[succ]++; } } auto resort = false; for (auto pred : blocks) { auto succlist = succs(unit.blocks[pred]); if (succlist.size() <= 1) continue; for (auto& succ : succlist) { if (preds[succ] <= 1) continue; // split the critical edge. auto middle = unit.makeBlock(unit.blocks[succ].area); unit.blocks[middle].code.emplace_back(jmp{succ}); succ = middle; resort = true; } } if (resort) { blocks = sortBlocks(unit); } }
/*
 * Splits the critical edges in `unit', if any.
 *
 * An edge is split when its source has more than one successor and its
 * target has more than one predecessor; the edge is rerouted through a new
 * block (created in the target's area) that forwardJmp() fills in.
 *
 * Returns true iff the unit was modified.
 */
bool splitCriticalEdges(Vunit& unit) {
  jit::vector<unsigned> incoming(unit.blocks.size());
  jit::flat_set<size_t> catchBlocks;

  // Tally predecessors for every block in the unit.
  for (size_t b = 0; b < unit.blocks.size(); ++b) {
    for (auto target : succs(unit.blocks[b])) {
      ++incoming[target];
    }
  }

  bool modified = false;
  // The bound is re-read every iteration because makeBlock() grows the unit.
  for (size_t from = 0; from < unit.blocks.size(); ++from) {
    auto targets = succs(unit.blocks[from]);
    if (targets.size() <= 1) continue;

    for (auto& target : targets) {
      if (incoming[target] > 1) {
        // split the critical edge.
        auto bridge = unit.makeBlock(unit.blocks[target].area);
        forwardJmp(unit, catchBlocks, bridge, target);
        target = bridge;
        modified = true;
      }
    }
  }

  // Remove any landingpad{} instructions that were hoisted to split edges.
  for (auto label : catchBlocks) {
    auto& first = unit.blocks[label].code.front();
    assertx(first.op == Vinstr::landingpad);
    first = nop{};
  }

  return modified;
}
void CFA::link_to_exit() { typedef thorin::GIDSet<const CFNode*> CFNodeSet; CFNodeSet reachable; std::queue<const CFNode*> queue; // first, link all nodes without succs to exit for (auto p : nodes()) { auto n = p.second; if (n != exit() && n->succs().empty()) n->link(exit()); } auto backwards_reachable = [&] (const CFNode* n) { auto enqueue = [&] (const CFNode* n) { if (reachable.emplace(n).second) queue.push(n); }; enqueue(n); while (!queue.empty()) { for (auto pred : pop(queue)->preds()) enqueue(pred); } }; std::stack<const CFNode*> stack; CFNodeSet on_stack; auto push = [&] (const CFNode* n) { if (on_stack.emplace(n).second) { stack.push(n); return true; } return false; }; backwards_reachable(exit()); push(entry()); while (!stack.empty()) { auto n = stack.top(); bool todo = false; for (auto succ : n->succs()) todo |= push(succ); if (!todo) { if (!reachable.contains(n)) { n->link(exit()); backwards_reachable(n); } stack.pop(); } } }
void Vxls::printInstr(std::ostringstream& str, Vinstr* inst, unsigned pos, Vlabel b) { bool fixed_covers[2] = { false, false }; Interval* fixed = nullptr; forEachInterval(intervals, [&] (Interval* ivl) { if (ivl->fixed()) { if (ignore_reserved && !abi.unreserved().contains(ivl->vreg)) { return; } if (collapse_fixed) { fixed = ivl; // can be any. fixed_covers[0] |= ivl->covers(pos); fixed_covers[1] |= ivl->covers(pos + 1); return; } } str << " "; str << draw(ivl, pos, Light, [&](Interval* child, unsigned p) { return child->covers(p); }); str << draw(ivl, pos, Heavy, [&](Interval* child, unsigned p) { return child->usedAt(p); }); }); str << " " << draw(fixed, pos, Heavy, [&](Interval*, unsigned p) { assert(p-pos < 2); return fixed_covers[p-pos]; }); if (pos == block_ranges[b].start) { str << folly::format(" B{: <2}", size_t(b)); } else { str << " "; } if (pos == block_ranges[b].start || pos > 0) { str << folly::format(" {: <3}", pos); } else { str << " "; } if (inst) { str << folly::format(" {: <10} ", vinst_names[inst->op]); FormatVisitor pv(unit, str); visit(*inst, pv); auto labels = succs(*inst); if (labels.size() == 1) { str << pv.sep() << folly::format("B{}", size_t(labels[0])); } else if (labels.size() == 2) { str << pv.sep() << folly::format("B{}, else B{}", size_t(labels[1]), size_t(labels[0])); } else { for (auto succ : succs(*inst)) { str << folly::format("->B{} ", size_t(succ)); } } } str << "\n"; }
// compute the offset from RSP to the spill area at each block start. void Vxls::analyzeRsp() { auto num_blocks = unit.blocks.size(); boost::dynamic_bitset<> visited(num_blocks); spill_offsets.resize(num_blocks); for (auto b : blocks) { int offset; if (visited.test(b)) { offset = spill_offsets[b]; } else { offset = 0; } for (auto& inst : unit.blocks[b].code) { offset -= rspEffect(unit, inst); } for (auto s : succs(unit.blocks[b])) { if (visited.test(s)) { assert_flog(offset == spill_offsets[s], "rsp mismatch on edge B{}->B{}, expected {} got {}", size_t(b), size_t(s), spill_offsets[s], offset); } else { spill_offsets[s] = offset; visited.set(s); } } } }
// Remove dead instructions by doing a traditional liveness analysis. // instructions that mutate memory, physical registers, or status flags // are considered useful. All branches are considered useful. // // Given SSA, there's a faster sparse version of this algorithm that marks // useful instructions in one pass, then transitively marks pure instructions // that define inputs to useful instructions. However it requires a mapping // from vreg numbers to the instruction that defines them, and a way to address // individual instructions. // // We could remove useless branches by computing the post-dominator tree and // RDF(b) for each block; then a branch is only useful if it controls whether // or not a useful block executes, and useless branches can be forwarded to // the nearest useful post-dominator. void removeDeadCode(Vunit& unit) { auto blocks = sortBlocks(unit); jit::vector<LiveSet> livein(unit.blocks.size()); LiveSet live(unit.next_vr); auto pass = [&](bool mutate) { bool changed = false; for (auto blockIt = blocks.end(); blockIt != blocks.begin();) { auto b = *--blockIt; auto& block = unit.blocks[b]; live.reset(); for (auto s : succs(block)) { if (!livein[s].empty()) { live |= livein[s]; } } for (auto i = block.code.end(); i != block.code.begin();) { auto& inst = *--i; auto useful = effectful(inst); visitDefs(unit, inst, [&](Vreg r) { if (r.isPhys() || live.test(r)) { useful = true; live.reset(r); } }); if (useful) { visitUses(unit, inst, [&](Vreg r) { live.set(r); }); } else if (mutate) { inst = nop{}; changed = true; } } if (mutate) { assert(live == livein[b]); } else { if (live != livein[b]) { livein[b] = live; changed = true; } } } return changed; }; // analyze until livein reaches a fixed point while (pass(false)) {} // nop-out useless instructions if (pass(true)) { for (auto b : blocks) { auto& code = unit.blocks[b].code; auto end = std::remove_if(code.begin(), code.end(), [&](Vinstr& inst) { return inst.op == Vinstr::nop; }); code.erase(end, 
code.end()); } printUnit(kVasmDCELevel, "after vasm-dead", unit); } }
// Compute lifetime intervals and use positions of all intervals by walking // the code bottom-up once. Loops aren't handled yet. void Vxls::buildIntervals() { livein.resize(unit.blocks.size()); intervals.resize(unit.next_vr); for (auto blockIt = blocks.end(); blockIt != blocks.begin();) { auto vlabel = *--blockIt; auto& block = unit.blocks[vlabel]; LiveSet live(unit.next_vr); for (auto s : succs(block)) { if (!livein[s].empty()) live |= livein[s]; } auto& block_range = block_ranges[vlabel]; forEach(live, [&](Vreg vr) { intervals[vr]->add(block_range); }); auto pos = block_range.end; for (auto i = block.code.end(); i != block.code.begin();) { auto& inst = *--i; pos -= 2; DefVisitor dv(live, *this, pos); visit(inst, dv); RegSet implicit_uses, implicit_defs; getEffects(abi, inst, implicit_uses, implicit_defs); implicit_defs.forEach([&](Vreg r) { dv.def(r); }); UseVisitor uv(live, *this, {block_range.start, pos}); visit(inst, uv); implicit_uses.forEach([&](Vreg r) { uv.use(r); }); } livein[vlabel] = live; } for (auto& c : unit.cpool) { auto ivl = intervals[c.second]; if (ivl) { ivl->ranges.back().start = 0; ivl->cns = true; ivl->val = c.first; } } // Each interval's range and use list is backwards; reverse them now. for (auto ivl : intervals) { if (!ivl) continue; assert(!ivl->ranges.empty()); // no empty intervals std::reverse(ivl->uses.begin(), ivl->uses.end()); std::reverse(ivl->ranges.begin(), ivl->ranges.end()); } if (dumpIREnabled(kRegAllocLevel)) { print("after building intervals"); } // todo: t4764262 this should check each root, not just position 0. for (DEBUG_ONLY auto ivl : intervals) { // only constants and physical registers can be live at entry. assert(!ivl || ivl->cns || ivl->vreg.isPhys() || ivl->start() > 0); } assert(check(unit)); }
// Depth-first walk from `b' that appends each block to `blocks' after all of
// its not-yet-visited successors (post-order). For a block in area 0,
// successors in a higher-numbered area are explored before the remaining
// successors; for any other block, successors are explored in their natural
// order.
void dfs(Vlabel b) {
  assert_no_log(size_t(b) < unit.blocks.size());
  if (visited.test(b)) return;
  visited.set(b);

  auto const successors = succs(unit.blocks[b]);
  if (area(b) != 0) {
    for (auto s : successors) {
      dfs(s);
    }
  } else {
    // visit colder
    for (auto s : successors) {
      if (area(s) > area(b)) dfs(s);
    }
    // then everything else
    for (auto s : successors) {
      if (area(s) <= area(b)) dfs(s);
    }
  }

  blocks.push_back(b);
}
// last phase: mutate the code by inserting copies. this destroyes // the position numbering, so we can't use interval positions after this. void Vxls::insertCopies() { // insert copies inside blocks for (auto b : blocks) { auto r = block_ranges[b]; auto pos = r.start; pos += 2; auto& block = unit.blocks[b]; auto& code = block.code; auto offset = spill_offsets[b]; for (unsigned j = 0; j < code.size(); j++, pos += 2) { MemoryRef slots = rsp[offset]; offset -= rspEffect(unit, code[j]); auto s = spills.find(pos - 1); if (s != spills.end()) { insertSpillsAt(code, j, s->second, slots, pos - 1); } auto c = copies.find(pos - 1); if (c != copies.end()) { insertCopiesAt(code, j, c->second, slots, pos - 1); } c = copies.find(pos); if (c != copies.end()) { insertCopiesAt(code, j, c->second, slots, pos); } } } // insert copies on edges for (auto b : blocks) { auto& block = unit.blocks[b]; auto succlist = succs(block); if (succlist.size() == 1) { auto& code = block.code; auto c = edge_copies.find({b, 0}); if (c != edge_copies.end()) { unsigned j = code.size() - 1; auto slots = rsp[spill_offsets[succlist[0]]]; insertCopiesAt(code, j, c->second, slots, block_ranges[b].end - 1); } } else { for (int i = 0, n = succlist.size(); i < n; i++) { auto s = succlist[i]; auto& code = unit.blocks[s].code; auto m = edge_copies.find({b, i}); if (m != edge_copies.end()) { auto slots = rsp[spill_offsets[s]]; unsigned j = 0; insertCopiesAt(code, j, m->second, slots, block_ranges[b].start); } } } } if (dumpIREnabled(kRegAllocLevel)) { dumpIntervals(); print("after inserting copies"); } }
// Build the predecessor lists of `unit': for every block b visited by
// PostorderWalker::dfs and every successor s of b, b is appended to the
// result's entry for s. The result has one entry per block in the unit.
PredVector computePreds(const Vunit& unit) {
  PredVector result(unit.blocks.size());

  auto recordOutgoing = [&](Vlabel from) {
    for (auto to : succs(unit.blocks[from])) {
      result[to].push_back(from);
    }
  };

  PostorderWalker walker(unit);
  walker.dfs(recordOutgoing);
  return result;
}
// Remove dead instructions by doing a traditional liveness analysis. // instructions that mutate memory, physical registers, or status flags // are considered useful. All branches are considered useful. // // Given SSA, there's a faster sparse version of this algorithm that marks // useful instructions in one pass, then transitively marks pure instructions // that define inputs to useful instructions. However it requires a mapping // from vreg numbers to the instruction that defines them, and a way to address // individual instructions. // // We could remove useless branches by computing the post-dominator tree and // RDF(b) for each block; then a branch is only useful if it controls whether // or not a useful block executes, and useless branches can be forwarded to // the nearest useful post-dominator. void removeDeadCode(Vunit& unit) { Timer timer(Timer::vasm_dce); auto blocks = sortBlocks(unit); jit::vector<LiveSet> livein(unit.blocks.size()); LiveSet live(unit.next_vr); auto pass = [&](bool mutate) { bool changed = false; for (auto blockIt = blocks.end(); blockIt != blocks.begin();) { auto b = *--blockIt; auto& block = unit.blocks[b]; live.reset(); for (auto s : succs(block)) { if (!livein[s].empty()) { live |= livein[s]; } } for (auto i = block.code.end(); i != block.code.begin();) { auto& inst = *--i; auto useful = effectful(inst); visitDefs(unit, inst, [&](Vreg r) { if (r.isPhys() || live.test(r)) { useful = true; live.reset(r); } }); if (useful) { visitUses(unit, inst, [&](Vreg r) { live.set(r); }); } else if (mutate) { inst = nop{}; changed = true; } } if (mutate) { assertx(live == livein[b]); } else { if (live != livein[b]) { livein[b] = live; changed = true; } } } return changed; }; // analyze until livein reaches a fixed point while (pass(false)) {} auto const changed = pass(true); removeTrivialNops(unit); if (changed) { printUnit(kVasmDCELevel, "after vasm-dead", unit); } }
/// Recursive post-order numbering step.
///
/// Marks @p n as in progress, recurses into every successor whose index is
/// still size_t(-1), then gives @p n the index one below the last index
/// handed out (@p i, as updated by the recursive calls) and records it in
/// rpo_.
/// @return the index assigned to @p n.
size_t CFG<forward>::post_order_visit(const CFNode* n, size_t i) {
    const size_t unvisited = size_t(-1);
    const size_t in_progress = size_t(-2);

    auto& slot = forward ? n->f_index_ : n->b_index_;
    slot = in_progress;

    for (auto succ : succs(n)) {
        if (index(succ) != unvisited)
            continue;
        i = post_order_visit(succ, i);
    }

    slot = i-1;
    rpo_[n] = n;
    return slot;
}
/*
 * Perform a DFS starting at block `bid', storing the post-order in
 * `outVec'.
 *
 * Blocks already in `visited' are skipped. The next-retranslation block of
 * `bid', if any, is explored before its ordinary successors.
 */
void RegionDesc::postOrderSort(RegionDesc::BlockId bid,
                               RegionDesc::BlockIdSet& visited,
                               RegionDesc::BlockIdVec& outVec) {
  if (visited.count(bid) != 0) return;
  visited.insert(bid);

  auto const retrans = nextRetrans(bid);
  if (retrans) {
    postOrderSort(retrans.value(), visited, outVec);
  }

  for (auto next : succs(bid)) {
    postOrderSort(next, visited, outVec);
  }

  outVec.push_back(bid);
}
/*
 * Splits the critical edges in `unit', if any.
 *
 * An edge is split when its source has more than one successor and its
 * target has more than one predecessor; the edge is rerouted through a new
 * block (created in the target's area) that forwardJmp() fills in.
 *
 * Returns true iff the unit was modified.
 */
bool splitCriticalEdges(Vunit& unit) {
  jit::vector<unsigned> incoming(unit.blocks.size());

  // Tally predecessors for every block in the unit.
  for (size_t b = 0; b < unit.blocks.size(); ++b) {
    for (auto target : succs(unit.blocks[b])) {
      ++incoming[target];
    }
  }

  bool modified = false;
  // The bound is re-read every iteration because makeBlock() grows the unit.
  for (size_t from = 0; from < unit.blocks.size(); ++from) {
    auto targets = succs(unit.blocks[from]);
    if (targets.size() <= 1) continue;

    for (auto& target : targets) {
      if (incoming[target] > 1) {
        // split the critical edge.
        auto bridge = unit.makeBlock(unit.blocks[target].area);
        forwardJmp(unit, bridge, target);
        target = bridge;
        modified = true;
      }
    }
  }

  return modified;
}
// Returns a bitset, indexed by block label, with a bit set for every block
// that is the target of an edge whose target had already been seen when the
// edge's source was processed in `rpoBlocks' order (self-edges included).
boost::dynamic_bitset<> backedgeTargets(const Vunit& unit,
                                        const jit::vector<Vlabel>& rpoBlocks) {
  auto const numBlocks = unit.blocks.size();
  boost::dynamic_bitset<> targets(numBlocks);
  boost::dynamic_bitset<> visited(numBlocks);

  for (auto from : rpoBlocks) {
    // Mark first so that an edge from a block to itself counts.
    visited.set(from);
    for (auto to : succs(unit.blocks[from])) {
      if (!visited.test(to)) continue;
      targets.set(to);
    }
  }
  return targets;
}
/** * Link ordinary blocks with ordinary edges and set their last instruction * and end offsets */ void GraphBuilder::linkBlocks() { PC bc = m_unit->entry(); Block* block = m_graph->first_linear; block->id = m_graph->block_count++; for (InstrRange i = funcInstrs(m_func); !i.empty(); ) { PC pc = i.popFront(); block->last = pc; if (isCF(pc)) { if (isSwitch(*reinterpret_cast<const Op*>(pc))) { int i = 0; foreachSwitchTarget((Op*)pc, [&](Offset& o) { succs(block)[i++] = at(pc + o); }); } else { Offset target = instrJumpTarget((Op*)bc, pc - bc); if (target != InvalidAbsoluteOffset) { assert(numSuccBlocks(block) > 0); succs(block)[numSuccBlocks(block) - 1] = at(target); } } } PC next_pc = !i.empty() ? i.front() : m_unit->at(m_func->past()); Block* next = at(next_pc); if (next) { block->next_linear = next; block->end = next_pc; if (!isTF(pc)) { assert(numSuccBlocks(block) > 0); succs(block)[0] = next; } block = next; block->id = m_graph->block_count++; } } block->end = m_unit->at(m_func->past()); }
/* * This method creates a weighted graph of the clusters, and sorts * them according to a DFS pre-order that prioritizes the arcs with * heaviest weights, so as to try to have a cluster be followed by its * mostly likely successor cluster. */ void Clusterizer::sortClusters() { jit::vector<SuccInfos> clusterGraph; clusterGraph.resize(m_unit.blocks.size()); for (auto b : m_blocks) { for (auto s : succs(m_unit.blocks[b])) { auto srcCid = m_blockCluster[b]; auto dstCid = m_blockCluster[s]; if (srcCid == dstCid) continue; auto wgt = m_scale.weight(b, s); clusterGraph[srcCid][dstCid] += wgt; } } DFSSortClusters dfsSort(std::move(clusterGraph), m_unit); m_clusterOrder = dfsSort.sort(m_blockCluster[m_unit.entry]); }
void Clusterizer::clusterize() { struct ArcInfo { Vlabel src; Vlabel dst; int64_t wgt; }; jit::vector<ArcInfo> arcInfos; for (auto b : m_blocks) { for (auto s : succs(m_unit.blocks[b])) { arcInfos.push_back({b, s, m_scale.weight(b, s)}); } } // sort arcs in decreasing weight order std::sort(arcInfos.begin(), arcInfos.end(), [&](const ArcInfo& a1, const ArcInfo& a2) { return a1.wgt > a2.wgt; }); for (auto& arcInfo : arcInfos) { auto src = arcInfo.src; auto dst = arcInfo.dst; // Only merge blocks in the same area. if (m_unit.blocks[src].area_idx != m_unit.blocks[dst].area_idx) continue; auto srcCid = m_blockCluster[src]; auto dstCid = m_blockCluster[dst]; if (srcCid == dstCid) continue; auto& srcC = m_clusters[srcCid]; auto& dstC = m_clusters[dstCid]; // src must be the last in its cluster if (srcC.back() != src) continue; // dst must be the first in its cluster if (dstC.front() != dst) continue; // merge the clusters by append the blocks in dstC to srcC for (auto d : dstC) { srcC.push_back(d); m_blockCluster[d] = srcCid; } dstC.clear(); } }
std::string Scale::toString() const { std::ostringstream out; out << "digraph {\n"; int64_t maxWgt = 1; for (auto b : m_blocks) { maxWgt = std::max(maxWgt, weight(b)); } for (auto b : m_blocks) { unsigned coldness = 255 - (255 * weight(b) / maxWgt); out << folly::format( "{} [label=\"{}\\nw: {}\\nptid: {}\\narea: {}\\nprof: {}\"," "shape=box,style=filled,fillcolor=\"#ff{:02x}{:02x}\"]\n", b, b, weight(b), findProfTransID(b), unsigned(m_unit.blocks[b].area_idx), findProfCount(b), coldness, coldness); for (auto s : succs(m_unit.blocks[b])) { out << folly::format("{} -> {} [label={}];\n", b, s, weight(b, s)); } } out << "}\n"; return out.str(); }
/*
 * Remove the block referenced by `it' from the region.
 *
 * All arcs into and out of the block are removed, the block is unlinked
 * from its retranslation chain (its previous and next retranslations are
 * joined when both exist), its entry in m_data is erased, and finally it is
 * erased from m_blocks.
 *
 * Returns the iterator produced by m_blocks.erase(it).
 */
RegionDesc::BlockVec::iterator
RegionDesc::deleteBlock(RegionDesc::BlockVec::iterator it) {
  const auto bid = (*it)->id();

  // Iterate over copies of the arc sets. removeArc() is called on exactly
  // the arcs being enumerated, so if it edits the sets behind preds(bid) /
  // succs(bid) (presumably it does -- confirm against removeArc), walking
  // the live sets would invalidate the loop iterator.
  auto const predIds = preds(bid);
  for (auto pid : predIds) removeArc(pid, bid);
  auto const succIds = succs(bid);
  for (auto sid : succIds) removeArc(bid, sid);

  // Unlink bid from its retranslation chain.
  if (auto nextR = nextRetrans(bid)) {
    auto prevR = prevRetrans(bid);
    clearPrevRetrans(nextR.value());
    if (prevR) {
      // Splice the chain: prev now continues directly to next.
      clearNextRetrans(prevR.value());
      setNextRetrans(prevR.value(), nextR.value());
    } else {
      clearPrevRetrans(nextR.value());
    }
  } else if (auto prevR = prevRetrans(bid)) {
    clearNextRetrans(prevR.value());
  }

  m_data.erase(bid);
  return m_blocks.erase(it);
}
void Vxls::resolveEdges() { for (auto b1 : blocks) { auto& block1 = unit.blocks[b1]; auto p1 = block_ranges[b1].end - 2; auto succlist = succs(block1); auto& inst1 = block1.code.back(); if (inst1.op == Vinstr::phijmp) { auto target = inst1.phijmp_.target; auto& uses = unit.tuples[inst1.phijmp_.uses]; auto& defs = unit.tuples[findDefs(unit, target)]; for (unsigned i = 0, n = uses.size(); i < n; ++i) { auto i1 = intervals[uses[i]]; if (i1) i1 = i1->childAt(p1); auto i2 = intervals[defs[i]]; if (i2->reg != i1->reg) { edge_copies[{b1,0}][i2->reg] = i1; } } inst1 = jmp{target}; } for (unsigned i = 0, n = succlist.size(); i < n; i++) { auto b2 = succlist[i]; auto p2 = block_ranges[b2].start; forEach(livein[b2], [&](Vreg vr) { Interval* i1 = nullptr; Interval* i2 = nullptr; for (auto ivl = intervals[vr]; ivl && !(i1 && i2); ivl = ivl->next) { if (ivl->covers(p1)) i1 = ivl; if (ivl->covers(p2)) i2 = ivl; } // i2 can be unallocated if the tmp is a constant or is spilled. if (i2->reg != InvalidReg && i2->reg != i1->reg) { edge_copies[{b1,i}][i2->reg] = i1; } }); } } }
/**
 * Fill `m_prologue_blocks` and return the register that we're "switching" on
 * (even if it's not a real switch statement)
 *
 * Asserts that no block in `cfg` is a catch block.
 *
 * Returns boost::none when the last instruction of the linear prologue chain
 * is not a branch; src(0) of that instruction when it is a switch; otherwise
 * the determining register agreed on by every block of the if-else tree with
 * two or more successors.
 */
boost::optional<uint16_t> SwitchMethodPartitioning::compute_prologue_blocks(
    cfg::ControlFlowGraph* cfg,
    const cp::intraprocedural::FixpointIterator& fixpoint,
    bool verify_default_case) {
  for (const cfg::Block* b : cfg->blocks()) {
    always_assert_log(!b->is_catch(),
                      "SwitchMethodPartitioning does not support methods with "
                      "catch blocks. %d has a catch block in %s",
                      b->id(), SHOW(*cfg));
  }
  // First, add all the prologue blocks that form a linear chain before the
  // case block selection blocks (a switch or an if-else tree) begin.
  for (cfg::Block* b = cfg->entry_block(); b != nullptr; b = b->follow_goto()) {
    m_prologue_blocks.push_back(b);
  }
  {
    auto last_prologue_block = m_prologue_blocks.back();
    auto last_prologue_insn_it = last_prologue_block->get_last_insn();
    always_assert(last_prologue_insn_it != last_prologue_block->end());
    auto last_prologue_insn = last_prologue_insn_it->insn;
    // If this method was compiled from a default-case-only switch, there will
    // be no branch opcode -- the method will always throw an
    // IllegalArgumentException.
    auto op = last_prologue_insn->opcode();
    always_assert(!verify_default_case || is_branch(op) || op == OPCODE_THROW);
    if (!is_branch(op)) {
      return boost::none;
    } else if (is_switch(op)) {
      // switch or if-else tree. Not both.
      return last_prologue_insn->src(0);
    }
  }
  // Handle a tree of if statements in the prologue. d8 emits this
  // when it would be smaller than a switch statement. The non-leaf nodes of the
  // tree are prologue blocks. The leaf nodes of the tree are case blocks.
  //
  // For example:
  //   load-param v0
  //   const v1 1
  //   if-eq v0 v1 CASE_1
  //   goto EXIT_BLOCK ; or return
  //   const v1 2
  //   if-eq v0 v1 CASE_2
  //   goto EXIT_BLOCK ; or return
  //   ...
  //
  // Traverse the tree starting at the end of the linear chain of prologue
  // blocks and stopping before we reach a leaf.
  boost::optional<uint16_t> determining_reg;
  std::queue<cfg::Block*> to_visit;
  to_visit.push(m_prologue_blocks.back());
  while (!to_visit.empty()) {
    auto b = to_visit.front();
    to_visit.pop();
    // Leaf nodes have 0 or 1 successors (return or goto the epilogue blocks).
    // Throw edges are disallowed.
    if (b->succs().size() >= 2) {
      // The linear check above and this tree check both account for the
      // top-most node in the tree. Make sure we don't duplicate it
      if (b != m_prologue_blocks.back()) {
        m_prologue_blocks.push_back(b);
        // Verify there aren't extra instructions in here that we may lose track
        // of
        for (const auto& mie : InstructionIterable(b)) {
          auto insn = mie.insn;
          auto op = insn->opcode();
          always_assert_log(is_const(op) || is_conditional_branch(op),
                            "Unexpected instruction in if-else tree %s",
                            SHOW(insn));
        }
      }
      // Continue the breadth-first walk through every outgoing edge.
      for (auto succ : b->succs()) {
        to_visit.push(succ->target());
      }
      // Make sure all blocks agree on which register is the determiner
      uint16_t candidate_reg = ::find_determining_reg(b, fixpoint);
      if (determining_reg == boost::none) {
        determining_reg = candidate_reg;
      } else {
        always_assert_log(
            *determining_reg == candidate_reg,
            "Conflict: which register are we switching on? %d != %d in %s",
            *determining_reg, candidate_reg, SHOW(*cfg));
      }
    }
  }
  always_assert_log(determining_reg != boost::none,
                    "Couldn't find determining register in %s", SHOW(*cfg));
  return determining_reg;
}
// Simplify jumps in `unit', repeating until a whole round makes no change:
//   - a jcc whose two targets are equal becomes a jmp;
//   - a branch to a block that consists of a single jmp is retargeted at
//     that jmp's target;
//   - a jmp to a block with exactly one predecessor, or to a block that
//     consists of a single jcc, is replaced by a copy of that block's code.
// Prints the unit once at the end if anything changed.
void optimizeJmps(Vunit& unit) {
  // True iff block b consists of exactly one instruction with opcode `op'.
  auto isEmpty = [&](Vlabel b, Vinstr::Opcode op) {
    auto& code = unit.blocks[b].code;
    return code.size() == 1 && op == code[0].op;
  };
  bool changed = false;
  bool ever_changed = false;
  jit::vector<int> npreds(unit.blocks.size(), 0);
  do {
    // Predecessor counts are recomputed from scratch every round; the
    // vector only needs clearing after the first round.
    if (changed) {
      fill(npreds.begin(), npreds.end(), 0);
    }
    changed = false;
    PostorderWalker{unit}.dfs([&](Vlabel b) {
      for (auto s : succs(unit.blocks[b])) {
        npreds[s]++;
      }
    });
    // give roots an extra predecessor to prevent cloning them.
    for (auto b : unit.roots) {
      npreds[b]++;
    }
    PostorderWalker{unit}.dfs([&](Vlabel b) {
      auto& block = unit.blocks[b];
      auto& code = block.code;
      assert(!code.empty());
      if (code.back().op == Vinstr::jcc) {
        auto ss = succs(block);
        if (ss[0] == ss[1]) {
          // both edges have same target, change to jmp
          code.back() = jmp{ss[0]};
          changed = true;
        }
      }
      // Checked again rather than chained with `else': the jcc above may
      // just have been turned into a jmp.
      if (code.back().op == Vinstr::jmp) {
        // `s' aliases the target field of the terminator, so assigning to it
        // retargets the jmp in place.
        auto& s = code.back().jmp_.target;
        if (isEmpty(s, Vinstr::jmp)) {
          // skip over s
          s = unit.blocks[s].code.back().jmp_.target;
          changed = true;
        } else if (npreds[s] == 1 || isEmpty(s, Vinstr::jcc)) {
          // overwrite jmp with copy of s
          // (code2 is looked up before pop_back(), which destroys the
          // instruction that `s' refers to.)
          auto& code2 = unit.blocks[s].code;
          code.pop_back();
          code.insert(code.end(), code2.begin(), code2.end());
          changed = true;
        }
      } else {
        for (auto& s : succs(block)) {
          if (isEmpty(s, Vinstr::jmp)) {
            // skip over s
            s = unit.blocks[s].code.back().jmp_.target;
            changed = true;
          }
        }
      }
    });
    ever_changed |= changed;
  } while (changed);
  if (ever_changed) {
    printUnit(kVasmJumpsLevel, "after vasm-jumps", unit);
  }
}
// Successor labels of `block': those of its last instruction, or an empty
// (null) range when the block has no code.
folly::Range<Vlabel*> succs(Vblock& block) {
  auto& code = block.code;
  if (!code.empty()) {
    return succs(code.back());
  }
  return {nullptr, nullptr};
}
// Const overload: forwards to the non-const block overload and returns a
// read-only view of the same labels.
folly::Range<const Vlabel*> succs(const Vblock& block) {
  auto& mutableBlock = const_cast<Vblock&>(block);
  auto labels = succs(mutableBlock);
  return labels.castToConst();
}
// Const overload: forwards to the non-const instruction overload and returns
// a read-only view of the same labels.
folly::Range<const Vlabel*> succs(const Vinstr& inst) {
  auto& mutableInst = const_cast<Vinstr&>(inst);
  auto labels = succs(mutableInst);
  return labels.castToConst();
}
// A block is an exit of the region iff it has no successors.
bool RegionDesc::isExit(BlockId bid) const {
  const auto& successors = succs(bid);
  return successors.empty();
}
/**
 * Tries to fold UTF-8 start-byte paths that run alongside vertex v into v's
 * own character reach.
 *
 * A successor of v with the same predecessors as v and a UTF-8 start reach
 * is a "start sibling"; a predecessor of v with the same successors as v
 * and reach UTF_CONT_CR is an "end sibling". A start sibling with a single
 * out-edge is absorbed (its reach is OR-ed into v and its edges are
 * cleared) when the chain hanging off it has the shape required for a 2-,
 * 3- or 4-byte sequence and finishes at an end sibling.
 *
 * NOTE(review): the comments below state that v has a self loop; presumably
 * callers only pass such vertices -- confirm.
 *
 * Returns true if the graph was changed.
 */
static bool expandCyclic(NGHolder &h, NFAVertex v) {
    DEBUG_PRINTF("inspecting %zu\n", h[v].index);
    bool changes = false;
    auto v_preds = preds(v, h);
    auto v_succs = succs(v, h);
    set<NFAVertex> start_siblings;
    set<NFAVertex> end_siblings;
    // Reference: updates below modify the reach stored in the graph.
    CharReach &v_cr = h[v].char_reach;
    /* We need to find start vertices which have all of our preds.
     * As we have a self loop, it must be one of our succs. */
    for (auto a : adjacent_vertices_range(v, h)) {
        auto a_preds = preds(a, h);
        if (a_preds == v_preds && isutf8start(h[a].char_reach)) {
            DEBUG_PRINTF("%zu is a start v\n", h[a].index);
            start_siblings.insert(a);
        }
    }
    /* We also need to find full cont vertices which have all our own succs;
     * As we have a self loop, it must be one of our preds. */
    for (auto a : inv_adjacent_vertices_range(v, h)) {
        auto a_succs = succs(a, h);
        if (a_succs == v_succs && h[a].char_reach == UTF_CONT_CR) {
            DEBUG_PRINTF("%zu is a full tail cont\n", h[a].index);
            end_siblings.insert(a);
        }
    }
    for (auto s : start_siblings) {
        // Only simple chains are handled: one out-edge per chain vertex.
        if (out_degree(s, h) != 1) {
            continue;
        }
        const CharReach &cr = h[s].char_reach;
        if (cr.isSubsetOf(UTF_TWO_START_CR)) {
            // Two-byte start: its only successor must be an end sibling.
            if (end_siblings.find(*adjacent_vertices(s, h).first)
                == end_siblings.end()) {
                DEBUG_PRINTF("%zu is odd\n", h[s].index);
                continue;
            }
        } else if (cr.isSubsetOf(UTF_THREE_START_CR)) {
            // Three-byte start: one full continuation vertex, then an end
            // sibling.
            NFAVertex m = *adjacent_vertices(s, h).first;
            if (h[m].char_reach != UTF_CONT_CR || out_degree(m, h) != 1) {
                continue;
            }
            if (end_siblings.find(*adjacent_vertices(m, h).first)
                == end_siblings.end()) {
                DEBUG_PRINTF("%zu is odd\n", h[s].index);
                continue;
            }
        } else if (cr.isSubsetOf(UTF_FOUR_START_CR)) {
            // Four-byte start: two full continuation vertices, then an end
            // sibling.
            NFAVertex m1 = *adjacent_vertices(s, h).first;
            if (h[m1].char_reach != UTF_CONT_CR || out_degree(m1, h) != 1) {
                continue;
            }
            NFAVertex m2 = *adjacent_vertices(m1, h).first;
            if (h[m2].char_reach != UTF_CONT_CR || out_degree(m2, h) != 1) {
                continue;
            }
            if (end_siblings.find(*adjacent_vertices(m2, h).first)
                == end_siblings.end()) {
                DEBUG_PRINTF("%zu is odd\n", h[s].index);
                continue;
            }
        } else {
            DEBUG_PRINTF("%zu is bad\n", h[s].index);
            continue;
        }
        // Absorb the start sibling into v and disconnect it.
        v_cr |= cr;
        clear_vertex(s, h);
        changes = true;
    }
    if (changes) {
        v_cr |= UTF_CONT_CR; /* we need to add in cont reach */
        v_cr.set(0xc0); /* we can also add in the forbidden bytes as we require
                         * valid unicode data */
        v_cr.set(0xc1);
        v_cr |= CharReach(0xf5, 0xff);
    }
    return changes;
}
/** * Chain the retranslation blocks. This method enforces that, for * each region block, all its successor have distinct SrcKeys. */ void RegionDesc::chainRetransBlocks() { jit::vector<Chain> chains; BlockToChainMap block2chain; // 1. Initially assign each region block to its own chain. for (auto b : blocks()) { auto bid = b->id(); auto cid = chains.size(); chains.push_back({cid, {bid}}); block2chain[bid] = cid; } // 2. For each block, if it has 2 successors with the same SrcKey, // then merge the successors' chains into one. for (auto b : blocks()) { auto bid = b->id(); const auto& succSet = succs(bid); for (auto it1 = succSet.begin(); it1 != succSet.end(); it1++) { auto bid1 = *it1; auto cid1 = block2chain[bid1]; for (auto it2 = it1 + 1; it2 != succSet.end(); it2++) { auto bid2 = *it2; auto cid2 = block2chain[bid2]; if (data(bid1).block->start() == data(bid2).block->start()) { mergeChains(chains[cid1], chains[cid2], block2chain); } } } } // 3. Sort each chain. In general, we want to sort each chain in // decreasing order of profile weights. However, note that this // transformation can turn acyclic graphs into cyclic ones (see // example below). Therefore, if JitLoops are disabled, we // instead sort each chain following the original block order, // which prevents loops from being generated if the region was // originally acyclic. // // Here's an example showing how an acyclic CFG can become cyclic // by chaining its retranslation blocks: // // - Region before chaining retranslation blocks, where B2' and B2" // are retranslations starting at the same SrcKey: // B1 -> B2' // B1 -> B2" // B2' -> B3 // B3 -> B2" // // - Region after sorting the chain as B2" -R-> B2': // B1 -> B2" // B2" -R-> B2' // B2' -> B3 // B3 -> B2" // Note the cycle: B2" -R-> B2' -> B3 -> B2". // auto profData = mcg->tx().profData(); auto weight = [&](RegionDesc::BlockId bid) { return hasTransID(bid) ? 
profData->absTransCounter(getTransID(bid)) : 0; }; auto sortGeneral = [&](RegionDesc::BlockId bid1, RegionDesc::BlockId bid2) { return weight(bid1) > weight(bid2); }; using SortFun = std::function<bool(RegionDesc::BlockId, RegionDesc::BlockId)>; SortFun sortFunc = sortGeneral; hphp_hash_map<RegionDesc::BlockId, uint32_t> origBlockOrder; if (!RuntimeOption::EvalJitLoops) { for (uint32_t i = 0; i < m_blocks.size(); i++) { origBlockOrder[m_blocks[i]->id()] = i; } auto sortAcyclic = [&](RegionDesc::BlockId bid1, RegionDesc::BlockId bid2) { return origBlockOrder[bid1] < origBlockOrder[bid2]; }; sortFunc = sortAcyclic; } TRACE(1, "chainRetransBlocks: computed chains:\n"); for (auto& c : chains) { std::sort(c.blocks.begin(), c.blocks.end(), sortFunc); if (Trace::moduleEnabled(Trace::region, 1) && c.blocks.size() > 0) { FTRACE(1, " -> {} (w={})", c.blocks[0], weight(c.blocks[0])); for (size_t i = 1; i < c.blocks.size(); i++) { FTRACE(1, ", {} (w={})", c.blocks[i], weight(c.blocks[i])); } FTRACE(1, "\n"); } } // 4. Set the nextRetrans blocks according to the computed chains. for (auto& c : chains) { if (c.blocks.size() == 0) continue; for (size_t i = 0; i < c.blocks.size() - 1; i++) { setNextRetrans(c.blocks[i], c.blocks[i + 1]); } } // 5. For each block with multiple successors in the same chain, // only keep the successor that first appears in the chain. for (auto b : blocks()) { auto& succSet = data(b->id()).succs; for (auto s : succSet) { auto& c = chains[block2chain[s]]; auto selectedSucc = findFirstInSet(c, succSet); for (auto other : c.blocks) { if (other == selectedSucc) continue; succSet.erase(other); } } } // 6. Reorder the blocks in the region in topological order (if // region is acyclic), since the previous steps may break it. sortBlocks(); }
// Simplify jumps in `unit', repeating until a whole round makes no change:
//   - a jcc whose two targets are equal becomes a jmp;
//   - a jcc with a target block that consists of a single fallback becomes
//     a fallbackcc followed by a jmp to the other target;
//   - a branch to a block that consists of a single jmp is retargeted at
//     that jmp's target;
//   - a jmp to a block with exactly one predecessor, or to a block that
//     consists of a single jcc, is replaced by a copy of that block's code.
// Prints the unit once at the end if anything changed.
void optimizeJmps(Vunit& unit) {
  // True iff block b consists of exactly one instruction with opcode `op'.
  auto isEmpty = [&](Vlabel b, Vinstr::Opcode op) {
    auto& code = unit.blocks[b].code;
    return code.size() == 1 && op == code[0].op;
  };
  bool changed = false;
  bool ever_changed = false;
  // The number of incoming edges from (reachable) predecessors for each block.
  // It is maintained as an upper bound of the actual value during the
  // transformation.
  jit::vector<int> npreds(unit.blocks.size(), 0);
  do {
    // Counts are recomputed from scratch every round; the vector only needs
    // clearing after the first round.
    if (changed) {
      std::fill(begin(npreds), end(npreds), 0);
    }
    changed = false;
    PostorderWalker{unit}
      .dfs([&](Vlabel b) {
        for (auto s : succs(unit.blocks[b])) {
          npreds[s]++;
        }
      });
    // give entry an extra predecessor to prevent cloning it.
    npreds[unit.entry]++;
    PostorderWalker{unit}
      .dfs([&](Vlabel b) {
        auto& block = unit.blocks[b];
        auto& code = block.code;
        assertx(!code.empty());
        if (code.back().op == Vinstr::jcc) {
          auto ss = succs(block);
          if (ss[0] == ss[1]) {
            // both edges have same target, change to jmp
            code.back() = jmp{ss[0]};
            // Two edges into the target collapsed into one.
            --npreds[ss[0]];
            changed = true;
          } else {
            // Work on a copy of the jcc, normalized so that a fallback-only
            // target (if any) ends up in targets[1].
            auto jcc_i = code.back().jcc_;
            if (isEmpty(jcc_i.targets[0], Vinstr::fallback)) {
              jcc_i = jcc{ccNegate(jcc_i.cc), jcc_i.sf,
                          {jcc_i.targets[1], jcc_i.targets[0]}};
            }
            if (isEmpty(jcc_i.targets[1], Vinstr::fallback)) {
              // replace jcc with fallbackcc and jmp
              const auto& fb_i = unit.blocks[jcc_i.targets[1]].code[0].fallback_;
              const auto t0 = jcc_i.targets[0];
              // Saved before pop_back() so both replacement instructions can
              // carry the original jcc's origin.
              const auto jcc_origin = code.back().origin;
              code.pop_back();
              code.emplace_back(
                fallbackcc{jcc_i.cc, jcc_i.sf, fb_i.dest, fb_i.trflags});
              code.back().origin = jcc_origin;
              code.emplace_back(jmp{t0});
              code.back().origin = jcc_origin;
              changed = true;
            }
          }
        }
        // `s' aliases a target field of the terminator, so assigning to it
        // retargets the branch in place; npreds is moved along with the edge.
        for (auto& s : succs(block)) {
          if (isEmpty(s, Vinstr::jmp)) {
            // skip over s
            --npreds[s];
            s = unit.blocks[s].code.back().jmp_.target;
            ++npreds[s];
            changed = true;
          }
        }
        if (code.back().op == Vinstr::jmp) {
          // Copied by value: the jmp holding this target is popped below.
          auto s = code.back().jmp_.target;
          if (npreds[s] == 1 || isEmpty(s, Vinstr::jcc)) {
            // overwrite jmp with copy of s
            auto& code2 = unit.blocks[s].code;
            code.pop_back();
            code.insert(code.end(), code2.begin(), code2.end());
            // This block no longer jumps to s. If s still has other
            // predecessors, the terminator copied into this block adds one
            // incoming edge to each of its targets.
            if (--npreds[s]) {
              for (auto ss : succs(block)) {
                ++npreds[ss];
              }
            }
            changed = true;
          }
        }
      });
    ever_changed |= changed;
  } while (changed);
  if (ever_changed) {
    printUnit(kVasmJumpsLevel, "after vasm-jumps", unit);
  }
}