/*
 * Run `simplify` on the instruction at index `i` of block `b` through
 * vmodify(), keeping the bookkeeping in `env` in sync with the edit.
 *
 * `simplify` receives the Vout that collects replacement instructions and
 * returns how many instructions, starting at `i`, are to be removed.  For
 * those instructions the use counts of their inputs are decremented; for
 * every instruction emitted into the Vout's block the use counts are
 * incremented and the defining opcode of each defined vreg is recorded.
 *
 * The return value is whatever vmodify() returns.
 */
bool simplify_impl(Env& env, Vlabel b, size_t i, Simplify simplify) {
  auto& unit = env.unit;

  return vmodify(unit, b, i, [&] (Vout& v) {
    auto const nremove = simplify(v);
    auto& all_blocks = unit.blocks;

    // Inputs of the instructions that are going away lose one use each.
    auto const stop = i + nremove;
    for (auto idx = i; idx < stop; ++idx) {
      auto& dying = all_blocks[b].code[idx];
      visitUses(unit, dying, [&] (Vreg r) {
        --env.use_counts[r];
      });
    }

    // Account for the freshly emitted instructions.  They may mention vregs
    // beyond the current table sizes, so grow the tables on demand.
    for (auto const& added : all_blocks[Vlabel(v)].code) {
      visitUses(unit, added, [&] (Vreg r) {
        auto& counts = env.use_counts;
        if (r >= counts.size()) {
          counts.resize(size_t{r}+1);
        }
        ++counts[r];
      });
      visitDefs(unit, added, [&] (Vreg r) {
        auto& defs = env.def_insts;
        if (r >= defs.size()) {
          defs.resize(size_t{r}+1, Vinstr::nop);
        }
        defs[r] = added.op;
      });
    }

    return nremove;
  });
}
// Remove dead instructions by doing a traditional liveness analysis. // instructions that mutate memory, physical registers, or status flags // are considered useful. All branches are considered useful. // // Given SSA, there's a faster sparse version of this algorithm that marks // useful instructions in one pass, then transitively marks pure instructions // that define inputs to useful instructions. However it requires a mapping // from vreg numbers to the instruction that defines them, and a way to address // individual instructions. // // We could remove useless branches by computing the post-dominator tree and // RDF(b) for each block; then a branch is only useful if it controls whether // or not a useful block executes, and useless branches can be forwarded to // the nearest useful post-dominator. void removeDeadCode(Vunit& unit) { auto blocks = sortBlocks(unit); jit::vector<LiveSet> livein(unit.blocks.size()); LiveSet live(unit.next_vr); auto pass = [&](bool mutate) { bool changed = false; for (auto blockIt = blocks.end(); blockIt != blocks.begin();) { auto b = *--blockIt; auto& block = unit.blocks[b]; live.reset(); for (auto s : succs(block)) { if (!livein[s].empty()) { live |= livein[s]; } } for (auto i = block.code.end(); i != block.code.begin();) { auto& inst = *--i; auto useful = effectful(inst); visitDefs(unit, inst, [&](Vreg r) { if (r.isPhys() || live.test(r)) { useful = true; live.reset(r); } }); if (useful) { visitUses(unit, inst, [&](Vreg r) { live.set(r); }); } else if (mutate) { inst = nop{}; changed = true; } } if (mutate) { assert(live == livein[b]); } else { if (live != livein[b]) { livein[b] = live; changed = true; } } } return changed; }; // analyze until livein reaches a fixed point while (pass(false)) {} // nop-out useless instructions if (pass(true)) { for (auto b : blocks) { auto& code = unit.blocks[b].code; auto end = std::remove_if(code.begin(), code.end(), [&](Vinstr& inst) { return inst.op == Vinstr::nop; }); code.erase(end, 
code.end()); } printUnit(kVasmDCELevel, "after vasm-dead", unit); } }
// Remove dead instructions by doing a traditional liveness analysis. // instructions that mutate memory, physical registers, or status flags // are considered useful. All branches are considered useful. // // Given SSA, there's a faster sparse version of this algorithm that marks // useful instructions in one pass, then transitively marks pure instructions // that define inputs to useful instructions. However it requires a mapping // from vreg numbers to the instruction that defines them, and a way to address // individual instructions. // // We could remove useless branches by computing the post-dominator tree and // RDF(b) for each block; then a branch is only useful if it controls whether // or not a useful block executes, and useless branches can be forwarded to // the nearest useful post-dominator. void removeDeadCode(Vunit& unit) { Timer timer(Timer::vasm_dce); auto blocks = sortBlocks(unit); jit::vector<LiveSet> livein(unit.blocks.size()); LiveSet live(unit.next_vr); auto pass = [&](bool mutate) { bool changed = false; for (auto blockIt = blocks.end(); blockIt != blocks.begin();) { auto b = *--blockIt; auto& block = unit.blocks[b]; live.reset(); for (auto s : succs(block)) { if (!livein[s].empty()) { live |= livein[s]; } } for (auto i = block.code.end(); i != block.code.begin();) { auto& inst = *--i; auto useful = effectful(inst); visitDefs(unit, inst, [&](Vreg r) { if (r.isPhys() || live.test(r)) { useful = true; live.reset(r); } }); if (useful) { visitUses(unit, inst, [&](Vreg r) { live.set(r); }); } else if (mutate) { inst = nop{}; changed = true; } } if (mutate) { assertx(live == livein[b]); } else { if (live != livein[b]) { livein[b] = live; changed = true; } } } return changed; }; // analyze until livein reaches a fixed point while (pass(false)) {} auto const changed = pass(true); removeTrivialNops(unit); if (changed) { printUnit(kVasmDCELevel, "after vasm-dead", unit); } }
/*
 * Branch fusion:
 * Analyze blocks one at a time, looking for the sequence:
 *
 *   setcc cc, f1 => b
 *   ...
 *   testb b, b => f2
 *   ...
 *   jcc E|NE, f2
 *
 * If found, and f2 is only used by the jcc, then change the code to:
 *
 *   setcc cc, f1 => b
 *   ...
 *   nop
 *   ...
 *   jcc !cc|cc, f1
 *
 * Later, vasm-dead will clean up the nop, and the setcc if b became dead.
 *
 * During the search, any other instruction that has a status flag result
 * will reset the pattern matcher.  No instruction can "kill" flags,
 * since flags are SSA variables.  However the transformation we want to
 * make extends the setcc flags lifetime, and we don't want it to overlap
 * another flag's lifetime.
 *
 * A new setcc also discards any testb matched so far: the remembered testb
 * tested the *previous* setcc's result, so fusing a later jcc on its flags
 * with the new setcc's condition and flags would branch on the wrong
 * condition.
 */
void fuseBranches(Vunit& unit) {
  auto blocks = sortBlocks(unit);

  // Count the uses of every vreg across the visited blocks, so we can tell
  // whether the testb flags feed anything other than the jcc.
  jit::vector<unsigned> uses(unit.next_vr);
  for (auto b : blocks) {
    for (auto& inst : unit.blocks[b].code) {
      visitUses(unit, inst, [&](Vreg r) {
        uses[r]++;
      });
    }
  }

  bool should_print = false;
  for (auto b : blocks) {
    auto& code = unit.blocks[b].code;

    // Matcher state, reset for every block.  `cc` and `testb_index` are
    // only meaningful while the corresponding Vreg below is valid; they
    // are value-initialized so they are never read indeterminate.
    ConditionCode cc{};
    Vreg setcc_flags, setcc_dest, testb_flags;
    unsigned testb_index = 0;

    for (unsigned i = 0, n = code.size(); i < n; ++i) {
      if (code[i].op == Vinstr::setcc) {
        cc = code[i].setcc_.cc;
        setcc_flags = code[i].setcc_.sf;
        setcc_dest = code[i].setcc_.d;
        // Any earlier testb match belonged to the previous setcc; drop it
        // so a later jcc cannot be paired with this setcc by mistake.
        testb_flags = Vreg{};
        continue;
      }
      if (setcc_flags.isValid() &&
          match_testb(code[i], setcc_dest) &&
          uses[code[i].testb_.sf] == 1) {
        testb_flags = code[i].testb_.sf;
        testb_index = i;
        continue;
      }
      // Only consider the jcc once a testb has actually been matched;
      // this also guarantees testb_index and cc hold real values.
      if (testb_flags.isValid() && match_jcc(code[i], testb_flags)) {
        code[testb_index] = nop{}; // erase the testb
        auto& jcc = code[i].jcc_;
        jcc.cc = jcc.cc == CC_NE ? cc : ccNegate(cc);
        jcc.sf = setcc_flags;
        should_print = true;
        continue;
      }
      if (setcc_flags.isValid() && sets_flags(code[i])) {
        // Another flag result would overlap the extended setcc flags
        // lifetime; abandon the current match.
        setcc_flags = testb_flags = Vreg{};
      }
    }
  }

  if (should_print) {
    printUnit(kVasmFusionLevel, "after vasm-fusion", unit);
  }
}
/*
 * Immediate folding: hand every instruction in the visited blocks to
 * Folder::fold(), which may rewrite it using the known constant values of
 * its vreg operands.
 */
void foldImms(Vunit& unit) {
  assertx(check(unit)); // especially, SSA

  // Block order is irrelevant here, but only reachable blocks are visited.
  auto order = sortBlocks(unit);

  // Per-register "is used" flag.  If a SR is used then certain
  // optimizations will not fire, since they do not set the condition codes
  // as the original instruction(s) would.
  jit::vector<bool> used(unit.next_vr);
  for (auto label : order) {
    auto& code = unit.blocks[label].code;
    for (auto& inst : code) {
      visitUses(unit, inst, [&](Vreg r) { used[r] = true; });
    }
  }

  Folder folder(std::move(used));
  folder.vals.resize(unit.next_vr);
  folder.valid.resize(unit.next_vr);

  // Record which vregs are constants, together with their values.
  for (auto const& kv : unit.constToReg) {
    auto const reg = kv.second;
    folder.valid.set(reg);
    folder.vals[reg] = kv.first.val;
  }

  // Rewrite instructions.  fold() may replace `inst` wholesale, so the
  // origin is saved beforehand and restored afterwards.
  for (auto label : order) {
    auto& code = unit.blocks[label].code;
    for (auto& inst : code) {
      switch (inst.op) {
#define O(name, imms, uses, defs)             \
        case Vinstr::name: {                  \
          auto saved_origin = inst.origin;    \
          folder.fold(inst.name##_, inst);    \
          inst.origin = saved_origin;         \
          break;                              \
        }
        VASM_OPCODES
#undef O
      }
    }
  }

  printUnit(kVasmImmsLevel, "after foldImms", unit);
}