// Assign virtual registers to all SSATmps used or defined in reachable
// blocks. This assigns a value register to constants defined by DefConst,
// because some HHIR instructions require them. Ordinary Gen values with
// a known DataType only get one register. Assign "wide" locations when
// possible (when all uses and defs can be wide). These will be assigned
// SIMD registers later.
void assignRegs(IRUnit& unit, Vunit& vunit, CodegenState& state,
                const BlockList& blocks, BackEnd* backend) {
  // visit instructions to find tmps eligible to use SIMD registers
  auto const try_wide = !packed_tv && RuntimeOption::EvalHHIRAllocSIMDRegs;
  boost::dynamic_bitset<> not_wide(unit.numTmps());
  StateVector<SSATmp,SSATmp*> tmps(unit, nullptr);
  for (auto block : blocks) {
    for (auto& inst : *block) {
      for (uint32_t i = 0, n = inst.numSrcs(); i < n; i++) {
        auto s = inst.src(i);
        tmps[s] = s;
        if (!try_wide || !backend->storesCell(inst, i)) {
          not_wide.set(s->id());
        }
      }
      for (auto& d : inst.dsts()) {
        tmps[&d] = &d;
        if (!try_wide || inst.isControlFlow() || !backend->loadsCell(inst)) {
          not_wide.set(d.id());
        }
      }
    }
  }
  // visit each tmp, assign 1 or 2 registers to each.
  auto cns = [&](uint64_t c) { return vunit.makeConst(c); };
  for (auto tmp : tmps) {
    if (!tmp) continue;
    auto forced = forceAlloc(*tmp);
    if (forced != InvalidReg) {
      state.locs[tmp] = Vloc{forced};
      UNUSED Reg64 r = forced;
      FTRACE(kRegAllocLevel, "force t{} in {}\n", tmp->id(), reg::regname(r));
      continue;
    }
    if (tmp->inst()->is(DefConst)) {
      auto c = cns(tmp->rawVal());
      state.locs[tmp] = Vloc{c};
      FTRACE(kRegAllocLevel, "const t{} in %{}\n", tmp->id(), size_t(c));
    } else {
      if (tmp->numWords() == 2) {
        if (!not_wide.test(tmp->id())) {
          auto r = vunit.makeReg();
          state.locs[tmp] = Vloc{Vloc::kWide, r};
          FTRACE(kRegAllocLevel, "def t{} in wide %{}\n", tmp->id(), size_t(r));
        } else {
          auto data = vunit.makeReg();
          auto type = vunit.makeReg();
          state.locs[tmp] = Vloc{data, type};
          FTRACE(kRegAllocLevel, "def t{} in %{},%{}\n", tmp->id(),
                 size_t(data), size_t(type));
        }
      } else {
        auto data = vunit.makeReg();
        state.locs[tmp] = Vloc{data};
        FTRACE(kRegAllocLevel, "def t{} in %{}\n", tmp->id(), size_t(data));
      }
    }
  }
}
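// A standalone sketch (all types and predicates here are illustrative, not
// HHVM's) of the eligibility pass above: a 2-word tmp may live in one wide
// SIMD register only if every use stores a whole cell and its def loads a
// whole cell; any other use, or a def by a control-flow instruction, marks
// it not-wide and forces a data/type register pair instead.
#include <bitset>
#include <cassert>
#include <cstddef>
#include <vector>

constexpr std::size_t kMaxTmps = 64;

struct ToyUse { unsigned tmp; bool storesCell; };
struct ToyDef { unsigned tmp; bool loadsCell; bool isControlFlow; };

std::bitset<kMaxTmps> findNotWide(const std::vector<ToyUse>& uses,
                                  const std::vector<ToyDef>& defs,
                                  bool tryWide) {
  std::bitset<kMaxTmps> notWide;
  for (auto const& u : uses) {
    if (!tryWide || !u.storesCell) notWide.set(u.tmp);
  }
  for (auto const& d : defs) {
    if (!tryWide || d.isControlFlow || !d.loadsCell) notWide.set(d.tmp);
  }
  return notWide;
}

int main() {
  // t0: its def loads a cell and its only use stores one -> stays wide.
  // t1: one use consumes only part of the cell -> demoted to a pair.
  auto notWide = findNotWide({{0, true}, {1, false}},
                             {{0, true, false}, {1, true, false}},
                             /*tryWide=*/true);
  assert(!notWide.test(0));
  assert(notWide.test(1));
}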
Vreg make_const(Vunit& unit, Type type) {
  if (type.subtypeOfAny(TUninit, TInitNull)) {
    // Return undefined value.
    return unit.makeConst(Vconst::Quad);
  }
  if (type <= TNullptr) return unit.makeConst(0);

  assertx(type.hasConstVal());
  if (type <= TBool) return unit.makeConst(type.boolVal());
  if (type <= TDbl) return unit.makeConst(type.dblVal());
  return unit.makeConst(type.rawVal());
}
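// A standalone sketch of the materialization rule above, assuming nothing
// from HHVM: every constant collapses to a single 64-bit immediate. Bools
// become 0/1, Nullptr becomes 0, and doubles are reinterpreted bit-for-bit
// rather than converted (the real code's Uninit/InitNull case just reserves
// an arbitrary "undefined" quad).
#include <cassert>
#include <cstdint>
#include <cstring>

uint64_t materialize_bool(bool b) { return b ? 1 : 0; }
uint64_t materialize_nullptr() { return 0; }
uint64_t materialize_dbl(double d) {
  uint64_t bits;
  std::memcpy(&bits, &d, sizeof bits);  // bit pattern, not a conversion
  return bits;
}

int main() {
  assert(materialize_bool(true) == 1);
  assert(materialize_nullptr() == 0);
  assert(materialize_dbl(1.0) == 0x3ff0000000000000ull);  // IEEE-754 bits
}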
/*
 * Splits the critical edges in `unit', if any.
 * Returns true iff the unit was modified.
 */
bool splitCriticalEdges(Vunit& unit) {
  jit::vector<unsigned> preds(unit.blocks.size());
  jit::flat_set<size_t> catch_blocks;

  for (size_t b = 0; b < unit.blocks.size(); b++) {
    auto succlist = succs(unit.blocks[b]);
    for (auto succ : succlist) {
      preds[succ]++;
    }
  }

  auto changed = false;
  for (size_t pred = 0; pred < unit.blocks.size(); pred++) {
    auto succlist = succs(unit.blocks[pred]);
    if (succlist.size() <= 1) continue;
    for (auto& succ : succlist) {
      if (preds[succ] <= 1) continue;
      // split the critical edge.
      auto middle = unit.makeBlock(unit.blocks[succ].area);
      forwardJmp(unit, catch_blocks, middle, succ);
      succ = middle;
      changed = true;
    }
  }

  // Remove any landingpad{} instructions that were hoisted to split edges.
  for (auto block : catch_blocks) {
    auto& code = unit.blocks[block].code;
    assertx(code.front().op == Vinstr::landingpad);
    code.front() = nop{};
  }

  return changed;
}
/*
 * Splits the critical edges in `unit', if any.
 * Returns true iff the unit was modified.
 */
bool splitCriticalEdges(Vunit& unit) {
  jit::vector<unsigned> preds(unit.blocks.size());

  for (size_t b = 0; b < unit.blocks.size(); b++) {
    auto succlist = succs(unit.blocks[b]);
    for (auto succ : succlist) {
      preds[succ]++;
    }
  }

  auto changed = false;
  for (size_t pred = 0; pred < unit.blocks.size(); pred++) {
    auto succlist = succs(unit.blocks[pred]);
    if (succlist.size() <= 1) continue;
    for (auto& succ : succlist) {
      if (preds[succ] <= 1) continue;
      // split the critical edge.
      auto middle = unit.makeBlock(unit.blocks[succ].area);
      forwardJmp(unit, middle, succ);
      succ = middle;
      changed = true;
    }
  }

  return changed;
}
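// A standalone sketch of the splitting criterion above, on a toy CFG where
// each block is just a successor list (no Vunit, no areas): an edge
// pred->succ is critical when pred has more than one successor and succ has
// more than one predecessor, and the fix is to reroute it through a fresh
// block that only jumps to succ.
#include <cassert>
#include <cstddef>
#include <vector>

using CFG = std::vector<std::vector<std::size_t>>;  // block -> successors

bool splitCriticalEdgesToy(CFG& cfg) {
  std::vector<unsigned> preds(cfg.size());
  for (auto const& succlist : cfg) {
    for (auto succ : succlist) preds[succ]++;
  }
  bool changed = false;
  for (std::size_t pred = 0; pred < preds.size(); ++pred) {
    if (cfg[pred].size() <= 1) continue;
    for (std::size_t k = 0; k < cfg[pred].size(); ++k) {
      auto succ = cfg[pred][k];
      if (preds[succ] <= 1) continue;
      auto middle = cfg.size();   // split: add an empty forwarding block
      cfg.push_back({succ});
      cfg[pred][k] = middle;
      changed = true;
    }
  }
  return changed;
}

int main() {
  // B0 branches to B1 and B2; B1 falls through to B2. B0->B2 is critical.
  CFG cfg{{1, 2}, {2}, {}};
  assert(splitCriticalEdgesToy(cfg));
  assert(cfg.size() == 4);                   // one middle block was added
  assert(cfg[0][1] == 3 && cfg[3][0] == 2);  // B0 -> B3 -> B2
}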
jit::vector<VMoveInfo> doVregMoves(Vunit& unit, MovePlan& moves) {
  constexpr auto N = 64;
  assert(std::max(x64::abi.all().size(), arm::abi.all().size()) == N);

  jit::vector<VMoveInfo> howTo;
  CycleInfo cycle_mem[N];
  List<CycleInfo> cycles(cycle_mem, 0, N);
  PhysReg::Map<int> outDegree;
  PhysReg::Map<int> index;

  for (auto reg : moves) {
    // Ignore moves from a register to itself
    if (reg == moves[reg]) moves[reg] = InvalidReg;
    index[reg] = -1;
  }

  // Iterate over the nodes filling in outDegree[] and cycles[] as we go
  int nextIndex = 0;
  for (auto reg : moves) {
    // skip registers we've visited already.
    if (index[reg] >= 0) continue;

    // Begin walking a path from reg.
    for (auto node = reg;;) {
      assert(nextIndex < N);
      index[node] = nextIndex++;
      auto next = moves[node];
      if (next != InvalidReg) {
        ++outDegree[next];
        if (index[next] < 0) {
          // There is an edge from node to next, and next has not been
          // visited.  Extend current path to include next, then loop.
          node = next;
          continue;
        }
        // next already visited; check if next is on current path.
        if (index[next] >= index[reg]) {
          // found a cycle.
          cycles.push_back({ next, nextIndex - index[next] });
        }
      }
      break;
    }
  }

  // Handle all moves that aren't part of a cycle. Only nodes with outdegree
  // zero are put into the queue, which is how nodes in a cycle get excluded.
  {
    PhysReg q[N];
    int qBack = 0;
    auto enque = [&](PhysReg r) { assert(qBack < N); q[qBack++] = r; };
    for (auto node : outDegree) {
      if (outDegree[node] == 0) enque(node);
    }
    for (int i = 0; i < qBack; ++i) {
      auto node = q[i];
      if (moves[node] == InvalidReg) continue;
      auto nextNode = moves[node];
      howTo.push_back({VMoveInfo::Kind::Move, nextNode, node});
      --outDegree[nextNode];
      if (outDegree[nextNode] == 0) enque(nextNode);
    }
  }

  // Deal with any cycles we encountered
  for (auto const& cycle : cycles) {
    // can't use xchg if one of the registers is SIMD
    bool hasSIMDReg = cycleHasSIMDReg(cycle, moves);
    if (cycle.length == 2 && !hasSIMDReg) {
      auto v = cycle.node;
      auto w = moves[v];
      howTo.push_back({VMoveInfo::Kind::Xchg, w, v});
    } else if (cycle.length == 3 && !hasSIMDReg) {
      auto v = cycle.node;
      auto w = moves[v];
      howTo.push_back({VMoveInfo::Kind::Xchg, w, v});
      auto x = moves[w];
      howTo.push_back({VMoveInfo::Kind::Xchg, x, w});
    } else {
      auto t = unit.makeReg();
      auto v = cycle.node;
      howTo.push_back({VMoveInfo::Kind::Move, v, t});
      auto w = v;
      auto x = moves[w];
      while (x != v) {
        howTo.push_back({VMoveInfo::Kind::Move, x, w});
        w = x;
        x = moves[w];
      }
      howTo.push_back({VMoveInfo::Kind::Move, t, w});
    }
  }
  return howTo;
}
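// A standalone sketch of the plan-resolution strategy above, on a toy
// machine whose "registers" are small integers and whose plan maps each
// destination to its source (-1 for none). Chains are emitted once a
// destination's out-degree drops to zero; each remaining cycle is broken
// with a scratch register (the real code prefers xchg for short
// general-purpose cycles, which this sketch skips).
#include <cassert>
#include <cstddef>
#include <cstdio>
#include <vector>

struct ToyMove { int src, dst; };

std::vector<ToyMove> resolve(std::vector<int> plan) {
  int const n = static_cast<int>(plan.size());
  std::vector<int> outDeg(n, 0);
  for (int dst = 0; dst < n; ++dst) {
    if (plan[dst] == dst) plan[dst] = -1;        // drop self-moves
    if (plan[dst] >= 0) ++outDeg[plan[dst]];
  }
  std::vector<ToyMove> howTo;
  std::vector<int> q;                            // regs safe to overwrite
  for (int r = 0; r < n; ++r) if (outDeg[r] == 0) q.push_back(r);
  for (std::size_t i = 0; i < q.size(); ++i) {   // acyclic chains first
    int const dst = q[i], src = plan[dst];
    if (src < 0) continue;
    howTo.push_back({src, dst});
    if (--outDeg[src] == 0) q.push_back(src);
  }
  for (int start = 0; start < n; ++start) {      // leftovers are cycles
    if (plan[start] < 0 || outDeg[start] == 0) continue;
    int const tmp = n;                           // scratch register
    howTo.push_back({start, tmp});               // save the first value
    int dst = start;
    for (int src = plan[dst]; src != start; dst = src, src = plan[dst]) {
      howTo.push_back({src, dst});
      outDeg[src] = 0;                           // mark as handled
    }
    howTo.push_back({tmp, dst});                 // close the cycle
    outDeg[start] = 0;
  }
  return howTo;
}

int main() {
  // r0 <-> r1 swap, plus r2 reading r0's old value off the cycle.
  for (auto m : resolve({1, 0, 0})) std::printf("r%d <- r%d\n", m.dst, m.src);
}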
void lower_vcall(Vunit& unit, Inst& inst, Vlabel b, size_t i) {
  auto& blocks = unit.blocks;
  auto const& vinstr = blocks[b].code[i];

  auto const is_vcall = vinstr.op == Vinstr::vcall;
  auto const vcall = vinstr.vcall_;
  auto const vinvoke = vinstr.vinvoke_;

  // We lower vinvoke in two phases, and `inst' is overwritten after the first
  // phase. We need to save any of its parameters that we care about in the
  // second phase ahead of time.
  auto const& vargs = unit.vcallArgs[inst.args];
  auto const dests = unit.tuples[inst.d];
  auto const destType = inst.destType;

  auto const scratch = unit.makeScratchBlock();
  SCOPE_EXIT { unit.freeScratchBlock(scratch); };
  Vout v(unit, scratch, vinstr.origin);

  int32_t const adjust = (vargs.stkArgs.size() & 0x1) ? sizeof(uintptr_t) : 0;
  if (adjust) v << lea{rsp()[-adjust], rsp()};

  // Push stack arguments, in reverse order.
  for (int i = vargs.stkArgs.size() - 1; i >= 0; --i) {
    v << push{vargs.stkArgs[i]};
  }

  // Get the arguments in the proper registers.
  RegSet argRegs;
  bool needsCopy = false;
  auto doArgs = [&] (const VregList& srcs, PhysReg (*r)(size_t)) {
    VregList argDests;
    for (size_t i = 0, n = srcs.size(); i < n; ++i) {
      auto const reg = r(i);
      argDests.push_back(reg);
      argRegs |= reg;
    }
    if (argDests.size()) {
      v << copyargs{v.makeTuple(srcs),
                    v.makeTuple(std::move(argDests))};
    }
  };
  switch (arch()) {
    case Arch::X64:
    case Arch::PPC64:
      doArgs(vargs.args, rarg);
      break;
    case Arch::ARM:
      if (vargs.indirect) {
        if (vargs.args.size() > 0) {
          // First arg is a pointer to storage for the return value.
          v << copy{vargs.args[0], rret_indirect()};
          VregList rem(vargs.args.begin() + 1, vargs.args.end());
          doArgs(rem, rarg);
          needsCopy = true;
        }
      } else {
        doArgs(vargs.args, rarg);
      }
  }
  doArgs(vargs.simdArgs, rarg_simd);

  // Emit the appropriate call instruction sequence.
  emitCall(v, inst.call, argRegs);

  // Handle fixup and unwind information.
  if (inst.fixup.isValid()) {
    v << syncpoint{inst.fixup};
  }

  if (!is_vcall) {
    auto& targets = vinvoke.targets;
    v << unwind{{targets[0], targets[1]}};

    // Insert an lea fixup for any stack args at the beginning of the catch
    // block.
    if (auto rspOffset = ((vargs.stkArgs.size() + 1) & ~1) *
                         sizeof(uintptr_t)) {
      auto& taken = unit.blocks[targets[1]].code;
      assertx(taken.front().op == Vinstr::landingpad ||
              taken.front().op == Vinstr::jmp);

      Vinstr vi { lea{rsp()[rspOffset], rsp()} };
      vi.origin = taken.front().origin;

      if (taken.front().op == Vinstr::jmp) {
        taken.insert(taken.begin(), vi);
      } else {
        taken.insert(taken.begin() + 1, vi);
      }
    }

    // Write out the code so far to the end of b. Remaining code will be
    // emitted to the next block.
    vector_splice(blocks[b].code, i, 1, blocks[scratch].code);
  } else if (vcall.nothrow) {
    v << nothrow{};
  }

  // Copy back the indirect result pointer into the return register.
  if (needsCopy) {
    v << copy{rret_indirect(), rret(0)};
  }

  // For vinvoke, `inst' is no longer valid after this point.

  // Copy the call result to the destination register(s).
  switch (destType) {
    case DestType::TV:
      static_assert(offsetof(TypedValue, m_data) == 0, "");
      static_assert(offsetof(TypedValue, m_type) == 8, "");
      if (dests.size() == 2) {
        v << copy2{rret(0), rret(1), dests[0], dests[1]};
      } else {
        // We have cases where we statically know the type but need the value
        // from native call. Even if the type does not really need a register
        // (e.g., InitNull), a Vreg is still allocated in assignRegs(), so the
        // following assertion holds.
        assertx(dests.size() == 1);
        v << copy{rret(0), dests[0]};
      }
      break;

    case DestType::SIMD:
      static_assert(offsetof(TypedValue, m_data) == 0, "");
      static_assert(offsetof(TypedValue, m_type) == 8, "");
      assertx(dests.size() == 1);
      pack2(v, rret(0), rret(1), dests[0]);
      break;

    case DestType::SSA:
    case DestType::Byte:
      assertx(dests.size() == 1);
      assertx(dests[0].isValid());
      // Copy the single-register result to dests[0].
      v << copy{rret(0), dests[0]};
      break;

    case DestType::Dbl:
      // Copy the single-register result to dests[0].
      assertx(dests.size() == 1);
      assertx(dests[0].isValid());
      v << copy{rret_simd(0), dests[0]};
      break;

    case DestType::None:
      assertx(dests.empty());
      break;
  }

  if (vargs.stkArgs.size() > 0) {
    auto const delta = safe_cast<int32_t>(
      vargs.stkArgs.size() * sizeof(uintptr_t) + adjust
    );
    v << lea{rsp()[delta], rsp()};
  }

  // Insert new instructions to the appropriate block.
  if (is_vcall) {
    vector_splice(blocks[b].code, i, 1, blocks[scratch].code);
  } else {
    vector_splice(blocks[vinvoke.targets[0]].code, 0, 0,
                  blocks[scratch].code);
  }
}
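// A standalone check of the stack-adjustment arithmetic used above,
// assuming the x86-64 ABI: rsp must be 16-byte aligned at the call and
// every pushed slot is 8 bytes, so an odd argument count needs one 8-byte
// pad, and the catch block must pop the count rounded up to an even number
// of slots -- exactly ((n + 1) & ~1) * sizeof(uintptr_t).
#include <cassert>
#include <cstddef>
#include <cstdint>

int main() {
  for (std::size_t n = 0; n < 8; ++n) {
    std::size_t const adjust = (n & 0x1) ? sizeof(uintptr_t) : 0;
    std::size_t const pushed = n * sizeof(uintptr_t) + adjust;
    std::size_t const rspOffset =
      ((n + 1) & ~std::size_t{1}) * sizeof(uintptr_t);
    assert(pushed % 16 == 0);       // alignment is preserved
    assert(rspOffset == pushed);    // the catch block pops the same amount
  }
}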
void lower_vcall(Vunit& unit, Inst& inst, Vlabel b, size_t i) {
  auto& blocks = unit.blocks;
  auto const& vinstr = blocks[b].code[i];

  auto const is_vcall = vinstr.op == Vinstr::vcall;
  auto const vcall = vinstr.vcall_;
  auto const vinvoke = vinstr.vinvoke_;

  // We lower vinvoke in two phases, and `inst' is overwritten after the first
  // phase. We need to save any of its parameters that we care about in the
  // second phase ahead of time.
  auto const& vargs = unit.vcallArgs[inst.args];
  auto const dests = unit.tuples[inst.d];
  auto const destType = inst.destType;

  auto const scratch = unit.makeScratchBlock();
  SCOPE_EXIT { unit.freeScratchBlock(scratch); };
  Vout v(unit, scratch, vinstr.irctx());

  // Push stack arguments, in reverse order. Push in pairs without padding
  // except for the last argument (pushed first) which should be padded if
  // there are an odd number of arguments.
  auto numArgs = vargs.stkArgs.size();
  int32_t const adjust = (numArgs & 0x1) ? sizeof(uintptr_t) : 0;
  if (adjust) {
    // Using InvalidReg below fails SSA checks and simplify pass, so just
    // push the arg twice. It's on the same cacheline and will actually
    // perform faster than an explicit lea.
    v << pushp{vargs.stkArgs[numArgs - 1], vargs.stkArgs[numArgs - 1]};
    --numArgs;
  }
  for (auto i2 = numArgs; i2 >= 2; i2 -= 2) {
    v << pushp{vargs.stkArgs[i2 - 1], vargs.stkArgs[i2 - 2]};
  }

  // Get the arguments in the proper registers.
  RegSet argRegs;
  auto doArgs = [&] (const VregList& srcs, PhysReg (*r)(size_t)) {
    VregList argDests;
    for (size_t i2 = 0, n = srcs.size(); i2 < n; ++i2) {
      auto const reg = r(i2);
      argDests.push_back(reg);
      argRegs |= reg;
    }
    if (argDests.size()) {
      v << copyargs{v.makeTuple(srcs),
                    v.makeTuple(std::move(argDests))};
    }
  };
  doArgs(vargs.indRetArgs, rarg_ind_ret);
  doArgs(vargs.args, rarg);
  doArgs(vargs.simdArgs, rarg_simd);

  // Emit the appropriate call instruction sequence.
  emitCall(v, inst.call, argRegs);

  // Handle fixup and unwind information.
  if (inst.fixup.isValid()) {
    v << syncpoint{inst.fixup};
  }

  if (!is_vcall) {
    auto& targets = vinvoke.targets;
    v << unwind{{targets[0], targets[1]}};

    // Insert an lea fixup for any stack args at the beginning of the catch
    // block.
    if (auto rspOffset = ((vargs.stkArgs.size() + 1) & ~1) *
                         sizeof(uintptr_t)) {
      auto& taken = unit.blocks[targets[1]].code;
      assertx(taken.front().op == Vinstr::landingpad ||
              taken.front().op == Vinstr::jmp);

      Vinstr vi { lea{rsp()[rspOffset], rsp()}, taken.front().irctx() };

      if (taken.front().op == Vinstr::jmp) {
        taken.insert(taken.begin(), vi);
      } else {
        taken.insert(taken.begin() + 1, vi);
      }
    }

    // Write out the code so far to the end of b. Remaining code will be
    // emitted to the next block.
    vector_splice(blocks[b].code, i, 1, blocks[scratch].code);
  } else if (vcall.nothrow) {
    v << nothrow{};
  }

  // For vinvoke, `inst' is no longer valid after this point.

  // Copy the call result to the destination register(s).
  switch (destType) {
    case DestType::TV:
      static_assert(offsetof(TypedValue, m_data) == 0, "");
      static_assert(offsetof(TypedValue, m_type) == 8, "");
      if (dests.size() == 2) {
        switch (arch()) {
          case Arch::X64: // fall through
          case Arch::PPC64:
            v << copy2{rret(0), rret(1), dests[0], dests[1]};
            break;
          case Arch::ARM:
            // For ARM64 we need to clear the bits 8..31 from the type value.
            // That allows us to use the resulting register values in
            // type comparisons without the need for truncation there.
            // We must not touch bits 63..32 as they contain the AUX data.
            v << copy{rret(0), dests[0]};
            v << andq{v.cns(0xffffffff000000ff),
                      rret(1), dests[1], v.makeReg()};
            break;
        }
      } else {
        // We have cases where we statically know the type but need the value
        // from native call. Even if the type does not really need a register
        // (e.g., InitNull), a Vreg is still allocated in assignRegs(), so the
        // following assertion holds.
        assertx(dests.size() == 1);
        v << copy{rret(0), dests[0]};
      }
      break;

    case DestType::SIMD:
      static_assert(offsetof(TypedValue, m_data) == 0, "");
      static_assert(offsetof(TypedValue, m_type) == 8, "");
      assertx(dests.size() == 1);
      pack2(v, rret(0), rret(1), dests[0]);
      break;

    case DestType::SSA:
    case DestType::Byte:
      assertx(dests.size() == 1);
      assertx(dests[0].isValid());
      // Copy the single-register result to dests[0].
      v << copy{rret(0), dests[0]};
      break;

    case DestType::SSAPair:
      assertx(dests.size() == 2);
      assertx(dests[0].isValid());
      assertx(dests[1].isValid());
      // Copy the result pair to dests.
      v << copy2{rret(0), rret(1), dests[0], dests[1]};
      break;

    case DestType::Dbl:
      // Copy the single-register result to dests[0].
      assertx(dests.size() == 1);
      assertx(dests[0].isValid());
      v << copy{rret_simd(0), dests[0]};
      break;

    case DestType::Indirect:
      // Already asserted above
      break;

    case DestType::None:
      assertx(dests.empty());
      break;
  }

  if (vargs.stkArgs.size() > 0) {
    auto const delta = safe_cast<int32_t>(
      vargs.stkArgs.size() * sizeof(uintptr_t) + adjust
    );
    v << lea{rsp()[delta], rsp()};
  }

  // Insert new instructions to the appropriate block.
  if (is_vcall) {
    vector_splice(blocks[b].code, i, 1, blocks[scratch].code);
  } else {
    vector_splice(blocks[vinvoke.targets[0]].code, 0, 0,
                  blocks[scratch].code);
  }
}
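// A standalone simulation of the paired pushes above. The pushp semantics
// are an assumption for illustration (first operand pushed first, so the
// second lands at the lower address); the checks confirm that after the
// loop, arg[k] sits k slots above the stack pointer, with an odd count
// leaving exactly one duplicated padding slot at the highest address.
#include <cassert>
#include <cstddef>
#include <deque>
#include <vector>

int main() {
  for (std::size_t n = 1; n <= 5; ++n) {
    std::vector<int> args;
    for (std::size_t k = 0; k < n; ++k) args.push_back(100 + int(k));

    std::deque<int> stack;  // front() models the lowest address (stack top)
    auto pushp = [&](int a, int b) {
      stack.push_front(a);
      stack.push_front(b);
    };

    auto numArgs = n;
    if (numArgs & 0x1) {    // odd count: pad by pushing the last arg twice
      pushp(args[numArgs - 1], args[numArgs - 1]);
      --numArgs;
    }
    for (auto i2 = numArgs; i2 >= 2; i2 -= 2) {
      pushp(args[i2 - 1], args[i2 - 2]);
    }

    assert(stack.size() == ((n + 1) & ~std::size_t{1}));  // even slot count
    for (std::size_t k = 0; k < n; ++k) assert(stack[k] == args[k]);
  }
}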
// Assign virtual registers to all SSATmps used or defined in reachable
// blocks. This assigns a value register to constants defined by DefConst,
// because some HHIR instructions require them. Ordinary Gen values with
// a known DataType only get one register. Assign "wide" locations when
// possible (when all uses and defs can be wide). These will be assigned
// SIMD registers later.
void assignRegs(IRUnit& unit, Vunit& vunit, irlower::IRLS& state,
                const BlockList& blocks) {
  // visit instructions to find tmps eligible to use SIMD registers
  auto const try_wide = RuntimeOption::EvalHHIRAllocSIMDRegs;
  boost::dynamic_bitset<> not_wide(unit.numTmps());
  StateVector<SSATmp,SSATmp*> tmps(unit, nullptr);
  for (auto block : blocks) {
    for (auto& inst : *block) {
      for (uint32_t i = 0, n = inst.numSrcs(); i < n; i++) {
        auto s = inst.src(i);
        tmps[s] = s;
        if (!try_wide || !storesCell(inst, i)) {
          not_wide.set(s->id());
        }
      }
      for (auto& d : inst.dsts()) {
        tmps[d] = d;
        if (!try_wide || inst.isControlFlow() || !loadsCell(inst.op())) {
          not_wide.set(d->id());
        }
      }
    }
  }
  // visit each tmp, assign 1 or 2 registers to each.
  for (auto tmp : tmps) {
    if (!tmp) continue;
    auto forced = forceAlloc(*tmp);
    if (forced != InvalidReg) {
      state.locs[tmp] = Vloc{forced};
      UNUSED Reg64 r = forced;
      FTRACE(kRegAllocLevel, "force t{} in {}\n", tmp->id(), reg::regname(r));
      continue;
    }
    if (tmp->inst()->is(DefConst)) {
      auto const type = tmp->type();
      Vreg c;
      if (type.subtypeOfAny(TNull, TNullptr)) {
        c = vunit.makeConst(0);
      } else if (type <= TBool) {
        c = vunit.makeConst(tmp->boolVal());
      } else if (type <= TDbl) {
        c = vunit.makeConst(tmp->dblVal());
      } else {
        c = vunit.makeConst(tmp->rawVal());
      }
      state.locs[tmp] = Vloc{c};
      FTRACE(kRegAllocLevel, "const t{} in %{}\n", tmp->id(), size_t(c));
    } else {
      if (tmp->numWords() == 2) {
        if (!not_wide.test(tmp->id())) {
          auto r = vunit.makeReg();
          state.locs[tmp] = Vloc{Vloc::kWide, r};
          FTRACE(kRegAllocLevel, "def t{} in wide %{}\n", tmp->id(), size_t(r));
        } else {
          auto data = vunit.makeReg();
          auto type = vunit.makeReg();
          state.locs[tmp] = Vloc{data, type};
          FTRACE(kRegAllocLevel, "def t{} in %{},%{}\n", tmp->id(),
                 size_t(data), size_t(type));
        }
      } else {
        auto data = vunit.makeReg();
        state.locs[tmp] = Vloc{data};
        FTRACE(kRegAllocLevel, "def t{} in %{}\n", tmp->id(), size_t(data));
      }
    }
  }
}
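// A minimal standalone model (not HHVM's real Vloc) of the three location
// shapes assignRegs() hands out: a single narrow register for values with
// a known type, one wide SIMD register covering both TypedValue words, or
// a data/type pair of general-purpose registers. Register ids are plain
// ints standing in for Vregs.
#include <cassert>

struct ToyVloc {
  enum Kind { Narrow, Wide, Pair } kind;
  int reg0;   // value register, or the single wide register
  int reg1;   // type register; meaningful only when kind == Pair
  int numAllocated() const { return kind == Pair ? 2 : 1; }
  bool coversFullCell() const { return kind != Narrow; }
};

int main() {
  ToyVloc narrow{ToyVloc::Narrow, 1, -1};  // e.g. a known-type Int value
  ToyVloc wide{ToyVloc::Wide, 2, -1};      // whole cell in one SIMD reg
  ToyVloc pair{ToyVloc::Pair, 3, 4};       // data and type in two GP regs
  assert(narrow.numAllocated() == 1 && !narrow.coversFullCell());
  assert(wide.numAllocated() == 1 && wide.coversFullCell());
  assert(pair.numAllocated() == 2 && pair.coversFullCell());
}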