template&lt;typename Inst&gt;
void lower_vcall(Vunit& unit, Inst& inst, Vlabel b, size_t i) {
  auto& blocks = unit.blocks;
  auto const& vinstr = blocks[b].code[i];

  auto const is_vcall = vinstr.op == Vinstr::vcall;
  auto const vcall = vinstr.vcall_;
  auto const vinvoke = vinstr.vinvoke_;

  // We lower vinvoke in two phases, and `inst' is overwritten after the first
  // phase. We need to save any of its parameters that we care about in the
  // second phase ahead of time.
  auto const& vargs = unit.vcallArgs[inst.args];
  auto const dests = unit.tuples[inst.d];
  auto const destType = inst.destType;

  auto const scratch = unit.makeScratchBlock();
  SCOPE_EXIT { unit.freeScratchBlock(scratch); };
  Vout v(unit, scratch, vinstr.irctx());

  // Push stack arguments, in reverse order. Push in pairs without padding,
  // except for the last argument (pushed first), which is padded if there is
  // an odd number of arguments.
  auto numArgs = vargs.stkArgs.size();
  int32_t const adjust = (numArgs & 0x1) ? sizeof(uintptr_t) : 0;
  if (adjust) {
    // Using InvalidReg below fails SSA checks and the simplify pass, so just
    // push the arg twice. It's on the same cache line and will actually
    // perform faster than an explicit lea.
    v << pushp{vargs.stkArgs[numArgs - 1], vargs.stkArgs[numArgs - 1]};
    --numArgs;
  }
  for (auto i2 = numArgs; i2 >= 2; i2 -= 2) {
    v << pushp{vargs.stkArgs[i2 - 1], vargs.stkArgs[i2 - 2]};
  }

  // Get the arguments in the proper registers.
  RegSet argRegs;
  auto doArgs = [&] (const VregList& srcs, PhysReg (*r)(size_t)) {
    VregList argDests;
    for (size_t i2 = 0, n = srcs.size(); i2 < n; ++i2) {
      auto const reg = r(i2);
      argDests.push_back(reg);
      argRegs |= reg;
    }
    if (argDests.size()) {
      v << copyargs{v.makeTuple(srcs),
                    v.makeTuple(std::move(argDests))};
    }
  };
  doArgs(vargs.indRetArgs, rarg_ind_ret);
  doArgs(vargs.args, rarg);
  doArgs(vargs.simdArgs, rarg_simd);

  // Emit the appropriate call instruction sequence.
  emitCall(v, inst.call, argRegs);

  // Handle fixup and unwind information.
  if (inst.fixup.isValid()) {
    v << syncpoint{inst.fixup};
  }

  if (!is_vcall) {
    auto& targets = vinvoke.targets;
    v << unwind{{targets[0], targets[1]}};

    // Insert an lea fixup for any stack args at the beginning of the catch
    // block.
    if (auto rspOffset = ((vargs.stkArgs.size() + 1) & ~1) *
                         sizeof(uintptr_t)) {
      auto& taken = unit.blocks[targets[1]].code;
      assertx(taken.front().op == Vinstr::landingpad ||
              taken.front().op == Vinstr::jmp);

      Vinstr vi { lea{rsp()[rspOffset], rsp()}, taken.front().irctx() };

      if (taken.front().op == Vinstr::jmp) {
        taken.insert(taken.begin(), vi);
      } else {
        taken.insert(taken.begin() + 1, vi);
      }
    }

    // Write out the code so far to the end of b. Remaining code will be
    // emitted to the next block.
    vector_splice(blocks[b].code, i, 1, blocks[scratch].code);
  } else if (vcall.nothrow) {
    v << nothrow{};
  }

  // For vinvoke, `inst' is no longer valid after this point.

  // Copy the call result to the destination register(s).
  switch (destType) {
    case DestType::TV:
      static_assert(offsetof(TypedValue, m_data) == 0, "");
      static_assert(offsetof(TypedValue, m_type) == 8, "");

      if (dests.size() == 2) {
        switch (arch()) {
          case Arch::X64: // fall through
          case Arch::PPC64:
            v << copy2{rret(0), rret(1), dests[0], dests[1]};
            break;
          case Arch::ARM:
            // For ARM64 we need to clear bits 8..31 of the type value. That
            // allows us to use the resulting register values in type
            // comparisons without the need for truncation there. We must not
            // touch bits 63..32, as they contain the AUX data.
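            // The mask 0xffffffff000000ff below keeps exactly bits 0..7 (the
            // DataType) and bits 63..32 (the AUX data), clearing bits 8..31.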
            v << copy{rret(0), dests[0]};
            v << andq{v.cns(0xffffffff000000ff), rret(1),
                      dests[1], v.makeReg()};
            break;
        }
      } else {
        // We have cases where we statically know the type but need the value
        // from the native call. Even if the type does not really need a
        // register (e.g., InitNull), a Vreg is still allocated in
        // assignRegs(), so the following assertion holds.
        assertx(dests.size() == 1);
        v << copy{rret(0), dests[0]};
      }
      break;

    case DestType::SIMD:
      static_assert(offsetof(TypedValue, m_data) == 0, "");
      static_assert(offsetof(TypedValue, m_type) == 8, "");
      assertx(dests.size() == 1);

      pack2(v, rret(0), rret(1), dests[0]);
      break;

    case DestType::SSA:
    case DestType::Byte:
      assertx(dests.size() == 1);
      assertx(dests[0].isValid());

      // Copy the single-register result to dests[0].
      v << copy{rret(0), dests[0]};
      break;

    case DestType::SSAPair:
      assertx(dests.size() == 2);
      assertx(dests[0].isValid());
      assertx(dests[1].isValid());

      // Copy the result pair to dests.
      v << copy2{rret(0), rret(1), dests[0], dests[1]};
      break;

    case DestType::Dbl:
      // Copy the single-register result to dests[0].
      assertx(dests.size() == 1);
      assertx(dests[0].isValid());
      v << copy{rret_simd(0), dests[0]};
      break;

    case DestType::Indirect:
      // Already asserted above.
      break;

    case DestType::None:
      assertx(dests.empty());
      break;
  }

  if (vargs.stkArgs.size() > 0) {
    auto const delta = safe_cast<int32_t>(
      vargs.stkArgs.size() * sizeof(uintptr_t) + adjust
    );
    v << lea{rsp()[delta], rsp()};
  }

  // Insert new instructions into the appropriate block.
  if (is_vcall) {
    vector_splice(blocks[b].code, i, 1, blocks[scratch].code);
  } else {
    vector_splice(blocks[vinvoke.targets[0]].code, 0, 0,
                  blocks[scratch].code);
  }
}
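// Illustrative note (not part of the original source): a worked example of
// the stack-argument bookkeeping above. With three stack args {a0, a1, a2},
// numArgs is odd, so `adjust' is sizeof(uintptr_t) = 8 and the lowering
// emits:
//
//   pushp{a2, a2}   // a2 pushed twice in lieu of explicit padding
//   pushp{a1, a0}
//
// i.e. four slots (32 bytes), preserving 16-byte rsp alignment. After the
// call, the cleanup lea restores rsp by delta = 3 * 8 + adjust = 32 bytes,
// and a vinvoke catch block gets the matching fixup of
// rspOffset = ((3 + 1) & ~1) * 8 = 32 bytes.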