void Vgen::emit(const ldimmq& i) {
  union { double dval; int64_t ival; };
  ival = i.s.q();

  if (i.d.isSIMD()) {
    // Assembler::fmov (which you'd think shouldn't be a macro instruction)
    // will emit a ldr from a literal pool if IsImmFP64 is false. vixl's
    // literal pools don't work well with our codegen pattern, so if that
    // would happen, emit the raw bits into a GPR first and then move them
    // unmodified into a SIMD.
    if (vixl::Assembler::IsImmFP64(dval)) {
      a->Fmov(D(i.d), dval);
    } else if (ival == 0) { // careful: dval == 0.0 is true for -0.0
      // 0.0 is not encodeable as an immediate to Fmov, but this works.
      a->Fmov(D(i.d), vixl::xzr);
    } else {
      a->Mov(rAsm, ival); // XXX avoid scratch register somehow.
      a->Fmov(D(i.d), rAsm);
    }
  } else {
    a->Mov(X(i.d), ival);
  }
}
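// Added note (illustrative, not from the original source): the ival == 0 check
// above is a bit-pattern test, not a floating-point compare, which is why the
// "-0.0" caveat matters. A minimal sketch of the distinction:
//
//   double negZero = -0.0;
//   int64_t bits;
//   std::memcpy(&bits, &negZero, sizeof(bits));
//   assert(negZero == 0.0);  // value compare: equal to +0.0
//   assert(bits != 0);       // bit pattern: 0x8000000000000000, non-zero
//
// Only the all-zero bit pattern (+0.0) can be materialized from xzr; -0.0 and
// other non-encodable doubles must take the GPR Mov/Fmov path.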
void Vgen::emit(const load& i) {
  if (i.d.isGP()) {
    a->Ldr(X(i.d), M(i.s));
  } else {
    a->Ldr(D(i.d), M(i.s));
  }
}
void Vgen::emit(const store& i) {
  if (i.s.isGP()) {
    a->Str(X(i.s), M(i.d));
  } else {
    a->Str(D(i.s), M(i.d));
  }
}
void Vgen::emit(const copy& i) {
  if (i.s.isGP() && i.d.isGP()) {
    a->Mov(X(i.d), X(i.s));
  } else if (i.s.isSIMD() && i.d.isGP()) {
    a->Fmov(X(i.d), D(i.s));
  } else if (i.s.isGP() && i.d.isSIMD()) {
    a->Fmov(D(i.d), X(i.s));
  } else {
    assertx(i.s.isSIMD() && i.d.isSIMD());
    a->Fmov(D(i.d), D(i.s));
  }
}
TCA emitCall(vixl::MacroAssembler& a, CppCall call) {
  switch (call.kind()) {
    case CppCall::Kind::Direct:
      a.Mov(rHostCallReg, reinterpret_cast<intptr_t>(call.address()));
      break;
    case CppCall::Kind::Virtual:
      a.Ldr(rHostCallReg, argReg(0)[0]);
      a.Ldr(rHostCallReg, rHostCallReg[call.vtableOffset()]);
      break;
    case CppCall::Kind::IndirectReg:
    case CppCall::Kind::IndirectVreg:
      // Indirect calls are currently not implemented. It'll be something like
      // a.Br(x2a(call.getReg())).
      not_implemented();
      always_assert(0);
      break;
    case CppCall::Kind::ArrayVirt:
    case CppCall::Kind::Destructor:
      not_implemented();
      always_assert(0);
      break;
  }

  using namespace vixl;
  auto fixupAddr = a.frontier();
  a.HostCall(6);

  // Note that the fixup address for a HostCall is directly *before* the
  // HostCall, not after as in the native case. This is because, in simulation
  // mode, we look at the simulator's PC at the time the fixup is invoked, and
  // it will still be pointing to the HostCall; it's not advanced past it until
  // the host call returns. In the native case, by contrast, we'll be looking
  // at return addresses, which point after the call.
  return fixupAddr;
}
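// Illustrative caller-side sketch (not from the original source; helperFn,
// pcOff, and spOff are placeholders): the address returned above is what gets
// recorded for fixups, mirroring the syncpoint emitter later in this file:
//
//   auto const fixupAddr = emitCall(a, CppCall::direct(helperFn));
//   env.meta.fixups.emplace_back(fixupAddr, Fixup { pcOff, spOff });
//
// Capturing frontier() *before* emitting the HostCall is what makes the
// simulator's PC (still pointing at the HostCall) match the recorded address.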
void Vgen::emit(const ldimmb& i) {
  if (i.d.isSIMD()) {
    emitSimdImmInt(a, i.s.q(), i.d);
  } else {
    Vreg8 d = i.d;
    a->Mov(W(d), i.s.b());
  }
}
void Vgen::emit(ldimml& i) {
  if (i.d.isSIMD()) {
    emitSimdImmInt(a, i.s.q(), i.d);
  } else {
    Vreg32 d = i.d;
    a->Mov(W(d), i.s.l());
  }
}
TCA emitCallWithinTC(vixl::MacroAssembler& a, TCA call) {
  a.Mov(rHostCallReg, reinterpret_cast<intptr_t>(call));
  a.Blr(rHostCallReg);
  auto fixupAddr = a.frontier();
  return fixupAddr;
}
void Vgen::emit(tbcc i) {
  assertx(i.cc == vixl::ne || i.cc == vixl::eq);
  if (i.targets[1] != i.targets[0]) {
    if (next == i.targets[1]) {
      // The taken branch is the fall-through block; invert the branch.
      i = tbcc{i.cc == vixl::ne ? vixl::eq : vixl::ne, i.bit, i.s,
               {i.targets[1], i.targets[0]}};
    }
    bccs.push_back({a->frontier(), i.targets[1]});
    // offset range +/- 32KB
    if (i.cc == vixl::ne) {
      a->tbnz(X(i.s), i.bit, 0);
    } else {
      a->tbz(X(i.s), i.bit, 0);
    }
  }
  emit(jmp{i.targets[0]});
}
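// Added note (commentary, not in the original): the tbz/tbnz above is emitted
// with a placeholder offset of 0; the bccs fixup loop in the overall emitter
// later re-links it via Instruction::SetImmPCOffsetTarget once addrs[] holds
// the target block's address. Because test-bit branches only reach +/- 32KB,
// the conditional branch stays local and the trailing jmp covers the other
// (possibly distant) target.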
void Vgen::emit(const copy2& i) {
  MovePlan moves;
  Reg64 d0 = i.d0, d1 = i.d1, s0 = i.s0, s1 = i.s1;
  moves[d0] = s0;
  moves[d1] = s1;
  auto howTo = doRegMoves(moves, rAsm); // rAsm isn't used.
  for (auto& how : howTo) {
    if (how.m_kind == MoveInfo::Kind::Move) {
      a->Mov(X(how.m_dst), X(how.m_src));
    } else {
      auto const d = X(how.m_dst);
      auto const s = X(how.m_src);
      a->Eor(d, d, s);
      a->Eor(s, d, s);
      a->Eor(d, d, s);
    }
  }
}
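// Added note (commentary, not in the original): the three Eor instructions
// above are the standard XOR-swap, exchanging d and s without a scratch
// register. With d0/s0 the original values:
//
//   d = d ^ s;   // d now holds d0 ^ s0
//   s = d ^ s;   //   = (d0 ^ s0) ^ s0 = d0
//   d = d ^ s;   //   = (d0 ^ s0) ^ d0 = s0
//
// doRegMoves reports a swap (rather than a plain move) when the two requested
// moves form a cycle, which is why a plain Mov would clobber one source here.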
void Vgen::emit(jcc i) {
  assertx(i.cc != CC_None);
  if (i.targets[1] != i.targets[0]) {
    if (next == i.targets[1]) {
      // The taken branch is the fall-through block; invert the branch.
      i = jcc{ccNegate(i.cc), i.sf, {i.targets[1], i.targets[0]}};
    }
    jccs.push_back({a->frontier(), i.targets[1]});
    // B.cond range is +/- 1MB but this uses BR
    emitSmashableJcc(*codeBlock, env.meta, kEndOfTargetChain, i.cc);
  }
  emit(jmp{i.targets[0]});
}
void Vgen::emit(copy2& i) {
  PhysReg::Map<PhysReg> moves;
  Reg64 d0 = i.d0, d1 = i.d1, s0 = i.s0, s1 = i.s1;
  moves[d0] = s0;
  moves[d1] = s1;
  auto howTo = doRegMoves(moves, rAsm); // rAsm isn't used.
  for (auto& how : howTo) {
    if (how.m_kind == MoveInfo::Kind::Move) {
      a->Mov(X(how.m_dst), X(how.m_src));
    } else {
      emitXorSwap(*a, X(how.m_dst), X(how.m_src));
    }
  }
}
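// emitXorSwap is defined elsewhere; assuming it simply factors out the inline
// three-Eor sequence from the other copy2 variant above, a minimal sketch
// (illustrative only, not the actual definition) would be:
//
//   void emitXorSwap(vixl::MacroAssembler& a,
//                    const vixl::Register& d,
//                    const vixl::Register& s) {
//     a.Eor(d, d, s);
//     a.Eor(s, d, s);
//     a.Eor(d, d, s);
//   }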
TCA emitCall(vixl::MacroAssembler& a, CppCall call) {
  if (call.isDirect()) {
    a.Mov(rHostCallReg, reinterpret_cast<intptr_t>(call.getAddress()));
  } else if (call.isVirtual()) {
    a.Ldr(rHostCallReg, argReg(0)[0]);
    a.Ldr(rHostCallReg, rHostCallReg[call.getOffset()]);
  } else {
    // Indirect calls are currently not implemented. It'll be something like
    // a.Br(x2a(call.getReg())).
    not_implemented();
  }

  using namespace vixl;
  auto fixupAddr = a.frontier();
  a.HostCall(6);

  // Note that the fixup address for a HostCall is directly *before* the
  // HostCall, not after as in the native case. This is because, in simulation
  // mode, we look at the simulator's PC at the time the fixup is invoked, and
  // it will still be pointing to the HostCall; it's not advanced past it until
  // the host call returns. In the native case, by contrast, we'll be looking
  // at return addresses, which point after the call.
  return fixupAddr;
}
void Vgen::emit(const lea& i) {
  assertx(!i.s.index.isValid());
  assertx(i.s.scale == 1);
  a->Add(X(i.d), X(i.s.base), i.s.disp);
}
void Vgen::emit(jmp i) {
  if (next == i.target) return;
  jmps.push_back({a->frontier(), i.target});
  // B range is +/- 128MB but this uses BR
  emitSmashableJmp(*codeBlock, env.meta, kEndOfTargetChain);
}
void Vgen::emit(const unwind& i) {
  catches.push_back({a->frontier(), i.targets[1]});
  emit(jmp{i.targets[0]});
}
void Vgen::emit(syncpoint& i) {
  FTRACE(5, "IR recordSyncPoint: {} {} {}\n", a->frontier(),
         i.fix.pcOffset, i.fix.spOffset);
  mcg->recordSyncPoint(a->frontier(), i.fix);
}
void Vgen::emit(const nothrow& i) {
  env.meta.catches.emplace_back(a->frontier(), nullptr);
}
void Vgen::emit(hostcall& i) {
  points[i.syncpoint] = a->frontier();
  a->HostCall(i.argc);
}
void Vgen::emit(const syncpoint& i) {
  FTRACE(5, "IR recordSyncPoint: {} {} {}\n", a->frontier(),
         i.fix.pcOffset, i.fix.spOffset);
  env.meta.fixups.emplace_back(a->frontier(), i.fix);
}
// overall emitter
void Vgen::emit(jit::vector<Vlabel>& labels) {
  // Some structures here track where we put things just for debug printing.
  struct Snippet {
    const IRInstruction* origin;
    TcaRange range;
  };
  struct BlockInfo {
    jit::vector<Snippet> snippets;
  };

  // This is under the printir tracemod because it mostly shows you IR and
  // machine code, not vasm and machine code (not implemented).
  bool shouldUpdateAsmInfo = !!m_asmInfo &&
    Trace::moduleEnabledRelease(HPHP::Trace::printir, kCodeGenLevel);

  std::vector<TransBCMapping>* bcmap = nullptr;
  if (mcg->tx().isTransDBEnabled() || RuntimeOption::EvalJitUseVtuneAPI) {
    bcmap = &mcg->cgFixups().m_bcMap;
  }

  jit::vector<jit::vector<BlockInfo>> areaToBlockInfos;
  if (shouldUpdateAsmInfo) {
    areaToBlockInfos.resize(areas.size());
    for (auto& r : areaToBlockInfos) {
      r.resize(unit.blocks.size());
    }
  }

  for (int i = 0, n = labels.size(); i < n; ++i) {
    assertx(checkBlockEnd(unit, labels[i]));

    auto b = labels[i];
    auto& block = unit.blocks[b];

    codeBlock = &area(block.area).code;
    vixl::MacroAssembler as { *codeBlock };
    a = &as;
    auto blockStart = a->frontier();
    addrs[b] = blockStart;

    {
      // Compute the next block we will emit into the current area.
      auto cur_start = start(labels[i]);
      auto j = i + 1;
      while (j < labels.size() && cur_start != start(labels[j])) {
        j++;
      }
      next = j < labels.size() ? labels[j] : Vlabel(unit.blocks.size());
    }

    const IRInstruction* currentOrigin = nullptr;
    auto blockInfo = shouldUpdateAsmInfo
      ? &areaToBlockInfos[unsigned(block.area)][b]
      : nullptr;
    auto start_snippet = [&](Vinstr& inst) {
      if (!shouldUpdateAsmInfo) return;

      blockInfo->snippets.push_back(
        Snippet { inst.origin, TcaRange { codeBlock->frontier(), nullptr } }
      );
    };
    auto finish_snippet = [&] {
      if (!shouldUpdateAsmInfo) return;

      if (!blockInfo->snippets.empty()) {
        auto& snip = blockInfo->snippets.back();
        snip.range = TcaRange { snip.range.start(), codeBlock->frontier() };
      }
    };

    for (auto& inst : block.code) {
      if (currentOrigin != inst.origin) {
        finish_snippet();
        start_snippet(inst);
        currentOrigin = inst.origin;
      }

      if (bcmap && inst.origin) {
        auto sk = inst.origin->marker().sk();
        if (bcmap->empty() ||
            bcmap->back().md5 != sk.unit()->md5() ||
            bcmap->back().bcStart != sk.offset()) {
          bcmap->push_back(TransBCMapping{sk.unit()->md5(), sk.offset(),
                                          main().frontier(), cold().frontier(),
                                          frozen().frontier()});
        }
      }

      switch (inst.op) {
#define O(name, imms, uses, defs) \
        case Vinstr::name: emit(inst.name##_); break;
        VASM_OPCODES
#undef O
      }
    }

    finish_snippet();
  }

  for (auto& p : jccs) {
    assertx(addrs[p.target]);
    backend.smashJcc(p.instr, addrs[p.target]);
  }
  for (auto& p : bccs) {
    assertx(addrs[p.target]);
    auto link = (Instruction*) p.instr;
    link->SetImmPCOffsetTarget(Instruction::Cast(addrs[p.target]));
  }
  for (auto& p : jmps) {
    assertx(addrs[p.target]);
    backend.smashJmp(p.instr, addrs[p.target]);
  }
  for (auto& p : catches) {
    mcg->registerCatchBlock(p.instr, addrs[p.target]);
  }
  for (auto& p : ldpoints) {
    CodeCursor cc(main(), p.instr);
    MacroAssembler a{main()};
    a.Mov(X(p.d), points[p.pos]);
  }

  if (!shouldUpdateAsmInfo) {
    return;
  }

  for (auto i = 0; i < areas.size(); ++i) {
    const IRInstruction* currentOrigin = nullptr;
    auto& blockInfos = areaToBlockInfos[i];
    for (auto const blockID : labels) {
      auto const& blockInfo = blockInfos[static_cast<size_t>(blockID)];
      if (blockInfo.snippets.empty()) continue;

      for (auto const& snip : blockInfo.snippets) {
        if (currentOrigin != snip.origin && snip.origin) {
          currentOrigin = snip.origin;
        }

        m_asmInfo->updateForInstruction(
          currentOrigin,
          static_cast<AreaIndex>(i),
          snip.range.start(),
          snip.range.end());
      }
    }
  }
}
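// Added note (commentary, not in the original): branch and address resolution
// is two-pass. While blocks are emitted, jcc/tbcc/jmp record their code
// addresses in jccs/bccs/jmps against placeholder targets (kEndOfTargetChain,
// or a zero tbz/tbnz offset); once every block's start address is known in
// addrs[], the fixup loops above smash or re-link each branch to its real
// destination. The ldpoints loop works the same way: hostcall stores
// frontier() into points[], and the code previously emitted at each ldpoint
// (presumably a Mov of the address) is rewritten with the final value.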
void Vgen::emit(jmp i) {
  if (next == i.target) return;
  jmps.push_back({a->frontier(), i.target});
  // B range is +/- 128MB but this uses BR
  backend.emitSmashableJump(*codeBlock, kEndOfTargetChain, CC_None);
}