// Kernel call static bool kc(PPCEmuAssembler& a, Instruction instr) { auto id = instr.kcn; auto kc = cpu::getKernelCall(id); decaf_assert(kc, fmt::format("Encountered invalid Kernel Call ID {}", id)); // Evict all stored register as a KC might read or modify them. a.evictAll(); // Save NIA back to memory in case KC reads/writes it a.mov(a.niaMem, a.genCia + 4); // Call the KC a.mov(a.sysArgReg[0], asmjit::Ptr(kc->func)); a.mov(a.sysArgReg[1], asmjit::Ptr(kc->user_data)); a.call(asmjit::Ptr(&kc_stub)); a.mov(a.stateReg, asmjit::x86::rax); // Check if the KC adjusted nia. If it has, we need to return // to the dispatcher. Note that we assume the cache was already // cleared before this instruction since KC requires that anyways. auto niaUnchangedLbl = a.newLabel(); a.cmp(a.niaMem, a.genCia + 4); a.je(niaUnchangedLbl); a.mov(a.finaleNiaArgReg, a.niaMem); a.mov(a.finaleJmpSrcArgReg, 0); a.jmp(asmjit::Ptr(gFinaleFn)); a.bind(niaUnchangedLbl); return true; }
// Emits JIT code that falls back to the interpreter for an instruction:
// decodes `instr` to find its interpreter handler and emits a direct call
// handler(state, instr). Aborts at emit time if the instruction cannot be
// decoded or has no interpreter handler. Returns true on successful emit.
bool jit_fallback(PPCEmuAssembler& a, espresso::Instruction instr)
{
   auto data = espresso::decodeInstruction(instr);
   decaf_assert(data, fmt::format("Failed to decode instruction {:08X}", instr.value));

   auto fptr = cpu::interpreter::getInstructionHandler(data->id);
   decaf_assert(fptr, fmt::format("Unimplemented instruction {}", static_cast<int>(data->id)));

   // Evict all stored registers before calling out to the interpreter.
   a.evictAll();

   if (TRACK_FALLBACK_CALLS) {
      // Emit an atomic (locked) increment of the per-instruction-id
      // fallback counter for profiling.
      auto fallbackAddr = reinterpret_cast<intptr_t>(&sFallbackCalls[static_cast<uint32_t>(data->id)]);
      a.mov(asmjit::x86::rax, asmjit::Ptr(fallbackAddr));
      a.lock().inc(asmjit::X86Mem(asmjit::x86::rax, 0));
   }

   // Call fptr(state, instr)
   a.mov(a.sysArgReg[0], a.stateReg);
   a.mov(a.sysArgReg[1], (uint32_t)instr);
   a.call(asmjit::Ptr(fptr));
   return true;
}
// Handles an EVENT_WRITE_EOP PM4 packet: optionally schedules a value write
// to guest memory and/or an end-of-pipe interrupt, both deferred as retire
// tasks so they happen when the GPU work retires.
void Driver::eventWriteEOP(const latte::pm4::EventWriteEOP &data)
{
   // Write event data to memory if required
   if (data.addrHi.DATA_SEL() != latte::pm4::EWP_DATA_DISCARD) {
      auto writeAddr = phys_addr { data.addrLo.ADDR_LO() << 2 };
      auto hostPtr = gpu::internal::translateAddress(writeAddr);
      decaf_assert(data.addrHi.ADDR_HI() == 0,
                   "Invalid event write address (high word not zero)");

      // Pick the source value according to the packet's data selector
      auto dataSel = data.addrHi.DATA_SEL();
      auto eventValue = uint64_t { 0u };

      if (dataSel == latte::pm4::EWP_DATA_32) {
         eventValue = data.dataLo;
      } else if (dataSel == latte::pm4::EWP_DATA_64) {
         eventValue = static_cast<uint64_t>(data.dataLo)
                    | (static_cast<uint64_t>(data.dataHi) << 32);
      } else if (dataSel == latte::pm4::EWP_DATA_CLOCK) {
         eventValue = gpu::clock::now();
      }

      // Swap value
      eventValue = latte::applyEndianSwap(eventValue, data.addrLo.ENDIAN_SWAP());

      // Defer the actual memory write until retirement
      addRetireTask([dataSel, hostPtr, eventValue]() {
         if (dataSel == latte::pm4::EWP_DATA_32) {
            *reinterpret_cast<uint32_t *>(hostPtr) = static_cast<uint32_t>(eventValue);
         } else if (dataSel == latte::pm4::EWP_DATA_64
                 || dataSel == latte::pm4::EWP_DATA_CLOCK) {
            *reinterpret_cast<uint64_t *>(hostPtr) = eventValue;
         }
      });
   }

   // Generate interrupt if required
   if (data.addrHi.INT_SEL() != latte::pm4::EWP_INT_NONE) {
      addRetireTask([]() {
         auto interrupt = gpu::ih::Entry { };
         interrupt.word0 = latte::CP_INT_SRC_ID::CP_EOP_EVENT;
         gpu::ih::write(interrupt);
      });
   }
}
// NOTE(review): this definition is truncated in this chunk — the closing
// braces of the inner if, the STORE_BUFFER_FILLED_SIZE branch, and the
// function itself are not visible here. Code below is unchanged.
//
// Handles a STRMOUT_BUFFER_UPDATE PM4 packet for the selected stream-out
// buffer; when STORE_BUFFER_FILLED_SIZE is set, writes the current feedback
// offset back to guest memory at the packet's destination address.
void GLDriver::streamOutBufferUpdate(const pm4::StreamOutBufferUpdate &data)
{
   auto bufferIndex = data.control.SELECT_BUFFER();

   if (data.control.STORE_BUFFER_FILLED_SIZE()) {
      copyFeedbackBuffer(bufferIndex);

      // Only 32-bit destination addresses are supported (high word must be 0).
      auto addr = data.dstLo;
      decaf_assert(data.dstHi == 0,
                   fmt::format("Store target out of 32-bit range for feedback buffer {}", bufferIndex));

      if (addr != 0) {
         // Stored offset is byte-swapped for the guest and shifted >> 2
         // (i.e. written in 4-byte word units).
         auto offsetPtr = mem::translate<uint32_t>(addr);
         *offsetPtr = byte_swap(mFeedbackCurrentOffset[bufferIndex] >> 2);
      }
/** * Initialise memory, mapping all valid address space */ void initialise() { // Find a good base address gMemoryBase = 0; for (auto n = 32; n < 64; ++n) { auto base = 1ull << n; if (tryMapMemory(base)) { gMemoryBase = base; break; } } decaf_assert(gMemoryBase, "Failed to find a valid memory base address"); }
// Translates a VTX_FETCH clause instruction to GLSL. Only the restricted
// form used for uniform-block (vertex buffer) reads is supported; anything
// else trips an assert/check at translation time.
//
// Fix: removed a stray no-op statement `inst.word0.SRC_SEL_X();` whose
// result was discarded — the selector is actually read below when building
// the index expression via insertSelectValue.
static void VTX_FETCH(State &state, const ControlFlowInst &cf, const VertexFetchInst &inst)
{
   // FETCH R4.xyzw, R0.y, b131 NO_INDEX_OFFSET FMT_FROM_FETCH_CONSTANT MEGA(16) OFFSET(0)
   auto id = inst.word0.BUFFER_ID() + SQ_VS_RESOURCE_BASE;

   // For now we only support reading from vertex buffers (uniform blocks)
   decaf_assert(id >= SQ_VS_BUF_RESOURCE_0 && id < SQ_VS_GSOUT_RESOURCE,
                fmt::format("Unsupported VTX_FETCH buffer id {}", id));

   // Let's only support a very expected set of values
   decaf_check(inst.word0.FETCH_TYPE() == SQ_VTX_FETCH_NO_INDEX_OFFSET);
   decaf_check(inst.word1.USE_CONST_FIELDS() == 1);
   decaf_check(inst.word2.OFFSET() == 0);
   decaf_check(inst.word2.MEGA_FETCH() && (inst.word0.MEGA_FETCH_COUNT() + 1) == 16);

   auto dstSelX = inst.word1.DST_SEL_X();
   auto dstSelY = inst.word1.DST_SEL_Y();
   auto dstSelZ = inst.word1.DST_SEL_Z();
   auto dstSelW = inst.word1.DST_SEL_W();

   auto numDstSels = 4u;
   auto dstSelMask = condenseSelections(dstSelX, dstSelY, dstSelZ, dstSelW, numDstSels);

   if (numDstSels > 0) {
      auto dst = getExportRegister(inst.gpr.DST_GPR(), inst.gpr.DST_REL());
      auto src = getExportRegister(inst.word0.SRC_GPR(), inst.word0.SRC_REL());
      auto blockID = id - SQ_VS_BUF_RESOURCE_0;

      // Record that this shader reads the uniform block
      if (state.shader) {
         state.shader->usedUniformBlocks[blockID] = true;
      }

      // Build the indexing expression: UB_<n>.values[floatBitsToInt(src.<sel>)]
      fmt::MemoryWriter tmp;
      tmp << "UB_" << blockID << ".values[floatBitsToInt(";
      insertSelectValue(tmp, src, inst.word0.SRC_SEL_X());
      tmp << ")]";

      insertLineStart(state);
      state.out << dst << "." << dstSelMask << " = ";
      insertSelectVector(state.out, tmp.str(), dstSelX, dstSelY, dstSelZ, dstSelW, numDstSels);
      state.out << ";";
      insertLineEnd(state);
   }
}
// Fiber entry point invoked after a CPU fault (segfault or illegal
// instruction). Rewinds NIA, optionally hands control to the debugger,
// and otherwise aborts with a diagnostic describing the fault.
static void cpuFaultFiberEntryPoint(void *addr)
{
   // We may have been in the middle of a kernel function...
   if (coreinit::internal::isSchedulerLocked()) {
      coreinit::internal::unlockScheduler();
   }

   // Move back an instruction so we can re-execute the failed instruction
   // and so that the debugger shows the right stop point.
   cpu::this_core::state()->nia -= 4;

   // Alert the debugger if it cares.
   if (decaf::config::debugger::enabled) {
      coreinit::internal::pauseCoreTime(true);
      debugger::handleDbgBreakInterrupt();
      coreinit::internal::pauseCoreTime(false);

      // This will shut down the thread and reschedule. This is required
      // since returning from the segfault handler is an error.
      coreinit::OSExitThread(0);
   }

   auto core = cpu::this_core::state();
   decaf_assert(core, "Uh oh? CPU fault Handler with invalid core");
   gLog->critical("{}", coreStateToString(core));

   // No debugger attached: abort with a fault-specific message.
   if (sFaultReason == FaultReason::Segfault) {
      decaf_abort(fmt::format("Invalid memory access for address {:08X} with nia 0x{:08X}\n", sSegfaultAddress, core->nia));
   } else if (sFaultReason == FaultReason::IllInst) {
      decaf_abort(fmt::format("Invalid instruction at nia 0x{:08X}\n", core->nia));
   } else {
      decaf_abort(fmt::format("Unexpected fault occured, fault reason was {} at 0x{:08X}\n", static_cast<uint32_t>(sFaultReason), core->nia));
   }
}
// Returns how many arguments the GLSL sampler function takes for the given
// texture dimensionality: the coordinate count, plus one for the array layer
// on array types, plus one for the comparison value on shadow lookups.
static unsigned getSamplerArgCount(latte::SQ_TEX_DIM type, bool isShadowOp)
{
   const auto shadowExtra = isShadowOp ? 1u : 0u;

   switch (type) {
   case latte::SQ_TEX_DIM::DIM_1D:
      return 1u + shadowExtra;
   case latte::SQ_TEX_DIM::DIM_2D:
   case latte::SQ_TEX_DIM::DIM_2D_MSAA:
      return 2u + shadowExtra;
   case latte::SQ_TEX_DIM::DIM_3D:
      // 3D shadow lookups would need a 4th component we can't express yet.
      decaf_assert(!isShadowOp, "Shadow3D samplers have special semantics we don't yet support");
      return 3u;
   case latte::SQ_TEX_DIM::DIM_1D_ARRAY:
      // 1 coordinate + array layer
      return 2u + shadowExtra;
   case latte::SQ_TEX_DIM::DIM_2D_ARRAY:
   case latte::SQ_TEX_DIM::DIM_2D_ARRAY_MSAA:
      // 2 coordinates + array layer
      return 3u + shadowExtra;
   case latte::SQ_TEX_DIM::DIM_CUBEMAP:
      return 3u + shadowExtra;
   default:
      throw translate_exception(fmt::format("Unsupported sampler type {}", static_cast<unsigned>(type)));
   }
}