// Feeds the `size` bytes of display list data for `address` to the opcode
// decoder and returns the cycle count the decoder reported. Returns 0 when no
// source pointer could be obtained.
static u32 InterpretDisplayList(u32 address, u32 size)
{
    // With the deterministic GPU thread the data comes out of the FIFO aux
    // buffer; otherwise it is looked up through Memory::GetPointer.
    u8* list_begin;
    if (!Fifo::g_use_deterministic_gpu_thread)
        list_begin = Memory::GetPointer(address);
    else
        list_begin = (u8*)Fifo::PopFifoAuxBuffer(size);

    // Bail out instead of crashing when no pointer is available.
    if (list_begin == nullptr)
        return 0;

    u32 decoded_cycles = 0;

    // The DL / non-DL statistics are swapped for the duration of the decode
    // (small "hack" for the stats) and swapped back afterwards.
    Statistics::SwapDL();
    OpcodeDecoder_Run(DataReader(list_begin, list_begin + size), &decoded_cycles, true);
    INCSTAT(stats.thisFrame.numDListsCalled);
    Statistics::SwapDL();

    return decoded_cycles;
}
void RunGpu() { if (SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread && !g_use_deterministic_gpu_thread) return; SCPFifoStruct &fifo = CommandProcessor::fifo; while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint() ) { if (g_use_deterministic_gpu_thread) { ReadDataFromFifoOnCPU(fifo.CPReadPointer); } else { FPURoundMode::SaveSIMDState(); FPURoundMode::LoadDefaultSIMDState(); ReadDataFromFifo(fifo.CPReadPointer); s_video_buffer_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_read_ptr, s_video_buffer_write_ptr), nullptr, false); FPURoundMode::LoadSIMDState(); } //DEBUG_LOG(COMMANDPROCESSOR, "Fifo wraps to base"); if (fifo.CPReadPointer == fifo.CPEnd) fifo.CPReadPointer = fifo.CPBase; else fifo.CPReadPointer += 32; fifo.CPReadWriteDistance -= 32; } CommandProcessor::SetCPStatusFromGPU(); }
// The deterministic_gpu_thread version. static void ReadDataFromFifoOnCPU(u32 readPtr) { size_t len = 32; u8 *write_ptr = s_video_buffer_write_ptr; if (len > (size_t)(s_video_buffer + FIFO_SIZE - write_ptr)) { // We can't wrap around while the GPU is working on the data. // This should be very rare due to the reset in SyncGPU. SyncGPU(SYNC_GPU_WRAPAROUND); if (!s_gpu_mainloop.IsRunning()) { // GPU is shutting down, so the next asserts may fail return; } if (s_video_buffer_pp_read_ptr != s_video_buffer_read_ptr) { PanicAlert("desynced read pointers"); return; } write_ptr = s_video_buffer_write_ptr; size_t existing_len = write_ptr - s_video_buffer_pp_read_ptr; if (len > (size_t)(FIFO_SIZE - existing_len)) { PanicAlert("FIFO out of bounds (existing %lu + new %lu > %lu)", (unsigned long) existing_len, (unsigned long) len, (unsigned long) FIFO_SIZE); return; } } Memory::CopyFromEmu(s_video_buffer_write_ptr, readPtr, len); s_video_buffer_pp_read_ptr = OpcodeDecoder_Run<true>(DataReader(s_video_buffer_pp_read_ptr, write_ptr + len), nullptr, false); // This would have to be locked if the GPU thread didn't spin. s_video_buffer_write_ptr = write_ptr + len; }
// Preprocessing counterpart of display list interpretation: pushes the list's
// pointer and size onto the FIFO aux buffer, then runs OpcodeDecoder_Run<true>
// over the list when the pointer is valid.
static void InterpretDisplayListPreprocess(u32 address, u32 size)
{
    u8* list_begin = Memory::GetPointer(address);

    // The push happens unconditionally, i.e. also for a null pointer.
    Fifo::PushFifoAuxBuffer(list_begin, size);

    if (list_begin == nullptr)
        return;

    OpcodeDecoder_Run<true>(DataReader(list_begin, list_begin + size), nullptr, true);
}
// Runs both wrapped loaders (`a` and `b`) on the same source data, each into
// its own scratch buffer, and logs an error when they disagree on the number
// of vertices or on the produced bytes. The output of `a` is copied to `dst`
// and its vertex count is returned.
int RunVertices(DataReader src, DataReader dst, int count) override
{
    // Size each scratch buffer for `count` vertices of that loader's stride, plus 4.
    buffer_a.resize(count * a->m_native_vtx_decl.stride + 4);
    buffer_b.resize(count * b->m_native_vtx_decl.stride + 4);

    auto* const a_end = buffer_a.data() + buffer_a.size();
    int count_a = a->RunVertices(src, DataReader(buffer_a.data(), a_end), count);

    auto* const b_end = buffer_b.data() + buffer_b.size();
    int count_b = b->RunVertices(src, DataReader(buffer_b.data(), b_end), count);

    if (count_a != count_b)
    {
        ERROR_LOG(VIDEO, "The two vertex loaders have loaded a different amount of vertices (a: %d, b: %d).", count_a, count_b);
    }

    // Only the vertices both loaders produced are compared.
    const auto compared_bytes = std::min(count_a, count_b) * m_native_vtx_decl.stride;
    if (memcmp(buffer_a.data(), buffer_b.data(), compared_bytes) != 0)
    {
        ERROR_LOG(VIDEO, "The two vertex loaders have loaded different data "
                         "(guru meditation 0x%016" PRIx64 ", 0x%08x, 0x%08x, 0x%08x).",
                  m_VtxDesc.Hex, m_vat.g0.Hex, m_vat.g1.Hex, m_vat.g2.Hex);
    }

    // Loader `a` provides the data that is handed back to the caller.
    const auto output_bytes = count_a * m_native_vtx_decl.stride;
    memcpy(dst.GetPointer(), buffer_a.data(), output_bytes);

    m_numLoadedVertices += count;
    return count_a;
}
static int RunGpuOnCpu(int ticks) { SCPFifoStruct& fifo = CommandProcessor::fifo; bool reset_simd_state = false; int available_ticks = int(ticks * SConfig::GetInstance().fSyncGpuOverclock) + s_sync_ticks.load(); while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint() && available_ticks >= 0) { if (s_use_deterministic_gpu_thread) { ReadDataFromFifoOnCPU(fifo.CPReadPointer); s_gpu_mainloop.Wakeup(); } else { if (!reset_simd_state) { FPURoundMode::SaveSIMDState(); FPURoundMode::LoadDefaultSIMDState(); reset_simd_state = true; } ReadDataFromFifo(fifo.CPReadPointer); u32 cycles = 0; s_video_buffer_read_ptr = OpcodeDecoder::Run( DataReader(s_video_buffer_read_ptr, s_video_buffer_write_ptr), &cycles, false); available_ticks -= cycles; } if (fifo.CPReadPointer == fifo.CPEnd) fifo.CPReadPointer = fifo.CPBase; else fifo.CPReadPointer += 32; fifo.CPReadWriteDistance -= 32; } CommandProcessor::SetCPStatusFromGPU(); if (reset_simd_state) { FPURoundMode::LoadSIMDState(); } // Discard all available ticks as there is nothing to do any more. s_sync_ticks.store(std::min(available_ticks, 0)); // If the GPU is idle, drop the handler. if (available_ticks >= 0) return -1; // Always wait at least for GPU_TIME_SLOT_SIZE cycles. return -available_ticks + GPU_TIME_SLOT_SIZE; }
void RunGpu() { SCPFifoStruct &fifo = CommandProcessor::fifo; const SConfig& param = SConfig::GetInstance(); // execute GPU if (!param.bCPUThread || g_use_deterministic_gpu_thread) { bool reset_simd_state = false; while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint() ) { if (g_use_deterministic_gpu_thread) { ReadDataFromFifoOnCPU(fifo.CPReadPointer); s_gpu_mainloop.Wakeup(); } else { if (!reset_simd_state) { FPURoundMode::SaveSIMDState(); FPURoundMode::LoadDefaultSIMDState(); reset_simd_state = true; } ReadDataFromFifo(fifo.CPReadPointer); s_video_buffer_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_read_ptr, s_video_buffer_write_ptr), nullptr, false); } //DEBUG_LOG(COMMANDPROCESSOR, "Fifo wraps to base"); if (fifo.CPReadPointer == fifo.CPEnd) fifo.CPReadPointer = fifo.CPBase; else fifo.CPReadPointer += 32; fifo.CPReadWriteDistance -= 32; } CommandProcessor::SetCPStatusFromGPU(); if (reset_simd_state) { FPURoundMode::LoadSIMDState(); } } // wake up GPU thread if (param.bCPUThread) { s_gpu_mainloop.Wakeup(); } }
// Description: Main FIFO update loop // Purpose: Keep the Core HW updated about the CPU-GPU distance void RunGpuLoop() { AsyncRequests::GetInstance()->SetEnable(true); AsyncRequests::GetInstance()->SetPassthrough(false); s_gpu_mainloop.Run( [] { const SConfig& param = SConfig::GetInstance(); g_video_backend->PeekMessages(); // Do nothing while paused if (!s_emu_running_state.load()) return; if (g_use_deterministic_gpu_thread) { AsyncRequests::GetInstance()->PullEvents(); // All the fifo/CP stuff is on the CPU. We just need to run the opcode decoder. u8* seen_ptr = s_video_buffer_seen_ptr; u8* write_ptr = s_video_buffer_write_ptr; // See comment in SyncGPU if (write_ptr > seen_ptr) { s_video_buffer_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_read_ptr, write_ptr), nullptr, false); s_video_buffer_seen_ptr = write_ptr; } } else { SCPFifoStruct &fifo = CommandProcessor::fifo; AsyncRequests::GetInstance()->PullEvents(); CommandProcessor::SetCPStatusFromGPU(); // check if we are able to run this buffer while (!CommandProcessor::IsInterruptWaiting() && fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint()) { if (param.bSyncGPU && s_sync_ticks.load() < param.iSyncGpuMinDistance) break; u32 cyclesExecuted = 0; u32 readPtr = fifo.CPReadPointer; ReadDataFromFifo(readPtr); if (readPtr == fifo.CPEnd) readPtr = fifo.CPBase; else readPtr += 32; _assert_msg_(COMMANDPROCESSOR, (s32)fifo.CPReadWriteDistance - 32 >= 0 , "Negative fifo.CPReadWriteDistance = %i in FIFO Loop !\nThat can produce instability in the game. 
Please report it.", fifo.CPReadWriteDistance - 32); u8* write_ptr = s_video_buffer_write_ptr; s_video_buffer_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_read_ptr, write_ptr), &cyclesExecuted, false); Common::AtomicStore(fifo.CPReadPointer, readPtr); Common::AtomicAdd(fifo.CPReadWriteDistance, -32); if ((write_ptr - s_video_buffer_read_ptr) == 0) Common::AtomicStore(fifo.SafeCPReadPointer, fifo.CPReadPointer); CommandProcessor::SetCPStatusFromGPU(); if (param.bSyncGPU) { cyclesExecuted = (int)(cyclesExecuted / param.fSyncGpuOverclock); int old = s_sync_ticks.fetch_sub(cyclesExecuted); if (old > 0 && old - (int)cyclesExecuted <= 0) s_sync_wakeup_event.Set(); } // This call is pretty important in DualCore mode and must be called in the FIFO Loop. // If we don't, s_swapRequested or s_efbAccessRequested won't be set to false // leading the CPU thread to wait in Video_BeginField or Video_AccessEFB thus slowing things down. AsyncRequests::GetInstance()->PullEvents(); } // fast skip remaining GPU time if fifo is empty if (s_sync_ticks.load() > 0) { int old = s_sync_ticks.exchange(0); if (old > 0) s_sync_wakeup_event.Set(); } // The fifo is empty and it's unlikely we will get any more work in the near future. // Make sure VertexManager finishes drawing any primitives it has stored in it's buffer. VertexManager::Flush(); } }, 100); AsyncRequests::GetInstance()->SetEnable(false); AsyncRequests::GetInstance()->SetPassthrough(true); }
void ResetPointers() { m_src = DataReader(input_memory, input_memory + sizeof(input_memory)); m_dst = DataReader(output_memory, output_memory + sizeof(output_memory)); }
void ResetPointers() { m_input_pos = m_output_pos = 0; src = DataReader(input_memory, input_memory+sizeof(input_memory)); dst = DataReader(output_memory, output_memory+sizeof(output_memory)); }
// Description: Main FIFO update loop // Purpose: Keep the Core HW updated about the CPU-GPU distance void RunGpuLoop() { std::lock_guard<std::mutex> lk(m_csHWVidOccupied); GpuRunningState = true; SCPFifoStruct &fifo = CommandProcessor::fifo; u32 cyclesExecuted = 0; // If the host CPU has only two cores, idle loop instead of busy loop // This allows a system that we are maxing out in dual core mode to do other things bool yield_cpu = cpu_info.num_cores <= 2; AsyncRequests::GetInstance()->SetEnable(true); AsyncRequests::GetInstance()->SetPassthrough(false); while (GpuRunningState) { g_video_backend->PeekMessages(); AsyncRequests::GetInstance()->PullEvents(); if (g_use_deterministic_gpu_thread) { // All the fifo/CP stuff is on the CPU. We just need to run the opcode decoder. u8* seen_ptr = s_video_buffer_seen_ptr; u8* write_ptr = s_video_buffer_write_ptr; // See comment in SyncGPU if (write_ptr > seen_ptr) { s_video_buffer_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_read_ptr, write_ptr), nullptr, false); { std::lock_guard<std::mutex> vblk(s_video_buffer_lock); s_video_buffer_seen_ptr = write_ptr; s_video_buffer_cond.notify_all(); } } } else { CommandProcessor::SetCPStatusFromGPU(); Common::AtomicStore(CommandProcessor::VITicks, CommandProcessor::m_cpClockOrigin); // check if we are able to run this buffer while (GpuRunningState && EmuRunningState && !CommandProcessor::interruptWaiting && fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint()) { fifo.isGpuReadingData = true; CommandProcessor::isPossibleWaitingSetDrawDone = fifo.bFF_GPLinkEnable ? 
true : false; if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU || Common::AtomicLoad(CommandProcessor::VITicks) > CommandProcessor::m_cpClockOrigin) { u32 readPtr = fifo.CPReadPointer; ReadDataFromFifo(readPtr); if (readPtr == fifo.CPEnd) readPtr = fifo.CPBase; else readPtr += 32; _assert_msg_(COMMANDPROCESSOR, (s32)fifo.CPReadWriteDistance - 32 >= 0 , "Negative fifo.CPReadWriteDistance = %i in FIFO Loop !\nThat can produce instability in the game. Please report it.", fifo.CPReadWriteDistance - 32); u8* write_ptr = s_video_buffer_write_ptr; s_video_buffer_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_read_ptr, write_ptr), &cyclesExecuted, false); if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU && Common::AtomicLoad(CommandProcessor::VITicks) >= cyclesExecuted) Common::AtomicAdd(CommandProcessor::VITicks, -(s32)cyclesExecuted); Common::AtomicStore(fifo.CPReadPointer, readPtr); Common::AtomicAdd(fifo.CPReadWriteDistance, -32); if ((write_ptr - s_video_buffer_read_ptr) == 0) Common::AtomicStore(fifo.SafeCPReadPointer, fifo.CPReadPointer); } CommandProcessor::SetCPStatusFromGPU(); // This call is pretty important in DualCore mode and must be called in the FIFO Loop. // If we don't, s_swapRequested or s_efbAccessRequested won't be set to false // leading the CPU thread to wait in Video_BeginField or Video_AccessEFB thus slowing things down. AsyncRequests::GetInstance()->PullEvents(); CommandProcessor::isPossibleWaitingSetDrawDone = false; } fifo.isGpuReadingData = false; } if (EmuRunningState) { // NOTE(jsd): Calling SwitchToThread() on Windows 7 x64 is a hot spot, according to profiler. // See https://docs.google.com/spreadsheet/ccc?key=0Ah4nh0yGtjrgdFpDeF9pS3V6RUotRVE3S3J4TGM1NlE#gid=0 // for benchmark details. if (yield_cpu) Common::YieldCPU(); } else { // While the emu is paused, we still handle async requests then sleep. 
while (!EmuRunningState) { g_video_backend->PeekMessages(); m_csHWVidOccupied.unlock(); Common::SleepCurrentThread(1); m_csHWVidOccupied.lock(); } } } // wake up SyncGPU if we were interrupted s_video_buffer_cond.notify_all(); AsyncRequests::GetInstance()->SetEnable(false); AsyncRequests::GetInstance()->SetPassthrough(true); }