예제 #1
0
// In deterministic GPU thread mode: waits on the GPU main loop, then moves the
// unread part of the aux FIFO back to the start of its storage and, when
// may_move_read_ptr is set, does the same for the video buffer.
// Does nothing in any other mode.
//   reason            - not consulted inside this function.
//   may_move_read_ptr - allows the video buffer pointers to be rebased; the aux
//                       FIFO is expected to be fully consumed in that case.
void SyncGPU(SyncGPUReason reason, bool may_move_read_ptr)
{
	if (!g_use_deterministic_gpu_thread)
		return;

	s_gpu_mainloop.Wait();
	if (!s_gpu_mainloop.IsRunning())
		return;

	// Opportunistically reset FIFOs so we don't wrap around.
	const bool aux_fully_consumed = (s_fifo_aux_write_ptr == s_fifo_aux_read_ptr);
	if (may_move_read_ptr && !aux_fully_consumed)
		PanicAlert("aux fifo not synced (%p, %p)", s_fifo_aux_write_ptr, s_fifo_aux_read_ptr);

	// Slide the unread aux bytes down to the beginning of the aux storage.
	const auto aux_consumed = s_fifo_aux_read_ptr - s_fifo_aux_data;
	memmove(s_fifo_aux_data, s_fifo_aux_read_ptr, s_fifo_aux_write_ptr - s_fifo_aux_read_ptr);
	s_fifo_aux_write_ptr -= aux_consumed;
	s_fifo_aux_read_ptr = s_fifo_aux_data;

	if (!may_move_read_ptr)
		return;

	// Bytes between the preprocessing read pointer and the write pointer are
	// what is left over in the buffer.
	u8* const old_write_ptr = s_video_buffer_write_ptr;
	const size_t leftover = old_write_ptr - s_video_buffer_pp_read_ptr;
	memmove(s_video_buffer, s_video_buffer_pp_read_ptr, leftover);

	// Rebasing only ever lowers the pointers.  seen_ptr is stored after
	// write_ptr here, while RunGpuLoop loads seen_ptr before write_ptr, so its
	// 'write_ptr > seen_ptr' test cannot become spuriously true.
	u8* const new_write_ptr = s_video_buffer + leftover;
	s_video_buffer_write_ptr = new_write_ptr;
	s_video_buffer_pp_read_ptr = s_video_buffer;
	s_video_buffer_read_ptr = s_video_buffer;
	s_video_buffer_seen_ptr = new_write_ptr;
}
예제 #2
0
// Runs a Common::BlockingLoop on a worker thread whose payload sets an event,
// and checks from the test thread that the event keeps being signalled, both
// after explicit Wakeup() calls and without them.
TEST(BusyLoopTest, MultiThreaded)
{
  constexpr int kRounds = 100;
  constexpr int kPingPongs = 10;
  constexpr int kUnpromptedWaits = 100;

  Common::BlockingLoop blocking_loop;
  Common::Event payload_ran;

  for (int round = 0; round < kRounds; ++round)
  {
    blocking_loop.Prepare();
    std::thread worker([&]() { blocking_loop.Run([&]() { payload_ran.Set(); }); });

    // Ping - Pong: wake the loop, then wait for its payload to signal back.
    for (int ping = 0; ping < kPingPongs; ++ping)
    {
      blocking_loop.Wakeup();
      payload_ran.Wait();

      // Waste a little time so the loop is much more likely to have fallen
      // back to its sleep state before the next wakeup.
      Common::SleepCurrentThread(1);
    }

    // Normally Wakeup() has to be called to be sure the Event is triggered.
    // This part checks an internal feature of the BlockingLoop instead: it is
    // implemented to fall back to a busy loop regularly, and while it is in
    // the busy loop the payload (and so the Event) is invoked all the time.
    // Hence no Wakeup() call here.
    for (int wait = 0; wait < kUnpromptedWaits; ++wait)
    {
      payload_ran.Wait();
    }

    blocking_loop.Stop();
    worker.join();
  }
}
예제 #3
0
파일: Fifo.cpp 프로젝트: stenzek/dolphin
// Stores the new running state and then either wakes the GPU main loop
// (running) or allows it to sleep (not running).
void EmulatorState(bool running)
{
  s_emu_running_state.Set(running);

  if (!running)
  {
    s_gpu_mainloop.AllowSleep();
    return;
  }

  s_gpu_mainloop.Wakeup();
}
예제 #4
0
void RunGpu()
{
	SCPFifoStruct &fifo = CommandProcessor::fifo;
	const SConfig& param = SConfig::GetInstance();

	// execute GPU
	if (!param.bCPUThread || g_use_deterministic_gpu_thread)
	{
		bool reset_simd_state = false;
		while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint() )
		{
			if (g_use_deterministic_gpu_thread)
			{
				ReadDataFromFifoOnCPU(fifo.CPReadPointer);
				s_gpu_mainloop.Wakeup();
			}
			else
			{
				if (!reset_simd_state)
				{
					FPURoundMode::SaveSIMDState();
					FPURoundMode::LoadDefaultSIMDState();
					reset_simd_state = true;
				}
				ReadDataFromFifo(fifo.CPReadPointer);
				s_video_buffer_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_read_ptr, s_video_buffer_write_ptr), nullptr, false);
			}

			//DEBUG_LOG(COMMANDPROCESSOR, "Fifo wraps to base");

			if (fifo.CPReadPointer == fifo.CPEnd)
				fifo.CPReadPointer = fifo.CPBase;
			else
				fifo.CPReadPointer += 32;

			fifo.CPReadWriteDistance -= 32;
		}
		CommandProcessor::SetCPStatusFromGPU();

		if (reset_simd_state)
		{
			FPURoundMode::LoadSIMDState();
		}
	}

	// wake up GPU thread
	if (param.bCPUThread)
	{
		s_gpu_mainloop.Wakeup();
	}
}
예제 #5
0
파일: Fifo.cpp 프로젝트: stenzek/dolphin
/* This function checks the emulated CPU - GPU distance and may wake up the GPU,
 * or block the CPU if required. It should be called by the CPU thread regularly.
 * @ticks The gone emulated CPU time.
 * @return A good time to call WaitForGpuThread() next.
 */
static int WaitForGpuThread(int ticks)
{
  const SConfig& param = SConfig::GetInstance();

  int old = s_sync_ticks.fetch_add(ticks);
  int now = old + ticks;

  // GPU is idle, so stop polling.
  if (old >= 0 && s_gpu_mainloop.IsDone())
    return -1;

  // Wakeup GPU
  if (old < param.iSyncGpuMinDistance && now >= param.iSyncGpuMinDistance)
    RunGpu();

  // If the GPU is still sleeping, wait for a longer time
  if (now < param.iSyncGpuMinDistance)
    return GPU_TIME_SLOT_SIZE + param.iSyncGpuMinDistance - now;

  // Wait for GPU
  if (now >= param.iSyncGpuMaxDistance)
    s_sync_wakeup_event.Wait();

  return GPU_TIME_SLOT_SIZE;
}
예제 #6
0
// The deterministic_gpu_thread version: copies one 32-byte chunk from emulated
// memory at readPtr to the end of the video buffer, runs the preprocessing
// decoder over the data, and only then publishes the new write pointer.
static void ReadDataFromFifoOnCPU(u32 readPtr)
{
	const size_t len = 32;
	u8* write_ptr = s_video_buffer_write_ptr;

	const size_t tail_space = static_cast<size_t>(s_video_buffer + FIFO_SIZE - write_ptr);
	if (len > tail_space)
	{
		// We can't wrap around while the GPU is working on the data.
		// This should be very rare due to the reset in SyncGPU.
		SyncGPU(SYNC_GPU_WRAPAROUND);

		// GPU is shutting down, so the checks below may fail.
		if (!s_gpu_mainloop.IsRunning())
			return;

		if (s_video_buffer_pp_read_ptr != s_video_buffer_read_ptr)
		{
			PanicAlert("desynced read pointers");
			return;
		}

		// SyncGPU may have rebased the buffer, so re-read the write pointer.
		write_ptr = s_video_buffer_write_ptr;
		const size_t existing_len = write_ptr - s_video_buffer_pp_read_ptr;
		if (len > static_cast<size_t>(FIFO_SIZE - existing_len))
		{
			PanicAlert("FIFO out of bounds (existing %lu + new %lu > %lu)", (unsigned long) existing_len, (unsigned long) len, (unsigned long) FIFO_SIZE);
			return;
		}
	}

	Memory::CopyFromEmu(s_video_buffer_write_ptr, readPtr, len);

	u8* const new_write_ptr = write_ptr + len;
	s_video_buffer_pp_read_ptr = OpcodeDecoder_Run<true>(
		DataReader(s_video_buffer_pp_read_ptr, new_write_ptr), nullptr, false);

	// This would have to be locked if the GPU thread didn't spin.
	s_video_buffer_write_ptr = new_write_ptr;
}
예제 #7
0
void Fifo_Init()
{
	// Padded so that SIMD overreads in the vertex loader are safe
	s_video_buffer = (u8*)AllocateMemoryPages(FIFO_SIZE + 4);
	ResetVideoBuffer();
	if (SConfig::GetInstance().bCPUThread)
		s_gpu_mainloop.Prepare();
	s_sync_ticks.store(0);
}
예제 #8
0
void FlushGpu()
{
	const SConfig& param = SConfig::GetInstance();

	if (!param.bCPUThread || g_use_deterministic_gpu_thread)
		return;

	s_gpu_mainloop.Wait();
}
예제 #9
0
// May be executed from any thread, even the graphics thread.
// Created to allow for self shutdown.
void ExitGpuLoop()
{
	// This should break the wait loop in CPU thread
	CommandProcessor::fifo.bFF_GPReadEnable = false;
	FlushGpu();

	// Terminate GPU thread loop
	s_emu_running_state.store(true);
	s_gpu_mainloop.Stop(false);
}
예제 #10
0
파일: Fifo.cpp 프로젝트: stenzek/dolphin
// Processes FIFO chunks on the calling thread for as long as the tick budget
// (ticks scaled by fSyncGpuOverclock plus the stored sync ticks) is not
// negative and the FIFO has readable data that is not at a breakpoint.
// Returns -1 when the budget was not exhausted, otherwise the delay to wait
// before calling again.
static int RunGpuOnCpu(int ticks)
{
  SCPFifoStruct& fifo = CommandProcessor::fifo;

  int ticks_left = int(ticks * SConfig::GetInstance().fSyncGpuOverclock) + s_sync_ticks.load();

  // Set once the caller's SIMD state has been saved and replaced by the default
  // one, so that it is restored exactly once after the loop.
  bool simd_state_swapped = false;

  while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint() && ticks_left >= 0)
  {
    if (!s_use_deterministic_gpu_thread)
    {
      if (!simd_state_swapped)
      {
        FPURoundMode::SaveSIMDState();
        FPURoundMode::LoadDefaultSIMDState();
        simd_state_swapped = true;
      }
      ReadDataFromFifo(fifo.CPReadPointer);
      u32 cycles = 0;
      s_video_buffer_read_ptr = OpcodeDecoder::Run(
          DataReader(s_video_buffer_read_ptr, s_video_buffer_write_ptr), &cycles, false);
      ticks_left -= cycles;
    }
    else
    {
      ReadDataFromFifoOnCPU(fifo.CPReadPointer);
      s_gpu_mainloop.Wakeup();
    }

    // Advance by one 32-byte chunk, wrapping from CPEnd back to CPBase.
    if (fifo.CPReadPointer != fifo.CPEnd)
      fifo.CPReadPointer += 32;
    else
      fifo.CPReadPointer = fifo.CPBase;

    fifo.CPReadWriteDistance -= 32;
  }

  CommandProcessor::SetCPStatusFromGPU();

  if (simd_state_swapped)
    FPURoundMode::LoadSIMDState();

  // Leftover (non-negative) budget is discarded as there is nothing more to do;
  // only a deficit is carried over.
  s_sync_ticks.store(std::min(ticks_left, 0));

  // If the GPU is idle, drop the handler.
  if (ticks_left >= 0)
    return -1;

  // Always wait at least for GPU_TIME_SLOT_SIZE cycles.
  return GPU_TIME_SLOT_SIZE - ticks_left;
}
예제 #11
0
// Releases the video buffer and clears every buffer pointer. Raises a panic
// alert (but still proceeds) if the GPU main loop is reported as running.
void Fifo_Shutdown()
{
	const bool loop_still_running = s_gpu_mainloop.IsRunning();
	if (loop_still_running)
	{
		PanicAlert("Fifo shutting down while active");
	}

	// Same size as the allocation: FIFO_SIZE plus the 4 padding bytes.
	FreeMemoryPages(s_video_buffer, FIFO_SIZE + 4);

	// Video buffer pointers.
	s_video_buffer = nullptr;
	s_video_buffer_write_ptr = nullptr;
	s_video_buffer_pp_read_ptr = nullptr;
	s_video_buffer_read_ptr = nullptr;
	s_video_buffer_seen_ptr = nullptr;

	// Aux FIFO pointers.
	s_fifo_aux_write_ptr = nullptr;
	s_fifo_aux_read_ptr = nullptr;
}
예제 #12
0
파일: Fifo.cpp 프로젝트: stenzek/dolphin
void PauseAndLock(bool doLock, bool unpauseOnUnlock)
{
  if (doLock)
  {
    SyncGPU(SyncGPUReason::Other);
    EmulatorState(false);

    const SConfig& param = SConfig::GetInstance();

    if (!param.bCPUThread || s_use_deterministic_gpu_thread)
      return;

    s_gpu_mainloop.WaitYield(std::chrono::milliseconds(100), Host_YieldToUI);
  }
  else
  {
    if (unpauseOnUnlock)
      EmulatorState(true);
  }
}
예제 #13
0
파일: Fifo.cpp 프로젝트: stenzek/dolphin
void RunGpu()
{
  const SConfig& param = SConfig::GetInstance();

  // wake up GPU thread
  if (param.bCPUThread && !s_use_deterministic_gpu_thread)
  {
    s_gpu_mainloop.Wakeup();
  }

  // if the sync GPU callback is suspended, wake it up.
  if (!SConfig::GetInstance().bCPUThread || s_use_deterministic_gpu_thread ||
      SConfig::GetInstance().bSyncGPU)
  {
    if (s_syncing_suspended)
    {
      s_syncing_suspended = false;
      CoreTiming::ScheduleEvent(GPU_TIME_SLOT_SIZE, s_event_sync_gpu, GPU_TIME_SLOT_SIZE);
    }
  }
}
예제 #14
0
// Appends size bytes from ptr to the aux FIFO. When the tail of the aux storage
// is too small, SyncGPU is called first to reclaim space; if that is still not
// enough (or the GPU loop is no longer running) nothing is written.
void PushFifoAuxBuffer(void* ptr, size_t size)
{
	// Bytes available between the aux write pointer and the end of the storage.
	const auto tail_space = [] {
		return static_cast<size_t>(s_fifo_aux_data + FIFO_SIZE - s_fifo_aux_write_ptr);
	};

	if (size > tail_space())
	{
		SyncGPU(SYNC_GPU_AUX_SPACE, /* may_move_read_ptr */ false);

		// GPU is shutting down
		if (!s_gpu_mainloop.IsRunning())
			return;

		if (size > tail_space())
		{
			// That will sync us up to the last 32 bytes, so this short region
			// of FIFO would have to point to a 2MB display list or something.
			PanicAlert("absurdly large aux buffer");
			return;
		}
	}

	memcpy(s_fifo_aux_write_ptr, ptr, size);
	s_fifo_aux_write_ptr += size;
}
예제 #15
0
int Fifo_Update(int ticks)
{
	const SConfig& param = SConfig::GetInstance();

	if (ticks == 0)
	{
		FlushGpu();
		return param.iSyncGpuMaxDistance;
	}

	// GPU is sleeping, so no need for synchronization
	if (s_gpu_mainloop.IsDone() || g_use_deterministic_gpu_thread)
	{
		if (s_sync_ticks.load() < 0)
		{
			int old = s_sync_ticks.fetch_add(ticks);
			if (old < param.iSyncGpuMinDistance && old + ticks >= param.iSyncGpuMinDistance)
				RunGpu();
		}
		return param.iSyncGpuMaxDistance;
	}

	int old = s_sync_ticks.fetch_add(ticks);
	if (old < param.iSyncGpuMinDistance && old + ticks >= param.iSyncGpuMinDistance)
		RunGpu();

	if (s_sync_ticks.load() >= param.iSyncGpuMaxDistance)
	{
		while (s_sync_ticks.load() > 0)
		{
			s_sync_wakeup_event.Wait();
		}
	}

	return param.iSyncGpuMaxDistance - s_sync_ticks.load();
}
예제 #16
0
// Stores the new running state, then wakes the GPU main loop so that it
// observes the change.
void EmulatorState(bool running)
{
	s_emu_running_state.store(running);

	// The loop is woken for both transitions (running and paused).
	s_gpu_mainloop.Wakeup();
}
예제 #17
0
파일: Fifo.cpp 프로젝트: stenzek/dolphin
namespace Fifo
{
// Size in bytes of the aux FIFO storage and of the video buffer (the latter is
// allocated with 4 extra padding bytes in Init()).
static constexpr u32 FIFO_SIZE = 2 * 1024 * 1024;
// Delay passed to CoreTiming::ScheduleEvent when (re)arming the GPU sync callback.
static constexpr int GPU_TIME_SLOT_SIZE = 1000;

// Loop object whose payload is supplied by RunGpuLoop().
static Common::BlockingLoop s_gpu_mainloop;

// Set through EmulatorState(); the GPU loop payload does nothing while it is clear.
static Common::Flag s_emu_running_state;

// Most of this array is unlikely to be faulted in...
static u8 s_fifo_aux_data[FIFO_SIZE];
// Next byte to be written by PushFifoAuxBuffer().
static u8* s_fifo_aux_write_ptr;
// Next byte to be handed out by PopFifoAuxBuffer().
static u8* s_fifo_aux_read_ptr;

// This could be in SConfig, but it depends on multiple settings
// and can change at runtime.
static bool s_use_deterministic_gpu_thread;

// Event handle registered for SyncGPUCallback in Prepare().
static CoreTiming::EventType* s_event_sync_gpu;

// STATE_TO_SAVE
static u8* s_video_buffer;
static u8* s_video_buffer_read_ptr;
static std::atomic<u8*> s_video_buffer_write_ptr;
static std::atomic<u8*> s_video_buffer_seen_ptr;
static u8* s_video_buffer_pp_read_ptr;
// The read_ptr is always owned by the GPU thread.  In normal mode, so is the
// write_ptr, despite it being atomic.  In deterministic GPU thread mode,
// things get a bit more complicated:
// - The seen_ptr is written by the GPU thread, and points to the end of the
// data it has already processed as far as it could - when a partial command
// made it stop, this is not the same as the read ptr.  It's written by the
// GPU, under the lock, and updating the cond.
// - The write_ptr is written by the CPU thread after it copies data from the
// FIFO.  Maybe someday it will be under the lock.  For now, because RunGpuLoop
// polls, it's just atomic.
// - The pp_read_ptr is the CPU preprocessing version of the read_ptr.

// Tick counter shared by the sync functions: WaitForGpuThread() adds to it, the
// GPU loop subtracts executed cycles, and RunGpuOnCpu() stores its deficit here.
static std::atomic<int> s_sync_ticks;
// True while SyncGPUCallback is not scheduled (see RunGpu() and SyncGPUCallback()).
static bool s_syncing_suspended;
// Set by the GPU loop; WaitForGpuThread() waits on it when too far ahead.
static Common::Event s_sync_wakeup_event;

// Passes the video buffer contents, its write/read pointers and the sync
// bookkeeping through the given PointerWrap. In MODE_READ with the
// deterministic GPU thread, the seen and preprocessing pointers are reset to
// the read pointer.
void DoState(PointerWrap& p)
{
  p.DoArray(s_video_buffer, FIFO_SIZE);

  // The write pointer is atomic, so it is round-tripped through a plain local.
  u8* plain_write_ptr = s_video_buffer_write_ptr;
  p.DoPointer(plain_write_ptr, s_video_buffer);
  s_video_buffer_write_ptr = plain_write_ptr;

  p.DoPointer(s_video_buffer_read_ptr, s_video_buffer);

  const bool mode_is_read = (p.mode == PointerWrap::MODE_READ);
  if (mode_is_read && s_use_deterministic_gpu_thread)
  {
    // We're good and paused, right?
    s_video_buffer_pp_read_ptr = s_video_buffer_read_ptr;
    s_video_buffer_seen_ptr = s_video_buffer_read_ptr;
  }

  p.Do(s_sync_ticks);
  p.Do(s_syncing_suspended);
}

void PauseAndLock(bool doLock, bool unpauseOnUnlock)
{
  if (doLock)
  {
    SyncGPU(SyncGPUReason::Other);
    EmulatorState(false);

    const SConfig& param = SConfig::GetInstance();

    if (!param.bCPUThread || s_use_deterministic_gpu_thread)
      return;

    s_gpu_mainloop.WaitYield(std::chrono::milliseconds(100), Host_YieldToUI);
  }
  else
  {
    if (unpauseOnUnlock)
      EmulatorState(true);
  }
}

// Allocates the video buffer, resets all buffer pointers, prepares the GPU main
// loop when the CPU-thread setting is enabled, and zeroes the sync tick counter.
void Init()
{
  // Padded so that SIMD overreads in the vertex loader are safe
  void* const pages = Common::AllocateMemoryPages(FIFO_SIZE + 4);
  s_video_buffer = static_cast<u8*>(pages);
  ResetVideoBuffer();

  const bool cpu_thread_enabled = SConfig::GetInstance().bCPUThread;
  if (cpu_thread_enabled)
  {
    s_gpu_mainloop.Prepare();
  }

  s_sync_ticks.store(0);
}

// Releases the video buffer and clears every buffer pointer. Raises a panic
// alert (but still proceeds) if the GPU main loop is reported as running.
void Shutdown()
{
  const bool loop_still_running = s_gpu_mainloop.IsRunning();
  if (loop_still_running)
  {
    PanicAlert("Fifo shutting down while active");
  }

  // Same size as the allocation in Init(): FIFO_SIZE plus the 4 padding bytes.
  Common::FreeMemoryPages(s_video_buffer, FIFO_SIZE + 4);

  // Video buffer pointers.
  s_video_buffer = nullptr;
  s_video_buffer_write_ptr = nullptr;
  s_video_buffer_pp_read_ptr = nullptr;
  s_video_buffer_read_ptr = nullptr;
  s_video_buffer_seen_ptr = nullptr;

  // Aux FIFO pointers.
  s_fifo_aux_write_ptr = nullptr;
  s_fifo_aux_read_ptr = nullptr;
}

// May be executed from any thread, even the graphics thread.
// Created to allow for self shutdown.
void ExitGpuLoop()
{
  // This should break the wait loop in CPU thread
  CommandProcessor::fifo.bFF_GPReadEnable = false;
  FlushGpu();

  // Terminate GPU thread loop
  s_emu_running_state.Set();
  s_gpu_mainloop.Stop(false);
}

// Stores the new running state and then either wakes the GPU main loop
// (running) or allows it to sleep (not running).
void EmulatorState(bool running)
{
  s_emu_running_state.Set(running);

  if (!running)
  {
    s_gpu_mainloop.AllowSleep();
    return;
  }

  s_gpu_mainloop.Wakeup();
}

// In deterministic GPU thread mode: waits on the GPU main loop, then moves the
// unread part of the aux FIFO back to the start of its storage and, when
// may_move_read_ptr is set, does the same for the video buffer.
// Does nothing in any other mode.
//   reason            - not consulted inside this function.
//   may_move_read_ptr - allows the video buffer pointers to be rebased; the aux
//                       FIFO is expected to be fully consumed in that case.
void SyncGPU(SyncGPUReason reason, bool may_move_read_ptr)
{
  if (!s_use_deterministic_gpu_thread)
    return;

  s_gpu_mainloop.Wait();
  if (!s_gpu_mainloop.IsRunning())
    return;

  // Opportunistically reset FIFOs so we don't wrap around.
  const bool aux_fully_consumed = (s_fifo_aux_write_ptr == s_fifo_aux_read_ptr);
  if (may_move_read_ptr && !aux_fully_consumed)
    PanicAlert("aux fifo not synced (%p, %p)", s_fifo_aux_write_ptr, s_fifo_aux_read_ptr);

  // Slide the unread aux bytes down to the beginning of the aux storage.
  const auto aux_consumed = s_fifo_aux_read_ptr - s_fifo_aux_data;
  memmove(s_fifo_aux_data, s_fifo_aux_read_ptr, s_fifo_aux_write_ptr - s_fifo_aux_read_ptr);
  s_fifo_aux_write_ptr -= aux_consumed;
  s_fifo_aux_read_ptr = s_fifo_aux_data;

  if (!may_move_read_ptr)
    return;

  // Bytes between the preprocessing read pointer and the write pointer are
  // what is left over in the buffer.
  u8* const old_write_ptr = s_video_buffer_write_ptr;
  const size_t leftover = old_write_ptr - s_video_buffer_pp_read_ptr;
  memmove(s_video_buffer, s_video_buffer_pp_read_ptr, leftover);

  // Rebasing only ever lowers the pointers.  seen_ptr is stored after
  // write_ptr here, while RunGpuLoop loads seen_ptr before write_ptr, so its
  // 'write_ptr > seen_ptr' test cannot become spuriously true.
  u8* const new_write_ptr = s_video_buffer + leftover;
  s_video_buffer_write_ptr = new_write_ptr;
  s_video_buffer_pp_read_ptr = s_video_buffer;
  s_video_buffer_read_ptr = s_video_buffer;
  s_video_buffer_seen_ptr = new_write_ptr;
}

// Appends size bytes from ptr to the aux FIFO. When the tail of the aux storage
// is too small, SyncGPU is called first to reclaim space; if that is still not
// enough (or the GPU loop is no longer running) nothing is written.
void PushFifoAuxBuffer(void* ptr, size_t size)
{
  // Bytes available between the aux write pointer and the end of the storage.
  const auto tail_space = [] {
    return static_cast<size_t>(s_fifo_aux_data + FIFO_SIZE - s_fifo_aux_write_ptr);
  };

  if (size > tail_space())
  {
    SyncGPU(SyncGPUReason::AuxSpace, /* may_move_read_ptr */ false);

    // GPU is shutting down
    if (!s_gpu_mainloop.IsRunning())
      return;

    if (size > tail_space())
    {
      // That will sync us up to the last 32 bytes, so this short region
      // of FIFO would have to point to a 2MB display list or something.
      PanicAlert("absurdly large aux buffer");
      return;
    }
  }

  memcpy(s_fifo_aux_write_ptr, ptr, size);
  s_fifo_aux_write_ptr += size;
}

void* PopFifoAuxBuffer(size_t size)
{
  void* ret = s_fifo_aux_read_ptr;
  s_fifo_aux_read_ptr += size;
  return ret;
}

// Copies one 32-byte chunk from emulated memory at readPtr to the end of the
// video buffer. When the tail of the buffer is too small, the unread data is
// first moved back to the start of the buffer. Called from RunGpuLoop() and
// RunGpuOnCpu().
static void ReadDataFromFifo(u32 readPtr)
{
  const size_t len = 32;

  const size_t tail_space =
      static_cast<size_t>(s_video_buffer + FIFO_SIZE - s_video_buffer_write_ptr);
  if (len > tail_space)
  {
    // Unread bytes that have to survive the compaction.
    const size_t existing_len = s_video_buffer_write_ptr - s_video_buffer_read_ptr;
    if (len > static_cast<size_t>(FIFO_SIZE - existing_len))
    {
      PanicAlert("FIFO out of bounds (existing %zu + new %zu > %lu)", existing_len, len,
                 (unsigned long)FIFO_SIZE);
      return;
    }

    memmove(s_video_buffer, s_video_buffer_read_ptr, existing_len);
    s_video_buffer_write_ptr = s_video_buffer + existing_len;
    s_video_buffer_read_ptr = s_video_buffer;
  }

  // Copy new video instructions to s_video_buffer for future use in rendering the new picture
  Memory::CopyFromEmu(s_video_buffer_write_ptr, readPtr, len);
  s_video_buffer_write_ptr += len;
}

// The deterministic_gpu_thread version: copies one 32-byte chunk from emulated
// memory at readPtr to the end of the video buffer, runs the preprocessing
// decoder over the data, and only then publishes the new write pointer.
static void ReadDataFromFifoOnCPU(u32 readPtr)
{
  const size_t len = 32;
  u8* write_ptr = s_video_buffer_write_ptr;

  const size_t tail_space = static_cast<size_t>(s_video_buffer + FIFO_SIZE - write_ptr);
  if (len > tail_space)
  {
    // We can't wrap around while the GPU is working on the data.
    // This should be very rare due to the reset in SyncGPU.
    SyncGPU(SyncGPUReason::Wraparound);

    // GPU is shutting down, so the checks below may fail.
    if (!s_gpu_mainloop.IsRunning())
      return;

    if (s_video_buffer_pp_read_ptr != s_video_buffer_read_ptr)
    {
      PanicAlert("desynced read pointers");
      return;
    }

    // SyncGPU may have rebased the buffer, so re-read the write pointer.
    write_ptr = s_video_buffer_write_ptr;
    const size_t existing_len = write_ptr - s_video_buffer_pp_read_ptr;
    if (len > static_cast<size_t>(FIFO_SIZE - existing_len))
    {
      PanicAlert("FIFO out of bounds (existing %zu + new %zu > %lu)", existing_len, len,
                 (unsigned long)FIFO_SIZE);
      return;
    }
  }

  Memory::CopyFromEmu(s_video_buffer_write_ptr, readPtr, len);

  u8* const new_write_ptr = write_ptr + len;
  s_video_buffer_pp_read_ptr = OpcodeDecoder::Run<true>(
      DataReader(s_video_buffer_pp_read_ptr, new_write_ptr), nullptr, false);

  // This would have to be locked if the GPU thread didn't spin.
  s_video_buffer_write_ptr = new_write_ptr;
}

void ResetVideoBuffer()
{
  s_video_buffer_read_ptr = s_video_buffer;
  s_video_buffer_write_ptr = s_video_buffer;
  s_video_buffer_seen_ptr = s_video_buffer;
  s_video_buffer_pp_read_ptr = s_video_buffer;
  s_fifo_aux_write_ptr = s_fifo_aux_data;
  s_fifo_aux_read_ptr = s_fifo_aux_data;
}

// Description: Main FIFO update loop
// Purpose: Keep the Core HW updated about the CPU-GPU distance
// Enables async requests, runs the payload below through s_gpu_mainloop.Run()
// until that call returns, then switches async requests back to passthrough.
void RunGpuLoop()
{
  AsyncRequests::GetInstance()->SetEnable(true);
  AsyncRequests::GetInstance()->SetPassthrough(false);

  s_gpu_mainloop.Run(
      [] {
        const SConfig& param = SConfig::GetInstance();

        g_video_backend->PeekMessages();

        // Do nothing while paused
        if (!s_emu_running_state.IsSet())
          return;

        if (s_use_deterministic_gpu_thread)
        {
          AsyncRequests::GetInstance()->PullEvents();

          // All the fifo/CP stuff is on the CPU.  We just need to run the opcode decoder.
          // seen_ptr is loaded before write_ptr on purpose.
          u8* seen_ptr = s_video_buffer_seen_ptr;
          u8* write_ptr = s_video_buffer_write_ptr;
          // See comment in SyncGPU
          if (write_ptr > seen_ptr)
          {
            s_video_buffer_read_ptr =
                OpcodeDecoder::Run(DataReader(s_video_buffer_read_ptr, write_ptr), nullptr, false);
            s_video_buffer_seen_ptr = write_ptr;
          }
        }
        else
        {
          SCPFifoStruct& fifo = CommandProcessor::fifo;

          AsyncRequests::GetInstance()->PullEvents();

          CommandProcessor::SetCPStatusFromGPU();

          // check if we are able to run this buffer
          while (!CommandProcessor::IsInterruptWaiting() && fifo.bFF_GPReadEnable &&
                 fifo.CPReadWriteDistance && !AtBreakpoint())
          {
            // With SyncGPU enabled, stop until the sync counter reaches the minimum distance.
            if (param.bSyncGPU && s_sync_ticks.load() < param.iSyncGpuMinDistance)
              break;

            u32 cyclesExecuted = 0;
            u32 readPtr = fifo.CPReadPointer;
            ReadDataFromFifo(readPtr);

            // Advance the local read pointer by one 32-byte chunk, wrapping at CPEnd.
            if (readPtr == fifo.CPEnd)
              readPtr = fifo.CPBase;
            else
              readPtr += 32;

            _assert_msg_(COMMANDPROCESSOR, (s32)fifo.CPReadWriteDistance - 32 >= 0,
                         "Negative fifo.CPReadWriteDistance = %i in FIFO Loop !\nThat can produce "
                         "instability in the game. Please report it.",
                         fifo.CPReadWriteDistance - 32);

            u8* write_ptr = s_video_buffer_write_ptr;
            s_video_buffer_read_ptr = OpcodeDecoder::Run(
                DataReader(s_video_buffer_read_ptr, write_ptr), &cyclesExecuted, false);

            // Publish the new FIFO position only after the chunk has been decoded.
            Common::AtomicStore(fifo.CPReadPointer, readPtr);
            Common::AtomicAdd(fifo.CPReadWriteDistance, -32);
            // Update the safe read pointer only when the decoder consumed everything buffered.
            if ((write_ptr - s_video_buffer_read_ptr) == 0)
              Common::AtomicStore(fifo.SafeCPReadPointer, fifo.CPReadPointer);

            CommandProcessor::SetCPStatusFromGPU();

            if (param.bSyncGPU)
            {
              cyclesExecuted = (int)(cyclesExecuted / param.fSyncGpuOverclock);
              int old = s_sync_ticks.fetch_sub(cyclesExecuted);
              // Signal the event when this subtraction took the counter from at/above
              // the maximum distance to below it.
              if (old >= param.iSyncGpuMaxDistance &&
                  old - (int)cyclesExecuted < param.iSyncGpuMaxDistance)
                s_sync_wakeup_event.Set();
            }

            // This call is pretty important in DualCore mode and must be called in the FIFO Loop.
            // If we don't, s_swapRequested or s_efbAccessRequested won't be set to false
            // leading the CPU thread to wait in Video_BeginField or Video_AccessEFB thus slowing
            // things down.
            AsyncRequests::GetInstance()->PullEvents();
          }

          // fast skip remaining GPU time if fifo is empty
          if (s_sync_ticks.load() > 0)
          {
            int old = s_sync_ticks.exchange(0);
            if (old >= param.iSyncGpuMaxDistance)
              s_sync_wakeup_event.Set();
          }

          // The fifo is empty and it's unlikely we will get any more work in the near future.
          // Make sure VertexManager finishes drawing any primitives it has stored in its buffer.
          g_vertex_manager->Flush();
        }
      },
      100);

  AsyncRequests::GetInstance()->SetEnable(false);
  AsyncRequests::GetInstance()->SetPassthrough(true);
}

void FlushGpu()
{
  const SConfig& param = SConfig::GetInstance();

  if (!param.bCPUThread || s_use_deterministic_gpu_thread)
    return;

  s_gpu_mainloop.Wait();
}

// Tells the GPU main loop that it is allowed to sleep.
void GpuMaySleep()
{
  s_gpu_mainloop.AllowSleep();
}

// Returns true when FIFO breakpoints are enabled and the FIFO read pointer is
// exactly at the breakpoint address.
bool AtBreakpoint()
{
  SCPFifoStruct& fifo = CommandProcessor::fifo;

  if (!fifo.bFF_BPEnable)
    return false;

  return fifo.CPReadPointer == fifo.CPBreakpoint;
}

void RunGpu()
{
  const SConfig& param = SConfig::GetInstance();

  // wake up GPU thread
  if (param.bCPUThread && !s_use_deterministic_gpu_thread)
  {
    s_gpu_mainloop.Wakeup();
  }

  // if the sync GPU callback is suspended, wake it up.
  if (!SConfig::GetInstance().bCPUThread || s_use_deterministic_gpu_thread ||
      SConfig::GetInstance().bSyncGPU)
  {
    if (s_syncing_suspended)
    {
      s_syncing_suspended = false;
      CoreTiming::ScheduleEvent(GPU_TIME_SLOT_SIZE, s_event_sync_gpu, GPU_TIME_SLOT_SIZE);
    }
  }
}

// Processes FIFO chunks on the calling thread for as long as the tick budget
// (ticks scaled by fSyncGpuOverclock plus the stored sync ticks) is not
// negative and the FIFO has readable data that is not at a breakpoint.
// Returns -1 when the budget was not exhausted, otherwise the delay to wait
// before calling again.
static int RunGpuOnCpu(int ticks)
{
  SCPFifoStruct& fifo = CommandProcessor::fifo;

  int ticks_left = int(ticks * SConfig::GetInstance().fSyncGpuOverclock) + s_sync_ticks.load();

  // Set once the caller's SIMD state has been saved and replaced by the default
  // one, so that it is restored exactly once after the loop.
  bool simd_state_swapped = false;

  while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint() && ticks_left >= 0)
  {
    if (!s_use_deterministic_gpu_thread)
    {
      if (!simd_state_swapped)
      {
        FPURoundMode::SaveSIMDState();
        FPURoundMode::LoadDefaultSIMDState();
        simd_state_swapped = true;
      }
      ReadDataFromFifo(fifo.CPReadPointer);
      u32 cycles = 0;
      s_video_buffer_read_ptr = OpcodeDecoder::Run(
          DataReader(s_video_buffer_read_ptr, s_video_buffer_write_ptr), &cycles, false);
      ticks_left -= cycles;
    }
    else
    {
      ReadDataFromFifoOnCPU(fifo.CPReadPointer);
      s_gpu_mainloop.Wakeup();
    }

    // Advance by one 32-byte chunk, wrapping from CPEnd back to CPBase.
    if (fifo.CPReadPointer != fifo.CPEnd)
      fifo.CPReadPointer += 32;
    else
      fifo.CPReadPointer = fifo.CPBase;

    fifo.CPReadWriteDistance -= 32;
  }

  CommandProcessor::SetCPStatusFromGPU();

  if (simd_state_swapped)
    FPURoundMode::LoadSIMDState();

  // Leftover (non-negative) budget is discarded as there is nothing more to do;
  // only a deficit is carried over.
  s_sync_ticks.store(std::min(ticks_left, 0));

  // If the GPU is idle, drop the handler.
  if (ticks_left >= 0)
    return -1;

  // Always wait at least for GPU_TIME_SLOT_SIZE cycles.
  return GPU_TIME_SLOT_SIZE - ticks_left;
}

void UpdateWantDeterminism(bool want)
{
  // We are paused (or not running at all yet), so
  // it should be safe to change this.
  const SConfig& param = SConfig::GetInstance();
  bool gpu_thread = false;
  switch (param.m_GPUDeterminismMode)
  {
  case GPU_DETERMINISM_AUTO:
    gpu_thread = want;

    // Hack: For now movies are an exception to this being on (but not
    // to wanting determinism in general).  Once vertex arrays are
    // fixed, there should be no reason to want this off for movies by
    // default, so this can be removed.
    if (!NetPlay::IsNetPlayRunning())
      gpu_thread = false;

    break;
  case GPU_DETERMINISM_NONE:
    gpu_thread = false;
    break;
  case GPU_DETERMINISM_FAKE_COMPLETION:
    gpu_thread = true;
    break;
  }

  gpu_thread = gpu_thread && param.bCPUThread;

  if (s_use_deterministic_gpu_thread != gpu_thread)
  {
    s_use_deterministic_gpu_thread = gpu_thread;
    if (gpu_thread)
    {
      // These haven't been updated in non-deterministic mode.
      s_video_buffer_seen_ptr = s_video_buffer_pp_read_ptr = s_video_buffer_read_ptr;
      CopyPreprocessCPStateFromMain();
      VertexLoaderManager::MarkAllDirty();
    }
  }
}

// Reports whether the deterministic GPU thread mode is currently selected
// (the flag is maintained by UpdateWantDeterminism()).
bool UseDeterministicGPUThread()
{
  const bool enabled = s_use_deterministic_gpu_thread;
  return enabled;
}

/* This function checks the emulated CPU - GPU distance and may wake up the GPU,
 * or block the CPU if required. It should be called by the CPU thread regularly.
 * @ticks The gone emulated CPU time.
 * @return A good time to call WaitForGpuThread() next.
 */
static int WaitForGpuThread(int ticks)
{
  const SConfig& param = SConfig::GetInstance();

  int old = s_sync_ticks.fetch_add(ticks);
  int now = old + ticks;

  // GPU is idle, so stop polling.
  if (old >= 0 && s_gpu_mainloop.IsDone())
    return -1;

  // Wakeup GPU
  if (old < param.iSyncGpuMinDistance && now >= param.iSyncGpuMinDistance)
    RunGpu();

  // If the GPU is still sleeping, wait for a longer time
  if (now < param.iSyncGpuMinDistance)
    return GPU_TIME_SLOT_SIZE + param.iSyncGpuMinDistance - now;

  // Wait for GPU
  if (now >= param.iSyncGpuMaxDistance)
    s_sync_wakeup_event.Wait();

  return GPU_TIME_SLOT_SIZE;
}

// CoreTiming callback driving CPU - GPU syncing. Depending on the mode it
// either processes the FIFO on this thread or syncs with the GPU thread, then
// reschedules itself unless the chosen routine returned a negative delay, in
// which case syncing is marked as suspended.
static void SyncGPUCallback(u64 ticks, s64 cyclesLate)
{
  // Account for the callback having fired late.
  ticks += cyclesLate;

  const SConfig& config = SConfig::GetInstance();

  // Stays negative when neither branch applies.
  int next = -1;
  if (!config.bCPUThread || s_use_deterministic_gpu_thread)
    next = RunGpuOnCpu((int)ticks);
  else if (config.bSyncGPU)
    next = WaitForGpuThread((int)ticks);

  if (next < 0)
  {
    s_syncing_suspended = true;
    return;
  }

  s_syncing_suspended = false;
  CoreTiming::ScheduleEvent(next, s_event_sync_gpu, next);
}

// Initialize GPU - CPU thread syncing, this gives us a deterministic way to start the GPU thread.
void Prepare()
{
  s_event_sync_gpu = CoreTiming::RegisterEvent("SyncGPUCallback", SyncGPUCallback);
  s_syncing_suspended = true;
}
}
예제 #18
0
// Tells the GPU main loop that it is allowed to sleep.
void GpuMaySleep()
{
	s_gpu_mainloop.AllowSleep();
}
예제 #19
0
// Description: Main FIFO update loop
// Purpose: Keep the Core HW updated about the CPU-GPU distance
// Enables async requests, runs the payload below through s_gpu_mainloop.Run()
// until that call returns, then switches async requests back to passthrough.
void RunGpuLoop()
{

	AsyncRequests::GetInstance()->SetEnable(true);
	AsyncRequests::GetInstance()->SetPassthrough(false);

	s_gpu_mainloop.Run(
	[] {
		const SConfig& param = SConfig::GetInstance();

		g_video_backend->PeekMessages();

		// Do nothing while paused
		if (!s_emu_running_state.load())
			return;

		if (g_use_deterministic_gpu_thread)
		{
			AsyncRequests::GetInstance()->PullEvents();

			// All the fifo/CP stuff is on the CPU.  We just need to run the opcode decoder.
			// seen_ptr is loaded before write_ptr on purpose.
			u8* seen_ptr = s_video_buffer_seen_ptr;
			u8* write_ptr = s_video_buffer_write_ptr;
			// See comment in SyncGPU
			if (write_ptr > seen_ptr)
			{
				s_video_buffer_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_read_ptr, write_ptr), nullptr, false);
				s_video_buffer_seen_ptr = write_ptr;
			}
		}
		else
		{
			SCPFifoStruct &fifo = CommandProcessor::fifo;

			AsyncRequests::GetInstance()->PullEvents();

			CommandProcessor::SetCPStatusFromGPU();

			// check if we are able to run this buffer
			while (!CommandProcessor::IsInterruptWaiting() && fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint())
			{
				// With SyncGPU enabled, stop until the sync counter reaches the minimum distance.
				if (param.bSyncGPU && s_sync_ticks.load() < param.iSyncGpuMinDistance)
					break;

				u32 cyclesExecuted = 0;
				u32 readPtr = fifo.CPReadPointer;
				ReadDataFromFifo(readPtr);

				// Advance the local read pointer by one 32-byte chunk, wrapping at CPEnd.
				if (readPtr == fifo.CPEnd)
					readPtr = fifo.CPBase;
				else
					readPtr += 32;

				_assert_msg_(COMMANDPROCESSOR, (s32)fifo.CPReadWriteDistance - 32 >= 0 ,
					"Negative fifo.CPReadWriteDistance = %i in FIFO Loop !\nThat can produce instability in the game. Please report it.", fifo.CPReadWriteDistance - 32);

				u8* write_ptr = s_video_buffer_write_ptr;
				s_video_buffer_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_read_ptr, write_ptr), &cyclesExecuted, false);

				// Publish the new FIFO position only after the chunk has been decoded.
				Common::AtomicStore(fifo.CPReadPointer, readPtr);
				Common::AtomicAdd(fifo.CPReadWriteDistance, -32);
				// Update the safe read pointer only when the decoder consumed everything buffered.
				if ((write_ptr - s_video_buffer_read_ptr) == 0)
					Common::AtomicStore(fifo.SafeCPReadPointer, fifo.CPReadPointer);

				CommandProcessor::SetCPStatusFromGPU();

				if (param.bSyncGPU)
				{
					cyclesExecuted = (int)(cyclesExecuted / param.fSyncGpuOverclock);
					int old = s_sync_ticks.fetch_sub(cyclesExecuted);
					// Signal the event when this subtraction took the counter from
					// positive to zero or below.
					if (old > 0 && old - (int)cyclesExecuted <= 0)
						s_sync_wakeup_event.Set();
				}

				// This call is pretty important in DualCore mode and must be called in the FIFO Loop.
				// If we don't, s_swapRequested or s_efbAccessRequested won't be set to false
				// leading the CPU thread to wait in Video_BeginField or Video_AccessEFB thus slowing things down.
				AsyncRequests::GetInstance()->PullEvents();
			}

			// fast skip remaining GPU time if fifo is empty
			if (s_sync_ticks.load() > 0)
			{
				int old = s_sync_ticks.exchange(0);
				if (old > 0)
					s_sync_wakeup_event.Set();
			}

			// The fifo is empty and it's unlikely we will get any more work in the near future.
			// Make sure VertexManager finishes drawing any primitives it has stored in its buffer.
			VertexManager::Flush();
		}
	}, 100);

	AsyncRequests::GetInstance()->SetEnable(false);
	AsyncRequests::GetInstance()->SetPassthrough(true);
}