std::vector<CounterResult> AMDCounters::GetCounterData(uint32_t sessionID, uint32_t maxSampleIndex, const std::vector<uint32_t> &eventIDs, const std::vector<GPUCounter> &counters) { std::vector<CounterResult> ret; bool isReady = false; const uint32_t timeoutPeriod = 10000; // ms PerformanceTimer timeout; do { isReady = IsSessionReady(sessionID); if(!isReady) { Threading::Sleep(0); PerformanceTimer endTime; if(timeout.GetMilliseconds() > timeoutPeriod) { GPA_LoggingCallback(GPA_LOGGING_ERROR, "GetCounterData failed due to elapsed timeout."); return ret; } } } while(!isReady); for(uint32_t s = 0; s < maxSampleIndex; s++) { for(size_t c = 0; c < counters.size(); c++) { const CounterDescription desc = GetCounterDescription(counters[c]); switch(desc.resultType) { case CompType::UInt: { if(desc.resultByteWidth == sizeof(uint32_t)) { uint32_t value = GetSampleUint32(sessionID, s, counters[c]); if(desc.unit == CounterUnit::Percentage) { value = RDCCLAMP(value, 0U, 100U); } ret.push_back(CounterResult(eventIDs[s], counters[c], value)); } else if(desc.resultByteWidth == sizeof(uint64_t)) { uint64_t value = GetSampleUint64(sessionID, s, counters[c]); if(desc.unit == CounterUnit::Percentage) { value = RDCCLAMP(value, (uint64_t)0, (uint64_t)100); } ret.push_back(CounterResult(eventIDs[s], counters[c], value)); } else { RDCERR("Unexpected byte width %u", desc.resultByteWidth); } } break; case CompType::Float: { float value = GetSampleFloat32(sessionID, s, counters[c]); if(desc.unit == CounterUnit::Percentage) { value = RDCCLAMP(value, 0.0f, 100.0f); } ret.push_back(CounterResult(eventIDs[s], counters[c], value)); } break; case CompType::Double: { double value = GetSampleFloat64(sessionID, s, counters[c]); if(desc.unit == CounterUnit::Percentage) { value = RDCCLAMP(value, 0.0, 100.0); } ret.push_back(CounterResult(eventIDs[s], counters[c], value)); } break; default: RDCASSERT(0); break; }; } } return ret; }
// Fetches the requested GPU counters for every timed event.
//
// counters - counters to fetch. Counters for which IsAMDCounter() is true are forwarded to
//            FetchCountersAMD() (only when m_pAMDCounters is set); the remainder are serviced
//            here with D3D11 timestamp, pipeline statistics and occlusion queries.
//
// Returns the AMD results (if any) followed by one result per timer per non-AMD counter.
// When a timer is missing any of its queries, EventGPUDuration is reported as -1.0 and the
// other counters as 0xFFFFFFFFFFFFFFFF. If query creation fails, whatever has been gathered
// so far (the AMD results) is returned.
vector<CounterResult> D3D11Replay::FetchCounters(const vector<GPUCounter> &counters)
{
  vector<CounterResult> ret;

  if(counters.empty())
  {
    RDCERR("No counters specified to FetchCounters");
    return ret;
  }

  // size() is a size_t; cast so that the argument matches the %u format specifier
  SCOPED_TIMER("Fetch Counters, counters to fetch %u", (uint32_t)counters.size());

  vector<GPUCounter> d3dCounters;
  std::copy_if(counters.begin(), counters.end(), std::back_inserter(d3dCounters),
               [](const GPUCounter &c) { return !IsAMDCounter(c); });

  if(m_pAMDCounters)
  {
    // Filter out the AMD counters
    vector<GPUCounter> amdCounters;
    std::copy_if(counters.begin(), counters.end(), std::back_inserter(amdCounters),
                 [](const GPUCounter &c) { return IsAMDCounter(c); });

    if(!amdCounters.empty())
    {
      ret = FetchCountersAMD(amdCounters);
    }
  }

  if(d3dCounters.empty())
  {
    return ret;
  }

  D3D11_QUERY_DESC disjointdesc = {D3D11_QUERY_TIMESTAMP_DISJOINT, 0};
  ID3D11Query *disjoint = NULL;

  D3D11_QUERY_DESC qdesc = {D3D11_QUERY_TIMESTAMP, 0};
  ID3D11Query *start = NULL;

  HRESULT hr = S_OK;

  hr = m_pDevice->CreateQuery(&disjointdesc, &disjoint);
  if(FAILED(hr))
  {
    RDCERR("Failed to create disjoint query HRESULT: %s", ToStr(hr).c_str());
    return ret;
  }

  hr = m_pDevice->CreateQuery(&qdesc, &start);
  if(FAILED(hr))
  {
    RDCERR("Failed to create start query HRESULT: %s", ToStr(hr).c_str());
    // the disjoint query was already created above, don't leak it
    SAFE_RELEASE(disjoint);
    return ret;
  }

  D3D11CounterContext ctx;

  // Issue all the queries, bracketed by the disjoint query.
  m_pImmediateContext->Begin(disjoint);

  m_pImmediateContext->End(start);

  ctx.eventStart = 0;
  FillTimers(ctx, m_pImmediateContext->GetRootDraw());

  m_pImmediateContext->End(disjoint);

  // Read everything back. GetData returns S_FALSE while the result is not yet available, so
  // spin until the disjoint query (the last one issued) has a result.
  D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjointData = {};
  do
  {
    hr = m_pImmediateContext->GetData(disjoint, &disjointData,
                                      sizeof(D3D11_QUERY_DATA_TIMESTAMP_DISJOINT), 0);
  } while(hr == S_FALSE);
  RDCASSERTEQUAL(hr, S_OK);

  RDCASSERT(!disjointData.Disjoint);

  // timestamp ticks per second, used to convert tick deltas to seconds
  const double ticksPerSec = double(disjointData.Frequency);

  UINT64 a = 0;
  hr = m_pImmediateContext->GetData(start, &a, sizeof(UINT64), 0);
  RDCASSERTEQUAL(hr, S_OK);

  for(size_t i = 0; i < ctx.timers.size(); i++)
  {
    if(ctx.timers[i].before && ctx.timers[i].after && ctx.timers[i].stats &&
       ctx.timers[i].occlusion)
    {
      hr = m_pImmediateContext->GetData(ctx.timers[i].before, &a, sizeof(UINT64), 0);
      RDCASSERTEQUAL(hr, S_OK);

      UINT64 b = 0;
      hr = m_pImmediateContext->GetData(ctx.timers[i].after, &b, sizeof(UINT64), 0);
      RDCASSERTEQUAL(hr, S_OK);

      double duration = (double(b - a) / ticksPerSec);

      a = b;

      D3D11_QUERY_DATA_PIPELINE_STATISTICS pipelineStats = {};
      hr = m_pImmediateContext->GetData(ctx.timers[i].stats, &pipelineStats,
                                        sizeof(D3D11_QUERY_DATA_PIPELINE_STATISTICS), 0);
      RDCASSERTEQUAL(hr, S_OK);

      UINT64 occlusion = 0;
      hr = m_pImmediateContext->GetData(ctx.timers[i].occlusion, &occlusion, sizeof(UINT64), 0);
      RDCASSERTEQUAL(hr, S_OK);

      for(size_t c = 0; c < d3dCounters.size(); c++)
      {
        switch(d3dCounters[c])
        {
          case GPUCounter::EventGPUDuration:
            ret.push_back(
                CounterResult(ctx.timers[i].eventId, GPUCounter::EventGPUDuration, duration));
            break;
          case GPUCounter::InputVerticesRead:
            ret.push_back(CounterResult(ctx.timers[i].eventId, GPUCounter::InputVerticesRead,
                                        pipelineStats.IAVertices));
            break;
          case GPUCounter::IAPrimitives:
            ret.push_back(CounterResult(ctx.timers[i].eventId, GPUCounter::IAPrimitives,
                                        pipelineStats.IAPrimitives));
            break;
          case GPUCounter::VSInvocations:
            ret.push_back(CounterResult(ctx.timers[i].eventId, GPUCounter::VSInvocations,
                                        pipelineStats.VSInvocations));
            break;
          case GPUCounter::GSInvocations:
            ret.push_back(CounterResult(ctx.timers[i].eventId, GPUCounter::GSInvocations,
                                        pipelineStats.GSInvocations));
            break;
          case GPUCounter::GSPrimitives:
            ret.push_back(CounterResult(ctx.timers[i].eventId, GPUCounter::GSPrimitives,
                                        pipelineStats.GSPrimitives));
            break;
          case GPUCounter::RasterizerInvocations:
            ret.push_back(CounterResult(ctx.timers[i].eventId, GPUCounter::RasterizerInvocations,
                                        pipelineStats.CInvocations));
            break;
          case GPUCounter::RasterizedPrimitives:
            ret.push_back(CounterResult(ctx.timers[i].eventId, GPUCounter::RasterizedPrimitives,
                                        pipelineStats.CPrimitives));
            break;
          case GPUCounter::PSInvocations:
            ret.push_back(CounterResult(ctx.timers[i].eventId, GPUCounter::PSInvocations,
                                        pipelineStats.PSInvocations));
            break;
          case GPUCounter::HSInvocations:
            ret.push_back(CounterResult(ctx.timers[i].eventId, GPUCounter::HSInvocations,
                                        pipelineStats.HSInvocations));
            break;
          case GPUCounter::DSInvocations:
            ret.push_back(CounterResult(ctx.timers[i].eventId, GPUCounter::DSInvocations,
                                        pipelineStats.DSInvocations));
            break;
          case GPUCounter::CSInvocations:
            ret.push_back(CounterResult(ctx.timers[i].eventId, GPUCounter::CSInvocations,
                                        pipelineStats.CSInvocations));
            break;
          case GPUCounter::SamplesWritten:
            ret.push_back(
                CounterResult(ctx.timers[i].eventId, GPUCounter::SamplesWritten, occlusion));
            break;
        }
      }
    }
    else
    {
      // At least one query is missing for this timer: report sentinel values instead.
      for(size_t c = 0; c < d3dCounters.size(); c++)
      {
        switch(d3dCounters[c])
        {
          case GPUCounter::EventGPUDuration:
            ret.push_back(
                CounterResult(ctx.timers[i].eventId, GPUCounter::EventGPUDuration, -1.0));
            break;
          case GPUCounter::InputVerticesRead:
          case GPUCounter::IAPrimitives:
          case GPUCounter::GSPrimitives:
          case GPUCounter::RasterizerInvocations:
          case GPUCounter::RasterizedPrimitives:
          case GPUCounter::VSInvocations:
          case GPUCounter::HSInvocations:
          case GPUCounter::DSInvocations:
          case GPUCounter::GSInvocations:
          case GPUCounter::PSInvocations:
          case GPUCounter::CSInvocations:
          case GPUCounter::SamplesWritten:
            ret.push_back(
                CounterResult(ctx.timers[i].eventId, d3dCounters[c], 0xFFFFFFFFFFFFFFFF));
            break;
        }
      }
    }
  }

  for(size_t i = 0; i < ctx.timers.size(); i++)
  {
    SAFE_RELEASE(ctx.timers[i].before);
    SAFE_RELEASE(ctx.timers[i].after);
    SAFE_RELEASE(ctx.timers[i].stats);
    SAFE_RELEASE(ctx.timers[i].occlusion);
  }

  SAFE_RELEASE(disjoint);
  SAFE_RELEASE(start);

  return ret;
}
// Fetches the given AMD counters through m_pAMDCounters.
//
// counters - counters to fetch; each is asserted to lie in
//            [GPUCounter::FirstAMD, GPUCounter::FirstIntel).
//
// All counters are disabled, the requested ones enabled, and then FillTimersAMD() is run once
// per pass reported by GetPassCount(). Once the session is ready the samples are read back
// sample-major (all counters for sample 0, then sample 1, ...). Values whose counter unit is
// Percentage are clamped to [0, 100].
vector<CounterResult> D3D11DebugManager::FetchCountersAMD(const vector<GPUCounter> &counters)
{
  vector<CounterResult> ret;

  m_pAMDCounters->DisableAllCounters();

  // enable counters it needs
  for(size_t i = 0; i < counters.size(); i++)
  {
    // This function is only called internally, and violating this assertion means our
    // caller has invoked this method incorrectly
    RDCASSERT((counters[i] >= (GPUCounter::FirstAMD)) && (counters[i] < (GPUCounter::FirstIntel)));
    m_pAMDCounters->EnableCounter(counters[i]);
  }

  uint32_t sessionID = m_pAMDCounters->BeginSession();

  uint32_t passCount = m_pAMDCounters->GetPassCount();

  uint32_t sampleIndex = 0;

  vector<uint32_t> eventIDs;

  for(uint32_t p = 0; p < passCount; p++)
  {
    m_pAMDCounters->BeginPass();

    // every pass starts again from scratch, so only the values from the last pass are kept
    uint32_t eventStartID = 0;

    sampleIndex = 0;

    eventIDs.clear();

    FillTimersAMD(eventStartID, sampleIndex, eventIDs, m_WrappedContext->GetRootDraw());

    m_pAMDCounters->EndPass();
  }

  m_pAMDCounters->EndSesssion();

  // NOTE(review): this wait has no timeout, so it spins forever if the session never becomes
  // ready.
  while(!m_pAMDCounters->IsSessionReady(sessionID))
  {
  }

  if(sampleIndex == 0 || counters.empty())
    return ret;

  // The description of a counter does not depend on the sample, so look each one up once
  // instead of copying it again for every sample.
  vector<CounterDescription> descs;
  descs.reserve(counters.size());
  for(size_t c = 0; c < counters.size(); c++)
    descs.push_back(m_pAMDCounters->GetCounterDescription(counters[c]));

  ret.reserve(size_t(sampleIndex) * counters.size());

  for(uint32_t s = 0; s < sampleIndex; s++)
  {
    for(size_t c = 0; c < counters.size(); c++)
    {
      const CounterDescription &desc = descs[c];

      switch(desc.resultType)
      {
        case CompType::UInt:
        {
          if(desc.resultByteWidth == sizeof(uint32_t))
          {
            uint32_t value = m_pAMDCounters->GetSampleUint32(sessionID, s, counters[c]);

            if(desc.unit == CounterUnit::Percentage)
            {
              value = RDCCLAMP(value, 0U, 100U);
            }

            ret.push_back(CounterResult(eventIDs[s], counters[c], value));
          }
          else if(desc.resultByteWidth == sizeof(uint64_t))
          {
            uint64_t value = m_pAMDCounters->GetSampleUint64(sessionID, s, counters[c]);

            if(desc.unit == CounterUnit::Percentage)
            {
              // bounds are spelled as uint64_t so all three arguments have the same type even
              // where uint64_t is not unsigned long long
              value = RDCCLAMP(value, (uint64_t)0, (uint64_t)100);
            }

            ret.push_back(CounterResult(eventIDs[s], counters[c], value));
          }
          else
          {
            // no result is emitted for this sample/counter pair
            RDCERR("Unexpected byte width %u", desc.resultByteWidth);
          }
        }
        break;
        case CompType::Float:
        {
          float value = m_pAMDCounters->GetSampleFloat32(sessionID, s, counters[c]);

          if(desc.unit == CounterUnit::Percentage)
          {
            value = RDCCLAMP(value, 0.0f, 100.0f);
          }

          ret.push_back(CounterResult(eventIDs[s], counters[c], value));
        }
        break;
        case CompType::Double:
        {
          double value = m_pAMDCounters->GetSampleFloat64(sessionID, s, counters[c]);

          if(desc.unit == CounterUnit::Percentage)
          {
            value = RDCCLAMP(value, 0.0, 100.0);
          }

          ret.push_back(CounterResult(eventIDs[s], counters[c], value));
        }
        break;
        default: RDCASSERT(0); break;
      };
    }
  }

  return ret;
}
// Measures the GPU duration of every timed event using D3D11 timestamp queries.
//
// counters - counters to fetch. Only the first entry is used; it is asserted to be the sole
//            entry and to be eCounter_EventGPUDuration.
//
// Returns one result per timer filled in by FillTimers(), holding the duration in seconds.
// Timers that are missing a query are reported with a duration of 0.0. Returns an empty
// vector if no counters are given or a query cannot be created.
vector<CounterResult> D3D11DebugManager::FetchCounters(const vector<uint32_t> &counters)
{
  vector<CounterResult> ret;

  if(counters.empty())
  {
    RDCERR("No counters specified to FetchCounters");
    return ret;
  }

  uint32_t counterID = counters[0];
  RDCASSERT(counters.size() == 1);
  RDCASSERT(counterID == eCounter_EventGPUDuration);

  SCOPED_TIMER("Fetch Counters for %u", counterID);

  D3D11_QUERY_DESC disjointdesc = {D3D11_QUERY_TIMESTAMP_DISJOINT, 0};
  ID3D11Query *disjoint = NULL;

  D3D11_QUERY_DESC qdesc = {D3D11_QUERY_TIMESTAMP, 0};
  ID3D11Query *start = NULL;

  HRESULT hr = S_OK;

  hr = m_pDevice->CreateQuery(&disjointdesc, &disjoint);
  if(FAILED(hr))
  {
    RDCERR("Failed to create disjoint query %08x", hr);
    return ret;
  }

  hr = m_pDevice->CreateQuery(&qdesc, &start);
  if(FAILED(hr))
  {
    RDCERR("Failed to create start query %08x", hr);
    // the disjoint query was already created above, don't leak it
    SAFE_RELEASE(disjoint);
    return ret;
  }

  CounterContext ctx;

  // Issue all the timestamp queries, bracketed by the disjoint query. Only a single pass is
  // ever made, so the timers are never reused (reuseIdx stays at -1).
  m_pImmediateContext->Begin(disjoint);

  m_pImmediateContext->End(start);

  ctx.eventStart = 0;
  ctx.reuseIdx = -1;
  FillTimers(ctx, m_WrappedContext->GetRootDraw());

  m_pImmediateContext->End(disjoint);

  // Read everything back. GetData returns S_FALSE while the result is not yet available, so
  // spin until the disjoint query (the last one issued) has a result.
  D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjointData = {};
  do
  {
    hr = m_pImmediateContext->GetData(disjoint, &disjointData,
                                      sizeof(D3D11_QUERY_DATA_TIMESTAMP_DISJOINT), 0);
  } while(hr == S_FALSE);
  RDCASSERTEQUAL(hr, S_OK);

  RDCASSERT(!disjointData.Disjoint);

  // timestamp ticks per second, used to convert tick deltas to seconds
  const double ticksPerSec = double(disjointData.Frequency);

  UINT64 a = 0;
  hr = m_pImmediateContext->GetData(start, &a, sizeof(UINT64), 0);
  RDCASSERTEQUAL(hr, S_OK);

  for(size_t i = 0; i < ctx.timers.size(); i++)
  {
    if(ctx.timers[i].before && ctx.timers[i].after)
    {
      hr = m_pImmediateContext->GetData(ctx.timers[i].before, &a, sizeof(UINT64), 0);
      RDCASSERTEQUAL(hr, S_OK);

      UINT64 b = 0;
      hr = m_pImmediateContext->GetData(ctx.timers[i].after, &b, sizeof(UINT64), 0);
      RDCASSERTEQUAL(hr, S_OK);

      double duration = (double(b - a) / ticksPerSec);

      ret.push_back(CounterResult(ctx.timers[i].eventID, counterID, duration));

      a = b;
    }
    else
    {
      ret.push_back(CounterResult(ctx.timers[i].eventID, counterID, 0.0));
    }
  }

  for(size_t i = 0; i < ctx.timers.size(); i++)
  {
    SAFE_RELEASE(ctx.timers[i].before);
    SAFE_RELEASE(ctx.timers[i].after);
  }

  SAFE_RELEASE(disjoint);
  SAFE_RELEASE(start);

  return ret;
}
// Fetches the requested counters for every query recorded by FillTimers().
//
// counters - IDs of the counters to fetch; each ID is also used as the index into the
//            per-query object array.
//
// Returns one result per recorded query per requested counter. eCounter_EventGPUDuration is
// reported in seconds (the raw query result scaled by 1e-9); every other counter reports the
// raw 64-bit query result. A missing query object or a GL error is reported as (uint64_t)-1,
// or as a duration of -1 when the error occurs on the duration counter.
vector<CounterResult> GLReplay::FetchCounters(const vector<uint32_t> &counters)
{
  vector<CounterResult> results;

  if(counters.empty())
  {
    RDCERR("No counters specified to FetchCounters");
    return results;
  }

  MakeCurrentReplayContext(&m_ReplayCtx);

  GLCounterContext ctx;

  // Only a single pass is made, so the queries are never reused.
  ctx.eventStart = 0;
  ctx.reuseIdx = -1;

  m_pDriver->SetFetchCounters(true);
  FillTimers(ctx, m_pDriver->GetRootDraw(), counters);
  m_pDriver->SetFetchCounters(false);

  const double secsPerNano = 1.0 / 1000000000.0;

  // Unbind the query buffer while reading results back, restoring the old binding afterwards.
  GLuint savedQueryBuffer = 0;
  m_pDriver->glGetIntegerv(eGL_QUERY_BUFFER_BINDING, (GLint *)&savedQueryBuffer);
  m_pDriver->glBindBuffer(eGL_QUERY_BUFFER, 0);

  for(size_t q = 0; q < ctx.queries.size(); q++)
  {
    for(uint32_t c = 0; c < counters.size(); c++)
    {
      const uint32_t counterID = counters[c];

      // no query object was created for this counter
      if(!ctx.queries[q].obj[counterID])
      {
        results.push_back(CounterResult(ctx.queries[q].eventID, counterID, (uint64_t)-1));
        continue;
      }

      GLuint64 raw = 0;
      m_pDriver->glGetQueryObjectui64v(ctx.queries[q].obj[counterID], eGL_QUERY_RESULT, &raw);

      double seconds = double(raw) * secsPerNano;

      if(m_pDriver->glGetError() != 0)
      {
        raw = (uint64_t)-1;
        seconds = -1;
      }

      if(counterID == eCounter_EventGPUDuration)
        results.push_back(
            CounterResult(ctx.queries[q].eventID, eCounter_EventGPUDuration, seconds));
      else
        results.push_back(CounterResult(ctx.queries[q].eventID, counterID, raw));
    }
  }

  m_pDriver->glBindBuffer(eGL_QUERY_BUFFER, savedQueryBuffer);

  // Delete every query object that was created.
  for(size_t q = 0; q < ctx.queries.size(); q++)
  {
    for(uint32_t c = 0; c < counters.size(); c++)
    {
      if(ctx.queries[q].obj[counters[c]])
        m_pDriver->glDeleteQueries(1, &ctx.queries[q].obj[counters[c]]);
    }
  }

  return results;
}