// Walks the drawcall tree below `drawnode` depth-first (children are processed
// before the node that owns them). For every child that has at least one event,
// a new GPUTimer is appended to ctx.timers holding four queries: two timestamps
// (before/after), one pipeline-statistics query and one occlusion query. The
// child's draw is then replayed with those queries wrapped around it.
//
// ctx.eventStart is advanced past each measured drawcall so the next replay
// begins where this one stopped.
void D3D11Replay::FillTimers(D3D11CounterContext &ctx, const DrawcallDescription &drawnode)
{
  const D3D11_QUERY_DESC qtimedesc = {D3D11_QUERY_TIMESTAMP, 0};
  const D3D11_QUERY_DESC qstatsdesc = {D3D11_QUERY_PIPELINE_STATISTICS, 0};
  const D3D11_QUERY_DESC qoccldesc = {D3D11_QUERY_OCCLUSION, 0};

  // an empty child list simply yields zero iterations
  for(size_t childIdx = 0; childIdx < drawnode.children.size(); childIdx++)
  {
    const DrawcallDescription &child = drawnode.children[childIdx];

    // recurse first, so nested drawcalls get their timers before this one
    FillTimers(ctx, child);

    if(child.events.empty())
      continue;

    // The reference stays valid for the rest of this iteration: the only
    // push_back into ctx.timers after this point happens in a later iteration.
    ctx.timers.push_back(GPUTimer());
    GPUTimer &timer = ctx.timers.back();

    timer.eventId = child.eventId;
    timer.occlusion = NULL;
    timer.stats = NULL;
    timer.after = NULL;
    timer.before = NULL;

    HRESULT hr = S_OK;

    hr = m_pDevice->CreateQuery(&qtimedesc, &timer.before);
    RDCASSERTEQUAL(hr, S_OK);
    hr = m_pDevice->CreateQuery(&qtimedesc, &timer.after);
    RDCASSERTEQUAL(hr, S_OK);
    hr = m_pDevice->CreateQuery(&qstatsdesc, &timer.stats);
    RDCASSERTEQUAL(hr, S_OK);
    hr = m_pDevice->CreateQuery(&qoccldesc, &timer.occlusion);
    RDCASSERTEQUAL(hr, S_OK);

    // first replay pass (eReplay_WithoutDraw), issued before any query is opened
    m_pDevice->ReplayLog(ctx.eventStart, child.eventId, eReplay_WithoutDraw);

    SerializeImmediateContext();

    // timestamps are only recorded when both ends of the pair exist
    const bool haveTimestamps = timer.before && timer.after;

    // open the queries: stats outermost, then occlusion, then the timestamp
    if(timer.stats)
      m_pImmediateContext->Begin(timer.stats);
    if(timer.occlusion)
      m_pImmediateContext->Begin(timer.occlusion);
    if(haveTimestamps)
      m_pImmediateContext->End(timer.before);

    // second replay pass (eReplay_OnlyDraw), bracketed by the queries
    m_pDevice->ReplayLog(ctx.eventStart, child.eventId, eReplay_OnlyDraw);

    // close in the opposite order to how they were opened
    if(haveTimestamps)
      m_pImmediateContext->End(timer.after);
    if(timer.occlusion)
      m_pImmediateContext->End(timer.occlusion);
    if(timer.stats)
      m_pImmediateContext->End(timer.stats);

    ctx.eventStart = child.eventId + 1;
  }
}
// Walks the drawcall tree below `drawnode` depth-first (children are processed
// before the node that owns them) and brackets the draw of every child that has
// at least one event with a pair of timestamp queries.
//
// When ctx.reuseIdx is -1 a new GPUTimer is appended to ctx.timers and its
// queries are created; otherwise the timer at ctx.reuseIdx is reused and the
// index is advanced. ctx.eventStart is moved past each measured drawcall.
void D3D11DebugManager::FillTimers(CounterContext &ctx, const DrawcallTreeNode &drawnode)
{
  const D3D11_QUERY_DESC qdesc = {D3D11_QUERY_TIMESTAMP, 0};

  // an empty child list simply yields zero iterations
  for(size_t childIdx = 0; childIdx < drawnode.children.size(); childIdx++)
  {
    const DrawcallTreeNode &child = drawnode.children[childIdx];
    const FetchDrawcall &draw = child.draw;

    // recurse first, so nested drawcalls get their timers before this one
    FillTimers(ctx, child);

    if(draw.events.count == 0)
      continue;

    GPUTimer *timer = NULL;

    if(ctx.reuseIdx != -1)
    {
      // reuse a timer from an earlier pass, consuming them in creation order
      timer = &ctx.timers[ctx.reuseIdx++];
    }
    else
    {
      ctx.timers.push_back(GPUTimer());
      timer = &ctx.timers.back();

      timer->eventID = draw.eventID;
      timer->after = NULL;
      timer->before = NULL;

      HRESULT hr = S_OK;

      hr = m_pDevice->CreateQuery(&qdesc, &timer->before);
      RDCASSERTEQUAL(hr, S_OK);
      hr = m_pDevice->CreateQuery(&qdesc, &timer->after);
      RDCASSERTEQUAL(hr, S_OK);
    }

    // first replay pass (eReplay_WithoutDraw), then flush before timing starts
    m_WrappedDevice->ReplayLog(ctx.eventStart, draw.eventID, eReplay_WithoutDraw);

    m_pImmediateContext->Flush();

    // the draw is always replayed; timestamps are only written when both
    // queries of the pair exist
    const bool haveTimestamps = timer->before && timer->after;

    if(haveTimestamps)
      m_pImmediateContext->End(timer->before);

    m_WrappedDevice->ReplayLog(ctx.eventStart, draw.eventID, eReplay_OnlyDraw);

    if(haveTimestamps)
      m_pImmediateContext->End(timer->after);

    ctx.eventStart = draw.eventID + 1;
  }
}
// Fetches the requested GPU counters for every drawcall in the capture.
//
// `counters` is split into AMD counters (IsAMDCounter) and the rest. AMD
// counters are forwarded to FetchCountersAMD only when m_pAMDCounters is set.
// The remaining counters are gathered by replaying the frame through
// FillTimers, which wraps each drawcall in timestamp, pipeline-statistics and
// occlusion queries.
//
// Returns one CounterResult per (timer, requested counter) pair, appended after
// any AMD results. If a drawcall is missing any of its four queries, its
// duration is reported as -1.0 and every other counter as 0xFFFFFFFFFFFFFFFF.
// If `counters` is empty, an error is logged and an empty vector is returned.
// If query creation fails, an error is logged and whatever results were
// gathered so far are returned.
vector<CounterResult> D3D11Replay::FetchCounters(const vector<GPUCounter> &counters)
{
  vector<CounterResult> ret;

  if(counters.empty())
  {
    RDCERR("No counters specified to FetchCounters");
    return ret;
  }

  SCOPED_TIMER("Fetch Counters, counters to fetch %u", counters.size());

  // counters that are handled by the D3D11 queries below
  vector<GPUCounter> d3dCounters;
  std::copy_if(counters.begin(), counters.end(), std::back_inserter(d3dCounters),
               [](const GPUCounter &c) { return !IsAMDCounter(c); });

  if(m_pAMDCounters)
  {
    // Filter out the AMD counters
    vector<GPUCounter> amdCounters;
    std::copy_if(counters.begin(), counters.end(), std::back_inserter(amdCounters),
                 [](const GPUCounter &c) { return IsAMDCounter(c); });

    if(!amdCounters.empty())
    {
      ret = FetchCountersAMD(amdCounters);
    }
  }

  if(d3dCounters.empty())
  {
    return ret;
  }

  D3D11_QUERY_DESC disjointdesc = {D3D11_QUERY_TIMESTAMP_DISJOINT, 0};
  ID3D11Query *disjoint = NULL;

  D3D11_QUERY_DESC qdesc = {D3D11_QUERY_TIMESTAMP, 0};
  ID3D11Query *start = NULL;

  HRESULT hr = S_OK;

  hr = m_pDevice->CreateQuery(&disjointdesc, &disjoint);
  if(FAILED(hr))
  {
    RDCERR("Failed to create disjoint query HRESULT: %s", ToStr(hr).c_str());
    return ret;
  }

  hr = m_pDevice->CreateQuery(&qdesc, &start);
  if(FAILED(hr))
  {
    RDCERR("Failed to create start query HRESULT: %s", ToStr(hr).c_str());
    // the disjoint query was created successfully above, so it must be
    // released here or it leaks on this early-out
    SAFE_RELEASE(disjoint);
    return ret;
  }

  D3D11CounterContext ctx;

  {
    // record: replay the whole frame inside a single disjoint query
    {
      m_pImmediateContext->Begin(disjoint);

      m_pImmediateContext->End(start);

      ctx.eventStart = 0;
      FillTimers(ctx, m_pImmediateContext->GetRootDraw());

      m_pImmediateContext->End(disjoint);
    }

    // read back
    {
      D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjointData;
      // spin until the disjoint data is available (S_FALSE means "not ready yet")
      do
      {
        hr = m_pImmediateContext->GetData(disjoint, &disjointData,
                                          sizeof(D3D11_QUERY_DATA_TIMESTAMP_DISJOINT), 0);
      } while(hr == S_FALSE);
      RDCASSERTEQUAL(hr, S_OK);

      RDCASSERT(!disjointData.Disjoint);

      // timestamp frequency; raw tick deltas are divided by this to get a duration
      double ticksToSecs = double(disjointData.Frequency);

      UINT64 a = 0;
      hr = m_pImmediateContext->GetData(start, &a, sizeof(UINT64), 0);
      RDCASSERTEQUAL(hr, S_OK);

      for(size_t i = 0; i < ctx.timers.size(); i++)
      {
        if(ctx.timers[i].before && ctx.timers[i].after && ctx.timers[i].stats &&
           ctx.timers[i].occlusion)
        {
          hr = m_pImmediateContext->GetData(ctx.timers[i].before, &a, sizeof(UINT64), 0);
          RDCASSERTEQUAL(hr, S_OK);

          UINT64 b = 0;
          hr = m_pImmediateContext->GetData(ctx.timers[i].after, &b, sizeof(UINT64), 0);
          RDCASSERTEQUAL(hr, S_OK);

          double duration = (double(b - a) / ticksToSecs);

          a = b;

          D3D11_QUERY_DATA_PIPELINE_STATISTICS pipelineStats;
          hr = m_pImmediateContext->GetData(ctx.timers[i].stats, &pipelineStats,
                                            sizeof(D3D11_QUERY_DATA_PIPELINE_STATISTICS), 0);
          RDCASSERTEQUAL(hr, S_OK);

          UINT64 occlusion = 0;
          hr = m_pImmediateContext->GetData(ctx.timers[i].occlusion, &occlusion, sizeof(UINT64), 0);
          RDCASSERTEQUAL(hr, S_OK);

          // emit one result per requested counter, in the order requested
          for(size_t c = 0; c < d3dCounters.size(); c++)
          {
            switch(d3dCounters[c])
            {
              case GPUCounter::EventGPUDuration:
                ret.push_back(
                    CounterResult(ctx.timers[i].eventId, GPUCounter::EventGPUDuration, duration));
                break;
              case GPUCounter::InputVerticesRead:
                ret.push_back(CounterResult(ctx.timers[i].eventId, GPUCounter::InputVerticesRead,
                                            pipelineStats.IAVertices));
                break;
              case GPUCounter::IAPrimitives:
                ret.push_back(CounterResult(ctx.timers[i].eventId, GPUCounter::IAPrimitives,
                                            pipelineStats.IAPrimitives));
                break;
              case GPUCounter::VSInvocations:
                ret.push_back(CounterResult(ctx.timers[i].eventId, GPUCounter::VSInvocations,
                                            pipelineStats.VSInvocations));
                break;
              case GPUCounter::GSInvocations:
                ret.push_back(CounterResult(ctx.timers[i].eventId, GPUCounter::GSInvocations,
                                            pipelineStats.GSInvocations));
                break;
              case GPUCounter::GSPrimitives:
                ret.push_back(CounterResult(ctx.timers[i].eventId, GPUCounter::GSPrimitives,
                                            pipelineStats.GSPrimitives));
                break;
              case GPUCounter::RasterizerInvocations:
                ret.push_back(CounterResult(ctx.timers[i].eventId,
                                            GPUCounter::RasterizerInvocations,
                                            pipelineStats.CInvocations));
                break;
              case GPUCounter::RasterizedPrimitives:
                ret.push_back(CounterResult(ctx.timers[i].eventId,
                                            GPUCounter::RasterizedPrimitives,
                                            pipelineStats.CPrimitives));
                break;
              case GPUCounter::PSInvocations:
                ret.push_back(CounterResult(ctx.timers[i].eventId, GPUCounter::PSInvocations,
                                            pipelineStats.PSInvocations));
                break;
              case GPUCounter::HSInvocations:
                ret.push_back(CounterResult(ctx.timers[i].eventId, GPUCounter::HSInvocations,
                                            pipelineStats.HSInvocations));
                break;
              case GPUCounter::DSInvocations:
                ret.push_back(CounterResult(ctx.timers[i].eventId, GPUCounter::DSInvocations,
                                            pipelineStats.DSInvocations));
                break;
              case GPUCounter::CSInvocations:
                ret.push_back(CounterResult(ctx.timers[i].eventId, GPUCounter::CSInvocations,
                                            pipelineStats.CSInvocations));
                break;
              case GPUCounter::SamplesWritten:
                ret.push_back(
                    CounterResult(ctx.timers[i].eventId, GPUCounter::SamplesWritten, occlusion));
                break;
            }
          }
        }
        else
        {
          // at least one query is missing for this drawcall: report sentinel
          // values instead of reading back partial data
          for(size_t c = 0; c < d3dCounters.size(); c++)
          {
            switch(d3dCounters[c])
            {
              case GPUCounter::EventGPUDuration:
                ret.push_back(
                    CounterResult(ctx.timers[i].eventId, GPUCounter::EventGPUDuration, -1.0));
                break;
              case GPUCounter::InputVerticesRead:
              case GPUCounter::IAPrimitives:
              case GPUCounter::GSPrimitives:
              case GPUCounter::RasterizerInvocations:
              case GPUCounter::RasterizedPrimitives:
              case GPUCounter::VSInvocations:
              case GPUCounter::HSInvocations:
              case GPUCounter::DSInvocations:
              case GPUCounter::GSInvocations:
              case GPUCounter::PSInvocations:
              case GPUCounter::CSInvocations:
              case GPUCounter::SamplesWritten:
                ret.push_back(
                    CounterResult(ctx.timers[i].eventId, d3dCounters[c], 0xFFFFFFFFFFFFFFFF));
                break;
            }
          }
        }
      }
    }
  }

  // release every query created by FillTimers, then the two frame-level queries
  for(size_t i = 0; i < ctx.timers.size(); i++)
  {
    SAFE_RELEASE(ctx.timers[i].before);
    SAFE_RELEASE(ctx.timers[i].after);
    SAFE_RELEASE(ctx.timers[i].stats);
    SAFE_RELEASE(ctx.timers[i].occlusion);
  }

  SAFE_RELEASE(disjoint);
  SAFE_RELEASE(start);

  return ret;
}
// Fetches GPU durations for every drawcall in the capture.
//
// Only the first entry of `counters` is used. The function asserts that there
// is exactly one counter requested and that it is eCounter_EventGPUDuration.
// The frame is replayed through FillTimers with a pair of timestamp queries
// around each drawcall, all inside one disjoint query.
//
// Returns one CounterResult per timer: the measured duration (tick delta
// divided by the disjoint query's frequency), or 0.0 when a timer is missing
// one of its queries. If `counters` is empty or query creation fails, an error
// is logged and an empty vector is returned.
vector<CounterResult> D3D11DebugManager::FetchCounters(const vector<uint32_t> &counters)
{
  vector<CounterResult> ret;

  if(counters.empty())
  {
    RDCERR("No counters specified to FetchCounters");
    return ret;
  }

  uint32_t counterID = counters[0];
  RDCASSERT(counters.size() == 1);
  RDCASSERT(counterID == eCounter_EventGPUDuration);

  SCOPED_TIMER("Fetch Counters for %u", counterID);

  D3D11_QUERY_DESC disjointdesc = {D3D11_QUERY_TIMESTAMP_DISJOINT, 0};
  ID3D11Query *disjoint = NULL;

  D3D11_QUERY_DESC qdesc = {D3D11_QUERY_TIMESTAMP, 0};
  ID3D11Query *start = NULL;

  HRESULT hr = S_OK;

  hr = m_pDevice->CreateQuery(&disjointdesc, &disjoint);
  if(FAILED(hr))
  {
    RDCERR("Failed to create disjoint query %08x", hr);
    return ret;
  }

  hr = m_pDevice->CreateQuery(&qdesc, &start);
  if(FAILED(hr))
  {
    RDCERR("Failed to create start query %08x", hr);
    // the disjoint query was created successfully above, so it must be
    // released here or it leaks on this early-out
    SAFE_RELEASE(disjoint);
    return ret;
  }

  CounterContext ctx;

  // Runs exactly once as written; on iteration 0 reuseIdx is -1, which makes
  // FillTimers create new timers rather than reuse existing ones.
  for(int loop = 0; loop < 1; loop++)
  {
    // record: replay the whole frame inside a single disjoint query
    {
      m_pImmediateContext->Begin(disjoint);

      m_pImmediateContext->End(start);

      ctx.eventStart = 0;
      ctx.reuseIdx = loop == 0 ? -1 : 0;
      FillTimers(ctx, m_WrappedContext->GetRootDraw());

      m_pImmediateContext->End(disjoint);
    }

    // read back
    {
      D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjointData;
      // spin until the disjoint data is available (S_FALSE means "not ready yet")
      do
      {
        hr = m_pImmediateContext->GetData(disjoint, &disjointData,
                                          sizeof(D3D11_QUERY_DATA_TIMESTAMP_DISJOINT), 0);
      } while(hr == S_FALSE);
      RDCASSERTEQUAL(hr, S_OK);

      RDCASSERT(!disjointData.Disjoint);

      // timestamp frequency; raw tick deltas are divided by this to get a duration
      double ticksToSecs = double(disjointData.Frequency);

      UINT64 a = 0;
      hr = m_pImmediateContext->GetData(start, &a, sizeof(UINT64), 0);
      RDCASSERTEQUAL(hr, S_OK);

      for(size_t i = 0; i < ctx.timers.size(); i++)
      {
        if(ctx.timers[i].before && ctx.timers[i].after)
        {
          hr = m_pImmediateContext->GetData(ctx.timers[i].before, &a, sizeof(UINT64), 0);
          RDCASSERTEQUAL(hr, S_OK);

          UINT64 b = 0;
          hr = m_pImmediateContext->GetData(ctx.timers[i].after, &b, sizeof(UINT64), 0);
          RDCASSERTEQUAL(hr, S_OK);

          double duration = (double(b - a) / ticksToSecs);

          ret.push_back(CounterResult(ctx.timers[i].eventID, counterID, duration));

          a = b;
        }
        else
        {
          // no timestamp pair for this drawcall: report a zero duration
          ret.push_back(CounterResult(ctx.timers[i].eventID, counterID, 0.0));
        }
      }
    }
  }

  // release every query created by FillTimers, then the two frame-level queries
  for(size_t i = 0; i < ctx.timers.size(); i++)
  {
    SAFE_RELEASE(ctx.timers[i].before);
    SAFE_RELEASE(ctx.timers[i].after);
  }

  SAFE_RELEASE(disjoint);
  SAFE_RELEASE(start);

  return ret;
}
// Fetches the requested counters for every drawcall in the capture using GL
// query objects.
//
// The frame is replayed through FillTimers, which records one query object per
// requested counter around each drawcall. Results are then read back with
// glGetQueryObjectui64v and appended to the returned vector, one CounterResult
// per (drawcall, counter) pair in request order.
//
// eCounter_EventGPUDuration is reported as a double (raw value scaled by 1e-9);
// every other counter is reported as the raw 64-bit value. A missing query
// object yields (uint64_t)-1, and a GL error on read-back yields -1 for the
// duration or (uint64_t)-1 for other counters. If `counters` is empty an error
// is logged and an empty vector is returned.
vector<CounterResult> GLReplay::FetchCounters(const vector<uint32_t> &counters)
{
  vector<CounterResult> ret;

  if(counters.empty())
  {
    RDCERR("No counters specified to FetchCounters");
    return ret;
  }

  MakeCurrentReplayContext(&m_ReplayCtx);

  GLCounterContext ctx;

  // Runs exactly once as written; on iteration 0 reuseIdx is -1, which makes
  // FillTimers create new query sets rather than reuse existing ones.
  for(int loop = 0; loop < 1; loop++)
  {
    ctx.eventStart = 0;
    ctx.reuseIdx = (loop == 0) ? -1 : 0;

    m_pDriver->SetFetchCounters(true);
    FillTimers(ctx, m_pDriver->GetRootDraw(), counters);
    m_pDriver->SetFetchCounters(false);

    const double nanosToSecs = 1.0 / 1000000000.0;

    // unbind the query buffer for the duration of the read-back and remember
    // the previous binding so it can be restored afterwards
    GLuint prevbind = 0;
    m_pDriver->glGetIntegerv(eGL_QUERY_BUFFER_BINDING, (GLint *)&prevbind);
    m_pDriver->glBindBuffer(eGL_QUERY_BUFFER, 0);

    for(size_t queryIdx = 0; queryIdx < ctx.queries.size(); queryIdx++)
    {
      GPUQueries &entry = ctx.queries[queryIdx];

      for(uint32_t c = 0; c < counters.size(); c++)
      {
        const GLuint queryObj = entry.obj[counters[c]];

        // no query object for this counter: report the "invalid" sentinel
        if(!queryObj)
        {
          ret.push_back(CounterResult(entry.eventID, counters[c], (uint64_t)-1));
          continue;
        }

        GLuint64 data = 0;
        m_pDriver->glGetQueryObjectui64v(queryObj, eGL_QUERY_RESULT, &data);

        double duration = double(data) * nanosToSecs;

        // a GL error on read-back invalidates both representations
        if(m_pDriver->glGetError())
        {
          data = (uint64_t)-1;
          duration = -1;
        }

        if(counters[c] == eCounter_EventGPUDuration)
        {
          ret.push_back(CounterResult(entry.eventID, eCounter_EventGPUDuration, duration));
        }
        else
        {
          ret.push_back(CounterResult(entry.eventID, counters[c], data));
        }
      }
    }

    m_pDriver->glBindBuffer(eGL_QUERY_BUFFER, prevbind);
  }

  // delete every query object that is still non-zero
  for(size_t queryIdx = 0; queryIdx < ctx.queries.size(); queryIdx++)
  {
    for(uint32_t c = 0; c < counters.size(); c++)
    {
      if(ctx.queries[queryIdx].obj[counters[c]])
      {
        m_pDriver->glDeleteQueries(1, &ctx.queries[queryIdx].obj[counters[c]]);
      }
    }
  }

  return ret;
}
// Walks the drawcall tree below `drawnode` depth-first (children are processed
// before the node that owns them) and wraps the draw of every child that has at
// least one event in GL queries, one per entry in `counters`.
//
// When ctx.reuseIdx is -1 a new GPUQueries entry is appended to ctx.queries and
// its query objects are generated; otherwise the entry at ctx.reuseIdx is
// reused and the index is advanced. A slot whose glGenQueries or glBeginQuery
// call raises a GL error is reset to 0. ctx.eventStart is moved past each
// measured drawcall.
void GLReplay::FillTimers(GLCounterContext &ctx, const DrawcallTreeNode &drawnode,
                          const vector<uint32_t> &counters)
{
  // an empty child list simply yields zero iterations
  for(size_t childIdx = 0; childIdx < drawnode.children.size(); childIdx++)
  {
    const DrawcallTreeNode &child = drawnode.children[childIdx];
    const FetchDrawcall &draw = child.draw;

    // recurse first, so nested drawcalls get their queries before this one
    FillTimers(ctx, child, counters);

    if(draw.events.count == 0)
      continue;

    GPUQueries *queries = NULL;

    if(ctx.reuseIdx != -1)
    {
      // reuse a query set from an earlier pass, consuming them in creation order
      queries = &ctx.queries[ctx.reuseIdx++];
    }
    else
    {
      ctx.queries.push_back(GPUQueries());
      queries = &ctx.queries.back();

      queries->eventID = draw.eventID;

      // start with every slot empty, then fill only the requested counters
      for(uint32_t slot = 0; slot < eCounter_GLMaxCounters; slot++)
        queries->obj[slot] = 0;

      for(uint32_t c = 0; c < counters.size(); c++)
      {
        m_pDriver->glGenQueries(1, &queries->obj[counters[c]]);

        if(m_pDriver->glGetError())
          queries->obj[counters[c]] = 0;
      }
    }

    m_pDriver->ReplayLog(ctx.eventStart, draw.eventID, eReplay_WithoutDraw);

    // Reverse order so that Timer counter is queried the last.
    for(int32_t slot = (eCounter_GLMaxCounters - 1); slot >= 0; slot--)
    {
      if(!queries->obj[slot])
        continue;

      m_pDriver->glBeginQuery(glCounters[slot], queries->obj[slot]);

      // a query that could not be started is deleted and its slot cleared, so
      // it is skipped by the end-query loop below
      if(m_pDriver->glGetError())
      {
        m_pDriver->glDeleteQueries(1, &queries->obj[slot]);
        queries->obj[slot] = 0;
      }
    }

    m_pDriver->ReplayLog(ctx.eventStart, draw.eventID, eReplay_OnlyDraw);

    // end every query that is still active, in forward slot order
    for(uint32_t slot = 0; slot < eCounter_GLMaxCounters; slot++)
    {
      if(queries->obj[slot])
        m_pDriver->glEndQuery(glCounters[slot]);
    }

    ctx.eventStart = draw.eventID + 1;
  }
}