std::string CProfiler2::ThreadStorage::GetBuffer() { // Called from an arbitrary thread (not the one writing to the buffer). // // See comments on m_BufferPos0 etc. shared_ptr<u8> buffer(new u8[BUFFER_SIZE], ArrayDeleter()); u32 pos1 = m_BufferPos1; COMPILER_FENCE; // must read m_BufferPos1 before m_Buffer memcpy(buffer.get(), m_Buffer, BUFFER_SIZE); COMPILER_FENCE; // must read m_BufferPos0 after m_Buffer u32 pos0 = m_BufferPos0; // The range [pos1, pos0) modulo BUFFER_SIZE is invalid, so concatenate the rest of the buffer if (pos1 <= pos0) // invalid range is in the middle of the buffer return std::string(buffer.get()+pos0, buffer.get()+BUFFER_SIZE) + std::string(buffer.get(), buffer.get()+pos1); else // invalid wrap is wrapped around the end/start buffer return std::string(buffer.get()+pos0, buffer.get()+pos1); }
void ProcessFrames() { while (!m_Frames.empty()) { SFrame& frame = m_Frames.front(); // Queries don't become available in order, so check them all before // trying to read the results from any for (size_t j = 0; j < m_QueryTypes.size(); ++j) { size_t size = m_QueryTypes[j].counterBufferSize; shared_ptr<char> buf(new char[size], ArrayDeleter()); for (size_t i = 0; i < frame.events.size(); ++i) { if (!frame.events[i].isEnter) continue; GLuint length = 0; pglGetPerfQueryDataINTEL(frame.events[i].queries[j], INTEL_PERFQUERIES_NONBLOCK, size, buf.get(), &length); ogl_WarnIfError(); if (length == 0) return; } } double lastTime = frame.timeStart; std::stack<double> endTimes; m_Storage.RecordFrameStart(frame.timeStart); for (size_t i = 0; i < frame.events.size(); ++i) { if (frame.events[i].isEnter) { m_Storage.Record(CProfiler2::ITEM_ENTER, lastTime, frame.events[i].id); if (i == 0) m_Storage.RecordAttributePrintf("%u", frame.num); double elapsed = 0.0; for (size_t j = 0; j < m_QueryTypes.size(); ++j) { GLuint length; char* buf = new char[m_QueryTypes[j].counterBufferSize]; pglGetPerfQueryDataINTEL(frame.events[i].queries[j], INTEL_PERFQUERIES_BLOCK, m_QueryTypes[j].counterBufferSize, buf, &length); ogl_WarnIfError(); ENSURE(length == m_QueryTypes[j].counterBufferSize); m_Storage.RecordAttributePrintf("-- %s --", m_QueryTypes[j].name.c_str()); for (size_t k = 0; k < m_QueryTypes[j].counters.size(); ++k) { SPerfCounter& counter = m_QueryTypes[j].counters[k]; if (counter.type == INTEL_PERFQUERIES_TYPE_UNSIGNED_INT) { ENSURE(counter.size == 4); GLuint value; memcpy(&value, buf + counter.offset, counter.size); m_Storage.RecordAttributePrintf("%s: %u", counter.name.c_str(), value); } else if (counter.type == INTEL_PERFQUERIES_TYPE_UNSIGNED_INT64) { ENSURE(counter.size == 8); GLuint64 value; memcpy(&value, buf + counter.offset, counter.size); m_Storage.RecordAttributePrintf("%s: %.0f", counter.name.c_str(), (double)value); if (counter.name == "TotalTime") elapsed = (double)value / 1e6; } else if (counter.type == INTEL_PERFQUERIES_TYPE_FLOAT) { ENSURE(counter.size == 4); GLfloat value; memcpy(&value, buf + counter.offset, counter.size); m_Storage.RecordAttributePrintf("%s: %f", counter.name.c_str(), value); } else if (counter.type == INTEL_PERFQUERIES_TYPE_BOOL) { ENSURE(counter.size == 4); GLuint value; memcpy(&value, buf + counter.offset, counter.size); ENSURE(value == 0 || value == 1); m_Storage.RecordAttributePrintf("%s: %u", counter.name.c_str(), value); } else { debug_warn(L"unrecognised Intel performance counter type"); } } delete[] buf; } endTimes.push(lastTime + elapsed); } else { lastTime = endTimes.top(); endTimes.pop(); m_Storage.Record(CProfiler2::ITEM_LEAVE, lastTime, frame.events[i].id); } } PopFrontFrame(); } }