void mozilla_sampler_shutdown() { sInitCount--; if (sInitCount > 0) return; // Save the profile on shutdown if requested. GeckoSampler *t = tlsTicker.get(); if (t) { const char *val = getenv("MOZ_PROFILER_SHUTDOWN"); if (val) { std::ofstream stream; stream.open(val); if (stream.is_open()) { t->ToStreamAsJSON(stream); stream.close(); } } } profiler_stop(); #ifndef SPS_STANDALONE set_stderr_callback(nullptr); #endif Sampler::Shutdown(); #ifdef SPS_STANDALONE mozilla::TimeStamp::Shutdown(); #endif PseudoStack *stack = tlsPseudoStack.get(); stack->deref(); tlsPseudoStack.set(nullptr); #ifdef MOZ_TASK_TRACER mozilla::tasktracer::ShutdownTaskTracer(); #endif }
// Merge the unwound native C stack with the pseudo-stack maintained during
// execution, emitting a single interleaved sample into aProfile.
static void
mergeNativeBacktrace(ThreadProfile &aProfile, const PCArray &array)
{
  aProfile.addTag(ProfileEntry('s', "(root)"));

  PseudoStack* stack = aProfile.GetPseudoStack();
  uint32_t pseudoStackPos = 0;

  /* We have two stacks, the native C stack we extracted from unwinding,
   * and the pseudostack we managed during execution. We want to consolidate
   * the two in order. We do so by merging using the approximate stack address
   * when each entry was pushed. When pushing a JS entry we may not know the
   * stack address, in which case we have a NULL stack address and we assume
   * that it immediately follows the previous element.
   *
   *  C Stack | Address    --  Pseudo Stack | Address
   *  main()  | 0x100          run_js()     | 0x40
   *  start() | 0x80           jsCanvas()   | NULL
   *  timer() | 0x50           drawLine()   | NULL
   *  azure() | 0x10
   *
   * Merged: main(), start(), timer(), run_js(), jsCanvas(), drawLine(), azure()
   */
  // i is the index in C stack starting at main and decreasing
  // pseudoStackPos is the position in the Pseudo stack starting
  // at the first frame (run_js in the example) and increasing.
  for (size_t i = array.count; i > 0; --i) {
    // Flush every pseudo frame whose stack address is at or above (older
    // than) the current native frame's SP; NULL-address entries are
    // flushed immediately after their predecessor.
    while (pseudoStackPos < stack->stackSize()) {
      volatile StackEntry& entry = stack->mStack[pseudoStackPos];

      if (entry.stackAddress() < array.sp_array[i-1] && entry.stackAddress())
        break;

      // NOTE(review): the PC passed is always array.array[0] (the leaf),
      // regardless of which native frame we're merging at — presumably the
      // leaf address is only used for line-number lookup; confirm.
      addProfileEntry(entry, aProfile, stack, array.array[0]);
      pseudoStackPos++;
    }

    // 'l': native code address for this frame.
    aProfile.addTag(ProfileEntry('l', (void*)array.array[i-1]));
  }
}
// Fill the output buffer with the following pattern: // "Lable 1" "\0" "Label 2" "\0" ... "Label N" "\0" "\0" // TODO: use the unwinder instead of pseudo stack. void mozilla_sampler_get_backtrace_noalloc(char *output, size_t outputSize) { MOZ_ASSERT(outputSize >= 2); char *bound = output + outputSize - 2; output[0] = output[1] = '\0'; PseudoStack *pseudoStack = tlsPseudoStack.get(); if (!pseudoStack) { return; } volatile StackEntry *pseudoFrames = pseudoStack->mStack; uint32_t pseudoCount = pseudoStack->stackSize(); for (uint32_t i = 0; i < pseudoCount; i++) { size_t len = strlen(pseudoFrames[i].label()); if (output + len >= bound) break; strcpy(output, pseudoFrames[i].label()); output += len; *output++ = '\0'; *output = '\0'; } }
// Record one sample for |sample|'s thread directly into its profile buffer:
// pending markers first, then the stack trace, then trailing metadata tags.
void TableTicker::InplaceTick(TickSample* sample)
{
  ThreadProfile& currThreadProfile = *sample->threadProfile;

  PseudoStack* stack = currThreadProfile.GetPseudoStack();

  bool recordSample = true;

  /* Don't process the PseudoStack's markers or honour jankOnly if we're
     immediately sampling the current thread. */
  if (!sample->isSamplingCurrentThread) {
    // Marker(s) come before the sample
    ProfilerMarkerLinkedList* pendingMarkersList = stack->getPendingMarkers();
    while (pendingMarkersList && pendingMarkersList->peek()) {
      ProfilerMarker* marker = pendingMarkersList->popHead();
      // Keep the marker alive until it has been streamed out of the buffer.
      stack->addStoredMarker(marker);
      currThreadProfile.addTag(ProfileEntry('m', marker));
    }
    stack->updateGeneration(currThreadProfile.GetGenerationID());

    if (mJankOnly) {
      // if we are on a different event we can discard any temporary samples
      // we've kept around
      if (sLastSampledEventGeneration != sCurrentEventGeneration) {
        // XXX: we also probably want to add an entry to the profile to help
        // distinguish which samples are part of the same event. That, or
        // record the event generation in each sample.
        currThreadProfile.erase();
      }
      sLastSampledEventGeneration = sCurrentEventGeneration;

      // Only record the sample when we haven't seen a tracer event for 100ms.
      recordSample = false;
      if (!sLastTracerEvent.IsNull()) {
        TimeDuration delta = sample->timestamp - sLastTracerEvent;
        if (delta.ToMilliseconds() > 100.0) {
          recordSample = true;
        }
      }
    }
  }

#if defined(USE_NS_STACKWALK) || defined(USE_EHABI_STACKWALK)
  if (mUseStackWalk) {
    doNativeBacktrace(currThreadProfile, sample);
  } else {
    doSampleStackTrace(stack, currThreadProfile,
                       mAddLeafAddresses ? sample : nullptr);
  }
#else
  doSampleStackTrace(stack, currThreadProfile,
                     mAddLeafAddresses ? sample : nullptr);
#endif

  if (recordSample)
    currThreadProfile.flush();

  // 'r': time since the last tracer (responsiveness) event, main thread only.
  if (!sLastTracerEvent.IsNull() && sample &&
      currThreadProfile.IsMainThread()) {
    TimeDuration delta = sample->timestamp - sLastTracerEvent;
    currThreadProfile.addTag(ProfileEntry('r', delta.ToMilliseconds()));
  }

  // 't': sample timestamp relative to profiler start.
  if (sample) {
    TimeDuration delta = sample->timestamp - sStartTime;
    currThreadProfile.addTag(ProfileEntry('t', delta.ToMilliseconds()));
  }

  // 'f': frame number, emitted only when it has changed since the last sample.
  if (sLastFrameNumber != sFrameNumber) {
    currThreadProfile.addTag(ProfileEntry('f', sFrameNumber));
    sLastFrameNumber = sFrameNumber;
  }
}
// Unwind the sampled thread's native (ARM/EHABI) stack into a PCArray and
// merge it with the pseudo-stack. JIT code cannot be unwound via EHABI, so
// unwinding is restarted from the register block each "EnterJIT" trampoline
// saved on entry.
void TableTicker::doNativeBacktrace(ThreadProfile &aProfile,
                                    TickSample* aSample)
{
  void *pc_array[1000];
  void *sp_array[1000];
  PCArray array = {
    pc_array,
    sp_array,
    mozilla::ArrayLength(pc_array),
    0
  };

  // Start from the register state captured by the signal handler.
  const mcontext_t *mcontext =
    &reinterpret_cast<ucontext_t *>(aSample->context)->uc_mcontext;
  mcontext_t savedContext;
  PseudoStack *pseudoStack = aProfile.GetPseudoStack();

  array.count = 0;
  // The pseudostack contains an "EnterJIT" frame whenever we enter
  // JIT code with profiling enabled; the stack pointer value points
  // to the saved registers. We use this to resume unwinding after
  // encountering JIT code.
  for (uint32_t i = pseudoStack->stackSize(); i > 0; --i) {
    // The pseudostack grows towards higher indices, so we iterate
    // backwards (from callee to caller).
    volatile StackEntry &entry = pseudoStack->mStack[i - 1];
    if (!entry.js() && strcmp(entry.label(), "EnterJIT") == 0) {
      // Found JIT entry frame. Unwind up to that point (i.e., force
      // the stack walk to stop before the block of saved registers;
      // note that it yields nondecreasing stack pointers), then restore
      // the saved state.
      uint32_t *vSP = reinterpret_cast<uint32_t*>(entry.stackAddress());

      array.count += EHABIStackWalk(*mcontext,
                                    /* stackBase = */ vSP,
                                    sp_array + array.count,
                                    pc_array + array.count,
                                    array.size - array.count);

      memset(&savedContext, 0, sizeof(savedContext));
      // Reload the callee-saved registers the trampoline pushed, in push
      // order. See also: struct EnterJITStack in
      // js/src/jit/arm/Trampoline-arm.cpp.
      savedContext.arm_r4 = *vSP++;
      savedContext.arm_r5 = *vSP++;
      savedContext.arm_r6 = *vSP++;
      savedContext.arm_r7 = *vSP++;
      savedContext.arm_r8 = *vSP++;
      savedContext.arm_r9 = *vSP++;
      savedContext.arm_r10 = *vSP++;
      savedContext.arm_fp = *vSP++;
      savedContext.arm_lr = *vSP++;
      // vSP now points past the saved block: that is the caller's SP, and
      // the saved LR is where execution resumes in the caller.
      savedContext.arm_sp = reinterpret_cast<uint32_t>(vSP);
      savedContext.arm_pc = savedContext.arm_lr;
      mcontext = &savedContext;
    }
  }

  // Now unwind whatever's left (starting from either the last EnterJIT
  // frame or, if no EnterJIT was found, the original registers).
  array.count += EHABIStackWalk(*mcontext,
                                aProfile.GetStackTop(),
                                sp_array + array.count,
                                pc_array + array.count,
                                array.size - array.count);

  mergeNativeBacktrace(aProfile, array);
}
// Interleave the pseudo-stack, JS (JIT) stack, and native stack of a sampled
// thread into aProfile, oldest frame first, ordered by stack address.
static
void mergeStacksIntoProfile(ThreadProfile& aProfile, TickSample* aSample,
                            NativeStack& aNativeStack)
{
  PseudoStack* pseudoStack = aProfile.GetPseudoStack();
  volatile StackEntry *pseudoFrames = pseudoStack->mStack;
  uint32_t pseudoCount = pseudoStack->stackSize();

  // Make a copy of the JS stack into a JSFrame array. This is necessary since,
  // like the native stack, the JS stack is iterated youngest-to-oldest and we
  // need to iterate oldest-to-youngest when adding entries to aProfile.

  // Synchronous sampling reports an invalid buffer generation to
  // ProfilingFrameIterator to avoid incorrectly resetting the generation of
  // sampled JIT entries inside the JS engine. See note below concerning 'J'
  // entries.
  uint32_t startBufferGen;
  if (aSample->isSamplingCurrentThread) {
    startBufferGen = UINT32_MAX;
  } else {
    startBufferGen = aProfile.bufferGeneration();
  }
  uint32_t jsCount = 0;
#ifndef SPS_STANDALONE
  JS::ProfilingFrameIterator::Frame jsFrames[1000];
  // Only walk jit stack if profiling frame iterator is turned on.
  if (pseudoStack->mRuntime &&
      JS::IsProfilingEnabledForRuntime(pseudoStack->mRuntime)) {
    AutoWalkJSStack autoWalkJSStack;
    const uint32_t maxFrames = mozilla::ArrayLength(jsFrames);

    if (aSample && autoWalkJSStack.walkAllowed) {
      JS::ProfilingFrameIterator::RegisterState registerState;
      registerState.pc = aSample->pc;
      registerState.sp = aSample->sp;
#ifdef ENABLE_ARM_LR_SAVING
      registerState.lr = aSample->lr;
#endif

      JS::ProfilingFrameIterator jsIter(pseudoStack->mRuntime,
                                        registerState,
                                        startBufferGen);
      for (; jsCount < maxFrames && !jsIter.done(); ++jsIter) {
        // See note below regarding 'J' entries.
        if (aSample->isSamplingCurrentThread || jsIter.isAsmJS()) {
          // asm.js frames (and all frames in synchronous samples) are
          // extracted eagerly with their labels.
          uint32_t extracted = jsIter.extractStack(jsFrames, jsCount,
                                                   maxFrames);
          jsCount += extracted;
          if (jsCount == maxFrames)
            break;
        } else {
          // Ion/Baseline frames keep only a physical address here and are
          // stringified lazily at streaming time ('J' entry below).
          mozilla::Maybe<JS::ProfilingFrameIterator::Frame> frame =
            jsIter.getPhysicalFrameWithoutLabel();
          if (frame.isSome())
            jsFrames[jsCount++] = frame.value();
        }
      }
    }
  }
#endif

  // Start the sample with a root entry.
  aProfile.addTag(ProfileEntry('s', "(root)"));

  // While the pseudo-stack array is ordered oldest-to-youngest, the JS and
  // native arrays are ordered youngest-to-oldest. We must add frames to
  // aProfile oldest-to-youngest. Thus, iterate over the pseudo-stack forwards
  // and JS and native arrays backwards. Note: this means the terminating
  // condition jsIndex and nativeIndex is being < 0.
  uint32_t pseudoIndex = 0;
  int32_t jsIndex = jsCount - 1;
  int32_t nativeIndex = aNativeStack.count - 1;

  // isJs pseudo frames carry no stack address; they inherit the address of
  // the most recent isCpp pseudo frame seen.
  uint8_t *lastPseudoCppStackAddr = nullptr;

  // Iterate as long as there is at least one frame remaining.
  while (pseudoIndex != pseudoCount || jsIndex >= 0 || nativeIndex >= 0) {
    // There are 1 to 3 frames available. Find and add the oldest.

    uint8_t *pseudoStackAddr = nullptr;
    uint8_t *jsStackAddr = nullptr;
    uint8_t *nativeStackAddr = nullptr;

    if (pseudoIndex != pseudoCount) {
      volatile StackEntry &pseudoFrame = pseudoFrames[pseudoIndex];

      if (pseudoFrame.isCpp())
        lastPseudoCppStackAddr = (uint8_t *) pseudoFrame.stackAddress();

#ifndef SPS_STANDALONE
      // Skip any pseudo-stack JS frames which are marked isOSR
      // Pseudostack frames are marked isOSR when the JS interpreter
      // enters a jit frame on a loop edge (via on-stack-replacement,
      // or OSR). To avoid both the pseudoframe and jit frame being
      // recorded (and showing up twice), the interpreter marks the
      // interpreter pseudostack entry with the OSR flag to ensure that
      // it doesn't get counted.
      if (pseudoFrame.isJs() && pseudoFrame.isOSR()) {
        pseudoIndex++;
        continue;
      }
#endif

      MOZ_ASSERT(lastPseudoCppStackAddr);
      pseudoStackAddr = lastPseudoCppStackAddr;
    }

#ifndef SPS_STANDALONE
    if (jsIndex >= 0)
      jsStackAddr = (uint8_t *) jsFrames[jsIndex].stackAddress;
#endif

    if (nativeIndex >= 0)
      nativeStackAddr = (uint8_t *) aNativeStack.sp_array[nativeIndex];

    // If there's a native stack entry which has the same SP as a
    // pseudo stack entry, pretend we didn't see the native stack
    // entry.  Ditto for a native stack entry which has the same SP as
    // a JS stack entry.  In effect this means pseudo or JS entries
    // trump conflicting native entries.
    if (nativeStackAddr && (pseudoStackAddr == nativeStackAddr ||
                            jsStackAddr == nativeStackAddr)) {
      nativeStackAddr = nullptr;
      nativeIndex--;
      MOZ_ASSERT(pseudoStackAddr || jsStackAddr);
    }

    // Sanity checks.
    MOZ_ASSERT_IF(pseudoStackAddr, pseudoStackAddr != jsStackAddr &&
                                   pseudoStackAddr != nativeStackAddr);
    MOZ_ASSERT_IF(jsStackAddr, jsStackAddr != pseudoStackAddr &&
                               jsStackAddr != nativeStackAddr);
    MOZ_ASSERT_IF(nativeStackAddr, nativeStackAddr != pseudoStackAddr &&
                                   nativeStackAddr != jsStackAddr);

    // Check to see if pseudoStack frame is top-most.
    if (pseudoStackAddr > jsStackAddr && pseudoStackAddr > nativeStackAddr) {
      MOZ_ASSERT(pseudoIndex < pseudoCount);
      volatile StackEntry &pseudoFrame = pseudoFrames[pseudoIndex];
      addPseudoEntry(pseudoFrame, aProfile, pseudoStack, nullptr);
      pseudoIndex++;
      continue;
    }

#ifndef SPS_STANDALONE
    // Check to see if JS jit stack frame is top-most
    if (jsStackAddr > nativeStackAddr) {
      MOZ_ASSERT(jsIndex >= 0);
      const JS::ProfilingFrameIterator::Frame& jsFrame = jsFrames[jsIndex];

      // Stringifying non-asm.js JIT frames is delayed until streaming
      // time. To re-lookup the entry in the JitcodeGlobalTable, we need to
      // store the JIT code address ('J') in the circular buffer.
      //
      // Note that we cannot do this when we are sychronously sampling the
      // current thread; that is, when called from profiler_get_backtrace. The
      // captured backtrace is usually externally stored for an indeterminate
      // amount of time, such as in nsRefreshDriver. Problematically, the
      // stored backtrace may be alive across a GC during which the profiler
      // itself is disabled. In that case, the JS engine is free to discard
      // its JIT code. This means that if we inserted such 'J' entries into
      // the buffer, nsRefreshDriver would now be holding on to a backtrace
      // with stale JIT code return addresses.
      if (aSample->isSamplingCurrentThread ||
          jsFrame.kind == JS::ProfilingFrameIterator::Frame_AsmJS) {
        addDynamicTag(aProfile, 'c', jsFrame.label);
      } else {
        MOZ_ASSERT(jsFrame.kind == JS::ProfilingFrameIterator::Frame_Ion ||
                   jsFrame.kind == JS::ProfilingFrameIterator::Frame_Baseline);
        aProfile.addTag(ProfileEntry('J', jsFrames[jsIndex].returnAddress));
      }

      jsIndex--;
      continue;
    }
#endif

    // If we reach here, there must be a native stack entry and it must be the
    // greatest entry.
    if (nativeStackAddr) {
      MOZ_ASSERT(nativeIndex >= 0);
      aProfile
        .addTag(ProfileEntry('l', (void*)aNativeStack.pc_array[nativeIndex]));
    }
    if (nativeIndex >= 0) {
      nativeIndex--;
    }
  }

#ifndef SPS_STANDALONE
  // Update the JS runtime with the current profile sample buffer generation.
  //
  // Do not do this for synchronous sampling, which create their own
  // ProfileBuffers.
  if (!aSample->isSamplingCurrentThread && pseudoStack->mRuntime) {
    MOZ_ASSERT(aProfile.bufferGeneration() >= startBufferGen);
    uint32_t lapCount = aProfile.bufferGeneration() - startBufferGen;
    JS::UpdateJSRuntimeProfilerSampleBufferGen(pseudoStack->mRuntime,
                                               aProfile.bufferGeneration(),
                                               lapCount);
  }
#endif
}
void GeckoSampler::InplaceTick(TickSample* sample) { ThreadProfile& currThreadProfile = *sample->threadProfile; currThreadProfile.addTag(ProfileEntry('T', currThreadProfile.ThreadId())); if (sample) { mozilla::TimeDuration delta = sample->timestamp - sStartTime; currThreadProfile.addTag(ProfileEntry('t', delta.ToMilliseconds())); } PseudoStack* stack = currThreadProfile.GetPseudoStack(); #if defined(USE_NS_STACKWALK) || defined(USE_EHABI_STACKWALK) || \ defined(USE_LUL_STACKWALK) if (mUseStackWalk) { doNativeBacktrace(currThreadProfile, sample); } else { doSampleStackTrace(currThreadProfile, sample, mAddLeafAddresses); } #else doSampleStackTrace(currThreadProfile, sample, mAddLeafAddresses); #endif // Don't process the PeudoStack's markers if we're // synchronously sampling the current thread. if (!sample->isSamplingCurrentThread) { ProfilerMarkerLinkedList* pendingMarkersList = stack->getPendingMarkers(); while (pendingMarkersList && pendingMarkersList->peek()) { ProfilerMarker* marker = pendingMarkersList->popHead(); currThreadProfile.addStoredMarker(marker); currThreadProfile.addTag(ProfileEntry('m', marker)); } } #ifndef SPS_STANDALONE if (sample && currThreadProfile.GetThreadResponsiveness()->HasData()) { mozilla::TimeDuration delta = currThreadProfile.GetThreadResponsiveness()->GetUnresponsiveDuration(sample->timestamp); currThreadProfile.addTag(ProfileEntry('r', delta.ToMilliseconds())); } #endif // rssMemory is equal to 0 when we are not recording. if (sample && sample->rssMemory != 0) { currThreadProfile.addTag(ProfileEntry('R', static_cast<double>(sample->rssMemory))); } // ussMemory is equal to 0 when we are not recording. 
if (sample && sample->ussMemory != 0) { currThreadProfile.addTag(ProfileEntry('U', static_cast<double>(sample->ussMemory))); } #if defined(XP_WIN) if (mProfilePower) { mIntelPowerGadget->TakeSample(); currThreadProfile.addTag(ProfileEntry('p', static_cast<double>(mIntelPowerGadget->GetTotalPackagePowerInWatts()))); } #endif if (sLastFrameNumber != sFrameNumber) { currThreadProfile.addTag(ProfileEntry('f', sFrameNumber)); sLastFrameNumber = sFrameNumber; } }
// RUNS IN SIGHANDLER CONTEXT
// Package up this sample's ProfileEntries (markers, hints, extras) into an
// inter-thread buffer and hand it to the unwinder thread, together with the
// register context needed for a native unwind when that mode is enabled.
void TableTicker::UnwinderTick(TickSample* sample)
{
  if (!sample->threadProfile) {
    // Platform doesn't support multithread, so use the main thread profile
    // we created.
    sample->threadProfile = GetPrimaryThreadProfile();
  }

  ThreadProfile& currThreadProfile = *sample->threadProfile;

  /* Get hold of an empty inter-thread buffer into which to park
     the ProfileEntries for this sample. */
  UnwinderThreadBuffer* utb = uwt__acquire_empty_buffer();

  /* This could fail, if no buffers are currently available, in which
     case we must give up right away.  We cannot wait for a buffer to
     become available, as that risks deadlock. */
  if (!utb)
    return;

  /* Manufacture the ProfileEntries that we will give to the unwinder
     thread, and park them in |utb|. */

  // Marker(s) come before the sample
  PseudoStack* stack = currThreadProfile.GetPseudoStack();
  for (int i = 0; stack->getMarker(i) != NULL; i++) {
    utb__addEntry( utb, ProfileEntry('m', stack->getMarker(i)) );
  }
  stack->mQueueClearMarker = true;

  bool recordSample = true;
  if (mJankOnly) {
    // if we are on a different event we can discard any temporary samples
    // we've kept around
    if (sLastSampledEventGeneration != sCurrentEventGeneration) {
      // XXX: we also probably want to add an entry to the profile to help
      // distinguish which samples are part of the same event. That, or record
      // the event generation in each sample
      currThreadProfile.erase();
    }
    sLastSampledEventGeneration = sCurrentEventGeneration;

    // only record the sample when we haven't seen a tracer
    // event for 100ms
    recordSample = false;
    if (!sLastTracerEvent.IsNull()) {
      TimeDuration delta = sample->timestamp - sLastTracerEvent;
      if (delta.ToMilliseconds() > 100.0) {
        recordSample = true;
      }
    }
  }

  // JRS 2012-Sept-27: this logic used to involve mUseStackWalk.
  // That should be reinstated, but for the moment, use the
  // settings in sUnwindMode and sUnwindInterval.
  // Add a native-backtrace request, or add pseudo backtrace entries,
  // or both.
  switch (sUnwindMode) {
    case UnwNATIVE: /* Native only */
      // add a "do native stack trace now" hint.  This will be actioned
      // by the unwinder thread as it processes the entries in this
      // sample.
      utb__addEntry( utb, ProfileEntry('h'/*hint*/, 'N'/*native-trace*/) );
      break;
    case UnwPSEUDO: /* Pseudo only */
      /* Add into |utb|, the pseudo backtrace entries */
      genPseudoBacktraceEntries(utb, stack, sample);
      break;
    case UnwCOMBINED: /* Both Native and Pseudo */
      utb__addEntry( utb, ProfileEntry('h'/*hint*/, 'N'/*native-trace*/) );
      genPseudoBacktraceEntries(utb, stack, sample);
      break;
    case UnwINVALID:
    default:
      MOZ_CRASH();
  }

  if (recordSample) {
    // add a "flush now" hint
    utb__addEntry( utb, ProfileEntry('h'/*hint*/, 'F'/*flush*/) );
  }

  // Add any extras
  // 'r': time since the last tracer (responsiveness) event.
  if (!sLastTracerEvent.IsNull() && sample) {
    TimeDuration delta = sample->timestamp - sLastTracerEvent;
    utb__addEntry( utb, ProfileEntry('r', delta.ToMilliseconds()) );
  }

  // 't': timestamp relative to profiler start.
  if (sample) {
    TimeDuration delta = sample->timestamp - sStartTime;
    utb__addEntry( utb, ProfileEntry('t', delta.ToMilliseconds()) );
  }

  // 'f': frame number, emitted only when it changed since the last sample.
  if (sLastFrameNumber != sFrameNumber) {
    utb__addEntry( utb, ProfileEntry('f', sFrameNumber) );
    sLastFrameNumber = sFrameNumber;
  }

  /* So now we have, in |utb|, the complete set of entries we want to
     push into the circular buffer.  This may also include a 'h' 'F'
     entry, which is "flush now" hint, and/or a 'h' 'N' entry, which
     is a "generate a native backtrace and add it to the buffer right
     now" hint.  Hand them off to the helper thread, together with
     stack and register context needed to do a native unwind, if that
     is currently enabled. */

  /* If a native unwind has been requested, we'll start it off using
     the context obtained from the signal handler, to avoid the
     problem of having to unwind through the signal frame itself. */

  /* On Linux and Android, the initial register state is in the
     supplied sample->context.  But on MacOS it's not, so we have to
     fake it up here (sigh). */
  if (sUnwindMode == UnwNATIVE || sUnwindMode == UnwCOMBINED) {
#   if defined(SPS_PLAT_amd64_linux) || defined(SPS_PLAT_arm_android) \
       || defined(SPS_PLAT_x86_linux) || defined(SPS_PLAT_x86_android)
    void* ucV = (void*)sample->context;
#   elif defined(SPS_PLAT_amd64_darwin)
    // Reconstruct a ucontext from the sampled pc/sp/fp.
    struct __darwin_mcontext64 mc;
    memset(&mc, 0, sizeof(mc));
    ucontext_t uc;
    memset(&uc, 0, sizeof(uc));
    uc.uc_mcontext = &mc;
    mc.__ss.__rip = (uint64_t)sample->pc;
    mc.__ss.__rsp = (uint64_t)sample->sp;
    mc.__ss.__rbp = (uint64_t)sample->fp;
    void* ucV = (void*)&uc;
#   elif defined(SPS_PLAT_x86_darwin)
    // Reconstruct a ucontext from the sampled pc/sp/fp.
    struct __darwin_mcontext32 mc;
    memset(&mc, 0, sizeof(mc));
    ucontext_t uc;
    memset(&uc, 0, sizeof(uc));
    uc.uc_mcontext = &mc;
    mc.__ss.__eip = (uint32_t)sample->pc;
    mc.__ss.__esp = (uint32_t)sample->sp;
    mc.__ss.__ebp = (uint32_t)sample->fp;
    void* ucV = (void*)&uc;
#   elif defined(SPS_OS_windows)
    /* Totally fake this up so it at least builds.  No idea if we can
       even ever get here on Windows. */
    void* ucV = NULL;
#   else
#     error "Unsupported platform"
#   endif
    uwt__release_full_buffer(&currThreadProfile, utb, ucV);
  } else {
    uwt__release_full_buffer(&currThreadProfile, utb, NULL);
  }
}
// Record one sample for |sample|'s thread directly into its profile buffer:
// pending markers and power sampling first, then the stack trace, then
// trailing metadata tags.
void TableTicker::InplaceTick(TickSample* sample)
{
  ThreadProfile& currThreadProfile = *sample->threadProfile;

  PseudoStack* stack = currThreadProfile.GetPseudoStack();
  stack->updateGeneration(currThreadProfile.GetGenerationID());
  bool recordSample = true;
#if defined(XP_WIN)
  bool powerSample = false;
#endif

  /* Don't process the PseudoStack's markers or honour jankOnly if we're
     immediately sampling the current thread. */
  if (!sample->isSamplingCurrentThread) {
    // Marker(s) come before the sample
    ProfilerMarkerLinkedList* pendingMarkersList = stack->getPendingMarkers();
    while (pendingMarkersList && pendingMarkersList->peek()) {
      ProfilerMarker* marker = pendingMarkersList->popHead();
      // Keep the marker alive until it has been streamed out of the buffer.
      stack->addStoredMarker(marker);
      currThreadProfile.addTag(ProfileEntry('m', marker));
    }

#if defined(XP_WIN)
    // Take the power reading now; the 'p' tag is emitted further below.
    if (mProfilePower) {
      mIntelPowerGadget->TakeSample();
      powerSample = true;
    }
#endif

    if (mJankOnly) {
      // if we are on a different event we can discard any temporary samples
      // we've kept around
      if (sLastSampledEventGeneration != sCurrentEventGeneration) {
        // XXX: we also probably want to add an entry to the profile to help
        // distinguish which samples are part of the same event. That, or
        // record the event generation in each sample.
        currThreadProfile.erase();
      }
      sLastSampledEventGeneration = sCurrentEventGeneration;

      // Only record the sample when we haven't seen a tracer event for 100ms.
      recordSample = false;
      if (!sLastTracerEvent.IsNull()) {
        mozilla::TimeDuration delta = sample->timestamp - sLastTracerEvent;
        if (delta.ToMilliseconds() > 100.0) {
          recordSample = true;
        }
      }
    }
  }

#if defined(USE_NS_STACKWALK) || defined(USE_EHABI_STACKWALK)
  if (mUseStackWalk) {
    doNativeBacktrace(currThreadProfile, sample);
  } else {
    doSampleStackTrace(currThreadProfile, sample, mAddLeafAddresses);
  }
#else
  doSampleStackTrace(currThreadProfile, sample, mAddLeafAddresses);
#endif

  if (recordSample)
    currThreadProfile.flush();

  // 'r': responsiveness (unresponsive duration) at this timestamp.
  if (sample && currThreadProfile.GetThreadResponsiveness()->HasData()) {
    mozilla::TimeDuration delta = currThreadProfile.GetThreadResponsiveness()->GetUnresponsiveDuration(sample->timestamp);
    currThreadProfile.addTag(ProfileEntry('r', static_cast<float>(delta.ToMilliseconds())));
  }

  // 't': timestamp relative to profiler start.
  if (sample) {
    mozilla::TimeDuration delta = sample->timestamp - sStartTime;
    currThreadProfile.addTag(ProfileEntry('t', static_cast<float>(delta.ToMilliseconds())));
  }

  // rssMemory is equal to 0 when we are not recording.
  if (sample && sample->rssMemory != 0) {
    currThreadProfile.addTag(ProfileEntry('R', static_cast<float>(sample->rssMemory)));
  }

  // ussMemory is equal to 0 when we are not recording.
  if (sample && sample->ussMemory != 0) {
    currThreadProfile.addTag(ProfileEntry('U', static_cast<float>(sample->ussMemory)));
  }

#if defined(XP_WIN)
  // 'p': package power reading taken earlier this tick.
  if (powerSample) {
    currThreadProfile.addTag(ProfileEntry('p', static_cast<float>(mIntelPowerGadget->GetTotalPackagePowerInWatts())));
  }
#endif

  // 'f': frame number, emitted only when it changed since the last sample.
  if (sLastFrameNumber != sFrameNumber) {
    currThreadProfile.addTag(ProfileEntry('f', sFrameNumber));
    sLastFrameNumber = sFrameNumber;
  }
}
// Interleave the pseudo-stack, copied JS stack, and native stack of a
// sampled thread into aProfile, oldest frame first, ordered by stack
// address (with special handling for isJs and asm.js pseudo frames).
static
void mergeStacksIntoProfile(ThreadProfile& aProfile, TickSample* aSample,
                            NativeStack& aNativeStack)
{
  PseudoStack* pseudoStack = aProfile.GetPseudoStack();
  volatile StackEntry *pseudoFrames = pseudoStack->mStack;
  uint32_t pseudoCount = pseudoStack->stackSize();

  // Make a copy of the JS stack into a JSFrame array. This is necessary since,
  // like the native stack, the JS stack is iterated youngest-to-oldest and we
  // need to iterate oldest-to-youngest when adding entries to aProfile.
  JSFrame jsFrames[1000];
  uint32_t jsCount = 0;
  if (aSample && pseudoStack->mRuntime) {
    JS::ProfilingFrameIterator::RegisterState registerState;
    registerState.pc = aSample->pc;
    registerState.sp = aSample->sp;
#ifdef ENABLE_ARM_LR_SAVING
    registerState.lr = aSample->lr;
#endif
    JS::ProfilingFrameIterator jsIter(pseudoStack->mRuntime, registerState);
    for (; jsCount < mozilla::ArrayLength(jsFrames) && !jsIter.done();
         ++jsCount, ++jsIter) {
      jsFrames[jsCount].stackAddress = jsIter.stackAddress();
      jsFrames[jsCount].label = jsIter.label();
    }
  }

  // Start the sample with a root entry.
  aProfile.addTag(ProfileEntry('s', "(root)"));

  // While the pseudo-stack array is ordered oldest-to-youngest, the JS and
  // native arrays are ordered youngest-to-oldest. We must add frames to
  // aProfile oldest-to-youngest. Thus, iterate over the pseudo-stack forwards
  // and JS and native arrays backwards. Note: this means the terminating
  // condition jsIndex and nativeIndex is being < 0.
  uint32_t pseudoIndex = 0;
  int32_t jsIndex = jsCount - 1;
  int32_t nativeIndex = aNativeStack.count - 1;

  // Iterate as long as there is at least one frame remaining.
  while (pseudoIndex != pseudoCount || jsIndex >= 0 || nativeIndex >= 0) {
    // There are 1 to 3 frames available. Find and add the oldest. Handle pseudo
    // frames first, since there are two special cases that must be considered
    // before everything else.
    if (pseudoIndex != pseudoCount) {
      volatile StackEntry &pseudoFrame = pseudoFrames[pseudoIndex];

      // isJs pseudo-stack frames assume the stackAddress of the preceding isCpp
      // pseudo-stack frame. If we arrive at an isJs pseudo frame, we've already
      // encountered the preceding isCpp stack frame and it was oldest, we can
      // assume the isJs frame is oldest without checking other frames.
      if (pseudoFrame.isJs()) {
        addPseudoEntry(pseudoFrame, aProfile, pseudoStack, nullptr);
        pseudoIndex++;
        continue;
      }

      // Currently, only asm.js frames use the JS stack and Ion/Baseline/Interp
      // frames use the pseudo stack. In the optimized asm.js->Ion call path, no
      // isCpp frame is pushed, leading to the callstack:
      //   old | pseudo isCpp | asm.js | pseudo isJs | new
      // Since there is no interleaving isCpp pseudo frame between the asm.js
      // and isJs pseudo frame, the above isJs logic will render the callstack:
      //   old | pseudo isCpp | pseudo isJs | asm.js | new
      // which is wrong. To deal with this, a pseudo isCpp frame pushed right
      // before entering asm.js flagged with StackEntry::ASMJS. When we see this
      // flag, we first push all the asm.js frames (up to the next frame with a
      // stackAddress) before pushing the isJs frames. There is no Ion->asm.js
      // fast path, so we don't have to worry about asm.js->Ion->asm.js.
      //
      // (This and the above isJs special cases can be removed once all JS
      // execution modes switch from the pseudo stack to the JS stack.)
      if (pseudoFrame.hasFlag(StackEntry::ASMJS)) {
        // Find where the asm.js portion of the JS stack ends: the address of
        // the next isCpp pseudo frame, or the youngest native frame,
        // whichever is older.
        void *stopStackAddress = nullptr;
        for (uint32_t i = pseudoIndex + 1; i != pseudoCount; i++) {
          if (pseudoFrames[i].isCpp()) {
            stopStackAddress = pseudoFrames[i].stackAddress();
            break;
          }
        }

        if (nativeIndex >= 0) {
          stopStackAddress = std::max(stopStackAddress,
                                      aNativeStack.sp_array[nativeIndex]);
        }

        while (jsIndex >= 0 &&
               jsFrames[jsIndex].stackAddress > stopStackAddress) {
          addDynamicTag(aProfile, 'c', jsFrames[jsIndex].label);
          jsIndex--;
        }

        pseudoIndex++;
        continue;
      }

      // Finally, consider the normal case of a plain C++ pseudo-frame.
      if ((jsIndex < 0 ||
           pseudoFrame.stackAddress() > jsFrames[jsIndex].stackAddress) &&
          (nativeIndex < 0 ||
           pseudoFrame.stackAddress() > aNativeStack.sp_array[nativeIndex])) {
        // The (C++) pseudo-frame is the oldest.
        addPseudoEntry(pseudoFrame, aProfile, pseudoStack, nullptr);
        pseudoIndex++;
        continue;
      }
    }

    if (jsIndex >= 0) {
      // Test whether the JS frame is the oldest.
      JSFrame &jsFrame = jsFrames[jsIndex];
      if ((pseudoIndex == pseudoCount ||
           jsFrame.stackAddress > pseudoFrames[pseudoIndex].stackAddress()) &&
          (nativeIndex < 0 ||
           jsFrame.stackAddress > aNativeStack.sp_array[nativeIndex])) {
        // The JS frame is the oldest.
        addDynamicTag(aProfile, 'c', jsFrame.label);
        jsIndex--;
        continue;
      }
    }

    // If execution reaches this point, there must be a native frame and it
    // must be the oldest.
    MOZ_ASSERT(nativeIndex >= 0);
    aProfile.addTag(ProfileEntry('l',
                                 (void*)aNativeStack.pc_array[nativeIndex]));
    nativeIndex--;
  }
}
// Record one sample for |sample|'s thread directly into its profile buffer:
// queued markers first, then the stack trace, then trailing metadata tags.
void TableTicker::InplaceTick(TickSample* sample)
{
  ThreadProfile& profile = *sample->threadProfile;

  // Marker(s) come before the sample.
  PseudoStack* pseudoStack = profile.GetPseudoStack();
  for (int i = 0; pseudoStack->getMarker(i) != NULL; i++) {
    addDynamicTag(profile, 'm', pseudoStack->getMarker(i));
  }
  pseudoStack->mQueueClearMarker = true;

  bool recordSample = true;
  if (mJankOnly) {
    // On a new event, discard any temporary samples kept from the previous
    // one.
    // XXX: ideally we would also tag the profile so samples belonging to the
    // same event can be distinguished, or record the event generation in
    // each sample.
    if (sLastSampledEventGeneration != sCurrentEventGeneration) {
      profile.erase();
    }
    sLastSampledEventGeneration = sCurrentEventGeneration;

    // Only keep the sample when no tracer event has been seen for 100ms.
    recordSample = false;
    if (!sLastTracerEvent.IsNull()) {
      TimeDuration sinceTracer = sample->timestamp - sLastTracerEvent;
      recordSample = sinceTracer.ToMilliseconds() > 100.0;
    }
  }

#if defined(USE_BACKTRACE) || defined(USE_NS_STACKWALK)
  if (mUseStackWalk) {
    doNativeBacktrace(profile, sample);
  } else {
    doSampleStackTrace(pseudoStack, profile,
                       mAddLeafAddresses ? sample : nullptr);
  }
#else
  doSampleStackTrace(pseudoStack, profile,
                     mAddLeafAddresses ? sample : nullptr);
#endif

  if (recordSample)
    profile.flush();

  // 'r': time since the last tracer (responsiveness) event, main thread only.
  if (!sLastTracerEvent.IsNull() && sample && profile.IsMainThread()) {
    TimeDuration sinceTracer = sample->timestamp - sLastTracerEvent;
    profile.addTag(ProfileEntry('r', sinceTracer.ToMilliseconds()));
  }

  // 't': timestamp relative to profiler start.
  if (sample) {
    TimeDuration elapsed = sample->timestamp - sStartTime;
    profile.addTag(ProfileEntry('t', elapsed.ToMilliseconds()));
  }

  // 'f': frame number, emitted only when it changed since the last sample.
  if (sLastFrameNumber != sFrameNumber) {
    profile.addTag(ProfileEntry('f', sFrameNumber));
    sLastFrameNumber = sFrameNumber;
  }
}
void TableTicker::doNativeBacktrace(ThreadProfile &aProfile, TickSample* aSample) { #ifndef XP_MACOSX uintptr_t thread = GetThreadHandle(aSample->threadProfile->GetPlatformData()); MOZ_ASSERT(thread); #endif void* pc_array[1000]; void* sp_array[1000]; PCArray array = { pc_array, sp_array, mozilla::ArrayLength(pc_array), 0 }; // Start with the current function. StackWalkCallback(aSample->pc, aSample->sp, &array); uint32_t maxFrames = uint32_t(array.size - array.count); #ifdef XP_MACOSX pthread_t pt = GetProfiledThread(aSample->threadProfile->GetPlatformData()); void *stackEnd = reinterpret_cast<void*>(-1); if (pt) stackEnd = static_cast<char*>(pthread_get_stackaddr_np(pt)); nsresult rv = NS_OK; if (aSample->fp >= aSample->sp && aSample->fp <= stackEnd) rv = FramePointerStackWalk(StackWalkCallback, /* skipFrames */ 0, maxFrames, &array, reinterpret_cast<void**>(aSample->fp), stackEnd); #else void *platformData = nullptr; #ifdef XP_WIN platformData = aSample->context; #endif // XP_WIN nsresult rv = NS_StackWalk(StackWalkCallback, /* skipFrames */ 0, maxFrames, &array, thread, platformData); #endif if (NS_SUCCEEDED(rv)) { aProfile.addTag(ProfileEntry('s', "(root)")); PseudoStack* stack = aProfile.GetPseudoStack(); uint32_t pseudoStackPos = 0; /* We have two stacks, the native C stack we extracted from unwinding, * and the pseudostack we managed during execution. We want to consolidate * the two in order. We do so by merging using the approximate stack address * when each entry was push. When pushing JS entry we may not now the stack * address in which case we have a NULL stack address in which case we assume * that it follows immediatly the previous element. 
* * C Stack | Address -- Pseudo Stack | Address * main() | 0x100 run_js() | 0x40 * start() | 0x80 jsCanvas() | NULL * timer() | 0x50 drawLine() | NULL * azure() | 0x10 * * Merged: main(), start(), timer(), run_js(), jsCanvas(), drawLine(), azure() */ // i is the index in C stack starting at main and decreasing // pseudoStackPos is the position in the Pseudo stack starting // at the first frame (run_js in the example) and increasing. for (size_t i = array.count; i > 0; --i) { while (pseudoStackPos < stack->stackSize()) { volatile StackEntry& entry = stack->mStack[pseudoStackPos]; if (entry.stackAddress() < array.sp_array[i-1] && entry.stackAddress()) break; addProfileEntry(entry, aProfile, stack, array.array[0]); pseudoStackPos++; } aProfile.addTag(ProfileEntry('l', (void*)array.array[i-1])); } } }
// Merge up to three views of the sampled thread's stack — the pseudo-stack,
// the JS jit stack, and the native stack — into |aProfile|, emitting frames
// strictly oldest-to-youngest. Frames from the three sources are interleaved
// by comparing their stack addresses (higher address == older frame, since
// stacks grow downward on the supported platforms).
static
void mergeStacksIntoProfile(ThreadProfile& aProfile, TickSample* aSample, NativeStack& aNativeStack)
{
  PseudoStack* pseudoStack = aProfile.GetPseudoStack();
  volatile StackEntry *pseudoFrames = pseudoStack->mStack;
  uint32_t pseudoCount = pseudoStack->stackSize();

  // Make a copy of the JS stack into a JSFrame array. This is necessary since,
  // like the native stack, the JS stack is iterated youngest-to-oldest and we
  // need to iterate oldest-to-youngest when adding entries to aProfile.

  // Remember the buffer generation before we add anything, so we can tell the
  // JS runtime how far the circular buffer advanced (see end of function).
  uint32_t startBufferGen = aProfile.bufferGeneration();
  uint32_t jsCount = 0;
  JS::ProfilingFrameIterator::Frame jsFrames[1000];
  // Only walk jit stack if profiling frame iterator is turned on.
  if (pseudoStack->mRuntime && JS::IsProfilingEnabledForRuntime(pseudoStack->mRuntime)) {
    AutoWalkJSStack autoWalkJSStack;
    const uint32_t maxFrames = mozilla::ArrayLength(jsFrames);

    if (aSample && autoWalkJSStack.walkAllowed) {
      // Seed the iterator with the sampled register state so it can find the
      // innermost jit activation.
      JS::ProfilingFrameIterator::RegisterState registerState;
      registerState.pc = aSample->pc;
      registerState.sp = aSample->sp;
#ifdef ENABLE_ARM_LR_SAVING
      registerState.lr = aSample->lr;
#endif

      JS::ProfilingFrameIterator jsIter(pseudoStack->mRuntime,
                                        registerState,
                                        startBufferGen);
      for (; jsCount < maxFrames && !jsIter.done(); ++jsIter) {
        // One iterator position may expand to several physical frames
        // (e.g. inlined frames); extractStack appends them all.
        uint32_t extracted = jsIter.extractStack(jsFrames, jsCount, maxFrames);
        MOZ_ASSERT(extracted <= (maxFrames - jsCount));
        jsCount += extracted;
        if (jsCount == maxFrames)
          break;
      }
    }
  }

  // Start the sample with a root entry.
  aProfile.addTag(ProfileEntry('s', "(root)"));

  // While the pseudo-stack array is ordered oldest-to-youngest, the JS and
  // native arrays are ordered youngest-to-oldest. We must add frames to
  // aProfile oldest-to-youngest. Thus, iterate over the pseudo-stack forwards
  // and JS and native arrays backwards. Note: this means the terminating
  // condition for jsIndex and nativeIndex is being < 0.
  uint32_t pseudoIndex = 0;
  int32_t jsIndex = jsCount - 1;
  int32_t nativeIndex = aNativeStack.count - 1;

  uint8_t *lastPseudoCppStackAddr = nullptr;

  // Iterate as long as there is at least one frame remaining.
  while (pseudoIndex != pseudoCount || jsIndex >= 0 || nativeIndex >= 0) {
    // There are 1 to 3 frames available. Find and add the oldest.

    uint8_t *pseudoStackAddr = nullptr;
    uint8_t *jsStackAddr = nullptr;
    uint8_t *nativeStackAddr = nullptr;

    if (pseudoIndex != pseudoCount) {
      volatile StackEntry &pseudoFrame = pseudoFrames[pseudoIndex];

      // JS pseudo-frames may lack a stack address; order them by the address
      // of the nearest enclosing C++ pseudo-frame instead.
      if (pseudoFrame.isCpp())
        lastPseudoCppStackAddr = (uint8_t *) pseudoFrame.stackAddress();

      // Skip any pseudo-stack JS frames which are marked isOSR.
      // Pseudostack frames are marked isOSR when the JS interpreter
      // enters a jit frame on a loop edge (via on-stack-replacement,
      // or OSR). To avoid both the pseudoframe and jit frame being
      // recorded (and showing up twice), the interpreter marks the
      // interpreter pseudostack entry with the OSR flag to ensure that
      // it doesn't get counted.
      if (pseudoFrame.isJs() && pseudoFrame.isOSR()) {
          pseudoIndex++;
          continue;
      }

      MOZ_ASSERT(lastPseudoCppStackAddr);
      pseudoStackAddr = lastPseudoCppStackAddr;
    }

    if (jsIndex >= 0)
      jsStackAddr = (uint8_t *) jsFrames[jsIndex].stackAddress;

    if (nativeIndex >= 0)
      nativeStackAddr = (uint8_t *) aNativeStack.sp_array[nativeIndex];

    // Sanity checks: no two candidate frames should share a stack address.
    MOZ_ASSERT_IF(pseudoStackAddr, pseudoStackAddr != jsStackAddr &&
                                   pseudoStackAddr != nativeStackAddr);
    MOZ_ASSERT_IF(jsStackAddr, jsStackAddr != pseudoStackAddr &&
                               jsStackAddr != nativeStackAddr);
    MOZ_ASSERT_IF(nativeStackAddr, nativeStackAddr != pseudoStackAddr &&
                                   nativeStackAddr != jsStackAddr);

    // Check to see if pseudoStack frame is top-most (oldest of the three).
    if (pseudoStackAddr > jsStackAddr && pseudoStackAddr > nativeStackAddr) {
      MOZ_ASSERT(pseudoIndex < pseudoCount);
      volatile StackEntry &pseudoFrame = pseudoFrames[pseudoIndex];
      addPseudoEntry(pseudoFrame, aProfile, pseudoStack, nullptr);
      pseudoIndex++;
      continue;
    }

    // Check to see if JS jit stack frame is top-most.
    if (jsStackAddr > nativeStackAddr) {
      MOZ_ASSERT(jsIndex >= 0);
      addDynamicTag(aProfile, 'c', jsFrames[jsIndex].label);

      // Stringifying optimization information is delayed until streaming
      // time. To re-lookup the entry in the JitcodeGlobalTable, we need to
      // store the JIT code address ('J') in the circular buffer.
      if (jsFrames[jsIndex].hasTrackedOptimizations) {
        aProfile.addTag(ProfileEntry('J', jsFrames[jsIndex].returnAddress));
      }

      jsIndex--;
      continue;
    }

    // If we reach here, there must be a native stack entry and it must be the
    // greatest entry.
    MOZ_ASSERT(nativeStackAddr);
    MOZ_ASSERT(nativeIndex >= 0);
    aProfile.addTag(ProfileEntry('l', (void*)aNativeStack.pc_array[nativeIndex]));
    nativeIndex--;
  }

  MOZ_ASSERT(aProfile.bufferGeneration() >= startBufferGen);
  uint32_t lapCount = aProfile.bufferGeneration() - startBufferGen;

  // Update the JS runtime with the current profile sample buffer generation,
  // so jit code referenced by buffered 'J' entries is kept alive until the
  // buffer wraps past them.
  if (pseudoStack->mRuntime) {
    JS::UpdateJSRuntimeProfilerSampleBufferGen(pseudoStack->mRuntime,
                                               aProfile.bufferGeneration(),
                                               lapCount);
  }
}