/**
 * Estimates the per-call cost of nowNs() in nanoseconds by timing a large
 * batch of back-to-back calls and averaging over the batch size.
 */
static uint_fast64_t calculateClockOverhead() {
  constexpr int kIterations = 1000000;
  const uint_fast64_t begin = nowNs();
  for (int iter = 0; iter < kIterations; ++iter) {
    nowNs();
  }
  const uint_fast64_t finish = nowNs();
  return (finish - begin) / kIterations;
}
/**
 * Estimates the per-use cost (ns) of an *enabled* MICRO_PROFILER_SECTION by
 * timing a large batch of no-op sections and averaging over the batch size.
 * Temporarily turns the global profiling flag on so the sections take their
 * full recording path, and turns it back off afterwards.
 * NOTE(review): assumes profiling is not already running when called — the
 * flag is unconditionally reset to false on exit.
 */
static uint_fast64_t calculateProfileSectionOverhead() {
  int numCalls = 1000000;
  uint_fast64_t start = nowNs();
  // Enable profiling so each section does its real bookkeeping; the flag
  // writes sit inside the timed window but are negligible over 1M calls.
  profiling.isProfiling_ = true;
  for (int i = 0; i < numCalls; i++) {
    // Name 0 is an arbitrary placeholder; only the overhead is of interest.
    MICRO_PROFILER_SECTION(static_cast<MicroProfilerName>(0));
  }
  uint_fast64_t end = nowNs();
  profiling.isProfiling_ = false;
  return (end - start) / numCalls;
}
/**
 * Busy-waits for approximately `ns` nanoseconds. Returns immediately for
 * non-positive values.
 *
 * We need to have *precise* timing, and it's not achievable with any other
 * means like 'nanosleep' or EventBase. "pause" instruction would hint
 * processor that this is a spin-loop, it will burn as much CPU as possible.
 * The processor will use this hint to avoid memory order violation, which
 * greatly improves its performance.
 * http://siyobik.info.gf/main/reference/instruction/PAUSE
 */
void Scheduler::waitNs(int64_t ns) {
  // Guard against non-positive waits: the loop condition compares the
  // unsigned elapsed time (nowNs() - start) against the target, so a
  // negative `ns` would be converted to a huge unsigned value and spin
  // (nearly) forever. Callers currently clamp, but be safe here too.
  if (ns <= 0) {
    return;
  }
  const auto target = static_cast<uint_fast64_t>(ns);
  for (auto start = nowNs(); nowNs() - start < target;) {
    asm volatile("pause");
  }
}
/**
 * Closes a profiling section: records the elapsed wall time and the number
 * of sections that started while this one was open.
 */
MicroProfilerSection::~MicroProfilerSection() {
  // Record only if profiling was on when the section opened AND is still on
  // now — a section that straddles stopProfiling() is silently dropped.
  if (!isProfiling_ || !profiling.isProfiling_) {
    return;
  }
  auto endTime = nowNs();
  auto endNumProfileSections = profileSections;
  // The "- 1" excludes this section's own increment (done in the
  // constructor) from the count of nested sections.
  myTraceData.addTime(name_, endTime - startTime_, endNumProfileSections - startNumProfileSections_ - 1);
}
/**
 * Opens a profiling section: snapshots the global profiling flag and the
 * running section counter, then records the start timestamp.
 */
MicroProfilerSection::MicroProfilerSection(MicroProfilerName name) : isProfiling_(profiling.isProfiling_), name_(name), startNumProfileSections_(profileSections) {
  // When profiling is disabled the section is inert; startTime_ stays unset,
  // which is fine because the destructor early-returns on !isProfiling_ too.
  if (!isProfiling_) {
    return;
  }
  profileSections++;
  // Take the timestamp last so constructor setup cost is excluded from the
  // measured interval.
  startTime_ = nowNs();
}
/**
 * Stops an active profiling run, prints the collected report, and clears
 * the accumulated data. CHECK-fails if profiling was not started.
 */
void MicroProfiler::stopProfiling() {
  CHECK(profiling.isProfiling_) << "Trying to stop profiling but profiling hasn't been started!";
  // Flip the flag before taking the lock so sections stop recording while
  // the report is being generated.
  profiling.isProfiling_ = false;
  profiling.endTime_ = nowNs();
  std::lock_guard<std::mutex> lock(profiling.mutex_);
  printReport();
  clearProfiling();
}
/**
 * Begins a profiling run: calibrates the clock and section overheads (so
 * the report can subtract them), clears stale data, and enables the global
 * flag. CHECK-fails if profiling is already running.
 */
void MicroProfiler::startProfiling() {
  CHECK(!profiling.isProfiling_) << "Trying to start profiling but profiling was already started!";
  // Calibrate before taking the lock — each calibration runs a
  // million-iteration timing loop.
  profiling.clockOverhead_ = calculateClockOverhead();
  profiling.profileSectionOverhead_ = calculateProfileSectionOverhead();
  std::lock_guard<std::mutex> lock(profiling.mutex_);
  clearProfiling();
  profiling.startTime_ = nowNs();
  // Enable last so sections only start recording against fully-reset state.
  profiling.isProfiling_ = true;
}
/** * Responsible for generating requests events. * Requests are randomly spaced (intervals are drawn from an * exponential distribution) to achieve the target throughput rate. * Events would be put into notification queues, which would be selected in * round-robin fashion. */ void Scheduler::loop() { do { messageAllWorkers(Event(EventType::RESET)); next_ = 0; int32_t rps = rps_; int64_t interval_ns = 1.0/rps * k_ns_per_s; int64_t a = 0, b = 0, budget = randomExponentialInterval(interval_ns); while (state_ == RUNNING) { b = nowNs(); if (a) { /* Account for time spent sending the message */ budget -= (b - a); } waitNs(std::max(budget, 0L)); a = nowNs(); /* Decrease the sleep budget by the exact time slept (could have been more than the budget value), increase by the next interval */ budget += randomExponentialInterval(interval_ns) - (a - b); queues_[next_].putMessage(Event(EventType::SEND_REQUEST)); if (queues_[next_].size() > logging_threshold_ * logged_[next_]) { LOG(INFO) << "Notification queue for worker " << next_ << " is overloaded by factor of " << logged_[next_]; logged_[next_] *= 2; } ++next_; if (next_ == queues_.size()) { next_ = 0; } if (rps != rps_) { rps = rps_; interval_ns = 1.0/rps * k_ns_per_s; } } while (state_ == PAUSED) waitNs(1000); } while (state_ != STOPPING); messageAllWorkers(Event(EventType::STOP)); promise_.setValue(folly::Unit()); }