// Reader functor: refreshes the fd count of this process.
// On success stores the count into `*stat' and returns true. Returns false
// (leaving `*stat' untouched) when get_fd_count() fails or when the scan
// limit was reached by an earlier call.
bool operator()(int* stat) const {
    if (s_ever_reached_fd_scan_limit.load(butil::memory_order_relaxed)) {
        // Never update the count again.
        return false;
    }
    const int count = get_fd_count(MAX_FD_SCAN_COUNT);
    if (count < 0) {
        return false;
    }
    // Use `>=' instead of `==': get_fd_count(limit) stops after reading
    // limit + 4 directory entries and subtracts 3, so it may return values
    // up to limit + 1. Testing for one exact value missed every larger
    // count, and the (expensive) scan was then repeated forever.
    if (count >= MAX_FD_SCAN_COUNT - 2 &&
        s_ever_reached_fd_scan_limit.exchange(
            true, butil::memory_order_relaxed) == false) {
        // Rename the bvar to notify user. The exchange() guarantees that
        // only the first caller observing the limit performs the rename.
        g_fd_num.hide();
        g_fd_num.expose("process_fd_num_too_many");
    }
    *stat = count;
    return true;
}
void EndRunningUserCodeInPool(void (*fn)(void*), void* arg) { InitUserCodeBackupPoolOnceOrDie(); g_usercode_inplace.fetch_sub(1, butil::memory_order_relaxed); // Not enough idle workers, run the code in backup threads to prevent // all workers from being blocked and no responses will be processed // anymore (deadlocked). const UserCode usercode = { fn, arg }; pthread_mutex_lock(&s_usercode_mutex); s_usercode_pool->queue.push_back(usercode); // If the queue has too many items, we can't drop the user code // directly which often must be run, for example: client-side done. // The solution is that we set a mark which is not cleared before // queue becomes short again. RPC code checks the mark before // submitting tasks that may generate more user code. if ((int)s_usercode_pool->queue.size() >= (FLAGS_usercode_backup_threads * FLAGS_max_pending_in_each_backup_thread)) { g_too_many_usercode = true; } pthread_mutex_unlock(&s_usercode_mutex); pthread_cond_signal(&s_usercode_cond); }
// Getter callback: returns the current value of g_usercode_inplace.
// The void* argument is unused.
static int GetUserCodeInPlace(void*) {
    const int running_inplace =
        g_usercode_inplace.load(butil::memory_order_relaxed);
    return running_inplace;
}
namespace brpc { DEFINE_int32(usercode_backup_threads, 5, "# of backup threads to run user code" " when too many pthread worker of bthreads are used"); DEFINE_int32(max_pending_in_each_backup_thread, 10, "Max number of un-run user code in each backup thread, requests" " still coming in will be failed"); // Store pending user code. struct UserCode { void (*fn)(void*); void* arg; }; struct UserCodeBackupPool { // Run user code when parallelism of user code reaches the threshold std::deque<UserCode> queue; bvar::PassiveStatus<int> inplace_var; bvar::PassiveStatus<size_t> queue_size_var; bvar::Adder<size_t> inpool_count; bvar::PerSecond<bvar::Adder<size_t> > inpool_per_second; // NOTE: we don't use Adder<double> directly which does not compile in gcc 3.4 bvar::Adder<int64_t> inpool_elapse_us; bvar::PassiveStatus<double> inpool_elapse_s; bvar::PerSecond<bvar::PassiveStatus<double> > pool_usage; UserCodeBackupPool(); int Init(); void UserCodeRunningLoop(); }; static pthread_mutex_t s_usercode_mutex = PTHREAD_MUTEX_INITIALIZER; static pthread_cond_t s_usercode_cond = PTHREAD_COND_INITIALIZER; static pthread_once_t s_usercode_init = PTHREAD_ONCE_INIT; butil::static_atomic<int> g_usercode_inplace = BUTIL_STATIC_ATOMIC_INIT(0); bool g_too_many_usercode = false; static UserCodeBackupPool* s_usercode_pool = NULL; static int GetUserCodeInPlace(void*) { return g_usercode_inplace.load(butil::memory_order_relaxed); } static size_t GetUserCodeQueueSize(void*) { BAIDU_SCOPED_LOCK(s_usercode_mutex); return (s_usercode_pool != NULL ? 
s_usercode_pool->queue.size() : 0); } static double GetInPoolElapseInSecond(void* arg) { return static_cast<bvar::Adder<int64_t>*>(arg)->get_value() / 1000000.0; } UserCodeBackupPool::UserCodeBackupPool() : inplace_var("rpc_usercode_inplace", GetUserCodeInPlace, NULL) , queue_size_var("rpc_usercode_queue_size", GetUserCodeQueueSize, NULL) , inpool_count("rpc_usercode_backup_count") , inpool_per_second("rpc_usercode_backup_second", &inpool_count) , inpool_elapse_s(GetInPoolElapseInSecond, &inpool_elapse_us) , pool_usage("rpc_usercode_backup_usage", &inpool_elapse_s, 1) { } static void* UserCodeRunner(void* args) { static_cast<UserCodeBackupPool*>(args)->UserCodeRunningLoop(); return NULL; } int UserCodeBackupPool::Init() { // Like bthread workers, these threads never quit (to avoid potential hang // during termination of program). for (int i = 0; i < FLAGS_usercode_backup_threads; ++i) { pthread_t th; if (pthread_create(&th, NULL, UserCodeRunner, this) != 0) { LOG(ERROR) << "Fail to create UserCodeRunner"; return -1; } } return 0; } // Entry of backup thread for running user code. void UserCodeBackupPool::UserCodeRunningLoop() { bthread::run_worker_startfn(); #ifdef BAIDU_INTERNAL logging::ComlogInitializer comlog_initializer; #endif int64_t last_time = butil::cpuwide_time_us(); while (true) { bool blocked = false; UserCode usercode = { NULL, NULL }; { BAIDU_SCOPED_LOCK(s_usercode_mutex); while (queue.empty()) { pthread_cond_wait(&s_usercode_cond, &s_usercode_mutex); blocked = true; } usercode = queue.front(); queue.pop_front(); if (g_too_many_usercode && (int)queue.size() <= FLAGS_usercode_backup_threads) { g_too_many_usercode = false; } } const int64_t begin_time = (blocked ? 
butil::cpuwide_time_us() : last_time); usercode.fn(usercode.arg); const int64_t end_time = butil::cpuwide_time_us(); inpool_count << 1; inpool_elapse_us << (end_time - begin_time); last_time = end_time; } } static void InitUserCodeBackupPool() { s_usercode_pool = new UserCodeBackupPool; if (s_usercode_pool->Init() != 0) { LOG(ERROR) << "Fail to init UserCodeBackupPool"; // rare and critical, often happen when the program just started since // this function is called from GlobalInitializeOrDieImpl() as well, // quiting is the best choice. exit(1); } } void InitUserCodeBackupPoolOnceOrDie() { pthread_once(&s_usercode_init, InitUserCodeBackupPool); } void EndRunningUserCodeInPool(void (*fn)(void*), void* arg) { InitUserCodeBackupPoolOnceOrDie(); g_usercode_inplace.fetch_sub(1, butil::memory_order_relaxed); // Not enough idle workers, run the code in backup threads to prevent // all workers from being blocked and no responses will be processed // anymore (deadlocked). const UserCode usercode = { fn, arg }; pthread_mutex_lock(&s_usercode_mutex); s_usercode_pool->queue.push_back(usercode); // If the queue has too many items, we can't drop the user code // directly which often must be run, for example: client-side done. // The solution is that we set a mark which is not cleared before // queue becomes short again. RPC code checks the mark before // submitting tasks that may generate more user code. if ((int)s_usercode_pool->queue.size() >= (FLAGS_usercode_backup_threads * FLAGS_max_pending_in_each_backup_thread)) { g_too_many_usercode = true; } pthread_mutex_unlock(&s_usercode_mutex); pthread_cond_signal(&s_usercode_cond); } } // namespace brpc
namespace bthread { extern BAIDU_THREAD_LOCAL TaskGroup* tls_task_group; template <typename T, size_t NBLOCK, size_t BLOCK_SIZE> class LazyArray { struct Block { butil::atomic<T> items[BLOCK_SIZE]; }; public: LazyArray() { memset(_blocks, 0, sizeof(butil::atomic<Block*>) * NBLOCK); } butil::atomic<T>* get_or_new(size_t index) { const size_t block_index = index / BLOCK_SIZE; if (block_index >= NBLOCK) { return NULL; } const size_t block_offset = index - block_index * BLOCK_SIZE; Block* b = _blocks[block_index].load(butil::memory_order_consume); if (b != NULL) { return b->items + block_offset; } b = new (std::nothrow) Block; if (NULL == b) { b = _blocks[block_index].load(butil::memory_order_consume); return (b ? b->items + block_offset : NULL); } // Set items to default value of T. std::fill(b->items, b->items + BLOCK_SIZE, T()); Block* expected = NULL; if (_blocks[block_index].compare_exchange_strong( expected, b, butil::memory_order_release, butil::memory_order_consume)) { return b->items + block_offset; } delete b; return expected->items + block_offset; } butil::atomic<T>* get(size_t index) const { const size_t block_index = index / BLOCK_SIZE; if (__builtin_expect(block_index < NBLOCK, 1)) { const size_t block_offset = index - block_index * BLOCK_SIZE; Block* const b = _blocks[block_index].load(butil::memory_order_consume); if (__builtin_expect(b != NULL, 1)) { return b->items + block_offset; } } return NULL; } private: butil::atomic<Block*> _blocks[NBLOCK]; }; typedef butil::atomic<int> EpollButex; static EpollButex* const CLOSING_GUARD = (EpollButex*)(intptr_t)-1L; #ifndef NDEBUG butil::static_atomic<int> break_nums = BASE_STATIC_ATOMIC_INIT(0); #endif // Able to address 67108864 file descriptors, should be enough. 
LazyArray<EpollButex*, 262144/*NBLOCK*/, 256/*BLOCK_SIZE*/> fd_butexes; static const int BTHREAD_DEFAULT_EPOLL_SIZE = 65536; class EpollThread { public: EpollThread() : _epfd(-1) , _stop(false) , _tid(0) { } int start(int epoll_size) { if (started()) { return -1; } _epfd = epoll_create(epoll_size); if (_epfd < 0) { PLOG(FATAL) << "Fail to epoll_create"; return -1; } if (bthread_start_background( &_tid, NULL, EpollThread::run_this, this) != 0) { close(_epfd); _epfd = -1; LOG(FATAL) << "Fail to create epoll bthread"; return -1; } return 0; } // Note: This function does not wake up suspended fd_wait. This is fine // since stop_and_join is only called on program's termination // (g_task_control.stop()), suspended bthreads do not block quit of // worker pthreads and completion of g_task_control.stop(). int stop_and_join() { if (!started()) { return 0; } // No matter what this function returns, _epfd will be set to -1 // (making started() false) to avoid latter stop_and_join() to // enter again. const int saved_epfd = _epfd; _epfd = -1; // epoll_wait cannot be woken up by closing _epfd. We wake up // epoll_wait by inserting a fd continuously triggering EPOLLOUT. // Visibility of _stop: constant EPOLLOUT forces epoll_wait to see // _stop (to be true) finally. 
_stop = true; int closing_epoll_pipe[2]; if (pipe(closing_epoll_pipe)) { PLOG(FATAL) << "Fail to create closing_epoll_pipe"; return -1; } epoll_event evt = { EPOLLOUT, { NULL } }; if (epoll_ctl(saved_epfd, EPOLL_CTL_ADD, closing_epoll_pipe[1], &evt) < 0) { PLOG(FATAL) << "Fail to add closing_epoll_pipe into epfd=" << saved_epfd; return -1; } const int rc = bthread_join(_tid, NULL); if (rc) { LOG(FATAL) << "Fail to join EpollThread, " << berror(rc); return -1; } close(closing_epoll_pipe[0]); close(closing_epoll_pipe[1]); close(saved_epfd); return 0; } int fd_wait(int fd, unsigned epoll_events, const timespec* abstime) { butil::atomic<EpollButex*>* p = fd_butexes.get_or_new(fd); if (NULL == p) { errno = ENOMEM; return -1; } EpollButex* butex = p->load(butil::memory_order_consume); if (NULL == butex) { // It is rare to wait on one file descriptor from multiple threads // simultaneously. Creating singleton by optimistic locking here // saves mutexes for each butex. butex = butex_create_checked<EpollButex>(); butex->store(0, butil::memory_order_relaxed); EpollButex* expected = NULL; if (!p->compare_exchange_strong(expected, butex, butil::memory_order_release, butil::memory_order_consume)) { butex_destroy(butex); butex = expected; } } while (butex == CLOSING_GUARD) { // bthread_close() is running. if (sched_yield() < 0) { return -1; } butex = p->load(butil::memory_order_consume); } // Save value of butex before adding to epoll because the butex may // be changed before butex_wait. No memory fence because EPOLL_CTL_MOD // and EPOLL_CTL_ADD shall have release fence. 
const int expected_val = butex->load(butil::memory_order_relaxed); #ifdef BAIDU_KERNEL_FIXED_EPOLLONESHOT_BUG epoll_event evt = { epoll_events | EPOLLONESHOT, { butex } }; if (epoll_ctl(_epfd, EPOLL_CTL_MOD, fd, &evt) < 0) { if (epoll_ctl(_epfd, EPOLL_CTL_ADD, fd, &evt) < 0 && errno != EEXIST) { PLOG(FATAL) << "Fail to add fd=" << fd << " into epfd=" << _epfd; return -1; } } #else epoll_event evt; evt.events = epoll_events; evt.data.fd = fd; if (epoll_ctl(_epfd, EPOLL_CTL_ADD, fd, &evt) < 0 && errno != EEXIST) { PLOG(FATAL) << "Fail to add fd=" << fd << " into epfd=" << _epfd; return -1; } #endif const int rc = butex_wait(butex, expected_val, abstime); if (rc < 0 && errno == EWOULDBLOCK) { // EpollThread did wake up, there's data. return 0; } return rc; } int fd_close(int fd) { if (fd < 0) { // what close(-1) returns errno = EBADF; return -1; } butil::atomic<EpollButex*>* pbutex = bthread::fd_butexes.get(fd); if (NULL == pbutex) { // Did not call bthread_fd functions, close directly. return close(fd); } EpollButex* butex = pbutex->exchange( CLOSING_GUARD, butil::memory_order_relaxed); if (butex == CLOSING_GUARD) { // concurrent double close detected. errno = EBADF; return -1; } if (butex != NULL) { butex->fetch_add(1, butil::memory_order_relaxed); butex_wake_all(butex); } epoll_ctl(_epfd, EPOLL_CTL_DEL, fd, NULL); const int rc = close(fd); pbutex->exchange(butex, butil::memory_order_relaxed); return rc; } bool started() const { return _epfd >= 0; } private: static void* run_this(void* arg) { return static_cast<EpollThread*>(arg)->run(); } void* run() { const int initial_epfd = _epfd; const size_t MAX_EVENTS = 32; epoll_event* e = new (std::nothrow) epoll_event[MAX_EVENTS]; if (NULL == e) { LOG(FATAL) << "Fail to new epoll_event"; return NULL; } #ifndef BAIDU_KERNEL_FIXED_EPOLLONESHOT_BUG DLOG(INFO) << "Use DEL+ADD instead of EPOLLONESHOT+MOD due to kernel bug. 
Performance will be much lower."; #endif while (!_stop) { const int epfd = _epfd; const int n = epoll_wait(epfd, e, MAX_EVENTS, -1); if (_stop) { break; } if (n < 0) { if (errno == EINTR) { #ifndef NDEBUG break_nums.fetch_add(1, butil::memory_order_relaxed); int* p = &errno; const char* b = berror(); const char* b2 = berror(errno); DLOG(FATAL) << "Fail to epoll epfd=" << epfd << ", " << errno << " " << p << " " << b << " " << b2; #endif continue; } PLOG(INFO) << "Fail to epoll epfd=" << epfd; break; } #ifndef BAIDU_KERNEL_FIXED_EPOLLONESHOT_BUG for (int i = 0; i < n; ++i) { epoll_ctl(epfd, EPOLL_CTL_DEL, e[i].data.fd, NULL); } #endif for (int i = 0; i < n; ++i) { #ifdef BAIDU_KERNEL_FIXED_EPOLLONESHOT_BUG EpollButex* butex = static_cast<EpollButex*>(e[i].data.ptr); #else butil::atomic<EpollButex*>* pbutex = fd_butexes.get(e[i].data.fd); EpollButex* butex = pbutex ? pbutex->load(butil::memory_order_consume) : NULL; #endif if (butex != NULL && butex != CLOSING_GUARD) { butex->fetch_add(1, butil::memory_order_relaxed); butex_wake_all(butex); } } } delete [] e; DLOG(INFO) << "EpollThread=" << _tid << "(epfd=" << initial_epfd << ") is about to stop"; return NULL; } int _epfd; bool _stop; bthread_t _tid; }; EpollThread epoll_thread[BTHREAD_EPOLL_THREAD_NUM]; static inline EpollThread& get_epoll_thread(int fd) { if (BTHREAD_EPOLL_THREAD_NUM == 1UL) { EpollThread& et = epoll_thread[0]; et.start(BTHREAD_DEFAULT_EPOLL_SIZE); return et; } EpollThread& et = epoll_thread[butil::fmix32(fd) % BTHREAD_EPOLL_THREAD_NUM]; et.start(BTHREAD_DEFAULT_EPOLL_SIZE); return et; } int stop_and_join_epoll_threads() { // Returns -1 if any epoll thread failed to stop. int rc = 0; for (size_t i = 0; i < BTHREAD_EPOLL_THREAD_NUM; ++i) { if (epoll_thread[i].stop_and_join() < 0) { rc = -1; } } return rc; } short epoll_to_poll_events(uint32_t epoll_events) { // Most POLL* and EPOLL* are same values. 
short poll_events = (epoll_events & (EPOLLIN | EPOLLPRI | EPOLLOUT | EPOLLRDNORM | EPOLLRDBAND | EPOLLWRNORM | EPOLLWRBAND | EPOLLMSG | EPOLLERR | EPOLLHUP)); CHECK_EQ((uint32_t)poll_events, epoll_events); return poll_events; } // For pthreads. int pthread_fd_wait(int fd, unsigned epoll_events, const timespec* abstime) { int diff_ms = -1; if (abstime) { timespec now; clock_gettime(CLOCK_REALTIME, &now); int64_t now_us = butil::timespec_to_microseconds(now); int64_t abstime_us = butil::timespec_to_microseconds(*abstime); if (abstime_us <= now_us) { errno = ETIMEDOUT; return -1; } diff_ms = (abstime_us - now_us + 999L) / 1000L; } const short poll_events = bthread::epoll_to_poll_events(epoll_events); if (poll_events == 0) { errno = EINVAL; return -1; } pollfd ufds = { fd, poll_events, 0 }; const int rc = poll(&ufds, 1, diff_ms); if (rc < 0) { return -1; } if (rc == 0) { errno = ETIMEDOUT; return -1; } if (ufds.revents & POLLNVAL) { errno = EBADF; return -1; } return 0; } } // namespace bthread
void* run() { const int initial_epfd = _epfd; const size_t MAX_EVENTS = 32; epoll_event* e = new (std::nothrow) epoll_event[MAX_EVENTS]; if (NULL == e) { LOG(FATAL) << "Fail to new epoll_event"; return NULL; } #ifndef BAIDU_KERNEL_FIXED_EPOLLONESHOT_BUG DLOG(INFO) << "Use DEL+ADD instead of EPOLLONESHOT+MOD due to kernel bug. Performance will be much lower."; #endif while (!_stop) { const int epfd = _epfd; const int n = epoll_wait(epfd, e, MAX_EVENTS, -1); if (_stop) { break; } if (n < 0) { if (errno == EINTR) { #ifndef NDEBUG break_nums.fetch_add(1, butil::memory_order_relaxed); int* p = &errno; const char* b = berror(); const char* b2 = berror(errno); DLOG(FATAL) << "Fail to epoll epfd=" << epfd << ", " << errno << " " << p << " " << b << " " << b2; #endif continue; } PLOG(INFO) << "Fail to epoll epfd=" << epfd; break; } #ifndef BAIDU_KERNEL_FIXED_EPOLLONESHOT_BUG for (int i = 0; i < n; ++i) { epoll_ctl(epfd, EPOLL_CTL_DEL, e[i].data.fd, NULL); } #endif for (int i = 0; i < n; ++i) { #ifdef BAIDU_KERNEL_FIXED_EPOLLONESHOT_BUG EpollButex* butex = static_cast<EpollButex*>(e[i].data.ptr); #else butil::atomic<EpollButex*>* pbutex = fd_butexes.get(e[i].data.fd); EpollButex* butex = pbutex ? pbutex->load(butil::memory_order_consume) : NULL; #endif if (butex != NULL && butex != CLOSING_GUARD) { butex->fetch_add(1, butil::memory_order_relaxed); butex_wake_all(butex); } } } delete [] e; DLOG(INFO) << "EpollThread=" << _tid << "(epfd=" << initial_epfd << ") is about to stop"; return NULL; }
namespace bvar { template <class T, class M> M get_member_type(M T::*); #define BVAR_MEMBER_TYPE(member) BAIDU_TYPEOF(bvar::get_member_type(member)) int do_link_default_variables = 0; const int64_t CACHED_INTERVAL_US = 100000L; // 100ms // ====================================== struct ProcStat { int pid; //std::string comm; char state; int ppid; int pgrp; int session; int tty_nr; int tpgid; uint32_t flags; uint64_t minflt; uint64_t cminflt; uint64_t majflt; uint64_t cmajflt; uint64_t utime; uint64_t stime; uint64_t cutime; uint64_t cstime; int64_t priority; int64_t nice; int64_t num_threads; }; // Read status from /proc/self/stat. Information from `man proc' is out of date, // see http://man7.org/linux/man-pages/man5/proc.5.html static bool read_proc_status(ProcStat &stat) { butil::ScopedFILE fp("/proc/self/stat", "r"); if (NULL == fp) { PLOG_ONCE(WARNING) << "Fail to open /proc/self/stat"; return false; } stat = ProcStat(); errno = 0; if (fscanf(fp, "%d %*s %c " "%d %d %d %d %d " "%u %lu %lu %lu " "%lu %lu %lu %lu %lu " "%ld %ld %ld", &stat.pid, &stat.state, &stat.ppid, &stat.pgrp, &stat.session, &stat.tty_nr, &stat.tpgid, &stat.flags, &stat.minflt, &stat.cminflt, &stat.majflt, &stat.cmajflt, &stat.utime, &stat.stime, &stat.cutime, &stat.cstime, &stat.priority, &stat.nice, &stat.num_threads) != 19) { PLOG(WARNING) << "Fail to fscanf"; return false; } return true; } // Reduce pressures to functions to get system metrics. template <typename T> class CachedReader { public: CachedReader() : _mtime_us(0) { CHECK_EQ(0, pthread_mutex_init(&_mutex, NULL)); } ~CachedReader() { pthread_mutex_destroy(&_mutex); } // NOTE: may return a volatile value that may be overwritten at any time. // This is acceptable right now. Both 32-bit and 64-bit numbers are atomic // to fetch in 64-bit machines(most of baidu machines) and the code inside // this .cpp utilizing this class generally return a struct with 32-bit // and 64-bit numbers. 
template <typename ReadFn> static const T& get_value(const ReadFn& fn) { CachedReader* p = butil::get_leaky_singleton<CachedReader>(); const int64_t now = butil::gettimeofday_us(); if (now > p->_mtime_us + CACHED_INTERVAL_US) { pthread_mutex_lock(&p->_mutex); if (now > p->_mtime_us + CACHED_INTERVAL_US) { p->_mtime_us = now; pthread_mutex_unlock(&p->_mutex); // don't run fn inside lock otherwise a slow fn may // block all concurrent bvar dumppers. (e.g. /vars) T result; if (fn(&result)) { pthread_mutex_lock(&p->_mutex); p->_cached = result; } else { pthread_mutex_lock(&p->_mutex); } } pthread_mutex_unlock(&p->_mutex); } return p->_cached; } private: int64_t _mtime_us; pthread_mutex_t _mutex; T _cached; }; class ProcStatReader { public: bool operator()(ProcStat* stat) const { return read_proc_status(*stat); } template <typename T, size_t offset> static T get_field(void*) { return *(T*)((char*)&CachedReader<ProcStat>::get_value( ProcStatReader()) + offset); } }; #define BVAR_DEFINE_PROC_STAT_FIELD(field) \ PassiveStatus<BVAR_MEMBER_TYPE(&ProcStat::field)> g_##field( \ ProcStatReader::get_field<BVAR_MEMBER_TYPE(&ProcStat::field), \ offsetof(ProcStat, field)>, NULL); #define BVAR_DEFINE_PROC_STAT_FIELD2(field, name) \ PassiveStatus<BVAR_MEMBER_TYPE(&ProcStat::field)> g_##field( \ name, \ ProcStatReader::get_field<BVAR_MEMBER_TYPE(&ProcStat::field), \ offsetof(ProcStat, field)>, NULL); // ================================================== struct ProcMemory { int64_t size; // total program size int64_t resident; // resident set size int64_t share; // shared pages int64_t trs; // text (code) int64_t drs; // data/stack int64_t lrs; // library int64_t dt; // dirty pages }; static bool read_proc_memory(ProcMemory &m) { butil::ScopedFILE fp("/proc/self/statm", "r"); if (NULL == fp) { PLOG_ONCE(WARNING) << "Fail to open /proc/self/statm"; return false; } m = ProcMemory(); errno = 0; if (fscanf(fp, "%ld %ld %ld %ld %ld %ld %ld", &m.size, &m.resident, &m.share, &m.trs, &m.drs, 
&m.lrs, &m.dt) != 7) { PLOG(WARNING) << "Fail to fscanf"; return false; } return true; } class ProcMemoryReader { public: bool operator()(ProcMemory* stat) const { return read_proc_memory(*stat); }; template <typename T, size_t offset> static T get_field(void*) { static int64_t pagesize = getpagesize(); return *(T*)((char*)&CachedReader<ProcMemory>::get_value( ProcMemoryReader()) + offset) * pagesize; } }; #define BVAR_DEFINE_PROC_MEMORY_FIELD(field, name) \ PassiveStatus<BVAR_MEMBER_TYPE(&ProcMemory::field)> g_##field( \ name, \ ProcMemoryReader::get_field<BVAR_MEMBER_TYPE(&ProcMemory::field), \ offsetof(ProcMemory, field)>, NULL); // ================================================== struct LoadAverage { double loadavg_1m; double loadavg_5m; double loadavg_15m; }; static bool read_load_average(LoadAverage &m) { butil::ScopedFILE fp("/proc/loadavg", "r"); if (NULL == fp) { PLOG_ONCE(WARNING) << "Fail to open /proc/loadavg"; return false; } m = LoadAverage(); errno = 0; if (fscanf(fp, "%lf %lf %lf", &m.loadavg_1m, &m.loadavg_5m, &m.loadavg_15m) != 3) { PLOG(WARNING) << "Fail to fscanf"; return false; } return true; } class LoadAverageReader { public: bool operator()(LoadAverage* stat) const { return read_load_average(*stat); }; template <typename T, size_t offset> static T get_field(void*) { return *(T*)((char*)&CachedReader<LoadAverage>::get_value( LoadAverageReader()) + offset); } }; #define BVAR_DEFINE_LOAD_AVERAGE_FIELD(field, name) \ PassiveStatus<BVAR_MEMBER_TYPE(&LoadAverage::field)> g_##field( \ name, \ LoadAverageReader::get_field<BVAR_MEMBER_TYPE(&LoadAverage::field), \ offsetof(LoadAverage, field)>, NULL); // ================================================== static int get_fd_count(int limit) { butil::DirReaderPosix dr("/proc/self/fd"); int count = 0; if (!dr.IsValid()) { PLOG(WARNING) << "Fail to open /proc/self/fd"; return -1; } // Have to limit the scaning which consumes a lot of CPU when #fd // are huge (100k+) for (; dr.Next() && count <= limit + 
3; ++count) {} return count - 3 /* skipped ., .. and the fd in dr*/; } extern PassiveStatus<int> g_fd_num; const int MAX_FD_SCAN_COUNT = 10003; static butil::static_atomic<bool> s_ever_reached_fd_scan_limit = BUTIL_STATIC_ATOMIC_INIT(false); class FdReader { public: bool operator()(int* stat) const { if (s_ever_reached_fd_scan_limit.load(butil::memory_order_relaxed)) { // Never update the count again. return false; } const int count = get_fd_count(MAX_FD_SCAN_COUNT); if (count < 0) { return false; } if (count == MAX_FD_SCAN_COUNT - 2 && s_ever_reached_fd_scan_limit.exchange( true, butil::memory_order_relaxed) == false) { // Rename the bvar to notify user. g_fd_num.hide(); g_fd_num.expose("process_fd_num_too_many"); } *stat = count; return true; } }; static int print_fd_count(void*) { return CachedReader<int>::get_value(FdReader()); } // ================================================== struct ProcIO { // number of bytes the process read, using any read-like system call (from // files, pipes, tty...). size_t rchar; // number of bytes the process wrote using any write-like system call. size_t wchar; // number of read-like system call invocations that the process performed. size_t syscr; // number of write-like system call invocations that the process performed. size_t syscw; // number of bytes the process directly read from disk. size_t read_bytes; // number of bytes the process originally dirtied in the page-cache // (assuming they will go to disk later). size_t write_bytes; // number of bytes the process "un-dirtied" - e.g. using an "ftruncate" // call that truncated pages from the page-cache. 
size_t cancelled_write_bytes; }; static bool read_proc_io(ProcIO* s) { butil::ScopedFILE fp("/proc/self/io", "r"); if (NULL == fp) { PLOG_ONCE(WARNING) << "Fail to open /proc/self/io"; return false; } errno = 0; if (fscanf(fp, "%*s %lu %*s %lu %*s %lu %*s %lu %*s %lu %*s %lu %*s %lu", &s->rchar, &s->wchar, &s->syscr, &s->syscw, &s->read_bytes, &s->write_bytes, &s->cancelled_write_bytes) != 7) { PLOG(WARNING) << "Fail to fscanf"; return false; } return true; } class ProcIOReader { public: bool operator()(ProcIO* stat) const { return read_proc_io(stat); } template <typename T, size_t offset> static T get_field(void*) { return *(T*)((char*)&CachedReader<ProcIO>::get_value( ProcIOReader()) + offset); } }; #define BVAR_DEFINE_PROC_IO_FIELD(field) \ PassiveStatus<BVAR_MEMBER_TYPE(&ProcIO::field)> g_##field( \ ProcIOReader::get_field<BVAR_MEMBER_TYPE(&ProcIO::field), \ offsetof(ProcIO, field)>, NULL); // ================================================== // Refs: // https://www.kernel.org/doc/Documentation/ABI/testing/procfs-diskstats // https://www.kernel.org/doc/Documentation/iostats.txt // // The /proc/diskstats file displays the I/O statistics of block devices. // Each line contains the following 14 fields: struct DiskStat { long long major_number; long long minor_mumber; char device_name[64]; // The total number of reads completed successfully. long long reads_completed; // wMB/s wKB/s // Reads and writes which are adjacent to each other may be merged for // efficiency. Thus two 4K reads may become one 8K read before it is // ultimately handed to the disk, and so it will be counted (and queued) // as only one I/O. This field lets you know how often this was done. long long reads_merged; // rrqm/s // The total number of sectors read successfully. long long sectors_read; // rsec/s // The total number of milliseconds spent by all reads (as // measured from __make_request() to end_that_request_last()). 
long long time_spent_reading_ms; // The total number of writes completed successfully. long long writes_completed; // rKB/s rMB/s // See description of reads_merged long long writes_merged; // wrqm/s // The total number of sectors written successfully. long long sectors_written; // wsec/s // The total number of milliseconds spent by all writes (as // measured from __make_request() to end_that_request_last()). long long time_spent_writing_ms; // The only field that should go to zero. Incremented as requests are // given to appropriate struct request_queue and decremented as they finish. long long io_in_progress; // This field increases so long as `io_in_progress' is nonzero. long long time_spent_io_ms; // This field is incremented at each I/O start, I/O completion, I/O // merge, or read of these stats by the number of I/Os in progress // `io_in_progress' times the number of milliseconds spent doing // I/O since the last update of this field. This can provide an easy // measure of both I/O completion time and the backlog that may be // accumulating. 
long long weighted_time_spent_io_ms; }; static bool read_disk_stat(DiskStat* s) { butil::ScopedFILE fp("/proc/diskstats", "r"); if (NULL == fp) { PLOG_ONCE(WARNING) << "Fail to open /proc/diskstats"; return false; } errno = 0; if (fscanf(fp, "%lld %lld %s %lld %lld %lld %lld %lld %lld %lld " "%lld %lld %lld %lld", &s->major_number, &s->minor_mumber, s->device_name, &s->reads_completed, &s->reads_merged, &s->sectors_read, &s->time_spent_reading_ms, &s->writes_completed, &s->writes_merged, &s->sectors_written, &s->time_spent_writing_ms, &s->io_in_progress, &s->time_spent_io_ms, &s->weighted_time_spent_io_ms) != 14) { PLOG(WARNING) << "Fail to fscanf"; return false; } return true; } class DiskStatReader { public: bool operator()(DiskStat* stat) const { return read_disk_stat(stat); } template <typename T, size_t offset> static T get_field(void*) { return *(T*)((char*)&CachedReader<DiskStat>::get_value( DiskStatReader()) + offset); } }; #define BVAR_DEFINE_DISK_STAT_FIELD(field) \ PassiveStatus<BVAR_MEMBER_TYPE(&DiskStat::field)> g_##field( \ DiskStatReader::get_field<BVAR_MEMBER_TYPE(&DiskStat::field), \ offsetof(DiskStat, field)>, NULL); // ===================================== static std::string read_first_line(const char* filepath) { char * line = NULL; size_t len = 0; butil::ScopedFILE fp(filepath, "r"); if (fp == NULL) { return ""; } std::string result; ssize_t nr = getline(&line, &len, fp); if (nr != -1) { for (ssize_t i = 0; i < nr; ++i) { if (line[i] == '\0') { line[i] = ' '; } } for (; nr >= 1 && isspace(line[nr - 1]); --nr) {} // trim. 
result.assign(line, nr); } free(line); return result; } struct ReadProcSelfCmdline { std::string content; ReadProcSelfCmdline() : content(read_first_line("/proc/self/cmdline")) {} }; static void get_cmdline(std::ostream& os, void*) { os << butil::get_leaky_singleton<ReadProcSelfCmdline>()->content; } struct ReadProcVersion { std::string content; ReadProcVersion() : content(read_first_line("/proc/version")) {} }; static void get_kernel_version(std::ostream& os, void*) { os << butil::get_leaky_singleton<ReadProcVersion>()->content; } // ====================================== static int64_t g_starting_time = butil::gettimeofday_us(); static timeval get_uptime(void*) { int64_t uptime_us = butil::gettimeofday_us() - g_starting_time; timeval tm; tm.tv_sec = uptime_us / 1000000L; tm.tv_usec = uptime_us - tm.tv_sec * 1000000L; return tm; } // ====================================== class RUsageReader { public: bool operator()(rusage* stat) const { const int rc = getrusage(RUSAGE_SELF, stat); if (rc < 0) { PLOG(WARNING) << "Fail to getrusage"; return false; } return true; } template <typename T, size_t offset> static T get_field(void*) { return *(T*)((char*)&CachedReader<rusage>::get_value( RUsageReader()) + offset); } }; #define BVAR_DEFINE_RUSAGE_FIELD(field) \ PassiveStatus<BVAR_MEMBER_TYPE(&rusage::field)> g_##field( \ RUsageReader::get_field<BVAR_MEMBER_TYPE(&rusage::field), \ offsetof(rusage, field)>, NULL); \ #define BVAR_DEFINE_RUSAGE_FIELD2(field, name) \ PassiveStatus<BVAR_MEMBER_TYPE(&rusage::field)> g_##field( \ name, \ RUsageReader::get_field<BVAR_MEMBER_TYPE(&rusage::field), \ offsetof(rusage, field)>, NULL); \ // ====================================== BVAR_DEFINE_PROC_STAT_FIELD2(pid, "pid"); BVAR_DEFINE_PROC_STAT_FIELD2(ppid, "ppid"); BVAR_DEFINE_PROC_STAT_FIELD2(pgrp, "pgrp"); static void get_username(std::ostream& os, void*) { char buf[32]; if (getlogin_r(buf, sizeof(buf)) == 0) { buf[sizeof(buf)-1] = '\0'; os << buf; } else { os << "unknown (" << berror() 
<< ')' ; } } PassiveStatus<std::string> g_username( "process_username", get_username, NULL); BVAR_DEFINE_PROC_STAT_FIELD(minflt); PerSecond<PassiveStatus<uint64_t> > g_minflt_second( "process_faults_minor_second", &g_minflt); BVAR_DEFINE_PROC_STAT_FIELD2(majflt, "process_faults_major"); BVAR_DEFINE_PROC_STAT_FIELD2(priority, "process_priority"); BVAR_DEFINE_PROC_STAT_FIELD2(nice, "process_nice"); BVAR_DEFINE_PROC_STAT_FIELD2(num_threads, "process_thread_count"); PassiveStatus<int> g_fd_num("process_fd_count", print_fd_count, NULL); BVAR_DEFINE_PROC_MEMORY_FIELD(size, "process_memory_virtual"); BVAR_DEFINE_PROC_MEMORY_FIELD(resident, "process_memory_resident"); BVAR_DEFINE_PROC_MEMORY_FIELD(share, "process_memory_shared"); BVAR_DEFINE_PROC_MEMORY_FIELD(trs, "process_memory_text"); BVAR_DEFINE_PROC_MEMORY_FIELD(drs, "process_memory_data_and_stack"); BVAR_DEFINE_PROC_MEMORY_FIELD(lrs, "process_memory_library"); BVAR_DEFINE_PROC_MEMORY_FIELD(dt, "process_memory_dirty"); BVAR_DEFINE_LOAD_AVERAGE_FIELD(loadavg_1m, "system_loadavg_1m"); BVAR_DEFINE_LOAD_AVERAGE_FIELD(loadavg_5m, "system_loadavg_5m"); BVAR_DEFINE_LOAD_AVERAGE_FIELD(loadavg_15m, "system_loadavg_15m"); BVAR_DEFINE_PROC_IO_FIELD(rchar); BVAR_DEFINE_PROC_IO_FIELD(wchar); PerSecond<PassiveStatus<size_t> > g_io_read_second( "process_io_read_bytes_second", &g_rchar); PerSecond<PassiveStatus<size_t> > g_io_write_second( "process_io_write_bytes_second", &g_wchar); BVAR_DEFINE_PROC_IO_FIELD(syscr); BVAR_DEFINE_PROC_IO_FIELD(syscw); PerSecond<PassiveStatus<size_t> > g_io_num_reads_second( "process_io_read_second", &g_syscr); PerSecond<PassiveStatus<size_t> > g_io_num_writes_second( "process_io_write_second", &g_syscw); BVAR_DEFINE_PROC_IO_FIELD(read_bytes); BVAR_DEFINE_PROC_IO_FIELD(write_bytes); PerSecond<PassiveStatus<size_t> > g_disk_read_second( "process_disk_read_bytes_second", &g_read_bytes); PerSecond<PassiveStatus<size_t> > g_disk_write_second( "process_disk_write_bytes_second", &g_write_bytes); 
BVAR_DEFINE_RUSAGE_FIELD(ru_utime); BVAR_DEFINE_RUSAGE_FIELD(ru_stime); PassiveStatus<timeval> g_uptime("process_uptime", get_uptime, NULL); static int get_core_num(void*) { return sysconf(_SC_NPROCESSORS_ONLN); } PassiveStatus<int> g_core_num("system_core_count", get_core_num, NULL); struct TimePercent { int64_t time_us; int64_t real_time_us; void operator-=(const TimePercent& rhs) { time_us -= rhs.time_us; real_time_us -= rhs.real_time_us; } void operator+=(const TimePercent& rhs) { time_us += rhs.time_us; real_time_us += rhs.real_time_us; } }; inline std::ostream& operator<<(std::ostream& os, const TimePercent& tp) { if (tp.real_time_us <= 0) { return os << "0"; } else { return os << std::fixed << std::setprecision(3) << (double)tp.time_us / tp.real_time_us; } } static TimePercent get_cputime_percent(void*) { TimePercent tp = { butil::timeval_to_microseconds(g_ru_stime.get_value()) + butil::timeval_to_microseconds(g_ru_utime.get_value()), butil::timeval_to_microseconds(g_uptime.get_value()) }; return tp; } PassiveStatus<TimePercent> g_cputime_percent(get_cputime_percent, NULL); Window<PassiveStatus<TimePercent>, SERIES_IN_SECOND> g_cputime_percent_second( "process_cpu_usage", &g_cputime_percent, FLAGS_bvar_dump_interval); static TimePercent get_stime_percent(void*) { TimePercent tp = { butil::timeval_to_microseconds(g_ru_stime.get_value()), butil::timeval_to_microseconds(g_uptime.get_value()) }; return tp; } PassiveStatus<TimePercent> g_stime_percent(get_stime_percent, NULL); Window<PassiveStatus<TimePercent>, SERIES_IN_SECOND> g_stime_percent_second( "process_cpu_usage_system", &g_stime_percent, FLAGS_bvar_dump_interval); static TimePercent get_utime_percent(void*) { TimePercent tp = { butil::timeval_to_microseconds(g_ru_utime.get_value()), butil::timeval_to_microseconds(g_uptime.get_value()) }; return tp; } PassiveStatus<TimePercent> g_utime_percent(get_utime_percent, NULL); Window<PassiveStatus<TimePercent>, SERIES_IN_SECOND> g_utime_percent_second( 
"process_cpu_usage_user", &g_utime_percent, FLAGS_bvar_dump_interval); // According to http://man7.org/linux/man-pages/man2/getrusage.2.html // Unsupported fields in linux: // ru_ixrss // ru_idrss // ru_isrss // ru_nswap // ru_nsignals BVAR_DEFINE_RUSAGE_FIELD(ru_inblock); BVAR_DEFINE_RUSAGE_FIELD(ru_oublock); BVAR_DEFINE_RUSAGE_FIELD(ru_nvcsw); BVAR_DEFINE_RUSAGE_FIELD(ru_nivcsw); PerSecond<PassiveStatus<long> > g_ru_inblock_second( "process_inblocks_second", &g_ru_inblock); PerSecond<PassiveStatus<long> > g_ru_oublock_second( "process_outblocks_second", &g_ru_oublock); PerSecond<PassiveStatus<long> > cs_vol_second( "process_context_switches_voluntary_second", &g_ru_nvcsw); PerSecond<PassiveStatus<long> > cs_invol_second( "process_context_switches_involuntary_second", &g_ru_nivcsw); PassiveStatus<std::string> g_cmdline("process_cmdline", get_cmdline, NULL); PassiveStatus<std::string> g_kernel_version( "kernel_version", get_kernel_version, NULL); static std::string* s_gcc_version = NULL; pthread_once_t g_gen_gcc_version_once = PTHREAD_ONCE_INIT; void gen_gcc_version() { #if defined(__GNUC__) const int gcc_major = __GNUC__; #else const int gcc_major = -1; #endif #if defined(__GNUC_MINOR__) const int gcc_minor = __GNUC_MINOR__; #else const int gcc_minor = -1; #endif #if defined(__GNUC_PATCHLEVEL__) const int gcc_patchlevel = __GNUC_PATCHLEVEL__; #else const int gcc_patchlevel = -1; #endif s_gcc_version = new std::string; if (gcc_major == -1) { *s_gcc_version = "unknown"; return; } std::ostringstream oss; oss << gcc_major; if (gcc_minor == -1) { return; } oss << '.' << gcc_minor; if (gcc_patchlevel == -1) { return; } oss << '.' 
<< gcc_patchlevel; *s_gcc_version = oss.str(); } void get_gcc_version(std::ostream& os, void*) { pthread_once(&g_gen_gcc_version_once, gen_gcc_version); os << *s_gcc_version; } // ============================================= PassiveStatus<std::string> g_gcc_version("gcc_version", get_gcc_version, NULL); void get_work_dir(std::ostream& os, void*) { butil::FilePath path; const bool rc = butil::GetCurrentDirectory(&path); LOG_IF(WARNING, !rc) << "Fail to GetCurrentDirectory"; os << path.value(); } PassiveStatus<std::string> g_work_dir("process_work_dir", get_work_dir, NULL); #undef BVAR_MEMBER_TYPE #undef BVAR_DEFINE_PROC_STAT_FIELD #undef BVAR_DEFINE_PROC_STAT_FIELD2 #undef BVAR_DEFINE_PROC_MEMORY_FIELD #undef BVAR_DEFINE_RUSAGE_FIELD #undef BVAR_DEFINE_RUSAGE_FIELD2 } // namespace bvar
namespace brpc { DECLARE_bool(usercode_in_pthread); DEFINE_int32(free_memory_to_system_interval, 0, "Try to return free memory to system every so many seconds, " "values <= 0 disables this feature"); BRPC_VALIDATE_GFLAG(free_memory_to_system_interval, PassValidate); namespace policy { // Defined in http_rpc_protocol.cpp void InitCommonStrings(); } using namespace policy; const char* const DUMMY_SERVER_PORT_FILE = "dummy_server.port"; struct GlobalExtensions { GlobalExtensions() : ch_mh_lb(MurmurHash32) , ch_md5_lb(MD5Hash32){} #ifdef BAIDU_INTERNAL BaiduNamingService bns; #endif FileNamingService fns; ListNamingService lns; DomainNamingService dns; RemoteFileNamingService rfns; ConsulNamingService cns; RoundRobinLoadBalancer rr_lb; WeightedRoundRobinLoadBalancer wrr_lb; RandomizedLoadBalancer randomized_lb; LocalityAwareLoadBalancer la_lb; ConsistentHashingLoadBalancer ch_mh_lb; ConsistentHashingLoadBalancer ch_md5_lb; DynPartLoadBalancer dynpart_lb; }; static pthread_once_t register_extensions_once = PTHREAD_ONCE_INIT; static GlobalExtensions* g_ext = NULL; static long ReadPortOfDummyServer(const char* filename) { butil::fd_guard fd(open(filename, O_RDONLY)); if (fd < 0) { LOG(ERROR) << "Fail to open `" << DUMMY_SERVER_PORT_FILE << "'"; return -1; } char port_str[32]; const ssize_t nr = read(fd, port_str, sizeof(port_str)); if (nr <= 0) { LOG(ERROR) << "Fail to read `" << DUMMY_SERVER_PORT_FILE << "': " << (nr == 0 ? 
"nothing to read" : berror()); return -1; } port_str[std::min((size_t)nr, sizeof(port_str)-1)] = '\0'; const char* p = port_str; for (; isspace(*p); ++p) {} char* endptr = NULL; const long port = strtol(p, &endptr, 10); for (; isspace(*endptr); ++endptr) {} if (*endptr != '\0') { LOG(ERROR) << "Invalid port=`" << port_str << "'"; return -1; } return port; } // Expose counters of butil::IOBuf static int64_t GetIOBufBlockCount(void*) { return butil::IOBuf::block_count(); } static int64_t GetIOBufBlockCountHitTLSThreshold(void*) { return butil::IOBuf::block_count_hit_tls_threshold(); } static int64_t GetIOBufNewBigViewCount(void*) { return butil::IOBuf::new_bigview_count(); } static int64_t GetIOBufBlockMemory(void*) { return butil::IOBuf::block_memory(); } // Defined in server.cpp extern butil::static_atomic<int> g_running_server_count; static int GetRunningServerCount(void*) { return g_running_server_count.load(butil::memory_order_relaxed); } // Update global stuff periodically. static void* GlobalUpdate(void*) { // Expose variables. 
bvar::PassiveStatus<int64_t> var_iobuf_block_count( "iobuf_block_count", GetIOBufBlockCount, NULL); bvar::PassiveStatus<int64_t> var_iobuf_block_count_hit_tls_threshold( "iobuf_block_count_hit_tls_threshold", GetIOBufBlockCountHitTLSThreshold, NULL); bvar::PassiveStatus<int64_t> var_iobuf_new_bigview_count( GetIOBufNewBigViewCount, NULL); bvar::PerSecond<bvar::PassiveStatus<int64_t> > var_iobuf_new_bigview_second( "iobuf_newbigview_second", &var_iobuf_new_bigview_count); bvar::PassiveStatus<int64_t> var_iobuf_block_memory( "iobuf_block_memory", GetIOBufBlockMemory, NULL); bvar::PassiveStatus<int> var_running_server_count( "rpc_server_count", GetRunningServerCount, NULL); butil::FileWatcher fw; if (fw.init_from_not_exist(DUMMY_SERVER_PORT_FILE) < 0) { LOG(FATAL) << "Fail to init FileWatcher on `" << DUMMY_SERVER_PORT_FILE << "'"; return NULL; } std::vector<SocketId> conns; const int64_t start_time_us = butil::gettimeofday_us(); const int WARN_NOSLEEP_THRESHOLD = 2; int64_t last_time_us = start_time_us; int consecutive_nosleep = 0; int64_t last_return_free_memory_time = start_time_us; while (1) { const int64_t sleep_us = 1000000L + last_time_us - butil::gettimeofday_us(); if (sleep_us > 0) { if (bthread_usleep(sleep_us) < 0) { PLOG_IF(FATAL, errno != ESTOP) << "Fail to sleep"; break; } consecutive_nosleep = 0; } else { if (++consecutive_nosleep >= WARN_NOSLEEP_THRESHOLD) { consecutive_nosleep = 0; LOG(WARNING) << __FUNCTION__ << " is too busy!"; } } last_time_us = butil::gettimeofday_us(); TrackMe(); if (!IsDummyServerRunning() && g_running_server_count.load(butil::memory_order_relaxed) == 0 && fw.check_and_consume() > 0) { long port = ReadPortOfDummyServer(DUMMY_SERVER_PORT_FILE); if (port >= 0) { StartDummyServerAt(port); } } SocketMapList(&conns); const int64_t now_ms = butil::cpuwide_time_ms(); for (size_t i = 0; i < conns.size(); ++i) { SocketUniquePtr ptr; if (Socket::Address(conns[i], &ptr) == 0) { ptr->UpdateStatsEverySecond(now_ms); } } const int 
return_mem_interval = FLAGS_free_memory_to_system_interval/*reloadable*/; if (return_mem_interval > 0 && last_time_us >= last_return_free_memory_time + return_mem_interval * 1000000L) { last_return_free_memory_time = last_time_us; // TODO: Calling MallocExtension::instance()->ReleaseFreeMemory may // crash the program in later calls to malloc, verified on tcmalloc // 1.7 and 2.5, which means making the static member function weak // in details/tcmalloc_extension.cpp is probably not correct, however // it does work for heap profilers. if (MallocExtension_ReleaseFreeMemory != NULL) { MallocExtension_ReleaseFreeMemory(); } else { #if defined(OS_LINUX) // GNU specific. malloc_trim(10 * 1024 * 1024/*leave 10M pad*/); #endif } } } return NULL; } static void BaiduStreamingLogHandler(google::protobuf::LogLevel level, const char* filename, int line, const std::string& message) { switch (level) { case google::protobuf::LOGLEVEL_INFO: LOG(INFO) << filename << ':' << line << ' ' << message; return; case google::protobuf::LOGLEVEL_WARNING: LOG(WARNING) << filename << ':' << line << ' ' << message; return; case google::protobuf::LOGLEVEL_ERROR: LOG(ERROR) << filename << ':' << line << ' ' << message; return; case google::protobuf::LOGLEVEL_FATAL: LOG(FATAL) << filename << ':' << line << ' ' << message; return; } CHECK(false) << filename << ':' << line << ' ' << message; } static void GlobalInitializeOrDieImpl() { ////////////////////////////////////////////////////////////////// // Be careful about usages of gflags inside this function which // // may be called before main() only seeing gflags with default // // values even if the gflags will be set after main(). // ////////////////////////////////////////////////////////////////// // Ignore SIGPIPE. 
struct sigaction oldact; if (sigaction(SIGPIPE, NULL, &oldact) != 0 || (oldact.sa_handler == NULL && oldact.sa_sigaction == NULL)) { CHECK(NULL == signal(SIGPIPE, SIG_IGN)); } // Make GOOGLE_LOG print to comlog device SetLogHandler(&BaiduStreamingLogHandler); // Setting the variable here does not work, the profiler probably check // the variable before main() for only once. // setenv("TCMALLOC_SAMPLE_PARAMETER", "524288", 0); // Initialize openssl library SSL_library_init(); // RPC doesn't require openssl.cnf, users can load it by themselves if needed SSL_load_error_strings(); if (SSLThreadInit() != 0 || SSLDHInit() != 0) { exit(1); } // Defined in http_rpc_protocol.cpp InitCommonStrings(); // Leave memory of these extensions to process's clean up. g_ext = new(std::nothrow) GlobalExtensions(); if (NULL == g_ext) { exit(1); } // Naming Services #ifdef BAIDU_INTERNAL NamingServiceExtension()->RegisterOrDie("bns", &g_ext->bns); #endif NamingServiceExtension()->RegisterOrDie("file", &g_ext->fns); NamingServiceExtension()->RegisterOrDie("list", &g_ext->lns); NamingServiceExtension()->RegisterOrDie("http", &g_ext->dns); NamingServiceExtension()->RegisterOrDie("remotefile", &g_ext->rfns); NamingServiceExtension()->RegisterOrDie("consul", &g_ext->cns); // Load Balancers LoadBalancerExtension()->RegisterOrDie("rr", &g_ext->rr_lb); LoadBalancerExtension()->RegisterOrDie("wrr", &g_ext->wrr_lb); LoadBalancerExtension()->RegisterOrDie("random", &g_ext->randomized_lb); LoadBalancerExtension()->RegisterOrDie("la", &g_ext->la_lb); LoadBalancerExtension()->RegisterOrDie("c_murmurhash", &g_ext->ch_mh_lb); LoadBalancerExtension()->RegisterOrDie("c_md5", &g_ext->ch_md5_lb); LoadBalancerExtension()->RegisterOrDie("_dynpart", &g_ext->dynpart_lb); // Compress Handlers const CompressHandler gzip_compress = { GzipCompress, GzipDecompress, "gzip" }; if (RegisterCompressHandler(COMPRESS_TYPE_GZIP, gzip_compress) != 0) { exit(1); } const CompressHandler zlib_compress = { ZlibCompress, 
ZlibDecompress, "zlib" }; if (RegisterCompressHandler(COMPRESS_TYPE_ZLIB, zlib_compress) != 0) { exit(1); } const CompressHandler snappy_compress = { SnappyCompress, SnappyDecompress, "snappy" }; if (RegisterCompressHandler(COMPRESS_TYPE_SNAPPY, snappy_compress) != 0) { exit(1); } // Protocols Protocol baidu_protocol = { ParseRpcMessage, SerializeRequestDefault, PackRpcRequest, ProcessRpcRequest, ProcessRpcResponse, VerifyRpcRequest, NULL, NULL, CONNECTION_TYPE_ALL, "baidu_std" }; if (RegisterProtocol(PROTOCOL_BAIDU_STD, baidu_protocol) != 0) { exit(1); } Protocol streaming_protocol = { ParseStreamingMessage, NULL, NULL, ProcessStreamingMessage, ProcessStreamingMessage, NULL, NULL, NULL, CONNECTION_TYPE_SINGLE, "streaming_rpc" }; if (RegisterProtocol(PROTOCOL_STREAMING_RPC, streaming_protocol) != 0) { exit(1); } Protocol http_protocol = { ParseHttpMessage, SerializeHttpRequest, PackHttpRequest, ProcessHttpRequest, ProcessHttpResponse, VerifyHttpRequest, ParseHttpServerAddress, GetHttpMethodName, CONNECTION_TYPE_POOLED_AND_SHORT, "http" }; if (RegisterProtocol(PROTOCOL_HTTP, http_protocol) != 0) { exit(1); } Protocol hulu_protocol = { ParseHuluMessage, SerializeRequestDefault, PackHuluRequest, ProcessHuluRequest, ProcessHuluResponse, VerifyHuluRequest, NULL, NULL, CONNECTION_TYPE_ALL, "hulu_pbrpc" }; if (RegisterProtocol(PROTOCOL_HULU_PBRPC, hulu_protocol) != 0) { exit(1); } // Only valid at client side Protocol nova_protocol = { ParseNsheadMessage, SerializeNovaRequest, PackNovaRequest, NULL, ProcessNovaResponse, NULL, NULL, NULL, CONNECTION_TYPE_POOLED_AND_SHORT, "nova_pbrpc" }; if (RegisterProtocol(PROTOCOL_NOVA_PBRPC, nova_protocol) != 0) { exit(1); } // Only valid at client side Protocol public_pbrpc_protocol = { ParseNsheadMessage, SerializePublicPbrpcRequest, PackPublicPbrpcRequest, NULL, ProcessPublicPbrpcResponse, NULL, NULL, NULL, // public_pbrpc server implementation // doesn't support full duplex CONNECTION_TYPE_POOLED_AND_SHORT, "public_pbrpc" }; if 
(RegisterProtocol(PROTOCOL_PUBLIC_PBRPC, public_pbrpc_protocol) != 0) { exit(1); } Protocol sofa_protocol = { ParseSofaMessage, SerializeRequestDefault, PackSofaRequest, ProcessSofaRequest, ProcessSofaResponse, VerifySofaRequest, NULL, NULL, CONNECTION_TYPE_ALL, "sofa_pbrpc" }; if (RegisterProtocol(PROTOCOL_SOFA_PBRPC, sofa_protocol) != 0) { exit(1); } // Only valid at server side. We generalize all the protocols that // prefixes with nshead as `nshead_protocol' and specify the content // parsing after nshead by ServerOptions.nshead_service. Protocol nshead_protocol = { ParseNsheadMessage, SerializeNsheadRequest, PackNsheadRequest, ProcessNsheadRequest, ProcessNsheadResponse, VerifyNsheadRequest, NULL, NULL, CONNECTION_TYPE_POOLED_AND_SHORT, "nshead" }; if (RegisterProtocol(PROTOCOL_NSHEAD, nshead_protocol) != 0) { exit(1); } Protocol mc_binary_protocol = { ParseMemcacheMessage, SerializeMemcacheRequest, PackMemcacheRequest, NULL, ProcessMemcacheResponse, NULL, NULL, GetMemcacheMethodName, CONNECTION_TYPE_ALL, "memcache" }; if (RegisterProtocol(PROTOCOL_MEMCACHE, mc_binary_protocol) != 0) { exit(1); } Protocol redis_protocol = { ParseRedisMessage, SerializeRedisRequest, PackRedisRequest, NULL, ProcessRedisResponse, NULL, NULL, GetRedisMethodName, CONNECTION_TYPE_ALL, "redis" }; if (RegisterProtocol(PROTOCOL_REDIS, redis_protocol) != 0) { exit(1); } Protocol mongo_protocol = { ParseMongoMessage, NULL, NULL, ProcessMongoRequest, NULL, NULL, NULL, NULL, CONNECTION_TYPE_POOLED, "mongo" }; if (RegisterProtocol(PROTOCOL_MONGO, mongo_protocol) != 0) { exit(1); } // Register Thrift framed protocol if linked if (RegisterThriftProtocol) { RegisterThriftProtocol(); } // Only valid at client side Protocol ubrpc_compack_protocol = { ParseNsheadMessage, SerializeUbrpcCompackRequest, PackUbrpcRequest, NULL, ProcessUbrpcResponse, NULL, NULL, NULL, CONNECTION_TYPE_POOLED_AND_SHORT, "ubrpc_compack" }; if (RegisterProtocol(PROTOCOL_UBRPC_COMPACK, ubrpc_compack_protocol) != 0) { 
exit(1); } Protocol ubrpc_mcpack2_protocol = { ParseNsheadMessage, SerializeUbrpcMcpack2Request, PackUbrpcRequest, NULL, ProcessUbrpcResponse, NULL, NULL, NULL, CONNECTION_TYPE_POOLED_AND_SHORT, "ubrpc_mcpack2" }; if (RegisterProtocol(PROTOCOL_UBRPC_MCPACK2, ubrpc_mcpack2_protocol) != 0) { exit(1); } // Only valid at client side Protocol nshead_mcpack_protocol = { ParseNsheadMessage, SerializeNsheadMcpackRequest, PackNsheadMcpackRequest, NULL, ProcessNsheadMcpackResponse, NULL, NULL, NULL, CONNECTION_TYPE_POOLED_AND_SHORT, "nshead_mcpack" }; if (RegisterProtocol(PROTOCOL_NSHEAD_MCPACK, nshead_mcpack_protocol) != 0) { exit(1); } Protocol rtmp_protocol = { ParseRtmpMessage, SerializeRtmpRequest, PackRtmpRequest, ProcessRtmpMessage, ProcessRtmpMessage, NULL, NULL, NULL, (ConnectionType)(CONNECTION_TYPE_SINGLE|CONNECTION_TYPE_SHORT), "rtmp" }; if (RegisterProtocol(PROTOCOL_RTMP, rtmp_protocol) != 0) { exit(1); } Protocol esp_protocol = { ParseEspMessage, SerializeEspRequest, PackEspRequest, NULL, ProcessEspResponse, NULL, NULL, NULL, CONNECTION_TYPE_POOLED_AND_SHORT, "esp"}; if (RegisterProtocol(PROTOCOL_ESP, esp_protocol) != 0) { exit(1); } std::vector<Protocol> protocols; ListProtocols(&protocols); for (size_t i = 0; i < protocols.size(); ++i) { if (protocols[i].process_response) { InputMessageHandler handler; // `process_response' is required at client side handler.parse = protocols[i].parse; handler.process = protocols[i].process_response; // No need to verify at client side handler.verify = NULL; handler.arg = NULL; handler.name = protocols[i].name; if (get_or_new_client_side_messenger()->AddHandler(handler) != 0) { exit(1); } } } if (FLAGS_usercode_in_pthread) { // Optional. If channel/server are initialized before main(), this // flag may be false at here even if it will be set to true after // main(). In which case, the usercode pool will not be initialized // until the pool is used. 
InitUserCodeBackupPoolOnceOrDie(); } // We never join GlobalUpdate, let it quit with the process. bthread_t th; CHECK(bthread_start_background(&th, NULL, GlobalUpdate, NULL) == 0) << "Fail to start GlobalUpdate"; } void GlobalInitializeOrDie() { if (pthread_once(®ister_extensions_once, GlobalInitializeOrDieImpl) != 0) { LOG(FATAL) << "Fail to pthread_once"; exit(1); } } } // namespace brpc
// Update global stuff periodically. static void* GlobalUpdate(void*) { // Expose variables. bvar::PassiveStatus<int64_t> var_iobuf_block_count( "iobuf_block_count", GetIOBufBlockCount, NULL); bvar::PassiveStatus<int64_t> var_iobuf_block_count_hit_tls_threshold( "iobuf_block_count_hit_tls_threshold", GetIOBufBlockCountHitTLSThreshold, NULL); bvar::PassiveStatus<int64_t> var_iobuf_new_bigview_count( GetIOBufNewBigViewCount, NULL); bvar::PerSecond<bvar::PassiveStatus<int64_t> > var_iobuf_new_bigview_second( "iobuf_newbigview_second", &var_iobuf_new_bigview_count); bvar::PassiveStatus<int64_t> var_iobuf_block_memory( "iobuf_block_memory", GetIOBufBlockMemory, NULL); bvar::PassiveStatus<int> var_running_server_count( "rpc_server_count", GetRunningServerCount, NULL); butil::FileWatcher fw; if (fw.init_from_not_exist(DUMMY_SERVER_PORT_FILE) < 0) { LOG(FATAL) << "Fail to init FileWatcher on `" << DUMMY_SERVER_PORT_FILE << "'"; return NULL; } std::vector<SocketId> conns; const int64_t start_time_us = butil::gettimeofday_us(); const int WARN_NOSLEEP_THRESHOLD = 2; int64_t last_time_us = start_time_us; int consecutive_nosleep = 0; int64_t last_return_free_memory_time = start_time_us; while (1) { const int64_t sleep_us = 1000000L + last_time_us - butil::gettimeofday_us(); if (sleep_us > 0) { if (bthread_usleep(sleep_us) < 0) { PLOG_IF(FATAL, errno != ESTOP) << "Fail to sleep"; break; } consecutive_nosleep = 0; } else { if (++consecutive_nosleep >= WARN_NOSLEEP_THRESHOLD) { consecutive_nosleep = 0; LOG(WARNING) << __FUNCTION__ << " is too busy!"; } } last_time_us = butil::gettimeofday_us(); TrackMe(); if (!IsDummyServerRunning() && g_running_server_count.load(butil::memory_order_relaxed) == 0 && fw.check_and_consume() > 0) { long port = ReadPortOfDummyServer(DUMMY_SERVER_PORT_FILE); if (port >= 0) { StartDummyServerAt(port); } } SocketMapList(&conns); const int64_t now_ms = butil::cpuwide_time_ms(); for (size_t i = 0; i < conns.size(); ++i) { SocketUniquePtr ptr; if 
(Socket::Address(conns[i], &ptr) == 0) { ptr->UpdateStatsEverySecond(now_ms); } } const int return_mem_interval = FLAGS_free_memory_to_system_interval/*reloadable*/; if (return_mem_interval > 0 && last_time_us >= last_return_free_memory_time + return_mem_interval * 1000000L) { last_return_free_memory_time = last_time_us; // TODO: Calling MallocExtension::instance()->ReleaseFreeMemory may // crash the program in later calls to malloc, verified on tcmalloc // 1.7 and 2.5, which means making the static member function weak // in details/tcmalloc_extension.cpp is probably not correct, however // it does work for heap profilers. if (MallocExtension_ReleaseFreeMemory != NULL) { MallocExtension_ReleaseFreeMemory(); } else { #if defined(OS_LINUX) // GNU specific. malloc_trim(10 * 1024 * 1024/*leave 10M pad*/); #endif } } } return NULL; }
// Returns the value of g_running_server_count, loaded with relaxed memory
// ordering. The void* argument is unused.
static int GetRunningServerCount(void*) {
    const int running_servers =
        g_running_server_count.load(butil::memory_order_relaxed);
    return running_servers;
}