Example #1
namespace brpc {

DEFINE_int32(usercode_backup_threads, 5, "# of backup threads to run user code"
             " when too many pthread workers of bthreads are used");
DEFINE_int32(max_pending_in_each_backup_thread, 10,
             "Max number of pending user code in each backup thread; requests"
             " still coming in will be failed");

// Store pending user code.
struct UserCode {
    void (*fn)(void*);
    void* arg;
};
struct UserCodeBackupPool {
    // Queue of user code to run when in-place parallelism reaches the threshold
    std::deque<UserCode> queue;
    bvar::PassiveStatus<int> inplace_var;
    bvar::PassiveStatus<size_t> queue_size_var;
    bvar::Adder<size_t> inpool_count;
    bvar::PerSecond<bvar::Adder<size_t> > inpool_per_second;
    // NOTE: we don't use Adder<double> directly because it does not compile with gcc 3.4
    bvar::Adder<int64_t> inpool_elapse_us;
    bvar::PassiveStatus<double> inpool_elapse_s;
    bvar::PerSecond<bvar::PassiveStatus<double> > pool_usage;

    UserCodeBackupPool();
    int Init();
    void UserCodeRunningLoop();
};

static pthread_mutex_t s_usercode_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t s_usercode_cond = PTHREAD_COND_INITIALIZER;
static pthread_once_t s_usercode_init = PTHREAD_ONCE_INIT;
butil::static_atomic<int> g_usercode_inplace = BUTIL_STATIC_ATOMIC_INIT(0);
bool g_too_many_usercode = false;
static UserCodeBackupPool* s_usercode_pool = NULL;

static int GetUserCodeInPlace(void*) {
    return g_usercode_inplace.load(butil::memory_order_relaxed);
}

static size_t GetUserCodeQueueSize(void*) {
    BAIDU_SCOPED_LOCK(s_usercode_mutex);
    return (s_usercode_pool != NULL ? s_usercode_pool->queue.size() : 0);
}

static double GetInPoolElapseInSecond(void* arg) {
    return static_cast<bvar::Adder<int64_t>*>(arg)->get_value() / 1000000.0;
}

UserCodeBackupPool::UserCodeBackupPool()
    : inplace_var("rpc_usercode_inplace", GetUserCodeInPlace, NULL)
    , queue_size_var("rpc_usercode_queue_size", GetUserCodeQueueSize, NULL)
    , inpool_count("rpc_usercode_backup_count")
    , inpool_per_second("rpc_usercode_backup_second", &inpool_count)
    , inpool_elapse_s(GetInPoolElapseInSecond, &inpool_elapse_us)
    , pool_usage("rpc_usercode_backup_usage", &inpool_elapse_s, 1) {
}

static void* UserCodeRunner(void* args) {
    static_cast<UserCodeBackupPool*>(args)->UserCodeRunningLoop();
    return NULL;
}

int UserCodeBackupPool::Init() {
    // Like bthread workers, these threads never quit (to avoid potential hang
    // during termination of program).
    for (int i = 0; i < FLAGS_usercode_backup_threads; ++i) {
        pthread_t th;
        if (pthread_create(&th, NULL, UserCodeRunner, this) != 0) {
            LOG(ERROR) << "Fail to create UserCodeRunner";
            return -1;
        }
    }
    return 0;
}

// Entry of backup thread for running user code.
void UserCodeBackupPool::UserCodeRunningLoop() {
    bthread::run_worker_startfn();
#ifdef BAIDU_INTERNAL
    logging::ComlogInitializer comlog_initializer;
#endif
    
    int64_t last_time = butil::cpuwide_time_us();
    while (true) {
        bool blocked = false;
        UserCode usercode = { NULL, NULL };
        {
            BAIDU_SCOPED_LOCK(s_usercode_mutex);
            while (queue.empty()) {
                pthread_cond_wait(&s_usercode_cond, &s_usercode_mutex);
                blocked = true;
            }
            usercode = queue.front();
            queue.pop_front();
            if (g_too_many_usercode &&
                (int)queue.size() <= FLAGS_usercode_backup_threads) {
                g_too_many_usercode = false;
            }
        }
        const int64_t begin_time = (blocked ? butil::cpuwide_time_us() : last_time);
        usercode.fn(usercode.arg);
        const int64_t end_time = butil::cpuwide_time_us();
        inpool_count << 1;
        inpool_elapse_us << (end_time - begin_time);
        last_time = end_time;
    }
}

static void InitUserCodeBackupPool() {
    s_usercode_pool = new UserCodeBackupPool;
    if (s_usercode_pool->Init() != 0) {
        LOG(ERROR) << "Fail to init UserCodeBackupPool";
        // Rare and critical. This often happens when the program has just
        // started, since this function is called from GlobalInitializeOrDieImpl()
        // as well; quitting is the best choice.
        exit(1);
    }
}

void InitUserCodeBackupPoolOnceOrDie() {
    pthread_once(&s_usercode_init, InitUserCodeBackupPool);
}

void EndRunningUserCodeInPool(void (*fn)(void*), void* arg) {
    InitUserCodeBackupPoolOnceOrDie();
    
    g_usercode_inplace.fetch_sub(1, butil::memory_order_relaxed);

    // Not enough idle workers. Run the code in backup threads to prevent
    // all workers from being blocked, which would leave no thread to process
    // responses (deadlock).
    const UserCode usercode = { fn, arg };
    pthread_mutex_lock(&s_usercode_mutex);
    s_usercode_pool->queue.push_back(usercode);
    // Even if the queue has too many items, we can't drop the user code
    // directly because it often must be run, e.g. client-side done callbacks.
    // Instead we set a mark which is not cleared until the queue becomes
    // short again. RPC code checks the mark before submitting tasks that may
    // generate more user code.
    if ((int)s_usercode_pool->queue.size() >=
        (FLAGS_usercode_backup_threads *
         FLAGS_max_pending_in_each_backup_thread)) {
        g_too_many_usercode = true;
    }
    pthread_mutex_unlock(&s_usercode_mutex);
    pthread_cond_signal(&s_usercode_cond);
}

} // namespace brpc
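
The queue-plus-mark backpressure used by EndRunningUserCodeInPool() above can be shown in isolation. Below is a minimal standalone sketch of the same pattern using only the C++ standard library; the names BackupPool, kThreads and kMaxPendingPerThread are invented for illustration and are not brpc APIs:

#include <atomic>
#include <condition_variable>
#include <deque>
#include <mutex>

// Sketch of the backup-pool pattern: producers never drop tasks (the queued
// callbacks must eventually run); instead they raise an overload flag that
// upstream code checks before generating more work.
struct BackupPool {
    std::deque<void (*)()> queue;
    std::mutex mu;
    std::condition_variable cond;
    // Atomic so producers may peek at it without taking the mutex.
    std::atomic<bool> too_many{false};
    enum { kThreads = 2, kMaxPendingPerThread = 10 };  // invented limits

    void push(void (*fn)()) {
        {
            std::lock_guard<std::mutex> g(mu);
            queue.push_back(fn);
            if ((int)queue.size() >= kThreads * kMaxPendingPerThread) {
                too_many = true;  // signal upstream to stop generating work
            }
        }
        cond.notify_one();
    }

    void worker_loop() {  // each of the kThreads workers runs this forever
        for (;;) {
            void (*fn)() = nullptr;
            {
                std::unique_lock<std::mutex> lk(mu);
                cond.wait(lk, [this] { return !queue.empty(); });
                fn = queue.front();
                queue.pop_front();
                if (too_many && (int)queue.size() <= kThreads) {
                    too_many = false;  // queue is short again
                }
            }
            fn();  // run user code outside the lock
        }
    }
};

As in the original, only consumers clear the flag, and only once the queue drains below the thread count, so producers observe steady backpressure rather than a flickering signal.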
Example #2
File: fd.cpp Project: alphawzh/brpc
namespace bthread {

extern BAIDU_THREAD_LOCAL TaskGroup* tls_task_group;

template <typename T, size_t NBLOCK, size_t BLOCK_SIZE>
class LazyArray {
    struct Block {
        butil::atomic<T> items[BLOCK_SIZE];
    };

public:
    LazyArray() {
        memset(_blocks, 0, sizeof(butil::atomic<Block*>) * NBLOCK);
    }

    butil::atomic<T>* get_or_new(size_t index) {
        const size_t block_index = index / BLOCK_SIZE;
        if (block_index >= NBLOCK) {
            return NULL;
        }
        const size_t block_offset = index - block_index * BLOCK_SIZE;
        Block* b = _blocks[block_index].load(butil::memory_order_consume);
        if (b != NULL) {
            return b->items + block_offset;
        }
        b = new (std::nothrow) Block;
        if (NULL == b) {
            b = _blocks[block_index].load(butil::memory_order_consume);
            return (b ? b->items + block_offset : NULL);
        }
        // Set items to default value of T.
        std::fill(b->items, b->items + BLOCK_SIZE, T());
        Block* expected = NULL;
        if (_blocks[block_index].compare_exchange_strong(
                expected, b, butil::memory_order_release,
                butil::memory_order_consume)) {
            return b->items + block_offset;
        }
        delete b;
        return expected->items + block_offset;
    }

    butil::atomic<T>* get(size_t index) const {
        const size_t block_index = index / BLOCK_SIZE;
        if (__builtin_expect(block_index < NBLOCK, 1)) {
            const size_t block_offset = index - block_index * BLOCK_SIZE;
            Block* const b = _blocks[block_index].load(butil::memory_order_consume);
            if (__builtin_expect(b != NULL, 1)) {
                return b->items + block_offset;
            }
        }
        return NULL;
    }

private:
    butil::atomic<Block*> _blocks[NBLOCK];
};
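
// [Editor's note, not part of the original file] A hypothetical usage sketch
// of LazyArray: get_or_new() publishes a freshly-filled Block with a
// release-CAS, so racing callers either install their own block or adopt the
// winner's (compare_exchange_strong loads the winner into `expected' on
// failure, and the loser deletes its own allocation):
//
//   LazyArray<int, 16/*NBLOCK*/, 256/*BLOCK_SIZE*/> arr;
//   butil::atomic<int>* slot = arr.get_or_new(1000);  // lazily news block #3
//   if (slot != NULL) {
//       slot->store(42, butil::memory_order_relaxed);
//   }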

typedef butil::atomic<int> EpollButex;

static EpollButex* const CLOSING_GUARD = (EpollButex*)(intptr_t)-1L;

#ifndef NDEBUG
butil::static_atomic<int> break_nums = BUTIL_STATIC_ATOMIC_INIT(0);
#endif

// Able to address 67108864 file descriptors, should be enough.
LazyArray<EpollButex*, 262144/*NBLOCK*/, 256/*BLOCK_SIZE*/> fd_butexes;

static const int BTHREAD_DEFAULT_EPOLL_SIZE = 65536;

class EpollThread {
public:
    EpollThread()
        : _epfd(-1)
        , _stop(false)
        , _tid(0) {
    }

    int start(int epoll_size) {
        if (started()) {
            return -1;
        }
        _epfd = epoll_create(epoll_size);
        if (_epfd < 0) {
            PLOG(FATAL) << "Fail to epoll_create";
            return -1;
        }
        if (bthread_start_background(
                &_tid, NULL, EpollThread::run_this, this) != 0) {
            close(_epfd);
            _epfd = -1;
            LOG(FATAL) << "Fail to create epoll bthread";
            return -1;
        }
        return 0;
    }

    // Note: This function does not wake up suspended fd_wait. This is fine
    // since stop_and_join is only called on program's termination
    // (g_task_control.stop()), suspended bthreads do not block quit of
    // worker pthreads and completion of g_task_control.stop().
    int stop_and_join() {
        if (!started()) {
            return 0;
        }
        // No matter what this function returns, _epfd will be set to -1
        // (making started() false) to prevent a later stop_and_join() from
        // entering again.
        const int saved_epfd = _epfd;
        _epfd = -1;

        // epoll_wait cannot be woken up by closing _epfd. We wake it up by
        // adding an fd that continuously triggers EPOLLOUT. Visibility of
        // _stop: the constant EPOLLOUT wake-ups force epoll_wait to
        // eventually see _stop == true.
        _stop = true;
        int closing_epoll_pipe[2];
        if (pipe(closing_epoll_pipe)) {
            PLOG(FATAL) << "Fail to create closing_epoll_pipe";
            return -1;
        }
        epoll_event evt = { EPOLLOUT, { NULL } };
        if (epoll_ctl(saved_epfd, EPOLL_CTL_ADD,
                      closing_epoll_pipe[1], &evt) < 0) {
            PLOG(FATAL) << "Fail to add closing_epoll_pipe into epfd="
                        << saved_epfd;
            return -1;
        }

        const int rc = bthread_join(_tid, NULL);
        if (rc) {
            LOG(FATAL) << "Fail to join EpollThread, " << berror(rc);
            return -1;
        }
        close(closing_epoll_pipe[0]);
        close(closing_epoll_pipe[1]);
        close(saved_epfd);
        return 0;
    }

    int fd_wait(int fd, unsigned epoll_events, const timespec* abstime) {
        butil::atomic<EpollButex*>* p = fd_butexes.get_or_new(fd);
        if (NULL == p) {
            errno = ENOMEM;
            return -1;
        }

        EpollButex* butex = p->load(butil::memory_order_consume);
        if (NULL == butex) {
            // It is rare to wait on one file descriptor from multiple threads
            // simultaneously. Creating the singleton with optimistic locking
            // here saves a mutex per butex.
            butex = butex_create_checked<EpollButex>();
            butex->store(0, butil::memory_order_relaxed);
            EpollButex* expected = NULL;
            if (!p->compare_exchange_strong(expected, butex,
                                            butil::memory_order_release,
                                            butil::memory_order_consume)) {
                butex_destroy(butex);
                butex = expected;
            }
        }
        
        while (butex == CLOSING_GUARD) {  // bthread_close() is running.
            if (sched_yield() < 0) {
                return -1;
            }
            butex = p->load(butil::memory_order_consume);
        }
        // Save the value of the butex before adding fd to epoll, because the
        // butex may be changed before butex_wait. No memory fence needed since
        // EPOLL_CTL_MOD and EPOLL_CTL_ADD act as release fences.
        const int expected_val = butex->load(butil::memory_order_relaxed);

#ifdef BAIDU_KERNEL_FIXED_EPOLLONESHOT_BUG
        epoll_event evt = { epoll_events | EPOLLONESHOT, { butex } };
        if (epoll_ctl(_epfd, EPOLL_CTL_MOD, fd, &evt) < 0) {
            if (epoll_ctl(_epfd, EPOLL_CTL_ADD, fd, &evt) < 0 &&
                    errno != EEXIST) {
                PLOG(FATAL) << "Fail to add fd=" << fd << " into epfd=" << _epfd;
                return -1;
            }
        }
#else
        epoll_event evt;
        evt.events = epoll_events;
        evt.data.fd = fd;
        if (epoll_ctl(_epfd, EPOLL_CTL_ADD, fd, &evt) < 0 &&
            errno != EEXIST) {
            PLOG(FATAL) << "Fail to add fd=" << fd << " into epfd=" << _epfd;
            return -1;
        }
#endif        
        const int rc = butex_wait(butex, expected_val, abstime);
        if (rc < 0 && errno == EWOULDBLOCK) {
            // EpollThread did wake up, there's data.
            return 0;
        }
        return rc;
    }

    int fd_close(int fd) {
        if (fd < 0) {
            // what close(-1) returns
            errno = EBADF;
            return -1;
        }
        butil::atomic<EpollButex*>* pbutex = bthread::fd_butexes.get(fd);
        if (NULL == pbutex) {
            // Did not call bthread_fd functions, close directly.
            return close(fd);
        }
        EpollButex* butex = pbutex->exchange(
            CLOSING_GUARD, butil::memory_order_relaxed);
        if (butex == CLOSING_GUARD) {
            // concurrent double close detected.
            errno = EBADF;
            return -1;
        }
        if (butex != NULL) {
            butex->fetch_add(1, butil::memory_order_relaxed);
            butex_wake_all(butex);
        }
        epoll_ctl(_epfd, EPOLL_CTL_DEL, fd, NULL);
        const int rc = close(fd);
        pbutex->exchange(butex, butil::memory_order_relaxed);
        return rc;
    }

    bool started() const {
        return _epfd >= 0;
    }

private:
    static void* run_this(void* arg) {
        return static_cast<EpollThread*>(arg)->run();
    }

    void* run() {
        const int initial_epfd = _epfd;
        const size_t MAX_EVENTS = 32;
        epoll_event* e = new (std::nothrow) epoll_event[MAX_EVENTS];
        if (NULL == e) {
            LOG(FATAL) << "Fail to new epoll_event";
            return NULL;
        }

#ifndef BAIDU_KERNEL_FIXED_EPOLLONESHOT_BUG
        DLOG(INFO) << "Use DEL+ADD instead of EPOLLONESHOT+MOD due to kernel bug. Performance will be much lower.";
#endif
        while (!_stop) {
            const int epfd = _epfd;
            const int n = epoll_wait(epfd, e, MAX_EVENTS, -1);
            if (_stop) {
                break;
            }

            if (n < 0) {
                if (errno == EINTR) {
#ifndef NDEBUG
                    break_nums.fetch_add(1, butil::memory_order_relaxed);
                    int* p = &errno;
                    const char* b = berror();
                    const char* b2 = berror(errno);
                    DLOG(FATAL) << "Fail to epoll epfd=" << epfd << ", "
                                << errno << " " << p << " " <<  b << " " <<  b2;
#endif
                    continue;
                }

                PLOG(INFO) << "Fail to epoll epfd=" << epfd;
                break;
            }

#ifndef BAIDU_KERNEL_FIXED_EPOLLONESHOT_BUG
            for (int i = 0; i < n; ++i) {
                epoll_ctl(epfd, EPOLL_CTL_DEL, e[i].data.fd, NULL);
            }
#endif
            for (int i = 0; i < n; ++i) {
#ifdef BAIDU_KERNEL_FIXED_EPOLLONESHOT_BUG
                EpollButex* butex = static_cast<EpollButex*>(e[i].data.ptr);
#else
                butil::atomic<EpollButex*>* pbutex = fd_butexes.get(e[i].data.fd);
                EpollButex* butex = pbutex ?
                    pbutex->load(butil::memory_order_consume) : NULL;
#endif
                if (butex != NULL && butex != CLOSING_GUARD) {
                    butex->fetch_add(1, butil::memory_order_relaxed);
                    butex_wake_all(butex);
                }
            }
        }

        delete [] e;
        DLOG(INFO) << "EpollThread=" << _tid << "(epfd="
                   << initial_epfd << ") is about to stop";
        return NULL;
    }

    int _epfd;
    bool _stop;
    bthread_t _tid;
};

EpollThread epoll_thread[BTHREAD_EPOLL_THREAD_NUM];

static inline EpollThread& get_epoll_thread(int fd) {
    if (BTHREAD_EPOLL_THREAD_NUM == 1UL) {
        EpollThread& et = epoll_thread[0];
        et.start(BTHREAD_DEFAULT_EPOLL_SIZE);
        return et;
    }

    EpollThread& et = epoll_thread[butil::fmix32(fd) % BTHREAD_EPOLL_THREAD_NUM];
    et.start(BTHREAD_DEFAULT_EPOLL_SIZE);
    return et;
}

int stop_and_join_epoll_threads() {
    // Returns -1 if any epoll thread failed to stop.
    int rc = 0;
    for (size_t i = 0; i < BTHREAD_EPOLL_THREAD_NUM; ++i) {
        if (epoll_thread[i].stop_and_join() < 0) {
            rc = -1;
        }
    }
    return rc;
}

short epoll_to_poll_events(uint32_t epoll_events) {
    // Most POLL* and EPOLL* constants share the same values.
    short poll_events = (epoll_events &
                         (EPOLLIN | EPOLLPRI | EPOLLOUT |
                          EPOLLRDNORM | EPOLLRDBAND |
                          EPOLLWRNORM | EPOLLWRBAND |
                          EPOLLMSG | EPOLLERR | EPOLLHUP));
    CHECK_EQ((uint32_t)poll_events, epoll_events);
    return poll_events;
}
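
// [Editor's note, illustration only] On Linux the common event bits are
// numerically identical, e.g. EPOLLIN == POLLIN == 0x001 and
// EPOLLOUT == POLLOUT == 0x004, so the masking above is effectively a
// checked truncation from uint32_t to short:
//
//   short ev = epoll_to_poll_events(EPOLLIN | EPOLLOUT);  // == POLLIN | POLLOUT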

// For pthreads.
int pthread_fd_wait(int fd, unsigned epoll_events,
                    const timespec* abstime) {
    int diff_ms = -1;
    if (abstime) {
        timespec now;
        clock_gettime(CLOCK_REALTIME, &now);
        int64_t now_us = butil::timespec_to_microseconds(now);
        int64_t abstime_us = butil::timespec_to_microseconds(*abstime);
        if (abstime_us <= now_us) {
            errno = ETIMEDOUT;
            return -1;
        }
        diff_ms = (abstime_us - now_us + 999L) / 1000L;
    }
    const short poll_events =
        bthread::epoll_to_poll_events(epoll_events);
    if (poll_events == 0) {
        errno = EINVAL;
        return -1;
    }
    pollfd ufds = { fd, poll_events, 0 };
    const int rc = poll(&ufds, 1, diff_ms);
    if (rc < 0) {
        return -1;
    }
    if (rc == 0) {
        errno = ETIMEDOUT;
        return -1;
    }
    if (ufds.revents & POLLNVAL) {
        errno = EBADF;
        return -1;
    }
    return 0;
}

}  // namespace bthread
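
The wake-up trick used by stop_and_join() above deserves a standalone illustration: closing an epoll fd does not wake a blocked epoll_wait, but registering the write end of a fresh pipe with EPOLLOUT does, because an empty pipe is immediately (and persistently) writable. A minimal Linux-only sketch, error handling elided; main() and the thread body are invented for illustration:

#include <pthread.h>
#include <stdio.h>
#include <sys/epoll.h>
#include <unistd.h>

static void* waiter(void* arg) {
    int epfd = *(int*)arg;
    epoll_event evt;
    // Blocks until some registered fd becomes ready.
    int n = epoll_wait(epfd, &evt, 1, -1);
    printf("epoll_wait returned %d\n", n);
    return NULL;
}

int main() {
    int epfd = epoll_create(1024);
    pthread_t th;
    pthread_create(&th, NULL, waiter, &epfd);
    sleep(1);  // let the waiter block inside epoll_wait

    // Wake it up: the write end of an empty pipe is always writable,
    // so EPOLLOUT fires immediately and keeps firing.
    int pipefd[2];
    pipe(pipefd);
    epoll_event evt = { EPOLLOUT, { NULL } };
    epoll_ctl(epfd, EPOLL_CTL_ADD, pipefd[1], &evt);

    pthread_join(th, NULL);
    close(pipefd[0]);
    close(pipefd[1]);
    close(epfd);
    return 0;
}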
Example #3
namespace bvar {

template <class T, class M> M get_member_type(M T::*);

#define BVAR_MEMBER_TYPE(member) BAIDU_TYPEOF(bvar::get_member_type(member))

int do_link_default_variables = 0;
const int64_t CACHED_INTERVAL_US = 100000L; // 100ms

// ======================================
struct ProcStat {
    int pid;
    //std::string comm;
    char state;
    int ppid;
    int pgrp;
    int session;
    int tty_nr;
    int tpgid;
    uint32_t flags;
    uint64_t minflt;
    uint64_t cminflt;
    uint64_t majflt;
    uint64_t cmajflt;
    uint64_t utime;
    uint64_t stime;
    uint64_t cutime;
    uint64_t cstime;
    int64_t priority;
    int64_t nice;
    int64_t num_threads;
};

// Read status from /proc/self/stat. Information from `man proc' is out of date,
// see http://man7.org/linux/man-pages/man5/proc.5.html
static bool read_proc_status(ProcStat &stat) {
    butil::ScopedFILE fp("/proc/self/stat", "r");
    if (NULL == fp) {
        PLOG_ONCE(WARNING) << "Fail to open /proc/self/stat";
        return false;
    }
    stat = ProcStat();
    errno = 0;
    if (fscanf(fp, "%d %*s %c "
               "%d %d %d %d %d "
               "%u %lu %lu %lu "
               "%lu %lu %lu %lu %lu "
               "%ld %ld %ld",
               &stat.pid, &stat.state,
               &stat.ppid, &stat.pgrp, &stat.session, &stat.tty_nr, &stat.tpgid,
               &stat.flags, &stat.minflt, &stat.cminflt, &stat.majflt,
               &stat.cmajflt, &stat.utime, &stat.stime, &stat.cutime, &stat.cstime,
               &stat.priority, &stat.nice, &stat.num_threads) != 19) {
        PLOG(WARNING) << "Fail to fscanf";
        return false;
    }
    return true;
}

// Reduce pressure on the functions that fetch system metrics.
template <typename T>
class CachedReader {
public:
    CachedReader() : _mtime_us(0) {
        CHECK_EQ(0, pthread_mutex_init(&_mutex, NULL));
    }
    ~CachedReader() {
        pthread_mutex_destroy(&_mutex);
    }

    // NOTE: may return a volatile value that can be overwritten at any time.
    // This is acceptable right now: both 32-bit and 64-bit numbers are atomic
    // to fetch on 64-bit machines (most of Baidu's machines), and the code in
    // this .cpp that uses this class generally returns structs of 32-bit and
    // 64-bit numbers.
    template <typename ReadFn>
    static const T& get_value(const ReadFn& fn) {
        CachedReader* p = butil::get_leaky_singleton<CachedReader>();
        const int64_t now = butil::gettimeofday_us();
        if (now > p->_mtime_us + CACHED_INTERVAL_US) {
            pthread_mutex_lock(&p->_mutex);
            if (now > p->_mtime_us + CACHED_INTERVAL_US) {
                p->_mtime_us = now;
                pthread_mutex_unlock(&p->_mutex);
                // Don't run fn inside the lock, otherwise a slow fn may
                // block all concurrent bvar dumpers (e.g. /vars).
                T result;
                if (fn(&result)) {
                    pthread_mutex_lock(&p->_mutex);
                    p->_cached = result;
                } else {
                    pthread_mutex_lock(&p->_mutex);
                }
            }
            pthread_mutex_unlock(&p->_mutex);
        }
        return p->_cached;
    }

private:
    int64_t _mtime_us;
    pthread_mutex_t _mutex;
    T _cached;
};
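
// [Editor's note, hypothetical usage sketch] A reader is any functor that
// fills T and returns true on success; get_value() refreshes at most once
// per CACHED_INTERVAL_US and runs the (possibly slow) reader outside the
// lock. The readers below (ProcStatReader, FdReader, ...) all follow this
// shape, e.g.:
//
//   struct UptimeReader {                     // invented for illustration
//       bool operator()(long* out) const {
//           *out = time(NULL);                // pretend this is expensive
//           return true;
//       }
//   };
//   // At most one refresh per CACHED_INTERVAL_US across all threads:
//   //   long v = CachedReader<long>::get_value(UptimeReader());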

class ProcStatReader {
public:
    bool operator()(ProcStat* stat) const {
        return read_proc_status(*stat);
    }
    template <typename T, size_t offset>
    static T get_field(void*) {
        return *(T*)((char*)&CachedReader<ProcStat>::get_value(
                         ProcStatReader()) + offset);
    }
};

#define BVAR_DEFINE_PROC_STAT_FIELD(field)                              \
    PassiveStatus<BVAR_MEMBER_TYPE(&ProcStat::field)> g_##field(        \
        ProcStatReader::get_field<BVAR_MEMBER_TYPE(&ProcStat::field),   \
        offsetof(ProcStat, field)>, NULL);

#define BVAR_DEFINE_PROC_STAT_FIELD2(field, name)                       \
    PassiveStatus<BVAR_MEMBER_TYPE(&ProcStat::field)> g_##field(        \
        name,                                                           \
        ProcStatReader::get_field<BVAR_MEMBER_TYPE(&ProcStat::field),   \
        offsetof(ProcStat, field)>, NULL);
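
// [Editor's note, illustration only] BVAR_DEFINE_PROC_STAT_FIELD2(pid, "pid")
// expands to roughly:
//
//   PassiveStatus<int> g_pid(
//       "pid", ProcStatReader::get_field<int, offsetof(ProcStat, pid)>, NULL);
//
// i.e. every exposed field is a PassiveStatus whose read callback plucks one
// member out of the 100ms-cached ProcStat by offset.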

// ==================================================

struct ProcMemory {
    int64_t size;      // total program size
    int64_t resident;  // resident set size
    int64_t share;     // shared pages
    int64_t trs;       // text (code)
    int64_t drs;       // data/stack
    int64_t lrs;       // library
    int64_t dt;        // dirty pages
};

static bool read_proc_memory(ProcMemory &m) {
    butil::ScopedFILE fp("/proc/self/statm", "r");
    if (NULL == fp) {
        PLOG_ONCE(WARNING) << "Fail to open /proc/self/statm";
        return false;
    }
    m = ProcMemory();
    errno = 0;
    if (fscanf(fp, "%ld %ld %ld %ld %ld %ld %ld",
               &m.size, &m.resident, &m.share,
               &m.trs, &m.drs, &m.lrs, &m.dt) != 7) {
        PLOG(WARNING) << "Fail to fscanf";
        return false;
    }
    return true;
}

class ProcMemoryReader {
public:
    bool operator()(ProcMemory* stat) const {
        return read_proc_memory(*stat);
    }
    template <typename T, size_t offset>
    static T get_field(void*) {
        static int64_t pagesize = getpagesize();
        return *(T*)((char*)&CachedReader<ProcMemory>::get_value(
                         ProcMemoryReader()) + offset) * pagesize;
    }
};

#define BVAR_DEFINE_PROC_MEMORY_FIELD(field, name)                      \
    PassiveStatus<BVAR_MEMBER_TYPE(&ProcMemory::field)> g_##field(      \
        name,                                                           \
        ProcMemoryReader::get_field<BVAR_MEMBER_TYPE(&ProcMemory::field), \
        offsetof(ProcMemory, field)>, NULL);

// ==================================================

struct LoadAverage {
    double loadavg_1m;
    double loadavg_5m;
    double loadavg_15m;
};

static bool read_load_average(LoadAverage &m) {
    butil::ScopedFILE fp("/proc/loadavg", "r");
    if (NULL == fp) {
        PLOG_ONCE(WARNING) << "Fail to open /proc/loadavg";
        return false;
    }
    m = LoadAverage();
    errno = 0;
    if (fscanf(fp, "%lf %lf %lf",
               &m.loadavg_1m, &m.loadavg_5m, &m.loadavg_15m) != 3) {
        PLOG(WARNING) << "Fail to fscanf";
        return false;
    }
    return true;
}

class LoadAverageReader {
public:
    bool operator()(LoadAverage* stat) const {
        return read_load_average(*stat);
    }
    template <typename T, size_t offset>
    static T get_field(void*) {
        return *(T*)((char*)&CachedReader<LoadAverage>::get_value(
                         LoadAverageReader()) + offset);
    }
};

#define BVAR_DEFINE_LOAD_AVERAGE_FIELD(field, name)                     \
    PassiveStatus<BVAR_MEMBER_TYPE(&LoadAverage::field)> g_##field(     \
        name,                                                           \
        LoadAverageReader::get_field<BVAR_MEMBER_TYPE(&LoadAverage::field), \
        offsetof(LoadAverage, field)>, NULL);

// ==================================================

static int get_fd_count(int limit) {
    butil::DirReaderPosix dr("/proc/self/fd");
    int count = 0;
    if (!dr.IsValid()) {
        PLOG(WARNING) << "Fail to open /proc/self/fd";
        return -1;
    }
    // Have to limit the scanning, which consumes a lot of CPU when #fd
    // is huge (100k+)
    for (; dr.Next() && count <= limit + 3; ++count) {}
    return count - 3 /* skipped ., .. and the fd in dr*/;
}

extern PassiveStatus<int> g_fd_num;

const int MAX_FD_SCAN_COUNT = 10003;
static butil::static_atomic<bool> s_ever_reached_fd_scan_limit = BUTIL_STATIC_ATOMIC_INIT(false);
class FdReader {
public:
    bool operator()(int* stat) const {
        if (s_ever_reached_fd_scan_limit.load(butil::memory_order_relaxed)) {
            // Never update the count again.
            return false;
        }
        const int count = get_fd_count(MAX_FD_SCAN_COUNT);
        if (count < 0) {
            return false;
        }
        if (count == MAX_FD_SCAN_COUNT - 2 
                && s_ever_reached_fd_scan_limit.exchange(
                        true, butil::memory_order_relaxed) == false) {
            // Rename the bvar to notify user.
            g_fd_num.hide();
            g_fd_num.expose("process_fd_num_too_many");
        }
        *stat = count;
        return true;
    }
};

static int print_fd_count(void*) {
    return CachedReader<int>::get_value(FdReader());
}

// ==================================================
struct ProcIO {
    // number of bytes the process read, using any read-like system call (from
    // files, pipes, tty...).
    size_t rchar;

    // number of bytes the process wrote using any write-like system call.
    size_t wchar;

    // number of read-like system call invocations that the process performed.
    size_t syscr;

    // number of write-like system call invocations that the process performed.
    size_t syscw;

    // number of bytes the process directly read from disk.
    size_t read_bytes;

    // number of bytes the process originally dirtied in the page-cache
    // (assuming they will go to disk later).
    size_t write_bytes;

    // number of bytes the process "un-dirtied" - e.g. using an "ftruncate"
    // call that truncated pages from the page-cache.
    size_t cancelled_write_bytes;
};

static bool read_proc_io(ProcIO* s) {
    butil::ScopedFILE fp("/proc/self/io", "r");
    if (NULL == fp) {
        PLOG_ONCE(WARNING) << "Fail to open /proc/self/io";
        return false;
    }
    errno = 0;
    if (fscanf(fp, "%*s %lu %*s %lu %*s %lu %*s %lu %*s %lu %*s %lu %*s %lu",
               &s->rchar, &s->wchar, &s->syscr, &s->syscw,
               &s->read_bytes, &s->write_bytes, &s->cancelled_write_bytes)
        != 7) {
        PLOG(WARNING) << "Fail to fscanf";
        return false;
    }
    return true;
}

class ProcIOReader {
public:
    bool operator()(ProcIO* stat) const {
        return read_proc_io(stat);
    }
    template <typename T, size_t offset>
    static T get_field(void*) {
        return *(T*)((char*)&CachedReader<ProcIO>::get_value(
                         ProcIOReader()) + offset);
    }
};

#define BVAR_DEFINE_PROC_IO_FIELD(field)                                \
    PassiveStatus<BVAR_MEMBER_TYPE(&ProcIO::field)> g_##field(          \
        ProcIOReader::get_field<BVAR_MEMBER_TYPE(&ProcIO::field),       \
        offsetof(ProcIO, field)>, NULL);

// ==================================================
// Refs:
//   https://www.kernel.org/doc/Documentation/ABI/testing/procfs-diskstats
//   https://www.kernel.org/doc/Documentation/iostats.txt
// 
// The /proc/diskstats file displays the I/O statistics of block devices.
// Each line contains the following 14 fields:
struct DiskStat {
    long long major_number;
    long long minor_number;
    char device_name[64];

    // The total number of reads completed successfully.
    long long reads_completed; // rMB/s rKB/s
    
    // Reads and writes which are adjacent to each other may be merged for
    // efficiency.  Thus two 4K reads may become one 8K read before it is
    // ultimately handed to the disk, and so it will be counted (and queued)
    // as only one I/O.  This field lets you know how often this was done.
    long long reads_merged;     // rrqm/s
    
    // The total number of sectors read successfully.
    long long sectors_read;     // rsec/s

    // The total number of milliseconds spent by all reads (as
    // measured from __make_request() to end_that_request_last()).
    long long time_spent_reading_ms;

    // The total number of writes completed successfully.
    long long writes_completed; // wKB/s wMB/s

    // See description of reads_merged
    long long writes_merged;    // wrqm/s

    // The total number of sectors written successfully.
    long long sectors_written;  // wsec/s

    // The total number of milliseconds spent by all writes (as
    // measured from __make_request() to end_that_request_last()).
    long long time_spent_writing_ms;

    // The only field that should go to zero. Incremented as requests are
    // given to appropriate struct request_queue and decremented as they finish.
    long long io_in_progress;

    // This field increases so long as `io_in_progress' is nonzero.
    long long time_spent_io_ms;

    // This field is incremented at each I/O start, I/O completion, I/O
    // merge, or read of these stats by the number of I/Os in progress
    // `io_in_progress' times the number of milliseconds spent doing
    // I/O since the last update of this field.  This can provide an easy
    // measure of both I/O completion time and the backlog that may be
    // accumulating.
    long long weighted_time_spent_io_ms;
};

static bool read_disk_stat(DiskStat* s) {
    butil::ScopedFILE fp("/proc/diskstats", "r");
    if (NULL == fp) {
        PLOG_ONCE(WARNING) << "Fail to open /proc/diskstats";
        return false;
    }
    errno = 0;
    if (fscanf(fp, "%lld %lld %s %lld %lld %lld %lld %lld %lld %lld "
               "%lld %lld %lld %lld",
               &s->major_number,
               &s->minor_number,
               s->device_name,
               &s->reads_completed,
               &s->reads_merged,
               &s->sectors_read,
               &s->time_spent_reading_ms,
               &s->writes_completed,
               &s->writes_merged,
               &s->sectors_written,
               &s->time_spent_writing_ms,
               &s->io_in_progress,
               &s->time_spent_io_ms,
               &s->weighted_time_spent_io_ms) != 14) {
        PLOG(WARNING) << "Fail to fscanf";
        return false;
    }
    return true;
}

class DiskStatReader {
public:
    bool operator()(DiskStat* stat) const {
        return read_disk_stat(stat);
    }
    template <typename T, size_t offset>
    static T get_field(void*) {
        return *(T*)((char*)&CachedReader<DiskStat>::get_value(
                         DiskStatReader()) + offset);
    }
};

#define BVAR_DEFINE_DISK_STAT_FIELD(field)                              \
    PassiveStatus<BVAR_MEMBER_TYPE(&DiskStat::field)> g_##field(        \
        DiskStatReader::get_field<BVAR_MEMBER_TYPE(&DiskStat::field),   \
        offsetof(DiskStat, field)>, NULL);

// =====================================

static std::string read_first_line(const char* filepath) {
    char * line = NULL;
    size_t len = 0;
    butil::ScopedFILE fp(filepath, "r");
    if (fp == NULL) {
        return "";
    }
    std::string result;
    ssize_t nr = getline(&line, &len, fp);
    if (nr != -1) {
        for (ssize_t i = 0; i < nr; ++i) {
            if (line[i] == '\0') {
                line[i] = ' ';
            }
        }
        for (; nr >= 1 && isspace(line[nr - 1]); --nr) {}  // trim.
        result.assign(line, nr);
    }
    free(line);
    return result;
}

struct ReadProcSelfCmdline {
    std::string content;
    ReadProcSelfCmdline() : content(read_first_line("/proc/self/cmdline")) {}
};
static void get_cmdline(std::ostream& os, void*) {
    os << butil::get_leaky_singleton<ReadProcSelfCmdline>()->content;
}

struct ReadProcVersion {
    std::string content;
    ReadProcVersion() : content(read_first_line("/proc/version")) {}
};
static void get_kernel_version(std::ostream& os, void*) {
    os << butil::get_leaky_singleton<ReadProcVersion>()->content;
}

// ======================================

static int64_t g_starting_time = butil::gettimeofday_us();

static timeval get_uptime(void*) {
    int64_t uptime_us = butil::gettimeofday_us() - g_starting_time;
    timeval tm;
    tm.tv_sec = uptime_us / 1000000L;
    tm.tv_usec = uptime_us - tm.tv_sec * 1000000L;
    return tm;
}

// ======================================

class RUsageReader {
public:
    bool operator()(rusage* stat) const {
        const int rc = getrusage(RUSAGE_SELF, stat);
        if (rc < 0) {
            PLOG(WARNING) << "Fail to getrusage";
            return false;
        }
        return true;
    }
    template <typename T, size_t offset>
    static T get_field(void*) {
        return *(T*)((char*)&CachedReader<rusage>::get_value(
                         RUsageReader()) + offset);
    }
};

#define BVAR_DEFINE_RUSAGE_FIELD(field)                                 \
    PassiveStatus<BVAR_MEMBER_TYPE(&rusage::field)> g_##field(          \
        RUsageReader::get_field<BVAR_MEMBER_TYPE(&rusage::field),       \
        offsetof(rusage, field)>, NULL);

#define BVAR_DEFINE_RUSAGE_FIELD2(field, name)                          \
    PassiveStatus<BVAR_MEMBER_TYPE(&rusage::field)> g_##field(          \
        name,                                                           \
        RUsageReader::get_field<BVAR_MEMBER_TYPE(&rusage::field),       \
        offsetof(rusage, field)>, NULL);

// ======================================

BVAR_DEFINE_PROC_STAT_FIELD2(pid, "pid");
BVAR_DEFINE_PROC_STAT_FIELD2(ppid, "ppid");
BVAR_DEFINE_PROC_STAT_FIELD2(pgrp, "pgrp");

static void get_username(std::ostream& os, void*) {
    char buf[32];
    if (getlogin_r(buf, sizeof(buf)) == 0) {
        buf[sizeof(buf)-1] = '\0';
        os << buf;
    } else {
        os << "unknown (" << berror() << ')' ;
    }
}

PassiveStatus<std::string> g_username(
    "process_username", get_username, NULL);

BVAR_DEFINE_PROC_STAT_FIELD(minflt);
PerSecond<PassiveStatus<uint64_t> > g_minflt_second(
    "process_faults_minor_second", &g_minflt);
BVAR_DEFINE_PROC_STAT_FIELD2(majflt, "process_faults_major");

BVAR_DEFINE_PROC_STAT_FIELD2(priority, "process_priority");
BVAR_DEFINE_PROC_STAT_FIELD2(nice, "process_nice");

BVAR_DEFINE_PROC_STAT_FIELD2(num_threads, "process_thread_count");
PassiveStatus<int> g_fd_num("process_fd_count", print_fd_count, NULL);

BVAR_DEFINE_PROC_MEMORY_FIELD(size, "process_memory_virtual");
BVAR_DEFINE_PROC_MEMORY_FIELD(resident, "process_memory_resident");
BVAR_DEFINE_PROC_MEMORY_FIELD(share, "process_memory_shared");
BVAR_DEFINE_PROC_MEMORY_FIELD(trs, "process_memory_text");
BVAR_DEFINE_PROC_MEMORY_FIELD(drs, "process_memory_data_and_stack");
BVAR_DEFINE_PROC_MEMORY_FIELD(lrs, "process_memory_library");
BVAR_DEFINE_PROC_MEMORY_FIELD(dt, "process_memory_dirty");

BVAR_DEFINE_LOAD_AVERAGE_FIELD(loadavg_1m, "system_loadavg_1m");
BVAR_DEFINE_LOAD_AVERAGE_FIELD(loadavg_5m, "system_loadavg_5m");
BVAR_DEFINE_LOAD_AVERAGE_FIELD(loadavg_15m, "system_loadavg_15m");

BVAR_DEFINE_PROC_IO_FIELD(rchar);
BVAR_DEFINE_PROC_IO_FIELD(wchar);
PerSecond<PassiveStatus<size_t> > g_io_read_second(
    "process_io_read_bytes_second", &g_rchar);
PerSecond<PassiveStatus<size_t> > g_io_write_second(
    "process_io_write_bytes_second", &g_wchar);

BVAR_DEFINE_PROC_IO_FIELD(syscr);
BVAR_DEFINE_PROC_IO_FIELD(syscw);
PerSecond<PassiveStatus<size_t> > g_io_num_reads_second(
    "process_io_read_second", &g_syscr);
PerSecond<PassiveStatus<size_t> > g_io_num_writes_second(
    "process_io_write_second", &g_syscw);

BVAR_DEFINE_PROC_IO_FIELD(read_bytes);
BVAR_DEFINE_PROC_IO_FIELD(write_bytes);
PerSecond<PassiveStatus<size_t> > g_disk_read_second(
    "process_disk_read_bytes_second", &g_read_bytes);
PerSecond<PassiveStatus<size_t> > g_disk_write_second(
    "process_disk_write_bytes_second", &g_write_bytes);

BVAR_DEFINE_RUSAGE_FIELD(ru_utime);
BVAR_DEFINE_RUSAGE_FIELD(ru_stime);
PassiveStatus<timeval> g_uptime("process_uptime", get_uptime, NULL);

static int get_core_num(void*) {
    return sysconf(_SC_NPROCESSORS_ONLN);
}
PassiveStatus<int> g_core_num("system_core_count", get_core_num, NULL);

struct TimePercent {
    int64_t time_us;
    int64_t real_time_us;

    void operator-=(const TimePercent& rhs) {
        time_us -= rhs.time_us;
        real_time_us -= rhs.real_time_us;
    }
    void operator+=(const TimePercent& rhs) {
        time_us += rhs.time_us;
        real_time_us += rhs.real_time_us;
    }
};
inline std::ostream& operator<<(std::ostream& os, const TimePercent& tp) {
    if (tp.real_time_us <= 0) {
        return os << "0";
    } else {
        return os << std::fixed << std::setprecision(3)
                  << (double)tp.time_us / tp.real_time_us;
    }
}
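
// [Editor's note] Worked example: 1.5s of CPU time over a 6s window gives
// TimePercent{1500000, 6000000}, which prints as "0.250", i.e. a quarter of
// one core. Values above 1 mean more than one core was busy on average.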

static TimePercent get_cputime_percent(void*) {
    TimePercent tp = { butil::timeval_to_microseconds(g_ru_stime.get_value()) +
                       butil::timeval_to_microseconds(g_ru_utime.get_value()),
                       butil::timeval_to_microseconds(g_uptime.get_value()) };
    return tp;
}
PassiveStatus<TimePercent> g_cputime_percent(get_cputime_percent, NULL);
Window<PassiveStatus<TimePercent>, SERIES_IN_SECOND> g_cputime_percent_second(
    "process_cpu_usage", &g_cputime_percent, FLAGS_bvar_dump_interval);

static TimePercent get_stime_percent(void*) {
    TimePercent tp = { butil::timeval_to_microseconds(g_ru_stime.get_value()),
                       butil::timeval_to_microseconds(g_uptime.get_value()) };
    return tp;
}
PassiveStatus<TimePercent> g_stime_percent(get_stime_percent, NULL);
Window<PassiveStatus<TimePercent>, SERIES_IN_SECOND> g_stime_percent_second(
    "process_cpu_usage_system", &g_stime_percent, FLAGS_bvar_dump_interval);

static TimePercent get_utime_percent(void*) {
    TimePercent tp = { butil::timeval_to_microseconds(g_ru_utime.get_value()),
                       butil::timeval_to_microseconds(g_uptime.get_value()) };
    return tp;
}
PassiveStatus<TimePercent> g_utime_percent(get_utime_percent, NULL);
Window<PassiveStatus<TimePercent>, SERIES_IN_SECOND> g_utime_percent_second(
    "process_cpu_usage_user", &g_utime_percent, FLAGS_bvar_dump_interval);

// According to http://man7.org/linux/man-pages/man2/getrusage.2.html
// Unsupported fields in linux:
//   ru_ixrss
//   ru_idrss
//   ru_isrss 
//   ru_nswap 
//   ru_nsignals 
BVAR_DEFINE_RUSAGE_FIELD(ru_inblock);
BVAR_DEFINE_RUSAGE_FIELD(ru_oublock);
BVAR_DEFINE_RUSAGE_FIELD(ru_nvcsw);
BVAR_DEFINE_RUSAGE_FIELD(ru_nivcsw);
PerSecond<PassiveStatus<long> > g_ru_inblock_second(
    "process_inblocks_second", &g_ru_inblock);
PerSecond<PassiveStatus<long> > g_ru_oublock_second(
    "process_outblocks_second", &g_ru_oublock);
PerSecond<PassiveStatus<long> > cs_vol_second(
    "process_context_switches_voluntary_second", &g_ru_nvcsw);
PerSecond<PassiveStatus<long> > cs_invol_second(
    "process_context_switches_involuntary_second", &g_ru_nivcsw);

PassiveStatus<std::string> g_cmdline("process_cmdline", get_cmdline, NULL);
PassiveStatus<std::string> g_kernel_version(
    "kernel_version", get_kernel_version, NULL);

static std::string* s_gcc_version = NULL;
pthread_once_t g_gen_gcc_version_once = PTHREAD_ONCE_INIT;

void gen_gcc_version() {

#if defined(__GNUC__)
    const int gcc_major = __GNUC__;
#else 
    const int gcc_major = -1;
#endif

#if defined(__GNUC_MINOR__)
    const int gcc_minor = __GNUC_MINOR__;
#else
    const int gcc_minor = -1;
#endif

#if defined(__GNUC_PATCHLEVEL__)
    const int gcc_patchlevel = __GNUC_PATCHLEVEL__;
#else
    const int gcc_patchlevel = -1;
#endif

    s_gcc_version = new std::string;

    if (gcc_major == -1) {
        *s_gcc_version = "unknown";
        return;
    }
    std::ostringstream oss;
    oss << gcc_major;
    if (gcc_minor != -1) {
        oss << '.' << gcc_minor;
        if (gcc_patchlevel != -1) {
            oss << '.' << gcc_patchlevel;
        }
    }
    // Assign at the end so that a partial version (e.g. major only) is kept
    // instead of being lost by an early return.
    *s_gcc_version = oss.str();

}

void get_gcc_version(std::ostream& os, void*) {
    pthread_once(&g_gen_gcc_version_once, gen_gcc_version);
    os << *s_gcc_version;
}

// =============================================
PassiveStatus<std::string> g_gcc_version("gcc_version", get_gcc_version, NULL);

void get_work_dir(std::ostream& os, void*) {
    butil::FilePath path;
    const bool rc = butil::GetCurrentDirectory(&path);
    LOG_IF(WARNING, !rc) << "Fail to GetCurrentDirectory";
    os << path.value();
}
PassiveStatus<std::string> g_work_dir("process_work_dir", get_work_dir, NULL);

#undef BVAR_MEMBER_TYPE
#undef BVAR_DEFINE_PROC_STAT_FIELD
#undef BVAR_DEFINE_PROC_STAT_FIELD2
#undef BVAR_DEFINE_PROC_MEMORY_FIELD
#undef BVAR_DEFINE_RUSAGE_FIELD
#undef BVAR_DEFINE_RUSAGE_FIELD2

}  // namespace bvar
Example #4
namespace brpc {

DECLARE_bool(usercode_in_pthread);

DEFINE_int32(free_memory_to_system_interval, 0,
             "Try to return free memory to system every so many seconds, "
             "values <= 0 disables this feature");
BRPC_VALIDATE_GFLAG(free_memory_to_system_interval, PassValidate);

namespace policy {
// Defined in http_rpc_protocol.cpp
void InitCommonStrings();
}

using namespace policy;

const char* const DUMMY_SERVER_PORT_FILE = "dummy_server.port";


struct GlobalExtensions {
    GlobalExtensions()
        : ch_mh_lb(MurmurHash32)
        , ch_md5_lb(MD5Hash32){}
#ifdef BAIDU_INTERNAL
    BaiduNamingService bns;
#endif
    FileNamingService fns;
    ListNamingService lns;
    DomainNamingService dns;
    RemoteFileNamingService rfns;
    ConsulNamingService cns;

    RoundRobinLoadBalancer rr_lb;
    WeightedRoundRobinLoadBalancer wrr_lb;
    RandomizedLoadBalancer randomized_lb;
    LocalityAwareLoadBalancer la_lb;
    ConsistentHashingLoadBalancer ch_mh_lb;
    ConsistentHashingLoadBalancer ch_md5_lb;
    DynPartLoadBalancer dynpart_lb;
};

static pthread_once_t register_extensions_once = PTHREAD_ONCE_INIT;
static GlobalExtensions* g_ext = NULL;

static long ReadPortOfDummyServer(const char* filename) {
    butil::fd_guard fd(open(filename, O_RDONLY));
    if (fd < 0) {
        LOG(ERROR) << "Fail to open `" << DUMMY_SERVER_PORT_FILE << "'";
        return -1;
    }
    char port_str[32];
    const ssize_t nr = read(fd, port_str, sizeof(port_str));
    if (nr <= 0) {
        LOG(ERROR) << "Fail to read `" << DUMMY_SERVER_PORT_FILE << "': "
                   << (nr == 0 ? "nothing to read" : berror());
        return -1;
    }
    port_str[std::min((size_t)nr, sizeof(port_str)-1)] = '\0';
    const char* p = port_str;
    for (; isspace(*p); ++p) {}
    char* endptr = NULL;
    const long port = strtol(p, &endptr, 10);
    for (; isspace(*endptr); ++endptr) {}
    if (*endptr != '\0') {
        LOG(ERROR) << "Invalid port=`" << port_str << "'";
        return -1;
    }
    return port;
}

// Expose counters of butil::IOBuf
static int64_t GetIOBufBlockCount(void*) {
    return butil::IOBuf::block_count();
}
static int64_t GetIOBufBlockCountHitTLSThreshold(void*) {
    return butil::IOBuf::block_count_hit_tls_threshold();
}
static int64_t GetIOBufNewBigViewCount(void*) {
    return butil::IOBuf::new_bigview_count();
}
static int64_t GetIOBufBlockMemory(void*) {
    return butil::IOBuf::block_memory();
}

// Defined in server.cpp
extern butil::static_atomic<int> g_running_server_count;
static int GetRunningServerCount(void*) {
    return g_running_server_count.load(butil::memory_order_relaxed);
}

// Update global stuff periodically.
static void* GlobalUpdate(void*) {
    // Expose variables.
    bvar::PassiveStatus<int64_t> var_iobuf_block_count(
        "iobuf_block_count", GetIOBufBlockCount, NULL);
    bvar::PassiveStatus<int64_t> var_iobuf_block_count_hit_tls_threshold(
        "iobuf_block_count_hit_tls_threshold",
        GetIOBufBlockCountHitTLSThreshold, NULL);
    bvar::PassiveStatus<int64_t> var_iobuf_new_bigview_count(
        GetIOBufNewBigViewCount, NULL);
    bvar::PerSecond<bvar::PassiveStatus<int64_t> > var_iobuf_new_bigview_second(
        "iobuf_newbigview_second", &var_iobuf_new_bigview_count);
    bvar::PassiveStatus<int64_t> var_iobuf_block_memory(
        "iobuf_block_memory", GetIOBufBlockMemory, NULL);
    bvar::PassiveStatus<int> var_running_server_count(
        "rpc_server_count", GetRunningServerCount, NULL);

    butil::FileWatcher fw;
    if (fw.init_from_not_exist(DUMMY_SERVER_PORT_FILE) < 0) {
        LOG(FATAL) << "Fail to init FileWatcher on `" << DUMMY_SERVER_PORT_FILE << "'";
        return NULL;
    }

    std::vector<SocketId> conns;
    const int64_t start_time_us = butil::gettimeofday_us();
    const int WARN_NOSLEEP_THRESHOLD = 2;
    int64_t last_time_us = start_time_us;
    int consecutive_nosleep = 0;
    int64_t last_return_free_memory_time = start_time_us;
    while (1) {
        const int64_t sleep_us = 1000000L + last_time_us - butil::gettimeofday_us();
        if (sleep_us > 0) {
            if (bthread_usleep(sleep_us) < 0) {
                PLOG_IF(FATAL, errno != ESTOP) << "Fail to sleep";
                break;
            }
            consecutive_nosleep = 0;
        } else {
            if (++consecutive_nosleep >= WARN_NOSLEEP_THRESHOLD) {
                consecutive_nosleep = 0;
                LOG(WARNING) << __FUNCTION__ << " is too busy!";
            }
        }
        last_time_us = butil::gettimeofday_us();

        TrackMe();

        if (!IsDummyServerRunning()
            && g_running_server_count.load(butil::memory_order_relaxed) == 0
            && fw.check_and_consume() > 0) {
            long port = ReadPortOfDummyServer(DUMMY_SERVER_PORT_FILE);
            if (port >= 0) {
                StartDummyServerAt(port);
            }
        }

        SocketMapList(&conns);
        const int64_t now_ms = butil::cpuwide_time_ms();
        for (size_t i = 0; i < conns.size(); ++i) {
            SocketUniquePtr ptr;
            if (Socket::Address(conns[i], &ptr) == 0) {
                ptr->UpdateStatsEverySecond(now_ms);
            }
        }

        const int return_mem_interval =
            FLAGS_free_memory_to_system_interval/*reloadable*/;
        if (return_mem_interval > 0 &&
            last_time_us >= last_return_free_memory_time +
            return_mem_interval * 1000000L) {
            last_return_free_memory_time = last_time_us;
            // TODO: Calling MallocExtension::instance()->ReleaseFreeMemory may
            // crash the program in later calls to malloc, verified on tcmalloc
            // 1.7 and 2.5, which means making the static member function weak
            // in details/tcmalloc_extension.cpp is probably not correct, however
            // it does work for heap profilers.
            if (MallocExtension_ReleaseFreeMemory != NULL) {
                MallocExtension_ReleaseFreeMemory();
            } else {
#if defined(OS_LINUX)
                // GNU specific.
                malloc_trim(10 * 1024 * 1024/*leave 10M pad*/);
#endif
            }
        }
    }
    return NULL;
}
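
// [Editor's note] The pacing arithmetic above targets one iteration per
// second: sleep_us = 1000000 + last_time_us - now, i.e. sleep whatever is
// left of the current second. If an iteration took 300ms, the thread sleeps
// ~700ms; if iterations persistently overrun the second, sleep_us stays <= 0
// and the WARN_NOSLEEP_THRESHOLD branch logs that the loop is too busy.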

static void BaiduStreamingLogHandler(google::protobuf::LogLevel level,
                                     const char* filename, int line,
                                     const std::string& message) {
    switch (level) {
    case google::protobuf::LOGLEVEL_INFO:
        LOG(INFO) << filename << ':' << line << ' ' << message;
        return;
    case google::protobuf::LOGLEVEL_WARNING:
        LOG(WARNING) << filename << ':' << line << ' ' << message;
        return;
    case google::protobuf::LOGLEVEL_ERROR:
        LOG(ERROR) << filename << ':' << line << ' ' << message;
        return;
    case google::protobuf::LOGLEVEL_FATAL:
        LOG(FATAL) << filename << ':' << line << ' ' << message;
        return;
    }
    CHECK(false) << filename << ':' << line << ' ' << message;
}

static void GlobalInitializeOrDieImpl() {
    //////////////////////////////////////////////////////////////////
    // Be careful about usages of gflags inside this function which //
    // may be called before main() only seeing gflags with default  //
    // values even if the gflags will be set after main().          //
    //////////////////////////////////////////////////////////////////

    // Ignore SIGPIPE.
    struct sigaction oldact;
    if (sigaction(SIGPIPE, NULL, &oldact) != 0 ||
            (oldact.sa_handler == NULL && oldact.sa_sigaction == NULL)) {
        CHECK(NULL == signal(SIGPIPE, SIG_IGN));
    }

    // Make GOOGLE_LOG print to comlog device
    SetLogHandler(&BaiduStreamingLogHandler);

    // Setting the variable here does not work: the profiler probably checks
    // the variable only once, before main().
    // setenv("TCMALLOC_SAMPLE_PARAMETER", "524288", 0);

    // Initialize openssl library
    SSL_library_init();
    // RPC doesn't require openssl.cnf, users can load it by themselves if needed
    SSL_load_error_strings();
    if (SSLThreadInit() != 0 || SSLDHInit() != 0) {
        exit(1);
    }

    // Defined in http_rpc_protocol.cpp
    InitCommonStrings();

    // Leave memory of these extensions to process's clean up.
    g_ext = new(std::nothrow) GlobalExtensions();
    if (NULL == g_ext) {
        exit(1);
    }
    // Naming Services
#ifdef BAIDU_INTERNAL
    NamingServiceExtension()->RegisterOrDie("bns", &g_ext->bns);
#endif
    NamingServiceExtension()->RegisterOrDie("file", &g_ext->fns);
    NamingServiceExtension()->RegisterOrDie("list", &g_ext->lns);
    NamingServiceExtension()->RegisterOrDie("http", &g_ext->dns);
    NamingServiceExtension()->RegisterOrDie("remotefile", &g_ext->rfns);
    NamingServiceExtension()->RegisterOrDie("consul", &g_ext->cns);

    // Load Balancers
    LoadBalancerExtension()->RegisterOrDie("rr", &g_ext->rr_lb);
    LoadBalancerExtension()->RegisterOrDie("wrr", &g_ext->wrr_lb);
    LoadBalancerExtension()->RegisterOrDie("random", &g_ext->randomized_lb);
    LoadBalancerExtension()->RegisterOrDie("la", &g_ext->la_lb);
    LoadBalancerExtension()->RegisterOrDie("c_murmurhash", &g_ext->ch_mh_lb);
    LoadBalancerExtension()->RegisterOrDie("c_md5", &g_ext->ch_md5_lb);
    LoadBalancerExtension()->RegisterOrDie("_dynpart", &g_ext->dynpart_lb);

    // Compress Handlers
    const CompressHandler gzip_compress =
        { GzipCompress, GzipDecompress, "gzip" };
    if (RegisterCompressHandler(COMPRESS_TYPE_GZIP, gzip_compress) != 0) {
        exit(1);
    }
    const CompressHandler zlib_compress =
        { ZlibCompress, ZlibDecompress, "zlib" };
    if (RegisterCompressHandler(COMPRESS_TYPE_ZLIB, zlib_compress) != 0) {
        exit(1);
    }
    const CompressHandler snappy_compress =
        { SnappyCompress, SnappyDecompress, "snappy" };
    if (RegisterCompressHandler(COMPRESS_TYPE_SNAPPY, snappy_compress) != 0) {
        exit(1);
    }

    // Protocols
    Protocol baidu_protocol = { ParseRpcMessage,
                                SerializeRequestDefault, PackRpcRequest,
                                ProcessRpcRequest, ProcessRpcResponse,
                                VerifyRpcRequest, NULL, NULL,
                                CONNECTION_TYPE_ALL, "baidu_std" };
    if (RegisterProtocol(PROTOCOL_BAIDU_STD, baidu_protocol) != 0) {
        exit(1);
    }

    Protocol streaming_protocol = { ParseStreamingMessage,
                                    NULL, NULL, ProcessStreamingMessage,
                                    ProcessStreamingMessage,
                                    NULL, NULL, NULL,
                                    CONNECTION_TYPE_SINGLE, "streaming_rpc" };

    if (RegisterProtocol(PROTOCOL_STREAMING_RPC, streaming_protocol) != 0) {
        exit(1);
    }

    Protocol http_protocol = { ParseHttpMessage,
                               SerializeHttpRequest, PackHttpRequest,
                               ProcessHttpRequest, ProcessHttpResponse,
                               VerifyHttpRequest, ParseHttpServerAddress,
                               GetHttpMethodName,
                               CONNECTION_TYPE_POOLED_AND_SHORT,
                               "http" };
    if (RegisterProtocol(PROTOCOL_HTTP, http_protocol) != 0) {
        exit(1);
    }

    Protocol hulu_protocol = { ParseHuluMessage,
                               SerializeRequestDefault, PackHuluRequest,
                               ProcessHuluRequest, ProcessHuluResponse,
                               VerifyHuluRequest, NULL, NULL,
                               CONNECTION_TYPE_ALL, "hulu_pbrpc" };
    if (RegisterProtocol(PROTOCOL_HULU_PBRPC, hulu_protocol) != 0) {
        exit(1);
    }

    // Only valid at client side
    Protocol nova_protocol = { ParseNsheadMessage,
                               SerializeNovaRequest, PackNovaRequest,
                               NULL, ProcessNovaResponse,
                               NULL, NULL, NULL,
                               CONNECTION_TYPE_POOLED_AND_SHORT,  "nova_pbrpc" };
    if (RegisterProtocol(PROTOCOL_NOVA_PBRPC, nova_protocol) != 0) {
        exit(1);
    }

    // Only valid at client side
    Protocol public_pbrpc_protocol = { ParseNsheadMessage,
                                       SerializePublicPbrpcRequest,
                                       PackPublicPbrpcRequest,
                                       NULL, ProcessPublicPbrpcResponse,
                                       NULL, NULL, NULL,
                                       // public_pbrpc server implementation
                                       // doesn't support full duplex
                                       CONNECTION_TYPE_POOLED_AND_SHORT,
                                       "public_pbrpc" };
    if (RegisterProtocol(PROTOCOL_PUBLIC_PBRPC, public_pbrpc_protocol) != 0) {
        exit(1);
    }

    Protocol sofa_protocol = { ParseSofaMessage,
                               SerializeRequestDefault, PackSofaRequest,
                               ProcessSofaRequest, ProcessSofaResponse,
                               VerifySofaRequest, NULL, NULL,
                               CONNECTION_TYPE_ALL, "sofa_pbrpc" };
    if (RegisterProtocol(PROTOCOL_SOFA_PBRPC, sofa_protocol) != 0) {
        exit(1);
    }

    // Only valid at server side. We generalize all protocols prefixed with
    // nshead as `nshead_protocol' and let ServerOptions.nshead_service
    // specify how the content after the nshead header is parsed.
    Protocol nshead_protocol = { ParseNsheadMessage,
                                 SerializeNsheadRequest, PackNsheadRequest,
                                 ProcessNsheadRequest, ProcessNsheadResponse,
                                 VerifyNsheadRequest, NULL, NULL,
                                 CONNECTION_TYPE_POOLED_AND_SHORT, "nshead" };
    if (RegisterProtocol(PROTOCOL_NSHEAD, nshead_protocol) != 0) {
        exit(1);
    }
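    // Usage sketch (server side, with a hypothetical MyNsheadService
    // subclassing brpc::NsheadService):
    //   brpc::Server server;
    //   brpc::ServerOptions options;
    //   options.nshead_service = new MyNsheadService;  // owned by the server
    //   server.Start(port, &options);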

    Protocol mc_binary_protocol = { ParseMemcacheMessage,
                                    SerializeMemcacheRequest,
                                    PackMemcacheRequest,
                                    NULL, ProcessMemcacheResponse,
                                    NULL, NULL, GetMemcacheMethodName,
                                    CONNECTION_TYPE_ALL, "memcache" };
    if (RegisterProtocol(PROTOCOL_MEMCACHE, mc_binary_protocol) != 0) {
        exit(1);
    }

    Protocol redis_protocol = { ParseRedisMessage,
                                SerializeRedisRequest,
                                PackRedisRequest,
                                NULL, ProcessRedisResponse,
                                NULL, NULL, GetRedisMethodName,
                                CONNECTION_TYPE_ALL, "redis" };
    if (RegisterProtocol(PROTOCOL_REDIS, redis_protocol) != 0) {
        exit(1);
    }

    Protocol mongo_protocol = { ParseMongoMessage,
                                NULL, NULL,
                                ProcessMongoRequest, NULL,
                                NULL, NULL, NULL,
                                CONNECTION_TYPE_POOLED, "mongo" };
    if (RegisterProtocol(PROTOCOL_MONGO, mongo_protocol) != 0) {
        exit(1);
    }

    // Register the Thrift framed protocol if the thrift module is linked
    // in; RegisterThriftProtocol is NULL otherwise.
    if (RegisterThriftProtocol) {
        RegisterThriftProtocol();
    }

    // Only valid at client side
    Protocol ubrpc_compack_protocol = {
        ParseNsheadMessage,
        SerializeUbrpcCompackRequest, PackUbrpcRequest,
        NULL, ProcessUbrpcResponse,
        NULL, NULL, NULL,
        CONNECTION_TYPE_POOLED_AND_SHORT,  "ubrpc_compack" };
    if (RegisterProtocol(PROTOCOL_UBRPC_COMPACK, ubrpc_compack_protocol) != 0) {
        exit(1);
    }
    Protocol ubrpc_mcpack2_protocol = {
        ParseNsheadMessage,
        SerializeUbrpcMcpack2Request, PackUbrpcRequest,
        NULL, ProcessUbrpcResponse,
        NULL, NULL, NULL,
        CONNECTION_TYPE_POOLED_AND_SHORT,  "ubrpc_mcpack2" };
    if (RegisterProtocol(PROTOCOL_UBRPC_MCPACK2, ubrpc_mcpack2_protocol) != 0) {
        exit(1);
    }

    // Only valid at client side
    Protocol nshead_mcpack_protocol = {
        ParseNsheadMessage,
        SerializeNsheadMcpackRequest, PackNsheadMcpackRequest,
        NULL, ProcessNsheadMcpackResponse,
        NULL, NULL, NULL,
        CONNECTION_TYPE_POOLED_AND_SHORT,  "nshead_mcpack" };
    if (RegisterProtocol(PROTOCOL_NSHEAD_MCPACK, nshead_mcpack_protocol) != 0) {
        exit(1);
    }

    Protocol rtmp_protocol = {
        ParseRtmpMessage,
        SerializeRtmpRequest, PackRtmpRequest,
        ProcessRtmpMessage, ProcessRtmpMessage,
        NULL, NULL, NULL,
        (ConnectionType)(CONNECTION_TYPE_SINGLE|CONNECTION_TYPE_SHORT),
        "rtmp" };
    if (RegisterProtocol(PROTOCOL_RTMP, rtmp_protocol) != 0) {
        exit(1);
    }

    Protocol esp_protocol = {
        ParseEspMessage,
        SerializeEspRequest, PackEspRequest,
        NULL, ProcessEspResponse,
        NULL, NULL, NULL,
        CONNECTION_TYPE_POOLED_AND_SHORT, "esp"};
    if (RegisterProtocol(PROTOCOL_ESP, esp_protocol) != 0) {
        exit(1);
    }

    std::vector<Protocol> protocols;
    ListProtocols(&protocols);
    for (size_t i = 0; i < protocols.size(); ++i) {
        // `process_response' is required at the client side.
        if (protocols[i].process_response) {
            InputMessageHandler handler;
            handler.parse = protocols[i].parse;
            handler.process = protocols[i].process_response;
            // No need to verify at client side
            handler.verify = NULL;
            handler.arg = NULL;
            handler.name = protocols[i].name;
            if (get_or_new_client_side_messenger()->AddHandler(handler) != 0) {
                exit(1);
            }
        }
    }

    if (FLAGS_usercode_in_pthread) {
        // Optional. If a channel/server is initialized before main(), this
        // flag may still be false here even if it will be set to true in
        // main(). In that case, the usercode pool is not initialized until
        // it is first used.
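        // (The flag is normally toggled from the command line, e.g.
        // ./my_server --usercode_in_pthread=true, which makes user callbacks
        // run in pthread workers instead of bthreads.)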
        InitUserCodeBackupPoolOnceOrDie();
    }

    // We never join GlobalUpdate; let it quit with the process.
    bthread_t th;
    CHECK(bthread_start_background(&th, NULL, GlobalUpdate, NULL) == 0)
        << "Fail to start GlobalUpdate";
}

void GlobalInitializeOrDie() {
    if (pthread_once(&register_extensions_once,
                     GlobalInitializeOrDieImpl) != 0) {
        LOG(FATAL) << "Fail to pthread_once";
        exit(1);
    }
}

} // namespace brpc
Example No. 9
0
// Update global stuff periodically.
static void* GlobalUpdate(void*) {
    // Expose variables.
    bvar::PassiveStatus<int64_t> var_iobuf_block_count(
        "iobuf_block_count", GetIOBufBlockCount, NULL);
    bvar::PassiveStatus<int64_t> var_iobuf_block_count_hit_tls_threshold(
        "iobuf_block_count_hit_tls_threshold",
        GetIOBufBlockCountHitTLSThreshold, NULL);
    bvar::PassiveStatus<int64_t> var_iobuf_new_bigview_count(
        GetIOBufNewBigViewCount, NULL);
    bvar::PerSecond<bvar::PassiveStatus<int64_t> > var_iobuf_new_bigview_second(
        "iobuf_newbigview_second", &var_iobuf_new_bigview_count);
    bvar::PassiveStatus<int64_t> var_iobuf_block_memory(
        "iobuf_block_memory", GetIOBufBlockMemory, NULL);
    bvar::PassiveStatus<int> var_running_server_count(
        "rpc_server_count", GetRunningServerCount, NULL);

    butil::FileWatcher fw;
    if (fw.init_from_not_exist(DUMMY_SERVER_PORT_FILE) < 0) {
        LOG(FATAL) << "Fail to init FileWatcher on `" << DUMMY_SERVER_PORT_FILE << "'";
        return NULL;
    }

    std::vector<SocketId> conns;
    const int64_t start_time_us = butil::gettimeofday_us();
    const int WARN_NOSLEEP_THRESHOLD = 2;
    int64_t last_time_us = start_time_us;
    int consecutive_nosleep = 0;
    int64_t last_return_free_memory_time = start_time_us;
    while (1) {
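        // Pace the loop at roughly one iteration per second: sleep for
        // whatever remains of the second since the last wake-up.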
        const int64_t sleep_us = 1000000L + last_time_us - butil::gettimeofday_us();
        if (sleep_us > 0) {
            if (bthread_usleep(sleep_us) < 0) {
                PLOG_IF(FATAL, errno != ESTOP) << "Fail to sleep";
                break;
            }
            consecutive_nosleep = 0;
        } else {
            if (++consecutive_nosleep >= WARN_NOSLEEP_THRESHOLD) {
                consecutive_nosleep = 0;
                LOG(WARNING) << __FUNCTION__ << " is too busy!";
            }
        }
        last_time_us = butil::gettimeofday_us();

        TrackMe();

        if (!IsDummyServerRunning()
            && g_running_server_count.load(butil::memory_order_relaxed) == 0
            && fw.check_and_consume() > 0) {
            long port = ReadPortOfDummyServer(DUMMY_SERVER_PORT_FILE);
            if (port >= 0) {
                StartDummyServerAt(port);
            }
        }

        SocketMapList(&conns);
        const int64_t now_ms = butil::cpuwide_time_ms();
        for (size_t i = 0; i < conns.size(); ++i) {
            SocketUniquePtr ptr;
            if (Socket::Address(conns[i], &ptr) == 0) {
                ptr->UpdateStatsEverySecond(now_ms);
            }
        }

        const int return_mem_interval =
            FLAGS_free_memory_to_system_interval/*reloadable*/;
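        // (Off by default: e.g. start with --free_memory_to_system_interval=10
        // to return free memory to the system every 10 seconds.)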
        if (return_mem_interval > 0 &&
            last_time_us >= last_return_free_memory_time +
            return_mem_interval * 1000000L) {
            last_return_free_memory_time = last_time_us;
            // TODO: Calling MallocExtension::instance()->ReleaseFreeMemory
            // may crash the program in later calls to malloc (verified on
            // tcmalloc 1.7 and 2.5), which means that making the static
            // member function weak in details/tcmalloc_extension.cpp is
            // probably not correct; however, it does work for heap profilers.
            if (MallocExtension_ReleaseFreeMemory != NULL) {
                MallocExtension_ReleaseFreeMemory();
            } else {
#if defined(OS_LINUX)
                // GNU specific.
                malloc_trim(10 * 1024 * 1024/*leave 10M pad*/);
#endif
            }
        }
    }
    return NULL;
}
Example No. 10
0
static int GetRunningServerCount(void*) {
    return g_running_server_count.load(butil::memory_order_relaxed);
}