int epoll_wait_call::get_current_events()
{
	if (m_epfd_info->m_ready_fds.empty()) {
		return m_n_all_ready_fds;
	}

	vector<socket_fd_api *> socket_fd_vec;
	lock();
	int i, r, w;
	i = r = w = m_n_all_ready_fds;
	socket_fd_api *p_socket_object;
	epoll_fd_rec fd_rec;
	ep_ready_fd_map_t::iterator iter = m_epfd_info->m_ready_fds.begin();
	while (iter != m_epfd_info->m_ready_fds.end() && i < m_maxevents) {
		ep_ready_fd_map_t::iterator iter_cpy = iter; // keep a copy so the element can be erased safely below
		++iter;
		p_socket_object = fd_collection_get_sockfd(iter_cpy->first);
		if (p_socket_object) {
			if (!m_epfd_info->get_fd_rec_by_fd(iter_cpy->first, fd_rec))
				continue;

			m_events[i].events = 0; // initialize

			bool got_event = false;

			// epoll_wait will always wait for EPOLLERR and EPOLLHUP; it is not necessary to set them in events.
			uint32_t mutual_events = iter_cpy->second & (fd_rec.events | EPOLLERR | EPOLLHUP);

			// EPOLLHUP & EPOLLOUT are mutually exclusive. See the poll man page; epoll adopts poll's behavior.
			if ((mutual_events & EPOLLHUP) && (mutual_events & EPOLLOUT)) {
				mutual_events &= ~EPOLLOUT;
			}

			if (mutual_events & EPOLLIN) {
				if (handle_epoll_event(p_socket_object->is_readable(NULL), EPOLLIN, iter_cpy, fd_rec, i)) {
					r++;
					got_event = true;
				}
				mutual_events &= ~EPOLLIN;
			}

			if (mutual_events & EPOLLOUT) {
				if (handle_epoll_event(p_socket_object->is_writeable(), EPOLLOUT, iter_cpy, fd_rec, i)) {
					w++;
					got_event = true;
				}
				mutual_events &= ~EPOLLOUT;
			}

			if (mutual_events) {
				if (handle_epoll_event(true, mutual_events, iter_cpy, fd_rec, i)) {
					got_event = true;
				}
			}

			if (got_event) {
				socket_fd_vec.push_back(p_socket_object);
				++i;
			}
		} else {
			m_epfd_info->m_ready_fds.erase(iter_cpy);
		}
	}

	int ready_rfds = r - m_n_all_ready_fds; // MNY: not only rfds, different counters for read/write ?
	int ready_wfds = w - m_n_all_ready_fds;
	m_n_ready_rfds += ready_rfds;
	m_n_ready_wfds += ready_wfds;
	m_p_stats->n_iomux_rx_ready += ready_rfds;

	unlock();

	/*
	 * For checking ring migration we need a socket context.
	 * In epoll we separate the rings from the sockets, so only here do we access the sockets.
	 * Therefore, it is most convenient to check it here.
	 * We need to move the ring migration to the epfd, going over the registered sockets,
	 * when polling the rings was not fruitful.
	 * This will be more similar to the behavior of select/poll.
	 * See RM task 212058.
	 */
	for (unsigned int j = 0; j < socket_fd_vec.size(); j++) {
		socket_fd_vec[j]->consider_rings_migration();
	}

	return (i);
}
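/*
 * Illustrative sketch (standalone, not part of this translation unit): the kernel
 * behavior mirrored above - EPOLLERR/EPOLLHUP are reported by epoll_wait() even
 * when the caller never registered them, which is why they are OR-ed into
 * mutual_events. All names below are local to this hypothetical example.
 *
 *	#include <sys/epoll.h>
 *	#include <unistd.h>
 *	#include <stdio.h>
 *
 *	static void epoll_hup_demo(void)
 *	{
 *		int sv[2];
 *		if (pipe(sv) < 0)
 *			return;
 *
 *		int epfd = epoll_create1(0);
 *		struct epoll_event ev = {};
 *		ev.events = EPOLLIN;            // EPOLLHUP/EPOLLERR are implicit
 *		ev.data.fd = sv[0];
 *		epoll_ctl(epfd, EPOLL_CTL_ADD, sv[0], &ev);
 *
 *		close(sv[1]);                   // writer gone -> read end gets EPOLLHUP
 *
 *		struct epoll_event out;
 *		int n = epoll_wait(epfd, &out, 1, 0);
 *		if (n == 1 && (out.events & EPOLLHUP))
 *			printf("EPOLLHUP reported although only EPOLLIN was registered\n");
 *
 *		close(sv[0]);
 *		close(epfd);
 *	}
 */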
select_call::select_call(int *off_fds_buffer, offloaded_mode_t *off_modes_buffer,
			 int nfds, fd_set *readfds, fd_set *writefds,
			 fd_set *exceptfds, timeval *timeout,
			 const sigset_t *__sigmask /* = NULL */) :
	io_mux_call(off_fds_buffer, off_modes_buffer, nfds, __sigmask),
	m_nfds(nfds), m_readfds(readfds), m_writefds(writefds),
	m_exceptfds(exceptfds), m_timeout(timeout), m_nfds_with_cq(0),
	m_b_run_prepare_to_poll(false)
{
	int fd;
	//socket_fd_api* temp_sock_fd_api = NULL;

	if (m_nfds > FD_SETSIZE) {
		errno = ENOMEM;
		vma_throw_object(io_mux_call::io_error);
	}

	// create stats
	m_p_stats = &g_select_stats;
	vma_stats_instance_get_select_block(m_p_stats);

	bool offloaded_read  = !!m_readfds;
	bool offloaded_write = !!m_writefds;

	if (offloaded_read || offloaded_write) {
		FD_ZERO(&m_os_rfds, m_nfds);
		FD_ZERO(&m_os_wfds, m_nfds);

		// covers the case of select(readfds = NULL)
		if (!m_readfds) {
			FD_ZERO(&m_cq_rfds, m_nfds);
			m_readfds = &m_cq_rfds;
		}

		// get offloaded fds in read set
		for (fd = 0; fd < m_nfds; ++fd) {
			bool check_read  = offloaded_read && FD_ISSET(fd, m_readfds);
			bool check_write = offloaded_write && FD_ISSET(fd, m_writefds);

			socket_fd_api* psock = fd_collection_get_sockfd(fd);

			if (psock && psock->get_type() == FD_TYPE_SOCKET) {

				offloaded_mode_t off_mode = OFF_NONE;
				if (check_read)
					off_mode = (offloaded_mode_t)(off_mode | OFF_READ);
				if (check_write)
					off_mode = (offloaded_mode_t)(off_mode | OFF_WRITE);

				if (off_mode) {
					__log_func("---> fd=%d IS SET for read or write!", fd);

					m_p_all_offloaded_fds[m_num_all_offloaded_fds] = fd;
					m_p_offloaded_modes[m_num_all_offloaded_fds] = off_mode;
					m_num_all_offloaded_fds++;

					if (!psock->skip_os_select()) {
						if (check_read) {
							FD_SET(fd, &m_os_rfds);
							if (psock->is_readable(NULL)) {
								io_mux_call::update_fd_array(&m_fd_ready_array, fd);
								m_n_ready_rfds++;
								m_n_all_ready_fds++;
							} else {
								// Instructing the socket to sample the OS immediately to prevent hitting EAGAIN on recvfrom(),
								// after iomux returned a shadow fd as ready (only for non-blocking sockets)
								psock->set_immediate_os_sample();
							}
						}
						if (check_write) {
							FD_SET(fd, &m_os_wfds);
						}
					} else {
						__log_func("fd=%d must be skipped from os r select()", fd);
					}
				}
			} else {
				if (check_read) {
					FD_SET(fd, &m_os_rfds);
				}
				if (check_write) {
					FD_SET(fd, &m_os_wfds);
				}
			}
		}
	}

	__log_func("num all offloaded_fds=%d", m_num_all_offloaded_fds);
}
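/*
 * Illustrative sketch (standalone, hypothetical names): the fd-partitioning the
 * constructor above performs, reduced to its core. Every fd found in the caller's
 * read/write sets is classified either as offloaded (handled by the library) or
 * as an OS fd that still needs the kernel select(). Note the sketch uses the
 * standard one-argument FD_ZERO(); the two-argument form used above is assumed
 * to be a local macro that clears only the bytes covering nfds bits.
 *
 *	#include <sys/select.h>
 *	#include <utility>
 *	#include <vector>
 *
 *	enum demo_off_mode { DEMO_OFF_NONE = 0, DEMO_OFF_READ = 1, DEMO_OFF_WRITE = 2 };
 *
 *	// is_offloaded() stands in for the fd_collection_get_sockfd()/get_type() check above
 *	static void partition_fds(int nfds, const fd_set *rfds, const fd_set *wfds,
 *				  bool (*is_offloaded)(int),
 *				  std::vector<std::pair<int, int> > &offloaded,
 *				  fd_set *os_rfds, fd_set *os_wfds)
 *	{
 *		FD_ZERO(os_rfds);
 *		FD_ZERO(os_wfds);
 *		for (int fd = 0; fd < nfds; ++fd) {
 *			bool r = rfds && FD_ISSET(fd, rfds);
 *			bool w = wfds && FD_ISSET(fd, wfds);
 *			if (!r && !w)
 *				continue;
 *			if (is_offloaded(fd)) {
 *				int mode = (r ? DEMO_OFF_READ : 0) | (w ? DEMO_OFF_WRITE : 0);
 *				offloaded.push_back(std::make_pair(fd, mode));
 *			}
 *			// an offloaded fd may still be mirrored to the OS sets unless it can
 *			// be skipped (cf. skip_os_select() above); the sketch keeps it simple
 *			if (r) FD_SET(fd, os_rfds);
 *			if (w) FD_SET(fd, os_wfds);
 *		}
 *	}
 */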
bool epoll_wait_call::_wait(int timeout)
{
	int i, ready_fds, fd;
	bool cq_ready = false;

	__log_func("calling os epoll: %d", m_epfd);

	if (timeout) {
		lock();
		if (m_epfd_info->m_ready_fds.empty()) {
			m_epfd_info->going_to_sleep();
		} else {
			timeout = 0;
		}
		unlock();
	}

	if (m_sigmask) {
		ready_fds = orig_os_api.epoll_pwait(m_epfd, m_p_ready_events, m_maxevents, timeout, m_sigmask);
	} else {
		ready_fds = orig_os_api.epoll_wait(m_epfd, m_p_ready_events, m_maxevents, timeout);
	}

	if (timeout) {
		lock();
		m_epfd_info->return_from_sleep();
		unlock();
	}

	if (ready_fds < 0) {
		vma_throw_object(io_mux_call::io_error);
	}

	// convert the returned events to user events and mark offloaded fds
	m_n_all_ready_fds = 0;
	for (i = 0; i < ready_fds; ++i) {
		fd = m_p_ready_events[i].data.fd;

		// wakeup event
		if (m_epfd_info->is_wakeup_fd(fd)) {
			lock();
			m_epfd_info->remove_wakeup_fd();
			unlock();
			continue;
		}

		// if it's a CQ
		if (m_epfd_info->is_cq_fd(m_p_ready_events[i].data.u64)) {
			cq_ready = true;
			continue;
		}

		if (m_p_ready_events[i].events & EPOLLIN) {
			socket_fd_api* temp_sock_fd_api = fd_collection_get_sockfd(fd);
			if (temp_sock_fd_api) {
				// Instructing the socket to sample the OS immediately to prevent hitting EAGAIN on recvfrom(),
				// after iomux returned a shadow fd as ready (only for non-blocking sockets)
				temp_sock_fd_api->set_immediate_os_sample();
			}
		}

		// copy event bits and data
		m_events[m_n_all_ready_fds].events = m_p_ready_events[i].events;
		if (!m_epfd_info->get_data_by_fd(fd, &m_events[m_n_all_ready_fds].data)) {
			continue;
		}
		++m_n_all_ready_fds;
	}

	return cq_ready;
}
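/*
 * Illustrative sketch (standalone, not part of this translation unit): the
 * wakeup-fd pattern handled by the is_wakeup_fd()/remove_wakeup_fd() branch
 * above. A second thread writes to an eventfd registered in the epoll set,
 * which forces a blocked epoll_wait() to return; the waiter then drains the
 * eventfd and does not report it as a user event. Names are local to this
 * hypothetical example.
 *
 *	#include <sys/epoll.h>
 *	#include <sys/eventfd.h>
 *	#include <unistd.h>
 *	#include <stdint.h>
 *	#include <thread>
 *
 *	static void wakeup_fd_demo(void)
 *	{
 *		int epfd = epoll_create1(0);
 *		int wake_fd = eventfd(0, EFD_NONBLOCK);
 *
 *		struct epoll_event ev = {};
 *		ev.events = EPOLLIN;
 *		ev.data.fd = wake_fd;
 *		epoll_ctl(epfd, EPOLL_CTL_ADD, wake_fd, &ev);
 *
 *		std::thread waker([wake_fd]() {
 *			uint64_t one = 1;
 *			ssize_t rc = write(wake_fd, &one, sizeof(one)); // interrupts the sleeper
 *			(void)rc;
 *		});
 *
 *		struct epoll_event out;
 *		int n = epoll_wait(epfd, &out, 1, -1);   // returns once the waker writes
 *		if (n == 1 && out.data.fd == wake_fd) {
 *			uint64_t val;
 *			ssize_t rc = read(wake_fd, &val, sizeof(val)); // drain, do not pass to the user
 *			(void)rc;
 *		}
 *
 *		waker.join();
 *		close(wake_fd);
 *		close(epfd);
 *	}
 */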