Esempio n. 1
0
bool select_call::wait_os(bool zero_timeout)
{
	timeval to, *pto = NULL;
	timespec to_pselect, *pto_pselect = NULL;
	
/* Avner: I put it in comment, because this logic is wrong

	// optimization: do not call os select if ALL fds are excluded
	// extend check to write/except fds
	if (m_rfd_count == m_n_exclude_fds)
		return;
*/
	
	if (zero_timeout) {
		to.tv_sec = to.tv_usec = 0;
		pto = &to;
	}
	else {
		pto = m_timeout;
	}

	// Restore original sets
	if (m_b_run_prepare_to_poll) {
		if (m_readfds)	FD_COPY(m_readfds, &m_os_rfds, m_nfds);
		if (m_writefds)	FD_COPY(m_writefds, &m_os_wfds, m_nfds);
		if (m_exceptfds)FD_COPY(m_exceptfds, &m_orig_exceptfds, m_nfds);
	}
	__log_func("calling os select: %d", m_nfds);
	if (m_sigmask) {
		if (pto) {
			to_pselect.tv_sec = pto->tv_sec;
			to_pselect.tv_nsec = pto->tv_usec * 1000;
			pto_pselect = &to_pselect;
		}
		m_n_all_ready_fds = orig_os_api.pselect(m_nfds, m_readfds, m_writefds, m_exceptfds, pto_pselect, m_sigmask);
	} else {
		m_n_all_ready_fds = orig_os_api.select(m_nfds, m_readfds, m_writefds, m_exceptfds, pto);
	}
	if (m_n_all_ready_fds < 0) {
		vma_throw_object(io_mux_call::io_error);
	}
	if (m_n_all_ready_fds > 0) {
		__log_func("wait_os() returned with %d", m_n_all_ready_fds);
	}
	return false; // No cq_fd in select() event
}
Esempio n. 2
0
void epoll_wait_call::init_offloaded_fds()
{
	// copy offloaded_fds pointer and count
	m_epfd_info->get_offloaded_fds_arr_and_size(&m_p_num_all_offloaded_fds, &m_p_all_offloaded_fds);
	m_num_all_offloaded_fds = *m_p_num_all_offloaded_fds; // TODO: fix orig ugly code, and then remove this

	__log_func("building: epfd=%d, m_epfd_info->get_fd_info().size()=%d, *m_p_num_all_offloaded_fds=%d", m_epfd, (int)m_epfd_info->get_fd_info().size(), (int)*m_p_num_all_offloaded_fds  );

}
Esempio n. 3
0
void select_call::set_wfd_ready(int fd)
{
	// This function also checks that fd was in the original read set
	if (!FD_ISSET(fd, m_writefds) && FD_ISSET(fd, &m_orig_writefds)) { //TODO: why do we need the last 'if'??
		FD_SET(fd, m_writefds);
		++m_n_ready_wfds;
//		if (!FD_ISSET(fd, m_readfds))
		++m_n_all_ready_fds;
		__log_func("ready w fd: %d", fd);
	}
}
Esempio n. 4
0
void select_call::set_offloaded_wfd_ready(int fd_index)
{
	if (m_p_offloaded_modes[fd_index] & OFF_WRITE) { //TODO: consider removing
		int fd = m_p_all_offloaded_fds[fd_index];
		if (!FD_ISSET(fd, m_writefds)) {
			FD_SET(fd, m_writefds);
			++m_n_ready_wfds;
			++m_n_all_ready_fds;
			__log_func("ready offloaded w fd: %d", fd);
		}
	}
}
Esempio n. 5
0
bool epoll_wait_call::check_all_offloaded_sockets(uint64_t *p_poll_sn)
{
	NOT_IN_USE(p_poll_sn);
	m_n_all_ready_fds = get_current_events();

	if (!m_n_ready_rfds)
	{
		// check cq for acks
		ring_poll_and_process_element(&m_poll_sn, NULL);
		m_n_all_ready_fds = get_current_events();
	}

	__log_func("m_n_all_ready_fds=%d, m_n_ready_rfds=%d, m_n_ready_wfds=%d", m_n_all_ready_fds, m_n_ready_rfds, m_n_ready_wfds);
	return m_n_all_ready_fds;
}
Esempio n. 6
0
void rfs_uc_tcp_gro::flush_gro_desc(void* pv_fd_ready_array)
{
	if (!m_b_active) return;

	if (m_gro_desc.buf_count > 1) {
		m_gro_desc.p_ip_h->tot_len = htons(m_gro_desc.ip_tot_len);
		m_gro_desc.p_tcp_h->ack_seq = m_gro_desc.ack;
		m_gro_desc.p_tcp_h->window = m_gro_desc.wnd;

		if (m_gro_desc.ts_present) {
			tcphdr_ts* p_tcp_ts_h = (tcphdr_ts*) m_gro_desc.p_tcp_h;
			p_tcp_ts_h->popts[2] = m_gro_desc.tsecr;
		}

		m_gro_desc.p_first->rx.tcp.gro = 1;

		m_gro_desc.p_first->lwip_pbuf.pbuf.flags = PBUF_FLAG_IS_CUSTOM;
		m_gro_desc.p_first->lwip_pbuf.pbuf.tot_len = m_gro_desc.p_first->lwip_pbuf.pbuf.len = (m_gro_desc.p_first->sz_data - m_gro_desc.p_first->rx.tcp.n_transport_header_len);
		m_gro_desc.p_first->lwip_pbuf.pbuf.ref = 1;
		m_gro_desc.p_first->lwip_pbuf.pbuf.type = PBUF_REF;
		m_gro_desc.p_first->lwip_pbuf.pbuf.payload = (u8_t *)(m_gro_desc.p_first->p_buffer + m_gro_desc.p_first->rx.tcp.n_transport_header_len);
		m_gro_desc.p_first->rx.is_vma_thr = m_gro_desc.p_last->rx.is_vma_thr;

		for (mem_buf_desc_t* p_desc = m_gro_desc.p_last; p_desc != m_gro_desc.p_first; p_desc = p_desc->p_prev_desc) {
			p_desc->p_prev_desc->lwip_pbuf.pbuf.tot_len += p_desc->lwip_pbuf.pbuf.tot_len;
		}
	}

	__log_func("Rx LRO TCP segment info: src_port=%d, dst_port=%d, flags='%s%s%s%s%s%s' seq=%u, ack=%u, win=%u, payload_sz=%u, num_bufs=%u",
					ntohs(m_gro_desc.p_tcp_h->source), ntohs(m_gro_desc.p_tcp_h->dest),
					m_gro_desc.p_tcp_h->urg?"U":"", m_gro_desc.p_tcp_h->ack?"A":"", m_gro_desc.p_tcp_h->psh?"P":"",
					m_gro_desc.p_tcp_h->rst?"R":"", m_gro_desc.p_tcp_h->syn?"S":"", m_gro_desc.p_tcp_h->fin?"F":"",
					ntohl(m_gro_desc.p_tcp_h->seq), ntohl(m_gro_desc.p_tcp_h->ack_seq), ntohs(m_gro_desc.p_tcp_h->window),
					m_gro_desc.ip_tot_len - 40, m_gro_desc.buf_count);

	if (!rfs_uc::rx_dispatch_packet(m_gro_desc.p_first, pv_fd_ready_array)) {
		m_p_ring->reclaim_recv_buffers_no_lock(m_gro_desc.p_first);
	}

	m_b_active = false;
}
Esempio n. 7
0
select_call::select_call(int *off_fds_buffer, offloaded_mode_t *off_modes_buffer,
                         int nfds, fd_set *readfds, fd_set *writefds,
                         fd_set *exceptfds, timeval *timeout, const sigset_t *__sigmask /* = NULL */) :
	io_mux_call(off_fds_buffer, off_modes_buffer, nfds, __sigmask),
	m_nfds(nfds), m_readfds(readfds), m_writefds(writefds),
	m_exceptfds(exceptfds), m_timeout(timeout), m_nfds_with_cq(0), m_b_run_prepare_to_poll(false)
{
	int fd;
	//socket_fd_api* temp_sock_fd_api = NULL; 

	if (m_nfds > FD_SETSIZE) {
		errno = ENOMEM;
		vma_throw_object(io_mux_call::io_error);
	}

	// create stats
	m_p_stats = &g_select_stats;
        vma_stats_instance_get_select_block(m_p_stats);

	bool offloaded_read  = !!m_readfds;
	bool offloaded_write = !!m_writefds;

	if (offloaded_read || offloaded_write) {
		FD_ZERO(&m_os_rfds, m_nfds);
		FD_ZERO(&m_os_wfds, m_nfds);

		//covers the case of select(readfds = NULL)
		if(!m_readfds) {
			FD_ZERO(&m_cq_rfds, m_nfds);
			m_readfds = &m_cq_rfds;
		}

		// get offloaded fds in read set
		for (fd = 0; fd < m_nfds; ++fd) {

			bool check_read = offloaded_read && FD_ISSET(fd, m_readfds);
			bool check_write = offloaded_write && FD_ISSET(fd, m_writefds);

			socket_fd_api* psock = fd_collection_get_sockfd(fd);

			if (psock && psock->get_type() == FD_TYPE_SOCKET) {

				offloaded_mode_t off_mode  = OFF_NONE;
				if (check_read)  off_mode = (offloaded_mode_t)(off_mode | OFF_READ);
				if (check_write) off_mode = (offloaded_mode_t)(off_mode | OFF_WRITE);

				if (off_mode) {
					__log_func("---> fd=%d IS SET for read or write!", fd);

					m_p_all_offloaded_fds[m_num_all_offloaded_fds] = fd;
					m_p_offloaded_modes[m_num_all_offloaded_fds] = off_mode;
					m_num_all_offloaded_fds++;
					if (! psock->skip_os_select()) {
						if (check_read) {
							FD_SET(fd, &m_os_rfds);
							if (psock->is_readable(NULL)) {
								io_mux_call::update_fd_array(&m_fd_ready_array, fd);
								m_n_ready_rfds++;
								m_n_all_ready_fds++;
							} else {
								// Instructing the socket to sample the OS immediately to prevent hitting EAGAIN on recvfrom(),
								// after iomux returned a shadow fd as ready (only for non-blocking sockets)
								psock->set_immediate_os_sample();
							}
						}
						if (check_write) {
							FD_SET(fd, &m_os_wfds);
						}
					}
					else
						__log_func("fd=%d must be skipped from os r select()", fd);

				}
			}
                        else {
                                if (check_read)  {
                                        FD_SET(fd, &m_os_rfds);
                                }
                                if (check_write) {
                                        FD_SET(fd, &m_os_wfds);
                                }
                        }

		}
	}
	__log_func("num all offloaded_fds=%d", m_num_all_offloaded_fds);
}
Esempio n. 8
0
bool select_call::wait(const timeval &elapsed)
{
	timeval timeout, *pto = NULL;
	timespec to_pselect, *pto_pselect = NULL;
	
	BULLSEYE_EXCLUDE_BLOCK_START
	if (m_n_all_ready_fds > 0) {
		__log_panic("wait() called when there are ready fd's!!!");
		// YossiE TODO make this and some more checks as debug assertions
		// In all functions
	}
	BULLSEYE_EXCLUDE_BLOCK_END

	// Restore original sets
	if (m_b_run_prepare_to_poll) {
		if (m_readfds)	FD_COPY(m_readfds, &m_os_rfds, m_nfds);
		if (m_writefds)	FD_COPY(m_writefds, &m_os_wfds, m_nfds);
		if (m_exceptfds)FD_COPY(m_exceptfds, &m_orig_exceptfds, m_nfds);
	}

	// Call OS select() on original sets + CQ epfd in read set
	if (m_readfds)
		FD_SET(m_cqepfd, m_readfds);
	if (m_timeout) {
		tv_sub(m_timeout, &elapsed, &timeout);
		if (timeout.tv_sec < 0 || timeout.tv_usec < 0) {
			// Already reached timeout
			return false;
		}
		pto = &timeout;
	}

	__log_func("going to wait on select CQ+OS nfds=%d cqfd=%d pto=%p!!!", m_nfds_with_cq, m_cqepfd, pto);

	// ACTUAL CALL TO SELECT
	if (m_sigmask) {
		if (pto) {
			to_pselect.tv_sec = pto->tv_sec;
			to_pselect.tv_nsec = pto->tv_usec * 1000;
			pto_pselect = &to_pselect;
		}
		m_n_all_ready_fds = orig_os_api.pselect(m_nfds, m_readfds, m_writefds, m_exceptfds, pto_pselect, m_sigmask);
	} else {
		m_n_all_ready_fds = orig_os_api.select(m_nfds_with_cq, m_readfds, m_writefds, m_exceptfds, pto);
	}
	__log_func("done select CQ+OS nfds=%d cqfd=%d pto=%p ready=%d!!!", m_nfds_with_cq, m_cqepfd, pto, m_n_all_ready_fds);
	if (m_n_all_ready_fds < 0) {
		vma_throw_object(io_mux_call::io_error);
	}

	// Clear CQ from the set and don't count it
	if (m_readfds)
	{
		if (FD_ISSET(m_cqepfd, m_readfds)) {
			FD_CLR(m_cqepfd, m_readfds); // Not needed if m_readfds is NULL
			--m_n_all_ready_fds;
			return true;
		}
	}
	return false;
}
Esempio n. 9
0
bool epoll_wait_call::_wait(int timeout) 
{
	int i, ready_fds, fd;
	bool cq_ready = false;

	__log_func("calling os epoll: %d", m_epfd);

	if (timeout) {
		lock();
		if (m_epfd_info->m_ready_fds.empty()) {
			m_epfd_info->going_to_sleep();
		} else {
			timeout = 0;
		}
		unlock();
	}

	if (m_sigmask) {
		ready_fds = orig_os_api.epoll_pwait(m_epfd, m_p_ready_events, m_maxevents, timeout, m_sigmask);
	} else {
		ready_fds = orig_os_api.epoll_wait(m_epfd, m_p_ready_events, m_maxevents, timeout);
	}

	if (timeout) {
		lock();
		m_epfd_info->return_from_sleep();
		unlock();
	}

	if (ready_fds < 0) {
		vma_throw_object(io_mux_call::io_error);
	} 
	
	// convert the returned events to user events and mark offloaded fds
	m_n_all_ready_fds = 0;
	for (i = 0; i < ready_fds; ++i) {
		fd = m_p_ready_events[i].data.fd;

		// wakeup event
		if(m_epfd_info->is_wakeup_fd(fd))
		{
			lock();
			m_epfd_info->remove_wakeup_fd();
			unlock();
			continue;
		}

		// If it's CQ
		if (m_epfd_info->is_cq_fd(m_p_ready_events[i].data.u64)) {
			cq_ready = true;
			continue;
		}
		
		if ((m_p_ready_events[i].events & EPOLLIN)) {
			socket_fd_api* temp_sock_fd_api = fd_collection_get_sockfd(fd);
			if (temp_sock_fd_api) {
				// Instructing the socket to sample the OS immediately to prevent hitting EAGAIN on recvfrom(),
				// after iomux returned a shadow fd as ready (only for non-blocking sockets)
				temp_sock_fd_api->set_immediate_os_sample();
			}
		}

		// Copy event bits and data
		m_events[m_n_all_ready_fds].events = m_p_ready_events[i].events;
		if (!m_epfd_info->get_data_by_fd(fd, &m_events[m_n_all_ready_fds].data)) {
			continue;
		}
		++m_n_all_ready_fds;
	}
	
	return cq_ready;
}