/* Hand an accelerated fd back to the kernel (e.g. when we cannot or should
 * not accelerate it any more).
 *
 * fdi        - the fdinfo being handed over; consumed: its dtor is run and
 *              it is freed before return.
 * fdt_locked - non-zero if the caller already holds the fdtable lock.
 *
 * On entry the fdtable entry for fdi->fd is expected to be busy (debug
 * builds assert this).  If the fd is a member of an epoll set we notify the
 * epoll implementation before/after the move as appropriate.
 */
static void citp_fdinfo_do_handover(citp_fdinfo* fdi, int fdt_locked)
{
  int rc;
  citp_fdinfo* epoll_fdi = NULL;
  /* OS-level fd used for the FIONBIO fixup below; replaced by the probed
   * OS socket if the in-place handover fails. */
  int os_fd = fdi->fd;
#ifndef NDEBUG
  /* Yuk: does for UDP too. */
  volatile citp_fdinfo_p* p_fdip;
  p_fdip = &citp_fdtable.table[fdi->fd].fdip;
  ci_assert(fdip_is_busy(*p_fdip));
#endif

  Log_V(ci_log("%s: fd=%d nonb_switch=%d", __FUNCTION__, fdi->fd,
               fdi->on_rcz.handover_nonb_switch));

  if( fdi->epoll_fd >= 0 ) {
    epoll_fdi = citp_epoll_fdi_from_member(fdi, fdt_locked);
    if( epoll_fdi->protocol->type == CITP_EPOLLB_FD )
      citp_epollb_on_handover(epoll_fdi, fdi);
  }
  rc = fdtable_fd_move(fdi->fd, OO_IOC_TCP_HANDOVER);
  if( rc == -EBUSY && fdi->epoll_fd >= 0 ) {
    ci_assert(fdi_to_sock_fdi(fdi)->sock.s->b.sb_aflags &
              CI_SB_AFLAG_MOVED_AWAY);
    /* If this is our epoll, we can do full handover: we manually add os
     * fd into the epoll set.
     * Fixme: ensure we are not in _other_ epoll sets */
    ci_bit_clear(&fdi_to_sock_fdi(fdi)->sock.s->b.sb_aflags,
                 CI_SB_AFLAG_MOVED_AWAY_IN_EPOLL_BIT);
    rc = fdtable_fd_move(fdi->fd, OO_IOC_FILE_MOVED);
  }
  if( rc != 0 ) {
    /* In-place handover failed: re-probe the fd to get the pass-through
     * fdinfo so we can still apply the non-blocking switch to the OS
     * socket below. */
    citp_fdinfo* new_fdi;
    if( ! fdt_locked ) CITP_FDTABLE_LOCK();
    new_fdi = citp_fdtable_probe_locked(fdi->fd, CI_TRUE, CI_TRUE);
    ci_assert_equal(citp_fdinfo_get_type(new_fdi), CITP_PASSTHROUGH_FD);
    os_fd = fdi_to_alien_fdi(new_fdi)->os_socket;
    /* Bug fix: drop our reference only after the last read through
     * new_fdi (the original released it before the assert and the
     * os_socket read above, a use-after-release hazard).  Must happen
     * before the UNLOCK because we pass fdt_locked=1. */
    citp_fdinfo_release_ref(new_fdi, 1);
    if( ! fdt_locked ) CITP_FDTABLE_UNLOCK();
  }
  if( fdi->on_rcz.handover_nonb_switch >= 0 ) {
    int on_off = !! fdi->on_rcz.handover_nonb_switch;
    /* Separate variable (was a shadowing inner 'rc') so an ioctl failure
     * cannot be confused with the handover status tested below. */
    int ioctl_rc = ci_sys_ioctl(os_fd, FIONBIO, &on_off);
    if( ioctl_rc < 0 )
      Log_E(ci_log("%s: ioctl failed on_off=%d", __FUNCTION__, on_off));
  }
  if( rc != 0 )
    goto exit;  /* handover failed: do not mark the entry as pass-through */
  citp_fdtable_busy_clear(fdi->fd, fdip_passthru, fdt_locked);
 exit:
  citp_fdinfo_get_ops(fdi)->dtor(fdi, fdt_locked);
  if( epoll_fdi != NULL && epoll_fdi->protocol->type == CITP_EPOLL_FD )
    citp_epoll_on_handover(epoll_fdi, fdi, fdt_locked);
  if( epoll_fdi != NULL )
    citp_fdinfo_release_ref(epoll_fdi, fdt_locked);
  citp_fdinfo_free(fdi);
}
/* Return a set of zero-copy buffer handles to the stack that owns socket
 * 'fd'.
 *
 * fd       - an Onload-accelerated TCP or UDP socket.
 * bufs     - array of handles previously obtained from this stack.
 * bufs_len - number of entries in 'bufs'.
 *
 * Returns 0 on success; -EINVAL if any buffer belongs to a different stack
 * (in which case none are released) or the fd type is unknown; -ENOTSOCK
 * for epoll/pipe fds; -ESOCKTNOSUPPORT if the fd is not an Onload socket.
 */
int onload_zc_release_buffers(int fd, onload_zc_handle* bufs, int bufs_len)
{
  citp_lib_context_t lib_context;
  citp_fdinfo* fdi;
  citp_sock_fdi* epi;
  ci_netif* ni;
  int i;
  int rc = 0;

  Log_CALL(ci_log("%s(%d, %p, %d)", __FUNCTION__, fd, bufs, bufs_len));

  citp_enter_lib(&lib_context);
  fdi = citp_fdtable_lookup(fd);
  if( fdi == NULL ) {
    /* Not onload socket */
    rc = -ESOCKTNOSUPPORT;
  }
  else {
    switch( citp_fdinfo_get_type(fdi) ) {
    case CITP_UDP_SOCKET:
    case CITP_TCP_SOCKET:
      epi = fdi_to_sock_fdi(fdi);
      ni = epi->sock.netif;
      ci_netif_lock(ni);
      /* First pass: validate that every buffer belongs to this stack. */
      for( i = 0; i < bufs_len; ++i ) {
        ci_ip_pkt_fmt* pkt = (ci_ip_pkt_fmt*)bufs[i];
        if( pkt->stack_id != ni->state->stack_id ) {
          LOG_U(log("%s: attempt to free buffer from stack %d to stack %d",
                    __FUNCTION__, pkt->stack_id, ni->state->stack_id));
          rc = -EINVAL;
          break;
        }
      }
      /* Second pass: release all buffers, but only if validation passed. */
      if( rc == 0 )
        for( i = 0; i < bufs_len; ++i )
          ci_netif_pkt_release_check_keep(ni, (ci_ip_pkt_fmt*)bufs[i]);
      ci_netif_unlock(ni);
      break;
#if CI_CFG_USERSPACE_EPOLL
    case CITP_EPOLL_FD:
      rc = -ENOTSOCK;
      break;
#endif
#if CI_CFG_USERSPACE_PIPE
    case CITP_PIPE_FD:
      rc = -ENOTSOCK;
      break;
#endif
    default:
      LOG_U(log("%s: unknown fdinfo type %d", __FUNCTION__,
                citp_fdinfo_get_type(fdi)));
      rc = -EINVAL;
    }
    citp_fdinfo_release_ref(fdi, 0);
  }

  citp_exit_lib(&lib_context, TRUE);
  Log_CALL_RESULT(rc);
  return rc;
}
int onload_zc_alloc_buffers(int fd, struct onload_zc_iovec* iovecs, int iovecs_len, enum onload_zc_buffer_type_flags flags) { int rc = 0, i; citp_lib_context_t lib_context; citp_fdinfo* fdi; citp_sock_fdi* epi; ci_netif* ni; ci_ip_pkt_fmt *pkt; unsigned max_len; Log_CALL(ci_log("%s(%d, %p, %d, %x)", __FUNCTION__, fd, iovecs, iovecs_len, flags)); citp_enter_lib(&lib_context); if( (fdi = citp_fdtable_lookup(fd)) != NULL ) { switch( citp_fdinfo_get_type(fdi) ) { case CITP_UDP_SOCKET: case CITP_TCP_SOCKET: epi = fdi_to_sock_fdi(fdi); ni = epi->sock.netif; ci_netif_lock(ni); for( i = 0; i < iovecs_len; ++i ) { max_len = CI_CFG_PKT_BUF_SIZE; pkt = ci_netif_pkt_tx_tcp_alloc(ni); if( pkt == NULL ) { while( --i >= 0 ) ci_netif_pkt_release(ni, (ci_ip_pkt_fmt*)iovecs[i].buf); rc = -ENOMEM; ci_netif_unlock(ni); goto out; } /* Make sure this is clear as it affects behaviour when freeing */ pkt->pf.udp.rx_flags = 0; iovecs[i].buf = (struct oo_zc_buf *)pkt; if( flags & ONLOAD_ZC_BUFFER_HDR_TCP ) { if( (citp_fdinfo_get_type(fdi) == CITP_TCP_SOCKET) && (epi->sock.s->b.state & CI_TCP_STATE_TCP_CONN) ) { ci_tcp_state* ts = SOCK_TO_TCP(epi->sock.s); oo_tx_pkt_layout_init(pkt); iovecs[i].iov_base = ((char *)oo_tx_ip_hdr(pkt)) + ts->outgoing_hdrs_len; max_len = tcp_eff_mss(ts); } else { /* Best guess. We can fix it up later. 
Magic 12 leaves * space for time stamp option (common case) */ oo_tx_pkt_layout_init(pkt); iovecs[i].iov_base = (uint8_t*) oo_tx_ip_data(pkt) + sizeof(ci_tcp_hdr) + 12; } } else if( flags & ONLOAD_ZC_BUFFER_HDR_UDP ) { oo_tx_pkt_layout_init(pkt); iovecs[i].iov_base = (uint8_t*) oo_tx_ip_data(pkt) + sizeof(ci_udp_hdr); } else iovecs[i].iov_base = PKT_START(pkt); iovecs[i].iov_len = CI_CFG_PKT_BUF_SIZE - ((char *)iovecs[i].iov_base - (char *)pkt); if( iovecs[i].iov_len > max_len ) iovecs[i].iov_len = max_len; } ni->state->n_async_pkts += iovecs_len; ci_netif_unlock(ni); break; #if CI_CFG_USERSPACE_EPOLL case CITP_EPOLL_FD: rc = -ENOTSOCK; break; #endif #if CI_CFG_USERSPACE_PIPE case CITP_PIPE_FD: rc = -ENOTSOCK; break; #endif case CITP_PASSTHROUGH_FD: rc = -ESOCKTNOSUPPORT; break; default: LOG_U(log("%s: unknown fdinfo type %d", __FUNCTION__, citp_fdinfo_get_type(fdi))); rc = -EINVAL; } citp_fdinfo_release_ref(fdi, 0); } else { /* Not onload socket */ rc = -ESOCKTNOSUPPORT; } out: citp_exit_lib(&lib_context, TRUE); Log_CALL_RESULT(rc); return rc; }
/* Release zero-copy buffer handles back to the stack owning socket 'fd',
 * keeping the async-packet accounting in step.
 *
 * fd       - an Onload-accelerated TCP or UDP socket.
 * bufs     - array of handles previously obtained from this stack.
 * bufs_len - number of entries in 'bufs'.
 *
 * Returns 0 on success; -EINVAL if any buffer belongs to a different stack
 * (nothing is released) or the fd type is unknown; -ENOTSOCK for
 * epoll/pipe fds; -ESOCKTNOSUPPORT if the fd is not an Onload socket.
 */
int onload_zc_release_buffers(int fd, onload_zc_handle* bufs, int bufs_len)
{
  citp_lib_context_t lib_context;
  citp_fdinfo* fdi;
  citp_sock_fdi* epi;
  ci_netif* ni;
  ci_ip_pkt_fmt* pkt;
  int i, rx_pkt, released;
  int rc = 0;

  Log_CALL(ci_log("%s(%d, %p, %d)", __FUNCTION__, fd, bufs, bufs_len));

  citp_enter_lib(&lib_context);
  fdi = citp_fdtable_lookup(fd);
  if( fdi == NULL ) {
    /* Not onload socket */
    rc = -ESOCKTNOSUPPORT;
  }
  else {
    switch( citp_fdinfo_get_type(fdi) ) {
    case CITP_UDP_SOCKET:
    case CITP_TCP_SOCKET:
      epi = fdi_to_sock_fdi(fdi);
      ni = epi->sock.netif;
      ci_netif_lock(ni);
      /* First pass: validate that every buffer belongs to this stack. */
      for( i = 0; i < bufs_len; ++i ) {
        pkt = (ci_ip_pkt_fmt*)bufs[i];
        if( pkt->stack_id != ni->state->stack_id ) {
          LOG_U(log("%s: attempt to free buffer from stack %d to stack %d",
                    __FUNCTION__, pkt->stack_id, ni->state->stack_id));
          rc = -EINVAL;
          break;
        }
      }
      /* Second pass: release, only if validation passed. */
      if( rc == 0 ) {
        for( i = 0; i < bufs_len; ++i ) {
          pkt = (ci_ip_pkt_fmt*)bufs[i];
          /* If we are releasing a packet without the RX_FLAG then the user
           * allocated and then freed the packet (without using it).
           * We detect this to decrement n_async_pkts.
           * RX packets (kept via ONLOAD_ZC_KEEP) are counted differently
           * so don't decrement here. (But may release) */
          rx_pkt = pkt->flags & CI_PKT_FLAG_RX;
          released = ci_netif_pkt_release_check_keep(ni, pkt);
          if ( ! rx_pkt ) {
            ci_assert(released == 1);
            (void) released;  /* unused in NDEBUG builds */
            --ni->state->n_async_pkts;
          }
        }
      }
      ci_netif_unlock(ni);
      break;
#if CI_CFG_USERSPACE_EPOLL
    case CITP_EPOLL_FD:
      rc = -ENOTSOCK;
      break;
#endif
#if CI_CFG_USERSPACE_PIPE
    case CITP_PIPE_FD:
      rc = -ENOTSOCK;
      break;
#endif
    default:
      LOG_U(log("%s: unknown fdinfo type %d", __FUNCTION__,
                citp_fdinfo_get_type(fdi)));
      rc = -EINVAL;
    }
    citp_fdinfo_release_ref(fdi, 0);
  }

  citp_exit_lib(&lib_context, TRUE);
  Log_CALL_RESULT(rc);
  return rc;
}