void ci_dump_select_set(ci_log_fn_t log_fn, const fd_set* fds)
{
  char stack_s[256];
  char* s = stack_s;
  int i, n = 0, si;

  /* We assume the caller ain't too worried about performance.  So we find
  ** out in advance whether we can format the string into [stack_s], or
  ** need to malloc() a buffer.
  */
  for( i = 0; i < FD_SETSIZE; ++i )
    if( FD_ISSET(i, fds) )
      ++n;

  /* Budget 5 chars per fd (enough for " 1023"; FD_SETSIZE is typically
  ** 1024), plus "[", "]" and the terminator.
  */
  if( n * 5 + 3 >= (int) sizeof(stack_s) )
    /* Hope this doesn't fail... */
    CI_TEST(s = (char*) malloc(n * 5 + 3));

  si = sprintf(s, "[");
  for( i = 0; i < FD_SETSIZE; ++i )
    if( FD_ISSET(i, fds) )
      si += sprintf(s + si, si == 1 ? "%d" : " %d", i);
  si += sprintf(s + si, "]");
  ci_assert(s != stack_s || si < n * 5 + 3);

  log_fn(s);
  if( s != stack_s )
    free(s);
}
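/* Usage sketch (illustrative, not part of the library).  ci_log_fn_t is
 * assumed to take a single pre-formatted string, matching the log_fn(s)
 * call above; example_log_line is a hypothetical stand-in and assumes
 * <stdio.h> is available via the existing includes.
 */
static void example_log_line(const char* msg)
{
  fprintf(stderr, "%s\n", msg);
}

static void example_dump_read_set(void)
{
  fd_set rfds;
  FD_ZERO(&rfds);
  FD_SET(0, &rfds);                            /* stdin */
  FD_SET(5, &rfds);                            /* some socket of interest */
  ci_dump_select_set(example_log_line, &rfds); /* logs "[0 5]" */
}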
void ci_app_startup(int argc, char* argv[])
{
  int rc;

  if( ci_appname )
    return;

  if( getenv("EFAB_NIC") )
    ci_cfg_nic_name = getenv("EFAB_NIC");
  ci_cfg_nic_index = atoi(ci_cfg_nic_name);

  if( ci_app_cpu_khz == 0 ) {
    do_platform_init();
    rc = ci_get_cpu_khz(&ci_app_cpu_khz);
    if( rc < 0 )
      ci_log("ci_get_cpu_khz: %d", rc);
  }

  if( argc > 0 ) {
    int i, n = 0;
    char* p;
    for( i = 0; i < argc; ++i )
      n += strlen(argv[i]) + 1;
    ci_cmdline = malloc(n);
    if( ci_cmdline ) {
      p = ci_cmdline;
      for( i = 0; i < argc; ++i )
        p += sprintf(p, "%s%s", i == 0 ? "" : " ", argv[i]);
      CI_TEST(p == ci_cmdline + n - 1);
    }
  }

  if( argc >= 1 && argv && argv[0] ) {
    /* Strip leading directory components to get the program name. */
    ci_appname = argv[0] + strlen(argv[0]);
    while( ci_appname > argv[0] &&
           ci_appname[-1] != '/' && ci_appname[-1] != '\\' )
      --ci_appname;
  }
  else
    ci_appname = "";

  if( strlen(ci_appname) < (LOG_PREFIX_BUF_SIZE - 5) ) {
    strcpy(log_prefix_buf, ci_appname);
    strcat(log_prefix_buf, ": ");
    ci_set_log_prefix(log_prefix_buf);
  }
}
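/* Illustrative reduction of the basename scan above: walk back from the end
 * of the string until a path separator (either flavour) or the start is
 * reached.  Standalone sketch; the real code operates on argv[0] in place.
 */
static const char* example_basename(const char* path)
{
  const char* p = path + strlen(path);
  while( p > path && p[-1] != '/' && p[-1] != '\\' )
    --p;
  return p;   /* "/usr/bin/app" -> "app"; "C:\\tools\\app.exe" -> "app.exe" */
}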
/* Find out what sort of thing [fd] is, and if it is a user-level socket
 * then map in the user-level state.
 */
static citp_fdinfo * citp_fdtable_probe_locked(unsigned fd, int print_banner,
                                               int fdip_is_already_busy)
{
  citp_fdinfo* fdi = NULL;
  struct stat64 st;
  ci_ep_info_t info;

  if( ! fdip_is_already_busy ) {
    volatile citp_fdinfo_p* p_fdip;
    citp_fdinfo_p fdip;
    /* ?? We're repeating some effort already expended in lookup() here, but
    ** this keeps it cleaner.  May optimise down the line when I understand
    ** what other code needs to call this.
    */
    p_fdip = &citp_fdtable.table[fd].fdip;
  again:
    fdip = *p_fdip;
    if( fdip_is_busy(fdip) )
      fdip = citp_fdtable_busy_wait(fd, 1);
    if( ! fdip_is_unknown(fdip) && ! fdip_is_normal(fdip) )
      goto exit;
    if( fdip_cas_fail(p_fdip, fdip, fdip_busy) )
      goto again;

    if( fdip_is_normal(fdip) ) {
      fdi = fdip_to_fdi(fdip);
      citp_fdinfo_ref(fdi);
      citp_fdtable_busy_clear(fd, fdip, 1);
      goto exit;
    }
  }

  if( ci_sys_fstat64(fd, &st) != 0 ) {
    /* fstat() failed.  Must be a bad (closed) file descriptor, so
    ** leave this entry as unknown.  Return citp_the_closed_fd to avoid the
    ** caller passing through to an fd that is created asynchronously.
    */
    citp_fdtable_busy_clear(fd, fdip_unknown, 1);
    fdi = &citp_the_closed_fd;
    citp_fdinfo_ref(fdi);
    goto exit;
  }

  /* oo_get_st_rdev() and oo_onloadfs_dev_t() open-and-close fd, so
   * fdtable should be locked if strict mode requested.
   */
  if( fdtable_strict() ) {
    CITP_FDTABLE_ASSERT_LOCKED(1);
  }

  if( st.st_dev == oo_onloadfs_dev_t() ) {
    /* Retrieve user-level endpoint info */
    if( oo_ep_info(fd, &info) < 0 ) {
      Log_V(log("%s: fd=%d type=%d unknown", __FUNCTION__, fd, info.fd_type));
      citp_fdtable_busy_clear(fd, fdip_passthru, 1);
      goto exit;
    }

    switch( info.fd_type ) {
    case CI_PRIV_TYPE_TCP_EP:
    case CI_PRIV_TYPE_UDP_EP:
    case CI_PRIV_TYPE_PASSTHROUGH_EP:
    case CI_PRIV_TYPE_ALIEN_EP:
#if CI_CFG_USERSPACE_PIPE
    case CI_PRIV_TYPE_PIPE_READER:
    case CI_PRIV_TYPE_PIPE_WRITER:
#endif
    {
      citp_fdinfo_p fdip;
      Log_V(log("%s: fd=%d %s restore", __FUNCTION__, fd,
                info.fd_type == CI_PRIV_TYPE_TCP_EP ? "TCP" :
#if CI_CFG_USERSPACE_PIPE
                info.fd_type != CI_PRIV_TYPE_UDP_EP ? "PIPE" :
#endif
                "UDP"));
      fdip = citp_fdtable_probe_restore(fd, &info, print_banner);
      if( fdip_is_normal(fdip) )
        fdi = fdip_to_fdi(fdip);
      else
        citp_fdtable_busy_clear(fd, fdip, 1);
      goto exit;
    }

    case CI_PRIV_TYPE_NETIF:
      /* This should never happen, because netif fds are close-on-exec.
      ** But let's leave this code here just in case my reasoning is bad.
      */
      Log_U(log("%s: fd=%d NETIF reserved", __FUNCTION__, fd));
      citp_fdtable_busy_clear(fd, fdip_reserved, 1);
      fdi = &citp_the_reserved_fd;
      citp_fdinfo_ref(fdi);
      goto exit;

    case CI_PRIV_TYPE_NONE:
      /* This happens if a thread gets at an onload driver fd that has just
       * been created, but not yet specialised.  On Linux I think this
       * means it will shortly be a new netif internal fd.  (fds associated
       * with sockets and pipes are never unspecialised).
       */
      Log_V(log("%s: fd=%d TYPE_NONE", __FUNCTION__, fd));
      citp_fdtable_busy_clear(fd, fdip_passthru, 1);
      goto exit;

    default:
      CI_TEST(0);
      break;
    }
  }
  else if( ci_major(st.st_rdev) == ci_major(oo_get_st_rdev(OO_EPOLL_DEV)) ) {
    citp_epollb_fdi* epi = CI_ALLOC_OBJ(citp_epollb_fdi);
    if( ! epi ) {
      Log_E(log("%s: out of memory (epoll_fdi)", __FUNCTION__));
      citp_fdtable_busy_clear(fd, fdip_passthru, 1);
      goto exit;
    }
    oo_epollb_ctor(epi);
    fdi = &epi->fdinfo;
    citp_fdinfo_init(fdi, &citp_epollb_protocol_impl);
    citp_fdinfo_ref(fdi);
    citp_fdtable_insert(fdi, fd, 1);
    goto exit;
  }

#ifndef NDEBUG
  /* /dev/onload may be netif only; they are closed on fork or exec */
  if( ci_major(st.st_rdev) == ci_major(oo_get_st_rdev(OO_STACK_DEV)) )
    Log_U(log("%s: %d is /dev/onload", __FUNCTION__, fd));
#endif

  /* Not one of ours, so pass-through. */
  Log_V(log("%s: fd=%u non-efab", __FUNCTION__, fd));
  citp_fdtable_busy_clear(fd, fdip_passthru, 1);

 exit:
  return fdi;
}
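/* Illustrative reduction of the claim-by-CAS pattern used in
 * citp_fdtable_probe_locked() above, using C11 atomics on a plain int
 * instead of citp_fdinfo_p (hypothetical names; the real code blocks in
 * citp_fdtable_busy_wait() rather than spinning, and has more states).
 */
#include <stdatomic.h>

enum { EX_UNKNOWN, EX_BUSY, EX_NORMAL };

static int example_claim_entry(_Atomic int* entry)
{
  for( ; ; ) {
    int seen = atomic_load(entry);
    if( seen == EX_BUSY )
      continue;                 /* another thread is probing this entry */
    if( atomic_compare_exchange_weak(entry, &seen, EX_BUSY) )
      return seen;              /* claimed: caller probes the fd, then
                                 * publishes the result state */
  }
}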
static citp_fdinfo_p citp_fdtable_probe_restore(int fd, ci_ep_info_t* info,
                                                int print_banner)
{
  citp_protocol_impl* proto = 0;
  citp_fdinfo* fdi = 0;
  ci_netif* ni = NULL;
  int rc;
  int c_sock_fdi = 1;

  /* Must be holding the FD table writer lock */
  CITP_FDTABLE_ASSERT_LOCKED(1);
  ci_assert_nequal(info->resource_id, CI_ID_POOL_ID_NONE);

  /* Will need to review this function if the following assert fires */
  switch( info->fd_type ) {
  case CI_PRIV_TYPE_TCP_EP:
    proto = &citp_tcp_protocol_impl;
    break;
  case CI_PRIV_TYPE_UDP_EP:
    proto = &citp_udp_protocol_impl;
    break;
  case CI_PRIV_TYPE_PASSTHROUGH_EP:
    proto = &citp_passthrough_protocol_impl;
    c_sock_fdi = 0;
    break;
  case CI_PRIV_TYPE_ALIEN_EP:
    proto = NULL;
    c_sock_fdi = 0;
    break;
#if CI_CFG_USERSPACE_PIPE
  case CI_PRIV_TYPE_PIPE_READER:
    proto = &citp_pipe_read_protocol_impl;
    c_sock_fdi = 0;
    break;
  case CI_PRIV_TYPE_PIPE_WRITER:
    proto = &citp_pipe_write_protocol_impl;
    c_sock_fdi = 0;
    break;
#endif
  default:
    ci_assert(0);
  }

  /* Attempt to find the user-level netif for this endpoint */
  ni = citp_find_ul_netif(info->resource_id, 1);
  if( ! ni ) {
    ef_driver_handle netif_fd;
    /* Not found, rebuild/restore the netif for this endpoint */
    rc = citp_netif_recreate_probed(fd, &netif_fd, &ni);
    if( rc < 0 ) {
      Log_E(log("%s: citp_netif_recreate_probed failed! (%d)",
                __FUNCTION__, rc));
      goto fail;
    }
    if( print_banner ) {
      ci_log("Importing "ONLOAD_PRODUCT" "ONLOAD_VERSION" "ONLOAD_COPYRIGHT
             " [%s]", ni->state->pretty_name);
    }
  }
  else
    citp_netif_add_ref(ni);

  /* There is a race condition where the fd can have been created, but it has
   * not yet been initialised, as we can't put a busy marker in the right
   * place in the fdtable until we know what the fd is.  In this case we
   * don't want to probe this new info, so return the closed fd.
   */
  if( SP_TO_WAITABLE(ni, info->sock_id)->sb_aflags & CI_SB_AFLAG_NOT_READY ) {
    citp_fdtable_busy_clear(fd, fdip_unknown, 1);
    fdi = &citp_the_closed_fd;
    citp_fdinfo_ref(fdi);
    return fdi_to_fdip(fdi);
  }

  if( c_sock_fdi ) {
    citp_sock_fdi* sock_fdi;

    sock_fdi = CI_ALLOC_OBJ(citp_sock_fdi);
    if( ! sock_fdi ) {
      Log_E(log("%s: out of memory (sock_fdi)", __FUNCTION__));
      goto fail;
    }
    fdi = &sock_fdi->fdinfo;
    sock_fdi->sock.s = SP_TO_SOCK_CMN(ni, info->sock_id);
    sock_fdi->sock.netif = ni;
  }
  else if( info->fd_type == CI_PRIV_TYPE_PASSTHROUGH_EP ) {
    citp_waitable* w = SP_TO_WAITABLE(ni, info->sock_id);
    citp_alien_fdi* alien_fdi;

    if( ~w->sb_aflags & CI_SB_AFLAG_MOVED_AWAY_IN_EPOLL &&
        fdtable_fd_move(fd, OO_IOC_FILE_MOVED) == 0 ) {
      citp_netif_release_ref(ni, 1);
      return fdip_passthru;
    }
    alien_fdi = CI_ALLOC_OBJ(citp_alien_fdi);
    if( ! alien_fdi ) {
      Log_E(log("%s: out of memory (alien_fdi)", __FUNCTION__));
      goto fail;
    }
    fdi = &alien_fdi->fdinfo;
    alien_fdi->netif = ni;
    alien_fdi->ep = SP_TO_WAITABLE(ni, info->sock_id);
    citp_passthrough_init(alien_fdi);
  }
  else if( info->fd_type == CI_PRIV_TYPE_ALIEN_EP ) {
    citp_waitable* w = SP_TO_WAITABLE(ni, info->sock_id);
    citp_sock_fdi* sock_fdi;
    ci_netif* alien_ni;

    sock_fdi = CI_ALLOC_OBJ(citp_sock_fdi);
    if( ! sock_fdi ) {
      Log_E(log("%s: out of memory (alien sock_fdi)", __FUNCTION__));
      goto fail;
    }
    fdi = &sock_fdi->fdinfo;
    rc = citp_netif_by_id(w->moved_to_stack_id, &alien_ni, 1);
    if( rc != 0 )
      goto fail;
    sock_fdi->sock.s = SP_TO_SOCK_CMN(alien_ni, w->moved_to_sock_id);
    sock_fdi->sock.netif = alien_ni;
    citp_netif_release_ref(ni, 1);

    /* Replace the file under this fd if possible */
    if( ~w->sb_aflags & CI_SB_AFLAG_MOVED_AWAY_IN_EPOLL )
      fdtable_fd_move(fd, OO_IOC_FILE_MOVED);

    if( sock_fdi->sock.s->b.state & CI_TCP_STATE_TCP )
      proto = &citp_tcp_protocol_impl;
    else if( sock_fdi->sock.s->b.state == CI_TCP_STATE_UDP )
      proto = &citp_udp_protocol_impl;
    else
      CI_TEST(0);
  }
#if CI_CFG_USERSPACE_PIPE
  else {
    citp_pipe_fdi* pipe_fdi;

    pipe_fdi = CI_ALLOC_OBJ(citp_pipe_fdi);
    if( ! pipe_fdi ) {
      Log_E(log("%s: out of memory (pipe_fdi)", __FUNCTION__));
      goto fail;
    }
    fdi = &pipe_fdi->fdinfo;
    pipe_fdi->pipe = SP_TO_PIPE(ni, info->sock_id);
    pipe_fdi->ni = ni;
  }
#endif

  citp_fdinfo_init(fdi, proto);

  /* We're returning a reference to the caller. */
  citp_fdinfo_ref(fdi);
  citp_fdtable_insert(fdi, fd, 1);
  return fdi_to_fdip(fdi);

 fail:
  if( ni )
    citp_netif_release_ref(ni, 1);
  return fdip_unknown;
}
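/* Illustrative condensation of the sequence every successful branch of
 * citp_fdtable_probe_restore() follows: allocate the protocol-specific
 * container, fill in its state, init with the matching protocol impl,
 * take the caller's reference, and publish under the fd.  Hypothetical
 * helper, shown for the plain TCP socket case only.
 */
static citp_fdinfo* example_publish_tcp_fdi(ci_netif* ni, ci_ep_info_t* info,
                                            int fd)
{
  citp_sock_fdi* sock_fdi = CI_ALLOC_OBJ(citp_sock_fdi);
  if( ! sock_fdi )
    return NULL;
  sock_fdi->sock.s = SP_TO_SOCK_CMN(ni, info->sock_id);
  sock_fdi->sock.netif = ni;
  citp_fdinfo_init(&sock_fdi->fdinfo, &citp_tcp_protocol_impl);
  citp_fdinfo_ref(&sock_fdi->fdinfo);        /* reference for the caller */
  citp_fdtable_insert(&sock_fdi->fdinfo, fd, 1);
  return &sock_fdi->fdinfo;
}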
static int ci_zc_msg_to_udp_pkt(ci_netif* ni, struct onload_zc_msg* zc_msg,
                                ci_ip_pkt_fmt* pkt)
{
  int i, n_buffers = pkt->n_buffers, dropped_bytes = 0;
  ci_ip_pkt_fmt* frag;
  ci_ip_pkt_fmt* prev_frag = NULL;

  frag = pkt;
  i = 0;
  ci_assert_nequal(zc_msg->iov, NULL);

  /* Ignore first frag if zero length and there is another frag */
  if( oo_offbuf_left(&frag->buf) == 0 && OO_PP_NOT_NULL(frag->frag_next) ) {
    frag = PKT_CHK_NNL(ni, frag->frag_next);
    --n_buffers;
  }

  CI_TEST(zc_msg->msghdr.msg_iovlen <= n_buffers);
  CI_TEST(zc_msg->msghdr.msg_iovlen > 0);

  do {
    CI_TEST(zc_msg->iov[i].buf == (onload_zc_handle) frag);
    CI_TEST(zc_msg->iov[i].iov_len != 0);
    if( i < zc_msg->msghdr.msg_iovlen ) {
      if( zc_msg->iov[i].iov_base != oo_offbuf_ptr(&frag->buf) ) {
        ci_assert_gt((char*) zc_msg->iov[i].iov_base,
                     oo_offbuf_ptr(&frag->buf));
        dropped_bytes += ((char*) zc_msg->iov[i].iov_base -
                          oo_offbuf_ptr(&frag->buf));
        oo_offbuf_set_start(&frag->buf, (char*) zc_msg->iov[i].iov_base);
      }
      if( zc_msg->iov[i].iov_len != oo_offbuf_left(&frag->buf) ) {
        ci_assert_lt(zc_msg->iov[i].iov_len, oo_offbuf_left(&frag->buf));
        dropped_bytes += (oo_offbuf_left(&frag->buf) -
                          zc_msg->iov[i].iov_len);
        oo_offbuf_set_len(&frag->buf, zc_msg->iov[i].iov_len);
      }
    }
    else {
      /* All remaining fragments should be discarded.  Should not get
       * here on first frag as msg_iovlen > 0.
       */
      ci_assert(prev_frag != NULL);
      prev_frag->frag_next = OO_PP_NULL;
      /* Remember frag so we can release it after counting dropped bytes */
      prev_frag = frag;
      do {
        dropped_bytes += oo_offbuf_left(&frag->buf);
        if( ++i == n_buffers )
          break;
        frag = PKT_CHK_NNL(ni, frag->frag_next);
      } while( 1 );
      ci_netif_pkt_release(ni, prev_frag);
      pkt->n_buffers -= (n_buffers - zc_msg->msghdr.msg_iovlen);
      return dropped_bytes;
    }

    ci_assert_lt(oo_offbuf_offset(&frag->buf) + oo_offbuf_left(&frag->buf),
                 CI_CFG_PKT_BUF_SIZE);
    if( ++i == n_buffers )
      break;
    prev_frag = frag;
    frag = PKT_CHK_NNL(ni, frag->frag_next);
  } while( 1 );

  return dropped_bytes;
}
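/* Self-contained model of the dropped-byte accounting in
 * ci_zc_msg_to_udp_pkt() above, on plain arrays instead of packet buffers
 * (hypothetical types, illustrative only).  A kept fragment loses its
 * front trim plus whatever tail the app did not keep; a fragment past
 * msg_iovlen is dropped whole.
 */
struct ex_frag { size_t left; };        /* bytes remaining in fragment */
struct ex_iov  { size_t skip, keep; };  /* front trim + bytes app kept */

static size_t example_dropped_bytes(const struct ex_frag* frag, size_t n_frag,
                                    const struct ex_iov* iov, size_t n_iov)
{
  size_t i, dropped = 0;
  for( i = 0; i < n_frag; ++i )
    if( i < n_iov )
      dropped += iov[i].skip + (frag[i].left - iov[i].skip - iov[i].keep);
    else
      dropped += frag[i].left;          /* whole fragment discarded */
  return dropped;
}

/* E.g. fragments {1000, 1000, 500} with iovs {{100, 900}, {0, 1000}}:
 * 100 + 0 + 500 = 600 dropped bytes, matching the function's return.
 */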