static int
efab_vi_rm_mmap_mem(struct efrm_vi *virs,
                    unsigned long *bytes, void *opaque,
                    int *map_num, unsigned long *offset)
{
  int queue_type;
  uint32_t len;

  if( virs->q[EFHW_EVQ].capacity != 0 ) {
    len = efhw_iopages_size(&virs->q[EFHW_EVQ].pages);
    len = CI_MIN(len, *bytes);
    ci_assert_gt(len, 0);
    ci_mmap_iopages(&virs->q[EFHW_EVQ].pages, 0,
                    len, bytes, opaque, map_num, offset);
    if( *bytes == 0 )
      return 0;
  }

  for( queue_type = EFRM_VI_RM_DMA_QUEUE_COUNT - 1;
       queue_type >= 0;
       queue_type-- ) {
    if( virs->q[queue_type].capacity != 0 ) {
      len = efhw_iopages_size(&virs->q[queue_type].pages);
      len = CI_MIN(len, *bytes);
      ci_assert_gt(len, 0);
      ci_mmap_iopages(&virs->q[queue_type].pages, 0,
                      len, bytes, opaque, map_num, offset);
      if( *bytes == 0 )
        return 0;
    }
  }

  return 0;
}
/* Initialise the iptimer scheduler. */
void ci_ip_timer_state_init(ci_netif* netif, unsigned cpu_khz)
{
  ci_ip_timer_state* ipts = IPTIMER_STATE(netif);
  int i;
  int us2isn;

  /* Initialise the cycle-to-tick constants. */
  ipts->khz = cpu_khz;
  ipts->ci_ip_time_frc2tick = shift_for_gran(CI_IP_TIME_APP_GRANULARITY,
                                             ipts->khz);
  ipts->ci_ip_time_frc2us = shift_for_gran(1, ipts->khz);

  /* The Linux kernel ticks the initial sequence number that it would use for
   * a given tuple every 64 ns.  Onload does the same when using
   * EF_TCP_ISN_MODE=clocked.  In EF_TCP_ISN_MODE=clocked+cache our use of
   * the clock-driven ISN is slightly different, though, as we remember old
   * sequence numbers in the case where the clock-driven ISN is not known
   * to be safe.  As such, we don't need it to tick so fast, and so we let it
   * tick at most every 256 ns.  This means that it takes more than eight
   * minutes to wrap by half, while four minutes is our assumed maximum
   * peer-MSL.  This in practice reduces the cases in which we have to
   * remember old sequence numbers. */
  us2isn = NI_OPTS(netif).tcp_isn_mode != 0 ? 2 : 4;
  ipts->ci_ip_time_frc2isn = ipts->ci_ip_time_frc2us > us2isn ?
                             ipts->ci_ip_time_frc2us - us2isn : 0;

  ci_ip_time_initial_sync(ipts);
  ipts->sched_ticks = ci_ip_time_now(netif);
  ipts->closest_timer = ipts->sched_ticks + IPTIME_INFINITY;

  /* To convert ms to ticks we use fixed-point arithmetic.  Calculate the
   * conversion factor, which is expected to be in the range (0.5, 1]. */
  ipts->ci_ip_time_ms2tick_fxp =
    (((ci_uint64)ipts->khz) << 32) /
    (1u << ipts->ci_ip_time_frc2tick);
  ci_assert_gt(ipts->ci_ip_time_ms2tick_fxp, 1ull << 31);
  ci_assert_le(ipts->ci_ip_time_ms2tick_fxp, 1ull << 32);

  /* Set module-specific time constants dependent on frc2tick. */
  ci_tcp_timer_init(netif);

  ci_ni_dllist_init(netif, &ipts->fire_list,
                    oo_ptr_to_statep(netif, &ipts->fire_list),
                    "fire");

  /* Initialise the wheel lists. */
  for( i = 0; i < CI_IPTIME_WHEELSIZE; i++ )
    ci_ni_dllist_init(netif, &ipts->warray[i],
                      oo_ptr_to_statep(netif, &ipts->warray[i]),
                      "timw");
}
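/* Illustrative sketch (not part of the original file): the arithmetic that
 * the fixed-point factor above enables.  A tick is 2^frc2tick CPU cycles and
 * khz is cycles per millisecond, so ticks = ms * khz / 2^frc2tick =
 * (ms * ms2tick_fxp) >> 32.  For example, with a 1 GHz clock (khz = 1000000)
 * and frc2tick = 20 (a tick of 2^20 cycles, roughly 1.05 ms),
 * ms2tick_fxp = (1000000 << 32) / 2^20, about 0.954 * 2^32, which satisfies
 * the (0.5, 1] assertion above.  The helper name below is hypothetical. */
static inline ci_iptime_t example_ms_to_ticks(const ci_ip_timer_state* ipts,
                                              ci_uint64 ms)
{
  /* 32.32 fixed-point multiply, keeping only the integer part. */
  return (ci_iptime_t)((ms * ipts->ci_ip_time_ms2tick_fxp) >> 32);
}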
void ci_netif_filter_init(ci_netif_filter_table* tbl, int size_lg2)
{
  unsigned i;
  unsigned size = ci_pow2(size_lg2);

  ci_assert(tbl);
  ci_assert_gt(size_lg2, 0);
  ci_assert_le(size_lg2, 32);

  tbl->table_size_mask = size - 1;

  for( i = 0; i < size; ++i ) {
    tbl->table[i].id = EMPTY;
    tbl->table[i].route_count = 0;
    tbl->table[i].laddr = 0;
  }
}
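/* Illustrative sketch (not part of the original file): because the table
 * size is a power of two, a hash value can be reduced to a slot index with
 * a single mask rather than a modulo; that is the only reason the mask is
 * stored as size - 1 above.  The helper name is hypothetical and the real
 * lookup/probing logic lives elsewhere. */
static inline unsigned example_filter_slot(const ci_netif_filter_table* tbl,
                                           unsigned hash)
{
  return hash & tbl->table_size_mask;
}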
int onload_zc_send(struct onload_zc_mmsg* msgs, int mlen, int flags)
{
  int done = 0, last_fd = -1, i;
  citp_lib_context_t lib_context;
  citp_fdinfo* fdi = NULL;

  Log_CALL(ci_log("%s(%p, %d, %x)", __FUNCTION__, msgs, mlen, flags));

  citp_enter_lib(&lib_context);

  for( i = 0; i < mlen; ++i ) {
    if( msgs[i].fd != last_fd ) {
      if( fdi != NULL )
        citp_fdinfo_release_ref(fdi, 0);
      fdi = citp_fdtable_lookup(msgs[i].fd);
      if( fdi == NULL ) {
        msgs[i].rc = -ESOCKTNOSUPPORT;
        ++done;
        goto out;
      }
      last_fd = msgs[i].fd;
    }

    CI_TRY_EQ( citp_fdinfo_get_ops(fdi)->zc_send(fdi, &msgs[i], flags), 1);
    /* If we got an error, return the number of msgs that have had
     * rc set and exit.  fd_op should have updated msgs.rc appropriately. */
    ++done;
    if( msgs[i].rc < 0 )
      goto out;
  }

 out:
  if( fdi != NULL )
    citp_fdinfo_release_ref(fdi, 0);
  citp_exit_lib(&lib_context, TRUE);
  ci_assert_gt(done, 0);
  ci_assert_le(done, mlen);
  Log_CALL_RESULT(done);
  return done;
}
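/* Illustrative sketch (not part of the original file): how a caller might
 * interpret onload_zc_send()'s return value.  As the function above
 * documents, the return value is the number of entries in msgs[] whose rc
 * field has been set; a negative rc indicates a per-message error, and
 * entries beyond the returned count were not attempted.  The msgs[] array
 * is assumed to have been populated with zero-copy buffers already by the
 * caller (see the public zero-copy extensions header). */
static int example_zc_send_and_check(struct onload_zc_mmsg* msgs, int n_msgs)
{
  int n_done = onload_zc_send(msgs, n_msgs, 0);
  int i;

  for( i = 0; i < n_done; ++i )
    if( msgs[i].rc < 0 )
      return msgs[i].rc;   /* first per-message error */

  return n_done;           /* number of messages the stack processed */
}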
ssize_t linux_tcp_helper_fop_sendpage(struct file* filp, struct page* page,
                                      int offset, size_t size,
                                      loff_t* ppos, int flags)
{
  ci_private_t* priv = filp->private_data;
  tcp_helper_resource_t* trs = efab_priv_to_thr(priv);
  ci_sock_cmn* s;

  OO_DEBUG_VERB(ci_log("%s: %d:%d offset=%d size=%d flags=%x", __FUNCTION__,
                       NI_ID(&trs->netif), OO_SP_FMT(priv->sock_id), offset,
                       (int) size, flags));

  ci_assert(page);
  ci_assert_ge(offset, 0);
  ci_assert_gt(size, 0);
  ci_assert_le(offset + size, CI_PAGE_SIZE);

#ifndef MSG_SENDPAGE_NOTLAST
  /* "flags" is really "more".  Convert it. */
  if( flags )
    flags = MSG_MORE;

  /* [more] is sometimes true even for the last page.  We get a little
  ** closer to the truth by spotting that we're not reading to the end of
  ** the page. - seen on 2.6.18, but not on 2.6.26 or later
  */
  if( offset + size < CI_PAGE_SIZE && flags )
    flags = 0;
#endif

  s = SP_TO_SOCK(&trs->netif, priv->sock_id);
  if(CI_LIKELY( s->b.state & CI_TCP_STATE_TCP_CONN ))
    return sendpage_copy(&trs->netif, SOCK_TO_TCP(s), page, offset, size,
                         flags);
  else
    /* Closed or listening.  Return epipe.  Do not send SIGPIPE, because
    ** Linux will do it for us. */
    return -s->tx_errno;
}
citp_fdinfo*
citp_fdtable_lookup_fast(citp_lib_context_t* ctx, unsigned fd)
{
  /* Note that if we haven't yet initialised this module, then
  ** [inited_count] will be zero, and the following test will fail.  So the
  ** test for initialisation is done further down...
  **
  ** This is highly performance critical.  DO NOT add any code between here
  ** and the first [return] statement.
  */
  citp_fdinfo* fdi;

  /* Try to avoid entering lib. */
  ctx->thread = NULL;

  if( fd < citp_fdtable.inited_count ) {
    volatile citp_fdinfo_p* p_fdip = &citp_fdtable.table[fd].fdip;
    citp_fdinfo_p fdip;

  again:
    fdip = *p_fdip;
    if( fdip_is_normal(fdip) ) {

      citp_enter_lib_if(ctx);
      if( citp_fdtable_is_mt_safe() ) {
        /* No need to use atomic ops or add a ref to the fdi when MT-safe.
         * The definition of "fds_mt_safe" is that the app does not change
         * the meaning of a file descriptor in one thread when it is being
         * used in another thread.
         */
        fdi = fdip_to_fdi(fdip);
        if( ! citp_fdinfo_is_consistent(fdi) )
          fdi = citp_reprobe_moved(fdi, CI_TRUE, CI_FALSE);

        return fdi;
      }
      else {
        /* Swap in the busy marker. */
        if( fdip_cas_succeed(p_fdip, fdip, fdip_busy) ) {
          fdi = fdip_to_fdi(fdip);

          ci_assert(fdi);
          ci_assert_gt(oo_atomic_read(&fdi->ref_count), 0);
          ci_assert(fdip_is_closing(fdip) || fdip_is_reserved(fdip) ||
                    fdi->fd == fd);
          /* Bump the reference count. */
          citp_fdinfo_ref(fdi);

          if( ! citp_fdinfo_is_consistent(fdi) )
            fdi = citp_reprobe_moved(fdi, CI_FALSE, CI_TRUE);
          else {
            /* Swap the busy marker out again. */
            citp_fdtable_busy_clear(fd, fdip, 0);
          }
          return fdi;
        }
        goto again;
      }
    }

    /* Not normal! */
    if( fdip_is_passthru(fdip) )
      return NULL;

    citp_enter_lib_if(ctx);
    if( fdip_is_busy(fdip) ) {
      citp_fdtable_busy_wait(fd, 0);
      goto again;
    }

    ci_assert(fdip_is_unknown(fdip));
    goto probe;
  }

  if( citp.init_level < CITP_INIT_FDTABLE ) {
    if( _citp_do_init_inprogress == 0 )
      CI_TRY(citp_do_init(CITP_INIT_ALL));
    else
      CI_TRY(citp_do_init(CITP_INIT_FDTABLE)); /* get what we need */
  }

  if( fd >= citp_fdtable.size )
    return NULL;

 probe:
  citp_enter_lib_if(ctx);
  fdi = citp_fdtable_probe(fd);
  if( fdi && citp_fdtable_is_mt_safe() )
    citp_fdinfo_release_ref(fdi, 0);

  return fdi;
}
citp_fdinfo *
citp_fdtable_lookup(unsigned fd)
{
  /* Note that if we haven't yet initialised this module, then
  ** [inited_count] will be zero, and the following test will fail.  So the
  ** test for initialisation is done further down...
  **
  ** This is highly performance critical.  DO NOT add any code between here
  ** and the first [return] statement.
  */
  citp_fdinfo* fdi;

  /* In some cases, we'll lock fdtable.  Assert that it is possible: */
  ci_assert(oo_per_thread_get()->sig.inside_lib);

  if( fd < citp_fdtable.inited_count ) {
    volatile citp_fdinfo_p* p_fdip = &citp_fdtable.table[fd].fdip;
    citp_fdinfo_p fdip;

  again:
    /* Swap in the busy marker. */
    fdip = *p_fdip;

    if( fdip_is_normal(fdip) ) {
      if( citp_fdtable_not_mt_safe() ) {
        if( fdip_cas_succeed(p_fdip, fdip, fdip_busy) ) {
          fdi = fdip_to_fdi(fdip);
          ci_assert(fdi);
          ci_assert_gt(oo_atomic_read(&fdi->ref_count), 0);
          ci_assert(fdip_is_closing(fdip) || fdip_is_reserved(fdip) ||
                    fdi->fd == fd);
          /* Bump the reference count. */
          citp_fdinfo_ref(fdi);

          if( ! citp_fdinfo_is_consistent(fdi) ) {
            /* Something is wrong.  Re-probe. */
            fdi = citp_reprobe_moved(fdi, CI_FALSE, CI_TRUE);
          }
          else {
            /* Swap the busy marker out again. */
            citp_fdtable_busy_clear(fd, fdip, 0);
          }
          return fdi;
        }
        goto again;
      }
      else {
        /* No need to use atomic ops when single-threaded.  The definition
         * of "fds_mt_safe" is that the app does not change the meaning of
         * a file descriptor in one thread when it is being used in another
         * thread.  In that case I'm hoping this should be safe, but at
         * time of writing I'm really not confident.  (FIXME).
         */
        fdi = fdip_to_fdi(fdip);
        if( ci_is_multithreaded() )
          citp_fdinfo_ref(fdi);
        else
          ++fdi->ref_count.n;

        if( ! citp_fdinfo_is_consistent(fdi) )
          fdi = citp_reprobe_moved(fdi, CI_FALSE, CI_FALSE);

        return fdi;
      }
    }

    /* Not normal! */
    if( fdip_is_passthru(fdip) )
      return NULL;

    if( fdip_is_busy(fdip) ) {
      citp_fdtable_busy_wait(fd, 0);
      goto again;
    }

    ci_assert(fdip_is_unknown(fdip));
    goto probe;
  }

  if( citp.init_level < CITP_INIT_FDTABLE ) {
    if( _citp_do_init_inprogress == 0 )
      CI_TRY(citp_do_init(CITP_INIT_ALL));
    else
      CI_TRY(citp_do_init(CITP_INIT_FDTABLE)); /* get what we need */
  }

  if( fd >= citp_fdtable.size )
    return NULL;

 probe:
  fdi = citp_fdtable_probe(fd);

  return fdi;
}
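/* Illustrative sketch (not part of the original file): the "busy marker"
 * pattern used by the two fdtable lookup routines above, reduced to a
 * generic slot.  A reader compare-and-swaps a BUSY sentinel into the slot,
 * works on the value it captured, then restores the slot; concurrent
 * readers that observe BUSY spin/retry.  Names are hypothetical and C11
 * atomics are used instead of the ci_/fdip_ helpers. */
#include <stdint.h>
#include <stdatomic.h>

#define EXAMPLE_SLOT_BUSY ((uintptr_t)1)

static void* example_claim_slot(_Atomic uintptr_t* slot)
{
  uintptr_t v;
  for( ; ; ) {
    v = atomic_load(slot);
    if( v == EXAMPLE_SLOT_BUSY )
      continue;                      /* another thread holds the slot: retry */
    if( atomic_compare_exchange_weak(slot, &v, EXAMPLE_SLOT_BUSY) )
      return (void*)v;               /* we own the slot until we restore it */
  }
}

static void example_release_slot(_Atomic uintptr_t* slot, void* v)
{
  atomic_store(slot, (uintptr_t)v);  /* swap the busy marker out again */
}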
static int ci_udp_recvmsg_block(ci_udp_iomsg_args* a, ci_netif* ni,
                                ci_udp_state* us, int timeout)
{
  int rc;

#ifndef __KERNEL__
  {
    citp_signal_info* si;
    struct pollfd pfd;
#if !CI_CFG_CITP_INSIDE_LIB_IS_FLAG
    int inside_lib;
#endif

    pfd.fd = a->fd;
    pfd.events = POLLIN;

    if( timeout == 0 )
      timeout = -1;

    /* Ideally, we should do the same as in citp_tcp_accept(), but since
     * we do not have lib_context and citp_exit_lib() out of unix/
     * subdirectory, we copy its contents. */
    si = citp_signal_get_specific_inited();

 continue_to_block:
#if !CI_CFG_CITP_INSIDE_LIB_IS_FLAG
    inside_lib = si->inside_lib;
    ci_assert_gt(inside_lib, 0);
#endif
    si->inside_lib = 0;
    ci_compiler_barrier();
    if(CI_UNLIKELY( si->aflags & OO_SIGNAL_FLAG_HAVE_PENDING ))
      citp_signal_run_pending(si);

    rc = ci_sys_poll(&pfd, 1, timeout);
#if CI_CFG_CITP_INSIDE_LIB_IS_FLAG
    si->inside_lib = 1;
#else
    si->inside_lib = inside_lib;
#endif

    if( rc > 0 )
      return 0;
    else if( rc == 0 )
      rc = -EAGAIN;
    else if( errno == EINTR && (si->aflags & OO_SIGNAL_FLAG_NEED_RESTART) &&
             timeout == -1 ) {
      /* Blocking recv() should only be restarted if there is no timeout. */
      goto continue_to_block;
    }
    else
      rc = -errno;

    return rc;
  }
#else  /* __KERNEL__ */
  {
    int mask;
    s64 t;

    if( timeout == 0 )
      t = -1;
    else
      t = msecs_to_jiffies(timeout);

    mask = POLLIN;
    rc = efab_tcp_helper_poll_udp(a->filp, &mask, &t);
    if( rc == 0 ) {
      if( mask ) {
        return 0;
      }
      else
        rc = -EAGAIN;
    }
    else if( rc == -ERESTARTSYS && us->s.so.rcvtimeo_msec )
      rc = -EINTR;
  }
  return rc;
#endif  /* __KERNEL__ */
}
static int ci_udp_filter_kernel_pkt(ci_netif* ni, ci_udp_state* us,
                                    struct msghdr* msg, int *bytes)
{
  enum onload_zc_callback_rc rc;
  struct onload_zc_msg zc_msg;
  struct onload_zc_iovec zc_iovec[CI_UDP_ZC_IOVEC_MAX];
  unsigned cb_flags = 0;
  int i = 0, bytes_remaining = *bytes;

  if( msg->msg_iovlen > CI_UDP_ZC_IOVEC_MAX ) {
    LOG_U(log("%s: too many fragments (%d), passing packet unfiltered",
              __FUNCTION__, (int)msg->msg_iovlen));
    return 1;
  }

  zc_msg.iov = zc_iovec;
  zc_msg.msghdr = *msg;
  zc_msg.msghdr.msg_iov = NULL;

  ci_assert_gt(msg->msg_iovlen, 0);

  do {
    zc_msg.iov[i].iov_base = msg->msg_iov[i].iov_base;
    zc_msg.iov[i].iov_len = msg->msg_iov[i].iov_len > bytes_remaining ?
      bytes_remaining : msg->msg_iov[i].iov_len;
    zc_msg.iov[i].buf = ONLOAD_ZC_HANDLE_NONZC;
    zc_msg.iov[i].iov_flags = 0;
    bytes_remaining -= zc_msg.iov[i].iov_len;
  } while( ++i < msg->msg_iovlen && bytes_remaining );

  zc_msg.msghdr.msg_iovlen = i;

  rc = (*(onload_zc_recv_filter_callback)((ci_uintptr_t)us->recv_q_filter))
    (&zc_msg, (void *)((ci_uintptr_t)us->recv_q_filter_arg), cb_flags);

  ci_assert(!(rc & ONLOAD_ZC_KEEP));

  if( rc & ONLOAD_ZC_TERMINATE )
    return 0;
  else {
    if( rc & ONLOAD_ZC_MODIFIED ) {
      int new_len = 0;
#ifndef NDEBUG
      int found_shortened_iov = 0;
#endif

      for( i = 0; i < zc_msg.msghdr.msg_iovlen; ++i ) {
        new_len += zc_msg.iov[i].iov_len;
#ifndef NDEBUG
        if( found_shortened_iov )
          ci_assert_equal(zc_msg.iov[i].iov_len, 0);
        ci_assert_equal(zc_msg.iov[i].iov_base, msg->msg_iov[i].iov_base);
        if( zc_msg.iov[i].iov_len != msg->msg_iov[i].iov_len ) {
          ci_assert_lt(zc_msg.iov[i].iov_len, msg->msg_iov[i].iov_len);
          found_shortened_iov = 1;
        }
#endif
      }

#ifndef NDEBUG
      if( found_shortened_iov )
        ci_assert_lt(new_len, *bytes);
      else
        ci_assert_equal(new_len, *bytes);
#endif

      *bytes = new_len;
    }
  }

  return 1;
}
int ci_udp_filter_recved_pkts(ci_netif* ni, ci_udp_state* us)
{
  enum onload_zc_callback_rc rc;
  struct onload_zc_msg zc_msg;
  struct onload_zc_iovec zc_iovec[CI_UDP_ZC_IOVEC_MAX];
  ci_ip_pkt_fmt* pkt;
  unsigned cb_flags;
  int dropped_bytes;

  ci_assert(ci_sock_is_locked(ni, &us->s.b));

  zc_msg.iov = zc_iovec;
  zc_msg.msghdr.msg_controllen = 0;
  zc_msg.msghdr.msg_flags = 0;

  while( us->recv_q.pkts_added !=
         us->recv_q.pkts_filter_passed + us->recv_q.pkts_filter_dropped ) {
    ci_rmb();

    pkt = PKT_CHK_NNL(ni, us->recv_q.filter);
    if( pkt->pf.udp.rx_flags &
        (CI_IP_PKT_FMT_PREFIX_UDP_RX_FILTER_PASSED |
         CI_IP_PKT_FMT_PREFIX_UDP_RX_FILTER_DROPPED) ) {
      /* We know this can't go past tail because of the while loop
       * condition */
      us->recv_q.filter = pkt->next;
      pkt = PKT_CHK_NNL(ni, us->recv_q.filter);
      ci_assert( !(pkt->pf.udp.rx_flags &
                   (CI_IP_PKT_FMT_PREFIX_UDP_RX_FILTER_PASSED |
                    CI_IP_PKT_FMT_PREFIX_UDP_RX_FILTER_DROPPED)) );
    }

    ci_udp_pkt_to_zc_msg(ni, pkt, &zc_msg);

    cb_flags = CI_IP_IS_MULTICAST(oo_ip_hdr(pkt)->ip_daddr_be32) ?
      ONLOAD_ZC_MSG_SHARED : 0;
    rc = (*(onload_zc_recv_filter_callback)((ci_uintptr_t)us->recv_q_filter))
      (&zc_msg, (void *)((ci_uintptr_t)us->recv_q_filter_arg), cb_flags);

    ci_assert(!(rc & ONLOAD_ZC_KEEP));

    if( rc & ONLOAD_ZC_TERMINATE ) {
      us->recv_q.bytes_filter_dropped += pkt->pf.udp.pay_len;
      pkt->pf.udp.rx_flags |= CI_IP_PKT_FMT_PREFIX_UDP_RX_FILTER_DROPPED;
      ++us->recv_q.pkts_filter_dropped;
    }
    else {
      pkt->pf.udp.rx_flags |= CI_IP_PKT_FMT_PREFIX_UDP_RX_FILTER_PASSED;
      ++us->recv_q.pkts_filter_passed;
      if( rc & ONLOAD_ZC_MODIFIED ) {
        ci_assert(!(cb_flags & ONLOAD_ZC_MSG_SHARED));
        dropped_bytes = ci_zc_msg_to_udp_pkt(ni, &zc_msg, pkt);
        ci_assert_gt(dropped_bytes, 0);
        ci_assert_lt(dropped_bytes, pkt->pf.udp.pay_len);
        pkt->pf.udp.pay_len -= dropped_bytes;
        us->recv_q.bytes_filter_dropped += dropped_bytes;
      }
      us->recv_q.bytes_filter_passed += pkt->pf.udp.pay_len;
      return 1;
    }
  }

  return us->recv_q.pkts_filter_passed != us->recv_q.pkts_delivered;
}
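/* Illustrative sketch (not part of the original file): the shape of a
 * receive-queue filter callback as invoked by the two functions above.
 * Returning ONLOAD_ZC_TERMINATE marks the datagram as dropped,
 * ONLOAD_ZC_CONTINUE passes it through, and or-ing in ONLOAD_ZC_MODIFIED
 * tells the stack that the iovecs were shortened in place (ONLOAD_ZC_KEEP
 * is asserted against above, so it must not be returned from this hook).
 * The exact callback typedef lives in the public zero-copy extensions
 * header; the filter logic below is a hypothetical example. */
static enum onload_zc_callback_rc
example_recv_filter(struct onload_zc_msg* msg, void* arg, int flags)
{
  /* Drop datagrams with no payload; pass everything else unchanged. */
  if( msg->msghdr.msg_iovlen == 0 || msg->iov[0].iov_len == 0 )
    return ONLOAD_ZC_TERMINATE;
  return ONLOAD_ZC_CONTINUE;
}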
static int ci_zc_msg_to_udp_pkt(ci_netif* ni,
                                struct onload_zc_msg* zc_msg,
                                ci_ip_pkt_fmt* pkt)
{
  int i, n_buffers = pkt->n_buffers, dropped_bytes = 0;
  ci_ip_pkt_fmt* frag;
  ci_ip_pkt_fmt* prev_frag = NULL;

  frag = pkt;
  i = 0;
  ci_assert_nequal(zc_msg->iov, NULL);

  /* Ignore first frag if zero length and there is another frag */
  if( oo_offbuf_left(&frag->buf) == 0 && OO_PP_NOT_NULL(frag->frag_next) ) {
    frag = PKT_CHK_NNL(ni, frag->frag_next);
    --n_buffers;
  }

  CI_TEST(zc_msg->msghdr.msg_iovlen <= n_buffers);
  CI_TEST(zc_msg->msghdr.msg_iovlen > 0);

  do {
    CI_TEST(zc_msg->iov[i].buf == (onload_zc_handle)frag);
    CI_TEST(zc_msg->iov[i].iov_len != 0);
    if( i < zc_msg->msghdr.msg_iovlen ) {
      if( zc_msg->iov[i].iov_base != oo_offbuf_ptr(&frag->buf) ) {
        ci_assert_gt((char*)zc_msg->iov[i].iov_base,
                     oo_offbuf_ptr(&frag->buf));
        dropped_bytes += ((char*)zc_msg->iov[i].iov_base -
                          oo_offbuf_ptr(&frag->buf));
        oo_offbuf_set_start(&frag->buf, (char*)zc_msg->iov[i].iov_base);
      }
      if( zc_msg->iov[i].iov_len != oo_offbuf_left(&frag->buf) ) {
        ci_assert_lt(zc_msg->iov[i].iov_len, oo_offbuf_left(&frag->buf));
        dropped_bytes += (oo_offbuf_left(&frag->buf) -
                          zc_msg->iov[i].iov_len);
        oo_offbuf_set_len(&frag->buf, zc_msg->iov[i].iov_len);
      }
    }
    else {
      /* All remaining fragments should be discarded.  Should not get
       * here on first frag as msg_iovlen > 0 */
      ci_assert(prev_frag != NULL);
      prev_frag->frag_next = OO_PP_NULL;
      /* remember frag so we can release it after counting dropped bytes */
      prev_frag = frag;
      do {
        dropped_bytes += oo_offbuf_left(&frag->buf);
        if( ++i == n_buffers )
          break;
        frag = PKT_CHK_NNL(ni, frag->frag_next);
      } while( 1 );
      ci_netif_pkt_release(ni, prev_frag);
      pkt->n_buffers -= (n_buffers - zc_msg->msghdr.msg_iovlen);
      return dropped_bytes;
    }

    ci_assert_lt(oo_offbuf_offset(&frag->buf) + oo_offbuf_left(&frag->buf),
                 CI_CFG_PKT_BUF_SIZE);

    if( ++i == n_buffers )
      break;
    prev_frag = frag;
    frag = PKT_CHK_NNL(ni, frag->frag_next);
  } while( 1 );

  return dropped_bytes;
}
int citp_pipe_splice_write(citp_fdinfo* fdi, int alien_fd, loff_t* alien_off,
                           size_t olen, int flags,
                           citp_lib_context_t* lib_context)
{
  citp_pipe_fdi* epi = fdi_to_pipe_fdi(fdi);
  int len_in_bufs = OO_PIPE_SIZE_TO_BUFS(olen);
  struct iovec iov_on_stack[CITP_PIPE_SPLICE_WRITE_STACK_IOV_LEN];
  struct iovec* iov = iov_on_stack;
  int want_buf_count;
  int rc;
  int bytes_to_read;
  int len = olen;
  int no_more = 1; /* for now we only run a single loop */
  int written_total = 0;
  int non_block = (flags & SPLICE_F_NONBLOCK) ||
      (epi->pipe->aflags &
       (CI_PFD_AFLAG_NONBLOCK << CI_PFD_AFLAG_WRITER_SHIFT));

  if( fdi_is_reader(fdi) ) {
    errno = EINVAL;
    return -1;
  }
  if( alien_off ) {
    /* TODO support this */
    errno = ENOTSUP;
    return -1;
  }

  do {
    int count;
    int iov_num;
    int bytes_to_write;
    struct ci_pipe_pkt_list pkts = {};
    struct ci_pipe_pkt_list pkts2;

    want_buf_count = len_in_bufs;
    /* We might need to wait for buffers here on the first iteration */
    rc = ci_pipe_zc_alloc_buffers(epi->ni, epi->pipe, want_buf_count,
                                  MSG_NOSIGNAL |
                                  (non_block || written_total ?
                                   MSG_DONTWAIT : 0),
                                  &pkts);
    if( rc < 0 && written_total ) {
      /* whatever the error, we need to report the bytes already written */
      rc = written_total;
      break;
    }
    else if( rc < 0 )
      break;
    else if( pkts.count == 0 && non_block ) {
      errno = EAGAIN;
      rc = -1;
      break;
    }
    else
      ci_assert_gt(pkts.count, 0);

    count = pkts.count;
    if( count > CITP_PIPE_SPLICE_WRITE_STACK_IOV_LEN ) {
      void* niov = realloc(iov == iov_on_stack ? NULL : iov,
                           sizeof(*iov) * len_in_bufs);
      if( niov == NULL )
        /* we can still move quite a few pkts */
        count = CITP_PIPE_SPLICE_WRITE_STACK_IOV_LEN;
      else
        iov = niov;
    }

    ci_assert_ge(count, 1);
    iov_num = count;
    pkts2 = pkts;
    bytes_to_read = ci_pipe_list_to_iovec(epi->ni, epi->pipe, iov, &iov_num,
                                          &pkts2, len);

    citp_exit_lib_if(lib_context, TRUE);
    /* Note: the following call might be non-blocking as well as blocking */
    rc = readv(alien_fd, iov, count);
    citp_reenter_lib(lib_context);

    if( rc > 0 ) {
      bytes_to_write = rc;
      written_total += bytes_to_write;
      len -= bytes_to_write;
      no_more |= bytes_to_write < bytes_to_read;
    }
    else {
      bytes_to_write = 0;
      no_more = 1;
    }

    {
      /* pipe zc_write will write the non-empty buffers and release the
       * empty ones */
      int rc2 = ci_pipe_zc_write(epi->ni, epi->pipe, &pkts, bytes_to_write,
                                 CI_PIPE_ZC_WRITE_FLAG_FORCE |
                                 MSG_DONTWAIT | MSG_NOSIGNAL);
      (void) rc2;
      ci_assert_equal(rc2, bytes_to_write);
    }

    /* for now we will not be doing a second iteration; to allow for that
     * we'd need a guarantee that the read will not block,
     * e.g. insight into the type of fd and a non-blocking operation
     * (to name a valid case: socket, recvmsg) */
  } while( ! no_more );

  if( iov != iov_on_stack )
    free(iov);
  if( rc > 0 )
    return written_total;
  if( rc < 0 && errno == EPIPE && ! (flags & MSG_NOSIGNAL) ) {
    ci_sys_ioctl(ci_netif_get_driver_handle(epi->ni),
                 OO_IOC_KILL_SELF_SIGPIPE, NULL);
  }
  return rc;
}