void citp_waitable_init(ci_netif* ni, citp_waitable* w, int id)
{
  /* NB. Some members initialised in citp_waitable_obj_free(). */
  oo_p sp;

#if CI_CFG_SOCKP_IS_PTR
  w->bufid = id;
#else
  w->bufid = OO_SP_FROM_INT(ni, id);
#endif
  w->sb_flags = 0;
  w->sb_aflags = CI_SB_AFLAG_ORPHAN | CI_SB_AFLAG_NOT_READY;

  sp = oo_sockp_to_statep(ni, W_SP(w));
  OO_P_ADD(sp, CI_MEMBER_OFFSET(citp_waitable, post_poll_link));
  ci_ni_dllist_link_init(ni, &w->post_poll_link, sp, "ppll");
  ci_ni_dllist_self_link(ni, &w->post_poll_link);

  sp = oo_sockp_to_statep(ni, W_SP(w));
  OO_P_ADD(sp, CI_MEMBER_OFFSET(citp_waitable, ready_link));
  ci_ni_dllist_link_init(ni, &w->ready_link, sp, "rll");
  ci_ni_dllist_self_link(ni, &w->ready_link);

  w->lock.wl_val = 0;
  CI_DEBUG(w->wt_next = OO_SP_NULL);
  CI_DEBUG(w->next_id = CI_ILL_END);

  citp_waitable_reinit(ni, w);
}
citp_waitable_obj* citp_waitable_obj_alloc(ci_netif* netif)
{
  citp_waitable_obj* wo;

  ci_assert(netif);
  ci_assert(ci_netif_is_locked(netif));

  if( netif->state->deferred_free_eps_head != CI_ILL_END ) {
    /* Atomically detach the deferred-free list (appended to without the
     * stack lock) and splice it onto the free list. */
    ci_uint32 link;
    do
      link = netif->state->deferred_free_eps_head;
    while( ci_cas32_fail(&netif->state->deferred_free_eps_head,
                         link, CI_ILL_END) );

    while( link != CI_ILL_END ) {
      citp_waitable* w = ID_TO_WAITABLE(netif, link);
      link = w->next_id;
      CI_DEBUG(w->next_id = CI_ILL_END);
      ci_assert_equal(w->state, CI_TCP_STATE_FREE);
      ci_assert(OO_SP_IS_NULL(w->wt_next));
      w->wt_next = netif->state->free_eps_head;
      netif->state->free_eps_head = W_SP(w);
    }
  }

  if( OO_SP_IS_NULL(netif->state->free_eps_head) ) {
    ci_tcp_helper_more_socks(netif);
    if( OO_SP_IS_NULL(netif->state->free_eps_head) )
      ci_netif_timeout_reap(netif);
  }

  if( OO_SP_IS_NULL(netif->state->free_eps_head) )
    return NULL;

  LOG_TV(ci_log("%s: allocating %d", __FUNCTION__,
                OO_SP_FMT(netif->state->free_eps_head)));

  ci_assert(IS_VALID_SOCK_P(netif, netif->state->free_eps_head));
#if !defined(__KERNEL__) && !defined(CI_HAVE_OS_NOPAGE)
  ci_netif_mmap_shmbuf(netif,
                       (netif->state->free_eps_head >> EP_BUF_BLOCKSHIFT) + 1);
#endif

  wo = SP_TO_WAITABLE_OBJ(netif, netif->state->free_eps_head);

  ci_assert(OO_SP_EQ(W_SP(&wo->waitable), netif->state->free_eps_head));
  ci_assert_equal(wo->waitable.state, CI_TCP_STATE_FREE);
  ci_assert_equal(wo->waitable.sb_aflags,
                  (CI_SB_AFLAG_ORPHAN | CI_SB_AFLAG_NOT_READY));
  ci_assert_equal(wo->waitable.lock.wl_val, 0);

  netif->state->free_eps_head = wo->waitable.wt_next;
  CI_DEBUG(wo->waitable.wt_next = OO_SP_NULL);
  return wo;
}
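/* Illustrative sketch, not part of the original source: the expected call
 * pattern for citp_waitable_obj_alloc().  The caller must already hold the
 * stack lock, as the assertions in the allocator require; the function name
 * and the "protocol-specific init" placeholder here are ours. */
static citp_waitable_obj* example_waitable_alloc(ci_netif* netif)
{
  citp_waitable_obj* wo;

  ci_assert(ci_netif_is_locked(netif));
  wo = citp_waitable_obj_alloc(netif);
  if( wo == NULL )
    return NULL;               /* endpoint buffers exhausted */
  /* ... protocol-specific initialisation of wo->waitable here ... */
  return wo;
}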
/* Unpick the ci_ip_timer structure to actually do the callback. */
static void ci_ip_timer_docallback(ci_netif *netif, ci_ip_timer* ts)
{
  ci_assert( TIME_LE(ts->time, ci_ip_time_now(netif)) );
  ci_assert( ts->time == IPTIMER_STATE(netif)->sched_ticks );

  switch( ts->fn ) {
  case CI_IP_TIMER_TCP_RTO:
    CHECK_TS(netif, SP_TO_TCP(netif, ts->param1));
    ci_tcp_timeout_rto(netif, SP_TO_TCP(netif, ts->param1));
    break;
  case CI_IP_TIMER_TCP_DELACK:
    CHECK_TS(netif, SP_TO_TCP(netif, ts->param1));
    ci_tcp_timeout_delack(netif, SP_TO_TCP(netif, ts->param1));
    break;
  case CI_IP_TIMER_TCP_ZWIN:
    CHECK_TS(netif, SP_TO_TCP(netif, ts->param1));
    ci_tcp_timeout_zwin(netif, SP_TO_TCP(netif, ts->param1));
    break;
  case CI_IP_TIMER_TCP_KALIVE:
    CHECK_TS(netif, SP_TO_TCP(netif, ts->param1));
    ci_tcp_timeout_kalive(netif, SP_TO_TCP(netif, ts->param1));
    break;
  case CI_IP_TIMER_TCP_LISTEN:
    ci_tcp_timeout_listen(netif, SP_TO_TCP_LISTEN(netif, ts->param1));
    break;
  case CI_IP_TIMER_TCP_CORK:
    ci_tcp_timeout_cork(netif, SP_TO_TCP(netif, ts->param1));
    break;
  case CI_IP_TIMER_NETIF_TIMEOUT:
    ci_netif_timeout_state(netif);
    break;
  case CI_IP_TIMER_PMTU_DISCOVER:
    ci_pmtu_timeout_pmtu(netif, SP_TO_TCP(netif, ts->param1));
    break;
#if CI_CFG_TCP_SOCK_STATS
  case CI_IP_TIMER_TCP_STATS:
    ci_tcp_stats_action(netif, SP_TO_TCP(netif, ts->param1),
                        CI_IP_STATS_FLUSH, CI_IP_STATS_OUTPUT_NONE,
                        NULL, NULL);
    break;
#endif
#if CI_CFG_SUPPORT_STATS_COLLECTION
  case CI_IP_TIMER_NETIF_STATS:
    ci_netif_stats_action(netif, CI_IP_STATS_FLUSH,
                          CI_IP_STATS_OUTPUT_NONE, NULL, NULL);
    break;
#endif
#if CI_CFG_IP_TIMER_DEBUG
  case CI_IP_TIMER_DEBUG_HOOK:
    ci_ip_timer_debug_fn(netif, ts->link.addr, ts->param1);
    break;
#endif
  default:
    LOG_U(log(LPF "unknown timer callback code:%x param1:%d",
              ts->fn, OO_SP_FMT(ts->param1)));
    CI_DEBUG(ci_fail_stop_fn());
  }
}
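/* Illustrative sketch, not part of the original source: the shape of arming
 * one of the timers dispatched above.  ci_ip_timer_set() is assumed to exist
 * elsewhere in the codebase; treat its name, its signature, and the field
 * assignments here as assumptions rather than a definitive API. */
static void example_arm_timer(ci_netif* netif, ci_ip_timer* it,
                              oo_sp sock_p, ci_iptime_t delay)
{
  it->fn = CI_IP_TIMER_TCP_DELACK;  /* routed to ci_tcp_timeout_delack() */
  it->param1 = sock_p;              /* socket the callback will operate on */
  ci_ip_timer_set(netif, it, ci_ip_time_now(netif) + delay);
}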
void __citp_fdinfo_ref_count_zero(citp_fdinfo* fdi, int fdt_locked)
{
  Log_V(log("%s: fd=%d on_rcz=%d", __FUNCTION__, fdi->fd,
            fdi->on_ref_count_zero));

  citp_fdinfo_assert_valid(fdi);
  ci_assert(oo_atomic_read(&fdi->ref_count) == 0);
  ci_assert_ge(fdi->fd, 0);
  ci_assert_lt(fdi->fd, citp_fdtable.inited_count);
  ci_assert_nequal(fdi_to_fdip(fdi), citp_fdtable.table[fdi->fd].fdip);

  switch( fdi->on_ref_count_zero ) {
  case FDI_ON_RCZ_CLOSE:
#if CI_CFG_FD_CACHING
    if( citp_fdinfo_get_ops(fdi)->cache(fdi) == 1 ) {
      if( ! fdt_locked && fdtable_strict() )  CITP_FDTABLE_LOCK();
      fdtable_swap(fdi->fd, fdip_closing, fdip_unknown,
                   fdt_locked | fdtable_strict());
      citp_fdinfo_get_ops(fdi)->dtor(fdi, fdt_locked | fdtable_strict());
      if( ! fdt_locked && fdtable_strict() )  CITP_FDTABLE_UNLOCK();
      citp_fdinfo_free(fdi);
      break;
    }
    else
#endif
    {
      if( ! fdt_locked && fdtable_strict() )  CITP_FDTABLE_LOCK();
      ci_tcp_helper_close_no_trampoline(fdi->fd);
      /* The swap must occur after the close, otherwise another thread could
       * cause a probe of the old endpoint info, which is about to be freed.
       */
      fdtable_swap(fdi->fd, fdip_closing, fdip_unknown,
                   fdt_locked | fdtable_strict());
      citp_fdinfo_get_ops(fdi)->dtor(fdi, fdt_locked | fdtable_strict());
      if( ! fdt_locked && fdtable_strict() )  CITP_FDTABLE_UNLOCK();
      citp_fdinfo_free(fdi);
      break;
    }
  case FDI_ON_RCZ_DUP2:
    dup2_complete(fdi, fdi_to_fdip(fdi), fdt_locked);
    break;
  case FDI_ON_RCZ_HANDOVER:
    citp_fdinfo_do_handover(fdi, fdt_locked);
    break;
  case FDI_ON_RCZ_MOVED:
    citp_fdinfo_get_ops(fdi)->dtor(fdi, fdt_locked);
    citp_fdinfo_free(fdi);
    break;
  default:
    CI_DEBUG(ci_log("%s: fd=%d on_ref_count_zero=%d", __FUNCTION__,
                    fdi->fd, fdi->on_ref_count_zero));
    ci_assert(0);
  }
}
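/* Illustrative sketch, not part of the original source: the ref-count
 * lifecycle that ends in __citp_fdinfo_ref_count_zero().  Setting
 * on_ref_count_zero before dropping the last reference selects which branch
 * of the switch above runs; the same pattern is used for FDI_ON_RCZ_DUP2 in
 * citp_ep_dup3() below. */
static void example_close_path(citp_fdinfo* fdi)
{
  fdi->on_ref_count_zero = FDI_ON_RCZ_CLOSE;
  citp_fdinfo_release_ref(fdi, 0);  /* at zero, runs the function above */
}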
void citp_fdtable_insert(citp_fdinfo* fdi, unsigned fd, int fdt_locked)
{
  ci_assert(fdi);
  ci_assert(fdi->protocol);
  ci_assert(citp_fdtable.inited_count > fd);
  ci_assert_ge(oo_atomic_read(&fdi->ref_count), 1);

  fdi->fd = fd;
  CI_DEBUG(fdi->on_ref_count_zero = FDI_ON_RCZ_NONE);
  fdi->is_special = 0;
  citp_fdtable_busy_clear(fd, fdi_to_fdip(fdi), fdt_locked);
}
/* Initialise all the fields that we can in the UDP state structure.
** There are no IP options, no destination addresses, no ports. */
static void ci_udp_state_init(ci_netif* netif, ci_udp_state* us)
{
  ci_sock_cmn_init(netif, &us->s);

  /* IP_MULTICAST_LOOP is 1 by default, so we should not send multicast
   * unless specially permitted. */
  if( ! NI_OPTS(netif).force_send_multicast )
    us->s.cp.sock_cp_flags |= OO_SCP_NO_MULTICAST;

  /* Poison. */
  CI_DEBUG(memset(&us->s + 1, 0xf0, (char*) (us + 1) - (char*) (&us->s + 1)));

  /*! \todo This should be part of sock_cmn reinit, but the comment to that
   * function suggests that it's possibly not a good plan to move it there. */
#if CI_CFG_TIMESTAMPING
  ci_udp_recv_q_init(&us->timestamp_q);
#endif

  /*! \todo These two should really be handled in ci_sock_cmn_init(). */

  /* Make sure we don't hit any state assertions.  Can use
   * UDP_STATE_FROM_SOCKET_EPINFO() after this. */
  us->s.b.state = CI_TCP_STATE_UDP;

  us->s.so.sndbuf = NI_OPTS(netif).udp_sndbuf_def;
  us->s.so.rcvbuf = NI_OPTS(netif).udp_rcvbuf_def;

  /* Init the ip-caches (packet header templates). */
  ci_udp_hdrs_init(&us->s.pkt);
  ci_ip_cache_init(&us->ephemeral_pkt);
  ci_udp_hdrs_init(&us->ephemeral_pkt);

  udp_lport_be16(us) = 0;
  udp_rport_be16(us) = 0;

#if CI_CFG_ZC_RECV_FILTER
  us->recv_q_filter = 0;
  us->recv_q_filter_arg = 0;
#endif
  ci_udp_recv_q_init(&us->recv_q);
  us->zc_kernel_datagram = OO_PP_NULL;
  us->zc_kernel_datagram_count = 0;
  us->tx_async_q = CI_ILL_END;
  oo_atomic_set(&us->tx_async_q_level, 0);
  us->tx_count = 0;
  us->udpflags = CI_UDPF_MCAST_LOOP;
  us->ip_pktinfo_cache.intf_i = -1;
  us->stamp = 0;

  memset(&us->stats, 0, sizeof(us->stats));
}
void ci_sock_cmn_init(ci_netif* ni, ci_sock_cmn* s)
{
  oo_p sp;

  /* Poison. */
  CI_DEBUG(memset(&s->b + 1, 0xf0, (char*) (s + 1) - (char*) (&s->b + 1)));

  citp_waitable_reinit(ni, &s->b);
  oo_sock_cplane_init(&s->cp);
  s->local_peer = OO_SP_NULL;

  s->s_flags = CI_SOCK_FLAG_CONNECT_MUST_BIND | CI_SOCK_FLAG_PMTU_DO;
  s->s_aflags = 0u;

  ci_assert_equal( 0, CI_IP_DFLT_TOS );
  s->so_priority = 0;

  /* SO_SNDBUF & SO_RCVBUF.  See also ci_tcp_set_established_state() which
   * may modify these values. */
  memset(&s->so, 0, sizeof(s->so));
  s->so.sndbuf = NI_OPTS(ni).tcp_sndbuf_def;
  s->so.rcvbuf = NI_OPTS(ni).tcp_rcvbuf_def;

  s->rx_bind2dev_ifindex = CI_IFID_BAD;
  /* These don't really need to be initialised, as only significant when
   * rx_bind2dev_ifindex != CI_IFID_BAD.  But makes stackdump output
   * cleaner this way... */
  s->rx_bind2dev_base_ifindex = 0;
  s->rx_bind2dev_vlan = 0;

  s->cmsg_flags = 0u;
  s->timestamping_flags = 0u;
  s->os_sock_status = OO_OS_STATUS_TX;

  ci_ip_queue_init(&s->timestamp_q);
  s->timestamp_q_extract = OO_PP_NULL;

  ci_sock_cmn_reinit(ni, s);

  sp = oo_sockp_to_statep(ni, SC_SP(s));
  OO_P_ADD(sp, CI_MEMBER_OFFSET(ci_sock_cmn, reap_link));
  ci_ni_dllist_link_init(ni, &s->reap_link, sp, "reap");
  ci_ni_dllist_self_link(ni, &s->reap_link);
}
static struct file_operations *oo_fops_by_type(int fd_type)
{
  switch( fd_type ) {
  case CI_PRIV_TYPE_NETIF:          return &oo_fops;
  case CI_PRIV_TYPE_TCP_EP:         return &linux_tcp_helper_fops_tcp;
  case CI_PRIV_TYPE_UDP_EP:         return &linux_tcp_helper_fops_udp;
  case CI_PRIV_TYPE_PASSTHROUGH_EP: return &linux_tcp_helper_fops_passthrough;
#if CI_CFG_USERSPACE_PIPE
  case CI_PRIV_TYPE_PIPE_READER:    return &linux_tcp_helper_fops_pipe_reader;
  case CI_PRIV_TYPE_PIPE_WRITER:    return &linux_tcp_helper_fops_pipe_writer;
#endif
  default:
    CI_DEBUG(ci_log("%s: error fd_type = %d", __FUNCTION__, fd_type));
    return NULL;
  }
}
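/* Illustrative sketch, not part of the original source: attaching the
 * looked-up ops to a file the caller is initialising.  "filp", the function
 * name, and the error handling are placeholders of ours, not this driver's
 * actual attach path. */
static int example_attach_fops(struct file* filp, int fd_type)
{
  struct file_operations* fops = oo_fops_by_type(fd_type);
  if( fops == NULL )
    return -EINVAL;
  filp->f_op = fops;
  return 0;
}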
int ci_buddy_ctor2(ci_buddy_allocator* b, unsigned order,
                   void* (*alloc_fn)(size_t), void (*free_fn)(void*))
{
  unsigned o;

  ci_assert(b);

  b->order = order;
  b->free_lists = (ci_dllist*) alloc_fn((order + 1) * sizeof(ci_dllist));
  if( b->free_lists == 0 )  goto fail1;

  b->links = (ci_dllink*) alloc_fn(ci_pow2(order) * sizeof(ci_dllink));
  if( b->links == 0 )  goto fail2;

  b->orders = (ci_uint8*) alloc_fn(ci_pow2(order));
  if( b->orders == 0 )  goto fail3;

  CI_DEBUG(CI_ZERO_ARRAY(b->links, ci_pow2(order)));

  for( o = 0; o <= b->order; ++o )
    ci_dllist_init(b->free_lists + o);

  /* The whole space starts out as a single free block of maximal order. */
  ci_dllist_push(FL(b, b->order), ADDR_TO_LINK(b, 0));
  ci_assert(b->order < 255);
  b->orders[0] = (ci_uint8) b->order;
  ci_assert(!IS_BUSY(b, LINK_TO_ADDR(b, ci_dllist_head(FL(b, b->order)))));
  return 0;

 fail3:
  free_fn(b->links);
 fail2:
  free_fn(b->free_lists);
 fail1:
  return -ENOMEM;
}
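/* Illustrative usage sketch, not part of the original source: constructing a
 * buddy allocator over 2^6 units, letting the standard allocator supply the
 * metadata arrays.  malloc/free match the alloc_fn/free_fn signatures
 * exactly; the example function name is ours. */
#include <stdlib.h>

static int example_buddy_setup(ci_buddy_allocator* b)
{
  int rc = ci_buddy_ctor2(b, 6, malloc, free);  /* 64 allocation units */
  if( rc < 0 )
    return rc;   /* -ENOMEM: one of the three metadata arrays failed */
  /* ... allocate/free blocks via the buddy allocator's own API ... */
  return 0;
}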
/*! Tear down a private_t */
void ci_resource_table_dtor( ci_resource_table_t *rt )
{
  efch_resource_t *rs;
  unsigned i;

  ci_assert(rt);

#if CI_CFG_PRIVATE_T_DEBUG_LIST
  ci_lock_lock(&priv_list_lock);
  list_del(&(rt->priv_list));
  ci_lock_unlock(&priv_list_lock);
#endif

  for( i = 0; i < rt->resource_table_highwater; i++ ) {
    rs = rt->resource_table[i];
    ci_assert(rs != NULL);
    efch_resource_free(rs);
    CI_DEBUG(rt->resource_table[i] = NULL);
  }

  if( rt->resource_table != rt->resource_table_static )
    ci_free(rt->resource_table);

  CI_DEBUG_ZERO(rt);
}
int citp_ep_dup3(unsigned fromfd, unsigned tofd, int flags)
{
  volatile citp_fdinfo_p* p_tofdip;
  citp_fdinfo_p tofdip;
  unsigned max;

  Log_V(log("%s(%d, %d)", __FUNCTION__, fromfd, tofd));

  /* Must be checked by callers. */
  ci_assert(fromfd != tofd);

  /* Hack: if [tofd] is the fd we're using for logging, we'd better choose
  ** a different one! */
  if( tofd == citp.log_fd )  citp_log_change_fd();

  ci_assert(citp.init_level >= CITP_INIT_FDTABLE);

  max = CI_MAX(fromfd, tofd);
  if( max >= citp_fdtable.inited_count ) {
    ci_assert(max < citp_fdtable.size);
    CITP_FDTABLE_LOCK();
    __citp_fdtable_extend(max);
    CITP_FDTABLE_UNLOCK();
  }

  /* Bug1151: Concurrent threads doing dup2(x,y) and dup2(y,x) can deadlock
  ** against one another.  So we take out a fat lock to prevent concurrent
  ** dup2()s. */
  /* Lock tofd.  We need to interlock against select and poll etc, so we
  ** also grab the exclusive lock.  Also grab the bug1151 lock. */
  pthread_mutex_lock(&citp_dup_lock);
  CITP_FDTABLE_LOCK();
  p_tofdip = &citp_fdtable.table[tofd].fdip;
 lock_tofdip_again:
  tofdip = *p_tofdip;
  if( fdip_is_busy(tofdip) )
    tofdip = citp_fdtable_busy_wait(tofd, 1);
  if( fdip_is_closing(tofdip) )
    tofdip = citp_fdtable_closing_wait(tofd, 1);
  if( fdip_is_reserved(tofdip) ) {
    /* ?? FIXME: we can't cope with this at the moment */
    CITP_FDTABLE_UNLOCK();
    Log_U(log("%s(%d, %d): target is reserved", __FUNCTION__, fromfd, tofd));
    errno = EBUSY;
    tofd = -1;
    goto out;
  }
  if( fdip_cas_fail(p_tofdip, tofdip, fdip_busy) )
    goto lock_tofdip_again;
  CITP_FDTABLE_UNLOCK();
  ci_assert(fdip_is_normal(tofdip) | fdip_is_passthru(tofdip) |
            fdip_is_unknown(tofdip));

  if( fdip_is_normal(tofdip) ) {
    /* We're duping onto a user-level socket. */
    citp_fdinfo* tofdi = fdip_to_fdi(tofdip);

    if( tofdi->epoll_fd >= 0 ) {
      citp_fdinfo* epoll_fdi = citp_epoll_fdi_from_member(tofdi, 0);
      if( epoll_fdi ) {
        if( epoll_fdi->protocol->type == CITP_EPOLL_FD )
          citp_epoll_on_close(epoll_fdi, tofdi, 0);
        citp_fdinfo_release_ref(epoll_fdi, 0);
      }
    }

    ci_assert_equal(tofdi->on_ref_count_zero, FDI_ON_RCZ_NONE);
    tofdi->on_ref_count_zero = FDI_ON_RCZ_DUP2;
    tofdi->on_rcz.dup3_args.fd = fromfd;
    tofdi->on_rcz.dup3_args.flags = flags;
    citp_fdinfo_release_ref(tofdi, 0);

    {
      int i = 0;
      /* We need to free this fdi.  If someone is using it right now,
       * we are in trouble.  So, we spin for a while and interrupt the
       * user.  See bug 28123. */
      while( tofdi->on_ref_count_zero != FDI_ON_RCZ_DONE ) {
        if( ci_is_multithreaded() && i % 10000 == 9999 ) {
          pthread_t pth = tofdi->thread_id;
          if( pth != pthread_self() && pth != PTHREAD_NULL ) {
            pthread_kill(pth, SIGONLOAD);
            sleep(1);
          }
        }
        ci_spinloop_pause();
        i++;
      }
      ci_rmb();
    }

    if( tofdi->on_rcz.dup2_result < 0 ) {
      errno = -tofdi->on_rcz.dup2_result;
      /* Need to re-insert [tofdi] into the table. */
      ci_assert_equal(oo_atomic_read(&tofdi->ref_count), 0);
      oo_atomic_set(&tofdi->ref_count, 1);
      CI_DEBUG(tofdi->on_ref_count_zero = FDI_ON_RCZ_NONE);
      citp_fdtable_busy_clear(tofd, tofdip, 0);
      tofd = -1;
    }
    else {
      ci_assert(tofdi->on_rcz.dup2_result == tofd);
      citp_fdinfo_get_ops(tofdi)->dtor(tofdi, 0);
      citp_fdinfo_free(tofdi);
    }
    goto out;
  }

  ci_assert(fdip_is_passthru(tofdip) | fdip_is_unknown(tofdip));

  {
    /* We're dupping onto an O/S descriptor, or it may be closed.  Create a
    ** dummy [citp_fdinfo], just so we can share code with the case above.
    */
    citp_fdinfo fdi;
    fdi.fd = tofd;
    fdi.on_rcz.dup3_args.fd = fromfd;
    fdi.on_rcz.dup3_args.flags = flags;
    dup2_complete(&fdi, tofdip, 0);
    if( fdi.on_rcz.dup2_result < 0 ) {
      errno = -fdi.on_rcz.dup2_result;
      citp_fdtable_busy_clear(tofd, tofdip, 0);
      tofd = -1;
    }
    else
      ci_assert(fdi.on_rcz.dup2_result == tofd);
  }

 out:
  pthread_mutex_unlock(&citp_dup_lock);
  return tofd;
}
static int citp_udp_socket(int domain, int type, int protocol)
{
  citp_fdinfo* fdi;
  citp_sock_fdi* epi;
  ef_driver_handle fd;
  int rc;
  ci_netif* ni;

  Log_V(log(LPF "socket(%d, %d, %d)", domain, type, protocol));

  epi = CI_ALLOC_OBJ(citp_sock_fdi);
  if( ! epi ) {
    Log_U(ci_log(LPF "socket: failed to allocate epi"));
    errno = ENOMEM;
    goto fail1;
  }
  fdi = &epi->fdinfo;
  citp_fdinfo_init(fdi, &citp_udp_protocol_impl);

  rc = citp_netif_alloc_and_init(&fd, &ni);
  if( rc != 0 ) {
    if( rc == CI_SOCKET_HANDOVER ) {
      /* This implies EF_DONT_ACCELERATE is set, so we handover
       * regardless of CITP_OPTS.no_fail. */
      CI_FREE_OBJ(epi);
      return rc;
    }
    goto fail2;
  }

  /* Protect the fdtable entry until we're done initialising. */
  if( fdtable_strict() )  CITP_FDTABLE_LOCK();
  if( (fd = ci_udp_ep_ctor(&epi->sock, ni, domain, type)) < 0 ) {
    /*! ?? \TODO unpick the ci_udp_ep_ctor according to how failed */
    Log_U(ci_log(LPF "socket: udp_ep_ctor failed"));
    errno = -fd;
    goto fail3;
  }

  citp_fdtable_new_fd_set(fd, fdip_busy, fdtable_strict());
  if( fdtable_strict() )  CITP_FDTABLE_UNLOCK();

  CI_DEBUG(epi->sock.s->pid = getpid());

  /* We're ready.  Unleash us onto the world! */
  ci_assert(epi->sock.s->b.sb_aflags & CI_SB_AFLAG_NOT_READY);
  ci_atomic32_and(&epi->sock.s->b.sb_aflags, ~CI_SB_AFLAG_NOT_READY);
  citp_fdtable_insert(fdi, fd, 0);

  Log_VSS(log(LPF "socket(%d, %d, %d) = "EF_FMT, domain, type, protocol,
              EF_PRI_ARGS(epi, fd)));
  return fd;

 fail3:
  if( CITP_OPTS.no_fail && errno != ELIBACC )
    CITP_STATS_NETIF(++ni->state->stats.udp_handover_socket);
  citp_netif_release_ref(ni, 0);
 fail2:
  CI_FREE_OBJ(epi);
 fail1:
  /* BUG1408: Graceful failure.  We'll only fail outright if there's a
   * driver/library mismatch. */
  if( CITP_OPTS.no_fail && errno != ELIBACC ) {
    Log_U(ci_log("%s: failed (errno:%d) - PASSING TO OS", __FUNCTION__,
                 errno));
    return CI_SOCKET_HANDOVER;
  }
  return -1;
}