/* Remove a software filter-table entry, probing with the same double
 * hash as insertion. */
void ci_netif_filter_remove(ci_netif* netif, oo_sp sock_p,
                            unsigned laddr, unsigned lport,
                            unsigned raddr, unsigned rport,
                            unsigned protocol)
{
  ci_netif_filter_table_entry* entry;
  unsigned hash1, hash2, tbl_i;
  ci_netif_filter_table* tbl;
  int hops = 0;
  unsigned first;

  ci_assert(ci_netif_is_locked(netif)
#ifdef __KERNEL__
            /* release_ep_tbl might be called without the stack lock.
             * Do not complain about this. */
            || (netif2tcp_helper_resource(netif)->k_ref_count &
                TCP_HELPER_K_RC_DEAD)
#endif
            );

  tbl = netif->filter_table;
  hash1 = tcp_hash1(tbl, laddr, lport, raddr, rport, protocol);
  hash2 = tcp_hash2(tbl, laddr, lport, raddr, rport, protocol);
  first = hash1;

  LOG_TC(ci_log("%s: [%d:%d] REMOVE %s %s:%u->%s:%u hash=%u:%u",
                __FUNCTION__, NI_ID(netif), OO_SP_FMT(sock_p),
                CI_IP_PROTOCOL_STR(protocol),
                ip_addr_str(laddr), (unsigned) CI_BSWAP_BE16(lport),
                ip_addr_str(raddr), (unsigned) CI_BSWAP_BE16(rport),
                hash1, hash2));

  tbl_i = hash1;
  while( 1 ) {
    entry = &tbl->table[tbl_i];
    if( entry->id == OO_SP_TO_INT(sock_p) ) {
      if( laddr == entry->laddr )
        break;
    }
    else if( entry->id == EMPTY ) {
      /* We allow multiple removes of the same filter -- helps avoid some
       * complexity in the filter module. */
      return;
    }
    tbl_i = (tbl_i + hash2) & tbl->table_size_mask;
    ++hops;
    if( tbl_i == first ) {
      LOG_E(ci_log(FN_FMT "ERROR: LOOP [%d] %s %s:%u->%s:%u",
                   FN_PRI_ARGS(netif), OO_SP_FMT(sock_p),
                   CI_IP_PROTOCOL_STR(protocol),
                   ip_addr_str(laddr), (unsigned) CI_BSWAP_BE16(lport),
                   ip_addr_str(raddr), (unsigned) CI_BSWAP_BE16(rport)));
      return;
    }
  }

  __ci_netif_filter_remove(netif, hash1, hash2, hops, tbl_i);
}
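/* Illustrative sketch, not part of the stack: the double-hash probe walk
 * used by ci_netif_filter_remove() above (and by ci_netif_filter_insert()
 * later in this section).  Assuming the table size is a power of two and
 * the hash2 step is odd -- so the step is coprime with the size -- the
 * sequence i, i+hash2, i+2*hash2, ... (mod size) visits every slot exactly
 * once before wrapping back to [first], which is why comparing against the
 * starting index is a sufficient "table full / entry missing" check.
 * All names here (toy_*) are hypothetical. */
#define TOY_EMPTY  (-1)

typedef struct { int id; unsigned laddr; } toy_entry;

/* Return the index of the first free slot on the probe path, or -1 if the
 * walk wraps back to its starting point. */
static int toy_find_slot(const toy_entry* table, unsigned mask,
                         unsigned hash1, unsigned hash2)
{
  unsigned i = hash1 & mask;
  unsigned first = i;
  do {
    if( table[i].id == TOY_EMPTY )
      return (int) i;
    i = (i + hash2) & mask;  /* double-hash step; odd => full-cycle walk */
  } while( i != first );
  return -1;                 /* wrapped without finding a free slot */
}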
/* unpick the ci_ip_timer structure to actually do the callback */
static void ci_ip_timer_docallback(ci_netif* netif, ci_ip_timer* ts)
{
  ci_assert( TIME_LE(ts->time, ci_ip_time_now(netif)) );
  ci_assert( ts->time == IPTIMER_STATE(netif)->sched_ticks );

  switch( ts->fn ) {
  case CI_IP_TIMER_TCP_RTO:
    CHECK_TS(netif, SP_TO_TCP(netif, ts->param1));
    ci_tcp_timeout_rto(netif, SP_TO_TCP(netif, ts->param1));
    break;
  case CI_IP_TIMER_TCP_DELACK:
    CHECK_TS(netif, SP_TO_TCP(netif, ts->param1));
    ci_tcp_timeout_delack(netif, SP_TO_TCP(netif, ts->param1));
    break;
  case CI_IP_TIMER_TCP_ZWIN:
    CHECK_TS(netif, SP_TO_TCP(netif, ts->param1));
    ci_tcp_timeout_zwin(netif, SP_TO_TCP(netif, ts->param1));
    break;
  case CI_IP_TIMER_TCP_KALIVE:
    CHECK_TS(netif, SP_TO_TCP(netif, ts->param1));
    ci_tcp_timeout_kalive(netif, SP_TO_TCP(netif, ts->param1));
    break;
  case CI_IP_TIMER_TCP_LISTEN:
    ci_tcp_timeout_listen(netif, SP_TO_TCP_LISTEN(netif, ts->param1));
    break;
  case CI_IP_TIMER_TCP_CORK:
    ci_tcp_timeout_cork(netif, SP_TO_TCP(netif, ts->param1));
    break;
  case CI_IP_TIMER_NETIF_TIMEOUT:
    ci_netif_timeout_state(netif);
    break;
  case CI_IP_TIMER_PMTU_DISCOVER:
    ci_pmtu_timeout_pmtu(netif, SP_TO_TCP(netif, ts->param1));
    break;
#if CI_CFG_TCP_SOCK_STATS
  case CI_IP_TIMER_TCP_STATS:
    ci_tcp_stats_action(netif, SP_TO_TCP(netif, ts->param1),
                        CI_IP_STATS_FLUSH, CI_IP_STATS_OUTPUT_NONE,
                        NULL, NULL);
    break;
#endif
#if CI_CFG_SUPPORT_STATS_COLLECTION
  case CI_IP_TIMER_NETIF_STATS:
    ci_netif_stats_action(netif, CI_IP_STATS_FLUSH,
                          CI_IP_STATS_OUTPUT_NONE, NULL, NULL);
    break;
#endif
#if CI_CFG_IP_TIMER_DEBUG
  case CI_IP_TIMER_DEBUG_HOOK:
    ci_ip_timer_debug_fn(netif, ts->link.addr, ts->param1);
    break;
#endif
  default:
    LOG_U(log(LPF "unknown timer callback code:%x param1:%d",
              ts->fn, OO_SP_FMT(ts->param1)));
    CI_DEBUG(ci_fail_stop_fn());
  }
}
/* Allocate a waitable object from the stack's free list, first draining
 * any endpoints parked on the lock-free deferred-free list. */
citp_waitable_obj* citp_waitable_obj_alloc(ci_netif* netif)
{
  citp_waitable_obj* wo;

  ci_assert(netif);
  ci_assert(ci_netif_is_locked(netif));

  if( netif->state->deferred_free_eps_head != CI_ILL_END ) {
    ci_uint32 link;
    do
      link = netif->state->deferred_free_eps_head;
    while( ci_cas32_fail(&netif->state->deferred_free_eps_head,
                         link, CI_ILL_END) );

    while( link != CI_ILL_END ) {
      citp_waitable* w = ID_TO_WAITABLE(netif, link);
      link = w->next_id;
      CI_DEBUG(w->next_id = CI_ILL_END);
      ci_assert_equal(w->state, CI_TCP_STATE_FREE);
      ci_assert(OO_SP_IS_NULL(w->wt_next));
      w->wt_next = netif->state->free_eps_head;
      netif->state->free_eps_head = W_SP(w);
    }
  }

  if( OO_SP_IS_NULL(netif->state->free_eps_head) ) {
    ci_tcp_helper_more_socks(netif);
    if( OO_SP_IS_NULL(netif->state->free_eps_head) )
      ci_netif_timeout_reap(netif);
  }

  if( OO_SP_IS_NULL(netif->state->free_eps_head) )
    return NULL;

  LOG_TV(ci_log("%s: allocating %d", __FUNCTION__,
                OO_SP_FMT(netif->state->free_eps_head)));

  ci_assert(IS_VALID_SOCK_P(netif, netif->state->free_eps_head));
#if !defined(__KERNEL__) && !defined(CI_HAVE_OS_NOPAGE)
  ci_netif_mmap_shmbuf(netif,
                       (netif->state->free_eps_head >> EP_BUF_BLOCKSHIFT) + 1);
#endif
  wo = SP_TO_WAITABLE_OBJ(netif, netif->state->free_eps_head);

  ci_assert(OO_SP_EQ(W_SP(&wo->waitable), netif->state->free_eps_head));
  ci_assert_equal(wo->waitable.state, CI_TCP_STATE_FREE);
  ci_assert_equal(wo->waitable.sb_aflags,
                  (CI_SB_AFLAG_ORPHAN | CI_SB_AFLAG_NOT_READY));
  ci_assert_equal(wo->waitable.lock.wl_val, 0);

  netif->state->free_eps_head = wo->waitable.wt_next;
  CI_DEBUG(wo->waitable.wt_next = OO_SP_NULL);
  ci_assert_equal(wo->waitable.state, CI_TCP_STATE_FREE);

  return wo;
}
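/* Illustrative sketch, not part of the stack: the lock-free "grab the whole
 * list" idiom used above to drain deferred_free_eps_head.  One successful
 * CAS exchanges the list head for the empty terminator; the caller then
 * owns the whole chain and can walk it with no further synchronisation.
 * Names (toy_*) are hypothetical, and C11 atomics stand in for the stack's
 * ci_cas32_fail(). */
#include <stdatomic.h>
#include <stdint.h>

#define TOY_ILL_END  ((uint32_t) -1)   /* plays the role of CI_ILL_END */

static uint32_t toy_grab_list(_Atomic uint32_t* head)
{
  uint32_t link = atomic_load(head);
  /* On failure the CAS reloads [link] with the current head, so just
   * retry until we atomically swap the head for "empty". */
  while( ! atomic_compare_exchange_weak(head, &link, TOY_ILL_END) )
    ;
  return link;   /* first id of the detached chain, or TOY_ILL_END */
}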
void citp_waitable_all_fds_gone(ci_netif* ni, oo_sp w_id)
{
  citp_waitable_obj* wo;

  ci_assert(ni);
  ci_assert(IS_VALID_SOCK_P(ni, w_id));
  ci_assert(ci_netif_is_locked(ni));

  wo = SP_TO_WAITABLE_OBJ(ni, w_id);
  ci_assert(wo->waitable.state != CI_TCP_STATE_FREE);

  LOG_NC(ci_log("%s: %d:%d %s", __FUNCTION__, NI_ID(ni), OO_SP_FMT(w_id),
                ci_tcp_state_str(wo->waitable.state)));

  /* A listening socket is closed in blocking context; see
   * efab_tcp_helper_close_endpoint().  CI_SB_AFLAG_ORPHAN is set earlier
   * in that case. */
  CI_DEBUG(if( (wo->waitable.sb_aflags & CI_SB_AFLAG_ORPHAN) &&
               wo->waitable.state != CI_TCP_LISTEN )
             ci_log("%s: %d:%d already orphan", __FUNCTION__,
                    NI_ID(ni), OO_SP_FMT(w_id)));

  /* It's essential that an ORPHANed socket not be on the deferred
   * socket list, because the same link field is used as timewait
   * list, free list etc.  We must purge the deferred list before
   * setting the orphan flag.
   *
   * NB. This socket cannot now be added to the deferred list, because
   * no-one has a reference to it.
   */
  ci_netif_purge_deferred_socket_list(ni);
  ci_bit_set(&wo->waitable.sb_aflags, CI_SB_AFLAG_ORPHAN_BIT);

  /* We also need to remove the socket from the post-poll list.  It may
   * have been left there because the stack believes a wakeup is needed.
   */
  ci_ni_dllist_remove_safe(ni, &wo->waitable.post_poll_link);
  ci_ni_dllist_remove_safe(ni, &wo->waitable.ready_link);
  wo->waitable.ready_list_id = 0;

  citp_waitable_cleanup(ni, wo, 1);
}
/* Describe an Onload file descriptor into [buf] for logging. */
static void onload_fmt(int fd, char* buf, int* buf_n, int buf_len)
{
  ci_ep_info_t info;

  CI_TRY(oo_ep_info(fd, &info));

  switch( info.fd_type ) {
  case CI_PRIV_TYPE_NONE:
    bprintf("onload[]");
    break;
  case CI_PRIV_TYPE_NETIF:
    bprintf("onload[stack,%u]", info.resource_id);
    break;
  case CI_PRIV_TYPE_TCP_EP:
    bprintf("onload[TCP,%u,%d]", info.resource_id, OO_SP_FMT(info.sock_id));
    break;
  case CI_PRIV_TYPE_UDP_EP:
    bprintf("onload[UDP,%u,%d]", info.resource_id, OO_SP_FMT(info.sock_id));
    break;
  default:
    bprintf("onload[type=%d,%u,%d,%lu]", info.fd_type, info.resource_id,
            OO_SP_FMT(info.sock_id), (unsigned long) info.mem_mmap_bytes);
    break;
  }
}
/* Perform an ioctl on the kernel socket backing [sock_p].  Returns nonzero
 * only if the kernel socket could not be obtained; the ioctl's own result
 * is reported via [ioctl_rc] when that is non-NULL. */
int oo_os_sock_ioctl(ci_netif* ni, oo_sp sock_p, int request, void* arg,
                     int* ioctl_rc)
{
  oo_os_file os_sock_fd;
  int rc;

  if( (rc = oo_os_sock_get(ni, sock_p, &os_sock_fd)) == 0 ) {
    rc = ci_sys_ioctl(os_sock_fd, request, arg);
    if( rc < 0 )
      rc = -errno;
    oo_os_sock_release(ni, os_sock_fd);
    if( ioctl_rc != NULL ) {
      *ioctl_rc = rc;
      rc = 0;
    }
  }
  else {
    LOG_E(ci_log("%s: [%d:%d] ERROR: failed to get kernel sock fd "
                 "(rc=%d req=%d)", __FUNCTION__, NI_ID(ni),
                 OO_SP_FMT(sock_p), rc, request));
  }
  return rc;
}
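/* Illustrative sketch of a hypothetical caller: oo_os_sock_ioctl() above
 * separates transport failure (nonzero return) from the ioctl's own result
 * (*ioctl_rc), both using the kernel-style negative-errno convention that
 * the function converts to from libc's rc == -1 / errno convention. */
static int toy_get_nread(ci_netif* ni, oo_sp sock_p)
{
  int nread, ioctl_rc;
  int rc = oo_os_sock_ioctl(ni, sock_p, FIONREAD, &nread, &ioctl_rc);
  if( rc != 0 )
    return rc;        /* couldn't reach the kernel socket at all */
  if( ioctl_rc < 0 )
    return ioctl_rc;  /* socket reached, but the ioctl itself failed */
  return nread;
}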
ssize_t linux_tcp_helper_fop_sendpage(struct file* filp, struct page* page,
                                      int offset, size_t size, loff_t* ppos,
                                      int flags)
{
  ci_private_t* priv = filp->private_data;
  tcp_helper_resource_t* trs = efab_priv_to_thr(priv);
  ci_sock_cmn* s;

  OO_DEBUG_VERB(ci_log("%s: %d:%d offset=%d size=%d flags=%x", __FUNCTION__,
                       NI_ID(&trs->netif), OO_SP_FMT(priv->sock_id),
                       offset, (int) size, flags));

  ci_assert(page);
  ci_assert_ge(offset, 0);
  ci_assert_gt(size, 0);
  ci_assert_le(offset + size, CI_PAGE_SIZE);

#ifndef MSG_SENDPAGE_NOTLAST
  /* "flags" is really "more".  Convert it. */
  if( flags )
    flags = MSG_MORE;

  /* [more] is sometimes true even for the last page.  We get a little
  ** closer to the truth by spotting that we're not reading to the end of
  ** the page.  - seen on 2.6.18, but not on 2.6.26 or later
  */
  if( offset + size < CI_PAGE_SIZE && flags )
    flags = 0;
#endif

  s = SP_TO_SOCK(&trs->netif, priv->sock_id);
  if(CI_LIKELY( s->b.state & CI_TCP_STATE_TCP_CONN ))
    return sendpage_copy(&trs->netif, SOCK_TO_TCP(s), page, offset, size,
                         flags);
  else
    /* Closed or listening.  Return epipe.  Do not send SIGPIPE, because
    ** Linux will do it for us. */
    return -s->tx_errno;
}
/* Map an oof_socket back to its endpoint id, or -1 if it has no stack. */
int oof_cb_socket_id(struct oof_socket* skf)
{
  return (skf->sf_flags & OOF_SOCKET_NO_STACK) == 0 ?
    OO_SP_FMT(skf_to_ep(skf)->id) : -1;
}
/* Insert for either TCP or UDP */
int ci_netif_filter_insert(ci_netif* netif, oo_sp tcp_id,
                           unsigned laddr, unsigned lport,
                           unsigned raddr, unsigned rport,
                           unsigned protocol)
{
  ci_netif_filter_table_entry* entry;
  unsigned hash1, hash2;
  ci_netif_filter_table* tbl;
#if !defined(NDEBUG) || CI_CFG_STATS_NETIF
  unsigned hops = 1;
#endif
  unsigned first;

  ci_assert(netif);
  ci_assert(ci_netif_is_locked(netif));
  ci_assert(netif->filter_table);

  tbl = netif->filter_table;
  hash1 = tcp_hash1(tbl, laddr, lport, raddr, rport, protocol);
  hash2 = tcp_hash2(tbl, laddr, lport, raddr, rport, protocol);
  first = hash1;

  /* Find a free slot. */
  while( 1 ) {
    entry = &tbl->table[hash1];
    if( entry->id < 0 )
      break;

    ++entry->route_count;
#if !defined(NDEBUG) || CI_CFG_STATS_NETIF
    ++hops;
#endif

    /* A socket can only have multiple entries in the filter table if each
     * entry has a different [laddr]. */
    ci_assert( !((entry->id == OO_SP_TO_INT(tcp_id)) &&
                 (laddr == entry->laddr)) );

    hash1 = (hash1 + hash2) & tbl->table_size_mask;

    if( hash1 == first ) {
      ci_sock_cmn* s = SP_TO_SOCK_CMN(netif, tcp_id);
      if( ! (s->s_flags & CI_SOCK_FLAG_SW_FILTER_FULL) ) {
        LOG_E(ci_log(FN_FMT "%d FULL %s %s:%u->%s:%u hops=%u",
                     FN_PRI_ARGS(netif), OO_SP_FMT(tcp_id),
                     CI_IP_PROTOCOL_STR(protocol),
                     ip_addr_str(laddr), (unsigned) CI_BSWAP_BE16(lport),
                     ip_addr_str(raddr), (unsigned) CI_BSWAP_BE16(rport),
                     hops));
        s->s_flags |= CI_SOCK_FLAG_SW_FILTER_FULL;
      }
      CITP_STATS_NETIF_INC(netif, sw_filter_insert_table_full);
      return -ENOBUFS;
    }
  }

  /* Now insert the new entry. */
  LOG_TC(ci_log(FN_FMT "%d INSERT %s %s:%u->%s:%u hash=%u:%u at=%u "
                "over=%d hops=%u", FN_PRI_ARGS(netif), OO_SP_FMT(tcp_id),
                CI_IP_PROTOCOL_STR(protocol),
                ip_addr_str(laddr), (unsigned) CI_BSWAP_BE16(lport),
                ip_addr_str(raddr), (unsigned) CI_BSWAP_BE16(rport),
                first, hash2, hash1, entry->id, hops));
#if CI_CFG_STATS_NETIF
  if( hops > netif->state->stats.table_max_hops )
    netif->state->stats.table_max_hops = hops;
  /* Keep a rolling average of the number of hops per entry. */
  if( netif->state->stats.table_mean_hops == 0 )
    netif->state->stats.table_mean_hops = 1;
  netif->state->stats.table_mean_hops =
    (netif->state->stats.table_mean_hops * 9 + hops) / 10;
  if( entry->id == EMPTY )
    ++netif->state->stats.table_n_slots;
  ++netif->state->stats.table_n_entries;
#endif
  entry->id = OO_SP_TO_INT(tcp_id);
  entry->laddr = laddr;
  return 0;
}
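/* Note on the rolling average in ci_netif_filter_insert() above:
 * table_mean_hops is an exponential moving average with weight 1/10,
 * i.e. mean' = (9*mean + hops)/10.  Worked example: starting from the
 * seeded value 1, an insert that takes 11 hops moves the mean to
 * (9*1 + 11)/10 = 2, and a long run of 1-hop inserts decays it back
 * toward 1.  Integer division means the average floors rather than
 * rounds, so it slightly understates true probe cost. */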
static int efab_tcp_helper_move_state(ci_private_t* priv, void* arg)
{
  oo_tcp_move_state_t* op = arg;
  tcp_helper_endpoint_t* new_ep;
  tcp_helper_resource_t* new_trs = NULL;
  ci_netif *ni, *new_ni;
  ci_tcp_state *ts, *new_ts;
  tcp_helper_endpoint_t* ep;
  int rc = efab_ioctl_get_ep(priv, op->ep_id, &ep);
  if( rc != 0 )
    return rc;

  OO_DEBUG_TCPH(ci_log("%s: (trs=%p (%u), priv=%p, ep_id=%u, new_trs_id=%u, "
                       "new_ep_id=%u", __FUNCTION__, priv->thr,
                       priv->thr->id, priv, OO_SP_FMT(op->ep_id),
                       op->new_trs_id, OO_SP_FMT(op->new_ep_id)));

  do {
    /* check that the existing id is valid */
    ni = &priv->thr->netif;
    ts = SP_TO_TCP(ni, ep->id);

    /* TODO: check this endpoint belongs to the tcp helper resource of priv
     * and not somewhere else */

    /* This function does not change fd_type or fd ops, so it is not able
     * to cope with changing the socket type.  We think this only makes
     * sense for TCP, so assert we are taking a TCP endpoint. */
    ci_assert_equal(ts->s.pkt.ip.ip_protocol, IPPROTO_TCP);
    ci_assert_equal(priv->fd_type, CI_PRIV_TYPE_TCP_EP);

    /* get pointer to resource from handle - increments ref count */
    rc = efab_thr_table_lookup(NULL, op->new_trs_id,
                               EFAB_THR_TABLE_LOOKUP_CHECK_USER, &new_trs);
    if( rc < 0 ) {
      OO_DEBUG_ERR(ci_log("%s: invalid new resource handle", __FUNCTION__));
      break;
    }
    ci_assert(new_trs != NULL);

    /* check valid endpoint in new netif */
    new_ni = &new_trs->netif;
    new_ep = ci_netif_get_valid_ep(new_ni, op->new_ep_id);
    new_ts = SP_TO_TCP(new_ni, new_ep->id);

    /* check the two endpoint states look valid */
    if( (ts->s.pkt.ip.ip_protocol != new_ts->s.pkt.ip.ip_protocol) ||
        (ts->s.b.state != CI_TCP_CLOSED) ||
        (ep->oofilter.sf_local_port != NULL) ) {
      efab_thr_release(new_trs);
      rc = -EINVAL;
      OO_DEBUG_ERR(ci_log("%s: invalid endpoint states", __FUNCTION__));
      break;
    }

    /* should be fine to complete */
    ci_assert(new_trs);
    {
      tcp_helper_resource_t* old_trs;
    again:
      old_trs = priv->thr;
      if( ci_cas_uintptr_fail((ci_uintptr_t*) &priv->thr,
                              (ci_uintptr_t) old_trs,
                              (ci_uintptr_t) new_trs) )
        goto again;
      efab_thr_release(old_trs);
    }

    /* move file to hold details of new resource, new endpoint */
    ci_assert(OO_SP_EQ(priv->sock_id, op->ep_id));
    priv->sock_id = new_ep->id;
    OO_DEBUG_TCPH(ci_log("%s: set epid %u", __FUNCTION__,
                         OO_SP_FMT(priv->sock_id)));

    /* copy across any necessary state */
    ci_assert_equal(new_ep->os_socket, NULL);
    new_ep->os_socket = ep->os_socket;
    ep->os_socket = NULL;

    /* set ORPHAN flag in the current state, as it is no longer attached
     * to an fd */
    ci_bit_set(&ts->s.b.sb_aflags, CI_SB_AFLAG_ORPHAN_BIT);

    /* clear ORPHAN (and IN_ACCEPTQ) flags in the new TCP state */
    ci_atomic32_and(&new_ts->s.b.sb_aflags,
                    ~(CI_SB_AFLAG_ORPHAN | CI_SB_AFLAG_TCP_IN_ACCEPTQ));

    return 0;
  } while( 0 );

  return rc;
}
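/* Illustrative sketch, not part of the stack: the CAS retry idiom used above
 * to swap priv->thr.  The CAS succeeds only if nobody changed the pointer
 * since we read it, so the old resource's reference is released exactly
 * once.  Names (toy_*) are hypothetical; C11 atomics stand in for
 * ci_cas_uintptr_fail(). */
#include <stdatomic.h>

static void toy_swap_resource(_Atomic(void*)* slot, void* new_res,
                              void (*release)(void*))
{
  void* old_res = atomic_load(slot);
  /* On failure the CAS reloads old_res with the current value, so retry. */
  while( ! atomic_compare_exchange_weak(slot, &old_res, new_res) )
    ;
  release(old_res);  /* drop the reference that [slot] used to hold */
}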