/* Look up the synrecv state matching the packet in @rxp within the
 * listen-queue hash table rooted at @bucket.  @level is the current depth
 * in the tree of buckets; ci_tcp_listenq_hash2idx() maps rxp->hash to a
 * slot index differently at each level.
 * Returns the matching synrecv state, or NULL if none is found. */
static ci_tcp_state_synrecv*
ci_tcp_listenq_bucket_lookup(ci_netif* ni, ci_tcp_listen_bucket* bucket,
                             ciip_tcp_rx_pkt* rxp, int level)
{
  ci_ni_aux_mem* aux;
  int idx = ci_tcp_listenq_hash2idx(rxp->hash, level);
  ci_tcp_state_synrecv* tsr;
  unsigned saddr, daddr, sport;
#ifdef __KERNEL__
  int i = 0;

  /* Defensive check against a corrupted table: the bucket tree should
   * never be deeper than CI_LISTENQ_BUCKET_MAX_DEPTH. */
  if( level > CI_LISTENQ_BUCKET_MAX_DEPTH(ni) ) {
    ci_netif_error_detected(ni, CI_NETIF_ERROR_SYNRECV_TABLE,
                            __FUNCTION__);
    return 0;
  }
#endif

  LOG_TV(ci_log("%s([%d] level=%d hash:%x l:%s r:%s:%d)", __func__,
                NI_ID(ni), level, rxp->hash,
                ip_addr_str(oo_ip_hdr(rxp->pkt)->ip_daddr_be32),
                ip_addr_str(oo_ip_hdr(rxp->pkt)->ip_saddr_be32),
                CI_BSWAP_BE16(rxp->tcp->tcp_source_be16)));

  if( OO_P_IS_NULL(bucket->bucket[idx]) )
    return NULL;

  level++;
  aux = ci_ni_aux_p2aux(ni, bucket->bucket[idx]);
  /* A slot holds either a nested bucket (recurse) or a linked list of
   * synrecv states (walk it below). */
  if( aux->type == CI_TCP_AUX_TYPE_BUCKET )
    return ci_tcp_listenq_bucket_lookup(ni, &aux->u.bucket, rxp, level);

  saddr = oo_ip_hdr(rxp->pkt)->ip_saddr_be32;
  daddr = oo_ip_hdr(rxp->pkt)->ip_daddr_be32;
  sport = rxp->tcp->tcp_source_be16;

  /* Walk the list hanging off this slot.  The OR-of-differences below is
   * zero iff the packet's source addr/port and dest addr all match this
   * entry's remote addr/port and local addr. */
  tsr = &aux->u.synrecv;
  do {
    if( ! ((saddr - tsr->r_addr) |
           (daddr - tsr->l_addr) |
           (sport - tsr->r_port)) )
      return tsr;
    if( OO_P_IS_NULL(tsr->bucket_link) )
      return NULL;
    aux = ci_ni_aux_p2aux(ni, tsr->bucket_link);
    tsr = &aux->u.synrecv;
#ifdef __KERNEL__
    /* Defensive check: bail out of an implausibly long (corrupted or
     * looping) list rather than spin forever in the kernel. */
    if( i++ > CI_LISTENQ_BUCKET_LIST_LIMIT(ni) ) {
      ci_netif_error_detected(ni, CI_NETIF_ERROR_SYNRECV_TABLE,
                              __FUNCTION__);
      return NULL;
    }
#endif
  } while(1);

  /* unreachable */
  return NULL;
}
/* Insert synrecv state @tsr into the listen-queue hash table rooted at
 * @bucket.  @level is the current depth in the tree of buckets.  When a
 * slot already holds a list of synrecv states (and we are not yet at
 * maximum depth), a next-level bucket is allocated and the list is
 * redistributed into it.
 *
 * Fix vs previous version: the new bucket pointer is staged in a local
 * variable, so that on allocation failure the slot is left holding the
 * existing list instead of being overwritten with OO_P_NULL (which
 * orphaned every synrecv state in that list). */
static void
ci_tcp_listenq_bucket_insert(ci_netif* ni, ci_tcp_socket_listen* tls,
                             ci_tcp_listen_bucket* bucket,
                             ci_tcp_state_synrecv* tsr, int level)
{
  ci_ni_aux_mem* aux;
  oo_p new_bucket_p;
  int idx = ci_tcp_listenq_hash2idx(tsr->hash, level);
  oo_p tsr_p = ci_tcp_synrecv2p(ni, tsr);
#ifdef __KERNEL__
  int i = 0;
#endif

  LOG_TV(ci_log("%s([%d] level=%d "TSR_FMT")", __func__, NI_ID(ni), level,
                TSR_ARGS(tsr)));

  /* Empty slot: just store the synrecv state here. */
  if( OO_P_IS_NULL(bucket->bucket[idx]) ) {
    bucket->bucket[idx] = tsr_p;
    return;
  }

  level++;
  aux = ci_ni_aux_p2aux(ni, bucket->bucket[idx]);
  /* Nested bucket: recurse one level down. */
  if( aux->type == CI_TCP_AUX_TYPE_BUCKET ) {
    ci_tcp_listenq_bucket_insert(ni, tls, &aux->u.bucket, tsr, level);
    return;
  }

  /* So, this bucket contains of a list of other synrecv states. We add
   * our trs to this list and try to improve things by allocating
   * next-level bucket. */
  tsr->bucket_link = bucket->bucket[idx];
  bucket->bucket[idx] = tsr_p;

  /* At maximum depth we cannot split any further: leave the list. */
  if( level > CI_LISTENQ_BUCKET_MAX_DEPTH(ni) )
    return;

  /* Try to allocate a next-level bucket.  On failure, keep the plain
   * list in the slot (do NOT clobber it before the NULL check). */
  new_bucket_p = ci_ni_aux_alloc_bucket(ni);
  if( OO_P_IS_NULL(new_bucket_p) )
    return;
  bucket->bucket[idx] = new_bucket_p;
  bucket = ci_ni_aux_p2bucket(ni, new_bucket_p);
  tls->n_buckets++;

  /* Redistribute the old list (still reachable via tsr_p) into the new
   * next-level bucket, one entry at a time. */
  while( OO_P_NOT_NULL(tsr_p) ) {
    tsr = &ci_ni_aux_p2aux(ni, tsr_p)->u.synrecv;
#ifdef __KERNEL__
    /* Defensive check: an implausibly long list means the table is
     * corrupted; re-insert the current entry and report the error. */
    if( i++ > CI_LISTENQ_BUCKET_LIST_LIMIT(ni) ) {
      ci_tcp_listenq_bucket_insert(ni, tls, bucket, tsr, level);
      ci_netif_error_detected(ni, CI_NETIF_ERROR_SYNRECV_TABLE,
                              __FUNCTION__);
      return;
    }
#endif
    tsr_p = tsr->bucket_link;
    tsr->bucket_link = OO_P_NULL;
    ci_tcp_listenq_bucket_insert(ni, tls, bucket, tsr, level);
  }
}
/* Free every synrecv state reachable from @bucket, recursing into nested
 * buckets, and then free @bucket itself.
 * Returns the total number of synrecv states dropped. */
static int ci_tcp_listenq_bucket_drop(ci_netif* ni,
                                      ci_tcp_listen_bucket* bucket)
{
  int slot;
  int n_dropped = 0;

  for( slot = 0; slot < CI_TCP_LISTEN_BUCKET_SIZE; slot++ ) {
    ci_ni_aux_mem* aux;
    oo_p link_p = bucket->bucket[slot];

    if( OO_P_IS_NULL(link_p) )
      continue;

    aux = ci_ni_aux_p2aux(ni, link_p);
    if( aux->type == CI_TCP_AUX_TYPE_BUCKET ) {
      /* Nested bucket: drop its contents recursively. */
      n_dropped += ci_tcp_listenq_bucket_drop(ni, &aux->u.bucket);
      continue;
    }

    /* A list of synrecv states: free each entry in turn. */
    while( OO_P_NOT_NULL(link_p) ) {
      ci_tcp_state_synrecv* tsr = &ci_ni_aux_p2aux(ni, link_p)->u.synrecv;
      link_p = tsr->bucket_link;
      if( OO_SP_IS_NULL(tsr->local_peer) )
        ci_ni_dllist_remove(ni, ci_tcp_synrecv2link(tsr));
      /* RFC 793 tells us to send FIN and move to FIN-WAIT1 state.
       * However, Linux (and probably everybody else) does not do it. */
      ci_tcp_synrecv_free(ni, tsr);
      n_dropped++;
    }
  }

  ci_ni_aux_free(ni, CI_CONTAINER(ci_ni_aux_mem, u.bucket, bucket));
  return n_dropped;
}
/* Initialise the listening-socket state @tls: accept queue counters, the
 * top-level listen bucket, the per-retransmit listenq lists and (when
 * FD caching is compiled in) the EP-cache lists.
 * Returns 0 on success or -ENOBUFS if no aux memory is available for the
 * listen bucket. */
static int ci_tcp_listen_init(ci_netif *ni, ci_tcp_socket_listen *tls)
{
  int qid;
  oo_p off_p;

  tls->acceptq_n_in = 0;
  tls->acceptq_n_out = 0;
  tls->acceptq_put = CI_ILL_END;
  tls->acceptq_get = OO_SP_NULL;
  tls->n_listenq = 0;
  tls->n_listenq_new = 0;

  /* Allocate and initialise the listen bucket */
  if( OO_P_IS_NULL(ni->state->free_aux_mem) )
    return -ENOBUFS;
  tls->bucket = ni->state->free_aux_mem;
  ci_tcp_bucket_alloc(ni);
  tls->n_buckets = 1;

  /* Initialise the listenQ: one list per SYN-ACK retransmit count. */
  for( qid = 0; qid <= CI_CFG_TCP_SYNACK_RETRANS_MAX; ++qid ) {
    off_p = TS_OFF(ni, tls);
    OO_P_ADD(off_p, CI_MEMBER_OFFSET(ci_tcp_socket_listen, listenq[qid]));
    ci_ni_dllist_init(ni, &tls->listenq[qid], off_p, "lstq");
  }

  /* Initialize the cache and pending lists for the EP-cache.
   * See comment at definition for details */
  LOG_EP (log ("Initialise cache and pending list for id %d", S_FMT(tls)));
#if CI_CFG_FD_CACHING
  off_p = TS_OFF(ni, tls);
  OO_P_ADD(off_p, CI_MEMBER_OFFSET(ci_tcp_socket_listen, epcache_cache));
  ci_ni_dllist_init(ni, &tls->epcache_cache, off_p, "epch");

  off_p = TS_OFF(ni, tls);
  OO_P_ADD(off_p, CI_MEMBER_OFFSET(ci_tcp_socket_listen, epcache_pending));
  ci_ni_dllist_init(ni, &tls->epcache_pending, off_p, "eppd");

  off_p = TS_OFF(ni, tls);
  OO_P_ADD(off_p, CI_MEMBER_OFFSET(ci_tcp_socket_listen, epcache_connected));
  ci_ni_dllist_init(ni, &tls->epcache_connected, off_p, "epco");

  off_p = TS_OFF(ni, tls);
  OO_P_ADD(off_p, CI_MEMBER_OFFSET(ci_tcp_socket_listen, epcache_fd_states));
  ci_ni_dllist_init(ni, &tls->epcache_fd_states, off_p, "ecfd");

  tls->cache_avail_sock = ni->state->opts.per_sock_cache_max;
#endif

  return 0;
}
/* Remove synrecv state @tsr from the listen-queue hash table rooted at
 * @bucket; @level is the current depth in the tree of buckets.  Empty
 * nested buckets are freed on the way back up the recursion.
 * Return 1 if the bucket is empty now */
static int
ci_tcp_listenq_bucket_remove(ci_netif* ni, ci_tcp_socket_listen* tls,
                             ci_tcp_listen_bucket* bucket,
                             ci_tcp_state_synrecv* tsr, int level)
{
  ci_ni_aux_mem* aux;
  int idx = ci_tcp_listenq_hash2idx(tsr->hash, level);
  oo_p tsr_p = ci_tcp_synrecv2p(ni, tsr);
  /* Fixme: we remove empty buckets only. In theory, it may be useful to
   * remove a bucket with one non-empty list, but it maked code more
   * complicated. */
  int empty = 0;
#ifdef __KERNEL__
  int i = 0;

  /* Defensive check against a corrupted table: the bucket tree should
   * never be deeper than CI_LISTENQ_BUCKET_MAX_DEPTH. */
  if( level > CI_LISTENQ_BUCKET_MAX_DEPTH(ni) ) {
    ci_netif_error_detected(ni, CI_NETIF_ERROR_SYNRECV_TABLE,
                            __FUNCTION__);
    return 0;
  }
#endif

  LOG_TV(ci_log("%s([%d] level=%d "TSR_FMT")", __func__, NI_ID(ni), level,
                TSR_ARGS(tsr)));

  /* The entry being removed must be reachable, so its slot is non-NULL. */
  ci_assert( OO_P_NOT_NULL(bucket->bucket[idx]) );
#ifdef __KERNEL__
  /* In the kernel, report the inconsistency instead of asserting. */
  if( OO_P_IS_NULL(bucket->bucket[idx]) ) {
    ci_netif_error_detected(ni, CI_NETIF_ERROR_SYNRECV_TABLE,
                            __FUNCTION__);
    return 0;
  }
#endif

  level++;
  aux = ci_ni_aux_p2aux(ni, bucket->bucket[idx]);
  if( aux->type == CI_TCP_AUX_TYPE_BUCKET ) {
    /* Nested bucket: recurse, and free the child bucket if removing the
     * entry made it empty. */
    empty = ci_tcp_listenq_bucket_remove(ni, tls, &aux->u.bucket,
                                         tsr, level);
    if( empty ) {
      bucket->bucket[idx] = OO_P_NULL;
      ci_ni_aux_free(ni, aux);
      tls->n_buckets--;
    }
  }
  else {
    if( bucket->bucket[idx] == tsr_p ) {
      /* tsr is the list head: point the slot at its successor. */
      bucket->bucket[idx] = tsr->bucket_link;
      empty = OO_P_IS_NULL(bucket->bucket[idx]);
    }
    else {
      /* Walk the list to find tsr's predecessor and unlink tsr. */
      ci_tcp_state_synrecv* prev = &aux->u.synrecv;
      while( prev->bucket_link != tsr_p ) {
        aux = ci_ni_aux_p2aux(ni, prev->bucket_link);
        prev = &aux->u.synrecv;
#ifdef __KERNEL__
        /* Defensive check: give up on an implausibly long (corrupted or
         * looping) list. */
        if( i++ > CI_LISTENQ_BUCKET_LIST_LIMIT(ni) ) {
          ci_netif_error_detected(ni, CI_NETIF_ERROR_SYNRECV_TABLE,
                                  __FUNCTION__);
          return 0;
        }
#endif
      }
      prev->bucket_link = tsr->bucket_link;
    }
  }

  /* Only when this slot became empty is it worth scanning the whole
   * bucket to tell the caller whether it can be freed. */
  if( empty ) {
    int i;
    for( i = 0; i < CI_TCP_LISTEN_BUCKET_SIZE; i++ )
      if( OO_P_NOT_NULL(bucket->bucket[i]) )
        return 0;
    return 1;
  }
  return 0;
}