/* Print one socket in netstat-style format to the log:
 *   TCP 2 0 0.0.0.0:12865 0.0.0.0:0 LISTEN
 *   UDP 0 0 172.16.129.131:57521 0.0.0.0:0 UDP
 * Non-socket waitables are ignored.  Queue depths are only meaningful for
 * connected TCP states and for UDP; otherwise both are reported as zero. */
void citp_waitable_print(citp_waitable* w)
{
  ci_sock_cmn* s;
  citp_waitable_obj* wo;
  int txq = 0;
  int rxq = 0;

  if( ! CI_TCP_STATE_IS_SOCKET(w->state) )
    return;

  s = CI_CONTAINER(ci_sock_cmn, b, w);
  wo = CI_CONTAINER(citp_waitable_obj, waitable, w);

  if( (w->state & CI_TCP_STATE_TCP) &&
      !(w->state & CI_TCP_STATE_NOT_CONNECTED) ) {
    /* Connected TCP: count send-queue packets and both receive queues. */
    txq = ci_tcp_sendq_n_pkts(&wo->tcp);
    rxq = wo->tcp.recv1.num + wo->tcp.recv2.num;
  }
  else if( w->state == CI_TCP_STATE_UDP ) {
    /* UDP: include packets queued for asynchronous transmit. */
    txq = wo->udp.tx_count + oo_atomic_read(&wo->udp.tx_async_q_level);
    rxq = ci_udp_recv_q_pkts(&wo->udp.recv_q);
  }

  log("%s %d %d "OOF_IP4PORT" "OOF_IP4PORT" %s",
      citp_waitable_type_str(w), rxq, txq,
      OOFA_IP4PORT(sock_laddr_be32(s), sock_lport_be16(s)),
      OOFA_IP4PORT(sock_raddr_be32(s), sock_rport_be16(s)),
      ci_tcp_state_str(w->state));
}
/* Iterate over all filter-table entries matching the given 5-tuple
 * (laddr:lport -> raddr:rport, protocol), invoking |callback| on each
 * matching socket.  Iteration stops early when the callback returns
 * non-zero, when an EMPTY slot terminates the probe chain, or when the
 * probe sequence wraps back to its starting slot (table full).
 * If |hash_out| is non-NULL it receives tcp_hash3() of the tuple.
 * |intf_i|/|vlan| are used to honour SO_BINDTODEVICE-style interface
 * binding via ci_sock_intf_check(). */
void ci_netif_filter_for_each_match(ci_netif* ni, unsigned laddr,
                                    unsigned lport, unsigned raddr,
                                    unsigned rport, unsigned protocol,
                                    int intf_i, int vlan,
                                    int (*callback)(ci_sock_cmn*, void*),
                                    void* callback_arg, ci_uint32* hash_out)
{
  ci_netif_filter_table* tbl;
  unsigned hash1, hash2 = 0;
  unsigned first;

  tbl = ni->filter_table;
  if( hash_out != NULL )
    *hash_out = tcp_hash3(tbl, laddr, lport, raddr, rport, protocol);
  /* hash1 is the initial probe slot; hash2 (computed lazily below) is the
   * double-hashing step used on collision. */
  hash1 = tcp_hash1(tbl, laddr, lport, raddr, rport, protocol);
  first = hash1;

  LOG_NV(log("%s: %s %s:%u->%s:%u hash=%u:%u at=%u",
             __FUNCTION__, CI_IP_PROTOCOL_STR(protocol),
             ip_addr_str(laddr), (unsigned) CI_BSWAP_BE16(lport),
             ip_addr_str(raddr), (unsigned) CI_BSWAP_BE16(rport),
             first, tcp_hash2(tbl, laddr, lport, raddr, rport, protocol),
             hash1));

  while( 1 ) {
    int id = tbl->table[hash1].id;
    if(CI_LIKELY( id >= 0 )) {
      ci_sock_cmn* s = ID_TO_SOCK(ni, id);
      /* Branch-free 5-tuple compare: OR together the unsigned differences
       * of each field; the result is zero iff every field matches. */
      if( ((laddr - tbl->table[hash1].laddr) |
           (lport - sock_lport_be16(s)     ) |
           (raddr - sock_raddr_be32(s)     ) |
           (rport - sock_rport_be16(s)     ) |
           (protocol - sock_protocol(s)    )) == 0 )
        /* Skip sockets bound to a different interface/VLAN. */
        if(CI_LIKELY( (s->rx_bind2dev_ifindex == CI_IFID_BAD ||
                       ci_sock_intf_check(ni, s, intf_i, vlan)) ))
          if( callback(s, callback_arg) != 0 )
            return;
    }
    else if( id == EMPTY )
      break;
    /* We defer calculating hash2 until it's needed, just to make the fast
    ** case that little bit faster. */
    if( hash1 == first )
      hash2 = tcp_hash2(tbl, laddr, lport, raddr, rport, protocol);
    hash1 = (hash1 + hash2) & tbl->table_size_mask;
    if( hash1 == first ) {
      /* Probe sequence wrapped around without hitting EMPTY: whole table
       * scanned. */
      LOG_NV(ci_log(FN_FMT "ITERATE FULL %s:%u->%s:%u hash=%u:%u",
                    FN_PRI_ARGS(ni), ip_addr_str(laddr), lport,
                    ip_addr_str(raddr), rport, hash1, hash2));
      break;
    }
  }
}
/* Look up the filter-table slot holding an exact match for the given
 * 5-tuple.  Returns the slot index (>= 0) on success, -ENOENT if the
 * probe chain ends at an EMPTY slot without a match, or -ELOOP if the
 * probe sequence wraps all the way around the table (which indicates a
 * corrupt/full table and is logged as an error).
 * Caller must hold the netif lock (asserted below). */
int ci_netif_filter_lookup(ci_netif* netif, unsigned laddr, unsigned lport,
                           unsigned raddr, unsigned rport, unsigned protocol)
{
  unsigned hash1, hash2 = 0;
  ci_netif_filter_table* tbl;
  unsigned first;

  ci_assert(netif);
  ci_assert(ci_netif_is_locked(netif));
  ci_assert(netif->filter_table);

  tbl = netif->filter_table;
  /* hash1 is the initial probe slot; hash2 (computed lazily below) is the
   * double-hashing step used on collision. */
  hash1 = tcp_hash1(tbl, laddr, lport, raddr, rport, protocol);
  first = hash1;

  LOG_NV(log("tbl_lookup: %s %s:%u->%s:%u hash=%u:%u at=%u",
             CI_IP_PROTOCOL_STR(protocol),
             ip_addr_str(laddr), (unsigned) CI_BSWAP_BE16(lport),
             ip_addr_str(raddr), (unsigned) CI_BSWAP_BE16(rport),
             first, tcp_hash2(tbl, laddr, lport, raddr, rport, protocol),
             hash1));

  while( 1 ) {
    int id = tbl->table[hash1].id;
    if( CI_LIKELY(id >= 0) ) {
      ci_sock_cmn* s = ID_TO_SOCK(netif, id);
      /* Branch-free 5-tuple compare: OR of unsigned field differences is
       * zero iff all five fields match. */
      if( ((laddr - tbl->table[hash1].laddr) |
           (lport - sock_lport_be16(s)     ) |
           (raddr - sock_raddr_be32(s)     ) |
           (rport - sock_rport_be16(s)     ) |
           (protocol - sock_protocol(s)    )) == 0 )
        return hash1;
    }
    if( id == EMPTY )
      break;
    /* We defer calculating hash2 until it's needed, just to make the fast
     * case that little bit faster. */
    if( hash1 == first )
      hash2 = tcp_hash2(tbl, laddr, lport, raddr, rport, protocol);
    hash1 = (hash1 + hash2) & tbl->table_size_mask;
    if( hash1 == first ) {
      /* Wrapped around without finding EMPTY: should not happen in a
       * well-formed table. */
      LOG_E(ci_log(FN_FMT "ERROR: LOOP %s:%u->%s:%u hash=%u:%u",
                   FN_PRI_ARGS(netif), ip_addr_str(laddr), lport,
                   ip_addr_str(raddr), rport, hash1, hash2));
      return -ELOOP;
    }
  }

  return -ENOENT;
}
/* Emit a one-line summary (waitable type plus local/remote endpoint) for
 * every non-free endpoint buffer in the stack, via the supplied dump
 * logger. */
static void thc_dump_sockets(ci_netif* netif, oo_dump_log_fn_t log,
                             void* log_arg)
{
  unsigned ep_id;

  for( ep_id = 0; ep_id < netif->state->n_ep_bufs; ++ep_id ) {
    citp_waitable_obj* obj = ID_TO_WAITABLE_OBJ(netif, ep_id);
    citp_waitable* waitable = &obj->waitable;
    ci_sock_cmn* sock;

    if( waitable->state == CI_TCP_STATE_FREE )
      continue;

    sock = CI_CONTAINER(ci_sock_cmn, b, waitable);
    log(log_arg, " %s lcl="OOF_IP4PORT" rmt="OOF_IP4PORT,
        citp_waitable_type_str(waitable),
        OOFA_IP4PORT(sock_laddr_be32(sock), sock_lport_be16(sock)),
        OOFA_IP4PORT(sock_raddr_be32(sock), sock_rport_be16(sock)));
  }
}
/* Dump detailed state of one waitable via |logger|: type/id line (with
 * endpoints for sockets), lock word, wake sequence counters and wake
 * request flags, and spin-poll configuration.  |pf| is a prefix string
 * prepended to every output line.  Free and auxbuf waitables get only the
 * first line. */
static void citp_waitable_dump2(ci_netif* ni, citp_waitable* w, const char* pf,
                                oo_dump_log_fn_t logger, void* log_arg)
{
  unsigned tmp;

  if( CI_TCP_STATE_IS_SOCKET(w->state) ) {
    /* Sockets: include local/remote address:port and state name. */
    ci_sock_cmn* s = CI_CONTAINER(ci_sock_cmn, b, w);
    logger(log_arg, "%s%s "NT_FMT"lcl="OOF_IP4PORT" rmt="OOF_IP4PORT" %s",
           pf, citp_waitable_type_str(w), NI_ID(ni), W_FMT(w),
           OOFA_IP4PORT(sock_laddr_be32(s), sock_lport_be16(s)),
           OOFA_IP4PORT(sock_raddr_be32(s), sock_rport_be16(s)),
           ci_tcp_state_str(w->state));
  }
  else
    logger(log_arg, "%s%s "NT_FMT, pf, citp_waitable_type_str(w),
           NI_ID(ni), W_FMT(w));

  /* No further state worth showing for free/auxbuf entries. */
  if( w->state == CI_TCP_STATE_FREE || w->state == CI_TCP_STATE_AUXBUF )
    return;

  tmp = w->lock.wl_val;
  logger(log_arg, "%s lock: %x %s%s", pf, tmp,
         (tmp & OO_WAITABLE_LK_LOCKED) ? "LOCKED" : "",
         (tmp & OO_WAITABLE_LK_NEED_WAKE) ? " CONTENDED": "");

  /* Sleep sequence numbers plus "(RQ)" when a wake has been requested on
   * the rx/tx side respectively. */
  logger(log_arg, "%s rx_wake=%08x%s tx_wake=%08x%s flags: "CI_SB_FLAGS_FMT,
         pf,
         w->sleep_seq.rw.rx,
         ci_bit_test(&w->wake_request, CI_SB_FLAG_WAKE_RX_B) ? "(RQ)":" ",
         w->sleep_seq.rw.tx,
         ci_bit_test(&w->wake_request, CI_SB_FLAG_WAKE_TX_B) ? "(RQ)":" ",
         CI_SB_FLAGS_PRI_ARG(w));

  /* spin_cycles == -1 denotes "spin forever"; otherwise convert cycles to
   * microseconds for readability. */
  if( w->spin_cycles == -1 )
    logger(log_arg, "%s ul_poll: -1 spin cycles -1 usecs", pf);
  else
    logger(log_arg, "%s ul_poll: %llu spin cycles %u usec", pf,
           w->spin_cycles, oo_cycles64_to_usec(ni, w->spin_cycles));
}
/* Dump every occupied slot of the software filter table to the log,
 * showing slot index, socket id, route count, protocol, 5-tuple and the
 * two hash values used for probing.  When netif stats are compiled in, a
 * summary line (size/entries/slots/hop stats) is printed first.
 *
 * Fix: the slot index |i| and the hash values are unsigned, but were
 * printed with "%010d".  A printf conversion specifier that does not
 * match the argument type is undefined behaviour (C11 7.21.6.1p9), so
 * they are now printed with "%010u". */
void ci_netif_filter_dump(ci_netif* ni)
{
  int id;
  unsigned i;
  ci_netif_filter_table* tbl;

  ci_assert(ni);
  tbl = ni->filter_table;

  log("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++");
#if CI_CFG_STATS_NETIF
  log(FN_FMT "size=%d n_entries=%i n_slots=%i max=%i mean=%i",
      FN_PRI_ARGS(ni), tbl->table_size_mask + 1,
      ni->state->stats.table_n_entries, ni->state->stats.table_n_slots,
      ni->state->stats.table_max_hops, ni->state->stats.table_mean_hops);
#endif
  for( i = 0; i <= tbl->table_size_mask; ++i ) {
    id = tbl->table[i].id;
    if( CI_LIKELY(id >= 0) ) {
      ci_sock_cmn* s = ID_TO_SOCK(ni, id);
      unsigned laddr = tbl->table[i].laddr;
      int lport = sock_lport_be16(s);
      unsigned raddr = sock_raddr_be32(s);
      int rport = sock_rport_be16(s);
      int protocol = sock_protocol(s);
      unsigned hash1 = tcp_hash1(tbl, laddr, lport, raddr, rport, protocol);
      unsigned hash2 = tcp_hash2(tbl, laddr, lport, raddr, rport, protocol);
      /* %010u for i/hash1/hash2: all three are unsigned. */
      log("%010u id=%-10d rt_ct=%d %s "CI_IP_PRINTF_FORMAT":%d "
          CI_IP_PRINTF_FORMAT":%d %010u:%010u",
          i, id, tbl->table[i].route_count, CI_IP_PROTOCOL_STR(protocol),
          CI_IP_PRINTF_ARGS(&laddr), CI_BSWAP_BE16(lport),
          CI_IP_PRINTF_ARGS(&raddr), CI_BSWAP_BE16(rport), hash1, hash2);
    }
  }
  log("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++");
}
/* Driver-side handler for the netif-info debug ioctl.  Looks up the stack
 * named by info->ni_index (optionally including orphan stacks with no
 * userland attached), fills in basic stack info, then performs the
 * requested sub-op: enumerate the next stack index, report per-endpoint
 * state, or no-op.  Returns 0 on success or a negative errno.
 * Note: any reference taken by efab_thr_table_lookup() is dropped at the
 * end — release for an ordinary stack, k_ref_count_dec for an orphan. */
static int efab_tcp_helper_get_info(ci_private_t *unused, void *arg)
{
  ci_netif_info_t *info = arg;
  int index, rc=0;
  tcp_helper_resource_t* thr = NULL;
  ci_netif* ni = NULL;
  int flags = EFAB_THR_TABLE_LOOKUP_CHECK_USER |
              EFAB_THR_TABLE_LOOKUP_NO_WARN;

#if CI_CFG_EFAB_EPLOCK_RECORD_CONTENTIONS
  int j;
  eplock_resource_t* eplock_rs;
#endif

  info->ni_exists = 0;
  info->ni_no_perms_exists = 0;
  /* Caller asked to include orphan stacks (no userland attached); the
   * flag is consumed here and re-reported below from the actual stack. */
  if( info->ni_orphan ) {
    flags |= EFAB_THR_TABLE_LOOKUP_NO_UL;
    info->ni_orphan = 0;
  }

  rc = efab_thr_table_lookup(NULL, info->ni_index, flags, &thr);
  if( rc == 0 ) {
    info->ni_exists = 1;
    info->ni_orphan = (thr->k_ref_count & TCP_HELPER_K_RC_NO_USERLAND);
    ni = &thr->netif;
    info->mmap_bytes = thr->mem_mmap_bytes;
    info->k_ref_count = thr->k_ref_count;
    info->rs_ref_count = oo_atomic_read(&thr->ref_count);
    memcpy(info->ni_name, ni->state->name, sizeof(ni->state->name));
  }
  else if( rc == -EACCES ) {
    /* Stack exists but the caller lacks permission; report identity info
     * about the inaccessible stack instead. */
    info->ni_no_perms_id = info->ni_index;
    if( efab_thr_get_inaccessible_stack_info(info->ni_index,
                                             &info->ni_no_perms_uid,
                                             &info->ni_no_perms_euid,
                                             &info->ni_no_perms_share_with,
                                             info->ni_no_perms_name) == 0 )
      info->ni_no_perms_exists = 1;
  }

  /* sub-ops that do not need the netif to exist */
  if( info->ni_subop == CI_DBG_NETIF_INFO_GET_NEXT_NETIF ) {
    tcp_helper_resource_t* next_thr;

    info->u.ni_next_ni.index = -1;
    /* Linear scan for the next stack index above ni_index; -EACCES still
     * counts as "a stack exists here". */
    for( index = info->ni_index + 1;
         index < 10000 /* FIXME: magic upper bound on stack index */;
         ++index ) {
      rc = efab_thr_table_lookup(NULL, index, flags, &next_thr);
      if( rc == 0 ) {
        /* Drop the reference lookup just took: orphans only hold a kernel
         * refcount, ordinary stacks need a full release. */
        if( next_thr->k_ref_count & TCP_HELPER_K_RC_NO_USERLAND )
          efab_tcp_helper_k_ref_count_dec(next_thr, 1);
        else
          efab_thr_release(next_thr);
        info->u.ni_next_ni.index = index;
        break;
      }
      if( rc == -EACCES ) {
        info->u.ni_next_ni.index = index;
        break;
      }
    }
    rc = 0;
  }
  else if( info->ni_subop == CI_DBG_NETIF_INFO_NOOP ) {
    rc = 0;
  }

  if (!info->ni_exists)
    return 0;

  /* sub-ops that need the netif to exist */
  switch (info->ni_subop) {

  case CI_DBG_NETIF_INFO_GET_ENDPOINT_STATE:
    index = info->u.ni_endpoint.index;
    info->u.ni_endpoint.max = thr->netif.ep_tbl_n;
    if ((index < 0) || (index >= (int)thr->netif.ep_tbl_n)) {
      /* Out-of-range endpoint index: report as free. */
      info->u.ni_endpoint.state = CI_TCP_STATE_FREE;
    }
    else {
      citp_waitable_obj* wo = ID_TO_WAITABLE_OBJ(ni, index);

      info->u.ni_endpoint.state = wo->waitable.state;

      if( wo->waitable.state == CI_TCP_STATE_UDP ) {
        ci_udp_state* us = &wo->udp;
        info->u.ni_endpoint.udpstate = us->udpflags;
        info->u.ni_endpoint.rx_pkt_ul = us->recv_q.pkts_delivered;
        info->u.ni_endpoint.rx_pkt_kn = us->stats.n_rx_os;
      }
      else if( wo->waitable.state & CI_TCP_STATE_TCP_CONN ) {
        ci_tcp_state* ts = &wo->tcp;
        info->u.ni_endpoint.tx_pkts_max = ts->so_sndbuf_pkts;
        info->u.ni_endpoint.tx_pkts_num = ts->send.num;
      }
      if( wo->waitable.state & CI_TCP_STATE_SOCKET ) {
        ci_sock_cmn* s = &wo->sock;
        info->u.ni_endpoint.protocol = (int) sock_protocol(s);
        info->u.ni_endpoint.laddr = sock_laddr_be32(s);
        info->u.ni_endpoint.lport = (int) sock_lport_be16(s);
        info->u.ni_endpoint.raddr = sock_raddr_be32(s);
        info->u.ni_endpoint.rport = (int) sock_rport_be16(s);
      }
    }
    break;

  case CI_DBG_NETIF_INFO_GET_NEXT_NETIF:
    /* If the current netif is found, we need to succeed */
    break;

  case CI_DBG_NETIF_INFO_NOOP:
    /* Always succeeds, rc already set */
    break;

  default:
    rc = -EINVAL;
    break;
  }

  if( thr ) {
    /* Lookup needs a matching efab_thr_release() in case of ordinary
     * stack but just a ref_count_dec in case of orphan */
    if( thr->k_ref_count & TCP_HELPER_K_RC_NO_USERLAND )
      efab_tcp_helper_k_ref_count_dec(thr, 1);
    else
      efab_thr_release(thr);
  }
  return rc;
}
/* In this bind handler we just check that the address to which we are
 * binding is either "any" or one of ours.
 * In the Linux kernel version [fd] is unused.
 *
 * Returns 0 on success, CI_SOCKET_HANDOVER when the socket should be
 * handed over to the kernel stack (non-IPv4-mapped IPv6), or sets errno
 * and returns via RET_WITH_ERRNO on validation failure. */
int ci_tcp_bind(citp_socket* ep, const struct sockaddr* my_addr,
                socklen_t addrlen, ci_fd_t fd )
{
  struct sockaddr_in* my_addr_in;
  ci_uint16 new_port;
  ci_uint32 addr_be32;
  ci_sock_cmn* s = ep->s;
  ci_tcp_state* c = &SOCK_TO_WAITABLE_OBJ(s)->tcp;
  int rc;

  CHECK_TEP(ep);

  my_addr_in = (struct sockaddr_in*) my_addr;

  /* Check if state of the socket is OK for bind operation. */
  /* \todo Earlier (TS_TCP( epi->tcpep.state )->tcp_source_be16) is used.
   * What is better? */
  if (my_addr == NULL)
    RET_WITH_ERRNO( EINVAL );

  /* Only a closed, never-established socket may be bound. */
  if (s->b.state != CI_TCP_CLOSED)
    RET_WITH_ERRNO( EINVAL );

  if (c->tcpflags & CI_TCPT_FLAG_WAS_ESTAB)
    RET_WITH_ERRNO( EINVAL );

  if( my_addr->sa_family != s->domain )
    RET_WITH_ERRNO( s->domain == PF_INET ? EAFNOSUPPORT : EINVAL );

  /* Bug 4884: Windows regularly uses addrlen > sizeof(struct sockaddr_in)
   * Linux is also relaxed about overlength data areas. */
  if (s->domain == PF_INET && addrlen < sizeof(struct sockaddr_in))
    RET_WITH_ERRNO( EINVAL );

#if CI_CFG_FAKE_IPV6
  if (s->domain == PF_INET6 && addrlen < SIN6_LEN_RFC2133)
    RET_WITH_ERRNO( EINVAL );

  /* Pure IPv6 addresses cannot be accelerated: hand over to the kernel. */
  if( s->domain == PF_INET6 && !ci_tcp_ipv6_is_ipv4(my_addr) )
    return CI_SOCKET_HANDOVER;
#endif

  addr_be32 = ci_get_ip4_addr(s->domain, my_addr);

  /* Using the port number provided, see if we can do this bind */
  new_port = my_addr_in->sin_port;

  /* If this port is on the configured tcp_reuseports list, set
   * SO_REUSEPORT on the backing OS socket.  ENOPROTOOPT from the kernel
   * means no native SO_REUSEPORT support, so fall back to the legacy
   * emulation flag. */
  if( CITP_OPTS.tcp_reuseports != 0 && new_port != 0 ) {
    struct ci_port_list *force_reuseport;
    CI_DLLIST_FOR_EACH2(struct ci_port_list, force_reuseport, link,
                        (ci_dllist*)(ci_uintptr_t)CITP_OPTS.tcp_reuseports) {
      if( force_reuseport->port == new_port ) {
        int one = 1;
        ci_fd_t os_sock = ci_get_os_sock_fd(ep, fd);
        ci_assert(CI_IS_VALID_SOCKET(os_sock));
        rc = ci_sys_setsockopt(os_sock, SOL_SOCKET, SO_REUSEPORT, &one,
                               sizeof(one));
        ci_rel_os_sock_fd(os_sock);
        if( rc != 0 && errno == ENOPROTOOPT )
          ep->s->s_flags |= CI_SOCK_FLAG_REUSEPORT_LEGACY;
        ep->s->s_flags |= CI_SOCK_FLAG_REUSEPORT;
        LOG_TC(log("%s "SF_FMT", applied legacy SO_REUSEPORT flag for port %u",
                   __FUNCTION__, SF_PRI_ARGS(ep, fd), new_port));
      }
    }
  }

  /* Legacy reuseport defers the real bind; otherwise perform it now
   * (may allocate an ephemeral port into new_port). */
  if( !(ep->s->s_flags & CI_SOCK_FLAG_REUSEPORT_LEGACY) )
    CI_LOGLEVEL_TRY_RET(LOG_TV,
                        __ci_bind(ep->netif, ep->s, addr_be32, &new_port));
  ep->s->s_flags |= CI_SOCK_FLAG_BOUND;
  sock_lport_be16(s) = new_port;
  sock_laddr_be32(s) = addr_be32;
  /* Multicast local addresses are not used as the control-plane local
   * address. */
  if( CI_IP_IS_MULTICAST(addr_be32) )
    s->cp.ip_laddr_be32 = 0;
  else
    s->cp.ip_laddr_be32 = addr_be32;
  s->cp.lport_be16 = new_port;
  sock_rport_be16(s) = sock_raddr_be32(s) = 0;

  LOG_TC(log(LPF "bind to %s:%u n_p:%u lp:%u", ip_addr_str(addr_be32),
             (unsigned) CI_BSWAP_BE16(my_addr_in->sin_port),
             CI_BSWAP_BE16(new_port), CI_BSWAP_BE16(sock_lport_be16(s))));

  return 0;
}