/* Search one level of the listen-queue lookup structure for a synrecv
** object matching the incoming SYN described by [rxp].  Bucket slots
** either point at a sub-bucket (in which case we recurse one level
** deeper) or at a chain of synrecv states which we walk linearly.
** Returns the matching synrecv, or NULL if there is none. */
static ci_tcp_state_synrecv*
ci_tcp_listenq_bucket_lookup(ci_netif* ni, ci_tcp_listen_bucket* bucket,
                             ciip_tcp_rx_pkt* rxp, int level)
{
  int idx = ci_tcp_listenq_hash2idx(rxp->hash, level);
  ci_ni_aux_mem* aux;
  ci_tcp_state_synrecv* tsr;
  unsigned remote_addr, local_addr, remote_port;
#ifdef __KERNEL__
  int chain_len = 0;

  /* Corruption guard: the bucket tree can never legitimately be deeper
   * than CI_LISTENQ_BUCKET_MAX_DEPTH levels. */
  if( level > CI_LISTENQ_BUCKET_MAX_DEPTH(ni) ) {
    ci_netif_error_detected(ni, CI_NETIF_ERROR_SYNRECV_TABLE, __FUNCTION__);
    return 0;
  }
#endif

  LOG_TV(ci_log("%s([%d] level=%d hash:%x l:%s r:%s:%d)", __func__,
                NI_ID(ni), level, rxp->hash,
                ip_addr_str(oo_ip_hdr(rxp->pkt)->ip_daddr_be32),
                ip_addr_str(oo_ip_hdr(rxp->pkt)->ip_saddr_be32),
                CI_BSWAP_BE16(rxp->tcp->tcp_source_be16)));

  if( OO_P_IS_NULL(bucket->bucket[idx]) )
    return NULL;

  level++;
  aux = ci_ni_aux_p2aux(ni, bucket->bucket[idx]);
  if( aux->type == CI_TCP_AUX_TYPE_BUCKET )
    /* Slot holds a sub-bucket: descend one level and retry. */
    return ci_tcp_listenq_bucket_lookup(ni, &aux->u.bucket, rxp, level);

  remote_addr = oo_ip_hdr(rxp->pkt)->ip_saddr_be32;
  local_addr = oo_ip_hdr(rxp->pkt)->ip_daddr_be32;
  remote_port = rxp->tcp->tcp_source_be16;

  /* Leaf slot: walk the chain of synrecv states hanging off it. */
  for( tsr = &aux->u.synrecv; ; tsr = &aux->u.synrecv ) {
    /* Branch-free 3-tuple compare: the OR of the field differences is
     * zero iff every field matches. */
    if( ! ((remote_addr - tsr->r_addr) | (local_addr - tsr->l_addr) |
           (remote_port - tsr->r_port)) )
      return tsr;
    if( OO_P_IS_NULL(tsr->bucket_link) )
      return NULL;
    aux = ci_ni_aux_p2aux(ni, tsr->bucket_link);
#ifdef __KERNEL__
    /* Corruption guard: bound the chain walk in-kernel. */
    if( chain_len++ > CI_LISTENQ_BUCKET_LIST_LIMIT(ni) ) {
      ci_netif_error_detected(ni, CI_NETIF_ERROR_SYNRECV_TABLE, __FUNCTION__);
      return NULL;
    }
#endif
  }
}
/*
** See if there is a synrecv object that matches this syn request already.
*/
ci_tcp_state_synrecv*
ci_tcp_listenq_lookup(ci_netif* netif, ci_tcp_socket_listen* tls,
                      ciip_tcp_rx_pkt* rxp)
{
  /* Delegate to the recursive bucket walk, starting at level 0 from the
   * listen socket's root bucket. */
  ci_tcp_state_synrecv* match =
    ci_tcp_listenq_bucket_lookup(netif,
                                 ci_ni_aux_p2bucket(netif, tls->bucket),
                                 rxp, 0);
  if( match != NULL )
    return match;

  LOG_TV(log(LPF "no match for %s:%d->%s:%d",
             ip_addr_str(oo_ip_hdr(rxp->pkt)->ip_saddr_be32),
             (int) CI_BSWAP_BE16(rxp->tcp->tcp_source_be16),
             ip_addr_str(oo_ip_hdr(rxp->pkt)->ip_daddr_be32),
             (int) CI_BSWAP_BE16(rxp->tcp->tcp_dest_be16)));
  return NULL;
}
/* Slow path for UDP recvmsg: error-queue (TX timestamp) retrieval, pending
** socket errors, argument validation and other rarely-taken cases.
**
** Returns 0 when the caller should continue with the normal receive path;
** a byte count (or IOVLEN_WORKAROUND_RC_VALUE) when the request has been
** fully handled here; or an error set up via CI_SET_ERROR. */
static int ci_udp_recvmsg_socklocked_slowpath(ci_udp_iomsg_args* a,
                                              ci_msghdr* msg,
                                              ci_iovec_ptr *piov, int flags)
{
  int rc = 0;
  ci_netif* ni = a->ni;
  ci_udp_state* us = a->us;

  if(CI_UNLIKELY( ni->state->rxq_low ))
    ci_netif_rxq_low_on_recv(ni, &us->s,
                             1 /* assume at least one pkt freed */);

  /* In the kernel recv() with flags is not called.
   * only read(). So flags may only contain MSG_DONTWAIT */
#ifdef __KERNEL__
  ci_assert_equal(flags, 0);
#endif

#ifndef __KERNEL__
  if( flags & MSG_ERRQUEUE_CHK ) {
    if( OO_PP_NOT_NULL(us->timestamp_q.extract) ) {
      ci_ip_pkt_fmt* pkt;
      struct timespec ts[3];
      struct cmsg_state cmsg_state;
      ci_udp_hdr* udp;
      int paylen;

      /* TODO is this necessary? - mirroring ci_udp_recvmsg_get() */
      ci_rmb();

      pkt = PKT_CHK_NNL(ni, us->timestamp_q.extract);
      if( pkt->tx_hw_stamp.tv_sec == CI_PKT_TX_HW_STAMP_CONSUMED ) {
        /* Head of queue already consumed: advance to the next packet, or
         * fall back to the OS error queue if there is none. */
        if( OO_PP_IS_NULL(pkt->tsq_next) )
          goto errqueue_empty;
        us->timestamp_q.extract = pkt->tsq_next;
        pkt = PKT_CHK_NNL(ni, us->timestamp_q.extract);
        ci_assert(pkt->tx_hw_stamp.tv_sec != CI_PKT_TX_HW_STAMP_CONSUMED);
      }

      udp = oo_ip_data(pkt);
      /* Bug fix: was "sizeof(udp)", i.e. the size of the POINTER, not the
       * UDP header.  That only gives the right answer by coincidence on
       * LP64 targets (both 8 bytes); on 32-bit it under-counts the header
       * and inflates paylen.  Use the header type explicitly. */
      paylen = CI_BSWAP_BE16(oo_ip_hdr(pkt)->ip_tot_len_be16) -
               sizeof(ci_ip4_hdr) - sizeof(ci_udp_hdr);

      msg->msg_flags = 0;
      cmsg_state.msg = msg;
      cmsg_state.cm = msg->msg_control;
      cmsg_state.cmsg_bytes_used = 0;
      ci_iovec_ptr_init_nz(piov, msg->msg_iov, msg->msg_iovlen);
      memset(ts, 0, sizeof(ts));

      /* ts[0] (software stamp) is left zeroed; ts[1]/ts[2] are filled in
       * per the socket's timestamping flags, mirroring Linux
       * SO_TIMESTAMPING cmsg layout. */
      if( us->s.timestamping_flags & ONLOAD_SOF_TIMESTAMPING_RAW_HARDWARE ) {
        ts[2].tv_sec = pkt->tx_hw_stamp.tv_sec;
        ts[2].tv_nsec = pkt->tx_hw_stamp.tv_nsec;
      }
      if( (us->s.timestamping_flags & ONLOAD_SOF_TIMESTAMPING_SYS_HARDWARE) &&
          (pkt->tx_hw_stamp.tv_nsec & CI_IP_PKT_HW_STAMP_FLAG_IN_SYNC) ) {
        ts[1].tv_sec = pkt->tx_hw_stamp.tv_sec;
        ts[1].tv_nsec = pkt->tx_hw_stamp.tv_nsec;
      }
      ci_put_cmsg(&cmsg_state, SOL_SOCKET, ONLOAD_SCM_TIMESTAMPING,
                  sizeof(ts), &ts);

      /* Hand the original datagram payload back alongside the timestamp. */
      oo_offbuf_set_start(&pkt->buf, udp + 1);
      oo_offbuf_set_len(&pkt->buf, paylen);
      rc = oo_copy_pkt_to_iovec_no_adv(ni, pkt, piov, paylen);

      /* Mark this packet/timestamp as consumed */
      pkt->tx_hw_stamp.tv_sec = CI_PKT_TX_HW_STAMP_CONSUMED;

      ci_ip_cmsg_finish(&cmsg_state);
      msg->msg_flags |= MSG_ERRQUEUE_CHK;
      return rc;
    }
  errqueue_empty:
    /* ICMP is handled via OS, so get OS error */
    rc = oo_os_sock_recvmsg(ni, SC_SP(&us->s), msg, flags);
    if( rc < 0 ) {
      ci_assert(-rc == errno);
      return -1;
    }
    else
      return rc;
  }
#endif

  if( (rc = ci_get_so_error(&us->s)) != 0 ) {
    CI_SET_ERROR(rc, rc);
    return rc;
  }
  if( msg->msg_iovlen > 0 && msg->msg_iov == NULL ) {
    CI_SET_ERROR(rc, EFAULT);
    return rc;
  }
#if MSG_OOB_CHK
  if( flags & MSG_OOB_CHK ) {
    CI_SET_ERROR(rc, EOPNOTSUPP);
    return rc;
  }
#endif
#if CI_CFG_POSIX_RECV
  if( ! udp_lport_be16(us)) {
    LOG_UV(log("%s: -1 (ENOTCONN)", __FUNCTION__));
    CI_SET_ERROR(rc, ENOTCONN);
    return rc;
  }
#endif
  if( msg->msg_iovlen == 0 ) {
    /* We have a difference in behaviour from the Linux stack here. When
    ** msg_iovlen is 0 Linux 2.4.21-15.EL does not set MSG_TRUNC when a
    ** datagram has non-zero length. We do. */
    CI_IOVEC_LEN(&piov->io) = piov->iovlen = 0;
    return IOVLEN_WORKAROUND_RC_VALUE;
  }
  return 0;
}
/* Run the application-installed zero-copy receive filter callback over any
** packets that have been added to the UDP receive queue but not yet
** filtered.  Each packet is marked FILTER_PASSED or FILTER_DROPPED and the
** per-queue byte/packet counters are updated accordingly.
**
** Returns non-zero if there are filter-passed packets not yet delivered
** (i.e. something for the receive path to consume); returns immediately
** with 1 as soon as one packet passes the filter.
**
** NOTE(review): progress is tracked with counters (pkts_added vs
** pkts_filter_passed + pkts_filter_dropped) that appear to be written by
** another context; the ci_rmb() ordering below looks deliberate — do not
** reorder these statements without understanding the memory model. */
int ci_udp_filter_recved_pkts(ci_netif* ni, ci_udp_state* us)
{
  enum onload_zc_callback_rc rc;
  struct onload_zc_msg zc_msg;
  struct onload_zc_iovec zc_iovec[CI_UDP_ZC_IOVEC_MAX];
  ci_ip_pkt_fmt* pkt;
  unsigned cb_flags;
  int dropped_bytes;

  /* Caller must hold the socket lock. */
  ci_assert(ci_sock_is_locked(ni, &us->s.b));

  zc_msg.iov = zc_iovec;
  zc_msg.msghdr.msg_controllen = 0;
  zc_msg.msghdr.msg_flags = 0;

  /* Loop until every added packet has been either passed or dropped. */
  while( us->recv_q.pkts_added !=
         us->recv_q.pkts_filter_passed + us->recv_q.pkts_filter_dropped ) {
    /* Order the counter read above before reading the packet contents. */
    ci_rmb();

    pkt = PKT_CHK_NNL(ni, us->recv_q.filter);
    if( pkt->pf.udp.rx_flags &
        (CI_IP_PKT_FMT_PREFIX_UDP_RX_FILTER_PASSED |
         CI_IP_PKT_FMT_PREFIX_UDP_RX_FILTER_DROPPED) ) {
      /* Current filter position already processed: advance to the next
       * packet, which must be unprocessed. */
      /* We know this can't go past tail because of the while loop condition */
      us->recv_q.filter = pkt->next;
      pkt = PKT_CHK_NNL(ni, us->recv_q.filter);
      ci_assert( !(pkt->pf.udp.rx_flags &
                   (CI_IP_PKT_FMT_PREFIX_UDP_RX_FILTER_PASSED |
                    CI_IP_PKT_FMT_PREFIX_UDP_RX_FILTER_DROPPED)) );
    }

    /* Present the packet to the user callback as a zero-copy message.
     * Multicast packets may be shared with other sockets, so flag them
     * as not modifiable. */
    ci_udp_pkt_to_zc_msg(ni, pkt, &zc_msg);
    cb_flags = CI_IP_IS_MULTICAST(oo_ip_hdr(pkt)->ip_daddr_be32) ?
      ONLOAD_ZC_MSG_SHARED : 0;
    rc = (*(onload_zc_recv_filter_callback)((ci_uintptr_t)us->recv_q_filter))
      (&zc_msg, (void *)((ci_uintptr_t)us->recv_q_filter_arg), cb_flags);

    /* Filters may not keep packet references. */
    ci_assert(!(rc & ONLOAD_ZC_KEEP));

    if( rc & ONLOAD_ZC_TERMINATE ) {
      /* Callback rejected the packet: account it as dropped. */
      us->recv_q.bytes_filter_dropped += pkt->pf.udp.pay_len;
      pkt->pf.udp.rx_flags |= CI_IP_PKT_FMT_PREFIX_UDP_RX_FILTER_DROPPED;
      ++us->recv_q.pkts_filter_dropped;
    }
    else {
      pkt->pf.udp.rx_flags |= CI_IP_PKT_FMT_PREFIX_UDP_RX_FILTER_PASSED;
      ++us->recv_q.pkts_filter_passed;
      if( rc & ONLOAD_ZC_MODIFIED ) {
        /* Callback shrank the payload in place; a shared (multicast)
         * packet must not be modified. */
        ci_assert(!(cb_flags & ONLOAD_ZC_MSG_SHARED));
        dropped_bytes = ci_zc_msg_to_udp_pkt(ni, &zc_msg, pkt);
        ci_assert_gt(dropped_bytes, 0);
        ci_assert_lt(dropped_bytes, pkt->pf.udp.pay_len);
        pkt->pf.udp.pay_len -= dropped_bytes;
        us->recv_q.bytes_filter_dropped += dropped_bytes;
      }
      us->recv_q.bytes_filter_passed += pkt->pf.udp.pay_len;
      /* One packet has passed: the receive path can proceed at once;
       * remaining packets are filtered on a later call. */
      return 1;
    }
  }

  return us->recv_q.pkts_filter_passed != us->recv_q.pkts_delivered;
}