int ksocknal_lib_recv_kiov (ksock_conn_t *conn) { #if SOCKNAL_SINGLE_FRAG_RX || !SOCKNAL_RISK_KMAP_DEADLOCK struct kvec scratch; struct kvec *scratchiov = &scratch; struct page **pages = NULL; unsigned int niov = 1; #else #ifdef CONFIG_HIGHMEM #warning "XXX risk of kmap deadlock on multiple frags..." #endif struct kvec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov; struct page **pages = conn->ksnc_scheduler->kss_rx_scratch_pgs; unsigned int niov = conn->ksnc_rx_nkiov; #endif lnet_kiov_t *kiov = conn->ksnc_rx_kiov; struct msghdr msg = { .msg_flags = 0 }; int nob; int i; int rc; void *base; void *addr; int sum; int fragnob; int n; /* NB we can't trust socket ops to either consume our iovs * or leave them alone. */ if ((addr = ksocknal_lib_kiov_vmap(kiov, niov, scratchiov, pages)) != NULL) { nob = scratchiov[0].iov_len; n = 1; } else { for (nob = i = 0; i < niov; i++) { nob += scratchiov[i].iov_len = kiov[i].kiov_len; scratchiov[i].iov_base = kmap(kiov[i].kiov_page) + kiov[i].kiov_offset; } n = niov; } LASSERT (nob <= conn->ksnc_rx_nob_wanted); rc = kernel_recvmsg(conn->ksnc_sock, &msg, scratchiov, n, nob, MSG_DONTWAIT); if (conn->ksnc_msg.ksm_csum != 0) { for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) { LASSERT (i < niov); /* Dang! have to kmap again because I have nowhere to stash the * mapped address. But by doing it while the page is still * mapped, the kernel just bumps the map count and returns me * the address it stashed. */ base = kmap(kiov[i].kiov_page) + kiov[i].kiov_offset; fragnob = kiov[i].kiov_len; if (fragnob > sum) fragnob = sum; conn->ksnc_rx_csum = ksocknal_csum(conn->ksnc_rx_csum, base, fragnob); kunmap(kiov[i].kiov_page); } } if (addr != NULL) { ksocknal_lib_kiov_vunmap(addr); } else { for (i = 0; i < niov; i++) kunmap(kiov[i].kiov_page); } return (rc); } void ksocknal_lib_csum_tx(ksock_tx_t *tx) { int i; __u32 csum; void *base; LASSERT(tx->tx_iov[0].iov_base == (void *)&tx->tx_msg); LASSERT(tx->tx_conn != NULL); LASSERT(tx->tx_conn->ksnc_proto == &ksocknal_protocol_v2x); tx->tx_msg.ksm_csum = 0; csum = ksocknal_csum(~0, (void *)tx->tx_iov[0].iov_base, tx->tx_iov[0].iov_len); if (tx->tx_kiov != NULL) { for (i = 0; i < tx->tx_nkiov; i++) { base = kmap(tx->tx_kiov[i].kiov_page) + tx->tx_kiov[i].kiov_offset; csum = ksocknal_csum(csum, base, tx->tx_kiov[i].kiov_len); kunmap(tx->tx_kiov[i].kiov_page); } } else { for (i = 1; i < tx->tx_niov; i++) csum = ksocknal_csum(csum, tx->tx_iov[i].iov_base, tx->tx_iov[i].iov_len); } if (*ksocknal_tunables.ksnd_inject_csum_error) { csum++; *ksocknal_tunables.ksnd_inject_csum_error = 0; } tx->tx_msg.ksm_csum = csum; }
int ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx) { struct socket *sock = conn->ksnc_sock; int nob; int rc; if (*ksocknal_tunables.ksnd_enable_csum && /* checksum enabled */ conn->ksnc_proto == &ksocknal_protocol_v2x && /* V2.x connection */ tx->tx_nob == tx->tx_resid && /* frist sending */ tx->tx_msg.ksm_csum == 0) /* not checksummed */ ksocknal_lib_csum_tx(tx); /* NB we can't trust socket ops to either consume our iovs * or leave them alone. */ { #if SOCKNAL_SINGLE_FRAG_TX struct iovec scratch; struct iovec *scratchiov = &scratch; unsigned int niov = 1; #else struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov; unsigned int niov = tx->tx_niov; #endif struct msghdr msg = { .msg_name = NULL, .msg_namelen = 0, .msg_iov = scratchiov, .msg_iovlen = niov, .msg_control = NULL, .msg_controllen = 0, .msg_flags = MSG_DONTWAIT }; mm_segment_t oldmm = get_fs(); int i; for (nob = i = 0; i < niov; i++) { scratchiov[i] = tx->tx_iov[i]; nob += scratchiov[i].iov_len; } if (!cfs_list_empty(&conn->ksnc_tx_queue) || nob < tx->tx_resid) msg.msg_flags |= MSG_MORE; set_fs (KERNEL_DS); rc = sock_sendmsg(sock, &msg, nob); set_fs (oldmm); } return rc; } int ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx) { struct socket *sock = conn->ksnc_sock; lnet_kiov_t *kiov = tx->tx_kiov; int rc; int nob; /* Not NOOP message */ LASSERT (tx->tx_lnetmsg != NULL); /* NB we can't trust socket ops to either consume our iovs * or leave them alone. */ if (tx->tx_msg.ksm_zc_cookies[0] != 0) { /* Zero copy is enabled */ struct sock *sk = sock->sk; struct page *page = kiov->kiov_page; int offset = kiov->kiov_offset; int fragsize = kiov->kiov_len; int msgflg = MSG_DONTWAIT; CDEBUG(D_NET, "page %p + offset %x for %d\n", page, offset, kiov->kiov_len); if (!cfs_list_empty(&conn->ksnc_tx_queue) || fragsize < tx->tx_resid) msgflg |= MSG_MORE; if (sk->sk_prot->sendpage != NULL) { rc = sk->sk_prot->sendpage(sk, page, offset, fragsize, msgflg); } else { rc = cfs_tcp_sendpage(sk, page, offset, fragsize, msgflg); } } else { #if SOCKNAL_SINGLE_FRAG_TX || !SOCKNAL_RISK_KMAP_DEADLOCK struct iovec scratch; struct iovec *scratchiov = &scratch; unsigned int niov = 1; #else #ifdef CONFIG_HIGHMEM #warning "XXX risk of kmap deadlock on multiple frags..." #endif struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov; unsigned int niov = tx->tx_nkiov; #endif struct msghdr msg = { .msg_name = NULL, .msg_namelen = 0, .msg_iov = scratchiov, .msg_iovlen = niov, .msg_control = NULL, .msg_controllen = 0, .msg_flags = MSG_DONTWAIT }; mm_segment_t oldmm = get_fs(); int i; for (nob = i = 0; i < niov; i++) { scratchiov[i].iov_base = kmap(kiov[i].kiov_page) + kiov[i].kiov_offset; nob += scratchiov[i].iov_len = kiov[i].kiov_len; } if (!cfs_list_empty(&conn->ksnc_tx_queue) || nob < tx->tx_resid) msg.msg_flags |= MSG_MORE; set_fs (KERNEL_DS); rc = sock_sendmsg(sock, &msg, nob); set_fs (oldmm); for (i = 0; i < niov; i++) kunmap(kiov[i].kiov_page); } return rc; } void ksocknal_lib_eager_ack (ksock_conn_t *conn) { int opt = 1; mm_segment_t oldmm = get_fs(); struct socket *sock = conn->ksnc_sock; /* Remind the socket to ACK eagerly. If I don't, the socket might * think I'm about to send something it could piggy-back the ACK * on, introducing delay in completing zero-copy sends in my * peer. */ set_fs(KERNEL_DS); sock->ops->setsockopt (sock, SOL_TCP, TCP_QUICKACK, (char *)&opt, sizeof (opt)); set_fs(oldmm); } int ksocknal_lib_recv_iov (ksock_conn_t *conn) { #if SOCKNAL_SINGLE_FRAG_RX struct iovec scratch; struct iovec *scratchiov = &scratch; unsigned int niov = 1; #else struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov; unsigned int niov = conn->ksnc_rx_niov; #endif struct iovec *iov = conn->ksnc_rx_iov; struct msghdr msg = { .msg_name = NULL, .msg_namelen = 0, .msg_iov = scratchiov, .msg_iovlen = niov, .msg_control = NULL, .msg_controllen = 0, .msg_flags = 0 }; mm_segment_t oldmm = get_fs(); int nob; int i; int rc; int fragnob; int sum; __u32 saved_csum; /* NB we can't trust socket ops to either consume our iovs * or leave them alone. */ LASSERT (niov > 0); for (nob = i = 0; i < niov; i++) { scratchiov[i] = iov[i]; nob += scratchiov[i].iov_len; } LASSERT (nob <= conn->ksnc_rx_nob_wanted); set_fs (KERNEL_DS); rc = sock_recvmsg (conn->ksnc_sock, &msg, nob, MSG_DONTWAIT); /* NB this is just a boolean..........................^ */ set_fs (oldmm); saved_csum = 0; if (conn->ksnc_proto == &ksocknal_protocol_v2x) { saved_csum = conn->ksnc_msg.ksm_csum; conn->ksnc_msg.ksm_csum = 0; } if (saved_csum != 0) { /* accumulate checksum */ for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) { LASSERT (i < niov); fragnob = iov[i].iov_len; if (fragnob > sum) fragnob = sum; conn->ksnc_rx_csum = ksocknal_csum(conn->ksnc_rx_csum, iov[i].iov_base, fragnob); } conn->ksnc_msg.ksm_csum = saved_csum; } return rc; } static void ksocknal_lib_kiov_vunmap(void *addr) { if (addr == NULL) return; vunmap(addr); } static void * ksocknal_lib_kiov_vmap(lnet_kiov_t *kiov, int niov, struct iovec *iov, struct page **pages) { void *addr; int nob; int i; if (!*ksocknal_tunables.ksnd_zc_recv || pages == NULL) return NULL; LASSERT (niov <= LNET_MAX_IOV); if (niov < 2 || niov < *ksocknal_tunables.ksnd_zc_recv_min_nfrags) return NULL; for (nob = i = 0; i < niov; i++) { if ((kiov[i].kiov_offset != 0 && i > 0) || (kiov[i].kiov_offset + kiov[i].kiov_len != CFS_PAGE_SIZE && i < niov - 1)) return NULL; pages[i] = kiov[i].kiov_page; nob += kiov[i].kiov_len; } addr = vmap(pages, niov, VM_MAP, PAGE_KERNEL); if (addr == NULL) return NULL; iov->iov_base = addr + kiov[0].kiov_offset; iov->iov_len = nob; return addr; } int ksocknal_lib_recv_kiov (ksock_conn_t *conn) { #if SOCKNAL_SINGLE_FRAG_RX || !SOCKNAL_RISK_KMAP_DEADLOCK struct iovec scratch; struct iovec *scratchiov = &scratch; struct page **pages = NULL; unsigned int niov = 1; #else #ifdef CONFIG_HIGHMEM #warning "XXX risk of kmap deadlock on multiple frags..." #endif struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov; struct page **pages = conn->ksnc_scheduler->kss_rx_scratch_pgs; unsigned int niov = conn->ksnc_rx_nkiov; #endif lnet_kiov_t *kiov = conn->ksnc_rx_kiov; struct msghdr msg = { .msg_name = NULL, .msg_namelen = 0, .msg_iov = scratchiov, .msg_control = NULL, .msg_controllen = 0, .msg_flags = 0 }; mm_segment_t oldmm = get_fs(); int nob; int i; int rc; void *base; void *addr; int sum; int fragnob; /* NB we can't trust socket ops to either consume our iovs * or leave them alone. */ if ((addr = ksocknal_lib_kiov_vmap(kiov, niov, scratchiov, pages)) != NULL) { nob = scratchiov[0].iov_len; msg.msg_iovlen = 1; } else { for (nob = i = 0; i < niov; i++) { nob += scratchiov[i].iov_len = kiov[i].kiov_len; scratchiov[i].iov_base = kmap(kiov[i].kiov_page) + kiov[i].kiov_offset; } msg.msg_iovlen = niov; } LASSERT (nob <= conn->ksnc_rx_nob_wanted); set_fs (KERNEL_DS); rc = sock_recvmsg (conn->ksnc_sock, &msg, nob, MSG_DONTWAIT); /* NB this is just a boolean.......................^ */ set_fs (oldmm); if (conn->ksnc_msg.ksm_csum != 0) { for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) { LASSERT (i < niov); /* Dang! have to kmap again because I have nowhere to stash the * mapped address. But by doing it while the page is still * mapped, the kernel just bumps the map count and returns me * the address it stashed. */ base = kmap(kiov[i].kiov_page) + kiov[i].kiov_offset; fragnob = kiov[i].kiov_len; if (fragnob > sum) fragnob = sum; conn->ksnc_rx_csum = ksocknal_csum(conn->ksnc_rx_csum, base, fragnob); kunmap(kiov[i].kiov_page); } } if (addr != NULL) { ksocknal_lib_kiov_vunmap(addr); } else { for (i = 0; i < niov; i++) kunmap(kiov[i].kiov_page); } return (rc); } void ksocknal_lib_csum_tx(ksock_tx_t *tx) { int i; __u32 csum; void *base; LASSERT(tx->tx_iov[0].iov_base == (void *)&tx->tx_msg); LASSERT(tx->tx_conn != NULL); LASSERT(tx->tx_conn->ksnc_proto == &ksocknal_protocol_v2x); tx->tx_msg.ksm_csum = 0; csum = ksocknal_csum(~0, (void *)tx->tx_iov[0].iov_base, tx->tx_iov[0].iov_len); if (tx->tx_kiov != NULL) { for (i = 0; i < tx->tx_nkiov; i++) { base = kmap(tx->tx_kiov[i].kiov_page) + tx->tx_kiov[i].kiov_offset; csum = ksocknal_csum(csum, base, tx->tx_kiov[i].kiov_len); kunmap(tx->tx_kiov[i].kiov_page); } } else { for (i = 1; i < tx->tx_niov; i++) csum = ksocknal_csum(csum, tx->tx_iov[i].iov_base, tx->tx_iov[i].iov_len); } if (*ksocknal_tunables.ksnd_inject_csum_error) { csum++; *ksocknal_tunables.ksnd_inject_csum_error = 0; } tx->tx_msg.ksm_csum = csum; } int ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem, int *rxmem, int *nagle) { mm_segment_t oldmm = get_fs (); struct socket *sock = conn->ksnc_sock; int len; int rc; rc = ksocknal_connsock_addref(conn); if (rc != 0) { LASSERT (conn->ksnc_closing); *txmem = *rxmem = *nagle = 0; return (-ESHUTDOWN); } rc = libcfs_sock_getbuf(sock, txmem, rxmem); if (rc == 0) { len = sizeof(*nagle); set_fs(KERNEL_DS); rc = sock->ops->getsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)nagle, &len); set_fs(oldmm); } ksocknal_connsock_decref(conn); if (rc == 0) *nagle = !*nagle; else *txmem = *rxmem = *nagle = 0; return (rc); }