int
libcfs_sock_accept (cfs_socket_t **newsockp, cfs_socket_t *sock)
{
        cfs_socket_t   *newsock;
        int             rc;

        newsock = _MALLOC(sizeof(cfs_socket_t), M_TEMP, M_WAITOK|M_ZERO);
        if (!newsock) {
                CERROR("Can't allocate cfs_socket.\n");
                return -ENOMEM;
        }
        newsock->s_magic = CFS_SOCK_MAGIC;

        /*
         * The thread will sleep in sock_accept() by calling msleep();
         * it can be interrupted because msleep() is called with PCATCH.
         */
        rc = -sock_accept(C2B_SOCK(sock), NULL, 0, 0,
                          libcfs_sock_upcall, newsock, &C2B_SOCK(newsock));
        if (rc) {
                if (C2B_SOCK(newsock) != NULL)
                        sock_close(C2B_SOCK(newsock));
                FREE(newsock, M_TEMP);
                if ((sock->s_flags & CFS_SOCK_DOWN) != 0)
                        /* shutdown by libcfs_sock_abort_accept(); fake an
                         * error number for lnet_acceptor() */
                        rc = -EAGAIN;
                return rc;
        }

        *newsockp = newsock;
        return 0;
}
int
libcfs_sock_getbuf (cfs_socket_t *sock, int *txbufsize, int *rxbufsize)
{
        int     option;
        int     optlen;
        int     rc;

        if (txbufsize != NULL) {
                optlen = sizeof(option);
                rc = -sock_getsockopt(C2B_SOCK(sock), SOL_SOCKET, SO_SNDBUF,
                                      (char *)&option, &optlen);
                if (rc != 0) {
                        CERROR ("Can't get send buffer size: %d\n", rc);
                        return (rc);
                }
                *txbufsize = option;
        }

        if (rxbufsize != NULL) {
                optlen = sizeof(option);
                rc = -sock_getsockopt (C2B_SOCK(sock), SOL_SOCKET, SO_RCVBUF,
                                       (char *)&option, &optlen);
                if (rc != 0) {
                        CERROR ("Can't get receive buffer size: %d\n", rc);
                        return (rc);
                }
                *rxbufsize = option;
        }

        return 0;
}
int
libcfs_sock_listen (cfs_socket_t **sockp, __u32 local_ip,
                    int local_port, int backlog)
{
        cfs_socket_t    *sock;
        int              fatal;
        int              rc;

        rc = libcfs_sock_create(&sock, &fatal, local_ip, local_port);
        if (rc != 0) {
                if (!fatal)
                        CERROR("Can't create socket: port %d already in use\n",
                               local_port);
                return rc;
        }

        rc = -sock_listen(C2B_SOCK(sock), backlog);
        if (rc == 0) {
                *sockp = sock;
                return 0;
        }

        if (C2B_SOCK(sock) != NULL)
                sock_close(C2B_SOCK(sock));
        FREE(sock, M_TEMP);
        return rc;
}
static int
libcfs_sock_create (cfs_socket_t **sockp, int *fatal,
                    __u32 local_ip, int local_port)
{
        struct sockaddr_in  locaddr;
        cfs_socket_t       *sock;
        int                 option;
        int                 optlen;
        int                 rc;

        /* All errors are fatal except bind failure if the port is in use */
        *fatal = 1;

        sock = _MALLOC(sizeof(cfs_socket_t), M_TEMP, M_WAITOK|M_ZERO);
        if (!sock) {
                CERROR("Can't allocate cfs_socket.\n");
                return -ENOMEM;
        }
        *sockp = sock;
        sock->s_magic = CFS_SOCK_MAGIC;

        rc = -sock_socket(PF_INET, SOCK_STREAM, 0,
                          libcfs_sock_upcall, sock, &C2B_SOCK(sock));
        if (rc != 0)
                goto out;

        option = 1;
        optlen = sizeof(option);
        rc = -sock_setsockopt(C2B_SOCK(sock), SOL_SOCKET,
                              SO_REUSEADDR, &option, optlen);
        if (rc != 0)
                goto out;

        /* can't specify a local IP without a local port */
        LASSERT (local_ip == 0 || local_port != 0);

        if (local_ip != 0 || local_port != 0) {
                bzero (&locaddr, sizeof (locaddr));
                locaddr.sin_len = sizeof(struct sockaddr_in);
                locaddr.sin_family = AF_INET;
                locaddr.sin_port = htons (local_port);
                locaddr.sin_addr.s_addr = (local_ip != 0) ?
                                          htonl(local_ip) : INADDR_ANY;

                rc = -sock_bind(C2B_SOCK(sock), (struct sockaddr *)&locaddr);
                if (rc == -EADDRINUSE) {
                        CDEBUG(D_NET, "Port %d already in use\n", local_port);
                        *fatal = 0;
                        goto out;
                }
                if (rc != 0) {
                        CERROR("Error trying to bind to port %d: %d\n",
                               local_port, rc);
                        goto out;
                }
        }
        return 0;
out:
        if (C2B_SOCK(sock) != NULL)
                sock_close(C2B_SOCK(sock));
        FREE(sock, M_TEMP);
        return rc;
}
void
libcfs_sock_release (cfs_socket_t *sock)
{
        if (C2B_SOCK(sock) != NULL) {
                sock_shutdown(C2B_SOCK(sock), 2);
                sock_close(C2B_SOCK(sock));
        }
        FREE(sock, M_TEMP);
}
int
libcfs_sock_connect (cfs_socket_t **sockp, int *fatal,
                     __u32 local_ip, int local_port,
                     __u32 peer_ip, int peer_port)
{
        cfs_socket_t       *sock;
        struct sockaddr_in  srvaddr;
        int                 rc;

        rc = libcfs_sock_create(&sock, fatal, local_ip, local_port);
        if (rc != 0)
                return rc;

        bzero(&srvaddr, sizeof(srvaddr));
        srvaddr.sin_len = sizeof(struct sockaddr_in);
        srvaddr.sin_family = AF_INET;
        srvaddr.sin_port = htons(peer_port);
        srvaddr.sin_addr.s_addr = htonl(peer_ip);

        rc = -sock_connect(C2B_SOCK(sock), (struct sockaddr *)&srvaddr, 0);
        if (rc == 0) {
                *sockp = sock;
                return 0;
        }

        *fatal = !(rc == -EADDRNOTAVAIL || rc == -EADDRINUSE);
        CDEBUG(*fatal ? D_NETERROR : D_NET,
               "Error %d connecting %u.%u.%u.%u/%d -> %u.%u.%u.%u/%d\n", rc,
               HIPQUAD(local_ip), local_port, HIPQUAD(peer_ip), peer_port);

        libcfs_sock_release(sock);
        return rc;
}
void
libcfs_sock_abort_accept (cfs_socket_t *sock)
{
        /*
         * XXX Liang:
         *
         * We want to wake up the thread blocked in sock_accept(), but we
         * don't know the address it is sleeping on, so we cannot wake it
         * up directly.
         * The thread sleeping in sock_accept() will be woken up when:
         * 1. it is interrupted by a signal
         * 2. a new connection arrives (sonewconn)
         * 3. the socket is disconnected (soisdisconnected)
         *
         * Because we can't send a signal to a thread directly (no KPI for
         * that), the only thing we can do here is disconnect the socket
         * (with sock_shutdown() or something else?).
         *
         * A shutdown request with SHUT_WR or SHUT_RDWR is issued to the
         * protocol:
         * sock_shutdown()->tcp_usr_shutdown()->tcp_usrclosed()->
         * tcp_close()->soisdisconnected(), which wakes up the thread via
         * wakeup((caddr_t)&so->so_timeo);
         */
        sock->s_flags |= CFS_SOCK_DOWN;
        sock_shutdown(C2B_SOCK(sock), SHUT_RDWR);
}
int
libcfs_sock_read (cfs_socket_t *sock, void *buffer, int nob, int timeout)
{
        size_t          rcvlen;
        int             rc;
        cfs_duration_t  to = cfs_time_seconds(timeout);
        cfs_time_t      then;
        struct timeval  tv;

        LASSERT(nob > 0);

        for (;;) {
                struct iovec  iov = {
                        .iov_base = buffer,
                        .iov_len  = nob
                };
                struct msghdr msg = {
                        .msg_name       = NULL,
                        .msg_namelen    = 0,
                        .msg_iov        = &iov,
                        .msg_iovlen     = 1,
                        .msg_control    = NULL,
                        .msg_controllen = 0,
                        .msg_flags      = 0,
                };

                cfs_duration_usec(to, &tv);
                rc = -sock_setsockopt(C2B_SOCK(sock), SOL_SOCKET, SO_RCVTIMEO,
                                      &tv, sizeof(tv));
                if (rc != 0) {
                        CERROR("Can't set socket recv timeout "
                               "%ld.%06d: %d\n",
                               (long)tv.tv_sec, (int)tv.tv_usec, rc);
                        return rc;
                }

                then = cfs_time_current();
                rc = -sock_receive(C2B_SOCK(sock), &msg, 0, &rcvlen);
                to -= cfs_time_current() - then;

                if (rc != 0 && rc != -EWOULDBLOCK)
                        return rc;
                if (rcvlen == nob)
                        return 0;

                if (to <= 0)
                        return -EAGAIN;

                buffer = ((char *)buffer) + rcvlen;
                nob -= rcvlen;
        }
        return 0;
}

int
libcfs_sock_write (cfs_socket_t *sock, void *buffer, int nob, int timeout)
{
        size_t          sndlen;
        int             rc;
        cfs_duration_t  to = cfs_time_seconds(timeout);
        cfs_time_t      then;
        struct timeval  tv;

        LASSERT(nob > 0);

        for (;;) {
                struct iovec  iov = {
                        .iov_base = buffer,
                        .iov_len  = nob
                };
                struct msghdr msg = {
                        .msg_name       = NULL,
                        .msg_namelen    = 0,
                        .msg_iov        = &iov,
                        .msg_iovlen     = 1,
                        .msg_control    = NULL,
                        .msg_controllen = 0,
                        .msg_flags      = (timeout == 0) ? MSG_DONTWAIT : 0,
                };

                if (timeout != 0) {
                        cfs_duration_usec(to, &tv);
                        rc = -sock_setsockopt(C2B_SOCK(sock), SOL_SOCKET,
                                              SO_SNDTIMEO, &tv, sizeof(tv));
                        if (rc != 0) {
                                CERROR("Can't set socket send timeout "
                                       "%ld.%06d: %d\n",
                                       (long)tv.tv_sec, (int)tv.tv_usec, rc);
                                return rc;
                        }
                }

                then = cfs_time_current();
                rc = -sock_send(C2B_SOCK(sock), &msg,
                                ((timeout == 0) ? MSG_DONTWAIT : 0), &sndlen);
                to -= cfs_time_current() - then;

                if (rc != 0 && rc != -EWOULDBLOCK)
                        return rc;
                if (sndlen == nob)
                        return 0;

                if (to <= 0)
                        return -EAGAIN;

                buffer = ((char *)buffer) + sndlen;
                nob -= sndlen;
        }
        return 0;
}

int
libcfs_sock_getaddr (cfs_socket_t *sock, int remote, __u32 *ip, int *port)
{
        struct sockaddr_in sin;
        int                rc;

        if (remote != 0)
                /* Get remote address */
                rc = -sock_getpeername(C2B_SOCK(sock),
                                       (struct sockaddr *)&sin, sizeof(sin));
        else
                /* Get local address */
                rc = -sock_getsockname(C2B_SOCK(sock),
                                       (struct sockaddr *)&sin, sizeof(sin));
        if (rc != 0) {
                CERROR ("Error %d getting sock %s IP/port\n",
                        rc, remote ? "peer" : "local");
                return rc;
        }

        if (ip != NULL)
                *ip = ntohl (sin.sin_addr.s_addr);
        if (port != NULL)
                *port = ntohs (sin.sin_port);

        return 0;
}

int
libcfs_sock_setbuf (cfs_socket_t *sock, int txbufsize, int rxbufsize)
{
        int  option;
        int  rc;

        if (txbufsize != 0) {
                option = txbufsize;
                rc = -sock_setsockopt(C2B_SOCK(sock), SOL_SOCKET, SO_SNDBUF,
                                      (char *)&option, sizeof (option));
                if (rc != 0) {
                        CERROR ("Can't set send buffer %d: %d\n",
                                option, rc);
                        return (rc);
                }
        }

        if (rxbufsize != 0) {
                option = rxbufsize;
                rc = -sock_setsockopt (C2B_SOCK(sock), SOL_SOCKET, SO_RCVBUF,
                                       (char *)&option, sizeof (option));
                if (rc != 0) {
                        CERROR ("Can't set receive buffer %d: %d\n",
                                option, rc);
                        return (rc);
                }
        }

        return 0;
}
int
ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx)
{
        socket_t        sock = C2B_SOCK(conn->ksnc_sock);
        size_t          sndlen;
        int             nob;
        int             rc;

#if SOCKNAL_SINGLE_FRAG_TX
        struct iovec    scratch;
        struct iovec   *scratchiov = &scratch;
        unsigned int    niov = 1;
#else
        struct iovec   *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
        unsigned int    niov = tx->tx_niov;
#endif
        struct msghdr msg = {
                .msg_name       = NULL,
                .msg_namelen    = 0,
                .msg_iov        = scratchiov,
                .msg_iovlen     = niov,
                .msg_control    = NULL,
                .msg_controllen = 0,
                .msg_flags      = MSG_DONTWAIT
        };
        int  i;

        for (nob = i = 0; i < niov; i++) {
                scratchiov[i] = tx->tx_iov[i];
                nob += scratchiov[i].iov_len;
        }

        /*
         * XXX Liang:
         * Linux has MSG_MORE; do we have anything to
         * reduce the number of partial TCP segments sent?
         */
        rc = -sock_send(sock, &msg, MSG_DONTWAIT, &sndlen);
        if (rc == 0)
                rc = sndlen;
        return rc;
}

int
ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
{
        socket_t       sock = C2B_SOCK(conn->ksnc_sock);
        lnet_kiov_t   *kiov = tx->tx_kiov;
        int            rc;
        int            nob;
        size_t         sndlen;

#if SOCKNAL_SINGLE_FRAG_TX
        struct iovec   scratch;
        struct iovec  *scratchiov = &scratch;
        unsigned int   niov = 1;
#else
        struct iovec  *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
        unsigned int   niov = tx->tx_nkiov;
#endif
        struct msghdr msg = {
                .msg_name       = NULL,
                .msg_namelen    = 0,
                .msg_iov        = scratchiov,
                .msg_iovlen     = niov,
                .msg_control    = NULL,
                .msg_controllen = 0,
                .msg_flags      = MSG_DONTWAIT
        };
        int  i;

        for (nob = i = 0; i < niov; i++) {
                scratchiov[i].iov_base = cfs_kmap(kiov[i].kiov_page) +
                                         kiov[i].kiov_offset;
                nob += scratchiov[i].iov_len = kiov[i].kiov_len;
        }

        /*
         * XXX Liang:
         * Linux has MSG_MORE; do we have anything to
         * reduce the number of partial TCP segments sent?
         */
        rc = -sock_send(sock, &msg, MSG_DONTWAIT, &sndlen);

        for (i = 0; i < niov; i++)
                cfs_kunmap(kiov[i].kiov_page);

        if (rc == 0)
                rc = sndlen;
        return rc;
}

int
ksocknal_lib_recv_iov (ksock_conn_t *conn)
{
#if SOCKNAL_SINGLE_FRAG_RX
        struct iovec  scratch;
        struct iovec *scratchiov = &scratch;
        unsigned int  niov = 1;
#else
        struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
        unsigned int  niov = conn->ksnc_rx_niov;
#endif
        struct iovec *iov = conn->ksnc_rx_iov;
        struct msghdr msg = {
                .msg_name       = NULL,
                .msg_namelen    = 0,
                .msg_iov        = scratchiov,
                .msg_iovlen     = niov,
                .msg_control    = NULL,
                .msg_controllen = 0,
                .msg_flags      = 0
        };
        size_t  rcvlen;
        int     nob;
        int     i;
        int     rc;

        LASSERT (niov > 0);

        for (nob = i = 0; i < niov; i++) {
                scratchiov[i] = iov[i];
                nob += scratchiov[i].iov_len;
        }
        LASSERT (nob <= conn->ksnc_rx_nob_wanted);

        rc = -sock_receive (C2B_SOCK(conn->ksnc_sock), &msg,
                            MSG_DONTWAIT, &rcvlen);
        if (rc == 0)
                rc = rcvlen;

        return rc;
}

int
ksocknal_lib_recv_kiov (ksock_conn_t *conn)
{
#if SOCKNAL_SINGLE_FRAG_RX
        struct iovec  scratch;
        struct iovec *scratchiov = &scratch;
        unsigned int  niov = 1;
#else
        struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
        unsigned int  niov = conn->ksnc_rx_nkiov;
#endif
        lnet_kiov_t  *kiov = conn->ksnc_rx_kiov;
        struct msghdr msg = {
                .msg_name       = NULL,
                .msg_namelen    = 0,
                .msg_iov        = scratchiov,
                .msg_iovlen     = niov,
                .msg_control    = NULL,
                .msg_controllen = 0,
                .msg_flags      = 0
        };
        int     nob;
        int     i;
        size_t  rcvlen;
        int     rc;

        /* NB we can't trust socket ops to either consume our iovs
         * or leave them alone.
         */
        for (nob = i = 0; i < niov; i++) {
                scratchiov[i].iov_base = cfs_kmap(kiov[i].kiov_page) +
                                         kiov[i].kiov_offset;
                nob += scratchiov[i].iov_len = kiov[i].kiov_len;
        }
        LASSERT (nob <= conn->ksnc_rx_nob_wanted);

        rc = -sock_receive(C2B_SOCK(conn->ksnc_sock), &msg,
                           MSG_DONTWAIT, &rcvlen);

        for (i = 0; i < niov; i++)
                cfs_kunmap(kiov[i].kiov_page);

        if (rc == 0)
                rc = rcvlen;

        return (rc);
}

void
ksocknal_lib_eager_ack (ksock_conn_t *conn)
{
        /* XXX Liang: */
}

int
ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem,
                                int *rxmem, int *nagle)
{
        socket_t   sock = C2B_SOCK(conn->ksnc_sock);
        int        len;
        int        rc;

        rc = ksocknal_connsock_addref(conn);
        if (rc != 0) {
                LASSERT (conn->ksnc_closing);
                *txmem = *rxmem = *nagle = 0;
                return (-ESHUTDOWN);
        }
        rc = libcfs_sock_getbuf(conn->ksnc_sock, txmem, rxmem);
        if (rc == 0) {
                len = sizeof(*nagle);
                rc = -sock_getsockopt(sock, IPPROTO_TCP, TCP_NODELAY,
                                      nagle, &len);
        }
        ksocknal_connsock_decref(conn);

        if (rc == 0)
                *nagle = !*nagle;
        else
                *txmem = *rxmem = *nagle = 0;

        return (rc);
}

int
ksocknal_lib_setup_sock (cfs_socket_t *sock)
{
        int             rc;
        int             option;
        int             keep_idle;
        int             keep_intvl;
        int             keep_count;
        int             do_keepalive;
        socket_t        so = C2B_SOCK(sock);
        struct linger   linger;

        /* Ensure this socket aborts active sends immediately when we close
         * it. */
        linger.l_onoff = 0;
        linger.l_linger = 0;
        rc = -sock_setsockopt(so, SOL_SOCKET, SO_LINGER,
                              &linger, sizeof(linger));
        if (rc != 0) {
                CERROR ("Can't set SO_LINGER: %d\n", rc);
                return (rc);
        }

        if (!*ksocknal_tunables.ksnd_nagle) {
                option = 1;
                rc = -sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY,
                                      &option, sizeof(option));
                if (rc != 0) {
                        CERROR ("Can't disable nagle: %d\n", rc);
                        return (rc);
                }
        }

        rc = libcfs_sock_setbuf(sock,
                                *ksocknal_tunables.ksnd_tx_buffer_size,
                                *ksocknal_tunables.ksnd_rx_buffer_size);
        if (rc != 0) {
                CERROR ("Can't set buffer tx %d, rx %d buffers: %d\n",
                        *ksocknal_tunables.ksnd_tx_buffer_size,
                        *ksocknal_tunables.ksnd_rx_buffer_size, rc);
                return (rc);
        }

        /* snapshot tunables */
        keep_idle  = *ksocknal_tunables.ksnd_keepalive_idle;
        keep_count = *ksocknal_tunables.ksnd_keepalive_count;
        keep_intvl = *ksocknal_tunables.ksnd_keepalive_intvl;

        do_keepalive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0);
        option = (do_keepalive ? 1 : 0);

        rc = -sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE,
                              &option, sizeof(option));
        if (rc != 0) {
                CERROR ("Can't set SO_KEEPALIVE: %d\n", rc);
                return (rc);
        }

        if (!do_keepalive)
                return (rc);

        rc = -sock_setsockopt(so, IPPROTO_TCP, TCP_KEEPALIVE,
                              &keep_idle, sizeof(keep_idle));

        return (rc);
}

void
ksocknal_lib_push_conn(ksock_conn_t *conn)
{
        socket_t        sock;
        int             val = 1;
        int             rc;

        rc = ksocknal_connsock_addref(conn);
        if (rc != 0)            /* being shut down */
                return;
        sock = C2B_SOCK(conn->ksnc_sock);
        rc = -sock_setsockopt(sock, IPPROTO_TCP,
                              TCP_NODELAY, &val, sizeof(val));
        LASSERT(rc == 0);

        ksocknal_connsock_decref(conn);
        return;
}