static int tcpx_pep_sock_create(struct tcpx_pep *pep) { int ret, af; switch (pep->info->addr_format) { case FI_SOCKADDR: case FI_SOCKADDR_IN: case FI_SOCKADDR_IN6: af = ((struct sockaddr *)pep->info->src_addr)->sa_family; break; default: FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL, "invalid source address format\n"); return -FI_EINVAL; } pep->sock = ofi_socket(af, SOCK_STREAM, 0); if (pep->sock == INVALID_SOCKET) { FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL, "failed to create listener: %s\n", strerror(ofi_sockerr())); return -FI_EIO; } if (ofi_addr_get_port(pep->info->src_addr) != 0 || port_range.high == 0) { ret = tcpx_setup_socket(pep->sock); if (ret) { goto err; } ret = bind(pep->sock, pep->info->src_addr, (socklen_t) pep->info->src_addrlen); } else { ret = tcpx_setup_socket_nodelay(pep->sock); if (ret) { goto err; } ret = tcpx_bind_to_port_range(pep->sock, pep->info->src_addr, pep->info->src_addrlen); } if (ret) { FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL, "failed to bind listener: %s\n", strerror(ofi_sockerr())); goto err; } return FI_SUCCESS; err: ofi_close_socket(pep->sock); pep->sock = INVALID_SOCKET; return ret; }
/*
 * Look up, or actively establish, the connection to a peer.
 *
 * For FI_EP_MSG endpoints the peer is ep_attr->dest_addr with the port
 * replaced by ep_attr->msg_dest_port; for other endpoint types it is the
 * address stored in the address vector at `index`.
 *
 * The connection map is consulted first.  Any result other than
 * SOCK_CM_CONN_IN_PROGRESS is returned unchanged.  Otherwise a non-blocking
 * TCP connect is attempted, waiting up to 15 seconds for completion.  A
 * failed or timed-out attempt is retried (after a 10 second pause) until the
 * retry budget taken from sock_conn_retry is used up.
 *
 * Returns the connection on success, or NULL on failure.  When the socket
 * cannot be created or made non-blocking, errno is set to FI_EOTHER.
 */
struct sock_conn *sock_ep_connect(struct sock_ep_attr *ep_attr, fi_addr_t index)
{
	int conn_fd = -1, ret;
	int do_retry = sock_conn_retry;
	struct sock_conn *conn, *new_conn;
	struct sockaddr_in addr;
	socklen_t lon;
	int valopt = 0;
	struct pollfd poll_fd;

	if (ep_attr->ep_type == FI_EP_MSG) {
		/* Need to check that destination address has been
		 * passed to endpoint */
		assert(ep_attr->dest_addr);
		addr = *ep_attr->dest_addr;
		addr.sin_port = htons(ep_attr->msg_dest_port);
	} else {
		addr = *((struct sockaddr_in *)&ep_attr->av->table[index].addr);
	}

do_connect:
	fastlock_acquire(&ep_attr->cmap.lock);
	conn = sock_ep_lookup_conn(ep_attr, index, &addr);
	fastlock_release(&ep_attr->cmap.lock);

	if (conn != SOCK_CM_CONN_IN_PROGRESS)
		return conn;

	conn_fd = ofi_socket(AF_INET, SOCK_STREAM, 0);
	if (conn_fd == -1) {
		SOCK_LOG_ERROR("failed to create conn_fd, errno: %d\n", errno);
		errno = FI_EOTHER;
		return NULL;
	}

	ret = fd_set_nonblock(conn_fd);
	if (ret) {
		SOCK_LOG_ERROR("failed to set conn_fd nonblocking, errno: %d\n",
			       errno);
		errno = FI_EOTHER;
		ofi_close_socket(conn_fd);
		return NULL;
	}

	SOCK_LOG_DBG("Connecting to: %s:%d\n", inet_ntoa(addr.sin_addr),
		     ntohs(addr.sin_port));
	SOCK_LOG_DBG("Connecting using address:%s\n",
		     inet_ntoa(ep_attr->src_addr->sin_addr));

	ret = connect(conn_fd, (struct sockaddr *) &addr, sizeof addr);
	if (ret < 0) {
		if (ofi_sockerr() == EINPROGRESS) {
			poll_fd.fd = conn_fd;
			poll_fd.events = POLLOUT;

			ret = poll(&poll_fd, 1, 15 * 1000);
			if (ret < 0) {
				SOCK_LOG_DBG("poll failed\n");
				goto retry;
			}
			if (ret == 0) {
				/*
				 * Timeout: the connect is still pending, so
				 * SO_ERROR would read 0 and the socket would
				 * wrongly be treated as connected.
				 */
				SOCK_LOG_DBG("poll timed out\n");
				goto retry;
			}

			lon = sizeof(valopt);
			ret = getsockopt(conn_fd, SOL_SOCKET, SO_ERROR,
					 (void*)(&valopt), &lon);
			if (ret < 0) {
				SOCK_LOG_DBG("getsockopt failed: %d, %d\n",
					     ret, conn_fd);
				goto retry;
			}

			if (valopt) {
				SOCK_LOG_DBG("Error in connection() "
					     "%d - %s - %d\n",
					     valopt, strerror(valopt), conn_fd);
				SOCK_LOG_DBG("Connecting to: %s:%d\n",
					     inet_ntoa(addr.sin_addr),
					     ntohs(addr.sin_port));
				SOCK_LOG_DBG("Connecting using address:%s\n",
				inet_ntoa(ep_attr->src_addr->sin_addr));
				goto retry;
			}
			goto out;
		} else {
			SOCK_LOG_DBG("Timeout or error() - %s: %d\n",
				     strerror(errno), conn_fd);
			SOCK_LOG_DBG("Connecting to: %s:%d\n",
				     inet_ntoa(addr.sin_addr),
				     ntohs(addr.sin_port));
			SOCK_LOG_DBG("Connecting using address:%s\n",
				     inet_ntoa(ep_attr->src_addr->sin_addr));
			goto retry;
		}
	} else {
		goto out;
	}

retry:
	do_retry--;
	/* Give up immediately once the budget is spent; only pause when
	 * another attempt will actually follow. */
	if (!do_retry)
		goto err;
	sleep(10);

	if (conn_fd != -1) {
		ofi_close_socket(conn_fd);
		conn_fd = -1;
	}

	SOCK_LOG_ERROR("Connect error, retrying - %s - %d\n",
		       strerror(errno), conn_fd);
	SOCK_LOG_DBG("Connecting to: %s:%d\n", inet_ntoa(addr.sin_addr),
		     ntohs(addr.sin_port));
	SOCK_LOG_DBG("Connecting using address:%s\n",
		     inet_ntoa(ep_attr->src_addr->sin_addr));
	goto do_connect;

out:
	fastlock_acquire(&ep_attr->cmap.lock);
	new_conn = sock_conn_map_insert(ep_attr, &addr, conn_fd, 0);
	if (!new_conn) {
		fastlock_release(&ep_attr->cmap.lock);
		goto err;
	}
	new_conn->av_index = (ep_attr->ep_type == FI_EP_MSG) ?
			     FI_ADDR_NOTAVAIL : index;

	/* Publish the new connection only if the slot is still marked as
	 * in progress; otherwise return whatever is stored there now. */
	conn = ofi_idm_lookup(&ep_attr->av_idm, index);
	if (conn == SOCK_CM_CONN_IN_PROGRESS) {
		if (ofi_idm_set(&ep_attr->av_idm, index, new_conn) < 0)
			SOCK_LOG_ERROR("ofi_idm_set failed\n");
		conn = new_conn;
	}
	fastlock_release(&ep_attr->cmap.lock);
	return conn;

err:
	ofi_close_socket(conn_fd);
	return NULL;
}
/*
 * Create the listening socket for an endpoint and start its listener thread.
 *
 * The socket is bound to the IPv4 address in ep_attr->src_addr.  For
 * FI_EP_MSG endpoints the port is dropped before resolving, so the system
 * picks one; for other endpoint types the port from src_addr is used.  The
 * port actually bound is written to listener->service (and to
 * ep_attr->msg_src_port when it had to be queried), and is stored back into
 * src_addr when src_addr had no port.
 *
 * After the socket is listening, a signalling socket pair is created and the
 * _sock_conn_listen thread is started; this function spins until that thread
 * sets listener->is_ready.
 *
 * Returns 0 on success, -FI_EINVAL on any failure.
 */
int sock_conn_listen(struct sock_ep_attr *ep_attr)
{
	struct addrinfo *s_res = NULL, *p;
	struct addrinfo hints = { 0 };
	int listen_fd = -1, ret;
	socklen_t addr_size;
	struct sockaddr_in addr;
	struct sock_conn_listener *listener = &ep_attr->listener;
	char service[NI_MAXSERV] = {0};
	char *port;
	char ipaddr[24];

	hints.ai_family = AF_INET;
	hints.ai_socktype = SOCK_STREAM;
	hints.ai_flags = AI_PASSIVE;

	memcpy(&addr, ep_attr->src_addr, sizeof(addr));
	if (getnameinfo((void *)ep_attr->src_addr, sizeof(*ep_attr->src_addr),
			NULL, 0, listener->service,
			sizeof(listener->service), NI_NUMERICSERV)) {
		SOCK_LOG_ERROR("could not resolve src_addr\n");
		return -FI_EINVAL;
	}

	if (ep_attr->ep_type == FI_EP_MSG) {
		memset(listener->service, 0, NI_MAXSERV);
		port = NULL;
		addr.sin_port = 0;
	} else {
		port = listener->service;
	}

	inet_ntop(addr.sin_family, &addr.sin_addr, ipaddr, sizeof(ipaddr));
	ret = getaddrinfo(ipaddr, port, &hints, &s_res);
	if (ret) {
		SOCK_LOG_ERROR("no available AF_INET address, service %s, %s\n",
			       listener->service, gai_strerror(ret));
		return -FI_EINVAL;
	}

	SOCK_LOG_DBG("Binding listener thread to port: %s\n",
		     listener->service);
	for (p = s_res; p; p = p->ai_next) {
		listen_fd = ofi_socket(p->ai_family, p->ai_socktype,
				       p->ai_protocol);
		if (listen_fd >= 0) {
			sock_set_sockopts(listen_fd);

			/* Bind to the candidate being tried, not always to
			 * the first entry of the result list. */
			if (!bind(listen_fd, p->ai_addr, p->ai_addrlen))
				break;
			ofi_close_socket(listen_fd);
			listen_fd = -1;
		}
	}
	freeaddrinfo(s_res);
	if (listen_fd < 0) {
		SOCK_LOG_ERROR("failed to listen to port: %s\n",
			       listener->service);
		goto err;
	}

	/* No explicit port in the service string: ask the socket which port
	 * it was given. */
	if (atoi(listener->service) == 0) {
		addr_size = sizeof(addr);
		if (getsockname(listen_fd, (struct sockaddr *) &addr,
				&addr_size))
			goto err;
		snprintf(listener->service, sizeof listener->service, "%d",
			 ntohs(addr.sin_port));
		SOCK_LOG_DBG("Bound to port: %s - %d\n",
			     listener->service, getpid());
		ep_attr->msg_src_port = ntohs(addr.sin_port);
	}

	/* Wildcard source address: let the hostname helper fill it in. */
	if (ep_attr->src_addr->sin_addr.s_addr == 0) {
		snprintf(service, sizeof service, "%s", listener->service);
		ret = sock_get_src_addr_from_hostname(ep_attr->src_addr,
						      service);
		if (ret)
			goto err;
	}

	if (listen(listen_fd, sock_cm_def_map_sz)) {
		SOCK_LOG_ERROR("failed to listen socket: %s\n",
			       strerror(errno));
		goto err;
	}

	if (((struct sockaddr_in *) (ep_attr->src_addr))->sin_port == 0) {
		((struct sockaddr_in *) (ep_attr->src_addr))->sin_port =
			htons(atoi(listener->service));
	}

	listener->sock = listen_fd;
	if (socketpair(AF_UNIX, SOCK_STREAM, 0, listener->signal_fds) < 0)
		goto err;

	listener->do_listen = 1;
	fd_set_nonblock(listener->signal_fds[1]);
	if (pthread_create(&listener->listener_thread, 0,
			   _sock_conn_listen, ep_attr)) {
		SOCK_LOG_ERROR("failed to create conn listener thread\n");
		/* The signalling pair is not used without the thread; do not
		 * leak its descriptors. */
		ofi_close_socket(listener->signal_fds[0]);
		ofi_close_socket(listener->signal_fds[1]);
		goto err;
	}

	/* Busy-wait until the listener thread reports that it is running. */
	while (!*((volatile int*)&listener->is_ready));
	return 0;

err:
	/* NOTE(review): on the late failure paths listener->sock still holds
	 * the descriptor closed here - confirm callers do not reuse it. */
	if (listen_fd >= 0)
		ofi_close_socket(listen_fd);
	return -FI_EINVAL;
}
/*
 * Name server thread body.
 *
 * Binds a stream socket to ns->ns_port on any local address, initialises the
 * name map, then loops forever accepting connections.  For each connection
 * one command header of cmd_len bytes is read and handed to
 * util_ns_op_dispatcher(); the connection is then closed.
 *
 * `args` is the struct util_ns describing the server.  The function returns
 * NULL if setup fails; once the accept loop is entered it does not return on
 * its own.  util_ns_name_server_cleanup is registered as the thread cleanup
 * handler with the listening socket and `ns` as its arguments.
 */
static void *util_ns_name_server_func(void *args)
{
	struct util_ns *ns;
	struct addrinfo hints = {
		.ai_flags = AI_PASSIVE,
		.ai_family = AF_UNSPEC,
		.ai_socktype = SOCK_STREAM
	};
	struct addrinfo *res, *p;
	void *cleanup_args[2];
	char *service;
	SOCKET listenfd = INVALID_SOCKET, connfd;
	int n, ret;
	struct util_ns_cmd cmd = (const struct util_ns_cmd){ 0 };

	ns = (struct util_ns *)args;

	if (asprintf(&service, "%d", ns->ns_port) < 0)
		return NULL;

	/* getaddrinfo() signals failure with any non-zero value; the sign of
	 * the EAI_* codes is platform specific. */
	n = getaddrinfo(NULL, service, &hints, &res);
	if (n) {
		free(service);
		return NULL;
	}

	for (p = res; p; p = p->ai_next) {
		listenfd = ofi_socket(p->ai_family, p->ai_socktype,
				      p->ai_protocol);
		if (listenfd != INVALID_SOCKET) {
			n = 1;
			(void) setsockopt(listenfd, SOL_SOCKET, SO_REUSEADDR,
					  &n, sizeof(n));
			if (!bind(listenfd, p->ai_addr, p->ai_addrlen))
				break;
			ofi_close_socket(listenfd);
			listenfd = INVALID_SOCKET;
		}
	}

	freeaddrinfo(res);
	free(service);

	if (listenfd == INVALID_SOCKET)
		return NULL;

	if (util_ns_map_init(ns))
		goto done;

	ret = listen(listenfd, 256);
	if (ret)
		goto done;

	cleanup_args[0] = (void *)(uintptr_t)listenfd;
	cleanup_args[1] = (void *)ns;
	pthread_cleanup_push(util_ns_name_server_cleanup,
			     (void *)cleanup_args);

	while (1) {
		connfd = accept(listenfd, NULL, NULL);
		if (connfd != INVALID_SOCKET) {
			/* Read service data */
			ret = ofi_read_socket(connfd, &cmd, cmd_len);
			if (ret == cmd_len) {
				(void) util_ns_op_dispatcher(ns, &cmd,
							     connfd);
			}
			ofi_close_socket(connfd);
		}
	}

	pthread_cleanup_pop(1);

done:
	ofi_close_socket(listenfd);
	return NULL;
}


/*
 * Name server API: client side
 */

/*
 * Open a stream connection to the name server on host `server`, port
 * ns->ns_port.  Every address returned for the host is tried in turn.
 *
 * Returns the connected socket, -1 if the port string cannot be formatted or
 * the host cannot be resolved, or INVALID_SOCKET if no address accepted the
 * connection.
 */
static int util_ns_connect_server(struct util_ns *ns, const char *server)
{
	struct addrinfo hints = {
		.ai_family   = AF_UNSPEC,
		.ai_socktype = SOCK_STREAM
	};
	struct addrinfo *res, *p;
	char *service;
	SOCKET sockfd = INVALID_SOCKET;
	int n;

	if (asprintf(&service, "%d", ns->ns_port) < 0)
		return -1;

	/* Any non-zero return is a failure; `res` is not valid then. */
	n = getaddrinfo(server, service, &hints, &res);
	if (n) {
		free(service);
		return -1;
	}

	for (p = res; p; p = p->ai_next) {
		sockfd = ofi_socket(p->ai_family, p->ai_socktype,
				    p->ai_protocol);
		if (sockfd != INVALID_SOCKET) {
			if (!connect(sockfd, p->ai_addr, p->ai_addrlen))
				break;
			ofi_close_socket(sockfd);
			sockfd = INVALID_SOCKET;
		}
	}

	freeaddrinfo(res);
	free(service);

	return sockfd;
}

/*
 * Register a (service, name) pair with the name server.
 *
 * The request is a command header with op OFI_UTIL_NS_ADD followed by
 * ns->service_len bytes from `service` and ns->name_len bytes from `name`.
 * It is sent to ns->ns_hostname, or to OFI_NS_DEFAULT_HOSTNAME when no
 * hostname is set.
 *
 * Returns FI_SUCCESS if the whole request was written, -FI_ENOMEM if the
 * request buffer cannot be allocated, and -FI_ENODATA if the server cannot
 * be reached or the write is incomplete.
 */
int ofi_ns_add_local_name(struct util_ns *ns, void *service, void *name)
{
	SOCKET sockfd;
	int ret;
	const char *server = (ns->ns_hostname ?
		ns->ns_hostname : OFI_NS_DEFAULT_HOSTNAME);
	void *write_buf;
	size_t write_len = 0;
	struct util_ns_cmd cmd = {
		.op = OFI_UTIL_NS_ADD,
		.status = 0,
	};

	write_buf = calloc(cmd_len + ns->service_len + ns->name_len, 1);
	if (!write_buf) {
		ret = -FI_ENOMEM;
		goto err1;
	}

	/* Serialise: header | service | name */
	memcpy(write_buf, &cmd, cmd_len);
	write_len += cmd_len;
	memcpy((void *)((char *)write_buf + write_len), service,
	       ns->service_len);
	write_len += ns->service_len;
	memcpy((void *)((char *)write_buf + write_len), name,
	       ns->name_len);
	write_len += ns->name_len;

	sockfd = util_ns_connect_server(ns, server);
	if (sockfd == INVALID_SOCKET) {
		ret = -FI_ENODATA;
		goto err2;
	}

	ret = util_ns_write_socket_op(sockfd, write_buf, write_len);
	ret = ((ret == write_len) ? FI_SUCCESS : -FI_ENODATA);

	ofi_close_socket(sockfd);
err2:
	free(write_buf);
err1:
	return ret;
}

/*
 * Remove a (service, name) pair from the name server.
 *
 * Identical to ofi_ns_add_local_name() except that the command op is
 * OFI_UTIL_NS_DEL.
 *
 * Returns FI_SUCCESS if the whole request was written, -FI_ENOMEM if the
 * request buffer cannot be allocated, and -FI_ENODATA if the server cannot
 * be reached or the write is incomplete.
 */
int ofi_ns_del_local_name(struct util_ns *ns, void *service, void *name)
{
	SOCKET sockfd;
	int ret;
	const char *server_hostname = (ns->ns_hostname ?
		ns->ns_hostname : OFI_NS_DEFAULT_HOSTNAME);
	void *write_buf;
	size_t write_len = 0;
	struct util_ns_cmd cmd = {
		.op = OFI_UTIL_NS_DEL,
		.status = 0,
	};

	write_buf = calloc(cmd_len + ns->service_len + ns->name_len, 1);
	if (!write_buf) {
		ret = -FI_ENOMEM;
		goto err1;
	}

	/* Serialise: header | service | name */
	memcpy(write_buf, &cmd, cmd_len);
	write_len += cmd_len;
	memcpy((void *)((char *)write_buf + write_len), service,
	       ns->service_len);
	write_len += ns->service_len;
	memcpy((void *)((char *)write_buf + write_len), name,
	       ns->name_len);
	write_len += ns->name_len;

	sockfd = util_ns_connect_server(ns, server_hostname);
	if (sockfd == INVALID_SOCKET) {
		ret = -FI_ENODATA;
		goto err2;
	}

	ret = util_ns_write_socket_op(sockfd, write_buf, write_len);
	ret = ((ret == write_len) ? FI_SUCCESS : -FI_ENODATA);

	ofi_close_socket(sockfd);
err2:
	free(write_buf);
err1:
	return ret;
}

/*
 * Query the name server on `server_hostname` for `service`.
 *
 * Sends a command header with op OFI_UTIL_NS_QUERY followed by
 * ns->service_len bytes from `service`, then reads back a command header and
 * a (service, name) record.
 *
 * On success the returned service bytes are copied back into `service` and a
 * newly allocated buffer of ns->name_len bytes holding the name is returned;
 * the caller owns that buffer and must free() it.  NULL is returned if the
 * server cannot be reached, an allocation fails, the exchange fails, or the
 * reply header carries a non-zero status.
 */
void *ofi_ns_resolve_name(struct util_ns *ns, const char *server_hostname,
			  void *service)
{
	void *dest_addr = NULL, *io_buf;
	size_t io_len = 0;
	SOCKET sockfd;
	ssize_t ret = 0;
	struct util_ns_cmd cmd = {
		.op = OFI_UTIL_NS_QUERY,
		.status = 0,
	};

	sockfd = util_ns_connect_server(ns, server_hostname);
	if (sockfd == INVALID_SOCKET)
		goto err1;

	io_buf = calloc(cmd_len + ns->service_len, 1);
	if (!io_buf)
		goto err2;

	/* Request: header | service */
	memcpy(io_buf, &cmd, cmd_len);
	io_len += cmd_len;
	memcpy((void *)((char *)io_buf + io_len), service,
	       ns->service_len);
	io_len += ns->service_len;

	ret = util_ns_write_socket_op(sockfd, io_buf, io_len);
	if (ret < 0)
		goto err3;

	free(io_buf);

	/* Reply payload: service | name */
	io_len = ns->service_len + ns->name_len;
	io_buf = calloc(io_len, 1);
	if (!io_buf)
		goto err2;

	ret = util_ns_read_socket_op(sockfd, &cmd, cmd_len);
	if (ret < 0 || cmd.status)
		goto err3;

	ret = util_ns_read_socket_op(sockfd, io_buf, io_len);
	if (ret == io_len) {
		dest_addr = calloc(ns->name_len, 1);
		if (!dest_addr)
			goto err3;
		io_len = 0;
		memcpy(service, (void *)((char *)io_buf + io_len),
		       ns->service_len);
		io_len += ns->service_len;
		memcpy(dest_addr, (void *)((char *)io_buf + io_len),
		       ns->name_len);
	}

err3:
	free(io_buf);
err2:
	ofi_close_socket(sockfd);
err1:
	return dest_addr;
}

/*
 * Name server API: server side
 */

/*
 * Start the local name server thread and wait briefly for a server to
 * answer on ns->ns_hostname (or OFI_NS_DEFAULT_HOSTNAME when unset).
 *
 * Returns as soon as a connection to the server succeeds, or after the
 * retry loop below runs out; no status is reported to the caller.
 */
void ofi_ns_start_server(struct util_ns *ns)
{
	int ret;
	SOCKET sockfd;
	int sleep_usec = 1000;
	const char *server_hostname = (ns->ns_hostname ?
		ns->ns_hostname : OFI_NS_DEFAULT_HOSTNAME);

	ofi_osd_init();

	ret = pthread_create(&ns->ns_thread, NULL,
			     util_ns_name_server_func, (void *)ns);
	if (ret) {
		/*
		 * use the main thread's ID as invalid
		 * value for the new thread
		 */
		ns->ns_thread = pthread_self();
	}

	/*
	 * Wait for the local name server to come up. It could be the thread
	 * created above, or the thread created by another process on the same
	 * node.  The delay doubles from 1 ms while it stays below 10 ms, so
	 * the total sleep time is about (1+2+4+8)ms = 15 ms.
	 *
	 * NOTE(review): this comment previously claimed a total of
	 * (1+2+4+...+8192)ms = 16 seconds, which would need a loop bound of
	 * 10000000 usec - confirm which wait is intended.
	 */
	while (sleep_usec < 10000) {
		sockfd = util_ns_connect_server(ns, server_hostname);
		if (sockfd != INVALID_SOCKET) {
			ofi_close_socket(sockfd);
			return;
		}
		usleep(sleep_usec);
		sleep_usec *= 2;
	}
}