static int tcpx_pep_sock_create(struct tcpx_pep *pep) { int ret, af; switch (pep->info->addr_format) { case FI_SOCKADDR: case FI_SOCKADDR_IN: case FI_SOCKADDR_IN6: af = ((struct sockaddr *)pep->info->src_addr)->sa_family; break; default: FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL, "invalid source address format\n"); return -FI_EINVAL; } pep->sock = ofi_socket(af, SOCK_STREAM, 0); if (pep->sock == INVALID_SOCKET) { FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL, "failed to create listener: %s\n", strerror(ofi_sockerr())); return -FI_EIO; } if (ofi_addr_get_port(pep->info->src_addr) != 0 || port_range.high == 0) { ret = tcpx_setup_socket(pep->sock); if (ret) { goto err; } ret = bind(pep->sock, pep->info->src_addr, (socklen_t) pep->info->src_addrlen); } else { ret = tcpx_setup_socket_nodelay(pep->sock); if (ret) { goto err; } ret = tcpx_bind_to_port_range(pep->sock, pep->info->src_addr, pep->info->src_addrlen); } if (ret) { FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL, "failed to bind listener: %s\n", strerror(ofi_sockerr())); goto err; } return FI_SUCCESS; err: ofi_close_socket(pep->sock); pep->sock = INVALID_SOCKET; return ret; }
static void handle_connreq(struct poll_fd_mgr *poll_mgr, struct poll_fd_info *poll_info) { struct tcpx_conn_handle *handle; struct tcpx_pep *pep; struct fi_eq_cm_entry *cm_entry; struct ofi_ctrl_hdr conn_req; SOCKET sock; int ret; assert(poll_info->fid->fclass == FI_CLASS_PEP); pep = container_of(poll_info->fid, struct tcpx_pep, util_pep.pep_fid.fid); sock = accept(pep->sock, NULL, 0); if (sock < 0) { FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL, "accept error: %d\n", ofi_sockerr()); return; } ret = rx_cm_data(sock, &conn_req, ofi_ctrl_connreq, poll_info); if (ret) { FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL, "cm data recv failed \n"); goto err1; } handle = calloc(1, sizeof(*handle)); if (!handle) goto err1; cm_entry = calloc(1, sizeof(*cm_entry) + poll_info->cm_data_sz); if (!cm_entry) goto err2; handle->conn_fd = sock; cm_entry->fid = poll_info->fid; cm_entry->info = fi_dupinfo(&pep->info); if (!cm_entry->info) goto err3; cm_entry->info->handle = &handle->handle; memcpy(cm_entry->data, poll_info->cm_data, poll_info->cm_data_sz); ret = (int) fi_eq_write(&pep->util_pep.eq->eq_fid, FI_CONNREQ, cm_entry, sizeof(*cm_entry) + poll_info->cm_data_sz, 0); if (ret < 0) { FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL, "Error writing to EQ\n"); goto err4; } free(cm_entry); return; err4: fi_freeinfo(cm_entry->info); err3: free(cm_entry); err2: free(handle); err1: ofi_close_socket(sock); }
void tcpx_cq_report_error(struct util_cq *cq, struct tcpx_xfer_entry *xfer_entry, int err) { struct fi_cq_err_entry err_entry; uint64_t data = 0; if (!(xfer_entry->flags & FI_COMPLETION)) return; if (xfer_entry->hdr.base_hdr.flags & OFI_REMOTE_CQ_DATA) { xfer_entry->flags |= FI_REMOTE_CQ_DATA; data = xfer_entry->hdr.cq_data_hdr.cq_data; } err_entry.op_context = xfer_entry->context; err_entry.flags = xfer_entry->flags; err_entry.len = 0; err_entry.buf = NULL; err_entry.data = data; err_entry.tag = 0; err_entry.olen = 0; err_entry.err = err; err_entry.prov_errno = ofi_sockerr(); err_entry.err_data = NULL; err_entry.err_data_size = 0; ofi_cq_write_error(cq, &err_entry); }
/*
 * Bind sock to the first free port in [port_range.low, port_range.high].
 *
 * The search starts at a pseudo-random port inside the range (rand()) and
 * wraps around to port_range.low, so at most one bind attempt is made per
 * port. src_addr is modified: its port is overwritten for every attempt and
 * holds the bound port on success.
 *
 * Returns FI_SUCCESS once a bind succeeds, -FI_EADDRNOTAVAIL when every
 * port in the range is in use, or the negated socket error of the first
 * bind failure other than EADDRINUSE.
 */
static int tcpx_bind_to_port_range(SOCKET sock, void *src_addr, size_t addrlen)
{
	int ret, i, rand_port_number, err;

	rand_port_number = rand() % (port_range.high + 1 - port_range.low) +
			   port_range.low;

	for (i = port_range.low; i <= port_range.high; i++, rand_port_number++) {
		if (rand_port_number > port_range.high)
			rand_port_number = port_range.low;

		ofi_addr_set_port(src_addr, rand_port_number);
		ret = bind(sock, src_addr, (socklen_t) addrlen);
		if (!ret)
			return FI_SUCCESS;

		/*
		 * Capture the socket error once, immediately after the
		 * failing call, so that logging cannot clobber it and the
		 * same value is tested, logged and returned.
		 */
		err = ofi_sockerr();
		if (err == EADDRINUSE)
			continue;

		FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL,
			"failed to bind listener: %s\n", strerror(err));
		return -err;
	}

	/* Every port in the range was tried and found busy. */
	return -FI_EADDRNOTAVAIL;
}
static int tcpx_ep_connect(struct fid_ep *ep, const void *addr, const void *param, size_t paramlen) { struct tcpx_ep *tcpx_ep = container_of(ep, struct tcpx_ep, util_ep.ep_fid); struct tcpx_cm_context *cm_ctx; int ret; if (!addr || !tcpx_ep->conn_fd || paramlen > TCPX_MAX_CM_DATA_SIZE) return -FI_EINVAL; cm_ctx = calloc(1, sizeof(*cm_ctx)); if (!cm_ctx) { FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL, "cannot allocate memory \n"); return -FI_ENOMEM; } ret = connect(tcpx_ep->conn_fd, (struct sockaddr *) addr, (socklen_t) ofi_sizeofaddr(addr)); if (ret && ofi_sockerr() != FI_EINPROGRESS) { ret = -ofi_sockerr(); goto err; } cm_ctx->fid = &tcpx_ep->util_ep.ep_fid.fid; cm_ctx->type = CLIENT_SEND_CONNREQ; if (paramlen) { cm_ctx->cm_data_sz = paramlen; memcpy(cm_ctx->cm_data, param, paramlen); } ret = ofi_wait_fd_add(tcpx_ep->util_ep.eq->wait, tcpx_ep->conn_fd, FI_EPOLL_OUT, tcpx_eq_wait_try_func, NULL,cm_ctx); if (ret) goto err; tcpx_ep->util_ep.eq->wait->signal(tcpx_ep->util_ep.eq->wait); return 0; err: free(cm_ctx); return ret; }
int tcpx_send_msg(struct tcpx_xfer_entry *tx_entry) { ssize_t bytes_sent; struct msghdr msg = {0}; msg.msg_iov = tx_entry->msg_data.iov; msg.msg_iovlen = tx_entry->msg_data.iov_cnt; bytes_sent = ofi_sendmsg_tcp(tx_entry->ep->conn_fd, &msg, MSG_NOSIGNAL); if (bytes_sent < 0) return ofi_sockerr() == EPIPE ? -FI_ENOTCONN : -ofi_sockerr(); tx_entry->done_len += bytes_sent; if (tx_entry->done_len < ntohll(tx_entry->msg_hdr.hdr.size)) { ofi_consume_iov(tx_entry->msg_data.iov, &tx_entry->msg_data.iov_cnt, bytes_sent); return -FI_EAGAIN; } return FI_SUCCESS; }
/*
 * Retrieve the address of the peer connected to an endpoint.
 *
 * addr:    buffer receiving the peer address.
 * addrlen: in: size of addr; out: size of the peer address as reported by
 *          ofi_getpeername(). Left untouched when the call fails.
 *
 * Returns FI_SUCCESS, -FI_ETOOSMALL when the supplied buffer was smaller
 * than the reported address length, or the negated socket error.
 */
static int tcpx_ep_getpeer(struct fid_ep *ep, void *addr, size_t *addrlen)
{
	struct tcpx_ep *tcpx_ep;
	size_t addrlen_in = *addrlen;
	/*
	 * Use a genuine socklen_t for the call: size_t and socklen_t may
	 * differ in width, so handing the callee a cast size_t pointer would
	 * read/write only part of the caller's object.
	 */
	socklen_t len = (socklen_t) addrlen_in;
	int ret;

	tcpx_ep = container_of(ep, struct tcpx_ep, util_ep.ep_fid);
	ret = ofi_getpeername(tcpx_ep->conn_fd, addr, &len);
	if (ret)
		return -ofi_sockerr();

	*addrlen = len;
	return (addrlen_in < *addrlen) ? -FI_ETOOSMALL : FI_SUCCESS;
}
static void server_sock_accept(struct util_wait *wait, struct tcpx_cm_context *cm_ctx) { struct tcpx_conn_handle *handle; struct tcpx_pep *pep; SOCKET sock; int ret; FI_DBG(&tcpx_prov, FI_LOG_EP_CTRL, "Received Connreq\n"); assert(cm_ctx->fid->fclass == FI_CLASS_PEP); pep = container_of(cm_ctx->fid, struct tcpx_pep, util_pep.pep_fid.fid); sock = accept(pep->sock, NULL, 0); if (sock < 0) { FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL, "accept error: %d\n", ofi_sockerr()); return; } handle = calloc(1, sizeof(*handle)); if (!handle) { FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL, "cannot allocate memory \n"); goto err1; } cm_ctx = calloc(1, sizeof(*cm_ctx)); if (!cm_ctx) goto err2; handle->conn_fd = sock; handle->handle.fclass = FI_CLASS_CONNREQ; handle->pep = pep; cm_ctx->fid = &handle->handle; cm_ctx->type = SERVER_RECV_CONNREQ; ret = ofi_wait_fd_add(wait, sock, FI_EPOLL_IN, tcpx_eq_wait_try_func, NULL, (void *) cm_ctx); if (ret) goto err3; wait->signal(wait); return; err3: free(cm_ctx); err2: free(handle); err1: ofi_close_socket(sock); }
/*
 * Send up to len bytes from buf on the connection's socket.
 *
 * Returns the number of bytes written, 0 when the socket error satisfies
 * OFI_SOCK_TRY_SND_RCV_AGAIN, or the negative result of ofi_send_socket()
 * for any other error. On EPIPE the connection is additionally marked as
 * disconnected (conn->connected = 0).
 */
static ssize_t sock_comm_send_socket(struct sock_conn *conn,
				     const void *buf, size_t len)
{
	ssize_t ret;

	ret = ofi_send_socket(conn->sock_fd, buf, len, MSG_NOSIGNAL);
	if (ret < 0) {
		if (OFI_SOCK_TRY_SND_RCV_AGAIN(ofi_sockerr())) {
			ret = 0;
		} else if (ofi_sockerr() == EPIPE) {
			conn->connected = 0;
			SOCK_LOG_DBG("Disconnected: %s:%d\n",
				     inet_ntoa(conn->addr.sin_addr),
				     ntohs(conn->addr.sin_port));
		} else {
			SOCK_LOG_DBG("write error: %s\n",
				     strerror(ofi_sockerr()));
		}
	}

	/* ret is positive here; cast so the argument matches %lu exactly. */
	if (ret > 0)
		SOCK_LOG_DBG("wrote to network: %lu\n", (unsigned long) ret);
	return ret;
}
/*
 * Receive the not-yet-read remainder of rx_detect->hdr from sock.
 *
 * rx_detect->done_len tracks how many header bytes have been read so far
 * and is advanced by the amount received.
 *
 * Returns FI_SUCCESS when this call delivered all remaining header bytes,
 * -FI_EAGAIN when more bytes are still outstanding, -FI_ENOTCONN when the
 * receive returned 0, or the negated socket error on failure.
 */
int tcpx_recv_hdr(SOCKET sock, struct tcpx_rx_detect *rx_detect)
{
	uint8_t *dst = (uint8_t *) &rx_detect->hdr + rx_detect->done_len;
	size_t want = sizeof(rx_detect->hdr) - rx_detect->done_len;
	ssize_t got;

	got = ofi_recv_socket(sock, dst, want, 0);
	if (got == 0)
		return -FI_ENOTCONN;
	if (got < 0)
		return -ofi_sockerr();

	rx_detect->done_len += got;
	if (want == got)
		return FI_SUCCESS;

	return -FI_EAGAIN;
}
static void client_send_connreq(struct util_wait *wait, struct tcpx_cm_context *cm_ctx) { struct tcpx_ep *ep; struct fi_eq_err_entry err_entry; socklen_t len; int status, ret = FI_SUCCESS; FI_DBG(&tcpx_prov, FI_LOG_EP_CTRL, "client send connreq\n"); assert(cm_ctx->fid->fclass == FI_CLASS_EP); ep = container_of(cm_ctx->fid, struct tcpx_ep, util_ep.ep_fid.fid); len = sizeof(status); ret = getsockopt(ep->conn_fd, SOL_SOCKET, SO_ERROR, (char *) &status, &len); if (ret < 0 || status) { FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL, "connection failure\n"); ret = (ret < 0)? -ofi_sockerr() : status; goto err; } ret = tx_cm_data(ep->conn_fd, ofi_ctrl_connreq, cm_ctx); if (ret) goto err; ret = ofi_wait_fd_del(wait, ep->conn_fd); if (ret) goto err; cm_ctx->type = CLIENT_RECV_CONNRESP; ret = ofi_wait_fd_add(wait, ep->conn_fd, FI_EPOLL_IN, tcpx_eq_wait_try_func, NULL, cm_ctx); if (ret) goto err; wait->signal(wait); return; err: memset(&err_entry, 0, sizeof err_entry); err_entry.fid = cm_ctx->fid; err_entry.context = cm_ctx->fid->context; err_entry.err = -ret; free(cm_ctx); fi_eq_write(&ep->util_ep.eq->eq_fid, FI_NOTIFY, &err_entry, sizeof(err_entry), UTIL_FLAG_ERROR); }
int tcpx_recv_msg_data(struct tcpx_xfer_entry *rx_entry) { ssize_t bytes_recvd; bytes_recvd = ofi_readv_socket(rx_entry->ep->conn_fd, rx_entry->msg_data.iov, rx_entry->msg_data.iov_cnt); if (bytes_recvd <= 0) return (bytes_recvd)? -ofi_sockerr(): -FI_ENOTCONN; rx_entry->done_len += bytes_recvd; if (rx_entry->done_len < ntohll(rx_entry->msg_hdr.hdr.size)) { ofi_consume_iov(rx_entry->msg_data.iov, &rx_entry->msg_data.iov_cnt, bytes_recvd); return -FI_EAGAIN; } return FI_SUCCESS; }
/*
 * Peek (MSG_PEEK) at up to len bytes pending on the connection's socket
 * without consuming them.
 *
 * Returns the number of bytes copied into buf. Returns 0 both when the
 * receive returned 0 (the connection is then marked disconnected) and when
 * the receive failed (the error is only logged).
 */
ssize_t sock_comm_peek(struct sock_conn *conn, void *buf, size_t len)
{
	ssize_t ret;

	ret = ofi_recv_socket(conn->sock_fd, buf, len, MSG_PEEK);
	if (ret == 0) {
		conn->connected = 0;
		SOCK_LOG_DBG("Disconnected\n");
		return ret;
	}

	if (ret < 0) {
		SOCK_LOG_DBG("peek %s\n", strerror(ofi_sockerr()));
		ret = 0;
	}

	/* ret is positive here; cast so the argument matches %lu exactly. */
	if (ret > 0)
		SOCK_LOG_DBG("peek from network: %lu\n", (unsigned long) ret);
	return ret;
}
static int tcpx_ep_shutdown(struct fid_ep *ep, uint64_t flags) { struct tcpx_ep *tcpx_ep; int ret; tcpx_ep = container_of(ep, struct tcpx_ep, util_ep.ep_fid); ret = ofi_shutdown(tcpx_ep->conn_fd, SHUT_RDWR); if (ret && ofi_sockerr() != ENOTCONN) { FI_WARN(&tcpx_prov, FI_LOG_EP_DATA, "ep shutdown unsuccessful\n"); } fastlock_acquire(&tcpx_ep->lock); ret = tcpx_ep_shutdown_report(tcpx_ep, &ep->fid); fastlock_release(&tcpx_ep->lock); if (ret) { FI_WARN(&tcpx_prov, FI_LOG_EP_DATA, "Error writing to EQ\n"); } return ret; }
/*
 * Receive up to len bytes from the connection's socket into buf.
 *
 * Returns the number of bytes read. Returns 0 both when the receive
 * returned 0 (the connection is then marked disconnected) and when the
 * receive failed (the error is only logged).
 */
static ssize_t sock_comm_recv_socket(struct sock_conn *conn,
				     void *buf, size_t len)
{
	ssize_t ret;

	ret = ofi_recv_socket(conn->sock_fd, buf, len, 0);
	if (ret == 0) {
		conn->connected = 0;
		SOCK_LOG_DBG("Disconnected: %s:%d\n",
			     inet_ntoa(conn->addr.sin_addr),
			     ntohs(conn->addr.sin_port));
		return ret;
	}

	if (ret < 0) {
		SOCK_LOG_DBG("read %s\n", strerror(ofi_sockerr()));
		ret = 0;
	}

	/* ret is positive here; cast so the argument matches %lu exactly. */
	if (ret > 0)
		SOCK_LOG_DBG("read from network: %lu\n", (unsigned long) ret);
	return ret;
}
/*
 * Return the connection for `index`, establishing it when the lookup
 * reports that one still has to be made.
 *
 * For FI_EP_MSG endpoints the destination is ep_attr->dest_addr with the
 * port replaced by msg_dest_port; otherwise it is the AV table entry at
 * `index`. Whenever sock_ep_lookup_conn() returns anything other than
 * SOCK_CM_CONN_IN_PROGRESS, that value is returned unchanged. Otherwise a
 * non-blocking TCP connect is attempted, waiting up to 15 seconds for it to
 * complete; failed attempts are retried after a 10 second pause until the
 * sock_conn_retry budget is used up.
 *
 * Returns the connection, or NULL on failure. When the socket cannot be
 * created or made non-blocking, errno is set to FI_EOTHER.
 */
struct sock_conn *sock_ep_connect(struct sock_ep_attr *ep_attr, fi_addr_t index)
{
	int conn_fd = -1, ret;
	int do_retry = sock_conn_retry;
	struct sock_conn *conn, *new_conn;
	struct sockaddr_in addr;
	socklen_t lon;
	int valopt = 0;
	struct pollfd poll_fd;

	if (ep_attr->ep_type == FI_EP_MSG) {
		/* Need to check that destination address has been
		 * passed to endpoint */
		assert(ep_attr->dest_addr);
		addr = *ep_attr->dest_addr;
		addr.sin_port = htons(ep_attr->msg_dest_port);
	} else {
		addr = *((struct sockaddr_in *)&ep_attr->av->table[index].addr);
	}

do_connect:
	fastlock_acquire(&ep_attr->cmap.lock);
	conn = sock_ep_lookup_conn(ep_attr, index, &addr);
	fastlock_release(&ep_attr->cmap.lock);

	if (conn != SOCK_CM_CONN_IN_PROGRESS)
		return conn;

	conn_fd = ofi_socket(AF_INET, SOCK_STREAM, 0);
	if (conn_fd == -1) {
		SOCK_LOG_ERROR("failed to create conn_fd, errno: %d\n", errno);
		errno = FI_EOTHER;
		return NULL;
	}

	ret = fd_set_nonblock(conn_fd);
	if (ret) {
		SOCK_LOG_ERROR("failed to set conn_fd nonblocking, errno: %d\n",
			       errno);
		errno = FI_EOTHER;
		ofi_close_socket(conn_fd);
		return NULL;
	}

	SOCK_LOG_DBG("Connecting to: %s:%d\n", inet_ntoa(addr.sin_addr),
		     ntohs(addr.sin_port));
	SOCK_LOG_DBG("Connecting using address:%s\n",
		     inet_ntoa(ep_attr->src_addr->sin_addr));

	ret = connect(conn_fd, (struct sockaddr *) &addr, sizeof addr);
	if (ret < 0) {
		if (ofi_sockerr() == EINPROGRESS) {
			poll_fd.fd = conn_fd;
			poll_fd.events = POLLOUT;

			ret = poll(&poll_fd, 1, 15 * 1000);
			if (ret < 0) {
				SOCK_LOG_DBG("poll failed\n");
				goto retry;
			}
			if (ret == 0) {
				/*
				 * Timeout: the connect is still pending, so
				 * SO_ERROR would read 0 and the socket must
				 * not be mistaken for a connected one.
				 */
				SOCK_LOG_DBG("poll timed out\n");
				goto retry;
			}

			lon = sizeof(int);
			ret = getsockopt(conn_fd, SOL_SOCKET, SO_ERROR,
					 (void*)(&valopt), &lon);
			if (ret < 0) {
				SOCK_LOG_DBG("getsockopt failed: %d, %d\n",
					     ret, conn_fd);
				goto retry;
			}

			if (valopt) {
				SOCK_LOG_DBG("Error in connection() "
					     "%d - %s - %d\n",
					     valopt, strerror(valopt), conn_fd);
				SOCK_LOG_DBG("Connecting to: %s:%d\n",
					     inet_ntoa(addr.sin_addr),
					     ntohs(addr.sin_port));
				SOCK_LOG_DBG("Connecting using address:%s\n",
					     inet_ntoa(ep_attr->src_addr->sin_addr));
				goto retry;
			}
			goto out;
		} else {
			SOCK_LOG_DBG("Timeout or error() - %s: %d\n",
				     strerror(errno), conn_fd);
			SOCK_LOG_DBG("Connecting to: %s:%d\n",
				     inet_ntoa(addr.sin_addr),
				     ntohs(addr.sin_port));
			SOCK_LOG_DBG("Connecting using address:%s\n",
				     inet_ntoa(ep_attr->src_addr->sin_addr));
			goto retry;
		}
	} else {
		goto out;
	}

retry:
	/* Give up right away when the retry budget is exhausted; there is no
	 * point in sleeping before reporting the failure. */
	do_retry--;
	if (!do_retry)
		goto err;

	/* Log while conn_fd still identifies the failed socket. */
	SOCK_LOG_ERROR("Connect error, retrying - %s - %d\n",
		       strerror(errno), conn_fd);
	SOCK_LOG_DBG("Connecting to: %s:%d\n", inet_ntoa(addr.sin_addr),
		     ntohs(addr.sin_port));
	SOCK_LOG_DBG("Connecting using address:%s\n",
		     inet_ntoa(ep_attr->src_addr->sin_addr));

	if (conn_fd != -1) {
		ofi_close_socket(conn_fd);
		conn_fd = -1;
	}

	/* Pause between attempts. */
	sleep(10);
	goto do_connect;

out:
	fastlock_acquire(&ep_attr->cmap.lock);
	new_conn = sock_conn_map_insert(ep_attr, &addr, conn_fd, 0);
	if (!new_conn) {
		fastlock_release(&ep_attr->cmap.lock);
		goto err;
	}

	new_conn->av_index = (ep_attr->ep_type == FI_EP_MSG) ?
			     FI_ADDR_NOTAVAIL : index;

	/* Publish the new connection only if the index is still marked as
	 * in progress; otherwise return whatever is stored there now. */
	conn = ofi_idm_lookup(&ep_attr->av_idm, index);
	if (conn == SOCK_CM_CONN_IN_PROGRESS) {
		if (ofi_idm_set(&ep_attr->av_idm, index, new_conn) < 0)
			SOCK_LOG_ERROR("ofi_idm_set failed\n");
		conn = new_conn;
	}
	fastlock_release(&ep_attr->cmap.lock);
	return conn;

err:
	ofi_close_socket(conn_fd);
	return NULL;
}