int sock_verify_info(struct fi_info *hints) { uint64_t caps; enum fi_ep_type ep_type; int ret; struct sock_domain *domain; struct sock_fabric *fabric; if (!hints) return 0; ep_type = hints->ep_attr ? hints->ep_attr->type : FI_EP_UNSPEC; switch (ep_type) { case FI_EP_UNSPEC: case FI_EP_MSG: caps = SOCK_EP_MSG_CAP; ret = sock_msg_verify_ep_attr(hints->ep_attr, hints->tx_attr, hints->rx_attr); break; case FI_EP_DGRAM: caps = SOCK_EP_DGRAM_CAP; ret = sock_dgram_verify_ep_attr(hints->ep_attr, hints->tx_attr, hints->rx_attr); break; case FI_EP_RDM: caps = SOCK_EP_RDM_CAP; ret = sock_rdm_verify_ep_attr(hints->ep_attr, hints->tx_attr, hints->rx_attr); break; default: ret = -FI_ENODATA; } if (ret) return ret; if ((caps | hints->caps) != caps) { SOCK_LOG_INFO("Unsupported capabilities\n"); return -FI_ENODATA; } switch (hints->addr_format) { case FI_FORMAT_UNSPEC: case FI_SOCKADDR: case FI_SOCKADDR_IN: break; default: return -FI_ENODATA; } if (hints->domain_attr && hints->domain_attr->domain) { domain = container_of(hints->domain_attr->domain, struct sock_domain, dom_fid); if (!sock_dom_check_list(domain)) { SOCK_LOG_INFO("no matching domain\n"); return -FI_ENODATA; } }
/*
 * Send a connection-management datagram to `addr` and wait for a one-byte
 * acknowledgement.
 *
 * The message is (re)sent up to SOCK_EP_MAX_RETRY times; after each send the
 * socket is polled for SOCK_CM_COMM_TIMEOUT and, if readable, one byte is
 * read back as the ACK.
 *
 * Returns 0 once an ACK of the expected size has been received, -1 if
 * sendto() fails or every attempt times out.
 */
static int sock_ep_cm_send_msg(int sock_fd, const struct sockaddr_in *addr,
			       void *msg, size_t len)
{
	int ret, retry;
	unsigned char response;
	struct sockaddr_in from_addr;
	socklen_t addr_len;
	char sa_ip[INET_ADDRSTRLEN] = {0};

	/*
	 * Format straight into our own buffer: inet_ntoa() hands back a shared
	 * static buffer, which is unsafe when several threads log addresses.
	 */
	if (!inet_ntop(AF_INET, &addr->sin_addr, sa_ip, sizeof(sa_ip)))
		sa_ip[0] = '\0';
	SOCK_LOG_INFO("Sending message to %s:%d\n", sa_ip, ntohs(addr->sin_port));

	for (retry = 0; retry < SOCK_EP_MAX_RETRY; retry++) {
		ret = sendto(sock_fd, (char *)msg, len, 0,
			     (const struct sockaddr *) addr, sizeof *addr);
		SOCK_LOG_INFO("Total Sent: %d\n", ret);
		if (ret < 0)
			return -1;

		/* No reply within the timeout: resend. */
		ret = fi_poll_fd(sock_fd, SOCK_CM_COMM_TIMEOUT);
		if (ret <= 0)
			continue;

		addr_len = sizeof(struct sockaddr_in);
		ret = recvfrom(sock_fd, &response, sizeof(response), 0,
			       (struct sockaddr *) &from_addr, &addr_len);
		SOCK_LOG_INFO("Received ACK: %d\n", ret);
		if (ret == sizeof(response))
			return 0;
	}
	return -1;
}
/*
 * Prepare a freshly connected socket for use by the comm layer:
 * disable Nagle, switch to non-blocking mode, initialise the in/out ring
 * buffers and request SOCK_COMM_BUF_SZ kernel send/receive buffers.
 *
 * Every socket-option call is best effort: failures are logged and the
 * function still returns 0.
 */
int sock_comm_buffer_init(struct sock_conn *conn)
{
	int optval = 1;
	/* SO_RCVBUF / SO_SNDBUF take an int, not a socklen_t. */
	int size = SOCK_COMM_BUF_SZ;
	socklen_t optlen;

	if (setsockopt(conn->sock_fd, IPPROTO_TCP, TCP_NODELAY,
		       &optval, sizeof optval))
		SOCK_LOG_ERROR("setsockopt failed\n");

	fd_set_nonblock(conn->sock_fd);
	rbinit(&conn->inbuf, SOCK_COMM_BUF_SZ);
	rbinit(&conn->outbuf, SOCK_COMM_BUF_SZ);

	if (setsockopt(conn->sock_fd, SOL_SOCKET, SO_RCVBUF, &size, sizeof size))
		SOCK_LOG_ERROR("setsockopt failed\n");

	if (setsockopt(conn->sock_fd, SOL_SOCKET, SO_SNDBUF, &size, sizeof size))
		SOCK_LOG_ERROR("setsockopt failed\n");

	/* Report the sizes the kernel actually granted. */
	optlen = sizeof size;
	if (!getsockopt(conn->sock_fd, SOL_SOCKET, SO_RCVBUF, &size, &optlen))
		SOCK_LOG_INFO("SO_RCVBUF: %d\n", size);

	/* getsockopt() may have rewritten optlen; reset it before reuse. */
	optlen = sizeof size;
	if (!getsockopt(conn->sock_fd, SOL_SOCKET, SO_SNDBUF, &size, &optlen))
		SOCK_LOG_INFO("SO_SNDBUF: %d\n", size);

	return 0;
}
static int sock_rdm_verify_tx_attr(const struct fi_tx_attr *attr) { if (!attr) return 0; if ((attr->caps | SOCK_EP_RDM_CAP) != SOCK_EP_RDM_CAP) { SOCK_LOG_INFO("Unsupported RDM tx caps\n"); return -FI_ENODATA; } if ((attr->msg_order | SOCK_EP_MSG_ORDER) != SOCK_EP_MSG_ORDER) { SOCK_LOG_INFO("Unsupported tx message order\n"); return -FI_ENODATA; } if (attr->inject_size > sock_rdm_tx_attr.inject_size) { SOCK_LOG_INFO("Inject size too large\n"); return -FI_ENODATA; } if (attr->size > sock_rdm_tx_attr.size) { SOCK_LOG_INFO("Tx size too large\n"); return -FI_ENODATA; } if (attr->iov_limit > sock_rdm_tx_attr.iov_limit) { SOCK_LOG_INFO("Tx iov limit too large\n"); return -FI_ENODATA; } return 0; }
/*
 * Prepare a freshly connected socket for use by the comm layer:
 * disable Nagle, switch to non-blocking mode, initialise the in/out ring
 * buffers and request SOCK_COMM_BUF_SZ kernel send/receive buffers.
 *
 * All socket tuning is best effort: failures are logged and the function
 * still returns 0.
 */
int sock_comm_buffer_init(struct sock_conn *conn)
{
	int optval = 1;
	/* fcntl(F_GETFL) returns an int and F_SETFL expects one back. */
	int flags;
	/* SO_RCVBUF / SO_SNDBUF take an int, not a socklen_t. */
	int size = SOCK_COMM_BUF_SZ;
	socklen_t optlen;

	if (setsockopt(conn->sock_fd, IPPROTO_TCP, TCP_NODELAY,
		       &optval, sizeof optval))
		SOCK_LOG_ERROR("setsockopt failed\n");

	/* Do not OR O_NONBLOCK into the -1 that a failed F_GETFL returns. */
	flags = fcntl(conn->sock_fd, F_GETFL, 0);
	if (flags < 0 || fcntl(conn->sock_fd, F_SETFL, flags | O_NONBLOCK))
		SOCK_LOG_ERROR("fcntl failed\n");

	rbinit(&conn->inbuf, SOCK_COMM_BUF_SZ);
	rbinit(&conn->outbuf, SOCK_COMM_BUF_SZ);

	if (setsockopt(conn->sock_fd, SOL_SOCKET, SO_RCVBUF, &size, sizeof size))
		SOCK_LOG_ERROR("setsockopt failed\n");
	if (setsockopt(conn->sock_fd, SOL_SOCKET, SO_SNDBUF, &size, sizeof size))
		SOCK_LOG_ERROR("setsockopt failed\n");

	/* Only log a size that getsockopt() actually filled in. */
	optlen = sizeof size;
	if (!getsockopt(conn->sock_fd, SOL_SOCKET, SO_RCVBUF, &size, &optlen))
		SOCK_LOG_INFO("SO_RCVBUF: %d\n", size);

	optlen = sizeof size;
	if (!getsockopt(conn->sock_fd, SOL_SOCKET, SO_SNDBUF, &size, &optlen))
		SOCK_LOG_INFO("SO_SNDBUF: %d\n", size);

	return 0;
}
static int sock_rdm_verify_rx_attr(const struct fi_rx_attr *attr) { if (!attr) return 0; if ((attr->caps | SOCK_EP_RDM_CAP) != SOCK_EP_RDM_CAP) { SOCK_LOG_INFO("Unsupported RDM rx caps\n"); return -FI_ENODATA; } if ((attr->msg_order | SOCK_EP_MSG_ORDER) != SOCK_EP_MSG_ORDER) { SOCK_LOG_INFO("Unsuported rx message order\n"); return -FI_ENODATA; } if (attr->total_buffered_recv > sock_rdm_rx_attr.total_buffered_recv) { SOCK_LOG_INFO("Buffered receive size too large\n"); return -FI_ENODATA; } if (attr->size > sock_rdm_rx_attr.size) { SOCK_LOG_INFO("Rx size too large\n"); return -FI_ENODATA; } if (attr->iov_limit > sock_rdm_rx_attr.iov_limit) { SOCK_LOG_INFO("Rx iov limit too large\n"); return -FI_ENODATA; } return 0; }
int sock_verify_domain_attr(struct fi_domain_attr *attr) { if(!attr) return 0; if(attr->name){ if (strcmp(attr->name, sock_dom_name)) return -FI_ENODATA; } switch(attr->threading){ case FI_THREAD_UNSPEC: case FI_THREAD_SAFE: case FI_THREAD_FID: case FI_THREAD_DOMAIN: case FI_THREAD_COMPLETION: case FI_THREAD_ENDPOINT: break; default: SOCK_LOG_INFO("Invalid threading model!\n"); return -FI_ENODATA; } switch (attr->control_progress){ case FI_PROGRESS_UNSPEC: case FI_PROGRESS_AUTO: case FI_PROGRESS_MANUAL: break; default: SOCK_LOG_INFO("Control progress mode not supported!\n"); return -FI_ENODATA; } switch (attr->data_progress){ case FI_PROGRESS_UNSPEC: case FI_PROGRESS_AUTO: case FI_PROGRESS_MANUAL: break; default: SOCK_LOG_INFO("Data progress mode not supported!\n"); return -FI_ENODATA; } if(attr->cq_data_size > sock_domain_attr.cq_data_size) return -FI_ENODATA; if(attr->ep_cnt > sock_domain_attr.ep_cnt) return -FI_ENODATA; if(attr->max_ep_tx_ctx > sock_domain_attr.max_ep_tx_ctx) return -FI_ENODATA; if(attr->max_ep_rx_ctx > sock_domain_attr.max_ep_rx_ctx) return -FI_ENODATA; return 0; }
int sock_rdm_verify_ep_attr(struct fi_ep_attr *ep_attr, struct fi_tx_attr *tx_attr, struct fi_rx_attr *rx_attr) { int ret; if (ep_attr) { switch (ep_attr->protocol) { case FI_PROTO_UNSPEC: case FI_PROTO_SOCK_TCP: break; default: SOCK_LOG_INFO("Unsupported protocol\n"); return -FI_ENODATA; } if (ep_attr->max_msg_size > sock_rdm_ep_attr.max_msg_size) { SOCK_LOG_INFO("Message size too large\n"); return -FI_ENODATA; } if (ep_attr->max_order_raw_size > sock_rdm_ep_attr.max_order_raw_size) { SOCK_LOG_INFO("RAW order size too large\n"); return -FI_ENODATA; } if (ep_attr->max_order_war_size > sock_rdm_ep_attr.max_order_war_size) { SOCK_LOG_INFO("WAR order size too large\n"); return -FI_ENODATA; } if (ep_attr->max_order_waw_size > sock_rdm_ep_attr.max_order_waw_size) { SOCK_LOG_INFO("WAW order size too large\n"); return -FI_ENODATA; } if ((ep_attr->tx_ctx_cnt > SOCK_EP_MAX_TX_CNT) && ep_attr->tx_ctx_cnt != FI_SHARED_CONTEXT) return -FI_ENODATA; if ((ep_attr->rx_ctx_cnt > SOCK_EP_MAX_RX_CNT) && ep_attr->rx_ctx_cnt != FI_SHARED_CONTEXT) return -FI_ENODATA; } ret = sock_rdm_verify_tx_attr(tx_attr); if (ret) return ret; ret = sock_rdm_verify_rx_attr(rx_attr); if (ret) return ret; return 0; }
/*
 * Write up to `len` bytes from `buf` directly to the connection's socket.
 *
 * Returns the number of bytes write() accepted.  A failed write() is logged
 * and reported as 0 bytes written, so callers never see a negative count.
 */
static ssize_t sock_comm_send_socket(struct sock_conn *conn, const void *buf, size_t len)
{
	ssize_t ret;

	ret = write(conn->sock_fd, buf, len);
	if (ret < 0) {
		SOCK_LOG_INFO("write %s\n", strerror(errno));
		ret = 0;
	}

	/* ssize_t is signed: %zd is the matching conversion, not %lu. */
	SOCK_LOG_INFO("wrote to network: %zd\n", ret);
	return ret;
}
/*
 * Read up to `len` bytes from the connection's socket into `buf`.
 *
 * Returns the number of bytes read() delivered.  A failed read() is logged
 * and reported as 0 bytes read, so callers never see a negative count.
 */
ssize_t sock_comm_recv_socket(struct sock_conn *conn, void *buf, size_t len)
{
	ssize_t ret;

	ret = read(conn->sock_fd, buf, len);
	if (ret < 0) {
		SOCK_LOG_INFO("read %s\n", strerror(errno));
		ret = 0;
	}

	/* ssize_t is signed: %zd is the matching conversion, not %lu. */
	SOCK_LOG_INFO("read from network: %zd\n", ret);
	return ret;
}
/*
 * Allocate a receive entry with `len` bytes of inline storage and queue it
 * on rx_ctx->rx_buffered_list.
 *
 * The payload area sits immediately after the entry header in the same
 * allocation; iov[0] is pointed at it.  rx_ctx->buffered_len is increased
 * by `len`.
 *
 * Returns the new entry, or NULL if the request would reach
 * rx_ctx->attr.total_buffered_recv or the allocation fails.
 */
struct sock_rx_entry *sock_rx_new_buffered_entry(struct sock_rx_ctx *rx_ctx,
						 size_t len)
{
	struct sock_rx_entry *rx_entry;

	if (rx_ctx->buffered_len + len >= rx_ctx->attr.total_buffered_recv) {
		SOCK_LOG_ERROR("Reached max buffered recv limit\n");
		return NULL;
	}

	rx_entry = calloc(1, sizeof(*rx_entry) + len);
	if (!rx_entry)
		return NULL;

	/* %zu matches size_t; %p requires void pointers. */
	SOCK_LOG_INFO("New buffered entry:%p len: %zu, ctx: %p\n",
		      (void *) rx_entry, len, (void *) rx_ctx);

	rx_entry->is_busy = 1;
	rx_entry->is_buffered = 1;
	rx_entry->is_tagged = 0;

	/* Single iov covering the trailing payload area. */
	rx_entry->rx_op.dest_iov_len = 1;
	rx_entry->iov[0].iov.len = len;
	rx_entry->iov[0].iov.addr = (uintptr_t) (rx_entry + 1);
	rx_entry->total_len = len;

	rx_ctx->buffered_len += len;
	dlist_insert_tail(&rx_entry->entry, &rx_ctx->rx_buffered_list);

	return rx_entry;
}
/*
 * Queue or transmit `len` bytes from `buf` on a connection.
 *
 * Large messages (len >= SOCK_COMM_THRESHOLD) bypass the output ring
 * buffer: the buffer is flushed first and, only if the flush drained
 * everything that was pending, the payload is written straight to the
 * socket.  Otherwise nothing is sent and 0 is returned.
 *
 * Smaller messages are copied into the output ring buffer.  If there is not
 * enough room a flush is attempted; when that flush makes no progress 0 is
 * returned.  The copy may be partial.
 *
 * Returns the number of bytes accepted (sent or buffered).
 */
ssize_t sock_comm_send(struct sock_conn *conn, const void *buf, size_t len)
{
	ssize_t nbytes, pending;

	if (len >= SOCK_COMM_THRESHOLD) {
		pending = rbused(&conn->outbuf);
		if (pending != sock_comm_flush(conn))
			return 0;
		return sock_comm_send_socket(conn, buf, len);
	}

	if (rbavail(&conn->outbuf) < len) {
		nbytes = sock_comm_flush(conn);
		if (nbytes <= 0)
			return 0;
	}

	/* Copy as much as currently fits. */
	nbytes = MIN(rbavail(&conn->outbuf), len);
	rbwrite(&conn->outbuf, buf, nbytes);
	rbcommit(&conn->outbuf);
	SOCK_LOG_INFO("buffered %lu\n", nbytes);
	return nbytes;
}
/*
 * Receive up to `len` bytes from the connection's socket into `buf`.
 *
 * Returns the byte count recv() delivered, or 0 when recv() returned zero
 * or failed; callers never see a negative value.
 */
ssize_t sock_comm_recv_socket(struct sock_conn *conn, void *buf, size_t len)
{
	ssize_t nread = recv(conn->sock_fd, buf, len, 0);

	if (nread > 0) {
		SOCK_LOG_INFO("READ from wire: %lu\n", nread);
		return nread;
	}

	return 0;
}
/* FIXME: pool of rx_entry */
/*
 * Allocate a zeroed, untagged receive entry with an initialised list node
 * and consume one slot from rx_ctx->num_left (updated under rx_ctx->lock).
 *
 * Returns the new entry, or NULL if the allocation fails; in that case
 * num_left is left untouched.
 */
struct sock_rx_entry *sock_rx_new_entry(struct sock_rx_ctx *rx_ctx)
{
	struct sock_rx_entry *new_entry = calloc(1, sizeof(*new_entry));

	if (new_entry == NULL)
		return NULL;

	new_entry->is_tagged = 0;
	SOCK_LOG_INFO("New rx_entry: %p, ctx: %p\n", new_entry, rx_ctx);
	dlist_init(&new_entry->entry);

	fastlock_acquire(&rx_ctx->lock);
	rx_ctx->num_left--;
	fastlock_release(&rx_ctx->lock);

	return new_entry;
}
/*
 * Push `len` bytes from `buf` onto the connection's socket in chunks of at
 * most SOCK_COMM_BUF_SZ.
 *
 * The loop stops at the first send() that returns zero or fails.
 * Returns the number of bytes that were handed to the socket, which may be
 * less than `len` (including 0).
 */
static ssize_t sock_comm_send_socket(struct sock_conn *conn, const void *buf, size_t len)
{
	size_t sent = 0;
	size_t chunk;
	ssize_t n;

	while (sent < len) {
		chunk = MIN(len - sent, SOCK_COMM_BUF_SZ);
		n = send(conn->sock_fd, (char *)buf + sent, chunk, 0);
		if (n <= 0)
			break;
		sent += n;
	}

	SOCK_LOG_INFO("WROTE %lu on wire\n", sent);
	return sent;
}
static struct fi_info * sock_ep_msg_process_info(struct sock_conn_req *req) { req->info.src_addr = &req->src_addr; req->info.dest_addr = &req->dest_addr; req->info.tx_attr = &req->tx_attr; req->info.rx_attr = &req->rx_attr; req->info.ep_attr = &req->ep_attr; req->info.domain_attr = &req->domain_attr; req->info.fabric_attr = &req->fabric_attr; req->info.domain_attr->name = NULL; req->info.fabric_attr->name = NULL; req->info.fabric_attr->prov_name = NULL; if (sock_verify_info(&req->info)) { SOCK_LOG_INFO("incoming conn_req not supported\n"); errno = EINVAL; return NULL; } return sock_fi_info(FI_EP_MSG, &req->info, req->info.dest_addr, req->info.src_addr); }
/*
 * Send a one-byte acknowledgement datagram to `addr`.
 *
 * Up to SOCK_EP_MAX_RETRY attempts are made.  When the socket reports that
 * it would block, the function sleeps for SOCK_CM_COMM_TIMEOUT milliseconds
 * before the next attempt; other failures are retried immediately.
 *
 * Returns 1 if the ACK was sent, 0 if every attempt failed.
 */
static int sock_ep_cm_send_ack(int sock_fd, struct sockaddr_in *addr)
{
	int retry, ret, send_errno;
	/* Initialised so no indeterminate stack byte goes out on the wire. */
	unsigned char response = 0;

	for (retry = 0; retry < SOCK_EP_MAX_RETRY; retry++) {
		ret = sendto(sock_fd, &response, sizeof(response), 0,
			     (struct sockaddr *) addr, sizeof *addr);
		/* Capture errno before logging can overwrite it. */
		send_errno = (ret < 0) ? errno : 0;
		SOCK_LOG_INFO("ack: %d\n", ret);

		if (ret == sizeof(response))
			return 1;

		/*
		 * sendto() reports errors as -1 with the reason in errno; the
		 * return value itself is never EWOULDBLOCK/EAGAIN.
		 */
		if (send_errno == EWOULDBLOCK || send_errno == EAGAIN)
			usleep(SOCK_CM_COMM_TIMEOUT * 1000);
	}

	return 0;
}
/*
 * Read up to `len` bytes for a connection, preferring data already staged
 * in the input ring buffer.
 *
 * With an empty ring buffer the data is read straight from the socket into
 * `buf`.  Otherwise the buffered bytes are copied out first and, if the
 * caller asked for more than was buffered, the remainder is read from the
 * socket.  After any direct socket read sock_comm_recv_buffer() is called.
 *
 * Returns the total number of bytes placed in `buf`.
 */
ssize_t sock_comm_recv(struct sock_conn *conn, void *buf, size_t len)
{
	int from_wire = 0;
	ssize_t buffered, from_buf;

	buffered = rbused(&conn->inbuf);
	if (!buffered) {
		from_wire = sock_comm_recv_socket(conn, buf, len);
		sock_comm_recv_buffer(conn);
		return from_wire;
	}

	from_buf = MIN(len, buffered);
	rbread(&conn->inbuf, buf, from_buf);

	if (len > buffered) {
		/* Ring buffer exhausted: top up directly from the socket. */
		from_wire = sock_comm_recv_socket(conn, (char*) buf + buffered,
						  len - buffered);
		if (from_wire < 0)
			from_wire = 0;
		sock_comm_recv_buffer(conn);
	}

	SOCK_LOG_INFO("read from buffer: %lu\n", from_wire + from_buf);
	return from_wire + from_buf;
}
/*
 * sock_alloc_endpoint - allocate and initialise a sock_ep of class `fclass`
 * (FI_CLASS_EP or FI_CLASS_SEP) on `domain` and store it in *ep.
 *
 * Steps, in order:
 *  - if `info` is given it is validated with sock_verify_info(); any failure
 *    is reported as -FI_EINVAL;
 *  - the endpoint is calloc'ed and its ops tables are wired up according to
 *    fclass (any other class jumps to err);
 *  - caps, endpoint type, tx/rx context counts, copies of src/dest address,
 *    tx/rx attributes and connreq are taken from `info`; a zero tx size or
 *    total_buffered_recv is replaced by the provider default;
 *  - tx_ctx_cnt / rx_ctx_cnt equal to FI_SHARED_CONTEXT set tx_shared /
 *    rx_shared; for non-SEP endpoints both counts are then forced to 1;
 *  - the tx/rx context arrays are allocated and, for non-SEP endpoints, a
 *    default tx and rx context is created and stored in slot 0;
 *  - sock_conn_listen() is called; for FI_EP_MSG endpoints a socketpair is
 *    created and signal_fds[1] is switched to non-blocking mode;
 *  - the domain reference count is incremented.
 *
 * Returns 0 on success, -FI_ENOMEM if the endpoint itself cannot be
 * allocated, -FI_EINVAL on every other failure.
 *
 * NOTE(review): *ep is assigned before the later failure points, and the err
 * path frees sock_ep without resetting *ep, so a caller that looks at *ep
 * after a failure sees a dangling pointer.
 * NOTE(review): the err path frees only sock_ep; src_addr, dest_addr,
 * tx_array, rx_array and the default contexts allocated before the failure
 * are not released here.
 * NOTE(review): the results of calloc() for src_addr/dest_addr/tx_array/
 * rx_array and of sock_tx_ctx_alloc()/sock_rx_ctx_alloc() are used without
 * a NULL check.
 * NOTE(review): the fastlock_acquire/fastlock_release pair on sock_dom->lock
 * protects nothing - there is no statement between them.
 * NOTE(review): the late memcpy of *info into sock_ep->info overwrites the
 * caps/addr_format/connreq values stored earlier - confirm that is intended.
 */
int sock_alloc_endpoint(struct fid_domain *domain, struct fi_info *info, struct sock_ep **ep, void *context, size_t fclass) { int ret, flags; struct sock_ep *sock_ep; struct sock_tx_ctx *tx_ctx; struct sock_rx_ctx *rx_ctx; struct sock_domain *sock_dom; if (info) { ret = sock_verify_info(info); if (ret) { SOCK_LOG_INFO("Cannot support requested options!\n"); return -FI_EINVAL; } } sock_dom = container_of(domain, struct sock_domain, dom_fid); sock_ep = (struct sock_ep*)calloc(1, sizeof(*sock_ep)); if (!sock_ep) return -FI_ENOMEM; switch (fclass) { case FI_CLASS_EP: sock_ep->ep.fid.fclass = FI_CLASS_EP; sock_ep->ep.fid.context = context; sock_ep->ep.fid.ops = &sock_ep_fi_ops; sock_ep->ep.ops = &sock_ep_ops; sock_ep->ep.cm = &sock_ep_cm_ops; sock_ep->ep.msg = &sock_ep_msg_ops; sock_ep->ep.rma = &sock_ep_rma; sock_ep->ep.tagged = &sock_ep_tagged; sock_ep->ep.atomic = &sock_ep_atomic; break; case FI_CLASS_SEP: sock_ep->ep.fid.fclass = FI_CLASS_SEP; sock_ep->ep.fid.context = context; sock_ep->ep.fid.ops = &sock_ep_fi_ops; sock_ep->ep.ops = &sock_ep_ops; sock_ep->ep.cm = &sock_ep_cm_ops; break; default: goto err; } sock_ep->fclass = fclass; *ep = sock_ep; fastlock_acquire(&sock_dom->lock); fastlock_release(&sock_dom->lock); if (info) { sock_ep->info.caps = info->caps; sock_ep->info.addr_format = FI_SOCKADDR_IN; if (info->ep_attr) { sock_ep->ep_type = info->ep_attr->type; sock_ep->ep_attr.tx_ctx_cnt = info->ep_attr->tx_ctx_cnt; sock_ep->ep_attr.rx_ctx_cnt = info->ep_attr->rx_ctx_cnt; } if (info->src_addr) { sock_ep->src_addr = calloc(1, sizeof(struct sockaddr_in)); memcpy(sock_ep->src_addr, info->src_addr, sizeof(struct sockaddr_in)); } if (info->dest_addr) { sock_ep->dest_addr = calloc(1, sizeof(struct sockaddr_in)); memcpy(sock_ep->dest_addr, info->dest_addr, sizeof(struct sockaddr_in)); } if (info->tx_attr) { sock_ep->tx_attr = *info->tx_attr; sock_ep->op_flags = info->tx_attr->op_flags; sock_ep->tx_attr.size = sock_ep->tx_attr.size ? 
sock_ep->tx_attr.size : (SOCK_EP_TX_SZ * SOCK_EP_TX_ENTRY_SZ); } if (info->rx_attr) { sock_ep->rx_attr = *info->rx_attr; sock_ep->op_flags |= info->rx_attr->op_flags; sock_ep->rx_attr.total_buffered_recv = sock_ep->rx_attr.total_buffered_recv ? sock_ep->rx_attr.total_buffered_recv : SOCK_EP_MAX_BUFF_RECV; } sock_ep->info.connreq = info->connreq; } atomic_init(&sock_ep->ref, 0); atomic_init(&sock_ep->num_tx_ctx, 0); atomic_init(&sock_ep->num_rx_ctx, 0); if (sock_ep->ep_attr.tx_ctx_cnt == FI_SHARED_CONTEXT) sock_ep->tx_shared = 1; if (sock_ep->ep_attr.rx_ctx_cnt == FI_SHARED_CONTEXT) sock_ep->rx_shared = 1; if (sock_ep->fclass != FI_CLASS_SEP) { sock_ep->ep_attr.tx_ctx_cnt = 1; sock_ep->ep_attr.rx_ctx_cnt = 1; } sock_ep->tx_array = calloc(sock_ep->ep_attr.tx_ctx_cnt, sizeof(struct sock_tx_ctx *)); sock_ep->rx_array = calloc(sock_ep->ep_attr.rx_ctx_cnt, sizeof(struct sock_rx_ctx *)); if (sock_ep->fclass != FI_CLASS_SEP && sock_ep->ep_attr.tx_ctx_cnt != FI_SHARED_CONTEXT) { /* default tx ctx */ tx_ctx = sock_tx_ctx_alloc(&sock_ep->tx_attr, context); tx_ctx->ep = sock_ep; tx_ctx->domain = sock_dom; tx_ctx->tx_id = 0; dlist_insert_tail(&sock_ep->tx_ctx_entry, &tx_ctx->ep_list); sock_ep->tx_array[0] = tx_ctx; sock_ep->tx_ctx = tx_ctx; } if (sock_ep->fclass != FI_CLASS_SEP && sock_ep->ep_attr.rx_ctx_cnt != FI_SHARED_CONTEXT) { /* default rx_ctx */ rx_ctx = sock_rx_ctx_alloc(&sock_ep->rx_attr, context); rx_ctx->ep = sock_ep; rx_ctx->domain = sock_dom; rx_ctx->rx_id = 0; dlist_insert_tail(&sock_ep->rx_ctx_entry, &rx_ctx->ep_list); sock_ep->rx_array[0] = rx_ctx; sock_ep->rx_ctx = rx_ctx; } /* default config */ sock_ep->min_multi_recv = SOCK_EP_MIN_MULTI_RECV; if (info) { memcpy(&sock_ep->info, info, sizeof(struct fi_info)); } sock_ep->domain = sock_dom; if (sock_conn_listen(sock_ep)) goto err; if (sock_ep->ep_type == FI_EP_MSG) { dlist_init(&sock_ep->cm.msg_list); if (socketpair(AF_UNIX, SOCK_STREAM, 0, sock_ep->cm.signal_fds) < 0) goto err; flags = 
fcntl(sock_ep->cm.signal_fds[1], F_GETFL, 0); if (fcntl(sock_ep->cm.signal_fds[1], F_SETFL, flags | O_NONBLOCK)) SOCK_LOG_ERROR("fcntl failed"); } atomic_inc(&sock_dom->ref); return 0; err: free(sock_ep); return -FI_EINVAL; }
/*
 * Create a passive (listening) MSG endpoint on `fabric`.
 *
 * `info` is mandatory.  Its src_addr, when present, becomes the address the
 * passive endpoint will listen on; otherwise the local host name is resolved
 * to an IPv4 address and used instead.  A socketpair is created for
 * signalling the listener thread, with signal_fds[1] set non-blocking.
 *
 * On success *pep points at the new endpoint and 0 is returned.
 * Returns -FI_EINVAL for a missing or unsupported `info` or when the local
 * address cannot be determined, -FI_ENOMEM on allocation failure and
 * -FI_EIO when the signalling socketpair cannot be created.
 */
int sock_msg_passive_ep(struct fid_fabric *fabric, struct fi_info *info,
			struct fid_pep **pep, void *context)
{
	int ret, flags;
	struct sock_pep *_pep;
	char hostname[HOST_NAME_MAX];
	struct addrinfo sock_hints;
	struct addrinfo *result = NULL;

	/* Reject a missing info up front instead of after allocating. */
	if (!info) {
		SOCK_LOG_ERROR("invalid fi_info\n");
		return -FI_EINVAL;
	}

	ret = sock_verify_info(info);
	if (ret) {
		SOCK_LOG_INFO("Cannot support requested options!\n");
		return -FI_EINVAL;
	}

	_pep = calloc(1, sizeof(*_pep));
	if (!_pep)
		return -FI_ENOMEM;

	if (info->src_addr) {
		memcpy(&_pep->src_addr, info->src_addr, sizeof(struct sockaddr_in));
	} else {
		if (gethostname(hostname, sizeof(hostname))) {
			SOCK_LOG_INFO("gethostname failed!\n");
			ret = -FI_EINVAL;
			goto err;
		}
		/* gethostname() need not terminate a truncated name. */
		hostname[sizeof(hostname) - 1] = '\0';

		memset(&sock_hints, 0, sizeof(struct addrinfo));
		sock_hints.ai_family = AF_INET;
		sock_hints.ai_socktype = SOCK_STREAM;

		ret = getaddrinfo(hostname, NULL, &sock_hints, &result);
		if (ret != 0) {
			ret = -FI_EINVAL;
			SOCK_LOG_INFO("getaddrinfo failed!\n");
			goto err;
		}

		/* Never copy more than the destination can hold. */
		if (result->ai_addrlen != sizeof(struct sockaddr_in)) {
			SOCK_LOG_INFO("getaddrinfo failed!\n");
			freeaddrinfo(result);
			ret = -FI_EINVAL;
			goto err;
		}
		memcpy(&_pep->src_addr, result->ai_addr, sizeof(struct sockaddr_in));
		freeaddrinfo(result);
	}
	_pep->info = *info;

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, _pep->signal_fds) < 0) {
		ret = -FI_EIO;
		goto err;
	}

	/* Best effort: a blocking signal fd is logged but not fatal. */
	flags = fcntl(_pep->signal_fds[1], F_GETFL, 0);
	if (flags < 0 ||
	    fcntl(_pep->signal_fds[1], F_SETFL, flags | O_NONBLOCK))
		SOCK_LOG_ERROR("fcntl failed");

	_pep->pep.fid.fclass = FI_CLASS_PEP;
	_pep->pep.fid.context = context;
	_pep->pep.fid.ops = &sock_pep_fi_ops;
	_pep->pep.cm = &sock_pep_cm_ops;
	_pep->pep.ops = NULL;
	_pep->sock_fab = container_of(fabric, struct sock_fabric, fab_fid);

	*pep = &_pep->pep;
	return 0;

err:
	free(_pep);
	return ret;
}
/*
 * sock_ep_close - tear down an endpoint whose fid class is FI_CLASS_EP or
 * FI_CLASS_SEP.
 *
 * Returns -FI_EINVAL for any other fid class, -FI_EBUSY while the
 * endpoint's ref, num_rx_ctx or num_tx_ctx counters are non-zero, and 0
 * once everything has been released.
 *
 * Teardown order:
 *  1. For non-SEP endpoints that are not using a shared context, slot 0 of
 *     tx_array / rx_array is passed to sock_pe_remove_{tx,rx}_ctx() and
 *     then freed.
 *  2. The context arrays and the src/dest address copies are freed.
 *  3. FI_EP_MSG only: cm.do_listen is cleared, one byte is written to
 *     cm.signal_fds[0] (presumably to wake the CM thread - confirm against
 *     the thread body), cm.listener_thread is joined if set, and both cm
 *     signal fds are closed.
 *  4. The same sequence is applied to the connection listener; here the
 *     thread is joined unconditionally.
 *  5. The listener's service (parsed with atoi) is removed from the fabric,
 *     the domain reference is dropped and the endpoint is freed.
 *
 * NOTE(review): pthread_join() reports failure through its return value,
 * not errno, so the errno printed in the error logs may be unrelated.
 */
static int sock_ep_close(struct fid *fid) { struct sock_ep *sock_ep; char c = 0; switch(fid->fclass) { case FI_CLASS_EP: sock_ep = container_of(fid, struct sock_ep, ep.fid); break; case FI_CLASS_SEP: sock_ep = container_of(fid, struct sock_ep, ep.fid); break; default: return -FI_EINVAL; } if (atomic_get(&sock_ep->ref) || atomic_get(&sock_ep->num_rx_ctx) || atomic_get(&sock_ep->num_tx_ctx)) return -FI_EBUSY; if (sock_ep->fclass != FI_CLASS_SEP && !sock_ep->tx_shared) { sock_pe_remove_tx_ctx(sock_ep->tx_array[0]); sock_tx_ctx_free(sock_ep->tx_array[0]); } if (sock_ep->fclass != FI_CLASS_SEP && !sock_ep->rx_shared) { sock_pe_remove_rx_ctx(sock_ep->rx_array[0]); sock_rx_ctx_free(sock_ep->rx_array[0]); } free(sock_ep->tx_array); free(sock_ep->rx_array); if (sock_ep->src_addr) free(sock_ep->src_addr); if (sock_ep->dest_addr) free(sock_ep->dest_addr); if (sock_ep->ep_type == FI_EP_MSG) { sock_ep->cm.do_listen = 0; if (write(sock_ep->cm.signal_fds[0], &c, 1) != 1) { SOCK_LOG_INFO("Failed to signal\n"); } if (sock_ep->cm.listener_thread && pthread_join(sock_ep->cm.listener_thread, NULL)) { SOCK_LOG_ERROR("pthread join failed (%d)\n", errno); } close(sock_ep->cm.signal_fds[0]); close(sock_ep->cm.signal_fds[1]); } sock_ep->listener.do_listen = 0; if (write(sock_ep->listener.signal_fds[0], &c, 1) != 1) { SOCK_LOG_INFO("Failed to signal\n"); } if (pthread_join(sock_ep->listener.listener_thread, NULL)) { SOCK_LOG_ERROR("pthread join failed (%d)\n", errno); } close(sock_ep->listener.signal_fds[0]); close(sock_ep->listener.signal_fds[1]); sock_fabric_remove_service(sock_ep->domain->fab, atoi(sock_ep->listener.service)); atomic_dec(&sock_ep->domain->ref); free(sock_ep); return 0; }
/*
 * Resolve node/service to the first IPv4 address getaddrinfo() returns and
 * copy it into *addr.  Returns 0, -FI_ENODATA if resolution fails, or
 * -FI_EINVAL if no AF_INET entry of the expected size is found.
 */
static int sock_msg_resolve_inet(const char *node, const char *service,
				 const struct addrinfo *ai_hints,
				 struct sockaddr_in *addr)
{
	struct addrinfo *head = NULL, *ai;
	int ret = 0;

	if (getaddrinfo(node, service, ai_hints, &head) != 0) {
		SOCK_LOG_INFO("getaddrinfo failed!\n");
		return -FI_ENODATA;
	}

	for (ai = head; ai; ai = ai->ai_next) {
		if (ai->ai_family == AF_INET &&
		    ai->ai_addrlen == sizeof(struct sockaddr_in))
			break;
	}

	if (ai) {
		memcpy(addr, ai->ai_addr, sizeof(*addr));
	} else {
		SOCK_LOG_ERROR("getaddrinfo failed\n");
		ret = -FI_EINVAL;
	}

	/* Always release the list from its head, not from the match. */
	freeaddrinfo(head);
	return ret;
}

/*
 * Find the local address the kernel would use to reach `dest` by connecting
 * a throw-away UDP socket and reading its bound name into *src.
 * Returns 0 or -FI_ENODATA; the socket is closed on every path.
 */
static int sock_msg_local_addr_for(const struct sockaddr_in *dest,
				   struct sockaddr_in *src)
{
	socklen_t len = sizeof(*src);
	int udp_sock, ret = 0;

	udp_sock = socket(AF_INET, SOCK_DGRAM, 0);
	if (udp_sock < 0) {
		SOCK_LOG_ERROR("Failed to create udp socket\n");
		return -FI_ENODATA;
	}

	if (connect(udp_sock, (const struct sockaddr *) dest, sizeof(*dest))) {
		SOCK_LOG_ERROR("Failed to create udp socket\n");
		ret = -FI_ENODATA;
	} else if (getsockname(udp_sock, (struct sockaddr *) src, &len)) {
		SOCK_LOG_ERROR("getsockname failed\n");
		ret = -FI_ENODATA;
	}

	close(udp_sock);
	return ret;
}

/*
 * fi_getinfo() back end for MSG endpoints.
 *
 * With FI_SOURCE set, or with no node given, node/service (or the local
 * host name) is resolved as the source address.  Otherwise node/service is
 * resolved as the destination and the matching local source address is
 * discovered.  Addresses supplied in `hints` override the resolved ones.
 * The resulting fi_info is returned through *info.
 *
 * Returns 0 on success or a negative fabric error code:
 *  -FI_EBADFLAGS  info is NULL, or node, service and hints are all NULL;
 *  -FI_ENODATA    version or capability mismatch, or name resolution or
 *                 local address discovery failed;
 *  -FI_EINVAL     no usable IPv4 address was found;
 *  -FI_ENOMEM     an allocation failed;
 *  or the result of the rx/tx attribute verification.
 */
int sock_msg_getinfo(uint32_t version, const char *node, const char *service,
		     uint64_t flags, struct fi_info *hints, struct fi_info **info)
{
	int ret;
	struct fi_info *_info;
	struct addrinfo sock_hints;
	struct sockaddr_in *src_addr = NULL, *dest_addr = NULL;
	const char *host;
	char sa_ip[INET_ADDRSTRLEN];
	char hostname[HOST_NAME_MAX];

	if (!info)
		return -FI_EBADFLAGS;

	*info = NULL;

	if (!node && !service && !hints)
		return -FI_EBADFLAGS;

	if (version != FI_VERSION(SOCK_MAJOR_VERSION, SOCK_MINOR_VERSION))
		return -FI_ENODATA;

	if (hints) {
		if ((SOCK_EP_MSG_CAP | hints->caps) != SOCK_EP_MSG_CAP) {
			SOCK_LOG_INFO("Cannot support requested options!\n");
			return -FI_ENODATA;
		}

		ret = sock_msg_verify_rx_attr(hints->rx_attr);
		if (ret)
			return ret;

		ret = sock_msg_verify_tx_attr(hints->tx_attr);
		if (ret)
			return ret;
	}

	memset(&sock_hints, 0, sizeof(struct addrinfo));
	sock_hints.ai_family = AF_INET;
	sock_hints.ai_socktype = SOCK_STREAM;
	if (flags & FI_NUMERICHOST)
		sock_hints.ai_flags |= AI_NUMERICHOST;

	/* A source address is produced on both paths below. */
	src_addr = calloc(1, sizeof(*src_addr));
	if (!src_addr) {
		ret = -FI_ENOMEM;
		goto err;
	}

	if ((flags & FI_SOURCE) || !node) {
		host = node;
		if (!host) {
			if (gethostname(hostname, sizeof(hostname))) {
				SOCK_LOG_INFO("gethostname failed!\n");
				ret = -FI_ENODATA;
				goto err;
			}
			/* gethostname() need not terminate a truncated name. */
			hostname[sizeof(hostname) - 1] = '\0';
			host = hostname;
		}

		ret = sock_msg_resolve_inet(host, service, &sock_hints, src_addr);
		if (ret)
			goto err;
	} else {
		dest_addr = calloc(1, sizeof(*dest_addr));
		if (!dest_addr) {
			ret = -FI_ENOMEM;
			goto err;
		}

		ret = sock_msg_resolve_inet(node, service, &sock_hints, dest_addr);
		if (ret)
			goto err;

		ret = sock_msg_local_addr_for(dest_addr, src_addr);
		if (ret)
			goto err;
	}

	/* hints may legitimately be NULL when node/service were given. */
	if (hints && hints->src_addr) {
		assert(hints->src_addrlen == sizeof(struct sockaddr_in));
		memcpy(src_addr, hints->src_addr, sizeof(*src_addr));
	}

	if (hints && hints->dest_addr) {
		if (!dest_addr) {
			dest_addr = calloc(1, sizeof(*dest_addr));
			if (!dest_addr) {
				ret = -FI_ENOMEM;
				goto err;
			}
		}
		assert(hints->dest_addrlen == sizeof(struct sockaddr_in));
		memcpy(dest_addr, hints->dest_addr, sizeof(*dest_addr));
	}

	if (dest_addr) {
		if (!inet_ntop(AF_INET, &dest_addr->sin_addr, sa_ip, sizeof(sa_ip)))
			sa_ip[0] = '\0';
		SOCK_LOG_INFO("dest_addr: family: %d, IP is %s\n",
			      dest_addr->sin_family, sa_ip);
	}

	if (!inet_ntop(AF_INET, &src_addr->sin_addr, sa_ip, sizeof(sa_ip)))
		sa_ip[0] = '\0';
	SOCK_LOG_INFO("src_addr: family: %d, IP is %s\n",
		      src_addr->sin_family, sa_ip);

	_info = sock_msg_fi_info(hints, src_addr, dest_addr);
	if (!_info) {
		ret = -FI_ENOMEM;
		goto err;
	}

	*info = _info;
	free(src_addr);
	free(dest_addr);
	return 0;

err:
	free(src_addr);
	free(dest_addr);
	SOCK_LOG_ERROR("fi_getinfo failed\n");
	return ret;
}
/*
 * sock_ep_tx_atomic - queue an atomic operation on a transmit context.
 *
 * `ep` may be an endpoint (FI_CLASS_EP, its default tx_ctx is used) or a
 * transmit context (FI_CLASS_TX_CTX); any other class yields -FI_EINVAL.
 * The peer connection is taken from the endpoint when it is connected,
 * otherwise looked up through the tx context's AV; -FI_EAGAIN is returned
 * when no connection is available.
 *
 * The entry is written into the tx context between sock_tx_ctx_start() and
 * sock_tx_ctx_commit() in this order:
 *   1. op header (sock_tx_ctx_write_op_send)
 *   2. 64-bit CQ data, only with FI_REMOTE_CQ_DATA
 *   3. source operand: the raw bytes with FI_INJECT, otherwise one
 *      sock_iov descriptor per msg_iov element
 *   4. one descriptor per rma_iov element (destination)
 *   5. one descriptor per resultv element
 *   6. one descriptor per comparev element
 * Destination, result and compare byte lengths must each equal the source
 * length (result/compare only when their count is non-zero), otherwise the
 * entry is aborted with -FI_EINVAL.  -FI_EAGAIN is returned when the tx
 * context does not have total_len bytes available.  Returns 0 on success.
 *
 * NOTE(review): argument sanity (enabled ctx, iov counts, inject and atomic
 * size limits) is enforced only through assert().
 * NOTE(review): total_len does not account for the compare descriptors or
 * the CQ data word that may be written afterwards.
 * NOTE(review): FI_INJECT is tested for the size estimate before
 * tx_ctx->attr.op_flags is OR-ed into flags, but for the payload after it,
 * so an FI_INJECT coming from op_flags is sized with the non-inject formula.
 * NOTE(review): msg->msg_iov[0].addr is read even when iov_count is 0.
 * NOTE(review): tx_iov.ioc.key is not assigned in the result/compare loops,
 * so those descriptors carry whatever key was stored last.
 * NOTE(review): the err label logs "Not enough space" for the length
 * mismatch failures as well.
 */
static ssize_t sock_ep_tx_atomic(struct fid_ep *ep, const struct fi_msg_atomic *msg, const struct fi_ioc *comparev, void **compare_desc, size_t compare_count, struct fi_ioc *resultv, void **result_desc, size_t result_count, uint64_t flags) { int i, ret; size_t datatype_sz; struct sock_op tx_op; union sock_iov tx_iov; struct sock_conn *conn; struct sock_tx_ctx *tx_ctx; uint64_t total_len, src_len, dst_len; struct sock_ep *sock_ep; switch (ep->fid.fclass) { case FI_CLASS_EP: sock_ep = container_of(ep, struct sock_ep, ep); tx_ctx = sock_ep->tx_ctx; break; case FI_CLASS_TX_CTX: tx_ctx = container_of(ep, struct sock_tx_ctx, fid.ctx); sock_ep = tx_ctx->ep; break; default: SOCK_LOG_ERROR("Invalid EP type\n"); return -FI_EINVAL; } assert(tx_ctx->enabled && msg->iov_count <= SOCK_EP_MAX_IOV_LIMIT && msg->rma_iov_count <= SOCK_EP_MAX_IOV_LIMIT); if (sock_ep->connected) { conn = sock_ep_lookup_conn(sock_ep); } else { conn = sock_av_lookup_addr(sock_ep, tx_ctx->av, msg->addr); } if (!conn) return -FI_EAGAIN; src_len = 0; datatype_sz = fi_datatype_size(msg->datatype); if (flags & FI_INJECT) { for (i=0; i< msg->iov_count; i++) { src_len += (msg->msg_iov[i].count * datatype_sz); } assert(src_len <= SOCK_EP_MAX_INJECT_SZ); total_len = src_len; } else { total_len = msg->iov_count * sizeof(union sock_iov); } total_len += (sizeof(tx_op) + (msg->rma_iov_count * sizeof(union sock_iov)) + (result_count * sizeof (union sock_iov))); sock_tx_ctx_start(tx_ctx); if (rbfdavail(&tx_ctx->rbfd) < total_len) { ret = -FI_EAGAIN; goto err; } flags |= tx_ctx->attr.op_flags; memset(&tx_op, 0, sizeof(tx_op)); tx_op.op = SOCK_OP_ATOMIC; tx_op.dest_iov_len = msg->rma_iov_count; tx_op.atomic.op = msg->op; tx_op.atomic.datatype = msg->datatype; tx_op.atomic.res_iov_len = result_count; tx_op.atomic.cmp_iov_len = compare_count; if (flags & FI_INJECT) tx_op.src_iov_len = src_len; else tx_op.src_iov_len = msg->iov_count; sock_tx_ctx_write_op_send(tx_ctx, &tx_op, flags, (uintptr_t) msg->context, msg->addr, 
(uintptr_t) msg->msg_iov[0].addr, sock_ep, conn); if (flags & FI_REMOTE_CQ_DATA) { sock_tx_ctx_write(tx_ctx, &msg->data, sizeof(uint64_t)); } src_len = 0; if (flags & FI_INJECT) { for (i=0; i< msg->iov_count; i++) { sock_tx_ctx_write(tx_ctx, msg->msg_iov[i].addr, msg->msg_iov[i].count * datatype_sz); src_len += (msg->msg_iov[i].count * datatype_sz); } } else { for (i = 0; i< msg->iov_count; i++) { tx_iov.ioc.addr = (uintptr_t) msg->msg_iov[i].addr; tx_iov.ioc.count = msg->msg_iov[i].count; tx_iov.ioc.key = (uintptr_t) msg->desc[i]; sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(tx_iov)); src_len += (tx_iov.ioc.count * datatype_sz); } } assert(src_len <= SOCK_EP_MAX_ATOMIC_SZ); dst_len = 0; for (i = 0; i< msg->rma_iov_count; i++) { tx_iov.ioc.addr = msg->rma_iov[i].addr; tx_iov.ioc.key = msg->rma_iov[i].key; tx_iov.ioc.count = msg->rma_iov[i].count; sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(tx_iov)); dst_len += (tx_iov.ioc.count * datatype_sz); } if (dst_len != src_len) { SOCK_LOG_ERROR("Buffer length mismatch\n"); ret = -FI_EINVAL; goto err; } dst_len = 0; for (i = 0; i< result_count; i++) { tx_iov.ioc.addr = (uintptr_t) resultv[i].addr; tx_iov.ioc.count = resultv[i].count; sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(tx_iov)); dst_len += (tx_iov.ioc.count * datatype_sz); } if (result_count && (dst_len != src_len)) { SOCK_LOG_ERROR("Buffer length mismatch\n"); ret = -FI_EINVAL; goto err; } dst_len = 0; for (i = 0; i< compare_count; i++) { tx_iov.ioc.addr = (uintptr_t) comparev[i].addr; tx_iov.ioc.count = comparev[i].count; sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(tx_iov)); dst_len += (tx_iov.ioc.count * datatype_sz); } if (compare_count && (dst_len != src_len)) { SOCK_LOG_ERROR("Buffer length mismatch\n"); ret = -FI_EINVAL; goto err; } sock_tx_ctx_commit(tx_ctx); return 0; err: SOCK_LOG_INFO("Not enough space for TX entry, try again\n"); sock_tx_ctx_abort(tx_ctx); return ret; }
/*
 * sock_msg_ep_listener_thread - thread body that waits for the peer's
 * answer to a connection request sent from a MSG endpoint.
 *
 * `data` is the struct sock_ep that started the thread.  While
 * ep->do_listen is set, the thread polls ep->socket without a timeout,
 * receives one datagram into a heap buffer sized for a sock_conn_response
 * plus SOCK_EP_MAX_CM_DATA_SZ bytes of user data, acknowledges it with
 * sock_ep_cm_send_ack(), and dispatches on hdr.type:
 *   SOCK_CONN_ACCEPT - marks the endpoint found through hdr.c_fid as
 *                      connected, enables `ep` and reports FI_CONNECTED
 *                      on ep->eq; the loop keeps running.
 *   SOCK_CONN_REJECT - reports an FI_ECONNREFUSED error entry on ep->eq
 *                      and leaves the loop.
 *   anything else    - logged as "Invalid event".
 * Datagrams shorter than the response header, or whose ACK cannot be sent,
 * are ignored.  On exit the buffer is freed (if still owned), ep->socket is
 * closed and set to 0.  Always returns NULL.
 *
 * NOTE(review): conn_response is reset to NULL after every handled datagram
 * without being freed or stored anywhere visible here - looks like a leak.
 * NOTE(review): the early return on allocation failure skips closing
 * ep->socket.
 * NOTE(review): in the REJECT branch the user data is copied into cm_entry
 * although the event that is reported is cm_err_entry, and the reported
 * size still includes user_data_sz.
 * NOTE(review): user data is copied to &cm_entry.data of a stack object;
 * whether that member has room for user_data_sz bytes depends on the
 * declaration of struct fi_eq_cm_entry - verify.
 * NOTE(review): the cast to volatile int on do_listen is not a
 * synchronisation primitive.
 */
static void *sock_msg_ep_listener_thread (void *data) { struct sock_ep *ep = (struct sock_ep *)data; struct sock_conn_response *conn_response = NULL; struct fi_eq_cm_entry cm_entry; struct fi_eq_err_entry cm_err_entry; struct sockaddr_in from_addr; socklen_t addr_len; int ret, user_data_sz; struct fid_ep *fid_ep; struct sock_ep *sock_ep; SOCK_LOG_INFO("Starting listener thread for EP: %p\n", ep); ep->do_listen = 1; while((volatile int)ep->do_listen) { ret = fi_poll_fd(ep->socket, -1); if (ret <= 0) continue; if (conn_response == NULL) { conn_response = (struct sock_conn_response*) calloc(1, sizeof(*conn_response) + SOCK_EP_MAX_CM_DATA_SZ); if (!conn_response) { SOCK_LOG_ERROR("cannot allocate\n"); return NULL; } } addr_len = sizeof(struct sockaddr_in); ret = recvfrom(ep->socket, (char*)conn_response, sizeof(*conn_response) + SOCK_EP_MAX_CM_DATA_SZ, 0, (struct sockaddr *) &from_addr, &addr_len); if (ret <= 0) continue; SOCK_LOG_INFO("Total received: %d\n", ret); if (ret < sizeof(*conn_response) || !sock_ep_cm_send_ack(ep->socket, &from_addr)) continue; user_data_sz = 0; switch (conn_response->hdr.type) { case SOCK_CONN_ACCEPT: SOCK_LOG_INFO("Received SOCK_CONN_ACCEPT\n"); memset(&cm_entry, 0, sizeof(cm_entry)); cm_entry.fid = conn_response->hdr.c_fid; if (ret > sizeof(struct sock_conn_response)) { user_data_sz = ret - sizeof(struct sock_conn_response); memcpy(&cm_entry.data, (char *)conn_response + sizeof(struct sock_conn_response), user_data_sz); } fid_ep = container_of(conn_response->hdr.c_fid, struct fid_ep, fid); sock_ep = container_of(fid_ep, struct sock_ep, ep); sock_ep->connected = 1; sock_ep_enable(&ep->ep); if (sock_eq_report_event(ep->eq, FI_CONNECTED, &cm_entry, sizeof(cm_entry) + user_data_sz, 0)) SOCK_LOG_ERROR("Error in writing to EQ\n"); break; case SOCK_CONN_REJECT: SOCK_LOG_INFO("Received SOCK_CONN_REJECT\n"); memset(&cm_err_entry, 0, sizeof(cm_err_entry)); cm_err_entry.fid = conn_response->hdr.c_fid; cm_err_entry.context = NULL; cm_err_entry.data = 
0; cm_err_entry.err = -FI_ECONNREFUSED; cm_err_entry.prov_errno = 0; cm_err_entry.err_data = NULL; if (ret > sizeof(struct sock_conn_response)) { user_data_sz = ret - sizeof(struct sock_conn_response); memcpy(&cm_entry.data, (char *)conn_response + sizeof(struct sock_conn_response), user_data_sz); } if (sock_eq_report_event(ep->eq, FI_ECONNREFUSED, &cm_err_entry, sizeof (cm_err_entry) + user_data_sz, 0)) SOCK_LOG_ERROR("Error in writing to EQ\n"); goto out; default: SOCK_LOG_ERROR("Invalid event\n"); break; } conn_response = NULL; } out: if (conn_response) free(conn_response); close(ep->socket); ep->socket = 0; return NULL; }
/*
 * Log and free a receive entry.  The entry's memory is returned with
 * free(); nothing else is touched here.
 */
void sock_rx_release_entry(struct sock_rx_entry *rx_entry)
{
	SOCK_LOG_INFO("Releasing rx_entry: %p\n", rx_entry);
	free(rx_entry);
}
static void *sock_pep_listener_thread (void *data) { struct sock_pep *pep = (struct sock_pep *)data; struct sock_conn_req *conn_req = NULL; struct fi_eq_cm_entry cm_entry; struct sockaddr_in from_addr; struct pollfd poll_fds[2]; socklen_t addr_len; int ret, user_data_sz, tmp; SOCK_LOG_INFO("Starting listener thread for PEP: %p\n", pep); poll_fds[0].fd = pep->socket; poll_fds[1].fd = pep->signal_fds[1]; poll_fds[0].events = poll_fds[1].events = POLLIN; while((volatile int)pep->do_listen) { if (poll(poll_fds, 2, -1) > 0) { if (poll_fds[1].revents & POLLIN) { read(pep->signal_fds[1], &tmp, 1); continue; } } else return NULL; if (conn_req == NULL) { conn_req = (struct sock_conn_req*)calloc(1, sizeof(*conn_req) + SOCK_EP_MAX_CM_DATA_SZ); if (!conn_req) { SOCK_LOG_ERROR("cannot allocate\n"); return NULL; } } addr_len = sizeof(struct sockaddr_in); ret = recvfrom(pep->socket, (char*)conn_req, sizeof(*conn_req) + SOCK_EP_MAX_CM_DATA_SZ, 0, (struct sockaddr *) &from_addr, &addr_len); if (ret <= 0) continue; memcpy(&conn_req->from_addr, &from_addr, sizeof(struct sockaddr_in)); SOCK_LOG_INFO("Msg received: %d\n", ret); memset(&cm_entry, 0, sizeof(cm_entry)); user_data_sz = 0; if (conn_req->hdr.type == SOCK_CONN_REQ) { SOCK_LOG_INFO("Received SOCK_CONN_REQ\n"); if (ret < sizeof(*conn_req) || !sock_ep_cm_send_ack(pep->socket, &from_addr)) { SOCK_LOG_ERROR("Invalid connection request\n"); break; } cm_entry.info = sock_ep_msg_process_info(conn_req); cm_entry.info->connreq = (fi_connreq_t)conn_req; if (ret > sizeof(struct sock_conn_req)) { user_data_sz = ret - sizeof(struct sock_conn_req); memcpy(&cm_entry.data, (char *)conn_req + sizeof(struct sock_conn_req), user_data_sz); } if (sock_eq_report_event(pep->eq, FI_CONNREQ, &cm_entry, sizeof(cm_entry) + user_data_sz, 0)) SOCK_LOG_ERROR("Error in writing to EQ\n"); } else { SOCK_LOG_ERROR("Invalid event\n"); } conn_req = NULL; } if (conn_req) free(conn_req); close(pep->socket); pep->socket = 0; return NULL; }
/*
 * Bind a UDP socket to the passive endpoint's source address and start the
 * thread that listens for connection requests on it.
 *
 * pep->src_addr is turned into numeric host/port strings and resolved with
 * getaddrinfo(); the first candidate that can be created and bound becomes
 * pep->socket.  If the configured port was 0, the port the kernel picked is
 * written back to pep->src_addr.sin_port.
 *
 * Returns 0 on success, -FI_EINVAL if resolution, getsockname() or thread
 * creation fails, and -FI_EIO if no candidate address could be bound.  On
 * failure after binding, the socket is closed and pep->socket is set to -1.
 */
static int sock_pep_create_listener_thread(struct sock_pep *pep)
{
	int optval, ret;
	socklen_t addr_size;
	struct sockaddr_in addr;
	struct addrinfo *s_res = NULL, *p;
	struct addrinfo hints;
	char sa_ip[INET_ADDRSTRLEN] = {0};
	char sa_port[NI_MAXSERV] = {0};

	pep->do_listen = 1;

	memset(&hints, 0, sizeof(hints));
	hints.ai_family = AF_INET;
	hints.ai_socktype = SOCK_DGRAM;
	hints.ai_flags = AI_PASSIVE;
	hints.ai_protocol = IPPROTO_UDP;

	/* inet_ntop()/snprintf() write into our buffers with explicit bounds. */
	if (!inet_ntop(AF_INET, &pep->src_addr.sin_addr, sa_ip, sizeof(sa_ip)))
		sa_ip[0] = '\0';
	snprintf(sa_port, sizeof(sa_port), "%d", ntohs(pep->src_addr.sin_port));

	ret = getaddrinfo(sa_ip, sa_port, &hints, &s_res);
	if (ret) {
		SOCK_LOG_ERROR("no available AF_INET address service:%s, %s\n",
			       sa_port, gai_strerror(ret));
		return -FI_EINVAL;
	}

	pep->socket = -1;
	for (p = s_res; p; p = p->ai_next) {
		pep->socket = socket(p->ai_family, p->ai_socktype, p->ai_protocol);
		if (pep->socket < 0)
			continue;

		optval = 1;
		if (setsockopt(pep->socket, SOL_SOCKET, SO_REUSEADDR,
			       &optval, sizeof optval))
			SOCK_LOG_ERROR("setsockopt failed\n");

		/* Bind to the candidate being tried, not always the first. */
		if (!bind(pep->socket, p->ai_addr, p->ai_addrlen))
			break;

		close(pep->socket);
		pep->socket = -1;
	}

	freeaddrinfo(s_res);
	if (pep->socket < 0)
		return -FI_EIO;

	if (pep->src_addr.sin_port == 0) {
		addr_size = sizeof(addr);
		if (getsockname(pep->socket, (struct sockaddr*)&addr, &addr_size)) {
			ret = -FI_EINVAL;
			goto err;
		}
		pep->src_addr.sin_port = addr.sin_port;
	}

	SOCK_LOG_INFO("Listener thread bound to %s:%d\n",
		      sa_ip, ntohs(pep->src_addr.sin_port));

	if (pthread_create(&pep->listener_thread, NULL,
			   sock_pep_listener_thread, (void *)pep)) {
		SOCK_LOG_ERROR("Couldn't create listener thread\n");
		ret = -FI_EINVAL;
		goto err;
	}
	return 0;

err:
	/* Do not leak the bound socket when start-up fails. */
	close(pep->socket);
	pep->socket = -1;
	return ret;
}