static void handle_connreq(struct poll_fd_mgr *poll_mgr, struct poll_fd_info *poll_info) { struct tcpx_conn_handle *handle; struct tcpx_pep *pep; struct fi_eq_cm_entry *cm_entry; struct ofi_ctrl_hdr conn_req; SOCKET sock; int ret; assert(poll_info->fid->fclass == FI_CLASS_PEP); pep = container_of(poll_info->fid, struct tcpx_pep, util_pep.pep_fid.fid); sock = accept(pep->sock, NULL, 0); if (sock < 0) { FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL, "accept error: %d\n", ofi_sockerr()); return; } ret = rx_cm_data(sock, &conn_req, ofi_ctrl_connreq, poll_info); if (ret) { FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL, "cm data recv failed \n"); goto err1; } handle = calloc(1, sizeof(*handle)); if (!handle) goto err1; cm_entry = calloc(1, sizeof(*cm_entry) + poll_info->cm_data_sz); if (!cm_entry) goto err2; handle->conn_fd = sock; cm_entry->fid = poll_info->fid; cm_entry->info = fi_dupinfo(&pep->info); if (!cm_entry->info) goto err3; cm_entry->info->handle = &handle->handle; memcpy(cm_entry->data, poll_info->cm_data, poll_info->cm_data_sz); ret = (int) fi_eq_write(&pep->util_pep.eq->eq_fid, FI_CONNREQ, cm_entry, sizeof(*cm_entry) + poll_info->cm_data_sz, 0); if (ret < 0) { FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL, "Error writing to EQ\n"); goto err4; } free(cm_entry); return; err4: fi_freeinfo(cm_entry->info); err3: free(cm_entry); err2: free(handle); err1: ofi_close_socket(sock); }
/* Tear down the MSG-provider resources owned by an RXM endpoint: the
 * CQ, the shared RX context, the passive EP, and the cached fi_info.
 * Every close is attempted even if an earlier one fails; the last
 * error observed is returned (0 if all closes succeeded). */
static int rxm_ep_msg_res_close(struct rxm_ep *rxm_ep)
{
	int err = 0;
	int ret;

	ret = fi_close(&rxm_ep->msg_cq->fid);
	if (ret) {
		FI_WARN(&rxm_prov, FI_LOG_EP_CTRL, "Unable to close msg CQ\n");
		err = ret;
	}

	ret = fi_close(&rxm_ep->srx_ctx->fid);
	if (ret) {
		FI_WARN(&rxm_prov, FI_LOG_EP_CTRL,
			"Unable to close msg shared ctx\n");
		err = ret;
	}

	ret = fi_close(&rxm_ep->msg_pep->fid);
	if (ret) {
		FI_WARN(&rxm_prov, FI_LOG_EP_CTRL,
			"Unable to close msg passive EP\n");
		err = ret;
	}

	fi_freeinfo(rxm_ep->msg_info);
	return err;
}
static int smr_ep_bind(struct fid *ep_fid, struct fid *bfid, uint64_t flags) { struct smr_ep *ep; struct util_av *av; int ret = 0; ep = container_of(ep_fid, struct smr_ep, util_ep.ep_fid.fid); switch (bfid->fclass) { case FI_CLASS_AV: av = container_of(bfid, struct util_av, av_fid.fid); ret = ofi_ep_bind_av(&ep->util_ep, av); if (ret) { FI_WARN(&smr_prov, FI_LOG_EP_CTRL, "duplicate AV binding\n"); return -FI_EINVAL; } break; case FI_CLASS_CQ: ret = smr_ep_bind_cq(ep, container_of(bfid, struct util_cq, cq_fid.fid), flags); break; case FI_CLASS_EQ: break; default: FI_WARN(&smr_prov, FI_LOG_EP_CTRL, "invalid fid class\n"); ret = -FI_EINVAL; break; } return ret; }
/* Remove count addresses from the AV.  No flags are supported.
 * Individual removal failures are logged but do not stop the loop;
 * once flag validation passes the call always returns 0. */
static int ip_av_remove(struct fid_av *av_fid, fi_addr_t *fi_addr,
			size_t count, uint64_t flags)
{
	struct util_av *av;
	int i, idx, slot, ret;

	av = container_of(av_fid, struct util_av, av_fid);
	if (flags) {
		FI_WARN(av->prov, FI_LOG_AV, "invalid flags\n");
		return -FI_EINVAL;
	}

	/*
	 * It's more efficient to remove addresses from high to low index.
	 * We assume that addresses are removed in the same order that they
	 * were added -- i.e. fi_addr passed in here was also passed into
	 * insert.  Thus, we walk through the array backwards.
	 */
	for (i = (int) count - 1; i >= 0; i--) {
		idx = (int) fi_addr[i];
		slot = ip_av_slot(av, ip_av_get_addr(av, idx));
		ret = fi_av_remove_addr(av, slot, idx);
		if (ret)
			FI_WARN(av->prov, FI_LOG_AV,
				"removal of fi_addr %d failed\n", idx);
	}
	return 0;
}
static struct ofi_prov *ofi_create_prov_entry(const char *prov_name) { struct ofi_prov *prov = NULL; prov = calloc(sizeof *prov, 1); if (!prov) { FI_WARN(&core_prov, FI_LOG_CORE, "Not enough memory to allocate provider registry\n"); return NULL; } prov->prov_name = strdup(prov_name); if (!prov->prov_name) { FI_WARN(&core_prov, FI_LOG_CORE, "Failed to init pre-registered provider name\n"); free(prov); return NULL; } if (prov_tail) prov_tail->next = prov; else prov_head = prov; prov_tail = prov; return prov; }
static int util_verify_av_attr(struct util_domain *domain, const struct fi_av_attr *attr, const struct util_av_attr *util_attr) { switch (attr->type) { case FI_AV_MAP: case FI_AV_TABLE: if ((domain->av_type != FI_AV_UNSPEC) && (attr->type != domain->av_type)) { FI_INFO(domain->prov, FI_LOG_AV, "Invalid AV type\n"); return -FI_EINVAL; } break; default: FI_WARN(domain->prov, FI_LOG_AV, "invalid av type\n"); return -FI_EINVAL; } if (attr->flags & ~(FI_EVENT | FI_READ | FI_SYMMETRIC)) { FI_WARN(domain->prov, FI_LOG_AV, "invalid flags\n"); return -FI_EINVAL; } if (util_attr->flags & ~(FI_SOURCE)) { FI_WARN(domain->prov, FI_LOG_AV, "invalid internal flags\n"); return -FI_EINVAL; } if (util_attr->addrlen < sizeof(int)) { FI_WARN(domain->prov, FI_LOG_AV, "unsupported address size\n"); return -FI_ENOSYS; } return 0; }
static int rxm_ep_ctrl(struct fid *fid, int command, void *arg) { struct rxm_ep *rxm_ep; struct rxm_fabric *rxm_fabric; int ret; rxm_ep = container_of(fid, struct rxm_ep, util_ep.ep_fid.fid); rxm_fabric = container_of(rxm_ep->util_ep.domain->fabric, struct rxm_fabric, util_fabric); switch (command) { case FI_ENABLE: if (!rxm_ep->util_ep.rx_cq || !rxm_ep->util_ep.tx_cq) return -FI_ENOCQ; if (!rxm_ep->util_ep.av) return -FI_EOPBADSTATE; /* TODO: Add FI_ENOAV */ ret = fi_pep_bind(rxm_ep->msg_pep, &rxm_fabric->msg_eq->fid, 0); if (ret) { FI_WARN(&rxm_prov, FI_LOG_EP_CTRL, "Unable to bind msg PEP to msg EQ\n"); return ret; } ret = fi_listen(rxm_ep->msg_pep); if (ret) { FI_WARN(&rxm_prov, FI_LOG_EP_CTRL, "Unable to set msg PEP to listen state\n"); return ret; } break; default: return -FI_ENOSYS; } return 0; }
/* Parse a textual UUID ("xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx") into
 * the 16-byte uuid.  A NULL or malformed string yields a zeroed (or,
 * for a partial match, partially zeroed) uuid and a warning. */
static void psmx2_string_to_uuid(const char *s, psm2_uuid_t uuid)
{
	int n;

	/* Zero the output first: previously a partial sscanf match left
	 * the unmatched bytes of the caller's buffer uninitialized. */
	memset(uuid, 0, sizeof(psm2_uuid_t));
	if (!s)
		return;

	n = sscanf(s,
		"%2hhx%2hhx%2hhx%2hhx-"
		"%2hhx%2hhx-%2hhx%2hhx-%2hhx%2hhx-"
		"%2hhx%2hhx%2hhx%2hhx%2hhx%2hhx",
		&uuid[0], &uuid[1], &uuid[2], &uuid[3],
		&uuid[4], &uuid[5], &uuid[6], &uuid[7],
		&uuid[8], &uuid[9], &uuid[10], &uuid[11],
		&uuid[12], &uuid[13], &uuid[14], &uuid[15]);

	if (n != 16) {
		FI_WARN(&psmx2_prov, FI_LOG_CORE,
			"wrong uuid format: %s\n", s);
		FI_WARN(&psmx2_prov, FI_LOG_CORE,
			"correct uuid format is: "
			"xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx\n");
	}
}
/* Common receive-posting path for msg (ofi_op_msg) and tagged
 * (ofi_op_tagged) operations.  Fills a recv entry from the queue's
 * freestack, matches it against any queued unexpected messages, and
 * appends it to the posted-receive list.
 * Returns 0, -FI_EAGAIN when no recv entries are free, -FI_EINVAL for
 * an unknown op, or the unexpected-message check's error. */
int rxm_ep_recv_common(struct fid_ep *ep_fid, const struct iovec *iov,
		       void **desc, size_t count, fi_addr_t src_addr,
		       uint64_t tag, uint64_t ignore, void *context,
		       uint64_t flags, int op)
{
	struct rxm_recv_entry *recv_entry;
	struct rxm_ep *rxm_ep;
	struct rxm_recv_queue *recv_queue;
	dlist_func_t *match;
	size_t i;
	int ret;

	rxm_ep = container_of(ep_fid, struct rxm_ep, util_ep.ep_fid.fid);

	// TODO pass recv_queue as arg
	if (op == ofi_op_msg) {
		recv_queue = &rxm_ep->recv_queue;
		match = ofi_match_unexp_msg;
	} else if (op == ofi_op_tagged) {
		recv_queue = &rxm_ep->trecv_queue;
		match = ofi_match_unexp_msg_tagged;
	} else {
		FI_WARN(&rxm_prov, FI_LOG_EP_DATA, "Unknown op!\n");
		return -FI_EINVAL;
	}

	if (freestack_isempty(recv_queue->recv_fs)) {
		FI_DBG(&rxm_prov, FI_LOG_CQ, "Exhausted recv_entry freestack\n");
		return -FI_EAGAIN;
	}

	recv_entry = freestack_pop(recv_queue->recv_fs);

	for (i = 0; i < count; i++) {
		recv_entry->iov[i].iov_base = iov[i].iov_base;
		recv_entry->iov[i].iov_len = iov[i].iov_len;
		recv_entry->desc[i] = desc[i];
		/* iov_len is size_t; the old "%u" specifier was undefined
		 * behavior on LP64 targets. */
		FI_DBG(&rxm_prov, FI_LOG_EP_CTRL, "post recv: %zu\n",
		       iov[i].iov_len);
	}
	recv_entry->count = count;
	/* Without FI_DIRECTED_RECV, the source address is ignored. */
	recv_entry->addr = (rxm_ep->rxm_info->caps & FI_DIRECTED_RECV) ?
		src_addr : FI_ADDR_UNSPEC;
	recv_entry->flags = flags;
	if (op == ofi_op_tagged) {
		recv_entry->tag = tag;
		recv_entry->ignore = ignore;
	}

	if (!dlist_empty(&recv_queue->unexp_msg_list)) {
		ret = rxm_check_unexp_msg_list(rxm_ep->util_ep.rx_cq,
					       recv_queue, recv_entry, match);
		if (ret) {
			FI_WARN(&rxm_prov, FI_LOG_EP_DATA,
				"Unable to check unexp msg list\n");
			/* NOTE(review): recv_entry is not returned to the
			 * freestack here — confirm whether the check may
			 * have consumed it before "fixing" this. */
			return ret;
		}
	}

	dlist_insert_tail(&recv_entry->entry, &recv_queue->recv_list);
	return 0;
}
/* Report a truncated-message error completion for rx_buf: the posted
 * receive (done_len bytes actually delivered) was smaller than the
 * incoming message (pkt.hdr.size bytes).  Bumps the RX error counter
 * when counters are enabled, then writes an error entry to the RX CQ.
 * Returns 0 on success or the CQ-write error code. */
static int rxm_cq_write_error_trunc(struct rxm_rx_buf *rx_buf, size_t done_len)
{
	int ret;

	if (rx_buf->ep->util_ep.flags & OFI_CNTR_ENABLED)
		rxm_cntr_incerr(rx_buf->ep->util_ep.rx_cntr);

	FI_WARN(&rxm_prov, FI_LOG_CQ, "Message truncated: "
		"recv buf length: %zu message length: %" PRIu64 "\n",
		done_len, rx_buf->pkt.hdr.size);
	/* Last argument is the byte shortfall (message size minus the
	 * bytes delivered). */
	ret = ofi_cq_write_error_trunc(rx_buf->ep->util_ep.rx_cq,
				       rx_buf->recv_entry->context,
				       rx_buf->recv_entry->comp_flags |
				       rxm_cq_get_rx_comp_flags(rx_buf),
				       rx_buf->pkt.hdr.size,
				       rx_buf->recv_entry->rxm_iov.iov[0].iov_base,
				       rx_buf->pkt.hdr.data, rx_buf->pkt.hdr.tag,
				       rx_buf->pkt.hdr.size - done_len);
	if (OFI_UNLIKELY(ret)) {
		FI_WARN(&rxm_prov, FI_LOG_CQ,
			"Unable to write recv error CQ\n");
		return ret;
	}
	return 0;
}
/* Attach a {domain, epid} context to a PSM endpoint address.  If a
 * matching context is already attached, nothing is done; a mismatched
 * context is warned about and replaced with a fresh allocation. */
static void psmx_set_epaddr_context(struct psmx_fid_domain *domain,
				    psm_epid_t epid, psm_epaddr_t epaddr)
{
	struct psmx_epaddr_context *ctx;

	ctx = (void *) psm_epaddr_getctxt(epaddr);
	if (ctx) {
		/* Existing context is reusable only when it matches. */
		if (ctx->domain == domain && ctx->epid == epid)
			return;
		FI_WARN(&psmx_prov, FI_LOG_AV,
			"domain or epid doesn't match\n");
	}

	ctx = malloc(sizeof(*ctx));
	if (!ctx) {
		FI_WARN(&psmx_prov, FI_LOG_AV, "cannot allocate context\n");
		return;
	}

	ctx->domain = domain;
	ctx->epid = epid;
	psm_epaddr_setctxt(epaddr, ctx);
}
/* Core fi_getinfo entry point: query every registered provider that
 * matches the hints and chain their fi_info results into *info.
 * Each returned info is stamped with the owning provider's name and
 * version.  Returns 0 if any provider produced results, -FI_ENODATA
 * if none did, or -FI_ENOSYS for an unsupported requested version. */
int DEFAULT_SYMVER_PRE(fi_getinfo)(uint32_t version, const char *node,
				   const char *service, uint64_t flags,
				   struct fi_info *hints, struct fi_info **info)
{
	struct fi_prov *prov;
	struct fi_info *tail, *cur;
	int ret;

	if (!init)
		fi_ini();

	if (FI_VERSION_LT(fi_version(), version)) {
		FI_WARN(&core_prov, FI_LOG_CORE,
			"Requested version is newer than library\n");
		return -FI_ENOSYS;
	}

	if (flags == FI_PROV_ATTR_ONLY) {
		return fi_getprovinfo(info);
	}

	*info = tail = NULL;
	for (prov = prov_head; prov; prov = prov->next) {
		if (!prov->provider->getinfo)
			continue;

		/* Respect an explicit provider selection in the hints. */
		if (hints && hints->fabric_attr && hints->fabric_attr->prov_name &&
		    strcasecmp(prov->provider->name,
			       hints->fabric_attr->prov_name))
			continue;

		ret = prov->provider->getinfo(version, node, service, flags,
					      hints, &cur);
		if (ret) {
			/* A failing provider is skipped, not fatal. */
			FI_WARN(&core_prov, FI_LOG_CORE,
				"fi_getinfo: provider %s returned -%d (%s)\n",
				prov->provider->name, -ret, fi_strerror(-ret));
			continue;
		}

		/* Append this provider's list to the running chain. */
		if (!*info)
			*info = cur;
		else
			tail->next = cur;
		/* Stamp provider name/version on every entry.  The loop
		 * stops on the last node (tail->next == NULL), so the same
		 * stamping is repeated once more below for that node. */
		for (tail = cur; tail->next; tail = tail->next) {
			if (tail->fabric_attr->prov_name != NULL)
				FI_WARN(&core_prov, FI_LOG_CORE,
					"prov_name field is not NULL (%s)\n",
					tail->fabric_attr->prov_name);
			tail->fabric_attr->prov_name =
				strdup(prov->provider->name);
			tail->fabric_attr->prov_version =
				prov->provider->version;
		}
		if (tail->fabric_attr->prov_name != NULL)
			FI_WARN(&core_prov, FI_LOG_CORE,
				"prov_name field is not NULL (%s)\n",
				tail->fabric_attr->prov_name);
		tail->fabric_attr->prov_name = strdup(prov->provider->name);
		tail->fabric_attr->prov_version = prov->provider->version;
	}

	return *info ? 0 : -FI_ENODATA;
}
int tcpx_conn_mgr_init(struct tcpx_fabric *tcpx_fabric) { int ret; dlist_init(&tcpx_fabric->poll_mgr.list); fastlock_init(&tcpx_fabric->poll_mgr.lock); ret = fd_signal_init(&tcpx_fabric->poll_mgr.signal); if (ret) { FI_WARN(&tcpx_prov, FI_LOG_FABRIC,"signal init failed\n"); goto err; } tcpx_fabric->poll_mgr.run = 1; ret = pthread_create(&tcpx_fabric->conn_mgr_thread, 0, tcpx_conn_mgr_thread, (void *) tcpx_fabric); if (ret) { FI_WARN(&tcpx_prov, FI_LOG_FABRIC, "Failed creating tcpx connection manager thread"); goto err1; } return 0; err1: fd_signal_free(&tcpx_fabric->poll_mgr.signal); err: fastlock_destroy(&tcpx_fabric->poll_mgr.lock); return ret; }
/* Insert addr into the AV, claiming the head of the AV's free list.
 * slot  - hash bucket used when the AV tracks sources (FI_SOURCE).
 * index - out: table index assigned to the address.
 * Returns 0, -FI_ENOSPC when the AV is full, or the hash-insert
 * error.  Serialized via av->lock. */
int ofi_av_insert_addr(struct util_av *av, const void *addr, int slot, int *index)
{
	int ret = 0;

	fastlock_acquire(&av->lock);
	if (av->free_list == UTIL_NO_ENTRY) {
		FI_WARN(av->prov, FI_LOG_AV, "AV is full\n");
		ret = -FI_ENOSPC;
		goto out;
	}

	if (av->flags & FI_SOURCE) {
		ret = util_av_hash_insert(&av->hash, slot, av->free_list);
		if (ret) {
			FI_WARN(av->prov, FI_LOG_AV,
				"failed to insert addr into hash table\n");
			goto out;
		}
	}

	/* Pop the free-list head: a free entry's data area stores the
	 * index of the next free entry, which becomes the new head. */
	*index = av->free_list;
	av->free_list = *(int *) util_av_get_data(av, av->free_list);
	util_av_set_data(av, *index, addr, av->addrlen);
out:
	fastlock_release(&av->lock);
	return ret;
}
static int smr_fetch_result(struct smr_ep *ep, struct smr_region *peer_smr, struct iovec *iov, size_t iov_count, const struct fi_rma_ioc *rma_ioc, size_t rma_count, enum fi_datatype datatype, size_t total_len) { int ret, i; struct iovec rma_iov[SMR_IOV_LIMIT]; for (i = 0; i < rma_count; i++) { rma_iov[i].iov_base = (void *) rma_ioc[i].addr; rma_iov[i].iov_len = rma_ioc[i].count * ofi_datatype_size(datatype); } ret = process_vm_readv(peer_smr->pid, iov, iov_count, rma_iov, rma_count, 0); if (ret != total_len) { if (ret < 0) { FI_WARN(&smr_prov, FI_LOG_EP_CTRL, "CMA write error\n"); return -errno; } else { FI_WARN(&smr_prov, FI_LOG_EP_CTRL, "partial read occurred\n"); return -FI_EIO; } } return 0; }
static int rxm_ep_bind_cq(struct rxm_ep *rxm_ep, struct util_cq *util_cq, uint64_t flags) { struct rxm_cq *rxm_cq; rxm_cq = container_of(util_cq, struct rxm_cq, util_cq); if (flags & ~(FI_TRANSMIT | FI_RECV)) { FI_WARN(&rxm_prov, FI_LOG_EP_CTRL, "unsupported flags\n"); return -FI_EBADFLAGS; } if (((flags & FI_TRANSMIT) && rxm_ep->tx_cq) || ((flags & FI_RECV) && rxm_ep->rx_cq)) { FI_WARN(&rxm_prov, FI_LOG_EP_CTRL, "duplicate CQ binding\n"); return -FI_EINVAL; } if (flags & FI_TRANSMIT) { rxm_ep->util_ep.tx_cq = &rxm_cq->util_cq; atomic_inc(&rxm_cq->util_cq.ref); } if (flags & FI_RECV) { rxm_ep->util_ep.rx_cq = &rxm_cq->util_cq; atomic_inc(&rxm_cq->util_cq.ref); } return 0; }
/* Client-side connection completion: read the server's connresp CM
 * data from the EP's socket and report an FI_CONNECTED event on the
 * EP's EQ.  On success, the connection socket is switched to
 * non-blocking mode for data transfer. */
static int proc_conn_resp(struct poll_fd_mgr *poll_mgr,
			  struct poll_fd_info *poll_info,
			  struct tcpx_ep *ep, int index)
{
	struct ofi_ctrl_hdr conn_resp;
	struct fi_eq_cm_entry *cm_entry;
	int ret = FI_SUCCESS;

	assert(poll_mgr->poll_fds[index].revents == POLLIN);

	ret = rx_cm_data(ep->conn_fd, &conn_resp, ofi_ctrl_connresp, poll_info);
	if (ret)
		return ret;

	/* Entry is sized with trailing room for the peer's CM data. */
	cm_entry = calloc(1, sizeof(*cm_entry) + poll_info->cm_data_sz);
	if (!cm_entry) {
		FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL, "mem alloc failed\n");
		return -FI_ENOMEM;
	}

	cm_entry->fid = poll_info->fid;
	memcpy(cm_entry->data, poll_info->cm_data, poll_info->cm_data_sz);

	ret = (int) fi_eq_write(&ep->util_ep.eq->eq_fid, FI_CONNECTED, cm_entry,
				sizeof(*cm_entry) + poll_info->cm_data_sz, 0);
	if (ret < 0) {
		FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL, "Error writing to EQ\n");
		goto err;
	}

	/* fi_eq_write copies the entry, so it is freed on every path. */
	ret = fi_fd_nonblock(ep->conn_fd);
err:
	free(cm_entry);
	return ret;
}
static int rxm_ep_msg_res_open(struct fi_info *rxm_info, struct util_domain *util_domain, struct rxm_ep *rxm_ep) { struct rxm_fabric *rxm_fabric; struct rxm_domain *rxm_domain; struct fi_cq_attr cq_attr; int ret; ret = ofix_getinfo(rxm_prov.version, NULL, NULL, 0, &rxm_util_prov, rxm_info, rxm_alter_layer_info, rxm_alter_base_info, 1, &rxm_ep->msg_info); if (ret) return ret; rxm_domain = container_of(util_domain, struct rxm_domain, util_domain); rxm_fabric = container_of(util_domain->fabric, struct rxm_fabric, util_fabric); ret = fi_passive_ep(rxm_fabric->msg_fabric, rxm_ep->msg_info, &rxm_ep->msg_pep, rxm_ep); if (ret) { FI_WARN(&rxm_prov, FI_LOG_FABRIC, "Unable to open msg PEP\n"); goto err1; } memset(&cq_attr, 0, sizeof(cq_attr)); cq_attr.size = rxm_info->tx_attr->size + rxm_info->rx_attr->size; cq_attr.format = FI_CQ_FORMAT_MSG; ret = fi_cq_open(rxm_domain->msg_domain, &cq_attr, &rxm_ep->msg_cq, NULL); if (ret) { FI_WARN(&rxm_prov, FI_LOG_CQ, "Unable to open MSG CQ\n"); goto err1; } ret = fi_srx_context(rxm_domain->msg_domain, rxm_ep->msg_info->rx_attr, &rxm_ep->srx_ctx, NULL); if (ret) { FI_WARN(&rxm_prov, FI_LOG_FABRIC, "Unable to open shared receive context\n"); goto err2; } /* We don't care what's in the dest_addr at this point. We go by AV. */ if (rxm_ep->msg_info->dest_addr) { free(rxm_ep->msg_info->dest_addr); rxm_ep->msg_info->dest_addr = NULL; rxm_ep->msg_info->dest_addrlen = 0; } /* Zero out the port as we would be creating multiple MSG EPs for a single * RXM EP and we don't want address conflicts. */ if (rxm_ep->msg_info->src_addr) ((struct sockaddr_in *)(rxm_ep->msg_info->src_addr))->sin_port = 0; return 0; err2: fi_close(&rxm_ep->msg_pep->fid); err1: fi_freeinfo(rxm_ep->msg_info); return ret; }
static int mlx_ep_bind(struct fid *fid, struct fid *bfid, uint64_t flags) { struct mlx_ep *ep; struct util_cq *cq; ep = container_of(fid, struct mlx_ep, ep.ep_fid.fid); int status = FI_SUCCESS; switch (bfid->fclass) { case FI_CLASS_CQ: /* TODO: check rest flags for send/recv ECs */ do { cq = container_of(bfid, struct util_cq, cq_fid.fid); if ( ((flags & FI_TRANSMIT) && ep->ep.tx_cq)|| ((flags & FI_RECV) && ep->ep.rx_cq)) { FI_WARN( &mlx_prov, FI_LOG_EP_CTRL, "CQ already binded\n"); status = -FI_EINVAL; break; } if (flags & FI_TRANSMIT) { ep->ep.tx_cq = cq; ofi_atomic_inc32(&(cq->ref)); } if (flags & FI_RECV) { ep->ep.rx_cq = cq; ofi_atomic_inc32(&(cq->ref)); status = fid_list_insert( &cq->ep_list, &cq->ep_list_lock, &ep->ep.ep_fid.fid); if (status) break; } if (flags & FI_SELECTIVE_COMPLETION) { ep->ep.flags |= FI_SELECTIVE_COMPLETION; } } while (0); break; case FI_CLASS_AV: if (ep->av) { FI_WARN( &mlx_prov, FI_LOG_EP_CTRL, "AV already binded\n"); status = -FI_EINVAL; break; } ep->av = container_of(bfid, struct mlx_av, av.fid); ep->av->ep = ep; break; default: status = -FI_EINVAL; break; } return status; }
int ofi_ep_bind_cntr(struct util_ep *ep, struct util_cntr *cntr, uint64_t flags) { if (flags & ~(FI_TRANSMIT | FI_RECV | FI_READ | FI_WRITE | FI_REMOTE_READ | FI_REMOTE_WRITE)) { FI_WARN(ep->domain->fabric->prov, FI_LOG_EP_CTRL, "Unsupported bind flags\n"); return -FI_EBADFLAGS; } if (((flags & FI_TRANSMIT) && ep->tx_cntr) || ((flags & FI_RECV) && ep->rx_cntr) || ((flags & FI_READ) && ep->rd_cntr) || ((flags & FI_WRITE) && ep->wr_cntr) || ((flags & FI_REMOTE_READ) && ep->rem_rd_cntr) || ((flags & FI_REMOTE_WRITE) && ep->rem_wr_cntr)) { FI_WARN(ep->domain->fabric->prov, FI_LOG_EP_CTRL, "Duplicate counter binding\n"); return -FI_EINVAL; } if (flags & FI_TRANSMIT) { ep->tx_cntr = cntr; ofi_atomic_inc32(&cntr->ref); } if (flags & FI_RECV) { ep->rx_cntr = cntr; ofi_atomic_inc32(&cntr->ref); } if (flags & FI_READ) { ep->rd_cntr = cntr; ofi_atomic_inc32(&cntr->ref); } if (flags & FI_WRITE) { ep->wr_cntr = cntr; ofi_atomic_inc32(&cntr->ref); } if (flags & FI_REMOTE_READ) { ep->rem_rd_cntr = cntr; ofi_atomic_inc32(&cntr->ref); } if (flags & FI_REMOTE_WRITE) { ep->rem_wr_cntr = cntr; ofi_atomic_inc32(&cntr->ref); } ep->flags |= OFI_CNTR_ENABLED; return fid_list_insert(&cntr->ep_list, &cntr->ep_list_lock, &ep->ep_fid.fid); }
static int tcpx_pep_sock_create(struct tcpx_pep *pep) { int ret, af; switch (pep->info->addr_format) { case FI_SOCKADDR: case FI_SOCKADDR_IN: case FI_SOCKADDR_IN6: af = ((struct sockaddr *)pep->info->src_addr)->sa_family; break; default: FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL, "invalid source address format\n"); return -FI_EINVAL; } pep->sock = ofi_socket(af, SOCK_STREAM, 0); if (pep->sock == INVALID_SOCKET) { FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL, "failed to create listener: %s\n", strerror(ofi_sockerr())); return -FI_EIO; } if (ofi_addr_get_port(pep->info->src_addr) != 0 || port_range.high == 0) { ret = tcpx_setup_socket(pep->sock); if (ret) { goto err; } ret = bind(pep->sock, pep->info->src_addr, (socklen_t) pep->info->src_addrlen); } else { ret = tcpx_setup_socket_nodelay(pep->sock); if (ret) { goto err; } ret = tcpx_bind_to_port_range(pep->sock, pep->info->src_addr, pep->info->src_addrlen); } if (ret) { FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL, "failed to bind listener: %s\n", strerror(ofi_sockerr())); goto err; } return FI_SUCCESS; err: ofi_close_socket(pep->sock); pep->sock = INVALID_SOCKET; return ret; }
/* Server-side handler run when an accepted connection's socket is
 * readable: read the peer's connreq CM data and post an FI_CONNREQ
 * event to the listening PEP's EQ.  On success the fd is removed from
 * the wait set and the handle passes to the application via the
 * event's info; on failure the socket, handle, and CM context are all
 * torn down. */
static void server_recv_connreq(struct util_wait *wait,
				struct tcpx_cm_context *cm_ctx)
{
	struct tcpx_conn_handle *handle;
	struct fi_eq_cm_entry *cm_entry;
	struct ofi_ctrl_hdr conn_req;
	int ret;

	assert(cm_ctx->fid->fclass == FI_CLASS_CONNREQ);
	handle = container_of(cm_ctx->fid, struct tcpx_conn_handle, handle);

	ret = rx_cm_data(handle->conn_fd, &conn_req, ofi_ctrl_connreq, cm_ctx);
	if (ret)
		goto err1;

	/* Entry is sized with trailing room for the peer's CM data. */
	cm_entry = calloc(1, sizeof(*cm_entry) + cm_ctx->cm_data_sz);
	if (!cm_entry)
		goto err1;

	cm_entry->fid = &handle->pep->util_pep.pep_fid.fid;
	cm_entry->info = fi_dupinfo(&handle->pep->info);
	if (!cm_entry->info)
		goto err2;

	/* The handle rides on the duplicated info; presumably consumed by
	 * fi_accept()/fi_reject() — confirm against the CM flow. */
	cm_entry->info->handle = &handle->handle;
	memcpy(cm_entry->data, cm_ctx->cm_data, cm_ctx->cm_data_sz);

	ret = (int) fi_eq_write(&handle->pep->util_pep.eq->eq_fid, FI_CONNREQ,
				cm_entry,
				sizeof(*cm_entry) + cm_ctx->cm_data_sz, 0);
	if (ret < 0) {
		FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL, "Error writing to EQ\n");
		goto err3;
	}

	ret = ofi_wait_fd_del(wait, handle->conn_fd);
	if (ret)
		FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL,
			"fd deletion from ofi_wait failed\n");

	/* fi_eq_write copied the entry; the info/handle now belong to the
	 * event consumer. */
	free(cm_entry);
	free(cm_ctx);
	return;
err3:
	fi_freeinfo(cm_entry->info);
err2:
	free(cm_entry);
err1:
	ofi_wait_fd_del(wait, handle->conn_fd);
	ofi_close_socket(handle->conn_fd);
	free(cm_ctx);
	free(handle);
}
/* Convert a core provider's fi_info into a util-provider fi_info:
 * run the provider-supplied translation callback, duplicate the
 * addressing data, and copy the domain/fabric name strings.
 * Returns 0 or -FI_ENOMEM (also used for translation failure). */
static int ofi_info_to_util(uint32_t version, const struct fi_provider *prov,
			    struct fi_info *core_info,
			    ofi_alter_info_t info_to_util,
			    struct fi_info **util_info)
{
	if (!(*util_info = fi_allocinfo()))
		return -FI_ENOMEM;

	if (info_to_util(version, core_info, *util_info))
		goto err;

	if (ofi_dup_addr(core_info, *util_info))
		goto err;

	/* Release 1.4 brought standardized domain names across IP based
	 * providers. Before this release, the usNIC provider would return a
	 * NULL domain name from fi_getinfo. For compatibility reasons, allow a
	 * NULL domain name when apps are requesting version < 1.4.
	 *
	 * The previous assert compared the constants 1 and 4 (always true,
	 * so it never fired); compare the requested version against
	 * FI_VERSION(1, 4) as the comment intends. */
	assert(FI_VERSION_LT(version, FI_VERSION(1, 4)) ||
	       core_info->domain_attr->name);

	if (core_info->domain_attr->name) {
		(*util_info)->domain_attr->name =
			strdup(core_info->domain_attr->name);
		if (!(*util_info)->domain_attr->name) {
			FI_WARN(prov, FI_LOG_FABRIC,
				"Unable to allocate domain name\n");
			goto err;
		}
	}

	(*util_info)->fabric_attr->name = strdup(core_info->fabric_attr->name);
	if (!(*util_info)->fabric_attr->name) {
		FI_WARN(prov, FI_LOG_FABRIC, "Unable to allocate fabric name\n");
		goto err;
	}

	(*util_info)->fabric_attr->prov_name =
		strdup(core_info->fabric_attr->prov_name);
	if (!(*util_info)->fabric_attr->prov_name) {
		/* Was a copy of the fabric-name message; say what failed. */
		FI_WARN(prov, FI_LOG_FABRIC,
			"Unable to allocate fabric prov name\n");
		goto err;
	}

	return 0;
err:
	fi_freeinfo(*util_info);
	return -FI_ENOMEM;
}
static void server_sock_accept(struct util_wait *wait, struct tcpx_cm_context *cm_ctx) { struct tcpx_conn_handle *handle; struct tcpx_pep *pep; SOCKET sock; int ret; FI_DBG(&tcpx_prov, FI_LOG_EP_CTRL, "Received Connreq\n"); assert(cm_ctx->fid->fclass == FI_CLASS_PEP); pep = container_of(cm_ctx->fid, struct tcpx_pep, util_pep.pep_fid.fid); sock = accept(pep->sock, NULL, 0); if (sock < 0) { FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL, "accept error: %d\n", ofi_sockerr()); return; } handle = calloc(1, sizeof(*handle)); if (!handle) { FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL, "cannot allocate memory \n"); goto err1; } cm_ctx = calloc(1, sizeof(*cm_ctx)); if (!cm_ctx) goto err2; handle->conn_fd = sock; handle->handle.fclass = FI_CLASS_CONNREQ; handle->pep = pep; cm_ctx->fid = &handle->handle; cm_ctx->type = SERVER_RECV_CONNREQ; ret = ofi_wait_fd_add(wait, sock, FI_EPOLL_IN, tcpx_eq_wait_try_func, NULL, (void *) cm_ctx); if (ret) goto err3; wait->signal(wait); return; err3: free(cm_ctx); err2: free(handle); err1: ofi_close_socket(sock); }
static int fi_verify_av_insert(struct util_av *av, uint64_t flags) { if ((av->flags & FI_EVENT) && !av->eq) { FI_WARN(av->prov, FI_LOG_AV, "no EQ bound to AV\n"); return -FI_ENOEQ; } if (flags & ~(FI_MORE)) { FI_WARN(av->prov, FI_LOG_AV, "unsupported flags\n"); return -FI_ENOEQ; } return 0; }
static int fi_ibv_getifaddrs(const char *service, uint64_t flags, struct fi_info *info) { struct ifaddrs *ifaddr, *ifa; char name[INET6_ADDRSTRLEN]; const char *ret_ptr; int ret, num_verbs_ifs = 0; flags |= FI_NUMERICHOST | FI_SOURCE; ret = getifaddrs(&ifaddr); if (ret) { FI_WARN(&fi_ibv_prov, FI_LOG_FABRIC, "Unable to get interface addresses\n"); return ret; } for (ifa = ifaddr; ifa; ifa = ifa->ifa_next) { if (!ifa->ifa_addr || !(ifa->ifa_flags & IFF_UP) || !strcmp(ifa->ifa_name, "lo")) continue; switch (ifa->ifa_addr->sa_family) { case AF_INET: ret_ptr = inet_ntop(AF_INET, &ofi_sin_addr(ifa->ifa_addr), name, INET6_ADDRSTRLEN); break; case AF_INET6: ret_ptr = inet_ntop(AF_INET6, &ofi_sin6_addr(ifa->ifa_addr), name, INET6_ADDRSTRLEN); break; default: continue; } if (!ret_ptr) { FI_WARN(&fi_ibv_prov, FI_LOG_FABRIC, "inet_ntop failed: %s(%d)\n", strerror(errno), errno); goto err; } ret = fi_ibv_copy_ifaddr(name, service, flags, info); if (ret) goto err; num_verbs_ifs++; } freeifaddrs(ifaddr); return num_verbs_ifs ? 0 : -FI_ENODATA; err: freeifaddrs(ifaddr); return ret; }
/* Initialize the userfaultfd-based memory monitor: open a uffd
 * descriptor, negotiate the API features needed to observe
 * unmap/remove/remap events, and start the handler thread.
 * Returns 0 or a negative errno-style code. */
int ofi_uffd_init(void)
{
	struct uffdio_api api;
	int ret;

	uffd.monitor.subscribe = ofi_uffd_subscribe;
	uffd.monitor.unsubscribe = ofi_uffd_unsubscribe;

	/* Page sizes must already have been probed elsewhere. */
	if (!num_page_sizes)
		return -FI_ENODATA;

	uffd.fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
	if (uffd.fd < 0) {
		FI_WARN(&core_prov, FI_LOG_MR,
			"syscall/userfaultfd %s\n", strerror(errno));
		return -errno;
	}

	api.api = UFFD_API;
	api.features = UFFD_FEATURE_EVENT_UNMAP | UFFD_FEATURE_EVENT_REMOVE |
		       UFFD_FEATURE_EVENT_REMAP;
	ret = ioctl(uffd.fd, UFFDIO_API, &api);
	if (ret < 0) {
		FI_WARN(&core_prov, FI_LOG_MR,
			"ioctl/uffdio: %s\n", strerror(errno));
		ret = -errno;
		goto closefd;
	}

	/* The kernel reports the API it actually supports back in api.api. */
	if (api.api != UFFD_API) {
		FI_WARN(&core_prov, FI_LOG_MR,
			"uffd features not supported\n");
		ret = -FI_ENOSYS;
		goto closefd;
	}

	ret = pthread_create(&uffd.thread, NULL, ofi_uffd_handler, &uffd);
	if (ret) {
		FI_WARN(&core_prov, FI_LOG_MR,
			"failed to create handler thread %s\n", strerror(ret));
		/* pthread_create returns a positive errno value. */
		ret = -ret;
		goto closefd;
	}
	return 0;

closefd:
	close(uffd.fd);
	return ret;
}
/* Progress the RMA-read entry at the head of the EP's rma_read_queue.
 * While the socket would block, the entry stays queued and the retry
 * code is returned.  Otherwise a completion is reported on the TX CQ
 * (the read was initiated locally), the entry is popped from the
 * queue, and it is released back to the CQ's pool. */
static int process_rx_read_entry(struct tcpx_xfer_entry *rx_entry)
{
	struct tcpx_cq *tcpx_cq;
	int ret;

	ret = tcpx_recv_msg_data(rx_entry);
	if (OFI_SOCK_TRY_SND_RCV_AGAIN(-ret))
		return ret;

	if (!ret)
		goto done;

	FI_WARN(&tcpx_prov, FI_LOG_DOMAIN, "msg recv Failed ret = %d\n", ret);
	if (ret == -FI_ENOTCONN)
		tcpx_ep_shutdown_report(rx_entry->ep,
					&rx_entry->ep->util_ep.ep_fid.fid);
done:
	/* -ret: completion carries a positive error code (0 on success). */
	tcpx_cq_report_completion(rx_entry->ep->util_ep.tx_cq,
				  rx_entry, -ret);
	slist_remove_head(&rx_entry->ep->rma_read_queue);
	tcpx_cq = container_of(rx_entry->ep->util_ep.tx_cq,
			       struct tcpx_cq, util_cq);
	tcpx_xfer_entry_release(tcpx_cq, rx_entry);
	return FI_SUCCESS;
}
/* Progress the TX entry at the head of the EP's tx_queue.  Returns
 * silently while the send would block.  Otherwise a completion is
 * reported and the entry is released — unless the message's header
 * requested delivery/commit completion, in which case the entry is
 * parked on tx_rsp_pend_queue (presumably until a peer response
 * arrives — confirm against the response-handling path). */
void process_tx_entry(struct tcpx_xfer_entry *tx_entry)
{
	struct tcpx_cq *tcpx_cq;
	int ret;

	ret = tcpx_send_msg(tx_entry);
	if (OFI_SOCK_TRY_SND_RCV_AGAIN(-ret))
		return;

	if (!ret)
		goto done;

	FI_WARN(&tcpx_prov, FI_LOG_DOMAIN, "msg send failed\n");
	if (ret == -FI_ENOTCONN)
		tcpx_ep_shutdown_report(tx_entry->ep,
					&tx_entry->ep->util_ep.ep_fid.fid);
done:
	/* -ret: completion carries a positive error code (0 on success). */
	tcpx_cq_report_completion(tx_entry->ep->util_ep.tx_cq,
				  tx_entry, -ret);
	slist_remove_head(&tx_entry->ep->tx_queue);

	/* Header flags travel in network byte order. */
	if (ntohl(tx_entry->msg_hdr.hdr.flags) &
	    (OFI_DELIVERY_COMPLETE | OFI_COMMIT_COMPLETE)) {
		tx_entry->flags |= FI_COMPLETION;
		slist_insert_tail(&tx_entry->entry,
				  &tx_entry->ep->tx_rsp_pend_queue);
		return;
	}

	tcpx_cq = container_of(tx_entry->ep->util_ep.tx_cq,
			       struct tcpx_cq, util_cq);
	tcpx_xfer_entry_release(tcpx_cq, tx_entry);
}
/* Progress the receive entry currently being filled on the EP.
 * Returns the retry code while the socket would block.  If the sender
 * requested delivery-complete, completion is deferred to
 * tcpx_prepare_rx_entry_resp (presumably to send a response first —
 * confirm against that function); otherwise a completion is reported
 * on the RX CQ and the entry is released. */
static int process_rx_entry(struct tcpx_xfer_entry *rx_entry)
{
	struct tcpx_cq *tcpx_cq;
	int ret;

	ret = tcpx_recv_msg_data(rx_entry);
	if (OFI_SOCK_TRY_SND_RCV_AGAIN(-ret))
		return ret;

	if (!ret)
		goto done;

	FI_WARN(&tcpx_prov, FI_LOG_DOMAIN, "msg recv Failed ret = %d\n", ret);
	if (ret == -FI_ENOTCONN)
		tcpx_ep_shutdown_report(rx_entry->ep,
					&rx_entry->ep->util_ep.ep_fid.fid);
done:
	/* Header flags travel in network byte order. */
	if (ntohl(rx_entry->msg_hdr.hdr.flags) & OFI_DELIVERY_COMPLETE) {
		/* If the response can't be prepared now, retry it from the
		 * EP's progress function. */
		if (tcpx_prepare_rx_entry_resp(rx_entry))
			rx_entry->ep->cur_rx_proc_fn = tcpx_prepare_rx_entry_resp;
		return FI_SUCCESS;
	}

	tcpx_cq_report_completion(rx_entry->ep->util_ep.rx_cq,
				  rx_entry, -ret);
	tcpx_cq = container_of(rx_entry->ep->util_ep.rx_cq,
			       struct tcpx_cq, util_cq);
	tcpx_xfer_entry_release(tcpx_cq, rx_entry);
	return FI_SUCCESS;
}