static void handle_connreq(struct poll_fd_mgr *poll_mgr, struct poll_fd_info *poll_info) { struct tcpx_conn_handle *handle; struct tcpx_pep *pep; struct fi_eq_cm_entry *cm_entry; struct ofi_ctrl_hdr conn_req; SOCKET sock; int ret; assert(poll_info->fid->fclass == FI_CLASS_PEP); pep = container_of(poll_info->fid, struct tcpx_pep, util_pep.pep_fid.fid); sock = accept(pep->sock, NULL, 0); if (sock < 0) { FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL, "accept error: %d\n", ofi_sockerr()); return; } ret = rx_cm_data(sock, &conn_req, ofi_ctrl_connreq, poll_info); if (ret) { FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL, "cm data recv failed \n"); goto err1; } handle = calloc(1, sizeof(*handle)); if (!handle) goto err1; cm_entry = calloc(1, sizeof(*cm_entry) + poll_info->cm_data_sz); if (!cm_entry) goto err2; handle->conn_fd = sock; cm_entry->fid = poll_info->fid; cm_entry->info = fi_dupinfo(&pep->info); if (!cm_entry->info) goto err3; cm_entry->info->handle = &handle->handle; memcpy(cm_entry->data, poll_info->cm_data, poll_info->cm_data_sz); ret = (int) fi_eq_write(&pep->util_pep.eq->eq_fid, FI_CONNREQ, cm_entry, sizeof(*cm_entry) + poll_info->cm_data_sz, 0); if (ret < 0) { FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL, "Error writing to EQ\n"); goto err4; } free(cm_entry); return; err4: fi_freeinfo(cm_entry->info); err3: free(cm_entry); err2: free(handle); err1: ofi_close_socket(sock); }
static int get_dupinfo(void) { struct fi_info *hints_dup; int ret; /* Get a fi_info corresponding to a wild card port. The first endpoint * should use default/given port since that is what is known to both * client and server. For other endpoints we should use addresses with * random ports to avoid collision. fi_getinfo should return a random * port if we don't specify it in the service arg or the hints. This * is used only for non-MSG endpoints. */ hints_dup = fi_dupinfo(hints); if (!hints_dup) return -FI_ENOMEM; free(hints_dup->src_addr); free(hints_dup->dest_addr); hints_dup->src_addr = NULL; hints_dup->dest_addr = NULL; hints_dup->src_addrlen = 0; hints_dup->dest_addrlen = 0; if (opts.dst_addr) { ret = fi_getinfo(FT_FIVERSION, opts.dst_addr, NULL, 0, hints_dup, &fi_dup); } else { ret = fi_getinfo(FT_FIVERSION, opts.src_addr, NULL, FI_SOURCE, hints_dup, &fi_dup); } if (ret) FT_PRINTERR("fi_getinfo", ret); fi_freeinfo(hints_dup); return ret; }
static struct fi_info * usdf_pep_conn_info(struct usdf_connreq *crp) { struct fi_info *ip; struct usdf_pep *pep; struct sockaddr_in *sin; struct usdf_connreq_msg *reqp; pep = crp->cr_pep; reqp = (struct usdf_connreq_msg *)crp->cr_data; ip = fi_dupinfo(pep->pep_info); if (!ip) { USDF_WARN_SYS(EP_CTRL, "failed to duplicate pep info\n"); return NULL; } /* fill in dest addr */ ip->dest_addrlen = ip->src_addrlen; sin = calloc(1, ip->dest_addrlen); if (sin == NULL) { goto fail; } sin->sin_family = AF_INET; sin->sin_addr.s_addr = reqp->creq_ipaddr; sin->sin_port = reqp->creq_port; ip->dest_addr = sin; ip->handle = (fid_t) crp; return ip; fail: fi_freeinfo(ip); return NULL; }
/* Process an incoming connection request at a listening PEP. */ static int __gnix_pep_connreq(struct gnix_fid_pep *pep, int fd) { int ret; struct gnix_pep_sock_conn *conn; struct fi_eq_cm_entry *eq_entry; int eqe_size; /* Create and initialize a new connection request. */ conn = calloc(1, sizeof(*conn)); if (!conn) { GNIX_WARN(FI_LOG_EP_CTRL, "Failed to alloc accepted socket conn\n"); return -FI_ENOMEM; } conn->fid.fclass = FI_CLASS_CONNREQ; conn->fid.context = pep; conn->sock_fd = fd; /* Pull request data from the listening socket. */ conn->bytes_read += read(fd, &conn->req, sizeof(conn->req)); if (conn->bytes_read != sizeof(conn->req)) { /* TODO Wait for more bytes. */ GNIX_FATAL(FI_LOG_EP_CTRL, "Unexpected read size\n"); } conn->req.info.src_addr = &conn->req.src_addr; conn->req.info.dest_addr = &conn->req.dest_addr; conn->req.info.tx_attr = &conn->req.tx_attr; conn->req.info.rx_attr = &conn->req.rx_attr; conn->req.info.ep_attr = &conn->req.ep_attr; conn->req.info.domain_attr = &conn->req.domain_attr; conn->req.info.fabric_attr = &conn->req.fabric_attr; conn->req.info.domain_attr->name = NULL; conn->req.info.fabric_attr->name = NULL; conn->req.info.fabric_attr->prov_name = NULL; conn->info = &conn->req.info; conn->info->handle = &conn->fid; /* Tell user of a new conn req via the EQ. */ eq_entry = (struct fi_eq_cm_entry *)conn->req.eqe_buf; eq_entry->fid = &pep->pep_fid.fid; eq_entry->info = fi_dupinfo(conn->info); eqe_size = sizeof(*eq_entry) + conn->req.cm_data_len; ret = fi_eq_write(&pep->eq->eq_fid, FI_CONNREQ, eq_entry, eqe_size, 0); if (ret != eqe_size) { GNIX_WARN(FI_LOG_EP_CTRL, "fi_eq_write failed, err: %d\n", ret); fi_freeinfo(conn->info); free(conn); return ret; } GNIX_DEBUG(FI_LOG_EP_CTRL, "Added FI_CONNREQ EQE: %p, %p\n", pep->eq, pep); return FI_SUCCESS; }
static int efa_get_matching_info(uint32_t version, const char *node, uint64_t flags, const struct fi_info *hints, struct fi_info **info) { const struct fi_info *check_info; struct fi_info *fi, *tail; int ret; *info = tail = NULL; for (check_info = efa_util_prov.info; check_info; check_info = check_info->next) { ret = 0; if (flags & FI_SOURCE) { if (node) ret = efa_node_matches_addr(check_info->src_addr, node); } else if (hints && hints->src_addr) { ret = memcmp(check_info->src_addr, hints->src_addr, EFA_EP_ADDR_LEN); } if (ret) continue; EFA_INFO(FI_LOG_FABRIC, "found match for interface %s %s\n", node, check_info->fabric_attr->name); if (hints) { ret = efa_check_hints(version, hints, check_info); if (ret) continue; } fi = fi_dupinfo(check_info); if (!fi) { ret = -FI_ENOMEM; goto err_free_info; } ret = efa_set_default_info(fi); if (ret) { fi_freeinfo(fi); continue; } if (!*info) *info = fi; else tail->next = fi; tail = fi; } if (!*info) return -FI_ENODATA; return 0; err_free_info: fi_freeinfo(*info); *info = NULL; return ret; }
static struct fi_info * fi_ibv_eq_cm_getinfo(struct fi_ibv_fabric *fab, struct rdma_cm_event *event, struct fi_info *pep_info) { struct fi_info *info, *fi; struct fi_ibv_connreq *connreq; const char *devname = ibv_get_device_name(event->id->verbs->device); if (strcmp(devname, fab->info->domain_attr->name)) { fi = fi_ibv_get_verbs_info(fab->all_infos, devname); if (!fi) return NULL; } else { fi = fab->info; } info = fi_dupinfo(fi); if (!info) return NULL; info->fabric_attr->fabric = &fab->util_fabric.fabric_fid; if (!(info->fabric_attr->prov_name = strdup(VERBS_PROV_NAME))) goto err; ofi_alter_info(info, pep_info, fab->util_fabric.fabric_fid.api_version); info->src_addrlen = fi_ibv_sockaddr_len(rdma_get_local_addr(event->id)); if (!(info->src_addr = malloc(info->src_addrlen))) goto err; memcpy(info->src_addr, rdma_get_local_addr(event->id), info->src_addrlen); info->dest_addrlen = fi_ibv_sockaddr_len(rdma_get_peer_addr(event->id)); if (!(info->dest_addr = malloc(info->dest_addrlen))) goto err; memcpy(info->dest_addr, rdma_get_peer_addr(event->id), info->dest_addrlen); VERBS_INFO(FI_LOG_CORE, "src_addr: %s:%d\n", inet_ntoa(((struct sockaddr_in *)info->src_addr)->sin_addr), ntohs(((struct sockaddr_in *)info->src_addr)->sin_port)); VERBS_INFO(FI_LOG_CORE, "dst_addr: %s:%d\n", inet_ntoa(((struct sockaddr_in *)info->dest_addr)->sin_addr), ntohs(((struct sockaddr_in *)info->dest_addr)->sin_port)); connreq = calloc(1, sizeof *connreq); if (!connreq) goto err; connreq->handle.fclass = FI_CLASS_CONNREQ; connreq->id = event->id; info->handle = &connreq->handle; return info; err: fi_freeinfo(info); return NULL; }
static int fi_bgq_getinfo(uint32_t version, const char *node, const char *service, uint64_t flags, struct fi_info *hints, struct fi_info **info) { if (!((FI_BGQ_FABRIC_DIRECT_PROGRESS == FI_PROGRESS_MANUAL) || (FI_BGQ_FABRIC_DIRECT_PROGRESS == FI_PROGRESS_AUTO))){ fprintf(stderr,"BGQ Provider must be configured with either auto or manual progresss mode specified\n"); exit(1); assert(0); } BG_JobCoords_t jobCoords; uint32_t jcrc = Kernel_JobCoords(&jobCoords); if (jobCoords.isSubBlock) { fprintf(stderr,"BGQ Provider cannot be run in a sub-block.\n"); fflush(stderr); exit(1); } int ret; struct fi_info *fi, *prev_fi, *curr; if (!fi_bgq_count) { errno = FI_ENODATA; return -errno; } if (hints) { ret = fi_bgq_check_info(hints); if (ret) { return ret; } if (!(fi = fi_allocinfo())) { return -FI_ENOMEM; } if (fi_bgq_fillinfo(fi, node, service, hints, flags)) { return -errno; } *info = fi; } else { if(node || service) { errno = FI_ENODATA; return -errno; } else { if (!(fi = fi_dupinfo(fi_bgq_global.info))) { return -FI_ENOMEM; } *info = fi; } } return 0; }
static void server_recv_connreq(struct util_wait *wait, struct tcpx_cm_context *cm_ctx) { struct tcpx_conn_handle *handle; struct fi_eq_cm_entry *cm_entry; struct ofi_ctrl_hdr conn_req; int ret; assert(cm_ctx->fid->fclass == FI_CLASS_CONNREQ); handle = container_of(cm_ctx->fid, struct tcpx_conn_handle, handle); ret = rx_cm_data(handle->conn_fd, &conn_req, ofi_ctrl_connreq, cm_ctx); if (ret) goto err1; cm_entry = calloc(1, sizeof(*cm_entry) + cm_ctx->cm_data_sz); if (!cm_entry) goto err1; cm_entry->fid = &handle->pep->util_pep.pep_fid.fid; cm_entry->info = fi_dupinfo(&handle->pep->info); if (!cm_entry->info) goto err2; cm_entry->info->handle = &handle->handle; memcpy(cm_entry->data, cm_ctx->cm_data, cm_ctx->cm_data_sz); ret = (int) fi_eq_write(&handle->pep->util_pep.eq->eq_fid, FI_CONNREQ, cm_entry, sizeof(*cm_entry) + cm_ctx->cm_data_sz, 0); if (ret < 0) { FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL, "Error writing to EQ\n"); goto err3; } ret = ofi_wait_fd_del(wait, handle->conn_fd); if (ret) FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL, "fd deletion from ofi_wait failed\n"); free(cm_entry); free(cm_ctx); return; err3: fi_freeinfo(cm_entry->info); err2: free(cm_entry); err1: ofi_wait_fd_del(wait, handle->conn_fd); ofi_close_socket(handle->conn_fd); free(cm_ctx); free(handle); }
static void udpx_getinfo_ifs(struct fi_info **info) { struct ifaddrs *ifaddrs, *ifa; struct fi_info *head, *tail, *cur; size_t addrlen; uint32_t addr_format; int ret; ret = getifaddrs(&ifaddrs); if (ret) return; head = tail = NULL; for (ifa = ifaddrs; ifa != NULL; ifa = ifa->ifa_next) { if (ifa->ifa_addr == NULL || !(ifa->ifa_flags & IFF_UP)) continue; switch (ifa->ifa_addr->sa_family) { case AF_INET: addrlen = sizeof(struct sockaddr_in); addr_format = FI_SOCKADDR_IN; break; case AF_INET6: addrlen = sizeof(struct sockaddr_in6); addr_format = FI_SOCKADDR_IN6; break; default: continue; } cur = fi_dupinfo(*info); if (!cur) break; if (!head) head = cur; else tail->next = cur; tail = cur; if ((cur->src_addr = mem_dup(ifa->ifa_addr, addrlen))) { cur->src_addrlen = addrlen; cur->addr_format = addr_format; } } freeifaddrs(ifaddrs); if (head) { fi_freeinfo(*info); *info = head; } }
static int fi_ibv_domain(struct fid_fabric *fabric, struct fi_info *info, struct fid_domain **domain, void *context) { struct fi_ibv_domain *_domain; struct fi_info *fi; int ret; fi = fi_ibv_get_verbs_info(info->domain_attr->name); if (!fi) return -FI_EINVAL; ret = fi_ibv_check_domain_attr(info->domain_attr, fi); if (ret) return ret; _domain = calloc(1, sizeof *_domain); if (!_domain) return -FI_ENOMEM; _domain->info = fi_dupinfo(info); if (!_domain->info) goto err1; _domain->rdm = FI_IBV_EP_TYPE_IS_RDM(info); ret = fi_ibv_open_device_by_name(_domain, info->domain_attr->name); if (ret) goto err2; _domain->pd = ibv_alloc_pd(_domain->verbs); if (!_domain->pd) { ret = -errno; goto err2; } _domain->domain_fid.fid.fclass = FI_CLASS_DOMAIN; _domain->domain_fid.fid.context = context; _domain->domain_fid.fid.ops = &fi_ibv_fid_ops; _domain->domain_fid.ops = _domain->rdm ? &fi_ibv_rdm_domain_ops : &fi_ibv_domain_ops; _domain->domain_fid.mr = &fi_ibv_domain_mr_ops; _domain->fab = container_of(fabric, struct fi_ibv_fabric, fabric_fid); *domain = &_domain->domain_fid; return 0; err2: fi_freeinfo(_domain->info); err1: free(_domain); return ret; }
/* TODO: This should copy the listening fi_info as the base */ static struct fi_info * fi_ibv_eq_cm_getinfo(struct fi_ibv_fabric *fab, struct rdma_cm_event *event) { struct fi_info *info, *fi; struct fi_ibv_connreq *connreq; fi = fi_ibv_get_verbs_info(ibv_get_device_name(event->id->verbs->device)); if (!fi) return NULL; info = fi_dupinfo(fi); if (!info) return NULL; info->fabric_attr->fabric = &fab->fabric_fid; if (!(info->fabric_attr->prov_name = strdup(VERBS_PROV_NAME))) goto err; fi_ibv_update_info(NULL, info); info->src_addrlen = fi_ibv_sockaddr_len(rdma_get_local_addr(event->id)); if (!(info->src_addr = malloc(info->src_addrlen))) goto err; memcpy(info->src_addr, rdma_get_local_addr(event->id), info->src_addrlen); info->dest_addrlen = fi_ibv_sockaddr_len(rdma_get_peer_addr(event->id)); if (!(info->dest_addr = malloc(info->dest_addrlen))) goto err; memcpy(info->dest_addr, rdma_get_peer_addr(event->id), info->dest_addrlen); FI_INFO(&fi_ibv_prov, FI_LOG_CORE, "src_addr: %s:%d\n", inet_ntoa(((struct sockaddr_in *)info->src_addr)->sin_addr), ntohs(((struct sockaddr_in *)info->src_addr)->sin_port)); FI_INFO(&fi_ibv_prov, FI_LOG_CORE, "dst_addr: %s:%d\n", inet_ntoa(((struct sockaddr_in *)info->dest_addr)->sin_addr), ntohs(((struct sockaddr_in *)info->dest_addr)->sin_port)); connreq = calloc(1, sizeof *connreq); if (!connreq) goto err; connreq->handle.fclass = FI_CLASS_CONNREQ; connreq->id = event->id; info->handle = &connreq->handle; return info; err: fi_freeinfo(info); return NULL; }
int rxm_endpoint(struct fid_domain *domain, struct fi_info *info, struct fid_ep **ep_fid, void *context) { struct util_domain *util_domain; struct rxm_ep *rxm_ep; int ret; rxm_ep = calloc(1, sizeof(*rxm_ep)); if (!rxm_ep) return -FI_ENOMEM; if (!(rxm_ep->rxm_info = fi_dupinfo(info))) { ret = -FI_ENOMEM; goto err1; } ret = ofi_endpoint_init(domain, &rxm_util_prov, info, &rxm_ep->util_ep, context, &rxm_ep_progress, FI_MATCH_PREFIX); if (ret) goto err1; util_domain = container_of(domain, struct util_domain, domain_fid); ret = rxm_ep_msg_res_open(info, util_domain, rxm_ep); if (ret) goto err2; ret = rxm_ep_txrx_res_open(rxm_ep); if (ret) goto err3; *ep_fid = &rxm_ep->util_ep.ep_fid; (*ep_fid)->fid.ops = &rxm_ep_fi_ops; (*ep_fid)->ops = &rxm_ops_ep; (*ep_fid)->cm = &rxm_ops_cm; (*ep_fid)->msg = &rxm_ops_msg; (*ep_fid)->tagged = &rxm_ops_tagged; return 0; err3: rxm_ep_msg_res_close(rxm_ep); err2: ofi_endpoint_close(&rxm_ep->util_ep); err1: if (rxm_ep->rxm_info) fi_freeinfo(rxm_ep->rxm_info); free(rxm_ep); return ret; }
static int fi_ibv_get_matching_info(const char *domain_name, struct fi_info *hints, struct rdma_addrinfo *rai, struct fi_info **info) { struct fi_info *check_info; struct fi_info *fi, *tail; int ret; *info = tail = NULL; for (check_info = verbs_info; check_info; check_info = check_info->next) { if (domain_name && strncmp(check_info->domain_attr->name, domain_name, strlen(domain_name))) continue; if (hints) { ret = fi_ibv_check_hints(hints, check_info); if (ret) continue; } if (!(fi = fi_dupinfo(check_info))) { ret = -FI_ENOMEM; goto err1; } ret = fi_ibv_rai_to_fi(rai, fi); if (ret) goto err2; fi_ibv_update_info(hints, fi); if (!*info) *info = fi; else tail->next = fi; tail = fi; } if (!*info) return -FI_ENODATA; return 0; err2: fi_freeinfo(fi); err1: fi_freeinfo(*info); return ret; }
static int fi_bgq_getinfo(uint32_t version, const char *node, const char *service, uint64_t flags, struct fi_info *hints, struct fi_info **info) { int ret; struct fi_info *fi, *prev_fi, *curr; if (!fi_bgq_count) { errno = FI_ENODATA; return -errno; } if (hints) { ret = fi_bgq_check_info(hints); if (ret) { return ret; } if (!(fi = fi_allocinfo())) { return -FI_ENOMEM; } if (fi_bgq_fillinfo(fi, node, service, hints, flags)) { return -errno; } *info = fi; } else { if(node || service) { errno = FI_ENODATA; return -errno; } else { curr = fi_bgq_global.info; *info = curr; prev_fi = NULL; do { if (!(fi = fi_dupinfo(curr))) { return -FI_ENOMEM; } if (prev_fi) { prev_fi->next = fi; } prev_fi = fi; curr = curr->next; } while(curr); } } return 0; }
/*
 * Version-1.1 flavour of fi_dupinfo().
 *
 * A NULL argument yields a fresh info from ofi_allocinfo_internal().
 * Otherwise the 1.1 structure is first copied into a temporary
 * struct fi_info via ofi_dup_attr(), that temporary is duplicated with
 * fi_dupinfo(), and the temporary itself is released.
 *
 * Returns the duplicate cast to the 1.1 type, or NULL when the temporary
 * copy cannot be made or fi_dupinfo() fails.
 */
struct fi_info_1_1 *fi_dupinfo_1_1(const struct fi_info_1_1 *info)
{
	struct fi_info *widened;
	struct fi_info *copy;

	if (!info)
		return (struct fi_info_1_1 *) ofi_allocinfo_internal();

	ofi_dup_attr(widened, info);
	if (widened != NULL) {
		copy = fi_dupinfo(widened);
		/* Only the temporary shell is released here. */
		free(widened);
		return (struct fi_info_1_1 *) copy;
	}

	return NULL;
}
/*
 * Allocate a zeroed MSG endpoint object that owns a private copy of info.
 *
 * Returns the endpoint, or NULL if either allocation fails.
 */
static struct fi_ibv_msg_ep *fi_ibv_alloc_msg_ep(struct fi_info *info)
{
	struct fi_ibv_msg_ep *msg_ep = calloc(1, sizeof *msg_ep);

	if (msg_ep == NULL)
		return NULL;

	msg_ep->info = fi_dupinfo(info);
	if (msg_ep->info == NULL) {
		free(msg_ep);
		msg_ep = NULL;
	}

	return msg_ep;
}
static int fi_ibv_get_srcaddr_devs(struct fi_info **info) { struct fi_info *fi, *add_info; struct fi_info *fi_unconf = NULL, *fi_prev = NULL; struct verbs_dev_info *dev; struct verbs_addr *addr; int ret = 0; DEFINE_LIST(verbs_devs); ret = fi_ibv_getifaddrs(&verbs_devs); if (ret) return ret; if (dlist_empty(&verbs_devs)) { VERBS_WARN(FI_LOG_CORE, "No interface address found\n"); return 0; } for (fi = *info; fi; fi = fi->next) { dlist_foreach_container(&verbs_devs, struct verbs_dev_info, dev, entry) if (!strncmp(fi->domain_attr->name, dev->name, strlen(dev->name))) { dlist_foreach_container(&dev->addrs, struct verbs_addr, addr, entry) { /* When a device has multiple interfaces/addresses configured * duplicate fi_info and add the address info. fi->src_addr * would have been set in the previous iteration */ if (fi->src_addr) { if (!(add_info = fi_dupinfo(fi))) { ret = -FI_ENOMEM; goto out; } add_info->next = fi->next; fi->next = add_info; fi = add_info; } ret = fi_ibv_rai_to_fi(addr->rai, fi); if (ret) goto out; } break; } }
static int rx_size_left_err(void) { int ret; int testret; struct fid_ep *ep; struct fi_info *myfi; testret = FAIL; ep = NULL; myfi = fi_dupinfo(fi); /* datapath operation, not expected to be caught by libfabric */ #if 0 ret = fi_rx_size_left(NULL); if (ret != -FI_EINVAL) { goto fail; } #endif ret = fi_endpoint(domain, myfi, &ep, NULL); if (ret != 0) { printf("fi_endpoint %s\n", fi_strerror(-ret)); goto fail; } /* ep starts in a non-enabled state, may fail, should not SEGV */ fi_rx_size_left(ep); testret = PASS; fail: if (ep != NULL) { ret = fi_close(&ep->fid); if (ret != 0) printf("fi_close %s\n", fi_strerror(-ret)); ep = NULL; } if (myfi != NULL) { fi_freeinfo(myfi); } return testret; }
static int rxm_getinfo(uint32_t version, const char *node, const char *service, uint64_t flags, struct fi_info *hints, struct fi_info **info) { struct fi_info *cur, *dup; int ret; ret = ofix_getinfo(version, node, service, flags, &rxm_util_prov, hints, rxm_info_to_core, rxm_info_to_rxm, info); if (ret) return ret; /* If app supports FI_MR_LOCAL, prioritize requiring it for * better performance. */ if (hints && hints->domain_attr && (RXM_MR_LOCAL(hints))) { for (cur = *info; cur; cur = cur->next) { if (!RXM_MR_LOCAL(cur)) continue; if (!(dup = fi_dupinfo(cur))) { fi_freeinfo(*info); return -FI_ENOMEM; } if (FI_VERSION_LT(version, FI_VERSION(1, 5))) dup->mode &= ~FI_LOCAL_MR; else dup->domain_attr->mr_mode &= ~FI_MR_LOCAL; dup->next = cur->next; cur->next = dup; cur = dup; } } else { for (cur = *info; cur; cur = cur->next) { if (FI_VERSION_LT(version, FI_VERSION(1, 5))) cur->mode &= ~FI_LOCAL_MR; else cur->domain_attr->mr_mode &= ~FI_MR_LOCAL; } } return 0; }
/* if there are multiple fi_info in the provider: * check and duplicate provider's info */ int ofi_prov_check_dup_info(const struct util_prov *util_prov, uint32_t api_version, const struct fi_info *user_info, struct fi_info **info) { const struct fi_info *prov_info = util_prov->info; const struct fi_provider *prov = util_prov->prov; struct fi_info *fi, *tail; int ret; if (!info) return -FI_EINVAL; *info = tail = NULL; for ( ; prov_info; prov_info = prov_info->next) { ret = ofi_check_info(util_prov, prov_info, api_version, user_info); if (ret) continue; if (!(fi = fi_dupinfo(prov_info))) { ret = -FI_ENOMEM; goto err; } if (!*info) *info = fi; else tail->next = fi; tail = fi; } return (!*info ? -FI_ENODATA : FI_SUCCESS); err: fi_freeinfo(*info); FI_INFO(prov, FI_LOG_CORE, "cannot copy info\n"); return ret; }
static struct fi_info * usdf_pep_conn_info(struct usdf_connreq *crp) { struct fi_info *ip; struct usdf_pep *pep; struct sockaddr_in *sin; struct usdf_fabric *fp; struct usdf_domain *udp; struct usd_device_attrs *dap; struct usdf_connreq_msg *reqp; pep = crp->cr_pep; fp = pep->pep_fabric; udp = LIST_FIRST(&fp->fab_domain_list); dap = fp->fab_dev_attrs; reqp = (struct usdf_connreq_msg *)crp->cr_data; /* If there is a domain, just copy info from there */ if (udp != NULL) { ip = fi_dupinfo(udp->dom_info); if (ip == NULL) { return NULL; } /* no domains yet, make an info suitable for creating one */ } else { ip = fi_allocinfo(); if (ip == NULL) { return NULL; } ip->caps = USDF_MSG_CAPS; ip->mode = USDF_MSG_SUPP_MODE; ip->ep_attr->type = FI_EP_MSG; ip->addr_format = FI_SOCKADDR_IN; ip->src_addrlen = sizeof(struct sockaddr_in); sin = calloc(1, ip->src_addrlen); if (sin == NULL) { goto fail; } sin->sin_family = AF_INET; sin->sin_addr.s_addr = dap->uda_ipaddr_be; ip->src_addr = sin; ip->ep_attr->protocol = FI_PROTO_RUDP; ip->fabric_attr->fabric = fab_utof(fp); ip->fabric_attr->name = strdup(fp->fab_attr.name); ip->fabric_attr->prov_name = strdup(fp->fab_attr.prov_name); ip->fabric_attr->prov_version = fp->fab_attr.prov_version; if (ip->fabric_attr->name == NULL || ip->fabric_attr->prov_name == NULL) { goto fail; } } /* fill in dest addr */ ip->dest_addrlen = ip->src_addrlen; sin = calloc(1, ip->dest_addrlen); if (sin == NULL) { goto fail; } sin->sin_family = AF_INET; sin->sin_addr.s_addr = reqp->creq_ipaddr; sin->sin_port = reqp->creq_port; ip->dest_addr = sin; ip->handle = (fid_t) crp; return ip; fail: fi_freeinfo(ip); return NULL; }
static int av_removal_test(void) { int ret; fprintf(stdout, "AV address removal: "); hints = fi_dupinfo(base_hints); if (!hints) return -FI_ENOMEM; ret = ft_init_fabric(); if (ret) goto out; if (opts.dst_addr) { ret = ft_tx(ep, remote_fi_addr, opts.transfer_size, &tx_ctx); if (ret) { FT_PRINTERR("ft_tx", -ret); goto out; } ret = fi_av_remove(av, &remote_fi_addr, 1, 0); if (ret) { FT_PRINTERR("fi_av_remove", ret); goto out; } ret = ft_sync(); if (ret) goto out; ret = ft_init_av(); if (ret) { FT_PRINTERR("ft_init_av", -ret); goto out; } ret = ft_rx(ep, opts.transfer_size); if (ret) { FT_PRINTERR("ft_rx", -ret); goto out; } } else { ret = ft_rx(ep, opts.transfer_size); if (ret) { FT_PRINTERR("ft_rx", -ret); goto out; } ret = fi_av_remove(av, &remote_fi_addr, 1, 0); if (ret) { FT_PRINTERR("fi_av_remove", ret); goto out; } ret = ft_sync(); if (ret) goto out; ret = ft_init_av(); if (ret) { FT_PRINTERR("ft_init_av", -ret); goto out; } ret = ft_tx(ep, remote_fi_addr, opts.transfer_size, &tx_ctx); if (ret) { FT_PRINTERR("ft_tx", -ret); goto out; } } fprintf(stdout, "PASS\n"); (void) ft_sync(); out: ft_free_res(); return ret; }
int usdf_pep_open(struct fid_fabric *fabric, struct fi_info *info, struct fid_pep **pep_o, void *context) { struct usdf_pep *pep; struct usdf_fabric *fp; struct sockaddr_in *sin; int ret; int optval; USDF_TRACE_SYS(EP_CTRL, "\n"); if (!info) { USDF_DBG_SYS(EP_CTRL, "null fi_info struct is invalid\n"); return -FI_EINVAL; } if (info->ep_attr->type != FI_EP_MSG) { return -FI_ENODEV; } if ((info->caps & ~USDF_MSG_CAPS) != 0) { return -FI_EBADF; } switch (info->addr_format) { case FI_SOCKADDR: if (((struct sockaddr *)info->src_addr)->sa_family != AF_INET) { USDF_WARN_SYS(EP_CTRL, "non-AF_INET src_addr specified\n"); return -FI_EINVAL; } break; case FI_SOCKADDR_IN: break; default: USDF_WARN_SYS(EP_CTRL, "unknown/unsupported addr_format\n"); return -FI_EINVAL; } if (info->src_addrlen && info->src_addrlen != sizeof(struct sockaddr_in)) { USDF_WARN_SYS(EP_CTRL, "unexpected src_addrlen\n"); return -FI_EINVAL; } fp = fab_ftou(fabric); pep = calloc(1, sizeof(*pep)); if (pep == NULL) { return -FI_ENOMEM; } pep->pep_fid.fid.fclass = FI_CLASS_PEP; pep->pep_fid.fid.context = context; pep->pep_fid.fid.ops = &usdf_pep_ops; pep->pep_fid.ops = &usdf_pep_base_ops; pep->pep_fid.cm = &usdf_pep_cm_ops; pep->pep_fabric = fp; pep->pep_state = USDF_PEP_UNBOUND; pep->pep_sock = socket(AF_INET, SOCK_STREAM, 0); if (pep->pep_sock == -1) { ret = -errno; goto fail; } ret = fcntl(pep->pep_sock, F_GETFL, 0); if (ret == -1) { ret = -errno; goto fail; } ret = fcntl(pep->pep_sock, F_SETFL, ret | O_NONBLOCK); if (ret == -1) { ret = -errno; goto fail; } /* set SO_REUSEADDR to prevent annoying "Address already in use" errors * on successive runs of programs listening on a well known port */ optval = 1; ret = setsockopt(pep->pep_sock, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval)); if (ret == -1) { ret = -errno; goto fail; } pep->pep_info = fi_dupinfo(info); if (!pep->pep_info) { ret = -FI_ENOMEM; goto fail; } if (info->src_addrlen == 0) { /* Copy the source address information from the device * 
attributes. */ pep->pep_info->src_addrlen = sizeof(struct sockaddr_in); sin = calloc(1, pep->pep_info->src_addrlen); if (!sin) { USDF_WARN_SYS(EP_CTRL, "calloc for src address failed\n"); goto fail; } sin->sin_family = AF_INET; sin->sin_addr.s_addr = fp->fab_dev_attrs->uda_ipaddr_be; pep->pep_info->src_addr = sin; } memcpy(&pep->pep_src_addr, pep->pep_info->src_addr, pep->pep_info->src_addrlen); /* initialize connreq freelist */ ret = pthread_spin_init(&pep->pep_cr_lock, PTHREAD_PROCESS_PRIVATE); if (ret != 0) { ret = -ret; goto fail; } TAILQ_INIT(&pep->pep_cr_free); TAILQ_INIT(&pep->pep_cr_pending); pep->pep_backlog = 10; ret = usdf_pep_grow_backlog(pep); if (ret != 0) { goto fail; } atomic_initialize(&pep->pep_refcnt, 0); atomic_inc(&fp->fab_refcnt); *pep_o = pep_utof(pep); return 0; fail: if (pep != NULL) { usdf_pep_free_cr_lists(pep); if (pep->pep_sock != -1) { close(pep->pep_sock); } fi_freeinfo(pep->pep_info); free(pep); } return ret; }
static int gnix_getinfo(uint32_t version, const char *node, const char *service, uint64_t flags, struct fi_info *hints, struct fi_info **info) { int ret = 0; int ep_type_unspec = 1; uint64_t mode = GNIX_FAB_MODES; struct fi_info *gnix_info = NULL; struct gnix_ep_name *dest_addr = NULL; struct gnix_ep_name *src_addr = NULL; struct gnix_ep_name *addr = NULL; /* * the code below for resolving a node/service to what * will be a gnix_ep_name address is not fully implemented, * but put a place holder in place */ if (node) { addr = malloc(sizeof(*addr)); if (!addr) { goto err; } /* resolve node/service to gnix_ep_name */ ret = gnix_resolve_name(node, service, flags, addr); if (ret) { goto err; } if (flags & FI_SOURCE) { /* resolved address is the local address */ src_addr = addr; if (hints && hints->dest_addr) dest_addr = hints->dest_addr; } else { /* resolved address is a peer */ dest_addr = addr; if (hints && hints->src_addr) src_addr = hints->src_addr; } } if (src_addr) GNIX_INFO(FI_LOG_FABRIC, "src_pe: 0x%x src_port: 0x%lx\n", src_addr->gnix_addr.device_addr, src_addr->gnix_addr.cdm_id); if (dest_addr) GNIX_INFO(FI_LOG_FABRIC, "dest_pe: 0x%x dest_port: 0x%lx\n", dest_addr->gnix_addr.device_addr, dest_addr->gnix_addr.cdm_id); /* * fill in the gnix_info struct */ gnix_info = fi_allocinfo(); if (gnix_info == NULL) { goto err; } /* * Set the default values */ gnix_info->tx_attr->op_flags = 0; gnix_info->rx_attr->op_flags = 0; gnix_info->ep_attr->type = FI_EP_RDM; gnix_info->ep_attr->protocol = FI_PROTO_GNI; gnix_info->ep_attr->max_msg_size = GNIX_MAX_MSG_SIZE; /* TODO: need to work on this */ gnix_info->ep_attr->mem_tag_format = 0x0; gnix_info->ep_attr->tx_ctx_cnt = 1; gnix_info->ep_attr->rx_ctx_cnt = 1; gnix_info->domain_attr->threading = FI_THREAD_SAFE; gnix_info->domain_attr->control_progress = FI_PROGRESS_AUTO; gnix_info->domain_attr->data_progress = FI_PROGRESS_AUTO; gnix_info->domain_attr->av_type = FI_AV_UNSPEC; gnix_info->domain_attr->tx_ctx_cnt = 
gnix_max_nics_per_ptag; /* only one aries per node */ gnix_info->domain_attr->name = strdup(gnix_dom_name); gnix_info->domain_attr->cq_data_size = sizeof(uint64_t); gnix_info->domain_attr->mr_mode = FI_MR_BASIC; gnix_info->domain_attr->resource_mgmt = FI_RM_ENABLED; gnix_info->next = NULL; gnix_info->addr_format = FI_ADDR_GNI; gnix_info->src_addrlen = sizeof(struct gnix_ep_name); gnix_info->dest_addrlen = sizeof(struct gnix_ep_name); gnix_info->src_addr = src_addr; gnix_info->dest_addr = dest_addr; /* prov_name gets filled in by fi_getinfo from the gnix_prov struct */ /* let's consider gni copyrighted :) */ gnix_info->tx_attr->msg_order = FI_ORDER_SAS; gnix_info->tx_attr->comp_order = FI_ORDER_NONE; gnix_info->tx_attr->size = GNIX_TX_SIZE_DEFAULT; gnix_info->tx_attr->iov_limit = 1; gnix_info->tx_attr->inject_size = GNIX_INJECT_SIZE; gnix_info->tx_attr->rma_iov_limit = 1; gnix_info->rx_attr->msg_order = FI_ORDER_SAS; gnix_info->rx_attr->comp_order = FI_ORDER_NONE; gnix_info->rx_attr->size = GNIX_RX_SIZE_DEFAULT; gnix_info->rx_attr->iov_limit = 1; if (hints) { if (hints->ep_attr) { /* * support FI_EP_RDM, FI_EP_DGRAM endpoint types */ switch (hints->ep_attr->type) { case FI_EP_UNSPEC: break; case FI_EP_RDM: case FI_EP_DGRAM: gnix_info->ep_attr->type = hints->ep_attr->type; ep_type_unspec = 0; break; default: goto err; } /* * only support FI_PROTO_GNI protocol */ switch (hints->ep_attr->protocol) { case FI_PROTO_UNSPEC: case FI_PROTO_GNI: break; default: goto err; } if (hints->ep_attr->tx_ctx_cnt > 1) { goto err; } if (hints->ep_attr->rx_ctx_cnt > 1) { goto err; } if (hints->ep_attr->max_msg_size > GNIX_MAX_MSG_SIZE) { goto err; } } /* * check the mode field */ if (hints->mode) { if ((hints->mode & GNIX_FAB_MODES) != GNIX_FAB_MODES) { goto err; } mode = hints->mode & ~GNIX_FAB_MODES_CLEAR; } if (!hints->caps) { /* Return all supported capabilities. */ gnix_info->caps = GNIX_EP_RDM_CAPS_FULL; } else { /* The provider must support all requested * capabilities. 
*/ if ((hints->caps & GNIX_EP_RDM_CAPS_FULL) != hints->caps) { goto err; } /* The provider may silently enable secondary * capabilities that do not introduce any overhead. */ gnix_info->caps = hints->caps | GNIX_EP_RDM_SEC_CAPS; } if (hints->tx_attr) { if ((hints->tx_attr->op_flags & GNIX_EP_OP_FLAGS) != hints->tx_attr->op_flags) { goto err; } if (hints->tx_attr->inject_size > GNIX_INJECT_SIZE) { goto err; } gnix_info->tx_attr->op_flags = hints->tx_attr->op_flags & GNIX_EP_OP_FLAGS; } if (hints->rx_attr) { if ((hints->rx_attr->op_flags & GNIX_EP_OP_FLAGS) != hints->rx_attr->op_flags) { goto err; } gnix_info->rx_attr->op_flags = hints->rx_attr->op_flags & GNIX_EP_OP_FLAGS; } if (hints->fabric_attr && hints->fabric_attr->name && strncmp(hints->fabric_attr->name, gnix_fab_name, strlen(gnix_fab_name))) { goto err; } if (hints->domain_attr) { if (hints->domain_attr->name && strncmp(hints->domain_attr->name, gnix_dom_name, strlen(gnix_dom_name))) { goto err; } if (hints->domain_attr->control_progress != FI_PROGRESS_UNSPEC) gnix_info->domain_attr->control_progress = hints->domain_attr->control_progress; if (hints->domain_attr->data_progress != FI_PROGRESS_UNSPEC) gnix_info->domain_attr->data_progress = hints->domain_attr->data_progress; switch (hints->domain_attr->mr_mode) { case FI_MR_UNSPEC: case FI_MR_BASIC: gnix_info->domain_attr->mr_mode = hints->domain_attr->mr_mode; break; case FI_MR_SCALABLE: goto err; } switch (hints->domain_attr->threading) { case FI_THREAD_COMPLETION: gnix_info->domain_attr->threading = hints->domain_attr->threading; break; default: break; } ret = fi_check_domain_attr(&gnix_prov, gnix_info->domain_attr, hints->domain_attr, FI_MATCH_EXACT); if (ret) goto err; } } gnix_info->mode = mode; gnix_info->fabric_attr->name = strdup(gnix_fab_name); gnix_info->tx_attr->caps = gnix_info->caps; gnix_info->tx_attr->mode = gnix_info->mode; gnix_info->rx_attr->caps = gnix_info->caps; gnix_info->rx_attr->mode = gnix_info->mode; if (ep_type_unspec) { struct 
fi_info *dg_info = fi_dupinfo(gnix_info); if (!dg_info) { GNIX_WARN(FI_LOG_FABRIC, "cannot copy info\n"); goto err; } dg_info->ep_attr->type = FI_EP_DGRAM; gnix_info->next = dg_info; } *info = gnix_info; return 0; err: if (gnix_info) { if (gnix_info->tx_attr) free(gnix_info->tx_attr); if (gnix_info->rx_attr) free(gnix_info->rx_attr); if (gnix_info->ep_attr) free(gnix_info->ep_attr); if (gnix_info->domain_attr) free(gnix_info->domain_attr); if (gnix_info->fabric_attr) free(gnix_info->fabric_attr); free(gnix_info); } /* * for the getinfo method, we need to return -FI_ENODATA otherwise * the fi_getinfo call will make an early exit without querying * other providers which may be avaialble. */ return -FI_ENODATA; }
int usdf_domain_open(struct fid_fabric *fabric, struct fi_info *info, struct fid_domain **domain, void *context) { struct usdf_fabric *fp; struct usdf_domain *udp; struct sockaddr_in *sin; size_t addrlen; int ret; udp = calloc(1, sizeof *udp); if (udp == NULL) { USDF_DEBUG("unable to alloc mem for domain\n"); ret = -FI_ENOMEM; goto fail; } fp = fab_fidtou(fabric); USDF_DEBUG("uda_devname=%s\n", fp->fab_dev_attrs->uda_devname); /* * Make sure address format is good and matches this fabric */ switch (info->addr_format) { case FI_SOCKADDR: addrlen = sizeof(struct sockaddr); break; case FI_SOCKADDR_IN: addrlen = sizeof(struct sockaddr_in); break; default: ret = -FI_EINVAL; goto fail; } sin = info->src_addr; if (info->src_addrlen != addrlen || sin->sin_family != AF_INET || sin->sin_addr.s_addr != fp->fab_dev_attrs->uda_ipaddr_be) { ret = -FI_EINVAL; goto fail; } ret = usd_open(fp->fab_dev_attrs->uda_devname, &udp->dom_dev); if (ret != 0) { goto fail; } udp->dom_fid.fid.fclass = FI_CLASS_DOMAIN; udp->dom_fid.fid.context = context; udp->dom_fid.fid.ops = &usdf_fid_ops; udp->dom_fid.ops = &usdf_domain_ops; udp->dom_fid.mr = &usdf_domain_mr_ops; ret = pthread_spin_init(&udp->dom_progress_lock, PTHREAD_PROCESS_PRIVATE); if (ret != 0) { ret = -ret; goto fail; } TAILQ_INIT(&udp->dom_tx_ready); TAILQ_INIT(&udp->dom_hcq_list); udp->dom_info = fi_dupinfo(info); if (udp->dom_info == NULL) { ret = -FI_ENOMEM; goto fail; } if (udp->dom_info->dest_addr != NULL) { free(udp->dom_info->dest_addr); udp->dom_info->dest_addr = NULL; } ret = usdf_dom_rdc_alloc_data(udp); if (ret != 0) { goto fail; } udp->dom_fabric = fp; LIST_INSERT_HEAD(&fp->fab_domain_list, udp, dom_link); atomic_init(&udp->dom_refcnt, 0); atomic_inc(&fp->fab_refcnt); *domain = &udp->dom_fid; return 0; fail: if (udp != NULL) { if (udp->dom_info != NULL) { fi_freeinfo(udp->dom_info); } if (udp->dom_dev != NULL) { usd_close(udp->dom_dev); } usdf_dom_rdc_free_data(udp); free(udp); } return ret; }
int util_getinfo(const struct fi_provider *prov, uint32_t version, const char *node, const char *service, uint64_t flags, const struct fi_info *prov_info, struct fi_info *hints, struct fi_info **info) { struct util_fabric *fabric; struct util_domain *domain; struct dlist_entry *item; int ret, copy_dest; FI_DBG(prov, FI_LOG_CORE, "checking info\n"); if ((flags & FI_SOURCE) && !node && !service) { FI_INFO(prov, FI_LOG_CORE, "FI_SOURCE set, but no node or service\n"); return -FI_EINVAL; } ret = fi_check_info(prov, prov_info, hints, FI_MATCH_EXACT); if (ret) return ret; *info = fi_dupinfo(prov_info); if (!*info) { FI_INFO(prov, FI_LOG_CORE, "cannot copy info\n"); return -FI_ENOMEM; } ofi_alter_info(*info, hints); fabric = fi_fabric_find((*info)->fabric_attr->name); if (fabric) { FI_DBG(prov, FI_LOG_CORE, "Found opened fabric\n"); (*info)->fabric_attr->fabric = &fabric->fabric_fid; fastlock_acquire(&fabric->lock); item = dlist_find_first_match(&fabric->domain_list, util_find_domain, *info); if (item) { FI_DBG(prov, FI_LOG_CORE, "Found open domain\n"); domain = container_of(item, struct util_domain, list_entry); (*info)->domain_attr->domain = &domain->domain_fid; } fastlock_release(&fabric->lock); } if (flags & FI_SOURCE) { ret = ofi_get_addr((*info)->addr_format, flags, node, service, &(*info)->src_addr, &(*info)->src_addrlen); if (ret) { FI_INFO(prov, FI_LOG_CORE, "source address not available\n"); goto err; } copy_dest = (hints && hints->dest_addr); } else { if (node || service) { copy_dest = 0; ret = ofi_get_addr((*info)->addr_format, flags, node, service, &(*info)->dest_addr, &(*info)->dest_addrlen); if (ret) { FI_INFO(prov, FI_LOG_CORE, "cannot resolve dest address\n"); goto err; } } else { copy_dest = (hints && hints->dest_addr); } if (hints && hints->src_addr) { (*info)->src_addr = mem_dup(hints->src_addr, hints->src_addrlen); if (!(*info)->src_addr) { ret = -FI_ENOMEM; goto err; } (*info)->src_addrlen = hints->src_addrlen; } } if (copy_dest) { (*info)->dest_addr 
= mem_dup(hints->dest_addr, hints->dest_addrlen); if (!(*info)->dest_addr) { ret = -FI_ENOMEM; goto err; } (*info)->dest_addrlen = hints->dest_addrlen; } if ((*info)->dest_addr && !(*info)->src_addr) { ret = ofi_get_src_addr((*info)->addr_format, (*info)->dest_addr, (*info)->dest_addrlen, &(*info)->src_addr, &(*info)->src_addrlen); if (ret) { FI_INFO(prov, FI_LOG_CORE, "cannot resolve source address\n"); } } return 0; err: fi_freeinfo(*info); return ret; }
/* returns 0 on success or a negative value that can be stringified with * fi_strerror on error */ static int setup_ep_fixture(struct fid_ep **ep_o) { int ret; struct fi_info *myfi; struct fi_av_attr av_attr; struct fi_cq_attr cq_attr; assert(ep_o != NULL); ret = 0; myfi = fi_dupinfo(fi); if (myfi == NULL) { printf("fi_dupinfo returned NULL\n"); goto fail; } ret = fi_endpoint(domain, myfi, ep_o, NULL); if (ret != 0) { printf("fi_endpoint %s\n", fi_strerror(-ret)); goto fail; } memset(&cq_attr, 0, sizeof cq_attr); cq_attr.format = FI_CQ_FORMAT_CONTEXT; cq_attr.wait_obj = FI_WAIT_NONE; cq_attr.size = TX_CQ_DEPTH; ret = fi_cq_open(domain, &cq_attr, &wcq, /*context=*/NULL); if (ret != 0) { printf("fi_cq_open %s\n", fi_strerror(-ret)); goto fail; } memset(&cq_attr, 0, sizeof cq_attr); cq_attr.format = FI_CQ_FORMAT_CONTEXT; cq_attr.wait_obj = FI_WAIT_NONE; cq_attr.size = RX_CQ_DEPTH; ret = fi_cq_open(domain, &cq_attr, &rcq, /*context=*/NULL); if (ret != 0) { printf("fi_cq_open %s\n", fi_strerror(-ret)); goto fail; } memset(&av_attr, 0, sizeof av_attr); av_attr.type = myfi->domain_attr->av_type ? myfi->domain_attr->av_type : FI_AV_MAP; av_attr.count = 1; av_attr.name = NULL; ret = fi_av_open(domain, &av_attr, &av, NULL); if (ret != 0) { printf("fi_av_open %s\n", fi_strerror(-ret)); goto fail; } ret = fi_ep_bind(*ep_o, &wcq->fid, FI_SEND); if (ret != 0) { printf("fi_ep_bind(wcq) %s\n", fi_strerror(-ret)); goto fail; } ret = fi_ep_bind(*ep_o, &rcq->fid, FI_RECV); if (ret != 0) { printf("fi_ep_bind(rcq) %s\n", fi_strerror(-ret)); goto fail; } ret = fi_ep_bind(*ep_o, &av->fid, 0); if (ret != 0) { printf("fi_ep_bind(av) %s\n", fi_strerror(-ret)); goto fail; } ret = fi_enable(*ep_o); if (ret != 0) { printf("fi_enable %s\n", fi_strerror(-ret)); goto fail; } if (myfi != NULL) { fi_freeinfo(myfi); } return ret; fail: if (myfi != NULL) { fi_freeinfo(myfi); } return teardown_ep_fixture(*ep_o); }
static int fi_ibv_mr_reg(struct fid *fid, const void *buf, size_t len, uint64_t access, uint64_t offset, uint64_t requested_key, uint64_t flags, struct fid_mr **mr, void *context) { struct fi_ibv_mem_desc *md; int fi_ibv_access = 0; struct fid_domain *domain; if (flags) return -FI_EBADFLAGS; if (fid->fclass != FI_CLASS_DOMAIN) { return -FI_EINVAL; } domain = container_of(fid, struct fid_domain, fid); md = calloc(1, sizeof *md); if (!md) return -FI_ENOMEM; md->domain = container_of(domain, struct fi_ibv_domain, domain_fid); md->mr_fid.fid.fclass = FI_CLASS_MR; md->mr_fid.fid.context = context; md->mr_fid.fid.ops = &fi_ibv_mr_ops; /* Enable local write access by default for FI_EP_RDM which hides local * registration requirements. This allows to avoid buffering or double * registration */ if (!(md->domain->info->caps & FI_LOCAL_MR)) fi_ibv_access |= IBV_ACCESS_LOCAL_WRITE; /* Local read access to an MR is enabled by default in verbs */ if (access & FI_RECV) fi_ibv_access |= IBV_ACCESS_LOCAL_WRITE; /* iWARP spec requires Remote Write access for an MR that is used * as a data sink for a Remote Read */ if (access & FI_READ) { fi_ibv_access |= IBV_ACCESS_LOCAL_WRITE; if (md->domain->verbs->device->transport_type == IBV_TRANSPORT_IWARP) fi_ibv_access |= IBV_ACCESS_REMOTE_WRITE; } if (access & FI_WRITE) fi_ibv_access |= IBV_ACCESS_LOCAL_WRITE; if (access & FI_REMOTE_READ) fi_ibv_access |= IBV_ACCESS_REMOTE_READ; /* Verbs requires Local Write access too for Remote Write access */ if (access & FI_REMOTE_WRITE) fi_ibv_access |= IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_ATOMIC; md->mr = ibv_reg_mr(md->domain->pd, (void *) buf, len, fi_ibv_access); if (!md->mr) goto err; md->mr_fid.mem_desc = (void *) (uintptr_t) md->mr->lkey; md->mr_fid.key = md->mr->rkey; *mr = &md->mr_fid; if(md->domain->eq && (md->domain->eq_flags & FI_REG_MR)) { struct fi_eq_entry entry = { .fid = &md->mr_fid.fid, .context = context }; fi_ibv_eq_write_event(md->domain->eq, 
FI_MR_COMPLETE, &entry, sizeof(entry)); } return 0; err: free(md); return -errno; } static int fi_ibv_mr_regv(struct fid *fid, const struct iovec * iov, size_t count, uint64_t access, uint64_t offset, uint64_t requested_key, uint64_t flags, struct fid_mr **mr, void *context) { if (count > VERBS_MR_IOV_LIMIT) { VERBS_WARN(FI_LOG_FABRIC, "iov count > %d not supported\n", VERBS_MR_IOV_LIMIT); return -FI_EINVAL; } return fi_ibv_mr_reg(fid, iov->iov_base, iov->iov_len, access, offset, requested_key, flags, mr, context); } static int fi_ibv_mr_regattr(struct fid *fid, const struct fi_mr_attr *attr, uint64_t flags, struct fid_mr **mr) { return fi_ibv_mr_regv(fid, attr->mr_iov, attr->iov_count, attr->access, 0, attr->requested_key, flags, mr, attr->context); } static int fi_ibv_domain_bind(struct fid *fid, struct fid *bfid, uint64_t flags) { struct fi_ibv_domain *domain; struct fi_ibv_eq *eq; domain = container_of(fid, struct fi_ibv_domain, domain_fid.fid); switch (bfid->fclass) { case FI_CLASS_EQ: eq = container_of(bfid, struct fi_ibv_eq, eq_fid); domain->eq = eq; domain->eq_flags = flags; break; default: return -EINVAL; } return 0; } static int fi_ibv_domain_close(fid_t fid) { struct fi_ibv_domain *domain; int ret; domain = container_of(fid, struct fi_ibv_domain, domain_fid.fid); if (domain->rdm) { rdma_destroy_ep(domain->rdm_cm->listener); free(domain->rdm_cm); } if (domain->pd) { ret = ibv_dealloc_pd(domain->pd); if (ret) return -ret; domain->pd = NULL; } fi_freeinfo(domain->info); free(domain); return 0; } static int fi_ibv_open_device_by_name(struct fi_ibv_domain *domain, const char *name) { struct ibv_context **dev_list; int i, ret = -FI_ENODEV; if (!name) return -FI_EINVAL; dev_list = rdma_get_devices(NULL); if (!dev_list) return -errno; for (i = 0; dev_list[i] && ret; i++) { if (domain->rdm) { ret = strncmp(name, ibv_get_device_name(dev_list[i]->device), strlen(name) - strlen(verbs_rdm_domain.suffix)); } else { ret = strcmp(name, 
ibv_get_device_name(dev_list[i]->device)); } if (!ret) domain->verbs = dev_list[i]; } rdma_free_devices(dev_list); return ret; } static struct fi_ops fi_ibv_fid_ops = { .size = sizeof(struct fi_ops), .close = fi_ibv_domain_close, .bind = fi_ibv_domain_bind, .control = fi_no_control, .ops_open = fi_no_ops_open, }; static struct fi_ops_mr fi_ibv_domain_mr_ops = { .size = sizeof(struct fi_ops_mr), .reg = fi_ibv_mr_reg, .regv = fi_ibv_mr_regv, .regattr = fi_ibv_mr_regattr, }; static struct fi_ops_domain fi_ibv_domain_ops = { .size = sizeof(struct fi_ops_domain), .av_open = fi_no_av_open, .cq_open = fi_ibv_cq_open, .endpoint = fi_ibv_open_ep, .scalable_ep = fi_no_scalable_ep, .cntr_open = fi_no_cntr_open, .poll_open = fi_no_poll_open, .stx_ctx = fi_no_stx_context, .srx_ctx = fi_ibv_srq_context, }; static struct fi_ops_domain fi_ibv_rdm_domain_ops = { .size = sizeof(struct fi_ops_domain), .av_open = fi_ibv_rdm_av_open, .cq_open = fi_ibv_rdm_cq_open, .endpoint = fi_ibv_rdm_open_ep, .scalable_ep = fi_no_scalable_ep, .cntr_open = fi_rbv_rdm_cntr_open, .poll_open = fi_no_poll_open, .stx_ctx = fi_no_stx_context, .srx_ctx = fi_no_srx_context, }; static int fi_ibv_domain(struct fid_fabric *fabric, struct fi_info *info, struct fid_domain **domain, void *context) { struct fi_ibv_domain *_domain; struct fi_ibv_fabric *fab; struct fi_info *fi; int ret; fi = fi_ibv_get_verbs_info(info->domain_attr->name); if (!fi) return -FI_EINVAL; fab = container_of(fabric, struct fi_ibv_fabric, util_fabric.fabric_fid); ret = ofi_check_domain_attr(&fi_ibv_prov, fabric->api_version, fi->domain_attr, info->domain_attr); if (ret) return ret; _domain = calloc(1, sizeof *_domain); if (!_domain) return -FI_ENOMEM; _domain->info = fi_dupinfo(info); if (!_domain->info) goto err1; _domain->rdm = FI_IBV_EP_TYPE_IS_RDM(info); if (_domain->rdm) { _domain->rdm_cm = calloc(1, sizeof(*_domain->rdm_cm)); if (!_domain->rdm_cm) { ret = -FI_ENOMEM; goto err2; } } ret = fi_ibv_open_device_by_name(_domain, 
info->domain_attr->name); if (ret) goto err2; _domain->pd = ibv_alloc_pd(_domain->verbs); if (!_domain->pd) { ret = -errno; goto err2; } _domain->domain_fid.fid.fclass = FI_CLASS_DOMAIN; _domain->domain_fid.fid.context = context; _domain->domain_fid.fid.ops = &fi_ibv_fid_ops; _domain->domain_fid.mr = &fi_ibv_domain_mr_ops; if (_domain->rdm) { _domain->domain_fid.ops = &fi_ibv_rdm_domain_ops; _domain->rdm_cm->ec = rdma_create_event_channel(); if (!_domain->rdm_cm->ec) { VERBS_INFO(FI_LOG_EP_CTRL, "Failed to create listener event channel: %s\n", strerror(errno)); ret = -FI_EOTHER; goto err2; } if (fi_fd_nonblock(_domain->rdm_cm->ec->fd) != 0) { VERBS_INFO_ERRNO(FI_LOG_EP_CTRL, "fcntl", errno); ret = -FI_EOTHER; goto err3; } if (rdma_create_id(_domain->rdm_cm->ec, &_domain->rdm_cm->listener, NULL, RDMA_PS_TCP)) { VERBS_INFO(FI_LOG_EP_CTRL, "Failed to create cm listener: %s\n", strerror(errno)); ret = -FI_EOTHER; goto err3; } _domain->rdm_cm->is_bound = 0; } else { _domain->domain_fid.ops = &fi_ibv_domain_ops; } _domain->fab = fab; *domain = &_domain->domain_fid; return 0; err3: if (_domain->rdm) rdma_destroy_event_channel(_domain->rdm_cm->ec); err2: if (_domain->rdm) free(_domain->rdm_cm); fi_freeinfo(_domain->info); err1: free(_domain); return ret; } static int fi_ibv_trywait(struct fid_fabric *fabric, struct fid **fids, int count) { struct fi_ibv_cq *cq; int ret, i; for (i = 0; i < count; i++) { switch (fids[i]->fclass) { case FI_CLASS_CQ: cq = container_of(fids[i], struct fi_ibv_cq, cq_fid.fid); ret = cq->trywait(fids[i]); if (ret) return ret; break; case FI_CLASS_EQ: /* We are always ready to wait on an EQ since * rdmacm EQ is based on an fd */ continue; case FI_CLASS_CNTR: case FI_CLASS_WAIT: return -FI_ENOSYS; default: return -FI_EINVAL; } } return FI_SUCCESS; } static int fi_ibv_fabric_close(fid_t fid) { struct fi_ibv_fabric *fab; int ret; fab = container_of(fid, struct fi_ibv_fabric, util_fabric.fabric_fid.fid); ret = ofi_fabric_close(&fab->util_fabric); if 
(ret) return ret; free(fab); return 0; } static struct fi_ops fi_ibv_fi_ops = { .size = sizeof(struct fi_ops), .close = fi_ibv_fabric_close, .bind = fi_no_bind, .control = fi_no_control, .ops_open = fi_no_ops_open, }; static struct fi_ops_fabric fi_ibv_ops_fabric = { .size = sizeof(struct fi_ops_fabric), .domain = fi_ibv_domain, .passive_ep = fi_ibv_passive_ep, .eq_open = fi_ibv_eq_open, .wait_open = fi_no_wait_open, .trywait = fi_ibv_trywait }; int fi_ibv_fabric(struct fi_fabric_attr *attr, struct fid_fabric **fabric, void *context) { struct fi_ibv_fabric *fab; struct fi_info *info; int ret; ret = fi_ibv_init_info(); if (ret) return ret; fab = calloc(1, sizeof(*fab)); if (!fab) return -FI_ENOMEM; for (info = verbs_info; info; info = info->next) { ret = ofi_fabric_init(&fi_ibv_prov, info->fabric_attr, attr, &fab->util_fabric, context); if (ret != -FI_ENODATA) break; } if (ret) { free(fab); return ret; } *fabric = &fab->util_fabric.fabric_fid; (*fabric)->fid.ops = &fi_ibv_fi_ops; (*fabric)->ops = &fi_ibv_ops_fabric; return 0; }
int usdf_domain_open(struct fid_fabric *fabric, struct fi_info *info, struct fid_domain **domain, void *context) { struct usdf_fabric *fp; struct usdf_domain *udp; struct sockaddr_in *sin; size_t addrlen; int ret; #if ENABLE_DEBUG char requested[INET_ADDRSTRLEN], actual[INET_ADDRSTRLEN]; #endif USDF_TRACE_SYS(DOMAIN, "\n"); sin = NULL; fp = fab_fidtou(fabric); if (info->domain_attr != NULL) { /* No versioning information available here. */ if (!usdf_domain_checkname(0, fp->fab_dev_attrs, info->domain_attr->name)) { USDF_WARN_SYS(DOMAIN, "domain name mismatch\n"); return -FI_ENODATA; } if (ofi_check_mr_mode(fabric->api_version, OFI_MR_BASIC_MAP | FI_MR_LOCAL, info->domain_attr->mr_mode)) { /* the caller ignored our fi_getinfo results */ USDF_WARN_SYS(DOMAIN, "MR mode (%d) not supported\n", info->domain_attr->mr_mode); return -FI_ENODATA; } } udp = calloc(1, sizeof *udp); if (udp == NULL) { USDF_DBG("unable to alloc mem for domain\n"); ret = -FI_ENOMEM; goto fail; } USDF_DBG("uda_devname=%s\n", fp->fab_dev_attrs->uda_devname); /* * Make sure address format is good and matches this fabric */ switch (info->addr_format) { case FI_SOCKADDR: addrlen = sizeof(struct sockaddr); sin = info->src_addr; break; case FI_SOCKADDR_IN: addrlen = sizeof(struct sockaddr_in); sin = info->src_addr; break; case FI_ADDR_STR: sin = usdf_format_to_sin(info, info->src_addr); goto skip_size_check; default: ret = -FI_EINVAL; goto fail; } if (info->src_addrlen != addrlen) { ret = -FI_EINVAL; goto fail; } skip_size_check: if (sin->sin_family != AF_INET || sin->sin_addr.s_addr != fp->fab_dev_attrs->uda_ipaddr_be) { USDF_DBG_SYS(DOMAIN, "requested src_addr (%s) != fabric addr (%s)\n", inet_ntop(AF_INET, &sin->sin_addr.s_addr, requested, sizeof(requested)), inet_ntop(AF_INET, &fp->fab_dev_attrs->uda_ipaddr_be, actual, sizeof(actual))); ret = -FI_EINVAL; usdf_free_sin_if_needed(info, sin); goto fail; } usdf_free_sin_if_needed(info, sin); ret = usd_open(fp->fab_dev_attrs->uda_devname, &udp->dom_dev); 
if (ret != 0) { goto fail; } udp->dom_fid.fid.fclass = FI_CLASS_DOMAIN; udp->dom_fid.fid.context = context; udp->dom_fid.fid.ops = &usdf_fid_ops; udp->dom_fid.ops = &usdf_domain_ops; udp->dom_fid.mr = &usdf_domain_mr_ops; ret = pthread_spin_init(&udp->dom_progress_lock, PTHREAD_PROCESS_PRIVATE); if (ret != 0) { ret = -ret; goto fail; } TAILQ_INIT(&udp->dom_tx_ready); TAILQ_INIT(&udp->dom_hcq_list); udp->dom_info = fi_dupinfo(info); if (udp->dom_info == NULL) { ret = -FI_ENOMEM; goto fail; } if (udp->dom_info->dest_addr != NULL) { free(udp->dom_info->dest_addr); udp->dom_info->dest_addr = NULL; } ret = usdf_dom_rdc_alloc_data(udp); if (ret != 0) { goto fail; } udp->dom_fabric = fp; LIST_INSERT_HEAD(&fp->fab_domain_list, udp, dom_link); ofi_atomic_initialize32(&udp->dom_refcnt, 0); ofi_atomic_inc32(&fp->fab_refcnt); *domain = &udp->dom_fid; return 0; fail: if (udp != NULL) { if (udp->dom_info != NULL) { fi_freeinfo(udp->dom_info); } if (udp->dom_dev != NULL) { usd_close(udp->dom_dev); } usdf_dom_rdc_free_data(udp); free(udp); } return ret; }
int fi_bgq_set_default_info() { struct fi_info *fi, *prev_fi; uint32_t ppn = Kernel_ProcessCount(); /* * See: fi_bgq_stx_init() for the number of mu injection fifos * allocated for each tx context. Each rx context uses one * mu injection fifo and one mu reception fifo. */ const unsigned tx_ctx_cnt = (((BGQ_MU_NUM_INJ_FIFO_GROUPS-1) * BGQ_MU_NUM_INJ_FIFOS_PER_GROUP) / 3) / ppn; /* * The number of rx contexts on a node is the minimum of: * 1. number of mu injection fifos on the node not used by tx contexts * 2. total number mu reception fifos on the node */ const unsigned rx_ctx_cnt = MIN((((BGQ_MU_NUM_INJ_FIFO_GROUPS-1) * BGQ_MU_NUM_INJ_FIFOS_PER_GROUP) - (tx_ctx_cnt * ppn)), ((BGQ_MU_NUM_REC_FIFO_GROUPS-1) * BGQ_MU_NUM_REC_FIFOS_PER_GROUP)) / ppn; fi = fi_dupinfo(NULL); if (!fi) { errno = FI_ENOMEM; return -errno; } fi_bgq_global.info = fi; *fi->tx_attr = (struct fi_tx_attr) { .caps = FI_RMA | FI_ATOMIC | FI_TRANSMIT_COMPLETE, .mode = FI_ASYNC_IOV, .op_flags = FI_TRANSMIT_COMPLETE, .msg_order = FI_ORDER_SAS | FI_ORDER_WAW | FI_ORDER_RAW | FI_ORDER_RAR, .comp_order = FI_ORDER_NONE, .inject_size = FI_BGQ_INJECT_SIZE, .size = FI_BGQ_TX_SIZE, .iov_limit = SIZE_MAX, .rma_iov_limit = 0 }; *fi->rx_attr = (struct fi_rx_attr) { .caps = FI_RMA | FI_ATOMIC | FI_NAMED_RX_CTX, .mode = FI_ASYNC_IOV, .op_flags = 0, .msg_order = 0, .comp_order = FI_ORDER_NONE, .total_buffered_recv = FI_BGQ_TOTAL_BUFFERED_RECV, .size = FI_BGQ_RX_SIZE, .iov_limit = SIZE_MAX }; *fi->ep_attr = (struct fi_ep_attr) { .type = FI_EP_RDM, .protocol = FI_BGQ_PROTOCOL, .protocol_version = FI_BGQ_PROTOCOL_VERSION, .max_msg_size = FI_BGQ_MAX_MSG_SIZE, .msg_prefix_size = FI_BGQ_MAX_PREFIX_SIZE, .max_order_raw_size = FI_BGQ_MAX_ORDER_RAW_SIZE, .max_order_war_size = FI_BGQ_MAX_ORDER_WAR_SIZE, .max_order_waw_size = FI_BGQ_MAX_ORDER_WAW_SIZE, .mem_tag_format = FI_BGQ_MEM_TAG_FORMAT, .tx_ctx_cnt = tx_ctx_cnt, .rx_ctx_cnt = rx_ctx_cnt, }; *fi->domain_attr = (struct fi_domain_attr) { .domain = NULL, .name = NULL, /* 
TODO: runtime query for name? */ .threading = FI_THREAD_FID, .control_progress = FI_PROGRESS_MANUAL, .data_progress = FI_PROGRESS_AUTO, // + FI_PROGRESS_MANUAL ? .resource_mgmt = FI_RM_DISABLED, .av_type = FI_AV_MAP, .mr_mode = FI_MR_SCALABLE, .mr_key_size = 2, .cq_data_size = 0, .cq_cnt = 128 / ppn, .ep_cnt = SIZE_MAX, .tx_ctx_cnt = tx_ctx_cnt, .rx_ctx_cnt = rx_ctx_cnt, .max_ep_tx_ctx = ((BGQ_MU_NUM_INJ_FIFO_GROUPS-1) * BGQ_MU_NUM_INJ_FIFOS_PER_GROUP) / ppn / 2, .max_ep_rx_ctx = ((BGQ_MU_NUM_REC_FIFO_GROUPS-1) * BGQ_MU_NUM_REC_FIFOS_PER_GROUP) / ppn, .max_ep_stx_ctx = ((BGQ_MU_NUM_INJ_FIFO_GROUPS-1) * BGQ_MU_NUM_INJ_FIFOS_PER_GROUP) / ppn / 2, .max_ep_srx_ctx = 0 }; *fi->fabric_attr = (struct fi_fabric_attr) { .fabric = NULL, .name = strdup(FI_BGQ_FABRIC_NAME), .prov_name = strdup(FI_BGQ_PROVIDER_NAME), .prov_version = FI_BGQ_PROVIDER_VERSION }; fi->caps = FI_RMA | FI_ATOMIC | FI_NAMED_RX_CTX | FI_TRANSMIT_COMPLETE; fi->mode = FI_ASYNC_IOV; fi->addr_format = FI_ADDR_BGQ; fi->src_addrlen = 24; // includes null fi->dest_addrlen = 24; // includes null prev_fi = fi; fi = fi_dupinfo(prev_fi); prev_fi->next = fi; return 0; }