/* Catch the version changes for domain_attr.
 *
 * For apps requesting API < 1.5, strip attributes that did not exist in
 * 1.4 and reject hints that explicitly ask for them.
 *
 * Returns FI_SUCCESS, or -FI_EBADFLAGS if the hints request
 * FI_REMOTE_COMM under a pre-1.5 API version.
 */
int usdf_catch_dom_attr(uint32_t version, const struct fi_info *hints,
			struct fi_domain_attr *dom_attr)
{
	/* version 1.5 introduced new bits. If the user asked for older
	 * version, we can't return these new bits.
	 */
	if (FI_VERSION_LT(version, FI_VERSION(1, 5))) {
		/* We checked mr_mode compatibility before calling
		 * this function. This means it is safe to return
		 * 1.4 default mr_mode.
		 */
		dom_attr->mr_mode = FI_MR_BASIC;

		/* FI_REMOTE_COMM is introduced in 1.5. So don't return it. */
		dom_attr->caps &= ~FI_REMOTE_COMM;

		/* If FI_REMOTE_COMM is given for version < 1.5, fail.
		 * Fix: test the bit rather than comparing for exact
		 * equality, so the request is rejected even when the
		 * user set other caps bits alongside FI_REMOTE_COMM.
		 */
		if (hints && hints->domain_attr) {
			if (hints->domain_attr->caps & FI_REMOTE_COMM)
				return -FI_EBADFLAGS;
		}
	}

	return FI_SUCCESS;
}
/*
 * Reconcile a provider's domain attributes with the user's hints for the
 * requested API version.  Adjusts mr_mode and caps in place, then copies
 * any explicitly-specified (non-zero) user preferences for threading,
 * progress models, and AV type.  hints may be NULL.
 */
static void fi_alter_domain_attr(struct fi_domain_attr *attr,
				 const struct fi_domain_attr *hints,
				 uint64_t info_caps, uint32_t api_version)
{
	int hints_mr_mode;

	/* Missing hints are treated as mr_mode 0 (unspecified). */
	hints_mr_mode = hints ? hints->mr_mode : 0;
	if (hints_mr_mode & (FI_MR_BASIC | FI_MR_SCALABLE)) {
		/* User asked for a v1.0-style registration mode; honor it
		 * regardless of API version. */
		attr->mr_mode = hints_mr_mode;
	} else if (FI_VERSION_LT(api_version, FI_VERSION(1, 5))) {
		/* Pre-1.5 apps only understand FI_MR_BASIC/FI_MR_SCALABLE:
		 * any non-zero, non-SCALABLE provider mode collapses to
		 * BASIC, otherwise report SCALABLE. */
		attr->mr_mode = (attr->mr_mode && attr->mr_mode != FI_MR_SCALABLE) ?
				FI_MR_BASIC : FI_MR_SCALABLE;
	} else {
		/* 1.5+ bitmask semantics: if the user did not agree to every
		 * provider-required bit, narrow to the agreed subset, further
		 * restricted by the caps actually in use. */
		if ((hints_mr_mode & attr->mr_mode) != attr->mr_mode) {
			attr->mr_mode = ofi_cap_mr_mode(info_caps,
						attr->mr_mode & hints_mr_mode);
		}
	}

	attr->caps = ofi_get_caps(info_caps, hints ? hints->caps : 0,
				  attr->caps);
	if (!hints)
		return;

	/* Zero means "unspecified" for each of these enums; only copy what
	 * the user explicitly set. */
	if (hints->threading)
		attr->threading = hints->threading;
	if (hints->control_progress)
		attr->control_progress = hints->control_progress;
	if (hints->data_progress)
		attr->data_progress = hints->data_progress;
	if (hints->av_type)
		attr->av_type = hints->av_type;
}
int DEFAULT_SYMVER_PRE(fi_getinfo)(uint32_t version, const char *node, const char *service, uint64_t flags, struct fi_info *hints, struct fi_info **info) { struct fi_prov *prov; struct fi_info *tail, *cur; int ret; if (!init) fi_ini(); if (FI_VERSION_LT(fi_version(), version)) { FI_WARN(&core_prov, FI_LOG_CORE, "Requested version is newer than library\n"); return -FI_ENOSYS; } if (flags == FI_PROV_ATTR_ONLY) { return fi_getprovinfo(info); } *info = tail = NULL; for (prov = prov_head; prov; prov = prov->next) { if (!prov->provider->getinfo) continue; if (hints && hints->fabric_attr && hints->fabric_attr->prov_name && strcasecmp(prov->provider->name, hints->fabric_attr->prov_name)) continue; ret = prov->provider->getinfo(version, node, service, flags, hints, &cur); if (ret) { FI_WARN(&core_prov, FI_LOG_CORE, "fi_getinfo: provider %s returned -%d (%s)\n", prov->provider->name, -ret, fi_strerror(-ret)); continue; } if (!*info) *info = cur; else tail->next = cur; for (tail = cur; tail->next; tail = tail->next) { if (tail->fabric_attr->prov_name != NULL) FI_WARN(&core_prov, FI_LOG_CORE, "prov_name field is not NULL (%s)\n", tail->fabric_attr->prov_name); tail->fabric_attr->prov_name = strdup(prov->provider->name); tail->fabric_attr->prov_version = prov->provider->version; } if (tail->fabric_attr->prov_name != NULL) FI_WARN(&core_prov, FI_LOG_CORE, "prov_name field is not NULL (%s)\n", tail->fabric_attr->prov_name); tail->fabric_attr->prov_name = strdup(prov->provider->name); tail->fabric_attr->prov_version = prov->provider->version; } return *info ? 0 : -FI_ENODATA; }
/*
 * Alter the returned fi_info based on the user hints. We assume that
 * the hints have been validated and the starting fi_info is properly
 * configured by the provider.
 *
 * Walks the entire fi_info list, fixing up caps, mode bits, and the
 * domain/ep/rx/tx attribute blocks in place.
 */
void ofi_alter_info(struct fi_info *info, const struct fi_info *hints,
		    uint32_t api_version)
{
	if (!hints)
		return;

	for (; info; info = info->next) {
		/* This should stay before call to fi_alter_domain_attr as
		 * the checks depend on unmodified provider mr_mode attr */
		info->caps = ofi_get_info_caps(info, hints, api_version);

		/* Cleanup: the old "hints &&" test inside this condition was
		 * redundant — hints was already verified non-NULL above. */
		if ((info->domain_attr->mr_mode & FI_MR_LOCAL) &&
		    (FI_VERSION_LT(api_version, FI_VERSION(1, 5)) ||
		     (hints->domain_attr &&
		      (hints->domain_attr->mr_mode &
		       (FI_MR_BASIC | FI_MR_SCALABLE)))))
			info->mode |= FI_LOCAL_MR;

		info->handle = hints->handle;

		fi_alter_domain_attr(info->domain_attr, hints->domain_attr,
				     info->caps, api_version);
		fi_alter_ep_attr(info->ep_attr, hints->ep_attr, info->caps);
		fi_alter_rx_attr(info->rx_attr, hints->rx_attr, info->caps);
		fi_alter_tx_attr(info->tx_attr, hints->tx_attr, info->caps);
	}
}
/*
 * Compute the caps to report for this fi_info.  If the provider requires
 * MR mode bits for RMA-target operation and the user has not agreed to
 * them, trim FI_REMOTE_WRITE/FI_REMOTE_READ from the result.
 */
static uint64_t ofi_get_info_caps(const struct fi_info *prov_info,
				  const struct fi_info *user_info,
				  uint32_t api_version)
{
	int prov_mode, user_mode;
	uint64_t caps;

	assert(user_info);

	caps = ofi_get_caps(prov_info->caps, user_info->caps, prov_info->caps);

	prov_mode = prov_info->domain_attr->mr_mode;

	/* Nothing to trim if remote RMA isn't in play, or the provider
	 * needs no MR setup on the target side. */
	if (!ofi_rma_target_allowed(caps) ||
	    !(prov_mode & OFI_MR_MODE_RMA_TARGET))
		return caps;

	/* No domain hints at all: the user cannot have agreed to the
	 * required bits, so trim. */
	if (!user_info->domain_attr)
		goto trim_caps;

	user_mode = user_info->domain_attr->mr_mode;

	/* Keep full caps when the user effectively accepts the provider's
	 * RMA-target requirements: pre-1.5 UNSPEC, explicit FI_MR_BASIC,
	 * or a 1.5+ bitmask covering every required RMA-target bit. */
	if ((FI_VERSION_LT(api_version, FI_VERSION(1,5)) &&
	    (user_mode == FI_MR_UNSPEC)) ||
	    (user_mode == FI_MR_BASIC) ||
	    ((user_mode & prov_mode & OFI_MR_MODE_RMA_TARGET) ==
	     (prov_mode & OFI_MR_MODE_RMA_TARGET)))
		return caps;

trim_caps:
	return caps & ~(FI_REMOTE_WRITE | FI_REMOTE_READ);
}
/* A wrapper function to core utility function to check mr_mode bits.
 * We need to check some more things for backward compatibility.
 *
 * A zero mr_mode in the hints is tolerated for old apps: pre-1.5 callers
 * get a pass outright, and 1.5+ callers get a pass when they asked for
 * FI_LOCAL_MR through the fi_info mode bits instead.
 */
int usdf_check_mr_mode(uint32_t version, const struct fi_info *hints,
		       uint64_t prov_mode)
{
	int ret;

	ret = ofi_check_mr_mode(version, prov_mode,
				hints->domain_attr->mr_mode);
	if (ret == 0)
		return ret;

	/* Core check failed; only a zero (unspecified) mr_mode can be
	 * forgiven below. */
	if (hints->domain_attr->mr_mode != 0)
		return ret;

	/* Pre-1.5 apps may legitimately leave mr_mode unset; let this
	 * slide and catch it later on. */
	if (FI_VERSION_LT(version, FI_VERSION(1, 5)))
		return FI_SUCCESS;

	/* 1.5+ backward compatibility: accept FI_LOCAL_MR in fi_info mode
	 * in place of an mr_mode setting. */
	if (hints->mode & FI_LOCAL_MR)
		return FI_SUCCESS;

	return ret;
}
/* Catch the version changes for rx_attr.
 *
 * Pre-1.5 APIs require FI_LOCAL_MR in the rx mode bits; reject requests
 * that omit it.  Returns FI_SUCCESS or -FI_ENODATA.
 */
int usdf_catch_rx_attr(uint32_t version, const struct fi_rx_attr *rx_attr)
{
	/* 1.5 and later have no FI_LOCAL_MR requirement. */
	if (!FI_VERSION_LT(version, FI_VERSION(1, 5)))
		return FI_SUCCESS;

	return (rx_attr->mode & FI_LOCAL_MR) ? FI_SUCCESS : -FI_ENODATA;
}
/*
 * Open an RxM domain: resolve the core (msg) provider's info, open the
 * underlying msg domain, then initialize the util domain wrapper and
 * install RxM's own MR/domain ops.
 *
 * On failure every resource acquired so far is released via the goto
 * cleanup chain.  Returns 0 or a negative fi_errno value.
 */
int rxm_domain_open(struct fid_fabric *fabric, struct fi_info *info,
		    struct fid_domain **domain, void *context)
{
	int ret;
	struct rxm_domain *rxm_domain;
	struct rxm_fabric *rxm_fabric;
	struct fi_info *msg_info;

	rxm_domain = calloc(1, sizeof(*rxm_domain));
	if (!rxm_domain)
		return -FI_ENOMEM;

	rxm_fabric = container_of(fabric, struct rxm_fabric,
				  util_fabric.fabric_fid);

	ret = ofi_get_core_info(fabric->api_version, NULL, NULL, 0,
				&rxm_util_prov, info, rxm_info_to_core,
				&msg_info);
	if (ret)
		goto err1;

	/* Force core provider to supply MR key.  Pre-1.5 this is spelled
	 * FI_MR_BASIC; 1.5+ uses the FI_MR_PROV_KEY bit. */
	if (FI_VERSION_LT(fabric->api_version, FI_VERSION(1, 5)))
		msg_info->domain_attr->mr_mode = FI_MR_BASIC;
	else
		msg_info->domain_attr->mr_mode |= FI_MR_PROV_KEY;

	ret = fi_domain(rxm_fabric->msg_fabric, msg_info,
			&rxm_domain->msg_domain, context);
	if (ret)
		goto err2;

	ret = ofi_domain_init(fabric, info, &rxm_domain->util_domain, context);
	if (ret) {
		goto err3;
	}

	*domain = &rxm_domain->util_domain.domain_fid;
	(*domain)->fid.ops = &rxm_domain_fi_ops;
	/* Replace MR ops set by ofi_domain_init() */
	(*domain)->mr = &rxm_domain_mr_ops;
	(*domain)->ops = &rxm_domain_ops;

	/* True when the core provider needs local registration but the app
	 * did not ask for it, i.e. RxM must register buffers itself. */
	rxm_domain->mr_local = OFI_CHECK_MR_LOCAL(msg_info->domain_attr->mr_mode) &&
		!OFI_CHECK_MR_LOCAL(info->domain_attr->mr_mode);

	fi_freeinfo(msg_info);
	return 0;
err3:
	fi_close(&rxm_domain->msg_domain->fid);
err2:
	fi_freeinfo(msg_info);
err1:
	free(rxm_domain);
	return ret;
}
/*
 * RxM getinfo: produce the fi_info list via the generic ofix_getinfo
 * machinery, then post-process FI_MR_LOCAL/FI_LOCAL_MR.
 *
 * If the app's hints accept local MR, duplicate each matching entry and
 * insert a no-local-MR variant immediately after it (the local-MR entry
 * stays first, i.e. preferred).  Otherwise strip the local-MR requirement
 * from every entry.  The bit to clear depends on the API version:
 * FI_LOCAL_MR (mode) before 1.5, FI_MR_LOCAL (mr_mode) from 1.5 on.
 */
static int rxm_getinfo(uint32_t version, const char *node, const char *service,
			uint64_t flags, struct fi_info *hints, struct fi_info **info)
{
	struct fi_info *cur, *dup;
	int ret;

	ret = ofix_getinfo(version, node, service, flags, &rxm_util_prov, hints,
			   rxm_info_to_core, rxm_info_to_rxm, info);
	if (ret)
		return ret;

	/* If app supports FI_MR_LOCAL, prioritize requiring it for
	 * better performance. */
	if (hints && hints->domain_attr && (RXM_MR_LOCAL(hints))) {
		for (cur = *info; cur; cur = cur->next) {
			if (!RXM_MR_LOCAL(cur))
				continue;
			if (!(dup = fi_dupinfo(cur))) {
				/* Out of memory: release the whole list
				 * rather than return a partial one. */
				fi_freeinfo(*info);
				return -FI_ENOMEM;
			}
			if (FI_VERSION_LT(version, FI_VERSION(1, 5)))
				dup->mode &= ~FI_LOCAL_MR;
			else
				dup->domain_attr->mr_mode &= ~FI_MR_LOCAL;
			/* Splice dup in after cur, then advance past it so
			 * the copy is not duplicated again. */
			dup->next = cur->next;
			cur->next = dup;
			cur = dup;
		}
	} else {
		for (cur = *info; cur; cur = cur->next) {
			if (FI_VERSION_LT(version, FI_VERSION(1, 5)))
				cur->mode &= ~FI_LOCAL_MR;
			else
				cur->domain_attr->mr_mode &= ~FI_MR_LOCAL;
		}
	}
	return 0;
}
/*
 * Providers should set v1.0 registration modes (FI_MR_BASIC and
 * FI_MR_SCALABLE) that they support, along with all required modes.
 *
 * Validates the user's requested mr_mode against the provider's.  Pre-1.5
 * callers use enum semantics (UNSPEC/BASIC/SCALABLE exactly); 1.5+ callers
 * use bitmask semantics, with BASIC/SCALABLE still accepted as exclusive
 * legacy values.  Returns 0 on success, -FI_ENODATA otherwise.
 */
int ofi_check_mr_mode(const struct fi_provider *prov, uint32_t api_version,
		      int prov_mode, const struct fi_info *user_info)
{
	int user_mode = user_info->domain_attr->mr_mode;
	int ret = -FI_ENODATA;

	/* A provider needing FI_MR_LOCAL requires the user to signal it,
	 * either via the legacy FI_LOCAL_MR mode bit or FI_MR_LOCAL. */
	if ((prov_mode & FI_MR_LOCAL) &&
	    !((user_info->mode & FI_LOCAL_MR) || (user_mode & FI_MR_LOCAL)))
		goto out;

	if (FI_VERSION_LT(api_version, FI_VERSION(1, 5))) {
		/* Pre-1.5: mr_mode is an enum, not a bitmask. */
		switch (user_mode) {
		case FI_MR_UNSPEC:
			if (!(prov_mode & (FI_MR_SCALABLE | FI_MR_BASIC)))
				goto out;
			break;
		case FI_MR_BASIC:
			if (!(prov_mode & FI_MR_BASIC))
				goto out;
			break;
		case FI_MR_SCALABLE:
			if (!(prov_mode & FI_MR_SCALABLE))
				goto out;
			break;
		default:
			goto out;
		}
	} else {
		/* 1.5+: BASIC/SCALABLE may not be combined with other bits;
		 * otherwise the user's bitmask must cover every bit the
		 * provider requires for the requested caps. */
		if (user_mode & FI_MR_BASIC) {
			if ((user_mode & ~FI_MR_BASIC) ||
			    !(prov_mode & FI_MR_BASIC))
				goto out;
		} else if (user_mode & FI_MR_SCALABLE) {
			if ((user_mode & ~FI_MR_SCALABLE) ||
			    !(prov_mode & FI_MR_SCALABLE))
				goto out;
		} else {
			prov_mode = ofi_cap_mr_mode(user_info->caps, prov_mode);
			if ((user_mode & prov_mode) != prov_mode)
				goto out;
		}
	}

	ret = 0;
out:
	if (ret) {
		FI_INFO(prov, FI_LOG_CORE, "Invalid memory registration mode\n");
		FI_INFO_MR_MODE(prov, prov_mode, user_mode);
	}

	return ret;
}
/*
 * Convert a core provider's fi_info into a util-layer fi_info: run the
 * provider-supplied translation callback, duplicate addressing info, and
 * copy the domain/fabric/provider name strings.
 *
 * Returns 0 on success; -FI_ENOMEM on any failure (the partially built
 * *util_info is freed before returning).
 */
static int ofi_info_to_util(uint32_t version, const struct fi_provider *prov,
			    struct fi_info *core_info,
			    ofi_alter_info_t info_to_util,
			    struct fi_info **util_info)
{
	if (!(*util_info = fi_allocinfo()))
		return -FI_ENOMEM;

	if (info_to_util(version, core_info, *util_info))
		goto err;

	if (ofi_dup_addr(core_info, *util_info))
		goto err;

	/* Release 1.4 brought standardized domain names across IP based
	 * providers. Before this release, the usNIC provider would return a
	 * NULL domain name from fi_getinfo. For compatibility reasons, allow a
	 * NULL domain name when apps are requesting version < 1.4.
	 *
	 * Bug fix: the original assert was FI_VERSION_LT(1, 4), which
	 * compares the bare integers 1 and 4 (always true), so the check
	 * never looked at the requested API version.  Compare the caller's
	 * version against FI_VERSION(1, 4) instead.
	 */
	assert(FI_VERSION_LT(version, FI_VERSION(1, 4)) ||
	       core_info->domain_attr->name);

	if (core_info->domain_attr->name) {
		(*util_info)->domain_attr->name =
			strdup(core_info->domain_attr->name);
		if (!(*util_info)->domain_attr->name) {
			FI_WARN(prov, FI_LOG_FABRIC,
				"Unable to allocate domain name\n");
			goto err;
		}
	}

	(*util_info)->fabric_attr->name = strdup(core_info->fabric_attr->name);
	if (!(*util_info)->fabric_attr->name) {
		FI_WARN(prov, FI_LOG_FABRIC,
			"Unable to allocate fabric name\n");
		goto err;
	}

	(*util_info)->fabric_attr->prov_name =
		strdup(core_info->fabric_attr->prov_name);
	if (!(*util_info)->fabric_attr->prov_name) {
		/* Fixed copy-pasted message: this failure concerns the
		 * provider name, not the fabric name. */
		FI_WARN(prov, FI_LOG_FABRIC,
			"Unable to allocate provider name\n");
		goto err;
	}

	return 0;
err:
	fi_freeinfo(*util_info);
	return -FI_ENOMEM;
}
/*
 * Validate the user's fabric attributes against the provider's.
 * Returns 0 on success, -FI_ENODATA when a requested provider or API
 * version exceeds what the provider supports.
 */
int ofi_check_fabric_attr(const struct fi_provider *prov,
			  const struct fi_fabric_attr *prov_attr,
			  const struct fi_fabric_attr *user_attr)
{
	/* Provider names are checked by the framework, not here. */

	if (user_attr->prov_version > prov_attr->prov_version) {
		FI_INFO(prov, FI_LOG_CORE, "Unsupported provider version\n");
	} else if (FI_VERSION_LT(user_attr->api_version,
				 prov_attr->api_version)) {
		FI_INFO(prov, FI_LOG_CORE, "Unsupported api version\n");
	} else {
		return 0;
	}

	return -FI_ENODATA;
}
/*
 * Test fixture: allocate hints appropriate to the requested API version
 * and open the gni fabric and domain into the file-scope globals
 * (hints, fi, fab, dom).  Any failure aborts the test via cr_assert.
 */
static void _setup(uint32_t version)
{
	int ret;

	hints = fi_allocinfo();
	cr_assert(hints, "fi_allocinfo");

	/* Pre-1.5 APIs use the legacy FI_MR_BASIC enum; newer APIs take
	 * the provider's default mr_mode bitmask. */
	hints->domain_attr->mr_mode =
		FI_VERSION_LT(version, FI_VERSION(1, 5)) ?
		FI_MR_BASIC : GNIX_DEFAULT_MR_MODE;
	hints->fabric_attr->prov_name = strdup("gni");

	ret = fi_getinfo(version, NULL, 0, 0, hints, &fi);
	cr_assert(!ret, "fi_getinfo");

	ret = fi_fabric(fi->fabric_attr, &fab, NULL);
	cr_assert(!ret, "fi_fabric");

	ret = fi_domain(fab, fi, &dom, NULL);
	cr_assert(!ret, "fi_domain");
}
/*
 * - Support FI_MR_LOCAL/FI_LOCAL_MR as ofi_rxm can handle it.
 * - The RxM FI_RMA implementation is pass-through but the provider can handle
 *   FI_MR_PROV_KEY and FI_MR_VIRT_ADDR in its large message transfer rendezvous
 *   protocol.
 * - fi_alter_domain_attr should correctly set the mr_mode in return fi_info
 *   based on hints.
 */
void rxm_info_to_core_mr_modes(uint32_t version, const struct fi_info *hints,
			       struct fi_info *core_info)
{
	/* We handle FI_MR_BASIC and FI_MR_SCALABLE irrespective of version */
	if (hints && hints->domain_attr &&
	    (hints->domain_attr->mr_mode & (FI_MR_SCALABLE | FI_MR_BASIC))) {
		/* NOTE(review): plain assignment ("=") here discards any
		 * previously set mode bits, while the pre-1.5 branch below
		 * uses "|=" — confirm the overwrite is intentional. */
		core_info->mode = FI_LOCAL_MR;
		core_info->domain_attr->mr_mode = hints->domain_attr->mr_mode;
	} else if (FI_VERSION_LT(version, FI_VERSION(1, 5))) {
		core_info->mode |= FI_LOCAL_MR;
		/* Specify FI_MR_UNSPEC (instead of FI_MR_BASIC) so that
		 * providers that support only FI_MR_SCALABLE aren't dropped */
		core_info->domain_attr->mr_mode = FI_MR_UNSPEC;
	} else {
		/* 1.5+ bitmask path: require local registration, and pass
		 * through the BASIC-equivalent bits the app agreed to (or
		 * all of them when RMA-target caps are not requested). */
		core_info->domain_attr->mr_mode |= FI_MR_LOCAL;
		if (!hints || !ofi_rma_target_allowed(hints->caps))
			core_info->domain_attr->mr_mode |= OFI_MR_BASIC_MAP;
		else if (hints->domain_attr)
			core_info->domain_attr->mr_mode |=
				hints->domain_attr->mr_mode & OFI_MR_BASIC_MAP;
	}
}
static void sock_set_domain_attr(uint32_t api_version, void *src_addr, const struct fi_domain_attr *hint_attr, struct fi_domain_attr *attr) { struct sock_domain *domain; domain = sock_dom_list_head(); attr->domain = domain ? &domain->dom_fid : NULL; if (!hint_attr) { *attr = sock_domain_attr; if (FI_VERSION_LT(api_version, FI_VERSION(1, 5))) attr->mr_mode = FI_MR_SCALABLE; goto out; } if (hint_attr->domain) { domain = container_of(hint_attr->domain, struct sock_domain, dom_fid); *attr = domain->attr; attr->domain = hint_attr->domain; goto out; }
/*
 * Intersect the user's requested mode bits with what we support and
 * return the result through mode_out.  Without hints, everything we
 * support is returned.  Pre-1.5 callers must include FI_LOCAL_MR or the
 * request is rejected with -FI_ENODATA.
 */
static int validate_modebits(uint32_t version, const struct fi_info *hints,
			     uint64_t supported, uint64_t *mode_out)
{
	uint64_t requested;

	/* No hints: report the full supported set. */
	if (!hints) {
		*mode_out = supported;
		return FI_SUCCESS;
	}

	requested = hints->mode & supported;

	/* Before version 1.5, FI_LOCAL_MR is a requirement. */
	if (FI_VERSION_LT(version, FI_VERSION(1, 5)) &&
	    !(requested & FI_LOCAL_MR))
		return -FI_ENODATA;

	*mode_out = requested;
	return FI_SUCCESS;
}
/*
 * fi_getinfo - query each usable provider and chain the fi_info lists they
 * return into a single list.
 *
 * The hints' prov_name may be a ';'-separated list of provider names used
 * as a layering filter.  Providers built for an older API than requested,
 * providers that fail, and providers returning an empty list are logged
 * and skipped.  Returns 0, -FI_ENOSYS (library too old), -FI_ENOMEM, or
 * -FI_ENODATA when no provider matched.
 */
int DEFAULT_SYMVER_PRE(fi_getinfo)(uint32_t version, const char *node,
		const char *service, uint64_t flags,
		const struct fi_info *hints, struct fi_info **info)
{
	struct ofi_prov *prov;
	struct fi_info *tail, *cur;
	char **prov_vec = NULL;
	size_t count = 0;
	int ret;

	if (!ofi_init)
		fi_ini();

	/* The library cannot satisfy a request for a newer API. */
	if (FI_VERSION_LT(fi_version(), version)) {
		FI_WARN(&core_prov, FI_LOG_CORE,
			"Requested version is newer than library\n");
		return -FI_ENOSYS;
	}

	if (flags == FI_PROV_ATTR_ONLY) {
		return ofi_getprovinfo(info);
	}

	/* Split the optional "name1;name2;..." provider filter. */
	if (hints && hints->fabric_attr && hints->fabric_attr->prov_name) {
		prov_vec = ofi_split_and_alloc(hints->fabric_attr->prov_name,
					       ";", &count);
		if (!prov_vec)
			return -FI_ENOMEM;
		FI_DBG(&core_prov, FI_LOG_CORE, "hints prov_name: %s\n",
		       hints->fabric_attr->prov_name);
	}

	*info = tail = NULL;
	for (prov = prov_head; prov; prov = prov->next) {
		if (!prov->provider)
			continue;

		if (!ofi_layering_ok(prov->provider, prov_vec, count, flags))
			continue;

		/* Skip providers compiled against an older API. */
		if (FI_VERSION_LT(prov->provider->fi_version, version)) {
			FI_WARN(&core_prov, FI_LOG_CORE,
				"Provider %s fi_version %d.%d < requested %d.%d\n",
				prov->provider->name,
				FI_MAJOR(prov->provider->fi_version),
				FI_MINOR(prov->provider->fi_version),
				FI_MAJOR(version), FI_MINOR(version));
			continue;
		}

		ret = prov->provider->getinfo(version, node, service, flags,
					      hints, &cur);
		if (ret) {
			FI_WARN(&core_prov, FI_LOG_CORE,
				"fi_getinfo: provider %s returned -%d (%s)\n",
				prov->provider->name, -ret, fi_strerror(-ret));
			continue;
		}

		if (!cur) {
			FI_WARN(&core_prov, FI_LOG_CORE,
				"fi_getinfo: provider %s output empty list\n",
				prov->provider->name);
			continue;
		}

		if (!*info)
			*info = cur;
		else
			tail->next = cur;

		/* Stamp provider attrs on every entry; the loop covers all
		 * but the last entry, which is handled just after so that
		 * tail ends on the list's final node. */
		for (tail = cur; tail->next; tail = tail->next) {
			ofi_set_prov_attr(tail->fabric_attr, prov->provider);
			tail->fabric_attr->api_version = version;
		}
		ofi_set_prov_attr(tail->fabric_attr, prov->provider);
		tail->fabric_attr->api_version = version;
	}

	ofi_free_string_array(prov_vec);
	return *info ? 0 : -FI_ENODATA;
}
/*
 * Validate the user's domain attributes against the provider's.  Every
 * failed check logs the reason and returns -FI_ENODATA; checks past the
 * FI_VERSION_LT(1, 5) gate only apply to API 1.5 and beyond.
 */
int ofi_check_domain_attr(const struct fi_provider *prov, uint32_t api_version,
			  const struct fi_domain_attr *prov_attr,
			  const struct fi_info *user_info)
{
	const struct fi_domain_attr *user_attr = user_info->domain_attr;

	if (prov_attr->name && user_attr->name &&
	    strcasecmp(user_attr->name, prov_attr->name)) {
		FI_INFO(prov, FI_LOG_CORE, "Unknown domain name\n");
		FI_INFO_NAME(prov, prov_attr, user_attr);
		return -FI_ENODATA;
	}

	/* The provider's threading/progress/resource-mgmt levels must be at
	 * least as permissive as what the user requires. */
	if (fi_thread_level(user_attr->threading) <
	    fi_thread_level(prov_attr->threading)) {
		FI_INFO(prov, FI_LOG_CORE, "Invalid threading model\n");
		return -FI_ENODATA;
	}

	if (fi_progress_level(user_attr->control_progress) <
	    fi_progress_level(prov_attr->control_progress)) {
		FI_INFO(prov, FI_LOG_CORE, "Invalid control progress model\n");
		return -FI_ENODATA;
	}

	if (fi_progress_level(user_attr->data_progress) <
	    fi_progress_level(prov_attr->data_progress)) {
		FI_INFO(prov, FI_LOG_CORE, "Invalid data progress model\n");
		return -FI_ENODATA;
	}

	if (fi_resource_mgmt_level(user_attr->resource_mgmt) <
	    fi_resource_mgmt_level(prov_attr->resource_mgmt)) {
		FI_INFO(prov, FI_LOG_CORE, "Invalid resource mgmt model\n");
		return -FI_ENODATA;
	}

	if ((prov_attr->av_type != FI_AV_UNSPEC) &&
	    (user_attr->av_type != FI_AV_UNSPEC) &&
	    (prov_attr->av_type != user_attr->av_type)) {
		FI_INFO(prov, FI_LOG_CORE, "Invalid AV type\n");
		return -FI_ENODATA;
	}

	if (user_attr->cq_data_size > prov_attr->cq_data_size) {
		FI_INFO(prov, FI_LOG_CORE, "CQ data size too large\n");
		FI_INFO_CHECK_VAL(prov, prov_attr, user_attr, cq_data_size);
		return -FI_ENODATA;
	}

	if (ofi_check_mr_mode(prov, api_version, prov_attr->mr_mode, user_info))
		return -FI_ENODATA;

	/* Bug fix: these two checks logged the failure but fell through
	 * without returning, unlike every sibling check in this function.
	 * Fail the match like the rest do. */
	if (user_attr->max_ep_stx_ctx > prov_attr->max_ep_stx_ctx) {
		FI_INFO(prov, FI_LOG_CORE,
			"max_ep_stx_ctx greater than supported\n");
		FI_INFO_CHECK_VAL(prov, prov_attr, user_attr, max_ep_stx_ctx);
		return -FI_ENODATA;
	}

	if (user_attr->max_ep_srx_ctx > prov_attr->max_ep_srx_ctx) {
		FI_INFO(prov, FI_LOG_CORE,
			"max_ep_srx_ctx greater than supported\n");
		FI_INFO_CHECK_VAL(prov, prov_attr, user_attr, max_ep_srx_ctx);
		return -FI_ENODATA;
	}

	/* following checks only apply to api 1.5 and beyond */
	if (FI_VERSION_LT(api_version, FI_VERSION(1, 5)))
		return 0;

	if (user_attr->cntr_cnt > prov_attr->cntr_cnt) {
		FI_INFO(prov, FI_LOG_CORE, "Cntr count too large\n");
		return -FI_ENODATA;
	}

	if (user_attr->mr_iov_limit > prov_attr->mr_iov_limit) {
		FI_INFO(prov, FI_LOG_CORE, "MR iov limit too large\n");
		FI_INFO_CHECK_VAL(prov, prov_attr, user_attr, mr_iov_limit);
		return -FI_ENODATA;
	}

	if (user_attr->caps & ~(prov_attr->caps)) {
		FI_INFO(prov, FI_LOG_CORE,
			"Requested domain caps not supported\n");
		FI_INFO_CHECK(prov, prov_attr, user_attr, caps, FI_TYPE_CAPS);
		return -FI_ENODATA;
	}

	if ((user_attr->mode & prov_attr->mode) != prov_attr->mode) {
		FI_INFO(prov, FI_LOG_CORE, "Required domain mode missing\n");
		FI_INFO_MODE(prov, prov_attr->mode, user_attr->mode);
		return -FI_ENODATA;
	}

	if (user_attr->max_err_data > prov_attr->max_err_data) {
		FI_INFO(prov, FI_LOG_CORE, "Max err data too large\n");
		FI_INFO_CHECK_VAL(prov, prov_attr, user_attr, max_err_data);
		return -FI_ENODATA;
	}

	if (user_attr->mr_cnt > prov_attr->mr_cnt) {
		FI_INFO(prov, FI_LOG_CORE, "MR count too large\n");
		FI_INFO_CHECK_VAL(prov, prov_attr, user_attr, mr_cnt);
		return -FI_ENODATA;
	}

	return 0;
}
/* Attach the requested wait object to the CQ, creating one or joining an
 * existing wait set per cq->attr.wait_obj.  FI_WAIT_NONE falls through to
 * the default (no wait object). */
static int gnix_cq_set_wait(struct gnix_fid_cq *cq)
{
	int ret = FI_SUCCESS;

	GNIX_TRACE(FI_LOG_CQ, "\n");

	struct fi_wait_attr requested = {
		.wait_obj = cq->attr.wait_obj,
		.flags = 0
	};

	switch (cq->attr.wait_obj) {
	case FI_WAIT_UNSPEC:
	case FI_WAIT_FD:
	case FI_WAIT_MUTEX_COND:
		ret = gnix_wait_open(&cq->domain->fabric->fab_fid,
				     &requested, &cq->wait);
		break;
	case FI_WAIT_SET:
		ret = _gnix_wait_set_add(cq->attr.wait_set, &cq->cq_fid.fid);
		if (!ret)
			cq->wait = cq->attr.wait_set;
		break;
	default:
		break;
	}

	return ret;
}

/* Queue destructor callback: free an entry and its payload buffer. */
static void free_cq_entry(struct slist_entry *item)
{
	struct gnix_cq_entry *entry;

	entry = container_of(item, struct gnix_cq_entry, item);

	free(entry->the_entry);
	free(entry);
}

/* Queue allocator callback: allocate an entry plus a payload buffer of
 * `size` bytes.  Returns the embedded slist link, or NULL on OOM. */
static struct slist_entry *alloc_cq_entry(size_t size)
{
	struct gnix_cq_entry *entry = malloc(sizeof(*entry));

	if (!entry) {
		GNIX_DEBUG(FI_LOG_CQ, "out of memory\n");
		goto err;
	}

	entry->the_entry = malloc(size);
	if (!entry->the_entry) {
		GNIX_DEBUG(FI_LOG_CQ, "out of memory\n");
		goto cleanup;
	}

	return &entry->item;

cleanup:
	free(entry);
err:
	return NULL;
}

/* Drive progress on everything registered with this CQ's progress set. */
static int __gnix_cq_progress(struct gnix_fid_cq *cq)
{
	return _gnix_prog_progress(&cq->pset);
}

/*******************************************************************************
 * Exposed helper functions
 ******************************************************************************/

/* Append a completion event to the CQ under its lock, format it per the
 * CQ's configured entry format, and wake any wait object.  Returns
 * FI_SUCCESS or -FI_ENOMEM when no free entry is available. */
ssize_t _gnix_cq_add_event(struct gnix_fid_cq *cq, struct gnix_fid_ep *ep,
			   void *op_context, uint64_t flags, size_t len,
			   void *buf, uint64_t data, uint64_t tag,
			   fi_addr_t src_addr)
{
	struct gnix_cq_entry *event;
	struct slist_entry *item;
	uint64_t mask;
	ssize_t ret = FI_SUCCESS;

	if (ep) {
		/* FI_NOTIFY_FLAGS_ONLY: report only notification-related
		 * flags (plus RMA flags when this is an RMA event). */
		if (ep->info && ep->info->mode & FI_NOTIFY_FLAGS_ONLY) {
			mask = (FI_REMOTE_CQ_DATA | FI_MULTI_RECV);

			if (flags & FI_RMA_EVENT) {
				mask |= (FI_REMOTE_READ | FI_REMOTE_WRITE |
					 FI_RMA);
			}

			flags &= mask;
		}
	}

	COND_ACQUIRE(cq->requires_lock, &cq->lock);

	item = _gnix_queue_get_free(cq->events);
	if (!item) {
		GNIX_DEBUG(FI_LOG_CQ, "error creating cq_entry\n");
		ret = -FI_ENOMEM;
		goto err;
	}

	event = container_of(item, struct gnix_cq_entry, item);

	assert(event->the_entry);

	/* Format-specific serializer chosen by the CQ's attr.format. */
	fill_function[cq->attr.format](event->the_entry, op_context, flags,
				       len, buf, data, tag);
	event->src_addr = src_addr;

	_gnix_queue_enqueue(cq->events, &event->item);
	GNIX_DEBUG(FI_LOG_CQ, "Added event: %lx\n", op_context);

	if (cq->wait)
		_gnix_signal_wait_obj(cq->wait);

err:
	COND_RELEASE(cq->requires_lock, &cq->lock);

	return ret;
}

/* Append an error completion to the CQ's error queue under its lock and
 * wake any wait object.  Ownership of err_data is taken by the entry
 * (freed by the readerr path).  Returns FI_SUCCESS or -FI_ENOMEM. */
ssize_t _gnix_cq_add_error(struct gnix_fid_cq *cq, void *op_context,
			   uint64_t flags, size_t len, void *buf,
			   uint64_t data, uint64_t tag, size_t olen,
			   int err, int prov_errno, void *err_data,
			   size_t err_data_size)
{
	struct fi_cq_err_entry *error;
	struct gnix_cq_entry *event;
	struct slist_entry *item;
	ssize_t ret = FI_SUCCESS;

	GNIX_INFO(FI_LOG_CQ, "creating error event entry\n");


	COND_ACQUIRE(cq->requires_lock, &cq->lock);

	item = _gnix_queue_get_free(cq->errors);
	if (!item) {
		GNIX_WARN(FI_LOG_CQ, "error creating error entry\n");
		ret = -FI_ENOMEM;
		goto err;
	}

	event = container_of(item, struct gnix_cq_entry, item);

	error = event->the_entry;

	error->op_context = op_context;
	error->flags = flags;
	error->len = len;
	error->buf = buf;
	error->data = data;
	error->tag = tag;
	error->olen = olen;
	error->err = err;
	error->prov_errno = prov_errno;
	error->err_data = err_data;
	error->err_data_size = err_data_size;

	_gnix_queue_enqueue(cq->errors, &event->item);

	if (cq->wait)
		_gnix_signal_wait_obj(cq->wait);

err:
	COND_RELEASE(cq->requires_lock, &cq->lock);

	return ret;
}

/* Register an object/callback with the CQ's progress set. */
int _gnix_cq_poll_obj_add(struct gnix_fid_cq *cq, void *obj,
			  int (*prog_fn)(void *data))
{
	return _gnix_prog_obj_add(&cq->pset, obj, prog_fn);
}

/* Remove an object/callback from the CQ's progress set. */
int _gnix_cq_poll_obj_rem(struct gnix_fid_cq *cq, void *obj,
			  int (*prog_fn)(void *data))
{
	return _gnix_prog_obj_rem(&cq->pset, obj, prog_fn);
}

/* Final destructor, invoked when the CQ refcount drops to zero: detach
 * the wait object, tear down queues/lock, and free everything allocated
 * by gnix_cq_open (including the per-CQ ops tables). */
static void __cq_destruct(void *obj)
{
	struct gnix_fid_cq *cq = (struct gnix_fid_cq *) obj;

	_gnix_ref_put(cq->domain);

	switch (cq->attr.wait_obj) {
	case FI_WAIT_NONE:
		break;
	case FI_WAIT_SET:
		_gnix_wait_set_remove(cq->wait, &cq->cq_fid.fid);
		break;
	case FI_WAIT_UNSPEC:
	case FI_WAIT_FD:
	case FI_WAIT_MUTEX_COND:
		assert(cq->wait);
		gnix_wait_close(&cq->wait->fid);
		break;
	default:
		GNIX_WARN(FI_LOG_CQ, "format: %d unsupported.\n",
			  cq->attr.wait_obj);
		break;
	}

	_gnix_prog_fini(&cq->pset);

	_gnix_queue_destroy(cq->events);
	_gnix_queue_destroy(cq->errors);

	fastlock_destroy(&cq->lock);
	free(cq->cq_fid.ops);
	free(cq->cq_fid.fid.ops);
	free(cq);
}

/*******************************************************************************
 * API functions.
 ******************************************************************************/

/* fi_close for a CQ: drop a reference; actual teardown happens in
 * __cq_destruct once all references are released. */
static int gnix_cq_close(fid_t fid)
{
	struct gnix_fid_cq *cq;
	int references_held;

	GNIX_TRACE(FI_LOG_CQ, "\n");

	cq = container_of(fid, struct gnix_fid_cq, cq_fid);

	references_held = _gnix_ref_put(cq);

	if (references_held) {
		GNIX_INFO(FI_LOG_CQ, "failed to fully close cq due to lingering "
			  "references. references=%i cq=%p\n",
			  references_held, cq);
	}

	return FI_SUCCESS;
}

/* Core read path: progress the CQ, then copy up to `count` events (and
 * optional source addresses) into the caller's buffer under the lock.
 * Returns the number copied, -FI_EAVAIL if errors are pending, or
 * -FI_EAGAIN when empty. */
static ssize_t __gnix_cq_readfrom(struct fid_cq *cq, void *buf,
				  size_t count, fi_addr_t *src_addr)
{
	struct gnix_fid_cq *cq_priv;
	struct gnix_cq_entry *event;
	struct slist_entry *temp;

	ssize_t read_count = 0;

	if (!cq || !buf || !count)
		return -FI_EINVAL;

	cq_priv = container_of(cq, struct gnix_fid_cq, cq_fid);

	__gnix_cq_progress(cq_priv);

	/* Errors must be drained via readerr before normal reads resume. */
	if (_gnix_queue_peek(cq_priv->errors))
		return -FI_EAVAIL;

	COND_ACQUIRE(cq_priv->requires_lock, &cq_priv->lock);

	while (_gnix_queue_peek(cq_priv->events) && count--) {
		temp = _gnix_queue_dequeue(cq_priv->events);
		event = container_of(temp, struct gnix_cq_entry, item);

		assert(event->the_entry);
		memcpy(buf, event->the_entry, cq_priv->entry_size);
		if (src_addr)
			memcpy(&src_addr[read_count], &event->src_addr,
			       sizeof(fi_addr_t));

		_gnix_queue_enqueue_free(cq_priv->events, &event->item);

		buf = (void *) ((uint8_t *) buf + cq_priv->entry_size);

		read_count++;
	}

	COND_RELEASE(cq_priv->requires_lock, &cq_priv->lock);

	return read_count ?: -FI_EAGAIN;
}

/* Shared blocking/non-blocking read: optionally wait on the CQ's wait
 * object (not valid for FI_WAIT_SET), then delegate to __gnix_cq_readfrom. */
static ssize_t __gnix_cq_sreadfrom(int blocking, struct fid_cq *cq, void *buf,
				   size_t count, fi_addr_t *src_addr,
				   const void *cond, int timeout)
{
	struct gnix_fid_cq *cq_priv;

	cq_priv = container_of(cq, struct gnix_fid_cq, cq_fid);
	if ((blocking && !cq_priv->wait) ||
	    (blocking && cq_priv->attr.wait_obj == FI_WAIT_SET))
		return -FI_EINVAL;

	if (_gnix_queue_peek(cq_priv->errors))
		return -FI_EAVAIL;

	if (cq_priv->wait)
		gnix_wait_wait((struct fid_wait *)cq_priv->wait, timeout);

	return __gnix_cq_readfrom(cq, buf, count, src_addr);
}

/* Blocking read with source address. */
DIRECT_FN STATIC ssize_t gnix_cq_sreadfrom(struct fid_cq *cq, void *buf,
					   size_t count, fi_addr_t *src_addr,
					   const void *cond, int timeout)
{
	return __gnix_cq_sreadfrom(1, cq, buf, count, src_addr, cond, timeout);
}

/* Non-blocking read. */
DIRECT_FN STATIC ssize_t gnix_cq_read(struct fid_cq *cq,
				      void *buf,
				      size_t count)
{
	return __gnix_cq_sreadfrom(0, cq, buf, count, NULL, NULL, 0);
}

/* Blocking read. */
DIRECT_FN STATIC ssize_t gnix_cq_sread(struct fid_cq *cq, void *buf,
				       size_t count, const void *cond,
				       int timeout)
{
	return __gnix_cq_sreadfrom(1, cq, buf, count, NULL, cond, timeout);
}

/* Non-blocking read with source address. */
DIRECT_FN STATIC ssize_t gnix_cq_readfrom(struct fid_cq *cq, void *buf,
					  size_t count, fi_addr_t *src_addr)
{
	return __gnix_cq_sreadfrom(0, cq, buf, count, src_addr, NULL, 0);
}

/* Pop one error completion into the caller's fi_cq_err_entry.  err_data
 * handling is version-dependent: pre-1.5 (or err_data_size == 0) copies
 * into CQ-owned storage; 1.5+ copies into the caller's buffer, bounded
 * by the smaller of the two sizes.  Returns 1, -FI_EAGAIN, or -FI_EINVAL. */
DIRECT_FN STATIC ssize_t gnix_cq_readerr(struct fid_cq *cq,
					 struct fi_cq_err_entry *buf,
					 uint64_t flags)
{
	struct gnix_fid_cq *cq_priv;
	struct gnix_cq_entry *event;
	struct slist_entry *entry;
	size_t err_data_cpylen;
	struct fi_cq_err_entry *gnix_cq_err;

	ssize_t read_count = 0;

	if (!cq || !buf)
		return -FI_EINVAL;

	cq_priv = container_of(cq, struct gnix_fid_cq, cq_fid);

	/*
	 * we need to progress cq.  some apps may be only using
	 * cq to check for errors.
	 */

	_gnix_prog_progress(&cq_priv->pset);

	COND_ACQUIRE(cq_priv->requires_lock, &cq_priv->lock);

	entry = _gnix_queue_dequeue(cq_priv->errors);
	if (!entry) {
		read_count = -FI_EAGAIN;
		goto err;
	}

	event = container_of(entry, struct gnix_cq_entry, item);
	gnix_cq_err = event->the_entry;

	buf->op_context = gnix_cq_err->op_context;
	buf->flags = gnix_cq_err->flags;
	buf->len = gnix_cq_err->len;
	buf->buf = gnix_cq_err->buf;
	buf->data = gnix_cq_err->data;
	buf->tag = gnix_cq_err->tag;
	buf->olen = gnix_cq_err->olen;
	buf->err = gnix_cq_err->err;
	buf->prov_errno = gnix_cq_err->prov_errno;

	if (gnix_cq_err->err_data != NULL) {
		/*
		 * Note: If the api version is >= 1.5 then copy err_data into
		 * buf->err_data and copy at most buf->err_data_size.
		 * If buf->err_data_size is zero or the api version is < 1.5,
		 * use the old method of allocating space in provider.
		 */
		if (FI_VERSION_LT(cq_priv->domain->fabric->fab_fid.api_version,
		    FI_VERSION(1, 5)) || buf->err_data_size == 0) {
			err_data_cpylen = sizeof(cq_priv->err_data);

			memcpy(cq_priv->err_data, gnix_cq_err->err_data,
			       err_data_cpylen);

			buf->err_data = cq_priv->err_data;
		} else {
			/* NOTE(review): this returns while still holding the
			 * CQ lock acquired above (no COND_RELEASE) and with
			 * the dequeued error entry neither consumed nor
			 * requeued — verify intentional. */
			if (buf->err_data == NULL)
				return -FI_EINVAL;

			err_data_cpylen = MIN(buf->err_data_size,
					      gnix_cq_err->err_data_size);
			memcpy(buf->err_data, gnix_cq_err->err_data,
			       err_data_cpylen);
			buf->err_data_size = err_data_cpylen;
		}
		free(gnix_cq_err->err_data);
		gnix_cq_err->err_data = NULL;
	} else {
		if (FI_VERSION_LT(cq_priv->domain->fabric->fab_fid.api_version,
		    FI_VERSION(1, 5))) {
			buf->err_data = NULL;
		} else {
			buf->err_data_size = 0;
		}
	}

	_gnix_queue_enqueue_free(cq_priv->errors, &event->item);

	read_count++;

err:
	COND_RELEASE(cq_priv->requires_lock, &cq_priv->lock);

	return read_count;
}

/* No provider-specific error string is produced. */
DIRECT_FN STATIC const char *gnix_cq_strerror(struct fid_cq *cq, int prov_errno,
					      const void *prov_data, char *buf,
					      size_t len)
{
	return NULL;
}

/* Manually wake anything blocked on the CQ's wait object. */
DIRECT_FN STATIC int gnix_cq_signal(struct fid_cq *cq)
{
	struct gnix_fid_cq *cq_priv;

	cq_priv = container_of(cq, struct gnix_fid_cq, cq_fid);

	if (cq_priv->wait)
		_gnix_signal_wait_obj(cq_priv->wait);

	return FI_SUCCESS;
}

/* fi_control for a CQ: FI_GETWAIT is recognized but unimplemented. */
static int gnix_cq_control(struct fid *cq, int command, void *arg)
{

	switch (command) {
	case FI_GETWAIT:
		return -FI_ENOSYS;
	default:
		return -FI_EINVAL;
	}
}

/* Create a CQ bound to the given domain: per-CQ copies of the ops tables
 * (so verify_cq_attr can patch them), attr validation, refcounting against
 * the domain, wait-object setup, and event/error queue creation.  All
 * failure paths unwind via the goto chain. */
DIRECT_FN int gnix_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr,
			   struct fid_cq **cq, void *context)
{
	struct gnix_fid_domain *domain_priv;
	struct gnix_fid_cq *cq_priv;
	struct fi_ops_cq *cq_ops;
	struct fi_ops *fi_cq_ops;

	int ret = FI_SUCCESS;

	GNIX_TRACE(FI_LOG_CQ, "\n");

	cq_ops = calloc(1, sizeof(*cq_ops));
	if (!cq_ops) {
		return -FI_ENOMEM;
	}

	fi_cq_ops = calloc(1, sizeof(*fi_cq_ops));
	if (!fi_cq_ops) {
		ret = -FI_ENOMEM;
		goto free_cq_ops;
	}

	*cq_ops = gnix_cq_ops;
	*fi_cq_ops = gnix_cq_fi_ops;

	ret = verify_cq_attr(attr, cq_ops, fi_cq_ops);
	if (ret)
		goto free_fi_cq_ops;

	domain_priv = container_of(domain, struct gnix_fid_domain, domain_fid);
	if (!domain_priv) {
		ret = -FI_EINVAL;
		goto free_fi_cq_ops;
	}

	cq_priv = calloc(1, sizeof(*cq_priv));
	if (!cq_priv) {
		ret = -FI_ENOMEM;
		goto free_fi_cq_ops;
	}

	/* Per-completion locking only needed outside FI_THREAD_COMPLETION. */
	cq_priv->requires_lock = (domain_priv->thread_model !=
			FI_THREAD_COMPLETION);

	cq_priv->domain = domain_priv;
	cq_priv->attr = *attr;

	_gnix_ref_init(&cq_priv->ref_cnt, 1, __cq_destruct);
	_gnix_ref_get(cq_priv->domain);

	_gnix_prog_init(&cq_priv->pset);

	cq_priv->cq_fid.fid.fclass = FI_CLASS_CQ;
	cq_priv->cq_fid.fid.context = context;
	cq_priv->cq_fid.fid.ops = fi_cq_ops;
	cq_priv->cq_fid.ops = cq_ops;

	/*
	 * Although we don't need to store entry_size since we're already
	 * storing the format, this might provide a performance benefit
	 * when allocating storage.
	 */
	cq_priv->entry_size = format_sizes[cq_priv->attr.format];

	fastlock_init(&cq_priv->lock);
	ret = gnix_cq_set_wait(cq_priv);
	if (ret)
		goto free_cq_priv;

	ret = _gnix_queue_create(&cq_priv->events, alloc_cq_entry,
				 free_cq_entry, cq_priv->entry_size,
				 cq_priv->attr.size);
	if (ret)
		goto free_cq_priv;

	ret = _gnix_queue_create(&cq_priv->errors, alloc_cq_entry,
				 free_cq_entry, sizeof(struct fi_cq_err_entry),
				 0);
	if (ret)
		goto free_gnix_queue;

	*cq = &cq_priv->cq_fid;

	return ret;

free_gnix_queue:
	_gnix_queue_destroy(cq_priv->events);
free_cq_priv:
	_gnix_ref_put(cq_priv->domain);
	fastlock_destroy(&cq_priv->lock);
	free(cq_priv);
free_fi_cq_ops:
	free(fi_cq_ops);
free_cq_ops:
	free(cq_ops);

	return ret;
}

/*******************************************************************************
 * FI_OPS_* data structures.
 ******************************************************************************/

static const struct fi_ops gnix_cq_fi_ops = {
	.size = sizeof(struct fi_ops),
	.close = gnix_cq_close,
	.bind = fi_no_bind,
	.control = gnix_cq_control,
	.ops_open = fi_no_ops_open
};

static const struct fi_ops_cq gnix_cq_ops = {
	.size = sizeof(struct fi_ops_cq),
	.read = gnix_cq_read,
	.readfrom = gnix_cq_readfrom,
	.readerr = gnix_cq_readerr,
	.sread = gnix_cq_sread,
	.sreadfrom = gnix_cq_sreadfrom,
	.signal = gnix_cq_signal,
	.strerror = gnix_cq_strerror
};