int psmx_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr, struct fid_cntr **cntr, void *context) { struct psmx_fid_domain *domain_priv; struct psmx_fid_cntr *cntr_priv; struct psmx_fid_wait *wait = NULL; struct fi_wait_attr wait_attr; int wait_is_local = 0; int events; uint64_t flags; int err; events = FI_CNTR_EVENTS_COMP; flags = 0; domain_priv = container_of(domain, struct psmx_fid_domain, domain); switch (attr->events) { case FI_CNTR_EVENTS_COMP: events = attr->events; break; default: FI_INFO(&psmx_prov, FI_LOG_CQ, "attr->events=%d, supported=%d\n", attr->events, FI_CNTR_EVENTS_COMP); return -FI_EINVAL; } switch (attr->wait_obj) { case FI_WAIT_NONE: case FI_WAIT_UNSPEC: break; case FI_WAIT_SET: if (!attr->wait_set) { FI_INFO(&psmx_prov, FI_LOG_CQ, "FI_WAIT_SET is specified but attr->wait_set is NULL\n"); return -FI_EINVAL; } wait = (struct psmx_fid_wait *)attr->wait_set; break; case FI_WAIT_FD: case FI_WAIT_MUTEX_COND: wait_attr.wait_obj = attr->wait_obj; wait_attr.flags = 0; err = psmx_wait_open(&domain_priv->fabric->fabric, &wait_attr, (struct fid_wait **)&wait); if (err) return err; wait_is_local = 1; break; default: FI_INFO(&psmx_prov, FI_LOG_CQ, "attr->wait_obj=%d, supported=%d...%d\n", attr->wait_obj, FI_WAIT_NONE, FI_WAIT_MUTEX_COND); return -FI_EINVAL; } cntr_priv = (struct psmx_fid_cntr *) calloc(1, sizeof *cntr_priv); if (!cntr_priv) { err = -FI_ENOMEM; goto fail; } cntr_priv->domain = domain_priv; cntr_priv->events = events; cntr_priv->wait = wait; cntr_priv->wait_is_local = wait_is_local; cntr_priv->flags = flags; cntr_priv->cntr.fid.fclass = FI_CLASS_CNTR; cntr_priv->cntr.fid.context = context; cntr_priv->cntr.fid.ops = &psmx_fi_ops; cntr_priv->cntr.ops = &psmx_cntr_ops; pthread_mutex_init(&cntr_priv->trigger_lock, NULL); *cntr = &cntr_priv->cntr; return 0; fail: if (wait && wait_is_local) fi_close(&wait->wait.fid); return err; }
int ofi_check_rx_attr(const struct fi_provider *prov, const struct fi_info *prov_info, const struct fi_rx_attr *user_attr, uint64_t info_mode) { const struct fi_rx_attr *prov_attr = prov_info->rx_attr; int rm_enabled = (prov_info->domain_attr->resource_mgmt == FI_RM_ENABLED); if (user_attr->caps & ~(prov_attr->caps)) { FI_INFO(prov, FI_LOG_CORE, "caps not supported\n"); FI_INFO_CHECK(prov, prov_attr, user_attr, caps, FI_TYPE_CAPS); return -FI_ENODATA; } info_mode = user_attr->mode ? user_attr->mode : info_mode; if ((info_mode & prov_attr->mode) != prov_attr->mode) { FI_INFO(prov, FI_LOG_CORE, "needed mode not set\n"); FI_INFO_MODE(prov, prov_attr->mode, user_attr->mode); return -FI_ENODATA; } if (prov_attr->op_flags & ~(prov_attr->op_flags)) { FI_INFO(prov, FI_LOG_CORE, "op_flags not supported\n"); FI_INFO_CHECK(prov, prov_attr, user_attr, op_flags, FI_TYPE_OP_FLAGS); return -FI_ENODATA; } if (user_attr->msg_order & ~(prov_attr->msg_order)) { FI_INFO(prov, FI_LOG_CORE, "msg_order not supported\n"); FI_INFO_CHECK(prov, prov_attr, user_attr, msg_order, FI_TYPE_MSG_ORDER); return -FI_ENODATA; } if (user_attr->comp_order & ~(prov_attr->comp_order)) { FI_INFO(prov, FI_LOG_CORE, "comp_order not supported\n"); FI_INFO_CHECK(prov, prov_attr, user_attr, comp_order, FI_TYPE_MSG_ORDER); return -FI_ENODATA; } if (user_attr->total_buffered_recv > prov_attr->total_buffered_recv) { FI_INFO(prov, FI_LOG_CORE, "total_buffered_recv too large\n"); FI_INFO_CHECK_VAL(prov, prov_attr, user_attr, total_buffered_recv); return -FI_ENODATA; } if (user_attr->size > prov_attr->size) { FI_INFO(prov, FI_LOG_CORE, "size is greater than supported\n"); FI_INFO_CHECK_VAL(prov, prov_attr, user_attr, size); return -FI_ENODATA; } if (user_attr->iov_limit > prov_attr->iov_limit) { FI_INFO(prov, FI_LOG_CORE, "iov_limit too large\n"); FI_INFO_CHECK_VAL(prov, prov_attr, user_attr, iov_limit); return -FI_ENODATA; } if (!rm_enabled && user_attr->total_buffered_recv > prov_attr->total_buffered_recv) { /* 
Just log a notification, but ignore the value */ FI_INFO(prov, FI_LOG_CORE, "Total buffered recv size exceeds supported size\n"); FI_INFO_CHECK_VAL(prov, prov_attr, user_attr, total_buffered_recv); } return 0; }
static int fi_register_provider(struct fi_provider *provider, void *dlhandle) { struct fi_prov_context *ctx; struct fi_prov *prov; int ret; if (!provider) { ret = -FI_EINVAL; goto cleanup; } FI_INFO(&core_prov, FI_LOG_CORE, "registering provider: %s (%d.%d)\n", provider->name, FI_MAJOR(provider->version), FI_MINOR(provider->version)); if (FI_MAJOR(provider->fi_version) != FI_MAJOR_VERSION || FI_MINOR(provider->fi_version) != FI_MINOR_VERSION) { FI_INFO(&core_prov, FI_LOG_CORE, "provider has unsupported FI version (provider %d.%d != libfabric %d.%d); ignoring\n", FI_MAJOR(provider->fi_version), FI_MINOR(provider->fi_version), FI_MAJOR_VERSION, FI_MINOR_VERSION); ret = -FI_ENOSYS; goto cleanup; } if (fi_apply_filter(&prov_filter, provider->name)) { FI_INFO(&core_prov, FI_LOG_CORE, "\"%s\" filtered by provider include/exclude list, skipping\n", provider->name); ret = -FI_ENODEV; goto cleanup; } if (fi_apply_filter(&prov_log_filter, provider->name)) { ctx = (struct fi_prov_context *) &provider->context; ctx->disable_logging = 1; } prov = fi_getprov(provider->name); if (prov) { /* If this provider is older than an already-loaded * provider of the same name, then discard this one. */ if (FI_VERSION_GE(prov->provider->version, provider->version)) { FI_INFO(&core_prov, FI_LOG_CORE, "a newer %s provider was already loaded; ignoring this one\n", provider->name); ret = -FI_EALREADY; goto cleanup; } /* This provider is newer than an already-loaded * provider of the same name, so discard the * already-loaded one. 
*/ FI_INFO(&core_prov, FI_LOG_CORE, "an older %s provider was already loaded; keeping this one and ignoring the older one\n", provider->name); cleanup_provider(prov->provider, prov->dlhandle); prov->dlhandle = dlhandle; prov->provider = provider; return 0; } prov = calloc(sizeof *prov, 1); if (!prov) { ret = -FI_ENOMEM; goto cleanup; } prov->dlhandle = dlhandle; prov->provider = provider; if (prov_tail) prov_tail->next = prov; else prov_head = prov; prov_tail = prov; return 0; cleanup: cleanup_provider(provider, dlhandle); return ret; }
static int fi_ibv_alloc_info(struct ibv_context *ctx, struct fi_info **info, const struct verbs_ep_domain *ep_dom) { struct fi_info *fi; union ibv_gid gid; size_t name_len; int ret; int param; if (!(fi = fi_allocinfo())) return -FI_ENOMEM; fi->caps = ep_dom->caps; fi->handle = NULL; if (ep_dom->type == FI_EP_RDM) { fi->mode = VERBS_RDM_MODE; *(fi->tx_attr) = verbs_rdm_tx_attr; } else { fi->mode = VERBS_MODE; *(fi->tx_attr) = verbs_tx_attr; } *(fi->rx_attr) = (ep_dom->type == FI_EP_RDM) ? verbs_rdm_rx_attr : verbs_rx_attr; *(fi->ep_attr) = verbs_ep_attr; *(fi->domain_attr) = verbs_domain_attr; *(fi->fabric_attr) = verbs_fabric_attr; fi->ep_attr->type = ep_dom->type; fi->tx_attr->caps = ep_dom->caps; fi->rx_attr->caps = ep_dom->caps; ret = fi_ibv_get_device_attrs(ctx, fi); if (ret) goto err; if (ep_dom->type == FI_EP_RDM) { fi->tx_attr->inject_size = FI_IBV_RDM_DFLT_BUFFERED_SSIZE; fi->tx_attr->iov_limit = 1; fi->tx_attr->rma_iov_limit = 1; if (!fi_param_get_int(&fi_ibv_prov, "rdm_buffer_size", ¶m)) { if (param > sizeof (struct fi_ibv_rdm_rndv_header)) { fi->tx_attr->inject_size = param; } else { FI_INFO(&fi_ibv_prov, FI_LOG_CORE, "rdm_buffer_size too small, should be greater then %d\n", sizeof (struct fi_ibv_rdm_rndv_header)); ret = -FI_EINVAL; goto err; } } fi->domain_attr->resource_mgmt = FI_RM_ENABLED; } switch (ctx->device->transport_type) { case IBV_TRANSPORT_IB: if(ibv_query_gid(ctx, 1, 0, &gid)) { VERBS_INFO_ERRNO(FI_LOG_FABRIC, "ibv_query_gid", errno); ret = -errno; goto err; } name_len = strlen(VERBS_IB_PREFIX) + INET6_ADDRSTRLEN; if (!(fi->fabric_attr->name = calloc(1, name_len + 1))) { ret = -FI_ENOMEM; goto err; } snprintf(fi->fabric_attr->name, name_len, VERBS_IB_PREFIX "%lx", gid.global.subnet_prefix); fi->ep_attr->protocol = (ep_dom == &verbs_msg_domain) ? 
FI_PROTO_RDMA_CM_IB_RC : FI_PROTO_IB_RDM; break; case IBV_TRANSPORT_IWARP: fi->fabric_attr->name = strdup(VERBS_IWARP_FABRIC); if (!fi->fabric_attr->name) { ret = -FI_ENOMEM; goto err; } if (ep_dom == &verbs_msg_domain) { fi->ep_attr->protocol = FI_PROTO_IWARP; fi->tx_attr->op_flags = VERBS_TX_OP_FLAGS_IWARP; } else { fi->ep_attr->protocol = FI_PROTO_IWARP_RDM; fi->tx_attr->op_flags = VERBS_TX_OP_FLAGS_IWARP_RDM; } break; default: FI_INFO(&fi_ibv_prov, FI_LOG_CORE, "Unknown transport type\n"); ret = -FI_ENODATA; goto err; } name_len = strlen(ctx->device->name) + strlen(ep_dom->suffix); fi->domain_attr->name = malloc(name_len + 1); if (!fi->domain_attr->name) { ret = -FI_ENOMEM; goto err; } snprintf(fi->domain_attr->name, name_len + 1, "%s%s", ctx->device->name, ep_dom->suffix); fi->domain_attr->name[name_len] = '\0'; *info = fi; return 0; err: fi_freeinfo(fi); return ret; }
int ofi_check_domain_attr(const struct fi_provider *prov, uint32_t api_version, const struct fi_domain_attr *prov_attr, const struct fi_info *user_info) { const struct fi_domain_attr *user_attr = user_info->domain_attr; if (prov_attr->name && user_attr->name && strcasecmp(user_attr->name, prov_attr->name)) { FI_INFO(prov, FI_LOG_CORE, "Unknown domain name\n"); FI_INFO_NAME(prov, prov_attr, user_attr); return -FI_ENODATA; } if (fi_thread_level(user_attr->threading) < fi_thread_level(prov_attr->threading)) { FI_INFO(prov, FI_LOG_CORE, "Invalid threading model\n"); return -FI_ENODATA; } if (fi_progress_level(user_attr->control_progress) < fi_progress_level(prov_attr->control_progress)) { FI_INFO(prov, FI_LOG_CORE, "Invalid control progress model\n"); return -FI_ENODATA; } if (fi_progress_level(user_attr->data_progress) < fi_progress_level(prov_attr->data_progress)) { FI_INFO(prov, FI_LOG_CORE, "Invalid data progress model\n"); return -FI_ENODATA; } if (fi_resource_mgmt_level(user_attr->resource_mgmt) < fi_resource_mgmt_level(prov_attr->resource_mgmt)) { FI_INFO(prov, FI_LOG_CORE, "Invalid resource mgmt model\n"); return -FI_ENODATA; } if ((prov_attr->av_type != FI_AV_UNSPEC) && (user_attr->av_type != FI_AV_UNSPEC) && (prov_attr->av_type != user_attr->av_type)) { FI_INFO(prov, FI_LOG_CORE, "Invalid AV type\n"); return -FI_ENODATA; } if (user_attr->cq_data_size > prov_attr->cq_data_size) { FI_INFO(prov, FI_LOG_CORE, "CQ data size too large\n"); FI_INFO_CHECK_VAL(prov, prov_attr, user_attr, cq_data_size); return -FI_ENODATA; } if (ofi_check_mr_mode(prov, api_version, prov_attr->mr_mode, user_info)) return -FI_ENODATA; if (user_attr->max_ep_stx_ctx > prov_attr->max_ep_stx_ctx) { FI_INFO(prov, FI_LOG_CORE, "max_ep_stx_ctx greater than supported\n"); FI_INFO_CHECK_VAL(prov, prov_attr, user_attr, max_ep_stx_ctx); } if (user_attr->max_ep_srx_ctx > prov_attr->max_ep_srx_ctx) { FI_INFO(prov, FI_LOG_CORE, "max_ep_srx_ctx greater than supported\n"); FI_INFO_CHECK_VAL(prov, 
prov_attr, user_attr, max_ep_srx_ctx); } /* following checks only apply to api 1.5 and beyond */ if (FI_VERSION_LT(api_version, FI_VERSION(1, 5))) return 0; if (user_attr->cntr_cnt > prov_attr->cntr_cnt) { FI_INFO(prov, FI_LOG_CORE, "Cntr count too large\n"); return -FI_ENODATA; } if (user_attr->mr_iov_limit > prov_attr->mr_iov_limit) { FI_INFO(prov, FI_LOG_CORE, "MR iov limit too large\n"); FI_INFO_CHECK_VAL(prov, prov_attr, user_attr, mr_iov_limit); return -FI_ENODATA; } if (user_attr->caps & ~(prov_attr->caps)) { FI_INFO(prov, FI_LOG_CORE, "Requested domain caps not supported\n"); FI_INFO_CHECK(prov, prov_attr, user_attr, caps, FI_TYPE_CAPS); return -FI_ENODATA; } if ((user_attr->mode & prov_attr->mode) != prov_attr->mode) { FI_INFO(prov, FI_LOG_CORE, "Required domain mode missing\n"); FI_INFO_MODE(prov, prov_attr->mode, user_attr->mode); return -FI_ENODATA; } if (user_attr->max_err_data > prov_attr->max_err_data) { FI_INFO(prov, FI_LOG_CORE, "Max err data too large\n"); FI_INFO_CHECK_VAL(prov, prov_attr, user_attr, max_err_data); return -FI_ENODATA; } if (user_attr->mr_cnt > prov_attr->mr_cnt) { FI_INFO(prov, FI_LOG_CORE, "MR count too large\n"); FI_INFO_CHECK_VAL(prov, prov_attr, user_attr, mr_cnt); return -FI_ENODATA; } return 0; }
int psmx2_fabric(struct fi_fabric_attr *attr, struct fid_fabric **fabric, void *context) { struct psmx2_fid_fabric *fabric_priv; int ret; FI_INFO(&psmx2_prov, FI_LOG_CORE, "\n"); if (strcmp(attr->name, PSMX2_FABRIC_NAME)) return -FI_ENODATA; if (psmx2_active_fabric) { psmx2_fabric_acquire(psmx2_active_fabric); *fabric = &psmx2_active_fabric->util_fabric.fabric_fid; return 0; } fabric_priv = calloc(1, sizeof(*fabric_priv)); if (!fabric_priv) return -FI_ENOMEM; fastlock_init(&fabric_priv->domain_lock); dlist_init(&fabric_priv->domain_list); psmx2_get_uuid(fabric_priv->uuid); if (psmx2_env.name_server) { fabric_priv->name_server.port = psmx2_uuid_to_port(fabric_priv->uuid); fabric_priv->name_server.name_len = sizeof(struct psmx2_ep_name); fabric_priv->name_server.service_len = sizeof(int); fabric_priv->name_server.service_cmp = psmx2_ns_service_cmp; fabric_priv->name_server.is_service_wildcard = psmx2_ns_is_service_wildcard; ofi_ns_init(&fabric_priv->name_server); ofi_ns_start_server(&fabric_priv->name_server); } ret = ofi_fabric_init(&psmx2_prov, &psmx2_fabric_attr, attr, &fabric_priv->util_fabric, context); if (ret) { FI_INFO(&psmx2_prov, FI_LOG_CORE, "ofi_fabric_init returns %d\n", ret); if (psmx2_env.name_server) ofi_ns_stop_server(&fabric_priv->name_server); free(fabric_priv); return ret; } /* fclass & context initialized in ofi_fabric_init */ fabric_priv->util_fabric.fabric_fid.fid.ops = &psmx2_fabric_fi_ops; fabric_priv->util_fabric.fabric_fid.ops = &psmx2_fabric_ops; psmx2_atomic_global_init(); psmx2_query_mpi(); /* take the reference to count for multiple fabric open calls */ psmx2_fabric_acquire(fabric_priv); *fabric = &fabric_priv->util_fabric.fabric_fid; psmx2_active_fabric = fabric_priv; return 0; }
int fi_ibv_check_domain_attr(const struct fi_domain_attr *attr, const struct fi_info *info) { if (attr->name && strcmp(attr->name, info->domain_attr->name)) { FI_INFO(&fi_ibv_prov, FI_LOG_CORE, "Unknown domain name\n"); return -FI_ENODATA; } switch (attr->threading) { case FI_THREAD_UNSPEC: case FI_THREAD_SAFE: case FI_THREAD_FID: case FI_THREAD_DOMAIN: case FI_THREAD_COMPLETION: case FI_THREAD_ENDPOINT: break; default: FI_INFO(&fi_ibv_prov, FI_LOG_CORE, "Invalid threading model\n"); return -FI_ENODATA; } switch (attr->control_progress) { case FI_PROGRESS_UNSPEC: case FI_PROGRESS_AUTO: case FI_PROGRESS_MANUAL: break; default: FI_INFO(&fi_ibv_prov, FI_LOG_CORE, "Given control progress mode not supported\n"); return -FI_ENODATA; } switch (attr->data_progress) { case FI_PROGRESS_UNSPEC: case FI_PROGRESS_AUTO: case FI_PROGRESS_MANUAL: break; default: FI_INFO(&fi_ibv_prov, FI_LOG_CORE, "Given data progress mode not supported!\n"); return -FI_ENODATA; } switch (attr->mr_mode) { case FI_MR_UNSPEC: case FI_MR_BASIC: break; default: FI_INFO(&fi_ibv_prov, FI_LOG_CORE, "MR mode not supported\n"); return -FI_ENODATA; } if (attr->mr_key_size > info->domain_attr->mr_key_size) { FI_INFO(&fi_ibv_prov, FI_LOG_CORE, "MR key size too large\n"); return -FI_ENODATA; } if (attr->cq_data_size > info->domain_attr->cq_data_size) { FI_INFO(&fi_ibv_prov, FI_LOG_CORE, "CQ data size too large\n"); return -FI_ENODATA; } if (attr->cq_cnt > info->domain_attr->cq_cnt) { FI_INFO(&fi_ibv_prov, FI_LOG_CORE, "cq_cnt exceeds supported size\n"); return -FI_ENODATA; } if (attr->ep_cnt > info->domain_attr->ep_cnt) { FI_INFO(&fi_ibv_prov, FI_LOG_CORE, "ep_cnt exceeds supported size\n"); return -FI_ENODATA; } if (attr->max_ep_tx_ctx > info->domain_attr->max_ep_tx_ctx) { FI_INFO(&fi_ibv_prov, FI_LOG_CORE, "domain_attr: max_ep_tx_ctx exceeds supported size\n"); return -FI_ENODATA; } if (attr->max_ep_rx_ctx > info->domain_attr->max_ep_rx_ctx) { FI_INFO(&fi_ibv_prov, FI_LOG_CORE, "domain_attr: max_ep_rx_ctx 
exceeds supported size\n"); return -FI_ENODATA; } return 0; }
/*
 * Insert endpoint ids into an address vector and connect to them.
 *
 * av:      address vector
 * addr:    array of `count` psm_epid_t values
 * count:   number of addresses in `addr`
 * fi_addr: receives one entry per input address; failed entries are set to
 *          FI_ADDR_NOTAVAIL (may be NULL for FI_AV_TABLE, where the
 *          addresses are stored in the AV's own table)
 * flags:   unused here
 * context: attached to the events generated when the AV has FI_EVENT set
 *
 * Without FI_EVENT the return value is the number of addresses inserted
 * successfully.  With FI_EVENT an error event is queued per failed address,
 * a completion event carrying the success count is queued at the end, and 0
 * is returned.  Negative return values: -FI_EINVAL (NULL address array),
 * -FI_ENOEQ (FI_EVENT set but no event queue bound), -FI_ENOMEM.
 */
static int psmx_av_insert(struct fid_av *av, const void *addr, size_t count,
			  fi_addr_t *fi_addr, uint64_t flags, void *context)
{
	struct psmx_fid_av *av_priv;
	psm_error_t *errors;
	int error_count = 0;
	int *mask;
	int i, j;
	fi_addr_t *result = NULL;
	struct psmx_epaddr_context *epaddr_context;
	struct psmx_eq_event *event;

	/* The address array is read below; refuse a NULL one up front. */
	if (count && !addr) {
		FI_INFO(&psmx_prov, FI_LOG_AV,
			"the input address array is NULL.\n");
		return -FI_EINVAL;
	}

	av_priv = container_of(av, struct psmx_fid_av, av);

	if ((av_priv->flags & FI_EVENT) && !av_priv->eq)
		return -FI_ENOEQ;

	errors = (psm_error_t *) calloc(count, sizeof *errors);
	if (!errors)
		return -FI_ENOMEM;

	mask = (int *) calloc(count, sizeof *mask);
	if (!mask) {
		free(errors);
		return -FI_ENOMEM;
	}

	if (av_priv->type == FI_AV_TABLE) {
		if (psmx_av_check_table_size(av_priv, count)) {
			free(mask);
			free(errors);
			return -FI_ENOMEM;
		}

		for (i=0; i<count; i++)
			av_priv->psm_epids[av_priv->last + i] = ((psm_epid_t *)addr)[i];

		/*
		 * From here on work on the AV's own tables; the caller's
		 * output array is filled with table indices at the end.
		 */
		result = fi_addr;
		addr = (const void *)(av_priv->psm_epids + av_priv->last);
		fi_addr = (fi_addr_t *)(av_priv->psm_epaddrs + av_priv->last);
	}

	/* prevent connecting to the same ep twice, which is fatal in PSM */
	for (i=0; i<count; i++) {
		psm_epconn_t epconn;
		if (psm_ep_epid_lookup(((psm_epid_t *) addr)[i], &epconn) == PSM_OK) {
			epaddr_context = psm_epaddr_getctxt(epconn.addr);
			if (epaddr_context && epaddr_context->epid == ((psm_epid_t *) addr)[i])
				((psm_epaddr_t *) fi_addr)[i] = epconn.addr;
			else
				mask[i] = 1;
		} else {
			mask[i] = 1;
		}
	}

	psm_ep_connect(av_priv->domain->psm_ep, count,
			(psm_epid_t *) addr, mask, errors,
			(psm_epaddr_t *) fi_addr, 30*1e9);

	for (i=0; i<count; i++){
		if (!mask[i])
			continue;

		if (errors[i] == PSM_OK || errors[i] == PSM_EPID_ALREADY_CONNECTED) {
			psmx_set_epaddr_context(av_priv->domain,
						((psm_epid_t *) addr)[i],
						((psm_epaddr_t *) fi_addr)[i]);
		} else {
			FI_INFO(&psmx_prov, FI_LOG_AV,
				"%d: psm_ep_connect returned %s. remote epid=%lx.\n",
				i, psm_error_get_string(errors[i]),
				((psm_epid_t *)addr)[i]);
			if (((psm_epid_t *)addr)[i] == 0)
				FI_INFO(&psmx_prov, FI_LOG_AV,
					"does the application depend on the provider"
					"to resolve IP address into endpoint id? if so"
					"check if the name server has started correctly"
					"at the other side.\n");
			fi_addr[i] = FI_ADDR_NOTAVAIL;
			error_count++;

			if (av_priv->flags & FI_EVENT) {
				event = psmx_eq_create_event(av_priv->eq,
							     FI_AV_COMPLETE,		/* event */
							     context,			/* context */
							     i,				/* data: failed index */
							     psmx_errno(errors[i]),	/* err */
							     errors[i],			/* prov_errno */
							     NULL,			/* err_data */
							     0);			/* err_data_size */
				if (!event) {
					/* Release the scratch arrays on this exit too. */
					free(mask);
					free(errors);
					return -FI_ENOMEM;
				}

				psmx_eq_enqueue_event(av_priv->eq, event);
			}
		}
	}

	free(mask);
	free(errors);

	if (av_priv->type == FI_AV_TABLE) {
		/* NOTE: unresolved addresses are left in the AV table */
		if (result) {
			for (i=0; i<count; i++) {
				j = av_priv->last + i;
				if ((fi_addr_t)av_priv->psm_epaddrs[j] == FI_ADDR_NOTAVAIL)
					result[i] = FI_ADDR_NOTAVAIL;
				else
					result[i] = j;
			}
		}
		av_priv->last += count;
	}

	if (!(av_priv->flags & FI_EVENT))
		return count - error_count;

	event = psmx_eq_create_event(av_priv->eq,
				     FI_AV_COMPLETE,		/* event */
				     context,			/* context */
				     count - error_count,	/* data: succ count */
				     0,				/* err */
				     0,				/* prov_errno */
				     NULL,			/* err_data */
				     0);			/* err_data_size */
	if (!event)
		return -FI_ENOMEM;

	psmx_eq_enqueue_event(av_priv->eq, event);
	return 0;
}
/*
 * Check whether `epid` already has a usable connection.
 *
 * Returns 1 and stores the connection's address in *epaddr when
 * psm2_ep_epid_lookup() succeeds and the address carries a context whose
 * epid equals `epid`.  Returns 0 otherwise, leaving *epaddr untouched.
 */
static int psmx2_av_find_connected(psm2_epid_t epid, psm2_epaddr_t *epaddr)
{
	psm2_epconn_t conn;
	struct psmx2_epaddr_context *ctxt;

	if (psm2_ep_epid_lookup(epid, &conn) != PSM2_OK)
		return 0;

	ctxt = psm2_epaddr_getctxt(conn.addr);
	if (!ctxt || ctxt->epid != epid)
		return 0;

	*epaddr = conn.addr;
	return 1;
}

/*
 * Connect to a batch of endpoints on behalf of an address vector.
 *
 * av:      address vector whose domain endpoint is used for connecting
 * count:   number of entries in each of the arrays below
 * epids:   endpoint ids to connect to
 * mask:    scratch array; an entry is set to 1 for every epid that still
 *          needs a connection attempt
 * errors:  receives the per-endpoint result of psm2_ep_connect()
 * epaddrs: receives the endpoint addresses; failed entries are set to
 *          FI_ADDR_NOTAVAIL
 * context: passed to psmx2_av_post_completion() for failed entries when
 *          the AV has FI_EVENT set
 *
 * Returns the number of endpoints that could not be connected.
 */
static int psmx2_av_connet_eps(struct psmx2_fid_av *av, size_t count,
			       psm2_epid_t *epids, int *mask,
			       psm2_error_t *errors, psm2_epaddr_t *epaddrs,
			       void *context)
{
	int failures = 0;
	int i;

	/* set up mask to prevent connecting to an already connected ep */
	for (i = 0; i < count; i++) {
		if (!psmx2_av_find_connected(epids[i], &epaddrs[i]))
			mask[i] = 1;
	}

	psm2_ep_connect(av->domain->psm2_ep, count, epids, mask, errors,
			epaddrs, psmx2_conn_timeout(count));

	for (i = 0; i < count; i++) {
		/* Entries that were not attempted already have an address. */
		if (!mask[i])
			continue;

		if (errors[i] == PSM2_OK ||
		    errors[i] == PSM2_EPID_ALREADY_CONNECTED) {
			psmx2_set_epaddr_context(av->domain, epids[i],
						 epaddrs[i]);
			continue;
		}

		/* If duplicated addrs are passed to psm2_ep_connect(),
		 * all but one will fail with error "Endpoint could not
		 * be reached". This should be treated the same as
		 * "Endpoint already connected".
		 */
		if (psmx2_av_find_connected(epids[i], &epaddrs[i]))
			continue;

		FI_INFO(&psmx2_prov, FI_LOG_AV,
			"%d: psm2_ep_connect returned %s. remote epid=%lx.\n",
			i, psm2_error_get_string(errors[i]), epids[i]);
		if (epids[i] == 0)
			FI_INFO(&psmx2_prov, FI_LOG_AV,
				"does the application depend on the provider"
				"to resolve IP address into endpoint id? if so"
				"check if the name server has started correctly"
				"at the other side.\n");

		epaddrs[i] = (void *)FI_ADDR_NOTAVAIL;
		failures++;

		if (av->flags & FI_EVENT)
			psmx2_av_post_completion(av, context, i, errors[i]);
	}

	return failures;
}
/*
 * Insert endpoint ids into an address vector and connect to them.
 *
 * av:      address vector
 * addr:    array of `count` psm_epid_t values
 * count:   number of addresses in `addr`
 * fi_addr: receives one entry per input address; failed entries are set to
 *          FI_ADDR_NOTAVAIL (may be NULL for FI_AV_TABLE, where the
 *          addresses live in the AV's own table)
 * flags:   unused here
 * context: passed to psmx_av_post_completion() when the AV has FI_EVENT set
 *
 * Without FI_EVENT the number of successfully inserted addresses is
 * returned.  With FI_EVENT completions are posted instead and 0 is
 * returned.  Negative return values: -FI_EINVAL, -FI_ENOEQ, -FI_ENOMEM.
 */
static int psmx_av_insert(struct fid_av *av, const void *addr, size_t count,
			  fi_addr_t *fi_addr, uint64_t flags, void *context)
{
	struct psmx_fid_av *av_priv;
	struct psmx_epaddr_context *peer_ctxt;
	psm_epconn_t conn;
	psm_epid_t *epids;
	psm_epaddr_t *epaddrs;
	psm_error_t *errors;
	fi_addr_t *result = NULL;
	int *mask;
	int failed = 0;
	int i, j;

	if (count && !addr) {
		FI_INFO(&psmx_prov, FI_LOG_AV,
			"the input address array is NULL.\n");
		return -FI_EINVAL;
	}

	av_priv = container_of(av, struct psmx_fid_av, av);

	if ((av_priv->flags & FI_EVENT) && !av_priv->eq)
		return -FI_ENOEQ;

	errors = (psm_error_t *) calloc(count, sizeof *errors);
	if (!errors)
		return -FI_ENOMEM;

	mask = (int *) calloc(count, sizeof *mask);
	if (!mask) {
		free(errors);
		return -FI_ENOMEM;
	}

	if (av_priv->type == FI_AV_TABLE) {
		if (psmx_av_check_table_size(av_priv, count)) {
			free(mask);
			free(errors);
			return -FI_ENOMEM;
		}

		for (i = 0; i < count; i++)
			av_priv->psm_epids[av_priv->last + i] =
				((psm_epid_t *)addr)[i];

		/*
		 * Switch to the AV's own tables; the caller's array is
		 * filled with table indices once connecting is done.
		 */
		result = fi_addr;
		addr = (const void *)(av_priv->psm_epids + av_priv->last);
		fi_addr = (fi_addr_t *)(av_priv->psm_epaddrs + av_priv->last);
	}

	/* Typed views of the (possibly redirected) input/output arrays. */
	epids = (psm_epid_t *) addr;
	epaddrs = (psm_epaddr_t *) fi_addr;

	/* prevent connecting to the same ep twice, which is fatal in PSM */
	for (i = 0; i < count; i++) {
		if (psm_ep_epid_lookup(epids[i], &conn) != PSM_OK) {
			mask[i] = 1;
			continue;
		}

		peer_ctxt = psm_epaddr_getctxt(conn.addr);
		if (peer_ctxt && peer_ctxt->epid == epids[i])
			epaddrs[i] = conn.addr;
		else
			mask[i] = 1;
	}

	psm_ep_connect(av_priv->domain->psm_ep, count, epids, mask, errors,
		       epaddrs, 30*1e9);

	for (i = 0; i < count; i++) {
		if (!mask[i])
			continue;

		if (errors[i] == PSM_OK ||
		    errors[i] == PSM_EPID_ALREADY_CONNECTED) {
			psmx_set_epaddr_context(av_priv->domain, epids[i],
						epaddrs[i]);
			continue;
		}

		/* If duplicated addresses are passed to psm_ep_connect(),
		 * all but one will fail with error "Endpoint could not be
		 * reached". They should be treated as already connected.
		 */
		if (psm_ep_epid_lookup(epids[i], &conn) == PSM_OK) {
			peer_ctxt = psm_epaddr_getctxt(conn.addr);
			if (peer_ctxt && peer_ctxt->epid == epids[i]) {
				epaddrs[i] = conn.addr;
				continue;
			}
		}

		FI_INFO(&psmx_prov, FI_LOG_AV,
			"%d: psm_ep_connect returned %s. remote epid=%lx.\n",
			i, psm_error_get_string(errors[i]), epids[i]);
		if (epids[i] == 0)
			FI_INFO(&psmx_prov, FI_LOG_AV,
				"does the application depend on the provider"
				"to resolve IP address into endpoint id? if so"
				"check if the name server has started correctly"
				"at the other side.\n");

		fi_addr[i] = FI_ADDR_NOTAVAIL;
		failed++;

		if (av_priv->flags & FI_EVENT)
			psmx_av_post_completion(av_priv, context, i, errors[i]);
	}

	free(mask);
	free(errors);

	if (av_priv->type == FI_AV_TABLE) {
		/* NOTE: unresolved addresses are left in the AV table */
		if (result) {
			for (i = 0; i < count; i++) {
				j = av_priv->last + i;
				result[i] = ((fi_addr_t)av_priv->psm_epaddrs[j] == FI_ADDR_NOTAVAIL) ?
					    FI_ADDR_NOTAVAIL : j;
			}
		}
		av_priv->last += count;
	}

	if (av_priv->flags & FI_EVENT) {
		psmx_av_post_completion(av_priv, context, count - failed, 0);
		return 0;
	}

	return count - failed;
}
static int mlx_getinfo ( uint32_t version, const char *node, const char *service, uint64_t flags, const struct fi_info *hints, struct fi_info **info) { int status = -ENODATA; char *configfile_name = NULL; int inject_thresh = -1; mlx_descriptor.config = NULL; status = fi_param_get( &mlx_prov, "mlx_tinject_limit", &inject_thresh); if (!status) inject_thresh = FI_MLX_DEFAULT_INJECT_SIZE; FI_INFO( &mlx_prov, FI_LOG_CORE, "used inlect size = %d \n", inject_thresh); status = fi_param_get( &mlx_prov, "mlx_config", &configfile_name); if (!status) { configfile_name = NULL; } /* NS is disabled by default */ status = fi_param_get( &mlx_prov, "mlx_ns_enable", &mlx_descriptor.use_ns); if (!status) { mlx_descriptor.use_ns = 0; } status = fi_param_get( &mlx_prov, "mlx_ns_port", &mlx_descriptor.ns_port); if (!status) { mlx_descriptor.ns_port = FI_MLX_DEFAULT_NS_PORT; } status = ucp_config_read( NULL, status? NULL: configfile_name, &mlx_descriptor.config); if (status != UCS_OK) { FI_WARN( &mlx_prov, FI_LOG_CORE, "MLX error: invalid config file\n\t%d (%s)\n", status, ucs_status_string(status)); } /*Setup some presets*/ status = ucm_config_modify("MALLOC_HOOKS", "no"); if (status != UCS_OK) { FI_WARN( &mlx_prov, FI_LOG_CORE, "MLX error: failed to switch off UCM memory hooks:\t%d (%s)\n", status, ucs_status_string(status)); } FI_INFO( &mlx_prov, FI_LOG_CORE, "Loaded MLX version %s\n", ucp_get_version_string()); #if ENABLE_DEBUG if (mlx_descriptor.config && fi_log_enabled( &mlx_prov, FI_LOG_INFO, FI_LOG_CORE)) { ucp_config_print( mlx_descriptor.config, stderr, "Used MLX configuration", (1<<4)-1); } #endif *info = NULL; if (node || service) { FI_WARN(&mlx_prov, FI_LOG_CORE, "fi_getinfo with \"node != NULL \" or \"service != NULL \" is temporary not supported\n"); node = service = NULL; flags = 0; } /* Only Pure MLX address and IPv4 are supported */ if (hints->addr_format == FI_ADDR_MLX) { mlx_info.addr_format = FI_ADDR_MLX; } if (hints->addr_format <= FI_SOCKADDR_IN) { 
mlx_descriptor.use_ns = 1; mlx_info.addr_format = FI_SOCKADDR_IN; } status = util_getinfo( &mlx_util_prov, version, service, node, flags, hints, info); return status; }
static int psmx_getinfo(uint32_t version, const char *node, const char *service, uint64_t flags, const struct fi_info *hints, struct fi_info **info) { struct fi_info *psmx_info; uint32_t cnt = 0; psm_epid_t *dest_addr = NULL; struct psmx_src_name *src_addr = NULL; int ep_type = FI_EP_RDM; int av_type = FI_AV_UNSPEC; uint64_t mode = FI_CONTEXT; enum fi_mr_mode mr_mode = FI_MR_SCALABLE; enum fi_threading threading = FI_THREAD_COMPLETION; enum fi_progress control_progress = FI_PROGRESS_MANUAL; enum fi_progress data_progress = FI_PROGRESS_MANUAL; int caps = 0; uint64_t max_tag_value = 0; int err = -FI_ENODATA; int svc0, svc = PSMX_ANY_SERVICE; FI_INFO(&psmx_prov, FI_LOG_CORE,"\n"); *info = NULL; /* Perform some quick check first to avoid unnecessary operations */ if (hints) { if (hints->fabric_attr && hints->fabric_attr->name && strcasecmp(hints->fabric_attr->name, PSMX_FABRIC_NAME)) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->fabric_name=%s, supported=%s\n", hints->fabric_attr->name, PSMX_FABRIC_NAME); goto err_out; } if (hints->domain_attr && hints->domain_attr->name && strcasecmp(hints->domain_attr->name, PSMX_DOMAIN_NAME)) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->domain_name=%s, supported=%s\n", hints->domain_attr->name, PSMX_DOMAIN_NAME); goto err_out; } if (hints->ep_attr) { switch (hints->ep_attr->type) { case FI_EP_UNSPEC: case FI_EP_DGRAM: case FI_EP_RDM: break; default: FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->ep_attr->type=%d, supported=%d,%d,%d.\n", hints->ep_attr->type, FI_EP_UNSPEC, FI_EP_DGRAM, FI_EP_RDM); goto err_out; } } if ((hints->caps & PSMX_CAPS) != hints->caps && (hints->caps & PSMX_CAPS2) != hints->caps) { uint64_t psmx_caps = PSMX_CAPS; uint64_t psmx_caps2 = PSMX_CAPS2; PSMX_INFO_DIFF("hints->caps", hints->caps, psmx_caps, FI_TYPE_CAPS); PSMX_INFO_DIFF("alternatively, hints->caps", hints->caps, psmx_caps2, FI_TYPE_CAPS); goto err_out; } } if (FI_VERSION_GE(version, FI_VERSION(1,5))) mr_mode = 0; if (psmx_init_lib()) return -FI_ENODATA; if 
(psmx_compat_lib) { /* * native PSM running over TrueScale doesn't have the issue handled * here. it's only present when PSM is supported via the psm2-compat * library, where the PSM functions are just wrappers around the PSM2 * counterparts. * * psm2_ep_num_devunits() may wait for 15 seconds before return * when /dev/hfi1_0 is not present. Check the existence of any hfi1 * device interface first to avoid this delay. Note that the devices * don't necessarily appear consecutively so we need to check all * possible device names before returning "no device found" error. * This also means if "/dev/hfi1_0" doesn't exist but other devices * exist, we are still going to see the delay; but that's a rare case. */ glob_t glob_buf; if ((glob("/dev/hfi1_[0-9]", 0, NULL, &glob_buf) != 0) && (glob("/dev/hfi1_[0-9][0-9]", GLOB_APPEND, NULL, &glob_buf) != 0)) { FI_INFO(&psmx_prov, FI_LOG_CORE, "no hfi1 device is found.\n"); return -FI_ENODATA; } globfree(&glob_buf); } if (psm_ep_num_devunits(&cnt) || !cnt) { FI_INFO(&psmx_prov, FI_LOG_CORE, "no PSM device is found.\n"); return -FI_ENODATA; } src_addr = calloc(1, sizeof(*src_addr)); if (!src_addr) { FI_INFO(&psmx_prov, FI_LOG_CORE, "failed to allocate src addr.\n"); return -FI_ENODATA; } src_addr->signature = 0xFFFF; src_addr->unit = PSMX_DEFAULT_UNIT; src_addr->port = PSMX_DEFAULT_PORT; src_addr->service = PSMX_ANY_SERVICE; if (flags & FI_SOURCE) { if (node) sscanf(node, "%*[^:]:%" SCNi8 ":%" SCNu8, &src_addr->unit, &src_addr->port); if (service) sscanf(service, "%" SCNu32, &src_addr->service); FI_INFO(&psmx_prov, FI_LOG_CORE, "node '%s' service '%s' converted to <unit=%d, port=%d, service=%d>\n", node, service, src_addr->unit, src_addr->port, src_addr->service); } else if (node) { psm_uuid_t uuid; psmx_get_uuid(uuid); struct util_ns ns = { .port = psmx_uuid_to_port(uuid), .name_len = sizeof(*dest_addr), .service_len = sizeof(svc), .service_cmp = psmx_ns_service_cmp, .is_service_wildcard = psmx_ns_is_service_wildcard, }; 
ofi_ns_init(&ns); if (service) svc = atoi(service); svc0 = svc; dest_addr = (psm_epid_t *)ofi_ns_resolve_name(&ns, node, &svc); if (dest_addr) { FI_INFO(&psmx_prov, FI_LOG_CORE, "'%s:%u' resolved to <epid=%"PRIu64">:%u\n", node, svc0, *dest_addr, svc); } else { FI_INFO(&psmx_prov, FI_LOG_CORE, "failed to resolve '%s:%u'.\n", node, svc); err = -FI_ENODATA; goto err_out; } } if (hints) { switch (hints->addr_format) { case FI_FORMAT_UNSPEC: case FI_ADDR_PSMX: break; default: FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->addr_format=%d, supported=%d,%d.\n", hints->addr_format, FI_FORMAT_UNSPEC, FI_ADDR_PSMX); goto err_out; } if (hints->ep_attr) { switch (hints->ep_attr->protocol) { case FI_PROTO_UNSPEC: case FI_PROTO_PSMX: break; default: FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->protocol=%d, supported=%d %d\n", hints->ep_attr->protocol, FI_PROTO_UNSPEC, FI_PROTO_PSMX); goto err_out; } if (hints->ep_attr->tx_ctx_cnt > 1 && hints->ep_attr->tx_ctx_cnt != FI_SHARED_CONTEXT) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->ep_attr->tx_ctx_cnt=%"PRIu64", supported=0,1\n", hints->ep_attr->tx_ctx_cnt); goto err_out; } if (hints->ep_attr->rx_ctx_cnt > 1) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->ep_attr->rx_ctx_cnt=%"PRIu64", supported=0,1\n", hints->ep_attr->rx_ctx_cnt); goto err_out; } } if (hints->tx_attr) { if ((hints->tx_attr->op_flags & PSMX_OP_FLAGS) != hints->tx_attr->op_flags) { uint64_t psmx_op_flags = PSMX_OP_FLAGS; PSMX_INFO_DIFF("hints->tx_attr->of_flags", hints->tx_attr->op_flags, psmx_op_flags, FI_TYPE_OP_FLAGS); goto err_out; } if (hints->tx_attr->inject_size > PSMX_INJECT_SIZE) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->tx_attr->inject_size=%"PRIu64"," "supported=%d.\n", hints->tx_attr->inject_size, PSMX_INJECT_SIZE); goto err_out; } } if (hints->rx_attr && (hints->rx_attr->op_flags & PSMX_OP_FLAGS) != hints->rx_attr->op_flags) { uint64_t psmx_op_flags = PSMX_OP_FLAGS; PSMX_INFO_DIFF("hints->rx_attr->of_flags", hints->rx_attr->op_flags, psmx_op_flags, FI_TYPE_OP_FLAGS); 
goto err_out; } if ((hints->caps & FI_TAGGED) || ((hints->caps & FI_MSG) && !psmx_env.am_msg)) { if ((hints->mode & FI_CONTEXT) != FI_CONTEXT) { uint64_t psmx_mode = FI_CONTEXT; PSMX_INFO_DIFF("hints->mode", hints->mode, psmx_mode, FI_TYPE_MODE); goto err_out; } } else { mode = 0; } if (hints->domain_attr) { switch (hints->domain_attr->av_type) { case FI_AV_UNSPEC: case FI_AV_MAP: case FI_AV_TABLE: av_type = hints->domain_attr->av_type; break; default: FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->domain_attr->av_type=%d, supported=%d %d %d\n", hints->domain_attr->av_type, FI_AV_UNSPEC, FI_AV_MAP, FI_AV_TABLE); goto err_out; } if (hints->domain_attr->mr_mode == FI_MR_BASIC) { mr_mode = FI_MR_BASIC; } else if (hints->domain_attr->mr_mode == FI_MR_SCALABLE) { mr_mode = FI_MR_SCALABLE; } else if (hints->domain_attr->mr_mode & (FI_MR_BASIC | FI_MR_SCALABLE)) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->domain_attr->mr_mode has FI_MR_BASIC or FI_MR_SCALABLE " "combined with other bits\n"); goto err_out; } switch (hints->domain_attr->threading) { case FI_THREAD_UNSPEC: break; case FI_THREAD_FID: case FI_THREAD_ENDPOINT: case FI_THREAD_COMPLETION: case FI_THREAD_DOMAIN: threading = hints->domain_attr->threading; break; default: FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->domain_attr->threading=%d, supported=%d %d %d %d %d\n", hints->domain_attr->threading, FI_THREAD_UNSPEC, FI_THREAD_FID, FI_THREAD_ENDPOINT, FI_THREAD_COMPLETION, FI_THREAD_DOMAIN); goto err_out; } switch (hints->domain_attr->control_progress) { case FI_PROGRESS_UNSPEC: break; case FI_PROGRESS_MANUAL: case FI_PROGRESS_AUTO: control_progress = hints->domain_attr->control_progress; break; default: FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->domain_attr->control_progress=%d, supported=%d %d %d\n", hints->domain_attr->control_progress, FI_PROGRESS_UNSPEC, FI_PROGRESS_MANUAL, FI_PROGRESS_AUTO); goto err_out; } switch (hints->domain_attr->data_progress) { case FI_PROGRESS_UNSPEC: break; case FI_PROGRESS_MANUAL: case 
FI_PROGRESS_AUTO: data_progress = hints->domain_attr->data_progress; break; default: FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->domain_attr->data_progress=%d, supported=%d %d %d\n", hints->domain_attr->data_progress, FI_PROGRESS_UNSPEC, FI_PROGRESS_MANUAL, FI_PROGRESS_AUTO); goto err_out; } if (hints->domain_attr->caps & FI_SHARED_AV) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->domain_attr->caps=%lx, shared AV is unsupported\n", hints->domain_attr->caps); goto err_out; } } if (hints->ep_attr) { if (hints->ep_attr->max_msg_size > PSMX_MAX_MSG_SIZE) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->ep_attr->max_msg_size=%"PRIu64"," "supported=%llu.\n", hints->ep_attr->max_msg_size, PSMX_MAX_MSG_SIZE); goto err_out; } max_tag_value = ofi_max_tag(hints->ep_attr->mem_tag_format); } if (hints->tx_attr) { if ((hints->tx_attr->msg_order & PSMX_MSG_ORDER) != hints->tx_attr->msg_order) { uint64_t psmx_msg_order = PSMX_MSG_ORDER; PSMX_INFO_DIFF("hints->tx_attr->msg_order", hints->tx_attr->msg_order, psmx_msg_order, FI_TYPE_MSG_ORDER); goto err_out; } if ((hints->tx_attr->comp_order & PSMX_COMP_ORDER) != hints->tx_attr->comp_order) { uint64_t psmx_comp_order = PSMX_COMP_ORDER; PSMX_INFO_DIFF("hints->tx_attr->comp_order", hints->tx_attr->comp_order, psmx_comp_order, FI_TYPE_MSG_ORDER); goto err_out; } if (hints->tx_attr->inject_size > PSMX_INJECT_SIZE) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->tx_attr->inject_size=%ld," "supported=%d.\n", hints->tx_attr->inject_size, PSMX_INJECT_SIZE); goto err_out; } if (hints->tx_attr->iov_limit > 1) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->tx_attr->iov_limit=%ld," "supported=1.\n", hints->tx_attr->iov_limit); goto err_out; } if (hints->tx_attr->rma_iov_limit > 1) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->tx_attr->rma_iov_limit=%ld," "supported=1.\n", hints->tx_attr->rma_iov_limit); goto err_out; } } if (hints->rx_attr) { if ((hints->rx_attr->msg_order & PSMX_MSG_ORDER) != hints->rx_attr->msg_order) { uint64_t psmx_msg_order = PSMX_MSG_ORDER; 
PSMX_INFO_DIFF("hints->rx_attr->msg_order", hints->rx_attr->msg_order, psmx_msg_order, FI_TYPE_MSG_ORDER); goto err_out; } if ((hints->rx_attr->comp_order & PSMX_COMP_ORDER) != hints->rx_attr->comp_order) { uint64_t psmx_comp_order = PSMX_COMP_ORDER; PSMX_INFO_DIFF("hints->rx_attr->comp_order", hints->rx_attr->comp_order, psmx_comp_order, FI_TYPE_MSG_ORDER); goto err_out; } if (hints->rx_attr->iov_limit > 1) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->rx_attr->iov_limit=%ld," "supported=1.\n", hints->rx_attr->iov_limit); goto err_out; } } caps = hints->caps; /* TODO: check other fields of hints */ } if (psmx_reserve_tag_bits(&caps, &max_tag_value) < 0) goto err_out; psmx_info = fi_allocinfo(); if (!psmx_info) { err = -FI_ENOMEM; goto err_out; } psmx_info->ep_attr->type = ep_type; psmx_info->ep_attr->protocol = FI_PROTO_PSMX; psmx_info->ep_attr->protocol_version = PSM_VERNO; psmx_info->ep_attr->max_msg_size = PSMX_MAX_MSG_SIZE; psmx_info->ep_attr->max_order_raw_size = PSMX_RMA_ORDER_SIZE; psmx_info->ep_attr->max_order_war_size = PSMX_RMA_ORDER_SIZE; psmx_info->ep_attr->max_order_waw_size = PSMX_RMA_ORDER_SIZE; psmx_info->ep_attr->mem_tag_format = ofi_tag_format(max_tag_value); psmx_info->ep_attr->tx_ctx_cnt = 1; psmx_info->ep_attr->rx_ctx_cnt = 1; psmx_info->domain_attr->threading = threading; psmx_info->domain_attr->control_progress = control_progress; psmx_info->domain_attr->data_progress = data_progress; psmx_info->domain_attr->name = strdup(PSMX_DOMAIN_NAME); psmx_info->domain_attr->resource_mgmt = FI_RM_ENABLED; psmx_info->domain_attr->av_type = av_type; psmx_info->domain_attr->mr_mode = mr_mode; psmx_info->domain_attr->mr_key_size = sizeof(uint64_t); psmx_info->domain_attr->cq_data_size = 4; psmx_info->domain_attr->cq_cnt = 65535; psmx_info->domain_attr->ep_cnt = 65535; psmx_info->domain_attr->tx_ctx_cnt = 1; psmx_info->domain_attr->rx_ctx_cnt = 1; psmx_info->domain_attr->max_ep_tx_ctx = 1; psmx_info->domain_attr->max_ep_rx_ctx = 1; 
psmx_info->domain_attr->max_ep_stx_ctx = 65535; psmx_info->domain_attr->max_ep_srx_ctx = 0; psmx_info->domain_attr->cntr_cnt = 65535; psmx_info->domain_attr->mr_iov_limit = 65535; psmx_info->domain_attr->caps = PSMX_DOM_CAPS; psmx_info->domain_attr->mode = 0; psmx_info->domain_attr->mr_cnt = 65535; psmx_info->next = NULL; psmx_info->caps = (hints && hints->caps) ? hints->caps : caps; psmx_info->mode = mode; psmx_info->addr_format = FI_ADDR_PSMX; psmx_info->src_addr = src_addr; psmx_info->src_addrlen = sizeof(*src_addr); psmx_info->dest_addr = dest_addr; psmx_info->dest_addrlen = sizeof(*dest_addr); psmx_info->fabric_attr->name = strdup(PSMX_FABRIC_NAME); psmx_info->fabric_attr->prov_name = NULL; psmx_info->fabric_attr->prov_version = PSMX_VERSION; psmx_info->tx_attr->caps = psmx_info->caps; psmx_info->tx_attr->mode = psmx_info->mode; psmx_info->tx_attr->op_flags = (hints && hints->tx_attr && hints->tx_attr->op_flags) ? hints->tx_attr->op_flags : 0; psmx_info->tx_attr->msg_order = PSMX_MSG_ORDER; psmx_info->tx_attr->comp_order = PSMX_COMP_ORDER; psmx_info->tx_attr->inject_size = PSMX_INJECT_SIZE; psmx_info->tx_attr->size = UINT64_MAX; psmx_info->tx_attr->iov_limit = 1; psmx_info->tx_attr->rma_iov_limit = 1; psmx_info->rx_attr->caps = psmx_info->caps; psmx_info->rx_attr->mode = psmx_info->mode; psmx_info->rx_attr->op_flags = (hints && hints->rx_attr && hints->rx_attr->op_flags) ? hints->rx_attr->op_flags : 0; psmx_info->rx_attr->msg_order = PSMX_MSG_ORDER; psmx_info->rx_attr->comp_order = PSMX_COMP_ORDER; psmx_info->rx_attr->total_buffered_recv = ~(0ULL); /* that's how PSM handles it internally! */ psmx_info->rx_attr->size = UINT64_MAX; psmx_info->rx_attr->iov_limit = 1; *info = psmx_info; return 0; err_out: free(dest_addr); free(src_addr); return err; } static void psmx_fini(void) { FI_INFO(&psmx_prov, FI_LOG_CORE, "\n"); if (! 
--psmx_init_count && psmx_lib_initialized) { /* This function is called from a library destructor, which is called * automatically when exit() is called. The call to psm_finalize() * might cause deadlock if the applicaiton is terminated with Ctrl-C * -- the application could be inside a PSM call, holding a lock that * psm_finalize() tries to acquire. This can be avoided by only * calling psm_finalize() when PSM is guaranteed to be unused. */ if (psmx_active_fabric) { FI_INFO(&psmx_prov, FI_LOG_CORE, "psmx_active_fabric != NULL, skip psm_finalize\n"); } else { psm_finalize(); psmx_lib_initialized = 0; } } }
static int psmx_getinfo(uint32_t version, const char *node, const char *service, uint64_t flags, struct fi_info *hints, struct fi_info **info) { struct fi_info *psmx_info; uint32_t cnt = 0; psm_epid_t *dest_addr = NULL; struct psmx_src_name *src_addr; int ep_type = FI_EP_RDM; int av_type = FI_AV_UNSPEC; uint64_t mode = FI_CONTEXT; enum fi_mr_mode mr_mode = FI_MR_SCALABLE; enum fi_threading threading = FI_THREAD_COMPLETION; enum fi_progress control_progress = FI_PROGRESS_MANUAL; enum fi_progress data_progress = FI_PROGRESS_MANUAL; int caps = 0; uint64_t max_tag_value = 0; int err = -FI_ENODATA; int svc0, svc = PSMX_ANY_SERVICE; FI_INFO(&psmx_prov, FI_LOG_CORE,"\n"); *info = NULL; if (psmx_init_lib()) return -FI_ENODATA; if (psm_ep_num_devunits(&cnt) || !cnt) { FI_INFO(&psmx_prov, FI_LOG_CORE, "no PSM device is found.\n"); return -FI_ENODATA; } psmx_init_env(); src_addr = calloc(1, sizeof(*src_addr)); if (!src_addr) { FI_INFO(&psmx_prov, FI_LOG_CORE, "failed to allocate src addr.\n"); return -FI_ENODATA; } src_addr->unit = PSMX_DEFAULT_UNIT; src_addr->port = PSMX_DEFAULT_PORT; src_addr->service = PSMX_ANY_SERVICE; if (flags & FI_SOURCE) { if (node) sscanf(node, "%*[^:]:%d:%d", &src_addr->unit, &src_addr->port); if (service) sscanf(service, "%d", &src_addr->service); FI_INFO(&psmx_prov, FI_LOG_CORE, "node '%s' service '%s' converted to <unit=%d, port=%d, service=%d>\n", node, service, src_addr->unit, src_addr->port, src_addr->service); } else if (node) { if (service) svc = atoi(service); svc0 = svc; dest_addr = psmx_ns_resolve_name(node, &svc); if (dest_addr) { FI_INFO(&psmx_prov, FI_LOG_CORE, "'%s:%u' resolved to <epid=0x%llx>:%u\n", node, svc0, *dest_addr, svc); } else { FI_INFO(&psmx_prov, FI_LOG_CORE, "failed to resolve '%s:%u'.\n", node, svc); err = -FI_ENODATA; goto err_out; } } if (hints) { switch (hints->addr_format) { case FI_FORMAT_UNSPEC: case FI_ADDR_PSMX: break; default: FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->addr_format=%d, supported=%d,%d.\n", 
hints->addr_format, FI_FORMAT_UNSPEC, FI_ADDR_PSMX); goto err_out; } if (hints->ep_attr) { switch (hints->ep_attr->type) { case FI_EP_UNSPEC: case FI_EP_DGRAM: case FI_EP_RDM: break; default: FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->ep_attr->type=%d, supported=%d,%d,%d.\n", hints->ep_attr->type, FI_EP_UNSPEC, FI_EP_DGRAM, FI_EP_RDM); goto err_out; } switch (hints->ep_attr->protocol) { case FI_PROTO_UNSPEC: case FI_PROTO_PSMX: break; default: FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->protocol=%d, supported=%d %d\n", hints->ep_attr->protocol, FI_PROTO_UNSPEC, FI_PROTO_PSMX); goto err_out; } if (hints->ep_attr->tx_ctx_cnt > 1) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->ep_attr->tx_ctx_cnt=%d, supported=0,1\n", hints->ep_attr->tx_ctx_cnt); goto err_out; } if (hints->ep_attr->rx_ctx_cnt > 1) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->ep_attr->rx_ctx_cnt=%d, supported=0,1\n", hints->ep_attr->rx_ctx_cnt); goto err_out; } } if ((hints->caps & PSMX_CAPS) != hints->caps && (hints->caps & PSMX_CAPS2) != hints->caps) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->caps=0x%llx, supported=0x%llx,0x%llx\n", hints->caps, PSMX_CAPS, PSMX_CAPS2); goto err_out; } if (hints->tx_attr) { if ((hints->tx_attr->op_flags & PSMX_OP_FLAGS) != hints->tx_attr->op_flags) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->tx->flags=0x%llx, " "supported=0x%llx\n", hints->tx_attr->op_flags, PSMX_OP_FLAGS); goto err_out; } if (hints->tx_attr->inject_size > PSMX_INJECT_SIZE) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->tx_attr->inject_size=%ld," "supported=%ld.\n", hints->tx_attr->inject_size, PSMX_INJECT_SIZE); goto err_out; } } if (hints->rx_attr && (hints->rx_attr->op_flags & PSMX_OP_FLAGS) != hints->rx_attr->op_flags) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->rx->flags=0x%llx, supported=0x%llx\n", hints->rx_attr->op_flags, PSMX_OP_FLAGS); goto err_out; } if ((hints->caps & FI_TAGGED) || ((hints->caps & FI_MSG) && !psmx_env.am_msg)) { if ((hints->mode & FI_CONTEXT) != FI_CONTEXT) { FI_INFO(&psmx_prov, 
FI_LOG_CORE, "hints->mode=0x%llx, required=0x%llx\n", hints->mode, FI_CONTEXT); goto err_out; } } else { mode = 0; } if (hints->fabric_attr && hints->fabric_attr->name && strcmp(hints->fabric_attr->name, PSMX_FABRIC_NAME)) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->fabric_name=%s, supported=psm\n", hints->fabric_attr->name); goto err_out; } if (hints->domain_attr) { if (hints->domain_attr->name && strcmp(hints->domain_attr->name, PSMX_DOMAIN_NAME)) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->domain_name=%s, supported=psm\n", hints->domain_attr->name); goto err_out; } switch (hints->domain_attr->av_type) { case FI_AV_UNSPEC: case FI_AV_MAP: case FI_AV_TABLE: av_type = hints->domain_attr->av_type; break; default: FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->domain_attr->av_type=%d, supported=%d %d %d\n", hints->domain_attr->av_type, FI_AV_UNSPEC, FI_AV_MAP, FI_AV_TABLE); goto err_out; } switch (hints->domain_attr->mr_mode) { case FI_MR_UNSPEC: break; case FI_MR_BASIC: case FI_MR_SCALABLE: mr_mode = hints->domain_attr->mr_mode; break; default: FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->domain_attr->mr_mode=%d, supported=%d %d %d\n", hints->domain_attr->mr_mode, FI_MR_UNSPEC, FI_MR_BASIC, FI_MR_SCALABLE); goto err_out; } switch (hints->domain_attr->threading) { case FI_THREAD_UNSPEC: break; case FI_THREAD_FID: case FI_THREAD_ENDPOINT: case FI_THREAD_COMPLETION: case FI_THREAD_DOMAIN: threading = hints->domain_attr->threading; break; default: FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->domain_attr->threading=%d, supported=%d %d %d %d %d\n", hints->domain_attr->threading, FI_THREAD_UNSPEC, FI_THREAD_FID, FI_THREAD_ENDPOINT, FI_THREAD_COMPLETION, FI_THREAD_DOMAIN); goto err_out; } switch (hints->domain_attr->control_progress) { case FI_PROGRESS_UNSPEC: break; case FI_PROGRESS_MANUAL: case FI_PROGRESS_AUTO: control_progress = hints->domain_attr->control_progress; break; default: FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->domain_attr->control_progress=%d, supported=%d %d %d\n", 
hints->domain_attr->control_progress, FI_PROGRESS_UNSPEC, FI_PROGRESS_MANUAL, FI_PROGRESS_AUTO); goto err_out; } switch (hints->domain_attr->data_progress) { case FI_PROGRESS_UNSPEC: break; case FI_PROGRESS_MANUAL: case FI_PROGRESS_AUTO: data_progress = hints->domain_attr->data_progress; break; default: FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->domain_attr->data_progress=%d, supported=%d %d %d\n", hints->domain_attr->data_progress, FI_PROGRESS_UNSPEC, FI_PROGRESS_MANUAL, FI_PROGRESS_AUTO); goto err_out; } if (hints->domain_attr->caps & FI_SHARED_AV) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->domain_attr->caps=%lx, shared AV is unsupported\n", hints->domain_attr->caps); goto err_out; } } if (hints->ep_attr) { if (hints->ep_attr->max_msg_size > PSMX_MAX_MSG_SIZE) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->ep_attr->max_msg_size=%ld," "supported=%ld.\n", hints->ep_attr->max_msg_size, PSMX_MAX_MSG_SIZE); goto err_out; } max_tag_value = fi_tag_bits(hints->ep_attr->mem_tag_format); } if (hints->tx_attr) { if ((hints->tx_attr->msg_order & PSMX_MSG_ORDER) != hints->tx_attr->msg_order) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->tx_attr->msg_order=%lx," "supported=%lx.\n", hints->tx_attr->msg_order, PSMX_MSG_ORDER); goto err_out; } if ((hints->tx_attr->comp_order & PSMX_COMP_ORDER) != hints->tx_attr->comp_order) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->tx_attr->msg_order=%lx," "supported=%lx.\n", hints->tx_attr->comp_order, PSMX_COMP_ORDER); goto err_out; } if (hints->tx_attr->inject_size > PSMX_INJECT_SIZE) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->tx_attr->inject_size=%ld," "supported=%d.\n", hints->tx_attr->inject_size, PSMX_INJECT_SIZE); goto err_out; } if (hints->tx_attr->iov_limit > 1) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->tx_attr->iov_limit=%ld," "supported=1.\n", hints->tx_attr->iov_limit); goto err_out; } if (hints->tx_attr->rma_iov_limit > 1) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->tx_attr->rma_iov_limit=%ld," "supported=1.\n", 
hints->tx_attr->rma_iov_limit); goto err_out; } } if (hints->rx_attr) { if ((hints->rx_attr->msg_order & PSMX_MSG_ORDER) != hints->rx_attr->msg_order) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->rx_attr->msg_order=%lx," "supported=%lx.\n", hints->rx_attr->msg_order, PSMX_MSG_ORDER); goto err_out; } if ((hints->rx_attr->comp_order & PSMX_COMP_ORDER) != hints->rx_attr->comp_order) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->rx_attr->msg_order=%lx," "supported=%lx.\n", hints->rx_attr->comp_order, PSMX_COMP_ORDER); goto err_out; } if (hints->rx_attr->iov_limit > 1) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->rx_attr->iov_limit=%ld," "supported=1.\n", hints->rx_attr->iov_limit); goto err_out; } } caps = hints->caps; /* TODO: check other fields of hints */ } if (psmx_reserve_tag_bits(&caps, &max_tag_value) < 0) goto err_out; psmx_info = fi_allocinfo(); if (!psmx_info) { err = -FI_ENOMEM; goto err_out; } psmx_info->ep_attr->type = ep_type; psmx_info->ep_attr->protocol = FI_PROTO_PSMX; psmx_info->ep_attr->protocol_version = PSM_VERNO; psmx_info->ep_attr->max_msg_size = PSMX_MAX_MSG_SIZE; psmx_info->ep_attr->mem_tag_format = fi_tag_format(max_tag_value); psmx_info->ep_attr->tx_ctx_cnt = 1; psmx_info->ep_attr->rx_ctx_cnt = 1; psmx_info->domain_attr->threading = threading; psmx_info->domain_attr->control_progress = control_progress; psmx_info->domain_attr->data_progress = data_progress; psmx_info->domain_attr->name = strdup(PSMX_DOMAIN_NAME); psmx_info->domain_attr->resource_mgmt = FI_RM_ENABLED; psmx_info->domain_attr->av_type = av_type; psmx_info->domain_attr->mr_mode = mr_mode; psmx_info->domain_attr->mr_key_size = sizeof(uint64_t); psmx_info->domain_attr->cq_data_size = 4; psmx_info->domain_attr->cq_cnt = 65535; psmx_info->domain_attr->ep_cnt = 65535; psmx_info->domain_attr->tx_ctx_cnt = 1; psmx_info->domain_attr->rx_ctx_cnt = 1; psmx_info->domain_attr->max_ep_tx_ctx = 1; psmx_info->domain_attr->max_ep_rx_ctx = 1; psmx_info->domain_attr->max_ep_stx_ctx = 65535; 
psmx_info->domain_attr->max_ep_srx_ctx = 0; psmx_info->domain_attr->cntr_cnt = 65535; psmx_info->domain_attr->mr_iov_limit = 65535; psmx_info->domain_attr->caps = PSMX_DOM_CAPS; psmx_info->domain_attr->mode = 0; psmx_info->next = NULL; psmx_info->caps = (hints && hints->caps) ? hints->caps : caps; psmx_info->mode = mode; psmx_info->addr_format = FI_ADDR_PSMX; psmx_info->src_addr = src_addr; psmx_info->src_addrlen = sizeof(*src_addr); psmx_info->dest_addr = dest_addr; psmx_info->dest_addrlen = sizeof(*dest_addr); psmx_info->fabric_attr->name = strdup(PSMX_FABRIC_NAME); psmx_info->fabric_attr->prov_name = NULL; psmx_info->fabric_attr->prov_version = PSMX_VERSION; psmx_info->tx_attr->caps = psmx_info->caps; psmx_info->tx_attr->mode = psmx_info->mode; psmx_info->tx_attr->op_flags = (hints && hints->tx_attr && hints->tx_attr->op_flags) ? hints->tx_attr->op_flags : 0; psmx_info->tx_attr->msg_order = PSMX_MSG_ORDER; psmx_info->tx_attr->comp_order = PSMX_COMP_ORDER; psmx_info->tx_attr->inject_size = PSMX_INJECT_SIZE; psmx_info->tx_attr->size = UINT64_MAX; psmx_info->tx_attr->iov_limit = 1; psmx_info->tx_attr->rma_iov_limit = 1; psmx_info->rx_attr->caps = psmx_info->caps; psmx_info->rx_attr->mode = psmx_info->mode; psmx_info->rx_attr->op_flags = (hints && hints->rx_attr && hints->rx_attr->op_flags) ? hints->rx_attr->op_flags : 0; psmx_info->rx_attr->msg_order = PSMX_MSG_ORDER; psmx_info->rx_attr->comp_order = PSMX_COMP_ORDER; psmx_info->rx_attr->total_buffered_recv = ~(0ULL); /* that's how PSM handles it internally! */ psmx_info->rx_attr->size = UINT64_MAX; psmx_info->rx_attr->iov_limit = 1; *info = psmx_info; return 0; err_out: free(dest_addr); free(src_addr); return err; }
static int ofi_register_provider(struct fi_provider *provider, void *dlhandle) { struct fi_prov_context *ctx; struct ofi_prov *prov = NULL; int ret; if (!provider || !provider->name) { FI_WARN(&core_prov, FI_LOG_CORE, "no provider structure or name\n"); ret = -FI_EINVAL; goto cleanup; } FI_INFO(&core_prov, FI_LOG_CORE, "registering provider: %s (%d.%d)\n", provider->name, FI_MAJOR(provider->version), FI_MINOR(provider->version)); if (!provider->getinfo || !provider->fabric) { FI_WARN(&core_prov, FI_LOG_CORE, "provider missing mandatory entry points\n"); ret = -FI_EINVAL; goto cleanup; } /* The current core implementation is not backward compatible * with providers that support a release earlier than v1.3. * See commit 0f4b6651. */ if (provider->fi_version < FI_VERSION(1, 3)) { FI_INFO(&core_prov, FI_LOG_CORE, "provider has unsupported FI version " "(provider %d.%d != libfabric %d.%d); ignoring\n", FI_MAJOR(provider->fi_version), FI_MINOR(provider->fi_version), FI_MAJOR_VERSION, FI_MINOR_VERSION); ret = -FI_ENOSYS; goto cleanup; } ctx = (struct fi_prov_context *) &provider->context; ctx->is_util_prov = ofi_has_util_prefix(provider->name); if (ofi_getinfo_filter(provider)) { FI_INFO(&core_prov, FI_LOG_CORE, "\"%s\" filtered by provider include/exclude " "list, skipping\n", provider->name); ret = -FI_ENODEV; goto cleanup; } if (ofi_apply_filter(&prov_log_filter, provider->name)) ctx->disable_logging = 1; prov = ofi_getprov(provider->name, strlen(provider->name)); if (prov) { /* If this provider has not been init yet, then we add the * provider and dlhandle to the struct and exit. */ if (prov->provider == NULL) goto update_prov_registry; /* If this provider is older than an already-loaded * provider of the same name, then discard this one. 
*/ if (FI_VERSION_GE(prov->provider->version, provider->version)) { FI_INFO(&core_prov, FI_LOG_CORE, "a newer %s provider was already loaded; " "ignoring this one\n", provider->name); ret = -FI_EALREADY; goto cleanup; } /* This provider is newer than an already-loaded * provider of the same name, so discard the * already-loaded one. */ FI_INFO(&core_prov, FI_LOG_CORE, "an older %s provider was already loaded; " "keeping this one and ignoring the older one\n", provider->name); cleanup_provider(prov->provider, prov->dlhandle); } else { prov = ofi_create_prov_entry(provider->name); if (!prov) { ret = -FI_EOTHER; goto cleanup; } } update_prov_registry: prov->dlhandle = dlhandle; prov->provider = provider; return 0; cleanup: cleanup_provider(provider, dlhandle); return ret; }
static int psmx_domain_init(struct psmx_fid_domain *domain, struct psmx_src_name *src_addr) { struct psmx_fid_fabric *fabric = domain->fabric; struct psm_ep_open_opts opts; int err; psm_ep_open_opts_get_defaults(&opts); FI_INFO(&psmx_prov, FI_LOG_CORE, "uuid: %s\n", psmx_uuid_to_string(fabric->uuid)); if (src_addr) { opts.unit = src_addr->unit; opts.port = src_addr->port; FI_INFO(&psmx_prov, FI_LOG_CORE, "ep_open_opts: unit=%d port=%u\n", opts.unit, opts.port); } err = psm_ep_open(fabric->uuid, &opts, &domain->psm_ep, &domain->psm_epid); if (err != PSM_OK) { FI_WARN(&psmx_prov, FI_LOG_CORE, "psm_ep_open returns %d, errno=%d\n", err, errno); err = psmx_errno(err); goto err_out; } FI_INFO(&psmx_prov, FI_LOG_CORE, "epid: 0x%016lx\n", domain->psm_epid); err = psm_mq_init(domain->psm_ep, PSM_MQ_ORDERMASK_ALL, NULL, 0, &domain->psm_mq); if (err != PSM_OK) { FI_WARN(&psmx_prov, FI_LOG_CORE, "psm_mq_init returns %d, errno=%d\n", err, errno); err = psmx_errno(err); goto err_out_close_ep; } err = fastlock_init(&domain->mr_lock); if (err) { FI_WARN(&psmx_prov, FI_LOG_CORE, "fastlock_init(mr_lock) returns %d\n", err); goto err_out_finalize_mq; } domain->mr_map = rbtNew(&psmx_key_compare); if (!domain->mr_map) { FI_WARN(&psmx_prov, FI_LOG_CORE, "rbtNew failed\n"); goto err_out_destroy_mr_lock; } domain->mr_reserved_key = 1; err = fastlock_init(&domain->poll_lock); if (err) { FI_WARN(&psmx_prov, FI_LOG_CORE, "fastlock_init(poll_lock) returns %d\n", err); goto err_out_delete_mr_map; } /* Set active domain before psmx_domain_enable_ep() installs the * AM handlers to ensure that psmx_active_fabric->active_domain * is always non-NULL inside the handlers. Notice that the vlaue * active_domain becomes NULL again only when the domain is closed. * At that time the AM handlers are gone with the PSM endpoint. 
*/ fabric->active_domain = domain; if (psmx_domain_enable_ep(domain, NULL) < 0) goto err_out_reset_active_domain; if (domain->progress_thread_enabled) psmx_domain_start_progress(domain); return 0; err_out_reset_active_domain: fabric->active_domain = NULL; fastlock_destroy(&domain->poll_lock); err_out_delete_mr_map: rbtDelete(domain->mr_map); err_out_destroy_mr_lock: fastlock_destroy(&domain->mr_lock); err_out_finalize_mq: psm_mq_finalize(domain->psm_mq); err_out_close_ep: if (psm_ep_close(domain->psm_ep, PSM_EP_CLOSE_GRACEFUL, (int64_t) psmx_env.timeout * 1000000000LL) != PSM_OK) psm_ep_close(domain->psm_ep, PSM_EP_CLOSE_FORCE, 0); err_out: return err; }
/*
 * psmx2_av_insert() - insert @count endpoint names into an address vector.
 *
 * @av:      address vector to insert into.
 * @addr:    array of @count struct psmx2_ep_name entries.
 * @count:   number of entries in @addr.
 * @fi_addr: optional output array; for every entry it receives
 *	     FI_ADDR_NOTAVAIL, the table index (FI_AV_TABLE), or the value
 *	     built by PSMX2_EP_TO_ADDR().
 * @flags:   not used by this function.
 * @context: passed to the connect helper and to the completion event.
 *
 * Returns -FI_EINVAL for a NULL @addr with a non-zero @count, -FI_ENOEQ
 * when the AV has FI_EVENT set but no event queue, and -FI_ENOMEM when the
 * table cannot be sized or the scratch arrays cannot be allocated.
 * Otherwise, without FI_EVENT the number of entries that did not fail is
 * returned; with FI_EVENT that number is posted as a completion and 0 is
 * returned.
 */
static int psmx2_av_insert(struct fid_av *av, const void *addr, size_t count,
			   fi_addr_t *fi_addr, uint64_t flags, void *context)
{
	const struct psmx2_ep_name *ep_names = addr;
	struct psmx2_fid_av *avp;
	psm2_epaddr_t *epaddr_slots;
	psm2_epid_t *epid_slots;
	uint8_t *vlane_slots;
	psm2_error_t *conn_errs;
	int *conn_mask;
	int failed;
	int idx;

	if (!addr && count) {
		FI_INFO(&psmx2_prov, FI_LOG_AV,
			"the input address array is NULL.\n");
		return -FI_EINVAL;
	}

	avp = container_of(av, struct psmx2_fid_av, av);

	if (!avp->eq && (avp->flags & FI_EVENT))
		return -FI_ENOEQ;

	if (psmx2_av_check_table_size(avp, count))
		return -FI_ENOMEM;

	/* New entries are written starting at the current end of the table. */
	epid_slots = &avp->epids[avp->last];
	epaddr_slots = &avp->epaddrs[avp->last];
	vlane_slots = &avp->vlanes[avp->last];

	idx = 0;
	while (idx < count) {
		epid_slots[idx] = ep_names[idx].epid;
		vlane_slots[idx] = ep_names[idx].vlane;
		idx++;
	}

	/* Scratch arrays handed to the connect helper. */
	conn_errs = (psm2_error_t *) calloc(count, sizeof *conn_errs);
	conn_mask = (int *) calloc(count, sizeof *conn_mask);
	if (!conn_errs || !conn_mask) {
		free(conn_mask);
		free(conn_errs);
		return -FI_ENOMEM;
	}

	failed = psmx2_av_connet_eps(avp, count, epid_slots, conn_mask,
				     conn_errs, epaddr_slots, context);

	free(conn_mask);
	free(conn_errs);

	if (fi_addr) {
		for (idx = 0; idx < count; idx++) {
			if (epaddr_slots[idx] == (void *)FI_ADDR_NOTAVAIL) {
				fi_addr[idx] = FI_ADDR_NOTAVAIL;
				continue;
			}

			if (avp->type == FI_AV_TABLE)
				fi_addr[idx] = avp->last + idx;
			else
				fi_addr[idx] = PSMX2_EP_TO_ADDR(epaddr_slots[idx],
								vlane_slots[idx]);
		}
	}

	if (avp->type == FI_AV_TABLE)
		avp->last += count;

	if (avp->flags & FI_EVENT) {
		psmx2_av_post_completion(avp, context, count - failed, 0);
		return 0;
	}

	return count - failed;
}
int psmx_domain_open(struct fid_fabric *fabric, struct fi_info *info, struct fid_domain **domain, void *context) { struct psmx_fid_fabric *fabric_priv; struct psmx_fid_domain *domain_priv; int err; FI_INFO(&psmx_prov, FI_LOG_DOMAIN, "\n"); fabric_priv = container_of(fabric, struct psmx_fid_fabric, util_fabric.fabric_fid); if (fabric_priv->active_domain) { psmx_domain_acquire(fabric_priv->active_domain); *domain = &fabric_priv->active_domain->util_domain.domain_fid; return 0; } if (!info->domain_attr->name || strcmp(info->domain_attr->name, PSMX_DOMAIN_NAME)) { err = -FI_EINVAL; goto err_out; } domain_priv = (struct psmx_fid_domain *) calloc(1, sizeof *domain_priv); if (!domain_priv) { err = -FI_ENOMEM; goto err_out; } err = ofi_domain_init(fabric, info, &domain_priv->util_domain, context); if (err) goto err_out_free_domain; /* fclass & context are set in ofi_domain_init */ domain_priv->util_domain.domain_fid.fid.ops = &psmx_fi_ops; domain_priv->util_domain.domain_fid.ops = &psmx_domain_ops; domain_priv->util_domain.domain_fid.mr = &psmx_mr_ops; domain_priv->mr_mode = info->domain_attr->mr_mode; domain_priv->mode = info->mode; domain_priv->caps = info->caps; domain_priv->fabric = fabric_priv; domain_priv->progress_thread_enabled = (info->domain_attr->data_progress == FI_PROGRESS_AUTO && psmx_env.prog_thread); err = psmx_domain_init(domain_priv, info->src_addr); if (err) goto err_out_close_domain; /* tale the reference to count for multiple domain open calls */ psmx_domain_acquire(fabric_priv->active_domain); *domain = &domain_priv->util_domain.domain_fid; return 0; err_out_close_domain: ofi_domain_close(&domain_priv->util_domain); err_out_free_domain: free(domain_priv); err_out: return err; }
int psmx2_av_open(struct fid_domain *domain, struct fi_av_attr *attr, struct fid_av **av, void *context) { struct psmx2_fid_domain *domain_priv; struct psmx2_fid_av *av_priv; int type = FI_AV_MAP; size_t count = 64; uint64_t flags = 0; domain_priv = container_of(domain, struct psmx2_fid_domain, util_domain.domain_fid); if (attr) { switch (attr->type) { case FI_AV_UNSPEC: break; case FI_AV_MAP: case FI_AV_TABLE: type = attr->type; break; default: FI_INFO(&psmx2_prov, FI_LOG_AV, "attr->type=%d, supported=%d %d\n", attr->type, FI_AV_MAP, FI_AV_TABLE); return -FI_EINVAL; } count = attr->count; flags = attr->flags; if (flags & (FI_READ | FI_SYMMETRIC)) { FI_INFO(&psmx2_prov, FI_LOG_AV, "attr->flags=%x, supported=%x\n", attr->flags, FI_EVENT); return -FI_ENOSYS; } if (attr->name) { FI_INFO(&psmx2_prov, FI_LOG_AV, "attr->name=%s, named AV is not supported\n", attr->name); return -FI_ENOSYS; } } av_priv = (struct psmx2_fid_av *) calloc(1, sizeof *av_priv); if (!av_priv) return -FI_ENOMEM; psmx2_domain_acquire(domain_priv); av_priv->domain = domain_priv; av_priv->type = type; av_priv->addrlen = sizeof(psm2_epaddr_t); av_priv->count = count; av_priv->flags = flags; av_priv->av.fid.fclass = FI_CLASS_AV; av_priv->av.fid.context = context; av_priv->av.fid.ops = &psmx2_fi_ops; av_priv->av.ops = &psmx2_av_ops; *av = &av_priv->av; if (attr) attr->type = type; return 0; }
static int fi_ibv_rdm_find_sysaddrs(struct fi_ibv_rdm_sysaddr *iface_addr, struct fi_ibv_rdm_sysaddr *lo_addr) { struct ifaddrs *ifaddr, *ifa; char iface[IFNAMSIZ]; char *iface_tmp = "ib"; size_t iface_len = 2; int ret; if (!iface_addr || !lo_addr) { return -FI_EINVAL; } iface_addr->is_found = 0; lo_addr->is_found = 0; if (fi_param_get_str(&fi_ibv_prov, "iface", &iface_tmp) == FI_SUCCESS) { iface_len = strlen(iface_tmp); if (iface_len > IFNAMSIZ) { VERBS_INFO(FI_LOG_EP_CTRL, "Too long iface name: %s, max: %d\n", iface_tmp, IFNAMSIZ); return -FI_EINVAL; } } strncpy(iface, iface_tmp, iface_len); ret = getifaddrs(&ifaddr); if (ret) { FI_WARN(&fi_ibv_prov, FI_LOG_FABRIC, "Unable to get interface addresses\n"); return ret; } for (ifa = ifaddr; ifa; ifa = ifa->ifa_next) { if (!iface_addr->is_found && (ifa->ifa_addr->sa_family == AF_INET) && !strncmp(ifa->ifa_name, iface, iface_len)) { memcpy(&iface_addr->addr, ifa->ifa_addr, sizeof(iface_addr->addr)); iface_addr->is_found = 1; FI_INFO(&fi_ibv_prov, FI_LOG_FABRIC, "iface addr %s:%u\n", inet_ntoa(iface_addr->addr.sin_addr), ntohs(iface_addr->addr.sin_port)); } if (!lo_addr->is_found && (ifa->ifa_addr->sa_family == AF_INET) && !strncmp(ifa->ifa_name, "lo", strlen(ifa->ifa_name))) { memcpy(&lo_addr->addr, ifa->ifa_addr, sizeof(lo_addr->addr)); lo_addr->is_found = 1; FI_INFO(&fi_ibv_prov, FI_LOG_FABRIC, "lo addr %s:%u\n", inet_ntoa(lo_addr->addr.sin_addr), ntohs(lo_addr->addr.sin_port)); } if (iface_addr->is_found && lo_addr->is_found) { break; } } freeifaddrs(ifaddr); return 0; }
void *psmx_resolve_name(const char *servername, int port) { struct addrinfo hints = { .ai_family = AF_UNSPEC, .ai_socktype = SOCK_STREAM }; struct addrinfo *res, *p; psm_uuid_t uuid; char *service; void *dest_addr; int sockfd = -1; int n; if (!port) { psmx_get_uuid(uuid); port = psmx_uuid_to_port(uuid); } if (asprintf(&service, "%d", port) < 0) return NULL; n = getaddrinfo(servername, service, &hints, &res); if (n < 0) { FI_INFO(&psmx_prov, FI_LOG_CORE, "(%s:%d):%s\n", servername, port, gai_strerror(n)); free(service); return NULL; } for (p = res; p; p = p->ai_next) { sockfd = socket(p->ai_family, p->ai_socktype, p->ai_protocol); if (sockfd >= 0) { if (!connect(sockfd, p->ai_addr, p->ai_addrlen)) break; close(sockfd); sockfd = -1; } } freeaddrinfo(res); free(service); if (sockfd < 0) { FI_INFO(&psmx_prov, FI_LOG_CORE, "couldn't connect to %s:%d\n", servername, port); return NULL; } dest_addr = calloc(1,sizeof(psm_epid_t)); if (!dest_addr) { close(sockfd); return NULL; } if (read(sockfd, dest_addr, sizeof(psm_epid_t)) != sizeof(psm_epid_t)) { perror(__func__); free(dest_addr); close(sockfd); return NULL; } close(sockfd); return dest_addr; }
int fi_ibv_check_ep_attr(const struct fi_ep_attr *attr, const struct fi_info *info) { if ((attr->type != FI_EP_UNSPEC) && (attr->type != info->ep_attr->type)) { FI_INFO(&fi_ibv_prov, FI_LOG_CORE, "Unsupported endpoint type\n"); return -FI_ENODATA; } switch (attr->protocol) { case FI_PROTO_UNSPEC: case FI_PROTO_RDMA_CM_IB_RC: case FI_PROTO_IWARP: case FI_PROTO_IB_UD: case FI_PROTO_IB_RDM: case FI_PROTO_IWARP_RDM: break; default: FI_INFO(&fi_ibv_prov, FI_LOG_CORE, "Unsupported protocol\n"); return -FI_ENODATA; } if (attr->protocol_version > 1) { FI_INFO(&fi_ibv_prov, FI_LOG_CORE, "Unsupported protocol version\n"); return -FI_ENODATA; } if (attr->max_msg_size > info->ep_attr->max_msg_size) { FI_INFO(&fi_ibv_prov, FI_LOG_CORE, "Max message size too large\n"); return -FI_ENODATA; } if (attr->max_order_raw_size > info->ep_attr->max_order_raw_size) { FI_INFO(&fi_ibv_prov, FI_LOG_CORE, "max_order_raw_size exceeds supported size\n"); return -FI_ENODATA; } if (attr->max_order_war_size) { FI_INFO(&fi_ibv_prov, FI_LOG_CORE, "max_order_war_size exceeds supported size\n"); return -FI_ENODATA; } if (attr->max_order_waw_size > info->ep_attr->max_order_waw_size) { FI_INFO(&fi_ibv_prov, FI_LOG_CORE, "max_order_waw_size exceeds supported size\n"); return -FI_ENODATA; } if (attr->tx_ctx_cnt > info->domain_attr->max_ep_tx_ctx) { FI_INFO(&fi_ibv_prov, FI_LOG_CORE, "tx_ctx_cnt exceeds supported size\n"); return -FI_ENODATA; } if ((attr->rx_ctx_cnt > info->domain_attr->max_ep_rx_ctx) && (attr->rx_ctx_cnt != FI_SHARED_CONTEXT)) { FI_INFO(&fi_ibv_prov, FI_LOG_CORE, "rx_ctx_cnt exceeds supported size\n"); return -FI_ENODATA; } return 0; }
/************************************************************* * A simple name resolution mechanism for client-server style * applications. The server side has to run first. The client * side then passes the server name as the "node" parameter * of fi_getinfo call and the resulting provider info should * have the transport address of the server in the "dest_addr" * field. Both sides have to use the same UUID. *************************************************************/ void *psmx_name_server(void *args) { struct psmx_fid_fabric *fabric; struct addrinfo hints = { .ai_flags = AI_PASSIVE, .ai_family = AF_UNSPEC, .ai_socktype = SOCK_STREAM }; struct addrinfo *res, *p; char *service; int listenfd = -1, connfd; int port; int n; int ret; fabric = args; port = psmx_uuid_to_port(fabric->uuid); if (asprintf(&service, "%d", port) < 0) return NULL; n = getaddrinfo(NULL, service, &hints, &res); if (n < 0) { FI_INFO(&psmx_prov, FI_LOG_CORE, "port %d: %s\n", port, gai_strerror(n)); free(service); return NULL; } for (p=res; p; p=p->ai_next) { listenfd = socket(p->ai_family, p->ai_socktype, p->ai_protocol); if (listenfd >= 0) { n = 1; if (setsockopt(listenfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof(n)) == -1) FI_WARN(&psmx_prov, FI_LOG_CORE, "setsockopt: %s\n", strerror(errno)); if (!bind(listenfd, p->ai_addr, p->ai_addrlen)) break; close(listenfd); listenfd = -1; } } freeaddrinfo(res); free(service); if (listenfd < 0) { FI_INFO(&psmx_prov, FI_LOG_CORE, "couldn't listen to port %d. try set FI_PSM_UUID to a different value?\n", port); return NULL; } listen(listenfd, 256); while (1) { connfd = accept(listenfd, NULL, 0); if (connfd >= 0) { if (fabric->active_domain) { ret = write(connfd, &fabric->active_domain->psm_epid, sizeof(psm_epid_t)); if (ret != sizeof(psm_epid_t)) FI_WARN(&psmx_prov, FI_LOG_CORE, "error sending address info to the client\n"); } close(connfd); } } return NULL; }
int fi_ibv_init_info(void) { struct ibv_context **ctx_list; struct fi_info *fi = NULL, *tail = NULL; int ret = 0, i, num_devices, fork_unsafe = 0; if (verbs_info) return 0; pthread_mutex_lock(&verbs_info_lock); if (verbs_info) goto unlock; if (!fi_ibv_have_device()) { VERBS_INFO(FI_LOG_FABRIC, "No RDMA devices found\n"); ret = -FI_ENODATA; goto unlock; } fi_param_get_bool(NULL, "fork_unsafe", &fork_unsafe); if (!fork_unsafe) { FI_INFO(&fi_ibv_prov, FI_LOG_CORE, "Enabling IB fork support\n"); ret = ibv_fork_init(); if (ret) { FI_WARN(&fi_ibv_prov, FI_LOG_CORE, "Enabling IB fork support failed: %s (%d)\n", strerror(ret), ret); goto unlock; } } else { FI_INFO(&fi_ibv_prov, FI_LOG_CORE, "Not enabling IB fork support\n"); } ctx_list = rdma_get_devices(&num_devices); if (!num_devices) { VERBS_INFO_ERRNO(FI_LOG_FABRIC, "rdma_get_devices", errno); ret = -errno; goto unlock; } for (i = 0; i < num_devices; i++) { ret = fi_ibv_alloc_info(ctx_list[i], &fi, &verbs_msg_domain); if (!ret) { if (!verbs_info) verbs_info = fi; else tail->next = fi; tail = fi; ret = fi_ibv_alloc_info(ctx_list[i], &fi, &verbs_rdm_domain); if (!ret) { tail->next = fi; tail = fi; } } } ret = verbs_info ? 0 : ret; rdma_free_devices(ctx_list); unlock: pthread_mutex_unlock(&verbs_info_lock); return ret; }
/*
 * Active-message handler for RMA requests and replies.
 *
 * token - AM token; used to look up the source and to send replies
 * args  - AM arguments; args[0].u32w0 carries the opcode, the destination
 *         (and for long requests the source) virtual lane, and the
 *         PSMX2_AM_EOM / PSMX2_AM_DATA flag bits
 * nargs - number of arguments (not inspected here)
 * src   - payload of the message
 * len   - payload length in bytes
 *
 * Requests (REQ_*) are handled on the target side: the memory region is
 * looked up by key and validated, then the data is copied (short write),
 * returned in the reply (short read), or the operation is queued with
 * psmx2_am_enqueue_rma (long write/read). Replies (REP_*) are handled on
 * the initiator side: args[1].u64 carries back the request pointer, the
 * completion is reported, and the request is freed.
 *
 * Returns 0, the result of psm2_am_reply_short(), -FI_ENOMEM on
 * allocation failure, or -FI_EINVAL for an unknown opcode.
 */
int psmx2_am_rma_handler(psm2_am_token_t token, psm2_amarg_t *args,
			 int nargs, void *src, uint32_t len)
{
	psm2_amarg_t rep_args[8];
	uint8_t *rma_addr;
	ssize_t rma_len;
	uint64_t key;
	int err = 0;
	int op_error = 0;
	int cmd, eom, has_data;
	struct psmx2_am_request *req;
	struct psmx2_cq_event *event;
	uint64_t offset;
	struct psmx2_fid_mr *mr;
	psm2_epaddr_t epaddr;
	uint8_t dst_vl, src_vl;
	struct psmx2_fid_domain *domain;
	struct psmx2_fid_ep *ep;

	psm2_am_get_source(token, &epaddr);

	cmd = PSMX2_AM_GET_OP(args[0].u32w0);
	dst_vl = PSMX2_AM_GET_DST(args[0].u32w0);

	/* The destination virtual lane selects the local endpoint. */
	domain = psmx2_active_fabric->active_domain;
	ep = domain->eps[dst_vl];

	eom = args[0].u32w0 & PSMX2_AM_EOM;
	has_data = args[0].u32w0 & PSMX2_AM_DATA;

	switch (cmd) {
	case PSMX2_AM_REQ_WRITE:
		rma_len = args[0].u32w1;
		rma_addr = (uint8_t *)(uintptr_t)args[2].u64;
		key = args[3].u64;
		mr = psmx2_mr_get(domain, key);
		/* Only this chunk (len bytes) is validated and copied. */
		op_error = mr ?
			psmx2_mr_validate(mr, (uint64_t)rma_addr, len, FI_REMOTE_WRITE) :
			-FI_EINVAL;
		if (!op_error) {
			rma_addr += mr->offset;
			memcpy(rma_addr, src, len);
			if (eom) {
				if (ep->recv_cq && has_data) {
					/* TODO: report the addr/len of the whole write */
					event = psmx2_cq_create_event(
							ep->recv_cq,
							0, /* context */
							rma_addr,
							FI_REMOTE_WRITE | FI_RMA | FI_REMOTE_CQ_DATA,
							rma_len,
							args[4].u64,
							0, /* tag */
							0, /* olen */
							0);

					if (event)
						psmx2_cq_enqueue_event(ep->recv_cq, event);
					else
						err = -FI_ENOMEM;
				}

				if (ep->remote_write_cntr)
					psmx2_cntr_inc(ep->remote_write_cntr);

				/* Avoid counting twice when both refer to one counter. */
				if (mr->cntr && mr->cntr != ep->remote_write_cntr)
					psmx2_cntr_inc(mr->cntr);
			}
		}
		/*
		 * NOTE(review): the reply status below overwrites a -FI_ENOMEM
		 * set above when the event could not be created.
		 */
		if (eom || op_error) {
			rep_args[0].u32w0 = PSMX2_AM_REP_WRITE | eom;
			rep_args[0].u32w1 = op_error;
			rep_args[1].u64 = args[1].u64;
			err = psm2_am_reply_short(token, PSMX2_AM_RMA_HANDLER,
					rep_args, 2, NULL, 0, 0,
					NULL, NULL );
		}
		break;

	case PSMX2_AM_REQ_WRITE_LONG:
		src_vl = PSMX2_AM_GET_SRC(args[0].u32w0);
		rma_len = args[0].u32w1;
		rma_addr = (uint8_t *)(uintptr_t)args[2].u64;
		key = args[3].u64;
		mr = psmx2_mr_get(domain, key);
		op_error = mr ?
			psmx2_mr_validate(mr, (uint64_t)rma_addr, rma_len, FI_REMOTE_WRITE) :
			-FI_EINVAL;
		if (op_error) {
			rep_args[0].u32w0 = PSMX2_AM_REP_WRITE | eom;
			rep_args[0].u32w1 = op_error;
			rep_args[1].u64 = args[1].u64;
			err = psm2_am_reply_short(token, PSMX2_AM_RMA_HANDLER,
					rep_args, 2, NULL, 0, 0,
					NULL, NULL );
			break;
		}

		rma_addr += mr->offset;

		/*
		 * The transfer itself is not done here; a request describing
		 * it is queued on the MR's domain.
		 * NOTE(review): on allocation failure no reply is sent to the
		 * initiator -- confirm that this is intended.
		 */
		req = calloc(1, sizeof(*req));
		if (!req) {
			err = -FI_ENOMEM;
		} else {
			req->ep = ep;
			req->op = args[0].u32w0;
			req->write.addr = (uint64_t)rma_addr;
			req->write.len = rma_len;
			req->write.key = key;
			req->write.context = (void *)args[1].u64;
			req->write.peer_addr = (void *)epaddr;
			req->write.vl = dst_vl;
			req->write.peer_vl = src_vl;
			req->write.data = has_data ? args[4].u64 : 0;
			req->cq_flags = FI_REMOTE_WRITE | FI_RMA |
					(has_data ? FI_REMOTE_CQ_DATA : 0);
			PSMX2_CTXT_TYPE(&req->fi_context) = PSMX2_REMOTE_WRITE_CONTEXT;
			PSMX2_CTXT_USER(&req->fi_context) = mr;
			psmx2_am_enqueue_rma(mr->domain, req);
		}
		break;

	case PSMX2_AM_REQ_READ:
		rma_len = args[0].u32w1;
		rma_addr = (uint8_t *)(uintptr_t)args[2].u64;
		key = args[3].u64;
		offset = args[4].u64;
		mr = psmx2_mr_get(domain, key);
		op_error = mr ?
			psmx2_mr_validate(mr, (uint64_t)rma_addr, rma_len, FI_REMOTE_READ) :
			-FI_EINVAL;
		if (!op_error) {
			rma_addr += mr->offset;
		} else {
			/* Send an empty payload together with the error. */
			rma_addr = NULL;
			rma_len = 0;
		}

		rep_args[0].u32w0 = PSMX2_AM_REP_READ | eom;
		rep_args[0].u32w1 = op_error;
		rep_args[1].u64 = args[1].u64;
		rep_args[2].u64 = offset;
		err = psm2_am_reply_short(token, PSMX2_AM_RMA_HANDLER,
				rep_args, 3, rma_addr, rma_len, 0,
				NULL, NULL );

		if (eom && !op_error) {
			if (ep->remote_read_cntr)
				psmx2_cntr_inc(ep->remote_read_cntr);
		}
		break;

	case PSMX2_AM_REQ_READ_LONG:
		src_vl = PSMX2_AM_GET_SRC(args[0].u32w0);
		rma_len = args[0].u32w1;
		rma_addr = (uint8_t *)(uintptr_t)args[2].u64;
		key = args[3].u64;
		mr = psmx2_mr_get(domain, key);
		op_error = mr ?
			psmx2_mr_validate(mr, (uint64_t)rma_addr, rma_len, FI_REMOTE_READ) :
			-FI_EINVAL;
		if (op_error) {
			rep_args[0].u32w0 = PSMX2_AM_REP_READ | eom;
			rep_args[0].u32w1 = op_error;
			rep_args[1].u64 = args[1].u64;
			rep_args[2].u64 = 0;
			err = psm2_am_reply_short(token, PSMX2_AM_RMA_HANDLER,
					rep_args, 3, NULL, 0, 0,
					NULL, NULL );
			break;
		}

		rma_addr += mr->offset;

		req = calloc(1, sizeof(*req));
		if (!req) {
			err = -FI_ENOMEM;
		} else {
			req->ep = ep;
			req->op = args[0].u32w0;
			req->read.addr = (uint64_t)rma_addr;
			req->read.len = rma_len;
			req->read.key = key;
			req->read.context = (void *)args[1].u64;
			req->read.peer_addr = (void *)epaddr;
			req->read.vl = dst_vl;
			req->read.peer_vl = src_vl;
			PSMX2_CTXT_TYPE(&req->fi_context) = PSMX2_REMOTE_READ_CONTEXT;
			PSMX2_CTXT_USER(&req->fi_context) = mr;
			psmx2_am_enqueue_rma(mr->domain, req);
		}
		break;

	case PSMX2_AM_REP_WRITE:
		/* args[1] echoes the request pointer sent with the request. */
		req = (struct psmx2_am_request *)(uintptr_t)args[1].u64;
		assert(req->op == PSMX2_AM_REQ_WRITE);
		op_error = (int)args[0].u32w1;
		/* Keep the first error seen for this request. */
		if (!req->error)
			req->error = op_error;
		if (eom) {
			if (req->ep->send_cq && !req->no_event) {
				event = psmx2_cq_create_event(
						req->ep->send_cq,
						req->write.context,
						req->write.buf,
						req->cq_flags,
						req->write.len,
						0, /* data */
						0, /* tag */
						0, /* olen */
						req->error);
				if (event)
					psmx2_cq_enqueue_event(req->ep->send_cq, event);
				else
					err = -FI_ENOMEM;
			}

			if (req->ep->write_cntr)
				psmx2_cntr_inc(req->ep->write_cntr);

			free(req);
		}
		break;

	case PSMX2_AM_REP_READ:
		req = (struct psmx2_am_request *)(uintptr_t)args[1].u64;
		assert(req->op == PSMX2_AM_REQ_READ || req->op == PSMX2_AM_REQ_READV);
		op_error = (int)args[0].u32w1;
		offset = args[2].u64;
		if (!req->error)
			req->error = op_error;
		if (!op_error) {
			/* Place this chunk at its offset in the destination. */
			if (req->op == PSMX2_AM_REQ_READ)
				memcpy(req->read.buf + offset, src, len);
			else
				psmx2_iov_copy(req->iov, req->read.iov_count, offset, src, len);

			req->read.len_read += len;
		}
		/* Complete on the EOM chunk or once every byte has arrived. */
		if (eom || req->read.len == req->read.len_read) {
			if (!eom)
				FI_INFO(&psmx2_prov, FI_LOG_EP_DATA,
					"readv: short protocol finishes after long protocol.\n");
			if (req->ep->send_cq && !req->no_event) {
				event = psmx2_cq_create_event(
						req->ep->send_cq,
						req->read.context,
						req->read.buf,
						req->cq_flags,
						req->read.len_read,
						0, /* data */
						0, /* tag */
						req->read.len - req->read.len_read,
						req->error);
				if (event)
					psmx2_cq_enqueue_event(req->ep->send_cq, event);
				else
					err = -FI_ENOMEM;
			}

			if (req->ep->read_cntr)
				psmx2_cntr_inc(req->ep->read_cntr);

			free(req);
		}
		break;

	default:
		err = -FI_EINVAL;
		break;
	}
	return err;
}
int ofi_check_ep_attr(const struct util_prov *util_prov, uint32_t api_version, const struct fi_info *prov_info, const struct fi_info *user_info) { const struct fi_ep_attr *prov_attr = prov_info->ep_attr; const struct fi_ep_attr *user_attr = user_info->ep_attr; const struct fi_provider *prov = util_prov->prov; int ret; ret = ofi_check_ep_type(prov, prov_attr, user_attr); if (ret) return ret; if ((user_attr->protocol != FI_PROTO_UNSPEC) && (user_attr->protocol != prov_attr->protocol)) { FI_INFO(prov, FI_LOG_CORE, "Unsupported protocol\n"); FI_INFO_CHECK(prov, prov_attr, user_attr, protocol, FI_TYPE_PROTOCOL); return -FI_ENODATA; } if (user_attr->protocol_version && (user_attr->protocol_version > prov_attr->protocol_version)) { FI_INFO(prov, FI_LOG_CORE, "Unsupported protocol version\n"); return -FI_ENODATA; } if (user_attr->max_msg_size > prov_attr->max_msg_size) { FI_INFO(prov, FI_LOG_CORE, "Max message size too large\n"); FI_INFO_CHECK_VAL(prov, prov_attr, user_attr, max_msg_size); return -FI_ENODATA; } if (user_attr->tx_ctx_cnt > prov_info->domain_attr->max_ep_tx_ctx) { if (user_attr->tx_ctx_cnt == FI_SHARED_CONTEXT) { if (!prov_info->domain_attr->max_ep_stx_ctx) { FI_INFO(prov, FI_LOG_CORE, "Shared tx context not supported\n"); return -FI_ENODATA; } } else { FI_INFO(prov, FI_LOG_CORE, "Requested tx_ctx_cnt exceeds supported." " Expected:%zd, Requested%zd\n", prov_info->domain_attr->max_ep_tx_ctx, user_attr->tx_ctx_cnt); return -FI_ENODATA; } } if (user_attr->rx_ctx_cnt > prov_info->domain_attr->max_ep_rx_ctx) { if (user_attr->rx_ctx_cnt == FI_SHARED_CONTEXT) { if (!prov_info->domain_attr->max_ep_srx_ctx) { FI_INFO(prov, FI_LOG_CORE, "Shared rx context not supported\n"); return -FI_ENODATA; } } else { FI_INFO(prov, FI_LOG_CORE, "Requested rx_ctx_cnt exceeds supported." 
" Expected: %zd, Requested:%zd\n", prov_info->domain_attr->max_ep_rx_ctx, user_attr->rx_ctx_cnt); return -FI_ENODATA; } } if (user_info->caps & (FI_RMA | FI_ATOMIC)) { if (user_attr->max_order_raw_size > prov_attr->max_order_raw_size) { FI_INFO(prov, FI_LOG_CORE, "Max order RAW size exceeds supported size\n"); FI_INFO_CHECK_VAL(prov, prov_attr, user_attr, max_order_raw_size); return -FI_ENODATA; } if (user_attr->max_order_war_size > prov_attr->max_order_war_size) { FI_INFO(prov, FI_LOG_CORE, "Max order WAR size exceeds supported size\n"); FI_INFO_CHECK_VAL(prov, prov_attr, user_attr, max_order_war_size); return -FI_ENODATA; } if (user_attr->max_order_waw_size > prov_attr->max_order_waw_size) { FI_INFO(prov, FI_LOG_CORE, "Max order WAW size exceeds supported size\n"); FI_INFO_CHECK_VAL(prov, prov_attr, user_attr, max_order_waw_size); return -FI_ENODATA; } } if (user_attr->auth_key_size && (user_attr->auth_key_size != prov_attr->auth_key_size)) { FI_INFO(prov, FI_LOG_CORE, "Unsupported authentication size."); FI_INFO_CHECK_VAL(prov, prov_attr, user_attr, auth_key_size); return -FI_ENODATA; } return 0; }
int psmx_process_trigger(struct psmx_fid_domain *domain, struct psmx_trigger *trigger) { switch (trigger->op) { case PSMX_TRIGGERED_SEND: _psmx_send(trigger->send.ep, trigger->send.buf, trigger->send.len, trigger->send.desc, trigger->send.dest_addr, trigger->send.context, trigger->send.flags); break; case PSMX_TRIGGERED_RECV: _psmx_recv(trigger->recv.ep, trigger->recv.buf, trigger->recv.len, trigger->recv.desc, trigger->recv.src_addr, trigger->recv.context, trigger->recv.flags); break; case PSMX_TRIGGERED_TSEND: _psmx_tagged_send(trigger->tsend.ep, trigger->tsend.buf, trigger->tsend.len, trigger->tsend.desc, trigger->tsend.dest_addr, trigger->tsend.tag, trigger->tsend.context, trigger->tsend.flags); break; case PSMX_TRIGGERED_TRECV: _psmx_tagged_recv(trigger->trecv.ep, trigger->trecv.buf, trigger->trecv.len, trigger->trecv.desc, trigger->trecv.src_addr, trigger->trecv.tag, trigger->trecv.ignore, trigger->trecv.context, trigger->trecv.flags); break; case PSMX_TRIGGERED_WRITE: _psmx_write(trigger->write.ep, trigger->write.buf, trigger->write.len, trigger->write.desc, trigger->write.dest_addr, trigger->write.addr, trigger->write.key, trigger->write.context, trigger->write.flags, trigger->write.data); break; case PSMX_TRIGGERED_READ: _psmx_read(trigger->read.ep, trigger->read.buf, trigger->read.len, trigger->read.desc, trigger->read.src_addr, trigger->read.addr, trigger->read.key, trigger->read.context, trigger->read.flags); break; case PSMX_TRIGGERED_ATOMIC_WRITE: _psmx_atomic_write(trigger->atomic_write.ep, trigger->atomic_write.buf, trigger->atomic_write.count, trigger->atomic_write.desc, trigger->atomic_write.dest_addr, trigger->atomic_write.addr, trigger->atomic_write.key, trigger->atomic_write.datatype, trigger->atomic_write.atomic_op, trigger->atomic_write.context, trigger->atomic_write.flags); break; case PSMX_TRIGGERED_ATOMIC_READWRITE: _psmx_atomic_readwrite(trigger->atomic_readwrite.ep, trigger->atomic_readwrite.buf, trigger->atomic_readwrite.count, 
trigger->atomic_readwrite.desc, trigger->atomic_readwrite.result, trigger->atomic_readwrite.result_desc, trigger->atomic_readwrite.dest_addr, trigger->atomic_readwrite.addr, trigger->atomic_readwrite.key, trigger->atomic_readwrite.datatype, trigger->atomic_readwrite.atomic_op, trigger->atomic_readwrite.context, trigger->atomic_readwrite.flags); break; case PSMX_TRIGGERED_ATOMIC_COMPWRITE: _psmx_atomic_compwrite(trigger->atomic_compwrite.ep, trigger->atomic_compwrite.buf, trigger->atomic_compwrite.count, trigger->atomic_compwrite.desc, trigger->atomic_compwrite.compare, trigger->atomic_compwrite.compare_desc, trigger->atomic_compwrite.result, trigger->atomic_compwrite.result_desc, trigger->atomic_compwrite.dest_addr, trigger->atomic_compwrite.addr, trigger->atomic_compwrite.key, trigger->atomic_compwrite.datatype, trigger->atomic_compwrite.atomic_op, trigger->atomic_compwrite.context, trigger->atomic_compwrite.flags); break; default: FI_INFO(&psmx_prov, FI_LOG_CQ, "%d unsupported op\n", trigger->op); break; } free(trigger); return 0; }
int ofi_check_tx_attr(const struct fi_provider *prov, const struct fi_tx_attr *prov_attr, const struct fi_tx_attr *user_attr, uint64_t info_mode) { if (user_attr->caps & ~(prov_attr->caps)) { FI_INFO(prov, FI_LOG_CORE, "caps not supported\n"); FI_INFO_CHECK(prov, prov_attr, user_attr, caps, FI_TYPE_CAPS); return -FI_ENODATA; } info_mode = user_attr->mode ? user_attr->mode : info_mode; if ((info_mode & prov_attr->mode) != prov_attr->mode) { FI_INFO(prov, FI_LOG_CORE, "needed mode not set\n"); FI_INFO_MODE(prov, prov_attr->mode, user_attr->mode); return -FI_ENODATA; } if (prov_attr->op_flags & ~(prov_attr->op_flags)) { FI_INFO(prov, FI_LOG_CORE, "op_flags not supported\n"); FI_INFO_CHECK(prov, prov_attr, user_attr, op_flags, FI_TYPE_OP_FLAGS); return -FI_ENODATA; } if (user_attr->msg_order & ~(prov_attr->msg_order)) { FI_INFO(prov, FI_LOG_CORE, "msg_order not supported\n"); FI_INFO_CHECK(prov, prov_attr, user_attr, msg_order, FI_TYPE_MSG_ORDER); return -FI_ENODATA; } if (user_attr->comp_order & ~(prov_attr->comp_order)) { FI_INFO(prov, FI_LOG_CORE, "comp_order not supported\n"); FI_INFO_CHECK(prov, prov_attr, user_attr, comp_order, FI_TYPE_MSG_ORDER); return -FI_ENODATA; } if (user_attr->inject_size > prov_attr->inject_size) { FI_INFO(prov, FI_LOG_CORE, "inject_size too large\n"); FI_INFO_CHECK_VAL(prov, prov_attr, user_attr, inject_size); return -FI_ENODATA; } if (user_attr->size > prov_attr->size) { FI_INFO(prov, FI_LOG_CORE, "size is greater than supported\n"); FI_INFO_CHECK_VAL(prov, prov_attr, user_attr, size); return -FI_ENODATA; } if (user_attr->iov_limit > prov_attr->iov_limit) { FI_INFO(prov, FI_LOG_CORE, "iov_limit too large\n"); FI_INFO_CHECK_VAL(prov, prov_attr, user_attr, iov_limit); return -FI_ENODATA; } if (user_attr->rma_iov_limit > prov_attr->rma_iov_limit) { FI_INFO(prov, FI_LOG_CORE, "rma_iov_limit too large\n"); FI_INFO_CHECK_VAL(prov, prov_attr, user_attr, rma_iov_limit); return -FI_ENODATA; } return 0; }
static ssize_t fi_ibv_rdm_process_connect_request(struct rdma_cm_event *event, struct fi_ibv_rdm_ep *ep) { struct ibv_qp_init_attr qp_attr; struct rdma_conn_param cm_params; struct fi_ibv_rdm_tagged_conn *conn = NULL; struct rdma_cm_id *id = event->id; ssize_t ret = FI_SUCCESS; char *p = (char *) event->param.conn.private_data; if (ep->is_closing) { int rej_message = 0xdeadbeef; if (rdma_reject(id, &rej_message, sizeof(int))) { VERBS_INFO_ERRNO(FI_LOG_AV, "rdma_reject\n", errno); ret = -errno; if (rdma_destroy_id(id)) { VERBS_INFO_ERRNO(FI_LOG_AV, "rdma_destroy_id\n", errno); ret = (ret == FI_SUCCESS) ? -errno : ret; } } assert(ret == FI_SUCCESS); return ret; } HASH_FIND(hh, fi_ibv_rdm_tagged_conn_hash, p, FI_IBV_RDM_DFLT_ADDRLEN, conn); if (!conn) { conn = memalign(FI_IBV_RDM_MEM_ALIGNMENT, sizeof(*conn)); if (!conn) return -FI_ENOMEM; memset(conn, 0, sizeof(struct fi_ibv_rdm_tagged_conn)); conn->state = FI_VERBS_CONN_ALLOCATED; dlist_init(&conn->postponed_requests_head); fi_ibv_rdm_unpack_cm_params(&event->param.conn, conn, ep); fi_ibv_rdm_conn_init_cm_role(conn, ep); FI_INFO(&fi_ibv_prov, FI_LOG_AV, "CONN REQUEST, NOT found in hash, new conn %p %d, addr %s:%u, HASH ADD\n", conn, conn->cm_role, inet_ntoa(conn->addr.sin_addr), ntohs(conn->addr.sin_port)); HASH_ADD(hh, fi_ibv_rdm_tagged_conn_hash, addr, FI_IBV_RDM_DFLT_ADDRLEN, conn); } else { if (conn->cm_role != FI_VERBS_CM_ACTIVE) { /* * Do it before rdma_create_qp since that call would * modify event->param.conn.private_data buffer */ fi_ibv_rdm_unpack_cm_params(&event->param.conn, conn, ep); } FI_INFO(&fi_ibv_prov, FI_LOG_AV, "CONN REQUEST, FOUND in hash, conn %p %d, addr %s:%u\n", conn, conn->cm_role, inet_ntoa(conn->addr.sin_addr), ntohs(conn->addr.sin_port)); } if (conn->cm_role == FI_VERBS_CM_ACTIVE) { int rej_message = 0xdeadbeef; if (rdma_reject(id, &rej_message, sizeof(rej_message))) { VERBS_INFO_ERRNO(FI_LOG_AV, "rdma_reject\n", errno); ret = -errno; if (rdma_destroy_id(id)) { 
VERBS_INFO_ERRNO(FI_LOG_AV, "rdma_destroy_id\n", errno); ret = (ret == FI_SUCCESS) ? -errno : ret; } } if (conn->state == FI_VERBS_CONN_ALLOCATED) { ret = fi_ibv_rdm_start_connection(ep, conn); if (ret != FI_SUCCESS) goto err; } } else { assert(conn->state == FI_VERBS_CONN_ALLOCATED || conn->state == FI_VERBS_CONN_STARTED); const size_t idx = (conn->cm_role == FI_VERBS_CM_PASSIVE) ? 0 : 1; conn->state = FI_VERBS_CONN_STARTED; assert (conn->id[idx] == NULL); conn->id[idx] = id; ret = fi_ibv_rdm_prepare_conn_memory(ep, conn); if (ret != FI_SUCCESS) goto err; fi_ibv_rdm_tagged_init_qp_attributes(&qp_attr, ep); if (rdma_create_qp(id, ep->domain->pd, &qp_attr)) { ret = -errno; goto err; } conn->qp[idx] = id->qp; ret = fi_ibv_rdm_repost_receives(conn, ep, ep->rq_wr_depth); if (ret < 0) { VERBS_INFO(FI_LOG_AV, "repost receives failed\n"); goto err; } else { ret = FI_SUCCESS; } id->context = conn; fi_ibv_rdm_pack_cm_params(&cm_params, conn, ep); if (rdma_accept(id, &cm_params)) { VERBS_INFO_ERRNO(FI_LOG_AV, "rdma_accept\n", errno); ret = -errno; goto err; } if (cm_params.private_data) { free((void *) cm_params.private_data); } } return ret; err: /* ret err code is already set here, just cleanup resources */ fi_ibv_rdm_conn_cleanup(conn); return ret; }