int psmx2_fabric(struct fi_fabric_attr *attr, struct fid_fabric **fabric, void *context) { struct psmx2_fid_fabric *fabric_priv; int ret; FI_INFO(&psmx2_prov, FI_LOG_CORE, "\n"); if (strcmp(attr->name, PSMX2_FABRIC_NAME)) return -FI_ENODATA; if (psmx2_active_fabric) { psmx2_fabric_acquire(psmx2_active_fabric); *fabric = &psmx2_active_fabric->util_fabric.fabric_fid; return 0; } fabric_priv = calloc(1, sizeof(*fabric_priv)); if (!fabric_priv) return -FI_ENOMEM; ret = ofi_fabric_init(&psmx2_prov, &psmx2_fabric_attr, attr, &fabric_priv->util_fabric, context, FI_MATCH_EXACT); if (ret) { FI_INFO(&psmx2_prov, FI_LOG_CORE, "ofi_fabric_init returns %d\n", ret); free(fabric_priv); return ret; } /* fclass & context initialized in ofi_fabric_init */ fabric_priv->util_fabric.fabric_fid.fid.ops = &psmx2_fabric_fi_ops; fabric_priv->util_fabric.fabric_fid.ops = &psmx2_fabric_ops; psmx2_get_uuid(fabric_priv->uuid); if (psmx2_env.name_server) { ret = pthread_create(&fabric_priv->name_server_thread, NULL, psmx2_name_server, (void *)fabric_priv); if (ret) { FI_INFO(&psmx2_prov, FI_LOG_CORE, "pthread_create returns %d\n", ret); /* use the main thread's ID as invalid value for the new thread */ fabric_priv->name_server_thread = pthread_self(); } } psmx2_query_mpi(); /* take the reference to count for multiple fabric open calls */ psmx2_fabric_acquire(fabric_priv); *fabric = &fabric_priv->util_fabric.fabric_fid; psmx2_active_fabric = fabric_priv; return 0; }
int psmx2_domain_open(struct fid_fabric *fabric, struct fi_info *info, struct fid_domain **domain, void *context) { struct psmx2_fid_fabric *fabric_priv; struct psmx2_fid_domain *domain_priv; struct psmx2_ep_name *src_addr = info->src_addr; int mr_mode = (info->domain_attr->mr_mode & FI_MR_BASIC) ? FI_MR_BASIC : 0; int err; FI_INFO(&psmx2_prov, FI_LOG_DOMAIN, "\n"); fabric_priv = container_of(fabric, struct psmx2_fid_fabric, util_fabric.fabric_fid); if (!info->domain_attr->name || strcmp(info->domain_attr->name, PSMX2_DOMAIN_NAME)) { err = -FI_EINVAL; goto err_out; } domain_priv = (struct psmx2_fid_domain *) calloc(1, sizeof *domain_priv); if (!domain_priv) { err = -FI_ENOMEM; goto err_out; } err = ofi_domain_init(fabric, info, &domain_priv->util_domain, context); if (err) goto err_out_free_domain; /* fclass & context are set in ofi_domain_init */ domain_priv->util_domain.domain_fid.fid.ops = &psmx2_fi_ops; domain_priv->util_domain.domain_fid.ops = &psmx2_domain_ops; domain_priv->util_domain.domain_fid.mr = &psmx2_mr_ops; domain_priv->mr_mode = mr_mode; domain_priv->mode = info->mode; domain_priv->caps = PSMX2_CAPS | PSMX2_DOM_CAPS; domain_priv->fabric = fabric_priv; domain_priv->progress_thread_enabled = (info->domain_attr->data_progress == FI_PROGRESS_AUTO); domain_priv->addr_format = info->addr_format; if (info->addr_format == FI_ADDR_STR) src_addr = psmx2_string_to_ep_name(info->src_addr); err = psmx2_domain_init(domain_priv, src_addr); if (info->addr_format == FI_ADDR_STR) free(src_addr); if (err) goto err_out_close_domain; psmx2_fabric_acquire(fabric_priv); psmx2_lock(&fabric_priv->domain_lock, 1); dlist_insert_before(&domain_priv->entry, &fabric_priv->domain_list); psmx2_unlock(&fabric_priv->domain_lock, 1); *domain = &domain_priv->util_domain.domain_fid; return 0; err_out_close_domain: ofi_domain_close(&domain_priv->util_domain); err_out_free_domain: free(domain_priv); err_out: return err; }
int psmx2_domain_open(struct fid_fabric *fabric, struct fi_info *info, struct fid_domain **domain, void *context) { struct psmx2_fid_fabric *fabric_priv; struct psmx2_fid_domain *domain_priv; struct psm2_ep_open_opts opts; int err; FI_INFO(&psmx2_prov, FI_LOG_DOMAIN, "\n"); fabric_priv = container_of(fabric, struct psmx2_fid_fabric, fabric); psmx2_fabric_acquire(fabric_priv); if (fabric_priv->active_domain) { psmx2_domain_acquire(fabric_priv->active_domain); *domain = &fabric_priv->active_domain->domain; return 0; } if (!info->domain_attr->name || strcmp(info->domain_attr->name, PSMX2_DOMAIN_NAME)) { err = -FI_EINVAL; goto err_out; } domain_priv = (struct psmx2_fid_domain *) calloc(1, sizeof *domain_priv); if (!domain_priv) { err = -FI_ENOMEM; goto err_out; } domain_priv->domain.fid.fclass = FI_CLASS_DOMAIN; domain_priv->domain.fid.context = context; domain_priv->domain.fid.ops = &psmx2_fi_ops; domain_priv->domain.ops = &psmx2_domain_ops; domain_priv->domain.mr = &psmx2_mr_ops; domain_priv->mr_mode = info->domain_attr->mr_mode; domain_priv->mode = info->mode; domain_priv->caps = info->caps; domain_priv->fabric = fabric_priv; domain_priv->progress_thread_enabled = (info->domain_attr->data_progress == FI_PROGRESS_AUTO); psm2_ep_open_opts_get_defaults(&opts); FI_INFO(&psmx2_prov, FI_LOG_CORE, "uuid: %s\n", psmx2_uuid_to_string(fabric_priv->uuid)); err = psm2_ep_open(fabric_priv->uuid, &opts, &domain_priv->psm2_ep, &domain_priv->psm2_epid); if (err != PSM2_OK) { FI_WARN(&psmx2_prov, FI_LOG_CORE, "psm2_ep_open returns %d, errno=%d\n", err, errno); err = psmx2_errno(err); goto err_out_free_domain; } FI_INFO(&psmx2_prov, FI_LOG_CORE, "epid: 0x%016lx\n", domain_priv->psm2_epid); err = psm2_mq_init(domain_priv->psm2_ep, PSM2_MQ_ORDERMASK_ALL, NULL, 0, &domain_priv->psm2_mq); if (err != PSM2_OK) { FI_WARN(&psmx2_prov, FI_LOG_CORE, "psm2_mq_init returns %d, errno=%d\n", err, errno); err = psmx2_errno(err); goto err_out_close_ep; } err = fastlock_init(&domain_priv->mr_lock); if (err) { FI_WARN(&psmx2_prov, FI_LOG_CORE, "fastlock_init(mr_lock) returns %d\n", err); goto err_out_finalize_mq; } domain_priv->mr_map = rbtNew(&psmx2_key_compare); if (!domain_priv->mr_map) { FI_WARN(&psmx2_prov, FI_LOG_CORE, "rbtNew failed\n"); goto err_out_destroy_mr_lock; } domain_priv->mr_reserved_key = 1; err = fastlock_init(&domain_priv->vl_lock); if (err) { FI_WARN(&psmx2_prov, FI_LOG_CORE, "fastlock_init(vl_lock) returns %d\n", err); goto err_out_delete_mr_map; } memset(domain_priv->vl_map, 0, sizeof(domain_priv->vl_map)); domain_priv->vl_alloc = 0; err = fastlock_init(&domain_priv->poll_lock); if (err) { FI_WARN(&psmx2_prov, FI_LOG_CORE, "fastlock_init(poll_lock) returns %d\n", err); goto err_out_destroy_vl_lock; } /* Set active domain before psmx2_domain_enable_ep() installs the * AM handlers to ensure that psmx2_active_fabric->active_domain * is always non-NULL inside the handlers. Notice that the vlaue * active_domain becomes NULL again only when the domain is closed. * At that time the AM handlers are gone with the PSM endpoint. */ fabric_priv->active_domain = domain_priv; if (psmx2_domain_enable_ep(domain_priv, NULL) < 0) goto err_out_reset_active_domain; if (domain_priv->progress_thread_enabled) psmx2_domain_start_progress(domain_priv); domain_priv->refcnt = 1; *domain = &domain_priv->domain; return 0; err_out_reset_active_domain: fabric_priv->active_domain = NULL; fastlock_destroy(&domain_priv->poll_lock); err_out_destroy_vl_lock: fastlock_destroy(&domain_priv->vl_lock); err_out_delete_mr_map: rbtDelete(domain_priv->mr_map); err_out_destroy_mr_lock: fastlock_destroy(&domain_priv->mr_lock); err_out_finalize_mq: psm2_mq_finalize(domain_priv->psm2_mq); err_out_close_ep: if (psm2_ep_close(domain_priv->psm2_ep, PSM2_EP_CLOSE_GRACEFUL, (int64_t) psmx2_env.timeout * 1000000000LL) != PSM2_OK) psm2_ep_close(domain_priv->psm2_ep, PSM2_EP_CLOSE_FORCE, 0); err_out_free_domain: free(domain_priv); err_out: psmx2_fabric_release(fabric_priv); return err; }
int psmx2_fabric(struct fi_fabric_attr *attr, struct fid_fabric **fabric, void *context) { struct psmx2_fid_fabric *fabric_priv; int ret; FI_INFO(&psmx2_prov, FI_LOG_CORE, "\n"); if (strcmp(attr->name, PSMX2_FABRIC_NAME)) return -FI_ENODATA; if (psmx2_active_fabric) { psmx2_fabric_acquire(psmx2_active_fabric); *fabric = &psmx2_active_fabric->util_fabric.fabric_fid; return 0; } fabric_priv = calloc(1, sizeof(*fabric_priv)); if (!fabric_priv) return -FI_ENOMEM; fastlock_init(&fabric_priv->domain_lock); dlist_init(&fabric_priv->domain_list); psmx2_get_uuid(fabric_priv->uuid); if (psmx2_env.name_server) { fabric_priv->name_server.port = psmx2_uuid_to_port(fabric_priv->uuid); fabric_priv->name_server.name_len = sizeof(struct psmx2_ep_name); fabric_priv->name_server.service_len = sizeof(int); fabric_priv->name_server.service_cmp = psmx2_ns_service_cmp; fabric_priv->name_server.is_service_wildcard = psmx2_ns_is_service_wildcard; ofi_ns_init(&fabric_priv->name_server); ofi_ns_start_server(&fabric_priv->name_server); } ret = ofi_fabric_init(&psmx2_prov, &psmx2_fabric_attr, attr, &fabric_priv->util_fabric, context); if (ret) { FI_INFO(&psmx2_prov, FI_LOG_CORE, "ofi_fabric_init returns %d\n", ret); if (psmx2_env.name_server) ofi_ns_stop_server(&fabric_priv->name_server); free(fabric_priv); return ret; } /* fclass & context initialized in ofi_fabric_init */ fabric_priv->util_fabric.fabric_fid.fid.ops = &psmx2_fabric_fi_ops; fabric_priv->util_fabric.fabric_fid.ops = &psmx2_fabric_ops; psmx2_atomic_global_init(); psmx2_query_mpi(); /* take the reference to count for multiple fabric open calls */ psmx2_fabric_acquire(fabric_priv); *fabric = &fabric_priv->util_fabric.fabric_fid; psmx2_active_fabric = fabric_priv; return 0; }