struct psmx2_trx_ctxt *psmx2_trx_ctxt_alloc(struct psmx2_fid_domain *domain, struct psmx2_src_name *src_addr, int sep_ctxt_idx) { struct psmx2_trx_ctxt *trx_ctxt; struct psm2_ep_open_opts opts; int should_retry = 0; int err; trx_ctxt = calloc(1, sizeof(*trx_ctxt)); if (!trx_ctxt) { FI_WARN(&psmx2_prov, FI_LOG_CORE, "failed to allocate trx_ctxt.\n"); return NULL; } psm2_ep_open_opts_get_defaults(&opts); FI_INFO(&psmx2_prov, FI_LOG_CORE, "uuid: %s\n", psmx2_uuid_to_string(domain->fabric->uuid)); if (src_addr) { opts.unit = src_addr->unit; opts.port = src_addr->port; FI_INFO(&psmx2_prov, FI_LOG_CORE, "ep_open_opts: unit=%d port=%u\n", opts.unit, opts.port); } if (opts.unit < 0 && sep_ctxt_idx >= 0) { should_retry = 1; opts.unit = sep_ctxt_idx % psmx2_env.num_devunits; FI_INFO(&psmx2_prov, FI_LOG_CORE, "sep %d: ep_open_opts: unit=%d\n", sep_ctxt_idx, opts.unit); } err = psm2_ep_open(domain->fabric->uuid, &opts, &trx_ctxt->psm2_ep, &trx_ctxt->psm2_epid); if (err != PSM2_OK) { FI_WARN(&psmx2_prov, FI_LOG_CORE, "psm2_ep_open returns %d, errno=%d\n", err, errno); if (!should_retry) { err = psmx2_errno(err); goto err_out; } /* When round-robin fails, retry w/o explicit assignment */ opts.unit = -1; err = psm2_ep_open(domain->fabric->uuid, &opts, &trx_ctxt->psm2_ep, &trx_ctxt->psm2_epid); if (err != PSM2_OK) { FI_WARN(&psmx2_prov, FI_LOG_CORE, "psm2_ep_open returns %d, errno=%d\n", err, errno); err = psmx2_errno(err); goto err_out; } } FI_INFO(&psmx2_prov, FI_LOG_CORE, "epid: 0x%016lx\n", trx_ctxt->psm2_epid); err = psm2_mq_init(trx_ctxt->psm2_ep, PSM2_MQ_ORDERMASK_ALL, NULL, 0, &trx_ctxt->psm2_mq); if (err != PSM2_OK) { FI_WARN(&psmx2_prov, FI_LOG_CORE, "psm2_mq_init returns %d, errno=%d\n", err, errno); err = psmx2_errno(err); goto err_out_close_ep; } fastlock_init(&trx_ctxt->poll_lock); fastlock_init(&trx_ctxt->rma_queue.lock); fastlock_init(&trx_ctxt->trigger_queue.lock); slist_init(&trx_ctxt->rma_queue.list); slist_init(&trx_ctxt->trigger_queue.list); return trx_ctxt; err_out_close_ep: if (psm2_ep_close(trx_ctxt->psm2_ep, PSM2_EP_CLOSE_GRACEFUL, (int64_t) psmx2_env.timeout * 1000000000LL) != PSM2_OK) psm2_ep_close(trx_ctxt->psm2_ep, PSM2_EP_CLOSE_FORCE, 0); err_out: free(trx_ctxt); return NULL; }
int ompi_mtl_psm2_module_init(int local_rank, int num_local_procs) { psm2_error_t err; psm2_ep_t ep; /* endpoint handle */ psm2_mq_t mq; psm2_epid_t epid; /* unique lid+port identifier */ psm2_uuid_t unique_job_key; struct psm2_ep_open_opts ep_opt; unsigned long long *uu = (unsigned long long *) unique_job_key; char *generated_key; char env_string[256]; int rc; generated_key = getenv("OMPI_MCA_orte_precondition_transports"); memset(uu, 0, sizeof(psm2_uuid_t)); if (!generated_key || (strlen(generated_key) != 33) || sscanf(generated_key, "%016llx-%016llx", &uu[0], &uu[1]) != 2) { opal_show_help("help-mtl-psm2.txt", "no uuid present", true, generated_key ? "could not be parsed from" : "not present in", ompi_process_info.nodename); return OMPI_ERROR; } /* Handle our own errors for opening endpoints */ psm2_error_register_handler(ompi_mtl_psm2.ep, ompi_mtl_psm2_errhandler); /* Setup MPI_LOCALRANKID and MPI_LOCALNRANKS so PSM2 can allocate hardware * contexts correctly. */ snprintf(env_string, sizeof(env_string), "%d", local_rank); setenv("MPI_LOCALRANKID", env_string, 0); snprintf(env_string, sizeof(env_string), "%d", num_local_procs); setenv("MPI_LOCALNRANKS", env_string, 0); /* Setup the endpoint options. */ psm2_ep_open_opts_get_defaults(&ep_opt); ep_opt.timeout = ompi_mtl_psm2.connect_timeout * 1e9; ep_opt.affinity = PSM2_EP_OPEN_AFFINITY_SKIP; /* do not let PSM2 set affinity */ /* Open PSM2 endpoint */ err = psm2_ep_open(unique_job_key, &ep_opt, &ep, &epid); if (err) { opal_show_help("help-mtl-psm2.txt", "unable to open endpoint", true, psm2_error_get_string(err)); return OMPI_ERROR; } /* Future errors are handled by the default error handler */ psm2_error_register_handler(ompi_mtl_psm2.ep, PSM2_ERRHANDLER_DEFAULT); err = psm2_mq_init(ep, 0xffff000000000000ULL, NULL, 0, &mq); if (err) { opal_show_help("help-mtl-psm2.txt", "psm2 init", true, psm2_error_get_string(err)); return OMPI_ERROR; } ompi_mtl_psm2.ep = ep; ompi_mtl_psm2.epid = epid; ompi_mtl_psm2.mq = mq; OPAL_MODEX_SEND(rc, OPAL_PMIX_GLOBAL, &mca_mtl_psm2_component.super.mtl_version, &ompi_mtl_psm2.epid, sizeof(psm2_epid_t)); if (OMPI_SUCCESS != rc) { opal_output(0, "Open MPI couldn't send PSM2 epid to head node process"); return OMPI_ERROR; } /* register the psm2 progress function */ opal_progress_register(ompi_mtl_psm2_progress); return OMPI_SUCCESS; }
int psmx2_domain_open(struct fid_fabric *fabric, struct fi_info *info, struct fid_domain **domain, void *context) { struct psmx2_fid_fabric *fabric_priv; struct psmx2_fid_domain *domain_priv; struct psm2_ep_open_opts opts; int err; FI_INFO(&psmx2_prov, FI_LOG_DOMAIN, "\n"); fabric_priv = container_of(fabric, struct psmx2_fid_fabric, fabric); psmx2_fabric_acquire(fabric_priv); if (fabric_priv->active_domain) { psmx2_domain_acquire(fabric_priv->active_domain); *domain = &fabric_priv->active_domain->domain; return 0; } if (!info->domain_attr->name || strcmp(info->domain_attr->name, PSMX2_DOMAIN_NAME)) { err = -FI_EINVAL; goto err_out; } domain_priv = (struct psmx2_fid_domain *) calloc(1, sizeof *domain_priv); if (!domain_priv) { err = -FI_ENOMEM; goto err_out; } domain_priv->domain.fid.fclass = FI_CLASS_DOMAIN; domain_priv->domain.fid.context = context; domain_priv->domain.fid.ops = &psmx2_fi_ops; domain_priv->domain.ops = &psmx2_domain_ops; domain_priv->domain.mr = &psmx2_mr_ops; domain_priv->mr_mode = info->domain_attr->mr_mode; domain_priv->mode = info->mode; domain_priv->caps = info->caps; domain_priv->fabric = fabric_priv; domain_priv->progress_thread_enabled = (info->domain_attr->data_progress == FI_PROGRESS_AUTO); psm2_ep_open_opts_get_defaults(&opts); FI_INFO(&psmx2_prov, FI_LOG_CORE, "uuid: %s\n", psmx2_uuid_to_string(fabric_priv->uuid)); err = psm2_ep_open(fabric_priv->uuid, &opts, &domain_priv->psm2_ep, &domain_priv->psm2_epid); if (err != PSM2_OK) { FI_WARN(&psmx2_prov, FI_LOG_CORE, "psm2_ep_open returns %d, errno=%d\n", err, errno); err = psmx2_errno(err); goto err_out_free_domain; } FI_INFO(&psmx2_prov, FI_LOG_CORE, "epid: 0x%016lx\n", domain_priv->psm2_epid); err = psm2_mq_init(domain_priv->psm2_ep, PSM2_MQ_ORDERMASK_ALL, NULL, 0, &domain_priv->psm2_mq); if (err != PSM2_OK) { FI_WARN(&psmx2_prov, FI_LOG_CORE, "psm2_mq_init returns %d, errno=%d\n", err, errno); err = psmx2_errno(err); goto err_out_close_ep; } err = fastlock_init(&domain_priv->mr_lock); if (err) { FI_WARN(&psmx2_prov, FI_LOG_CORE, "fastlock_init(mr_lock) returns %d\n", err); goto err_out_finalize_mq; } domain_priv->mr_map = rbtNew(&psmx2_key_compare); if (!domain_priv->mr_map) { FI_WARN(&psmx2_prov, FI_LOG_CORE, "rbtNew failed\n"); goto err_out_destroy_mr_lock; } domain_priv->mr_reserved_key = 1; err = fastlock_init(&domain_priv->vl_lock); if (err) { FI_WARN(&psmx2_prov, FI_LOG_CORE, "fastlock_init(vl_lock) returns %d\n", err); goto err_out_delete_mr_map; } memset(domain_priv->vl_map, 0, sizeof(domain_priv->vl_map)); domain_priv->vl_alloc = 0; err = fastlock_init(&domain_priv->poll_lock); if (err) { FI_WARN(&psmx2_prov, FI_LOG_CORE, "fastlock_init(poll_lock) returns %d\n", err); goto err_out_destroy_vl_lock; } /* Set active domain before psmx2_domain_enable_ep() installs the * AM handlers to ensure that psmx2_active_fabric->active_domain * is always non-NULL inside the handlers. Notice that the vlaue * active_domain becomes NULL again only when the domain is closed. * At that time the AM handlers are gone with the PSM endpoint. */ fabric_priv->active_domain = domain_priv; if (psmx2_domain_enable_ep(domain_priv, NULL) < 0) goto err_out_reset_active_domain; if (domain_priv->progress_thread_enabled) psmx2_domain_start_progress(domain_priv); domain_priv->refcnt = 1; *domain = &domain_priv->domain; return 0; err_out_reset_active_domain: fabric_priv->active_domain = NULL; fastlock_destroy(&domain_priv->poll_lock); err_out_destroy_vl_lock: fastlock_destroy(&domain_priv->vl_lock); err_out_delete_mr_map: rbtDelete(domain_priv->mr_map); err_out_destroy_mr_lock: fastlock_destroy(&domain_priv->mr_lock); err_out_finalize_mq: psm2_mq_finalize(domain_priv->psm2_mq); err_out_close_ep: if (psm2_ep_close(domain_priv->psm2_ep, PSM2_EP_CLOSE_GRACEFUL, (int64_t) psmx2_env.timeout * 1000000000LL) != PSM2_OK) psm2_ep_close(domain_priv->psm2_ep, PSM2_EP_CLOSE_FORCE, 0); err_out_free_domain: free(domain_priv); err_out: psmx2_fabric_release(fabric_priv); return err; }