int ompi_mtl_psm_finalize(struct mca_mtl_base_module_t* mtl) { psm_error_t err; opal_progress_unregister(ompi_mtl_psm_progress); /* free resources */ err = psm_mq_finalize(ompi_mtl_psm.mq); if (err) { opal_output(0, "Error in psm_mq_finalize (error %s)\n", psm_error_get_string(err)); return OMPI_ERROR; } err = psm_ep_close(ompi_mtl_psm.ep, PSM_EP_CLOSE_GRACEFUL, 1*1e9); if (err) { opal_output(0, "Error in psm_ep_close (error %s)\n", psm_error_get_string(err)); return OMPI_ERROR; } err = psm_finalize(); if (err) { opal_output(0, "Error in psm_finalize (error %s)\n", psm_error_get_string(err)); return OMPI_ERROR; } return OMPI_SUCCESS; }
/*
 * Close a domain fid.
 *
 * Releases the caller's hold on the domain and asks the utility layer to
 * close it. If ofi_domain_close() returns non-zero the function stops
 * there (presumably the domain is still referenced -- confirm against
 * ofi_domain_close()). Otherwise the progress thread and AM layer are shut
 * down, the locks and MR map are destroyed, the PSM endpoint is closed and
 * the domain object is freed.
 *
 * Always returns 0.
 */
static int psmx_domain_close(fid_t fid)
{
	struct psmx_fid_domain *dom =
		container_of(fid, struct psmx_fid_domain,
			     util_domain.domain_fid.fid);
	int close_rc;

	FI_INFO(&psmx_prov, FI_LOG_DOMAIN, "refcnt=%d\n",
		ofi_atomic_get32(&dom->util_domain.ref));

	psmx_domain_release(dom);

	if (ofi_domain_close(&dom->util_domain))
		return 0;

	if (dom->progress_thread_enabled)
		psmx_domain_stop_progress(dom);

	if (dom->am_initialized)
		psmx_am_fini(dom);

	fastlock_destroy(&dom->poll_lock);
	rbtDelete(dom->mr_map);
	fastlock_destroy(&dom->mr_lock);

	/*
	 * psm_mq_finalize(dom->psm_mq) is intentionally NOT called here.
	 * AM messages could arrive after MQ is finalized, causing a segfault
	 * when trying to dereference the MQ pointer. There is no mechanism
	 * to properly shut down AM. The workaround is to keep MQ valid.
	 */

	/* workaround for:
	 * Assertion failure at psm_ep.c:1059: ep->mctxt_master == ep
	 */
	sleep(psmx_env.delay);

	/*
	 * With no timeout configured the graceful close is skipped; the
	 * non-PSM_OK placeholder below then triggers the forced close.
	 */
	close_rc = PSM_EP_CLOSE_TIMEOUT;
	if (psmx_env.timeout)
		close_rc = psm_ep_close(dom->psm_ep, PSM_EP_CLOSE_GRACEFUL,
					(int64_t) psmx_env.timeout * 1000000000LL);

	/* Fall back to a forced close when the graceful one did not succeed. */
	if (close_rc != PSM_OK)
		psm_ep_close(dom->psm_ep, PSM_EP_CLOSE_FORCE, 0);

	dom->fabric->active_domain = NULL;
	free(dom);

	return 0;
}
/*
 * Finalize the global PSM matched queue `pspsm_mq`, if one is set.
 *
 * Returns 0 when there is no queue or when psm_mq_finalize() reports
 * PSM_OK. Returns -1 on failure, after passing the PSM error string to
 * pspsm_err() and emitting a debug message.
 */
int pspsm_finalize_mq(void)
{
	psm_error_t rc;

	/* Nothing to do when no matched queue is set. */
	if (!pspsm_mq)
		return 0;

	rc = psm_mq_finalize(pspsm_mq);
	if (rc == PSM_OK) {
		pspsm_dprint(2, "pspsm_finalize_mq: OK");
		return 0;
	}

	/* pspsm_err() presumably fills pspsm_err_str, which is logged below
	 * -- confirm against its definition. */
	pspsm_err(psm_error_get_string(rc));
	pspsm_dprint(1, "pspsm_finalize_mq: %s", pspsm_err_str);
	return -1;
}
static int psmx_domain_init(struct psmx_fid_domain *domain, struct psmx_src_name *src_addr) { struct psmx_fid_fabric *fabric = domain->fabric; struct psm_ep_open_opts opts; int err; psm_ep_open_opts_get_defaults(&opts); FI_INFO(&psmx_prov, FI_LOG_CORE, "uuid: %s\n", psmx_uuid_to_string(fabric->uuid)); if (src_addr) { opts.unit = src_addr->unit; opts.port = src_addr->port; FI_INFO(&psmx_prov, FI_LOG_CORE, "ep_open_opts: unit=%d port=%u\n", opts.unit, opts.port); } err = psm_ep_open(fabric->uuid, &opts, &domain->psm_ep, &domain->psm_epid); if (err != PSM_OK) { FI_WARN(&psmx_prov, FI_LOG_CORE, "psm_ep_open returns %d, errno=%d\n", err, errno); err = psmx_errno(err); goto err_out; } FI_INFO(&psmx_prov, FI_LOG_CORE, "epid: 0x%016lx\n", domain->psm_epid); err = psm_mq_init(domain->psm_ep, PSM_MQ_ORDERMASK_ALL, NULL, 0, &domain->psm_mq); if (err != PSM_OK) { FI_WARN(&psmx_prov, FI_LOG_CORE, "psm_mq_init returns %d, errno=%d\n", err, errno); err = psmx_errno(err); goto err_out_close_ep; } err = fastlock_init(&domain->mr_lock); if (err) { FI_WARN(&psmx_prov, FI_LOG_CORE, "fastlock_init(mr_lock) returns %d\n", err); goto err_out_finalize_mq; } domain->mr_map = rbtNew(&psmx_key_compare); if (!domain->mr_map) { FI_WARN(&psmx_prov, FI_LOG_CORE, "rbtNew failed\n"); goto err_out_destroy_mr_lock; } domain->mr_reserved_key = 1; err = fastlock_init(&domain->poll_lock); if (err) { FI_WARN(&psmx_prov, FI_LOG_CORE, "fastlock_init(poll_lock) returns %d\n", err); goto err_out_delete_mr_map; } /* Set active domain before psmx_domain_enable_ep() installs the * AM handlers to ensure that psmx_active_fabric->active_domain * is always non-NULL inside the handlers. Notice that the vlaue * active_domain becomes NULL again only when the domain is closed. * At that time the AM handlers are gone with the PSM endpoint. 
*/ fabric->active_domain = domain; if (psmx_domain_enable_ep(domain, NULL) < 0) goto err_out_reset_active_domain; if (domain->progress_thread_enabled) psmx_domain_start_progress(domain); return 0; err_out_reset_active_domain: fabric->active_domain = NULL; fastlock_destroy(&domain->poll_lock); err_out_delete_mr_map: rbtDelete(domain->mr_map); err_out_destroy_mr_lock: fastlock_destroy(&domain->mr_lock); err_out_finalize_mq: psm_mq_finalize(domain->psm_mq); err_out_close_ep: if (psm_ep_close(domain->psm_ep, PSM_EP_CLOSE_GRACEFUL, (int64_t) psmx_env.timeout * 1000000000LL) != PSM_OK) psm_ep_close(domain->psm_ep, PSM_EP_CLOSE_FORCE, 0); err_out: return err; }