Exemplo n.º 1
0
int
ompi_mtl_psm_finalize(struct mca_mtl_base_module_t* mtl) { 
    psm_error_t err;

    opal_progress_unregister(ompi_mtl_psm_progress);

    /* free resources */
    err = psm_mq_finalize(ompi_mtl_psm.mq);
    if (err) {
        opal_output(0, "Error in psm_mq_finalize (error %s)\n", 
		    psm_error_get_string(err));
        return OMPI_ERROR;
    }

    err = psm_ep_close(ompi_mtl_psm.ep, PSM_EP_CLOSE_GRACEFUL, 1*1e9);
    if (err) {
        opal_output(0, "Error in psm_ep_close (error %s)\n", 
		    psm_error_get_string(err));
        return OMPI_ERROR;
    }

    err = psm_finalize();
    if (err) {
        opal_output(0, "Error in psm_finalize (error %s)\n", 
		    psm_error_get_string(err));
        return OMPI_ERROR;
    }

    return OMPI_SUCCESS;
}
Exemplo n.º 2
0
/*
 * Close callback for a PSM domain fid.
 *
 * Releases the domain via psmx_domain_release() and asks
 * ofi_domain_close() to close the embedded util domain.  If that call
 * returns nonzero, nothing further is torn down and 0 is returned.
 * Otherwise the progress thread and AM layer are stopped (when enabled),
 * the locks and MR map are destroyed, the PSM endpoint is closed and the
 * domain structure is freed.
 *
 * Always returns 0.
 */
static int psmx_domain_close(fid_t fid)
{
	struct psmx_fid_domain *dom =
		container_of(fid, struct psmx_fid_domain,
			     util_domain.domain_fid.fid);
	int rc;

	FI_INFO(&psmx_prov, FI_LOG_DOMAIN, "refcnt=%d\n",
		ofi_atomic_get32(&dom->util_domain.ref));

	psmx_domain_release(dom);

	/* A nonzero result means the util domain was not closed
	 * (presumably it is still referenced); leave everything intact. */
	if (ofi_domain_close(&dom->util_domain) != 0)
		return 0;

	if (dom->progress_thread_enabled)
		psmx_domain_stop_progress(dom);

	if (dom->am_initialized)
		psmx_am_fini(dom);

	fastlock_destroy(&dom->poll_lock);
	rbtDelete(dom->mr_map);
	fastlock_destroy(&dom->mr_lock);

#if 0
	/* AM messages could arrive after MQ is finalized, causing segfault
	 * when trying to dereference the MQ pointer. There is no mechanism
	 * to properly shutdown AM. The workaround is to keep MQ valid.
	 */
	psm_mq_finalize(dom->psm_mq);
#endif

	/* workaround for:
	 * Assertion failure at psm_ep.c:1059: ep->mctxt_master == ep
	 */
	sleep(psmx_env.delay);

	/* With a zero timeout the graceful close is skipped entirely and
	 * treated as if it had timed out, so the forced close below runs. */
	rc = PSM_EP_CLOSE_TIMEOUT;
	if (psmx_env.timeout) {
		rc = psm_ep_close(dom->psm_ep, PSM_EP_CLOSE_GRACEFUL,
				  (int64_t) psmx_env.timeout * 1000000000LL);
	}

	/* Fall back to a forced close when the graceful one did not succeed. */
	if (rc != PSM_OK)
		psm_ep_close(dom->psm_ep, PSM_EP_CLOSE_FORCE, 0);

	dom->fabric->active_domain = NULL;

	free(dom);
	return 0;
}
Exemplo n.º 3
0
/*
 * Finalize the global PSM matched queue, if one exists.
 *
 * Returns 0 when pspsm_mq is unset or psm_mq_finalize() reports PSM_OK.
 * Otherwise the PSM error string is recorded through pspsm_err(), a
 * debug message is printed, and -1 is returned.
 */
int pspsm_finalize_mq(void)
{
	psm_error_t rc;

	/* Nothing to finalize. */
	if (!pspsm_mq)
		return 0;

	rc = psm_mq_finalize(pspsm_mq);
	if (rc == PSM_OK) {
		pspsm_dprint(2, "pspsm_finalize_mq: OK");
		return 0;
	}

	pspsm_err(psm_error_get_string(rc));
	pspsm_dprint(1, "pspsm_finalize_mq: %s", pspsm_err_str);
	return -1;
}
Exemplo n.º 4
0
/*
 * Set up the PSM resources of a domain: open the PSM endpoint, create the
 * matched queue, initialize the MR lock, MR map and poll lock, publish the
 * domain as the fabric's active domain, enable the endpoint and, when
 * requested, start the progress thread.
 *
 * domain   - domain to initialize; domain->fabric is read here.
 * src_addr - optional; when non-NULL its unit and port override the
 *            default endpoint open options.
 *
 * Returns 0 on success.  On failure every resource acquired so far is
 * released in reverse order and a nonzero error code is returned.
 */
static int psmx_domain_init(struct psmx_fid_domain *domain,
			    struct psmx_src_name *src_addr)
{
	struct psmx_fid_fabric *fabric = domain->fabric;
	struct psm_ep_open_opts opts;
	int err;

	psm_ep_open_opts_get_defaults(&opts);

	FI_INFO(&psmx_prov, FI_LOG_CORE,
		"uuid: %s\n", psmx_uuid_to_string(fabric->uuid));

	if (src_addr) {
		opts.unit = src_addr->unit;
		opts.port = src_addr->port;
		FI_INFO(&psmx_prov, FI_LOG_CORE,
			"ep_open_opts: unit=%d port=%u\n", opts.unit, opts.port);
	}

	err = psm_ep_open(fabric->uuid, &opts,
			  &domain->psm_ep, &domain->psm_epid);
	if (err != PSM_OK) {
		FI_WARN(&psmx_prov, FI_LOG_CORE,
			"psm_ep_open returns %d, errno=%d\n", err, errno);
		err = psmx_errno(err);
		goto err_out;
	}

	FI_INFO(&psmx_prov, FI_LOG_CORE,
		"epid: 0x%016lx\n", domain->psm_epid);

	err = psm_mq_init(domain->psm_ep, PSM_MQ_ORDERMASK_ALL,
			  NULL, 0, &domain->psm_mq);
	if (err != PSM_OK) {
		FI_WARN(&psmx_prov, FI_LOG_CORE,
			"psm_mq_init returns %d, errno=%d\n", err, errno);
		err = psmx_errno(err);
		goto err_out_close_ep;
	}

	err = fastlock_init(&domain->mr_lock);
	if (err) {
		FI_WARN(&psmx_prov, FI_LOG_CORE,
			"fastlock_init(mr_lock) returns %d\n", err);
		goto err_out_finalize_mq;
	}

	domain->mr_map = rbtNew(&psmx_key_compare);
	if (!domain->mr_map) {
		FI_WARN(&psmx_prov, FI_LOG_CORE,
			"rbtNew failed\n");
		/* rbtNew only signals failure through a NULL return, and err
		 * still holds 0 from the successful fastlock_init above.
		 * Set an explicit error so the caller is not told that a
		 * torn-down domain was initialized successfully.
		 * NOTE(review): -ENOMEM assumes an allocation failure; swap
		 * for the provider's own constant if one is preferred.
		 */
		err = -ENOMEM;
		goto err_out_destroy_mr_lock;
	}

	domain->mr_reserved_key = 1;

	err = fastlock_init(&domain->poll_lock);
	if (err) {
		FI_WARN(&psmx_prov, FI_LOG_CORE,
			"fastlock_init(poll_lock) returns %d\n", err);
		goto err_out_delete_mr_map;
	}

	/* Set active domain before psmx_domain_enable_ep() installs the
	 * AM handlers to ensure that psmx_active_fabric->active_domain
	 * is always non-NULL inside the handlers. Notice that the value
	 * active_domain becomes NULL again only when the domain is closed.
	 * At that time the AM handlers are gone with the PSM endpoint.
	 */
	fabric->active_domain = domain;

	/* Keep the callee's negative return value in err; otherwise the
	 * cleanup path would return the 0 left over from fastlock_init.
	 */
	err = psmx_domain_enable_ep(domain, NULL);
	if (err < 0)
		goto err_out_reset_active_domain;

	if (domain->progress_thread_enabled)
		psmx_domain_start_progress(domain);

	return 0;

	/* Error unwinding: each label releases one resource and falls
	 * through to the labels for resources acquired earlier.
	 */
err_out_reset_active_domain:
	fabric->active_domain = NULL;
	fastlock_destroy(&domain->poll_lock);

err_out_delete_mr_map:
	rbtDelete(domain->mr_map);

err_out_destroy_mr_lock:
	fastlock_destroy(&domain->mr_lock);

err_out_finalize_mq:
	psm_mq_finalize(domain->psm_mq);

err_out_close_ep:
	/* Try a graceful close first; force it if that does not succeed. */
	if (psm_ep_close(domain->psm_ep, PSM_EP_CLOSE_GRACEFUL,
			 (int64_t) psmx_env.timeout * 1000000000LL) != PSM_OK)
		psm_ep_close(domain->psm_ep, PSM_EP_CLOSE_FORCE, 0);

err_out:
	return err;
}