Exemplo n.º 1
0
int
ompi_mtl_psm2_finalize(struct mca_mtl_base_module_t* mtl) {
    psm2_error_t err;

    opal_progress_unregister(ompi_mtl_psm2_progress);

    /* free resources */
    err = psm2_mq_finalize(ompi_mtl_psm2.mq);
    if (err) {
        opal_output(0, "Error in psm2_mq_finalize (error %s)\n",
		    psm2_error_get_string(err));
        return OMPI_ERROR;
    }

    err = psm2_ep_close(ompi_mtl_psm2.ep, PSM2_EP_CLOSE_GRACEFUL, 1*1e9);
    if (err) {
        opal_output(0, "Error in psm2_ep_close (error %s)\n",
		    psm2_error_get_string(err));
        return OMPI_ERROR;
    }

    err = psm2_finalize();
    if (err) {
        opal_output(0, "Error in psm2_finalize (error %s)\n",
		    psm2_error_get_string(err));
        return OMPI_ERROR;
    }

    return OMPI_SUCCESS;
}
Exemplo n.º 2
0
void psmx2_trx_ctxt_free(struct psmx2_trx_ctxt *trx_ctxt)
{
	int err;

	if (!trx_ctxt)
		return;

	if (trx_ctxt->am_initialized)
		psmx2_am_fini(trx_ctxt);

#if 0
	/* AM messages could arrive after MQ is finalized, causing segfault
	 * when trying to dereference the MQ pointer. There is no mechanism
	 * to properly shutdown AM. The workaround is to keep MQ valid.
	 */
	psm2_mq_finalize(trx_ctxt->psm2_mq);
#endif

	/* workaround for:
	 * Assertion failure at psm2_ep.c:1059: ep->mctxt_master == ep
	 */
	sleep(psmx2_env.delay);

	if (psmx2_env.timeout)
		err = psm2_ep_close(trx_ctxt->psm2_ep, PSM2_EP_CLOSE_GRACEFUL,
				    (int64_t) psmx2_env.timeout * 1000000000LL);
	else
		err = PSM2_EP_CLOSE_TIMEOUT;

	if (err != PSM2_OK)
		psm2_ep_close(trx_ctxt->psm2_ep, PSM2_EP_CLOSE_FORCE, 0);

	fastlock_destroy(&trx_ctxt->poll_lock);
	free(trx_ctxt);
}
Exemplo n.º 3
0
void psmx2_domain_release(struct psmx2_fid_domain *domain)
{
	int err;

	FI_INFO(&psmx2_prov, FI_LOG_DOMAIN, "refcnt=%d\n", domain->refcnt);

	if (--domain->refcnt > 0)
		return;

	if (domain->progress_thread_enabled)
		psmx2_domain_stop_progress(domain);

	psmx2_am_fini(domain);

	fastlock_destroy(&domain->poll_lock);
	fastlock_destroy(&domain->vl_lock);
	rbtDelete(domain->mr_map);
	fastlock_destroy(&domain->mr_lock);

#if 0
	/* AM messages could arrive after MQ is finalized, causing segfault
	 * when trying to dereference the MQ pointer. There is no mechanism
	 * to properly shutdown AM. The workaround is to keep MQ valid.
	 */
	psm2_mq_finalize(domain->psm2_mq);
#endif

	/* workaround for:
	 * Assertion failure at psm2_ep.c:1059: ep->mctxt_master == ep
	 */
	sleep(psmx2_env.delay);

	if (psmx2_env.timeout)
		err = psm2_ep_close(domain->psm2_ep, PSM2_EP_CLOSE_GRACEFUL,
				    (int64_t) psmx2_env.timeout * 1000000000LL);
	else
		err = PSM2_EP_CLOSE_TIMEOUT;

	if (err != PSM2_OK)
		psm2_ep_close(domain->psm2_ep, PSM2_EP_CLOSE_FORCE, 0);

	domain->fabric->active_domain = NULL;

	psmx2_fabric_release(domain->fabric);

	free(domain);
}
Exemplo n.º 4
0
int psmx2_domain_open(struct fid_fabric *fabric, struct fi_info *info,
		      struct fid_domain **domain, void *context)
{
	struct psmx2_fid_fabric *fabric_priv;
	struct psmx2_fid_domain *domain_priv;
	struct psm2_ep_open_opts opts;
	int err;

	FI_INFO(&psmx2_prov, FI_LOG_DOMAIN, "\n");

	fabric_priv = container_of(fabric, struct psmx2_fid_fabric, fabric);
	psmx2_fabric_acquire(fabric_priv);

	if (fabric_priv->active_domain) {
		psmx2_domain_acquire(fabric_priv->active_domain);
		*domain = &fabric_priv->active_domain->domain;
		return 0;
	}

	if (!info->domain_attr->name ||
	    strcmp(info->domain_attr->name, PSMX2_DOMAIN_NAME)) {
		err = -FI_EINVAL;
		goto err_out;
	}

	domain_priv = (struct psmx2_fid_domain *) calloc(1, sizeof *domain_priv);
	if (!domain_priv) {
		err = -FI_ENOMEM;
		goto err_out;
	}

	domain_priv->domain.fid.fclass = FI_CLASS_DOMAIN;
	domain_priv->domain.fid.context = context;
	domain_priv->domain.fid.ops = &psmx2_fi_ops;
	domain_priv->domain.ops = &psmx2_domain_ops;
	domain_priv->domain.mr = &psmx2_mr_ops;
	domain_priv->mr_mode = info->domain_attr->mr_mode;
	domain_priv->mode = info->mode;
	domain_priv->caps = info->caps;
	domain_priv->fabric = fabric_priv;
	domain_priv->progress_thread_enabled =
		(info->domain_attr->data_progress == FI_PROGRESS_AUTO);

	psm2_ep_open_opts_get_defaults(&opts);

	FI_INFO(&psmx2_prov, FI_LOG_CORE,
		"uuid: %s\n", psmx2_uuid_to_string(fabric_priv->uuid));

	err = psm2_ep_open(fabric_priv->uuid, &opts,
			   &domain_priv->psm2_ep, &domain_priv->psm2_epid);
	if (err != PSM2_OK) {
		FI_WARN(&psmx2_prov, FI_LOG_CORE,
			"psm2_ep_open returns %d, errno=%d\n", err, errno);
		err = psmx2_errno(err);
		goto err_out_free_domain;
	}

	FI_INFO(&psmx2_prov, FI_LOG_CORE,
		"epid: 0x%016lx\n", domain_priv->psm2_epid);

	err = psm2_mq_init(domain_priv->psm2_ep, PSM2_MQ_ORDERMASK_ALL,
			   NULL, 0, &domain_priv->psm2_mq);
	if (err != PSM2_OK) {
		FI_WARN(&psmx2_prov, FI_LOG_CORE,
			"psm2_mq_init returns %d, errno=%d\n", err, errno);
		err = psmx2_errno(err);
		goto err_out_close_ep;
	}

	err = fastlock_init(&domain_priv->mr_lock);
	if (err) {
		FI_WARN(&psmx2_prov, FI_LOG_CORE,
			"fastlock_init(mr_lock) returns %d\n", err);
		goto err_out_finalize_mq;
	}

	domain_priv->mr_map = rbtNew(&psmx2_key_compare);
	if (!domain_priv->mr_map) {
		FI_WARN(&psmx2_prov, FI_LOG_CORE,
			"rbtNew failed\n");
		goto err_out_destroy_mr_lock;
	}

	domain_priv->mr_reserved_key = 1;
	
	err = fastlock_init(&domain_priv->vl_lock);
	if (err) {
		FI_WARN(&psmx2_prov, FI_LOG_CORE,
			"fastlock_init(vl_lock) returns %d\n", err);
		goto err_out_delete_mr_map;
	}
	memset(domain_priv->vl_map, 0, sizeof(domain_priv->vl_map));
	domain_priv->vl_alloc = 0;

	err = fastlock_init(&domain_priv->poll_lock);
	if (err) {
		FI_WARN(&psmx2_prov, FI_LOG_CORE,
			"fastlock_init(poll_lock) returns %d\n", err);
		goto err_out_destroy_vl_lock;
	}

	/* Set active domain before psmx2_domain_enable_ep() installs the
	 * AM handlers to ensure that psmx2_active_fabric->active_domain
	 * is always non-NULL inside the handlers. Notice that the vlaue
	 * active_domain becomes NULL again only when the domain is closed.
	 * At that time the AM handlers are gone with the PSM endpoint.
	 */
	fabric_priv->active_domain = domain_priv;

	if (psmx2_domain_enable_ep(domain_priv, NULL) < 0)
		goto err_out_reset_active_domain;

	if (domain_priv->progress_thread_enabled)
		psmx2_domain_start_progress(domain_priv);

	domain_priv->refcnt = 1;
	*domain = &domain_priv->domain;
	return 0;

err_out_reset_active_domain:
	fabric_priv->active_domain = NULL;
	fastlock_destroy(&domain_priv->poll_lock);

err_out_destroy_vl_lock:
	fastlock_destroy(&domain_priv->vl_lock);

err_out_delete_mr_map:
	rbtDelete(domain_priv->mr_map);

err_out_destroy_mr_lock:
	fastlock_destroy(&domain_priv->mr_lock);

err_out_finalize_mq:
	psm2_mq_finalize(domain_priv->psm2_mq);

err_out_close_ep:
	if (psm2_ep_close(domain_priv->psm2_ep, PSM2_EP_CLOSE_GRACEFUL,
			  (int64_t) psmx2_env.timeout * 1000000000LL) != PSM2_OK)
		psm2_ep_close(domain_priv->psm2_ep, PSM2_EP_CLOSE_FORCE, 0);

err_out_free_domain:
	free(domain_priv);

err_out:
	psmx2_fabric_release(fabric_priv);
	return err;
}