Пример #1
0
static int psmx2_domain_close(fid_t fid)
{
	struct psmx2_fid_domain *domain;

	domain = container_of(fid, struct psmx2_fid_domain,
			      util_domain.domain_fid.fid);

	FI_INFO(&psmx2_prov, FI_LOG_DOMAIN, "refcnt=%d\n",
		ofi_atomic_get32(&domain->util_domain.ref));

	if (ofi_domain_close(&domain->util_domain))
		return 0;

	if (domain->progress_thread_enabled)
		psmx2_domain_stop_progress(domain);

	fastlock_destroy(&domain->sep_lock);
	fastlock_destroy(&domain->mr_lock);
	rbtDelete(domain->mr_map);

	psmx2_lock(&domain->fabric->domain_lock, 1);
	dlist_remove(&domain->entry);
	psmx2_unlock(&domain->fabric->domain_lock, 1);
	psmx2_fabric_release(domain->fabric);

	free(domain);
	return 0;
}
Пример #2
0
static int psmx2_fabric_close(fid_t fid)
{
	struct psmx2_fid_fabric *fabric;

	fabric = container_of(fid, struct psmx2_fid_fabric,
			      util_fabric.fabric_fid.fid);

	psmx2_fabric_release(fabric);

	FI_INFO(&psmx2_prov, FI_LOG_CORE, "refcnt=%d\n",
		ofi_atomic_get32(&fabric->util_fabric.ref));

	if (ofi_fabric_close(&fabric->util_fabric))
		return 0;

	if (psmx2_env.name_server)
		ofi_ns_stop_server(&fabric->name_server);

	fastlock_destroy(&fabric->domain_lock);
	assert(fabric == psmx2_active_fabric);
	psmx2_active_fabric = NULL;
	free(fabric);

	psmx2_atomic_global_fini();
	return 0;
}
Пример #3
0
void psmx2_domain_release(struct psmx2_fid_domain *domain)
{
	int err;

	FI_INFO(&psmx2_prov, FI_LOG_DOMAIN, "refcnt=%d\n", domain->refcnt);

	if (--domain->refcnt > 0)
		return;

	if (domain->progress_thread_enabled)
		psmx2_domain_stop_progress(domain);

	psmx2_am_fini(domain);

	fastlock_destroy(&domain->poll_lock);
	fastlock_destroy(&domain->vl_lock);
	rbtDelete(domain->mr_map);
	fastlock_destroy(&domain->mr_lock);

#if 0
	/* AM messages could arrive after MQ is finalized, causing segfault
	 * when trying to dereference the MQ pointer. There is no mechanism
	 * to properly shutdown AM. The workaround is to keep MQ valid.
	 */
	psm2_mq_finalize(domain->psm2_mq);
#endif

	/* workaround for:
	 * Assertion failure at psm2_ep.c:1059: ep->mctxt_master == ep
	 */
	sleep(psmx2_env.delay);

	if (psmx2_env.timeout)
		err = psm2_ep_close(domain->psm2_ep, PSM2_EP_CLOSE_GRACEFUL,
				    (int64_t) psmx2_env.timeout * 1000000000LL);
	else
		err = PSM2_EP_CLOSE_TIMEOUT;

	if (err != PSM2_OK)
		psm2_ep_close(domain->psm2_ep, PSM2_EP_CLOSE_FORCE, 0);

	domain->fabric->active_domain = NULL;

	psmx2_fabric_release(domain->fabric);

	free(domain);
}
Пример #4
0
static int psmx2_fabric_close(fid_t fid)
{
	struct psmx2_fid_fabric *fabric;
	void *exit_code;
	int ret;

	fabric = container_of(fid, struct psmx2_fid_fabric,
			      util_fabric.fabric_fid.fid);

	FI_INFO(&psmx2_prov, FI_LOG_CORE, "refcnt=%d\n",
		atomic_get(&fabric->util_fabric.ref));

	psmx2_fabric_release(fabric);

	if (ofi_fabric_close(&fabric->util_fabric))
		return 0;

	if (psmx2_env.name_server &&
	    !pthread_equal(fabric->name_server_thread, pthread_self())) {
		ret = pthread_cancel(fabric->name_server_thread);
		if (ret) {
			FI_INFO(&psmx2_prov, FI_LOG_CORE,
				"pthread_cancel returns %d\n", ret);
		}
		ret = pthread_join(fabric->name_server_thread, &exit_code);
		if (ret) {
			FI_INFO(&psmx2_prov, FI_LOG_CORE,
				"pthread_join returns %d\n", ret);
		} else {
			FI_INFO(&psmx2_prov, FI_LOG_CORE,
				"name server thread exited with code %ld (%s)\n",
				(uintptr_t)exit_code,
				(exit_code == PTHREAD_CANCELED) ? "PTHREAD_CANCELED" : "?");
		}
	}
	if (fabric->active_domain) {
		FI_WARN(&psmx2_prov, FI_LOG_CORE, "forced closing of active_domain\n");
		fi_close(&fabric->active_domain->util_domain.domain_fid.fid);
	}
	assert(fabric == psmx2_active_fabric);
	psmx2_active_fabric = NULL;
	free(fabric);

	return 0;
}
Пример #5
0
int psmx2_domain_open(struct fid_fabric *fabric, struct fi_info *info,
		      struct fid_domain **domain, void *context)
{
	struct psmx2_fid_fabric *fabric_priv;
	struct psmx2_fid_domain *domain_priv;
	struct psm2_ep_open_opts opts;
	int err;

	FI_INFO(&psmx2_prov, FI_LOG_DOMAIN, "\n");

	fabric_priv = container_of(fabric, struct psmx2_fid_fabric, fabric);
	psmx2_fabric_acquire(fabric_priv);

	if (fabric_priv->active_domain) {
		psmx2_domain_acquire(fabric_priv->active_domain);
		*domain = &fabric_priv->active_domain->domain;
		return 0;
	}

	if (!info->domain_attr->name ||
	    strcmp(info->domain_attr->name, PSMX2_DOMAIN_NAME)) {
		err = -FI_EINVAL;
		goto err_out;
	}

	domain_priv = (struct psmx2_fid_domain *) calloc(1, sizeof *domain_priv);
	if (!domain_priv) {
		err = -FI_ENOMEM;
		goto err_out;
	}

	domain_priv->domain.fid.fclass = FI_CLASS_DOMAIN;
	domain_priv->domain.fid.context = context;
	domain_priv->domain.fid.ops = &psmx2_fi_ops;
	domain_priv->domain.ops = &psmx2_domain_ops;
	domain_priv->domain.mr = &psmx2_mr_ops;
	domain_priv->mr_mode = info->domain_attr->mr_mode;
	domain_priv->mode = info->mode;
	domain_priv->caps = info->caps;
	domain_priv->fabric = fabric_priv;
	domain_priv->progress_thread_enabled =
		(info->domain_attr->data_progress == FI_PROGRESS_AUTO);

	psm2_ep_open_opts_get_defaults(&opts);

	FI_INFO(&psmx2_prov, FI_LOG_CORE,
		"uuid: %s\n", psmx2_uuid_to_string(fabric_priv->uuid));

	err = psm2_ep_open(fabric_priv->uuid, &opts,
			   &domain_priv->psm2_ep, &domain_priv->psm2_epid);
	if (err != PSM2_OK) {
		FI_WARN(&psmx2_prov, FI_LOG_CORE,
			"psm2_ep_open returns %d, errno=%d\n", err, errno);
		err = psmx2_errno(err);
		goto err_out_free_domain;
	}

	FI_INFO(&psmx2_prov, FI_LOG_CORE,
		"epid: 0x%016lx\n", domain_priv->psm2_epid);

	err = psm2_mq_init(domain_priv->psm2_ep, PSM2_MQ_ORDERMASK_ALL,
			   NULL, 0, &domain_priv->psm2_mq);
	if (err != PSM2_OK) {
		FI_WARN(&psmx2_prov, FI_LOG_CORE,
			"psm2_mq_init returns %d, errno=%d\n", err, errno);
		err = psmx2_errno(err);
		goto err_out_close_ep;
	}

	err = fastlock_init(&domain_priv->mr_lock);
	if (err) {
		FI_WARN(&psmx2_prov, FI_LOG_CORE,
			"fastlock_init(mr_lock) returns %d\n", err);
		goto err_out_finalize_mq;
	}

	domain_priv->mr_map = rbtNew(&psmx2_key_compare);
	if (!domain_priv->mr_map) {
		FI_WARN(&psmx2_prov, FI_LOG_CORE,
			"rbtNew failed\n");
		goto err_out_destroy_mr_lock;
	}

	domain_priv->mr_reserved_key = 1;
	
	err = fastlock_init(&domain_priv->vl_lock);
	if (err) {
		FI_WARN(&psmx2_prov, FI_LOG_CORE,
			"fastlock_init(vl_lock) returns %d\n", err);
		goto err_out_delete_mr_map;
	}
	memset(domain_priv->vl_map, 0, sizeof(domain_priv->vl_map));
	domain_priv->vl_alloc = 0;

	err = fastlock_init(&domain_priv->poll_lock);
	if (err) {
		FI_WARN(&psmx2_prov, FI_LOG_CORE,
			"fastlock_init(poll_lock) returns %d\n", err);
		goto err_out_destroy_vl_lock;
	}

	/* Set active domain before psmx2_domain_enable_ep() installs the
	 * AM handlers to ensure that psmx2_active_fabric->active_domain
	 * is always non-NULL inside the handlers. Notice that the vlaue
	 * active_domain becomes NULL again only when the domain is closed.
	 * At that time the AM handlers are gone with the PSM endpoint.
	 */
	fabric_priv->active_domain = domain_priv;

	if (psmx2_domain_enable_ep(domain_priv, NULL) < 0)
		goto err_out_reset_active_domain;

	if (domain_priv->progress_thread_enabled)
		psmx2_domain_start_progress(domain_priv);

	domain_priv->refcnt = 1;
	*domain = &domain_priv->domain;
	return 0;

err_out_reset_active_domain:
	fabric_priv->active_domain = NULL;
	fastlock_destroy(&domain_priv->poll_lock);

err_out_destroy_vl_lock:
	fastlock_destroy(&domain_priv->vl_lock);

err_out_delete_mr_map:
	rbtDelete(domain_priv->mr_map);

err_out_destroy_mr_lock:
	fastlock_destroy(&domain_priv->mr_lock);

err_out_finalize_mq:
	psm2_mq_finalize(domain_priv->psm2_mq);

err_out_close_ep:
	if (psm2_ep_close(domain_priv->psm2_ep, PSM2_EP_CLOSE_GRACEFUL,
			  (int64_t) psmx2_env.timeout * 1000000000LL) != PSM2_OK)
		psm2_ep_close(domain_priv->psm2_ep, PSM2_EP_CLOSE_FORCE, 0);

err_out_free_domain:
	free(domain_priv);

err_out:
	psmx2_fabric_release(fabric_priv);
	return err;
}