int psmx2_fabric(struct fi_fabric_attr *attr,
		 struct fid_fabric **fabric, void *context)
{
	struct psmx2_fid_fabric *fabric_priv;
	int ret;

	FI_INFO(&psmx2_prov, FI_LOG_CORE, "\n");

	if (strcmp(attr->name, PSMX2_FABRIC_NAME))
		return -FI_ENODATA;

	if (psmx2_active_fabric) {
		psmx2_fabric_acquire(psmx2_active_fabric);
		*fabric = &psmx2_active_fabric->util_fabric.fabric_fid;
		return 0;
	}

	fabric_priv = calloc(1, sizeof(*fabric_priv));
	if (!fabric_priv)
		return -FI_ENOMEM;

	ret = ofi_fabric_init(&psmx2_prov, &psmx2_fabric_attr, attr,
			     &fabric_priv->util_fabric, context,
			     FI_MATCH_EXACT);
	if (ret) {
		FI_INFO(&psmx2_prov, FI_LOG_CORE, "ofi_fabric_init returns %d\n", ret);
		free(fabric_priv);
		return ret;
	}

	/* fclass & context initialized in ofi_fabric_init */
	fabric_priv->util_fabric.fabric_fid.fid.ops = &psmx2_fabric_fi_ops;
	fabric_priv->util_fabric.fabric_fid.ops = &psmx2_fabric_ops;

	psmx2_get_uuid(fabric_priv->uuid);

	if (psmx2_env.name_server) {
		ret = pthread_create(&fabric_priv->name_server_thread, NULL,
				     psmx2_name_server, (void *)fabric_priv);
		if (ret) {
			FI_INFO(&psmx2_prov, FI_LOG_CORE, "pthread_create returns %d\n", ret);
			/* use the main thread's ID as invalid value for the new thread */
			fabric_priv->name_server_thread = pthread_self();
		}
	}

	psmx2_query_mpi();

	/* take the reference to count for multiple fabric open calls */
	psmx2_fabric_acquire(fabric_priv);

	*fabric = &fabric_priv->util_fabric.fabric_fid;
	psmx2_active_fabric = fabric_priv;

	return 0;
}
int psmx2_domain_open(struct fid_fabric *fabric, struct fi_info *info,
		      struct fid_domain **domain, void *context)
{
	struct psmx2_fid_fabric *fabric_priv;
	struct psmx2_fid_domain *domain_priv;
	struct psmx2_ep_name *src_addr = info->src_addr;
	int mr_mode = (info->domain_attr->mr_mode & FI_MR_BASIC) ? FI_MR_BASIC : 0;
	int err;

	FI_INFO(&psmx2_prov, FI_LOG_DOMAIN, "\n");

	fabric_priv = container_of(fabric, struct psmx2_fid_fabric,
				   util_fabric.fabric_fid);

	if (!info->domain_attr->name ||
	    strcmp(info->domain_attr->name, PSMX2_DOMAIN_NAME)) {
		err = -FI_EINVAL;
		goto err_out;
	}

	domain_priv = (struct psmx2_fid_domain *) calloc(1, sizeof *domain_priv);
	if (!domain_priv) {
		err = -FI_ENOMEM;
		goto err_out;
	}

	err = ofi_domain_init(fabric, info, &domain_priv->util_domain, context);
	if (err)
		goto err_out_free_domain;

	/* fclass & context are set in ofi_domain_init */
	domain_priv->util_domain.domain_fid.fid.ops = &psmx2_fi_ops;
	domain_priv->util_domain.domain_fid.ops = &psmx2_domain_ops;
	domain_priv->util_domain.domain_fid.mr = &psmx2_mr_ops;
	domain_priv->mr_mode = mr_mode;
	domain_priv->mode = info->mode;
	domain_priv->caps = PSMX2_CAPS | PSMX2_DOM_CAPS;
	domain_priv->fabric = fabric_priv;
	domain_priv->progress_thread_enabled =
		(info->domain_attr->data_progress == FI_PROGRESS_AUTO);
	domain_priv->addr_format = info->addr_format;

	if (info->addr_format == FI_ADDR_STR)
		src_addr = psmx2_string_to_ep_name(info->src_addr);

	err = psmx2_domain_init(domain_priv, src_addr);
	if (info->addr_format == FI_ADDR_STR)
		free(src_addr);
	if (err)
		goto err_out_close_domain;

	psmx2_fabric_acquire(fabric_priv);
	psmx2_lock(&fabric_priv->domain_lock, 1);
	dlist_insert_before(&domain_priv->entry, &fabric_priv->domain_list);
	psmx2_unlock(&fabric_priv->domain_lock, 1);

	*domain = &domain_priv->util_domain.domain_fid;
	return 0;

err_out_close_domain:
	ofi_domain_close(&domain_priv->util_domain);

err_out_free_domain:
	free(domain_priv);

err_out:
	return err;
}
Exemple #3
0
int psmx2_domain_open(struct fid_fabric *fabric, struct fi_info *info,
		      struct fid_domain **domain, void *context)
{
	struct psmx2_fid_fabric *fabric_priv;
	struct psmx2_fid_domain *domain_priv;
	struct psm2_ep_open_opts opts;
	int err;

	FI_INFO(&psmx2_prov, FI_LOG_DOMAIN, "\n");

	fabric_priv = container_of(fabric, struct psmx2_fid_fabric, fabric);
	psmx2_fabric_acquire(fabric_priv);

	if (fabric_priv->active_domain) {
		psmx2_domain_acquire(fabric_priv->active_domain);
		*domain = &fabric_priv->active_domain->domain;
		return 0;
	}

	if (!info->domain_attr->name ||
	    strcmp(info->domain_attr->name, PSMX2_DOMAIN_NAME)) {
		err = -FI_EINVAL;
		goto err_out;
	}

	domain_priv = (struct psmx2_fid_domain *) calloc(1, sizeof *domain_priv);
	if (!domain_priv) {
		err = -FI_ENOMEM;
		goto err_out;
	}

	domain_priv->domain.fid.fclass = FI_CLASS_DOMAIN;
	domain_priv->domain.fid.context = context;
	domain_priv->domain.fid.ops = &psmx2_fi_ops;
	domain_priv->domain.ops = &psmx2_domain_ops;
	domain_priv->domain.mr = &psmx2_mr_ops;
	domain_priv->mr_mode = info->domain_attr->mr_mode;
	domain_priv->mode = info->mode;
	domain_priv->caps = info->caps;
	domain_priv->fabric = fabric_priv;
	domain_priv->progress_thread_enabled =
		(info->domain_attr->data_progress == FI_PROGRESS_AUTO);

	psm2_ep_open_opts_get_defaults(&opts);

	FI_INFO(&psmx2_prov, FI_LOG_CORE,
		"uuid: %s\n", psmx2_uuid_to_string(fabric_priv->uuid));

	err = psm2_ep_open(fabric_priv->uuid, &opts,
			   &domain_priv->psm2_ep, &domain_priv->psm2_epid);
	if (err != PSM2_OK) {
		FI_WARN(&psmx2_prov, FI_LOG_CORE,
			"psm2_ep_open returns %d, errno=%d\n", err, errno);
		err = psmx2_errno(err);
		goto err_out_free_domain;
	}

	FI_INFO(&psmx2_prov, FI_LOG_CORE,
		"epid: 0x%016lx\n", domain_priv->psm2_epid);

	err = psm2_mq_init(domain_priv->psm2_ep, PSM2_MQ_ORDERMASK_ALL,
			   NULL, 0, &domain_priv->psm2_mq);
	if (err != PSM2_OK) {
		FI_WARN(&psmx2_prov, FI_LOG_CORE,
			"psm2_mq_init returns %d, errno=%d\n", err, errno);
		err = psmx2_errno(err);
		goto err_out_close_ep;
	}

	err = fastlock_init(&domain_priv->mr_lock);
	if (err) {
		FI_WARN(&psmx2_prov, FI_LOG_CORE,
			"fastlock_init(mr_lock) returns %d\n", err);
		goto err_out_finalize_mq;
	}

	domain_priv->mr_map = rbtNew(&psmx2_key_compare);
	if (!domain_priv->mr_map) {
		FI_WARN(&psmx2_prov, FI_LOG_CORE,
			"rbtNew failed\n");
		goto err_out_destroy_mr_lock;
	}

	domain_priv->mr_reserved_key = 1;
	
	err = fastlock_init(&domain_priv->vl_lock);
	if (err) {
		FI_WARN(&psmx2_prov, FI_LOG_CORE,
			"fastlock_init(vl_lock) returns %d\n", err);
		goto err_out_delete_mr_map;
	}
	memset(domain_priv->vl_map, 0, sizeof(domain_priv->vl_map));
	domain_priv->vl_alloc = 0;

	err = fastlock_init(&domain_priv->poll_lock);
	if (err) {
		FI_WARN(&psmx2_prov, FI_LOG_CORE,
			"fastlock_init(poll_lock) returns %d\n", err);
		goto err_out_destroy_vl_lock;
	}

	/* Set active domain before psmx2_domain_enable_ep() installs the
	 * AM handlers to ensure that psmx2_active_fabric->active_domain
	 * is always non-NULL inside the handlers. Notice that the vlaue
	 * active_domain becomes NULL again only when the domain is closed.
	 * At that time the AM handlers are gone with the PSM endpoint.
	 */
	fabric_priv->active_domain = domain_priv;

	if (psmx2_domain_enable_ep(domain_priv, NULL) < 0)
		goto err_out_reset_active_domain;

	if (domain_priv->progress_thread_enabled)
		psmx2_domain_start_progress(domain_priv);

	domain_priv->refcnt = 1;
	*domain = &domain_priv->domain;
	return 0;

err_out_reset_active_domain:
	fabric_priv->active_domain = NULL;
	fastlock_destroy(&domain_priv->poll_lock);

err_out_destroy_vl_lock:
	fastlock_destroy(&domain_priv->vl_lock);

err_out_delete_mr_map:
	rbtDelete(domain_priv->mr_map);

err_out_destroy_mr_lock:
	fastlock_destroy(&domain_priv->mr_lock);

err_out_finalize_mq:
	psm2_mq_finalize(domain_priv->psm2_mq);

err_out_close_ep:
	if (psm2_ep_close(domain_priv->psm2_ep, PSM2_EP_CLOSE_GRACEFUL,
			  (int64_t) psmx2_env.timeout * 1000000000LL) != PSM2_OK)
		psm2_ep_close(domain_priv->psm2_ep, PSM2_EP_CLOSE_FORCE, 0);

err_out_free_domain:
	free(domain_priv);

err_out:
	psmx2_fabric_release(fabric_priv);
	return err;
}
int psmx2_fabric(struct fi_fabric_attr *attr,
		 struct fid_fabric **fabric, void *context)
{
	struct psmx2_fid_fabric *fabric_priv;
	int ret;

	FI_INFO(&psmx2_prov, FI_LOG_CORE, "\n");

	if (strcmp(attr->name, PSMX2_FABRIC_NAME))
		return -FI_ENODATA;

	if (psmx2_active_fabric) {
		psmx2_fabric_acquire(psmx2_active_fabric);
		*fabric = &psmx2_active_fabric->util_fabric.fabric_fid;
		return 0;
	}

	fabric_priv = calloc(1, sizeof(*fabric_priv));
	if (!fabric_priv)
		return -FI_ENOMEM;

	fastlock_init(&fabric_priv->domain_lock);
	dlist_init(&fabric_priv->domain_list);

	psmx2_get_uuid(fabric_priv->uuid);
	if (psmx2_env.name_server) {
		fabric_priv->name_server.port = psmx2_uuid_to_port(fabric_priv->uuid);
		fabric_priv->name_server.name_len = sizeof(struct psmx2_ep_name);
		fabric_priv->name_server.service_len = sizeof(int);
		fabric_priv->name_server.service_cmp = psmx2_ns_service_cmp;
		fabric_priv->name_server.is_service_wildcard = psmx2_ns_is_service_wildcard;

		ofi_ns_init(&fabric_priv->name_server);
		ofi_ns_start_server(&fabric_priv->name_server);
	}

	ret = ofi_fabric_init(&psmx2_prov, &psmx2_fabric_attr, attr,
			     &fabric_priv->util_fabric, context);
	if (ret) {
		FI_INFO(&psmx2_prov, FI_LOG_CORE, "ofi_fabric_init returns %d\n", ret);
		if (psmx2_env.name_server)
			ofi_ns_stop_server(&fabric_priv->name_server);
		free(fabric_priv);
		return ret;
	}

	/* fclass & context initialized in ofi_fabric_init */
	fabric_priv->util_fabric.fabric_fid.fid.ops = &psmx2_fabric_fi_ops;
	fabric_priv->util_fabric.fabric_fid.ops = &psmx2_fabric_ops;

	psmx2_atomic_global_init();
	psmx2_query_mpi();

	/* take the reference to count for multiple fabric open calls */
	psmx2_fabric_acquire(fabric_priv);

	*fabric = &fabric_priv->util_fabric.fabric_fid;
	psmx2_active_fabric = fabric_priv;

	return 0;
}