示例#1
0
struct psmx2_trx_ctxt *psmx2_trx_ctxt_alloc(struct psmx2_fid_domain *domain,
					    struct psmx2_src_name *src_addr,
					    int sep_ctxt_idx)
{
	struct psmx2_trx_ctxt *trx_ctxt;
	struct psm2_ep_open_opts opts;
	int should_retry = 0;
	int err;

	trx_ctxt = calloc(1, sizeof(*trx_ctxt));
	if (!trx_ctxt) {
		FI_WARN(&psmx2_prov, FI_LOG_CORE,
			"failed to allocate trx_ctxt.\n");
		return NULL;
	}

	psm2_ep_open_opts_get_defaults(&opts);
	FI_INFO(&psmx2_prov, FI_LOG_CORE,
		"uuid: %s\n", psmx2_uuid_to_string(domain->fabric->uuid));

	if (src_addr) {
		opts.unit = src_addr->unit;
		opts.port = src_addr->port;
		FI_INFO(&psmx2_prov, FI_LOG_CORE,
			"ep_open_opts: unit=%d port=%u\n", opts.unit, opts.port);
	}

	if (opts.unit < 0 && sep_ctxt_idx >= 0) {
		should_retry = 1;
		opts.unit = sep_ctxt_idx % psmx2_env.num_devunits;
		FI_INFO(&psmx2_prov, FI_LOG_CORE,
			"sep %d: ep_open_opts: unit=%d\n", sep_ctxt_idx, opts.unit);
	}

	err = psm2_ep_open(domain->fabric->uuid, &opts,
			   &trx_ctxt->psm2_ep, &trx_ctxt->psm2_epid);
	if (err != PSM2_OK) {
		FI_WARN(&psmx2_prov, FI_LOG_CORE,
			"psm2_ep_open returns %d, errno=%d\n", err, errno);
		if (!should_retry) {
			err = psmx2_errno(err);
			goto err_out;
		}

		/* When round-robin fails, retry w/o explicit assignment */
		opts.unit = -1;
		err = psm2_ep_open(domain->fabric->uuid, &opts,
				   &trx_ctxt->psm2_ep, &trx_ctxt->psm2_epid);
		if (err != PSM2_OK) {
			FI_WARN(&psmx2_prov, FI_LOG_CORE,
				"psm2_ep_open returns %d, errno=%d\n", err, errno);
			err = psmx2_errno(err);
			goto err_out;
		}
	}

	FI_INFO(&psmx2_prov, FI_LOG_CORE,
		"epid: 0x%016lx\n", trx_ctxt->psm2_epid);

	err = psm2_mq_init(trx_ctxt->psm2_ep, PSM2_MQ_ORDERMASK_ALL,
			   NULL, 0, &trx_ctxt->psm2_mq);
	if (err != PSM2_OK) {
		FI_WARN(&psmx2_prov, FI_LOG_CORE,
			"psm2_mq_init returns %d, errno=%d\n", err, errno);
		err = psmx2_errno(err);
		goto err_out_close_ep;
	}

	fastlock_init(&trx_ctxt->poll_lock);
	fastlock_init(&trx_ctxt->rma_queue.lock);
	fastlock_init(&trx_ctxt->trigger_queue.lock);
	slist_init(&trx_ctxt->rma_queue.list);
	slist_init(&trx_ctxt->trigger_queue.list);

	return trx_ctxt;

err_out_close_ep:
	if (psm2_ep_close(trx_ctxt->psm2_ep, PSM2_EP_CLOSE_GRACEFUL,
			  (int64_t) psmx2_env.timeout * 1000000000LL) != PSM2_OK)
		psm2_ep_close(trx_ctxt->psm2_ep, PSM2_EP_CLOSE_FORCE, 0);

err_out:
	free(trx_ctxt);
	return NULL;
}
示例#2
0
int psmx2_ep_open_internal(struct psmx2_fid_domain *domain_priv,
			   struct fi_info *info,
			   struct psmx2_fid_ep **ep_out, void *context,
			   struct psmx2_trx_ctxt *trx_ctxt)
{
	struct psmx2_fid_ep *ep_priv;
	uint64_t ep_cap;
	int err = -FI_EINVAL;

	if (info)
		ep_cap = info->caps;
	else
		ep_cap = FI_TAGGED;

	if (info && info->ep_attr && info->ep_attr->auth_key) {
		if (info->ep_attr->auth_key_size != sizeof(psm2_uuid_t)) {
			FI_WARN(&psmx2_prov, FI_LOG_EP_CTRL,
				"Invalid auth_key_len %"PRIu64
				", should be %"PRIu64".\n",
				info->ep_attr->auth_key_size,
				sizeof(psm2_uuid_t));
			goto errout;
		}
		if (memcmp(domain_priv->fabric->uuid, info->ep_attr->auth_key,
			   sizeof(psm2_uuid_t))) {
			FI_WARN(&psmx2_prov, FI_LOG_EP_CTRL,
				"Invalid auth_key: %s\n",
				psmx2_uuid_to_string((void *)info->ep_attr->auth_key));
			goto errout;
		}
	}

	ep_priv = (struct psmx2_fid_ep *) calloc(1, sizeof *ep_priv);
	if (!ep_priv) {
		err = -FI_ENOMEM;
		goto errout;
	}

	ep_priv->ep.fid.fclass = FI_CLASS_EP;
	ep_priv->ep.fid.context = context;
	ep_priv->ep.fid.ops = &psmx2_fi_ops;
	ep_priv->ep.ops = &psmx2_ep_ops;
	ep_priv->ep.cm = &psmx2_cm_ops;
	ep_priv->domain = domain_priv;
	ep_priv->rx = trx_ctxt;
	if (!(info && info->ep_attr && info->ep_attr->tx_ctx_cnt == FI_SHARED_CONTEXT))
		ep_priv->tx = trx_ctxt;
	ofi_atomic_initialize32(&ep_priv->ref, 0);

	PSMX2_CTXT_TYPE(&ep_priv->nocomp_send_context) = PSMX2_NOCOMP_SEND_CONTEXT;
	PSMX2_CTXT_EP(&ep_priv->nocomp_send_context) = ep_priv;
	PSMX2_CTXT_TYPE(&ep_priv->nocomp_tsend_context) = PSMX2_NOCOMP_TSEND_CONTEXT;
	PSMX2_CTXT_EP(&ep_priv->nocomp_tsend_context) = ep_priv;

	if (ep_cap & FI_TAGGED)
		ep_priv->ep.tagged = &psmx2_tagged_ops;
	if (ep_cap & FI_MSG)
		ep_priv->ep.msg = &psmx2_msg_ops;
	if (ep_cap & FI_RMA)
		ep_priv->ep.rma = &psmx2_rma_ops;
	if (ep_cap & FI_ATOMICS)
		ep_priv->ep.atomic = &psmx2_atomic_ops;

	ep_priv->caps = ep_cap;

	err = psmx2_domain_enable_ep(domain_priv, ep_priv);
	if (err)
		goto errout_free_ep;

	psmx2_domain_acquire(domain_priv);

	if (info) {
		if (info->tx_attr)
			ep_priv->tx_flags = info->tx_attr->op_flags;
		if (info->rx_attr)
			ep_priv->rx_flags = info->rx_attr->op_flags;
	}

	psmx2_ep_optimize_ops(ep_priv);

	PSMX2_EP_INIT_OP_CONTEXT(ep_priv);

	*ep_out = ep_priv;
	return 0;

errout_free_ep:
	free(ep_priv);

errout:
	return err;
}
示例#3
0
int psmx2_domain_open(struct fid_fabric *fabric, struct fi_info *info,
		      struct fid_domain **domain, void *context)
{
	struct psmx2_fid_fabric *fabric_priv;
	struct psmx2_fid_domain *domain_priv;
	struct psm2_ep_open_opts opts;
	int err;

	FI_INFO(&psmx2_prov, FI_LOG_DOMAIN, "\n");

	fabric_priv = container_of(fabric, struct psmx2_fid_fabric, fabric);
	psmx2_fabric_acquire(fabric_priv);

	if (fabric_priv->active_domain) {
		psmx2_domain_acquire(fabric_priv->active_domain);
		*domain = &fabric_priv->active_domain->domain;
		return 0;
	}

	if (!info->domain_attr->name ||
	    strcmp(info->domain_attr->name, PSMX2_DOMAIN_NAME)) {
		err = -FI_EINVAL;
		goto err_out;
	}

	domain_priv = (struct psmx2_fid_domain *) calloc(1, sizeof *domain_priv);
	if (!domain_priv) {
		err = -FI_ENOMEM;
		goto err_out;
	}

	domain_priv->domain.fid.fclass = FI_CLASS_DOMAIN;
	domain_priv->domain.fid.context = context;
	domain_priv->domain.fid.ops = &psmx2_fi_ops;
	domain_priv->domain.ops = &psmx2_domain_ops;
	domain_priv->domain.mr = &psmx2_mr_ops;
	domain_priv->mr_mode = info->domain_attr->mr_mode;
	domain_priv->mode = info->mode;
	domain_priv->caps = info->caps;
	domain_priv->fabric = fabric_priv;
	domain_priv->progress_thread_enabled =
		(info->domain_attr->data_progress == FI_PROGRESS_AUTO);

	psm2_ep_open_opts_get_defaults(&opts);

	FI_INFO(&psmx2_prov, FI_LOG_CORE,
		"uuid: %s\n", psmx2_uuid_to_string(fabric_priv->uuid));

	err = psm2_ep_open(fabric_priv->uuid, &opts,
			   &domain_priv->psm2_ep, &domain_priv->psm2_epid);
	if (err != PSM2_OK) {
		FI_WARN(&psmx2_prov, FI_LOG_CORE,
			"psm2_ep_open returns %d, errno=%d\n", err, errno);
		err = psmx2_errno(err);
		goto err_out_free_domain;
	}

	FI_INFO(&psmx2_prov, FI_LOG_CORE,
		"epid: 0x%016lx\n", domain_priv->psm2_epid);

	err = psm2_mq_init(domain_priv->psm2_ep, PSM2_MQ_ORDERMASK_ALL,
			   NULL, 0, &domain_priv->psm2_mq);
	if (err != PSM2_OK) {
		FI_WARN(&psmx2_prov, FI_LOG_CORE,
			"psm2_mq_init returns %d, errno=%d\n", err, errno);
		err = psmx2_errno(err);
		goto err_out_close_ep;
	}

	err = fastlock_init(&domain_priv->mr_lock);
	if (err) {
		FI_WARN(&psmx2_prov, FI_LOG_CORE,
			"fastlock_init(mr_lock) returns %d\n", err);
		goto err_out_finalize_mq;
	}

	domain_priv->mr_map = rbtNew(&psmx2_key_compare);
	if (!domain_priv->mr_map) {
		FI_WARN(&psmx2_prov, FI_LOG_CORE,
			"rbtNew failed\n");
		goto err_out_destroy_mr_lock;
	}

	domain_priv->mr_reserved_key = 1;
	
	err = fastlock_init(&domain_priv->vl_lock);
	if (err) {
		FI_WARN(&psmx2_prov, FI_LOG_CORE,
			"fastlock_init(vl_lock) returns %d\n", err);
		goto err_out_delete_mr_map;
	}
	memset(domain_priv->vl_map, 0, sizeof(domain_priv->vl_map));
	domain_priv->vl_alloc = 0;

	err = fastlock_init(&domain_priv->poll_lock);
	if (err) {
		FI_WARN(&psmx2_prov, FI_LOG_CORE,
			"fastlock_init(poll_lock) returns %d\n", err);
		goto err_out_destroy_vl_lock;
	}

	/* Set active domain before psmx2_domain_enable_ep() installs the
	 * AM handlers to ensure that psmx2_active_fabric->active_domain
	 * is always non-NULL inside the handlers. Notice that the vlaue
	 * active_domain becomes NULL again only when the domain is closed.
	 * At that time the AM handlers are gone with the PSM endpoint.
	 */
	fabric_priv->active_domain = domain_priv;

	if (psmx2_domain_enable_ep(domain_priv, NULL) < 0)
		goto err_out_reset_active_domain;

	if (domain_priv->progress_thread_enabled)
		psmx2_domain_start_progress(domain_priv);

	domain_priv->refcnt = 1;
	*domain = &domain_priv->domain;
	return 0;

err_out_reset_active_domain:
	fabric_priv->active_domain = NULL;
	fastlock_destroy(&domain_priv->poll_lock);

err_out_destroy_vl_lock:
	fastlock_destroy(&domain_priv->vl_lock);

err_out_delete_mr_map:
	rbtDelete(domain_priv->mr_map);

err_out_destroy_mr_lock:
	fastlock_destroy(&domain_priv->mr_lock);

err_out_finalize_mq:
	psm2_mq_finalize(domain_priv->psm2_mq);

err_out_close_ep:
	if (psm2_ep_close(domain_priv->psm2_ep, PSM2_EP_CLOSE_GRACEFUL,
			  (int64_t) psmx2_env.timeout * 1000000000LL) != PSM2_OK)
		psm2_ep_close(domain_priv->psm2_ep, PSM2_EP_CLOSE_FORCE, 0);

err_out_free_domain:
	free(domain_priv);

err_out:
	psmx2_fabric_release(fabric_priv);
	return err;
}