Example #1
0
int psmx2_fabric(struct fi_fabric_attr *attr,
		 struct fid_fabric **fabric, void *context)
{
	struct psmx2_fid_fabric *fabric_priv;
	int ret;

	FI_INFO(&psmx2_prov, FI_LOG_CORE, "\n");

	if (strcmp(attr->name, PSMX2_FABRIC_NAME))
		return -FI_ENODATA;

	if (psmx2_active_fabric) {
		psmx2_fabric_acquire(psmx2_active_fabric);
		*fabric = &psmx2_active_fabric->util_fabric.fabric_fid;
		return 0;
	}

	fabric_priv = calloc(1, sizeof(*fabric_priv));
	if (!fabric_priv)
		return -FI_ENOMEM;

	fastlock_init(&fabric_priv->domain_lock);
	dlist_init(&fabric_priv->domain_list);

	psmx2_get_uuid(fabric_priv->uuid);
	if (psmx2_env.name_server) {
		fabric_priv->name_server.port = psmx2_uuid_to_port(fabric_priv->uuid);
		fabric_priv->name_server.name_len = sizeof(struct psmx2_ep_name);
		fabric_priv->name_server.service_len = sizeof(int);
		fabric_priv->name_server.service_cmp = psmx2_ns_service_cmp;
		fabric_priv->name_server.is_service_wildcard = psmx2_ns_is_service_wildcard;

		ofi_ns_init(&fabric_priv->name_server);
		ofi_ns_start_server(&fabric_priv->name_server);
	}

	ret = ofi_fabric_init(&psmx2_prov, &psmx2_fabric_attr, attr,
			     &fabric_priv->util_fabric, context);
	if (ret) {
		FI_INFO(&psmx2_prov, FI_LOG_CORE, "ofi_fabric_init returns %d\n", ret);
		if (psmx2_env.name_server)
			ofi_ns_stop_server(&fabric_priv->name_server);
		free(fabric_priv);
		return ret;
	}

	/* fclass & context initialized in ofi_fabric_init */
	fabric_priv->util_fabric.fabric_fid.fid.ops = &psmx2_fabric_fi_ops;
	fabric_priv->util_fabric.fabric_fid.ops = &psmx2_fabric_ops;

	psmx2_atomic_global_init();
	psmx2_query_mpi();

	/* take the reference to count for multiple fabric open calls */
	psmx2_fabric_acquire(fabric_priv);

	*fabric = &fabric_priv->util_fabric.fabric_fid;
	psmx2_active_fabric = fabric_priv;

	return 0;
}
Example #2
0
static int psmx_getinfo(uint32_t version, const char *node, const char *service,
			uint64_t flags, const struct fi_info *hints,
			struct fi_info **info)
{
	struct fi_info *psmx_info;
	uint32_t cnt = 0;
	psm_epid_t *dest_addr = NULL;
	struct psmx_src_name *src_addr = NULL;
	int ep_type = FI_EP_RDM;
	int av_type = FI_AV_UNSPEC;
	uint64_t mode = FI_CONTEXT;
	enum fi_mr_mode mr_mode = FI_MR_SCALABLE;
	enum fi_threading threading = FI_THREAD_COMPLETION;
	enum fi_progress control_progress = FI_PROGRESS_MANUAL;
	enum fi_progress data_progress = FI_PROGRESS_MANUAL;
	int caps = 0;
	uint64_t max_tag_value = 0;
	int err = -FI_ENODATA;
	int svc0, svc = PSMX_ANY_SERVICE;

	FI_INFO(&psmx_prov, FI_LOG_CORE,"\n");

	*info = NULL;

	/* Perform some quick check first to avoid unnecessary operations */
	if (hints) {
		if (hints->fabric_attr && hints->fabric_attr->name &&
		    strcasecmp(hints->fabric_attr->name, PSMX_FABRIC_NAME)) {
			FI_INFO(&psmx_prov, FI_LOG_CORE,
				"hints->fabric_name=%s, supported=%s\n",
				hints->fabric_attr->name, PSMX_FABRIC_NAME);
			goto err_out;
		}

		if (hints->domain_attr && hints->domain_attr->name &&
		    strcasecmp(hints->domain_attr->name, PSMX_DOMAIN_NAME)) {
			FI_INFO(&psmx_prov, FI_LOG_CORE,
				"hints->domain_name=%s, supported=%s\n",
				hints->domain_attr->name, PSMX_DOMAIN_NAME);
			goto err_out;
		}

		if (hints->ep_attr) {
			switch (hints->ep_attr->type) {
			case FI_EP_UNSPEC:
			case FI_EP_DGRAM:
			case FI_EP_RDM:
				break;
			default:
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->ep_attr->type=%d, supported=%d,%d,%d.\n",
					hints->ep_attr->type, FI_EP_UNSPEC,
					FI_EP_DGRAM, FI_EP_RDM);
				goto err_out;
			}
		}

		if ((hints->caps & PSMX_CAPS) != hints->caps &&
		    (hints->caps & PSMX_CAPS2) != hints->caps) {
			uint64_t psmx_caps = PSMX_CAPS;
			uint64_t psmx_caps2 = PSMX_CAPS2;
			PSMX_INFO_DIFF("hints->caps", hints->caps, psmx_caps,
				       FI_TYPE_CAPS);
			PSMX_INFO_DIFF("alternatively, hints->caps",
				       hints->caps, psmx_caps2, FI_TYPE_CAPS);
			goto err_out;
		}
	}

	if (FI_VERSION_GE(version, FI_VERSION(1,5)))
		mr_mode = 0;

	if (psmx_init_lib())
		return -FI_ENODATA;

	if (psmx_compat_lib) {
		/*
		 * native PSM running over TrueScale doesn't have the issue handled
		 * here. it's only present when PSM is supported via the psm2-compat
		 * library, where the PSM functions are just wrappers around the PSM2
		 * counterparts.
		 *
		 * psm2_ep_num_devunits() may wait for 15 seconds before return
		 * when /dev/hfi1_0 is not present. Check the existence of any hfi1
		 * device interface first to avoid this delay. Note that the devices
		 * don't necessarily appear consecutively so we need to check all
		 * possible device names before returning "no device found" error.
		 * This also means if "/dev/hfi1_0" doesn't exist but other devices
		 * exist, we are still going to see the delay; but that's a rare case.
		 */
		glob_t glob_buf;

		if ((glob("/dev/hfi1_[0-9]", 0, NULL, &glob_buf) != 0) &&
		    (glob("/dev/hfi1_[0-9][0-9]", GLOB_APPEND, NULL, &glob_buf) != 0)) {
			FI_INFO(&psmx_prov, FI_LOG_CORE,
				"no hfi1 device is found.\n");
			return -FI_ENODATA;
		}
		globfree(&glob_buf);
	}

	if (psm_ep_num_devunits(&cnt) || !cnt) {
		FI_INFO(&psmx_prov, FI_LOG_CORE,
			"no PSM device is found.\n");
		return -FI_ENODATA;
	}

	src_addr = calloc(1, sizeof(*src_addr));
	if (!src_addr) {
		FI_INFO(&psmx_prov, FI_LOG_CORE,
			"failed to allocate src addr.\n");
		return -FI_ENODATA;
	}
	src_addr->signature = 0xFFFF;
	src_addr->unit = PSMX_DEFAULT_UNIT;
	src_addr->port = PSMX_DEFAULT_PORT;
	src_addr->service = PSMX_ANY_SERVICE;

	if (flags & FI_SOURCE) {
		if (node)
			sscanf(node, "%*[^:]:%" SCNi8 ":%" SCNu8, &src_addr->unit, &src_addr->port);
		if (service)
			sscanf(service, "%" SCNu32, &src_addr->service);
		FI_INFO(&psmx_prov, FI_LOG_CORE,
			"node '%s' service '%s' converted to <unit=%d, port=%d, service=%d>\n",
			node, service, src_addr->unit, src_addr->port, src_addr->service);
	} else if (node) {
		psm_uuid_t uuid;
		psmx_get_uuid(uuid);

		struct util_ns ns = {
			.port = psmx_uuid_to_port(uuid),
			.name_len = sizeof(*dest_addr),
			.service_len = sizeof(svc),
			.service_cmp = psmx_ns_service_cmp,
			.is_service_wildcard = psmx_ns_is_service_wildcard,
		};
		ofi_ns_init(&ns);

		if (service)
			svc = atoi(service);
		svc0 = svc;
		dest_addr = (psm_epid_t *)ofi_ns_resolve_name(&ns, node, &svc);
		if (dest_addr) {
			FI_INFO(&psmx_prov, FI_LOG_CORE,
				"'%s:%u' resolved to <epid=%"PRIu64">:%u\n",
				node, svc0, *dest_addr, svc);
		} else {
			FI_INFO(&psmx_prov, FI_LOG_CORE,
				"failed to resolve '%s:%u'.\n", node, svc);
			err = -FI_ENODATA;
			goto err_out;
		}
	}

	if (hints) {
		switch (hints->addr_format) {
		case FI_FORMAT_UNSPEC:
		case FI_ADDR_PSMX:
			break;
		default:
			FI_INFO(&psmx_prov, FI_LOG_CORE,
				"hints->addr_format=%d, supported=%d,%d.\n",
				hints->addr_format, FI_FORMAT_UNSPEC, FI_ADDR_PSMX);
			goto err_out;
		}

		if (hints->ep_attr) {
			switch (hints->ep_attr->protocol) {
			case FI_PROTO_UNSPEC:
			case FI_PROTO_PSMX:
				break;
			default:
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->protocol=%d, supported=%d %d\n",
					hints->ep_attr->protocol,
					FI_PROTO_UNSPEC, FI_PROTO_PSMX);
				goto err_out;
			}

			if (hints->ep_attr->tx_ctx_cnt > 1 &&
			    hints->ep_attr->tx_ctx_cnt != FI_SHARED_CONTEXT) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->ep_attr->tx_ctx_cnt=%"PRIu64", supported=0,1\n",
					hints->ep_attr->tx_ctx_cnt);
				goto err_out;
			}

			if (hints->ep_attr->rx_ctx_cnt > 1) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->ep_attr->rx_ctx_cnt=%"PRIu64", supported=0,1\n",
					hints->ep_attr->rx_ctx_cnt);
				goto err_out;
			}
		}

		if (hints->tx_attr) {
			if ((hints->tx_attr->op_flags & PSMX_OP_FLAGS) !=
			    hints->tx_attr->op_flags) {
				uint64_t psmx_op_flags = PSMX_OP_FLAGS;
				PSMX_INFO_DIFF("hints->tx_attr->of_flags",
					       hints->tx_attr->op_flags,
					       psmx_op_flags, FI_TYPE_OP_FLAGS);
				goto err_out;
			}
			if (hints->tx_attr->inject_size > PSMX_INJECT_SIZE) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->tx_attr->inject_size=%"PRIu64","
					"supported=%d.\n",
					hints->tx_attr->inject_size,
					PSMX_INJECT_SIZE);
				goto err_out;
			}
		}

		if (hints->rx_attr &&
		    (hints->rx_attr->op_flags & PSMX_OP_FLAGS) !=
		     hints->rx_attr->op_flags) {
			uint64_t psmx_op_flags = PSMX_OP_FLAGS;
			PSMX_INFO_DIFF("hints->rx_attr->of_flags",
				       hints->rx_attr->op_flags,
				       psmx_op_flags, FI_TYPE_OP_FLAGS);
			goto err_out;
		}

		if ((hints->caps & FI_TAGGED) ||
		    ((hints->caps & FI_MSG) && !psmx_env.am_msg)) {
			if ((hints->mode & FI_CONTEXT) != FI_CONTEXT) {
				uint64_t psmx_mode = FI_CONTEXT;
				PSMX_INFO_DIFF("hints->mode", hints->mode,
					       psmx_mode, FI_TYPE_MODE);
				goto err_out;
			}
		} else {
			mode = 0;
		}

		if (hints->domain_attr) {
			switch (hints->domain_attr->av_type) {
			case FI_AV_UNSPEC:
			case FI_AV_MAP:
			case FI_AV_TABLE:
				av_type = hints->domain_attr->av_type;
				break;
			default:
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->domain_attr->av_type=%d, supported=%d %d %d\n",
					hints->domain_attr->av_type, FI_AV_UNSPEC, FI_AV_MAP,
					FI_AV_TABLE);
				goto err_out;
			}

			if (hints->domain_attr->mr_mode == FI_MR_BASIC) {
				mr_mode = FI_MR_BASIC;
			} else if (hints->domain_attr->mr_mode == FI_MR_SCALABLE) {
				mr_mode = FI_MR_SCALABLE;
			} else if (hints->domain_attr->mr_mode & (FI_MR_BASIC | FI_MR_SCALABLE)) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->domain_attr->mr_mode has FI_MR_BASIC or FI_MR_SCALABLE "
					"combined with other bits\n");
				goto err_out;
			}

			switch (hints->domain_attr->threading) {
			case FI_THREAD_UNSPEC:
				break;
			case FI_THREAD_FID:
			case FI_THREAD_ENDPOINT:
			case FI_THREAD_COMPLETION:
			case FI_THREAD_DOMAIN:
				threading = hints->domain_attr->threading;
				break;
			default:
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->domain_attr->threading=%d, supported=%d %d %d %d %d\n",
					hints->domain_attr->threading, FI_THREAD_UNSPEC,
					FI_THREAD_FID, FI_THREAD_ENDPOINT, FI_THREAD_COMPLETION,
					FI_THREAD_DOMAIN);
				goto err_out;
			}

			switch (hints->domain_attr->control_progress) {
			case FI_PROGRESS_UNSPEC:
				break;
			case FI_PROGRESS_MANUAL:
			case FI_PROGRESS_AUTO:
				control_progress = hints->domain_attr->control_progress;
				break;
			default:
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->domain_attr->control_progress=%d, supported=%d %d %d\n",
					hints->domain_attr->control_progress, FI_PROGRESS_UNSPEC,
					FI_PROGRESS_MANUAL, FI_PROGRESS_AUTO);
				goto err_out;
			}

			switch (hints->domain_attr->data_progress) {
			case FI_PROGRESS_UNSPEC:
				break;
			case FI_PROGRESS_MANUAL:
			case FI_PROGRESS_AUTO:
				data_progress = hints->domain_attr->data_progress;
				break;
			default:
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->domain_attr->data_progress=%d, supported=%d %d %d\n",
					hints->domain_attr->data_progress, FI_PROGRESS_UNSPEC,
					FI_PROGRESS_MANUAL, FI_PROGRESS_AUTO);
				goto err_out;
			}

			if (hints->domain_attr->caps & FI_SHARED_AV) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->domain_attr->caps=%lx, shared AV is unsupported\n",
					hints->domain_attr->caps);
				goto err_out;
			}
		}

		if (hints->ep_attr) {
			if (hints->ep_attr->max_msg_size > PSMX_MAX_MSG_SIZE) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->ep_attr->max_msg_size=%"PRIu64","
					"supported=%llu.\n",
					hints->ep_attr->max_msg_size,
					PSMX_MAX_MSG_SIZE);
				goto err_out;
			}
			max_tag_value = ofi_max_tag(hints->ep_attr->mem_tag_format);
		}

		if (hints->tx_attr) {
			if ((hints->tx_attr->msg_order & PSMX_MSG_ORDER) !=
			    hints->tx_attr->msg_order) {
				uint64_t psmx_msg_order = PSMX_MSG_ORDER;
				PSMX_INFO_DIFF("hints->tx_attr->msg_order",
					       hints->tx_attr->msg_order,
					       psmx_msg_order, FI_TYPE_MSG_ORDER);
				goto err_out;
			}
			if ((hints->tx_attr->comp_order & PSMX_COMP_ORDER) !=
			    hints->tx_attr->comp_order) {
				uint64_t psmx_comp_order = PSMX_COMP_ORDER;
				PSMX_INFO_DIFF("hints->tx_attr->comp_order",
					       hints->tx_attr->comp_order,
					       psmx_comp_order, FI_TYPE_MSG_ORDER);
				goto err_out;
			}
			if (hints->tx_attr->inject_size > PSMX_INJECT_SIZE) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->tx_attr->inject_size=%ld,"
					"supported=%d.\n",
					hints->tx_attr->inject_size,
					PSMX_INJECT_SIZE);
				goto err_out;
			}
			if (hints->tx_attr->iov_limit > 1) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->tx_attr->iov_limit=%ld,"
					"supported=1.\n",
					hints->tx_attr->iov_limit);
				goto err_out;
			}
			if (hints->tx_attr->rma_iov_limit > 1) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->tx_attr->rma_iov_limit=%ld,"
					"supported=1.\n",
					hints->tx_attr->rma_iov_limit);
				goto err_out;
			}
		}

		if (hints->rx_attr) {
			if ((hints->rx_attr->msg_order & PSMX_MSG_ORDER) !=
			    hints->rx_attr->msg_order) {
				uint64_t psmx_msg_order = PSMX_MSG_ORDER;
				PSMX_INFO_DIFF("hints->rx_attr->msg_order",
					       hints->rx_attr->msg_order,
					       psmx_msg_order, FI_TYPE_MSG_ORDER);
				goto err_out;
			}
			if ((hints->rx_attr->comp_order & PSMX_COMP_ORDER) !=
			    hints->rx_attr->comp_order) {
				uint64_t psmx_comp_order = PSMX_COMP_ORDER;
				PSMX_INFO_DIFF("hints->rx_attr->comp_order",
					       hints->rx_attr->comp_order,
					       psmx_comp_order, FI_TYPE_MSG_ORDER);
				goto err_out;
			}
			if (hints->rx_attr->iov_limit > 1) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->rx_attr->iov_limit=%ld,"
					"supported=1.\n",
					hints->rx_attr->iov_limit);
				goto err_out;
			}
		}

		caps = hints->caps;

		/* TODO: check other fields of hints */
	}

	if (psmx_reserve_tag_bits(&caps, &max_tag_value) < 0)
		goto err_out;

	psmx_info = fi_allocinfo();
	if (!psmx_info) {
		err = -FI_ENOMEM;
		goto err_out;
	}

	psmx_info->ep_attr->type = ep_type;
	psmx_info->ep_attr->protocol = FI_PROTO_PSMX;
	psmx_info->ep_attr->protocol_version = PSM_VERNO;
	psmx_info->ep_attr->max_msg_size = PSMX_MAX_MSG_SIZE;
	psmx_info->ep_attr->max_order_raw_size = PSMX_RMA_ORDER_SIZE;
	psmx_info->ep_attr->max_order_war_size = PSMX_RMA_ORDER_SIZE;
	psmx_info->ep_attr->max_order_waw_size = PSMX_RMA_ORDER_SIZE;
	psmx_info->ep_attr->mem_tag_format = ofi_tag_format(max_tag_value);
	psmx_info->ep_attr->tx_ctx_cnt = 1;
	psmx_info->ep_attr->rx_ctx_cnt = 1;

	psmx_info->domain_attr->threading = threading;
	psmx_info->domain_attr->control_progress = control_progress;
	psmx_info->domain_attr->data_progress = data_progress;
	psmx_info->domain_attr->name = strdup(PSMX_DOMAIN_NAME);
	psmx_info->domain_attr->resource_mgmt = FI_RM_ENABLED;
	psmx_info->domain_attr->av_type = av_type;
	psmx_info->domain_attr->mr_mode = mr_mode;
	psmx_info->domain_attr->mr_key_size = sizeof(uint64_t);
	psmx_info->domain_attr->cq_data_size = 4;
	psmx_info->domain_attr->cq_cnt = 65535;
	psmx_info->domain_attr->ep_cnt = 65535;
	psmx_info->domain_attr->tx_ctx_cnt = 1;
	psmx_info->domain_attr->rx_ctx_cnt = 1;
	psmx_info->domain_attr->max_ep_tx_ctx = 1;
	psmx_info->domain_attr->max_ep_rx_ctx = 1;
	psmx_info->domain_attr->max_ep_stx_ctx = 65535;
	psmx_info->domain_attr->max_ep_srx_ctx = 0;
	psmx_info->domain_attr->cntr_cnt = 65535;
	psmx_info->domain_attr->mr_iov_limit = 65535;
	psmx_info->domain_attr->caps = PSMX_DOM_CAPS;
	psmx_info->domain_attr->mode = 0;
	psmx_info->domain_attr->mr_cnt = 65535;

	psmx_info->next = NULL;
	psmx_info->caps = (hints && hints->caps) ? hints->caps : caps;
	psmx_info->mode = mode;
	psmx_info->addr_format = FI_ADDR_PSMX;
	psmx_info->src_addr = src_addr;
	psmx_info->src_addrlen = sizeof(*src_addr);
	psmx_info->dest_addr = dest_addr;
	psmx_info->dest_addrlen = sizeof(*dest_addr);
	psmx_info->fabric_attr->name = strdup(PSMX_FABRIC_NAME);
	psmx_info->fabric_attr->prov_name = NULL;
	psmx_info->fabric_attr->prov_version = PSMX_VERSION;

	psmx_info->tx_attr->caps = psmx_info->caps;
	psmx_info->tx_attr->mode = psmx_info->mode;
	psmx_info->tx_attr->op_flags = (hints && hints->tx_attr && hints->tx_attr->op_flags)
					? hints->tx_attr->op_flags : 0;
	psmx_info->tx_attr->msg_order = PSMX_MSG_ORDER;
	psmx_info->tx_attr->comp_order = PSMX_COMP_ORDER;
	psmx_info->tx_attr->inject_size = PSMX_INJECT_SIZE;
	psmx_info->tx_attr->size = UINT64_MAX;
	psmx_info->tx_attr->iov_limit = 1;
	psmx_info->tx_attr->rma_iov_limit = 1;

	psmx_info->rx_attr->caps = psmx_info->caps;
	psmx_info->rx_attr->mode = psmx_info->mode;
	psmx_info->rx_attr->op_flags = (hints && hints->rx_attr && hints->rx_attr->op_flags)
					? hints->rx_attr->op_flags : 0;
	psmx_info->rx_attr->msg_order = PSMX_MSG_ORDER;
	psmx_info->rx_attr->comp_order = PSMX_COMP_ORDER;
	psmx_info->rx_attr->total_buffered_recv = ~(0ULL); /* that's how PSM handles it internally! */
	psmx_info->rx_attr->size = UINT64_MAX;
	psmx_info->rx_attr->iov_limit = 1;

	*info = psmx_info;
	return 0;

err_out:
	free(dest_addr);
	free(src_addr);

	return err;
}

static void psmx_fini(void)
{
	FI_INFO(&psmx_prov, FI_LOG_CORE, "\n");

	if (! --psmx_init_count && psmx_lib_initialized) {
		/* This function is called from a library destructor, which is called
		 * automatically when exit() is called. The call to psm_finalize()
		 * might cause deadlock if the applicaiton is terminated with Ctrl-C
		 * -- the application could be inside a PSM call, holding a lock that
		 * psm_finalize() tries to acquire. This can be avoided by only
		 * calling psm_finalize() when PSM is guaranteed to be unused.
		 */
		if (psmx_active_fabric) {
			FI_INFO(&psmx_prov, FI_LOG_CORE,
				"psmx_active_fabric != NULL, skip psm_finalize\n");
		} else {
			psm_finalize();
			psmx_lib_initialized = 0;
		}
	}
}