예제 #1
0
int psmx_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr,
			struct fid_cntr **cntr, void *context)
{
	struct psmx_fid_domain *domain_priv;
	struct psmx_fid_cntr *cntr_priv;
	struct psmx_fid_wait *wait = NULL;
	struct fi_wait_attr wait_attr;
	int wait_is_local = 0;
	int events;
	uint64_t flags;
	int err;

	events = FI_CNTR_EVENTS_COMP;
	flags = 0;
	domain_priv = container_of(domain, struct psmx_fid_domain, domain);

	switch (attr->events) {
	case FI_CNTR_EVENTS_COMP:
		events = attr->events;
		break;

	default:
		FI_INFO(&psmx_prov, FI_LOG_CQ,
			"attr->events=%d, supported=%d\n",
			attr->events, FI_CNTR_EVENTS_COMP);
		return -FI_EINVAL;
	}

	switch (attr->wait_obj) {
	case FI_WAIT_NONE:
	case FI_WAIT_UNSPEC:
		break;

	case FI_WAIT_SET:
		if (!attr->wait_set) {
			FI_INFO(&psmx_prov, FI_LOG_CQ,
				"FI_WAIT_SET is specified but attr->wait_set is NULL\n");
			return -FI_EINVAL;
		}
		wait = (struct psmx_fid_wait *)attr->wait_set;
		break;

	case FI_WAIT_FD:
	case FI_WAIT_MUTEX_COND:
		wait_attr.wait_obj = attr->wait_obj;
		wait_attr.flags = 0;
		err = psmx_wait_open(&domain_priv->fabric->fabric,
				     &wait_attr, (struct fid_wait **)&wait);
		if (err)
			return err;
		wait_is_local = 1;
		break;

	default:
		FI_INFO(&psmx_prov, FI_LOG_CQ,
			"attr->wait_obj=%d, supported=%d...%d\n",
			attr->wait_obj, FI_WAIT_NONE, FI_WAIT_MUTEX_COND);
		return -FI_EINVAL;
	}

	cntr_priv = (struct psmx_fid_cntr *) calloc(1, sizeof *cntr_priv);
	if (!cntr_priv) {
		err = -FI_ENOMEM;
		goto fail;
	}

	cntr_priv->domain = domain_priv;
	cntr_priv->events = events;
	cntr_priv->wait = wait;
	cntr_priv->wait_is_local = wait_is_local;
	cntr_priv->flags = flags;
	cntr_priv->cntr.fid.fclass = FI_CLASS_CNTR;
	cntr_priv->cntr.fid.context = context;
	cntr_priv->cntr.fid.ops = &psmx_fi_ops;
	cntr_priv->cntr.ops = &psmx_cntr_ops;

	pthread_mutex_init(&cntr_priv->trigger_lock, NULL);

	*cntr = &cntr_priv->cntr;
	return 0;
fail:
	if (wait && wait_is_local)
		fi_close(&wait->wait.fid);
	return err;
}
예제 #2
0
/*
 * Validate user-requested receive attributes against what a provider
 * advertises in prov_info->rx_attr.
 *
 * prov       - provider used for logging
 * prov_info  - provider fi_info supplying the supported rx_attr
 * user_attr  - receive attributes requested by the application
 * info_mode  - mode bits to check when user_attr->mode is 0
 *
 * Returns 0 when every requested value is supported, -FI_ENODATA as soon
 * as one is not (the mismatch is logged at info level).
 */
int ofi_check_rx_attr(const struct fi_provider *prov,
		      const struct fi_info *prov_info,
		      const struct fi_rx_attr *user_attr, uint64_t info_mode)
{
	const struct fi_rx_attr *prov_attr = prov_info->rx_attr;
	int rm_enabled = (prov_info->domain_attr->resource_mgmt == FI_RM_ENABLED);

	if (user_attr->caps & ~(prov_attr->caps)) {
		FI_INFO(prov, FI_LOG_CORE, "caps not supported\n");
		FI_INFO_CHECK(prov, prov_attr, user_attr, caps, FI_TYPE_CAPS);
		return -FI_ENODATA;
	}

	/* mode bits set on the rx_attr itself override the caller's value */
	info_mode = user_attr->mode ? user_attr->mode : info_mode;
	if ((info_mode & prov_attr->mode) != prov_attr->mode) {
		FI_INFO(prov, FI_LOG_CORE, "needed mode not set\n");
		FI_INFO_MODE(prov, prov_attr->mode, user_attr->mode);
		return -FI_ENODATA;
	}

	/*
	 * Compare the *user's* op_flags against the provider's.  Testing
	 * prov_attr against itself is always false and silently accepted
	 * any requested flag.
	 */
	if (user_attr->op_flags & ~(prov_attr->op_flags)) {
		FI_INFO(prov, FI_LOG_CORE, "op_flags not supported\n");
		FI_INFO_CHECK(prov, prov_attr, user_attr, op_flags,
			     FI_TYPE_OP_FLAGS);
		return -FI_ENODATA;
	}

	if (user_attr->msg_order & ~(prov_attr->msg_order)) {
		FI_INFO(prov, FI_LOG_CORE, "msg_order not supported\n");
		FI_INFO_CHECK(prov, prov_attr, user_attr, msg_order,
			     FI_TYPE_MSG_ORDER);
		return -FI_ENODATA;
	}

	if (user_attr->comp_order & ~(prov_attr->comp_order)) {
		FI_INFO(prov, FI_LOG_CORE, "comp_order not supported\n");
		FI_INFO_CHECK(prov, prov_attr, user_attr, comp_order,
			     FI_TYPE_MSG_ORDER);
		return -FI_ENODATA;
	}

	if (user_attr->total_buffered_recv > prov_attr->total_buffered_recv) {
		FI_INFO(prov, FI_LOG_CORE, "total_buffered_recv too large\n");
		FI_INFO_CHECK_VAL(prov, prov_attr, user_attr,
				  total_buffered_recv);
		return -FI_ENODATA;
	}

	if (user_attr->size > prov_attr->size) {
		FI_INFO(prov, FI_LOG_CORE, "size is greater than supported\n");
		FI_INFO_CHECK_VAL(prov, prov_attr, user_attr, size);
		return -FI_ENODATA;
	}

	if (user_attr->iov_limit > prov_attr->iov_limit) {
		FI_INFO(prov, FI_LOG_CORE, "iov_limit too large\n");
		FI_INFO_CHECK_VAL(prov, prov_attr, user_attr, iov_limit);
		return -FI_ENODATA;
	}

	/*
	 * NOTE(review): this branch cannot be reached as written, because the
	 * total_buffered_recv check above already returns on the same
	 * condition.  Kept untouched until it is decided whether the earlier
	 * check should only reject when resource management is enabled.
	 */
	if (!rm_enabled &&
	    user_attr->total_buffered_recv > prov_attr->total_buffered_recv) {
		/* Just log a notification, but ignore the value */
		FI_INFO(prov, FI_LOG_CORE,
			"Total buffered recv size exceeds supported size\n");
		FI_INFO_CHECK_VAL(prov, prov_attr, user_attr,
				  total_buffered_recv);
	}

	return 0;
}
예제 #3
0
파일: fabric.c 프로젝트: wesbland/libfabric
static int fi_register_provider(struct fi_provider *provider, void *dlhandle)
{
	struct fi_prov_context *ctx;
	struct fi_prov *prov;
	int ret;

	if (!provider) {
		ret = -FI_EINVAL;
		goto cleanup;
	}

	FI_INFO(&core_prov, FI_LOG_CORE,
	       "registering provider: %s (%d.%d)\n", provider->name,
	       FI_MAJOR(provider->version), FI_MINOR(provider->version));

	if (FI_MAJOR(provider->fi_version) != FI_MAJOR_VERSION ||
	    FI_MINOR(provider->fi_version) != FI_MINOR_VERSION) {
		FI_INFO(&core_prov, FI_LOG_CORE,
		       "provider has unsupported FI version (provider %d.%d != libfabric %d.%d); ignoring\n",
		       FI_MAJOR(provider->fi_version),
		       FI_MINOR(provider->fi_version), FI_MAJOR_VERSION,
		       FI_MINOR_VERSION);

		ret = -FI_ENOSYS;
		goto cleanup;
	}

	if (fi_apply_filter(&prov_filter, provider->name)) {
		FI_INFO(&core_prov, FI_LOG_CORE,
			"\"%s\" filtered by provider include/exclude list, skipping\n",
			provider->name);
		ret = -FI_ENODEV;
		goto cleanup;
	}

	if (fi_apply_filter(&prov_log_filter, provider->name)) {
		ctx = (struct fi_prov_context *) &provider->context;
		ctx->disable_logging = 1;
	}

	prov = fi_getprov(provider->name);
	if (prov) {
		/* If this provider is older than an already-loaded
		 * provider of the same name, then discard this one.
		 */
		if (FI_VERSION_GE(prov->provider->version, provider->version)) {
			FI_INFO(&core_prov, FI_LOG_CORE,
			       "a newer %s provider was already loaded; ignoring this one\n",
			       provider->name);
			ret = -FI_EALREADY;
			goto cleanup;
		}

		/* This provider is newer than an already-loaded
		 * provider of the same name, so discard the
		 * already-loaded one.
		 */
		FI_INFO(&core_prov, FI_LOG_CORE,
		       "an older %s provider was already loaded; keeping this one and ignoring the older one\n",
		       provider->name);
		cleanup_provider(prov->provider, prov->dlhandle);

		prov->dlhandle = dlhandle;
		prov->provider = provider;
		return 0;
	}

	prov = calloc(sizeof *prov, 1);
	if (!prov) {
		ret = -FI_ENOMEM;
		goto cleanup;
	}

	prov->dlhandle = dlhandle;
	prov->provider = provider;
	if (prov_tail)
		prov_tail->next = prov;
	else
		prov_head = prov;
	prov_tail = prov;
	return 0;

cleanup:
	cleanup_provider(provider, dlhandle);
	return ret;
}
예제 #4
0
/*
 * Allocate and fill an fi_info describing one verbs device for the given
 * endpoint domain (MSG or RDM).
 *
 * ctx     - opened verbs device context the attributes are queried from
 * info    - receives the newly allocated fi_info on success
 * ep_dom  - endpoint-domain description (type, caps, domain-name suffix)
 *
 * Returns 0 on success.  On failure the partially built fi_info is freed
 * and a negative error code is returned (-FI_ENOMEM, -FI_EINVAL,
 * -FI_ENODATA, -errno from ibv_query_gid(), or the error from
 * fi_ibv_get_device_attrs()).
 */
static int fi_ibv_alloc_info(struct ibv_context *ctx, struct fi_info **info,
			     const struct verbs_ep_domain *ep_dom)
{
	struct fi_info *fi;
	union ibv_gid gid;
	size_t name_len;
	int ret;
	int param;

	if (!(fi = fi_allocinfo()))
		return -FI_ENOMEM;

	fi->caps		= ep_dom->caps;
	fi->handle		= NULL;
	if (ep_dom->type == FI_EP_RDM) {
		fi->mode	= VERBS_RDM_MODE;
		*(fi->tx_attr)	= verbs_rdm_tx_attr;
	} else {
		fi->mode	= VERBS_MODE;
		*(fi->tx_attr)	= verbs_tx_attr;
	}

	*(fi->rx_attr)		= (ep_dom->type == FI_EP_RDM)
				? verbs_rdm_rx_attr : verbs_rx_attr;
	*(fi->ep_attr)		= verbs_ep_attr;
	*(fi->domain_attr)	= verbs_domain_attr;
	*(fi->fabric_attr)	= verbs_fabric_attr;

	fi->ep_attr->type	= ep_dom->type;
	fi->tx_attr->caps	= ep_dom->caps;
	fi->rx_attr->caps	= ep_dom->caps;

	ret = fi_ibv_get_device_attrs(ctx, fi);
	if (ret)
		goto err;

	if (ep_dom->type == FI_EP_RDM) {
		fi->tx_attr->inject_size = FI_IBV_RDM_DFLT_BUFFERED_SSIZE;
		fi->tx_attr->iov_limit = 1;
		fi->tx_attr->rma_iov_limit = 1;
		if (!fi_param_get_int(&fi_ibv_prov, "rdm_buffer_size", &param)) {
			/*
			 * Reject non-positive values explicitly: comparing a
			 * negative int with sizeof would convert it to a huge
			 * unsigned value and let it through.
			 */
			if (param > 0 &&
			    (size_t) param > sizeof (struct fi_ibv_rdm_rndv_header)) {
				fi->tx_attr->inject_size = param;
			} else {
				/* sizeof yields size_t, which needs %zu */
				FI_INFO(&fi_ibv_prov, FI_LOG_CORE,
					"rdm_buffer_size too small, should be greater than %zu\n",
					sizeof (struct fi_ibv_rdm_rndv_header));
				ret = -FI_EINVAL;
				goto err;
			}
		}
		fi->domain_attr->resource_mgmt = FI_RM_ENABLED;
	}

	switch (ctx->device->transport_type) {
	case IBV_TRANSPORT_IB:
		if (ibv_query_gid(ctx, 1, 0, &gid)) {
			/* capture errno before logging can overwrite it */
			ret = -errno;
			VERBS_INFO_ERRNO(FI_LOG_FABRIC, "ibv_query_gid", -ret);
			goto err;
		}

		name_len =  strlen(VERBS_IB_PREFIX) + INET6_ADDRSTRLEN;

		if (!(fi->fabric_attr->name = calloc(1, name_len + 1))) {
			ret = -FI_ENOMEM;
			goto err;
		}

		/* fabric name is the IB prefix followed by the subnet prefix
		 * in hex; the cast keeps the format specifier and argument
		 * type in agreement on every platform.
		 */
		snprintf(fi->fabric_attr->name, name_len, VERBS_IB_PREFIX "%llx",
			 (unsigned long long) gid.global.subnet_prefix);

		fi->ep_attr->protocol = (ep_dom == &verbs_msg_domain) ?
					FI_PROTO_RDMA_CM_IB_RC : FI_PROTO_IB_RDM;
		break;
	case IBV_TRANSPORT_IWARP:
		fi->fabric_attr->name = strdup(VERBS_IWARP_FABRIC);
		if (!fi->fabric_attr->name) {
			ret = -FI_ENOMEM;
			goto err;
		}

		if (ep_dom == &verbs_msg_domain) {
			fi->ep_attr->protocol = FI_PROTO_IWARP;
			fi->tx_attr->op_flags = VERBS_TX_OP_FLAGS_IWARP;
		} else {
			fi->ep_attr->protocol = FI_PROTO_IWARP_RDM;
			fi->tx_attr->op_flags = VERBS_TX_OP_FLAGS_IWARP_RDM;
		}
		break;
	default:
		FI_INFO(&fi_ibv_prov, FI_LOG_CORE, "Unknown transport type\n");
		ret = -FI_ENODATA;
		goto err;
	}

	/* domain name is "<device name><endpoint-domain suffix>" */
	name_len = strlen(ctx->device->name) + strlen(ep_dom->suffix);
	fi->domain_attr->name = malloc(name_len + 1);
	if (!fi->domain_attr->name) {
		ret = -FI_ENOMEM;
		goto err;
	}

	snprintf(fi->domain_attr->name, name_len + 1, "%s%s",
		 ctx->device->name, ep_dom->suffix);
	fi->domain_attr->name[name_len] = '\0';

	*info = fi;
	return 0;
err:
	fi_freeinfo(fi);
	return ret;
}
예제 #5
0
int ofi_check_domain_attr(const struct fi_provider *prov, uint32_t api_version,
			  const struct fi_domain_attr *prov_attr,
			  const struct fi_info *user_info)
{
	const struct fi_domain_attr *user_attr = user_info->domain_attr;

	if (prov_attr->name && user_attr->name &&
	    strcasecmp(user_attr->name, prov_attr->name)) {
		FI_INFO(prov, FI_LOG_CORE, "Unknown domain name\n");
		FI_INFO_NAME(prov, prov_attr, user_attr);
		return -FI_ENODATA;
	}

	if (fi_thread_level(user_attr->threading) <
	    fi_thread_level(prov_attr->threading)) {
		FI_INFO(prov, FI_LOG_CORE, "Invalid threading model\n");
		return -FI_ENODATA;
	}

	if (fi_progress_level(user_attr->control_progress) <
	    fi_progress_level(prov_attr->control_progress)) {
		FI_INFO(prov, FI_LOG_CORE, "Invalid control progress model\n");
		return -FI_ENODATA;
	}

	if (fi_progress_level(user_attr->data_progress) <
	    fi_progress_level(prov_attr->data_progress)) {
		FI_INFO(prov, FI_LOG_CORE, "Invalid data progress model\n");
		return -FI_ENODATA;
	}

	if (fi_resource_mgmt_level(user_attr->resource_mgmt) <
	    fi_resource_mgmt_level(prov_attr->resource_mgmt)) {
		FI_INFO(prov, FI_LOG_CORE, "Invalid resource mgmt model\n");
		return -FI_ENODATA;
	}

	if ((prov_attr->av_type != FI_AV_UNSPEC) &&
	    (user_attr->av_type != FI_AV_UNSPEC) &&
	    (prov_attr->av_type != user_attr->av_type)) {
		FI_INFO(prov, FI_LOG_CORE, "Invalid AV type\n");
	   	return -FI_ENODATA;
	}

	if (user_attr->cq_data_size > prov_attr->cq_data_size) {
		FI_INFO(prov, FI_LOG_CORE, "CQ data size too large\n");
		FI_INFO_CHECK_VAL(prov, prov_attr, user_attr, cq_data_size);
		return -FI_ENODATA;
	}

	if (ofi_check_mr_mode(prov, api_version, prov_attr->mr_mode, user_info))
		return -FI_ENODATA;

	if (user_attr->max_ep_stx_ctx > prov_attr->max_ep_stx_ctx) {
		FI_INFO(prov, FI_LOG_CORE, "max_ep_stx_ctx greater than supported\n");
		FI_INFO_CHECK_VAL(prov, prov_attr, user_attr, max_ep_stx_ctx);
	}

	if (user_attr->max_ep_srx_ctx > prov_attr->max_ep_srx_ctx) {
		FI_INFO(prov, FI_LOG_CORE, "max_ep_srx_ctx greater than supported\n");
		FI_INFO_CHECK_VAL(prov, prov_attr, user_attr, max_ep_srx_ctx);
	}

	/* following checks only apply to api 1.5 and beyond */
	if (FI_VERSION_LT(api_version, FI_VERSION(1, 5)))
		return 0;

	if (user_attr->cntr_cnt > prov_attr->cntr_cnt) {
		FI_INFO(prov, FI_LOG_CORE, "Cntr count too large\n");
		return -FI_ENODATA;
	}

	if (user_attr->mr_iov_limit > prov_attr->mr_iov_limit) {
		FI_INFO(prov, FI_LOG_CORE, "MR iov limit too large\n");
		FI_INFO_CHECK_VAL(prov, prov_attr, user_attr, mr_iov_limit);
		return -FI_ENODATA;
	}

	if (user_attr->caps & ~(prov_attr->caps)) {
		FI_INFO(prov, FI_LOG_CORE, "Requested domain caps not supported\n");
		FI_INFO_CHECK(prov, prov_attr, user_attr, caps, FI_TYPE_CAPS);
		return -FI_ENODATA;
	}

	if ((user_attr->mode & prov_attr->mode) != prov_attr->mode) {
		FI_INFO(prov, FI_LOG_CORE, "Required domain mode missing\n");
		FI_INFO_MODE(prov, prov_attr->mode, user_attr->mode);
		return -FI_ENODATA;
	}

	if (user_attr->max_err_data > prov_attr->max_err_data) {
		FI_INFO(prov, FI_LOG_CORE, "Max err data too large\n");
		FI_INFO_CHECK_VAL(prov, prov_attr, user_attr, max_err_data);
		return -FI_ENODATA;
	}

	if (user_attr->mr_cnt > prov_attr->mr_cnt) {
		FI_INFO(prov, FI_LOG_CORE, "MR count too large\n");
		FI_INFO_CHECK_VAL(prov, prov_attr, user_attr, mr_cnt);
		return -FI_ENODATA;
	}

	return 0;
}
예제 #6
0
/*
 * Open the PSMX2 fabric.
 *
 * attr->name must equal PSMX2_FABRIC_NAME, otherwise -FI_ENODATA is
 * returned.  If a fabric is already active, a reference is taken on it via
 * psmx2_fabric_acquire() and it is returned instead of creating a new one.
 * Otherwise a new fabric object is allocated, the name server is started
 * when psmx2_env.name_server is set, and the object becomes the active
 * fabric.
 *
 * Returns 0 with *fabric set on success, -FI_ENOMEM on allocation failure,
 * or the error reported by ofi_fabric_init().
 */
int psmx2_fabric(struct fi_fabric_attr *attr,
		 struct fid_fabric **fabric, void *context)
{
	struct psmx2_fid_fabric *new_fabric;
	int rc;

	FI_INFO(&psmx2_prov, FI_LOG_CORE, "\n");

	if (strcmp(attr->name, PSMX2_FABRIC_NAME) != 0)
		return -FI_ENODATA;

	/* reuse the fabric that is already open, if any */
	if (psmx2_active_fabric) {
		psmx2_fabric_acquire(psmx2_active_fabric);
		*fabric = &psmx2_active_fabric->util_fabric.fabric_fid;
		return 0;
	}

	new_fabric = calloc(1, sizeof(*new_fabric));
	if (!new_fabric)
		return -FI_ENOMEM;

	fastlock_init(&new_fabric->domain_lock);
	dlist_init(&new_fabric->domain_list);

	psmx2_get_uuid(new_fabric->uuid);
	if (psmx2_env.name_server) {
		/* name server port is derived from the job uuid */
		new_fabric->name_server.port = psmx2_uuid_to_port(new_fabric->uuid);
		new_fabric->name_server.name_len = sizeof(struct psmx2_ep_name);
		new_fabric->name_server.service_len = sizeof(int);
		new_fabric->name_server.service_cmp = psmx2_ns_service_cmp;
		new_fabric->name_server.is_service_wildcard = psmx2_ns_is_service_wildcard;

		ofi_ns_init(&new_fabric->name_server);
		ofi_ns_start_server(&new_fabric->name_server);
	}

	rc = ofi_fabric_init(&psmx2_prov, &psmx2_fabric_attr, attr,
			     &new_fabric->util_fabric, context);
	if (rc != 0) {
		FI_INFO(&psmx2_prov, FI_LOG_CORE, "ofi_fabric_init returns %d\n", rc);
		if (psmx2_env.name_server)
			ofi_ns_stop_server(&new_fabric->name_server);
		free(new_fabric);
		return rc;
	}

	/* ofi_fabric_init has set fclass and context; install our ops */
	new_fabric->util_fabric.fabric_fid.fid.ops = &psmx2_fabric_fi_ops;
	new_fabric->util_fabric.fabric_fid.ops = &psmx2_fabric_ops;

	psmx2_atomic_global_init();
	psmx2_query_mpi();

	/* this reference accounts for repeated fabric open calls */
	psmx2_fabric_acquire(new_fabric);

	*fabric = &new_fabric->util_fabric.fabric_fid;
	psmx2_active_fabric = new_fabric;

	return 0;
}
예제 #7
0
int fi_ibv_check_domain_attr(const struct fi_domain_attr *attr,
			     const struct fi_info *info)
{
	if (attr->name && strcmp(attr->name, info->domain_attr->name)) {
		FI_INFO(&fi_ibv_prov, FI_LOG_CORE, "Unknown domain name\n");
		return -FI_ENODATA;
	}

	switch (attr->threading) {
	case FI_THREAD_UNSPEC:
	case FI_THREAD_SAFE:
	case FI_THREAD_FID:
	case FI_THREAD_DOMAIN:
	case FI_THREAD_COMPLETION:
	case FI_THREAD_ENDPOINT:
		break;
	default:
		FI_INFO(&fi_ibv_prov, FI_LOG_CORE,
			"Invalid threading model\n");
		return -FI_ENODATA;
	}

	switch (attr->control_progress) {
	case FI_PROGRESS_UNSPEC:
	case FI_PROGRESS_AUTO:
	case FI_PROGRESS_MANUAL:
		break;
	default:
		FI_INFO(&fi_ibv_prov, FI_LOG_CORE,
			"Given control progress mode not supported\n");
		return -FI_ENODATA;
	}

	switch (attr->data_progress) {
	case FI_PROGRESS_UNSPEC:
	case FI_PROGRESS_AUTO:
	case FI_PROGRESS_MANUAL:
		break;
	default:
		FI_INFO(&fi_ibv_prov, FI_LOG_CORE,
			"Given data progress mode not supported!\n");
		return -FI_ENODATA;
	}

	switch (attr->mr_mode) {
	case FI_MR_UNSPEC:
	case FI_MR_BASIC:
		break;
	default:
		FI_INFO(&fi_ibv_prov, FI_LOG_CORE,
			"MR mode not supported\n");
		return -FI_ENODATA;
	}

	if (attr->mr_key_size > info->domain_attr->mr_key_size) {
		FI_INFO(&fi_ibv_prov, FI_LOG_CORE,
			"MR key size too large\n");
		return -FI_ENODATA;
	}

	if (attr->cq_data_size > info->domain_attr->cq_data_size) {
		FI_INFO(&fi_ibv_prov, FI_LOG_CORE,
			"CQ data size too large\n");
		return -FI_ENODATA;
	}

	if (attr->cq_cnt > info->domain_attr->cq_cnt) {
		FI_INFO(&fi_ibv_prov, FI_LOG_CORE,
			"cq_cnt exceeds supported size\n");
		return -FI_ENODATA;
	}

	if (attr->ep_cnt > info->domain_attr->ep_cnt) {
		FI_INFO(&fi_ibv_prov, FI_LOG_CORE,
			"ep_cnt exceeds supported size\n");
		return -FI_ENODATA;
	}

	if (attr->max_ep_tx_ctx > info->domain_attr->max_ep_tx_ctx) {
		FI_INFO(&fi_ibv_prov, FI_LOG_CORE,
			"domain_attr: max_ep_tx_ctx exceeds supported size\n");
		return -FI_ENODATA;
	}

	if (attr->max_ep_rx_ctx > info->domain_attr->max_ep_rx_ctx) {
		FI_INFO(&fi_ibv_prov, FI_LOG_CORE,
			"domain_attr: max_ep_rx_ctx exceeds supported size\n");
		return -FI_ENODATA;
	}

	return 0;
}
예제 #8
0
파일: psmx_av.c 프로젝트: Slbomber/ompi
/*
 * Insert `count` PSM endpoint ids into the address vector and connect to
 * the ones that are not connected yet.
 *
 * av       - address vector
 * addr     - array of `count` psm_epid_t values
 * count    - number of addresses
 * fi_addr  - receives the resulting addresses: epaddr handles for a
 *            non-table AV, table indices for FI_AV_TABLE (may be NULL for a
 *            table AV); failed entries are set to FI_ADDR_NOTAVAIL
 * flags    - not examined by this function
 * context  - context attached to the events posted when the AV was opened
 *            with FI_EVENT
 *
 * Without FI_EVENT the number of successfully inserted addresses is
 * returned.  With FI_EVENT one error event is queued per failed address,
 * followed by a completion event carrying the success count, and 0 is
 * returned.  Negative FI_* codes report allocation or configuration errors.
 */
static int psmx_av_insert(struct fid_av *av, const void *addr, size_t count,
			  fi_addr_t *fi_addr, uint64_t flags, void *context)
{
	struct psmx_fid_av *av_priv;
	psm_error_t *errors;
	int error_count = 0;
	int *mask;
	int i, j;
	fi_addr_t *result = NULL;
	struct psmx_epaddr_context *epaddr_context;
	struct psmx_eq_event *event;

	av_priv = container_of(av, struct psmx_fid_av, av);

	if ((av_priv->flags & FI_EVENT) && !av_priv->eq)
		return -FI_ENOEQ;

	errors = (psm_error_t *) calloc(count, sizeof *errors);
	if (!errors)
		return -FI_ENOMEM;

	mask = (int *) calloc(count, sizeof *mask);
	if (!mask) {
		free(errors);
		return -FI_ENOMEM;
	}

	if (av_priv->type == FI_AV_TABLE) {
		if (psmx_av_check_table_size(av_priv, count)) {
			free(mask);
			free(errors);
			return -FI_ENOMEM;
		}

		for (i=0; i<count; i++)
			av_priv->psm_epids[av_priv->last + i] = ((psm_epid_t *)addr)[i];

		/* From here on work directly on the table storage; the
		 * caller's output array is filled with indices at the end.
		 */
		result = fi_addr;
		addr = (const void *)(av_priv->psm_epids + av_priv->last);
		fi_addr = (fi_addr_t *)(av_priv->psm_epaddrs + av_priv->last);
	}

	/* prevent connecting to the same ep twice, which is fatal in PSM */
	for (i=0; i<count; i++) {
		psm_epconn_t epconn;
		if (psm_ep_epid_lookup(((psm_epid_t *) addr)[i], &epconn) == PSM_OK) {
			epaddr_context = psm_epaddr_getctxt(epconn.addr);
			if (epaddr_context && epaddr_context->epid  == ((psm_epid_t *) addr)[i])
				((psm_epaddr_t *) fi_addr)[i] = epconn.addr;
			else
				mask[i] = 1;
		}
		else {
			mask[i] = 1;
		}
	}

	psm_ep_connect(av_priv->domain->psm_ep, count, 
			(psm_epid_t *) addr, mask, errors,
			(psm_epaddr_t *) fi_addr, 30*1e9);

	for (i=0; i<count; i++){
		if (!mask[i])
			continue;

		if (errors[i] == PSM_OK || errors[i] == PSM_EPID_ALREADY_CONNECTED) {
			psmx_set_epaddr_context(av_priv->domain,
						((psm_epid_t *) addr)[i],
						((psm_epaddr_t *) fi_addr)[i]);
		}
		else {
			FI_INFO(&psmx_prov, FI_LOG_AV,
				"%d: psm_ep_connect returned %s. remote epid=%lx.\n",
				i, psm_error_get_string(errors[i]),
				((psm_epid_t *)addr)[i]);
			/* adjacent literals are concatenated as-is, so each
			 * piece carries its own trailing space
			 */
			if (((psm_epid_t *)addr)[i] == 0)
				FI_INFO(&psmx_prov, FI_LOG_AV,
					"does the application depend on the provider "
					"to resolve IP address into endpoint id? if so "
					"check if the name server has started correctly "
					"at the other side.\n");
			fi_addr[i] = FI_ADDR_NOTAVAIL;
			error_count++;

			if (av_priv->flags & FI_EVENT) {
				event = psmx_eq_create_event(av_priv->eq,
							     FI_AV_COMPLETE,		/* event */
							     context,			/* context */
							     i,				/* data: failed index */
							     psmx_errno(errors[i]),	/* err */
							     errors[i],			/* prov_errno */
							     NULL,			/* err_data */
							     0);			/* err_data_size */
				if (!event) {
					/* release the scratch arrays before
					 * bailing out; they used to leak here
					 */
					free(mask);
					free(errors);
					return -FI_ENOMEM;
				}

				psmx_eq_enqueue_event(av_priv->eq, event);
			}
		}
	}

	free(mask);
	free(errors);

	if (av_priv->type == FI_AV_TABLE) {
		/* NOTE: unresolved addresses are left in the AV table */
		if (result) {
			for (i=0; i<count; i++) {
				j = av_priv->last + i;
				if ((fi_addr_t)av_priv->psm_epaddrs[j] == FI_ADDR_NOTAVAIL)
					result[i] = FI_ADDR_NOTAVAIL;
				else
					result[i] = j;
			}
		}
		av_priv->last += count;
	}

	if (!(av_priv->flags & FI_EVENT))
		return count - error_count;

	event = psmx_eq_create_event(av_priv->eq,
				     FI_AV_COMPLETE,		/* event */
				     context,			/* context */
				     count - error_count,	/* data: succ count */
				     0,				/* err */
				     0,				/* prov_errno */
				     NULL,			/* err_data */
				     0);			/* err_data_size */
	if (!event)
		return -FI_ENOMEM;

	psmx_eq_enqueue_event(av_priv->eq, event);
	return 0;
}
예제 #9
0
/*
 * Connect to the endpoints in epids[] that are not connected yet and fill
 * epaddrs[] with the resulting endpoint addresses.
 *
 * av       - address vector whose domain endpoint performs the connect
 * count    - number of entries in epids/mask/errors/epaddrs
 * epids    - endpoint ids to resolve
 * mask     - scratch array; entries are set to 1 here for every epid that
 *            must be passed to psm2_ep_connect().  Entries are never
 *            cleared in this function, so the caller is expected to pass
 *            the array zeroed.
 * errors   - per-entry result from psm2_ep_connect()
 * epaddrs  - output; failed entries are set to FI_ADDR_NOTAVAIL
 * context  - passed to psmx2_av_post_completion() for failures when the AV
 *            has FI_EVENT set
 *
 * Returns the number of addresses that could not be connected.
 */
static int psmx2_av_connet_eps(struct psmx2_fid_av *av, size_t count,
			       psm2_epid_t *epids, int *mask,
			       psm2_error_t *errors,
			       psm2_epaddr_t *epaddrs,
			       void *context)
{
	int i;
	psm2_epconn_t epconn;
	struct psmx2_epaddr_context *epaddr_context;
	int error_count = 0;

	/* set up mask to prevent connecting to an already connected ep */
	for (i=0; i<count; i++) {
		if (psm2_ep_epid_lookup(epids[i], &epconn) == PSM2_OK) {
			epaddr_context = psm2_epaddr_getctxt(epconn.addr);
			if (epaddr_context && epaddr_context->epid == epids[i])
				epaddrs[i] = epconn.addr;
			else
				mask[i] = 1;
		} else {
			mask[i] = 1;
		}
	}

	psm2_ep_connect(av->domain->psm2_ep, count, epids, mask, errors,
			epaddrs, psmx2_conn_timeout(count));

	for (i=0; i<count; i++){
		if (!mask[i])
			continue;

		if (errors[i] == PSM2_OK ||
		    errors[i] == PSM2_EPID_ALREADY_CONNECTED) {
			psmx2_set_epaddr_context(av->domain, epids[i], epaddrs[i]);
		} else {
			/* If duplicated addrs are passed to psm2_ep_connect(),
			 * all but one will fail with error "Endpoint could not
			 * be reached". This should be treated the same as
			 * "Endpoint already connected".
			 */
			if (psm2_ep_epid_lookup(epids[i], &epconn) == PSM2_OK) {
				epaddr_context = psm2_epaddr_getctxt(epconn.addr);
				if (epaddr_context &&
				    epaddr_context->epid == epids[i]) {
					epaddrs[i] = epconn.addr;
					continue;
				}
			}

			FI_INFO(&psmx2_prov, FI_LOG_AV,
				"%d: psm2_ep_connect returned %s. remote epid=%lx.\n",
				i, psm2_error_get_string(errors[i]), epids[i]);
			/* adjacent literals are concatenated as-is, so each
			 * piece carries its own trailing space
			 */
			if (epids[i] == 0)
				FI_INFO(&psmx2_prov, FI_LOG_AV,
					"does the application depend on the provider "
					"to resolve IP address into endpoint id? if so "
					"check if the name server has started correctly "
					"at the other side.\n");
			epaddrs[i] = (void *)FI_ADDR_NOTAVAIL;
			error_count++;

			if (av->flags & FI_EVENT)
				psmx2_av_post_completion(av, context, i, errors[i]);
		}
	}

	return error_count;
}
예제 #10
0
/*
 * Insert `count` PSM endpoint ids into the address vector and connect to
 * the ones that are not connected yet.
 *
 * av       - address vector
 * addr     - array of `count` psm_epid_t values; must not be NULL when
 *            count is non-zero
 * count    - number of addresses
 * fi_addr  - receives the resulting addresses: epaddr handles for a
 *            non-table AV, table indices for FI_AV_TABLE (may be NULL for a
 *            table AV); failed entries are set to FI_ADDR_NOTAVAIL
 * flags    - not examined by this function
 * context  - context passed to psmx_av_post_completion() when the AV was
 *            opened with FI_EVENT
 *
 * Without FI_EVENT the number of successfully inserted addresses is
 * returned.  With FI_EVENT a completion is posted per failed address,
 * followed by one carrying the success count, and 0 is returned.
 * Negative FI_* codes report invalid arguments or allocation failures.
 */
static int psmx_av_insert(struct fid_av *av, const void *addr, size_t count,
			  fi_addr_t *fi_addr, uint64_t flags, void *context)
{
	struct psmx_fid_av *av_priv;
	psm_error_t *errors;
	int error_count = 0;
	int *mask;
	int i, j;
	fi_addr_t *result = NULL;
	struct psmx_epaddr_context *epaddr_context;

	if (count && !addr) {
		FI_INFO(&psmx_prov, FI_LOG_AV,
			"the input address array is NULL.\n");
		return -FI_EINVAL;
	}

	av_priv = container_of(av, struct psmx_fid_av, av);

	if ((av_priv->flags & FI_EVENT) && !av_priv->eq)
		return -FI_ENOEQ;

	errors = (psm_error_t *) calloc(count, sizeof *errors);
	if (!errors)
		return -FI_ENOMEM;

	mask = (int *) calloc(count, sizeof *mask);
	if (!mask) {
		free(errors);
		return -FI_ENOMEM;
	}

	if (av_priv->type == FI_AV_TABLE) {
		if (psmx_av_check_table_size(av_priv, count)) {
			free(mask);
			free(errors);
			return -FI_ENOMEM;
		}

		for (i=0; i<count; i++)
			av_priv->psm_epids[av_priv->last + i] = ((psm_epid_t *)addr)[i];

		/* From here on work directly on the table storage; the
		 * caller's output array is filled with indices at the end.
		 */
		result = fi_addr;
		addr = (const void *)(av_priv->psm_epids + av_priv->last);
		fi_addr = (fi_addr_t *)(av_priv->psm_epaddrs + av_priv->last);
	}

	/* prevent connecting to the same ep twice, which is fatal in PSM */
	for (i=0; i<count; i++) {
		psm_epconn_t epconn;
		if (psm_ep_epid_lookup(((psm_epid_t *) addr)[i], &epconn) == PSM_OK) {
			epaddr_context = psm_epaddr_getctxt(epconn.addr);
			if (epaddr_context && epaddr_context->epid  == ((psm_epid_t *) addr)[i])
				((psm_epaddr_t *) fi_addr)[i] = epconn.addr;
			else
				mask[i] = 1;
		} else {
			mask[i] = 1;
		}
	}

	psm_ep_connect(av_priv->domain->psm_ep, count, 
			(psm_epid_t *) addr, mask, errors,
			(psm_epaddr_t *) fi_addr, 30*1e9);

	for (i=0; i<count; i++){
		if (!mask[i])
			continue;

		if (errors[i] == PSM_OK || errors[i] == PSM_EPID_ALREADY_CONNECTED) {
			psmx_set_epaddr_context(av_priv->domain,
						((psm_epid_t *) addr)[i],
						((psm_epaddr_t *) fi_addr)[i]);
		} else {
			psm_epconn_t epconn;

			/* If duplicated addresses are passed to psm_ep_connect(), all but one will fail
			 * with error "Endpoint could not be reached". They should be treated as already
			 * connected.
			 */
			if (psm_ep_epid_lookup(((psm_epid_t *) addr)[i], &epconn) == PSM_OK) {
				epaddr_context = psm_epaddr_getctxt(epconn.addr);
				if (epaddr_context && epaddr_context->epid  == ((psm_epid_t *) addr)[i]) {
					((psm_epaddr_t *) fi_addr)[i] = epconn.addr;
					continue;
				}
			}

			FI_INFO(&psmx_prov, FI_LOG_AV,
				"%d: psm_ep_connect returned %s. remote epid=%lx.\n",
				i, psm_error_get_string(errors[i]),
				((psm_epid_t *)addr)[i]);
			/* adjacent literals are concatenated as-is, so each
			 * piece carries its own trailing space
			 */
			if (((psm_epid_t *)addr)[i] == 0)
				FI_INFO(&psmx_prov, FI_LOG_AV,
					"does the application depend on the provider "
					"to resolve IP address into endpoint id? if so "
					"check if the name server has started correctly "
					"at the other side.\n");
			fi_addr[i] = FI_ADDR_NOTAVAIL;
			error_count++;

			if (av_priv->flags & FI_EVENT)
				psmx_av_post_completion(av_priv, context, i, errors[i]);
		}
	}

	free(mask);
	free(errors);

	if (av_priv->type == FI_AV_TABLE) {
		/* NOTE: unresolved addresses are left in the AV table */
		if (result) {
			for (i=0; i<count; i++) {
				j = av_priv->last + i;
				if ((fi_addr_t)av_priv->psm_epaddrs[j] == FI_ADDR_NOTAVAIL)
					result[i] = FI_ADDR_NOTAVAIL;
				else
					result[i] = j;
			}
		}
		av_priv->last += count;
	}

	if (!(av_priv->flags & FI_EVENT))
		return count - error_count;

	psmx_av_post_completion(av_priv, context, count - error_count, 0);
	return 0;
}
예제 #11
0
/*
 * fi_getinfo entry point of the MLX provider.
 *
 * Reads the provider's runtime parameters (inject limit, UCX config file,
 * name-server enable flag and port), loads the UCX configuration into
 * mlx_descriptor.config, selects the address format from the hints and
 * delegates the actual matching to util_getinfo().
 *
 * node / service are not supported yet: when either is given a warning is
 * logged and both (together with flags) are ignored.
 *
 * Returns the status of util_getinfo(); *info receives its result.
 */
static int mlx_getinfo (
			uint32_t version, const char *node,
			const char *service, uint64_t flags,
			const struct fi_info *hints, struct fi_info **info)
{
	int status = -ENODATA;
	char *configfile_name = NULL;
	int inject_thresh = -1;

	mlx_descriptor.config = NULL;

	/*
	 * fi_param_get() returns 0 on success, so the defaults below must
	 * be applied when the status is NON-zero (parameter unset/invalid).
	 */
	status = fi_param_get( &mlx_prov,
				"mlx_tinject_limit",
				&inject_thresh);
	if (status)
		inject_thresh = FI_MLX_DEFAULT_INJECT_SIZE;

	FI_INFO( &mlx_prov, FI_LOG_CORE,
		"used inject size = %d \n", inject_thresh);

	status = fi_param_get( &mlx_prov, "mlx_config", &configfile_name);
	if (status) {
		configfile_name = NULL;
	}

	/* NS is disabled by default */
	status = fi_param_get( &mlx_prov, "mlx_ns_enable",
			&mlx_descriptor.use_ns);
	if (status) {
		mlx_descriptor.use_ns = 0;
	}
	status = fi_param_get( &mlx_prov, "mlx_ns_port",
			&mlx_descriptor.ns_port);
	if (status) {
		mlx_descriptor.ns_port = FI_MLX_DEFAULT_NS_PORT;
	}

	/*
	 * configfile_name is NULL unless "mlx_config" was read successfully,
	 * so it can be passed straight through; the status of the unrelated
	 * "mlx_ns_port" lookup must not decide whether it is used.
	 */
	status = ucp_config_read( NULL,
			configfile_name,
			&mlx_descriptor.config);
	if (status != UCS_OK) {
		FI_WARN( &mlx_prov, FI_LOG_CORE,
			"MLX error: invalid config file\n\t%d (%s)\n",
			status, ucs_status_string(status));
	}

	/*Setup some presets*/
	status = ucm_config_modify("MALLOC_HOOKS", "no");
	if (status != UCS_OK) {
		FI_WARN( &mlx_prov, FI_LOG_CORE,
			"MLX error: failed to switch off UCM memory hooks:\t%d (%s)\n",
			status, ucs_status_string(status));
	}

	FI_INFO( &mlx_prov, FI_LOG_CORE,
		"Loaded MLX version %s\n",
		ucp_get_version_string());

#if ENABLE_DEBUG
	if (mlx_descriptor.config &&
			fi_log_enabled( &mlx_prov, FI_LOG_INFO, FI_LOG_CORE)) {
		ucp_config_print( mlx_descriptor.config,
			stderr, "Used MLX configuration", (1<<4)-1);
	}
#endif

	*info = NULL;
	if (node || service) {
		FI_WARN(&mlx_prov, FI_LOG_CORE,
		"fi_getinfo with \"node != NULL \" or \"service != NULL \" is temporary not supported\n");
		node = service = NULL;
		flags = 0;
	}

	/* Only Pure MLX address and IPv4 are supported */
	if (hints && hints->addr_format == FI_ADDR_MLX) {
		mlx_info.addr_format = FI_ADDR_MLX;
	}

	/*
	 * Missing hints are handled like hints with a zero (unspecified)
	 * address format, which already falls into this branch.
	 */
	if (!hints || hints->addr_format <= FI_SOCKADDR_IN) {
		mlx_descriptor.use_ns = 1;
		mlx_info.addr_format = FI_SOCKADDR_IN;
	}

	/*
	 * node and service are both NULL here.
	 * NOTE(review): they are passed as (node, service) on the assumption
	 * that util_getinfo() takes them in that order - confirm against its
	 * prototype.
	 */
	status = util_getinfo( &mlx_util_prov, version,
				node, service, flags, hints, info);

	return status;
}
예제 #12
0
/*
 * Provider getinfo entry point: validate the caller's hints against what
 * this provider supports and, when they are acceptable, return a single
 * fi_info describing it.
 *
 * version  - API version requested by the caller; for 1.5 and later the
 *            default mr_mode is 0 instead of FI_MR_SCALABLE.
 * node     - with FI_SOURCE, parsed as "<text>:<unit>:<port>" to fill in
 *            the source address; otherwise resolved through the name
 *            server into the destination address.
 * service  - numeric service; parsed into the source address when
 *            FI_SOURCE is set, otherwise used for the name server lookup.
 * flags    - only FI_SOURCE is examined here.
 * hints    - optional; every field checked below must fall within the
 *            supported set or the call fails.
 * info     - set to NULL on entry and to the new fi_info on success. On
 *            success the source/destination address buffers allocated
 *            here are stored in that fi_info.
 *
 * Returns 0 on success, -FI_ENOMEM when fi_allocinfo() fails, and
 * -FI_ENODATA for every other failure.
 */
static int psmx_getinfo(uint32_t version, const char *node, const char *service,
			uint64_t flags, const struct fi_info *hints,
			struct fi_info **info)
{
	struct fi_info *psmx_info;
	uint32_t cnt = 0;
	psm_epid_t *dest_addr = NULL;
	struct psmx_src_name *src_addr = NULL;
	int ep_type = FI_EP_RDM;
	int av_type = FI_AV_UNSPEC;
	uint64_t mode = FI_CONTEXT;
	enum fi_mr_mode mr_mode = FI_MR_SCALABLE;
	enum fi_threading threading = FI_THREAD_COMPLETION;
	enum fi_progress control_progress = FI_PROGRESS_MANUAL;
	enum fi_progress data_progress = FI_PROGRESS_MANUAL;
	int caps = 0;
	uint64_t max_tag_value = 0;
	int err = -FI_ENODATA;
	int svc0, svc = PSMX_ANY_SERVICE;

	FI_INFO(&psmx_prov, FI_LOG_CORE,"\n");

	*info = NULL;

	/* Perform some quick check first to avoid unnecessary operations */
	if (hints) {
		if (hints->fabric_attr && hints->fabric_attr->name &&
		    strcasecmp(hints->fabric_attr->name, PSMX_FABRIC_NAME)) {
			FI_INFO(&psmx_prov, FI_LOG_CORE,
				"hints->fabric_name=%s, supported=%s\n",
				hints->fabric_attr->name, PSMX_FABRIC_NAME);
			goto err_out;
		}

		if (hints->domain_attr && hints->domain_attr->name &&
		    strcasecmp(hints->domain_attr->name, PSMX_DOMAIN_NAME)) {
			FI_INFO(&psmx_prov, FI_LOG_CORE,
				"hints->domain_name=%s, supported=%s\n",
				hints->domain_attr->name, PSMX_DOMAIN_NAME);
			goto err_out;
		}

		if (hints->ep_attr) {
			switch (hints->ep_attr->type) {
			case FI_EP_UNSPEC:
			case FI_EP_DGRAM:
			case FI_EP_RDM:
				break;
			default:
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->ep_attr->type=%d, supported=%d,%d,%d.\n",
					hints->ep_attr->type, FI_EP_UNSPEC,
					FI_EP_DGRAM, FI_EP_RDM);
				goto err_out;
			}
		}

		/* The requested caps must be a subset of one of the two sets. */
		if ((hints->caps & PSMX_CAPS) != hints->caps &&
		    (hints->caps & PSMX_CAPS2) != hints->caps) {
			uint64_t psmx_caps = PSMX_CAPS;
			uint64_t psmx_caps2 = PSMX_CAPS2;
			PSMX_INFO_DIFF("hints->caps", hints->caps, psmx_caps,
				       FI_TYPE_CAPS);
			PSMX_INFO_DIFF("alternatively, hints->caps",
				       hints->caps, psmx_caps2, FI_TYPE_CAPS);
			goto err_out;
		}
	}

	if (FI_VERSION_GE(version, FI_VERSION(1,5)))
		mr_mode = 0;

	if (psmx_init_lib())
		return -FI_ENODATA;

	if (psmx_compat_lib) {
		/*
		 * native PSM running over TrueScale doesn't have the issue handled
		 * here. it's only present when PSM is supported via the psm2-compat
		 * library, where the PSM functions are just wrappers around the PSM2
		 * counterparts.
		 *
		 * psm2_ep_num_devunits() may wait for 15 seconds before return
		 * when /dev/hfi1_0 is not present. Check the existence of any hfi1
		 * device interface first to avoid this delay. Note that the devices
		 * don't necessarily appear consecutively so we need to check all
		 * possible device names before returning "no device found" error.
		 * This also means if "/dev/hfi1_0" doesn't exist but other devices
		 * exist, we are still going to see the delay; but that's a rare case.
		 */
		glob_t glob_buf;

		if ((glob("/dev/hfi1_[0-9]", 0, NULL, &glob_buf) != 0) &&
		    (glob("/dev/hfi1_[0-9][0-9]", GLOB_APPEND, NULL, &glob_buf) != 0)) {
			FI_INFO(&psmx_prov, FI_LOG_CORE,
				"no hfi1 device is found.\n");
			return -FI_ENODATA;
		}
		globfree(&glob_buf);
	}

	if (psm_ep_num_devunits(&cnt) || !cnt) {
		FI_INFO(&psmx_prov, FI_LOG_CORE,
			"no PSM device is found.\n");
		return -FI_ENODATA;
	}

	/* From here on, failures go through err_out so src_addr is released. */
	src_addr = calloc(1, sizeof(*src_addr));
	if (!src_addr) {
		FI_INFO(&psmx_prov, FI_LOG_CORE,
			"failed to allocate src addr.\n");
		return -FI_ENODATA;
	}
	src_addr->signature = 0xFFFF;
	src_addr->unit = PSMX_DEFAULT_UNIT;
	src_addr->port = PSMX_DEFAULT_PORT;
	src_addr->service = PSMX_ANY_SERVICE;

	if (flags & FI_SOURCE) {
		/* node/service describe the local side; defaults stay in place
		 * for any field sscanf() does not convert.
		 */
		if (node)
			sscanf(node, "%*[^:]:%" SCNi8 ":%" SCNu8, &src_addr->unit, &src_addr->port);
		if (service)
			sscanf(service, "%" SCNu32, &src_addr->service);
		FI_INFO(&psmx_prov, FI_LOG_CORE,
			"node '%s' service '%s' converted to <unit=%d, port=%d, service=%d>\n",
			node, service, src_addr->unit, src_addr->port, src_addr->service);
	} else if (node) {
		/* node/service describe the peer; look it up via the name server. */
		psm_uuid_t uuid;
		psmx_get_uuid(uuid);

		struct util_ns ns = {
			.port = psmx_uuid_to_port(uuid),
			.name_len = sizeof(*dest_addr),
			.service_len = sizeof(svc),
			.service_cmp = psmx_ns_service_cmp,
			.is_service_wildcard = psmx_ns_is_service_wildcard,
		};
		ofi_ns_init(&ns);

		if (service)
			svc = atoi(service);
		svc0 = svc;	/* keep the requested value; the lookup may update svc */
		dest_addr = (psm_epid_t *)ofi_ns_resolve_name(&ns, node, &svc);
		if (dest_addr) {
			FI_INFO(&psmx_prov, FI_LOG_CORE,
				"'%s:%u' resolved to <epid=%"PRIu64">:%u\n",
				node, svc0, *dest_addr, svc);
		} else {
			FI_INFO(&psmx_prov, FI_LOG_CORE,
				"failed to resolve '%s:%u'.\n", node, svc);
			err = -FI_ENODATA;
			goto err_out;
		}
	}

	/* Detailed hints validation. */
	if (hints) {
		switch (hints->addr_format) {
		case FI_FORMAT_UNSPEC:
		case FI_ADDR_PSMX:
			break;
		default:
			FI_INFO(&psmx_prov, FI_LOG_CORE,
				"hints->addr_format=%d, supported=%d,%d.\n",
				hints->addr_format, FI_FORMAT_UNSPEC, FI_ADDR_PSMX);
			goto err_out;
		}

		if (hints->ep_attr) {
			switch (hints->ep_attr->protocol) {
			case FI_PROTO_UNSPEC:
			case FI_PROTO_PSMX:
				break;
			default:
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->protocol=%d, supported=%d %d\n",
					hints->ep_attr->protocol,
					FI_PROTO_UNSPEC, FI_PROTO_PSMX);
				goto err_out;
			}

			if (hints->ep_attr->tx_ctx_cnt > 1 &&
			    hints->ep_attr->tx_ctx_cnt != FI_SHARED_CONTEXT) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->ep_attr->tx_ctx_cnt=%"PRIu64", supported=0,1\n",
					hints->ep_attr->tx_ctx_cnt);
				goto err_out;
			}

			if (hints->ep_attr->rx_ctx_cnt > 1) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->ep_attr->rx_ctx_cnt=%"PRIu64", supported=0,1\n",
					hints->ep_attr->rx_ctx_cnt);
				goto err_out;
			}
		}

		if (hints->tx_attr) {
			if ((hints->tx_attr->op_flags & PSMX_OP_FLAGS) !=
			    hints->tx_attr->op_flags) {
				uint64_t psmx_op_flags = PSMX_OP_FLAGS;
				PSMX_INFO_DIFF("hints->tx_attr->op_flags",
					       hints->tx_attr->op_flags,
					       psmx_op_flags, FI_TYPE_OP_FLAGS);
				goto err_out;
			}
			if (hints->tx_attr->inject_size > PSMX_INJECT_SIZE) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->tx_attr->inject_size=%"PRIu64","
					"supported=%d.\n",
					hints->tx_attr->inject_size,
					PSMX_INJECT_SIZE);
				goto err_out;
			}
		}

		if (hints->rx_attr &&
		    (hints->rx_attr->op_flags & PSMX_OP_FLAGS) !=
		     hints->rx_attr->op_flags) {
			uint64_t psmx_op_flags = PSMX_OP_FLAGS;
			PSMX_INFO_DIFF("hints->rx_attr->op_flags",
				       hints->rx_attr->op_flags,
				       psmx_op_flags, FI_TYPE_OP_FLAGS);
			goto err_out;
		}

		/* FI_CONTEXT mode is demanded for FI_TAGGED, and for FI_MSG when
		 * psmx_env.am_msg is not set; otherwise no mode bits are reported.
		 */
		if ((hints->caps & FI_TAGGED) ||
		    ((hints->caps & FI_MSG) && !psmx_env.am_msg)) {
			if ((hints->mode & FI_CONTEXT) != FI_CONTEXT) {
				uint64_t psmx_mode = FI_CONTEXT;
				PSMX_INFO_DIFF("hints->mode", hints->mode,
					       psmx_mode, FI_TYPE_MODE);
				goto err_out;
			}
		} else {
			mode = 0;
		}

		if (hints->domain_attr) {
			switch (hints->domain_attr->av_type) {
			case FI_AV_UNSPEC:
			case FI_AV_MAP:
			case FI_AV_TABLE:
				av_type = hints->domain_attr->av_type;
				break;
			default:
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->domain_attr->av_type=%d, supported=%d %d %d\n",
					hints->domain_attr->av_type, FI_AV_UNSPEC, FI_AV_MAP,
					FI_AV_TABLE);
				goto err_out;
			}

			/* FI_MR_BASIC / FI_MR_SCALABLE are only accepted as the
			 * exact value, never OR-ed with other mr_mode bits.
			 */
			if (hints->domain_attr->mr_mode == FI_MR_BASIC) {
				mr_mode = FI_MR_BASIC;
			} else if (hints->domain_attr->mr_mode == FI_MR_SCALABLE) {
				mr_mode = FI_MR_SCALABLE;
			} else if (hints->domain_attr->mr_mode & (FI_MR_BASIC | FI_MR_SCALABLE)) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->domain_attr->mr_mode has FI_MR_BASIC or FI_MR_SCALABLE "
					"combined with other bits\n");
				goto err_out;
			}

			switch (hints->domain_attr->threading) {
			case FI_THREAD_UNSPEC:
				break;
			case FI_THREAD_FID:
			case FI_THREAD_ENDPOINT:
			case FI_THREAD_COMPLETION:
			case FI_THREAD_DOMAIN:
				threading = hints->domain_attr->threading;
				break;
			default:
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->domain_attr->threading=%d, supported=%d %d %d %d %d\n",
					hints->domain_attr->threading, FI_THREAD_UNSPEC,
					FI_THREAD_FID, FI_THREAD_ENDPOINT, FI_THREAD_COMPLETION,
					FI_THREAD_DOMAIN);
				goto err_out;
			}

			switch (hints->domain_attr->control_progress) {
			case FI_PROGRESS_UNSPEC:
				break;
			case FI_PROGRESS_MANUAL:
			case FI_PROGRESS_AUTO:
				control_progress = hints->domain_attr->control_progress;
				break;
			default:
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->domain_attr->control_progress=%d, supported=%d %d %d\n",
					hints->domain_attr->control_progress, FI_PROGRESS_UNSPEC,
					FI_PROGRESS_MANUAL, FI_PROGRESS_AUTO);
				goto err_out;
			}

			switch (hints->domain_attr->data_progress) {
			case FI_PROGRESS_UNSPEC:
				break;
			case FI_PROGRESS_MANUAL:
			case FI_PROGRESS_AUTO:
				data_progress = hints->domain_attr->data_progress;
				break;
			default:
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->domain_attr->data_progress=%d, supported=%d %d %d\n",
					hints->domain_attr->data_progress, FI_PROGRESS_UNSPEC,
					FI_PROGRESS_MANUAL, FI_PROGRESS_AUTO);
				goto err_out;
			}

			if (hints->domain_attr->caps & FI_SHARED_AV) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->domain_attr->caps=%lx, shared AV is unsupported\n",
					hints->domain_attr->caps);
				goto err_out;
			}
		}

		if (hints->ep_attr) {
			if (hints->ep_attr->max_msg_size > PSMX_MAX_MSG_SIZE) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->ep_attr->max_msg_size=%"PRIu64","
					"supported=%llu.\n",
					hints->ep_attr->max_msg_size,
					PSMX_MAX_MSG_SIZE);
				goto err_out;
			}
			max_tag_value = ofi_max_tag(hints->ep_attr->mem_tag_format);
		}

		if (hints->tx_attr) {
			if ((hints->tx_attr->msg_order & PSMX_MSG_ORDER) !=
			    hints->tx_attr->msg_order) {
				uint64_t psmx_msg_order = PSMX_MSG_ORDER;
				PSMX_INFO_DIFF("hints->tx_attr->msg_order",
					       hints->tx_attr->msg_order,
					       psmx_msg_order, FI_TYPE_MSG_ORDER);
				goto err_out;
			}
			if ((hints->tx_attr->comp_order & PSMX_COMP_ORDER) !=
			    hints->tx_attr->comp_order) {
				uint64_t psmx_comp_order = PSMX_COMP_ORDER;
				PSMX_INFO_DIFF("hints->tx_attr->comp_order",
					       hints->tx_attr->comp_order,
					       psmx_comp_order, FI_TYPE_MSG_ORDER);
				goto err_out;
			}
			/* inject_size was already validated together with op_flags above. */
			if (hints->tx_attr->iov_limit > 1) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->tx_attr->iov_limit=%ld,"
					"supported=1.\n",
					hints->tx_attr->iov_limit);
				goto err_out;
			}
			if (hints->tx_attr->rma_iov_limit > 1) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->tx_attr->rma_iov_limit=%ld,"
					"supported=1.\n",
					hints->tx_attr->rma_iov_limit);
				goto err_out;
			}
		}

		if (hints->rx_attr) {
			if ((hints->rx_attr->msg_order & PSMX_MSG_ORDER) !=
			    hints->rx_attr->msg_order) {
				uint64_t psmx_msg_order = PSMX_MSG_ORDER;
				PSMX_INFO_DIFF("hints->rx_attr->msg_order",
					       hints->rx_attr->msg_order,
					       psmx_msg_order, FI_TYPE_MSG_ORDER);
				goto err_out;
			}
			if ((hints->rx_attr->comp_order & PSMX_COMP_ORDER) !=
			    hints->rx_attr->comp_order) {
				uint64_t psmx_comp_order = PSMX_COMP_ORDER;
				PSMX_INFO_DIFF("hints->rx_attr->comp_order",
					       hints->rx_attr->comp_order,
					       psmx_comp_order, FI_TYPE_MSG_ORDER);
				goto err_out;
			}
			if (hints->rx_attr->iov_limit > 1) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->rx_attr->iov_limit=%ld,"
					"supported=1.\n",
					hints->rx_attr->iov_limit);
				goto err_out;
			}
		}

		caps = hints->caps;

		/* TODO: check other fields of hints */
	}

	if (psmx_reserve_tag_bits(&caps, &max_tag_value) < 0)
		goto err_out;

	psmx_info = fi_allocinfo();
	if (!psmx_info) {
		err = -FI_ENOMEM;
		goto err_out;
	}

	/* Endpoint attributes */
	psmx_info->ep_attr->type = ep_type;
	psmx_info->ep_attr->protocol = FI_PROTO_PSMX;
	psmx_info->ep_attr->protocol_version = PSM_VERNO;
	psmx_info->ep_attr->max_msg_size = PSMX_MAX_MSG_SIZE;
	psmx_info->ep_attr->max_order_raw_size = PSMX_RMA_ORDER_SIZE;
	psmx_info->ep_attr->max_order_war_size = PSMX_RMA_ORDER_SIZE;
	psmx_info->ep_attr->max_order_waw_size = PSMX_RMA_ORDER_SIZE;
	psmx_info->ep_attr->mem_tag_format = ofi_tag_format(max_tag_value);
	psmx_info->ep_attr->tx_ctx_cnt = 1;
	psmx_info->ep_attr->rx_ctx_cnt = 1;

	/* Domain attributes */
	psmx_info->domain_attr->threading = threading;
	psmx_info->domain_attr->control_progress = control_progress;
	psmx_info->domain_attr->data_progress = data_progress;
	psmx_info->domain_attr->name = strdup(PSMX_DOMAIN_NAME);
	psmx_info->domain_attr->resource_mgmt = FI_RM_ENABLED;
	psmx_info->domain_attr->av_type = av_type;
	psmx_info->domain_attr->mr_mode = mr_mode;
	psmx_info->domain_attr->mr_key_size = sizeof(uint64_t);
	psmx_info->domain_attr->cq_data_size = 4;
	psmx_info->domain_attr->cq_cnt = 65535;
	psmx_info->domain_attr->ep_cnt = 65535;
	psmx_info->domain_attr->tx_ctx_cnt = 1;
	psmx_info->domain_attr->rx_ctx_cnt = 1;
	psmx_info->domain_attr->max_ep_tx_ctx = 1;
	psmx_info->domain_attr->max_ep_rx_ctx = 1;
	psmx_info->domain_attr->max_ep_stx_ctx = 65535;
	psmx_info->domain_attr->max_ep_srx_ctx = 0;
	psmx_info->domain_attr->cntr_cnt = 65535;
	psmx_info->domain_attr->mr_iov_limit = 65535;
	psmx_info->domain_attr->caps = PSMX_DOM_CAPS;
	psmx_info->domain_attr->mode = 0;
	psmx_info->domain_attr->mr_cnt = 65535;

	/* Top-level and fabric attributes; the address buffers are handed
	 * over to the fi_info here.
	 */
	psmx_info->next = NULL;
	psmx_info->caps = (hints && hints->caps) ? hints->caps : caps;
	psmx_info->mode = mode;
	psmx_info->addr_format = FI_ADDR_PSMX;
	psmx_info->src_addr = src_addr;
	psmx_info->src_addrlen = sizeof(*src_addr);
	psmx_info->dest_addr = dest_addr;
	psmx_info->dest_addrlen = sizeof(*dest_addr);
	psmx_info->fabric_attr->name = strdup(PSMX_FABRIC_NAME);
	psmx_info->fabric_attr->prov_name = NULL;
	psmx_info->fabric_attr->prov_version = PSMX_VERSION;

	/* Transmit attributes */
	psmx_info->tx_attr->caps = psmx_info->caps;
	psmx_info->tx_attr->mode = psmx_info->mode;
	psmx_info->tx_attr->op_flags = (hints && hints->tx_attr && hints->tx_attr->op_flags)
					? hints->tx_attr->op_flags : 0;
	psmx_info->tx_attr->msg_order = PSMX_MSG_ORDER;
	psmx_info->tx_attr->comp_order = PSMX_COMP_ORDER;
	psmx_info->tx_attr->inject_size = PSMX_INJECT_SIZE;
	psmx_info->tx_attr->size = UINT64_MAX;
	psmx_info->tx_attr->iov_limit = 1;
	psmx_info->tx_attr->rma_iov_limit = 1;

	/* Receive attributes */
	psmx_info->rx_attr->caps = psmx_info->caps;
	psmx_info->rx_attr->mode = psmx_info->mode;
	psmx_info->rx_attr->op_flags = (hints && hints->rx_attr && hints->rx_attr->op_flags)
					? hints->rx_attr->op_flags : 0;
	psmx_info->rx_attr->msg_order = PSMX_MSG_ORDER;
	psmx_info->rx_attr->comp_order = PSMX_COMP_ORDER;
	psmx_info->rx_attr->total_buffered_recv = ~(0ULL); /* that's how PSM handles it internally! */
	psmx_info->rx_attr->size = UINT64_MAX;
	psmx_info->rx_attr->iov_limit = 1;

	*info = psmx_info;
	return 0;

err_out:
	/* Both pointers may still be NULL here; free(NULL) is a no-op. */
	free(dest_addr);
	free(src_addr);

	return err;
}

/*
 * Provider teardown hook. Drops one reference from psmx_init_count and,
 * once the count reaches zero with the PSM library initialized, finalizes
 * PSM unless a fabric is still active.
 */
static void psmx_fini(void)
{
	FI_INFO(&psmx_prov, FI_LOG_CORE, "\n");

	psmx_init_count--;
	if (psmx_init_count != 0)
		return;

	if (!psmx_lib_initialized)
		return;

	/* This function is called from a library destructor, which is called
	 * automatically when exit() is called. The call to psm_finalize()
	 * might cause deadlock if the application is terminated with Ctrl-C
	 * -- the application could be inside a PSM call, holding a lock that
	 * psm_finalize() tries to acquire. This can be avoided by only
	 * calling psm_finalize() when PSM is guaranteed to be unused.
	 */
	if (psmx_active_fabric) {
		FI_INFO(&psmx_prov, FI_LOG_CORE,
			"psmx_active_fabric != NULL, skip psm_finalize\n");
		return;
	}

	psm_finalize();
	psmx_lib_initialized = 0;
}
예제 #13
0
/*
 * Provider getinfo entry point: check that a PSM device is available,
 * validate the caller's hints against what this provider supports and,
 * when they are acceptable, return a single fi_info describing it.
 *
 * version  - API version requested by the caller (not examined here).
 * node     - with FI_SOURCE, parsed as "<text>:<unit>:<port>" to fill in
 *            the source address; otherwise resolved through
 *            psmx_ns_resolve_name() into the destination address.
 * service  - numeric service; parsed into the source address when
 *            FI_SOURCE is set, otherwise used for the name lookup.
 * flags    - only FI_SOURCE is examined here.
 * hints    - optional; every field checked below must fall within the
 *            supported set or the call fails.
 * info     - set to NULL on entry and to the new fi_info on success. On
 *            success the source/destination address buffers allocated
 *            here are stored in that fi_info.
 *
 * Returns 0 on success, -FI_ENOMEM when fi_allocinfo() fails, and
 * -FI_ENODATA for every other failure.
 */
static int psmx_getinfo(uint32_t version, const char *node, const char *service,
			uint64_t flags, struct fi_info *hints, struct fi_info **info)
{
	struct fi_info *psmx_info;
	uint32_t cnt = 0;
	psm_epid_t *dest_addr = NULL;
	struct psmx_src_name *src_addr = NULL;
	int ep_type = FI_EP_RDM;
	int av_type = FI_AV_UNSPEC;
	uint64_t mode = FI_CONTEXT;
	enum fi_mr_mode mr_mode = FI_MR_SCALABLE;
	enum fi_threading threading = FI_THREAD_COMPLETION;
	enum fi_progress control_progress = FI_PROGRESS_MANUAL;
	enum fi_progress data_progress = FI_PROGRESS_MANUAL;
	int caps = 0;
	uint64_t max_tag_value = 0;
	int err = -FI_ENODATA;
	int svc0, svc = PSMX_ANY_SERVICE;

	FI_INFO(&psmx_prov, FI_LOG_CORE,"\n");

	*info = NULL;

	if (psmx_init_lib())
		return -FI_ENODATA;

	if (psm_ep_num_devunits(&cnt) || !cnt) {
		FI_INFO(&psmx_prov, FI_LOG_CORE,
			"no PSM device is found.\n");
		return -FI_ENODATA;
	}

	psmx_init_env();

	/* From here on, failures go through err_out so src_addr is released. */
	src_addr = calloc(1, sizeof(*src_addr));
	if (!src_addr) {
		FI_INFO(&psmx_prov, FI_LOG_CORE,
			"failed to allocate src addr.\n");
		return -FI_ENODATA;
	}
	src_addr->unit = PSMX_DEFAULT_UNIT;
	src_addr->port = PSMX_DEFAULT_PORT;
	src_addr->service = PSMX_ANY_SERVICE;

	if (flags & FI_SOURCE) {
		/* node/service describe the local side; defaults stay in place
		 * for any field sscanf() does not convert.
		 */
		if (node)
			sscanf(node, "%*[^:]:%d:%d", &src_addr->unit, &src_addr->port);
		if (service)
			sscanf(service, "%d", &src_addr->service);
		FI_INFO(&psmx_prov, FI_LOG_CORE,
			"node '%s' service '%s' converted to <unit=%d, port=%d, service=%d>\n",
			node, service, src_addr->unit, src_addr->port, src_addr->service);
	} else if (node) {
		/* node/service describe the peer; look it up by name. */
		if (service)
			svc = atoi(service);
		svc0 = svc;	/* keep the requested value; the lookup may update svc */
		dest_addr = psmx_ns_resolve_name(node, &svc);
		if (dest_addr) {
			FI_INFO(&psmx_prov, FI_LOG_CORE,
				"'%s:%u' resolved to <epid=0x%llx>:%u\n", node, svc0,
				*dest_addr, svc);
		} else {
			FI_INFO(&psmx_prov, FI_LOG_CORE,
				"failed to resolve '%s:%u'.\n", node, svc);
			err = -FI_ENODATA;
			goto err_out;
		}
	}

	if (hints) {
		switch (hints->addr_format) {
		case FI_FORMAT_UNSPEC:
		case FI_ADDR_PSMX:
			break;
		default:
			FI_INFO(&psmx_prov, FI_LOG_CORE,
				"hints->addr_format=%d, supported=%d,%d.\n",
				hints->addr_format, FI_FORMAT_UNSPEC, FI_ADDR_PSMX);
			goto err_out;
		}

		if (hints->ep_attr) {
			switch (hints->ep_attr->type) {
			case FI_EP_UNSPEC:
			case FI_EP_DGRAM:
			case FI_EP_RDM:
				break;
			default:
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->ep_attr->type=%d, supported=%d,%d,%d.\n",
					hints->ep_attr->type, FI_EP_UNSPEC,
					FI_EP_DGRAM, FI_EP_RDM);
				goto err_out;
			}

			switch (hints->ep_attr->protocol) {
			case FI_PROTO_UNSPEC:
			case FI_PROTO_PSMX:
				break;
			default:
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->protocol=%d, supported=%d %d\n",
					hints->ep_attr->protocol,
					FI_PROTO_UNSPEC, FI_PROTO_PSMX);
				goto err_out;
			}

			if (hints->ep_attr->tx_ctx_cnt > 1) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->ep_attr->tx_ctx_cnt=%d, supported=0,1\n",
					hints->ep_attr->tx_ctx_cnt);
				goto err_out;
			}

			if (hints->ep_attr->rx_ctx_cnt > 1) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->ep_attr->rx_ctx_cnt=%d, supported=0,1\n",
					hints->ep_attr->rx_ctx_cnt);
				goto err_out;
			}
		}

		/* The requested caps must be a subset of one of the two sets. */
		if ((hints->caps & PSMX_CAPS) != hints->caps &&
		    (hints->caps & PSMX_CAPS2) != hints->caps) {
			FI_INFO(&psmx_prov, FI_LOG_CORE,
				"hints->caps=0x%llx, supported=0x%llx,0x%llx\n",
				hints->caps, PSMX_CAPS, PSMX_CAPS2);
			goto err_out;
		}

		if (hints->tx_attr) {
			if ((hints->tx_attr->op_flags & PSMX_OP_FLAGS) !=
			    hints->tx_attr->op_flags) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->tx->flags=0x%llx, "
					"supported=0x%llx\n",
					hints->tx_attr->op_flags,
					PSMX_OP_FLAGS);
				goto err_out;
			}
			if (hints->tx_attr->inject_size > PSMX_INJECT_SIZE) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->tx_attr->inject_size=%ld,"
					"supported=%d.\n",
					hints->tx_attr->inject_size,
					PSMX_INJECT_SIZE);
				goto err_out;
			}
		}

		if (hints->rx_attr &&
		    (hints->rx_attr->op_flags & PSMX_OP_FLAGS) !=
		     hints->rx_attr->op_flags) {
			FI_INFO(&psmx_prov, FI_LOG_CORE,
				"hints->rx->flags=0x%llx, supported=0x%llx\n",
				hints->rx_attr->op_flags, PSMX_OP_FLAGS);
			goto err_out;
		}

		/* FI_CONTEXT mode is demanded for FI_TAGGED, and for FI_MSG when
		 * psmx_env.am_msg is not set; otherwise no mode bits are reported.
		 */
		if ((hints->caps & FI_TAGGED) ||
		    ((hints->caps & FI_MSG) && !psmx_env.am_msg)) {
			if ((hints->mode & FI_CONTEXT) != FI_CONTEXT) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->mode=0x%llx, required=0x%llx\n",
					hints->mode, FI_CONTEXT);
				goto err_out;
			}
		} else {
			mode = 0;
		}

		if (hints->fabric_attr && hints->fabric_attr->name &&
		    strcmp(hints->fabric_attr->name, PSMX_FABRIC_NAME)) {
			FI_INFO(&psmx_prov, FI_LOG_CORE,
				"hints->fabric_name=%s, supported=psm\n",
				hints->fabric_attr->name);
			goto err_out;
		}

		if (hints->domain_attr) {
			if (hints->domain_attr->name &&
			    strcmp(hints->domain_attr->name, PSMX_DOMAIN_NAME)) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->domain_name=%s, supported=psm\n",
					hints->domain_attr->name);
				goto err_out;
			}

			switch (hints->domain_attr->av_type) {
			case FI_AV_UNSPEC:
			case FI_AV_MAP:
			case FI_AV_TABLE:
				av_type = hints->domain_attr->av_type;
				break;
			default:
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->domain_attr->av_type=%d, supported=%d %d %d\n",
					hints->domain_attr->av_type, FI_AV_UNSPEC, FI_AV_MAP,
					FI_AV_TABLE);
				goto err_out;
			}

			switch (hints->domain_attr->mr_mode) {
			case FI_MR_UNSPEC:
				break;
			case FI_MR_BASIC:
			case FI_MR_SCALABLE:
				mr_mode = hints->domain_attr->mr_mode;
				break;
			default:
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->domain_attr->mr_mode=%d, supported=%d %d %d\n",
					hints->domain_attr->mr_mode, FI_MR_UNSPEC, FI_MR_BASIC,
					FI_MR_SCALABLE);
				goto err_out;
			}

			switch (hints->domain_attr->threading) {
			case FI_THREAD_UNSPEC:
				break;
			case FI_THREAD_FID:
			case FI_THREAD_ENDPOINT:
			case FI_THREAD_COMPLETION:
			case FI_THREAD_DOMAIN:
				threading = hints->domain_attr->threading;
				break;
			default:
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->domain_attr->threading=%d, supported=%d %d %d %d %d\n",
					hints->domain_attr->threading, FI_THREAD_UNSPEC,
					FI_THREAD_FID, FI_THREAD_ENDPOINT, FI_THREAD_COMPLETION,
					FI_THREAD_DOMAIN);
				goto err_out;
			}

			switch (hints->domain_attr->control_progress) {
			case FI_PROGRESS_UNSPEC:
				break;
			case FI_PROGRESS_MANUAL:
			case FI_PROGRESS_AUTO:
				control_progress = hints->domain_attr->control_progress;
				break;
			default:
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->domain_attr->control_progress=%d, supported=%d %d %d\n",
					hints->domain_attr->control_progress, FI_PROGRESS_UNSPEC,
					FI_PROGRESS_MANUAL, FI_PROGRESS_AUTO);
				goto err_out;
			}

			switch (hints->domain_attr->data_progress) {
			case FI_PROGRESS_UNSPEC:
				break;
			case FI_PROGRESS_MANUAL:
			case FI_PROGRESS_AUTO:
				data_progress = hints->domain_attr->data_progress;
				break;
			default:
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->domain_attr->data_progress=%d, supported=%d %d %d\n",
					hints->domain_attr->data_progress, FI_PROGRESS_UNSPEC,
					FI_PROGRESS_MANUAL, FI_PROGRESS_AUTO);
				goto err_out;
			}

			if (hints->domain_attr->caps & FI_SHARED_AV) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->domain_attr->caps=%lx, shared AV is unsupported\n",
					hints->domain_attr->caps);
				goto err_out;
			}
		}

		if (hints->ep_attr) {
			if (hints->ep_attr->max_msg_size > PSMX_MAX_MSG_SIZE) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->ep_attr->max_msg_size=%ld,"
					"supported=%ld.\n",
					hints->ep_attr->max_msg_size,
					PSMX_MAX_MSG_SIZE);
				goto err_out;
			}
			max_tag_value = fi_tag_bits(hints->ep_attr->mem_tag_format);
		}

		if (hints->tx_attr) {
			if ((hints->tx_attr->msg_order & PSMX_MSG_ORDER) !=
			    hints->tx_attr->msg_order) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->tx_attr->msg_order=%lx,"
					"supported=%lx.\n",
					hints->tx_attr->msg_order,
					PSMX_MSG_ORDER);
				goto err_out;
			}
			if ((hints->tx_attr->comp_order & PSMX_COMP_ORDER) !=
			    hints->tx_attr->comp_order) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->tx_attr->comp_order=%lx,"
					"supported=%lx.\n",
					hints->tx_attr->comp_order,
					PSMX_COMP_ORDER);
				goto err_out;
			}
			/* inject_size was already validated together with op_flags above. */
			if (hints->tx_attr->iov_limit > 1) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->tx_attr->iov_limit=%ld,"
					"supported=1.\n",
					hints->tx_attr->iov_limit);
				goto err_out;
			}
			if (hints->tx_attr->rma_iov_limit > 1) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->tx_attr->rma_iov_limit=%ld,"
					"supported=1.\n",
					hints->tx_attr->rma_iov_limit);
				goto err_out;
			}
		}

		if (hints->rx_attr) {
			if ((hints->rx_attr->msg_order & PSMX_MSG_ORDER) !=
			    hints->rx_attr->msg_order) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->rx_attr->msg_order=%lx,"
					"supported=%lx.\n",
					hints->rx_attr->msg_order,
					PSMX_MSG_ORDER);
				goto err_out;
			}
			if ((hints->rx_attr->comp_order & PSMX_COMP_ORDER) !=
			    hints->rx_attr->comp_order) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->rx_attr->comp_order=%lx,"
					"supported=%lx.\n",
					hints->rx_attr->comp_order,
					PSMX_COMP_ORDER);
				goto err_out;
			}
			if (hints->rx_attr->iov_limit > 1) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->rx_attr->iov_limit=%ld,"
					"supported=1.\n",
					hints->rx_attr->iov_limit);
				goto err_out;
			}
		}

		caps = hints->caps;

		/* TODO: check other fields of hints */
	}

	if (psmx_reserve_tag_bits(&caps, &max_tag_value) < 0)
		goto err_out;

	psmx_info = fi_allocinfo();
	if (!psmx_info) {
		err = -FI_ENOMEM;
		goto err_out;
	}

	/* Endpoint attributes */
	psmx_info->ep_attr->type = ep_type;
	psmx_info->ep_attr->protocol = FI_PROTO_PSMX;
	psmx_info->ep_attr->protocol_version = PSM_VERNO;
	psmx_info->ep_attr->max_msg_size = PSMX_MAX_MSG_SIZE;
	psmx_info->ep_attr->mem_tag_format = fi_tag_format(max_tag_value);
	psmx_info->ep_attr->tx_ctx_cnt = 1;
	psmx_info->ep_attr->rx_ctx_cnt = 1;

	/* Domain attributes */
	psmx_info->domain_attr->threading = threading;
	psmx_info->domain_attr->control_progress = control_progress;
	psmx_info->domain_attr->data_progress = data_progress;
	psmx_info->domain_attr->name = strdup(PSMX_DOMAIN_NAME);
	psmx_info->domain_attr->resource_mgmt = FI_RM_ENABLED;
	psmx_info->domain_attr->av_type = av_type;
	psmx_info->domain_attr->mr_mode = mr_mode;
	psmx_info->domain_attr->mr_key_size = sizeof(uint64_t);
	psmx_info->domain_attr->cq_data_size = 4;
	psmx_info->domain_attr->cq_cnt = 65535;
	psmx_info->domain_attr->ep_cnt = 65535;
	psmx_info->domain_attr->tx_ctx_cnt = 1;
	psmx_info->domain_attr->rx_ctx_cnt = 1;
	psmx_info->domain_attr->max_ep_tx_ctx = 1;
	psmx_info->domain_attr->max_ep_rx_ctx = 1;
	psmx_info->domain_attr->max_ep_stx_ctx = 65535;
	psmx_info->domain_attr->max_ep_srx_ctx = 0;
	psmx_info->domain_attr->cntr_cnt = 65535;
	psmx_info->domain_attr->mr_iov_limit = 65535;
	psmx_info->domain_attr->caps = PSMX_DOM_CAPS;
	psmx_info->domain_attr->mode = 0;

	/* Top-level and fabric attributes; the address buffers are handed
	 * over to the fi_info here.
	 */
	psmx_info->next = NULL;
	psmx_info->caps = (hints && hints->caps) ? hints->caps : caps;
	psmx_info->mode = mode;
	psmx_info->addr_format = FI_ADDR_PSMX;
	psmx_info->src_addr = src_addr;
	psmx_info->src_addrlen = sizeof(*src_addr);
	psmx_info->dest_addr = dest_addr;
	psmx_info->dest_addrlen = sizeof(*dest_addr);
	psmx_info->fabric_attr->name = strdup(PSMX_FABRIC_NAME);
	psmx_info->fabric_attr->prov_name = NULL;
	psmx_info->fabric_attr->prov_version = PSMX_VERSION;

	/* Transmit attributes */
	psmx_info->tx_attr->caps = psmx_info->caps;
	psmx_info->tx_attr->mode = psmx_info->mode;
	psmx_info->tx_attr->op_flags = (hints && hints->tx_attr && hints->tx_attr->op_flags)
					? hints->tx_attr->op_flags : 0;
	psmx_info->tx_attr->msg_order = PSMX_MSG_ORDER;
	psmx_info->tx_attr->comp_order = PSMX_COMP_ORDER;
	psmx_info->tx_attr->inject_size = PSMX_INJECT_SIZE;
	psmx_info->tx_attr->size = UINT64_MAX;
	psmx_info->tx_attr->iov_limit = 1;
	psmx_info->tx_attr->rma_iov_limit = 1;

	/* Receive attributes */
	psmx_info->rx_attr->caps = psmx_info->caps;
	psmx_info->rx_attr->mode = psmx_info->mode;
	psmx_info->rx_attr->op_flags = (hints && hints->rx_attr && hints->rx_attr->op_flags)
					? hints->rx_attr->op_flags : 0;
	psmx_info->rx_attr->msg_order = PSMX_MSG_ORDER;
	psmx_info->rx_attr->comp_order = PSMX_COMP_ORDER;
	psmx_info->rx_attr->total_buffered_recv = ~(0ULL); /* that's how PSM handles it internally! */
	psmx_info->rx_attr->size = UINT64_MAX;
	psmx_info->rx_attr->iov_limit = 1;

	*info = psmx_info;
	return 0;

err_out:
	/* dest_addr may still be NULL here; free(NULL) is a no-op. */
	free(dest_addr);
	free(src_addr);

	return err;
}
예제 #14
0
static int ofi_register_provider(struct fi_provider *provider, void *dlhandle)
{
	struct fi_prov_context *ctx;
	struct ofi_prov *prov = NULL;
	int ret;

	if (!provider || !provider->name) {
		FI_WARN(&core_prov, FI_LOG_CORE,
			"no provider structure or name\n");
		ret = -FI_EINVAL;
		goto cleanup;
	}

	FI_INFO(&core_prov, FI_LOG_CORE,
	       "registering provider: %s (%d.%d)\n", provider->name,
	       FI_MAJOR(provider->version), FI_MINOR(provider->version));

	if (!provider->getinfo || !provider->fabric) {
		FI_WARN(&core_prov, FI_LOG_CORE,
			"provider missing mandatory entry points\n");
		ret = -FI_EINVAL;
		goto cleanup;
	}

	/* The current core implementation is not backward compatible
	 * with providers that support a release earlier than v1.3.
	 * See commit 0f4b6651.
	 */
	if (provider->fi_version < FI_VERSION(1, 3)) {
		FI_INFO(&core_prov, FI_LOG_CORE,
			"provider has unsupported FI version "
			"(provider %d.%d != libfabric %d.%d); ignoring\n",
			FI_MAJOR(provider->fi_version),
			FI_MINOR(provider->fi_version), FI_MAJOR_VERSION,
			FI_MINOR_VERSION);

		ret = -FI_ENOSYS;
		goto cleanup;
	}

	ctx = (struct fi_prov_context *) &provider->context;
	ctx->is_util_prov = ofi_has_util_prefix(provider->name);

	if (ofi_getinfo_filter(provider)) {
		FI_INFO(&core_prov, FI_LOG_CORE,
			"\"%s\" filtered by provider include/exclude "
			"list, skipping\n", provider->name);
		ret = -FI_ENODEV;
		goto cleanup;
	}

	if (ofi_apply_filter(&prov_log_filter, provider->name))
		ctx->disable_logging = 1;

	prov = ofi_getprov(provider->name, strlen(provider->name));
	if (prov) {
		/* If this provider has not been init yet, then we add the
		 * provider and dlhandle to the struct and exit.
		 */
		if (prov->provider == NULL)
			goto update_prov_registry;

		/* If this provider is older than an already-loaded
		 * provider of the same name, then discard this one.
		 */
		if (FI_VERSION_GE(prov->provider->version, provider->version)) {
			FI_INFO(&core_prov, FI_LOG_CORE,
				"a newer %s provider was already loaded; "
				"ignoring this one\n", provider->name);
			ret = -FI_EALREADY;
			goto cleanup;
		}

		/* This provider is newer than an already-loaded
		 * provider of the same name, so discard the
		 * already-loaded one.
		 */
		FI_INFO(&core_prov, FI_LOG_CORE,
			"an older %s provider was already loaded; "
			"keeping this one and ignoring the older one\n",
			provider->name);
		cleanup_provider(prov->provider, prov->dlhandle);
	} else {
		prov = ofi_create_prov_entry(provider->name);
		if (!prov) {
			ret = -FI_EOTHER;
			goto cleanup;
		}
	}

update_prov_registry:
	prov->dlhandle = dlhandle;
	prov->provider = provider;
	return 0;

cleanup:
	cleanup_provider(provider, dlhandle);
	return ret;
}
예제 #15
0
static int psmx_domain_init(struct psmx_fid_domain *domain,
			    struct psmx_src_name *src_addr)
{
	struct psmx_fid_fabric *fabric = domain->fabric;
	struct psm_ep_open_opts opts;
	int err;

	psm_ep_open_opts_get_defaults(&opts);

	FI_INFO(&psmx_prov, FI_LOG_CORE,
		"uuid: %s\n", psmx_uuid_to_string(fabric->uuid));

	if (src_addr) {
		opts.unit = src_addr->unit;
		opts.port = src_addr->port;
		FI_INFO(&psmx_prov, FI_LOG_CORE,
			"ep_open_opts: unit=%d port=%u\n", opts.unit, opts.port);
	}

	err = psm_ep_open(fabric->uuid, &opts,
			  &domain->psm_ep, &domain->psm_epid);
	if (err != PSM_OK) {
		FI_WARN(&psmx_prov, FI_LOG_CORE,
			"psm_ep_open returns %d, errno=%d\n", err, errno);
		err = psmx_errno(err);
		goto err_out;
	}

	FI_INFO(&psmx_prov, FI_LOG_CORE,
		"epid: 0x%016lx\n", domain->psm_epid);

	err = psm_mq_init(domain->psm_ep, PSM_MQ_ORDERMASK_ALL,
			  NULL, 0, &domain->psm_mq);
	if (err != PSM_OK) {
		FI_WARN(&psmx_prov, FI_LOG_CORE,
			"psm_mq_init returns %d, errno=%d\n", err, errno);
		err = psmx_errno(err);
		goto err_out_close_ep;
	}

	err = fastlock_init(&domain->mr_lock);
	if (err) {
		FI_WARN(&psmx_prov, FI_LOG_CORE,
			"fastlock_init(mr_lock) returns %d\n", err);
		goto err_out_finalize_mq;
	}

	domain->mr_map = rbtNew(&psmx_key_compare);
	if (!domain->mr_map) {
		FI_WARN(&psmx_prov, FI_LOG_CORE,
			"rbtNew failed\n");
		goto err_out_destroy_mr_lock;
	}

	domain->mr_reserved_key = 1;
	
	err = fastlock_init(&domain->poll_lock);
	if (err) {
		FI_WARN(&psmx_prov, FI_LOG_CORE,
			"fastlock_init(poll_lock) returns %d\n", err);
		goto err_out_delete_mr_map;
	}

	/* Set active domain before psmx_domain_enable_ep() installs the
	 * AM handlers to ensure that psmx_active_fabric->active_domain
	 * is always non-NULL inside the handlers. Notice that the vlaue
	 * active_domain becomes NULL again only when the domain is closed.
	 * At that time the AM handlers are gone with the PSM endpoint.
	 */
	fabric->active_domain = domain;

	if (psmx_domain_enable_ep(domain, NULL) < 0)
		goto err_out_reset_active_domain;

	if (domain->progress_thread_enabled)
		psmx_domain_start_progress(domain);

	return 0;

err_out_reset_active_domain:
	fabric->active_domain = NULL;
	fastlock_destroy(&domain->poll_lock);

err_out_delete_mr_map:
	rbtDelete(domain->mr_map);

err_out_destroy_mr_lock:
	fastlock_destroy(&domain->mr_lock);

err_out_finalize_mq:
	psm_mq_finalize(domain->psm_mq);

err_out_close_ep:
	if (psm_ep_close(domain->psm_ep, PSM_EP_CLOSE_GRACEFUL,
			 (int64_t) psmx_env.timeout * 1000000000LL) != PSM_OK)
		psm_ep_close(domain->psm_ep, PSM_EP_CLOSE_FORCE, 0);

err_out:
	return err;
}
예제 #16
0
/*
 * Insert `count` endpoint names into the address vector.
 *
 * The epid/vlane of every name is stored in the AV tables starting at the
 * current `last` slot, the endpoints are connected, and, when fi_addr is
 * non-NULL, one fi_addr_t per input name is written back
 * (FI_ADDR_NOTAVAIL for entries whose epaddr is marked not available).
 *
 * Without FI_EVENT on the AV, returns the number of names minus the number
 * of connection errors. With FI_EVENT, that number is posted as a
 * completion and 0 is returned. Negative error codes: -FI_EINVAL (NULL
 * addr with non-zero count), -FI_ENOEQ (FI_EVENT set but no EQ bound),
 * -FI_ENOMEM.
 */
static int psmx2_av_insert(struct fid_av *av, const void *addr,
			   size_t count, fi_addr_t *fi_addr,
			   uint64_t flags, void *context)
{
	const struct psmx2_ep_name *src = addr;
	struct psmx2_fid_av *priv;
	psm2_epaddr_t *conn_addrs;
	psm2_epid_t *ids;
	uint8_t *lanes;
	psm2_error_t *conn_errs;
	int *conn_mask;
	int failed;
	int n;

	if (!addr && count) {
		FI_INFO(&psmx2_prov, FI_LOG_AV,
			"the input address array is NULL.\n");
		return -FI_EINVAL;
	}

	priv = container_of(av, struct psmx2_fid_av, av);

	/* Event-driven insertion needs an event queue to report to. */
	if (!priv->eq && (priv->flags & FI_EVENT))
		return -FI_ENOEQ;

	if (psmx2_av_check_table_size(priv, count))
		return -FI_ENOMEM;

	/* New entries are written starting at the current end of the tables. */
	ids = priv->epids + priv->last;
	conn_addrs = priv->epaddrs + priv->last;
	lanes = priv->vlanes + priv->last;

	for (n = 0; n < count; n++) {
		ids[n] = src[n].epid;
		lanes[n] = src[n].vlane;
	}

	/* Scratch arrays used only for the duration of the connect call. */
	conn_errs = (psm2_error_t *) calloc(count, sizeof *conn_errs);
	conn_mask = (int *) calloc(count, sizeof *conn_mask);
	if (!conn_errs || !conn_mask) {
		free(conn_mask);
		free(conn_errs);
		return -FI_ENOMEM;
	}

	failed = psmx2_av_connet_eps(priv, count, ids, conn_mask,
				     conn_errs, conn_addrs, context);

	free(conn_mask);
	free(conn_errs);

	if (fi_addr) {
		for (n = 0; n < count; n++) {
			if (conn_addrs[n] == (void *)FI_ADDR_NOTAVAIL) {
				fi_addr[n] = FI_ADDR_NOTAVAIL;
				continue;
			}

			/* Table AVs hand out indices, map AVs hand out
			 * an encoding of the epaddr and vlane.
			 */
			if (priv->type == FI_AV_TABLE)
				fi_addr[n] = priv->last + n;
			else
				fi_addr[n] = PSMX2_EP_TO_ADDR(conn_addrs[n], lanes[n]);
		}
	}

	if (priv->type == FI_AV_TABLE)
		priv->last += count;

	if (priv->flags & FI_EVENT) {
		psmx2_av_post_completion(priv, context, count - failed, 0);
		return 0;
	}

	return count - failed;
}
예제 #17
0
/*
 * Open a domain on the given fabric.
 *
 * If the fabric already has an active domain, that domain is reused: a
 * reference is taken and its fid is returned. Otherwise the requested
 * domain name must equal PSMX_DOMAIN_NAME; a new domain object is
 * allocated, initialized through ofi_domain_init() and psmx_domain_init(),
 * and returned with a reference held.
 *
 * Returns 0 on success, -FI_EINVAL for a missing or unexpected domain
 * name, -FI_ENOMEM on allocation failure, or the error reported by one of
 * the init calls.
 */
int psmx_domain_open(struct fid_fabric *fabric, struct fi_info *info,
		     struct fid_domain **domain, void *context)
{
	struct psmx_fid_fabric *fab;
	struct psmx_fid_domain *dom;
	int err;

	FI_INFO(&psmx_prov, FI_LOG_DOMAIN, "\n");

	fab = container_of(fabric, struct psmx_fid_fabric,
			   util_fabric.fabric_fid);

	/* Share the existing domain rather than creating a second one. */
	if (fab->active_domain) {
		psmx_domain_acquire(fab->active_domain);
		*domain = &fab->active_domain->util_domain.domain_fid;
		return 0;
	}

	if (!info->domain_attr->name ||
	    strcmp(info->domain_attr->name, PSMX_DOMAIN_NAME) != 0)
		return -FI_EINVAL;

	dom = calloc(1, sizeof *dom);
	if (!dom)
		return -FI_ENOMEM;

	err = ofi_domain_init(fabric, info, &dom->util_domain, context);
	if (err) {
		free(dom);
		return err;
	}

	/* fclass & context are set in ofi_domain_init */
	dom->util_domain.domain_fid.fid.ops = &psmx_fi_ops;
	dom->util_domain.domain_fid.ops = &psmx_domain_ops;
	dom->util_domain.domain_fid.mr = &psmx_mr_ops;

	dom->fabric = fab;
	dom->caps = info->caps;
	dom->mode = info->mode;
	dom->mr_mode = info->domain_attr->mr_mode;
	dom->progress_thread_enabled =
		(info->domain_attr->data_progress == FI_PROGRESS_AUTO && psmx_env.prog_thread);

	err = psmx_domain_init(dom, info->src_addr);
	if (err) {
		ofi_domain_close(&dom->util_domain);
		free(dom);
		return err;
	}

	/* take the reference to count for multiple domain open calls */
	psmx_domain_acquire(fab->active_domain);

	*domain = &dom->util_domain.domain_fid;
	return 0;
}
예제 #18
0
/*
 * Open an address vector on a domain.
 *
 * attr may be NULL, in which case the defaults are used (FI_AV_MAP,
 * count 64, no flags). When attr is given:
 *   - attr->type must be FI_AV_UNSPEC, FI_AV_MAP or FI_AV_TABLE
 *     (FI_AV_UNSPEC keeps the FI_AV_MAP default); otherwise -FI_EINVAL;
 *   - FI_READ and FI_SYMMETRIC flags are rejected with -FI_ENOSYS;
 *   - a named AV is rejected with -FI_ENOSYS.
 *
 * On success a reference is taken on the domain, *av receives the new AV,
 * the type actually selected is written back to attr->type, and 0 is
 * returned. Returns -FI_ENOMEM if the AV object cannot be allocated.
 */
int psmx2_av_open(struct fid_domain *domain, struct fi_av_attr *attr,
		  struct fid_av **av, void *context)
{
	struct psmx2_fid_domain *domain_priv;
	struct psmx2_fid_av *av_priv;
	int type = FI_AV_MAP;
	size_t count = 64;
	uint64_t flags = 0;

	domain_priv = container_of(domain, struct psmx2_fid_domain,
				   util_domain.domain_fid);

	if (attr) {
		switch (attr->type) {
		case FI_AV_UNSPEC:
			break;

		case FI_AV_MAP:
		case FI_AV_TABLE:
			type = attr->type;
			break;
		default:
			FI_INFO(&psmx2_prov, FI_LOG_AV,
				"attr->type=%d, supported=%d %d\n",
				attr->type, FI_AV_MAP, FI_AV_TABLE);
			return -FI_EINVAL;
		}

		count = attr->count;
		flags = attr->flags;

		if (flags & (FI_READ | FI_SYMMETRIC)) {
			/* The flag values are 64-bit; print them with a
			 * matching conversion instead of %x, which expects
			 * an unsigned int argument.
			 */
			FI_INFO(&psmx2_prov, FI_LOG_AV,
				"attr->flags=%llx, supported=%llx\n",
				(unsigned long long) attr->flags,
				(unsigned long long) FI_EVENT);
			return -FI_ENOSYS;
		}

		if (attr->name) {
			FI_INFO(&psmx2_prov, FI_LOG_AV,
				"attr->name=%s, named AV is not supported\n",
				attr->name);
			return -FI_ENOSYS;
		}
	}

	av_priv = (struct psmx2_fid_av *) calloc(1, sizeof *av_priv);
	if (!av_priv)
		return -FI_ENOMEM;

	/* The AV keeps its domain alive for as long as it exists. */
	psmx2_domain_acquire(domain_priv);

	av_priv->domain = domain_priv;
	av_priv->type = type;
	av_priv->addrlen = sizeof(psm2_epaddr_t);
	av_priv->count = count;
	av_priv->flags = flags;

	av_priv->av.fid.fclass = FI_CLASS_AV;
	av_priv->av.fid.context = context;
	av_priv->av.fid.ops = &psmx2_fi_ops;
	av_priv->av.ops = &psmx2_av_ops;

	*av = &av_priv->av;
	/* Report the type that was actually chosen back to the caller. */
	if (attr)
		attr->type = type;

	return 0;
}
예제 #19
0
/*
 * Look up the IPv4 address of the first interface whose name starts with
 * the configured prefix ("ib" unless the "iface" provider parameter is
 * set) and the IPv4 address of the interface named "lo".
 *
 * For each of the two outputs, is_found is set to 1 and addr is filled in
 * when a matching interface exists; otherwise is_found stays 0.
 *
 * Returns 0 when the interface list could be walked (regardless of what
 * was found), -FI_EINVAL for NULL arguments or an over-long interface
 * name, or the non-zero result of getifaddrs() on failure.
 */
static int
fi_ibv_rdm_find_sysaddrs(struct fi_ibv_rdm_sysaddr *iface_addr,
			 struct fi_ibv_rdm_sysaddr *lo_addr)
{
	struct ifaddrs *ifaddr, *ifa;
	char iface[IFNAMSIZ];
	char *iface_tmp = "ib";
	size_t iface_len = 2;
	int ret;

	if (!iface_addr || !lo_addr) {
		return -FI_EINVAL;
	}

	iface_addr->is_found = 0;
	lo_addr->is_found = 0;

	if (fi_param_get_str(&fi_ibv_prov, "iface", &iface_tmp) == FI_SUCCESS) {
		iface_len = strlen(iface_tmp);
		if (iface_len > IFNAMSIZ) {
			VERBS_INFO(FI_LOG_EP_CTRL,
				   "Too long iface name: %s, max: %d\n",
				   iface_tmp, IFNAMSIZ);
			return -FI_EINVAL;
		}
	}

	/* iface holds exactly iface_len bytes and is not NUL-terminated; it
	 * is only ever compared with an explicit length below.
	 */
	memcpy(iface, iface_tmp, iface_len);

	ret = getifaddrs(&ifaddr);
	if (ret) {
		FI_WARN(&fi_ibv_prov, FI_LOG_FABRIC,
				"Unable to get interface addresses\n");
		return ret;
	}

	for (ifa = ifaddr; ifa; ifa = ifa->ifa_next) {
		/* getifaddrs() may return entries without an address
		 * (ifa_addr == NULL); skip them instead of dereferencing.
		 * Only IPv4 entries are of interest.
		 */
		if (!ifa->ifa_addr || ifa->ifa_addr->sa_family != AF_INET)
			continue;

		if (!iface_addr->is_found &&
		    !strncmp(ifa->ifa_name, iface, iface_len)) {
			memcpy(&iface_addr->addr, ifa->ifa_addr,
				sizeof(iface_addr->addr));
			iface_addr->is_found = 1;
			FI_INFO(&fi_ibv_prov, FI_LOG_FABRIC,
				"iface addr %s:%u\n",
				inet_ntoa(iface_addr->addr.sin_addr),
				ntohs(iface_addr->addr.sin_port));
		}

		/* Exact match on "lo". A length-limited compare bounded by
		 * the interface name's own length would also accept "l" or
		 * an empty name.
		 */
		if (!lo_addr->is_found && !strcmp(ifa->ifa_name, "lo")) {
			memcpy(&lo_addr->addr, ifa->ifa_addr, sizeof(lo_addr->addr));
			lo_addr->is_found = 1;
			FI_INFO(&fi_ibv_prov, FI_LOG_FABRIC, "lo addr %s:%u\n",
				inet_ntoa(lo_addr->addr.sin_addr),
				ntohs(lo_addr->addr.sin_port));
		}

		if (iface_addr->is_found && lo_addr->is_found) {
			break;
		}
	}

	freeifaddrs(ifaddr);

	return 0;
}
예제 #20
0
파일: psmx_util.c 프로젝트: xyuan/libfabric
/*
 * Client side of the simple name resolution mechanism: connect to the
 * name server running on `servername` and read the server's PSM epid.
 *
 * If `port` is 0, the port is derived from the current UUID.
 *
 * Returns a heap-allocated buffer of sizeof(psm_epid_t) bytes holding the
 * epid (the caller owns it and must free() it), or NULL on any failure.
 */
void *psmx_resolve_name(const char *servername, int port)
{
	struct addrinfo hints = {
		.ai_family   = AF_UNSPEC,
		.ai_socktype = SOCK_STREAM
	};
	struct addrinfo *res, *p;
	psm_uuid_t uuid;
	char *service;
	void *dest_addr;
	int sockfd = -1;
	int n;

	if (!port) {
		psmx_get_uuid(uuid);
		port = psmx_uuid_to_port(uuid);
	}

	if (asprintf(&service, "%d", port) < 0)
		return NULL;

	/* getaddrinfo() reports failure with any non-zero value; the sign of
	 * the EAI_* codes is platform-specific, and `res` is not valid after
	 * a failure.
	 */
	n = getaddrinfo(servername, service, &hints, &res);
	if (n != 0) {
		FI_INFO(&psmx_prov, FI_LOG_CORE,
			"(%s:%d):%s\n", servername, port, gai_strerror(n));
		free(service);
		return NULL;
	}

	/* Try each candidate address until one connects. */
	for (p = res; p; p = p->ai_next) {
		sockfd = socket(p->ai_family, p->ai_socktype, p->ai_protocol);
		if (sockfd >= 0) {
			if (!connect(sockfd, p->ai_addr, p->ai_addrlen))
				break;
			close(sockfd);
			sockfd = -1;
		}
	}

	freeaddrinfo(res);
	free(service);

	if (sockfd < 0) {
		FI_INFO(&psmx_prov, FI_LOG_CORE,
			"couldn't connect to %s:%d\n", servername, port);
		return NULL;
	}

	dest_addr = calloc(1,sizeof(psm_epid_t));
	if (!dest_addr) {
		close(sockfd);
		return NULL;
	}

	/* The server sends exactly one epid and then closes the connection. */
	if (read(sockfd, dest_addr, sizeof(psm_epid_t)) != sizeof(psm_epid_t)) {
		perror(__func__);
		free(dest_addr);
		close(sockfd);
		return NULL;
	}

	close(sockfd);

	return dest_addr;
}
예제 #21
0
/*
 * Validate user-requested endpoint attributes against what `info`
 * advertises.
 *
 * The checks run in a fixed order (endpoint type, protocol, protocol
 * version, max message size, RAW/WAR/WAW ordering sizes, tx and rx context
 * counts). The first one that fails is logged and -FI_ENODATA is returned;
 * 0 is returned when all of them pass.
 */
int fi_ibv_check_ep_attr(const struct fi_ep_attr *attr,
			 const struct fi_info *info)
{
	/* An unspecified type matches anything. */
	if (attr->type != FI_EP_UNSPEC) {
		if (attr->type != info->ep_attr->type) {
			FI_INFO(&fi_ibv_prov, FI_LOG_CORE,
				"Unsupported endpoint type\n");
			return -FI_ENODATA;
		}
	}

	/* Only the protocols listed here are accepted. */
	if (attr->protocol != FI_PROTO_UNSPEC &&
	    attr->protocol != FI_PROTO_RDMA_CM_IB_RC &&
	    attr->protocol != FI_PROTO_IWARP &&
	    attr->protocol != FI_PROTO_IB_UD &&
	    attr->protocol != FI_PROTO_IB_RDM &&
	    attr->protocol != FI_PROTO_IWARP_RDM) {
		FI_INFO(&fi_ibv_prov, FI_LOG_CORE,
			"Unsupported protocol\n");
		return -FI_ENODATA;
	}

	if (attr->protocol_version > 1) {
		FI_INFO(&fi_ibv_prov, FI_LOG_CORE,
			"Unsupported protocol version\n");
		return -FI_ENODATA;
	}

	if (attr->max_msg_size > info->ep_attr->max_msg_size) {
		FI_INFO(&fi_ibv_prov, FI_LOG_CORE,
			"Max message size too large\n");
		return -FI_ENODATA;
	}

	if (attr->max_order_raw_size > info->ep_attr->max_order_raw_size) {
		FI_INFO(&fi_ibv_prov, FI_LOG_CORE,
			"max_order_raw_size exceeds supported size\n");
		return -FI_ENODATA;
	}

	/* Any non-zero WAR ordering size is rejected. */
	if (attr->max_order_war_size != 0) {
		FI_INFO(&fi_ibv_prov, FI_LOG_CORE,
			"max_order_war_size exceeds supported size\n");
		return -FI_ENODATA;
	}

	if (attr->max_order_waw_size > info->ep_attr->max_order_waw_size) {
		FI_INFO(&fi_ibv_prov, FI_LOG_CORE,
			"max_order_waw_size exceeds supported size\n");
		return -FI_ENODATA;
	}

	if (attr->tx_ctx_cnt > info->domain_attr->max_ep_tx_ctx) {
		FI_INFO(&fi_ibv_prov, FI_LOG_CORE,
			"tx_ctx_cnt exceeds supported size\n");
		return -FI_ENODATA;
	}

	/* FI_SHARED_CONTEXT is exempt from the rx context limit. */
	if (attr->rx_ctx_cnt > info->domain_attr->max_ep_rx_ctx) {
		if (attr->rx_ctx_cnt != FI_SHARED_CONTEXT) {
			FI_INFO(&fi_ibv_prov, FI_LOG_CORE,
				"rx_ctx_cnt exceeds supported size\n");
			return -FI_ENODATA;
		}
	}

	return 0;
}
예제 #22
0
파일: psmx_util.c 프로젝트: xyuan/libfabric
/*************************************************************
 * A simple name resolution mechanism for client-server style
 * applications. The server side has to run first. The client
 * side then passes the server name as the "node" parameter
 * of fi_getinfo call and the resulting provider info should
 * have the transport address of the server in the "dest_addr"
 * field. Both sides have to use the same UUID.
 *
 * This is the server side. `args` is the fabric object; the
 * listening port is derived from the fabric's UUID. Every
 * accepted connection is sent the epid of the fabric's active
 * domain (if there is one) and is then closed.
 *
 * The accept loop has no exit condition; the function only
 * returns (NULL) when the listening socket cannot be set up.
 *************************************************************/
void *psmx_name_server(void *args)
{
	struct psmx_fid_fabric *fabric;
	struct addrinfo hints = {
		.ai_flags = AI_PASSIVE,
		.ai_family = AF_UNSPEC,
		.ai_socktype = SOCK_STREAM
	};
	struct addrinfo *res, *p;
	char *service;
	int listenfd = -1, connfd;
	int port;
	int n;
	int ret;

	fabric = args;
	port = psmx_uuid_to_port(fabric->uuid);

	if (asprintf(&service, "%d", port) < 0)
		return NULL;

	/* getaddrinfo() reports failure with any non-zero value; the sign of
	 * the EAI_* codes is platform-specific, and `res` is not valid after
	 * a failure.
	 */
	n = getaddrinfo(NULL, service, &hints, &res);
	if (n != 0) {
		FI_INFO(&psmx_prov, FI_LOG_CORE,
			"port %d: %s\n", port, gai_strerror(n));
		free(service);
		return NULL;
	}

	/* Bind to the first candidate address that works. */
	for (p=res; p; p=p->ai_next) {
		listenfd = socket(p->ai_family, p->ai_socktype, p->ai_protocol);
		if (listenfd >= 0) {
			n = 1;
			if (setsockopt(listenfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof(n)) == -1)
				FI_WARN(&psmx_prov, FI_LOG_CORE, "setsockopt: %s\n", strerror(errno));
			if (!bind(listenfd, p->ai_addr, p->ai_addrlen))
				break;
			close(listenfd);
			listenfd = -1;
		}
	}

	freeaddrinfo(res);
	free(service);

	if (listenfd < 0) {
		FI_INFO(&psmx_prov, FI_LOG_CORE,
			"couldn't listen to port %d. try set FI_PSM_UUID to a different value?\n", port);
		return NULL;
	}

	/* If listen() fails, accept() below would fail on every iteration
	 * and the loop would spin; give up instead.
	 */
	if (listen(listenfd, 256) < 0) {
		FI_WARN(&psmx_prov, FI_LOG_CORE, "listen: %s\n", strerror(errno));
		close(listenfd);
		return NULL;
	}

	while (1) {
		connfd = accept(listenfd, NULL, 0);
		if (connfd >= 0) {
			if (fabric->active_domain) {
				ret = write(connfd, &fabric->active_domain->psm_epid,
					    sizeof(psm_epid_t));
				if (ret != sizeof(psm_epid_t))
					FI_WARN(&psmx_prov, FI_LOG_CORE,
						"error sending address info to the client\n");
			}
			close(connfd);
		}
	}

	return NULL;
}
예제 #23
0
/*
 * Build the global `verbs_info` list once.
 *
 * If the list already exists the function returns 0 immediately.
 * Otherwise, under verbs_info_lock, it checks that an RDMA device is
 * present, enables IB fork support unless the "fork_unsafe" parameter is
 * set, enumerates the devices and, for each one, appends an fi_info for
 * the MSG domain followed by one for the RDM domain (the RDM entry is only
 * attempted when the MSG entry was created).
 *
 * Returns 0 if at least one fi_info was created, otherwise a negative
 * error code.
 */
int fi_ibv_init_info(void)
{
	struct ibv_context **ctx_list;
	struct fi_info *fi = NULL, *tail = NULL;
	/* num_devices starts at 0 because rdma_get_devices() may return NULL
	 * without a usable count.
	 */
	int ret = 0, i, num_devices = 0, fork_unsafe = 0;

	if (verbs_info)
		return 0;

	pthread_mutex_lock(&verbs_info_lock);
	/* Re-check now that the lock is held. */
	if (verbs_info)
		goto unlock;

	if (!fi_ibv_have_device()) {
		VERBS_INFO(FI_LOG_FABRIC, "No RDMA devices found\n");
		ret = -FI_ENODATA;
		goto unlock;
	}

	fi_param_get_bool(NULL, "fork_unsafe", &fork_unsafe);

	if (!fork_unsafe) {
		FI_INFO(&fi_ibv_prov, FI_LOG_CORE, "Enabling IB fork support\n");
		ret = ibv_fork_init();
		if (ret) {
			FI_WARN(&fi_ibv_prov, FI_LOG_CORE,
					"Enabling IB fork support failed: %s (%d)\n",
					strerror(ret), ret);
			/* ibv_fork_init() returns a positive errno value;
			 * this function reports errors as negative codes.
			 */
			ret = -ret;
			goto unlock;
		}
	} else {
		FI_INFO(&fi_ibv_prov, FI_LOG_CORE, "Not enabling IB fork support\n");
	}

	ctx_list = rdma_get_devices(&num_devices);
	if (!ctx_list || !num_devices) {
		VERBS_INFO_ERRNO(FI_LOG_FABRIC, "rdma_get_devices", errno);
		/* Never report success here: errno may be 0 when the call
		 * simply found no devices.
		 */
		ret = errno ? -errno : -FI_ENODATA;
		if (ctx_list)
			rdma_free_devices(ctx_list);
		goto unlock;
	}

	for (i = 0; i < num_devices; i++) {
		ret = fi_ibv_alloc_info(ctx_list[i], &fi, &verbs_msg_domain);
		if (!ret) {
			if (!verbs_info)
				verbs_info = fi;
			else
				tail->next = fi;
			tail = fi;

			ret = fi_ibv_alloc_info(ctx_list[i], &fi,
						&verbs_rdm_domain);
			if (!ret) {
				tail->next = fi;
				tail = fi;
			}
		}
	}

	/* Individual device failures are ignored as long as something was
	 * added to the list.
	 */
	ret = verbs_info ? 0 : ret;

	rdma_free_devices(ctx_list);
unlock:
	pthread_mutex_unlock(&verbs_info_lock);
	return ret;
}
예제 #24
0
/*
 * Active-message handler for RMA requests and replies.
 *
 * The command, destination virtual lane, end-of-message flag and
 * "has data" flag are decoded from args[0].u32w0; the endpoint is looked
 * up in the active domain by destination virtual lane.
 *
 * Requests (target side):
 *   REQ_WRITE       validate the MR, copy the payload into place and, on
 *                   the last fragment, generate the optional CQ event and
 *                   bump counters; reply on the last fragment or on error.
 *   REQ_WRITE_LONG  validate the MR; on error reply immediately, otherwise
 *                   queue a request via psmx2_am_enqueue_rma().
 *   REQ_READ        validate the MR and reply with the data (or with an
 *                   empty payload and the error code).
 *   REQ_READ_LONG   validate the MR; on error reply immediately, otherwise
 *                   queue a request via psmx2_am_enqueue_rma().
 *
 * Replies (initiator side), where args[1].u64 carries the request pointer:
 *   REP_WRITE       record the first error; on the last reply generate
 *                   the CQ event, bump the write counter, free the request.
 *   REP_READ        copy the returned data into the destination buffer or
 *                   iov; complete when the last reply arrives or all
 *                   requested bytes have been read.
 *
 * Returns 0, -FI_ENOMEM (event or request allocation failed), -FI_EINVAL
 * (unknown command), or the result of psm2_am_reply_short().
 */
int psmx2_am_rma_handler(psm2_am_token_t token, psm2_amarg_t *args,
			 int nargs, void *src, uint32_t len)
{
	psm2_amarg_t rep_args[8];
	uint8_t *rma_addr;
	ssize_t rma_len;
	uint64_t key;
	int err = 0;
	int op_error = 0;
	int cmd, eom, has_data;
	struct psmx2_am_request *req;
	struct psmx2_cq_event *event;
	uint64_t offset;
	struct psmx2_fid_mr *mr;
	psm2_epaddr_t epaddr;
	uint8_t dst_vl, src_vl;
	struct psmx2_fid_domain *domain;
	struct psmx2_fid_ep *ep;

	psm2_am_get_source(token, &epaddr);

	cmd = PSMX2_AM_GET_OP(args[0].u32w0);
	dst_vl = PSMX2_AM_GET_DST(args[0].u32w0);

	domain = psmx2_active_fabric->active_domain;
	ep = domain->eps[dst_vl];

	eom = args[0].u32w0 & PSMX2_AM_EOM;
	has_data = args[0].u32w0 & PSMX2_AM_DATA;

	switch (cmd) {
	case PSMX2_AM_REQ_WRITE:
		rma_len = args[0].u32w1;
		rma_addr = (uint8_t *)(uintptr_t)args[2].u64;
		key = args[3].u64;
		mr = psmx2_mr_get(domain, key);
		/* Validation uses the length of this fragment (len); rma_len
		 * is only used for the completion event below.
		 */
		op_error = mr ?
			psmx2_mr_validate(mr, (uint64_t)rma_addr, len, FI_REMOTE_WRITE) :
			-FI_EINVAL;
		if (!op_error) {
			rma_addr += mr->offset;
			memcpy(rma_addr, src, len);
			if (eom) {
				/* A remote CQ entry is generated only when
				 * the request carries data.
				 */
				if (ep->recv_cq && has_data) {
					/* TODO: report the addr/len of the whole write */
					event = psmx2_cq_create_event(
							ep->recv_cq,
							0, /* context */
							rma_addr,
							FI_REMOTE_WRITE | FI_RMA | FI_REMOTE_CQ_DATA,
							rma_len,
							args[4].u64,
							0, /* tag */
							0, /* olen */
							0);

					if (event)
						psmx2_cq_enqueue_event(ep->recv_cq, event);
					else
						err = -FI_ENOMEM;
				}

				if (ep->remote_write_cntr)
					psmx2_cntr_inc(ep->remote_write_cntr);

				/* Avoid counting twice when the MR and the
				 * endpoint share the same counter.
				 */
				if (mr->cntr && mr->cntr != ep->remote_write_cntr)
					psmx2_cntr_inc(mr->cntr);
			}
		}
		/* Reply on the last fragment, or as soon as an error is seen.
		 * args[1].u64 is echoed back unchanged.
		 */
		if (eom || op_error) {
			rep_args[0].u32w0 = PSMX2_AM_REP_WRITE | eom;
			rep_args[0].u32w1 = op_error;
			rep_args[1].u64 = args[1].u64;
			err = psm2_am_reply_short(token, PSMX2_AM_RMA_HANDLER,
						  rep_args, 2, NULL, 0, 0,
						  NULL, NULL );
		}
		break;

	case PSMX2_AM_REQ_WRITE_LONG:
		src_vl = PSMX2_AM_GET_SRC(args[0].u32w0);
		rma_len = args[0].u32w1;
		rma_addr = (uint8_t *)(uintptr_t)args[2].u64;
		key = args[3].u64;
		mr = psmx2_mr_get(domain, key);
		op_error = mr ?
			psmx2_mr_validate(mr, (uint64_t)rma_addr, rma_len, FI_REMOTE_WRITE) :
			-FI_EINVAL;
		if (op_error) {
			rep_args[0].u32w0 = PSMX2_AM_REP_WRITE | eom;
			rep_args[0].u32w1 = op_error;
			rep_args[1].u64 = args[1].u64;
			err = psm2_am_reply_short(token, PSMX2_AM_RMA_HANDLER,
						  rep_args, 2, NULL, 0, 0,
						  NULL, NULL );
			break;
		}

		rma_addr += mr->offset;

		/* The transfer itself is not done here: a request describing
		 * it is queued on the MR's domain. Note that no reply is sent
		 * if the request cannot be allocated.
		 */
		req = calloc(1, sizeof(*req));
		if (!req) {
			err = -FI_ENOMEM;
		} else {
			req->ep = ep;
			req->op = args[0].u32w0;
			req->write.addr = (uint64_t)rma_addr;
			req->write.len = rma_len;
			req->write.key = key;
			req->write.context = (void *)args[1].u64;
			req->write.peer_addr = (void *)epaddr;
			req->write.vl = dst_vl;
			req->write.peer_vl = src_vl;
			req->write.data = has_data ? args[4].u64 : 0;
			/* NOTE(review): the next statement ends with ','
			 * rather than ';', so it is joined to the
			 * PSMX2_CTXT_TYPE assignment by the comma operator.
			 * Both assignments still execute; presumably a typo.
			 */
			req->cq_flags = FI_REMOTE_WRITE | FI_RMA |
					(has_data ? FI_REMOTE_CQ_DATA : 0),
			PSMX2_CTXT_TYPE(&req->fi_context) = PSMX2_REMOTE_WRITE_CONTEXT;
			PSMX2_CTXT_USER(&req->fi_context) = mr;
			psmx2_am_enqueue_rma(mr->domain, req);
		}
		break;

	case PSMX2_AM_REQ_READ:
		rma_len = args[0].u32w1;
		rma_addr = (uint8_t *)(uintptr_t)args[2].u64;
		key = args[3].u64;
		offset = args[4].u64;
		mr = psmx2_mr_get(domain, key);
		op_error = mr ?
			psmx2_mr_validate(mr, (uint64_t)rma_addr, rma_len, FI_REMOTE_READ) :
			-FI_EINVAL;
		if (!op_error) {
			rma_addr += mr->offset;
		} else {
			/* On error the reply carries no payload. */
			rma_addr = NULL;
			rma_len = 0;
		}

		/* The reply echoes args[1].u64 and the offset so the
		 * initiator can place the data.
		 */
		rep_args[0].u32w0 = PSMX2_AM_REP_READ | eom;
		rep_args[0].u32w1 = op_error;
		rep_args[1].u64 = args[1].u64;
		rep_args[2].u64 = offset;
		err = psm2_am_reply_short(token, PSMX2_AM_RMA_HANDLER,
				rep_args, 3, rma_addr, rma_len, 0,
				NULL, NULL );

		if (eom && !op_error) {
			if (ep->remote_read_cntr)
				psmx2_cntr_inc(ep->remote_read_cntr);
		}
		break;

	case PSMX2_AM_REQ_READ_LONG:
		src_vl = PSMX2_AM_GET_SRC(args[0].u32w0);
		rma_len = args[0].u32w1;
		rma_addr = (uint8_t *)(uintptr_t)args[2].u64;
		key = args[3].u64;
		mr = psmx2_mr_get(domain, key);
		op_error = mr ?
			psmx2_mr_validate(mr, (uint64_t)rma_addr, rma_len, FI_REMOTE_READ) :
			-FI_EINVAL;
		if (op_error) {
			rep_args[0].u32w0 = PSMX2_AM_REP_READ | eom;
			rep_args[0].u32w1 = op_error;
			rep_args[1].u64 = args[1].u64;
			rep_args[2].u64 = 0;
			err = psm2_am_reply_short(token, PSMX2_AM_RMA_HANDLER,
					rep_args, 3, NULL, 0, 0,
					NULL, NULL );
			break;
		}

		rma_addr += mr->offset;

		/* As for long writes, the transfer is queued as a request
		 * rather than performed here; no reply is sent if the request
		 * cannot be allocated.
		 */
		req = calloc(1, sizeof(*req));
		if (!req) {
			err = -FI_ENOMEM;
		} else {
			req->ep = ep;
			req->op = args[0].u32w0;
			req->read.addr = (uint64_t)rma_addr;
			req->read.len = rma_len;
			req->read.key = key;
			req->read.context = (void *)args[1].u64;
			req->read.peer_addr = (void *)epaddr;
			req->read.vl = dst_vl;
			req->read.peer_vl = src_vl;
			PSMX2_CTXT_TYPE(&req->fi_context) = PSMX2_REMOTE_READ_CONTEXT;
			PSMX2_CTXT_USER(&req->fi_context) = mr;
			psmx2_am_enqueue_rma(mr->domain, req);
		}
		break;

	case PSMX2_AM_REP_WRITE:
		/* args[1].u64 is the request pointer echoed back by the peer. */
		req = (struct psmx2_am_request *)(uintptr_t)args[1].u64;
		assert(req->op == PSMX2_AM_REQ_WRITE);
		op_error = (int)args[0].u32w1;
		/* Keep the first error that was reported for this request. */
		if (!req->error)
			req->error = op_error;
		if (eom) {
			if (req->ep->send_cq && !req->no_event) {
				event = psmx2_cq_create_event(
						req->ep->send_cq,
						req->write.context,
						req->write.buf,
						req->cq_flags,
						req->write.len,
						0, /* data */
						0, /* tag */
						0, /* olen */
						req->error);
				if (event)
					psmx2_cq_enqueue_event(req->ep->send_cq, event);
				else
					err = -FI_ENOMEM;
			}

			if (req->ep->write_cntr)
				psmx2_cntr_inc(req->ep->write_cntr);

			free(req);
		}
		break;

	case PSMX2_AM_REP_READ:
		/* args[1].u64 is the request pointer echoed back by the peer;
		 * args[2].u64 is the offset of this chunk in the destination.
		 */
		req = (struct psmx2_am_request *)(uintptr_t)args[1].u64;
		assert(req->op == PSMX2_AM_REQ_READ || req->op == PSMX2_AM_REQ_READV);
		op_error = (int)args[0].u32w1;
		offset = args[2].u64;
		/* Keep the first error that was reported for this request. */
		if (!req->error)
			req->error = op_error;
		if (!op_error) {
			if (req->op == PSMX2_AM_REQ_READ)
				memcpy(req->read.buf + offset, src, len);
			else 
				psmx2_iov_copy(req->iov, req->read.iov_count, offset, src, len);

			req->read.len_read += len;
		}
		/* Complete on the last reply, or once every requested byte
		 * has arrived even if this reply is not flagged as the last.
		 */
		if (eom || req->read.len == req->read.len_read) {
			if (!eom)
				FI_INFO(&psmx2_prov, FI_LOG_EP_DATA,
					"readv: short protocol finishes after long protocol.\n");
			if (req->ep->send_cq && !req->no_event) {
				/* olen reports the bytes that were requested
				 * but not read.
				 */
				event = psmx2_cq_create_event(
						req->ep->send_cq,
						req->read.context,
						req->read.buf,
						req->cq_flags,
						req->read.len_read,
						0, /* data */
						0, /* tag */
						req->read.len - req->read.len_read,
						req->error);
				if (event)
					psmx2_cq_enqueue_event(req->ep->send_cq, event);
				else
					err = -FI_ENOMEM;
			}

			if (req->ep->read_cntr)
				psmx2_cntr_inc(req->ep->read_cntr);

			free(req);
		}
		break;

	default:
		err = -FI_EINVAL;
	}
	return err;
}
예제 #25
0
/*
 * Check the endpoint attributes requested in user_info against what the
 * provider advertises in prov_info.
 *
 * Checks, in order: endpoint type, protocol, protocol version, maximum
 * message size, tx and rx context counts (FI_SHARED_CONTEXT is accepted
 * when the domain supports shared tx/rx contexts), the RAW/WAR/WAW
 * ordering sizes (only when the user asks for FI_RMA or FI_ATOMIC), and
 * the authentication key size.
 *
 * Returns 0 when every requested attribute is supported, the result of
 * ofi_check_ep_type() if that check fails, and -FI_ENODATA otherwise.
 * api_version is not used by the checks below.
 */
int ofi_check_ep_attr(const struct util_prov *util_prov, uint32_t api_version,
		      const struct fi_info *prov_info,
		      const struct fi_info *user_info)
{
	const struct fi_ep_attr *prov_attr = prov_info->ep_attr;
	const struct fi_ep_attr *user_attr = user_info->ep_attr;
	const struct fi_provider *prov = util_prov->prov;
	int ret;

	ret = ofi_check_ep_type(prov, prov_attr, user_attr);
	if (ret)
		return ret;

	if ((user_attr->protocol != FI_PROTO_UNSPEC) &&
	    (user_attr->protocol != prov_attr->protocol)) {
		FI_INFO(prov, FI_LOG_CORE, "Unsupported protocol\n");
		FI_INFO_CHECK(prov, prov_attr, user_attr, protocol, FI_TYPE_PROTOCOL);
		return -FI_ENODATA;
	}

	/* A protocol version of 0 means "no preference". */
	if (user_attr->protocol_version &&
	    (user_attr->protocol_version > prov_attr->protocol_version)) {
		FI_INFO(prov, FI_LOG_CORE, "Unsupported protocol version\n");
		return -FI_ENODATA;
	}

	if (user_attr->max_msg_size > prov_attr->max_msg_size) {
		FI_INFO(prov, FI_LOG_CORE, "Max message size too large\n");
		FI_INFO_CHECK_VAL(prov, prov_attr, user_attr, max_msg_size);
		return -FI_ENODATA;
	}

	if (user_attr->tx_ctx_cnt > prov_info->domain_attr->max_ep_tx_ctx) {
		if (user_attr->tx_ctx_cnt == FI_SHARED_CONTEXT) {
			if (!prov_info->domain_attr->max_ep_stx_ctx) {
				FI_INFO(prov, FI_LOG_CORE,
					"Shared tx context not supported\n");
				return -FI_ENODATA;
			}
		} else {
			/* The counts are size_t, hence %zu. */
			FI_INFO(prov, FI_LOG_CORE,
				"Requested tx_ctx_cnt exceeds supported."
				" Expected: %zu, Requested: %zu\n",
				prov_info->domain_attr->max_ep_tx_ctx,
				user_attr->tx_ctx_cnt);
			return -FI_ENODATA;
		}
	}

	if (user_attr->rx_ctx_cnt > prov_info->domain_attr->max_ep_rx_ctx) {
		if (user_attr->rx_ctx_cnt == FI_SHARED_CONTEXT) {
			if (!prov_info->domain_attr->max_ep_srx_ctx) {
				FI_INFO(prov, FI_LOG_CORE,
					"Shared rx context not supported\n");
				return -FI_ENODATA;
			}
		} else {
			FI_INFO(prov, FI_LOG_CORE,
				"Requested rx_ctx_cnt exceeds supported."
				" Expected: %zu, Requested: %zu\n",
				prov_info->domain_attr->max_ep_rx_ctx,
				user_attr->rx_ctx_cnt);
			return -FI_ENODATA;
		}
	}

	/* Ordering sizes are only checked for RMA/atomic users. */
	if (user_info->caps & (FI_RMA | FI_ATOMIC)) {
		if (user_attr->max_order_raw_size >
		    prov_attr->max_order_raw_size) {
			FI_INFO(prov, FI_LOG_CORE,
				"Max order RAW size exceeds supported size\n");
			FI_INFO_CHECK_VAL(prov, prov_attr, user_attr,
					  max_order_raw_size);
			return -FI_ENODATA;
		}

		if (user_attr->max_order_war_size >
		    prov_attr->max_order_war_size) {
			FI_INFO(prov, FI_LOG_CORE,
				"Max order WAR size exceeds supported size\n");
			FI_INFO_CHECK_VAL(prov, prov_attr, user_attr,
					  max_order_war_size);
			return -FI_ENODATA;
		}

		if (user_attr->max_order_waw_size >
		    prov_attr->max_order_waw_size) {
			FI_INFO(prov, FI_LOG_CORE,
				"Max order WAW size exceeds supported size\n");
			FI_INFO_CHECK_VAL(prov, prov_attr, user_attr,
					  max_order_waw_size);
			return -FI_ENODATA;
		}
	}

	/* An auth key size of 0 means "no preference". */
	if (user_attr->auth_key_size &&
	    (user_attr->auth_key_size != prov_attr->auth_key_size)) {
		FI_INFO(prov, FI_LOG_CORE, "Unsupported authentication size.\n");
		FI_INFO_CHECK_VAL(prov, prov_attr, user_attr, auth_key_size);
		return -FI_ENODATA;
	}

	return 0;
}
예제 #26
0
int psmx_process_trigger(struct psmx_fid_domain *domain, struct psmx_trigger *trigger)
{
	switch (trigger->op) {
	case PSMX_TRIGGERED_SEND:
		_psmx_send(trigger->send.ep,
			   trigger->send.buf,
			   trigger->send.len,
			   trigger->send.desc,
			   trigger->send.dest_addr,
			   trigger->send.context,
			   trigger->send.flags);
		break;
	case PSMX_TRIGGERED_RECV:
		_psmx_recv(trigger->recv.ep,
			   trigger->recv.buf,
			   trigger->recv.len,
			   trigger->recv.desc,
			   trigger->recv.src_addr,
			   trigger->recv.context,
			   trigger->recv.flags);
		break;
	case PSMX_TRIGGERED_TSEND:
		_psmx_tagged_send(trigger->tsend.ep,
				  trigger->tsend.buf,
				  trigger->tsend.len,
				  trigger->tsend.desc,
				  trigger->tsend.dest_addr,
				  trigger->tsend.tag,
				  trigger->tsend.context,
				  trigger->tsend.flags);
		break;
	case PSMX_TRIGGERED_TRECV:
		_psmx_tagged_recv(trigger->trecv.ep,
				  trigger->trecv.buf,
				  trigger->trecv.len,
				  trigger->trecv.desc,
				  trigger->trecv.src_addr,
				  trigger->trecv.tag,
				  trigger->trecv.ignore,
				  trigger->trecv.context,
				  trigger->trecv.flags);
		break;
	case PSMX_TRIGGERED_WRITE:
		_psmx_write(trigger->write.ep,
			    trigger->write.buf,
			    trigger->write.len,
			    trigger->write.desc,
			    trigger->write.dest_addr,
			    trigger->write.addr,
			    trigger->write.key,
			    trigger->write.context,
			    trigger->write.flags,
			    trigger->write.data);
		break;

	case PSMX_TRIGGERED_READ:
		_psmx_read(trigger->read.ep,
			   trigger->read.buf,
			   trigger->read.len,
			   trigger->read.desc,
			   trigger->read.src_addr,
			   trigger->read.addr,
			   trigger->read.key,
			   trigger->read.context,
			   trigger->read.flags);
		break;

	case PSMX_TRIGGERED_ATOMIC_WRITE:
		_psmx_atomic_write(trigger->atomic_write.ep,
				   trigger->atomic_write.buf,
				   trigger->atomic_write.count,
				   trigger->atomic_write.desc,
				   trigger->atomic_write.dest_addr,
				   trigger->atomic_write.addr,
				   trigger->atomic_write.key,
				   trigger->atomic_write.datatype,
				   trigger->atomic_write.atomic_op,
				   trigger->atomic_write.context,
				   trigger->atomic_write.flags);
		break;

	case PSMX_TRIGGERED_ATOMIC_READWRITE:
		_psmx_atomic_readwrite(trigger->atomic_readwrite.ep,
					trigger->atomic_readwrite.buf,
					trigger->atomic_readwrite.count,
					trigger->atomic_readwrite.desc,
					trigger->atomic_readwrite.result,
					trigger->atomic_readwrite.result_desc,
					trigger->atomic_readwrite.dest_addr,
					trigger->atomic_readwrite.addr,
					trigger->atomic_readwrite.key,
					trigger->atomic_readwrite.datatype,
					trigger->atomic_readwrite.atomic_op,
					trigger->atomic_readwrite.context,
					trigger->atomic_readwrite.flags);
		break;

	case PSMX_TRIGGERED_ATOMIC_COMPWRITE:
		_psmx_atomic_compwrite(trigger->atomic_compwrite.ep,
					trigger->atomic_compwrite.buf,
					trigger->atomic_compwrite.count,
					trigger->atomic_compwrite.desc,
					trigger->atomic_compwrite.compare,
					trigger->atomic_compwrite.compare_desc,
					trigger->atomic_compwrite.result,
					trigger->atomic_compwrite.result_desc,
					trigger->atomic_compwrite.dest_addr,
					trigger->atomic_compwrite.addr,
					trigger->atomic_compwrite.key,
					trigger->atomic_compwrite.datatype,
					trigger->atomic_compwrite.atomic_op,
					trigger->atomic_compwrite.context,
					trigger->atomic_compwrite.flags);
		break;
	default:
		FI_INFO(&psmx_prov, FI_LOG_CQ,
			"%d unsupported op\n", trigger->op);
		break;
	}

	free(trigger);
	return 0;
}
예제 #27
0
/*
 * Check the transmit attributes requested by the user against what the
 * provider supports.
 *
 * The user's caps, op_flags, msg_order and comp_order must each be a
 * subset of the provider's bits; the effective mode (user_attr->mode if
 * non-zero, otherwise info_mode) must include every mode bit the provider
 * requires; inject_size, size, iov_limit and rma_iov_limit must not exceed
 * the provider's values.
 *
 * Returns 0 if everything is supported, -FI_ENODATA at the first
 * mismatch (after logging it).
 */
int ofi_check_tx_attr(const struct fi_provider *prov,
		      const struct fi_tx_attr *prov_attr,
		      const struct fi_tx_attr *user_attr, uint64_t info_mode)
{
	if (user_attr->caps & ~(prov_attr->caps)) {
		FI_INFO(prov, FI_LOG_CORE, "caps not supported\n");
		FI_INFO_CHECK(prov, prov_attr, user_attr, caps, FI_TYPE_CAPS);
		return -FI_ENODATA;
	}

	/* A mode set on the tx attributes overrides the fi_info mode. */
	info_mode = user_attr->mode ? user_attr->mode : info_mode;
	if ((info_mode & prov_attr->mode) != prov_attr->mode) {
		FI_INFO(prov, FI_LOG_CORE, "needed mode not set\n");
		FI_INFO_MODE(prov, prov_attr->mode, user_attr->mode);
		return -FI_ENODATA;
	}

	/* Compare the user's op_flags with the provider's. Testing the
	 * provider's flags against themselves can never be non-zero, so
	 * unsupported op_flags would never be rejected.
	 */
	if (user_attr->op_flags & ~(prov_attr->op_flags)) {
		FI_INFO(prov, FI_LOG_CORE, "op_flags not supported\n");
		FI_INFO_CHECK(prov, prov_attr, user_attr, op_flags,
			     FI_TYPE_OP_FLAGS);
		return -FI_ENODATA;
	}

	if (user_attr->msg_order & ~(prov_attr->msg_order)) {
		FI_INFO(prov, FI_LOG_CORE, "msg_order not supported\n");
		FI_INFO_CHECK(prov, prov_attr, user_attr, msg_order,
			     FI_TYPE_MSG_ORDER);
		return -FI_ENODATA;
	}

	if (user_attr->comp_order & ~(prov_attr->comp_order)) {
		FI_INFO(prov, FI_LOG_CORE, "comp_order not supported\n");
		FI_INFO_CHECK(prov, prov_attr, user_attr, comp_order,
			     FI_TYPE_MSG_ORDER);
		return -FI_ENODATA;
	}

	if (user_attr->inject_size > prov_attr->inject_size) {
		FI_INFO(prov, FI_LOG_CORE, "inject_size too large\n");
		FI_INFO_CHECK_VAL(prov, prov_attr, user_attr, inject_size);
		return -FI_ENODATA;
	}

	if (user_attr->size > prov_attr->size) {
		FI_INFO(prov, FI_LOG_CORE, "size is greater than supported\n");
		FI_INFO_CHECK_VAL(prov, prov_attr, user_attr, size);
		return -FI_ENODATA;
	}

	if (user_attr->iov_limit > prov_attr->iov_limit) {
		FI_INFO(prov, FI_LOG_CORE, "iov_limit too large\n");
		FI_INFO_CHECK_VAL(prov, prov_attr, user_attr, iov_limit);
		return -FI_ENODATA;
	}

	if (user_attr->rma_iov_limit > prov_attr->rma_iov_limit) {
		FI_INFO(prov, FI_LOG_CORE, "rma_iov_limit too large\n");
		FI_INFO_CHECK_VAL(prov, prov_attr, user_attr, rma_iov_limit);
		return -FI_ENODATA;
	}

	return 0;
}
예제 #28
0
static ssize_t
fi_ibv_rdm_process_connect_request(struct rdma_cm_event *event,
					  struct fi_ibv_rdm_ep *ep)
{
	struct ibv_qp_init_attr qp_attr;
	struct rdma_conn_param cm_params;
	struct fi_ibv_rdm_tagged_conn *conn = NULL;
	struct rdma_cm_id *id = event->id;
	ssize_t ret = FI_SUCCESS;

	char *p = (char *) event->param.conn.private_data;

	if (ep->is_closing) {
		int rej_message = 0xdeadbeef;
		if (rdma_reject(id, &rej_message, sizeof(int))) {
			VERBS_INFO_ERRNO(FI_LOG_AV, "rdma_reject\n", errno);
			ret = -errno;
			if (rdma_destroy_id(id)) {
				VERBS_INFO_ERRNO(FI_LOG_AV, "rdma_destroy_id\n",
						 errno);
				ret = (ret == FI_SUCCESS) ? -errno : ret;
			}
		}
		assert(ret == FI_SUCCESS);
		return ret;
	}

	HASH_FIND(hh, fi_ibv_rdm_tagged_conn_hash, p, FI_IBV_RDM_DFLT_ADDRLEN,
		  conn);

	if (!conn) {
		conn = memalign(FI_IBV_RDM_MEM_ALIGNMENT, sizeof(*conn));
		if (!conn)
			return -FI_ENOMEM;

		memset(conn, 0, sizeof(struct fi_ibv_rdm_tagged_conn));

		conn->state = FI_VERBS_CONN_ALLOCATED;
		dlist_init(&conn->postponed_requests_head);
		fi_ibv_rdm_unpack_cm_params(&event->param.conn, conn, ep);
		fi_ibv_rdm_conn_init_cm_role(conn, ep);

		FI_INFO(&fi_ibv_prov, FI_LOG_AV,
			"CONN REQUEST, NOT found in hash, new conn %p %d, addr %s:%u, HASH ADD\n",
			conn, conn->cm_role, inet_ntoa(conn->addr.sin_addr),
			ntohs(conn->addr.sin_port));

		HASH_ADD(hh, fi_ibv_rdm_tagged_conn_hash, addr,
			FI_IBV_RDM_DFLT_ADDRLEN, conn);
	} else {
		if (conn->cm_role != FI_VERBS_CM_ACTIVE) {
			/*
			 * Do it before rdma_create_qp since that call would
			 * modify event->param.conn.private_data buffer
			 */
			fi_ibv_rdm_unpack_cm_params(&event->param.conn, conn,
						    ep);
		}

		FI_INFO(&fi_ibv_prov, FI_LOG_AV,
			"CONN REQUEST,  FOUND in hash, conn %p %d, addr %s:%u\n",
			conn, conn->cm_role, inet_ntoa(conn->addr.sin_addr),
			ntohs(conn->addr.sin_port));
	}

	if (conn->cm_role == FI_VERBS_CM_ACTIVE) {
		int rej_message = 0xdeadbeef;
		if (rdma_reject(id, &rej_message, sizeof(rej_message))) {
			VERBS_INFO_ERRNO(FI_LOG_AV, "rdma_reject\n", errno);
			ret = -errno;
			if (rdma_destroy_id(id)) {
				VERBS_INFO_ERRNO(FI_LOG_AV, "rdma_destroy_id\n",
						 errno);
				ret = (ret == FI_SUCCESS) ? -errno : ret;
			}
		}
		if (conn->state == FI_VERBS_CONN_ALLOCATED) {
			ret = fi_ibv_rdm_start_connection(ep, conn);
			if (ret != FI_SUCCESS)
				goto err;
		}
	} else {
		assert(conn->state == FI_VERBS_CONN_ALLOCATED ||
		       conn->state == FI_VERBS_CONN_STARTED);

		const size_t idx = 
			(conn->cm_role == FI_VERBS_CM_PASSIVE) ? 0 : 1;

		conn->state = FI_VERBS_CONN_STARTED;

		assert (conn->id[idx] == NULL);
		conn->id[idx] = id;

		ret = fi_ibv_rdm_prepare_conn_memory(ep, conn);
		if (ret != FI_SUCCESS)
			goto err;

		fi_ibv_rdm_tagged_init_qp_attributes(&qp_attr, ep);
		if (rdma_create_qp(id, ep->domain->pd, &qp_attr)) {
			ret = -errno;
			goto err;
		}
		conn->qp[idx] = id->qp;

		ret = fi_ibv_rdm_repost_receives(conn, ep, ep->rq_wr_depth);
		if (ret < 0) {
			VERBS_INFO(FI_LOG_AV, "repost receives failed\n");
			goto err;
		} else {
			ret = FI_SUCCESS;
		}

		id->context = conn;

		fi_ibv_rdm_pack_cm_params(&cm_params, conn, ep);

		if (rdma_accept(id, &cm_params)) {
			VERBS_INFO_ERRNO(FI_LOG_AV, "rdma_accept\n", errno);
			ret = -errno;
			goto err;
		}
		if (cm_params.private_data) {
			free((void *) cm_params.private_data);
		}
	}

	return ret;
err:
	/* ret err code is already set here, just cleanup resources */
	fi_ibv_rdm_conn_cleanup(conn);
	return ret;
}