예제 #1
0
파일: psmx_init.c 프로젝트: Slbomber/ompi
static int psmx_getinfo(uint32_t version, const char *node, const char *service,
			uint64_t flags, struct fi_info *hints, struct fi_info **info)
{
	struct fi_info *psmx_info;
	uint32_t cnt = 0;
	void *dest_addr = NULL;
	int ep_type = FI_EP_RDM;
	int av_type = FI_AV_UNSPEC;
	enum fi_mr_mode mr_mode = FI_MR_SCALABLE;
	int caps = 0;
	uint64_t max_tag_value = 0;
	int err = -FI_ENODATA;

	FI_INFO(&psmx_prov, FI_LOG_CORE,"\n");

	*info = NULL;

	if (psm_ep_num_devunits(&cnt) || !cnt) {
		FI_INFO(&psmx_prov, FI_LOG_CORE,
			"no PSM device is found.\n");
		return -FI_ENODATA;
	}

	if (node && !(flags & FI_SOURCE))
		dest_addr = psmx_resolve_name(node, 0);

	if (hints) {
		switch (hints->addr_format) {
		case FI_FORMAT_UNSPEC:
		case FI_ADDR_PSMX:
			break;
		default:
			FI_INFO(&psmx_prov, FI_LOG_CORE,
				"hints->addr_format=%d, supported=%d,%d.\n",
				hints->addr_format, FI_FORMAT_UNSPEC, FI_ADDR_PSMX);
			goto err_out;
		}

		if (hints->ep_attr) {
			switch (hints->ep_attr->type) {
			case FI_EP_UNSPEC:
			case FI_EP_DGRAM:
			case FI_EP_RDM:
				break;
			default:
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->ep_attr->type=%d, supported=%d,%d,%d.\n",
					hints->ep_attr->type, FI_EP_UNSPEC,
					FI_EP_DGRAM, FI_EP_RDM);
				goto err_out;
			}

			switch (hints->ep_attr->protocol) {
			case FI_PROTO_UNSPEC:
			case FI_PROTO_PSMX:
				break;
			default:
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->protocol=%d, supported=%d %d\n",
					hints->ep_attr->protocol,
					FI_PROTO_UNSPEC, FI_PROTO_PSMX);
				goto err_out;
			}

			if (hints->ep_attr->tx_ctx_cnt > 1) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->ep_attr->tx_ctx_cnt=%d, supported=0,1\n",
					hints->ep_attr->tx_ctx_cnt);
				goto err_out;
			}

			if (hints->ep_attr->rx_ctx_cnt > 1) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->ep_attr->rx_ctx_cnt=%d, supported=0,1\n",
					hints->ep_attr->rx_ctx_cnt);
				goto err_out;
			}
		}

		if ((hints->caps & PSMX_CAPS) != hints->caps &&
		    (hints->caps & PSMX_CAPS2) != hints->caps) {
			FI_INFO(&psmx_prov, FI_LOG_CORE,
				"hints->caps=0x%llx, supported=0x%llx,0x%llx\n",
				hints->caps, PSMX_CAPS, PSMX_CAPS2);
			goto err_out;
		}

		if (hints->tx_attr) {
			if ((hints->tx_attr->op_flags & PSMX_OP_FLAGS) !=
			    hints->tx_attr->op_flags) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->tx->flags=0x%llx, "
					"supported=0x%llx\n",
					hints->tx_attr->op_flags,
					PSMX_OP_FLAGS);
				goto err_out;
			}
			if (hints->tx_attr->inject_size > PSMX_INJECT_SIZE) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->tx_attr->inject_size=%ld,"
					"supported=%ld.\n",
					hints->tx_attr->inject_size,
					PSMX_INJECT_SIZE);
				goto err_out;
			}
		}

		if (hints->rx_attr &&
		    (hints->rx_attr->op_flags & PSMX_OP_FLAGS) !=
		     hints->rx_attr->op_flags) {
			FI_INFO(&psmx_prov, FI_LOG_CORE,
				"hints->rx->flags=0x%llx, supported=0x%llx\n",
				hints->rx_attr->op_flags, PSMX_OP_FLAGS);
			goto err_out;
		}

		if ((hints->mode & PSMX_MODE) != PSMX_MODE) {
			FI_INFO(&psmx_prov, FI_LOG_CORE,
				"hints->mode=0x%llx, required=0x%llx\n",
				hints->mode, PSMX_MODE);
			goto err_out;
		}

		if (hints->fabric_attr && hints->fabric_attr->name &&
		    strncmp(hints->fabric_attr->name, PSMX_FABRIC_NAME, PSMX_FABRIC_NAME_LEN)) {
			FI_INFO(&psmx_prov, FI_LOG_CORE,
				"hints->fabric_name=%s, supported=psm\n",
				hints->fabric_attr->name);
			goto err_out;
		}

		if (hints->domain_attr) {
			if (hints->domain_attr->name &&
			    strncmp(hints->domain_attr->name, PSMX_DOMAIN_NAME,
				    PSMX_DOMAIN_NAME_LEN)) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->domain_name=%s, supported=psm\n",
					hints->domain_attr->name);
				goto err_out;
			}

			switch (hints->domain_attr->av_type) {
			case FI_AV_UNSPEC:
			case FI_AV_MAP:
			case FI_AV_TABLE:
				av_type = hints->domain_attr->av_type;
				break;
			default:
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->domain_attr->av_type=%d, supported=%d %d %d\n",
					hints->domain_attr->av_type, FI_AV_UNSPEC, FI_AV_MAP,
					FI_AV_TABLE);
				goto err_out;
			}

			switch (hints->domain_attr->mr_mode) {
			case FI_MR_UNSPEC:
				break;
			case FI_MR_BASIC:
			case FI_MR_SCALABLE:
				mr_mode = hints->domain_attr->mr_mode;
				break;
			default:
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->domain_attr->mr_mode=%d, supported=%d %d %d\n",
					hints->domain_attr->mr_mode, FI_MR_UNSPEC, FI_MR_BASIC,
					FI_MR_SCALABLE);
				goto err_out;
			}
		}

		if (hints->ep_attr) {
			if (hints->ep_attr->max_msg_size > PSMX_MAX_MSG_SIZE) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->ep_attr->max_msg_size=%ld,"
					"supported=%ld.\n",
					hints->ep_attr->max_msg_size,
					PSMX_MAX_MSG_SIZE);
				goto err_out;
			}
			max_tag_value = fi_tag_bits(hints->ep_attr->mem_tag_format);
		}

		caps = hints->caps;

		/* TODO: check other fields of hints */
	}

	if (psmx_reserve_tag_bits(&caps, &max_tag_value) < 0)
		goto err_out;

	psmx_info = fi_allocinfo();
	if (!psmx_info) {
		err = -FI_ENOMEM;
		goto err_out;
	}

	psmx_info->ep_attr->type = ep_type;
	psmx_info->ep_attr->protocol = FI_PROTO_PSMX;
	psmx_info->ep_attr->protocol_version = PSM_VERNO;
	psmx_info->ep_attr->max_msg_size = PSMX_MAX_MSG_SIZE;
	psmx_info->ep_attr->mem_tag_format = fi_tag_format(max_tag_value);
	psmx_info->ep_attr->tx_ctx_cnt = 1;
	psmx_info->ep_attr->rx_ctx_cnt = 1;

	psmx_info->domain_attr->threading = FI_THREAD_COMPLETION;
	psmx_info->domain_attr->control_progress = FI_PROGRESS_MANUAL;
	psmx_info->domain_attr->data_progress = FI_PROGRESS_MANUAL;
	psmx_info->domain_attr->name = strdup(PSMX_DOMAIN_NAME);
	psmx_info->domain_attr->resource_mgmt = FI_RM_ENABLED;
	psmx_info->domain_attr->av_type = av_type;
	psmx_info->domain_attr->mr_mode = mr_mode;
	psmx_info->domain_attr->mr_key_size = sizeof(uint64_t);
	psmx_info->domain_attr->cq_data_size = 4;
	psmx_info->domain_attr->cq_cnt = 65535;
	psmx_info->domain_attr->ep_cnt = 65535;
	psmx_info->domain_attr->tx_ctx_cnt = 1;
	psmx_info->domain_attr->rx_ctx_cnt = 1;
	psmx_info->domain_attr->max_ep_tx_ctx = 65535;
	psmx_info->domain_attr->max_ep_rx_ctx = 1;
	psmx_info->domain_attr->max_ep_stx_ctx = 65535;
	psmx_info->domain_attr->max_ep_srx_ctx = 0;

	psmx_info->next = NULL;
	psmx_info->caps = (hints && hints->caps) ? hints->caps : caps;
	psmx_info->mode = PSMX_MODE;
	psmx_info->addr_format = FI_ADDR_PSMX;
	psmx_info->src_addrlen = 0;
	psmx_info->dest_addrlen = sizeof(psm_epid_t);
	psmx_info->src_addr = NULL;
	psmx_info->dest_addr = dest_addr;
	psmx_info->fabric_attr->name = strdup(PSMX_FABRIC_NAME);
	psmx_info->fabric_attr->prov_name = strdup(PSMX_PROV_NAME);

	psmx_info->tx_attr->caps = psmx_info->caps;
	psmx_info->tx_attr->mode = psmx_info->mode;
	psmx_info->tx_attr->op_flags = (hints && hints->tx_attr && hints->tx_attr->op_flags)
					? hints->tx_attr->op_flags : 0;
	psmx_info->tx_attr->msg_order = FI_ORDER_SAS;
	psmx_info->tx_attr->comp_order = FI_ORDER_NONE;
	psmx_info->tx_attr->inject_size = PSMX_INJECT_SIZE;
	psmx_info->tx_attr->size = UINT64_MAX;
	psmx_info->tx_attr->iov_limit = 1;

	psmx_info->rx_attr->caps = psmx_info->caps;
	psmx_info->rx_attr->mode = psmx_info->mode;
	psmx_info->rx_attr->op_flags = (hints && hints->rx_attr && hints->tx_attr->op_flags)
					? hints->tx_attr->op_flags : 0;
	psmx_info->rx_attr->msg_order = FI_ORDER_SAS;
	psmx_info->rx_attr->comp_order = FI_ORDER_NONE;
	psmx_info->rx_attr->total_buffered_recv = ~(0ULL); /* that's how PSM handles it internally! */
	psmx_info->rx_attr->size = UINT64_MAX;
	psmx_info->rx_attr->iov_limit = 1;

	*info = psmx_info;
	return 0;

err_out:
	return err;
}
예제 #2
0
파일: psm_ep.c 프로젝트: michich/opa-psm2
psm_error_t
__psm_ep_open_internal(psm_uuid_t const unique_job_key, int *devid_enabled,
		       struct psm_ep_open_opts const *opts_i, psm_mq_t mq,
		       psm_ep_t *epo, psm_epid_t *epido)
{
	psm_ep_t ep = NULL;
	uint32_t num_units;
	size_t len;
	psm_error_t err;
	psm_epaddr_t epaddr = NULL;
	char buf[128], *p, *e;
	union psmi_envvar_val envvar_val;
	size_t ptl_sizes;
	struct psm_ep_open_opts opts;
	ptl_t *amsh_ptl, *ips_ptl, *self_ptl;
	int i;

	/* First get the set of default options, we overwrite with the user's
	 * desired values afterwards */
	if ((err = psm_ep_open_opts_get_defaults(&opts)))
		goto fail;

	if (opts_i != NULL) {
		if (opts_i->timeout != -1)
			opts.timeout = opts_i->timeout;
		if (opts_i->unit != -1)
			opts.unit = opts_i->unit;
		if (opts_i->affinity != -1)
			opts.affinity = opts_i->affinity;

		if (opts_i->sendbufs_num != -1)
			opts.sendbufs_num = opts_i->sendbufs_num;

		if (opts_i->network_pkey != HFI_DEFAULT_P_KEY)
			opts.network_pkey = opts_i->network_pkey;

		if (opts_i->port != 0)
			opts.port = opts_i->port;

		if (opts_i->outsl != -1)
			opts.outsl = opts_i->outsl;

		if (opts_i->service_id)
			opts.service_id = (uint64_t) opts_i->service_id;
		if (opts_i->path_res_type != PSM_PATH_RES_NONE)
			opts.path_res_type = opts_i->path_res_type;

		if (opts_i->senddesc_num)
			opts.senddesc_num = opts_i->senddesc_num;
		if (opts_i->imm_size)
			opts.imm_size = opts_i->imm_size;
	}

	/* Get Service ID from environment */
	if (!psmi_getenv("PSM_IB_SERVICE_ID",
			 "HFI Service ID for path resolution",
			 PSMI_ENVVAR_LEVEL_USER,
			 PSMI_ENVVAR_TYPE_ULONG_ULONG,
			 (union psmi_envvar_val)HFI_DEFAULT_SERVICE_ID,
			 &envvar_val)) {
		opts.service_id = (uint64_t) envvar_val.e_ulonglong;
	}

	/* Get Path resolution type from environment Possible choices are:
	 *
	 * NONE : Default same as previous instances. Utilizes static data.
	 * OPP  : Use OFED Plus Plus library to do path record queries.
	 * UMAD : Use raw libibumad interface to form and process path records.
	 */
	if (!psmi_getenv("PSM_PATH_REC",
			 "Mechanism to query HFI path record (default is no path query)",
			 PSMI_ENVVAR_LEVEL_USER, PSMI_ENVVAR_TYPE_STR,
			 (union psmi_envvar_val)"none", &envvar_val)) {
		if (!strcasecmp(envvar_val.e_str, "none"))
			opts.path_res_type = PSM_PATH_RES_NONE;
		else if (!strcasecmp(envvar_val.e_str, "opp"))
			opts.path_res_type = PSM_PATH_RES_OPP;
		else if (!strcasecmp(envvar_val.e_str, "umad"))
			opts.path_res_type = PSM_PATH_RES_UMAD;
		else {
			_HFI_ERROR("Unknown path resolution type %s. "
				   "Disabling use of path record query.\n",
				   envvar_val.e_str);
			opts.path_res_type = PSM_PATH_RES_NONE;
		}
	}

	/* If a specific unit is set in the environment, use that one. */
	if (!psmi_getenv("HFI_UNIT", "Device Unit number (-1 autodetects)",
			 PSMI_ENVVAR_LEVEL_USER, PSMI_ENVVAR_TYPE_LONG,
			 (union psmi_envvar_val)HFI_UNIT_ID_ANY, &envvar_val)) {
		opts.unit = envvar_val.e_long;
	}

	/* Get user specified port number to use. */
	if (!psmi_getenv("HFI_PORT", "IB Port number (0 autodetects)",
			 PSMI_ENVVAR_LEVEL_USER, PSMI_ENVVAR_TYPE_LONG,
			 (union psmi_envvar_val)HFI_PORT_NUM_ANY,
			 &envvar_val)) {
		opts.port = envvar_val.e_long;
	}

	/* Get service level from environment, path-query overrides it */
	if (!psmi_getenv
	    ("HFI_SL", "HFI outging ServiceLevel number (default 0)",
	     PSMI_ENVVAR_LEVEL_USER, PSMI_ENVVAR_TYPE_LONG,
	     (union psmi_envvar_val)PSMI_SL_DEFAULT, &envvar_val)) {
		opts.outsl = envvar_val.e_long;
	}

	/* Get network key from environment. MVAPICH and other vendor MPIs do not
	 * specify it on ep open and we may require it for vFabrics.
	 * path-query will override it.
	 */
	if (!psmi_getenv("PSM_PKEY",
			 "HFI PKey to use for endpoint",
			 PSMI_ENVVAR_LEVEL_USER,
			 PSMI_ENVVAR_TYPE_ULONG,
			 (union psmi_envvar_val)HFI_DEFAULT_P_KEY,
			 &envvar_val)) {
		opts.network_pkey = (uint64_t) envvar_val.e_ulong;
	}

	/* BACKWARDS COMPATIBILITY:  Open MPI likes to choose its own PKEY of
	   0x7FFF.  That's no longer a valid default, so override it if the
	   client was compiled against PSM v1 */
	if (PSMI_VERNO_GET_MAJOR(psmi_verno_client()) < 2 &&
			opts.network_pkey == 0x7FFF) {
		opts.network_pkey = HFI_DEFAULT_P_KEY;
	}

	/* Get number of default send buffers from environment */
	if (!psmi_getenv("PSM_NUM_SEND_BUFFERS",
			 "Number of send buffers to allocate [1024]",
			 PSMI_ENVVAR_LEVEL_USER,
			 PSMI_ENVVAR_TYPE_UINT,
			 (union psmi_envvar_val)1024, &envvar_val)) {
		opts.sendbufs_num = envvar_val.e_uint;
	}

	/* Get immediate data size - transfers less than immediate data size do
	 * not consume a send buffer and require just a send descriptor.
	 */
	if (!psmi_getenv("PSM_SEND_IMMEDIATE_SIZE",
			 "Immediate data send size not requiring a buffer [128]",
			 PSMI_ENVVAR_LEVEL_USER,
			 PSMI_ENVVAR_TYPE_UINT,
			 (union psmi_envvar_val)128, &envvar_val)) {
		opts.imm_size = envvar_val.e_uint;
	}

	/* Get numner of send descriptors - by default this is 4 times the number
	 * of send buffers - mainly used for short/inlined messages.
	 */
	if (!psmi_getenv("PSM_NUM_SEND_DESCRIPTORS",
			 "Number of send descriptors to allocate [4096]",
			 PSMI_ENVVAR_LEVEL_USER,
			 PSMI_ENVVAR_TYPE_UINT,
			 (union psmi_envvar_val)4096, &envvar_val)) {
		opts.senddesc_num = envvar_val.e_uint;
	}

	if (psmi_device_is_enabled(devid_enabled, PTL_DEVID_IPS)) {
		if ((err = psm_ep_num_devunits(&num_units)) != PSM_OK)
			goto fail;
	} else
		num_units = 0;

	/* do some error checking */
	if (opts.timeout < -1) {
		err = psmi_handle_error(NULL, PSM_PARAM_ERR,
					"Invalid timeout value %lld",
					(long long)opts.timeout);
		goto fail;
	} else if (num_units && (opts.unit < -1 || opts.unit >= (int)num_units)) {
		err = psmi_handle_error(NULL, PSM_PARAM_ERR,
					"Invalid Device Unit ID %d (%d units found)",
					opts.unit, num_units);
		goto fail;
	} else if (opts.port < 0 || opts.port > HFI_MAX_PORT) {
		err = psmi_handle_error(NULL, PSM_PARAM_ERR,
					"Invalid Device port number %d",
					opts.port);
		goto fail;
	} else if (opts.affinity < 0
		   || opts.affinity > PSM_EP_OPEN_AFFINITY_FORCE) {
		err =
		    psmi_handle_error(NULL, PSM_PARAM_ERR,
				      "Invalid Affinity option: %d",
				      opts.affinity);
		goto fail;
	} else if (opts.outsl < PSMI_SL_MIN || opts.outsl > PSMI_SL_MAX) {
		err = psmi_handle_error(NULL, PSM_PARAM_ERR,
					"Invalid SL number: %lld",
					(unsigned long long)opts.outsl);
		goto fail;
	}

	/* Set environment variable if PSM is not allowed to set affinity */
	if (opts.affinity == PSM_EP_OPEN_AFFINITY_SKIP)
		setenv("HFI_NO_CPUAFFINITY", "1", 1);

	/* Allocate end point structure storage */
	ptl_sizes =
	    (psmi_device_is_enabled(devid_enabled, PTL_DEVID_SELF) ?
	     psmi_ptl_self.sizeof_ptl() : 0) +
	    (psmi_device_is_enabled(devid_enabled, PTL_DEVID_IPS) ?
	     psmi_ptl_ips.sizeof_ptl() : 0) +
	    (psmi_device_is_enabled(devid_enabled, PTL_DEVID_AMSH) ?
	     psmi_ptl_amsh.sizeof_ptl() : 0);
	if (ptl_sizes == 0)
		return PSM_EP_NO_DEVICE;

	ep = (psm_ep_t) psmi_memalign(PSMI_EP_NONE, UNDEFINED, 64,
				      sizeof(struct psm_ep) + ptl_sizes);
	epaddr = (psm_epaddr_t) psmi_calloc(PSMI_EP_NONE, PER_PEER_ENDPOINT,
					    1, sizeof(struct psm_epaddr));
	if (ep == NULL || epaddr == NULL) {
		err = psmi_handle_error(NULL, PSM_NO_MEMORY,
					"Couldn't allocate memory for %s structure",
					ep == NULL ? "psm_ep" : "psm_epaddr");
		goto fail;
	}

	/* Copy PTL enabled status */
	for (i = 0; i < PTL_MAX_INIT; i++)
		ep->devid_enabled[i] = devid_enabled[i];

	/* Matched Queue initialization.  We do this early because we have to
	 * make sure ep->mq exists and is valid before calling ips_do_work.
	 */
	ep->mq = mq;

	/* Get ready for PTL initialization */
	memcpy(&ep->uuid, (void *)unique_job_key, sizeof(psm_uuid_t));
	ep->epaddr = epaddr;
	ep->memmode = mq->memmode;
	ep->hfi_num_sendbufs = opts.sendbufs_num;
	ep->service_id = opts.service_id;
	ep->path_res_type = opts.path_res_type;
	ep->hfi_num_descriptors = opts.senddesc_num;
	ep->hfi_imm_size = opts.imm_size;
	ep->errh = psmi_errhandler_global;	/* by default use the global one */
	ep->ptl_amsh.ep_poll = psmi_poll_noop;
	ep->ptl_ips.ep_poll = psmi_poll_noop;
	ep->connections = 0;

	/* See how many iterations we want to spin before yielding */
	psmi_getenv("PSM_YIELD_SPIN_COUNT",
		    "Spin poll iterations before yield",
		    PSMI_ENVVAR_LEVEL_HIDDEN,
		    PSMI_ENVVAR_TYPE_UINT,
		    (union psmi_envvar_val)PSMI_BLOCKUNTIL_POLLS_BEFORE_YIELD,
		    &envvar_val);
	ep->yield_spin_cnt = envvar_val.e_uint;

	ptl_sizes = 0;
	amsh_ptl = ips_ptl = self_ptl = NULL;
	if (psmi_ep_device_is_enabled(ep, PTL_DEVID_AMSH)) {
		amsh_ptl = (ptl_t *) (ep->ptl_base_data + ptl_sizes);
		ptl_sizes += psmi_ptl_amsh.sizeof_ptl();
	}
	if (psmi_ep_device_is_enabled(ep, PTL_DEVID_IPS)) {
		ips_ptl = (ptl_t *) (ep->ptl_base_data + ptl_sizes);
		ptl_sizes += psmi_ptl_ips.sizeof_ptl();
	}
	if (psmi_ep_device_is_enabled(ep, PTL_DEVID_SELF)) {
		self_ptl = (ptl_t *) (ep->ptl_base_data + ptl_sizes);
		ptl_sizes += psmi_ptl_self.sizeof_ptl();
	}

	if ((err = psmi_ep_open_device(ep, &opts, unique_job_key,
				       &(ep->context), &ep->epid)))
		goto fail;

	psmi_assert_always(ep->epid != 0);
	ep->epaddr->epid = ep->epid;

	_HFI_VDBG("psmi_ep_open_device() passed\n");

	/* Set our new label as soon as we know what it is */
	strncpy(buf, psmi_gethostname(), sizeof(buf) - 1);
	buf[sizeof(buf) - 1] = '\0';

	p = buf + strlen(buf);

	/* If our rank is set, use it. If not, use context.subcontext notation */
	if (((e = getenv("MPI_RANKID")) != NULL && *e) ||
	    ((e = getenv("PSC_MPI_RANK")) != NULL && *e))
		len = snprintf(p, sizeof(buf) - strlen(buf), ":%d.", atoi(e));
	else
		len = snprintf(p, sizeof(buf) - strlen(buf), ":%d.%d.",
			       (uint32_t) psm_epid_context(ep->epid),
			       (uint32_t) psmi_epid_subcontext(ep->epid));
	*(p + len) = '\0';
	ep->context_mylabel = psmi_strdup(ep, buf);
	if (ep->context_mylabel == NULL) {
		err = PSM_NO_MEMORY;
		goto fail;
	}
	/* hfi_set_mylabel(ep->context_mylabel); */

	if ((err = psmi_epid_set_hostname(psm_epid_nid(ep->epid), buf, 0)))
		goto fail;

	_HFI_VDBG("start ptl device init...\n");
	if (psmi_ep_device_is_enabled(ep, PTL_DEVID_SELF)) {
		if ((err = psmi_ptl_self.init(ep, self_ptl, &ep->ptl_self)))
			goto fail;
	}
	if (psmi_ep_device_is_enabled(ep, PTL_DEVID_IPS)) {
		if ((err = psmi_ptl_ips.init(ep, ips_ptl, &ep->ptl_ips)))
			goto fail;
	}
	/* If we're shm-only, this device is enabled above */
	if (psmi_ep_device_is_enabled(ep, PTL_DEVID_AMSH)) {
		if ((err = psmi_ptl_amsh.init(ep, amsh_ptl, &ep->ptl_amsh)))
			goto fail;
	} else {
		/* We may have pre-attached as part of getting our rank for enabling
		 * shared contexts.  */
	}

	_HFI_VDBG("finish ptl device init...\n");

	/*
	 * Keep only IPS since only IPS support multi-rail, other devices
	 * are only setup once. IPS device can come to this function again.
	 */
	for (i = 0; i < PTL_MAX_INIT; i++) {
		if (devid_enabled[i] != PTL_DEVID_IPS) {
			devid_enabled[i] = -1;
		}
	}

	*epido = ep->epid;
	*epo = ep;

	return PSM_OK;

fail:
	if (ep != NULL) {
		if (ep->context.fd != -1)
			close(ep->context.fd);
		psmi_free(ep);
	}
	if (epaddr != NULL)
		psmi_free(epaddr);
	return err;
}
예제 #3
0
파일: psm_ep.c 프로젝트: michich/opa-psm2
static psm_error_t
psmi_ep_devlids(uint16_t **lids, uint32_t *num_lids_o,
		uint64_t my_gid_hi, uint64_t my_gid_lo)
{
	static uint16_t *hfi_lids;
	static uint32_t nlids;
	uint32_t num_units;
	int i;
	psm_error_t err = PSM_OK;

	PSMI_ERR_UNLESS_INITIALIZED(NULL);

	if (hfi_lids == NULL) {
		if ((err = psm_ep_num_devunits(&num_units)))
			goto fail;
		hfi_lids = (uint16_t *)
		    psmi_calloc(PSMI_EP_NONE, UNDEFINED,
				num_units * HFI_MAX_PORT, sizeof(uint16_t));
		if (hfi_lids == NULL) {
			err = psmi_handle_error(NULL, PSM_NO_MEMORY,
						"Couldn't allocate memory for dev_lids structure");
			goto fail;
		}

		for (i = 0; i < num_units; i++) {
			int j;
			for (j = 1; j <= HFI_MAX_PORT; j++) {
				int lid = hfi_get_port_lid(i, j);
				int ret;
				uint64_t gid_hi = 0, gid_lo = 0;

				if (lid == -1)
					continue;
				ret = hfi_get_port_gid(i, j, &gid_hi, &gid_lo);
				if (ret == -1)
					continue;
				else if (my_gid_hi != gid_hi) {
					_HFI_VDBG("LID %d, unit %d, port %d, "
						  "mismatched GID %llx:%llx and "
						  "%llx:%llx\n",
						  lid, i, j,
						  (unsigned long long)gid_hi,
						  (unsigned long long)gid_lo,
						  (unsigned long long)my_gid_hi,
						  (unsigned long long)
						  my_gid_lo);
					continue;
				}
				_HFI_VDBG("LID %d, unit %d, port %d, "
					  "matching GID %llx:%llx and "
					  "%llx:%llx\n", lid, i, j,
					  (unsigned long long)gid_hi,
					  (unsigned long long)gid_lo,
					  (unsigned long long)my_gid_hi,
					  (unsigned long long)my_gid_lo);

				hfi_lids[nlids++] = (uint16_t) lid;
			}
		}
		if (nlids == 0) {
			err = psmi_handle_error(NULL, PSM_EP_DEVICE_FAILURE,
						"Couldn't get lid&gid from any unit/port");
			goto fail;
		}
	}
	*lids = hfi_lids;
	*num_lids_o = nlids;

fail:
	return err;
}
예제 #4
0
파일: psm_ep.c 프로젝트: michich/opa-psm2
static psm_error_t
psmi_ep_multirail(int *num_rails, uint32_t *unit, uint16_t *port)
{
	uint32_t num_units;
	uint64_t gid_hi, gid_lo;
	int i, j, ret, count = 0;
	char *env;
	psm_error_t err = PSM_OK;
	uint64_t gidh[HFI_MAX_RAILS][3];

	env = getenv("PSM_MULTIRAIL");
	if (!env || atoi(env) == 0) {
		*num_rails = 0;
		return err;
	}

/*
 * map is in format: unit:port,unit:port,...
 */
	if ((env = getenv("PSM_MULTIRAIL_MAP"))) {
		if (sscanf(env, "%d:%d", &i, &j) == 2) {
			char *comma = strchr(env, ',');
			unit[count] = i;
			port[count] = j;
			count++;
			while (comma) {
				if (sscanf(comma, ",%d:%d", &i, &j) != 2) {
					break;
				}
				unit[count] = i;
				port[count] = j;
				count++;
				if (count == HFI_MAX_RAILS)
					break;
				comma = strchr(comma + 1, ',');
			}
		}
		*num_rails = count;

/*
 * Check if any of the port is not usable.
 */
		for (i = 0; i < count; i++) {
			ret = hfi_get_port_lid(unit[i], port[i]);
			if (ret == -1) {
				err =
				    psmi_handle_error(NULL,
						      PSM_EP_DEVICE_FAILURE,
						      "Couldn't get lid for unit %d:%d",
						      unit[i], port[i]);
				return err;
			}
			ret =
			    hfi_get_port_gid(unit[i], port[i], &gid_hi,
					     &gid_lo);
			if (ret == -1) {
				err =
				    psmi_handle_error(NULL,
						      PSM_EP_DEVICE_FAILURE,
						      "Couldn't get gid for unit %d:%d",
						      unit[i], port[i]);
				return err;
			}
		}

		return err;
	}

	if ((err = psm_ep_num_devunits(&num_units))) {
		return err;
	}
	if (num_units > HFI_MAX_RAILS) {
		_HFI_INFO
		    ("Found %d units, max %d units are supported, use %d\n",
		     num_units, HFI_MAX_RAILS, HFI_MAX_RAILS);
		num_units = HFI_MAX_RAILS;
	}

/*
 * Get all the ports with a valid lid and gid, one per unit.
 */
	for (i = 0; i < num_units; i++) {
		for (j = 1; j <= HFI_MAX_PORT; j++) {
			ret = hfi_get_port_lid(i, j);
			if (ret == -1)
				continue;
			ret = hfi_get_port_gid(i, j, &gid_hi, &gid_lo);
			if (ret == -1)
				continue;

			gidh[count][0] = gid_hi;
			gidh[count][1] = i;
			gidh[count][2] = j;
			count++;
			break;
		}
	}

/*
 * Sort all the ports with gidh from small to big.
 * This is for multiple fabrics, and we use fabric with the
 * smallest gid to make the master connection.
 */
	qsort(gidh, count, sizeof(uint64_t) * 3, cmpfunc);

	for (i = 0; i < count; i++) {
		unit[i] = (uint32_t) gidh[i][1];
		port[i] = (uint16_t) (uint32_t) gidh[i][2];
	}
	*num_rails = count;
	return err;
}
예제 #5
0
static int psmx_getinfo(uint32_t version, const char *node, const char *service,
			uint64_t flags, const struct fi_info *hints,
			struct fi_info **info)
{
	struct fi_info *psmx_info;
	uint32_t cnt = 0;
	psm_epid_t *dest_addr = NULL;
	struct psmx_src_name *src_addr = NULL;
	int ep_type = FI_EP_RDM;
	int av_type = FI_AV_UNSPEC;
	uint64_t mode = FI_CONTEXT;
	enum fi_mr_mode mr_mode = FI_MR_SCALABLE;
	enum fi_threading threading = FI_THREAD_COMPLETION;
	enum fi_progress control_progress = FI_PROGRESS_MANUAL;
	enum fi_progress data_progress = FI_PROGRESS_MANUAL;
	int caps = 0;
	uint64_t max_tag_value = 0;
	int err = -FI_ENODATA;
	int svc0, svc = PSMX_ANY_SERVICE;

	FI_INFO(&psmx_prov, FI_LOG_CORE,"\n");

	*info = NULL;

	/* Perform some quick check first to avoid unnecessary operations */
	if (hints) {
		if (hints->fabric_attr && hints->fabric_attr->name &&
		    strcasecmp(hints->fabric_attr->name, PSMX_FABRIC_NAME)) {
			FI_INFO(&psmx_prov, FI_LOG_CORE,
				"hints->fabric_name=%s, supported=%s\n",
				hints->fabric_attr->name, PSMX_FABRIC_NAME);
			goto err_out;
		}

		if (hints->domain_attr && hints->domain_attr->name &&
		    strcasecmp(hints->domain_attr->name, PSMX_DOMAIN_NAME)) {
			FI_INFO(&psmx_prov, FI_LOG_CORE,
				"hints->domain_name=%s, supported=%s\n",
				hints->domain_attr->name, PSMX_DOMAIN_NAME);
			goto err_out;
		}

		if (hints->ep_attr) {
			switch (hints->ep_attr->type) {
			case FI_EP_UNSPEC:
			case FI_EP_DGRAM:
			case FI_EP_RDM:
				break;
			default:
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->ep_attr->type=%d, supported=%d,%d,%d.\n",
					hints->ep_attr->type, FI_EP_UNSPEC,
					FI_EP_DGRAM, FI_EP_RDM);
				goto err_out;
			}
		}

		if ((hints->caps & PSMX_CAPS) != hints->caps &&
		    (hints->caps & PSMX_CAPS2) != hints->caps) {
			uint64_t psmx_caps = PSMX_CAPS;
			uint64_t psmx_caps2 = PSMX_CAPS2;
			PSMX_INFO_DIFF("hints->caps", hints->caps, psmx_caps,
				       FI_TYPE_CAPS);
			PSMX_INFO_DIFF("alternatively, hints->caps",
				       hints->caps, psmx_caps2, FI_TYPE_CAPS);
			goto err_out;
		}
	}

	if (FI_VERSION_GE(version, FI_VERSION(1,5)))
		mr_mode = 0;

	if (psmx_init_lib())
		return -FI_ENODATA;

	if (psmx_compat_lib) {
		/*
		 * native PSM running over TrueScale doesn't have the issue handled
		 * here. it's only present when PSM is supported via the psm2-compat
		 * library, where the PSM functions are just wrappers around the PSM2
		 * counterparts.
		 *
		 * psm2_ep_num_devunits() may wait for 15 seconds before return
		 * when /dev/hfi1_0 is not present. Check the existence of any hfi1
		 * device interface first to avoid this delay. Note that the devices
		 * don't necessarily appear consecutively so we need to check all
		 * possible device names before returning "no device found" error.
		 * This also means if "/dev/hfi1_0" doesn't exist but other devices
		 * exist, we are still going to see the delay; but that's a rare case.
		 */
		glob_t glob_buf;

		if ((glob("/dev/hfi1_[0-9]", 0, NULL, &glob_buf) != 0) &&
		    (glob("/dev/hfi1_[0-9][0-9]", GLOB_APPEND, NULL, &glob_buf) != 0)) {
			FI_INFO(&psmx_prov, FI_LOG_CORE,
				"no hfi1 device is found.\n");
			return -FI_ENODATA;
		}
		globfree(&glob_buf);
	}

	if (psm_ep_num_devunits(&cnt) || !cnt) {
		FI_INFO(&psmx_prov, FI_LOG_CORE,
			"no PSM device is found.\n");
		return -FI_ENODATA;
	}

	src_addr = calloc(1, sizeof(*src_addr));
	if (!src_addr) {
		FI_INFO(&psmx_prov, FI_LOG_CORE,
			"failed to allocate src addr.\n");
		return -FI_ENODATA;
	}
	src_addr->signature = 0xFFFF;
	src_addr->unit = PSMX_DEFAULT_UNIT;
	src_addr->port = PSMX_DEFAULT_PORT;
	src_addr->service = PSMX_ANY_SERVICE;

	if (flags & FI_SOURCE) {
		if (node)
			sscanf(node, "%*[^:]:%" SCNi8 ":%" SCNu8, &src_addr->unit, &src_addr->port);
		if (service)
			sscanf(service, "%" SCNu32, &src_addr->service);
		FI_INFO(&psmx_prov, FI_LOG_CORE,
			"node '%s' service '%s' converted to <unit=%d, port=%d, service=%d>\n",
			node, service, src_addr->unit, src_addr->port, src_addr->service);
	} else if (node) {
		psm_uuid_t uuid;
		psmx_get_uuid(uuid);

		struct util_ns ns = {
			.port = psmx_uuid_to_port(uuid),
			.name_len = sizeof(*dest_addr),
			.service_len = sizeof(svc),
			.service_cmp = psmx_ns_service_cmp,
			.is_service_wildcard = psmx_ns_is_service_wildcard,
		};
		ofi_ns_init(&ns);

		if (service)
			svc = atoi(service);
		svc0 = svc;
		dest_addr = (psm_epid_t *)ofi_ns_resolve_name(&ns, node, &svc);
		if (dest_addr) {
			FI_INFO(&psmx_prov, FI_LOG_CORE,
				"'%s:%u' resolved to <epid=%"PRIu64">:%u\n",
				node, svc0, *dest_addr, svc);
		} else {
			FI_INFO(&psmx_prov, FI_LOG_CORE,
				"failed to resolve '%s:%u'.\n", node, svc);
			err = -FI_ENODATA;
			goto err_out;
		}
	}

	if (hints) {
		switch (hints->addr_format) {
		case FI_FORMAT_UNSPEC:
		case FI_ADDR_PSMX:
			break;
		default:
			FI_INFO(&psmx_prov, FI_LOG_CORE,
				"hints->addr_format=%d, supported=%d,%d.\n",
				hints->addr_format, FI_FORMAT_UNSPEC, FI_ADDR_PSMX);
			goto err_out;
		}

		if (hints->ep_attr) {
			switch (hints->ep_attr->protocol) {
			case FI_PROTO_UNSPEC:
			case FI_PROTO_PSMX:
				break;
			default:
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->protocol=%d, supported=%d %d\n",
					hints->ep_attr->protocol,
					FI_PROTO_UNSPEC, FI_PROTO_PSMX);
				goto err_out;
			}

			if (hints->ep_attr->tx_ctx_cnt > 1 &&
			    hints->ep_attr->tx_ctx_cnt != FI_SHARED_CONTEXT) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->ep_attr->tx_ctx_cnt=%"PRIu64", supported=0,1\n",
					hints->ep_attr->tx_ctx_cnt);
				goto err_out;
			}

			if (hints->ep_attr->rx_ctx_cnt > 1) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->ep_attr->rx_ctx_cnt=%"PRIu64", supported=0,1\n",
					hints->ep_attr->rx_ctx_cnt);
				goto err_out;
			}
		}

		if (hints->tx_attr) {
			if ((hints->tx_attr->op_flags & PSMX_OP_FLAGS) !=
			    hints->tx_attr->op_flags) {
				uint64_t psmx_op_flags = PSMX_OP_FLAGS;
				PSMX_INFO_DIFF("hints->tx_attr->of_flags",
					       hints->tx_attr->op_flags,
					       psmx_op_flags, FI_TYPE_OP_FLAGS);
				goto err_out;
			}
			if (hints->tx_attr->inject_size > PSMX_INJECT_SIZE) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->tx_attr->inject_size=%"PRIu64","
					"supported=%d.\n",
					hints->tx_attr->inject_size,
					PSMX_INJECT_SIZE);
				goto err_out;
			}
		}

		if (hints->rx_attr &&
		    (hints->rx_attr->op_flags & PSMX_OP_FLAGS) !=
		     hints->rx_attr->op_flags) {
			uint64_t psmx_op_flags = PSMX_OP_FLAGS;
			PSMX_INFO_DIFF("hints->rx_attr->of_flags",
				       hints->rx_attr->op_flags,
				       psmx_op_flags, FI_TYPE_OP_FLAGS);
			goto err_out;
		}

		if ((hints->caps & FI_TAGGED) ||
		    ((hints->caps & FI_MSG) && !psmx_env.am_msg)) {
			if ((hints->mode & FI_CONTEXT) != FI_CONTEXT) {
				uint64_t psmx_mode = FI_CONTEXT;
				PSMX_INFO_DIFF("hints->mode", hints->mode,
					       psmx_mode, FI_TYPE_MODE);
				goto err_out;
			}
		} else {
			mode = 0;
		}

		if (hints->domain_attr) {
			switch (hints->domain_attr->av_type) {
			case FI_AV_UNSPEC:
			case FI_AV_MAP:
			case FI_AV_TABLE:
				av_type = hints->domain_attr->av_type;
				break;
			default:
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->domain_attr->av_type=%d, supported=%d %d %d\n",
					hints->domain_attr->av_type, FI_AV_UNSPEC, FI_AV_MAP,
					FI_AV_TABLE);
				goto err_out;
			}

			if (hints->domain_attr->mr_mode == FI_MR_BASIC) {
				mr_mode = FI_MR_BASIC;
			} else if (hints->domain_attr->mr_mode == FI_MR_SCALABLE) {
				mr_mode = FI_MR_SCALABLE;
			} else if (hints->domain_attr->mr_mode & (FI_MR_BASIC | FI_MR_SCALABLE)) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->domain_attr->mr_mode has FI_MR_BASIC or FI_MR_SCALABLE "
					"combined with other bits\n");
				goto err_out;
			}

			switch (hints->domain_attr->threading) {
			case FI_THREAD_UNSPEC:
				break;
			case FI_THREAD_FID:
			case FI_THREAD_ENDPOINT:
			case FI_THREAD_COMPLETION:
			case FI_THREAD_DOMAIN:
				threading = hints->domain_attr->threading;
				break;
			default:
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->domain_attr->threading=%d, supported=%d %d %d %d %d\n",
					hints->domain_attr->threading, FI_THREAD_UNSPEC,
					FI_THREAD_FID, FI_THREAD_ENDPOINT, FI_THREAD_COMPLETION,
					FI_THREAD_DOMAIN);
				goto err_out;
			}

			switch (hints->domain_attr->control_progress) {
			case FI_PROGRESS_UNSPEC:
				break;
			case FI_PROGRESS_MANUAL:
			case FI_PROGRESS_AUTO:
				control_progress = hints->domain_attr->control_progress;
				break;
			default:
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->domain_attr->control_progress=%d, supported=%d %d %d\n",
					hints->domain_attr->control_progress, FI_PROGRESS_UNSPEC,
					FI_PROGRESS_MANUAL, FI_PROGRESS_AUTO);
				goto err_out;
			}

			switch (hints->domain_attr->data_progress) {
			case FI_PROGRESS_UNSPEC:
				break;
			case FI_PROGRESS_MANUAL:
			case FI_PROGRESS_AUTO:
				data_progress = hints->domain_attr->data_progress;
				break;
			default:
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->domain_attr->data_progress=%d, supported=%d %d %d\n",
					hints->domain_attr->data_progress, FI_PROGRESS_UNSPEC,
					FI_PROGRESS_MANUAL, FI_PROGRESS_AUTO);
				goto err_out;
			}

			if (hints->domain_attr->caps & FI_SHARED_AV) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->domain_attr->caps=%lx, shared AV is unsupported\n",
					hints->domain_attr->caps);
				goto err_out;
			}
		}

		if (hints->ep_attr) {
			if (hints->ep_attr->max_msg_size > PSMX_MAX_MSG_SIZE) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->ep_attr->max_msg_size=%"PRIu64","
					"supported=%llu.\n",
					hints->ep_attr->max_msg_size,
					PSMX_MAX_MSG_SIZE);
				goto err_out;
			}
			max_tag_value = ofi_max_tag(hints->ep_attr->mem_tag_format);
		}

		if (hints->tx_attr) {
			if ((hints->tx_attr->msg_order & PSMX_MSG_ORDER) !=
			    hints->tx_attr->msg_order) {
				uint64_t psmx_msg_order = PSMX_MSG_ORDER;
				PSMX_INFO_DIFF("hints->tx_attr->msg_order",
					       hints->tx_attr->msg_order,
					       psmx_msg_order, FI_TYPE_MSG_ORDER);
				goto err_out;
			}
			if ((hints->tx_attr->comp_order & PSMX_COMP_ORDER) !=
			    hints->tx_attr->comp_order) {
				uint64_t psmx_comp_order = PSMX_COMP_ORDER;
				PSMX_INFO_DIFF("hints->tx_attr->comp_order",
					       hints->tx_attr->comp_order,
					       psmx_comp_order, FI_TYPE_MSG_ORDER);
				goto err_out;
			}
			if (hints->tx_attr->inject_size > PSMX_INJECT_SIZE) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->tx_attr->inject_size=%ld,"
					"supported=%d.\n",
					hints->tx_attr->inject_size,
					PSMX_INJECT_SIZE);
				goto err_out;
			}
			if (hints->tx_attr->iov_limit > 1) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->tx_attr->iov_limit=%ld,"
					"supported=1.\n",
					hints->tx_attr->iov_limit);
				goto err_out;
			}
			if (hints->tx_attr->rma_iov_limit > 1) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->tx_attr->rma_iov_limit=%ld,"
					"supported=1.\n",
					hints->tx_attr->rma_iov_limit);
				goto err_out;
			}
		}

		if (hints->rx_attr) {
			if ((hints->rx_attr->msg_order & PSMX_MSG_ORDER) !=
			    hints->rx_attr->msg_order) {
				uint64_t psmx_msg_order = PSMX_MSG_ORDER;
				PSMX_INFO_DIFF("hints->rx_attr->msg_order",
					       hints->rx_attr->msg_order,
					       psmx_msg_order, FI_TYPE_MSG_ORDER);
				goto err_out;
			}
			if ((hints->rx_attr->comp_order & PSMX_COMP_ORDER) !=
			    hints->rx_attr->comp_order) {
				uint64_t psmx_comp_order = PSMX_COMP_ORDER;
				PSMX_INFO_DIFF("hints->rx_attr->comp_order",
					       hints->rx_attr->comp_order,
					       psmx_comp_order, FI_TYPE_MSG_ORDER);
				goto err_out;
			}
			if (hints->rx_attr->iov_limit > 1) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->rx_attr->iov_limit=%ld,"
					"supported=1.\n",
					hints->rx_attr->iov_limit);
				goto err_out;
			}
		}

		caps = hints->caps;

		/* TODO: check other fields of hints */
	}

	if (psmx_reserve_tag_bits(&caps, &max_tag_value) < 0)
		goto err_out;

	psmx_info = fi_allocinfo();
	if (!psmx_info) {
		err = -FI_ENOMEM;
		goto err_out;
	}

	psmx_info->ep_attr->type = ep_type;
	psmx_info->ep_attr->protocol = FI_PROTO_PSMX;
	psmx_info->ep_attr->protocol_version = PSM_VERNO;
	psmx_info->ep_attr->max_msg_size = PSMX_MAX_MSG_SIZE;
	psmx_info->ep_attr->max_order_raw_size = PSMX_RMA_ORDER_SIZE;
	psmx_info->ep_attr->max_order_war_size = PSMX_RMA_ORDER_SIZE;
	psmx_info->ep_attr->max_order_waw_size = PSMX_RMA_ORDER_SIZE;
	psmx_info->ep_attr->mem_tag_format = ofi_tag_format(max_tag_value);
	psmx_info->ep_attr->tx_ctx_cnt = 1;
	psmx_info->ep_attr->rx_ctx_cnt = 1;

	psmx_info->domain_attr->threading = threading;
	psmx_info->domain_attr->control_progress = control_progress;
	psmx_info->domain_attr->data_progress = data_progress;
	psmx_info->domain_attr->name = strdup(PSMX_DOMAIN_NAME);
	psmx_info->domain_attr->resource_mgmt = FI_RM_ENABLED;
	psmx_info->domain_attr->av_type = av_type;
	psmx_info->domain_attr->mr_mode = mr_mode;
	psmx_info->domain_attr->mr_key_size = sizeof(uint64_t);
	psmx_info->domain_attr->cq_data_size = 4;
	psmx_info->domain_attr->cq_cnt = 65535;
	psmx_info->domain_attr->ep_cnt = 65535;
	psmx_info->domain_attr->tx_ctx_cnt = 1;
	psmx_info->domain_attr->rx_ctx_cnt = 1;
	psmx_info->domain_attr->max_ep_tx_ctx = 1;
	psmx_info->domain_attr->max_ep_rx_ctx = 1;
	psmx_info->domain_attr->max_ep_stx_ctx = 65535;
	psmx_info->domain_attr->max_ep_srx_ctx = 0;
	psmx_info->domain_attr->cntr_cnt = 65535;
	psmx_info->domain_attr->mr_iov_limit = 65535;
	psmx_info->domain_attr->caps = PSMX_DOM_CAPS;
	psmx_info->domain_attr->mode = 0;
	psmx_info->domain_attr->mr_cnt = 65535;

	psmx_info->next = NULL;
	psmx_info->caps = (hints && hints->caps) ? hints->caps : caps;
	psmx_info->mode = mode;
	psmx_info->addr_format = FI_ADDR_PSMX;
	psmx_info->src_addr = src_addr;
	psmx_info->src_addrlen = sizeof(*src_addr);
	psmx_info->dest_addr = dest_addr;
	psmx_info->dest_addrlen = sizeof(*dest_addr);
	psmx_info->fabric_attr->name = strdup(PSMX_FABRIC_NAME);
	psmx_info->fabric_attr->prov_name = NULL;
	psmx_info->fabric_attr->prov_version = PSMX_VERSION;

	psmx_info->tx_attr->caps = psmx_info->caps;
	psmx_info->tx_attr->mode = psmx_info->mode;
	psmx_info->tx_attr->op_flags = (hints && hints->tx_attr && hints->tx_attr->op_flags)
					? hints->tx_attr->op_flags : 0;
	psmx_info->tx_attr->msg_order = PSMX_MSG_ORDER;
	psmx_info->tx_attr->comp_order = PSMX_COMP_ORDER;
	psmx_info->tx_attr->inject_size = PSMX_INJECT_SIZE;
	psmx_info->tx_attr->size = UINT64_MAX;
	psmx_info->tx_attr->iov_limit = 1;
	psmx_info->tx_attr->rma_iov_limit = 1;

	psmx_info->rx_attr->caps = psmx_info->caps;
	psmx_info->rx_attr->mode = psmx_info->mode;
	psmx_info->rx_attr->op_flags = (hints && hints->rx_attr && hints->rx_attr->op_flags)
					? hints->rx_attr->op_flags : 0;
	psmx_info->rx_attr->msg_order = PSMX_MSG_ORDER;
	psmx_info->rx_attr->comp_order = PSMX_COMP_ORDER;
	psmx_info->rx_attr->total_buffered_recv = ~(0ULL); /* that's how PSM handles it internally! */
	psmx_info->rx_attr->size = UINT64_MAX;
	psmx_info->rx_attr->iov_limit = 1;

	*info = psmx_info;
	return 0;

err_out:
	free(dest_addr);
	free(src_addr);

	return err;
}

static void psmx_fini(void)
{
	FI_INFO(&psmx_prov, FI_LOG_CORE, "\n");

	if (! --psmx_init_count && psmx_lib_initialized) {
		/* This function is called from a library destructor, which is called
		 * automatically when exit() is called. The call to psm_finalize()
		 * might cause deadlock if the applicaiton is terminated with Ctrl-C
		 * -- the application could be inside a PSM call, holding a lock that
		 * psm_finalize() tries to acquire. This can be avoided by only
		 * calling psm_finalize() when PSM is guaranteed to be unused.
		 */
		if (psmx_active_fabric) {
			FI_INFO(&psmx_prov, FI_LOG_CORE,
				"psmx_active_fabric != NULL, skip psm_finalize\n");
		} else {
			psm_finalize();
			psmx_lib_initialized = 0;
		}
	}
}
예제 #6
0
static int psmx_getinfo(uint32_t version, const char *node, const char *service,
			uint64_t flags, struct fi_info *hints, struct fi_info **info)
{
	struct fi_info *psmx_info;
	uint32_t cnt = 0;
	psm_epid_t *dest_addr = NULL;
	struct psmx_src_name *src_addr;
	int ep_type = FI_EP_RDM;
	int av_type = FI_AV_UNSPEC;
	uint64_t mode = FI_CONTEXT;
	enum fi_mr_mode mr_mode = FI_MR_SCALABLE;
	enum fi_threading threading = FI_THREAD_COMPLETION;
	enum fi_progress control_progress = FI_PROGRESS_MANUAL;
	enum fi_progress data_progress = FI_PROGRESS_MANUAL;
	int caps = 0;
	uint64_t max_tag_value = 0;
	int err = -FI_ENODATA;
	int svc0, svc = PSMX_ANY_SERVICE;

	FI_INFO(&psmx_prov, FI_LOG_CORE,"\n");

	*info = NULL;

	if (psmx_init_lib())
		return -FI_ENODATA;

	if (psm_ep_num_devunits(&cnt) || !cnt) {
		FI_INFO(&psmx_prov, FI_LOG_CORE,
			"no PSM device is found.\n");
		return -FI_ENODATA;
	}

	psmx_init_env();

	src_addr = calloc(1, sizeof(*src_addr));
	if (!src_addr) {
		FI_INFO(&psmx_prov, FI_LOG_CORE,
			"failed to allocate src addr.\n");
		return -FI_ENODATA;
	}
	src_addr->unit = PSMX_DEFAULT_UNIT;
	src_addr->port = PSMX_DEFAULT_PORT;
	src_addr->service = PSMX_ANY_SERVICE;

	if (flags & FI_SOURCE) {
		if (node)
			sscanf(node, "%*[^:]:%d:%d", &src_addr->unit, &src_addr->port);
		if (service)
			sscanf(service, "%d", &src_addr->service);
		FI_INFO(&psmx_prov, FI_LOG_CORE,
			"node '%s' service '%s' converted to <unit=%d, port=%d, service=%d>\n",
			node, service, src_addr->unit, src_addr->port, src_addr->service);
	} else if (node) {
		if (service)
			svc = atoi(service);
		svc0 = svc;
		dest_addr = psmx_ns_resolve_name(node, &svc);
		if (dest_addr) {
			FI_INFO(&psmx_prov, FI_LOG_CORE,
				"'%s:%u' resolved to <epid=0x%llx>:%u\n", node, svc0,
				*dest_addr, svc);
		} else {
			FI_INFO(&psmx_prov, FI_LOG_CORE,
				"failed to resolve '%s:%u'.\n", node, svc);
			err = -FI_ENODATA;
			goto err_out;
		}
	}

	if (hints) {
		switch (hints->addr_format) {
		case FI_FORMAT_UNSPEC:
		case FI_ADDR_PSMX:
			break;
		default:
			FI_INFO(&psmx_prov, FI_LOG_CORE,
				"hints->addr_format=%d, supported=%d,%d.\n",
				hints->addr_format, FI_FORMAT_UNSPEC, FI_ADDR_PSMX);
			goto err_out;
		}

		if (hints->ep_attr) {
			switch (hints->ep_attr->type) {
			case FI_EP_UNSPEC:
			case FI_EP_DGRAM:
			case FI_EP_RDM:
				break;
			default:
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->ep_attr->type=%d, supported=%d,%d,%d.\n",
					hints->ep_attr->type, FI_EP_UNSPEC,
					FI_EP_DGRAM, FI_EP_RDM);
				goto err_out;
			}

			switch (hints->ep_attr->protocol) {
			case FI_PROTO_UNSPEC:
			case FI_PROTO_PSMX:
				break;
			default:
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->protocol=%d, supported=%d %d\n",
					hints->ep_attr->protocol,
					FI_PROTO_UNSPEC, FI_PROTO_PSMX);
				goto err_out;
			}

			if (hints->ep_attr->tx_ctx_cnt > 1) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->ep_attr->tx_ctx_cnt=%d, supported=0,1\n",
					hints->ep_attr->tx_ctx_cnt);
				goto err_out;
			}

			if (hints->ep_attr->rx_ctx_cnt > 1) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->ep_attr->rx_ctx_cnt=%d, supported=0,1\n",
					hints->ep_attr->rx_ctx_cnt);
				goto err_out;
			}
		}

		if ((hints->caps & PSMX_CAPS) != hints->caps &&
		    (hints->caps & PSMX_CAPS2) != hints->caps) {
			FI_INFO(&psmx_prov, FI_LOG_CORE,
				"hints->caps=0x%llx, supported=0x%llx,0x%llx\n",
				hints->caps, PSMX_CAPS, PSMX_CAPS2);
			goto err_out;
		}

		if (hints->tx_attr) {
			if ((hints->tx_attr->op_flags & PSMX_OP_FLAGS) !=
			    hints->tx_attr->op_flags) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->tx->flags=0x%llx, "
					"supported=0x%llx\n",
					hints->tx_attr->op_flags,
					PSMX_OP_FLAGS);
				goto err_out;
			}
			if (hints->tx_attr->inject_size > PSMX_INJECT_SIZE) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->tx_attr->inject_size=%ld,"
					"supported=%ld.\n",
					hints->tx_attr->inject_size,
					PSMX_INJECT_SIZE);
				goto err_out;
			}
		}

		if (hints->rx_attr &&
		    (hints->rx_attr->op_flags & PSMX_OP_FLAGS) !=
		     hints->rx_attr->op_flags) {
			FI_INFO(&psmx_prov, FI_LOG_CORE,
				"hints->rx->flags=0x%llx, supported=0x%llx\n",
				hints->rx_attr->op_flags, PSMX_OP_FLAGS);
			goto err_out;
		}

		if ((hints->caps & FI_TAGGED) ||
		    ((hints->caps & FI_MSG) && !psmx_env.am_msg)) {
			if ((hints->mode & FI_CONTEXT) != FI_CONTEXT) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->mode=0x%llx, required=0x%llx\n",
					hints->mode, FI_CONTEXT);
				goto err_out;
			}
		} else {
			mode = 0;
		}

		if (hints->fabric_attr && hints->fabric_attr->name &&
		    strcmp(hints->fabric_attr->name, PSMX_FABRIC_NAME)) {
			FI_INFO(&psmx_prov, FI_LOG_CORE,
				"hints->fabric_name=%s, supported=psm\n",
				hints->fabric_attr->name);
			goto err_out;
		}

		if (hints->domain_attr) {
			if (hints->domain_attr->name &&
			    strcmp(hints->domain_attr->name, PSMX_DOMAIN_NAME)) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->domain_name=%s, supported=psm\n",
					hints->domain_attr->name);
				goto err_out;
			}

			switch (hints->domain_attr->av_type) {
			case FI_AV_UNSPEC:
			case FI_AV_MAP:
			case FI_AV_TABLE:
				av_type = hints->domain_attr->av_type;
				break;
			default:
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->domain_attr->av_type=%d, supported=%d %d %d\n",
					hints->domain_attr->av_type, FI_AV_UNSPEC, FI_AV_MAP,
					FI_AV_TABLE);
				goto err_out;
			}

			switch (hints->domain_attr->mr_mode) {
			case FI_MR_UNSPEC:
				break;
			case FI_MR_BASIC:
			case FI_MR_SCALABLE:
				mr_mode = hints->domain_attr->mr_mode;
				break;
			default:
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->domain_attr->mr_mode=%d, supported=%d %d %d\n",
					hints->domain_attr->mr_mode, FI_MR_UNSPEC, FI_MR_BASIC,
					FI_MR_SCALABLE);
				goto err_out;
			}

			switch (hints->domain_attr->threading) {
			case FI_THREAD_UNSPEC:
				break;
			case FI_THREAD_FID:
			case FI_THREAD_ENDPOINT:
			case FI_THREAD_COMPLETION:
			case FI_THREAD_DOMAIN:
				threading = hints->domain_attr->threading;
				break;
			default:
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->domain_attr->threading=%d, supported=%d %d %d %d %d\n",
					hints->domain_attr->threading, FI_THREAD_UNSPEC,
					FI_THREAD_FID, FI_THREAD_ENDPOINT, FI_THREAD_COMPLETION,
					FI_THREAD_DOMAIN);
				goto err_out;
			}

			switch (hints->domain_attr->control_progress) {
			case FI_PROGRESS_UNSPEC:
				break;
			case FI_PROGRESS_MANUAL:
			case FI_PROGRESS_AUTO:
				control_progress = hints->domain_attr->control_progress;
				break;
			default:
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->domain_attr->control_progress=%d, supported=%d %d %d\n",
					hints->domain_attr->control_progress, FI_PROGRESS_UNSPEC,
					FI_PROGRESS_MANUAL, FI_PROGRESS_AUTO);
				goto err_out;
			}

			switch (hints->domain_attr->data_progress) {
			case FI_PROGRESS_UNSPEC:
				break;
			case FI_PROGRESS_MANUAL:
			case FI_PROGRESS_AUTO:
				data_progress = hints->domain_attr->data_progress;
				break;
			default:
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->domain_attr->data_progress=%d, supported=%d %d %d\n",
					hints->domain_attr->data_progress, FI_PROGRESS_UNSPEC,
					FI_PROGRESS_MANUAL, FI_PROGRESS_AUTO);
				goto err_out;
			}

			if (hints->domain_attr->caps & FI_SHARED_AV) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->domain_attr->caps=%lx, shared AV is unsupported\n",
					hints->domain_attr->caps);
				goto err_out;
			}
		}

		if (hints->ep_attr) {
			if (hints->ep_attr->max_msg_size > PSMX_MAX_MSG_SIZE) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->ep_attr->max_msg_size=%ld,"
					"supported=%ld.\n",
					hints->ep_attr->max_msg_size,
					PSMX_MAX_MSG_SIZE);
				goto err_out;
			}
			max_tag_value = fi_tag_bits(hints->ep_attr->mem_tag_format);
		}

		if (hints->tx_attr) {
			if ((hints->tx_attr->msg_order & PSMX_MSG_ORDER) !=
			    hints->tx_attr->msg_order) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->tx_attr->msg_order=%lx,"
					"supported=%lx.\n",
					hints->tx_attr->msg_order,
					PSMX_MSG_ORDER);
				goto err_out;
			}
			if ((hints->tx_attr->comp_order & PSMX_COMP_ORDER) !=
			    hints->tx_attr->comp_order) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->tx_attr->msg_order=%lx,"
					"supported=%lx.\n",
					hints->tx_attr->comp_order,
					PSMX_COMP_ORDER);
				goto err_out;
			}
			if (hints->tx_attr->inject_size > PSMX_INJECT_SIZE) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->tx_attr->inject_size=%ld,"
					"supported=%d.\n",
					hints->tx_attr->inject_size,
					PSMX_INJECT_SIZE);
				goto err_out;
			}
			if (hints->tx_attr->iov_limit > 1) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->tx_attr->iov_limit=%ld,"
					"supported=1.\n",
					hints->tx_attr->iov_limit);
				goto err_out;
			}
			if (hints->tx_attr->rma_iov_limit > 1) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->tx_attr->rma_iov_limit=%ld,"
					"supported=1.\n",
					hints->tx_attr->rma_iov_limit);
				goto err_out;
			}
		}

		if (hints->rx_attr) {
			if ((hints->rx_attr->msg_order & PSMX_MSG_ORDER) !=
			    hints->rx_attr->msg_order) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->rx_attr->msg_order=%lx,"
					"supported=%lx.\n",
					hints->rx_attr->msg_order,
					PSMX_MSG_ORDER);
				goto err_out;
			}
			if ((hints->rx_attr->comp_order & PSMX_COMP_ORDER) !=
			    hints->rx_attr->comp_order) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->rx_attr->msg_order=%lx,"
					"supported=%lx.\n",
					hints->rx_attr->comp_order,
					PSMX_COMP_ORDER);
				goto err_out;
			}
			if (hints->rx_attr->iov_limit > 1) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->rx_attr->iov_limit=%ld,"
					"supported=1.\n",
					hints->rx_attr->iov_limit);
				goto err_out;
			}
		}

		caps = hints->caps;

		/* TODO: check other fields of hints */
	}

	if (psmx_reserve_tag_bits(&caps, &max_tag_value) < 0)
		goto err_out;

	psmx_info = fi_allocinfo();
	if (!psmx_info) {
		err = -FI_ENOMEM;
		goto err_out;
	}

	psmx_info->ep_attr->type = ep_type;
	psmx_info->ep_attr->protocol = FI_PROTO_PSMX;
	psmx_info->ep_attr->protocol_version = PSM_VERNO;
	psmx_info->ep_attr->max_msg_size = PSMX_MAX_MSG_SIZE;
	psmx_info->ep_attr->mem_tag_format = fi_tag_format(max_tag_value);
	psmx_info->ep_attr->tx_ctx_cnt = 1;
	psmx_info->ep_attr->rx_ctx_cnt = 1;

	psmx_info->domain_attr->threading = threading;
	psmx_info->domain_attr->control_progress = control_progress;
	psmx_info->domain_attr->data_progress = data_progress;
	psmx_info->domain_attr->name = strdup(PSMX_DOMAIN_NAME);
	psmx_info->domain_attr->resource_mgmt = FI_RM_ENABLED;
	psmx_info->domain_attr->av_type = av_type;
	psmx_info->domain_attr->mr_mode = mr_mode;
	psmx_info->domain_attr->mr_key_size = sizeof(uint64_t);
	psmx_info->domain_attr->cq_data_size = 4;
	psmx_info->domain_attr->cq_cnt = 65535;
	psmx_info->domain_attr->ep_cnt = 65535;
	psmx_info->domain_attr->tx_ctx_cnt = 1;
	psmx_info->domain_attr->rx_ctx_cnt = 1;
	psmx_info->domain_attr->max_ep_tx_ctx = 1;
	psmx_info->domain_attr->max_ep_rx_ctx = 1;
	psmx_info->domain_attr->max_ep_stx_ctx = 65535;
	psmx_info->domain_attr->max_ep_srx_ctx = 0;
	psmx_info->domain_attr->cntr_cnt = 65535;
	psmx_info->domain_attr->mr_iov_limit = 65535;
	psmx_info->domain_attr->caps = PSMX_DOM_CAPS;
	psmx_info->domain_attr->mode = 0;

	psmx_info->next = NULL;
	psmx_info->caps = (hints && hints->caps) ? hints->caps : caps;
	psmx_info->mode = mode;
	psmx_info->addr_format = FI_ADDR_PSMX;
	psmx_info->src_addr = src_addr;
	psmx_info->src_addrlen = sizeof(*src_addr);
	psmx_info->dest_addr = dest_addr;
	psmx_info->dest_addrlen = sizeof(*dest_addr);
	psmx_info->fabric_attr->name = strdup(PSMX_FABRIC_NAME);
	psmx_info->fabric_attr->prov_name = NULL;
	psmx_info->fabric_attr->prov_version = PSMX_VERSION;

	psmx_info->tx_attr->caps = psmx_info->caps;
	psmx_info->tx_attr->mode = psmx_info->mode;
	psmx_info->tx_attr->op_flags = (hints && hints->tx_attr && hints->tx_attr->op_flags)
					? hints->tx_attr->op_flags : 0;
	psmx_info->tx_attr->msg_order = PSMX_MSG_ORDER;
	psmx_info->tx_attr->comp_order = PSMX_COMP_ORDER;
	psmx_info->tx_attr->inject_size = PSMX_INJECT_SIZE;
	psmx_info->tx_attr->size = UINT64_MAX;
	psmx_info->tx_attr->iov_limit = 1;
	psmx_info->tx_attr->rma_iov_limit = 1;

	psmx_info->rx_attr->caps = psmx_info->caps;
	psmx_info->rx_attr->mode = psmx_info->mode;
	psmx_info->rx_attr->op_flags = (hints && hints->rx_attr && hints->rx_attr->op_flags)
					? hints->rx_attr->op_flags : 0;
	psmx_info->rx_attr->msg_order = PSMX_MSG_ORDER;
	psmx_info->rx_attr->comp_order = PSMX_COMP_ORDER;
	psmx_info->rx_attr->total_buffered_recv = ~(0ULL); /* that's how PSM handles it internally! */
	psmx_info->rx_attr->size = UINT64_MAX;
	psmx_info->rx_attr->iov_limit = 1;

	*info = psmx_info;
	return 0;

err_out:
	free(dest_addr);
	free(src_addr);

	return err;
}