Esempio n. 1
0
/*
 * Decide whether the peer identified by epid is on the same node as ep.
 *
 * The node id (LID) is extracted from epid with psm_epid_nid() and
 * truncated to 16 bits.  When the IPS device is enabled for ep, that LID
 * is compared against every local LID returned by psmi_ep_devlids();
 * otherwise it is compared against the LID embedded in ep's own epid.
 *
 * ep        endpoint to test against; must not be NULL (asserted).
 * epid      endpoint id of the peer.
 * result_o  out: 1 when a matching LID was found, 0 otherwise.
 *
 * Returns PSM_OK, or the error reported by psmi_ep_devlids() (in which
 * case *result_o is left untouched).
 */
psm_error_t
__psm_ep_epid_share_memory(psm_ep_t ep, psm_epid_t epid, int *result_o)
{
	uint16_t *local_lids = NULL;
	uint32_t local_lid_count = 0;
	uint16_t peer_lid;
	int same_node = 0;
	int idx;
	psm_error_t err;

	psmi_assert_always(ep != NULL);
	PSMI_ERR_UNLESS_INITIALIZED(ep);

	peer_lid = (uint16_t) psm_epid_nid(epid);

	if (psmi_ep_device_is_enabled(ep, PTL_DEVID_IPS)) {
		/* hfi mode: the peer may match any of the local ports. */
		err = psmi_ep_devlids(&local_lids, &local_lid_count,
				      ep->gid_hi, ep->gid_lo);
		if (err)
			return err;

		for (idx = 0; idx < local_lid_count && !same_node; idx++)
			same_node = (local_lids[idx] == peer_lid);
	} else {
		/* Non-hfi mode: don't bother listing lids, just compare
		 * against the LID carried in our own epid. */
		uint16_t own_lid = (uint16_t) psm_epid_nid(ep->epid);

		same_node = (own_lid == peer_lid);
	}

	*result_o = same_node;
	return PSM_OK;
}
Esempio n. 2
0
/*
 * Find the connection that corresponds to epid among all opened endpoints.
 *
 * Walks the global list starting at psmi_opened_endpoint (linked through
 * user_ep_next) and asks psmi_epid_lookup() for an address on each one.
 * On the first hit, epconn is filled with the address, the owning
 * endpoint and that endpoint's matched queue.
 *
 * Returns PSM_OK on a hit.  Otherwise returns whatever psmi_handle_error()
 * yields for PSM_EP_WAS_CLOSED (no endpoint opened) or PSM_EPID_UNKNOWN
 * (no endpoint knows this epid); epconn is not written in those cases.
 */
psm_error_t __psm_ep_epid_lookup(psm_epid_t epid, psm_epconn_t *epconn)
{
	psm_ep_t cur;
	psm_epaddr_t found;

	PSMI_ERR_UNLESS_INITIALIZED(NULL);

	/* Need to have an opened endpoint before we can resolve epids */
	if (psmi_opened_endpoint == NULL)
		return psmi_handle_error(NULL, PSM_EP_WAS_CLOSED,
					 "PSM Endpoint is closed or does not exist");

	for (cur = psmi_opened_endpoint; cur; cur = cur->user_ep_next) {
		found = psmi_epid_lookup(cur, epid);
		if (!found)
			continue;

		/* The address must belong to the endpoint it was found on. */
		psmi_assert_always(found->ptlctl->ep == cur);
		epconn->addr = found;
		epconn->ep = cur;
		epconn->mq = cur->mq;
		return PSM_OK;
	}

	return psmi_handle_error(NULL, PSM_EPID_UNKNOWN,
				 "Endpoint connection status unknown");
}
Esempio n. 3
0
/*
 * Report the number of device units.
 *
 * The value comes from hfi_get_num_units() and is cached in a function
 * static after the first call; a probe result of -1 is stored as 0.
 *
 * num_units_o  out: the cached unit count.
 *
 * Always returns PSM_OK once past the initialization check.
 */
psm_error_t __psm_ep_num_devunits(uint32_t *num_units_o)
{
	static int cached_units = -1;	/* -1 means "not probed yet" */

	PSMI_ERR_UNLESS_INITIALIZED(NULL);

	if (cached_units == -1) {
		int probed = hfi_get_num_units();

		cached_units = (probed == -1) ? 0 : probed;
	}

	*num_units_o = (uint32_t) cached_units;
	return PSM_OK;
}
Esempio n. 4
0
psm_error_t __psm_ep_open_opts_get_defaults(struct psm_ep_open_opts *opts)
{
	PSMI_ERR_UNLESS_INITIALIZED(NULL);

	/* Set in order in the structure. */
	opts->timeout = 30000000000LL;	/* 30 sec */
	opts->unit = HFI_UNIT_ID_ANY;
	opts->affinity = PSM_EP_OPEN_AFFINITY_SET;
	opts->shm_mbytes = 0;	/* deprecated in psm2.h */
	opts->sendbufs_num = 1024;
	opts->network_pkey = HFI_DEFAULT_P_KEY;
	opts->port = HFI_PORT_NUM_ANY;
	opts->outsl = PSMI_SL_DEFAULT;
	opts->service_id = HFI_DEFAULT_SERVICE_ID;
	opts->path_res_type = PSM_PATH_RES_NONE;
	opts->senddesc_num = 4096;
	opts->imm_size = 128;

	return PSM_OK;
}
Esempio n. 5
0
/*
 * Describe the currently opened endpoints.
 *
 * num_of_epinfo    in: capacity of array_of_epinfo (must be > 0);
 *                  out: number of entries actually filled.
 * array_of_epinfo  out: one entry per opened endpoint, in the order of
 *                  the global list (psmi_opened_endpoint / user_ep_next),
 *                  carrying the endpoint handle, its epid, jkey, the raw
 *                  uuid and its string form from psmi_uuid_unparse().
 *
 * Returns PSM_OK on success.  A non-positive capacity is reported through
 * psmi_handle_error() as PSM_PARAM_ERR and an empty endpoint list as
 * PSM_EP_WAS_CLOSED; in both cases *num_of_epinfo is left unchanged.
 */
psm_error_t __psm_ep_query(int *num_of_epinfo, psm_epinfo_t *array_of_epinfo)
{
	psm_ep_t cur;
	int filled = 0;

	PSMI_ERR_UNLESS_INITIALIZED(NULL);

	if (*num_of_epinfo <= 0)
		return psmi_handle_error(NULL, PSM_PARAM_ERR,
					 "Invalid psm_ep_query parameters");

	if (psmi_opened_endpoint == NULL)
		return psmi_handle_error(NULL, PSM_EP_WAS_CLOSED,
					 "PSM Endpoint is closed or does not exist");

	/* Stop at whichever comes first: caller capacity or end of list. */
	cur = psmi_opened_endpoint;
	while (filled < *num_of_epinfo && cur != NULL) {
		psm_epinfo_t *slot = &array_of_epinfo[filled];

		slot->ep = cur;
		slot->epid = cur->epid;
		slot->jkey = cur->jkey;
		memcpy(slot->uuid, (void *)cur->uuid, sizeof(psm_uuid_t));
		psmi_uuid_unparse(cur->uuid, slot->uuid_str);

		cur = cur->user_ep_next;
		filled++;
	}
	*num_of_epinfo = filled;

	return PSM_OK;
}
Esempio n. 6
0
/*
 * Connect endpoint ep to a set of remote endpoint ids.
 *
 * ep                  local endpoint; must not be NULL.
 * num_of_epid         number of entries in the arrays below; must be >= 1.
 * array_of_epid       endpoint ids to connect to; must not be NULL.
 * array_of_epid_mask  optional (may be NULL); when given, entries whose
 *                     mask is 0 are skipped entirely.
 * array_of_errors     out: per-epid status; selected entries start as
 *                     PSM2_EPID_UNKNOWN and are updated by the PTLs.
 * array_of_epaddr     out: per-epid address; must not be NULL.
 * timeout             minimum timeout.  When positive it is scaled up with
 *                     the number of endpoints and raised to at least
 *                     PSMI_MIN_EP_CONNECT_TIMEOUT; the PSM2_CONNECT_TIMEOUT
 *                     environment variable overrides it (value multiplied
 *                     by SEC_ULL).  Presumably nanoseconds, since it is
 *                     logged as timeout / 1e9 seconds — confirm against
 *                     the public header.
 *
 * Duplicate epids in the input are connected only once; the result of the
 * first occurrence is copied to the duplicates.  Every enabled PTL in
 * ep->devid_enabled is tried in order, and epids that a PTL connected
 * successfully are masked out for the following PTLs.
 *
 * Returns PSM2_OK on success.  On failure an explanatory message (listing
 * the hostnames that share the overall error) is assembled and reported
 * through psmi_handle_error(), whose result is returned.
 */
psm2_error_t
__psm2_ep_connect(psm2_ep_t ep, int num_of_epid, psm2_epid_t const *array_of_epid,
		 int const *array_of_epid_mask,	/* can be NULL */
		 psm2_error_t *array_of_errors, psm2_epaddr_t *array_of_epaddr,
		 int64_t timeout)
{
	psm2_error_t err = PSM2_OK;
	ptl_ctl_t *ptlctl;
	ptl_t *ptl;
	int i, j, dup_idx;
	int num_toconnect = 0;
	int *epid_mask = NULL;
	int *epid_mask_isdupof = NULL;
	char *device;
	uint64_t t_start = get_cycles();
	uint64_t t_left;
	union psmi_envvar_val timeout_intval;

	PSM2_LOG_MSG("entering");
	PSMI_ERR_UNLESS_INITIALIZED(ep);

	PSMI_PLOCK();

	/*
	 * Normally we would lock here, but instead each implemented ptl component
	 * does its own locking.  This is mostly because the ptl components are
	 * ahead of the PSM interface in that they can disconnect their peers.
	 */
	if (ep == NULL || array_of_epaddr == NULL || array_of_epid == NULL ||
	    num_of_epid < 1) {
		err = psmi_handle_error(ep, PSM2_PARAM_ERR,
					"Invalid psm2_ep_connect parameters");
		goto fail;
	}

	/* We need two of these masks to detect duplicates */
	err = PSM2_NO_MEMORY;
	epid_mask =
	    (int *)psmi_malloc(ep, UNDEFINED, sizeof(int) * num_of_epid);
	if (epid_mask == NULL)
		goto fail;
	epid_mask_isdupof =
	    (int *)psmi_malloc(ep, UNDEFINED, sizeof(int) * num_of_epid);
	if (epid_mask_isdupof == NULL)
		goto fail;
	err = PSM2_OK;

	/* Eventually handle timeouts across all connects. */
	for (j = 0; j < num_of_epid; j++) {
		if (array_of_epid_mask != NULL && !array_of_epid_mask[j])
			epid_mask[j] = 0;
		else {
			epid_mask[j] = 1;
			array_of_errors[j] = PSM2_EPID_UNKNOWN;
			array_of_epaddr[j] = NULL;
			num_toconnect++;
		}
		epid_mask_isdupof[j] = -1;
	}

	psmi_getenv("PSM2_CONNECT_TIMEOUT",
		    "End-point connection timeout over-ride. 0 for no time-out.",
		    PSMI_ENVVAR_LEVEL_USER, PSMI_ENVVAR_TYPE_UINT,
		    (union psmi_envvar_val)0, &timeout_intval);

	if (getenv("PSM2_CONNECT_TIMEOUT")) {
		timeout = timeout_intval.e_uint * SEC_ULL;
	} else if (timeout > 0) {
		/* The timeout parameter provides the minimum timeout. A heuristic
		 * is used to scale up the timeout linearly with the number of
		 * endpoints, and we allow one second per 100 endpoints. */
		timeout = max(timeout, (num_toconnect * SEC_ULL) / 100);
	}

	if (timeout > 0 && timeout < PSMI_MIN_EP_CONNECT_TIMEOUT)
		timeout = PSMI_MIN_EP_CONNECT_TIMEOUT;
	_HFI_PRDBG("Connect to %d endpoints with time-out of %.2f secs\n",
		   num_toconnect, (double)timeout / 1e9);

	/* Look for duplicates in input array */
	for (i = 0; i < num_of_epid; i++) {
		for (j = i + 1; j < num_of_epid; j++) {
			if (array_of_epid[i] == array_of_epid[j] &&
			    epid_mask[i] && epid_mask[j]) {
				epid_mask[j] = 0;	/* don't connect more than once */
				epid_mask_isdupof[j] = i;
			}
		}
	}

	for (i = 0; i < PTL_MAX_INIT; i++) {
		if (ep->devid_enabled[i] == -1)
			continue;
		/* Set up the right connect ptrs */
		switch (ep->devid_enabled[i]) {
		case PTL_DEVID_IPS:
			ptlctl = &ep->ptl_ips;
			ptl = ep->ptl_ips.ptl;
			device = "ips";
			break;
		case PTL_DEVID_AMSH:
			ptlctl = &ep->ptl_amsh;
			ptl = ep->ptl_amsh.ptl;
			device = "amsh";
			break;
		case PTL_DEVID_SELF:
			ptlctl = &ep->ptl_self;
			ptl = ep->ptl_self.ptl;
			device = "self";
			break;
		default:
			/* The assignments only keep the compiler from
			 * flagging the variables as possibly unset; the
			 * error is reported with PSMI_EP_NORETURN. */
			ptlctl = &ep->ptl_ips;	/*no-unused */
			ptl = ep->ptl_ips.ptl;	/*no-unused */
			device = "ips";	/*no-unused */
			psmi_handle_error(PSMI_EP_NORETURN, PSM2_INTERNAL_ERR,
					  "Unknown/unhandled PTL id %d\n",
					  ep->devid_enabled[i]);
			break;
		}
		t_left = psmi_cycles_left(t_start, timeout);

		_HFI_VDBG("Trying to connect with device %s\n", device);
		if ((err = ptlctl->ep_connect(ptl, num_of_epid, array_of_epid,
					      epid_mask, array_of_errors,
					      array_of_epaddr,
					      cycles_to_nanosecs(t_left)))) {
			_HFI_PRDBG("Connect failure in device %s err=%d\n",
				   device, err);
			goto connect_fail;
		}

		/* Now process what's been connected */
		for (j = 0; j < num_of_epid; j++) {
			dup_idx = epid_mask_isdupof[j];
			if (!epid_mask[j] && dup_idx == -1)
				continue;

			if (dup_idx != -1) {	/* dup */
				array_of_epaddr[j] = array_of_epaddr[dup_idx];
				array_of_errors[j] = array_of_errors[dup_idx];
				epid_mask_isdupof[j] = -1;
			}

			if (array_of_errors[j] == PSM2_OK) {
				epid_mask[j] = 0;	/* don't try on next ptl */
				ep->connections++;
			}
		}
	}

	for (i = 0; i < num_of_epid; i++) {
		ptl_ctl_t *c = NULL;
		if (array_of_epid_mask != NULL && !array_of_epid_mask[i])
			continue;
		/* If we see unreachable here, that means some PTLs were not enabled */
		if (array_of_errors[i] == PSM2_EPID_UNREACHABLE) {
			err = PSM2_EPID_UNREACHABLE;
			break;
		}

		psmi_assert_always(array_of_epaddr[i] != NULL);
		c = array_of_epaddr[i]->ptlctl;
		psmi_assert_always(c != NULL);
		_HFI_VDBG("%-20s DEVICE %s (%p)\n",
			  psmi_epaddr_get_name(array_of_epid[i]),
			  c == &ep->ptl_ips ? "hfi" :
			  (c == &ep->ptl_amsh ? "amsh" : "self"),
			  (void *)array_of_epaddr[i]->ptlctl->ptl);
	}

connect_fail:
	/* If the error is a timeout (at worse) and the client is OPA MPI,
	 * just return timeout to let OPA MPI handle the hostnames that
	 * timed out */
	if (err != PSM2_OK) {
		char errbuf[PSM2_ERRSTRING_MAXLEN];
		size_t len;
		int num_hosts = 0;	/* hostnames appended so far */

		if (err == PSM2_EPID_UNREACHABLE) {
			char *deverr = "of an incorrect setting";
			char *eperr = " ";
			char *devname = NULL;
			if (!psmi_ep_device_is_enabled(ep, PTL_DEVID_AMSH)) {
				deverr =
				    "there is no shared memory PSM device (shm)";
				eperr = " shared memory ";
			} else
			    if (!psmi_ep_device_is_enabled(ep, PTL_DEVID_IPS)) {
				deverr =
				    "there is no OPA PSM device (hfi)";
				eperr = " OPA ";
			}

			len = snprintf(errbuf, sizeof(errbuf) - 1,
				       "Some%sendpoints could not be connected because %s "
				       "in the currently enabled PSM_DEVICES (",
				       eperr, deverr);
			/* The len guard in the loop condition keeps the
			 * remaining-space computation below from wrapping. */
			for (i = 0; i < PTL_MAX_INIT && len < sizeof(errbuf) - 1;
			     i++) {
				switch (ep->devid_enabled[i]) {
				case PTL_DEVID_IPS:
					devname = "hfi";
					break;
				case PTL_DEVID_AMSH:
					devname = "shm";
					break;
				case PTL_DEVID_SELF:
				default:
					devname = "self";
					break;
				}
				len +=
				    snprintf(errbuf + len,
					     sizeof(errbuf) - len - 1, "%s,",
					     devname);
			}
			if (len < sizeof(errbuf) - 1 && devname != NULL)
				/* parsed something, remove trailing comma */
				errbuf[len - 1] = ')';
		} else
			len = snprintf(errbuf, sizeof(errbuf) - 1,
				       "%s", err == PSM2_TIMEOUT ?
				       "Dectected connection timeout" :
				       psm2_error_get_string(err));

		/* first pass, look for all nodes with the error */
		for (i = 0; i < num_of_epid && len < sizeof(errbuf) - 1; i++) {
			if (array_of_epid_mask != NULL
			    && !array_of_epid_mask[i])
				continue;
			if (array_of_errors[i] == PSM2_OK)
				continue;
			if (array_of_errors[i] == PSM2_EPID_UNREACHABLE &&
			    err != PSM2_EPID_UNREACHABLE)
				continue;
			if (err == array_of_errors[i]) {
				len +=
				    snprintf(errbuf + len,
					     sizeof(errbuf) - len - 1, "%c %s",
					     num_hosts == 0 ? ':' : ',',
					     psmi_epaddr_get_hostname
					     (array_of_epid[i]));
				num_hosts++;
			}
		}
		errbuf[sizeof(errbuf) - 1] = '\0';
		/* errbuf contains peer hostnames, so it must be passed as
		 * data and never interpreted as a format string. */
		err = psmi_handle_error(ep, err, "%s", errbuf);
	}

fail:
	PSMI_PUNLOCK();

	if (epid_mask != NULL)
		psmi_free(epid_mask);
	if (epid_mask_isdupof != NULL)
		psmi_free(epid_mask_isdupof);

	PSM2_LOG_MSG("leaving");
	return err;
}
Esempio n. 7
0
/*
 * Close a master endpoint together with every context linked to it.
 *
 * ep          master endpoint to close (asserted: ep->mctxt_master == ep).
 * mode        close mode, forwarded to each PTL's fini(); compared with
 *             PSM_EP_CLOSE_FORCE only for the debug message.
 * timeout_in  requested close timeout.  When positive it is scaled with
 *             ep->connections and raised to at least
 *             PSMI_MIN_EP_CLOSE_TIMEOUT; the PSM_CLOSE_TIMEOUT environment
 *             variable overrides it (value multiplied by SEC_ULL).
 *             Non-positive or excessive values are capped to
 *             PSMI_MAX_EP_CLOSE_TIMEOUT.  Presumably nanoseconds, since it
 *             is logged as timeout_in / 1e9 seconds — confirm against the
 *             public header.
 *
 * The contexts are visited starting from the master's mctxt_prev and going
 * backwards around the ring, so the master itself is handled last; it is
 * unlinked from the global psmi_opened_endpoint list just before it is
 * freed.
 *
 * Returns the last status produced by the PTL fini() calls, or the result
 * of psmi_handle_error(PSM_EP_WAS_CLOSED) when ep is not in the global
 * list.  The global lock taken here is released on every return path.
 */
psm_error_t __psm_ep_close(psm_ep_t ep, int mode, int64_t timeout_in)
{
	psm_error_t err = PSM_OK;
	uint64_t t_start = get_cycles();
	union psmi_envvar_val timeout_intval;
	psm_ep_t tmp, mep, walk;
	int last;

	PSMI_ERR_UNLESS_INITIALIZED(ep);
	psmi_assert_always(ep->mctxt_master == ep);

	PSMI_PLOCK();

	if (psmi_opened_endpoint == NULL) {
		err = psmi_handle_error(NULL, PSM_EP_WAS_CLOSED,
					"PSM Endpoint is closed or does not exist");
		/* Do not leave with the lock held. */
		PSMI_PUNLOCK();
		return err;
	}

	tmp = psmi_opened_endpoint;
	while (tmp && tmp != ep) {
		tmp = tmp->user_ep_next;
	}
	if (!tmp) {
		err = psmi_handle_error(NULL, PSM_EP_WAS_CLOSED,
					"PSM Endpoint is closed or does not exist");
		/* Do not leave with the lock held. */
		PSMI_PUNLOCK();
		return err;
	}

	psmi_getenv("PSM_CLOSE_TIMEOUT",
		    "End-point close timeout over-ride.",
		    PSMI_ENVVAR_LEVEL_USER, PSMI_ENVVAR_TYPE_UINT,
		    (union psmi_envvar_val)0, &timeout_intval);

	if (getenv("PSM_CLOSE_TIMEOUT")) {
		timeout_in = timeout_intval.e_uint * SEC_ULL;
	} else if (timeout_in > 0) {
		/* The timeout parameter provides the minimum timeout. A heuristic
		 * is used to scale up the timeout linearly with the number of
		 * endpoints, and we allow one second per 100 endpoints. */
		timeout_in = max(timeout_in, (ep->connections * SEC_ULL) / 100);
	}

	if (timeout_in > 0 && timeout_in < PSMI_MIN_EP_CLOSE_TIMEOUT)
		timeout_in = PSMI_MIN_EP_CLOSE_TIMEOUT;

	/* Infinite and excessive close time-out are limited here to a max.
	 * The "rationale" is that there is no point waiting around forever for
	 * graceful termination. Normal (or forced) process termination should clean
	 * up the context state correctly even if termination is not graceful. */
	if (timeout_in <= 0 || timeout_in > PSMI_MAX_EP_CLOSE_TIMEOUT)
		timeout_in = PSMI_MAX_EP_CLOSE_TIMEOUT;
	_HFI_PRDBG("Closing endpoint %p with force=%s and to=%.2f seconds and "
		   "%d connections\n",
		   ep, mode == PSM_EP_CLOSE_FORCE ? "YES" : "NO",
		   (double)timeout_in / 1e9, (int)ep->connections);

	/* XXX We currently cheat in the sense that we leave each PTL the allowed
	 * timeout.  There's no good way to do this until we change the PTL
	 * interface to allow asynchronous finalization
	 */
	mep = ep;
	tmp = ep->mctxt_prev;
	do {
		ep = tmp;
		/* Remember the next context before ep leaves the ring. */
		tmp = ep->mctxt_prev;
		PSM_MCTXT_REMOVE(ep);
		if (psmi_ep_device_is_enabled(ep, PTL_DEVID_AMSH))
			err =
			    psmi_ptl_amsh.fini(ep->ptl_amsh.ptl, mode,
					       timeout_in);

		if ((err == PSM_OK || err == PSM_TIMEOUT) &&
		    psmi_ep_device_is_enabled(ep, PTL_DEVID_IPS))
			err =
			    psmi_ptl_ips.fini(ep->ptl_ips.ptl, mode,
					      timeout_in);

		/* If there's timeouts in the disconnect requests,
		 * still make sure that we still get to close the
		 * endpoint and mark it closed */
		if (psmi_ep_device_is_enabled(ep, PTL_DEVID_IPS))
			psmi_context_close(&ep->context);

		psmi_free(ep->epaddr);
		psmi_free(ep->context_mylabel);
		/*
		 * Before freeing the master ep itself,
		 * remove it from the global linklist.
		 * We do it here to let atexit handler in ptl_am directory
		 * to search the global linklist and free the shared memory file.
		 */
		if (ep == mep) {
			if (psmi_opened_endpoint == ep) {
				psmi_opened_endpoint = ep->user_ep_next;
			} else {
				/* Use a separate cursor: tmp still drives
				 * the termination test of the outer loop. */
				walk = psmi_opened_endpoint;
				while (walk->user_ep_next != ep) {
					walk = walk->user_ep_next;
				}
				walk->user_ep_next = ep->user_ep_next;
			}
			psmi_opened_endpoint_count--;
		}

		/* Evaluate the end-of-ring condition while ep is still a
		 * valid pointer, i.e. before it is freed. */
		last = (tmp == ep);
		psmi_free(ep);

	} while ((err == PSM_OK || err == PSM_TIMEOUT) && !last);

	PSMI_PUNLOCK();

	_HFI_PRDBG("Closed endpoint in %.3f secs\n",
		   (double)cycles_to_nanosecs(get_cycles() -
					      t_start) / SEC_ULL);
	return err;
}
Esempio n. 8
0
/*
 * Open an endpoint (and, for multi-rail setups, its slave contexts).
 *
 * unique_job_key  job key forwarded to __psm_ep_open_internal().
 * opts_i          open options forwarded to __psm_ep_open_internal().
 * epo             out: handle of the newly opened master endpoint.
 * epido           out: endpoint id of the master endpoint.
 *
 * Sequence: allocate the matched queue, parse PSM_DEVICES, query the
 * multi-rail configuration when the IPS device is enabled, open the master
 * endpoint, append it to the global psmi_opened_endpoint list, initialize
 * active messages, then open one slave endpoint per additional rail and
 * link it behind the master.  Before each open on a rail, the HFI_UNIT and
 * HFI_PORT environment variables are overwritten with that rail's
 * unit/port.
 *
 * Returns PSM_TOO_MANY_ENDPOINTS when an endpoint is already open (only
 * one is supported), otherwise the status of the first failing step or of
 * psmi_mq_initialize_defaults().
 */
psm_error_t
__psm_ep_open(psm_uuid_t const unique_job_key,
	      struct psm_ep_open_opts const *opts_i, psm_ep_t *epo,
	      psm_epid_t *epido)
{
	psm_error_t err;
	psm_mq_t mq;
	psm_epid_t epid;
	psm_ep_t master, link;
	uint32_t rail_units[HFI_MAX_RAILS];
	uint16_t rail_ports[HFI_MAX_RAILS];
	int rail;
	int num_rails = 0;
	char *unit_env = "HFI_UNIT";
	char *port_env = "HFI_PORT";
	char unit_str[4];
	char port_str[4];
	int devid_enabled[PTL_MAX_INIT];
	union psmi_envvar_val devs;

	PSMI_ERR_UNLESS_INITIALIZED(NULL);

	/* Currently only one endpoint is supported. */
	if (psmi_opened_endpoint_count > 0)
		return PSM_TOO_MANY_ENDPOINTS;

	PSMI_PLOCK();

	/* The matched queue comes first: ep->mq has to exist and be valid
	 * before ips_do_work can be called.
	 */
	err = psmi_mq_malloc(&mq);
	if (err != PSM_OK)
		goto fail;

	/* Work out which ptl devices this endpoint is going to use. */
	psmi_getenv("PSM_DEVICES",
		    "Ordered list of PSM-level devices",
		    PSMI_ENVVAR_LEVEL_USER,
		    PSMI_ENVVAR_TYPE_STR,
		    (union psmi_envvar_val)PSMI_DEVICES_DEFAULT, &devs);

	err = psmi_parse_devices(devid_enabled, devs.e_str);
	if (err)
		goto fail;

	if (psmi_device_is_enabled(devid_enabled, PTL_DEVID_IPS)) {
		err = psmi_ep_multirail(&num_rails, rail_units, rail_ports);
		if (err != PSM_OK)
			goto fail;

		/* With multi-rail, the master opens on the first unit/port. */
		if (num_rails > 0) {
			snprintf(unit_str, sizeof(unit_str), "%1d",
				 rail_units[0]);
			snprintf(port_str, sizeof(port_str), "%1d",
				 rail_ports[0]);
			setenv(unit_env, unit_str, 1);
			setenv(port_env, port_str, 1);
		}
	}

	err = __psm_ep_open_internal(unique_job_key,
				     devid_enabled, opts_i, mq, &master,
				     &epid);
	if (err != PSM_OK)
		goto fail;

	/* Append the new endpoint at the tail of the global list. */
	if (psmi_opened_endpoint == NULL) {
		psmi_opened_endpoint = master;
	} else {
		for (link = psmi_opened_endpoint; link->user_ep_next;
		     link = link->user_ep_next)
			;
		link->user_ep_next = master;
	}
	psmi_opened_endpoint_count++;

	/* A freshly opened master forms a one-element context ring. */
	master->mctxt_prev = master->mctxt_next = master;
	master->mctxt_master = master;
	mq->ep = master;

	/* Active Message initialization */
	err = psmi_am_init_internal(master);
	if (err != PSM_OK)
		goto fail;

	*epo = master;
	*epido = epid;

	if (psmi_device_is_enabled(devid_enabled, PTL_DEVID_IPS)) {
		/* Rail 0 is the master; open a slave for each further rail. */
		for (rail = 1; rail < num_rails; rail++) {
			snprintf(unit_str, sizeof(unit_str), "%1d",
				 rail_units[rail]);
			snprintf(port_str, sizeof(port_str), "%1d",
				 rail_ports[rail]);
			setenv(unit_env, unit_str, 1);
			setenv(port_env, port_str, 1);

			/* Create slave EP */
			err = __psm_ep_open_internal(unique_job_key,
						     devid_enabled, opts_i, mq,
						     &link, &epid);
			if (err)
				goto fail;

			/* Slaves share the master's AM handler table. */
			link->am_htable = master->am_htable;

			/* Link slave EP after master EP. */
			PSM_MCTXT_APPEND(master, link);
		}
	}

	/* All devices are initialized now, so the MQ can pick up its
	 * default values. */
	if (err == PSM_OK)
		err = psmi_mq_initialize_defaults(mq);

	_HFI_VDBG("psm_ep_open() OK....\n");

fail:
	PSMI_PUNLOCK();
	return err;
}
Esempio n. 9
0
/*
 * Return the list of local LIDs whose port GID prefix matches my_gid_hi.
 *
 * lids        out: pointer to the cached LID array (owned by this
 *             function; callers must not free it).
 * num_lids_o  out: number of valid entries in *lids.
 * my_gid_hi   upper 64 bits of the caller's GID; a port is accepted only
 *             when its gid_hi equals this value.
 * my_gid_lo   lower 64 bits of the caller's GID; used in debug output
 *             only, not in the match.
 *
 * The list is built on the first successful call by probing every
 * unit/port (ports are numbered 1..HFI_MAX_PORT) and is then cached in
 * function statics; later calls return the cached list regardless of the
 * GID passed in.
 *
 * Returns PSM_OK on success.  On failure the outputs are not written and
 * the result of psm_ep_num_devunits() or psmi_handle_error() is returned
 * (PSM_NO_MEMORY, or PSM_EP_DEVICE_FAILURE when no port matched).  A
 * failed probe is not cached: the next call probes again.
 */
static psm_error_t
psmi_ep_devlids(uint16_t **lids, uint32_t *num_lids_o,
		uint64_t my_gid_hi, uint64_t my_gid_lo)
{
	static uint16_t *hfi_lids;
	static uint32_t nlids;
	uint32_t num_units;
	int i;
	psm_error_t err = PSM_OK;

	PSMI_ERR_UNLESS_INITIALIZED(NULL);

	if (hfi_lids == NULL) {
		if ((err = psm_ep_num_devunits(&num_units)))
			goto fail;
		hfi_lids = (uint16_t *)
		    psmi_calloc(PSMI_EP_NONE, UNDEFINED,
				num_units * HFI_MAX_PORT, sizeof(uint16_t));
		if (hfi_lids == NULL) {
			err = psmi_handle_error(NULL, PSM_NO_MEMORY,
						"Couldn't allocate memory for dev_lids structure");
			goto fail;
		}

		/* The cast makes the signed/unsigned comparison explicit;
		 * i stays an int because it is passed to the hfi_* probes
		 * and printed with %d. */
		for (i = 0; (uint32_t) i < num_units; i++) {
			int j;
			for (j = 1; j <= HFI_MAX_PORT; j++) {
				int lid = hfi_get_port_lid(i, j);
				int ret;
				uint64_t gid_hi = 0, gid_lo = 0;

				if (lid == -1)
					continue;
				ret = hfi_get_port_gid(i, j, &gid_hi, &gid_lo);
				if (ret == -1)
					continue;
				else if (my_gid_hi != gid_hi) {
					_HFI_VDBG("LID %d, unit %d, port %d, "
						  "mismatched GID %llx:%llx and "
						  "%llx:%llx\n",
						  lid, i, j,
						  (unsigned long long)gid_hi,
						  (unsigned long long)gid_lo,
						  (unsigned long long)my_gid_hi,
						  (unsigned long long)
						  my_gid_lo);
					continue;
				}
				_HFI_VDBG("LID %d, unit %d, port %d, "
					  "matching GID %llx:%llx and "
					  "%llx:%llx\n", lid, i, j,
					  (unsigned long long)gid_hi,
					  (unsigned long long)gid_lo,
					  (unsigned long long)my_gid_hi,
					  (unsigned long long)my_gid_lo);

				hfi_lids[nlids++] = (uint16_t) lid;
			}
		}
		if (nlids == 0) {
			/* Drop the empty cache.  Keeping the non-NULL array
			 * would make every later call skip the probe and
			 * return PSM_OK with zero LIDs. */
			psmi_free(hfi_lids);
			hfi_lids = NULL;
			err = psmi_handle_error(NULL, PSM_EP_DEVICE_FAILURE,
						"Couldn't get lid&gid from any unit/port");
			goto fail;
		}
	}
	*lids = hfi_lids;
	*num_lids_o = nlids;

fail:
	return err;
}