Example No. 1
/*
 * Add ipsaddr with epid to the epstate table, return new index to caller in
 * 'connidx'.
 */
psm2_error_t
ips_epstate_add(struct ips_epstate *eps, struct ips_epaddr *ipsaddr,
		ips_epstate_idx *connidx_o)
{
	int i, j;
	ips_epstate_idx connidx;

	if (++eps->eps_tabsizeused > eps->eps_tabsize) {	/* realloc */
		struct ips_epstate_entry *newtab;
		eps->eps_tabsize += PTL_EPADDR_ALLOC_CHUNK;
		newtab = (struct ips_epstate_entry *)
		    psmi_calloc(eps->context->ep, PER_PEER_ENDPOINT,
				eps->eps_tabsize,
				sizeof(struct ips_epstate_entry));
		if (newtab == NULL)
			return PSM2_NO_MEMORY;
		else if (eps->eps_tab) {	/* NOT first alloc */
			for (i = 0;
			     i < eps->eps_tabsize - PTL_EPADDR_ALLOC_CHUNK; i++)
				newtab[i] = eps->eps_tab[i];	/* deep copy */
			psmi_free(eps->eps_tab);
		}
		eps->eps_tab = newtab;
	}
	/* Find the next free slot.  We can afford a linear scan since connect
	 * is not in the critical path */
	for (i = 0, j = eps->eps_tab_nextidx; i < eps->eps_tabsize; i++, j++) {
		if (j == eps->eps_tabsize)
			j = 0;
		if (eps->eps_tab[j].ipsaddr == NULL) {
			eps->eps_tab_nextidx = j + 1;
			if (eps->eps_tab_nextidx == eps->eps_tabsize)
				eps->eps_tab_nextidx = 0;
			break;
		}
	}
	psmi_assert_always(i != eps->eps_tabsize);
	connidx = (j - eps->eps_base_idx) & (IPS_EPSTATE_CONNIDX_MAX-1);
	_HFI_VDBG("node %s gets connidx=%d (table idx %d)\n",
		  psmi_epaddr_get_name(((psm2_epaddr_t) ipsaddr)->epid), connidx,
		  j);
	eps->eps_tab[j].ipsaddr = ipsaddr;
	if (j >= IPS_EPSTATE_CONNIDX_MAX) {
		return psmi_handle_error(eps->context->ep,
					 PSM2_TOO_MANY_ENDPOINTS,
					 "Can't connect to more than %d non-local endpoints",
					 IPS_EPSTATE_CONNIDX_MAX);
	}
	*connidx_o = connidx;
	return PSM2_OK;
}
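
A minimal caller sketch for the routine above, assuming a struct ips_epstate that has already been initialized and a peer struct ips_epaddr obtained during connection setup; the helper name and its comments are illustrative, not part of the original source:

/* Hypothetical helper: record a newly connected peer in the epstate table
 * and hand back the connection index it was assigned. */
static psm2_error_t
example_record_peer(struct ips_epstate *eps, struct ips_epaddr *ipsaddr,
		    ips_epstate_idx *connidx_out)
{
	psm2_error_t err = ips_epstate_add(eps, ipsaddr, connidx_out);

	if (err != PSM2_OK)	/* PSM2_NO_MEMORY or PSM2_TOO_MANY_ENDPOINTS */
		return err;

	/* The index in *connidx_out identifies this peer's slot in our table
	 * from now on; it is typically exchanged with the peer during
	 * connection establishment. */
	return PSM2_OK;
}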
Example No. 2
psm_error_t
ips_tid_init(const psmi_context_t *context,
	     struct ips_tid *tidc, ips_tid_avail_cb_fn_t cb, void *cb_context)
{
	const struct hfi1_ctxt_info *ctxt_info = &context->ctrl->ctxt_info;

	struct psmi_stats_entry entries[] = {
		PSMI_STATS_DECL("tid update count", MPSPAWN_STATS_REDUCTION_ALL,
				NULL, &tidc->tid_num_total),
	};

	tidc->context = context;
	/* these are in group units; a group is 8 tids or 4 tidpairs */
	tidc->tid_num_total = 0;
	tidc->tid_num_inuse = 0;
	tidc->tid_avail_cb = cb;
	tidc->tid_avail_context = cb_context;

	tidc->tid_ctrl = (struct ips_tid_ctrl *)context->tid_ctrl;
	if (!tidc->tid_ctrl) {
		tidc->tid_ctrl = (struct ips_tid_ctrl *)
		    psmi_calloc(context->ep, UNDEFINED, 1,
				sizeof(struct ips_tid_ctrl));
		if (tidc->tid_ctrl == NULL) {
			return PSM_NO_MEMORY;
		}
	}

	/*
	 * Only the master process can initialize.
	 */
	if (ctxt_info->subctxt == 0) {
		pthread_spin_init(&tidc->tid_ctrl->tid_ctrl_lock,
					PTHREAD_PROCESS_SHARED);

		/* check if exp tids are a multiple of 8 (a group) */
		if (context->ctrl->__hfi_tidexpcnt % 8)
			return psmi_handle_error(context->ep,
			      PSM_INTERNAL_ERR,
			      "Expected tids(%d) are not multi-groups(8)",
			      context->ctrl->__hfi_tidexpcnt);

		tidc->tid_ctrl->tid_num_max =
		    context->ctrl->__hfi_tidexpcnt >> 3;
		tidc->tid_ctrl->tid_num_avail = tidc->tid_ctrl->tid_num_max;
	}
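
The listing above ends abruptly after the master-process branch; judging from the PSM2 variant in Example No. 3, a plausible completion (an assumption, not part of the original listing) registers the statistics entries declared at the top of the function and returns:

	/* Assumed completion, mirroring the PSM2 version of this routine. */
	return psmi_stats_register_type(PSMI_STATS_NO_HEADING,
					PSMI_STATSTYPE_TIDS,
					entries,
					PSMI_STATS_HOWMANY(entries), tidc);
}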
Example No. 3
psm2_error_t
ips_tid_init(const psmi_context_t *context, struct ips_protoexp *protoexp,
	     ips_tid_avail_cb_fn_t cb, void *cb_context)
{
	const struct hfi1_user_info_dep *user_info = &context->user_info;
	const struct hfi1_base_info *base_info     = &context->ctrl->base_info;
	const struct hfi1_ctxt_info *ctxt_info     = &context->ctrl->ctxt_info;
	struct ips_tid *tidc = &protoexp->tidc;

	struct psmi_stats_entry entries[] = {
		PSMI_STATS_DECL("tid update count", MPSPAWN_STATS_REDUCTION_ALL,
				NULL, &tidc->tid_num_total),
	};

	tidc->context = context;
	tidc->protoexp = protoexp;
	tidc->tid_num_total = 0;
	tidc->tid_num_inuse = 0;
	tidc->tid_avail_cb = cb;
	tidc->tid_avail_context = cb_context;
	tidc->tid_array = NULL;
	tidc->invalidation_event = (uint64_t *)
		(ptrdiff_t) base_info->events_bufbase;

	/*
	 * PSM uses tid registration caching only if driver has enabled it.
	 */
	if (!(tidc->context->runtime_flags & HFI1_CAP_TID_UNMAP)) {
		int i;
		cl_qmap_t *p_map;
		cl_map_item_t *root,*nil_item;

		tidc->tid_array = (uint32_t *)
			psmi_calloc(context->ep, UNDEFINED,
				context->ctrl->__hfi_tidexpcnt,
				sizeof(uint32_t));
		if (tidc->tid_array == NULL)
			return PSM2_NO_MEMORY;

		/*
		 * first is root node, last is terminator node.
		 */
		p_map = &tidc->tid_cachemap;
		root = (cl_map_item_t *)
			psmi_calloc(context->ep, UNDEFINED,
				    context->ctrl->__hfi_tidexpcnt + 2,
				    sizeof(cl_map_item_t));

		if (root == NULL)
			return PSM2_NO_MEMORY;

		nil_item = &root
			[context->ctrl->__hfi_tidexpcnt + 1];

		ips_tidcache_map_init(p_map,root,nil_item);

		NTID = 0;
		NIDLE = 0;
		IPREV(IHEAD) = INEXT(IHEAD) = IHEAD;
		for (i = 1; i <= context->ctrl->__hfi_tidexpcnt; i++) {
			INVALIDATE(i) = 1;
		}

		/*
		 * If not a shared context, all tids are used by the same
		 * process. Otherwise, each subcontext process can only cache
		 * its own portion. The driver makes the same tid number
		 * assignment to subcontext processes.
		 */
		tidc->tid_cachesize = context->ctrl->__hfi_tidexpcnt;
		if (user_info->subctxt_cnt > 0) {
			uint16_t remainder = tidc->tid_cachesize %
					user_info->subctxt_cnt;
			tidc->tid_cachesize /= user_info->subctxt_cnt;
			if (ctxt_info->subctxt < remainder)
				tidc->tid_cachesize++;
		}
	}

	/*
	 * Setup shared control structure.
	 */
	tidc->tid_ctrl = (struct ips_tid_ctrl *)context->tid_ctrl;
	if (!tidc->tid_ctrl) {
		tidc->tid_ctrl = (struct ips_tid_ctrl *)
		    psmi_calloc(context->ep, UNDEFINED, 1,
				sizeof(struct ips_tid_ctrl));
		if (tidc->tid_ctrl == NULL) {
			return PSM2_NO_MEMORY;
		}
	}

	/*
	 * Only the master process can initialize.
	 */
	if (ctxt_info->subctxt == 0) {
		pthread_spin_init(&tidc->tid_ctrl->tid_ctrl_lock,
					PTHREAD_PROCESS_SHARED);

		tidc->tid_ctrl->tid_num_max =
			    context->ctrl->__hfi_tidexpcnt;
		tidc->tid_ctrl->tid_num_avail = tidc->tid_ctrl->tid_num_max;
	}

	return psmi_stats_register_type(PSMI_STATS_NO_HEADING,
					PSMI_STATSTYPE_TIDS,
					entries,
					PSMI_STATS_HOWMANY(entries), tidc);
}
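
The per-subcontext split near the end of the cache setup is just a balanced integer partition; a small self-contained sketch of the same arithmetic (the function name is hypothetical):

#include <stdint.h>

/* Split `total` cached TID entries across `subctxt_cnt` subcontext processes
 * the way the code above does: each gets total / subctxt_cnt entries, and the
 * first (total % subctxt_cnt) subcontexts get one extra. */
static uint32_t
example_tid_cache_share(uint32_t total, uint16_t subctxt_cnt, uint16_t subctxt)
{
	uint32_t share;
	uint16_t remainder;

	if (subctxt_cnt == 0)
		return total;	/* not a shared context: one process caches everything */

	share = total / subctxt_cnt;
	remainder = total % subctxt_cnt;
	if (subctxt < remainder)
		share++;
	return share;
}

For example, 2048 expected TIDs over 3 subcontexts yields shares of 683, 683 and 682, which sum back to 2048.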
Example No. 4
psm_error_t
__psm_ep_open_internal(psm_uuid_t const unique_job_key, int *devid_enabled,
		       struct psm_ep_open_opts const *opts_i, psm_mq_t mq,
		       psm_ep_t *epo, psm_epid_t *epido)
{
	psm_ep_t ep = NULL;
	uint32_t num_units;
	size_t len;
	psm_error_t err;
	psm_epaddr_t epaddr = NULL;
	char buf[128], *p, *e;
	union psmi_envvar_val envvar_val;
	size_t ptl_sizes;
	struct psm_ep_open_opts opts;
	ptl_t *amsh_ptl, *ips_ptl, *self_ptl;
	int i;

	/* First get the set of default options, we overwrite with the user's
	 * desired values afterwards */
	if ((err = psm_ep_open_opts_get_defaults(&opts)))
		goto fail;

	if (opts_i != NULL) {
		if (opts_i->timeout != -1)
			opts.timeout = opts_i->timeout;
		if (opts_i->unit != -1)
			opts.unit = opts_i->unit;
		if (opts_i->affinity != -1)
			opts.affinity = opts_i->affinity;

		if (opts_i->sendbufs_num != -1)
			opts.sendbufs_num = opts_i->sendbufs_num;

		if (opts_i->network_pkey != HFI_DEFAULT_P_KEY)
			opts.network_pkey = opts_i->network_pkey;

		if (opts_i->port != 0)
			opts.port = opts_i->port;

		if (opts_i->outsl != -1)
			opts.outsl = opts_i->outsl;

		if (opts_i->service_id)
			opts.service_id = (uint64_t) opts_i->service_id;
		if (opts_i->path_res_type != PSM_PATH_RES_NONE)
			opts.path_res_type = opts_i->path_res_type;

		if (opts_i->senddesc_num)
			opts.senddesc_num = opts_i->senddesc_num;
		if (opts_i->imm_size)
			opts.imm_size = opts_i->imm_size;
	}

	/* Get Service ID from environment */
	if (!psmi_getenv("PSM_IB_SERVICE_ID",
			 "HFI Service ID for path resolution",
			 PSMI_ENVVAR_LEVEL_USER,
			 PSMI_ENVVAR_TYPE_ULONG_ULONG,
			 (union psmi_envvar_val)HFI_DEFAULT_SERVICE_ID,
			 &envvar_val)) {
		opts.service_id = (uint64_t) envvar_val.e_ulonglong;
	}

	/* Get Path resolution type from environment. Possible choices are:
	 *
	 * NONE : Default same as previous instances. Utilizes static data.
	 * OPP  : Use OFED Plus Plus library to do path record queries.
	 * UMAD : Use raw libibumad interface to form and process path records.
	 */
	if (!psmi_getenv("PSM_PATH_REC",
			 "Mechanism to query HFI path record (default is no path query)",
			 PSMI_ENVVAR_LEVEL_USER, PSMI_ENVVAR_TYPE_STR,
			 (union psmi_envvar_val)"none", &envvar_val)) {
		if (!strcasecmp(envvar_val.e_str, "none"))
			opts.path_res_type = PSM_PATH_RES_NONE;
		else if (!strcasecmp(envvar_val.e_str, "opp"))
			opts.path_res_type = PSM_PATH_RES_OPP;
		else if (!strcasecmp(envvar_val.e_str, "umad"))
			opts.path_res_type = PSM_PATH_RES_UMAD;
		else {
			_HFI_ERROR("Unknown path resolution type %s. "
				   "Disabling use of path record query.\n",
				   envvar_val.e_str);
			opts.path_res_type = PSM_PATH_RES_NONE;
		}
	}

	/* If a specific unit is set in the environment, use that one. */
	if (!psmi_getenv("HFI_UNIT", "Device Unit number (-1 autodetects)",
			 PSMI_ENVVAR_LEVEL_USER, PSMI_ENVVAR_TYPE_LONG,
			 (union psmi_envvar_val)HFI_UNIT_ID_ANY, &envvar_val)) {
		opts.unit = envvar_val.e_long;
	}

	/* Get user specified port number to use. */
	if (!psmi_getenv("HFI_PORT", "IB Port number (0 autodetects)",
			 PSMI_ENVVAR_LEVEL_USER, PSMI_ENVVAR_TYPE_LONG,
			 (union psmi_envvar_val)HFI_PORT_NUM_ANY,
			 &envvar_val)) {
		opts.port = envvar_val.e_long;
	}

	/* Get service level from environment, path-query overrides it */
	if (!psmi_getenv
	    ("HFI_SL", "HFI outging ServiceLevel number (default 0)",
	     PSMI_ENVVAR_LEVEL_USER, PSMI_ENVVAR_TYPE_LONG,
	     (union psmi_envvar_val)PSMI_SL_DEFAULT, &envvar_val)) {
		opts.outsl = envvar_val.e_long;
	}

	/* Get network key from environment. MVAPICH and other vendor MPIs do not
	 * specify it on ep open and we may require it for vFabrics.
	 * path-query will override it.
	 */
	if (!psmi_getenv("PSM_PKEY",
			 "HFI PKey to use for endpoint",
			 PSMI_ENVVAR_LEVEL_USER,
			 PSMI_ENVVAR_TYPE_ULONG,
			 (union psmi_envvar_val)HFI_DEFAULT_P_KEY,
			 &envvar_val)) {
		opts.network_pkey = (uint64_t) envvar_val.e_ulong;
	}

	/* BACKWARDS COMPATIBILITY:  Open MPI likes to choose its own PKEY of
	   0x7FFF.  That's no longer a valid default, so override it if the
	   client was compiled against PSM v1 */
	if (PSMI_VERNO_GET_MAJOR(psmi_verno_client()) < 2 &&
			opts.network_pkey == 0x7FFF) {
		opts.network_pkey = HFI_DEFAULT_P_KEY;
	}

	/* Get number of default send buffers from environment */
	if (!psmi_getenv("PSM_NUM_SEND_BUFFERS",
			 "Number of send buffers to allocate [1024]",
			 PSMI_ENVVAR_LEVEL_USER,
			 PSMI_ENVVAR_TYPE_UINT,
			 (union psmi_envvar_val)1024, &envvar_val)) {
		opts.sendbufs_num = envvar_val.e_uint;
	}

	/* Get immediate data size - transfers less than immediate data size do
	 * not consume a send buffer and require just a send descriptor.
	 */
	if (!psmi_getenv("PSM_SEND_IMMEDIATE_SIZE",
			 "Immediate data send size not requiring a buffer [128]",
			 PSMI_ENVVAR_LEVEL_USER,
			 PSMI_ENVVAR_TYPE_UINT,
			 (union psmi_envvar_val)128, &envvar_val)) {
		opts.imm_size = envvar_val.e_uint;
	}

	/* Get number of send descriptors - by default this is 4 times the number
	 * of send buffers - mainly used for short/inlined messages.
	 */
	if (!psmi_getenv("PSM_NUM_SEND_DESCRIPTORS",
			 "Number of send descriptors to allocate [4096]",
			 PSMI_ENVVAR_LEVEL_USER,
			 PSMI_ENVVAR_TYPE_UINT,
			 (union psmi_envvar_val)4096, &envvar_val)) {
		opts.senddesc_num = envvar_val.e_uint;
	}

	if (psmi_device_is_enabled(devid_enabled, PTL_DEVID_IPS)) {
		if ((err = psm_ep_num_devunits(&num_units)) != PSM_OK)
			goto fail;
	} else
		num_units = 0;

	/* do some error checking */
	if (opts.timeout < -1) {
		err = psmi_handle_error(NULL, PSM_PARAM_ERR,
					"Invalid timeout value %lld",
					(long long)opts.timeout);
		goto fail;
	} else if (num_units && (opts.unit < -1 || opts.unit >= (int)num_units)) {
		err = psmi_handle_error(NULL, PSM_PARAM_ERR,
					"Invalid Device Unit ID %d (%d units found)",
					opts.unit, num_units);
		goto fail;
	} else if (opts.port < 0 || opts.port > HFI_MAX_PORT) {
		err = psmi_handle_error(NULL, PSM_PARAM_ERR,
					"Invalid Device port number %d",
					opts.port);
		goto fail;
	} else if (opts.affinity < 0
		   || opts.affinity > PSM_EP_OPEN_AFFINITY_FORCE) {
		err =
		    psmi_handle_error(NULL, PSM_PARAM_ERR,
				      "Invalid Affinity option: %d",
				      opts.affinity);
		goto fail;
	} else if (opts.outsl < PSMI_SL_MIN || opts.outsl > PSMI_SL_MAX) {
		err = psmi_handle_error(NULL, PSM_PARAM_ERR,
					"Invalid SL number: %lld",
					(unsigned long long)opts.outsl);
		goto fail;
	}

	/* Set environment variable if PSM is not allowed to set affinity */
	if (opts.affinity == PSM_EP_OPEN_AFFINITY_SKIP)
		setenv("HFI_NO_CPUAFFINITY", "1", 1);

	/* Allocate end point structure storage */
	ptl_sizes =
	    (psmi_device_is_enabled(devid_enabled, PTL_DEVID_SELF) ?
	     psmi_ptl_self.sizeof_ptl() : 0) +
	    (psmi_device_is_enabled(devid_enabled, PTL_DEVID_IPS) ?
	     psmi_ptl_ips.sizeof_ptl() : 0) +
	    (psmi_device_is_enabled(devid_enabled, PTL_DEVID_AMSH) ?
	     psmi_ptl_amsh.sizeof_ptl() : 0);
	if (ptl_sizes == 0)
		return PSM_EP_NO_DEVICE;

	ep = (psm_ep_t) psmi_memalign(PSMI_EP_NONE, UNDEFINED, 64,
				      sizeof(struct psm_ep) + ptl_sizes);
	epaddr = (psm_epaddr_t) psmi_calloc(PSMI_EP_NONE, PER_PEER_ENDPOINT,
					    1, sizeof(struct psm_epaddr));
	if (ep == NULL || epaddr == NULL) {
		err = psmi_handle_error(NULL, PSM_NO_MEMORY,
					"Couldn't allocate memory for %s structure",
					ep == NULL ? "psm_ep" : "psm_epaddr");
		goto fail;
	}

	/* Copy PTL enabled status */
	for (i = 0; i < PTL_MAX_INIT; i++)
		ep->devid_enabled[i] = devid_enabled[i];

	/* Matched Queue initialization.  We do this early because we have to
	 * make sure ep->mq exists and is valid before calling ips_do_work.
	 */
	ep->mq = mq;

	/* Get ready for PTL initialization */
	memcpy(&ep->uuid, (void *)unique_job_key, sizeof(psm_uuid_t));
	ep->epaddr = epaddr;
	ep->memmode = mq->memmode;
	ep->hfi_num_sendbufs = opts.sendbufs_num;
	ep->service_id = opts.service_id;
	ep->path_res_type = opts.path_res_type;
	ep->hfi_num_descriptors = opts.senddesc_num;
	ep->hfi_imm_size = opts.imm_size;
	ep->errh = psmi_errhandler_global;	/* by default use the global one */
	ep->ptl_amsh.ep_poll = psmi_poll_noop;
	ep->ptl_ips.ep_poll = psmi_poll_noop;
	ep->connections = 0;

	/* See how many iterations we want to spin before yielding */
	psmi_getenv("PSM_YIELD_SPIN_COUNT",
		    "Spin poll iterations before yield",
		    PSMI_ENVVAR_LEVEL_HIDDEN,
		    PSMI_ENVVAR_TYPE_UINT,
		    (union psmi_envvar_val)PSMI_BLOCKUNTIL_POLLS_BEFORE_YIELD,
		    &envvar_val);
	ep->yield_spin_cnt = envvar_val.e_uint;

	ptl_sizes = 0;
	amsh_ptl = ips_ptl = self_ptl = NULL;
	if (psmi_ep_device_is_enabled(ep, PTL_DEVID_AMSH)) {
		amsh_ptl = (ptl_t *) (ep->ptl_base_data + ptl_sizes);
		ptl_sizes += psmi_ptl_amsh.sizeof_ptl();
	}
	if (psmi_ep_device_is_enabled(ep, PTL_DEVID_IPS)) {
		ips_ptl = (ptl_t *) (ep->ptl_base_data + ptl_sizes);
		ptl_sizes += psmi_ptl_ips.sizeof_ptl();
	}
	if (psmi_ep_device_is_enabled(ep, PTL_DEVID_SELF)) {
		self_ptl = (ptl_t *) (ep->ptl_base_data + ptl_sizes);
		ptl_sizes += psmi_ptl_self.sizeof_ptl();
	}

	if ((err = psmi_ep_open_device(ep, &opts, unique_job_key,
				       &(ep->context), &ep->epid)))
		goto fail;

	psmi_assert_always(ep->epid != 0);
	ep->epaddr->epid = ep->epid;

	_HFI_VDBG("psmi_ep_open_device() passed\n");

	/* Set our new label as soon as we know what it is */
	strncpy(buf, psmi_gethostname(), sizeof(buf) - 1);
	buf[sizeof(buf) - 1] = '\0';

	p = buf + strlen(buf);

	/* If our rank is set, use it. If not, use context.subcontext notation */
	if (((e = getenv("MPI_RANKID")) != NULL && *e) ||
	    ((e = getenv("PSC_MPI_RANK")) != NULL && *e))
		len = snprintf(p, sizeof(buf) - strlen(buf), ":%d.", atoi(e));
	else
		len = snprintf(p, sizeof(buf) - strlen(buf), ":%d.%d.",
			       (uint32_t) psm_epid_context(ep->epid),
			       (uint32_t) psmi_epid_subcontext(ep->epid));
	*(p + len) = '\0';
	ep->context_mylabel = psmi_strdup(ep, buf);
	if (ep->context_mylabel == NULL) {
		err = PSM_NO_MEMORY;
		goto fail;
	}
	/* hfi_set_mylabel(ep->context_mylabel); */

	if ((err = psmi_epid_set_hostname(psm_epid_nid(ep->epid), buf, 0)))
		goto fail;

	_HFI_VDBG("start ptl device init...\n");
	if (psmi_ep_device_is_enabled(ep, PTL_DEVID_SELF)) {
		if ((err = psmi_ptl_self.init(ep, self_ptl, &ep->ptl_self)))
			goto fail;
	}
	if (psmi_ep_device_is_enabled(ep, PTL_DEVID_IPS)) {
		if ((err = psmi_ptl_ips.init(ep, ips_ptl, &ep->ptl_ips)))
			goto fail;
	}
	/* If we're shm-only, this device is enabled above */
	if (psmi_ep_device_is_enabled(ep, PTL_DEVID_AMSH)) {
		if ((err = psmi_ptl_amsh.init(ep, amsh_ptl, &ep->ptl_amsh)))
			goto fail;
	} else {
		/* We may have pre-attached as part of getting our rank for enabling
		 * shared contexts.  */
	}

	_HFI_VDBG("finish ptl device init...\n");

	/*
	 * Keep only IPS since only IPS supports multi-rail; other devices
	 * are only set up once. The IPS device can come through this function again.
	 */
	for (i = 0; i < PTL_MAX_INIT; i++) {
		if (devid_enabled[i] != PTL_DEVID_IPS) {
			devid_enabled[i] = -1;
		}
	}

	*epido = ep->epid;
	*epo = ep;

	return PSM_OK;

fail:
	if (ep != NULL) {
		if (ep->context.fd != -1)
			close(ep->context.fd);
		psmi_free(ep);
	}
	if (epaddr != NULL)
		psmi_free(epaddr);
	return err;
}
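
A minimal sketch of how this internal routine is normally reached through the public PSM entry points, assuming psm_ep_open() and psm_ep_open_opts_get_defaults() from psm.h and a job key distributed out of band by the launcher; only the timeout is overridden here, so every other field keeps its sentinel value and is left alone by the override block at the top of the function:

#include <psm.h>

/* Hypothetical caller: open one endpoint with default options except for a
 * 30 second open timeout. */
static psm_error_t
example_open_endpoint(psm_uuid_t const job_key, psm_ep_t *ep, psm_epid_t *epid)
{
	struct psm_ep_open_opts opts;
	psm_error_t err;

	err = psm_ep_open_opts_get_defaults(&opts);
	if (err != PSM_OK)
		return err;

	opts.timeout = 30LL * 1000000000LL;	/* nanoseconds */

	return psm_ep_open(job_key, &opts, ep, epid);
}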
Example No. 5
static psm_error_t
psmi_ep_devlids(uint16_t **lids, uint32_t *num_lids_o,
		uint64_t my_gid_hi, uint64_t my_gid_lo)
{
	static uint16_t *hfi_lids;
	static uint32_t nlids;
	uint32_t num_units;
	int i;
	psm_error_t err = PSM_OK;

	PSMI_ERR_UNLESS_INITIALIZED(NULL);

	if (hfi_lids == NULL) {
		if ((err = psm_ep_num_devunits(&num_units)))
			goto fail;
		hfi_lids = (uint16_t *)
		    psmi_calloc(PSMI_EP_NONE, UNDEFINED,
				num_units * HFI_MAX_PORT, sizeof(uint16_t));
		if (hfi_lids == NULL) {
			err = psmi_handle_error(NULL, PSM_NO_MEMORY,
						"Couldn't allocate memory for dev_lids structure");
			goto fail;
		}

		for (i = 0; i < num_units; i++) {
			int j;
			for (j = 1; j <= HFI_MAX_PORT; j++) {
				int lid = hfi_get_port_lid(i, j);
				int ret;
				uint64_t gid_hi = 0, gid_lo = 0;

				if (lid == -1)
					continue;
				ret = hfi_get_port_gid(i, j, &gid_hi, &gid_lo);
				if (ret == -1)
					continue;
				else if (my_gid_hi != gid_hi) {
					_HFI_VDBG("LID %d, unit %d, port %d, "
						  "mismatched GID %llx:%llx and "
						  "%llx:%llx\n",
						  lid, i, j,
						  (unsigned long long)gid_hi,
						  (unsigned long long)gid_lo,
						  (unsigned long long)my_gid_hi,
						  (unsigned long long)
						  my_gid_lo);
					continue;
				}
				_HFI_VDBG("LID %d, unit %d, port %d, "
					  "matching GID %llx:%llx and "
					  "%llx:%llx\n", lid, i, j,
					  (unsigned long long)gid_hi,
					  (unsigned long long)gid_lo,
					  (unsigned long long)my_gid_hi,
					  (unsigned long long)my_gid_lo);

				hfi_lids[nlids++] = (uint16_t) lid;
			}
		}
		if (nlids == 0) {
			err = psmi_handle_error(NULL, PSM_EP_DEVICE_FAILURE,
						"Couldn't get lid&gid from any unit/port");
			goto fail;
		}
	}
	*lids = hfi_lids;
	*num_lids_o = nlids;

fail:
	return err;
}
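
A sketch of the kind of lookup the cached LID array enables; in the real file psmi_ep_devlids() is static, so a caller like this would live in the same translation unit, and the helper name is hypothetical:

/* Hypothetical use: check whether a peer LID belongs to one of this node's
 * own HFI ports, i.e. whether the peer sits behind a local adapter. */
static int
example_lid_is_local(uint16_t peer_lid, uint64_t my_gid_hi, uint64_t my_gid_lo)
{
	uint16_t *lids;
	uint32_t nlids, i;

	if (psmi_ep_devlids(&lids, &nlids, my_gid_hi, my_gid_lo) != PSM_OK)
		return 0;

	for (i = 0; i < nlids; i++)
		if (lids[i] == peer_lid)
			return 1;
	return 0;
}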