Пример #1
0
/*
 * Open the HFI device node for the given unit.
 *
 * unit:         unit number; HFI_UNIT_ID_ANY or any negative value selects
 *               the generic HFI_DEVICE_PATH node, otherwise the node name is
 *               HFI_DEVICE_PATH with "_<unit>" appended.
 * port:         not used by this routine.
 * open_timeout: passed (as a long) to hfi_wait_for_device() as the time to
 *               wait for the node to appear.
 *
 * Returns the open file descriptor, or -1 if the device node did not show
 * up or could not be opened.  Failure to set close-on-exec is only logged.
 */
int hfi_context_open(int unit, int port, uint64_t open_timeout)
{
	char dev_name[MAXPATHLEN];
	int fd;

	/* Build the device node name. */
	if (unit == HFI_UNIT_ID_ANY || unit < 0) {
		snprintf(dev_name, sizeof(dev_name), "%s", HFI_DEVICE_PATH);
	} else {
		snprintf(dev_name, sizeof(dev_name), "%s_%u", HFI_DEVICE_PATH,
			 unit);
	}

	/* The node may not exist yet; give it a chance to appear. */
	if (hfi_wait_for_device(dev_name, (long)open_timeout) == -1) {
		_HFI_DBG("Could not find an HFI Unit on device %s (%lds elapsed)",
			 dev_name, (long)open_timeout / 1000);
		return -1;
	}

	fd = open(dev_name, O_RDWR);
	if (fd == -1) {
		_HFI_DBG("(host:Can't open %s for reading and writing",
			 dev_name);
		return -1;
	}

	/* Best effort: a failure here does not invalidate the descriptor. */
	if (fcntl(fd, F_SETFD, FD_CLOEXEC) != 0) {
		_HFI_INFO("Failed to set close on exec for device: %s\n",
			  strerror(errno));
	}

	return fd;
}
Пример #2
0
/* Slow-path helper for the (very rare) hfi_free_tid() failures.  It is kept
   out of line so the error reporting code is not pulled into the
   instruction cache alongside the fast path.
   Logs the current errno and returns the errno value captured on entry. */
int hfi_free_tid_err(void)
{
	/* Capture errno first so the value handed back is the one seen on entry. */
	const int entry_errno = errno;

	_HFI_INFO("failed: %s\n", strerror(errno));

	return entry_errno;
}
Пример #3
0
/* Set the pkey of the send context, used to check the BTH pkey of each
   packet.  The driver is expected to look the pkey up in its pkey table
   and to fail the command when it is not found.

   Issues HFI1_CMD_SET_PKEY on ctrl->fd with the pkey carried in cmd.addr.
   Returns 0 on success, -1 on failure; the failure is logged unless errno
   is EINVAL. */
int hfi_set_pkey(struct _hfi_ctrl *ctrl, uint16_t pkey)
{
	struct hfi1_cmd cmd;

	cmd.addr = (uint64_t) pkey;
	cmd.len = 0;
	cmd.type = HFI1_CMD_SET_PKEY;

	if (hfi_cmd_write(ctrl->fd, &cmd, sizeof(cmd)) != -1)
		return 0;

	if (errno != EINVAL) {
		_HFI_INFO("set pkey failed: %s\n", strerror(errno));
	}

	return -1;
}
Пример #4
0
/* Ask the driver to change which packets may generate interrupts.

   HFI1_POLL_TYPE_URGENT: interrupt only when a packet sets HFI_KPF_INTR.
   HFI1_POLL_TYPE_ANYRCV: wake up on any received packet (when polled on).

   PSM uses TYPE_URGENT in the ips protocol.

   Issues HFI1_CMD_POLL_TYPE on ctrl->fd with poll_type carried in cmd.addr.
   Returns 0 on success, -1 on failure.  EINVAL is treated as "not
   implemented in driver" and is not logged. */
int hfi_poll_type(struct _hfi_ctrl *ctrl, uint16_t poll_type)
{
	struct hfi1_cmd cmd;

	cmd.addr = (uint64_t) poll_type;
	cmd.len = 0;
	cmd.type = HFI1_CMD_POLL_TYPE;

	if (hfi_cmd_write(ctrl->fd, &cmd, sizeof(cmd)) != -1)
		return 0;

	/* EINVAL: not implemented in driver, stay quiet */
	if (errno != EINVAL) {
		_HFI_INFO("poll type failed: %s\n", strerror(errno));
	}

	return -1;
}
Пример #5
0
/* Stop or restart receive on the context.
   stop_start == 0 disables receive, for use in queue overflow conditions.
   stop_start == 1 re-enables it, to be used to re-init the software copy
   of the head register.

   Issues HFI1_CMD_RECV_CTRL on ctrl->fd with stop_start carried in
   cmd.addr.  Returns 0 on success, -1 on failure.  EINVAL is treated as
   "not implemented in driver" and is not logged. */
int hfi_manage_rcvq(struct _hfi_ctrl *ctrl, uint32_t stop_start)
{
	struct hfi1_cmd cmd;

	cmd.addr = (uint64_t) stop_start;
	cmd.len = 0;
	cmd.type = HFI1_CMD_RECV_CTRL;

	if (hfi_cmd_write(ctrl->fd, &cmd, sizeof(cmd)) != -1)
		return 0;

	/* EINVAL: not implemented in driver, stay quiet */
	if (errno != EINVAL) {
		_HFI_INFO("manage rcvq failed: %s\n", strerror(errno));
	}

	return -1;
}
Пример #6
0
/* Read the "lid_mask_count" sysfs attribute of the given unit/port.
   Returns the value read (converted to int), or -1 on error, in which case
   the failure is logged together with strerror(errno). */
int hfi_get_port_lmc(int unit, int port)
{
	int64_t lmc;
	int rc;

	rc = hfi_sysfs_port_read_s64(unit, port, "lid_mask_count", &lmc, 0);
	if (rc != -1)
		return (int)lmc;

	_HFI_INFO("Failed to get LMC for unit %u:%u: %s\n",
		  unit, port, strerror(errno));

	return rc;
}
Пример #7
0
/* Ask the driver to reset the send context.  If the send context is
   halted the driver resets it; if not, it returns an error to the caller.
   After a context reset the credit return should be reset to zero by a
   hardware credit return DMA.
   The driver reports ENOLCK when the reset timed out; in that case the
   command is simply issued again until it either succeeds or fails with a
   different error.

   Returns 0 on success, -1 on failure; the failure is logged unless errno
   is EINVAL. */
int hfi_reset_context(struct _hfi_ctrl *ctrl)
{
	struct hfi1_cmd cmd;

	cmd.addr = 0;
	cmd.len = 0;
	cmd.type = HFI1_CMD_CTXT_RESET;

	for (;;) {
		if (hfi_cmd_write(ctrl->fd, &cmd, sizeof(cmd)) != -1)
			return 0;

		/* Only a reset timeout is worth another attempt. */
		if (errno != ENOLCK)
			break;
	}

	if (errno != EINVAL) {
		_HFI_INFO("reset ctxt failed: %s\n", strerror(errno));
	}

	return -1;
}
Пример #8
0
/* Read the "rate" sysfs attribute of the given unit/port and return its
   leading numeric value as an int.

   Returns -1 on error: either the sysfs read failed (errno as left by
   hfi_sysfs_port_read()), or the attribute did not start with a number
   (errno is set to EINVAL).  The failure is logged in both cases. */
int hfi_get_port_rate(int unit, int port)
{
	int ret;
	double rate;
	char *data_rate = NULL, *newptr;

	ret = hfi_sysfs_port_read(unit, port, "rate", &data_rate);
	if (ret == -1)
		goto get_port_rate_error;

	rate = strtod(data_rate, &newptr);
	if (newptr == data_rate) {
		/* strtod() consumed nothing: the attribute is not a number.
		   The read itself succeeded, so the buffer must be released
		   here, and ret (which holds the successful read's return
		   value) must be turned into the documented error value. */
		free(data_rate);
		errno = EINVAL;
		ret = -1;
		goto get_port_rate_error;
	}

	free(data_rate);

	/* Conversion to int kept exactly as in the original expression. */
	return ((int)(rate * 2) >> 1);

get_port_rate_error:
	_HFI_INFO("Failed to get link rate for unit %u:%u: %s\n",
		  unit, port, strerror(errno));

	return ret;
}
Пример #9
0
/*
 * Work out which unit/port pairs ("rails") to use for multirail.
 *
 * num_rails: out, number of entries stored in unit[] / port[].
 * unit:      out, unit number of each rail.
 * port:      out, port number of each rail.
 *
 * Behaviour:
 *  - PSM_MULTIRAIL unset, or atoi() of it is 0: *num_rails is set to 0 and
 *    PSM_OK is returned.
 *  - PSM_MULTIRAIL_MAP set: it is parsed as "unit:port,unit:port,..." and
 *    every listed port must have a lid and a gid, otherwise the result of
 *    psmi_handle_error(PSM_EP_DEVICE_FAILURE) is returned.
 *  - otherwise: the first port of each unit (up to HFI_MAX_RAILS units)
 *    that has both a lid and a gid is taken, and the rails are sorted with
 *    cmpfunc.
 */
static psm_error_t
psmi_ep_multirail(int *num_rails, uint32_t *unit, uint16_t *port)
{
	/* One row per discovered rail: { gid_hi, unit, port }. */
	uint64_t rails[HFI_MAX_RAILS][3];
	uint64_t gid_hi, gid_lo;
	uint32_t num_units;
	psm_error_t err = PSM_OK;
	const char *setting;
	int u, p, idx, rc;
	int nrails = 0;

	setting = getenv("PSM_MULTIRAIL");
	if (setting == NULL || atoi(setting) == 0) {
		*num_rails = 0;
		return err;
	}

	/*
	 * Explicit map, in format: unit:port,unit:port,...
	 */
	setting = getenv("PSM_MULTIRAIL_MAP");
	if (setting != NULL) {
		if (sscanf(setting, "%d:%d", &u, &p) == 2) {
			const char *sep;

			unit[nrails] = u;
			port[nrails] = p;
			nrails++;

			/* Every following entry starts at a comma. */
			for (sep = strchr(setting, ','); sep != NULL;
			     sep = strchr(sep + 1, ',')) {
				if (sscanf(sep, ",%d:%d", &u, &p) != 2)
					break;

				unit[nrails] = u;
				port[nrails] = p;
				nrails++;
				if (nrails == HFI_MAX_RAILS)
					break;
			}
		}
		*num_rails = nrails;

		/*
		 * Refuse the map if any of the listed ports is not usable.
		 */
		for (idx = 0; idx < nrails; idx++) {
			rc = hfi_get_port_lid(unit[idx], port[idx]);
			if (rc == -1) {
				return psmi_handle_error(NULL,
							 PSM_EP_DEVICE_FAILURE,
							 "Couldn't get lid for unit %d:%d",
							 unit[idx], port[idx]);
			}

			rc = hfi_get_port_gid(unit[idx], port[idx], &gid_hi,
					      &gid_lo);
			if (rc == -1) {
				return psmi_handle_error(NULL,
							 PSM_EP_DEVICE_FAILURE,
							 "Couldn't get gid for unit %d:%d",
							 unit[idx], port[idx]);
			}
		}

		return err;
	}

	/* No map given: discover the rails ourselves. */
	err = psm_ep_num_devunits(&num_units);
	if (err)
		return err;

	if (num_units > HFI_MAX_RAILS) {
		_HFI_INFO
		    ("Found %d units, max %d units are supported, use %d\n",
		     num_units, HFI_MAX_RAILS, HFI_MAX_RAILS);
		num_units = HFI_MAX_RAILS;
	}

	/*
	 * Collect the ports with a valid lid and gid, one per unit.
	 */
	for (u = 0; u < num_units; u++) {
		for (p = 1; p <= HFI_MAX_PORT; p++) {
			rc = hfi_get_port_lid(u, p);
			if (rc == -1)
				continue;

			rc = hfi_get_port_gid(u, p, &gid_hi, &gid_lo);
			if (rc == -1)
				continue;

			rails[nrails][0] = gid_hi;
			rails[nrails][1] = u;
			rails[nrails][2] = p;
			nrails++;

			/* first usable port wins, go to the next unit */
			break;
		}
	}

	/*
	 * Sort the rails by gid_hi from small to big.  This is for multiple
	 * fabrics: the fabric with the smallest gid is used to make the
	 * master connection.
	 */
	qsort(rails, nrails, sizeof(rails[0]), cmpfunc);

	for (idx = 0; idx < nrails; idx++) {
		unit[idx] = (uint32_t) rails[idx][1];
		port[idx] = (uint16_t) (uint32_t) rails[idx][2];
	}
	*num_rails = nrails;

	return err;
}
Пример #10
0
/*
 * Wait for a device special file to appear.
 *
 * This function is necessary in a udev-based world.  There can be an
 * arbitrarily long (but typically less than one second) delay between
 * a driver getting loaded and any dynamic special files turning up.
 *
 * path:    file to poll for with stat().
 * timeout: in milliseconds.  A value of zero means "callee decides
 *          timeout" (15000 ms is used).  Negative is infinite.
 *
 * The file is polled every 250 ms; the last sleep is shortened so that
 * the total wait never exceeds the timeout.
 *
 * Returns 0 on success, -1 on error or timeout.  Check errno to see
 * whether there was a timeout (ETIMEDOUT) or an error (any other
 * non-zero value, as left by stat() or nanosleep()).
 */
int hfi_wait_for_device(const char *path, long timeout)
{
	static const long default_ms = 250;	/* polling interval */
	int saved_errno;
	struct stat st;
	long elapsed = 0;
	int ret;

	if (timeout == 0)
		timeout = 15000;

	while (1) {
		struct timespec req = { 0 };
		long ms;

		ret = stat(path, &st);
		saved_errno = errno;

		/* Done on success, or on any failure other than "not there
		   (yet)". */
		if (ret == 0 || (ret == -1 && saved_errno != ENOENT))
			break;

		/* ret is still -1 from stat() here, which is what the caller
		   gets back on timeout. */
		if (timeout > 0 && elapsed >= timeout) {
			saved_errno = ETIMEDOUT;
			break;
		}

		if (elapsed == 0) {
			if (timeout < 0)
				_HFI_DBG
				    ("Device file %s not present on first check; "
				     "waiting indefinitely...\n", path);
			else
				_HFI_DBG
				    ("Device file %s not present on first check; "
				     "waiting up to %.1f seconds...\n", path,
				     timeout / 1e3);
		}

		/* Sleep a full interval, or only what is left of the timeout.
		   The remainder is always below default_ms, so tv_nsec stays
		   within the range nanosleep() accepts. */
		if (timeout < 0 || timeout - elapsed >= default_ms)
			ms = default_ms;
		else
			ms = timeout - elapsed;

		elapsed += ms;
		req.tv_nsec = ms * 1000000;

		ret = nanosleep(&req, NULL);
		saved_errno = errno;

		if (ret == -1)
			break;
	}

	if (ret == 0)
		_HFI_DBG("Found %s after %.1f seconds\n", path, elapsed / 1e3);
	else
		_HFI_INFO
		    ("The %s device failed to appear after %.1f seconds: %s\n",
		     path, elapsed / 1e3, strerror(saved_errno));

	/* The logging above may have disturbed errno; restore it. */
	errno = saved_errno;
	return ret;
}
Пример #11
0
/* Given the unit and port number, return an error or the corresponding LID.
   For now, it's used only so the MPI code can determine its own LID, and
   which other LIDs (if any) are also assigned to this node.

   Returns an int, so -1 indicates an error.  0 may indicate that the unit
   is valid, but no LID has been assigned.  A port whose "phys_state" does
   not start with "5: LinkUp" is reported as an error (-1) as well.

   Failures are reported at debug level only, because this is called for
   both potential ports without knowing if both ports exist (or are
   connected). */
int hfi_get_port_lid(int unit, int port)
{
	char *phys_state;
	int64_t lid;
	int link_down;
	int rc;

	rc = hfi_sysfs_port_read(unit, port, "phys_state", &phys_state);
	if (rc == -1) {
		if (errno == ENODEV) {
			/* this is "normal" for port != 1, on single port chips */
			_HFI_VDBG
			    ("Failed to get phys_state for unit %u:%u: %s\n",
			     unit, port, strerror(errno));
		} else {
			_HFI_DBG
			    ("Failed to get phys_state for unit %u:%u: %s\n",
			     unit, port, strerror(errno));
		}
		return rc;
	}

	/* If link is not up, we think lid not valid */
	link_down = (strncmp(phys_state, "5: LinkUp", 9) != 0);
	if (link_down) {
		_HFI_DBG("Link is not Up for unit %u:%u\n", unit, port);
	}
	free(phys_state);
	if (link_down)
		return -1;

	rc = hfi_sysfs_port_read_s64(unit, port, "lid", &lid, 0);
	_HFI_VDBG("hfi_get_port_lid: ret %d, unit %d port %d\n", rc, unit,
		  port);

	if (rc == -1) {
		if (errno == ENODEV) {
			/* this is "normal" for port != 1, on single port chips */
			_HFI_VDBG("Failed to get LID for unit %u:%u: %s\n",
				  unit, port, strerror(errno));
		} else {
			_HFI_DBG("Failed to get LID for unit %u:%u: %s\n",
				 unit, port, strerror(errno));
		}
		return rc;
	}

	rc = lid;

/* disable this feature since we don't have a way to provide
   file descriptor in multiple context case. */
#if 0
	if (getenv("HFI_DIAG_LID_LOOP")) {
		/* provides diagnostic ability to run MPI, etc. even */
		/* on loopback, by claiming a different LID for each context */
		struct hfi1_ctxt_info info;
		struct hfi1_cmd cmd;
		cmd.type = HFI1_CMD_CTXT_INFO;
		cmd.cmd.ctxt_info = (uintptr_t) &info;
		if (__hfi_lastfd == -1)
			_HFI_INFO
			    ("Can't run CONTEXT_INFO for lid_loop, fd not set\n");
		else if (write(__hfi_lastfd, &cmd, sizeof(cmd)) == -1)
			_HFI_INFO("CONTEXT_INFO command failed: %s\n",
				  strerror(errno));
		else if (!info.context)
			_HFI_INFO("CONTEXT_INFO returned context 0!\n");
		else {
			_HFI_PRDBG
			    ("Using lid 0x%x, base %x, context %x\n",
			     rc + info.context, rc, info.context);
			rc += info.context;
		}
	}
#endif

	return rc;
}