示例#1
0
/*
 * Send the SL2VL mapping table for output port 'port_num' of physical
 * port 'p', once per input port.
 *
 * For a switch node the table is sent for every physical port index of
 * the node (0 .. num_physp - 1).  If the port reports a VL capability
 * value of 1, the update is skipped unless p_port->p_physp advertises
 * IB_PORT_CAP_HAS_SL_MAP.  For any other node type the update is skipped
 * unless 'p' itself advertises IB_PORT_CAP_HAS_SL_MAP, and only input
 * port 0 is programmed.
 *
 * Returns IB_SUCCESS when nothing had to be done or every table update
 * succeeded; otherwise the first failing status from sl2vl_update_table().
 */
static ib_api_status_t sl2vl_update(osm_sm_t * sm, osm_port_t * p_port,
				    osm_physp_t * p, uint8_t port_num,
				    unsigned force_update,
				    const struct qos_config *qcfg)
{
	uint8_t in_port, in_port_count;
	ib_api_status_t rc;

	if (osm_node_get_type(osm_physp_get_node_ptr(p)) != IB_NODE_TYPE_SWITCH) {
		if (!(p->port_info.capability_mask & IB_PORT_CAP_HAS_SL_MAP))
			return IB_SUCCESS;
		in_port_count = 1;
	} else {
		/* With VL capability 1, consult the capability mask kept on p_port */
		if (ib_port_info_get_vl_cap(&p->port_info) == 1 &&
		    !(p_port->p_physp->port_info.capability_mask &
		      IB_PORT_CAP_HAS_SL_MAP))
			return IB_SUCCESS;
		in_port_count =
		    osm_node_get_num_physp(osm_physp_get_node_ptr(p));
	}

	for (in_port = 0; in_port < in_port_count; in_port++) {
		rc = sl2vl_update_table(sm, p, in_port, port_num,
					force_update, &qcfg->sl2vl);
		if (rc != IB_SUCCESS)
			return rc;
	}

	return IB_SUCCESS;
}
示例#2
0
/*
 * Look up, in the tracking array 'r', the entry describing the node that
 * is attached to port 'port_num' of switch 'p_sw'.
 *
 * p_sw           - switch whose port is examined (must not be NULL).
 * r              - array of already-used remote nodes plus its count.
 * port_num       - switch port whose remote node is matched.
 * find_sys_guid  - when non-zero, an entry must have the same system GUID.
 * find_node_guid - when non-zero, an entry must have the same node GUID.
 *
 * Returns a pointer to the first matching entry of r->guids, or NULL when
 * there is no match.  NULL is also returned when 'r' is NULL, when the
 * switch has no physical port object for 'port_num', or when that port has
 * no remote port, so that a missing link is reported as "not found"
 * instead of being dereferenced.
 */
static struct osm_remote_node *
osm_switch_find_guid_common(IN const osm_switch_t * const p_sw,
			    IN struct osm_remote_guids_count *r,
			    IN uint8_t port_num,
			    IN int find_sys_guid,
			    IN int find_node_guid)
{
	struct osm_remote_node *p_remote_guid = NULL;
	osm_physp_t *p_physp;
	osm_physp_t *p_rem_physp;
	osm_node_t *p_rem_node;
	uint64_t sys_guid;
	uint64_t node_guid;
	int i;

	CL_ASSERT(p_sw);

	/* Nothing is tracked, so nothing can match */
	if (!r)
		return NULL;

	p_physp = osm_node_get_physp_ptr(p_sw->p_node, port_num);
	if (!p_physp)
		return NULL;

	p_rem_physp = osm_physp_get_remote(p_physp);
	if (!p_rem_physp)
		return NULL;

	p_rem_node = osm_physp_get_node_ptr(p_rem_physp);
	sys_guid = p_rem_node->node_info.sys_guid;
	node_guid = p_rem_node->node_info.node_guid;

	for (i = 0; i < r->count; i++) {
		if ((!find_sys_guid
		     || r->guids[i].node->node_info.sys_guid == sys_guid)
		    && (!find_node_guid
			|| r->guids[i].node->node_info.node_guid == node_guid)) {
			p_remote_guid = &r->guids[i];
			break;
		}
	}

	return p_remote_guid;
}
示例#3
0
/*
 * Build a QoS MAD list item carrying a prepared Set request.
 *
 * Only IB_MAD_ATTR_SLVL_TABLE and IB_MAD_ATTR_VL_ARBITRATION are accepted
 * as 'attr_id'; the matching MAD wrapper context is filled with the node
 * GUID and port GUID of 'p' and marked as a Set.  'attr_mod' is given in
 * host order and converted with cl_hton32() before being handed on.
 *
 * Returns a newly allocated, zero-filled item whose p_madw field holds the
 * prepared request, or NULL when the attribute is not supported, the
 * allocation fails, or osm_prepare_req_set() returns NULL.  The caller
 * owns the returned item.
 */
static qos_mad_item_t *osm_qos_mad_create(IN osm_sm_t * sm,
					  IN osm_physp_t * p,
					  IN uint32_t data_size,
					  IN uint8_t * p_data,
					  IN ib_net16_t attr_id,
					  IN uint32_t attr_mod)
{
	osm_madw_context_t context;
	qos_mad_item_t *p_item;
	osm_node_t *p_owner = osm_physp_get_node_ptr(p);

	if (attr_id == IB_MAD_ATTR_SLVL_TABLE) {
		context.slvl_context.node_guid = osm_node_get_node_guid(p_owner);
		context.slvl_context.port_guid = osm_physp_get_port_guid(p);
		context.slvl_context.set_method = TRUE;
	} else if (attr_id == IB_MAD_ATTR_VL_ARBITRATION) {
		context.vla_context.node_guid = osm_node_get_node_guid(p_owner);
		context.vla_context.port_guid = osm_physp_get_port_guid(p);
		context.vla_context.set_method = TRUE;
	} else {
		return NULL;
	}

	/* calloc hands back the item already zeroed */
	p_item = calloc(1, sizeof(*p_item));
	if (p_item == NULL)
		return NULL;

	p_item->p_madw = osm_prepare_req_set(sm, osm_physp_get_dr_path_ptr(p),
					     p_data, data_size,
					     attr_id, cl_hton32(attr_mod),
					     CL_DISP_MSGID_NONE, &context);
	if (p_item->p_madw == NULL) {
		free(p_item);
		return NULL;
	}

	return p_item;
}
示例#4
0
/*
 * Return the node attached to port 'port_num' of 'p_node'.
 *
 * If 'p_remote_port_num' is not NULL it receives the port number of the
 * remote physical port.  NULL is returned (and the output left untouched)
 * when the node has no physical port object for 'port_num' or that port
 * has no link.
 */
osm_node_t *osm_node_get_remote_node(IN osm_node_t * p_node,
				     IN uint8_t port_num,
				     OUT uint8_t * p_remote_port_num)
{
	osm_physp_t *p_local = osm_node_get_physp_ptr(p_node, port_num);
	osm_physp_t *p_peer;

	if (p_local && osm_physp_has_any_link(p_local)) {
		p_peer = osm_physp_get_remote(p_local);

		if (p_remote_port_num)
			*p_remote_port_num = osm_physp_get_port_num(p_peer);

		return osm_physp_get_node_ptr(p_peer);
	}

	return NULL;
}
示例#5
0
/**********************************************************************
 Issue a PortInfo Get towards the node on the far side of 'p_physp'.

 The directed route of 'p_physp' is copied and extended by the port's
 own port number.  If the route cannot be extended, ERR 332D is logged
 and no request is sent.  A failing request is logged as ERR 332E.
 **********************************************************************/
static void state_mgr_get_remote_port_info(IN osm_sm_t * sm,
					   IN osm_physp_t * p_physp)
{
	osm_madw_context_t mad_context;
	osm_dr_path_t rem_node_dr_path;
	osm_dr_path_t *p_local_path;
	ib_api_status_t rc;

	OSM_LOG_ENTER(sm->p_log);

	/* start from the local route and add one hop out through this port */
	p_local_path = osm_physp_get_dr_path_ptr(p_physp);
	memcpy(&rem_node_dr_path, p_local_path, sizeof(osm_dr_path_t));

	if (osm_dr_path_extend(&rem_node_dr_path,
			       osm_physp_get_port_num(p_physp))) {
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 332D: "
			"DR path with hop count %d couldn't be extended "
			"so skipping PortInfo query\n",
			p_local_path->hop_count);
	} else {
		memset(&mad_context, 0, sizeof(mad_context));

		mad_context.pi_context.node_guid =
		    osm_node_get_node_guid(osm_physp_get_node_ptr(p_physp));
		mad_context.pi_context.port_guid = p_physp->port_guid;
		mad_context.pi_context.set_method = FALSE;
		mad_context.pi_context.light_sweep = TRUE;
		mad_context.pi_context.active_transition = FALSE;

		/*
		 * Negative logic: a failed query means there is no point
		 * in going on to a heavy sweep.
		 */
		rc = osm_req_get(sm, &rem_node_dr_path, IB_MAD_ATTR_PORT_INFO,
				 0, CL_DISP_MSGID_NONE, &mad_context);
		if (rc != IB_SUCCESS) {
			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 332E: "
				"Request for PortInfo failed (%s)\n",
				ib_get_err_str(rc));
		}
	}

	OSM_LOG_EXIT(sm->p_log);
}
示例#6
0
/**********************************************************************
 Request the NodeDescription attribute from the node owning 'p_physp',
 using that port's directed route.  A failure to initiate the request
 is logged as ERR 0D03; nothing is returned to the caller.

 The plock must be held before calling this function.
**********************************************************************/
void osm_req_get_node_desc(IN osm_sm_t * sm, osm_physp_t * p_physp)
{
	osm_madw_context_t context;
	ib_api_status_t rc;

	OSM_LOG_ENTER(sm->p_log);

	/* only the node GUID is recorded in the request context */
	context.nd_context.node_guid =
	    osm_node_get_node_guid(osm_physp_get_node_ptr(p_physp));

	rc = osm_req_get(sm, osm_physp_get_dr_path_ptr(p_physp),
			 IB_MAD_ATTR_NODE_DESC, 0, TRUE, 0,
			 CL_DISP_MSGID_NONE, &context);
	if (rc != IB_SUCCESS) {
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D03: "
			"Failure initiating NodeDescription request (%s)\n",
			ib_get_err_str(rc));
	}

	OSM_LOG_EXIT(sm->p_log);
}
/*
 * Decide whether 'p_physp' needs an update, taking its neighbor into
 * account.
 *
 * Returns 1 when the remote port belongs to a switch whose SwitchInfo
 * reports a state change.  In every other case (no remote port known,
 * remote node is not a switch, or no state change reported) the port's
 * own need_update value is returned.
 */
static int osm_pi_rcv_update_neighbor(IN osm_physp_t *p_physp)
{
	osm_physp_t *p_peer = p_physp->p_remote_physp;
	osm_node_t *p_peer_node;

	/*
	 * A missing peer is our own port: the only case where a CA port
	 * is discovered before its neighbor port.
	 */
	if (p_peer) {
		p_peer_node = osm_physp_get_node_ptr(p_peer);
		CL_ASSERT(p_peer_node);

		/* a peer without a switch object is a CA/RTR to CA/RTR link */
		if (p_peer_node->sw &&
		    ib_switch_info_get_state_change(&p_peer_node->sw->switch_info))
			return 1;
	}

	return p_physp->need_update;
}
示例#8
0
/*
 * Program one SL2VL mapping table (input port 'in_port', output port
 * 'out_port') on physical port 'p'.
 *
 * Each byte of 'sl2vl_table' packs two 4-bit VL values.  Every value
 * other than 15 is masked with a mask derived from the port's operational
 * VLs before being sent.  Unless 'force_update' is set, nothing is sent
 * when the port already holds an identical table for 'in_port'.
 *
 * Returns IB_SUCCESS when no update was needed, otherwise the status of
 * osm_req_set().
 */
static ib_api_status_t sl2vl_update_table(osm_sm_t * sm,
					  osm_physp_t * p, uint8_t in_port,
					  uint8_t out_port,
					  unsigned force_update,
					  const ib_slvl_table_t * sl2vl_table)
{
	osm_madw_context_t context;
	ib_slvl_table_t masked;
	ib_slvl_table_t *p_current;
	osm_node_t *p_node = osm_physp_get_node_ptr(p);
	unsigned vl_mask;
	uint32_t attr_mod;
	uint8_t hi, lo;
	int idx;

	vl_mask = (1 << (ib_port_info_get_op_vls(&p->port_info) - 1)) - 1;

	for (idx = 0; idx < IB_MAX_NUM_VLS / 2; idx++) {
		/* split the byte into its two nibbles */
		hi = sl2vl_table->raw_vl_by_sl[idx] >> 4;
		lo = sl2vl_table->raw_vl_by_sl[idx] & 0xf;

		/* the value 15 is passed through unmasked */
		if (hi != 15)
			hi &= vl_mask;
		if (lo != 15)
			lo &= vl_mask;

		masked.raw_vl_by_sl[idx] = (hi << 4) | lo;
	}

	if (!force_update) {
		p_current = osm_physp_get_slvl_tbl(p, in_port);
		if (p_current &&
		    memcmp(p_current, &masked, sizeof(masked)) == 0)
			return IB_SUCCESS;
	}

	context.slvl_context.node_guid = osm_node_get_node_guid(p_node);
	context.slvl_context.port_guid = osm_physp_get_port_guid(p);
	context.slvl_context.set_method = TRUE;

	/* attribute modifier: input port in bits 8 and up, output port below */
	attr_mod = in_port << 8 | out_port;

	return osm_req_set(sm, osm_physp_get_dr_path_ptr(p),
			   (uint8_t *) & masked, sizeof(masked),
			   IB_MAD_ATTR_SLVL_TABLE,
			   cl_hton32(attr_mod), CL_DISP_MSGID_NONE, &context);
}
示例#9
0
/*
 * QoS primitives
 */
/*
 * Program one VL arbitration table block on physical port 'p'.
 *
 * The first 'block_length' entries of 'table_block' are copied into a
 * zeroed block and the VL of each copied entry is masked with a mask
 * derived from the port's operational VLs.  Unless 'force_update' is set,
 * nothing is sent when those entries already equal the port's stored
 * block 'block_num'.
 *
 * Returns IB_SUCCESS when no update was needed, otherwise the status of
 * osm_req_set().
 */
static ib_api_status_t vlarb_update_table_block(osm_sm_t * sm,
						osm_physp_t * p,
						uint8_t port_num,
						unsigned force_update,
						const ib_vl_arb_table_t *
						table_block,
						unsigned block_length,
						unsigned block_num)
{
	osm_madw_context_t context;
	ib_vl_arb_table_t block;
	size_t used_bytes;
	uint32_t attr_mod;
	unsigned vl_mask;
	unsigned idx;

	vl_mask = (1 << (ib_port_info_get_op_vls(&p->port_info) - 1)) - 1;

	/* only the first block_length entries are meaningful */
	used_bytes = block_length * sizeof(block.vl_entry[0]);

	memset(&block, 0, sizeof(block));
	memcpy(&block, table_block, used_bytes);
	for (idx = 0; idx < block_length; idx++)
		block.vl_entry[idx].vl &= vl_mask;

	if (!force_update &&
	    memcmp(&p->vl_arb[block_num], &block, used_bytes) == 0)
		return IB_SUCCESS;

	context.vla_context.node_guid =
	    osm_node_get_node_guid(osm_physp_get_node_ptr(p));
	context.vla_context.port_guid = osm_physp_get_port_guid(p);
	context.vla_context.set_method = TRUE;

	/* attribute modifier: one-based block number above bit 16, port below */
	attr_mod = ((block_num + 1) << 16) | port_num;

	return osm_req_set(sm, osm_physp_get_dr_path_ptr(p),
			   (uint8_t *) & block, sizeof(block),
			   IB_MAD_ATTR_VL_ARBITRATION,
			   cl_hton32(attr_mod), CL_DISP_MSGID_NONE, &context);
}
示例#10
0
/*
 * Compute the path parameters (MTU, rate, PKey, packet lifetime, SL and
 * hop count) for one source/destination pair of a MultiPathRecord request.
 *
 * sa                - SA object; supplies the log, subnet options, QoS
 *                     policy and partition table.
 * p_mpr             - the MultiPathRecord request being served.
 * p_src_alias_guid  - source end of the path.
 * p_dest_alias_guid - destination end of the path.
 * dest_lid_ho       - destination LID in host order.
 * comp_mask         - component mask of the request; selects which request
 *                     fields constrain the result.
 * p_parms           - output; written only when IB_SUCCESS is returned.
 *
 * The function walks the fabric from the source port to the destination
 * port along the switches' routes to the destination LID, keeping the most
 * restrictive MTU and rate seen and, when QoS is enabled, the set of SLs
 * that are not mapped to IB_DROP_VL on any hop.  The result is then
 * adjusted and validated against the matching QoS level (if any) and the
 * MTU / rate / packet lifetime / PKey / SL components of the request.
 *
 * Returns IB_SUCCESS, IB_NOT_FOUND when no path satisfies the constraints
 * (or a route is missing at either end), or IB_ERROR when the walk hits an
 * inconsistency in the subnet object.
 */
static ib_api_status_t mpr_rcv_get_path_parms(IN osm_sa_t * sa,
					      IN const ib_multipath_rec_t *
					      p_mpr,
					      IN const osm_alias_guid_t * p_src_alias_guid,
					      IN const osm_alias_guid_t * p_dest_alias_guid,
					      IN const uint16_t dest_lid_ho,
					      IN const ib_net64_t comp_mask,
					      OUT osm_path_parms_t * p_parms)
{
	const osm_node_t *p_node;
	const osm_physp_t *p_physp, *p_physp0;
	const osm_physp_t *p_src_physp;
	const osm_physp_t *p_dest_physp;
	const osm_prtn_t *p_prtn = NULL;
	const ib_port_info_t *p_pi, *p_pi0;
	ib_slvl_table_t *p_slvl_tbl;
	ib_api_status_t status = IB_SUCCESS;
	uint8_t mtu;
	uint8_t rate;
	uint8_t pkt_life;
	uint8_t required_mtu;
	uint8_t required_rate;
	ib_net16_t required_pkey;
	uint8_t required_sl;
	uint8_t required_pkt_life;
	ib_net16_t dest_lid;
	int hops = 0;
	int in_port_num = 0;
	uint8_t i;
	osm_qos_level_t *p_qos_level = NULL;
	uint16_t valid_sl_mask = 0xffff;

	OSM_LOG_ENTER(sa->p_log);

	dest_lid = cl_hton16(dest_lid_ho);

	p_dest_physp = p_dest_alias_guid->p_base_port->p_physp;
	p_physp = p_src_alias_guid->p_base_port->p_physp;
	p_src_physp = p_physp;
	p_pi = &p_physp->port_info;

	mtu = ib_port_info_get_mtu_cap(p_pi);
	rate = ib_port_info_compute_rate(p_pi,
					 p_pi->capability_mask & IB_PORT_CAP_HAS_EXT_SPEEDS);

	/*
	   Mellanox Tavor device performance is better using 1K MTU.
	   If required MTU and MTU selector are such that 1K is OK
	   and at least one end of the path is Tavor we override the
	   port MTU with 1K.
	 */
	if (sa->p_subn->opt.enable_quirks &&
	    sa_multipath_rec_apply_tavor_mtu_limit(p_mpr,
						   p_src_alias_guid->p_base_port,
						   p_dest_alias_guid->p_base_port,
						   comp_mask))
		if (mtu > IB_MTU_LEN_1024) {
			mtu = IB_MTU_LEN_1024;
			OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
				"Optimized Path MTU to 1K for Mellanox Tavor device\n");
		}

	/*
	   Walk the subnet object from source to destination,
	   tracking the most restrictive rate and mtu values along the way...

	   If source port node is a switch, then p_physp should
	   point to the port that routes the destination lid
	 */

	p_node = osm_physp_get_node_ptr(p_physp);

	if (p_node->sw) {
		/*
		 * Source node is a switch.
		 * Make sure that p_physp points to the out port of the
		 * switch that routes to the destination lid (dest_lid_ho)
		 */
		p_physp = osm_switch_get_route_by_lid(p_node->sw, dest_lid);
		if (p_physp == 0) {
			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4514: "
				"Can't find routing to LID %u on switch %s "
				"(GUID 0x%016"PRIx64")\n", dest_lid_ho,
				p_node->print_desc,
				cl_ntoh64(osm_node_get_node_guid(p_node)));
			status = IB_NOT_FOUND;
			goto Exit;
		}
	}

	if (sa->p_subn->opt.qos) {

		/*
		 * Whether this node is switch or CA, the IN port for
		 * the sl2vl table is 0, because this is a source node.
		 */
		p_slvl_tbl = osm_physp_get_slvl_tbl(p_physp, 0);

		/* update valid SLs that still exist on this route */
		for (i = 0; i < IB_MAX_NUM_VLS; i++) {
			if (valid_sl_mask & (1 << i) &&
			    ib_slvl_table_get(p_slvl_tbl, i) == IB_DROP_VL)
				valid_sl_mask &= ~(1 << i);
		}
		if (!valid_sl_mask) {
			OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
				"All the SLs lead to VL15 on this path\n");
			status = IB_NOT_FOUND;
			goto Exit;
		}
	}

	/*
	 * Same as above
	 */
	p_node = osm_physp_get_node_ptr(p_dest_physp);

	if (p_node->sw) {
		/*
		 * if destination is switch, we want p_dest_physp to point to port 0
		 */
		p_dest_physp =
		    osm_switch_get_route_by_lid(p_node->sw, dest_lid);

		if (p_dest_physp == 0) {
			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4515: "
				"Can't find routing to LID %u on switch %s "
				"(GUID 0x%016"PRIx64")\n", dest_lid_ho,
				p_node->print_desc,
				cl_ntoh64(osm_node_get_node_guid(p_node)));
			status = IB_NOT_FOUND;
			goto Exit;
		}

	}

	/*
	 * Now go through the path step by step
	 */

	while (p_physp != p_dest_physp) {

		/* remember the egress port for the error message below */
		int tmp_pnum = p_physp->port_num;
		p_node = osm_physp_get_node_ptr(p_physp);
		p_physp = osm_physp_get_remote(p_physp);

		if (p_physp == 0) {
			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4505: "
				"Can't find remote phys port of %s (GUID "
				"0x%016"PRIx64") port %d "
				"while routing to LID %u\n",
				p_node->print_desc,
				cl_ntoh64(osm_node_get_node_guid(p_node)),
				tmp_pnum,
				dest_lid_ho);
			status = IB_ERROR;
			goto Exit;
		}

		/* update number of hops traversed */
		hops++;
		if (hops > MAX_HOPS) {
			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4520: "
				"Path from GUID 0x%016" PRIx64 " (%s) to"
				" lid %u GUID 0x%016" PRIx64 " (%s) needs"
				" more than %d hops, max %d hops allowed\n",
				cl_ntoh64(osm_physp_get_port_guid(p_src_physp)),
				p_src_physp->p_node->print_desc, dest_lid_ho,
				cl_ntoh64(osm_physp_get_port_guid
					  (p_dest_physp)),
				p_dest_physp->p_node->print_desc, hops,
				MAX_HOPS);
			status = IB_NOT_FOUND;
			goto Exit;
		}

		in_port_num = osm_physp_get_port_num(p_physp);

		/*
		   This is point to point case (no switch in between)
		 */
		if (p_physp == p_dest_physp)
			break;

		p_node = osm_physp_get_node_ptr(p_physp);

		if (!p_node->sw) {
			/*
			   There is some sort of problem in the subnet object!
			   If this isn't a switch, we should have reached
			   the destination by now!
			 */
			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4503: "
				"Internal error, bad path while routing "
				"from %s (GUID: 0x%016"PRIx64") port %d "
				"to %s (GUID: 0x%016"PRIx64") port %d; "
				"ended at %s port %d\n",
				p_src_alias_guid->p_base_port->p_node->print_desc,
				cl_ntoh64(p_src_alias_guid->p_base_port->p_node->node_info.node_guid),
				p_src_alias_guid->p_base_port->p_physp->port_num,
				p_dest_alias_guid->p_base_port->p_node->print_desc,
				cl_ntoh64(p_dest_alias_guid->p_base_port->p_node->node_info.node_guid),
				p_dest_alias_guid->p_base_port->p_physp->port_num,
				p_node->print_desc,
				p_physp->port_num);
			status = IB_ERROR;
			goto Exit;
		}

		/*
		   Check parameters for the ingress port in this switch.
		 */
		p_pi = &p_physp->port_info;

		if (mtu > ib_port_info_get_mtu_cap(p_pi))
			mtu = ib_port_info_get_mtu_cap(p_pi);

		/* the extended-speeds capability bit is read from switch port 0 */
		p_physp0 = osm_node_get_physp_ptr((osm_node_t *)p_node, 0);
		p_pi0 = &p_physp0->port_info;
		if (ib_path_compare_rates(rate,
					  ib_port_info_compute_rate(p_pi,
								    p_pi0->capability_mask & IB_PORT_CAP_HAS_EXT_SPEEDS)) > 0)
			rate = ib_port_info_compute_rate(p_pi,
							 p_pi0->capability_mask & IB_PORT_CAP_HAS_EXT_SPEEDS);

		/*
		   Continue with the egress port on this switch.
		 */
		p_physp = osm_switch_get_route_by_lid(p_node->sw, dest_lid);
		if (p_physp == 0) {
			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4516: "
				"Dead end path on switch "
				"%s (GUID: 0x%016"PRIx64") to LID %u\n",
				p_node->print_desc,
				cl_ntoh64(osm_node_get_node_guid(p_node)),
				dest_lid_ho);
			status = IB_ERROR;
			goto Exit;
		}

		p_pi = &p_physp->port_info;

		if (mtu > ib_port_info_get_mtu_cap(p_pi))
			mtu = ib_port_info_get_mtu_cap(p_pi);

		p_physp0 = osm_node_get_physp_ptr((osm_node_t *)p_node, 0);
		p_pi0 = &p_physp0->port_info;
		if (ib_path_compare_rates(rate,
					  ib_port_info_compute_rate(p_pi,
								    p_pi0->capability_mask & IB_PORT_CAP_HAS_EXT_SPEEDS)) > 0)
			rate = ib_port_info_compute_rate(p_pi,
							 p_pi0->capability_mask & IB_PORT_CAP_HAS_EXT_SPEEDS);

		if (sa->p_subn->opt.qos) {
			/*
			 * Check SL2VL table of the switch and update valid SLs
			 */
			p_slvl_tbl =
			    osm_physp_get_slvl_tbl(p_physp, in_port_num);
			for (i = 0; i < IB_MAX_NUM_VLS; i++) {
				if (valid_sl_mask & (1 << i) &&
				    ib_slvl_table_get(p_slvl_tbl,
						      i) == IB_DROP_VL)
					valid_sl_mask &= ~(1 << i);
			}
			if (!valid_sl_mask) {
				OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
					"All the SLs lead to VL15 "
					"on this path\n");
				status = IB_NOT_FOUND;
				goto Exit;
			}
		}
	}

	/*
	   p_physp now points to the destination
	 */
	p_pi = &p_physp->port_info;

	if (mtu > ib_port_info_get_mtu_cap(p_pi))
		mtu = ib_port_info_get_mtu_cap(p_pi);

	if (ib_path_compare_rates(rate,
				  ib_port_info_compute_rate(p_pi,
							    p_pi->capability_mask & IB_PORT_CAP_HAS_EXT_SPEEDS)) > 0)
		rate = ib_port_info_compute_rate(p_pi,
						 p_pi->capability_mask & IB_PORT_CAP_HAS_EXT_SPEEDS);

	OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
		"Path min MTU = %u, min rate = %u\n", mtu, rate);

	/*
	 * Get QoS Level object according to the MultiPath request
	 * and adjust MultiPath parameters according to QoS settings
	 */
	if (sa->p_subn->opt.qos && sa->p_subn->p_qos_policy &&
	    (p_qos_level =
	     osm_qos_policy_get_qos_level_by_mpr(sa->p_subn->p_qos_policy,
						 p_mpr, p_src_physp,
						 p_dest_physp, comp_mask))) {

		OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
			"MultiPathRecord request matches QoS Level '%s' (%s)\n",
			p_qos_level->name,
			p_qos_level->use ? p_qos_level->use : "no description");

		if (p_qos_level->mtu_limit_set
		    && (mtu > p_qos_level->mtu_limit))
			mtu = p_qos_level->mtu_limit;

		if (p_qos_level->rate_limit_set
		    && (ib_path_compare_rates(rate, p_qos_level->rate_limit) > 0))
			rate = p_qos_level->rate_limit;

		if (p_qos_level->sl_set) {
			required_sl = p_qos_level->sl;
			if (!(valid_sl_mask & (1 << required_sl))) {
				status = IB_NOT_FOUND;
				goto Exit;
			}
		}
	}

	/*
	   Determine if these values meet the user criteria
	 */

	/* we silently ignore cases where only the MTU selector is defined */
	if ((comp_mask & IB_MPR_COMPMASK_MTUSELEC) &&
	    (comp_mask & IB_MPR_COMPMASK_MTU)) {
		required_mtu = ib_multipath_rec_mtu(p_mpr);
		switch (ib_multipath_rec_mtu_sel(p_mpr)) {
		case 0:	/* must be greater than */
			if (mtu <= required_mtu)
				status = IB_NOT_FOUND;
			break;

		case 1:	/* must be less than */
			if (mtu >= required_mtu) {
				/* adjust to use the highest mtu
				   lower than the required one */
				if (required_mtu > 1)
					mtu = required_mtu - 1;
				else
					status = IB_NOT_FOUND;
			}
			break;

		case 2:	/* exact match */
			if (mtu < required_mtu)
				status = IB_NOT_FOUND;
			else
				mtu = required_mtu;
			break;

		case 3:	/* largest available */
			/* can't be disqualified by this one */
			break;

		default:
			/* if we're here, there's a bug in ib_multipath_rec_mtu_sel() */
			CL_ASSERT(FALSE);
			status = IB_ERROR;
			break;
		}
	}
	if (status != IB_SUCCESS)
		goto Exit;

	/* we silently ignore cases where only the Rate selector is defined */
	if ((comp_mask & IB_MPR_COMPMASK_RATESELEC) &&
	    (comp_mask & IB_MPR_COMPMASK_RATE)) {
		required_rate = ib_multipath_rec_rate(p_mpr);
		switch (ib_multipath_rec_rate_sel(p_mpr)) {
		case 0:	/* must be greater than */
			if (ib_path_compare_rates(rate, required_rate) <= 0)
				status = IB_NOT_FOUND;
			break;

		case 1:	/* must be less than */
			if (ib_path_compare_rates(rate, required_rate) >= 0) {
				/* adjust the rate to use the highest rate
				   lower than the required one */
				rate = ib_path_rate_get_prev(required_rate);
				if (!rate)
					status = IB_NOT_FOUND;
			}
			break;

		case 2:	/* exact match */
			if (ib_path_compare_rates(rate, required_rate))
				status = IB_NOT_FOUND;
			else
				rate = required_rate;
			break;

		case 3:	/* largest available */
			/* can't be disqualified by this one */
			break;

		default:
			/* if we're here, there's a bug in ib_multipath_rec_rate_sel() */
			CL_ASSERT(FALSE);
			status = IB_ERROR;
			break;
		}
	}
	if (status != IB_SUCCESS)
		goto Exit;

	/* Verify the pkt_life_time */
	/* According to spec definition IBA 1.2 Table 205 PacketLifeTime description,
	   for loopback paths, packetLifeTime shall be zero. */
	if (p_src_alias_guid->p_base_port == p_dest_alias_guid->p_base_port)
		pkt_life = 0;	/* loopback */
	else if (p_qos_level && p_qos_level->pkt_life_set)
		pkt_life = p_qos_level->pkt_life;
	else
		pkt_life = sa->p_subn->opt.subnet_timeout;

	/* we silently ignore cases where only the PktLife selector is defined */
	if ((comp_mask & IB_MPR_COMPMASK_PKTLIFETIMESELEC) &&
	    (comp_mask & IB_MPR_COMPMASK_PKTLIFETIME)) {
		required_pkt_life = ib_multipath_rec_pkt_life(p_mpr);
		switch (ib_multipath_rec_pkt_life_sel(p_mpr)) {
		case 0:	/* must be greater than */
			if (pkt_life <= required_pkt_life)
				status = IB_NOT_FOUND;
			break;

		case 1:	/* must be less than */
			if (pkt_life >= required_pkt_life) {
				/* adjust the lifetime to use the highest possible
				   lower than the required one */
				if (required_pkt_life > 1)
					pkt_life = required_pkt_life - 1;
				else
					status = IB_NOT_FOUND;
			}
			break;

		case 2:	/* exact match */
			if (pkt_life < required_pkt_life)
				status = IB_NOT_FOUND;
			else
				pkt_life = required_pkt_life;
			break;

		case 3:	/* smallest available */
			/* can't be disqualified by this one */
			break;

		default:
			/* if we're here, there's a bug in ib_multipath_rec_pkt_life_sel() */
			CL_ASSERT(FALSE);
			status = IB_ERROR;
			break;
		}
	}

	if (status != IB_SUCCESS)
		goto Exit;

	/*
	 * set Pkey for this MultiPath record request
	 */

	if (comp_mask & IB_MPR_COMPMASK_RAWTRAFFIC &&
	    cl_ntoh32(p_mpr->hop_flow_raw) & (1 << 31))
		required_pkey =
		    osm_physp_find_common_pkey(p_src_physp, p_dest_physp,
					       sa->p_subn->opt.allow_both_pkeys);

	else if (comp_mask & IB_MPR_COMPMASK_PKEY) {
		/*
		 * MPR request has a specific pkey:
		 * Check that source and destination share this pkey.
		 * If QoS level has pkeys, check that this pkey exists
		 * in the QoS level pkeys.
		 * MPR returned pkey is the requested pkey.
		 */
		required_pkey = p_mpr->pkey;
		if (!osm_physp_share_this_pkey
		    (p_src_physp, p_dest_physp, required_pkey,
		     sa->p_subn->opt.allow_both_pkeys)) {
			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4518: "
				"Ports src 0x%016"PRIx64" (%s port %d) "
				"and dst 0x%016"PRIx64" (%s port %d) "
				"do not share the specified PKey 0x%04x\n",
				cl_ntoh64(osm_physp_get_port_guid(p_src_physp)),
				p_src_physp->p_node->print_desc,
				p_src_physp->port_num,
				cl_ntoh64(osm_physp_get_port_guid
					  (p_dest_physp)),
				p_dest_physp->p_node->print_desc,
				p_dest_physp->port_num,
				cl_ntoh16(required_pkey));
			status = IB_NOT_FOUND;
			goto Exit;
		}
		if (p_qos_level && p_qos_level->pkey_range_len &&
		    !osm_qos_level_has_pkey(p_qos_level, required_pkey)) {
			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 451C: "
				"Ports src 0x%016"PRIx64" (%s port %d) "
				"and dst 0x%016"PRIx64" (%s port %d) "
				"do not share specified PKey (0x%04x) as "
				"defined by QoS level \"%s\"\n",
				cl_ntoh64(osm_physp_get_port_guid(p_src_physp)),
				p_src_physp->p_node->print_desc,
				p_src_physp->port_num,
				cl_ntoh64(osm_physp_get_port_guid
					  (p_dest_physp)),
				p_dest_physp->p_node->print_desc,
				p_dest_physp->port_num,
				cl_ntoh16(required_pkey),
				p_qos_level->name);
			status = IB_NOT_FOUND;
			goto Exit;
		}

	} else if (p_qos_level && p_qos_level->pkey_range_len) {
		/*
		 * MPR request doesn't have a specific pkey, but QoS level
		 * has pkeys - get shared pkey from QoS level pkeys
		 */
		required_pkey = osm_qos_level_get_shared_pkey(p_qos_level,
							      p_src_physp,
							      p_dest_physp,
							      sa->p_subn->opt.allow_both_pkeys);
		if (!required_pkey) {
			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 451D: "
				"Ports src 0x%016"PRIx64" (%s port %d) "
				"and dst 0x%016"PRIx64" (%s port %d) "
				"do not share a PKey as defined by QoS "
				"level \"%s\"\n",
				cl_ntoh64(osm_physp_get_port_guid(p_src_physp)),
				p_src_physp->p_node->print_desc,
				p_src_physp->port_num,
				cl_ntoh64(osm_physp_get_port_guid
					  (p_dest_physp)),
				p_dest_physp->p_node->print_desc,
				p_dest_physp->port_num,
				p_qos_level->name);
			status = IB_NOT_FOUND;
			goto Exit;
		}

	} else {
		/*
		 * Neither MPR request nor QoS level have pkey.
		 * Just get any shared pkey.
		 */
		required_pkey =
		    osm_physp_find_common_pkey(p_src_physp, p_dest_physp,
					       sa->p_subn->opt.allow_both_pkeys);
		if (!required_pkey) {
			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4519: "
				"Ports src 0x%016"PRIx64" (%s port %d) "
				"and dst 0x%016"PRIx64" (%s port %d) "
				"do not have any shared PKeys\n",
				cl_ntoh64(osm_physp_get_port_guid(p_src_physp)),
				p_src_physp->p_node->print_desc,
				p_src_physp->port_num,
				cl_ntoh64(osm_physp_get_port_guid
					  (p_dest_physp)),
				p_dest_physp->p_node->print_desc,
				p_dest_physp->port_num);
			status = IB_NOT_FOUND;
			goto Exit;
		}
	}

	/*
	 * Resolve the partition of the chosen pkey (membership bit masked
	 * off).  A lookup miss yields the map's end sentinel, which is
	 * normalized to NULL here so later code can simply test p_prtn.
	 */
	if (required_pkey) {
		p_prtn =
		    (osm_prtn_t *) cl_qmap_get(&sa->p_subn->prtn_pkey_tbl,
					       required_pkey &
					       cl_ntoh16((uint16_t) ~ 0x8000));
		if (p_prtn ==
		    (osm_prtn_t *) cl_qmap_end(&sa->p_subn->prtn_pkey_tbl))
			p_prtn = NULL;
	}

	/*
	 * Set MultiPathRecord SL.
	 */

	if (comp_mask & IB_MPR_COMPMASK_SL) {
		/*
		 * Specific SL was requested
		 */
		required_sl = ib_multipath_rec_sl(p_mpr);

		if (p_qos_level && p_qos_level->sl_set &&
		    p_qos_level->sl != required_sl) {
			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 451E: "
				"QoS constraints: required MultiPathRecord SL "
				"(%u) doesn't match QoS policy \"%s\" SL (%u) "
				"[%s port %d <-> %s port %d]\n", required_sl,
				p_qos_level->name,
				p_qos_level->sl,
				p_src_alias_guid->p_base_port->p_node->print_desc,
				p_src_alias_guid->p_base_port->p_physp->port_num,
				p_dest_alias_guid->p_base_port->p_node->print_desc,
				p_dest_alias_guid->p_base_port->p_physp->port_num);
			status = IB_NOT_FOUND;
			goto Exit;
		}

	} else if (p_qos_level && p_qos_level->sl_set) {
		/*
		 * No specific SL was requested,
		 * but there is an SL in QoS level.
		 */
		required_sl = p_qos_level->sl;

		if (required_pkey && p_prtn && p_prtn->sl != p_qos_level->sl)
			OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
				"QoS level SL (%u) overrides partition SL (%u)\n",
				p_qos_level->sl, p_prtn->sl);

	} else if (required_pkey) {
		/*
		 * No specific SL in request or in QoS level - use partition SL.
		 * p_prtn was already looked up above and is NULL when the
		 * pkey has no partition; it must not be fetched again with a
		 * bare cl_qmap_get(), whose miss result is the end sentinel
		 * rather than NULL.
		 */
		if (!p_prtn) {
			required_sl = OSM_DEFAULT_SL;
			/* this may be possible when pkey tables are created somehow in
			   previous runs or things are going wrong here */
			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 451A: "
				"No partition found for PKey 0x%04x - "
				"using default SL %d "
				"[%s port %d <-> %s port %d]\n",
				cl_ntoh16(required_pkey), required_sl,
				p_src_alias_guid->p_base_port->p_node->print_desc,
				p_src_alias_guid->p_base_port->p_physp->port_num,
				p_dest_alias_guid->p_base_port->p_node->print_desc,
				p_dest_alias_guid->p_base_port->p_physp->port_num);
		} else
			required_sl = p_prtn->sl;

	} else if (sa->p_subn->opt.qos) {
		/* prefer the default SL, else the lowest SL still valid */
		if (valid_sl_mask & (1 << OSM_DEFAULT_SL))
			required_sl = OSM_DEFAULT_SL;
		else {
			for (i = 0; i < IB_MAX_NUM_VLS; i++)
				if (valid_sl_mask & (1 << i))
					break;
			required_sl = i;
		}
	} else
		required_sl = OSM_DEFAULT_SL;

	if (sa->p_subn->opt.qos && !(valid_sl_mask & (1 << required_sl))) {
		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 451F: "
			"Selected SL (%u) leads to VL15 "
			"[%s port %d <-> %s port %d]\n",
			required_sl,
			p_src_alias_guid->p_base_port->p_node->print_desc,
			p_src_alias_guid->p_base_port->p_physp->port_num,
			p_dest_alias_guid->p_base_port->p_node->print_desc,
			p_dest_alias_guid->p_base_port->p_physp->port_num);
		status = IB_NOT_FOUND;
		goto Exit;
	}

	/* reset pkey when raw traffic */
	if (comp_mask & IB_MPR_COMPMASK_RAWTRAFFIC &&
	    cl_ntoh32(p_mpr->hop_flow_raw) & (1 << 31))
		required_pkey = 0;

	p_parms->mtu = mtu;
	p_parms->rate = rate;
	p_parms->pkey = required_pkey;
	p_parms->pkt_life = pkt_life;
	p_parms->sl = required_sl;
	p_parms->hops = hops;

	OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "MultiPath params:"
		" mtu = %u, rate = %u, packet lifetime = %u,"
		" pkey = 0x%04X, sl = %u, hops = %u\n", mtu, rate,
		pkt_life, cl_ntoh16(required_pkey), required_sl, hops);

Exit:
	OSM_LOG_EXIT(sa->p_log);
	return status;
}
示例#11
0
uint8_t osm_switch_recommend_path(IN const osm_switch_t * p_sw,
				  IN osm_port_t * p_port, IN uint16_t lid_ho,
				  IN unsigned start_from,
				  IN boolean_t ignore_existing,
				  IN boolean_t routing_for_lmc,
				  IN boolean_t dor,
				  IN boolean_t port_shifting,
				  IN uint32_t scatter_ports)
{
	/*
	   We support an enhanced LMC aware routing mode:
	   In the case of LMC > 0, we can track the remote side
	   system and node for all of the lids of the target
	   and try and avoid routing again through the same
	   system / node.

	   Assume if routing_for_lmc is true that this procedure was
	   provided the tracking array and counter via p_port->priv,
	   and we can conduct this algorithm.
	 */
	uint16_t base_lid;
	uint8_t hops;
	uint8_t least_hops;
	uint8_t port_num;
	uint8_t num_ports;
	uint32_t least_paths = 0xFFFFFFFF;
	unsigned i;
	/*
	   The follwing will track the least paths if the
	   route should go through a new system/node
	 */
	uint32_t least_paths_other_sys = 0xFFFFFFFF;
	uint32_t least_paths_other_nodes = 0xFFFFFFFF;
	uint32_t least_forwarded_to = 0xFFFFFFFF;
	uint32_t check_count;
	uint8_t best_port = 0;
	/*
	   These vars track the best port if it connects to
	   not used system/node.
	 */
	uint8_t best_port_other_sys = 0;
	uint8_t best_port_other_node = 0;
	boolean_t port_found = FALSE;
	osm_physp_t *p_physp;
	osm_physp_t *p_rem_physp;
	osm_node_t *p_rem_node;
	osm_node_t *p_rem_node_first = NULL;
	struct osm_remote_node *p_remote_guid = NULL;
	struct osm_remote_node null_remote_node = {NULL, 0, 0};
	struct switch_port_path port_paths[IB_NODE_NUM_PORTS_MAX];
	unsigned int port_paths_total_paths = 0;
	unsigned int port_paths_count = 0;
	uint8_t scatter_possible_ports[IB_NODE_NUM_PORTS_MAX];
	unsigned int scatter_possible_ports_count = 0;
	int found_sys_guid = 0;
	int found_node_guid = 0;

	CL_ASSERT(lid_ho > 0);

	if (p_port->p_node->sw) {
		if (p_port->p_node->sw == p_sw)
			return 0;
		base_lid = osm_port_get_base_lid(p_port);
	} else {
		p_physp = p_port->p_physp;
		if (!p_physp || !p_physp->p_remote_physp ||
		    !p_physp->p_remote_physp->p_node->sw)
			return OSM_NO_PATH;

		if (p_physp->p_remote_physp->p_node->sw == p_sw)
			return p_physp->p_remote_physp->port_num;
		base_lid =
		    osm_node_get_base_lid(p_physp->p_remote_physp->p_node, 0);
	}
	base_lid = cl_ntoh16(base_lid);

	num_ports = p_sw->num_ports;

	least_hops = osm_switch_get_least_hops(p_sw, base_lid);
	if (least_hops == OSM_NO_PATH)
		return OSM_NO_PATH;

	/*
	   First, inquire with the forwarding table for an existing
	   route.  If one is found, honor it unless:
	   1. the ignore existing flag is set.
	   2. the physical port is not a valid one or not healthy
	   3. the physical port has a remote port (the link is up)
	   4. the port has min-hops to the target (avoid loops)
	 */
	if (!ignore_existing) {
		port_num = osm_switch_get_port_by_lid(p_sw, lid_ho);

		if (port_num != OSM_NO_PATH) {
			CL_ASSERT(port_num < num_ports);

			p_physp =
			    osm_node_get_physp_ptr(p_sw->p_node, port_num);
			/*
			   Don't be too trusting of the current forwarding table!
			   Verify that the port number is legal and that the
			   LID is reachable through this port.
			 */
			if (p_physp && osm_physp_is_healthy(p_physp) &&
			    osm_physp_get_remote(p_physp)) {
				hops =
				    osm_switch_get_hop_count(p_sw, base_lid,
							     port_num);
				/*
				   If we aren't using pre-defined user routes
				   function, then we need to make sure that the
				   current path is the minimum one. In case of
				   having such a user function - this check will
				   not be done, and the old routing will be used.
				   Note: This means that it is the user's job to
				   clean all data in the forwarding tables that
				   he wants to be overridden by the minimum
				   hop function.
				 */
				if (hops == least_hops)
					return port_num;
			}
		}
	}

	/*
	   This algorithm selects a port based on a static load balanced
	   selection across equal hop-count ports.
	   There is lots of room for improved sophistication here,
	   possibly guided by user configuration info.
	 */

	/*
	   OpenSM routing is "local" - not considering a full lid to lid
	   path. As such we can not guarantee a path will not loop if we
	   do not always follow least hops.
	   So we must abort if not least hops.
	 */

	/* port number starts with one and num_ports is 1 + num phys ports */
	for (i = start_from; i < start_from + num_ports; i++) {
		port_num = osm_switch_get_dimn_port(p_sw, i % num_ports);
		if (!port_num ||
		    osm_switch_get_hop_count(p_sw, base_lid, port_num) !=
		    least_hops)
			continue;

		/* let us make sure it is not down or unhealthy */
		p_physp = osm_node_get_physp_ptr(p_sw->p_node, port_num);
		if (!p_physp || !osm_physp_is_healthy(p_physp) ||
		    /*
		       we require all - non sma ports to be linked
		       to be routed through
		     */
		    !osm_physp_get_remote(p_physp))
			continue;

		/*
		   We located a least-hop port, possibly one of many.
		   For this port, check the running total count of
		   the number of paths through this port.  Select
		   the port routing the least number of paths.
		 */
		check_count =
		    osm_port_prof_path_count_get(&p_sw->p_prof[port_num]);


		if (dor) {
			/* Get the Remote Node */
			p_rem_physp = osm_physp_get_remote(p_physp);
			p_rem_node = osm_physp_get_node_ptr(p_rem_physp);
			/* use the first dimension, but spread traffic
			 * out among the group of ports representing
			 * that dimension */
			if (!p_rem_node_first)
				p_rem_node_first = p_rem_node;
			else if (p_rem_node != p_rem_node_first)
				continue;
			if (routing_for_lmc) {
				struct osm_remote_guids_count *r = p_port->priv;
				uint8_t rem_port = osm_physp_get_port_num(p_rem_physp);
				unsigned int j;

				for (j = 0; j < r->count; j++) {
					p_remote_guid = &r->guids[j];
					if ((p_remote_guid->node == p_rem_node)
					    && (p_remote_guid->port == rem_port))
						break;
				}
				if (j == r->count)
					p_remote_guid = &null_remote_node;
			}
		/*
		   Advanced LMC routing requires tracking of the
		   best port by the node connected to the other side of
		   it.
		 */
		} else if (routing_for_lmc) {
			/* Is the sys guid already used ? */
			p_remote_guid = switch_find_sys_guid_count(p_sw,
								   p_port->priv,
								   port_num);

			/* If not update the least hops for this case */
			if (!p_remote_guid) {
				if (check_count < least_paths_other_sys) {
					least_paths_other_sys = check_count;
					best_port_other_sys = port_num;
					least_forwarded_to = 0;
				}
				found_sys_guid = 0;
			} else {	/* same sys found - try node */


				/* Else is the node guid already used ? */
				p_remote_guid = switch_find_node_guid_count(p_sw,
									    p_port->priv,
									    port_num);

				/* If not update the least hops for this case */
				if (!p_remote_guid
				    && check_count < least_paths_other_nodes) {
					least_paths_other_nodes = check_count;
					best_port_other_node = port_num;
					least_forwarded_to = 0;
				}
				/* else prior sys and node guid already used */

				if (!p_remote_guid)
					found_node_guid = 0;
				else
					found_node_guid = 1;
				found_sys_guid = 1;
			}	/* same sys found */
		}

		port_paths[port_paths_count].port_num = port_num;
		port_paths[port_paths_count].path_count = check_count;
		if (routing_for_lmc) {
			port_paths[port_paths_count].found_sys_guid = found_sys_guid;
			port_paths[port_paths_count].found_node_guid = found_node_guid;
		}
		if (routing_for_lmc && p_remote_guid)
			port_paths[port_paths_count].forwarded_to = p_remote_guid->forwarded_to;
		else
			port_paths[port_paths_count].forwarded_to = 0;
		port_paths_total_paths += check_count;
		port_paths_count++;

		/* routing for LMC mode */
		/*
		   the count is min but also lower then the max subscribed
		 */
		if (check_count < least_paths) {
			port_found = TRUE;
			best_port = port_num;
			least_paths = check_count;
			scatter_possible_ports_count = 0;
			scatter_possible_ports[scatter_possible_ports_count++] = port_num;
			if (routing_for_lmc
			    && p_remote_guid
			    && p_remote_guid->forwarded_to < least_forwarded_to)
				least_forwarded_to = p_remote_guid->forwarded_to;
		} else if (scatter_ports
			   && check_count == least_paths) {
			scatter_possible_ports[scatter_possible_ports_count++] = port_num;
		} else if (routing_for_lmc
			   && p_remote_guid
			   && check_count == least_paths
			   && p_remote_guid->forwarded_to < least_forwarded_to) {
			least_forwarded_to = p_remote_guid->forwarded_to;
			best_port = port_num;
		}
	}

	if (port_found == FALSE)
		return OSM_NO_PATH;

	if (port_shifting && port_paths_count) {
		/* In the port_paths[] array, we now have all the ports that we
		 * can route out of.  Using some shifting math below, possibly
		 * select a different one so that lids won't align in LFTs
		 *
		 * If lmc > 0, we need to loop through these ports to find the
		 * least_forwarded_to port, best_port_other_sys, and
		 * best_port_other_node just like before but through the different
		 * ordering.
		 */

		least_paths = 0xFFFFFFFF;
		least_paths_other_sys = 0xFFFFFFFF;
		least_paths_other_nodes = 0xFFFFFFFF;
	        least_forwarded_to = 0xFFFFFFFF;
		best_port = 0;
		best_port_other_sys = 0;
		best_port_other_node = 0;

		for (i = 0; i < port_paths_count; i++) {
			unsigned int idx;

			idx = (port_paths_total_paths/port_paths_count + i) % port_paths_count;

			if (routing_for_lmc) {
				if (!port_paths[idx].found_sys_guid
				    && port_paths[idx].path_count < least_paths_other_sys) {
					least_paths_other_sys = port_paths[idx].path_count;
					best_port_other_sys = port_paths[idx].port_num;
					least_forwarded_to = 0;
				}
				else if (!port_paths[idx].found_node_guid
					 && port_paths[idx].path_count < least_paths_other_nodes) {
					least_paths_other_nodes = port_paths[idx].path_count;
					best_port_other_node = port_paths[idx].port_num;
					least_forwarded_to = 0;
				}
			}

			if (port_paths[idx].path_count < least_paths) {
				best_port = port_paths[idx].port_num;
				least_paths = port_paths[idx].path_count;
				if (routing_for_lmc
				    && (port_paths[idx].found_sys_guid
					|| port_paths[idx].found_node_guid)
				    && port_paths[idx].forwarded_to < least_forwarded_to)
					least_forwarded_to = port_paths[idx].forwarded_to;
			}
			else if (routing_for_lmc
				 && (port_paths[idx].found_sys_guid
				     || port_paths[idx].found_node_guid)
				 && port_paths[idx].path_count == least_paths
				 && port_paths[idx].forwarded_to < least_forwarded_to) {
				least_forwarded_to = port_paths[idx].forwarded_to;
				best_port = port_paths[idx].port_num;
			}

		}
	}

	/*
	   if we are in enhanced routing mode and the best port is not
	   the local port 0
	 */
	if (routing_for_lmc && best_port && !scatter_ports) {
		/* Select the least hop port of the non used sys first */
		if (best_port_other_sys)
			best_port = best_port_other_sys;
		else if (best_port_other_node)
			best_port = best_port_other_node;
	} else if (scatter_ports) {
		/*
		 * There is some danger that this random could "rebalance" the routes
		 * every time, to combat this there is a global srandom that
		 * occurs at the start of every sweep.
		 */
		unsigned int idx = random() % scatter_possible_ports_count;
		best_port = scatter_possible_ports[idx];
	}
	return best_port;
}
示例#12
0
/*
 * Build a PortInfo Set payload for one physical port and send it when
 * something differs from the PortInfo cached in p_physp->port_info.
 *
 * sm         - subnet manager object; supplies the log, the subnet options
 *              and the SM base LID written into the payload.
 * p_physp    - physical port to configure.
 * port_state - requested port state, or IB_LINK_NO_CHANGE to only refresh
 *              the other PortInfo fields.
 *
 * Returns TRUE when a PortInfo Set was issued through osm_req_set(), FALSE
 * when nothing had to be sent (including the base switch port 0 case and
 * the "port 0 on a node without a switch object" error case).  The status
 * returned by osm_req_set() is not reflected in the return value.
 */
static boolean_t
__osm_link_mgr_set_physp_pi(osm_sm_t * sm,
			    IN osm_physp_t * const p_physp,
			    IN uint8_t const port_state)
{
	uint8_t payload[IB_SMP_DATA_SIZE];
	ib_port_info_t *const p_pi = (ib_port_info_t *) payload;
	const ib_port_info_t *p_old_pi;
	osm_madw_context_t context;
	osm_node_t *p_node;
	ib_api_status_t status;
	uint8_t port_num;
	uint8_t mtu;
	uint8_t op_vls;
	boolean_t esp0 = FALSE;
	boolean_t send_set = FALSE;
	osm_physp_t *p_remote_physp;

	OSM_LOG_ENTER(sm->p_log);

	p_node = osm_physp_get_node_ptr(p_physp);

	port_num = osm_physp_get_port_num(p_physp);

	if (port_num == 0) {
		/*
		   CAs don't have a port 0, and for switch port 0,
		   we need to check if this is enhanced or base port 0.
		   For base port 0 the following parameters are not valid (p822, table 145).
		 */
		if (!p_node->sw) {
			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 4201: "
				"Cannot find switch by guid: 0x%" PRIx64 "\n",
				cl_ntoh64(p_node->node_info.node_guid));
			goto Exit;
		}

		if (ib_switch_info_is_enhanced_port0(&p_node->sw->switch_info)
		    == FALSE) {
			/* This means the switch doesn't support enhanced port 0.
			   Can skip it. */
			OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
				"Skipping port 0, GUID 0x%016" PRIx64 "\n",
				cl_ntoh64(osm_physp_get_port_guid(p_physp)));
			goto Exit;
		}
		esp0 = TRUE;
	}

	/*
	   PAST THIS POINT WE ARE HANDLING EITHER A NON PORT 0 OR ENHANCED PORT 0
	 */

	p_old_pi = &p_physp->port_info;

	/*
	   Start from a zeroed SMP data area and overlay the cached PortInfo,
	   so every byte handed to osm_req_set() below is defined.
	 */
	memset(payload, 0, IB_SMP_DATA_SIZE);
	memcpy(payload, p_old_pi, sizeof(ib_port_info_t));

	/*
	   Should never write back a value that is bigger then 3 in
	   the PortPhysicalState field - so can not simply copy!

	   Actually we want to write there:
	   port physical state - no change,
	   link down default state = polling
	   port state - as requested.
	 */
	p_pi->state_info2 = 0x02;
	ib_port_info_set_port_state(p_pi, port_state);

	if (ib_port_info_get_link_down_def_state(p_pi) !=
	    ib_port_info_get_link_down_def_state(p_old_pi))
		send_set = TRUE;

	/* didn't get PortInfo before */
	if (!ib_port_info_get_port_state(p_old_pi))
		send_set = TRUE;

	/* we only change port fields if we do not change state */
	if (port_state == IB_LINK_NO_CHANGE) {
		/* The following fields are relevant only for CA port, router, or Enh. SP0 */
		if (osm_node_get_type(p_node) != IB_NODE_TYPE_SWITCH ||
		    port_num == 0) {
			p_pi->m_key = sm->p_subn->opt.m_key;
			if (memcmp(&p_pi->m_key, &p_old_pi->m_key,
				   sizeof(p_pi->m_key)))
				send_set = TRUE;

			p_pi->subnet_prefix = sm->p_subn->opt.subnet_prefix;
			if (memcmp(&p_pi->subnet_prefix,
				   &p_old_pi->subnet_prefix,
				   sizeof(p_pi->subnet_prefix)))
				send_set = TRUE;

			p_pi->base_lid = osm_physp_get_base_lid(p_physp);
			if (memcmp(&p_pi->base_lid, &p_old_pi->base_lid,
				   sizeof(p_pi->base_lid)))
				send_set = TRUE;

			/* we are initializing the ports with our local sm_base_lid */
			p_pi->master_sm_base_lid = sm->p_subn->sm_base_lid;
			if (memcmp(&p_pi->master_sm_base_lid,
				   &p_old_pi->master_sm_base_lid,
				   sizeof(p_pi->master_sm_base_lid)))
				send_set = TRUE;

			p_pi->m_key_lease_period =
			    sm->p_subn->opt.m_key_lease_period;
			if (memcmp(&p_pi->m_key_lease_period,
				   &p_old_pi->m_key_lease_period,
				   sizeof(p_pi->m_key_lease_period)))
				send_set = TRUE;

			/* Enhanced SP0 only gets the subnet LMC when lmc_esp0 is set */
			if (esp0 == FALSE)
				p_pi->mkey_lmc = sm->p_subn->opt.lmc;
			else {
				if (sm->p_subn->opt.lmc_esp0)
					p_pi->mkey_lmc = sm->p_subn->opt.lmc;
				else
					p_pi->mkey_lmc = 0;
			}
			if (memcmp(&p_pi->mkey_lmc, &p_old_pi->mkey_lmc,
				   sizeof(p_pi->mkey_lmc)))
				send_set = TRUE;

			ib_port_info_set_timeout(p_pi,
						 sm->p_subn->opt.
						 subnet_timeout);
			if (ib_port_info_get_timeout(p_pi) !=
			    ib_port_info_get_timeout(p_old_pi))
				send_set = TRUE;
		}

		/*
		   Several timeout mechanisms:
		 */
		p_remote_physp = osm_physp_get_remote(p_physp);
		if (port_num != 0 && p_remote_physp) {
			if (osm_node_get_type(osm_physp_get_node_ptr(p_physp))
			    == IB_NODE_TYPE_ROUTER) {
				ib_port_info_set_hoq_lifetime(p_pi,
							      sm->p_subn->
							      opt.
							      leaf_head_of_queue_lifetime);
			} else
			    if (osm_node_get_type
				(osm_physp_get_node_ptr(p_physp)) ==
				IB_NODE_TYPE_SWITCH) {
				/* Is remote end CA or router (a leaf port) ? */
				if (osm_node_get_type
				    (osm_physp_get_node_ptr(p_remote_physp)) !=
				    IB_NODE_TYPE_SWITCH) {
					ib_port_info_set_hoq_lifetime(p_pi,
								      sm->
								      p_subn->
								      opt.
								      leaf_head_of_queue_lifetime);
					ib_port_info_set_vl_stall_count(p_pi,
									sm->
									p_subn->
									opt.
									leaf_vl_stall_count);
				} else {
					ib_port_info_set_hoq_lifetime(p_pi,
								      sm->
								      p_subn->
								      opt.
								      head_of_queue_lifetime);
					ib_port_info_set_vl_stall_count(p_pi,
									sm->
									p_subn->
									opt.
									vl_stall_count);
				}
			}
			if (ib_port_info_get_hoq_lifetime(p_pi) !=
			    ib_port_info_get_hoq_lifetime(p_old_pi) ||
			    ib_port_info_get_vl_stall_count(p_pi) !=
			    ib_port_info_get_vl_stall_count(p_old_pi))
				send_set = TRUE;
		}

		ib_port_info_set_phy_and_overrun_err_thd(p_pi,
							 sm->p_subn->opt.
							 local_phy_errors_threshold,
							 sm->p_subn->opt.
							 overrun_errors_threshold);
		if (memcmp(&p_pi->error_threshold, &p_old_pi->error_threshold,
			   sizeof(p_pi->error_threshold)))
			send_set = TRUE;

		/*
		   Set the easy common parameters for all port types,
		   then determine the neighbor MTU.
		 */
		p_pi->link_width_enabled = p_old_pi->link_width_supported;
		if (memcmp(&p_pi->link_width_enabled,
			   &p_old_pi->link_width_enabled,
			   sizeof(p_pi->link_width_enabled)))
			send_set = TRUE;

		/*
		   force_link_speed == 0 leaves the enabled speeds alone; the
		   value 15 is applied only when enabled differs from supported.
		 */
		if (sm->p_subn->opt.force_link_speed &&
		    (sm->p_subn->opt.force_link_speed != 15 ||
		     ib_port_info_get_link_speed_enabled(p_pi) !=
		     ib_port_info_get_link_speed_sup(p_pi))) {
			ib_port_info_set_link_speed_enabled(p_pi,
							    sm->p_subn->opt.
							    force_link_speed);
			if (memcmp(&p_pi->link_speed, &p_old_pi->link_speed,
				   sizeof(p_pi->link_speed)))
				send_set = TRUE;
		}

		/* calc new op_vls and mtu */
		op_vls =
		    osm_physp_calc_link_op_vls(sm->p_log, sm->p_subn, p_physp);
		mtu = osm_physp_calc_link_mtu(sm->p_log, p_physp);

		ib_port_info_set_neighbor_mtu(p_pi, mtu);
		if (ib_port_info_get_neighbor_mtu(p_pi) !=
		    ib_port_info_get_neighbor_mtu(p_old_pi))
			send_set = TRUE;

		ib_port_info_set_op_vls(p_pi, op_vls);
		if (ib_port_info_get_op_vls(p_pi) !=
		    ib_port_info_get_op_vls(p_old_pi))
			send_set = TRUE;

		/* provide the vl_high_limit from the qos mgr */
		if (sm->p_subn->opt.qos &&
		    p_physp->vl_high_limit != p_old_pi->vl_high_limit) {
			send_set = TRUE;
			p_pi->vl_high_limit = p_physp->vl_high_limit;
		}
	}

	/*
	   Give active_transition a defined value on every path: the context
	   is handed to osm_req_set() whenever send_set is TRUE, which can
	   happen without any port state change.
	 */
	context.pi_context.active_transition = FALSE;
	if (port_state != IB_LINK_NO_CHANGE &&
	    port_state != ib_port_info_get_port_state(p_old_pi)) {
		send_set = TRUE;
		if (port_state == IB_LINK_ACTIVE)
			context.pi_context.active_transition = TRUE;
	}

	context.pi_context.node_guid = osm_node_get_node_guid(p_node);
	context.pi_context.port_guid = osm_physp_get_port_guid(p_physp);
	context.pi_context.set_method = TRUE;
	context.pi_context.light_sweep = FALSE;

	/* We need to send the PortInfoSet request with the new sm_lid
	   in the following cases:
	   1. There is a change in the values (send_set == TRUE)
	   2. This is a switch external port (so it wasn't handled yet by
	   osm_lid_mgr) and first_time_master_sweep flag on the subnet is TRUE,
	   which means the SM just became master, and it then needs to send at
	   PortInfoSet to every port.
	 */
	if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH && port_num
	    && sm->p_subn->first_time_master_sweep == TRUE)
		send_set = TRUE;

	/*
	   The request status is captured but not acted upon; the return
	   value only tells the caller whether a Set was issued.
	 */
	if (send_set)
		status = osm_req_set(sm, osm_physp_get_dr_path_ptr(p_physp),
				     payload, sizeof(payload),
				     IB_MAD_ATTR_PORT_INFO,
				     cl_hton32(port_num),
				     CL_DISP_MSGID_NONE, &context);

Exit:
	OSM_LOG_EXIT(sm->p_log);
	return send_set;
}
示例#13
0
/*
 * Build a PortInfo Set payload (and, for an FDR10 change, a Mellanox
 * ExtendedPortInfo payload) for one physical port and send it when
 * something differs from the values cached in p_physp.
 *
 * sm         - subnet manager object; supplies the log, the subnet options
 *              and the SM base LID written into the payload.
 * p_physp    - physical port to configure.
 * port_state - requested port state, or IB_LINK_NO_CHANGE to only refresh
 *              the other PortInfo fields.
 *
 * For base switch port 0 only the master SMSL is updated.
 *
 * Returns 0 when nothing had to be sent or every osm_req_set() call
 * returned a zero status, -1 when any osm_req_set() call returned non-zero.
 */
static int link_mgr_set_physp_pi(osm_sm_t * sm, IN osm_physp_t * p_physp,
				 IN uint8_t port_state)
{
	uint8_t payload[IB_SMP_DATA_SIZE], payload2[IB_SMP_DATA_SIZE];
	ib_port_info_t *p_pi = (ib_port_info_t *) payload;
	ib_mlnx_ext_port_info_t *p_epi = (ib_mlnx_ext_port_info_t *) payload2;
	const ib_port_info_t *p_old_pi;
	const ib_mlnx_ext_port_info_t *p_old_epi;
	osm_madw_context_t context;
	osm_node_t *p_node;
	ib_api_status_t status;
	uint8_t port_num, mtu, op_vls, smsl = OSM_DEFAULT_SL;
	boolean_t esp0 = FALSE, send_set = FALSE, send_set2 = FALSE;
	osm_physp_t *p_remote_physp, *physp0;
	int qdr_change = 0, fdr10_change = 0;
	int ret = 0;
	ib_net32_t attr_mod, cap_mask;

	OSM_LOG_ENTER(sm->p_log);

	p_node = osm_physp_get_node_ptr(p_physp);

	p_old_pi = &p_physp->port_info;

	port_num = osm_physp_get_port_num(p_physp);

	if (port_num == 0) {
		/*
		   CAs don't have a port 0, and for switch port 0,
		   we need to check if this is enhanced or base port 0.
		   For base port 0 the following parameters are not valid
		   (IBA 1.2.1 p.830 table 146).
		 */
		if (!p_node->sw) {
			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 4201: "
				"Cannot find switch by guid: 0x%" PRIx64 "\n",
				cl_ntoh64(p_node->node_info.node_guid));
			goto Exit;
		}

		if (ib_switch_info_is_enhanced_port0(&p_node->sw->switch_info)
		    == FALSE) {

			/* Even for base port 0 we might have to set smsl
			   (if we are using lash routing) */
			smsl = link_mgr_get_smsl(sm, p_physp);
			if (smsl != ib_port_info_get_master_smsl(p_old_pi)) {
				send_set = TRUE;
				OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
					"Setting SMSL to %d on port 0 GUID 0x%016"
					PRIx64 "\n", smsl,
					cl_ntoh64(osm_physp_get_port_guid
						  (p_physp)));
			} else {
				/* This means the switch doesn't support
				   enhanced port 0 and we don't need to
				   change SMSL. Can skip it. */
				OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
					"Skipping port 0, GUID 0x%016" PRIx64
					"\n",
					cl_ntoh64(osm_physp_get_port_guid
						  (p_physp)));
				goto Exit;
			}
		} else
			esp0 = TRUE;
	}

	/*
	   Zero the whole SMP data area before overlaying the cached
	   PortInfo: the full sizeof(payload) is handed to osm_req_set()
	   below, so any bytes beyond sizeof(ib_port_info_t) must not be
	   left as uninitialized stack contents.
	 */
	memset(payload, 0, sizeof(payload));
	memcpy(payload, p_old_pi, sizeof(ib_port_info_t));

	/*
	   Should never write back a value that is bigger then 3 in
	   the PortPhysicalState field - so can not simply copy!

	   Actually we want to write there:
	   port physical state - no change,
	   link down default state = polling
	   port state - as requested.
	 */
	p_pi->state_info2 = 0x02;
	ib_port_info_set_port_state(p_pi, port_state);

	/* Check whether this is base port0 smsl handling only */
	if (port_num == 0 && esp0 == FALSE) {
		ib_port_info_set_master_smsl(p_pi, smsl);
		goto Send;
	}

	/*
	   PAST THIS POINT WE ARE HANDLING EITHER A NON PORT 0 OR ENHANCED PORT 0
	 */

	if (ib_port_info_get_link_down_def_state(p_pi) !=
	    ib_port_info_get_link_down_def_state(p_old_pi))
		send_set = TRUE;

	/* didn't get PortInfo before */
	if (!ib_port_info_get_port_state(p_old_pi))
		send_set = TRUE;

	/* we only change port fields if we do not change state */
	if (port_state == IB_LINK_NO_CHANGE) {
		/* The following fields are relevant only for CA port, router, or Enh. SP0 */
		if (osm_node_get_type(p_node) != IB_NODE_TYPE_SWITCH ||
		    port_num == 0) {
			p_pi->m_key = sm->p_subn->opt.m_key;
			if (memcmp(&p_pi->m_key, &p_old_pi->m_key,
				   sizeof(p_pi->m_key)))
				send_set = TRUE;

			p_pi->subnet_prefix = sm->p_subn->opt.subnet_prefix;
			if (memcmp(&p_pi->subnet_prefix,
				   &p_old_pi->subnet_prefix,
				   sizeof(p_pi->subnet_prefix)))
				send_set = TRUE;

			p_pi->base_lid = osm_physp_get_base_lid(p_physp);
			if (memcmp(&p_pi->base_lid, &p_old_pi->base_lid,
				   sizeof(p_pi->base_lid)))
				send_set = TRUE;

			/* we are initializing the ports with our local sm_base_lid */
			p_pi->master_sm_base_lid = sm->p_subn->sm_base_lid;
			if (memcmp(&p_pi->master_sm_base_lid,
				   &p_old_pi->master_sm_base_lid,
				   sizeof(p_pi->master_sm_base_lid)))
				send_set = TRUE;

			smsl = link_mgr_get_smsl(sm, p_physp);
			if (smsl != ib_port_info_get_master_smsl(p_old_pi)) {

				ib_port_info_set_master_smsl(p_pi, smsl);

				OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
					"Setting SMSL to %d on GUID 0x%016"
					PRIx64 ", port %d\n", smsl,
					cl_ntoh64(osm_physp_get_port_guid
						  (p_physp)), port_num);

				send_set = TRUE;
			}

			p_pi->m_key_lease_period =
			    sm->p_subn->opt.m_key_lease_period;
			if (memcmp(&p_pi->m_key_lease_period,
				   &p_old_pi->m_key_lease_period,
				   sizeof(p_pi->m_key_lease_period)))
				send_set = TRUE;

			/* M_KeyProtectBits are currently always zero */
			p_pi->mkey_lmc = 0;
			/* Enhanced SP0 only gets the subnet LMC when lmc_esp0 is set */
			if (esp0 == FALSE || sm->p_subn->opt.lmc_esp0)
				ib_port_info_set_lmc(p_pi, sm->p_subn->opt.lmc);
			if (ib_port_info_get_lmc(p_old_pi) !=
			    ib_port_info_get_lmc(p_pi) ||
			    ib_port_info_get_mpb(p_old_pi) !=
			    ib_port_info_get_mpb(p_pi))
				send_set = TRUE;

			ib_port_info_set_timeout(p_pi,
						 sm->p_subn->opt.
						 subnet_timeout);
			if (ib_port_info_get_timeout(p_pi) !=
			    ib_port_info_get_timeout(p_old_pi))
				send_set = TRUE;
		}

		/*
		   Several timeout mechanisms:
		 */
		p_remote_physp = osm_physp_get_remote(p_physp);
		if (port_num != 0 && p_remote_physp) {
			if (osm_node_get_type(osm_physp_get_node_ptr(p_physp))
			    == IB_NODE_TYPE_ROUTER) {
				ib_port_info_set_hoq_lifetime(p_pi,
							      sm->p_subn->
							      opt.
							      leaf_head_of_queue_lifetime);
			} else
			    if (osm_node_get_type
				(osm_physp_get_node_ptr(p_physp)) ==
				IB_NODE_TYPE_SWITCH) {
				/* Is remote end CA or router (a leaf port) ? */
				if (osm_node_get_type
				    (osm_physp_get_node_ptr(p_remote_physp)) !=
				    IB_NODE_TYPE_SWITCH) {
					ib_port_info_set_hoq_lifetime(p_pi,
								      sm->
								      p_subn->
								      opt.
								      leaf_head_of_queue_lifetime);
					ib_port_info_set_vl_stall_count(p_pi,
									sm->
									p_subn->
									opt.
									leaf_vl_stall_count);
				} else {
					ib_port_info_set_hoq_lifetime(p_pi,
								      sm->
								      p_subn->
								      opt.
								      head_of_queue_lifetime);
					ib_port_info_set_vl_stall_count(p_pi,
									sm->
									p_subn->
									opt.
									vl_stall_count);
				}
			}
			if (ib_port_info_get_hoq_lifetime(p_pi) !=
			    ib_port_info_get_hoq_lifetime(p_old_pi) ||
			    ib_port_info_get_vl_stall_count(p_pi) !=
			    ib_port_info_get_vl_stall_count(p_old_pi))
				send_set = TRUE;
		}

		ib_port_info_set_phy_and_overrun_err_thd(p_pi,
							 sm->p_subn->opt.
							 local_phy_errors_threshold,
							 sm->p_subn->opt.
							 overrun_errors_threshold);
		if (memcmp(&p_pi->error_threshold, &p_old_pi->error_threshold,
			   sizeof(p_pi->error_threshold)))
			send_set = TRUE;

		/*
		   Set the easy common parameters for all port types,
		   then determine the neighbor MTU.
		 */
		p_pi->link_width_enabled = p_old_pi->link_width_supported;
		if (memcmp(&p_pi->link_width_enabled,
			   &p_old_pi->link_width_enabled,
			   sizeof(p_pi->link_width_enabled)))
			send_set = TRUE;

		/*
		   force_link_speed == 0 leaves the enabled speeds alone; the
		   value 15 is applied only when enabled differs from supported.
		 */
		if (sm->p_subn->opt.force_link_speed &&
		    (sm->p_subn->opt.force_link_speed != 15 ||
		     ib_port_info_get_link_speed_enabled(p_pi) !=
		     ib_port_info_get_link_speed_sup(p_pi))) {
			ib_port_info_set_link_speed_enabled(p_pi,
							    sm->p_subn->opt.
							    force_link_speed);
			if (memcmp(&p_pi->link_speed, &p_old_pi->link_speed,
				   sizeof(p_pi->link_speed))) {
				send_set = TRUE;
				/* Determine whether QDR in LSE is being changed */
				if ((ib_port_info_get_link_speed_enabled(p_pi) &
				     IB_LINK_SPEED_ACTIVE_10 &&
				     !(ib_port_info_get_link_speed_enabled(p_old_pi) &
				      IB_LINK_SPEED_ACTIVE_10)) ||
				    ((!(ib_port_info_get_link_speed_enabled(p_pi) &
				       IB_LINK_SPEED_ACTIVE_10) &&
				      ib_port_info_get_link_speed_enabled(p_old_pi) &
				      IB_LINK_SPEED_ACTIVE_10)))
					qdr_change = 1;
			}
		}

		/*
		   opt.fdr10 == 1 requests FDR10 to be enabled, any other
		   non-zero value requests it to be disabled; only ports that
		   report FDR10 as supported are touched.
		 */
		if (sm->p_subn->opt.fdr10 &&
		    p_physp->ext_port_info.link_speed_supported & FDR10) {
			if (sm->p_subn->opt.fdr10 == 1) { /* enable */
				if (!(p_physp->ext_port_info.link_speed_enabled & FDR10))
					fdr10_change = 1;
			} else {	/* disable */
				if (p_physp->ext_port_info.link_speed_enabled & FDR10)
					fdr10_change = 1;
			}
			if (fdr10_change) {
				p_old_epi = &p_physp->ext_port_info;
				/*
				   Same reasoning as for payload: the whole
				   sizeof(payload2) is sent, so clear it before
				   overlaying the cached ExtendedPortInfo.
				 */
				memset(payload2, 0, sizeof(payload2));
				memcpy(payload2, p_old_epi,
				       sizeof(ib_mlnx_ext_port_info_t));
				p_epi->state_change_enable = 0x01;
				if (sm->p_subn->opt.fdr10 == 1)
					p_epi->link_speed_enabled = FDR10;
				else
					p_epi->link_speed_enabled = 0;
				send_set2 = TRUE;
			}
		}

		/* For switches the capability mask is taken from port 0 */
		if (osm_node_get_type(p_physp->p_node) == IB_NODE_TYPE_SWITCH) {
			physp0 = osm_node_get_physp_ptr(p_physp->p_node, 0);
			cap_mask = physp0->port_info.capability_mask;
		} else
			cap_mask = p_pi->capability_mask;
		if (!(cap_mask & IB_PORT_CAP_HAS_EXT_SPEEDS))
			qdr_change = 0;

		/* Do peer ports support extended link speeds ? */
		if (port_num != 0 && p_remote_physp) {
			osm_physp_t *rphysp0;
			ib_net32_t rem_cap_mask;

			if (osm_node_get_type(p_remote_physp->p_node) ==
			    IB_NODE_TYPE_SWITCH) {
				rphysp0 = osm_node_get_physp_ptr(p_remote_physp->p_node, 0);
				rem_cap_mask = rphysp0->port_info.capability_mask;
			} else
				rem_cap_mask = p_remote_physp->port_info.capability_mask;

			if (cap_mask & IB_PORT_CAP_HAS_EXT_SPEEDS &&
			    rem_cap_mask & IB_PORT_CAP_HAS_EXT_SPEEDS) {
				/*
				   NOTE(review): the extended enabled speeds are
				   compared against ib_port_info_get_link_speed_sup(),
				   the same getter used for the non-extended
				   force_link_speed check above - confirm whether
				   an extended-supported value was intended here.
				 */
				if (sm->p_subn->opt.force_link_speed_ext &&
				    (sm->p_subn->opt.force_link_speed_ext != IB_LINK_SPEED_EXT_SET_LSES ||
				     p_pi->link_speed_ext_enabled !=
				     ib_port_info_get_link_speed_sup(p_pi))) {
					p_pi->link_speed_ext_enabled = sm->p_subn->opt.force_link_speed_ext;
					if (memcmp(&p_pi->link_speed_ext_enabled,
						   &p_old_pi->link_speed_ext_enabled,
						   sizeof(p_pi->link_speed_ext_enabled)))
						send_set = TRUE;
				}
			}
		}

		/* calc new op_vls and mtu */
		op_vls =
		    osm_physp_calc_link_op_vls(sm->p_log, sm->p_subn, p_physp);
		mtu = osm_physp_calc_link_mtu(sm->p_log, p_physp);

		ib_port_info_set_neighbor_mtu(p_pi, mtu);
		if (ib_port_info_get_neighbor_mtu(p_pi) !=
		    ib_port_info_get_neighbor_mtu(p_old_pi))
			send_set = TRUE;

		ib_port_info_set_op_vls(p_pi, op_vls);
		if (ib_port_info_get_op_vls(p_pi) !=
		    ib_port_info_get_op_vls(p_old_pi))
			send_set = TRUE;

		/* provide the vl_high_limit from the qos mgr */
		if (sm->p_subn->opt.qos &&
		    p_physp->vl_high_limit != p_old_pi->vl_high_limit) {
			send_set = TRUE;
			p_pi->vl_high_limit = p_physp->vl_high_limit;
		}
	}

Send:
	/*
	   Give active_transition a defined value on every path: the context
	   is handed to osm_req_set() whenever send_set is TRUE, which can
	   happen without any port state change.
	 */
	context.pi_context.active_transition = FALSE;
	if (port_state != IB_LINK_NO_CHANGE &&
	    port_state != ib_port_info_get_port_state(p_old_pi)) {
		send_set = TRUE;
		if (port_state == IB_LINK_ACTIVE)
			context.pi_context.active_transition = TRUE;
	}

	context.pi_context.node_guid = osm_node_get_node_guid(p_node);
	context.pi_context.port_guid = osm_physp_get_port_guid(p_physp);
	context.pi_context.set_method = TRUE;
	context.pi_context.light_sweep = FALSE;

	/* We need to send the PortInfoSet request with the new sm_lid
	   in the following cases:
	   1. There is a change in the values (send_set == TRUE)
	   2. This is a switch external port (so it wasn't handled yet by
	   osm_lid_mgr) and first_time_master_sweep flag on the subnet is TRUE,
	   which means the SM just became master, and it then needs to send at
	   PortInfoSet to every port.
	 */
	if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH && port_num
	    && sm->p_subn->first_time_master_sweep == TRUE)
		send_set = TRUE;

	/*
	   NOTE(review): because of this early exit, payload2 is only sent
	   when a PortInfo Set is sent as well - confirm that an FDR10-only
	   change is meant to be skipped.
	 */
	if (!send_set)
		goto Exit;

	attr_mod = cl_hton32(port_num);
	if (qdr_change)
		attr_mod |= cl_hton32(1 << 31);	/* AM SMSupportExtendedSpeeds */
	status = osm_req_set(sm, osm_physp_get_dr_path_ptr(p_physp),
			     payload, sizeof(payload), IB_MAD_ATTR_PORT_INFO,
			     attr_mod, CL_DISP_MSGID_NONE, &context);
	if (status)
		ret = -1;

	if (send_set2) {
		status = osm_req_set(sm, osm_physp_get_dr_path_ptr(p_physp),
				     payload2, sizeof(payload2),
				     IB_MAD_ATTR_MLNX_EXTENDED_PORT_INFO,
				     cl_hton32(port_num),
				     CL_DISP_MSGID_NONE, &context);
		if (status)
			ret = -1;
	}

Exit:
	OSM_LOG_EXIT(sm->p_log);
	return ret;
}
示例#14
0
uint8_t
osm_switch_recommend_path(IN const osm_switch_t * const p_sw,
			  IN osm_port_t * p_port,
			  IN const uint16_t lid_ho,
			  IN unsigned start_from,
			  IN const boolean_t ignore_existing,
			  IN const boolean_t dor)
{
	/*
	   We support an enhanced LMC aware routing mode:
	   In the case of LMC > 0, we can track the remote side
	   system and node for all of the lids of the target
	   and try and avoid routing again through the same
	   system / node.

	   If this procedure is provided with the tracking array
	   and counter we can conduct this algorithm.
	 */
	boolean_t routing_for_lmc = (p_port->priv != NULL);
	uint16_t base_lid;
	uint8_t hops;
	uint8_t least_hops;
	uint8_t port_num;
	uint8_t num_ports;
	uint32_t least_paths = 0xFFFFFFFF;
	unsigned i;
	/*
	   The follwing will track the least paths if the
	   route should go through a new system/node
	 */
	uint32_t least_paths_other_sys = 0xFFFFFFFF;
	uint32_t least_paths_other_nodes = 0xFFFFFFFF;
	uint32_t least_forwarded_to = 0xFFFFFFFF;
	uint32_t check_count;
	uint8_t best_port = 0;
	/*
	   These vars track the best port if it connects to
	   not used system/node.
	 */
	uint8_t best_port_other_sys = 0;
	uint8_t best_port_other_node = 0;
	boolean_t port_found = FALSE;
	osm_physp_t *p_physp;
	osm_physp_t *p_rem_physp;
	osm_node_t *p_rem_node;
	osm_node_t *p_rem_node_first = NULL;
	struct osm_remote_node *p_remote_guid = NULL;

	CL_ASSERT(lid_ho > 0);

	if (p_port->p_node->sw) {
		if (p_port->p_node->sw == p_sw)
			return 0;
		base_lid = osm_port_get_base_lid(p_port);
	} else {
		p_physp = p_port->p_physp;
		if (!p_physp || !p_physp->p_remote_physp ||
		    !p_physp->p_remote_physp->p_node->sw)
			return OSM_NO_PATH;

		if (p_physp->p_remote_physp->p_node->sw == p_sw)
			return p_physp->p_remote_physp->port_num;
		base_lid =
		    osm_node_get_base_lid(p_physp->p_remote_physp->p_node, 0);
	}
	base_lid = cl_ntoh16(base_lid);

	num_ports = p_sw->num_ports;

	least_hops = osm_switch_get_least_hops(p_sw, base_lid);
	if (least_hops == OSM_NO_PATH)
		return (OSM_NO_PATH);

	/*
	   First, inquire with the forwarding table for an existing
	   route.  If one is found, honor it unless:
	   1. the ignore existing flag is set.
	   2. the physical port is not a valid one or not healthy
	   3. the physical port has a remote port (the link is up)
	   4. the port has min-hops to the target (avoid loops)
	 */
	if (!ignore_existing) {
		port_num = osm_switch_get_port_by_lid(p_sw, lid_ho);

		if (port_num != OSM_NO_PATH) {
			CL_ASSERT(port_num < num_ports);

			p_physp =
			    osm_node_get_physp_ptr(p_sw->p_node, port_num);
			/*
			   Don't be too trusting of the current forwarding table!
			   Verify that the port number is legal and that the
			   LID is reachable through this port.
			 */
			if (p_physp && osm_physp_is_healthy(p_physp) &&
			    osm_physp_get_remote(p_physp)) {
				hops =
				    osm_switch_get_hop_count(p_sw, base_lid,
							     port_num);
				/*
				   If we aren't using pre-defined user routes
				   function, then we need to make sure that the
				   current path is the minimum one. In case of
				   having such a user function - this check will
				   not be done, and the old routing will be used.
				   Note: This means that it is the user's job to
				   clean all data in the forwarding tables that
				   he wants to be overridden by the minimum
				   hop function.
				 */
				if (hops == least_hops)
					return (port_num);
			}
		}
	}

	/*
	   This algorithm selects a port based on a static load balanced
	   selection across equal hop-count ports.
	   There is lots of room for improved sophistication here,
	   possibly guided by user configuration info.
	 */

	/*
	   OpenSM routing is "local" - not considering a full lid to lid
	   path. As such we can not guarantee a path will not loop if we
	   do not always follow least hops.
	   So we must abort if not least hops.
	 */

	/* port number starts with one and num_ports is 1 + num phys ports */
	for (i = start_from; i < start_from + num_ports; i++) {
		port_num = i%num_ports;
		if (!port_num ||
		    osm_switch_get_hop_count(p_sw, base_lid, port_num) !=
		    least_hops)
			continue;

		/* let us make sure it is not down or unhealthy */
		p_physp = osm_node_get_physp_ptr(p_sw->p_node, port_num);
		if (!p_physp || !osm_physp_is_healthy(p_physp) ||
		    /*
		       we require all - non sma ports to be linked
		       to be routed through
		     */
		    !osm_physp_get_remote(p_physp))
			continue;

		/*
		   We located a least-hop port, possibly one of many.
		   For this port, check the running total count of
		   the number of paths through this port.  Select
		   the port routing the least number of paths.
		 */
		check_count =
		    osm_port_prof_path_count_get(&p_sw->p_prof[port_num]);

		/*
		   Advanced LMC routing requires tracking of the
		   best port by the node connected to the other side of
		   it.
		 */
		if (routing_for_lmc) {
			/* Is the sys guid already used ? */
			p_remote_guid = osm_switch_find_sys_guid_count(p_sw,
								       p_port->priv,
								       port_num);

			/* If not update the least hops for this case */
			if (!p_remote_guid) {
				if (check_count < least_paths_other_sys) {
					least_paths_other_sys = check_count;
					best_port_other_sys = port_num;
					least_forwarded_to = 0;
				}
			} else {	/* same sys found - try node */
				/* Else is the node guid already used ? */
				p_remote_guid = osm_switch_find_node_guid_count(p_sw,
										p_port->priv,
										port_num);

				/* If not update the least hops for this case */
				if (!p_remote_guid
				    && check_count < least_paths_other_nodes) {
					least_paths_other_nodes = check_count;
					best_port_other_node = port_num;
					least_forwarded_to = 0;
				}
				/* else prior sys and node guid already used */

			}	/* same sys found */
		}

		/* routing for LMC mode */
		/*
		   the count is min but also lower then the max subscribed
		 */
		if (check_count < least_paths) {
			if (dor) {
				/* Get the Remote Node */
				p_rem_physp = osm_physp_get_remote(p_physp);
				p_rem_node =
				    osm_physp_get_node_ptr(p_rem_physp);
				/* use the first dimension, but spread
				 * traffic out among the group of ports
				 * representing that dimension */
				if (port_found) {
					if (p_rem_node != p_rem_node_first)
						continue;
				} else
					p_rem_node_first = p_rem_node;
			}
			port_found = TRUE;
			best_port = port_num;
			least_paths = check_count;
			if (routing_for_lmc
			    && p_remote_guid
			    && p_remote_guid->forwarded_to < least_forwarded_to)
				least_forwarded_to = p_remote_guid->forwarded_to;
		} else if (routing_for_lmc
			   && p_remote_guid
			   && check_count == least_paths
			   && p_remote_guid->forwarded_to < least_forwarded_to) {
			least_forwarded_to = p_remote_guid->forwarded_to;
			best_port = port_num;
		}
	}

	if (port_found == FALSE)
		return (OSM_NO_PATH);

	/*
	   if we are in enhanced routing mode and the best port is not
	   the local port 0
	 */
	if (routing_for_lmc && best_port) {
		/* Select the least hop port of the non used sys first */
		if (best_port_other_sys)
			best_port = best_port_other_sys;
		else if (best_port_other_node)
			best_port = best_port_other_node;
	}

	return (best_port);
}
/**********************************************************************
 Process a PortInfo attribute received for a switch port.

 sm      - subnet manager object (log, subnet options, ucast manager).
 p_node  - node owning the port.
 p_physp - physical port the PortInfo describes.
 p_pi    - the received PortInfo.

 Based on the reported link state:
  - DOWN: any recorded link to a remote port is removed (and handed
    to the unicast cache first when that cache is valid).
  - INIT/ARMED/ACTIVE: optionally issues a MLNX ExtendedPortInfo Get,
    then, unless in_sweep_hop_0 is set, requests NodeInfo through this
    port over an extended directed route.
 Afterwards the stored PortInfo is refreshed and, when the link is not
 down and the remote node has a switch object, the subnet's
 min_sw_data_vls is lowered if this port supports fewer data VLs.

 The plock must be held before calling this function.
**********************************************************************/
static void pi_rcv_process_switch_ext_port(IN osm_sm_t * sm,
					   IN osm_node_t * p_node,
					   IN osm_physp_t * p_physp,
					   IN ib_port_info_t * p_pi)
{
	osm_madw_context_t mad_ctx;
	osm_dr_path_t next_hop_path;
	osm_physp_t *p_peer_physp;
	osm_physp_t *p_port0;
	osm_node_t *p_peer_node;
	ib_api_status_t rc = IB_SUCCESS;
	ib_net64_t port0_m_key;
	unsigned vls;
	unsigned vls_limit;
	uint8_t port_num;
	uint8_t peer_port_num;
	int query_mlnx_epi = 0;

	OSM_LOG_ENTER(sm->p_log);

	port_num = osm_physp_get_port_num(p_physp);

	/* The vendor-specific query is only considered when fdr10 is on. */
	if (sm->p_subn->opt.fdr10)
		query_mlnx_epi = is_mlnx_ext_port_info_supported(
				ib_node_info_get_vendor_id(&p_node->node_info),
				p_node->node_info.device_id);

	switch (ib_port_info_get_port_state(p_pi)) {
	case IB_LINK_DOWN:
		/* Link went away: drop whatever peer we had recorded. */
		p_peer_physp = osm_physp_get_remote(p_physp);
		if (!p_peer_physp)
			break;

		p_peer_node = osm_physp_get_node_ptr(p_peer_physp);
		peer_port_num = osm_physp_get_port_num(p_peer_physp);

		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
			"Unlinking local node 0x%" PRIx64 ", port %u"
			"\n\t\t\t\tand remote node 0x%" PRIx64 ", port %u\n",
			cl_ntoh64(osm_node_get_node_guid(p_node)),
			port_num,
			cl_ntoh64(osm_node_get_node_guid(p_peer_node)),
			peer_port_num);

		if (sm->ucast_mgr.cache_valid)
			osm_ucast_cache_add_link(&sm->ucast_mgr, p_physp,
						 p_peer_physp);

		osm_node_unlink(p_node, port_num, p_peer_node, peer_port_num);
		break;

	case IB_LINK_INIT:
	case IB_LINK_ARMED:
	case IB_LINK_ACTIVE:
		p_port0 = osm_node_get_physp_ptr(p_node, 0);

		if (query_mlnx_epi) {
			/* The request is keyed with port 0's M_Key. */
			port0_m_key =
			    ib_port_info_get_m_key(&p_port0->port_info);

			mad_ctx.pi_context.node_guid =
			    osm_node_get_node_guid(p_node);
			mad_ctx.pi_context.port_guid =
			    osm_physp_get_port_guid(p_physp);
			mad_ctx.pi_context.set_method = FALSE;
			mad_ctx.pi_context.light_sweep = FALSE;
			mad_ctx.pi_context.active_transition = FALSE;
			mad_ctx.pi_context.client_rereg = FALSE;

			rc = osm_req_get(sm,
					 osm_physp_get_dr_path_ptr(p_physp),
					 IB_MAD_ATTR_MLNX_EXTENDED_PORT_INFO,
					 cl_hton32(port_num), FALSE,
					 port0_m_key, CL_DISP_MSGID_NONE,
					 &mad_ctx);
			if (rc != IB_SUCCESS)
				OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0F11: "
					"Failure initiating MLNX ExtPortInfo request (%s)\n",
					ib_get_err_str(rc));
		}

		/*
		   in_sweep_hop_0 set means the SM sits on this switch and
		   this is our local switch: do not probe through it.
		 */
		if (sm->p_subn->in_sweep_hop_0)
			break;

		/*
		   Never probe back out of the port that answered the SMP,
		   otherwise discovery would loop forever.
		 */
		if (p_pi->local_port_num == port_num) {
			OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
				"Skipping SMP responder port %u\n",
				p_pi->local_port_num);
			break;
		}

		/*
		   Take a copy of the path to this port, add one hop out
		   of this port, and ask whatever is there for NodeInfo.
		 */
		next_hop_path = *osm_physp_get_dr_path_ptr(p_physp);
		if (osm_dr_path_extend(&next_hop_path, port_num)) {
			OSM_LOG(sm->p_log, OSM_LOG_ERROR,
				"ERR 0F08: "
				"DR path with hop count %d couldn't be extended\n",
				next_hop_path.hop_count);
			break;
		}

		memset(&mad_ctx, 0, sizeof(mad_ctx));
		mad_ctx.ni_context.node_guid = osm_node_get_node_guid(p_node);
		mad_ctx.ni_context.port_num = port_num;

		rc = osm_req_get(sm, &next_hop_path, IB_MAD_ATTR_NODE_INFO,
				 0, TRUE, 0, CL_DISP_MSGID_NONE, &mad_ctx);
		if (rc != IB_SUCCESS)
			OSM_LOG(sm->p_log, OSM_LOG_ERROR,
				"ERR 0F02: "
				"Failure initiating NodeInfo request (%s)\n",
				ib_get_err_str(rc));
		break;

	default:
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0F03: "
			"Unknown link state = %u, port = %u\n",
			ib_port_info_get_port_state(p_pi),
			p_pi->local_port_num);
		break;
	}

	/*
	   Clear the switch's pending-update mark once a port beyond INIT
	   is seen and the switch reports no state change.
	 */
	if (ib_port_info_get_port_state(p_pi) > IB_LINK_INIT &&
	    p_node->sw &&
	    !ib_switch_info_get_state_change(&p_node->sw->switch_info) &&
	    p_node->sw->need_update == 1)
		p_node->sw->need_update = 0;

	/* A port flagged for update invalidates the existing LFTs. */
	if (p_physp->need_update)
		sm->p_subn->ignore_existing_lfts = TRUE;

	/* Store the freshly received PortInfo on the physical port. */
	osm_physp_set_port_info(p_physp, p_pi, sm);

	/* The data-VL bookkeeping below only applies to links that are up. */
	p_peer_physp = (ib_port_info_get_port_state(p_pi) == IB_LINK_DOWN) ?
	    NULL : osm_physp_get_remote(p_physp);

	if (p_peer_physp) {
		p_peer_node = osm_physp_get_node_ptr(p_peer_physp);
		if (p_peer_node->sw) {
			/*
			   Data VLs derived from VLCap, capped first by the
			   configured max_op_vls and then by IB_MAX_NUM_VLS - 1.
			 */
			vls = 1U << (ib_port_info_get_vl_cap(p_pi) - 1);
			vls_limit = 1U << (sm->p_subn->opt.max_op_vls - 1);
			if (vls > vls_limit)
				vls = vls_limit;
			if (vls >= IB_MAX_NUM_VLS)
				vls = IB_MAX_NUM_VLS - 1;

			if ((uint8_t)vls < sm->p_subn->min_sw_data_vls) {
				OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
					"Setting switch port minimal data VLs "
					"to:%u defined by node:0x%"
					PRIx64 ", port:%u\n", vls,
					cl_ntoh64(osm_node_get_node_guid(p_node)),
					port_num);
				sm->p_subn->min_sw_data_vls = vls;
			}
		}
	}

	OSM_LOG_EXIT(sm->p_log);
}
示例#16
0
/**********************************************************************
 Process a PortInfo attribute received for a switch port.

 sm      - subnet manager object (log, subnet state, ucast manager).
 p_node  - node owning the port.
 p_physp - physical port the PortInfo describes.
 p_pi    - the received PortInfo.

 For a non-zero port, and only while in_sweep_hop_0 is not set, the
 reported link state decides what happens: a DOWN port has its
 recorded link removed, while an INIT/ARMED/ACTIVE port is probed by
 requesting NodeInfo over the directed route extended through it.
 The stored PortInfo is then refreshed; port 0 additionally gets its
 LID checked before the refresh and endport processing after it.

 The plock must be held before calling this function.
**********************************************************************/
static void
__osm_pi_rcv_process_switch_port(IN osm_sm_t * sm,
				 IN osm_node_t * const p_node,
				 IN osm_physp_t * const p_physp,
				 IN ib_port_info_t * const p_pi)
{
	osm_madw_context_t mad_ctx;
	osm_dr_path_t next_hop_path;
	osm_physp_t *p_peer_physp;
	osm_node_t *p_peer_node;
	ib_api_status_t rc = IB_SUCCESS;
	uint8_t port_num;
	uint8_t peer_port_num;
	int is_port0;

	OSM_LOG_ENTER(sm->p_log);

	port_num = osm_physp_get_port_num(p_physp);
	is_port0 = (port_num == 0);

	/*
	   Port 0 is never probed through. Likewise, in_sweep_hop_0 set
	   means the SM is on this switch and this is our local switch,
	   so discovery must not continue through it.
	 */
	if (!is_port0 && !sm->p_subn->in_sweep_hop_0) {
		switch (ib_port_info_get_port_state(p_pi)) {
		case IB_LINK_DOWN:
			/* Link went away: drop any recorded peer. */
			p_peer_physp = osm_physp_get_remote(p_physp);
			if (!p_peer_physp)
				break;

			p_peer_node = osm_physp_get_node_ptr(p_peer_physp);
			peer_port_num = osm_physp_get_port_num(p_peer_physp);

			OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
				"Unlinking local node 0x%" PRIx64 ", port %u"
				"\n\t\t\t\tand remote node 0x%" PRIx64
				", port %u\n",
				cl_ntoh64(osm_node_get_node_guid(p_node)),
				port_num,
				cl_ntoh64(osm_node_get_node_guid(p_peer_node)),
				peer_port_num);

			if (sm->ucast_mgr.cache_valid)
				osm_ucast_cache_add_link(&sm->ucast_mgr,
							 p_physp,
							 p_peer_physp);

			osm_node_unlink(p_node, port_num,
					p_peer_node, peer_port_num);
			break;

		case IB_LINK_INIT:
		case IB_LINK_ARMED:
		case IB_LINK_ACTIVE:
			/*
			   Never probe back out of the port that answered
			   the SMP, otherwise discovery would loop forever.
			 */
			if (p_pi->local_port_num == port_num) {
				OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
					"Skipping SMP responder port %u\n",
					p_pi->local_port_num);
				break;
			}

			/*
			   Copy the path to this port, add one hop out of
			   this port, and request NodeInfo over the result.
			 */
			next_hop_path = *osm_physp_get_dr_path_ptr(p_physp);
			osm_dr_path_extend(&next_hop_path, port_num);

			memset(&mad_ctx, 0, sizeof(mad_ctx));
			mad_ctx.ni_context.node_guid =
			    osm_node_get_node_guid(p_node);
			mad_ctx.ni_context.port_num = port_num;

			rc = osm_req_get(sm, &next_hop_path,
					 IB_MAD_ATTR_NODE_INFO, 0,
					 CL_DISP_MSGID_NONE, &mad_ctx);
			if (rc != IB_SUCCESS)
				OSM_LOG(sm->p_log, OSM_LOG_ERROR,
					"ERR 0F02: "
					"Failure initiating NodeInfo request (%s)\n",
					ib_get_err_str(rc));
			break;

		default:
			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0F03: "
				"Unknown link state = %u, port = %u\n",
				ib_port_info_get_port_state(p_pi),
				p_pi->local_port_num);
			break;
		}
	}

	/* A port beyond INIT clears the switch's pending-update mark. */
	if (ib_port_info_get_port_state(p_pi) > IB_LINK_INIT &&
	    p_node->sw && p_node->sw->need_update == 1)
		p_node->sw->need_update = 0;

	/* A port flagged for update invalidates the existing LFTs. */
	if (p_physp->need_update)
		sm->p_subn->ignore_existing_lfts = TRUE;

	/* Port 0 gets its LID checked before the PortInfo is stored. */
	if (is_port0)
		pi_rcv_check_and_fix_lid(sm->p_log, p_pi, p_physp);

	/* Store the freshly received PortInfo on the physical port. */
	osm_physp_set_port_info(p_physp, p_pi);

	if (is_port0) {
		/*
		   On a base (non-enhanced) switch port 0 the received
		   PortInfo is copied verbatim; PortState is not used
		   there, but this covers the case where it reads DOWN.
		 */
		if (p_node->sw &&
		    !ib_switch_info_is_enhanced_port0(&p_node->sw->switch_info))
			p_physp->port_info = *p_pi;

		__osm_pi_rcv_process_endport(sm, p_physp, p_pi);
	}

	OSM_LOG_EXIT(sm->p_log);
}