Exemple #1
0
/** ===========================================================================
 */
static void extract_port(osm_physp_t *p_physp, uint16_t *p_lid_ho,
			 uint64_t pkey_base_offset, uint16_t pkey_tbl_size,
			 uint64_t *p_port_offset,
			 struct ssa_db_extract *p_ssa_db)
{
	struct ep_map_rec *p_map_rec;
	uint64_t rec_key;
	uint16_t lid_ho;

	if (p_lid_ho) {
		/* in case of switch port */
		rec_key = ep_rec_gen_key(*p_lid_ho,
					 osm_physp_get_port_num(p_physp));
		lid_ho = *p_lid_ho;
	} else {
		rec_key = ep_rec_gen_key(ntohs(osm_physp_get_base_lid(p_physp)),
					 osm_physp_get_port_num(p_physp));
		lid_ho = 0;
	}

	smdb_port_init(p_physp, pkey_base_offset, pkey_tbl_size,
			     htons(lid_ho),
			     &p_ssa_db->p_port_tbl[*p_port_offset]);
	p_map_rec = ep_map_rec_init(*p_port_offset);
	cl_qmap_insert(&p_ssa_db->ep_port_tbl, rec_key,
		       &p_map_rec->map_item);
	*p_port_offset = *p_port_offset + 1;
}
Exemple #2
0
/** =========================================================================
 */
void ep_port_tbl_rec_init(osm_physp_t *p_physp, struct ep_port_tbl_rec *p_rec)
{
	const ib_port_info_t *p_pi;
	const osm_physp_t *p_physp0;
	uint8_t is_fdr10_active;
	uint8_t is_switch;

	if (osm_node_get_type(p_physp->p_node) == IB_NODE_TYPE_SWITCH &&
	    osm_physp_get_port_num(p_physp) > 0) {
		/* for SW external ports, port 0 Capability Mask is used  */
		p_physp0 = osm_node_get_physp_ptr((osm_node_t *)p_physp->p_node, 0);
		p_pi = &p_physp0->port_info;
	} else {
		p_pi = &p_physp->port_info;
	}

	is_fdr10_active = ((p_physp->ext_port_info.link_speed_active & FDR10) ? 0xff : 0) &
					  SSA_DB_PORT_IS_FDR10_ACTIVE_MASK;
	is_switch = ((osm_node_get_type(p_physp->p_node) == IB_NODE_TYPE_SWITCH) ? 0xff : 0) &
					  SSA_DB_PORT_IS_SWITCH_MASK;

	p_rec->pkey_tbl_offset		= 0;
	p_rec->pkey_tbl_size		= 0;
	p_rec->port_lid			= osm_physp_get_base_lid(p_physp);
	p_rec->port_num			= osm_physp_get_port_num(p_physp);
	p_rec->neighbor_mtu		= ib_port_info_get_neighbor_mtu(&p_physp->port_info);
	p_rec->rate			= ib_port_info_compute_rate(&p_physp->port_info,
								    p_pi->capability_mask & IB_PORT_CAP_HAS_EXT_SPEEDS) &
					  SSA_DB_PORT_RATE_MASK;
	p_rec->vl_enforce		= p_physp->port_info.vl_enforce;
	p_rec->rate			= (uint8_t) (p_rec->rate | is_fdr10_active | is_switch);
}
static void sa_pkey_create(IN osm_sa_t * sa, IN osm_physp_t * p_physp,
			   IN osm_pkey_search_ctxt_t * p_ctxt,
			   IN uint16_t block)
{
	osm_pkey_item_t *p_rec_item;
	uint16_t lid;
	ib_pkey_table_t *tbl;

	OSM_LOG_ENTER(sa->p_log);

	p_rec_item = malloc(sizeof(*p_rec_item));
	if (p_rec_item == NULL) {
		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4602: "
			"rec_item alloc failed\n");
		goto Exit;
	}

	if (p_physp->p_node->node_info.node_type != IB_NODE_TYPE_SWITCH)
		lid = p_physp->port_info.base_lid;
	else
		lid = osm_node_get_base_lid(p_physp->p_node, 0);

	OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
		"New P_Key table for: port 0x%016" PRIx64
		", lid %u, port %u Block:%u\n",
		cl_ntoh64(osm_physp_get_port_guid(p_physp)),
		cl_ntoh16(lid), osm_physp_get_port_num(p_physp), block);

	memset(p_rec_item, 0, sizeof(*p_rec_item));

	p_rec_item->rec.lid = lid;
	p_rec_item->rec.block_num = block;
	p_rec_item->rec.port_num = osm_physp_get_port_num(p_physp);
	/* FIXME: There are ninf.PartitionCap or swinf.PartitionEnforcementCap
	   pkey entries so everything in that range is a valid block number
	   even if opensm is not using it. Return 0. However things outside
	   that range should return no entries.. Not sure how to figure that
	   here? The range of pkey_tbl can be less than the cap, so
	   this falsely triggers. */
	tbl = osm_pkey_tbl_block_get(osm_physp_get_pkey_tbl(p_physp), block);
	if (tbl)
		p_rec_item->rec.pkey_tbl = *tbl;

	cl_qlist_insert_tail(p_ctxt->p_list, &p_rec_item->list_item);

Exit:
	OSM_LOG_EXIT(sa->p_log);
}
static void osm_lash_process_switch(lash_t * p_lash, osm_switch_t * p_sw)
{
	osm_log_t *p_log = &p_lash->p_osm->log;
	int i, port_count;
	osm_physp_t *p_current_physp, *p_remote_physp;
	unsigned switch_a_lash_id, switch_b_lash_id;

	OSM_LOG_ENTER(p_log);

	switch_a_lash_id = get_lash_id(p_sw);
	port_count = osm_node_get_num_physp(p_sw->p_node);

	/* starting at port 1, ignoring management port on switch */
	for (i = 1; i < port_count; i++) {

		p_current_physp = osm_node_get_physp_ptr(p_sw->p_node, i);
		if (p_current_physp) {
			p_remote_physp = p_current_physp->p_remote_physp;
			if (p_remote_physp && p_remote_physp->p_node->sw) {
				int physical_port_a_num =
				    osm_physp_get_port_num(p_current_physp);
				int physical_port_b_num =
				    osm_physp_get_port_num(p_remote_physp);
				switch_b_lash_id =
				    get_lash_id(p_remote_physp->p_node->sw);

				connect_switches(p_lash, switch_a_lash_id,
						 switch_b_lash_id,
						 physical_port_a_num);
				OSM_LOG(p_log, OSM_LOG_VERBOSE,
					"LASH SUCCESS connected G 0x%016" PRIx64
					" , lash_id(%u), P(%u) " " to G 0x%016"
					PRIx64 " , lash_id(%u) , P(%u)\n",
					cl_ntoh64(osm_physp_get_port_guid
						  (p_current_physp)),
					switch_a_lash_id, physical_port_a_num,
					cl_ntoh64(osm_physp_get_port_guid
						  (p_remote_physp)),
					switch_b_lash_id, physical_port_b_num);
			}
		}
	}

	OSM_LOG_EXIT(p_log);
}
static void
__osm_sa_pkey_create(IN osm_sa_t * sa,
		     IN osm_physp_t * const p_physp,
		     IN osm_pkey_search_ctxt_t * const p_ctxt,
		     IN uint16_t block)
{
	osm_pkey_item_t *p_rec_item;
	uint16_t lid;
	ib_api_status_t status = IB_SUCCESS;

	OSM_LOG_ENTER(sa->p_log);

	p_rec_item = malloc(sizeof(*p_rec_item));
	if (p_rec_item == NULL) {
		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4602: "
			"rec_item alloc failed\n");
		status = IB_INSUFFICIENT_RESOURCES;
		goto Exit;
	}

	if (p_physp->p_node->node_info.node_type != IB_NODE_TYPE_SWITCH)
		lid = p_physp->port_info.base_lid;
	else
		lid = osm_node_get_base_lid(p_physp->p_node, 0);

	OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
		"New P_Key table for: port 0x%016" PRIx64
		", lid %u, port %u Block:%u\n",
		cl_ntoh64(osm_physp_get_port_guid(p_physp)),
		cl_ntoh16(lid), osm_physp_get_port_num(p_physp), block);

	memset(p_rec_item, 0, sizeof(*p_rec_item));

	p_rec_item->rec.lid = lid;
	p_rec_item->rec.block_num = block;
	p_rec_item->rec.port_num = osm_physp_get_port_num(p_physp);
	p_rec_item->rec.pkey_tbl =
	    *(osm_pkey_tbl_block_get(osm_physp_get_pkey_tbl(p_physp), block));

	cl_qlist_insert_tail(p_ctxt->p_list, &p_rec_item->list_item);

Exit:
	OSM_LOG_EXIT(sa->p_log);
}
Exemple #6
0
static void
__osm_sa_slvl_create(IN osm_sa_t * sa,
		     IN const osm_physp_t * const p_physp,
		     IN osm_slvl_search_ctxt_t * const p_ctxt,
		     IN uint8_t in_port_idx)
{
	osm_slvl_item_t *p_rec_item;
	uint16_t lid;
	ib_api_status_t status = IB_SUCCESS;

	OSM_LOG_ENTER(sa->p_log);

	p_rec_item = malloc(sizeof(*p_rec_item));
	if (p_rec_item == NULL) {
		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2602: "
			"rec_item alloc failed\n");
		status = IB_INSUFFICIENT_RESOURCES;
		goto Exit;
	}

	if (p_physp->p_node->node_info.node_type != IB_NODE_TYPE_SWITCH)
		lid = p_physp->port_info.base_lid;
	else
		lid = osm_node_get_base_lid(p_physp->p_node, 0);

	OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
		"New SLtoVL Map for: OUT port 0x%016" PRIx64
		", lid 0x%X, port %u to In Port:%u\n",
		cl_ntoh64(osm_physp_get_port_guid(p_physp)),
		cl_ntoh16(lid), osm_physp_get_port_num(p_physp), in_port_idx);

	memset(p_rec_item, 0, sizeof(*p_rec_item));

	p_rec_item->rec.lid = lid;
	p_rec_item->rec.out_port_num = osm_physp_get_port_num(p_physp);
	p_rec_item->rec.in_port_num = in_port_idx;
	p_rec_item->rec.slvl_tbl =
	    *(osm_physp_get_slvl_tbl(p_physp, in_port_idx));

	cl_qlist_insert_tail(p_ctxt->p_list, &p_rec_item->list_item);

Exit:
	OSM_LOG_EXIT(sa->p_log);
}
Exemple #7
0
/** ===========================================================================
 */
static void extract_link(osm_physp_t *p_physp, uint16_t *p_lid_ho,
			      uint64_t *p_link_offset,
			      struct ssa_db_extract *p_ssa_db)
{
	struct ep_map_rec *p_map_rec;
	uint64_t rec_key;

	if (p_lid_ho)
		rec_key = ep_rec_gen_key(*p_lid_ho,
					 osm_physp_get_port_num(p_physp));
	else
		rec_key = ep_rec_gen_key(ntohs(osm_physp_get_base_lid(p_physp)),
					 osm_physp_get_port_num(p_physp));

	smdb_link_init(p_physp, &p_ssa_db->p_link_tbl[*p_link_offset]);
	p_map_rec = ep_map_rec_init(*p_link_offset);
	cl_qmap_insert(&p_ssa_db->ep_link_tbl, rec_key, &p_map_rec->map_item);
	*p_link_offset = *p_link_offset + 1;
}
Exemple #8
0
/** =========================================================================
 */
void ep_link_tbl_rec_init(osm_physp_t *p_physp, struct ep_link_tbl_rec *p_rec)
{
	osm_physp_t *p_remote_physp;

	if (osm_node_get_type(p_physp->p_node) == IB_NODE_TYPE_SWITCH) {
		p_rec->from_lid = osm_node_get_base_lid(p_physp->p_node, 0);
		p_rec->from_port_num = osm_physp_get_port_num(p_physp);
	} else {
		p_rec->from_lid = osm_physp_get_base_lid(p_physp);
		p_rec->from_port_num = 0;
	}

	p_remote_physp = osm_physp_get_remote(p_physp);

	if (osm_node_get_type(p_remote_physp->p_node) ==
						IB_NODE_TYPE_SWITCH) {
		p_rec->to_lid = osm_node_get_base_lid(p_remote_physp->p_node, 0);
		p_rec->to_port_num =osm_physp_get_port_num(p_remote_physp);
	} else {
		p_rec->to_lid = osm_physp_get_base_lid(p_remote_physp);
		p_rec->to_port_num = 0;
	}
	memset(&p_rec->pad, 0, sizeof(p_rec->pad));
}
Exemple #9
0
osm_node_t *osm_node_get_remote_node(IN osm_node_t * p_node,
				     IN uint8_t port_num,
				     OUT uint8_t * p_remote_port_num)
{
	osm_physp_t *p_physp;
	osm_physp_t *p_remote_physp;

	p_physp = osm_node_get_physp_ptr(p_node, port_num);

	if (!p_physp || !osm_physp_has_any_link(p_physp))
		return NULL;

	p_remote_physp = osm_physp_get_remote(p_physp);
	if (p_remote_port_num)
		*p_remote_port_num = osm_physp_get_port_num(p_remote_physp);

	return osm_physp_get_node_ptr(p_remote_physp);
}
Exemple #10
0
/**********************************************************************
 Initiate a remote port info request for the given physical port
 **********************************************************************/
static void state_mgr_get_remote_port_info(IN osm_sm_t * sm,
					   IN osm_physp_t * p_physp)
{
	osm_dr_path_t *p_dr_path;
	osm_dr_path_t rem_node_dr_path;
	osm_madw_context_t mad_context;
	ib_api_status_t status;

	OSM_LOG_ENTER(sm->p_log);

	/* generate a dr path leaving on the physp to the remote node */
	p_dr_path = osm_physp_get_dr_path_ptr(p_physp);
	memcpy(&rem_node_dr_path, p_dr_path, sizeof(osm_dr_path_t));
	if (osm_dr_path_extend(&rem_node_dr_path, osm_physp_get_port_num(p_physp))) {
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 332D: "
			"DR path with hop count %d couldn't be extended "
			"so skipping PortInfo query\n",
			p_dr_path->hop_count);
		goto Exit;
	}

	memset(&mad_context, 0, sizeof(mad_context));

	mad_context.pi_context.node_guid =
	    osm_node_get_node_guid(osm_physp_get_node_ptr(p_physp));
	mad_context.pi_context.port_guid = p_physp->port_guid;
	mad_context.pi_context.set_method = FALSE;
	mad_context.pi_context.light_sweep = TRUE;
	mad_context.pi_context.active_transition = FALSE;

	/* note that with some negative logic - if the query failed it means
	 * that there is no point in going to heavy sweep */
	status = osm_req_get(sm, &rem_node_dr_path, IB_MAD_ATTR_PORT_INFO, 0,
			     CL_DISP_MSGID_NONE, &mad_context);
	if (status != IB_SUCCESS)
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 332E: "
			"Request for PortInfo failed (%s)\n",
			ib_get_err_str(status));

Exit:
	OSM_LOG_EXIT(sm->p_log);
}
static struct osm_remote_node *find_and_add_remote_sys(osm_switch_t * sw,
						       uint8_t port,
						       boolean_t dor, struct
						       osm_remote_guids_count
						       *r)
{
	unsigned i;
	osm_physp_t *p = osm_node_get_physp_ptr(sw->p_node, port);
	osm_node_t *node = p->p_remote_physp->p_node;
	uint8_t rem_port = osm_physp_get_port_num(p->p_remote_physp);

	for (i = 0; i < r->count; i++)
		if (r->guids[i].node == node)
			if (!dor || (r->guids[i].port == rem_port))
				return &r->guids[i];

	r->guids[i].node = node;
	r->guids[i].forwarded_to = 0;
	r->guids[i].port = rem_port;
	r->count++;
	return &r->guids[i];
}
uint8_t osm_switch_recommend_path(IN const osm_switch_t * p_sw,
				  IN osm_port_t * p_port, IN uint16_t lid_ho,
				  IN unsigned start_from,
				  IN boolean_t ignore_existing,
				  IN boolean_t routing_for_lmc,
				  IN boolean_t dor,
				  IN boolean_t port_shifting,
				  IN uint32_t scatter_ports)
{
	/*
	   We support an enhanced LMC aware routing mode:
	   In the case of LMC > 0, we can track the remote side
	   system and node for all of the lids of the target
	   and try and avoid routing again through the same
	   system / node.

	   Assume if routing_for_lmc is true that this procedure was
	   provided the tracking array and counter via p_port->priv,
	   and we can conduct this algorithm.
	 */
	uint16_t base_lid;
	uint8_t hops;
	uint8_t least_hops;
	uint8_t port_num;
	uint8_t num_ports;
	uint32_t least_paths = 0xFFFFFFFF;
	unsigned i;
	/*
	   The follwing will track the least paths if the
	   route should go through a new system/node
	 */
	uint32_t least_paths_other_sys = 0xFFFFFFFF;
	uint32_t least_paths_other_nodes = 0xFFFFFFFF;
	uint32_t least_forwarded_to = 0xFFFFFFFF;
	uint32_t check_count;
	uint8_t best_port = 0;
	/*
	   These vars track the best port if it connects to
	   not used system/node.
	 */
	uint8_t best_port_other_sys = 0;
	uint8_t best_port_other_node = 0;
	boolean_t port_found = FALSE;
	osm_physp_t *p_physp;
	osm_physp_t *p_rem_physp;
	osm_node_t *p_rem_node;
	osm_node_t *p_rem_node_first = NULL;
	struct osm_remote_node *p_remote_guid = NULL;
	struct osm_remote_node null_remote_node = {NULL, 0, 0};
	struct switch_port_path port_paths[IB_NODE_NUM_PORTS_MAX];
	unsigned int port_paths_total_paths = 0;
	unsigned int port_paths_count = 0;
	uint8_t scatter_possible_ports[IB_NODE_NUM_PORTS_MAX];
	unsigned int scatter_possible_ports_count = 0;
	int found_sys_guid = 0;
	int found_node_guid = 0;

	CL_ASSERT(lid_ho > 0);

	if (p_port->p_node->sw) {
		if (p_port->p_node->sw == p_sw)
			return 0;
		base_lid = osm_port_get_base_lid(p_port);
	} else {
		p_physp = p_port->p_physp;
		if (!p_physp || !p_physp->p_remote_physp ||
		    !p_physp->p_remote_physp->p_node->sw)
			return OSM_NO_PATH;

		if (p_physp->p_remote_physp->p_node->sw == p_sw)
			return p_physp->p_remote_physp->port_num;
		base_lid =
		    osm_node_get_base_lid(p_physp->p_remote_physp->p_node, 0);
	}
	base_lid = cl_ntoh16(base_lid);

	num_ports = p_sw->num_ports;

	least_hops = osm_switch_get_least_hops(p_sw, base_lid);
	if (least_hops == OSM_NO_PATH)
		return OSM_NO_PATH;

	/*
	   First, inquire with the forwarding table for an existing
	   route.  If one is found, honor it unless:
	   1. the ignore existing flag is set.
	   2. the physical port is not a valid one or not healthy
	   3. the physical port has a remote port (the link is up)
	   4. the port has min-hops to the target (avoid loops)
	 */
	if (!ignore_existing) {
		port_num = osm_switch_get_port_by_lid(p_sw, lid_ho);

		if (port_num != OSM_NO_PATH) {
			CL_ASSERT(port_num < num_ports);

			p_physp =
			    osm_node_get_physp_ptr(p_sw->p_node, port_num);
			/*
			   Don't be too trusting of the current forwarding table!
			   Verify that the port number is legal and that the
			   LID is reachable through this port.
			 */
			if (p_physp && osm_physp_is_healthy(p_physp) &&
			    osm_physp_get_remote(p_physp)) {
				hops =
				    osm_switch_get_hop_count(p_sw, base_lid,
							     port_num);
				/*
				   If we aren't using pre-defined user routes
				   function, then we need to make sure that the
				   current path is the minimum one. In case of
				   having such a user function - this check will
				   not be done, and the old routing will be used.
				   Note: This means that it is the user's job to
				   clean all data in the forwarding tables that
				   he wants to be overridden by the minimum
				   hop function.
				 */
				if (hops == least_hops)
					return port_num;
			}
		}
	}

	/*
	   This algorithm selects a port based on a static load balanced
	   selection across equal hop-count ports.
	   There is lots of room for improved sophistication here,
	   possibly guided by user configuration info.
	 */

	/*
	   OpenSM routing is "local" - not considering a full lid to lid
	   path. As such we can not guarantee a path will not loop if we
	   do not always follow least hops.
	   So we must abort if not least hops.
	 */

	/* port number starts with one and num_ports is 1 + num phys ports */
	for (i = start_from; i < start_from + num_ports; i++) {
		port_num = osm_switch_get_dimn_port(p_sw, i % num_ports);
		if (!port_num ||
		    osm_switch_get_hop_count(p_sw, base_lid, port_num) !=
		    least_hops)
			continue;

		/* let us make sure it is not down or unhealthy */
		p_physp = osm_node_get_physp_ptr(p_sw->p_node, port_num);
		if (!p_physp || !osm_physp_is_healthy(p_physp) ||
		    /*
		       we require all - non sma ports to be linked
		       to be routed through
		     */
		    !osm_physp_get_remote(p_physp))
			continue;

		/*
		   We located a least-hop port, possibly one of many.
		   For this port, check the running total count of
		   the number of paths through this port.  Select
		   the port routing the least number of paths.
		 */
		check_count =
		    osm_port_prof_path_count_get(&p_sw->p_prof[port_num]);


		if (dor) {
			/* Get the Remote Node */
			p_rem_physp = osm_physp_get_remote(p_physp);
			p_rem_node = osm_physp_get_node_ptr(p_rem_physp);
			/* use the first dimension, but spread traffic
			 * out among the group of ports representing
			 * that dimension */
			if (!p_rem_node_first)
				p_rem_node_first = p_rem_node;
			else if (p_rem_node != p_rem_node_first)
				continue;
			if (routing_for_lmc) {
				struct osm_remote_guids_count *r = p_port->priv;
				uint8_t rem_port = osm_physp_get_port_num(p_rem_physp);
				unsigned int j;

				for (j = 0; j < r->count; j++) {
					p_remote_guid = &r->guids[j];
					if ((p_remote_guid->node == p_rem_node)
					    && (p_remote_guid->port == rem_port))
						break;
				}
				if (j == r->count)
					p_remote_guid = &null_remote_node;
			}
		/*
		   Advanced LMC routing requires tracking of the
		   best port by the node connected to the other side of
		   it.
		 */
		} else if (routing_for_lmc) {
			/* Is the sys guid already used ? */
			p_remote_guid = switch_find_sys_guid_count(p_sw,
								   p_port->priv,
								   port_num);

			/* If not update the least hops for this case */
			if (!p_remote_guid) {
				if (check_count < least_paths_other_sys) {
					least_paths_other_sys = check_count;
					best_port_other_sys = port_num;
					least_forwarded_to = 0;
				}
				found_sys_guid = 0;
			} else {	/* same sys found - try node */


				/* Else is the node guid already used ? */
				p_remote_guid = switch_find_node_guid_count(p_sw,
									    p_port->priv,
									    port_num);

				/* If not update the least hops for this case */
				if (!p_remote_guid
				    && check_count < least_paths_other_nodes) {
					least_paths_other_nodes = check_count;
					best_port_other_node = port_num;
					least_forwarded_to = 0;
				}
				/* else prior sys and node guid already used */

				if (!p_remote_guid)
					found_node_guid = 0;
				else
					found_node_guid = 1;
				found_sys_guid = 1;
			}	/* same sys found */
		}

		port_paths[port_paths_count].port_num = port_num;
		port_paths[port_paths_count].path_count = check_count;
		if (routing_for_lmc) {
			port_paths[port_paths_count].found_sys_guid = found_sys_guid;
			port_paths[port_paths_count].found_node_guid = found_node_guid;
		}
		if (routing_for_lmc && p_remote_guid)
			port_paths[port_paths_count].forwarded_to = p_remote_guid->forwarded_to;
		else
			port_paths[port_paths_count].forwarded_to = 0;
		port_paths_total_paths += check_count;
		port_paths_count++;

		/* routing for LMC mode */
		/*
		   the count is min but also lower then the max subscribed
		 */
		if (check_count < least_paths) {
			port_found = TRUE;
			best_port = port_num;
			least_paths = check_count;
			scatter_possible_ports_count = 0;
			scatter_possible_ports[scatter_possible_ports_count++] = port_num;
			if (routing_for_lmc
			    && p_remote_guid
			    && p_remote_guid->forwarded_to < least_forwarded_to)
				least_forwarded_to = p_remote_guid->forwarded_to;
		} else if (scatter_ports
			   && check_count == least_paths) {
			scatter_possible_ports[scatter_possible_ports_count++] = port_num;
		} else if (routing_for_lmc
			   && p_remote_guid
			   && check_count == least_paths
			   && p_remote_guid->forwarded_to < least_forwarded_to) {
			least_forwarded_to = p_remote_guid->forwarded_to;
			best_port = port_num;
		}
	}

	if (port_found == FALSE)
		return OSM_NO_PATH;

	if (port_shifting && port_paths_count) {
		/* In the port_paths[] array, we now have all the ports that we
		 * can route out of.  Using some shifting math below, possibly
		 * select a different one so that lids won't align in LFTs
		 *
		 * If lmc > 0, we need to loop through these ports to find the
		 * least_forwarded_to port, best_port_other_sys, and
		 * best_port_other_node just like before but through the different
		 * ordering.
		 */

		least_paths = 0xFFFFFFFF;
		least_paths_other_sys = 0xFFFFFFFF;
		least_paths_other_nodes = 0xFFFFFFFF;
	        least_forwarded_to = 0xFFFFFFFF;
		best_port = 0;
		best_port_other_sys = 0;
		best_port_other_node = 0;

		for (i = 0; i < port_paths_count; i++) {
			unsigned int idx;

			idx = (port_paths_total_paths/port_paths_count + i) % port_paths_count;

			if (routing_for_lmc) {
				if (!port_paths[idx].found_sys_guid
				    && port_paths[idx].path_count < least_paths_other_sys) {
					least_paths_other_sys = port_paths[idx].path_count;
					best_port_other_sys = port_paths[idx].port_num;
					least_forwarded_to = 0;
				}
				else if (!port_paths[idx].found_node_guid
					 && port_paths[idx].path_count < least_paths_other_nodes) {
					least_paths_other_nodes = port_paths[idx].path_count;
					best_port_other_node = port_paths[idx].port_num;
					least_forwarded_to = 0;
				}
			}

			if (port_paths[idx].path_count < least_paths) {
				best_port = port_paths[idx].port_num;
				least_paths = port_paths[idx].path_count;
				if (routing_for_lmc
				    && (port_paths[idx].found_sys_guid
					|| port_paths[idx].found_node_guid)
				    && port_paths[idx].forwarded_to < least_forwarded_to)
					least_forwarded_to = port_paths[idx].forwarded_to;
			}
			else if (routing_for_lmc
				 && (port_paths[idx].found_sys_guid
				     || port_paths[idx].found_node_guid)
				 && port_paths[idx].path_count == least_paths
				 && port_paths[idx].forwarded_to < least_forwarded_to) {
				least_forwarded_to = port_paths[idx].forwarded_to;
				best_port = port_paths[idx].port_num;
			}

		}
	}

	/*
	   if we are in enhanced routing mode and the best port is not
	   the local port 0
	 */
	if (routing_for_lmc && best_port && !scatter_ports) {
		/* Select the least hop port of the non used sys first */
		if (best_port_other_sys)
			best_port = best_port_other_sys;
		else if (best_port_other_node)
			best_port = best_port_other_node;
	} else if (scatter_ports) {
		/*
		 * There is some danger that this random could "rebalance" the routes
		 * every time, to combat this there is a global srandom that
		 * occurs at the start of every sweep.
		 */
		unsigned int idx = random() % scatter_possible_ports_count;
		best_port = scatter_possible_ports[idx];
	}
	return best_port;
}
/**********************************************************************
 The plock must be held before calling this function.
**********************************************************************/
static void pi_rcv_process_switch_ext_port(IN osm_sm_t * sm,
					   IN osm_node_t * p_node,
					   IN osm_physp_t * p_physp,
					   IN ib_port_info_t * p_pi)
{
	ib_api_status_t status = IB_SUCCESS;
	osm_madw_context_t context;
	osm_physp_t *p_remote_physp, *physp0;
	osm_node_t *p_remote_node;
	ib_net64_t m_key;
	unsigned data_vls;
	uint8_t port_num;
	uint8_t remote_port_num;
	osm_dr_path_t path;
	int mlnx_epi_supported = 0;

	OSM_LOG_ENTER(sm->p_log);

	/*
	   Check the state of the physical port.
	   If there appears to be something on the other end of the wire,
	   then ask for NodeInfo.  Ignore the switch management port.
	 */
	port_num = osm_physp_get_port_num(p_physp);

	if (sm->p_subn->opt.fdr10)
		mlnx_epi_supported = is_mlnx_ext_port_info_supported(
						ib_node_info_get_vendor_id(&p_node->node_info),
						p_node->node_info.device_id);

	/* if in_sweep_hop_0 is TRUE, then this means the SM is on the switch,
	   and we got switchInfo of our local switch. Do not continue
	   probing through the switch. */
	switch (ib_port_info_get_port_state(p_pi)) {
	case IB_LINK_DOWN:
		p_remote_physp = osm_physp_get_remote(p_physp);
		if (p_remote_physp) {
			p_remote_node =
			    osm_physp_get_node_ptr(p_remote_physp);
			remote_port_num =
			    osm_physp_get_port_num(p_remote_physp);

			OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
				"Unlinking local node 0x%" PRIx64
				", port %u"
				"\n\t\t\t\tand remote node 0x%" PRIx64
				", port %u\n",
				cl_ntoh64(osm_node_get_node_guid
					  (p_node)), port_num,
				cl_ntoh64(osm_node_get_node_guid
					  (p_remote_node)),
				remote_port_num);

			if (sm->ucast_mgr.cache_valid)
				osm_ucast_cache_add_link(&sm->ucast_mgr,
							 p_physp,
							 p_remote_physp);

			osm_node_unlink(p_node, (uint8_t) port_num,
					p_remote_node,
					(uint8_t) remote_port_num);

		}
		break;

	case IB_LINK_INIT:
	case IB_LINK_ARMED:
	case IB_LINK_ACTIVE:
		physp0 = osm_node_get_physp_ptr(p_node, 0);
		if (mlnx_epi_supported) {
			m_key = ib_port_info_get_m_key(&physp0->port_info);

			context.pi_context.node_guid = osm_node_get_node_guid(p_node);
			context.pi_context.port_guid = osm_physp_get_port_guid(p_physp);
			context.pi_context.set_method = FALSE;
			context.pi_context.light_sweep = FALSE;
			context.pi_context.active_transition = FALSE;
			context.pi_context.client_rereg = FALSE;
			status = osm_req_get(sm,
					     osm_physp_get_dr_path_ptr(p_physp),
					     IB_MAD_ATTR_MLNX_EXTENDED_PORT_INFO,
					     cl_hton32(port_num), FALSE, m_key,
					     CL_DISP_MSGID_NONE, &context);
			if (status != IB_SUCCESS)
				OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0F11: "
					"Failure initiating MLNX ExtPortInfo request (%s)\n",
					ib_get_err_str(status));
		}
		if (sm->p_subn->in_sweep_hop_0 == FALSE) {
			/*
			   To avoid looping forever, only probe the port if it
			   is NOT the port that responded to the SMP.

			   Request node info from the other end of this link:
			   1) Copy the current path from the parent node.
			   2) Extend the path to the next hop thru this port.
			   3) Request node info with the new path

			 */
			if (p_pi->local_port_num !=
			    osm_physp_get_port_num(p_physp)) {
				path = *osm_physp_get_dr_path_ptr(p_physp);

				if (osm_dr_path_extend(&path,
						       osm_physp_get_port_num
						       (p_physp))) {
					OSM_LOG(sm->p_log, OSM_LOG_ERROR,
						"ERR 0F08: "
						"DR path with hop count %d couldn't be extended\n",
						path.hop_count);
					break;
				}

				memset(&context, 0, sizeof(context));
				context.ni_context.node_guid =
				    osm_node_get_node_guid(p_node);
				context.ni_context.port_num =
				    osm_physp_get_port_num(p_physp);

				status = osm_req_get(sm, &path,
						     IB_MAD_ATTR_NODE_INFO, 0,
						     TRUE, 0,
						     CL_DISP_MSGID_NONE,
						     &context);

				if (status != IB_SUCCESS)
					OSM_LOG(sm->p_log, OSM_LOG_ERROR,
						"ERR 0F02: "
						"Failure initiating NodeInfo request (%s)\n",
						ib_get_err_str(status));
			} else
				OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
					"Skipping SMP responder port %u\n",
					p_pi->local_port_num);
		}
		break;

	default:
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0F03: "
			"Unknown link state = %u, port = %u\n",
			ib_port_info_get_port_state(p_pi),
			p_pi->local_port_num);
		break;
	}

	if (ib_port_info_get_port_state(p_pi) > IB_LINK_INIT && p_node->sw &&
	    !ib_switch_info_get_state_change(&p_node->sw->switch_info) &&
	    p_node->sw->need_update == 1)
		p_node->sw->need_update = 0;

	if (p_physp->need_update)
		sm->p_subn->ignore_existing_lfts = TRUE;

	/*
	   Update the PortInfo attribute.
	 */
	osm_physp_set_port_info(p_physp, p_pi, sm);

	if (ib_port_info_get_port_state(p_pi) == IB_LINK_DOWN)
		goto Exit;

	p_remote_physp = osm_physp_get_remote(p_physp);
	if (p_remote_physp) {
		p_remote_node = osm_physp_get_node_ptr(p_remote_physp);
		if (p_remote_node->sw) {
			data_vls = 1U << (ib_port_info_get_vl_cap(p_pi) - 1);
			if (data_vls > 1U << (sm->p_subn->opt.max_op_vls - 1))
				data_vls = 1U << (sm->p_subn->opt.max_op_vls - 1);
			if (data_vls >= IB_MAX_NUM_VLS)
				data_vls = IB_MAX_NUM_VLS - 1;
			if ((uint8_t)data_vls < sm->p_subn->min_sw_data_vls) {
				OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
					"Setting switch port minimal data VLs "
					"to:%u defined by node:0x%"
					PRIx64 ", port:%u\n", data_vls,
					cl_ntoh64(osm_node_get_node_guid(p_node)),
					port_num);
				sm->p_subn->min_sw_data_vls = data_vls;
			}
		}
	}

Exit:
	OSM_LOG_EXIT(sm->p_log);
}
Exemple #14
0
static boolean_t
__osm_link_mgr_set_physp_pi(osm_sm_t * sm,
			    IN osm_physp_t * const p_physp,
			    IN uint8_t const port_state)
{
	uint8_t payload[IB_SMP_DATA_SIZE];
	ib_port_info_t *const p_pi = (ib_port_info_t *) payload;
	const ib_port_info_t *p_old_pi;
	osm_madw_context_t context;
	osm_node_t *p_node;
	ib_api_status_t status;
	uint8_t port_num;
	uint8_t mtu;
	uint8_t op_vls;
	boolean_t esp0 = FALSE;
	boolean_t send_set = FALSE;
	osm_physp_t *p_remote_physp;

	OSM_LOG_ENTER(sm->p_log);

	p_node = osm_physp_get_node_ptr(p_physp);

	port_num = osm_physp_get_port_num(p_physp);

	if (port_num == 0) {
		/*
		   CAs don't have a port 0, and for switch port 0,
		   we need to check if this is enhanced or base port 0.
		   For base port 0 the following parameters are not valid (p822, table 145).
		 */
		if (!p_node->sw) {
			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 4201: "
				"Cannot find switch by guid: 0x%" PRIx64 "\n",
				cl_ntoh64(p_node->node_info.node_guid));
			goto Exit;
		}

		if (ib_switch_info_is_enhanced_port0(&p_node->sw->switch_info)
		    == FALSE) {
			/* This means the switch doesn't support enhanced port 0.
			   Can skip it. */
			OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
				"Skipping port 0, GUID 0x%016" PRIx64 "\n",
				cl_ntoh64(osm_physp_get_port_guid(p_physp)));
			goto Exit;
		}
		esp0 = TRUE;
	}

	/*
	   PAST THIS POINT WE ARE HANDLING EITHER A NON PORT 0 OR ENHANCED PORT 0
	 */

	p_old_pi = &p_physp->port_info;

	memset(payload, 0, IB_SMP_DATA_SIZE);
	memcpy(payload, p_old_pi, sizeof(ib_port_info_t));

	/*
	   Should never write back a value that is bigger then 3 in
	   the PortPhysicalState field - so can not simply copy!

	   Actually we want to write there:
	   port physical state - no change,
	   link down default state = polling
	   port state - as requested.
	 */
	p_pi->state_info2 = 0x02;
	ib_port_info_set_port_state(p_pi, port_state);

	if (ib_port_info_get_link_down_def_state(p_pi) !=
	    ib_port_info_get_link_down_def_state(p_old_pi))
		send_set = TRUE;

	/* didn't get PortInfo before */
	if (!ib_port_info_get_port_state(p_old_pi))
		send_set = TRUE;

	/* we only change port fields if we do not change state */
	if (port_state == IB_LINK_NO_CHANGE) {
		/* The following fields are relevant only for CA port, router, or Enh. SP0 */
		if (osm_node_get_type(p_node) != IB_NODE_TYPE_SWITCH ||
		    port_num == 0) {
			p_pi->m_key = sm->p_subn->opt.m_key;
			if (memcmp(&p_pi->m_key, &p_old_pi->m_key,
				   sizeof(p_pi->m_key)))
				send_set = TRUE;

			p_pi->subnet_prefix = sm->p_subn->opt.subnet_prefix;
			if (memcmp(&p_pi->subnet_prefix,
				   &p_old_pi->subnet_prefix,
				   sizeof(p_pi->subnet_prefix)))
				send_set = TRUE;

			p_pi->base_lid = osm_physp_get_base_lid(p_physp);
			if (memcmp(&p_pi->base_lid, &p_old_pi->base_lid,
				   sizeof(p_pi->base_lid)))
				send_set = TRUE;

			/* we are initializing the ports with our local sm_base_lid */
			p_pi->master_sm_base_lid = sm->p_subn->sm_base_lid;
			if (memcmp(&p_pi->master_sm_base_lid,
				   &p_old_pi->master_sm_base_lid,
				   sizeof(p_pi->master_sm_base_lid)))
				send_set = TRUE;

			p_pi->m_key_lease_period =
			    sm->p_subn->opt.m_key_lease_period;
			if (memcmp(&p_pi->m_key_lease_period,
				   &p_old_pi->m_key_lease_period,
				   sizeof(p_pi->m_key_lease_period)))
				send_set = TRUE;

			if (esp0 == FALSE)
				p_pi->mkey_lmc = sm->p_subn->opt.lmc;
			else {
				if (sm->p_subn->opt.lmc_esp0)
					p_pi->mkey_lmc = sm->p_subn->opt.lmc;
				else
					p_pi->mkey_lmc = 0;
			}
			if (memcmp(&p_pi->mkey_lmc, &p_old_pi->mkey_lmc,
				   sizeof(p_pi->mkey_lmc)))
				send_set = TRUE;

			ib_port_info_set_timeout(p_pi,
						 sm->p_subn->opt.
						 subnet_timeout);
			if (ib_port_info_get_timeout(p_pi) !=
			    ib_port_info_get_timeout(p_old_pi))
				send_set = TRUE;
		}

		/*
		   Several timeout mechanisms:
		 */
		p_remote_physp = osm_physp_get_remote(p_physp);
		if (port_num != 0 && p_remote_physp) {
			if (osm_node_get_type(osm_physp_get_node_ptr(p_physp))
			    == IB_NODE_TYPE_ROUTER) {
				ib_port_info_set_hoq_lifetime(p_pi,
							      sm->p_subn->
							      opt.
							      leaf_head_of_queue_lifetime);
			} else
			    if (osm_node_get_type
				(osm_physp_get_node_ptr(p_physp)) ==
				IB_NODE_TYPE_SWITCH) {
				/* Is remote end CA or router (a leaf port) ? */
				if (osm_node_get_type
				    (osm_physp_get_node_ptr(p_remote_physp)) !=
				    IB_NODE_TYPE_SWITCH) {
					ib_port_info_set_hoq_lifetime(p_pi,
								      sm->
								      p_subn->
								      opt.
								      leaf_head_of_queue_lifetime);
					ib_port_info_set_vl_stall_count(p_pi,
									sm->
									p_subn->
									opt.
									leaf_vl_stall_count);
				} else {
					ib_port_info_set_hoq_lifetime(p_pi,
								      sm->
								      p_subn->
								      opt.
								      head_of_queue_lifetime);
					ib_port_info_set_vl_stall_count(p_pi,
									sm->
									p_subn->
									opt.
									vl_stall_count);
				}
			}
			if (ib_port_info_get_hoq_lifetime(p_pi) !=
			    ib_port_info_get_hoq_lifetime(p_old_pi) ||
			    ib_port_info_get_vl_stall_count(p_pi) !=
			    ib_port_info_get_vl_stall_count(p_old_pi))
				send_set = TRUE;
		}

		ib_port_info_set_phy_and_overrun_err_thd(p_pi,
							 sm->p_subn->opt.
							 local_phy_errors_threshold,
							 sm->p_subn->opt.
							 overrun_errors_threshold);
		if (memcmp(&p_pi->error_threshold, &p_old_pi->error_threshold,
			   sizeof(p_pi->error_threshold)))
			send_set = TRUE;

		/*
		   Set the easy common parameters for all port types,
		   then determine the neighbor MTU.
		 */
		p_pi->link_width_enabled = p_old_pi->link_width_supported;
		if (memcmp(&p_pi->link_width_enabled,
			   &p_old_pi->link_width_enabled,
			   sizeof(p_pi->link_width_enabled)))
			send_set = TRUE;

		if (sm->p_subn->opt.force_link_speed &&
		    (sm->p_subn->opt.force_link_speed != 15 ||
		     ib_port_info_get_link_speed_enabled(p_pi) !=
		     ib_port_info_get_link_speed_sup(p_pi))) {
			ib_port_info_set_link_speed_enabled(p_pi,
							    sm->p_subn->opt.
							    force_link_speed);
			if (memcmp(&p_pi->link_speed, &p_old_pi->link_speed,
				   sizeof(p_pi->link_speed)))
				send_set = TRUE;
		}

		/* calc new op_vls and mtu */
		op_vls =
		    osm_physp_calc_link_op_vls(sm->p_log, sm->p_subn, p_physp);
		mtu = osm_physp_calc_link_mtu(sm->p_log, p_physp);

		ib_port_info_set_neighbor_mtu(p_pi, mtu);
		if (ib_port_info_get_neighbor_mtu(p_pi) !=
		    ib_port_info_get_neighbor_mtu(p_old_pi))
			send_set = TRUE;

		ib_port_info_set_op_vls(p_pi, op_vls);
		if (ib_port_info_get_op_vls(p_pi) !=
		    ib_port_info_get_op_vls(p_old_pi))
			send_set = TRUE;

		/* provide the vl_high_limit from the qos mgr */
		if (sm->p_subn->opt.qos &&
		    p_physp->vl_high_limit != p_old_pi->vl_high_limit) {
			send_set = TRUE;
			p_pi->vl_high_limit = p_physp->vl_high_limit;
		}
	}

	if (port_state != IB_LINK_NO_CHANGE &&
	    port_state != ib_port_info_get_port_state(p_old_pi)) {
		send_set = TRUE;
		if (port_state == IB_LINK_ACTIVE)
			context.pi_context.active_transition = TRUE;
		else
			context.pi_context.active_transition = FALSE;
	}

	context.pi_context.node_guid = osm_node_get_node_guid(p_node);
	context.pi_context.port_guid = osm_physp_get_port_guid(p_physp);
	context.pi_context.set_method = TRUE;
	context.pi_context.light_sweep = FALSE;

	/* We need to send the PortInfoSet request with the new sm_lid
	   in the following cases:
	   1. There is a change in the values (send_set == TRUE)
	   2. This is a switch external port (so it wasn't handled yet by
	   osm_lid_mgr) and first_time_master_sweep flag on the subnet is TRUE,
	   which means the SM just became master, and it then needs to send at
	   PortInfoSet to every port.
	 */
	if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH && port_num
	    && sm->p_subn->first_time_master_sweep == TRUE)
		send_set = TRUE;

	if (send_set)
		status = osm_req_set(sm, osm_physp_get_dr_path_ptr(p_physp),
				     payload, sizeof(payload),
				     IB_MAD_ATTR_PORT_INFO,
				     cl_hton32(port_num),
				     CL_DISP_MSGID_NONE, &context);

Exit:
	OSM_LOG_EXIT(sm->p_log);
	return send_set;
}
Exemple #15
0
static int link_mgr_set_physp_pi(osm_sm_t * sm, IN osm_physp_t * p_physp,
				 IN uint8_t port_state)
{
	uint8_t payload[IB_SMP_DATA_SIZE], payload2[IB_SMP_DATA_SIZE];
	ib_port_info_t *p_pi = (ib_port_info_t *) payload;
	ib_mlnx_ext_port_info_t *p_epi = (ib_mlnx_ext_port_info_t *) payload2;
	const ib_port_info_t *p_old_pi;
	const ib_mlnx_ext_port_info_t *p_old_epi;
	osm_madw_context_t context;
	osm_node_t *p_node;
	ib_api_status_t status;
	uint8_t port_num, mtu, op_vls, smsl = OSM_DEFAULT_SL;
	boolean_t esp0 = FALSE, send_set = FALSE, send_set2 = FALSE;
	osm_physp_t *p_remote_physp, *physp0;
	int qdr_change = 0, fdr10_change = 0;
	int ret = 0;
	ib_net32_t attr_mod, cap_mask;

	OSM_LOG_ENTER(sm->p_log);

	p_node = osm_physp_get_node_ptr(p_physp);

	p_old_pi = &p_physp->port_info;

	port_num = osm_physp_get_port_num(p_physp);

	if (port_num == 0) {
		/*
		   CAs don't have a port 0, and for switch port 0,
		   we need to check if this is enhanced or base port 0.
		   For base port 0 the following parameters are not valid
		   (IBA 1.2.1 p.830 table 146).
		 */
		if (!p_node->sw) {
			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 4201: "
				"Cannot find switch by guid: 0x%" PRIx64 "\n",
				cl_ntoh64(p_node->node_info.node_guid));
			goto Exit;
		}

		if (ib_switch_info_is_enhanced_port0(&p_node->sw->switch_info)
		    == FALSE) {

			/* Even for base port 0 we might have to set smsl
			   (if we are using lash routing) */
			smsl = link_mgr_get_smsl(sm, p_physp);
			if (smsl != ib_port_info_get_master_smsl(p_old_pi)) {
				send_set = TRUE;
				OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
					"Setting SMSL to %d on port 0 GUID 0x%016"
					PRIx64 "\n", smsl,
					cl_ntoh64(osm_physp_get_port_guid
						  (p_physp)));
			} else {
				/* This means the switch doesn't support
				   enhanced port 0 and we don't need to
				   change SMSL. Can skip it. */
				OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
					"Skipping port 0, GUID 0x%016" PRIx64
					"\n",
					cl_ntoh64(osm_physp_get_port_guid
						  (p_physp)));
				goto Exit;
			}
		} else
			esp0 = TRUE;
	}

	memcpy(payload, p_old_pi, sizeof(ib_port_info_t));

	/*
	   Should never write back a value that is bigger then 3 in
	   the PortPhysicalState field - so can not simply copy!

	   Actually we want to write there:
	   port physical state - no change,
	   link down default state = polling
	   port state - as requested.
	 */
	p_pi->state_info2 = 0x02;
	ib_port_info_set_port_state(p_pi, port_state);

	/* Check whether this is base port0 smsl handling only */
	if (port_num == 0 && esp0 == FALSE) {
		ib_port_info_set_master_smsl(p_pi, smsl);
		goto Send;
	}

	/*
	   PAST THIS POINT WE ARE HANDLING EITHER A NON PORT 0 OR ENHANCED PORT 0
	 */

	if (ib_port_info_get_link_down_def_state(p_pi) !=
	    ib_port_info_get_link_down_def_state(p_old_pi))
		send_set = TRUE;

	/* didn't get PortInfo before */
	if (!ib_port_info_get_port_state(p_old_pi))
		send_set = TRUE;

	/* we only change port fields if we do not change state */
	if (port_state == IB_LINK_NO_CHANGE) {
		/* The following fields are relevant only for CA port, router, or Enh. SP0 */
		if (osm_node_get_type(p_node) != IB_NODE_TYPE_SWITCH ||
		    port_num == 0) {
			p_pi->m_key = sm->p_subn->opt.m_key;
			if (memcmp(&p_pi->m_key, &p_old_pi->m_key,
				   sizeof(p_pi->m_key)))
				send_set = TRUE;

			p_pi->subnet_prefix = sm->p_subn->opt.subnet_prefix;
			if (memcmp(&p_pi->subnet_prefix,
				   &p_old_pi->subnet_prefix,
				   sizeof(p_pi->subnet_prefix)))
				send_set = TRUE;

			p_pi->base_lid = osm_physp_get_base_lid(p_physp);
			if (memcmp(&p_pi->base_lid, &p_old_pi->base_lid,
				   sizeof(p_pi->base_lid)))
				send_set = TRUE;

			/* we are initializing the ports with our local sm_base_lid */
			p_pi->master_sm_base_lid = sm->p_subn->sm_base_lid;
			if (memcmp(&p_pi->master_sm_base_lid,
				   &p_old_pi->master_sm_base_lid,
				   sizeof(p_pi->master_sm_base_lid)))
				send_set = TRUE;

			smsl = link_mgr_get_smsl(sm, p_physp);
			if (smsl != ib_port_info_get_master_smsl(p_old_pi)) {

				ib_port_info_set_master_smsl(p_pi, smsl);

				OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
					"Setting SMSL to %d on GUID 0x%016"
					PRIx64 ", port %d\n", smsl,
					cl_ntoh64(osm_physp_get_port_guid
						  (p_physp)), port_num);

				send_set = TRUE;
			}

			p_pi->m_key_lease_period =
			    sm->p_subn->opt.m_key_lease_period;
			if (memcmp(&p_pi->m_key_lease_period,
				   &p_old_pi->m_key_lease_period,
				   sizeof(p_pi->m_key_lease_period)))
				send_set = TRUE;

			/* M_KeyProtectBits are currently always zero */
			p_pi->mkey_lmc = 0;
			if (esp0 == FALSE || sm->p_subn->opt.lmc_esp0)
				ib_port_info_set_lmc(p_pi, sm->p_subn->opt.lmc);
			if (ib_port_info_get_lmc(p_old_pi) !=
			    ib_port_info_get_lmc(p_pi) ||
			    ib_port_info_get_mpb(p_old_pi) !=
			    ib_port_info_get_mpb(p_pi))
				send_set = TRUE;

			ib_port_info_set_timeout(p_pi,
						 sm->p_subn->opt.
						 subnet_timeout);
			if (ib_port_info_get_timeout(p_pi) !=
			    ib_port_info_get_timeout(p_old_pi))
				send_set = TRUE;
		}

		/*
		   Several timeout mechanisms:
		 */
		p_remote_physp = osm_physp_get_remote(p_physp);
		if (port_num != 0 && p_remote_physp) {
			if (osm_node_get_type(osm_physp_get_node_ptr(p_physp))
			    == IB_NODE_TYPE_ROUTER) {
				ib_port_info_set_hoq_lifetime(p_pi,
							      sm->p_subn->
							      opt.
							      leaf_head_of_queue_lifetime);
			} else
			    if (osm_node_get_type
				(osm_physp_get_node_ptr(p_physp)) ==
				IB_NODE_TYPE_SWITCH) {
				/* Is remote end CA or router (a leaf port) ? */
				if (osm_node_get_type
				    (osm_physp_get_node_ptr(p_remote_physp)) !=
				    IB_NODE_TYPE_SWITCH) {
					ib_port_info_set_hoq_lifetime(p_pi,
								      sm->
								      p_subn->
								      opt.
								      leaf_head_of_queue_lifetime);
					ib_port_info_set_vl_stall_count(p_pi,
									sm->
									p_subn->
									opt.
									leaf_vl_stall_count);
				} else {
					ib_port_info_set_hoq_lifetime(p_pi,
								      sm->
								      p_subn->
								      opt.
								      head_of_queue_lifetime);
					ib_port_info_set_vl_stall_count(p_pi,
									sm->
									p_subn->
									opt.
									vl_stall_count);
				}
			}
			if (ib_port_info_get_hoq_lifetime(p_pi) !=
			    ib_port_info_get_hoq_lifetime(p_old_pi) ||
			    ib_port_info_get_vl_stall_count(p_pi) !=
			    ib_port_info_get_vl_stall_count(p_old_pi))
				send_set = TRUE;
		}

		ib_port_info_set_phy_and_overrun_err_thd(p_pi,
							 sm->p_subn->opt.
							 local_phy_errors_threshold,
							 sm->p_subn->opt.
							 overrun_errors_threshold);
		if (memcmp(&p_pi->error_threshold, &p_old_pi->error_threshold,
			   sizeof(p_pi->error_threshold)))
			send_set = TRUE;

		/*
		   Set the easy common parameters for all port types,
		   then determine the neighbor MTU.
		 */
		p_pi->link_width_enabled = p_old_pi->link_width_supported;
		if (memcmp(&p_pi->link_width_enabled,
			   &p_old_pi->link_width_enabled,
			   sizeof(p_pi->link_width_enabled)))
			send_set = TRUE;

		if (sm->p_subn->opt.force_link_speed &&
		    (sm->p_subn->opt.force_link_speed != 15 ||
		     ib_port_info_get_link_speed_enabled(p_pi) !=
		     ib_port_info_get_link_speed_sup(p_pi))) {
			ib_port_info_set_link_speed_enabled(p_pi,
							    sm->p_subn->opt.
							    force_link_speed);
			if (memcmp(&p_pi->link_speed, &p_old_pi->link_speed,
				   sizeof(p_pi->link_speed))) {
				send_set = TRUE;
				/* Determine whether QDR in LSE is being changed */
				if ((ib_port_info_get_link_speed_enabled(p_pi) &
				     IB_LINK_SPEED_ACTIVE_10 &&
				     !(ib_port_info_get_link_speed_enabled(p_old_pi) &
				      IB_LINK_SPEED_ACTIVE_10)) ||
				    ((!(ib_port_info_get_link_speed_enabled(p_pi) &
				       IB_LINK_SPEED_ACTIVE_10) &&
				      ib_port_info_get_link_speed_enabled(p_old_pi) &
				      IB_LINK_SPEED_ACTIVE_10)))
				qdr_change = 1;
			}
		}

		if (sm->p_subn->opt.fdr10 &&
		    p_physp->ext_port_info.link_speed_supported & FDR10) {
			if (sm->p_subn->opt.fdr10 == 1) { /* enable */
				if (!(p_physp->ext_port_info.link_speed_enabled & FDR10))
					fdr10_change = 1;
			} else {	/* disable */
				if (p_physp->ext_port_info.link_speed_enabled & FDR10)
					fdr10_change = 1;
			}
			if (fdr10_change) {
				p_old_epi = &p_physp->ext_port_info;
				memcpy(payload2, p_old_epi,
				       sizeof(ib_mlnx_ext_port_info_t));
				p_epi->state_change_enable = 0x01;
				if (sm->p_subn->opt.fdr10 == 1)
					p_epi->link_speed_enabled = FDR10;
				else
					p_epi->link_speed_enabled = 0;
				send_set2 = TRUE;
			}
		}

		if (osm_node_get_type(p_physp->p_node) == IB_NODE_TYPE_SWITCH) {
			physp0 = osm_node_get_physp_ptr(p_physp->p_node, 0);
			cap_mask = physp0->port_info.capability_mask;
		} else
			cap_mask = p_pi->capability_mask;
		if (!(cap_mask & IB_PORT_CAP_HAS_EXT_SPEEDS))
			qdr_change = 0;

		/* Do peer ports support extended link speeds ? */
		if (port_num != 0 && p_remote_physp) {
			osm_physp_t *rphysp0;
			ib_net32_t rem_cap_mask;

			if (osm_node_get_type(p_remote_physp->p_node) ==
			    IB_NODE_TYPE_SWITCH) {
				rphysp0 = osm_node_get_physp_ptr(p_remote_physp->p_node, 0);
				rem_cap_mask = rphysp0->port_info.capability_mask;
			} else
				rem_cap_mask = p_remote_physp->port_info.capability_mask;

			if (cap_mask & IB_PORT_CAP_HAS_EXT_SPEEDS &&
			    rem_cap_mask & IB_PORT_CAP_HAS_EXT_SPEEDS) {
				if (sm->p_subn->opt.force_link_speed_ext &&
				    (sm->p_subn->opt.force_link_speed_ext != IB_LINK_SPEED_EXT_SET_LSES ||
				     p_pi->link_speed_ext_enabled !=
				     ib_port_info_get_link_speed_sup(p_pi))) {
					p_pi->link_speed_ext_enabled = sm->p_subn->opt.force_link_speed_ext;
					if (memcmp(&p_pi->link_speed_ext_enabled,
						   &p_old_pi->link_speed_ext_enabled,
						   sizeof(p_pi->link_speed_ext_enabled)))
						send_set = TRUE;
				}
			}
		}

		/* calc new op_vls and mtu */
		op_vls =
		    osm_physp_calc_link_op_vls(sm->p_log, sm->p_subn, p_physp);
		mtu = osm_physp_calc_link_mtu(sm->p_log, p_physp);

		ib_port_info_set_neighbor_mtu(p_pi, mtu);
		if (ib_port_info_get_neighbor_mtu(p_pi) !=
		    ib_port_info_get_neighbor_mtu(p_old_pi))
			send_set = TRUE;

		ib_port_info_set_op_vls(p_pi, op_vls);
		if (ib_port_info_get_op_vls(p_pi) !=
		    ib_port_info_get_op_vls(p_old_pi))
			send_set = TRUE;

		/* provide the vl_high_limit from the qos mgr */
		if (sm->p_subn->opt.qos &&
		    p_physp->vl_high_limit != p_old_pi->vl_high_limit) {
			send_set = TRUE;
			p_pi->vl_high_limit = p_physp->vl_high_limit;
		}
	}

Send:
	if (port_state != IB_LINK_NO_CHANGE &&
	    port_state != ib_port_info_get_port_state(p_old_pi)) {
		send_set = TRUE;
		if (port_state == IB_LINK_ACTIVE)
			context.pi_context.active_transition = TRUE;
		else
			context.pi_context.active_transition = FALSE;
	}

	context.pi_context.node_guid = osm_node_get_node_guid(p_node);
	context.pi_context.port_guid = osm_physp_get_port_guid(p_physp);
	context.pi_context.set_method = TRUE;
	context.pi_context.light_sweep = FALSE;

	/* We need to send the PortInfoSet request with the new sm_lid
	   in the following cases:
	   1. There is a change in the values (send_set == TRUE)
	   2. This is a switch external port (so it wasn't handled yet by
	   osm_lid_mgr) and first_time_master_sweep flag on the subnet is TRUE,
	   which means the SM just became master, and it then needs to send at
	   PortInfoSet to every port.
	 */
	if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH && port_num
	    && sm->p_subn->first_time_master_sweep == TRUE)
		send_set = TRUE;

	if (!send_set)
		goto Exit;

	attr_mod = cl_hton32(port_num);
	if (qdr_change)
		attr_mod |= cl_hton32(1 << 31);	/* AM SMSupportExtendedSpeeds */
	status = osm_req_set(sm, osm_physp_get_dr_path_ptr(p_physp),
			     payload, sizeof(payload), IB_MAD_ATTR_PORT_INFO,
			     attr_mod, CL_DISP_MSGID_NONE, &context);
	if (status)
		ret = -1;

	if (send_set2) {
		status = osm_req_set(sm, osm_physp_get_dr_path_ptr(p_physp),
				     payload2, sizeof(payload2),
				     IB_MAD_ATTR_MLNX_EXTENDED_PORT_INFO,
				     cl_hton32(port_num),
				     CL_DISP_MSGID_NONE, &context);
		if (status)
			ret = -1;
	}

Exit:
	OSM_LOG_EXIT(sm->p_log);
	return ret;
}
static void get_pkey_table(IN osm_log_t * p_log, IN osm_sm_t * sm,
			   IN osm_node_t * p_node, IN osm_physp_t * p_physp)
{

	osm_madw_context_t context;
	ib_api_status_t status;
	osm_dr_path_t path;
	osm_physp_t *physp0;
	ib_net64_t m_key;
	uint8_t port_num;
	uint16_t block_num, max_blocks;
	uint32_t attr_mod_ho;

	OSM_LOG_ENTER(p_log);

	path = *osm_physp_get_dr_path_ptr(p_physp);

	context.pkey_context.node_guid = osm_node_get_node_guid(p_node);
	context.pkey_context.port_guid = osm_physp_get_port_guid(p_physp);
	context.pkey_context.set_method = FALSE;

	port_num = p_physp->port_num;

	if (!p_node->sw || port_num == 0)
		/* The maximum blocks is defined by the node info partition cap
		   for CA, router, and switch management ports. */
		max_blocks =
		    (cl_ntoh16(p_node->node_info.partition_cap) +
		     IB_NUM_PKEY_ELEMENTS_IN_BLOCK - 1)
		    / IB_NUM_PKEY_ELEMENTS_IN_BLOCK;
	else {
		/* This is a switch, and not a management port. The maximum blocks
		   is defined in the switch info partition enforcement cap. */

		/* Check for IBM eHCA firmware defect in reporting partition enforcement cap */
		if (cl_ntoh32(ib_node_info_get_vendor_id(&p_node->node_info)) ==
		    IBM_VENDOR_ID)
			p_node->sw->switch_info.enforce_cap = 0;

		/* Bail out if this is a switch with no partition enforcement capability */
		if (cl_ntoh16(p_node->sw->switch_info.enforce_cap) == 0)
			goto Exit;

		max_blocks = (cl_ntoh16(p_node->sw->switch_info.enforce_cap) +
			      IB_NUM_PKEY_ELEMENTS_IN_BLOCK -
			      1) / IB_NUM_PKEY_ELEMENTS_IN_BLOCK;
	}

	p_physp->pkeys.rcv_blocks_cnt = max_blocks;
	for (block_num = 0; block_num < max_blocks; block_num++) {
		if (osm_node_get_type(p_node) != IB_NODE_TYPE_SWITCH ||
		    osm_physp_get_port_num(p_physp) == 0) {
			attr_mod_ho = block_num;
			m_key = ib_port_info_get_m_key(&p_physp->port_info);
		} else {
			attr_mod_ho = block_num | (port_num << 16);
			physp0 = osm_node_get_physp_ptr(p_node, 0);
			m_key = ib_port_info_get_m_key(&physp0->port_info);
		}
		status = osm_req_get(sm, &path, IB_MAD_ATTR_P_KEY_TABLE,
				     cl_hton32(attr_mod_ho), FALSE,
				     m_key, CL_DISP_MSGID_NONE, &context);

		if (status != IB_SUCCESS) {
			OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 0F12: "
				"Failure initiating PKeyTable request (%s)\n",
				ib_get_err_str(status));
			goto Exit;
		}
	}

Exit:
	OSM_LOG_EXIT(p_log);
}
Exemple #17
0
static void print_node_report(cl_map_item_t * item, FILE * file, void *cxt)
{
	osm_node_t *p_node = (osm_node_t *) item;
	osm_opensm_t *osm = cxt;
	const osm_physp_t *p_physp, *p_remote_physp;
	const ib_port_info_t *p_pi;
	uint8_t port_num;
	uint32_t num_ports;
	uint8_t node_type;

	node_type = osm_node_get_type(p_node);

	num_ports = osm_node_get_num_physp(p_node);
	port_num = node_type == IB_NODE_TYPE_SWITCH ? 0 : 1;
	for (; port_num < num_ports; port_num++) {
		p_physp = osm_node_get_physp_ptr(p_node, port_num);
		if (!p_physp)
			continue;

		fprintf(file, "%-11s : %s : %02X :",
			osm_get_manufacturer_str(cl_ntoh64
						 (osm_node_get_node_guid
						  (p_node))),
			osm_get_node_type_str_fixed_width(node_type), port_num);

		p_pi = &p_physp->port_info;

		/*
		 * Port state is not defined for base switch port 0
		 */
		if (port_num == 0 &&
		    ib_switch_info_is_enhanced_port0(&p_node->sw->switch_info) == FALSE)
			fprintf(file, "     :");
		else
			fprintf(file, " %s :",
				osm_get_port_state_str_fixed_width
				(ib_port_info_get_port_state(p_pi)));

		/*
		 * LID values are only meaningful in select cases.
		 */
		if (ib_port_info_get_port_state(p_pi) != IB_LINK_DOWN
		    && ((node_type == IB_NODE_TYPE_SWITCH && port_num == 0)
			|| node_type != IB_NODE_TYPE_SWITCH))
			fprintf(file, " %04X :  %01X  :",
				cl_ntoh16(p_pi->base_lid),
				ib_port_info_get_lmc(p_pi));
		else
			fprintf(file, "      :     :");

		if (port_num == 0 &&
		    ib_switch_info_is_enhanced_port0(&p_node->sw->switch_info) == FALSE)
			fprintf(file, "      :     :      ");
		else
			fprintf(file, " %s : %s : %s ",
				osm_get_mtu_str
				(ib_port_info_get_neighbor_mtu(p_pi)),
				osm_get_lwa_str(p_pi->link_width_active),
				osm_get_lsa_str
				(ib_port_info_get_link_speed_active(p_pi),
				 ib_port_info_get_link_speed_ext_active(p_pi),
				 ib_port_info_get_port_state(p_pi),
				 p_physp->ext_port_info.link_speed_active & FDR10));

		if (osm_physp_get_port_guid(p_physp) == osm->subn.sm_port_guid)
			fprintf(file, "* %016" PRIx64 " *",
				cl_ntoh64(osm_physp_get_port_guid(p_physp)));
		else
			fprintf(file, ": %016" PRIx64 " :",
				cl_ntoh64(osm_physp_get_port_guid(p_physp)));

		if (port_num
		    && (ib_port_info_get_port_state(p_pi) != IB_LINK_DOWN)) {
			p_remote_physp = osm_physp_get_remote(p_physp);
			if (p_remote_physp)
				fprintf(file, " %016" PRIx64 " (%02X)",
					cl_ntoh64(osm_physp_get_port_guid
						  (p_remote_physp)),
					osm_physp_get_port_num(p_remote_physp));
			else
				fprintf(file, " UNKNOWN");
		}

		fprintf(file, "\n");
	}

	fprintf(file, "------------------------------------------------------"
		"------------------------------------------------\n");
}
static void pi_rcv_process_endport(IN osm_sm_t * sm, IN osm_physp_t * p_physp,
				   IN const ib_port_info_t * p_pi)
{
	osm_madw_context_t context;
	ib_api_status_t status;
	ib_net64_t port_guid;
	int extended;
	uint8_t rate, mtu, mpb;
	unsigned data_vls;
	cl_qmap_t *p_sm_tbl;
	osm_remote_sm_t *p_sm;

	OSM_LOG_ENTER(sm->p_log);

	port_guid = osm_physp_get_port_guid(p_physp);

	/* HACK extended port 0 should be handled too! */
	if (osm_physp_get_port_num(p_physp) != 0 &&
	    ib_port_info_get_port_state(p_pi) != IB_LINK_DOWN) {
		/* track the minimal endport MTU, rate, and operational VLs */
		mtu = ib_port_info_get_mtu_cap(p_pi);
		if (mtu < sm->p_subn->min_ca_mtu) {
			OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
				"Setting endport minimal MTU to:%u defined by port:0x%"
				PRIx64 "\n", mtu, cl_ntoh64(port_guid));
			sm->p_subn->min_ca_mtu = mtu;
		}

		extended = p_pi->capability_mask & IB_PORT_CAP_HAS_EXT_SPEEDS;
		rate = ib_port_info_compute_rate(p_pi, extended);
		if (ib_path_compare_rates(rate, sm->p_subn->min_ca_rate) < 0) {
			OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
				"Setting endport minimal rate to:%u defined by port:0x%"
				PRIx64 "\n", rate, cl_ntoh64(port_guid));
			sm->p_subn->min_ca_rate = rate;
		}

		data_vls = 1U << (ib_port_info_get_vl_cap(p_pi) - 1);
		if (data_vls > 1U << (sm->p_subn->opt.max_op_vls - 1))
			data_vls = 1U << (sm->p_subn->opt.max_op_vls - 1);
		if (data_vls >= IB_MAX_NUM_VLS)
			data_vls = IB_MAX_NUM_VLS - 1;
		if ((uint8_t)data_vls < sm->p_subn->min_data_vls) {
			OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
				"Setting endport minimal data VLs to:%u defined by port:0x%"
				PRIx64 "\n", data_vls, cl_ntoh64(port_guid));
			sm->p_subn->min_data_vls = data_vls;
		}
	}

	/* Check M_Key vs M_Key protect, can we control the port ? */
	mpb = ib_port_info_get_mpb(p_pi);
	if (mpb > 0 && p_pi->m_key == 0) {
		OSM_LOG(sm->p_log, OSM_LOG_INFO,
			"Port 0x%" PRIx64 " has unknown M_Key, protection level %u\n",
			cl_ntoh64(port_guid), mpb);
	}

	if (port_guid != sm->p_subn->sm_port_guid) {
		p_sm_tbl = &sm->p_subn->sm_guid_tbl;
		if (p_pi->capability_mask & IB_PORT_CAP_IS_SM) {
			/*
			 * Before querying the SM - we want to make sure we
			 * clean its state, so if the querying fails we
			 * recognize that this SM is not active.
			 */
			p_sm =
			    (osm_remote_sm_t *) cl_qmap_get(p_sm_tbl,
							    port_guid);
			if (p_sm != (osm_remote_sm_t *) cl_qmap_end(p_sm_tbl))
				/* clean it up */
				p_sm->smi.pri_state =
				    0xF0 & p_sm->smi.pri_state;
			if (sm->p_subn->opt.ignore_other_sm)
				OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
					"Ignoring SM on port 0x%" PRIx64 "\n",
					cl_ntoh64(port_guid));
			else {
				OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
					"Detected another SM. Requesting SMInfo "
					"from port 0x%" PRIx64 "\n",
					cl_ntoh64(port_guid));

				/*
				   This port indicates it's an SM and
				   it's not our own port.
				   Acquire the SMInfo Attribute.
				 */
				memset(&context, 0, sizeof(context));
				context.smi_context.set_method = FALSE;
				context.smi_context.port_guid = port_guid;
				status = osm_req_get(sm,
						     osm_physp_get_dr_path_ptr
						     (p_physp),
						     IB_MAD_ATTR_SM_INFO, 0,
						     FALSE,
						     ib_port_info_get_m_key(&p_physp->port_info),
						     CL_DISP_MSGID_NONE,
						     &context);

				if (status != IB_SUCCESS)
					OSM_LOG(sm->p_log, OSM_LOG_ERROR,
						"ERR 0F05: "
						"Failure requesting SMInfo (%s) "
						"from port 0x%" PRIx64 "\n",
						ib_get_err_str(status),
						cl_ntoh64(port_guid));
			}
		} else {
			p_sm =
			    (osm_remote_sm_t *) cl_qmap_remove(p_sm_tbl,
							       port_guid);
			if (p_sm != (osm_remote_sm_t *) cl_qmap_end(p_sm_tbl))
				free(p_sm);
		}
	}

	OSM_LOG_EXIT(sm->p_log);
}
/**********************************************************************
 The plock must be held before calling this function.
**********************************************************************/
static void
__osm_pi_rcv_process_switch_port(IN osm_sm_t * sm,
				 IN osm_node_t * const p_node,
				 IN osm_physp_t * const p_physp,
				 IN ib_port_info_t * const p_pi)
{
	ib_api_status_t status = IB_SUCCESS;
	osm_madw_context_t context;
	osm_physp_t *p_remote_physp;
	osm_node_t *p_remote_node;
	uint8_t port_num;
	uint8_t remote_port_num;
	osm_dr_path_t path;

	OSM_LOG_ENTER(sm->p_log);

	/*
	   Check the state of the physical port.
	   If there appears to be something on the other end of the wire,
	   then ask for NodeInfo.  Ignore the switch management port.
	 */
	port_num = osm_physp_get_port_num(p_physp);
	/* if in_sweep_hop_0 is TRUE, then this means the SM is on the switch,
	   and we got switchInfo of our local switch. Do not continue
	   probing through the switch. */
	if (port_num != 0 && sm->p_subn->in_sweep_hop_0 == FALSE) {
		switch (ib_port_info_get_port_state(p_pi)) {
		case IB_LINK_DOWN:
			p_remote_physp = osm_physp_get_remote(p_physp);
			if (p_remote_physp) {
				p_remote_node =
				    osm_physp_get_node_ptr(p_remote_physp);
				remote_port_num =
				    osm_physp_get_port_num(p_remote_physp);

				OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
					"Unlinking local node 0x%" PRIx64
					", port %u"
					"\n\t\t\t\tand remote node 0x%" PRIx64
					", port %u\n",
					cl_ntoh64(osm_node_get_node_guid
						  (p_node)), port_num,
					cl_ntoh64(osm_node_get_node_guid
						  (p_remote_node)),
					remote_port_num);

				if (sm->ucast_mgr.cache_valid)
					osm_ucast_cache_add_link(&sm->ucast_mgr,
								 p_physp,
								 p_remote_physp);

				osm_node_unlink(p_node, (uint8_t) port_num,
						p_remote_node,
						(uint8_t) remote_port_num);

			}
			break;

		case IB_LINK_INIT:
		case IB_LINK_ARMED:
		case IB_LINK_ACTIVE:
			/*
			   To avoid looping forever, only probe the port if it
			   is NOT the port that responded to the SMP.

			   Request node info from the other end of this link:
			   1) Copy the current path from the parent node.
			   2) Extend the path to the next hop thru this port.
			   3) Request node info with the new path

			 */
			if (p_pi->local_port_num !=
			    osm_physp_get_port_num(p_physp)) {
				path = *osm_physp_get_dr_path_ptr(p_physp);

				osm_dr_path_extend(&path,
						   osm_physp_get_port_num
						   (p_physp));

				memset(&context, 0, sizeof(context));
				context.ni_context.node_guid =
				    osm_node_get_node_guid(p_node);
				context.ni_context.port_num =
				    osm_physp_get_port_num(p_physp);

				status = osm_req_get(sm,
						     &path,
						     IB_MAD_ATTR_NODE_INFO,
						     0,
						     CL_DISP_MSGID_NONE,
						     &context);

				if (status != IB_SUCCESS)
					OSM_LOG(sm->p_log, OSM_LOG_ERROR,
						"ERR 0F02: "
						"Failure initiating NodeInfo request (%s)\n",
						ib_get_err_str(status));
			} else
				OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
					"Skipping SMP responder port %u\n",
					p_pi->local_port_num);
			break;

		default:
			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0F03: "
				"Unknown link state = %u, port = %u\n",
				ib_port_info_get_port_state(p_pi),
				p_pi->local_port_num);
			break;
		}
	}

	if (ib_port_info_get_port_state(p_pi) > IB_LINK_INIT && p_node->sw &&
	    p_node->sw->need_update == 1)
		p_node->sw->need_update = 0;

	if (p_physp->need_update)
		sm->p_subn->ignore_existing_lfts = TRUE;

	if (port_num == 0)
		pi_rcv_check_and_fix_lid(sm->p_log, p_pi, p_physp);

	/*
	   Update the PortInfo attribute.
	 */
	osm_physp_set_port_info(p_physp, p_pi);

	if (port_num == 0) {
		/* Determine if base switch port 0 */
		if (p_node->sw &&
		    !ib_switch_info_is_enhanced_port0(&p_node->sw->switch_info))
			/* PortState is not used on BSP0 but just in case it is DOWN */
			p_physp->port_info = *p_pi;
		__osm_pi_rcv_process_endport(sm, p_physp, p_pi);
	}

	OSM_LOG_EXIT(sm->p_log);
}
static void ucast_cache_validate(osm_ucast_mgr_t * p_mgr)
{
	cache_switch_t *p_cache_sw;
	cache_switch_t *p_remote_cache_sw;
	unsigned port_num;
	unsigned max_ports;
	uint8_t remote_node_type;
	uint16_t lid_ho;
	uint16_t remote_lid_ho;
	osm_switch_t *p_sw;
	osm_switch_t *p_remote_sw;
	osm_node_t *p_node;
	osm_physp_t *p_physp;
	osm_physp_t *p_remote_physp;
	osm_port_t *p_remote_port;
	cl_qmap_t *p_sw_tbl;

	OSM_LOG_ENTER(p_mgr->p_log);
	if (!p_mgr->cache_valid)
		goto Exit;

	/* If there are no switches in the subnet, we are done */
	p_sw_tbl = &p_mgr->p_subn->sw_guid_tbl;
	if (cl_qmap_count(p_sw_tbl) == 0) {
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	/*
	 * Scan all the physical switch ports in the subnet.
	 * If the port need_update flag is on, check whether
	 * it's just some node/port reset or a cached topology
	 * change. Otherwise the cache is invalid.
	 */
	for (p_sw = (osm_switch_t *) cl_qmap_head(p_sw_tbl);
	     p_sw != (osm_switch_t *) cl_qmap_end(p_sw_tbl);
	     p_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item)) {

		p_node = p_sw->p_node;

		lid_ho = cl_ntoh16(osm_node_get_base_lid(p_node, 0));
		p_cache_sw = cache_get_sw(p_mgr, lid_ho);

		max_ports = osm_node_get_num_physp(p_node);

		/* skip port 0 */
		for (port_num = 1; port_num < max_ports; port_num++) {

			p_physp = osm_node_get_physp_ptr(p_node, port_num);

			if (!p_physp || !p_physp->p_remote_physp ||
			    !osm_physp_link_exists(p_physp,
						   p_physp->p_remote_physp))
				/* no valid link */
				continue;

			/*
			 * While scanning all the physical ports in the subnet,
			 * mark corresponding leaf switches in the cache.
			 */
			if (p_cache_sw &&
			    !p_cache_sw->dropped &&
			    !cache_sw_is_leaf(p_cache_sw) &&
			    p_physp->p_remote_physp->p_node &&
			    osm_node_get_type(p_physp->p_remote_physp->
					      p_node) != IB_NODE_TYPE_SWITCH)
				cache_sw_set_leaf(p_cache_sw);

			if (!p_physp->need_update)
				continue;

			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
				"Checking switch lid %u, port %u\n",
				lid_ho, port_num);

			p_remote_physp = osm_physp_get_remote(p_physp);
			remote_node_type =
			    osm_node_get_type(p_remote_physp->p_node);

			if (remote_node_type == IB_NODE_TYPE_SWITCH)
				remote_lid_ho =
				    cl_ntoh16(osm_node_get_base_lid
					      (p_remote_physp->p_node, 0));
			else
				remote_lid_ho =
				    cl_ntoh16(osm_node_get_base_lid
					      (p_remote_physp->p_node,
					       osm_physp_get_port_num
					       (p_remote_physp)));

			if (!p_cache_sw ||
			    port_num >= p_cache_sw->num_ports ||
			    !p_cache_sw->ports[port_num].remote_lid_ho) {
				/*
				 * There is some uncached change on the port.
				 * In general, the reasons might be as follows:
				 *  - switch reset
				 *  - port reset (or port down/up)
				 *  - quick connection location change
				 *  - new link (or new switch)
				 *
				 * First two reasons allow cache usage, while
				 * the last two reasons should invalidate cache.
				 *
				 * In case of quick connection location change,
				 * cache would have been invalidated by
				 * osm_ucast_cache_check_new_link() function.
				 *
				 * In case of new link between two known nodes,
				 * cache also would have been invalidated by
				 * osm_ucast_cache_check_new_link() function.
				 *
				 * Another reason is cached link between two
				 * known switches went back. In this case the
				 * osm_ucast_cache_check_new_link() function would
				 * clear both sides of the link from the cache
				 * during the discovery process, so effectively
				 * this would be equivalent to port reset.
				 *
				 * So three possible reasons remain:
				 *  - switch reset
				 *  - port reset (or port down/up)
				 *  - link of a new switch
				 *
				 * To validate cache, we need to check only the
				 * third reason - link of a new node/switch:
				 *  - If this is the local switch that is new,
				 *    then it should have (p_sw->need_update == 2).
				 *  - If the remote node is switch and it's new,
				 *    then it also should have
				 *    (p_sw->need_update == 2).
				 *  - If the remote node is CA/RTR and it's new,
				 *    then its port should have is_new flag on.
				 */
				if (p_sw->need_update == 2) {
					OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
						"New switch found (lid %u)\n",
						lid_ho);
					osm_ucast_cache_invalidate(p_mgr);
					goto Exit;
				}

				if (remote_node_type == IB_NODE_TYPE_SWITCH) {

					p_remote_sw =
					    p_remote_physp->p_node->sw;
					if (p_remote_sw->need_update == 2) {
						/* this could also be case of
						   switch coming back with an
						   additional link that it
						   didn't have before */
						OSM_LOG(p_mgr->p_log,
							OSM_LOG_DEBUG,
							"New switch/link found (lid %u)\n",
							remote_lid_ho);
						osm_ucast_cache_invalidate
						    (p_mgr);
						goto Exit;
					}
				} else {
					/*
					 * Remote node is CA/RTR.
					 * Get p_port of the remote node and
					 * check its p_port->is_new flag.
					 */
					p_remote_port =
					    osm_get_port_by_guid(p_mgr->p_subn,
								 osm_physp_get_port_guid
								 (p_remote_physp));
					if (p_remote_port->is_new) {
						OSM_LOG(p_mgr->p_log,
							OSM_LOG_DEBUG,
							"New CA/RTR found (lid %u)\n",
							remote_lid_ho);
						osm_ucast_cache_invalidate
						    (p_mgr);
						goto Exit;
					}
				}
			} else {
				/*
				 * The change on the port is cached.
				 * In general, the reasons might be as follows:
				 *  - link between two known nodes went back
				 *  - one or more nodes went back, causing all
				 *    the links to reappear
				 *
				 * If it was link that went back, then this case
				 * would have been taken care of during the
				 * discovery by osm_ucast_cache_check_new_link(),
				 * so it's some node that went back.
				 */
				if ((p_cache_sw->ports[port_num].is_leaf &&
				     remote_node_type == IB_NODE_TYPE_SWITCH) ||
				    (!p_cache_sw->ports[port_num].is_leaf &&
				     remote_node_type != IB_NODE_TYPE_SWITCH)) {
					OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
						"Remote node type change on switch lid %u, port %u\n",
						lid_ho, port_num);
					osm_ucast_cache_invalidate(p_mgr);
					goto Exit;
				}

				if (p_cache_sw->ports[port_num].remote_lid_ho !=
				    remote_lid_ho) {
					OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
						"Remote lid change on switch lid %u, port %u"
						"(was %u, now %u)\n",
						lid_ho, port_num,
						p_cache_sw->ports[port_num].
						remote_lid_ho, remote_lid_ho);
					osm_ucast_cache_invalidate(p_mgr);
					goto Exit;
				}

				/*
				 * We don't care who is the node that has
				 * reappeared in the subnet (local or remote).
				 * What's important that the cached link matches
				 * the real fabrics link.
				 * Just clean it from cache.
				 */

				p_cache_sw->ports[port_num].remote_lid_ho = 0;
				p_cache_sw->ports[port_num].is_leaf = FALSE;
				if (p_cache_sw->dropped) {
					cache_restore_ucast_info(p_mgr,
								 p_cache_sw,
								 p_sw);
					p_cache_sw->dropped = FALSE;
				}

				OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
					"Restored link from cache: lid %u, port %u to lid %u\n",
					lid_ho, port_num, remote_lid_ho);
			}
		}
	}

	/* Remove all the cached switches that
	   have all their ports restored */
	cache_cleanup_switches(p_mgr);

	/*
	 * Done scanning all the physical switch ports in the subnet.
	 * Now we need to check the other side:
	 * Scan all the cached switches and their ports:
	 *  - If the cached switch is missing in the subnet
	 *    (dropped flag is on), check that it's a leaf switch.
	 *    If it's not a leaf, the cache is invalid, because
	 *    cache can tolerate only leaf switch removal.
	 *  - If the cached switch exists in fabric, check all
	 *    its cached ports. These cached ports represent
	 *    missing link in the fabric.
	 *    The missing links that can be tolerated are:
	 *      + link to missing CA/RTR
	 *      + link to missing leaf switch
	 */
	for (p_cache_sw = (cache_switch_t *) cl_qmap_head(&p_mgr->cache_sw_tbl);
	     p_cache_sw != (cache_switch_t *) cl_qmap_end(&p_mgr->cache_sw_tbl);
	     p_cache_sw =
	     (cache_switch_t *) cl_qmap_next(&p_cache_sw->map_item)) {

		if (p_cache_sw->dropped) {
			if (!cache_sw_is_leaf(p_cache_sw)) {
				OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
					"Missing non-leaf switch (lid %u)\n",
					cache_sw_get_base_lid_ho(p_cache_sw));
				osm_ucast_cache_invalidate(p_mgr);
				goto Exit;
			}

			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
				"Missing leaf switch (lid %u) - "
				"continuing validation\n",
				cache_sw_get_base_lid_ho(p_cache_sw));
			continue;
		}

		for (port_num = 1; port_num < p_cache_sw->num_ports; port_num++) {
			if (!p_cache_sw->ports[port_num].remote_lid_ho)
				continue;

			if (p_cache_sw->ports[port_num].is_leaf) {
				CL_ASSERT(cache_sw_is_leaf(p_cache_sw));
				OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
					"Switch lid %u, port %u: missing link to CA/RTR - "
					"continuing validation\n",
					cache_sw_get_base_lid_ho(p_cache_sw),
					port_num);
				continue;
			}

			p_remote_cache_sw = cache_get_sw(p_mgr,
							 p_cache_sw->
							 ports[port_num].
							 remote_lid_ho);

			if (!p_remote_cache_sw || !p_remote_cache_sw->dropped) {
				OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
					"Switch lid %u, port %u: missing link to existing switch\n",
					cache_sw_get_base_lid_ho(p_cache_sw),
					port_num);
				osm_ucast_cache_invalidate(p_mgr);
				goto Exit;
			}

			if (!cache_sw_is_leaf(p_remote_cache_sw)) {
				OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
					"Switch lid %u, port %u: missing link to non-leaf switch\n",
					cache_sw_get_base_lid_ho(p_cache_sw),
					port_num);
				osm_ucast_cache_invalidate(p_mgr);
				goto Exit;
			}

			/*
			 * At this point we know that the missing link is to
			 * a leaf switch. However, one case deserves a special
			 * treatment. If there was a link between two leaf
			 * switches, then missing leaf switch might break
			 * routing. It is possible that there are routes
			 * that use leaf switches to get from switch to switch
			 * and not just to get to the CAs behind the leaf switch.
			 */
			if (cache_sw_is_leaf(p_cache_sw) &&
			    cache_sw_is_leaf(p_remote_cache_sw)) {
				OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
					"Switch lid %u, port %u: missing leaf-2-leaf link\n",
					cache_sw_get_base_lid_ho(p_cache_sw),
					port_num);
				osm_ucast_cache_invalidate(p_mgr);
				goto Exit;
			}

			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
				"Switch lid %u, port %u: missing remote leaf switch - "
				"continuing validation\n",
				cache_sw_get_base_lid_ho(p_cache_sw),
				port_num);
		}
	}

	OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "Unicast cache is valid\n");
	ucast_cache_dump(p_mgr);
Exit:
	OSM_LOG_EXIT(p_mgr->p_log);
}				/* osm_ucast_cache_validate() */
static void lr_rcv_get_physp_link(IN osm_sa_t * sa,
				  IN const ib_link_record_t * p_lr,
				  IN const osm_physp_t * p_src_physp,
				  IN const osm_physp_t * p_dest_physp,
				  IN const ib_net64_t comp_mask,
				  IN cl_qlist_t * p_list,
				  IN const osm_physp_t * p_req_physp)
{
	uint8_t src_port_num;
	uint8_t dest_port_num;
	ib_net16_t from_base_lid;
	ib_net16_t to_base_lid;
	ib_net16_t lmc_mask;

	OSM_LOG_ENTER(sa->p_log);

	/*
	   If only one end of the link is specified, determine
	   the other side.
	 */
	if (p_src_physp) {
		if (p_dest_physp) {
			/*
			   Ensure the two physp's are actually connected.
			   If not, bail out.
			 */
			if (osm_physp_get_remote(p_src_physp) != p_dest_physp)
				goto Exit;
		} else {
			p_dest_physp = osm_physp_get_remote(p_src_physp);
			if (p_dest_physp == NULL)
				goto Exit;
		}
	} else {
		if (p_dest_physp) {
			p_src_physp = osm_physp_get_remote(p_dest_physp);
			if (p_src_physp == NULL)
				goto Exit;
		} else
			goto Exit;	/* no physp's, so nothing to do */
	}

	/* Check that the p_src_physp, p_dest_physp and p_req_physp
	   all share a pkey (doesn't have to be the same p_key). */
	if (!osm_physp_share_pkey(sa->p_log, p_src_physp, p_dest_physp,
				  sa->p_subn->opt.allow_both_pkeys)) {
		OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
			"Source and Dest PhysPorts do not share PKey\n");
		goto Exit;
	}
	if (!osm_physp_share_pkey(sa->p_log, p_src_physp, p_req_physp,
				  sa->p_subn->opt.allow_both_pkeys)) {
		OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
			"Source and Requester PhysPorts do not share PKey\n");
		goto Exit;
	}
	if (!osm_physp_share_pkey(sa->p_log, p_req_physp, p_dest_physp,
				  sa->p_subn->opt.allow_both_pkeys)) {
		OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
			"Requester and Dest PhysPorts do not share PKey\n");
		goto Exit;
	}

	src_port_num = osm_physp_get_port_num(p_src_physp);
	dest_port_num = osm_physp_get_port_num(p_dest_physp);

	if (comp_mask & IB_LR_COMPMASK_FROM_PORT)
		if (src_port_num != p_lr->from_port_num)
			goto Exit;

	if (comp_mask & IB_LR_COMPMASK_TO_PORT)
		if (dest_port_num != p_lr->to_port_num)
			goto Exit;

	from_base_lid = get_base_lid(p_src_physp);
	to_base_lid = get_base_lid(p_dest_physp);

	lmc_mask = ~((1 << sa->p_subn->opt.lmc) - 1);
	lmc_mask = cl_hton16(lmc_mask);

	if (comp_mask & IB_LR_COMPMASK_FROM_LID)
		if (from_base_lid != (p_lr->from_lid & lmc_mask))
			goto Exit;

	if (comp_mask & IB_LR_COMPMASK_TO_LID)
		if (to_base_lid != (p_lr->to_lid & lmc_mask))
			goto Exit;

	OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Acquiring link record\n"
		"\t\t\t\tsrc port 0x%" PRIx64 " (port %u)"
		", dest port 0x%" PRIx64 " (port %u)\n",
		cl_ntoh64(osm_physp_get_port_guid(p_src_physp)), src_port_num,
		cl_ntoh64(osm_physp_get_port_guid(p_dest_physp)),
		dest_port_num);

	lr_rcv_build_physp_link(sa, from_base_lid, to_base_lid, src_port_num,
				dest_port_num, p_list);

Exit:
	OSM_LOG_EXIT(sa->p_log);
}
static ib_api_status_t mpr_rcv_get_path_parms(IN osm_sa_t * sa,
					      IN const ib_multipath_rec_t *
					      p_mpr,
					      IN const osm_alias_guid_t * p_src_alias_guid,
					      IN const osm_alias_guid_t * p_dest_alias_guid,
					      IN const uint16_t dest_lid_ho,
					      IN const ib_net64_t comp_mask,
					      OUT osm_path_parms_t * p_parms)
{
	const osm_node_t *p_node;
	const osm_physp_t *p_physp, *p_physp0;
	const osm_physp_t *p_src_physp;
	const osm_physp_t *p_dest_physp;
	const osm_prtn_t *p_prtn = NULL;
	const ib_port_info_t *p_pi, *p_pi0;
	ib_slvl_table_t *p_slvl_tbl;
	ib_api_status_t status = IB_SUCCESS;
	uint8_t mtu;
	uint8_t rate;
	uint8_t pkt_life;
	uint8_t required_mtu;
	uint8_t required_rate;
	ib_net16_t required_pkey;
	uint8_t required_sl;
	uint8_t required_pkt_life;
	ib_net16_t dest_lid;
	int hops = 0;
	int in_port_num = 0;
	uint8_t i;
	osm_qos_level_t *p_qos_level = NULL;
	uint16_t valid_sl_mask = 0xffff;

	OSM_LOG_ENTER(sa->p_log);

	dest_lid = cl_hton16(dest_lid_ho);

	p_dest_physp = p_dest_alias_guid->p_base_port->p_physp;
	p_physp = p_src_alias_guid->p_base_port->p_physp;
	p_src_physp = p_physp;
	p_pi = &p_physp->port_info;

	mtu = ib_port_info_get_mtu_cap(p_pi);
	rate = ib_port_info_compute_rate(p_pi,
					 p_pi->capability_mask & IB_PORT_CAP_HAS_EXT_SPEEDS);

	/*
	   Mellanox Tavor device performance is better using 1K MTU.
	   If required MTU and MTU selector are such that 1K is OK
	   and at least one end of the path is Tavor we override the
	   port MTU with 1K.
	 */
	if (sa->p_subn->opt.enable_quirks &&
	    sa_multipath_rec_apply_tavor_mtu_limit(p_mpr,
						   p_src_alias_guid->p_base_port,
						   p_dest_alias_guid->p_base_port,
						   comp_mask))
		if (mtu > IB_MTU_LEN_1024) {
			mtu = IB_MTU_LEN_1024;
			OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
				"Optimized Path MTU to 1K for Mellanox Tavor device\n");
		}

	/*
	   Walk the subnet object from source to destination,
	   tracking the most restrictive rate and mtu values along the way...

	   If source port node is a switch, then p_physp should
	   point to the port that routes the destination lid
	 */

	p_node = osm_physp_get_node_ptr(p_physp);

	if (p_node->sw) {
		/*
		 * Source node is a switch.
		 * Make sure that p_physp points to the out port of the
		 * switch that routes to the destination lid (dest_lid_ho)
		 */
		p_physp = osm_switch_get_route_by_lid(p_node->sw, dest_lid);
		if (p_physp == 0) {
			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4514: "
				"Can't find routing to LID %u on switch %s "
				"(GUID 0x%016"PRIx64")\n", dest_lid_ho,
				p_node->print_desc,
				cl_ntoh64(osm_node_get_node_guid(p_node)));
			status = IB_NOT_FOUND;
			goto Exit;
		}
	}

	if (sa->p_subn->opt.qos) {

		/*
		 * Whether this node is switch or CA, the IN port for
		 * the sl2vl table is 0, because this is a source node.
		 */
		p_slvl_tbl = osm_physp_get_slvl_tbl(p_physp, 0);

		/* update valid SLs that still exist on this route */
		for (i = 0; i < IB_MAX_NUM_VLS; i++) {
			if (valid_sl_mask & (1 << i) &&
			    ib_slvl_table_get(p_slvl_tbl, i) == IB_DROP_VL)
				valid_sl_mask &= ~(1 << i);
		}
		if (!valid_sl_mask) {
			OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
				"All the SLs lead to VL15 on this path\n");
			status = IB_NOT_FOUND;
			goto Exit;
		}
	}

	/*
	 * Same as above
	 */
	p_node = osm_physp_get_node_ptr(p_dest_physp);

	if (p_node->sw) {
		/*
		 * if destination is switch, we want p_dest_physp to point to port 0
		 */
		p_dest_physp =
		    osm_switch_get_route_by_lid(p_node->sw, dest_lid);

		if (p_dest_physp == 0) {
			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4515: "
				"Can't find routing to LID %u on switch %s "
				"(GUID 0x%016"PRIx64")\n", dest_lid_ho,
				p_node->print_desc,
				cl_ntoh64(osm_node_get_node_guid(p_node)));
			status = IB_NOT_FOUND;
			goto Exit;
		}

	}

	/*
	 * Now go through the path step by step
	 */

	while (p_physp != p_dest_physp) {

		int tmp_pnum = p_physp->port_num;
		p_node = osm_physp_get_node_ptr(p_physp);
		p_physp = osm_physp_get_remote(p_physp);

		if (p_physp == 0) {
			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4505: "
				"Can't find remote phys port of %s (GUID "
				"0x%016"PRIx64") port %d "
				"while routing to LID %u",
				p_node->print_desc,
				cl_ntoh64(osm_node_get_node_guid(p_node)),
				tmp_pnum,
				dest_lid_ho);
			status = IB_ERROR;
			goto Exit;
		}

		/* update number of hops traversed */
		hops++;
		if (hops > MAX_HOPS) {
			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4520: "
				"Path from GUID 0x%016" PRIx64 " (%s) to"
				" lid %u GUID 0x%016" PRIx64 " (%s) needs"
				" more than %d hops, max %d hops allowed\n",
				cl_ntoh64(osm_physp_get_port_guid(p_src_physp)),
				p_src_physp->p_node->print_desc, dest_lid_ho,
				cl_ntoh64(osm_physp_get_port_guid
					  (p_dest_physp)),
				p_dest_physp->p_node->print_desc, hops,
				MAX_HOPS);
			status = IB_NOT_FOUND;
			goto Exit;
		}

		in_port_num = osm_physp_get_port_num(p_physp);

		/*
		   This is point to point case (no switch in between)
		 */
		if (p_physp == p_dest_physp)
			break;

		p_node = osm_physp_get_node_ptr(p_physp);

		if (!p_node->sw) {
			/*
			   There is some sort of problem in the subnet object!
			   If this isn't a switch, we should have reached
			   the destination by now!
			 */
			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4503: "
				"Internal error, bad path while routing "
				"from %s (GUID: 0x%016"PRIx64") port %d "
				"to %s (GUID: 0x%016"PRIx64") port %d; "
				"ended at %s port %d\n",
				p_src_alias_guid->p_base_port->p_node->print_desc,
				cl_ntoh64(p_src_alias_guid->p_base_port->p_node->node_info.node_guid),
				p_src_alias_guid->p_base_port->p_physp->port_num,
				p_dest_alias_guid->p_base_port->p_node->print_desc,
				cl_ntoh64(p_dest_alias_guid->p_base_port->p_node->node_info.node_guid),
				p_dest_alias_guid->p_base_port->p_physp->port_num,
				p_node->print_desc,
				p_physp->port_num);
			status = IB_ERROR;
			goto Exit;
		}

		/*
		   Check parameters for the ingress port in this switch.
		 */
		p_pi = &p_physp->port_info;

		if (mtu > ib_port_info_get_mtu_cap(p_pi))
			mtu = ib_port_info_get_mtu_cap(p_pi);

		p_physp0 = osm_node_get_physp_ptr((osm_node_t *)p_node, 0);
		p_pi0 = &p_physp0->port_info;
		if (ib_path_compare_rates(rate,
					  ib_port_info_compute_rate(p_pi,
								    p_pi0->capability_mask & IB_PORT_CAP_HAS_EXT_SPEEDS)) > 0)
			rate = ib_port_info_compute_rate(p_pi,
							 p_pi0->capability_mask & IB_PORT_CAP_HAS_EXT_SPEEDS);

		/*
		   Continue with the egress port on this switch.
		 */
		p_physp = osm_switch_get_route_by_lid(p_node->sw, dest_lid);
		if (p_physp == 0) {
			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4516: "
				"Dead end path on switch "
				"%s (GUID: 0x%016"PRIx64") to LID %u\n",
				p_node->print_desc,
				cl_ntoh64(osm_node_get_node_guid(p_node)),
				dest_lid_ho);
			status = IB_ERROR;
			goto Exit;
		}

		p_pi = &p_physp->port_info;

		if (mtu > ib_port_info_get_mtu_cap(p_pi))
			mtu = ib_port_info_get_mtu_cap(p_pi);

		p_physp0 = osm_node_get_physp_ptr((osm_node_t *)p_node, 0);
		p_pi0 = &p_physp0->port_info;
		if (ib_path_compare_rates(rate,
					  ib_port_info_compute_rate(p_pi,
								    p_pi0->capability_mask & IB_PORT_CAP_HAS_EXT_SPEEDS)) > 0)
			rate = ib_port_info_compute_rate(p_pi,
							 p_pi0->capability_mask & IB_PORT_CAP_HAS_EXT_SPEEDS);

		if (sa->p_subn->opt.qos) {
			/*
			 * Check SL2VL table of the switch and update valid SLs
			 */
			p_slvl_tbl =
			    osm_physp_get_slvl_tbl(p_physp, in_port_num);
			for (i = 0; i < IB_MAX_NUM_VLS; i++) {
				if (valid_sl_mask & (1 << i) &&
				    ib_slvl_table_get(p_slvl_tbl,
						      i) == IB_DROP_VL)
					valid_sl_mask &= ~(1 << i);
			}
			if (!valid_sl_mask) {
				OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
					"All the SLs lead to VL15 "
					"on this path\n");
				status = IB_NOT_FOUND;
				goto Exit;
			}
		}
	}

	/*
	   p_physp now points to the destination
	 */
	p_pi = &p_physp->port_info;

	if (mtu > ib_port_info_get_mtu_cap(p_pi))
		mtu = ib_port_info_get_mtu_cap(p_pi);

	if (ib_path_compare_rates(rate,
				  ib_port_info_compute_rate(p_pi,
							    p_pi->capability_mask & IB_PORT_CAP_HAS_EXT_SPEEDS)) > 0)
		rate = ib_port_info_compute_rate(p_pi,
						 p_pi->capability_mask & IB_PORT_CAP_HAS_EXT_SPEEDS);

	OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
		"Path min MTU = %u, min rate = %u\n", mtu, rate);

	/*
	 * Get QoS Level object according to the MultiPath request
	 * and adjust MultiPath parameters according to QoS settings
	 */
	if (sa->p_subn->opt.qos && sa->p_subn->p_qos_policy &&
	    (p_qos_level =
	     osm_qos_policy_get_qos_level_by_mpr(sa->p_subn->p_qos_policy,
						 p_mpr, p_src_physp,
						 p_dest_physp, comp_mask))) {

		OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
			"MultiPathRecord request matches QoS Level '%s' (%s)\n",
			p_qos_level->name,
			p_qos_level->use ? p_qos_level->use : "no description");

		if (p_qos_level->mtu_limit_set
		    && (mtu > p_qos_level->mtu_limit))
			mtu = p_qos_level->mtu_limit;

		if (p_qos_level->rate_limit_set
		    && (ib_path_compare_rates(rate, p_qos_level->rate_limit) > 0))
			rate = p_qos_level->rate_limit;

		if (p_qos_level->sl_set) {
			required_sl = p_qos_level->sl;
			if (!(valid_sl_mask & (1 << required_sl))) {
				status = IB_NOT_FOUND;
				goto Exit;
			}
		}
	}

	/*
	   Determine if these values meet the user criteria
	 */

	/* we silently ignore cases where only the MTU selector is defined */
	if ((comp_mask & IB_MPR_COMPMASK_MTUSELEC) &&
	    (comp_mask & IB_MPR_COMPMASK_MTU)) {
		required_mtu = ib_multipath_rec_mtu(p_mpr);
		switch (ib_multipath_rec_mtu_sel(p_mpr)) {
		case 0:	/* must be greater than */
			if (mtu <= required_mtu)
				status = IB_NOT_FOUND;
			break;

		case 1:	/* must be less than */
			if (mtu >= required_mtu) {
				/* adjust to use the highest mtu
				   lower then the required one */
				if (required_mtu > 1)
					mtu = required_mtu - 1;
				else
					status = IB_NOT_FOUND;
			}
			break;

		case 2:	/* exact match */
			if (mtu < required_mtu)
				status = IB_NOT_FOUND;
			else
				mtu = required_mtu;
			break;

		case 3:	/* largest available */
			/* can't be disqualified by this one */
			break;

		default:
			/* if we're here, there's a bug in ib_multipath_rec_mtu_sel() */
			CL_ASSERT(FALSE);
			status = IB_ERROR;
			break;
		}
	}
	if (status != IB_SUCCESS)
		goto Exit;

	/* we silently ignore cases where only the Rate selector is defined */
	if ((comp_mask & IB_MPR_COMPMASK_RATESELEC) &&
	    (comp_mask & IB_MPR_COMPMASK_RATE)) {
		required_rate = ib_multipath_rec_rate(p_mpr);
		switch (ib_multipath_rec_rate_sel(p_mpr)) {
		case 0:	/* must be greater than */
			if (ib_path_compare_rates(rate, required_rate) <= 0)
				status = IB_NOT_FOUND;
			break;

		case 1:	/* must be less than */
			if (ib_path_compare_rates(rate, required_rate) >= 0) {
				/* adjust the rate to use the highest rate
				   lower then the required one */
				rate = ib_path_rate_get_prev(required_rate);
				if (!rate)
					status = IB_NOT_FOUND;
			}
			break;

		case 2:	/* exact match */
			if (ib_path_compare_rates(rate, required_rate))
				status = IB_NOT_FOUND;
			else
				rate = required_rate;
			break;

		case 3:	/* largest available */
			/* can't be disqualified by this one */
			break;

		default:
			/* if we're here, there's a bug in ib_multipath_rec_mtu_sel() */
			CL_ASSERT(FALSE);
			status = IB_ERROR;
			break;
		}
	}
	if (status != IB_SUCCESS)
		goto Exit;

	/* Verify the pkt_life_time */
	/* According to spec definition IBA 1.2 Table 205 PacketLifeTime description,
	   for loopback paths, packetLifeTime shall be zero. */
	if (p_src_alias_guid->p_base_port == p_dest_alias_guid->p_base_port)
		pkt_life = 0;	/* loopback */
	else if (p_qos_level && p_qos_level->pkt_life_set)
		pkt_life = p_qos_level->pkt_life;
	else
		pkt_life = sa->p_subn->opt.subnet_timeout;

	/* we silently ignore cases where only the PktLife selector is defined */
	if ((comp_mask & IB_MPR_COMPMASK_PKTLIFETIMESELEC) &&
	    (comp_mask & IB_MPR_COMPMASK_PKTLIFETIME)) {
		required_pkt_life = ib_multipath_rec_pkt_life(p_mpr);
		switch (ib_multipath_rec_pkt_life_sel(p_mpr)) {
		case 0:	/* must be greater than */
			if (pkt_life <= required_pkt_life)
				status = IB_NOT_FOUND;
			break;

		case 1:	/* must be less than */
			if (pkt_life >= required_pkt_life) {
				/* adjust the lifetime to use the highest possible
				   lower then the required one */
				if (required_pkt_life > 1)
					pkt_life = required_pkt_life - 1;
				else
					status = IB_NOT_FOUND;
			}
			break;

		case 2:	/* exact match */
			if (pkt_life < required_pkt_life)
				status = IB_NOT_FOUND;
			else
				pkt_life = required_pkt_life;
			break;

		case 3:	/* smallest available */
			/* can't be disqualified by this one */
			break;

		default:
			/* if we're here, there's a bug in ib_path_rec_pkt_life_sel() */
			CL_ASSERT(FALSE);
			status = IB_ERROR;
			break;
		}
	}

	if (status != IB_SUCCESS)
		goto Exit;

	/*
	 * set Pkey for this MultiPath record request
	 */

	if (comp_mask & IB_MPR_COMPMASK_RAWTRAFFIC &&
	    cl_ntoh32(p_mpr->hop_flow_raw) & (1 << 31))
		required_pkey =
		    osm_physp_find_common_pkey(p_src_physp, p_dest_physp,
					       sa->p_subn->opt.allow_both_pkeys);

	else if (comp_mask & IB_MPR_COMPMASK_PKEY) {
		/*
		 * MPR request has a specific pkey:
		 * Check that source and destination share this pkey.
		 * If QoS level has pkeys, check that this pkey exists
		 * in the QoS level pkeys.
		 * MPR returned pkey is the requested pkey.
		 */
		required_pkey = p_mpr->pkey;
		if (!osm_physp_share_this_pkey
		    (p_src_physp, p_dest_physp, required_pkey,
		     sa->p_subn->opt.allow_both_pkeys)) {
			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4518: "
				"Ports src 0x%016"PRIx64" (%s port %d) "
				"and dst 0x%016"PRIx64" (%s port %d) "
				"do not share the specified PKey 0x%04x\n",
				cl_ntoh64(osm_physp_get_port_guid(p_src_physp)),
				p_src_physp->p_node->print_desc,
				p_src_physp->port_num,
				cl_ntoh64(osm_physp_get_port_guid
					  (p_dest_physp)),
				p_dest_physp->p_node->print_desc,
				p_dest_physp->port_num,
				cl_ntoh16(required_pkey));
			status = IB_NOT_FOUND;
			goto Exit;
		}
		if (p_qos_level && p_qos_level->pkey_range_len &&
		    !osm_qos_level_has_pkey(p_qos_level, required_pkey)) {
			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 451C: "
				"Ports src 0x%016"PRIx64" (%s port %d) "
				"and dst 0x%016"PRIx64" (%s port %d) "
				"do not share specified PKey (0x%04x) as "
				"defined by QoS level \"%s\"\n",
				cl_ntoh64(osm_physp_get_port_guid(p_src_physp)),
				p_src_physp->p_node->print_desc,
				p_src_physp->port_num,
				cl_ntoh64(osm_physp_get_port_guid
					  (p_dest_physp)),
				p_dest_physp->p_node->print_desc,
				p_dest_physp->port_num,
				cl_ntoh16(required_pkey),
				p_qos_level->name);
			status = IB_NOT_FOUND;
			goto Exit;
		}

	} else if (p_qos_level && p_qos_level->pkey_range_len) {
		/*
		 * MPR request doesn't have a specific pkey, but QoS level
		 * has pkeys - get shared pkey from QoS level pkeys
		 */
		required_pkey = osm_qos_level_get_shared_pkey(p_qos_level,
							      p_src_physp,
							      p_dest_physp,
							      sa->p_subn->opt.allow_both_pkeys);
		if (!required_pkey) {
			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 451D: "
				"Ports src 0x%016"PRIx64" (%s port %d) "
				"and dst 0x%016"PRIx64" (%s port %d) "
				"do not share a PKey as defined by QoS "
				"level \"%s\"\n",
				cl_ntoh64(osm_physp_get_port_guid(p_src_physp)),
				p_src_physp->p_node->print_desc,
				p_src_physp->port_num,
				cl_ntoh64(osm_physp_get_port_guid
					  (p_dest_physp)),
				p_dest_physp->p_node->print_desc,
				p_dest_physp->port_num,
				p_qos_level->name);
			status = IB_NOT_FOUND;
			goto Exit;
		}

	} else {
		/*
		 * Neither MPR request nor QoS level have pkey.
		 * Just get any shared pkey.
		 */
		required_pkey =
		    osm_physp_find_common_pkey(p_src_physp, p_dest_physp,
					       sa->p_subn->opt.allow_both_pkeys);
		if (!required_pkey) {
			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4519: "
				"Ports src 0x%016"PRIx64" (%s port %d) "
				"and dst 0x%016"PRIx64" (%s port %d) "
				"do not have any shared PKeys\n",
				cl_ntoh64(osm_physp_get_port_guid(p_src_physp)),
				p_src_physp->p_node->print_desc,
				p_src_physp->port_num,
				cl_ntoh64(osm_physp_get_port_guid
					  (p_dest_physp)),
				p_dest_physp->p_node->print_desc,
				p_dest_physp->port_num);
			status = IB_NOT_FOUND;
			goto Exit;
		}
	}

	if (required_pkey) {
		p_prtn =
		    (osm_prtn_t *) cl_qmap_get(&sa->p_subn->prtn_pkey_tbl,
					       required_pkey &
					       cl_ntoh16((uint16_t) ~ 0x8000));
		if (p_prtn ==
		    (osm_prtn_t *) cl_qmap_end(&sa->p_subn->prtn_pkey_tbl))
			p_prtn = NULL;
	}

	/*
	 * Set MultiPathRecord SL.
	 */

	if (comp_mask & IB_MPR_COMPMASK_SL) {
		/*
		 * Specific SL was requested
		 */
		required_sl = ib_multipath_rec_sl(p_mpr);

		if (p_qos_level && p_qos_level->sl_set &&
		    p_qos_level->sl != required_sl) {
			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 451E: "
				"QoS constraints: required MultiPathRecord SL "
				"(%u) doesn't match QoS policy \"%s\" SL (%u) "
				"[%s port %d <-> %s port %d]\n", required_sl,
				p_qos_level->name,
				p_qos_level->sl,
				p_src_alias_guid->p_base_port->p_node->print_desc,
				p_src_alias_guid->p_base_port->p_physp->port_num,
				p_dest_alias_guid->p_base_port->p_node->print_desc,
				p_dest_alias_guid->p_base_port->p_physp->port_num);
			status = IB_NOT_FOUND;
			goto Exit;
		}

	} else if (p_qos_level && p_qos_level->sl_set) {
		/*
		 * No specific SL was requested,
		 * but there is an SL in QoS level.
		 */
		required_sl = p_qos_level->sl;

		if (required_pkey && p_prtn && p_prtn->sl != p_qos_level->sl)
			OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
				"QoS level SL (%u) overrides partition SL (%u)\n",
				p_qos_level->sl, p_prtn->sl);

	} else if (required_pkey) {
		/*
		 * No specific SL in request or in QoS level - use partition SL
		 */
		p_prtn =
		    (osm_prtn_t *) cl_qmap_get(&sa->p_subn->prtn_pkey_tbl,
					       required_pkey &
					       cl_ntoh16((uint16_t) ~ 0x8000));
		if (!p_prtn) {
			required_sl = OSM_DEFAULT_SL;
			/* this may be possible when pkey tables are created somehow in
			   previous runs or things are going wrong here */
			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 451A: "
				"No partition found for PKey 0x%04x - "
				"using default SL %d "
				"[%s port %d <-> %s port %d]\n",
				cl_ntoh16(required_pkey), required_sl,
				p_src_alias_guid->p_base_port->p_node->print_desc,
				p_src_alias_guid->p_base_port->p_physp->port_num,
				p_dest_alias_guid->p_base_port->p_node->print_desc,
				p_dest_alias_guid->p_base_port->p_physp->port_num);
		} else
			required_sl = p_prtn->sl;

	} else if (sa->p_subn->opt.qos) {
		if (valid_sl_mask & (1 << OSM_DEFAULT_SL))
			required_sl = OSM_DEFAULT_SL;
		else {
			for (i = 0; i < IB_MAX_NUM_VLS; i++)
				if (valid_sl_mask & (1 << i))
					break;
			required_sl = i;
		}
	} else
		required_sl = OSM_DEFAULT_SL;

	if (sa->p_subn->opt.qos && !(valid_sl_mask & (1 << required_sl))) {
		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 451F: "
			"Selected SL (%u) leads to VL15 "
			"[%s port %d <-> %s port %d]\n",
			required_sl,
			p_src_alias_guid->p_base_port->p_node->print_desc,
			p_src_alias_guid->p_base_port->p_physp->port_num,
			p_dest_alias_guid->p_base_port->p_node->print_desc,
			p_dest_alias_guid->p_base_port->p_physp->port_num);
		status = IB_NOT_FOUND;
		goto Exit;
	}

	/* reset pkey when raw traffic */
	if (comp_mask & IB_MPR_COMPMASK_RAWTRAFFIC &&
	    cl_ntoh32(p_mpr->hop_flow_raw) & (1 << 31))
		required_pkey = 0;

	p_parms->mtu = mtu;
	p_parms->rate = rate;
	p_parms->pkey = required_pkey;
	p_parms->pkt_life = pkt_life;
	p_parms->sl = required_sl;
	p_parms->hops = hops;

	OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "MultiPath params:"
		" mtu = %u, rate = %u, packet lifetime = %u,"
		" pkey = 0x%04X, sl = %u, hops = %u\n", mtu, rate,
		pkt_life, cl_ntoh16(required_pkey), required_sl, hops);

Exit:
	OSM_LOG_EXIT(sa->p_log);
	return status;
}
static void
__osm_pi_rcv_process_endport(IN osm_sm_t * sm,
			     IN osm_physp_t * const p_physp,
			     IN const ib_port_info_t * const p_pi)
{
	osm_madw_context_t context;
	ib_api_status_t status;
	ib_net64_t port_guid;
	uint8_t rate, mtu;
	cl_qmap_t *p_sm_tbl;
	osm_remote_sm_t *p_sm;

	OSM_LOG_ENTER(sm->p_log);

	port_guid = osm_physp_get_port_guid(p_physp);

	/* HACK extended port 0 should be handled too! */
	if (osm_physp_get_port_num(p_physp) != 0) {
		/* track the minimal endport MTU and rate */
		mtu = ib_port_info_get_mtu_cap(p_pi);
		if (mtu < sm->p_subn->min_ca_mtu) {
			OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
				"Setting endport minimal MTU to:%u defined by port:0x%"
				PRIx64 "\n", mtu, cl_ntoh64(port_guid));
			sm->p_subn->min_ca_mtu = mtu;
		}

		rate = ib_port_info_compute_rate(p_pi);
		if (rate < sm->p_subn->min_ca_rate) {
			OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
				"Setting endport minimal rate to:%u defined by port:0x%"
				PRIx64 "\n", rate, cl_ntoh64(port_guid));
			sm->p_subn->min_ca_rate = rate;
		}
	}

	if (port_guid == sm->p_subn->sm_port_guid) {
		/*
		   We received the PortInfo for our own port.
		 */
		if (!(p_pi->capability_mask & IB_PORT_CAP_IS_SM))
			/*
			   Set the IS_SM bit to indicate our port hosts an SM.
			 */
			__osm_pi_rcv_set_sm(sm, p_physp);
	} else {
		p_sm_tbl = &sm->p_subn->sm_guid_tbl;
		if (p_pi->capability_mask & IB_PORT_CAP_IS_SM) {
			/*
			 * Before querying the SM - we want to make sure we
			 * clean its state, so if the querying fails we
			 * recognize that this SM is not active.
			 */
			p_sm = (osm_remote_sm_t *) cl_qmap_get(p_sm_tbl, port_guid);
			if (p_sm != (osm_remote_sm_t *) cl_qmap_end(p_sm_tbl))
				/* clean it up */
				p_sm->smi.pri_state = 0xF0 & p_sm->smi.pri_state;
			if (sm->p_subn->opt.ignore_other_sm)
				OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
					"Ignoring SM on port 0x%" PRIx64 "\n",
					cl_ntoh64(port_guid));
			else {
				OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
					"Detected another SM. Requesting SMInfo"
					"\n\t\t\t\tPort 0x%" PRIx64 "\n",
					cl_ntoh64(port_guid));

				/*
				   This port indicates it's an SM and
				   it's not our own port.
				   Acquire the SMInfo Attribute.
				 */
				memset(&context, 0, sizeof(context));
				context.smi_context.set_method = FALSE;
				context.smi_context.port_guid = port_guid;
				status = osm_req_get(sm,
						     osm_physp_get_dr_path_ptr
						     (p_physp),
						     IB_MAD_ATTR_SM_INFO, 0,
						     CL_DISP_MSGID_NONE,
						     &context);

				if (status != IB_SUCCESS)
					OSM_LOG(sm->p_log, OSM_LOG_ERROR,
						"ERR 0F05: "
						"Failure requesting SMInfo (%s)\n",
						ib_get_err_str(status));
			}
		} else {
			p_sm = (osm_remote_sm_t *) cl_qmap_remove(p_sm_tbl, port_guid);
			if (p_sm != (osm_remote_sm_t *) cl_qmap_end(p_sm_tbl))
				free(p_sm);
		}
	}

	OSM_LOG_EXIT(sm->p_log);
}