Example #1
0
/** ===========================================================================
 */
static void extract_guid2lid(osm_port_t *p_port, uint64_t *p_offset,
			     struct ssa_db_extract *p_ssa_db)
{
	struct ep_map_rec *p_map_rec;
#ifdef SSA_PLUGIN_VERBOSE_LOGGING
	uint8_t is_fdr10_active;

	ssa_log(SSA_LOG_VERBOSE, "Port GUID 0x%" PRIx64 " LID %u Port state %d"
		"(%s)\n", ntohll(osm_physp_get_port_guid(p_port->p_physp)),
		ntohs(osm_port_get_base_lid(p_port)),
		osm_physp_get_port_state(p_port->p_physp),
		(osm_physp_get_port_state(p_port->p_physp) < 5 ?
		 port_state_str[osm_physp_get_port_state(p_port->p_physp)] :
		 "???"));
	is_fdr10_active =
	    p_port->p_physp->ext_port_info.link_speed_active & FDR10;
	ssa_log(SSA_LOG_VERBOSE, "FDR10 %s active\n",
		is_fdr10_active ? "" : "not");
#endif

	/* check for valid LID first */
	if ((ntohs(osm_port_get_base_lid(p_port)) < IB_LID_UCAST_START_HO) ||
	    (ntohs(osm_port_get_base_lid(p_port)) > IB_LID_UCAST_END_HO)) {
		ssa_log(SSA_LOG_VERBOSE, "Port GUID 0x%" PRIx64
			" has invalid LID %u\n",
			ntohll(osm_physp_get_port_guid(p_port->p_physp)),
			ntohs(osm_port_get_base_lid(p_port)));
	}

	smdb_guid2lid_init(p_port, &p_ssa_db->p_guid_to_lid_tbl[*p_offset]);
	p_map_rec = ep_map_rec_init(*p_offset);
	if (!p_map_rec) {
		/* add memory allocation failure handling */
		ssa_log(SSA_LOG_VERBOSE, "Quick MAP rec memory allocation failed\n");
	}
	cl_qmap_insert(&p_ssa_db->ep_guid_to_lid_tbl,
		       osm_physp_get_port_guid(p_port->p_physp),
		       &p_map_rec->map_item);

	*p_offset = *p_offset + 1;
}
Example #2
0
static void dump_sl2vl_tbl(cl_map_item_t * item, FILE * file, void *cxt)
{
	osm_port_t *p_port = (osm_port_t *) item;
	osm_node_t *p_node = p_port->p_node;
	uint32_t in_port, out_port,
		 num_ports = p_node->node_info.num_ports;
	ib_net16_t base_lid = osm_port_get_base_lid(p_port);
	osm_physp_t *p_physp;
	ib_slvl_table_t *p_tbl;
	int i, n;
	char buf[1024];
	const char * header_line =	"#in out : 0  1  2  3  4  5  6  7  8  9  10 11 12 13 14 15";
	const char * separator_line = "#--------------------------------------------------------";

	if (!num_ports)
		return;

	fprintf(file, "%s 0x%016" PRIx64 ", base LID %d, "
		"\"%s\"\n%s\n%s\n",
		ib_get_node_type_str(p_node->node_info.node_type),
		cl_ntoh64(p_port->guid), cl_ntoh16(base_lid),
		p_node->print_desc, header_line, separator_line);

	if (p_node->node_info.node_type == IB_NODE_TYPE_SWITCH) {
		for (out_port = 0; out_port <= num_ports; out_port++){
			p_physp = osm_node_get_physp_ptr(p_node, out_port);

			/* no need to print SL2VL table for port that is down */
			if (!p_physp || !p_physp->p_remote_physp)
				continue;

			for (in_port = 0; in_port <= num_ports; in_port++) {
				p_tbl = osm_physp_get_slvl_tbl(p_physp, in_port);
				for (i = 0, n = 0; i < 16; i++)
					n += sprintf(buf + n, " %-2d",
						ib_slvl_table_get(p_tbl, i));
				fprintf(file, "%-3d %-3d :%s\n",
					in_port, out_port, buf);
			}
		}
	} else {
		p_physp = p_port->p_physp;
		p_tbl = osm_physp_get_slvl_tbl(p_physp, 0);
		for (i = 0, n = 0; i < 16; i++)
			n += sprintf(buf + n, " %-2d",
					ib_slvl_table_get(p_tbl, i));
		fprintf(file, "%-3d %-3d :%s\n", 0, 0, buf);
	}

	fprintf(file, "%s\n\n", separator_line);
}
static ib_api_status_t smir_rcv_new_smir(IN osm_sa_t * sa,
					 IN const osm_port_t * p_port,
					 IN cl_qlist_t * p_list,
					 IN ib_net64_t const guid,
					 IN ib_net32_t const act_count,
					 IN uint8_t const pri_state,
					 IN const osm_physp_t * p_req_physp)
{
	osm_sa_item_t *p_rec_item;
	ib_api_status_t status = IB_SUCCESS;

	OSM_LOG_ENTER(sa->p_log);

	p_rec_item = malloc(SA_SMIR_RESP_SIZE);
	if (p_rec_item == NULL) {
		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2801: "
			"rec_item alloc failed\n");
		status = IB_INSUFFICIENT_RESOURCES;
		goto Exit;
	}

	OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
		"New SMInfo: GUID 0x%016" PRIx64 "\n", cl_ntoh64(guid));

	memset(p_rec_item, 0, SA_SMIR_RESP_SIZE);

	p_rec_item->resp.sminfo_rec.lid = osm_port_get_base_lid(p_port);
	p_rec_item->resp.sminfo_rec.sm_info.guid = guid;
	p_rec_item->resp.sminfo_rec.sm_info.act_count = act_count;
	p_rec_item->resp.sminfo_rec.sm_info.pri_state = pri_state;

	cl_qlist_insert_tail(p_list, &p_rec_item->list_item);

Exit:
	OSM_LOG_EXIT(sa->p_log);
	return status;
}
static void mpr_rcv_get_apm_paths(IN osm_sa_t * sa,
				  IN const ib_multipath_rec_t * p_mpr,
				  IN const osm_port_t * p_req_port,
				  IN osm_alias_guid_t ** _pp_alias_guids,
				  IN const ib_net64_t comp_mask,
				  IN cl_qlist_t * p_list)
{
	osm_alias_guid_t *pp_alias_guids[4];
	osm_sa_item_t *matrix[2][2];
	int base_offs, src_lid_ho, dest_lid_ho;
	int sumA, sumB, minA, minB;

	OSM_LOG_ENTER(sa->p_log);

	/*
	 * We want to:
	 *    1. use different lid offsets (from base) for the resultant paths
	 *    to increase the probability of redundant paths or in case
	 *    of Clos - to ensure it (different offset => different spine!)
	 *    2. keep consistent paths no matter of direction and order of ports
	 *    3. distibute the lid offsets to balance the load
	 * So, we sort the ports (within the srcs, and within the dests),
	 * hash the lids of S0, D0 (after the sort), and call mpr_rcv_get_apm_port_pair_paths
	 * with base_lid for S0, D0 and base_lid + 1 for S1, D1. This way we will get
	 * always the same offsets - order independent, and make sure different spines are used.
	 * Note that the diagonals on a Clos have the same number of hops, so it doesn't
	 * really matter which diagonal we use.
	 */
	if (_pp_alias_guids[0]->p_base_port->guid <
	    _pp_alias_guids[1]->p_base_port->guid) {
		pp_alias_guids[0] = _pp_alias_guids[0];
		pp_alias_guids[1] = _pp_alias_guids[1];
	} else {
		pp_alias_guids[0] = _pp_alias_guids[1];
		pp_alias_guids[1] = _pp_alias_guids[0];
	}
	if (_pp_alias_guids[2]->p_base_port->guid <
	    _pp_alias_guids[3]->p_base_port->guid) {
		pp_alias_guids[2] = _pp_alias_guids[2];
		pp_alias_guids[3] = _pp_alias_guids[3];
	} else {
		pp_alias_guids[2] = _pp_alias_guids[3];
		pp_alias_guids[3] = _pp_alias_guids[2];
	}

	src_lid_ho = osm_port_get_base_lid(pp_alias_guids[0]->p_base_port);
	dest_lid_ho = osm_port_get_base_lid(pp_alias_guids[2]->p_base_port);

	base_offs = src_lid_ho < dest_lid_ho ?
	    hash_lids(src_lid_ho, dest_lid_ho, sa->p_subn->opt.lmc) :
	    hash_lids(dest_lid_ho, src_lid_ho, sa->p_subn->opt.lmc);

	matrix[0][0] =
	    mpr_rcv_get_apm_port_pair_paths(sa, p_mpr, pp_alias_guids[0],
					    pp_alias_guids[2], base_offs,
					    comp_mask, p_list);
	matrix[0][1] =
	    mpr_rcv_get_apm_port_pair_paths(sa, p_mpr, pp_alias_guids[0],
					    pp_alias_guids[3], base_offs,
					    comp_mask, p_list);
	matrix[1][0] =
	    mpr_rcv_get_apm_port_pair_paths(sa, p_mpr, pp_alias_guids[1],
					    pp_alias_guids[2], base_offs + 1,
					    comp_mask, p_list);
	matrix[1][1] =
	    mpr_rcv_get_apm_port_pair_paths(sa, p_mpr, pp_alias_guids[1],
					    pp_alias_guids[3], base_offs + 1,
					    comp_mask, p_list);

	OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "APM matrix:\n"
		"\t{0,0} 0x%X->0x%X (%d)\t| {0,1} 0x%X->0x%X (%d)\n"
		"\t{1,0} 0x%X->0x%X (%d)\t| {1,1} 0x%X->0x%X (%d)\n",
		matrix[0][0] ? matrix[0][0]->resp.mpr_rec.path_rec.slid : 0,
		matrix[0][0] ? matrix[0][0]->resp.mpr_rec.path_rec.dlid : 0,
		matrix[0][0] ? matrix[0][0]->resp.mpr_rec.hops : 0,
		matrix[0][1] ? matrix[0][1]->resp.mpr_rec.path_rec.slid : 0,
		matrix[0][1] ? matrix[0][1]->resp.mpr_rec.path_rec.dlid : 0,
		matrix[0][1] ? matrix[0][1]->resp.mpr_rec.hops : 0,
		matrix[1][0] ? matrix[1][0]->resp.mpr_rec.path_rec.slid : 0,
		matrix[1][0] ? matrix[1][0]->resp.mpr_rec.path_rec.dlid : 0,
		matrix[1][0] ? matrix[1][0]->resp.mpr_rec.hops : 0,
		matrix[1][1] ? matrix[1][1]->resp.mpr_rec.path_rec.slid : 0,
		matrix[1][1] ? matrix[1][1]->resp.mpr_rec.path_rec.dlid : 0,
		matrix[1][1] ? matrix[1][1]->resp.mpr_rec.hops : 0);

	sumA = minA = sumB = minB = 0;

	/* check diagonal A {(0,0), (1,1)} */
	if (matrix[0][0]) {
		sumA += matrix[0][0]->resp.mpr_rec.hops;
		minA = matrix[0][0]->resp.mpr_rec.hops;
	}
	if (matrix[1][1]) {
		sumA += matrix[1][1]->resp.mpr_rec.hops;
		if (minA)
			minA = min(minA, matrix[1][1]->resp.mpr_rec.hops);
		else
			minA = matrix[1][1]->resp.mpr_rec.hops;
	}

	/* check diagonal B {(0,1), (1,0)} */
	if (matrix[0][1]) {
		sumB += matrix[0][1]->resp.mpr_rec.hops;
		minB = matrix[0][1]->resp.mpr_rec.hops;
	}
	if (matrix[1][0]) {
		sumB += matrix[1][0]->resp.mpr_rec.hops;
		if (minB)
			minB = min(minB, matrix[1][0]->resp.mpr_rec.hops);
		else
			minB = matrix[1][0]->resp.mpr_rec.hops;
	}

	/* and the winner is... */
	if (minA <= minB || (minA == minB && sumA < sumB)) {
		/* Diag A */
		OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
			"Diag {0,0} & {1,1} is the best:\n"
			"\t{0,0} 0x%X->0x%X (%d)\t & {1,1} 0x%X->0x%X (%d)\n",
			matrix[0][0] ? matrix[0][0]->resp.mpr_rec.path_rec.slid : 0,
			matrix[0][0] ? matrix[0][0]->resp.mpr_rec.path_rec.dlid : 0,
			matrix[0][0] ? matrix[0][0]->resp.mpr_rec.hops : 0,
			matrix[1][1] ? matrix[1][1]->resp.mpr_rec.path_rec.slid : 0,
			matrix[1][1] ? matrix[1][1]->resp.mpr_rec.path_rec.dlid : 0,
			matrix[1][1] ? matrix[1][1]->resp.mpr_rec.hops : 0);
		if (matrix[0][0])
			cl_qlist_insert_tail(p_list, &matrix[0][0]->list_item);
		if (matrix[1][1])
			cl_qlist_insert_tail(p_list, &matrix[1][1]->list_item);
		free(matrix[0][1]);
		free(matrix[1][0]);
	} else {
		/* Diag B */
		OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
			"Diag {0,1} & {1,0} is the best:\n"
			"\t{0,1} 0x%X->0x%X (%d)\t & {1,0} 0x%X->0x%X (%d)\n",
			matrix[0][1] ? matrix[0][1]->resp.mpr_rec.path_rec.slid : 0,
			matrix[0][1] ? matrix[0][1]->resp.mpr_rec.path_rec.dlid : 0,
			matrix[0][1] ? matrix[0][1]->resp.mpr_rec.hops : 0,
			matrix[1][0] ? matrix[1][0]->resp.mpr_rec.path_rec.slid : 0,
			matrix[1][0] ? matrix[1][0]->resp.mpr_rec.path_rec.dlid: 0,
			matrix[1][0] ? matrix[1][0]->resp.mpr_rec.hops : 0);
		if (matrix[0][1])
			cl_qlist_insert_tail(p_list, &matrix[0][1]->list_item);
		if (matrix[1][0])
			cl_qlist_insert_tail(p_list, &matrix[1][0]->list_item);
		free(matrix[0][0]);
		free(matrix[1][1]);
	}

	OSM_LOG_EXIT(sa->p_log);
}
uint8_t osm_switch_recommend_mcast_path(IN osm_switch_t * p_sw,
					IN osm_port_t * p_port,
					IN uint16_t mlid_ho,
					IN boolean_t ignore_existing)
{
	uint16_t base_lid;
	uint8_t hops;
	uint8_t port_num;
	uint8_t num_ports;
	uint8_t least_hops;

	CL_ASSERT(mlid_ho >= IB_LID_MCAST_START_HO);

	if (p_port->p_node->sw) {
		if (p_port->p_node->sw == p_sw)
			return 0;
		base_lid = osm_port_get_base_lid(p_port);
	} else {
		osm_physp_t *p_physp = p_port->p_physp;
		if (!p_physp || !p_physp->p_remote_physp ||
		    !p_physp->p_remote_physp->p_node->sw)
			return OSM_NO_PATH;
		if (p_physp->p_remote_physp->p_node->sw == p_sw)
			return p_physp->p_remote_physp->port_num;
		base_lid =
		    osm_node_get_base_lid(p_physp->p_remote_physp->p_node, 0);
	}
	base_lid = cl_ntoh16(base_lid);
	num_ports = p_sw->num_ports;

	/*
	   If the user wants us to ignore existing multicast routes,
	   then simply return the shortest hop count path to the
	   target port.

	   Otherwise, return the first port that has a path to the target,
	   picking from the ports that are already in the multicast group.
	 */
	if (!ignore_existing) {
		for (port_num = 1; port_num < num_ports; port_num++) {
			if (!osm_mcast_tbl_is_port
			    (&p_sw->mcast_tbl, mlid_ho, port_num))
				continue;
			/*
			   Don't be too trusting of the current forwarding table!
			   Verify that the LID is reachable through this port.
			 */
			hops =
			    osm_switch_get_hop_count(p_sw, base_lid, port_num);
			if (hops != OSM_NO_PATH)
				return port_num;
		}
	}

	/*
	   Either no existing mcast paths reach this port or we are
	   ignoring existing paths.

	   Determine the best multicast path to the target.  Note that this
	   algorithm is slightly different from the one used for unicast route
	   recommendation.  In this case (multicast), we must NOT
	   perform any sort of load balancing.  We MUST take the FIRST
	   port found that has <= the lowest hop count path.  This prevents
	   more than one multicast path to the same remote switch which
	   prevents a multicast loop.  Multicast loops are bad since the same
	   multicast packet will go around and around, inevitably creating
	   a black hole that will destroy the Earth in a firey conflagration.
	 */
	least_hops = osm_switch_get_least_hops(p_sw, base_lid);
	if (least_hops == OSM_NO_PATH)
		return OSM_NO_PATH;
	for (port_num = 1; port_num < num_ports; port_num++)
		if (osm_switch_get_hop_count(p_sw, base_lid, port_num) ==
		    least_hops)
			break;

	CL_ASSERT(port_num < num_ports);
	return port_num;
}
uint8_t osm_switch_recommend_path(IN const osm_switch_t * p_sw,
				  IN osm_port_t * p_port, IN uint16_t lid_ho,
				  IN unsigned start_from,
				  IN boolean_t ignore_existing,
				  IN boolean_t routing_for_lmc,
				  IN boolean_t dor,
				  IN boolean_t port_shifting,
				  IN uint32_t scatter_ports)
{
	/*
	   We support an enhanced LMC aware routing mode:
	   In the case of LMC > 0, we can track the remote side
	   system and node for all of the lids of the target
	   and try and avoid routing again through the same
	   system / node.

	   Assume if routing_for_lmc is true that this procedure was
	   provided the tracking array and counter via p_port->priv,
	   and we can conduct this algorithm.
	 */
	uint16_t base_lid;
	uint8_t hops;
	uint8_t least_hops;
	uint8_t port_num;
	uint8_t num_ports;
	uint32_t least_paths = 0xFFFFFFFF;
	unsigned i;
	/*
	   The follwing will track the least paths if the
	   route should go through a new system/node
	 */
	uint32_t least_paths_other_sys = 0xFFFFFFFF;
	uint32_t least_paths_other_nodes = 0xFFFFFFFF;
	uint32_t least_forwarded_to = 0xFFFFFFFF;
	uint32_t check_count;
	uint8_t best_port = 0;
	/*
	   These vars track the best port if it connects to
	   not used system/node.
	 */
	uint8_t best_port_other_sys = 0;
	uint8_t best_port_other_node = 0;
	boolean_t port_found = FALSE;
	osm_physp_t *p_physp;
	osm_physp_t *p_rem_physp;
	osm_node_t *p_rem_node;
	osm_node_t *p_rem_node_first = NULL;
	struct osm_remote_node *p_remote_guid = NULL;
	struct osm_remote_node null_remote_node = {NULL, 0, 0};
	struct switch_port_path port_paths[IB_NODE_NUM_PORTS_MAX];
	unsigned int port_paths_total_paths = 0;
	unsigned int port_paths_count = 0;
	uint8_t scatter_possible_ports[IB_NODE_NUM_PORTS_MAX];
	unsigned int scatter_possible_ports_count = 0;
	int found_sys_guid = 0;
	int found_node_guid = 0;

	CL_ASSERT(lid_ho > 0);

	if (p_port->p_node->sw) {
		if (p_port->p_node->sw == p_sw)
			return 0;
		base_lid = osm_port_get_base_lid(p_port);
	} else {
		p_physp = p_port->p_physp;
		if (!p_physp || !p_physp->p_remote_physp ||
		    !p_physp->p_remote_physp->p_node->sw)
			return OSM_NO_PATH;

		if (p_physp->p_remote_physp->p_node->sw == p_sw)
			return p_physp->p_remote_physp->port_num;
		base_lid =
		    osm_node_get_base_lid(p_physp->p_remote_physp->p_node, 0);
	}
	base_lid = cl_ntoh16(base_lid);

	num_ports = p_sw->num_ports;

	least_hops = osm_switch_get_least_hops(p_sw, base_lid);
	if (least_hops == OSM_NO_PATH)
		return OSM_NO_PATH;

	/*
	   First, inquire with the forwarding table for an existing
	   route.  If one is found, honor it unless:
	   1. the ignore existing flag is set.
	   2. the physical port is not a valid one or not healthy
	   3. the physical port has a remote port (the link is up)
	   4. the port has min-hops to the target (avoid loops)
	 */
	if (!ignore_existing) {
		port_num = osm_switch_get_port_by_lid(p_sw, lid_ho);

		if (port_num != OSM_NO_PATH) {
			CL_ASSERT(port_num < num_ports);

			p_physp =
			    osm_node_get_physp_ptr(p_sw->p_node, port_num);
			/*
			   Don't be too trusting of the current forwarding table!
			   Verify that the port number is legal and that the
			   LID is reachable through this port.
			 */
			if (p_physp && osm_physp_is_healthy(p_physp) &&
			    osm_physp_get_remote(p_physp)) {
				hops =
				    osm_switch_get_hop_count(p_sw, base_lid,
							     port_num);
				/*
				   If we aren't using pre-defined user routes
				   function, then we need to make sure that the
				   current path is the minimum one. In case of
				   having such a user function - this check will
				   not be done, and the old routing will be used.
				   Note: This means that it is the user's job to
				   clean all data in the forwarding tables that
				   he wants to be overridden by the minimum
				   hop function.
				 */
				if (hops == least_hops)
					return port_num;
			}
		}
	}

	/*
	   This algorithm selects a port based on a static load balanced
	   selection across equal hop-count ports.
	   There is lots of room for improved sophistication here,
	   possibly guided by user configuration info.
	 */

	/*
	   OpenSM routing is "local" - not considering a full lid to lid
	   path. As such we can not guarantee a path will not loop if we
	   do not always follow least hops.
	   So we must abort if not least hops.
	 */

	/* port number starts with one and num_ports is 1 + num phys ports */
	for (i = start_from; i < start_from + num_ports; i++) {
		port_num = osm_switch_get_dimn_port(p_sw, i % num_ports);
		if (!port_num ||
		    osm_switch_get_hop_count(p_sw, base_lid, port_num) !=
		    least_hops)
			continue;

		/* let us make sure it is not down or unhealthy */
		p_physp = osm_node_get_physp_ptr(p_sw->p_node, port_num);
		if (!p_physp || !osm_physp_is_healthy(p_physp) ||
		    /*
		       we require all - non sma ports to be linked
		       to be routed through
		     */
		    !osm_physp_get_remote(p_physp))
			continue;

		/*
		   We located a least-hop port, possibly one of many.
		   For this port, check the running total count of
		   the number of paths through this port.  Select
		   the port routing the least number of paths.
		 */
		check_count =
		    osm_port_prof_path_count_get(&p_sw->p_prof[port_num]);


		if (dor) {
			/* Get the Remote Node */
			p_rem_physp = osm_physp_get_remote(p_physp);
			p_rem_node = osm_physp_get_node_ptr(p_rem_physp);
			/* use the first dimension, but spread traffic
			 * out among the group of ports representing
			 * that dimension */
			if (!p_rem_node_first)
				p_rem_node_first = p_rem_node;
			else if (p_rem_node != p_rem_node_first)
				continue;
			if (routing_for_lmc) {
				struct osm_remote_guids_count *r = p_port->priv;
				uint8_t rem_port = osm_physp_get_port_num(p_rem_physp);
				unsigned int j;

				for (j = 0; j < r->count; j++) {
					p_remote_guid = &r->guids[j];
					if ((p_remote_guid->node == p_rem_node)
					    && (p_remote_guid->port == rem_port))
						break;
				}
				if (j == r->count)
					p_remote_guid = &null_remote_node;
			}
		/*
		   Advanced LMC routing requires tracking of the
		   best port by the node connected to the other side of
		   it.
		 */
		} else if (routing_for_lmc) {
			/* Is the sys guid already used ? */
			p_remote_guid = switch_find_sys_guid_count(p_sw,
								   p_port->priv,
								   port_num);

			/* If not update the least hops for this case */
			if (!p_remote_guid) {
				if (check_count < least_paths_other_sys) {
					least_paths_other_sys = check_count;
					best_port_other_sys = port_num;
					least_forwarded_to = 0;
				}
				found_sys_guid = 0;
			} else {	/* same sys found - try node */


				/* Else is the node guid already used ? */
				p_remote_guid = switch_find_node_guid_count(p_sw,
									    p_port->priv,
									    port_num);

				/* If not update the least hops for this case */
				if (!p_remote_guid
				    && check_count < least_paths_other_nodes) {
					least_paths_other_nodes = check_count;
					best_port_other_node = port_num;
					least_forwarded_to = 0;
				}
				/* else prior sys and node guid already used */

				if (!p_remote_guid)
					found_node_guid = 0;
				else
					found_node_guid = 1;
				found_sys_guid = 1;
			}	/* same sys found */
		}

		port_paths[port_paths_count].port_num = port_num;
		port_paths[port_paths_count].path_count = check_count;
		if (routing_for_lmc) {
			port_paths[port_paths_count].found_sys_guid = found_sys_guid;
			port_paths[port_paths_count].found_node_guid = found_node_guid;
		}
		if (routing_for_lmc && p_remote_guid)
			port_paths[port_paths_count].forwarded_to = p_remote_guid->forwarded_to;
		else
			port_paths[port_paths_count].forwarded_to = 0;
		port_paths_total_paths += check_count;
		port_paths_count++;

		/* routing for LMC mode */
		/*
		   the count is min but also lower then the max subscribed
		 */
		if (check_count < least_paths) {
			port_found = TRUE;
			best_port = port_num;
			least_paths = check_count;
			scatter_possible_ports_count = 0;
			scatter_possible_ports[scatter_possible_ports_count++] = port_num;
			if (routing_for_lmc
			    && p_remote_guid
			    && p_remote_guid->forwarded_to < least_forwarded_to)
				least_forwarded_to = p_remote_guid->forwarded_to;
		} else if (scatter_ports
			   && check_count == least_paths) {
			scatter_possible_ports[scatter_possible_ports_count++] = port_num;
		} else if (routing_for_lmc
			   && p_remote_guid
			   && check_count == least_paths
			   && p_remote_guid->forwarded_to < least_forwarded_to) {
			least_forwarded_to = p_remote_guid->forwarded_to;
			best_port = port_num;
		}
	}

	if (port_found == FALSE)
		return OSM_NO_PATH;

	if (port_shifting && port_paths_count) {
		/* In the port_paths[] array, we now have all the ports that we
		 * can route out of.  Using some shifting math below, possibly
		 * select a different one so that lids won't align in LFTs
		 *
		 * If lmc > 0, we need to loop through these ports to find the
		 * least_forwarded_to port, best_port_other_sys, and
		 * best_port_other_node just like before but through the different
		 * ordering.
		 */

		least_paths = 0xFFFFFFFF;
		least_paths_other_sys = 0xFFFFFFFF;
		least_paths_other_nodes = 0xFFFFFFFF;
	        least_forwarded_to = 0xFFFFFFFF;
		best_port = 0;
		best_port_other_sys = 0;
		best_port_other_node = 0;

		for (i = 0; i < port_paths_count; i++) {
			unsigned int idx;

			idx = (port_paths_total_paths/port_paths_count + i) % port_paths_count;

			if (routing_for_lmc) {
				if (!port_paths[idx].found_sys_guid
				    && port_paths[idx].path_count < least_paths_other_sys) {
					least_paths_other_sys = port_paths[idx].path_count;
					best_port_other_sys = port_paths[idx].port_num;
					least_forwarded_to = 0;
				}
				else if (!port_paths[idx].found_node_guid
					 && port_paths[idx].path_count < least_paths_other_nodes) {
					least_paths_other_nodes = port_paths[idx].path_count;
					best_port_other_node = port_paths[idx].port_num;
					least_forwarded_to = 0;
				}
			}

			if (port_paths[idx].path_count < least_paths) {
				best_port = port_paths[idx].port_num;
				least_paths = port_paths[idx].path_count;
				if (routing_for_lmc
				    && (port_paths[idx].found_sys_guid
					|| port_paths[idx].found_node_guid)
				    && port_paths[idx].forwarded_to < least_forwarded_to)
					least_forwarded_to = port_paths[idx].forwarded_to;
			}
			else if (routing_for_lmc
				 && (port_paths[idx].found_sys_guid
				     || port_paths[idx].found_node_guid)
				 && port_paths[idx].path_count == least_paths
				 && port_paths[idx].forwarded_to < least_forwarded_to) {
				least_forwarded_to = port_paths[idx].forwarded_to;
				best_port = port_paths[idx].port_num;
			}

		}
	}

	/*
	   if we are in enhanced routing mode and the best port is not
	   the local port 0
	 */
	if (routing_for_lmc && best_port && !scatter_ports) {
		/* Select the least hop port of the non used sys first */
		if (best_port_other_sys)
			best_port = best_port_other_sys;
		else if (best_port_other_node)
			best_port = best_port_other_node;
	} else if (scatter_ports) {
		/*
		 * There is some danger that this random could "rebalance" the routes
		 * every time, to combat this there is a global srandom that
		 * occurs at the start of every sweep.
		 */
		unsigned int idx = random() % scatter_possible_ports_count;
		best_port = scatter_possible_ports[idx];
	}
	return best_port;
}
Example #7
0
uint8_t
osm_switch_recommend_path(IN const osm_switch_t * const p_sw,
			  IN osm_port_t * p_port,
			  IN const uint16_t lid_ho,
			  IN unsigned start_from,
			  IN const boolean_t ignore_existing,
			  IN const boolean_t dor)
{
	/*
	   We support an enhanced LMC aware routing mode:
	   In the case of LMC > 0, we can track the remote side
	   system and node for all of the lids of the target
	   and try and avoid routing again through the same
	   system / node.

	   If this procedure is provided with the tracking array
	   and counter we can conduct this algorithm.
	 */
	boolean_t routing_for_lmc = (p_port->priv != NULL);
	uint16_t base_lid;
	uint8_t hops;
	uint8_t least_hops;
	uint8_t port_num;
	uint8_t num_ports;
	uint32_t least_paths = 0xFFFFFFFF;
	unsigned i;
	/*
	   The follwing will track the least paths if the
	   route should go through a new system/node
	 */
	uint32_t least_paths_other_sys = 0xFFFFFFFF;
	uint32_t least_paths_other_nodes = 0xFFFFFFFF;
	uint32_t least_forwarded_to = 0xFFFFFFFF;
	uint32_t check_count;
	uint8_t best_port = 0;
	/*
	   These vars track the best port if it connects to
	   not used system/node.
	 */
	uint8_t best_port_other_sys = 0;
	uint8_t best_port_other_node = 0;
	boolean_t port_found = FALSE;
	osm_physp_t *p_physp;
	osm_physp_t *p_rem_physp;
	osm_node_t *p_rem_node;
	osm_node_t *p_rem_node_first = NULL;
	struct osm_remote_node *p_remote_guid = NULL;

	CL_ASSERT(lid_ho > 0);

	if (p_port->p_node->sw) {
		if (p_port->p_node->sw == p_sw)
			return 0;
		base_lid = osm_port_get_base_lid(p_port);
	} else {
		p_physp = p_port->p_physp;
		if (!p_physp || !p_physp->p_remote_physp ||
		    !p_physp->p_remote_physp->p_node->sw)
			return OSM_NO_PATH;

		if (p_physp->p_remote_physp->p_node->sw == p_sw)
			return p_physp->p_remote_physp->port_num;
		base_lid =
		    osm_node_get_base_lid(p_physp->p_remote_physp->p_node, 0);
	}
	base_lid = cl_ntoh16(base_lid);

	num_ports = p_sw->num_ports;

	least_hops = osm_switch_get_least_hops(p_sw, base_lid);
	if (least_hops == OSM_NO_PATH)
		return (OSM_NO_PATH);

	/*
	   First, inquire with the forwarding table for an existing
	   route.  If one is found, honor it unless:
	   1. the ignore existing flag is set.
	   2. the physical port is not a valid one or not healthy
	   3. the physical port has a remote port (the link is up)
	   4. the port has min-hops to the target (avoid loops)
	 */
	if (!ignore_existing) {
		port_num = osm_switch_get_port_by_lid(p_sw, lid_ho);

		if (port_num != OSM_NO_PATH) {
			CL_ASSERT(port_num < num_ports);

			p_physp =
			    osm_node_get_physp_ptr(p_sw->p_node, port_num);
			/*
			   Don't be too trusting of the current forwarding table!
			   Verify that the port number is legal and that the
			   LID is reachable through this port.
			 */
			if (p_physp && osm_physp_is_healthy(p_physp) &&
			    osm_physp_get_remote(p_physp)) {
				hops =
				    osm_switch_get_hop_count(p_sw, base_lid,
							     port_num);
				/*
				   If we aren't using pre-defined user routes
				   function, then we need to make sure that the
				   current path is the minimum one. In case of
				   having such a user function - this check will
				   not be done, and the old routing will be used.
				   Note: This means that it is the user's job to
				   clean all data in the forwarding tables that
				   he wants to be overridden by the minimum
				   hop function.
				 */
				if (hops == least_hops)
					return (port_num);
			}
		}
	}

	/*
	   This algorithm selects a port based on a static load balanced
	   selection across equal hop-count ports.
	   There is lots of room for improved sophistication here,
	   possibly guided by user configuration info.
	 */

	/*
	   OpenSM routing is "local" - not considering a full lid to lid
	   path. As such we can not guarantee a path will not loop if we
	   do not always follow least hops.
	   So we must abort if not least hops.
	 */

	/* port number starts with one and num_ports is 1 + num phys ports */
	for (i = start_from; i < start_from + num_ports; i++) {
		port_num = i%num_ports;
		if (!port_num ||
		    osm_switch_get_hop_count(p_sw, base_lid, port_num) !=
		    least_hops)
			continue;

		/* let us make sure it is not down or unhealthy */
		p_physp = osm_node_get_physp_ptr(p_sw->p_node, port_num);
		if (!p_physp || !osm_physp_is_healthy(p_physp) ||
		    /*
		       we require all - non sma ports to be linked
		       to be routed through
		     */
		    !osm_physp_get_remote(p_physp))
			continue;

		/*
		   We located a least-hop port, possibly one of many.
		   For this port, check the running total count of
		   the number of paths through this port.  Select
		   the port routing the least number of paths.
		 */
		check_count =
		    osm_port_prof_path_count_get(&p_sw->p_prof[port_num]);

		/*
		   Advanced LMC routing requires tracking of the
		   best port by the node connected to the other side of
		   it.
		 */
		if (routing_for_lmc) {
			/* Is the sys guid already used ? */
			p_remote_guid = osm_switch_find_sys_guid_count(p_sw,
								       p_port->priv,
								       port_num);

			/* If not update the least hops for this case */
			if (!p_remote_guid) {
				if (check_count < least_paths_other_sys) {
					least_paths_other_sys = check_count;
					best_port_other_sys = port_num;
					least_forwarded_to = 0;
				}
			} else {	/* same sys found - try node */
				/* Else is the node guid already used ? */
				p_remote_guid = osm_switch_find_node_guid_count(p_sw,
										p_port->priv,
										port_num);

				/* If not update the least hops for this case */
				if (!p_remote_guid
				    && check_count < least_paths_other_nodes) {
					least_paths_other_nodes = check_count;
					best_port_other_node = port_num;
					least_forwarded_to = 0;
				}
				/* else prior sys and node guid already used */

			}	/* same sys found */
		}

		/* routing for LMC mode */
		/*
		   the count is min but also lower then the max subscribed
		 */
		if (check_count < least_paths) {
			if (dor) {
				/* Get the Remote Node */
				p_rem_physp = osm_physp_get_remote(p_physp);
				p_rem_node =
				    osm_physp_get_node_ptr(p_rem_physp);
				/* use the first dimension, but spread
				 * traffic out among the group of ports
				 * representing that dimension */
				if (port_found) {
					if (p_rem_node != p_rem_node_first)
						continue;
				} else
					p_rem_node_first = p_rem_node;
			}
			port_found = TRUE;
			best_port = port_num;
			least_paths = check_count;
			if (routing_for_lmc
			    && p_remote_guid
			    && p_remote_guid->forwarded_to < least_forwarded_to)
				least_forwarded_to = p_remote_guid->forwarded_to;
		} else if (routing_for_lmc
			   && p_remote_guid
			   && check_count == least_paths
			   && p_remote_guid->forwarded_to < least_forwarded_to) {
			least_forwarded_to = p_remote_guid->forwarded_to;
			best_port = port_num;
		}
	}

	if (port_found == FALSE)
		return (OSM_NO_PATH);

	/*
	   if we are in enhanced routing mode and the best port is not
	   the local port 0
	 */
	if (routing_for_lmc && best_port) {
		/* Select the least hop port of the non used sys first */
		if (best_port_other_sys)
			best_port = best_port_other_sys;
		else if (best_port_other_node)
			best_port = best_port_other_node;
	}

	return (best_port);
}
Example #8
0
static void lftr_rcv_by_comp_mask(IN cl_map_item_t * p_map_item, IN void *cxt)
{
	const osm_lftr_search_ctxt_t *p_ctxt = cxt;
	const osm_switch_t *p_sw = (osm_switch_t *) p_map_item;
	const ib_lft_record_t *const p_rcvd_rec = p_ctxt->p_rcvd_rec;
	osm_sa_t *sa = p_ctxt->sa;
	ib_net64_t const comp_mask = p_ctxt->comp_mask;
	const osm_physp_t *const p_req_physp = p_ctxt->p_req_physp;
	osm_port_t *p_port;
	uint16_t min_lid_ho, max_lid_ho;
	uint16_t min_block, max_block, block;
	const osm_physp_t *p_physp;

	/* In switches, the port guid is the node guid. */
	p_port = osm_get_port_by_guid(sa->p_subn,
				      p_sw->p_node->node_info.port_guid);
	if (!p_port) {
		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4405: "
			"Failed to find Port by Node Guid:0x%016" PRIx64
			"\n", cl_ntoh64(p_sw->p_node->node_info.node_guid));
		return;
	}

	/* check that the requester physp and the current physp are under
	   the same partition. */
	p_physp = p_port->p_physp;
	if (!p_physp) {
		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4406: "
			"Failed to find default physical Port by Node Guid:0x%016"
			PRIx64 "\n",
			cl_ntoh64(p_sw->p_node->node_info.node_guid));
		return;
	}
	if (!osm_physp_share_pkey(sa->p_log, p_req_physp,
				  p_physp, sa->p_subn->opt.allow_both_pkeys))
		return;

	/* get the port 0 of the switch */
	osm_port_get_lid_range_ho(p_port, &min_lid_ho, &max_lid_ho);

	/* compare the lids - if required */
	if (comp_mask & IB_LFTR_COMPMASK_LID) {
		OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
			"Comparing lid:%u to port lid range: %u .. %u\n",
			cl_ntoh16(p_rcvd_rec->lid), min_lid_ho, max_lid_ho);
		/* ok we are ready for range check */
		if (min_lid_ho > cl_ntoh16(p_rcvd_rec->lid) ||
		    max_lid_ho < cl_ntoh16(p_rcvd_rec->lid))
			return;
	}

	/* now we need to decide which blocks to output */
	max_block = osm_switch_get_max_block_id_in_use(p_sw);
	if (comp_mask & IB_LFTR_COMPMASK_BLOCK) {
		min_block = cl_ntoh16(p_rcvd_rec->block_num);
		if (min_block > max_block)
			return;
		max_block = min_block;
	} else			/* use as many blocks as "in use" */
		min_block = 0;

	/* so we can add these blocks one by one ... */
	for (block = min_block; block <= max_block; block++)
		lftr_rcv_new_lftr(sa, p_sw, p_ctxt->p_list,
				  osm_port_get_base_lid(p_port), block);
}