Esempio n. 1
0
/**********************************************************************
 Add each switch's own and neighbor LIDs to its LID matrix
**********************************************************************/
static void
__osm_ucast_mgr_process_hop_0_1(IN cl_map_item_t * const p_map_item,
				IN void *context)
{
	osm_switch_t *const p_sw = (osm_switch_t *) p_map_item;
	osm_node_t *p_remote_node;
	uint16_t lid, remote_lid;
	uint8_t i, remote_port;

	lid = osm_node_get_base_lid(p_sw->p_node, 0);
	lid = cl_ntoh16(lid);
	osm_switch_set_hops(p_sw, lid, 0, 0);

	for (i = 1; i < p_sw->num_ports; i++) {
		p_remote_node =
		    osm_node_get_remote_node(p_sw->p_node, i, &remote_port);

		if (p_remote_node && p_remote_node->sw &&
		    (p_remote_node != p_sw->p_node)) {
			remote_lid = osm_node_get_base_lid(p_remote_node, 0);
			remote_lid = cl_ntoh16(remote_lid);
			osm_switch_set_hops(p_sw, remote_lid, i, 1);
			osm_switch_set_hops(p_remote_node->sw, lid, remote_port,
					    1);
		}
	}
}
Esempio n. 2
0
/** ===========================================================================
 */
static void extract_lft(osm_switch_t *p_sw, uint64_t *p_top_offset,
			uint64_t *p_block_offset)
{
	struct ep_map_rec *p_map_rec;
	uint64_t rec_key;
	uint16_t max_block, lid_ho, i;

	max_block = p_sw->lft_size / IB_SMP_DATA_SIZE;
	lid_ho = ntohs(osm_node_get_base_lid(p_sw->p_node, 0));
	rec_key = (uint64_t) lid_ho;

	smdb_lft_top_init(lid_ho, p_sw->lft_size,
			  &ssa_db->p_lft_db->p_db_lft_top_tbl[*p_top_offset]);
	p_map_rec = ep_map_rec_init(*p_top_offset);
	cl_qmap_insert(&ssa_db->p_lft_db->ep_db_lft_top_tbl,
		       rec_key, &p_map_rec->map_item);
	*p_top_offset = *p_top_offset + 1;

	for (i = 0; i < max_block; i++) {
		rec_key = ep_rec_gen_key(lid_ho, i);
		smdb_lft_block_init(p_sw, lid_ho, i,
				    &ssa_db->p_lft_db->p_db_lft_block_tbl[*p_block_offset]);

		p_map_rec = ep_map_rec_init(*p_block_offset);
		cl_qmap_insert(&ssa_db->p_lft_db->ep_db_lft_block_tbl,
			       rec_key, &p_map_rec->map_item);
		*p_block_offset = *p_block_offset + 1;
	}
}
Esempio n. 3
0
uint8_t osm_switch_get_port_least_hops(IN const osm_switch_t * p_sw,
				       IN const osm_port_t * p_port)
{
	uint16_t lid;

	if (p_port->p_node->sw) {
		if (p_port->p_node->sw == p_sw)
			return 0;
		lid = osm_node_get_base_lid(p_port->p_node, 0);
		return osm_switch_get_least_hops(p_sw, cl_ntoh16(lid));
	} else {
		osm_physp_t *p = p_port->p_physp;
		uint8_t hops;

		if (!p || !p->p_remote_physp || !p->p_remote_physp->p_node->sw)
			return OSM_NO_PATH;
		if (p->p_remote_physp->p_node->sw == p_sw)
			return 1;
		lid = osm_node_get_base_lid(p->p_remote_physp->p_node, 0);
		hops = osm_switch_get_least_hops(p_sw, cl_ntoh16(lid));
		return hops != OSM_NO_PATH ? hops + 1 : OSM_NO_PATH;
	}
}
Esempio n. 4
0
/**********************************************************************
 Add each switch's own and neighbor LIDs to its LID matrix
**********************************************************************/
static void ucast_mgr_process_hop_0_1(IN cl_map_item_t * p_map_item,
				      IN void *context)
{
	osm_switch_t * p_sw = (osm_switch_t *) p_map_item;
	osm_node_t *p_remote_node;
	uint16_t lid, remote_lid;
	uint8_t i;

	lid = cl_ntoh16(osm_node_get_base_lid(p_sw->p_node, 0));
	osm_switch_set_hops(p_sw, lid, 0, 0);

	for (i = 1; i < p_sw->num_ports; i++) {
		osm_physp_t *p = osm_node_get_physp_ptr(p_sw->p_node, i);
		p_remote_node = (p && p->p_remote_physp) ?
		    p->p_remote_physp->p_node : NULL;

		if (p_remote_node && p_remote_node->sw &&
		    p_remote_node != p_sw->p_node) {
			remote_lid = osm_node_get_base_lid(p_remote_node, 0);
			remote_lid = cl_ntoh16(remote_lid);
			osm_switch_set_hops(p_sw, remote_lid, i, p->hop_wf);
		}
	}
}
Esempio n. 5
0
/** =========================================================================
 */
void ep_link_tbl_rec_init(osm_physp_t *p_physp, struct ep_link_tbl_rec *p_rec)
{
	osm_physp_t *p_remote_physp;

	if (osm_node_get_type(p_physp->p_node) == IB_NODE_TYPE_SWITCH) {
		p_rec->from_lid = osm_node_get_base_lid(p_physp->p_node, 0);
		p_rec->from_port_num = osm_physp_get_port_num(p_physp);
	} else {
		p_rec->from_lid = osm_physp_get_base_lid(p_physp);
		p_rec->from_port_num = 0;
	}

	p_remote_physp = osm_physp_get_remote(p_physp);

	if (osm_node_get_type(p_remote_physp->p_node) ==
						IB_NODE_TYPE_SWITCH) {
		p_rec->to_lid = osm_node_get_base_lid(p_remote_physp->p_node, 0);
		p_rec->to_port_num =osm_physp_get_port_num(p_remote_physp);
	} else {
		p_rec->to_lid = osm_physp_get_base_lid(p_remote_physp);
		p_rec->to_port_num = 0;
	}
	memset(&p_rec->pad, 0, sizeof(p_rec->pad));
}
static void sa_pkey_create(IN osm_sa_t * sa, IN osm_physp_t * p_physp,
			   IN osm_pkey_search_ctxt_t * p_ctxt,
			   IN uint16_t block)
{
	osm_pkey_item_t *p_rec_item;
	uint16_t lid;
	ib_pkey_table_t *tbl;

	OSM_LOG_ENTER(sa->p_log);

	p_rec_item = malloc(sizeof(*p_rec_item));
	if (p_rec_item == NULL) {
		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4602: "
			"rec_item alloc failed\n");
		goto Exit;
	}

	if (p_physp->p_node->node_info.node_type != IB_NODE_TYPE_SWITCH)
		lid = p_physp->port_info.base_lid;
	else
		lid = osm_node_get_base_lid(p_physp->p_node, 0);

	OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
		"New P_Key table for: port 0x%016" PRIx64
		", lid %u, port %u Block:%u\n",
		cl_ntoh64(osm_physp_get_port_guid(p_physp)),
		cl_ntoh16(lid), osm_physp_get_port_num(p_physp), block);

	memset(p_rec_item, 0, sizeof(*p_rec_item));

	p_rec_item->rec.lid = lid;
	p_rec_item->rec.block_num = block;
	p_rec_item->rec.port_num = osm_physp_get_port_num(p_physp);
	/* FIXME: There are ninf.PartitionCap or swinf.PartitionEnforcementCap
	   pkey entries so everything in that range is a valid block number
	   even if opensm is not using it. Return 0. However things outside
	   that range should return no entries.. Not sure how to figure that
	   here? The range of pkey_tbl can be less than the cap, so
	   this falsely triggers. */
	tbl = osm_pkey_tbl_block_get(osm_physp_get_pkey_tbl(p_physp), block);
	if (tbl)
		p_rec_item->rec.pkey_tbl = *tbl;

	cl_qlist_insert_tail(p_ctxt->p_list, &p_rec_item->list_item);

Exit:
	OSM_LOG_EXIT(sa->p_log);
}
Esempio n. 7
0
static void ucast_mgr_process_neighbor(IN osm_ucast_mgr_t * p_mgr,
				       IN osm_switch_t * p_this_sw,
				       IN osm_switch_t * p_remote_sw,
				       IN uint8_t port_num,
				       IN uint8_t remote_port_num)
{
	osm_switch_t *p_sw;
	cl_map_item_t *item;
	uint16_t lid_ho;
	uint16_t hops;
	osm_physp_t *p;

	OSM_LOG_ENTER(p_mgr->p_log);

	OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
		"Node 0x%" PRIx64 ", remote node 0x%" PRIx64
		", port %u, remote port %u\n",
		cl_ntoh64(osm_node_get_node_guid(p_this_sw->p_node)),
		cl_ntoh64(osm_node_get_node_guid(p_remote_sw->p_node)),
		port_num, remote_port_num);

	p = osm_node_get_physp_ptr(p_this_sw->p_node, port_num);

	for (item = cl_qmap_head(&p_mgr->p_subn->sw_guid_tbl);
	     item != cl_qmap_end(&p_mgr->p_subn->sw_guid_tbl);
	     item = cl_qmap_next(item)) {
		p_sw = (osm_switch_t *) item;
		lid_ho = cl_ntoh16(osm_node_get_base_lid(p_sw->p_node, 0));
		hops = osm_switch_get_least_hops(p_remote_sw, lid_ho);
		if (hops == OSM_NO_PATH)
			continue;
		hops += p->hop_wf;
		if (hops <
		    osm_switch_get_hop_count(p_this_sw, lid_ho, port_num)) {
			if (osm_switch_set_hops
			    (p_this_sw, lid_ho, port_num, (uint8_t) hops) != 0)
				OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 3A03: "
					"cannot set hops for lid %u at switch 0x%"
					PRIx64 "\n", lid_ho,
					cl_ntoh64(osm_node_get_node_guid
						  (p_this_sw->p_node)));
			p_mgr->some_hop_count_set = TRUE;
		}
	}

	OSM_LOG_EXIT(p_mgr->p_log);
}
static void cache_add_sw_link(osm_ucast_mgr_t * p_mgr, osm_physp_t *p,
			      uint16_t remote_lid_ho, boolean_t is_ca)
{
	cache_switch_t *p_cache_sw;
	uint16_t lid_ho = cl_ntoh16(osm_node_get_base_lid(p->p_node, 0));

	OSM_LOG_ENTER(p_mgr->p_log);

	if (!lid_ho || !remote_lid_ho || !p->port_num)
		goto Exit;

	OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
		"Caching switch port: lid %u [port %u] -> lid %u (%s)\n",
		lid_ho, p->port_num, remote_lid_ho, (is_ca) ? "CA/RTR" : "SW");

	p_cache_sw = cache_get_sw(p_mgr, lid_ho);
	if (!p_cache_sw) {
		p_cache_sw = cache_sw_new(lid_ho, p->p_node->sw->num_ports);
		if (!p_cache_sw) {
			OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
				"ERR AD01: Out of memory - cache is invalid\n");
			osm_ucast_cache_invalidate(p_mgr);
			goto Exit;
		}
		cl_qmap_insert(&p_mgr->cache_sw_tbl, lid_ho,
			       &p_cache_sw->map_item);
	}

	if (p->port_num >= p_cache_sw->num_ports) {
		OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
			"ERR AD02: Wrong switch? - cache is invalid\n");
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	if (is_ca)
		cache_sw_set_leaf(p_cache_sw);

	if (p_cache_sw->ports[p->port_num].remote_lid_ho == 0) {
		/* cache this link only if it hasn't been already cached */
		p_cache_sw->ports[p->port_num].remote_lid_ho = remote_lid_ho;
		p_cache_sw->ports[p->port_num].is_leaf = is_ca;
	}
Exit:
	OSM_LOG_EXIT(p_mgr->p_log);
}
Esempio n. 9
0
static void
__osm_ucast_mgr_process_neighbor(IN osm_ucast_mgr_t * const p_mgr,
				 IN osm_switch_t * const p_this_sw,
				 IN osm_switch_t * const p_remote_sw,
				 IN const uint8_t port_num,
				 IN const uint8_t remote_port_num)
{
	osm_switch_t *p_sw, *p_next_sw;
	uint16_t lid_ho;
	uint8_t hops;

	OSM_LOG_ENTER(p_mgr->p_log);

	OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
		"Node 0x%" PRIx64 ", remote node 0x%" PRIx64
		", port %u, remote port %u\n",
		cl_ntoh64(osm_node_get_node_guid(p_this_sw->p_node)),
		cl_ntoh64(osm_node_get_node_guid(p_remote_sw->p_node)),
		port_num, remote_port_num);

	p_next_sw = (osm_switch_t *) cl_qmap_head(&p_mgr->p_subn->sw_guid_tbl);
	while (p_next_sw !=
	       (osm_switch_t *) cl_qmap_end(&p_mgr->p_subn->sw_guid_tbl)) {
		p_sw = p_next_sw;
		p_next_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item);
		lid_ho = osm_node_get_base_lid(p_sw->p_node, 0);
		lid_ho = cl_ntoh16(lid_ho);
		hops = osm_switch_get_least_hops(p_remote_sw, lid_ho);
		if (hops == OSM_NO_PATH)
			continue;
		hops++;
		if (hops <
		    osm_switch_get_hop_count(p_this_sw, lid_ho, port_num)) {
			if (osm_switch_set_hops
			    (p_this_sw, lid_ho, port_num, hops) != 0)
				OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
					"cannot set hops for lid %u at switch 0x%"
					PRIx64 "\n", lid_ho,
					cl_ntoh64(osm_node_get_node_guid
						  (p_this_sw->p_node)));
			p_mgr->some_hop_count_set = TRUE;
		}
	}

	OSM_LOG_EXIT(p_mgr->p_log);
}
Esempio n. 10
0
static void
__osm_sa_slvl_create(IN osm_sa_t * sa,
		     IN const osm_physp_t * const p_physp,
		     IN osm_slvl_search_ctxt_t * const p_ctxt,
		     IN uint8_t in_port_idx)
{
	osm_slvl_item_t *p_rec_item;
	uint16_t lid;
	ib_api_status_t status = IB_SUCCESS;

	OSM_LOG_ENTER(sa->p_log);

	p_rec_item = malloc(sizeof(*p_rec_item));
	if (p_rec_item == NULL) {
		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2602: "
			"rec_item alloc failed\n");
		status = IB_INSUFFICIENT_RESOURCES;
		goto Exit;
	}

	if (p_physp->p_node->node_info.node_type != IB_NODE_TYPE_SWITCH)
		lid = p_physp->port_info.base_lid;
	else
		lid = osm_node_get_base_lid(p_physp->p_node, 0);

	OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
		"New SLtoVL Map for: OUT port 0x%016" PRIx64
		", lid 0x%X, port %u to In Port:%u\n",
		cl_ntoh64(osm_physp_get_port_guid(p_physp)),
		cl_ntoh16(lid), osm_physp_get_port_num(p_physp), in_port_idx);

	memset(p_rec_item, 0, sizeof(*p_rec_item));

	p_rec_item->rec.lid = lid;
	p_rec_item->rec.out_port_num = osm_physp_get_port_num(p_physp);
	p_rec_item->rec.in_port_num = in_port_idx;
	p_rec_item->rec.slvl_tbl =
	    *(osm_physp_get_slvl_tbl(p_physp, in_port_idx));

	cl_qlist_insert_tail(p_ctxt->p_list, &p_rec_item->list_item);

Exit:
	OSM_LOG_EXIT(sa->p_log);
}
Esempio n. 11
0
static void
__osm_sa_pkey_create(IN osm_sa_t * sa,
		     IN osm_physp_t * const p_physp,
		     IN osm_pkey_search_ctxt_t * const p_ctxt,
		     IN uint16_t block)
{
	osm_pkey_item_t *p_rec_item;
	uint16_t lid;
	ib_api_status_t status = IB_SUCCESS;

	OSM_LOG_ENTER(sa->p_log);

	p_rec_item = malloc(sizeof(*p_rec_item));
	if (p_rec_item == NULL) {
		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4602: "
			"rec_item alloc failed\n");
		status = IB_INSUFFICIENT_RESOURCES;
		goto Exit;
	}

	if (p_physp->p_node->node_info.node_type != IB_NODE_TYPE_SWITCH)
		lid = p_physp->port_info.base_lid;
	else
		lid = osm_node_get_base_lid(p_physp->p_node, 0);

	OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
		"New P_Key table for: port 0x%016" PRIx64
		", lid %u, port %u Block:%u\n",
		cl_ntoh64(osm_physp_get_port_guid(p_physp)),
		cl_ntoh16(lid), osm_physp_get_port_num(p_physp), block);

	memset(p_rec_item, 0, sizeof(*p_rec_item));

	p_rec_item->rec.lid = lid;
	p_rec_item->rec.block_num = block;
	p_rec_item->rec.port_num = osm_physp_get_port_num(p_physp);
	p_rec_item->rec.pkey_tbl =
	    *(osm_pkey_tbl_block_get(osm_physp_get_pkey_tbl(p_physp), block));

	cl_qlist_insert_tail(p_ctxt->p_list, &p_rec_item->list_item);

Exit:
	OSM_LOG_EXIT(sa->p_log);
}
Esempio n. 12
0
static void dump_ucast_lfts(cl_map_item_t * item, FILE * file, void *cxt)
{
	osm_switch_t *p_sw = (osm_switch_t *) item;
	osm_opensm_t *p_osm = cxt;
	osm_node_t *p_node = p_sw->p_node;
	unsigned max_lid = p_sw->max_lid_ho;
	unsigned max_port = p_sw->num_ports;
	uint16_t lid;
	uint8_t port;

	fprintf(file, "Unicast lids [0-%u] of switch Lid %u guid 0x%016"
		PRIx64 " (\'%s\'):\n",
		max_lid, cl_ntoh16(osm_node_get_base_lid(p_node, 0)),
		cl_ntoh64(osm_node_get_node_guid(p_node)), p_node->print_desc);
	for (lid = 0; lid <= max_lid; lid++) {
		osm_port_t *p_port;
		port = osm_switch_get_port_by_lid(p_sw, lid, OSM_NEW_LFT);

		if (port >= max_port)
			continue;

		fprintf(file, "0x%04x %03u # ", lid, port);

		p_port = osm_get_port_by_lid_ho(&p_osm->subn, lid);
		if (p_port) {
			p_node = p_port->p_node;
			fprintf(file, "%s portguid 0x%016" PRIx64 ": \'%s\'",
				ib_get_node_type_str(osm_node_get_type(p_node)),
				cl_ntoh64(osm_port_get_guid(p_port)),
				p_node->print_desc);
		} else
			fprintf(file, "unknown node and type");
		fprintf(file, "\n");
	}
	fprintf(file, "%u lids dumped\n", max_lid);
}
Esempio n. 13
0
uint8_t osm_switch_recommend_mcast_path(IN osm_switch_t * p_sw,
					IN osm_port_t * p_port,
					IN uint16_t mlid_ho,
					IN boolean_t ignore_existing)
{
	uint16_t base_lid;
	uint8_t hops;
	uint8_t port_num;
	uint8_t num_ports;
	uint8_t least_hops;

	CL_ASSERT(mlid_ho >= IB_LID_MCAST_START_HO);

	if (p_port->p_node->sw) {
		if (p_port->p_node->sw == p_sw)
			return 0;
		base_lid = osm_port_get_base_lid(p_port);
	} else {
		osm_physp_t *p_physp = p_port->p_physp;
		if (!p_physp || !p_physp->p_remote_physp ||
		    !p_physp->p_remote_physp->p_node->sw)
			return OSM_NO_PATH;
		if (p_physp->p_remote_physp->p_node->sw == p_sw)
			return p_physp->p_remote_physp->port_num;
		base_lid =
		    osm_node_get_base_lid(p_physp->p_remote_physp->p_node, 0);
	}
	base_lid = cl_ntoh16(base_lid);
	num_ports = p_sw->num_ports;

	/*
	   If the user wants us to ignore existing multicast routes,
	   then simply return the shortest hop count path to the
	   target port.

	   Otherwise, return the first port that has a path to the target,
	   picking from the ports that are already in the multicast group.
	 */
	if (!ignore_existing) {
		for (port_num = 1; port_num < num_ports; port_num++) {
			if (!osm_mcast_tbl_is_port
			    (&p_sw->mcast_tbl, mlid_ho, port_num))
				continue;
			/*
			   Don't be too trusting of the current forwarding table!
			   Verify that the LID is reachable through this port.
			 */
			hops =
			    osm_switch_get_hop_count(p_sw, base_lid, port_num);
			if (hops != OSM_NO_PATH)
				return port_num;
		}
	}

	/*
	   Either no existing mcast paths reach this port or we are
	   ignoring existing paths.

	   Determine the best multicast path to the target.  Note that this
	   algorithm is slightly different from the one used for unicast route
	   recommendation.  In this case (multicast), we must NOT
	   perform any sort of load balancing.  We MUST take the FIRST
	   port found that has <= the lowest hop count path.  This prevents
	   more than one multicast path to the same remote switch which
	   prevents a multicast loop.  Multicast loops are bad since the same
	   multicast packet will go around and around, inevitably creating
	   a black hole that will destroy the Earth in a firey conflagration.
	 */
	least_hops = osm_switch_get_least_hops(p_sw, base_lid);
	if (least_hops == OSM_NO_PATH)
		return OSM_NO_PATH;
	for (port_num = 1; port_num < num_ports; port_num++)
		if (osm_switch_get_hop_count(p_sw, base_lid, port_num) ==
		    least_hops)
			break;

	CL_ASSERT(port_num < num_ports);
	return port_num;
}
Esempio n. 14
0
/**********************************************************************
 * This function does the bfs of min hop table calculation by guid index
 * as a starting point.
 **********************************************************************/
static int updn_bfs_by_node(IN osm_log_t * p_log, IN osm_subn_t * p_subn,
			    IN osm_switch_t * p_sw)
{
	uint8_t pn, pn_rem;
	cl_qlist_t list;
	uint16_t lid;
	struct updn_node *u;
	updn_switch_dir_t next_dir, current_dir;

	OSM_LOG_ENTER(p_log);

	lid = osm_node_get_base_lid(p_sw->p_node, 0);
	lid = cl_ntoh16(lid);
	osm_switch_set_hops(p_sw, lid, 0, 0);

	OSM_LOG(p_log, OSM_LOG_DEBUG,
		"Starting from switch - port GUID 0x%" PRIx64 " lid %u\n",
		cl_ntoh64(p_sw->p_node->node_info.port_guid), lid);

	u = p_sw->priv;
	u->dir = UP;

	/* Update list with the new element */
	cl_qlist_init(&list);
	cl_qlist_insert_tail(&list, &u->list);

	/* BFS the list till no next element */
	while (!cl_is_qlist_empty(&list)) {
		u = (struct updn_node *)cl_qlist_remove_head(&list);
		u->visited = 0;	/* cleanup */
		current_dir = u->dir;
		/* Go over all ports of the switch and find unvisited remote nodes */
		for (pn = 1; pn < u->sw->num_ports; pn++) {
			osm_node_t *p_remote_node;
			struct updn_node *rem_u;
			uint8_t current_min_hop, remote_min_hop,
			    set_hop_return_value;
			osm_switch_t *p_remote_sw;

			p_remote_node =
			    osm_node_get_remote_node(u->sw->p_node, pn,
						     &pn_rem);
			/* If no remote node OR remote node is not a SWITCH
			   continue to next pn */
			if (!p_remote_node || !p_remote_node->sw)
				continue;
			/* Fetch remote guid only after validation of remote node */
			p_remote_sw = p_remote_node->sw;
			rem_u = p_remote_sw->priv;
			/* Decide which direction to mark it (UP/DOWN) */
			next_dir = updn_get_dir(u->rank, rem_u->rank,
						u->id, rem_u->id);

			/* Check if this is a legal step : the only illegal step is going
			   from DOWN to UP */
			if ((current_dir == DOWN) && (next_dir == UP)) {
				OSM_LOG(p_log, OSM_LOG_DEBUG,
					"Avoiding move from 0x%016" PRIx64
					" to 0x%016" PRIx64 "\n",
					cl_ntoh64(osm_node_get_node_guid(u->sw->p_node)),
					cl_ntoh64(osm_node_get_node_guid(p_remote_node)));
				/* Illegal step */
				continue;
			}
			/* Set MinHop value for the current lid */
			current_min_hop = osm_switch_get_least_hops(u->sw, lid);
			/* Check hop count if better insert into list && update
			   the remote node Min Hop Table */
			remote_min_hop =
			    osm_switch_get_hop_count(p_remote_sw, lid, pn_rem);
			if (current_min_hop + 1 < remote_min_hop) {
				set_hop_return_value =
				    osm_switch_set_hops(p_remote_sw, lid,
							pn_rem,
							current_min_hop + 1);
				if (set_hop_return_value) {
					OSM_LOG(p_log, OSM_LOG_ERROR, "ERR AA01: "
						"Invalid value returned from set min hop is: %d\n",
						set_hop_return_value);
				}
				/* Check if remote port has already been visited */
				if (!rem_u->visited) {
					/* Insert updn_switch item into the list */
					rem_u->dir = next_dir;
					rem_u->visited = 1;
					cl_qlist_insert_tail(&list,
							     &rem_u->list);
				}
			}
		}
	}

	OSM_LOG_EXIT(p_log);
	return 0;
}
Esempio n. 15
0
uint8_t osm_switch_recommend_path(IN const osm_switch_t * p_sw,
				  IN osm_port_t * p_port, IN uint16_t lid_ho,
				  IN unsigned start_from,
				  IN boolean_t ignore_existing,
				  IN boolean_t routing_for_lmc,
				  IN boolean_t dor,
				  IN boolean_t port_shifting,
				  IN uint32_t scatter_ports)
{
	/*
	   We support an enhanced LMC aware routing mode:
	   In the case of LMC > 0, we can track the remote side
	   system and node for all of the lids of the target
	   and try and avoid routing again through the same
	   system / node.

	   Assume if routing_for_lmc is true that this procedure was
	   provided the tracking array and counter via p_port->priv,
	   and we can conduct this algorithm.
	 */
	uint16_t base_lid;
	uint8_t hops;
	uint8_t least_hops;
	uint8_t port_num;
	uint8_t num_ports;
	uint32_t least_paths = 0xFFFFFFFF;
	unsigned i;
	/*
	   The follwing will track the least paths if the
	   route should go through a new system/node
	 */
	uint32_t least_paths_other_sys = 0xFFFFFFFF;
	uint32_t least_paths_other_nodes = 0xFFFFFFFF;
	uint32_t least_forwarded_to = 0xFFFFFFFF;
	uint32_t check_count;
	uint8_t best_port = 0;
	/*
	   These vars track the best port if it connects to
	   not used system/node.
	 */
	uint8_t best_port_other_sys = 0;
	uint8_t best_port_other_node = 0;
	boolean_t port_found = FALSE;
	osm_physp_t *p_physp;
	osm_physp_t *p_rem_physp;
	osm_node_t *p_rem_node;
	osm_node_t *p_rem_node_first = NULL;
	struct osm_remote_node *p_remote_guid = NULL;
	struct osm_remote_node null_remote_node = {NULL, 0, 0};
	struct switch_port_path port_paths[IB_NODE_NUM_PORTS_MAX];
	unsigned int port_paths_total_paths = 0;
	unsigned int port_paths_count = 0;
	uint8_t scatter_possible_ports[IB_NODE_NUM_PORTS_MAX];
	unsigned int scatter_possible_ports_count = 0;
	int found_sys_guid = 0;
	int found_node_guid = 0;

	CL_ASSERT(lid_ho > 0);

	if (p_port->p_node->sw) {
		if (p_port->p_node->sw == p_sw)
			return 0;
		base_lid = osm_port_get_base_lid(p_port);
	} else {
		p_physp = p_port->p_physp;
		if (!p_physp || !p_physp->p_remote_physp ||
		    !p_physp->p_remote_physp->p_node->sw)
			return OSM_NO_PATH;

		if (p_physp->p_remote_physp->p_node->sw == p_sw)
			return p_physp->p_remote_physp->port_num;
		base_lid =
		    osm_node_get_base_lid(p_physp->p_remote_physp->p_node, 0);
	}
	base_lid = cl_ntoh16(base_lid);

	num_ports = p_sw->num_ports;

	least_hops = osm_switch_get_least_hops(p_sw, base_lid);
	if (least_hops == OSM_NO_PATH)
		return OSM_NO_PATH;

	/*
	   First, inquire with the forwarding table for an existing
	   route.  If one is found, honor it unless:
	   1. the ignore existing flag is set.
	   2. the physical port is not a valid one or not healthy
	   3. the physical port has a remote port (the link is up)
	   4. the port has min-hops to the target (avoid loops)
	 */
	if (!ignore_existing) {
		port_num = osm_switch_get_port_by_lid(p_sw, lid_ho);

		if (port_num != OSM_NO_PATH) {
			CL_ASSERT(port_num < num_ports);

			p_physp =
			    osm_node_get_physp_ptr(p_sw->p_node, port_num);
			/*
			   Don't be too trusting of the current forwarding table!
			   Verify that the port number is legal and that the
			   LID is reachable through this port.
			 */
			if (p_physp && osm_physp_is_healthy(p_physp) &&
			    osm_physp_get_remote(p_physp)) {
				hops =
				    osm_switch_get_hop_count(p_sw, base_lid,
							     port_num);
				/*
				   If we aren't using pre-defined user routes
				   function, then we need to make sure that the
				   current path is the minimum one. In case of
				   having such a user function - this check will
				   not be done, and the old routing will be used.
				   Note: This means that it is the user's job to
				   clean all data in the forwarding tables that
				   he wants to be overridden by the minimum
				   hop function.
				 */
				if (hops == least_hops)
					return port_num;
			}
		}
	}

	/*
	   This algorithm selects a port based on a static load balanced
	   selection across equal hop-count ports.
	   There is lots of room for improved sophistication here,
	   possibly guided by user configuration info.
	 */

	/*
	   OpenSM routing is "local" - not considering a full lid to lid
	   path. As such we can not guarantee a path will not loop if we
	   do not always follow least hops.
	   So we must abort if not least hops.
	 */

	/* port number starts with one and num_ports is 1 + num phys ports */
	for (i = start_from; i < start_from + num_ports; i++) {
		port_num = osm_switch_get_dimn_port(p_sw, i % num_ports);
		if (!port_num ||
		    osm_switch_get_hop_count(p_sw, base_lid, port_num) !=
		    least_hops)
			continue;

		/* let us make sure it is not down or unhealthy */
		p_physp = osm_node_get_physp_ptr(p_sw->p_node, port_num);
		if (!p_physp || !osm_physp_is_healthy(p_physp) ||
		    /*
		       we require all - non sma ports to be linked
		       to be routed through
		     */
		    !osm_physp_get_remote(p_physp))
			continue;

		/*
		   We located a least-hop port, possibly one of many.
		   For this port, check the running total count of
		   the number of paths through this port.  Select
		   the port routing the least number of paths.
		 */
		check_count =
		    osm_port_prof_path_count_get(&p_sw->p_prof[port_num]);


		if (dor) {
			/* Get the Remote Node */
			p_rem_physp = osm_physp_get_remote(p_physp);
			p_rem_node = osm_physp_get_node_ptr(p_rem_physp);
			/* use the first dimension, but spread traffic
			 * out among the group of ports representing
			 * that dimension */
			if (!p_rem_node_first)
				p_rem_node_first = p_rem_node;
			else if (p_rem_node != p_rem_node_first)
				continue;
			if (routing_for_lmc) {
				struct osm_remote_guids_count *r = p_port->priv;
				uint8_t rem_port = osm_physp_get_port_num(p_rem_physp);
				unsigned int j;

				for (j = 0; j < r->count; j++) {
					p_remote_guid = &r->guids[j];
					if ((p_remote_guid->node == p_rem_node)
					    && (p_remote_guid->port == rem_port))
						break;
				}
				if (j == r->count)
					p_remote_guid = &null_remote_node;
			}
		/*
		   Advanced LMC routing requires tracking of the
		   best port by the node connected to the other side of
		   it.
		 */
		} else if (routing_for_lmc) {
			/* Is the sys guid already used ? */
			p_remote_guid = switch_find_sys_guid_count(p_sw,
								   p_port->priv,
								   port_num);

			/* If not update the least hops for this case */
			if (!p_remote_guid) {
				if (check_count < least_paths_other_sys) {
					least_paths_other_sys = check_count;
					best_port_other_sys = port_num;
					least_forwarded_to = 0;
				}
				found_sys_guid = 0;
			} else {	/* same sys found - try node */


				/* Else is the node guid already used ? */
				p_remote_guid = switch_find_node_guid_count(p_sw,
									    p_port->priv,
									    port_num);

				/* If not update the least hops for this case */
				if (!p_remote_guid
				    && check_count < least_paths_other_nodes) {
					least_paths_other_nodes = check_count;
					best_port_other_node = port_num;
					least_forwarded_to = 0;
				}
				/* else prior sys and node guid already used */

				if (!p_remote_guid)
					found_node_guid = 0;
				else
					found_node_guid = 1;
				found_sys_guid = 1;
			}	/* same sys found */
		}

		port_paths[port_paths_count].port_num = port_num;
		port_paths[port_paths_count].path_count = check_count;
		if (routing_for_lmc) {
			port_paths[port_paths_count].found_sys_guid = found_sys_guid;
			port_paths[port_paths_count].found_node_guid = found_node_guid;
		}
		if (routing_for_lmc && p_remote_guid)
			port_paths[port_paths_count].forwarded_to = p_remote_guid->forwarded_to;
		else
			port_paths[port_paths_count].forwarded_to = 0;
		port_paths_total_paths += check_count;
		port_paths_count++;

		/* routing for LMC mode */
		/*
		   the count is min but also lower then the max subscribed
		 */
		if (check_count < least_paths) {
			port_found = TRUE;
			best_port = port_num;
			least_paths = check_count;
			scatter_possible_ports_count = 0;
			scatter_possible_ports[scatter_possible_ports_count++] = port_num;
			if (routing_for_lmc
			    && p_remote_guid
			    && p_remote_guid->forwarded_to < least_forwarded_to)
				least_forwarded_to = p_remote_guid->forwarded_to;
		} else if (scatter_ports
			   && check_count == least_paths) {
			scatter_possible_ports[scatter_possible_ports_count++] = port_num;
		} else if (routing_for_lmc
			   && p_remote_guid
			   && check_count == least_paths
			   && p_remote_guid->forwarded_to < least_forwarded_to) {
			least_forwarded_to = p_remote_guid->forwarded_to;
			best_port = port_num;
		}
	}

	if (port_found == FALSE)
		return OSM_NO_PATH;

	if (port_shifting && port_paths_count) {
		/* In the port_paths[] array, we now have all the ports that we
		 * can route out of.  Using some shifting math below, possibly
		 * select a different one so that lids won't align in LFTs
		 *
		 * If lmc > 0, we need to loop through these ports to find the
		 * least_forwarded_to port, best_port_other_sys, and
		 * best_port_other_node just like before but through the different
		 * ordering.
		 */

		least_paths = 0xFFFFFFFF;
		least_paths_other_sys = 0xFFFFFFFF;
		least_paths_other_nodes = 0xFFFFFFFF;
	        least_forwarded_to = 0xFFFFFFFF;
		best_port = 0;
		best_port_other_sys = 0;
		best_port_other_node = 0;

		for (i = 0; i < port_paths_count; i++) {
			unsigned int idx;

			idx = (port_paths_total_paths/port_paths_count + i) % port_paths_count;

			if (routing_for_lmc) {
				if (!port_paths[idx].found_sys_guid
				    && port_paths[idx].path_count < least_paths_other_sys) {
					least_paths_other_sys = port_paths[idx].path_count;
					best_port_other_sys = port_paths[idx].port_num;
					least_forwarded_to = 0;
				}
				else if (!port_paths[idx].found_node_guid
					 && port_paths[idx].path_count < least_paths_other_nodes) {
					least_paths_other_nodes = port_paths[idx].path_count;
					best_port_other_node = port_paths[idx].port_num;
					least_forwarded_to = 0;
				}
			}

			if (port_paths[idx].path_count < least_paths) {
				best_port = port_paths[idx].port_num;
				least_paths = port_paths[idx].path_count;
				if (routing_for_lmc
				    && (port_paths[idx].found_sys_guid
					|| port_paths[idx].found_node_guid)
				    && port_paths[idx].forwarded_to < least_forwarded_to)
					least_forwarded_to = port_paths[idx].forwarded_to;
			}
			else if (routing_for_lmc
				 && (port_paths[idx].found_sys_guid
				     || port_paths[idx].found_node_guid)
				 && port_paths[idx].path_count == least_paths
				 && port_paths[idx].forwarded_to < least_forwarded_to) {
				least_forwarded_to = port_paths[idx].forwarded_to;
				best_port = port_paths[idx].port_num;
			}

		}
	}

	/*
	   if we are in enhanced routing mode and the best port is not
	   the local port 0
	 */
	if (routing_for_lmc && best_port && !scatter_ports) {
		/* Select the least hop port of the non used sys first */
		if (best_port_other_sys)
			best_port = best_port_other_sys;
		else if (best_port_other_node)
			best_port = best_port_other_node;
	} else if (scatter_ports) {
		/*
		 * There is some danger that this random could "rebalance" the routes
		 * every time, to combat this there is a global srandom that
		 * occurs at the start of every sweep.
		 */
		unsigned int idx = random() % scatter_possible_ports_count;
		best_port = scatter_possible_ports[idx];
	}
	return best_port;
}
Esempio n. 16
0
uint8_t
osm_switch_recommend_path(IN const osm_switch_t * const p_sw,
			  IN osm_port_t * p_port,
			  IN const uint16_t lid_ho,
			  IN unsigned start_from,
			  IN const boolean_t ignore_existing,
			  IN const boolean_t dor)
{
	/*
	   We support an enhanced LMC aware routing mode:
	   In the case of LMC > 0, we can track the remote side
	   system and node for all of the lids of the target
	   and try and avoid routing again through the same
	   system / node.

	   If this procedure is provided with the tracking array
	   and counter we can conduct this algorithm.
	 */
	boolean_t routing_for_lmc = (p_port->priv != NULL);
	uint16_t base_lid;
	uint8_t hops;
	uint8_t least_hops;
	uint8_t port_num;
	uint8_t num_ports;
	uint32_t least_paths = 0xFFFFFFFF;
	unsigned i;
	/*
	   The follwing will track the least paths if the
	   route should go through a new system/node
	 */
	uint32_t least_paths_other_sys = 0xFFFFFFFF;
	uint32_t least_paths_other_nodes = 0xFFFFFFFF;
	uint32_t least_forwarded_to = 0xFFFFFFFF;
	uint32_t check_count;
	uint8_t best_port = 0;
	/*
	   These vars track the best port if it connects to
	   not used system/node.
	 */
	uint8_t best_port_other_sys = 0;
	uint8_t best_port_other_node = 0;
	boolean_t port_found = FALSE;
	osm_physp_t *p_physp;
	osm_physp_t *p_rem_physp;
	osm_node_t *p_rem_node;
	osm_node_t *p_rem_node_first = NULL;
	struct osm_remote_node *p_remote_guid = NULL;

	CL_ASSERT(lid_ho > 0);

	if (p_port->p_node->sw) {
		if (p_port->p_node->sw == p_sw)
			return 0;
		base_lid = osm_port_get_base_lid(p_port);
	} else {
		p_physp = p_port->p_physp;
		if (!p_physp || !p_physp->p_remote_physp ||
		    !p_physp->p_remote_physp->p_node->sw)
			return OSM_NO_PATH;

		if (p_physp->p_remote_physp->p_node->sw == p_sw)
			return p_physp->p_remote_physp->port_num;
		base_lid =
		    osm_node_get_base_lid(p_physp->p_remote_physp->p_node, 0);
	}
	base_lid = cl_ntoh16(base_lid);

	num_ports = p_sw->num_ports;

	least_hops = osm_switch_get_least_hops(p_sw, base_lid);
	if (least_hops == OSM_NO_PATH)
		return (OSM_NO_PATH);

	/*
	   First, inquire with the forwarding table for an existing
	   route.  If one is found, honor it unless:
	   1. the ignore existing flag is set.
	   2. the physical port is not a valid one or not healthy
	   3. the physical port has a remote port (the link is up)
	   4. the port has min-hops to the target (avoid loops)
	 */
	if (!ignore_existing) {
		port_num = osm_switch_get_port_by_lid(p_sw, lid_ho);

		if (port_num != OSM_NO_PATH) {
			CL_ASSERT(port_num < num_ports);

			p_physp =
			    osm_node_get_physp_ptr(p_sw->p_node, port_num);
			/*
			   Don't be too trusting of the current forwarding table!
			   Verify that the port number is legal and that the
			   LID is reachable through this port.
			 */
			if (p_physp && osm_physp_is_healthy(p_physp) &&
			    osm_physp_get_remote(p_physp)) {
				hops =
				    osm_switch_get_hop_count(p_sw, base_lid,
							     port_num);
				/*
				   If we aren't using pre-defined user routes
				   function, then we need to make sure that the
				   current path is the minimum one. In case of
				   having such a user function - this check will
				   not be done, and the old routing will be used.
				   Note: This means that it is the user's job to
				   clean all data in the forwarding tables that
				   he wants to be overridden by the minimum
				   hop function.
				 */
				if (hops == least_hops)
					return (port_num);
			}
		}
	}

	/*
	   This algorithm selects a port based on a static load balanced
	   selection across equal hop-count ports.
	   There is lots of room for improved sophistication here,
	   possibly guided by user configuration info.
	 */

	/*
	   OpenSM routing is "local" - not considering a full lid to lid
	   path. As such we can not guarantee a path will not loop if we
	   do not always follow least hops.
	   So we must abort if not least hops.
	 */

	/* port number starts with one and num_ports is 1 + num phys ports */
	for (i = start_from; i < start_from + num_ports; i++) {
		port_num = i%num_ports;
		if (!port_num ||
		    osm_switch_get_hop_count(p_sw, base_lid, port_num) !=
		    least_hops)
			continue;

		/* let us make sure it is not down or unhealthy */
		p_physp = osm_node_get_physp_ptr(p_sw->p_node, port_num);
		if (!p_physp || !osm_physp_is_healthy(p_physp) ||
		    /*
		       we require all - non sma ports to be linked
		       to be routed through
		     */
		    !osm_physp_get_remote(p_physp))
			continue;

		/*
		   We located a least-hop port, possibly one of many.
		   For this port, check the running total count of
		   the number of paths through this port.  Select
		   the port routing the least number of paths.
		 */
		check_count =
		    osm_port_prof_path_count_get(&p_sw->p_prof[port_num]);

		/*
		   Advanced LMC routing requires tracking of the
		   best port by the node connected to the other side of
		   it.
		 */
		if (routing_for_lmc) {
			/* Is the sys guid already used ? */
			p_remote_guid = osm_switch_find_sys_guid_count(p_sw,
								       p_port->priv,
								       port_num);

			/* If not update the least hops for this case */
			if (!p_remote_guid) {
				if (check_count < least_paths_other_sys) {
					least_paths_other_sys = check_count;
					best_port_other_sys = port_num;
					least_forwarded_to = 0;
				}
			} else {	/* same sys found - try node */
				/* Else is the node guid already used ? */
				p_remote_guid = osm_switch_find_node_guid_count(p_sw,
										p_port->priv,
										port_num);

				/* If not update the least hops for this case */
				if (!p_remote_guid
				    && check_count < least_paths_other_nodes) {
					least_paths_other_nodes = check_count;
					best_port_other_node = port_num;
					least_forwarded_to = 0;
				}
				/* else prior sys and node guid already used */

			}	/* same sys found */
		}

		/* routing for LMC mode */
		/*
		   the count is min but also lower then the max subscribed
		 */
		if (check_count < least_paths) {
			if (dor) {
				/* Get the Remote Node */
				p_rem_physp = osm_physp_get_remote(p_physp);
				p_rem_node =
				    osm_physp_get_node_ptr(p_rem_physp);
				/* use the first dimension, but spread
				 * traffic out among the group of ports
				 * representing that dimension */
				if (port_found) {
					if (p_rem_node != p_rem_node_first)
						continue;
				} else
					p_rem_node_first = p_rem_node;
			}
			port_found = TRUE;
			best_port = port_num;
			least_paths = check_count;
			if (routing_for_lmc
			    && p_remote_guid
			    && p_remote_guid->forwarded_to < least_forwarded_to)
				least_forwarded_to = p_remote_guid->forwarded_to;
		} else if (routing_for_lmc
			   && p_remote_guid
			   && check_count == least_paths
			   && p_remote_guid->forwarded_to < least_forwarded_to) {
			least_forwarded_to = p_remote_guid->forwarded_to;
			best_port = port_num;
		}
	}

	if (port_found == FALSE)
		return (OSM_NO_PATH);

	/*
	   if we are in enhanced routing mode and the best port is not
	   the local port 0
	 */
	if (routing_for_lmc && best_port) {
		/* Select the least hop port of the non used sys first */
		if (best_port_other_sys)
			best_port = best_port_other_sys;
		else if (best_port_other_node)
			best_port = best_port_other_node;
	}

	return (best_port);
}
Esempio n. 17
0
static void dump_ucast_routes(cl_map_item_t * item, FILE * file, void *cxt)
{
	const osm_node_t *p_node;
	osm_port_t *p_port;
	uint8_t port_num;
	uint8_t num_hops;
	uint8_t best_hops;
	uint8_t best_port;
	uint16_t max_lid_ho;
	uint16_t lid_ho, base_lid;
	boolean_t direct_route_exists = FALSE;
	boolean_t dor;
	osm_switch_t *p_sw = (osm_switch_t *) item;
	osm_opensm_t *p_osm = cxt;

	p_node = p_sw->p_node;

	max_lid_ho = p_sw->max_lid_ho;

	fprintf(file, "dump_ucast_routes: "
		"Switch 0x%016" PRIx64 "\nLID    : Port : Hops : Optimal\n",
		cl_ntoh64(osm_node_get_node_guid(p_node)));

	dor = (p_osm->routing_engine_used &&
	       p_osm->routing_engine_used->type == OSM_ROUTING_ENGINE_TYPE_DOR);

	for (lid_ho = 1; lid_ho <= max_lid_ho; lid_ho++) {
		fprintf(file, "0x%04X : ", lid_ho);

		p_port = osm_get_port_by_lid_ho(&p_osm->subn, lid_ho);
		if (!p_port) {
			fprintf(file, "UNREACHABLE\n");
			continue;
		}

		port_num = osm_switch_get_port_by_lid(p_sw, lid_ho,
						      OSM_NEW_LFT);
		if (port_num == OSM_NO_PATH) {
			/*
			   This may occur if there are 'holes' in the existing
			   LID assignments.  Running SM with --reassign_lids
			   will reassign and compress the LID range.  The
			   subnet should work fine either way.
			 */
			fprintf(file, "UNREACHABLE\n");
			continue;
		}
		/*
		   Switches can lie about which port routes a given
		   lid due to a recent reconfiguration of the subnet.
		   Therefore, ensure that the hop count is better than
		   OSM_NO_PATH.
		 */
		if (p_port->p_node->sw) {
			/* Target LID is switch.
			   Get its base lid and check hop count for this base LID only. */
			base_lid = osm_node_get_base_lid(p_port->p_node, 0);
			base_lid = cl_ntoh16(base_lid);
			num_hops =
			    osm_switch_get_hop_count(p_sw, base_lid, port_num);
		} else {
			/* Target LID is not switch (CA or router).
			   Check if we have route to this target from current switch. */
			num_hops =
			    osm_switch_get_hop_count(p_sw, lid_ho, port_num);
			if (num_hops != OSM_NO_PATH) {
				direct_route_exists = TRUE;
				base_lid = lid_ho;
			} else {
				osm_physp_t *p_physp = p_port->p_physp;

				if (!p_physp || !p_physp->p_remote_physp ||
				    !p_physp->p_remote_physp->p_node->sw)
					num_hops = OSM_NO_PATH;
				else {
					base_lid =
					    osm_node_get_base_lid(p_physp->
								  p_remote_physp->
								  p_node, 0);
					base_lid = cl_ntoh16(base_lid);
					num_hops =
					    p_physp->p_remote_physp->p_node->
					    sw ==
					    p_sw ? 0 :
					    osm_switch_get_hop_count(p_sw,
								     base_lid,
								     port_num);
				}
			}
		}

		if (num_hops == OSM_NO_PATH) {
			fprintf(file, "%03u  : HOPS UNKNOWN\n", port_num);
			continue;
		}

		best_hops = osm_switch_get_least_hops(p_sw, base_lid);
		if (!p_port->p_node->sw && !direct_route_exists) {
			best_hops++;
			num_hops++;
		}

		fprintf(file, "%03u  : %02u   : ", port_num, num_hops);

		if (best_hops == num_hops)
			fprintf(file, "yes");
		else {
			/* No LMC Optimization */
			best_port = osm_switch_recommend_path(p_sw, p_port,
							      lid_ho, 1, TRUE,
							      FALSE, dor,
							      p_osm->subn.opt.port_shifting,
							      p_osm->subn.opt.scatter_ports,
							      OSM_NEW_LFT);
			fprintf(file, "No %u hop path possible via port %u!",
				best_hops, best_port);
		}

		fprintf(file, "\n");
	}
}
Esempio n. 18
0
/* Find Root nodes automatically by Min Hop Table info */
static void updn_find_root_nodes_by_min_hop(OUT updn_t * p_updn)
{
	osm_opensm_t *p_osm = p_updn->p_osm;
	osm_switch_t *p_sw;
	osm_port_t *p_port;
	osm_physp_t *p_physp;
	cl_map_item_t *item;
	double thd1, thd2;
	unsigned i, cas_num = 0;
	unsigned *cas_per_sw;
	uint16_t lid_ho;

	OSM_LOG_ENTER(&p_osm->log);

	OSM_LOG(&p_osm->log, OSM_LOG_DEBUG,
		"Current number of ports in the subnet is %d\n",
		cl_qmap_count(&p_osm->subn.port_guid_tbl));

	lid_ho = (uint16_t) cl_ptr_vector_get_size(&p_updn->p_osm->subn.port_lid_tbl) + 1;
	cas_per_sw = malloc(lid_ho * sizeof(*cas_per_sw));
	if (!cas_per_sw) {
		OSM_LOG(&p_osm->log, OSM_LOG_ERROR, "ERR AA14: "
			"cannot alloc mem for CAs per switch counter array\n");
		goto _exit;
	}
	memset(cas_per_sw, 0, lid_ho * sizeof(*cas_per_sw));

	/* Find the Maximum number of CAs (and routers) for histogram normalization */
	OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE,
		"Finding the number of CAs and storing them in cl_map\n");
	for (item = cl_qmap_head(&p_updn->p_osm->subn.port_guid_tbl);
	     item != cl_qmap_end(&p_updn->p_osm->subn.port_guid_tbl);
	     item = cl_qmap_next(item)) {
		p_port = (osm_port_t *)item;
		if (!p_port->p_node->sw) {
			p_physp = p_port->p_physp->p_remote_physp;
			if (!p_physp || !p_physp->p_node->sw)
				continue;
			lid_ho = osm_node_get_base_lid(p_physp->p_node, 0);
			lid_ho = cl_ntoh16(lid_ho);
			cas_per_sw[lid_ho]++;
			cas_num++;
		}
	}

	thd1 = cas_num * 0.9;
	thd2 = cas_num * 0.05;
	OSM_LOG(&p_osm->log, OSM_LOG_DEBUG,
		"Found %u CAs and RTRs, %u SWs in the subnet. "
		"Thresholds are thd1 = %f && thd2 = %f\n",
		cas_num, cl_qmap_count(&p_osm->subn.sw_guid_tbl), thd1, thd2);

	OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE,
		"Passing through all switches to collect Min Hop info\n");
	for (item = cl_qmap_head(&p_updn->p_osm->subn.sw_guid_tbl);
	     item != cl_qmap_end(&p_updn->p_osm->subn.sw_guid_tbl);
	     item = cl_qmap_next(item)) {
		unsigned hop_hist[IB_SUBNET_PATH_HOPS_MAX];
		uint16_t max_lid_ho;
		uint8_t hop_val;
		uint16_t numHopBarsOverThd1 = 0;
		uint16_t numHopBarsOverThd2 = 0;

		p_sw = (osm_switch_t *) item;

		memset(hop_hist, 0, sizeof(hop_hist));

		max_lid_ho = p_sw->max_lid_ho;
		for (lid_ho = 1; lid_ho <= max_lid_ho; lid_ho++)
			if (cas_per_sw[lid_ho]) {
				hop_val =
				    osm_switch_get_least_hops(p_sw, lid_ho);
				if (hop_val >= IB_SUBNET_PATH_HOPS_MAX)
					continue;

				hop_hist[hop_val] += cas_per_sw[lid_ho];
			}

		/* Now recognize the spines by requiring one bar to be
		   above 90% of the number of CAs and RTRs */
		for (i = 0; i < IB_SUBNET_PATH_HOPS_MAX; i++) {
			if (hop_hist[i] > thd1)
				numHopBarsOverThd1++;
			if (hop_hist[i] > thd2)
				numHopBarsOverThd2++;
		}

		/* If thd conditions are valid - rank the root node */
		if (numHopBarsOverThd1 == 1 && numHopBarsOverThd2 == 1) {
			OSM_LOG(&p_osm->log, OSM_LOG_DEBUG,
				"Ranking GUID 0x%" PRIx64 " as root node\n",
				cl_ntoh64(osm_node_get_node_guid(p_sw->p_node)));
			((struct updn_node *)p_sw->priv)->rank = 0;
			p_updn->num_roots++;
		}
	}

	free(cas_per_sw);
_exit:
	OSM_LOG_EXIT(&p_osm->log);
	return;
}
static void ucast_cache_validate(osm_ucast_mgr_t * p_mgr)
{
	cache_switch_t *p_cache_sw;
	cache_switch_t *p_remote_cache_sw;
	unsigned port_num;
	unsigned max_ports;
	uint8_t remote_node_type;
	uint16_t lid_ho;
	uint16_t remote_lid_ho;
	osm_switch_t *p_sw;
	osm_switch_t *p_remote_sw;
	osm_node_t *p_node;
	osm_physp_t *p_physp;
	osm_physp_t *p_remote_physp;
	osm_port_t *p_remote_port;
	cl_qmap_t *p_sw_tbl;

	OSM_LOG_ENTER(p_mgr->p_log);
	if (!p_mgr->cache_valid)
		goto Exit;

	/* If there are no switches in the subnet, we are done */
	p_sw_tbl = &p_mgr->p_subn->sw_guid_tbl;
	if (cl_qmap_count(p_sw_tbl) == 0) {
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	/*
	 * Scan all the physical switch ports in the subnet.
	 * If the port need_update flag is on, check whether
	 * it's just some node/port reset or a cached topology
	 * change. Otherwise the cache is invalid.
	 */
	for (p_sw = (osm_switch_t *) cl_qmap_head(p_sw_tbl);
	     p_sw != (osm_switch_t *) cl_qmap_end(p_sw_tbl);
	     p_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item)) {

		p_node = p_sw->p_node;

		lid_ho = cl_ntoh16(osm_node_get_base_lid(p_node, 0));
		p_cache_sw = cache_get_sw(p_mgr, lid_ho);

		max_ports = osm_node_get_num_physp(p_node);

		/* skip port 0 */
		for (port_num = 1; port_num < max_ports; port_num++) {

			p_physp = osm_node_get_physp_ptr(p_node, port_num);

			if (!p_physp || !p_physp->p_remote_physp ||
			    !osm_physp_link_exists(p_physp,
						   p_physp->p_remote_physp))
				/* no valid link */
				continue;

			/*
			 * While scanning all the physical ports in the subnet,
			 * mark corresponding leaf switches in the cache.
			 */
			if (p_cache_sw &&
			    !p_cache_sw->dropped &&
			    !cache_sw_is_leaf(p_cache_sw) &&
			    p_physp->p_remote_physp->p_node &&
			    osm_node_get_type(p_physp->p_remote_physp->
					      p_node) != IB_NODE_TYPE_SWITCH)
				cache_sw_set_leaf(p_cache_sw);

			if (!p_physp->need_update)
				continue;

			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
				"Checking switch lid %u, port %u\n",
				lid_ho, port_num);

			p_remote_physp = osm_physp_get_remote(p_physp);
			remote_node_type =
			    osm_node_get_type(p_remote_physp->p_node);

			if (remote_node_type == IB_NODE_TYPE_SWITCH)
				remote_lid_ho =
				    cl_ntoh16(osm_node_get_base_lid
					      (p_remote_physp->p_node, 0));
			else
				remote_lid_ho =
				    cl_ntoh16(osm_node_get_base_lid
					      (p_remote_physp->p_node,
					       osm_physp_get_port_num
					       (p_remote_physp)));

			if (!p_cache_sw ||
			    port_num >= p_cache_sw->num_ports ||
			    !p_cache_sw->ports[port_num].remote_lid_ho) {
				/*
				 * There is some uncached change on the port.
				 * In general, the reasons might be as follows:
				 *  - switch reset
				 *  - port reset (or port down/up)
				 *  - quick connection location change
				 *  - new link (or new switch)
				 *
				 * First two reasons allow cache usage, while
				 * the last two reasons should invalidate cache.
				 *
				 * In case of quick connection location change,
				 * cache would have been invalidated by
				 * osm_ucast_cache_check_new_link() function.
				 *
				 * In case of new link between two known nodes,
				 * cache also would have been invalidated by
				 * osm_ucast_cache_check_new_link() function.
				 *
				 * Another reason is cached link between two
				 * known switches went back. In this case the
				 * osm_ucast_cache_check_new_link() function would
				 * clear both sides of the link from the cache
				 * during the discovery process, so effectively
				 * this would be equivalent to port reset.
				 *
				 * So three possible reasons remain:
				 *  - switch reset
				 *  - port reset (or port down/up)
				 *  - link of a new switch
				 *
				 * To validate cache, we need to check only the
				 * third reason - link of a new node/switch:
				 *  - If this is the local switch that is new,
				 *    then it should have (p_sw->need_update == 2).
				 *  - If the remote node is switch and it's new,
				 *    then it also should have
				 *    (p_sw->need_update == 2).
				 *  - If the remote node is CA/RTR and it's new,
				 *    then its port should have is_new flag on.
				 */
				if (p_sw->need_update == 2) {
					OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
						"New switch found (lid %u)\n",
						lid_ho);
					osm_ucast_cache_invalidate(p_mgr);
					goto Exit;
				}

				if (remote_node_type == IB_NODE_TYPE_SWITCH) {

					p_remote_sw =
					    p_remote_physp->p_node->sw;
					if (p_remote_sw->need_update == 2) {
						/* this could also be case of
						   switch coming back with an
						   additional link that it
						   didn't have before */
						OSM_LOG(p_mgr->p_log,
							OSM_LOG_DEBUG,
							"New switch/link found (lid %u)\n",
							remote_lid_ho);
						osm_ucast_cache_invalidate
						    (p_mgr);
						goto Exit;
					}
				} else {
					/*
					 * Remote node is CA/RTR.
					 * Get p_port of the remote node and
					 * check its p_port->is_new flag.
					 */
					p_remote_port =
					    osm_get_port_by_guid(p_mgr->p_subn,
								 osm_physp_get_port_guid
								 (p_remote_physp));
					if (p_remote_port->is_new) {
						OSM_LOG(p_mgr->p_log,
							OSM_LOG_DEBUG,
							"New CA/RTR found (lid %u)\n",
							remote_lid_ho);
						osm_ucast_cache_invalidate
						    (p_mgr);
						goto Exit;
					}
				}
			} else {
				/*
				 * The change on the port is cached.
				 * In general, the reasons might be as follows:
				 *  - link between two known nodes went back
				 *  - one or more nodes went back, causing all
				 *    the links to reappear
				 *
				 * If it was link that went back, then this case
				 * would have been taken care of during the
				 * discovery by osm_ucast_cache_check_new_link(),
				 * so it's some node that went back.
				 */
				if ((p_cache_sw->ports[port_num].is_leaf &&
				     remote_node_type == IB_NODE_TYPE_SWITCH) ||
				    (!p_cache_sw->ports[port_num].is_leaf &&
				     remote_node_type != IB_NODE_TYPE_SWITCH)) {
					OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
						"Remote node type change on switch lid %u, port %u\n",
						lid_ho, port_num);
					osm_ucast_cache_invalidate(p_mgr);
					goto Exit;
				}

				if (p_cache_sw->ports[port_num].remote_lid_ho !=
				    remote_lid_ho) {
					OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
						"Remote lid change on switch lid %u, port %u"
						"(was %u, now %u)\n",
						lid_ho, port_num,
						p_cache_sw->ports[port_num].
						remote_lid_ho, remote_lid_ho);
					osm_ucast_cache_invalidate(p_mgr);
					goto Exit;
				}

				/*
				 * We don't care who is the node that has
				 * reappeared in the subnet (local or remote).
				 * What's important that the cached link matches
				 * the real fabrics link.
				 * Just clean it from cache.
				 */

				p_cache_sw->ports[port_num].remote_lid_ho = 0;
				p_cache_sw->ports[port_num].is_leaf = FALSE;
				if (p_cache_sw->dropped) {
					cache_restore_ucast_info(p_mgr,
								 p_cache_sw,
								 p_sw);
					p_cache_sw->dropped = FALSE;
				}

				OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
					"Restored link from cache: lid %u, port %u to lid %u\n",
					lid_ho, port_num, remote_lid_ho);
			}
		}
	}

	/* Remove all the cached switches that
	   have all their ports restored */
	cache_cleanup_switches(p_mgr);

	/*
	 * Done scanning all the physical switch ports in the subnet.
	 * Now we need to check the other side:
	 * Scan all the cached switches and their ports:
	 *  - If the cached switch is missing in the subnet
	 *    (dropped flag is on), check that it's a leaf switch.
	 *    If it's not a leaf, the cache is invalid, because
	 *    cache can tolerate only leaf switch removal.
	 *  - If the cached switch exists in fabric, check all
	 *    its cached ports. These cached ports represent
	 *    missing link in the fabric.
	 *    The missing links that can be tolerated are:
	 *      + link to missing CA/RTR
	 *      + link to missing leaf switch
	 */
	for (p_cache_sw = (cache_switch_t *) cl_qmap_head(&p_mgr->cache_sw_tbl);
	     p_cache_sw != (cache_switch_t *) cl_qmap_end(&p_mgr->cache_sw_tbl);
	     p_cache_sw =
	     (cache_switch_t *) cl_qmap_next(&p_cache_sw->map_item)) {

		if (p_cache_sw->dropped) {
			if (!cache_sw_is_leaf(p_cache_sw)) {
				OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
					"Missing non-leaf switch (lid %u)\n",
					cache_sw_get_base_lid_ho(p_cache_sw));
				osm_ucast_cache_invalidate(p_mgr);
				goto Exit;
			}

			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
				"Missing leaf switch (lid %u) - "
				"continuing validation\n",
				cache_sw_get_base_lid_ho(p_cache_sw));
			continue;
		}

		for (port_num = 1; port_num < p_cache_sw->num_ports; port_num++) {
			if (!p_cache_sw->ports[port_num].remote_lid_ho)
				continue;

			if (p_cache_sw->ports[port_num].is_leaf) {
				CL_ASSERT(cache_sw_is_leaf(p_cache_sw));
				OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
					"Switch lid %u, port %u: missing link to CA/RTR - "
					"continuing validation\n",
					cache_sw_get_base_lid_ho(p_cache_sw),
					port_num);
				continue;
			}

			p_remote_cache_sw = cache_get_sw(p_mgr,
							 p_cache_sw->
							 ports[port_num].
							 remote_lid_ho);

			if (!p_remote_cache_sw || !p_remote_cache_sw->dropped) {
				OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
					"Switch lid %u, port %u: missing link to existing switch\n",
					cache_sw_get_base_lid_ho(p_cache_sw),
					port_num);
				osm_ucast_cache_invalidate(p_mgr);
				goto Exit;
			}

			if (!cache_sw_is_leaf(p_remote_cache_sw)) {
				OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
					"Switch lid %u, port %u: missing link to non-leaf switch\n",
					cache_sw_get_base_lid_ho(p_cache_sw),
					port_num);
				osm_ucast_cache_invalidate(p_mgr);
				goto Exit;
			}

			/*
			 * At this point we know that the missing link is to
			 * a leaf switch. However, one case deserves a special
			 * treatment. If there was a link between two leaf
			 * switches, then missing leaf switch might break
			 * routing. It is possible that there are routes
			 * that use leaf switches to get from switch to switch
			 * and not just to get to the CAs behind the leaf switch.
			 */
			if (cache_sw_is_leaf(p_cache_sw) &&
			    cache_sw_is_leaf(p_remote_cache_sw)) {
				OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
					"Switch lid %u, port %u: missing leaf-2-leaf link\n",
					cache_sw_get_base_lid_ho(p_cache_sw),
					port_num);
				osm_ucast_cache_invalidate(p_mgr);
				goto Exit;
			}

			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
				"Switch lid %u, port %u: missing remote leaf switch - "
				"continuing validation\n",
				cache_sw_get_base_lid_ho(p_cache_sw),
				port_num);
		}
	}

	OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "Unicast cache is valid\n");
	ucast_cache_dump(p_mgr);
Exit:
	OSM_LOG_EXIT(p_mgr->p_log);
}				/* osm_ucast_cache_validate() */
void osm_ucast_cache_check_new_link(osm_ucast_mgr_t * p_mgr,
				    osm_node_t * p_node_1, uint8_t port_num_1,
				    osm_node_t * p_node_2, uint8_t port_num_2)
{
	uint16_t lid_ho_1;
	uint16_t lid_ho_2;

	OSM_LOG_ENTER(p_mgr->p_log);

	if (!p_mgr->cache_valid)
		goto Exit;

	cache_check_link_change(p_mgr,
				osm_node_get_physp_ptr(p_node_1, port_num_1),
				osm_node_get_physp_ptr(p_node_2, port_num_2));

	if (!p_mgr->cache_valid)
		goto Exit;

	if (osm_node_get_type(p_node_1) != IB_NODE_TYPE_SWITCH &&
	    osm_node_get_type(p_node_2) != IB_NODE_TYPE_SWITCH) {
		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "Found CA-2-CA link\n");
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	/* for code simplicity, we want the first node to be switch */
	if (osm_node_get_type(p_node_1) != IB_NODE_TYPE_SWITCH) {
		osm_node_t *tmp_node = p_node_1;
		uint8_t tmp_port_num = port_num_1;
		p_node_1 = p_node_2;
		port_num_1 = port_num_2;
		p_node_2 = tmp_node;
		port_num_2 = tmp_port_num;
	}

	lid_ho_1 = cl_ntoh16(osm_node_get_base_lid(p_node_1, 0));

	if (osm_node_get_type(p_node_2) == IB_NODE_TYPE_SWITCH)
		lid_ho_2 = cl_ntoh16(osm_node_get_base_lid(p_node_2, 0));
	else
		lid_ho_2 =
		    cl_ntoh16(osm_node_get_base_lid(p_node_2, port_num_2));

	if (!lid_ho_1 || !lid_ho_2) {
		/*
		 * No lid assigned, which means that one of the nodes is new.
		 * Need to wait for lid manager to process this node.
		 * The switches and their links will be checked later when
		 * the whole cache validity will be verified.
		 */
		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
			"Link port %u <-> %u reveals new node - cache will "
			"be validated later\n", port_num_1, port_num_2);
		goto Exit;
	}

	cache_remove_port(p_mgr, lid_ho_1, port_num_1, lid_ho_2,
			  (osm_node_get_type(p_node_2) !=
			  IB_NODE_TYPE_SWITCH));

	/* if node_2 is a switch, the link should be cleaned from its cache */

	if (osm_node_get_type(p_node_2) == IB_NODE_TYPE_SWITCH)
		cache_remove_port(p_mgr, lid_ho_2,
				  port_num_2, lid_ho_1, FALSE);

Exit:
	OSM_LOG_EXIT(p_mgr->p_log);
}				/* osm_ucast_cache_check_new_link() */
void osm_ucast_cache_add_node(osm_ucast_mgr_t * p_mgr, osm_node_t * p_node)
{
	uint16_t lid_ho;
	uint8_t max_ports;
	uint8_t port_num;
	osm_physp_t *p_physp;
	cache_switch_t *p_cache_sw;

	OSM_LOG_ENTER(p_mgr->p_log);

	if (!p_mgr->cache_valid)
		goto Exit;

	if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH) {

		lid_ho = cl_ntoh16(osm_node_get_base_lid(p_node, 0));

		if (!lid_ho) {
			OSM_LOG(p_mgr->p_log, OSM_LOG_VERBOSE,
				"Skip caching. Switch dropped before "
				"it gets a valid lid.\n");
			osm_ucast_cache_invalidate(p_mgr);
			goto Exit;
		}

		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
			"Caching dropped switch lid %u\n", lid_ho);

		if (!p_node->sw) {
			/* something is wrong - forget about cache */
			OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
				"ERR AD03: no switch info for node lid %u - "
				"clearing cache\n", lid_ho);
			osm_ucast_cache_invalidate(p_mgr);
			goto Exit;
		}

		/* unlink (add to cache) all the ports of this switch */
		max_ports = osm_node_get_num_physp(p_node);
		for (port_num = 1; port_num < max_ports; port_num++) {

			p_physp = osm_node_get_physp_ptr(p_node, port_num);
			if (!p_physp || !p_physp->p_remote_physp)
				continue;

			osm_ucast_cache_add_link(p_mgr, p_physp,
						 p_physp->p_remote_physp);
		}

		/*
		 * All the ports have been dropped (cached).
		 * If one of the ports was connected to CA/RTR,
		 * then the cached switch would be marked as leaf.
		 * If it isn't, then the dropped switch isn't a leaf,
		 * and cache can't handle it.
		 */

		p_cache_sw = cache_get_sw(p_mgr, lid_ho);

		/* p_cache_sw could be NULL if it has no remote phys ports */
		if (!p_cache_sw || !cache_sw_is_leaf(p_cache_sw)) {
			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
				"Dropped non-leaf switch (lid %u)\n", lid_ho);
			osm_ucast_cache_invalidate(p_mgr);
			goto Exit;
		}

		p_cache_sw->dropped = TRUE;

		if (!p_node->sw->num_hops || !p_node->sw->hops) {
			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
				"No LID matrices for switch lid %u\n", lid_ho);
			osm_ucast_cache_invalidate(p_mgr);
			goto Exit;
		}

		/* lid matrices */

		p_cache_sw->num_hops = p_node->sw->num_hops;
		p_node->sw->num_hops = 0;
		p_cache_sw->hops = p_node->sw->hops;
		p_node->sw->hops = NULL;

		/* linear forwarding table */

		if (p_node->sw->new_lft) {
			/* LFT buffer exists - we use it, because
			   it is more updated than the switch's LFT */
			p_cache_sw->lft = p_node->sw->new_lft;
			p_node->sw->new_lft = NULL;
		} else {
			/* no LFT buffer, so we use the switch's LFT */
			p_cache_sw->lft = p_node->sw->lft;
			p_node->sw->lft = NULL;
			p_node->sw->lft_size = 0;
		}
		p_cache_sw->max_lid_ho = p_node->sw->max_lid_ho;
	} else {
		/* dropping CA/RTR: add to cache all the ports of this node */
		max_ports = osm_node_get_num_physp(p_node);
		for (port_num = 1; port_num < max_ports; port_num++) {

			p_physp = osm_node_get_physp_ptr(p_node, port_num);
			if (!p_physp || !p_physp->p_remote_physp)
				continue;

			CL_ASSERT(osm_node_get_type
				  (p_physp->p_remote_physp->p_node) ==
				  IB_NODE_TYPE_SWITCH);

			osm_ucast_cache_add_link(p_mgr,
						 p_physp->p_remote_physp,
						 p_physp);
		}
	}
Exit:
	OSM_LOG_EXIT(p_mgr->p_log);
}				/* osm_ucast_cache_add_node() */
void osm_ucast_cache_add_link(osm_ucast_mgr_t * p_mgr,
			      osm_physp_t * p_physp1, osm_physp_t * p_physp2)
{
	osm_node_t *p_node_1 = p_physp1->p_node, *p_node_2 = p_physp2->p_node;
	uint16_t lid_ho_1, lid_ho_2;

	OSM_LOG_ENTER(p_mgr->p_log);

	if (!p_mgr->cache_valid)
		goto Exit;

	if (osm_node_get_type(p_node_1) != IB_NODE_TYPE_SWITCH &&
	    osm_node_get_type(p_node_2) != IB_NODE_TYPE_SWITCH) {
		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "Dropping CA-2-CA link\n");
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	if ((osm_node_get_type(p_node_1) == IB_NODE_TYPE_SWITCH &&
	     !osm_node_get_physp_ptr(p_node_1, 0)) ||
	    (osm_node_get_type(p_node_2) == IB_NODE_TYPE_SWITCH &&
	     !osm_node_get_physp_ptr(p_node_2, 0))) {
		/* we're caching a link when one of the nodes
		   has already been dropped and cached */
		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
			"Port %u <-> port %u: port0 on one of the nodes "
			"has already been dropped and cached\n",
			p_physp1->port_num, p_physp2->port_num);
		goto Exit;
	}

	/* One of the nodes is switch. Just for code
	   simplicity, make sure that it's the first node. */

	if (osm_node_get_type(p_node_1) != IB_NODE_TYPE_SWITCH) {
		osm_physp_t *tmp = p_physp1;
		p_physp1 = p_physp2;
		p_physp2 = tmp;
		p_node_1 = p_physp1->p_node;
		p_node_2 = p_physp2->p_node;
	}

	if (!p_node_1->sw) {
		/* something is wrong - we'd better not use cache */
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	lid_ho_1 = cl_ntoh16(osm_node_get_base_lid(p_node_1, 0));

	if (osm_node_get_type(p_node_2) == IB_NODE_TYPE_SWITCH) {

		if (!p_node_2->sw) {
			/* something is wrong - we'd better not use cache */
			osm_ucast_cache_invalidate(p_mgr);
			goto Exit;
		}

		lid_ho_2 = cl_ntoh16(osm_node_get_base_lid(p_node_2, 0));

		/* lost switch-2-switch link - cache both sides */
		cache_add_sw_link(p_mgr, p_physp1, lid_ho_2, FALSE);
		cache_add_sw_link(p_mgr, p_physp2, lid_ho_1, FALSE);
	} else {
		lid_ho_2 = cl_ntoh16(osm_physp_get_base_lid(p_physp2));

		/* lost link to CA/RTR - cache only switch side */
		cache_add_sw_link(p_mgr, p_physp1, lid_ho_2, TRUE);
	}

Exit:
	OSM_LOG_EXIT(p_mgr->p_log);
}				/* osm_ucast_cache_add_link() */