Esempio n. 1
0
/*
 * Compute and program unicast routing for the subnet.
 *
 * The configured routing engines are tried in list order until one of them
 * succeeds.  If no engine ends up recorded in routing_engine_used and
 * fallback is not disabled, the default routing engine is tried as well.
 * On success the unicast cache is marked valid when caching is enabled;
 * otherwise subnet_initialization_error is raised on the subnet.
 *
 * The manager lock is held exclusively for the whole operation.
 *
 * Returns 0 when there are no switches or switch setup failed (nothing is
 * routed in either case); otherwise the result of the last routing attempt,
 * or -1 if no attempt was made.
 */
int osm_ucast_mgr_process(IN osm_ucast_mgr_t * p_mgr)
{
	struct osm_routing_engine *engine;
	cl_qmap_t *switches;
	osm_opensm_t *osm;
	int rc = 0;

	OSM_LOG_ENTER(p_mgr->p_log);

	osm = p_mgr->p_subn->p_osm;
	switches = &p_mgr->p_subn->sw_guid_tbl;
	engine = osm->routing_engine_list;

	CL_PLOCK_EXCL_ACQUIRE(p_mgr->p_lock);

	/* A subnet without switches needs no routing at all. */
	if (cl_qmap_count(switches) == 0)
		goto Exit;

	if (ucast_mgr_setup_all_switches(p_mgr->p_subn) < 0)
		goto Exit;

	osm->routing_engine_used = NULL;

	/* Walk the configured engines; stop at the first one that works. */
	for (rc = -1; engine; engine = engine->next) {
		rc = ucast_mgr_route(engine, osm);
		if (rc == 0)
			break;
	}

	/* If configured routing algorithm failed, use default MinHop */
	if (osm->routing_engine_used == NULL &&
	    osm->no_fallback_routing_engine != TRUE)
		rc = ucast_mgr_route(osm->default_routing_engine, osm);

	if (osm->routing_engine_used == NULL) {
		p_mgr->p_subn->subnet_initialization_error = TRUE;
		OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
			"No routing engine able to successfully configure "
			" switch tables on current fabric\n");
	} else {
		OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
			"%s tables configured on all switches\n",
			osm_routing_engine_type_str(osm->routing_engine_used->
						    type));

		if (p_mgr->p_subn->opt.use_ucast_cache)
			p_mgr->cache_valid = TRUE;
	}

Exit:
	CL_PLOCK_RELEASE(p_mgr->p_lock);
	OSM_LOG_EXIT(p_mgr->p_log);
	return rc;
}
Esempio n. 2
0
/** ===========================================================================
 */
void ssa_db_validate_lft(int first)
{
	struct smdb_lft_block lft_block;
	struct smdb_lft_top lft_top;
	int i;

	if (!first || !(ssa_get_log_level() & SSA_LOG_DB))
		return;

	for (i = 0;
	     i < cl_qmap_count(&ssa_db->p_lft_db->ep_db_lft_block_tbl); i++) {
		lft_block = ssa_db->p_lft_db->p_db_lft_block_tbl[i];
		ssa_log(SSA_LOG_DB, "LFT Block Record: LID %u Block num %u\n",
			ntohs(lft_block.lid), ntohs(lft_block.block_num));
	}

	for (i = 0;
	     i < cl_qmap_count(&ssa_db->p_lft_db->ep_db_lft_top_tbl); i++) {
		lft_top = ssa_db->p_lft_db->p_db_lft_top_tbl[i];
		ssa_log(SSA_LOG_DB, "LFT Top Record: LID %u New Top %u\n",
			ntohs(lft_top.lid), ntohs(lft_top.lft_top));
	}
}
Esempio n. 3
0
/*
 * Compute and program unicast routing for the subnet.
 *
 * The configured routing engines are tried in list order until one of them
 * succeeds.  If routing_engine_used is still OSM_ROUTING_ENGINE_TYPE_NONE
 * afterwards, the LID matrices and LFTs are built directly and the engine
 * is recorded as MINHOP.  The unicast cache is marked valid when caching is
 * enabled.
 *
 * The manager lock is held exclusively for the whole operation.
 *
 * Always returns 0.
 */
int osm_ucast_mgr_process(IN osm_ucast_mgr_t * const p_mgr)
{
	struct osm_routing_engine *engine;
	cl_qmap_t *switches;
	osm_opensm_t *osm;

	OSM_LOG_ENTER(p_mgr->p_log);

	osm = p_mgr->p_subn->p_osm;
	switches = &p_mgr->p_subn->sw_guid_tbl;
	engine = osm->routing_engine_list;

	CL_PLOCK_EXCL_ACQUIRE(p_mgr->p_lock);

	/* A subnet without switches needs no routing at all. */
	if (cl_qmap_count(switches) == 0)
		goto Exit;

	if (ucast_mgr_setup_all_switches(p_mgr->p_subn) < 0)
		goto Exit;

	osm->routing_engine_used = OSM_ROUTING_ENGINE_TYPE_NONE;

	/* Walk the configured engines; stop at the first one that works. */
	for (; engine; engine = engine->next) {
		if (ucast_mgr_route(engine, osm) == 0)
			break;
	}

	/* If configured routing algorithm failed, use default MinHop */
	if (osm->routing_engine_used == OSM_ROUTING_ENGINE_TYPE_NONE) {
		osm_ucast_mgr_build_lid_matrices(p_mgr);
		ucast_mgr_build_lfts(p_mgr);
		osm->routing_engine_used = OSM_ROUTING_ENGINE_TYPE_MINHOP;
	}

	OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
		"%s tables configured on all switches\n",
		osm_routing_engine_type_str(osm->routing_engine_used));

	if (p_mgr->p_subn->opt.use_ucast_cache)
		p_mgr->cache_valid = TRUE;

Exit:
	CL_PLOCK_RELEASE(p_mgr->p_lock);
	OSM_LOG_EXIT(p_mgr->p_log);
	return 0;
}
Esempio n. 4
0
/*
 * Print the full contents of an event wheel to stdout: first every entry on
 * the wheel list, then every entry in the event map.  For each entry the
 * key, the context (printed as a C string) and the registration count are
 * shown.
 */
void __cl_event_wheel_dump(IN cl_event_wheel_t * const p_event_wheel)
{
	cl_event_wheel_reg_info_t *p_reg;
	cl_list_item_t *p_item;
	cl_map_item_t *p_node;

	printf("************** Event Wheel Dump ***********************\n");

	/* Entries as ordered on the wheel list. */
	printf("Event Wheel List has %u items:\n",
	       cl_qlist_count(&p_event_wheel->events_wheel));

	for (p_item = cl_qlist_head(&p_event_wheel->events_wheel);
	     p_item != cl_qlist_end(&p_event_wheel->events_wheel);
	     p_item = cl_qlist_next(p_item)) {
		p_reg = PARENT_STRUCT(p_item, cl_event_wheel_reg_info_t,
				      list_item);
		printf("Event key:0x%" PRIx64 " Context:%s NumRegs:%u\n",
		       p_reg->key, (char *)p_reg->context, p_reg->num_regs);
	}

	/* Entries as stored in the key map. */
	printf("Event Map has %u items:\n",
	       cl_qmap_count(&p_event_wheel->events_map));

	for (p_node = cl_qmap_head(&p_event_wheel->events_map);
	     p_node != cl_qmap_end(&p_event_wheel->events_map);
	     p_node = cl_qmap_next(p_node)) {
		p_reg = PARENT_STRUCT(p_node, cl_event_wheel_reg_info_t,
				      map_item);
		printf("Event key:0x%" PRIx64 " Context:%s NumRegs:%u\n",
		       p_reg->key, (char *)p_reg->context, p_reg->num_regs);
	}
}
Esempio n. 5
0
/** ===========================================================================
 */
static void lft_block_handle(struct ssa_db_lft_change_rec *p_lft_change_rec)
{
	struct ep_map_rec *p_map_rec, *p_map_rec_old;
	uint64_t rec_num, key;
	uint16_t block_num;

	rec_num = cl_qmap_count(&ssa_db->p_lft_db->ep_dump_lft_block_tbl);
	if (rec_num % SSA_TABLE_BLOCK_SIZE == 0) {
		ssa_db->p_lft_db->p_dump_lft_block_tbl =
		    (struct smdb_lft_block *)
			realloc(&ssa_db->p_lft_db->p_dump_lft_block_tbl[0],
				(rec_num / SSA_TABLE_BLOCK_SIZE + 1) *
				 SSA_TABLE_BLOCK_SIZE *
				 sizeof(*ssa_db->p_lft_db->p_dump_lft_block_tbl));
	}

	block_num = p_lft_change_rec->lft_change.block_num;
	ssa_log(SSA_LOG_VERBOSE, "LFT change block event received "
				 "for LID %u Block %u\n",
				 ntohs(p_lft_change_rec->lid), block_num);

	key = ep_rec_gen_key(ntohs(p_lft_change_rec->lid), block_num);

	p_map_rec = ep_map_rec_init(rec_num);
	p_map_rec_old = (struct ep_map_rec *)
		cl_qmap_insert(&ssa_db->p_lft_db->ep_dump_lft_block_tbl,
			       key, &p_map_rec->map_item);
	if (p_map_rec != p_map_rec_old) {
		/* in case of a record with the same key already exist */
		rec_num = p_map_rec_old->offset;
		free(p_map_rec);
	}

	ssa_db->p_lft_db->p_dump_lft_block_tbl[rec_num].lid = p_lft_change_rec->lid;
	ssa_db->p_lft_db->p_dump_lft_block_tbl[rec_num].block_num = htons(block_num);

	memcpy(ssa_db->p_lft_db->p_dump_lft_block_tbl[rec_num].block,
	       p_lft_change_rec->block, IB_SMP_DATA_SIZE);
}
Esempio n. 6
0
/*
 * Add the end ports of all switches to the port order list, visiting the
 * switches in the order produced by sorting them with compar_sw_load after
 * their end-port links have been counted.
 *
 * Does nothing when the subnet has no switches.  If the temporary array
 * cannot be allocated, an error is logged and the sorting is skipped.
 */
static void sort_ports_by_switch_load(osm_ucast_mgr_t * m)
{
	int i, num = cl_qmap_count(&m->p_subn->sw_guid_tbl);
	void **s;

	/*
	 * With no switches, malloc(0) may return NULL (which would be
	 * misreported as out-of-memory) or a zero-sized buffer that the
	 * s[0] store below would overflow.
	 */
	if (num <= 0)
		return;

	s = malloc((size_t) num * sizeof(*s));
	if (!s) {
		OSM_LOG(m->p_log, OSM_LOG_ERROR, "ERR 3A0C: "
			"No memory, skip by switch load sorting.\n");
		return;
	}

	/* Collect the switches in map order. */
	s[0] = cl_qmap_head(&m->p_subn->sw_guid_tbl);
	for (i = 1; i < num; i++)
		s[i] = cl_qmap_next(s[i - 1]);

	for (i = 0; i < num; i++)
		sw_count_endport_links(s[i]);

	qsort(s, num, sizeof(*s), compar_sw_load);

	for (i = 0; i < num; i++)
		add_sw_endports_to_order_list(s[i], m);
	free(s);
}
Esempio n. 7
0
/** ===========================================================================
 */
static void lft_top_handle(struct ssa_db_lft_change_rec *p_lft_change_rec)
{
	struct ep_map_rec *p_map_rec, *p_map_rec_old;
	uint64_t rec_num, key;

	rec_num = cl_qmap_count(&ssa_db->p_lft_db->ep_dump_lft_top_tbl);
	if (rec_num % SSA_TABLE_BLOCK_SIZE == 0) {
		ssa_db->p_lft_db->p_dump_lft_top_tbl =
		    (struct smdb_lft_top *)
			realloc(&ssa_db->p_lft_db->p_dump_lft_top_tbl[0],
				(rec_num / SSA_TABLE_BLOCK_SIZE + 1) *
				 SSA_TABLE_BLOCK_SIZE *
				 sizeof(*ssa_db->p_lft_db->p_dump_lft_top_tbl));
	}

	ssa_log(SSA_LOG_VERBOSE, "LFT change top event received "
				 "for LID %u New Top %u\n",
				 ntohs(p_lft_change_rec->lid),
				 p_lft_change_rec->lft_change.lft_top);

	key = (uint64_t) ntohs(p_lft_change_rec->lid);

	p_map_rec = ep_map_rec_init(rec_num);
	p_map_rec_old = (struct ep_map_rec *)
		cl_qmap_insert(&ssa_db->p_lft_db->ep_dump_lft_top_tbl,
			       key, &p_map_rec->map_item);
	if (p_map_rec != p_map_rec_old) {
		/* in case of a record with the same key already exist */
		rec_num = p_map_rec_old->offset;
		free(p_map_rec);
	}

	ssa_db->p_lft_db->p_dump_lft_top_tbl[rec_num].lid =
		p_lft_change_rec->lid;
	ssa_db->p_lft_db->p_dump_lft_top_tbl[rec_num].lft_top =
		htons(p_lft_change_rec->lft_change.lft_top);
}
Esempio n. 8
0
/**********************************************************************
 Link port 'port_num' of 'p_node' to the neighbor port identified by
 'p_ni_context' (neighbor node GUID and port number).

 Nothing is linked when the context GUID is zero (our own node), when the
 neighbor node can no longer be found, when the link already exists, when
 a physical port cannot be found, or when the local port is reported DOWN
 after having been discovered (a heavy sweep is forced in that case).

 If the port already has some other link, the function either reports a
 duplicated GUID (after more than 5 retries) or re-queries NodeInfo, and
 then still goes on to create the new link.

 A port found linked to itself (with no switches in the subnet) is logged
 as a duplicate GUID; the process exits with status 1 at that point if
 opt.exit_on_fatal is TRUE.

 On success the link is created with osm_node_link() and the neighbor
 relation is recorded in both directions with osm_db_neighbor_set().

 The plock must be held before calling this function.
**********************************************************************/
static void ni_rcv_set_links(IN osm_sm_t * sm, osm_node_t * p_node,
			     const uint8_t port_num,
			     const osm_ni_context_t * p_ni_context)
{
	osm_node_t *p_neighbor_node;
	osm_physp_t *p_physp, *p_remote_physp;

	OSM_LOG_ENTER(sm->p_log);

	/*
	   A special case exists in which the node we're trying to
	   link is our own node.  In this case, the guid value in
	   the ni_context will be zero.
	 */
	if (p_ni_context->node_guid == 0) {
		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
			"Nothing to link for our own node 0x%" PRIx64 "\n",
			cl_ntoh64(osm_node_get_node_guid(p_node)));
		goto _exit;
	}

	p_neighbor_node = osm_get_node_by_guid(sm->p_subn,
					       p_ni_context->node_guid);
	if (PF(!p_neighbor_node)) {
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D10: "
			"Unexpected removal of neighbor node 0x%" PRIx64 "\n",
			cl_ntoh64(p_ni_context->node_guid));
		goto _exit;
	}

	/* When setting the link, ports on both
	   sides of the link should be initialized */
	CL_ASSERT(osm_node_link_has_valid_ports(p_node, port_num,
						p_neighbor_node,
						p_ni_context->port_num));

	if (osm_node_link_exists(p_node, port_num,
				 p_neighbor_node, p_ni_context->port_num)) {
		OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Link already exists\n");
		goto _exit;
	}

	p_physp = osm_node_get_physp_ptr(p_node, port_num);
	if (!p_physp) {
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR OD0E: "
			"Failed to find physp for port %d of Node GUID 0x%"
			PRIx64 "\n", port_num,
			cl_ntoh64(osm_node_get_node_guid(p_node)));
		goto _exit;
	}

	/*
	 * If the link went UP, after we already discovered it, we shouldn't
	 * set the link between the ports and resweep.
	 */
	if (osm_physp_get_port_state(p_physp) == IB_LINK_DOWN &&
	    p_node->physp_discovered[port_num]) {
		/* Link down on another side. Don't create a link*/
		p_node->physp_discovered[port_num] = 0;
		sm->p_subn->force_heavy_sweep = TRUE;
		goto _exit;
	}

	/*
	 * The port already has a link to somewhere else.  The check runs
	 * only when no heavy sweep is pending and either this is the first
	 * detection (dup_count == 0) or the context refers to this very
	 * node/port.
	 */
	if (osm_node_has_any_link(p_node, port_num) &&
	    sm->p_subn->force_heavy_sweep == FALSE &&
	    (!p_ni_context->dup_count ||
	     (p_ni_context->dup_node_guid == osm_node_get_node_guid(p_node) &&
	      p_ni_context->dup_port_num == port_num))) {
		/*
		   Uh oh...
		   This could be reconnected ports, but also duplicated GUID
		   (2 nodes have the same guid) or a 12x link with lane reversal
		   that is not configured correctly.
		   We will try to recover by querying NodeInfo again.
		   In order to catch even fast port moving to new location(s)
		   and back we will count up to 5.
		   Some crazy reconnections (newly created switch loop right
		   before targeted CA) will not be catched this way. So in worst
		   case - report GUID duplication and request new discovery.
		   When switch node is targeted NodeInfo querying will be done
		   in opposite order, this is much stronger check, unfortunately
		   it is impossible with CAs.
		 */
		p_physp = osm_node_get_physp_ptr(p_node, port_num);
		if (!p_physp) {
			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR OD0F: "
				"Failed to find physp for port %d of Node GUID 0x%"
				PRIx64 "\n", port_num,
				cl_ntoh64(osm_node_get_node_guid(p_node)));
			goto _exit;
		}

		if (p_ni_context->dup_count > 5) {
			report_duplicated_guid(sm, p_physp, p_neighbor_node,
					       p_ni_context->port_num);
			sm->p_subn->force_heavy_sweep = TRUE;
		} else if (p_node->sw)
			requery_dup_node_info(sm, p_physp->p_remote_physp,
					      p_ni_context->dup_count + 1);
		else
			requery_dup_node_info(sm, p_physp,
					      p_ni_context->dup_count + 1);
	}

	/*
	   When there are only two nodes with exact same guids (connected back
	   to back) - the previous check for duplicated guid will not catch
	   them. But the link will be from the port to itself...
	   Enhanced Port 0 is an exception to this
	 */
	if (osm_node_get_node_guid(p_node) == p_ni_context->node_guid &&
	    port_num == p_ni_context->port_num &&
	    port_num != 0 && cl_qmap_count(&sm->p_subn->sw_guid_tbl) == 0) {
		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
			"Duplicate GUID found by link from a port to itself:"
			"node 0x%" PRIx64 ", port number %u\n",
			cl_ntoh64(osm_node_get_node_guid(p_node)), port_num);
		p_physp = osm_node_get_physp_ptr(p_node, port_num);
		if (!p_physp) {
			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR OD1D: "
				"Failed to find physp for port %d of Node GUID 0x%"
				PRIx64 "\n", port_num,
				cl_ntoh64(osm_node_get_node_guid(p_node)));
			goto _exit;
		}

		osm_dump_dr_path_v2(sm->p_log, osm_physp_get_dr_path_ptr(p_physp),
				    FILE_ID, OSM_LOG_VERBOSE);

		/* Terminates the whole process, not just this handler. */
		if (sm->p_subn->opt.exit_on_fatal == TRUE) {
			osm_log_v2(sm->p_log, OSM_LOG_SYS, FILE_ID,
				   "Errors on subnet. Duplicate GUID found "
				   "by link from a port to itself. "
				   "See verbose opensm.log for more details\n");
			exit(1);
		}
	}

	OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
		"Creating new link between:\n\t\t\t\tnode 0x%" PRIx64
		", port number %u and\n\t\t\t\tnode 0x%" PRIx64
		", port number %u\n",
		cl_ntoh64(osm_node_get_node_guid(p_node)), port_num,
		cl_ntoh64(p_ni_context->node_guid), p_ni_context->port_num);

	/* Only consulted while the unicast cache is marked valid. */
	if (sm->ucast_mgr.cache_valid)
		osm_ucast_cache_check_new_link(&sm->ucast_mgr, p_node, port_num,
					       p_neighbor_node,
					       p_ni_context->port_num);

	/* Both ends must have a physical port before the link is made. */
	p_physp = osm_node_get_physp_ptr(p_node, port_num);
	p_remote_physp = osm_node_get_physp_ptr(p_neighbor_node,
						p_ni_context->port_num);
	if (!p_physp || !p_remote_physp)
		goto _exit;

	osm_node_link(p_node, port_num, p_neighbor_node, p_ni_context->port_num);

	/* Record the neighbor relation in both directions. */
	osm_db_neighbor_set(sm->p_subn->p_neighbor,
			    cl_ntoh64(osm_physp_get_port_guid(p_physp)),
			    port_num,
			    cl_ntoh64(osm_physp_get_port_guid(p_remote_physp)),
			    p_ni_context->port_num);
	osm_db_neighbor_set(sm->p_subn->p_neighbor,
			    cl_ntoh64(osm_physp_get_port_guid(p_remote_physp)),
			    p_ni_context->port_num,
			    cl_ntoh64(osm_physp_get_port_guid(p_physp)),
			    port_num);

_exit:
	OSM_LOG_EXIT(sm->p_log);
}
Esempio n. 9
0
/** ===========================================================================
 * Allocate the extraction tables that are not allocated yet.
 *
 * Tables owned by p_ssa_db (nodes, GUID-to-LID, links, ports, pkeys) and the
 * LFT top/block tables of the global ssa_db are each allocated only when
 * their pointer is NULL, sized from the current counts in p_subn.  The total
 * pkey count is stored in p_ssa_db->pkey_tbl_rec_num.
 *
 * Returns 0 on success and -1 when an allocation fails.  On failure every
 * table that precedes the failed one in allocation order is freed (whether
 * or not it was allocated by this call) and its pointer is reset to NULL,
 * so that a later call allocates it again instead of reusing freed memory.
 */
static int
extract_alloc_tbls(osm_subn_t *p_subn, struct ssa_db_extract *p_ssa_db)
{
	const osm_pkey_tbl_t *p_pkey_tbl;
	osm_switch_t *p_sw;
	osm_port_t *p_port;
	uint64_t links, ports, lft_blocks;
	uint32_t guids, nodes, lft_tops;
	uint32_t switch_ports_num = 0;
	uint32_t pkey_cnt = 0;
	uint16_t lids;

	nodes = (uint32_t) cl_qmap_count(&p_subn->node_guid_tbl);
	if (!p_ssa_db->p_node_tbl) {
		p_ssa_db->p_node_tbl = (struct smdb_node *)
		    malloc(sizeof(*p_ssa_db->p_node_tbl) * nodes);
		if (!p_ssa_db->p_node_tbl) {
			ssa_log(SSA_LOG_DEFAULT,
				"ERROR - unable to allocate nodes table\n");
			goto err0;
		}
	}

	/* One LFT top record per switch. */
	lft_tops = (uint32_t) cl_qmap_count(&p_subn->sw_guid_tbl);
	if (!ssa_db->p_lft_db->p_db_lft_top_tbl) {
		ssa_db->p_lft_db->p_db_lft_top_tbl = (struct smdb_lft_top *)
			malloc(sizeof(*ssa_db->p_lft_db->p_db_lft_top_tbl) *
			       lft_tops);
		if (!ssa_db->p_lft_db->p_db_lft_top_tbl) {
			ssa_log(SSA_LOG_DEFAULT,
				"ERROR - unable to allocate LFT tops table\n");
			goto err1;
		}
	}

	lids = (uint16_t) cl_ptr_vector_get_size(&p_subn->port_lid_tbl);

	/* Blocks per switch: LID count rounded up to whole blocks, times 2^LMC. */
	lft_blocks = ((lids % IB_SMP_DATA_SIZE) ?
	    (lids / IB_SMP_DATA_SIZE + 1) : (lids / IB_SMP_DATA_SIZE));
	lft_blocks = (uint64_t) lft_tops * lft_blocks * (1 << p_ssa_db->lmc);
	if (!ssa_db->p_lft_db->p_db_lft_block_tbl) {
		ssa_db->p_lft_db->p_db_lft_block_tbl =
		    (struct smdb_lft_block *)
			malloc(sizeof(*ssa_db->p_lft_db->p_db_lft_block_tbl) *
			       lft_blocks);
		if (!ssa_db->p_lft_db->p_db_lft_block_tbl) {
			ssa_log(SSA_LOG_DEFAULT,
				"ERROR - unable to allocate LFT blocks table\n");
			goto err2;
		}
	}

	guids = (uint32_t) cl_qmap_count(&p_subn->port_guid_tbl);
	if (!p_ssa_db->p_guid_to_lid_tbl) {
		p_ssa_db->p_guid_to_lid_tbl = (struct smdb_guid2lid *)
				malloc(sizeof(*p_ssa_db->p_guid_to_lid_tbl) *
				       guids);
		if (!p_ssa_db->p_guid_to_lid_tbl) {
			ssa_log(SSA_LOG_DEFAULT,
				"ERROR - unable to allocate GUID to LID table\n");
			goto err3;
		}
	}

	for (p_sw = (osm_switch_t *)cl_qmap_head(&p_subn->sw_guid_tbl);
	     p_sw != (osm_switch_t *)cl_qmap_end(&p_subn->sw_guid_tbl);
	     p_sw = (osm_switch_t *)cl_qmap_next(&p_sw->map_item))
			switch_ports_num += p_sw->num_ports;

	/* Link and port tables are sized as port GUIDs plus all switch ports. */
	links = guids + switch_ports_num;
	if (!p_ssa_db->p_link_tbl) {
		p_ssa_db->p_link_tbl = (struct smdb_link *)
				malloc(sizeof(*p_ssa_db->p_link_tbl) * links);
		if (!p_ssa_db->p_link_tbl) {
			ssa_log(SSA_LOG_DEFAULT,
				"ERROR - unable to allocate links table\n");
			goto err4;
		}
	}

	ports = links;
	if (!p_ssa_db->p_port_tbl) {
		p_ssa_db->p_port_tbl = (struct smdb_port *)
				malloc(sizeof(*p_ssa_db->p_port_tbl) * ports);
		if (!p_ssa_db->p_port_tbl) {
			ssa_log(SSA_LOG_DEFAULT,
				"ERROR - unable to allocate ports table\n");
			goto err5;
		}
	}

	for (p_port = (osm_port_t *)cl_qmap_head(&p_subn->port_guid_tbl);
	     p_port != (osm_port_t *)cl_qmap_end(&p_subn->port_guid_tbl);
	     p_port = (osm_port_t *)cl_qmap_next(&p_port->map_item)) {
		p_pkey_tbl = osm_physp_get_pkey_tbl(p_port->p_physp);
		pkey_cnt += (uint32_t)
		    cl_map_count((const cl_map_t *) &p_pkey_tbl->keys);
	}

	if (!p_ssa_db->p_pkey_tbl) {
		p_ssa_db->p_pkey_tbl = (uint16_t *)
		    malloc(sizeof(*p_ssa_db->p_pkey_tbl) * pkey_cnt);
		if (!p_ssa_db->p_pkey_tbl) {
			ssa_log(SSA_LOG_DEFAULT,
				"ERROR - unable to allocate pkeys table\n");
			goto err6;
		}
	}
	p_ssa_db->pkey_tbl_rec_num = pkey_cnt;

	return 0;

	/*
	 * Unwind in reverse allocation order.  Each pointer is cleared after
	 * free() because the "allocate only if NULL" checks above would
	 * otherwise treat a dangling pointer as a valid table on a later call.
	 */
err6:
	free(p_ssa_db->p_port_tbl);
	p_ssa_db->p_port_tbl = NULL;
err5:
	free(p_ssa_db->p_link_tbl);
	p_ssa_db->p_link_tbl = NULL;
err4:
	free(p_ssa_db->p_guid_to_lid_tbl);
	p_ssa_db->p_guid_to_lid_tbl = NULL;
err3:
	free(ssa_db->p_lft_db->p_db_lft_block_tbl);
	ssa_db->p_lft_db->p_db_lft_block_tbl = NULL;
err2:
	free(ssa_db->p_lft_db->p_db_lft_top_tbl);
	ssa_db->p_lft_db->p_db_lft_top_tbl = NULL;
err1:
	free(p_ssa_db->p_node_tbl);
	p_ssa_db->p_node_tbl = NULL;
err0:
	return -1;
}
Esempio n. 10
0
/** ===========================================================================
 */
void ssa_db_validate(struct ssa_db_extract *p_ssa_db)
{
	struct smdb_guid2lid guid2lid;
	struct smdb_node node;
	struct smdb_link link;
	struct smdb_port port;
	uint64_t i;
	char buffer[64];

	if (!p_ssa_db || !p_ssa_db->initialized ||
	    !(ssa_get_log_level() & SSA_LOG_DB))
		return;

	ssa_log(SSA_LOG_DB, "[\n");

	/* First, most Fabric/SM related parameters */
	ssa_log(SSA_LOG_DB, "Subnet prefix 0x%" PRIx64 "\n",
		ntohll(p_ssa_db->subnet_prefix));
	ssa_log(SSA_LOG_DB,
		"LMC %u Subnet timeout %u Both Pkeys %sabled\n",
		p_ssa_db->lmc, p_ssa_db->subnet_timeout,
		p_ssa_db->allow_both_pkeys ? "en" : "dis");

	for (i = 0; i < cl_qmap_count(&p_ssa_db->ep_node_tbl); i++) {
		node = p_ssa_db->p_node_tbl[i];
		if (node.node_type == IB_NODE_TYPE_SWITCH)
			sprintf(buffer, " with %s Switch Port 0\n",
				node.is_enhanced_sp0 ? "Enhanced" : "Base");
		else
			sprintf(buffer, "\n");
		ssa_log(SSA_LOG_DB, "Node GUID 0x%" PRIx64 " Type %d%s",
			ntohll(node.node_guid), node.node_type, buffer);
	}

	for (i = 0; i < cl_qmap_count(&p_ssa_db->ep_guid_to_lid_tbl); i++) {
		guid2lid = p_ssa_db->p_guid_to_lid_tbl[i];
		ssa_log(SSA_LOG_DB,
			"Port GUID 0x%" PRIx64 " LID %u LMC %u is_switch %d\n",
			ntohll(guid2lid.guid), ntohs(guid2lid.lid),
			guid2lid.lmc, guid2lid.is_switch);

	}

	for (i = 0; i < cl_qmap_count(&p_ssa_db->ep_port_tbl); i++) {
		port = p_ssa_db->p_port_tbl[i];
		ssa_log(SSA_LOG_DB, "Port LID %u Port Num %u\n",
			ntohs(port.port_lid), port.port_num);
		ssa_log(SSA_LOG_DB, "MTUCapability %u rate %u\n",
			port.mtu_cap, port.rate & SSA_DB_PORT_RATE_MASK);
		ssa_log(SSA_LOG_DB, "FDR10 %s active\n",
			(port.rate & SSA_DB_PORT_IS_FDR10_ACTIVE_MASK)
			? "" : "not");
		ssa_log(SSA_LOG_DB, "PKeys %u\n",
			ntohs(port.pkey_tbl_size) /
			      sizeof(*p_ssa_db->p_pkey_tbl));
	}

	for (i = 0; i < cl_qmap_count(&p_ssa_db->ep_link_tbl); i++) {
		link = p_ssa_db->p_link_tbl[i];
		ssa_log(SSA_LOG_DB,
			"Link Record: from LID %u port %u to LID %u port %u\n",
			ntohs(link.from_lid), link.from_port_num,
			ntohs(link.to_lid), link.to_port_num);
	}

	ssa_log(SSA_LOG_DB, "]\n");
}
Esempio n. 11
0
/*
 * Run one sweep of the subnet.
 *
 * Only acts while the SM is in MASTER or DISCOVERING state.  Depending on
 * the subnet flags it performs, in order of preference:
 *   - a light sweep, when switches are already known and nothing forces
 *     more work;
 *   - a reroute-only pass, when force_reroute is set but no heavy sweep is
 *     required;
 *   - a heavy sweep: discovery (hop 0 and hop 1), SM election/handover
 *     checks, drop manager, pkey setup, LID assignment, unicast and
 *     multicast routing, and the link state transitions, ending with the
 *     subnet-up event.
 *
 * The function returns early, without signalling subnet-up, whenever
 * wait_for_pending_transactions() returns non-zero, a discovery step does
 * not return IB_SUCCESS, the SM port is down, or the SM leaves the master
 * role.  If a heavy sweep was requested or initialization errors occurred
 * during the sweep, another sweep is signalled at the end.
 */
static void do_sweep(osm_sm_t * sm)
{
	ib_api_status_t status;
	osm_remote_sm_t *p_remote_sm;
	/* Set once the configuration files were rescanned successfully, so
	 * that the heavy sweep below does not rescan them again. */
	unsigned config_parsed = 0;

	if (sm->p_subn->force_heavy_sweep) {
		if (osm_subn_rescan_conf_files(sm->p_subn) < 0)
			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 331A: "
				"osm_subn_rescan_conf_file failed\n");
		else
			config_parsed = 1;
	}

	if (sm->p_subn->sm_state != IB_SMINFO_STATE_MASTER &&
	    sm->p_subn->sm_state != IB_SMINFO_STATE_DISCOVERING)
		return;

	if (sm->p_subn->coming_out_of_standby)
		/*
		 * Need to force re-write of sm_base_lid to all ports
		 * to do that we want all the ports to be considered
		 * foreign
		 */
		state_mgr_clean_known_lids(sm);

	sm->master_sm_found = 0;

	/*
	 * If we already have switches, then try a light sweep.
	 * Otherwise, this is probably our first discovery pass
	 * or we are connected in loopback. In both cases do a
	 * heavy sweep.
	 * Note: If we are connected in loopback we want a heavy
	 * sweep, since we will not be getting any traps if there is
	 * a lost connection.
	 */
	/*  if we are in DISCOVERING state - this means it is either in
	 *  initializing or wake up from STANDBY - run the heavy sweep */
	if (cl_qmap_count(&sm->p_subn->sw_guid_tbl)
	    && sm->p_subn->sm_state != IB_SMINFO_STATE_DISCOVERING
	    && sm->p_subn->opt.force_heavy_sweep == FALSE
	    && sm->p_subn->force_heavy_sweep == FALSE
	    && sm->p_subn->force_reroute == FALSE
	    && sm->p_subn->subnet_initialization_error == FALSE
	    && (state_mgr_light_sweep_start(sm) == IB_SUCCESS)) {
		if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
			return;
		/* The light sweep is final only if it did not itself
		 * request a heavy sweep; otherwise fall through. */
		if (!sm->p_subn->force_heavy_sweep) {
			if (sm->p_subn->opt.sa_db_dump &&
			    !osm_sa_db_file_dump(sm->p_subn->p_osm))
				osm_opensm_report_event(sm->p_subn->p_osm,
					OSM_EVENT_ID_SA_DB_DUMPED, NULL);
			OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
					"LIGHT SWEEP COMPLETE");
			return;
		}
	}

	/*
	 * Unicast cache should be invalidated if there were errors
	 * during initialization or if subnet re-route is requested.
	 */
	if (sm->p_subn->opt.use_ucast_cache &&
	    (sm->p_subn->subnet_initialization_error ||
	     sm->p_subn->force_reroute || sm->p_subn->coming_out_of_standby))
		osm_ucast_cache_invalidate(&sm->ucast_mgr);

	/*
	 * If we don't need to do a heavy sweep and we want to do a reroute,
	 * just reroute only.
	 */
	if (cl_qmap_count(&sm->p_subn->sw_guid_tbl)
	    && sm->p_subn->sm_state != IB_SMINFO_STATE_DISCOVERING
	    && sm->p_subn->opt.force_heavy_sweep == FALSE
	    && sm->p_subn->force_heavy_sweep == FALSE
	    && sm->p_subn->force_reroute == TRUE
	    && sm->p_subn->subnet_initialization_error == FALSE) {
		/* Reset flag */
		sm->p_subn->force_reroute = FALSE;

		/* Re-program the switches fully */
		sm->p_subn->ignore_existing_lfts = TRUE;

		if (osm_ucast_mgr_process(&sm->ucast_mgr)) {
			OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
					"REROUTE FAILED");
			return;
		}
		osm_qos_setup(sm->p_subn->p_osm);

		/* Reset flag */
		sm->p_subn->ignore_existing_lfts = FALSE;

		if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
			return;

		/* On initialization errors fall through to a heavy sweep. */
		if (!sm->p_subn->subnet_initialization_error) {
			OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
					"REROUTE COMPLETE");
			osm_opensm_report_event(sm->p_subn->p_osm,
				OSM_EVENT_ID_UCAST_ROUTING_DONE, NULL);
			return;
		}
	}

	osm_opensm_report_event(sm->p_subn->p_osm,
				OSM_EVENT_ID_HEAVY_SWEEP_START, NULL);

	/* go to heavy sweep */
repeat_discovery:

	/* First of all - unset all flags */
	sm->p_subn->force_heavy_sweep = FALSE;
	sm->p_subn->force_reroute = FALSE;
	sm->p_subn->subnet_initialization_error = FALSE;

	/* Reset tracking values in case limiting component got removed
	 * from fabric. */
	sm->p_subn->min_ca_mtu = IB_MAX_MTU;
	sm->p_subn->min_ca_rate = IB_MAX_RATE;
	sm->p_subn->min_data_vls = IB_MAX_NUM_VLS - 1;

	/* rescan configuration updates */
	if (!config_parsed && osm_subn_rescan_conf_files(sm->p_subn) < 0)
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 331A: "
			"osm_subn_rescan_conf_file failed\n");

	if (sm->p_subn->sm_state != IB_SMINFO_STATE_MASTER)
		sm->p_subn->need_update = 1;

	status = state_mgr_sweep_hop_0(sm);
	if (status != IB_SUCCESS ||
	    wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
		return;

	if (state_mgr_is_sm_port_down(sm) == TRUE) {
		/* Log the transition only once per up->down change. */
		if (sm->p_subn->last_sm_port_state) {
			sm->p_subn->last_sm_port_state = 0;
			osm_log(sm->p_log, OSM_LOG_SYS, "SM port is down\n");
			OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
					"SM PORT DOWN");
		}

		/* Run the drop manager - we want to clear all records */
		osm_drop_mgr_process(sm);

		/* Move to DISCOVERING state */
		 if (sm->p_subn->sm_state != IB_SMINFO_STATE_DISCOVERING)
			osm_sm_state_mgr_process(sm, OSM_SM_SIGNAL_DISCOVER);
		osm_opensm_report_event(sm->p_subn->p_osm,
				OSM_EVENT_ID_STATE_CHANGE, NULL);
		return;
	} else {
		/* Log the transition only once per down->up change. */
		if (!sm->p_subn->last_sm_port_state) {
			sm->p_subn->last_sm_port_state = 1;
			osm_log(sm->p_log, OSM_LOG_SYS, "SM port is up\n");
			OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
					"SM PORT UP");
		}
	}

	status = state_mgr_sweep_hop_1(sm);
	if (status != IB_SUCCESS ||
	    wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
		return;

	/* discovery completed - check other sm presence */
	if (sm->master_sm_found) {
		/*
		 * Call the sm_state_mgr with signal
		 * MASTER_OR_HIGHER_SM_DETECTED_DONE
		 */
		osm_sm_state_mgr_process(sm,
					 OSM_SM_SIGNAL_MASTER_OR_HIGHER_SM_DETECTED);
		OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
				"ENTERING STANDBY STATE");
		/* notify master SM about us */
		osm_send_trap144(sm, 0);
		osm_opensm_report_event(sm->p_subn->p_osm,
				OSM_EVENT_ID_STATE_CHANGE, NULL);
		return;
	}

	/* if new sweep requested - don't bother with the rest */
	if (sm->p_subn->force_heavy_sweep)
		goto repeat_discovery;

	osm_opensm_report_event(sm->p_subn->p_osm,
				OSM_EVENT_ID_HEAVY_SWEEP_DONE, NULL);

	OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE, "HEAVY SWEEP COMPLETE");

	/* If we are MASTER - get the highest remote_sm, and
	 * see if it is higher than our local sm.
	 */
	if (sm->p_subn->sm_state == IB_SMINFO_STATE_MASTER) {
		p_remote_sm = state_mgr_get_highest_sm(sm);
		if (p_remote_sm != NULL) {
			/* report new ports (trap 64) before leaving MASTER */
			state_mgr_report_new_ports(sm);

			/* need to handover the mastership
			 * to the remote sm, and move to standby */
			state_mgr_send_handover(sm, p_remote_sm);
			osm_sm_state_mgr_process(sm,
						 OSM_SM_SIGNAL_HANDOVER_SENT);
			return;
		} else {
			/* We are the highest sm - check to see if there is
			 * a remote SM that is in master state. */
			p_remote_sm = state_mgr_exists_other_master_sm(sm);
			if (p_remote_sm != NULL) {
				/* There is a remote SM that is master.
				 * need to wait for that SM to relinquish control
				 * of its portion of the subnet. C14-60.2.1.
				 * Also - need to start polling on that SM. */
				sm->p_polling_sm = p_remote_sm;
				osm_sm_state_mgr_process(sm,
							 OSM_SM_SIGNAL_WAIT_FOR_HANDOVER);
				return;
			}
		}
	}

	/* Need to continue with lid assignment */
	osm_drop_mgr_process(sm);

	/*
	 * If we are not MASTER already - this means that we are
	 * in discovery state. call osm_sm_state_mgr with signal
	 * DISCOVERY_COMPLETED
	 */
	if (sm->p_subn->sm_state == IB_SMINFO_STATE_DISCOVERING)
		osm_sm_state_mgr_process(sm, OSM_SM_SIGNAL_DISCOVERY_COMPLETED);

	osm_pkey_mgr_process(sm->p_subn->p_osm);

	/* try to restore SA DB (this should be before lid_mgr
	   because we may want to disable clients reregistration
	   when SA DB is restored) */
	osm_sa_db_file_load(sm->p_subn->p_osm);

	if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
		return;

	OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
			"PKEY and QOS setup completed - STARTING SM LID CONFIG");

	osm_lid_mgr_process_sm(&sm->lid_mgr);
	if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
		return;

	OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
			"SM LID ASSIGNMENT COMPLETE - STARTING SUBNET LID CONFIG");
	state_mgr_notify_lid_change(sm);

	osm_lid_mgr_process_subnet(&sm->lid_mgr);
	if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
		return;

	/* At this point we need to check the consistency of
	 * the port_lid_tbl under the subnet. There might be
	 * errors in it if PortInfo Set requests didn't reach
	 * their destination. */
	state_mgr_check_tbl_consistency(sm);

	OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
			"LID ASSIGNMENT COMPLETE - STARTING SWITCH TABLE CONFIG");

	/*
	 * Proceed with unicast forwarding table configuration; if it fails
	 * return early to wait for a trap or the next sweep interval.
	 */

	/* Full routing runs when the cache is not valid or when processing
	 * the cache returns non-zero. */
	if (!sm->ucast_mgr.cache_valid ||
	    osm_ucast_cache_process(&sm->ucast_mgr))
		if (osm_ucast_mgr_process(&sm->ucast_mgr))
			return;

	osm_qos_setup(sm->p_subn->p_osm);

	if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
		return;

	/* cleanup switch lft buffers */
	cl_qmap_apply_func(&sm->p_subn->sw_guid_tbl, cleanup_switch, sm->p_log);

	/* We are done setting all LFTs so clear the ignore existing.
	 * From now on, as long as we are still master, we want to
	 * take into account these lfts. */
	sm->p_subn->ignore_existing_lfts = FALSE;

	OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
			"SWITCHES CONFIGURED FOR UNICAST");
	osm_opensm_report_event(sm->p_subn->p_osm,
			OSM_EVENT_ID_UCAST_ROUTING_DONE, NULL);

	if (!sm->p_subn->opt.disable_multicast) {
		osm_mcast_mgr_process(sm, TRUE);
		if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
			return;
		OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
				"SWITCHES CONFIGURED FOR MULTICAST");
	}

	/*
	 * The LINK_PORTS state is required since we cannot count on
	 * the port state change MADs to succeed. This is an artifact
	 * of the spec defining state change from state X to state X
	 * as an error. The hardware then is not required to process
	 * other parameters provided by the Set(PortInfo) Packet.
	 */

	osm_link_mgr_process(sm, IB_LINK_NO_CHANGE);
	if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
		return;

	OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
			"LINKS PORTS CONFIGURED - SET LINKS TO ARMED STATE");

	osm_link_mgr_process(sm, IB_LINK_ARMED);
	if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
		return;

	OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
			"LINKS ARMED - SET LINKS TO ACTIVE STATE");

	osm_link_mgr_process(sm, IB_LINK_ACTIVE);
	if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
		return;

	/*
	 * The sweep completed!
	 */

	/*
	 * Send trap 64 on newly discovered endports
	 */
	state_mgr_report_new_ports(sm);

	/* in any case we zero this flag */
	sm->p_subn->coming_out_of_standby = FALSE;

	/* If there were errors - then the subnet is not really up */
	if (sm->p_subn->subnet_initialization_error == TRUE) {
		osm_log(sm->p_log, OSM_LOG_SYS,
			"Errors during initialization\n");
		OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_ERROR,
				"ERRORS DURING INITIALIZATION");
	} else {
		sm->p_subn->need_update = 0;
		osm_dump_all(sm->p_subn->p_osm);
		state_mgr_up_msg(sm);
		sm->p_subn->first_time_master_sweep = FALSE;

		if (osm_log_is_active(sm->p_log, OSM_LOG_VERBOSE) ||
		    sm->p_subn->opt.sa_db_dump)
			osm_sa_db_file_dump(sm->p_subn->p_osm);
	}

	/*
	 * Finally signal the subnet up event
	 */
	cl_event_signal(&sm->subnet_up_event);

	osm_opensm_report_event(sm->p_subn->p_osm, OSM_EVENT_ID_SUBNET_UP,
				NULL);

	/* if we got a signal to force heavy sweep or errors
	 * in the middle of the sweep - try another sweep. */
	if (sm->p_subn->force_heavy_sweep
	    || sm->p_subn->subnet_initialization_error)
		osm_sm_signal(sm, OSM_SIGNAL_SWEEP);
}
Esempio n. 12
0
/*
 * Build the min-hop (LID) matrices of all switches in the subnet.
 *
 * Sequence:
 *  1. Apply set_default_hop_wf to every switch and, when
 *     opt.hop_weights_file is configured, feed that file through
 *     parse_node_map with the set_hop_wf callback.  A file that cannot
 *     be parsed is reported (ERR 3A05) but does not abort the build.
 *  2. Apply ucast_mgr_process_hop_0_1 to every switch (own port 0
 *     LID(s) and leaf nodes).
 *  3. Apply ucast_mgr_process_neighbors to every switch repeatedly, at
 *     most (number of switches - 1) times, stopping early once a full
 *     pass leaves p_mgr->some_hop_count_set cleared.
 *
 * p_mgr: unicast manager whose subnet switch table is processed.
 *
 * Always returns 0.
 */
int osm_ucast_mgr_build_lid_matrices(IN osm_ucast_mgr_t * p_mgr)
{
	uint32_t i;
	uint32_t iteration_max;
	cl_qmap_t *p_sw_guid_tbl;

	p_sw_guid_tbl = &p_mgr->p_subn->sw_guid_tbl;

	OSM_LOG(p_mgr->p_log, OSM_LOG_VERBOSE,
		"Starting switches' Min Hop Table Assignment\n");

	/*
	   Set up the weighting factors for the routing.
	 */
	cl_qmap_apply_func(p_sw_guid_tbl, set_default_hop_wf, NULL);
	if (p_mgr->p_subn->opt.hop_weights_file) {
		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
			"Fetching hop weight factor file \'%s\'\n",
			p_mgr->p_subn->opt.hop_weights_file);
		/* Best effort: a parse failure is logged and routing goes on */
		if (parse_node_map(p_mgr->p_subn->opt.hop_weights_file,
				   set_hop_wf, p_mgr)) {
			OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 3A05: "
				"cannot parse hop_weights_file \'%s\'\n",
				p_mgr->p_subn->opt.hop_weights_file);
		}
	}

	/*
	   Set the switch matrices for each switch's own port 0 LID(s)
	   then set the lid matrices for each switch's leaf nodes.
	 */
	cl_qmap_apply_func(p_sw_guid_tbl, ucast_mgr_process_hop_0_1, p_mgr);

	/*
	   Get the switch matrices for each switch's neighbors.
	   This process requires a number of iterations equal to
	   the number of switches in the subnet minus 1.

	   In each iteration, a switch learns the lid/port/hop
	   information (as contained by a switch's lid matrix) from
	   its immediate neighbors.  After each iteration, a switch
	   (and its neighbors) know more routing information than
	   they did on the previous iteration.
	   Thus, by repeatedly absorbing the routing information of
	   neighbor switches, every switch eventually learns how to
	   route all LIDs on the subnet.

	   Note that there may not be any switches in the subnet if
	   we are in simple p2p configuration.
	 */
	iteration_max = cl_qmap_count(p_sw_guid_tbl);

	/*
	   If there are switches in the subnet, iterate until the lid
	   matrix has been constructed.  Otherwise, just immediately
	   indicate we're done if no switches exist.
	 */
	if (iteration_max) {
		iteration_max--;

		/*
		   We need to find out when the propagation of hop
		   counts has relaxed.  The some_hop_count_set flag is
		   cleared before each pass; if it is still clear after
		   the pass (it is expected to be raised from within
		   ucast_mgr_process_neighbors), nothing changed and the
		   loop exits early.
		 */
		p_mgr->some_hop_count_set = TRUE;
		for (i = 0; (i < iteration_max) && p_mgr->some_hop_count_set;
		     i++) {
			p_mgr->some_hop_count_set = FALSE;
			cl_qmap_apply_func(p_sw_guid_tbl,
					   ucast_mgr_process_neighbors, p_mgr);
		}
		/* i is unsigned, so it is printed with %u */
		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
			"Min-hop propagated in %u steps\n", i);
	}

	return 0;
}
/*
 * Decide whether the cached unicast routing is still usable for the
 * fabric as currently discovered.
 *
 * Nothing is done when the cache is already marked invalid.  Otherwise:
 *  - Pass 1 walks every linked physical port (port 0 excluded) of every
 *    switch in the subnet, marks leaf switches in the cache, and
 *    reconciles each port flagged need_update against the cache.
 *  - cache_cleanup_switches() then drops cached switches whose ports
 *    have all been restored.
 *  - Pass 2 walks what is still cached, i.e. switches and links that are
 *    missing from the fabric.  Only links to missing CA/RTR and missing
 *    leaf switches (not linked to another leaf switch) are tolerated.
 *
 * On the first finding that the cache cannot account for, the cache is
 * invalidated with osm_ucast_cache_invalidate() and the function returns.
 */
static void ucast_cache_validate(osm_ucast_mgr_t * p_mgr)
{
	cache_switch_t *p_cache_sw;
	cache_switch_t *p_remote_cache_sw;
	unsigned port_num;
	unsigned max_ports;
	uint8_t remote_node_type;
	uint16_t lid_ho;
	uint16_t remote_lid_ho;
	osm_switch_t *p_sw;
	osm_switch_t *p_remote_sw;
	osm_node_t *p_node;
	osm_physp_t *p_physp;
	osm_physp_t *p_remote_physp;
	osm_port_t *p_remote_port;
	cl_qmap_t *p_sw_tbl;

	OSM_LOG_ENTER(p_mgr->p_log);
	if (!p_mgr->cache_valid)
		goto Exit;

	/* If there are no switches in the subnet, we are done */
	p_sw_tbl = &p_mgr->p_subn->sw_guid_tbl;
	if (cl_qmap_count(p_sw_tbl) == 0) {
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	/*
	 * Scan all the physical switch ports in the subnet.
	 * If the port need_update flag is on, check whether
	 * it's just some node/port reset or a cached topology
	 * change. Otherwise the cache is invalid.
	 */
	for (p_sw = (osm_switch_t *) cl_qmap_head(p_sw_tbl);
	     p_sw != (osm_switch_t *) cl_qmap_end(p_sw_tbl);
	     p_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item)) {

		p_node = p_sw->p_node;

		lid_ho = cl_ntoh16(osm_node_get_base_lid(p_node, 0));
		/* May be NULL: not every fabric switch has a cache entry */
		p_cache_sw = cache_get_sw(p_mgr, lid_ho);

		max_ports = osm_node_get_num_physp(p_node);

		/* skip port 0 */
		for (port_num = 1; port_num < max_ports; port_num++) {

			p_physp = osm_node_get_physp_ptr(p_node, port_num);

			if (!p_physp || !p_physp->p_remote_physp ||
			    !osm_physp_link_exists(p_physp,
						   p_physp->p_remote_physp))
				/* no valid link */
				continue;

			/*
			 * While scanning all the physical ports in the subnet,
			 * mark corresponding leaf switches in the cache.
			 */
			if (p_cache_sw &&
			    !p_cache_sw->dropped &&
			    !cache_sw_is_leaf(p_cache_sw) &&
			    p_physp->p_remote_physp->p_node &&
			    osm_node_get_type(p_physp->p_remote_physp->
					      p_node) != IB_NODE_TYPE_SWITCH)
				cache_sw_set_leaf(p_cache_sw);

			if (!p_physp->need_update)
				continue;

			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
				"Checking switch lid %u, port %u\n",
				lid_ho, port_num);

			p_remote_physp = osm_physp_get_remote(p_physp);
			remote_node_type =
			    osm_node_get_type(p_remote_physp->p_node);

			/*
			 * A remote switch is identified by its port 0 LID,
			 * any other node by the LID of the connected port.
			 */
			if (remote_node_type == IB_NODE_TYPE_SWITCH)
				remote_lid_ho =
				    cl_ntoh16(osm_node_get_base_lid
					      (p_remote_physp->p_node, 0));
			else
				remote_lid_ho =
				    cl_ntoh16(osm_node_get_base_lid
					      (p_remote_physp->p_node,
					       osm_physp_get_port_num
					       (p_remote_physp)));

			if (!p_cache_sw ||
			    port_num >= p_cache_sw->num_ports ||
			    !p_cache_sw->ports[port_num].remote_lid_ho) {
				/*
				 * There is some uncached change on the port.
				 * In general, the reasons might be as follows:
				 *  - switch reset
				 *  - port reset (or port down/up)
				 *  - quick connection location change
				 *  - new link (or new switch)
				 *
				 * First two reasons allow cache usage, while
				 * the last two reasons should invalidate cache.
				 *
				 * In case of quick connection location change,
				 * cache would have been invalidated by
				 * osm_ucast_cache_check_new_link() function.
				 *
				 * In case of new link between two known nodes,
				 * cache also would have been invalidated by
				 * osm_ucast_cache_check_new_link() function.
				 *
				 * Another reason is cached link between two
				 * known switches went back. In this case the
				 * osm_ucast_cache_check_new_link() function would
				 * clear both sides of the link from the cache
				 * during the discovery process, so effectively
				 * this would be equivalent to port reset.
				 *
				 * So three possible reasons remain:
				 *  - switch reset
				 *  - port reset (or port down/up)
				 *  - link of a new switch
				 *
				 * To validate cache, we need to check only the
				 * third reason - link of a new node/switch:
				 *  - If this is the local switch that is new,
				 *    then it should have (p_sw->need_update == 2).
				 *  - If the remote node is switch and it's new,
				 *    then it also should have
				 *    (p_sw->need_update == 2).
				 *  - If the remote node is CA/RTR and it's new,
				 *    then its port should have is_new flag on.
				 */
				if (p_sw->need_update == 2) {
					OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
						"New switch found (lid %u)\n",
						lid_ho);
					osm_ucast_cache_invalidate(p_mgr);
					goto Exit;
				}

				if (remote_node_type == IB_NODE_TYPE_SWITCH) {

					p_remote_sw =
					    p_remote_physp->p_node->sw;
					if (p_remote_sw->need_update == 2) {
						/* this could also be case of
						   switch coming back with an
						   additional link that it
						   didn't have before */
						OSM_LOG(p_mgr->p_log,
							OSM_LOG_DEBUG,
							"New switch/link found (lid %u)\n",
							remote_lid_ho);
						osm_ucast_cache_invalidate
						    (p_mgr);
						goto Exit;
					}
				} else {
					/*
					 * Remote node is CA/RTR.
					 * Get p_port of the remote node and
					 * check its p_port->is_new flag.
					 *
					 * The GUID lookup is not guaranteed to
					 * succeed.  An endport without a port
					 * object cannot be confirmed as already
					 * known, so it is handled like a new one
					 * (conservatively invalidating the cache)
					 * instead of dereferencing NULL.
					 */
					p_remote_port =
					    osm_get_port_by_guid(p_mgr->p_subn,
								 osm_physp_get_port_guid
								 (p_remote_physp));
					if (!p_remote_port ||
					    p_remote_port->is_new) {
						OSM_LOG(p_mgr->p_log,
							OSM_LOG_DEBUG,
							"New CA/RTR found (lid %u)\n",
							remote_lid_ho);
						osm_ucast_cache_invalidate
						    (p_mgr);
						goto Exit;
					}
				}
			} else {
				/*
				 * The change on the port is cached.
				 * In general, the reasons might be as follows:
				 *  - link between two known nodes went back
				 *  - one or more nodes went back, causing all
				 *    the links to reappear
				 *
				 * If it was link that went back, then this case
				 * would have been taken care of during the
				 * discovery by osm_ucast_cache_check_new_link(),
				 * so it's some node that went back.
				 */
				if ((p_cache_sw->ports[port_num].is_leaf &&
				     remote_node_type == IB_NODE_TYPE_SWITCH) ||
				    (!p_cache_sw->ports[port_num].is_leaf &&
				     remote_node_type != IB_NODE_TYPE_SWITCH)) {
					OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
						"Remote node type change on switch lid %u, port %u\n",
						lid_ho, port_num);
					osm_ucast_cache_invalidate(p_mgr);
					goto Exit;
				}

				if (p_cache_sw->ports[port_num].remote_lid_ho !=
				    remote_lid_ho) {
					OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
						"Remote lid change on switch lid %u, port %u"
						"(was %u, now %u)\n",
						lid_ho, port_num,
						p_cache_sw->ports[port_num].
						remote_lid_ho, remote_lid_ho);
					osm_ucast_cache_invalidate(p_mgr);
					goto Exit;
				}

				/*
				 * We don't care who is the node that has
				 * reappeared in the subnet (local or remote).
				 * What's important is that the cached link
				 * matches the real fabric link.
				 * Just clean it from cache.
				 */

				p_cache_sw->ports[port_num].remote_lid_ho = 0;
				p_cache_sw->ports[port_num].is_leaf = FALSE;
				if (p_cache_sw->dropped) {
					cache_restore_ucast_info(p_mgr,
								 p_cache_sw,
								 p_sw);
					p_cache_sw->dropped = FALSE;
				}

				OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
					"Restored link from cache: lid %u, port %u to lid %u\n",
					lid_ho, port_num, remote_lid_ho);
			}
		}
	}

	/* Remove all the cached switches that
	   have all their ports restored */
	cache_cleanup_switches(p_mgr);

	/*
	 * Done scanning all the physical switch ports in the subnet.
	 * Now we need to check the other side:
	 * Scan all the cached switches and their ports:
	 *  - If the cached switch is missing in the subnet
	 *    (dropped flag is on), check that it's a leaf switch.
	 *    If it's not a leaf, the cache is invalid, because
	 *    cache can tolerate only leaf switch removal.
	 *  - If the cached switch exists in fabric, check all
	 *    its cached ports. These cached ports represent
	 *    missing link in the fabric.
	 *    The missing links that can be tolerated are:
	 *      + link to missing CA/RTR
	 *      + link to missing leaf switch
	 */
	for (p_cache_sw = (cache_switch_t *) cl_qmap_head(&p_mgr->cache_sw_tbl);
	     p_cache_sw != (cache_switch_t *) cl_qmap_end(&p_mgr->cache_sw_tbl);
	     p_cache_sw =
	     (cache_switch_t *) cl_qmap_next(&p_cache_sw->map_item)) {

		if (p_cache_sw->dropped) {
			if (!cache_sw_is_leaf(p_cache_sw)) {
				OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
					"Missing non-leaf switch (lid %u)\n",
					cache_sw_get_base_lid_ho(p_cache_sw));
				osm_ucast_cache_invalidate(p_mgr);
				goto Exit;
			}

			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
				"Missing leaf switch (lid %u) - "
				"continuing validation\n",
				cache_sw_get_base_lid_ho(p_cache_sw));
			continue;
		}

		for (port_num = 1; port_num < p_cache_sw->num_ports; port_num++) {
			/* a zero remote LID means nothing is cached for the port */
			if (!p_cache_sw->ports[port_num].remote_lid_ho)
				continue;

			if (p_cache_sw->ports[port_num].is_leaf) {
				CL_ASSERT(cache_sw_is_leaf(p_cache_sw));
				OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
					"Switch lid %u, port %u: missing link to CA/RTR - "
					"continuing validation\n",
					cache_sw_get_base_lid_ho(p_cache_sw),
					port_num);
				continue;
			}

			p_remote_cache_sw = cache_get_sw(p_mgr,
							 p_cache_sw->
							 ports[port_num].
							 remote_lid_ho);

			if (!p_remote_cache_sw || !p_remote_cache_sw->dropped) {
				OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
					"Switch lid %u, port %u: missing link to existing switch\n",
					cache_sw_get_base_lid_ho(p_cache_sw),
					port_num);
				osm_ucast_cache_invalidate(p_mgr);
				goto Exit;
			}

			if (!cache_sw_is_leaf(p_remote_cache_sw)) {
				OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
					"Switch lid %u, port %u: missing link to non-leaf switch\n",
					cache_sw_get_base_lid_ho(p_cache_sw),
					port_num);
				osm_ucast_cache_invalidate(p_mgr);
				goto Exit;
			}

			/*
			 * At this point we know that the missing link is to
			 * a leaf switch. However, one case deserves a special
			 * treatment. If there was a link between two leaf
			 * switches, then missing leaf switch might break
			 * routing. It is possible that there are routes
			 * that use leaf switches to get from switch to switch
			 * and not just to get to the CAs behind the leaf switch.
			 */
			if (cache_sw_is_leaf(p_cache_sw) &&
			    cache_sw_is_leaf(p_remote_cache_sw)) {
				OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
					"Switch lid %u, port %u: missing leaf-2-leaf link\n",
					cache_sw_get_base_lid_ho(p_cache_sw),
					port_num);
				osm_ucast_cache_invalidate(p_mgr);
				goto Exit;
			}

			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
				"Switch lid %u, port %u: missing remote leaf switch - "
				"continuing validation\n",
				cache_sw_get_base_lid_ho(p_cache_sw),
				port_num);
		}
	}

	OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "Unicast cache is valid\n");
	ucast_cache_dump(p_mgr);
Exit:
	OSM_LOG_EXIT(p_mgr->p_log);
}				/* ucast_cache_validate() */
Esempio n. 14
0
/*
 * Find root nodes automatically from the Min Hop Table info.
 *
 * First, every non-switch port (CA or router) that is attached to a switch
 * is counted against that switch's base LID.  Then, for each switch, a
 * histogram "number of CAs/routers reachable at N hops" is built from the
 * switch's least-hop data.  A switch is ranked as a root (rank 0, and
 * p_updn->num_roots is incremented) when exactly one histogram bar exceeds
 * 90% of all counted CAs/routers and exactly one bar exceeds 5% of them.
 *
 * If the counter array cannot be allocated, ERR AA14 is logged and no
 * switch is ranked.
 */
static void updn_find_root_nodes_by_min_hop(OUT updn_t * p_updn)
{
	osm_opensm_t *p_osm = p_updn->p_osm;
	osm_switch_t *p_sw;
	osm_port_t *p_port;
	osm_physp_t *p_physp;
	cl_map_item_t *item;
	double thd1, thd2;
	unsigned i, cas_num = 0;
	unsigned *cas_per_sw;
	size_t num_lids;
	uint16_t lid_ho;

	OSM_LOG_ENTER(&p_osm->log);

	OSM_LOG(&p_osm->log, OSM_LOG_DEBUG,
		"Current number of ports in the subnet is %d\n",
		cl_qmap_count(&p_osm->subn.port_guid_tbl));

	/*
	 * One counter per LID slot.  The length is kept in a size_t so that
	 * it can never be truncated to 16 bits, and calloc hands back a
	 * zeroed array.
	 */
	num_lids = cl_ptr_vector_get_size(&p_updn->p_osm->subn.port_lid_tbl) + 1;
	cas_per_sw = calloc(num_lids, sizeof(*cas_per_sw));
	if (!cas_per_sw) {
		OSM_LOG(&p_osm->log, OSM_LOG_ERROR, "ERR AA14: "
			"cannot alloc mem for CAs per switch counter array\n");
		goto _exit;
	}

	/* Find the Maximum number of CAs (and routers) for histogram normalization */
	OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE,
		"Finding the number of CAs and storing them in cl_map\n");
	for (item = cl_qmap_head(&p_updn->p_osm->subn.port_guid_tbl);
	     item != cl_qmap_end(&p_updn->p_osm->subn.port_guid_tbl);
	     item = cl_qmap_next(item)) {
		p_port = (osm_port_t *)item;
		if (!p_port->p_node->sw) {
			/* only endports hanging directly off a switch count */
			p_physp = p_port->p_physp->p_remote_physp;
			if (!p_physp || !p_physp->p_node->sw)
				continue;
			lid_ho = osm_node_get_base_lid(p_physp->p_node, 0);
			lid_ho = cl_ntoh16(lid_ho);
			if (lid_ho >= num_lids) {
				/* never index past the counter array */
				OSM_LOG(&p_osm->log, OSM_LOG_DEBUG,
					"Switch lid %u is outside the port LID table - skipping\n",
					lid_ho);
				continue;
			}
			cas_per_sw[lid_ho]++;
			cas_num++;
		}
	}

	thd1 = cas_num * 0.9;
	thd2 = cas_num * 0.05;
	OSM_LOG(&p_osm->log, OSM_LOG_DEBUG,
		"Found %u CAs and RTRs, %u SWs in the subnet. "
		"Thresholds are thd1 = %f && thd2 = %f\n",
		cas_num, cl_qmap_count(&p_osm->subn.sw_guid_tbl), thd1, thd2);

	OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE,
		"Passing through all switches to collect Min Hop info\n");
	for (item = cl_qmap_head(&p_updn->p_osm->subn.sw_guid_tbl);
	     item != cl_qmap_end(&p_updn->p_osm->subn.sw_guid_tbl);
	     item = cl_qmap_next(item)) {
		unsigned hop_hist[IB_SUBNET_PATH_HOPS_MAX];
		unsigned lid, last_lid;
		uint8_t hop_val;
		uint16_t numHopBarsOverThd1 = 0;
		uint16_t numHopBarsOverThd2 = 0;

		p_sw = (osm_switch_t *) item;

		memset(hop_hist, 0, sizeof(hop_hist));

		/*
		 * Scan LIDs 1..max_lid_ho, clamped to the counter array.
		 * The index is wider than 16 bits so the loop terminates
		 * even when the upper bound is 0xFFFF.
		 */
		last_lid = p_sw->max_lid_ho;
		if (last_lid >= num_lids)
			last_lid = (unsigned)(num_lids - 1);

		for (lid = 1; lid <= last_lid; lid++) {
			if (!cas_per_sw[lid])
				continue;

			hop_val = osm_switch_get_least_hops(p_sw,
							    (uint16_t) lid);
			/* hop counts that do not fit the histogram are ignored */
			if (hop_val >= IB_SUBNET_PATH_HOPS_MAX)
				continue;

			hop_hist[hop_val] += cas_per_sw[lid];
		}

		/* Now recognize the spines by requiring one bar to be
		   above 90% of the number of CAs and RTRs */
		for (i = 0; i < IB_SUBNET_PATH_HOPS_MAX; i++) {
			if (hop_hist[i] > thd1)
				numHopBarsOverThd1++;
			if (hop_hist[i] > thd2)
				numHopBarsOverThd2++;
		}

		/* If thd conditions are valid - rank the root node */
		if (numHopBarsOverThd1 == 1 && numHopBarsOverThd2 == 1) {
			OSM_LOG(&p_osm->log, OSM_LOG_DEBUG,
				"Ranking GUID 0x%" PRIx64 " as root node\n",
				cl_ntoh64(osm_node_get_node_guid(p_sw->p_node)));
			((struct updn_node *)p_sw->priv)->rank = 0;
			p_updn->num_roots++;
		}
	}

	free(cas_per_sw);
_exit:
	OSM_LOG_EXIT(&p_osm->log);
	return;
}
Esempio n. 15
0
/*
 * Populate p_lash->switches with one entry per subnet switch and work out
 * how many VLs LASH may use.
 *
 * The smallest non-zero operational-VLs code found on any linked external
 * switch port (port 0 is ignored) is turned into a VL count, capped at 15,
 * and reduced by the configured lash_start_vl before being stored in
 * p_lash->vl_min.
 *
 * Returns 0 on success, -1 when the switch array or a switch entry cannot
 * be created, or when lash_start_vl is not below the VL count (ERR 4D03).
 */
static int discover_network_properties(lash_t * p_lash)
{
	osm_subn_t *p_subn = &p_lash->p_osm->subn;
	osm_log_t *p_log = &p_lash->p_osm->log;
	osm_switch_t *p_sw;
	uint8_t lowest_op_vls = 5;	/* set to a high value */
	uint8_t vl_count;
	int sw_idx = 0;
	int port;

	p_lash->num_switches = cl_qmap_count(&p_subn->sw_guid_tbl);
	p_lash->switches = calloc(p_lash->num_switches, sizeof(switch_t *));
	if (!p_lash->switches)
		return -1;

	for (p_sw = (osm_switch_t *) cl_qmap_head(&p_subn->sw_guid_tbl);
	     p_sw != (osm_switch_t *) cl_qmap_end(&p_subn->sw_guid_tbl);
	     p_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item)) {
		uint16_t num_ports;

		p_lash->switches[sw_idx] = switch_create(p_lash, sw_idx, p_sw);
		if (!p_lash->switches[sw_idx])
			return -1;
		sw_idx++;

		num_ports = osm_node_get_num_physp(p_sw->p_node);

		/* Port 0 is the management port and is not examined */
		for (port = 1; port < num_ports; port++) {
			osm_physp_t *p_physp =
			    osm_node_get_physp_ptr(p_sw->p_node, port);
			uint8_t op_vls;

			/* only ports that have a peer are of interest */
			if (!p_physp || !p_physp->p_remote_physp)
				continue;

			op_vls = ib_port_info_get_op_vls(&p_physp->port_info);
			if (op_vls && op_vls < lowest_op_vls)
				lowest_op_vls = op_vls;
		}
	}

	/* Convert the operational-VLs code into a VL count, at most 15 */
	vl_count = 1 << (lowest_op_vls - 1);
	if (vl_count > 15)
		vl_count = 15;

	if (p_subn->opt.lash_start_vl >= vl_count) {
		OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 4D03: "
			"Start VL(%d) too high for min operational vl(%d)\n",
			p_subn->opt.lash_start_vl, vl_count);
		return -1;
	}

	p_lash->vl_min = vl_count - p_subn->opt.lash_start_vl;

	OSM_LOG(p_log, OSM_LOG_INFO,
		"min operational vl(%d) start vl(%d) max_switches(%d)\n",
		p_lash->vl_min, p_subn->opt.lash_start_vl,
		p_lash->num_switches);
	return 0;
}
Esempio n. 16
0
File: point.c Progetto: 01org/opa-ff
/* Reduce a point to its simplest equivalent form.
 * A list with exactly one entry becomes that single entry, and a list that
 * names every component of one parent object becomes that parent.
 * Points that are already simple are left as they are.
 */
void PointFabricCompress(Point *point)
{
	switch (point->Type) {
	case POINT_TYPE_NONE:
	case POINT_TYPE_PORT:
	case POINT_TYPE_NODE:
#if !defined(VXWORKS) || defined(BUILD_DMC)
	case POINT_TYPE_IOC:
#endif
	case POINT_TYPE_SYSTEM:
		/* nothing simpler to reduce to */
		break;
	case POINT_TYPE_PORT_LIST:
		{
		DLIST *ports = &point->u.portList;
		LIST_ITERATOR first = ListHead(ports);
		PortData *anchor;

		ASSERT(first);
		ASSERT(ListCount(ports) >= 1);

		anchor = (PortData*)ListObj(first);
		if (ListCount(ports) == 1) {
			/* a lone port: represent it directly */
			PointFabricDestroy(point);
			PointInitSimple(point, POINT_TYPE_PORT, anchor);
		} else if (ListCount(ports) == cl_qmap_count(&anchor->nodep->Ports)) {
			/* as many entries as the first port's node has ports;
			 * if every entry is on that node, the node says it all
			 */
			LIST_ITERATOR it = ListHead(ports);

			while (anchor && it != NULL) {
				if (anchor->nodep != ((PortData*)ListObj(it))->nodep)
					anchor = NULL;	/* more than one node involved */
				it = ListNext(ports, it);
			}
			if (anchor) {
				/* whole node covered: collapse to it */
				PointFabricDestroy(point);
				PointInitSimple(point, POINT_TYPE_NODE, anchor->nodep);
			}
#if 0
		} else {
			// the likelihood of this is low for port oriented searches
			// and it would present just the system image GUID in the summary
			// and may be less obvious to the user than a list of ports
			/* maybe we can consolidate to a single system */
			LIST_ITERATOR it;

			for (it=ListHead(ports); anchor && it != NULL; it = ListNext(ports, it)) {
				if (anchor->nodep->systemp != ((PortData*)ListObj(it))->nodep->systemp)
					anchor = NULL;	/* not in same system, flag for below */
			}
			if (anchor) {
				/* all ports are in same system. is it a complete list? */
				/* count ports in the system */
				uint32 count = 0;
				cl_map_item_t *p;

				for (p=cl_qmap_head(&anchor->nodep->systemp->Nodes); p != cl_qmap_end(&anchor->nodep->systemp->Nodes); p = cl_qmap_next(p)) {
					NodeData *nodep = PARENT_STRUCT(p, NodeData, SystemNodesEntry);
					count += cl_qmap_count(&nodep->Ports);
				}
				if (ListCount(ports) != count)
					anchor = NULL;	/* incomplete list, flag for below */
			}
			if (anchor) {
				/* degenerate case, simplify as a single system */
				PointFabricDestroy(point);
				PointInitSimple(point, POINT_TYPE_SYSTEM, anchor->nodep->systemp);
			}
#endif
		}
		break;
		}
	case POINT_TYPE_NODE_LIST:
		{
		DLIST *nodes = &point->u.nodeList;
		LIST_ITERATOR first = ListHead(nodes);
		NodeData *anchor;

		ASSERT(first);
		ASSERT(ListCount(nodes) >= 1);

		anchor = (NodeData*)ListObj(first);
		if (ListCount(nodes) == 1) {
			/* a lone node: represent it directly */
			PointFabricDestroy(point);
			PointInitSimple(point, POINT_TYPE_NODE, anchor);
		} else if (ListCount(nodes) == cl_qmap_count(&anchor->systemp->Nodes)) {
			/* as many entries as the first node's system has nodes;
			 * if every entry is in that system, the system says it all
			 */
			LIST_ITERATOR it = ListHead(nodes);

			while (anchor && it != NULL) {
				if (anchor->systemp != ((NodeData*)ListObj(it))->systemp)
					anchor = NULL;	/* more than one system involved */
				it = ListNext(nodes, it);
			}
			if (anchor) {
				/* whole system covered: collapse to it */
				PointFabricDestroy(point);
				PointInitSimple(point, POINT_TYPE_SYSTEM, anchor->systemp);
			}
		}
		break;
		}
#if !defined(VXWORKS) || defined(BUILD_DMC)
	case POINT_TYPE_IOC_LIST:
		{
		DLIST *iocs = &point->u.iocList;
		LIST_ITERATOR first = ListHead(iocs);
		IocData *anchor;

		ASSERT(first);
		ASSERT(ListCount(iocs) >= 1);

		anchor = (IocData*)ListObj(first);
		if (ListCount(iocs) == 1) {
			/* a lone IOC: represent it directly */
			PointFabricDestroy(point);
			PointInitSimple(point, POINT_TYPE_IOC, anchor);
		} else if (ListCount(iocs) == QListCount(&anchor->ioup->Iocs)) {
			/* as many entries as the first IOC's IOU has IOCs;
			 * if every entry is on that IOU, its node says it all
			 */
			LIST_ITERATOR it = ListHead(iocs);

			while (anchor && it != NULL) {
				if (anchor->ioup != ((IocData*)ListObj(it))->ioup)
					anchor = NULL;	/* more than one IOU involved */
				it = ListNext(iocs, it);
			}
			if (anchor) {
				/* whole IOU covered: collapse to the node owning it */
				PointFabricDestroy(point);
				PointInitSimple(point, POINT_TYPE_NODE, anchor->ioup->nodep);
			}
#if 0
		} else {
			// the likelihood of this is low for ioc oriented searches
			// and it would present just the system image GUID in the summary
			// and may be less obvious to the user than a list of iocs
			/* maybe we can consolidate to a single system */
			LIST_ITERATOR it;

			for (it=ListHead(iocs); anchor && it != NULL; it = ListNext(iocs, it)) {
				if (anchor->ioup->nodep->systemp != ((IocData*)ListObj(it))->ioup->nodep->systemp)
					anchor = NULL;	/* not in same system, flag for below */
			}
			if (anchor) {
				/* all IOCs are in same system. is it a complete list? */
				/* count IOCs in the system */
				uint32 count = 0;
				cl_map_item_t *p;

				for (p=cl_qmap_head(&anchor->ioup->nodep->systemp->Nodes); p != cl_qmap_end(&anchor->ioup->nodep->systemp->Nodes); p = cl_qmap_next(p)) {
					NodeData *nodep = PARENT_STRUCT(p, NodeData, SystemNodesEntry);
					if (nodep->ioup)
						count += QListCount(&nodep->ioup->Iocs);
				}
				if (ListCount(iocs) != count)
					anchor = NULL;	/* incomplete list, flag for below */
			}
			if (anchor) {
				/* degenerate case, simplify as a single system */
				PointFabricDestroy(point);
				PointInitSimple(point, POINT_TYPE_SYSTEM, anchor->ioup->nodep->systemp);
			}
#endif
		}
		break;
		}
#endif
	}
}