/*
 * Receiver for a NodeDescription attribute.
 *
 * context - the osm_sm_t this receiver belongs to
 * data    - the osm_madw_t wrapping the received MAD
 *
 * The node GUID is read from the MAD wrapper's ND context, the node is
 * looked up in the subnet, and the payload is passed on to
 * nd_rcv_process_nd().  The lookup and the processing both happen while
 * the lock is held exclusively.
 */
void osm_nd_rcv_process(IN void *context, IN void *data)
{
    osm_sm_t *sm = context;
    osm_madw_t *p_madw = data;
    ib_net64_t guid;
    ib_node_desc_t *p_desc;
    osm_node_t *p_target;

    CL_ASSERT(sm);

    OSM_LOG_ENTER(sm->p_log);

    CL_ASSERT(p_madw);

    p_desc = ib_smp_get_payload_ptr(osm_madw_get_smp_ptr(p_madw));
    guid = osm_madw_get_nd_context_ptr(p_madw)->node_guid;

    CL_PLOCK_EXCL_ACQUIRE(sm->p_lock);

    p_target = osm_get_node_by_guid(sm->p_subn, guid);
    if (p_target) {
        nd_rcv_process_nd(sm, p_target, p_desc);
    } else {
        /* No node with this GUID is known: report it and drop the MAD. */
        OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0B01: "
                "NodeDescription received for nonexistent node "
                "0x%" PRIx64 "\n", cl_ntoh64(guid));
    }

    CL_PLOCK_RELEASE(sm->p_lock);

    OSM_LOG_EXIT(sm->p_log);
}
Пример #2
0
/*
 * Flag one port of a switch as ignored (sets is_prof_ignored on its physp).
 *
 * ctx  - the osm_ucast_mgr_t whose subnet is searched
 * guid - switch node GUID in host byte order (converted with cl_hton64()
 *        for the lookup)
 * p    - text holding the port number, in any base strtoul() accepts with
 *        base 0
 *
 * Unknown switches and bad port numbers are logged at debug level and
 * skipped.  Always returns 0.
 */
static int mark_ignored_port(void *ctx, uint64_t guid, char *p)
{
	osm_ucast_mgr_t *m = ctx;
	osm_node_t *node = osm_get_node_by_guid(m->p_subn, cl_hton64(guid));
	osm_physp_t *physp;
	unsigned long port;

	if (!node || !node->sw) {
		OSM_LOG(m->p_log, OSM_LOG_DEBUG,
			"switch with guid 0x%016" PRIx64 " is not found\n",
			guid);
		return 0;
	}

	/*
	 * Keep the parsed value as unsigned long until it has been range
	 * checked: narrowing it first would let an oversized number wrap
	 * around into a seemingly valid port.  Port 0 and unparsable text
	 * (strtoul() returns 0) are rejected as well.
	 */
	port = (p && *p) ? strtoul(p, NULL, 0) : 0;
	if (!port || port >= node->sw->num_ports) {
		OSM_LOG(m->p_log, OSM_LOG_DEBUG,
			"bad port specified for guid 0x%016" PRIx64 "\n", guid);
		return 0;
	}

	physp = osm_node_get_physp_ptr(node, (unsigned) port);
	if (!physp)
		return 0;

	physp->is_prof_ignored = 1;

	return 0;
}
Пример #3
0
/**********************************************************************
 * Scan the remote SMs recorded in the subnet's sm_guid_tbl for one
 * whose SMInfo reports the MASTER state.
 * Returns a pointer to the first such remote SM, or NULL when none
 * of them is a master.
 **********************************************************************/
static osm_remote_sm_t *state_mgr_exists_other_master_sm(IN osm_sm_t * sm)
{
	cl_qmap_t *p_tbl;
	osm_remote_sm_t *p_cur;
	osm_remote_sm_t *p_master = NULL;
	osm_node_t *p_owner;

	OSM_LOG_ENTER(sm->p_log);

	p_tbl = &sm->p_subn->sm_guid_tbl;

	p_cur = (osm_remote_sm_t *) cl_qmap_head(p_tbl);
	while (p_cur != (osm_remote_sm_t *) cl_qmap_end(p_tbl)) {
		/* The node is looked up only to name it in the log line. */
		p_owner = osm_get_node_by_guid(sm->p_subn, p_cur->smi.guid);

		if (ib_sminfo_get_state(&p_cur->smi) == IB_SMINFO_STATE_MASTER) {
			OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
				"Found remote master SM with guid:0x%016" PRIx64
				" (node %s)\n", cl_ntoh64(p_cur->smi.guid),
				p_owner ? p_owner->print_desc : "UNKNOWN");
			p_master = p_cur;
			break;
		}

		p_cur = (osm_remote_sm_t *) cl_qmap_next(&p_cur->map_item);
	}

	OSM_LOG_EXIT(sm->p_log);
	return p_master;
}
Пример #4
0
/**********************************************************************
 * Walk the remote SMs recorded in the subnet's sm_guid_tbl and pick
 * the one that osm_sm_is_greater_than() ranks above every other
 * candidate, the local SM (its configured priority and port GUID)
 * included.  Remote SMs in the NOTACTIVE state are not considered.
 * Returns a pointer to that remote SM, or NULL when no remote SM
 * outranks the local one.
 **********************************************************************/
static osm_remote_sm_t *state_mgr_get_highest_sm(IN osm_sm_t * sm)
{
	cl_qmap_t *p_tbl;
	osm_remote_sm_t *p_cand;
	osm_remote_sm_t *p_best = NULL;
	uint8_t best_priority;
	ib_net64_t best_guid;
	osm_node_t *p_owner;

	OSM_LOG_ENTER(sm->p_log);

	p_tbl = &sm->p_subn->sm_guid_tbl;

	/* The local SM is the one to beat. */
	best_priority = sm->p_subn->opt.sm_priority;
	best_guid = sm->p_subn->sm_port_guid;

	for (p_cand = (osm_remote_sm_t *) cl_qmap_head(p_tbl);
	     p_cand != (osm_remote_sm_t *) cl_qmap_end(p_tbl);
	     p_cand = (osm_remote_sm_t *) cl_qmap_next(&p_cand->map_item)) {
		uint8_t cand_priority;

		/* Inactive SMs do not take part in the comparison. */
		if (ib_sminfo_get_state(&p_cand->smi) ==
		    IB_SMINFO_STATE_NOTACTIVE)
			continue;

		cand_priority = ib_sminfo_get_priority(&p_cand->smi);
		if (!osm_sm_is_greater_than(cand_priority, p_cand->smi.guid,
					    best_priority, best_guid))
			continue;

		/* This candidate outranks the best one seen so far. */
		p_best = p_cand;
		best_priority = cand_priority;
		best_guid = p_cand->smi.guid;
	}

	if (p_best) {
		p_owner = osm_get_node_by_guid(sm->p_subn, p_best->smi.guid);
		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
			"Found higher SM with guid: %016" PRIx64 " (node %s)\n",
			cl_ntoh64(p_best->smi.guid),
			p_owner ? p_owner->print_desc : "UNKNOWN");
	}

	OSM_LOG_EXIT(sm->p_log);
	return p_best;
}
Пример #5
0
/*
 * Set the hop weight factor of one switch port.
 *
 * ctx  - the osm_ucast_mgr_t whose subnet is searched
 * guid - switch node GUID in host byte order (converted with cl_hton64()
 *        for the lookup)
 * p    - text of the form "<port><sep><hop_wf>", where <sep> is a single
 *        character; both numbers are parsed by strtoul() with base 0
 *
 * The port must be in 1..num_ports-1 and the weight factor in 1..0xff.
 * Anything else is logged at debug level and skipped.  Always returns 0.
 */
static int set_hop_wf(void *ctx, uint64_t guid, char *p)
{
	osm_ucast_mgr_t *m = ctx;
	osm_node_t *node = osm_get_node_by_guid(m->p_subn, cl_hton64(guid));
	osm_physp_t *physp;
	/*
	 * Parsed values stay unsigned long until range checked, so that an
	 * oversized number cannot wrap around into an accepted value.
	 */
	unsigned long port, hop_wf;
	char *e;

	if (!node || !node->sw) {
		OSM_LOG(m->p_log, OSM_LOG_DEBUG,
			"switch with guid 0x%016" PRIx64 " is not found\n",
			guid);
		return 0;
	}

	if (!p || !*p || !(port = strtoul(p, &e, 0)) || (p == e) ||
	    port >= node->sw->num_ports) {
		OSM_LOG(m->p_log, OSM_LOG_DEBUG,
			"bad port specified for guid 0x%016" PRIx64 "\n", guid);
		return 0;
	}

	/*
	 * Step over the one separator character after the port number, but
	 * never past the terminating NUL: when nothing follows the port,
	 * stay on the terminator so the check below reports the missing
	 * weight factor instead of reading beyond the string.
	 */
	p = *e ? e + 1 : e;

	if (!*p || !(hop_wf = strtoul(p, &e, 0)) || p == e || hop_wf >= 0x100) {
		OSM_LOG(m->p_log, OSM_LOG_DEBUG,
			"bad hop weight factor specified for guid 0x%016" PRIx64
			" port %u\n", guid, (unsigned) port);
		return 0;
	}

	physp = osm_node_get_physp_ptr(node, (unsigned) port);
	if (!physp)
		return 0;

	physp->hop_wf = (unsigned) hop_wf;

	return 0;
}
Пример #6
0
/*
 * Receiver for a NodeInfo attribute.
 *
 * context - the osm_sm_t this receiver belongs to
 * data    - the osm_madw_t wrapping the received MAD
 *
 * A response with a zero node GUID, a zero port GUID, or a non-zero MAD
 * status is logged and dropped.  Otherwise the node is looked up by the
 * GUID from the payload and the MAD is handed to ni_rcv_process_new() or
 * ni_rcv_process_existing().
 */
void osm_ni_rcv_process(IN void *context, IN void *data)
{
	osm_sm_t *sm = context;
	osm_madw_t *p_madw = data;
	ib_smp_t *p_smp;
	ib_node_info_t *p_info;
	osm_node_t *p_known;

	CL_ASSERT(sm);

	OSM_LOG_ENTER(sm->p_log);

	CL_ASSERT(p_madw);

	p_smp = osm_madw_get_smp_ptr(p_madw);
	p_info = ib_smp_get_payload_ptr(p_smp);

	CL_ASSERT(p_smp->attr_id == IB_MAD_ATTR_NODE_INFO);

	if (PF(p_info->node_guid == 0)) {
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D16: "
			"Got Zero Node GUID! Found on the directed route:\n");
		osm_dump_smp_dr_path_v2(sm->p_log, p_smp, FILE_ID, OSM_LOG_ERROR);
	} else if (PF(p_info->port_guid == 0)) {
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D17: "
			"Got Zero Port GUID! Found on the directed route:\n");
		osm_dump_smp_dr_path_v2(sm->p_log, p_smp, FILE_ID, OSM_LOG_ERROR);
	} else if (ib_smp_get_status(p_smp)) {
		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
			"MAD status 0x%x received\n",
			cl_ntoh16(ib_smp_get_status(p_smp)));
	} else {
		/*
		   Find out whether this node has been discovered before and
		   process it accordingly.  The exclusive lock is held for
		   the lookup and for the whole of the processing.
		 */
		CL_PLOCK_EXCL_ACQUIRE(sm->p_lock);
		p_known = osm_get_node_by_guid(sm->p_subn, p_info->node_guid);

		osm_dump_node_info_v2(sm->p_log, p_info, FILE_ID, OSM_LOG_DEBUG);

		if (p_known)
			ni_rcv_process_existing(sm, p_known, p_madw);
		else
			ni_rcv_process_new(sm, p_madw);

		CL_PLOCK_RELEASE(sm->p_lock);
	}

	OSM_LOG_EXIT(sm->p_log);
}
Пример #7
0
/**********************************************************************
 Record the link between port port_num of p_node and the neighbor
 port named by p_ni_context (node_guid / port_num).

 sm           - subnet manager object (log, subnet, ucast cache)
 p_node       - node on the local side of the link
 port_num     - port number on p_node
 p_ni_context - NodeInfo context; supplies the neighbor node GUID and
                port number plus the duplicate-GUID retry state
                (dup_count, dup_node_guid, dup_port_num)

 Nothing is linked when the context GUID is zero, the neighbor node is
 not in the subnet, the link already exists, a physp cannot be found,
 or the port reports DOWN after having been marked discovered.

 The plock must be held before calling this function.
**********************************************************************/
static void ni_rcv_set_links(IN osm_sm_t * sm, osm_node_t * p_node,
			     const uint8_t port_num,
			     const osm_ni_context_t * p_ni_context)
{
	osm_node_t *p_neighbor_node;
	osm_physp_t *p_physp, *p_remote_physp;

	OSM_LOG_ENTER(sm->p_log);

	/*
	   A special case exists in which the node we're trying to
	   link is our own node.  In this case, the guid value in
	   the ni_context will be zero.
	 */
	if (p_ni_context->node_guid == 0) {
		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
			"Nothing to link for our own node 0x%" PRIx64 "\n",
			cl_ntoh64(osm_node_get_node_guid(p_node)));
		goto _exit;
	}

	/* The neighbor must still be present in the subnet to be linked. */
	p_neighbor_node = osm_get_node_by_guid(sm->p_subn,
					       p_ni_context->node_guid);
	if (PF(!p_neighbor_node)) {
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D10: "
			"Unexpected removal of neighbor node 0x%" PRIx64 "\n",
			cl_ntoh64(p_ni_context->node_guid));
		goto _exit;
	}

	/* When setting the link, ports on both
	   sides of the link should be initialized */
	CL_ASSERT(osm_node_link_has_valid_ports(p_node, port_num,
						p_neighbor_node,
						p_ni_context->port_num));

	/* Nothing to do when exactly this link is already recorded. */
	if (osm_node_link_exists(p_node, port_num,
				 p_neighbor_node, p_ni_context->port_num)) {
		OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Link already exists\n");
		goto _exit;
	}

	p_physp = osm_node_get_physp_ptr(p_node, port_num);
	if (!p_physp) {
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR OD0E: "
			"Failed to find physp for port %d of Node GUID 0x%"
			PRIx64 "\n", port_num,
			cl_ntoh64(osm_node_get_node_guid(p_node)));
		goto _exit;
	}

	/*
	 * If the link went UP, after we already discovered it, we shouldn't
	 * set the link between the ports and resweep.
	 *
	 * Concretely: when the port state reads DOWN although the port was
	 * already marked discovered, the discovered flag is cleared, a heavy
	 * sweep is forced and no link is created.
	 */
	if (osm_physp_get_port_state(p_physp) == IB_LINK_DOWN &&
	    p_node->physp_discovered[port_num]) {
		/* Link down on another side. Don't create a link*/
		p_node->physp_discovered[port_num] = 0;
		sm->p_subn->force_heavy_sweep = TRUE;
		goto _exit;
	}

	/*
	 * The port already has some other link.  This is handled only while
	 * no heavy sweep is pending, and only on the first occurrence
	 * (dup_count == 0) or when the context's duplicate record refers to
	 * this very node and port.
	 */
	if (osm_node_has_any_link(p_node, port_num) &&
	    sm->p_subn->force_heavy_sweep == FALSE &&
	    (!p_ni_context->dup_count ||
	     (p_ni_context->dup_node_guid == osm_node_get_node_guid(p_node) &&
	      p_ni_context->dup_port_num == port_num))) {
		/*
		   Uh oh...
		   This could be reconnected ports, but also duplicated GUID
		   (2 nodes have the same guid) or a 12x link with lane reversal
		   that is not configured correctly.
		   We will try to recover by querying NodeInfo again.
		   In order to catch even fast port moving to new location(s)
		   and back we will count up to 5.
		   Some crazy reconnections (newly created switch loop right
		   before targeted CA) will not be catched this way. So in worst
		   case - report GUID duplication and request new discovery.
		   When switch node is targeted NodeInfo querying will be done
		   in opposite order, this is much stronger check, unfortunately
		   it is impossible with CAs.
		 */
		p_physp = osm_node_get_physp_ptr(p_node, port_num);
		if (!p_physp) {
			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR OD0F: "
				"Failed to find physp for port %d of Node GUID 0x%"
				PRIx64 "\n", port_num,
				cl_ntoh64(osm_node_get_node_guid(p_node)));
			goto _exit;
		}

		/*
		 * More than 5 retries: report the duplicate and force a heavy
		 * sweep.  Otherwise query NodeInfo again with the retry count
		 * bumped by one; for a switch the query goes through the
		 * remote physp, for any other node through the local one.
		 */
		if (p_ni_context->dup_count > 5) {
			report_duplicated_guid(sm, p_physp, p_neighbor_node,
					       p_ni_context->port_num);
			sm->p_subn->force_heavy_sweep = TRUE;
		} else if (p_node->sw)
			requery_dup_node_info(sm, p_physp->p_remote_physp,
					      p_ni_context->dup_count + 1);
		else
			requery_dup_node_info(sm, p_physp,
					      p_ni_context->dup_count + 1);
		/*
		 * NOTE(review): there is no early exit here, so execution
		 * continues below and the new link is still created after a
		 * report or requery - confirm this is intended.
		 */
	}

	/*
	   When there are only two nodes with exact same guids (connected back
	   to back) - the previous check for duplicated guid will not catch
	   them. But the link will be from the port to itself...
	   Enhanced Port 0 is an exception to this
	 */
	if (osm_node_get_node_guid(p_node) == p_ni_context->node_guid &&
	    port_num == p_ni_context->port_num &&
	    port_num != 0 && cl_qmap_count(&sm->p_subn->sw_guid_tbl) == 0) {
		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
			"Duplicate GUID found by link from a port to itself:"
			"node 0x%" PRIx64 ", port number %u\n",
			cl_ntoh64(osm_node_get_node_guid(p_node)), port_num);
		p_physp = osm_node_get_physp_ptr(p_node, port_num);
		if (!p_physp) {
			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR OD1D: "
				"Failed to find physp for port %d of Node GUID 0x%"
				PRIx64 "\n", port_num,
				cl_ntoh64(osm_node_get_node_guid(p_node)));
			goto _exit;
		}

		/* Dump the directed route to the offending port. */
		osm_dump_dr_path_v2(sm->p_log, osm_physp_get_dr_path_ptr(p_physp),
				    FILE_ID, OSM_LOG_VERBOSE);

		/* With exit_on_fatal set this terminates the whole process. */
		if (sm->p_subn->opt.exit_on_fatal == TRUE) {
			osm_log_v2(sm->p_log, OSM_LOG_SYS, FILE_ID,
				   "Errors on subnet. Duplicate GUID found "
				   "by link from a port to itself. "
				   "See verbose opensm.log for more details\n");
			exit(1);
		}
	}

	OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
		"Creating new link between:\n\t\t\t\tnode 0x%" PRIx64
		", port number %u and\n\t\t\t\tnode 0x%" PRIx64
		", port number %u\n",
		cl_ntoh64(osm_node_get_node_guid(p_node)), port_num,
		cl_ntoh64(p_ni_context->node_guid), p_ni_context->port_num);

	/* Tell the unicast cache about the new link while it is valid. */
	if (sm->ucast_mgr.cache_valid)
		osm_ucast_cache_check_new_link(&sm->ucast_mgr, p_node, port_num,
					       p_neighbor_node,
					       p_ni_context->port_num);

	/* Both end points must have a physp before the link is recorded. */
	p_physp = osm_node_get_physp_ptr(p_node, port_num);
	p_remote_physp = osm_node_get_physp_ptr(p_neighbor_node,
						p_ni_context->port_num);
	if (!p_physp || !p_remote_physp)
		goto _exit;

	osm_node_link(p_node, port_num, p_neighbor_node, p_ni_context->port_num);

	/* Store the neighbor relation in the neighbor db, once per direction. */
	osm_db_neighbor_set(sm->p_subn->p_neighbor,
			    cl_ntoh64(osm_physp_get_port_guid(p_physp)),
			    port_num,
			    cl_ntoh64(osm_physp_get_port_guid(p_remote_physp)),
			    p_ni_context->port_num);
	osm_db_neighbor_set(sm->p_subn->p_neighbor,
			    cl_ntoh64(osm_physp_get_port_guid(p_remote_physp)),
			    p_ni_context->port_num,
			    cl_ntoh64(osm_physp_get_port_guid(p_physp)),
			    port_num);

_exit:
	OSM_LOG_EXIT(sm->p_log);
}
/*
 * Receiver for a SwitchInfo attribute.
 *
 * context - the osm_sm_t this receiver belongs to
 * data    - the osm_madw_t wrapping the received MAD
 *
 * The node GUID is read from the MAD wrapper's SI context.  With the lock
 * held exclusively the node is looked up and the MAD is handed to
 * si_rcv_process_new() when the node has no switch object yet, or to
 * si_rcv_process_existing() otherwise.
 */
void osm_si_rcv_process(IN void *context, IN void *data)
{
	osm_sm_t *sm = context;
	osm_madw_t *p_madw = data;
	ib_smp_t *p_smp;
	ib_switch_info_t *p_info;
	osm_si_context_t *p_si_ctx;
	osm_node_t *p_sw_node;
	ib_net64_t guid;

	CL_ASSERT(sm);

	OSM_LOG_ENTER(sm->p_log);

	CL_ASSERT(p_madw);

	p_smp = osm_madw_get_smp_ptr(p_madw);
	p_info = ib_smp_get_payload_ptr(p_smp);
	p_si_ctx = osm_madw_get_si_context_ptr(p_madw);
	guid = p_si_ctx->node_guid;

	OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
		"Switch GUID 0x%016" PRIx64 ", TID 0x%" PRIx64 "\n",
		cl_ntoh64(guid), cl_ntoh64(p_smp->trans_id));

	CL_PLOCK_EXCL_ACQUIRE(sm->p_lock);

	p_sw_node = osm_get_node_by_guid(sm->p_subn, guid);
	if (p_sw_node) {
		/* Hack for bad value in Mellanox switch */
		if (cl_ntoh16(p_info->lin_top) > IB_LID_UCAST_END_HO) {
			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3610: "
				"\n\t\t\t\tBad LinearFDBTop value = 0x%X "
				"on switch 0x%" PRIx64
				"\n\t\t\t\tForcing internal correction to 0x%X\n",
				cl_ntoh16(p_info->lin_top),
				cl_ntoh64(osm_node_get_node_guid(p_sw_node)), 0);
			p_info->lin_top = 0;
		}

		if (p_sw_node->sw) {
			/* A non-zero result asks for a heavy sweep because a
			   change was detected. */
			if (si_rcv_process_existing(sm, p_sw_node, p_madw))
				sm->p_subn->force_heavy_sweep = TRUE;
		} else {
			si_rcv_process_new(sm, p_sw_node, p_madw);
			/* A new switch showed up during the sweep, so the
			   current LFT settings have to be ignored. */
			sm->p_subn->ignore_existing_lfts = TRUE;
		}

		CL_PLOCK_RELEASE(sm->p_lock);
	} else {
		/* Unknown node: drop the lock first, then report. */
		CL_PLOCK_RELEASE(sm->p_lock);
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3606: "
			"SwitchInfo received for nonexistent node "
			"with GUID 0x%" PRIx64 "\n", cl_ntoh64(guid));
	}

	OSM_LOG_EXIT(sm->p_log);
}
Пример #9
0
/*
 * Build the port search order of one switch from a user supplied list.
 *
 * ctx  - the osm_subn_t whose nodes are searched
 * guid - switch node GUID in host byte order (converted with cl_hton64()
 *        for the lookup)
 * p    - whitespace separated port numbers, parsed by strtoul() with
 *        base 0; parsing stops at the end of the string or at a '#'
 *
 * On success sw->search_ordering_ports points to a num_ports sized array:
 * slot 0 is 0, then the listed ports in the order given, then every port
 * in 1..num_ports-1 that was not listed, in ascending order.  The array is
 * left unset when the list is empty, contains a bad or repeated port, or
 * the switch already has an ordering.
 *
 * Returns -1 when memory cannot be allocated, 0 in every other case
 * (invalid input is logged at verbose level and ignored).
 */
static int set_search_ordering_ports(void *ctx, uint64_t guid, char *p)
{
	osm_subn_t *p_subn = ctx;
	osm_node_t *node = osm_get_node_by_guid(p_subn, cl_hton64(guid));
	osm_switch_t *sw;
	uint8_t *search_ordering_ports = NULL;
	/*
	 * Kept as unsigned long so that an oversized number is rejected by
	 * the range check instead of being truncated into a valid port.
	 */
	unsigned long port;
	unsigned int *ports = NULL;
	const int bpw = sizeof(*ports)*8;
	int words;
	int i = 1; /* port 0 maps to port 0 */

	if (!node || !(sw = node->sw)) {
		OSM_LOG(&p_subn->p_osm->log, OSM_LOG_VERBOSE,
			"switch with guid 0x%016" PRIx64 " is not found\n",
			guid);
		return 0;
	}

	if (sw->search_ordering_ports) {
		OSM_LOG(&p_subn->p_osm->log, OSM_LOG_VERBOSE,
			"switch with guid 0x%016" PRIx64 " already listed\n",
			guid);
		return 0;
	}

	search_ordering_ports = calloc(sw->num_ports,
				       sizeof(*search_ordering_ports));
	if (!search_ordering_ports) {
		OSM_LOG(&p_subn->p_osm->log, OSM_LOG_ERROR,
			"ERR 3A07: cannot allocate memory for search_ordering_ports\n");
		return -1;
	}

	/* the ports array is for record keeping of which ports have
	 * been seen (one bit per port) */
	words = (sw->num_ports + bpw - 1)/bpw;
	ports = calloc(words, sizeof(*ports));
	if (!ports) {
		OSM_LOG(&p_subn->p_osm->log, OSM_LOG_ERROR,
			"ERR 3A08: cannot allocate memory for ports\n");
		free(search_ordering_ports);
		return -1;
	}

	while (p && (*p != '\0') && (*p != '#')) {
		char *e;
		unsigned int idx;

		port = strtoul(p, &e, 0);
		if ((p == e) || (port == 0) || (port >= sw->num_ports) ||
		    !osm_node_get_physp_ptr(node, (unsigned int) port)) {
			OSM_LOG(&p_subn->p_osm->log, OSM_LOG_VERBOSE,
				"bad port %lu specified for guid 0x%016" PRIx64 "\n",
				port, guid);
			goto out;
		}

		/* port is now known to be below num_ports */
		idx = (unsigned int) port;

		if (ports[idx/bpw] & (1u << (idx%bpw))) {
			OSM_LOG(&p_subn->p_osm->log, OSM_LOG_VERBOSE,
				"port %d already specified for guid 0x%016" PRIx64 "\n",
				(int) idx, guid);
			goto out;
		}

		/*
		 * Repeated ports are rejected above, so at most num_ports - 1
		 * entries are ever stored and i stays within the array.
		 */
		ports[idx/bpw] |= (1u << (idx%bpw));
		search_ordering_ports[i++] = (uint8_t) idx;

		p = e;
		/* isspace() needs a value representable as unsigned char */
		while (isspace((unsigned char) *p)) {
			p++;
		}
	}

	if (i > 1) {
		for (port = 1; port < sw->num_ports; port++) {
			/* fill out the rest of the search_ordering_ports array
			 * in sequence using the remaining unspecified
			 * ports.
			 */
			if (!(ports[port/bpw] & (1u << (port%bpw)))) {
				search_ordering_ports[i++] = (uint8_t) port;
			}
		}
		sw->search_ordering_ports = search_ordering_ports;
		/* the switch owns the array now; keep it out of the cleanup */
		search_ordering_ports = NULL;
	}

out:
	free(search_ordering_ports);
	free(ports);
	return 0;
}