Пример #1
0
/**********************************************************************
 Returns true if the SM port is down.
 The SM's port object must exist in the port_guid table.
**********************************************************************/
static boolean_t state_mgr_is_sm_port_down(IN osm_sm_t * sm)
{
	ib_net64_t port_guid;
	osm_port_t *p_port;
	osm_physp_t *p_physp;
	uint8_t state;

	OSM_LOG_ENTER(sm->p_log);

	port_guid = sm->p_subn->sm_port_guid;

	/*
	 * If we don't know our own port guid yet, assume the port is down.
	 */
	if (port_guid == 0) {
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3308: "
			"SM port GUID unknown\n");
		state = IB_LINK_DOWN;
		goto Exit;
	}

	CL_ASSERT(port_guid);

	CL_PLOCK_ACQUIRE(sm->p_lock);
	p_port = osm_get_port_by_guid(sm->p_subn, port_guid);
	if (!p_port) {
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3309: "
			"SM port with GUID:%016" PRIx64 " is unknown\n",
			cl_ntoh64(port_guid));
		state = IB_LINK_DOWN;
		CL_PLOCK_RELEASE(sm->p_lock);
		goto Exit;
	}

	p_physp = p_port->p_physp;

	CL_ASSERT(p_physp);

	if (p_port->p_node->sw &&
	    !ib_switch_info_is_enhanced_port0(&p_port->p_node->sw->switch_info))
		state = IB_LINK_ACTIVE;	/* base SP0 */
	else
		state = osm_physp_get_port_state(p_physp);
	CL_PLOCK_RELEASE(sm->p_lock);

Exit:
	OSM_LOG_EXIT(sm->p_log);
	return (state == IB_LINK_DOWN);
}
void osm_nd_rcv_process(IN void *context, IN void *data)
{
    osm_sm_t *sm = context;
    osm_madw_t *p_madw = data;
    ib_node_desc_t *p_nd;
    ib_smp_t *p_smp;
    osm_node_t *p_node;
    ib_net64_t node_guid;

    CL_ASSERT(sm);

    OSM_LOG_ENTER(sm->p_log);

    CL_ASSERT(p_madw);

    p_smp = osm_madw_get_smp_ptr(p_madw);
    p_nd = ib_smp_get_payload_ptr(p_smp);

    /* Acquire the node object and add the node description. */
    node_guid = osm_madw_get_nd_context_ptr(p_madw)->node_guid;
    CL_PLOCK_EXCL_ACQUIRE(sm->p_lock);
    p_node = osm_get_node_by_guid(sm->p_subn, node_guid);
    if (!p_node)
        OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0B01: "
                "NodeDescription received for nonexistent node "
                "0x%" PRIx64 "\n", cl_ntoh64(node_guid));
    else
        nd_rcv_process_nd(sm, p_node, p_nd);

    CL_PLOCK_RELEASE(sm->p_lock);
    OSM_LOG_EXIT(sm->p_log);
}
Пример #3
0
void osm_update_node_desc(IN osm_opensm_t *osm)
{
	CL_PLOCK_ACQUIRE(&osm->lock);
	cl_qmap_apply_func(&osm->subn.node_guid_tbl, state_mgr_update_node_desc,
			   &osm->sm);
	CL_PLOCK_RELEASE(&osm->lock);
}
Пример #4
0
osm_signal_t osm_link_mgr_process(osm_sm_t * sm, IN const uint8_t link_state)
{
	cl_qmap_t *p_node_guid_tbl;
	osm_node_t *p_node;
	osm_signal_t signal = OSM_SIGNAL_DONE;

	OSM_LOG_ENTER(sm->p_log);

	p_node_guid_tbl = &sm->p_subn->node_guid_tbl;

	CL_PLOCK_EXCL_ACQUIRE(sm->p_lock);

	for (p_node = (osm_node_t *) cl_qmap_head(p_node_guid_tbl);
	     p_node != (osm_node_t *) cl_qmap_end(p_node_guid_tbl);
	     p_node = (osm_node_t *) cl_qmap_next(&p_node->map_item)) {
		if (__osm_link_mgr_process_node(sm, p_node, link_state) ==
		    OSM_SIGNAL_DONE_PENDING)
			signal = OSM_SIGNAL_DONE_PENDING;
	}

	CL_PLOCK_RELEASE(sm->p_lock);

	OSM_LOG_EXIT(sm->p_log);
	return (signal);
}
Пример #5
0
/**********************************************************************
 Initiates a thorough sweep of the subnet.
 Used when there is suspicion that something on the subnet has changed.
**********************************************************************/
static ib_api_status_t state_mgr_sweep_hop_0(IN osm_sm_t * sm)
{
	ib_api_status_t status;
	osm_dr_path_t dr_path;
	osm_bind_handle_t h_bind;
	uint8_t path_array[IB_SUBNET_PATH_HOPS_MAX];

	OSM_LOG_ENTER(sm->p_log);

	memset(path_array, 0, sizeof(path_array));

	/*
	 * First, get the bind handle.
	 */
	h_bind = osm_sm_mad_ctrl_get_bind_handle(&sm->mad_ctrl);
	if (h_bind != OSM_BIND_INVALID_HANDLE) {
		OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
				"INITIATING HEAVY SWEEP");
		/*
		 * Start the sweep by clearing the port counts, then
		 * get our own NodeInfo at 0 hops.
		 */
		CL_PLOCK_ACQUIRE(sm->p_lock);

		cl_qmap_apply_func(&sm->p_subn->node_guid_tbl,
				   state_mgr_reset_node_count, sm);

		cl_qmap_apply_func(&sm->p_subn->port_guid_tbl,
				   state_mgr_reset_port_count, sm);

		cl_qmap_apply_func(&sm->p_subn->sw_guid_tbl,
				   state_mgr_reset_switch_count, sm);

		/* Set the in_sweep_hop_0 flag in subn to be TRUE.
		 * This will indicate the sweeping not to continue beyond the
		 * the current node.
		 * This is relevant for the case of SM on switch, since in the
		 * switch info we need to signal somehow not to continue
		 * the sweeping. */
		sm->p_subn->in_sweep_hop_0 = TRUE;

		CL_PLOCK_RELEASE(sm->p_lock);

		osm_dr_path_init(&dr_path, h_bind, 0, path_array);
		status = osm_req_get(sm, &dr_path, IB_MAD_ATTR_NODE_INFO, 0,
				     CL_DISP_MSGID_NONE, NULL);
		if (status != IB_SUCCESS)
			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3305: "
				"Request for NodeInfo failed (%s)\n",
				ib_get_err_str(status));
	} else {
		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
			"No bound ports. Deferring sweep...\n");
		status = IB_INVALID_STATE;
	}

	OSM_LOG_EXIT(sm->p_log);
	return status;
}
Пример #6
0
int osm_ucast_mgr_process(IN osm_ucast_mgr_t * p_mgr)
{
	osm_opensm_t *p_osm;
	struct osm_routing_engine *p_routing_eng;
	cl_qmap_t *p_sw_guid_tbl;
	int failed = 0;

	OSM_LOG_ENTER(p_mgr->p_log);

	p_sw_guid_tbl = &p_mgr->p_subn->sw_guid_tbl;
	p_osm = p_mgr->p_subn->p_osm;
	p_routing_eng = p_osm->routing_engine_list;

	CL_PLOCK_EXCL_ACQUIRE(p_mgr->p_lock);

	/*
	   If there are no switches in the subnet, we are done.
	 */
	if (cl_qmap_count(p_sw_guid_tbl) == 0 ||
	    ucast_mgr_setup_all_switches(p_mgr->p_subn) < 0)
		goto Exit;

	failed = -1;
	p_osm->routing_engine_used = NULL;
	while (p_routing_eng) {
		failed = ucast_mgr_route(p_routing_eng, p_osm);
		if (!failed)
			break;
		p_routing_eng = p_routing_eng->next;
	}

	if (!p_osm->routing_engine_used &&
	    p_osm->no_fallback_routing_engine != TRUE) {
		/* If configured routing algorithm failed, use default MinHop */
		failed = ucast_mgr_route(p_osm->default_routing_engine, p_osm);
	}

	if (p_osm->routing_engine_used) {
		OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
			"%s tables configured on all switches\n",
			osm_routing_engine_type_str(p_osm->
						    routing_engine_used->type));

		if (p_mgr->p_subn->opt.use_ucast_cache)
			p_mgr->cache_valid = TRUE;
	} else {
		p_mgr->p_subn->subnet_initialization_error = TRUE;
		OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
			"No routing engine able to successfully configure "
			" switch tables on current fabric\n");
	}
Exit:
	CL_PLOCK_RELEASE(p_mgr->p_lock);
	OSM_LOG_EXIT(p_mgr->p_log);
	return failed;
}
Пример #7
0
void osm_lft_rcv_process(IN void *context, IN void *data)
{
	osm_sm_t *sm = context;
	osm_madw_t *p_madw = data;
	ib_smp_t *p_smp;
	uint32_t block_num;
	osm_switch_t *p_sw;
	osm_lft_context_t *p_lft_context;
	uint8_t *p_block;
	ib_net64_t node_guid;
	ib_api_status_t status;

	CL_ASSERT(sm);

	OSM_LOG_ENTER(sm->p_log);

	CL_ASSERT(p_madw);

	p_smp = osm_madw_get_smp_ptr(p_madw);
	p_block = ib_smp_get_payload_ptr(p_smp);
	block_num = cl_ntoh32(p_smp->attr_mod);

	/*
	   Acquire the switch object for this switch.
	 */
	p_lft_context = osm_madw_get_lft_context_ptr(p_madw);
	node_guid = p_lft_context->node_guid;

	CL_PLOCK_EXCL_ACQUIRE(sm->p_lock);
	p_sw = osm_get_switch_by_guid(sm->p_subn, node_guid);

	if (!p_sw) {
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0401: "
			"LFT received for nonexistent node "
			"0x%" PRIx64 "\n", cl_ntoh64(node_guid));
	} else {
		status = osm_switch_set_lft_block(p_sw, p_block, block_num);
		if (status != IB_SUCCESS) {
			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0402: "
				"Setting forwarding table block failed (%s)"
				", Switch 0x%" PRIx64 " %s\n",
				ib_get_err_str(status), cl_ntoh64(node_guid),
				p_sw->p_node->print_desc);
		}
	}

	CL_PLOCK_RELEASE(sm->p_lock);
	OSM_LOG_EXIT(sm->p_log);
}
Пример #8
0
int osm_ucast_mgr_process(IN osm_ucast_mgr_t * const p_mgr)
{
	osm_opensm_t *p_osm;
	struct osm_routing_engine *p_routing_eng;
	cl_qmap_t *p_sw_guid_tbl;

	OSM_LOG_ENTER(p_mgr->p_log);

	p_sw_guid_tbl = &p_mgr->p_subn->sw_guid_tbl;
	p_osm = p_mgr->p_subn->p_osm;
	p_routing_eng = p_osm->routing_engine_list;

	CL_PLOCK_EXCL_ACQUIRE(p_mgr->p_lock);

	/*
	   If there are no switches in the subnet, we are done.
	 */
	if (cl_qmap_count(p_sw_guid_tbl) == 0 ||
	    ucast_mgr_setup_all_switches(p_mgr->p_subn) < 0)
		goto Exit;

	p_osm->routing_engine_used = OSM_ROUTING_ENGINE_TYPE_NONE;
	while (p_routing_eng) {
		if (!ucast_mgr_route(p_routing_eng, p_osm))
			break;
		p_routing_eng = p_routing_eng->next;
	}

	if (p_osm->routing_engine_used == OSM_ROUTING_ENGINE_TYPE_NONE) {
		/* If configured routing algorithm failed, use default MinHop */
		osm_ucast_mgr_build_lid_matrices(p_mgr);
		ucast_mgr_build_lfts(p_mgr);
		p_osm->routing_engine_used = OSM_ROUTING_ENGINE_TYPE_MINHOP;
	}

	OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
		"%s tables configured on all switches\n",
		osm_routing_engine_type_str(p_osm->routing_engine_used));

	if (p_mgr->p_subn->opt.use_ucast_cache)
		p_mgr->cache_valid = TRUE;

Exit:
	CL_PLOCK_RELEASE(p_mgr->p_lock);
	OSM_LOG_EXIT(p_mgr->p_log);
	return 0;
}
Пример #9
0
/**********************************************************************
 Clear out all existing port lid assignments
**********************************************************************/
static ib_api_status_t state_mgr_clean_known_lids(IN osm_sm_t * sm)
{
	ib_api_status_t status = IB_SUCCESS;
	cl_ptr_vector_t *p_vec = &(sm->p_subn->port_lid_tbl);
	uint32_t i;

	OSM_LOG_ENTER(sm->p_log);

	/* we need a lock here! */
	CL_PLOCK_ACQUIRE(sm->p_lock);

	for (i = 0; i < cl_ptr_vector_get_size(p_vec); i++)
		cl_ptr_vector_set(p_vec, i, NULL);

	CL_PLOCK_RELEASE(sm->p_lock);

	OSM_LOG_EXIT(sm->p_log);
	return status;
}
Пример #10
0
int osm_link_mgr_process(osm_sm_t * sm, IN const uint8_t link_state)
{
	cl_qmap_t *p_node_guid_tbl;
	osm_node_t *p_node;
	int ret = 0;

	OSM_LOG_ENTER(sm->p_log);

	p_node_guid_tbl = &sm->p_subn->node_guid_tbl;

	CL_PLOCK_EXCL_ACQUIRE(sm->p_lock);

	for (p_node = (osm_node_t *) cl_qmap_head(p_node_guid_tbl);
	     p_node != (osm_node_t *) cl_qmap_end(p_node_guid_tbl);
	     p_node = (osm_node_t *) cl_qmap_next(&p_node->map_item))
		if (link_mgr_process_node(sm, p_node, link_state))
			ret = -1;

	CL_PLOCK_RELEASE(sm->p_lock);

	OSM_LOG_EXIT(sm->p_log);
	return ret;
}
Пример #11
0
void osm_ni_rcv_process(IN void *context, IN void *data)
{
	osm_sm_t *sm = context;
	osm_madw_t *p_madw = data;
	ib_node_info_t *p_ni;
	ib_smp_t *p_smp;
	osm_node_t *p_node;

	CL_ASSERT(sm);

	OSM_LOG_ENTER(sm->p_log);

	CL_ASSERT(p_madw);

	p_smp = osm_madw_get_smp_ptr(p_madw);
	p_ni = ib_smp_get_payload_ptr(p_smp);

	CL_ASSERT(p_smp->attr_id == IB_MAD_ATTR_NODE_INFO);

	if (PF(p_ni->node_guid == 0)) {
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D16: "
			"Got Zero Node GUID! Found on the directed route:\n");
		osm_dump_smp_dr_path_v2(sm->p_log, p_smp, FILE_ID, OSM_LOG_ERROR);
		goto Exit;
	}

	if (PF(p_ni->port_guid == 0)) {
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D17: "
			"Got Zero Port GUID! Found on the directed route:\n");
		osm_dump_smp_dr_path_v2(sm->p_log, p_smp, FILE_ID, OSM_LOG_ERROR);
		goto Exit;
	}

	if (ib_smp_get_status(p_smp)) {
		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
			"MAD status 0x%x received\n",
			cl_ntoh16(ib_smp_get_status(p_smp)));
		goto Exit;
	}

	/*
	   Determine if this node has already been discovered,
	   and process accordingly.
	   During processing of this node, hold the shared lock.
	 */

	CL_PLOCK_EXCL_ACQUIRE(sm->p_lock);
	p_node = osm_get_node_by_guid(sm->p_subn, p_ni->node_guid);

	osm_dump_node_info_v2(sm->p_log, p_ni, FILE_ID, OSM_LOG_DEBUG);

	if (!p_node)
		ni_rcv_process_new(sm, p_madw);
	else
		ni_rcv_process_existing(sm, p_node, p_madw);

	CL_PLOCK_RELEASE(sm->p_lock);

Exit:
	OSM_LOG_EXIT(sm->p_log);
}
Пример #12
0
void osm_si_rcv_process(IN void *context, IN void *data)
{
	osm_sm_t *sm = context;
	osm_madw_t *p_madw = data;
	ib_switch_info_t *p_si;
	ib_smp_t *p_smp;
	osm_node_t *p_node;
	ib_net64_t node_guid;
	osm_si_context_t *p_context;

	CL_ASSERT(sm);

	OSM_LOG_ENTER(sm->p_log);

	CL_ASSERT(p_madw);

	p_smp = osm_madw_get_smp_ptr(p_madw);
	p_si = ib_smp_get_payload_ptr(p_smp);
	p_context = osm_madw_get_si_context_ptr(p_madw);
	node_guid = p_context->node_guid;

	OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
		"Switch GUID 0x%016" PRIx64 ", TID 0x%" PRIx64 "\n",
		cl_ntoh64(node_guid), cl_ntoh64(p_smp->trans_id));

	CL_PLOCK_EXCL_ACQUIRE(sm->p_lock);

	p_node = osm_get_node_by_guid(sm->p_subn, node_guid);
	if (!p_node) {
		CL_PLOCK_RELEASE(sm->p_lock);
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3606: "
			"SwitchInfo received for nonexistent node "
			"with GUID 0x%" PRIx64 "\n", cl_ntoh64(node_guid));
		goto Exit;
	}

	/* Hack for bad value in Mellanox switch */
	if (cl_ntoh16(p_si->lin_top) > IB_LID_UCAST_END_HO) {
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3610: "
			"\n\t\t\t\tBad LinearFDBTop value = 0x%X "
			"on switch 0x%" PRIx64
			"\n\t\t\t\tForcing internal correction to 0x%X\n",
			cl_ntoh16(p_si->lin_top),
			cl_ntoh64(osm_node_get_node_guid(p_node)), 0);
		p_si->lin_top = 0;
	}

	/* Acquire the switch object for this switch. */
	if (!p_node->sw) {
		si_rcv_process_new(sm, p_node, p_madw);
		/* A new switch was found during the sweep so we need
		   to ignore the current LFT settings. */
		sm->p_subn->ignore_existing_lfts = TRUE;
	} else if (si_rcv_process_existing(sm, p_node, p_madw))
		/* we might get back a request for signaling change was detected */
		sm->p_subn->force_heavy_sweep = TRUE;

	CL_PLOCK_RELEASE(sm->p_lock);
Exit:
	OSM_LOG_EXIT(sm->p_log);
}
Пример #13
0
/**********************************************************************
 Sweeps the node 1 hop away.
 This sets off a "chain reaction" that causes discovery of the subnet.
 Used when there is suspicion that something on the subnet has changed.
**********************************************************************/
static ib_api_status_t state_mgr_sweep_hop_1(IN osm_sm_t * sm)
{
	ib_api_status_t status = IB_SUCCESS;
	osm_madw_context_t context;
	osm_node_t *p_node;
	osm_port_t *p_port;
	osm_dr_path_t hop_1_path;
	ib_net64_t port_guid;
	uint8_t port_num;
	uint8_t path_array[IB_SUBNET_PATH_HOPS_MAX];
	uint8_t num_ports;
	osm_physp_t *p_ext_physp;

	OSM_LOG_ENTER(sm->p_log);

	/*
	 * First, get our own port and node objects.
	 */
	port_guid = sm->p_subn->sm_port_guid;

	CL_ASSERT(port_guid);

	/* Set the in_sweep_hop_0 flag in subn to be FALSE.
	 * This will indicate the sweeping to continue beyond the
	 * the current node.
	 * This is relevant for the case of SM on switch, since in the
	 * switch info we need to signal that the sweeping should
	 * continue through the switch. */
	sm->p_subn->in_sweep_hop_0 = FALSE;

	p_port = osm_get_port_by_guid(sm->p_subn, port_guid);
	if (!p_port) {
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3310: "
			"No SM port object\n");
		status = IB_ERROR;
		goto Exit;
	}

	p_node = p_port->p_node;
	CL_ASSERT(p_node);

	port_num = ib_node_info_get_local_port_num(&p_node->node_info);

	OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
		"Probing hop 1 on local port %u\n", port_num);

	memset(path_array, 0, sizeof(path_array));
	/* the hop_1 operations depend on the type of our node.
	 * Currently - legal nodes that can host SM are SW and CA */
	switch (osm_node_get_type(p_node)) {
	case IB_NODE_TYPE_CA:
	case IB_NODE_TYPE_ROUTER:
		memset(&context, 0, sizeof(context));
		context.ni_context.node_guid = osm_node_get_node_guid(p_node);
		context.ni_context.port_num = port_num;

		path_array[1] = port_num;

		osm_dr_path_init(&hop_1_path, 1, path_array);
		CL_PLOCK_ACQUIRE(sm->p_lock);
		status = osm_req_get(sm, &hop_1_path, IB_MAD_ATTR_NODE_INFO, 0,
				     CL_DISP_MSGID_NONE, &context);
		CL_PLOCK_RELEASE(sm->p_lock);
		if (status != IB_SUCCESS)
			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3311: "
				"Request for NodeInfo failed (%s)\n",
				ib_get_err_str(status));
		break;

	case IB_NODE_TYPE_SWITCH:
		/* Need to go over all the ports of the switch, and send a
		 * node_info from them. This doesn't include the port 0 of the
		 * switch, which hosts the SM.
		 * Note: We'll send another switchInfo on port 0, since if no
		 * ports are connected, we still want to get some response, and
		 * have the subnet come up.
		 */
		num_ports = osm_node_get_num_physp(p_node);
		for (port_num = 1; port_num < num_ports; port_num++) {
			/* go through the port only if the port is not DOWN */
			p_ext_physp = osm_node_get_physp_ptr(p_node, port_num);
			if (p_ext_physp && ib_port_info_get_port_state
			    (&(p_ext_physp->port_info)) > IB_LINK_DOWN) {
				memset(&context, 0, sizeof(context));
				context.ni_context.node_guid =
				    osm_node_get_node_guid(p_node);
				context.ni_context.port_num = port_num;

				path_array[1] = port_num;
				osm_dr_path_init(&hop_1_path, 1, path_array);
				CL_PLOCK_ACQUIRE(sm->p_lock);
				status = osm_req_get(sm, &hop_1_path,
						     IB_MAD_ATTR_NODE_INFO, 0,
						     CL_DISP_MSGID_NONE,
						     &context);
				CL_PLOCK_RELEASE(sm->p_lock);
				if (status != IB_SUCCESS)
					OSM_LOG(sm->p_log, OSM_LOG_ERROR,
						"ERR 3312: "
						"Request for NodeInfo failed (%s)\n",
						ib_get_err_str(status));
			}
		}
		break;

	default:
		OSM_LOG(sm->p_log, OSM_LOG_ERROR,
			"ERR 3313: Unknown node type %d (%s)\n",
			osm_node_get_type(p_node), p_node->print_desc);
	}

Exit:
	OSM_LOG_EXIT(sm->p_log);
	return status;
}
Пример #14
0
/**********************************************************************
 * Send Trap 64 on all new ports.
 **********************************************************************/
static void state_mgr_report_new_ports(IN osm_sm_t * sm)
{
	ib_gid_t port_gid;
	ib_mad_notice_attr_t notice;
	ib_api_status_t status;
	ib_net64_t port_guid;
	cl_map_item_t *p_next;
	osm_port_t *p_port;
	uint16_t min_lid_ho;
	uint16_t max_lid_ho;

	OSM_LOG_ENTER(sm->p_log);

	CL_PLOCK_ACQUIRE(sm->p_lock);
	p_next = cl_qmap_head(&sm->p_subn->port_guid_tbl);
	while (p_next != cl_qmap_end(&sm->p_subn->port_guid_tbl)) {
		p_port = (osm_port_t *) p_next;
		p_next = cl_qmap_next(p_next);

		if (!p_port->is_new)
			continue;

		port_guid = osm_port_get_guid(p_port);
		/* issue a notice - trap 64 */

		/* details of the notice */
		notice.generic_type = 0x83;	/* is generic subn mgt type */
		ib_notice_set_prod_type_ho(&notice, 4);	/* A Class Manager generator */
		/* endport becomes reachable */
		notice.g_or_v.generic.trap_num = CL_HTON16(64);
		/* The sm_base_lid is saved in network order already. */
		notice.issuer_lid = sm->p_subn->sm_base_lid;
		/* following C14-72.1.1 and table 119 p739 */
		/* we need to provide the GID */
		port_gid.unicast.prefix = sm->p_subn->opt.subnet_prefix;
		port_gid.unicast.interface_id = port_guid;
		memcpy(&(notice.data_details.ntc_64_67.gid), &(port_gid),
		       sizeof(ib_gid_t));

		/* According to page 653 - the issuer gid in this case of trap
		 * is the SM gid, since the SM is the initiator of this trap. */
		notice.issuer_gid.unicast.prefix =
		    sm->p_subn->opt.subnet_prefix;
		notice.issuer_gid.unicast.interface_id =
		    sm->p_subn->sm_port_guid;

		status = osm_report_notice(sm->p_log, sm->p_subn, &notice);
		if (status != IB_SUCCESS)
			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3318: "
				"Error sending trap reports on GUID:0x%016"
				PRIx64 " (%s)\n", port_gid.unicast.interface_id,
				ib_get_err_str(status));
		osm_port_get_lid_range_ho(p_port, &min_lid_ho, &max_lid_ho);
		OSM_LOG(sm->p_log, OSM_LOG_INFO,
			"Discovered new port with GUID:0x%016" PRIx64
			" LID range [%u,%u] of node: %s\n",
			cl_ntoh64(port_gid.unicast.interface_id),
			min_lid_ho, max_lid_ho,
			p_port->p_node ? p_port->p_node->
			print_desc : "UNKNOWN");

		p_port->is_new = 0;
	}
	CL_PLOCK_RELEASE(sm->p_lock);

	OSM_LOG_EXIT(sm->p_log);
}
Пример #15
0
/**********************************************************************
 Initiates a lightweight sweep of the subnet.
 Used during normal sweeps after the subnet is up.
**********************************************************************/
static ib_api_status_t state_mgr_light_sweep_start(IN osm_sm_t * sm)
{
	ib_api_status_t status = IB_SUCCESS;
	osm_bind_handle_t h_bind;
	cl_qmap_t *p_sw_tbl;
	cl_map_item_t *p_next;
	osm_node_t *p_node;
	osm_physp_t *p_physp;
	uint8_t port_num;

	OSM_LOG_ENTER(sm->p_log);

	p_sw_tbl = &sm->p_subn->sw_guid_tbl;

	/*
	 * First, get the bind handle.
	 */
	h_bind = osm_sm_mad_ctrl_get_bind_handle(&sm->mad_ctrl);
	if (h_bind == OSM_BIND_INVALID_HANDLE) {
		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
			"No bound ports. Deferring sweep...\n");
		status = IB_INVALID_STATE;
		goto _exit;
	}

	OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE, "INITIATING LIGHT SWEEP");
	CL_PLOCK_ACQUIRE(sm->p_lock);
	cl_qmap_apply_func(p_sw_tbl, state_mgr_get_sw_info, sm);
	CL_PLOCK_RELEASE(sm->p_lock);

	CL_PLOCK_ACQUIRE(sm->p_lock);
	cl_qmap_apply_func(&sm->p_subn->node_guid_tbl, state_mgr_get_node_desc,
			   sm);
	CL_PLOCK_RELEASE(sm->p_lock);

	/* now scan the list of physical ports that were not down but have no remote port */
	CL_PLOCK_ACQUIRE(sm->p_lock);
	p_next = cl_qmap_head(&sm->p_subn->node_guid_tbl);
	while (p_next != cl_qmap_end(&sm->p_subn->node_guid_tbl)) {
		p_node = (osm_node_t *) p_next;
		p_next = cl_qmap_next(p_next);

		for (port_num = 1; port_num < osm_node_get_num_physp(p_node);
		     port_num++) {
			p_physp = osm_node_get_physp_ptr(p_node, port_num);
			if (p_physp && (osm_physp_get_port_state(p_physp) !=
					IB_LINK_DOWN)
			    && !osm_physp_get_remote(p_physp)) {
				OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3315: "
					"Unknown remote side for node 0x%016"
					PRIx64
					" (%s) port %u. Adding to light sweep sampling list\n",
					cl_ntoh64(osm_node_get_node_guid
						  (p_node)),
					p_node->print_desc, port_num);

				osm_dump_dr_path(sm->p_log,
						 osm_physp_get_dr_path_ptr
						 (p_physp), OSM_LOG_ERROR);

				state_mgr_get_remote_port_info(sm, p_physp);
			}
		}
	}

	cl_qmap_apply_func(&sm->p_subn->sm_guid_tbl, query_sm_info, sm);

	CL_PLOCK_RELEASE(sm->p_lock);

_exit:
	OSM_LOG_EXIT(sm->p_log);
	return status;
}
Пример #16
0
/**********************************************************************
 * Send SubnSet(SMInfo) SMP with HANDOVER attribute to the
 * remote_sm indicated.
 **********************************************************************/
static void state_mgr_send_handover(IN osm_sm_t * sm, IN osm_remote_sm_t * p_sm)
{
	uint8_t payload[IB_SMP_DATA_SIZE];
	ib_sm_info_t *p_smi = (ib_sm_info_t *) payload;
	osm_madw_context_t context;
	const osm_port_t *p_port;
	ib_api_status_t status;

	OSM_LOG_ENTER(sm->p_log);

	/*
	 * Send a query of SubnSet(SMInfo) HANDOVER to the remote sm given.
	 */

	memset(&context, 0, sizeof(context));
	p_port = osm_get_port_by_guid(sm->p_subn, p_sm->smi.guid);
	if (p_port == NULL) {
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3316: "
			"No port object on given remote_sm object\n");
		goto Exit;
	}

	/* update the master_guid in the sm_state_mgr object according to */
	/* the guid of the port where the new Master SM should reside. */
	OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
		"Handing over mastership. Updating sm_state_mgr master_guid: %016"
		PRIx64 " (node %s)\n", cl_ntoh64(p_port->guid),
		p_port->p_node ? p_port->p_node->print_desc : "UNKNOWN");
	sm->master_sm_guid = p_port->guid;

	context.smi_context.port_guid = p_port->guid;
	context.smi_context.set_method = TRUE;

	p_smi->guid = sm->p_subn->sm_port_guid;
	p_smi->act_count = cl_hton32(sm->p_subn->p_osm->stats.qp0_mads_sent);
	p_smi->pri_state = (uint8_t) (sm->p_subn->sm_state |
				      sm->p_subn->opt.sm_priority << 4);
	/*
	 * Return 0 for the SM key unless we authenticate the requester
	 * as the master SM.
	 */
	if (ib_sminfo_get_state(&p_sm->smi) == IB_SMINFO_STATE_MASTER) {
		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
			"Responding to master SM with real sm_key\n");
		p_smi->sm_key = sm->p_subn->opt.sm_key;
	} else {
		/* The requester is not authenticated as master - set sm_key to zero */
		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
			"Responding to SM not master with zero sm_key\n");
		p_smi->sm_key = 0;
	}

	CL_PLOCK_ACQUIRE(sm->p_lock);
	status = osm_req_set(sm, osm_physp_get_dr_path_ptr(p_port->p_physp),
			     payload, sizeof(payload), IB_MAD_ATTR_SM_INFO,
			     IB_SMINFO_ATTR_MOD_HANDOVER, CL_DISP_MSGID_NONE,
			     &context);
	CL_PLOCK_RELEASE(sm->p_lock);

	if (status != IB_SUCCESS)
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3317: "
			"Failure requesting SMInfo (%s)\n",
			ib_get_err_str(status));

Exit:
	OSM_LOG_EXIT(sm->p_log);
}
Пример #17
0
void osm_pi_rcv_process(IN void *context, IN void *data)
{
	osm_sm_t *sm = context;
	osm_madw_t *p_madw = data;
	ib_port_info_t *p_pi;
	ib_smp_t *p_smp;
	osm_port_t *p_port;
	osm_physp_t *p_physp;
	osm_dr_path_t *p_dr_path;
	osm_node_t *p_node;
	osm_pi_context_t *p_context;
	ib_net64_t port_guid, node_guid;
	uint8_t port_num;

	CL_ASSERT(sm);

	OSM_LOG_ENTER(sm->p_log);

	CL_ASSERT(p_madw);

	p_smp = osm_madw_get_smp_ptr(p_madw);
	p_context = osm_madw_get_pi_context_ptr(p_madw);
	p_pi = ib_smp_get_payload_ptr(p_smp);

	CL_ASSERT(p_smp->attr_id == IB_MAD_ATTR_PORT_INFO);

	/*
	 * Attribute modifier has already been validated upon MAD receive,
	 * which means that port_num has to be valid - it originated from
	 * the request attribute modifier.
	 */
	port_num = (uint8_t) cl_ntoh32(p_smp->attr_mod);

	port_guid = p_context->port_guid;
	node_guid = p_context->node_guid;

	osm_dump_port_info_v2(sm->p_log, node_guid, port_guid, port_num, p_pi,
			      FILE_ID, OSM_LOG_DEBUG);

	/* On receipt of client reregister, clear the reregister bit so
	   reregistering won't be sent again and again */
	if (p_context->set_method &&
	    (ib_port_info_get_client_rereg(p_pi) || p_context->client_rereg)) {
		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
			"Client reregister received on response\n");
		ib_port_info_set_client_rereg(p_pi, 0);
		p_context->client_rereg = FALSE;
	}

	/*
	   we might get a response during a light sweep looking for a change in
	   the status of a remote port that did not respond in earlier sweeps.
	   So if the context of the Get was light_sweep - we do not need to
	   do anything with the response - just flag that we need a heavy sweep
	 */
	if (p_context->light_sweep == TRUE) {
		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
			"Got light sweep response from remote port of parent node "
			"GUID 0x%" PRIx64 " port 0x%016" PRIx64
			", Commencing heavy sweep\n",
			cl_ntoh64(node_guid), cl_ntoh64(port_guid));
		sm->p_subn->force_heavy_sweep = TRUE;
		sm->p_subn->ignore_existing_lfts = TRUE;
		goto Exit;
	}

	CL_PLOCK_EXCL_ACQUIRE(sm->p_lock);
	p_port = osm_get_port_by_guid(sm->p_subn, port_guid);
	if (PF(!p_port)) {
		CL_PLOCK_RELEASE(sm->p_lock);
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0F06: "
			"No port object for port with GUID 0x%" PRIx64
			"\n\t\t\t\tfor parent node GUID 0x%" PRIx64
			", TID 0x%" PRIx64 "\n",
			cl_ntoh64(port_guid),
			cl_ntoh64(node_guid), cl_ntoh64(p_smp->trans_id));
		goto Exit;
	}

	p_node = p_port->p_node;
	CL_ASSERT(p_node);

	if (PF(p_pi->local_port_num > p_node->node_info.num_ports)) {
		CL_PLOCK_RELEASE(sm->p_lock);
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0F15: "
			"Received PortInfo for port GUID 0x%" PRIx64 " is "
			"non-compliant and is being ignored since the "
			"local port num %u > num ports %u\n",
			cl_ntoh64(port_guid), p_pi->local_port_num,
			p_node->node_info.num_ports);
		goto Exit;
	}

	/*
	   If we were setting the PortInfo, then receiving
	   this attribute was not part of sweeping the subnet.
	   In this case, just update the PortInfo attribute.

	   In an unfortunate blunder, the IB spec defines the
	   return method for Set() as a GetResp().  Thus, we can't
	   use the method (what would have been SetResp()) to determine
	   our course of action.  So, we have to carry this extra
	   boolean around to determine if we were doing Get() or Set().
	 */
	if (p_context->set_method)
		pi_rcv_process_set(sm, p_node, port_num, p_madw);
	else {

		/*
		   This PortInfo arrived because we did a Get() method,
		   most likely due to a subnet sweep in progress.
		 */
		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
			"Discovered port num %u with GUID 0x%" PRIx64
			" for parent node GUID 0x%" PRIx64
			", TID 0x%" PRIx64 "\n",
			port_num, cl_ntoh64(port_guid),
			cl_ntoh64(node_guid), cl_ntoh64(p_smp->trans_id));

		p_physp = osm_node_get_physp_ptr(p_node, port_num);

		CL_ASSERT(p_physp);

		/* Update the directed route path to this port
		   in case the old path is no longer usable. */
		p_dr_path = osm_physp_get_dr_path_ptr(p_physp);
		osm_dr_path_init(p_dr_path, p_smp->hop_count,
				 p_smp->initial_path);

		p_physp->need_update = osm_pi_rcv_update_self(sm, p_physp, p_pi);

		switch (osm_node_get_type(p_node)) {
		case IB_NODE_TYPE_CA:
		case IB_NODE_TYPE_ROUTER:
			if (!p_node->physp_discovered[port_num]) {
				p_port->discovery_count++;
				p_node->physp_discovered[port_num] = 1;
			}
			p_physp->need_update = osm_pi_rcv_update_neighbor(p_physp);
			pi_rcv_process_ca_or_router_port(sm, p_node, p_physp,
							 p_pi);
			break;
		case IB_NODE_TYPE_SWITCH:
			if (!p_node->physp_discovered[port_num]) {
				p_port->discovery_count++;
				p_node->physp_discovered[port_num] = 1;
			}
			if (port_num == 0)
				pi_rcv_process_switch_port0(sm, p_node,
							    p_physp, p_pi);
			else
				pi_rcv_process_switch_ext_port(sm, p_node,
							       p_physp, p_pi);
			break;
		default:
			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0F07: "
				"Unknown node type %u with GUID 0x%" PRIx64
				"\n", osm_node_get_type(p_node),
				cl_ntoh64(node_guid));
			break;
		}

		/*
		   Get the tables on the physp.
		 */
		if (p_physp->need_update || (p_node->sw &&
					     p_node->sw->need_update))
			pi_rcv_get_pkey_slvl_vla_tables(sm, p_node, p_physp);

	}

	CL_PLOCK_RELEASE(sm->p_lock);

Exit:
	/*
	   Release the lock before jumping here!!
	 */
	OSM_LOG_EXIT(sm->p_log);
}
Пример #18
0
void osm_pi_rcv_process(IN void *context, IN void *data)
{
	osm_sm_t *sm = context;
	osm_madw_t *p_madw = data;
	ib_port_info_t *p_pi;
	ib_smp_t *p_smp;
	osm_port_t *p_port;
	osm_physp_t *p_physp;
	osm_dr_path_t *p_dr_path;
	osm_node_t *p_node;
	osm_pi_context_t *p_context;
	ib_net64_t port_guid;
	ib_net64_t node_guid;
	uint8_t port_num;

	OSM_LOG_ENTER(sm->p_log);

	CL_ASSERT(sm);
	CL_ASSERT(p_madw);

	p_smp = osm_madw_get_smp_ptr(p_madw);
	p_context = osm_madw_get_pi_context_ptr(p_madw);
	p_pi = (ib_port_info_t *) ib_smp_get_payload_ptr(p_smp);

	CL_ASSERT(p_smp->attr_id == IB_MAD_ATTR_PORT_INFO);

	port_num = (uint8_t) cl_ntoh32(p_smp->attr_mod);

	port_guid = p_context->port_guid;
	node_guid = p_context->node_guid;

	osm_dump_port_info(sm->p_log,
			   node_guid, port_guid, port_num, p_pi, OSM_LOG_DEBUG);

	/* On receipt of client reregister, clear the reregister bit so
	   reregistering won't be sent again and again */
	if (ib_port_info_get_client_rereg(p_pi)) {
		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
			"Client reregister received on response\n");
		ib_port_info_set_client_rereg(p_pi, 0);
	}

	/*
	   we might get a response during a light sweep looking for a change in
	   the status of a remote port that did not respond in earlier sweeps.
	   So if the context of the Get was light_sweep - we do not need to
	   do anything with the response - just flag that we need a heavy sweep
	 */
	if (p_context->light_sweep == TRUE) {
		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
			"Got light sweep response from remote port of parent node "
			"GUID 0x%" PRIx64 " port 0x%016" PRIx64
			", Commencing heavy sweep\n",
			cl_ntoh64(node_guid), cl_ntoh64(port_guid));
		sm->p_subn->force_heavy_sweep = TRUE;
		sm->p_subn->ignore_existing_lfts = TRUE;
		goto Exit;
	}

	CL_PLOCK_EXCL_ACQUIRE(sm->p_lock);
	p_port = osm_get_port_by_guid(sm->p_subn, port_guid);
	if (!p_port) {
		CL_PLOCK_RELEASE(sm->p_lock);
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0F06: "
			"No port object for port with GUID 0x%" PRIx64
			"\n\t\t\t\tfor parent node GUID 0x%" PRIx64
			", TID 0x%" PRIx64 "\n",
			cl_ntoh64(port_guid),
			cl_ntoh64(node_guid), cl_ntoh64(p_smp->trans_id));
		goto Exit;
	}

	p_node = p_port->p_node;
	CL_ASSERT(p_node);

	/*
	   If we were setting the PortInfo, then receiving
	   this attribute was not part of sweeping the subnet.
	   In this case, just update the PortInfo attribute.

	   In an unfortunate blunder, the IB spec defines the
	   return method for Set() as a GetResp().  Thus, we can't
	   use the method (what would have been SetResp()) to determine
	   our course of action.  So, we have to carry this extra
	   boolean around to determine if we were doing Get() or Set().
	 */
	if (p_context->set_method)
		osm_pi_rcv_process_set(sm, p_node, port_num, p_madw);
	else {
		p_port->discovery_count++;

		/*
		   This PortInfo arrived because we did a Get() method,
		   most likely due to a subnet sweep in progress.
		 */
		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
			"Discovered port num %u with GUID 0x%" PRIx64
			" for parent node GUID 0x%" PRIx64
			", TID 0x%" PRIx64 "\n",
			port_num, cl_ntoh64(port_guid),
			cl_ntoh64(node_guid), cl_ntoh64(p_smp->trans_id));

		p_physp = osm_node_get_physp_ptr(p_node, port_num);

		/*
		   Determine if we encountered a new Physical Port.
		   If so, initialize the new Physical Port then
		   continue processing as normal.
		 */
		if (!p_physp) {
			OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
				"Initializing port number %u\n", port_num);
			p_physp = &p_node->physp_table[port_num];
			osm_physp_init(p_physp,
				       port_guid,
				       port_num,
				       p_node,
				       osm_madw_get_bind_handle(p_madw),
				       p_smp->hop_count, p_smp->initial_path);
		} else {
			/*
			   Update the directed route path to this port
			   in case the old path is no longer usable.
			 */
			p_dr_path = osm_physp_get_dr_path_ptr(p_physp);
			osm_dr_path_init(p_dr_path,
					 osm_madw_get_bind_handle(p_madw),
					 p_smp->hop_count, p_smp->initial_path);
		}

		/* if port just inited or reached INIT state (external reset)
		   request update for port related tables */
		p_physp->need_update =
		    (ib_port_info_get_port_state(p_pi) == IB_LINK_INIT ||
		     p_physp->need_update > 1) ? 1 : 0;

		switch (osm_node_get_type(p_node)) {
		case IB_NODE_TYPE_CA:
		case IB_NODE_TYPE_ROUTER:
			__osm_pi_rcv_process_ca_or_router_port(sm,
							       p_node, p_physp,
							       p_pi);
			break;
		case IB_NODE_TYPE_SWITCH:
			__osm_pi_rcv_process_switch_port(sm,
							 p_node, p_physp, p_pi);
			break;
		default:
			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0F07: "
				"Unknown node type %u with GUID 0x%" PRIx64
				"\n", osm_node_get_type(p_node),
				cl_ntoh64(node_guid));
			break;
		}

		/*
		   Get the tables on the physp.
		 */
		if (p_physp->need_update || sm->p_subn->need_update)
			__osm_pi_rcv_get_pkey_slvl_vla_tables(sm, p_node,
							      p_physp);

	}

	CL_PLOCK_RELEASE(sm->p_lock);

Exit:
	/*
	   Release the lock before jumping here!!
	 */
	OSM_LOG_EXIT(sm->p_log);
}