/**********************************************************************
 Runs state_mgr_update_node_desc over every entry of the subnet's
 node GUID table, passing the SM object as the callback context.
 The OpenSM lock is held for the duration of the walk.
**********************************************************************/
void osm_update_node_desc(IN osm_opensm_t *osm)
{
	CL_PLOCK_ACQUIRE(&osm->lock);

	cl_qmap_apply_func(&osm->subn.node_guid_tbl,
			   state_mgr_update_node_desc,
			   &osm->sm);

	CL_PLOCK_RELEASE(&osm->lock);
}
/**********************************************************************
 Starts a heavy (thorough) sweep of the subnet.  Used when there is
 suspicion that something on the subnet has changed.

 Resets the per-node, per-port and per-switch discovery counts, marks
 the subnet as being in the hop-0 phase, and requests our own NodeInfo
 over a zero-hop directed route.

 Returns IB_INVALID_STATE when no bind handle is available, otherwise
 the status of the NodeInfo request.
**********************************************************************/
static ib_api_status_t state_mgr_sweep_hop_0(IN osm_sm_t * sm)
{
	uint8_t path_array[IB_SUBNET_PATH_HOPS_MAX];
	osm_bind_handle_t h_bind;
	osm_dr_path_t dr_path;
	ib_api_status_t status;

	OSM_LOG_ENTER(sm->p_log);

	memset(path_array, 0, sizeof(path_array));

	/* Without a bound port there is nothing to sweep from. */
	h_bind = osm_sm_mad_ctrl_get_bind_handle(&sm->mad_ctrl);
	if (h_bind == OSM_BIND_INVALID_HANDLE) {
		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
			"No bound ports. Deferring sweep...\n");
		status = IB_INVALID_STATE;
		goto Exit;
	}

	OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE, "INITIATING HEAVY SWEEP");

	/*
	 * Clear the discovery counts on every known node, port and switch
	 * before asking for our own NodeInfo at 0 hops.
	 */
	CL_PLOCK_ACQUIRE(sm->p_lock);

	cl_qmap_apply_func(&sm->p_subn->node_guid_tbl,
			   state_mgr_reset_node_count, sm);
	cl_qmap_apply_func(&sm->p_subn->port_guid_tbl,
			   state_mgr_reset_port_count, sm);
	cl_qmap_apply_func(&sm->p_subn->sw_guid_tbl,
			   state_mgr_reset_switch_count, sm);

	/*
	 * Raise in_sweep_hop_0 so that the sweep does not continue
	 * beyond the current node.  This matters when the SM runs on a
	 * switch: the SwitchInfo handling needs a signal that it must
	 * not keep sweeping.
	 */
	sm->p_subn->in_sweep_hop_0 = TRUE;

	CL_PLOCK_RELEASE(sm->p_lock);

	/*
	 * NOTE(review): state_mgr_sweep_hop_1 in this file calls
	 * osm_dr_path_init() with three arguments (no bind handle) and
	 * issues osm_req_get() with the lock held, whereas this function
	 * passes h_bind and sends without the lock.  Confirm which form
	 * matches the current prototype and locking contract.
	 */
	osm_dr_path_init(&dr_path, h_bind, 0, path_array);
	status = osm_req_get(sm, &dr_path, IB_MAD_ATTR_NODE_INFO, 0,
			     CL_DISP_MSGID_NONE, NULL);
	if (status != IB_SUCCESS)
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3305: "
			"Request for NodeInfo failed (%s)\n",
			ib_get_err_str(status));

Exit:
	OSM_LOG_EXIT(sm->p_log);
	return status;
}
/**********************************************************************
 Reports whether the SM's own port is down.

 The port is treated as down when the SM port GUID is still zero or
 when no port object with that GUID is found in the subnet.  For a
 switch whose SwitchInfo does not advertise enhanced port 0 (base SP0)
 the port is always considered active; otherwise the state recorded
 on the port's physical port object is used.

 Returns TRUE when the resulting state is IB_LINK_DOWN.
**********************************************************************/
static boolean_t state_mgr_is_sm_port_down(IN osm_sm_t * sm)
{
	/* Every failure path below leaves the state at "down". */
	uint8_t state = IB_LINK_DOWN;
	ib_net64_t port_guid;
	osm_port_t *p_port;
	osm_physp_t *p_physp;

	OSM_LOG_ENTER(sm->p_log);

	port_guid = sm->p_subn->sm_port_guid;

	/* Our own port GUID is not known yet: assume the port is down. */
	if (port_guid == 0) {
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3308: "
			"SM port GUID unknown\n");
		goto Exit;
	}

	CL_ASSERT(port_guid);

	CL_PLOCK_ACQUIRE(sm->p_lock);

	p_port = osm_get_port_by_guid(sm->p_subn, port_guid);
	if (p_port) {
		p_physp = p_port->p_physp;
		CL_ASSERT(p_physp);

		if (!p_port->p_node->sw ||
		    ib_switch_info_is_enhanced_port0(&p_port->p_node->sw->
						     switch_info))
			state = osm_physp_get_port_state(p_physp);
		else
			state = IB_LINK_ACTIVE;	/* base SP0 */
	} else {
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3309: "
			"SM port with GUID:%016" PRIx64 " is unknown\n",
			cl_ntoh64(port_guid));
	}

	CL_PLOCK_RELEASE(sm->p_lock);

Exit:
	OSM_LOG_EXIT(sm->p_log);
	return (state == IB_LINK_DOWN);
}
/**********************************************************************
 Drops every existing port LID assignment by storing NULL in each
 slot of the subnet's port_lid_tbl vector.  Always returns IB_SUCCESS.
**********************************************************************/
static ib_api_status_t state_mgr_clean_known_lids(IN osm_sm_t * sm)
{
	cl_ptr_vector_t *p_lid_tbl = &(sm->p_subn->port_lid_tbl);
	uint32_t lid = 0;

	OSM_LOG_ENTER(sm->p_log);

	/* The table is shared state, so it is wiped under the lock. */
	CL_PLOCK_ACQUIRE(sm->p_lock);

	while (lid < cl_ptr_vector_get_size(p_lid_tbl)) {
		cl_ptr_vector_set(p_lid_tbl, lid, NULL);
		lid++;
	}

	CL_PLOCK_RELEASE(sm->p_lock);

	OSM_LOG_EXIT(sm->p_log);
	return IB_SUCCESS;
}
/**********************************************************************
 * Send Trap 64 on all new ports.
 *
 * Walks the subnet's port GUID table under the lock.  For every port
 * whose is_new flag is set, a generic notice with trap number 64 is
 * built and handed to osm_report_notice(), the discovery is logged
 * together with the port's LID range, and the is_new flag is cleared.
 * A failure to report the notice is logged and does not stop the walk.
 **********************************************************************/
static void state_mgr_report_new_ports(IN osm_sm_t * sm)
{
	ib_gid_t port_gid;
	ib_mad_notice_attr_t notice;
	ib_api_status_t status;
	ib_net64_t port_guid;
	cl_map_item_t *p_next;
	osm_port_t *p_port;
	uint16_t min_lid_ho;
	uint16_t max_lid_ho;

	OSM_LOG_ENTER(sm->p_log);

	CL_PLOCK_ACQUIRE(sm->p_lock);
	p_next = cl_qmap_head(&sm->p_subn->port_guid_tbl);
	while (p_next != cl_qmap_end(&sm->p_subn->port_guid_tbl)) {
		p_port = (osm_port_t *) p_next;
		/* Advance first so the loop body can 'continue' freely. */
		p_next = cl_qmap_next(p_next);

		if (!p_port->is_new)
			continue;

		port_guid = osm_port_get_guid(p_port);

		/*
		 * Issue a notice - trap 64.  Start from a zeroed notice so
		 * that fields not assigned below do not carry whatever
		 * happened to be on the stack.
		 */
		memset(&notice, 0, sizeof(notice));

		/* details of the notice */
		notice.generic_type = 0x83;	/* is generic subn mgt type */
		ib_notice_set_prod_type_ho(&notice, 4);	/* A Class Manager generator */
		/* endport becomes reachable */
		notice.g_or_v.generic.trap_num = CL_HTON16(64);
		/* The sm_base_lid is saved in network order already. */
		notice.issuer_lid = sm->p_subn->sm_base_lid;
		/* following C14-72.1.1 and table 119 p739 */
		/* we need to provide the GID */
		port_gid.unicast.prefix = sm->p_subn->opt.subnet_prefix;
		port_gid.unicast.interface_id = port_guid;
		memcpy(&(notice.data_details.ntc_64_67.gid), &(port_gid),
		       sizeof(ib_gid_t));

		/* According to page 653 - the issuer gid in this case of trap
		 * is the SM gid, since the SM is the initiator of this trap.
		 */
		notice.issuer_gid.unicast.prefix =
		    sm->p_subn->opt.subnet_prefix;
		notice.issuer_gid.unicast.interface_id =
		    sm->p_subn->sm_port_guid;

		status = osm_report_notice(sm->p_log, sm->p_subn, &notice);
		if (status != IB_SUCCESS)
			/*
			 * The GUID is converted with cl_ntoh64() so it is
			 * printed the same way as in the INFO message below.
			 */
			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3318: "
				"Error sending trap reports on GUID:0x%016"
				PRIx64 " (%s)\n",
				cl_ntoh64(port_gid.unicast.interface_id),
				ib_get_err_str(status));

		osm_port_get_lid_range_ho(p_port, &min_lid_ho, &max_lid_ho);
		OSM_LOG(sm->p_log, OSM_LOG_INFO,
			"Discovered new port with GUID:0x%016" PRIx64
			" LID range [%u,%u] of node: %s\n",
			cl_ntoh64(port_gid.unicast.interface_id),
			min_lid_ho, max_lid_ho,
			p_port->p_node ? p_port->p_node->
			print_desc : "UNKNOWN");

		p_port->is_new = 0;
	}
	CL_PLOCK_RELEASE(sm->p_lock);

	OSM_LOG_EXIT(sm->p_log);
}
/**********************************************************************
 Initiates a lightweight sweep of the subnet.  Used during normal
 sweeps after the subnet is up.

 In order, each step under the lock:
   1. state_mgr_get_sw_info is applied to every known switch;
   2. state_mgr_get_node_desc is applied to every known node;
   3. every external physical port (port number >= 1) that is not
      down but has no remote port recorded is logged and queried via
      state_mgr_get_remote_port_info;
   4. query_sm_info is applied to every entry of the SM GUID table.

 Returns IB_INVALID_STATE when no bind handle is available, otherwise
 IB_SUCCESS.
**********************************************************************/
static ib_api_status_t state_mgr_light_sweep_start(IN osm_sm_t * sm)
{
	ib_api_status_t status = IB_SUCCESS;
	osm_bind_handle_t h_bind;
	cl_qmap_t *p_sw_tbl;
	cl_map_item_t *p_next;
	osm_node_t *p_node;
	osm_physp_t *p_physp;
	uint8_t port_num;

	OSM_LOG_ENTER(sm->p_log);

	p_sw_tbl = &sm->p_subn->sw_guid_tbl;

	/*
	 * First, get the bind handle.
	 */
	h_bind = osm_sm_mad_ctrl_get_bind_handle(&sm->mad_ctrl);
	if (h_bind == OSM_BIND_INVALID_HANDLE) {
		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
			"No bound ports. Deferring sweep...\n");
		status = IB_INVALID_STATE;
		goto _exit;
	}

	OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE, "INITIATING LIGHT SWEEP");

	/*
	 * The lock is dropped and re-taken between the phases, exactly as
	 * before; they are intentionally not merged into one critical
	 * section here.
	 */
	CL_PLOCK_ACQUIRE(sm->p_lock);
	cl_qmap_apply_func(p_sw_tbl, state_mgr_get_sw_info, sm);
	CL_PLOCK_RELEASE(sm->p_lock);

	CL_PLOCK_ACQUIRE(sm->p_lock);
	cl_qmap_apply_func(&sm->p_subn->node_guid_tbl,
			   state_mgr_get_node_desc, sm);
	CL_PLOCK_RELEASE(sm->p_lock);

	/* now scan the list of physical ports that were not down but have no remote port */
	CL_PLOCK_ACQUIRE(sm->p_lock);
	p_next = cl_qmap_head(&sm->p_subn->node_guid_tbl);
	while (p_next != cl_qmap_end(&sm->p_subn->node_guid_tbl)) {
		p_node = (osm_node_t *) p_next;
		p_next = cl_qmap_next(p_next);

		/* Port 0 is skipped; only ports 1..num_physp-1 are examined. */
		for (port_num = 1; port_num < osm_node_get_num_physp(p_node);
		     port_num++) {
			p_physp = osm_node_get_physp_ptr(p_node, port_num);
			if (!p_physp)
				continue;
			if (osm_physp_get_port_state(p_physp) == IB_LINK_DOWN)
				continue;
			if (osm_physp_get_remote(p_physp))
				continue;

			/*
			 * The format string must stay a single literal; a
			 * raw line break inside it does not compile.
			 */
			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3315: "
				"Unknown remote side for node 0x%016"
				PRIx64
				" (%s) port %u. Adding to light sweep sampling list\n",
				cl_ntoh64(osm_node_get_node_guid(p_node)),
				p_node->print_desc, port_num);

			osm_dump_dr_path(sm->p_log,
					 osm_physp_get_dr_path_ptr(p_physp),
					 OSM_LOG_ERROR);

			state_mgr_get_remote_port_info(sm, p_physp);
		}
	}

	cl_qmap_apply_func(&sm->p_subn->sm_guid_tbl, query_sm_info, sm);

	CL_PLOCK_RELEASE(sm->p_lock);

_exit:
	OSM_LOG_EXIT(sm->p_log);
	return status;
}
/**********************************************************************
 * Send SubnSet(SMInfo) SMP with HANDOVER attribute to the
 * remote_sm indicated.
 *
 * sm   - the local SM object.
 * p_sm - the remote SM that mastership is being handed to; its
 *        smi.guid is used to look up the destination port.
 *
 * If no port object exists for the remote SM's GUID, an error is
 * logged and nothing is sent.  Otherwise sm->master_sm_guid is updated
 * to the remote port's GUID and the Set is issued over that port's
 * directed route.  A failure of the request is logged only.
 **********************************************************************/
static void state_mgr_send_handover(IN osm_sm_t * sm, IN osm_remote_sm_t * p_sm)
{
	uint8_t payload[IB_SMP_DATA_SIZE];
	ib_sm_info_t *p_smi = (ib_sm_info_t *) payload;
	osm_madw_context_t context;
	const osm_port_t *p_port;
	ib_api_status_t status;

	OSM_LOG_ENTER(sm->p_log);

	/*
	 * Send a query of SubnSet(SMInfo) HANDOVER to the remote sm given.
	 */

	memset(&context, 0, sizeof(context));

	/*
	 * The whole payload buffer is passed to osm_req_set() below, but
	 * only the SMInfo fields are filled in.  Zero it first so the
	 * remaining bytes are not indeterminate stack content.
	 */
	memset(payload, 0, sizeof(payload));

	p_port = osm_get_port_by_guid(sm->p_subn, p_sm->smi.guid);
	if (p_port == NULL) {
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3316: "
			"No port object on given remote_sm object\n");
		goto Exit;
	}

	/* update the master_guid in the sm_state_mgr object according to */
	/* the guid of the port where the new Master SM should reside. */
	OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
		"Handing over mastership. Updating sm_state_mgr master_guid: %016"
		PRIx64 " (node %s)\n", cl_ntoh64(p_port->guid),
		p_port->p_node ? p_port->p_node->print_desc : "UNKNOWN");
	sm->master_sm_guid = p_port->guid;

	context.smi_context.port_guid = p_port->guid;
	context.smi_context.set_method = TRUE;

	p_smi->guid = sm->p_subn->sm_port_guid;
	p_smi->act_count = cl_hton32(sm->p_subn->p_osm->stats.qp0_mads_sent);
	/* SM state goes in the low bits, the priority is shifted up by 4. */
	p_smi->pri_state = (uint8_t) (sm->p_subn->sm_state |
				      sm->p_subn->opt.sm_priority << 4);

	/*
	 * Return 0 for the SM key unless we authenticate the requester
	 * as the master SM.
	 */
	if (ib_sminfo_get_state(&p_sm->smi) == IB_SMINFO_STATE_MASTER) {
		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
			"Responding to master SM with real sm_key\n");
		p_smi->sm_key = sm->p_subn->opt.sm_key;
	} else {
		/* The requester is not authenticated as master - set sm_key to zero */
		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
			"Responding to SM not master with zero sm_key\n");
		p_smi->sm_key = 0;
	}

	CL_PLOCK_ACQUIRE(sm->p_lock);
	status = osm_req_set(sm, osm_physp_get_dr_path_ptr(p_port->p_physp),
			     payload, sizeof(payload), IB_MAD_ATTR_SM_INFO,
			     IB_SMINFO_ATTR_MOD_HANDOVER, CL_DISP_MSGID_NONE,
			     &context);
	CL_PLOCK_RELEASE(sm->p_lock);

	if (status != IB_SUCCESS)
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3317: "
			"Failure requesting SMInfo (%s)\n",
			ib_get_err_str(status));

Exit:
	OSM_LOG_EXIT(sm->p_log);
}
/**********************************************************************
 Requests NodeInfo one hop away from p_node through port_num.

 Fills a fresh NodeInfo context with the node GUID and port number,
 writes port_num into hop 1 of path_array, and issues the Get with the
 lock held.  Returns the status of osm_req_get(); logging is left to
 the caller.
**********************************************************************/
static ib_api_status_t state_mgr_hop_1_get_node_info(IN osm_sm_t * sm,
						     IN osm_node_t * p_node,
						     IN uint8_t port_num,
						     IN uint8_t * path_array)
{
	osm_madw_context_t context;
	osm_dr_path_t hop_1_path;
	ib_api_status_t status;

	memset(&context, 0, sizeof(context));
	context.ni_context.node_guid = osm_node_get_node_guid(p_node);
	context.ni_context.port_num = port_num;

	path_array[1] = port_num;
	osm_dr_path_init(&hop_1_path, 1, path_array);

	CL_PLOCK_ACQUIRE(sm->p_lock);
	status = osm_req_get(sm, &hop_1_path, IB_MAD_ATTR_NODE_INFO, 0,
			     CL_DISP_MSGID_NONE, &context);
	CL_PLOCK_RELEASE(sm->p_lock);

	return status;
}

/**********************************************************************
 Sweeps the node 1 hop away.  This sets off a "chain reaction" that
 causes discovery of the subnet.  Used when there is suspicion that
 something on the subnet has changed.

 Returns IB_ERROR when the SM's own port object cannot be found.
 Otherwise returns the status of the last NodeInfo request issued, or
 IB_SUCCESS when none was issued.
**********************************************************************/
static ib_api_status_t state_mgr_sweep_hop_1(IN osm_sm_t * sm)
{
	uint8_t path_array[IB_SUBNET_PATH_HOPS_MAX];
	ib_api_status_t status = IB_SUCCESS;
	ib_net64_t port_guid;
	osm_port_t *p_port;
	osm_node_t *p_node;
	osm_physp_t *p_ext_physp;
	uint8_t num_ports;
	uint8_t port_num;

	OSM_LOG_ENTER(sm->p_log);

	/*
	 * Start from our own port and node objects.
	 */
	port_guid = sm->p_subn->sm_port_guid;
	CL_ASSERT(port_guid);

	/*
	 * Lower in_sweep_hop_0 so that the sweep is allowed to continue
	 * beyond the current node.  This matters when the SM runs on a
	 * switch: the SwitchInfo handling needs to know that sweeping
	 * should proceed through the switch.
	 */
	sm->p_subn->in_sweep_hop_0 = FALSE;

	p_port = osm_get_port_by_guid(sm->p_subn, port_guid);
	if (!p_port) {
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3310: "
			"No SM port object\n");
		status = IB_ERROR;
		goto Exit;
	}

	p_node = p_port->p_node;
	CL_ASSERT(p_node);

	port_num = ib_node_info_get_local_port_num(&p_node->node_info);

	OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
		"Probing hop 1 on local port %u\n", port_num);

	memset(path_array, 0, sizeof(path_array));

	/*
	 * What hop 1 means depends on the type of our own node.
	 * Currently the legal hosts for an SM are switches and CAs.
	 */
	switch (osm_node_get_type(p_node)) {
	case IB_NODE_TYPE_CA:
	case IB_NODE_TYPE_ROUTER:
		/* Single request, out of the local port. */
		status = state_mgr_hop_1_get_node_info(sm, p_node, port_num,
						       path_array);
		if (status != IB_SUCCESS)
			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3311: "
				"Request for NodeInfo failed (%s)\n",
				ib_get_err_str(status));
		break;

	case IB_NODE_TYPE_SWITCH:
		/*
		 * Send a NodeInfo request out of every switch port except
		 * port 0, which hosts the SM.
		 * Note: We'll send another switchInfo on port 0, since if
		 * no ports are connected, we still want to get some
		 * response, and have the subnet come up.
		 */
		num_ports = osm_node_get_num_physp(p_node);
		for (port_num = 1; port_num < num_ports; port_num++) {
			p_ext_physp = osm_node_get_physp_ptr(p_node, port_num);

			/* Only ports that exist and are not DOWN are probed. */
			if (!p_ext_physp ||
			    ib_port_info_get_port_state
			    (&(p_ext_physp->port_info)) <= IB_LINK_DOWN)
				continue;

			status = state_mgr_hop_1_get_node_info(sm, p_node,
							       port_num,
							       path_array);
			if (status != IB_SUCCESS)
				OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3312: "
					"Request for NodeInfo failed (%s)\n",
					ib_get_err_str(status));
		}
		break;

	default:
		OSM_LOG(sm->p_log, OSM_LOG_ERROR,
			"ERR 3313: Unknown node type %d (%s)\n",
			osm_node_get_type(p_node), p_node->print_desc);
	}

Exit:
	OSM_LOG_EXIT(sm->p_log);
	return status;
}