/**********************************************************************
 Requests the given link state on the physical ports of a node.

 Every physical port slot of p_node is visited, port 0 included.
 Slots without a physp object and ports currently reporting
 IB_LINK_DOWN are left untouched.  A port whose current state is
 already at or beyond link_state is skipped with a debug message,
 except when link_state is IB_LINK_NO_CHANGE, in which case the
 PortInfo update is always issued.

 Returns OSM_SIGNAL_DONE_PENDING if __osm_link_mgr_set_physp_pi()
 returned non-zero for at least one port, OSM_SIGNAL_DONE otherwise.
 **********************************************************************/
static osm_signal_t __osm_link_mgr_process_node(osm_sm_t * sm, IN osm_node_t * const p_node, IN const uint8_t link_state)
{
	osm_signal_t result = OSM_SIGNAL_DONE;
	osm_physp_t *p_port;
	uint32_t port_count;
	uint32_t port_idx;
	uint8_t port_state;

	OSM_LOG_ENTER(sm->p_log);

	OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
		"Node 0x%" PRIx64 " going to %s\n",
		cl_ntoh64(osm_node_get_node_guid(p_node)),
		ib_get_port_state_str(link_state));

	port_count = osm_node_get_num_physp(p_node);

	for (port_idx = 0; port_idx < port_count; port_idx++) {
		/* Nothing to do for an empty slot. */
		p_port = osm_node_get_physp_ptr(p_node, (uint8_t) port_idx);
		if (!p_port)
			continue;

		/* Ports that are down are not driven to a new state. */
		port_state = osm_physp_get_port_state(p_port);
		if (port_state == IB_LINK_DOWN)
			continue;

		/*
		 * A real target state that the port has already reached
		 * needs no update.  IB_LINK_NO_CHANGE never takes this
		 * shortcut: the update is sent regardless.
		 */
		if (link_state != IB_LINK_NO_CHANGE && link_state <= port_state) {
			OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
				"Physical port %u already %s. Skipping\n",
				p_port->port_num,
				ib_get_port_state_str(port_state));
			continue;
		}

		if (__osm_link_mgr_set_physp_pi(sm, p_port, link_state))
			result = OSM_SIGNAL_DONE_PENDING;
	}

	OSM_LOG_EXIT(sm->p_log);
	return (result);
}
/** =========================================================================== */
/*
 * Records one port in the extracted SSA database.
 *
 * The GUID-to-LID record at index *p_offset of
 * p_ssa_db->p_guid_to_lid_tbl is filled from p_port by
 * smdb_guid2lid_init().  A map record for that index is then obtained
 * from ep_map_rec_init(), inserted into p_ssa_db->ep_guid_to_lid_tbl
 * keyed by the port GUID, and *p_offset is advanced by one.
 *
 * A base LID outside [IB_LID_UCAST_START_HO, IB_LID_UCAST_END_HO] is only
 * reported in the log; the port is still recorded.
 *
 * If ep_map_rec_init() returns NULL the failure is logged and the function
 * returns without inserting anything into the map and without advancing
 * *p_offset, so the table slot written above is reused by the next call.
 */
static void extract_guid2lid(osm_port_t *p_port, uint64_t *p_offset, struct ssa_db_extract *p_ssa_db)
{
	struct ep_map_rec *p_map_rec;
#ifdef SSA_PLUGIN_VERBOSE_LOGGING
	uint8_t is_fdr10_active;

	ssa_log(SSA_LOG_VERBOSE, "Port GUID 0x%" PRIx64 " LID %u Port state %d"
		"(%s)\n", ntohll(osm_physp_get_port_guid(p_port->p_physp)),
		ntohs(osm_port_get_base_lid(p_port)),
		osm_physp_get_port_state(p_port->p_physp),
		(osm_physp_get_port_state(p_port->p_physp) < 5 ?
		 port_state_str[osm_physp_get_port_state(p_port->p_physp)] :
		 "???"));
	is_fdr10_active =
	    p_port->p_physp->ext_port_info.link_speed_active & FDR10;
	ssa_log(SSA_LOG_VERBOSE, "FDR10 %s active\n",
		is_fdr10_active ? "" : "not");
#endif

	/* check for valid LID first (diagnostic only, the port is kept) */
	if ((ntohs(osm_port_get_base_lid(p_port)) < IB_LID_UCAST_START_HO) ||
	    (ntohs(osm_port_get_base_lid(p_port)) > IB_LID_UCAST_END_HO)) {
		ssa_log(SSA_LOG_VERBOSE, "Port GUID 0x%" PRIx64
			" has invalid LID %u\n",
			ntohll(osm_physp_get_port_guid(p_port->p_physp)),
			ntohs(osm_port_get_base_lid(p_port)));
	}

	smdb_guid2lid_init(p_port, &p_ssa_db->p_guid_to_lid_tbl[*p_offset]);

	p_map_rec = ep_map_rec_init(*p_offset);
	if (!p_map_rec) {
		/*
		 * Without a record there is nothing valid to link into the
		 * map; inserting &p_map_rec->map_item would hand the map a
		 * pointer derived from NULL.  Skip this port instead.
		 */
		ssa_log(SSA_LOG_VERBOSE,
			"Quick MAP rec memory allocation failed\n");
		return;
	}

	/*
	 * NOTE(review): the return value of cl_qmap_insert() is not checked;
	 * confirm that the same port GUID cannot be inserted twice here.
	 */
	cl_qmap_insert(&p_ssa_db->ep_guid_to_lid_tbl,
		       osm_physp_get_port_guid(p_port->p_physp),
		       &p_map_rec->map_item);

	*p_offset = *p_offset + 1;
}
/**********************************************************************
 Tells whether the SM's own port should be treated as down.

 The port is reported as down when:
   - the SM port GUID in the subnet object is still zero (ERR 3308),
   - no port object is found for that GUID (ERR 3309), or
   - the port's physical port reports IB_LINK_DOWN.

 A port that sits on a switch whose SwitchInfo does not indicate an
 enhanced port 0 is always treated as IB_LINK_ACTIVE (base SP0).

 The port lookup and the state read are done with sm->p_lock acquired
 via CL_PLOCK_ACQUIRE; the lock is released before returning.
 **********************************************************************/
static boolean_t state_mgr_is_sm_port_down(IN osm_sm_t * sm)
{
	ib_net64_t port_guid;
	osm_port_t *p_sm_port;
	osm_physp_t *p_physp;
	uint8_t link_state = IB_LINK_DOWN;

	OSM_LOG_ENTER(sm->p_log);

	port_guid = sm->p_subn->sm_port_guid;

	if (port_guid == 0) {
		/* Our own port GUID is not known yet: assume down. */
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3308: "
			"SM port GUID unknown\n");
	} else {
		CL_ASSERT(port_guid);

		CL_PLOCK_ACQUIRE(sm->p_lock);

		p_sm_port = osm_get_port_by_guid(sm->p_subn, port_guid);
		if (!p_sm_port) {
			/* No port object for our GUID: stay "down". */
			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3309: "
				"SM port with GUID:%016" PRIx64 " is unknown\n",
				cl_ntoh64(port_guid));
		} else {
			p_physp = p_sm_port->p_physp;
			CL_ASSERT(p_physp);

			if (p_sm_port->p_node->sw &&
			    !ib_switch_info_is_enhanced_port0(&p_sm_port->p_node->sw->switch_info)) {
				/* base SP0 */
				link_state = IB_LINK_ACTIVE;
			} else {
				link_state = osm_physp_get_port_state(p_physp);
			}
		}

		CL_PLOCK_RELEASE(sm->p_lock);
	}

	OSM_LOG_EXIT(sm->p_log);
	return (link_state == IB_LINK_DOWN);
}
/**********************************************************************
 Links port port_num of p_node to the neighbor port described by
 p_ni_context (node_guid / port_num), unless one of the checks below
 decides that no link should be created.

 Steps, in order:
   1. A zero node_guid in the context means our own node: nothing to do.
   2. The neighbor node is looked up by GUID; if it is gone, ERR 0D10
      is logged and the function returns.
   3. If the link already exists, return.
   4. If the local port reports IB_LINK_DOWN but is marked discovered,
      clear the mark, force a heavy sweep and return.
   5. If the local port already has some link (possible duplicate GUID
      or re-cabling), either re-query NodeInfo or, after more than 5
      attempts, report a duplicated GUID and force a heavy sweep.
      Execution continues with the following steps in both cases.
   6. If the link would connect a non-zero port to itself and there are
      no switches in the subnet, log it as a duplicate GUID and, when
      opt.exit_on_fatal is TRUE, terminate the process with exit(1).
   7. Create the link and store both directions in the neighbor db.

 The plock must be held before calling this function.
 **********************************************************************/
static void ni_rcv_set_links(IN osm_sm_t * sm, osm_node_t * p_node, const uint8_t port_num, const osm_ni_context_t * p_ni_context)
{
	osm_node_t *p_neighbor_node;
	osm_physp_t *p_physp, *p_remote_physp;

	OSM_LOG_ENTER(sm->p_log);

	/*
	   A special case exists in which the node we're trying to
	   link is our own node.  In this case, the guid value in
	   the ni_context will be zero.
	 */
	if (p_ni_context->node_guid == 0) {
		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
			"Nothing to link for our own node 0x%" PRIx64 "\n",
			cl_ntoh64(osm_node_get_node_guid(p_node)));
		goto _exit;
	}

	p_neighbor_node = osm_get_node_by_guid(sm->p_subn,
					       p_ni_context->node_guid);
	if (PF(!p_neighbor_node)) {
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D10: "
			"Unexpected removal of neighbor node 0x%" PRIx64 "\n",
			cl_ntoh64(p_ni_context->node_guid));
		goto _exit;
	}

	/* When setting the link, ports on both sides of the link should be initialized */
	CL_ASSERT(osm_node_link_has_valid_ports(p_node, port_num,
						p_neighbor_node,
						p_ni_context->port_num));

	if (osm_node_link_exists(p_node, port_num,
				 p_neighbor_node, p_ni_context->port_num)) {
		OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Link already exists\n");
		goto _exit;
	}

	p_physp = osm_node_get_physp_ptr(p_node, port_num);
	if (!p_physp) {
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR OD0E: "
			"Failed to find physp for port %d of Node GUID 0x%"
			PRIx64 "\n", port_num,
			cl_ntoh64(osm_node_get_node_guid(p_node)));
		goto _exit;
	}

	/*
	 * The local port reports DOWN although it was already marked as
	 * discovered.  Do not create a link in this state: clear the
	 * discovered mark and request a heavy sweep instead.
	 */
	if (osm_physp_get_port_state(p_physp) == IB_LINK_DOWN &&
	    p_node->physp_discovered[port_num]) {
		/* Link down on another side. Don't create a link */
		p_node->physp_discovered[port_num] = 0;
		sm->p_subn->force_heavy_sweep = TRUE;
		goto _exit;
	}

	/*
	 * The port already has a link to something.  This branch is only
	 * taken while no heavy sweep is forced, and either on the first
	 * occurrence (dup_count == 0) or when the context refers to this
	 * very node/port as the suspected duplicate.
	 */
	if (osm_node_has_any_link(p_node, port_num) &&
	    sm->p_subn->force_heavy_sweep == FALSE &&
	    (!p_ni_context->dup_count ||
	     (p_ni_context->dup_node_guid == osm_node_get_node_guid(p_node) &&
	      p_ni_context->dup_port_num == port_num))) {
		/*
		   Uh oh...
		   This could be reconnected ports, but also duplicated GUID
		   (2 nodes have the same guid) or a 12x link with lane
		   reversal that is not configured correctly.
		   We will try to recover by querying NodeInfo again.
		   In order to catch even fast port moving to new location(s)
		   and back we will count up to 5.
		   Some crazy reconnections (newly created switch loop right
		   before targeted CA) will not be caught this way.  So in
		   worst case - report GUID duplication and request new
		   discovery.
		   When switch node is targeted NodeInfo querying will be done
		   in opposite order, this is much stronger check,
		   unfortunately it is impossible with CAs.
		 */
		p_physp = osm_node_get_physp_ptr(p_node, port_num);
		if (!p_physp) {
			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR OD0F: "
				"Failed to find physp for port %d of Node GUID 0x%"
				PRIx64 "\n", port_num,
				cl_ntoh64(osm_node_get_node_guid(p_node)));
			goto _exit;
		}

		if (p_ni_context->dup_count > 5) {
			/* Too many retries: give up and report the duplicate. */
			report_duplicated_guid(sm, p_physp, p_neighbor_node,
					       p_ni_context->port_num);
			sm->p_subn->force_heavy_sweep = TRUE;
		} else if (p_node->sw)
			/* Switch node: re-query through the currently linked remote port. */
			requery_dup_node_info(sm, p_physp->p_remote_physp,
					      p_ni_context->dup_count + 1);
		else
			requery_dup_node_info(sm, p_physp,
					      p_ni_context->dup_count + 1);
	}

	/*
	   When there are only two nodes with exact same guids (connected
	   back to back) - the previous check for duplicated guid will not
	   catch them.  But the link will be from the port to itself...
	   Enhanced Port 0 is an exception to this
	 */
	if (osm_node_get_node_guid(p_node) == p_ni_context->node_guid &&
	    port_num == p_ni_context->port_num &&
	    port_num != 0 && cl_qmap_count(&sm->p_subn->sw_guid_tbl) == 0) {
		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
			"Duplicate GUID found by link from a port to itself:"
			"node 0x%" PRIx64 ", port number %u\n",
			cl_ntoh64(osm_node_get_node_guid(p_node)), port_num);

		p_physp = osm_node_get_physp_ptr(p_node, port_num);
		if (!p_physp) {
			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR OD1D: "
				"Failed to find physp for port %d of Node GUID 0x%"
				PRIx64 "\n", port_num,
				cl_ntoh64(osm_node_get_node_guid(p_node)));
			goto _exit;
		}

		osm_dump_dr_path_v2(sm->p_log, osm_physp_get_dr_path_ptr(p_physp),
				    FILE_ID, OSM_LOG_VERBOSE);

		/* Configured to stop on fatal subnet errors: terminate the process. */
		if (sm->p_subn->opt.exit_on_fatal == TRUE) {
			osm_log_v2(sm->p_log, OSM_LOG_SYS, FILE_ID,
				   "Errors on subnet. Duplicate GUID found "
				   "by link from a port to itself. "
				   "See verbose opensm.log for more details\n");
			exit(1);
		}
	}

	OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
		"Creating new link between:\n\t\t\t\tnode 0x%" PRIx64 ", port number %u and\n\t\t\t\tnode 0x%" PRIx64 ", port number %u\n",
		cl_ntoh64(osm_node_get_node_guid(p_node)), port_num,
		cl_ntoh64(p_ni_context->node_guid), p_ni_context->port_num);

	/* Only consulted while the unicast cache is marked valid. */
	if (sm->ucast_mgr.cache_valid)
		osm_ucast_cache_check_new_link(&sm->ucast_mgr, p_node, port_num,
					       p_neighbor_node,
					       p_ni_context->port_num);

	/* Both endpoints must have a physp object before linking. */
	p_physp = osm_node_get_physp_ptr(p_node, port_num);
	p_remote_physp = osm_node_get_physp_ptr(p_neighbor_node,
						p_ni_context->port_num);
	if (!p_physp || !p_remote_physp)
		goto _exit;

	osm_node_link(p_node, port_num, p_neighbor_node,
		      p_ni_context->port_num);

	/* Record the neighbor relation in both directions. */
	osm_db_neighbor_set(sm->p_subn->p_neighbor,
			    cl_ntoh64(osm_physp_get_port_guid(p_physp)),
			    port_num,
			    cl_ntoh64(osm_physp_get_port_guid(p_remote_physp)),
			    p_ni_context->port_num);
	osm_db_neighbor_set(sm->p_subn->p_neighbor,
			    cl_ntoh64(osm_physp_get_port_guid(p_remote_physp)),
			    p_ni_context->port_num,
			    cl_ntoh64(osm_physp_get_port_guid(p_physp)),
			    port_num);

_exit:
	OSM_LOG_EXIT(sm->p_log);
}
/**********************************************************************
 Requests the given link state on the physical ports of a node.

 Every physical port slot of p_node is visited, port 0 included.
 For each slot:
   - empty slots and ports reporting IB_LINK_DOWN are skipped;
   - for any port other than port 0 whose remote physical port is
     missing or not valid, IB_LINK_DOWN is requested (unless the port
     is currently IB_LINK_INIT) and the port is not processed further;
     the outcome of that request does not affect the return value;
   - a port already at or beyond link_state is skipped with a debug
     message, except when link_state is IB_LINK_NO_CHANGE, in which
     case the PortInfo update is always issued.

 Returns -1 if link_mgr_set_physp_pi() returned non-zero for at least
 one regular state update, 0 otherwise.
 **********************************************************************/
static int link_mgr_process_node(osm_sm_t * sm, IN osm_node_t * p_node, IN const uint8_t link_state)
{
	osm_physp_t *p_port;
	osm_physp_t *p_peer;
	uint32_t port_idx;
	uint32_t port_count;
	uint8_t port_state;
	int rc = 0;

	OSM_LOG_ENTER(sm->p_log);

	OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
		"Node 0x%" PRIx64 " going to %s\n",
		cl_ntoh64(osm_node_get_node_guid(p_node)),
		ib_get_port_state_str(link_state));

	port_count = osm_node_get_num_physp(p_node);

	for (port_idx = 0; port_idx < port_count; port_idx++) {
		/* Nothing to do for an empty slot. */
		p_port = osm_node_get_physp_ptr(p_node, (uint8_t) port_idx);
		if (!p_port)
			continue;

		/* Ports that are down are not driven to a new state. */
		port_state = osm_physp_get_port_state(p_port);
		if (port_state == IB_LINK_DOWN)
			continue;

		/*
		 * Any port but port zero needs a usable remote side.
		 * Without one, take the port down (a port still in INIT
		 * is left as it is) and move on to the next port.
		 */
		p_peer = osm_physp_get_remote(p_port);
		if (port_idx != 0 &&
		    (!p_peer || !osm_physp_is_valid(p_peer))) {
			if (port_state != IB_LINK_INIT) {
				link_mgr_set_physp_pi(sm, p_port,
						      IB_LINK_DOWN);
			}
			continue;
		}

		/*
		 * A real target state that the port has already reached
		 * needs no update.  IB_LINK_NO_CHANGE never takes this
		 * shortcut: the update is sent regardless.
		 */
		if (link_state != IB_LINK_NO_CHANGE && link_state <= port_state) {
			OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
				"Physical port %u already %s. Skipping\n",
				p_port->port_num,
				ib_get_port_state_str(port_state));
			continue;
		}

		if (link_mgr_set_physp_pi(sm, p_port, link_state))
			rc = -1;
	}

	OSM_LOG_EXIT(sm->p_log);
	return rc;
}
/**********************************************************************
 Starts a light sweep of the subnet.

 Returns IB_INVALID_STATE without doing anything else when the SM MAD
 controller has no valid bind handle; otherwise returns IB_SUCCESS
 after, each time with sm->p_lock acquired via CL_PLOCK_ACQUIRE:
   1. applying state_mgr_get_sw_info to every entry of sw_guid_tbl,
   2. applying state_mgr_get_node_desc to every entry of node_guid_tbl,
   3. calling state_mgr_get_remote_port_info for every port (numbered
      1 and up) that is not IB_LINK_DOWN yet has no remote port,
      logging ERR 3315 for each, and then applying query_sm_info to
      every entry of sm_guid_tbl.
 **********************************************************************/
static ib_api_status_t state_mgr_light_sweep_start(IN osm_sm_t * sm)
{
	ib_api_status_t status = IB_SUCCESS;
	osm_bind_handle_t h_bind;
	cl_qmap_t *p_sw_tbl;
	cl_qmap_t *p_node_tbl;
	cl_map_item_t *p_item;
	cl_map_item_t *p_following;
	osm_node_t *p_node;
	osm_physp_t *p_physp;
	uint8_t port_num;

	OSM_LOG_ENTER(sm->p_log);

	p_sw_tbl = &sm->p_subn->sw_guid_tbl;
	p_node_tbl = &sm->p_subn->node_guid_tbl;

	/* Without a bound port there is nothing to sweep through. */
	h_bind = osm_sm_mad_ctrl_get_bind_handle(&sm->mad_ctrl);
	if (h_bind == OSM_BIND_INVALID_HANDLE) {
		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
			"No bound ports. Deferring sweep...\n");
		status = IB_INVALID_STATE;
		goto _exit;
	}

	OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE, "INITIATING LIGHT SWEEP");

	/* Pass 1: switches. */
	CL_PLOCK_ACQUIRE(sm->p_lock);
	cl_qmap_apply_func(p_sw_tbl, state_mgr_get_sw_info, sm);
	CL_PLOCK_RELEASE(sm->p_lock);

	/* Pass 2: node descriptions of all nodes. */
	CL_PLOCK_ACQUIRE(sm->p_lock);
	cl_qmap_apply_func(p_node_tbl, state_mgr_get_node_desc, sm);
	CL_PLOCK_RELEASE(sm->p_lock);

	/*
	 * Pass 3: ports that are not down but whose remote side is
	 * unknown, followed by the known SMs.
	 */
	CL_PLOCK_ACQUIRE(sm->p_lock);

	for (p_item = cl_qmap_head(p_node_tbl);
	     p_item != cl_qmap_end(p_node_tbl);
	     p_item = p_following) {
		/* Fetch the successor before working on this node. */
		p_following = cl_qmap_next(p_item);
		p_node = (osm_node_t *) p_item;

		for (port_num = 1;
		     port_num < osm_node_get_num_physp(p_node);
		     port_num++) {
			p_physp = osm_node_get_physp_ptr(p_node, port_num);
			if (!p_physp)
				continue;
			if (osm_physp_get_port_state(p_physp) == IB_LINK_DOWN)
				continue;
			if (osm_physp_get_remote(p_physp))
				continue;

			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3315: "
				"Unknown remote side for node 0x%016"
				PRIx64
				" (%s) port %u. Adding to light sweep sampling list\n",
				cl_ntoh64(osm_node_get_node_guid(p_node)),
				p_node->print_desc, port_num);

			osm_dump_dr_path(sm->p_log,
					 osm_physp_get_dr_path_ptr(p_physp),
					 OSM_LOG_ERROR);

			state_mgr_get_remote_port_info(sm, p_physp);
		}
	}

	cl_qmap_apply_func(&sm->p_subn->sm_guid_tbl, query_sm_info, sm);

	CL_PLOCK_RELEASE(sm->p_lock);

_exit:
	OSM_LOG_EXIT(sm->p_log);
	return status;
}