コード例 #1
0
/**********************************************************************
 * Dump a data counter reading vs the previous reading to the debug log
 *
 * db       - perfmgr database; the log handle is db->perfmgr->log
 * guid     - GUID printed in the header line
 * port_num - port number printed in the header line
 * port     - stored port record holding dc_previous and dc_total
 * cur      - the reading being dumped
 *
 * Each counter line has the form "<tag> <cur> <-- <previous> (<total>)".
 * Nothing is emitted unless OSM_LOG_DEBUG is active.
 **********************************************************************/
static inline void
debug_dump_dc_reading(perfmgr_db_t * db, uint64_t guid, uint8_t port_num,
		      db_port_t * port, perfmgr_db_data_cnt_reading_t * cur)
{
	osm_log_t *log = db->perfmgr->log;

	if (!OSM_LOG_IS_ACTIVE_V2(log, OSM_LOG_DEBUG))
		return;		/* skip the formatting work when debug is off */

	/*
	 * Identify the port first (same header debug_dump_err_reading()
	 * prints); without it the counters below cannot be attributed.
	 */
	osm_log_v2(log, OSM_LOG_DEBUG, FILE_ID,
		   "GUID 0x%" PRIx64 " Port %u:\n", guid, port_num);
	osm_log_v2(log, OSM_LOG_DEBUG, FILE_ID,
		   "xd %" PRIu64 " <-- %" PRIu64 " (%" PRIu64 ")\n",
		   cur->xmit_data, port->dc_previous.xmit_data,
		   port->dc_total.xmit_data);
	osm_log_v2(log, OSM_LOG_DEBUG, FILE_ID,
		   "rd %" PRIu64 " <-- %" PRIu64 " (%" PRIu64 ")\n", cur->rcv_data,
		   port->dc_previous.rcv_data, port->dc_total.rcv_data);
	osm_log_v2(log, OSM_LOG_DEBUG, FILE_ID,
		   "xp %" PRIu64 " <-- %" PRIu64 " (%" PRIu64 ")\n",
		   cur->xmit_pkts, port->dc_previous.xmit_pkts,
		   port->dc_total.xmit_pkts);
	osm_log_v2(log, OSM_LOG_DEBUG, FILE_ID,
		   "rp %" PRIu64 " <-- %" PRIu64 " (%" PRIu64 ")\n", cur->rcv_pkts,
		   port->dc_previous.rcv_pkts, port->dc_total.rcv_pkts);
}
コード例 #2
0
/*
 * Announce that the subnet is up: one plain log line plus a verbose
 * message box.
 */
static void state_mgr_up_msg(IN const osm_sm_t * sm)
{
	const char *box_text = "SUBNET UP (sweep disabled)";

	/*
	 * The SYS level is used only while first_time_master_sweep is
	 * set, i.e. the first time the SM reaches Master state with the
	 * subnet up; afterwards the message drops to INFO.
	 */
	if (sm->p_subn->first_time_master_sweep)
		osm_log_v2(sm->p_log, OSM_LOG_SYS, FILE_ID, "SUBNET UP\n");
	else
		osm_log_v2(sm->p_log, OSM_LOG_INFO, FILE_ID, "SUBNET UP\n");

	/* A zero sweep interval is called out in the box text */
	if (sm->p_subn->opt.sweep_interval)
		box_text = "SUBNET UP";

	OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE, box_text);
}
コード例 #3
0
/*
 * Map apply callback: dispose of a switch's new_lft buffer.
 *
 * item - map item that is cast to the osm_switch_t being cleaned up
 * log  - log handle passed straight to osm_log_v2()
 *
 * The buffer is freed (and the pointer cleared) only when its first
 * max_lid_ho + 1 bytes equal those of sw->lft.  If they differ, ERR 331D
 * is logged and the buffer is kept.
 */
static void cleanup_switch(cl_map_item_t * item, void *log)
{
	osm_switch_t *p_sw = (osm_switch_t *) item;
	int lft_differs;

	/* Nothing pending for this switch */
	if (p_sw->new_lft == NULL)
		return;

	lft_differs = memcmp(p_sw->lft, p_sw->new_lft,
			     p_sw->max_lid_ho + 1) != 0;

	if (!lft_differs) {
		free(p_sw->new_lft);
		p_sw->new_lft = NULL;
		return;
	}

	osm_log_v2(log, OSM_LOG_ERROR, FILE_ID, "ERR 331D: "
		   "LFT of switch 0x%016" PRIx64 " (%s) is not up to date\n",
		   cl_ntoh64(p_sw->p_node->node_info.node_guid),
		   p_sw->p_node->print_desc);
}
コード例 #4
0
/**********************************************************************
 Link port port_num of p_node to the neighbor port named by the NodeInfo
 context (p_ni_context->node_guid / p_ni_context->port_num).

 sm           - SM object (log, subnet, unicast manager)
 p_node       - local node of the link
 port_num     - port on p_node
 p_ni_context - NodeInfo context carrying the neighbor GUID/port and
                the duplicate-GUID retry state (dup_count, dup_node_guid,
                dup_port_num)

 No link is created when:
   - the context GUID is zero (our own node),
   - the neighbor node is not found in the subnet,
   - the link already exists,
   - a physp lookup fails,
   - the local port is IB_LINK_DOWN although it was already discovered
     (the discovered mark is cleared and a heavy sweep is forced).

 When a link is created, both directions are also recorded in the
 neighbor database.

 The plock must be held before calling this function.
**********************************************************************/
static void ni_rcv_set_links(IN osm_sm_t * sm, osm_node_t * p_node,
			     const uint8_t port_num,
			     const osm_ni_context_t * p_ni_context)
{
	osm_node_t *p_neighbor_node;
	osm_physp_t *p_physp, *p_remote_physp;

	OSM_LOG_ENTER(sm->p_log);

	/*
	   A special case exists in which the node we're trying to
	   link is our own node.  In this case, the guid value in
	   the ni_context will be zero.
	 */
	if (p_ni_context->node_guid == 0) {
		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
			"Nothing to link for our own node 0x%" PRIx64 "\n",
			cl_ntoh64(osm_node_get_node_guid(p_node)));
		goto _exit;
	}

	p_neighbor_node = osm_get_node_by_guid(sm->p_subn,
					       p_ni_context->node_guid);
	if (PF(!p_neighbor_node)) {
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D10: "
			"Unexpected removal of neighbor node 0x%" PRIx64 "\n",
			cl_ntoh64(p_ni_context->node_guid));
		goto _exit;
	}

	/* When setting the link, ports on both
	   sides of the link should be initialized */
	CL_ASSERT(osm_node_link_has_valid_ports(p_node, port_num,
						p_neighbor_node,
						p_ni_context->port_num));

	if (osm_node_link_exists(p_node, port_num,
				 p_neighbor_node, p_ni_context->port_num)) {
		OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Link already exists\n");
		goto _exit;
	}

	p_physp = osm_node_get_physp_ptr(p_node, port_num);
	if (!p_physp) {
		/* Error code uses the digit zero, like ERR 0D10 above */
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D0E: "
			"Failed to find physp for port %d of Node GUID 0x%"
			PRIx64 "\n", port_num,
			cl_ntoh64(osm_node_get_node_guid(p_node)));
		goto _exit;
	}

	/*
	 * If the link went UP, after we already discovered it, we shouldn't
	 * set the link between the ports and resweep.
	 */
	if (osm_physp_get_port_state(p_physp) == IB_LINK_DOWN &&
	    p_node->physp_discovered[port_num]) {
		/* Link down on another side. Don't create a link*/
		p_node->physp_discovered[port_num] = 0;
		sm->p_subn->force_heavy_sweep = TRUE;
		goto _exit;
	}

	/*
	 * The port already has some link, no heavy sweep is pending, and
	 * this is either the first attempt (dup_count == 0) or a requery
	 * that was issued for exactly this node/port.
	 */
	if (osm_node_has_any_link(p_node, port_num) &&
	    sm->p_subn->force_heavy_sweep == FALSE &&
	    (!p_ni_context->dup_count ||
	     (p_ni_context->dup_node_guid == osm_node_get_node_guid(p_node) &&
	      p_ni_context->dup_port_num == port_num))) {
		/*
		   Uh oh...
		   This could be reconnected ports, but also duplicated GUID
		   (2 nodes have the same guid) or a 12x link with lane reversal
		   that is not configured correctly.
		   We will try to recover by querying NodeInfo again.
		   In order to catch even fast port moving to new location(s)
		   and back we will count up to 5.
		   Some crazy reconnections (newly created switch loop right
		   before targeted CA) will not be caught this way. So in worst
		   case - report GUID duplication and request new discovery.
		   When switch node is targeted NodeInfo querying will be done
		   in opposite order, this is much stronger check, unfortunately
		   it is impossible with CAs.
		 */
		p_physp = osm_node_get_physp_ptr(p_node, port_num);
		if (!p_physp) {
			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D0F: "
				"Failed to find physp for port %d of Node GUID 0x%"
				PRIx64 "\n", port_num,
				cl_ntoh64(osm_node_get_node_guid(p_node)));
			goto _exit;
		}

		if (p_ni_context->dup_count > 5) {
			report_duplicated_guid(sm, p_physp, p_neighbor_node,
					       p_ni_context->port_num);
			sm->p_subn->force_heavy_sweep = TRUE;
		} else if (p_node->sw)
			requery_dup_node_info(sm, p_physp->p_remote_physp,
					      p_ni_context->dup_count + 1);
		else
			requery_dup_node_info(sm, p_physp,
					      p_ni_context->dup_count + 1);
		/* Note: all three cases fall through to the code below */
	}

	/*
	   When there are only two nodes with exact same guids (connected back
	   to back) - the previous check for duplicated guid will not catch
	   them. But the link will be from the port to itself...
	   Enhanced Port 0 is an exception to this
	 */
	if (osm_node_get_node_guid(p_node) == p_ni_context->node_guid &&
	    port_num == p_ni_context->port_num &&
	    port_num != 0 && cl_qmap_count(&sm->p_subn->sw_guid_tbl) == 0) {
		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
			"Duplicate GUID found by link from a port to itself: "
			"node 0x%" PRIx64 ", port number %u\n",
			cl_ntoh64(osm_node_get_node_guid(p_node)), port_num);
		p_physp = osm_node_get_physp_ptr(p_node, port_num);
		if (!p_physp) {
			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D1D: "
				"Failed to find physp for port %d of Node GUID 0x%"
				PRIx64 "\n", port_num,
				cl_ntoh64(osm_node_get_node_guid(p_node)));
			goto _exit;
		}

		osm_dump_dr_path_v2(sm->p_log, osm_physp_get_dr_path_ptr(p_physp),
				    FILE_ID, OSM_LOG_VERBOSE);

		/* Terminates the process when exit_on_fatal is configured */
		if (sm->p_subn->opt.exit_on_fatal == TRUE) {
			osm_log_v2(sm->p_log, OSM_LOG_SYS, FILE_ID,
				   "Errors on subnet. Duplicate GUID found "
				   "by link from a port to itself. "
				   "See verbose opensm.log for more details\n");
			exit(1);
		}
	}

	OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
		"Creating new link between:\n\t\t\t\tnode 0x%" PRIx64
		", port number %u and\n\t\t\t\tnode 0x%" PRIx64
		", port number %u\n",
		cl_ntoh64(osm_node_get_node_guid(p_node)), port_num,
		cl_ntoh64(p_ni_context->node_guid), p_ni_context->port_num);

	if (sm->ucast_mgr.cache_valid)
		osm_ucast_cache_check_new_link(&sm->ucast_mgr, p_node, port_num,
					       p_neighbor_node,
					       p_ni_context->port_num);

	/* Both endpoints are needed for the neighbor database entries */
	p_physp = osm_node_get_physp_ptr(p_node, port_num);
	p_remote_physp = osm_node_get_physp_ptr(p_neighbor_node,
						p_ni_context->port_num);
	if (!p_physp || !p_remote_physp)
		goto _exit;

	osm_node_link(p_node, port_num, p_neighbor_node, p_ni_context->port_num);

	/* Record the link in the neighbor database in both directions */
	osm_db_neighbor_set(sm->p_subn->p_neighbor,
			    cl_ntoh64(osm_physp_get_port_guid(p_physp)),
			    port_num,
			    cl_ntoh64(osm_physp_get_port_guid(p_remote_physp)),
			    p_ni_context->port_num);
	osm_db_neighbor_set(sm->p_subn->p_neighbor,
			    cl_ntoh64(osm_physp_get_port_guid(p_remote_physp)),
			    p_ni_context->port_num,
			    cl_ntoh64(osm_physp_get_port_guid(p_physp)),
			    port_num);

_exit:
	OSM_LOG_EXIT(sm->p_log);
}
コード例 #5
0
/**********************************************************************
 * Log an error counter reading next to the stored previous reading and
 * the running total (debug level only)
 *
 * A "GUID ... Port ..." header line is followed by one line per counter
 * in the form "<tag> <cur> <-- <previous> (<total>)".
 **********************************************************************/
static inline void
debug_dump_err_reading(perfmgr_db_t * db, uint64_t guid, uint8_t port_num,
		       db_port_t * port, perfmgr_db_err_reading_t * cur)
{
/* Emit the line for one counter; the same field name is read from
 * cur, port->err_previous and port->err_total. */
#define DUMP_ERR_CNT(tag, field)					\
	osm_log_v2(p_log, OSM_LOG_DEBUG, FILE_ID,			\
		   tag " %" PRIu64 " <-- %" PRIu64 " (%" PRIu64 ")\n",	\
		   cur->field, port->err_previous.field,		\
		   port->err_total.field)

	osm_log_t *p_log = db->perfmgr->log;

	/* Avoid all the formatting work when debug logging is off */
	if (!OSM_LOG_IS_ACTIVE_V2(p_log, OSM_LOG_DEBUG))
		return;

	osm_log_v2(p_log, OSM_LOG_DEBUG, FILE_ID,
		   "GUID 0x%" PRIx64 " Port %u:\n", guid, port_num);

	DUMP_ERR_CNT("sym", symbol_err_cnt);
	DUMP_ERR_CNT("ler", link_err_recover);
	DUMP_ERR_CNT("ld", link_downed);
	DUMP_ERR_CNT("re", rcv_err);
	DUMP_ERR_CNT("rrp", rcv_rem_phys_err);
	DUMP_ERR_CNT("rsr", rcv_switch_relay_err);
	DUMP_ERR_CNT("xd", xmit_discards);
	DUMP_ERR_CNT("xce", xmit_constraint_err);
	DUMP_ERR_CNT("rce", rcv_constraint_err);
	DUMP_ERR_CNT("li", link_integrity);
	DUMP_ERR_CNT("bo", buffer_overrun);
	DUMP_ERR_CNT("vld", vl15_dropped);
	DUMP_ERR_CNT("xw", xmit_wait);

#undef DUMP_ERR_CNT
}
コード例 #6
0
/**********************************************************************
 * Run one subnet sweep.
 *
 * Apart from an optional configuration rescan, nothing is done unless
 * the SM state is MASTER or DISCOVERING.  After that one of three paths
 * is taken:
 *
 *  - light sweep: switches are already known, the state is not
 *    DISCOVERING and no heavy sweep, reroute or initialization error
 *    is pending.  If a heavy sweep gets requested while the light sweep
 *    is in flight, execution continues with the heavy sweep.
 *  - reroute only: same preconditions, but force_reroute is set.
 *  - heavy sweep: discovery (hop 0, hop 1), SM arbitration, then PKEY,
 *    LID, unicast, QoS, multicast, alias GUID, link state and
 *    congestion control configuration, and finally the "subnet up"
 *    notifications.
 *
 * Most stages are followed by a wait for pending transactions; a
 * non-zero result from that wait makes the function return immediately.
 **********************************************************************/
static void do_sweep(osm_sm_t * sm)
{
	ib_api_status_t status;
	osm_remote_sm_t *p_remote_sm;
	/* set once the config files were rescanned, to avoid doing it twice */
	unsigned config_parsed = 0;

	if (sm->p_subn->force_heavy_sweep) {
		if (osm_subn_rescan_conf_files(sm->p_subn) < 0)
			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 331A: "
				"osm_subn_rescan_conf_file failed\n");
		else
			config_parsed = 1;
	}

	/* Only a MASTER or DISCOVERING SM sweeps */
	if (sm->p_subn->sm_state != IB_SMINFO_STATE_MASTER &&
	    sm->p_subn->sm_state != IB_SMINFO_STATE_DISCOVERING)
		return;

	if (sm->p_subn->coming_out_of_standby) {
		/*
		 * Need to force re-write of sm_base_lid to all ports
		 * to do that we want all the ports to be considered
		 * foreign
		 */
		state_mgr_clean_known_lids(sm);

		/*
		 * Need to reconfigure LFTs, PKEYs, and QoS on all switches
		 * when coming out of STANDBY
		 */
		sm->p_subn->need_update = 1;
	}

	sm->master_sm_found = 0;

	/*
	 * If we already have switches, then try a light sweep.
	 * Otherwise, this is probably our first discovery pass
	 * or we are connected in loopback. In both cases do a
	 * heavy sweep.
	 * Note: If we are connected in loopback we want a heavy
	 * sweep, since we will not be getting any traps if there is
	 * a lost connection.
	 */
	/*  if we are in DISCOVERING state - this means it is either in
	 *  initializing or wake up from STANDBY - run the heavy sweep */
	if (cl_qmap_count(&sm->p_subn->sw_guid_tbl)
	    && sm->p_subn->sm_state != IB_SMINFO_STATE_DISCOVERING
	    && sm->p_subn->opt.force_heavy_sweep == FALSE
	    && sm->p_subn->force_heavy_sweep == FALSE
	    && sm->p_subn->force_reroute == FALSE
	    && sm->p_subn->subnet_initialization_error == FALSE
	    && (state_mgr_light_sweep_start(sm) == IB_SUCCESS)) {
		if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
			return;
		/* A heavy sweep requested meanwhile falls through below */
		if (!sm->p_subn->force_heavy_sweep) {
			/* report the dump event only when the dump returned 0 */
			if (sm->p_subn->opt.sa_db_dump &&
			    !osm_sa_db_file_dump(sm->p_subn->p_osm))
				osm_opensm_report_event(sm->p_subn->p_osm,
							OSM_EVENT_ID_SA_DB_DUMPED,
							NULL);
			OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
					"LIGHT SWEEP COMPLETE");
			return;
		}
	}

	/*
	 * Unicast cache should be invalidated if there were errors
	 * during initialization, if subnet re-route is requested, or
	 * if we are coming out of standby.
	 */
	if (sm->p_subn->opt.use_ucast_cache &&
	    (sm->p_subn->subnet_initialization_error ||
	     sm->p_subn->force_reroute || sm->p_subn->coming_out_of_standby))
		osm_ucast_cache_invalidate(&sm->ucast_mgr);

	/*
	 * If we don't need to do a heavy sweep and we want to do a reroute,
	 * just reroute only.
	 */
	if (cl_qmap_count(&sm->p_subn->sw_guid_tbl)
	    && sm->p_subn->sm_state != IB_SMINFO_STATE_DISCOVERING
	    && sm->p_subn->opt.force_heavy_sweep == FALSE
	    && sm->p_subn->force_heavy_sweep == FALSE
	    && sm->p_subn->force_reroute == TRUE
	    && sm->p_subn->subnet_initialization_error == FALSE) {
		/* Reset flag */
		sm->p_subn->force_reroute = FALSE;

		/* Re-program the switches fully */
		sm->p_subn->ignore_existing_lfts = TRUE;

		/* NOTE(review): on failure ignore_existing_lfts is left TRUE */
		if (osm_ucast_mgr_process(&sm->ucast_mgr)) {
			OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
					"REROUTE FAILED");
			return;
		}
		osm_qos_setup(sm->p_subn->p_osm);

		/* Reset flag */
		sm->p_subn->ignore_existing_lfts = FALSE;

		if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
			return;

		osm_congestion_control_setup(sm->p_subn->p_osm);

		if (osm_congestion_control_wait_pending_transactions (sm->p_subn->p_osm))
			return;

		/* An error raised during the reroute continues into the
		 * heavy sweep below */
		if (!sm->p_subn->subnet_initialization_error) {
			OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
					"REROUTE COMPLETE");
			osm_opensm_report_event(sm->p_subn->p_osm,
						OSM_EVENT_ID_UCAST_ROUTING_DONE,
						NULL);
			return;
		}
	}

	osm_opensm_report_event(sm->p_subn->p_osm,
				OSM_EVENT_ID_HEAVY_SWEEP_START, NULL);

	/* go to heavy sweep */
repeat_discovery:

	/* First of all - unset all flags */
	sm->p_subn->force_heavy_sweep = FALSE;
	sm->p_subn->force_reroute = FALSE;
	sm->p_subn->subnet_initialization_error = FALSE;

	/* Reset tracking values in case limiting component got removed
	 * from fabric. */
	sm->p_subn->min_ca_mtu = IB_MAX_MTU;
	sm->p_subn->min_ca_rate = IB_MAX_RATE;
	sm->p_subn->min_data_vls = IB_MAX_NUM_VLS - 1;

	/* rescan configuration updates (skipped if already done above) */
	if (!config_parsed && osm_subn_rescan_conf_files(sm->p_subn) < 0)
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 331A: "
			"osm_subn_rescan_conf_file failed\n");

	if (sm->p_subn->sm_state != IB_SMINFO_STATE_MASTER)
		sm->p_subn->need_update = 1;

	status = state_mgr_sweep_hop_0(sm);
	if (status != IB_SUCCESS ||
	    wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
		return;

	/* last_sm_port_state makes the up/down messages edge-triggered */
	if (state_mgr_is_sm_port_down(sm) == TRUE) {
		if (sm->p_subn->last_sm_port_state) {
			sm->p_subn->last_sm_port_state = 0;
			osm_log_v2(sm->p_log, OSM_LOG_SYS, FILE_ID, "SM port is down\n");
			OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
					"SM PORT DOWN");
		}

		/* Run the drop manager - we want to clear all records */
		osm_drop_mgr_process(sm);

		/* Move to DISCOVERING state */
		if (sm->p_subn->sm_state != IB_SMINFO_STATE_DISCOVERING)
			osm_sm_state_mgr_process(sm, OSM_SM_SIGNAL_DISCOVER);
		osm_opensm_report_event(sm->p_subn->p_osm,
					OSM_EVENT_ID_STATE_CHANGE, NULL);
		return;
	} else {
		if (!sm->p_subn->last_sm_port_state) {
			sm->p_subn->last_sm_port_state = 1;
			osm_log_v2(sm->p_log, OSM_LOG_SYS, FILE_ID, "SM port is up\n");
			OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
					"SM PORT UP");
		}
	}

	status = state_mgr_sweep_hop_1(sm);
	if (status != IB_SUCCESS ||
	    wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
		return;

	/* discovery completed - check other sm presence */
	if (sm->master_sm_found) {
		/*
		 * Call the sm_state_mgr with signal
		 * MASTER_OR_HIGHER_SM_DETECTED
		 */
		osm_sm_state_mgr_process(sm,
					 OSM_SM_SIGNAL_MASTER_OR_HIGHER_SM_DETECTED);
		OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
				"ENTERING STANDBY STATE");
		/* notify master SM about us */
		osm_send_trap144(sm, 0);
		osm_opensm_report_event(sm->p_subn->p_osm,
					OSM_EVENT_ID_STATE_CHANGE, NULL);
		return;
	}

	/* if new sweep requested - don't bother with the rest */
	if (sm->p_subn->force_heavy_sweep) {
		/* make the repeated pass rescan the config files again */
		config_parsed = 0;
		goto repeat_discovery;
	}

	osm_opensm_report_event(sm->p_subn->p_osm,
				OSM_EVENT_ID_HEAVY_SWEEP_DONE, NULL);

	OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE, "HEAVY SWEEP COMPLETE");

	osm_drop_mgr_process(sm);

	/* If we are MASTER - get the highest remote_sm, and
	 * see if it is higher than our local sm.
	 */
	if (sm->p_subn->sm_state == IB_SMINFO_STATE_MASTER) {
		p_remote_sm = state_mgr_get_highest_sm(sm);
		if (p_remote_sm != NULL) {
			/* report new ports (trap 64) before leaving MASTER */
			state_mgr_report_new_ports(sm);

			/* need to handover the mastership
			 * to the remote sm, and move to standby */
			state_mgr_send_handover(sm, p_remote_sm);
			osm_sm_state_mgr_process(sm,
						 OSM_SM_SIGNAL_HANDOVER_SENT);
			return;
		} else {
			/* We are the highest sm - check to see if there is
			 * a remote SM that is in master state. */
			p_remote_sm = state_mgr_exists_other_master_sm(sm);
			if (p_remote_sm != NULL) {
				/* There is a remote SM that is master.
				 * need to wait for that SM to relinquish control
				 * of its portion of the subnet. C14-60.2.1.
				 * Also - need to start polling on that SM. */
				sm->p_polling_sm = p_remote_sm;
				osm_sm_state_mgr_process(sm,
							 OSM_SM_SIGNAL_WAIT_FOR_HANDOVER);
				return;
			}
		}
	}

	/*
	 * If we are not MASTER already - this means that we are
	 * in discovery state. call osm_sm_state_mgr with signal
	 * DISCOVERY_COMPLETED
	 */
	if (sm->p_subn->sm_state == IB_SMINFO_STATE_DISCOVERING)
		osm_sm_state_mgr_process(sm, OSM_SM_SIGNAL_DISCOVERY_COMPLETED);

	osm_pkey_mgr_process(sm->p_subn->p_osm);

	/* try to restore SA DB (this should be before lid_mgr
	   because we may want to disable clients reregistration
	   when SA DB is restored) */
	osm_sa_db_file_load(sm->p_subn->p_osm);

	if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
		return;

	OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
			"PKEY setup completed - STARTING SM LID CONFIG");

	osm_lid_mgr_process_sm(&sm->lid_mgr);
	if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
		return;

	OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
			"SM LID ASSIGNMENT COMPLETE - STARTING SUBNET LID CONFIG");
	state_mgr_notify_lid_change(sm);

	osm_lid_mgr_process_subnet(&sm->lid_mgr);
	if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
		return;

	/* At this point we need to check the consistency of
	 * the port_lid_tbl under the subnet. There might be
	 * errors in it if PortInfo Set requests didn't reach
	 * their destination. */
	state_mgr_check_tbl_consistency(sm);

	OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
			"LID ASSIGNMENT COMPLETE - STARTING SWITCH TABLE CONFIG");

	/*
	 * Proceed with unicast forwarding table configuration; if it fails
	 * return early to wait for a trap or the next sweep interval.
	 * Full routing runs when the cache is not valid or when cache
	 * processing returns non-zero.
	 */

	if (!sm->ucast_mgr.cache_valid ||
	    osm_ucast_cache_process(&sm->ucast_mgr))
		if (osm_ucast_mgr_process(&sm->ucast_mgr))
			return;

	osm_qos_setup(sm->p_subn->p_osm);

	if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
		return;

	/* cleanup switch lft buffers */
	cl_qmap_apply_func(&sm->p_subn->sw_guid_tbl, cleanup_switch, sm->p_log);

	/* We are done setting all LFTs so clear the ignore existing.
	 * From now on, as long as we are still master, we want to
	 * take into account these lfts. */
	sm->p_subn->ignore_existing_lfts = FALSE;

	OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
			"SWITCHES CONFIGURED FOR UNICAST");
	osm_opensm_report_event(sm->p_subn->p_osm,
				OSM_EVENT_ID_UCAST_ROUTING_DONE, NULL);

	if (!sm->p_subn->opt.disable_multicast) {
		osm_mcast_mgr_process(sm, TRUE);
		if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
			return;
		OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
				"SWITCHES CONFIGURED FOR MULTICAST");
	}

	osm_guid_mgr_process(sm);
	if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
		return;
	OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE, "ALIAS GUIDS CONFIGURED");

	/*
	 * The LINK_PORTS state is required since we cannot count on
	 * the port state change MADs to succeed. This is an artifact
	 * of the spec defining state change from state X to state X
	 * as an error. The hardware then is not required to process
	 * other parameters provided by the Set(PortInfo) Packet.
	 */

	osm_link_mgr_process(sm, IB_LINK_NO_CHANGE);
	if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
		return;

	OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
			"LINKS PORTS CONFIGURED - SET LINKS TO ARMED STATE");

	osm_link_mgr_process(sm, IB_LINK_ARMED);
	if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
		return;

	OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
			"LINKS ARMED - SET LINKS TO ACTIVE STATE");

	osm_link_mgr_process(sm, IB_LINK_ACTIVE);
	if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
		return;

	/*
	 * The sweep completed!
	 */

	/* Now do GSI configuration */

	osm_congestion_control_setup(sm->p_subn->p_osm);

	if (osm_congestion_control_wait_pending_transactions (sm->p_subn->p_osm))
		return;

	/*
	 * Send trap 64 on newly discovered endports
	 */
	state_mgr_report_new_ports(sm);

	/* in any case we zero this flag */
	sm->p_subn->coming_out_of_standby = FALSE;

	/* If there were errors - then the subnet is not really up */
	if (sm->p_subn->subnet_initialization_error == TRUE) {
		osm_log_v2(sm->p_log, OSM_LOG_SYS, FILE_ID,
			   "Errors during initialization\n");
		OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_ERROR,
				"ERRORS DURING INITIALIZATION");
	} else {
		sm->p_subn->need_update = 0;
		osm_dump_all(sm->p_subn->p_osm);
		/* must precede clearing first_time_master_sweep, which
		 * state_mgr_up_msg() reads to pick the log level */
		state_mgr_up_msg(sm);
		sm->p_subn->first_time_master_sweep = FALSE;
		sm->p_subn->set_client_rereg_on_sweep = FALSE;

		if (OSM_LOG_IS_ACTIVE_V2(sm->p_log, OSM_LOG_VERBOSE) ||
		    sm->p_subn->opt.sa_db_dump)
			osm_sa_db_file_dump(sm->p_subn->p_osm);
	}

	/*
	 * Finally signal the subnet up event
	 * (done on the error path as well)
	 */
	cl_event_signal(&sm->subnet_up_event);

	osm_opensm_report_event(sm->p_subn->p_osm, OSM_EVENT_ID_SUBNET_UP,
				NULL);

	/* if we got a signal to force heavy sweep or errors
	 * in the middle of the sweep - try another sweep. */
	if (sm->p_subn->force_heavy_sweep
	    || sm->p_subn->subnet_initialization_error)
		osm_sm_signal(sm, OSM_SIGNAL_SWEEP);

	/* Write new copies of our persistent guid2mkey and neighbor
	 * databases */
	osm_db_store(sm->p_subn->p_g2m);
	osm_db_store(sm->p_subn->p_neighbor);
}