static void cache_remove_port(osm_ucast_mgr_t * p_mgr, uint16_t lid_ho,
			      uint8_t port_num, uint16_t remote_lid_ho,
			      boolean_t is_ca)
{
	cache_switch_t *p_cache_sw;

	OSM_LOG_ENTER(p_mgr->p_log);

	if (!p_mgr->cache_valid)
		goto Exit;

	p_cache_sw = cache_get_sw(p_mgr, lid_ho);
	if (!p_cache_sw) {
		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
			"Found uncached switch/link (lid %u, port %u)\n",
			lid_ho, port_num);
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	if (port_num >= p_cache_sw->num_ports ||
	    !p_cache_sw->ports[port_num].remote_lid_ho) {
		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
			"Found uncached switch link (lid %u, port %u)\n",
			lid_ho, port_num);
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	if (p_cache_sw->ports[port_num].remote_lid_ho != remote_lid_ho) {
		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
			"Remote lid change on switch lid %u, port %u "
			"(was %u, now %u)\n", lid_ho, port_num,
			p_cache_sw->ports[port_num].remote_lid_ho,
			remote_lid_ho);
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	if ((p_cache_sw->ports[port_num].is_leaf && !is_ca) ||
	    (!p_cache_sw->ports[port_num].is_leaf && is_ca)) {
		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
			"Remote node type change on switch lid %u, port %u\n",
			lid_ho, port_num);
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
		"New link from lid %u, port %u to lid %u - "
		"found in cache\n", lid_ho, port_num, remote_lid_ho);

	/* the new link was cached - clean it from the cache */

	p_cache_sw->ports[port_num].remote_lid_ho = 0;
	p_cache_sw->ports[port_num].is_leaf = FALSE;
Exit:
	OSM_LOG_EXIT(p_mgr->p_log);
}				/* cache_remove_port() */
static void
cache_check_link_change(osm_ucast_mgr_t * p_mgr,
			osm_physp_t * p_physp_1, osm_physp_t * p_physp_2)
{
	OSM_LOG_ENTER(p_mgr->p_log);
	CL_ASSERT(p_physp_1 && p_physp_2);

	if (!p_mgr->cache_valid)
		goto Exit;

	if (!p_physp_1->p_remote_physp && !p_physp_2->p_remote_physp)
		/* both ports were down - new link */
		goto Exit;

	/* unicast cache cannot tolerate any link location change */

	if ((p_physp_1->p_remote_physp &&
	     p_physp_1->p_remote_physp->p_remote_physp) ||
	    (p_physp_2->p_remote_physp &&
	     p_physp_2->p_remote_physp->p_remote_physp)) {
		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
			"Link location change discovered\n");
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}
Exit:
	OSM_LOG_EXIT(p_mgr->p_log);
}
static void cache_add_sw_link(osm_ucast_mgr_t * p_mgr, osm_physp_t *p,
			      uint16_t remote_lid_ho, boolean_t is_ca)
{
	cache_switch_t *p_cache_sw;
	uint16_t lid_ho = cl_ntoh16(osm_node_get_base_lid(p->p_node, 0));

	OSM_LOG_ENTER(p_mgr->p_log);

	if (!lid_ho || !remote_lid_ho || !p->port_num)
		goto Exit;

	OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
		"Caching switch port: lid %u [port %u] -> lid %u (%s)\n",
		lid_ho, p->port_num, remote_lid_ho, (is_ca) ? "CA/RTR" : "SW");

	p_cache_sw = cache_get_sw(p_mgr, lid_ho);
	if (!p_cache_sw) {
		p_cache_sw = cache_sw_new(lid_ho, p->p_node->sw->num_ports);
		if (!p_cache_sw) {
			OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
				"ERR AD01: Out of memory - cache is invalid\n");
			osm_ucast_cache_invalidate(p_mgr);
			goto Exit;
		}
		cl_qmap_insert(&p_mgr->cache_sw_tbl, lid_ho,
			       &p_cache_sw->map_item);
	}

	if (p->port_num >= p_cache_sw->num_ports) {
		OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
			"ERR AD02: Wrong switch? - cache is invalid\n");
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	if (is_ca)
		cache_sw_set_leaf(p_cache_sw);

	if (p_cache_sw->ports[p->port_num].remote_lid_ho == 0) {
		/* cache this link only if it hasn't been already cached */
		p_cache_sw->ports[p->port_num].remote_lid_ho = remote_lid_ho;
		p_cache_sw->ports[p->port_num].is_leaf = is_ca;
	}
Exit:
	OSM_LOG_EXIT(p_mgr->p_log);
}
예제 #4
0
void osm_ucast_mgr_destroy(IN osm_ucast_mgr_t * p_mgr)
{
	CL_ASSERT(p_mgr);

	OSM_LOG_ENTER(p_mgr->p_log);

	if (p_mgr->cache_valid)
		osm_ucast_cache_invalidate(p_mgr);

	OSM_LOG_EXIT(p_mgr->p_log);
}
예제 #5
0
static void do_sweep(osm_sm_t * sm)
{
	ib_api_status_t status;
	osm_remote_sm_t *p_remote_sm;
	unsigned config_parsed = 0;

	if (sm->p_subn->force_heavy_sweep) {
		if (osm_subn_rescan_conf_files(sm->p_subn) < 0)
			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 331A: "
				"osm_subn_rescan_conf_file failed\n");
		else
			config_parsed = 1;
	}

	if (sm->p_subn->sm_state != IB_SMINFO_STATE_MASTER &&
	    sm->p_subn->sm_state != IB_SMINFO_STATE_DISCOVERING)
		return;

	if (sm->p_subn->coming_out_of_standby)
		/*
		 * Need to force re-write of sm_base_lid to all ports
		 * to do that we want all the ports to be considered
		 * foreign
		 */
		state_mgr_clean_known_lids(sm);

	sm->master_sm_found = 0;

	/*
	 * If we already have switches, then try a light sweep.
	 * Otherwise, this is probably our first discovery pass
	 * or we are connected in loopback. In both cases do a
	 * heavy sweep.
	 * Note: If we are connected in loopback we want a heavy
	 * sweep, since we will not be getting any traps if there is
	 * a lost connection.
	 */
	/*  if we are in DISCOVERING state - this means it is either in
	 *  initializing or wake up from STANDBY - run the heavy sweep */
	if (cl_qmap_count(&sm->p_subn->sw_guid_tbl)
	    && sm->p_subn->sm_state != IB_SMINFO_STATE_DISCOVERING
	    && sm->p_subn->opt.force_heavy_sweep == FALSE
	    && sm->p_subn->force_heavy_sweep == FALSE
	    && sm->p_subn->force_reroute == FALSE
	    && sm->p_subn->subnet_initialization_error == FALSE
	    && (state_mgr_light_sweep_start(sm) == IB_SUCCESS)) {
		if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
			return;
		if (!sm->p_subn->force_heavy_sweep) {
			if (sm->p_subn->opt.sa_db_dump &&
			    !osm_sa_db_file_dump(sm->p_subn->p_osm))
				osm_opensm_report_event(sm->p_subn->p_osm,
					OSM_EVENT_ID_SA_DB_DUMPED, NULL);
			OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
					"LIGHT SWEEP COMPLETE");
			return;
		}
	}

	/*
	 * Unicast cache should be invalidated if there were errors
	 * during initialization or if subnet re-route is requested.
	 */
	if (sm->p_subn->opt.use_ucast_cache &&
	    (sm->p_subn->subnet_initialization_error ||
	     sm->p_subn->force_reroute || sm->p_subn->coming_out_of_standby))
		osm_ucast_cache_invalidate(&sm->ucast_mgr);

	/*
	 * If we don't need to do a heavy sweep and we want to do a reroute,
	 * just reroute only.
	 */
	if (cl_qmap_count(&sm->p_subn->sw_guid_tbl)
	    && sm->p_subn->sm_state != IB_SMINFO_STATE_DISCOVERING
	    && sm->p_subn->opt.force_heavy_sweep == FALSE
	    && sm->p_subn->force_heavy_sweep == FALSE
	    && sm->p_subn->force_reroute == TRUE
	    && sm->p_subn->subnet_initialization_error == FALSE) {
		/* Reset flag */
		sm->p_subn->force_reroute = FALSE;

		/* Re-program the switches fully */
		sm->p_subn->ignore_existing_lfts = TRUE;

		if (osm_ucast_mgr_process(&sm->ucast_mgr)) {
			OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
					"REROUTE FAILED");
			return;
		}
		osm_qos_setup(sm->p_subn->p_osm);

		/* Reset flag */
		sm->p_subn->ignore_existing_lfts = FALSE;

		if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
			return;

		if (!sm->p_subn->subnet_initialization_error) {
			OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
					"REROUTE COMPLETE");
			osm_opensm_report_event(sm->p_subn->p_osm,
				OSM_EVENT_ID_UCAST_ROUTING_DONE, NULL);
			return;
		}
	}

	osm_opensm_report_event(sm->p_subn->p_osm,
				OSM_EVENT_ID_HEAVY_SWEEP_START, NULL);

	/* go to heavy sweep */
repeat_discovery:

	/* First of all - unset all flags */
	sm->p_subn->force_heavy_sweep = FALSE;
	sm->p_subn->force_reroute = FALSE;
	sm->p_subn->subnet_initialization_error = FALSE;

	/* Reset tracking values in case limiting component got removed
	 * from fabric. */
	sm->p_subn->min_ca_mtu = IB_MAX_MTU;
	sm->p_subn->min_ca_rate = IB_MAX_RATE;
	sm->p_subn->min_data_vls = IB_MAX_NUM_VLS - 1;

	/* rescan configuration updates */
	if (!config_parsed && osm_subn_rescan_conf_files(sm->p_subn) < 0)
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 331A: "
			"osm_subn_rescan_conf_file failed\n");

	if (sm->p_subn->sm_state != IB_SMINFO_STATE_MASTER)
		sm->p_subn->need_update = 1;

	status = state_mgr_sweep_hop_0(sm);
	if (status != IB_SUCCESS ||
	    wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
		return;

	if (state_mgr_is_sm_port_down(sm) == TRUE) {
		if (sm->p_subn->last_sm_port_state) {
			sm->p_subn->last_sm_port_state = 0;
			osm_log(sm->p_log, OSM_LOG_SYS, "SM port is down\n");
			OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
					"SM PORT DOWN");
		}

		/* Run the drop manager - we want to clear all records */
		osm_drop_mgr_process(sm);

		/* Move to DISCOVERING state */
		 if (sm->p_subn->sm_state != IB_SMINFO_STATE_DISCOVERING)
			osm_sm_state_mgr_process(sm, OSM_SM_SIGNAL_DISCOVER);
		osm_opensm_report_event(sm->p_subn->p_osm,
				OSM_EVENT_ID_STATE_CHANGE, NULL);
		return;
	} else {
		if (!sm->p_subn->last_sm_port_state) {
			sm->p_subn->last_sm_port_state = 1;
			osm_log(sm->p_log, OSM_LOG_SYS, "SM port is up\n");
			OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
					"SM PORT UP");
		}
	}

	status = state_mgr_sweep_hop_1(sm);
	if (status != IB_SUCCESS ||
	    wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
		return;

	/* discovery completed - check other sm presence */
	if (sm->master_sm_found) {
		/*
		 * Call the sm_state_mgr with signal
		 * MASTER_OR_HIGHER_SM_DETECTED_DONE
		 */
		osm_sm_state_mgr_process(sm,
					 OSM_SM_SIGNAL_MASTER_OR_HIGHER_SM_DETECTED);
		OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
				"ENTERING STANDBY STATE");
		/* notify master SM about us */
		osm_send_trap144(sm, 0);
		osm_opensm_report_event(sm->p_subn->p_osm,
				OSM_EVENT_ID_STATE_CHANGE, NULL);
		return;
	}

	/* if new sweep requested - don't bother with the rest */
	if (sm->p_subn->force_heavy_sweep)
		goto repeat_discovery;

	osm_opensm_report_event(sm->p_subn->p_osm,
				OSM_EVENT_ID_HEAVY_SWEEP_DONE, NULL);

	OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE, "HEAVY SWEEP COMPLETE");

	/* If we are MASTER - get the highest remote_sm, and
	 * see if it is higher than our local sm.
	 */
	if (sm->p_subn->sm_state == IB_SMINFO_STATE_MASTER) {
		p_remote_sm = state_mgr_get_highest_sm(sm);
		if (p_remote_sm != NULL) {
			/* report new ports (trap 64) before leaving MASTER */
			state_mgr_report_new_ports(sm);

			/* need to handover the mastership
			 * to the remote sm, and move to standby */
			state_mgr_send_handover(sm, p_remote_sm);
			osm_sm_state_mgr_process(sm,
						 OSM_SM_SIGNAL_HANDOVER_SENT);
			return;
		} else {
			/* We are the highest sm - check to see if there is
			 * a remote SM that is in master state. */
			p_remote_sm = state_mgr_exists_other_master_sm(sm);
			if (p_remote_sm != NULL) {
				/* There is a remote SM that is master.
				 * need to wait for that SM to relinquish control
				 * of its portion of the subnet. C14-60.2.1.
				 * Also - need to start polling on that SM. */
				sm->p_polling_sm = p_remote_sm;
				osm_sm_state_mgr_process(sm,
							 OSM_SM_SIGNAL_WAIT_FOR_HANDOVER);
				return;
			}
		}
	}

	/* Need to continue with lid assignment */
	osm_drop_mgr_process(sm);

	/*
	 * If we are not MASTER already - this means that we are
	 * in discovery state. call osm_sm_state_mgr with signal
	 * DISCOVERY_COMPLETED
	 */
	if (sm->p_subn->sm_state == IB_SMINFO_STATE_DISCOVERING)
		osm_sm_state_mgr_process(sm, OSM_SM_SIGNAL_DISCOVERY_COMPLETED);

	osm_pkey_mgr_process(sm->p_subn->p_osm);

	/* try to restore SA DB (this should be before lid_mgr
	   because we may want to disable clients reregistration
	   when SA DB is restored) */
	osm_sa_db_file_load(sm->p_subn->p_osm);

	if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
		return;

	OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
			"PKEY and QOS setup completed - STARTING SM LID CONFIG");

	osm_lid_mgr_process_sm(&sm->lid_mgr);
	if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
		return;

	OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
			"SM LID ASSIGNMENT COMPLETE - STARTING SUBNET LID CONFIG");
	state_mgr_notify_lid_change(sm);

	osm_lid_mgr_process_subnet(&sm->lid_mgr);
	if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
		return;

	/* At this point we need to check the consistency of
	 * the port_lid_tbl under the subnet. There might be
	 * errors in it if PortInfo Set requests didn't reach
	 * their destination. */
	state_mgr_check_tbl_consistency(sm);

	OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
			"LID ASSIGNMENT COMPLETE - STARTING SWITCH TABLE CONFIG");

	/*
	 * Proceed with unicast forwarding table configuration; if it fails
	 * return early to wait for a trap or the next sweep interval.
	 */

	if (!sm->ucast_mgr.cache_valid ||
	    osm_ucast_cache_process(&sm->ucast_mgr))
		if (osm_ucast_mgr_process(&sm->ucast_mgr))
			return;

	osm_qos_setup(sm->p_subn->p_osm);

	if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
		return;

	/* cleanup switch lft buffers */
	cl_qmap_apply_func(&sm->p_subn->sw_guid_tbl, cleanup_switch, sm->p_log);

	/* We are done setting all LFTs so clear the ignore existing.
	 * From now on, as long as we are still master, we want to
	 * take into account these lfts. */
	sm->p_subn->ignore_existing_lfts = FALSE;

	OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
			"SWITCHES CONFIGURED FOR UNICAST");
	osm_opensm_report_event(sm->p_subn->p_osm,
			OSM_EVENT_ID_UCAST_ROUTING_DONE, NULL);

	if (!sm->p_subn->opt.disable_multicast) {
		osm_mcast_mgr_process(sm, TRUE);
		if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
			return;
		OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
				"SWITCHES CONFIGURED FOR MULTICAST");
	}

	/*
	 * The LINK_PORTS state is required since we cannot count on
	 * the port state change MADs to succeed. This is an artifact
	 * of the spec defining state change from state X to state X
	 * as an error. The hardware then is not required to process
	 * other parameters provided by the Set(PortInfo) Packet.
	 */

	osm_link_mgr_process(sm, IB_LINK_NO_CHANGE);
	if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
		return;

	OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
			"LINKS PORTS CONFIGURED - SET LINKS TO ARMED STATE");

	osm_link_mgr_process(sm, IB_LINK_ARMED);
	if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
		return;

	OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
			"LINKS ARMED - SET LINKS TO ACTIVE STATE");

	osm_link_mgr_process(sm, IB_LINK_ACTIVE);
	if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
		return;

	/*
	 * The sweep completed!
	 */

	/*
	 * Send trap 64 on newly discovered endports
	 */
	state_mgr_report_new_ports(sm);

	/* in any case we zero this flag */
	sm->p_subn->coming_out_of_standby = FALSE;

	/* If there were errors - then the subnet is not really up */
	if (sm->p_subn->subnet_initialization_error == TRUE) {
		osm_log(sm->p_log, OSM_LOG_SYS,
			"Errors during initialization\n");
		OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_ERROR,
				"ERRORS DURING INITIALIZATION");
	} else {
		sm->p_subn->need_update = 0;
		osm_dump_all(sm->p_subn->p_osm);
		state_mgr_up_msg(sm);
		sm->p_subn->first_time_master_sweep = FALSE;

		if (osm_log_is_active(sm->p_log, OSM_LOG_VERBOSE) ||
		    sm->p_subn->opt.sa_db_dump)
			osm_sa_db_file_dump(sm->p_subn->p_osm);
	}

	/*
	 * Finally signal the subnet up event
	 */
	cl_event_signal(&sm->subnet_up_event);

	osm_opensm_report_event(sm->p_subn->p_osm, OSM_EVENT_ID_SUBNET_UP,
				NULL);

	/* if we got a signal to force heavy sweep or errors
	 * in the middle of the sweep - try another sweep. */
	if (sm->p_subn->force_heavy_sweep
	    || sm->p_subn->subnet_initialization_error)
		osm_sm_signal(sm, OSM_SIGNAL_SWEEP);
}
void osm_ucast_cache_add_node(osm_ucast_mgr_t * p_mgr, osm_node_t * p_node)
{
	uint16_t lid_ho;
	uint8_t max_ports;
	uint8_t port_num;
	osm_physp_t *p_physp;
	cache_switch_t *p_cache_sw;

	OSM_LOG_ENTER(p_mgr->p_log);

	if (!p_mgr->cache_valid)
		goto Exit;

	if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH) {

		lid_ho = cl_ntoh16(osm_node_get_base_lid(p_node, 0));

		if (!lid_ho) {
			OSM_LOG(p_mgr->p_log, OSM_LOG_VERBOSE,
				"Skip caching. Switch dropped before "
				"it gets a valid lid.\n");
			osm_ucast_cache_invalidate(p_mgr);
			goto Exit;
		}

		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
			"Caching dropped switch lid %u\n", lid_ho);

		if (!p_node->sw) {
			/* something is wrong - forget about cache */
			OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
				"ERR AD03: no switch info for node lid %u - "
				"clearing cache\n", lid_ho);
			osm_ucast_cache_invalidate(p_mgr);
			goto Exit;
		}

		/* unlink (add to cache) all the ports of this switch */
		max_ports = osm_node_get_num_physp(p_node);
		for (port_num = 1; port_num < max_ports; port_num++) {

			p_physp = osm_node_get_physp_ptr(p_node, port_num);
			if (!p_physp || !p_physp->p_remote_physp)
				continue;

			osm_ucast_cache_add_link(p_mgr, p_physp,
						 p_physp->p_remote_physp);
		}

		/*
		 * All the ports have been dropped (cached).
		 * If one of the ports was connected to CA/RTR,
		 * then the cached switch would be marked as leaf.
		 * If it isn't, then the dropped switch isn't a leaf,
		 * and cache can't handle it.
		 */

		p_cache_sw = cache_get_sw(p_mgr, lid_ho);

		/* p_cache_sw could be NULL if it has no remote phys ports */
		if (!p_cache_sw || !cache_sw_is_leaf(p_cache_sw)) {
			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
				"Dropped non-leaf switch (lid %u)\n", lid_ho);
			osm_ucast_cache_invalidate(p_mgr);
			goto Exit;
		}

		p_cache_sw->dropped = TRUE;

		if (!p_node->sw->num_hops || !p_node->sw->hops) {
			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
				"No LID matrices for switch lid %u\n", lid_ho);
			osm_ucast_cache_invalidate(p_mgr);
			goto Exit;
		}

		/* lid matrices */

		p_cache_sw->num_hops = p_node->sw->num_hops;
		p_node->sw->num_hops = 0;
		p_cache_sw->hops = p_node->sw->hops;
		p_node->sw->hops = NULL;

		/* linear forwarding table */

		if (p_node->sw->new_lft) {
			/* LFT buffer exists - we use it, because
			   it is more updated than the switch's LFT */
			p_cache_sw->lft = p_node->sw->new_lft;
			p_node->sw->new_lft = NULL;
		} else {
			/* no LFT buffer, so we use the switch's LFT */
			p_cache_sw->lft = p_node->sw->lft;
			p_node->sw->lft = NULL;
			p_node->sw->lft_size = 0;
		}
		p_cache_sw->max_lid_ho = p_node->sw->max_lid_ho;
	} else {
		/* dropping CA/RTR: add to cache all the ports of this node */
		max_ports = osm_node_get_num_physp(p_node);
		for (port_num = 1; port_num < max_ports; port_num++) {

			p_physp = osm_node_get_physp_ptr(p_node, port_num);
			if (!p_physp || !p_physp->p_remote_physp)
				continue;

			CL_ASSERT(osm_node_get_type
				  (p_physp->p_remote_physp->p_node) ==
				  IB_NODE_TYPE_SWITCH);

			osm_ucast_cache_add_link(p_mgr,
						 p_physp->p_remote_physp,
						 p_physp);
		}
	}
Exit:
	OSM_LOG_EXIT(p_mgr->p_log);
}				/* osm_ucast_cache_add_node() */
void osm_ucast_cache_add_link(osm_ucast_mgr_t * p_mgr,
			      osm_physp_t * p_physp1, osm_physp_t * p_physp2)
{
	osm_node_t *p_node_1 = p_physp1->p_node, *p_node_2 = p_physp2->p_node;
	uint16_t lid_ho_1, lid_ho_2;

	OSM_LOG_ENTER(p_mgr->p_log);

	if (!p_mgr->cache_valid)
		goto Exit;

	if (osm_node_get_type(p_node_1) != IB_NODE_TYPE_SWITCH &&
	    osm_node_get_type(p_node_2) != IB_NODE_TYPE_SWITCH) {
		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "Dropping CA-2-CA link\n");
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	if ((osm_node_get_type(p_node_1) == IB_NODE_TYPE_SWITCH &&
	     !osm_node_get_physp_ptr(p_node_1, 0)) ||
	    (osm_node_get_type(p_node_2) == IB_NODE_TYPE_SWITCH &&
	     !osm_node_get_physp_ptr(p_node_2, 0))) {
		/* we're caching a link when one of the nodes
		   has already been dropped and cached */
		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
			"Port %u <-> port %u: port0 on one of the nodes "
			"has already been dropped and cached\n",
			p_physp1->port_num, p_physp2->port_num);
		goto Exit;
	}

	/* One of the nodes is switch. Just for code
	   simplicity, make sure that it's the first node. */

	if (osm_node_get_type(p_node_1) != IB_NODE_TYPE_SWITCH) {
		osm_physp_t *tmp = p_physp1;
		p_physp1 = p_physp2;
		p_physp2 = tmp;
		p_node_1 = p_physp1->p_node;
		p_node_2 = p_physp2->p_node;
	}

	if (!p_node_1->sw) {
		/* something is wrong - we'd better not use cache */
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	lid_ho_1 = cl_ntoh16(osm_node_get_base_lid(p_node_1, 0));

	if (osm_node_get_type(p_node_2) == IB_NODE_TYPE_SWITCH) {

		if (!p_node_2->sw) {
			/* something is wrong - we'd better not use cache */
			osm_ucast_cache_invalidate(p_mgr);
			goto Exit;
		}

		lid_ho_2 = cl_ntoh16(osm_node_get_base_lid(p_node_2, 0));

		/* lost switch-2-switch link - cache both sides */
		cache_add_sw_link(p_mgr, p_physp1, lid_ho_2, FALSE);
		cache_add_sw_link(p_mgr, p_physp2, lid_ho_1, FALSE);
	} else {
		lid_ho_2 = cl_ntoh16(osm_physp_get_base_lid(p_physp2));

		/* lost link to CA/RTR - cache only switch side */
		cache_add_sw_link(p_mgr, p_physp1, lid_ho_2, TRUE);
	}

Exit:
	OSM_LOG_EXIT(p_mgr->p_log);
}				/* osm_ucast_cache_add_link() */
void osm_ucast_cache_check_new_link(osm_ucast_mgr_t * p_mgr,
				    osm_node_t * p_node_1, uint8_t port_num_1,
				    osm_node_t * p_node_2, uint8_t port_num_2)
{
	uint16_t lid_ho_1;
	uint16_t lid_ho_2;

	OSM_LOG_ENTER(p_mgr->p_log);

	if (!p_mgr->cache_valid)
		goto Exit;

	cache_check_link_change(p_mgr,
				osm_node_get_physp_ptr(p_node_1, port_num_1),
				osm_node_get_physp_ptr(p_node_2, port_num_2));

	if (!p_mgr->cache_valid)
		goto Exit;

	if (osm_node_get_type(p_node_1) != IB_NODE_TYPE_SWITCH &&
	    osm_node_get_type(p_node_2) != IB_NODE_TYPE_SWITCH) {
		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "Found CA-2-CA link\n");
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	/* for code simplicity, we want the first node to be switch */
	if (osm_node_get_type(p_node_1) != IB_NODE_TYPE_SWITCH) {
		osm_node_t *tmp_node = p_node_1;
		uint8_t tmp_port_num = port_num_1;
		p_node_1 = p_node_2;
		port_num_1 = port_num_2;
		p_node_2 = tmp_node;
		port_num_2 = tmp_port_num;
	}

	lid_ho_1 = cl_ntoh16(osm_node_get_base_lid(p_node_1, 0));

	if (osm_node_get_type(p_node_2) == IB_NODE_TYPE_SWITCH)
		lid_ho_2 = cl_ntoh16(osm_node_get_base_lid(p_node_2, 0));
	else
		lid_ho_2 =
		    cl_ntoh16(osm_node_get_base_lid(p_node_2, port_num_2));

	if (!lid_ho_1 || !lid_ho_2) {
		/*
		 * No lid assigned, which means that one of the nodes is new.
		 * Need to wait for lid manager to process this node.
		 * The switches and their links will be checked later when
		 * the whole cache validity will be verified.
		 */
		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
			"Link port %u <-> %u reveals new node - cache will "
			"be validated later\n", port_num_1, port_num_2);
		goto Exit;
	}

	cache_remove_port(p_mgr, lid_ho_1, port_num_1, lid_ho_2,
			  (osm_node_get_type(p_node_2) !=
			  IB_NODE_TYPE_SWITCH));

	/* if node_2 is a switch, the link should be cleaned from its cache */

	if (osm_node_get_type(p_node_2) == IB_NODE_TYPE_SWITCH)
		cache_remove_port(p_mgr, lid_ho_2,
				  port_num_2, lid_ho_1, FALSE);

Exit:
	OSM_LOG_EXIT(p_mgr->p_log);
}				/* osm_ucast_cache_check_new_link() */
static void ucast_cache_validate(osm_ucast_mgr_t * p_mgr)
{
	cache_switch_t *p_cache_sw;
	cache_switch_t *p_remote_cache_sw;
	unsigned port_num;
	unsigned max_ports;
	uint8_t remote_node_type;
	uint16_t lid_ho;
	uint16_t remote_lid_ho;
	osm_switch_t *p_sw;
	osm_switch_t *p_remote_sw;
	osm_node_t *p_node;
	osm_physp_t *p_physp;
	osm_physp_t *p_remote_physp;
	osm_port_t *p_remote_port;
	cl_qmap_t *p_sw_tbl;

	OSM_LOG_ENTER(p_mgr->p_log);
	if (!p_mgr->cache_valid)
		goto Exit;

	/* If there are no switches in the subnet, we are done */
	p_sw_tbl = &p_mgr->p_subn->sw_guid_tbl;
	if (cl_qmap_count(p_sw_tbl) == 0) {
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	/*
	 * Scan all the physical switch ports in the subnet.
	 * If the port need_update flag is on, check whether
	 * it's just some node/port reset or a cached topology
	 * change. Otherwise the cache is invalid.
	 */
	for (p_sw = (osm_switch_t *) cl_qmap_head(p_sw_tbl);
	     p_sw != (osm_switch_t *) cl_qmap_end(p_sw_tbl);
	     p_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item)) {

		p_node = p_sw->p_node;

		lid_ho = cl_ntoh16(osm_node_get_base_lid(p_node, 0));
		p_cache_sw = cache_get_sw(p_mgr, lid_ho);

		max_ports = osm_node_get_num_physp(p_node);

		/* skip port 0 */
		for (port_num = 1; port_num < max_ports; port_num++) {

			p_physp = osm_node_get_physp_ptr(p_node, port_num);

			if (!p_physp || !p_physp->p_remote_physp ||
			    !osm_physp_link_exists(p_physp,
						   p_physp->p_remote_physp))
				/* no valid link */
				continue;

			/*
			 * While scanning all the physical ports in the subnet,
			 * mark corresponding leaf switches in the cache.
			 */
			if (p_cache_sw &&
			    !p_cache_sw->dropped &&
			    !cache_sw_is_leaf(p_cache_sw) &&
			    p_physp->p_remote_physp->p_node &&
			    osm_node_get_type(p_physp->p_remote_physp->
					      p_node) != IB_NODE_TYPE_SWITCH)
				cache_sw_set_leaf(p_cache_sw);

			if (!p_physp->need_update)
				continue;

			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
				"Checking switch lid %u, port %u\n",
				lid_ho, port_num);

			p_remote_physp = osm_physp_get_remote(p_physp);
			remote_node_type =
			    osm_node_get_type(p_remote_physp->p_node);

			if (remote_node_type == IB_NODE_TYPE_SWITCH)
				remote_lid_ho =
				    cl_ntoh16(osm_node_get_base_lid
					      (p_remote_physp->p_node, 0));
			else
				remote_lid_ho =
				    cl_ntoh16(osm_node_get_base_lid
					      (p_remote_physp->p_node,
					       osm_physp_get_port_num
					       (p_remote_physp)));

			if (!p_cache_sw ||
			    port_num >= p_cache_sw->num_ports ||
			    !p_cache_sw->ports[port_num].remote_lid_ho) {
				/*
				 * There is some uncached change on the port.
				 * In general, the reasons might be as follows:
				 *  - switch reset
				 *  - port reset (or port down/up)
				 *  - quick connection location change
				 *  - new link (or new switch)
				 *
				 * First two reasons allow cache usage, while
				 * the last two reasons should invalidate cache.
				 *
				 * In case of quick connection location change,
				 * cache would have been invalidated by
				 * osm_ucast_cache_check_new_link() function.
				 *
				 * In case of new link between two known nodes,
				 * cache also would have been invalidated by
				 * osm_ucast_cache_check_new_link() function.
				 *
				 * Another reason is cached link between two
				 * known switches went back. In this case the
				 * osm_ucast_cache_check_new_link() function would
				 * clear both sides of the link from the cache
				 * during the discovery process, so effectively
				 * this would be equivalent to port reset.
				 *
				 * So three possible reasons remain:
				 *  - switch reset
				 *  - port reset (or port down/up)
				 *  - link of a new switch
				 *
				 * To validate cache, we need to check only the
				 * third reason - link of a new node/switch:
				 *  - If this is the local switch that is new,
				 *    then it should have (p_sw->need_update == 2).
				 *  - If the remote node is switch and it's new,
				 *    then it also should have
				 *    (p_sw->need_update == 2).
				 *  - If the remote node is CA/RTR and it's new,
				 *    then its port should have is_new flag on.
				 */
				if (p_sw->need_update == 2) {
					OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
						"New switch found (lid %u)\n",
						lid_ho);
					osm_ucast_cache_invalidate(p_mgr);
					goto Exit;
				}

				if (remote_node_type == IB_NODE_TYPE_SWITCH) {

					p_remote_sw =
					    p_remote_physp->p_node->sw;
					if (p_remote_sw->need_update == 2) {
						/* this could also be case of
						   switch coming back with an
						   additional link that it
						   didn't have before */
						OSM_LOG(p_mgr->p_log,
							OSM_LOG_DEBUG,
							"New switch/link found (lid %u)\n",
							remote_lid_ho);
						osm_ucast_cache_invalidate
						    (p_mgr);
						goto Exit;
					}
				} else {
					/*
					 * Remote node is CA/RTR.
					 * Get p_port of the remote node and
					 * check its p_port->is_new flag.
					 */
					p_remote_port =
					    osm_get_port_by_guid(p_mgr->p_subn,
								 osm_physp_get_port_guid
								 (p_remote_physp));
					if (p_remote_port->is_new) {
						OSM_LOG(p_mgr->p_log,
							OSM_LOG_DEBUG,
							"New CA/RTR found (lid %u)\n",
							remote_lid_ho);
						osm_ucast_cache_invalidate
						    (p_mgr);
						goto Exit;
					}
				}
			} else {
				/*
				 * The change on the port is cached.
				 * In general, the reasons might be as follows:
				 *  - link between two known nodes went back
				 *  - one or more nodes went back, causing all
				 *    the links to reappear
				 *
				 * If it was link that went back, then this case
				 * would have been taken care of during the
				 * discovery by osm_ucast_cache_check_new_link(),
				 * so it's some node that went back.
				 */
				if ((p_cache_sw->ports[port_num].is_leaf &&
				     remote_node_type == IB_NODE_TYPE_SWITCH) ||
				    (!p_cache_sw->ports[port_num].is_leaf &&
				     remote_node_type != IB_NODE_TYPE_SWITCH)) {
					OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
						"Remote node type change on switch lid %u, port %u\n",
						lid_ho, port_num);
					osm_ucast_cache_invalidate(p_mgr);
					goto Exit;
				}

				if (p_cache_sw->ports[port_num].remote_lid_ho !=
				    remote_lid_ho) {
					OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
						"Remote lid change on switch lid %u, port %u"
						"(was %u, now %u)\n",
						lid_ho, port_num,
						p_cache_sw->ports[port_num].
						remote_lid_ho, remote_lid_ho);
					osm_ucast_cache_invalidate(p_mgr);
					goto Exit;
				}

				/*
				 * We don't care who is the node that has
				 * reappeared in the subnet (local or remote).
				 * What's important that the cached link matches
				 * the real fabrics link.
				 * Just clean it from cache.
				 */

				p_cache_sw->ports[port_num].remote_lid_ho = 0;
				p_cache_sw->ports[port_num].is_leaf = FALSE;
				if (p_cache_sw->dropped) {
					cache_restore_ucast_info(p_mgr,
								 p_cache_sw,
								 p_sw);
					p_cache_sw->dropped = FALSE;
				}

				OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
					"Restored link from cache: lid %u, port %u to lid %u\n",
					lid_ho, port_num, remote_lid_ho);
			}
		}
	}

	/* Remove all the cached switches that
	   have all their ports restored */
	cache_cleanup_switches(p_mgr);

	/*
	 * Done scanning all the physical switch ports in the subnet.
	 * Now we need to check the other side:
	 * Scan all the cached switches and their ports:
	 *  - If the cached switch is missing in the subnet
	 *    (dropped flag is on), check that it's a leaf switch.
	 *    If it's not a leaf, the cache is invalid, because
	 *    cache can tolerate only leaf switch removal.
	 *  - If the cached switch exists in fabric, check all
	 *    its cached ports. These cached ports represent
	 *    missing link in the fabric.
	 *    The missing links that can be tolerated are:
	 *      + link to missing CA/RTR
	 *      + link to missing leaf switch
	 */
	for (p_cache_sw = (cache_switch_t *) cl_qmap_head(&p_mgr->cache_sw_tbl);
	     p_cache_sw != (cache_switch_t *) cl_qmap_end(&p_mgr->cache_sw_tbl);
	     p_cache_sw =
	     (cache_switch_t *) cl_qmap_next(&p_cache_sw->map_item)) {

		if (p_cache_sw->dropped) {
			if (!cache_sw_is_leaf(p_cache_sw)) {
				OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
					"Missing non-leaf switch (lid %u)\n",
					cache_sw_get_base_lid_ho(p_cache_sw));
				osm_ucast_cache_invalidate(p_mgr);
				goto Exit;
			}

			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
				"Missing leaf switch (lid %u) - "
				"continuing validation\n",
				cache_sw_get_base_lid_ho(p_cache_sw));
			continue;
		}

		for (port_num = 1; port_num < p_cache_sw->num_ports; port_num++) {
			if (!p_cache_sw->ports[port_num].remote_lid_ho)
				continue;

			if (p_cache_sw->ports[port_num].is_leaf) {
				CL_ASSERT(cache_sw_is_leaf(p_cache_sw));
				OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
					"Switch lid %u, port %u: missing link to CA/RTR - "
					"continuing validation\n",
					cache_sw_get_base_lid_ho(p_cache_sw),
					port_num);
				continue;
			}

			p_remote_cache_sw = cache_get_sw(p_mgr,
							 p_cache_sw->
							 ports[port_num].
							 remote_lid_ho);

			if (!p_remote_cache_sw || !p_remote_cache_sw->dropped) {
				OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
					"Switch lid %u, port %u: missing link to existing switch\n",
					cache_sw_get_base_lid_ho(p_cache_sw),
					port_num);
				osm_ucast_cache_invalidate(p_mgr);
				goto Exit;
			}

			if (!cache_sw_is_leaf(p_remote_cache_sw)) {
				OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
					"Switch lid %u, port %u: missing link to non-leaf switch\n",
					cache_sw_get_base_lid_ho(p_cache_sw),
					port_num);
				osm_ucast_cache_invalidate(p_mgr);
				goto Exit;
			}

			/*
			 * At this point we know that the missing link is to
			 * a leaf switch. However, one case deserves a special
			 * treatment. If there was a link between two leaf
			 * switches, then missing leaf switch might break
			 * routing. It is possible that there are routes
			 * that use leaf switches to get from switch to switch
			 * and not just to get to the CAs behind the leaf switch.
			 */
			if (cache_sw_is_leaf(p_cache_sw) &&
			    cache_sw_is_leaf(p_remote_cache_sw)) {
				OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
					"Switch lid %u, port %u: missing leaf-2-leaf link\n",
					cache_sw_get_base_lid_ho(p_cache_sw),
					port_num);
				osm_ucast_cache_invalidate(p_mgr);
				goto Exit;
			}

			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
				"Switch lid %u, port %u: missing remote leaf switch - "
				"continuing validation\n",
				cache_sw_get_base_lid_ho(p_cache_sw),
				port_num);
		}
	}

	OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "Unicast cache is valid\n");
	ucast_cache_dump(p_mgr);
Exit:
	OSM_LOG_EXIT(p_mgr->p_log);
}				/* osm_ucast_cache_validate() */