/*
  Send Error Callback:

  Only report the error and get rid of the mad wrapper
*/
static void __osmv_sa_mad_err_cb(IN void *bind_context, IN osm_madw_t * p_madw)
{
	osmv_sa_bind_info_t *p_bind = (osmv_sa_bind_info_t *) bind_context;
	osmv_query_req_t *p_query_req_copy = NULL;
	osmv_query_res_t query_res;

	OSM_LOG_ENTER(p_bind->p_log);

	/* Obtain the sent context etc */
	p_query_req_copy =
	    (osmv_query_req_t *) (long *)(long)(p_madw->context.ni_context.
						node_guid);

	/* provide the context of the original request in the result */
	query_res.query_context = p_query_req_copy->query_context;

	query_res.p_result_madw = p_madw;

	query_res.status = IB_TIMEOUT;
	query_res.result_cnt = 0;

	query_res.query_type = p_query_req_copy->query_type;

	p_query_req_copy->pfn_query_cb(&query_res);

	if ((p_query_req_copy->flags & OSM_SA_FLAGS_SYNC) == OSM_SA_FLAGS_SYNC)
		cl_event_signal(&p_bind->sync_event);

	if (p_query_req_copy)
		free(p_query_req_copy);
	OSM_LOG_EXIT(p_bind->p_log);
}
Esempio n. 2
0
void osmv_txn_abort_rmpp_txns(osm_bind_handle_t h_bind)
{
	osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind;
	cl_map_item_t *p_item;
	cl_map_obj_t *p_obj;
	osmv_txn_ctx_t *p_txn;
	osmv_rmpp_send_ctx_t *p_send_ctx;
	cl_qmap_t *p_map = p_bo->txn_mgr.p_txn_map;

	OSM_LOG_ENTER(p_bo->p_vendor->p_log);

	while (FALSE == cl_is_qmap_empty(p_map)) {

		p_item = cl_qmap_head(p_map);
		p_obj = PARENT_STRUCT(p_item, cl_map_obj_t, item);
		p_txn = (osmv_txn_ctx_t *) cl_qmap_obj(p_obj);
		p_send_ctx = osmv_txn_get_rmpp_send_ctx(p_txn);

		if (NULL != p_send_ctx) {

			p_send_ctx->status = IB_INTERRUPTED;

			/* Wake up the sender thread to let it break out */
			cl_event_signal(&p_send_ctx->event);
		}

		cl_qmap_remove_item(p_map, p_item);
	}

	OSM_LOG_EXIT(p_bo->p_vendor->p_log);
}
Esempio n. 3
0
void osm_vl15_destroy(IN osm_vl15_t * p_vl, IN struct osm_mad_pool *p_pool)
{
	osm_madw_t *p_madw;

	OSM_LOG_ENTER(p_vl->p_log);

	/*
	   Signal our threads that we're leaving.
	 */
	p_vl->thread_state = OSM_THREAD_STATE_EXIT;

	/*
	   Don't trigger unless event has been initialized.
	   Destroy the thread before we tear down the other objects.
	 */
	if (p_vl->state != OSM_VL15_STATE_INIT)
		cl_event_signal(&p_vl->signal);

	cl_thread_destroy(&p_vl->poller);

	/*
	   Return the outstanding messages to the pool
	 */

	cl_spinlock_acquire(&p_vl->lock);

	while (!cl_is_qlist_empty(&p_vl->rfifo)) {
		p_madw = (osm_madw_t *) cl_qlist_remove_head(&p_vl->rfifo);
		osm_mad_pool_put(p_pool, p_madw);
	}
	while (!cl_is_qlist_empty(&p_vl->ufifo)) {
		p_madw = (osm_madw_t *) cl_qlist_remove_head(&p_vl->ufifo);
		osm_mad_pool_put(p_pool, p_madw);
	}

	cl_spinlock_release(&p_vl->lock);

	cl_event_destroy(&p_vl->signal);
	p_vl->state = OSM_VL15_STATE_INIT;
	cl_spinlock_destroy(&p_vl->lock);

	OSM_LOG_EXIT(p_vl->p_log);
}
Esempio n. 4
0
void osm_vl15_poll(IN osm_vl15_t * p_vl)
{
	OSM_LOG_ENTER(p_vl->p_log);

	CL_ASSERT(p_vl->state == OSM_VL15_STATE_READY);

	/*
	   If we have room for more VL15 MADs on the wire,
	   then signal the poller thread.

	   This is not an airtight check, since the poller thread
	   could be just about to send another MAD as we signal
	   the event here.  To cover this rare case, the poller
	   thread checks for a spurious wake-up.
	 */
	if (p_vl->p_stats->qp0_mads_outstanding_on_wire <
	    (int32_t) p_vl->max_wire_smps) {
		OSM_LOG(p_vl->p_log, OSM_LOG_DEBUG,
			"Signalling poller thread\n");
		cl_event_signal(&p_vl->signal);
	}

	OSM_LOG_EXIT(p_vl->p_log);
}
/*
  Call back on new mad received:

  We basically only need to set the context of the query.
  Or report an error.

  A pointer to the actual context of the request (a copy of the oriignal
  request structure) is attached as the p_madw->context.ni_context.node_guid
*/
static void
__osmv_sa_mad_rcv_cb(IN osm_madw_t * p_madw,
		     IN void *bind_context, IN osm_madw_t * p_req_madw)
{
	osmv_sa_bind_info_t *p_bind = (osmv_sa_bind_info_t *) bind_context;
	osmv_query_req_t *p_query_req_copy = NULL;
	osmv_query_res_t query_res;
	ib_sa_mad_t *p_sa_mad;
	ib_net16_t mad_status;

	OSM_LOG_ENTER(p_bind->p_log);

	if (!p_req_madw) {
		OSM_LOG(p_bind->p_log, OSM_LOG_DEBUG,
			"Ignoring a non-response mad\n");
		osm_mad_pool_put(p_bind->p_mad_pool, p_madw);
		goto Exit;
	}

	/* obtain the sent context since we store it during send in the ni_ctx */
	p_query_req_copy =
	    (osmv_query_req_t *) (long *)(long)(p_req_madw->context.ni_context.
						node_guid);

	/* provide the context of the original request in the result */
	query_res.query_context = p_query_req_copy->query_context;

	/* provide the resulting madw */
	query_res.p_result_madw = p_madw;

	/* update the req fields */
	p_sa_mad = (ib_sa_mad_t *) p_madw->p_mad;

	/* if we got a remote error track it in the status */
	mad_status = (ib_net16_t) (p_sa_mad->status & IB_SMP_STATUS_MASK);
	if (mad_status != IB_SUCCESS) {
		OSM_LOG(p_bind->p_log, OSM_LOG_ERROR, "ERR 5501: "
			"Remote error:0x%04X\n", cl_ntoh16(mad_status));
		query_res.status = IB_REMOTE_ERROR;
	} else
		query_res.status = IB_SUCCESS;

	/* what if we have got back an empty mad ? */
	if (!p_madw->mad_size) {
		OSM_LOG(p_bind->p_log, OSM_LOG_ERROR, "ERR 5502: "
			"Got an empty mad\n");
		query_res.status = IB_ERROR;
	}

	if (IB_SUCCESS == mad_status) {

		/* if we are in not in a method response of an rmpp nature we must get only 1 */
		/* HACK: in the future we might need to be smarter for other methods... */
		if (p_sa_mad->method != IB_MAD_METHOD_GETTABLE_RESP) {
			query_res.result_cnt = 1;
		} else {
#ifndef VENDOR_RMPP_SUPPORT
			if (mad_status != IB_SUCCESS)
				query_res.result_cnt = 0;
			else
				query_res.result_cnt = 1;
#else
			if (ib_get_attr_size(p_sa_mad->attr_offset)) {
				/* we used the offset value to calculate the
				   number of records in here */
				query_res.result_cnt = (uintn_t)
				    ((p_madw->mad_size - IB_SA_MAD_HDR_SIZE) /
				     ib_get_attr_size(p_sa_mad->attr_offset));
				OSM_LOG(p_bind->p_log, OSM_LOG_DEBUG,
					"Count = %u = %zu / %u (%zu)\n",
					query_res.result_cnt,
					p_madw->mad_size - IB_SA_MAD_HDR_SIZE,
					ib_get_attr_size(p_sa_mad->attr_offset),
					(p_madw->mad_size -
					 IB_SA_MAD_HDR_SIZE) %
					ib_get_attr_size(p_sa_mad->attr_offset));
			} else
				query_res.result_cnt = 0;
#endif
		}
	}

	query_res.query_type = p_query_req_copy->query_type;

	p_query_req_copy->pfn_query_cb(&query_res);

	if ((p_query_req_copy->flags & OSM_SA_FLAGS_SYNC) == OSM_SA_FLAGS_SYNC)
		cl_event_signal(&p_bind->sync_event);

Exit:

	/* free the copied query request if found */
	if (p_query_req_copy)
		free(p_query_req_copy);

	/* put back the request madw */
	if (p_req_madw)
		osm_mad_pool_put(p_bind->p_mad_pool, p_req_madw);

	OSM_LOG_EXIT(p_bind->p_log);
}
Esempio n. 6
0
static uint64_t
__osmv_txn_timeout_cb(IN uint64_t key,
		      IN uint32_t num_regs, IN void *cb_context)
{
	osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) cb_context;
	uint64_t ret = 0;
	osmv_txn_ctx_t *p_txn;
	osmv_rmpp_send_ctx_t *p_send_ctx;
	osm_madw_t *p_madw = NULL;
	ib_mad_t *p_mad;
	osm_mad_addr_t *p_mad_addr;
	boolean_t invoke_err_cb = FALSE;

	OSM_LOG_ENTER(p_bo->p_vendor->p_log);

	/* Don't try to acquire a lock on the Bind Object -
	 * it's taken by the mechanism that drives the timeout based events!
	 * (Recall the special constructor that the Event Wheel is applied with)
	 */
	if (p_bo->is_closing) {
		goto txn_done;
	}

	ret = osmv_txn_lookup(p_bo, key, &p_txn);
	if (IB_NOT_FOUND == ret) {
		/* Prevent a race - the transaction is already destroyed */
		goto txn_done;
	}

	p_madw = p_txn->p_madw;

	switch (osmv_txn_get_rmpp_state(p_txn)) {

	case OSMV_TXN_RMPP_NONE:
		if (num_regs <= OSMV_MAX_RETRANSMIT) {
			/* We still did not exceed the limit of retransmissions.
			 * Set the next timeout's value.
			 */
			osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG,
				"__osmv_txn_timeout_cb: "
				"The transaction request (tid=0x%llX) timed out %d times. "
				"Retrying the send.\n",
				osmv_txn_get_tid(p_txn), num_regs);

			/* resend this mad */
			ret = osmv_simple_send_madw((osm_bind_handle_t *) p_bo,
						    p_madw, p_txn, TRUE);
			if (ret != IB_SUCCESS) {
				osm_log(p_bo->p_vendor->p_log, OSM_LOG_ERROR,
					"__osmv_txn_timeout_cb: "
					"Fail to send retry for transaction request (tid=0x%llX).\n",
					osmv_txn_get_tid(p_txn));

				osmv_txn_done((osm_bind_handle_t) p_bo, key,
					      TRUE /*in timeout callback */ );

				/* This is a requester. Always apply the callback */
				invoke_err_cb = TRUE;
			} else {
				uint64_t next_timeout_ms;
				next_timeout_ms =
				    p_bo->p_vendor->resp_timeout * (num_regs +
								    1) *
				    (num_regs + 1);
				/* when do we need to timeout again */
				ret =
				    cl_get_time_stamp() +
				    (uint64_t) (1000 * next_timeout_ms);

				osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG,
					"__osmv_txn_timeout_cb: "
					"Retry request timout in : %lu [msec].\n",
					next_timeout_ms);
			}
		} else {
			osm_log(p_bo->p_vendor->p_log, OSM_LOG_ERROR,
				"__osmv_txn_timeout_cb: ERR 6702: "
				"The transaction request (tid=0x%llX) timed out (after %d retries). "
				"Invoking the error callback.\n",
				osmv_txn_get_tid(p_txn), num_regs);

			osmv_txn_done((osm_bind_handle_t) p_bo, key,
				      TRUE /*in timeout callback */ );

			/* This is a requester. Always apply the callback */
			invoke_err_cb = TRUE;
		}
		break;

	case OSMV_TXN_RMPP_SENDER:
		osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG,
			"RMPP sender (tid=0x%llX) did not receive ACK "
			"on every segment in the current send window.\n",
			osmv_txn_get_tid(p_txn));

		p_send_ctx = osmv_txn_get_rmpp_send_ctx(p_txn);
		if (num_regs <= OSMV_MAX_RETRANSMIT) {
			/* We still did not exceed the limit of retransmissions.
			 * Set the next timeout's value.
			 */
			ret =
			    cl_get_time_stamp() +
			    1000 * p_bo->p_vendor->resp_timeout;
		} else {
			p_send_ctx->status = IB_TIMEOUT;

			p_mad = osm_madw_get_mad_ptr(p_madw);
			p_mad_addr = osm_madw_get_mad_addr_ptr(p_madw);

			/* Send an ABORT to the other side */
			osmv_rmpp_send_nak((osm_bind_handle_t) p_bo, p_mad,
					   p_mad_addr, IB_RMPP_TYPE_ABORT,
					   IB_RMPP_STATUS_T2L);
		}

		/* Wake the RMPP sender thread up */
		cl_event_signal(&p_send_ctx->event);
		break;

	case OSMV_TXN_RMPP_RECEIVER:
		osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG,
			"Transaction timeout on an RMPP receiver (tid=0x%llX). "
			"Dropping the transaction.\n", osmv_txn_get_tid(p_txn));

		osmv_txn_done((osm_bind_handle_t) p_bo, key,
			      TRUE /*in timeout callback */ );

		if (FALSE == osmv_txn_is_rmpp_init_by_peer(p_txn)) {
			/* This is a requester, still waiting for the reply. Apply the callback */
			invoke_err_cb = TRUE;
		}

		break;

	default:
		CL_ASSERT(FALSE);
	}

	if (TRUE == invoke_err_cb) {
		CL_ASSERT(NULL != p_madw);
		/* update the status in the p_madw */
		p_madw->status = IB_TIMEOUT;
		p_bo->send_err_cb(p_bo->cb_context, p_madw);
		/* no re-registration */
		ret = 0;
	}

txn_done:
	OSM_LOG_EXIT(p_bo->p_vendor->p_log);
	return ret;
}
Esempio n. 7
0
static void do_sweep(osm_sm_t * sm)
{
	ib_api_status_t status;
	osm_remote_sm_t *p_remote_sm;
	unsigned config_parsed = 0;

	if (sm->p_subn->force_heavy_sweep) {
		if (osm_subn_rescan_conf_files(sm->p_subn) < 0)
			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 331A: "
				"osm_subn_rescan_conf_file failed\n");
		else
			config_parsed = 1;
	}

	if (sm->p_subn->sm_state != IB_SMINFO_STATE_MASTER &&
	    sm->p_subn->sm_state != IB_SMINFO_STATE_DISCOVERING)
		return;

	if (sm->p_subn->coming_out_of_standby)
		/*
		 * Need to force re-write of sm_base_lid to all ports
		 * to do that we want all the ports to be considered
		 * foreign
		 */
		state_mgr_clean_known_lids(sm);

	sm->master_sm_found = 0;

	/*
	 * If we already have switches, then try a light sweep.
	 * Otherwise, this is probably our first discovery pass
	 * or we are connected in loopback. In both cases do a
	 * heavy sweep.
	 * Note: If we are connected in loopback we want a heavy
	 * sweep, since we will not be getting any traps if there is
	 * a lost connection.
	 */
	/*  if we are in DISCOVERING state - this means it is either in
	 *  initializing or wake up from STANDBY - run the heavy sweep */
	if (cl_qmap_count(&sm->p_subn->sw_guid_tbl)
	    && sm->p_subn->sm_state != IB_SMINFO_STATE_DISCOVERING
	    && sm->p_subn->opt.force_heavy_sweep == FALSE
	    && sm->p_subn->force_heavy_sweep == FALSE
	    && sm->p_subn->force_reroute == FALSE
	    && sm->p_subn->subnet_initialization_error == FALSE
	    && (state_mgr_light_sweep_start(sm) == IB_SUCCESS)) {
		if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
			return;
		if (!sm->p_subn->force_heavy_sweep) {
			if (sm->p_subn->opt.sa_db_dump &&
			    !osm_sa_db_file_dump(sm->p_subn->p_osm))
				osm_opensm_report_event(sm->p_subn->p_osm,
					OSM_EVENT_ID_SA_DB_DUMPED, NULL);
			OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
					"LIGHT SWEEP COMPLETE");
			return;
		}
	}

	/*
	 * Unicast cache should be invalidated if there were errors
	 * during initialization or if subnet re-route is requested.
	 */
	if (sm->p_subn->opt.use_ucast_cache &&
	    (sm->p_subn->subnet_initialization_error ||
	     sm->p_subn->force_reroute || sm->p_subn->coming_out_of_standby))
		osm_ucast_cache_invalidate(&sm->ucast_mgr);

	/*
	 * If we don't need to do a heavy sweep and we want to do a reroute,
	 * just reroute only.
	 */
	if (cl_qmap_count(&sm->p_subn->sw_guid_tbl)
	    && sm->p_subn->sm_state != IB_SMINFO_STATE_DISCOVERING
	    && sm->p_subn->opt.force_heavy_sweep == FALSE
	    && sm->p_subn->force_heavy_sweep == FALSE
	    && sm->p_subn->force_reroute == TRUE
	    && sm->p_subn->subnet_initialization_error == FALSE) {
		/* Reset flag */
		sm->p_subn->force_reroute = FALSE;

		/* Re-program the switches fully */
		sm->p_subn->ignore_existing_lfts = TRUE;

		if (osm_ucast_mgr_process(&sm->ucast_mgr)) {
			OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
					"REROUTE FAILED");
			return;
		}
		osm_qos_setup(sm->p_subn->p_osm);

		/* Reset flag */
		sm->p_subn->ignore_existing_lfts = FALSE;

		if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
			return;

		if (!sm->p_subn->subnet_initialization_error) {
			OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
					"REROUTE COMPLETE");
			osm_opensm_report_event(sm->p_subn->p_osm,
				OSM_EVENT_ID_UCAST_ROUTING_DONE, NULL);
			return;
		}
	}

	osm_opensm_report_event(sm->p_subn->p_osm,
				OSM_EVENT_ID_HEAVY_SWEEP_START, NULL);

	/* go to heavy sweep */
repeat_discovery:

	/* First of all - unset all flags */
	sm->p_subn->force_heavy_sweep = FALSE;
	sm->p_subn->force_reroute = FALSE;
	sm->p_subn->subnet_initialization_error = FALSE;

	/* Reset tracking values in case limiting component got removed
	 * from fabric. */
	sm->p_subn->min_ca_mtu = IB_MAX_MTU;
	sm->p_subn->min_ca_rate = IB_MAX_RATE;
	sm->p_subn->min_data_vls = IB_MAX_NUM_VLS - 1;

	/* rescan configuration updates */
	if (!config_parsed && osm_subn_rescan_conf_files(sm->p_subn) < 0)
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 331A: "
			"osm_subn_rescan_conf_file failed\n");

	if (sm->p_subn->sm_state != IB_SMINFO_STATE_MASTER)
		sm->p_subn->need_update = 1;

	status = state_mgr_sweep_hop_0(sm);
	if (status != IB_SUCCESS ||
	    wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
		return;

	if (state_mgr_is_sm_port_down(sm) == TRUE) {
		if (sm->p_subn->last_sm_port_state) {
			sm->p_subn->last_sm_port_state = 0;
			osm_log(sm->p_log, OSM_LOG_SYS, "SM port is down\n");
			OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
					"SM PORT DOWN");
		}

		/* Run the drop manager - we want to clear all records */
		osm_drop_mgr_process(sm);

		/* Move to DISCOVERING state */
		 if (sm->p_subn->sm_state != IB_SMINFO_STATE_DISCOVERING)
			osm_sm_state_mgr_process(sm, OSM_SM_SIGNAL_DISCOVER);
		osm_opensm_report_event(sm->p_subn->p_osm,
				OSM_EVENT_ID_STATE_CHANGE, NULL);
		return;
	} else {
		if (!sm->p_subn->last_sm_port_state) {
			sm->p_subn->last_sm_port_state = 1;
			osm_log(sm->p_log, OSM_LOG_SYS, "SM port is up\n");
			OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
					"SM PORT UP");
		}
	}

	status = state_mgr_sweep_hop_1(sm);
	if (status != IB_SUCCESS ||
	    wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
		return;

	/* discovery completed - check other sm presence */
	if (sm->master_sm_found) {
		/*
		 * Call the sm_state_mgr with signal
		 * MASTER_OR_HIGHER_SM_DETECTED_DONE
		 */
		osm_sm_state_mgr_process(sm,
					 OSM_SM_SIGNAL_MASTER_OR_HIGHER_SM_DETECTED);
		OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
				"ENTERING STANDBY STATE");
		/* notify master SM about us */
		osm_send_trap144(sm, 0);
		osm_opensm_report_event(sm->p_subn->p_osm,
				OSM_EVENT_ID_STATE_CHANGE, NULL);
		return;
	}

	/* if new sweep requested - don't bother with the rest */
	if (sm->p_subn->force_heavy_sweep)
		goto repeat_discovery;

	osm_opensm_report_event(sm->p_subn->p_osm,
				OSM_EVENT_ID_HEAVY_SWEEP_DONE, NULL);

	OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE, "HEAVY SWEEP COMPLETE");

	/* If we are MASTER - get the highest remote_sm, and
	 * see if it is higher than our local sm.
	 */
	if (sm->p_subn->sm_state == IB_SMINFO_STATE_MASTER) {
		p_remote_sm = state_mgr_get_highest_sm(sm);
		if (p_remote_sm != NULL) {
			/* report new ports (trap 64) before leaving MASTER */
			state_mgr_report_new_ports(sm);

			/* need to handover the mastership
			 * to the remote sm, and move to standby */
			state_mgr_send_handover(sm, p_remote_sm);
			osm_sm_state_mgr_process(sm,
						 OSM_SM_SIGNAL_HANDOVER_SENT);
			return;
		} else {
			/* We are the highest sm - check to see if there is
			 * a remote SM that is in master state. */
			p_remote_sm = state_mgr_exists_other_master_sm(sm);
			if (p_remote_sm != NULL) {
				/* There is a remote SM that is master.
				 * need to wait for that SM to relinquish control
				 * of its portion of the subnet. C14-60.2.1.
				 * Also - need to start polling on that SM. */
				sm->p_polling_sm = p_remote_sm;
				osm_sm_state_mgr_process(sm,
							 OSM_SM_SIGNAL_WAIT_FOR_HANDOVER);
				return;
			}
		}
	}

	/* Need to continue with lid assignment */
	osm_drop_mgr_process(sm);

	/*
	 * If we are not MASTER already - this means that we are
	 * in discovery state. call osm_sm_state_mgr with signal
	 * DISCOVERY_COMPLETED
	 */
	if (sm->p_subn->sm_state == IB_SMINFO_STATE_DISCOVERING)
		osm_sm_state_mgr_process(sm, OSM_SM_SIGNAL_DISCOVERY_COMPLETED);

	osm_pkey_mgr_process(sm->p_subn->p_osm);

	/* try to restore SA DB (this should be before lid_mgr
	   because we may want to disable clients reregistration
	   when SA DB is restored) */
	osm_sa_db_file_load(sm->p_subn->p_osm);

	if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
		return;

	OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
			"PKEY and QOS setup completed - STARTING SM LID CONFIG");

	osm_lid_mgr_process_sm(&sm->lid_mgr);
	if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
		return;

	OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
			"SM LID ASSIGNMENT COMPLETE - STARTING SUBNET LID CONFIG");
	state_mgr_notify_lid_change(sm);

	osm_lid_mgr_process_subnet(&sm->lid_mgr);
	if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
		return;

	/* At this point we need to check the consistency of
	 * the port_lid_tbl under the subnet. There might be
	 * errors in it if PortInfo Set requests didn't reach
	 * their destination. */
	state_mgr_check_tbl_consistency(sm);

	OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
			"LID ASSIGNMENT COMPLETE - STARTING SWITCH TABLE CONFIG");

	/*
	 * Proceed with unicast forwarding table configuration; if it fails
	 * return early to wait for a trap or the next sweep interval.
	 */

	if (!sm->ucast_mgr.cache_valid ||
	    osm_ucast_cache_process(&sm->ucast_mgr))
		if (osm_ucast_mgr_process(&sm->ucast_mgr))
			return;

	osm_qos_setup(sm->p_subn->p_osm);

	if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
		return;

	/* cleanup switch lft buffers */
	cl_qmap_apply_func(&sm->p_subn->sw_guid_tbl, cleanup_switch, sm->p_log);

	/* We are done setting all LFTs so clear the ignore existing.
	 * From now on, as long as we are still master, we want to
	 * take into account these lfts. */
	sm->p_subn->ignore_existing_lfts = FALSE;

	OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
			"SWITCHES CONFIGURED FOR UNICAST");
	osm_opensm_report_event(sm->p_subn->p_osm,
			OSM_EVENT_ID_UCAST_ROUTING_DONE, NULL);

	if (!sm->p_subn->opt.disable_multicast) {
		osm_mcast_mgr_process(sm, TRUE);
		if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
			return;
		OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
				"SWITCHES CONFIGURED FOR MULTICAST");
	}

	/*
	 * The LINK_PORTS state is required since we cannot count on
	 * the port state change MADs to succeed. This is an artifact
	 * of the spec defining state change from state X to state X
	 * as an error. The hardware then is not required to process
	 * other parameters provided by the Set(PortInfo) Packet.
	 */

	osm_link_mgr_process(sm, IB_LINK_NO_CHANGE);
	if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
		return;

	OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
			"LINKS PORTS CONFIGURED - SET LINKS TO ARMED STATE");

	osm_link_mgr_process(sm, IB_LINK_ARMED);
	if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
		return;

	OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
			"LINKS ARMED - SET LINKS TO ACTIVE STATE");

	osm_link_mgr_process(sm, IB_LINK_ACTIVE);
	if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
		return;

	/*
	 * The sweep completed!
	 */

	/*
	 * Send trap 64 on newly discovered endports
	 */
	state_mgr_report_new_ports(sm);

	/* in any case we zero this flag */
	sm->p_subn->coming_out_of_standby = FALSE;

	/* If there were errors - then the subnet is not really up */
	if (sm->p_subn->subnet_initialization_error == TRUE) {
		osm_log(sm->p_log, OSM_LOG_SYS,
			"Errors during initialization\n");
		OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_ERROR,
				"ERRORS DURING INITIALIZATION");
	} else {
		sm->p_subn->need_update = 0;
		osm_dump_all(sm->p_subn->p_osm);
		state_mgr_up_msg(sm);
		sm->p_subn->first_time_master_sweep = FALSE;

		if (osm_log_is_active(sm->p_log, OSM_LOG_VERBOSE) ||
		    sm->p_subn->opt.sa_db_dump)
			osm_sa_db_file_dump(sm->p_subn->p_osm);
	}

	/*
	 * Finally signal the subnet up event
	 */
	cl_event_signal(&sm->subnet_up_event);

	osm_opensm_report_event(sm->p_subn->p_osm, OSM_EVENT_ID_SUBNET_UP,
				NULL);

	/* if we got a signal to force heavy sweep or errors
	 * in the middle of the sweep - try another sweep. */
	if (sm->p_subn->force_heavy_sweep
	    || sm->p_subn->subnet_initialization_error)
		osm_sm_signal(sm, OSM_SIGNAL_SWEEP);
}
Esempio n. 8
0
void osm_sm_state_mgr_polling_callback(IN void *context)
{
	osm_sm_t *sm = context;
	uint32_t timeout = sm->p_subn->opt.sminfo_polling_timeout;
	cl_status_t cl_status;

	OSM_LOG_ENTER(sm->p_log);

	/*
	 * We can be here in one of two cases:
	 * 1. We are a STANDBY sm polling on the master SM.
	 * 2. We are a MASTER sm, waiting for a handover from a remote master sm.
	 * If we are not in one of these cases - don't need to restart the poller.
	 */
	if (!((sm->p_subn->sm_state == IB_SMINFO_STATE_MASTER &&
	       sm->p_polling_sm != NULL) ||
	      sm->p_subn->sm_state == IB_SMINFO_STATE_STANDBY))
		goto Exit;

	/*
	 * If we are a STANDBY sm and the osm_exit_flag is set, then let's
	 * signal the subnet_up. This is relevant for the case of running only
	 * once. In that case - the program is stuck until this signal is
	 * received. In other cases - it is not relevant whether or not the
	 * signal is on - since we are currently in exit flow
	 */
	if (sm->p_subn->sm_state == IB_SMINFO_STATE_STANDBY && osm_exit_flag) {
		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
			"Signalling subnet_up_event\n");
		cl_event_signal(&sm->subnet_up_event);
		goto Exit;
	}

	/*
	 * Incr the retry number.
	 * If it reached the max_retry_number in the subnet opt - call
	 * osm_sm_state_mgr_process with signal OSM_SM_SIGNAL_POLLING_TIMEOUT
	 */
	sm->retry_number++;
	OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Retry number:%d\n",
		sm->retry_number);

	if (sm->retry_number >= sm->p_subn->opt.polling_retry_number) {
		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
			"Reached polling_retry_number value in retry_number. "
			"Go to DISCOVERY state\n");
		osm_sm_state_mgr_process(sm, OSM_SM_SIGNAL_POLLING_TIMEOUT);
		goto Exit;
	}

	/* Send a SubnGet(SMInfo) request to the remote sm (depends on our state) */
	sm_state_mgr_send_master_sm_info_req(sm);

	/* restart the timer */
	cl_status = cl_timer_start(&sm->polling_timer, timeout);
	if (cl_status != CL_SUCCESS)
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3211: "
			"Failed to restart timer\n");

Exit:
	OSM_LOG_EXIT(sm->p_log);
}
static void
__osmv_dispatch_rmpp_snd(IN osm_bind_handle_t h_bind,
			 IN const ib_mad_t * p_mad,
			 IN osmv_txn_ctx_t * p_txn,
			 IN const osm_mad_addr_t * p_mad_addr)
{
	osmv_rmpp_send_ctx_t *p_send_ctx = osmv_txn_get_rmpp_send_ctx(p_txn);

	uint32_t old_wl = p_send_ctx->window_last;
	uint32_t total_segs = osmv_rmpp_send_ctx_get_num_segs(p_send_ctx);
	uint32_t seg_num = cl_ntoh32(((ib_rmpp_mad_t *) p_mad)->seg_num);
	uint32_t new_wl = cl_ntoh32(((ib_rmpp_mad_t *) p_mad)->paylen_newwin);
	osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind;

	OSM_LOG_ENTER(p_bo->p_vendor->p_log);

	if (TRUE == osmv_rmpp_is_abort_stop(p_mad)) {

		osm_log(p_bo->p_vendor->p_log, OSM_LOG_ERROR,
			"__osmv_dispatch_rmpp_snd: ERR 6502: "
			"The remote side sent an ABORT/STOP indication.\n");
		osmv_rmpp_snd_error(p_send_ctx, IB_REMOTE_ERROR);
		goto dispatch_rmpp_snd_done;
	}

	if (FALSE == osmv_rmpp_is_ack(p_mad)) {

		osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG,
			"Not supposed to receive DATA packets --> dropping the MAD\n");
		goto dispatch_rmpp_snd_done;
	}

	/* Continue processing the ACK */
	if (seg_num > old_wl) {

		osm_log(p_bo->p_vendor->p_log, OSM_LOG_ERROR,
			"__osmv_dispatch_rmpp_snd: ERR 6503: "
			"ACK received for a non-sent segment %d\n", seg_num);

		osmv_rmpp_send_nak(h_bind, p_mad, p_mad_addr,
				   IB_RMPP_TYPE_ABORT, IB_RMPP_STATUS_S2B);

		osmv_rmpp_snd_error(p_send_ctx, IB_REMOTE_ERROR);
		goto dispatch_rmpp_snd_done;
	}

	osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG,
		"__osmv_dispatch_rmpp_snd: "
		"New WL = %u Old WL = %u Total Segs = %u\n",
		new_wl, old_wl, total_segs);

	if (new_wl < old_wl) {
		osm_log(p_bo->p_vendor->p_log, OSM_LOG_ERROR,
			"__osmv_dispatch_rmpp_snd: ERR 6508: "
			"The receiver requests a smaller WL (%d) than before (%d)\n",
			new_wl, old_wl);

		osmv_rmpp_send_nak(h_bind, p_mad, p_mad_addr,
				   IB_RMPP_TYPE_ABORT, IB_RMPP_STATUS_W2S);

		osmv_rmpp_snd_error(p_send_ctx, IB_REMOTE_ERROR);
		goto dispatch_rmpp_snd_done;
	}

	/* Update the sender's window, and optionally wake up the sender thread
	 * Note! A single ACK can acknowledge a whole range of segments: [WF..SEG_NUM]
	 */
	osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG,
		"ACK for seg_num #%d accepted.\n", seg_num);

	if (seg_num == old_wl) {

		osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG,
			"The send window [%d:%d] is totally acknowledged.\n",
			p_send_ctx->window_first, old_wl);

		p_send_ctx->window_first = seg_num + 1;
		p_send_ctx->window_last =
		    (new_wl < total_segs) ? new_wl : total_segs;

		/* Remove the response timeout event for the window */
		osmv_txn_remove_timeout_ev(h_bind, osmv_txn_get_key(p_txn));

		/* Wake up the sending thread */
		cl_event_signal(&p_send_ctx->event);
	}

dispatch_rmpp_snd_done:
	OSM_LOG_EXIT(p_bo->p_vendor->p_log);
}