Ejemplo n.º 1
0
/*
 * sa_reader_filter
 *
 * Packet-check callback installed on the SA reader's MAI filter.
 * Returns 1 to indicate "no match" and 0 when the reader should take the MAD.
 *
 * A MAD is rejected (1) when:
 *   - an earlier filter already flagged it as MAI_TYPE_DROP,
 *   - sa_filter_validate_mad() returns non-zero,
 *   - its method is SA_CM_REPORT,
 *   - it is a GetTable/GetMulti/GetTraceTable MAD with RMPP active that is
 *     an ACK with a non-zero segment number, a STOP, or an ABORT
 *     (i.e. part of an in-flight RMPP exchange, not a new request).
 */
int sa_reader_filter(Mai_t *maip)
{
	STL_SA_MAD_HEADER hdr;
	int isRmppMethod;
	int isInflight;

	// a previous filter has already asked for this packet to be dropped
	if (maip->type == MAI_TYPE_DROP) {
		return 1;
	}

	// work on a byte-swapped copy of the SA header
	BSWAPCOPY_STL_SA_MAD_HEADER((STL_SA_MAD_HEADER*)maip->data, &hdr);
	if (sa_filter_validate_mad(maip, &hdr)) {
		return 1;
	}

	if (maip->base.method == SA_CM_REPORT) {
		return 1;
	}

	// the reader only handles new requests, never in-flight RMPP traffic
	isRmppMethod = (maip->base.method == SA_CM_GETTABLE ||
					maip->base.method == SA_CM_GETMULTI ||
					maip->base.method == SA_CM_GETTRACETABLE);
	if (isRmppMethod && (hdr.u.tf.rmppFlags & RMPP_FLAGS_ACTIVE)) {
		isInflight = ((hdr.rmppType == RMPP_TYPE_ACK && hdr.segNum != 0) ||
					  hdr.rmppType == RMPP_TYPE_STOP ||
					  hdr.rmppType == RMPP_TYPE_ABORT);
		if (isInflight) {
			return 1;
		}
	}

	if (saDebugRmpp) {
		IB_LOG_INFINI_INFO_FMT( "sa_reader_filter",
			   "Processing request for %s[%s] from LID[0x%x], TID="FMT_U64,
			   sa_getMethodText((int)maip->base.method),
			   sa_getAidName((int)maip->base.aid),
			   maip->addrInfo.slid, maip->base.tid);
	}
	return 0;
}
Ejemplo n.º 2
0
/*
 * sa_writer_filter
 *
 * Packet-check callback installed on the SA writer's MAI filter.
 * Returns 0 only for in-flight RMPP packets: a GetTable/GetMulti/GetTraceTable
 * MAD with the RMPP active flag set that is an ACK with segment number > 0,
 * a STOP, or an ABORT.  Everything else (including MADs for which
 * sa_filter_validate_mad() returns non-zero) yields 1, i.e. "no match".
 */
int sa_writer_filter(Mai_t *maip)
{
	STL_SA_MAD_HEADER hdr;

	BSWAPCOPY_STL_SA_MAD_HEADER((STL_SA_MAD_HEADER*)maip->data, &hdr);
	if (sa_filter_validate_mad(maip, &hdr)) {
		return 1;
	}

	// only the RMPP-capable query methods can have in-flight traffic
	if (maip->base.method != SA_CM_GETTABLE &&
		maip->base.method != SA_CM_GETMULTI &&
		maip->base.method != SA_CM_GETTRACETABLE) {
		return 1;
	}

	if (!(hdr.u.tf.rmppFlags & RMPP_FLAGS_ACTIVE)) {
		return 1;
	}

	// of the active RMPP packets, the writer owns ACK(seg > 0), STOP and ABORT
	if (!((hdr.rmppType == RMPP_TYPE_ACK && hdr.segNum > 0) ||
		  hdr.rmppType == RMPP_TYPE_STOP ||
		  hdr.rmppType == RMPP_TYPE_ABORT)) {
		return 1;
	}

	if (saDebugRmpp) {
		IB_LOG_INFINI_INFO_FMT( "sa_writer_filter",
			   "Processing inflight Rmpp packet for %s[%s] from LID[0x%x], TID="FMT_U64,
			   sa_getMethodText((int)maip->base.method),
			   sa_getAidName((int)maip->base.aid),
			   maip->addrInfo.slid, maip->base.tid);
	}
	return 0;
}
Ejemplo n.º 3
0
/*
 * sm_fsm_notactive
 *
 * Handles a Set(SMInfo) control MAD for this state handler.
 *
 *   maip     - the received MAD; its SMP data is overwritten with our SMInfo
 *              and it is sent back as the reply.
 *   nodename - name of the sending SM's node, used only for logging.
 *
 * SM_AMOD_STANDBY requests a move to STANDBY, SM_AMOD_DISABLE a move to
 * NOTACTIVE; any other attribute modifier is rejected by setting
 * MAD_STATUS_BAD_ATTR in the reply.  The reply is always sent before any
 * transition is made.  Always returns VSTATUS_OK.
 */
Status_t
sm_fsm_notactive(Mai_t *maip, char *nodename)
{
	STL_SM_INFO	senderInfo;
	long		target = -1;	// -1 means "no transition requested"
	Status_t	rc;

	IB_ENTER(__func__, maip->base.amod, sm_state, 0, 0);

	BSWAPCOPY_STL_SM_INFO((STL_SM_INFO *)STL_GET_SMP_DATA(maip), &senderInfo);

	if (maip->base.amod == SM_AMOD_STANDBY) {			// C14-54.1.1
		IB_LOG_INFINI_INFO_FMT(__func__,
			   "[%s] SM received request to transition to STANDBY from SM node %s, LID [0x%x], portguid ["FMT_U64"], TID="FMT_U64,
			   sm_getStateText(sm_smInfo.u.s.SMStateCurrent), nodename,
			   maip->addrInfo.slid, senderInfo.PortGUID, maip->base.tid);
		target = SM_STATE_STANDBY;
	} else if (maip->base.amod == SM_AMOD_DISABLE) {
		IB_LOG_WARN_FMT(__func__,
			   "[%s] SM received request to transition to NOTACTIVE from SM node %s, LID [0x%x], portguid ["FMT_U64"], TID="FMT_U64,
			   sm_getStateText(sm_smInfo.u.s.SMStateCurrent), nodename,
			   maip->addrInfo.slid, senderInfo.PortGUID, maip->base.tid);
		target = SM_STATE_NOTACTIVE;
	} else {
		maip->base.status = MAD_STATUS_BAD_ATTR;
		IB_LOG_WARN_FMT(__func__,
			"[%s] SM received invalid transition request %s (%u) from SM node %s, LID [0x%x], portguid ["FMT_U64"], TID="FMT_U64,
			sm_getStateText(sm_smInfo.u.s.SMStateCurrent), getAmod(maip->base.amod), maip->base.amod,
			nodename, maip->addrInfo.slid, senderInfo.PortGUID, maip->base.tid);
	}

	/* Answer the Set(SMInfo) with our own (activity-bumped) SMInfo. */
	sm_smInfo.ActCount++;
	BSWAPCOPY_STL_SM_INFO(&sm_smInfo, (STL_SM_INFO *)STL_GET_SMP_DATA(maip));
	rc = mai_stl_reply(fd_async, maip, sizeof(STL_SM_INFO));
	if (rc != VSTATUS_OK) {
		IB_LOG_ERRORRC("sm_fsm_notactive - bad mai_reply rc:", rc);
	}

	/* Only transition when one of the accepted requests set a target. */
	if (target >= SM_STATE_NOTACTIVE) {
		(void)sm_transition(target);
	}

	IB_EXIT(__func__, 0);
	return(VSTATUS_OK);
}
Ejemplo n.º 4
0
/*
 * sm_arm_port
 *
 * Attempts to move a port from INIT to ARMED with a LID-routed Set(PortInfo).
 *
 *   topop - topology being processed (only traced here).
 *   nodep - node that owns the port.
 *   portp - port to arm.
 *
 * Returns:
 *   VSTATUS_OK  - neighbor is quarantined (nothing done), the port is already
 *                 past INIT, or the Set succeeded and the XmitQ values match.
 *   VSTATUS_BAD - switch port 0 could not be obtained, the port is below
 *                 INIT, or the XmitQ values in the response differ from the
 *                 stored ones.
 *   otherwise   - the status returned by SM_Set_PortInfo_LR().
 *
 * On success the cached port state and PortInfo are replaced with the
 * response and the port LED is turned off.
 */
static Status_t
sm_arm_port(Topology_t * topop, Node_t * nodep, Port_t * portp)
{
	Status_t status = VSTATUS_OK;
	STL_PORT_INFO portInfo;
	STL_LID dlid;
	uint32_t madStatus = 0;

	IB_ENTER(__func__, topop, nodep, portp, 0);

	//do not try to activate ports connected to quarantined nodes
	if(portp->portData->neighborQuarantined) {
		IB_EXIT(__func__, status);
		return (status);
	}

	// Switches are addressed through the LID of their port 0; other node
	// types through the LID of the port itself.
	if (nodep->nodeInfo.NodeType == NI_TYPE_SWITCH) {
		Port_t *swportp;
		swportp = sm_get_port(nodep, 0);
		if (!sm_valid_port(swportp)) {
			IB_LOG_WARN_FMT(__func__, "Failed to get Port 0 of Switch " FMT_U64,
							nodep->nodeInfo.NodeGUID);
			// keep enter/exit tracing balanced on this error path as well
			IB_EXIT(__func__, VSTATUS_BAD);
			return VSTATUS_BAD;
		}
		dlid = swportp->portData->lid;
	} else {
		dlid = portp->portData->lid;
	}

	// work on a private copy; the cached PortInfo is only updated on success
	portInfo = portp->portData->portInfo;

	if (portInfo.PortStates.s.PortState < IB_PORT_INIT) {
		IB_LOG_WARN_FMT(__func__, "Node %s guid "FMT_U64
			" port %u isn't in INIT state.",
			sm_nodeDescString(nodep), nodep->nodeInfo.NodeGUID, portp->index);
		IB_EXIT(__func__, VSTATUS_BAD);
		return (VSTATUS_BAD);
	} else if (portInfo.PortStates.s.PortState > IB_PORT_INIT) {
		IB_LOG_INFINI_INFO_FMT(__func__, "Node %s guid "FMT_U64
			" port %u already armed or active.",
			sm_nodeDescString(nodep), nodep->nodeInfo.NodeGUID, portp->index);
		IB_EXIT(__func__, VSTATUS_OK);
		return (VSTATUS_OK);
	}

	portInfo.PortStates.s.PortState = IB_PORT_ARMED;
	portInfo.LinkDownReason = STL_LINKDOWN_REASON_NONE;
	portInfo.NeighborLinkDownReason = STL_LINKDOWN_REASON_NONE;

	if (sm_is_scae_allowed(nodep))
		portInfo.PortMode.s.IsActiveOptimizeEnabled = 1;

	//
	//  Set the "No change" attributes.
	//
	portInfo.LinkSpeed.Enabled = 0;
	portInfo.LinkWidth.Enabled = 0;
	portInfo.PortStates.s.PortPhysicalState = 0;
	portInfo.s4.OperationalVL = 0;

	//
	//  Tell the port its new state.
	//
	status = SM_Set_PortInfo_LR(fd_topology, (1 << 24) | (portp->index),
		sm_lid, dlid, &portInfo, portp->portData->portInfo.M_Key, &madStatus);

	if (status != VSTATUS_OK && madStatus != MAD_STATUS_INVALID_ATTRIB) {
		IB_LOG_WARN_FMT(__func__,
			"Cannot set PORTINFO for node %s nodeGuid " FMT_U64
			" port %d status=%d", sm_nodeDescString(nodep),
			nodep->nodeInfo.NodeGUID, portp->index, status);

	} else if(madStatus == MAD_STATUS_INVALID_ATTRIB &&
		portInfo.PortStates.s.IsSMConfigurationStarted == 0) {
		// the Set was rejected and the port reports that SM configuration
		// has not started: hand off to the bounce handler
		handle_activate_bounce(nodep, portp);
	} else if (portInfo.PortStates.s.PortState != IB_PORT_ARMED &&
		!(madStatus == MAD_STATUS_INVALID_ATTRIB)) {

		IB_LOG_WARN_FMT(__func__,
						"Activate port for node %s guid " FMT_U64
						" port %d: tried to set state to %d but returned %d",
						sm_nodeDescString(nodep), nodep->nodeInfo.NodeGUID, portp->index,
						IB_PORT_ARMED, portInfo.PortStates.s.PortState);
		// limit resweeps, if a device consistently can't be activated, it may
		// have a HW or config issue
		handle_activate_failure(portp);
	}

	// To save an additional Set(PortInfo) per port, the HOQLife/XmitQ values may not have been
	// sent for a port in topology_assignments() if at that time the port was not
	// ARMED or ACTIVE since at least one more Set(PortInfo) will be required to move
	// the port to ARMED.  Thus it is necessary to check the request/response
	// XmitQ values here as well
	if (status == VSTATUS_OK &&
		!sm_eq_XmitQ(portInfo.XmitQ, portp->portData->portInfo.XmitQ, STL_MAX_VLS)) {
		IB_LOG_ERROR_FMT(__func__,
			 "XmitQ requested/response value mismatch for node %s guid " FMT_U64
			 " port %d", sm_nodeDescString(nodep), nodep->nodeInfo.NodeGUID,
			 portp->index);
		status = VSTATUS_BAD;
	} else if (status == VSTATUS_OK) {
		//
		//  Save the port state and the port info
		//
		portp->state = portInfo.PortStates.s.PortState;
		portp->portData->portInfo = portInfo;

		// Clear the LED if it was previously turned on.
		sm_enable_port_led(nodep, portp, FALSE);
	}

	IB_EXIT(__func__, status);
	return (status);
}
Ejemplo n.º 5
0
void
sa_main_writer(uint32_t argc, uint8_t ** argv) {
	Status_t	status;
	Mai_t		in_mad;
	Filter_t	filter;
	sa_cntxt_t	*sa_cntxt;
	uint64_t	now, srLastAged=0, cacheLastCleaned=0;
    uint32_t    records=0;

	IB_ENTER("sa_main_writer", 0, 0, 0, 0);

	sa_main_writer_exit = 0;
    
    //
    //	Create the SubnAdm(*) MAD filter for the SA thread.
    //
	SA_Filter_Init(&filter);
	filter.value.mclass = MAD_CV_SUBN_ADM;
	filter.mask.mclass = 0xff;
	filter.value.method = 0x00;
	filter.mask.method = 0x80;
	filter.mai_filter_check_packet = sa_writer_filter;
	MAI_SET_FILTER_NAME (&filter, "SA Writer");

	if (mai_filter_create(fd_sa_w, &filter, VFILTER_SHARE) != VSTATUS_OK) {
		IB_LOG_ERROR0("esm_saw: can't create SubnAdm(*) filter");
		(void)vs_thread_exit(&sm_threads[SM_THREAD_SA_WRITER].handle);
	}

	while (1) {
		status = mai_recv(fd_sa_w, &in_mad, VTIMER_1S/4);
        if (status != VSTATUS_OK && status != VSTATUS_TIMEOUT) {
            IB_LOG_ERRORRC("sa_main_writer: error on mai_recv rc:", status);
            vs_thread_sleep(VTIMER_1S/10);
        }

        if (sa_main_writer_exit == 1){
#ifdef __VXWORKS__
            ESM_LOG_ESMINFO("SA Writer Task exiting OK.", 0);
#endif
            break;
        }
        /* don't process messages if not master SM or still doing first sweep */
		if (sm_state != SM_STATE_MASTER || topology_passcount < 1) {
            continue;
        }
		/* 
         * process the rmpp ack and send out the next set of segments
         */
        if (status == VSTATUS_OK) {
            /* locate and process in flight rmpp request */
            sa_cntxt = sa_cntxt_find( &in_mad );
            if (sa_cntxt) {
                sa_process_inflight_rmpp_request( &in_mad, sa_cntxt );
                /*
                 * This may not necessarily release context
                 * based on if someone else has reserved it
                 */
                sa_cntxt_release( sa_cntxt );
            } else {
				INCREMENT_COUNTER(smCounterSaDeadRmppPacket);
                if (saDebugRmpp) {
                    IB_LOG_INFINI_INFO_FMT( "sa_main_writer", 
                           "dropping %s[%s] RMPP packet from LID[0x%x], TID ["FMT_U64"] already completed/aborted",
                           sa_getMethodText((int)in_mad.base.method), sa_getAidName(in_mad.base.aid), 
                           in_mad.addrInfo.slid, in_mad.base.tid);
                }
            }
        }

        /* age contexts if more than 1 second since last time */
        vs_time_get( &now );
        if ((now - timeLastAged) > (VTIMER_1S)) {
            (void) sa_cntxt_age();
        }

        /* age the service records */
        if ((now - srLastAged) > 5*VTIMER_1S) {
            srLastAged = now;
            if ((status = sa_ServiceRecord_Age(&records)) != VSTATUS_OK) {
                IB_LOG_ERRORRC("sa_main_writer: failed to age service records, rc:", status);
            } else if (records) {
                if (smDebugPerf) IB_LOG_INFINI_INFO("sa_main_writer: Number of service records aged out was", records);
            }
        }
		
		/* clean the SA cache */
		if ((now - cacheLastCleaned) > SA_CACHE_CLEAN_INTERVAL) {
			cacheLastCleaned = now;
			(void)vs_lock(&saCache.lock);
			sa_cache_clean();
			(void)vs_unlock(&saCache.lock);
		}
	}
    /* clean up cache before exit */
    sa_cache_clean();
    (void)vs_lock_delete(&saCache.lock);
	//IB_LOG_INFINI_INFO0("sa_main_writer thread: Exiting OK");
} // SA_MAIN_WRITER
Ejemplo n.º 6
0
/*
 * sa_main_reader
 *
 * Main loop of the SA reader thread.  argc/argv are not used.
 *
 * Installs a SubnAdm filter that uses sa_reader_filter() as its packet check,
 * then loops until sa_main_reader_exit is set to 1:
 *   - receives a MAD (quarter-second timeout),
 *   - ignores it unless this SM is master and at least one sweep has completed,
 *   - drops requests that sat on the queue longer than reqTimeToLive,
 *   - obtains a context via sa_cntxt_get() and dispatches on its result
 *     (new request, duplicate, no context available -> BUSY reply,
 *     continuing GetMulti),
 *   - when sa_mft_reprog is set, waits about a second for further multicast
 *     member requests and then triggers a sweep to reprogram the MFTs.
 * On exit it sleeps one second, deletes the subscriber, service record and
 * multicast group tables, and removes its filter.
 */
void
sa_main_reader(uint32_t argc, uint8_t ** argv) {
	Status_t	status;
	Mai_t		in_mad;
	Filter_t	filter;
	sa_cntxt_t	*sa_cntxt=NULL;
	uint64_t	now, delta, max_delta;
	int			tries=0, retry=0;
	uint64_t	reqTimeToLive=0;
	SAContextGet_t	cntxGetStatus=0;
	int			numContextBusy=0;

	IB_ENTER("sa_main_reader", 0, 0, 0, 0);

	sa_main_reader_exit = 0;

	/*
	 *	Create the SubnAdm(*) MAD filter for the SA thread.
	 */
	SA_Filter_Init(&filter);
	filter.value.mclass = MAD_CV_SUBN_ADM;
	filter.mask.mclass = 0xff;
	filter.value.method = 0x00;
	filter.mask.method = 0x80;
	filter.mai_filter_check_packet = sa_reader_filter;
	MAI_SET_FILTER_NAME (&filter, "SA Reader");

	if (mai_filter_create(fd_sa, &filter, VFILTER_SHARE) != VSTATUS_OK) {
		IB_LOG_ERROR0("sa_main_reader: can't create SubnAdm(*) filter");
		(void)vs_thread_exit(&sm_threads[SM_THREAD_SA_READER].handle);
	}

	timeMftLastUpdated = 0;
	/*
	 * calculate request time to live on queue
	 * ~ 3.2secs for defaults: sa_packetLifetime=18 and sa_respTimeValue=18
	 *
	 * The shifts are done in 64-bit arithmetic: shifting a plain int by a
	 * configured exponent of 31 or more would overflow before the result is
	 * widened.
	 */
	reqTimeToLive = 4ull * ( (2ull*(1ull << sm_config.sa_packet_lifetime_n2)) + (1ull << sm_config.sa_resp_time_n2) );
	while (1) {
		status = mai_recv(fd_sa, &in_mad, VTIMER_1S/4);

		if (sa_main_reader_exit == 1){
#ifdef __VXWORKS__
			ESM_LOG_ESMINFO("sa_main_reader: exiting OK.", 0);
#endif
			break;
		}
		/* don't process messages if not master SM or still doing first sweep */
		if ((sm_state != SM_STATE_MASTER) ||
		    (topology_passcount < 1)) {
			continue;
		}

		/*
		 * If the mai layer shuts down we end up in this infinite loop here.
		 * This may happen on initialization
		 */
		if( status != VSTATUS_OK ){
			if (status != VSTATUS_TIMEOUT)
				IB_LOG_ERRORRC("sa_main_reader: error on mai_recv rc:", status);
		} else {
			/*
			 * Drop new requests that have been sitting on SA reader queue for too long
			 */
			if (in_mad.intime) {
				/* PR 110586 - On some RHEL 5 systems, we've seen  weird issues with gettimeofday() [used by vs_time_get()]
				 * where once in a while the time difference calculated from successive calls to gettimeofday()
				 * results in a negative value. Due to this, we might actually consider a request stale even if
				 * its not. Work around this by making calls to gettimeofday() till it returns us some
				 * sane values. Just to be extra cautious, bound the retries so that we don't get stuck in the loop.
				 */
				tries = 0;
				/* Along with negative values also check for unreasonably high values of delta*/
				max_delta = 30*reqTimeToLive;
				do {
					vs_time_get( &now );
					delta = now - in_mad.intime;
					tries++;

					if ((now < in_mad.intime) || (delta > max_delta)) {
						vs_thread_sleep(1);
						retry = 1;
					} else {
						retry = 0;
					}
				} while (retry && tries < 20);

				if (delta > reqTimeToLive) {
					INCREMENT_COUNTER(smCounterSaDroppedRequests);
					if (smDebugPerf || saDebugPerf) {
						/*
						 * delta is split into whole seconds and milliseconds;
						 * the millisecond part is zero-padded to three digits
						 * so that e.g. 3 s 5 ms prints as "3.005", not "3.5".
						 */
						IB_LOG_INFINI_INFO_FMT( "sa_main_reader",
							   "Dropping stale %s[%s] request from LID[0x%x], TID="FMT_U64"; On queue for %d.%03d seconds.",
							   sa_getMethodText((int)in_mad.base.method), sa_getAidName((int)in_mad.base.aid), in_mad.addrInfo.slid,
							   in_mad.base.tid, (int)(delta/1000000), (int)((delta % 1000000) / 1000));
					}
					/* drop the request without returning a response; sender will retry */
					continue;
				}
			}
			/*
			 * get a context to process request; sa_cntxt can be:
			 *   1. NULL if resources are scarce
			 *   2. NULL if request is dup of existing request
			 *   3. in progress getMulti request context
			 *   4. New context for a brand new request
			 */
			cntxGetStatus = sa_cntxt_get( &in_mad, (void *)&sa_cntxt );
			if (cntxGetStatus == ContextAllocated) {
				/* process the new request */
				sa_process_mad( &in_mad, sa_cntxt );
				/*
				 * This may not necessarily release context based on if someone else has reserved it
				 */
				if(sa_cntxt) sa_cntxt_release( sa_cntxt );
			} else if (cntxGetStatus == ContextExist) {
				INCREMENT_COUNTER(smCounterSaDuplicateRequests);
				/* this is a duplicate request */
				if (saDebugPerf || saDebugRmpp) {
					IB_LOG_INFINI_INFO_FMT( "sa_main_reader",
					       "SA_READER received duplicate %s[%s] from LID [0x%x] with TID ["FMT_U64"] ",
					       sa_getMethodText((int)in_mad.base.method), sa_getAidName((int)in_mad.base.aid),in_mad.addrInfo.slid, in_mad.base.tid);
				}
			} else if (cntxGetStatus == ContextNotAvailable) {
				INCREMENT_COUNTER(smCounterSaContextNotAvailable);
				/* we are swamped, return BUSY to caller */
				if (saDebugPerf || saDebugRmpp) { /* log msg before send changes method and lids */
					IB_LOG_INFINI_INFO_FMT( "sa_main_reader",
						   "NO CONTEXT AVAILABLE, returning MAD_STATUS_BUSY to %s[%s] request from LID [0x%x], TID ["FMT_U64"]!",
						   sa_getMethodText((int)in_mad.base.method), sa_getAidName((int)in_mad.base.aid), in_mad.addrInfo.slid, in_mad.base.tid);
				}
				in_mad.base.status = MAD_STATUS_BUSY;
				sa_send_reply( &in_mad, sa_cntxt );
				/* report the running total once every sa_max_cntxt rejections */
				if ((++numContextBusy % sa_max_cntxt) == 0) {
					IB_LOG_INFINI_INFO_FMT( "sa_main_reader",
						   "Had to drop %d SA requests since start due to no available contexts",
						   numContextBusy);
				}
			} else if (cntxGetStatus == ContextExistGetMulti) {
				/* continue processing the getMulti request */
				sa_process_getmulti( &in_mad, sa_cntxt );
				if(sa_cntxt) sa_cntxt_release( sa_cntxt );
			} else {
				IB_LOG_WARN("sa_main_reader: Invalid sa_cntxt_get return code:", cntxGetStatus);
			}
		}

		/*
		 * signal sm_top to reprogram the MFTs
		 * Wait one second to allow mcmember requests to accumulate before asking
		 */
		vs_time_get( &now );
		if (sa_mft_reprog && timeMftLastUpdated == 0) {
			/* first time the flag is seen: just start the one second timer */
			timeMftLastUpdated = now;
		} else if (sa_mft_reprog && (now - timeMftLastUpdated) > VTIMER_1S) {
			topology_wakeup_time = 0ull;
			if ((status = vs_lock(&sa_lock)) != VSTATUS_OK) {
				IB_LOG_ERRORRC("sa_main_reader: Failed to lock sa_lock rc:", status);
			} else {
				sm_McGroups_Need_Prog = 1;      /* tells Topology thread that MFT reprogramming is needed */
				(void)vs_unlock(&sa_lock);
			}
			sm_trigger_sweep(SM_SWEEP_REASON_MCMEMBER);
			/* clear the indicators */
			timeMftLastUpdated = 0;
			sa_mft_reprog = 0;
		}
	}
	/* cleanup before exit, but allow some time for the other threads to flush out first */
	(void)vs_thread_sleep(VTIMER_1S);
	(void)sa_SubscriberDelete();
	(void)sa_ServiceRecDelete();
	(void)sa_McGroupDelete();
	if (mai_filter_delete(fd_sa, &filter, VFILTER_SHARE) != VSTATUS_OK) {
		IB_LOG_ERROR0("sa_main_reader: can't delete SubnAdm(*) filter");
	}
	//IB_LOG_INFINI_INFO0("sa_main_reader thread: Exiting OK");
}
Ejemplo n.º 7
0
/*
 * sm_check_Master
 *
 * Checks that the master SM recorded in our local PortInfo is still
 * reachable and still in the MASTER state.
 *
 * Returns VSTATUS_OK when the caller should stay as it is, and VSTATUS_BAD
 * when it should switch to the DISCOVERY state.
 *
 * Two function-static values persist across calls: a count of consecutive
 * failed checks and a multiplier applied to sm_config.master_ping_max_fail
 * (raised to 2 while our own LID is still listed as the master SM LID).
 * Both are reset whenever VSTATUS_BAD is returned or a check succeeds.
 */
Status_t
sm_check_Master() {
	static uint32_t	fsmCheckMasterFailed = 0;	// count of fails to check master
	static uint32_t	fsmMultMaxFail = 1;			// multiplier for sm_config.master_ping_max_fail

	STL_SM_INFO		masterSmInfo;
	STL_PORT_INFO	localPortInfo;
	uint8_t			drPath[64];
	Status_t		rc;
	int				rediscover = 0;		// set when the caller must go back to DISCOVERY

	IB_ENTER(__func__, 0, 0, 0, 0);

	memset(drPath, 0, sizeof(drPath));

	rc = SM_Get_PortInfo(fd_sminfo, 1<<24, drPath, &localPortInfo);
	if (rc != VSTATUS_OK) {
		IB_LOG_ERRORRC("failed to get master SM Lid from my PortInfo, rc:", rc);
		// having a local problem
		// reset count, must be healthy before we can consider becoming master
		fsmCheckMasterFailed = 0;
	} else if (localPortInfo.LID == 0 || localPortInfo.MasterSMLID == 0
			   || localPortInfo.PortStates.s.PortState != IB_PORT_ACTIVE) {
		if (smDebugPerf) {
			if (localPortInfo.PortStates.s.PortState != IB_PORT_ACTIVE) {
				IB_LOG_INFINI_INFO("our portInfo indicates state not active; portState=", (int)localPortInfo.PortStates.s.PortState);
			} else if (localPortInfo.MasterSMLID == 0) {
				IB_LOG_INFINI_INFOX("our portInfo smLid is not set yet; Lid=", localPortInfo.LID);
			}
		}
		// stay in standby until link comes up
		if (localPortInfo.PortStates.s.PortState > IB_PORT_DOWN) {
			IB_LOG_WARN0("Switching to DISCOVERY state; Local port uninitialized");
			rediscover = 1;
		}
	} else if (localPortInfo.LID == localPortInfo.MasterSMLID) {
		// we're talking to ourself.  if a master SM doesn't come along and
		// reprogram our SM LID in the timeout period, attempt a discovery.
		// Since for large fabrics, the master can take some time to program
		// our LID, use a higher upper limit for failure count to give the
		// master enough time to program our SM LID.
		fsmMultMaxFail = 2;
		if (++fsmCheckMasterFailed >= fsmMultMaxFail * sm_config.master_ping_max_fail) {
			IB_LOG_WARN0("Switching to DISCOVERY state; Timed out waiting for SM LID to get reprogrammed");
			rediscover = 1;
		} else if (smDebugPerf) {
			// not yet at threshold
			IB_LOG_INFINI_INFOX("our portInfo smLid is not set yet; smLid=", localPortInfo.MasterSMLID);
		}
	} else {
		sm_topop->slid = localPortInfo.LID;
		sm_topop->dlid = localPortInfo.MasterSMLID;
		rc = SM_Get_SMInfo(fd_sminfo, 0, NULL, &masterSmInfo);
		if (rc != VSTATUS_OK) {
			if (++fsmCheckMasterFailed >= fsmMultMaxFail * sm_config.master_ping_max_fail) {
				IB_LOG_WARNX("Switching to DISCOVERY state; Failed to get SmInfo from master SM at LID:", localPortInfo.MasterSMLID);
				rediscover = 1;
			} else {
				// not yet at threshold
				IB_LOG_INFINI_INFO_FMT(__func__,
					   "failed to get SmInfo from master SM at LID[0x%X], retry count=%d",
					   localPortInfo.MasterSMLID, fsmCheckMasterFailed);
			}
		} else {
			sm_saw_another_sm = TRUE;

			/*
			 * PR 105313 - restart results in 2 standby SMs
			 * Must check the state we get back to make sure master is still master
			 */
			if (masterSmInfo.u.s.SMStateCurrent != SM_STATE_MASTER) {
				IB_LOG_WARN_FMT(__func__,
					   "SmInfo from SM at SMLID[0x%X] indicates SM is no longer master, switching to DISCOVERY state",
					   localPortInfo.MasterSMLID);
				rediscover = 1;
			} else {
				// all OK, save data about master, reset fail count and threshold
				sm_topop->sm_count = masterSmInfo.ActCount;
				sm_topop->sm_key = masterSmInfo.SM_Key;

				fsmCheckMasterFailed = 0;
				fsmMultMaxFail = 1;

				if (smDebugPerf) {
					IB_LOG_INFINI_INFO_FMT(__func__,
						   "Master SM["FMT_U64"] at LID=0x%x has priority[%d] and smKey ["FMT_U64"]",
						   masterSmInfo.PortGUID, localPortInfo.MasterSMLID, masterSmInfo.u.s.Priority, masterSmInfo.SM_Key);
				}
			}
		}
	}

	if (rediscover) {
		// reset count and threshold in case stay standby
		fsmCheckMasterFailed = 0;
		fsmMultMaxFail = 1;
		rc = VSTATUS_BAD;
	} else {
		rc = VSTATUS_OK;
	}

	IB_EXIT(__func__, rc);
	return(rc);
}
Ejemplo n.º 8
0
/*
 * sm_fsm_master
 *
 * Handles a Set(SMInfo) control MAD for this state handler.
 *
 *   maip     - the received MAD; its SMP data is overwritten with our SMInfo
 *              and it is sent back as the reply.
 *   nodename - name of the sending SM's node, used only for logging.
 *
 * SM_AMOD_HANDOVER selects MASTER as the new state and is answered with a
 * Set(SMInfo) ACKNOWLEDGE sent back to the originator (LID routed, or along
 * the reversed directed-route return path).  SM_AMOD_ACKNOWLEDGE from a
 * remote SM whose state is at least STANDBY selects STANDBY and triggers a
 * sweep; from any lower state it is rejected with MAD_STATUS_BAD_FIELD.
 * Any other attribute modifier is rejected with MAD_STATUS_BAD_ATTR.
 *
 * The reply to the incoming MAD is always sent first; transitions happen
 * afterwards under new_topology_lock.  Always returns VSTATUS_OK.
 */
Status_t
sm_fsm_master(Mai_t *maip, char *nodename)
{
	Status_t	status;
	long		new_state=-1;
	STL_SM_INFO	theirSmInfo;
	uint8_t		ipath[64];
	uint8_t		*rpath;
	uint8_t		*path;
	int			i, wakeTpThread=0;
	int			ackPathValid=1;		// cleared when no usable DR path can be built
	STL_SM_INFO	smInfoCopy;
	Lid_t		slid=maip->addrInfo.slid;    // original source lid; mai_reply swaps slid-dlid
	Lid_t		dlid=maip->addrInfo.dlid;    // original destination lid(us); mai_reply swaps slid-dlid

	IB_ENTER(__func__, maip->base.amod, sm_state, 0, 0);

	BSWAPCOPY_STL_SM_INFO((STL_SM_INFO *)STL_GET_SMP_DATA(maip), &theirSmInfo);

	switch (maip->base.amod) {
	case SM_AMOD_HANDOVER:				// C14-61
		new_state = SM_STATE_MASTER;
		IB_LOG_INFINI_INFO_FMT(__func__,
			   "[%s] SM received '%s' from SM node %s, LID [0x%x], portguid ["FMT_U64"], TID="FMT_U64,
			   sm_getStateText(sm_smInfo.u.s.SMStateCurrent), getAmod(maip->base.amod), nodename, maip->addrInfo.slid, theirSmInfo.PortGUID, maip->base.tid);
		break;
	case SM_AMOD_ACKNOWLEDGE:
		if (theirSmInfo.u.s.SMStateCurrent < SM_STATE_STANDBY) {  // C14-38.1.1 almost
			maip->base.status = MAD_STATUS_BAD_FIELD;
			IB_LOG_WARN_FMT(__func__,
				   "[%s] SM received invalid Handover Ack from remote SM %s, LID [0x%x], portguid ["FMT_U64"], TID="FMT_U64"; remote not in STANDBY state [%s]",
				   sm_getStateText(sm_smInfo.u.s.SMStateCurrent), nodename, maip->addrInfo.slid, theirSmInfo.PortGUID, maip->base.tid, sm_getStateText(theirSmInfo.u.s.SMStateCurrent));
		} else {
			new_state = SM_STATE_STANDBY;
			wakeTpThread = 1;
			IB_LOG_INFINI_INFO_FMT(__func__,
				   "[%s] SM received '%s' from SM node %s, LID [0x%x], portguid ["FMT_U64"], TID="FMT_U64,
				   sm_getStateText(sm_smInfo.u.s.SMStateCurrent), getAmod(maip->base.amod), nodename, maip->addrInfo.slid, theirSmInfo.PortGUID, maip->base.tid);
		}
		break;
	default:
		maip->base.status = MAD_STATUS_BAD_ATTR;
		IB_LOG_WARN_FMT(__func__,
			   "[%s] SM received invalid MASTER transition [%s] from remote [%s] SM %s, LID [0x%x], portguid ["FMT_U64"], TID="FMT_U64,
			   sm_getStateText(sm_smInfo.u.s.SMStateCurrent), getAmod(maip->base.amod), sm_getStateText(theirSmInfo.u.s.SMStateCurrent), nodename,
			   maip->addrInfo.slid, theirSmInfo.PortGUID, maip->base.tid);
		break;
	}

	/*
	 * Reply to this Set(SMInfo).
	 */
	sm_smInfo.ActCount++;
	BSWAPCOPY_STL_SM_INFO(&sm_smInfo, (STL_SM_INFO *)STL_GET_SMP_DATA(maip));
	status = mai_stl_reply(fd_async, maip, sizeof(STL_SM_INFO));
	if (status != VSTATUS_OK) {
		IB_LOG_ERRORRC("sm_fsm_master - bad mai_reply rc:", status);
	}

	(void)vs_lock(&new_topology_lock);

	/* make appropriate transition if necessary */
	if (new_state >= SM_STATE_NOTACTIVE && new_state != sm_state)
		(void)sm_transition(new_state);
	if (wakeTpThread) {
		/* wakeup the topology thread to clean up SA tables */
		sm_trigger_sweep(SM_SWEEP_REASON_HANDOFF);
	}

	/*
	 * If this was a HANDOVER, we need to ACK back.
	 */
	if (maip->base.amod == SM_AMOD_HANDOVER) {
		maip->addrInfo.destqp &= 0x00ffffff;

		if (maip->base.mclass == MAD_CV_SUBN_LR) {
			path = NULL;
			sm_topop->dlid = slid;    // this is original sender of SmInfo
			sm_topop->slid = dlid;    // this is us
			IB_LOG_INFINI_INFO_FMT(__func__, "sending LR HANDOVER ACK to node %s, Lid [0x%x], portguid "FMT_U64,
				   nodename, slid, theirSmInfo.PortGUID);
		} else if (maip->base.hopCount >= sizeof(ipath)) {
			/*
			 * The hop count comes from the received MAD.  The reversal loop
			 * below writes ipath[1..hopCount], so a value that does not fit
			 * the local path buffer would overrun it.  Treat this like a
			 * failed ACK: log it and do not send.
			 */
			path = NULL;
			ackPathValid = 0;
			IB_LOG_WARN_FMT(__func__,
				   "[%s] SM cannot send DR HANDOVER ACK to node %s, portguid "FMT_U64"; hop count %u exceeds maximum %u",
				   sm_getStateText(sm_smInfo.u.s.SMStateCurrent), nodename, theirSmInfo.PortGUID,
				   (unsigned)maip->base.hopCount, (unsigned)(sizeof(ipath) - 1));
		} else {
			DRStlSmp_t *drsmp = (DRStlSmp_t *)maip->data;

			path = ipath;
			memset((void *)ipath, 0, sizeof(ipath));

			/* ipath[0] holds the hop count; the rest is the return path reversed */
			ipath[0] = maip->base.hopCount;
			rpath = drsmp->RetPath;
			for (i = 1; i <= maip->base.hopCount; i++) {
				ipath[i] = rpath[maip->base.hopCount + 1 - i];
			}
			IB_LOG_INFINI_INFO_FMT(__func__, "sending DR HANDOVER ACK to node %s, portguid "FMT_U64,
				   nodename, theirSmInfo.PortGUID);
		}

		if (ackPathValid) {
			/* send a copy so the request/response cannot modify sm_smInfo */
			smInfoCopy = sm_smInfo;
			status = SM_Set_SMInfo(fd_sminfo, SM_AMOD_ACKNOWLEDGE, path, &smInfoCopy, sm_config.mkey);
			if (status != VSTATUS_OK) {
				IB_LOG_WARN_FMT(__func__,
					   "[%s] SM did not receive response to Handover Acknowledgement from [%s] SM node %s, LID [0x%x], portguid ["FMT_U64"]",
					   sm_getStateText(sm_smInfo.u.s.SMStateCurrent), sm_getStateText(theirSmInfo.u.s.SMStateCurrent), nodename, slid, theirSmInfo.PortGUID);
			} else {
				IB_LOG_INFINI_INFO_FMT(__func__,
					   "[%s] SM successfully acknowleded Handover from remote SM node %s, LID [0x%x], portguid ["FMT_U64"]",
					   sm_getStateText(sm_smInfo.u.s.SMStateCurrent), nodename, slid, theirSmInfo.PortGUID);
			}
		}
		/* make transition to MASTER state (done whether or not the ACK succeeded) */
		(void)sm_transition(new_state);
		sm_trigger_sweep(SM_SWEEP_REASON_STATE_TRANSITION); // wakeup the topology thread
	}
	(void)vs_unlock(&new_topology_lock);

	IB_EXIT(__func__, 0);
	return(VSTATUS_OK);
}
Ejemplo n.º 9
0
/*
 * sm_fsm_standby
 *
 * Handles a Set(SMInfo) control MAD for this state handler.
 *
 *   maip     - the received MAD; its SMP data is overwritten with our SMInfo
 *              and it is sent back as the reply.
 *   nodename - name of the sending SM's node, used only for logging.
 *
 * Attribute modifiers:
 *   SM_AMOD_DISCOVER    -> transition to DISCOVERING and trigger a sweep
 *   SM_AMOD_DISABLE     -> transition to NOTACTIVE
 *   SM_AMOD_HANDOVER    -> send a Set(SMInfo) ACKNOWLEDGE back to the
 *                          originator, then transition to MASTER and
 *                          trigger a sweep
 *   SM_AMOD_ACKNOWLEDGE -> accepted, no transition
 *   anything else       -> rejected with MAD_STATUS_BAD_ATTR
 *
 * The reply to the incoming MAD is always sent first; transitions happen
 * afterwards under new_topology_lock.  Always returns VSTATUS_OK.
 */
Status_t
sm_fsm_standby(Mai_t *maip, char *nodename)
{
	int			i;
	int			ackPathValid=1;		// cleared when no usable DR path can be built
	uint8_t		ipath[64];
	uint8_t		*rpath;
	uint8_t		*path;
	Status_t	status;
	long		new_state=-1;
	STL_SM_INFO	theirSmInfo;
	STL_SM_INFO	smInfoCopy;
	Lid_t		slid=maip->addrInfo.slid;    // original source lid; mai_reply swaps slid-dlid
	Lid_t		dlid=maip->addrInfo.dlid;    // original destination lid(us); mai_reply swaps slid-dlid

	IB_ENTER(__func__, maip->base.amod, 0, 0, 0);

	BSWAPCOPY_STL_SM_INFO((STL_SM_INFO *)STL_GET_SMP_DATA(maip), &theirSmInfo);

	switch (maip->base.amod) {
	case SM_AMOD_DISCOVER:				// C14-48
		new_state = SM_STATE_DISCOVERING;
		break;
	case SM_AMOD_DISABLE:				// C14-19
		new_state = SM_STATE_NOTACTIVE;
		break;
	case SM_AMOD_HANDOVER:				// C14-50
		new_state = SM_STATE_MASTER;
		break;
	case SM_AMOD_ACKNOWLEDGE:			// from previous HANDOVER
		break;
	default:
		maip->base.status = MAD_STATUS_BAD_ATTR;
		break;
	}

	if (maip->base.status > MAD_STATUS_OK) {
		IB_LOG_WARN_FMT(__func__,
			"[%s] SM received invalid AMOD[%d] from SM node %s, LID [0x%x], portguid ["FMT_U64"], TID="FMT_U64,
			sm_getStateText(sm_smInfo.u.s.SMStateCurrent), maip->base.amod, nodename,
			maip->addrInfo.slid, theirSmInfo.PortGUID, maip->base.tid);
	} else {
		IB_LOG_INFINI_INFO_FMT(__func__,
			   "SM in [%s] state after processing %s smInfo control from SM node %s, LID [0x%x], portguid ["FMT_U64"], TID="FMT_U64,
			   (new_state < 0 ? sm_getStateText(sm_smInfo.u.s.SMStateCurrent):sm_getStateText(new_state)), getAmod(maip->base.amod),
			   nodename, maip->addrInfo.slid, theirSmInfo.PortGUID, maip->base.tid);
	}

	/*
	 * Reply to this Set(SMInfo).
	 */
	sm_smInfo.ActCount++;
	BSWAPCOPY_STL_SM_INFO(&sm_smInfo, (STL_SM_INFO *)STL_GET_SMP_DATA(maip));
	status = mai_stl_reply(fd_async, maip, sizeof(STL_SM_INFO));
	if (status != VSTATUS_OK) {
		IB_LOG_ERRORRC("sm_fsm_standby - bad mai_reply rc:", status);
	}

	(void)vs_lock(&new_topology_lock);

	/* make appropriate none master transition if necessary */
	if (new_state >= SM_STATE_NOTACTIVE && new_state != SM_STATE_MASTER) {
		(void)sm_transition(new_state);
		if (new_state == SM_STATE_DISCOVERING) {
			sm_trigger_sweep(SM_SWEEP_REASON_STATE_TRANSITION); // wakeup the topology thread to start sweep
		}
	}

	/*
	 * If this was a HANDOVER, we need to ACK back.
	 */
	if (maip->base.amod == SM_AMOD_HANDOVER) {
		maip->addrInfo.destqp &= 0x00ffffff;

		if (maip->base.mclass == MAD_CV_SUBN_LR) {
			path = NULL;
			sm_topop->dlid = slid;    // this is original sender of SmInfo
			sm_topop->slid = dlid;    // this is us
			IB_LOG_INFINI_INFO_FMT(__func__, "sending LR HANDOVER ACK to node %s, Lid [0x%x], portguid "FMT_U64,
				   nodename, slid, theirSmInfo.PortGUID);
		} else if (maip->base.hopCount >= sizeof(ipath)) {
			/*
			 * The hop count comes from the received MAD.  The reversal loop
			 * below writes ipath[1..hopCount], so a value that does not fit
			 * the local path buffer would overrun it.  Treat this like a
			 * failed ACK: log it and do not send.
			 */
			path = NULL;
			ackPathValid = 0;
			IB_LOG_WARN_FMT(__func__,
				   "[%s] SM cannot send DR HANDOVER ACK to node %s, portguid "FMT_U64"; hop count %u exceeds maximum %u",
				   sm_getStateText(new_state), nodename, theirSmInfo.PortGUID,
				   (unsigned)maip->base.hopCount, (unsigned)(sizeof(ipath) - 1));
		} else {
			DRStlSmp_t *drsmp = (DRStlSmp_t *)maip->data;

			path = ipath;
			memset((void *)ipath, 0, sizeof(ipath));

			/* ipath[0] holds the hop count; the rest is the return path reversed */
			ipath[0] = maip->base.hopCount;
			rpath = drsmp->RetPath;
			for (i = 1; i <= maip->base.hopCount; i++) {
				ipath[i] = rpath[maip->base.hopCount + 1 - i];
			}
			IB_LOG_INFINI_INFO_FMT(__func__, "sending DR HANDOVER ACK to node %s, portguid "FMT_U64,
				   nodename, theirSmInfo.PortGUID);
		}

		if (ackPathValid) {
			/* send a copy so the request/response cannot modify sm_smInfo */
			smInfoCopy = sm_smInfo;
			status = SM_Set_SMInfo(fd_sminfo, SM_AMOD_ACKNOWLEDGE, path, &smInfoCopy, sm_config.mkey);
			if (status != VSTATUS_OK) {
				IB_LOG_WARN_FMT(__func__,
					   "[%s] SM did not receive response to Handover Acknowledgement from SM node %s, LID [0x%x], portguid ["FMT_U64"]",
					   sm_getStateText(new_state), nodename, slid, theirSmInfo.PortGUID);
			}
		}
		/* make transition to MASTER state (done whether or not the ACK succeeded) */
		(void)sm_transition(new_state);
		sm_trigger_sweep(SM_SWEEP_REASON_STATE_TRANSITION);
	}
	(void)vs_unlock(&new_topology_lock);

	IB_EXIT(__func__, 0);
	return(VSTATUS_OK);
}
Ejemplo n.º 10
0
/*
 * state_event_mad - handle an incoming SMInfo MAD (Get or Set).
 *
 * maip: the received MAD; it is modified in place and reused for the reply.
 *
 * Flow:
 *   1. Reject anything whose attribute id is not MAD_SMA_SMINFO.
 *   2. When we are MASTER and have completed at least one sweep, look the
 *      sender up in old_topology (by port GUID, then by source LID) to get
 *      its node description / port GUID, and query the dbsync state for it.
 *   3. Run the MKey check; a mismatch drops the request.
 *   4. Get:  reply with our SMInfo (SM_Key zeroed on a conditional MKey
 *            result) and, if we are master and the requester identified
 *            itself, react to the requester's reported SM state.
 *      Set:  reject it unless it is an ACKNOWLEDGE or comes from a master,
 *            otherwise dispatch to the sm_fsm_* handler for our sm_state.
 *
 * Returns VSTATUS_BAD for a non-SMInfo attribute or an MKey mismatch,
 * VSTATUS_OK after a Get or a rejected Set, and 'status' otherwise (the
 * sm_fsm_* return values are discarded).
 */
Status_t
state_event_mad(Mai_t *maip) {
	Status_t	status, mkeyCheckStatus, dbsyncCheckStat=VSTATUS_OK;
	STL_SM_INFO	theirSmInfo, ourSmInfo;
	uint64_t	mkey=0, portguid=0;
	Node_t		*nodep=NULL;
	Port_t		*portp=NULL;
	char		nodescription[64]={0};
	int			identified=0;
	boolean		uptodate = 0;
	SmRecp		smrecp = NULL;

	IB_ENTER(__func__, maip, 0, 0, 0);

	status = VSTATUS_OK;

//
//	Redundancy check on the AID.
//
	if (maip->base.aid != MAD_SMA_SMINFO) {
		IB_LOG_ERROR("not an SMInfo MAD aid:", maip->base.aid);
		IB_EXIT(__func__, VSTATUS_BAD);
		return(VSTATUS_BAD);
	}

	/*
	 * get requester's sm info
	 * ONLY OUR SM (as far as I know) provides its smInfo in the LR GET request
	 * we'll use that to find the standby sm node in topology (only if passcount > 0)
	 */
	BSWAPCOPY_STL_SM_INFO((STL_SM_INFO *)STL_GET_SMP_DATA(maip), &theirSmInfo);

	/* get requester's nodeInfo using the portGuid from the smInfo request */
	portguid = theirSmInfo.PortGUID;
	if (sm_state == SM_STATE_MASTER && topology_passcount > 0) {
		(void)vs_rdlock(&old_topology_lock);
		if (portguid != 0) {
			if ((portp = sm_find_port_guid(&old_topology, portguid)) != NULL)
				nodep = sm_find_port_node(&old_topology, portp);
		}
		if (nodep == NULL) {
			if (maip->addrInfo.slid != PERMISSIVE_LID) {
				/* try find by lid */
				portp = sm_find_node_and_port_lid(&old_topology, maip->addrInfo.slid, &nodep);
			}
		} else if (portguid != 0 && portguid != sm_smInfo.PortGUID) {
			identified = 1;        /* remote included his identity */
		}
		if (nodep) {
			/* get all data that we need before releasing topology lock */
			/*
			 * Copy one byte less than the buffer so the zero-initialized
			 * last byte always terminates the string: the description is
			 * printed with %s below and the source field is a fixed-size
			 * array that need not be NUL terminated.
			 */
			(void)memcpy(nodescription, nodep->nodeDesc.NodeString, sizeof(nodescription) - 1);
			/* portp comes from a separate lookup result; do not assume it is set */
			if (portp && portp->portData) portguid = portp->portData->guid;
		}
		(void)vs_rwunlock(&old_topology_lock);
		if ((identified || (portguid != 0)) && sm_smInfo.PortGUID != portguid) {
			/* see if it's time to do a full sync of this SM */
			dbsyncCheckStat = sm_dbsync_fullSyncCheck(portguid);
			/* get its current dbsync state */
			uptodate = sm_dbsync_isUpToDate(portguid, NULL);
		}
	}

	/*
	 *	For 1.1 (C14-53), SM in the NOTACTIVE state, replies to SubnGet/Set of SMInfo
	 *	Do an MKey check
	 */
	if ((mkeyCheckStatus = sm_mkey_check(maip, &mkey)) == VSTATUS_MISMATCH) {
		IB_LOG_WARN_FMT(__func__, "Mismatch mKey["FMT_U64"] SMInfo from node %s with , lid[0x%x], portguid "FMT_U64", TID="FMT_U64,
			   mkey, nodescription, maip->addrInfo.slid, portguid, maip->base.tid);
		IB_EXIT(__func__, VSTATUS_BAD);
		return(VSTATUS_BAD);
	}

	/*
	 *	If this is a Get method, then send the requestor our SMInfo.
	 */
	if (maip->base.method == MAD_CM_GET) {
		if (smDebugPerf) {
			IB_LOG_INFINI_INFO_FMT(__func__, "got SMInfo GET request from node %s, lid[0x%x], portguid "FMT_U64", TID="FMT_U64,
				   nodescription, maip->addrInfo.slid, portguid, maip->base.tid);
		}
		sm_smInfo.ActCount++;
		/*
		 *  Do not return our SMkey if the requester fails the Mkey check
		 */
		memcpy((void *)&ourSmInfo, (void *)&sm_smInfo, sizeof(sm_smInfo));
		if (mkeyCheckStatus == VSTATUS_CONDITIONAL) {
			ourSmInfo.SM_Key = 0;
		}

		// If this SM is Master, then fill in the elapsed time.
		if (ourSmInfo.u.s.SMStateCurrent == SM_STATE_MASTER) {
			ourSmInfo.ElapsedTime = (uint32_t)difftime(time(NULL),sm_masterStartTime);
		} else {
			ourSmInfo.ElapsedTime = 0;
		}

		BSWAPCOPY_STL_SM_INFO(&ourSmInfo, (STL_SM_INFO *)STL_GET_SMP_DATA(maip));
		if ((status = mai_stl_reply(fd_async, maip, sizeof(STL_SM_INFO))) != VSTATUS_OK) {
			IB_LOG_WARN_FMT(__func__, "failed to send reply [status=%d] to SMInfo GET request from node %s, lid[0x%x], portguid "FMT_U64", TID="FMT_U64,
				   status, nodescription, maip->addrInfo.slid, portguid, maip->base.tid);
		}

		/*
		 * THIS IS NOT IN THE SPEC!  GET request does not really include the requester's own smInfo
		 * We made our SM work this way so that the handover would happen right away instead of waiting
		 * until the next sweep.  If sweep is turned off, you may never handover!
		 *
		 * If we're master and get is from higher priority SM in standby or higher, force sweep so we'll handover
		 */
		if (identified && sm_smInfo.u.s.SMStateCurrent == SM_STATE_MASTER) {
			switch (theirSmInfo.u.s.SMStateCurrent) {
			case SM_STATE_MASTER:
			case SM_STATE_STANDBY:

				/* If this is an SM that is not in our list, trigger a sweep to add it */

				/* lock out SM record table */
				if ((status = vs_lock(&smRecords.smLock)) != VSTATUS_OK) {
					IB_LOG_ERROR("Can't lock SM Record table, rc:", status);
				} else {
					if ((smrecp = (SmRecp)cs_hashtable_search(smRecords.smMap, &theirSmInfo.PortGUID)) != NULL) {

						//trigger sweep if theirSmInfo.u.s.SMStateCurrent changed
						if (smrecp->smInfoRec.SMInfo.u.s.SMStateCurrent != theirSmInfo.u.s.SMStateCurrent) {
							IB_LOG_INFINI_INFO_FMT(__func__,
												   "triggering a sweep, remote SM lid[0x%x] state changed from %s to %s",
												   maip->addrInfo.dlid, sm_getStateText(smrecp->smInfoRec.SMInfo.u.s.SMStateCurrent),
												   sm_getStateText(theirSmInfo.u.s.SMStateCurrent));
							sm_trigger_sweep(SM_SWEEP_REASON_UPDATED_STANDBY);
						}

					} else {
						sm_trigger_sweep(SM_SWEEP_REASON_UNEXPECTED_SM);
					}
					(void)vs_unlock(&smRecords.smLock);
				}

				if ((sm_smInfo.u.s.Priority < theirSmInfo.u.s.Priority) ||
					(sm_smInfo.u.s.Priority == theirSmInfo.u.s.Priority && sm_smInfo.PortGUID > theirSmInfo.PortGUID)) {
					if (uptodate) {
						(void)vs_lock(&handover_sent_lock);
						/* clear flags if a handover has been sent*/
						if (handover_sent) {
							handover_sent = 0;
							triggered_handover = 0;
						}
						(void)vs_unlock(&handover_sent_lock);
						/* Prevent multiple triggers for handover sweep while the topo thread
						 * is still sweeping to do a handover due to our previous trigger.
						 * With this even if we get multiple Get SmInfo requests (due to
						 * standby SM pings) while topo thread is still processing our
						 * previous trigger, we do not cause multiple triggers.
						 */
						if (!triggered_handover) {
							IB_LOG_INFINI_INFO_FMT(__func__,
												   "triggering a sweep to hand over to node %s, lid[0x%x], portguid "FMT_U64", TID="FMT_U64,
												   nodescription, maip->addrInfo.dlid, theirSmInfo.PortGUID, maip->base.tid);  // lids got swapped by mai_reply
							sm_trigger_sweep(SM_SWEEP_REASON_HANDOFF);
							triggered_handover = 1;
						}
					}
				} else if (sm_config.monitor_standby_enable) {
					sm_dbsync_standbyHello(theirSmInfo.PortGUID);
				}
				break;
			case SM_STATE_DISCOVERING:
				if (dbsyncCheckStat != VSTATUS_NOT_FOUND) {
					IB_LOG_INFINI_INFO_FMT(__func__,
						"Standby SM in Discovery, may have been restarted, node %s, lid[0x%x], portguid "FMT_U64,
						nodescription,  maip->addrInfo.dlid, theirSmInfo.PortGUID);
					/* If it restarted, forget what we think we know about it */
					sm_dbsync_deleteSm(theirSmInfo.PortGUID);
				}
				break;
			case SM_STATE_NOTACTIVE:
				IB_LOG_INFINI_INFO_FMT(__func__,
					"standby SM indicating no longer active, node %s, lid[0x%x], portguid "FMT_U64,
					nodescription,  maip->addrInfo.dlid, theirSmInfo.PortGUID);
				sm_dbsync_deleteSm(theirSmInfo.PortGUID);
				break;
			default:
				/* any other reported state needs no action */
				break;
			} // end of switch
		}

		IB_EXIT(__func__, VSTATUS_OK);
		return(VSTATUS_OK);
	} // end if MAD_CM_GET

	if (smDebugPerf) {
		IB_LOG_INFINI_INFO_FMT(__func__, "got SMInfo SET request from node %s, Lid [0x%x], portguid "FMT_U64", TID="FMT_U64,
			   nodescription, maip->addrInfo.slid, portguid, maip->base.tid);
	}
	/*
	 *	This is now a Set method.  Let's check the transitions.  C14-38.1.1
	 */
	if ( (maip->base.amod != SM_AMOD_ACKNOWLEDGE && theirSmInfo.u.s.SMStateCurrent != SM_STATE_MASTER) ) {
		maip->base.status = MAD_STATUS_BAD_FIELD;
		IB_LOG_WARN_FMT(__func__,
			   "SmInfo SET control packet not from a Master SM on node %s, lid [0x%x], portguid "FMT_U64", TID="FMT_U64,
			   nodescription, maip->addrInfo.slid, portguid, maip->base.tid);
		if ((status = mai_stl_reply(fd_async, maip, sizeof(STL_SM_INFO))) != VSTATUS_OK)
			IB_LOG_WARN_FMT(__func__,
				   "failed to send reply [status=%d] to SMInfo SET request from node %s, lid [0x%x], portguid "FMT_U64", TID="FMT_U64,
				   status, nodescription, maip->addrInfo.slid, portguid, maip->base.tid);
		IB_EXIT(__func__, VSTATUS_OK);
		return(VSTATUS_OK);
	}

	/* hand the Set to the state machine handler matching our current state */
	switch (sm_state) {
	case SM_STATE_STANDBY:
		(void)sm_fsm_standby(maip, nodescription);
		break;
	case SM_STATE_NOTACTIVE:
		(void)sm_fsm_notactive(maip, nodescription);
		break;
	case SM_STATE_MASTER:
		(void)sm_fsm_master(maip, nodescription);
		break;
	case SM_STATE_DISCOVERING:
		(void)sm_fsm_discovering(maip, nodescription);
		break;
	default:
		(void)sm_fsm_default(maip, nodescription);
		break;
	}

	IB_EXIT(__func__, status);
	return(status);
}
Ejemplo n.º 11
0
/*
 * sa_VFabric_GetTable - collect the VFInfo records that answer an SA query.
 *
 * maip:    the request MAD; maip->base.status is set to the MAD status of
 *          the answer.
 * records: out, number of records placed in the sa_data response buffer.
 *
 * Every virtual fabric in old_topology is compared against the components
 * selected by the request's component mask, and matching fabrics the
 * requester may see are appended to sa_data.  The scan stops after the first
 * appended record when the query selects by index or by name.
 *
 * Returns the status of sa_create_template_mask() on failure, otherwise the
 * status of the last sa_check_len() call (VSTATUS_OK if none was made).
 */
Status_t
sa_VFabric_GetTable(Mai_t *maip, uint32_t *records) {
	uint8_t			*data;
	uint32_t		bytes;
	STL_SA_MAD		samad;
	Status_t		status;
	STL_VFINFO_RECORD	query;
	uint64_t		mask;
	int				idx;
	int				fullDefaultMember;	// requestor full member of Default PKey
	Node_t			*reqNodep;
	Port_t			*reqPortp;
	uint64_t		serviceId;	// only reported if selected by the mask
	IB_GID			mGid = (IB_GID){.Raw = {0}}; // only reported if selected by the mask

	IB_ENTER("sa_VFabric_GetTable", maip, *records, 0, 0);

	*records = 0;
	data = sa_data;
	bytes = Calculate_Padding(sizeof(STL_VFINFO_RECORD));
	BSWAPCOPY_STL_SA_MAD((STL_SA_MAD*)maip->data, &samad, sizeof(STL_VFINFO_RECORD));
	mask = samad.header.mask;

	/* Build the template mask used for the lookup. */
	status = sa_create_template_mask(maip->base.aid, mask);
	if (status != VSTATUS_OK) {
		IB_LOG_WARNRC("sa_VFabric_GetTable: failed to create template mask, rc:", status);
		IB_EXIT("sa_VFabric_GetTable", status);
		return(status);
	}

	maip->base.status = MAD_STATUS_OK;

	BSWAPCOPY_STL_VFINFO_RECORD((STL_VFINFO_RECORD*)samad.data, &query);

	/* reserved fields of the query record are cleared */
	query.rsvd1 = 0;
	query.s1.rsvd2 = 0;
	query.s1.rsvd3 = 0;
	query.s1.rsvd4 = 0;
	query.s1.rsvd5 = 0;
	query.rsvd6 = 0;
	memset(query.rsvd7, 0, sizeof(query.rsvd7));

	serviceId = (mask & STL_VFINFO_REC_COMP_SERVICEID) ? query.ServiceID : 0;

	if (mask & STL_VFINFO_REC_COMP_MGID) {
		mGid = query.MGID; // for response
	}

	(void)vs_rdlock(&old_topology_lock);

	VirtualFabrics_t *vfs = old_topology.vfs_ptr;

	if (vfs == NULL) {
		maip->base.status = MAD_STATUS_SA_REQ_INVALID;
		goto reply_vFabric;
	}

	reqPortp = sm_find_node_and_port_lid(&old_topology, maip->addrInfo.slid, &reqNodep);
	if (!sm_valid_port(reqPortp) || reqPortp->state <= IB_PORT_DOWN) {
		if (saDebugPerf) {
			IB_LOG_INFINI_INFO_FMT("sa_VFabric_GetTable",
				"Request from node which is no longer valid, slid=0x%x", maip->addrInfo.slid);
		}
		maip->base.status = MAD_STATUS_SA_REQ_INVALID;
		goto reply_vFabric;
	}

	fullDefaultMember = smValidatePortPKey(DEFAULT_PKEY, reqPortp) ? 1 : 0;

	for (idx = 0; idx < vfs->number_of_vfs; idx++) {

		/* each selected component must match, otherwise skip this fabric */
		if (mask & STL_VFINFO_REC_COMP_PKEY) {
			if (PKEY_VALUE(vfs->v_fabric[idx].pkey) != PKEY_VALUE(query.pKey))
				continue;
		}

		if (mask & STL_VFINFO_REC_COMP_INDEX) {
			if (vfs->v_fabric[idx].index != query.vfIndex)
				continue;
		}

		if (mask & STL_VFINFO_REC_COMP_NAME) {
			if (strncmp((void*)vfs->v_fabric[idx].name,
				(void*)query.vfName, STL_VFABRIC_NAME_LEN) != 0)
				continue;
		}

		// TBD - when allow multiple SLs for DOR, adjust this query to check
		// if SL matches any of the SLs used by given VF
		if (mask & STL_VFINFO_REC_COMP_SL) {
			if (vfs->v_fabric[idx].base_sl != query.s1.sl)
				continue;
		}

		if (mask & STL_VFINFO_REC_COMP_SERVICEID) {
			if (smVFValidateVfServiceId(idx, query.ServiceID) != VSTATUS_OK)
				continue;
		}

		if (mask & STL_VFINFO_REC_COMP_MGID) {
			if (smVFValidateVfMGid(idx, (uint64_t*)query.MGID.Raw) != VSTATUS_OK)
				continue;
		}

		// A requestor that is not a full member of DEFAULT_PKEY only sees
		// fabrics it belongs to.  The VF pkey is converted to a FULL PKey
		// so that a limited member reqPortp is allowed as well.
		if (!fullDefaultMember) {
			if (!smValidatePortPKey(MAKE_PKEY(PKEY_TYPE_FULL,
					vfs->v_fabric[idx].pkey), reqPortp))
				continue;
		}

		status = sa_check_len(data, sizeof(STL_VFINFO_RECORD), bytes);
		if (status != VSTATUS_OK) {
			maip->base.status = MAD_STATUS_SA_NO_RESOURCES;
			IB_LOG_ERROR_FMT( "sa_VFabric_GetTable", "Reached size limit at %d records", *records);
			break;
		}

		// VFabric_Set doesn't use the samad parameter, so for the time being just type cast. (Should work either way).
		sa_VFabric_Set(data, idx, &samad, serviceId, mGid);
		BSWAP_STL_VFINFO_RECORD((STL_VFINFO_RECORD*)data);

		if (mask) {
			(void)sa_template_test(samad.data, &data, sizeof(STL_VFINFO_RECORD), bytes, records);
		} else {
			sa_increment_and_pad(&data, sizeof(STL_VFINFO_RECORD), bytes, records);
		}

		// pkey not necessarily a unique identifier, but a query by
		// index or by name ends the scan here.
		if (mask & (STL_VFINFO_REC_COMP_INDEX | STL_VFINFO_REC_COMP_NAME))
			goto reply_vFabric;
	}

	/* debug aid: report each selected component when nothing matched */
	if (*records == 0 && saDebugPerf) {
		if (mask & STL_VFINFO_REC_COMP_PKEY) {
			IB_LOG_INFINI_INFO_FMT("sa_VFabric_GetTable",
				"No Virtual Fabric defined with PKey 0x%x", query.pKey);
		}
		if (mask & STL_VFINFO_REC_COMP_SL) {
			IB_LOG_INFINI_INFO_FMT("sa_VFabric_GetTable",
				"No Virtual Fabric defined with SL %u", query.s1.sl);
		}
		if (mask & STL_VFINFO_REC_COMP_SERVICEID) {
			IB_LOG_INFINI_INFO_FMT("sa_VFabric_GetTable",
				"No Virtual Fabric defined with ServiceId "FMT_U64, query.ServiceID);
		}
		if (mask & STL_VFINFO_REC_COMP_MGID) {
			IB_LOG_INFINI_INFO_FMT("sa_VFabric_GetTable",
				"No Virtual Fabric defined with MGID "FMT_GID,
				query.MGID.Type.Global.SubnetPrefix,
				query.MGID.Type.Global.InterfaceID);
		}
		if (mask & STL_VFINFO_REC_COMP_INDEX) {
			IB_LOG_INFINI_INFO_FMT("sa_VFabric_GetTable",
				"No Virtual Fabric defined with VF Index %d", query.vfIndex);
		}
		if (mask & STL_VFINFO_REC_COMP_NAME) {
			IB_LOG_INFINI_INFO_FMT( "sa_VFabric_GetTable",
				"No Virtual Fabric defined with VF Name %s", query.vfName);
		}
	}

reply_vFabric:
	(void)vs_rwunlock(&old_topology_lock);

	IB_EXIT("sa_VFabric_GetTable", status);
	return(status);
}
Ejemplo n.º 12
0
//
//  Log the set bits of a bitset as a compact, comma separated list in
//  which runs of three or more consecutive bits are collapsed to
//  "first-last" (e.g. "1-3,5").  If 'prelude' is non-NULL it is printed
//  in front of the list.
//
//  Only a summary (bit count) is logged when the bitset has no pool to
//  allocate from, has more than 500 bits set, or the allocation fails.
//  If the list does not fit the work buffer, the part that fits is
//  logged.
//
//  This uses log level INFINI_INFO and should probably only
//  be called under debug mode.
//
void bitset_info_log(bitset_t* bitset, char* prelude) {
	char*	string = NULL;
	int		first = 1;
	int		range = 0;
	int		range_start = -1;
	int		prev = -1;
	int		bit = -1;
	size_t	max_str_len = 0;
	size_t	pos = 0;
	int		res = 0;
	Status_t	status;

	if (!bitset) return;

	if (bitset->bits_m == NULL) {
		IB_LOG_INFINI_INFO_FMT( __func__, "NOBITS");
		return;
	}

	if (bitset->nset_m == 0) {
		if (prelude) {
			IB_LOG_INFINI_INFO_FMT(__func__, "%s <nil>", prelude);
		} else {
			IB_LOG_INFINI_INFO_FMT(__func__, "<nil>");
		}
		return;

	} else if (!bitset->pool_m || (bitset->nset_m>500)) {
		if (prelude) {
			IB_LOG_INFINI_INFO_FMT(__func__, "%s, nset= %d", prelude, (int)bitset->nset_m);
		} else {
			IB_LOG_INFINI_INFO_FMT(__func__, "nset= %d", (int)bitset->nset_m);
		}
		return;
	}

	// Sized only after the NULL check above; budget of 5 characters per
	// set bit plus the terminator.  Larger bit numbers may not fit, which
	// is why every snprintf result below is checked for truncation.
	max_str_len = bitset->nset_m*5+1;

	status = vs_pool_alloc(bitset->pool_m, max_str_len, (void *)&string);
	if (status != VSTATUS_OK) {
		if (prelude) {
			IB_LOG_INFINI_INFO_FMT(__func__, "%s, nset= %d", prelude, (int)bitset->nset_m);
		} else {
			IB_LOG_INFINI_INFO_FMT(__func__, "nset= %d", (int)bitset->nset_m);
		}
		return;
	}
	string[0] = '\0';

	bit = bitset_find_first_one(bitset);

	while (bit != -1) {
		int wrote = 0;	// set when this iteration called snprintf

		if (first) {
			res = snprintf(string + pos, max_str_len - pos, "%d", bit);
			wrote = 1;
			first = 0;
		} else if (range && (prev != bit-1)) {
			// the current run ended at 'prev': close it, then emit 'bit'
			range = 0;
			if ((prev - range_start) > 1) {
				res = snprintf(string + pos, max_str_len - pos, "-%d,%d", prev, bit);
			} else {
				res = snprintf(string + pos, max_str_len - pos, ",%d,%d", prev, bit);
			}
			wrote = 1;
			range_start = -1;
		} else if (!range && (prev == bit-1)) {
			// start of a run; output is deferred until the run ends
			range_start = prev;
			range = 1;
		} else if (!range) {
			res = snprintf(string + pos, max_str_len - pos, ",%d", bit);
			wrote = 1;
		}

		if (wrote) {
			if (res == 0)
				break;
			if (res < 0 || (size_t)res >= max_str_len - pos) {
				// Error or truncation: drop the partial item so no
				// cut-off number is logged, and never advance 'pos'
				// past the buffer (max_str_len - pos would wrap).
				string[pos] = '\0';
				goto bail;
			}
			pos += (size_t)res;
		}

		prev = bit;
		bit = bitset_find_next_one(bitset, bit+1);
	}

	// the bitset ended inside a run: close it
	if (range && (prev != -1)) {
		if ((prev - range_start) > 1) {
			res = snprintf(string + pos, max_str_len - pos, "-%d", prev);
		} else {
			res = snprintf(string + pos, max_str_len - pos, ",%d", prev);
		}
		if (res < 0 || (size_t)res >= max_str_len - pos) {
			string[pos] = '\0';
		}
	}

bail:
	if (prelude) {
		IB_LOG_INFINI_INFO_FMT(__func__, "%s %s", prelude, string);
	} else {
		IB_LOG_INFINI_INFO_FMT(__func__, "%s", string);
	}

	if ((status = vs_pool_free(bitset->pool_m, string)) != VSTATUS_OK) {
		IB_LOG_ERRORRC("can't free allocated space for bitset log, rc:", status);
	}
}