Ejemplo n.º 1
0
void
sendLoad(void)
{
    static int noSendCount = 0;
    struct loadVectorStruct myLoadVector;
    enum   loadstruct loadType;
    struct hostNode *hPtr;
    struct sockaddr_in toAddr;
    int    i;
    int    bufSize;
    enum   limReqCode limReqCode;
    XDR    xdrs;
    char   *repBuf;
    int    sendInfo = SEND_NO_INFO;
    struct LSFHeader reqHdr;

    limReqCode = LIM_LOAD_UPD;
    resInactivityCount++;

    if (resInactivityCount > resInactivityLimit)
        myHostPtr->status[0] |= LIM_RESDOWN;

    if (time(0) - lastSbdActiveTime > SBD_ACTIVE_TIME)
        myHostPtr->status[0] |= LIM_SBDDOWN;

    if (logclass & LC_TRACE)
       ls_syslog(LOG_DEBUG, "%s: Entering ..", __func__);

    if (masterMe) {

        for (hPtr = myClusterPtr->hostList; hPtr; hPtr = hPtr->nextPtr) {

            if (hPtr == myHostPtr)
                continue;

                hPtr->hostInactivityCount++;
                if (hPtr->hostInactivityCount > 10000)
                    hPtr->hostInactivityCount = 100;

                if (! LS_ISUNAVAIL(hPtr->status)) {
                    if (hPtr->hostInactivityCount > (hostInactivityLimit + retryLimit)) {
                        ls_syslog(LOG_DEBUG, "\
%s: Declaring %s unavailable inactivity Count=%d", __func__,
                                  hPtr->hostName, hPtr->hostInactivityCount);

                        hPtr->status[0] |= LIM_UNAVAIL;
                        hPtr->infoValid = FALSE;
                        if (hPtr->numInstances > 0) {
                            int resNo;
                            for (i = 0; i < hPtr->numInstances; i++) {
                                if (hPtr->instances[i]->updHost == NULL
                                      || hPtr->instances[i]->updHost != hPtr)
                                    continue;
                                resNo = resNameDefined(hPtr->instances[i]->resName);
                                if (allInfo.resTable[resNo].flags & RESF_DYNAMIC) {
                                    strcpy (hPtr->instances[i]->value, "-");
                                    hPtr->instances[i]->updHost = NULL;
                                }
                            }
                        }
                        hPtr->loadMask  = 0;
                        hPtr->infoMask  = 0;
                    }
                    if ( (hPtr->hostInactivityCount > hostInactivityLimit) &&
                         (hPtr->hostInactivityCount <= (hostInactivityLimit + retryLimit))) {
                        if (logclass & LC_COMM) {
                            ls_syslog(LOG_DEBUG3,
                              "%s: Asking %s to send load info %d %d", __func__,
                              hPtr->hostName, hPtr->hostInactivityCount,
                              hostInactivityLimit + retryLimit);
                        }
                        announceMasterToHost(hPtr, SEND_LOAD_INFO);
                    }
                }
        }

    } else {
Ejemplo n.º 2
0
static int
shouldStop (struct hostLoad *loadV,
	    struct jobCard *jobCard, int *reasons, int *subreasons, int num, int *stopmore)
{
    static char fname[] = "shouldStop";
    int i, numLoad = -1, j;
    struct hostLoad *load = NULL;
    static struct tclHostData tclHostData;
    static int first = TRUE;

    *reasons = 0;
    *subreasons = 0;


    if( jobCard->postJobStarted ) {
        return false;
    }


    if (jobCard->jobSpecs.jAttrib & JOB_URGENT_NOSTOP)
	return false;


    if (now - jobCard->windWarnTime < sbdSleepTime)
        return FALSE;


    if (!JOB_STARTED(jobCard))
        return FALSE;


    if (LS_ISUNAVAIL(loadV->status))
	return FALSE;
    if (num <= 0)
	return FALSE;


    for (i = 0; i <jobCard->jobSpecs.numToHosts && (*reasons) == 0; i++) {
        if (i > 0 && !strcmp (jobCard->jobSpecs.toHosts[i],
					     jobCard->jobSpecs.toHosts[i-1]))
            continue;
        numLoad++;
	load = NULL;
        for (j = 0; j < num; j ++) {
    	    if (equalHost_(jobCard->jobSpecs.toHosts[i], loadV[j].hostName)) {
	        load = &(loadV[j]);
	        break;
            }
        }
        if (load == NULL) {
	    ls_syslog(LOG_ERR, _i18n_msg_get(ls_catd , NL_SETN, 5705,
		"%s: Can not find load information for host <%s>"), fname, jobCard->jobSpecs.toHosts[i]); /* catgets 5705 */
            return FALSE;
        }
        if (LS_ISLOCKEDU(load->status)
            && !(jobCard->jobSpecs.jAttrib & Q_ATTRIB_EXCLUSIVE)) {
            *reasons = SUSP_HOST_LOCK;
            *stopmore = TRUE;
        }
	else if (LS_ISLOCKEDM(load->status)) {
            *reasons = SUSP_HOST_LOCK_MASTER;
            *stopmore = TRUE;
        }
        else if (load->li[IT] <= jobCard->jobSpecs.thresholds.loadStop[numLoad][IT]
            && load->li[IT] != -INFINIT_LOAD
            && jobCard->jobSpecs.thresholds.loadStop[numLoad][IT] != -INFINIT_LOAD) {
	    *reasons |= SUSP_LOAD_REASON;
            *subreasons = IT;
            *stopmore = TRUE;
        }
        else if (load->li[LS] >=
			  jobCard->jobSpecs.thresholds.loadStop[numLoad][LS]
            && load->li[LS] != INFINIT_LOAD
            && jobCard->jobSpecs.thresholds.loadStop[numLoad][LS]
						      != INFINIT_LOAD) {
            *reasons |= SUSP_LOAD_REASON;
            *subreasons = LS;
            *stopmore = TRUE;
        }
        else if (load->li[UT] >=
			 jobCard->jobSpecs.thresholds.loadStop[numLoad][UT]
            && load->li[UT] != INFINIT_LOAD
            && jobCard->jobSpecs.thresholds.loadStop[numLoad][UT] !=
							   INFINIT_LOAD) {
            *reasons |= SUSP_LOAD_REASON;
            *subreasons = UT;
        }
        else if(load->li[PG] >=
		      jobCard->jobSpecs.thresholds.loadStop[numLoad][PG]
            && load->li[PG] != INFINIT_LOAD
            && jobCard->jobSpecs.thresholds.loadStop[numLoad][PG]
						    != INFINIT_LOAD) {
            *reasons |= SUSP_LOAD_REASON;
            *subreasons = PG;
        }
        else if(load->li[IO] >=
		     jobCard->jobSpecs.thresholds.loadStop[numLoad][IO]
            && load->li[IO] != INFINIT_LOAD
            && jobCard->jobSpecs.thresholds.loadStop[numLoad][IO]
						      != INFINIT_LOAD) {
            *reasons |= SUSP_LOAD_REASON;
            *subreasons = IO;
        }
        else if(load->li[MEM]
			 <= jobCard->jobSpecs.thresholds.loadStop[numLoad][MEM]
            && load->li[MEM] != -INFINIT_LOAD
            && jobCard->jobSpecs.thresholds.loadStop[numLoad][MEM]
						      != -INFINIT_LOAD) {
            *reasons |= SUSP_LOAD_REASON;
            *subreasons = MEM;
        }

        else if(load->li[SWP]
			 <= jobCard->jobSpecs.thresholds.loadStop[numLoad][SWP]
            && load->li[SWP] != -INFINIT_LOAD
            && jobCard->jobSpecs.thresholds.loadStop[numLoad][SWP]
						      != -INFINIT_LOAD) {
            *reasons |= SUSP_LOAD_REASON;
            *subreasons = SWP;
        }
        else if(load->li[TMP]
			 <= jobCard->jobSpecs.thresholds.loadStop[numLoad][TMP]
            && load->li[TMP] != -INFINIT_LOAD
            && jobCard->jobSpecs.thresholds.loadStop[numLoad][TMP]
						      != -INFINIT_LOAD) {
            *reasons |= SUSP_LOAD_REASON;
            *subreasons = TMP;
        }

        for (j = R15S; !(*reasons) && j <= R15M; j++)
	    if ((load->li[j] != INFINIT_LOAD)
	        && (jobCard->jobSpecs.thresholds.loadStop[numLoad][j]
							 != INFINIT_LOAD)
	        && (load->li[j]
			>= jobCard->jobSpecs.thresholds.loadStop[numLoad][j])) {
	        *reasons |= SUSP_LOAD_REASON;
                *subreasons = j;
                break;
	    }


        for (j = MEM + 1; !(*reasons) &&
               j < MIN(allLsInfo->numIndx, jobCard->jobSpecs.thresholds.nIdx);
	              j++) {
            if (load->li[j] >= INFINIT_LOAD || load->li[j] <= -INFINIT_LOAD
                || jobCard->jobSpecs.thresholds.loadStop[numLoad][j]
							 >= INFINIT_LOAD
                || jobCard->jobSpecs.thresholds.loadStop[numLoad][j]
							 <= -INFINIT_LOAD) {
                continue;
            }
	    if (allLsInfo->resTable[j].orderType == INCR) {
	        if (load->li[j]
		       >= jobCard->jobSpecs.thresholds.loadStop[numLoad][j]) {
		    *reasons |= SUSP_LOAD_REASON;
                    *subreasons = j;
		    break;
                }
	    } else {
	        if (load->li[j]
		      <= jobCard->jobSpecs.thresholds.loadStop[numLoad][j]) {
		    *reasons |= SUSP_LOAD_REASON;
                    *subreasons = j;
		    break;
                }
	    }
        }

        if (!(*reasons) && jobCard->stopCondVal != NULL) {
            int returnCode;
            if (first == TRUE) {
                initTclHostData (&tclHostData);
                returnCode = getTclHostData (load, &tclHostData, FALSE);
                first = FALSE;
            } else {
                returnCode = getTclHostData (load, &tclHostData, TRUE);
            }
            if (returnCode >= 0
		     && evalResReq (jobCard->stopCondVal->selectStr,
    	       	        &tclHostData, DFT_FROMTYPE) == 1) {
        	*reasons |= SUSP_QUE_STOP_COND;
		break;
            }
        }
    }


    if (! (*reasons))
	return FALSE;


    if (LS_ISLOCKEDU(load->status) || LS_ISLOCKEDM(load->status)) {
	return TRUE;
    } else if (shouldStop1 (load)) {
        if (logclass & (LC_SCHED | LC_EXEC))
            ls_syslog (LOG_DEBUG2,
			"%s: Should stop job %s; reason=%x, subreasons=%d",
                        fname, lsb_jobid2str(jobCard->jobSpecs.jobId),
			*reasons, *subreasons);

        return TRUE;
    }
    return FALSE;

}