void sendLoad(void) { static int noSendCount = 0; struct loadVectorStruct myLoadVector; enum loadstruct loadType; struct hostNode *hPtr; struct sockaddr_in toAddr; int i; int bufSize; enum limReqCode limReqCode; XDR xdrs; char *repBuf; int sendInfo = SEND_NO_INFO; struct LSFHeader reqHdr; limReqCode = LIM_LOAD_UPD; resInactivityCount++; if (resInactivityCount > resInactivityLimit) myHostPtr->status[0] |= LIM_RESDOWN; if (time(0) - lastSbdActiveTime > SBD_ACTIVE_TIME) myHostPtr->status[0] |= LIM_SBDDOWN; if (logclass & LC_TRACE) ls_syslog(LOG_DEBUG, "%s: Entering ..", __func__); if (masterMe) { for (hPtr = myClusterPtr->hostList; hPtr; hPtr = hPtr->nextPtr) { if (hPtr == myHostPtr) continue; hPtr->hostInactivityCount++; if (hPtr->hostInactivityCount > 10000) hPtr->hostInactivityCount = 100; if (! LS_ISUNAVAIL(hPtr->status)) { if (hPtr->hostInactivityCount > (hostInactivityLimit + retryLimit)) { ls_syslog(LOG_DEBUG, "\ %s: Declaring %s unavailable inactivity Count=%d", __func__, hPtr->hostName, hPtr->hostInactivityCount); hPtr->status[0] |= LIM_UNAVAIL; hPtr->infoValid = FALSE; if (hPtr->numInstances > 0) { int resNo; for (i = 0; i < hPtr->numInstances; i++) { if (hPtr->instances[i]->updHost == NULL || hPtr->instances[i]->updHost != hPtr) continue; resNo = resNameDefined(hPtr->instances[i]->resName); if (allInfo.resTable[resNo].flags & RESF_DYNAMIC) { strcpy (hPtr->instances[i]->value, "-"); hPtr->instances[i]->updHost = NULL; } } } hPtr->loadMask = 0; hPtr->infoMask = 0; } if ( (hPtr->hostInactivityCount > hostInactivityLimit) && (hPtr->hostInactivityCount <= (hostInactivityLimit + retryLimit))) { if (logclass & LC_COMM) { ls_syslog(LOG_DEBUG3, "%s: Asking %s to send load info %d %d", __func__, hPtr->hostName, hPtr->hostInactivityCount, hostInactivityLimit + retryLimit); } announceMasterToHost(hPtr, SEND_LOAD_INFO); } } } } else {
static int shouldStop (struct hostLoad *loadV, struct jobCard *jobCard, int *reasons, int *subreasons, int num, int *stopmore) { static char fname[] = "shouldStop"; int i, numLoad = -1, j; struct hostLoad *load = NULL; static struct tclHostData tclHostData; static int first = TRUE; *reasons = 0; *subreasons = 0; if( jobCard->postJobStarted ) { return false; } if (jobCard->jobSpecs.jAttrib & JOB_URGENT_NOSTOP) return false; if (now - jobCard->windWarnTime < sbdSleepTime) return FALSE; if (!JOB_STARTED(jobCard)) return FALSE; if (LS_ISUNAVAIL(loadV->status)) return FALSE; if (num <= 0) return FALSE; for (i = 0; i <jobCard->jobSpecs.numToHosts && (*reasons) == 0; i++) { if (i > 0 && !strcmp (jobCard->jobSpecs.toHosts[i], jobCard->jobSpecs.toHosts[i-1])) continue; numLoad++; load = NULL; for (j = 0; j < num; j ++) { if (equalHost_(jobCard->jobSpecs.toHosts[i], loadV[j].hostName)) { load = &(loadV[j]); break; } } if (load == NULL) { ls_syslog(LOG_ERR, _i18n_msg_get(ls_catd , NL_SETN, 5705, "%s: Can not find load information for host <%s>"), fname, jobCard->jobSpecs.toHosts[i]); /* catgets 5705 */ return FALSE; } if (LS_ISLOCKEDU(load->status) && !(jobCard->jobSpecs.jAttrib & Q_ATTRIB_EXCLUSIVE)) { *reasons = SUSP_HOST_LOCK; *stopmore = TRUE; } else if (LS_ISLOCKEDM(load->status)) { *reasons = SUSP_HOST_LOCK_MASTER; *stopmore = TRUE; } else if (load->li[IT] <= jobCard->jobSpecs.thresholds.loadStop[numLoad][IT] && load->li[IT] != -INFINIT_LOAD && jobCard->jobSpecs.thresholds.loadStop[numLoad][IT] != -INFINIT_LOAD) { *reasons |= SUSP_LOAD_REASON; *subreasons = IT; *stopmore = TRUE; } else if (load->li[LS] >= jobCard->jobSpecs.thresholds.loadStop[numLoad][LS] && load->li[LS] != INFINIT_LOAD && jobCard->jobSpecs.thresholds.loadStop[numLoad][LS] != INFINIT_LOAD) { *reasons |= SUSP_LOAD_REASON; *subreasons = LS; *stopmore = TRUE; } else if (load->li[UT] >= jobCard->jobSpecs.thresholds.loadStop[numLoad][UT] && load->li[UT] != INFINIT_LOAD && jobCard->jobSpecs.thresholds.loadStop[numLoad][UT] != INFINIT_LOAD) { *reasons |= SUSP_LOAD_REASON; *subreasons = UT; } else if(load->li[PG] >= jobCard->jobSpecs.thresholds.loadStop[numLoad][PG] && load->li[PG] != INFINIT_LOAD && jobCard->jobSpecs.thresholds.loadStop[numLoad][PG] != INFINIT_LOAD) { *reasons |= SUSP_LOAD_REASON; *subreasons = PG; } else if(load->li[IO] >= jobCard->jobSpecs.thresholds.loadStop[numLoad][IO] && load->li[IO] != INFINIT_LOAD && jobCard->jobSpecs.thresholds.loadStop[numLoad][IO] != INFINIT_LOAD) { *reasons |= SUSP_LOAD_REASON; *subreasons = IO; } else if(load->li[MEM] <= jobCard->jobSpecs.thresholds.loadStop[numLoad][MEM] && load->li[MEM] != -INFINIT_LOAD && jobCard->jobSpecs.thresholds.loadStop[numLoad][MEM] != -INFINIT_LOAD) { *reasons |= SUSP_LOAD_REASON; *subreasons = MEM; } else if(load->li[SWP] <= jobCard->jobSpecs.thresholds.loadStop[numLoad][SWP] && load->li[SWP] != -INFINIT_LOAD && jobCard->jobSpecs.thresholds.loadStop[numLoad][SWP] != -INFINIT_LOAD) { *reasons |= SUSP_LOAD_REASON; *subreasons = SWP; } else if(load->li[TMP] <= jobCard->jobSpecs.thresholds.loadStop[numLoad][TMP] && load->li[TMP] != -INFINIT_LOAD && jobCard->jobSpecs.thresholds.loadStop[numLoad][TMP] != -INFINIT_LOAD) { *reasons |= SUSP_LOAD_REASON; *subreasons = TMP; } for (j = R15S; !(*reasons) && j <= R15M; j++) if ((load->li[j] != INFINIT_LOAD) && (jobCard->jobSpecs.thresholds.loadStop[numLoad][j] != INFINIT_LOAD) && (load->li[j] >= jobCard->jobSpecs.thresholds.loadStop[numLoad][j])) { *reasons |= SUSP_LOAD_REASON; *subreasons = j; break; } for (j = MEM + 1; !(*reasons) && j < MIN(allLsInfo->numIndx, jobCard->jobSpecs.thresholds.nIdx); j++) { if (load->li[j] >= INFINIT_LOAD || load->li[j] <= -INFINIT_LOAD || jobCard->jobSpecs.thresholds.loadStop[numLoad][j] >= INFINIT_LOAD || jobCard->jobSpecs.thresholds.loadStop[numLoad][j] <= -INFINIT_LOAD) { continue; } if (allLsInfo->resTable[j].orderType == INCR) { if (load->li[j] >= jobCard->jobSpecs.thresholds.loadStop[numLoad][j]) { *reasons |= SUSP_LOAD_REASON; *subreasons = j; break; } } else { if (load->li[j] <= jobCard->jobSpecs.thresholds.loadStop[numLoad][j]) { *reasons |= SUSP_LOAD_REASON; *subreasons = j; break; } } } if (!(*reasons) && jobCard->stopCondVal != NULL) { int returnCode; if (first == TRUE) { initTclHostData (&tclHostData); returnCode = getTclHostData (load, &tclHostData, FALSE); first = FALSE; } else { returnCode = getTclHostData (load, &tclHostData, TRUE); } if (returnCode >= 0 && evalResReq (jobCard->stopCondVal->selectStr, &tclHostData, DFT_FROMTYPE) == 1) { *reasons |= SUSP_QUE_STOP_COND; break; } } } if (! (*reasons)) return FALSE; if (LS_ISLOCKEDU(load->status) || LS_ISLOCKEDM(load->status)) { return TRUE; } else if (shouldStop1 (load)) { if (logclass & (LC_SCHED | LC_EXEC)) ls_syslog (LOG_DEBUG2, "%s: Should stop job %s; reason=%x, subreasons=%d", fname, lsb_jobid2str(jobCard->jobSpecs.jobId), *reasons, *subreasons); return TRUE; } return FALSE; }