/* parseUsage() */ static int parseUsage(char *usageReq, struct resVal *resVal, struct lsInfo *lsInfo) { int i; int m; int entry; float value; char *token; link_t *link; linkiter_t iter; struct _rusage_ *r; char *s; int *rusage_bit_map; float *val; char *usageReq2; char *s2; if ((i = strlen(usageReq)) == 0) return PARSE_OK; for (m = 0; m < i; m++) if (usageReq[m] != ' ') break; if (m == i) return PARSE_OK; s2 = usageReq2 = strip_spaces(usageReq); resVal->rl = make_link(); link = get_rusage_entries(usageReq2); i = 0; traverse_init(link, &iter); while ((s = traverse_link(&iter))) { /* Allocate for each element of the link */ rusage_bit_map = calloc(GET_INTNUM(lsInfo->nRes), sizeof(int)); val = calloc(lsInfo->nRes, sizeof(float)); resVal->genClass = 0; while ((token = getNextToken(&s)) != NULL) { if (token[0] == '-') token++; entry = getKeyEntry(token); if (entry > 0) { if (entry != KEY_DURATION && entry != KEY_DECAY) goto pryc; if (s[0] == '=') { int returnValue; if (entry == KEY_DURATION) returnValue = getTimeVal(&s, &value); else returnValue = getVal(&s, &value); if (returnValue < 0 || value < 0.0) return PARSE_BAD_VAL; if (entry == KEY_DURATION) resVal->duration = value; else resVal->decay = value; continue; } } entry = getResEntry(token); if (entry < 0) goto pryc; if (!(lsInfo->resTable[entry].flags & RESF_DYNAMIC) && (lsInfo->resTable[entry].valueType != LS_NUMERIC)) { if (s[0] == '=') { if (getVal(&s, &value) < 0 || value < 0.0) goto pryc; } continue; } if (entry < MAXSRES) resVal->genClass |= 1 << entry; SET_BIT(entry, rusage_bit_map); if (s[0] == '=') { if (getVal(&s, &value) < 0 || value < 0.0) goto pryc; val[entry] = value; } } /* Save the current rusage block */ r = calloc(1, sizeof(struct _rusage_)); r->bitmap = rusage_bit_map; r->val = val; enqueue_link(resVal->rl, r); if (i == 0) { /* The entry 0 is both in the link and * in the resVal. The default values * were allocated in setDefaults() */ _free_(resVal->rusage_bit_map); _free_(resVal->val); /* Copy the values as later we free them separately */ resVal->rusage_bit_map = calloc(GET_INTNUM(lsInfo->nRes), sizeof(int)); memcpy(resVal->rusage_bit_map, rusage_bit_map, GET_INTNUM(lsInfo->nRes) * sizeof(int)); resVal->val = calloc(lsInfo->nRes, sizeof(float)); memcpy(resVal->val, r->val, lsInfo->nRes * sizeof(float)); } ++i; } /* while (s = traverse_link()) */ resVal->options |= PR_RUSAGE; while ((s = pop_link(link))) _free_(s); fin_link(link); _free_(s2); return PARSE_OK; pryc: _free_(rusage_bit_map); _free_(val); while ((s = pop_link(link))) _free_(s); fin_link(link); while ((r = pop_link(resVal->rl))) { _free_(r->bitmap); _free_(r->val); _free_(r); } fin_link(resVal->rl); resVal->rl = NULL; _free_(s2); return PARSE_BAD_NAME; }
void job_checking (void) { static char fname[] = "job_checking"; struct jobCard *jobCard, *nextJob; struct hostLoad *myload, savedLoad; char *myhostnm; static time_t last_check; char preempted = FALSE; int i; if (last_check == 0) last_check = now; if (jobcnt <= 0) { last_check = now; return; } checkFinish (); for (jobCard = jobQueHead->forw; (jobCard != jobQueHead); jobCard = nextJob) { nextJob = jobCard->forw; if (IS_FINISH(jobCard->jobSpecs.jStatus) || (jobCard->jobSpecs.jStatus & JOB_STAT_PEND)) continue; ruLimits(jobCard); if (IS_RUN_JOB_CMD(jobCard->jobSpecs.jStatus)) { jobCard->runTime += (int) (now - last_check); } if (jobCard->runTime > jobCard->jobSpecs.lsfLimits[LSF_RLIMIT_RUN].rlim_curl) { if ((jobCard->jobSpecs.terminateActCmd == NULL) || (jobCard->jobSpecs.terminateActCmd[0] == '\0')) { if (jobCard->runTime > jobCard->jobSpecs.lsfLimits[LSF_RLIMIT_RUN].rlim_curl + WARN_TIME && jobCard->timeExpire) { if ((IS_SUSP (jobCard->jobSpecs.jStatus)) && (jobCard->jobSpecs.reasons & SUSP_RES_LIMIT) && (jobCard->jobSpecs.subreasons & SUB_REASON_RUNLIMIT)) continue; else if (jobCard->jobSpecs.jStatus & JOB_STAT_KILL) continue; else { ls_syslog(LOG_INFO, \ "%s: warning period expired killing the job=%d", fname, jobCard->jobSpecs.jobId); jobSigStart (jobCard, SIG_TERM_RUNLIMIT, 0, 0, SIGLOG); sbdlog_newstatus(jobCard); jobCard->jobSpecs.jStatus |= JOB_STAT_KILL; } } else if (!jobCard->timeExpire) { ls_syslog(LOG_INFO, I18N(5704, "%s: sending warning signal to job=%d"), /* catgets 5704 */ fname, jobCard->jobSpecs.jobId); jobsig(jobCard, SIGUSR2, FALSE); jobCard->timeExpire = TRUE; } } else { if (jobCard->runTime > jobCard->jobSpecs.lsfLimits[LSF_RLIMIT_RUN].rlim_curl) { if ((IS_SUSP (jobCard->jobSpecs.jStatus)) && (jobCard->jobSpecs.reasons & SUSP_RES_LIMIT) && (jobCard->jobSpecs.subreasons & SUB_REASON_RUNLIMIT)) continue; else { jobSigStart (jobCard, SIG_TERM_RUNLIMIT, 0, 0, SIGLOG); sbdlog_newstatus(jobCard); } } } continue; } if (jobCard->jobSpecs.termTime && now > jobCard->jobSpecs.termTime && !(jobCard->jobSpecs.jAttrib & JOB_FORCE_KILL)) { if ((jobCard->jobSpecs.terminateActCmd == NULL) || (jobCard->jobSpecs.terminateActCmd[0] == '\0')) { if (now > jobCard->jobSpecs.termTime + WARN_TIME && jobCard->timeExpire) { if ((IS_SUSP (jobCard->jobSpecs.jStatus)) && (jobCard->jobSpecs.reasons & SUSP_RES_LIMIT) && (jobCard->jobSpecs.subreasons & SUB_REASON_DEADLINE)) continue; else if (jobCard->jobSpecs.jStatus & JOB_STAT_KILL) continue; else { jobSigStart (jobCard, SIG_TERM_DEADLINE, 0, 0, SIGLOG); sbdlog_newstatus(jobCard); jobCard->jobSpecs.jStatus |= JOB_STAT_KILL; } } else if (!jobCard->timeExpire) { jobsig(jobCard, SIGUSR2, FALSE); jobCard->timeExpire = TRUE; } } else { if (now > jobCard->jobSpecs.termTime) { if ((IS_SUSP (jobCard->jobSpecs.jStatus)) && (jobCard->jobSpecs.reasons & SUSP_RES_LIMIT) && (jobCard->jobSpecs.subreasons & SUB_REASON_DEADLINE)) continue; else { jobSigStart (jobCard, SIG_TERM_DEADLINE, 0, 0, SIGLOG); sbdlog_newstatus(jobCard); } } } continue; } if (! window_ok (jobCard) && !(jobCard->jobSpecs.jAttrib & JOB_URGENT_NOSTOP)) { if (! (jobCard->jobSpecs.options & SUB_WINDOW_SIG) || ((jobCard->jobSpecs.options & SUB_WINDOW_SIG) && now - jobCard->windWarnTime >= WARN_TIME)) { jobSuspendAction(jobCard, SIG_SUSP_WINDOW, SUSP_QUEUE_WINDOW, 0); continue; } } else { jobResumeAction(jobCard, SIG_RESUME_WINDOW, SUSP_QUEUE_WINDOW); continue; } } if ((myhostnm = ls_getmyhostname()) == NULL) { ls_syslog(LOG_ERR, I18N_FUNC_FAIL_MM, fname, "ls_getmyhostname"); die(SLAVE_FATAL); } myload = ls_loadofhosts (NULL, 0, EXACT|EFFECTIVE, 0, &myhostnm, 1); if (myload == NULL) { if (myStatus != NO_LIM) ls_syslog(LOG_INFO, I18N_FUNC_FAIL_MM, fname, "ls_loadofhosts"); if (lserrno == LSE_LIM_BADHOST) relife(); if (lserrno == LSE_BAD_XDR) relife(); if (lserrno == LSE_LIM_DOWN || lserrno == LSE_TIME_OUT) { myStatus |= NO_LIM; tryChkpntMig(); } last_check = now; return; } else myStatus = 0; memcpy ((char *)&savedLoad, (char *)myload, sizeof (struct hostLoad)); savedLoad.li = (float *) my_malloc (allLsInfo->numIndx * sizeof (float), "job_checking"); savedLoad.status = (int *) my_malloc ((1 + GET_INTNUM(allLsInfo->numIndx)) * sizeof (int), "job_checking"); for (i = 0; i < allLsInfo->numIndx; i++) savedLoad.li[i] = myload->li[i]; for (i = 0; i < 1 + GET_INTNUM(allLsInfo->numIndx); i++) savedLoad.status[i] = myload->status[i]; tryResume (&savedLoad); if (!preempted) tryStop (myhostnm, &savedLoad); tryChkpntMig(); FREEUP(savedLoad.li); FREEUP(savedLoad.status); last_check = now; return; }
static int setDefaults(struct resVal *resVal, struct lsInfo *lsInfo, int options) { int i; if (options & PR_DEFFROMTYPE) strcpy(resVal->selectStr, "expr [type \"eq\" \"local\"]"); else strcpy(resVal->selectStr, "expr [type \"eq\" \"any\"]"); resVal->nphase = 2; resVal->order[0] = R15S + 1; resVal->order[1] = PG + 1; resVal->val = calloc(lsInfo->nRes, sizeof(float)); resVal->indicies = calloc((lsInfo->numIndx), sizeof(int)); if (!resVal->val || !resVal->indicies) { freeResVal (resVal); lserrno = LSE_MALLOC; ls_perror("intlib:resreq"); return PARSE_BAD_MEM; } for (i = 0; i < lsInfo->nRes; i++) resVal->val[i] = INFINIT_LOAD; resVal->genClass = 0; if (!(options & PR_BATCH)) { resVal->genClass |= 1 << R15S; resVal->genClass |= 1 << R1M; resVal->genClass |= 1 << R15M; resVal->val[R15S] = 1.0; resVal->val[R1M] = 1.0; resVal->val[R15M] = 1.0; } resVal->nindex = lsInfo->numIndx; for(i = 0; i < resVal->nindex; i++) resVal->indicies[i] = i; resVal->rusage_bit_map = calloc (GET_INTNUM(lsInfo->nRes), sizeof(int)); if (resVal->rusage_bit_map == NULL) { lserrno = LSE_MALLOC; freeResVal (resVal); return PARSE_BAD_MEM; } for (i = 0; i < GET_INTNUM(lsInfo->nRes); i++) resVal->rusage_bit_map[i] = 0; if (!(options &PR_BATCH)) { SET_BIT(R15S, resVal->rusage_bit_map); SET_BIT(R1M, resVal->rusage_bit_map); SET_BIT(R15M, resVal->rusage_bit_map); } resVal->duration = INFINIT_INT; resVal->decay = INFINIT_FLOAT; resVal->numHosts = INFINIT_INT; resVal->maxNumHosts = INFINIT_INT; resVal->pTile = INFINIT_INT; resVal->options = 0; return 0; }