Beispiel #1
0
/* parseUsage()
 */
static int
parseUsage(char *usageReq, struct resVal *resVal, struct lsInfo *lsInfo)
{
    int i;
    int m;
    int entry;
    float value;
    char *token;
    link_t *link;
    linkiter_t iter;
    struct _rusage_ *r;
    char *s;
    int *rusage_bit_map;
    float *val;
    char *usageReq2;
    char *s2;

    if ((i = strlen(usageReq)) == 0)
        return PARSE_OK;

    for (m = 0; m < i; m++)
        if (usageReq[m] != ' ')
            break;
    if (m == i)
        return PARSE_OK;

    s2 = usageReq2 = strip_spaces(usageReq);

    resVal->rl = make_link();
    link = get_rusage_entries(usageReq2);

    i = 0;
    traverse_init(link, &iter);
    while ((s = traverse_link(&iter))) {

        /* Allocate for each element of the link
         */
        rusage_bit_map = calloc(GET_INTNUM(lsInfo->nRes), sizeof(int));
        val = calloc(lsInfo->nRes, sizeof(float));

        resVal->genClass = 0;
        while ((token = getNextToken(&s)) != NULL) {

            if (token[0] == '-')
                token++;

            entry = getKeyEntry(token);
            if (entry > 0) {
                if (entry != KEY_DURATION && entry != KEY_DECAY)
                    goto pryc;

                if (s[0] == '=') {
                    int returnValue;
                    if (entry == KEY_DURATION)
                        returnValue =  getTimeVal(&s, &value);
                    else
                        returnValue = getVal(&s, &value);
                    if (returnValue < 0 || value < 0.0)
                        return PARSE_BAD_VAL;
                    if (entry == KEY_DURATION)
                        resVal->duration = value;
                    else
                        resVal->decay = value;

                    continue;
                }
            }

            entry = getResEntry(token);
            if (entry < 0)
		goto pryc;

            if (!(lsInfo->resTable[entry].flags & RESF_DYNAMIC)
                && (lsInfo->resTable[entry].valueType != LS_NUMERIC)) {
                if (s[0] == '=') {
                    if (getVal(&s, &value) < 0 || value < 0.0)
                        goto pryc;
                }
                continue;
            }

            if (entry < MAXSRES)
                resVal->genClass |= 1 << entry;

            SET_BIT(entry, rusage_bit_map);

            if (s[0] == '=') {
                if (getVal(&s, &value) < 0 || value < 0.0)
                    goto pryc;
                val[entry] = value;
            }
        }

        /* Save the current rusage block
         */
        r = calloc(1, sizeof(struct _rusage_));
        r->bitmap = rusage_bit_map;
        r->val = val;
        enqueue_link(resVal->rl, r);

        if (i == 0) {
            /* The entry 0 is both in the link and
             * in the resVal. The default values
             * were allocated in setDefaults()
             */
            _free_(resVal->rusage_bit_map);
            _free_(resVal->val);
	    /* Copy the values as later we free them separately
	     */
            resVal->rusage_bit_map = calloc(GET_INTNUM(lsInfo->nRes),
					    sizeof(int));
	    memcpy(resVal->rusage_bit_map,
		   rusage_bit_map,
		   GET_INTNUM(lsInfo->nRes) * sizeof(int));
            resVal->val = calloc(lsInfo->nRes, sizeof(float));
	    memcpy(resVal->val, r->val, lsInfo->nRes * sizeof(float));
        }
        ++i;
    } /* while (s = traverse_link()) */

    resVal->options |= PR_RUSAGE;

    while ((s = pop_link(link)))
        _free_(s);
    fin_link(link);
    _free_(s2);

    return PARSE_OK;

pryc:

    _free_(rusage_bit_map);
    _free_(val);
    while ((s = pop_link(link)))
        _free_(s);
    fin_link(link);
    while ((r = pop_link(resVal->rl))) {
        _free_(r->bitmap);
        _free_(r->val);
        _free_(r);
    }
    fin_link(resVal->rl);
    resVal->rl = NULL;
    _free_(s2);

    return PARSE_BAD_NAME;
}
Beispiel #2
0
void
job_checking (void)
{
    static char fname[] = "job_checking";
    struct jobCard *jobCard, *nextJob;
    struct hostLoad *myload, savedLoad;
    char *myhostnm;
    static time_t last_check;
    char preempted = FALSE;
    int i;

    if (last_check == 0)
	last_check = now;
    if (jobcnt <= 0) {
        last_check = now;
        return;
    }

    checkFinish ();

    for (jobCard = jobQueHead->forw; (jobCard != jobQueHead);
         jobCard = nextJob) {

	nextJob = jobCard->forw;
        if (IS_FINISH(jobCard->jobSpecs.jStatus)
              || (jobCard->jobSpecs.jStatus & JOB_STAT_PEND))
            continue;

	ruLimits(jobCard);

	if (IS_RUN_JOB_CMD(jobCard->jobSpecs.jStatus)) {

	    jobCard->runTime += (int) (now - last_check);
	}
	if (jobCard->runTime >
	    jobCard->jobSpecs.lsfLimits[LSF_RLIMIT_RUN].rlim_curl) {
            if ((jobCard->jobSpecs.terminateActCmd == NULL)
                || (jobCard->jobSpecs.terminateActCmd[0] == '\0')) {
	        if (jobCard->runTime >
		    jobCard->jobSpecs.lsfLimits[LSF_RLIMIT_RUN].rlim_curl
		    + WARN_TIME && jobCard->timeExpire) {

                    if ((IS_SUSP (jobCard->jobSpecs.jStatus))
                       && (jobCard->jobSpecs.reasons & SUSP_RES_LIMIT)
                       && (jobCard->jobSpecs.subreasons & SUB_REASON_RUNLIMIT))
                        continue;
		    else if (jobCard->jobSpecs.jStatus & JOB_STAT_KILL)
			continue;
                    else {

                        ls_syslog(LOG_INFO, \
                                  "%s: warning period expired killing the job=%d",
			    fname, jobCard->jobSpecs.jobId);
                        jobSigStart (jobCard, SIG_TERM_RUNLIMIT, 0, 0, SIGLOG);
                        sbdlog_newstatus(jobCard);
			jobCard->jobSpecs.jStatus |= JOB_STAT_KILL;
                    }
	        } else if (!jobCard->timeExpire) {
		    ls_syslog(LOG_INFO, I18N(5704,
                        "%s: sending warning signal to job=%d"), /* catgets 5704 */
			fname, jobCard->jobSpecs.jobId);
		    jobsig(jobCard, SIGUSR2, FALSE);
		    jobCard->timeExpire = TRUE;
	        }
            } else {
                if (jobCard->runTime >
                    jobCard->jobSpecs.lsfLimits[LSF_RLIMIT_RUN].rlim_curl) {

                    if ((IS_SUSP (jobCard->jobSpecs.jStatus))
                       && (jobCard->jobSpecs.reasons & SUSP_RES_LIMIT)
                       && (jobCard->jobSpecs.subreasons & SUB_REASON_RUNLIMIT))
                        continue;
                    else {
                        jobSigStart (jobCard, SIG_TERM_RUNLIMIT, 0, 0, SIGLOG);
                        sbdlog_newstatus(jobCard);
                    }
                }
            }
	    continue;
	}

        if (jobCard->jobSpecs.termTime && now > jobCard->jobSpecs.termTime

             && !(jobCard->jobSpecs.jAttrib & JOB_FORCE_KILL)) {
            if ((jobCard->jobSpecs.terminateActCmd == NULL)
                 || (jobCard->jobSpecs.terminateActCmd[0] == '\0')) {
                if (now > jobCard->jobSpecs.termTime + WARN_TIME
                                                   && jobCard->timeExpire) {

                    if ((IS_SUSP (jobCard->jobSpecs.jStatus))
                       && (jobCard->jobSpecs.reasons & SUSP_RES_LIMIT)
                       && (jobCard->jobSpecs.subreasons & SUB_REASON_DEADLINE))
                        continue;
		    else if (jobCard->jobSpecs.jStatus & JOB_STAT_KILL)
			continue;
                    else {

                        jobSigStart (jobCard, SIG_TERM_DEADLINE, 0, 0, SIGLOG);
                        sbdlog_newstatus(jobCard);
			jobCard->jobSpecs.jStatus |= JOB_STAT_KILL;
                    }
                } else
		    if (!jobCard->timeExpire) {
		        jobsig(jobCard, SIGUSR2, FALSE);
		        jobCard->timeExpire = TRUE;
		    }
            } else {
                if (now > jobCard->jobSpecs.termTime) {

                    if ((IS_SUSP (jobCard->jobSpecs.jStatus))
                       && (jobCard->jobSpecs.reasons & SUSP_RES_LIMIT)
                       && (jobCard->jobSpecs.subreasons & SUB_REASON_DEADLINE))
                        continue;
                    else {
                        jobSigStart (jobCard, SIG_TERM_DEADLINE, 0, 0, SIGLOG);
                        sbdlog_newstatus(jobCard);
                    }
                }
            }
            continue;
        }


        if (! window_ok (jobCard)
	    && !(jobCard->jobSpecs.jAttrib & JOB_URGENT_NOSTOP)) {
	    if (! (jobCard->jobSpecs.options & SUB_WINDOW_SIG)
                || ((jobCard->jobSpecs.options & SUB_WINDOW_SIG)
                          && now - jobCard->windWarnTime >= WARN_TIME)) {


	        jobSuspendAction(jobCard, SIG_SUSP_WINDOW, SUSP_QUEUE_WINDOW, 0);
		continue;

	    }
	} else {

		jobResumeAction(jobCard, SIG_RESUME_WINDOW, SUSP_QUEUE_WINDOW);
                continue;
	}
    }


    if ((myhostnm = ls_getmyhostname()) == NULL) {
        ls_syslog(LOG_ERR, I18N_FUNC_FAIL_MM, fname, "ls_getmyhostname");
        die(SLAVE_FATAL);
    }
    myload = ls_loadofhosts (NULL, 0, EXACT|EFFECTIVE, 0, &myhostnm, 1);
    if (myload == NULL) {
        if (myStatus != NO_LIM)

	    ls_syslog(LOG_INFO, I18N_FUNC_FAIL_MM, fname, "ls_loadofhosts");
	if (lserrno == LSE_LIM_BADHOST)
	    relife();
	if (lserrno == LSE_BAD_XDR)
	    relife();
	if (lserrno == LSE_LIM_DOWN || lserrno == LSE_TIME_OUT) {
	    myStatus |= NO_LIM;


            tryChkpntMig();
        }
        last_check = now;
	return;
    } else
	myStatus = 0;



    memcpy ((char *)&savedLoad, (char *)myload, sizeof (struct hostLoad));
    savedLoad.li = (float *) my_malloc (allLsInfo->numIndx * sizeof (float),
				   "job_checking");
    savedLoad.status = (int *) my_malloc
       ((1 + GET_INTNUM(allLsInfo->numIndx)) * sizeof (int), "job_checking");
    for (i = 0; i < allLsInfo->numIndx; i++)
        savedLoad.li[i] = myload->li[i];
    for (i = 0; i < 1 + GET_INTNUM(allLsInfo->numIndx); i++)
        savedLoad.status[i] = myload->status[i];
    tryResume (&savedLoad);

    if (!preempted)
        tryStop (myhostnm, &savedLoad);

    tryChkpntMig();


    FREEUP(savedLoad.li);
    FREEUP(savedLoad.status);
    last_check = now;
    return;

}
Beispiel #3
0
static int
setDefaults(struct resVal *resVal, struct lsInfo *lsInfo, int options)
{
    int i;

    if (options & PR_DEFFROMTYPE)
        strcpy(resVal->selectStr, "expr [type \"eq\" \"local\"]");
    else
        strcpy(resVal->selectStr, "expr [type \"eq\" \"any\"]");

    resVal->nphase = 2;
    resVal->order[0] = R15S + 1;
    resVal->order[1] = PG + 1;
    resVal->val = calloc(lsInfo->nRes, sizeof(float));
    resVal->indicies = calloc((lsInfo->numIndx), sizeof(int));
    if (!resVal->val || !resVal->indicies) {
	freeResVal (resVal);
        lserrno = LSE_MALLOC;
        ls_perror("intlib:resreq");
        return PARSE_BAD_MEM;
    }

    for (i = 0; i < lsInfo->nRes; i++)
        resVal->val[i] = INFINIT_LOAD;

    resVal->genClass =  0;
    if (!(options & PR_BATCH)) {
        resVal->genClass |=  1 << R15S;
        resVal->genClass |=  1 << R1M;
        resVal->genClass |=  1 << R15M;
        resVal->val[R15S] = 1.0;
        resVal->val[R1M]  = 1.0;
        resVal->val[R15M] = 1.0;
    }

    resVal->nindex = lsInfo->numIndx;
    for(i = 0; i < resVal->nindex; i++)
        resVal->indicies[i] = i;

    resVal->rusage_bit_map =
        calloc (GET_INTNUM(lsInfo->nRes), sizeof(int));
    if (resVal->rusage_bit_map == NULL) {
	lserrno = LSE_MALLOC;
	freeResVal (resVal);
        return PARSE_BAD_MEM;
    }

    for (i = 0; i < GET_INTNUM(lsInfo->nRes); i++)
	resVal->rusage_bit_map[i] = 0;

    if (!(options &PR_BATCH)) {
        SET_BIT(R15S, resVal->rusage_bit_map);
        SET_BIT(R1M, resVal->rusage_bit_map);
        SET_BIT(R15M, resVal->rusage_bit_map);
    }

    resVal->duration = INFINIT_INT;
    resVal->decay = INFINIT_FLOAT;
    resVal->numHosts = INFINIT_INT;
    resVal->maxNumHosts = INFINIT_INT;
    resVal->pTile = INFINIT_INT;

    resVal->options = 0;
    return 0;
}