Example #1
0
/*
 * xdr_jobCard - XDR filter for a struct jobCard.
 *
 * xdrs   XDR stream; its x_op field selects the operation.
 * jCard  job card to process.
 * hdr    message header handed through to xdr_jobSpecs.
 *
 * With XDR_FREE the routine releases actCmd and exitFile and then runs
 * xdr_jobSpecs on the embedded jobSpecs.  For every other operation the
 * fields are pushed through the stream in one fixed order; that order is
 * the wire layout, so it must not be rearranged.
 *
 * Returns TRUE when every step succeeded, FALSE at the first failure.
 */
bool_t
xdr_jobCard (XDR *xdrs, struct jobCard *jCard, struct LSFHeader *hdr)
{
    static char   fname[]="xdr_jobCard()";
    char          *userName;
    bool_t        ok;

    if (xdrs->x_op == XDR_FREE) {
        FREEUP(jCard->actCmd);
        FREEUP(jCard->exitFile);
        return xdr_jobSpecs(xdrs, &jCard->jobSpecs, hdr) ? TRUE : FALSE;
    }

    /* Leading group of scalar fields; && stops at the first failing one. */
    ok = xdr_int(xdrs, (int *)&jCard->execGid)
        && xdr_int(xdrs, &jCard->notReported)
        && xdr_time_t(xdrs, &jCard->windEdge)
        && xdr_char(xdrs, &jCard->active)
        && xdr_char(xdrs, &jCard->timeExpire)
        && xdr_char(xdrs, &jCard->missing)
        && xdr_char(xdrs, &jCard->mbdRestarted)
        && xdr_time_t(xdrs, &jCard->windWarnTime)
        && xdr_int(xdrs, &jCard->runTime)
        && xdr_int(xdrs, &jCard->w_status)
        && xdr_float(xdrs, &jCard->cpuTime)
        && xdr_time_t(xdrs, &jCard->lastChkpntTime)
        && xdr_int(xdrs, &jCard->migCnt)
        && xdr_int(xdrs, &jCard->cleanupPid)
        && xdr_int(xdrs, &jCard->execJobFlag);
    if (!ok) {
        ls_syslog(LOG_ERR, I18N_FUNC_FAIL, fname, "xdr");
        return FALSE;
    }

    /* This field fails silently (no log entry). */
    if (!xdr_time_t(xdrs, &jCard->lastStatusMbdTime))
        return FALSE;

    /* The user name is handled through a local pointer to the existing
     * buffer; when decoding, the buffer is emptied first. */
    userName = jCard->execUsername;
    if (xdrs->x_op == XDR_DECODE)
        userName[0] = '\0';

    if (!xdr_string(xdrs, &userName, MAX_LSB_NAME_LEN)) {
        ls_syslog(LOG_ERR, I18N_FUNC_S_FAIL_M, fname,
                  "xdr_string", "execUserName");
        return FALSE;
    }

    ok = xdr_int(xdrs, &jCard->actReasons)
        && xdr_int(xdrs, &jCard->actSubReasons);
    if (!ok) {
        ls_syslog(LOG_ERR, I18N_FUNC_FAIL, fname, "xdr");
        return FALSE;
    }

    /* Variable-length strings; failures here are not logged. */
    ok = xdr_var_string(xdrs, &jCard->actCmd)
        && xdr_var_string(xdrs, &jCard->exitFile)
        && xdr_var_string(xdrs, &jCard->clusterName);
    if (!ok)
        return FALSE;

    if (!xdr_arrayElement(xdrs, (char *)&jCard->jobSpecs, hdr, xdr_jobSpecs)) {
        ls_syslog(LOG_ERR, I18N_FUNC_FAIL, fname, "xdr_jobSpecs");
        return FALSE;
    }

    if (!xdr_int(xdrs, &jCard->servSocket))
        return FALSE;

    /* Peak resource usage counters. */
    ok = xdr_int(xdrs, &jCard->maxRusage.mem)
        && xdr_int(xdrs, &jCard->maxRusage.swap)
        && xdr_int(xdrs, &jCard->maxRusage.utime)
        && xdr_int(xdrs, &jCard->maxRusage.stime)
        && xdr_int(xdrs, &jCard->maxRusage.npids);
    if (!ok) {
        ls_syslog(LOG_ERR, I18N_FUNC_FAIL, fname, "xdr_int jRusage");
        return FALSE;
    }

    if (!xdr_int(xdrs, (int *)&jCard->actFlags)) {
        ls_syslog(LOG_ERR, I18N_FUNC_FAIL, fname, "xdr_actFlags");
        return FALSE;
    }

    return TRUE;
}
Example #2
0
/*
 * do_newjob - service a "new job" request.
 *
 * xdrs    stream positioned at the encoded jobSpecs of the request.
 * chfd    channel the reply is written to (also handed to job_exec).
 * reqHdr  header of the incoming request.
 *
 * The request is decoded into a local jobSpecs.  If a job with the same
 * jobId is already on the jobQueHead list, its current identity and status
 * are returned without starting anything.  Otherwise a job card is
 * allocated and initialised, hosts are locked for exclusive jobs, and the
 * job is started through job_exec.  Whatever the outcome, a reply header
 * (plus a jobReply when reply is ERR_NO_ERROR) is encoded and written to
 * chfd.
 */
void
do_newjob(XDR *xdrs, int chfd, struct LSFHeader *reqHdr)
{
    static char        fname[] = "do_newjob()";
    struct lsfAuth     *auth = NULL;
    struct jobCard     *jp;
    struct jobSpecs    jobSpecs;
    struct jobReply    jobReply;
    struct LSFHeader   replyHdr;
    sbdReplyType       reply;
    XDR                replyXdrs;
    char               *replyStruct;
    char               replyBuf[MSGSIZE];

    memset(&jobReply, 0, sizeof(jobReply));

    if (!xdr_jobSpecs(xdrs, &jobSpecs, reqHdr)) {
        reply = ERR_BAD_REQ;
        ls_syslog(LOG_ERR, I18N_FUNC_FAIL, fname, "xdr_jobSpecs");
        goto sendReply;
    }

    /* A job that is already queued is simply reported back as it stands. */
    jp = jobQueHead->forw;
    while (jp != jobQueHead) {
        if (jp->jobSpecs.jobId == jobSpecs.jobId) {
            jobReply.jobId = jp->jobSpecs.jobId;
            jobReply.jobPid = jp->jobSpecs.jobPid;
            jobReply.jobPGid = jp->jobSpecs.jobPGid;
            jobReply.jStatus = jp->jobSpecs.jStatus;
            reply = ERR_NO_ERROR;
            goto sendReply;
        }
        jp = jp->forw;
    }

    jp = calloc(1, sizeof(struct jobCard));
    if (jp == NULL) {
        ls_syslog(LOG_ERR, I18N_JOB_FAIL_S_M, fname,
                  lsb_jobid2str(jobSpecs.jobId), "calloc");
        reply = ERR_MEM;
        goto sendReply;
    }

    /* The card starts as a byte copy of the request, then gets adjusted. */
    memcpy(&jp->jobSpecs, &jobSpecs, sizeof(struct jobSpecs));
    jp->jobSpecs.jStatus &= ~JOB_STAT_MIG;
    jp->jobSpecs.startTime = now;
    jp->jobSpecs.reasons = 0;
    jp->jobSpecs.subreasons = 0;
    /* Initialize the core number
     */
    jp->core_num = -1;

    if ((jp->jobSpecs.jAttrib & Q_ATTRIB_EXCLUSIVE) && lockHosts (jp) < 0) {
        ls_syslog(LOG_ERR, I18N_JOB_FAIL_S, fname,
                  lsb_jobid2str(jp->jobSpecs.jobId), "lockHosts");
        unlockHosts (jp, jp->jobSpecs.numToHosts);
        reply = ERR_LOCK_FAIL;
        freeWeek(jp->week);
        FREEUP(jp);
        goto sendReply;
    }

    jp->runTime = 0;
    /* On failure initJobCard is handed &reply, presumably to store the
     * error code that is then sent back -- confirm against initJobCard. */
    if (initJobCard(jp, &jobSpecs, (int *)&reply) < 0) {
        if (jp->jobSpecs.jAttrib & Q_ATTRIB_EXCLUSIVE) {
            unlockHosts (jp, jp->jobSpecs.numToHosts);
        }
        FREEUP(jp);
        goto sendReply;
    }

    jp->execJobFlag = 0;
    if (jp->runTime < 0) {
        jp->runTime = 0;
    }

    /* Execution identity is not known yet. */
    jp->execGid = 0;
    jp->execUsername[0] = '\0';
    jp->jobSpecs.execUid = -1;
    jp->jobSpecs.execUsername[0] = '\0';

    /* Drop the spool directory when getUnixSpoolDir returns NULL for it. */
    if (jp->jobSpecs.jobSpoolDir[0] != '\0'
        && getUnixSpoolDir (jp->jobSpecs.jobSpoolDir) == NULL) {
        jp->jobSpecs.jobSpoolDir[0] = '\0';
    }

    if ((logclass & LC_TRACE) && jp->jobSpecs.jobSpoolDir[0] != 0) {
        ls_syslog(LOG_DEBUG,
                  "%s: the SpoolDir for  job <%s>  is %s \n",
                  fname, lsb_jobid2str(jp->jobSpecs.jobId),
                  jp->jobSpecs.jobSpoolDir);
    }

    if (jp->jobSpecs.options & SUB_PRE_EXEC) {
        SBD_SET_STATE(jp, (JOB_STAT_RUN | JOB_STAT_PRE_EXEC));
    } else {
        SBD_SET_STATE(jp, JOB_STAT_RUN);
    }

    reply = job_exec(jp, chfd);

    if (reply == ERR_NO_ERROR) {
        jobReply.jobId = jp->jobSpecs.jobId;
        jobReply.jobPid = jp->jobSpecs.jobPid;
        jobReply.jobPGid = jp->jobSpecs.jobPGid;
        jobReply.jStatus = jp->jobSpecs.jStatus;
    } else {
        ls_syslog(LOG_ERR, I18N_JOB_FAIL_S, fname,
                  lsb_jobid2str(jp->jobSpecs.jobId), "job_exec");
        if (jp->jobSpecs.jAttrib & Q_ATTRIB_EXCLUSIVE) {
            unlockHosts (jp, jp->jobSpecs.numToHosts);
        }
        /* jp must not be used past this point on this path. */
        deallocJobCard(jp);
    }

sendReply:
    xdr_lsffree(xdr_jobSpecs, (char *)&jobSpecs, reqHdr);
    xdrmem_create(&replyXdrs, replyBuf, MSGSIZE, XDR_ENCODE);
    initLSFHeader_(&replyHdr);
    replyHdr.opCode = reply;

    /* Only a successful reply carries a jobReply payload. */
    if (reply == ERR_NO_ERROR) {
        replyStruct = (char *) &jobReply;
    } else {
        replyStruct = (char *) NULL;
    }

    if (!xdr_encodeMsg(&replyXdrs, replyStruct, &replyHdr, xdr_jobReply, 0, auth)) {
        ls_syslog(LOG_ERR, I18N_FUNC_FAIL, fname, "xdr_jobReply");
        lsb_merr(_i18n_msg_get(ls_catd , NL_SETN, 5804,
                               "Fatal error: xdr_jobReply() failed; sbatchd relifing")); /* catgets 5804 */
        relife();
    }

    if (chanWrite_(chfd, replyBuf, XDR_GETPOS(&replyXdrs)) <= 0) {
        ls_syslog(LOG_ERR, _i18n_msg_get(ls_catd , NL_SETN, 5805,
                                         "%s: Sending jobReply (len=%d) to master failed: %m"), /* catgets 5805 */
                  fname, XDR_GETPOS(&replyXdrs));
    }

    xdr_destroy(&replyXdrs);

    /* jp is only dereferenced when reply is ERR_NO_ERROR, i.e. when it
     * points at a live job card (new or pre-existing). */
    if (reply == ERR_NO_ERROR
        && !daemonParams[LSB_BSUBI_OLD].paramValue
        && PURE_INTERACTIVE(&jp->jobSpecs)
        && status_job (BATCH_STATUS_JOB, jp, jp->jobSpecs.jStatus,
                       ERR_NO_ERROR) < 0) {
        jp->notReported++;
    }

}
Example #3
0
/*
 * do_modifyjob - service a "modify job" request.
 *
 * xdrs    stream positioned at the encoded jobSpecs of the request.
 * chfd    channel the reply is written to.
 * reqHdr  header of the incoming request.
 *
 * The request is decoded into a local jobSpecs and the matching job is
 * looked up on the jobQueHead list by jobId.  For a job that exists and has
 * not finished, the following are taken over from the request:
 *   - the CPU and RSS limits, unless the corresponding lsbJobCpuLimit /
 *     lsbJobMemLimit is not 1 and the limit actually differs (in which
 *     case an error is logged and the limit is left alone);
 *   - the run limit;
 *   - outFile / errFile together with the SUB_OUT_FILE / SUB_ERR_FILE bits;
 *   - the SUB_RERUNNABLE bit.
 *
 * The reply code is ERR_BAD_REQ, ERR_NO_JOB, ERR_JOB_FINISH or, on
 * success, ERR_NO_ERROR with a jobReply describing the job (same fields
 * do_switchjob reports).
 */
void
do_modifyjob(XDR * xdrs, int chfd, struct LSFHeader * reqHdr)
{
    static char        fname[] = "do_modifyjob()";
    char               reply_buf[MSGSIZE];
    XDR                xdrs2;
    struct jobSpecs    jobSpecs;
    struct jobReply    jobReply;
    sbdReplyType       reply;
    char               found = FALSE;
    struct LSFHeader   replyHdr;
    char               *replyStruct;
    struct jobCard     *jp = NULL;
    struct lsfAuth     *auth = NULL;

    memset(&jobReply, 0, sizeof(struct jobReply));
    /* Start from a zeroed request so that jobSpecs.jobId, which the error
     * logging at sendReply prints, is well defined even if decoding fails. */
    memset(&jobSpecs, 0, sizeof(struct jobSpecs));

    if (!xdr_jobSpecs(xdrs, &jobSpecs, reqHdr)) {
        reply = ERR_BAD_REQ;
        ls_syslog(LOG_ERR, I18N_FUNC_FAIL, fname, "xdr_jobSpecs");
        goto sendReply;
    }

    for (jp = jobQueHead->back; jp != jobQueHead; jp = jp->back) {
        if (jp->jobSpecs.jobId == jobSpecs.jobId) {
            found = TRUE;
            break;
        }
    }
    if (!found) {
        reply = ERR_NO_JOB;
        ls_syslog(LOG_ERR, _i18n_msg_get(ls_catd , NL_SETN, 5808,
                                         "%s: mbatchd trying to modify a non-existent job <%s>"), fname, lsb_jobid2str(jobSpecs.jobId)); /* catgets 5808 */
        goto sendReply;
    }
    if (jp->jobSpecs.jStatus & (JOB_STAT_DONE | JOB_STAT_EXIT)) {
        reply = ERR_JOB_FINISH;
        goto sendReply;
    }

    /* CPU limit: refuse a real change when lsbJobCpuLimit is not 1. */
    if ((lsbJobCpuLimit != 1) &&
        ((jp->jobSpecs.lsfLimits[LSF_RLIMIT_CPU].rlim_maxl
          != jobSpecs.lsfLimits[LSF_RLIMIT_CPU].rlim_maxl) ||
         (jp->jobSpecs.lsfLimits[LSF_RLIMIT_CPU].rlim_maxh
          != jobSpecs.lsfLimits[LSF_RLIMIT_CPU].rlim_maxh) ||
         (jp->jobSpecs.lsfLimits[LSF_RLIMIT_CPU].rlim_curl
          != jobSpecs.lsfLimits[LSF_RLIMIT_CPU].rlim_curl) ||
         (jp->jobSpecs.lsfLimits[LSF_RLIMIT_CPU].rlim_curh
          != jobSpecs.lsfLimits[LSF_RLIMIT_CPU].rlim_curh)
            )) {
        ls_syslog(LOG_ERR, _i18n_msg_get(ls_catd, NL_SETN, 5809, "%s, LSB_JOB_CPULIMIT is not set for the host, job <%s>, CPU limit not modified"), fname, lsb_jobid2str(jobSpecs.jobId));
    } else {
        memcpy((char *) &jp->jobSpecs.lsfLimits[LSF_RLIMIT_CPU],
               (char *) &jobSpecs.lsfLimits[LSF_RLIMIT_CPU],
               sizeof(struct lsfLimit));
    }

    /* Memory (RSS) limit: same rule, governed by lsbJobMemLimit. */
    if ((lsbJobMemLimit != 1) &&
        ((jp->jobSpecs.lsfLimits[LSF_RLIMIT_RSS].rlim_maxl
          != jobSpecs.lsfLimits[LSF_RLIMIT_RSS].rlim_maxl) ||
         (jp->jobSpecs.lsfLimits[LSF_RLIMIT_RSS].rlim_maxh
          != jobSpecs.lsfLimits[LSF_RLIMIT_RSS].rlim_maxh) ||
         (jp->jobSpecs.lsfLimits[LSF_RLIMIT_RSS].rlim_curl
          != jobSpecs.lsfLimits[LSF_RLIMIT_RSS].rlim_curl) ||
         (jp->jobSpecs.lsfLimits[LSF_RLIMIT_RSS].rlim_curh
          != jobSpecs.lsfLimits[LSF_RLIMIT_RSS].rlim_curh)
            )) {
        ls_syslog(LOG_ERR, _i18n_msg_get(ls_catd, NL_SETN, 5810, "%s, LSB_JOB_MEMLIMIT is not set for the host, job <%s>, memory limit not modified"), fname, lsb_jobid2str(jobSpecs.jobId));
    } else {
        memcpy((char *) &jp->jobSpecs.lsfLimits[LSF_RLIMIT_RSS],
               (char *) &jobSpecs.lsfLimits[LSF_RLIMIT_RSS],
               sizeof(struct lsfLimit));
    }

    /* The run limit is always taken over. */
    memcpy((char *) &jp->jobSpecs.lsfLimits[LSF_RLIMIT_RUN],
           (char *) &jobSpecs.lsfLimits[LSF_RLIMIT_RUN],
           sizeof(struct lsfLimit));
    setRunLimit(jp, FALSE);

    /* Output file: updated when it changed or when the request names
     * /dev/null; SUB_OUT_FILE is cleared only for /dev/null without the
     * bit set in the request. */
    if (strcmp(jp->jobSpecs.outFile, jobSpecs.outFile) ||
        !(strcmp(jobSpecs.outFile, "/dev/null")))
    {
        strcpy(jp->jobSpecs.outFile, jobSpecs.outFile);
        if (strcmp(jobSpecs.outFile, "/dev/null") ||
            (jobSpecs.options & SUB_OUT_FILE)) {
            jp->jobSpecs.options |= SUB_OUT_FILE;
        }
        else {
            jp->jobSpecs.options &= ~SUB_OUT_FILE;
        }
    }

    /* Error file: updated when it changed; SUB_ERR_FILE is cleared for
     * /dev/null without the bit set in the request. */
    if (strcmp(jp->jobSpecs.errFile, jobSpecs.errFile))
    {
        strcpy(jp->jobSpecs.errFile, jobSpecs.errFile);
        if (!strcmp(jp->jobSpecs.errFile, "/dev/null")
            && !(jobSpecs.options & SUB_ERR_FILE)) {
            jp->jobSpecs.options &= ~SUB_ERR_FILE;
        }
    }

    if (jobSpecs.options & SUB_RERUNNABLE) {
        jp->jobSpecs.options |= SUB_RERUNNABLE;
    } else {
        jp->jobSpecs.options &= ~SUB_RERUNNABLE;
    }

    /* Success: without this assignment reply would be read while still
     * indeterminate at sendReply. */
    reply = ERR_NO_ERROR;
    jobReply.jobId = jp->jobSpecs.jobId;
    jobReply.jobPid = jp->jobSpecs.jobPid;
    jobReply.jobPGid = jp->jobSpecs.jobPGid;
    jobReply.jStatus = jp->jobSpecs.jStatus;

sendReply:
    xdr_lsffree(xdr_jobSpecs, (char *)&jobSpecs, reqHdr);
    xdrmem_create(&xdrs2, reply_buf, MSGSIZE, XDR_ENCODE);
    initLSFHeader_(&replyHdr);
    replyHdr.opCode = reply;
    if (reply == ERR_NO_ERROR)
        replyStruct = (char *) &jobReply;
    else {
        replyStruct = (char *) 0;
    }

    /* The log calls below print jobSpecs.jobId rather than going through
     * jp: jp is not a valid job card when decoding failed or the job was
     * not found, and for a found job both ids are equal. */
    if (!xdr_encodeMsg(&xdrs2, replyStruct, &replyHdr, xdr_jobReply, 0, auth)) {
        ls_syslog(LOG_ERR, I18N_JOB_FAIL_S_M, fname,
                  lsb_jobid2str(jobSpecs.jobId),
                  "xdr_jobReply");
        relife();
    }

    if (chanWrite_(chfd, reply_buf, XDR_GETPOS(&xdrs2)) <= 0) {
        ls_syslog(LOG_ERR, I18N_JOB_FAIL_S_M, fname,
                  lsb_jobid2str(jobSpecs.jobId), "chanWrite_");
    }

    xdr_destroy(&xdrs2);

    return;

}
Example #4
0
/*
 * do_switchjob - service a "switch job" request.
 *
 * xdrs    stream positioned at the encoded jobSpecs of the request.
 * chfd    channel the reply is written to.
 * reqHdr  header of the incoming request.
 *
 * The request is decoded into a local jobSpecs and the matching job is
 * looked up on the jobQueHead list by jobId.  For a job that exists and has
 * not finished, the job card takes over the request's run windows, queue,
 * priority, nice value, attributes, thresholds, run limit, requeue values,
 * resume/stop conditions, login shell (only with SUB_LOGIN_SHELL) and
 * action commands.  The job is then re-linked through offList/inJobLink
 * and reniced.
 *
 * The reply code is ERR_BAD_REQ, ERR_NO_JOB, ERR_JOB_FINISH or, on
 * success, ERR_NO_ERROR with a jobReply describing the job.
 */
void
do_switchjob(XDR * xdrs, int chfd, struct LSFHeader * reqHdr)
{
    static char        fname[] = "do_switchjob()";
    char               reply_buf[MSGSIZE];
    XDR                xdrs2;
    struct jobSpecs    jobSpecs;
    struct jobReply    jobReply;
    int                i;
    sbdReplyType       reply;
    char               *cp;
    char               *word;
    char               found = FALSE;
    struct LSFHeader   replyHdr;
    char               *replyStruct;
    struct jobCard     *jp = NULL;
    struct lsfAuth     *auth = NULL;

    memset(&jobReply, 0, sizeof(struct jobReply));
    /* Start from a zeroed request so that jobSpecs.jobId, which the error
     * logging at sendReply prints, is well defined even if decoding fails. */
    memset(&jobSpecs, 0, sizeof(struct jobSpecs));

    if (!xdr_jobSpecs(xdrs, &jobSpecs, reqHdr)) {
        reply = ERR_BAD_REQ;
        ls_syslog(LOG_ERR, I18N_FUNC_FAIL, fname, "xdr_jobSpecs");
        goto sendReply;
    }
    for (jp = jobQueHead->back; jp != jobQueHead; jp = jp->back) {
        if (jp->jobSpecs.jobId == jobSpecs.jobId) {
            found = TRUE;
            break;
        }
    }
    if (!found) {
        reply = ERR_NO_JOB;
        ls_syslog(LOG_ERR, _i18n_msg_get(ls_catd , NL_SETN, 5807,
                                         "%s: mbatchd trying to switch a non-existent job <%s>"), fname, lsb_jobid2str(jobSpecs.jobId)); /* catgets 5807 */
        goto sendReply;
    }
    if (jp->jobSpecs.jStatus & (JOB_STAT_DONE | JOB_STAT_EXIT)) {
        reply = ERR_JOB_FINISH;
        goto sendReply;
    }

    /* Rebuild the run windows from the request, one word at a time. */
    cp = jobSpecs.windows;
    freeWeek(jp->week);
    while ((word = getNextWord_(&cp)) != NULL) {
        if (addWindow(word, jp->week, "switchJob jobSpecs") < 0) {
            ls_syslog(LOG_ERR, I18N_JOB_FAIL_S_S_M, fname,
                      lsb_jobid2str(jp->jobSpecs.jobId), "addWindow", word);
            freeWeek(jp->week);
            reply = ERR_BAD_REQ;
            goto sendReply;
        }
    }
    jp->windEdge = now;

    /* Going from exclusive to non-exclusive: release the host locks.  A
     * failure with lserrno == LSE_LIM_NLOCKED is not reported. */
    if ((jp->jobSpecs.jAttrib & Q_ATTRIB_EXCLUSIVE)
        && !(jobSpecs.jAttrib & Q_ATTRIB_EXCLUSIVE)) {
        for (i = 0; i < jp->jobSpecs.numToHosts; i++) {
            if (unlockHost_(jp->jobSpecs.toHosts[i]) < 0
                && lserrno != LSE_LIM_NLOCKED) {
                ls_syslog(LOG_ERR, I18N_JOB_FAIL_S_S_MM, fname,
                          lsb_jobid2str(jp->jobSpecs.jobId), "unlockHost_", jp->jobSpecs.toHosts[i]);
            }
        }
    }

    strcpy(jp->jobSpecs.queue, jobSpecs.queue);
    strcpy(jp->jobSpecs.windows, jobSpecs.windows);
    jp->jobSpecs.priority = jobSpecs.priority;
    jp->jobSpecs.nice = jobSpecs.nice;
    jp->jobSpecs.jAttrib = jobSpecs.jAttrib;

    freeThresholds (&jp->jobSpecs.thresholds);
    saveThresholds (&jp->jobSpecs, &jobSpecs.thresholds);

    memcpy((char *) &jp->jobSpecs.lsfLimits[LSF_RLIMIT_RUN],
           (char *) &jobSpecs.lsfLimits[LSF_RLIMIT_RUN],
           sizeof(struct lsfLimit));

    strcpy (jp->jobSpecs.requeueEValues, jobSpecs.requeueEValues);
    strcpy (jp->jobSpecs.resumeCond, jobSpecs.resumeCond);
    strcpy (jp->jobSpecs.stopCond, jobSpecs.stopCond);

    /* Re-derive the resume/stop condition values; a failure to build one
     * is logged but does not fail the switch. */
    lsbFreeResVal (&jp->resumeCondVal);
    if (jobSpecs.resumeCond && jobSpecs.resumeCond[0] != '\0') {
        if ((jp->resumeCondVal = checkThresholdCond (jobSpecs.resumeCond))
            == NULL) {
            ls_syslog(LOG_ERR, I18N_JOB_FAIL_S_S, fname,
                      lsb_jobid2str(jp->jobSpecs.jobId),
                      "checkThresholdCond", jobSpecs.resumeCond);
        }
    }

    lsbFreeResVal (&jp->stopCondVal);
    if (jobSpecs.stopCond && jobSpecs.stopCond[0] != '\0') {
        if ((jp->stopCondVal = checkThresholdCond (jobSpecs.stopCond))
            == NULL) {
            ls_syslog(LOG_ERR, I18N_JOB_FAIL_S_S, fname,
                      lsb_jobid2str(jp->jobSpecs.jobId),
                      "checkThresholdCond", jobSpecs.stopCond);
        }
    }

    if (jobSpecs.options & SUB_LOGIN_SHELL) {
        FREEUP (jp->jobSpecs.loginShell);
        jp->jobSpecs.loginShell = safeSave (jobSpecs.loginShell);
    }

    strcpy (jp->jobSpecs.suspendActCmd, jobSpecs.suspendActCmd);
    strcpy (jp->jobSpecs.resumeActCmd, jobSpecs.resumeActCmd);
    strcpy (jp->jobSpecs.terminateActCmd, jobSpecs.terminateActCmd);

    setRunLimit (jp, FALSE);
    /* Take the card off the list and insert it again via inJobLink. */
    offList ((struct listEntry *)jp);
    inJobLink (jp);

    if (reniceJob(jp) < 0) {
        ls_syslog(LOG_DEBUG, "%s: renice job <%s> failed",
                  fname, lsb_jobid2str(jp->jobSpecs.jobId));
    }

    reply = ERR_NO_ERROR;
    jobReply.jobId = jp->jobSpecs.jobId;
    jobReply.jobPid = jp->jobSpecs.jobPid;
    jobReply.jobPGid = jp->jobSpecs.jobPGid;
    jobReply.jStatus = jp->jobSpecs.jStatus;

sendReply:
    xdr_lsffree(xdr_jobSpecs, (char *)&jobSpecs, reqHdr);
    xdrmem_create(&xdrs2, reply_buf, MSGSIZE, XDR_ENCODE);
    initLSFHeader_(&replyHdr);
    replyHdr.opCode = reply;
    if (reply == ERR_NO_ERROR)
        replyStruct = (char *) &jobReply;
    else {
        replyStruct = (char *) 0;
    }

    /* The log calls below print jobSpecs.jobId rather than going through
     * jp: jp is not a valid job card when decoding failed or the job was
     * not found, and for a found job both ids are equal. */
    if (!xdr_encodeMsg(&xdrs2, replyStruct, &replyHdr, xdr_jobReply, 0, auth)) {
        ls_syslog(LOG_ERR, I18N_JOB_FAIL_S_M, fname,
                  lsb_jobid2str(jobSpecs.jobId),
                  "xdr_jobReply");
        relife();
    }

    if (chanWrite_(chfd, reply_buf, XDR_GETPOS(&xdrs2)) <= 0) {
        ls_syslog(LOG_ERR, I18N_JOB_FAIL_S_M, fname,
                  lsb_jobid2str(jobSpecs.jobId), "chanWrite_");
    }

    xdr_destroy(&xdrs2);

    return;

}