/*
 * xdr_jobCard - XDR filter for a struct jobCard.
 *
 * xdrs   XDR stream; its x_op selects free, encode or decode.
 * jCard  job card to release or to run through the filter.
 * hdr    header handed on to xdr_jobSpecs()/xdr_arrayElement().
 *
 * Returns TRUE on success and FALSE as soon as any field filter fails.
 * The order of the calls below is the on-the-wire field order and must
 * not be changed.
 *
 * NOTE(review): clusterName goes through xdr_var_string() like actCmd and
 * exitFile, but it is not released in the XDR_FREE branch - confirm who
 * owns that string.
 */
bool_t
xdr_jobCard (XDR *xdrs, struct jobCard *jCard, struct LSFHeader *hdr)
{
    static char fname[]="xdr_jobCard()";
    char *userName;

    /* Release path: drop the two owned strings, then let xdr_jobSpecs()
     * handle the embedded job specification. */
    if (xdrs->x_op == XDR_FREE) {
        FREEUP(jCard->actCmd);
        FREEUP(jCard->exitFile);
        return xdr_jobSpecs(xdrs, &jCard->jobSpecs, hdr) ? TRUE : FALSE;
    }

    userName = jCard->execUsername;

    /* Leading group of scalar fields. */
    if (!(xdr_int(xdrs, (int *)&jCard->execGid)
          && xdr_int(xdrs, &jCard->notReported)
          && xdr_time_t(xdrs, &jCard->windEdge)
          && xdr_char(xdrs, &jCard->active)
          && xdr_char(xdrs, &jCard->timeExpire)
          && xdr_char(xdrs, &jCard->missing)
          && xdr_char(xdrs, &jCard->mbdRestarted)
          && xdr_time_t(xdrs, &jCard->windWarnTime)
          && xdr_int(xdrs, &jCard->runTime)
          && xdr_int(xdrs, &jCard->w_status)
          && xdr_float(xdrs, &jCard->cpuTime)
          && xdr_time_t(xdrs, &jCard->lastChkpntTime)
          && xdr_int(xdrs, &jCard->migCnt)
          && xdr_int(xdrs, &jCard->cleanupPid)
          && xdr_int(xdrs, &jCard->execJobFlag))) {
        ls_syslog(LOG_ERR, I18N_FUNC_FAIL, fname, "xdr");
        return(FALSE);
    }

    if (!xdr_time_t(xdrs, &jCard->lastStatusMbdTime))
        return(FALSE);

    /* When decoding, start from an empty user name in the card's own
     * buffer before xdr_string() is called on it. */
    if (xdrs->x_op == XDR_DECODE)
        userName[0] = '\0';

    if (!xdr_string(xdrs, &userName, MAX_LSB_NAME_LEN)) {
        ls_syslog(LOG_ERR, I18N_FUNC_S_FAIL_M, fname, "xdr_string",
                  "execUserName");
        return(FALSE);
    }

    if (!(xdr_int(xdrs, &jCard->actReasons)
          && xdr_int(xdrs, &jCard->actSubReasons))) {
        ls_syslog(LOG_ERR, I18N_FUNC_FAIL, fname, "xdr");
        return(FALSE);
    }

    /* Variable-length strings; a failure here is not logged. */
    if (!(xdr_var_string(xdrs, &jCard->actCmd)
          && xdr_var_string(xdrs, &jCard->exitFile)
          && xdr_var_string(xdrs, &jCard->clusterName)))
        return(FALSE);

    if (!xdr_arrayElement(xdrs, (char *)&jCard->jobSpecs, hdr,
                          xdr_jobSpecs)) {
        ls_syslog(LOG_ERR, I18N_FUNC_FAIL, fname, "xdr_jobSpecs");
        return(FALSE);
    }

    if (!xdr_int(xdrs, &jCard->servSocket))
        return(FALSE);

    /* Recorded maximum resource usage. */
    if (!(xdr_int(xdrs, &jCard->maxRusage.mem)
          && xdr_int(xdrs, &jCard->maxRusage.swap)
          && xdr_int(xdrs, &jCard->maxRusage.utime)
          && xdr_int(xdrs, &jCard->maxRusage.stime)
          && xdr_int(xdrs, &jCard->maxRusage.npids))) {
        ls_syslog(LOG_ERR, I18N_FUNC_FAIL, fname, "xdr_int jRusage");
        return(FALSE);
    }

    if (!xdr_int(xdrs, (int *)&jCard->actFlags)) {
        ls_syslog(LOG_ERR, I18N_FUNC_FAIL, fname, "xdr_actFlags");
        return(FALSE);
    }

    return(TRUE);
}
/*
 * do_newjob - handle a "new job" request.
 *
 * xdrs    stream holding the encoded struct jobSpecs of the request.
 * chfd    channel the reply is written to; also handed to job_exec().
 * reqHdr  request header, passed to the jobSpecs XDR filter.
 *
 * Decodes the job specification and, if a job with the same id is already
 * on the jobQueHead list, simply reports that job's current ids and status.
 * Otherwise a new job card is allocated, initialised and started through
 * job_exec().  The outcome is always sent back on chfd; the jobReply
 * payload accompanies the header only when the result is ERR_NO_ERROR.
 */
void
do_newjob(XDR *xdrs, int chfd, struct LSFHeader *reqHdr)
{
    static char fname[] = "do_newjob()";
    char outBuf[MSGSIZE];
    XDR outXdrs;
    struct jobSpecs jobSpecs;
    struct jobReply jobReply;
    struct jobCard *jp;
    sbdReplyType reply;
    struct LSFHeader replyHdr;
    char *payload;
    struct lsfAuth *auth = NULL;

    memset(&jobReply, 0, sizeof(jobReply));

    if (!xdr_jobSpecs(xdrs, &jobSpecs, reqHdr)) {
        reply = ERR_BAD_REQ;
        ls_syslog(LOG_ERR, I18N_FUNC_FAIL, fname, "xdr_jobSpecs");
        goto respond;
    }

    /* A job with this id may already be known; if so, report it as is. */
    jp = jobQueHead->forw;
    while (jp != jobQueHead) {
        if (jp->jobSpecs.jobId == jobSpecs.jobId) {
            jobReply.jobId = jp->jobSpecs.jobId;
            jobReply.jobPid = jp->jobSpecs.jobPid;
            jobReply.jobPGid = jp->jobSpecs.jobPGid;
            jobReply.jStatus = jp->jobSpecs.jStatus;
            reply = ERR_NO_ERROR;
            goto respond;
        }
        jp = jp->forw;
    }

    jp = calloc(1, sizeof(*jp));
    if (jp == NULL) {
        ls_syslog(LOG_ERR, I18N_JOB_FAIL_S_M, fname,
                  lsb_jobid2str(jobSpecs.jobId), "calloc");
        reply = ERR_MEM;
        goto respond;
    }

    /* Start from a byte-wise copy of the request, then reset the fields
     * that describe this particular execution. */
    memcpy(&jp->jobSpecs, &jobSpecs, sizeof(jp->jobSpecs));
    jp->jobSpecs.jStatus &= ~JOB_STAT_MIG;
    jp->jobSpecs.startTime = now;
    jp->jobSpecs.reasons = 0;
    jp->jobSpecs.subreasons = 0;

    /* Initialize the core number */
    jp->core_num = -1;

    /* Jobs from an exclusive queue must lock their hosts first. */
    if ((jp->jobSpecs.jAttrib & Q_ATTRIB_EXCLUSIVE) && lockHosts (jp) < 0) {
        ls_syslog(LOG_ERR, I18N_JOB_FAIL_S, fname,
                  lsb_jobid2str(jp->jobSpecs.jobId), "lockHosts");
        unlockHosts (jp, jp->jobSpecs.numToHosts);
        reply = ERR_LOCK_FAIL;
        freeWeek(jp->week);
        FREEUP(jp);
        goto respond;
    }

    jp->runTime = 0;

    /* On failure initJobCard() is given &reply to store the error in. */
    if (initJobCard(jp, &jobSpecs, (int *)&reply) < 0) {
        if (jp->jobSpecs.jAttrib & Q_ATTRIB_EXCLUSIVE) {
            unlockHosts (jp, jp->jobSpecs.numToHosts);
        }
        FREEUP(jp);
        goto respond;
    }

    jp->execJobFlag = 0;
    if (jp->runTime < 0) {
        jp->runTime = 0;
    }
    jp->execGid = 0;
    jp->execUsername[0] = '\0';
    jp->jobSpecs.execUid = -1;
    jp->jobSpecs.execUsername[0] = '\0';

    /* Forget the spool directory when getUnixSpoolDir() returns NULL
     * for it. */
    if (jp->jobSpecs.jobSpoolDir[0] != '\0'
        && getUnixSpoolDir (jp->jobSpecs.jobSpoolDir) == NULL) {
        jp->jobSpecs.jobSpoolDir[0] = '\0';
    }

    if ((logclass & LC_TRACE) && jp->jobSpecs.jobSpoolDir[0] != 0) {
        ls_syslog(LOG_DEBUG, "%s: the SpoolDir for job <%s> is %s \n",
                  fname, lsb_jobid2str(jp->jobSpecs.jobId),
                  jp->jobSpecs.jobSpoolDir);
    }

    if (jp->jobSpecs.options & SUB_PRE_EXEC) {
        SBD_SET_STATE(jp, (JOB_STAT_RUN | JOB_STAT_PRE_EXEC));
    } else {
        SBD_SET_STATE(jp, JOB_STAT_RUN);
    }

    reply = job_exec(jp, chfd);
    if (reply == ERR_NO_ERROR) {
        jobReply.jobId = jp->jobSpecs.jobId;
        jobReply.jobPid = jp->jobSpecs.jobPid;
        jobReply.jobPGid = jp->jobSpecs.jobPGid;
        jobReply.jStatus = jp->jobSpecs.jStatus;
    } else {
        ls_syslog(LOG_ERR, I18N_JOB_FAIL_S, fname,
                  lsb_jobid2str(jp->jobSpecs.jobId), "job_exec");
        if (jp->jobSpecs.jAttrib & Q_ATTRIB_EXCLUSIVE) {
            unlockHosts (jp, jp->jobSpecs.numToHosts);
        }
        deallocJobCard(jp);
    }

respond:
    /* Every path arrives here: release the decoded request and answer. */
    xdr_lsffree(xdr_jobSpecs, (char *)&jobSpecs, reqHdr);

    xdrmem_create(&outXdrs, outBuf, MSGSIZE, XDR_ENCODE);
    initLSFHeader_(&replyHdr);
    replyHdr.opCode = reply;

    if (reply == ERR_NO_ERROR) {
        payload = (char *) &jobReply;
    } else {
        payload = (char *) NULL;
    }

    if (!xdr_encodeMsg(&outXdrs, payload, &replyHdr, xdr_jobReply, 0, auth)) {
        ls_syslog(LOG_ERR, I18N_FUNC_FAIL, fname, "xdr_jobReply");
        lsb_merr(_i18n_msg_get(ls_catd , NL_SETN, 5804,
                               "Fatal error: xdr_jobReply() failed; sbatchd relifing")); /* catgets 5804 */
        relife();
    }

    if (chanWrite_(chfd, outBuf, XDR_GETPOS(&outXdrs)) <= 0) {
        ls_syslog(LOG_ERR, _i18n_msg_get(ls_catd , NL_SETN, 5805,
                                         "%s: Sending jobReply (len=%d) to master failed: %m"), /* catgets 5805 */
                  fname, XDR_GETPOS(&outXdrs));
    }

    xdr_destroy(&outXdrs);

    /* jp is only looked at when the reply was ERR_NO_ERROR, i.e. when it
     * refers to a job card that is still alive. */
    if (reply == ERR_NO_ERROR && !daemonParams[LSB_BSUBI_OLD].paramValue) {
        if (PURE_INTERACTIVE(&jp->jobSpecs)) {
            if (status_job (BATCH_STATUS_JOB, jp, jp->jobSpecs.jStatus,
                            ERR_NO_ERROR) < 0) {
                jp->notReported++;
            }
        }
    }
}
/*
 * do_modifyjob - handle a "modify job" request for a job already on the
 * jobQueHead list.
 *
 * xdrs    stream holding the encoded struct jobSpecs with the new values.
 * chfd    channel the reply is written to.
 * reqHdr  request header, passed to the jobSpecs XDR filter.
 *
 * Replies ERR_BAD_REQ if the request cannot be decoded, ERR_NO_JOB if no
 * job with the requested id is on the list, and ERR_JOB_FINISH if the job
 * is already DONE or EXIT.  Otherwise the CPU, memory (RSS) and run
 * limits, the output/error file names and the rerunnable flag of the
 * stored job specification are updated and ERR_NO_ERROR is returned
 * together with the job's ids and status.
 *
 * Changes from the previous version:
 *  - fname said "do_switchjob()", so log lines named the wrong function;
 *  - reply was never assigned on the success path and was then read
 *    uninitialised; it is now ERR_NO_ERROR and jobReply is filled in the
 *    same way do_switchjob() does;
 *  - the reply-failure logs dereferenced jp even when it was unset
 *    (decode failure) or pointed at the list head (job not found).
 */
void
do_modifyjob(XDR * xdrs, int chfd, struct LSFHeader * reqHdr)
{
    static char fname[] = "do_modifyjob()";
    char reply_buf[MSGSIZE];
    XDR xdrs2;
    struct jobSpecs jobSpecs;
    struct jobReply jobReply;
    sbdReplyType reply;
    struct LSFHeader replyHdr;
    char *replyStruct;
    struct jobCard *jp = NULL;   /* stays NULL unless the job is found */
    struct jobCard *cur;
    struct lsfAuth *auth = NULL;

    memset(&jobReply, 0, sizeof(struct jobReply));

    if (!xdr_jobSpecs(xdrs, &jobSpecs, reqHdr)) {
        reply = ERR_BAD_REQ;
        ls_syslog(LOG_ERR, I18N_FUNC_FAIL, fname, "xdr_jobSpecs");
        goto sendReply;
    }

    /* Walk the list backwards with a separate cursor so that jp never
     * ends up pointing at the list head when nothing matches. */
    for (cur = jobQueHead->back; cur != jobQueHead; cur = cur->back) {
        if (cur->jobSpecs.jobId == jobSpecs.jobId) {
            jp = cur;
            break;
        }
    }

    if (jp == NULL) {
        reply = ERR_NO_JOB;
        ls_syslog(LOG_ERR, _i18n_msg_get(ls_catd , NL_SETN, 5808,
                                         "%s: mbatchd trying to modify a non-existent job <%s>"), fname, lsb_jobid2str(jobSpecs.jobId)); /* catgets 5808 */
        goto sendReply;
    }

    if (jp->jobSpecs.jStatus & (JOB_STAT_DONE | JOB_STAT_EXIT)) {
        reply = ERR_JOB_FINISH;
        goto sendReply;
    }

    /* CPU limit: when lsbJobCpuLimit is not 1 and the request would
     * change the limit, keep the old value and log; otherwise copy. */
    if ((lsbJobCpuLimit != 1) &&
        ((jp->jobSpecs.lsfLimits[LSF_RLIMIT_CPU].rlim_maxl !=
          jobSpecs.lsfLimits[LSF_RLIMIT_CPU].rlim_maxl) ||
         (jp->jobSpecs.lsfLimits[LSF_RLIMIT_CPU].rlim_maxh !=
          jobSpecs.lsfLimits[LSF_RLIMIT_CPU].rlim_maxh) ||
         (jp->jobSpecs.lsfLimits[LSF_RLIMIT_CPU].rlim_curl !=
          jobSpecs.lsfLimits[LSF_RLIMIT_CPU].rlim_curl) ||
         (jp->jobSpecs.lsfLimits[LSF_RLIMIT_CPU].rlim_curh !=
          jobSpecs.lsfLimits[LSF_RLIMIT_CPU].rlim_curh))) {
        ls_syslog(LOG_ERR, _i18n_msg_get(ls_catd, NL_SETN, 5809,
                                         "%s, LSB_JOB_CPULIMIT is not set for the host, job <%s>, CPU limit not modified"), fname, lsb_jobid2str(jobSpecs.jobId));
    } else {
        memcpy((char *) &jp->jobSpecs.lsfLimits[LSF_RLIMIT_CPU],
               (char *) &jobSpecs.lsfLimits[LSF_RLIMIT_CPU],
               sizeof(struct lsfLimit));
    }

    /* Memory (RSS) limit: same rule, governed by lsbJobMemLimit. */
    if ((lsbJobMemLimit != 1) &&
        ((jp->jobSpecs.lsfLimits[LSF_RLIMIT_RSS].rlim_maxl !=
          jobSpecs.lsfLimits[LSF_RLIMIT_RSS].rlim_maxl) ||
         (jp->jobSpecs.lsfLimits[LSF_RLIMIT_RSS].rlim_maxh !=
          jobSpecs.lsfLimits[LSF_RLIMIT_RSS].rlim_maxh) ||
         (jp->jobSpecs.lsfLimits[LSF_RLIMIT_RSS].rlim_curl !=
          jobSpecs.lsfLimits[LSF_RLIMIT_RSS].rlim_curl) ||
         (jp->jobSpecs.lsfLimits[LSF_RLIMIT_RSS].rlim_curh !=
          jobSpecs.lsfLimits[LSF_RLIMIT_RSS].rlim_curh))) {
        ls_syslog(LOG_ERR, _i18n_msg_get(ls_catd, NL_SETN, 5810,
                                         "%s, LSB_JOB_MEMLIMIT is not set for the host, job <%s>, memory limit not modified"), fname, lsb_jobid2str(jobSpecs.jobId));
    } else {
        memcpy((char *) &jp->jobSpecs.lsfLimits[LSF_RLIMIT_RSS],
               (char *) &jobSpecs.lsfLimits[LSF_RLIMIT_RSS],
               sizeof(struct lsfLimit));
    }

    /* The run limit is always taken from the request. */
    memcpy((char *) &jp->jobSpecs.lsfLimits[LSF_RLIMIT_RUN],
           (char *) &jobSpecs.lsfLimits[LSF_RLIMIT_RUN],
           sizeof(struct lsfLimit));
    setRunLimit(jp, FALSE);

    /* Output file: updated when the name changes or when the request
     * names "/dev/null".  SUB_OUT_FILE ends up set unless the new name is
     * "/dev/null" and the request itself does not carry the flag. */
    if (strcmp(jp->jobSpecs.outFile, jobSpecs.outFile) ||
        !(strcmp(jobSpecs.outFile, "/dev/null"))) {
        strcpy(jp->jobSpecs.outFile, jobSpecs.outFile);
        if (strcmp(jobSpecs.outFile, "/dev/null") ||
            (jobSpecs.options & SUB_OUT_FILE)) {
            jp->jobSpecs.options |= SUB_OUT_FILE;
        } else {
            jp->jobSpecs.options &= ~SUB_OUT_FILE;
        }
    }

    /* Error file: updated only when the name changes; SUB_ERR_FILE is
     * cleared when the new name is "/dev/null" and the request does not
     * carry the flag. */
    if (strcmp(jp->jobSpecs.errFile, jobSpecs.errFile)) {
        strcpy(jp->jobSpecs.errFile, jobSpecs.errFile);
        if (!strcmp(jp->jobSpecs.errFile, "/dev/null") &&
            !(jobSpecs.options & SUB_ERR_FILE)) {
            jp->jobSpecs.options &= ~SUB_ERR_FILE;
        }
    }

    if (jobSpecs.options & SUB_RERUNNABLE) {
        jp->jobSpecs.options |= SUB_RERUNNABLE;
    } else {
        jp->jobSpecs.options &= ~SUB_RERUNNABLE;
    }

    /* Success: report the job's current ids and status. */
    reply = ERR_NO_ERROR;
    jobReply.jobId = jp->jobSpecs.jobId;
    jobReply.jobPid = jp->jobSpecs.jobPid;
    jobReply.jobPGid = jp->jobSpecs.jobPGid;
    jobReply.jStatus = jp->jobSpecs.jStatus;

sendReply:
    xdr_lsffree(xdr_jobSpecs, (char *)&jobSpecs, reqHdr);
    xdrmem_create(&xdrs2, reply_buf, MSGSIZE, XDR_ENCODE);
    initLSFHeader_(&replyHdr);
    replyHdr.opCode = reply;

    /* The jobReply payload is only sent along with a successful reply. */
    if (reply == ERR_NO_ERROR) {
        replyStruct = (char *) &jobReply;
    } else {
        replyStruct = (char *) 0;
    }

    if (!xdr_encodeMsg(&xdrs2, replyStruct, &replyHdr, xdr_jobReply, 0, auth)) {
        /* jp is NULL on the decode-failure and not-found paths, so the
         * job id can only be logged when a job was actually found. */
        if (jp != NULL) {
            ls_syslog(LOG_ERR, I18N_JOB_FAIL_S_M, fname,
                      lsb_jobid2str(jp->jobSpecs.jobId), "xdr_jobReply");
        } else {
            ls_syslog(LOG_ERR, I18N_FUNC_FAIL, fname, "xdr_jobReply");
        }
        relife();
    }

    if (chanWrite_(chfd, reply_buf, XDR_GETPOS(&xdrs2)) <= 0) {
        if (jp != NULL) {
            ls_syslog(LOG_ERR, I18N_JOB_FAIL_S_M, fname,
                      lsb_jobid2str(jp->jobSpecs.jobId), "chanWrite_");
        } else {
            ls_syslog(LOG_ERR, I18N_FUNC_FAIL, fname, "chanWrite_");
        }
    }

    xdr_destroy(&xdrs2);
    return;
}
/*
 * do_switchjob - handle a "switch job" request: move a job already on the
 * jobQueHead list over to the queue settings carried in the request.
 *
 * xdrs    stream holding the encoded struct jobSpecs with the new values.
 * chfd    channel the reply is written to.
 * reqHdr  request header, passed to the jobSpecs XDR filter.
 *
 * Replies ERR_BAD_REQ if the request cannot be decoded or one of its
 * windows is rejected by addWindow(), ERR_NO_JOB if no job with the
 * requested id is on the list, and ERR_JOB_FINISH if the job is already
 * DONE or EXIT.  On success the reply is ERR_NO_ERROR together with the
 * job's ids and status.
 *
 * Change from the previous version: the reply-failure logs dereferenced
 * jp even when it was unset (decode failure) or pointed at the list head
 * (job not found).  jp is now NULL in those cases and the logs fall back
 * to a message without a job id.
 */
void
do_switchjob(XDR * xdrs, int chfd, struct LSFHeader * reqHdr)
{
    static char fname[] = "do_switchjob()";
    char reply_buf[MSGSIZE];
    XDR xdrs2;
    struct jobSpecs jobSpecs;
    struct jobReply jobReply;
    int i;
    sbdReplyType reply;
    char *cp;
    char *word;
    struct LSFHeader replyHdr;
    char *replyStruct;
    struct jobCard *jp = NULL;   /* stays NULL unless the job is found */
    struct jobCard *cur;
    struct lsfAuth *auth = NULL;

    memset(&jobReply, 0, sizeof(struct jobReply));

    if (!xdr_jobSpecs(xdrs, &jobSpecs, reqHdr)) {
        reply = ERR_BAD_REQ;
        ls_syslog(LOG_ERR, I18N_FUNC_FAIL, fname, "xdr_jobSpecs");
        goto sendReply;
    }

    /* Walk the list backwards with a separate cursor so that jp never
     * ends up pointing at the list head when nothing matches. */
    for (cur = jobQueHead->back; cur != jobQueHead; cur = cur->back) {
        if (cur->jobSpecs.jobId == jobSpecs.jobId) {
            jp = cur;
            break;
        }
    }

    if (jp == NULL) {
        reply = ERR_NO_JOB;
        ls_syslog(LOG_ERR, _i18n_msg_get(ls_catd , NL_SETN, 5807,
                                         "%s: mbatchd trying to switch a non-existent job <%s>"), fname, lsb_jobid2str(jobSpecs.jobId)); /* catgets 5807 */
        goto sendReply;
    }

    if (jp->jobSpecs.jStatus & (JOB_STAT_DONE | JOB_STAT_EXIT)) {
        reply = ERR_JOB_FINISH;
        goto sendReply;
    }

    /* Rebuild the job's windows from the request.  The old windows are
     * freed first; if a word is rejected the partly built set is freed as
     * well and the request fails. */
    cp = jobSpecs.windows;
    freeWeek(jp->week);
    while ((word = getNextWord_(&cp)) != NULL) {
        if (addWindow(word, jp->week, "switchJob jobSpecs") < 0) {
            ls_syslog(LOG_ERR, I18N_JOB_FAIL_S_S_M, fname,
                      lsb_jobid2str(jp->jobSpecs.jobId), "addWindow", word);
            freeWeek(jp->week);
            reply = ERR_BAD_REQ;
            goto sendReply;
        }
    }
    jp->windEdge = now;

    /* Going from an exclusive to a non-exclusive queue: unlock each host
     * of the job.  A failure with LSE_LIM_NLOCKED is not reported. */
    if ((jp->jobSpecs.jAttrib & Q_ATTRIB_EXCLUSIVE) &&
        !(jobSpecs.jAttrib & Q_ATTRIB_EXCLUSIVE)) {
        for (i = 0; i < jp->jobSpecs.numToHosts; i++) {
            if (unlockHost_(jp->jobSpecs.toHosts[i]) < 0 &&
                lserrno != LSE_LIM_NLOCKED) {
                ls_syslog(LOG_ERR, I18N_JOB_FAIL_S_S_MM, fname,
                          lsb_jobid2str(jp->jobSpecs.jobId), "unlockHost_",
                          jp->jobSpecs.toHosts[i]);
            }
        }
    }

    /* Take over the queue-level attributes from the request. */
    strcpy(jp->jobSpecs.queue, jobSpecs.queue);
    strcpy(jp->jobSpecs.windows, jobSpecs.windows);
    jp->jobSpecs.priority = jobSpecs.priority;
    jp->jobSpecs.nice = jobSpecs.nice;
    jp->jobSpecs.jAttrib = jobSpecs.jAttrib;

    freeThresholds (&jp->jobSpecs.thresholds);
    saveThresholds (&jp->jobSpecs, &jobSpecs.thresholds);

    memcpy((char *) &jp->jobSpecs.lsfLimits[LSF_RLIMIT_RUN],
           (char *) &jobSpecs.lsfLimits[LSF_RLIMIT_RUN],
           sizeof(struct lsfLimit));

    strcpy (jp->jobSpecs.requeueEValues, jobSpecs.requeueEValues);
    strcpy (jp->jobSpecs.resumeCond, jobSpecs.resumeCond);
    strcpy (jp->jobSpecs.stopCond, jobSpecs.stopCond);

    /* Re-evaluate the resume and stop conditions; a condition that
     * checkThresholdCond() rejects is logged and left as NULL. */
    lsbFreeResVal (&jp->resumeCondVal);
    if (jobSpecs.resumeCond && jobSpecs.resumeCond[0] != '\0') {
        if ((jp->resumeCondVal = checkThresholdCond (jobSpecs.resumeCond))
            == NULL) {
            ls_syslog(LOG_ERR, I18N_JOB_FAIL_S_S, fname,
                      lsb_jobid2str(jp->jobSpecs.jobId),
                      "checkThresholdCond", jobSpecs.resumeCond);
        }
    }

    lsbFreeResVal (&jp->stopCondVal);
    if (jobSpecs.stopCond && jobSpecs.stopCond[0] != '\0') {
        if ((jp->stopCondVal = checkThresholdCond (jobSpecs.stopCond))
            == NULL) {
            ls_syslog(LOG_ERR, I18N_JOB_FAIL_S_S, fname,
                      lsb_jobid2str(jp->jobSpecs.jobId),
                      "checkThresholdCond", jobSpecs.stopCond);
        }
    }

    if (jobSpecs.options & SUB_LOGIN_SHELL) {
        FREEUP (jp->jobSpecs.loginShell);
        jp->jobSpecs.loginShell = safeSave (jobSpecs.loginShell);
    }

    strcpy (jp->jobSpecs.suspendActCmd, jobSpecs.suspendActCmd);
    strcpy (jp->jobSpecs.resumeActCmd, jobSpecs.resumeActCmd);
    strcpy (jp->jobSpecs.terminateActCmd, jobSpecs.terminateActCmd);

    setRunLimit (jp, FALSE);

    /* Take the card off the list and link it in again through
     * inJobLink(). */
    offList ((struct listEntry *)jp);
    inJobLink (jp);

    if (reniceJob(jp) < 0) {
        ls_syslog(LOG_DEBUG, "%s: renice job <%s> failed", fname,
                  lsb_jobid2str(jp->jobSpecs.jobId));
    }

    reply = ERR_NO_ERROR;
    jobReply.jobId = jp->jobSpecs.jobId;
    jobReply.jobPid = jp->jobSpecs.jobPid;
    jobReply.jobPGid = jp->jobSpecs.jobPGid;
    jobReply.jStatus = jp->jobSpecs.jStatus;

sendReply:
    xdr_lsffree(xdr_jobSpecs, (char *)&jobSpecs, reqHdr);
    xdrmem_create(&xdrs2, reply_buf, MSGSIZE, XDR_ENCODE);
    initLSFHeader_(&replyHdr);
    replyHdr.opCode = reply;

    /* The jobReply payload is only sent along with a successful reply. */
    if (reply == ERR_NO_ERROR) {
        replyStruct = (char *) &jobReply;
    } else {
        replyStruct = (char *) 0;
    }

    if (!xdr_encodeMsg(&xdrs2, replyStruct, &replyHdr, xdr_jobReply, 0, auth)) {
        /* jp is NULL on the decode-failure and not-found paths, so the
         * job id can only be logged when a job was actually found. */
        if (jp != NULL) {
            ls_syslog(LOG_ERR, I18N_JOB_FAIL_S_M, fname,
                      lsb_jobid2str(jp->jobSpecs.jobId), "xdr_jobReply");
        } else {
            ls_syslog(LOG_ERR, I18N_FUNC_FAIL, fname, "xdr_jobReply");
        }
        relife();
    }

    if (chanWrite_(chfd, reply_buf, XDR_GETPOS(&xdrs2)) <= 0) {
        if (jp != NULL) {
            ls_syslog(LOG_ERR, I18N_JOB_FAIL_S_M, fname,
                      lsb_jobid2str(jp->jobSpecs.jobId), "chanWrite_");
        } else {
            ls_syslog(LOG_ERR, I18N_FUNC_FAIL, fname, "chanWrite_");
        }
    }

    xdr_destroy(&xdrs2);
    return;
}