/*
 * status_job() - send a job status (or resource usage) message for job
 * card `jp` to mbatchd on masterHost and act on the reply.
 *
 * Parameters:
 *   reqType   - request opcode placed in the LSF header; when it is
 *               BATCH_RUSAGE_JOB the call does not wait for a reply.
 *   jp        - job card whose fields are copied into the request.
 *   newStatus - status value reported to mbatchd.
 *   err       - sbatchd reply code carried in the request (sbdReply).
 *
 * Side effects visible in this function: updates jp->userJobSucc, may
 * raise jp->runRusage fields to the jp->maxRusage values for finished
 * jobs, advances the static sequence counter, maintains the globals
 * statusChan / failcnt and the static lastHost, and on some replies
 * changes jp->jobSpecs.jStatus / reasons or jp->notReported.
 *
 * Returns 0 when:
 *   - newStatus is a post-finish status and jp->userJobSucc is not TRUE;
 *   - jp->notReported is negative (nothing is sent);
 *   - no reply is awaited and rd_select_() on the status channel
 *     returns 0;
 *   - mbatchd replies LSBE_NO_ERROR, LSBE_LOCK_JOB or LSBE_NO_JOB.
 * Returns -1 when masterHost is NULL, on malloc/encode/call_server
 * failure, when rd_select_() returns non-zero in the no-wait case, and
 * for the replies LSBE_STOP_JOB, LSBE_SBATCHD or any unknown code.
 */
int
status_job (mbdReqType reqType, struct jobCard *jp, int newStatus,
            sbdReplyType err)
{
    static char fname[] = "status_job()";
    static int seq = 1;
    static char lastHost[MAXHOSTNAMELEN];
    int reply;
    char *request_buf;
    char *reply_buf = NULL;
    XDR xdrs;
    struct LSFHeader hdr;
    int cc;
    struct statusReq statusReq;
    int flags;
    int i;
    int len;
    struct lsfAuth *auth = NULL;

    if ((logclass & LC_TRACE) && (logclass & LC_SIGNAL))
        ls_syslog (LOG_DEBUG, "%s: Entering ... regType %d jobId %s",
                   fname, reqType, lsb_jobid2str (jp->jobSpecs.jobId));

    /* Track whether the user job itself succeeded. */
    if (newStatus == JOB_STAT_EXIT) {
        jp->userJobSucc = FALSE;
    }
    if (MASK_STATUS (newStatus) == JOB_STAT_DONE) {
        jp->userJobSucc = TRUE;
    }

    /* Post-finish statuses are only reported when the user job succeeded. */
    if (IS_POST_FINISH (newStatus)) {
        if (jp->userJobSucc != TRUE) {
            return 0;
        }
    }

    if (masterHost == NULL)
        return -1;

    /* A negative notReported suppresses reporting for this job. */
    if (jp->notReported < 0) {
        jp->notReported = -INFINIT_INT;
        return (0);
    }

    /* Build the request from the job card. */
    statusReq.jobId = jp->jobSpecs.jobId;
    statusReq.actPid = jp->jobSpecs.actPid;
    statusReq.jobPid = jp->jobSpecs.jobPid;
    statusReq.jobPGid = jp->jobSpecs.jobPGid;
    statusReq.newStatus = newStatus;
    statusReq.reason = jp->jobSpecs.reasons;
    statusReq.subreasons = jp->jobSpecs.subreasons;
    statusReq.sbdReply = err;
    statusReq.lsfRusage = jp->lsfRusage;
    statusReq.execUid = jp->jobSpecs.execUid;
    statusReq.numExecHosts = 0;
    statusReq.execHosts = NULL;
    statusReq.exitStatus = jp->w_status;
    statusReq.execCwd = jp->jobSpecs.execCwd;
    statusReq.execHome = jp->jobSpecs.execHome;
    statusReq.execUsername = jp->execUsername;
    statusReq.queuePostCmd = "";
    statusReq.queuePreCmd = "";
    statusReq.msgId = jp->delieveredMsgId;

    /* For a finished job, report the larger of the current and the
     * recorded maximum usage for each resource. */
    if (IS_FINISH (newStatus)) {
        if (jp->maxRusage.mem > jp->runRusage.mem)
            jp->runRusage.mem = jp->maxRusage.mem;
        if (jp->maxRusage.swap > jp->runRusage.swap)
            jp->runRusage.swap = jp->maxRusage.swap;
        if (jp->maxRusage.stime > jp->runRusage.stime)
            jp->runRusage.stime = jp->maxRusage.stime;
        if (jp->maxRusage.utime > jp->runRusage.utime)
            jp->runRusage.utime = jp->maxRusage.utime;
    }

    statusReq.runRusage.mem = jp->runRusage.mem;
    statusReq.runRusage.swap = jp->runRusage.swap;
    statusReq.runRusage.utime = jp->runRusage.utime;
    statusReq.runRusage.stime = jp->runRusage.stime;
    statusReq.runRusage.npids = jp->runRusage.npids;
    statusReq.runRusage.pidInfo = jp->runRusage.pidInfo;
    statusReq.runRusage.npgids = jp->runRusage.npgids;
    statusReq.runRusage.pgid = jp->runRusage.pgid;
    statusReq.actStatus = jp->actStatus;
    statusReq.sigValue = jp->jobSpecs.actValue;

    /* Sequence numbers cycle through 1 .. MAX_SEQ_NUM - 1. */
    statusReq.seq = seq;
    seq++;
    if (seq >= MAX_SEQ_NUM)
        seq = 1;

    /* Size the encode buffer: fixed slack plus the struct, the three
     * strings and the per-pid / per-pgid entries. */
    len = 1024 + ALIGNWORD_ (sizeof (struct statusReq));
    len += ALIGNWORD_ (strlen (statusReq.execHome)) + 4
         + ALIGNWORD_ (strlen (statusReq.execCwd)) + 4
         + ALIGNWORD_ (strlen (statusReq.execUsername)) + 4;
    for (i = 0; i < statusReq.runRusage.npids; i++)
        len += ALIGNWORD_ (sizeof (struct pidInfo)) + 4;
    for (i = 0; i < statusReq.runRusage.npgids; i++)
        len += ALIGNWORD_ (sizeof (int)) + 4;

    if (logclass & (LC_TRACE | LC_COMM))
        ls_syslog (LOG_DEBUG, "%s: The length of the job message is: <%d>",
                   fname, len);

    if ((request_buf = malloc (len)) == NULL) {
        ls_syslog (LOG_ERR, I18N_FUNC_FAIL_M, fname, "malloc");
        return (-1);
    }

    xdrmem_create (&xdrs, request_buf, len, XDR_ENCODE);
    initLSFHeader_ (&hdr);
    hdr.opCode = reqType;

    if (!xdr_encodeMsg (&xdrs, (char *) &statusReq, &hdr, xdr_statusReq, 0,
                        auth)) {
        ls_syslog (LOG_ERR, I18N_JOB_FAIL_S_M, fname,
                   lsb_jobid2str (jp->jobSpecs.jobId), "xdr_statusReq");
        lsb_merr2 (I18N_FUNC_FAIL, fname, "xdr_statusReq");
        xdr_destroy (&xdrs);
        FREEUP (request_buf);
        relife ();
        /* NOTE(review): relife() presumably restarts the daemon and does
         * not normally return - confirm.  If it does return, the request
         * buffer and XDR stream are already released, so bail out rather
         * than hand them to call_server(). */
        return (-1);
    }

    flags = CALL_SERVER_NO_HANDSHAKE;
    if (statusChan >= 0)
        flags |= CALL_SERVER_USE_SOCKET;
    if (reqType == BATCH_RUSAGE_JOB)
        flags |= CALL_SERVER_NO_WAIT_REPLY;

    if (logclass & LC_COMM)
        ls_syslog (LOG_DEBUG1, "%s: before call_server statusChan=%d flags=%d",
                   fname, statusChan, flags);

    cc = call_server (masterHost, mbd_port, request_buf, XDR_GETPOS (&xdrs),
                      &reply_buf, &hdr, connTimeout, readTimeout,
                      &statusChan, NULL, NULL, flags);
    if (cc < 0) {
        statusChan = -1;
        /* Log a given unreachable master only once: lastHost remembers
         * the host the failure was last recorded for. */
        if (!equalHost_ (masterHost, lastHost)) {
            if (errno != EINTR)
                ls_syslog (LOG_DEBUG, "%s: Failed to reach mbatchd on host <%s> for job <%s>: %s",
                           fname, masterHost,
                           lsb_jobid2str (jp->jobSpecs.jobId), lsb_sysmsg ());
            /* Bounded copy: masterHost is an external string and lastHost
             * is a fixed-size buffer. */
            strncpy (lastHost, masterHost, sizeof (lastHost) - 1);
            lastHost[sizeof (lastHost) - 1] = '\0';
        }
        xdr_destroy (&xdrs);
        FREEUP (request_buf);
        failcnt++;
        return (-1);
    }

    failcnt = 0;
    lastHost[0] = '\0';
    xdr_destroy (&xdrs);
    FREEUP (request_buf);
    if (cc)
        free (reply_buf);

    if (flags & CALL_SERVER_NO_WAIT_REPLY) {
        struct timeval timeval;

        /* Zero timeout: poll the status channel without blocking. */
        timeval.tv_sec = 0;
        timeval.tv_usec = 0;
        if (rd_select_ (chanSock_ (statusChan), &timeval) == 0) {
            jp->needReportRU = FALSE;
            jp->lastStatusMbdTime = now;
            return 0;
        }

        CLOSECD (statusChan);
        if (logclass & LC_COMM)
            ls_syslog (LOG_DEBUG1, "%s: Job <%s> rd_select() failed, assume connection broken",
                       fname, lsb_jobid2str (jp->jobSpecs.jobId));
        return (-1);
    }

    /* The reply code comes back in the header's opCode field. */
    reply = hdr.opCode;
    switch (reply) {
    case LSBE_NO_ERROR:
    case LSBE_LOCK_JOB:
        jp->needReportRU = FALSE;
        jp->lastStatusMbdTime = now;
        if (reply == LSBE_LOCK_JOB) {
            if (IS_SUSP (jp->jobSpecs.jStatus)) {
                jp->jobSpecs.reasons |= SUSP_MBD_LOCK;
            } else {
                ls_syslog (LOG_ERR, _i18n_msg_get (ls_catd, NL_SETN, 5204, "%s: Job <%s> is in status <%x> and mbatchd wants to lock it, ignored."), /* catgets 5204 */
                           fname, lsb_jobid2str (jp->jobSpecs.jobId),
                           jp->jobSpecs.jStatus);
            }
        }
        return (0);

    case LSBE_NO_JOB:
        if (!IS_POST_FINISH (jp->jobSpecs.jStatus)) {
            ls_syslog (LOG_ERR, _i18n_msg_get (ls_catd, NL_SETN, 5205, "%s: Job <%s> is forgotten by mbatchd on host <%s>, ignored."),
                       fname, lsb_jobid2str (jp->jobSpecs.jobId),
                       masterHost); /* catgets 5205 */
        }
        /* Negative value makes later calls for this job return early. */
        jp->notReported = -INFINIT_INT;
        return (0);

    case LSBE_STOP_JOB:
        if (jobsig (jp, SIGSTOP, TRUE) < 0) {
            SET_STATE (jp->jobSpecs.jStatus, JOB_STAT_EXIT);
        } else {
            SET_STATE (jp->jobSpecs.jStatus, JOB_STAT_USUSP);
            jp->jobSpecs.reasons |= SUSP_USER_STOP;
        }
        return (-1);

    case LSBE_SBATCHD:
        ls_syslog (LOG_ERR, _i18n_msg_get (ls_catd, NL_SETN, 5206, "%s: mbatchd on host <%s> doesn't think I'm configured as a batch server when I report the status for job <%s>"), /* catgets 5206 */
                   fname, masterHost, lsb_jobid2str (jp->jobSpecs.jobId));
        return (-1);

    default:
        ls_syslog (LOG_ERR, _i18n_msg_get (ls_catd, NL_SETN, 5207, "%s: Illegal reply code <%d> from mbatchd on host <%s> for job <%s>"), /* catgets 5207 */
                   fname, reply, masterHost,
                   lsb_jobid2str (jp->jobSpecs.jobId));
        return (-1);
    }
}
void die(int sig) { static char fname[] = "die"; char myhost[MAXHOSTNAMELEN]; if (debug > 1) fprintf(stderr, "%s: signal %d\n", fname, sig); if (masterme) { releaseElogLock(); } if (gethostname(myhost, MAXHOSTNAMELEN) <0) { ls_syslog(LOG_ERR, I18N_FUNC_S_FAIL_M, fname, "gethostname", myhost); strcpy(myhost, "localhost"); } if (sig > 0 && sig < 100) { ls_syslog(LOG_ERR, _i18n_msg_get(ls_catd , NL_SETN, 8216, "Daemon on host <%s> received signal <%d>; exiting"), /* catgets 8216 */ myhost, sig); } else { switch (sig) { case MASTER_RESIGN: ls_syslog(LOG_INFO, (_i18n_msg_get(ls_catd , NL_SETN, 8272, "Master daemon on host <%s> resigned; exiting")), myhost); /* catgets 8272 */ break; case MASTER_RECONFIG: ls_syslog(LOG_INFO, (_i18n_msg_get(ls_catd , NL_SETN, 8273, "Master daemon on host <%s> exiting for reconfiguration")), myhost); /* catgets 8273 */ break; case SLAVE_MEM: ls_syslog(LOG_ERR, _i18n_msg_get(ls_catd , NL_SETN, 8217, "Slave daemon on host <%s> failed in memory allocation; fatal error - exiting"), myhost); /* catgets 8217 */ lsb_merr1(_i18n_msg_get(ls_catd , NL_SETN, 8217, "Slave daemon on host <%s> failed in memory allocation; fatal error - exiting"), myhost); break; case MASTER_MEM: ls_syslog(LOG_ERR, _i18n_msg_get(ls_catd , NL_SETN, 8218, "Master daemon on host <%s> failed in memory allocation; fatal error - exiting"), myhost); /* catgets 8218 */ break; case SLAVE_FATAL: ls_syslog(LOG_ERR, _i18n_msg_get(ls_catd , NL_SETN, 8219, "Slave daemon on host <%s> dying; fatal error - see above messages for reason"), myhost); /* catgets 8219 */ break; case MASTER_FATAL: ls_syslog(LOG_ERR, _i18n_msg_get(ls_catd , NL_SETN, 8220, "Master daemon on host <%s> dying; fatal error - see above messages for reason"), myhost); /* catgets 8220 */ break; case MASTER_CONF: ls_syslog(LOG_ERR, _i18n_msg_get(ls_catd , NL_SETN, 8221, "Master daemon on host <%s> died of bad configuration file"), myhost); /* catgets 8221 */ break; case SLAVE_RESTART: ls_syslog(LOG_ERR, 
_i18n_msg_get(ls_catd , NL_SETN, 8222, "Slave daemon on host <%s> restarting"), myhost); /* catgets 8222 */ break; case SLAVE_SHUTDOWN: ls_syslog(LOG_ERR, _i18n_msg_get(ls_catd , NL_SETN, 8223, "Slave daemon on host <%s> shutdown"), myhost); /* catgets 8223 */ break; default: ls_syslog(LOG_ERR, _i18n_msg_get(ls_catd , NL_SETN, 8224, "Daemon on host <%s> exiting; cause code <%d> unknown"), myhost, sig); /* catgets 8224 */ break; } } shutdown(chanSock_(batchSock), 2); exit(sig); }
int ls_initrex(int num, int options) { struct servent *sv; if (geteuid() == 0) rootuid_ = TRUE; if (initenv_(NULL, NULL)<0) { if (rootuid_ && !(options & KEEPUID)) lsfSetUid(getuid()); return(-1); } inithostsock_(); lsQueueInit_(&requestQ, lsReqCmp_, NULL); if (requestQ == NULL) { lserrno = LSE_MALLOC; return(-1); } res_addr_.sin_family = AF_INET; if (genParams_[LSF_RES_PORT].paramValue) { if ((res_addr_.sin_port = atoi(genParams_[LSF_RES_PORT].paramValue)) != 0) res_addr_.sin_port = htons(res_addr_.sin_port); else goto res_init_fail; } else if (genParams_[LSF_RES_DEBUG].paramValue) { res_addr_.sin_port = htons(RES_PORT); } else { # if defined(_COMPANY_X_) if ((res_addr_.sin_port = get_port_number(RES_SERVICE,(char *)NULL)) == -1) { # else if ((sv = getservbyname("res", "tcp")) != NULL) res_addr_.sin_port = sv->s_port; else { # endif res_init_fail: lserrno = LSE_RES_NREG; if (rootuid_ && !(options & KEEPUID)) lsfSetUid(getuid()); return (-1); } } initconntbl_(); FD_ZERO(&connection_ok_); if ((rootuid_) && (genParams_[LSF_AUTH].paramValue == NULL)) { int i; i = opensocks_(num); if (!(options & KEEPUID)) lsfSetUid(getuid()); return (i); } else { return (num); } } int opensocks_(int num) { static char fname[] = "opensocks_"; int s; int nextdescr; int i; totsockets_ = (num <= 0 || num > MAXCONNECT) ? 
LSF_DEFAULT_SOCKS : num; if (logclass & LC_COMM) ls_syslog(LOG_DEBUG,"%s: try to allocate num <%d> of socks",fname,num); nextdescr = FIRST_RES_SOCK; for (i = 0; i < totsockets_; i++) { if ((s = CreateSock_(SOCK_STREAM)) < 0) { if (logclass & LC_COMM) ls_syslog(LOG_DEBUG, "%s: CreateSock_ failed, iter:<%d> %s", fname,i,strerror(errno)); totsockets_ = i; if (i > 0) { break; } else { return(-1); } } if (s != nextdescr) { if (dup2(s,nextdescr) < 0) { if (logclass & LC_COMM) ls_syslog(LOG_DEBUG, "%s: dup2() failed, old:<%d>, new<%d>, iter:<%d> %s", fname,s,nextdescr,i,strerror(errno)); close(s); lserrno = LSE_SOCK_SYS; totsockets_ = i; if (i > 0) break; else return (-1); } #if defined(FD_CLOEXEC) fcntl(nextdescr, F_SETFD, (fcntl(nextdescr, F_GETFD) | FD_CLOEXEC)) ; #else #if defined(FIOCLEX) (void)ioctl(nextdescr, FIOCLEX, (char *)NULL); #endif #endif close(s); } nextdescr++; } currentsocket_ = FIRST_RES_SOCK; if (logclass & LC_COMM) ls_syslog(LOG_DEBUG,"%s: returning num=<%d>",fname,totsockets_); return (totsockets_); } /* ls_fdbusy() */ int ls_fdbusy(int fd) { sTab hashSearchPtr; hEnt *hEntPtr; if (fd == chanSock_(limchans_[PRIMARY]) || fd == chanSock_(limchans_[MASTER]) || fd == chanSock_(limchans_[UNBOUND])) return TRUE; if (fd == cli_nios_fd[0]) return TRUE; hEntPtr = h_firstEnt_(&conn_table, &hashSearchPtr); while (hEntPtr) { int *pfd; pfd = hEntPtr->hData; if (fd == pfd[0] || fd == pfd[1]) return (TRUE); hEntPtr = h_nextEnt_(&hashSearchPtr); } if (rootuid_ && fd >= currentsocket_ && fd < FIRST_RES_SOCK + totsockets_) return TRUE; return FALSE; }