/*
 * sendCmdBill_ - fill in the caller-environment fields of a resCmdBill
 * request and hand it to callRes_() for transmission on socket `s`.
 *
 * The encode buffer is sized as 1024 bytes plus, for the working directory
 * and for every argv entry, the word-aligned string length plus one int.
 *
 * Side effects on *cmdmsg: filemask is set to the process's current umask,
 * priority is set to 0, and lsfLimits is passed to getLimits().
 *
 * Returns the value of callRes_(), or -1 with lserrno set to LSE_MALLOC
 * (buffer allocation failed) or LSE_LIMIT_SYS (getLimits() failed).
 */
int
sendCmdBill_ (int s, resCmd cmd, struct resCmdBill *cmdmsg, int *retsock,
              struct timeval *timeout)
{
    char *msgBuf;
    int msgLen;
    int argIdx;
    int rc;

    /* Fixed headroom plus the variable-length strings of the request. */
    msgLen = 1024;
    msgLen = msgLen + ALIGNWORD_(strlen(cmdmsg->cwd)) + sizeof(int);

    argIdx = 0;
    while (cmdmsg->argv[argIdx]) {
        msgLen = msgLen + ALIGNWORD_(strlen(cmdmsg->argv[argIdx])) + sizeof(int);
        argIdx++;
    }

    msgBuf = malloc(msgLen);
    if (msgBuf == NULL) {
        lserrno = LSE_MALLOC;
        return -1;
    }

    /*
     * umask() can only be read by setting it: fetch the current mask by
     * installing 0, record it in the request, then put it straight back.
     */
    cmdmsg->filemask = (int) umask(0);
    umask(cmdmsg->filemask);

    cmdmsg->priority = 0;

    if (getLimits(cmdmsg->lsfLimits) < 0) {
        lserrno = LSE_LIMIT_SYS;
        rc = -1;
    } else {
        rc = callRes_(s, cmd, (char *) cmdmsg, msgBuf, msgLen,
                      xdr_resCmdBill, retsock, timeout, NULL);
    }

    free(msgBuf);
    return rc;
}
/*
 * status_job - report a job's status (or resource usage) to mbatchd on
 * masterHost and act on the reply.
 *
 * reqType    request opcode placed in the LSF header; BATCH_RUSAGE_JOB is
 *            sent without waiting for a reply.
 * jp         job card whose state is reported; several fields are updated
 *            here (userJobSucc, notReported, runRusage, needReportRU,
 *            lastStatusMbdTime, jobSpecs.jStatus, jobSpecs.reasons).
 * newStatus  status value placed in the request.
 * err        sbatchd reply code placed in the request.
 *
 * Returns 0 when nothing needs to be sent, or when mbatchd answered
 * LSBE_NO_ERROR, LSBE_LOCK_JOB or LSBE_NO_JOB (or, for no-wait requests,
 * when rd_select_() on the status channel returned 0).  Returns -1 when
 * masterHost is unknown, on allocation/encode/communication failure, and
 * for LSBE_STOP_JOB, LSBE_SBATCHD and any unrecognised reply code.
 */
int
status_job (mbdReqType reqType, struct jobCard *jp, int newStatus,
            sbdReplyType err)
{
    static char fname[] = "status_job()";
    static int seq = 1;                       /* request sequence number */
    static char lastHost[MAXHOSTNAMELEN];     /* last host we logged a failure for */
    int reply;
    char *request_buf;
    char *reply_buf = NULL;
    XDR xdrs;
    struct LSFHeader hdr;
    int cc;
    struct statusReq statusReq;
    int flags;
    int i;
    int len;
    struct lsfAuth *auth = NULL;

    if ((logclass & LC_TRACE) && (logclass & LC_SIGNAL))
        ls_syslog (LOG_DEBUG, "%s: Entering ... regType %d jobId %s",
                   fname, reqType, lsb_jobid2str (jp->jobSpecs.jobId));

    /* Remember whether the user job itself succeeded. */
    if (newStatus == JOB_STAT_EXIT) {
        jp->userJobSucc = FALSE;
    }
    if (MASK_STATUS (newStatus) == JOB_STAT_DONE) {
        jp->userJobSucc = TRUE;
    }

    /* Post-finish statuses are only reported for jobs that succeeded. */
    if (IS_POST_FINISH (newStatus)) {
        if (jp->userJobSucc != TRUE) {
            return 0;
        }
    }

    if (masterHost == NULL)
        return -1;

    if (jp->notReported < 0) {
        jp->notReported = -INFINIT_INT;
        return (0);
    }

    /* Build the request from the job card. */
    statusReq.jobId = jp->jobSpecs.jobId;
    statusReq.actPid = jp->jobSpecs.actPid;
    statusReq.jobPid = jp->jobSpecs.jobPid;
    statusReq.jobPGid = jp->jobSpecs.jobPGid;
    statusReq.newStatus = newStatus;
    statusReq.reason = jp->jobSpecs.reasons;
    statusReq.subreasons = jp->jobSpecs.subreasons;
    statusReq.sbdReply = err;
    statusReq.lsfRusage = jp->lsfRusage;
    statusReq.execUid = jp->jobSpecs.execUid;
    statusReq.numExecHosts = 0;
    statusReq.execHosts = NULL;
    statusReq.exitStatus = jp->w_status;
    statusReq.execCwd = jp->jobSpecs.execCwd;
    statusReq.execHome = jp->jobSpecs.execHome;
    statusReq.execUsername = jp->execUsername;
    statusReq.queuePostCmd = "";
    statusReq.queuePreCmd = "";
    statusReq.msgId = jp->delieveredMsgId;

    /* For a finished job, report the peak usage seen rather than the last sample. */
    if (IS_FINISH (newStatus)) {
        if (jp->maxRusage.mem > jp->runRusage.mem)
            jp->runRusage.mem = jp->maxRusage.mem;
        if (jp->maxRusage.swap > jp->runRusage.swap)
            jp->runRusage.swap = jp->maxRusage.swap;
        if (jp->maxRusage.stime > jp->runRusage.stime)
            jp->runRusage.stime = jp->maxRusage.stime;
        if (jp->maxRusage.utime > jp->runRusage.utime)
            jp->runRusage.utime = jp->maxRusage.utime;
    }

    statusReq.runRusage.mem = jp->runRusage.mem;
    statusReq.runRusage.swap = jp->runRusage.swap;
    statusReq.runRusage.utime = jp->runRusage.utime;
    statusReq.runRusage.stime = jp->runRusage.stime;
    statusReq.runRusage.npids = jp->runRusage.npids;
    statusReq.runRusage.pidInfo = jp->runRusage.pidInfo;
    statusReq.runRusage.npgids = jp->runRusage.npgids;
    statusReq.runRusage.pgid = jp->runRusage.pgid;
    statusReq.actStatus = jp->actStatus;
    statusReq.sigValue = jp->jobSpecs.actValue;

    /* Sequence numbers run 1 .. MAX_SEQ_NUM-1 and then wrap back to 1. */
    statusReq.seq = seq;
    seq++;
    if (seq >= MAX_SEQ_NUM)
        seq = 1;

    /* Size the encode buffer: fixed headroom plus every variable-length part. */
    len = 1024 + ALIGNWORD_ (sizeof (struct statusReq));
    len += ALIGNWORD_ (strlen (statusReq.execHome)) + 4
        + ALIGNWORD_ (strlen (statusReq.execCwd)) + 4
        + ALIGNWORD_ (strlen (statusReq.execUsername)) + 4;
    for (i = 0; i < statusReq.runRusage.npids; i++)
        len += ALIGNWORD_ (sizeof (struct pidInfo)) + 4;
    for (i = 0; i < statusReq.runRusage.npgids; i++)
        len += ALIGNWORD_ (sizeof (int)) + 4;

    if (logclass & (LC_TRACE | LC_COMM))
        ls_syslog (LOG_DEBUG, "%s: The length of the job message is: <%d>",
                   fname, len);

    if ((request_buf = malloc (len)) == NULL) {
        ls_syslog (LOG_ERR, I18N_FUNC_FAIL_M, fname, "malloc");
        return (-1);
    }

    xdrmem_create (&xdrs, request_buf, len, XDR_ENCODE);
    initLSFHeader_ (&hdr);
    hdr.opCode = reqType;

    if (!xdr_encodeMsg (&xdrs, (char *) &statusReq, &hdr, xdr_statusReq, 0,
                        auth)) {
        ls_syslog (LOG_ERR, I18N_JOB_FAIL_S_M, fname,
                   lsb_jobid2str (jp->jobSpecs.jobId), "xdr_statusReq");
        lsb_merr2 (I18N_FUNC_FAIL, fname, "xdr_statusReq");
        xdr_destroy (&xdrs);
        FREEUP (request_buf);
        relife ();
        /*
         * NOTE(review): relife() is defined elsewhere; if it ever returns,
         * the request buffer is already freed and the XDR stream destroyed,
         * so we must not fall through to call_server().
         */
        return (-1);
    }

    flags = CALL_SERVER_NO_HANDSHAKE;
    if (statusChan >= 0)
        flags |= CALL_SERVER_USE_SOCKET;
    if (reqType == BATCH_RUSAGE_JOB)
        flags |= CALL_SERVER_NO_WAIT_REPLY;

    if (logclass & LC_COMM)
        ls_syslog (LOG_DEBUG1, "%s: before call_server statusChan=%d flags=%d",
                   fname, statusChan, flags);

    cc = call_server (masterHost, mbd_port, request_buf, XDR_GETPOS (&xdrs),
                      &reply_buf, &hdr, connTimeout, readTimeout,
                      &statusChan, NULL, NULL, flags);
    if (cc < 0) {
        statusChan = -1;
        /* Log only once per unreachable host; lastHost remembers which. */
        if (!equalHost_ (masterHost, lastHost)) {
            if (errno != EINTR)
                ls_syslog (LOG_DEBUG,
                           "%s: Failed to reach mbatchd on host <%s> for job <%s>: %s",
                           fname, masterHost,
                           lsb_jobid2str (jp->jobSpecs.jobId), lsb_sysmsg ());
            /* Bounded copy: lastHost is a fixed-size buffer. */
            strncpy (lastHost, masterHost, sizeof (lastHost) - 1);
            lastHost[sizeof (lastHost) - 1] = '\0';
        }
        xdr_destroy (&xdrs);
        FREEUP (request_buf);
        failcnt++;
        return (-1);
    }

    failcnt = 0;
    lastHost[0] = '\0';
    xdr_destroy (&xdrs);
    FREEUP (request_buf);
    if (cc)
        free (reply_buf);

    if (flags & CALL_SERVER_NO_WAIT_REPLY) {
        struct timeval timeval;

        /* Zero timeout: poll the status channel without blocking. */
        timeval.tv_sec = 0;
        timeval.tv_usec = 0;
        if (rd_select_ (chanSock_ (statusChan), &timeval) == 0) {
            jp->needReportRU = FALSE;
            jp->lastStatusMbdTime = now;
            return 0;
        }
        CLOSECD (statusChan);
        if (logclass & LC_COMM)
            ls_syslog (LOG_DEBUG1,
                       "%s: Job <%s> rd_select() failed, assume connection broken",
                       fname, lsb_jobid2str (jp->jobSpecs.jobId));
        return (-1);
    }

    /* The reply code comes back in the header's opCode field. */
    reply = hdr.opCode;
    switch (reply) {
    case LSBE_NO_ERROR:
    case LSBE_LOCK_JOB:
        jp->needReportRU = FALSE;
        jp->lastStatusMbdTime = now;
        if (reply == LSBE_LOCK_JOB) {
            if (IS_SUSP (jp->jobSpecs.jStatus))
                jp->jobSpecs.reasons |= SUSP_MBD_LOCK;
            else
                ls_syslog (LOG_ERR, _i18n_msg_get (ls_catd, NL_SETN, 5204,
                           "%s: Job <%s> is in status <%x> and mbatchd wants to lock it, ignored."), /* catgets 5204 */
                           fname, lsb_jobid2str (jp->jobSpecs.jobId),
                           jp->jobSpecs.jStatus);
        }
        return (0);

    case LSBE_NO_JOB:
        if (!IS_POST_FINISH (jp->jobSpecs.jStatus)) {
            ls_syslog (LOG_ERR, _i18n_msg_get (ls_catd, NL_SETN, 5205,
                       "%s: Job <%s> is forgotten by mbatchd on host <%s>, ignored."),
                       fname, lsb_jobid2str (jp->jobSpecs.jobId),
                       masterHost); /* catgets 5205 */
        }
        jp->notReported = -INFINIT_INT;
        return (0);

    case LSBE_STOP_JOB:
        if (jobsig (jp, SIGSTOP, TRUE) < 0)
            SET_STATE (jp->jobSpecs.jStatus, JOB_STAT_EXIT);
        else {
            SET_STATE (jp->jobSpecs.jStatus, JOB_STAT_USUSP);
            jp->jobSpecs.reasons |= SUSP_USER_STOP;
        }
        return (-1);

    case LSBE_SBATCHD:
        ls_syslog (LOG_ERR, _i18n_msg_get (ls_catd, NL_SETN, 5206,
                   "%s: mbatchd on host <%s> doesn't think I'm configured as a batch server when I report the status for job <%s>"), /* catgets 5206 */
                   fname, masterHost, lsb_jobid2str (jp->jobSpecs.jobId));
        return (-1);

    default:
        ls_syslog (LOG_ERR, _i18n_msg_get (ls_catd, NL_SETN, 5207,
                   "%s: Illegal reply code <%d> from mbatchd on host <%s> for job <%s>"), /* catgets 5207 */
                   fname, reply, masterHost,
                   lsb_jobid2str (jp->jobSpecs.jobId));
        return (-1);
    }
}
int ls_connect(char *host) { struct hostent *hp; int s, descriptor[2], size; char official[MAXHOSTNAMELEN]; struct resConnect connReq; char *reqBuf; struct lsfAuth auth; int resTimeout; if (genParams_[LSF_RES_TIMEOUT].paramValue) resTimeout = atoi(genParams_[LSF_RES_TIMEOUT].paramValue); else resTimeout = RES_TIMEOUT; if (_isconnected_(host, descriptor)) return(descriptor[0]); if ((hp = Gethostbyname_(host)) == NULL) { lserrno = LSE_BAD_HOST; return -1; } strcpy(official, hp->h_name); memcpy((char *)&res_addr_.sin_addr,(char *)hp->h_addr,(int)hp->h_length); if ((rootuid_) && (genParams_[LSF_AUTH].paramValue == NULL)) { if (currentsocket_ > (FIRST_RES_SOCK + totsockets_ - 1)) { lserrno = LSE_NOMORE_SOCK; return -1; } s = currentsocket_; currentsocket_++; } else { if ((s = CreateSockEauth_(SOCK_STREAM)) < 0) return -1; } putEauthClientEnvVar("user"); putEauthServerEnvVar("res"); #ifdef INTER_DAEMON_AUTH putEnv("LSF_EAUTH_AUX_PASS", "yes"); #endif if (getAuth_(&auth, official) == -1) { closesocket(s); return -1; } runEsub_(&connReq.eexec, NULL); size = sizeof(struct LSFHeader) + sizeof(connReq) + sizeof(struct lsfAuth) + ALIGNWORD_(connReq.eexec.len) + sizeof(int) * 5 ; if ((reqBuf = malloc(size)) == NULL) { lserrno = LSE_MALLOC; goto Fail; } if (b_connect_(s, (struct sockaddr *)&res_addr_, sizeof(res_addr_), resTimeout) < 0) { lserrno = LSE_CONN_SYS; goto Fail; } if (callRes_(s, RES_CONNECT, (char *) &connReq, reqBuf, size, xdr_resConnect, 0, 0, &auth) == -1) { goto Fail; } if (connReq.eexec.len > 0) free(connReq.eexec.data); free(reqBuf); (void)connected_(official, s, -1, currentSN); return(s); Fail: CLOSESOCKET(s); if (connReq.eexec.len > 0) free(connReq.eexec.data); free(reqBuf); return -1; }
int rsetenv_ (char *host, char **envp, int option) { int descriptor[2]; struct resSetenv envMsg; char *sendBuf; int bufferSize; int i, s; resCmd resCmdOption; bufferSize = 512; if (logclass & (LC_TRACE)) ls_syslog(LOG_DEBUG, "rsetenv_: Entering this routine..."); if (!envp) return 0; for(i=0; envp[i] != NULL; i++) bufferSize = bufferSize + ALIGNWORD_(strlen(envp[i])) + sizeof(int); sendBuf = (char *)malloc(bufferSize); if (sendBuf == NULL) { lserrno = LSE_MALLOC; goto err; } if (_isconnected_(host, descriptor)) s = descriptor[0]; else if ((s = ls_connect(host)) < 0) { free(sendBuf); goto err; } if (!FD_ISSET(s,&connection_ok_)){ FD_SET(s,&connection_ok_); if (ackReturnCode_(s) < 0) { closesocket(s); _lostconnection_(host); free(sendBuf); goto err; } } envMsg.env = envp; if (option == RSETENV_SYNCH) resCmdOption = RES_SETENV; else if (option == RSETENV_ASYNC) resCmdOption = RES_SETENV_ASYNC; if (callRes_(s, resCmdOption, (char *) &envMsg, sendBuf, bufferSize, xdr_resSetenv, 0, 0, NULL) == -1) { closesocket(s); _lostconnection_(host); free(sendBuf); goto err; } free(sendBuf); if (option == RSETENV_SYNCH){ if (ackReturnCode_(s) < 0) { closesocket(s); _lostconnection_(host); goto err; } } return 0; err: return -1; }