/*
 * Submit a single command to LSF.
 *
 *   sexp_debug   - integer vector; a non-zero first element keeps the job's
 *                  output in "Rlsf_job_output.%J", otherwise the output is
 *                  sent to /dev/null.
 *   sexp_command - character vector; its first element is the command line.
 *   sexp_ncpus   - integer vector; its first element is used for both the
 *                  minimum and the maximum processor count.
 *
 * Returns the job id wrapped by AsInt(), or AsInt(0) when lsb_submit()
 * reports failure (the LSF message is printed through Rprintf()).
 */
SEXP lsf_job_submit(SEXP sexp_debug, SEXP sexp_command, SEXP sexp_ncpus)
{
    struct submit request;
    struct submitReply reply;
    int limitIdx;
    int ncpus;
    int submitted;
    const int keepOutput = INTEGER(sexp_debug)[0];

    /* Start from an all-zero request with every resource limit at default. */
    memset(&request, 0, sizeof(request));
    for (limitIdx = 0; limitIdx < LSF_RLIM_NLIMITS; limitIdx++) {
        request.rLimits[limitIdx] = DEFAULT_RLIMIT;
    }

    request.command = CHAR(STRING_ELT(sexp_command, 0));

    /* The output file is always specified; only its destination varies. */
    request.options |= SUB_OUT_FILE;
    request.outFile = keepOutput ? "Rlsf_job_output.%J" : "/dev/null";

    ncpus = INTEGER(sexp_ncpus)[0];
    request.numProcessors = ncpus;
    request.maxNumProcessors = ncpus;

    submitted = lsb_submit(&request, &reply);
    if (submitted != -1) {
        return AsInt(submitted);
    }

    Rprintf("lsf_job_submit: lsb_submit: %s\n", lsb_sysmsg());
    return AsInt(0);
}
/*
 * Write the current LSF batch error message to stderr.
 *
 * When usrMsg is non-NULL it is written first, followed by ": ".
 * Nothing is appended after the text returned by lsb_sysmsg().
 */
void sub_perror(char *usrMsg)
{
    if (usrMsg != NULL) {
        fprintf(stderr, "%s: ", usrMsg);
    }

    fputs(lsb_sysmsg(), stderr);
}
/*
 * Initialise the LSF batch library for use from R.
 *
 * Calls lsb_init("R"); on failure the LSF message is printed through
 * Rprintf() and AsInt(-1) is returned.  On success BSUB_QUIET=1 is placed
 * in the environment and AsInt(0) is returned.
 *
 * The original code returned from both arms of the lsb_init() test, so the
 * putenv() call that followed could never execute.  It is now performed on
 * the success path.  A putenv() failure is deliberately non-fatal (the
 * original treated it as success too): the variable is presumably only
 * there to quieten submission chatter - TODO confirm against LSF docs.
 */
SEXP lsf_initialize(void)
{
    /*
     * putenv() keeps a pointer to the string it is given, so the buffer
     * must be writable and must outlive this call.
     */
    static char quietSetting[] = "BSUB_QUIET=1";

    if (lsb_init("R")) {
        Rprintf("lsf_initialize: lsb_init: %s\n", lsb_sysmsg());
        return AsInt(-1);
    }

    /* Best effort: initialisation already succeeded. */
    if (putenv(quietSetting)) {
        return AsInt(0);
    }

    return AsInt(0);
}
/*
 * Send SIGCONT to an LSF job.
 *
 *   sexp_jobid - integer vector; its first element is the job id.
 *
 * Returns AsInt(0) on success.  When lsb_signaljob() returns a negative
 * value the LSF message is printed through Rprintf() and AsInt(-1) is
 * returned.
 */
SEXP lsf_resume_job(SEXP sexp_jobid)
{
    const int target = INTEGER(sexp_jobid)[0];

    if (lsb_signaljob(target, SIGCONT) >= 0) {
        return AsInt(0);
    }

    Rprintf("lsf_resume_job: lsb_signaljob: %s\n", lsb_sysmsg());
    return AsInt(-1);
}
/*
 * Build a heap-allocated "<usrMsg>: <LSF batch error message>" string.
 *
 *   usrMsg - optional prefix; when NULL only the system message is returned.
 *
 * Returns a newly malloc()ed, NUL-terminated string that the caller must
 * free(), or NULL with lserrno set to LSE_MALLOC when allocation fails.
 *
 * The previous implementation assembled the text in a 256-byte stack buffer
 * with unbounded sprintf()/strcat(), which overflowed for long prefixes or
 * long system messages.  The result is now sized exactly and written
 * directly into the returned buffer.
 */
char *lsb_sperror(char *usrMsg)
{
    static const char separator[] = ": ";
    const size_t sepLen = sizeof(separator) - 1;
    const char *sysMsg = lsb_sysmsg();
    size_t prefixLen = 0;
    size_t sysLen;
    char *rtstr;

    /* Defensive: treat a missing system message as empty text. */
    if (sysMsg == NULL) {
        sysMsg = "";
    }
    sysLen = strlen(sysMsg);

    if (usrMsg != NULL) {
        prefixLen = strlen(usrMsg) + sepLen;
    }

    rtstr = malloc(prefixLen + sysLen + 1);
    if (rtstr == NULL) {
        lserrno = LSE_MALLOC;
        return NULL;
    }

    if (usrMsg != NULL) {
        memcpy(rtstr, usrMsg, prefixLen - sepLen);
        memcpy(rtstr + prefixLen - sepLen, separator, sepLen);
    }
    /* Copies the terminating NUL as well. */
    memcpy(rtstr + prefixLen, sysMsg, sysLen + 1);

    return rtstr;
}
int main(int argc, char **argv) { int cc; struct job_group jg; if (lsb_init(argv[0]) < 0) { lsb_perror("lsb_init"); return -1; } while ((cc = getopt(argc, argv, "hV")) != EOF) { switch (cc) { case 'V': fputs(_LS_VERSION_, stderr); return 0; case 'h': usage(); exit(-1); } } if (argc <= optind) { usage(); return -1; } jg.group_name = argv[argc - 1]; cc = lsb_deljgrp(&jg); if (cc != LSBE_NO_ERROR) { fprintf(stderr, "bgdel: %s.\n", lsb_sysmsg()); return -1; } printf("Group %s removed successfully.\n", jg.group_name); return 0; }
/*
 * status_job - report the status of one job to the master batch daemon.
 *
 * Fills a struct statusReq from the job card `jp`, XDR-encodes it with
 * opcode `reqType`, sends it to `masterHost` through call_server() and
 * then acts on the opcode found in the reply header.
 *
 *   reqType   - request opcode placed in the LSF header; BATCH_RUSAGE_JOB
 *               additionally selects CALL_SERVER_NO_WAIT_REPLY.
 *   jp        - job card; several of its fields are updated here
 *               (userJobSucc, notReported, runRusage, needReportRU,
 *               lastStatusMbdTime, jobSpecs.jStatus, jobSpecs.reasons).
 *   newStatus - status value being reported.
 *   err       - stored in statusReq.sbdReply.
 *
 * Returns 0 when:
 *   - newStatus is a post-finish status and jp->userJobSucc is not TRUE;
 *   - jp->notReported is already negative;
 *   - the request was sent without waiting for a reply and rd_select_()
 *     returned 0;
 *   - the reply opcode is LSBE_NO_ERROR, LSBE_LOCK_JOB or LSBE_NO_JOB.
 * Returns -1 when masterHost is NULL, malloc() fails, call_server() fails,
 * rd_select_() returns non-zero in no-wait mode, or the reply opcode is
 * LSBE_STOP_JOB, LSBE_SBATCHD or anything unrecognised.
 *
 * Uses and updates file/global state not defined in this block: the static
 * sequence counter `seq`, the static `lastHost`, and the externals
 * statusChan and failcnt.
 */
int status_job (mbdReqType reqType, struct jobCard *jp, int newStatus, sbdReplyType err)
{
  static char fname[] = "status_job()";
  static int seq = 1;                       /* request sequence number, wraps at MAX_SEQ_NUM */
  static char lastHost[MAXHOSTNAMELEN];     /* last master host for which a failure was logged */
  int reply;
  char *request_buf;
  char *reply_buf = NULL;
  XDR xdrs;
  struct LSFHeader hdr;
  int cc;
  struct statusReq statusReq;
  int flags;
  int i;
  int len;
  struct lsfAuth *auth = NULL;

  if ((logclass & LC_TRACE) && (logclass & LC_SIGNAL))
    ls_syslog (LOG_DEBUG, "%s: Entering ... regType %d jobId %s", fname, reqType, lsb_jobid2str (jp->jobSpecs.jobId));

  /* Record whether the user job itself succeeded. */
  if (newStatus == JOB_STAT_EXIT) {
    jp->userJobSucc = FALSE;
  }
  if (MASK_STATUS (newStatus) == JOB_STAT_DONE) {
    jp->userJobSucc = TRUE;
  }

  /* Post-finish statuses are only reported when the user job succeeded. */
  if (IS_POST_FINISH (newStatus)) {
    if (jp->userJobSucc != TRUE) {
      return 0;
    }
  }

  if (masterHost == NULL)
    return -1;

  /* A negative notReported means nothing further is sent for this job. */
  if (jp->notReported < 0) {
    jp->notReported = -INFINIT_INT;
    return (0);
  }

  /* Copy the job's identity and state into the request. */
  statusReq.jobId = jp->jobSpecs.jobId;
  statusReq.actPid = jp->jobSpecs.actPid;
  statusReq.jobPid = jp->jobSpecs.jobPid;
  statusReq.jobPGid = jp->jobSpecs.jobPGid;
  statusReq.newStatus = newStatus;
  statusReq.reason = jp->jobSpecs.reasons;
  statusReq.subreasons = jp->jobSpecs.subreasons;
  statusReq.sbdReply = err;
  statusReq.lsfRusage = jp->lsfRusage;
  statusReq.execUid = jp->jobSpecs.execUid;
  statusReq.numExecHosts = 0;
  statusReq.execHosts = NULL;
  statusReq.exitStatus = jp->w_status;
  statusReq.execCwd = jp->jobSpecs.execCwd;
  statusReq.execHome = jp->jobSpecs.execHome;
  statusReq.execUsername = jp->execUsername;
  statusReq.queuePostCmd = "";
  statusReq.queuePreCmd = "";
  statusReq.msgId = jp->delieveredMsgId;

  /* For a finished job, raise each run-time usage figure to its recorded maximum. */
  if (IS_FINISH (newStatus)) {
    if (jp->maxRusage.mem > jp->runRusage.mem)
      jp->runRusage.mem = jp->maxRusage.mem;
    if (jp->maxRusage.swap > jp->runRusage.swap)
      jp->runRusage.swap = jp->maxRusage.swap;
    if (jp->maxRusage.stime > jp->runRusage.stime)
      jp->runRusage.stime = jp->maxRusage.stime;
    if (jp->maxRusage.utime > jp->runRusage.utime)
      jp->runRusage.utime = jp->maxRusage.utime;
  }

  statusReq.runRusage.mem = jp->runRusage.mem;
  statusReq.runRusage.swap = jp->runRusage.swap;
  statusReq.runRusage.utime = jp->runRusage.utime;
  statusReq.runRusage.stime = jp->runRusage.stime;
  statusReq.runRusage.npids = jp->runRusage.npids;
  statusReq.runRusage.pidInfo = jp->runRusage.pidInfo;
  statusReq.runRusage.npgids = jp->runRusage.npgids;
  statusReq.runRusage.pgid = jp->runRusage.pgid;
  statusReq.actStatus = jp->actStatus;
  statusReq.sigValue = jp->jobSpecs.actValue;

  /* Stamp the request with the next sequence number; the counter restarts at 1. */
  statusReq.seq = seq;
  seq++;
  if (seq >= MAX_SEQ_NUM)
    seq = 1;

  /*
   * Size the encode buffer: a fixed 1024 bytes plus the aligned struct,
   * the three strings, and one entry per pid and per process group id.
   * NOTE(review): the strlen() calls assume execHome, execCwd and
   * execUsername are non-NULL - confirm against callers.
   */
  len = 1024 + ALIGNWORD_ (sizeof (struct statusReq));
  len += ALIGNWORD_ (strlen (statusReq.execHome)) + 4 + ALIGNWORD_ (strlen (statusReq.execCwd)) + 4 + ALIGNWORD_ (strlen (statusReq.execUsername)) + 4;
  for (i = 0; i < statusReq.runRusage.npids; i++)
    len += ALIGNWORD_ (sizeof (struct pidInfo)) + 4;
  for (i = 0; i < statusReq.runRusage.npgids; i++)
    len += ALIGNWORD_ (sizeof (int)) + 4;

  if (logclass & (LC_TRACE | LC_COMM))
    ls_syslog (LOG_DEBUG, "%s: The length of the job message is: <%d>", fname, len);

  if ((request_buf = malloc (len)) == NULL) {
    ls_syslog (LOG_ERR, I18N_FUNC_FAIL_M, fname, "malloc");
    return (-1);
  }

  /* Encode header + request into request_buf. */
  xdrmem_create (&xdrs, request_buf, len, XDR_ENCODE);
  initLSFHeader_ (&hdr);
  hdr.opCode = reqType;
  if (!xdr_encodeMsg (&xdrs, (char *) &statusReq, &hdr, xdr_statusReq, 0, auth)) {
    ls_syslog (LOG_ERR, I18N_JOB_FAIL_S_M, fname, lsb_jobid2str (jp->jobSpecs.jobId), "xdr_statusReq");
    lsb_merr2 (I18N_FUNC_FAIL, fname, "xdr_statusReq");
    xdr_destroy (&xdrs);
    FREEUP (request_buf);
    /*
     * NOTE(review): there is no return after relife(); presumably it does
     * not return.  If it can, the code below runs with request_buf already
     * released and xdrs destroyed - confirm.
     */
    relife ();
  }

  /* Reuse the status channel when one is open; rusage updates do not wait for a reply. */
  flags = CALL_SERVER_NO_HANDSHAKE;
  if (statusChan >= 0)
    flags |= CALL_SERVER_USE_SOCKET;
  if (reqType == BATCH_RUSAGE_JOB)
    flags |= CALL_SERVER_NO_WAIT_REPLY;

  if (logclass & LC_COMM)
    ls_syslog (LOG_DEBUG1, "%s: before call_server statusChan=%d flags=%d", fname, statusChan, flags);

  cc = call_server (masterHost, mbd_port, request_buf, XDR_GETPOS (&xdrs), &reply_buf, &hdr, connTimeout, readTimeout, &statusChan, NULL, NULL, flags);
  if (cc < 0) {
    statusChan = -1;
    /* Log the failure only once per master host (and not for EINTR). */
    if (!equalHost_ (masterHost, lastHost)) {
      if (errno != EINTR)
        ls_syslog (LOG_DEBUG, "%s: Failed to reach mbatchd on host <%s> for job <%s>: %s", fname, masterHost, lsb_jobid2str (jp->jobSpecs.jobId), lsb_sysmsg ());
      strcpy (lastHost, masterHost);
    }
    xdr_destroy (&xdrs);
    FREEUP (request_buf);
    failcnt++;
    return (-1);
  } else if (cc == 0) {
    /* intentionally empty in the original */
  }

  /* The request went through: reset failure tracking and release buffers. */
  failcnt = 0;
  lastHost[0] = '\0';
  xdr_destroy (&xdrs);
  FREEUP (request_buf);
  /* cc is non-negative here; reply_buf is freed only when cc is non-zero. */
  if (cc)
    free (reply_buf);

  if (flags & CALL_SERVER_NO_WAIT_REPLY) {
    struct timeval timeval;
    timeval.tv_sec = 0;
    timeval.tv_usec = 0;
    /* Zero-timeout poll of the status channel; a 0 result is treated as success. */
    if (rd_select_ (chanSock_ (statusChan), &timeval) == 0) {
      jp->needReportRU = FALSE;
      jp->lastStatusMbdTime = now;
      return 0;
    }
    CLOSECD (statusChan);
    if (logclass & LC_COMM)
      ls_syslog (LOG_DEBUG1, "%s: Job <%s> rd_select() failed, assume connection broken", fname, lsb_jobid2str (jp->jobSpecs.jobId));
    return (-1);
  }

  /* The reply code travels in the header's opCode field. */
  reply = hdr.opCode;
  switch (reply) {
  case LSBE_NO_ERROR:
  case LSBE_LOCK_JOB:
    jp->needReportRU = FALSE;
    jp->lastStatusMbdTime = now;
    if (reply == LSBE_LOCK_JOB) {
      /* A lock request is honoured only for a job that is already suspended. */
      if (IS_SUSP (jp->jobSpecs.jStatus))
        jp->jobSpecs.reasons |= SUSP_MBD_LOCK;
      else
        ls_syslog (LOG_ERR, _i18n_msg_get (ls_catd, NL_SETN, 5204, "%s: Job <%s> is in status <%x> and mbatchd wants to lock it, ignored."), /* catgets 5204 */ fname, lsb_jobid2str (jp->jobSpecs.jobId), jp->jobSpecs.jStatus);
    }
    return (0);
  case LSBE_NO_JOB:
    /* mbatchd does not know the job: stop reporting it. */
    if (!IS_POST_FINISH (jp->jobSpecs.jStatus)) {
      ls_syslog (LOG_ERR, _i18n_msg_get (ls_catd, NL_SETN, 5205, "%s: Job <%s> is forgotten by mbatchd on host <%s>, ignored."), fname, lsb_jobid2str (jp->jobSpecs.jobId), masterHost); /* catgets 5205 */
    }
    jp->notReported = -INFINIT_INT;
    return (0);
  case LSBE_STOP_JOB:
    /* Stop the job; mark it exited if the signal cannot be delivered. */
    if (jobsig (jp, SIGSTOP, TRUE) < 0)
      SET_STATE (jp->jobSpecs.jStatus, JOB_STAT_EXIT);
    else {
      SET_STATE (jp->jobSpecs.jStatus, JOB_STAT_USUSP);
      jp->jobSpecs.reasons |= SUSP_USER_STOP;
    }
    return (-1);
  case LSBE_SBATCHD:
    /* NOTE(review): the literal below continues onto the next physical line exactly as in the original text; left untouched. */
    ls_syslog (LOG_ERR, _i18n_msg_get (ls_catd, NL_SETN, 5206, "%s: mbatchd on host <%s> doesn't think I'm configured as a batch server when I report the status for job 
<%s>"), /* catgets 5206 */ fname, masterHost, lsb_jobid2str (jp->jobSpecs.jobId));
    return (-1);
  default:
    ls_syslog (LOG_ERR, _i18n_msg_get (ls_catd, NL_SETN, 5207, "%s: Illegal reply code <%d> from mbatchd on host <%s> for job <%s>"), /* catgets 5207 */ fname, reply, masterHost, lsb_jobid2str (jp->jobSpecs.jobId));
    return (-1);
  }
}
/*
 * Submit a job to LSF from a named R list of submission settings.
 *
 *   ctrl - named list; each element is a character vector.  The element
 *          name selects a field of struct submit (and, where applicable,
 *          the matching SUB_* / SUB2_* option bit); the first string of the
 *          element is the value.  Numeric settings are parsed with atoi().
 *          "askedHosts" uses every string in its element.  Flag-style names
 *          ("exclusive", "notifyBegin", "hold", ...) only need to be
 *          present.  "xFile", "wait" and "debug" are accepted and ignored.
 *          Unrecognised names are ignored.
 *
 * Returns the job id wrapped by AsInt(), or AsInt(0) on failure (the reason
 * is printed through Rprintf()).
 *
 * Fixes relative to the previous version:
 *   - "userGroup" stored its value in loginShell; it now sets userGroup.
 *   - The askedHosts pointer array is checked after allocation and released
 *     once lsb_submit() has returned (it used to leak on every call).
 *   - A zero-length element no longer triggers a read of a non-existent
 *     first string; its value is taken to be "".
 *   - Unused locals were removed.
 */
SEXP lsf_job_submit2(SEXP ctrl)
{
    const char *name;
    char *val;
    char **askedHosts = NULL;   /* owned here; freed after lsb_submit() */
    int jobId, i, j, nvals;
    SEXP elmt = R_NilValue;
    SEXP names = getAttrib(ctrl, R_NamesSymbol);
    struct submit submitRequest;
    struct submitReply submitReply;

    memset(&submitRequest, 0, sizeof(submitRequest));
    for (i = 0; i < LSF_RLIM_NLIMITS; i++)
        submitRequest.rLimits[i] = DEFAULT_RLIMIT;

    for (i = 0; i < length(names); i++) {
        name = CHAR(STRING_ELT(names, i));
        elmt = VECTOR_ELT(ctrl, i);
        nvals = length(elmt);
        /* The string data belongs to R; struct submit wants plain char *. */
        val = (nvals > 0) ? (char *)CHAR(STRING_ELT(elmt, 0)) : "";

        if (!strcmp(name, "jobName")) {
            submitRequest.options |= SUB_JOB_NAME;
            submitRequest.jobName = val;
        } else if (!strcmp(name, "queue")) {
            submitRequest.options |= SUB_QUEUE;
            submitRequest.queue = val;
        } else if (!strcmp(name, "askedHosts")) {
            /* Drop any array built for an earlier "askedHosts" entry. */
            free(askedHosts);
            askedHosts = NULL;
            if (nvals > 0) {
                askedHosts = calloc((size_t)nvals, sizeof *askedHosts);
                if (askedHosts == NULL) {
                    Rprintf("lsf_job_submit: calloc: out of memory\n");
                    return AsInt(0);
                }
                for (j = 0; j < nvals; j++)
                    askedHosts[j] = (char *)CHAR(STRING_ELT(elmt, j));
            }
            submitRequest.options |= SUB_HOST;
            submitRequest.numAskedHosts = nvals;
            submitRequest.askedHosts = askedHosts;
        } else if (!strcmp(name, "resReq")) {
            submitRequest.options |= SUB_RES_REQ;
            submitRequest.resReq = val;
        } else if (!strcmp(name, "rlimit_cpu")) {
            submitRequest.rLimits[LSF_RLIMIT_CPU] = atoi(val);
        } else if (!strcmp(name, "rlimit_fsize")) {
            submitRequest.rLimits[LSF_RLIMIT_FSIZE] = atoi(val);
        } else if (!strcmp(name, "rlimit_data")) {
            submitRequest.rLimits[LSF_RLIMIT_DATA] = atoi(val);
        } else if (!strcmp(name, "rlimit_stack")) {
            submitRequest.rLimits[LSF_RLIMIT_STACK] = atoi(val);
        } else if (!strcmp(name, "rlimit_core")) {
            submitRequest.rLimits[LSF_RLIMIT_CORE] = atoi(val);
        } else if (!strcmp(name, "rlimit_rss")) {
            submitRequest.rLimits[LSF_RLIMIT_RSS] = atoi(val);
        } else if (!strcmp(name, "rlimit_nofile")) {
            submitRequest.rLimits[LSF_RLIMIT_NOFILE] = atoi(val);
        } else if (!strcmp(name, "rlimit_open_max")) {
            submitRequest.rLimits[LSF_RLIMIT_OPEN_MAX] = atoi(val);
        } else if (!strcmp(name, "rlimit_swap")) {
            submitRequest.rLimits[LSF_RLIMIT_SWAP] = atoi(val);
        } else if (!strcmp(name, "rlimit_run")) {
            submitRequest.rLimits[LSF_RLIMIT_RUN] = atoi(val);
        } else if (!strcmp(name, "rlimit_process")) {
            submitRequest.rLimits[LSF_RLIMIT_PROCESS] = atoi(val);
        } else if (!strcmp(name, "hostSpec")) {
            submitRequest.options |= SUB_HOST_SPEC;
            submitRequest.hostSpec = val;
        } else if (!strcmp(name, "numProcessors")) {
            submitRequest.numProcessors = atoi(val);
        } else if (!strcmp(name, "dependCond")) {
            submitRequest.options |= SUB_DEPEND_COND;
            submitRequest.dependCond = val;
        } else if (!strcmp(name, "beginTime")) {
            submitRequest.beginTime = atoi(val);
        } else if (!strcmp(name, "termTime")) {
            submitRequest.termTime = atoi(val);
        } else if (!strcmp(name, "sigValue")) {
            submitRequest.options |= SUB_WINDOW_SIG;
            submitRequest.sigValue = atoi(val);
        } else if (!strcmp(name, "command")) {
            submitRequest.command = val;
        } else if (!strcmp(name, "inFile")) {
            submitRequest.options |= SUB_IN_FILE;
            submitRequest.inFile = val;
        } else if (!strcmp(name, "outFile")) {
            submitRequest.options |= SUB_OUT_FILE;
            submitRequest.outFile = val;
        } else if (!strcmp(name, "errFile")) {
            submitRequest.options |= SUB_ERR_FILE;
            submitRequest.errFile = val;
        } else if (!strcmp(name, "chkpntPeriod")) {
            submitRequest.options |= SUB_CHKPNTABLE;
            submitRequest.options |= SUB_CHKPNT_PERIOD;
            submitRequest.chkpntPeriod = atoi(val);
        } else if (!strcmp(name, "chkpntDir")) {
            submitRequest.options |= SUB_CHKPNTABLE;
            submitRequest.options |= SUB_CHKPNT_DIR;
            submitRequest.chkpntDir = val;
        } else if (!strcmp(name, "xFile")) {
            /* not implemented */
            /* SUB_OTHER_FILES probably should be set */
            /* submitRequest.nxf should be assigned */
        } else if (!strcmp(name, "preExecCmd")) {
            submitRequest.options |= SUB_PRE_EXEC;
            submitRequest.preExecCmd = val;
        } else if (!strcmp(name, "mailUser")) {
            submitRequest.options |= SUB_MAIL_USER;
            submitRequest.mailUser = val;
        } else if (!strcmp(name, "delOptions")) {
            submitRequest.delOptions = atoi(val);
        } else if (!strcmp(name, "projectName")) {
            submitRequest.options |= SUB_PROJECT_NAME;
            submitRequest.projectName = val;
        } else if (!strcmp(name, "maxNumProcessors")) {
            submitRequest.maxNumProcessors = atoi(val);
        } else if (!strcmp(name, "loginShell")) {
            submitRequest.options |= SUB_LOGIN_SHELL;
            submitRequest.loginShell = val;
        } else if (!strcmp(name, "userGroup")) {
            submitRequest.options |= SUB_USER_GROUP;
            /* Was wrongly stored in loginShell. */
            submitRequest.userGroup = val;
        } else if (!strcmp(name, "exceptList")) {
            submitRequest.exceptList = val;
        } else if (!strcmp(name, "exclusive")) {
            submitRequest.options |= SUB_EXCLUSIVE;
        } else if (!strcmp(name, "notifyBegin")) {
            submitRequest.options |= SUB_NOTIFY_BEGIN;
        } else if (!strcmp(name, "notifyEnd")) {
            submitRequest.options |= SUB_NOTIFY_END;
        } else if (!strcmp(name, "restart")) {
            submitRequest.options |= SUB_RESTART;
        } else if (!strcmp(name, "restartForce")) {
            submitRequest.options |= SUB_RESTART_FORCE;
        } else if (!strcmp(name, "rerunnable")) {
            submitRequest.options |= SUB_RERUNNABLE;
        /* "chkpnt_copy" (SUB_CHKPNT_COPY) and "chkpnt_force"
         * (SUB_CHKPNT_FORCE) are intentionally not handled. */
        } else if (!strcmp(name, "interactive")) {
            submitRequest.options |= SUB_INTERACTIVE;
        } else if (!strcmp(name, "pty")) {
            submitRequest.options |= SUB_PTY;
        } else if (!strcmp(name, "pty_shell")) {
            submitRequest.options |= SUB_PTY_SHELL;
        } else if (!strcmp(name, "hold")) {
            submitRequest.options2 |= SUB2_HOLD;
        } else if (!strcmp(name, "wait")) {
            /* This will require forking (SUB2_BSUB_BLOCK); not supported. */
        } else if (!strcmp(name, "debug")) {
            /* Accepted for compatibility; it has no effect. */
        }
    }

    jobId = lsb_submit(&submitRequest, &submitReply);

    /* The request is no longer needed once lsb_submit() has returned. */
    free(askedHosts);

    if (jobId == -1) {
        Rprintf("lsf_job_submit: lsb_submit: %s\n", lsb_sysmsg());
        return AsInt(0);
    }
    return AsInt(jobId);
}
/*
 * Look up the state of an LSF job and return it as a one-element R
 * character vector.
 *
 *   sexp_jobid - integer vector; its first element is the job id.
 *
 * The result is one of "NULL", "PEND", "PSUSP", "RUN", "WAIT", "SSUSP",
 * "USUSP", "ZOMBI", "EXIT", "DONE", "UNKWN", or "ERROR" when the status
 * value is not recognised (a diagnostic is printed in that case).
 * R_NilValue is returned when the job record cannot be opened or read.
 */
SEXP lsf_job_status(SEXP sexp_jobid)
{
    struct jobInfoEnt *record;
    const char *label;
    SEXP result;
    int remaining;
    const int wanted = INTEGER(sexp_jobid)[0];

    remaining = lsb_openjobinfo(wanted, NULL, NULL, NULL, NULL, ALL_JOB);
    if (remaining < 0) {
        Rprintf("lsf_job_status: lsb_openjobinfo: %s\n", lsb_sysmsg());
        return R_NilValue;
    }

    record = lsb_readjobinfo(&remaining);
    if (record == NULL) {
        Rprintf("lsf_job_status: lsb_readjobinfo: %s\n", lsb_sysmsg());
        lsb_closejobinfo();
        return R_NilValue;
    }
    lsb_closejobinfo();

    PROTECT(result = allocVector(STRSXP, 1));

    /* Translate the numeric status into its display label. */
    switch (record->status) {
    case JOB_STAT_NULL:
        label = "NULL";
        break;
    case JOB_STAT_PEND:
        label = "PEND";
        break;
    case JOB_STAT_PSUSP:
        label = "PSUSP";
        break;
    case JOB_STAT_RUN:
        label = "RUN";
        break;
    case JOB_STAT_RUN|JOB_STAT_WAIT:
        label = "WAIT";
        break;
    case JOB_STAT_SSUSP:
        label = "SSUSP";
        break;
    case JOB_STAT_USUSP:
        label = "USUSP";
        break;
    case JOB_STAT_EXIT:
        /* An exited job flagged as a zombie gets its own label. */
        label = (record->reasons & EXIT_ZOMBIE) ? "ZOMBI" : "EXIT";
        break;
    case JOB_STAT_DONE:
    case JOB_STAT_DONE|JOB_STAT_PDONE:
    case JOB_STAT_DONE|JOB_STAT_PERR:
    case JOB_STAT_DONE|JOB_STAT_WAIT:
        label = "DONE";
        break;
    case JOB_STAT_UNKWN:
        label = "UNKWN";
        break;
    default:
        Rprintf("lsf_job_status: job state <%d> is unknown.\n", record->status);
        label = "ERROR";
        break;
    }

    SET_STRING_ELT(result, 0, mkChar(label));
    UNPROTECT(1);
    return result;
}