Пример #1
0
Файл: Rlsf.c Проект: cran/Rlsf
/*
 * lsf_job_submit - submit a single command through lsb_submit().
 *
 * sexp_debug   : integer vector; element 0 selects the output file
 *                (non-zero -> "Rlsf_job_output.%J", zero -> "/dev/null").
 * sexp_command : character vector; element 0 is the command to submit.
 * sexp_ncpus   : integer vector; element 0 is used for both numProcessors
 *                and maxNumProcessors.
 *
 * Returns AsInt(job id) on success.  When lsb_submit() returns -1 the
 * message from lsb_sysmsg() is printed and AsInt(0) is returned.
 */
SEXP
lsf_job_submit(SEXP sexp_debug, SEXP sexp_command, SEXP sexp_ncpus)
{
  struct submit req;
  struct submitReply reply;
  int keepOutput, ncpus, limit, id;

  keepOutput = INTEGER(sexp_debug)[0];

  /* Start from an all-zero request with every resource limit at its default. */
  memset(&req, 0, sizeof(req));
  limit = 0;
  while (limit < LSF_RLIM_NLIMITS) {
    req.rLimits[limit] = DEFAULT_RLIMIT;
    limit++;
  }

  req.command = CHAR(STRING_ELT(sexp_command, 0));

  /* An output file is always requested; only its destination varies. */
  req.options |= SUB_OUT_FILE;
  req.outFile = keepOutput ? "Rlsf_job_output.%J" : "/dev/null";

  ncpus = INTEGER(sexp_ncpus)[0];
  req.numProcessors = ncpus;
  req.maxNumProcessors = ncpus;

  id = lsb_submit(&req, &reply);
  if (id != -1)
    return AsInt(id);

  Rprintf("lsf_job_submit: lsb_submit: %s\n", lsb_sysmsg());
  return AsInt(0);
}
Пример #2
0
/*
 * sub_perror - write the current lsb_sysmsg() text to stderr.
 *
 * usrMsg : optional prefix; when non-NULL it is written first, followed
 *          by ": ".  No trailing newline is added by this function.
 */
void
sub_perror (char *usrMsg)
{
    FILE *out = stderr;

    if (usrMsg != NULL) {
        fputs(usrMsg, out);
        fputs(": ", out);
    }

    fputs(lsb_sysmsg(), out);
}
Пример #3
0
Файл: Rlsf.c Проект: cran/Rlsf
/*
 * lsf_initialize - initialise the LSF batch library for this process.
 *
 * Calls lsb_init("R").  If that reports failure (non-zero), the message
 * from lsb_sysmsg() is printed and AsInt(-1) is returned.
 *
 * On success BSUB_QUIET=1 is placed in the environment and AsInt(0) is
 * returned.  Failing to set the variable is reported but is not treated
 * as an initialisation failure, so the return value stays 0.
 *
 * The earlier version returned from both branches of the lsb_init() test,
 * which left the putenv() call unreachable.
 */
SEXP
lsf_initialize(void)
{
  /* putenv() keeps the pointer it is handed, so the string must outlive
     this call; a static array guarantees that. */
  static char quiet_env[] = "BSUB_QUIET=1";

  if (lsb_init("R")) {
    Rprintf("lsf_initialize: lsb_init: %s\n", lsb_sysmsg());
    return AsInt(-1);
  }

  if (putenv(quiet_env)) {
    Rprintf("lsf_initialize: putenv: could not set BSUB_QUIET\n");
  }

  return AsInt(0);
}
Пример #4
0
Файл: Rlsf.c Проект: cran/Rlsf
/*
 * lsf_resume_job - send SIGCONT to a job via lsb_signaljob().
 *
 * sexp_jobid : integer vector; element 0 is the job id.
 *
 * Returns AsInt(0) when lsb_signaljob() returns a non-negative value.
 * Otherwise prints the lsb_sysmsg() text and returns AsInt(-1).
 */
SEXP
lsf_resume_job(SEXP sexp_jobid)
{
  int target = INTEGER(sexp_jobid)[0];

  if (lsb_signaljob(target, SIGCONT) >= 0)
    return AsInt(0);

  Rprintf("lsf_resume_job: lsb_signaljob: %s\n", lsb_sysmsg());
  return AsInt(-1);
}
Пример #5
0
/*
 * lsb_sperror - build an error string in freshly allocated memory.
 *
 * usrMsg : optional prefix.  When non-NULL the result is
 *          "<usrMsg>: <lsb_sysmsg()>"; otherwise it is just the
 *          lsb_sysmsg() text.
 *
 * Returns a malloc()ed, NUL-terminated string that the caller must
 * free(), or NULL with lserrno set to LSE_MALLOC when allocation fails.
 *
 * The result buffer is sized from the actual input lengths.  The earlier
 * version assembled the text in a fixed 256-byte stack array using
 * sprintf()/strcat(), which overflowed for long messages.
 */
char *
lsb_sperror(char *usrMsg)
{
    const char *sysMsg;
    size_t userLen = 0;
    size_t prefixLen = 0;
    size_t sysLen;
    char *rtstr;

    sysMsg = lsb_sysmsg();
    if (sysMsg == NULL)         /* defensive: treat a missing message as empty */
        sysMsg = "";
    sysLen = strlen(sysMsg);

    if (usrMsg != NULL) {
        userLen = strlen(usrMsg);
        prefixLen = userLen + 2;        /* room for the ": " separator */
    }

    rtstr = malloc(prefixLen + sysLen + 1);
    if (rtstr == NULL) {
        lserrno = LSE_MALLOC;
        return NULL;
    }

    if (usrMsg != NULL) {
        memcpy(rtstr, usrMsg, userLen);
        memcpy(rtstr + userLen, ": ", 2);
    }
    /* sysLen + 1 copies the terminating NUL as well. */
    memcpy(rtstr + prefixLen, sysMsg, sysLen + 1);

    return rtstr;
}
Пример #6
0
int
main(int argc, char **argv)
{
    int cc;
    struct job_group jg;

    if (lsb_init(argv[0]) < 0) {
        lsb_perror("lsb_init");
        return -1;
    }

    while ((cc = getopt(argc, argv, "hV")) != EOF) {
        switch (cc) {
            case 'V':
                fputs(_LS_VERSION_, stderr);
                return 0;
            case 'h':
                usage();
                exit(-1);
        }
    }

    if (argc <= optind) {
        usage();
        return -1;
    }

    jg.group_name = argv[argc - 1];

    cc = lsb_deljgrp(&jg);
    if (cc != LSBE_NO_ERROR) {
        fprintf(stderr, "bgdel: %s.\n", lsb_sysmsg());
        return -1;
    }

    printf("Group %s removed successfully.\n", jg.group_name);

    return 0;
}
Пример #7
0
/*
 * status_job - report the status of one job to mbatchd on masterHost.
 *
 * Fills a struct statusReq from the job card `jp`, `newStatus` and `err`,
 * XDR-encodes it with `reqType` as the header opCode, sends it through
 * call_server() and then acts on the opCode found in `hdr` afterwards.
 *
 * Parameters:
 *   reqType   - request opcode placed in the message header.
 *   jp        - job card; several fields are read and some are updated
 *               (userJobSucc, notReported, runRusage, needReportRU,
 *               lastStatusMbdTime, jobSpecs.jStatus, jobSpecs.reasons).
 *   newStatus - status value to report.
 *   err       - value copied into statusReq.sbdReply.
 *
 * Returns 0 when nothing needed to be sent, or when the report was
 * accepted (including the "no such job" reply); returns -1 on missing
 * masterHost, allocation failure, communication failure, or the other
 * reply codes handled in the switch at the end.
 *
 * Static state: `seq` (message sequence number) and `lastHost` (the host
 * for which a failure was last logged) persist across calls.
 */
int
status_job (mbdReqType reqType,
	    struct jobCard *jp, int newStatus, sbdReplyType err)
{
  static char fname[] = "status_job()";
  static int seq = 1;
  static char lastHost[MAXHOSTNAMELEN];
  int reply;
  char *request_buf;
  char *reply_buf = NULL;
  XDR xdrs;
  struct LSFHeader hdr;
  int cc;
  struct statusReq statusReq;
  int flags;
  int i;
  int len;
  struct lsfAuth *auth = NULL;

  if ((logclass & LC_TRACE) && (logclass & LC_SIGNAL))
    ls_syslog (LOG_DEBUG, "%s: Entering ... regType %d jobId %s",
	       fname, reqType, lsb_jobid2str (jp->jobSpecs.jobId));

  /* Remember on the job card whether the user job succeeded. */
  if (newStatus == JOB_STAT_EXIT)
    {
      jp->userJobSucc = FALSE;
    }

  if (MASK_STATUS (newStatus) == JOB_STAT_DONE)
    {
      jp->userJobSucc = TRUE;
    }

  /* A post-finish status is not reported unless userJobSucc is TRUE. */
  if (IS_POST_FINISH (newStatus))
    {
      if (jp->userJobSucc != TRUE)
	{
	  return 0;
	}
    }

  /* Nothing can be sent without a known master host. */
  if (masterHost == NULL)
    return -1;

  /* A negative notReported suppresses the report; the field is reset to
     -INFINIT_INT (the same value the LSBE_NO_JOB reply stores below). */
  if (jp->notReported < 0)
    {
      jp->notReported = -INFINIT_INT;
      return (0);
    }

  /* Populate the request from the job card.  Pointer members (execCwd,
     execHome, execUsername, pidInfo, pgid) are shared with jp, not copied. */
  statusReq.jobId = jp->jobSpecs.jobId;
  statusReq.actPid = jp->jobSpecs.actPid;
  statusReq.jobPid = jp->jobSpecs.jobPid;
  statusReq.jobPGid = jp->jobSpecs.jobPGid;
  statusReq.newStatus = newStatus;
  statusReq.reason = jp->jobSpecs.reasons;
  statusReq.subreasons = jp->jobSpecs.subreasons;
  statusReq.sbdReply = err;
  statusReq.lsfRusage = jp->lsfRusage;
  statusReq.execUid = jp->jobSpecs.execUid;
  statusReq.numExecHosts = 0;
  statusReq.execHosts = NULL;
  statusReq.exitStatus = jp->w_status;
  statusReq.execCwd = jp->jobSpecs.execCwd;
  statusReq.execHome = jp->jobSpecs.execHome;
  statusReq.execUsername = jp->execUsername;
  statusReq.queuePostCmd = "";
  statusReq.queuePreCmd = "";
  statusReq.msgId = jp->delieveredMsgId;

  /* For a finish status, raise the running usage figures to the recorded
     maxima so the reported values are never below maxRusage. */
  if (IS_FINISH (newStatus))
    {
      if (jp->maxRusage.mem > jp->runRusage.mem)
	jp->runRusage.mem = jp->maxRusage.mem;
      if (jp->maxRusage.swap > jp->runRusage.swap)
	jp->runRusage.swap = jp->maxRusage.swap;
      if (jp->maxRusage.stime > jp->runRusage.stime)
	jp->runRusage.stime = jp->maxRusage.stime;
      if (jp->maxRusage.utime > jp->runRusage.utime)
	jp->runRusage.utime = jp->maxRusage.utime;
    }
  statusReq.runRusage.mem = jp->runRusage.mem;
  statusReq.runRusage.swap = jp->runRusage.swap;
  statusReq.runRusage.utime = jp->runRusage.utime;
  statusReq.runRusage.stime = jp->runRusage.stime;
  statusReq.runRusage.npids = jp->runRusage.npids;
  statusReq.runRusage.pidInfo = jp->runRusage.pidInfo;
  statusReq.runRusage.npgids = jp->runRusage.npgids;
  statusReq.runRusage.pgid = jp->runRusage.pgid;
  statusReq.actStatus = jp->actStatus;
  statusReq.sigValue = jp->jobSpecs.actValue;
  /* Stamp the request with the static sequence number; it wraps back to 1
     once it reaches MAX_SEQ_NUM. */
  statusReq.seq = seq;
  seq++;
  if (seq >= MAX_SEQ_NUM)
    seq = 1;

  /* Size the encode buffer: 1024 bytes of slack plus the aligned struct,
     the three strings and one slot per pid / pgid entry. */
  len = 1024 + ALIGNWORD_ (sizeof (struct statusReq));

  len += ALIGNWORD_ (strlen (statusReq.execHome)) + 4 +
    ALIGNWORD_ (strlen (statusReq.execCwd)) + 4 +
    ALIGNWORD_ (strlen (statusReq.execUsername)) + 4;

  for (i = 0; i < statusReq.runRusage.npids; i++)
    len += ALIGNWORD_ (sizeof (struct pidInfo)) + 4;

  for (i = 0; i < statusReq.runRusage.npgids; i++)
    len += ALIGNWORD_ (sizeof (int)) + 4;

  if (logclass & (LC_TRACE | LC_COMM))
    ls_syslog (LOG_DEBUG, "%s: The length of the job message is: <%d>", fname,
	       len);

  if ((request_buf = malloc (len)) == NULL)
    {
      ls_syslog (LOG_ERR, I18N_FUNC_FAIL_M, fname, "malloc");
      return (-1);
    }

  xdrmem_create (&xdrs, request_buf, len, XDR_ENCODE);
  initLSFHeader_ (&hdr);
  hdr.opCode = reqType;

  /* NOTE(review): on encode failure the buffer is released and relife()
     is called, but there is no return here.  Unless relife() never
     returns, execution continues into call_server() with the released
     request_buf and a destroyed XDR stream - confirm. */
  if (!xdr_encodeMsg (&xdrs, (char *) &statusReq, &hdr, xdr_statusReq, 0,
		      auth))
    {
      ls_syslog (LOG_ERR, I18N_JOB_FAIL_S_M,
		 fname, lsb_jobid2str (jp->jobSpecs.jobId), "xdr_statusReq");
      lsb_merr2 (I18N_FUNC_FAIL, fname, "xdr_statusReq");
      xdr_destroy (&xdrs);
      FREEUP (request_buf);
      relife ();
    }

  /* Choose the call_server() flags: USE_SOCKET is added when statusChan
     is non-negative, and BATCH_RUSAGE_JOB requests do not wait for a
     reply. */
  flags = CALL_SERVER_NO_HANDSHAKE;
  if (statusChan >= 0)
    flags |= CALL_SERVER_USE_SOCKET;

  if (reqType == BATCH_RUSAGE_JOB)
    flags |= CALL_SERVER_NO_WAIT_REPLY;

  if (logclass & LC_COMM)
    ls_syslog (LOG_DEBUG1, "%s: before call_server statusChan=%d flags=%d",
	       fname, statusChan, flags);

  cc = call_server (masterHost,
		    mbd_port,
		    request_buf,
		    XDR_GETPOS (&xdrs),
		    &reply_buf,
		    &hdr,
		    connTimeout, readTimeout, &statusChan, NULL, NULL, flags);
  if (cc < 0)
    {
      /* Communication failed: forget the channel, log once per host
         (and not for EINTR), count the failure and give up. */
      statusChan = -1;
      if (!equalHost_ (masterHost, lastHost))
	{
	  if (errno != EINTR)
	    ls_syslog (LOG_DEBUG,
		       "%s: Failed to reach mbatchd on host <%s> for job <%s>: %s",
		       fname, masterHost, lsb_jobid2str (jp->jobSpecs.jobId),
		       lsb_sysmsg ());
	  /* NOTE(review): unbounded copy into lastHost[MAXHOSTNAMELEN];
	     assumes masterHost fits - confirm. */
	  strcpy (lastHost, masterHost);
	}
      xdr_destroy (&xdrs);
      FREEUP (request_buf);
      failcnt++;
      return (-1);
    }
  else if (cc == 0)
    {

    }

  /* The call succeeded: clear the failure bookkeeping and release the
     request buffer. */
  failcnt = 0;
  lastHost[0] = '\0';
  xdr_destroy (&xdrs);
  FREEUP (request_buf);

  /* cc is positive here (negative values returned above), so a reply
     buffer is released only when cc is non-zero. */
  if (cc)
    free (reply_buf);

  if (flags & CALL_SERVER_NO_WAIT_REPLY)
    {

      /* No reply is awaited for this request.  Poll the status channel
         with a zero timeout: a result of 0 is treated as success, any
         other result closes the channel and fails. */
      struct timeval timeval;

      timeval.tv_sec = 0;
      timeval.tv_usec = 0;

      if (rd_select_ (chanSock_ (statusChan), &timeval) == 0)
	{
	  jp->needReportRU = FALSE;
	  jp->lastStatusMbdTime = now;
	  return 0;
	}

      CLOSECD (statusChan);

      if (logclass & LC_COMM)
	ls_syslog (LOG_DEBUG1,
		   "%s: Job <%s> rd_select() failed, assume connection broken",
		   fname, lsb_jobid2str (jp->jobSpecs.jobId));
      return (-1);
    }
  /* hdr was passed to call_server() by address; its opCode is read here
     as the reply code (presumably filled in from the reply header -
     confirm against call_server()). */
  reply = hdr.opCode;
  switch (reply)
    {
    case LSBE_NO_ERROR:
    case LSBE_LOCK_JOB:
      /* Report accepted: record the time and clear the pending-usage flag. */
      jp->needReportRU = FALSE;
      jp->lastStatusMbdTime = now;
      if (reply == LSBE_LOCK_JOB)
	{
	  /* A lock request is honoured only for a suspended job;
	     otherwise it is logged and ignored. */
	  if (IS_SUSP (jp->jobSpecs.jStatus))
	    jp->jobSpecs.reasons |= SUSP_MBD_LOCK;
	  else
	    ls_syslog (LOG_ERR, _i18n_msg_get (ls_catd, NL_SETN, 5204, "%s: Job <%s> is in status <%x> and mbatchd wants to lock it, ignored."),	/* catgets 5204 */
		       fname,
		       lsb_jobid2str (jp->jobSpecs.jobId),
		       jp->jobSpecs.jStatus);
	}
      return (0);
    case LSBE_NO_JOB:
      /* The job is unknown to the server: log it (unless the job is in a
         post-finish state) and mark it so later calls skip reporting. */
      if (!IS_POST_FINISH (jp->jobSpecs.jStatus))
	{
	  ls_syslog (LOG_ERR, _i18n_msg_get (ls_catd, NL_SETN, 5205, "%s: Job <%s> is forgotten by mbatchd on host <%s>, ignored."), fname, lsb_jobid2str (jp->jobSpecs.jobId), masterHost);	/* catgets 5205 */
	}

      jp->notReported = -INFINIT_INT;
      return (0);
    case LSBE_STOP_JOB:
      /* Stop the job with SIGSTOP; if signalling fails the job state is
         set to EXIT, otherwise to USUSP with the user-stop reason. */
      if (jobsig (jp, SIGSTOP, TRUE) < 0)
	SET_STATE (jp->jobSpecs.jStatus, JOB_STAT_EXIT);
      else
	{
	  SET_STATE (jp->jobSpecs.jStatus, JOB_STAT_USUSP);
	  jp->jobSpecs.reasons |= SUSP_USER_STOP;
	}
      return (-1);
    case LSBE_SBATCHD:
      ls_syslog (LOG_ERR, _i18n_msg_get (ls_catd, NL_SETN, 5206, "%s: mbatchd on host <%s> doesn't think I'm configured as a batch server when I report the status for job <%s>"),	/* catgets 5206 */
		 fname, masterHost, lsb_jobid2str (jp->jobSpecs.jobId));
      return (-1);
    default:
      /* Any other reply code is logged as illegal and treated as failure. */
      ls_syslog (LOG_ERR, _i18n_msg_get (ls_catd, NL_SETN, 5207, "%s: Illegal reply code <%d> from mbatchd on host <%s> for job <%s>"),	/* catgets 5207 */
		 fname,
		 reply, masterHost, lsb_jobid2str (jp->jobSpecs.jobId));
      return (-1);
    }
}
Пример #8
0
Файл: Rlsf.c Проект: cran/Rlsf
/*
 * lsf_job_submit2 - submit a job described by a named R list.
 *
 * ctrl : list whose names select fields of struct submit.  Each element
 *        is read as a character vector: element 0 supplies the value
 *        (numeric fields are converted with atoi()), except "askedHosts",
 *        where every element is used as a host name.  Flag-style names
 *        ("exclusive", "hold", ...) only need to be present.  Unknown
 *        names are ignored.  "xFile", "wait" and "debug" are recognised
 *        but have no effect.
 *
 * Returns AsInt(job id) on success, or AsInt(0) after printing a message
 * when lsb_submit() fails or memory for the host list cannot be obtained.
 *
 * String values are not copied: the request points into the R character
 * data, which stays valid for the duration of this call because `ctrl`
 * is an argument.
 */
SEXP
lsf_job_submit2(SEXP ctrl) {
	const char *name;
	char *val;
	char **askedHosts = NULL;	/* host array owned by this function */
	int jobId, i, j, nHosts;
	SEXP elmt = R_NilValue;
	SEXP names = getAttrib(ctrl, R_NamesSymbol);
	struct submit submitRequest;
	struct submitReply submitReply;

	memset(&submitRequest, 0, sizeof(submitRequest));
	for (i = 0; i < LSF_RLIM_NLIMITS; i++)
		submitRequest.rLimits[i] = DEFAULT_RLIMIT;

	for (i = 0; i < length(names); i++) {
		name = CHAR(STRING_ELT(names, i));
		elmt = VECTOR_ELT(ctrl, i);
		/* A zero-length value has no element 0 to read; use "" instead.
		   struct submit uses plain char *, hence the cast from the
		   const pointer that CHAR() yields. */
		val = (length(elmt) > 0) ? (char *) CHAR(STRING_ELT(elmt, 0)) : "";

		if (!strcmp(name, "jobName")) {
			submitRequest.options |= SUB_JOB_NAME;
			submitRequest.jobName = val;
		} else if (!strcmp(name, "queue")) {
			submitRequest.options |= SUB_QUEUE;
			submitRequest.queue = val;
		} else if (!strcmp(name, "askedHosts")) {
			nHosts = length(elmt);
			/* Drop any array from an earlier "askedHosts" entry. */
			free(askedHosts);
			askedHosts = NULL;
			if (nHosts > 0) {
				askedHosts = calloc((size_t) nHosts, sizeof *askedHosts);
				if (askedHosts == NULL) {
					Rprintf("lsf_job_submit: calloc: out of memory\n");
					return AsInt(0);
				}
				for (j = 0; j < nHosts; j++)
					askedHosts[j] = (char *) CHAR(STRING_ELT(elmt, j));
			}
			submitRequest.options |= SUB_HOST;
			submitRequest.numAskedHosts = nHosts;
			submitRequest.askedHosts = askedHosts;
		} else if (!strcmp(name, "resReq")) {
			submitRequest.options |= SUB_RES_REQ;
			submitRequest.resReq = val;
		} else if (!strcmp(name, "rlimit_cpu")) {
			submitRequest.rLimits[LSF_RLIMIT_CPU] = atoi(val);
		} else if (!strcmp(name, "rlimit_fsize")) {
			submitRequest.rLimits[LSF_RLIMIT_FSIZE] = atoi(val);
		} else if (!strcmp(name, "rlimit_data")) {
			submitRequest.rLimits[LSF_RLIMIT_DATA] = atoi(val);
		} else if (!strcmp(name, "rlimit_stack")) {
			submitRequest.rLimits[LSF_RLIMIT_STACK] = atoi(val);
		} else if (!strcmp(name, "rlimit_core")) {
			submitRequest.rLimits[LSF_RLIMIT_CORE] = atoi(val);
		} else if (!strcmp(name, "rlimit_rss")) {
			submitRequest.rLimits[LSF_RLIMIT_RSS] = atoi(val);
		} else if (!strcmp(name, "rlimit_nofile")) {
			submitRequest.rLimits[LSF_RLIMIT_NOFILE] = atoi(val);
		} else if (!strcmp(name, "rlimit_open_max")) {
			submitRequest.rLimits[LSF_RLIMIT_OPEN_MAX] = atoi(val);
		} else if (!strcmp(name, "rlimit_swap")) {
			submitRequest.rLimits[LSF_RLIMIT_SWAP] = atoi(val);
		} else if (!strcmp(name, "rlimit_run")) {
			submitRequest.rLimits[LSF_RLIMIT_RUN] = atoi(val);
		} else if (!strcmp(name, "rlimit_process")) {
			submitRequest.rLimits[LSF_RLIMIT_PROCESS] = atoi(val);
		} else if (!strcmp(name, "hostSpec")) {
			submitRequest.options |= SUB_HOST_SPEC;
			submitRequest.hostSpec = val;
		} else if (!strcmp(name, "numProcessors")) {
			submitRequest.numProcessors = atoi(val);
		} else if (!strcmp(name, "dependCond")) {
			submitRequest.options |= SUB_DEPEND_COND;
			submitRequest.dependCond = val;
		} else if (!strcmp(name, "beginTime")) {
			submitRequest.beginTime = atoi(val);
		} else if (!strcmp(name, "termTime")) {
			submitRequest.termTime = atoi(val);
		} else if (!strcmp(name, "sigValue")) {
			submitRequest.options |= SUB_WINDOW_SIG;
			submitRequest.sigValue = atoi(val);
		} else if (!strcmp(name, "command")) {
			submitRequest.command = val;
		} else if (!strcmp(name, "inFile")) {
			submitRequest.options |= SUB_IN_FILE;
			submitRequest.inFile = val;
		} else if (!strcmp(name, "outFile")) {
			submitRequest.options |= SUB_OUT_FILE;
			submitRequest.outFile = val;
		} else if (!strcmp(name, "errFile")) {
			submitRequest.options |= SUB_ERR_FILE;
			submitRequest.errFile = val;
		} else if (!strcmp(name, "chkpntPeriod")) {
			submitRequest.options |= SUB_CHKPNTABLE;
			submitRequest.options |= SUB_CHKPNT_PERIOD;
			submitRequest.chkpntPeriod = atoi(val);
		} else if (!strcmp(name, "chkpntDir")) {
			submitRequest.options |= SUB_CHKPNTABLE;
			submitRequest.options |= SUB_CHKPNT_DIR;
			submitRequest.chkpntDir = val;
		} else if (!strcmp(name, "xFile")) {
			/* Not implemented: SUB_OTHER_FILES would probably need to be
			   set and submitRequest.nxf assigned. */
		} else if (!strcmp(name, "preExecCmd")) {
			submitRequest.options |= SUB_PRE_EXEC;
			submitRequest.preExecCmd = val;
		} else if (!strcmp(name, "mailUser")) {
			submitRequest.options |= SUB_MAIL_USER;
			submitRequest.mailUser = val;
		} else if (!strcmp(name, "delOptions")) {
			submitRequest.delOptions = atoi(val);
		} else if (!strcmp(name, "projectName")) {
			submitRequest.options |= SUB_PROJECT_NAME;
			submitRequest.projectName = val;
		} else if (!strcmp(name, "maxNumProcessors")) {
			submitRequest.maxNumProcessors = atoi(val);
		} else if (!strcmp(name, "loginShell")) {
			submitRequest.options |= SUB_LOGIN_SHELL;
			submitRequest.loginShell = val;
		} else if (!strcmp(name, "userGroup")) {
			/* Previously stored into loginShell by mistake, which both
			   lost the group and clobbered any login shell setting. */
			submitRequest.options |= SUB_USER_GROUP;
			submitRequest.userGroup = val;
		} else if (!strcmp(name, "exceptList")) {
			submitRequest.exceptList = val;
		} else if (!strcmp(name, "exclusive")) {
			submitRequest.options |= SUB_EXCLUSIVE;
		} else if (!strcmp(name, "notifyBegin")) {
			submitRequest.options |= SUB_NOTIFY_BEGIN;
		} else if (!strcmp(name, "notifyEnd")) {
			submitRequest.options |= SUB_NOTIFY_END;
		} else if (!strcmp(name, "restart")) {
			submitRequest.options |= SUB_RESTART;
		} else if (!strcmp(name, "restartForce")) {
			submitRequest.options |= SUB_RESTART_FORCE;
		} else if (!strcmp(name, "rerunnable")) {
			submitRequest.options |= SUB_RERUNNABLE;
		} else if (!strcmp(name, "interactive")) {
			submitRequest.options |= SUB_INTERACTIVE;
		} else if (!strcmp(name, "pty")) {
			submitRequest.options |= SUB_PTY;
		} else if (!strcmp(name, "pty_shell")) {
			submitRequest.options |= SUB_PTY_SHELL;
		} else if (!strcmp(name, "hold")) {
			submitRequest.options2 |= SUB2_HOLD;
		} else if (!strcmp(name, "wait")) {
			/* Not implemented: blocking submission (SUB2_BSUB_BLOCK)
			   would require forking. */
		} else if (!strcmp(name, "debug")) {
			/* Accepted for compatibility; currently has no effect. */
		}
		/* "chkpnt_copy" and "chkpnt_force" are intentionally unsupported. */
	}

	jobId = lsb_submit(&submitRequest, &submitReply);
	if (jobId == -1) {
		/* Report before releasing memory so nothing can disturb the
		   error state that lsb_sysmsg() reads. */
		Rprintf("lsf_job_submit: lsb_submit: %s\n", lsb_sysmsg());
		free(askedHosts);
		return AsInt(0);
	}

	/* NOTE(review): assumes lsb_submit() does not retain the askedHosts
	   pointer after it returns - confirm against the LSF API. */
	free(askedHosts);
	return AsInt(jobId);
}
Пример #9
0
Файл: Rlsf.c Проект: cran/Rlsf
/*
 * lsf_job_status - look up one job and return its status as a string.
 *
 * sexp_jobid : integer vector; element 0 is the job id to query.
 *
 * Returns a length-1 character vector holding one of "NULL", "PEND",
 * "PSUSP", "RUN", "WAIT", "SSUSP", "USUSP", "ZOMBI", "EXIT", "DONE",
 * "UNKWN", or "ERROR" for a status value that matches none of the cases.
 * Returns R_NilValue (after printing the lsb_sysmsg() text) when
 * lsb_openjobinfo() or lsb_readjobinfo() fails.
 */
SEXP
lsf_job_status(SEXP sexp_jobid)
{
  int id, recCount;
  struct jobInfoEnt *entry;
  const char *label;
  SEXP status;

  id = INTEGER(sexp_jobid)[0];

  recCount = lsb_openjobinfo(id, NULL, NULL, NULL, NULL, ALL_JOB);
  if (recCount < 0) {
    Rprintf("lsf_job_status: lsb_openjobinfo: %s\n", lsb_sysmsg());
    return R_NilValue;
  }

  entry = lsb_readjobinfo(&recCount);
  if (entry == NULL) {
    Rprintf("lsf_job_status: lsb_readjobinfo: %s\n", lsb_sysmsg());
    lsb_closejobinfo();
    return R_NilValue;
  }

  /* The query is closed before the record is examined, as before. */
  lsb_closejobinfo();

  PROTECT(status = allocVector(STRSXP, 1));

  /* Translate the numeric status into its display label. */
  switch (entry->status) {
  case JOB_STAT_NULL:
    label = "NULL";
    break;
  case JOB_STAT_PEND:
    label = "PEND";
    break;
  case JOB_STAT_PSUSP:
    label = "PSUSP";
    break;
  case JOB_STAT_RUN:
    label = "RUN";
    break;
  case JOB_STAT_RUN|JOB_STAT_WAIT:
    label = "WAIT";
    break;
  case JOB_STAT_SSUSP:
    label = "SSUSP";
    break;
  case JOB_STAT_USUSP:
    label = "USUSP";
    break;
  case JOB_STAT_EXIT:
    /* An exited job flagged EXIT_ZOMBIE is reported separately. */
    label = (entry->reasons & EXIT_ZOMBIE) ? "ZOMBI" : "EXIT";
    break;
  case JOB_STAT_DONE:
  case JOB_STAT_DONE|JOB_STAT_PDONE:
  case JOB_STAT_DONE|JOB_STAT_PERR:
  case JOB_STAT_DONE|JOB_STAT_WAIT:
    label = "DONE";
    break;
  case JOB_STAT_UNKWN:
    label = "UNKWN";
    break;
  default:
    Rprintf("lsf_job_status: job state <%d> is unknown.\n", entry->status);
    label = "ERROR";
    break;
  }

  SET_STRING_ELT(status, 0, mkChar(label));
  UNPROTECT(1);

  return status;
}