Пример #1
0
int
sendCmdBill_ (int s, resCmd cmd, struct resCmdBill *cmdmsg, int *retsock,
	     struct timeval *timeout)

{
    char *buf;
    int  i,bufsize,cc;

    bufsize = 1024;
    bufsize = bufsize + ALIGNWORD_(strlen(cmdmsg->cwd)) + sizeof(int);
    for (i=0; cmdmsg->argv[i]; i++)
        bufsize = bufsize + ALIGNWORD_(strlen(cmdmsg->argv[i])) + sizeof(int);
    if ((buf=malloc(bufsize)) == NULL) {
        lserrno = LSE_MALLOC;
        return -1;
    }

    umask(cmdmsg->filemask = (int)umask(0));
    cmdmsg->priority = 0;
    if (getLimits(cmdmsg->lsfLimits) < 0) {
	lserrno = LSE_LIMIT_SYS;
        free(buf);
	return -1;
    }
    cc=callRes_(s, cmd, (char *) cmdmsg, buf, bufsize, xdr_resCmdBill,
		retsock, timeout, NULL);
    free(buf);
    return cc;

}
Пример #2
0
int
status_job (mbdReqType reqType,
	    struct jobCard *jp, int newStatus, sbdReplyType err)
{
  static char fname[] = "status_job()";
  static int seq = 1;
  static char lastHost[MAXHOSTNAMELEN];
  int reply;
  char *request_buf;
  char *reply_buf = NULL;
  XDR xdrs;
  struct LSFHeader hdr;
  int cc;
  struct statusReq statusReq;
  int flags;
  int i;
  int len;
  struct lsfAuth *auth = NULL;

  if ((logclass & LC_TRACE) && (logclass & LC_SIGNAL))
    ls_syslog (LOG_DEBUG, "%s: Entering ... regType %d jobId %s",
	       fname, reqType, lsb_jobid2str (jp->jobSpecs.jobId));

  if (newStatus == JOB_STAT_EXIT)
    {
      jp->userJobSucc = FALSE;
    }

  if (MASK_STATUS (newStatus) == JOB_STAT_DONE)
    {
      jp->userJobSucc = TRUE;
    }

  if (IS_POST_FINISH (newStatus))
    {
      if (jp->userJobSucc != TRUE)
	{
	  return 0;
	}
    }

  if (masterHost == NULL)
    return -1;

  if (jp->notReported < 0)
    {
      jp->notReported = -INFINIT_INT;
      return (0);
    }

  statusReq.jobId = jp->jobSpecs.jobId;
  statusReq.actPid = jp->jobSpecs.actPid;
  statusReq.jobPid = jp->jobSpecs.jobPid;
  statusReq.jobPGid = jp->jobSpecs.jobPGid;
  statusReq.newStatus = newStatus;
  statusReq.reason = jp->jobSpecs.reasons;
  statusReq.subreasons = jp->jobSpecs.subreasons;
  statusReq.sbdReply = err;
  statusReq.lsfRusage = jp->lsfRusage;
  statusReq.execUid = jp->jobSpecs.execUid;
  statusReq.numExecHosts = 0;
  statusReq.execHosts = NULL;
  statusReq.exitStatus = jp->w_status;
  statusReq.execCwd = jp->jobSpecs.execCwd;
  statusReq.execHome = jp->jobSpecs.execHome;
  statusReq.execUsername = jp->execUsername;
  statusReq.queuePostCmd = "";
  statusReq.queuePreCmd = "";
  statusReq.msgId = jp->delieveredMsgId;

  if (IS_FINISH (newStatus))
    {
      if (jp->maxRusage.mem > jp->runRusage.mem)
	jp->runRusage.mem = jp->maxRusage.mem;
      if (jp->maxRusage.swap > jp->runRusage.swap)
	jp->runRusage.swap = jp->maxRusage.swap;
      if (jp->maxRusage.stime > jp->runRusage.stime)
	jp->runRusage.stime = jp->maxRusage.stime;
      if (jp->maxRusage.utime > jp->runRusage.utime)
	jp->runRusage.utime = jp->maxRusage.utime;
    }
  statusReq.runRusage.mem = jp->runRusage.mem;
  statusReq.runRusage.swap = jp->runRusage.swap;
  statusReq.runRusage.utime = jp->runRusage.utime;
  statusReq.runRusage.stime = jp->runRusage.stime;
  statusReq.runRusage.npids = jp->runRusage.npids;
  statusReq.runRusage.pidInfo = jp->runRusage.pidInfo;
  statusReq.runRusage.npgids = jp->runRusage.npgids;
  statusReq.runRusage.pgid = jp->runRusage.pgid;
  statusReq.actStatus = jp->actStatus;
  statusReq.sigValue = jp->jobSpecs.actValue;
  statusReq.seq = seq;
  seq++;
  if (seq >= MAX_SEQ_NUM)
    seq = 1;

  len = 1024 + ALIGNWORD_ (sizeof (struct statusReq));

  len += ALIGNWORD_ (strlen (statusReq.execHome)) + 4 +
    ALIGNWORD_ (strlen (statusReq.execCwd)) + 4 +
    ALIGNWORD_ (strlen (statusReq.execUsername)) + 4;

  for (i = 0; i < statusReq.runRusage.npids; i++)
    len += ALIGNWORD_ (sizeof (struct pidInfo)) + 4;

  for (i = 0; i < statusReq.runRusage.npgids; i++)
    len += ALIGNWORD_ (sizeof (int)) + 4;

  if (logclass & (LC_TRACE | LC_COMM))
    ls_syslog (LOG_DEBUG, "%s: The length of the job message is: <%d>", fname,
	       len);

  if ((request_buf = malloc (len)) == NULL)
    {
      ls_syslog (LOG_ERR, I18N_FUNC_FAIL_M, fname, "malloc");
      return (-1);
    }

  xdrmem_create (&xdrs, request_buf, len, XDR_ENCODE);
  initLSFHeader_ (&hdr);
  hdr.opCode = reqType;

  if (!xdr_encodeMsg (&xdrs, (char *) &statusReq, &hdr, xdr_statusReq, 0,
		      auth))
    {
      ls_syslog (LOG_ERR, I18N_JOB_FAIL_S_M,
		 fname, lsb_jobid2str (jp->jobSpecs.jobId), "xdr_statusReq");
      lsb_merr2 (I18N_FUNC_FAIL, fname, "xdr_statusReq");
      xdr_destroy (&xdrs);
      FREEUP (request_buf);
      relife ();
    }

  flags = CALL_SERVER_NO_HANDSHAKE;
  if (statusChan >= 0)
    flags |= CALL_SERVER_USE_SOCKET;

  if (reqType == BATCH_RUSAGE_JOB)
    flags |= CALL_SERVER_NO_WAIT_REPLY;

  if (logclass & LC_COMM)
    ls_syslog (LOG_DEBUG1, "%s: before call_server statusChan=%d flags=%d",
	       fname, statusChan, flags);

  cc = call_server (masterHost,
		    mbd_port,
		    request_buf,
		    XDR_GETPOS (&xdrs),
		    &reply_buf,
		    &hdr,
		    connTimeout, readTimeout, &statusChan, NULL, NULL, flags);
  if (cc < 0)
    {
      statusChan = -1;
      if (!equalHost_ (masterHost, lastHost))
	{
	  if (errno != EINTR)
	    ls_syslog (LOG_DEBUG,
		       "%s: Failed to reach mbatchd on host <%s> for job <%s>: %s",
		       fname, masterHost, lsb_jobid2str (jp->jobSpecs.jobId),
		       lsb_sysmsg ());
	  strcpy (lastHost, masterHost);
	}
      xdr_destroy (&xdrs);
      FREEUP (request_buf);
      failcnt++;
      return (-1);
    }
  else if (cc == 0)
    {

    }

  failcnt = 0;
  lastHost[0] = '\0';
  xdr_destroy (&xdrs);
  FREEUP (request_buf);

  if (cc)
    free (reply_buf);

  if (flags & CALL_SERVER_NO_WAIT_REPLY)
    {

      struct timeval timeval;

      timeval.tv_sec = 0;
      timeval.tv_usec = 0;

      if (rd_select_ (chanSock_ (statusChan), &timeval) == 0)
	{
	  jp->needReportRU = FALSE;
	  jp->lastStatusMbdTime = now;
	  return 0;
	}

      CLOSECD (statusChan);

      if (logclass & LC_COMM)
	ls_syslog (LOG_DEBUG1,
		   "%s: Job <%s> rd_select() failed, assume connection broken",
		   fname, lsb_jobid2str (jp->jobSpecs.jobId));
      return (-1);
    }
  reply = hdr.opCode;
  switch (reply)
    {
    case LSBE_NO_ERROR:
    case LSBE_LOCK_JOB:
      jp->needReportRU = FALSE;
      jp->lastStatusMbdTime = now;
      if (reply == LSBE_LOCK_JOB)
	{
	  if (IS_SUSP (jp->jobSpecs.jStatus))
	    jp->jobSpecs.reasons |= SUSP_MBD_LOCK;
	  else
	    ls_syslog (LOG_ERR, _i18n_msg_get (ls_catd, NL_SETN, 5204, "%s: Job <%s> is in status <%x> and mbatchd wants to lock it, ignored."),	/* catgets 5204 */
		       fname,
		       lsb_jobid2str (jp->jobSpecs.jobId),
		       jp->jobSpecs.jStatus);
	}
      return (0);
    case LSBE_NO_JOB:
      if (!IS_POST_FINISH (jp->jobSpecs.jStatus))
	{
	  ls_syslog (LOG_ERR, _i18n_msg_get (ls_catd, NL_SETN, 5205, "%s: Job <%s> is forgotten by mbatchd on host <%s>, ignored."), fname, lsb_jobid2str (jp->jobSpecs.jobId), masterHost);	/* catgets 5205 */
	}

      jp->notReported = -INFINIT_INT;
      return (0);
    case LSBE_STOP_JOB:
      if (jobsig (jp, SIGSTOP, TRUE) < 0)
	SET_STATE (jp->jobSpecs.jStatus, JOB_STAT_EXIT);
      else
	{
	  SET_STATE (jp->jobSpecs.jStatus, JOB_STAT_USUSP);
	  jp->jobSpecs.reasons |= SUSP_USER_STOP;
	}
      return (-1);
    case LSBE_SBATCHD:
      ls_syslog (LOG_ERR, _i18n_msg_get (ls_catd, NL_SETN, 5206, "%s: mbatchd on host <%s> doesn't think I'm configured as a batch server when I report the status for job <%s>"),	/* catgets 5206 */
		 fname, masterHost, lsb_jobid2str (jp->jobSpecs.jobId));
      return (-1);
    default:
      ls_syslog (LOG_ERR, _i18n_msg_get (ls_catd, NL_SETN, 5207, "%s: Illegal reply code <%d> from mbatchd on host <%s> for job <%s>"),	/* catgets 5207 */
		 fname,
		 reply, masterHost, lsb_jobid2str (jp->jobSpecs.jobId));
      return (-1);
    }
}
Пример #3
0
int
ls_connect(char *host)
{
    struct  hostent *hp;
    int     s, descriptor[2], size;
    char    official[MAXHOSTNAMELEN];
    struct resConnect connReq;
    char *reqBuf;
    struct lsfAuth auth;
    int resTimeout;



    if (genParams_[LSF_RES_TIMEOUT].paramValue)
	resTimeout = atoi(genParams_[LSF_RES_TIMEOUT].paramValue);
    else
	resTimeout = RES_TIMEOUT;

    if (_isconnected_(host, descriptor))
        return(descriptor[0]);

    if ((hp = Gethostbyname_(host)) == NULL) {
        lserrno = LSE_BAD_HOST;
        return -1;
    }

    strcpy(official, hp->h_name);
    memcpy((char *)&res_addr_.sin_addr,(char *)hp->h_addr,(int)hp->h_length);
    if ((rootuid_) && (genParams_[LSF_AUTH].paramValue == NULL)) {
        if (currentsocket_ > (FIRST_RES_SOCK + totsockets_ - 1)) {
            lserrno = LSE_NOMORE_SOCK;
            return -1;
        }
        s = currentsocket_;
        currentsocket_++;
    } else {
        if ((s = CreateSockEauth_(SOCK_STREAM)) < 0)
            return -1;
    }

    putEauthClientEnvVar("user");
    putEauthServerEnvVar("res");

#ifdef INTER_DAEMON_AUTH

    putEnv("LSF_EAUTH_AUX_PASS", "yes");
#endif


    if (getAuth_(&auth, official) == -1) {
	closesocket(s);
	return -1;
    }

    runEsub_(&connReq.eexec, NULL);

    size = sizeof(struct LSFHeader) + sizeof(connReq) +
	   sizeof(struct lsfAuth) +
	   ALIGNWORD_(connReq.eexec.len) +
	   sizeof(int) * 5 ;

    if ((reqBuf = malloc(size)) == NULL) {
	lserrno = LSE_MALLOC;
	goto Fail;
    }

    if (b_connect_(s, (struct sockaddr *)&res_addr_,
				 sizeof(res_addr_), resTimeout) < 0) {
	lserrno = LSE_CONN_SYS;
	goto Fail;
    }

    if (callRes_(s, RES_CONNECT, (char *) &connReq, reqBuf,
		 size, xdr_resConnect, 0, 0, &auth) == -1) {
	goto Fail;
    }

    if (connReq.eexec.len > 0)
	free(connReq.eexec.data);

    free(reqBuf);

    (void)connected_(official, s, -1, currentSN);

    return(s);

  Fail:
    CLOSESOCKET(s);

    if (connReq.eexec.len > 0)
	free(connReq.eexec.data);

    free(reqBuf);
    return -1;
}
Пример #4
0
int
rsetenv_ (char *host, char **envp, int option)
{
    int    descriptor[2];
    struct resSetenv envMsg;
    char   *sendBuf;
    int    bufferSize;
    int    i, s;
    resCmd resCmdOption;

    bufferSize = 512;

    if (logclass & (LC_TRACE))
	ls_syslog(LOG_DEBUG, "rsetenv_: Entering this routine...");

    if (!envp)
        return 0;

    for(i=0; envp[i] != NULL; i++)
        bufferSize = bufferSize + ALIGNWORD_(strlen(envp[i])) + sizeof(int);

    sendBuf = (char *)malloc(bufferSize);
    if (sendBuf == NULL) {
        lserrno = LSE_MALLOC;
        goto err;
    }

    if (_isconnected_(host, descriptor))
	s = descriptor[0];
    else if ((s = ls_connect(host)) < 0) {
        free(sendBuf);
	goto err;
    }

    if (!FD_ISSET(s,&connection_ok_)){
	FD_SET(s,&connection_ok_);
	if (ackReturnCode_(s) < 0) {
	   closesocket(s);
	   _lostconnection_(host);
           free(sendBuf);
	   goto err;
        }
    }

    envMsg.env = envp;
    if (option == RSETENV_SYNCH)
        resCmdOption = RES_SETENV;
    else if (option == RSETENV_ASYNC)
        resCmdOption = RES_SETENV_ASYNC;
    if (callRes_(s, resCmdOption, (char *) &envMsg, sendBuf, bufferSize,
		 xdr_resSetenv, 0, 0, NULL) == -1) {
	closesocket(s);
	_lostconnection_(host);
        free(sendBuf);
        goto err;
    }
    free(sendBuf);

    if (option == RSETENV_SYNCH){
        if (ackReturnCode_(s) < 0) {
	    closesocket(s);
	    _lostconnection_(host);
            goto err;
        }
    }

    return 0;

err:

    return -1;
}