Ejemplo n.º 1
0
/*
 * equivalentXferFile - decide whether the local and remote file of a
 * transfer are the same file, as seen through their mount hosts.
 *
 * Parameters:
 *   lsXfer       - transfer descriptor; only ppszHostFnames[0] and
 *                  ppszDestFnames[0] are read (their base names are compared).
 *   szLocalFile  - local path handed to ls_getmnthost().
 *   szRemoteFile - remote path handed to ls_rgetmnthost().
 *   psLstat      - stat of the local file; only st_ino is read.
 *   psRstat      - stat of the remote file; only st_ino is read.
 *   szRhost      - remote host name.
 *
 * Returns:
 *    0  inode numbers match, the base names match, and equalHost_()
 *       reports the two mount hosts as equal;
 *    1  otherwise, and always when the remote host type is "NTX86" or
 *       "NTALPHA";
 *   -1  ls_gethostinfo(), ls_getmnthost() or ls_rgetmnthost() failed, or a
 *       returned mount host name does not fit in MAXHOSTNAMELEN bytes.
 */
int
equivalentXferFile(lsRcpXfer *lsXfer, char *szLocalFile, char *szRemoteFile,
                    struct stat *psLstat, struct stat *psRstat, char *szRhost)
{
    char *pszH;
    char szHost1[MAXHOSTNAMELEN], szHost2[MAXHOSTNAMELEN];
    char *hostlist[1];
    struct hostInfo *hostinfo;
    char *szFileName1, *szFileName2;

    if (logclass & (LC_FILE))
        ls_syslog(LOG_DEBUG,"equivalentXferFile(), ls_getmnthost() for '%s'",
                szLocalFile);

    hostlist[0] = szRhost;
    hostinfo = ls_gethostinfo((char *)NULL, (int *)NULL, (char **)hostlist, 1, 0);
    if (hostinfo == (struct hostInfo *)NULL) {
        return -1;
    }

    /* Files on these host types are always reported as not equivalent. */
    if (strcmp(hostinfo->hostType, "NTX86") == 0
        || strcmp(hostinfo->hostType, "NTALPHA") == 0) {
        return (1);
    }

    if ((pszH = ls_getmnthost(szLocalFile)) == NULL) {
        return -1;
    }

    /* Refuse names that would overflow the stack buffer instead of
     * copying them blindly. */
    if (strlen(pszH) >= sizeof(szHost1)) {
        return -1;
    }
    strcpy(szHost1, pszH);

    if (logclass & (LC_FILE))
        ls_syslog(LOG_DEBUG,
            "equivalentXferFile(),ls_rgetmnthost() for '%s' on '%s'"
            , szRemoteFile, szRhost);

    if ((pszH = ls_rgetmnthost(szRhost, szRemoteFile)) == NULL) {
        return -1;
    }

    if (strlen(pszH) >= sizeof(szHost2)) {
        return -1;
    }
    strcpy(szHost2, pszH);

    /* Compare only the last path component of the first source and
     * destination names. */
    szFileName1 = strrchr(lsXfer->ppszHostFnames[0], '/');
    if (szFileName1 == NULL)
        szFileName1 = lsXfer->ppszHostFnames[0];
    else
        szFileName1++;

    szFileName2 = strrchr(lsXfer->ppszDestFnames[0], '/');
    if (szFileName2 == NULL)
        szFileName2 = lsXfer->ppszDestFnames[0];
    else
        szFileName2++;

    if (psLstat->st_ino == psRstat->st_ino
        && (strcmp(szFileName1, szFileName2) == 0)
        && equalHost_(szHost1, szHost2)) {
        return 0;
    }
    return (1);

}
Ejemplo n.º 2
0
/*
 * hostQMember - tell whether `host` is accepted by queue `qp`.
 *
 * Returns TRUE when the queue has no hostList, when qp->askedOthPrio is
 * non-negative, or when equalHost_() matches `host` against one of the
 * qp->numAskedPtr entries of qp->askedPtr; FALSE otherwise.
 */
char
hostQMember (char *host, struct qData *qp)
{
  int idx = 0;

  /* Either condition alone accepts the host without scanning the list. */
  if (qp->hostList == NULL || qp->askedOthPrio >= 0)
    return TRUE;

  while (idx < qp->numAskedPtr)
    {
      if (equalHost_ (host, qp->askedPtr[idx].hData->host))
	return TRUE;
      idx++;
    }

  return FALSE;

}
Ejemplo n.º 3
0
/*
 * loadinfo_ - send a LIM_LOAD_REQ built from `loadReqPtr` and hand back the
 * load matrix from the reply.
 *
 * Parameters:
 *   resReq     - resource requirement string copied into loadReqPtr->resReq;
 *                when NULL a single blank is used instead.
 *   loadReqPtr - request to send. preferredHosts[0] is overwritten with a
 *                copy of the originating host name. All numPrefs entries of
 *                preferredHosts and the array itself are released before
 *                returning, on success and on failure alike.
 *   fromhost   - originating host; when NULL ls_getmyhostname() is used.
 *   numHosts   - out: loadReply.nEntry (written on success only).
 *   outnlist   - out: loadReply.indicies (written on success only).
 *
 * Returns loadReply.loadMatrix on success, NULL on failure. The reply lives
 * in a function-static struct loadReply.
 */
struct hostLoad *
loadinfo_(char *resReq, struct decisionReq *loadReqPtr, char *fromhost, int *numHosts, char ***outnlist)
{
    static char fname[] = "loadinfo_";
    static struct loadReply loadReply;
    struct hostLoad *result = NULL;
    char *srcHost;
    int callOpts;
    int k;

    if (logclass & (LC_TRACE))
        ls_syslog(LOG_DEBUG, "%s: Entering this routine...", fname);

    if (loadReqPtr->numHosts <= 0) {
        lserrno = LSE_BAD_ARGS;
        goto cleanup;
    }

    /* Slot 0 of the preferred host list always carries the source host. */
    srcHost = fromhost;
    if (srcHost == NULL) {
        srcHost = ls_getmyhostname();
        if (srcHost == NULL)
            goto cleanup;
    }

    loadReqPtr->preferredHosts[0] = putstr_(srcHost);
    if (loadReqPtr->preferredHosts[0] == NULL) {
        lserrno = LSE_MALLOC;
        goto cleanup;
    }

    if (resReq != NULL)
        strncpy(loadReqPtr->resReq, resReq, MAXLINELEN);
    else
        strcpy(loadReqPtr->resReq, " ");
    /* strncpy does not terminate when the source fills the buffer. */
    loadReqPtr->resReq[MAXLINELEN-1] = '\0';

    /* A single-host query whose two preferred hosts are the same host is
     * flagged _LOCAL_; every other query is flagged _USE_TCP_. */
    if (loadReqPtr->ofWhat == OF_HOSTS
        && loadReqPtr->numPrefs == 2
        && loadReqPtr->numHosts == 1
        && equalHost_(loadReqPtr->preferredHosts[1],
                      loadReqPtr->preferredHosts[0])) {
        callOpts = _LOCAL_;
    } else {
        callOpts = _USE_TCP_;
    }

    if (callLim_(LIM_LOAD_REQ, loadReqPtr, xdr_decisionReq,
                 &loadReply, xdr_loadReply, NULL, callOpts, NULL) < 0)
        goto cleanup;

    if (loadReply.flags & LOAD_REPLY_SHARED_RESOURCE)
        sharedResConfigured_ = TRUE;

    *numHosts = loadReply.nEntry;
    *outnlist = loadReply.indicies;
    result = loadReply.loadMatrix;

cleanup:
    for (k = 0; k < loadReqPtr->numPrefs; k++) {
        FREEUP(loadReqPtr->preferredHosts[k]);
    }
    FREEUP(loadReqPtr->preferredHosts);
    return (result);

}
Ejemplo n.º 4
0
/*
 * status_job - report a job's status to mbatchd on masterHost.
 *
 * Fills a struct statusReq from the job card `jp`, XDR-encodes it with
 * opcode `reqType`, sends it with call_server() and acts on the opcode
 * found in the reply header.
 *
 * Parameters:
 *   reqType   - request opcode placed in the LSF header; BATCH_RUSAGE_JOB
 *               additionally selects CALL_SERVER_NO_WAIT_REPLY.
 *   jp        - job card being reported; several fields are updated here
 *               (userJobSucc, runRusage, notReported, needReportRU,
 *               lastStatusMbdTime, jobSpecs.jStatus, jobSpecs.reasons).
 *   newStatus - status value to report.
 *   err       - sbatchd reply code copied into the request.
 *
 * Returns:
 *    0  nothing had to be sent (post-finish report for a job whose
 *       userJobSucc is not TRUE, or jp->notReported < 0), the no-wait send
 *       was followed by rd_select_() returning 0, or the reply was
 *       LSBE_NO_ERROR, LSBE_LOCK_JOB or LSBE_NO_JOB;
 *   -1  masterHost is NULL, malloc() or call_server() failed, rd_select_()
 *       returned non-zero after a no-wait send, or the reply was
 *       LSBE_STOP_JOB, LSBE_SBATCHD or an unrecognised code.
 *
 * Function-static state: `seq` (request sequence number, wraps back to 1
 * at MAX_SEQ_NUM) and `lastHost` (master host of the last failed call).
 */
int
status_job (mbdReqType reqType,
	    struct jobCard *jp, int newStatus, sbdReplyType err)
{
  static char fname[] = "status_job()";
  static int seq = 1;
  static char lastHost[MAXHOSTNAMELEN];
  int reply;
  char *request_buf;
  char *reply_buf = NULL;
  XDR xdrs;
  struct LSFHeader hdr;
  int cc;
  struct statusReq statusReq;
  int flags;
  int i;
  int len;
  struct lsfAuth *auth = NULL;

  if ((logclass & LC_TRACE) && (logclass & LC_SIGNAL))
    ls_syslog (LOG_DEBUG, "%s: Entering ... regType %d jobId %s",
	       fname, reqType, lsb_jobid2str (jp->jobSpecs.jobId));

  /* Record whether the user job succeeded; the post-finish check below
   * reads this flag. */
  if (newStatus == JOB_STAT_EXIT)
    {
      jp->userJobSucc = FALSE;
    }

  if (MASK_STATUS (newStatus) == JOB_STAT_DONE)
    {
      jp->userJobSucc = TRUE;
    }

  /* Post-finish statuses are only reported for jobs marked successful. */
  if (IS_POST_FINISH (newStatus))
    {
      if (jp->userJobSucc != TRUE)
	{
	  return 0;
	}
    }

  if (masterHost == NULL)
    return -1;

  /* A negative notReported is pinned to -INFINIT_INT and nothing is sent
   * (the LSBE_NO_JOB reply below sets the same value). */
  if (jp->notReported < 0)
    {
      jp->notReported = -INFINIT_INT;
      return (0);
    }

  statusReq.jobId = jp->jobSpecs.jobId;
  statusReq.actPid = jp->jobSpecs.actPid;
  statusReq.jobPid = jp->jobSpecs.jobPid;
  statusReq.jobPGid = jp->jobSpecs.jobPGid;
  statusReq.newStatus = newStatus;
  statusReq.reason = jp->jobSpecs.reasons;
  statusReq.subreasons = jp->jobSpecs.subreasons;
  statusReq.sbdReply = err;
  statusReq.lsfRusage = jp->lsfRusage;
  statusReq.execUid = jp->jobSpecs.execUid;
  statusReq.numExecHosts = 0;
  statusReq.execHosts = NULL;
  statusReq.exitStatus = jp->w_status;
  statusReq.execCwd = jp->jobSpecs.execCwd;
  statusReq.execHome = jp->jobSpecs.execHome;
  statusReq.execUsername = jp->execUsername;
  statusReq.queuePostCmd = "";
  statusReq.queuePreCmd = "";
  statusReq.msgId = jp->delieveredMsgId;

  /* For a finishing job, raise each run-time usage figure to the recorded
   * maximum before it is copied into the request. */
  if (IS_FINISH (newStatus))
    {
      if (jp->maxRusage.mem > jp->runRusage.mem)
	jp->runRusage.mem = jp->maxRusage.mem;
      if (jp->maxRusage.swap > jp->runRusage.swap)
	jp->runRusage.swap = jp->maxRusage.swap;
      if (jp->maxRusage.stime > jp->runRusage.stime)
	jp->runRusage.stime = jp->maxRusage.stime;
      if (jp->maxRusage.utime > jp->runRusage.utime)
	jp->runRusage.utime = jp->maxRusage.utime;
    }
  statusReq.runRusage.mem = jp->runRusage.mem;
  statusReq.runRusage.swap = jp->runRusage.swap;
  statusReq.runRusage.utime = jp->runRusage.utime;
  statusReq.runRusage.stime = jp->runRusage.stime;
  statusReq.runRusage.npids = jp->runRusage.npids;
  statusReq.runRusage.pidInfo = jp->runRusage.pidInfo;
  statusReq.runRusage.npgids = jp->runRusage.npgids;
  statusReq.runRusage.pgid = jp->runRusage.pgid;
  statusReq.actStatus = jp->actStatus;
  statusReq.sigValue = jp->jobSpecs.actValue;
  /* Stamp the request with the next sequence number; wraps back to 1. */
  statusReq.seq = seq;
  seq++;
  if (seq >= MAX_SEQ_NUM)
    seq = 1;

  /* Size the encode buffer: fixed slack, the struct itself, the three
   * strings, and one slot per pid / process-group entry. */
  len = 1024 + ALIGNWORD_ (sizeof (struct statusReq));

  len += ALIGNWORD_ (strlen (statusReq.execHome)) + 4 +
    ALIGNWORD_ (strlen (statusReq.execCwd)) + 4 +
    ALIGNWORD_ (strlen (statusReq.execUsername)) + 4;

  for (i = 0; i < statusReq.runRusage.npids; i++)
    len += ALIGNWORD_ (sizeof (struct pidInfo)) + 4;

  for (i = 0; i < statusReq.runRusage.npgids; i++)
    len += ALIGNWORD_ (sizeof (int)) + 4;

  if (logclass & (LC_TRACE | LC_COMM))
    ls_syslog (LOG_DEBUG, "%s: The length of the job message is: <%d>", fname,
	       len);

  if ((request_buf = malloc (len)) == NULL)
    {
      ls_syslog (LOG_ERR, I18N_FUNC_FAIL_M, fname, "malloc");
      return (-1);
    }

  xdrmem_create (&xdrs, request_buf, len, XDR_ENCODE);
  initLSFHeader_ (&hdr);
  hdr.opCode = reqType;

  /* NOTE(review): on encode failure the buffer is released and relife()
   * is called, but there is no return here. If relife() can return,
   * execution continues into call_server() with the released buffer --
   * presumably relife() does not return; confirm. */
  if (!xdr_encodeMsg (&xdrs, (char *) &statusReq, &hdr, xdr_statusReq, 0,
		      auth))
    {
      ls_syslog (LOG_ERR, I18N_JOB_FAIL_S_M,
		 fname, lsb_jobid2str (jp->jobSpecs.jobId), "xdr_statusReq");
      lsb_merr2 (I18N_FUNC_FAIL, fname, "xdr_statusReq");
      xdr_destroy (&xdrs);
      FREEUP (request_buf);
      relife ();
    }

  /* Reuse the existing status channel when one is open. */
  flags = CALL_SERVER_NO_HANDSHAKE;
  if (statusChan >= 0)
    flags |= CALL_SERVER_USE_SOCKET;

  if (reqType == BATCH_RUSAGE_JOB)
    flags |= CALL_SERVER_NO_WAIT_REPLY;

  if (logclass & LC_COMM)
    ls_syslog (LOG_DEBUG1, "%s: before call_server statusChan=%d flags=%d",
	       fname, statusChan, flags);

  cc = call_server (masterHost,
		    mbd_port,
		    request_buf,
		    XDR_GETPOS (&xdrs),
		    &reply_buf,
		    &hdr,
		    connTimeout, readTimeout, &statusChan, NULL, NULL, flags);
  if (cc < 0)
    {
      statusChan = -1;
      /* Log the failure only when the master host differs from the one
       * recorded at the previous failure, and not for EINTR. */
      if (!equalHost_ (masterHost, lastHost))
	{
	  if (errno != EINTR)
	    ls_syslog (LOG_DEBUG,
		       "%s: Failed to reach mbatchd on host <%s> for job <%s>: %s",
		       fname, masterHost, lsb_jobid2str (jp->jobSpecs.jobId),
		       lsb_sysmsg ());
	  strcpy (lastHost, masterHost);
	}
      xdr_destroy (&xdrs);
      FREEUP (request_buf);
      failcnt++;
      return (-1);
    }
  else if (cc == 0)
    {
      /* Intentionally empty: nothing extra to do for cc == 0. */

    }

  failcnt = 0;
  lastHost[0] = '\0';
  xdr_destroy (&xdrs);
  FREEUP (request_buf);

  /* reply_buf is released only when call_server() returned non-zero. */
  if (cc)
    free (reply_buf);

  if (flags & CALL_SERVER_NO_WAIT_REPLY)
    {

      struct timeval timeval;

      /* Zero timeout: poll the status channel without blocking. */
      timeval.tv_sec = 0;
      timeval.tv_usec = 0;

      if (rd_select_ (chanSock_ (statusChan), &timeval) == 0)
	{
	  jp->needReportRU = FALSE;
	  jp->lastStatusMbdTime = now;
	  return 0;
	}

      CLOSECD (statusChan);

      if (logclass & LC_COMM)
	ls_syslog (LOG_DEBUG1,
		   "%s: Job <%s> rd_select() failed, assume connection broken",
		   fname, lsb_jobid2str (jp->jobSpecs.jobId));
      return (-1);
    }
  /* The reply code is carried in the opcode of the returned header. */
  reply = hdr.opCode;
  switch (reply)
    {
    case LSBE_NO_ERROR:
    case LSBE_LOCK_JOB:
      jp->needReportRU = FALSE;
      jp->lastStatusMbdTime = now;
      if (reply == LSBE_LOCK_JOB)
	{
	  /* The lock reason is only recorded for a suspended job. */
	  if (IS_SUSP (jp->jobSpecs.jStatus))
	    jp->jobSpecs.reasons |= SUSP_MBD_LOCK;
	  else
	    ls_syslog (LOG_ERR, _i18n_msg_get (ls_catd, NL_SETN, 5204, "%s: Job <%s> is in status <%x> and mbatchd wants to lock it, ignored."),	/* catgets 5204 */
		       fname,
		       lsb_jobid2str (jp->jobSpecs.jobId),
		       jp->jobSpecs.jStatus);
	}
      return (0);
    case LSBE_NO_JOB:
      if (!IS_POST_FINISH (jp->jobSpecs.jStatus))
	{
	  ls_syslog (LOG_ERR, _i18n_msg_get (ls_catd, NL_SETN, 5205, "%s: Job <%s> is forgotten by mbatchd on host <%s>, ignored."), fname, lsb_jobid2str (jp->jobSpecs.jobId), masterHost);	/* catgets 5205 */
	}

      /* Makes later calls for this job return early (see the
       * notReported < 0 check above). */
      jp->notReported = -INFINIT_INT;
      return (0);
    case LSBE_STOP_JOB:
      /* Stop the job; if the signal cannot be delivered the job state is
       * set to JOB_STAT_EXIT instead. */
      if (jobsig (jp, SIGSTOP, TRUE) < 0)
	SET_STATE (jp->jobSpecs.jStatus, JOB_STAT_EXIT);
      else
	{
	  SET_STATE (jp->jobSpecs.jStatus, JOB_STAT_USUSP);
	  jp->jobSpecs.reasons |= SUSP_USER_STOP;
	}
      return (-1);
    case LSBE_SBATCHD:
      ls_syslog (LOG_ERR, _i18n_msg_get (ls_catd, NL_SETN, 5206, "%s: mbatchd on host <%s> doesn't think I'm configured as a batch server when I report the status for job <%s>"),	/* catgets 5206 */
		 fname, masterHost, lsb_jobid2str (jp->jobSpecs.jobId));
      return (-1);
    default:
      ls_syslog (LOG_ERR, _i18n_msg_get (ls_catd, NL_SETN, 5207, "%s: Illegal reply code <%d> from mbatchd on host <%s> for job <%s>"),	/* catgets 5207 */
		 fname,
		 reply, masterHost, lsb_jobid2str (jp->jobSpecs.jobId));
      return (-1);
    }
}
Ejemplo n.º 5
0
/*
 * shouldResume - decide whether the suspended job `jp` may be resumed,
 * given the `num` host load entries in `loadV`.
 *
 * The job's execution hosts (jobSpecs.toHosts, adjacent duplicates
 * skipped) are matched against loadV with equalHost_(); the matching load
 * entries are handed to checkResumeByLoad(), whose result is returned.
 * Hosts without a match are logged and passed on with a NULL `li`.
 *
 * Returns FALSE immediately when num <= 0, when the job is not in
 * JOB_STAT_SSUSP, or when its suspend reasons include SUSP_QUEUE_WINDOW,
 * SUSP_USER_STOP or SUSP_MBD_LOCK. When the job lists no execution host the
 * error is logged and the initial value TRUE is returned.
 *
 * Side effects: checkResumeByLoad() may rewrite jobSpecs.reasons and
 * jobSpecs.subreasons. When the job cannot be resumed, the reasons changed
 * and the last status report to mbatchd is older than
 * rusageUpdateRate * sbdSleepTime, jp->notReported is incremented.
 */
static int
shouldResume (struct hostLoad *loadV, struct jobCard *jp, int num)
{
    static char fname[] = "shouldResume";
    int i, j, numHosts = -1;
    int resume = TRUE, found;
    int lastReasons = jp->jobSpecs.reasons;
    int lastSubreasons = jp->jobSpecs.subreasons;
    int capacity;
    int usedHosts = 0;
    struct hostLoad *loads = NULL;
    struct tclHostData *tclHostData = NULL;

    if (logclass & (LC_SCHED | LC_EXEC))
        ls_syslog(LOG_DEBUG3, "%s: job=%s; jStatus=%d; reasons=%x, subreasons=%d, numHosts=%d", fname, lsb_jobid2str(jp->jobSpecs.jobId), jp->jobSpecs.jStatus, jp->jobSpecs.reasons, jp->jobSpecs.subreasons, num);

    if (num <= 0)
        return FALSE;

    if (!(jp->jobSpecs.jStatus & JOB_STAT_SSUSP))
        return FALSE;

    if ((jp->jobSpecs.reasons & SUSP_QUEUE_WINDOW)
        || (jp->jobSpecs.reasons & SUSP_USER_STOP)
        || (jp->jobSpecs.reasons & SUSP_MBD_LOCK))
        return FALSE;

    /* The per-host arrays are indexed by the number of distinct execution
     * hosts seen so far, which can reach numToHosts - 1 and is not bounded
     * by num. Size them for whichever is larger so the writes below stay
     * in bounds. */
    capacity = (jp->jobSpecs.numToHosts > num) ? jp->jobSpecs.numToHosts : num;

    loads = (struct hostLoad *)
                my_malloc (capacity * sizeof (struct hostLoad), fname);
    if (jp->resumeCondVal != NULL) {
        tclHostData = (struct tclHostData *)
                my_malloc (capacity * sizeof (struct tclHostData), fname);
        for (i = 0; i < capacity; i++) {
            initTclHostData (&tclHostData[i]);
        }
    }

    for (j = 0; j < jp->jobSpecs.numToHosts; j++) {
        /* Skip a host identical to the previous entry. */
        if (j > 0 && !strcmp (jp->jobSpecs.toHosts[j],
                              jp->jobSpecs.toHosts[j-1]))
            continue;
        numHosts++;
        found = FALSE;
        for (i = 0; i < num; i++) {
            if (equalHost_(jp->jobSpecs.toHosts[j], loadV[i].hostName)) {
                loads[numHosts] = loadV[i];
                if (tclHostData != NULL) {
                    /* A failure here leaves found == FALSE, so the host is
                     * reported as missing below. */
                    if (getTclHostData (&loadV[i],
                                        &tclHostData[numHosts], FALSE) < 0) {
                        break;
                    }
                }
                found = TRUE;
                break;
            }
        }
        if (found != TRUE) {
            ls_syslog(LOG_ERR, _i18n_msg_get(ls_catd , NL_SETN, 5706,
                "%s: Can not find load information for host <%s> to check resume condiftions for job <%s>"), fname, jp->jobSpecs.toHosts[j], lsb_jobid2str(jp->jobSpecs.jobId)); /* catgets 5706 */
            loads[numHosts].li = NULL;
            continue;
        }
    }

    if (numHosts >= 0) {
        /* numHosts was the last index used; turn it into a count. */
        numHosts++;
        usedHosts = numHosts;
        resume = checkResumeByLoad (jp->jobSpecs.jobId, numHosts,
               jp->jobSpecs.thresholds, loads, &jp->jobSpecs.reasons,
               &jp->jobSpecs.subreasons,
               jp->jobSpecs.jAttrib, jp->resumeCondVal, tclHostData);
    } else {
        ls_syslog(LOG_ERR, _i18n_msg_get(ls_catd , NL_SETN, 5707,
            "%s: No valid load information is found for job <%s>"), fname, lsb_jobid2str(jp->jobSpecs.jobId)); /* catgets 5707 */
    }

    /* Release the scratch arrays on both paths; previously they leaked
     * when the job listed no execution host. Only entries that were handed
     * to the host loop own per-host buffers. */
    FREEUP (loads);
    if (tclHostData != NULL) {
        for (i = 0; i < usedHosts; i++) {
            FREEUP (tclHostData[i].resBitMaps);
            FREEUP (tclHostData[i].loadIndex);
        }
        FREEUP (tclHostData);
    }

    if ((logclass & (LC_SCHED | LC_EXEC)) && !resume)
        ls_syslog(LOG_DEBUG2, "%s: Can't resume job %s; reason=%x, subreasons=%d", fname, lsb_jobid2str(jp->jobSpecs.jobId), jp->jobSpecs.reasons, jp->jobSpecs.subreasons);

    if (!resume) {
        /* Count a pending report when the suspend reason changed and the
         * last status report is old enough. */
        if ((jp->jobSpecs.reasons != lastReasons ||
             (jp->jobSpecs.reasons == lastReasons &&
              jp->jobSpecs.subreasons != lastSubreasons)) &&
            (now - jp->lastStatusMbdTime > rusageUpdateRate * sbdSleepTime))
            jp->notReported++;
    }

    return (resume);

}
Ejemplo n.º 6
0
static int
shouldStop (struct hostLoad *loadV,
	    struct jobCard *jobCard, int *reasons, int *subreasons, int num, int *stopmore)
{
    static char fname[] = "shouldStop";
    int i, numLoad = -1, j;
    struct hostLoad *load = NULL;
    static struct tclHostData tclHostData;
    static int first = TRUE;

    *reasons = 0;
    *subreasons = 0;


    if( jobCard->postJobStarted ) {
        return false;
    }


    if (jobCard->jobSpecs.jAttrib & JOB_URGENT_NOSTOP)
	return false;


    if (now - jobCard->windWarnTime < sbdSleepTime)
        return FALSE;


    if (!JOB_STARTED(jobCard))
        return FALSE;


    if (LS_ISUNAVAIL(loadV->status))
	return FALSE;
    if (num <= 0)
	return FALSE;


    for (i = 0; i <jobCard->jobSpecs.numToHosts && (*reasons) == 0; i++) {
        if (i > 0 && !strcmp (jobCard->jobSpecs.toHosts[i],
					     jobCard->jobSpecs.toHosts[i-1]))
            continue;
        numLoad++;
	load = NULL;
        for (j = 0; j < num; j ++) {
    	    if (equalHost_(jobCard->jobSpecs.toHosts[i], loadV[j].hostName)) {
	        load = &(loadV[j]);
	        break;
            }
        }
        if (load == NULL) {
	    ls_syslog(LOG_ERR, _i18n_msg_get(ls_catd , NL_SETN, 5705,
		"%s: Can not find load information for host <%s>"), fname, jobCard->jobSpecs.toHosts[i]); /* catgets 5705 */
            return FALSE;
        }
        if (LS_ISLOCKEDU(load->status)
            && !(jobCard->jobSpecs.jAttrib & Q_ATTRIB_EXCLUSIVE)) {
            *reasons = SUSP_HOST_LOCK;
            *stopmore = TRUE;
        }
	else if (LS_ISLOCKEDM(load->status)) {
            *reasons = SUSP_HOST_LOCK_MASTER;
            *stopmore = TRUE;
        }
        else if (load->li[IT] <= jobCard->jobSpecs.thresholds.loadStop[numLoad][IT]
            && load->li[IT] != -INFINIT_LOAD
            && jobCard->jobSpecs.thresholds.loadStop[numLoad][IT] != -INFINIT_LOAD) {
	    *reasons |= SUSP_LOAD_REASON;
            *subreasons = IT;
            *stopmore = TRUE;
        }
        else if (load->li[LS] >=
			  jobCard->jobSpecs.thresholds.loadStop[numLoad][LS]
            && load->li[LS] != INFINIT_LOAD
            && jobCard->jobSpecs.thresholds.loadStop[numLoad][LS]
						      != INFINIT_LOAD) {
            *reasons |= SUSP_LOAD_REASON;
            *subreasons = LS;
            *stopmore = TRUE;
        }
        else if (load->li[UT] >=
			 jobCard->jobSpecs.thresholds.loadStop[numLoad][UT]
            && load->li[UT] != INFINIT_LOAD
            && jobCard->jobSpecs.thresholds.loadStop[numLoad][UT] !=
							   INFINIT_LOAD) {
            *reasons |= SUSP_LOAD_REASON;
            *subreasons = UT;
        }
        else if(load->li[PG] >=
		      jobCard->jobSpecs.thresholds.loadStop[numLoad][PG]
            && load->li[PG] != INFINIT_LOAD
            && jobCard->jobSpecs.thresholds.loadStop[numLoad][PG]
						    != INFINIT_LOAD) {
            *reasons |= SUSP_LOAD_REASON;
            *subreasons = PG;
        }
        else if(load->li[IO] >=
		     jobCard->jobSpecs.thresholds.loadStop[numLoad][IO]
            && load->li[IO] != INFINIT_LOAD
            && jobCard->jobSpecs.thresholds.loadStop[numLoad][IO]
						      != INFINIT_LOAD) {
            *reasons |= SUSP_LOAD_REASON;
            *subreasons = IO;
        }
        else if(load->li[MEM]
			 <= jobCard->jobSpecs.thresholds.loadStop[numLoad][MEM]
            && load->li[MEM] != -INFINIT_LOAD
            && jobCard->jobSpecs.thresholds.loadStop[numLoad][MEM]
						      != -INFINIT_LOAD) {
            *reasons |= SUSP_LOAD_REASON;
            *subreasons = MEM;
        }

        else if(load->li[SWP]
			 <= jobCard->jobSpecs.thresholds.loadStop[numLoad][SWP]
            && load->li[SWP] != -INFINIT_LOAD
            && jobCard->jobSpecs.thresholds.loadStop[numLoad][SWP]
						      != -INFINIT_LOAD) {
            *reasons |= SUSP_LOAD_REASON;
            *subreasons = SWP;
        }
        else if(load->li[TMP]
			 <= jobCard->jobSpecs.thresholds.loadStop[numLoad][TMP]
            && load->li[TMP] != -INFINIT_LOAD
            && jobCard->jobSpecs.thresholds.loadStop[numLoad][TMP]
						      != -INFINIT_LOAD) {
            *reasons |= SUSP_LOAD_REASON;
            *subreasons = TMP;
        }

        for (j = R15S; !(*reasons) && j <= R15M; j++)
	    if ((load->li[j] != INFINIT_LOAD)
	        && (jobCard->jobSpecs.thresholds.loadStop[numLoad][j]
							 != INFINIT_LOAD)
	        && (load->li[j]
			>= jobCard->jobSpecs.thresholds.loadStop[numLoad][j])) {
	        *reasons |= SUSP_LOAD_REASON;
                *subreasons = j;
                break;
	    }


        for (j = MEM + 1; !(*reasons) &&
               j < MIN(allLsInfo->numIndx, jobCard->jobSpecs.thresholds.nIdx);
	              j++) {
            if (load->li[j] >= INFINIT_LOAD || load->li[j] <= -INFINIT_LOAD
                || jobCard->jobSpecs.thresholds.loadStop[numLoad][j]
							 >= INFINIT_LOAD
                || jobCard->jobSpecs.thresholds.loadStop[numLoad][j]
							 <= -INFINIT_LOAD) {
                continue;
            }
	    if (allLsInfo->resTable[j].orderType == INCR) {
	        if (load->li[j]
		       >= jobCard->jobSpecs.thresholds.loadStop[numLoad][j]) {
		    *reasons |= SUSP_LOAD_REASON;
                    *subreasons = j;
		    break;
                }
	    } else {
	        if (load->li[j]
		      <= jobCard->jobSpecs.thresholds.loadStop[numLoad][j]) {
		    *reasons |= SUSP_LOAD_REASON;
                    *subreasons = j;
		    break;
                }
	    }
        }

        if (!(*reasons) && jobCard->stopCondVal != NULL) {
            int returnCode;
            if (first == TRUE) {
                initTclHostData (&tclHostData);
                returnCode = getTclHostData (load, &tclHostData, FALSE);
                first = FALSE;
            } else {
                returnCode = getTclHostData (load, &tclHostData, TRUE);
            }
            if (returnCode >= 0
		     && evalResReq (jobCard->stopCondVal->selectStr,
    	       	        &tclHostData, DFT_FROMTYPE) == 1) {
        	*reasons |= SUSP_QUE_STOP_COND;
		break;
            }
        }
    }


    if (! (*reasons))
	return FALSE;


    if (LS_ISLOCKEDU(load->status) || LS_ISLOCKEDM(load->status)) {
	return TRUE;
    } else if (shouldStop1 (load)) {
        if (logclass & (LC_SCHED | LC_EXEC))
            ls_syslog (LOG_DEBUG2,
			"%s: Should stop job %s; reason=%x, subreasons=%d",
                        fname, lsb_jobid2str(jobCard->jobSpecs.jobId),
			*reasons, *subreasons);

        return TRUE;
    }
    return FALSE;

}
Ejemplo n.º 7
0
/*
 * getTclHostData - fill `tclHostData` for the host described by `load`.
 *
 * A function-static copy of the ls_gethostinfo("-:server", ...) result is
 * kept and refreshed when it is more than 10 * 60 units of `now` old. The
 * entry whose hostName matches load->hostName (via equalHost_()) supplies
 * the static host attributes; `load` supplies the status and load indices.
 *
 * Parameters:
 *   load        - load entry of the host to describe.
 *   tclHostData - structure to fill. hostName, hostType and hostModel are
 *                 set to point into the static cache; resBitMaps and
 *                 loadIndex are newly obtained on every successful call.
 *   freeMem     - when TRUE, the resBitMaps and loadIndex already held by
 *                 tclHostData are released first.
 *
 * Returns 0 on success, -1 when ls_gethostinfo() fails or the host is not
 * in the cached list.
 */
static int
getTclHostData (struct hostLoad *load, struct tclHostData *tclHostData,
                                                              int freeMem)
{

    static char fname[] = "getTclHostData";
    static time_t lastUpdHostInfo = 0;
    static int numLsfHosts = 0;
    static struct hostInfo *hostInfo = NULL;
    struct hostInfo *fresh;
    struct hostInfo *hp;
    int idx, nFresh;

    /* Refresh the cached host table when it has gone stale. */
    if (now - lastUpdHostInfo > 10 * 60) {

        fresh = ls_gethostinfo("-:server", &nFresh, 0, 0, LOCAL_ONLY);
        if (fresh == NULL) {
            ls_syslog(LOG_ERR, I18N_FUNC_FAIL_MM, fname, "ls_gethostinfo");
            return -1;
        }

        if (hostInfo != NULL) {
            freeLsfHostInfo (hostInfo, numLsfHosts);
            FREEUP (hostInfo);
            numLsfHosts = 0;
        }

        hostInfo = (struct hostInfo *) my_malloc
                (nFresh * sizeof (struct hostInfo), fname);
        for (idx = 0; idx < nFresh; idx++) {
            copyLsfHostInfo (&hostInfo[idx], &fresh[idx]);

            if (logclass & LC_TRACE) {
                ls_syslog(LOG_DEBUG2, "%s: host <%s> ncpus <%d> maxmem <%u> maxswp <%u> maxtmp <%u> ndisk <%d>",
                    fname, hostInfo[idx].hostName, hostInfo[idx].maxCpus,
                    hostInfo[idx].maxMem, hostInfo[idx].maxSwap,
                    hostInfo[idx].maxTmp, hostInfo[idx].nDisks);
            }
        }
        numLsfHosts = nFresh;
        lastUpdHostInfo = now;

    }

    if (freeMem == TRUE) {
        FREEUP (tclHostData->resBitMaps);
        FREEUP (tclHostData->loadIndex);
    }

    /* Locate the cached entry for this host. */
    hp = NULL;
    for (idx = 0; idx < numLsfHosts; idx++) {
        if (equalHost_(hostInfo[idx].hostName, load->hostName)) {
            hp = &hostInfo[idx];
            break;
        }
    }
    if (hp == NULL) {
        ls_syslog(LOG_ERR, _i18n_msg_get(ls_catd , NL_SETN, 5716,
            "%s: Host <%s> is not used by the batch system"), /* catgets 5716 */
            fname, load->hostName);
        return -1;
    }

    /* Static attributes come from the cache entry. */
    tclHostData->hostName = hp->hostName;
    tclHostData->hostType = hp->hostType;
    tclHostData->hostModel = hp->hostModel;
    tclHostData->maxCpus = hp->maxCpus;
    tclHostData->maxMem = hp->maxMem;
    tclHostData->maxSwap = hp->maxSwap;
    tclHostData->maxTmp = hp->maxTmp;
    tclHostData->nDisks = hp->nDisks;
    tclHostData->hostInactivityCount = 0;
    tclHostData->rexPriority = 0;
    tclHostData->fromHostType = tclHostData->hostType;
    tclHostData->fromHostModel = tclHostData->hostModel;
    tclHostData->cpuFactor = hp->cpuFactor;
    tclHostData->ignDedicatedResource = FALSE;
    tclHostData->resBitMaps = getResMaps(hp->nRes, hp->resources);
    tclHostData->DResBitMaps = NULL;
    tclHostData->flag = TCL_CHECK_EXPRESSION;
    tclHostData->status = load->status;

    tclHostData->loadIndex
       = (float *) my_malloc (allLsInfo->numIndx * sizeof(float), fname);

    /* The three run-queue indices are stored as (value + 1) / cpuFactor
     * unless the factor is zero, in which case they are copied as is. */
    if (hp->cpuFactor != 0.0) {
        tclHostData->loadIndex[R15S] = (load->li[R15S] + 1.0) / hp->cpuFactor;
        tclHostData->loadIndex[R1M] = (load->li[R1M] + 1.0) / hp->cpuFactor;
        tclHostData->loadIndex[R15M] = (load->li[R15M] + 1.0) / hp->cpuFactor;
    } else {
        tclHostData->loadIndex[R15S] = load->li[R15S];
        tclHostData->loadIndex[R1M] = load->li[R1M];
        tclHostData->loadIndex[R15M] = load->li[R15M];
    }

    /* Every index from 3 upward is copied unchanged. */
    for (idx = 3; idx < allLsInfo->numIndx; idx++)
        tclHostData->loadIndex[idx] = load->li[idx];

    return 0;

}