Beispiel #1
0
/*
 * copyJData - build a new job record that mirrors an existing one.
 *
 * A fresh record is obtained from initJData(jp->shared) and then overwritten
 * with a raw byte copy of *jp.  Fields that must not be shared with the
 * source are repaired afterwards:
 *   - reqHistory keeps the value initJData() gave the new record;
 *   - numRef and nextJob are reset (0 / NULL);
 *   - userName, schedHost and jobSpoolDir are re-created through safeSave()
 *     (presumably a checked string duplicate -- confirm against its
 *     definition);
 *   - uPtr is looked up again from the new userName;
 *   - askedPtr gets its own array holding the same hData/priority pairs.
 *
 * Returns the new record.
 */
struct jData *
copyJData(struct jData *jp)
{
    struct jData     *clone;
    struct rqHistory *keptHistory;
    int              k;

    clone = initJData(jp->shared);

    /*
     * The bulk copy below overwrites every field, so release any spool
     * directory string the new record already holds before its pointer
     * is lost.
     */
    if (clone->jobSpoolDir) {
        FREEUP(clone->jobSpoolDir);
    }

    /* Carry the new record's own request history across the raw copy. */
    keptHistory = clone->reqHistory;
    memcpy(clone, jp, sizeof(struct jData));
    clone->reqHistory = keptHistory;

    clone->numRef = 0;
    clone->nextJob = NULL;

    clone->userName = safeSave(jp->userName);
    clone->schedHost = safeSave(jp->schedHost);
    clone->uPtr = getUserData(clone->userName);

    if (jp->askedPtr != NULL) {
        clone->askedPtr = (struct askedHost *) my_calloc(jp->numAskedPtr,
                                                         sizeof(struct askedHost),
                                                         "copyJData");
        for (k = 0; k < jp->numAskedPtr; k++) {
            clone->askedPtr[k].hData = jp->askedPtr[k].hData;
            clone->askedPtr[k].priority = jp->askedPtr[k].priority;
        }
    }

    if (jp->jobSpoolDir != NULL) {
        clone->jobSpoolDir = safeSave(jp->jobSpoolDir);
    }

    return clone;
}
Beispiel #2
0
/*
 * dupOrBlank_ - hand str to safeSave(), substituting the one-blank string
 * " " when str is NULL.  Reply string fields are filled through this helper
 * so that none of them is left NULL.
 */
static char *
dupOrBlank_(char *str)
{
    return safeSave(str ? str : " ");
}

/*
 * checkQueues - fill a queue-info reply with one entry per selected queue.
 *
 * Selection:
 *   - ALL_QUEUE option : every queue on qDataList;
 *   - DFT_QUEUE option : every queue for which isDefQueue() is true;
 *   - otherwise        : the queues named in queueInfoReqPtr->names[].
 * An empty LOST_AND_FOUND queue is always skipped, and queues rejected by
 * checkHU() against the host/user lists from getCheckList() are skipped.
 * Several LOST_AND_FOUND queues are folded into one entry by adding their
 * job counters; any other duplicate name is reported once.
 *
 * Side effects: numNames of the request is forced to 1 for ALL_QUEUE and
 * DFT_QUEUE; queueInfoReplyPtr->numQueues, nIdx, queues[] and (on a bad
 * name) badQueue are written.  Entries are written at
 * queues[numQueues]; NOTE(review): this assumes the caller sized queues[]
 * for every queue that can match -- confirm against the caller.
 *
 * Returns:
 *   LSBE_NO_ERROR   at least one queue was reported;
 *   LSBE_BAD_QUEUE  a requested name matched no queue (badQueue = its index);
 *   LSBE_NO_MEM     the host-list buffer could not be allocated;
 *   otherwise       the getCheckList() error, or -- when nothing was
 *                   reported -- the last value stored in checkRet.
 */
int
checkQueues(struct infoReq *queueInfoReqPtr,
            struct queueInfoReply *queueInfoReplyPtr)
{
    static char fname[] = "checkQueues()";
    struct qData *qp;
    struct qData *next;
    struct queueInfoEnt *qRep = NULL;
    int i;
    int j;
    int checkRet;
    int allQ = FALSE;
    int defaultQ = FALSE;
    int found = FALSE;
    char *checkUsers = NULL;
    char *checkHosts = NULL;
    float *cpuFactor;
    /*
     * Fallback CPU factor.  It lives at function scope because cpuFactor
     * may point at it and is dereferenced after the lookup block ends.
     */
    float one = 1.0;

    queueInfoReplyPtr->numQueues = 0;
    queueInfoReplyPtr->nIdx = allLsInfo->numIndx;

    if (queueInfoReqPtr->options & ALL_QUEUE) {
        queueInfoReqPtr->numNames = 1;
        allQ = TRUE;
    } else if (queueInfoReqPtr->options & DFT_QUEUE) {
        queueInfoReqPtr->numNames = 1;
        defaultQ = TRUE;
    }

    checkRet = getCheckList(queueInfoReqPtr, &checkHosts, &checkUsers);
    if (checkRet != LSBE_NO_ERROR) {
        /*
         * getCheckList() may already have built one list before failing.
         * NOTE(review): relies on FREEUP() leaving a freed pointer NULL,
         * as getCheckList() itself frees *hostList on one path -- confirm.
         */
        FREEUP(checkUsers);
        FREEUP(checkHosts);
        return (checkRet);
    }

    for (j = 0; j < queueInfoReqPtr->numNames; j++) {
        for (qp = qDataList->back; qp != qDataList; qp = next) {
            next = qp->back;

            if (strcmp(qp->queue, LOST_AND_FOUND) == 0 && qp->numJobs == 0) {
                continue;
            }
            if (!allQ && !defaultQ
                && strcmp(qp->queue, queueInfoReqPtr->names[j]) != 0) {
                continue;
            }
            if (!allQ && defaultQ && !isDefQueue(qp->queue)) {
                continue;
            }

            /* The name exists even if the host/user filter rejects it. */
            found = TRUE;

            checkRet = checkHU(checkHosts, checkUsers, qp);
            if (checkRet != LSBE_NO_ERROR) {
                continue;
            }

            /* Already reported?  Only LOST_AND_FOUND entries are merged. */
            for (i = 0; i < queueInfoReplyPtr->numQueues; i++) {
                if (strcmp(qp->queue, queueInfoReplyPtr->queues[i].queue) != 0) {
                    continue;
                }
                if (strcmp(qp->queue, LOST_AND_FOUND) == 0) {
                    queueInfoReplyPtr->queues[i].numJobs += qp->numJobs;
                    queueInfoReplyPtr->queues[i].numPEND += qp->numPEND;
                    queueInfoReplyPtr->queues[i].numRUN += qp->numRUN;
                    queueInfoReplyPtr->queues[i].numSSUSP += qp->numSSUSP;
                    queueInfoReplyPtr->queues[i].numUSUSP += qp->numUSUSP;
                }
                break;
            }
            if (i < queueInfoReplyPtr->numQueues) {
                continue;
            }

            qRep = &(queueInfoReplyPtr->queues[queueInfoReplyPtr->numQueues]);

            qRep->queue = qp->queue;
            qRep->description = qp->description;
            qRep->schedDelay = qp->schedDelay;
            qRep->mig = (qp->mig != INFINIT_INT) ? qp->mig / 60 : INFINIT_INT;

            if (qp->acceptIntvl == DEF_ACCEPT_INTVL
                || qp->acceptIntvl == INFINIT_INT) {
                qRep->acceptIntvl = INFINIT_INT;
            } else {
                qRep->acceptIntvl = qp->acceptIntvl * msleeptime;
            }

            qRep->windows = dupOrBlank_(qp->windows);
            qRep->windowsD = dupOrBlank_(qp->windowsD);

            if (qp->uGPtr) {
                qRep->userList = getGroupMembers(qp->uGPtr, FALSE);
            } else {
                qRep->userList = safeSave(" ");
            }

            if (qp->hostList) {
                /*
                 * Re-emit the host list with "/" appended to every name
                 * that getHGrpData() recognises, each word followed by a
                 * blank.
                 */
                char *word;
                char *scan;
                size_t numBlanks = 0;
                size_t bufLen;

                for (scan = strchr(qp->hostList, ' ');
                     scan != NULL;
                     scan = strchr(scan + 1, ' ')) {
                    numBlanks++;
                }

                /*
                 * Each word can grow by two characters ("/" and " ").
                 * With blank-separated words there are at most
                 * numBlanks + 1 words, so the worst case is
                 * strlen + numBlanks + 2 characters plus the terminating
                 * NUL.  The former "+ 2" was one byte short for a single
                 * host-group name (numBlanks == 0).
                 */
                bufLen = strlen(qp->hostList) + numBlanks * 2 + 3;

                qRep->hostList = (char *) calloc(bufLen, sizeof(char));
                if (qRep->hostList == NULL) {
                    ls_syslog(LOG_ERR, I18N_FUNC_D_FAIL_M, fname, "calloc",
                              bufLen * sizeof(char));
                    /*
                     * This entry is not counted in numQueues yet, so
                     * release what was built for it here, together with
                     * the filter lists.
                     */
                    FREEUP(qRep->windows);
                    qRep->windows = NULL;
                    FREEUP(qRep->windowsD);
                    qRep->windowsD = NULL;
                    FREEUP(qRep->userList);
                    qRep->userList = NULL;
                    FREEUP(checkUsers);
                    FREEUP(checkHosts);
                    return (LSBE_NO_MEM);
                }

                scan = qp->hostList;
                while ((word = getNextWord_(&scan)) != NULL) {
                    strcat(qRep->hostList, word);
                    if (getHGrpData(word) != NULL) {
                        strcat(qRep->hostList, "/");
                    }
                    strcat(qRep->hostList, " ");
                }
            } else {
                qRep->hostList = safeSave(" ");
            }

            qRep->priority = qp->priority;
            qRep->nice = qp->nice;
            qRep->userJobLimit = qp->uJobLimit;
            if (qp->pJobLimit >= INFINIT_FLOAT) {
                qRep->procJobLimit = INFINIT_FLOAT;
            } else {
                qRep->procJobLimit = qp->pJobLimit;
            }
            qRep->hostJobLimit = qp->hJobLimit;
            qRep->maxJobs = qp->maxJobs;
            qRep->numJobs = qp->numJobs;
            qRep->numPEND = qp->numPEND;
            qRep->numRUN = qp->numRUN;
            qRep->numSSUSP = qp->numSSUSP;
            qRep->numUSUSP = qp->numUSUSP;
            qRep->numRESERVE = qp->numRESERVE;

            qRep->qAttrib = qp->qAttrib;
            qRep->qStatus = qp->qStatus;
            for (i = 0; i < LSF_RLIM_NLIMITS; i++) {
                qRep->rLimits[i] = qp->rLimits[i];
                qRep->defLimits[i] = qp->defLimits[i];
            }

            if (qp->hostSpec == NULL) {
                qRep->hostSpec = safeSave(" ");
            } else {
                qRep->hostSpec = safeSave(qp->hostSpec);

                /* Try the spec as a model name first, then as a host. */
                cpuFactor = getModelFactor(qp->hostSpec);
                if (cpuFactor == NULL) {
                    cpuFactor = getHostFactor(qp->hostSpec);
                }
                if (cpuFactor == NULL) {
                    ls_syslog (LOG_ERR, _i18n_msg_get (ls_catd, NL_SETN, 7500, "%s: Cannot find cpu factor for hostSpec <%s> in queue <%s>; cpuFactor is set to 1.0"),	/* catgets 7500 */
                               fname, qp->hostSpec, qp->queue);
                    cpuFactor = &one;
                }

                /* Divide the positive CPU and run limits by the factor. */
                if (qRep->rLimits[LSF_RLIMIT_CPU] > 0) {
                    qRep->rLimits[LSF_RLIMIT_CPU] /= *cpuFactor;
                }
                if (qRep->rLimits[LSF_RLIMIT_RUN] > 0) {
                    qRep->rLimits[LSF_RLIMIT_RUN] /= *cpuFactor;
                }
                if (qRep->defLimits[LSF_RLIMIT_CPU] > 0) {
                    qRep->defLimits[LSF_RLIMIT_CPU] /= *cpuFactor;
                }
                if (qRep->defLimits[LSF_RLIMIT_RUN] > 0) {
                    qRep->defLimits[LSF_RLIMIT_RUN] /= *cpuFactor;
                }
            }

            qRep->defaultHostSpec = dupOrBlank_(qp->defaultHostSpec);
            qRep->loadSched = qp->loadSched;
            qRep->loadStop = qp->loadStop;

            qRep->procLimit = qp->procLimit;
            qRep->minProcLimit = qp->minProcLimit;
            qRep->defProcLimit = qp->defProcLimit;

            /* admins is keyed on the count, not on the pointer. */
            if (qp->nAdmins > 0) {
                qRep->admins = safeSave(qp->admins);
            } else {
                qRep->admins = safeSave(" ");
            }

            qRep->preCmd = dupOrBlank_(qp->preCmd);
            qRep->prepostUsername = dupOrBlank_(qp->prepostUsername);

            qRep->chkpntPeriod = qp->chkpntPeriod;
            qRep->chkpntDir = dupOrBlank_(qp->chkpntDir);

            qRep->postCmd = dupOrBlank_(qp->postCmd);
            qRep->requeueEValues = dupOrBlank_(qp->requeueEValues);

            qRep->resReq = dupOrBlank_(qp->resReq);
            qRep->slotHoldTime = qp->slotHoldTime;

            qRep->resumeCond = dupOrBlank_(qp->resumeCond);
            qRep->stopCond = dupOrBlank_(qp->stopCond);
            qRep->jobStarter = dupOrBlank_(qp->jobStarter);

            /* "SIG_CHKPNT" is reported as "CHKPNT" in the action fields. */
            if (qp->suspendActCmd
                && strcmp(qp->suspendActCmd, "SIG_CHKPNT") == 0) {
                qRep->suspendActCmd = safeSave("CHKPNT");
            } else {
                qRep->suspendActCmd = dupOrBlank_(qp->suspendActCmd);
            }

            qRep->resumeActCmd = dupOrBlank_(qp->resumeActCmd);

            if (qp->terminateActCmd
                && strcmp(qp->terminateActCmd, "SIG_CHKPNT") == 0) {
                qRep->terminateActCmd = safeSave("CHKPNT");
            } else {
                qRep->terminateActCmd = dupOrBlank_(qp->terminateActCmd);
            }

            for (i = 0; i < LSB_SIG_NUM; i++) {
                qRep->sigMap[i] = qp->sigMap[i];
            }

            queueInfoReplyPtr->numQueues++;
        }

        /* An explicitly named queue that does not exist aborts the reply. */
        if (!allQ && !defaultQ && !found) {
            if (queueInfoReplyPtr->numQueues > 0) {
                freeQueueInfoReply(queueInfoReplyPtr, "freeAll");
            }
            queueInfoReplyPtr->badQueue = j;
            queueInfoReplyPtr->numQueues = 0;
            FREEUP(checkUsers);
            FREEUP(checkHosts);
            return (LSBE_BAD_QUEUE);
        }

        found = FALSE;
        if (allQ || defaultQ) {
            break;
        }
    }

    FREEUP(checkUsers);
    FREEUP(checkHosts);
    if (queueInfoReplyPtr->numQueues == 0) {
        return (checkRet);
    }

    return (LSBE_NO_ERROR);
}
Beispiel #3
0
/*
 * getCheckList - build the optional user and host filter lists of a
 * queue-info request.
 *
 * The filter arguments follow the queue names in qInfoReq->names[]: the
 * user argument (when CHECK_USER is set) sits at index numNames, and the
 * host argument (when CHECK_HOST is set) at the next free index.
 *
 * User argument:
 *   "all"                     -> "all";
 *   a user known to getpwlsfuser_() -> the name, unless isManager() is true
 *                                or the uid is 0, in which case *userList
 *                                stays NULL;
 *   a user group              -> getGroupMembers() of that group;
 *   anything else             -> the name as given.
 * Host argument:
 *   "all"                     -> blank-separated list of every name returned
 *                                by getLsbHostNames();
 *   a resolvable host         -> its h_name;
 *   a host group              -> getGroupMembers() of that group;
 *   anything else             -> error.
 *
 * On success returns LSBE_NO_ERROR; *hostList / *userList are either NULL
 * or heap strings the caller must free.  On failure returns LSBE_BAD_HOST
 * with both outputs freed and set to NULL, so the caller has nothing to
 * release.
 */
static int
getCheckList (struct infoReq *qInfoReq, char **hostList, char **userList)
{
    char *sp;
    int numNames;
    struct hostent *hp;
    struct gData *gp;
    struct passwd *pp;
    char **allHosts;
    int numAllHosts, i;

    *hostList = NULL;
    *userList = NULL;
    numNames = qInfoReq->numNames;

    if (qInfoReq->options & CHECK_USER) {
        sp = qInfoReq->names[numNames];
        ++numNames;

        if (strcmp(sp, "all") == 0) {
            *userList = safeSave(sp);
        } else if ((pp = getpwlsfuser_(sp)) != NULL) {
            /* Managers and uid 0 get no user filter at all. */
            if (!isManager(sp) && pp->pw_uid != 0) {
                *userList = safeSave(sp);
            }
        } else if ((gp = getUGrpData(sp)) != NULL) {
            *userList = getGroupMembers(gp, TRUE);
        } else {
            *userList = safeSave(sp);
        }
    }

    if (!(qInfoReq->options & CHECK_HOST)) {
        return (LSBE_NO_ERROR);
    }

    sp = qInfoReq->names[numNames];

    if (strcmp(sp, "all") == 0) {
        *hostList = safeSave(sp);
    } else if ((hp = Gethostbyname_(sp)) != NULL) {
        *hostList = safeSave(hp->h_name);
    } else if ((gp = getHGrpData(sp)) != NULL) {
        *hostList = getGroupMembers(gp, TRUE);
    } else {
        goto badHost;
    }

    /*
     * Test the string that was produced, not the out-parameter itself:
     * a NULL result here must not reach strcmp() below.
     */
    if (*hostList == NULL) {
        goto badHost;
    }

    if (strcmp(*hostList, "all") != 0) {
        return (LSBE_NO_ERROR);
    }

    /* Expand "all" into the explicit list of batch host names. */
    FREEUP(*hostList);
    *hostList = NULL;

    if ((numAllHosts = getLsbHostNames(&allHosts)) <= 0) {
        ls_syslog (LOG_ERR, I18N (7512, "getCheckList: Unable to obtain host list"));	/* catgets 7512 */
        goto badHost;
    }

    /* NOTE(review): assumes each name fits in MAX_LSB_NAME_LEN - 1 chars. */
    *hostList = (char *) my_malloc(numAllHosts * MAX_LSB_NAME_LEN,
                                   "getCheckList");
    if (*hostList == NULL) {
        ls_syslog(LOG_ERR, I18N_FUNC_FAIL, "getCheckList", "my_malloc");
        goto badHost;
    }

    (*hostList)[0] = '\0';
    for (i = 0; i < numAllHosts; i++) {
        strcat(*hostList, allHosts[i]);
        if (i < numAllHosts - 1) {
            strcat(*hostList, " ");
        }
    }

    return (LSBE_NO_ERROR);

badHost:
    /* Leave nothing allocated behind on failure. */
    FREEUP(*userList);
    *userList = NULL;
    FREEUP(*hostList);
    *hostList = NULL;
    return (LSBE_BAD_HOST);
}
Beispiel #4
0
/*
 * do_switchjob - handle a "switch job" request: apply new queue-derived
 * settings to a job already known to this daemon and send back a reply.
 *
 * xdrs    stream holding the encoded struct jobSpecs of the request
 * chfd    channel the reply is written to
 * reqHdr  header of the request (passed to the XDR routines)
 *
 * The job is looked up by jobId on the jobQueHead list.  For a live job the
 * run windows, queue name, priority, nice value, attributes, thresholds,
 * run limit, requeue values, resume/stop conditions, optional login shell
 * and the suspend/resume/terminate action commands are replaced with the
 * values from the request; the job is then re-linked with inJobLink() and
 * re-niced.
 *
 * Reply opCode: ERR_NO_ERROR (with a jobReply body), ERR_BAD_REQ (decode or
 * window parsing failed), ERR_NO_JOB (unknown job) or ERR_JOB_FINISH (job
 * already DONE/EXIT).
 */
void
do_switchjob(XDR * xdrs, int chfd, struct LSFHeader * reqHdr)
{
    static char        fname[] = "do_switchjob()";
    char               reply_buf[MSGSIZE];
    XDR                xdrs2;
    struct jobSpecs    jobSpecs;
    struct jobReply    jobReply;
    int                i;
    sbdReplyType       reply;
    char               *cp;
    char               *word;
    char               found = FALSE;
    struct LSFHeader   replyHdr;
    char               *replyStruct;
    struct jobCard     *jp = NULL;
    struct lsfAuth     *auth = NULL;

    memset(&jobReply, 0, sizeof(struct jobReply));
    /*
     * Start from a zeroed request so that a failed decode never leaves
     * stack garbage for xdr_lsffree() at sendReply to look at.
     */
    memset(&jobSpecs, 0, sizeof(struct jobSpecs));

    if (!xdr_jobSpecs(xdrs, &jobSpecs, reqHdr)) {
        reply = ERR_BAD_REQ;
        ls_syslog(LOG_ERR, I18N_FUNC_FAIL, fname, "xdr_jobSpecs");
        goto sendReply;
    }

    /*
     * Remember the requested id for the diagnostics at sendReply.  The
     * reply body is only transmitted on ERR_NO_ERROR, where this field is
     * set again from the job itself, so error replies are unaffected.
     */
    jobReply.jobId = jobSpecs.jobId;

    for (jp = jobQueHead->back; jp != jobQueHead; jp = jp->back) {
        if (jp->jobSpecs.jobId == jobSpecs.jobId) {
            found = TRUE;
            break;
        }
    }
    if (!found) {
        reply = ERR_NO_JOB;
        ls_syslog(LOG_ERR, _i18n_msg_get(ls_catd , NL_SETN, 5807,
                                         "%s: mbatchd trying to switch a non-existent job <%s>"), fname, lsb_jobid2str(jobSpecs.jobId)); /* catgets 5807 */
        goto sendReply;
    }
    if (jp->jobSpecs.jStatus & (JOB_STAT_DONE | JOB_STAT_EXIT)) {
        reply = ERR_JOB_FINISH;
        goto sendReply;
    }

    /* Rebuild the job's run windows from the new window string. */
    cp = jobSpecs.windows;
    freeWeek(jp->week);
    while ((word = getNextWord_(&cp)) != NULL) {
        if (addWindow(word, jp->week, "switchJob jobSpecs") < 0) {
            ls_syslog(LOG_ERR, I18N_JOB_FAIL_S_S_M, fname,
                      lsb_jobid2str(jp->jobSpecs.jobId), "addWindow", word);
            freeWeek(jp->week);
            reply = ERR_BAD_REQ;
            goto sendReply;
        }
    }
    jp->windEdge = now;

    /* Leaving an exclusive queue: unlock every host the job occupies. */
    if ((jp->jobSpecs.jAttrib & Q_ATTRIB_EXCLUSIVE)
        && !(jobSpecs.jAttrib & Q_ATTRIB_EXCLUSIVE)) {
        for (i = 0; i < jp->jobSpecs.numToHosts; i++) {
            /* A host that was not locked is not an error. */
            if (unlockHost_(jp->jobSpecs.toHosts[i]) < 0
                && lserrno != LSE_LIM_NLOCKED) {
                ls_syslog(LOG_ERR, I18N_JOB_FAIL_S_S_MM, fname,
                          lsb_jobid2str(jp->jobSpecs.jobId), "unlockHost_", jp->jobSpecs.toHosts[i]);
            }
        }
    }

    strcpy(jp->jobSpecs.queue, jobSpecs.queue);
    strcpy(jp->jobSpecs.windows, jobSpecs.windows);
    jp->jobSpecs.priority = jobSpecs.priority;
    jp->jobSpecs.nice = jobSpecs.nice;
    jp->jobSpecs.jAttrib = jobSpecs.jAttrib;

    freeThresholds (&jp->jobSpecs.thresholds);
    saveThresholds (&jp->jobSpecs, &jobSpecs.thresholds);

    /* Only the run limit is taken over from the request. */
    memcpy((char *) &jp->jobSpecs.lsfLimits[LSF_RLIMIT_RUN],
           (char *) &jobSpecs.lsfLimits[LSF_RLIMIT_RUN],
           sizeof(struct lsfLimit));

    strcpy (jp->jobSpecs.requeueEValues, jobSpecs.requeueEValues);
    strcpy (jp->jobSpecs.resumeCond, jobSpecs.resumeCond);
    strcpy (jp->jobSpecs.stopCond, jobSpecs.stopCond);

    /* Re-parse the resume/stop conditions; a parse failure is only logged. */
    lsbFreeResVal (&jp->resumeCondVal);
    if (jobSpecs.resumeCond && jobSpecs.resumeCond[0] != '\0') {
        if ((jp->resumeCondVal = checkThresholdCond (jobSpecs.resumeCond))
            == NULL) {
            ls_syslog(LOG_ERR, I18N_JOB_FAIL_S_S, fname,
                      lsb_jobid2str(jp->jobSpecs.jobId),
                      "checkThresholdCond", jobSpecs.resumeCond);
        }
    }

    lsbFreeResVal (&jp->stopCondVal);
    if (jobSpecs.stopCond && jobSpecs.stopCond[0] != '\0') {
        if ((jp->stopCondVal = checkThresholdCond (jobSpecs.stopCond))
            == NULL) {
            ls_syslog(LOG_ERR, I18N_JOB_FAIL_S_S, fname,
                      lsb_jobid2str(jp->jobSpecs.jobId),
                      "checkThresholdCond", jobSpecs.stopCond);
        }
    }

    if (jobSpecs.options & SUB_LOGIN_SHELL) {
        FREEUP (jp->jobSpecs.loginShell);
        jp->jobSpecs.loginShell = safeSave (jobSpecs.loginShell);
    }

    strcpy (jp->jobSpecs.suspendActCmd, jobSpecs.suspendActCmd);
    strcpy (jp->jobSpecs.resumeActCmd, jobSpecs.resumeActCmd);
    strcpy (jp->jobSpecs.terminateActCmd, jobSpecs.terminateActCmd);

    setRunLimit (jp, FALSE);
    /* Take the job off its list and link it in again via inJobLink(). */
    offList ((struct listEntry *)jp);
    inJobLink (jp);

    if (reniceJob(jp) < 0) {
        ls_syslog(LOG_DEBUG, "%s: renice job <%s> failed",
                  fname, lsb_jobid2str(jp->jobSpecs.jobId));
    }

    reply = ERR_NO_ERROR;
    jobReply.jobId = jp->jobSpecs.jobId;
    jobReply.jobPid = jp->jobSpecs.jobPid;
    jobReply.jobPGid = jp->jobSpecs.jobPGid;
    jobReply.jStatus = jp->jobSpecs.jStatus;

sendReply:
    /*
     * From here on jp may be NULL (decode failure) or the list head (job
     * not found), so diagnostics use the id kept in jobReply instead of
     * dereferencing jp.
     */
    xdr_lsffree(xdr_jobSpecs, (char *)&jobSpecs, reqHdr);
    xdrmem_create(&xdrs2, reply_buf, MSGSIZE, XDR_ENCODE);
    initLSFHeader_(&replyHdr);
    replyHdr.opCode = reply;
    if (reply == ERR_NO_ERROR) {
        replyStruct = (char *) &jobReply;
    } else {
        replyStruct = NULL;
    }

    if (!xdr_encodeMsg(&xdrs2, replyStruct, &replyHdr, xdr_jobReply, 0, auth)) {
        ls_syslog(LOG_ERR, I18N_JOB_FAIL_S_M, fname,
                  lsb_jobid2str(jobReply.jobId),
                  "xdr_jobReply");
        /* NOTE(review): relife() presumably restarts the daemon -- confirm. */
        relife();
    }

    if (chanWrite_(chfd, reply_buf, XDR_GETPOS(&xdrs2)) <= 0) {
        ls_syslog(LOG_ERR, I18N_JOB_FAIL_S_M, fname,
                  lsb_jobid2str(jobReply.jobId), "chanWrite_");
    }

    xdr_destroy(&xdrs2);

    return;

}