コード例 #1
0
ファイル: comment.c プロジェクト: CESNET/torque
/*
 * Record the reason that the current candidate job cannot currently run.
 * When it is decided that the job will remain queued, place the reason
 * string in the comment field of the job structure.
 *
 *   job      - job whose comment is to be updated.
 *   reason   - new comment text; NULL is treated as the empty string.
 *   optional - if non-zero, the update may be skipped (see below).
 *
 * The comment is only changed when it differs from the one already stored
 * in job->comment.  If the new text cannot be duplicated, the failure is
 * logged and the job is left exactly as it was: the old comment is kept
 * and schd_alterjob() is not called.
 */
void
schd_comment_job(Job *job, char *reason, int optional)
  {
  char   *id = "schd_comment_job";
  char   *msg_ptr;

  /*
   * If the 'optional' argument is true, then this comment is optional.
   * Do not bother commenting this job if this is not the first time it
   * has been seen, and it has been recently modified (hopefully it was
   * a comment change).  If there is no comment for the job, comment it
   * this time.
   */

  if (optional &&
      (!schd_FirstRun) &&
      (job->comment != NULL) &&
      !(job->flags & JFLAGS_FIRST_SEEN) &&
      (MIN_COMMENT_AGE && ((schd_TimeNow - job->mtime) < MIN_COMMENT_AGE)))
    {
    return;
    }

  if (reason == NULL)
    reason = "";

  /* Nothing to do if the job already carries exactly this comment. */
  if ((job->comment != NULL) && (strcmp(reason, job->comment) == 0))
    return;

  /*
   * Duplicate the text first and bail out before touching anything if
   * that fails, so a NULL pointer is never handed to schd_alterjob() and
   * the existing comment is not thrown away.
   */
  msg_ptr = schd_strdup(reason);

  if (msg_ptr == NULL)
    {
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
               "schd_strdup(job->comment)");
    return;
    }

  /* Alter PBS' view of the job. */
  schd_alterjob(connector, job, ATTR_comment, msg_ptr, NULL);

  /* Replace the comment held in the job structure; free(NULL) is a no-op. */
  free(job->comment);

  job->comment = msg_ptr;
  }
コード例 #2
0
ファイル: how_many.c プロジェクト: Johnlihj/torque
/*
 * Return the count attached to a state in a string of space-separated
 * "name:count" tokens (for example "free:3 busy:2").
 *
 *   str   - the token string; it is only read, never modified.
 *   state - the state name to look for.  A token matches when it begins
 *           with 'state' (prefix match).
 *
 * Only the first matching token is examined.  The result is the value of
 * atoi() applied to the text that follows the first ':' in that token.
 * Zero is returned when either argument is NULL, when no token matches,
 * or when the matching token has no ':' or nothing after it.
 *
 * The string is scanned in place: no memory is allocated and strtok() is
 * not used, so the call cannot fail for lack of memory and does not
 * disturb a strtok() sequence the caller may have in progress.
 */
int
schd_how_many(char *str, char *state)
  {
  size_t  state_len;
  char   *tok;

  if ((str == NULL) || (state == NULL))
    return (0);

  state_len = strlen(state);

  /* Step to the first token; runs of spaces separate tokens. */
  tok = str + strspn(str, " ");

  while (*tok != '\0')
    {
    size_t  tok_len = strcspn(tok, " ");
    char   *tok_end = tok + tok_len;
    char   *num;

    if ((tok_len >= state_len) && (memcmp(tok, state, state_len) == 0))
      {
      /* Locate the number after the colon, staying inside this token. */
      num = memchr(tok, ':', tok_len);

      if (num == NULL)
        return (0);

      num++;

      /*
       * atoi() skips leading white space.  Skip it here instead, using a
       * set that excludes ' ', so the scan halts at the token boundary
       * and atoi() can never wander into the following token.
       */
      num += strspn(num, "\t\n\v\f\r");

      if (num == tok_end)
        return (0);

      return (atoi(num));
      }

    tok = tok_end + strspn(tok_end, " ");
    }

  return (0);
  }
コード例 #3
0
ファイル: fair_access.c プロジェクト: AlbertDeFusco/torque
/*
 * Extract and store the Fair Access Directives from a line just read from
 * the scheduler's configuration file.
 *
 *   arg         - the directive text; it is split in place with strtok().
 *   sep         - the separator characters handed to strtok().
 *   fairacl_ptr - address of the FairAccessList head pointer.  If it is
 *                 NULL on entry, a list head is allocated and stored.
 *
 * A directive must have exactly four fields:
 *     QUEUE  <queue name>  <max share %>  <max running %>
 *
 * If an entry with the same queue name already exists in the list it is
 * updated; otherwise a new entry is appended to the tail of the list.
 *
 * Returns 0 on success, or -1 (after logging) when the directive is
 * malformed or memory cannot be obtained.
 */
int
arg_to_fairshare(char *arg, char *sep, FairAccessList **fairacl_ptr)
  {
  int     num = 0, max_A = 0, max_B = 0, fieldcnt;
  char   *id = "arg_to_fairshare";
  char   *field, *aclname = NULL;
  AccessEntry *entry, *tail, *fptr;

  /*
   * Multiple lines may be used to add entries to the FairACL list.  Make
   * sure the list head exists before anything is hung from it.
   */

  if (*fairacl_ptr == NULL)
    {
    *fairacl_ptr = malloc(sizeof(**fairacl_ptr));

    if (*fairacl_ptr == NULL)
      {
      log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                 "malloc(FairAccessList)");
      return(-1);
      }

    (*fairacl_ptr)->next = NULL;
    (*fairacl_ptr)->entry = NULL;
    }

  /* first we process the configuration line passed in to use, saving
   * the important bits for later; at this point we don't know if we
   * have a new queue entry, or simply an addition to an existing
   * queue's FairAccess list.
   */
  fieldcnt = 0;

  for (field = strtok(arg, sep); field != NULL; field = strtok(NULL, sep))
    {
    fieldcnt++;

    if (fieldcnt == 1 && /* first field on FAIR_SHARE line */
        (strcmp(field, "QUEUE")))
      {
      sprintf(log_buffer,
              "Unrecognized FAIR_SHARE directive: %s", field);
      log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
      return(-1);
      }
    else if (fieldcnt == 2)  /* Queue name */
      {
      /*
       * The token lives inside 'arg' and stays valid after the loop, so
       * remember a pointer to it rather than copying it into a
       * fixed-size buffer that a long name could overflow.
       */
      aclname = field;
      }
    else if (fieldcnt == 3)  /* Queue max shares (%) */
      max_A = atoi(field);

    else if (fieldcnt == 4)  /* Queue max running jobs (% CPUs) */
      max_B = atoi(field);
    }

  if (fieldcnt != 4)
    {
    sprintf(log_buffer,
            "Incomplete FAIR_SHARE directive: %s", arg);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
    return(-1);
    }

  /*
   * NOTE(review): the two range checks below accept 0, while their log
   * messages say "between 1 and 100" - confirm which one is intended.
   */
  if (max_A < 0 || max_A > 100)
    {
    sprintf(log_buffer,
            "FAIR_SHARE share percentage (%d) should be between 1 and 100",
            max_A);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
    return(-1);
    }

  if (max_B < 0 || max_B > 100)
    {
    sprintf(log_buffer,
            "FAIR_SHARE running job percentage (%d) should be between 1 and 100",
            max_B);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
    return(-1);
    }

  /*
   * Search for an existing entry for this queue, remembering the last
   * element visited so that a new entry can be appended after it.
   */
  entry = NULL;
  tail  = NULL;

  for (fptr = (*fairacl_ptr)->entry; fptr != NULL; fptr = fptr->next)
    {
    if (!strcmp(fptr->name, aclname))
      {
      entry = fptr;
      break;
      }

    tail = fptr;
    }

  /* if we found the entry, then we need add the new info to it;
   * otherwise, we first need to create a new entry struct, and
   * then add to it...
   */

  if (entry == NULL)
    {
    entry = malloc(sizeof(*entry));

    if (entry == NULL)
      {
      log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                 "malloc(newAccessEntry)");
      return(-1);
      }

    memset(entry, 0, sizeof(*entry));

    /*
     * Finish building the entry before linking it in, so that the list
     * never holds an element whose name is NULL (the search above would
     * hand such a name to strcmp()).
     */
    entry->name = schd_strdup(aclname);

    if (entry->name == NULL)
      {
      log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                 "schd_strdup(aclname)");
      free(entry);
      return(-1);
      }

    if (tail == NULL)
      (*fairacl_ptr)->entry = entry;
    else
      tail->next = entry;
    }

  /* Finally we get to fill it in with the info we saved earlier */
  entry->past_ndays   = 0;

  entry->past_percent = 0.0;

  entry->max_percent  = (double)max_A;

  entry->max_running  = max_B;

  entry->today_max    = 0.0;

  entry->today_usage  = 0.0;

  entry->default_mem  = (size_t)0;

  return (num);
  }
コード例 #4
0
ファイル: getrsrcs.c プロジェクト: CESNET/torque
/*
 * Find an entry for the resources for the requested host in the list of
 * existing resources, or create a new one for that host and return it.
 *
 *   exechost - name of the host whose resources are wanted.
 *
 * If schd_RsrcsList already holds an entry for 'exechost' it is returned
 * as is.  Otherwise the host's resource monitor is queried (guarded by a
 * SIGALRM timeout of GETRSRCS_CONNECT_TIME), a new Resources structure is
 * filled in, appended to schd_RsrcsList and returned.
 *
 * Returns NULL, after logging, if memory cannot be obtained or if any
 * reply from the resource monitor is missing or cannot be parsed.
 *
 * Any alarm that was pending on entry is re-armed on exit, less the time
 * spent in this function (but never for less than one second).
 */
Resources *
schd_get_resources(char *exechost)
{
    char   *id = "schd_get_resources";
    Resources *rptr, *new_rsrcs;
    int     rm;

    char   *response = NULL;
    int     badreply   = 0;
    int     cpus_avail = 0;
    size_t  pmem_avail = 0;

    char    hpm_ctl[64];

    struct sigaction act, oact;
    int     handler_set = 0; /* Was our SIGALRM handler installed? */

    unsigned int remain; /* Time remaining in any old alarm(). */
    time_t then;  /* When this alarm() was started. */

#ifdef NODEMASK
    Bitfield cpy;
    int     i, j;
#endif /* NODEMASK */

    /*
     * Check for a local copy of the resources being available already.
     * If so, just return a reference to that Resources structure.
     */

    if (schd_RsrcsList != NULL)
    {
        for (rptr = schd_RsrcsList; rptr != NULL; rptr = rptr->next)
            if (strcmp(rptr->exechost, exechost) == 0)
                return (rptr);
    }

    schd_timestamp("get_rsrcs");

    /*
     * No cached resource information for 'exechost'.  Need to query the
     * host for its information.
     */

    if ((new_rsrcs = (Resources *)malloc(sizeof(Resources))) == NULL)
    {
        (void)sprintf(log_buffer, "Unable to alloc space for Resources.");
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
        DBPRT(("%s: %s\n", id, log_buffer));

        return (NULL); /* Can't get the information - nowhere to store it. */
    }

    memset((void *)new_rsrcs, 0, sizeof(Resources));

    act.sa_flags = 0;
    act.sa_handler = connect_interrupt;
    sigemptyset(&act.sa_mask);
    remain = 0;
    then = 0;

    /*
     * Set the alarm, and maintain some idea of how long was left on any
     * previously set alarm.  If the handler cannot be installed, no alarm
     * is set and the caller's alarm and handler are left untouched.
     */

    if (sigaction(SIGALRM, &act, &oact) == 0)
    {
        handler_set = 1;
        remain = alarm(GETRSRCS_CONNECT_TIME);
        then = time(NULL);
    }

    if ((rm = openrm(exechost, 0)) == -1)
    {
        (void)sprintf(log_buffer,
                      "Unable to contact resmom@%s (%d)", exechost, pbs_errno);
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

        badreply = 1;
        goto bail;
    }

    /*
     * Turn off full response.  Responses will be received in the order in
     * which they are sent.
     */
    fullresp(0);

    /* Build a list of all the resources about which we want information. */

    addreq(rm, "loadave");

    addreq(rm, "availmem");

    addreq(rm, "physmem");

    addreq(rm, "ncpus");

#ifdef NODEMASK
    addreq(rm, "availmask");

#endif /* NODEMASK */

    if (schd_MANAGE_HPM)
    {
        (void)sprintf(hpm_ctl, HPM_CTL_FORMAT_STR, HPM_CTL_QUERY_STR);
        addreq(rm, hpm_ctl);
    }

    /*
     * Get the values back from the resource monitor, and round up.
     *
     * Every reply is freed as soon as it has been consumed and 'response'
     * is reset to NULL, so that the unconditional free() at 'bail' can
     * never release the same buffer a second time.
     */

    /* Receive LOADAVE response from resource monitor. */
    response = getreq(rm);

    if (response != NULL)
    {
        new_rsrcs->loadave = atof(response) * schd_FAKE_MACH_MULT;
        (void)free(response);
        response = NULL;
    }
    else
    {
        (void)sprintf(log_buffer, "bad return from getreq(loadave), %d, %d",
                      pbs_errno, errno);
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
        badreply = 1;
        goto bail;
    }

    /* Receive AVAILMEM response from resource monitor. */
    response = getreq(rm);

    if (response != NULL)
    {
        new_rsrcs->freemem = schd_val2byte(response);
        new_rsrcs->freemem *= schd_FAKE_MACH_MULT;
        (void)free(response);
        response = NULL;
    }
    else
    {
        (void)sprintf(log_buffer, "bad return from getreq(freemem), %d, %d",
                      pbs_errno, errno);
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
        badreply = 1;
        goto bail;
    }

    /* Receive PHYSMEM response from resource monitor. */
    response = getreq(rm);

    if (response != NULL)
    {
        pmem_avail = schd_val2byte(response);
        pmem_avail *= schd_FAKE_MACH_MULT;
        (void)free(response);
        response = NULL;
    }
    else
    {
        (void)sprintf(log_buffer, "bad return from getreq(realmem), %d, %d",
                      pbs_errno, errno);
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
        badreply = 1;
        goto bail;
    }

    /* Receive NCPUS response from resource monitor. */
    response = getreq(rm);

    if (response != NULL)
    {
        cpus_avail = atoi(response) * schd_FAKE_MACH_MULT;
        (void)free(response);
        response = NULL;
    }
    else
    {
        (void)sprintf(log_buffer, "bad return from getreq(ncpus), %d, %d",
                      pbs_errno, errno);
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
        badreply = 1;
        goto bail;
    }

#ifdef NODEMASK
    /* Receive available nodes from resource monitor. */
    response = getreq(rm);

    if (response == NULL)
    {
        (void)sprintf(log_buffer, "bad return from getreq(availmask), %d, %d",
                      pbs_errno, errno);
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
        badreply = 1;
        goto bail;
    }
    else
    {
        /* Try the reply as a bit string first, then as a mask string. */
        if (schd_bits2mask(response, &new_rsrcs->availmask) != 0)
        {
            if (schd_str2mask(response, &new_rsrcs->availmask) != 0)
            {
                (void)sprintf(log_buffer, "can't parse availmask '%s'", response);
                log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
                badreply = 1;
                goto bail;
            }
        }

        (void)free(response);
        response = NULL;
    }

#endif /* NODEMASK */

    if (schd_MANAGE_HPM)
    {
        /* Receive HPM_CTL response from resource monitor. */
        response = getreq(rm);

        if (response != NULL)
        {
            if (strcmp(response, HPM_CTL_USERMODE_STR) == 0)
                new_rsrcs->flags |= RSRCS_FLAGS_HPM_USER;
            else if (strcmp(response, HPM_CTL_GLOBALMODE_STR) == 0)
                new_rsrcs->flags &= ~RSRCS_FLAGS_HPM_USER;
            else
            {
                (void)sprintf(log_buffer, "bad response '%s' for '%s@%s'",
                              response, hpm_ctl, exechost);
                log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                           log_buffer);
                badreply = 1;
                goto bail;
            }
        }
        else
        {
            (void)sprintf(log_buffer, "bad return from getreq(%s), %d, %d",
                          hpm_ctl, pbs_errno, errno);
            log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
            badreply = 1;
            goto bail;
        }
    }

    /*
     * NOTE: a reply that is still held here (the HPM reply, or one that
     * failed to parse) is free()'d in bail.  Replies consumed above have
     * already been freed and 'response' cleared.
     */

bail:
    free(response); /* free(NULL) is a no-op. */
    response = NULL;

    /* Disconnect from the resource monitor. */
    if (rm >= 0)  /* resmom handle "0" is valid in RPP. */
        closerm(rm);

    /*
     * And unset the alarm and handler - but only if they were actually
     * installed above; otherwise 'oact' was never filled in and any alarm
     * still pending belongs to the caller.
     */
    if (handler_set)
    {
        alarm(0);

        sigaction(SIGALRM, &oact, &act);
    }

    /* Reset the old alarm, taking into account how much time has passed. */
    if (remain)
    {
        time_t elapsed = time(NULL) - then;

        if (elapsed < 0) /* Clock stepped backwards. */
            elapsed = 0;

        DBPRT(("%s: old alarm had %u secs remaining, %ld elapsed, ", id,
               remain, (long)elapsed));

        /*
         * How much time remains even after the time spent above?  Would
         * the previous time have already expired?  If so, schedule an
         * alarm call in 1 second (close enough, hopefully).
         *
         * 'remain' is unsigned, so compare before subtracting rather than
         * testing the difference, which would wrap to a huge value.
         */

        if ((time_t)remain > elapsed)
            remain -= (unsigned int)elapsed;
        else
            remain = 1;

        DBPRT(("reset to %u secs\n", remain));

        alarm(remain);
    }

    /*
     * Verify all the data came back as expected; if not, abort this
     * iteration of the scheduler.
     */
    if (badreply)
    {
        (void)sprintf(log_buffer,
                      "Got bad info from mom@%s - aborting sched run", exechost);
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
        DBPRT(("%s: %s\n", id, log_buffer));

        free(new_rsrcs);
        return (NULL);
    }

    /* Make a copy of the hostname for the resources struct. */
    new_rsrcs->exechost = schd_strdup(exechost);

    if (new_rsrcs->exechost == NULL)
    {
        (void)sprintf(log_buffer, "Unable to copy exechost %s to rsrcs",
                      exechost);
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
        DBPRT(("%s: %s\n", id, log_buffer));

        free(new_rsrcs);
        return (NULL);
    }

    new_rsrcs->nodes_total = NODES_REQD(cpus_avail, pmem_avail);

#ifdef NODEMASK
    /* Copy the availmask schd_FAKE_MACH_MULT times to match avail cpus. */
    BITFIELD_CPY(&cpy, &(new_rsrcs->availmask));

    for (i = 2; i <= schd_FAKE_MACH_MULT; i++)
    {
        for (j = 0; j < (cpus_avail / schd_FAKE_MACH_MULT / 2); j++)
            BITFIELD_SHIFTL(&cpy);

        BITFIELD_SETM(&(new_rsrcs->availmask), &cpy);
    }

#endif /* NODEMASK */

    if (schd_RsrcsList == NULL)
    {
        schd_RsrcsList  = new_rsrcs; /* Start the list. */
    }
    else
    {
        for (rptr = schd_RsrcsList; rptr->next != NULL; rptr = rptr->next)
            /* Find the last element in the list. */ ;

        rptr->next = new_rsrcs;
    }

    /* Next pointer for the tail of the list points to nothing. */
    new_rsrcs->next = NULL;

    return (new_rsrcs);
}
コード例 #5
0
ファイル: jobinfo.c プロジェクト: Johnlihj/torque
/*
 * This function takes a pointer to a struct batch_status for a job, and
 * fills in the appropriate fields of the supplied job struct.
 *
 *   bs  - batch status record for the job; its name and attribute list
 *         are read.
 *   job - structure to fill in.  It is zeroed first, so anything it held
 *         on entry is discarded.
 *
 * Returns the number of items that were stored and counted (the job id
 * plus each recognised attribute; the ctime, mtime and etime attributes
 * are stored but not counted), or -1 if a string could not be duplicated.
 * On a -1 return the strings duplicated so far are still attached to
 * 'job' - presumably the caller releases them; verify against callers.
 */
int
schd_get_jobinfo(Batch_Status *bs, Job *job)
  {
  int       changed = 0;
  int       istrue;
  char      tmp_str[120];
  char     *id = "schd_get_jobinfo";
  char     *p, *tmp_p, *var_p;
  size_t    owner_len;
  AttrList *attr;

  memset((void *)job, 0, sizeof(Job));

  job->jobid = schd_strdup(bs->name);

  if (job->jobid == NULL)
    {
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
               "schd_strdup(bs->name)");
    return (-1);
    }

  changed ++;

  for (attr = bs->attribs; attr != NULL; attr = attr->next)
    {

    /*
     * If this is the 'owner' field, keep only the user name, i.e. the
     * text in front of the '@' that separates user and hostname.
     */
    if (!strcmp(attr->name, ATTR_owner))
      {

      /*
       * Copy at most sizeof(tmp_str) - 1 characters so that an overlong
       * owner string is truncated instead of overrunning tmp_str.
       */
      owner_len = strcspn(attr->value, "@");

      if (owner_len >= sizeof(tmp_str))
        owner_len = sizeof(tmp_str) - 1;

      memcpy(tmp_str, attr->value, owner_len);

      tmp_str[owner_len] = '\0';

      job->owner = schd_strdup(tmp_str);

      if (job->owner == NULL)
        {
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                   "schd_strdup(job->owner)");
        return (-1);
        }

      changed ++;

      continue;
      }

    /* The group to which to charge the resources for this job. */
    if (!strcmp(attr->name, ATTR_egroup))
      {
      job->group = schd_strdup(attr->value);

      if (job->group == NULL)
        {
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                   "schd_strdup(job->group)");
        return (-1);
        }

      changed ++;

      continue;
      }

    /* The comment currently assigned to this job. */
    if (!strcmp(attr->name, ATTR_comment))
      {
      job->comment = schd_strdup(attr->value);

      if (job->comment == NULL)
        {
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                   "schd_strdup(job->comment)");
        return (-1);
        }

      changed ++;

      continue;
      }

    /* The host on which this job is running. */
    if (!strcmp(attr->name, ATTR_exechost))
      {
      job->exechost = schd_strdup(attr->value);

      if (job->exechost == NULL)
        {
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                   "schd_strdup(job->exechost)");
        return (-1);
        }

      changed ++;

      continue;
      }

    if (!strcmp(attr->name, ATTR_inter))
      {
      /* Is this job interactive or not? */
      if (schd_val2bool(attr->value, &istrue) == 0)
        {
        if (istrue)
          job->flags |= JFLAGS_INTERACTIVE;
        else
          job->flags &= ~JFLAGS_INTERACTIVE;

        changed ++;
        }
      else
        {
        DBPRT(("%s: can't parse %s = %s into boolean\n", id,
               attr->name, attr->value));
        }

      continue;
      }

    if (!strcmp(attr->name, ATTR_state))
      {
      /* State is one of 'R', 'Q', 'E', etc. */
      job->state = attr->value[0];
      changed ++;
      continue;
      }

    if (!strcmp(attr->name, ATTR_queue))
      {
      job->qname = schd_strdup(attr->value);

      if (job->qname == NULL)
        {
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                   "schd_strdup(job->qname)");
        return (-1);
        }

      job->flags |= JFLAGS_QNAME_LOCAL;

      changed ++;
      continue;
      }

    if (!strcmp(attr->name, ATTR_v))
      {
      /* Work on a private copy, since strtok() is destructive. */
      var_p = schd_strdup(attr->value);

      if (var_p == NULL)
        {
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                   "schd_strdup(Variable_List)");
        return (-1);
        }

      p = NULL;

      tmp_p = strstr(var_p, "PBS_O_QUEUE");

      if (tmp_p)
        {
        /* First token is the variable name, second is its value. */
        p = strtok(tmp_p, "=");
        p = strtok(NULL,  ", ");
        }

      if (p != NULL)
        {
        job->oqueue = schd_strdup(p);
        }
      else
        {
        /* if the originating queue is unknown, default
         * to the locally defined "submit" queue.
         */
        job->oqueue = schd_strdup(schd_SubmitQueue->queue->qname);
        }

      /* 'p' points into var_p, so release it only after the copy. */
      free(var_p);

      if (job->oqueue == NULL)
        {
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                   "schd_strdup(job->oqueue)");
        return (-1);
        }

      changed ++;
      continue;
      }

    if (!strcmp(attr->name, ATTR_l))
      {
      if (!strcmp(attr->resource, "arch"))
        {
        job->arch = schd_strdup(attr->value);

        if (job->arch == NULL)
          {
          log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                     "schd_strdup(job->arch)");
          return (-1);
          }

        changed ++;

        }
      else if (!strcmp(attr->resource, "mem"))
        {
        job->memory = schd_val2byte(attr->value);
        changed ++;

        }
      else if (!strcmp(attr->resource, "ncpus"))
        {
        job->ncpus = atoi(attr->value);
        changed ++;

        }
      else if (!strcmp(attr->resource, "walltime"))
        {
        job->walltime = schd_val2sec(attr->value);
        changed ++;

        }
      else if (!strcmp(attr->resource, "speed"))
        {
        job->speed = atoi(attr->value);
        changed ++;

        }
      else if (!strcmp(attr->resource, "tmpdir"))
        {
        job->tmpdir = schd_val2byte(attr->value);
        changed ++;

        }
      else if (!strcmp(attr->resource, FEATURE_A))
        {
        job->featureA = schd_strdup(attr->value);

        if (job->featureA == NULL)
          {
          log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                     "schd_strdup(job->featureA)");
          return (-1);
          }

        changed ++;
        }
      else if (!strcmp(attr->resource, FEATURE_B))
        {
        job->featureB = schd_strdup(attr->value);

        if (job->featureB == NULL)
          {
          log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                     "schd_strdup(job->featureB)");
          return (-1);
          }

        changed ++;
        }
      else if (!strcmp(attr->resource, FEATURE_C))
        {
        job->featureC = schd_strdup(attr->value);

        if (job->featureC == NULL)
          {
          log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                     "schd_strdup(job->featureC)");
          return (-1);
          }

        changed ++;
        }
      else if (!strcmp(attr->resource, FEATURE_D))
        {
        job->featureD = atol(attr->value);
        changed ++;
        }
      else if (!strcmp(attr->resource, FEATURE_E))
        {
        job->featureE = atol(attr->value);
        changed ++;
        }
      else if (!strcmp(attr->resource, FEATURE_F))
        {
        job->featureF = atol(attr->value);
        changed ++;
        }

      /*
       * Boolean features: store the value only when it parses, exactly
       * as is done for ATTR_inter above, so that 'istrue' is never read
       * without having been set.  On a parse failure the field keeps
       * the zero it was given by the memset() at the top.
       */
      else if (!strcmp(attr->resource, FEATURE_G))
        {
        if (schd_val2bool(attr->value, &istrue) == 0)
          {
          job->featureG = istrue;
          changed ++;
          }
        else
          {
          DBPRT(("%s: can't parse %s.%s = %s into boolean\n", id,
                 attr->name, attr->resource, attr->value));
          }
        }
      else if (!strcmp(attr->resource, FEATURE_H))
        {
        if (schd_val2bool(attr->value, &istrue) == 0)
          {
          job->featureH = istrue;
          changed ++;
          }
        else
          {
          DBPRT(("%s: can't parse %s.%s = %s into boolean\n", id,
                 attr->name, attr->resource, attr->value));
          }
        }
      else if (!strcmp(attr->resource, FEATURE_I))
        {
        if (schd_val2bool(attr->value, &istrue) == 0)
          {
          job->featureI = istrue;
          changed ++;
          }
        else
          {
          DBPRT(("%s: can't parse %s.%s = %s into boolean\n", id,
                 attr->name, attr->resource, attr->value));
          }
        }

      /* That's all for requested resources. */
      continue;
      }

    if (!strcmp(attr->name, ATTR_used))
      {
      if (!strcmp(attr->resource, "walltime"))
        {
        job->walltime_used = schd_val2sec(attr->value);
        changed ++;
        }

      /* No other interesting cases. */
      continue;
      }

    /* Creation time attribute. */
    if (!strcmp(attr->name, ATTR_ctime))
      {
      /* How long ago was it put in the queue ? */
      job->time_queued = schd_TimeNow - atoi(attr->value);
      continue;
      }

    /* Modified time attribute. */
    if (!strcmp(attr->name, ATTR_mtime))
      {
      /* When was the job last modified? */
      job->mtime = atoi(attr->value);
      continue;
      }

    /*
     * When was the job last eligible to run?  When a user-hold is
     * released, this value is updated to the current time.  This
     * prevents users from gaining higher priority from holding their
     * jobs.
     */
    if (!strcmp(attr->name, ATTR_etime))
      {
      job->eligible = schd_TimeNow - atoi(attr->value);

      continue;
      }
    }

  /*
   * If this job is in the "Running" state, compute how many seconds
   * remain until it is completed.
   */
  if (job->state == 'R')
    {
    job->time_left = job->walltime - job->walltime_used;
    }

  /*
   * If this job was enqueued since the last time we ran, set the job
   * flag to indicate that we have not yet seen this job.  This makes it
   * a candidate for additional processing.  There may be some inaccuracy,
   * since the time_t has resolution of 1 second.  Attempt to err on the
   * side of caution.
   */
  if ((job->state == 'Q') && (job->time_queued != UNSPECIFIED))
    {
    if (job->time_queued <= (schd_TimeNow - schd_TimeLast))
      {
      job->flags |= JFLAGS_FIRST_SEEN;
      }
    }

  /*
   * If the 'etime' attribute wasn't found, set it to the time the job has
   * been queued.  Most jobs will be eligible to run their entire lifetime.
   * The exception is a job that has been held - if it was a user hold,
   * the release will reset the etime to the latest value.
   * If no eligible time was given, use the job's creation time.
   */
  if (!job->eligible)
    job->eligible = job->time_queued;

  return (changed);
  }
コード例 #6
0
ファイル: jobinfo.c プロジェクト: AlbertDeFusco/torque
/*
 * This function takes a pointer to a struct batch_status for a job, and
 * fills in the appropriate fields of the supplied job struct.  It returns
 * the number of items that were found.
 */
int
schd_get_jobinfo(Batch_Status *bs, Job *job)
  {
  int       changed = 0;
  int       istrue;
  char      tmp_str[120];
  char     *id = "schd_get_jobinfo";
  char     *host;
  char     *p, *tmp_p, *var_p;
  AttrList *attr;

  memset((void *)job, 0, sizeof(Job));

  job->jobid = schd_strdup(bs->name);

  if (job->jobid == NULL)
    {
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
               "schd_strdup(bs->name)");
    return (-1);
    }

  changed ++;

  for (attr = bs->attribs; attr != NULL; attr = attr->next)
    {

    /*
     * If this is the 'owner' field, chop it into 'owner' and 'host'
     * fields, and copy them into the Job struct.
     */
    if (!strcmp(attr->name, ATTR_owner))
      {

      /* Look for the '@' that separates user and hostname. */
      strcpy(tmp_str, attr->value);
      host = strchr(tmp_str, '@');

      if (host)
        {
        *host = '\0'; /* Replace '@' with NULL (ends username). */
        host ++; /* Move to first character of hostname. */
        }

      job->owner = schd_strdup(tmp_str);

      if (job->owner == NULL)
        {
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                   "schd_strdup(job->owner)");
        return (-1);
        }

      changed ++;

      continue;
      }

    /* The group to which to charge the resources for this job. */
    if (!strcmp(attr->name, ATTR_egroup))
      {
      job->group = schd_strdup(attr->value);

      if (job->group == NULL)
        {
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                   "schd_strdup(job->group)");
        return (-1);
        }

      changed ++;

      continue;
      }

    /* The comment currently assigned to this job. */
    if (!strcmp(attr->name, ATTR_comment))
      {
      job->comment = schd_strdup(attr->value);

      if (job->comment == NULL)
        {
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                   "schd_strdup(job->comment)");
        return (-1);
        }

      changed ++;

      continue;
      }

    /* The host on which this job is running (or was running for
     * suspended or checkpointed jobs. */

    if (!strcmp(attr->name, ATTR_exechost))
      {
      job->exechost = schd_strdup(attr->value);

      if (job->exechost == NULL)
        {
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                   "schd_strdup(job->exechost)");
        return (-1);
        }

      changed ++;

      continue;
      }

    if (!strcmp(attr->name, ATTR_inter))
      {
      /* Is this job interactive or not? */
      if (schd_val2bool(attr->value, &istrue) == 0)
        {
        if (istrue)
          job->flags |= JFLAGS_INTERACTIVE;
        else
          job->flags &= ~JFLAGS_INTERACTIVE;

        changed ++;
        }
      else
        {
        DBPRT(("%s: can't parse %s = %s into boolean\n", id,
               attr->name, attr->value));
        }

      continue;
      }

    if (!strcmp(attr->name, ATTR_state))
      {
      /* State is one of 'R', 'Q', 'E', etc. */
      job->state = attr->value[0];
      changed ++;
      continue;
      }

    if (!strcmp(attr->name, ATTR_queue))
      {
      job->qname = schd_strdup(attr->value);

      if (job->qname == NULL)
        {
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                   "schd_strdup(job->qname)");
        return (-1);
        }

      job->flags |= JFLAGS_QNAME_LOCAL;

      changed ++;
      continue;
      }

    if (!strcmp(attr->name, ATTR_v))
      {
      var_p = schd_strdup(attr->value);

      if (var_p == NULL)
        {
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                   "schd_strdup(Variable_List)");
        return (-1);
        }

      p = NULL;

      tmp_p = strstr(var_p, "PBS_O_QUEUE");

      if (tmp_p)
        {
        p = strtok(tmp_p, "=");
        p = strtok(NULL,  ", ");
        }

      if (p != NULL)
        {
        job->oqueue = schd_strdup(p);
        }
      else
        {
        /* if the originating queue is unknown, default
         * to the locally defined "submit" queue.
         */
        job->oqueue = schd_strdup(schd_SubmitQueue->queue->qname);
        }

      free(var_p);

      changed ++;
      continue;
      }

    if (!strcmp(attr->name, ATTR_l))
      {
      if (!strcmp(attr->resource, "arch"))
        {
        job->arch = schd_strdup(attr->value);
        changed ++;

        }
      else if (!strcmp(attr->resource, "mem"))
        {
        job->memory = schd_val2byte(attr->value);
        changed ++;

        }
      else if (!strcmp(attr->resource, "ncpus"))
        {
        job->ncpus = atoi(attr->value);
        changed ++;

        }
      else if (!strcmp(attr->resource, "walltime"))
        {
        job->walltime = schd_val2sec(attr->value);
        changed ++;

        }

      /* That's all for requested resources. */
      continue;
      }

    if (!strcmp(attr->name, ATTR_used))
      {
      if (!strcmp(attr->resource, "walltime"))
        {
        job->walltime_used = schd_val2sec(attr->value);
        changed ++;
        }

      /* No other interesting cases. */
      continue;
      }

    /* Creation time attribute. */
    if (!strcmp(attr->name, ATTR_ctime))
      {
      /* How long ago was it put in the queue ? */
      job->time_queued = schd_TimeNow - atoi(attr->value);
      continue;
      }

    /* Modified time attribute. */
    if (!strcmp(attr->name, ATTR_mtime))
      {
      /* When was the job last modified? */
      job->mtime = atoi(attr->value);
      continue;
      }

    /* Job Substate attribute. */
    if (!strcmp(attr->name, ATTR_substate))
      {
      if (atoi(attr->value) == 43 /* JOB_SUBSTATE_SUSPEND */)
        job->flags |= JFLAGS_SUSPENDED;

      continue;
      }

    /*
     * When was the job last eligible to run?  When a user-hold is
     * released, this value is updated to the current time.  This
     * prevents users from gaining higher priority from holding their
     * jobs.
     */
    if (!strcmp(attr->name, ATTR_etime))
      {
      job->eligible = schd_TimeNow - atoi(attr->value);

      continue;
      }
    }

  if (job->memory < 1)
    {
    job->memory = get_default_mem(job->oqueue);
    schd_alterjob(connector, job, ATTR_l, schd_byte2val(job->memory), "mem");
    changed++;
    }

  /*
   * If this job is in the "Running" or "Suspended" state, compute how
   * many seconds remain until it is completed.
   */
  if (job->state == 'R' || job->state == 'S')
    {
    job->time_left = job->walltime - job->walltime_used;
    }

  /*
   * If this job was enqueued since the last time we ran, set the job
   * flag to indicate that we have not yet seen this job.  This makes it
   * a candidate for additional processing.  There may be some inaccuracy,
   * since the time_t has resolution of 1 second.  Attempt to err on the
   * side of caution.
   */
  if ((job->state == 'Q') && (job->time_queued != UNSPECIFIED))
    {
    if (job->time_queued <= (schd_TimeNow - schd_TimeLast))
      {
      job->flags |= JFLAGS_FIRST_SEEN;
      }
    }

  /*
   * If this job was previously running and is now queued, then we
   * need to (a) flag it as having been checkpointed, and (b) move
   * it back to the submit queue, if its not already there.
   */
  if (job->exechost && job->state == 'Q')
    {
    job->flags |= JFLAGS_CHKPTD;

    if (strcmp(job->qname, schd_SubmitQueue->queue->qname))
      {
      sprintf(log_buffer, "moving Q'd job %s back to SUBMIT Q",
              job->jobid);
      log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
      pbs_movejob(connector, job->jobid, schd_SubmitQueue->queue->qname,
                  NULL);
      }
    }

  /*
   * if this job is currently Suspended (a substate of 'R'unning), then
   * pretend its queued, so that the scheduling logic will work.
   */
  if (job->state == 'S')
    {
    job->state = 'Q';
    job->flags |= JFLAGS_SUSPENDED;
    }

  /* if this job is suspended, checkpointed, or otherwise "queued"
   * on an exection queue, update the internal representation of
   * to pretend it is really on the submit queue.
   */

  if ((job->flags & JFLAGS_SUSPENDED) || (job->flags & JFLAGS_CHKPTD))
    {
    free(job->qname);
    job->qname = schd_strdup(schd_SubmitQueue->queue->qname);
    }

  /*
   * If this job came from the EXPRESS queue, set the flag so that it
   * will be treated with the highest of priority.
   */
  if (!strcmp(job->oqueue, schd_EXPRESS_Q_NAME))
    job->flags |= JFLAGS_PRIORITY;

  /*
   * If the 'etime' attribute wasn't found, set it to the time the job has
   * been queued.  Most jobs will be eligible to run their entire lifetime.
   * The exception is a job that has been held - if it was a user hold,
   * the release will reset the etime to the latest value.
   * If not eligible time was given, use the job's creation time.
   */
  if (!job->eligible)
    job->eligible = job->time_queued;

  /* if this job has waited too long, and its queue is NOT over its
   * shares, then bump it up in priority.
   */
  if (job->eligible > schd_MAX_WAIT_TIME && job->sort_order <= 100)
    job->flags |= JFLAGS_WAITING;

  return (changed);
  }
コード例 #7
0
ファイル: jobinfo.c プロジェクト: Johnlihj/torque
/*
 * This function takes a pointer to a struct batch_status for a job, and
 * fills in the appropriate fields of the supplied job struct.
 *
 * The Job struct is zeroed before anything is stored in it, so whatever it
 * held on entry is discarded (not freed).  Strings copied into the Job are
 * duplicated with schd_strdup().
 *
 * Returns the number of items that were found (the job id plus each
 * attribute that bumps 'changed'), or -1 if one of the string copies
 * fails.  Fields already filled in when -1 is returned are left in place.
 */
int
schd_get_jobinfo(Batch_Status *bs, Job *job)
  {
  char *id = "schd_get_jobinfo";
  int     changed = 0;
  int     cpu_req = 0;
  size_t  mem_req = 0;
  char     *host;
  char *p, *tmp_p, *var_p;
  AttrList *attr;
  int     istrue;

  memset((void *)job, 0, sizeof(Job));

  job->jobid = schd_strdup(bs->name);

  if (job->jobid == NULL)
    {
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
               "schd_strdup(bs->name)");
    return (-1);
    }

  changed ++;

  for (attr = bs->attribs; attr != NULL; attr = attr->next)
    {

    /*
     * If this is the 'owner' field, chop it into 'owner' and 'host'
     * fields, and copy them into the Job struct.
     */
    if (!strcmp(attr->name, ATTR_owner))
      {

      /* Look for the '@' that separates user and hostname. */
      host = strchr(attr->value, '@');

      if (host)
        *host = '\0'; /* Replace '@' with NUL (ends username). */

      job->owner = schd_strdup(attr->value);

      /*
       * We don't "own" the attribute strings, so put back the '@'
       * character we removed above, in case something else expects
       * it to be there.  This is done before any error return so the
       * attribute is never left truncated.
       */
      if (host)
        {
        *host = '@'; /* Replace the '@' that was deleted above. */
        host ++; /* Move to first character of hostname. */
        }

      if (job->owner == NULL)
        {
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                   "schd_strdup(job->owner)");
        return (-1);
        }

      changed ++;

      /*
       * NOTE(review): when the owner has no '@', 'host' is NULL and is
       * handed to schd_strdup() unchanged, exactly as before.  Confirm
       * how schd_strdup() treats a NULL argument.
       */
      job->host  = schd_strdup(host);

      if (job->host == NULL)
        {
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                   "schd_strdup(job->host)");
        return (-1);
        }

      changed ++;

      /* That's all for the owner field. */
      continue;
      }

    /* The group to which to charge the resources for this job. */
    if (!strcmp(attr->name, ATTR_egroup))
      {
      job->group = schd_strdup(attr->value);

      if (job->group == NULL)
        {
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                   "schd_strdup(job->group)");
        return (-1);
        }

      changed ++;

      continue;
      }

    /* The comment currently assigned to this job. */
    if (!strcmp(attr->name, ATTR_comment))
      {
      job->comment = schd_strdup(attr->value);

      if (job->comment == NULL)
        {
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                   "schd_strdup(job->comment)");
        return (-1);
        }

      changed ++;

      continue;
      }

    /* The host on which this job is running. */
    if (!strcmp(attr->name, ATTR_exechost))
      {
      job->exechost = schd_strdup(attr->value);

      if (job->exechost == NULL)
        {
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                   "schd_strdup(job->exechost)");
        return (-1);
        }

      changed ++;

      continue;
      }

    if (!strcmp(attr->name, ATTR_inter))
      {
      /* Is this job interactive or not? */
      if (schd_val2bool(attr->value, &istrue) == 0)
        {
        if (istrue)
          job->flags |= JFLAGS_INTERACTIVE;
        else
          job->flags &= ~JFLAGS_INTERACTIVE;

        changed ++;
        }
      else
        {
        DBPRT(("%s: can't parse %s = %s into boolean\n", id,
               attr->name, attr->value));
        }

      continue;
      }

    if (!strcmp(attr->name, ATTR_state))
      {
      /* State is one of 'R', 'Q', 'E', etc. */
      job->state = attr->value[0];
      changed ++;
      continue;
      }

    if (!strcmp(attr->name, ATTR_queue))
      {
      job->qname = schd_strdup(attr->value);

      if (job->qname == NULL)
        {
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                   "schd_strdup(job->qname)");
        return (-1);
        }

      /* Record that qname is a private copy made here. */
      job->flags |= JFLAGS_QNAME_LOCAL;

      changed ++;
      continue;
      }

    if (!strcmp(attr->name, ATTR_v))
      {
      /* Work on a scratch copy, since strtok() is destructive. */
      var_p = schd_strdup(attr->value);

      if (var_p == NULL)
        {
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                   "schd_strdup(Variable_List)");
        return (-1);
        }

      p = NULL;

      tmp_p = strstr(var_p, "PBS_O_QUEUE");

      if (tmp_p)
        {
        /* First token is the variable name; the second is its value. */
        p = strtok(tmp_p, "=");
        p = strtok(NULL,  ", ");
        }

      if (p != NULL)
        {
        job->oqueue = schd_strdup(p);
        }
      else
        {
        /* if the originating queue is unknown, default
         * to the locally defined "submit" queue.
         */
        job->oqueue = schd_strdup(schd_SubmitQueue->queue->qname);
        }

      free(var_p);

      /* Treat a failed copy like every other string attribute. */
      if (job->oqueue == NULL)
        {
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                   "schd_strdup(job->oqueue)");
        return (-1);
        }

      changed ++;
      continue;
      }

    if (!strcmp(attr->name, ATTR_l))
      {
      /*
       * ncpus, mppe and mem each propose a node count; the largest one
       * seen so far is kept in job->nodes.
       */
      if (!strcmp(attr->resource, "walltime"))
        {
        job->walltime = schd_val2sec(attr->value);
        changed ++;

        }
      else if (!strcmp(attr->resource, "ncpus"))
        {
        cpu_req = atoi(attr->value);
        job->nodes = MAX(job->nodes, cpu_req);
        changed ++;

        }
      else if (!strcmp(attr->resource, "mppe"))
        {
        cpu_req = atoi(attr->value);
        job->nodes = MAX(job->nodes, cpu_req);
        changed ++;

        }
      else if (!strcmp(attr->resource, "mem"))
        {
        mem_req = schd_val2byte(attr->value);
        job->nodes = MAX(job->nodes, NODES_FROM_MEM(mem_req));
        changed ++;

        }

#if PE_MASK != 0
      else if (!strcmp(attr->resource, "pe_mask"))
        {
        if (schd_str2mask(attr->value, &job->nodemask))
          {
          (void)sprintf(log_buffer,
                        "bad pe_mask %s for job %s", attr->value, job->jobid);
          log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                     log_buffer);
          }
        else
          changed++; /* Job pe_mask was valid. */

        }

#endif /* PE_MASK */

      /* That's all for requested resources. */
      continue;
      }

    if (!strcmp(attr->name, ATTR_used))
      {
      if (!strcmp(attr->resource, "walltime"))
        {
        job->walltime_used = schd_val2sec(attr->value);

        changed ++;
        }

      /* No other interesting cases. */
      continue;
      }

    /* Session ID for running jobs (used to correlate GRM info) */
    if (!strcmp(attr->name, ATTR_session))
      {
      job->session = atoi(attr->value);
      continue;
      }

    /* Job Priority attribute (inherited from queue) */
    if (!strcmp(attr->name, ATTR_p))
      {
      job->priority = atoi(attr->value);
      continue;
      }

    /* Creation time attribute. */
    if (!strcmp(attr->name, ATTR_ctime))
      {
      /* How long ago was it put in the queue ? */
      job->time_queued = schd_TimeNow - atoi(attr->value);

      continue;
      }

    /* Modified time attribute. */
    if (!strcmp(attr->name, ATTR_mtime))
      {
      /* When was the job last modified? */
      job->mtime = atoi(attr->value);

      continue;
      }

#ifdef ATTR_etime
    /*
     * When was the job last eligible to run?  When a user-hold is
     * released, this value is updated to the current time.  This
     * prevents users from gaining higher priority from holding their
     * jobs.
     */
    if (!strcmp(attr->name, ATTR_etime))
      {
      job->eligible = schd_TimeNow - atoi(attr->value);

      continue;
      }

#endif /* ATTR_etime */
    }

  /*
   * If this job is in the "Running" state, compute how many seconds
   * remain until it is completed.
   */
  if (job->state == 'R')
    {
    job->time_left = job->walltime - job->walltime_used;
    }

  /*
   * If this job was enqueued since the last time we ran, set the job
   * flag to indicate that we have not yet seen this job.  This makes it
   * a candidate for additional processing.  There may be some inaccuracy,
   * since the time_t has resolution of 1 second.  Attempt to err on the
   * side of caution.
   */
  if ((job->state == 'Q') && (job->time_queued != UNSPECIFIED))
    {
    if (job->time_queued <= (schd_TimeNow - schd_TimeLast))
      {
      job->flags |= JFLAGS_FIRST_SEEN;
      }
    }

  /*
   * If the 'etime' attribute wasn't found, set it to the time the job has
   * been queued.  Most jobs will be eligible to run their entire lifetime.
   * The exception is a job that has been held - if it was a user hold,
   * the release will reset the etime to the latest value.
   * If no eligible time was given, use the job's creation time.
   */
  if (!job->eligible)
    job->eligible = job->time_queued;

#if defined(sgi)
  /*
   * If the job provided a memory or CPU resource that does not match
   * the resources that will be allocated by the assigned nodes (i.e.
   * a request for 100mb of memory and 16 CPUs - the job will "get" all
   * 4GB of memory anyway), alter the job attributes such that they
   * will align with the assigned nodes later.
   */
  bump_rsrc_requests(job, cpu_req, mem_req);

#endif /* defined(sgi) */

  /*
   * Need to update the  time_until_eligible  and  total_delay fields,
   * probably from a global array of information saved from previous
   * scheduler iteration.
   */

  /*
   * Calculate the job priority weight sort key to be used later in
   * job sorting. This is the "priority" the job should have during
   * sorting based on the size of the job, the length of time queued,
   * and the job type.
   */
  calc_job_weight(job);

  return (changed);
  }
コード例 #8
0
ファイル: file_chgs.c プロジェクト: AlbertDeFusco/torque
/*
 * Append a record for 'filename' to the global 'filestats' list and load
 * its initial values by calling schd_file_has_changed(filename, 1).
 *
 * Returns 0 on success.  Returns -1 if the file is already on the list,
 * or if memory for the record or its copy of the name cannot be obtained;
 * in those cases the list is left untouched.
 */
int
schd_register_file(char *filename)
  {
  char    *id = "schd_register_file";
  FileStatus *stats, *tail, *new_fs = NULL;

  /*
   * Look for the tail of the list.  While walking the list, check to see
   * that the filename is not already registered.
   */
  tail = NULL;

  for (stats = filestats; stats != NULL; stats = stats->next)
    {
    if (strcmp(filename, stats->filename) == 0)
      {
      sprintf(log_buffer, "%s: file %s already registered.", id,
              filename);
      log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
      DBPRT(("%s: %s\n", id, log_buffer));
      return (-1);
      }

    tail = stats;
    }

  /* Create space for the new record. */
  new_fs = (FileStatus *) malloc(sizeof(FileStatus));

  if (new_fs == NULL)
    {
    sprintf(log_buffer,
            "%s: out of memory allocating FileStatus for file %s",
            id, filename);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
    return (-1);
    }

  /* Clear the record out -- this clears the ctime and next pointer. */
  memset(new_fs, 0, sizeof(FileStatus));

  /* Keep a copy of the filename around. */
  new_fs->filename = schd_strdup(filename);

  if (new_fs->filename == NULL)
    {
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
               "schd_strdup(filename)");

    /* The record was never linked into the list; release it here. */
    free(new_fs);
    return (-1);
    }

  /*
   * If this is not the first element, tack it on the end of the list.
   * Otherwise, start the list with it.
   */
  if (tail)
    tail->next = new_fs;
  else
    filestats = new_fs;

  (void)sprintf(log_buffer, "%s: file %s registered.", id, filename);

  log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

  /*
   * Load the new element with the initial values for the file.  Ignore
   * the return value - only setting up the timestamp and file existence
   * status are important.
   */
  (void)schd_file_has_changed(filename, 1);

  return (0);
  }
コード例 #9
0
ファイル: getqueues.c プロジェクト: AlbertDeFusco/torque
/*
 * schd_get_queue_limits - query queue information from the server.
 *
 * Resets the limit, count and feature fields of 'queue', releases any job
 * list and user ACL still attached to it, then asks the server (via
 * pbs_statque_err()) for the attributes listed in 'alist' and stores the
 * ones it recognizes.  Unrecognized attributes are ignored.
 *
 * Returns 0 on success and -1 if the server query fails.  No path in this
 * function returns any other value.
 */
int
schd_get_queue_limits(Queue *queue)
  {
  char   *id = "schd_get_queue_limits";
  int     istrue;
  int     local_errno = 0;
  Batch_Status *bs;
  AttrList *attr;

  /* Attributes to request; each entry links to the next one. */
  static AttrList alist[] =
    {
      {&alist[1],  ATTR_start, "", ""},
    {&alist[2],  ATTR_enable, "", ""},
    {&alist[3],  ATTR_count, "", ""},
    {&alist[4],  ATTR_maxuserrun, "", ""},
    {&alist[5],  ATTR_rescavail, "", ""},
    {&alist[6],  ATTR_rescassn, "", ""},
    {&alist[7],  ATTR_rescdflt, "", ""},
    {&alist[8],  ATTR_rescmax, "", ""},
    {&alist[9],  ATTR_rescmin, "", ""},
    {&alist[10], ATTR_acluren, "", ""},
    {&alist[11], ATTR_acluser, "", ""},
    {&alist[12], ATTR_p,  "", ""},
    {NULL,       ATTR_maxrun, "", ""}
    };
  queue->running = UNSPECIFIED;
  queue->queued = UNSPECIFIED;
  queue->maxrun = UNSPECIFIED;
  queue->userrun      = UNSPECIFIED;
  queue->ncpus_max = UNSPECIFIED;
  queue->ncpus_min = UNSPECIFIED;
  queue->ncpus_default = UNSPECIFIED;
  queue->ncpus_assn = UNSPECIFIED;
  queue->mem_max = UNSPECIFIED;
  queue->mem_min = UNSPECIFIED;
  queue->mem_default  = UNSPECIFIED;
  queue->wallt_max = UNSPECIFIED;
  queue->wallt_min = UNSPECIFIED;
  queue->wallt_default = UNSPECIFIED;
  queue->rsrcs = NULL;
  queue->flags = 0;
  queue->priority     = UNSPECIFIED;
  queue->speed   = UNSPECIFIED;

  if (queue->featureA)
    {
    free(queue->featureA);
    queue->featureA = NULL;
    }

  if (queue->featureB)
    {
    free(queue->featureB);
    queue->featureB = NULL;
    }

  if (queue->featureC)
    {
    free(queue->featureC);
    queue->featureC = NULL;
    }

  queue->featureD = UNSPECIFIED;

  queue->featureE = UNSPECIFIED;
  queue->featureF = UNSPECIFIED;
  queue->featureG = UNSPECIFIED;
  queue->featureH = UNSPECIFIED;
  queue->featureI = UNSPECIFIED;

  /*
   * NOTE(review): queue->rsrcs was already set to NULL above, so this
   * branch can never run.  Whether the list should really be released
   * here depends on who owns it, which is not visible from this
   * function -- confirm before reordering.
   */
  if (queue->rsrcs)
    {
    DBPRT(("%s: found resource list on queue '%s'!  Freeing them...\n", id,
           queue->qname));
    cleanup_rsrcs(queue->rsrcs);
    queue->rsrcs        = NULL;
    }

  if (queue->jobs)
    {
    DBPRT(("%s: found jobs on queue '%s'!  Freeing them...\n", id,
           queue->qname));
    schd_free_jobs(queue->jobs);
    queue->jobs         = NULL;
    }

  if (queue->useracl)
    {
    DBPRT(("%s: found user ACL list on queue '%s'!  Freeing it...\n", id,
           queue->qname));
    schd_free_useracl(queue->useracl);
    queue->useracl = NULL;
    }


  /* Ask the server for information about the specified queue. */

  if ((bs = pbs_statque_err(connector, queue->qname, alist, NULL, &local_errno)) == NULL)
    {
    sprintf(log_buffer, "pbs_statque failed, \"%s\" %d",
            queue->qname, local_errno);
    log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer);
    DBPRT(("%s: %s\n", id, log_buffer));
    return (-1);
    }

  /* Process the list of attributes returned by the server. */

  for (attr = bs->attribs; attr != NULL; attr = attr->next)
    {

    /* Is queue started? */
    if (!strcmp(attr->name, ATTR_start))
      {
      if (schd_val2bool(attr->value, &istrue) == 0)
        {
        if (istrue) /* if true, queue is not stopped. */
          queue->flags &= ~QFLAGS_STOPPED;
        else
          queue->flags |= QFLAGS_STOPPED;
        }
      else
        {
        DBPRT(("%s: couldn't parse attr %s value %s to boolean\n", id,
               attr->name, attr->value));
        }

      continue;
      }

    /* Is queue enabled? */
    if (!strcmp(attr->name, ATTR_enable))
      {
      if (schd_val2bool(attr->value, &istrue) == 0)
        {
        if (istrue) /* if true, queue is not disabled. */
          queue->flags &= ~QFLAGS_DISABLED;
        else
          queue->flags |= QFLAGS_DISABLED;
        }
      else
        {
        DBPRT(("%s: couldn't parse attr %s value %s to boolean\n", id,
               attr->name, attr->value));
        }

      continue;
      }

    /* How many jobs are queued and running? */
    if (!strcmp(attr->name, ATTR_count))
      {
      queue->queued = schd_how_many(attr->value, SC_QUEUED);
      queue->running = schd_how_many(attr->value, SC_RUNNING);
      continue;
      }

    /* Queue-wide maximum number of jobs running. */
    if (!strcmp(attr->name, ATTR_maxrun))
      {
      queue->maxrun = atoi(attr->value);
      continue;
      }

    /* Per-user maximum number of jobs running. */
    if (!strcmp(attr->name, ATTR_maxuserrun))
      {
      queue->userrun = atoi(attr->value);
      continue;
      }

    /* Queue Priority Value */
    if (!strcmp(attr->name, ATTR_p))
      {
      queue->priority = atoi(attr->value);
      continue;
      }

    /* Is there an enabled user access control list on this queue? */
    if (!strcmp(attr->name, ATTR_acluren))
      {
      if (schd_val2bool(attr->value, &istrue) == 0)
        {
        if (istrue) /* if true, queue has an ACL */
          queue->flags |= QFLAGS_USER_ACL;
        else
          queue->flags &= ~QFLAGS_USER_ACL;
        }
      else
        {
        DBPRT(("%s: couldn't parse attr %s value %s to boolean\n", id,
               attr->name, attr->value));
        }

      continue;
      }

    /* The user access control list itself. */
    if (!strcmp(attr->name, ATTR_acluser))
      {
      if (queue->useracl)
        {
        DBPRT(("queue %s acluser already set!\n", queue->qname));
        schd_free_useracl(queue->useracl);
        }

      queue->useracl = schd_create_useracl(attr->value);

      continue;
      }

    /* Queue maximum resource usage. */
    if (!strcmp(attr->name, ATTR_rescmax))
      {
      if (!strcmp("mem", attr->resource))
        {
        queue->mem_max = schd_val2byte(attr->value);
        continue;
        }

      if (!strcmp("ncpus", attr->resource))
        {
        queue->ncpus_max = atoi(attr->value);
        continue;
        }

      if (!strcmp("walltime", attr->resource))
        {
        queue->wallt_max = schd_val2sec(attr->value);
        continue;
        }

      if (!strcmp("speed", attr->resource))
        {
        queue->speed = atoi(attr->value);
        continue;
        }

      if (!strcmp(FEATURE_A, attr->resource))
        {
        queue->featureA = schd_strdup(attr->value);
        continue;
        }

      if (!strcmp(FEATURE_B, attr->resource))
        {
        queue->featureB = schd_strdup(attr->value);
        continue;
        }

      if (!strcmp(FEATURE_C, attr->resource))
        {
        queue->featureC = schd_strdup(attr->value);
        continue;
        }

      if (!strcmp(FEATURE_D, attr->resource))
        {
        queue->featureD = atol(attr->value);
        continue;
        }

      if (!strcmp(FEATURE_E, attr->resource))
        {
        queue->featureE = atol(attr->value);
        continue;
        }

      if (!strcmp(FEATURE_F, attr->resource))
        {
        queue->featureF = atol(attr->value);
        continue;
        }

      /*
       * Boolean features: only store the value when it parsed cleanly.
       * Otherwise 'istrue' holds nothing meaningful, and the feature
       * keeps the UNSPECIFIED value assigned at the top of the function.
       */
      if (!strcmp(FEATURE_G, attr->resource))
        {
        if (schd_val2bool(attr->value, &istrue) == 0)
          {
          queue->featureG = istrue;
          }
        else
          {
          DBPRT(("%s: couldn't parse attr %s value %s to boolean\n", id,
                 attr->name, attr->value));
          }

        continue;
        }

      if (!strcmp(FEATURE_H, attr->resource))
        {
        if (schd_val2bool(attr->value, &istrue) == 0)
          {
          queue->featureH = istrue;
          }
        else
          {
          DBPRT(("%s: couldn't parse attr %s value %s to boolean\n", id,
                 attr->name, attr->value));
          }

        continue;
        }

      if (!strcmp(FEATURE_I, attr->resource))
        {
        if (schd_val2bool(attr->value, &istrue) == 0)
          {
          queue->featureI = istrue;
          }
        else
          {
          DBPRT(("%s: couldn't parse attr %s value %s to boolean\n", id,
                 attr->name, attr->value));
          }

        continue;
        }

      continue;
      }

    /* Queue minimum resource usage. */
    if (!strcmp(attr->name, ATTR_rescmin))
      {
      if (!strcmp("mem", attr->resource))
        {
        queue->mem_min = schd_val2byte(attr->value);
        continue;
        }

      if (!strcmp("ncpus", attr->resource))
        {
        queue->ncpus_min = atoi(attr->value);
        continue;
        }

      if (!strcmp("walltime", attr->resource))
        {
        queue->wallt_min = schd_val2sec(attr->value);
        continue;
        }

      continue;
      }

    /*
     * Queue assigned (in use) resource usage.
     * NOTE(review): mem_assn is not reset at the top of this function the
     * way ncpus_assn is, so it keeps its previous value when the server
     * does not report it -- confirm that is intended.
     */
    if (!strcmp(attr->name, ATTR_rescassn))
      {
      if (!strcmp("mem", attr->resource))
        {
        queue->mem_assn = schd_val2byte(attr->value);
        continue;
        }

      if (!strcmp("ncpus", attr->resource))
        {
        queue->ncpus_assn = atoi(attr->value);
        }

      continue;
      }

    /* Queue default resource values. */
    if (!strcmp(attr->name, ATTR_rescdflt))
      {
      if (!strcmp("mem", attr->resource))
        {
        queue->mem_default = schd_val2byte(attr->value);
        continue;
        }

      if (!strcmp("ncpus", attr->resource))
        {
        queue->ncpus_default = atoi(attr->value);
        continue;
        }

      if (!strcmp("walltime", attr->resource))
        queue->wallt_default = schd_val2sec(attr->value);
      }

    /* Ignore anything else */
    }

  pbs_statfree(bs);

  return (0);
  }
コード例 #10
0
ファイル: getconfig.c プロジェクト: AlbertDeFusco/torque
/*
 * Replace the string held in '*target' with a fresh copy of 'value'.
 * Any previous string is freed first.  If the copy cannot be made,
 * 'errmsg' is logged on behalf of 'caller' and -1 is returned; otherwise
 * the result is 0.
 */
static int
replace_cfg_string(char **target, char *value, char *caller, char *errmsg)
  {
  if (*target)
    free(*target);

  *target = schd_strdup(value);

  if (*target == NULL)
    {
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, caller, errmsg);
    return (-1);
    }

  return (0);
  }

/*
 * Now that an option and its argument have been read, validate them and
 * set the appropriate global configuration variables.
 *
 * Option names are matched exactly (case-sensitive).  Returns 0 when the
 * option was stored, the result of the value parser for options that are
 * delegated to one (get_variance, schd_val2bool, schd_val2booltime), and
 * -1 for an unknown option, a bad SCHED_RESTART_ACTION argument, or a
 * failed string copy.
 */
static int
set_cfg_opt(char *cfg_option, char *cfg_arg)
  {
  char   *id = "set_cfg_opt";

  /* XXX Should smash case on these before doing string compares? */

  if (!strcmp(cfg_option, "TARGET_LOAD_PCT"))
    {
    schd_TARGET_LOAD_PCT = atoi(cfg_arg);
    return (0);
    }

  if (!strcmp(cfg_option, "TARGET_LOAD_VARIANCE"))
    {
    return (get_variance(cfg_arg,
                         &schd_TARGET_LOAD_MINUS, &schd_TARGET_LOAD_PLUS));
    }

  if (!strcmp(cfg_option, "HIGH_SYSTIME"))
    {
    schd_HIGH_SYSTIME = atoi(cfg_arg);
    return (0);
    }

  if (!strcmp(cfg_option, "MAX_JOBS"))
    {
    schd_MAX_JOBS = atoi(cfg_arg);
    return (0);
    }

  if (!strcmp(cfg_option, "MIN_JOBS"))
    {
    schd_MIN_JOBS = atoi(cfg_arg);
    return (0);
    }

  if (!strcmp(cfg_option, "MAX_DEDICATED_JOBS"))
    {
    schd_MAX_DEDICATED_JOBS = atoi(cfg_arg);
    return (0);
    }

  if (!strcmp(cfg_option, "MAX_USER_RUN_JOBS"))
    {
    schd_MAX_USER_RUN_JOBS = atoi(cfg_arg);
    return (0);
    }

  if (!strcmp(cfg_option, "ENFORCE_ALLOCATION"))
    {
    return schd_val2booltime(cfg_arg, &schd_ENFORCE_ALLOCATION);
    }

  if (!strcmp(cfg_option, "TEST_ONLY"))
    {
    return schd_val2bool(cfg_arg, &schd_TEST_ONLY);
    }

  if (!strcmp(cfg_option, "ENFORCE_DEDICATED_TIME"))
    {
    return schd_val2booltime(cfg_arg, &schd_ENFORCE_DEDTIME);
    }

  if (!strcmp(cfg_option, "DEDICATED_TIME_COMMAND"))
    {
    return replace_cfg_string(&schd_DEDTIME_COMMAND, cfg_arg, id,
                              "schd_strdup(schd_DEDTIME_COMMAND)");
    }

  if (!strcmp(cfg_option, "DEDICATED_TIME_CACHE_SECS"))
    {
    schd_DEDTIME_CACHE_SECS = atoi(cfg_arg);
    return (0);
    }

  if (!strcmp(cfg_option, "DECAY_FACTOR"))
    {
    schd_DECAY_FACTOR = atof(cfg_arg);
    return (0);
    }

  if (!strcmp(cfg_option, "OA_DECAY_FACTOR"))
    {
    schd_OA_DECAY_FACTOR = atof(cfg_arg);
    return (0);
    }

  if (!strcmp(cfg_option, "SCHED_ACCT_DIR"))
    {
    return replace_cfg_string(&schd_SCHED_ACCT_DIR, cfg_arg, id,
                              "schd_strdup(schd_SCHED_ACCT_DIR)");
    }

  if (!strcmp(cfg_option, "SYSTEM_NAME"))
    {
    /* Check the copy before lowercasing it, as the host options do. */
    if (replace_cfg_string(&schd_SYSTEM_NAME, cfg_arg, id,
                           "schd_strdup(schd_SYSTEM_NAME)"))
      return (-1);

    schd_lowercase(schd_SYSTEM_NAME);

    return (0);
    }

  if (!strcmp(cfg_option, "SERVER_HOST"))
    {
    if (replace_cfg_string(&schd_SERVER_HOST, cfg_arg, id,
                           "schd_strdup(schd_SERVER_HOST)"))
      return (-1);

    schd_lowercase(schd_SERVER_HOST);

    return (0);
    }

  if (!strcmp(cfg_option, "SCHED_HOST"))
    {
    if (replace_cfg_string(&schd_SCHED_HOST, cfg_arg, id,
                           "schd_strdup(schd_SCHED_HOST)"))
      return (-1);

    schd_lowercase(schd_SCHED_HOST);

    return (0);
    }

  if (!strcmp(cfg_option, "SORTED_JOB_DUMPFILE"))
    {
    return replace_cfg_string(&schd_JOB_DUMPFILE, cfg_arg, id,
                              "schd_strdup(schd_JOB_DUMPFILE)");
    }

  if (!strcmp(cfg_option, "SCHED_RESTART_ACTION"))
    {
    if (strcmp(cfg_arg, "NONE") == 0)
      {
      schd_SCHED_RESTART_ACTION = SCHD_RESTART_NONE;
      return (0);
      }

    if (strcmp(cfg_arg, "RESUBMIT") == 0)
      {
      schd_SCHED_RESTART_ACTION = SCHD_RESTART_RESUBMIT;
      return (0);
      }

    if (strcmp(cfg_arg, "RERUN") == 0)
      {
      schd_SCHED_RESTART_ACTION = SCHD_RESTART_RERUN;
      return (0);
      }

    return (-1);  /* Bad argument */
    }

  if (!strcmp(cfg_option, "SORT_BY_PAST_USAGE"))
    {
    return schd_val2bool(cfg_arg, &schd_SORT_BY_PAST_USAGE);
    }

  if (!strcmp(cfg_option, "ENFORCE_PRIME_TIME"))
    {
    return schd_val2booltime(cfg_arg, &schd_ENFORCE_PRIME_TIME);
    }

  if (!strcmp(cfg_option, "PRIME_TIME_START"))
    {
    schd_PRIME_TIME_START = schd_val2sec(cfg_arg);
    return (0);
    }

  if (!strcmp(cfg_option, "PRIME_TIME_END"))
    {
    schd_PRIME_TIME_END = schd_val2sec(cfg_arg);
    return (0);
    }

  if (!strcmp(cfg_option, "PRIME_TIME_SMALL_NODE_LIMIT"))
    {
    schd_PT_SMALL_NODE_LIMIT = atoi(cfg_arg);
    return (0);
    }

  if (!strcmp(cfg_option, "PRIME_TIME_SMALL_WALLT_LIMIT"))
    {
    schd_PT_SMALL_WALLT_LIMIT = schd_val2sec(cfg_arg);
    return (0);
    }

  if (!strcmp(cfg_option, "PRIME_TIME_WALLT_LIMIT"))
    {
    schd_PT_WALLT_LIMIT = schd_val2sec(cfg_arg);
    return (0);
    }

  if (!strcmp(cfg_option, "NONPRIME_DRAIN_SYS"))
    {
    return schd_val2bool(cfg_arg, &schd_NONPRIME_DRAIN_SYS);
    }

  if (!strcmp(cfg_option, "NP_DRAIN_BACKTIME"))
    {
    schd_NP_DRAIN_BACKTIME = schd_val2sec(cfg_arg);
    return (0);
    }

  if (!strcmp(cfg_option, "NP_DRAIN_IDLETIME"))
    {
    schd_NP_DRAIN_IDLETIME = schd_val2sec(cfg_arg);
    return (0);
    }


  if (!strcmp(cfg_option, "SMALL_JOB_MAX"))
    {
    schd_SMALL_JOB_MAX = atoi(cfg_arg);
    return (0);
    }

  if (!strcmp(cfg_option, "WALLT_LIMIT_LARGE_JOB"))
    {
    schd_WALLT_LARGE_LIMIT = schd_val2sec(cfg_arg);
    return (0);
    }

  if (!strcmp(cfg_option, "WALLT_LIMIT_SMALL_JOB"))
    {
    schd_WALLT_SMALL_LIMIT = schd_val2sec(cfg_arg);
    return (0);
    }


  if (!strcmp(cfg_option, "MAX_QUEUED_TIME"))
    {
    schd_MAX_QUEUED_TIME = schd_val2sec(cfg_arg);
    return (0);
    }

  if (!strcmp(cfg_option, "SMALL_QUEUED_TIME"))
    {
    schd_SMALL_QUEUED_TIME = schd_val2sec(cfg_arg);
    return (0);
    }

  if (!strcmp(cfg_option, "INTERACTIVE_LONG_WAIT"))
    {
    schd_INTERACTIVE_LONG_WAIT = schd_val2sec(cfg_arg);
    return (0);
    }

  /* Queue options take a comma-separated list of queue names. */
  if (!strcmp(cfg_option, "SUBMIT_QUEUE"))
    {
    arg_to_qlist(cfg_arg, ",", &schd_SubmitQueue);
    return (0);
    }

  if (!strcmp(cfg_option, "BATCH_QUEUES"))
    {
    arg_to_qlist(cfg_arg, ",", &schd_BatchQueues);
    return (0);
    }

  if (!strcmp(cfg_option, "SPECIAL_QUEUE"))
    {
    arg_to_qlist(cfg_arg, ",", &schd_SpecialQueue);
    return (0);
    }

  if (!strcmp(cfg_option, "DEDICATED_QUEUES"))
    {
    arg_to_qlist(cfg_arg, ",", &schd_DedQueues);
    return (0);
    }

  if (!strcmp(cfg_option, "EXTERNAL_QUEUES"))
    {
    arg_to_qlist(cfg_arg, ",", &schd_ExternQueues);
    return (0);
    }

  if (!strcmp(cfg_option, "FAKE_MACHINE_MULT"))
    {
    schd_FAKE_MACH_MULT = atoi(cfg_arg);
    return (0);
    }

  if (!strcmp(cfg_option, "AVOID_FRAGMENTATION"))
    {
    return schd_val2bool(cfg_arg, &schd_AVOID_FRAGS);
    }

  if (!strcmp(cfg_option, "MANAGE_HPM_COUNTERS"))
    {
    return schd_val2bool(cfg_arg, &schd_MANAGE_HPM);
    }

  if (!strcmp(cfg_option, "REVOKE_HPM_COUNTERS"))
    {
    return schd_val2bool(cfg_arg, &schd_REVOKE_HPM);
    }

  /* Unknown option -- return an error. */
  return (-1);
  }
コード例 #11
0
ファイル: getrsrcs.c プロジェクト: Johnlihj/torque
/*
 * Find an entry for the resources for the requested host in the list of
 * existing resources, or create a new one for that host and return it.
 *
 * schd_RsrcsList is searched first; a match on exechost is returned as-is.
 * Otherwise the resource monitor on 'exechost' is asked for "mppe_app" and
 * "mppe_avail", both scaled by schd_FAKE_MACH_MULT, and a new entry is
 * appended to schd_RsrcsList.
 *
 * The conversation with the resource monitor is bounded by an alarm of
 * GETRSRCS_CONNECT_TIME seconds.  Any alarm that was already pending is
 * re-armed afterwards with the time spent here subtracted.
 *
 * Returns NULL if memory cannot be allocated, the resource monitor cannot
 * be contacted, or one of the expected replies is missing.
 */
Resources *
schd_get_resources(char *exechost)
  {
  char   *id = "schd_get_resources";
  Resources *rptr, *new_rsrcs;
  int     rm;

  char   *response;
  int     badreply   = 0;
  int     cpus_avail = 0;
  int     cpus_tot   = 0;

  struct sigaction act, oact;

  unsigned int remain; /* Time remaining in any old alarm(). */
  time_t then;  /* When this alarm() was started. */
  time_t elapsed; /* Seconds spent since 'then'. */

  /*
   * Check for a local copy of the resources being available already.
   * If so, just return a reference to that Resources structure.
   */

  if (schd_RsrcsList != NULL)
    {
    for (rptr = schd_RsrcsList; rptr != NULL; rptr = rptr->next)
      if (strcmp(rptr->exechost, exechost) == 0)
        return (rptr);
    }

  schd_timestamp("get_rsrcs");

  /*
   * No cached resource information for 'exechost'.  Need to query the
   * host for its information.
   */

  if ((new_rsrcs = (Resources *)malloc(sizeof(Resources))) == NULL)
    {
    (void)sprintf(log_buffer, "Unable to alloc space for Resources.");
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
    DBPRT(("%s: %s\n", id, log_buffer));

    return (NULL); /* Can't get the information - nowhere to store it. */
    }

  memset((void *)new_rsrcs, 0, sizeof(Resources));

  act.sa_flags = 0;
  act.sa_handler = connect_interrupt;
  sigemptyset(&act.sa_mask);
  remain = 0;
  then = 0;

  /*
   * Set the alarm, and maintain some idea of how long was left on any
   * previously set alarm.
   */

  if (sigaction(SIGALRM, &act, &oact) == 0)
    {
    remain = alarm(GETRSRCS_CONNECT_TIME);
    then = time(NULL);
    }

  if ((rm = openrm(exechost, 0)) == -1)
    {
    (void)sprintf(log_buffer,
                  "Unable to contact resmom@%s (%d)", exechost, pbs_errno);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

    badreply = 1;
    goto bail;
    }

  /*
   * Turn off full response.  Responses will be received in the order in
   * which they are sent.
   */
  fullresp(0);

  /* Build a list of all the resources about which we want information. */

  addreq(rm, "mppe_app");

  addreq(rm, "mppe_avail");

  /* Get the values back from the resource monitor, and round up. */

  /* Receive MPPE_APP response from resource monitor. */
  /* returns the total number of Application PEs configured */
  response = getreq(rm);

  if (response != NULL)
    {
    cpus_tot = atoi(response) * schd_FAKE_MACH_MULT;

    /* The reply string belongs to the caller; release it once parsed. */
    free(response);
    }
  else
    {
    (void)sprintf(log_buffer, "bad return from getreq(ncpus), %d, %d",
                  pbs_errno, errno);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
    badreply = 1;
    goto bail;
    }

  /* Receive MPPE_AVAIL response from resource monitor. */
  /* returns the largest contiguous block of APP PEs */
  response = getreq(rm);

  if (response != NULL)
    {
    cpus_avail = atoi(response) * schd_FAKE_MACH_MULT;
    free(response);
    }
  else
    {
    (void)sprintf(log_buffer, "bad return from getreq(ncpus), %d, %d",
                  pbs_errno, errno);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
    badreply = 1;
    goto bail;
    }

  new_rsrcs->freemem = MB_PER_NODE * schd_FAKE_MACH_MULT;

bail:
  /*
   * Disconnect from the resource monitor.  openrm() reports failure as
   * -1, so only a non-negative handle is closed; handle 0 is a valid
   * connection and must be closed too.
   */

  if (rm >= 0)
    closerm(rm);

  /* And unset the alarm and handler. */
  alarm(0);

  sigaction(SIGALRM, &oact, &act);

  /* Reset the old alarm, taking into account how much time has passed. */
  if (remain)
    {
    elapsed = time(NULL) - then;

    if (elapsed < 0)
      elapsed = 0; /* Clock stepped backwards - charge nothing. */

    DBPRT(("%s: old alarm had %u secs remaining, %ld elapsed, ", id,
           remain, (long)elapsed));

    /*
     * Would the previous time have already expired?  If so, schedule
     * an alarm call in 1 second (close enough, hopefully).  'remain' is
     * unsigned, so compare before subtracting rather than subtracting
     * and testing for a value below 1, which would wrap around.
     */

    if ((time_t)remain > elapsed)
      remain -= (unsigned int)elapsed;
    else
      remain = 1;

    DBPRT(("reset to %u secs\n", remain));

    alarm(remain);
    }

  /*
   * Verify all the data came back as expected; if not, abort this
   * iteration of the scheduler.
   */
  if (badreply)
    {
    (void)sprintf(log_buffer,
                  "Got bad info from mom@%s - aborting sched run", exechost);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
    DBPRT(("%s: %s\n", id, log_buffer));

    free(new_rsrcs);
    return (NULL);
    }

  /* Make a copy of the hostname for the resources struct. */
  new_rsrcs->exechost = schd_strdup(exechost);

  if (new_rsrcs->exechost == NULL)
    {
    (void)sprintf(log_buffer, "Unable to copy exechost %s to rsrcs",
                  exechost);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
    DBPRT(("%s: %s\n", id, log_buffer));

    free(new_rsrcs);
    return (NULL);
    }

  new_rsrcs->nodes_total = cpus_tot;

  new_rsrcs->nodes_alloc = cpus_tot - cpus_avail;

  if (schd_RsrcsList == NULL)
    {
    schd_RsrcsList  = new_rsrcs; /* Start the list. */
    }
  else
    {
    for (rptr = schd_RsrcsList; rptr->next != NULL; rptr = rptr->next)
      /* Find the last element in the list. */ ;

    rptr->next = new_rsrcs;
    }

  /* Next pointer for the tail of the list points to nothing. */
  new_rsrcs->next = NULL;

  return (new_rsrcs);
  }
コード例 #12
0
ファイル: getconfig.c プロジェクト: AlbertDeFusco/torque
/*
 * Parse a comma-separated variance specification such as "-10%,+25%".
 *
 * Each token must begin with '+' or '-', followed by at least one decimal
 * digit, with a '%' immediately after the number.  At most one '-' token
 * and one '+' token are accepted.  On success the '-' value is stored in
 * *lowp and the '+' value in *highp (0 for whichever was not given) and 0
 * is returned.  On any error -1 is returned and neither output is written.
 */
static int
get_variance(char *string, int *lowp, int *highp)
  {
  char   *copy, *token, *digits, *rest;
  char    direction;
  long    value;
  int     below = -1, above = -1; /* -1 means "not seen yet". */
  int    *slot;
  int     status = -1;

  if (string == NULL)
    return (-1);

  /* Work on a private copy, since strtok() writes into its argument. */
  copy = schd_strdup(string);

  if (copy == NULL)
    return (-1);

  for (token = strtok(copy, ","); token != NULL; token = strtok(NULL, ","))
    {
    /* Ensure that the token matches '{+-}[0-9][0-9]*%'. */
    direction = token[0];

    if ((direction != '+') && (direction != '-'))
      goto done;

    digits = token + 1;

    if ((digits[0] < '0') || (digits[0] > '9'))
      goto done;

    value = strtol(digits, &rest, 10);

    if ((value > INT_MAX) || (*rest != '%'))
      goto done;

    /* Pick the bound this token describes; each may be given only once. */
    slot = (direction == '-') ? &below : &above;

    if (*slot >= 0)
      goto done;

    *slot = (int)value;
    }

  /* Every token parsed cleanly - publish the results. */
  *lowp  = (below >= 0) ? below : 0;
  *highp = (above >= 0) ? above : 0;
  status = 0;

done:
  free(copy);
  return (status);
  }
コード例 #13
0
ファイル: getconfig.c プロジェクト: AlbertDeFusco/torque
/*
 * Split 'arg' on the characters in 'sep' and append one QueueList element
 * (with a freshly allocated Queue) per token to the list at *qlist_ptr.
 *
 * A token is either 'queue' or 'queue@host'.  For 'queue@host' the host is
 * passed through get_fullhostname(); if that fails a warning is logged and
 * the host is used as written.  For a bare 'queue' the host is
 * schd_ThisHost.  'arg' is modified in place by strtok() and by replacing
 * each '@' with '\0'.
 *
 * Returns the number of queues added.  On an allocation failure the whole
 * list at *qlist_ptr is destroyed, *qlist_ptr is set to NULL and -1 is
 * returned.
 */
static int
arg_to_qlist(char *arg, char *sep, QueueList **qlist_ptr)
  {
  char *id = "arg_to_qlist";
  QueueList  *tail, *node;
  Queue      *q;
  char       *token, *at, *host;
  char        canon[PBS_MAXHOSTNAME + 1];
  int         count = 0;

  /*
   * Several configuration lines may contribute to one list, so start
   * from the current tail (NULL when the list is still empty).
   */
  tail = *qlist_ptr;

  if (tail != NULL)
    {
    while (tail->next != NULL)
      tail = tail->next;
    }

  token = strtok(arg, sep);

  while (token != NULL)
    {
    /* Allocate a zeroed list element and hang it on the list. */
    node = (QueueList *)malloc(sizeof(QueueList));

    if (node == NULL)
      {
      log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                 "malloc(newQueue)");
      goto error_in_list;
      }

    memset(node, 0, sizeof(QueueList));

    if (tail == NULL)
      *qlist_ptr = node; /* First element becomes the head. */
    else
      tail->next = node;

    tail = node;

    /* Allocate the zeroed Queue the element refers to. */
    q = (Queue *)malloc(sizeof(Queue));
    node->queue = q;

    if (q == NULL)
      {
      log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                 "malloc(newQueue->queue)");
      goto error_in_list;
      }

    memset(q, 0, sizeof(Queue));

    /* Work out which host the queue lives on. */
    at = strchr(token, '@');

    if (at == NULL)
      {
      host = schd_ThisHost; /* No '@host' part: queue is local. */
      }
    else
      {
      *at = '\0'; /* Terminate the queue name at the '@'. */
      host = at + 1;

      if (get_fullhostname(host, canon, PBS_MAXHOSTNAME) == 0)
        {
        host = canon; /* Prefer the canonical name. */
        }
      else
        {
        sprintf(log_buffer, "Warning: Cannot canonicalize queue %s@%s",
                token, host);
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                   log_buffer);
        DBPRT(("%s: %s\n", id, log_buffer));
        }
      }

    /* Store private copies of both halves in the new Queue. */
    q->qname = schd_strdup(token);

    if (q->qname == NULL)
      {
      log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                 "schd_strdup(qname)");
      goto error_in_list;
      }

    q->exechost = schd_strdup(host);

    if (q->exechost == NULL)
      {
      log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                 "schd_strdup(exechost)");
      goto error_in_list;
      }

    count++;

    token = strtok(NULL, sep);
    }

  return (count);

error_in_list:
  /* Something went wrong - throw the list away and report a fatal error. */

  if (*qlist_ptr)
    {
    schd_destroy_qlist(*qlist_ptr);
    *qlist_ptr = NULL;
    }

  return (-1);
  }
コード例 #14
0
ファイル: schedinit.c プロジェクト: Johnlihj/torque
/*ARGSUSED*/
int
schedinit(int argc, char *argv[])
  {
  char   *id = "schedinit";

  struct utsname name;

  struct sigaction act, oact;
  char   *ptr, canon[PBS_MAXHOSTNAME + 1];

  DBPRT(("\n%s\n", schd_VersionString));
  log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
             schd_VersionString);

  /*
   * If this is the initial startup configuration, then schd_TimeNow will
   * be 0.  Initialize it to the current time, so it can be tested against
   * various times when initializing.
   */

  if (schd_TimeNow == 0)
    {
    schd_TimeNow = time(NULL);
    DBPRT(("%s: initialize/startup at %s", id, ctime(&schd_TimeNow)));
    }

  /*
   * Determine location of configuration file.  Check for the presence of
   * the PBSSCHED_CONFIG environment variable.  If not defined, fall back
   * to the compiled default CONFIGFILE.
   *
   * Since neither the environment variables nor the compiled-in default
   * can be changed (with the exception of someone wreaking havoc with
   * a debugger or something), this only needs to be done once.
   */
  if (schd_CfgFilename == NULL)
    {
    ptr = getenv("PBSSCHED_CONFIG");

    if (ptr == NULL)
      ptr = CONFIGFILE;

    schd_CfgFilename = schd_strdup(ptr);

    if (schd_CfgFilename == NULL)
      {
      (void)sprintf(log_buffer, "schd_strdup() failed for configfile");
      log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
      DBPRT(("%s: %s\n", id, log_buffer));
      return (-1);
      }

    }

  /*
   * From this point on, goto cleanup_and_error: to clean up any allocated
   * storage for filenames.
   */

  DBPRT(("SCHEDINIT: Reading configuration from '%s'\n", schd_CfgFilename));

  /* Reset the configuration to a "known" state. */
  reset_config();

  /* Determine on what host this scheduler running. */
  uname(&name);

  if (get_fullhostname(name.nodename, canon, PBS_MAXHOSTNAME) == 0)
    {
    strncpy(schd_ThisHost, canon, PBS_MAXHOSTNAME);
    }
  else
    {
    (void)sprintf(log_buffer,
                  "Failed to canonicalize uname %s (using it anyway)",
                  name.nodename);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
    DBPRT(("%s: %s\n", id, log_buffer));
    strncpy(schd_ThisHost, name.nodename, PBS_MAXHOSTNAME);
    }

  schd_lowercase(schd_ThisHost);

  DBPRT(("%s: This host is '%s'\n", id, schd_ThisHost));

  /*
   * Register the state of the config file.  The call to reset_config()
   * above will have cleared all file watches.
   */

  if (schd_register_file(schd_CfgFilename))
    {
    (void)sprintf(log_buffer, "cannot watch %s", schd_CfgFilename);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
    DBPRT(("%s: %s\n", id, log_buffer));

    goto cleanup_and_error;
    }

  /* Read the configuration file. */
  if (schd_get_config(schd_CfgFilename))
    return (-1);


  /* if we are NOT making a distinction between Prime and Non-Prime Time,
   * then simply empty the holidays array; otherwise, try to read and load
   * the holidays file.
   */
  if (!schd_ENFORCE_PRIME_TIME)
    init_holidays();
  else
    {
    /*
     * Register the state of the holidays file.  This allows schd_req() to
     * reload it if it is changed.
     */
    if (schd_register_file(HOLIDAYS_FILE))
      {
      (void)sprintf(log_buffer, "cannot watch %s", HOLIDAYS_FILE);
      log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
      DBPRT(("%s: %s\n", id, log_buffer));

      goto cleanup_and_error;
      }

    /* Get a list of prime/non-prime times from the holidays file */
    if (schd_read_holidays() < 0)
      return (-1);
    }

  /*
   * Set up a signal handler for SIGHUP.  catch_HUP() will re-read the
   * configuration file.
   */
  act.sa_flags   = 0;

  act.sa_handler = catch_HUP;

  sigemptyset(&act.sa_mask);

  if (sigaction(SIGHUP, &act, &oact))
    {
    (void)sprintf(log_buffer, "Failed to setup SIGHUP handler.");
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
    DBPRT(("%s: %s\n", id, log_buffer));
    }

  DBPRT(("SCHEDINIT: configuration complete.\n"));

  return (0);

cleanup_and_error:

  /*
   * Some error occurred.  Remove watches and free the storage allocated
   * for the filenames.
   */

  if (schd_CfgFilename)
    {
    schd_forget_file(schd_CfgFilename);
    free(schd_CfgFilename);
    }

  schd_CfgFilename     = NULL;

  return (-1);
  }
コード例 #15
0
ファイル: acl_support.c プロジェクト: AlbertDeFusco/torque
/*
 * Parse a string like "foo@*.bar.com,user1@host.example.com,user2" into a
 * linked list of UserAcl's, in the order the entries appear.
 *
 * For each comma-separated entry the part before any '@' is copied into
 * the element's user array, and the part after it (if present) is stored
 * in the element's host field as a schd_strdup()'d string; host is NULL
 * when the entry has no '@'.
 *
 * Returns the head of the list, or NULL if the string yields no entries
 * or an allocation fails.  On failure everything built so far, including
 * the working copy of the string, is released.
 */
UserAcl *
schd_create_useracl(char *useracl)
  {
  char  *id = "schd_create_useracl";
  char  *useracl_copy, *user, *atsign;
  UserAcl *acl, *new_acl, *acltail;

  /*
   * Copy the string.  strtok() and the '@' handling below write '\0's
   * into it, so the caller's string must not be used directly.
   */

  if ((useracl_copy = schd_strdup(useracl)) == NULL)
    {
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
               "schd_strdup(useracl) failed");
    DBPRT(("schd_strdup(useracl) failed\n"));
    return (NULL);
    }

  acl     = NULL;

  acltail = NULL;

  for (user = strtok(useracl_copy, ",");
       user != NULL;
       user = strtok(NULL, ","))
    {

    new_acl = (UserAcl *)malloc(sizeof(UserAcl));

    if (new_acl == NULL)
      {
      log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                 "malloc(UserAcl) failed");
      DBPRT(("malloc(UserAcl) failed\n"));

      goto fail;
      }

    /*
     * Start from an all-zero element: host and next begin as NULL, and
     * the bytes of the user array that strncpy() below does not write
     * are '\0' rather than uninitialized heap contents.
     */
    memset(new_acl, 0, sizeof(UserAcl));

    /*
     * If a host string is given, change the '@' into a '\0' to terminate
     * the user string (for the strncpy() below) and place a reference to
     * a copy of the host string into the host pointer.
     */

    if ((atsign = strchr(user, '@')) != NULL)
      {
      *atsign = '\0';

      /* Skip forward to the start of the remaining host string. */
      atsign ++;

      new_acl->host = schd_strdup(atsign);

      if (new_acl->host == NULL)
        {
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                   "schd_strdup(host) failed");
        DBPRT(("schd_strdup(host) failed\n"));

        /* Not yet on the list, so it must be released separately. */
        free(new_acl);

        goto fail;
        }
      }

    /*
     * Copy the username into the array in the UserAcl struct.  At most
     * PBS_MAXUSER bytes are written.
     * NOTE(review): the copy is only guaranteed to be NUL-terminated if
     * the user array is larger than PBS_MAXUSER - confirm against the
     * UserAcl declaration.
     */
    strncpy(new_acl->user, user, PBS_MAXUSER);

    /*
     * Place the new ACL element on the tail of the list, or create it
     * if this is the first element.
     */
    if (acltail)
      acltail->next = new_acl;
    else
      acl = new_acl;

    acltail = new_acl;

    acltail->next = NULL;
    }

  /*
   * Free the storage used by the copy of the string that was strtok()'d.
   */
  free(useracl_copy);

  return (acl);

fail:

  /* Release the partial list and the working copy of the string. */
  if (acl)
    schd_free_useracl(acl);

  free(useracl_copy);

  return (NULL);
  }
コード例 #16
0
ファイル: getrsrcs.c プロジェクト: gto11520/torque
/*
 * Find an entry for the resources for the requested host in the list of
 * existing resources, or create a new one for that host and return it.
 */
Resources *
schd_get_resources(char *exechost)
  {
  char *id = "schd_get_resources";
  Resources *rptr, *new_rsrcs;
  int  rm;
  char *response = NULL;
  int  badreply   = 0;
  int  local_errno = 0;

  struct sigaction act, oact;
  unsigned int remain;  /* Time remaining in any old alarm(). */
  time_t  then;  /* When this alarm() was started. */

  /*
   * Check for a local copy of the resources being available already.
   * If so, just return a reference to that Resources structure.
   */

  if (schd_RsrcsList != NULL)
    {
    for (rptr = schd_RsrcsList; rptr != NULL; rptr = rptr->next)
      if (strcmp(rptr->exechost, exechost) == 0)
        return (rptr);
    }

  schd_timestamp("get_rsrcs");

  /*
   * No cached resource information for 'exechost'.  Need to query the
   * host for its information.
   */

  if ((new_rsrcs = (Resources *)malloc(sizeof(Resources))) == NULL)
    {
    (void)sprintf(log_buffer, "Unable to alloc space for Resources.");
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
    DBPRT(("%s: %s\n", id, log_buffer));

    return (NULL); /* Can't get the information - nowhere to store it. */
    }

  memset((void *)new_rsrcs, 0, sizeof(Resources));

  act.sa_flags = 0;
  act.sa_handler = connect_interrupt;
  sigemptyset(&act.sa_mask);
  remain = 0;
  then = 0;

  /*
   * Set the alarm, and maintain some idea of how long was left on any
   * previously set alarm.
   */

  if (sigaction(SIGALRM, &act, &oact) == 0)
    {
    remain = alarm(GETRSRCS_CONNECT_TIME);
    then = time(NULL);
    }

  if ((rm = openrm(exechost, 0)) == -1)
    {
    (void)sprintf(log_buffer,
                  "Unable to contact resmom@%s ", exechost);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

    badreply = 1;
    goto bail;
    }

  /*
   * Turn off full response.  Responses will be received in the order in
   * which they are sent.
   */
  fullresp(0);

  /* Build a list of all the resources about which we want information. */

  addreq(rm, "loadave");

  addreq(rm, "availmem");

  addreq(rm, "physmem");

  addreq(rm, "ncpus");

  addreq(rm, "tmpdir");

  addreq(rm, "arch");

  /* Get the values back from the resource monitor, and round up. */

  /* Receive LOADAVE response from resource monitor. */
  response = getreq_err(&local_errno, rm);

  if (response != NULL)
    {
    new_rsrcs->loadave = atof(response);
    (void)free(response);
    }
  else
    {
    (void)sprintf(log_buffer, "bad return from getreq(loadave), %d, %d",
                  local_errno, errno);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
    badreply = 1;
    goto bail;
    }

  /* Receive AVAILMEM response from resource monitor. */
  response = getreq_err(&local_errno, rm);

  if (response != NULL)
    {
    new_rsrcs->freemem = schd_val2byte(response);
    (void)free(response);
    }
  else
    {
    (void)sprintf(log_buffer, "bad return from getreq(freemem), %d, %d",
                  local_errno, errno);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
    badreply = 1;
    goto bail;
    }

  /* Receive PHYSMEM response from resource monitor. */
  response = getreq_err(&local_errno, rm);

  if (response != NULL)
    {
    new_rsrcs->mem_total = schd_val2byte(response);
    (void)free(response);
    }
  else
    {
    (void)sprintf(log_buffer, "bad return from getreq(realmem), %d, %d",
                  local_errno, errno);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
    badreply = 1;
    goto bail;
    }

  /* Receive NCPUS response from resource monitor. */
  response = getreq_err(&local_errno, rm);

  if (response != NULL)
    {
    new_rsrcs->ncpus_total = atoi(response);
    (void)free(response);
    }
  else
    {
    (void)sprintf(log_buffer, "bad return from getreq(ncpus), %d, %d",
                  local_errno, errno);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
    badreply = 1;
    goto bail;
    }

  /* Receive TMPDIR response from resource monitor. */
  response = getreq_err(&local_errno, rm);

  if (response != NULL)
    {
    new_rsrcs->tmpdir = schd_val2byte(response);
    (void)free(response);
    }
  else
    {
    (void)sprintf(log_buffer, "bad return from getreq(tmpdir), %d, %d",
                  local_errno, errno);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
    badreply = 1;
    goto bail;
    }

  /* Receive ARCH response from resource monitor. */
  response = getreq_err(&local_errno, rm);

  if (response != NULL)
    {
    new_rsrcs->arch = schd_strdup(response);
    (void)free(response);
    }
  else
    {
    (void)sprintf(log_buffer, "bad return from getreq(arch), %d, %d",
                  local_errno, errno);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
    badreply = 1;
    goto bail;
    }

bail:

  /* Disconnect from the resource monitor. */

  if (rm >= 0)  /* resmom handle "0" is valid in RPP. */
    closerm(rm);

  /* And unset the alarm and handler. */
  alarm(0);

  sigaction(SIGALRM, &oact, &act);

  /* Reset the old alarm, taking into account how much time has passed. */
  if (remain)
    {
    DBPRT(("%s: old alarm had %d secs remaining, %d elapsed, ", id,
           remain, (time(NULL) - then)));

    /* How much time remains even after the time spent above? */
    remain -= (time(NULL) - then);

    /*
     * Would the previous time have already expired?  If so, schedule
     * an alarm call in 1 second (close enough, hopefully).
     */

    if (remain < 1)
      remain = 1;

    DBPRT(("reset to %d secs\n", remain));

    alarm(remain);
    }

  /*
   * Verify all the data came back as expected; if not, abort this
   * iteration of the scheduler.
   */

  if (badreply)
    {
    (void)sprintf(log_buffer,
                  "Got bad info from mom@%s - skipping this node", exechost);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
    DBPRT(("%s: %s\n", id, log_buffer));
    free(new_rsrcs);
    return (NULL);
    }

  /* Make a copy of the hostname for the resources struct. */
  new_rsrcs->exechost = schd_strdup(exechost);

  if (new_rsrcs->exechost == NULL)
    {
    (void)sprintf(log_buffer, "Unable to copy exechost %s to rsrcs",
                  exechost);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
    DBPRT(("%s: %s\n", id, log_buffer));
    free(new_rsrcs);
    return (NULL);
    }

  if (schd_RsrcsList == NULL)
    {
    schd_RsrcsList  = new_rsrcs; /* Start the list. */
    }
  else
    {
    for (rptr = schd_RsrcsList; rptr->next != NULL; rptr = rptr->next)
      /* Find the last element in the list. */ ;

    rptr->next = new_rsrcs;
    }

  /* Next pointer for the tail of the list points to nothing. */
  new_rsrcs->next = NULL;

  return (new_rsrcs);
  }