Exemple #1
0
/*
 * Record the reason that the current candidate job cannot currently run.
 * When it is decided that the job will remain queued, place the reason
 * string in the comment field of the job structure.
 *
 *   job      - job whose comment is to be updated; job->comment is
 *              replaced by a freshly allocated copy of 'reason'.
 *   reason   - new comment text.  NULL is treated as the empty string.
 *   optional - if non-zero, the update is skipped when the job already
 *              has a comment, is not newly seen, this is not the first
 *              scheduling run, and the job was modified less than
 *              MIN_COMMENT_AGE seconds ago.
 *
 * If the copy of 'reason' cannot be allocated, the failure is logged and
 * neither the server's nor the local view of the comment is changed.
 */
void
schd_comment_job(Job *job, char *reason, int optional)
  {
  char   *id = "schd_comment_job";
  char   *msg_ptr;

  /*
   * If the 'optional' argument is true, then this comment is optional.
   * Do not bother commenting this job if this is not the first time it
   * has been seen, and it has been recently modified (hopefully it was
   * a comment change).  If there is no comment for the job, comment it
   * this time.
   */

  if (optional &&
      (!schd_FirstRun) &&
      (job->comment != NULL) &&
      !(job->flags & JFLAGS_FIRST_SEEN) &&
      (MIN_COMMENT_AGE && ((schd_TimeNow - job->mtime) < MIN_COMMENT_AGE)))
    {
    return;
    }

  if (reason == NULL)
    reason = "";

  /* Nothing to do if the job already carries exactly this comment. */
  if ((job->comment != NULL) && (strcmp(reason, job->comment) == 0))
    return;

  /*
   * Make the private copy first, so that an allocation failure can be
   * reported before anything has been sent to the server or freed.
   */
  msg_ptr = schd_strdup(reason);

  if (msg_ptr == NULL)
    {
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
               "schd_strdup(job->comment)");
    return;
    }

  /* Alter PBS' view of the job. */
  schd_alterjob(connector, job, ATTR_comment, msg_ptr, NULL);

  /* Replace the old comment (if any) with the new copy. */
  free(job->comment);

  job->comment = msg_ptr;

  return;
  }
Exemple #2
0
/*
 * This function takes a pointer to a struct batch_status for a job, and
 * fills in the appropriate fields of the supplied job struct.  It returns
 * the number of items that were found.
 *
 *   bs  - batch status record for one job; bs->name supplies the job id
 *         and bs->attribs is walked as a linked list of attributes.
 *   job - job struct to fill in.  It is zeroed first, so any previous
 *         contents are discarded without being freed.
 *
 * Returns -1 if a required string copy could not be allocated (the
 * failure is logged).  In that case 'job' may be partially filled in.
 *
 * Besides filling in the struct, this function may alter the job on the
 * server: it sets a default "mem" request when none was given, and moves
 * a queued job that has an exec host back to the submit queue.
 */
int
schd_get_jobinfo(Batch_Status *bs, Job *job)
  {
  int       changed = 0;
  int       istrue;
  char     *id = "schd_get_jobinfo";
  char     *at_sign;
  char     *p, *tmp_p, *var_p;
  AttrList *attr;

  memset((void *)job, 0, sizeof(Job));

  job->jobid = schd_strdup(bs->name);

  if (job->jobid == NULL)
    {
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
               "schd_strdup(bs->name)");
    return (-1);
    }

  changed ++;

  for (attr = bs->attribs; attr != NULL; attr = attr->next)
    {

    /*
     * If this is the 'owner' field, keep only the user name (the part
     * before the '@' that separates user and hostname).
     */
    if (!strcmp(attr->name, ATTR_owner))
      {

      /*
       * Copy the whole value to the heap and cut it at the '@', rather
       * than staging it in a fixed-size stack buffer that an overlong
       * owner string could overflow.
       */
      job->owner = schd_strdup(attr->value);

      if (job->owner == NULL)
        {
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                   "schd_strdup(job->owner)");
        return (-1);
        }

      at_sign = strchr(job->owner, '@');

      if (at_sign)
        *at_sign = '\0'; /* Replace '@' with NUL (ends username). */

      changed ++;

      continue;
      }

    /* The group to which to charge the resources for this job. */
    if (!strcmp(attr->name, ATTR_egroup))
      {
      job->group = schd_strdup(attr->value);

      if (job->group == NULL)
        {
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                   "schd_strdup(job->group)");
        return (-1);
        }

      changed ++;

      continue;
      }

    /* The comment currently assigned to this job. */
    if (!strcmp(attr->name, ATTR_comment))
      {
      job->comment = schd_strdup(attr->value);

      if (job->comment == NULL)
        {
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                   "schd_strdup(job->comment)");
        return (-1);
        }

      changed ++;

      continue;
      }

    /* The host on which this job is running (or was running for
     * suspended or checkpointed jobs). */

    if (!strcmp(attr->name, ATTR_exechost))
      {
      job->exechost = schd_strdup(attr->value);

      if (job->exechost == NULL)
        {
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                   "schd_strdup(job->exechost)");
        return (-1);
        }

      changed ++;

      continue;
      }

    if (!strcmp(attr->name, ATTR_inter))
      {
      /* Is this job interactive or not? */
      if (schd_val2bool(attr->value, &istrue) == 0)
        {
        if (istrue)
          job->flags |= JFLAGS_INTERACTIVE;
        else
          job->flags &= ~JFLAGS_INTERACTIVE;

        changed ++;
        }
      else
        {
        DBPRT(("%s: can't parse %s = %s into boolean\n", id,
               attr->name, attr->value));
        }

      continue;
      }

    if (!strcmp(attr->name, ATTR_state))
      {
      /* State is one of 'R', 'Q', 'E', etc. */
      job->state = attr->value[0];
      changed ++;
      continue;
      }

    if (!strcmp(attr->name, ATTR_queue))
      {
      job->qname = schd_strdup(attr->value);

      if (job->qname == NULL)
        {
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                   "schd_strdup(job->qname)");
        return (-1);
        }

      job->flags |= JFLAGS_QNAME_LOCAL;

      changed ++;
      continue;
      }

    if (!strcmp(attr->name, ATTR_v))
      {
      /* Work on a scratch copy, since the value is cut up in place. */
      var_p = schd_strdup(attr->value);

      if (var_p == NULL)
        {
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                   "schd_strdup(Variable_List)");
        return (-1);
        }

      /*
       * Pull out the value of PBS_O_QUEUE: the text after the first '='
       * that follows the name, skipping any leading ',' or ' ' and
       * ending at the next ',' or ' '.  This is done by hand rather
       * than with strtok() so the process-wide tokenizer state is not
       * disturbed.
       */
      p = NULL;

      tmp_p = strstr(var_p, "PBS_O_QUEUE");

      if (tmp_p)
        tmp_p = strchr(tmp_p, '=');

      if (tmp_p)
        {
        tmp_p ++;
        tmp_p += strspn(tmp_p, ", ");

        if (*tmp_p != '\0')
          {
          tmp_p[strcspn(tmp_p, ", ")] = '\0';
          p = tmp_p;
          }
        }

      if (p != NULL)
        {
        job->oqueue = schd_strdup(p);
        }
      else
        {
        /* if the originating queue is unknown, default
         * to the locally defined "submit" queue.
         */
        job->oqueue = schd_strdup(schd_SubmitQueue->queue->qname);
        }

      free(var_p);

      if (job->oqueue == NULL)
        {
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                   "schd_strdup(job->oqueue)");
        return (-1);
        }

      changed ++;
      continue;
      }

    if (!strcmp(attr->name, ATTR_l))
      {
      if (!strcmp(attr->resource, "arch"))
        {
        job->arch = schd_strdup(attr->value);

        if (job->arch == NULL)
          {
          log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                     "schd_strdup(job->arch)");
          return (-1);
          }

        changed ++;

        }
      else if (!strcmp(attr->resource, "mem"))
        {
        job->memory = schd_val2byte(attr->value);
        changed ++;

        }
      else if (!strcmp(attr->resource, "ncpus"))
        {
        job->ncpus = atoi(attr->value);
        changed ++;

        }
      else if (!strcmp(attr->resource, "walltime"))
        {
        job->walltime = schd_val2sec(attr->value);
        changed ++;

        }

      /* That's all for requested resources. */
      continue;
      }

    if (!strcmp(attr->name, ATTR_used))
      {
      if (!strcmp(attr->resource, "walltime"))
        {
        job->walltime_used = schd_val2sec(attr->value);
        changed ++;
        }

      /* No other interesting cases. */
      continue;
      }

    /* Creation time attribute. */
    if (!strcmp(attr->name, ATTR_ctime))
      {
      /* How long ago was it put in the queue ? */
      job->time_queued = schd_TimeNow - atoi(attr->value);
      continue;
      }

    /* Modified time attribute. */
    if (!strcmp(attr->name, ATTR_mtime))
      {
      /* When was the job last modified? */
      job->mtime = atoi(attr->value);
      continue;
      }

    /* Job Substate attribute. */
    if (!strcmp(attr->name, ATTR_substate))
      {
      if (atoi(attr->value) == 43 /* JOB_SUBSTATE_SUSPEND */)
        job->flags |= JFLAGS_SUSPENDED;

      continue;
      }

    /*
     * When was the job last eligible to run?  When a user-hold is
     * released, this value is updated to the current time.  This
     * prevents users from gaining higher priority from holding their
     * jobs.
     */
    if (!strcmp(attr->name, ATTR_etime))
      {
      job->eligible = schd_TimeNow - atoi(attr->value);

      continue;
      }
    }

  /*
   * If the job carried no variable list at all, the originating queue
   * was never set.  Apply the same default as above so that the code
   * below never hands a NULL queue name to get_default_mem() or strcmp().
   */
  if (job->oqueue == NULL)
    {
    job->oqueue = schd_strdup(schd_SubmitQueue->queue->qname);

    if (job->oqueue == NULL)
      {
      log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                 "schd_strdup(job->oqueue)");
      return (-1);
      }
    }

  /* No usable memory request: apply the originating queue's default. */
  if (job->memory < 1)
    {
    job->memory = get_default_mem(job->oqueue);
    schd_alterjob(connector, job, ATTR_l, schd_byte2val(job->memory), "mem");
    changed++;
    }

  /*
   * If this job is in the "Running" or "Suspended" state, compute how
   * many seconds remain until it is completed.
   */
  if (job->state == 'R' || job->state == 'S')
    {
    job->time_left = job->walltime - job->walltime_used;
    }

  /*
   * If this job was enqueued since the last time we ran, set the job
   * flag to indicate that we have not yet seen this job.  This makes it
   * a candidate for additional processing.  There may be some inaccuracy,
   * since the time_t has resolution of 1 second.  Attempt to err on the
   * side of caution.
   */
  if ((job->state == 'Q') && (job->time_queued != UNSPECIFIED))
    {
    if (job->time_queued <= (schd_TimeNow - schd_TimeLast))
      {
      job->flags |= JFLAGS_FIRST_SEEN;
      }
    }

  /*
   * If this job was previously running and is now queued, then we
   * need to (a) flag it as having been checkpointed, and (b) move
   * it back to the submit queue, if its not already there.
   */
  if (job->exechost && job->state == 'Q')
    {
    job->flags |= JFLAGS_CHKPTD;

    /* The queue name is only known if the queue attribute was seen. */
    if ((job->qname != NULL) &&
        strcmp(job->qname, schd_SubmitQueue->queue->qname))
      {
      sprintf(log_buffer, "moving Q'd job %s back to SUBMIT Q",
              job->jobid);
      log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
      pbs_movejob(connector, job->jobid, schd_SubmitQueue->queue->qname,
                  NULL);
      }
    }

  /*
   * if this job is currently Suspended (a substate of 'R'unning), then
   * pretend its queued, so that the scheduling logic will work.
   */
  if (job->state == 'S')
    {
    job->state = 'Q';
    job->flags |= JFLAGS_SUSPENDED;
    }

  /* if this job is suspended, checkpointed, or otherwise "queued"
   * on an execution queue, update the internal representation
   * to pretend it is really on the submit queue.
   */

  if ((job->flags & JFLAGS_SUSPENDED) || (job->flags & JFLAGS_CHKPTD))
    {
    free(job->qname);
    job->qname = schd_strdup(schd_SubmitQueue->queue->qname);

    if (job->qname == NULL)
      {
      log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                 "schd_strdup(job->qname)");
      return (-1);
      }

    /*
     * NOTE(review): the queue-attribute branch above sets this flag
     * whenever it stores a schd_strdup'd qname; do the same here for the
     * case where no queue attribute was seen.  Presumably the flag tells
     * the owner of the Job that qname must be freed -- confirm.
     */
    job->flags |= JFLAGS_QNAME_LOCAL;
    }

  /*
   * If this job came from the EXPRESS queue, set the flag so that it
   * will be treated with the highest of priority.
   */
  if (!strcmp(job->oqueue, schd_EXPRESS_Q_NAME))
    job->flags |= JFLAGS_PRIORITY;

  /*
   * If the 'etime' attribute wasn't found, set it to the time the job has
   * been queued.  Most jobs will be eligible to run their entire lifetime.
   * The exception is a job that has been held - if it was a user hold,
   * the release will reset the etime to the latest value.
   * If no eligible time was given, use the job's creation time.
   */
  if (!job->eligible)
    job->eligible = job->time_queued;

  /* if this job has waited too long, and its queue is NOT over its
   * shares, then bump it up in priority.
   */
  if (job->eligible > schd_MAX_WAIT_TIME && job->sort_order <= 100)
    job->flags |= JFLAGS_WAITING;

  return (changed);
  }
Exemple #3
0
/*
 * Make a job's "mem" and "ncpus" resource requests agree with the number
 * of nodes in job->nodes, altering the job on the server where they do
 * not.
 *
 *   job     - job to adjust; job->nodes is the node count the requests
 *             are derived from.
 *   cpu_req - the CPU count the job asked for (0 if none).
 *   mem_req - the memory amount the job asked for (0 if none).
 *
 * Returns 0 on success, whether or not anything had to be changed.
 * Returns 1 if the memory value could not be formatted or an alter
 * request failed.  Any change made is written to the log.
 */
static int
bump_rsrc_requests(Job *job, int cpu_req, size_t mem_req)
  {
  char   *id = "bump_rsrc_requests";
  char   *val;
  char    buf[64];
  int     bumped = 0;

  /*
   * If a job gives the "wrong" value for the memory request (for the
   * number of nodes required to fulfill the request), then bump the
   * memory request to the amount of memory the assigned nodes would
   * consume.
   */

  if ((mem_req == 0) || (mem_req != (job->nodes * MB_PER_NODE)))
    {

    /* Compute the "right" memory request, based on the nodes. */
    val = schd_byte2val(job->nodes * MB_PER_NODE);

    if (val == NULL)
      return (1);

    if (schd_alterjob(connector, job, ATTR_l, val, "mem"))
      {
      DBPRT(("%s: Failed to set job %s \"mem\" attr to %s\n", id,
             job->jobid, val));
      return (1);
      }

    bumped++;
    }

  /*
   * If a job gives the "wrong" value for the CPU request (for the
   * number of nodes required to fulfill the request), then bump the
   * CPU request to the number of CPUs the assigned nodes would
   * consume.
   */
  if ((cpu_req == 0) || (cpu_req != (job->nodes * PE_PER_NODE)))
    {

    /* Compute the "right" CPU request, based on the nodes. */
    sprintf(buf, "%d", (job->nodes * PE_PER_NODE));

    if (schd_alterjob(connector, job, ATTR_l, buf, "ncpus"))
      {
      DBPRT(("%s: Failed to set job %s \"ncpus\" attr to %s\n", id,
             job->jobid, buf));
      return (1);
      }

    bumped++;
    }

  if (bumped)
    {
    /*
     * Two formatted byte values are needed in one message, so the new
     * value is saved in buf before schd_byte2val() is called again.
     * (Presumably it returns a pointer to storage that the next call
     * overwrites -- confirm.)  It is treated as able to return NULL, as
     * in the check above.  strncpy() does not terminate the destination
     * when the source fills it, so terminate explicitly.
     */
    val = schd_byte2val(job->nodes * MB_PER_NODE);
    strncpy(buf, (val != NULL) ? val : "?", sizeof(buf) - 1);
    buf[sizeof(buf) - 1] = '\0';

    val = schd_byte2val(mem_req);
    sprintf(log_buffer, "%s cpu/mem (%d/%s) bumped to %d/%s (%d nodes)",
            job->jobid, cpu_req, (val != NULL) ? val : "?",
            job->nodes * PE_PER_NODE, buf, job->nodes);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
    DBPRT(("%s: %s\n", id, log_buffer));
    }

  return (0);
  }