Example #1
0
/*
 * schd_reject_job - reject (delete) a job, handing 'reason' to PBS as part
 * of the deletion message.
 *
 * When schd_TEST_ONLY is set, nothing is deleted; the would-be action is
 * only printed via DBPRT.
 *
 * Returns:
 *    0  on success (or in test-only mode),
 *    1  if pbs_deljob() reported an error,
 *   -1  if the message buffer could not be allocated.
 */
int
schd_reject_job(Job *job, char *reason)
  {
  char   *id = "schd_reject_job";
  static char *message = NULL;   /* allocated once, reused on later calls */
  int     rc = 0;

  /* Lazily allocate the message buffer the first time through. */
  if (message == NULL)
    {
    if ((message = (char *)malloc(MSG_BUFFER_SIZE)) == NULL)
      {
      (void)sprintf(log_buffer, "cannot malloc %d bytes\n",
                    MSG_BUFFER_SIZE);
      log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
      return (-1);
      }
    }

  DBPRT((
          "*************************************************************************\n"));

  if (schd_TEST_ONLY)
    {
    DBPRT(("JOB %s WOULD HAVE BEEN DELETED!!!\n", job->jobid));
    DBPRT(("Message: %s\n", reason));
    }
  else
    {
    /*
     * 'reason' is caller-supplied and of unknown length, so the write is
     * bounded by the size of the buffer; an overlong reason is truncated
     * rather than overflowing the allocation.
     */
    (void)snprintf(message, MSG_BUFFER_SIZE,
                   "\n"
                   "PBS job '%s' was rejected by all execution queues.\n"
                   "\n"
                   "The reason given for this action was :\n"
                   "\n"
                   "       %s\n"
                   "\n"
                   "Please correct the problem and resubmit your job, or contact the PBS\n"
                   "administrator for assistance.\n"
                   "\n"
                   "Thank you.\n"
                   "\n",
                   job->jobid, reason);

    /*
     * Ask PBS to delete the job from the queue, which should deliver the
     * message to the user.
     */
    rc = pbs_deljob(connector, job->jobid, message);

    if (rc)
      {
      (void)sprintf(log_buffer, "pbs_deljob failed: error %d", rc);
      log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
      DBPRT(("%s: %s\n", id, log_buffer));
      return 1;
      }

    /*
     * Delete this job from the queue's list (move to a NULL queue)
     */
    schd_move_job_to(job, NULL);

    DBPRT(("JOB %s DELETED!!!\n", job->jobid));

    DBPRT(("Message: %s\n", reason));
    }

  DBPRT((
          "*************************************************************************\n"));
  return 0;
  }
Example #2
0
/*
 * Jobs queued on the special queue should be treated as highest priority.
 * They are sorted onto the top of the list of jobs that is created in the
 * usersort.c code.  That sorted list is then split out onto each of the
 * queues, so that each queue has a list of the jobs it "owns".  The jobs
 * then carry a backpointer to their owner queue.
 *
 * This works really nicely, since all the information about each queue
 * (including the list of jobs queued/running/etc on it) lives right on
 * the Queue structure.  It is a clean, elegant and fully general solution.
 *
 * An unfortunate side effect of this "demultiplexing" is that jobs that
 * were marked "special" end up claimed by the SpecialQueue.  The scheduler
 * looks for jobs only on the SubmitQueue queue, so it never notices that
 * there are special jobs enqueued.  In order to address this, any jobs on
 * the SpecialQueue are marked "waiting/high priority", and placed at the
 * head of the list of jobs in the SubmitQueue.
 *
 * This seems like an evil hack at first, and it arguably is.  However, if
 * there are multiple submission queues, it is relatively simple to support
 * them by simply causing them to be inserted in the submit queue's list.
 *
 * Returns the number of jobs moved to the SubmitQueue (0 if the special
 * queue is empty, disabled or stopped), or -1 if the special or submit
 * queue is not defined.
 */
static int
fixup_special(void)
  {
  char   *id = "fixup_special";
  Job    *job, *submitjobs, *nextjob, *tail;
  Queue  *queue;
  char    comment[MAX_TXT + 1];
  int     fixedup = 0, justcomment = 0;

  /*
   * Sanity check -- this function shouldn't be called if there is no
   * valid SpecialQueue.
   */

  if (schd_SpecialQueue == NULL || schd_SpecialQueue->queue == NULL)
    {
    DBPRT(("%s: special code called but no special queue defined!\n", id));
    return (-1);
    }

  queue = schd_SpecialQueue->queue;

  if (queue->jobs == NULL)
    {
    DBPRT(("%s: no jobs on special queue '%s'.  Ignoring.\n", id,
           queue->qname));
    return (0);
    }

  /*
   * See if the special queue has anything to do, and if it will allow
   * anything to be done to it.  If both flags are set, the "not started"
   * comment wins since it is written last.
   */
  if (queue->flags & QFLAGS_DISABLED)
    {
    (void)snprintf(comment, sizeof(comment), "Queue %s not enabled",
                   queue->qname);
    justcomment ++;
    }

  if (queue->flags & QFLAGS_STOPPED)
    {
    (void)snprintf(comment, sizeof(comment), "Queue %s not started",
                   queue->qname);
    justcomment ++;
    }

  /*
   * If the jobs on the special queue should just be commented, do so and
   * return 0 -- no jobs were fixed up.
   */
  if (justcomment)
    {
    for (job = queue->jobs; job != NULL; job = job->next)
      schd_comment_job(job, comment, JOB_COMMENT_REQUIRED);

    return 0;
    }

  /*
   * The jobs are about to be moved onto the SubmitQueue, so it must exist.
   */
  if (schd_SubmitQueue == NULL || schd_SubmitQueue->queue == NULL)
    {
    DBPRT(("%s: special code called but no submit queue defined!\n", id));
    return (-1);
    }

  /*
   * Detach the list of jobs from the SubmitQueue.  They will be tacked
   * back onto the end of the list once the special jobs have been moved
   * to the head.
   */
  submitjobs = schd_SubmitQueue->queue->jobs;

  schd_SubmitQueue->queue->jobs = NULL;

  /*
   * Any jobs queued on the special queue are now moved to the newly empty
   * SubmitQueue list.  Mark the jobs as high priority and waiting.
   */
  for (job = queue->jobs; job != NULL; job = nextjob)
    {

    /*
     * Keep track of the next job -- the next pointer on this job will
     * be modified by the schd_move_job_to() function.
     */
    nextjob = job->next;

    /* Only queued jobs are moved; anything else stays on the special queue. */
    if (job->state != 'Q')
      continue;

    job->flags |= (JFLAGS_WAITING | JFLAGS_PRIORITY);

    /*
     * Move the job from the special queue to the submit queue.  This
     * keeps the counts of the queued jobs correct in both queues.
     */
    schd_move_job_to(job, schd_SubmitQueue->queue);

    fixedup ++;
    }

  /*
   * Re-attach the original submit jobs behind whatever special jobs now
   * sit on the SubmitQueue.  The tail is located on the SubmitQueue's own
   * list rather than remembered from the loop above: the last job visited
   * there may have been skipped (and so still belong to the special
   * queue), and there may have been no queued special jobs at all, in
   * which case the original list is simply restored.
   */
  if (schd_SubmitQueue->queue->jobs == NULL)
    {
    schd_SubmitQueue->queue->jobs = submitjobs;
    }
  else
    {
    for (tail = schd_SubmitQueue->queue->jobs; tail->next != NULL;
         tail = tail->next)
      /* walk to the end of the list */ ;

    tail->next = submitjobs;
    }

  DBPRT(("%s: fixed up %d jobs.\n", id, fixedup));

  return (fixedup);
  }
Example #3
0
/*
 * schd_run_job_on - start (or, for a suspended job, resume) 'job' on
 * 'exechost', optionally moving it to 'destq' first.
 *
 *   job          the job to run.
 *   destq        queue to run the job in; may be NULL, in which case the
 *                job is left on its current queue.
 *   exechost     execution host handed to pbs_runjob_err().
 *   set_comment  if non-zero, set a "Started on <date>" comment on the job.
 *
 * On success the local job state is set to 'R' and the owning queue's
 * queued/running counters and idle timer are updated.
 *
 * Returns 0 if the job was started/resumed, -1 on any failure.
 */
int
schd_run_job_on(Job *job, Queue *destq, char *exechost, int set_comment)
  {
  char   *id = "schd_run_job_on";
  char    reason[128];
  char   *date;
  char   *qname;
  char   *action;
  size_t  datelen;
  Queue  *srcq = NULL;
  int     moved = 0;
  int     ret = 0;
  int     local_errno = 0;

  /*
   * Name of the queue the job will run in, for log messages.  destq may
   * be NULL, so fall back to the job's current queue.
   */
  if (destq != NULL)
    qname = destq->qname;
  else if (job->queue != NULL)
    qname = job->queue->qname;
  else
    qname = "(none)";

  if (set_comment)
    {
    /* Get the datestamp from 'ctime()'.  Remove the trailing '\n'. */
    date = ctime(&schd_TimeNow);

    if (date != NULL)
      {
      datelen = strlen(date);

      if ((datelen > 0) && (date[datelen - 1] == '\n'))
        date[datelen - 1] = '\0';
      }
    else
      {
      /* ctime() can fail for an unrepresentable time. */
      date = "(unknown time)";
      }

    (void)snprintf(reason, sizeof(reason), "Started on %s%s%s", date,
                   (job->flags & JFLAGS_PRIORITY) ?
                   " (EXPRESS/high priority job)" : "",
                   (job->flags & JFLAGS_WAITING) ?
                   " (long-waiting job)" : "");

    schd_comment_job(job, reason, JOB_COMMENT_REQUIRED);
    }

  /* If this is NOT a suspended job... */
  if (!(job->flags & JFLAGS_SUSPENDED))
    {

    /*
     * If a destination Queue is provided, and it is different from the
     * source queue, then ask PBS to move the job to that queue before
     * running it.
     */
    srcq = job->queue;

    if ((destq != NULL) && (strcmp(destq->qname, srcq->qname) != 0))
      {
      if (pbs_movejob_err(connector, job->jobid, destq->qname, NULL, &local_errno))
        {
        (void)sprintf(log_buffer, "move job %s to queue %s failed, %d",
                      job->jobid, destq->qname, local_errno);
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER,
                   id, log_buffer);
        DBPRT(("%s: %s\n", id, log_buffer));
        return (-1);
        }

      schd_move_job_to(job, destq);
      moved = 1;
      }

    /*
     * Give the job handle (JOBID) to PBS to run.
     */
    if (pbs_runjob_err(connector, job->jobid, exechost, NULL, &local_errno))
      {
      (void)sprintf(log_buffer, "failed start job %s on queue %s@%s, %d",
                    job->jobid, qname, exechost, local_errno);
      log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
      DBPRT(("%s: %s\n", id, log_buffer));

      /*
       * Running failed! Move the job back to the source queue (if it
       * was actually moved above) before returning. This prevents jobs
       * being left in execution queues.
       */

      if (moved)
        {
        DBPRT(("Attempting to move job %s back to queue %s\n",
               job->jobid, srcq->qname));

        if (pbs_movejob_err(connector, job->jobid, srcq->qname, NULL, &local_errno))
          {
          (void)sprintf(log_buffer,
                        "failed to move job %s back to queue %s, %d",
                        job->jobid, srcq->qname, local_errno);
          log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                     log_buffer);
          DBPRT(("%s: %s\n", id, log_buffer));
          }

        /* The local bookkeeping is restored even if PBS refused the move. */
        schd_move_job_to(job, srcq);
        }

      return (-1);
      }

    action = "started";
    }
  else    /* it IS a suspended job */
    {

    /*
     * Only re-home the job when a destination was given; moving to a NULL
     * queue would drop the job from the local lists.
     */
    if (destq != NULL)
      schd_move_job_to(job, destq);

    ret = pbs_sigjob(connector, job->jobid, "resume", NULL);

    if (ret)
      {
      (void)sprintf(log_buffer, "resume of job %s FAILED (%d)",
                    job->jobid, ret);
      log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
      DBPRT(("%s: %s\n", id, log_buffer));
      return (-1);
      }

    job->flags &= ~JFLAGS_SUSPENDED;

    action = "resumed";
    }

  /* PBS accepted the job (and presumably will run it). Log the fact. */
  (void)sprintf(log_buffer, "job %s %s on %s@%s", job->jobid, action,
                qname, exechost);

  log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

  DBPRT(("%s: %s\n", id, log_buffer));

  /*
   * Change the state of the local representation of the job to "Running".
   */
  job->state = 'R';

  /*
   * Account for the job on this queue's statistics.  'queued' will be
   * bumped up if the queued job was moved to a new destination queue.
   */
  if (job->queue != NULL)
    {
    job->queue->queued --;

    job->queue->running ++;

    /* The queue is no longer idle.  Unset the idle timer. */
    job->queue->idle_since = 0;
    }

  return (0);    /* Job successfully started. */
  }