Exemplo n.º 1
0
/*
 * Given a list of jobs, ordered from most-eligible to least-eligible to
 * run, attempt to place as many of them as possible into the queues listed
 * in qlist.  Jobs that will not currently fit are ignored (i.e. the entire
 * list is processed, running each job in order that fits).
 *
 * Jobs are run by run_job_on() as they are found on the list.  Resources
 * must be supplied because the jobs are tested against them in
 * 'schd_resources_avail()', and they are updated by sched_run_jobs_on().
 *
 * This function returns the number of jobs run, or -1 on error.
 */
int
schd_pack_queues(Job *jobs, QueueList *qlist, char *reason)
{
    char   *id = "schd_pack_queues";
    Job    *job, *nextjob;
    QueueList *qptr;
    Queue *queue;
    int    allfull, jobsrun;

    jobsrun = 0;

    DBPRT(("%s: scheduling queues", id));

    for (qptr = qlist; qptr != NULL; qptr = qptr->next)
    {
        if (!(qptr->queue->flags & QFLAGS_DISABLED) &&
                !(qptr->queue->flags & QFLAGS_NODEDOWN))
            DBPRT((" %s@%s", qptr->queue->qname, qptr->queue->exechost));
    }

    DBPRT((".\n"));

    if (jobs == NULL)
    {
        DBPRT(("No jobs available for QueueList %s%s - all done!",
               qlist->queue->qname, qlist->next ? " ..." : ""));
        return (0);
    }

    /*
     * Consider jobs from the list of queues. For each job, if it appears
     * eligible to run, try to find a queue on which to place it.
     *
     * Note that schd_run_job_on() may remove the job from the list, so
     * this function must keep track of the current job's next pointer.
     */
    for (job = jobs; job != NULL; job = nextjob)
    {
        nextjob = job->next;

        /* Ignore any non-queued jobs in the list. */

        if (job->state != 'Q')
            continue;

        strcpy(reason, "Requested architecture not currently available");

        queue = schd_static_backfill(job, qlist, reason);

        if (queue == NULL)
        {
            /* There is no queue currently available in which to run
             * this job.  However, resources have been reserved for
             * this job by the static_backfill() routine, and the job
             * comment contains the estimated start time and the reason
             * why it can't run now. Comment the job and go on to the
             * next job on the list.
             */
            schd_comment_job(job, reason, JOB_COMMENT_OPTIONAL);
            continue;
        }

        /* Attempt to run this job on the above-supplied queue */
        if (schd_run_job_on(job, queue, queue->exechost, SET_JOB_COMMENT))
        {
            (void)sprintf(log_buffer, "Unable to run batch job %s on queue %s",
                          job->jobid, queue->qname);
            log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
            DBPRT(("%s: %s\n", id, log_buffer))
        }
        else
        {
Exemplo n.º 2
0
/*
 * Jobs queued on the special queue should be treated as highest priority.
 * They are sorted onto the top of the list of jobs that is created in the
 * usersort.c code.  That sorted list is then split out onto each of the
 * queues, so that each queue has a list of the jobs it "owns".  The jobs
 * then carry a backpointer to their owner queue.
 *
 * An unfortunate side effect of this "demultiplexing" is that jobs that
 * were marked "special" end up claimed by the SpecialQueue.  The scheduler
 * looks for jobs only on the SubmitQueue queue, so it never notices that
 * there are special jobs enqueued.  In order to address this, any queued
 * jobs on the SpecialQueue are marked "waiting/high priority", and placed
 * at the head of the list of jobs in the SubmitQueue.
 *
 * This seems like an evil hack at first, and it arguably is.  However, if
 * there are multiple submission queues, it is relatively simple to support
 * them by simply causing them to be inserted in the submit queue's list.
 *
 * Returns the number of jobs moved onto the submit queue, 0 if there was
 * nothing to do (no special jobs, or the special queue is disabled/stopped
 * and its jobs were only commented), or -1 if the special or submit queue
 * is not defined.
 */
static int
fixup_special(void)
  {
  char   *id = "fixup_special";
  Job    *job, *submitjobs, *nextjob;
  Job    *specialtail = NULL;   /* last job actually moved, if any */
  Queue  *queue;
  char    comment[MAX_TXT + 1];
  int     fixedup = 0, justcomment = 0;

  /*
   * Sanity check -- this function shouldn't be called if there is no
   * valid SpecialQueue.
   */

  if (schd_SpecialQueue == NULL || schd_SpecialQueue->queue == NULL)
    {
    DBPRT(("%s: special code called but no special queue defined!\n", id));
    return (-1);
    }

  queue = schd_SpecialQueue->queue;

  if (queue->jobs == NULL)
    {
    DBPRT(("%s: no jobs on special queue '%s'.  Ignoring.\n", id,
           queue->qname));
    return (0);
    }

  /*
   * See if the special queue has anything to do, and if it will allow
   * anything to be done to it.  If both flags are set, the "not started"
   * text is the one that ends up in the comment.
   */
  if (queue->flags & QFLAGS_DISABLED)
    {
    (void)snprintf(comment, sizeof(comment), "Queue %s not enabled",
                   queue->qname);
    justcomment ++;
    }

  if (queue->flags & QFLAGS_STOPPED)
    {
    (void)snprintf(comment, sizeof(comment), "Queue %s not started",
                   queue->qname);
    justcomment ++;
    }

  /*
   * If the jobs on the special queue should just be commented, do so and
   * return 0 -- no jobs were fixed up.
   */
  if (justcomment)
    {
    for (job = queue->jobs; job != NULL; job = job->next)
      schd_comment_job(job, comment, JOB_COMMENT_REQUIRED);

    return 0;
    }

  /* From here on the submit queue is modified, so it must exist. */
  if (schd_SubmitQueue == NULL || schd_SubmitQueue->queue == NULL)
    {
    DBPRT(("%s: special jobs found but no submit queue defined!\n", id));
    return (-1);
    }

  /*
   * Detach the list of jobs from the SubmitQueue.  They will be tacked
   * back onto the end of the list once the special jobs have been moved
   * to the head.
   */
  submitjobs = schd_SubmitQueue->queue->jobs;

  schd_SubmitQueue->queue->jobs = NULL;

  /*
   * Any jobs queued on the special queue are now moved to the tail of the
   * newly empty SubmitQueue list.  Mark the jobs as high priority and
   * waiting.
   */
  for (job = queue->jobs; job != NULL; job = nextjob)
    {

    /*
     * Keep track of the next job -- the next pointer on this job will
     * be modified by the schd_move_job_to() function.
     */
    nextjob = job->next;

    /* Non-queued jobs stay where they are on the special queue. */
    if (job->state != 'Q')
      continue;

    job->flags |= (JFLAGS_WAITING | JFLAGS_PRIORITY);

    /*
     * Move the job from the special queue to the tail of the submit
     * queue.  This keeps the counts of the queued jobs correct in both
     * queues.
     */
    schd_move_job_to(job, schd_SubmitQueue->queue);

    /*
     * Remember only jobs that were really moved.  Tracking every job
     * walked would let a trailing non-queued job (still owned by the
     * special queue) become the splice point below.
     * NOTE(review): relies on schd_move_job_to() appending at the tail
     * of the destination list -- confirm against its definition.
     */
    specialtail = job;

    fixedup ++;
    }

  /*
   * Re-attach the original submit jobs: after the last moved special job
   * if there was one, otherwise straight back onto the submit queue so
   * they are not lost.
   */
  if (specialtail != NULL)
    specialtail->next = submitjobs;
  else
    schd_SubmitQueue->queue->jobs = submitjobs;

  DBPRT(("%s: fixed up %d jobs.\n", id, fixedup));

  return (fixedup);
  }
Exemplo n.º 3
0
/*
 * Given a list of jobs, ordered from most-eligible to least-eligible to
 * run, attempt to place as many of them as possible into the queues listed
 * in qlist.  Jobs that will not currently fit are skipped (i.e. the entire
 * list is processed, running each job, in order, that fits).
 *
 * Jobs are started with schd_run_job_on() as they are found on the list,
 * and every started job is charged against its queue with
 * schd_charge_job().  'reason' is a caller-supplied text buffer: the
 * limit checks write into it, and its contents are used to comment jobs
 * that cannot be run right now.
 *
 * If prime-time enforcement is relaxed on any queue at the end of the
 * first pass, the job list is walked a second time.
 *
 * This function returns the number of jobs run, or -1 if
 * schd_run_job_on() fails for a job.
 */
int
schd_pack_queues(Job *jobs, QueueList *qlist, char *reason)
  {
  char   *id = "schd_pack_queues";
  Job    *job, *nextjob;
  QueueList *qptr;
  Queue *queue, *firstfit;
  int    allfull, jobsrun, rerun;

  jobsrun = 0;

  DBPRT(("%s: scheduling queues", id));

  for (qptr = qlist; qptr != NULL; qptr = qptr->next)
    {
    DBPRT((" %s@%s", qptr->queue->qname, qptr->queue->exechost));
    }

  DBPRT((".\n"));

  if (jobs == NULL)
    {
    DBPRT(("No jobs available for QueueList %s%s - all done!",
           qlist->queue->qname, qlist->next ? " ..." : ""));
    return (0);
    }

  /* This is the first run through this part of the code. */
  rerun = 0;

  /*
   * Consider jobs from the list of queues.  For each job, if it appears
   * eligible to run, try to find a queue on which to place it.
   *
   * Note that schd_run_job_on() may remove the job from the list, so
   * this function must keep track of the current job's next pointer.
   */

run_job_list:

  for (job = jobs; job != NULL; job = nextjob)
    {
    nextjob = job->next;

    /* Ignore any non-queued jobs in the list. */

    if (job->state != 'Q')
      continue;

    /*
     * Would this job cause the user to exceed group's current allocation?
     */
    if (schd_ENFORCE_ALLOCATION && schd_TimeNow >= schd_ENFORCE_ALLOCATION)
      {
      if (job->group != NULL)
        {
        if (schd_is_over_alloc(job->group))
          {

          /*
           * schd_reject_over_alloc() will delete the job from
           * PBS, and also from the queue's job list.
           */
          if (schd_reject_over_alloc(job))
            {
            (void)sprintf(log_buffer,
                          "reject_over_alloc() failed for job %s\n",
                          job->jobid);
            log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER,
                       id, log_buffer);
            }

          continue;
          }
        }
      else
        {
        (void)sprintf(log_buffer,
                      "ENFORCE ALLOCATION set, but job %s has no group field",
                      job->jobid);
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                   log_buffer);

        continue;
        }
      }

    /* See if any queues are available for scheduling now. */
    allfull = 1;  /* Assume all queues are full to start. */

    for (qptr = qlist; qptr != NULL; qptr = qptr->next)
      {

      /*
       * A queue not yet flagged full is flagged now if it has no
       * resource information, or if schd_evaluate_system() returns
       * zero for it.  The flag test must be parenthesized: '==' binds
       * tighter than '&', so "flags & QFLAGS_FULL == 0" is always 0.
       */
      if (((qptr->queue->flags & QFLAGS_FULL) == 0) &&
          ((qptr->queue->rsrcs == NULL) ||
           (!schd_evaluate_system(qptr->queue->rsrcs, reason))))
        {
        DBPRT(("%s: evaluate_system: %s\n", id,
               qptr->queue->rsrcs ? reason : "No resources"));
        DBPRT(("%s: Marking queue %s@%s full\n", id,
               qptr->queue->qname, qptr->queue->exechost));
        qptr->queue->flags |= QFLAGS_FULL;
        }

      /*
       * If an empty queue has not yet been found, check this one.
       * If this queue is not full, then all queues are not full.
       *
       * NOTE(review): 'allfull' is computed here but is not consulted
       * anywhere later in this function, so a fully-packed queue list
       * does not currently cut the scan short.
       */
      if (allfull)
        if (!schd_check_queue_limits(qptr->queue, NULL))
          allfull = 0;
      }

    /*
     * Check each queue in the list in order, to see if this job
     * will fit, and if it should be run now.
     */
    firstfit = NULL; /* Haven't found the first queue it fits. */

    for (qptr = qlist; qptr != NULL; qptr = qptr->next)
      {
      queue = qptr->queue;

      /* Skip queues this job does not fit in at all. */
      if (!schd_job_fits_queue(job, queue, reason))
        continue;

      /*
       * If this queue has a user access control list, check that this
       * job can be allowed in it.
       */
      if (queue->useracl && (queue->flags & QFLAGS_USER_ACL))
        {
        if (!schd_useracl_okay(job, queue, reason))
          {
          DBPRT(("%s: %s %s\n", id, job->jobid, reason));
          continue;
          }
        }

      /*
       * If this is the first queue that this job will fit in, then
       * note it.  This is the "best fit" queue (hopefully) and most of
       * the checks below only modify the job comment when the comment
       * refers to this queue.
       */
      if (!firstfit)
        firstfit = queue;

      /*
       * Check that this job will not overrun a dedicated time.  The '0'
       * indicates that we are interested in "now".
       */
      if (schd_ENFORCE_DEDTIME && schd_TimeNow >= schd_ENFORCE_DEDTIME)
        {
        if (!schd_dedicated_can_run(job, queue, 0, reason))
          {
          if (firstfit == queue)
            schd_comment_job(job, reason, JOB_COMMENT_OPTIONAL);

          continue;
          }
        }

      /*
       * Check primetime limits.  If it is primetime now, and the job
       * will complete before primetime, limit it to 1 hour.  If it will
       * overrun primetime, be sure that the amount that falls within
       * primetime will not violate the primetime limit.
       * Special jobs are not subject to primetime walltime limits.
       */
      if (schd_ENFORCE_PRIME_TIME &&
          (schd_TimeNow >= schd_ENFORCE_PRIME_TIME) &&
          (schd_SMALL_JOB_MAX <= 0))
        {
        if (!(job->flags & JFLAGS_PRIORITY))
          {
          if (schd_primetime_limits(job, queue, 0, reason))
            {
            schd_comment_job(job, reason, JOB_COMMENT_OPTIONAL);

            /* Note that a job could have run if it were not pt. */

            if (!rerun)
              queue->flags |= QFLAGS_NPT_JOBS;

            continue;
            }
          }
        }

      /*
       * Check that this job will complete before the beginning of
       * nonprime. The '0' indicates that we are interested in "now".
       */
      if (schd_NONPRIME_DRAIN_SYS)
        {
        /* Note: Special jobs are not subject to this restriction */
        if (!(job->flags & JFLAGS_PRIORITY))
          {
          if (!schd_finish_before_np(job, queue, 0, reason))
            {
            schd_comment_job(job, reason, JOB_COMMENT_OPTIONAL);

            /* Note that a job could have run if it were not pt. */

            if (!rerun)
              queue->flags |= QFLAGS_NPT_JOBS;

            continue;
            }
          }
        }

      /*
       * Check that the queue is actually available to pack jobs
       * into.  Although it was checked above, the above test is
       * very inexpensive, so it's not a big deal to do it again.
       */

      if (schd_check_queue_limits(queue, reason))
        {
        if (firstfit == queue)
          schd_comment_job(job, reason, JOB_COMMENT_OPTIONAL);

        continue;
        }

      /*
       * Check this job against the execution resource limits.
       * There is no point to going any further if this job would
       * overrun the system limits.
       * It is possible for the size of the queues to be larger than
       * the available resources on the machine (i.e. a node board
       * goes down).  Make sure that jobs are not considered that will
       * not be able to run within the system's current resources.
       */

      if (!schd_resources_avail(job, queue->rsrcs, reason) ||
          schd_resource_limits(job, queue->rsrcs, reason))
        {
        if (firstfit == queue)
          schd_comment_job(job, reason, JOB_COMMENT_OPTIONAL);

        /* Continue to the next queue. */
        continue;
        }

      /*
       * Check that this job will complete before the beginning of
       * nonprime. The '0' indicates that we are interested in "now".
       * Note: Special jobs are not subject to this restriction.
       *
       * NOTE(review): this repeats the schd_finish_before_np() test
       * made earlier in this loop with the same arguments; kept as-is.
       */
      if (schd_NONPRIME_DRAIN_SYS && !(job->flags & JFLAGS_PRIORITY))
        {
        if (!schd_finish_before_np(job, queue, 0, reason))
          {
          schd_comment_job(job, reason, JOB_COMMENT_OPTIONAL);
          continue;
          }
        }

      /*
       * If the queue is being drained, don't run the job in it unless
       * (1) there are unreserved resources available, or
       * (2) it would *not* increase the time it will take to drain
       * the queue. Note that drain_by is the absolute time, while
       * the job's walltime is relative to now.
       */

      /* (1) are there unreserved nodes available?
       */
      if (job->nodes >
          (queue->nodes_max - (queue->nodes_assn + queue->nodes_rsvd)))
        {
        /* no, so (2) see if we can backfill with this job... */

        if (queue->flags & QFLAGS_DRAINING)
          {
          if ((schd_TimeNow + job->walltime) > queue->drain_by)
            {
            (void)sprintf(reason, "Queue %s is being drained for high-priority job.",
                          queue->qname);

            if (firstfit == queue)
              schd_comment_job(job, reason, JOB_COMMENT_OPTIONAL);

            continue;
            }
          }
        }

      /*
       * Queue can accept a job, provided the job does not overrun the
       * queue limits.
       */
      if (schd_user_limits(job, queue, reason))
        {
        if (firstfit == queue)
          schd_comment_job(job, reason, JOB_COMMENT_OPTIONAL);

        continue;
        }

      /*
       * Check for fragmentation in the queue, and don't run it if so.
       * If this job is waiting, we don't care - just run it.
       */
      if (schd_AVOID_FRAGS && !(job->flags & JFLAGS_WAITING))
        {
        if (!schd_fragment_okay(job, queue, reason))
          {
          if (firstfit == queue)
            schd_comment_job(job, reason, JOB_COMMENT_OPTIONAL);

          continue;
          }
        }

      /*
       * Found a queue on which this job can run.  Attempt to run it.
       * Leaving the loop here keeps 'queue' equal to qptr->queue.
       */
      break;
      }

    /*
     * If the job was unable to fit on all of the queues, go on to the
     * next one.  For "strict" packing, make this a 'break' and it will
     * stop processing jobs when it finds the first one that does not
     * fit.
     */
    if (!qptr)
      {
      /*
       * If the job did not fit in any of the provided queues, assume
       * that the queue it wants is not available.  Provide a comment,
       * even if it is sort of vague.
       */
      if (firstfit == NULL)
        {
        schd_comment_job(job, schd_JobMsg[NO_RESOURCES],
                         JOB_COMMENT_OPTIONAL);
        }

      continue;
      }

    /* There is a queue free enough to run this job. */
    if (schd_run_job_on(job, qptr->queue, qptr->queue->exechost,
                        SET_JOB_COMMENT))
      {
      (void)sprintf(log_buffer,
                    "Unable to run batch job %s on queue %s",
                    job->jobid, qptr->queue->qname);
      log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

      return (-1);
      }

    /*
     * Account for the job's impact on the queue.
     * This includes subtracting the job's resource requests from the
     * queue's available resources.
     */

    schd_charge_job(job, queue, queue->rsrcs);

    jobsrun ++;
    }

  /*
   * Adjust the observance of primetime on the queues, if necessary.  Do
   * it only if the queue is observing primetime, it has been idle for a
   * while(*), it's close enough(*) to primetime, and there were some jobs
   * that could have been run if primetime had been not-observed.
   */
  if (!rerun &&
      (schd_ENFORCE_PRIME_TIME && schd_TimeNow >= schd_ENFORCE_PRIME_TIME) &&
      schd_NP_DRAIN_BACKTIME > 0 &&
      schd_prime_time(0) &&
      schd_secs_til_nonprime(0) <= schd_NP_DRAIN_BACKTIME)
    {
    for (qptr = qlist; qptr != NULL; qptr = qptr->next)
      {
      queue = qptr->queue;

      /* Were there any jobs that could have run if it were non-pt? */

      if (!(queue->flags & QFLAGS_NPT_JOBS))
        continue;

      /* Queue has running jobs, so it is not idle - ignore it. */
      if (queue->running)
        continue;

      /* Prime-time observance already off - ignore this queue. */
      if (!queue->observe_pt)
        continue;

      /* If a minimum idle time is given, check it. */
      if ((schd_NP_DRAIN_IDLETIME > 0) &&
          (schd_TimeNow - queue->idle_since) <= schd_NP_DRAIN_IDLETIME)
        continue;

      /*
       * Queue has been idle for some time.  Start non-primetime early
       * so those jobs that were refused can start running now.
       */
      (void)sprintf(log_buffer,
                    "Turning off prime-time enforcement on queue %s\n",
                    queue->qname);

      log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

      queue->observe_pt = 0;

      rerun ++;
      }

    /*
     * At least one queue changed: walk the job list once more.  The
     * '!rerun' guard above ensures this happens at most once.
     */
    if (rerun)
      {
      (void)sprintf(log_buffer,
                    "Prime-time enforcement adjusted - reconsidering active jobs.");
      log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
      DBPRT(("%s: %s\n", id, log_buffer));

      goto run_job_list;
      }
    }

  return (jobsrun);
  }
Exemplo n.º 4
0
/*
 * Handle jobs that are still in state 'Q' on one of the batch (run)
 * queues.  Depending on schd_SCHED_RESTART_ACTION, each such job is
 * either run again where it is, or moved back to its originating queue.
 *
 * Returns the number of jobs that were successfully re-run or requeued.
 */
static int
schedule_restart(Job *joblist)
  {
  char      *id = "schedule_restart";
  Job       *cur, *upcoming;
  QueueList *batchq;
  int        nfound = 0;
  int        nchanged = 0;
  int        local_errno = 0;

  upcoming = joblist;

  while ((cur = upcoming) != NULL)
    {
    /*
     * Pick up the successor before touching the job:
     * schd_run_job_on() may move the job onto another queue's list.
     */
    upcoming = cur->next;

    /* Only queued jobs are of interest. */
    if (cur->state != 'Q')
      continue;

    /* Look for a batch queue with the same name as the job's queue. */
    batchq = schd_BatchQueues;

    while (batchq != NULL &&
           strcmp(batchq->queue->qname, cur->qname) != 0)
      batchq = batchq->next;

    /* Not sitting on a batch queue - nothing to do for this job. */
    if (batchq == NULL)
      continue;

    nfound++;

    if (schd_SCHED_RESTART_ACTION != SCHD_RESTART_RERUN)
      {
      /* (SCHED_RESTART_ACTION == SCHD_RESTART_RESUBMIT) */

      if (schd_TEST_ONLY)
        {
        DBPRT(("%s: would have moved %s back to queue %s\n", id,
               cur->jobid, schd_SubmitQueue->queue->qname));
        continue;
        }

      /* Ask PBS to put the job back on its originating queue. */
      if (pbs_movejob_err(connector, cur->jobid, cur->oqueue, NULL, &local_errno) != 0)
        {
        (void)sprintf(log_buffer,
                      "failed to move %s to queue %s, %d", cur->jobid,
                      cur->oqueue, local_errno);
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                   log_buffer);
        DBPRT(("%s: %s\n", id, log_buffer));
        continue;
        }

      (void)sprintf(log_buffer,
                    "Requeued job '%s' on queue '%s'.", cur->jobid,
                    cur->oqueue);
      log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER,
                 id, log_buffer);
      DBPRT(("%s: %s\n", id, log_buffer));
      schd_comment_job(cur, schd_JobMsg[JOB_RESUBMITTED],
                       JOB_COMMENT_REQUIRED);
      nchanged++;
      continue;
      }

    /* SCHD_RESTART_RERUN: start the job again on the queue it is on. */
    (void)sprintf(log_buffer, "Restart job '%s' on queue '%s'.",
                  cur->jobid, cur->qname);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER,
               id, log_buffer);
    DBPRT(("%s: %s\n", id, log_buffer));

    schd_comment_job(cur, schd_JobMsg[JOB_RESTARTED],
                     JOB_COMMENT_REQUIRED);

    if (schd_run_job_on(cur, cur->queue, schd_SCHED_HOST,
                        LEAVE_JOB_COMMENT) == 0)
      {
      nchanged++;
      }
    else
      {
      (void)sprintf(log_buffer,
                    "Unable to run job '%s' on queue '%s'.", cur->jobid,
                    cur->qname);
      log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                 log_buffer);
      }
    }

  /* Nothing was found on the run queues, so there is nothing to report. */
  if (nfound == 0)
    return (nchanged);

  /* Summarize what was done, worded according to the restart action. */
  if (schd_SCHED_RESTART_ACTION == SCHD_RESTART_RERUN)
    (void)sprintf(log_buffer,
                  "Re-ran %d jobs (of %d) found queued on run queues.\n",
                  nchanged, nfound);
  else
    (void)sprintf(log_buffer,
                  "Moved %d queued jobs (of %d) from run queues back to '%s'.\n",
                  nchanged, nfound, schd_SubmitQueue->queue->qname);

  log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

  DBPRT(("%s: %s\n", id, log_buffer));

  return (nchanged);
  }
Exemplo n.º 5
0
/*
 * Start 'job' through PBS, or resume it if it is flagged as suspended.
 *
 *   job          the job to start or resume.
 *   destq        queue to run the job in.  For a non-suspended job this
 *                may be NULL or the job's current queue, in which case the
 *                job is run where it is; otherwise the job is first moved
 *                to destq.
 *   exechost     execution host handed to pbs_runjob_err().
 *   set_comment  if non-zero, the job is commented with a "Started on
 *                <date>" message before it is started.
 *
 * On success the local job state becomes 'R' and the queued/running
 * counters of the job's queue are adjusted.
 *
 * Returns 0 if PBS accepted the request, -1 otherwise.
 */
int
schd_run_job_on(Job *job, Queue *destq, char *exechost, int set_comment)
  {
  char   *id = "schd_run_job_on";
  char    reason[128];
  char   *date;
  const char *verb;      /* "started" or "resumed", for the log line */
  const char *destname;  /* queue name that is safe to print */
  Queue  *srcq = NULL;
  size_t  datelen;
  int     moved = 0;     /* set once PBS has moved the job to destq */
  int     ret = 0;
  int     local_errno = 0;

  /*
   * Choose a queue name for log messages up front.  destq is allowed to
   * be NULL, so it must not be dereferenced blindly when logging.
   */
  if (destq != NULL)
    destname = destq->qname;
  else if (job->queue != NULL)
    destname = job->queue->qname;
  else
    destname = "(none)";

  if (set_comment)
    {
    /* Get the datestamp from 'ctime()'.  Remove the trailing '\n'. */
    date = ctime(&schd_TimeNow);

    if (date != NULL)
      {
      datelen = strlen(date);

      if (datelen > 0 && date[datelen - 1] == '\n')
        date[datelen - 1] = '\0';
      }

    (void)snprintf(reason, sizeof(reason), "Started on %s",
                   (date != NULL) ? date : "(unknown time)");

    /* The two fixed suffixes below always fit in 'reason'. */
    if (job->flags & JFLAGS_PRIORITY)
      {
      strcat(reason, " (EXPRESS/high priority job)");
      }

    if (job->flags & JFLAGS_WAITING)
      {
      strcat(reason, " (long-waiting job)");
      }

    schd_comment_job(job, reason, JOB_COMMENT_REQUIRED);
    }

  /* If this is NOT a suspended job... */
  if (!(job->flags & JFLAGS_SUSPENDED))
    {

    /*
     * If a destination Queue is provided, and it is different from the
     * source queue, then ask PBS to move the job to that queue before
     * running it.
     */
    srcq = job->queue;

    if ((destq != NULL) && (strcmp(destq->qname, srcq->qname) != 0))
      {
      if (pbs_movejob_err(connector, job->jobid, destq->qname, NULL, &local_errno))
        {
        (void)sprintf(log_buffer, "move job %s to queue %s failed, %d",
                      job->jobid, destq->qname, local_errno);
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER,
                   id, log_buffer);
        DBPRT(("%s: %s\n", id, log_buffer));
        return (-1);
        }

      schd_move_job_to(job, destq);
      moved = 1;
      }

    /*
    * Give the job handle (JOBID) to PBS to run.
    */
    if (pbs_runjob_err(connector, job->jobid, exechost, NULL, &local_errno))
      {
      (void)sprintf(log_buffer, "failed start job %s on queue %s@%s, %d",
                    job->jobid, destname, exechost, local_errno);
      log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
      DBPRT(("%s: %s\n", id, log_buffer));

      /*
       * Running failed! Move the job back to the source queue before
       * returning, but only if it was actually moved above. This
       * prevents jobs being left in execution queues, without issuing
       * a pointless move for a job that never left its queue.
       */

      if (moved)
        {
        DBPRT(("Attempting to move job %s back to queue %s\n",
               job->jobid, srcq->qname));

        if (pbs_movejob_err(connector, job->jobid, srcq->qname, NULL, &local_errno))
          {
          (void)sprintf(log_buffer,
                        "failed to move job %s back to queue %s, %d",
                        job->jobid, srcq->qname, local_errno);
          log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                     log_buffer);
          DBPRT(("%s: %s\n", id, log_buffer));
          }

        schd_move_job_to(job, srcq);
        }

      return (-1);
      }

    verb = "started";
    }
  else    /* it IS a suspended job */
    {

    schd_move_job_to(job, destq);
    ret = pbs_sigjob(connector, job->jobid, "resume", NULL);

    if (ret)
      {
      /* Record the failure; formatting the message alone loses it. */
      sprintf(log_buffer, "resume of job %s FAILED (%d)",
              job->jobid, ret);
      log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
      DBPRT(("%s: %s\n", id, log_buffer));
      return (-1);
      }

    job->flags &= ~JFLAGS_SUSPENDED;

    verb = "resumed";
    }

  /* PBS accepted the job (and presumably will run it). Log the fact. */
  (void)sprintf(log_buffer, "job %s %s on %s@%s", job->jobid, verb,
                destname, exechost);

  log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

  DBPRT(("%s: %s\n", id, log_buffer));

  /*
   * Change the state of the local representation of the job to "Running".
   */
  job->state = 'R';

  /*
   * Account for the job on this queue's statistics: one fewer queued
   * job, one more running job on the queue the job now belongs to.
   */

  job->queue->queued --;

  job->queue->running ++;

  /* The queue is no longer idle.  Unset the idle timer. */
  job->queue->idle_since = 0;

  return (0);    /* Job successfully started. */
  }