/* * Given a list of jobs, ordered from most-eligible to least-eligible to * run, attempt to place as many of them as possible into the queues listed * in qlist. Jobs that will not currently fit are ignored (i.e. the entire * list is processed, running each job in order that fits). * * Jobs are run by run_job_on() as they are found on the list. Resources * must be supplied because the jobs are tested against them in * 'schd_resources_avail()', and they are updated by sched_run_jobs_on(). * * This function returns the number of jobs run, or -1 on error. */ int schd_pack_queues(Job *jobs, QueueList *qlist, char *reason) { char *id = "schd_pack_queues"; Job *job, *nextjob; QueueList *qptr; Queue *queue; int allfull, jobsrun; jobsrun = 0; DBPRT(("%s: scheduling queues", id)); for (qptr = qlist; qptr != NULL; qptr = qptr->next) { if (!(qptr->queue->flags & QFLAGS_DISABLED) && !(qptr->queue->flags & QFLAGS_NODEDOWN)) DBPRT((" %s@%s", qptr->queue->qname, qptr->queue->exechost)); } DBPRT((".\n")); if (jobs == NULL) { DBPRT(("No jobs available for QueueList %s%s - all done!", qlist->queue->qname, qlist->next ? " ..." : "")); return (0); } /* * Consider jobs from the list of queues. For each job, if it appears * eligible to run, try to find a queue on which to place it. * * Note that schd_run_job_on() may remove the job from the list, so * this function must keep track of the current job's next pointer. */ for (job = jobs; job != NULL; job = nextjob) { nextjob = job->next; /* Ignore any non-queued jobs in the list. */ if (job->state != 'Q') continue; strcpy(reason, "Requested architecture not currently available"); queue = schd_static_backfill(job, qlist, reason); if (queue == NULL) { /* Hummm, there is not a queue currently available in which * to run this job. But resources have been researved for * this job by the static_backfill() routine, and the job * comment contains the estimated start time and the reason * why it can't run now. 
Comment the job and go on to the * next job on the list. */ schd_comment_job(job, reason, JOB_COMMENT_OPTIONAL); continue; } /* Attempt to run this job on the above-supplied queue */ if (schd_run_job_on(job, queue, queue->exechost, SET_JOB_COMMENT)) { (void)sprintf(log_buffer, "Unable to run batch job %s on queue %s", job->jobid, queue->qname); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); DBPRT(("%s: %s\n", id, log_buffer)) } else {
/* * Jobs queued on the special queue should be treated as highest priority. * They are sorted onto the top of the list of jobs that is created in the * usersort.c code. That sorted list is then split out onto each of the * queues, so that each queue has a list of the jobs it "owns". The jobs * then carry a backpointer to their owner queue. * * This works really nicely, since all the information about each queue * (including the list of jobs queued/running/etc on it) lives right on * the Queue structure. It is a clean, elegant and fully general solution. * * An unfortunate side effect of this "demultiplexing" is that jobs that * were marked "special" end up claimed by the SpecialQueue. The scheduler * looks for jobs only on the SubmitQueue queue, so it never notices that * there are special jobs enqueued. In order to address this, any jobs on * the SpecialQueue are marked "waiting/high priority", and placed at the * head of the list of jobs in the SubmitQueue. * * This seems like an evil hack at first, and it arguably is. However, if * there are multiple submission queues, it is relatively simple to support * them by simply causing them to be inserted in the submit queue's list. */ static int fixup_special(void) { char *id = "fixup_special"; Job *job, *submitjobs, *nextjob, *specialtail; Queue *queue; char comment[MAX_TXT + 1]; int fixedup = 0, justcomment = 0; /* * Sanity check -- this function shouldn't be called if there is no * valid SpecialQueue. */ if (schd_SpecialQueue == NULL || schd_SpecialQueue->queue == NULL) { DBPRT(("%s: special code called but no special queue defined!\n", id)); return (-1); } queue = schd_SpecialQueue->queue; if (queue->jobs == NULL) { DBPRT(("%s: no jobs on special queue '%s'. Ignoring.\n", id, queue->qname)); return (0); } /* * See if the special queue has anything to do, and if it will allow * anything to be done to it. 
*/ if (queue->flags & QFLAGS_DISABLED) { (void)sprintf(comment, "Queue %s not enabled", queue->qname); justcomment ++; } if (queue->flags & QFLAGS_STOPPED) { (void)sprintf(comment, "Queue %s not started", queue->qname); justcomment ++; } /* * If the jobs on the speical queue should just be commented, do so and * return 0 -- no jobs were fixed up. */ if (justcomment) { for (job = queue->jobs; job != NULL; job = job->next) schd_comment_job(job, comment, JOB_COMMENT_REQUIRED); return 0; } /* * Detach the list of jobs from the SubmitQueue. They will be tacked * back onto the end of the list once the special jobs have been moved * to the head. */ submitjobs = schd_SubmitQueue->queue->jobs; schd_SubmitQueue->queue->jobs = NULL; /* * Any jobs queued on the special queue are now moved to the tail of the * newly empty SubmitQueue list. Mark the jobs as high priority and * waiting. */ for (job = queue->jobs; job != NULL; job = nextjob) { /* * Keep track of the next job -- the next pointer on this job will * be modified by the schd_move_job_to() function. Also keep a * pointer to the last job in the list. */ nextjob = job->next; specialtail = job; if (job->state != 'Q') continue; job->flags |= (JFLAGS_WAITING | JFLAGS_PRIORITY); /* * Move the job from the special queue to the tail of the submit * queue. This keeps the counts of the queued jobs correct in both * queues. */ schd_move_job_to(job, schd_SubmitQueue->queue); fixedup ++; } /* * Now that the submit queue has the list of all queued jobs from the * special queue, attach the original submit jobs onto the tail of the * special job list. */ specialtail->next = submitjobs; DBPRT(("%s: fixed up %d jobs.\n", id, fixedup)); return (fixedup); }
/*
 * Given a list of jobs, ordered from most-eligible to least-eligible to
 * run, attempt to place as many of them as possible into the queues listed
 * in qlist.  Jobs that will not currently fit are skipped (i.e. the entire
 * list is processed, running each job in order that fits).
 *
 * Jobs are started with schd_run_job_on() as they are found on the list.
 * Each candidate queue's resources (queue->rsrcs) are tested with
 * schd_resources_avail() and schd_resource_limits(), and are charged with
 * schd_charge_job() once a job has been started.
 *
 * Parameters:
 *   jobs    head of the job list to consider (may be NULL).
 *   qlist   list of queues into which jobs may be packed.
 *   reason  caller-supplied buffer that the various checks fill with the
 *           text explaining why a job could not be run.
 *
 * If prime-time enforcement is relaxed on any queue at the end of the first
 * pass, the whole job list is considered a second time.
 *
 * Returns the number of jobs run, or -1 as soon as schd_run_job_on() fails.
 */
int schd_pack_queues(Job *jobs, QueueList *qlist, char *reason)
{
    char      *id = "schd_pack_queues";
    Job       *job, *nextjob;
    QueueList *qptr;
    Queue     *queue, *firstfit;
    int        allfull, jobsrun, rerun;

    jobsrun = 0;

    DBPRT(("%s: scheduling queues", id));

    for (qptr = qlist; qptr != NULL; qptr = qptr->next) {
        DBPRT((" %s@%s", qptr->queue->qname, qptr->queue->exechost));
    }

    DBPRT((".\n"));

    if (jobs == NULL) {
        DBPRT(("No jobs available for QueueList %s%s - all done!",
               qlist->queue->qname, qlist->next ? " ..." : ""));
        return (0);
    }

    /* This is the first run through this part of the code. */
    rerun = 0;

    /*
     * Consider jobs from the list of queues.  For each job, if it appears
     * eligible to run, try to find a queue on which to place it.
     *
     * Note that schd_run_job_on() may remove the job from the list, so
     * this function must keep track of the current job's next pointer.
     */
run_job_list:

    for (job = jobs; job != NULL; job = nextjob) {
        nextjob = job->next;

        /* Ignore any non-queued jobs in the list. */
        if (job->state != 'Q')
            continue;

        /*
         * Would this job cause the user to exceed group's current allocation?
         */
        if (schd_ENFORCE_ALLOCATION &&
            schd_TimeNow >= schd_ENFORCE_ALLOCATION) {
            if (job->group != NULL) {
                if (schd_is_over_alloc(job->group)) {
                    /*
                     * schd_reject_over_alloc() will delete the job from
                     * PBS, and also from the queue's job list.
                     */
                    if (schd_reject_over_alloc(job)) {
                        (void)sprintf(log_buffer,
                                      "reject_over_alloc() failed for job %s\n",
                                      job->jobid);
                        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER,
                                   id, log_buffer);
                    }

                    continue;
                }
            } else {
                (void)sprintf(log_buffer,
                              "ENFORCE ALLOCATION set, but job %s has no group field",
                              job->jobid);
                log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                           log_buffer);
                continue;
            }
        }

        /* See if any queues are available for scheduling now. */
        allfull = 1;    /* Assume all queues are full to start. */

        for (qptr = qlist; qptr != NULL; qptr = qptr->next) {
            /*
             * A queue that is not yet marked full is marked full when it
             * has no resource information or when the system evaluation
             * fails.  The flag test must be parenthesised: '==' binds more
             * tightly than '&', so the unparenthesised form was always
             * false and this branch could never be taken.
             */
            if (((qptr->queue->flags & QFLAGS_FULL) == 0) &&
                ((qptr->queue->rsrcs == NULL) ||
                 (!schd_evaluate_system(qptr->queue->rsrcs, reason)))) {
                DBPRT(("%s: evaluate_system: %s\n", id,
                       qptr->queue->rsrcs ? reason : "No resources"));
                DBPRT(("%s: Marking queue %s@%s full\n", id,
                       qptr->queue->qname, qptr->queue->exechost));
                qptr->queue->flags |= QFLAGS_FULL;
            }

            /*
             * If a non-full queue has not yet been found, check this one.
             * NOTE(review): 'allfull' is computed here but nothing below
             * acts on it; the check is kept so that the calls made to
             * schd_check_queue_limits() are unchanged.
             */
            if (allfull)
                if (!schd_check_queue_limits(qptr->queue, NULL))
                    allfull = 0;
        }

        /*
         * Check each queue in the list in order, to see if this job
         * will fit, and if it should be run now.
         */
        firstfit = NULL;    /* Haven't found the first queue it fits. */

        for (qptr = qlist; qptr != NULL; qptr = qptr->next) {
            queue = qptr->queue;

            /*
             * The first queue that the job fits in (and is allowed on) is
             * remembered as 'firstfit'.  Most of the checks below only
             * update the job comment when they fail on that queue.
             */
            if (!schd_job_fits_queue(job, queue, reason))
                continue;

            /*
             * If this queue has a user access control list, check that
             * this job can be allowed in it.
             */
            if (queue->useracl && (queue->flags & QFLAGS_USER_ACL)) {
                if (!schd_useracl_okay(job, queue, reason)) {
                    DBPRT(("%s: %s %s\n", id, job->jobid, reason));
                    continue;
                }
            }

            if (!firstfit)
                firstfit = queue;

            /*
             * Check that this job will not overrun a dedicated time.  The
             * '0' indicates that we are interested in "now".
             */
            if (schd_ENFORCE_DEDTIME &&
                schd_TimeNow >= schd_ENFORCE_DEDTIME) {
                if (!schd_dedicated_can_run(job, queue, 0, reason)) {
                    if (firstfit == queue)
                        schd_comment_job(job, reason, JOB_COMMENT_OPTIONAL);

                    continue;
                }
            }

            /*
             * Check primetime limits.
             * Special jobs are not subject to primetime walltime limits.
             */
            if (schd_ENFORCE_PRIME_TIME &&
                (schd_TimeNow >= schd_ENFORCE_PRIME_TIME) &&
                (schd_SMALL_JOB_MAX <= 0)) {
                if (!(job->flags & JFLAGS_PRIORITY)) {
                    if (schd_primetime_limits(job, queue, 0, reason)) {
                        schd_comment_job(job, reason, JOB_COMMENT_OPTIONAL);

                        /* Note that a job could have run if it were not pt. */
                        if (!rerun)
                            queue->flags |= QFLAGS_NPT_JOBS;

                        continue;
                    }
                }
            }

            /*
             * Check that this job will complete before the beginning of
             * nonprime.  The '0' indicates that we are interested in "now".
             */
            if (schd_NONPRIME_DRAIN_SYS) {
                /* Note: Special jobs are not subject to this restriction */
                if (!(job->flags & JFLAGS_PRIORITY)) {
                    if (!schd_finish_before_np(job, queue, 0, reason)) {
                        schd_comment_job(job, reason, JOB_COMMENT_OPTIONAL);

                        /* Note that a job could have run if it were not pt. */
                        if (!rerun)
                            queue->flags |= QFLAGS_NPT_JOBS;

                        continue;
                    }
                }
            }

            /*
             * Check that the queue is actually available to pack jobs
             * into.  Although it was checked above, it is checked again
             * here so that 'reason' is filled in for the job comment.
             */
            if (schd_check_queue_limits(queue, reason)) {
                if (firstfit == queue)
                    schd_comment_job(job, reason, JOB_COMMENT_OPTIONAL);

                continue;
            }

            /*
             * Check this job against the execution resource limits.
             * It is possible for the size of the queues to be larger than
             * the available resources on the machine (i.e. a node board
             * goes down).  Make sure that jobs are not considered that will
             * not be able to run within the system's current resources.
             */
            if (!schd_resources_avail(job, queue->rsrcs, reason) ||
                schd_resource_limits(job, queue->rsrcs, reason)) {
                if (firstfit == queue)
                    schd_comment_job(job, reason, JOB_COMMENT_OPTIONAL);

                /* Try the next queue. */
                continue;
            }

            /*
             * Same nonprime-drain test as earlier in this loop, repeated
             * after the resource checks.  Special jobs are exempt.
             */
            if (schd_NONPRIME_DRAIN_SYS && !(job->flags & JFLAGS_PRIORITY)) {
                if (!schd_finish_before_np(job, queue, 0, reason)) {
                    schd_comment_job(job, reason, JOB_COMMENT_OPTIONAL);
                    continue;
                }
            }

            /*
             * If the queue is being drained, don't run the job in it unless
             * (1) there are unreserved resources available, or
             * (2) it would *not* increase the time it will take to drain
             *     the queue.  Note that drain_by is the absolute time,
             *     while the job's walltime is relative to now.
             */

            /* (1) are there unreserved nodes available? */
            if (job->nodes >
                (queue->nodes_max - (queue->nodes_assn + queue->nodes_rsvd))) {
                /* no, so (2) see if we can backfill with this job... */
                if (queue->flags & QFLAGS_DRAINING) {
                    if ((schd_TimeNow + job->walltime) > queue->drain_by) {
                        (void)sprintf(reason,
                                      "Queue %s is being drained for high-priority job.",
                                      queue->qname);

                        if (firstfit == queue)
                            schd_comment_job(job, reason,
                                             JOB_COMMENT_OPTIONAL);

                        continue;
                    }
                }
            }

            /*
             * Queue can accept a job, provided the job does not overrun
             * the per-user limits.
             */
            if (schd_user_limits(job, queue, reason)) {
                if (firstfit == queue)
                    schd_comment_job(job, reason, JOB_COMMENT_OPTIONAL);

                continue;
            }

            /*
             * Check for fragmentation in the queue: skip this queue when
             * schd_fragment_okay() returns zero.  If this job is waiting,
             * we don't care - just run it.
             */
            if (schd_AVOID_FRAGS && !(job->flags & JFLAGS_WAITING)) {
                if (!schd_fragment_okay(job, queue, reason)) {
                    if (firstfit == queue)
                        schd_comment_job(job, reason, JOB_COMMENT_OPTIONAL);

                    continue;
                }
            }

            /*
             * Found a queue on which this job can run.  Attempt to run it.
             */
            break;
        }

        /*
         * If the job was unable to fit on any of the queues, go on to the
         * next one.  For "strict" packing, make this a 'break' and it will
         * stop processing jobs when it finds the first one that does not
         * fit.
         */
        if (!qptr) {
            /*
             * If the job did not fit in any of the provided queues, assume
             * that the queue it wants is not available.  Provide a comment,
             * even if it is sort of vague.
             */
            if (firstfit == NULL) {
                schd_comment_job(job, schd_JobMsg[NO_RESOURCES],
                                 JOB_COMMENT_OPTIONAL);
            }

            continue;
        }

        /* There is a queue free enough to run this job. */
        if (schd_run_job_on(job, qptr->queue, qptr->queue->exechost,
                            SET_JOB_COMMENT)) {
            (void)sprintf(log_buffer,
                          "Unable to run batch job %s on queue %s",
                          job->jobid, qptr->queue->qname);
            log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                       log_buffer);
            return (-1);
        }

        /*
         * Account for the job's impact on the queue.  'queue' still refers
         * to qptr->queue here, since the search loop was left via 'break'.
         */
        schd_charge_job(job, queue, queue->rsrcs);

        jobsrun ++;
    }

    /*
     * Adjust the observance of primetime on the queues, if necessary.  Do
     * it only on the first pass, when primetime is being enforced, it is
     * primetime now, nonprime is no more than schd_NP_DRAIN_BACKTIME
     * seconds away, and there were some jobs that could have been run if
     * primetime had not been observed.
     */
    if (!rerun &&
        (schd_ENFORCE_PRIME_TIME && schd_TimeNow >= schd_ENFORCE_PRIME_TIME) &&
        schd_NP_DRAIN_BACKTIME > 0 &&
        schd_prime_time(0) &&
        schd_secs_til_nonprime(0) <= schd_NP_DRAIN_BACKTIME) {
        for (qptr = qlist; qptr != NULL; qptr = qptr->next) {
            queue = qptr->queue;

            /* Were there any jobs that could have run if it were non-pt? */
            if (!(queue->flags & QFLAGS_NPT_JOBS))
                continue;

            /* Only queues with nothing running are adjusted. */
            if (queue->running)
                continue;

            /* Primetime already not observed - ignore this queue. */
            if (!queue->observe_pt)
                continue;

            /* If a minimum idle time is given, check it. */
            if ((schd_NP_DRAIN_IDLETIME > 0) &&
                (schd_TimeNow - queue->idle_since) <= schd_NP_DRAIN_IDLETIME)
                continue;

            /*
             * Queue has been idle for some time.  Start non-primetime early
             * so those jobs that were refused can start running now.
             */
            (void)sprintf(log_buffer,
                          "Turning off prime-time enforcement on queue %s\n",
                          queue->qname);
            log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                       log_buffer);

            queue->observe_pt = 0;
            rerun ++;
        }

        if (rerun) {
            (void)sprintf(log_buffer,
                          "Prime-time enforcement adjusted - reconsidering active jobs.");
            log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                       log_buffer);
            DBPRT(("%s: %s\n", id, log_buffer));

            goto run_job_list;
        }
    }

    return (jobsrun);
}
/*
 * Deal with jobs that are still in state 'Q' on one of the batch (run)
 * queues listed in schd_BatchQueues.
 *
 * Depending on schd_SCHED_RESTART_ACTION, each such job is either handed
 * to schd_run_job_on() again, or moved back to the queue named in its
 * 'oqueue' field (the move is only reported, not performed, when
 * schd_TEST_ONLY is set).
 *
 * Returns the number of jobs that were successfully re-run or requeued.
 */
static int schedule_restart(Job *joblist)
{
    char      *id = "schedule_restart";
    Job       *cursor;
    QueueList *batchq;
    int        seen = 0;        /* queued jobs found on a batch queue */
    int        handled = 0;     /* of those, how many were dealt with  */
    int        local_errno = 0;

    cursor = joblist;

    while (cursor != NULL) {
        Job *job = cursor;

        /* Advance first: handling the job may rewrite its next pointer. */
        cursor = job->next;

        if (job->state != 'Q')
            continue;

        /* Look for a batch queue whose name matches the job's queue. */
        batchq = schd_BatchQueues;

        while (batchq != NULL && strcmp(batchq->queue->qname, job->qname) != 0)
            batchq = batchq->next;

        if (batchq == NULL)
            continue;           /* not on a batch queue */

        ++seen;

        if (schd_SCHED_RESTART_ACTION == SCHD_RESTART_RERUN) {
            (void)sprintf(log_buffer, "Restart job '%s' on queue '%s'.",
                          job->jobid, job->qname);
            log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                       log_buffer);
            DBPRT(("%s: %s\n", id, log_buffer));

            schd_comment_job(job, schd_JobMsg[JOB_RESTARTED],
                             JOB_COMMENT_REQUIRED);

            if (schd_run_job_on(job, job->queue, schd_SCHED_HOST,
                                LEAVE_JOB_COMMENT) == 0) {
                ++handled;
            } else {
                (void)sprintf(log_buffer,
                              "Unable to run job '%s' on queue '%s'.",
                              job->jobid, job->qname);
                log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                           log_buffer);
            }

            continue;
        }

        /* Otherwise (SCHED_RESTART_ACTION == SCHD_RESTART_RESUBMIT). */
        if (schd_TEST_ONLY) {
            DBPRT(("%s: would have moved %s back to queue %s\n", id,
                   job->jobid, schd_SubmitQueue->queue->qname));
            continue;
        }

        /* Ask the server to put the job back on its originating queue. */
        if (pbs_movejob_err(connector, job->jobid, job->oqueue, NULL,
                            &local_errno) == 0) {
            (void)sprintf(log_buffer, "Requeued job '%s' on queue '%s'.",
                          job->jobid, job->oqueue);
            log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                       log_buffer);
            DBPRT(("%s: %s\n", id, log_buffer));

            schd_comment_job(job, schd_JobMsg[JOB_RESUBMITTED],
                             JOB_COMMENT_REQUIRED);
            ++handled;
        } else {
            (void)sprintf(log_buffer, "failed to move %s to queue %s, %d",
                          job->jobid, job->oqueue, local_errno);
            log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                       log_buffer);
            DBPRT(("%s: %s\n", id, log_buffer));
        }
    }

    /* Nothing was found on the batch queues, so there is nothing to report. */
    if (seen == 0)
        return (handled);

    if (schd_SCHED_RESTART_ACTION == SCHD_RESTART_RERUN)
        (void)sprintf(log_buffer,
                      "Re-ran %d jobs (of %d) found queued on run queues.\n",
                      handled, seen);
    else
        (void)sprintf(log_buffer,
                      "Moved %d queued jobs (of %d) from run queues back to '%s'.\n",
                      handled, seen, schd_SubmitQueue->queue->qname);

    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
    DBPRT(("%s: %s\n", id, log_buffer));

    return (handled);
}
/*
 * Start (or resume) a job.
 *
 * For a job that is not suspended: if 'destq' is given and its name differs
 * from the job's current queue, the job is first moved there with
 * pbs_movejob_err(); the job is then started on 'exechost' with
 * pbs_runjob_err().  If the start fails after a move, an attempt is made to
 * move the job back to the queue it came from.
 *
 * For a suspended job (JFLAGS_SUSPENDED): the local job record is moved to
 * 'destq' and a "resume" signal is sent with pbs_sigjob().
 *
 * Parameters:
 *   job          the job to start.
 *   destq        queue to run the job in; may be NULL for a non-suspended
 *                job, in which case the job stays in its current queue.
 *   exechost     host passed to pbs_runjob_err().
 *   set_comment  if non-zero, a "Started on <date>" comment is placed on
 *                the job before it is started.
 *
 * On success the local job state becomes 'R' and the counters of the queue
 * the job now belongs to are updated.
 *
 * Returns 0 if the job was started or resumed, -1 on any failure.
 */
int schd_run_job_on(Job *job, Queue *destq, char *exechost, int set_comment)
{
    char       *id = "schd_run_job_on";
    char        reason[128];
    const char *action;
    const char *qname;
    char       *date;
    size_t      datelen;
    Queue      *srcq = NULL;
    int         moved = 0;      /* set once the server-side move succeeded */
    int         ret = 0;
    int         local_errno = 0;

    /*
     * Get the datestamp from 'ctime()' and remove the trailing '\n'.
     * ctime() may return NULL, so guard before touching the string.
     */
    date = ctime(&schd_TimeNow);

    if (date != NULL) {
        datelen = strlen(date);

        if (datelen > 0)
            date[datelen - 1] = '\0';
    }

    if (set_comment) {
        sprintf(reason, "Started on %s", (date != NULL) ? date : "");

        if (job->flags & JFLAGS_PRIORITY) {
            strcat(reason, " (EXPRESS/high priority job)");
        }

        if (job->flags & JFLAGS_WAITING) {
            strcat(reason, " (long-waiting job)");
        }

        schd_comment_job(job, reason, JOB_COMMENT_REQUIRED);
    }

    /*
     * Queue name used in the log messages.  'destq' is optional, so fall
     * back to the job's current queue rather than dereferencing NULL.
     */
    qname = (destq != NULL) ? destq->qname : job->queue->qname;

    /* If this is NOT a suspended job... */
    if (!(job->flags & JFLAGS_SUSPENDED)) {
        /*
         * If a destination Queue is provided, and it is different from the
         * source queue, then ask PBS to move the job to that queue before
         * running it.
         */
        srcq = job->queue;

        if ((destq != NULL) && (strcmp(destq->qname, srcq->qname) != 0)) {
            if (pbs_movejob_err(connector, job->jobid, destq->qname, NULL,
                                &local_errno)) {
                (void)sprintf(log_buffer,
                              "move job %s to queue %s failed, %d",
                              job->jobid, destq->qname, local_errno);
                log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                           log_buffer);
                DBPRT(("%s: %s\n", id, log_buffer));
                return (-1);
            }

            schd_move_job_to(job, destq);
            moved = 1;
        }

        /*
         * Give the job handle (JOBID) to PBS to run.
         */
        if (pbs_runjob_err(connector, job->jobid, exechost, NULL,
                           &local_errno)) {
            (void)sprintf(log_buffer,
                          "failed start job %s on queue %s@%s, %d",
                          job->jobid, qname, exechost, local_errno);
            log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                       log_buffer);
            DBPRT(("%s: %s\n", id, log_buffer));

            /*
             * Running failed!  If the job was moved above, move it back to
             * the source queue before returning.  This prevents jobs being
             * left in execution queues.  A job that was never moved is left
             * alone.
             */
            if (moved) {
                DBPRT(("Attempting to move job %s back to queue %s\n",
                       job->jobid, srcq->qname));

                if (pbs_movejob_err(connector, job->jobid, srcq->qname, NULL,
                                    &local_errno)) {
                    (void)sprintf(log_buffer,
                                  "failed to move job %s back to queue %s, %d",
                                  job->jobid, srcq->qname, local_errno);
                    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                               log_buffer);
                    DBPRT(("%s: %s\n", id, log_buffer));
                }

                /*
                 * NOTE(review): the local record is moved back even when
                 * the server-side move above failed, as before.
                 */
                schd_move_job_to(job, srcq);
            }

            return (-1);
        }

        action = "started";
    } else {    /* it IS a suspended job */
        schd_move_job_to(job, destq);

        ret = pbs_sigjob(connector, job->jobid, "resume", NULL);

        if (ret) {
            sprintf(log_buffer, "resume of job %s FAILED (%d)", job->jobid,
                    ret);
            /* Record the failure; it used to be formatted but never logged. */
            log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                       log_buffer);
            DBPRT(("%s: %s\n", id, log_buffer));
            return (-1);
        }

        job->flags &= ~JFLAGS_SUSPENDED;

        action = "resumed";
    }

    /* PBS accepted the job (and presumably will run it).  Log the fact. */
    (void)sprintf(log_buffer, "job %s %s on %s@%s", job->jobid, action,
                  qname, exechost);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
    DBPRT(("%s: %s\n", id, log_buffer));

    /*
     * Change the state of the local representation of the job to "Running".
     */
    job->state = 'R';

    /*
     * Account for the job on the statistics of the queue it now belongs to.
     */
    job->queue->queued --;
    job->queue->running ++;

    /* The queue is no longer idle.  Unset the idle timer. */
    job->queue->idle_since = 0;

    return (0);    /* Job successfully started. */
}