/*
 * Decide whether 'job' may be placed into 'queue'.
 *
 * Returns 1 when the queue passes every check below, 0 otherwise.
 * 'reason' is only forwarded to schd_job_fits_queue() and
 * schd_useracl_okay(); this function never writes to it directly, so it is
 * left untouched when the queue is rejected by the availability guards.
 */
int schd_queue_available(Job *job, Queue *queue, char *reason)
{
    /* A queue without resource information cannot be considered. */
    if (queue->rsrcs == NULL)
        return (0);

    /* Neither can one that is disabled, stopped, or whose node is down. */
    if (queue->flags & (QFLAGS_DISABLED | QFLAGS_NODEDOWN | QFLAGS_STOPPED))
        return (0);

    /* The job has to fit the queue at all before anything else matters. */
    if (!schd_job_fits_queue(job, queue, reason))
        return (0);

    /*
     * The ACL is consulted only when the queue both carries one and has
     * user-ACL enforcement switched on.
     */
    if (queue->useracl && (queue->flags & QFLAGS_USER_ACL) &&
        !schd_useracl_okay(job, queue, reason))
        return (0);

    return (1);    /* okay to run in this queue */
}
/*
 * Report whether 'job' fits at least one queue on the schd_BatchQueues
 * list.
 *
 * Returns 1 as soon as schd_job_fits_queue() accepts the job for some
 * queue, and 0 when the list is empty or no queue accepts it.  No reason
 * buffer is supplied to schd_job_fits_queue() (NULL is passed).
 */
int schd_job_can_queue(Job *job)
{
    QueueList *entry = schd_BatchQueues;

    while (entry != NULL) {
        if (schd_job_fits_queue(job, entry->queue, NULL))
            return (1);

        entry = entry->next;
    }

    /* Nothing on the list can take this job. */
    return (0);
}
/*
 * Given a list of jobs, ordered from most-eligible to least-eligible to
 * run, attempt to place as many of them as possible into the queues listed
 * in qlist.  Jobs that will not currently fit are ignored (i.e. the entire
 * list is processed, running each job in order that fits).
 *
 * Jobs are started with schd_run_job_on() as they are found on the list,
 * and each started job is charged against its queue's resources with
 * schd_charge_job().
 *
 * Parameters:
 *   jobs   - head of the job list to consider (may be NULL: nothing to do).
 *   qlist  - list of candidate queues, tried in order for every job.
 *   reason - caller-supplied buffer that the individual checks fill with an
 *            explanation; it is used for job comments and debug output.
 *
 * Returns the number of jobs run, or -1 if schd_run_job_on() reports a
 * failure.
 */
int schd_pack_queues(Job *jobs, QueueList *qlist, char *reason)
{
    char      *id = "schd_pack_queues";
    Job       *job, *nextjob;
    QueueList *qptr;
    Queue     *queue, *firstfit;
    int        allfull, jobsrun, rerun;

    jobsrun = 0;

    DBPRT(("%s: scheduling queues", id));
    for (qptr = qlist; qptr != NULL; qptr = qptr->next) {
        DBPRT((" %s@%s", qptr->queue->qname, qptr->queue->exechost));
    }
    DBPRT((".\n"));

    if (jobs == NULL) {
        /* NOTE(review): qlist is dereferenced here without a NULL check. */
        DBPRT(("No jobs available for QueueList %s%s - all done!",
               qlist->queue->qname, qlist->next ? " ..." : ""));
        return (0);
    }

    /* This is the first run through this part of the code. */
    rerun = 0;

    /*
     * Consider jobs from the list of queues.  For each job, if it appears
     * eligible to run, try to find a queue on which to place it.
     *
     * Note that schd_run_job_on() may remove the job from the list, so
     * this function must keep track of the current job's next pointer.
     */
run_job_list:
    for (job = jobs; job != NULL; job = nextjob) {
        nextjob = job->next;

        /* Ignore any non-queued jobs in the list. */
        if (job->state != 'Q')
            continue;

        /*
         * Would this job cause the user to exceed group's current
         * allocation?
         */
        if (schd_ENFORCE_ALLOCATION &&
            schd_TimeNow >= schd_ENFORCE_ALLOCATION) {
            if (job->group != NULL) {
                if (schd_is_over_alloc(job->group)) {
                    /*
                     * schd_reject_over_alloc() will delete the job from
                     * PBS, and also from the queue's job list.
                     */
                    if (schd_reject_over_alloc(job)) {
                        (void)sprintf(log_buffer,
                                      "reject_over_alloc() failed for job %s\n",
                                      job->jobid);
                        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER,
                                   id, log_buffer);
                    }
                    continue;
                }
            } else {
                (void)sprintf(log_buffer,
                              "ENFORCE ALLOCATION set, but job %s has no group field",
                              job->jobid);
                log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER,
                           id, log_buffer);
                continue;
            }
        }

        /* See if any queues are available for scheduling now. */
        allfull = 1;    /* Assume all queues are full to start. */

        for (qptr = qlist; qptr != NULL; qptr = qptr->next) {
            /*
             * A queue not yet flagged full is marked full when it has no
             * resource info or the system evaluation rejects it.  The
             * parentheses around the mask test are required: '==' binds
             * tighter than '&', so without them the test is always false.
             */
            if (((qptr->queue->flags & QFLAGS_FULL) == 0) &&
                ((qptr->queue->rsrcs == NULL) ||
                 (!schd_evaluate_system(qptr->queue->rsrcs, reason)))) {
                DBPRT(("%s: evaluate_system: %s\n", id,
                       qptr->queue->rsrcs ? reason : "No resources"));
                DBPRT(("%s: Marking queue %s@%s full\n", id,
                       qptr->queue->qname, qptr->queue->exechost));
                qptr->queue->flags |= QFLAGS_FULL;
            }

            /*
             * If an empty queue has not yet been found, check this one.
             * If this queue is not full, then all queues are not full.
             * NOTE(review): allfull is recorded here but nothing below
             * acts on it.
             */
            if (allfull) {
                if (!schd_check_queue_limits(qptr->queue, NULL))
                    allfull = 0;
            }
        }

        /*
         * Check each queue in the list in order, to see if this job
         * will fit, and if it should be run now.
         */
        firstfit = NULL;    /* Haven't found the first queue it fits. */

        for (qptr = qlist; qptr != NULL; qptr = qptr->next) {
            queue = qptr->queue;

            /*
             * If this is the first queue that this job will fit in, then
             * note it.  This is the "best fit" queue (hopefully) and the
             * job comment should not be modified unless the comment refers
             * to this queue.
             */
            if (!schd_job_fits_queue(job, queue, reason))
                continue;

            /*
             * If this queue has a user access control list, check that
             * this job can be allowed in it.
             */
            if (queue->useracl && (queue->flags & QFLAGS_USER_ACL)) {
                if (!schd_useracl_okay(job, queue, reason)) {
                    DBPRT(("%s: %s %s\n", id, job->jobid, reason));
                    continue;
                }
            }

            if (!firstfit)
                firstfit = queue;

            /*
             * Check that this job will not overrun a dedicated time.  The
             * '0' indicates that we are interested in "now".
             */
            if (schd_ENFORCE_DEDTIME &&
                schd_TimeNow >= schd_ENFORCE_DEDTIME) {
                if (!schd_dedicated_can_run(job, queue, 0, reason)) {
                    if (firstfit == queue)
                        schd_comment_job(job, reason, JOB_COMMENT_OPTIONAL);
                    continue;
                }
            }

            /*
             * Check primetime limits.  If it is primetime now, and the job
             * will complete before primetime, limit it to 1 hour.  If it
             * will overrun primetime, be sure that the amount that falls
             * within primetime will not violate the primetime limit.
             * Special jobs are not subject to primetime walltime limits.
             */
            if (schd_ENFORCE_PRIME_TIME &&
                (schd_TimeNow >= schd_ENFORCE_PRIME_TIME) &&
                (schd_SMALL_JOB_MAX <= 0)) {
                if (!(job->flags & JFLAGS_PRIORITY)) {
                    if (schd_primetime_limits(job, queue, 0, reason)) {
                        schd_comment_job(job, reason, JOB_COMMENT_OPTIONAL);

                        /* Note that a job could have run if it were not pt. */
                        if (!rerun)
                            queue->flags |= QFLAGS_NPT_JOBS;

                        continue;
                    }
                }
            }

            /*
             * Check that this job will complete before the beginning of
             * nonprime.  The '0' indicates that we are interested in "now".
             */
            if (schd_NONPRIME_DRAIN_SYS) {
                /* Note: Special jobs are not subject to this restriction */
                if (!(job->flags & JFLAGS_PRIORITY)) {
                    if (!schd_finish_before_np(job, queue, 0, reason)) {
                        schd_comment_job(job, reason, JOB_COMMENT_OPTIONAL);

                        /* Note that a job could have run if it were not pt. */
                        if (!rerun)
                            queue->flags |= QFLAGS_NPT_JOBS;

                        continue;
                    }
                }
            }

            /*
             * Check that the queue is actually available to pack jobs
             * into.  Although it was checked above, the above test is
             * very inexpensive, so it's not a big deal to do it again.
             */
            if (schd_check_queue_limits(queue, reason)) {
                if (firstfit == queue)
                    schd_comment_job(job, reason, JOB_COMMENT_OPTIONAL);
                continue;
            }

            /*
             * Check this job against the execution resource limits.
             * There is no point to going any further if this job would
             * overrun the system limits.
             * It is possible for the size of the queues to be larger than
             * the available resources on the machine (i.e. a node board
             * goes down).  Make sure that jobs are not considered that
             * will not be able to run within the system's current
             * resources.
             */
            if (!schd_resources_avail(job, queue->rsrcs, reason) ||
                schd_resource_limits(job, queue->rsrcs, reason)) {
                if (firstfit == queue)
                    schd_comment_job(job, reason, JOB_COMMENT_OPTIONAL);

                /* Try the next queue. */
                continue;
            }

            /*
             * Check that this job will complete before the beginning of
             * nonprime.  The '0' indicates that we are interested in "now".
             * Note: Special jobs are not subject to this restriction.
             */
            if (schd_NONPRIME_DRAIN_SYS && !(job->flags & JFLAGS_PRIORITY)) {
                if (!schd_finish_before_np(job, queue, 0, reason)) {
                    schd_comment_job(job, reason, JOB_COMMENT_OPTIONAL);
                    continue;
                }
            }

            /*
             * If the queue is being drained, don't run the job in it unless
             * (1) there are unreserved resources available, or
             * (2) it would *not* increase the time it will take to drain
             *     the queue.  Note that drain_by is the absolute time,
             *     while the job's walltime is relative to now.
             */
            /* (1) are there unreserved nodes available? */
            if (job->nodes >
                (queue->nodes_max - (queue->nodes_assn + queue->nodes_rsvd))) {
                /* no, so (2) see if we can backfill with this job... */
                if (queue->flags & QFLAGS_DRAINING) {
                    if ((schd_TimeNow + job->walltime) > queue->drain_by) {
                        (void)sprintf(reason,
                                      "Queue %s is being drained for high-priority job.",
                                      queue->qname);
                        if (firstfit == queue)
                            schd_comment_job(job, reason,
                                             JOB_COMMENT_OPTIONAL);
                        continue;
                    }
                }
            }

            /*
             * Queue can accept a job, provided the job does not overrun
             * the queue limits.
             */
            if (schd_user_limits(job, queue, reason)) {
                if (firstfit == queue)
                    schd_comment_job(job, reason, JOB_COMMENT_OPTIONAL);
                continue;
            }

            /*
             * Check for fragmentation in the queue, and don't run it if
             * so.  If this job is waiting, we don't care - just run it.
             */
            if (schd_AVOID_FRAGS && !(job->flags & JFLAGS_WAITING)) {
                if (!schd_fragment_okay(job, queue, reason)) {
                    if (firstfit == queue)
                        schd_comment_job(job, reason, JOB_COMMENT_OPTIONAL);
                    continue;
                }
            }

            /*
             * Found a queue on which this job can run.  Attempt to run it.
             */
            break;
        }

        /*
         * If the job was unable to fit on any of the queues, go on to the
         * next one.  For "strict" packing, make this a 'break' and it will
         * stop processing jobs when it finds the first one that does not
         * fit.
         */
        if (!qptr) {
            /*
             * If the job did not fit in any of the provided queues, assume
             * that the queue it wants is not available.  Provide a comment,
             * even if it is sort of vague.
             */
            if (firstfit == NULL) {
                schd_comment_job(job, schd_JobMsg[NO_RESOURCES],
                                 JOB_COMMENT_OPTIONAL);
            }
            continue;
        }

        /* There is a queue free enough to run this job. */
        if (schd_run_job_on(job, qptr->queue, qptr->queue->exechost,
                            SET_JOB_COMMENT)) {
            (void)sprintf(log_buffer,
                          "Unable to run batch job %s on queue %s",
                          job->jobid, qptr->queue->qname);
            log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                       log_buffer);
            return (-1);
        }

        /*
         * Account for the job's impact on the queue.
         * This includes subtracting the job's resource requests from the
         * queue's available resources.  'queue' still equals qptr->queue
         * here because the queue loop was left via 'break'.
         */
        schd_charge_job(job, queue, queue->rsrcs);

        jobsrun ++;
    }

    /*
     * Adjust the observance of primetime on the queues, if necessary.  Do
     * it only if the queue is observing primetime, it has been idle for a
     * while(*), it's close enough(*) to primetime, and there were some jobs
     * that could have been run if primetime had been not-observed.
     */
    if (!rerun &&
        (schd_ENFORCE_PRIME_TIME &&
         schd_TimeNow >= schd_ENFORCE_PRIME_TIME) &&
        schd_NP_DRAIN_BACKTIME > 0 &&
        schd_prime_time(0) &&
        schd_secs_til_nonprime(0) <= schd_NP_DRAIN_BACKTIME) {
        for (qptr = qlist; qptr != NULL; qptr = qptr->next) {
            queue = qptr->queue;

            /* Were there any jobs that could have run if it were non-pt? */
            if (!(queue->flags & QFLAGS_NPT_JOBS))
                continue;

            /* Only queues with nothing running are considered. */
            if (queue->running)
                continue;

            /* Primetime is already not being observed - ignore this queue. */
            if (!queue->observe_pt)
                continue;

            /* If a minimum idle time is given, check it. */
            if ((schd_NP_DRAIN_IDLETIME > 0) &&
                (schd_TimeNow - queue->idle_since) <= schd_NP_DRAIN_IDLETIME)
                continue;

            /*
             * Queue has been idle for some time.  Start non-primetime
             * early so those jobs that were refused can start running now.
             */
            (void)sprintf(log_buffer,
                          "Turning off prime-time enforcement on queue %s\n",
                          queue->qname);
            log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                       log_buffer);

            queue->observe_pt = 0;
            rerun ++;
        }

        /* At most one extra pass: the '!rerun' guard above blocks a third. */
        if (rerun) {
            (void)sprintf(log_buffer,
                          "Prime-time enforcement adjusted - reconsidering active jobs.");
            log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                       log_buffer);
            DBPRT(("%s: %s\n", id, log_buffer));

            goto run_job_list;
        }
    }

    return (jobsrun);
}