/*
 * Determine if a job *can* run in this queue.  This is distinct from if
 * it *should* be run in the queue.
 *
 * A job *can* fit in a queue if its requested resources are not greater
 * than the queue's maximums.
 *
 * A job *should* be run only if its requested resources do not exceed the
 * queue's *available* resources.
 *
 * Parameters:
 *   job    - the job whose request is tested.
 *   queue  - the queue whose limits are tested against.
 *   reason - optional; when non-NULL a human-readable explanation is
 *            written here for (most) failures.  The size of the buffer is
 *            not known to this function, so the caller must supply one
 *            large enough for the longest message.
 *
 * Returns 1 if the job fits within the queue's limits, 0 if it does not.
 * The result does not depend on whether 'reason' was supplied.
 */
int
schd_job_fits_queue(Job *job, Queue *queue, char *reason)
{
    /* char *id = "schd_job_fits_queue"; */

    /*
     * Compare the job's requested resources against the queue's limits.
     * A queue limit of UNSPECIFIED disables the corresponding test.
     */
    if ((queue->wallt_min != UNSPECIFIED) &&
        (job->walltime < queue->wallt_min)) {
        if (reason)
            (void)sprintf(reason,
                          "Does not meet queue '%s' walltime minimum (%s).",
                          queue->qname, schd_sec2val(queue->wallt_min));
        return (0);
    }

    if ((queue->wallt_max != UNSPECIFIED) &&
        (job->walltime > queue->wallt_max)) {
        if (reason)
            (void)sprintf(reason,
                          "Would exceed queue '%s' walltime limit (%s).",
                          queue->qname, schd_sec2val(queue->wallt_max));
        return (0);
    }

    if ((queue->ncpus_min != UNSPECIFIED) &&
        (job->ncpus < queue->ncpus_min)) {
        if (reason)
            (void)sprintf(reason,
                          "Does not meet queue '%s' CPU minimum (%d).",
                          queue->qname, queue->ncpus_min);
        return (0);
    }

    if ((queue->ncpus_max != UNSPECIFIED) &&
        (job->ncpus > queue->ncpus_max)) {
        if (reason)
            (void)sprintf(reason,
                          "Would exceed queue '%s' CPU limit (%d).",
                          queue->qname, queue->ncpus_max);
        return (0);
    }

    if ((queue->mem_min != UNSPECIFIED) &&
        (job->memory < queue->mem_min)) {
        if (reason)
            (void)sprintf(reason,
                          "Does not meet queue '%s' memory minimum (%ld).",
                          queue->qname, (long)queue->mem_min);
        return (0);
    }

    if ((queue->mem_max != UNSPECIFIED) &&
        (job->memory > queue->mem_max)) {
        if (reason)
            (void)sprintf(reason,
                          "Would exceed queue '%s' memory limit (%ld).",
                          queue->qname, (long)queue->mem_max);
        return (0);
    }

    /* The job may ask for a faster host than this queue provides. */
    if ((queue->speed != UNSPECIFIED) && (job->speed != UNSPECIFIED) &&
        (job->speed > queue->speed)) {
        if (reason)
            (void)sprintf(reason,
                          "Host %s is too slow (%d MHz) to fill request (%d MHz).",
                          queue->qname, queue->speed, job->speed);
        return (0);
    }

    /*
     * String-valued features: when both sides name a value, the job fits
     * only if the values match.  (The test was previously inverted and
     * rejected the job exactly when the queue offered what it asked for.)
     */
    if ((queue->featureA != NULL) && (job->featureA != NULL) &&
        (strcmp(queue->featureA, job->featureA) != 0)) {
        if (reason)
            (void)sprintf(reason,
                          "Requested %s %s not available on Queue %s.",
                          FEATURE_A, job->featureA, queue->qname);
        return (0);
    }

    if ((queue->featureB != NULL) && (job->featureB != NULL) &&
        (strcmp(queue->featureB, job->featureB) != 0)) {
        if (reason)
            (void)sprintf(reason,
                          "Requested %s %s not available on Queue %s.",
                          FEATURE_B, job->featureB, queue->qname);
        return (0);
    }

    if ((queue->featureC != NULL) && (job->featureC != NULL) &&
        (strcmp(queue->featureC, job->featureC) != 0)) {
        if (reason)
            (void)sprintf(reason,
                          "Requested %s %s not available on Queue %s.",
                          FEATURE_C, job->featureC, queue->qname);
        return (0);
    }

    /*
     * Numeric features D-F are printed with %ld; cast explicitly so the
     * argument type always matches the conversion, as is done for the
     * memory limits above.
     */
    if ((queue->featureD != UNSPECIFIED) && (job->featureD != UNSPECIFIED) &&
        (job->featureD > queue->featureD)) {
        if (reason)
            (void)sprintf(reason,
                          "Requested %s (%ld) exceeds queue %s limit (%ld).",
                          FEATURE_D, (long)job->featureD, queue->qname,
                          (long)queue->featureD);
        return (0);
    }

    if ((queue->featureE != UNSPECIFIED) && (job->featureE != UNSPECIFIED) &&
        (job->featureE > queue->featureE)) {
        if (reason)
            (void)sprintf(reason,
                          "Requested %s (%ld) exceeds queue %s limit (%ld).",
                          FEATURE_E, (long)job->featureE, queue->qname,
                          (long)queue->featureE);
        return (0);
    }

    if ((queue->featureF != UNSPECIFIED) && (job->featureF != UNSPECIFIED) &&
        (job->featureF > queue->featureF)) {
        if (reason)
            (void)sprintf(reason,
                          "Requested %s (%ld) exceeds queue %s limit (%ld).",
                          FEATURE_F, (long)job->featureF, queue->qname,
                          (long)queue->featureF);
        return (0);
    }

    if ((queue->featureG != UNSPECIFIED) && (job->featureG != UNSPECIFIED) &&
        (job->featureG > queue->featureG)) {
        if (reason)
            (void)sprintf(reason,
                          "Requested %s (%d) exceeds queue %s limit (%d).",
                          FEATURE_G, job->featureG, queue->qname,
                          queue->featureG);
        return (0);
    }

    if ((queue->featureH != UNSPECIFIED) && (job->featureH != UNSPECIFIED) &&
        (job->featureH > queue->featureH)) {
        if (reason)
            (void)sprintf(reason,
                          "Requested %s (%d) exceeds queue %s limit (%d).",
                          FEATURE_H, job->featureH, queue->qname,
                          queue->featureH);
        return (0);
    }

    if ((queue->featureI != UNSPECIFIED) && (job->featureI != UNSPECIFIED) &&
        (job->featureI > queue->featureI)) {
        if (reason)
            (void)sprintf(reason,
                          "Requested %s (%d) exceeds queue %s limit (%d).",
                          FEATURE_I, job->featureI, queue->qname,
                          queue->featureI);
        return (0);
    }

    /*
     * Architecture must match when both the job and the queue's host
     * specify one.  A mismatch is a failure whether or not the caller
     * wants an explanation; 'reason' only controls the message.
     */
    if ((job->arch != NULL) && (queue->rsrcs->arch != NULL) &&
        (strcmp(job->arch, queue->rsrcs->arch) != 0)) {
        if (reason)
            (void)sprintf(reason,
                          "Host %s is wrong architecture (%s) to fill request (%s).",
                          queue->qname, queue->rsrcs->arch, job->arch);
        return (0);
    }

    /*
     * The job _can_ fit in this queue.  This doesn't mean it *will* fit
     * in the queue as it currently exists, but it *would* fit if the queue
     * was completely empty.
     */
    return (1);
}
/*
 * Debug aid: print a description of 'queue' through DBPRT().
 *
 * Shows the queue name/host, its state flags, job and node counts, the
 * walltime limits, the empty-by / idle-since times (when non-zero), the
 * user ACL (when one is set and enabled) and, if 'dumpjobs' is non-zero,
 * the short id and state of each job on the queue.  Numeric fields equal
 * to UNSPECIFIED are shown as "???".
 */
void
schd_dump_queue(Queue *queue, int dumpjobs)
{
    Job     *cur;
    UserAcl *acl;
    char     numbuf[32];
    char    *dot;
    int      col;
#ifdef NODEMASK
    Bitfield all_ones;
#endif /* NODEMASK */

    /* Header line: name, enabled/started state, then optional flags. */
    DBPRT(("\nQueue '%s@%s': %sabled/%sed", queue->qname, queue->exechost,
           (queue->flags & QFLAGS_DISABLED) ? "Dis" : "En",
           (queue->flags & QFLAGS_STOPPED) ? "Stopp" : "Start"));
    DBPRT(("%s%s%s%s ",
           (queue->flags & QFLAGS_FULL) ? "/Full" : "",
           (queue->flags & QFLAGS_MAXRUN) ? "/MaxRun" : "",
           (queue->flags & QFLAGS_DRAINING) ? "/Drain" : "",
           (queue->flags & QFLAGS_USER_ACL) ? "/ACL" : ""));

    if (schd_ENFORCE_PRIME_TIME && schd_TimeNow >= schd_ENFORCE_PRIME_TIME) {
        DBPRT(("obsv_pt:%s", queue->observe_pt ? "Yes" : "No"));
    }

    DBPRT(("\n"));

#ifdef NODEMASK
    if (queue->flags & QFLAGS_NODEMASK) {
        BITFIELD_SETALL(&all_ones);
        DBPRT((" Nodes: %s\n",
               schd_format_nodemask(&queue->queuemask, &all_ones)));
        DBPRT((" Avail: %s\n",
               schd_format_nodemask(&queue->queuemask, &queue->availmask)));
    }
#endif /* NODEMASK */

    /* Job counts. */
    sprintf(numbuf, "%d", queue->running);
    DBPRT((" Job counts: %s running, ",
           queue->running != UNSPECIFIED ? numbuf : "???"));

    sprintf(numbuf, "%d", queue->maxrun);
    DBPRT(("%s max ", queue->maxrun != UNSPECIFIED ? numbuf : "???"));

    sprintf(numbuf, "%d", queue->userrun);
    DBPRT(("(%s/user), ", queue->userrun != UNSPECIFIED ? numbuf : "???"));

    sprintf(numbuf, "%d", queue->queued);
    DBPRT(("%s queued\n", queue->queued != UNSPECIFIED ? numbuf : "???"));

    /* Node counts. */
    sprintf(numbuf, "%d", queue->nodes_assn);
    DBPRT((" Nodes:%s/", queue->nodes_assn != UNSPECIFIED ? numbuf : "???"));

    sprintf(numbuf, "%d", queue->nodes_max);
    DBPRT(("%s", queue->nodes_max != UNSPECIFIED ? numbuf : "???"));

    sprintf(numbuf, "%d", queue->nodes_default);
    DBPRT((" [def %s, ",
           queue->nodes_default != UNSPECIFIED ? numbuf : "???"));

    sprintf(numbuf, "%d", queue->nodes_min);
    DBPRT(("min %s], ", queue->nodes_min != UNSPECIFIED ? numbuf : "???"));

    /* Walltime limits. */
    DBPRT(("wallt max %s ",
           (queue->wallt_max != UNSPECIFIED) ?
           schd_sec2val(queue->wallt_max) : "???"));
    DBPRT(("[def %s ",
           queue->wallt_default != UNSPECIFIED ?
           schd_sec2val(queue->wallt_default) : "???"));
    DBPRT(("min %s]\n",
           (queue->wallt_min != UNSPECIFIED) ?
           schd_sec2val(queue->wallt_min) : "???"));

    /* ctime() output already ends in '\n', so none is added here. */
    if (queue->empty_by) {
        DBPRT((" Queue will empty by: %s", ctime(&queue->empty_by)));
    }

    if (queue->idle_since) {
        DBPRT((" Queue idle since: %s", ctime(&queue->idle_since)));
    }

    if (queue->useracl && (queue->flags & QFLAGS_USER_ACL)) {
        DBPRT((" User ACL: "));
        col = 9;    /* Width already consumed by the 'User ACL: ' label. */

        acl = queue->useracl;
        while (acl != NULL) {
            col += strlen(acl->user) + 1;

            /* Wrap once the running width reaches 72. */
            if (col >= 72) {
                DBPRT(("\n "));
                col = 0;
            }

            /* A '/' separator precedes every entry except the first on a
             * line. */
            DBPRT(("%s%s",
                   ((col != 0) && (acl != queue->useracl)) ? "/" : "",
                   acl->user));

            acl = acl->next;
        }

        DBPRT(("\n"));
    }

    if (dumpjobs && queue->jobs) {
        DBPRT((" Jobs: "));
        col = 5;    /* Width already consumed by the 'Jobs: ' label. */

        cur = queue->jobs;
        while (cur != NULL) {
            /*
             * Print only the part of the job id before the first '.', by
             * temporarily cutting the string there.  The '.' is restored
             * at the bottom of the loop.
             */
            dot = strchr(cur->jobid, '.');
            if (dot != NULL)
                *dot = '\0';

            col += strlen(cur->jobid) + 3;  /* 3 == state + '/' + ' ' */

            if (col >= 72) {
                DBPRT(("\n "));
                col = 0;
            }

            DBPRT((" %s/", cur->jobid));
            DBPRT(("%c",
                   (cur->flags & JFLAGS_PRIORITY) ? '!' :
                   (cur->flags & JFLAGS_WAITING) ? 'W' : cur->state));

            if (dot != NULL)
                *dot = '.';

            cur = cur->next;
        }

        DBPRT(("\n"));
    }
}
/*
 * Arrange queue draining for the jobs that deserve it, then hand the job
 * list to schd_pack_queues().
 *
 * Parameters:
 *   queues - list of queues passed to schd_find_drain() and
 *            schd_pack_queues().
 *   jobs   - job list, in the order in which the jobs should be run.
 *   reason - passed through unchanged to schd_pack_queues().
 *
 * Returns whatever schd_pack_queues() returned.  A negative value is
 * treated as a failure and logged.
 */
static int
schedule_jobs(QueueList *queues, Job *jobs, char *reason)
{
    char  *id = "schedule_jobs";
    int    numran;
    Job   *job;
    Queue *shortest;
    int    priority_to_1st = 1;

    /*
     * Since the sorting code has provided an order in which the jobs should
     * be run, attempt to honor that order by treating the first queued job
     * on the list as our first priority.  After that job, only jobs flagged
     * as waiting are considered.
     *
     * For each such job, schd_find_drain() is asked for a queue.  If it
     * returns one, the queue's draining flag is updated as described below.
     */
    for (job = jobs; job != NULL; job = job->next) {
        /* Only queued jobs are candidates. */
        if (job->state != 'Q')
            continue;

        /* Past the first candidate, skip anything not marked waiting. */
        if (!priority_to_1st && !(job->flags & JFLAGS_WAITING))
            continue;

        DBPRT(("%s: job %s is %s (eligible for %s, needs %d nodes)\n", id,
               job->jobid,
               priority_to_1st ? "FIRSTJOB" :
               (job->flags & JFLAGS_PRIORITY) ? "SPECIAL" : "WAITING",
               schd_sec2val(job->eligible), job->nodes));

        /*
         * Find the smallest, shortest-wait queue in which this job will
         * fit.  If it is empty, great.  If not, mark it to be drained,
         * in anticipation of the job being run soon.
         */
        shortest = schd_find_drain(queues, job);

        if (shortest) {
            if (shortest->running == 0) {
                /*
                 * Nothing is running in the queue: clear the draining
                 * flag (if present) so the queue is available for this
                 * job.
                 */
                shortest->flags &= ~QFLAGS_DRAINING;
            } else if (shortest->drain_by <= shortest->empty_by) {
                /*
                 * Jobs are running (so empty_by should be non-zero).
                 * Start draining only if the drain deadline is no later
                 * than the time the queue is expected to empty.
                 */
                shortest->flags |= QFLAGS_DRAINING;
                /* ctime() supplies the trailing '\n'. */
                DBPRT(("%s: shortest queue %s now draining, drain_by %s", id,
                       shortest->qname, ctime(&shortest->drain_by)));
            }
        }

        /*
         * We have looked at (and possibly arranged for special treatment
         * of) the first job on the list.  Now only look for special or
         * waiting jobs.
         */
        priority_to_1st = 0;
    }

    numran = schd_pack_queues(jobs, queues, reason);

    if (numran < 0) {
        /* Name the function that actually failed. */
        (void)sprintf(log_buffer, "schd_pack_queues() failed!");
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
        DBPRT(("%s: %s\n", id, log_buffer));
    }

    return (numran);
}
/* print_config(): Dump the current config to the log */ static void print_config(void) { char *id = "print_config"; QueueList *qptr; if (schd_TEST_ONLY) { (void)sprintf(log_buffer, "%-24s = %s", "TEST_ONLY", schd_bool2val(schd_TEST_ONLY)); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); } if (schd_SubmitQueue) { (void)sprintf(log_buffer, "%-24s = %s@%s", "SUBMIT_QUEUE", schd_SubmitQueue->queue->qname, schd_SubmitQueue->queue->exechost); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); } if (schd_BatchQueues) { for (qptr = schd_BatchQueues; qptr != NULL; qptr = qptr->next) { (void)sprintf(log_buffer, "%-24s = %s@%s", (qptr == schd_BatchQueues) ? "BATCH_QUEUES" : "", qptr->queue->qname, qptr->queue->exechost); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); } } if (schd_ExternQueues) { for (qptr = schd_ExternQueues; qptr != NULL; qptr = qptr->next) { (void)sprintf(log_buffer, "%-24s = %s@%s", (qptr == schd_ExternQueues) ? "EXTERN_QUEUES" : "", qptr->queue->qname, qptr->queue->exechost); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); } } if (schd_SpecialQueue) { (void)sprintf(log_buffer, "%-24s = %s@%s", "SPECIAL_QUEUE", schd_SpecialQueue->queue->qname, schd_SpecialQueue->queue->exechost); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); } if (schd_DedQueues) { for (qptr = schd_DedQueues; qptr != NULL; qptr = qptr->next) { (void)sprintf(log_buffer, "%-24s = %s@%s", (qptr == schd_DedQueues) ? 
"DEDICATED_QUEUES" : "", qptr->queue->qname, qptr->queue->exechost); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); } } (void)sprintf(log_buffer, "%-24s = %s", "ENFORCE_PRIME_TIME", schd_booltime2val(schd_ENFORCE_PRIME_TIME)); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); (void)sprintf(log_buffer, "%-24s = %s", "PRIME_TIME_WALLT_LIMIT", schd_sec2val(schd_PT_WALLT_LIMIT)); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); if (schd_PT_SMALL_NODE_LIMIT) { (void)sprintf(log_buffer, "%-24s = %d", "PRIME_TIME_SMALL_NODE_LIMIT", schd_PT_SMALL_NODE_LIMIT); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); (void)sprintf(log_buffer, "%-24s = %s", "PRIME_TIME_SMALL_WALLT_LIMIT", schd_sec2val(schd_PT_SMALL_WALLT_LIMIT)); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); } (void)sprintf(log_buffer, "%-24s = %s", "NONPRIME_DRAIN_SYS", schd_bool2val(schd_NONPRIME_DRAIN_SYS)); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); if (schd_NP_DRAIN_BACKTIME > 0) { (void)sprintf(log_buffer, "%-24s = %s", "NP_DRAIN_BACKTIME", schd_sec2val(schd_NP_DRAIN_BACKTIME)); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); } if (schd_NP_DRAIN_IDLETIME > 0) { (void)sprintf(log_buffer, "%-24s = %s", "NP_DRAIN_IDLETIME", schd_sec2val(schd_NP_DRAIN_IDLETIME)); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); } (void)sprintf(log_buffer, "%-24s = %s", "WALLT_LIMIT_LARGE_JOB", schd_sec2val(schd_WALLT_LARGE_LIMIT)); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); if (schd_SMALL_JOB_MAX) { (void)sprintf(log_buffer, "%-24s = %d", "SMALL_JOB_MAX", schd_SMALL_JOB_MAX); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); (void)sprintf(log_buffer, "%-24s = %s", "WALLT_LIMIT_SMALL_JOB", schd_sec2val(schd_WALLT_SMALL_LIMIT)); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); } (void)sprintf(log_buffer, "%-24s = 
%s", "PRIME_TIME_START", schd_sec2val(schd_PRIME_TIME_START)); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); (void)sprintf(log_buffer, "%-24s = %s", "PRIME_TIME_END", schd_sec2val(schd_PRIME_TIME_END)); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); (void)sprintf(log_buffer, "%-24s = %d%%", "TARGET_LOAD_PCT", schd_TARGET_LOAD_PCT); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); (void)sprintf(log_buffer, "%-24s = -%d%%,+%d%%", "TARGET_LOAD_VARIANCE", schd_TARGET_LOAD_MINUS, schd_TARGET_LOAD_PLUS); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); (void)sprintf(log_buffer, "%-24s = %d", "HIGH_SYSTIME", schd_HIGH_SYSTIME); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); (void)sprintf(log_buffer, "%-24s = %d", "MAX_JOBS", schd_MAX_JOBS); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); (void)sprintf(log_buffer, "%-24s = %d", "MIN_JOBS", schd_MIN_JOBS); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); (void)sprintf(log_buffer, "%-24s = %s", "MAX_QUEUED_TIME", schd_sec2val(schd_MAX_QUEUED_TIME)); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); (void)sprintf(log_buffer, "%-24s = %s", "SMALL_QUEUED_TIME", schd_sec2val(schd_SMALL_QUEUED_TIME)); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); (void)sprintf(log_buffer, "%-24s = %d", "INTERACTIVE_LONG_WAIT", schd_INTERACTIVE_LONG_WAIT); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); (void)sprintf(log_buffer, "%-24s = %d", "MAX_DEDICATED_JOBS", schd_MAX_DEDICATED_JOBS); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); (void)sprintf(log_buffer, "%-24s = %s", "SORT_BY_PAST_USAGE", schd_bool2val(schd_SORT_BY_PAST_USAGE)); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); (void)sprintf(log_buffer, "%-24s = %s", "ENFORCE_ALLOCATION", schd_booltime2val(schd_ENFORCE_ALLOCATION)); 
log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); (void)sprintf(log_buffer, "%-24s = %s", "ENFORCE_DEDICATED_TIME", schd_booltime2val(schd_ENFORCE_DEDTIME)); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); (void)sprintf(log_buffer, "%-24s = %s", "SCHED_ACCT_DIR", schd_SCHED_ACCT_DIR ? schd_SCHED_ACCT_DIR : "[null]"); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); (void)sprintf(log_buffer, "%-24s = %s", "DEDICATED_TIME_COMMAND", schd_DEDTIME_COMMAND ? schd_DEDTIME_COMMAND : "[null]"); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); (void)sprintf(log_buffer, "%-24s = %s", "SYSTEM_NAME", schd_SYSTEM_NAME ? schd_SYSTEM_NAME : "[null]"); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); (void)sprintf(log_buffer, "%-24s = %s", "SERVER_HOST", schd_SERVER_HOST ? schd_SERVER_HOST : "[null]"); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); (void)sprintf(log_buffer, "%-24s = %s", "SCHED_HOST", schd_SCHED_HOST ? schd_SCHED_HOST : "[null]"); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); (void)sprintf(log_buffer, "%-24s = %s", "SCHED_RESTART_ACTION", (schd_SCHED_RESTART_ACTION == SCHD_RESTART_NONE ? "NONE" : (schd_SCHED_RESTART_ACTION == SCHD_RESTART_RESUBMIT ? "RESUBMIT" : (schd_SCHED_RESTART_ACTION == SCHD_RESTART_RERUN ? 
"RERUN" : "?")))); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); if (schd_AVOID_FRAGS) { (void)sprintf(log_buffer, "%-24s = %s", "AVOID_FRAGMENTATION", schd_bool2val(schd_AVOID_FRAGS)); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); } if (schd_JOB_DUMPFILE) { (void)sprintf(log_buffer, "%-24s = %s", "SORTED_JOB_DUMPFILE", schd_JOB_DUMPFILE); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); } if (schd_MANAGE_HPM) { (void)sprintf(log_buffer, "%-24s = %s", "MANAGE_HPM_COUNTERS", schd_bool2val(schd_MANAGE_HPM)); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); if (schd_REVOKE_HPM) { (void)sprintf(log_buffer, "%-24s = %s", "REVOKE_HPM_COUNTERS", schd_bool2val(schd_MANAGE_HPM)); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); } } if (schd_FAKE_MACH_MULT != 1) { (void)sprintf(log_buffer, "%-24s = %d", "FAKE_MACHINE_MULT", schd_FAKE_MACH_MULT); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); } }
/*
 * Write a table describing every job on 'joblist' to the stream 'dump':
 * a two-line header, one row per job, and a total.  Jobs whose
 * time_queued differs from their eligible time get a '*' after the
 * eligible column, and a footnote explaining the mark is appended if any
 * such job was seen.
 *
 * Text fields are truncated to their column width.
 *
 * Returns the number of jobs on the list.
 */
static int
dump_sorted_jobs(FILE *dump, Job *joblist)
{
    Job *job;
    int  njobs;
    int  elig_mesg = 0;

#define DUMP_JID_LEN   16
#define DUMP_STATE_LEN 1
#define DUMP_OWNER_LEN 8
#define DUMP_NODES_LEN 3
#define DUMP_WALLT_LEN 8
#define DUMP_WAITT_LEN 8
#define DUMP_ELIGI_LEN 9   /* time plus '*' if wait != eligible */
#define DUMP_FLAGS_LEN 18

    char jid[DUMP_JID_LEN + 1];
    char owner[DUMP_OWNER_LEN + 1];
    char wallt[DUMP_WALLT_LEN + 1];
    char waitt[DUMP_WAITT_LEN + 1];
    char eligi[DUMP_ELIGI_LEN + 1];
    char flags[DUMP_FLAGS_LEN + 1];

    /* Negative widths left-justify each column. */
    fprintf(dump, " %*s %*s %*s %*s %*s %*s %*s %*s\n",
            -DUMP_JID_LEN, "Job ID",
            -DUMP_STATE_LEN, "S",
            -DUMP_OWNER_LEN, "Owner",
            -DUMP_NODES_LEN, "Nds",
            -DUMP_WALLT_LEN, "Walltime",
            -DUMP_WAITT_LEN, "Q'd for",
            -DUMP_ELIGI_LEN, "Eligible",
            -DUMP_FLAGS_LEN, "Flags");

    fprintf(dump, " %*s %c %*s %*s %*s %*s %*s %*s\n",
            -DUMP_JID_LEN, "----------------", '-',
            -DUMP_OWNER_LEN, "--------",
            -DUMP_NODES_LEN, "---",
            -DUMP_WALLT_LEN, "--------",
            -DUMP_WAITT_LEN, "--------",
            -DUMP_ELIGI_LEN, "---------",
            -DUMP_FLAGS_LEN, "------------------");

    for (njobs = 0, job = joblist; job != NULL; job = job->next) {
        njobs++;

        /*
         * strncpy() does not terminate the destination when the source
         * is at least as long as the limit, so terminate explicitly.
         */
        strncpy(jid, job->jobid, DUMP_JID_LEN);
        jid[DUMP_JID_LEN] = '\0';

        strncpy(owner, job->owner, DUMP_OWNER_LEN);
        owner[DUMP_OWNER_LEN] = '\0';

        /*
         * The length of the string returned by schd_sec2val() is not
         * known here, so bound these copies as well.
         */
        strncpy(wallt, schd_sec2val(job->walltime), DUMP_WALLT_LEN);
        wallt[DUMP_WALLT_LEN] = '\0';

        strncpy(waitt, schd_sec2val(job->time_queued), DUMP_WAITT_LEN);
        waitt[DUMP_WAITT_LEN] = '\0';

        /* Keep one column free for the optional '*' marker. */
        strncpy(eligi, schd_sec2val(job->eligible), DUMP_ELIGI_LEN - 1);
        eligi[DUMP_ELIGI_LEN - 1] = '\0';

        if (job->time_queued != job->eligible) {
            strcat(eligi, "*");
            elig_mesg++;
        }

        /*
         * Watch length of 'flags[]' array!  The longest combination
         * below ("Int High Ded HPM ") is 17 characters.
         */
        flags[0] = '\0';

        if (job->flags & JFLAGS_INTERACTIVE)
            strcat(flags, "Int ");

        /* "Priority" jobs are marked as being waiting, even if they're
         * new, so test for priority first. */
        if (job->flags & JFLAGS_PRIORITY)
            strcat(flags, "High ");
        else if (job->flags & JFLAGS_WAITING)
            strcat(flags, "Wait ");

        if (job->flags & JFLAGS_DEDICATED)
            strcat(flags, "Ded ");

        if (job->flags & JFLAGS_NEEDS_HPM)
            strcat(flags, "HPM ");

        /* Trim off the trailing space if any flags were listed. */
        if (flags[0] != '\0')
            flags[strlen(flags) - 1] = '\0';

        /* Print the truncated owner so the columns stay aligned. */
        fprintf(dump, " %*s %c %*s %*d %*s %*s %*s %*s\n",
                -DUMP_JID_LEN, jid,
                job->state,
                -DUMP_OWNER_LEN, owner,
                -DUMP_NODES_LEN, job->nodes,
                -DUMP_WALLT_LEN, wallt,
                -DUMP_WAITT_LEN, waitt,
                -DUMP_ELIGI_LEN, eligi,
                -DUMP_FLAGS_LEN, flags);
    }

    fprintf(dump, " Total: %d job%s\n\n", njobs, (njobs == 1) ? "" : "s");

    if (elig_mesg) {
        fprintf(dump,
                "Jobs marked with a ``*'' have an etime different "
                "from their ctime.\n\n");
    }

    return (njobs);
}
static int make_job_dump(char *dumpfile) { char *id = "make_job_dump"; FILE *dump; QueueList *qptr; /* * Attempt to open the dump file, creating it if necessary. It should * be truncated each time this runs, so don't open with append mode. */ if ((dump = fopen(dumpfile, "w")) == NULL) { (void)sprintf(log_buffer, "Cannot write to %s: %s\n", dumpfile, strerror(errno)); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); DBPRT(("%s: %s\n", id, log_buffer)); return (-1); } /* Head the file with a timestamp. */ fprintf(dump, "%s\n", ctime(&schd_TimeNow)); /* Include the version string compiled into the scheduler binary. */ fprintf(dump, "%s\n", schd_VersionString); /* And some more useful information about the state of the world. */ fprintf(dump, "Scheduler running on '%s'\n", schd_ThisHost); fprintf(dump, "Prime-time is "); if (schd_ENFORCE_PRIME_TIME && schd_TimeNow >= schd_ENFORCE_PRIME_TIME) { fprintf(dump, "from %s ", schd_sec2val(schd_PRIME_TIME_START)); fprintf(dump, "to %s.\n", schd_sec2val(schd_PRIME_TIME_END)); } else fprintf(dump, "not enforced.\n"); fprintf(dump, "\nJOBS LISTED IN ORDER FROM HIGHEST TO LOWEST PRIORITY\n\n"); /* Now dump the jobs queued on the various queues, in order of priority. 
*/ qptr = schd_SubmitQueue; if (qptr->queue->jobs) { fprintf(dump, "Jobs on submit queue '%s':\n", qptr->queue->qname); dump_sorted_jobs(dump, qptr->queue->jobs); } for (qptr = schd_ExternQueues; qptr != NULL; qptr = qptr->next) { if (qptr->queue->jobs) { fprintf(dump, "Jobs on external queue '%s':\n", qptr->queue->qname); dump_sorted_jobs(dump, qptr->queue->jobs); } } for (qptr = schd_DedQueues; qptr != NULL; qptr = qptr->next) { if (qptr->queue->jobs) { fprintf(dump, "Jobs on dedicated queue '%s':\n", qptr->queue->qname); dump_sorted_jobs(dump, qptr->queue->jobs); } } if (fclose(dump)) { (void)sprintf(log_buffer, "close(%s): %s\n", dumpfile, strerror(errno)); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); DBPRT(("%s: %s\n", id, log_buffer)); return (-1); } return (0); }
/*
 * Walk the job list and reject, via schd_reject_job(), every job that
 * either
 *   - fails schd_job_can_queue(), or
 *   - exceeds the walltime limit for its size class.  This test applies
 *     only when SMALL_JOB_MAX is positive and the job is not flagged as
 *     priority.
 *
 * Returns the head of the list to use for further scheduling.  If jobs at
 * the head of the list were rejected, the returned head is the first job
 * that was not rejected (or NULL if none remain).
 *
 * NOTE(review): a rejected job that is not at the head is not unlinked
 * here; whether schd_reject_job() takes care of that is not visible from
 * this function.
 */
static Job *
reject_unrunnables(Job *jobs)
{
    Job  *this, *nextjob;
    char  tmpstr[300];
    char *why;

    for (this = jobs; this != NULL; this = nextjob) {
        /* Fetch the successor first: 'this' may be rejected below. */
        nextjob = this->next;
        why = NULL;

        if (!schd_job_can_queue(this)) {
            DBPRT(("job %s does not fit on any execution queue - reject\n",
                   this->jobid));
            why = "Job will not fit on any execution queue.\n"
                  "\n"
                  "Use 'qstat -q' to get execution queue limits.\n";

        } else if (!(this->flags & JFLAGS_PRIORITY) &&
                   (schd_SMALL_JOB_MAX > 0)) {
            /*
             * Enforce maximum job limits
             * "Big" jobs are given a maximum walltime limit
             * (WALLT_LARGE_LIMIT) that differs from "small" jobs.  (Job
             * size distinction based on the size specified by
             * SMALL_JOB_MAX.)  We need to reject any job which violates
             * these limits.
             *
             * Special-priority jobs are not affected.
             */
            if (this->nodes <= schd_SMALL_JOB_MAX) {
                if (this->walltime > schd_WALLT_SMALL_LIMIT) {
                    DBPRT(("job %s exceeds Small job walltime limit - reject\n",
                           this->jobid));
                    sprintf(tmpstr,
                            "Job exceeds maximum walltime limit (%s) policy\n"
                            "\tfor small jobs (1 - %d nodes).\n",
                            schd_sec2val(schd_WALLT_SMALL_LIMIT),
                            schd_SMALL_JOB_MAX);
                    why = tmpstr;
                }
            } else if (this->walltime > schd_WALLT_LARGE_LIMIT) {
                DBPRT(("job %s exceeds Large job walltime limit - reject\n",
                       this->jobid));
                sprintf(tmpstr,
                        "Job exceeds maximum walltime limit (%s) policy\n"
                        "\tfor large jobs (%d+ nodes).\n",
                        schd_sec2val(schd_WALLT_LARGE_LIMIT),
                        schd_SMALL_JOB_MAX + 1);
                why = tmpstr;
            }
        }

        if (why == NULL)
            continue;           /* Job is acceptable. */

        /*
         * If this job is at the head of the list, advance the list
         * pointer so that further scheduling will not be done on the now
         * bogus job.  'nextjob' is left alone: it already points at the
         * new head, which must itself be examined on the next iteration.
         * (Stepping past it, as was done before, let the job following a
         * rejected head escape every check.)
         */
        if (this == jobs)
            jobs = nextjob;

        schd_reject_job(this, why);
    }

    return (jobs);
}
/*
 * Determine if a job *can* run in this queue.  This is distinct from if
 * it *should* be run in the queue.
 *
 * A job *can* fit in a queue if its requested resources are not greater
 * than the queue's maximums.
 *
 * A job *should* be run only if its requested resources do not exceed the
 * queue's *available* resources.
 *
 * Parameters:
 *   job    - the job whose request is tested.
 *   queue  - the queue whose limits are tested against.
 *   reason - optional; when non-NULL a human-readable explanation is
 *            written here on failure.  The size of the buffer is not
 *            known to this function, so the caller must supply one large
 *            enough for the longest message.
 *
 * Returns 1 if the job fits within the queue's limits, 0 if it does not.
 */
int
schd_job_fits_queue(Job *job, Queue *queue, char *reason)
{
    /* char *id = "schd_job_fits_queue"; */

    /*
     * Is the System architecture correct for this job?  The comparison is
     * only possible when both sides specify an architecture; strcmp()
     * must not be handed a NULL pointer.
     */
    if ((job->arch != NULL) && (queue->rsrcs->arch != NULL) &&
        (strcmp(job->arch, queue->rsrcs->arch) != 0)) {
        if (reason)
            (void)sprintf(reason,
                          "Host %s is wrong architecture (%s) to fill request (%s).",
                          queue->qname, queue->rsrcs->arch, job->arch);
        return (0);
    }

    /*
     * Compare the job's requested resources against the queue's limits.
     * A queue limit of UNSPECIFIED disables the corresponding test.
     */
    if ((queue->wallt_min != UNSPECIFIED) &&
        (job->walltime < queue->wallt_min)) {
        if (reason)
            (void)sprintf(reason,
                          "Does not meet queue '%s' walltime minimum (%s).",
                          queue->qname, schd_sec2val(queue->wallt_min));
        return (0);
    }

    if ((queue->wallt_max != UNSPECIFIED) &&
        (job->walltime > queue->wallt_max)) {
        if (reason)
            (void)sprintf(reason,
                          "Exceeds queue '%s' walltime limit (%s).",
                          queue->qname, schd_sec2val(queue->wallt_max));
        return (0);
    }

    if ((queue->ncpus_min != UNSPECIFIED) &&
        (job->ncpus < queue->ncpus_min)) {
        if (reason)
            (void)sprintf(reason,
                          "Does not meet queue '%s' CPU minimum (%d).",
                          queue->qname, queue->ncpus_min);
        return (0);
    }

    if ((queue->ncpus_max != UNSPECIFIED) &&
        (job->ncpus > queue->ncpus_max)) {
        if (reason)
            (void)sprintf(reason,
                          "Exceeds queue '%s' CPU limit (%d).",
                          queue->qname, queue->ncpus_max);
        return (0);
    }

    /*
     * Memory limits are printed as unsigned long.  The conversion is
     * "%lu" ("%ul" is "%u" followed by a literal 'l'), and the argument
     * is cast so that it always matches the conversion.
     */
    if ((queue->mem_min != UNSPECIFIED) &&
        (job->memory < queue->mem_min)) {
        if (reason)
            (void)sprintf(reason,
                          "Does not meet queue '%s' memory minimum (%lu).",
                          queue->qname, (unsigned long)queue->mem_min);
        return (0);
    }

    if ((queue->mem_max != UNSPECIFIED) &&
        (job->memory > queue->mem_max)) {
        if (reason)
            (void)sprintf(reason,
                          "Exceeds queue '%s' memory limit (%lu).",
                          queue->qname, (unsigned long)queue->mem_max);
        return (0);
    }

    /*
     * The job _can_ fit in this queue.  This doesn't mean it *will* fit
     * in the queue as it currently exists, but it *would* fit if the queue
     * was completely empty.
     */
    return (1);
}
/*
 * Determine if a job *can* run in this queue.  This is distinct from if
 * it *should* be run in the queue.
 *
 * A job *can* fit in a queue if its requested resources are not greater
 * than the queue's maximums.
 *
 * A job *should* be run only if its requested resources do not exceed the
 * queue's *available* resources.
 *
 * Parameters:
 *   job    - the job whose request is tested.
 *   queue  - the queue whose limits are tested against.
 *   reason - optional; when non-NULL a human-readable explanation is
 *            written here for the resource-limit failures.  The size of
 *            the buffer is not known to this function, so the caller must
 *            supply one large enough for the longest message.
 *
 * Returns 1 if the job fits within the queue's limits, 0 if it does not.
 */
int
schd_job_fits_queue(Job *job, Queue *queue, char *reason)
{
    /* char *id = "schd_job_fits_queue"; */

    /*
     * Check if this job has to run on a specific host (e.g. it was
     * previously checkpointed on this host).
     *
     * NOTE(review): 'reason' is left untouched on this path; callers that
     * print it after a 0 return should confirm they initialize it.
     */
    if ((job->exechost != NULL) &&
        (strcmp(job->exechost, queue->exechost) != 0))
        return (0);

    /*
     * Compare the job's requested resources against the queue's limits.
     * A queue limit of UNSPECIFIED disables the corresponding test.
     */
    if ((queue->wallt_min != UNSPECIFIED) &&
        (job->walltime < queue->wallt_min)) {
        if (reason)
            (void)sprintf(reason,
                          "Does not meet queue '%s' walltime minimum (%s).",
                          queue->qname, schd_sec2val(queue->wallt_min));
        return (0);
    }

    if ((queue->wallt_max != UNSPECIFIED) &&
        (job->walltime > queue->wallt_max)) {
        if (reason)
            (void)sprintf(reason,
                          "Would exceed queue '%s' walltime limit (%s).",
                          queue->qname, schd_sec2val(queue->wallt_max));
        return (0);
    }

    if ((queue->ncpus_min != UNSPECIFIED) &&
        (job->ncpus < queue->ncpus_min)) {
        if (reason)
            (void)sprintf(reason,
                          "Does not meet queue '%s' CPU minimum (%d).",
                          queue->qname, queue->ncpus_min);
        return (0);
    }

    if ((queue->ncpus_max != UNSPECIFIED) &&
        (job->ncpus > queue->ncpus_max)) {
        if (reason)
            (void)sprintf(reason,
                          "Would exceed queue '%s' CPU limit (%d).",
                          queue->qname, queue->ncpus_max);
        return (0);
    }

    /*
     * Memory limits are printed as unsigned long.  The conversion is
     * "%lu" ("%ul" is "%u" followed by a literal 'l'), and the argument
     * is cast so that it always matches the conversion.
     */
    if ((queue->mem_min != UNSPECIFIED) &&
        (job->memory < queue->mem_min)) {
        if (reason)
            (void)sprintf(reason,
                          "Does not meet queue '%s' memory minimum (%lu).",
                          queue->qname, (unsigned long)queue->mem_min);
        return (0);
    }

    if ((queue->mem_max != UNSPECIFIED) &&
        (job->memory > queue->mem_max)) {
        if (reason)
            (void)sprintf(reason,
                          "Would exceed queue '%s' memory limit (%lu).",
                          queue->qname, (unsigned long)queue->mem_max);
        return (0);
    }

    /* The job may not ask for more memory than the queue's host has. */
    if ((queue->rsrcs->mem_total != UNSPECIFIED) &&
        (job->memory > queue->rsrcs->mem_total)) {
        if (reason)
            (void)sprintf(reason,
                          "Exceeds host '%s' memory limit (%lu).",
                          queue->exechost,
                          (unsigned long)queue->rsrcs->mem_total);
        return (0);
    }

    /*
     * The job _can_ fit in this queue.  This doesn't mean it *will* fit
     * in the queue as it currently exists, but it *would* fit if the queue
     * was completely empty.
     */
    return (1);
}