/* * Populate the queue struct with the information needed for scheduling; * querying the resource monitor for queue->exechost's information. */ int schd_get_queue_info(Queue *queue) { char *id = "get_queue_info"; if (queue->ncpus_assn == UNSPECIFIED) queue->ncpus_assn = 0; if (queue->mem_assn == UNSPECIFIED) queue->mem_assn = 0; if (queue->running == UNSPECIFIED) queue->running = 0; /* * Get the resources for this queue from the resource monitor (if * available). If the resmom is not accessible, disable the queue. * Don't bother checking if the queue is Stopped. */ if (strcmp(queue->qname, schd_SubmitQueue->queue->qname) != 0 && (queue->flags & QFLAGS_STOPPED) == 0) { queue->rsrcs = schd_get_resources(queue->exechost); if (queue->rsrcs != NULL) { /* Account for this queue's resources. */ queue->rsrcs->ncpus_alloc += queue->ncpus_assn; queue->rsrcs->mem_alloc += queue->mem_assn; queue->rsrcs->njobs += queue->running; queue->ncpus_max = (queue->ncpus_max <= queue->rsrcs->ncpus_total ? queue->ncpus_max : queue->rsrcs->ncpus_total); } else { (void)sprintf(log_buffer, "Can't get resources for %s@%s - marking unavailable.", queue->qname, queue->exechost); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); DBPRT(("%s: %s\n", id, log_buffer)); queue->flags |= QFLAGS_NODEDOWN; } } return (0); }
/* * schd_get_queue_limits - query queue information from the server. * * Returns 0 on success, -1 for "fatal errors", and 1 for a transient * error (i.e., the queue failed the sanity checks imposed by the * queue_sanity() function). */ int schd_get_queue_limits(Queue *queue) { char *id = "schd_get_queue_limits"; int moved = 0, istrue; Batch_Status *bs; AttrList *attr; static AttrList alist[] = { {&alist[1], ATTR_start, "", ""}, {&alist[2], ATTR_enable, "", ""}, {&alist[3], ATTR_count, "", ""}, {&alist[4], ATTR_maxuserrun, "", ""}, {&alist[5], ATTR_rescavail, "", ""}, {&alist[6], ATTR_rescassn, "", ""}, {&alist[7], ATTR_rescdflt, "", ""}, {&alist[8], ATTR_rescmax, "", ""}, {&alist[9], ATTR_rescmin, "", ""}, {&alist[10], ATTR_acluren, "", ""}, {&alist[11], ATTR_acluser, "", ""}, {NULL, ATTR_maxrun, "", ""} }; size_t mem_default = UNSPECIFIED; size_t mem_assn = UNSPECIFIED; size_t mem_max = UNSPECIFIED; size_t mem_min = UNSPECIFIED; int cpu_default = UNSPECIFIED; int cpu_assn = UNSPECIFIED; int cpu_max = UNSPECIFIED; int cpu_min = UNSPECIFIED; int nodes_from_cpu, nodes_from_mem; queue->running = UNSPECIFIED; queue->queued = UNSPECIFIED; queue->maxrun = UNSPECIFIED; queue->userrun = UNSPECIFIED; queue->nodes_max = UNSPECIFIED; queue->nodes_min = UNSPECIFIED; queue->nodes_default = UNSPECIFIED; queue->nodes_assn = UNSPECIFIED; queue->nodes_rsvd = UNSPECIFIED; queue->wallt_max = UNSPECIFIED; queue->wallt_min = UNSPECIFIED; queue->wallt_default = UNSPECIFIED; queue->flags = 0; #ifdef NODEMASK BITFIELD_CLRALL(&queue->queuemask); BITFIELD_CLRALL(&queue->availmask); #endif /* NODEMASK */ queue->rsrcs = NULL; if (queue->jobs) { DBPRT(("%s: found jobs on queue '%s'! Freeing them...\n", id, queue->qname)); schd_free_jobs(queue->jobs); } if (queue->useracl) { DBPRT(("%s: found user ACL list on queue '%s'! Freeing it...\n", id, queue->qname)); schd_free_useracl(queue->useracl); } queue->jobs = NULL; queue->useracl = NULL; /* Ask the server for information about the specified queue. */ if ((bs = pbs_statque(connector, queue->qname, alist, NULL)) == NULL) { sprintf(log_buffer, "pbs_statque failed, \"%s\" %d", queue->qname, pbs_errno); log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer); DBPRT(("%s: %s\n", id, log_buffer)); return (-1); } /* Process the list of attributes returned by the server. */ for (attr = bs->attribs; attr != NULL; attr = attr->next) { /* Is queue started? */ if (!strcmp(attr->name, ATTR_start)) { if (schd_val2bool(attr->value, &istrue) == 0) { if (istrue) /* if true, queue is not stopped. */ queue->flags &= ~QFLAGS_STOPPED; else queue->flags |= QFLAGS_STOPPED; } else { DBPRT(("%s: couldn't parse attr %s value %s to boolean\n", id, attr->name, attr->value)); } continue; } /* Is queue enabled? */ if (!strcmp(attr->name, ATTR_enable)) { if (schd_val2bool(attr->value, &istrue) == 0) { if (istrue) /* if true, queue is not disabled. */ queue->flags &= ~QFLAGS_DISABLED; else queue->flags |= QFLAGS_DISABLED; } else { DBPRT(("%s: couldn't parse attr %s value %s to boolean\n", id, attr->name, attr->value)); } continue; } /* How many jobs are queued and running? */ if (!strcmp(attr->name, ATTR_count)) { queue->queued = schd_how_many(attr->value, SC_QUEUED); queue->running = schd_how_many(attr->value, SC_RUNNING); continue; } /* Queue-wide maximum number of jobs running. */ if (!strcmp(attr->name, ATTR_maxrun)) { queue->maxrun = atoi(attr->value); continue; } /* Per-user maximum number of jobs running. */ if (!strcmp(attr->name, ATTR_maxuserrun)) { queue->userrun = atoi(attr->value); continue; } /* Is there an enabled user access control list on this queue? */ if (!strcmp(attr->name, ATTR_acluren)) { if (schd_val2bool(attr->value, &istrue) == 0) { if (istrue) /* if true, queue has an ACL */ queue->flags |= QFLAGS_USER_ACL; else queue->flags &= ~QFLAGS_USER_ACL; } else { DBPRT(("%s: couldn't parse attr %s value %s to boolean\n", id, attr->name, attr->value)); } continue; } if (!strcmp(attr->name, ATTR_acluser)) { if (queue->useracl) { DBPRT(("queue %s acluser already set!\n", queue->qname)); schd_free_useracl(queue->useracl); } queue->useracl = schd_create_useracl(attr->value); continue; } /* Queue maximum resource usage. */ if (!strcmp(attr->name, ATTR_rescmax)) { if (!strcmp("mem", attr->resource)) { mem_max = schd_val2byte(attr->value); continue; } if (!strcmp("ncpus", attr->resource)) { cpu_max = atoi(attr->value); continue; } if (!strcmp("walltime", attr->resource)) { queue->wallt_max = schd_val2sec(attr->value); continue; } #ifdef NODEMASK if (!strcmp("nodemask", attr->resource)) { if (schd_str2mask(attr->value, &queue->queuemask)) { (void)sprintf(log_buffer, "couldn't convert nodemask %s", attr->value); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); } else queue->flags |= QFLAGS_NODEMASK; /* Valid nodemask. */ } #endif /* NODEMASK */ continue; } /* Queue minimum resource usage. */ if (!strcmp(attr->name, ATTR_rescmin)) { if (!strcmp("mem", attr->resource)) { mem_min = schd_val2byte(attr->value); continue; } if (!strcmp("ncpus", attr->resource)) { cpu_min = atoi(attr->value); continue; } if (!strcmp("walltime", attr->resource)) { queue->wallt_min = schd_val2sec(attr->value); continue; } continue; } /* Queue assigned (in use) resource usage. */ if (!strcmp(attr->name, ATTR_rescassn)) { if (!strcmp("mem", attr->resource)) { mem_assn = schd_val2byte(attr->value); continue; } if (!strcmp("ncpus", attr->resource)) { cpu_assn = atoi(attr->value); } continue; } if (!strcmp(attr->name, ATTR_rescdflt)) { if (!strcmp("mem", attr->resource)) { mem_default = schd_val2byte(attr->value); continue; } if (!strcmp("ncpus", attr->resource)) { cpu_default = atoi(attr->value); continue; } if (!strcmp("walltime", attr->resource)) queue->wallt_default = schd_val2sec(attr->value); } /* Ignore anything else */ } pbs_statfree(bs); /* * Calculate values for queue node limits, given memory and cpu values. * Note any discrepancies. */ nodes_from_cpu = NODES_FROM_CPU(cpu_default); nodes_from_mem = NODES_FROM_MEM(mem_default); if (nodes_from_cpu != nodes_from_mem) { sprintf(log_buffer, "%s: Queue '%s' default cpu/mem (%d/%s) convert to %d != %d nodes", id, queue->qname, cpu_default, schd_byte2val(mem_default), nodes_from_cpu, nodes_from_mem); log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer); DBPRT(("%s: %s\n", id, log_buffer)); } nodes_from_cpu = NODES_FROM_CPU(cpu_max); nodes_from_mem = NODES_FROM_MEM(mem_max); if (nodes_from_cpu != nodes_from_mem) { sprintf(log_buffer, "%s: Queue '%s' maximum cpu/mem (%d/%s) convert to %d != %d nodes", id, queue->qname, cpu_max, schd_byte2val(mem_max), nodes_from_cpu, nodes_from_mem); log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer); DBPRT(("%s: %s\n", id, log_buffer)); } nodes_from_cpu = NODES_FROM_CPU(cpu_min); nodes_from_mem = NODES_FROM_MEM(mem_min); if (nodes_from_cpu != nodes_from_mem) { sprintf(log_buffer, "%s: Queue '%s' minimum cpu/mem (%d/%s) convert to %d != %d nodes", id, queue->qname, cpu_min, schd_byte2val(mem_min), nodes_from_cpu, nodes_from_mem); log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer); DBPRT(("%s: %s\n", id, log_buffer)); } /* * Note: The assigned cpus and memory need not be exactly the same * node equivalency. */ if ((cpu_default != UNSPECIFIED) && (mem_default != UNSPECIFIED)) queue->nodes_default = NODES_REQD(cpu_default, mem_default); if ((cpu_max != UNSPECIFIED) && (mem_max != UNSPECIFIED)) queue->nodes_max = NODES_REQD(cpu_max, mem_max); if ((cpu_min != UNSPECIFIED) && (mem_min != UNSPECIFIED)) queue->nodes_min = NODES_REQD(cpu_min, mem_min); if ((cpu_assn != UNSPECIFIED) && (mem_assn != UNSPECIFIED)) queue->nodes_assn = NODES_REQD(cpu_assn, mem_assn); /* * Move any jobs on this queue from the global list onto the queue's * list. Keep track of when the longest-running job will end, and set * the 'empty_by' field to that value. Maintain the ordering as it was * in "schd_AllJobs". */ if (schd_AllJobs) moved = queue_claim_jobs(queue, &schd_AllJobs); if (moved < 0) { sprintf(log_buffer, "%s: WARNING! Queue '%s' failed to claim jobs.", id, queue->qname); log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer); DBPRT(("%s: %s\n", id, log_buffer)); } if (queue->nodes_assn == UNSPECIFIED) queue->nodes_assn = 0; if (queue->running == UNSPECIFIED) queue->running = 0; /* * Find out if the queue is idle, and if it was not before, set the idle * time to now. If there are running jobs, the queue is not idle at the * start of this iteration - set idle_since to 0. */ if (queue->running) { queue->idle_since = 0; } else { if (queue->idle_since == 0) queue->idle_since = schd_TimeNow; } /* * Get the resources for this queue from the resource monitor (if * available). If the resmom is not accessible, disable the queue. * If the resources were received okay, compute the available node * masks from the resources and jobs. * Don't bother with resources for the special or submit queues. */ if ((strcmp(queue->qname, schd_SubmitQueue->queue->qname) != 0) || ((schd_SpecialQueue != NULL) && (!strcmp(queue->qname, schd_SpecialQueue->queue->qname)))) { queue->rsrcs = schd_get_resources(queue->exechost); if (queue->rsrcs != NULL) { /* Account for this queue's resources. */ queue->rsrcs->nodes_alloc += queue->nodes_assn; queue->rsrcs->njobs += queue->running; /* * If the HPM counters do not appear to be in use on this host, * check for jobs on the queue that are using hpm. If so, set * the 'HPM_IN_USE' flag on the resources. This will prevent the * HPM counters from being released to global mode at the end * of the scheduling run (c.f. cleanup.c). * The 'HPM_IN_USE' flag will also be asserted if a job is run * that uses the HPM counters. */ if (schd_MANAGE_HPM && !(queue->rsrcs->flags & RSRCS_FLAGS_HPM_IN_USE)) { if (schd_hpm_job_count(queue->jobs)) queue->rsrcs->flags |= RSRCS_FLAGS_HPM_IN_USE; } #ifdef NODEMASK /* And find the nodemasks for the queue and resources. */ find_nodemasks(queue, queue->rsrcs); #endif /* NODEMASK */ } else { (void)sprintf(log_buffer, "Can't get resources for %s@%s - marking unavailable.", queue->qname, queue->exechost); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); DBPRT(("%s: %s\n", id, log_buffer)); queue->flags |= QFLAGS_DISABLED; } } #ifdef DEBUG schd_dump_queue(queue, QUEUE_DUMP_JOBS); #endif /* DEBUG */ /* * It would probably be better to wait for the world to stabilize * than to try to impose some artificial order upon it. Do not do * the sanity check if the queue is stopped. */ if ((queue->flags & QFLAGS_STOPPED) == 0) { if (!queue_sanity(queue)) { sprintf(log_buffer, "WARNING! Queue '%s' failed sanity checks.", queue->qname); log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer); DBPRT(("%s: %s\n", id, log_buffer)); return (1); } } return (0); }
/* * Populate the queue struct with the information needed for scheduling; * this includes "claiming" jobs that "belong" to this queue; and querying * the resource monitor for queue->exechost's information. */ int schd_get_queue_info(Queue *queue) { char *id = "get_queue_info"; int moved = 0; /* * Move any jobs on this queue from the global list onto the queue's * list. Keep track of when the longest-running job will end, and set * the 'empty_by' field to that value. Maintain the ordering as it was * in "schd_AllJobs". */ if (schd_AllJobs) moved = queue_claim_jobs(queue, &schd_AllJobs); if (moved < 0) { sprintf(log_buffer, "%s: WARNING! Queue '%s' failed to claim jobs.", id, queue->qname); log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer); DBPRT(("%s: %s\n", id, log_buffer)); } if (queue->ncpus_assn == UNSPECIFIED) queue->ncpus_assn = 0; if (queue->mem_assn == UNSPECIFIED) queue->mem_assn = 0; if (queue->running == UNSPECIFIED) queue->running = 0; /* * Get the resources for this queue from the resource monitor (if * available). If the resmom is not accessible, disable the queue. * Don't bother checking if the queue is Stopped. */ if (strcmp(queue->qname, schd_SubmitQueue->queue->qname) != 0 && (queue->flags & QFLAGS_STOPPED) == 0) { queue->rsrcs = schd_get_resources(queue->exechost); if (queue->rsrcs != NULL) { /* Account for this queue's resources. */ queue->rsrcs->ncpus_alloc += queue->ncpus_assn; queue->rsrcs->mem_alloc += queue->mem_assn; queue->rsrcs->njobs += queue->running; } else { (void)sprintf(log_buffer, "Can't get resources for %s@%s - marking unavailable.", queue->qname, queue->exechost); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); DBPRT(("%s: %s\n", id, log_buffer)); queue->flags |= QFLAGS_NODEDOWN; } } return (0); }