/*
 * dump_resources - write a summary of one host's resources to the log.
 *
 * Emits one log record each for: the host name, the architecture, the CPU
 * load average, the CPU allocation (count and percentage), the free and
 * total memory, and the number of running jobs.
 *
 *   rsrcs - resources to report; must not be NULL.
 */
static void
dump_resources(Resources *rsrcs)
{
    char *id = "dump_res";
    char tmpstr[40];
    long long tmpval;
    double cpu_pct;

    /* Log the system's status */
    (void)sprintf(log_buffer, "Resources for host %s", rsrcs->exechost);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

    (void)sprintf(log_buffer, " :: %-20s = %s", "SYSTEM Architecture:",
        rsrcs->arch);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

    (void)sprintf(log_buffer, " :: %-20s = %0.2f", "CPU Load average:",
        rsrcs->loadave);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

    /* A host reporting zero CPUs must not turn the percentage into inf/nan. */
    cpu_pct = 0.0;
    if (rsrcs->ncpus_total != 0)
        cpu_pct = (rsrcs->ncpus_alloc * 100.0) / rsrcs->ncpus_total;

    (void)sprintf(log_buffer, " :: %-20s = %d / %d (%.2f%%)", "CPUs allocated:",
        rsrcs->ncpus_alloc, rsrcs->ncpus_total, cpu_pct);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

    /*
     * Save the formatted total before schd_byte2val() is called again for
     * the free amount (presumably it formats into a shared buffer - confirm).
     * The copy is bounded so an unexpectedly long string cannot overrun
     * tmpstr.
     */
    (void)snprintf(tmpstr, sizeof(tmpstr), "%s",
        schd_byte2val(rsrcs->mem_total));

    /*
     * Compare before subtracting so that an over-allocated host reports 0
     * free memory whether the memory fields are signed or unsigned.
     */
    tmpval = 0;
    if (rsrcs->mem_total > rsrcs->mem_alloc)
        tmpval = (long long)(rsrcs->mem_total - rsrcs->mem_alloc);

    (void)sprintf(log_buffer, " :: %-20s = %s / %s", "Memory (free):",
        schd_byte2val((size_t)tmpval), tmpstr);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

    (void)sprintf(log_buffer, " :: %-20s = %d", "Running jobs:", rsrcs->njobs);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
}
/*
 * dump_resources - write a summary of one host's resources to the log.
 *
 * Emits one log record each for: the host name, free memory, node
 * allocation (count and percentage), the configured and in-use node masks
 * (only when built with NODEMASK), the CPU load average, the number of
 * running jobs and, when schd_MANAGE_HPM is set, the HPM counter state.
 *
 *   rsrcs - resources to report; must not be NULL.
 */
static void
dump_resources(Resources *rsrcs)
{
    char *id = "dump_resources";
    double node_pct;

    /* Log the system's status */
#if 0
    (void)sprintf(log_buffer, " %d%% usr, %d%% sys, %d%% idl",
        rsrcs->usrtime, rsrcs->systime, rsrcs->idltime);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
#endif /* 0 */

    (void)sprintf(log_buffer, "Resources for host %s", rsrcs->exechost);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

    (void)sprintf(log_buffer, " :: %-24s = %s", "Memory (free):",
        schd_byte2val(rsrcs->freemem));
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

    /* A host reporting zero nodes must not turn the percentage into inf/nan. */
    node_pct = 0.0;
    if (rsrcs->nodes_total != 0)
        node_pct = (rsrcs->nodes_alloc * 100.0) / rsrcs->nodes_total;

    (void)sprintf(log_buffer, " :: %-24s = %d / %d (%.2f%% utilization)",
        "Nodes allocated:", rsrcs->nodes_alloc, rsrcs->nodes_total, node_pct);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

#ifdef NODEMASK
    /* The available mask is formatted against itself for the configured set. */
    (void)sprintf(log_buffer, " :: %-24s = %s", "Nodes configured:",
        schd_format_nodemask(&(rsrcs->availmask), &(rsrcs->availmask)));
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

    (void)sprintf(log_buffer, " :: %-24s = %s", "Nodes in use:",
        schd_format_nodemask(&(rsrcs->availmask), &(rsrcs->nodes_used)));
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
#endif /* NODEMASK */

    (void)sprintf(log_buffer, " :: %-24s = %0.2f", "CPU Load average:",
        rsrcs->loadave);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

    (void)sprintf(log_buffer, " :: %-24s = %d", "Running jobs:", rsrcs->njobs);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

    if (schd_MANAGE_HPM) {
        (void)sprintf(log_buffer, " :: %-24s = %s mode, %sin use",
            "HPM Counters:",
            (rsrcs->flags & RSRCS_FLAGS_HPM_USER) ? "user" : "global",
            (rsrcs->flags & RSRCS_FLAGS_HPM_IN_USE) ? "" : "not ");
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
    }
}
/*
 * dump_resources - write a summary of one host's resources to the log.
 *
 * Emits one log record each for: the host name, the architecture, the CPU
 * allocation (count and percentage), the CPU load average, free memory,
 * free /tmp space, and the number of running jobs.
 *
 *   rsrcs - resources to report; must not be NULL.
 */
static void
dump_resources(Resources *rsrcs)
{
    char *id = "dump_res";
    double cpu_pct;

    /* Log the system's status */
    (void)sprintf(log_buffer, "Resources for host %s", rsrcs->exechost);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

    (void)sprintf(log_buffer, " :: %-20s = %s", "SYSTEM Architecture:",
        rsrcs->arch);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

    /* A host reporting zero CPUs must not turn the percentage into inf/nan. */
    cpu_pct = 0.0;
    if (rsrcs->ncpus_total != 0)
        cpu_pct = (rsrcs->ncpus_alloc * 100.0) / rsrcs->ncpus_total;

    (void)sprintf(log_buffer, " :: %-20s = %d / %d (%.2f%%)", "CPUs allocated:",
        rsrcs->ncpus_alloc, rsrcs->ncpus_total, cpu_pct);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

    (void)sprintf(log_buffer, " :: %-20s = %0.2f", "CPU Load average:",
        rsrcs->loadave);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

    (void)sprintf(log_buffer, " :: %-20s = %s", "Memory (free):",
        schd_byte2val(rsrcs->freemem));
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

    (void)sprintf(log_buffer, " :: %-20s = %s", "/tmp (free):",
        schd_byte2val(rsrcs->tmpdir));
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

    (void)sprintf(log_buffer, " :: %-20s = %d", "Running jobs:", rsrcs->njobs);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
}
/*
 * schd_print_schedule - debugging aid that prints the job schedule.
 *
 * The printing code is compiled out with "#if 0", so the function
 * currently does nothing.  The loop index lives inside the disabled region
 * so that the enabled build has no unused local.
 */
void
schd_print_schedule(void)
{
    /* print Schedule for sanity check */
#if 0
    int i;

    /*
     * NOTE(review): schd_byte2val() is printed with "%s" elsewhere, so the
     * "%ld(b)memory" conversion below looks mismatched - fix the format
     * before re-enabling this code.
     */
    printf("DEBUG: (job schedule)\n");
    for (i = 0; i < nJobSchedule; i++) {
        printf("%c %ld %s %d cpus %ld(b)memory %ld\n",
            JobSchedule[i].event, JobSchedule[i].time, JobSchedule[i].qname,
            JobSchedule[i].cpu, schd_byte2val(JobSchedule[i].mem),
            JobSchedule[i].walltime);
    }
#endif
}
/*
 * dump_resources - write a summary of one host's resources to the log.
 *
 * Emits one log record each for: the host name, free memory, node
 * allocation (count and percentage), and the number of running jobs.
 *
 *   rsrcs - resources to report; must not be NULL.
 */
static void
dump_resources(Resources *rsrcs)
{
    char *id = "dump_resources";
    double node_pct;

    /* Log the system's status */
    (void)sprintf(log_buffer, "Resources for host %s", rsrcs->exechost);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

    (void)sprintf(log_buffer, " :: %-24s = %s", "Memory (free):",
        schd_byte2val(rsrcs->freemem));
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

    /* A host reporting zero nodes must not turn the percentage into inf/nan. */
    node_pct = 0.0;
    if (rsrcs->nodes_total != 0)
        node_pct = (rsrcs->nodes_alloc * 100.0) / rsrcs->nodes_total;

    (void)sprintf(log_buffer, " :: %-24s = %d / %d (%.2f%% utilization)",
        "Nodes allocated:", rsrcs->nodes_alloc, rsrcs->nodes_total, node_pct);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

    (void)sprintf(log_buffer, " :: %-24s = %d", "Running jobs:", rsrcs->njobs);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
}
/*
 * schd_get_queue_limits - query queue information from the server.
 *
 * Resets the limits, counts and flags cached in 'queue', frees any job
 * list or user ACL still attached to it, then asks the server
 * (pbs_statque) for the queue's attributes and fills the Queue in from
 * the reply.  Afterwards it derives the node limits from the cpu/memory
 * values, claims this queue's jobs from the global schd_AllJobs list,
 * updates the idle timestamp, and attaches the execution host's resources.
 *
 * Returns 0 on success, -1 for "fatal errors" (pbs_statque() returned
 * NULL), and 1 for a transient error (i.e., the queue is not stopped and
 * failed the sanity checks imposed by the queue_sanity() function).
 */
int schd_get_queue_limits(Queue *queue)
{
    char *id = "schd_get_queue_limits";
    int moved = 0, istrue;
    Batch_Status *bs;
    AttrList *attr;
    /*
     * Attributes requested from the server.  The array is chained into a
     * list through each element's first member, ending at ATTR_maxrun.
     */
    static AttrList alist[] = {
        {&alist[1], ATTR_start, "", ""},
        {&alist[2], ATTR_enable, "", ""},
        {&alist[3], ATTR_count, "", ""},
        {&alist[4], ATTR_maxuserrun, "", ""},
        {&alist[5], ATTR_rescavail, "", ""},
        {&alist[6], ATTR_rescassn, "", ""},
        {&alist[7], ATTR_rescdflt, "", ""},
        {&alist[8], ATTR_rescmax, "", ""},
        {&alist[9], ATTR_rescmin, "", ""},
        {&alist[10], ATTR_acluren, "", ""},
        {&alist[11], ATTR_acluser, "", ""},
        {NULL, ATTR_maxrun, "", ""}
    };
    /* Raw memory/cpu values from the reply; converted to nodes below. */
    size_t mem_default = UNSPECIFIED;
    size_t mem_assn = UNSPECIFIED;
    size_t mem_max = UNSPECIFIED;
    size_t mem_min = UNSPECIFIED;
    int cpu_default = UNSPECIFIED;
    int cpu_assn = UNSPECIFIED;
    int cpu_max = UNSPECIFIED;
    int cpu_min = UNSPECIFIED;
    int nodes_from_cpu, nodes_from_mem;

    /* Forget everything learned about this queue on a previous call. */
    queue->running = UNSPECIFIED;
    queue->queued = UNSPECIFIED;
    queue->maxrun = UNSPECIFIED;
    queue->userrun = UNSPECIFIED;
    queue->nodes_max = UNSPECIFIED;
    queue->nodes_min = UNSPECIFIED;
    queue->nodes_default = UNSPECIFIED;
    queue->nodes_assn = UNSPECIFIED;
    queue->nodes_rsvd = UNSPECIFIED;
    queue->wallt_max = UNSPECIFIED;
    queue->wallt_min = UNSPECIFIED;
    queue->wallt_default = UNSPECIFIED;
    queue->flags = 0;
#ifdef NODEMASK
    BITFIELD_CLRALL(&queue->queuemask);
    BITFIELD_CLRALL(&queue->availmask);
#endif /* NODEMASK */
    queue->rsrcs = NULL;

    /* Release a job list or ACL left attached from an earlier call. */
    if (queue->jobs) {
        DBPRT(("%s: found jobs on queue '%s'! Freeing them...\n", id, queue->qname));
        schd_free_jobs(queue->jobs);
    }
    if (queue->useracl) {
        DBPRT(("%s: found user ACL list on queue '%s'! Freeing it...\n", id, queue->qname));
        schd_free_useracl(queue->useracl);
    }
    queue->jobs = NULL;
    queue->useracl = NULL;

    /* Ask the server for information about the specified queue. */
    if ((bs = pbs_statque(connector, queue->qname, alist, NULL)) == NULL) {
        sprintf(log_buffer, "pbs_statque failed, \"%s\" %d", queue->qname, pbs_errno);
        log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer);
        DBPRT(("%s: %s\n", id, log_buffer));
        return (-1);
    }

    /* Process the list of attributes returned by the server. */
    for (attr = bs->attribs; attr != NULL; attr = attr->next) {

        /* Is queue started? */
        if (!strcmp(attr->name, ATTR_start)) {
            if (schd_val2bool(attr->value, &istrue) == 0) {
                if (istrue) /* if true, queue is not stopped. */
                    queue->flags &= ~QFLAGS_STOPPED;
                else
                    queue->flags |= QFLAGS_STOPPED;
            } else {
                DBPRT(("%s: couldn't parse attr %s value %s to boolean\n", id, attr->name, attr->value));
            }
            continue;
        }

        /* Is queue enabled? */
        if (!strcmp(attr->name, ATTR_enable)) {
            if (schd_val2bool(attr->value, &istrue) == 0) {
                if (istrue) /* if true, queue is not disabled. */
                    queue->flags &= ~QFLAGS_DISABLED;
                else
                    queue->flags |= QFLAGS_DISABLED;
            } else {
                DBPRT(("%s: couldn't parse attr %s value %s to boolean\n", id, attr->name, attr->value));
            }
            continue;
        }

        /* How many jobs are queued and running? */
        if (!strcmp(attr->name, ATTR_count)) {
            queue->queued = schd_how_many(attr->value, SC_QUEUED);
            queue->running = schd_how_many(attr->value, SC_RUNNING);
            continue;
        }

        /* Queue-wide maximum number of jobs running. */
        if (!strcmp(attr->name, ATTR_maxrun)) {
            queue->maxrun = atoi(attr->value);
            continue;
        }

        /* Per-user maximum number of jobs running. */
        if (!strcmp(attr->name, ATTR_maxuserrun)) {
            queue->userrun = atoi(attr->value);
            continue;
        }

        /* Is there an enabled user access control list on this queue? */
        if (!strcmp(attr->name, ATTR_acluren)) {
            if (schd_val2bool(attr->value, &istrue) == 0) {
                if (istrue) /* if true, queue has an ACL */
                    queue->flags |= QFLAGS_USER_ACL;
                else
                    queue->flags &= ~QFLAGS_USER_ACL;
            } else {
                DBPRT(("%s: couldn't parse attr %s value %s to boolean\n", id, attr->name, attr->value));
            }
            continue;
        }

        /* The user ACL itself; a repeated attribute replaces the earlier one. */
        if (!strcmp(attr->name, ATTR_acluser)) {
            if (queue->useracl) {
                DBPRT(("queue %s acluser already set!\n", queue->qname));
                schd_free_useracl(queue->useracl);
            }
            queue->useracl = schd_create_useracl(attr->value);
            continue;
        }

        /* Queue maximum resource usage. */
        if (!strcmp(attr->name, ATTR_rescmax)) {
            if (!strcmp("mem", attr->resource)) {
                mem_max = schd_val2byte(attr->value);
                continue;
            }
            if (!strcmp("ncpus", attr->resource)) {
                cpu_max = atoi(attr->value);
                continue;
            }
            if (!strcmp("walltime", attr->resource)) {
                queue->wallt_max = schd_val2sec(attr->value);
                continue;
            }
#ifdef NODEMASK
            if (!strcmp("nodemask", attr->resource)) {
                if (schd_str2mask(attr->value, &queue->queuemask)) {
                    (void)sprintf(log_buffer, "couldn't convert nodemask %s", attr->value);
                    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
                } else
                    queue->flags |= QFLAGS_NODEMASK; /* Valid nodemask. */
            }
#endif /* NODEMASK */
            continue;
        }

        /* Queue minimum resource usage. */
        if (!strcmp(attr->name, ATTR_rescmin)) {
            if (!strcmp("mem", attr->resource)) {
                mem_min = schd_val2byte(attr->value);
                continue;
            }
            if (!strcmp("ncpus", attr->resource)) {
                cpu_min = atoi(attr->value);
                continue;
            }
            if (!strcmp("walltime", attr->resource)) {
                queue->wallt_min = schd_val2sec(attr->value);
                continue;
            }
            continue;
        }

        /* Queue assigned (in use) resource usage. */
        if (!strcmp(attr->name, ATTR_rescassn)) {
            if (!strcmp("mem", attr->resource)) {
                mem_assn = schd_val2byte(attr->value);
                continue;
            }
            if (!strcmp("ncpus", attr->resource)) {
                cpu_assn = atoi(attr->value);
            }
            continue;
        }

        /* Queue default resource values. */
        if (!strcmp(attr->name, ATTR_rescdflt)) {
            if (!strcmp("mem", attr->resource)) {
                mem_default = schd_val2byte(attr->value);
                continue;
            }
            if (!strcmp("ncpus", attr->resource)) {
                cpu_default = atoi(attr->value);
                continue;
            }
            if (!strcmp("walltime", attr->resource))
                queue->wallt_default = schd_val2sec(attr->value);
        }

        /* Ignore anything else */
    }

    pbs_statfree(bs);

    /*
     * Calculate values for queue node limits, given memory and cpu values.
     * Note any discrepancies.
     * NOTE(review): these comparisons also run when a value is still
     * UNSPECIFIED - confirm the macros make that case compare equal.
     */
    nodes_from_cpu = NODES_FROM_CPU(cpu_default);
    nodes_from_mem = NODES_FROM_MEM(mem_default);
    if (nodes_from_cpu != nodes_from_mem) {
        sprintf(log_buffer, "%s: Queue '%s' default cpu/mem (%d/%s) convert to %d != %d nodes", id, queue->qname, cpu_default, schd_byte2val(mem_default), nodes_from_cpu, nodes_from_mem);
        log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer);
        DBPRT(("%s: %s\n", id, log_buffer));
    }

    nodes_from_cpu = NODES_FROM_CPU(cpu_max);
    nodes_from_mem = NODES_FROM_MEM(mem_max);
    if (nodes_from_cpu != nodes_from_mem) {
        sprintf(log_buffer, "%s: Queue '%s' maximum cpu/mem (%d/%s) convert to %d != %d nodes", id, queue->qname, cpu_max, schd_byte2val(mem_max), nodes_from_cpu, nodes_from_mem);
        log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer);
        DBPRT(("%s: %s\n", id, log_buffer));
    }

    nodes_from_cpu = NODES_FROM_CPU(cpu_min);
    nodes_from_mem = NODES_FROM_MEM(mem_min);
    if (nodes_from_cpu != nodes_from_mem) {
        sprintf(log_buffer, "%s: Queue '%s' minimum cpu/mem (%d/%s) convert to %d != %d nodes", id, queue->qname, cpu_min, schd_byte2val(mem_min), nodes_from_cpu, nodes_from_mem);
        log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer);
        DBPRT(("%s: %s\n", id, log_buffer));
    }

    /*
     * Note: The assigned cpus and memory need not be exactly the same
     * node equivalency.
     * A node limit is only set when both its cpu and memory values were
     * supplied; otherwise it stays UNSPECIFIED.
     */
    if ((cpu_default != UNSPECIFIED) && (mem_default != UNSPECIFIED))
        queue->nodes_default = NODES_REQD(cpu_default, mem_default);
    if ((cpu_max != UNSPECIFIED) && (mem_max != UNSPECIFIED))
        queue->nodes_max = NODES_REQD(cpu_max, mem_max);
    if ((cpu_min != UNSPECIFIED) && (mem_min != UNSPECIFIED))
        queue->nodes_min = NODES_REQD(cpu_min, mem_min);
    if ((cpu_assn != UNSPECIFIED) && (mem_assn != UNSPECIFIED))
        queue->nodes_assn = NODES_REQD(cpu_assn, mem_assn);

    /*
     * Move any jobs on this queue from the global list onto the queue's
     * list. Keep track of when the longest-running job will end, and set
     * the 'empty_by' field to that value. Maintain the ordering as it was
     * in "schd_AllJobs".
     */
    if (schd_AllJobs)
        moved = queue_claim_jobs(queue, &schd_AllJobs);

    /* A failed claim is logged but does not abort the query. */
    if (moved < 0) {
        sprintf(log_buffer, "%s: WARNING! Queue '%s' failed to claim jobs.", id, queue->qname);
        log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer);
        DBPRT(("%s: %s\n", id, log_buffer));
    }

    /* Unreported counts are treated as zero for the accounting below. */
    if (queue->nodes_assn == UNSPECIFIED)
        queue->nodes_assn = 0;
    if (queue->running == UNSPECIFIED)
        queue->running = 0;

    /*
     * Find out if the queue is idle, and if it was not before, set the idle
     * time to now. If there are running jobs, the queue is not idle at the
     * start of this iteration - set idle_since to 0.
     */
    if (queue->running) {
        queue->idle_since = 0;
    } else {
        if (queue->idle_since == 0)
            queue->idle_since = schd_TimeNow;
    }

    /*
     * Get the resources for this queue from the resource monitor (if
     * available). If the resmom is not accessible, disable the queue.
     * If the resources were received okay, compute the available node
     * masks from the resources and jobs.
     * Don't bother with resources for the special or submit queues.
     *
     * NOTE(review): as written, the test below is true for every queue
     * whose name differs from the submit queue (the special queue
     * included), and for the submit queue when it is also the special
     * queue.  Confirm that this matches the intent stated above.
     */
    if ((strcmp(queue->qname, schd_SubmitQueue->queue->qname) != 0) || ((schd_SpecialQueue != NULL) && (!strcmp(queue->qname, schd_SpecialQueue->queue->qname)))) {
        queue->rsrcs = schd_get_resources(queue->exechost);

        if (queue->rsrcs != NULL) {
            /* Account for this queue's resources. */
            queue->rsrcs->nodes_alloc += queue->nodes_assn;
            queue->rsrcs->njobs += queue->running;

            /*
             * If the HPM counters do not appear to be in use on this host,
             * check for jobs on the queue that are using hpm. If so, set
             * the 'HPM_IN_USE' flag on the resources. This will prevent the
             * HPM counters from being released to global mode at the end
             * of the scheduling run (c.f. cleanup.c).
             * The 'HPM_IN_USE' flag will also be asserted if a job is run
             * that uses the HPM counters.
             */
            if (schd_MANAGE_HPM && !(queue->rsrcs->flags & RSRCS_FLAGS_HPM_IN_USE)) {
                if (schd_hpm_job_count(queue->jobs))
                    queue->rsrcs->flags |= RSRCS_FLAGS_HPM_IN_USE;
            }

#ifdef NODEMASK
            /* And find the nodemasks for the queue and resources. */
            find_nodemasks(queue, queue->rsrcs);
#endif /* NODEMASK */
        } else {
            (void)sprintf(log_buffer, "Can't get resources for %s@%s - marking unavailable.", queue->qname, queue->exechost);
            log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
            DBPRT(("%s: %s\n", id, log_buffer));
            queue->flags |= QFLAGS_DISABLED;
        }
    }

#ifdef DEBUG
    schd_dump_queue(queue, QUEUE_DUMP_JOBS);
#endif /* DEBUG */

    /*
     * It would probably be better to wait for the world to stabilize
     * than to try to impose some artificial order upon it. Do not do
     * the sanity check if the queue is stopped.
     */
    if ((queue->flags & QFLAGS_STOPPED) == 0) {
        if (!queue_sanity(queue)) {
            sprintf(log_buffer, "WARNING! Queue '%s' failed sanity checks.", queue->qname);
            log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, id, log_buffer);
            DBPRT(("%s: %s\n", id, log_buffer));
            return (1);
        }
    }

    return (0);
}
/*
 * schd_get_jobinfo - fill in a Job from the server's status reply.
 *
 * This function takes a pointer to a struct batch_status for a job, and
 * fills in the appropriate fields of the supplied job struct.  The Job is
 * zeroed first, so any previous contents are discarded (not freed).
 *
 *   bs  - status reply for one job; bs->name is the job identifier.
 *   job - structure to fill in.
 *
 * Returns the number of items that were found, or -1 if a string could not
 * be duplicated.  On a -1 return the Job may hold strings that were already
 * duplicated; NOTE(review): confirm the caller releases them.
 */
int
schd_get_jobinfo(Batch_Status *bs, Job *job)
{
    int changed = 0;
    int istrue;
    char *id = "schd_get_jobinfo";
    char *host;
    char *p, *tmp_p, *var_p;
    AttrList *attr;

    memset((void *)job, 0, sizeof(Job));

    job->jobid = schd_strdup(bs->name);
    if (job->jobid == NULL) {
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
            "schd_strdup(bs->name)");
        return (-1);
    }
    changed++;

    for (attr = bs->attribs; attr != NULL; attr = attr->next) {

        /*
         * The 'owner' attribute has the form user@host; only the user part
         * is kept.  The value is duplicated first and then cut at the '@',
         * so its length is never limited by a fixed-size scratch buffer.
         */
        if (!strcmp(attr->name, ATTR_owner)) {
            job->owner = schd_strdup(attr->value);
            if (job->owner == NULL) {
                log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                    "schd_strdup(job->owner)");
                return (-1);
            }
            host = strchr(job->owner, '@');
            if (host)
                *host = '\0';   /* Replace '@' with NUL (ends username). */
            changed++;
            continue;
        }

        /* The group to which to charge the resources for this job. */
        if (!strcmp(attr->name, ATTR_egroup)) {
            job->group = schd_strdup(attr->value);
            if (job->group == NULL) {
                log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                    "schd_strdup(job->group)");
                return (-1);
            }
            changed++;
            continue;
        }

        /* The comment currently assigned to this job. */
        if (!strcmp(attr->name, ATTR_comment)) {
            job->comment = schd_strdup(attr->value);
            if (job->comment == NULL) {
                log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                    "schd_strdup(job->comment)");
                return (-1);
            }
            changed++;
            continue;
        }

        /*
         * The host on which this job is running (or was running for
         * suspended or checkpointed jobs).
         */
        if (!strcmp(attr->name, ATTR_exechost)) {
            job->exechost = schd_strdup(attr->value);
            if (job->exechost == NULL) {
                log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                    "schd_strdup(job->exechost)");
                return (-1);
            }
            changed++;
            continue;
        }

        /* Is this job interactive or not? */
        if (!strcmp(attr->name, ATTR_inter)) {
            if (schd_val2bool(attr->value, &istrue) == 0) {
                if (istrue)
                    job->flags |= JFLAGS_INTERACTIVE;
                else
                    job->flags &= ~JFLAGS_INTERACTIVE;
                changed++;
            } else {
                DBPRT(("%s: can't parse %s = %s into boolean\n", id,
                    attr->name, attr->value));
            }
            continue;
        }

        /* State is one of 'R', 'Q', 'E', etc. */
        if (!strcmp(attr->name, ATTR_state)) {
            job->state = attr->value[0];
            changed++;
            continue;
        }

        /* The queue the job currently resides in. */
        if (!strcmp(attr->name, ATTR_queue)) {
            job->qname = schd_strdup(attr->value);
            if (job->qname == NULL) {
                log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                    "schd_strdup(job->qname)");
                return (-1);
            }
            job->flags |= JFLAGS_QNAME_LOCAL;
            changed++;
            continue;
        }

        /*
         * Extract the originating queue (PBS_O_QUEUE) from the variable
         * list.  strtok() modifies its input, so it works on a private copy.
         */
        if (!strcmp(attr->name, ATTR_v)) {
            var_p = schd_strdup(attr->value);
            if (var_p == NULL) {
                log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                    "schd_strdup(Variable_List)");
                return (-1);
            }

            p = NULL;
            tmp_p = strstr(var_p, "PBS_O_QUEUE");
            if (tmp_p) {
                p = strtok(tmp_p, "=");
                p = strtok(NULL, ", ");
            }

            if (p != NULL) {
                job->oqueue = schd_strdup(p);
            } else {
                /*
                 * if the originating queue is unknown, default
                 * to the locally defined "submit" queue.
                 */
                job->oqueue = schd_strdup(schd_SubmitQueue->queue->qname);
            }
            free(var_p);
            changed++;
            continue;
        }

        /* Requested resources. */
        if (!strcmp(attr->name, ATTR_l)) {
            if (!strcmp(attr->resource, "arch")) {
                job->arch = schd_strdup(attr->value);
                if (job->arch == NULL) {
                    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                        "schd_strdup(job->arch)");
                    return (-1);
                }
                changed++;
            } else if (!strcmp(attr->resource, "mem")) {
                job->memory = schd_val2byte(attr->value);
                changed++;
            } else if (!strcmp(attr->resource, "ncpus")) {
                job->ncpus = atoi(attr->value);
                changed++;
            } else if (!strcmp(attr->resource, "walltime")) {
                job->walltime = schd_val2sec(attr->value);
                changed++;
            }

            /* That's all for requested resources. */
            continue;
        }

        /* Resources used so far. */
        if (!strcmp(attr->name, ATTR_used)) {
            if (!strcmp(attr->resource, "walltime")) {
                job->walltime_used = schd_val2sec(attr->value);
                changed++;
            }

            /* No other interesting cases. */
            continue;
        }

        /* Creation time attribute. */
        if (!strcmp(attr->name, ATTR_ctime)) {
            /* How long ago was it put in the queue ? */
            job->time_queued = schd_TimeNow - atoi(attr->value);
            continue;
        }

        /* Modified time attribute. */
        if (!strcmp(attr->name, ATTR_mtime)) {
            /* When was the job last modified? */
            job->mtime = atoi(attr->value);
            continue;
        }

        /* Job Substate attribute. */
        if (!strcmp(attr->name, ATTR_substate)) {
            if (atoi(attr->value) == 43 /* JOB_SUBSTATE_SUSPEND */)
                job->flags |= JFLAGS_SUSPENDED;
            continue;
        }

        /*
         * When was the job last eligible to run?  When a user-hold is
         * released, this value is updated to the current time.  This
         * prevents users from gaining higher priority from holding their
         * jobs.
         */
        if (!strcmp(attr->name, ATTR_etime)) {
            job->eligible = schd_TimeNow - atoi(attr->value);
            continue;
        }
    }

    /*
     * The originating queue is used below for the default memory lookup and
     * the express-queue test.  If the server sent no variable list (or the
     * copy above failed), fall back to the submit queue so that those uses
     * never see a NULL name.
     */
    if (job->oqueue == NULL) {
        job->oqueue = schd_strdup(schd_SubmitQueue->queue->qname);
        if (job->oqueue == NULL) {
            log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                "schd_strdup(job->oqueue)");
            return (-1);
        }
    }

    /* No memory request: apply the originating queue's default and tell the server. */
    if (job->memory < 1) {
        job->memory = get_default_mem(job->oqueue);
        schd_alterjob(connector, job, ATTR_l, schd_byte2val(job->memory),
            "mem");
        changed++;
    }

    /*
     * If this job is in the "Running" or "Suspended" state, compute how
     * many seconds remain until it is completed.
     */
    if (job->state == 'R' || job->state == 'S') {
        job->time_left = job->walltime - job->walltime_used;
    }

    /*
     * If this job was enqueued since the last time we ran, set the job
     * flag to indicate that we have not yet seen this job.  This makes it
     * a candidate for additional processing.  There may be some inaccuracy,
     * since the time_t has resolution of 1 second.  Attempt to err on the
     * side of caution.
     */
    if ((job->state == 'Q') && (job->time_queued != UNSPECIFIED)) {
        if (job->time_queued <= (schd_TimeNow - schd_TimeLast)) {
            job->flags |= JFLAGS_FIRST_SEEN;
        }
    }

    /*
     * If this job was previously running and is now queued, then we
     * need to (a) flag it as having been checkpointed, and (b) move
     * it back to the submit queue, if its not already there.  The move is
     * skipped when the server did not report the job's queue.
     */
    if (job->exechost && job->state == 'Q') {
        job->flags |= JFLAGS_CHKPTD;

        if (job->qname != NULL &&
            strcmp(job->qname, schd_SubmitQueue->queue->qname) != 0) {
            sprintf(log_buffer, "moving Q'd job %s back to SUBMIT Q",
                job->jobid);
            log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                log_buffer);
            pbs_movejob(connector, job->jobid,
                schd_SubmitQueue->queue->qname, NULL);
        }
    }

    /*
     * if this job is currently Suspended (a substate of 'R'unning), then
     * pretend its queued, so that the scheduling logic will work.
     */
    if (job->state == 'S') {
        job->state = 'Q';
        job->flags |= JFLAGS_SUSPENDED;
    }

    /*
     * if this job is suspended, checkpointed, or otherwise "queued"
     * on an execution queue, update the internal representation
     * to pretend it is really on the submit queue.
     */
    if ((job->flags & JFLAGS_SUSPENDED) || (job->flags & JFLAGS_CHKPTD)) {
        free(job->qname);
        job->qname = schd_strdup(schd_SubmitQueue->queue->qname);
        if (job->qname == NULL) {
            log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                "schd_strdup(job->qname)");
            return (-1);
        }
        /* The name is a private copy even if no queue attribute was seen. */
        job->flags |= JFLAGS_QNAME_LOCAL;
    }

    /*
     * If this job came from the EXPRESS queue, set the flag so that it
     * will be treated with the highest of priority.
     */
    if (!strcmp(job->oqueue, schd_EXPRESS_Q_NAME))
        job->flags |= JFLAGS_PRIORITY;

    /*
     * If the 'etime' attribute wasn't found, set it to the time the job has
     * been queued.  Most jobs will be eligible to run their entire lifetime.
     * The exception is a job that has been held - if it was a user hold,
     * the release will reset the etime to the latest value.
     * If no eligible time was given, use the job's creation time.
     */
    if (!job->eligible)
        job->eligible = job->time_queued;

    /*
     * if this job has waited too long, and its queue is NOT over its
     * shares, then bump it up in priority.
     */
    if (job->eligible > schd_MAX_WAIT_TIME && job->sort_order <= 100)
        job->flags |= JFLAGS_WAITING;

    return (changed);
}
/*
 * bump_rsrc_requests - make a job's "mem" and "ncpus" requests agree with
 * the number of nodes assigned to it.
 *
 * A request that is zero, or that differs from what job->nodes nodes
 * provide (MB_PER_NODE / PE_PER_NODE each), is rewritten on the server via
 * schd_alterjob().  When anything was changed a log record describes the
 * old and new values.
 *
 *   job     - job whose requests are checked; job->nodes must be set.
 *   cpu_req - the job's current CPU request.
 *   mem_req - the job's current memory request.
 *
 * Returns 0 on success (whether or not anything was changed), 1 if a value
 * could not be formatted or the job could not be altered.
 */
static int
bump_rsrc_requests(Job *job, int cpu_req, size_t mem_req)
{
    char *id = "bump_rsrc_requests";
    char *val;
    char buf[64];
    int bumped = 0;

    /*
     * If a job gives the "wrong" value for the memory request (for the
     * number of nodes required to fulfill the request), then bump the
     * memory request to the amount of memory the assigned nodes would
     * consume.
     */
    if ((mem_req == 0) || (mem_req != (job->nodes * MB_PER_NODE))) {
        /* Compute the "right" memory request, based on the nodes. */
        val = schd_byte2val(job->nodes * MB_PER_NODE);
        if (val == NULL)
            return (1);

        if (schd_alterjob(connector, job, ATTR_l, val, "mem")) {
            DBPRT(("%s: Failed to set job %s \"mem\" attr to %s\n", id,
                job->jobid, val));
            return (1);
        }
        bumped++;
    }

    /*
     * If a job gives the "wrong" value for the CPU request (for the
     * number of nodes required to fulfill the request), then bump the
     * CPU request to the number of CPUs the assigned nodes would
     * consume.
     */
    if ((cpu_req == 0) || (cpu_req != (job->nodes * PE_PER_NODE))) {
        /* Compute the "right" CPU request, based on the nodes. */
        sprintf(buf, "%d", (job->nodes * PE_PER_NODE));

        if (schd_alterjob(connector, job, ATTR_l, buf, "ncpus")) {
            DBPRT(("%s: Failed to set job %s \"ncpus\" attr to %s\n", id,
                job->jobid, buf));
            return (1);
        }
        bumped++;
    }

    if (bumped) {
        /*
         * Keep a private copy of the new memory value: schd_byte2val() is
         * called again below for the old value (presumably it reuses its
         * result buffer - confirm).  strncpy() does not terminate a
         * truncated copy, so the terminator is written explicitly.
         */
        val = schd_byte2val(job->nodes * MB_PER_NODE);
        strncpy(buf, (val != NULL) ? val : "?", sizeof(buf) - 1);
        buf[sizeof(buf) - 1] = '\0';

        val = schd_byte2val(mem_req);
        sprintf(log_buffer, "%s cpu/mem (%d/%s) bumped to %d/%s (%d nodes)",
            job->jobid, cpu_req, (val != NULL) ? val : "?",
            job->nodes * PE_PER_NODE, buf, job->nodes);
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
        DBPRT(("%s: %s\n", id, log_buffer));
    }

    return (0);
}