/*
 * Record the reason that the current candidate job cannot currently run.
 * When it is decided that the job will remain queued, place the reason
 * string in the comment field of the job structure.
 *
 *   job       job whose comment is to be updated.
 *   reason    new comment text; NULL is treated as the empty string.
 *   optional  non-zero if this comment may be skipped (see below).
 *
 * The comment is pushed to the server with schd_alterjob() and a private
 * copy is stored in job->comment (the previous copy is freed).  If the
 * copy cannot be allocated, the failure is logged and neither the server
 * nor the job structure is modified.
 */
void schd_comment_job(Job *job, char *reason, int optional)
{
    char   *id = "schd_comment_job";
    char   *msg_ptr;

    /*
     * If the 'optional' argument is true, then this comment is optional.
     * Do not bother commenting this job if this is not the first time it
     * has been seen, and it has been recently modified (hopefully it was
     * a comment change).  If there is no comment for the job, comment it
     * this time.
     */
    if (optional && (!schd_FirstRun) && (job->comment != NULL) &&
        !(job->flags & JFLAGS_FIRST_SEEN) &&
        (MIN_COMMENT_AGE && ((schd_TimeNow - job->mtime) < MIN_COMMENT_AGE))) {
        return;
    }

    if (reason == NULL)
        reason = "";

    /* Nothing to do if the job already carries exactly this comment. */
    if ((job->comment != NULL) && (strcmp(reason, job->comment) == 0))
        return;

    /*
     * Make the private copy first, and bail out before touching anything
     * if that fails.  Otherwise a NULL value would be handed to
     * schd_alterjob() and the existing comment would be thrown away.
     */
    msg_ptr = schd_strdup(reason);
    if (msg_ptr == NULL) {
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                   "schd_strdup(job->comment)");
        return;
    }

    /* Alter PBS' view of the job. */
    schd_alterjob(connector, job, ATTR_comment, msg_ptr, NULL);

    /* Replace the job's copy of the comment with the new one. */
    free(job->comment);
    job->comment = msg_ptr;
}
/*
 * This function takes a pointer to a struct batch_status for a job, and
 * fills in the appropriate fields of the supplied job struct.
 *
 *   bs   status record for one job (name plus attribute list).
 *   job  structure to fill in; it is zeroed before anything is stored.
 *
 * Returns the number of items that were counted as found, or -1 if a
 * string copy could not be allocated (the failure is logged).  Fields
 * that were already allocated when a failure occurs are left in *job.
 *
 * Besides filling in the structure, this function may alter the job on
 * the server: it sets a default "mem" request when none was given, and
 * asks for a previously-running job that is now queued to be moved back
 * to the submit queue.
 */
int schd_get_jobinfo(Batch_Status *bs, Job *job)
{
    int       changed = 0;
    int       istrue;
    char      tmp_str[120];
    char     *id = "schd_get_jobinfo";
    char     *host;
    char     *p, *tmp_p, *var_p;
    AttrList *attr;

    memset((void *)job, 0, sizeof(Job));

    job->jobid = schd_strdup(bs->name);
    if (job->jobid == NULL) {
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                   "schd_strdup(bs->name)");
        return (-1);
    }
    changed++;

    for (attr = bs->attribs; attr != NULL; attr = attr->next) {

        /*
         * If this is the 'owner' field, keep only the user name part
         * (everything before the '@') in the Job struct.
         */
        if (!strcmp(attr->name, ATTR_owner)) {
            /*
             * Bounded copy: the attribute value comes from outside and
             * may be longer than the scratch buffer.  strncpy() does not
             * terminate on truncation, so terminate explicitly.
             */
            strncpy(tmp_str, attr->value, sizeof(tmp_str) - 1);
            tmp_str[sizeof(tmp_str) - 1] = '\0';

            /* Look for the '@' that separates user and hostname. */
            host = strchr(tmp_str, '@');
            if (host)
                *host = '\0';   /* Replace '@' with NUL (ends username). */

            job->owner = schd_strdup(tmp_str);
            if (job->owner == NULL) {
                log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                           "schd_strdup(job->owner)");
                return (-1);
            }
            changed++;
            continue;
        }

        /* The group to which to charge the resources for this job. */
        if (!strcmp(attr->name, ATTR_egroup)) {
            job->group = schd_strdup(attr->value);
            if (job->group == NULL) {
                log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                           "schd_strdup(job->group)");
                return (-1);
            }
            changed++;
            continue;
        }

        /* The comment currently assigned to this job. */
        if (!strcmp(attr->name, ATTR_comment)) {
            job->comment = schd_strdup(attr->value);
            if (job->comment == NULL) {
                log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                           "schd_strdup(job->comment)");
                return (-1);
            }
            changed++;
            continue;
        }

        /*
         * The host on which this job is running (or was running for
         * suspended or checkpointed jobs).
         */
        if (!strcmp(attr->name, ATTR_exechost)) {
            job->exechost = schd_strdup(attr->value);
            if (job->exechost == NULL) {
                log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                           "schd_strdup(job->exechost)");
                return (-1);
            }
            changed++;
            continue;
        }

        /* Is this job interactive or not? */
        if (!strcmp(attr->name, ATTR_inter)) {
            if (schd_val2bool(attr->value, &istrue) == 0) {
                if (istrue)
                    job->flags |= JFLAGS_INTERACTIVE;
                else
                    job->flags &= ~JFLAGS_INTERACTIVE;
                changed++;
            } else {
                DBPRT(("%s: can't parse %s = %s into boolean\n", id,
                       attr->name, attr->value));
            }
            continue;
        }

        /* State is one of 'R', 'Q', 'E', etc. */
        if (!strcmp(attr->name, ATTR_state)) {
            job->state = attr->value[0];
            changed++;
            continue;
        }

        /* The queue the job currently resides in. */
        if (!strcmp(attr->name, ATTR_queue)) {
            job->qname = schd_strdup(attr->value);
            if (job->qname == NULL) {
                log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                           "schd_strdup(job->qname)");
                return (-1);
            }
            job->flags |= JFLAGS_QNAME_LOCAL;
            changed++;
            continue;
        }

        /*
         * Variable list: dig out the value of PBS_O_QUEUE, which is used
         * as the originating queue of the job.
         */
        if (!strcmp(attr->name, ATTR_v)) {
            /* strtok() writes into its input, so work on a scratch copy. */
            var_p = schd_strdup(attr->value);
            if (var_p == NULL) {
                log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                           "schd_strdup(Variable_List)");
                return (-1);
            }

            p = NULL;
            tmp_p = strstr(var_p, "PBS_O_QUEUE");
            if (tmp_p) {
                p = strtok(tmp_p, "=");     /* skip the variable name */
                p = strtok(NULL, ", ");     /* the value, up to ',' or ' ' */
            }

            if (p != NULL) {
                job->oqueue = schd_strdup(p);
            } else {
                /* if the originating queue is unknown, default
                 * to the locally defined "submit" queue.
                 */
                job->oqueue = schd_strdup(schd_SubmitQueue->queue->qname);
            }
            free(var_p);
            changed++;
            continue;
        }

        /* Requested resources. */
        if (!strcmp(attr->name, ATTR_l)) {
            if (!strcmp(attr->resource, "arch")) {
                job->arch = schd_strdup(attr->value);
                changed++;
            } else if (!strcmp(attr->resource, "mem")) {
                job->memory = schd_val2byte(attr->value);
                changed++;
            } else if (!strcmp(attr->resource, "ncpus")) {
                job->ncpus = atoi(attr->value);
                changed++;
            } else if (!strcmp(attr->resource, "walltime")) {
                job->walltime = schd_val2sec(attr->value);
                changed++;
            }
            /* That's all for requested resources. */
            continue;
        }

        /* Resources used so far. */
        if (!strcmp(attr->name, ATTR_used)) {
            if (!strcmp(attr->resource, "walltime")) {
                job->walltime_used = schd_val2sec(attr->value);
                changed++;
            }
            /* No other interesting cases. */
            continue;
        }

        /* Creation time attribute. */
        if (!strcmp(attr->name, ATTR_ctime)) {
            /* How long ago was it put in the queue ? */
            job->time_queued = schd_TimeNow - atoi(attr->value);
            continue;
        }

        /* Modified time attribute. */
        if (!strcmp(attr->name, ATTR_mtime)) {
            /* When was the job last modified? */
            job->mtime = atoi(attr->value);
            continue;
        }

        /* Job Substate attribute. */
        if (!strcmp(attr->name, ATTR_substate)) {
            if (atoi(attr->value) == 43 /* JOB_SUBSTATE_SUSPEND */)
                job->flags |= JFLAGS_SUSPENDED;
            continue;
        }

        /*
         * When was the job last eligible to run?  When a user-hold is
         * released, this value is updated to the current time.  This
         * prevents users from gaining higher priority from holding their
         * jobs.
         */
        if (!strcmp(attr->name, ATTR_etime)) {
            job->eligible = schd_TimeNow - atoi(attr->value);
            continue;
        }
    }

    /*
     * The originating queue is dereferenced below.  If the job had no
     * variable list at all (or the copy made above failed), fall back to
     * the submit queue here, the same default used when PBS_O_QUEUE is
     * missing from the variable list.
     */
    if (job->oqueue == NULL) {
        job->oqueue = schd_strdup(schd_SubmitQueue->queue->qname);
        if (job->oqueue == NULL) {
            log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                       "schd_strdup(job->oqueue)");
            return (-1);
        }
    }

    /* No memory request: apply the originating queue's default. */
    if (job->memory < 1) {
        job->memory = get_default_mem(job->oqueue);
        schd_alterjob(connector, job, ATTR_l,
                      schd_byte2val(job->memory), "mem");
        changed++;
    }

    /*
     * If this job is in the "Running" or "Suspended" state, compute how
     * many seconds remain until it is completed.
     */
    if (job->state == 'R' || job->state == 'S') {
        job->time_left = job->walltime - job->walltime_used;
    }

    /*
     * If this job was enqueued since the last time we ran, set the job
     * flag to indicate that we have not yet seen this job.  This makes it
     * a candidate for additional processing.  There may be some inaccuracy,
     * since the time_t has resolution of 1 second.  Attempt to err on the
     * side of caution.
     */
    if ((job->state == 'Q') && (job->time_queued != UNSPECIFIED)) {
        if (job->time_queued <= (schd_TimeNow - schd_TimeLast)) {
            job->flags |= JFLAGS_FIRST_SEEN;
        }
    }

    /*
     * If this job was previously running and is now queued, then we
     * need to (a) flag it as having been checkpointed, and (b) move
     * it back to the submit queue, if it's not already there.  If the
     * job's current queue was not reported, no move is attempted.
     */
    if (job->exechost && job->state == 'Q') {
        job->flags |= JFLAGS_CHKPTD;

        if ((job->qname != NULL) &&
            strcmp(job->qname, schd_SubmitQueue->queue->qname)) {
            sprintf(log_buffer, "moving Q'd job %s back to SUBMIT Q",
                    job->jobid);
            log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                       log_buffer);
            pbs_movejob(connector, job->jobid,
                        schd_SubmitQueue->queue->qname, NULL);
        }
    }

    /*
     * If this job is currently Suspended (a substate of 'R'unning), then
     * pretend it's queued, so that the scheduling logic will work.
     */
    if (job->state == 'S') {
        job->state = 'Q';
        job->flags |= JFLAGS_SUSPENDED;
    }

    /*
     * If this job is suspended, checkpointed, or otherwise "queued"
     * on an execution queue, update the internal representation
     * to pretend it is really on the submit queue.
     */
    if ((job->flags & JFLAGS_SUSPENDED) || (job->flags & JFLAGS_CHKPTD)) {
        free(job->qname);
        job->qname = schd_strdup(schd_SubmitQueue->queue->qname);
        if (job->qname == NULL) {
            log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                       "schd_strdup(job->qname)");
            return (-1);
        }
    }

    /*
     * If this job came from the EXPRESS queue, set the flag so that it
     * will be treated with the highest of priority.
     */
    if (!strcmp(job->oqueue, schd_EXPRESS_Q_NAME))
        job->flags |= JFLAGS_PRIORITY;

    /*
     * If the 'etime' attribute wasn't found, set it to the time the job has
     * been queued.  Most jobs will be eligible to run their entire lifetime.
     * The exception is a job that has been held - if it was a user hold,
     * the release will reset the etime to the latest value.
     * If no eligible time was given, use the job's creation time.
     */
    if (!job->eligible)
        job->eligible = job->time_queued;

    /*
     * If this job has waited too long, and its queue is NOT over its
     * shares, then bump it up in priority.
     */
    if (job->eligible > schd_MAX_WAIT_TIME && job->sort_order <= 100)
        job->flags |= JFLAGS_WAITING;

    return (changed);
}
/*
 * Make a job's "mem" and "ncpus" requests agree with the number of nodes
 * assigned to it (job->nodes).
 *
 *   job      job to adjust; job->nodes must already be set.
 *   cpu_req  CPU count the job asked for.
 *   mem_req  memory the job asked for.
 *
 * A request that is zero, or that differs from what job->nodes nodes
 * would consume (MB_PER_NODE / PE_PER_NODE per node), is rewritten on the
 * server with schd_alterjob().  Any adjustment is logged.
 *
 * Returns 0 on success (including "nothing needed changing"), or 1 if a
 * value could not be formatted or the server rejected an alteration.
 */
static int bump_rsrc_requests(Job *job, int cpu_req, size_t mem_req)
{
    char   *id = "bump_rsrc_requests";
    char   *val;
    char    buf[64];
    int     bumped = 0;

    /*
     * If a job gives the "wrong" value for the memory request (for the
     * number of nodes required to fulfill the request), then bump the
     * memory request to the amount of memory the assigned nodes would
     * consume.
     */
    if ((mem_req == 0) || (mem_req != (job->nodes * MB_PER_NODE))) {

        /* Compute the "right" memory request, based on the nodes. */
        val = schd_byte2val(job->nodes * MB_PER_NODE);
        if (val == NULL)
            return (1);

        if (schd_alterjob(connector, job, ATTR_l, val, "mem")) {
            DBPRT(("%s: Failed to set job %s \"mem\" attr to %s\n", id,
                   job->jobid, val));
            return (1);
        }

        bumped++;
    }

    /*
     * If a job gives the "wrong" value for the CPU request (for the
     * number of nodes required to fulfill the request), then bump the
     * CPU request to the number of CPUs the assigned nodes would
     * consume.
     */
    if ((cpu_req == 0) || (cpu_req != (job->nodes * PE_PER_NODE))) {

        /* Compute the "right" CPU request, based on the nodes. */
        sprintf(buf, "%d", (job->nodes * PE_PER_NODE));

        if (schd_alterjob(connector, job, ATTR_l, buf, "ncpus")) {
            DBPRT(("%s: Failed to set job %s \"ncpus\" attr to %s\n", id,
                   job->jobid, buf));
            return (1);
        }

        bumped++;
    }

    if (bumped) {
        /*
         * The new memory value is copied into buf before schd_byte2val()
         * is called again for the old value, as the original code did.
         * strncpy() does not terminate on truncation, so terminate
         * explicitly.  A NULL result is shown as "?".
         */
        val = schd_byte2val(job->nodes * MB_PER_NODE);
        strncpy(buf, (val != NULL) ? val : "?", sizeof(buf) - 1);
        buf[sizeof(buf) - 1] = '\0';

        val = schd_byte2val(mem_req);
        sprintf(log_buffer,
                "%s cpu/mem (%d/%s) bumped to %d/%s (%d nodes)",
                job->jobid, cpu_req, (val != NULL) ? val : "?",
                job->nodes * PE_PER_NODE, buf, job->nodes);

        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
        DBPRT(("%s: %s\n", id, log_buffer));
    }

    return (0);
}