/*
 * schd_reject_job - delete a job that was rejected by all execution queues
 * and pass the reason on to PBS.
 *
 * Parameters:
 *   job    - the job to reject.
 *   reason - human-readable explanation embedded in the message text.  A
 *            NULL reason is replaced by a placeholder string.
 *
 * When schd_TEST_ONLY is set, nothing is deleted; the function only emits
 * debug output describing what would have happened.
 *
 * Returns:
 *    0  the job was deleted (or test-only mode is active),
 *    1  pbs_deljob() reported an error (logged),
 *   -1  the message buffer could not be allocated, or 'job' was NULL.
 */
int
schd_reject_job(Job *job, char *reason)
{
    char *id = "schd_reject_job";
    static char *message = NULL;    /* allocated on first use, then reused */
    int rc = 0;

    if (job == NULL) {
        DBPRT(("%s: called with a NULL job\n", id));
        return (-1);
    }

    /* Passing NULL to a "%s" conversion is undefined behavior. */
    if (reason == NULL)
        reason = "(no reason given)";

    if (message == NULL) {
        if ((message = (char *)malloc(MSG_BUFFER_SIZE)) == NULL) {
            (void)sprintf(log_buffer, "cannot malloc %d bytes\n",
                MSG_BUFFER_SIZE);
            log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                log_buffer);
            return (-1);
        }
    }

    DBPRT((
        "*************************************************************************\n"));

    if (schd_TEST_ONLY) {
        DBPRT(("JOB %s WOULD HAVE BEEN DELETED!!!\n", job->jobid));
        DBPRT(("Message: %s\n", reason));
    } else {
        /*
         * 'reason' is supplied by the caller and has no known upper bound,
         * so the write is limited to the size of the buffer.  An overlong
         * reason is truncated rather than overflowing 'message'.
         */
        (void)snprintf(message, (size_t)MSG_BUFFER_SIZE,
            "\n"
            "PBS job '%s' was rejected by all execution queues.\n"
            "\n"
            "The reason given for this action was :\n"
            "\n"
            " %s\n"
            "\n"
            "Please correct the problem and resubmit your job, or contact the PBS\n"
            "administrator for assistance.\n"
            "\n"
            "Thank you.\n"
            "\n",
            job->jobid, reason);

        /*
         * Ask PBS to delete the job from the queue, which should deliver the
         * message to the user.
         */
        rc = pbs_deljob(connector, job->jobid, message);

        if (rc) {
            (void)sprintf(log_buffer, "pbs_deljob failed: error %d", rc);
            log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                log_buffer);
            DBPRT(("%s: %s\n", id, log_buffer));
            return 1;
        }

        /*
         * Delete this job from the queue's list (move to a NULL queue).
         */
        schd_move_job_to(job, NULL);

        DBPRT(("JOB %s DELETED!!!\n", job->jobid));
        DBPRT(("Message: %s\n", reason));
    }

    DBPRT((
        "*************************************************************************\n"));

    return 0;
}
/* * Jobs queued on the special queue should be treated as highest priority. * They are sorted onto the top of the list of jobs that is created in the * usersort.c code. That sorted list is then split out onto each of the * queues, so that each queue has a list of the jobs it "owns". The jobs * then carry a backpointer to their owner queue. * * This works really nicely, since all the information about each queue * (including the list of jobs queued/running/etc on it) lives right on * the Queue structure. It is a clean, elegant and fully general solution. * * An unfortunate side effect of this "demultiplexing" is that jobs that * were marked "special" end up claimed by the SpecialQueue. The scheduler * looks for jobs only on the SubmitQueue queue, so it never notices that * there are special jobs enqueued. In order to address this, any jobs on * the SpecialQueue are marked "waiting/high priority", and placed at the * head of the list of jobs in the SubmitQueue. * * This seems like an evil hack at first, and it arguably is. However, if * there are multiple submission queues, it is relatively simple to support * them by simply causing them to be inserted in the submit queue's list. */ static int fixup_special(void) { char *id = "fixup_special"; Job *job, *submitjobs, *nextjob, *specialtail; Queue *queue; char comment[MAX_TXT + 1]; int fixedup = 0, justcomment = 0; /* * Sanity check -- this function shouldn't be called if there is no * valid SpecialQueue. */ if (schd_SpecialQueue == NULL || schd_SpecialQueue->queue == NULL) { DBPRT(("%s: special code called but no special queue defined!\n", id)); return (-1); } queue = schd_SpecialQueue->queue; if (queue->jobs == NULL) { DBPRT(("%s: no jobs on special queue '%s'. Ignoring.\n", id, queue->qname)); return (0); } /* * See if the special queue has anything to do, and if it will allow * anything to be done to it. 
*/ if (queue->flags & QFLAGS_DISABLED) { (void)sprintf(comment, "Queue %s not enabled", queue->qname); justcomment ++; } if (queue->flags & QFLAGS_STOPPED) { (void)sprintf(comment, "Queue %s not started", queue->qname); justcomment ++; } /* * If the jobs on the speical queue should just be commented, do so and * return 0 -- no jobs were fixed up. */ if (justcomment) { for (job = queue->jobs; job != NULL; job = job->next) schd_comment_job(job, comment, JOB_COMMENT_REQUIRED); return 0; } /* * Detach the list of jobs from the SubmitQueue. They will be tacked * back onto the end of the list once the special jobs have been moved * to the head. */ submitjobs = schd_SubmitQueue->queue->jobs; schd_SubmitQueue->queue->jobs = NULL; /* * Any jobs queued on the special queue are now moved to the tail of the * newly empty SubmitQueue list. Mark the jobs as high priority and * waiting. */ for (job = queue->jobs; job != NULL; job = nextjob) { /* * Keep track of the next job -- the next pointer on this job will * be modified by the schd_move_job_to() function. Also keep a * pointer to the last job in the list. */ nextjob = job->next; specialtail = job; if (job->state != 'Q') continue; job->flags |= (JFLAGS_WAITING | JFLAGS_PRIORITY); /* * Move the job from the special queue to the tail of the submit * queue. This keeps the counts of the queued jobs correct in both * queues. */ schd_move_job_to(job, schd_SubmitQueue->queue); fixedup ++; } /* * Now that the submit queue has the list of all queued jobs from the * special queue, attach the original submit jobs onto the tail of the * special job list. */ specialtail->next = submitjobs; DBPRT(("%s: fixed up %d jobs.\n", id, fixedup)); return (fixedup); }
/*
 * schd_run_job_on - start (or resume) a job, optionally moving it to a
 * destination queue first.
 *
 * Parameters:
 *   job         - the job to run.
 *   destq       - queue to run the job in.  May be NULL, in which case the
 *                 job is left on the queue it is currently on.
 *   exechost    - execution host handed to pbs_runjob_err().
 *   set_comment - if nonzero, a "Started on <date>" comment is attached to
 *                 the job before it is started.
 *
 * For a job that is not suspended, the job is moved to 'destq' (when that
 * names a different queue) and PBS is asked to run it.  If the run request
 * fails after a move, the job is moved back to its source queue.  For a
 * suspended job, a "resume" signal is sent instead.
 *
 * On success the local job state becomes 'R' and the owning queue's
 * queued/running counters and idle timer are updated.
 *
 * Returns 0 if the job was started or resumed, -1 on any failure.
 */
int
schd_run_job_on(Job *job, Queue *destq, char *exechost, int set_comment)
{
    char *id = "schd_run_job_on";
    char reason[128];
    char *date;
    const char *action;         /* "started" or "resumed", for the log */
    const char *where;          /* printable form of exechost */
    Queue *srcq;
    Queue *runq;                /* queue named in log messages */
    size_t len;
    int moved = 0;              /* set once the job has left srcq */
    int ret = 0;
    int local_errno = 0;

    /* Get the datestamp from 'ctime()'.  Remove the trailing '\n'. */
    date = ctime(&schd_TimeNow);

    if (date != NULL) {
        len = strlen(date);

        if (len > 0 && date[len - 1] == '\n')
            date[len - 1] = '\0';
    } else {
        /* ctime() may fail; never dereference a NULL result. */
        date = "(unknown time)";
    }

    if (set_comment) {
        /* Bounded write: the text can never overrun 'reason'. */
        (void)snprintf(reason, sizeof(reason), "Started on %s%s%s", date,
            (job->flags & JFLAGS_PRIORITY) ?
                " (EXPRESS/high priority job)" : "",
            (job->flags & JFLAGS_WAITING) ?
                " (long-waiting job)" : "");

        schd_comment_job(job, reason, JOB_COMMENT_REQUIRED);
    }

    srcq = job->queue;

    /*
     * 'destq' is optional, so log messages must not dereference it blindly.
     * When no destination is given the job runs from its current queue.
     */
    runq = (destq != NULL) ? destq : srcq;
    where = (exechost != NULL) ? exechost : "(unspecified)";

    /* If this is NOT a suspended job... */
    if (!(job->flags & JFLAGS_SUSPENDED)) {
        /*
         * If a destination Queue is provided, and it is different from the
         * source queue, then ask PBS to move the job to that queue before
         * running it.
         */
        if ((destq != NULL) && (strcmp(destq->qname, srcq->qname) != 0)) {
            if (pbs_movejob_err(connector, job->jobid, destq->qname, NULL,
                &local_errno)) {
                (void)sprintf(log_buffer,
                    "move job %s to queue %s failed, %d",
                    job->jobid, destq->qname, local_errno);
                log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                    log_buffer);
                DBPRT(("%s: %s\n", id, log_buffer));
                return (-1);
            }

            schd_move_job_to(job, destq);
            moved = 1;
        }

        /*
         * Give the job handle (JOBID) to PBS to run.
         */
        if (pbs_runjob_err(connector, job->jobid, exechost, NULL,
            &local_errno)) {
            (void)sprintf(log_buffer,
                "failed start job %s on queue %s@%s, %d",
                job->jobid, runq->qname, where, local_errno);
            log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                log_buffer);
            DBPRT(("%s: %s\n", id, log_buffer));

            /*
             * Running failed!  Move the job back to the source queue (if
             * it was moved above) before returning.  This prevents jobs
             * being left in execution queues.  A job that never left its
             * source queue needs no move request.
             */
            if (moved) {
                DBPRT(("Attempting to move job %s back to queue %s\n",
                    job->jobid, srcq->qname));

                if (pbs_movejob_err(connector, job->jobid, srcq->qname,
                    NULL, &local_errno)) {
                    (void)sprintf(log_buffer,
                        "failed to move job %s back to queue %s, %d",
                        job->jobid, srcq->qname, local_errno);
                    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                        log_buffer);
                    DBPRT(("%s: %s\n", id, log_buffer));
                }

                schd_move_job_to(job, srcq);
            }

            return (-1);
        }

        action = "started";

    } else {    /* it IS a suspended job */
        /* Without a destination the job stays on its current queue. */
        if (destq != NULL)
            schd_move_job_to(job, destq);

        ret = pbs_sigjob(connector, job->jobid, "resume", NULL);

        if (ret) {
            (void)sprintf(log_buffer, "resume of job %s FAILED (%d)",
                job->jobid, ret);
            /* Actually record the failure instead of dropping it. */
            log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                log_buffer);
            DBPRT(("%s: %s\n", id, log_buffer));
            /*
             * NOTE(review): the local move to 'destq' above is not undone
             * here -- confirm that callers expect this.
             */
            return (-1);
        }

        job->flags &= ~JFLAGS_SUSPENDED;
        action = "resumed";
    }

    /* PBS accepted the job (and presumably will run it).  Log the fact. */
    (void)sprintf(log_buffer, "job %s %s on %s@%s", job->jobid, action,
        runq->qname, where);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
    DBPRT(("%s: %s\n", id, log_buffer));

    /*
     * Change the state of the local representation of the job to "Running".
     */
    job->state = 'R';

    /*
     * Account for the job on this queue's statistics.  'queued' will be
     * bumped up if the queued job was moved to a new destination queue.
     */
    job->queue->queued--;
    job->queue->running++;

    /* The queue is no longer idle.  Unset the idle timer. */
    job->queue->idle_since = 0;

    return (0);     /* Job successfully started. */
}