int main(int argc, char **argv, char **envp) /* qmove */ { int any_failed=0; char job_id[PBS_MAXCLTJOBID]; /* from the command line */ char destination[PBS_MAXSERVERNAME]; /* from the command line */ char *q_n_out, *s_n_out; char job_id_out[PBS_MAXCLTJOBID]; char server_out[MAXSERVERNAME]; char rmt_server[MAXSERVERNAME]; /*test for real deal or just version and exit*/ execution_mode(argc, argv); #ifdef WIN32 winsock_init(); #endif if (argc < 3) { static char usage[]="usage: qmove destination job_identifier...\n"; static char usag2[]=" qmove --version\n"; fprintf(stderr, usage); fprintf(stderr, usag2); exit(2); } strcpy(destination, argv[1]); if (parse_destination_id(destination, &q_n_out, &s_n_out)) { fprintf(stderr, "qmove: illegally formed destination: %s\n", destination); exit(2); } /*perform needed security library initializations (including none)*/ if (CS_client_init() != CS_SUCCESS) { fprintf(stderr, "qmove: unable to initialize security library.\n"); exit(2); } for (optind = 2; optind < argc; optind++) { int connect; int stat=0; int located = FALSE; strcpy(job_id, argv[optind]); if (get_server(job_id, job_id_out, server_out)) { fprintf(stderr, "qmove: illegally formed job identifier: %s\n", job_id); any_failed = 1; continue; } cnt: connect = cnt2server(server_out); if (connect <= 0) { fprintf(stderr, "qmove: cannot connect to server %s (errno=%d)\n", pbs_server, pbs_errno); any_failed = pbs_errno; continue; } stat = pbs_movejob(connect, job_id_out, destination, NULL); if (stat && (pbs_errno != PBSE_UNKJOBID)) { if (stat != PBSE_NEEDQUET) { prt_job_err("qmove", connect, job_id_out); any_failed = pbs_errno; } else { fprintf(stderr, "qmove: Queue type not set for queue \'%s\'\n", destination); } } else if (stat && (pbs_errno == PBSE_UNKJOBID) && !located) { located = TRUE; if (locate_job(job_id_out, server_out, rmt_server)) { pbs_disconnect(connect); strcpy(server_out, rmt_server); goto cnt; } prt_job_err("qmove", connect, job_id_out); any_failed = pbs_errno; } pbs_disconnect(connect); } /*cleanup security library initializations before exiting*/ CS_close_app(); exit(any_failed); }
/* * This function takes a pointer to a struct batch_status for a job, and * fills in the appropriate fields of the supplied job struct. It returns * the number of items that were found. */ int schd_get_jobinfo(Batch_Status *bs, Job *job) { int changed = 0; int istrue; char tmp_str[120]; char *id = "schd_get_jobinfo"; char *host; char *p, *tmp_p, *var_p; AttrList *attr; memset((void *)job, 0, sizeof(Job)); job->jobid = schd_strdup(bs->name); if (job->jobid == NULL) { log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, "schd_strdup(bs->name)"); return (-1); } changed ++; for (attr = bs->attribs; attr != NULL; attr = attr->next) { /* * If this is the 'owner' field, chop it into 'owner' and 'host' * fields, and copy them into the Job struct. */ if (!strcmp(attr->name, ATTR_owner)) { /* Look for the '@' that separates user and hostname. */ strcpy(tmp_str, attr->value); host = strchr(tmp_str, '@'); if (host) { *host = '\0'; /* Replace '@' with NULL (ends username). */ host ++; /* Move to first character of hostname. */ } job->owner = schd_strdup(tmp_str); if (job->owner == NULL) { log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, "schd_strdup(job->owner)"); return (-1); } changed ++; continue; } /* The group to which to charge the resources for this job. */ if (!strcmp(attr->name, ATTR_egroup)) { job->group = schd_strdup(attr->value); if (job->group == NULL) { log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, "schd_strdup(job->group)"); return (-1); } changed ++; continue; } /* The comment currently assigned to this job. */ if (!strcmp(attr->name, ATTR_comment)) { job->comment = schd_strdup(attr->value); if (job->comment == NULL) { log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, "schd_strdup(job->comment)"); return (-1); } changed ++; continue; } /* The host on which this job is running (or was running for * suspended or checkpointed jobs. */ if (!strcmp(attr->name, ATTR_exechost)) { job->exechost = schd_strdup(attr->value); if (job->exechost == NULL) { log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, "schd_strdup(job->exechost)"); return (-1); } changed ++; continue; } if (!strcmp(attr->name, ATTR_inter)) { /* Is this job interactive or not? */ if (schd_val2bool(attr->value, &istrue) == 0) { if (istrue) job->flags |= JFLAGS_INTERACTIVE; else job->flags &= ~JFLAGS_INTERACTIVE; changed ++; } else { DBPRT(("%s: can't parse %s = %s into boolean\n", id, attr->name, attr->value)); } continue; } if (!strcmp(attr->name, ATTR_state)) { /* State is one of 'R', 'Q', 'E', etc. */ job->state = attr->value[0]; changed ++; continue; } if (!strcmp(attr->name, ATTR_queue)) { job->qname = schd_strdup(attr->value); if (job->qname == NULL) { log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, "schd_strdup(job->qname)"); return (-1); } job->flags |= JFLAGS_QNAME_LOCAL; changed ++; continue; } if (!strcmp(attr->name, ATTR_v)) { var_p = schd_strdup(attr->value); if (var_p == NULL) { log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, "schd_strdup(Variable_List)"); return (-1); } p = NULL; tmp_p = strstr(var_p, "PBS_O_QUEUE"); if (tmp_p) { p = strtok(tmp_p, "="); p = strtok(NULL, ", "); } if (p != NULL) { job->oqueue = schd_strdup(p); } else { /* if the originating queue is unknown, default * to the locally defined "submit" queue. */ job->oqueue = schd_strdup(schd_SubmitQueue->queue->qname); } free(var_p); changed ++; continue; } if (!strcmp(attr->name, ATTR_l)) { if (!strcmp(attr->resource, "arch")) { job->arch = schd_strdup(attr->value); changed ++; } else if (!strcmp(attr->resource, "mem")) { job->memory = schd_val2byte(attr->value); changed ++; } else if (!strcmp(attr->resource, "ncpus")) { job->ncpus = atoi(attr->value); changed ++; } else if (!strcmp(attr->resource, "walltime")) { job->walltime = schd_val2sec(attr->value); changed ++; } /* That's all for requested resources. */ continue; } if (!strcmp(attr->name, ATTR_used)) { if (!strcmp(attr->resource, "walltime")) { job->walltime_used = schd_val2sec(attr->value); changed ++; } /* No other interesting cases. */ continue; } /* Creation time attribute. */ if (!strcmp(attr->name, ATTR_ctime)) { /* How long ago was it put in the queue ? */ job->time_queued = schd_TimeNow - atoi(attr->value); continue; } /* Modified time attribute. */ if (!strcmp(attr->name, ATTR_mtime)) { /* When was the job last modified? */ job->mtime = atoi(attr->value); continue; } /* Job Substate attribute. */ if (!strcmp(attr->name, ATTR_substate)) { if (atoi(attr->value) == 43 /* JOB_SUBSTATE_SUSPEND */) job->flags |= JFLAGS_SUSPENDED; continue; } /* * When was the job last eligible to run? When a user-hold is * released, this value is updated to the current time. This * prevents users from gaining higher priority from holding their * jobs. */ if (!strcmp(attr->name, ATTR_etime)) { job->eligible = schd_TimeNow - atoi(attr->value); continue; } } if (job->memory < 1) { job->memory = get_default_mem(job->oqueue); schd_alterjob(connector, job, ATTR_l, schd_byte2val(job->memory), "mem"); changed++; } /* * If this job is in the "Running" or "Suspended" state, compute how * many seconds remain until it is completed. */ if (job->state == 'R' || job->state == 'S') { job->time_left = job->walltime - job->walltime_used; } /* * If this job was enqueued since the last time we ran, set the job * flag to indicate that we have not yet seen this job. This makes it * a candidate for additional processing. There may be some inaccuracy, * since the time_t has resolution of 1 second. Attempt to err on the * side of caution. */ if ((job->state == 'Q') && (job->time_queued != UNSPECIFIED)) { if (job->time_queued <= (schd_TimeNow - schd_TimeLast)) { job->flags |= JFLAGS_FIRST_SEEN; } } /* * If this job was previously running and is now queued, then we * need to (a) flag it as having been checkpointed, and (b) move * it back to the submit queue, if its not already there. */ if (job->exechost && job->state == 'Q') { job->flags |= JFLAGS_CHKPTD; if (strcmp(job->qname, schd_SubmitQueue->queue->qname)) { sprintf(log_buffer, "moving Q'd job %s back to SUBMIT Q", job->jobid); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); pbs_movejob(connector, job->jobid, schd_SubmitQueue->queue->qname, NULL); } } /* * if this job is currently Suspended (a substate of 'R'unning), then * pretend its queued, so that the scheduling logic will work. */ if (job->state == 'S') { job->state = 'Q'; job->flags |= JFLAGS_SUSPENDED; } /* if this job is suspended, checkpointed, or otherwise "queued" * on an exection queue, update the internal representation of * to pretend it is really on the submit queue. */ if ((job->flags & JFLAGS_SUSPENDED) || (job->flags & JFLAGS_CHKPTD)) { free(job->qname); job->qname = schd_strdup(schd_SubmitQueue->queue->qname); } /* * If this job came from the EXPRESS queue, set the flag so that it * will be treated with the highest of priority. */ if (!strcmp(job->oqueue, schd_EXPRESS_Q_NAME)) job->flags |= JFLAGS_PRIORITY; /* * If the 'etime' attribute wasn't found, set it to the time the job has * been queued. Most jobs will be eligible to run their entire lifetime. * The exception is a job that has been held - if it was a user hold, * the release will reset the etime to the latest value. * If not eligible time was given, use the job's creation time. */ if (!job->eligible) job->eligible = job->time_queued; /* if this job has waited too long, and its queue is NOT over its * shares, then bump it up in priority. */ if (job->eligible > schd_MAX_WAIT_TIME && job->sort_order <= 100) job->flags |= JFLAGS_WAITING; return (changed); }
int schd_run_job_on(Job *job, Queue *destq, char *exechost, int set_comment) { char *id = "schd_run_job_on"; char reason[128], tmp_word[20]; char *date; Queue *srcq = NULL; int ret = 0; /* Get the datestamp from 'ctime()'. Remove the trailing '\n'. */ date = ctime(&schd_TimeNow); date[strlen(date) - 1] = '\0'; if (set_comment) { sprintf(reason, "Started on %s", date); if (job->flags & JFLAGS_PRIORITY) { strcat(reason, " (EXPRESS/high priority job)"); } if (job->flags & JFLAGS_WAITING) { strcat(reason, " (long-waiting job)"); } schd_comment_job(job, reason, JOB_COMMENT_REQUIRED); } /* If this is NOT a suspended job... */ if (!(job->flags & JFLAGS_SUSPENDED)) { /* * If a destination Queue is provided, and it is different from the * source queue, then ask PBS to move the job to that queue before * running it. */ srcq = job->queue; /* * Move the job from its queue to the specified run queue. */ if ((destq != NULL) && (strcmp(destq->qname, srcq->qname) != 0)) { if (pbs_movejob(connector, job->jobid, destq->qname, NULL)) { (void)sprintf(log_buffer, "move job %s to queue %s failed, %d", job->jobid, destq->qname, pbs_errno); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); DBPRT(("%s: %s\n", id, log_buffer)); return (-1); } schd_move_job_to(job, destq); } /* * Give the job handle (JOBID) to PBS to run. */ if (pbs_runjob(connector, job->jobid, exechost, NULL)) { (void)sprintf(log_buffer, "failed start job %s on queue %s@%s, %d", job->jobid, destq->qname, exechost, pbs_errno); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); DBPRT(("%s: %s\n", id, log_buffer)); /* * Running failed! Move the job back to the source queue (if * applicable) before returning. This prevents jobs being left * in execution queues. */ if (srcq) { DBPRT(("Attempting to move job %s back to queue %s\n", job->jobid, srcq->qname)); if (pbs_movejob(connector, job->jobid, srcq->qname, NULL)) { (void)sprintf(log_buffer, "failed to move job %s back to queue %s, %d", job->jobid, srcq->qname, pbs_errno); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); DBPRT(("%s: %s\n", id, log_buffer)); } schd_move_job_to(job, srcq); } return (-1); } strcpy(tmp_word, "started"); } else /* it IS a suspended job */ { schd_move_job_to(job, destq); ret = pbs_sigjob(connector, job->jobid, "resume", NULL); if (ret) { sprintf(log_buffer, "resume of job %s FAILED (%d)", job->jobid, ret); return (-1); } job->flags &= ~JFLAGS_SUSPENDED; strcpy(tmp_word, "resumed"); } /* PBS accepted the job (and presumably will run it). Log the fact. */ (void)sprintf(log_buffer, "job %s %s on %s@%s", job->jobid, tmp_word, destq->qname, exechost); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); DBPRT(("%s: %s\n", id, log_buffer)); /* * Change the state of the local representation of the job to "Running". */ job->state = 'R'; /* * Account for the job on this queue's statistics. 'queued' will be * bumped up if the queued job was moved to a new destination queue. */ job->queue->queued --; job->queue->running ++; /* The queue is no longer idle. Unset the idle timer. */ job->queue->idle_since = 0; return (0); /* Job successfully started. */ }