/*
 * Attempt to start a job
 * jobid	(IN) - job id
 * task_cnt	(IN) - total count of tasks to start
 * hostlist	(IN) - SLURM hostlist expression with no repeated hostnames
 * tasklist	(IN/OUT) - comma separated list of hosts with tasks to be
 *		started, list hostname once per task to start
 * comment_ptr	(IN) - new comment field for the job or NULL for no change
 * err_code	(OUT) - Moab error code
 * err_msg	(OUT) - Moab error message
 *
 * RET 0 on success, -1 on failure (err_code/err_msg are then set).
 *
 * NOTE: err_msg may point at a static buffer (tmp_msg below), so this
 * function is not reentrant; presumably the wiki message handler is
 * single-threaded — TODO confirm.
 */
static int	_start_job(uint32_t jobid, int task_cnt, char *hostlist,
			char *tasklist, char *comment_ptr,
			int *err_code, char **err_msg)
{
	int rc = 0, old_task_cnt = 1;
	struct job_record *job_ptr;
	/* Write lock on job info, read lock on node info */
	slurmctld_lock_t job_write_lock = {
		NO_LOCK, WRITE_LOCK, READ_LOCK, NO_LOCK };
	char *new_node_list = NULL;
	/* Static: survives return so *err_msg remains valid for the caller */
	static char tmp_msg[128];
	bitstr_t *new_bitmap = (bitstr_t *) NULL;
	bitstr_t *save_req_bitmap = (bitstr_t *) NULL;
	bitoff_t i, bsize;
	int ll; /* layout info index */
	char *node_name, *node_idx, *node_cur, *save_req_nodes = NULL;
	size_t node_name_len;
	/* Cache the consumable-resources plugin query across calls */
	static uint32_t cr_test = 0, cr_enabled = 0;

	if (cr_test == 0) {
		select_g_get_info_from_plugin(SELECT_CR_PLUGIN, NULL,
						&cr_enabled);
		cr_test = 1;
	}

	lock_slurmctld(job_write_lock);
	job_ptr = find_job_record(jobid);
	if (job_ptr == NULL) {
		*err_code = -700;
		*err_msg = "No such job";
		error("wiki: Failed to find job %u", jobid);
		rc = -1;
		goto fini;
	}

	/* Only a pending job with populated details can be started */
	if ((job_ptr->details == NULL) || (!IS_JOB_PENDING(job_ptr))) {
		*err_code = -700;
		*err_msg = "Job not pending, can't start";
		error("wiki: Attempt to start job %u in state %s",
			jobid, job_state_string(job_ptr->job_state));
		rc = -1;
		goto fini;
	}

	if (comment_ptr) {
		/* A "RESERVED:<secs>" tag inside the comment records how
		 * long Moab held resources for this job */
		char *reserved = strstr(comment_ptr, "RESERVED:");
		if (reserved) {
			reserved += 9;	/* strlen("RESERVED:") */
			job_ptr->details->reserved_resources =
				strtol(reserved, NULL, 10);
		}
		xfree(job_ptr->comment);
		job_ptr->comment = xstrdup(comment_ptr);
	}

	if (task_cnt) {
		new_node_list = xstrdup(hostlist);
		if (node_name2bitmap(new_node_list, false, &new_bitmap) != 0) {
			*err_code = -700;
			*err_msg = "Invalid TASKLIST";
			error("wiki: Attempt to set invalid node list for "
				"job %u, %s", jobid, hostlist);
			xfree(new_node_list);
			rc = -1;
			goto fini;
		}

		if (!bit_super_set(new_bitmap, avail_node_bitmap)) {
			/* Selected node is UP and not responding
			 * or it just went DOWN */
			*err_code = -700;
			*err_msg = "TASKLIST includes non-responsive node";
			error("wiki: Attempt to use non-responsive nodes for "
				"job %u, %s", jobid, hostlist);
			xfree(new_node_list);
			FREE_NULL_BITMAP(new_bitmap);
			rc = -1;
			goto fini;
		}

		/* User excluded node list incompatible with Wiki
		 * Exclude all nodes not explicitly requested */
		FREE_NULL_BITMAP(job_ptr->details->exc_node_bitmap);
		job_ptr->details->exc_node_bitmap = bit_copy(new_bitmap);
		bit_not(job_ptr->details->exc_node_bitmap);
	}

	/* Build layout information from tasklist (assuming that Moab
	 * sends a non-bracketed list of nodes, repeated as many times
	 * as cpus should be used per node); at this point, node names
	 * are comma-separated. This is _not_ a fast algorithm as it
	 * performs many string compares. */
	xfree(job_ptr->details->req_node_layout);
	if (task_cnt && cr_enabled) {
		uint16_t cpus_per_task = MAX(1, job_ptr->details->cpus_per_task);
		/* One slot per set bit; ll below indexes this array */
		job_ptr->details->req_node_layout = (uint16_t *)
			xmalloc(bit_set_count(new_bitmap) * sizeof(uint16_t));
		bsize = bit_size(new_bitmap);
		for (i = 0, ll = -1; i < bsize; i++) {
			if (!bit_test(new_bitmap, i))
				continue;
			ll++;
			node_name = node_record_table_ptr[i].name;
			node_name_len  = strlen(node_name);
			if (node_name_len == 0)
				continue;
			/* Count occurrences of node_name in tasklist.
			 * NOTE(review): the match is validated by the
			 * character AFTER it (',' or '\0') but not the
			 * character before, so a node name that is a
			 * suffix of another could over-count — confirm
			 * node-naming guarantees rule this out. */
			node_cur = tasklist;
			while (*node_cur) {
				if ((node_idx = strstr(node_cur, node_name))) {
					if ((node_idx[node_name_len] == ',') ||
					    (node_idx[node_name_len] == '\0')) {
						job_ptr->details->
							req_node_layout[ll] +=
							cpus_per_task;
					}
					node_cur = strchr(node_idx, ',');
					if (node_cur)
						continue;
				}
				break;
			}
		}
	}

	/* save and update job state to start now; the saved values are
	 * restored below after schedule() so a requeued job keeps its
	 * original required-node specification */
	save_req_nodes = job_ptr->details->req_nodes;
	job_ptr->details->req_nodes = new_node_list;
	save_req_bitmap = job_ptr->details->req_node_bitmap;
	job_ptr->details->req_node_bitmap = new_bitmap;
	old_task_cnt = job_ptr->details->min_cpus;
	job_ptr->details->min_cpus = MAX(task_cnt, old_task_cnt);
	/* Huge priority forces this job to the front of the queue */
	job_ptr->priority = 100000000;

 fini:	unlock_slurmctld(job_write_lock);
	if (rc)
		return rc;

	/* No errors so far */
	(void) schedule(INFINITE);	/* provides own locking */

	/* Check to insure the job was actually started */
	lock_slurmctld(job_write_lock);
	/* job_ptr may be stale if the job record moved while unlocked */
	if (job_ptr->job_id != jobid)
		job_ptr = find_job_record(jobid);

	if (job_ptr && (job_ptr->job_id == jobid) &&
	    (!IS_JOB_RUNNING(job_ptr))) {
		uint16_t wait_reason = 0;
		char *wait_string;

		if (IS_JOB_FAILED(job_ptr))
			wait_string = "Invalid request, job aborted";
		else {
			wait_reason = job_ptr->state_reason;
			if (wait_reason == WAIT_HELD) {
				/* some job is completing, slurmctld did
				 * not even try to schedule this job */
				wait_reason = WAIT_RESOURCES;
			}
			wait_string = job_reason_string(wait_reason);
			job_ptr->state_reason = WAIT_HELD;
			xfree(job_ptr->state_desc);
		}
		/* Moab error codes -910 and below encode the wait reason */
		*err_code = -910 - wait_reason;
		snprintf(tmp_msg, sizeof(tmp_msg),
			"Could not start job %u(%s): %s",
			jobid, new_node_list, wait_string);
		*err_msg = tmp_msg;
		error("wiki: %s", tmp_msg);

		/* restore some of job state */
		job_ptr->priority = 0;
		job_ptr->details->min_cpus = old_task_cnt;
		rc = -1;
	}

	if (job_ptr && (job_ptr->job_id == jobid) && job_ptr->details) {
		/* Restore required node list in case job requeued.
		 * Ownership of save_req_nodes/save_req_bitmap returns to
		 * the job record here; new_node_list/new_bitmap were
		 * handed to the record above and are freed with it. */
		xfree(job_ptr->details->req_nodes);
		job_ptr->details->req_nodes = save_req_nodes;
		FREE_NULL_BITMAP(job_ptr->details->req_node_bitmap);
		job_ptr->details->req_node_bitmap = save_req_bitmap;
		FREE_NULL_BITMAP(job_ptr->details->exc_node_bitmap);
		xfree(job_ptr->details->req_node_layout);
	} else {
		error("wiki: start_job(%u) job missing", jobid);
		xfree(save_req_nodes);
		FREE_NULL_BITMAP(save_req_bitmap);
	}
	unlock_slurmctld(job_write_lock);
	schedule_node_save();	/* provides own locking */
	schedule_job_save();	/* provides own locking */

	return rc;
}
/* * Convert Moab supplied TASKLIST expression into a SLURM hostlist expression * * Moab format 1: tux0:tux0:tux1:tux1:tux2 (list host for each cpu) * Moab format 2: tux[0-1]*2:tux2 (list cpu count after host name) * * SLURM format: tux0,tux0,tux1,tux1,tux2 (if consumable resources enabled) * SLURM format: tux0,tux1,tux2 (if consumable resources disabled) * * NOTE: returned string must be released with xfree() */ extern char * moab2slurm_task_list(char *moab_tasklist, int *task_cnt) { char *slurm_tasklist = NULL, *host = NULL, *tmp1 = NULL, *tmp2 = NULL, *tok = NULL, *tok_p = NULL; int i, reps; hostlist_t hl; static uint32_t cr_test = 0, cr_enabled = 0; if (cr_test == 0) { select_g_get_info_from_plugin(SELECT_CR_PLUGIN, NULL, &cr_enabled); cr_test = 1; } *task_cnt = 0; /* Moab format 2 if string contains '*' or '[' */ tmp1 = strchr(moab_tasklist, (int) '*'); if (tmp1 == NULL) tmp1 = strchr(moab_tasklist, (int) '['); if (tmp1 == NULL) { /* Moab format 1 */ slurm_tasklist = xstrdup(moab_tasklist); if (moab_tasklist[0]) *task_cnt = 1; for (i=0; slurm_tasklist[i]!='\0'; i++) { if (slurm_tasklist[i] == ':') { slurm_tasklist[i] = ','; (*task_cnt)++; } else if (slurm_tasklist[i] == ',') (*task_cnt)++; } return slurm_tasklist; } /* Moab format 2 */ slurm_tasklist = xstrdup(""); tmp1 = xstrdup(moab_tasklist); tok = strtok_r(tmp1, ":", &tok_p); while (tok) { /* find task count, assume 1 if no "*" */ tmp2 = strchr(tok, (int) '*'); if (tmp2) { reps = atoi(tmp2 + 1); tmp2[0] = '\0'; } else reps = 1; /* find host expression */ hl = hostlist_create(tok); while ((host = hostlist_shift(hl))) { for (i=0; i<reps; i++) { if (slurm_tasklist[0]) xstrcat(slurm_tasklist, ","); xstrcat(slurm_tasklist, host); if (!cr_enabled) break; } free(host); (*task_cnt) += reps; } hostlist_destroy(hl); /* get next token */ tok = strtok_r(NULL, ":", &tok_p); } xfree(tmp1); return slurm_tasklist; }
/*
 * get_jobs - get information on specific job(s) changed since some time
 * cmd_ptr IN   - CMD=GETJOBS ARG=[<UPDATETIME>:<JOBID>[:<JOBID>]...]
 *                                [<UPDATETIME>:ALL]
 * err_code OUT - 0 or an error code
 * err_msg OUT  - response message
 * NOTE: xfree() err_msg if err_code is zero
 * RET 0 on success, -1 on failure
 *
 * Response format
 * ARG=<cnt>#<JOBID>;
 *	STATE=<state>;			Moab equivalent job state
 *	[EXITCODE=<number>;]		Job exit code, if completed
 *	[RFEATURES=<features>;]		required features, if any,
 *					NOTE: OR operator not supported
 *	[HOSTLIST=<node1:node2>;]	list of required nodes, if any
 *	[EXCLUDE_HOSTLIST=<node1:node2>; list of excluded nodes, if any
 *	[STARTDATE=<uts>;]		earliest start time, if any
 *	[MAXNODES=<nodes>;]		maximum number of nodes, 0 if no limit
 *	[TASKLIST=<node1:node2>;]	nodes in use, if running or completing
 *	[REJMESSAGE=<str>;]		reason job is not running, if any
 *	[IWD=<directory>;]		Initial Working Directory
 *	[FLAGS=INTERACTIVE;]		set if interactive (not batch) job
 *	[GRES=<name>[:<count>[*cpus]],...;] generic resources required by the
 *					job on a per node basis
 *	[WCKEY=<key>;]			workload characterization key for job
 *	UPDATETIME=<uts>;		time last active
 *	WCLIMIT=<secs>;			wall clock time limit, seconds
 *	TASKS=<cpus>;			CPUs required
 *	NODES=<nodes>;			count of nodes required or allocated
 *	DPROCS=<cpus_per_task>;		count of CPUs required per task
 *	QUEUETIME=<uts>;		submission time
 *	STARTTIME=<uts>;		time execution started
 *	RCLASS=<partition>;		SLURM partition name
 *	RMEM=<MB>;			MB of memory required
 *	RDISK=<MB>;			MB of disk space required
 *	[COMMENT=<whatever>;]		job dependency or account number
 *	[COMPLETETIME=<uts>;]		termination time
 *	[SUSPENDTIME=<secs>;]		seconds that job has been suspended
 *	UNAME=<user_name>;		user name
 *	GNAME=<group_name>;		group name
 *	NAME=<job_name>;		job name
 * [#<JOBID>;...];			additional jobs, if any
 */
extern int	get_jobs(char *cmd_ptr, int *err_code, char **err_msg)
{
	char *arg_ptr = NULL, *tmp_char = NULL, *tmp_buf = NULL, *buf = NULL;
	time_t update_time;
	/* Locks: read job, partition
	 * NOTE(review): this initializer supplies five lock values while
	 * _start_job's supplies four — confirm both match the current
	 * slurmctld_lock_t field count. */
	slurmctld_lock_t job_read_lock = {
		NO_LOCK, READ_LOCK, NO_LOCK, READ_LOCK, NO_LOCK };
	int job_rec_cnt = 0, buf_size = 0;

	/* cr_test/cr_enabled are presumably file-scope statics shared
	 * with other handlers — declared outside this chunk */
	if (cr_test == 0) {
		select_g_get_info_from_plugin(SELECT_CR_PLUGIN, NULL,
						&cr_enabled);
		cr_test = 1;
	}

	arg_ptr = strstr(cmd_ptr, "ARG=");
	if (arg_ptr == NULL) {
		*err_code = -300;
		*err_msg = "GETJOBS lacks ARG";
		error("wiki: GETJOBS lacks ARG");
		return -1;
	}
	/* <UPDATETIME> precedes the job list; a ':' must follow it */
	update_time = (time_t) strtoul(arg_ptr+4, &tmp_char, 10);
	if (tmp_char[0] != ':') {
		*err_code = -300;
		*err_msg = "Invalid ARG value";
		error("wiki: GETJOBS has invalid ARG value");
		return -1;
	}
	if (job_list == NULL) {
		*err_code = -140;
		*err_msg = "Still performing initialization";
		error("wiki: job_list not yet initilized");
		return -1;
	}
	tmp_char++;	/* skip over the ':' */
	lock_slurmctld(job_read_lock);
	if (xstrncmp(tmp_char, "ALL", 3) == 0) {
		/* report all jobs */
		buf = _dump_all_jobs(&job_rec_cnt, update_time);
	} else {
		struct job_record *job_ptr = NULL;
		char *job_name = NULL, *tmp2_char = NULL;
		uint32_t job_id;

		/* NOTE: strtok_r modifies the caller's cmd_ptr buffer
		 * in place while splitting the ':'-separated job ids */
		job_name = strtok_r(tmp_char, ":", &tmp2_char);
		while (job_name) {
			job_id = (uint32_t) strtoul(job_name, NULL, 10);
			job_ptr = find_job_record(job_id);
			tmp_buf = _dump_job(job_ptr, update_time);
			if (job_rec_cnt > 0)
				xstrcat(buf, "#");
			xstrcat(buf, tmp_buf);
			xfree(tmp_buf);
			job_rec_cnt++;
			job_name = strtok_r(NULL, ":", &tmp2_char);
		}
	}
	unlock_slurmctld(job_read_lock);

	/* Prepend ("ARG=%d", job_rec_cnt) to reply message */
	if (buf)
		buf_size = strlen(buf);
	/* +32 covers "SC=0 ARG=", the record count digits, '#' and NUL */
	tmp_buf = xmalloc(buf_size + 32);
	if (job_rec_cnt)
		sprintf(tmp_buf, "SC=0 ARG=%d#%s", job_rec_cnt, buf);
	else
		sprintf(tmp_buf, "SC=0 ARG=0#");
	xfree(buf);
	*err_code = 0;
	*err_msg = tmp_buf;	/* caller must xfree() on success */
	return 0;
}