/*
 * Attempt to start a job
 * jobid       (IN) - job id
 * task_cnt    (IN) - total count of tasks to start
 * hostlist    (IN) - SLURM hostlist expression with no repeated hostnames
 * tasklist (IN/OUT) - comma separated list of hosts with tasks to be started,
 *                     list hostname once per task to start
 * comment_ptr (IN) - new comment field for the job or NULL for no change
 * err_code   (OUT) - Moab error code
 * err_msg    (OUT) - Moab error message
 */
static int _start_job(uint32_t jobid, int task_cnt, char *hostlist,
		      char *tasklist, char *comment_ptr,
		      int *err_code, char **err_msg)
{
	int rc = 0, old_task_cnt = 1;
	struct job_record *job_ptr;
	/* Write lock on job info, read lock on node info */
	slurmctld_lock_t job_write_lock = {
		NO_LOCK, WRITE_LOCK, READ_LOCK, NO_LOCK };
	char *new_node_list = NULL;
	static char tmp_msg[128];
	bitstr_t *new_bitmap = (bitstr_t *) NULL;
	bitstr_t *save_req_bitmap = (bitstr_t *) NULL;
	bitoff_t i, bsize;
	int ll; /* layout info index */
	char *node_name, *node_idx, *node_cur, *save_req_nodes = NULL;
	size_t node_name_len;
	static uint32_t cr_test = 0, cr_enabled = 0;

	if (cr_test == 0) {
		select_g_get_info_from_plugin(SELECT_CR_PLUGIN, NULL,
					      &cr_enabled);
		cr_test = 1;
	}

	lock_slurmctld(job_write_lock);
	job_ptr = find_job_record(jobid);
	if (job_ptr == NULL) {
		*err_code = -700;
		*err_msg = "No such job";
		error("wiki: Failed to find job %u", jobid);
		rc = -1;
		goto fini;
	}

	if ((job_ptr->details == NULL) || (!IS_JOB_PENDING(job_ptr))) {
		*err_code = -700;
		*err_msg = "Job not pending, can't start";
		error("wiki: Attempt to start job %u in state %s",
		      jobid, job_state_string(job_ptr->job_state));
		rc = -1;
		goto fini;
	}

	if (comment_ptr) {
		char *reserved = strstr(comment_ptr, "RESERVED:");
		if (reserved) {
			reserved += 9;
			job_ptr->details->reserved_resources =
				strtol(reserved, NULL, 10);
		}
		xfree(job_ptr->comment);
		job_ptr->comment = xstrdup(comment_ptr);
	}

	if (task_cnt) {
		new_node_list = xstrdup(hostlist);
		if (node_name2bitmap(new_node_list, false, &new_bitmap) != 0) {
			*err_code = -700;
			*err_msg = "Invalid TASKLIST";
			error("wiki: Attempt to set invalid node list for "
			      "job %u, %s", jobid, hostlist);
			xfree(new_node_list);
			rc = -1;
			goto fini;
		}

		if (!bit_super_set(new_bitmap, avail_node_bitmap)) {
			/* Selected node is UP and not responding
			 * or it just went DOWN */
			*err_code = -700;
			*err_msg = "TASKLIST includes non-responsive node";
			error("wiki: Attempt to use non-responsive nodes for "
			      "job %u, %s", jobid, hostlist);
			xfree(new_node_list);
			FREE_NULL_BITMAP(new_bitmap);
			rc = -1;
			goto fini;
		}

		/* User excluded node list incompatible with Wiki.
		 * Exclude all nodes not explicitly requested */
		FREE_NULL_BITMAP(job_ptr->details->exc_node_bitmap);
		job_ptr->details->exc_node_bitmap = bit_copy(new_bitmap);
		bit_not(job_ptr->details->exc_node_bitmap);
	}

	/* Build layout information from tasklist (assuming that Moab
	 * sends a non-bracketed list of nodes, repeated as many times
	 * as cpus should be used per node); at this point, node names
	 * are comma-separated. This is _not_ a fast algorithm as it
	 * performs many string compares. */
	xfree(job_ptr->details->req_node_layout);
	if (task_cnt && cr_enabled) {
		uint16_t cpus_per_task = MAX(1,
					     job_ptr->details->cpus_per_task);
		job_ptr->details->req_node_layout = (uint16_t *)
			xmalloc(bit_set_count(new_bitmap) * sizeof(uint16_t));
		bsize = bit_size(new_bitmap);
		for (i = 0, ll = -1; i < bsize; i++) {
			if (!bit_test(new_bitmap, i))
				continue;
			ll++;
			node_name = node_record_table_ptr[i].name;
			node_name_len = strlen(node_name);
			if (node_name_len == 0)
				continue;
			node_cur = tasklist;
			while (*node_cur) {
				if ((node_idx = strstr(node_cur, node_name))) {
					if ((node_idx[node_name_len] == ',') ||
					    (node_idx[node_name_len] == '\0')) {
						job_ptr->details->
							req_node_layout[ll] +=
							cpus_per_task;
					}
					node_cur = strchr(node_idx, ',');
					if (node_cur)
						continue;
				}
				break;
			}
		}
	}

	/* save and update job state to start now */
	save_req_nodes = job_ptr->details->req_nodes;
	job_ptr->details->req_nodes = new_node_list;
	save_req_bitmap = job_ptr->details->req_node_bitmap;
	job_ptr->details->req_node_bitmap = new_bitmap;
	old_task_cnt = job_ptr->details->min_cpus;
	job_ptr->details->min_cpus = MAX(task_cnt, old_task_cnt);
	job_ptr->priority = 100000000;

fini:	unlock_slurmctld(job_write_lock);
	if (rc)
		return rc;

	/* No errors so far */
	(void) schedule(INFINITE);	/* provides own locking */

	/* Check to ensure the job was actually started */
	lock_slurmctld(job_write_lock);
	if (job_ptr->job_id != jobid)
		job_ptr = find_job_record(jobid);

	if (job_ptr && (job_ptr->job_id == jobid) &&
	    (!IS_JOB_RUNNING(job_ptr))) {
		uint16_t wait_reason = 0;
		char *wait_string;

		if (IS_JOB_FAILED(job_ptr))
			wait_string = "Invalid request, job aborted";
		else {
			wait_reason = job_ptr->state_reason;
			if (wait_reason == WAIT_HELD) {
				/* some job is completing, slurmctld did
				 * not even try to schedule this job */
				wait_reason = WAIT_RESOURCES;
			}
			wait_string = job_reason_string(wait_reason);
			job_ptr->state_reason = WAIT_HELD;
			xfree(job_ptr->state_desc);
		}
		*err_code = -910 - wait_reason;
		snprintf(tmp_msg, sizeof(tmp_msg),
			 "Could not start job %u(%s): %s",
			 jobid, new_node_list, wait_string);
		*err_msg = tmp_msg;
		error("wiki: %s", tmp_msg);

		/* restore some of job state */
		job_ptr->priority = 0;
		job_ptr->details->min_cpus = old_task_cnt;
		rc = -1;
	}

	if (job_ptr && (job_ptr->job_id == jobid) && job_ptr->details) {
		/* Restore required node list in case job requeued */
		xfree(job_ptr->details->req_nodes);
		job_ptr->details->req_nodes = save_req_nodes;
		FREE_NULL_BITMAP(job_ptr->details->req_node_bitmap);
		job_ptr->details->req_node_bitmap = save_req_bitmap;
		FREE_NULL_BITMAP(job_ptr->details->exc_node_bitmap);
		xfree(job_ptr->details->req_node_layout);
	} else {
		error("wiki: start_job(%u) job missing", jobid);
		xfree(save_req_nodes);
		FREE_NULL_BITMAP(save_req_bitmap);
	}

	unlock_slurmctld(job_write_lock);
	schedule_node_save();	/* provides own locking */
	schedule_job_save();	/* provides own locking */

	return rc;
}
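/*
 * Illustration only, not part of slurmctld: a minimal standalone sketch of
 * the string-compare loop above that fills req_node_layout.  Moab repeats a
 * hostname in the tasklist once per task, so for tasklist "tux0,tux0,tux1"
 * this hypothetical helper would return 2 for "tux0" and 1 for "tux1";
 * req_node_layout[ll] would then be that count times cpus_per_task.
 * Needs only <string.h> and <stdint.h>; the name count_tasks() is made up
 * for this sketch.
 */
static uint16_t count_tasks(const char *tasklist, const char *node_name)
{
	size_t len = strlen(node_name);
	uint16_t cnt = 0;
	const char *cur = tasklist;

	if (len == 0)
		return 0;
	while ((cur = strstr(cur, node_name))) {
		/* count only whole-token matches, as the loop above does */
		if ((cur[len] == ',') || (cur[len] == '\0'))
			cnt++;
		cur = strchr(cur, ',');	/* advance to the next list entry */
		if (!cur)
			break;
		cur++;
	}
	return cnt;
}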
static void _notify_slurmctld_nodes(agent_info_t *agent_ptr,
				    int no_resp_cnt, int retry_cnt)
{
	ListIterator itr = NULL;
	ret_data_info_t *ret_data_info = NULL;
	state_t state;
	int is_ret_list = 1;
	/* Locks: Read config, write job, write node */
	slurmctld_lock_t node_write_lock =
		{ READ_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK };
	thd_t *thread_ptr = agent_ptr->thread_struct;
	int i;

	/* Notify slurmctld of non-responding nodes */
	if (no_resp_cnt) {
		/* Update node table data for non-responding nodes */
		lock_slurmctld(node_write_lock);
		if (agent_ptr->msg_type == REQUEST_BATCH_JOB_LAUNCH) {
			/* Requeue the request */
			batch_job_launch_msg_t *launch_msg_ptr =
				*agent_ptr->msg_args_pptr;
			uint32_t job_id = launch_msg_ptr->job_id;
			job_complete(job_id, 0, true, false, 0);
		}
		unlock_slurmctld(node_write_lock);
	}
	if (retry_cnt && agent_ptr->retry)
		_queue_agent_retry(agent_ptr, retry_cnt);

	/* Update last_response on responding nodes */
	lock_slurmctld(node_write_lock);
	for (i = 0; i < agent_ptr->thread_count; i++) {
		char *down_msg, *node_names;
		if (!thread_ptr[i].ret_list) {
			state = thread_ptr[i].state;
			is_ret_list = 0;
			goto switch_on_state;
		}
		is_ret_list = 1;

		itr = list_iterator_create(thread_ptr[i].ret_list);
		while ((ret_data_info = list_next(itr))) {
			state = ret_data_info->err;
		switch_on_state:
			switch (state) {
			case DSH_NO_RESP:
				if (!is_ret_list) {
					node_not_resp(thread_ptr[i].nodelist,
						      thread_ptr[i].start_time);
				} else {
					node_not_resp(ret_data_info->node_name,
						      thread_ptr[i].start_time);
				}
				break;
			case DSH_FAILED:
				if (is_ret_list)
					node_names = ret_data_info->node_name;
				else
					node_names = thread_ptr[i].nodelist;
#ifdef HAVE_FRONT_END
				down_msg = "";
#else
				set_node_down(node_names,
					      "Prolog/Epilog failure");
				down_msg = ", set to state DOWN";
#endif
				error("Prolog/Epilog failure on nodes %s%s",
				      node_names, down_msg);
				break;
			case DSH_DONE:
				if (!is_ret_list)
					node_did_resp(thread_ptr[i].nodelist);
				else
					node_did_resp(ret_data_info->node_name);
				break;
			default:
				if (!is_ret_list) {
					error("unknown state returned for %s",
					      thread_ptr[i].nodelist);
				} else {
					error("unknown state returned for %s",
					      ret_data_info->node_name);
				}
				break;
			}
			if (!is_ret_list)
				goto finished;
		}
		list_iterator_destroy(itr);
finished:	;
	}
	unlock_slurmctld(node_write_lock);

	if (run_scheduler) {
		run_scheduler = false;
		/* below functions all have their own locking */
		if (schedule(0)) {
			schedule_job_save();
			schedule_node_save();
		}
	}
	if ((agent_ptr->msg_type == REQUEST_PING) ||
	    (agent_ptr->msg_type == REQUEST_HEALTH_CHECK) ||
	    (agent_ptr->msg_type == REQUEST_NODE_REGISTRATION_STATUS))
		ping_end();
}
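/*
 * Illustration only, not part of agent.c: the per-node handling above,
 * rewritten as a flat helper to make the is_ret_list/goto control flow
 * easier to follow.  _handle_node_state() is a hypothetical name; the
 * state_t/DSH_* values and the node_not_resp()/set_node_down()/
 * node_did_resp()/error() calls are the same ones used in the switch above.
 */
static void _handle_node_state(state_t state, char *node_names,
			       time_t start_time)
{
	switch (state) {
	case DSH_NO_RESP:
		node_not_resp(node_names, start_time);
		break;
	case DSH_FAILED:
#ifdef HAVE_FRONT_END
		error("Prolog/Epilog failure on nodes %s", node_names);
#else
		set_node_down(node_names, "Prolog/Epilog failure");
		error("Prolog/Epilog failure on nodes %s, set to state DOWN",
		      node_names);
#endif
		break;
	case DSH_DONE:
		node_did_resp(node_names);
		break;
	default:
		error("unknown state returned for %s", node_names);
		break;
	}
}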