static void *_step_fini(void *args)
{
	struct step_record *step_ptr = (struct step_record *)args;
	select_jobinfo_t *jobinfo = NULL;
	nhc_info_t nhc_info;
	/* Locks: Write job, write node */
	slurmctld_lock_t job_write_lock = {
		NO_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK };
	slurmctld_lock_t job_read_lock = {
		NO_LOCK, READ_LOCK, NO_LOCK, NO_LOCK };

	if (!step_ptr) {
		error("_step_fini: no step ptr given, "
		      "this should never happen");
		return NULL;
	}

	memset(&nhc_info, 0, sizeof(nhc_info_t));
	nhc_info.step = 1;
	lock_slurmctld(job_read_lock);
	nhc_info.jobid = step_ptr->job_ptr->job_id;
	nhc_info.apid = SLURM_ID_HASH(step_ptr->job_ptr->job_id,
				      step_ptr->step_id);
	nhc_info.exit_code = step_ptr->exit_code;
	nhc_info.user_id = step_ptr->job_ptr->user_id;

	if (!step_ptr->step_layout || !step_ptr->step_layout->node_list) {
		if (step_ptr->job_ptr)
			nhc_info.nodelist = xstrdup(step_ptr->job_ptr->nodes);
	} else
		nhc_info.nodelist = xstrdup(step_ptr->step_layout->node_list);
	unlock_slurmctld(job_read_lock);

	/* run NHC */
	_run_nhc(&nhc_info);
	/***********/

	xfree(nhc_info.nodelist);

	lock_slurmctld(job_write_lock);
	if (!step_ptr->job_ptr || !step_ptr->step_node_bitmap) {
		error("For some reason we don't have a step_node_bitmap or "
		      "a job_ptr for %"PRIu64". This should never happen.",
		      nhc_info.apid);
	} else {
		other_step_finish(step_ptr);

		jobinfo = step_ptr->select_jobinfo->data;
		jobinfo->cleaning = 0;

		/* free resources on the job */
		post_job_step(step_ptr);
	}
	unlock_slurmctld(job_write_lock);

	return NULL;
}
static void *_step_fini(void *args)
{
	struct step_record *step_ptr = (struct step_record *)args;
	select_jobinfo_t *jobinfo = NULL;
	uint64_t apid = 0;
	char *node_list = NULL;
	/* Locks: Write job, write node */
	slurmctld_lock_t job_write_lock = {
		NO_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK };
	slurmctld_lock_t job_read_lock = {
		NO_LOCK, READ_LOCK, NO_LOCK, NO_LOCK };

	if (!step_ptr) {
		error("_step_fini: no step ptr given, "
		      "this should never happen");
		return NULL;
	}

	lock_slurmctld(job_read_lock);
	apid = SLURM_ID_HASH(step_ptr->job_ptr->job_id, step_ptr->step_id);

	if (!step_ptr->step_layout || !step_ptr->step_layout->node_list) {
		if (step_ptr->job_ptr)
			node_list = xstrdup(step_ptr->job_ptr->nodes);
	} else
		node_list = xstrdup(step_ptr->step_layout->node_list);
	unlock_slurmctld(job_read_lock);

	/* run NHC */
	_run_nhc(apid, node_list, 0);
	/***********/

	xfree(node_list);

	lock_slurmctld(job_write_lock);
	if (!step_ptr->job_ptr || !step_ptr->step_node_bitmap) {
		error("For some reason we don't have a step_node_bitmap or "
		      "a job_ptr for %"PRIu64". This should never happen.",
		      apid);
	} else {
		other_step_finish(step_ptr);

		jobinfo = step_ptr->select_jobinfo->data;
		jobinfo->cleaning = 0;

		/* free resources on the job */
		post_job_step(step_ptr);
	}
	unlock_slurmctld(job_write_lock);

	return NULL;
}
/* backfill_agent - detached thread periodically attempts to backfill jobs */
extern void *backfill_agent(void *args)
{
	time_t now;
	double wait_time;
	static time_t last_backfill_time = 0;
	/* Read config and partitions; Write jobs and nodes */
	slurmctld_lock_t all_locks = {
		READ_LOCK, WRITE_LOCK, WRITE_LOCK, READ_LOCK };

	_load_config();
	last_backfill_time = time(NULL);
	while (!stop_backfill) {
		_my_sleep(backfill_interval);
		if (stop_backfill)
			break;
		if (config_flag) {
			config_flag = false;
			_load_config();
		}
		now = time(NULL);
		wait_time = difftime(now, last_backfill_time);
		if ((wait_time < backfill_interval) ||
		    _job_is_completing() || _many_pending_rpcs() ||
		    !avail_front_end(NULL) || !_more_work(last_backfill_time))
			continue;

		lock_slurmctld(all_locks);
		while (_attempt_backfill())
			;
		last_backfill_time = time(NULL);
		unlock_slurmctld(all_locks);
	}
	return NULL;
}
/* builtin_agent - detached thread that periodically computes when pending
 * jobs can start */
extern void *builtin_agent(void *args)
{
	time_t now;
	double wait_time;
	static time_t last_sched_time = 0;
	/* Read config, nodes and partitions; Write jobs */
	slurmctld_lock_t all_locks = {
		READ_LOCK, WRITE_LOCK, READ_LOCK, READ_LOCK, READ_LOCK };

	_load_config();
	last_sched_time = time(NULL);
	while (!stop_builtin) {
		_my_sleep(builtin_interval);
		if (stop_builtin)
			break;
		if (config_flag) {
			config_flag = false;
			_load_config();
		}
		now = time(NULL);
		wait_time = difftime(now, last_sched_time);
		if (wait_time < builtin_interval)
			continue;

		lock_slurmctld(all_locks);
		_compute_start_times();
		last_sched_time = time(NULL);
		(void) bb_g_job_try_stage_in();
		unlock_slurmctld(all_locks);
	}
	return NULL;
}
/* Ping primary ControlMachine
 * RET 0 if no error */
static int _ping_controller(void)
{
	int rc;
	slurm_msg_t req;
	/* Locks: Read configuration */
	slurmctld_lock_t config_read_lock = {
		READ_LOCK, NO_LOCK, NO_LOCK, NO_LOCK };

	/*
	 * Set address of controller to ping
	 */
	slurm_msg_t_init(&req);
	lock_slurmctld(config_read_lock);
	debug3("pinging slurmctld at %s", slurmctld_conf.control_addr);
	slurm_set_addr(&req.address, slurmctld_conf.slurmctld_port,
		       slurmctld_conf.control_addr);
	unlock_slurmctld(config_read_lock);

	req.msg_type = REQUEST_PING;

	if (slurm_send_recv_rc_msg_only_one(&req, &rc, 0) < 0) {
		error("_ping_controller/slurm_send_node_msg error: %m");
		return SLURM_ERROR;
	}

	if (rc) {
		error("_ping_controller/response error %d", rc);
		return SLURM_PROTOCOL_ERROR;
	}

	return SLURM_PROTOCOL_SUCCESS;
}
/* block_state_mutex must be unlocked before calling this. */
extern void bg_requeue_job(uint32_t job_id, bool wait_for_start,
			   bool slurmctld_locked, uint32_t job_state,
			   bool preempted)
{
	int rc;
	slurmctld_lock_t job_write_lock = {
		NO_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK, NO_LOCK };

	/* Wait for the slurmd to begin the batch script.
	   slurm_fail_job() is a no-op if issued prior to the script
	   initiation, so do the clean up here just in case the fail
	   job isn't run. */
	if (wait_for_start)
		sleep(2);

	if (!slurmctld_locked)
		lock_slurmctld(job_write_lock);
	rc = job_requeue(0, job_id, NULL, preempted, 0);
	if (rc == ESLURM_JOB_PENDING) {
		error("%s: Could not requeue pending job %u", __func__,
		      job_id);
	} else if (rc != SLURM_SUCCESS) {
		error("%s: Could not requeue job %u, failing it: %s",
		      __func__, job_id, slurm_strerror(rc));
		job_fail(job_id, job_state);
	}

	if (!slurmctld_locked)
		unlock_slurmctld(job_write_lock);
}
static void _requeue_when_finished(uint32_t job_id)
{
	/* Locks: write job */
	slurmctld_lock_t job_write_lock = {
		NO_LOCK, WRITE_LOCK, NO_LOCK, NO_LOCK };
	struct job_record *job_ptr;

	while (1) {
		lock_slurmctld(job_write_lock);
		job_ptr = find_job_record(job_id);
		if (IS_JOB_FINISHED(job_ptr)) {
			job_ptr->job_state = JOB_PENDING;
			job_ptr->details->submit_time = time(NULL);
			job_ptr->restart_cnt++;
			/* Since the job completion logger removes the
			 * submit, we need to add it again. */
			acct_policy_add_job_submit(job_ptr);
			unlock_slurmctld(job_write_lock);
			break;
		} else {
			unlock_slurmctld(job_write_lock);
			sleep(1);
		}
	}
}
/* Set a job's default partition to all partitions in the cluster */
extern int job_submit(struct job_descriptor *job_desc, uint32_t submit_uid,
		      char **err_msg)
{
	/* Locks: Read partition */
	slurmctld_lock_t part_read_lock = {
		NO_LOCK, NO_LOCK, NO_LOCK, READ_LOCK };
	ListIterator part_iterator;
	struct part_record *part_ptr;

	if (job_desc->partition)	/* job already specified partition */
		return SLURM_SUCCESS;

	lock_slurmctld(part_read_lock);
	part_iterator = list_iterator_create(part_list);
	while ((part_ptr = (struct part_record *) list_next(part_iterator))) {
		if (!(part_ptr->state_up & PARTITION_SUBMIT))
			continue;	/* nobody can submit jobs here */
		if (job_desc->partition)
			xstrcat(job_desc->partition, ",");
		xstrcat(job_desc->partition, part_ptr->name);
	}
	list_iterator_destroy(part_iterator);
	unlock_slurmctld(part_read_lock);
	//info("Set partition of submitted job to %s", job_desc->partition);

	return SLURM_SUCCESS;
}
static void *_job_fini(void *args)
{
	struct job_record *job_ptr = (struct job_record *)args;
	nhc_info_t nhc_info;
	/* Locks: Write job, write node */
	slurmctld_lock_t job_write_lock = {
		NO_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK };
	slurmctld_lock_t job_read_lock = {
		NO_LOCK, READ_LOCK, NO_LOCK, NO_LOCK };

	if (!job_ptr) {
		error("_job_fini: no job ptr given, this should never happen");
		return NULL;
	}

	memset(&nhc_info, 0, sizeof(nhc_info_t));
	lock_slurmctld(job_read_lock);
	nhc_info.jobid = job_ptr->job_id;
	nhc_info.nodelist = xstrdup(job_ptr->nodes);
	nhc_info.exit_code = 1; /* hard code to 1 to always run */
	nhc_info.user_id = job_ptr->user_id;
	unlock_slurmctld(job_read_lock);

	/* run NHC */
	_run_nhc(&nhc_info);
	/***********/

	xfree(nhc_info.nodelist);

	lock_slurmctld(job_write_lock);
	if (job_ptr->magic == JOB_MAGIC) {
		select_jobinfo_t *jobinfo = NULL;

		other_job_fini(job_ptr);

		jobinfo = job_ptr->select_jobinfo->data;
		jobinfo->cleaning = 0;
	} else
		error("_job_fini: job %u had a bad magic, "
		      "this should never happen", nhc_info.jobid);
	unlock_slurmctld(job_write_lock);

	return NULL;
}
static void *_job_fini(void *args)
{
	struct job_record *job_ptr = (struct job_record *)args;
	uint32_t job_id = 0;
	char *node_list = NULL;
	/* Locks: Write job, write node */
	slurmctld_lock_t job_write_lock = {
		NO_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK };
	slurmctld_lock_t job_read_lock = {
		NO_LOCK, READ_LOCK, NO_LOCK, NO_LOCK };

	if (!job_ptr) {
		error("_job_fini: no job ptr given, this should never happen");
		return NULL;
	}

	lock_slurmctld(job_read_lock);
	job_id = job_ptr->job_id;
	node_list = xstrdup(job_ptr->nodes);
	unlock_slurmctld(job_read_lock);

	/* run NHC */
	_run_nhc(job_id, node_list, 0);
	/***********/

	xfree(node_list);

	lock_slurmctld(job_write_lock);
	if (job_ptr->magic == JOB_MAGIC) {
		select_jobinfo_t *jobinfo = NULL;

		other_job_fini(job_ptr);

		jobinfo = job_ptr->select_jobinfo->data;
		jobinfo->cleaning = 0;
	} else
		error("_job_fini: job %u had a bad magic, "
		      "this should never happen", job_id);
	unlock_slurmctld(job_write_lock);

	return NULL;
}
/* Create reservations to contain hot-spare nodes
 * and purge vestigial reservations */
extern void create_hot_spare_resv(void)
{
	int i;
	char resv_name[1024];
	ListIterator part_iterator;
	struct part_record *part_ptr;
	/* Locks: Read partition */
	slurmctld_lock_t part_read_lock = {
		NO_LOCK, NO_LOCK, NO_LOCK, READ_LOCK };
	reservation_name_msg_t delete_resv_msg;
	resv_desc_msg_t resv_msg;
	time_t now = time(NULL);
	uint32_t node_cnt[2];

	lock_slurmctld(part_read_lock);
	part_iterator = list_iterator_create(part_list);
	while ((part_ptr = (struct part_record *) list_next(part_iterator))) {
		snprintf(resv_name, sizeof(resv_name), "HOT_SPARE_%s",
			 part_ptr->name);
		for (i = 0; i < hot_spare_info_cnt; i++) {
			if (hot_spare_info[i].part_ptr != part_ptr)
				continue;
			memset(&resv_msg, 0, sizeof(resv_desc_msg_t));
			node_cnt[0] = hot_spare_info[i].node_cnt;
			node_cnt[1] = 0;
			resv_msg.duration	= 356 * 24 * 60 * 60;
			resv_msg.end_time	= (time_t) NO_VAL;
			resv_msg.flags		= RESERVE_FLAG_MAINT |
						  RESERVE_FLAG_IGN_JOBS;
			resv_msg.name		= resv_name;
			resv_msg.node_cnt	= node_cnt;
			resv_msg.partition	= xstrdup(part_ptr->name);
			resv_msg.start_time	= now;
			resv_msg.users		= xstrdup("root");
			if (find_resv_name(resv_name)) {
				info("Updating vestigial reservation %s",
				     resv_name);
				(void) update_resv(&resv_msg);
			} else {
				info("Creating vestigial reservation %s",
				     resv_name);
				(void) create_resv(&resv_msg);
			}
			xfree(resv_msg.partition);
			xfree(resv_msg.users);
			break;
		}
		if ((i >= hot_spare_info_cnt) && find_resv_name(resv_name)) {
			info("Deleting vestigial reservation %s", resv_name);
			delete_resv_msg.name = resv_name;
			(void) delete_resv(&delete_resv_msg);
		}
	}
	list_iterator_destroy(part_iterator);
	unlock_slurmctld(part_read_lock);
}
/* _background_signal_hand - Process daemon-wide signals for the
 *	backup controller */
static void *_background_signal_hand(void *no_data)
{
	int sig, rc;
	sigset_t set;
	/* Locks: Write configuration, job, node, and partition */
	slurmctld_lock_t config_write_lock = {
		WRITE_LOCK, WRITE_LOCK, WRITE_LOCK, WRITE_LOCK };

	(void) pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
	(void) pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);

	while (slurmctld_config.shutdown_time == 0) {
		xsignal_sigset_create(backup_sigarray, &set);
		rc = sigwait(&set, &sig);
		if (rc == EINTR)
			continue;
		switch (sig) {
		case SIGINT:	/* kill -2 or <CTRL-C> */
		case SIGTERM:	/* kill -15 */
			info("Terminate signal (SIGINT or SIGTERM) received");
			slurmctld_config.shutdown_time = time(NULL);
			slurmctld_shutdown();
			return NULL;	/* Normal termination */
			break;
		case SIGHUP:	/* kill -1 */
			info("Reconfigure signal (SIGHUP) received");
			/*
			 * XXX - need to shut down the scheduler
			 * plugin, re-read the configuration, and then
			 * restart the (possibly new) plugin.
			 */
			lock_slurmctld(config_write_lock);
			rc = _backup_reconfig();
			if (rc)
				error("_backup_reconfig: %s",
				      slurm_strerror(rc));
			else {
				/* Leave config lock set through this */
				_update_cred_key();
			}
			unlock_slurmctld(config_write_lock);
			break;
		case SIGABRT:	/* abort */
			info("SIGABRT received");
			slurmctld_config.shutdown_time = time(NULL);
			slurmctld_shutdown();
			dump_core = true;
			return NULL;	/* Normal termination */
			break;
		default:
			error("Invalid signal (%d) received", sig);
		}
	}
	return NULL;
}
extern int fed_mgr_update_feds(slurmdb_update_object_t *update)
{
	List feds;
	slurmdb_federation_rec_t *fed = NULL;
	slurmdb_cluster_rec_t *cluster = NULL;
	slurmctld_lock_t fed_write_lock = {
		NO_LOCK, NO_LOCK, NO_LOCK, NO_LOCK, WRITE_LOCK };

	if (!update->objects)
		return SLURM_SUCCESS;

	slurm_mutex_lock(&init_mutex);
	if (!inited) {
		slurm_mutex_unlock(&init_mutex);
		return SLURM_SUCCESS; /* we haven't started the fed mgr and we
				       * can't start it from here, don't worry
				       * all will get set up later. */
	}
	slurm_mutex_unlock(&init_mutex);

	/* we only want one update happening at a time. */
	slurm_mutex_lock(&update_mutex);
	if (slurmctld_conf.debug_flags & DEBUG_FLAG_FEDR)
		info("Got a federation update");

	feds = update->objects;

	/* find the federation that this cluster is in.
	 * if it's changed from last time then update stored information.
	 * grab other clusters in federation
	 * establish connections with each cluster in federation */

	/* what if a remote cluster is removed from federation.
	 * have to detect that and close the connection to the remote */
	while ((fed = list_pop(feds))) {
		if (fed->cluster_list &&
		    (cluster = list_find_first(fed->cluster_list,
					       slurmdb_find_cluster_in_list,
					       slurmctld_cluster_name))) {
			_join_federation(fed, cluster, true);
			break;
		}
		slurmdb_destroy_federation_rec(fed);
	}

	if (!fed) {
		if (slurmctld_conf.debug_flags & DEBUG_FLAG_FEDR)
			info("Not part of any federation");
		lock_slurmctld(fed_write_lock);
		_leave_federation();
		unlock_slurmctld(fed_write_lock);
	}
	slurm_mutex_unlock(&update_mutex);
	return SLURM_SUCCESS;
}
/*
 * job_will_run - Determine if, when and where a priority ordered list of jobs
 *	can be initiated with the currently running jobs as a background
 * cmd_ptr IN   - CMD=JOBWILLRUN ARG=JOBID=<JOBID>[@<TIME>],<AVAIL_NODES>
 * err_code OUT - 0 on success or some error code
 * err_msg OUT  - error message if any of the specified jobs can not be started
 *		  at the specified time (if given) on the available nodes.
 *		  Otherwise information on when and where the pending jobs
 *		  will be initiated
 *		  ARG=<JOBID>:<PROCS>@<TIME>,<USED_NODES>
 * NOTE: xfree() err_msg if err_code is zero
 * RET 0 on success, -1 on failure */
extern int job_will_run(char *cmd_ptr, int *err_code, char **err_msg)
{
	char *arg_ptr, *buf, *tmp_buf, *tmp_char;
	uint32_t jobid;
	time_t start_time;
	char *avail_nodes;
	/* Locks: write job, read node and partition info */
	slurmctld_lock_t job_write_lock = {
		NO_LOCK, WRITE_LOCK, READ_LOCK, READ_LOCK };

	arg_ptr = strstr(cmd_ptr, "ARG=");
	if (arg_ptr == NULL) {
		*err_code = -300;
		*err_msg = "JOBWILLRUN lacks ARG";
		error("wiki: JOBWILLRUN lacks ARG");
		return -1;
	}
	arg_ptr += 4;

	if (strncmp(arg_ptr, "JOBID=", 6)) {
		*err_code = -300;
		*err_msg = "Invalid ARG value";
		error("wiki: JOBWILLRUN has invalid ARG value");
		return -1;
	}
	arg_ptr += 6;
	jobid = strtoul(arg_ptr, &tmp_char, 10);
	if (tmp_char[0] == '@')
		start_time = strtoul(tmp_char+1, &tmp_char, 10);
	else
		start_time = time(NULL);
	if (tmp_char[0] != ',') {
		*err_code = -300;
		*err_msg = "Invalid ARG value";
		error("wiki: JOBWILLRUN has invalid ARG value");
		return -1;
	}
	avail_nodes = tmp_char + 1;

	lock_slurmctld(job_write_lock);
	buf = _will_run_test(jobid, start_time, avail_nodes,
			     err_code, err_msg);
	unlock_slurmctld(job_write_lock);

	if (!buf)
		return -1;

	tmp_buf = xmalloc(strlen(buf) + 32);
	sprintf(tmp_buf, "SC=0 ARG=%s", buf);
	xfree(buf);
	*err_code = 0;
	*err_msg = tmp_buf;
	return 0;
}
/*
 * Returns true if the cluster is part of a federation.
 */
extern bool fed_mgr_is_active()
{
	int rc = false;
	slurmctld_lock_t fed_read_lock = {
		NO_LOCK, NO_LOCK, NO_LOCK, NO_LOCK, READ_LOCK };

	lock_slurmctld(fed_read_lock);
	if (fed_mgr_fed_rec)
		rc = true;
	unlock_slurmctld(fed_read_lock);

	return rc;
}
/* Fair Tree code called from the decay thread loop */
extern void fair_tree_decay(List jobs, time_t start)
{
	slurmctld_lock_t job_write_lock = {
		NO_LOCK, WRITE_LOCK, READ_LOCK, READ_LOCK };
	assoc_mgr_lock_t locks = {
		WRITE_LOCK, NO_LOCK, NO_LOCK, NO_LOCK, NO_LOCK };

	/* apply decayed usage */
	lock_slurmctld(job_write_lock);
	list_for_each(jobs, (ListForF) _ft_decay_apply_new_usage, &start);
	unlock_slurmctld(job_write_lock);

	/* calculate fs factor for associations */
	assoc_mgr_lock(&locks);
	_apply_priority_fs();
	assoc_mgr_unlock(&locks);

	/* assign job priorities */
	lock_slurmctld(job_write_lock);
	list_for_each(jobs, (ListForF) decay_apply_weighted_factors, &start);
	unlock_slurmctld(job_write_lock);
}
/* RET 0 on success, -1 on failure */
extern int job_requeue_wiki(char *cmd_ptr, int *err_code, char **err_msg)
{
	char *arg_ptr, *tmp_char;
	uint32_t jobid;
	struct job_record *job_ptr;
	static char reply_msg[128];
	int slurm_rc;
	/* Write lock on job and node info */
	slurmctld_lock_t job_write_lock = {
		NO_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK, NO_LOCK };

	arg_ptr = strstr(cmd_ptr, "ARG=");
	if (arg_ptr == NULL) {
		*err_code = -300;
		*err_msg = "REQUEUEJOB lacks ARG";
		error("wiki: REQUEUEJOB lacks ARG");
		return -1;
	}
	jobid = strtoul(arg_ptr+4, &tmp_char, 10);
	if ((tmp_char[0] != '\0') && (!isspace(tmp_char[0]))) {
		*err_code = -300;
		*err_msg = "Invalid ARG value";
		error("wiki: REQUEUEJOB has invalid jobid");
		return -1;
	}

	lock_slurmctld(job_write_lock);
	slurm_rc = job_requeue(0, jobid, NULL, false, 0);
	if (slurm_rc != SLURM_SUCCESS) {
		unlock_slurmctld(job_write_lock);
		*err_code = -700;
		*err_msg = slurm_strerror(slurm_rc);
		error("wiki: Failed to requeue job %u (%m)", jobid);
		return -1;
	}

	/* We need to clear the required node list here.
	 * If the job was submitted with srun and a
	 * required node list, it gets lost here. */
	job_ptr = find_job_record(jobid);
	if (job_ptr && job_ptr->details) {
		xfree(job_ptr->details->req_nodes);
		FREE_NULL_BITMAP(job_ptr->details->req_node_bitmap);
	}
	info("wiki: requeued job %u", jobid);
	unlock_slurmctld(job_write_lock);

	snprintf(reply_msg, sizeof(reply_msg),
		 "job %u requeued successfully", jobid);
	*err_msg = reply_msg;
	return 0;
}
/*
 * Must have FED unlocked prior to entering
 */
static void _fed_mgr_ptr_init(slurmdb_federation_rec_t *db_fed,
			      slurmdb_cluster_rec_t *cluster)
{
	ListIterator c_itr;
	slurmdb_cluster_rec_t *tmp_cluster, *db_cluster;
	slurmctld_lock_t fed_write_lock = {
		NO_LOCK, NO_LOCK, NO_LOCK, NO_LOCK, WRITE_LOCK };

	xassert(cluster);

	if (slurmctld_conf.debug_flags & DEBUG_FLAG_FEDR)
		info("Joining federation %s", db_fed->name);

	lock_slurmctld(fed_write_lock);
	if (fed_mgr_fed_rec) {
		/* we are already part of a federation, preserve existing
		 * connections */
		c_itr = list_iterator_create(db_fed->cluster_list);
		while ((db_cluster = list_next(c_itr))) {
			if (!xstrcmp(db_cluster->name,
				     slurmctld_cluster_name)) {
				fed_mgr_cluster_rec = db_cluster;
				continue;
			}
			if (!(tmp_cluster =
			      list_find_first(fed_mgr_fed_rec->cluster_list,
					      slurmdb_find_cluster_in_list,
					      db_cluster->name))) {
				/* don't worry about destroying the connection
				 * here.  It will happen below when we free
				 * fed_mgr_fed_rec (automagically). */
				continue;
			}
			slurm_mutex_lock(&tmp_cluster->lock);
			/* transfer over the connections we already have */
			db_cluster->fed.send = tmp_cluster->fed.send;
			tmp_cluster->fed.send = NULL;
			db_cluster->fed.recv = tmp_cluster->fed.recv;
			tmp_cluster->fed.recv = NULL;
			slurm_mutex_unlock(&tmp_cluster->lock);
		}
		list_iterator_destroy(c_itr);
		slurmdb_destroy_federation_rec(fed_mgr_fed_rec);
	} else
		fed_mgr_cluster_rec = cluster;

	fed_mgr_fed_rec = db_fed;
	unlock_slurmctld(fed_write_lock);
}
/* The timeslicer thread */
static void *_timeslicer_thread(void *arg)
{
	/* Write locks on job and read lock on nodes */
	slurmctld_lock_t job_write_lock = {
		NO_LOCK, WRITE_LOCK, READ_LOCK, NO_LOCK };
	ListIterator part_iterator;
	struct gs_part *p_ptr;

	if (gs_debug_flags & DEBUG_FLAG_GANG)
		info("gang: starting timeslicer loop");
	while (!thread_shutdown) {
		_slice_sleep();
		if (thread_shutdown)
			break;

		lock_slurmctld(job_write_lock);
		pthread_mutex_lock(&data_mutex);
		list_sort(gs_part_list, _sort_partitions);

		/* scan each partition... */
		if (gs_debug_flags & DEBUG_FLAG_GANG)
			info("gang: _timeslicer_thread: scanning partitions");
		part_iterator = list_iterator_create(gs_part_list);
		if (part_iterator == NULL)
			fatal("memory allocation failure");
		while ((p_ptr = (struct gs_part *) list_next(part_iterator))) {
			if (gs_debug_flags & DEBUG_FLAG_GANG) {
				info("gang: _timeslicer_thread: part %s: "
				     "run %u total %u", p_ptr->part_name,
				     p_ptr->jobs_active, p_ptr->num_jobs);
			}
			if (p_ptr->jobs_active <
			    (p_ptr->num_jobs + p_ptr->num_shadows)) {
				_cycle_job_list(p_ptr);
			}
		}
		list_iterator_destroy(part_iterator);
		pthread_mutex_unlock(&data_mutex);

		/* Preempt jobs that were formerly only suspended */
		_preempt_job_dequeue();	/* MUST BE OUTSIDE data_mutex lock */
		unlock_slurmctld(job_write_lock);
	}

	timeslicer_thread_id = (pthread_t) 0;
	pthread_exit((void *) 0);
	return NULL;
}
extern char *fed_mgr_find_sibling_name_by_ip(char *ip)
{
	char *name = NULL;
	slurmdb_cluster_rec_t *sibling = NULL;
	slurmctld_lock_t fed_read_lock = {
		NO_LOCK, NO_LOCK, NO_LOCK, NO_LOCK, READ_LOCK };

	lock_slurmctld(fed_read_lock);
	if (fed_mgr_fed_rec && fed_mgr_fed_rec->cluster_list &&
	    (sibling = list_find_first(fed_mgr_fed_rec->cluster_list,
				       _find_sibling_by_ip, ip)))
		name = xstrdup(sibling->name);
	unlock_slurmctld(fed_read_lock);

	return name;
}
static void _persist_callback_fini(void *arg)
{
	slurm_persist_conn_t *persist_conn = arg;
	slurmdb_cluster_rec_t *cluster;
	slurmctld_lock_t fed_write_lock = {
		NO_LOCK, NO_LOCK, NO_LOCK, NO_LOCK, WRITE_LOCK };

	/* If we are shutting down just return or you will get deadlock since
	 * all these locks are already locked. */
	if (!persist_conn || *persist_conn->shutdown)
		return;
	lock_slurmctld(fed_write_lock);

	/* shutting down */
	if (!fed_mgr_fed_rec) {
		unlock_slurmctld(fed_write_lock);
		return;
	}

	if (!(cluster = list_find_first(fed_mgr_fed_rec->cluster_list,
					slurmdb_find_cluster_in_list,
					persist_conn->cluster_name))) {
		info("Couldn't find cluster %s?",
		     persist_conn->cluster_name);
		unlock_slurmctld(fed_write_lock);
		return;
	}

	slurm_mutex_lock(&cluster->lock);
	/* This will get handled at the end of the thread, don't free it here */
	cluster->fed.recv = NULL;
//	persist_conn = cluster->fed.recv;
//	slurm_persist_conn_close(persist_conn);

	persist_conn = cluster->fed.send;
	if (persist_conn) {
		if (slurmctld_conf.debug_flags & DEBUG_FLAG_FEDR)
			info("Closing send to sibling cluster %s",
			     cluster->name);
		slurm_persist_conn_close(persist_conn);
	}
	slurm_mutex_unlock(&cluster->lock);

	unlock_slurmctld(fed_write_lock);
}
/* Perform periodic background activities */
static void *_bb_agent(void *args)
{
	/* Locks: write job */
	slurmctld_lock_t job_write_lock = {
		NO_LOCK, WRITE_LOCK, NO_LOCK, NO_LOCK };

	while (!bb_state.term_flag) {
		bb_sleep(&bb_state, AGENT_INTERVAL);
		if (bb_state.term_flag)
			break;
		lock_slurmctld(job_write_lock);
		pthread_mutex_lock(&bb_state.bb_mutex);
		_load_state(0);
		_timeout_bb_rec();
		pthread_mutex_unlock(&bb_state.bb_mutex);
		unlock_slurmctld(job_write_lock);
	}
	return NULL;
}
/* Notify a job via arbitrary message:
 *	CMD=NOTIFYJOB ARG=<jobid> MSG=<string>
 * RET 0 on success, -1 on failure */
extern int job_notify_wiki(char *cmd_ptr, int *err_code, char **err_msg)
{
	char *arg_ptr, *msg_ptr;
	int slurm_rc;
	uint32_t jobid;
	static char reply_msg[128];
	/* Locks: read job */
	slurmctld_lock_t job_read_lock = {
		NO_LOCK, READ_LOCK, NO_LOCK, NO_LOCK, NO_LOCK };

	arg_ptr = strstr(cmd_ptr, "ARG=");
	if (arg_ptr == NULL) {
		*err_code = -300;
		*err_msg = "NOTIFYJOB lacks ARG=";
		error("wiki: NOTIFYJOB lacks ARG=");
		return -1;
	}
	arg_ptr += 4;
	jobid = atol(arg_ptr);

	msg_ptr = strstr(cmd_ptr, "MSG=");
	if (msg_ptr == NULL) {
		*err_code = -300;
		*err_msg = "NOTIFYJOB lacks MSG=";
		error("wiki: NOTIFYJOB lacks MSG=");
		return -1;
	}
	msg_ptr += 4;

	lock_slurmctld(job_read_lock);
	slurm_rc = _job_notify(jobid, msg_ptr);
	unlock_slurmctld(job_read_lock);
	if (slurm_rc != SLURM_SUCCESS) {
		*err_code = -700;
		*err_msg = slurm_strerror(slurm_rc);
		error("wiki: Failed to notify job %u (%m)", jobid);
		return -1;
	}

	snprintf(reply_msg, sizeof(reply_msg),
		 "job %u notified successfully", jobid);
	*err_msg = reply_msg;
	return 0;
}
static void _join_federation(slurmdb_federation_rec_t *fed,
			     slurmdb_cluster_rec_t *cluster,
			     bool update)
{
	slurmctld_lock_t fed_read_lock = {
		NO_LOCK, NO_LOCK, NO_LOCK, NO_LOCK, READ_LOCK };

	_fed_mgr_ptr_init(fed, cluster);

	/* We must open the connections after we get out of the
	 * write_lock or we will end up in deadlock. */
	if (!update) {
		lock_slurmctld(fed_read_lock);
		_open_persist_sends();
		unlock_slurmctld(fed_read_lock);
	}
	_create_ping_thread();
}
extern int fed_mgr_fini()
{
	slurmctld_lock_t fed_write_lock = {
		NO_LOCK, NO_LOCK, NO_LOCK, NO_LOCK, WRITE_LOCK };

	slurm_mutex_lock(&init_mutex);
	inited = false;
	slurm_mutex_unlock(&init_mutex);

	lock_slurmctld(fed_write_lock);

	slurm_persist_conn_recv_server_fini();

	_leave_federation();

	unlock_slurmctld(fed_write_lock);

	return SLURM_SUCCESS;
}
/* RET 0 on success, -1 on failure */
extern int suspend_job(char *cmd_ptr, int *err_code, char **err_msg)
{
	char *arg_ptr, *tmp_char;
	int slurm_rc;
	suspend_msg_t msg;
	uint32_t jobid;
	static char reply_msg[128];
	/* Locks: write job and node info */
	slurmctld_lock_t job_write_lock = {
		NO_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK, NO_LOCK };

	arg_ptr = strstr(cmd_ptr, "ARG=");
	if (arg_ptr == NULL) {
		*err_code = -300;
		*err_msg = "SUSPENDJOB lacks ARG";
		error("wiki: SUSPENDJOB lacks ARG");
		return -1;
	}
	jobid = strtoul(arg_ptr+4, &tmp_char, 10);
	if ((tmp_char[0] != '\0') && (!isspace(tmp_char[0]))) {
		*err_code = -300;
		*err_msg = "Invalid ARG value";
		error("wiki: SUSPENDJOB has invalid jobid");
		return -1;
	}

	msg.job_id = jobid;
	msg.op = SUSPEND_JOB;
	lock_slurmctld(job_write_lock);
	slurm_rc = job_suspend(&msg, 0, -1, false, (uint16_t)NO_VAL);
	unlock_slurmctld(job_write_lock);
	if (slurm_rc != SLURM_SUCCESS) {
		*err_code = -700;
		*err_msg = slurm_strerror(slurm_rc);
		error("wiki: Failed to suspend job %u (%m)", jobid);
		return -1;
	}

	snprintf(reply_msg, sizeof(reply_msg),
		 "job %u suspended successfully", jobid);
	*err_msg = reply_msg;
	return 0;
}
/* block_state_mutex must be unlocked before calling this. */
extern void bg_requeue_job(uint32_t job_id, bool wait_for_start)
{
	int rc;
	slurmctld_lock_t job_write_lock = {
		NO_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK };

	/* Wait for the slurmd to begin the batch script.
	   slurm_fail_job() is a no-op if issued prior to the script
	   initiation, so do the clean up here just in case the fail
	   job isn't run. */
	if (wait_for_start)
		sleep(2);

	lock_slurmctld(job_write_lock);
	if ((rc = job_requeue(0, job_id, -1, (uint16_t)NO_VAL, false))) {
		error("Couldn't requeue job %u, failing it: %s",
		      job_id, slurm_strerror(rc));
		job_fail(job_id);
	}
	unlock_slurmctld(job_write_lock);
}
static void _notify_slurmctld_jobs(agent_info_t *agent_ptr)
{
	/* Locks: Write job */
	slurmctld_lock_t job_write_lock = {
		NO_LOCK, WRITE_LOCK, NO_LOCK, NO_LOCK };
	uint32_t job_id = 0, step_id = 0;
	thd_t *thread_ptr = agent_ptr->thread_struct;

	if (agent_ptr->msg_type == SRUN_PING) {
		srun_ping_msg_t *msg = *agent_ptr->msg_args_pptr;
		job_id  = msg->job_id;
		step_id = msg->step_id;
	} else if (agent_ptr->msg_type == SRUN_TIMEOUT) {
		srun_timeout_msg_t *msg = *agent_ptr->msg_args_pptr;
		job_id  = msg->job_id;
		step_id = msg->step_id;
	} else if (agent_ptr->msg_type == RESPONSE_RESOURCE_ALLOCATION) {
		resource_allocation_response_msg_t *msg =
			*agent_ptr->msg_args_pptr;
		job_id  = msg->job_id;
		step_id = NO_VAL;
	} else if ((agent_ptr->msg_type == SRUN_JOB_COMPLETE)	||
		   (agent_ptr->msg_type == SRUN_STEP_MISSING)	||
		   (agent_ptr->msg_type == SRUN_STEP_SIGNAL)	||
		   (agent_ptr->msg_type == SRUN_EXEC)		||
		   (agent_ptr->msg_type == SRUN_USER_MSG)) {
		return;		/* no need to note srun response */
	} else if (agent_ptr->msg_type == SRUN_NODE_FAIL) {
		return;		/* no need to note srun response */
	} else {
		error("_notify_slurmctld_jobs invalid msg_type %u",
		      agent_ptr->msg_type);
		return;
	}

	lock_slurmctld(job_write_lock);
	if (thread_ptr[0].state == DSH_DONE) {
		srun_response(job_id, step_id);
	}
	unlock_slurmctld(job_write_lock);
}
/* backfill_agent - detached thread periodically attempts to backfill jobs */
extern void *backfill_agent(void *args)
{
	struct timeval tv1, tv2;
	char tv_str[20];
	time_t now;
	double wait_time;
	static time_t last_backfill_time = 0;
	/* Read config and partitions; Write jobs and nodes */
	slurmctld_lock_t all_locks = {
		READ_LOCK, WRITE_LOCK, WRITE_LOCK, READ_LOCK };

	_load_config();
	last_backfill_time = time(NULL);
	while (!stop_backfill) {
		_my_sleep(backfill_interval);
		if (stop_backfill)
			break;
		if (config_flag) {
			config_flag = false;
			_load_config();
		}
		now = time(NULL);
		wait_time = difftime(now, last_backfill_time);
		if ((wait_time < backfill_interval) ||
		    _job_is_completing() || _many_pending_rpcs() ||
		    !avail_front_end() || !_more_work(last_backfill_time))
			continue;

		gettimeofday(&tv1, NULL);
		lock_slurmctld(all_locks);
		while (_attempt_backfill())
			;
		last_backfill_time = time(NULL);
		unlock_slurmctld(all_locks);
		gettimeofday(&tv2, NULL);
		_diff_tv_str(&tv1, &tv2, tv_str, 20);
		if (debug_flags & DEBUG_FLAG_BACKFILL)
			info("backfill: completed, %s", tv_str);
	}
	return NULL;
}
/*****************************************************************************\
 * spawn message handler thread
\*****************************************************************************/
extern int spawn_msg_thread(void)
{
	pthread_attr_t thread_attr_msg;
	slurm_ctl_conf_t *conf;
	/* Locks: Read configuration */
	slurmctld_lock_t config_read_lock = {
		READ_LOCK, NO_LOCK, NO_LOCK, NO_LOCK };

	lock_slurmctld(config_read_lock);
	conf = slurm_conf_lock();
	sched_port = conf->dynalloc_port;
	slurm_conf_unlock();
	unlock_slurmctld(config_read_lock);
	if (sched_port == 0) {
		error("DynAllocPort == 0, not spawning communication thread");
		return SLURM_ERROR;
	}

	slurm_mutex_lock(&thread_flag_mutex);
	if (thread_running) {
		error("dynalloc thread already running, not starting another");
		slurm_mutex_unlock(&thread_flag_mutex);
		return SLURM_ERROR;
	}

	slurm_attr_init(&thread_attr_msg);
	if (pthread_create(&msg_thread_id, &thread_attr_msg,
			   _msg_thread, NULL))
		fatal("pthread_create %m");
	else
		info("dynalloc: msg thread create successful!");

	slurm_attr_destroy(&thread_attr_msg);
	thread_running = true;
	slurm_mutex_unlock(&thread_flag_mutex);
	return SLURM_SUCCESS;
}