/* builtin_agent - detached thread periodically when pending jobs can start */ extern void *builtin_agent(void *args) { time_t now; double wait_time; static time_t last_sched_time = 0; /* Read config, nodes and partitions; Write jobs */ slurmctld_lock_t all_locks = { READ_LOCK, WRITE_LOCK, READ_LOCK, READ_LOCK, READ_LOCK }; _load_config(); last_sched_time = time(NULL); while (!stop_builtin) { _my_sleep(builtin_interval); if (stop_builtin) break; if (config_flag) { config_flag = false; _load_config(); } now = time(NULL); wait_time = difftime(now, last_sched_time); if ((wait_time < builtin_interval)) continue; lock_slurmctld(all_locks); _compute_start_times(); last_sched_time = time(NULL); (void) bb_g_job_try_stage_in(); unlock_slurmctld(all_locks); } return NULL; }
/* backfill_agent - detached thread periodically attempts to backfill jobs */ extern void *backfill_agent(void *args) { time_t now; double wait_time; static time_t last_backfill_time = 0; /* Read config and partitions; Write jobs and nodes */ slurmctld_lock_t all_locks = { READ_LOCK, WRITE_LOCK, WRITE_LOCK, READ_LOCK }; _load_config(); last_backfill_time = time(NULL); while (!stop_backfill) { _my_sleep(backfill_interval); if (stop_backfill) break; if (config_flag) { config_flag = false; _load_config(); } now = time(NULL); wait_time = difftime(now, last_backfill_time); if ((wait_time < backfill_interval) || _job_is_completing() || _many_pending_rpcs() || !avail_front_end(NULL) || !_more_work(last_backfill_time)) continue; lock_slurmctld(all_locks); while (_attempt_backfill()) ; last_backfill_time = time(NULL); unlock_slurmctld(all_locks); } return NULL; }
/* Checkpoint processing pthread * Never returns, but is cancelled on plugin termiantion */ static void *_ckpt_agent_thr(void *arg) { ListIterator iter; struct ckpt_timeout_info *rec; time_t now; while (1) { _my_sleep(1); if (ckpt_agent_stop) break; if (!ckpt_timeout_list) continue; now = time(NULL); iter = list_iterator_create(ckpt_timeout_list); slurm_mutex_lock(&ckpt_agent_mutex); /* look for and process any timeouts */ while ((rec = list_next(iter))) { if (rec->end_time > now) continue; info("checkpoint timeout for %u.%u", rec->job_id, rec->step_id); _ckpt_signal_step(rec); list_delete_item(iter); } slurm_mutex_unlock(&ckpt_agent_mutex); list_iterator_destroy(iter); } return NULL; }
extern void *_sicp_agent(void *args) { static time_t last_sicp_time = 0; time_t now; double wait_time; while (!sicp_stop) { _my_sleep(1); if (sicp_stop) break; now = time(NULL); wait_time = difftime(now, last_sicp_time); if (wait_time < sicp_interval) continue; last_sicp_time = now; _load_sicp_other_cluster(); pthread_mutex_lock(&sicp_lock); list_delete_all(sicp_job_list, &_list_find_sicp_old, ""); if (slurm_get_debug_flags() & DEBUG_FLAG_SICP) _log_sicp_recs(); pthread_mutex_unlock(&sicp_lock); _dump_sicp_state(); /* Has own locking */ } return NULL; }
/* backfill_agent - detached thread periodically attempts to backfill jobs */ extern void *backfill_agent(void *args) { struct timeval tv1, tv2; char tv_str[20]; time_t now; double wait_time; static time_t last_backfill_time = 0; /* Read config and partitions; Write jobs and nodes */ slurmctld_lock_t all_locks = { READ_LOCK, WRITE_LOCK, WRITE_LOCK, READ_LOCK }; _load_config(); last_backfill_time = time(NULL); while (!stop_backfill) { _my_sleep(backfill_interval); if (stop_backfill) break; if (config_flag) { config_flag = false; _load_config(); } now = time(NULL); wait_time = difftime(now, last_backfill_time); if ((wait_time < backfill_interval) || _job_is_completing() || _many_pending_rpcs() || !avail_front_end() || !_more_work(last_backfill_time)) continue; gettimeofday(&tv1, NULL); lock_slurmctld(all_locks); while (_attempt_backfill()) ; last_backfill_time = time(NULL); unlock_slurmctld(all_locks); gettimeofday(&tv2, NULL); _diff_tv_str(&tv1, &tv2, tv_str, 20); if (debug_flags & DEBUG_FLAG_BACKFILL) info("backfill: completed, %s", tv_str); } return NULL; }
/* backfill_agent - detached thread periodically attempts to backfill jobs */ extern void *backfill_agent(void *args) { time_t now; double wait_time; static time_t last_backfill_time = 0; /* Read config and partitions; Write jobs and nodes */ slurmctld_lock_t all_locks = { READ_LOCK, WRITE_LOCK, WRITE_LOCK, READ_LOCK }; #if HAVE_SYS_PRCTL_H if (prctl(PR_SET_NAME, "slurmctld_bckfl", NULL, NULL, NULL) < 0) { error("%s: cannot set my name to %s %m", __func__, "slurm_backfill"); } #endif _load_config(); last_backfill_time = time(NULL); while (!stop_backfill) { _my_sleep(backfill_interval * 1000000); if (stop_backfill) break; if (config_flag) { config_flag = false; _load_config(); } now = time(NULL); wait_time = difftime(now, last_backfill_time); if ((wait_time < backfill_interval) || _job_is_completing() || _many_pending_rpcs() || !avail_front_end(NULL) || !_more_work(last_backfill_time)) continue; lock_slurmctld(all_locks); (void) _attempt_backfill(); last_backfill_time = time(NULL); unlock_slurmctld(all_locks); } return NULL; }
/* Return non-zero to break the backfill loop if change in job, node or * partition state or the backfill scheduler needs to be stopped. */ static int _yield_locks(void) { slurmctld_lock_t all_locks = { READ_LOCK, WRITE_LOCK, WRITE_LOCK, READ_LOCK }; time_t job_update, node_update, part_update; job_update = last_job_update; node_update = last_node_update; part_update = last_part_update; unlock_slurmctld(all_locks); _my_sleep(backfill_interval); lock_slurmctld(all_locks); if ((last_job_update == job_update) && (last_node_update == node_update) && (last_part_update == part_update) && (! stop_backfill) && (! config_flag)) return 0; else return 1; }