/* Initialize power_save module parameters. * Return 0 on valid configuration to run power saving, * otherwise log the problem and return -1 */ static int _init_power_config(void) { slurm_ctl_conf_t *conf = slurm_conf_lock(); last_config = slurmctld_conf.last_update; idle_time = conf->suspend_time - 1; suspend_rate = conf->suspend_rate; resume_timeout = conf->resume_timeout; resume_rate = conf->resume_rate; slurmd_timeout = conf->slurmd_timeout; suspend_timeout = conf->suspend_timeout; _clear_power_config(); if (conf->suspend_program) suspend_prog = xstrdup(conf->suspend_program); if (conf->resume_program) resume_prog = xstrdup(conf->resume_program); if (conf->suspend_exc_nodes) exc_nodes = xstrdup(conf->suspend_exc_nodes); if (conf->suspend_exc_parts) exc_parts = xstrdup(conf->suspend_exc_parts); slurm_conf_unlock(); if (idle_time < 0) { /* not an error */ debug("power_save module disabled, SuspendTime < 0"); return -1; } if (suspend_rate < 0) { error("power_save module disabled, SuspendRate < 0"); return -1; } if (resume_rate < 0) { error("power_save module disabled, ResumeRate < 0"); return -1; } if (suspend_prog == NULL) { error("power_save module disabled, NULL SuspendProgram"); return -1; } else if (!_valid_prog(suspend_prog)) { error("power_save module disabled, invalid SuspendProgram %s", suspend_prog); return -1; } if (resume_prog == NULL) { error("power_save module disabled, NULL ResumeProgram"); return -1; } else if (!_valid_prog(resume_prog)) { error("power_save module disabled, invalid ResumeProgram %s", resume_prog); return -1; } return 0; }
/* * init_power_save - Initialize the power save module. Started as a * pthread. Terminates automatically at slurmctld shutdown time. * Input and output are unused. */ static void *_init_power_save(void *arg) { /* Locks: Read nodes */ slurmctld_lock_t node_read_lock = { NO_LOCK, READ_LOCK, NO_LOCK, NO_LOCK }; /* Locks: Write nodes */ slurmctld_lock_t node_write_lock = { NO_LOCK, WRITE_LOCK, NO_LOCK, NO_LOCK }; time_t now, boot_time = 0, last_power_scan = 0; if (power_save_config && !power_save_enabled) { debug("power_save mode not enabled"); return NULL; } suspend_node_bitmap = bit_alloc(node_record_count); resume_node_bitmap = bit_alloc(node_record_count); while (slurmctld_config.shutdown_time == 0) { sleep(1); if (_reap_procs() < 2) { debug("power_save programs getting backlogged"); continue; } if ((last_config != slurmctld_conf.last_update) && (_init_power_config())) { info("power_save mode has been disabled due to " "configuration changes"); goto fini; } now = time(NULL); if (boot_time == 0) boot_time = now; /* Only run every 60 seconds or after a node state change, * whichever happens first */ if ((last_node_update >= last_power_scan) || (now >= (last_power_scan + 60))) { lock_slurmctld(node_write_lock); _do_power_work(now); unlock_slurmctld(node_write_lock); last_power_scan = now; } if (slurmd_timeout && (now > (boot_time + (slurmd_timeout / 2)))) { lock_slurmctld(node_read_lock); _re_wake(); unlock_slurmctld(node_read_lock); /* prevent additional executions */ boot_time += (365 * 24 * 60 * 60); slurmd_timeout = 0; } } fini: _clear_power_config(); FREE_NULL_BITMAP(suspend_node_bitmap); FREE_NULL_BITMAP(resume_node_bitmap); _shutdown_power(); slurm_mutex_lock(&power_mutex); power_save_enabled = false; pthread_cond_signal(&power_cond); slurm_mutex_unlock(&power_mutex); pthread_exit(NULL); return NULL; }
/* Initialize power_save module parameters. * Return 0 on valid configuration to run power saving, * otherwise log the problem and return -1 */ static int _init_power_config(void) { slurm_ctl_conf_t *conf = slurm_conf_lock(); last_config = slurmctld_conf.last_update; idle_time = conf->suspend_time - 1; suspend_rate = conf->suspend_rate; resume_timeout = conf->resume_timeout; resume_rate = conf->resume_rate; slurmd_timeout = conf->slurmd_timeout; suspend_timeout = conf->suspend_timeout; _clear_power_config(); if (conf->suspend_program) suspend_prog = xstrdup(conf->suspend_program); if (conf->resume_program) resume_prog = xstrdup(conf->resume_program); if (conf->suspend_exc_nodes) exc_nodes = xstrdup(conf->suspend_exc_nodes); if (conf->suspend_exc_parts) exc_parts = xstrdup(conf->suspend_exc_parts); slurm_conf_unlock(); if (idle_time < 0) { /* not an error */ debug("power_save module disabled, SuspendTime < 0"); return -1; } if (suspend_rate < 0) { error("power_save module disabled, SuspendRate < 0"); return -1; } if (resume_rate < 0) { error("power_save module disabled, ResumeRate < 0"); return -1; } if (suspend_prog == NULL) { error("power_save module disabled, NULL SuspendProgram"); return -1; } else if (!_valid_prog(suspend_prog)) { error("power_save module disabled, invalid SuspendProgram %s", suspend_prog); return -1; } if (resume_prog == NULL) { error("power_save module disabled, NULL ResumeProgram"); return -1; } else if (!_valid_prog(resume_prog)) { error("power_save module disabled, invalid ResumeProgram %s", resume_prog); return -1; } if (exc_nodes && (node_name2bitmap(exc_nodes, false, &exc_node_bitmap))) { error("power_save module disabled, " "invalid SuspendExcNodes %s", exc_nodes); return -1; } if (exc_parts) { char *tmp = NULL, *one_part = NULL, *part_list = NULL; struct part_record *part_ptr = NULL; int rc = 0; part_list = xstrdup(exc_parts); one_part = strtok_r(part_list, ",", &tmp); while (one_part != NULL) { part_ptr = find_part_record(one_part); if (!part_ptr) { error("power_save module disabled, " "invalid SuspendExcPart %s", one_part); rc = -1; break; } if (exc_node_bitmap) bit_or(exc_node_bitmap, part_ptr->node_bitmap); else exc_node_bitmap = bit_copy(part_ptr-> node_bitmap); one_part = strtok_r(NULL, ",", &tmp); } xfree(part_list); if (rc) return rc; } if (exc_node_bitmap) { char *tmp = bitmap2node_name(exc_node_bitmap); debug("power_save module, excluded nodes %s", tmp); xfree(tmp); } return 0; }