/*
 * Set the frequency of the profile timer selected by 'type'.
 *
 * type     - index into acct_gather_profile_timer[]; also passed to
 *            acct_gather_parse_freq()
 * freq     - frequency string tried first
 * freq_def - frequency string tried when 'freq' parses to -1
 *
 * If both strings parse to -1 the timer frequency is set to 0.
 */
static void _set_freq(int type, char *freq, char *freq_def)
{
	/* First choice: the explicitly supplied frequency string */
	acct_gather_profile_timer[type].freq =
		acct_gather_parse_freq(type, freq);
	if (acct_gather_profile_timer[type].freq != -1)
		return;

	/* Second choice: the default frequency string */
	acct_gather_profile_timer[type].freq =
		acct_gather_parse_freq(type, freq_def);
	if (acct_gather_profile_timer[type].freq != -1)
		return;

	/* Neither string produced a value */
	acct_gather_profile_timer[type].freq = 0;
}
extern int acct_gather_check_acct_freq_task(uint64_t job_mem_lim, char *acctg_freq) { int task_freq; static uint32_t acct_freq_task = NO_VAL; if (acct_freq_task == NO_VAL) { char *acct_freq = slurm_get_jobacct_gather_freq(); int i = acct_gather_parse_freq(PROFILE_TASK, acct_freq); xfree(acct_freq); /* If the value is -1 lets set the freq to something really high so we don't check this again. */ if (i == -1) acct_freq_task = (uint16_t)NO_VAL; else acct_freq_task = i; } if (!job_mem_lim || !acct_freq_task) return 0; task_freq = acct_gather_parse_freq(PROFILE_TASK, acctg_freq); if (task_freq == -1) return 0; if (task_freq == 0) { error("Can't turn accounting frequency off. " "We need it to monitor memory usage."); slurm_seterrno(ESLURMD_INVALID_ACCT_FREQ); return 1; } else if (task_freq > acct_freq_task) { error("Can't set frequency to %d, it is higher than %u. " "We need it to be at least at this level to " "monitor memory usage.", task_freq, acct_freq_task); slurm_seterrno(ESLURMD_INVALID_ACCT_FREQ); return 1; } return 0; }
static int _check_acct_freq_task(uint32_t job_mem_lim, char *acctg_freq) { int task_freq; if (!job_mem_lim || !conf->acct_freq_task) return 0; task_freq = acct_gather_parse_freq(PROFILE_TASK, acctg_freq); if (task_freq == -1) return 0; if ((task_freq == 0) || (task_freq > conf->acct_freq_task)) { error("Can't set frequency to %d, it is higher than %u. " "We need it to be at least at this level to " "monitor memory usage.", task_freq, conf->acct_freq_task); slurm_seterrno (ESLURMD_INVALID_ACCT_FREQ); return 1; } return 0; }
/* * Read the slurm configuration file (slurm.conf) and substitute some * values into the slurmd configuration in preference of the defaults. */ static void _read_config(void) { char *path_pubkey = NULL; slurm_ctl_conf_t *cf = NULL; uint16_t tmp16 = 0; #ifndef HAVE_FRONT_END bool cr_flag = false, gang_flag = false; #endif cf = slurm_conf_lock(); slurm_mutex_lock(&conf->config_mutex); if (conf->conffile == NULL) conf->conffile = xstrdup(cf->slurm_conf); conf->slurm_user_id = cf->slurm_user_id; conf->cr_type = cf->select_type_param; path_pubkey = xstrdup(cf->job_credential_public_certificate); if (!conf->logfile) conf->logfile = xstrdup(cf->slurmd_logfile); #ifndef HAVE_FRONT_END if (!strcmp(cf->select_type, "select/cons_res")) cr_flag = true; if (cf->preempt_mode & PREEMPT_MODE_GANG) gang_flag = true; #endif slurm_conf_unlock(); /* node_name may already be set from a command line parameter */ if (conf->node_name == NULL) conf->node_name = slurm_conf_get_nodename(conf->hostname); /* if we didn't match the form of the hostname already * stored in conf->hostname, check to see if we match any * valid aliases */ if (conf->node_name == NULL) conf->node_name = slurm_conf_get_aliased_nodename(); if (conf->node_name == NULL) conf->node_name = slurm_conf_get_nodename("localhost"); if (conf->node_name == NULL) fatal("Unable to determine this slurmd's NodeName"); _massage_pathname(&conf->logfile); /* set node_addr if relevant */ if ((conf->node_addr == NULL) && (conf->node_addr = slurm_conf_get_nodeaddr(conf->hostname)) && (strcmp(conf->node_addr, conf->hostname) == 0)) { xfree(conf->node_addr); /* Sets to NULL */ } conf->port = slurm_conf_get_port(conf->node_name); slurm_conf_get_cpus_bsct(conf->node_name, &conf->conf_cpus, &conf->conf_boards, &conf->conf_sockets, &conf->conf_cores, &conf->conf_threads); /* store hardware properties in slurmd_config */ xfree(conf->block_map); xfree(conf->block_map_inv); _update_logging(); _update_nice(); get_cpuinfo(&conf->actual_cpus, 
&conf->actual_boards, &conf->actual_sockets, &conf->actual_cores, &conf->actual_threads, &conf->block_map_size, &conf->block_map, &conf->block_map_inv); #ifdef HAVE_FRONT_END /* * When running with multiple frontends, the slurmd S:C:T values are not * relevant, hence ignored by both _register_front_ends (sets all to 1) * and validate_nodes_via_front_end (uses slurm.conf values). * Report actual hardware configuration, irrespective of FastSchedule. */ conf->cpus = conf->actual_cpus; conf->boards = conf->actual_boards; conf->sockets = conf->actual_sockets; conf->cores = conf->actual_cores; conf->threads = conf->actual_threads; #else /* If the actual resources on a node differ than what is in * the configuration file and we are using * cons_res or gang scheduling we have to use what is in the * configuration file because the slurmctld creates bitmaps * for scheduling before these nodes check in. */ if (((cf->fast_schedule == 0) && !cr_flag && !gang_flag) || ((cf->fast_schedule == 1) && (conf->actual_cpus < conf->conf_cpus))) { conf->cpus = conf->actual_cpus; conf->boards = conf->actual_boards; conf->sockets = conf->actual_sockets; conf->cores = conf->actual_cores; conf->threads = conf->actual_threads; } else { conf->cpus = conf->conf_cpus; conf->boards = conf->conf_boards; conf->sockets = conf->conf_sockets; conf->cores = conf->conf_cores; conf->threads = conf->conf_threads; } if ((conf->cpus != conf->actual_cpus) || (conf->sockets != conf->actual_sockets) || (conf->cores != conf->actual_cores) || (conf->threads != conf->actual_threads)) { if (cf->fast_schedule) { info("Node configuration differs from hardware: " "CPUs=%u:%u(hw) Boards=%u:%u(hw) " "SocketsPerBoard=%u:%u(hw) CoresPerSocket=%u:%u(hw) " "ThreadsPerCore=%u:%u(hw)", conf->cpus, conf->actual_cpus, conf->boards, conf->actual_boards, conf->sockets, conf->actual_sockets, conf->cores, conf->actual_cores, conf->threads, conf->actual_threads); } else if ((cf->fast_schedule == 0) && (cr_flag || gang_flag)) { 
error("You are using cons_res or gang scheduling with " "Fastschedule=0 and node configuration differs " "from hardware. The node configuration used " "will be what is in the slurm.conf because of " "the bitmaps the slurmctld must create before " "the slurmd registers.\n" " CPUs=%u:%u(hw) Boards=%u:%u(hw) " "SocketsPerBoard=%u:%u(hw) CoresPerSocket=%u:%u(hw) " "ThreadsPerCore=%u:%u(hw)", conf->cpus, conf->actual_cpus, conf->boards, conf->actual_boards, conf->sockets, conf->actual_sockets, conf->cores, conf->actual_cores, conf->threads, conf->actual_threads); } } #endif get_memory(&conf->real_memory_size); get_up_time(&conf->up_time); cf = slurm_conf_lock(); get_tmp_disk(&conf->tmp_disk_space, cf->tmp_fs); _free_and_set(&conf->epilog, xstrdup(cf->epilog)); _free_and_set(&conf->prolog, xstrdup(cf->prolog)); _free_and_set(&conf->tmpfs, xstrdup(cf->tmp_fs)); _free_and_set(&conf->health_check_program, xstrdup(cf->health_check_program)); _free_and_set(&conf->spooldir, xstrdup(cf->slurmd_spooldir)); _massage_pathname(&conf->spooldir); _free_and_set(&conf->pidfile, xstrdup(cf->slurmd_pidfile)); _massage_pathname(&conf->pidfile); _free_and_set(&conf->select_type, xstrdup(cf->select_type)); _free_and_set(&conf->task_prolog, xstrdup(cf->task_prolog)); _free_and_set(&conf->task_epilog, xstrdup(cf->task_epilog)); _free_and_set(&conf->pubkey, path_pubkey); conf->debug_flags = cf->debug_flags; conf->propagate_prio = cf->propagate_prio_process; _free_and_set(&conf->job_acct_gather_freq, xstrdup(cf->job_acct_gather_freq)); conf->acct_freq_task = (uint16_t)NO_VAL; tmp16 = acct_gather_parse_freq(PROFILE_TASK, conf->job_acct_gather_freq); if (tmp16 != -1) conf->acct_freq_task = tmp16; _free_and_set(&conf->acct_gather_energy_type, xstrdup(cf->acct_gather_energy_type)); _free_and_set(&conf->acct_gather_filesystem_type, xstrdup(cf->acct_gather_filesystem_type)); _free_and_set(&conf->acct_gather_infiniband_type, xstrdup(cf->acct_gather_infiniband_type)); 
_free_and_set(&conf->acct_gather_profile_type, xstrdup(cf->acct_gather_profile_type)); _free_and_set(&conf->job_acct_gather_type, xstrdup(cf->job_acct_gather_type)); if ( (conf->node_name == NULL) || (conf->node_name[0] == '\0') ) fatal("Node name lookup failure"); if (cf->control_addr == NULL) fatal("Unable to establish controller machine"); if (cf->slurmctld_port == 0) fatal("Unable to establish controller port"); conf->slurmd_timeout = cf->slurmd_timeout; conf->use_pam = cf->use_pam; conf->task_plugin_param = cf->task_plugin_param; slurm_mutex_unlock(&conf->config_mutex); slurm_conf_unlock(); }
static slurmd_conf_t *read_slurmd_conf_lite(int fd) { int rc; int len; Buf buffer = NULL; slurmd_conf_t *confl, *local_conf = NULL; int tmp_int = 0; /* First check to see if we've already initialized the * global slurmd_conf_t in 'conf'. Allocate memory if not. */ if (conf) { confl = conf; } else { local_conf = xmalloc(sizeof(slurmd_conf_t)); confl = local_conf; } safe_read(fd, &len, sizeof(int)); buffer = init_buf(len); safe_read(fd, buffer->head, len); rc = unpack_slurmd_conf_lite_no_alloc(confl, buffer); if (rc == SLURM_ERROR) fatal("slurmstepd: problem with unpack of slurmd_conf"); free_buf(buffer); confl->log_opts.prefix_level = 1; confl->log_opts.stderr_level = confl->debug_level; confl->log_opts.logfile_level = confl->debug_level; confl->log_opts.syslog_level = confl->debug_level; /* * If daemonizing, turn off stderr logging -- also, if * logging to a file, turn off syslog. * * Otherwise, if remaining in foreground, turn off logging * to syslog (but keep logfile level) */ if (confl->daemonize) { confl->log_opts.stderr_level = LOG_LEVEL_QUIET; if (confl->logfile) confl->log_opts.syslog_level = LOG_LEVEL_QUIET; } else confl->log_opts.syslog_level = LOG_LEVEL_QUIET; confl->acct_freq_task = (uint16_t)NO_VAL; tmp_int = acct_gather_parse_freq(PROFILE_TASK, confl->job_acct_gather_freq); if (tmp_int != -1) confl->acct_freq_task = tmp_int; return (confl); rwfail: FREE_NULL_BUFFER(buffer); xfree(local_conf); return (NULL); }