/*
 * acct_gather_energy_unpack - read an energy record out of a buffer
 * OUT energy - receives the freshly allocated record, or NULL on failure
 * IN buffer - buffer to read from
 * IN protocol_version - protocol version the data was packed with
 * RET SLURM_SUCCESS, or SLURM_ERROR when unpacking fails
 * NOTE: the safe_unpack*() helpers are expected to jump to the
 *	unpack_error label on failure.
 */
extern int acct_gather_energy_unpack(acct_gather_energy_t **energy, Buf buffer,
				     uint16_t protocol_version)
{
	acct_gather_energy_t *rec = acct_gather_energy_alloc();

	*energy = rec;

	/* These four fields lead the record for every protocol version. */
	safe_unpack32(&rec->base_consumed_energy, buffer);
	safe_unpack32(&rec->base_watts, buffer);
	safe_unpack32(&rec->consumed_energy, buffer);
	safe_unpack32(&rec->current_watts, buffer);

	/* The trailing fields are only read for 2.6 and later senders. */
	if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) {
		safe_unpack32(&rec->previous_consumed_energy, buffer);
		safe_unpack_time(&rec->poll_time, buffer);
	}

	return SLURM_SUCCESS;

unpack_error:
	/* Do not hand a partially filled record back to the caller. */
	acct_gather_energy_destroy(rec);
	*energy = NULL;
	return SLURM_ERROR;
}
/* * create_node_record - create a node record and set its values to defaults * IN config_ptr - pointer to node's configuration information * IN node_name - name of the node * RET pointer to the record or NULL if error * NOTE: allocates memory at node_record_table_ptr that must be xfreed when * the global node table is no longer required */ extern struct node_record *create_node_record ( struct config_record *config_ptr, char *node_name) { struct node_record *node_ptr; int old_buffer_size, new_buffer_size; last_node_update = time (NULL); xassert(config_ptr); xassert(node_name); /* round up the buffer size to reduce overhead of xrealloc */ old_buffer_size = (node_record_count) * sizeof (struct node_record); old_buffer_size = ((int) ((old_buffer_size / BUF_SIZE) + 1)) * BUF_SIZE; new_buffer_size = (node_record_count + 1) * sizeof (struct node_record); new_buffer_size = ((int) ((new_buffer_size / BUF_SIZE) + 1)) * BUF_SIZE; if (!node_record_table_ptr) { node_record_table_ptr = (struct node_record *) xmalloc (new_buffer_size); } else if (old_buffer_size != new_buffer_size) xrealloc (node_record_table_ptr, new_buffer_size); node_ptr = node_record_table_ptr + (node_record_count++); node_ptr->name = xstrdup(node_name); node_ptr->config_ptr = config_ptr; /* these values will be overwritten when the node actually registers */ node_ptr->cpus = config_ptr->cpus; node_ptr->cpu_load = NO_VAL; node_ptr->free_mem = NO_VAL64; node_ptr->cpu_spec_list = xstrdup(config_ptr->cpu_spec_list); node_ptr->boards = config_ptr->boards; node_ptr->sockets = config_ptr->sockets; node_ptr->cores = config_ptr->cores; node_ptr->core_spec_cnt = config_ptr->core_spec_cnt; node_ptr->threads = config_ptr->threads; node_ptr->mem_spec_limit = config_ptr->mem_spec_limit; node_ptr->real_memory = config_ptr->real_memory; node_ptr->node_spec_bitmap = NULL; node_ptr->tmp_disk = config_ptr->tmp_disk; node_ptr->select_nodeinfo = select_g_select_nodeinfo_alloc(); node_ptr->energy = acct_gather_energy_alloc(1); 
node_ptr->ext_sensors = ext_sensors_alloc(); node_ptr->owner = NO_VAL; node_ptr->mcs_label = NULL; node_ptr->protocol_version = SLURM_MIN_PROTOCOL_VERSION; xassert (node_ptr->magic = NODE_MAGIC) /* set value */; return node_ptr; }
/*
 * acct_gather_energy_unpack - read an energy record out of a buffer
 * IN/OUT energy - when need_alloc is true, receives a newly allocated
 *	record (NULL on failure); otherwise points at the caller's record,
 *	which is filled in place and zeroed on failure
 * IN buffer - buffer to read from
 * IN protocol_version - protocol version the data was packed with
 * IN need_alloc - true to allocate the record here
 * RET SLURM_SUCCESS, or SLURM_ERROR when unpacking fails
 * NOTE: the safe_unpack*() helpers are expected to jump to the
 *	unpack_error label on failure. A protocol_version below
 *	SLURM_MIN_PROTOCOL_VERSION reads nothing and returns SLURM_SUCCESS.
 */
extern int acct_gather_energy_unpack(acct_gather_energy_t **energy, Buf buffer,
				     uint16_t protocol_version, bool need_alloc)
{
	uint32_t narrow;
	acct_gather_energy_t *rec;

	if (need_alloc)
		*energy = acct_gather_energy_alloc(1);
	rec = *energy;

	if (protocol_version >= SLURM_15_08_PROTOCOL_VERSION) {
		/* energy counters are carried as 64-bit values */
		safe_unpack64(&rec->base_consumed_energy, buffer);
		safe_unpack32(&rec->base_watts, buffer);
		safe_unpack64(&rec->consumed_energy, buffer);
		safe_unpack32(&rec->current_watts, buffer);
		safe_unpack64(&rec->previous_consumed_energy, buffer);
		safe_unpack_time(&rec->poll_time, buffer);
	} else if (protocol_version >= SLURM_MIN_PROTOCOL_VERSION) {
		/* older senders pack 32-bit counters; widen each one */
		safe_unpack32(&narrow, buffer);
		rec->base_consumed_energy = (uint64_t) narrow;
		safe_unpack32(&rec->base_watts, buffer);
		safe_unpack32(&narrow, buffer);
		rec->consumed_energy = (uint64_t) narrow;
		safe_unpack32(&rec->current_watts, buffer);
		safe_unpack32(&narrow, buffer);
		rec->previous_consumed_energy = (uint64_t) narrow;
		safe_unpack_time(&rec->poll_time, buffer);
	}

	return SLURM_SUCCESS;

unpack_error:
	if (!need_alloc) {
		/* caller owns the record: just wipe the partial contents */
		memset(rec, 0, sizeof(*rec));
		return SLURM_ERROR;
	}

	/* we own the record: release it and clear the caller's pointer */
	acct_gather_energy_destroy(rec);
	*energy = NULL;
	return SLURM_ERROR;
}
/*
 * acct_gather_energy_p_conf_set - plugin configuration hook
 * IN tbl - parsed configuration table (not read by this implementation)
 *
 * Does nothing unless _run_in_daemon() reports true. On the first such
 * call it allocates local_energy and takes an initial reading; every such
 * call logs that the plugin is loaded.
 */
extern void acct_gather_energy_p_conf_set(s_p_hashtbl_t *tbl)
{
	static bool flag_init = false;

	if (!_run_in_daemon())
		return;

	if (!flag_init) {
		/* one-time setup, guarded by the static flag */
		flag_init = true;
		local_energy = acct_gather_energy_alloc(1);
		if (_get_latest_stats(GET_ENERGY))
			_get_joules_task(local_energy);
		else
			local_energy->current_watts = NO_VAL;
	}

	debug("%s loaded", plugin_name);
}
/* * create_node_record - create a node record and set its values to defaults * IN config_ptr - pointer to node's configuration information * IN node_name - name of the node * RET pointer to the record or NULL if error * NOTE: allocates memory at node_record_table_ptr that must be xfreed when * the global node table is no longer required */ extern struct node_record *create_node_record ( struct config_record *config_ptr, char *node_name) { struct node_record *node_ptr; int old_buffer_size, new_buffer_size; last_node_update = time (NULL); xassert(config_ptr); xassert(node_name); /* round up the buffer size to reduce overhead of xrealloc */ old_buffer_size = (node_record_count) * sizeof (struct node_record); old_buffer_size = ((int) ((old_buffer_size / BUF_SIZE) + 1)) * BUF_SIZE; new_buffer_size = (node_record_count + 1) * sizeof (struct node_record); new_buffer_size = ((int) ((new_buffer_size / BUF_SIZE) + 1)) * BUF_SIZE; if (!node_record_table_ptr) { node_record_table_ptr = (struct node_record *) xmalloc (new_buffer_size); } else if (old_buffer_size != new_buffer_size) xrealloc (node_record_table_ptr, new_buffer_size); node_ptr = node_record_table_ptr + (node_record_count++); node_ptr->name = xstrdup(node_name); node_ptr->config_ptr = config_ptr; /* these values will be overwritten when the node actually registers */ node_ptr->cpus = config_ptr->cpus; node_ptr->cpu_load = NO_VAL; node_ptr->boards = config_ptr->boards; node_ptr->sockets = config_ptr->sockets; node_ptr->cores = config_ptr->cores; node_ptr->threads = config_ptr->threads; node_ptr->real_memory = config_ptr->real_memory; node_ptr->tmp_disk = config_ptr->tmp_disk; node_ptr->select_nodeinfo = select_g_select_nodeinfo_alloc(); node_ptr->energy = acct_gather_energy_alloc(); xassert (node_ptr->magic = NODE_MAGIC) /* set value */; return node_ptr; }
/*
 * _fill_registration_msg - populate a node registration message
 * IN/OUT msg - message to fill; msg->startup and msg->energy are read,
 *	the remaining fields set here are written
 *
 * Copies the hardware description from the global conf, packs the gres
 * configuration, records uptime and start time, identifies arch/os, and
 * lists the job steps whose stepd still answers on its socket.
 */
static void _fill_registration_msg(slurm_node_registration_status_msg_t *msg)
{
	static bool first_msg = true;
	static time_t slurmd_start_time = 0;
	List step_list;
	ListIterator step_itr;
	step_loc_t *loc;
	int kept;
	char *env_arch, *env_os;
	struct utsname uts;
	Buf gres_buf;

	/* static hardware description taken from the daemon configuration */
	msg->node_name = xstrdup (conf->node_name);
	msg->cpus = conf->cpus;
	msg->boards = conf->boards;
	msg->sockets = conf->sockets;
	msg->cores = conf->cores;
	msg->threads = conf->threads;
	msg->real_memory = conf->real_memory_size;
	msg->tmp_disk = conf->tmp_disk_space;
	msg->hash_val = slurm_get_hash_val();
	get_cpu_load(&msg->cpu_load);

	/* the packed gres buffer is attached only when packing succeeded */
	gres_buf = init_buf(1024);
	if (gres_plugin_node_config_pack(gres_buf) == SLURM_SUCCESS)
		msg->gres_info = gres_buf;
	else
		error("error packing gres configuration");

	get_up_time(&conf->up_time);
	msg->up_time = conf->up_time;

	/* the start time is captured once and reused for later messages */
	if (!slurmd_start_time)
		slurmd_start_time = time(NULL);
	msg->slurmd_start_time = slurmd_start_time;

	/* first report goes out at info level, later ones at debug3 */
	if (!first_msg) {
		debug3("CPUs=%u Boards=%u Sockets=%u Cores=%u Threads=%u "
		       "Memory=%u TmpDisk=%u Uptime=%u",
		       msg->cpus, msg->boards, msg->sockets, msg->cores,
		       msg->threads, msg->real_memory, msg->tmp_disk,
		       msg->up_time);
	} else {
		first_msg = false;
		info("CPUs=%u Boards=%u Sockets=%u Cores=%u Threads=%u "
		     "Memory=%u TmpDisk=%u Uptime=%u",
		     msg->cpus, msg->boards, msg->sockets, msg->cores,
		     msg->threads, msg->real_memory, msg->tmp_disk,
		     msg->up_time);
	}

	/* environment overrides win over what uname() reports */
	uname(&uts);
	env_arch = getenv("SLURM_ARCH");
	msg->arch = xstrdup(env_arch ? env_arch : uts.machine);
	env_os = getenv("SLURM_OS");
	msg->os = xstrdup(env_os ? env_os : uts.sysname);

	if (msg->startup) {
		if (switch_g_alloc_node_info(&msg->switch_nodeinfo))
			error("switch_g_alloc_node_info: %m");
		if (switch_g_build_node_info(msg->switch_nodeinfo))
			error("switch_g_build_node_info: %m");
	}

	/*
	 * Arrays are sized for every step found on disk; job_count is then
	 * reduced for each step that turns out to be unreachable or stale.
	 */
	step_list = stepd_available(conf->spooldir, conf->node_name);
	msg->job_count = list_count(step_list);
	msg->job_id = xmalloc(msg->job_count * sizeof(*msg->job_id));
	/* Note: Running batch jobs will have step_id == NO_VAL */
	msg->step_id = xmalloc(msg->job_count * sizeof(*msg->step_id));

	step_itr = list_iterator_create(step_list);
	kept = 0;
	while ((loc = list_next(step_itr))) {
		int sock = stepd_connect(loc->directory, loc->nodename,
					 loc->jobid, loc->stepid);

		if (sock == -1) {
			--(msg->job_count);
			continue;
		}

		if (stepd_state(sock) == SLURMSTEPD_NOT_RUNNING) {
			debug("stale domain socket for stepd %u.%u ",
			      loc->jobid, loc->stepid);
			--(msg->job_count);
			close(sock);
			continue;
		}

		close(sock);
		if (loc->stepid != NO_VAL)
			debug("found apparently running step %u.%u",
			      loc->jobid, loc->stepid);
		else
			debug("found apparently running job %u", loc->jobid);

		msg->job_id[kept] = loc->jobid;
		msg->step_id[kept] = loc->stepid;
		kept++;
	}
	list_iterator_destroy(step_itr);
	list_destroy(step_list);

	/* reuse the caller's energy record when one is already attached */
	if (!msg->energy)
		msg->energy = acct_gather_energy_alloc();
	acct_gather_energy_g_get_data(ENERGY_DATA_STRUCT, msg->energy);

	msg->timestamp = time(NULL);
}
/*
 * acct_gather_energy_p_conf_set - load the IPMI energy plugin configuration
 * IN tbl - parsed configuration table, may be NULL (defaults are kept)
 *
 * Resets slurm_ipmi_conf, overlays any values found in tbl, and - only
 * when _run_in_daemon() reports true - performs the one-time setup of
 * local_energy and either starts the launcher thread or takes a first
 * reading. Calls fatal() when the configured frequency is not positive.
 */
extern void acct_gather_energy_p_conf_set(s_p_hashtbl_t *tbl)
{
	char *tmp_char = NULL;

	/* Set initial values */
	reset_slurm_ipmi_conf(&slurm_ipmi_conf);

	if (tbl) {
		/* ipmi initialisation parameters */
		s_p_get_uint32(&slurm_ipmi_conf.driver_type,
			       "EnergyIPMIDriverType", tbl);
		s_p_get_uint32(&slurm_ipmi_conf.disable_auto_probe,
			       "EnergyIPMIDisableAutoProbe", tbl);
		s_p_get_uint32(&slurm_ipmi_conf.driver_address,
			       "EnergyIPMIDriverAddress", tbl);
		s_p_get_uint32(&slurm_ipmi_conf.register_spacing,
			       "EnergyIPMIRegisterSpacing", tbl);

		s_p_get_string(&slurm_ipmi_conf.driver_device,
			       "EnergyIPMIDriverDevice", tbl);

		s_p_get_uint32(&slurm_ipmi_conf.protocol_version,
			       "EnergyIPMIProtocolVersion", tbl);

		if (!s_p_get_string(&slurm_ipmi_conf.username,
				    "EnergyIPMIUsername", tbl))
			slurm_ipmi_conf.username = xstrdup(DEFAULT_IPMI_USER);

		/* fall back to the built-in placeholder when none is set */
		s_p_get_string(&slurm_ipmi_conf.password,
			       "EnergyIPMIPassword", tbl);
		if (!slurm_ipmi_conf.password)
			slurm_ipmi_conf.password = xstrdup("foopassword");

		s_p_get_uint32(&slurm_ipmi_conf.privilege_level,
			       "EnergyIPMIPrivilegeLevel", tbl);
		s_p_get_uint32(&slurm_ipmi_conf.authentication_type,
			       "EnergyIPMIAuthenticationType", tbl);
		s_p_get_uint32(&slurm_ipmi_conf.cipher_suite_id,
			       "EnergyIPMICipherSuiteId", tbl);
		s_p_get_uint32(&slurm_ipmi_conf.session_timeout,
			       "EnergyIPMISessionTimeout", tbl);
		s_p_get_uint32(&slurm_ipmi_conf.retransmission_timeout,
			       "EnergyIPMIRetransmissionTimeout", tbl);
		s_p_get_uint32(&slurm_ipmi_conf.workaround_flags,
			       "EnergyIPMIWorkaroundFlags", tbl);

		/* every boolean option defaults to false when absent */
		if (!s_p_get_boolean(&slurm_ipmi_conf.reread_sdr_cache,
				     "EnergyIPMIRereadSdrCache", tbl))
			slurm_ipmi_conf.reread_sdr_cache = false;
		if (!s_p_get_boolean(
			    &slurm_ipmi_conf.ignore_non_interpretable_sensors,
			    "EnergyIPMIIgnoreNonInterpretableSensors", tbl))
			slurm_ipmi_conf.ignore_non_interpretable_sensors =
				false;
		if (!s_p_get_boolean(&slurm_ipmi_conf.bridge_sensors,
				     "EnergyIPMIBridgeSensors", tbl))
			slurm_ipmi_conf.bridge_sensors = false;
		if (!s_p_get_boolean(&slurm_ipmi_conf.interpret_oem_data,
				     "EnergyIPMIInterpretOemData", tbl))
			slurm_ipmi_conf.interpret_oem_data = false;
		if (!s_p_get_boolean(&slurm_ipmi_conf.shared_sensors,
				     "EnergyIPMISharedSensors", tbl))
			slurm_ipmi_conf.shared_sensors = false;
		if (!s_p_get_boolean(&slurm_ipmi_conf.discrete_reading,
				     "EnergyIPMIDiscreteReading", tbl))
			slurm_ipmi_conf.discrete_reading = false;
		if (!s_p_get_boolean(&slurm_ipmi_conf.ignore_scanning_disabled,
				     "EnergyIPMIIgnoreScanningDisabled", tbl))
			slurm_ipmi_conf.ignore_scanning_disabled = false;
		if (!s_p_get_boolean(&slurm_ipmi_conf.assume_bmc_owner,
				     "EnergyIPMIAssumeBmcOwner", tbl))
			slurm_ipmi_conf.assume_bmc_owner = false;
		if (!s_p_get_boolean(&slurm_ipmi_conf.entity_sensor_names,
				     "EnergyIPMIEntitySensorNames", tbl))
			slurm_ipmi_conf.entity_sensor_names = false;

		/* the cast also rejects values that wrap negative as int */
		s_p_get_uint32(&slurm_ipmi_conf.freq,
			       "EnergyIPMIFrequency", tbl);
		if ((int)slurm_ipmi_conf.freq <= 0)
			fatal("EnergyIPMIFrequency must be a positive integer "
			      "in acct_gather.conf.");

		if (!s_p_get_boolean(&(slurm_ipmi_conf.adjustment),
				     "EnergyIPMICalcAdjustment", tbl))
			slurm_ipmi_conf.adjustment = false;

		s_p_get_uint32(&slurm_ipmi_conf.power_sensor_num,
			       "EnergyIPMIPowerSensor", tbl);

		s_p_get_uint32(&slurm_ipmi_conf.timeout,
			       "EnergyIPMITimeout", tbl);

		/* "Temp" is the only value that changes the sensor type */
		if (s_p_get_string(&tmp_char, "EnergyIPMIVariable", tbl)) {
			if (!strcmp(tmp_char, "Temp"))
				slurm_ipmi_conf.variable =
					IPMI_MONITORING_SENSOR_TYPE_TEMPERATURE;
			xfree(tmp_char);
		}
	}

	if (!_run_in_daemon())
		return;

	if (!flag_init) {
		/* one-time setup, guarded by flag_init */
		local_energy = acct_gather_energy_alloc();
		local_energy->consumed_energy = 0;
		local_energy->base_consumed_energy = 0;
		local_energy->base_watts = 0;
		flag_init = true;

		if (_is_thread_launcher()) {
			pthread_attr_t attr;
			int rc;

			slurm_attr_init(&attr);
			rc = pthread_create(&thread_ipmi_id_launcher, &attr,
					    &_thread_launcher, NULL);
			/*
			 * NOTE(review): pthread_create() reports failure
			 * through its return value, while %m formats errno -
			 * confirm the logged reason is meaningful.
			 */
			if (rc)
				debug("energy accounting failed to create "
				      "_thread_launcher thread: %m");
			slurm_attr_destroy(&attr);

			/* only claim success when the thread really started */
			if (!rc && (debug_flags & DEBUG_FLAG_ENERGY))
				info("%s thread launched", plugin_name);
		} else
			_get_joules_task(0);
	}

	verbose("%s loaded", plugin_name);
}