/*
 * _set_node_mixed - flag partially allocated nodes as NODE_STATE_MIXED
 * resp IN/OUT - node information message; node_state of each record may be
 *	rewritten in place. A NULL message is ignored.
 *
 * A node is treated as mixed when the select plugin reports a non-zero
 * allocated sub-count that differs from the node's cpus value.
 */
static void _set_node_mixed(node_info_msg_t *resp)
{
	int inx;

	if (resp == NULL)
		return;

	for (inx = 0; inx < resp->record_count; inx++) {
		node_info_t *node = &resp->node_array[inx];
		uint16_t alloc_cnt = 0;

		select_g_select_nodeinfo_get(node->select_nodeinfo,
					     SELECT_NODEDATA_SUBCNT,
					     NODE_STATE_ALLOCATED,
					     &alloc_cnt);

		/* Nothing allocated, or everything allocated: not mixed */
		if ((alloc_cnt == 0) || (alloc_cnt == node->cpus))
			continue;

		/* Keep only the flag bits, then set the base state to MIXED */
		node->node_state = (node->node_state & NODE_STATE_FLAGS) |
				   NODE_STATE_MIXED;
	}
}
extern int clusteracct_storage_g_node_up(void *db_conn, struct node_record *node_ptr, time_t event_time) { if (slurm_acct_storage_init(NULL) < 0) return SLURM_ERROR; /* on some systems we need to make sure we don't say something is completely up if there are cpus in an error state */ if(node_ptr->select_nodeinfo) { uint16_t err_cpus = 0; select_g_select_nodeinfo_get(node_ptr->select_nodeinfo, SELECT_NODEDATA_SUBCNT, NODE_STATE_ERROR, &err_cpus); if(err_cpus) { char *reason = "Setting partial node down."; struct node_record send_node; struct config_record config_rec; uint16_t cpu_cnt = 0; select_g_alter_node_cnt(SELECT_GET_NODE_CPU_CNT, &cpu_cnt); err_cpus *= cpu_cnt; memset(&send_node, 0, sizeof(struct node_record)); memset(&config_rec, 0, sizeof(struct config_record)); send_node.name = node_ptr->name; send_node.config_ptr = &config_rec; send_node.cpus = err_cpus; config_rec.cpus = err_cpus; send_node.node_state = NODE_STATE_ERROR; return (*(g_acct_storage_context->ops.node_down)) (db_conn, &send_node, event_time, reason, slurm_get_slurm_user_id()); } } return (*(g_acct_storage_context->ops.node_up)) (db_conn, node_ptr, event_time); }
/*
 * _insert_node_ptr - add a node to the first matching sinfo_data record,
 *	or append a new record when none matches
 * sinfo_list IN/OUT - list of sinfo_data records
 * part_num IN - partition index passed to _create_sinfo()
 * part_ptr IN - partition the node is being reported under
 * node_ptr IN/OUT - node record; on CLUSTER_FLAG_BG clusters a default
 *	reason string is set when the node has an error sub-count and no reason
 * node_scaling IN - scaling value forwarded to the record update/creation
 * RET SLURM_SUCCESS (always)
 */
static int _insert_node_ptr(List sinfo_list, uint16_t part_num,
			    partition_info_t *part_ptr,
			    node_info_t *node_ptr, uint32_t node_scaling)
{
	sinfo_data_t *entry = NULL;
	ListIterator iter = NULL;

	if (params.cluster_flags & CLUSTER_FLAG_BG) {
		uint16_t err_cnt = 0;

		select_g_select_nodeinfo_get(node_ptr->select_nodeinfo,
					     SELECT_NODEDATA_SUBCNT,
					     NODE_STATE_ERROR,
					     &err_cnt);
		if (err_cnt && (node_ptr->reason == NULL))
			node_ptr->reason = xstrdup("Block(s) in error state");
	}

	iter = list_iterator_create(sinfo_list);
	for (entry = list_next(iter); entry; entry = list_next(iter)) {
		/*
		 * A record accepts the node when the partition data matches
		 * and it is either still empty or its node data matches too.
		 */
		if (_match_part_data(entry, part_ptr) &&
		    (!entry->nodes_total ||
		     _match_node_data(entry, node_ptr))) {
			_update_sinfo(entry, node_ptr, node_scaling);
			break;
		}
	}
	list_iterator_destroy(iter);

	/* entry is NULL only if the loop ran off the end without a match */
	if (entry == NULL) {
		list_append(sinfo_list,
			    _create_sinfo(part_ptr, part_num, node_ptr,
					  node_scaling));
	}

	return SLURM_SUCCESS;
}
/*
 * slurm_get_select_nodeinfo - thin public wrapper that forwards all of its
 *	arguments, unchanged, to select_g_select_nodeinfo_get()
 * RET the return code of select_g_select_nodeinfo_get()
 */
extern int slurm_get_select_nodeinfo(dynamic_plugin_data_t *nodeinfo,
				     enum select_nodedata_type data_type,
				     enum node_states state, void *data)
{
	int rc;

	rc = select_g_select_nodeinfo_get(nodeinfo, data_type, state, data);
	return rc;
}
/* Spawn health check function for every node that is not DOWN */ extern void run_health_check(void) { #ifdef HAVE_FRONT_END front_end_record_t *front_end_ptr; #else struct node_record *node_ptr; int node_test_cnt = 0, node_limit, node_states, run_cyclic; static int base_node_loc = -1; static time_t cycle_start_time = (time_t) 0; #endif int i; char *host_str = NULL; agent_arg_t *check_agent_args = NULL; /* Sync plugin internal data with * node select_nodeinfo. This is important * after reconfig otherwise select_nodeinfo * will not return the correct number of * allocated cpus. */ select_g_select_nodeinfo_set_all(); check_agent_args = xmalloc (sizeof (agent_arg_t)); check_agent_args->msg_type = REQUEST_HEALTH_CHECK; check_agent_args->retry = 0; check_agent_args->hostlist = hostlist_create(NULL); #ifdef HAVE_FRONT_END for (i = 0, front_end_ptr = front_end_nodes; i < front_end_node_cnt; i++, front_end_ptr++) { if (IS_NODE_NO_RESPOND(front_end_ptr)) continue; hostlist_push_host(check_agent_args->hostlist, front_end_ptr->name); check_agent_args->node_count++; } #else run_cyclic = slurmctld_conf.health_check_node_state & HEALTH_CHECK_CYCLE; node_states = slurmctld_conf.health_check_node_state & (~HEALTH_CHECK_CYCLE); if (run_cyclic) { time_t now = time(NULL); if (cycle_start_time == (time_t) 0) cycle_start_time = now; else if (base_node_loc >= 0) ; /* mid-cycle */ else if (difftime(now, cycle_start_time) < slurmctld_conf.health_check_interval) { return; /* Wait to start next cycle */ } cycle_start_time = now; /* Determine how many nodes we want to test on each call of * run_health_check() to spread out the work. 
*/ node_limit = (node_record_count * 2) / slurmctld_conf.health_check_interval; node_limit = MAX(node_limit, 10); } if ((node_states != HEALTH_CHECK_NODE_ANY) && (node_states != HEALTH_CHECK_NODE_IDLE)) { /* Update each node's alloc_cpus count */ select_g_select_nodeinfo_set_all(); } for (i = 0; i < node_record_count; i++) { if (run_cyclic) { if (node_test_cnt++ >= node_limit) break; base_node_loc++; if (base_node_loc >= node_record_count) { base_node_loc = -1; break; } node_ptr = node_record_table_ptr + base_node_loc; } else { node_ptr = node_record_table_ptr + i; } if (IS_NODE_NO_RESPOND(node_ptr) || IS_NODE_FUTURE(node_ptr) || IS_NODE_POWER_SAVE(node_ptr)) continue; if (node_states != HEALTH_CHECK_NODE_ANY) { uint16_t cpus_total, cpus_used = 0; if (slurmctld_conf.fast_schedule) { cpus_total = node_ptr->config_ptr->cpus; } else { cpus_total = node_ptr->cpus; } if (!IS_NODE_IDLE(node_ptr)) { select_g_select_nodeinfo_get( node_ptr->select_nodeinfo, SELECT_NODEDATA_SUBCNT, NODE_STATE_ALLOCATED, &cpus_used); } /* Here the node state is inferred from * the cpus allocated on it. 
* - cpus_used == 0 * means node is idle * - cpus_used < cpus_total * means the node is in mixed state * else cpus_used == cpus_total * means the node is allocated */ if (cpus_used == 0) { if (!(node_states & HEALTH_CHECK_NODE_IDLE)) continue; if (!IS_NODE_IDLE(node_ptr)) continue; } else if (cpus_used < cpus_total) { if (!(node_states & HEALTH_CHECK_NODE_MIXED)) continue; } else { if (!(node_states & HEALTH_CHECK_NODE_ALLOC)) continue; } } hostlist_push_host(check_agent_args->hostlist, node_ptr->name); check_agent_args->node_count++; } if (run_cyclic && (i >= node_record_count)) base_node_loc = -1; #endif if (check_agent_args->node_count == 0) { hostlist_destroy(check_agent_args->hostlist); xfree (check_agent_args); } else { hostlist_uniq(check_agent_args->hostlist); host_str = hostlist_ranged_string_xmalloc( check_agent_args->hostlist); debug("Spawning health check agent for %s", host_str); xfree(host_str); ping_begin(); agent_queue_request(check_agent_args); } }
/*
 * _update_sinfo - fold one node into an existing sinfo_data record
 * sinfo_ptr IN/OUT - record to update (min/max values, host lists and
 *	node/cpu counters)
 * node_ptr IN - node being added
 * node_scaling IN - number of node units this call accounts for
 *
 * The first node added to an empty record seeds all of its min/max and
 * descriptive fields. A node whose name is already in the record's host
 * list is ignored. Counters are then updated; on CLUSTER_FLAG_BG clusters
 * the select plugin sub-counts are used to split the node between the
 * alloc / other / idle buckets.
 */
static void _update_sinfo(sinfo_data_t *sinfo_ptr, node_info_t *node_ptr,
			  uint32_t node_scaling)
{
	uint16_t base_state;
	uint16_t used_cpus = 0, error_cpus = 0;
	int total_cpus = 0, total_nodes = 0;
	/* since node_scaling could be less here, we need to use the
	 * global node scaling which should never change.
	 * A zero scaling factor must not cause a division by zero; fall
	 * back to 1 cpu per unit in that case. */
	int single_node_cpus = 1;

	if (g_node_scaling)
		single_node_cpus = (node_ptr->cpus / g_node_scaling);

	base_state = node_ptr->node_state & NODE_STATE_BASE;

	if (sinfo_ptr->nodes_total == 0) {	/* first node added */
		sinfo_ptr->node_state = node_ptr->node_state;
		sinfo_ptr->features   = node_ptr->features;
		sinfo_ptr->gres       = node_ptr->gres;
		sinfo_ptr->reason     = node_ptr->reason;
		sinfo_ptr->reason_time= node_ptr->reason_time;
		sinfo_ptr->reason_uid = node_ptr->reason_uid;
		sinfo_ptr->min_cpus    = node_ptr->cpus;
		sinfo_ptr->max_cpus    = node_ptr->cpus;
		sinfo_ptr->min_sockets = node_ptr->sockets;
		sinfo_ptr->max_sockets = node_ptr->sockets;
		sinfo_ptr->min_cores   = node_ptr->cores;
		sinfo_ptr->max_cores   = node_ptr->cores;
		sinfo_ptr->min_threads = node_ptr->threads;
		sinfo_ptr->max_threads = node_ptr->threads;
		sinfo_ptr->min_disk   = node_ptr->tmp_disk;
		sinfo_ptr->max_disk   = node_ptr->tmp_disk;
		sinfo_ptr->min_mem    = node_ptr->real_memory;
		sinfo_ptr->max_mem    = node_ptr->real_memory;
		sinfo_ptr->min_weight = node_ptr->weight;
		sinfo_ptr->max_weight = node_ptr->weight;
		sinfo_ptr->min_cpu_load = node_ptr->cpu_load;
		sinfo_ptr->max_cpu_load = node_ptr->cpu_load;
		sinfo_ptr->max_cpus_per_node = sinfo_ptr->part_info->
					       max_cpus_per_node;
		sinfo_ptr->version    = node_ptr->version;
	} else if (hostlist_find(sinfo_ptr->nodes, node_ptr->name) != -1) {
		/* we already have this node in this record,
		 * just return, don't duplicate */
		return;
	} else {
		/* Widen the record's min/max ranges to cover this node */
		if (sinfo_ptr->min_cpus > node_ptr->cpus)
			sinfo_ptr->min_cpus = node_ptr->cpus;
		if (sinfo_ptr->max_cpus < node_ptr->cpus)
			sinfo_ptr->max_cpus = node_ptr->cpus;

		if (sinfo_ptr->min_sockets > node_ptr->sockets)
			sinfo_ptr->min_sockets = node_ptr->sockets;
		if (sinfo_ptr->max_sockets < node_ptr->sockets)
			sinfo_ptr->max_sockets = node_ptr->sockets;

		if (sinfo_ptr->min_cores > node_ptr->cores)
			sinfo_ptr->min_cores = node_ptr->cores;
		if (sinfo_ptr->max_cores < node_ptr->cores)
			sinfo_ptr->max_cores = node_ptr->cores;

		if (sinfo_ptr->min_threads > node_ptr->threads)
			sinfo_ptr->min_threads = node_ptr->threads;
		if (sinfo_ptr->max_threads < node_ptr->threads)
			sinfo_ptr->max_threads = node_ptr->threads;

		if (sinfo_ptr->min_disk > node_ptr->tmp_disk)
			sinfo_ptr->min_disk = node_ptr->tmp_disk;
		if (sinfo_ptr->max_disk < node_ptr->tmp_disk)
			sinfo_ptr->max_disk = node_ptr->tmp_disk;

		if (sinfo_ptr->min_mem > node_ptr->real_memory)
			sinfo_ptr->min_mem = node_ptr->real_memory;
		if (sinfo_ptr->max_mem < node_ptr->real_memory)
			sinfo_ptr->max_mem = node_ptr->real_memory;

		if (sinfo_ptr->min_weight> node_ptr->weight)
			sinfo_ptr->min_weight = node_ptr->weight;
		if (sinfo_ptr->max_weight < node_ptr->weight)
			sinfo_ptr->max_weight = node_ptr->weight;

		if (sinfo_ptr->min_cpu_load > node_ptr->cpu_load)
			sinfo_ptr->min_cpu_load = node_ptr->cpu_load;
		if (sinfo_ptr->max_cpu_load < node_ptr->cpu_load)
			sinfo_ptr->max_cpu_load = node_ptr->cpu_load;
	}

	hostlist_push_host(sinfo_ptr->nodes, node_ptr->name);
	if (params.match_flags.node_addr_flag)
		hostlist_push_host(sinfo_ptr->node_addr, node_ptr->node_addr);
	if (params.match_flags.hostnames_flag)
		hostlist_push_host(sinfo_ptr->hostnames,
				   node_ptr->node_hostname);

	total_cpus = node_ptr->cpus;
	total_nodes = node_scaling;

	select_g_select_nodeinfo_get(node_ptr->select_nodeinfo,
				     SELECT_NODEDATA_SUBCNT,
				     NODE_STATE_ALLOCATED,
				     &used_cpus);
	select_g_select_nodeinfo_get(node_ptr->select_nodeinfo,
				     SELECT_NODEDATA_SUBCNT,
				     NODE_STATE_ERROR,
				     &error_cpus);

	if (params.cluster_flags & CLUSTER_FLAG_BG) {
		if (!params.match_flags.state_flag &&
		    (used_cpus || error_cpus)) {
			/* We only get one shot at this (because all states
			 * are combined together), so we need to make
			 * sure we get all the subgrps accounted. (So use
			 * g_node_scaling for safe measure) */
			total_nodes = g_node_scaling;

			sinfo_ptr->nodes_alloc += used_cpus;
			sinfo_ptr->nodes_other += error_cpus;
			sinfo_ptr->nodes_idle +=
				(total_nodes - (used_cpus + error_cpus));
			/* Convert the sub-counts into cpu counts */
			used_cpus *= single_node_cpus;
			error_cpus *= single_node_cpus;
		} else {
			/* process only for this subgrp and then return */
			total_cpus = total_nodes * single_node_cpus;

			if ((base_state == NODE_STATE_ALLOCATED) ||
			    (base_state == NODE_STATE_MIXED) ||
			    (node_ptr->node_state & NODE_STATE_COMPLETING)) {
				sinfo_ptr->nodes_alloc += total_nodes;
				sinfo_ptr->cpus_alloc += total_cpus;
			} else if (IS_NODE_DRAIN(node_ptr) ||
				   (base_state == NODE_STATE_DOWN)) {
				sinfo_ptr->nodes_other += total_nodes;
				sinfo_ptr->cpus_other += total_cpus;
			} else {
				sinfo_ptr->nodes_idle += total_nodes;
				sinfo_ptr->cpus_idle += total_cpus;
			}

			sinfo_ptr->nodes_total += total_nodes;
			sinfo_ptr->cpus_total += total_cpus;

			return;
		}
	} else {
		if ((base_state == NODE_STATE_ALLOCATED) ||
		    (base_state == NODE_STATE_MIXED) ||
		    IS_NODE_COMPLETING(node_ptr))
			sinfo_ptr->nodes_alloc += total_nodes;
		else if (IS_NODE_DRAIN(node_ptr) ||
			 (base_state == NODE_STATE_DOWN))
			sinfo_ptr->nodes_other += total_nodes;
		else
			sinfo_ptr->nodes_idle += total_nodes;
	}

	sinfo_ptr->nodes_total += total_nodes;

	sinfo_ptr->cpus_alloc += used_cpus;
	sinfo_ptr->cpus_total += total_cpus;
	/* What is left after removing allocated and error cpus */
	total_cpus -= used_cpus + error_cpus;

	if (error_cpus) {
		sinfo_ptr->cpus_idle += total_cpus;
		sinfo_ptr->cpus_other += error_cpus;
	} else if (IS_NODE_DRAIN(node_ptr) ||
		   (base_state == NODE_STATE_DOWN)) {
		sinfo_ptr->cpus_other += total_cpus;
	} else
		sinfo_ptr->cpus_idle += total_cpus;
}
/*
 * _build_sinfo_data - populate sinfo_list with one sinfo_data record per
 *	unique node configuration, for later printing
 * sinfo_list IN/OUT - list of unique sinfo_data records to report
 * partition_msg IN - partition info message
 * node_msg IN/OUT - node info message; when filtering is enabled the name
 *	of every filtered-out node is freed so later passes skip it
 * RET SLURM_SUCCESS (always)
 *
 * When the node message holds exactly one record, each partition is
 * handled inline. Otherwise one detached thread running
 * _build_part_info() is started per partition and this function waits
 * until sinfo_cnt drops back to zero before sorting the list.
 */
static int _build_sinfo_data(List sinfo_list,
			     partition_info_msg_t *partition_msg,
			     node_info_msg_t *node_msg)
{
	pthread_attr_t thr_attr;
	pthread_t thr_id;
	int p_inx, n_inx;

	g_node_scaling = node_msg->node_scaling;

	/* by default every partition is shown, even if no nodes */
	if (!params.node_flag && params.match_flags.partition_flag) {
		for (p_inx = 0; p_inx < partition_msg->record_count;
		     p_inx++) {
			partition_info_t *part =
				&partition_msg->partition_array[p_inx];

			if (params.partition &&
			    (_strcmp(params.partition, part->name) != 0))
				continue;
			list_append(sinfo_list,
				    _create_sinfo(part, (uint16_t) p_inx,
						  NULL,
						  node_msg->node_scaling));
		}
	}

	/* Drop the name of every node rejected by the filter */
	if (params.filtering) {
		for (n_inx = 0; n_inx < node_msg->record_count; n_inx++) {
			node_info_t *node = &node_msg->node_array[n_inx];

			if (node->name && _filter_out(node))
				xfree(node->name);
		}
	}

	/* make sinfo_list entries for every node in every partition */
	for (p_inx = 0; p_inx < partition_msg->record_count; p_inx++) {
		partition_info_t *part =
			&partition_msg->partition_array[p_inx];
		build_part_info_t *work;

		if (params.filtering && params.partition &&
		    _strcmp(part->name, params.partition))
			continue;

		if (node_msg->record_count == 1) { /* node_name_single */
			node_info_t *node = &node_msg->node_array[0];
			uint16_t subgrp_size = 0;
			hostlist_t part_hosts;
			int found;

			if (!node->name || !part->nodes)
				continue;

			/* Is the single node a member of this partition? */
			part_hosts = hostlist_create(part->nodes);
			found = hostlist_find(part_hosts, node->name);
			hostlist_destroy(part_hosts);
			if (found < 0)
				continue;

			if ((select_g_select_nodeinfo_get(
				     node->select_nodeinfo,
				     SELECT_NODEDATA_SUBGRP_SIZE,
				     0,
				     &subgrp_size) != SLURM_SUCCESS) ||
			    !subgrp_size) {
				_insert_node_ptr(sinfo_list,
						 (uint16_t) p_inx,
						 part, node,
						 node_msg->node_scaling);
			} else {
				_handle_subgrps(sinfo_list,
						(uint16_t) p_inx,
						part, node,
						node_msg->node_scaling);
			}
			continue;
		}

		/* Process each partition using a separate thread */
		work = xmalloc(sizeof(build_part_info_t));
		work->node_msg   = node_msg;
		work->part_num   = (uint16_t) p_inx;
		work->part_ptr   = part;
		work->sinfo_list = sinfo_list;

		/* Count the worker before it starts so the wait below
		 * cannot miss it */
		slurm_mutex_lock(&sinfo_cnt_mutex);
		sinfo_cnt++;
		slurm_mutex_unlock(&sinfo_cnt_mutex);

		slurm_attr_init(&thr_attr);
		if (pthread_attr_setdetachstate(&thr_attr,
						PTHREAD_CREATE_DETACHED))
			error("pthread_attr_setdetachstate error %m");
		while (pthread_create(&thr_id, &thr_attr,
				      _build_part_info, (void *) work)) {
			error("pthread_create error %m");
			usleep(10000);	/* sleep and retry */
		}
		slurm_attr_destroy(&thr_attr);
	}

	/* Wait for every worker thread to report completion */
	slurm_mutex_lock(&sinfo_cnt_mutex);
	while (sinfo_cnt)
		pthread_cond_wait(&sinfo_cnt_cond, &sinfo_cnt_mutex);
	slurm_mutex_unlock(&sinfo_cnt_mutex);

	_sort_hostlist(sinfo_list);
	return SLURM_SUCCESS;
}
/* Build information about a partition using one pthread per partition */ void *_build_part_info(void *args) { build_part_info_t *build_struct_ptr; List sinfo_list; partition_info_t *part_ptr; node_info_msg_t *node_msg; node_info_t *node_ptr = NULL; uint16_t part_num; int j = 0; if (_serial_part_data()) slurm_mutex_lock(&sinfo_list_mutex); build_struct_ptr = (build_part_info_t *) args; sinfo_list = build_struct_ptr->sinfo_list; part_num = build_struct_ptr->part_num; part_ptr = build_struct_ptr->part_ptr; node_msg = build_struct_ptr->node_msg; while (part_ptr->node_inx[j] >= 0) { int i = 0; uint16_t subgrp_size = 0; for (i = part_ptr->node_inx[j]; i <= part_ptr->node_inx[j+1]; i++) { if (i >= node_msg->record_count) { /* If info for single node name is loaded */ break; } node_ptr = &(node_msg->node_array[i]); if (node_ptr->name == NULL) continue; if (select_g_select_nodeinfo_get( node_ptr->select_nodeinfo, SELECT_NODEDATA_SUBGRP_SIZE, 0, &subgrp_size) == SLURM_SUCCESS && subgrp_size) { _handle_subgrps(sinfo_list, part_num, part_ptr, node_ptr, node_msg->node_scaling); } else { _insert_node_ptr(sinfo_list, part_num, part_ptr, node_ptr, node_msg->node_scaling); } } j += 2; } xfree(args); if (_serial_part_data()) slurm_mutex_unlock(&sinfo_list_mutex); slurm_mutex_lock(&sinfo_cnt_mutex); if (sinfo_cnt > 0) { sinfo_cnt--; } else { error("sinfo_cnt underflow"); sinfo_cnt = 0; } pthread_cond_broadcast(&sinfo_cnt_cond); slurm_mutex_unlock(&sinfo_cnt_mutex); return NULL; }
/*
 * _query_server - download the current server state
 * part_pptr IN/OUT - partition information message
 * node_pptr IN/OUT - node information message
 * block_pptr IN/OUT - BlueGene block data
 * reserv_pptr IN/OUT - reservation information message
 * clear_old IN - If set, then always replace old data, needed when going
 *		  between clusters.
 * RET zero or error code
 *
 * The previously loaded messages are kept in static variables and are
 * reused when the server reports SLURM_NO_CHANGE_IN_DATA. After loading
 * the nodes, the state of each non-draining node is rewritten to
 * NODE_STATE_MIXED when its allocated/error/idle cpu counts show a
 * partial allocation. Block data is only loaded when params.bg_flag is
 * set.
 */
static int _query_server(partition_info_msg_t ** part_pptr,
			 node_info_msg_t ** node_pptr,
			 block_info_msg_t ** block_pptr,
			 reserve_info_msg_t ** reserv_pptr,
			 bool clear_old)
{
	static partition_info_msg_t *old_part_ptr = NULL, *new_part_ptr;
	static node_info_msg_t *old_node_ptr = NULL, *new_node_ptr;
	static block_info_msg_t *old_bg_ptr = NULL, *new_bg_ptr;
	static reserve_info_msg_t *old_resv_ptr = NULL, *new_resv_ptr;
	int error_code;
	uint16_t show_flags = 0;
	int cc;
	node_info_t *node_ptr;

	if (params.all_flag)
		show_flags |= SHOW_ALL;

	/* ---- partitions ---- */
	if (old_part_ptr) {
		if (clear_old)
			old_part_ptr->last_update = 0;
		error_code = slurm_load_partitions(old_part_ptr->last_update,
						   &new_part_ptr, show_flags);
		if (error_code == SLURM_SUCCESS)
			slurm_free_partition_info_msg(old_part_ptr);
		else if (slurm_get_errno() == SLURM_NO_CHANGE_IN_DATA) {
			/* Nothing newer on the server: keep the old copy */
			error_code = SLURM_SUCCESS;
			new_part_ptr = old_part_ptr;
		}
	} else {
		error_code = slurm_load_partitions((time_t) 0, &new_part_ptr,
						   show_flags);
	}
	if (error_code) {
		slurm_perror("slurm_load_partitions");
		return error_code;
	}

	old_part_ptr = new_part_ptr;
	*part_pptr = new_part_ptr;

	/* ---- nodes ---- */
	if (old_node_ptr) {
		if (clear_old)
			old_node_ptr->last_update = 0;
		if (params.node_name_single) {
			error_code = slurm_load_node_single(&new_node_ptr,
							    params.nodes,
							    show_flags);
		} else {
			error_code = slurm_load_node(old_node_ptr->last_update,
						     &new_node_ptr,
						     show_flags);
		}
		if (error_code == SLURM_SUCCESS)
			slurm_free_node_info_msg(old_node_ptr);
		else if (slurm_get_errno() == SLURM_NO_CHANGE_IN_DATA) {
			error_code = SLURM_SUCCESS;
			new_node_ptr = old_node_ptr;
		}
	} else if (params.node_name_single) {
		error_code = slurm_load_node_single(&new_node_ptr,
						    params.nodes, show_flags);
	} else {
		error_code = slurm_load_node((time_t) 0, &new_node_ptr,
					     show_flags);
	}

	if (error_code) {
		slurm_perror("slurm_load_node");
		return error_code;
	}
	old_node_ptr = new_node_ptr;
	*node_pptr = new_node_ptr;

	/* Set the node state as NODE_STATE_MIXED. */
	for (cc = 0; cc < new_node_ptr->record_count; cc++) {
		node_ptr = &(new_node_ptr->node_array[cc]);
		if (IS_NODE_DRAIN(node_ptr)) {
			/* don't worry about mixed since the
			 * whole node is being drained. */
		} else {
			uint16_t alloc_cpus = 0, err_cpus = 0, idle_cpus;
			/* A zero scaling factor must not cause a division
			 * by zero; fall back to 1 cpu per unit. */
			int single_node_cpus = 1;

			if (g_node_scaling) {
				single_node_cpus =
					(node_ptr->cpus / g_node_scaling);
			}

			select_g_select_nodeinfo_get(node_ptr->select_nodeinfo,
						     SELECT_NODEDATA_SUBCNT,
						     NODE_STATE_ALLOCATED,
						     &alloc_cpus);
			if (params.cluster_flags & CLUSTER_FLAG_BG) {
				/* No sub-count reported but the node is
				 * allocated/completing: count it in full */
				if (!alloc_cpus &&
				    (IS_NODE_ALLOCATED(node_ptr) ||
				     IS_NODE_COMPLETING(node_ptr)))
					alloc_cpus = node_ptr->cpus;
				else
					alloc_cpus *= single_node_cpus;
			}
			idle_cpus = node_ptr->cpus - alloc_cpus;

			select_g_select_nodeinfo_get(node_ptr->select_nodeinfo,
						     SELECT_NODEDATA_SUBCNT,
						     NODE_STATE_ERROR,
						     &err_cpus);
			if (params.cluster_flags & CLUSTER_FLAG_BG)
				err_cpus *= single_node_cpus;
			idle_cpus -= err_cpus;

			if ((alloc_cpus && err_cpus) ||
			    (idle_cpus && (idle_cpus != node_ptr->cpus))) {
				node_ptr->node_state &= NODE_STATE_FLAGS;
				node_ptr->node_state |= NODE_STATE_MIXED;
			}
		}
	}

	/* ---- reservations ---- */
	if (old_resv_ptr) {
		if (clear_old)
			old_resv_ptr->last_update = 0;
		error_code = slurm_load_reservations(old_resv_ptr->last_update,
						     &new_resv_ptr);
		if (error_code == SLURM_SUCCESS)
			slurm_free_reservation_info_msg(old_resv_ptr);
		else if (slurm_get_errno() == SLURM_NO_CHANGE_IN_DATA) {
			error_code = SLURM_SUCCESS;
			new_resv_ptr = old_resv_ptr;
		}
	} else {
		error_code = slurm_load_reservations((time_t) 0,
						     &new_resv_ptr);
	}

	if (error_code) {
		slurm_perror("slurm_load_reservations");
		return error_code;
	}
	old_resv_ptr = new_resv_ptr;
	*reserv_pptr = new_resv_ptr;

	/* Block data is only wanted when explicitly requested */
	if (!params.bg_flag)
		return SLURM_SUCCESS;

	/* ---- BlueGene blocks ---- */
	if (params.cluster_flags & CLUSTER_FLAG_BG) {
		if (old_bg_ptr) {
			if (clear_old)
				old_bg_ptr->last_update = 0;
			error_code = slurm_load_block_info(
				old_bg_ptr->last_update,
				&new_bg_ptr, show_flags);
			if (error_code == SLURM_SUCCESS)
				slurm_free_block_info_msg(old_bg_ptr);
			else if (slurm_get_errno() ==
				 SLURM_NO_CHANGE_IN_DATA) {
				error_code = SLURM_SUCCESS;
				new_bg_ptr = old_bg_ptr;
			}
		} else {
			error_code = slurm_load_block_info((time_t) 0,
							   &new_bg_ptr,
							   show_flags);
		}
	}

	if (error_code) {
		slurm_perror("slurm_load_block");
		return error_code;
	}
	old_bg_ptr = new_bg_ptr;
	*block_pptr = new_bg_ptr;
	return SLURM_SUCCESS;
}
/*
 * _handle_subgrps - split one node into per-state sub-groups and insert
 *	each non-empty sub-group into sinfo_list
 * sinfo_list IN/OUT - list of sinfo_data records
 * part_num IN - partition index
 * part_ptr IN - partition the node is being reported under
 * node_ptr IN/OUT - node record; its node_state base value is rewritten
 *	for each sub-group that gets inserted
 * node_scaling IN - total number of units the node represents; what is
 *	left after removing the allocated and error sub-groups is reported
 *	as idle
 * RET SLURM_SUCCESS (always)
 *
 * When params.state_list is set, a sub-group is only reported if the list
 * contains a state that selects it.
 */
static int _handle_subgrps(List sinfo_list, uint16_t part_num,
			   partition_info_t *part_ptr,
			   node_info_t *node_ptr, uint32_t node_scaling)
{
	uint16_t size;
	int *node_state;
	int i = 0;
	ListIterator iterator = NULL;
	enum node_states state[] =
		{ NODE_STATE_ALLOCATED, NODE_STATE_ERROR };
	/* Derived from the table so the two cannot get out of step */
	int state_cnt = (int) (sizeof(state) / sizeof(state[0]));

	/* If we ever update the hostlist stuff to support this stuff
	 * then we can use this to tack on the end of the node name
	 * the subgrp stuff.  On bluegene systems this would be nice
	 * to see the ionodes in certain states.
	 * When asking for nodes that are reserved, we need to return
	 * all states of those nodes.
	 */
	if (params.state_list)
		iterator = list_iterator_create(params.state_list);

	for (i = 0; i < state_cnt; i++) {
		if (iterator) {
			node_info_t tmp_node, *tmp_node_ptr = &tmp_node;
			while ((node_state = list_next(iterator))) {
				tmp_node_ptr->node_state = *node_state;
				if ((((state[i] == NODE_STATE_ALLOCATED)
				      && IS_NODE_DRAINING(tmp_node_ptr))
				     || (*node_state == NODE_STATE_DRAIN))
				    || (*node_state == state[i])
				    || (*node_state == NODE_STATE_RES))
					break;
			}
			list_iterator_reset(iterator);
			/* No requested state selects this sub-group */
			if (!node_state)
				continue;
		}
		/*
		 * Start from a known value on every pass: if the plugin
		 * reports success without writing the count, neither an
		 * indeterminate value nor the previous state's count may
		 * be used.
		 */
		size = 0;
		if (select_g_select_nodeinfo_get(node_ptr->select_nodeinfo,
						 SELECT_NODEDATA_SUBCNT,
						 state[i],
						 &size) == SLURM_SUCCESS
		    && size) {
			node_scaling -= size;
			node_ptr->node_state &= NODE_STATE_FLAGS;
			node_ptr->node_state |= state[i];
			_insert_node_ptr(sinfo_list, part_num, part_ptr,
					 node_ptr, size);
		}
	}

	/* now handle the idle */
	if (iterator) {
		while ((node_state = list_next(iterator))) {
			node_info_t tmp_node, *tmp_node_ptr = &tmp_node;
			tmp_node_ptr->node_state = *node_state;
			if (((*node_state == NODE_STATE_DRAIN)
			     || IS_NODE_DRAINED(tmp_node_ptr))
			    || (*node_state == NODE_STATE_IDLE)
			    || (*node_state == NODE_STATE_RES))
				break;
		}
		list_iterator_destroy(iterator);
		if (!node_state)
			return SLURM_SUCCESS;
	}
	node_ptr->node_state &= NODE_STATE_FLAGS;
	node_ptr->node_state |= NODE_STATE_IDLE;
	/* The cast catches an unsigned wrap-around from the subtractions */
	if ((int)node_scaling > 0)
		_insert_node_ptr(sinfo_list, part_num, part_ptr,
				 node_ptr, node_scaling);

	return SLURM_SUCCESS;
}
/*
 * slurm_sprint_node_table - output information about a specific Slurm node
 *	based upon message as loaded using slurm_load_node
 * IN node_ptr - an individual node information record pointer
 * IN node_scaling - number of nodes each node represents
 * IN one_liner - print as a single line if true
 * RET out - char * containing formatted output (must be freed after call)
 *	   NULL is returned on failure (including a NULL node_ptr).
 */
char *
slurm_sprint_node_table (node_info_t * node_ptr,
			 int node_scaling, int one_liner )
{
	uint32_t my_state;
	char *cloud_str = "", *comp_str = "", *drain_str = "", *power_str = "";
	char time_str[32];
	char *out = NULL, *reason_str = NULL, *select_reason_str = NULL;
	uint16_t err_cpus = 0, alloc_cpus = 0;
	int cpus_per_node = 1;
	int idle_cpus;
	uint32_t cluster_flags;
	/* Initialized so a select query that leaves it untouched prints 0
	 * rather than an indeterminate value */
	uint64_t alloc_memory = 0;
	char *node_alloc_tres = NULL;
	char *line_end = (one_liner) ? " " : "\n ";

	if (!node_ptr)
		return NULL;

	my_state = node_ptr->node_state;
	cluster_flags = slurmdb_setup_cluster_flags();

	if (node_scaling)
		cpus_per_node = node_ptr->cpus / node_scaling;

	/* Strip the flag bits from the state and remember them as suffixes */
	if (my_state & NODE_STATE_CLOUD) {
		my_state &= (~NODE_STATE_CLOUD);
		cloud_str = "+CLOUD";
	}
	if (my_state & NODE_STATE_COMPLETING) {
		my_state &= (~NODE_STATE_COMPLETING);
		comp_str = "+COMPLETING";
	}
	if (my_state & NODE_STATE_DRAIN) {
		my_state &= (~NODE_STATE_DRAIN);
		drain_str = "+DRAIN";
	}
	if (my_state & NODE_STATE_FAIL) {
		my_state &= (~NODE_STATE_FAIL);
		drain_str = "+FAIL";
	}
	if (my_state & NODE_STATE_POWER_SAVE) {
		my_state &= (~NODE_STATE_POWER_SAVE);
		power_str = "+POWER";
	}
	slurm_get_select_nodeinfo(node_ptr->select_nodeinfo,
				  SELECT_NODEDATA_SUBCNT,
				  NODE_STATE_ALLOCATED,
				  &alloc_cpus);
	if (cluster_flags & CLUSTER_FLAG_BG) {
		if (!alloc_cpus &&
		    (IS_NODE_ALLOCATED(node_ptr) ||
		     IS_NODE_COMPLETING(node_ptr)))
			alloc_cpus = node_ptr->cpus;
		else
			alloc_cpus *= cpus_per_node;
	}
	idle_cpus = node_ptr->cpus - alloc_cpus;

	slurm_get_select_nodeinfo(node_ptr->select_nodeinfo,
				  SELECT_NODEDATA_SUBCNT,
				  NODE_STATE_ERROR,
				  &err_cpus);
	if (cluster_flags & CLUSTER_FLAG_BG)
		err_cpus *= cpus_per_node;
	idle_cpus -= err_cpus;

	/* Partially allocated nodes are displayed as MIXED */
	if ((alloc_cpus && err_cpus) ||
	    (idle_cpus  && (idle_cpus != node_ptr->cpus))) {
		my_state &= NODE_STATE_FLAGS;
		my_state |= NODE_STATE_MIXED;
	}

	/****** Line 1 ******/
	xstrfmtcat(out, "NodeName=%s ", node_ptr->name);
	if (cluster_flags & CLUSTER_FLAG_BG) {
		slurm_get_select_nodeinfo(node_ptr->select_nodeinfo,
					  SELECT_NODEDATA_RACK_MP,
					  0, &select_reason_str);
		if (select_reason_str) {
			xstrfmtcat(out, "RackMidplane=%s ", select_reason_str);
			xfree(select_reason_str);
		}
	}
	if (node_ptr->arch)
		xstrfmtcat(out, "Arch=%s ", node_ptr->arch);
	xstrfmtcat(out, "CoresPerSocket=%u", node_ptr->cores);
	xstrcat(out, line_end);

	/****** Line ******/
	xstrfmtcat(out, "CPUAlloc=%u CPUErr=%u CPUTot=%u ",
		   alloc_cpus, err_cpus, node_ptr->cpus);
	if (node_ptr->cpu_load == NO_VAL)
		xstrcat(out, "CPULoad=N/A");
	else
		xstrfmtcat(out, "CPULoad=%.2f", (node_ptr->cpu_load / 100.0));
	xstrcat(out, line_end);

	/****** Line ******/
	xstrfmtcat(out, "AvailableFeatures=%s", node_ptr->features);
	xstrcat(out, line_end);

	/****** Line ******/
	xstrfmtcat(out, "ActiveFeatures=%s", node_ptr->features_act);
	xstrcat(out, line_end);

	/****** Line ******/
	xstrfmtcat(out, "Gres=%s", node_ptr->gres);
	xstrcat(out, line_end);

	/****** Line (optional) ******/
	if (node_ptr->gres_drain) {
		xstrfmtcat(out, "GresDrain=%s", node_ptr->gres_drain);
		xstrcat(out, line_end);
	}

	/****** Line (optional) ******/
	if (node_ptr->gres_used) {
		xstrfmtcat(out, "GresUsed=%s", node_ptr->gres_used);
		xstrcat(out, line_end);
	}

	/****** Line (optional) ******/
	if (node_ptr->node_hostname || node_ptr->node_addr) {
		xstrfmtcat(out, "NodeAddr=%s NodeHostName=%s Version=%s",
			   node_ptr->node_addr, node_ptr->node_hostname,
			   node_ptr->version);
		xstrcat(out, line_end);
	}

	/****** Line ******/
	if (node_ptr->os)
		xstrfmtcat(out, "OS=%s ", node_ptr->os);
	slurm_get_select_nodeinfo(node_ptr->select_nodeinfo,
				  SELECT_NODEDATA_MEM_ALLOC,
				  NODE_STATE_ALLOCATED,
				  &alloc_memory);
	xstrfmtcat(out, "RealMemory=%"PRIu64" AllocMem=%"PRIu64" ",
		   node_ptr->real_memory, alloc_memory);
	if (node_ptr->free_mem == NO_VAL64)
		xstrcat(out, "FreeMem=N/A ");
	else
		xstrfmtcat(out, "FreeMem=%"PRIu64" ", node_ptr->free_mem);
	xstrfmtcat(out, "Sockets=%u Boards=%u",
		   node_ptr->sockets, node_ptr->boards);
	xstrcat(out, line_end);

	/****** core & memory specialization Line (optional) ******/
	if (node_ptr->core_spec_cnt || node_ptr->cpu_spec_list ||
	    node_ptr->mem_spec_limit) {
		if (node_ptr->core_spec_cnt) {
			xstrfmtcat(out, "CoreSpecCount=%u ",
				   node_ptr->core_spec_cnt);
		}
		if (node_ptr->cpu_spec_list) {
			xstrfmtcat(out, "CPUSpecList=%s ",
				   node_ptr->cpu_spec_list);
		}
		if (node_ptr->mem_spec_limit) {
			xstrfmtcat(out, "MemSpecLimit=%"PRIu64"",
				   node_ptr->mem_spec_limit);
		}
		xstrcat(out, line_end);
	}

	/****** Line ******/
	xstrfmtcat(out, "State=%s%s%s%s%s ThreadsPerCore=%u TmpDisk=%u Weight=%u ",
		   node_state_string(my_state),
		   cloud_str, comp_str, drain_str, power_str,
		   node_ptr->threads, node_ptr->tmp_disk, node_ptr->weight);

	if (node_ptr->owner == NO_VAL) {
		xstrcat(out, "Owner=N/A ");
	} else {
		char *user_name = uid_to_string((uid_t) node_ptr->owner);
		xstrfmtcat(out, "Owner=%s(%u) ", user_name, node_ptr->owner);
		xfree(user_name);
	}

	xstrfmtcat(out, "MCS_label=%s",
		   (node_ptr->mcs_label == NULL) ? "N/A" : node_ptr->mcs_label);

	xstrcat(out, line_end);

	/****** Line ******/
	if (node_ptr->partitions) {
		xstrfmtcat(out, "Partitions=%s ", node_ptr->partitions);
		xstrcat(out, line_end);
	}

	/****** Line ******/
	if (node_ptr->boot_time) {
		slurm_make_time_str((time_t *)&node_ptr->boot_time,
				    time_str, sizeof(time_str));
		xstrfmtcat(out, "BootTime=%s ", time_str);
	} else {
		xstrcat(out, "BootTime=None ");
	}

	if (node_ptr->slurmd_start_time) {
		slurm_make_time_str ((time_t *)&node_ptr->slurmd_start_time,
				     time_str, sizeof(time_str));
		xstrfmtcat(out, "SlurmdStartTime=%s", time_str);
	} else {
		xstrcat(out, "SlurmdStartTime=None");
	}
	xstrcat(out, line_end);

	/****** TRES Line ******/
	select_g_select_nodeinfo_get(node_ptr->select_nodeinfo,
				     SELECT_NODEDATA_TRES_ALLOC_FMT_STR,
				     NODE_STATE_ALLOCATED, &node_alloc_tres);
	xstrfmtcat(out, "CfgTRES=%s", node_ptr->tres_fmt_str);
	xstrcat(out, line_end);
	xstrfmtcat(out, "AllocTRES=%s",
		   (node_alloc_tres) ?  node_alloc_tres : "");
	xfree(node_alloc_tres);
	xstrcat(out, line_end);

	/****** Power Management Line ******/
	if (!node_ptr->power || (node_ptr->power->cap_watts == NO_VAL))
		xstrcat(out, "CapWatts=n/a");
	else
		xstrfmtcat(out, "CapWatts=%u", node_ptr->power->cap_watts);

	xstrcat(out, line_end);

	/****** Power Consumption Line ******/
	if (!node_ptr->energy || node_ptr->energy->current_watts == NO_VAL)
		xstrcat(out, "CurrentWatts=n/s LowestJoules=n/s ConsumedJoules=n/s");
	else
		xstrfmtcat(out, "CurrentWatts=%u "
				"LowestJoules=%"PRIu64" "
				"ConsumedJoules=%"PRIu64"",
				node_ptr->energy->current_watts,
				node_ptr->energy->base_consumed_energy,
				node_ptr->energy->consumed_energy);

	xstrcat(out, line_end);

	/****** external sensors Line ******/
	if (!node_ptr->ext_sensors
	    || node_ptr->ext_sensors->consumed_energy == NO_VAL)
		xstrcat(out, "ExtSensorsJoules=n/s ");
	else
		xstrfmtcat(out, "ExtSensorsJoules=%"PRIu64" ",
			   node_ptr->ext_sensors->consumed_energy);

	if (!node_ptr->ext_sensors
	    || node_ptr->ext_sensors->current_watts == NO_VAL)
		xstrcat(out, "ExtSensorsWatts=n/s ");
	else
		xstrfmtcat(out, "ExtSensorsWatts=%u ",
			   node_ptr->ext_sensors->current_watts);

	if (!node_ptr->ext_sensors
	    || node_ptr->ext_sensors->temperature == NO_VAL)
		xstrcat(out, "ExtSensorsTemp=n/s");
	else
		xstrfmtcat(out, "ExtSensorsTemp=%u",
			   node_ptr->ext_sensors->temperature);

	xstrcat(out, line_end);

	/****** Line ******/
	/* Merge the node's own reason with the select plugin's extra info,
	 * one reason per line */
	if (node_ptr->reason && node_ptr->reason[0])
		xstrcat(reason_str, node_ptr->reason);
	slurm_get_select_nodeinfo(node_ptr->select_nodeinfo,
				  SELECT_NODEDATA_EXTRA_INFO,
				  0, &select_reason_str);
	if (select_reason_str && select_reason_str[0]) {
		if (reason_str)
			xstrcat(reason_str, "\n");
		xstrcat(reason_str, select_reason_str);
	}
	xfree(select_reason_str);
	if (reason_str) {
		int inx = 1;
		char *save_ptr = NULL, *tok, *user_name;
		tok = strtok_r(reason_str, "\n", &save_ptr);
		while (tok) {
			if (inx == 1) {
				xstrcat(out, "Reason=");
			} else {
				xstrcat(out, line_end);
				xstrcat(out, " ");
			}
			xstrfmtcat(out, "%s", tok);
			/* Only the first reason line carries user and time */
			if ((inx++ == 1) && node_ptr->reason_time) {
				user_name = uid_to_string(node_ptr->reason_uid);
				slurm_make_time_str(
					(time_t *)&node_ptr->reason_time,
					time_str, sizeof(time_str));
				xstrfmtcat(out, " [%s@%s]", user_name, time_str);
				xfree(user_name);
			}
			tok = strtok_r(NULL, "\n", &save_ptr);
		}
		xfree(reason_str);
	}
	if (one_liner)
		xstrcat(out, "\n");
	else
		xstrcat(out, "\n\n");

	return out;
}
/*
 * _build_sinfo_data - populate sinfo_list with one sinfo_data record per
 *	unique node configuration, for later printing
 * sinfo_list IN/OUT - list of unique sinfo_data records to report
 * partition_msg IN - partition info message
 * node_msg IN - node info message
 * RET SLURM_SUCCESS (always)
 *
 * Each partition's node_inx array is read as pairs of first/last node
 * indexes, ending at the first negative value.
 */
static int _build_sinfo_data(List sinfo_list,
			     partition_info_msg_t *partition_msg,
			     node_info_msg_t *node_msg)
{
	int p_inx;

	g_node_scaling = node_msg->node_scaling;

	/* by default every partition is shown, even if no nodes */
	if (!params.node_flag && params.match_flags.partition_flag) {
		for (p_inx = 0; p_inx < partition_msg->record_count;
		     p_inx++) {
			partition_info_t *part =
				&partition_msg->partition_array[p_inx];

			if (params.partition &&
			    (_strcmp(params.partition, part->name) != 0))
				continue;
			list_append(sinfo_list,
				    _create_sinfo(part, (uint16_t) p_inx,
						  NULL,
						  node_msg->node_scaling));
		}
	}

	/* make sinfo_list entries for every node in every partition */
	for (p_inx = 0; p_inx < partition_msg->record_count; p_inx++) {
		partition_info_t *part =
			&partition_msg->partition_array[p_inx];
		int range;

		if (params.filtering && params.partition &&
		    _strcmp(part->name, params.partition))
			continue;

		for (range = 0; part->node_inx[range] >= 0; range += 2) {
			uint16_t subgrp_size = 0;
			int n_inx;

			/* The range can run past the loaded records if
			 * info for a single node name is loaded */
			for (n_inx = part->node_inx[range];
			     (n_inx <= part->node_inx[range + 1]) &&
			     !(n_inx >= node_msg->record_count);
			     n_inx++) {
				node_info_t *node =
					&node_msg->node_array[n_inx];

				if (!node->name)
					continue;
				if (params.filtering && _filter_out(node))
					continue;

				if ((select_g_select_nodeinfo_get(
					     node->select_nodeinfo,
					     SELECT_NODEDATA_SUBGRP_SIZE,
					     0,
					     &subgrp_size) !=
				     SLURM_SUCCESS) || !subgrp_size) {
					_insert_node_ptr(
						sinfo_list,
						(uint16_t) p_inx,
						part, node,
						node_msg->node_scaling);
				} else {
					_handle_subgrps(
						sinfo_list,
						(uint16_t) p_inx,
						part, node,
						node_msg->node_scaling);
				}
			}
		}
	}

	_sort_hostlist(sinfo_list);
	return SLURM_SUCCESS;
}
/*
 * slurm_sprint_node_table - format information about a specific Slurm node
 *	based upon message as loaded using slurm_load_node
 * IN node_ptr - an individual node information record pointer
 * IN one_liner - print as a single line if true
 * RET out - char * containing formatted output (must be freed after call)
 * NOTE: if the controller configuration cannot be loaded, fatal() is called
 *	rather than returning an error to the caller.
 */
char *slurm_sprint_node_table(node_info_t *node_ptr, int one_liner)
{
	uint32_t my_state = node_ptr->node_state;
	char *cloud_str = "", *comp_str = "", *drain_str = "", *power_str = "";
	char time_str[32];
	char *out = NULL, *reason_str = NULL;
	uint16_t alloc_cpus = 0;
	int idle_cpus;
	/* Zeroed up front: the lookup's return code is not checked, so this
	 * guarantees a defined value is printed even if nothing is stored. */
	uint64_t alloc_memory = 0;
	char *node_alloc_tres = NULL;
	char *line_end = (one_liner) ? " " : "\n ";
	slurm_ctl_conf_info_msg_t *slurm_ctl_conf_ptr = NULL;

	/* The controller configuration is only needed for the Version
	 * comparison below; it is released just before returning. */
	if (slurm_load_ctl_conf((time_t) 0, &slurm_ctl_conf_ptr)
	    != SLURM_SUCCESS)
		fatal("Cannot load slurmctld conf file");

	/* Strip each flag bit from the working state and remember a suffix
	 * for it. FAIL shares drain_str with DRAIN and POWERING_DOWN shares
	 * power_str with POWER_SAVE, so the later test wins when both are
	 * set. */
	if (my_state & NODE_STATE_CLOUD) {
		my_state &= (~NODE_STATE_CLOUD);
		cloud_str = "+CLOUD";
	}
	if (my_state & NODE_STATE_COMPLETING) {
		my_state &= (~NODE_STATE_COMPLETING);
		comp_str = "+COMPLETING";
	}
	if (my_state & NODE_STATE_DRAIN) {
		my_state &= (~NODE_STATE_DRAIN);
		drain_str = "+DRAIN";
	}
	if (my_state & NODE_STATE_FAIL) {
		my_state &= (~NODE_STATE_FAIL);
		drain_str = "+FAIL";
	}
	if (my_state & NODE_STATE_POWER_SAVE) {
		my_state &= (~NODE_STATE_POWER_SAVE);
		power_str = "+POWER";
	}
	if (my_state & NODE_STATE_POWERING_DOWN) {
		my_state &= (~NODE_STATE_POWERING_DOWN);
		power_str = "+POWERING_DOWN";
	}

	/* A node with some, but not all, CPUs allocated is shown as MIXED. */
	slurm_get_select_nodeinfo(node_ptr->select_nodeinfo,
				  SELECT_NODEDATA_SUBCNT,
				  NODE_STATE_ALLOCATED,
				  &alloc_cpus);
	idle_cpus = node_ptr->cpus - alloc_cpus;

	if (idle_cpus && (idle_cpus != node_ptr->cpus)) {
		my_state &= NODE_STATE_FLAGS;
		my_state |= NODE_STATE_MIXED;
	}

	/****** Line 1 ******/
	xstrfmtcat(out, "NodeName=%s ", node_ptr->name);
	if (node_ptr->arch)
		xstrfmtcat(out, "Arch=%s ", node_ptr->arch);
	if (node_ptr->cpu_bind) {
		char tmp_str[128];
		slurm_sprint_cpu_bind_type(tmp_str, node_ptr->cpu_bind);
		xstrfmtcat(out, "CpuBind=%s ", tmp_str);
	}
	xstrfmtcat(out, "CoresPerSocket=%u ", node_ptr->cores);
	xstrcat(out, line_end);

	/****** Line ******/
	xstrfmtcat(out, "CPUAlloc=%u CPUTot=%u ",
		   alloc_cpus, node_ptr->cpus);
	if (node_ptr->cpu_load == NO_VAL)
		xstrcat(out, "CPULoad=N/A");
	else
		xstrfmtcat(out, "CPULoad=%.2f", (node_ptr->cpu_load / 100.0));
	xstrcat(out, line_end);

	/****** Line ******/
	xstrfmtcat(out, "AvailableFeatures=%s", node_ptr->features);
	xstrcat(out, line_end);

	/****** Line ******/
	xstrfmtcat(out, "ActiveFeatures=%s", node_ptr->features_act);
	xstrcat(out, line_end);

	/****** Line ******/
	xstrfmtcat(out, "Gres=%s", node_ptr->gres);
	xstrcat(out, line_end);

	/****** Line (optional) ******/
	if (node_ptr->gres_drain) {
		xstrfmtcat(out, "GresDrain=%s", node_ptr->gres_drain);
		xstrcat(out, line_end);
	}

	/****** Line (optional) ******/
	if (node_ptr->gres_used) {
		xstrfmtcat(out, "GresUsed=%s", node_ptr->gres_used);
		xstrcat(out, line_end);
	}

	/****** Line (optional) ******/
	{
		/* Only terminate the line if at least one field was added. */
		bool line_used = false;

		if (node_ptr->node_addr) {
			xstrfmtcat(out, "NodeAddr=%s ", node_ptr->node_addr);
			line_used = true;
		}
		if (node_ptr->node_hostname) {
			xstrfmtcat(out, "NodeHostName=%s ",
				   node_ptr->node_hostname);
			line_used = true;
		}
		if (node_ptr->port != slurm_get_slurmd_port()) {
			xstrfmtcat(out, "Port=%u ", node_ptr->port);
			line_used = true;
		}
		/* Version is reported only when it differs from the
		 * controller's version. */
		if (node_ptr->version &&
		    xstrcmp(node_ptr->version, slurm_ctl_conf_ptr->version)) {
			xstrfmtcat(out, "Version=%s", node_ptr->version);
			line_used = true;
		}

		if (line_used)
			xstrcat(out, line_end);
	}

	/****** Line ******/
	if (node_ptr->os) {
		xstrfmtcat(out, "OS=%s ", node_ptr->os);
		xstrcat(out, line_end);
	}

	/****** Line ******/
	slurm_get_select_nodeinfo(node_ptr->select_nodeinfo,
				  SELECT_NODEDATA_MEM_ALLOC,
				  NODE_STATE_ALLOCATED,
				  &alloc_memory);
	xstrfmtcat(out, "RealMemory=%"PRIu64" AllocMem=%"PRIu64" ",
		   node_ptr->real_memory, alloc_memory);
	if (node_ptr->free_mem == NO_VAL64)
		xstrcat(out, "FreeMem=N/A ");
	else
		xstrfmtcat(out, "FreeMem=%"PRIu64" ", node_ptr->free_mem);
	xstrfmtcat(out, "Sockets=%u Boards=%u",
		   node_ptr->sockets, node_ptr->boards);
	xstrcat(out, line_end);

	/****** core & memory specialization Line (optional) ******/
	if (node_ptr->core_spec_cnt || node_ptr->cpu_spec_list ||
	    node_ptr->mem_spec_limit) {
		if (node_ptr->core_spec_cnt) {
			xstrfmtcat(out, "CoreSpecCount=%u ",
				   node_ptr->core_spec_cnt);
		}
		if (node_ptr->cpu_spec_list) {
			xstrfmtcat(out, "CPUSpecList=%s ",
				   node_ptr->cpu_spec_list);
		}
		if (node_ptr->mem_spec_limit) {
			xstrfmtcat(out, "MemSpecLimit=%"PRIu64"",
				   node_ptr->mem_spec_limit);
		}
		xstrcat(out, line_end);
	}

	/****** Line ******/
	xstrfmtcat(out, "State=%s%s%s%s%s ThreadsPerCore=%u TmpDisk=%u Weight=%u ",
		   node_state_string(my_state),
		   cloud_str, comp_str, drain_str, power_str,
		   node_ptr->threads, node_ptr->tmp_disk, node_ptr->weight);

	if (node_ptr->owner == NO_VAL) {
		xstrcat(out, "Owner=N/A ");
	} else {
		char *user_name = uid_to_string((uid_t) node_ptr->owner);
		xstrfmtcat(out, "Owner=%s(%u) ", user_name, node_ptr->owner);
		xfree(user_name);
	}

	xstrfmtcat(out, "MCS_label=%s",
		   (node_ptr->mcs_label == NULL) ? "N/A" : node_ptr->mcs_label);

	xstrcat(out, line_end);

	/****** Line ******/
	if ((node_ptr->next_state != NO_VAL) &&
	    (my_state & NODE_STATE_REBOOT)) {
		xstrfmtcat(out, "NextState=%s",
			   node_state_string(node_ptr->next_state));
		xstrcat(out, line_end);
	}

	/****** Line ******/
	if (node_ptr->partitions) {
		xstrfmtcat(out, "Partitions=%s ", node_ptr->partitions);
		xstrcat(out, line_end);
	}

	/****** Line ******/
	if (node_ptr->boot_time) {
		slurm_make_time_str((time_t *)&node_ptr->boot_time,
				    time_str, sizeof(time_str));
		xstrfmtcat(out, "BootTime=%s ", time_str);
	} else {
		xstrcat(out, "BootTime=None ");
	}

	if (node_ptr->slurmd_start_time) {
		slurm_make_time_str ((time_t *)&node_ptr->slurmd_start_time,
				     time_str, sizeof(time_str));
		xstrfmtcat(out, "SlurmdStartTime=%s", time_str);
	} else {
		xstrcat(out, "SlurmdStartTime=None");
	}
	xstrcat(out, line_end);

	/****** TRES Line ******/
	select_g_select_nodeinfo_get(node_ptr->select_nodeinfo,
				     SELECT_NODEDATA_TRES_ALLOC_FMT_STR,
				     NODE_STATE_ALLOCATED, &node_alloc_tres);
	xstrfmtcat(out, "CfgTRES=%s", node_ptr->tres_fmt_str);
	xstrcat(out, line_end);
	xstrfmtcat(out, "AllocTRES=%s",
		   (node_alloc_tres) ? node_alloc_tres : "");
	xfree(node_alloc_tres);
	xstrcat(out, line_end);

	/****** Power Management Line ******/
	if (!node_ptr->power || (node_ptr->power->cap_watts == NO_VAL))
		xstrcat(out, "CapWatts=n/a");
	else
		xstrfmtcat(out, "CapWatts=%u", node_ptr->power->cap_watts);

	xstrcat(out, line_end);

	/****** Power Consumption Line ******/
	if (!node_ptr->energy || node_ptr->energy->current_watts == NO_VAL)
		xstrcat(out, "CurrentWatts=n/s AveWatts=n/s");
	else
		xstrfmtcat(out, "CurrentWatts=%u AveWatts=%u",
			   node_ptr->energy->current_watts,
			   node_ptr->energy->ave_watts);

	xstrcat(out, line_end);

	/****** external sensors Line ******/
	if (!node_ptr->ext_sensors
	    || node_ptr->ext_sensors->consumed_energy == NO_VAL64)
		xstrcat(out, "ExtSensorsJoules=n/s ");
	else
		xstrfmtcat(out, "ExtSensorsJoules=%"PRIu64" ",
			   node_ptr->ext_sensors->consumed_energy);

	if (!node_ptr->ext_sensors
	    || node_ptr->ext_sensors->current_watts == NO_VAL)
		xstrcat(out, "ExtSensorsWatts=n/s ");
	else
		xstrfmtcat(out, "ExtSensorsWatts=%u ",
			   node_ptr->ext_sensors->current_watts);

	if (!node_ptr->ext_sensors
	    || node_ptr->ext_sensors->temperature == NO_VAL)
		xstrcat(out, "ExtSensorsTemp=n/s");
	else
		xstrfmtcat(out, "ExtSensorsTemp=%u",
			   node_ptr->ext_sensors->temperature);

	xstrcat(out, line_end);

	/****** Line ******/
	/* Work on a private copy of the reason: strtok_r() below writes
	 * into the buffer it tokenizes. */
	if (node_ptr->reason && node_ptr->reason[0])
		xstrcat(reason_str, node_ptr->reason);
	if (reason_str) {
		int inx = 1;
		char *save_ptr = NULL, *tok, *user_name;

		/* One output line per newline-separated piece of the reason;
		 * the first piece carries the "Reason=" label and, when a
		 * reason time is set, a [user@time] stamp. */
		tok = strtok_r(reason_str, "\n", &save_ptr);
		while (tok) {
			if (inx == 1) {
				xstrcat(out, "Reason=");
			} else {
				xstrcat(out, line_end);
				xstrcat(out, " ");
			}
			xstrfmtcat(out, "%s", tok);
			if ((inx++ == 1) && node_ptr->reason_time) {
				user_name = uid_to_string(node_ptr->reason_uid);
				slurm_make_time_str(
					(time_t *)&node_ptr->reason_time,
					time_str, sizeof(time_str));
				xstrfmtcat(out, " [%s@%s]",
					   user_name, time_str);
				xfree(user_name);
			}
			tok = strtok_r(NULL, "\n", &save_ptr);
		}
		xfree(reason_str);
	}
	if (one_liner)
		xstrcat(out, "\n");
	else
		xstrcat(out, "\n\n");

	slurm_free_ctl_conf(slurm_ctl_conf_ptr);
	return out;
}
/*
 * _match_node_data - test whether a node's data is already represented by
 *	an existing sinfo record for the fields selected in params.
 * IN sinfo_ptr - existing sinfo record to compare against
 * IN node_ptr - node being considered
 * RET false if this node's data needs to be added to sinfo's table of
 *	data to print, true if it is duplicate/redundant data.
 */
static bool _match_node_data(sinfo_data_t *sinfo_ptr, node_info_t *node_ptr)
{
	/* 64-bit destination: SELECT_NODEDATA_MEM_ALLOC is read into a
	 * uint64_t elsewhere in this file, so a narrower variable here
	 * would be too small for the stored value. */
	uint64_t tmp = 0;

	/* Node-oriented output never merges nodes into one record. */
	if (params.node_flag)
		return false;

	if (params.match_flags.hostnames_flag &&
	    (hostlist_find(sinfo_ptr->hostnames,
			   node_ptr->node_hostname) == -1))
		return false;

	if (params.match_flags.node_addr_flag &&
	    (hostlist_find(sinfo_ptr->node_addr, node_ptr->node_addr) == -1))
		return false;

	/* The following comparisons are skipped while the record has no
	 * node list yet (sinfo_ptr->nodes unset). */
	if (sinfo_ptr->nodes &&
	    params.match_flags.features_flag &&
	    (xstrcmp(node_ptr->features, sinfo_ptr->features)))
		return false;

	if (sinfo_ptr->nodes &&
	    params.match_flags.features_act_flag &&
	    (xstrcmp(node_ptr->features_act, sinfo_ptr->features_act)))
		return false;

	if (sinfo_ptr->nodes &&
	    params.match_flags.gres_flag &&
	    (xstrcmp(node_ptr->gres, sinfo_ptr->gres)))
		return false;

	if (sinfo_ptr->nodes &&
	    params.match_flags.reason_flag &&
	    (xstrcmp(node_ptr->reason, sinfo_ptr->reason)))
		return false;

	if (sinfo_ptr->nodes &&
	    params.match_flags.reason_timestamp_flag &&
	    (node_ptr->reason_time != sinfo_ptr->reason_time))
		return false;

	if (sinfo_ptr->nodes &&
	    params.match_flags.reason_user_flag &&
	    node_ptr->reason_uid != sinfo_ptr->reason_uid) {
		return false;
	}

	/* States are compared by their printed names, so two state values
	 * that render identically count as the same state. */
	if (params.match_flags.state_flag) {
		char *state1, *state2;
		state1 = node_state_string(node_ptr->node_state);
		state2 = node_state_string(sinfo_ptr->node_state);
		if (xstrcmp(state1, state2))
			return false;
	}

	select_g_select_nodeinfo_get(node_ptr->select_nodeinfo,
				     SELECT_NODEDATA_MEM_ALLOC,
				     NODE_STATE_ALLOCATED,
				     &tmp);
	if (params.match_flags.alloc_mem_flag &&
	    (tmp != sinfo_ptr->alloc_memory))
		return false;

	/* If no need to exactly match sizes, just return here
	 * otherwise check cpus, disk, memory and weight individually */
	if (!params.exact_match)
		return true;

	if (params.match_flags.cpus_flag &&
	    (node_ptr->cpus != sinfo_ptr->min_cpus))
		return false;

	if (params.match_flags.sockets_flag &&
	    (node_ptr->sockets != sinfo_ptr->min_sockets))
		return false;
	if (params.match_flags.cores_flag &&
	    (node_ptr->cores != sinfo_ptr->min_cores))
		return false;
	if (params.match_flags.threads_flag &&
	    (node_ptr->threads != sinfo_ptr->min_threads))
		return false;
	if (params.match_flags.sct_flag &&
	    ((node_ptr->sockets != sinfo_ptr->min_sockets) ||
	     (node_ptr->cores != sinfo_ptr->min_cores) ||
	     (node_ptr->threads != sinfo_ptr->min_threads)))
		return false;
	if (params.match_flags.disk_flag &&
	    (node_ptr->tmp_disk != sinfo_ptr->min_disk))
		return false;
	if (params.match_flags.memory_flag &&
	    (node_ptr->real_memory != sinfo_ptr->min_mem))
		return false;
	if (params.match_flags.weight_flag &&
	    (node_ptr->weight != sinfo_ptr->min_weight))
		return false;
	if (params.match_flags.cpu_load_flag &&
	    (node_ptr->cpu_load != sinfo_ptr->min_cpu_load))
		return false;
	if (params.match_flags.free_mem_flag &&
	    (node_ptr->free_mem != sinfo_ptr->min_free_mem))
		return false;
	/* version is a string: compare contents, not pointer identity, so
	 * nodes with equal version text can share a record. */
	if (params.match_flags.version_flag &&
	    xstrcmp(node_ptr->version, sinfo_ptr->version))
		return false;

	return true;
}