/*
 * _handle_subgrps - Split one node's sub-group counts (ALLOCATED / ERROR
 *	sub-blocks, e.g. BlueGene ionodes) into separate sinfo records, then
 *	emit whatever capacity remains as IDLE.
 *
 * sinfo_list   IN/OUT - list that _insert_node_ptr() appends records to
 * part_num     IN     - partition index passed through to _insert_node_ptr()
 * part_ptr     IN     - partition this node belongs to
 * node_ptr     IN/OUT - node being reported; its node_state base bits are
 *	overwritten below (flags are preserved) for each record inserted
 * node_scaling IN     - total scaled size of the node; sub-group sizes are
 *	subtracted from it and the remainder is reported as IDLE
 * RET SLURM_SUCCESS always
 */
static int _handle_subgrps(List sinfo_list, uint16_t part_num,
			   partition_info_t *part_ptr,
			   node_info_t *node_ptr, uint32_t node_scaling)
{
	uint16_t size;
	int *node_state;	/* set by list_next(); NULL => no filter match */
	int i=0, state_cnt = 2;
	ListIterator iterator = NULL;
	/* the two sub-group states a node can carry counts for */
	enum node_states state[] =
		{ NODE_STATE_ALLOCATED, NODE_STATE_ERROR };

	/* If we ever update the hostlist stuff to support this stuff
	 * then we can use this to tack on the end of the node name
	 * the subgrp stuff.  On bluegene systems this would be nice
	 * to see the ionodes in certain states.
	 * When asking for nodes that are reserved, we need to return
	 * all states of those nodes.
	 */
	if (params.state_list)
		iterator = list_iterator_create(params.state_list);

	for(i=0; i<state_cnt; i++) {
		if (iterator) {
			/* tmp_node exists only so the IS_NODE_* macros can
			 * inspect the requested state value; only node_state
			 * is initialized (the macros read nothing else). */
			node_info_t tmp_node, *tmp_node_ptr = &tmp_node;
			while ((node_state = list_next(iterator))) {
				tmp_node_ptr->node_state = *node_state;
				/* report this sub-group if the user asked for
				 * this state, for DRAIN(ING), or for RES
				 * (reserved nodes show all states) */
				if ((((state[i] == NODE_STATE_ALLOCATED)
				      && IS_NODE_DRAINING(tmp_node_ptr))
				     || (*node_state == NODE_STATE_DRAIN))
				    || (*node_state == state[i])
				    || (*node_state == NODE_STATE_RES))
					break;
			}
			list_iterator_reset(iterator);
			/* no requested state matched => skip this sub-group */
			if (!node_state)
				continue;
		}
		/* ask the select plugin how much of the node is in state[i];
		 * only emit a record when the count is non-zero */
		if (select_g_select_nodeinfo_get(node_ptr->select_nodeinfo,
						 SELECT_NODEDATA_SUBCNT,
						 state[i],
						 &size) == SLURM_SUCCESS
		    && size) {
			node_scaling -= size;
			/* keep flag bits, replace base state with state[i]
			 * for the record being inserted */
			node_ptr->node_state &= NODE_STATE_FLAGS;
			node_ptr->node_state |= state[i];
			_insert_node_ptr(sinfo_list, part_num, part_ptr,
					 node_ptr, size);
		}
	}

	/* now handle the idle */
	if (iterator) {
		while ((node_state = list_next(iterator))) {
			node_info_t tmp_node, *tmp_node_ptr = &tmp_node;
			tmp_node_ptr->node_state = *node_state;
			/* IDLE remainder is shown if the user asked for
			 * IDLE, DRAIN(ED), or RES */
			if (((*node_state == NODE_STATE_DRAIN)
			     || IS_NODE_DRAINED(tmp_node_ptr))
			    || (*node_state == NODE_STATE_IDLE)
			    || (*node_state == NODE_STATE_RES))
				break;
		}
		list_iterator_destroy(iterator);
		if (!node_state)
			return SLURM_SUCCESS;
	}

	node_ptr->node_state &= NODE_STATE_FLAGS;
	node_ptr->node_state |= NODE_STATE_IDLE;
	/* cast guards against the unsigned subtraction above having
	 * wrapped when sub-group counts exceeded node_scaling */
	if ((int)node_scaling > 0)
		_insert_node_ptr(sinfo_list, part_num, part_ptr,
				 node_ptr, node_scaling);

	return SLURM_SUCCESS;
}
/*
 * _filter_out - Determine if the specified node should be filtered out or
 *	reported.
 * node_ptr IN - node to consider filtering out
 * RET - true if node should not be reported, false otherwise
 */
static bool _filter_out(node_info_t *node_ptr)
{
	/* cached across calls on purpose: params.nodes never changes
	 * within a run, and the hostlist is never freed (process-lifetime) */
	static hostlist_t host_list = NULL;

	/* filter by explicit node-name list, if one was given */
	if (params.nodes) {
		if (host_list == NULL)
			host_list = hostlist_create(params.nodes);
		if (hostlist_find (host_list, node_ptr->name) == -1)
			return true;
	}

	/* --dead: keep only non-responding nodes */
	if (params.dead_nodes && !IS_NODE_NO_RESPOND(node_ptr))
		return true;

	/* --responding: keep only responding nodes */
	if (params.responding_nodes && IS_NODE_NO_RESPOND(node_ptr))
		return true;

	/* filter by requested node states; the node is kept if ANY
	 * requested state matches (logical OR over the list) */
	if (params.state_list) {
		int *node_state;
		bool match = false;
		uint16_t base_state;
		ListIterator iterator;
		uint16_t cpus = 0;
		/* tmp_node lets the IS_NODE_* macros classify the
		 * REQUESTED state value; only node_state is set */
		node_info_t tmp_node, *tmp_node_ptr = &tmp_node;

		iterator = list_iterator_create(params.state_list);
		while ((node_state = list_next(iterator))) {
			tmp_node_ptr->node_state = *node_state;
			if (*node_state == NODE_STATE_DRAIN) {
				/* We search for anything that has the
				 * drain flag set */
				if (IS_NODE_DRAIN(node_ptr)) {
					match = true;
					break;
				}
			} else if (IS_NODE_DRAINING(tmp_node_ptr)) {
				/* We search for anything that gets mapped to
				 * DRAINING in node_state_string */
				if (IS_NODE_DRAINING(node_ptr)) {
					match = true;
					break;
				}
			} else if (IS_NODE_DRAINED(tmp_node_ptr)) {
				/* We search for anything that gets mapped to
				 * DRAINED in node_state_string */
				if (IS_NODE_DRAINED(node_ptr)) {
					match = true;
					break;
				}
			} else if (*node_state & NODE_STATE_FLAGS) {
				/* requested value is a flag (e.g. COMPLETING):
				 * match on the flag bit alone */
				if (*node_state & node_ptr->node_state) {
					match = true;
					break;
				}
			} else if (*node_state == NODE_STATE_ERROR) {
				/* ERROR is tracked as a sub-group count by
				 * the select plugin, not as a base state */
				slurm_get_select_nodeinfo(
					node_ptr->select_nodeinfo,
					SELECT_NODEDATA_SUBCNT,
					NODE_STATE_ERROR,
					&cpus);
				if (cpus) {
					match = true;
					break;
				}
			} else if (*node_state == NODE_STATE_ALLOCATED) {
				slurm_get_select_nodeinfo(
					node_ptr->select_nodeinfo,
					SELECT_NODEDATA_SUBCNT,
					NODE_STATE_ALLOCATED,
					&cpus);
				/* on BlueGene a fully allocated/completing
				 * midplane may report a zero sub-count; fall
				 * back to the node's full cpu count */
				if (params.cluster_flags & CLUSTER_FLAG_BG
				    && !cpus
				    && (IS_NODE_ALLOCATED(node_ptr)
					|| IS_NODE_COMPLETING(node_ptr)))
					cpus = node_ptr->cpus;
				if (cpus) {
					match = true;
					break;
				}
			} else if (*node_state == NODE_STATE_IDLE) {
				/* IDLE match ignores only the NO_RESPOND
				 * flag; any other flag disqualifies */
				base_state = node_ptr->node_state &
					(~NODE_STATE_NO_RESPOND);
				if (base_state == NODE_STATE_IDLE) {
					match = true;
					break;
				}
			} else {
				/* plain base-state comparison for everything
				 * else (DOWN, UNKNOWN, ...) */
				base_state =
					node_ptr->node_state & NODE_STATE_BASE;
				if (base_state == *node_state) {
					match = true;
					break;
				}
			}
		}
		list_iterator_destroy(iterator);
		if (!match)
			return true;
	}

	return false;
}
/* block_state_mutex should be locked before calling */ static int _check_all_blocks_error(int node_inx, time_t event_time, char *reason) { bg_record_t *bg_record = NULL; ListIterator itr = NULL; struct node_record send_node, *node_ptr; struct config_record config_rec; int total_cpus = 0; int rc = SLURM_SUCCESS; xassert(node_inx <= node_record_count); node_ptr = &node_record_table_ptr[node_inx]; /* only do this if the node isn't in the DRAINED state. DRAINING is ok */ if (IS_NODE_DRAINED(node_ptr)) return rc; memset(&send_node, 0, sizeof(struct node_record)); memset(&config_rec, 0, sizeof(struct config_record)); send_node.name = xstrdup(node_ptr->name); send_node.config_ptr = &config_rec; /* here we need to check if there are any other blocks on this midplane and adjust things correctly */ itr = list_iterator_create(bg_lists->main); while ((bg_record = list_next(itr))) { /* only look at other nodes in error state */ if (!(bg_record->state & BG_BLOCK_ERROR_FLAG)) continue; if (!bit_test(bg_record->mp_bitmap, node_inx)) continue; if (bg_record->cpu_cnt >= bg_conf->cpus_per_mp) { total_cpus = bg_conf->cpus_per_mp; break; } else total_cpus += bg_record->cpu_cnt; } list_iterator_destroy(itr); send_node.cpus = total_cpus; config_rec.cpus = total_cpus; if (send_node.cpus) { if (!reason) reason = "update block: setting partial node down."; if (!node_ptr->reason) node_ptr->reason = xstrdup(reason); node_ptr->reason_time = event_time; node_ptr->reason_uid = slurm_get_slurm_user_id(); send_node.node_state = NODE_STATE_ERROR; rc = clusteracct_storage_g_node_down(acct_db_conn, &send_node, event_time, reason, node_ptr->reason_uid); } else { if (node_ptr->reason) xfree(node_ptr->reason); node_ptr->reason_time = 0; node_ptr->reason_uid = NO_VAL; send_node.node_state = NODE_STATE_IDLE; rc = clusteracct_storage_g_node_up(acct_db_conn, &send_node, event_time); } xfree(send_node.name); return rc; }