/*
 * configure_small_block - describe a small (single midplane) block to the
 *	bridge API: mark the partition as small, attach its one midplane and
 *	add the nodecards selected by the block's ionode bitmap.
 *
 * IN bg_record - block to configure.  Must have an ionode_bitmap and
 *	mp_count == 1.
 * RET SLURM_SUCCESS on success, SLURM_ERROR on failure.  Several bridge
 *	failures call fatal() instead of returning.
 *
 * When HAVE_BG_FILES is not defined only the argument checks are done.
 */
extern int configure_small_block(bg_record_t *bg_record)
{
	int rc = SLURM_SUCCESS;
#if defined HAVE_BG_FILES
	bool small = true;
	ba_mp_t* ba_node = NULL;
	rm_BP_t *curr_mp = NULL;
	rm_bp_id_t mp_id = NULL;
#ifndef HAVE_BGL
	rm_nodecard_id_t nc_char = NULL;
	/* Objects created with bridge_new_*() for a sub-nodecard block.
	 * Kept at function scope so the cleanup path can release them. */
	rm_nodecard_t *sub_ncard = NULL;
	rm_ionode_t *ionode = NULL;
#endif
	int nc_id = 0;
	int num_ncards = 0, sub_nodecard = 0, ionode_card = 0, nc_count = 0;
	rm_nodecard_t *ncard = NULL;
	rm_nodecard_list_t *ncard_list = NULL;
	int num, i;
	int use_nc[bg_conf->mp_nodecard_cnt];
	double nc_pos = 0;
#endif
	xassert(bg_record->ionode_bitmap);
	if (bg_record->mp_count != 1) {
		error("Requesting small block with %d mps, needs to be 1.",
		      bg_record->mp_count);
		return SLURM_ERROR;
	}
/* 	info("configuring small block on ionodes %s out of %d ncs",  */
/* 	     bg_record->ionodes, bg_conf->mp_nodecard_cnt); */
#if defined HAVE_BG_FILES
	/* set that we are doing a small block */
	if ((rc = bridge_set_data(bg_record->bg_block, RM_PartitionSmall,
				  &small)) != SLURM_SUCCESS) {
		fatal("bridge_set_data(RM_PartitionSmall): %s",
		      bg_err_str(rc));
	}

	/* A block smaller than one nodecard still uses one nodecard, but
	 * only a single ionode of it (sub_nodecard). */
	num_ncards = bg_record->cnode_cnt/bg_conf->nodecard_cnode_cnt;
	if (num_ncards < 1) {
		num_ncards = 1;
		sub_nodecard = 1;
	}
	memset(use_nc, 0, sizeof(use_nc));

	/* find out how many nodecards to get for each ionode */
	for (i = 0; i < bg_conf->ionodes_per_mp; i++) {
		if (bit_test(bg_record->ionode_bitmap, i)) {
			if (bg_conf->nc_ratio > 1) {
				int j = 0;
				for (j = 0; j < bg_conf->nc_ratio; j++)
					use_nc[(int)nc_pos+j] = 1;
			} else {
				use_nc[(int)nc_pos] = 1;
				if (i%2)
					ionode_card = 1;
			}
		}
		nc_pos += bg_conf->nc_ratio;
	}

	if ((rc = bridge_set_data(bg_record->bg_block,
				  RM_PartitionNodeCardNum,
				  &num_ncards)) != SLURM_SUCCESS) {
		fatal("bridge_set_data: RM_PartitionNodeCardNum: %s",
		      bg_err_str(rc));
	}

	ba_node = list_peek(bg_record->ba_mp_list);

	if (_get_mp_by_location(bg, ba_node->coord, &curr_mp)
	    == SLURM_ERROR) {
		fatal("_get_mp_by_location()");
	}

	/* Set the one MP */
	if ((rc = bridge_set_data(bg_record->bg_block,
				  RM_PartitionBPNum,
				  &bg_record->mp_count)) != SLURM_SUCCESS) {
		fatal("bridge_set_data: RM_PartitionBPNum: %s",
		      bg_err_str(rc));
		return SLURM_ERROR;
	}
	if ((rc = bridge_set_data(bg_record->bg_block,
				  RM_PartitionFirstBP,
				  curr_mp)) != SLURM_SUCCESS) {
		fatal("bridge_set_data("
		      "BRIDGE_PartitionFirstBP): %s",
		      bg_err_str(rc));
		return SLURM_ERROR;
	}

	/* find the mp_id of the mp to get the small32 */
	if ((rc = bridge_get_data(curr_mp, RM_BPID, &mp_id))
	    != SLURM_SUCCESS) {
		error("bridge_get_data(): %d", rc);
		return SLURM_ERROR;
	}

	if (!mp_id) {
		error("No MP ID was returned from database");
		return SLURM_ERROR;
	}

	if ((rc = bridge_get_nodecards(mp_id, &ncard_list))
	    != SLURM_SUCCESS) {
		error("bridge_get_nodecards(%s): %d",
		      mp_id, rc);
		free(mp_id);
		return SLURM_ERROR;
	}
	free(mp_id);

	/* From here on ncard_list is held, so every failure must leave
	 * through cleanup rather than return directly. */
	if ((rc = bridge_get_data(ncard_list, RM_NodeCardListSize, &num))
	    != SLURM_SUCCESS) {
		error("bridge_get_data(RM_NodeCardListSize): %s",
		      bg_err_str(rc));
		rc = SLURM_ERROR;
		goto cleanup;
	}
	/* Only logged; the loop below then adds as many as it can find. */
	if (num_ncards > num) {
		error("You requested more (%d > %d) nodecards "
		      "than are available on this block %s",
		      num_ncards, num, bg_record->mp_str);
	}

	for (i = 0; i < num; i++) {
		if (i) {
			if ((rc = bridge_get_data(ncard_list,
						  RM_NodeCardListNext,
						  &ncard)) != SLURM_SUCCESS) {
				error("bridge_get_data"
				      "(RM_NodeCardListNext): %s",
				      bg_err_str(rc));
				rc = SLURM_ERROR;
				goto cleanup;
			}
		} else {
			if ((rc = bridge_get_data(ncard_list,
						  RM_NodeCardListFirst,
						  &ncard)) != SLURM_SUCCESS) {
				error("bridge_get_data"
				      "(RM_NodeCardListFirst): %s",
				      bg_err_str(rc));
				rc = SLURM_ERROR;
				goto cleanup;
			}
		}

#ifdef HAVE_BGL
		/* on BG/L we assume the order never changes when the
		   system is up.  This could change when a reboot of
		   the system happens, but that should be rare.
		*/
		nc_id = i;
		/* An index outside use_nc[] cannot have been selected. */
		if ((nc_id >= (int)bg_conf->mp_nodecard_cnt)
		    || !use_nc[nc_id])
			continue;
#else
		if ((rc = bridge_get_data(ncard,
					  RM_NodeCardID,
					  &nc_char)) != SLURM_SUCCESS) {
			error("bridge_get_data(RM_NodeCardID): %s",
			      bg_err_str(rc));
			rc = SLURM_ERROR;
			goto cleanup;
		}

		if (!nc_char) {
			error("No NodeCard ID was returned from database");
			rc = SLURM_ERROR;
			goto cleanup;
		}

		/* The numeric id follows the first character of the name. */
		nc_id = atoi((char*)nc_char+1);

		/* Skip nodecards that were not selected.  An id outside
		 * use_nc[] cannot have been selected either, and must not
		 * be used as an index. */
		if ((nc_id < 0)
		    || (nc_id >= (int)bg_conf->mp_nodecard_cnt)
		    || !use_nc[nc_id]) {
			free(nc_char);
			nc_char = NULL;
			continue;
		}

		if (sub_nodecard) {
			char *ionode_id = "J00";

			/* Build a private nodecard that carries just the
			 * one ionode and hand that to the partition instead
			 * of the nodecard owned by ncard_list. */
			if ((rc = bridge_new_nodecard(&sub_ncard))
			    != SLURM_SUCCESS) {
				error("bridge_new_nodecard(): %s",
				      bg_err_str(rc));
				sub_ncard = NULL;
				rc = SLURM_ERROR;
				goto cleanup;
			}
			ncard = sub_ncard;

			if ((rc = bridge_set_data(ncard,
						  RM_NodeCardID,
						  nc_char))
			    != SLURM_SUCCESS) {
				error("bridge_set_data("
				      "RM_NodeCardID): %s",
				      bg_err_str(rc));
				rc = SLURM_ERROR;
				goto cleanup;
			}

			if ((rc = bridge_set_data(ncard,
						  RM_NodeCardIONodeNum,
						  &sub_nodecard))
			    != SLURM_SUCCESS) {
				error("bridge_set_data("
				      "RM_NodeCardIONodeNum): %s",
				      bg_err_str(rc));
				rc = SLURM_ERROR;
				goto cleanup;
			}

			if ((rc = bridge_new_ionode(&ionode))
			    != SLURM_SUCCESS) {
				error("bridge_new_ionode(): %s",
				      bg_err_str(rc));
				ionode = NULL;
				rc = SLURM_ERROR;
				goto cleanup;
			}

			if (ionode_card)
				ionode_id = "J01";

			if ((rc = bridge_set_data(ionode,
						  RM_IONodeID,
						  ionode_id))
			    != SLURM_SUCCESS) {
				error("bridge_set_data("
				      "RM_IONodeID): %s",
				      bg_err_str(rc));
				rc = SLURM_ERROR;
				goto cleanup;
			}

			if ((rc = bridge_set_data(ncard,
						  RM_NodeCardFirstIONode,
						  ionode))
			    != SLURM_SUCCESS) {
				error("bridge_set_data("
				      "RM_NodeCardFirstIONode): %s",
				      bg_err_str(rc));
				rc = SLURM_ERROR;
				goto cleanup;
			}

			/* Clear the pointer first so cleanup never frees
			 * the ionode a second time. */
			rc = bridge_free_ionode(ionode);
			ionode = NULL;
			if (rc != SLURM_SUCCESS) {
				error("bridge_free_ionode(): %s",
				      bg_err_str(rc));
				rc = SLURM_ERROR;
				goto cleanup;
			}
		}
		free(nc_char);
		nc_char = NULL;
#endif

		if (nc_count) {
			if ((rc = bridge_set_data(bg_record->bg_block,
						  RM_PartitionNextNodeCard,
						  ncard))
			    != SLURM_SUCCESS) {
				error("bridge_set_data("
				      "RM_PartitionNextNodeCard): %s",
				      bg_err_str(rc));
				rc = SLURM_ERROR;
				goto cleanup;
			}
		} else {
			if ((rc = bridge_set_data(bg_record->bg_block,
						  RM_PartitionFirstNodeCard,
						  ncard))
			    != SLURM_SUCCESS) {
				error("bridge_set_data("
				      "RM_PartitionFirstNodeCard): %s",
				      bg_err_str(rc));
				rc = SLURM_ERROR;
				goto cleanup;
			}
		}

		nc_count++;
#ifndef HAVE_BGL
		if (sub_nodecard) {
			/* Clear the pointer first so cleanup never frees
			 * the nodecard a second time. */
			rc = bridge_free_nodecard(sub_ncard);
			sub_ncard = NULL;
			if (rc != SLURM_SUCCESS) {
				error("bridge_free_nodecard(): %s",
				      bg_err_str(rc));
				rc = SLURM_ERROR;
				goto cleanup;
			}
		}
#endif
		if (nc_count == num_ncards)
			break;
	}
cleanup:
#ifndef HAVE_BGL
	/* These are only still set when an error path jumped here. */
	free(nc_char);
	if (ionode)
		bridge_free_ionode(ionode);
	if (sub_ncard)
		bridge_free_nodecard(sub_ncard);
#endif
	if (ncard_list) {
		/* Use a separate variable: rc may already hold SLURM_ERROR
		 * from the code above and must not be overwritten by a
		 * successful free. */
		int free_rc = bridge_free_nodecard_list(ncard_list);

		if (free_rc != SLURM_SUCCESS) {
			error("bridge_free_nodecard_list(): %s",
			      bg_err_str(free_rc));
			return SLURM_ERROR;
		}
	}
#endif
	if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_WIRES)
		info("making the small block");
	if (rc != SLURM_ERROR)
		rc = SLURM_SUCCESS;
	return rc;
}
/*
 * _test_down_nodecards - run _test_nodecard_state() on every nodecard of
 *	one base partition and report whether any of them failed the test.
 *
 * IN bp_ptr - base partition to examine.
 * IN slurmctld_locked - passed unchanged to _test_nodecard_state().
 *	Per the original note on this function: draining a node could
 *	take the slurmctld node lock, so the caller sets this when it
 *	already holds that lock and draining must not lock it again.
 * RET SLURM_SUCCESS if every bridge call worked and no nodecard test
 *	returned non-success, SLURM_ERROR otherwise.
 */
static int _test_down_nodecards(rm_BP_t *bp_ptr, bool slurmctld_locked)
{
	rm_bp_id_t bp_id = NULL;
	int num = 0;
	int marked_down = 0;
	int i = 0;
	int rc = SLURM_SUCCESS;
	rm_nodecard_list_t *ncard_list = NULL;
	rm_nodecard_t *ncard = NULL;
	//bitstr_t *ionode_bitmap = NULL;
	//bg_record_t *bg_record = NULL;
	char *node_name = NULL;
	//int bp_bit = 0;
	//int io_cnt = 1;

	/* Translate 1 nodecard count to ionode count */
/* 	if ((io_cnt *= bg_conf->io_ratio)) */
/* 		io_cnt--; */

	if ((rc = bridge_get_data(bp_ptr, RM_BPID, &bp_id))
	    != SLURM_SUCCESS) {
		error("bridge_get_data(RM_BPID): %s",
		      bg_err_str(rc));
		return SLURM_ERROR;
	}

	/* Same guard configure_small_block() applies to its midplane id:
	 * a NULL id must not be handed to the bridge or printed via %s. */
	if (!bp_id) {
		error("No BP ID was returned from database");
		return SLURM_ERROR;
	}

	if ((rc = bridge_get_nodecards(bp_id, &ncard_list))
	    != SLURM_SUCCESS) {
		error("bridge_get_nodecards(%s): %d",
		      bp_id, rc);
		rc = SLURM_ERROR;
		goto clean_up;
	}

	/* The node_name will only be NULL if this system
	   doesn't really have the node.
	*/
	if (!(node_name = _get_bp_node_name(bp_ptr))) {
		rc = SLURM_ERROR;
		goto clean_up;
	}

	if ((rc = bridge_get_data(ncard_list, RM_NodeCardListSize, &num))
	    != SLURM_SUCCESS) {
		error("bridge_get_data(RM_NodeCardListSize): %s",
		      bg_err_str(rc));
		rc = SLURM_ERROR;
		goto clean_up;
	}

	for (i = 0; i < num; i++) {
		if (i) {
			if ((rc = bridge_get_data(ncard_list,
						  RM_NodeCardListNext,
						  &ncard)) != SLURM_SUCCESS) {
				error("bridge_get_data"
				      "(RM_NodeCardListNext): %s",
				      bg_err_str(rc));
				rc = SLURM_ERROR;
				goto clean_up;
			}
		} else {
			if ((rc = bridge_get_data(ncard_list,
						  RM_NodeCardListFirst,
						  &ncard)) != SLURM_SUCCESS) {
				error("bridge_get_data"
				      "(RM_NodeCardListFirst): %s",
				      bg_err_str(rc));
				rc = SLURM_ERROR;
				goto clean_up;
			}
		}

		/* Count failures only; keep going so every nodecard of
		 * the base partition gets tested. */
		if (_test_nodecard_state(ncard, i, node_name,
					 slurmctld_locked)
		    != SLURM_SUCCESS)
			marked_down++;
	}

	/* this code is here to bring up a block after it is in an
	   error state.  It is commented out because it hasn't been
	   tested very well yet.  If you ever want to use this code
	   there should probably be a configurable option in the
	   bluegene.conf file that gives you an option as to have this
	   happen or not automatically.
	*/
/* 	if (ionode_bitmap) { */
/* 		info("got ionode_bitmap"); */

/* 		bit_not(ionode_bitmap); */
/* 		up_nodecard(node_name, ionode_bitmap); */
/* 	} else { */
/* 		int ret = 0; */
/* 		info("no ionode_bitmap"); */
/* 		ListIterator itr = NULL; */
/* 		slurm_mutex_lock(&block_state_mutex); */
/* 		itr = list_iterator_create(bg_lists->main); */
/* 		while ((bg_record = list_next(itr))) { */
/* 			if (bg_record->job_running != BLOCK_ERROR_STATE) */
/* 				continue; */

/* 			if (!bit_test(bg_record->mp_bitmap, bp_bit)) */
/* 				continue; */
/* 			info("bringing %s back to service", */
/* 			     bg_record->bg_block_id); */
/* 			bg_record->job_running = NO_JOB_RUNNING; */
/* 			bg_record->state = BG_BLOCK_FREE; */
/* 			last_bg_update = time(NULL); */
/* 		} */
/* 		list_iterator_destroy(itr); */
/* 		slurm_mutex_unlock(&block_state_mutex); */

/* 		/\* FIX ME: This needs to call the opposite of */
/* 		   slurm_drain_nodes which does not yet exist. */
/* 		*\/ */
/* 		if ((ret = node_already_down(node_name))) { */
/* 			/\* means it was drained *\/ */
/* 			if (ret == 2) { */
/* 				/\* debug("node %s put back into */
/* 				   service after " *\/ */
/* 				/\*       "being in an error state", *\/ */
/* 				/\*       node_name); *\/ */
/* 			} */
/* 		} */
/* 	} */

clean_up:
	if (ncard_list)
		bridge_free_nodecard_list(ncard_list);
	xfree(node_name);
/* 	if (ionode_bitmap) */
/* 		FREE_NULL_BITMAP(ionode_bitmap); */
	free(bp_id);

	/* If we marked any nodecard down we need to state it here */
	if ((rc == SLURM_SUCCESS) && marked_down)
		rc = SLURM_ERROR;

	return rc;
}