/*
 * print_bg_record - log the contents of a block record.
 *
 * A NULL record is reported through error() and nothing else is logged.
 * With _DEBUG enabled every field is written on its own info() line (the
 * block id and the midplane bitmap only when they are set).  Without
 * _DEBUG a single summary line holding the block id, the formatted node
 * name and the connection type of dimension 0 is written instead.
 */
extern void print_bg_record(bg_record_t* bg_record)
{
	if (bg_record == NULL) {
		error("print_bg_record, record given is null");
		return;
	}

#if _DEBUG
	info(" bg_record: ");

	if (bg_record->bg_block_id != NULL) {
		info("\tbg_block_id: %s", bg_record->bg_block_id);
	}

	info("\tnodes: %s", bg_record->mp_str);
	info("\tsize: %d MPs %u Nodes %d cpus",
	     bg_record->mp_count, bg_record->cnode_cnt, bg_record->cpu_cnt);
	info("\tgeo: %ux%ux%u",
	     bg_record->geo[X], bg_record->geo[Y], bg_record->geo[Z]);
	info("\tconn_type: %s", conn_type_string(bg_record->conn_type[0]));

#ifdef HAVE_BGL
	info("\tnode_use: %s", node_use_string(bg_record->node_use));
#endif

	if (bg_record->mp_bitmap != NULL) {
		char bitmap_text[BITSIZE];

		bit_fmt(bitmap_text, BITSIZE, bg_record->mp_bitmap);
		info("\tbitmap: %s", bitmap_text);
	}
#else
	{
		/* Scratch space for the name built by format_node_name() */
		char node_name[256];

		format_node_name(bg_record, node_name, sizeof(node_name));
		info("Record: BlockID:%s Nodes:%s Conn:%s",
		     bg_record->bg_block_id,
		     node_name,
		     conn_type_string(bg_record->conn_type[0]));
	}
#endif
}
/*
 * _update_block_record - copy the fields of one block into a tree row.
 *
 * block_ptr - block whose values are displayed
 * treestore - GTK tree store that owns the row
 * iter      - row inside treestore to overwrite
 *
 * The running job is rendered as its number, or as "-" when
 * job_running is not above NO_JOB_RUNNING.  The two BGL-only columns
 * are written only when cluster_flags has CLUSTER_FLAG_BGL set.
 */
static void _update_block_record(sview_block_info_t *block_ptr,
				 GtkTreeStore *treestore,
				 GtkTreeIter *iter)
{
	char job_text[20];
	char cnode_text[20];

	if (block_ptr->job_running <= NO_JOB_RUNNING)
		snprintf(job_text, sizeof(job_text), "-");
	else
		snprintf(job_text, sizeof(job_text), "%d",
			 block_ptr->job_running);

	convert_num_unit((float)block_ptr->cnode_cnt,
			 cnode_text, sizeof(cnode_text), UNIT_NONE);

	/* One call for all common columns is slightly cheaper than
	 * setting them one at a time. */
	gtk_tree_store_set(treestore, iter,
			   SORTID_BLOCK,        block_ptr->bg_block_name,
			   SORTID_COLOR,        sview_colors[block_ptr->color_inx],
			   SORTID_COLOR_INX,    block_ptr->color_inx,
			   SORTID_CONN,         conn_type_string(
						     block_ptr->bg_conn_type),
			   SORTID_IMAGERAMDISK, block_ptr->imageramdisk,
			   SORTID_IMAGELINUX,   block_ptr->imagelinux,
			   SORTID_IMAGEMLOADER, block_ptr->imagemloader,
			   SORTID_JOB,          job_text,
			   SORTID_NODE_INX,     block_ptr->bp_inx,
			   SORTID_MP_STR,       cnode_text,
			   SORTID_NODELIST,     block_ptr->mp_str,
			   SORTID_PARTITION,    block_ptr->slurm_part_name,
			   SORTID_SMALL_BLOCK,  block_ptr->small_block,
			   SORTID_STATE,        bg_block_state_string(
						     block_ptr->state),
			   SORTID_USER,         block_ptr->bg_user_name,
			   SORTID_UPDATED,      1,
			   -1);

	if (!(cluster_flags & CLUSTER_FLAG_BGL))
		return;

	gtk_tree_store_set(treestore, iter,
			   SORTID_IMAGEBLRTS, block_ptr->imageblrts,
			   SORTID_USE,        node_use_string(
						   block_ptr->bg_node_use),
			   -1);
}
/*
 * _find_matching_block - walk block_list and return the first block that
 * passes every check below for this job/request.
 *
 * block_list         - blocks to examine, in list order
 * job_ptr            - job being placed; its job_id and
 *                      details->req_node_bitmap are read
 * slurm_block_bitmap - nodes the job may use; a block is rejected unless
 *                      bit_super_set(block bitmap, this bitmap) holds
 * request            - procs, images, conn_type[0], geometry and rotate
 *                      are compared against each block
 * max_cpus           - upper bound on block cpu_cnt, ignored when NO_VAL
 * allow              - OUT: set to 1 when a block is skipped only because
 *                      of an image mismatch (or, on BGP, because a booted
 *                      small block would need a reboot) while check_image
 *                      is set
 * check_image        - non-zero enables the image comparisons
 * overlap_check, overlapped_list, query_mode
 *                    - forwarded to _check_for_booted_overlapping_blocks();
 *                      query_mode also selects whether busy/freeing
 *                      blocks are skipped
 *
 * Returns the matching record, or NULL when the list is exhausted
 * (list_next() returned NULL).  Side effect: job_running of every visited
 * record that has a job_ptr is re-synchronised from job_ptr->job_id.
 */
static bg_record_t *_find_matching_block(List block_list,
					 struct job_record* job_ptr,
					 bitstr_t* slurm_block_bitmap,
					 select_ba_request_t *request,
					 uint32_t max_cpus,
					 int *allow, int check_image,
					 int overlap_check,
					 List overlapped_list,
					 uint16_t query_mode)
{
	bg_record_t *bg_record = NULL;
	ListIterator itr = NULL;
	char tmp_char[256];

	if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK)
		info("number of blocks to check: %d state %d "
		     "asking for %u-%u cpus",
		     list_count(block_list),
		     query_mode, request->procs, max_cpus);

	itr = list_iterator_create(block_list);
	while ((bg_record = list_next(itr))) {
		/* If test_only we want to fall through to tell the
		   scheduler that it is runnable just not right now.
		*/

		/* The job running could be reset so set it back up
		   here if there is a job_ptr
		*/
		if (bg_record->job_ptr)
			bg_record->job_running = bg_record->job_ptr->job_id;

		/* Block is messed up somehow (BLOCK_ERROR_STATE or the
		 * BG_BLOCK_ERROR_FLAG state bit): ignore it. */
		if ((bg_record->job_running == BLOCK_ERROR_STATE)
		    || (bg_record->state & BG_BLOCK_ERROR_FLAG)) {
			if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK)
				info("block %s is in an error "
				     "state (can't use)",
				     bg_record->bg_block_id);
			continue;
		} else if ((bg_conf->layout_mode == LAYOUT_DYNAMIC)
			   || ((!SELECT_IS_CHECK_FULL_SET(query_mode)
				|| SELECT_IS_MODE_RUN_NOW(query_mode))
			       && (bg_conf->layout_mode != LAYOUT_DYNAMIC))) {
			if (bg_record->free_cnt) {
				/* No reason to look at a block that
				   is being freed unless we are
				   running static and looking at the
				   full set.
				*/
				if (bg_conf->slurm_debug_flags
				    & DEBUG_FLAG_BG_PICK)
					info("block %s being free for other "
					     "job(s), skipping",
					     bg_record->bg_block_id);
				continue;
			} else if ((bg_record->job_running != NO_JOB_RUNNING)
				   && (bg_record->job_running
				       != job_ptr->job_id)) {
				/* Look here if you are trying to run now or
				   if you aren't looking at the full set.  We
				   don't continue on running blocks for the
				   full set because we are seeing if the job
				   can ever run so look here.
				*/
				if (bg_conf->slurm_debug_flags
				    & DEBUG_FLAG_BG_PICK)
					info("block %s in use by %s job %d",
					     bg_record->bg_block_id,
					     bg_record->user_name,
					     bg_record->job_running);
				continue;
			}
		}

		/* Check processor count */
		if ((bg_record->cpu_cnt < request->procs)
		    || ((max_cpus != NO_VAL)
			&& (bg_record->cpu_cnt > max_cpus))) {
			/* We use the processor count per block here
			   mostly to see if we can run on a smaller block.
			*/
			if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) {
				convert_num_unit((float)bg_record->cpu_cnt,
						 tmp_char,
						 sizeof(tmp_char), UNIT_NONE);
				info("block %s CPU count (%s) not suitable",
				     bg_record->bg_block_id,
				     tmp_char);
			}
			continue;
		}

		/*
		 * Next we check that this block's bitmap is within
		 * the set of nodes which the job can use.
		 * Nodes not available for the job could be down,
		 * drained, allocated to some other job, or in some
		 * SLURM block not available to this job.
		 */
		if (!bit_super_set(bg_record->mp_bitmap, slurm_block_bitmap)) {
			if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) {
				char *temp = bitmap2node_name(
					bg_record->mp_bitmap);
				char *temp2 = bitmap2node_name(
					slurm_block_bitmap);
				info("bg block %s has nodes not "
				     "usable by this job %s %s",
				     bg_record->bg_block_id, temp, temp2);
				xfree(temp);
				xfree(temp2);
			}
			continue;
		}

		/*
		 * Ensure that any required nodes are in this BG block
		 */
		if (job_ptr->details->req_node_bitmap
		    && (!bit_super_set(job_ptr->details->req_node_bitmap,
				       bg_record->mp_bitmap))) {
			if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK)
				info("bg block %s lacks required nodes",
				     bg_record->bg_block_id);
			continue;
		}

		/* The helper receives the live iterator as well as the
		 * list; a non-zero result rejects this block. */
		if (_check_for_booted_overlapping_blocks(
			    block_list, itr, bg_record,
			    overlap_check, overlapped_list, query_mode))
			continue;

		/* Image checks: a mismatch skips the block but records in
		 * *allow that one was skipped for this reason. */
		if (check_image) {
#ifdef HAVE_BGL
			if (request->blrtsimage &&
			    strcasecmp(request->blrtsimage,
				       bg_record->blrtsimage)) {
				*allow = 1;
				continue;
			}
#endif
#ifdef HAVE_BG_L_P
			if (request->linuximage &&
			    strcasecmp(request->linuximage,
				       bg_record->linuximage)) {
				*allow = 1;
				continue;
			}

			if (request->ramdiskimage &&
			    strcasecmp(request->ramdiskimage,
				       bg_record->ramdiskimage)) {
				*allow = 1;
				continue;
			}
#endif
			if (request->mloaderimage &&
			    strcasecmp(request->mloaderimage,
				       bg_record->mloaderimage)) {
				*allow = 1;
				continue;
			}
		}

		/***********************************************/
		/* check the connection type specified matches */
		/***********************************************/
		if ((request->conn_type[0] != bg_record->conn_type[0])
		    && (request->conn_type[0] != SELECT_NAV)) {
#ifdef HAVE_BGP
			if (request->conn_type[0] >= SELECT_SMALL) {
				/* we only want to reboot blocks if
				   they have to be so skip booted
				   blocks if in small state
				*/
				if (check_image
				    && (bg_record->state
					== BG_BLOCK_INITED)) {
					*allow = 1;
					continue;
				}
				goto good_conn_type;
			} else if (bg_record->conn_type[0] >= SELECT_SMALL) {
				/* since we already checked to see if
				   the cpus were good this means we are
				   looking for a block in a range that
				   includes small and regular blocks.
				   So we can just continue on.
				*/
				goto good_conn_type;
			}
#endif
			if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK)
				info("bg block %s conn-type not usable "
				     "asking for %s bg_record is %s",
				     bg_record->bg_block_id,
				     conn_type_string(request->conn_type[0]),
				     conn_type_string(bg_record->conn_type[0]));
			continue;
		}
#ifdef HAVE_BGP
	good_conn_type:
#endif
		/*****************************************/
		/* match up geometry as "best" possible */
		/*****************************************/
		if ((request->geometry[0] != (uint16_t)NO_VAL)
		    && (!_check_rotate_geo(bg_record->geo, request->geometry,
					   request->rotate)))
			continue;

		/* Every check passed: leave the loop with bg_record set */
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK)
			info("we found one! %s", bg_record->bg_block_id);
		break;
	}
	list_iterator_destroy(itr);

	return bg_record;
}
/*
 * parse_blockreq - parse one block definition line into a newly
 * allocated select_ba_request_t.
 *
 * dest     - OUT: receives the allocated request on success
 * type     - not referenced by this function
 * key      - not referenced by this function
 * value    - host list expression naming the midplane(s); NULL makes the
 *            function return 0 without producing a request
 * line     - not referenced by this function
 * leftover - IN/OUT: remainder of the line; consumed by s_p_parse_line()
 *
 * Returns 1 when *dest was filled in, 0 when value was NULL.
 *
 * The option table is released on every return path (the original
 * leaked it when value was NULL).
 */
extern int parse_blockreq(void **dest, slurm_parser_enum_t type,
			  const char *key, const char *value,
			  const char *line, char **leftover)
{
	s_p_options_t block_options[] = {
		{"Type", S_P_STRING},
		{"32CNBlocks", S_P_UINT16},
		{"128CNBlocks", S_P_UINT16},
#ifdef HAVE_BGL
		{"Nodecards", S_P_UINT16},
		{"Quarters", S_P_UINT16},
		{"BlrtsImage", S_P_STRING},
		{"LinuxImage", S_P_STRING},
		{"RamDiskImage", S_P_STRING},
#else
#ifdef HAVE_BGP
		{"16CNBlocks", S_P_UINT16},
		{"CnloadImage", S_P_STRING},
		{"IoloadImage", S_P_STRING},
#endif
		{"64CNBlocks", S_P_UINT16},
		{"256CNBlocks", S_P_UINT16},
#endif
		{"MloaderImage", S_P_STRING},
		{NULL}
	};
	s_p_hashtbl_t *tbl;
	char *tmp = NULL;
	select_ba_request_t *n = NULL;
	hostlist_t hl = NULL;

	/* The line is always parsed so that *leftover is advanced even
	 * when there is no value to build a request from. */
	tbl = s_p_hashtbl_create(block_options);
	s_p_parse_line(tbl, *leftover, leftover);
	if (!value) {
		s_p_hashtbl_destroy(tbl);
		return 0;
	}

	n = xmalloc(sizeof(select_ba_request_t));

	/* Store the midplane list in ranged form */
	hl = hostlist_create(value);
	n->save_name = hostlist_ranged_string_xmalloc(hl);
	hostlist_destroy(hl);

#ifdef HAVE_BGL
	s_p_get_string(&n->blrtsimage, "BlrtsImage", tbl);
	s_p_get_string(&n->linuximage, "LinuxImage", tbl);
	s_p_get_string(&n->ramdiskimage, "RamDiskImage", tbl);
#elif defined HAVE_BGP
	s_p_get_string(&n->linuximage, "CnloadImage", tbl);
	s_p_get_string(&n->ramdiskimage, "IoloadImage", tbl);
#endif
	s_p_get_string(&n->mloaderimage, "MloaderImage", tbl);

	/* NOTE(review): when no Type= is given, conn_type keeps whatever
	 * xmalloc() left in it; confirm that value is meant to reach the
	 * NO_VAL default handling further down. */
	s_p_get_string(&tmp, "Type", tbl);
	if (tmp) {
		verify_conn_type(tmp, n->conn_type);
		xfree(tmp);
	}

	/* On BGL the older "Nodecards"/"Quarters" keys are consulted when
	 * the NNCNBlocks key was not found. */
	if (!s_p_get_uint16(&n->small32, "32CNBlocks", tbl)) {
#ifdef HAVE_BGL
		s_p_get_uint16(&n->small32, "Nodecards", tbl);
#else
		;
#endif
	}
	if (!s_p_get_uint16(&n->small128, "128CNBlocks", tbl)) {
#ifdef HAVE_BGL
		s_p_get_uint16(&n->small128, "Quarters", tbl);
#else
		;
#endif
	}

#ifndef HAVE_BGL
#ifdef HAVE_BGP
	s_p_get_uint16(&n->small16, "16CNBlocks", tbl);
#endif
	s_p_get_uint16(&n->small64, "64CNBlocks", tbl);
	s_p_get_uint16(&n->small256, "256CNBlocks", tbl);
#endif

	if (n->small16 || n->small32 || n->small64
	    || n->small128 || n->small256) {
		/* Small blocks were requested: force a small conn type */
		if (n->conn_type[0] < SELECT_SMALL) {
			error("Block def on midplane(s) %s is "
			      "asking for small blocks but given "
			      "TYPE=%s, setting it to Small",
			      n->save_name,
			      conn_type_string(n->conn_type[0]));
			n->conn_type[0] = SELECT_SMALL;
		}
	} else {
		/* No small blocks requested: an unset or small conn type
		 * is replaced by the configured default. */
		if (n->conn_type[0] == (uint16_t)NO_VAL) {
			n->conn_type[0] = bg_conf->default_conn_type[0];
		} else if (n->conn_type[0] >= SELECT_SMALL) {
			error("Block def on midplane(s) %s is given "
			      "TYPE=%s but isn't asking for any small "
			      "blocks. Giving it %s.",
			      n->save_name,
			      conn_type_string(n->conn_type[0]),
			      conn_type_string(
				      bg_conf->default_conn_type[0]));
			n->conn_type[0] = bg_conf->default_conn_type[0];
		}
#ifndef HAVE_BG_L_P
		int i;

		/* Same defaulting for the remaining dimensions */
		for (i=1; i<SYSTEM_DIMENSIONS; i++) {
			if (n->conn_type[i] == (uint16_t)NO_VAL)
				n->conn_type[i] =
					bg_conf->default_conn_type[i];
			else if (n->conn_type[i] >= SELECT_SMALL) {
				error("Block def on midplane(s) %s dim %d "
				      "is given TYPE=%s but isn't asking "
				      "for any small blocks. Giving it %s.",
				      n->save_name, i,
				      conn_type_string(n->conn_type[i]),
				      conn_type_string(
					      bg_conf->default_conn_type[i]));
				n->conn_type[i] =
					bg_conf->default_conn_type[i];
			}
		}
#endif
	}
	s_p_hashtbl_destroy(tbl);

	*dest = (void *)n;
	return 1;
}
/*
 * Perform job initiation work.
 *
 * Looks up the block named in bg_action_ptr, frees every block in
 * bg_lists->main that overlaps it, applies any image / small-block mode
 * changes requested by the action (freeing the block first when a change
 * or a reboot is needed), boots the block when it is free and finally
 * records the job's user as the block's target user.
 *
 * block_state_mutex is taken and released several times in here; every
 * return path below leaves it unlocked.  free_block_list() and
 * bg_requeue_job() are called with the mutex released.
 *
 * NOTE(review): bg_action_ptr->job_ptr is dereferenced unconditionally
 * throughout, yet is NULL-checked once near the end -- confirm whether it
 * can ever be NULL here.
 */
static void _start_agent(bg_action_t *bg_action_ptr)
{
	int rc, set_user_rc = SLURM_SUCCESS;
	bg_record_t *bg_record = NULL;
	bg_record_t *found_record = NULL;
	ListIterator itr;
	List delete_list = NULL;
	int requeue_job = 0;

	slurm_mutex_lock(&block_state_mutex);
	bg_record = find_bg_record_in_list(bg_lists->main,
					   bg_action_ptr->bg_block_id);

	if (!bg_record) {
		slurm_mutex_unlock(&block_state_mutex);
		error("block %s not found in bg_lists->main",
		      bg_action_ptr->bg_block_id);
		bg_requeue_job(bg_action_ptr->job_ptr->job_id, 1);
		return;
	}

	if (bg_record->job_running <= NO_JOB_RUNNING) {
		// bg_reset_block(bg_record); should already happened
		slurm_mutex_unlock(&block_state_mutex);
		debug("job %u finished during the queueing job "
		      "(everything is ok)",
		      bg_action_ptr->job_ptr->job_id);
		return;
	}
	if (bg_record->state == BG_BLOCK_TERM) {
		debug("Block is in Deallocating state, waiting for free.");
		/* It doesn't appear state of a small block
		   (conn_type) is held on a BGP system so
		   if we to reset it so, just set the reboot flag and
		   handle it later in that code. */
		bg_action_ptr->reboot = 1;
	}

	/* Collect every other block that overlaps ours so it can be freed.
	 * The list is created with a NULL destructor argument and holds
	 * pointers to records that remain in bg_lists->main. */
	delete_list = list_create(NULL);
	itr = list_iterator_create(bg_lists->main);
	while ((found_record = list_next(itr))) {
		if ((!found_record) || (bg_record == found_record))
			continue;

		if (!blocks_overlap(bg_record, found_record)) {
			debug2("block %s isn't part of %s",
			       found_record->bg_block_id,
			       bg_record->bg_block_id);
			continue;
		}

		/* An overlapping block that still has a job attached means
		 * we cannot start; requeue instead of freeing it.
		 * NOTE(review): end_time is printed with %ld without a
		 * cast -- confirm its type matches long. */
		if (found_record->job_ptr) {
			error("Trying to start job %u on block %s, "
			      "but there is a job %u running on an overlapping "
			      "block %s it will not end until %ld.  "
			      "This should never happen.",
			      bg_action_ptr->job_ptr->job_id,
			      bg_record->bg_block_id,
			      found_record->job_ptr->job_id,
			      found_record->bg_block_id,
			      found_record->job_ptr->end_time);
			requeue_job = 1;
			break;
		}

		debug2("need to make sure %s is free, it's part of %s",
		       found_record->bg_block_id,
		       bg_record->bg_block_id);
		list_push(delete_list, found_record);
	}
	list_iterator_destroy(itr);

	if (requeue_job) {
		list_destroy(delete_list);

		bg_reset_block(bg_record);

		slurm_mutex_unlock(&block_state_mutex);
		bg_requeue_job(bg_action_ptr->job_ptr->job_id, 0);
		return;
	}

	slurm_mutex_unlock(&block_state_mutex);

	/* Free the overlapping blocks with the mutex released */
	rc = free_block_list(bg_action_ptr->job_ptr->job_id, delete_list, 0, 1);
	list_destroy(delete_list);
	if (rc != SLURM_SUCCESS) {
		error("Problem with deallocating blocks to run job %u "
		      "on block %s", bg_action_ptr->job_ptr->job_id,
		      bg_action_ptr->bg_block_id);
		if (IS_JOB_CONFIGURING(bg_action_ptr->job_ptr))
			bg_requeue_job(bg_action_ptr->job_ptr->job_id, 0);
		return;
	}

	slurm_mutex_lock(&block_state_mutex);
	/* Failure will unlock block_state_mutex so no need to unlock before
	   return.
	*/
	if (!_make_sure_block_still_exists(bg_action_ptr, bg_record))
		return;

	if (bg_record->job_running <= NO_JOB_RUNNING) {
		// bg_reset_block(bg_record); should already happened
		slurm_mutex_unlock(&block_state_mutex);
		debug("job %u already finished before boot",
		      bg_action_ptr->job_ptr->job_id);
		return;
	}

	/* From here until the modify section rc is used as a flag:
	 * 1 means some block attribute differs from what the action asks
	 * for and the block has to be freed and modified. */
	rc = 0;
#ifdef HAVE_BGL
	if (bg_action_ptr->blrtsimage
	    && strcasecmp(bg_action_ptr->blrtsimage, bg_record->blrtsimage)) {
		debug3("changing BlrtsImage from %s to %s",
		       bg_record->blrtsimage, bg_action_ptr->blrtsimage);
		xfree(bg_record->blrtsimage);
		bg_record->blrtsimage = xstrdup(bg_action_ptr->blrtsimage);
		rc = 1;
	}
#elif defined HAVE_BGP
	if ((bg_action_ptr->conn_type[0] >= SELECT_SMALL)
	    && (bg_action_ptr->conn_type[0] != bg_record->conn_type[0])) {
		debug3("changing small block mode from %s to %s",
		       conn_type_string(bg_record->conn_type[0]),
		       conn_type_string(bg_action_ptr->conn_type[0]));
		rc = 1;
# ifndef HAVE_BG_FILES
		/* since we don't check state on an emulated system we
		 * have to change it here
		 */
		bg_record->conn_type[0] = bg_action_ptr->conn_type[0];
# endif
	}
#endif

#ifdef HAVE_BG_L_P
	if (bg_action_ptr->linuximage
	    && strcasecmp(bg_action_ptr->linuximage, bg_record->linuximage)) {
# ifdef HAVE_BGL
		debug3("changing LinuxImage from %s to %s",
		       bg_record->linuximage, bg_action_ptr->linuximage);
# else
		debug3("changing CnloadImage from %s to %s",
		       bg_record->linuximage, bg_action_ptr->linuximage);
# endif
		xfree(bg_record->linuximage);
		bg_record->linuximage = xstrdup(bg_action_ptr->linuximage);
		rc = 1;
	}
	if (bg_action_ptr->ramdiskimage
	    && strcasecmp(bg_action_ptr->ramdiskimage,
			  bg_record->ramdiskimage)) {
# ifdef HAVE_BGL
		debug3("changing RamDiskImage from %s to %s",
		       bg_record->ramdiskimage, bg_action_ptr->ramdiskimage);
# else
		debug3("changing IoloadImage from %s to %s",
		       bg_record->ramdiskimage, bg_action_ptr->ramdiskimage);
# endif
		xfree(bg_record->ramdiskimage);
		bg_record->ramdiskimage = xstrdup(bg_action_ptr->ramdiskimage);
		rc = 1;
	}
#endif
	if (bg_action_ptr->mloaderimage
	    && strcasecmp(bg_action_ptr->mloaderimage,
			  bg_record->mloaderimage)) {
		debug3("changing MloaderImage from %s to %s",
		       bg_record->mloaderimage, bg_action_ptr->mloaderimage);
		xfree(bg_record->mloaderimage);
		bg_record->mloaderimage = xstrdup(bg_action_ptr->mloaderimage);
		rc = 1;
	}

	if (rc || bg_action_ptr->reboot) {
		bg_record->modifying = 1;

		/* Increment free_cnt to make sure we don't lose this
		 * block since bg_free_block will unlock block_state_mutex.
		 */
		bg_record->free_cnt++;
		bg_free_block(bg_record, 1, 1);
		bg_record->free_cnt--;

		/* Push the (possibly updated) attributes to the bridge.
		 * Failures are logged only; rc is overwritten by each
		 * call. */
#if defined HAVE_BG_FILES && defined HAVE_BG_L_P
#ifdef HAVE_BGL
		if ((rc = bridge_block_modify(bg_record->bg_block_id,
					      RM_MODIFY_BlrtsImg,
					      bg_record->blrtsimage))
		    != SLURM_SUCCESS)
			error("bridge_block_modify(RM_MODIFY_BlrtsImg): %s",
			      bg_err_str(rc));

		if ((rc = bridge_block_modify(bg_record->bg_block_id,
					      RM_MODIFY_LinuxImg,
					      bg_record->linuximage))
		    != SLURM_SUCCESS)
			error("bridge_block_modify(RM_MODIFY_LinuxImg): %s",
			      bg_err_str(rc));

		if ((rc = bridge_block_modify(bg_record->bg_block_id,
					      RM_MODIFY_RamdiskImg,
					      bg_record->ramdiskimage))
		    != SLURM_SUCCESS)
			error("bridge_block_modify(RM_MODIFY_RamdiskImg): %s",
			      bg_err_str(rc));

#elif defined HAVE_BGP
		if ((rc = bridge_block_modify(bg_record->bg_block_id,
					      RM_MODIFY_CnloadImg,
					      bg_record->linuximage))
		    != SLURM_SUCCESS)
			error("bridge_block_modify(RM_MODIFY_CnloadImg): %s",
			      bg_err_str(rc));

		if ((rc = bridge_block_modify(bg_record->bg_block_id,
					      RM_MODIFY_IoloadImg,
					      bg_record->ramdiskimage))
		    != SLURM_SUCCESS)
			error("bridge_block_modify(RM_MODIFY_IoloadImg): %s",
			      bg_err_str(rc));

		if (bg_action_ptr->conn_type[0] > SELECT_SMALL) {
			/* NOTE(review): conn_type stays NULL in the default
			 * case and is still passed to bridge_block_modify()
			 * -- confirm that is acceptable. */
			char *conn_type = NULL;
			switch(bg_action_ptr->conn_type[0]) {
			case SELECT_HTC_S:
				conn_type = "s";
				break;
			case SELECT_HTC_D:
				conn_type = "d";
				break;
			case SELECT_HTC_V:
				conn_type = "v";
				break;
			case SELECT_HTC_L:
				conn_type = "l";
				break;
			default:
				break;
			}
			/* the option has to be set before the pool can be
			   set */
			if ((rc = bridge_block_modify(
				     bg_record->bg_block_id,
				     RM_MODIFY_Options,
				     conn_type)) != SLURM_SUCCESS)
				error("bridge_set_data(RM_MODIFY_Options): %s",
				      bg_err_str(rc));
		}
#endif
		if ((rc = bridge_block_modify(bg_record->bg_block_id,
					      RM_MODIFY_MloaderImg,
					      bg_record->mloaderimage))
		    != SLURM_SUCCESS)
			error("bridge_block_modify(RM_MODIFY_MloaderImg): %s",
			      bg_err_str(rc));

#endif
		bg_record->modifying = 0;
	}

	if (bg_record->state == BG_BLOCK_FREE) {
		if ((rc = bridge_block_boot(bg_record)) != SLURM_SUCCESS) {
			char reason[200];

			bg_record->boot_state = 0;
			bg_record->boot_count = 0;

			if (rc == BG_ERROR_INVALID_STATE)
				snprintf(reason, sizeof(reason),
					 "Block %s is in an incompatible "
					 "state. This usually means "
					 "hardware is allocated "
					 "by another block (maybe outside "
					 "of SLURM).",
					 bg_record->bg_block_id);
			else
				snprintf(reason, sizeof(reason),
					 "Couldn't boot block %s: %s",
					 bg_record->bg_block_id,
					 bg_err_str(rc));
			slurm_mutex_unlock(&block_state_mutex);
			requeue_and_error(bg_record, reason);
			return;
		}
	} else if (bg_record->state == BG_BLOCK_BOOTING) {
#ifdef HAVE_BG_FILES
		bg_record->boot_state = 1;
#else
		/* Without HAVE_BG_FILES the block is marked booted right
		 * away: added to bg_lists->booted and set to INITED. */
		if (!block_ptr_exist_in_list(bg_lists->booted, bg_record))
			list_push(bg_lists->booted, bg_record);
		bg_record->state = BG_BLOCK_INITED;
		last_bg_update = time(NULL);
#endif
	}


	if (bg_record->job_running <= NO_JOB_RUNNING) {
		slurm_mutex_unlock(&block_state_mutex);
		debug("job %u finished during the start of the boot "
		      "(everything is ok)",
		      bg_action_ptr->job_ptr->job_id);
		return;
	}

	/* Don't reset boot_count, it will be reset when state
	   changes, and needs to outlast a job allocation.
	*/
	/* bg_record->boot_count = 0; */
	xfree(bg_record->target_name);
	bg_record->target_name = uid_to_string(bg_action_ptr->job_ptr->user_id);
	debug("setting the target_name for Block %s to %s",
	      bg_record->bg_block_id, bg_record->target_name);

	if (bg_record->state == BG_BLOCK_INITED) {
		debug("block %s is ready.", bg_record->bg_block_id);
		set_user_rc = set_block_user(bg_record);
		if (bg_action_ptr->job_ptr) {
			bg_action_ptr->job_ptr->job_state &= (~JOB_CONFIGURING);
			last_job_update = time(NULL);
		}
	}
	slurm_mutex_unlock(&block_state_mutex);

	if (set_user_rc == SLURM_ERROR) {
		sleep(2);
		/* wait for the slurmd to begin the batch script,
		   slurm_fail_job() is a no-op if issued prior
		   to the script initiation do clean up just
		   in case the fail job isn't run */
		(void) slurm_fail_job(bg_record->job_running);
		slurm_mutex_lock(&block_state_mutex);
		if (remove_from_bg_list(bg_lists->job_running, bg_record)
		    == SLURM_SUCCESS)
			num_unused_cpus += bg_record->cpu_cnt;
		slurm_mutex_unlock(&block_state_mutex);
	}
}
/*
 * _save_allocation - write the current block layout as a bluegene.conf.
 *
 * com              - command text; everything from offset 5 on is scanned
 *                    for the output file name
 * allocated_blocks - list of allocated_block_t whose requests are written
 *                    as "MPs=" lines
 *
 * The file name is built from the non-space characters that follow a
 * space in com (several words are concatenated, as before).  When no
 * name is found "bluegene.conf.<unix time>" is used.  Names that do not
 * fit the local buffer are truncated instead of overflowing it, and
 * repeated spaces no longer stall the scan.
 *
 * Returns 1 in every case, including when the file cannot be opened
 * (callers see no difference from the original behaviour).
 */
static int _save_allocation(char *com, List allocated_blocks)
{
	int len = strlen(com);
	int i=5, j=0;
	allocated_block_t *allocated_block = NULL;
	char filename[50];
	char *save_string = NULL;
	FILE *file_ptr = NULL;
	char *extra = NULL;

	ListIterator results_i;

	memset(filename, 0, sizeof(filename));
	if (len > 5) {
		while (i < len) {
			/* move to the first character that follows a space */
			while (i < len && com[i-1] != ' ')
				i++;
			/* Skip any further spaces.  Without this neither
			 * inner loop advanced on "  " and the outer loop
			 * never terminated. */
			while (i < len && com[i] == ' ')
				i++;
			/* Copy the word, always leaving room for the
			 * terminating NUL that memset() put in place. */
			while (i < len && com[i] != ' ') {
				if (j < (int)sizeof(filename) - 1)
					filename[j++] = com[i];
				i++;
			}
		}
	}
	if (filename[0]=='\0') {
		time_t now_time = time(NULL);
		snprintf(filename, sizeof(filename), "bluegene.conf.%ld",
			 (long int) now_time);
	}

	file_ptr = fopen(filename,"w");

	if (file_ptr!=NULL) {
		char *image_dir = NULL;

		xstrcat(save_string,
			"#\n# bluegene.conf file generated by smap\n");
		xstrcat(save_string,
			"# See the bluegene.conf man page for "
			"more information\n");

		xstrcat(save_string, "#\n");
#ifdef HAVE_BGL
		image_dir = "/bgl/BlueLight/ppcfloor/bglsys/bin";
		xstrfmtcat(save_string, "BlrtsImage=%s/rts_hw.rts\n",
			   image_dir);
		xstrfmtcat(save_string, "LinuxImage=%s/zImage.elf\n",
			   image_dir);
		xstrfmtcat(save_string,
			   "MloaderImage=%s/mmcs-mloader.rts\n",
			   image_dir);
		xstrfmtcat(save_string,
			   "RamDiskImage=%s/ramdisk.elf\n",
			   image_dir);

		xstrcat(save_string, "IONodesPerMP=8 # io poor\n");
		xstrcat(save_string, "# IONodesPerMP=64 # io rich\n");
#elif defined HAVE_BGP
		image_dir = "/bgsys/drivers/ppcfloor/boot";
		xstrfmtcat(save_string, "CnloadImage=%s/cns,%s/cnk\n",
			   image_dir, image_dir);
		xstrfmtcat(save_string, "MloaderImage=%s/uloader\n",
			   image_dir);
		xstrfmtcat(save_string,
			   "IoloadImage=%s/cns,%s/linux,%s/ramdisk\n",
			   image_dir, image_dir, image_dir);
		xstrcat(save_string, "IONodesPerMP=4 # io poor\n");
		xstrcat(save_string, "# IONodesPerMP=32 # io rich\n");
#else
		image_dir = "/bgsys/drivers/ppcfloor/boot";
		xstrfmtcat(save_string, "MloaderImage=%s/firmware\n",
			   image_dir);
		xstrcat(save_string, "IONodesPerMP=4 # io semi-poor\n");
		xstrcat(save_string, "# IONodesPerMP=16 # io rich\n");
#endif
		xstrcat(save_string, "BridgeAPILogFile="
			"/var/log/slurm/bridgeapi.log\n");

		xstrcat(save_string, "BridgeAPIVerbose=2\n");

		xstrfmtcat(save_string, "BasePartitionNodeCnt=%d\n",
			   base_part_node_cnt);
		xstrfmtcat(save_string, "NodeCardNodeCnt=%d\n",
			   nodecard_node_cnt);
		if (!list_count(allocated_blocks))
			xstrcat(save_string, "LayoutMode=DYNAMIC\n");
		else {
			xstrfmtcat(save_string, "LayoutMode=%s\n", layout_mode);
			xstrfmtcat(save_string, "#\n# Block Layout\n#\n");
		}

		/* One "MPs=" line per allocated block */
		results_i = list_iterator_create(allocated_blocks);
		while ((allocated_block = list_next(results_i)) != NULL) {
			select_ba_request_t *request = allocated_block->request;

			/* Small-block counts go at the end of the line */
			if (request->small16 || request->small32
			    || request->small64 || request->small128
			    || request->small256) {
#ifdef HAVE_BGL
				xstrfmtcat(extra,
					   " 32CNBlocks=%d "
					   "128CNBlocks=%d",
					   request->small32,
					   request->small128);
#elif defined HAVE_BGP
				xstrfmtcat(extra,
					   " 16CNBlocks=%d "
					   "32CNBlocks=%d "
					   "64CNBlocks=%d "
					   "128CNBlocks=%d "
					   "256CNBlocks=%d",
					   request->small16,
					   request->small32,
					   request->small64,
					   request->small128,
					   request->small256);
#else
				xstrfmtcat(extra,
					   " 32CNBlocks=%d "
					   "64CNBlocks=%d "
					   "128CNBlocks=%d "
					   "256CNBlocks=%d",
					   request->small32,
					   request->small64,
					   request->small128,
					   request->small256);
#endif
			}

			xstrfmtcat(save_string, "MPs=%s", request->save_name);

			/* " Type=a,b,c": stops at the first unset dimension;
			 * with HAVE_BG_L_P only dimension 0 is written. */
			for (i=0; i<SYSTEM_DIMENSIONS; i++) {
				if (request->conn_type[i] == (uint16_t)NO_VAL)
					break;
				if (i)
					xstrcat(save_string, ",");
				else
					xstrcat(save_string, " Type=");
				xstrfmtcat(save_string, "%s",
					   conn_type_string(
						   request->conn_type[i]));
#ifdef HAVE_BG_L_P
				break;
#endif
			}

			if (extra) {
				xstrfmtcat(save_string, "%s\n", extra);
				xfree(extra);
			} else
				xstrcat(save_string, "\n");
		}
		list_iterator_destroy(results_i);
		fputs(save_string, file_ptr);
		xfree(save_string);
		fclose (file_ptr);
	}

	return 1;
}
/*
 * _print_text_part - print one partition (and optionally one block) row.
 *
 * part_ptr     - partition to describe
 * db2_info_ptr - block information shown when params.display == BGPART;
 *                may be NULL, in which case the curses view prints "?"
 *                placeholders and the command-line view prints nothing
 *                for those columns
 *
 * When params.commandline is clear the row is drawn into text_win at
 * (main_ycord, main_xcord) and both cursors are advanced; the node list
 * is wrapped onto following rows when it reaches the window edge.
 * Otherwise the row is written to stdout with printf().
 *
 * Returns the negative mvwaddch() result if drawing a node-list
 * character fails, otherwise the value left in `printed` (0 unless at
 * least one node-list character was drawn).
 *
 * Fixes relative to the original: the node-list loop no longer reads
 * nodes[-1] on its first iteration, and strings are never passed to
 * mvwprintw() as the format argument.
 */
static int _print_text_part(partition_info_t *part_ptr,
			    db2_block_info_t *db2_info_ptr)
{
	int printed = 0;
	int tempxcord;
	int prefixlen;
	int i = 0;
	int width = 0;
	char *nodes = NULL, time_buf[20];
	char tmp_cnt[8];
	char tmp_char[8];

	if (params.cluster_flags & CLUSTER_FLAG_BG)
		convert_num_unit((float)part_ptr->total_nodes, tmp_cnt,
				 sizeof(tmp_cnt), UNIT_NONE);
	else
		snprintf(tmp_cnt, sizeof(tmp_cnt), "%u", part_ptr->total_nodes);

	if (!params.commandline) {
		mvwprintw(text_win,
			  main_ycord,
			  main_xcord, "%c",
			  part_ptr->flags);
		main_xcord += 4;

		if (part_ptr->name) {
			mvwprintw(text_win,
				  main_ycord,
				  main_xcord, "%.9s",
				  part_ptr->name);
			main_xcord += 10;
			if (params.display != BGPART) {
				char *tmp_state;
				if (part_ptr->state_up == PARTITION_INACTIVE)
					tmp_state = "inact";
				else if (part_ptr->state_up == PARTITION_UP)
					tmp_state = "up";
				else if (part_ptr->state_up == PARTITION_DOWN)
					tmp_state = "down";
				else if (part_ptr->state_up == PARTITION_DRAIN)
					tmp_state = "drain";
				else
					tmp_state = "unk";
				/* always print through "%s": the text is
				 * data, not a format */
				mvwprintw(text_win,
					  main_ycord,
					  main_xcord, "%s",
					  tmp_state);
				main_xcord += 7;

				if (part_ptr->max_time == INFINITE)
					snprintf(time_buf, sizeof(time_buf),
						 "infinite");
				else {
					secs2time_str((part_ptr->max_time
						       * 60),
						      time_buf,
						      sizeof(time_buf));
				}

				/* right-align the time in a 9 column field */
				width = strlen(time_buf);
				mvwprintw(text_win,
					  main_ycord,
					  main_xcord + (9 - width),
					  "%s",
					  time_buf);
				main_xcord += 11;
			}
		} else
			main_xcord += 10;

		if (params.display == BGPART) {
			if (db2_info_ptr) {
				mvwprintw(text_win,
					  main_ycord,
					  main_xcord, "%.16s",
					  db2_info_ptr->bg_block_name);
				main_xcord += 18;

				mvwprintw(text_win,
					  main_ycord,
					  main_xcord, "%s",
					  bg_block_state_string(
						  db2_info_ptr->state));
				main_xcord += 7;

				if (db2_info_ptr->job_running > NO_JOB_RUNNING)
					snprintf(tmp_char, sizeof(tmp_char),
						 "%d",
						 db2_info_ptr->job_running);
				else
					snprintf(tmp_char, sizeof(tmp_char),
						 "-");

				mvwprintw(text_win,
					  main_ycord,
					  main_xcord,
					  "%.8s", tmp_char);
				main_xcord += 8;

				mvwprintw(text_win,
					  main_ycord,
					  main_xcord, "%.8s",
					  db2_info_ptr->bg_user_name);
				main_xcord += 9;

				mvwprintw(text_win,
					  main_ycord,
					  main_xcord, "%.5s",
					  conn_type_string(
						  db2_info_ptr->
						  bg_conn_type));
				main_xcord += 7;

				if (params.cluster_flags & CLUSTER_FLAG_BGL) {
					mvwprintw(text_win,
						  main_ycord,
						  main_xcord, "%.9s",
						  node_use_string(
							  db2_info_ptr->
							  bg_node_use));
					main_xcord += 10;
				}
			} else {
				/* No block data: one "?" per column, using
				 * the same column widths as above. */
				mvwprintw(text_win,
					  main_ycord,
					  main_xcord, "?");
				main_xcord += 18;
				mvwprintw(text_win,
					  main_ycord,
					  main_xcord, "?");
				main_xcord += 7;
				mvwprintw(text_win,
					  main_ycord,
					  main_xcord, "?");
				main_xcord += 8;
				mvwprintw(text_win,
					  main_ycord,
					  main_xcord, "?");
				main_xcord += 9;
				mvwprintw(text_win,
					  main_ycord,
					  main_xcord, "?");
				main_xcord += 7;
				mvwprintw(text_win,
					  main_ycord,
					  main_xcord, "?");
				main_xcord += 10;
			}
		}
		mvwprintw(text_win,
			  main_ycord,
			  main_xcord, "%5s", tmp_cnt);

		main_xcord += 7;

		tempxcord = main_xcord;

		if (params.display == BGPART)
			nodes = part_ptr->allow_groups;
		else
			nodes = part_ptr->nodes;
		i = 0;
		prefixlen = i;
		while (nodes && nodes[i]) {
			/* There is no previous character at i == 0, so
			 * never look at nodes[i - 1] there. */
			int after_comma = (i > 0) && (nodes[i - 1] == ',');

			width = text_win->_maxx - main_xcord;

			/* Remember where the first ",[" range starts so
			 * wrapped rows are indented under it. */
			if (!prefixlen && (nodes[i] == '[') && after_comma)
				prefixlen = i + 1;

			/* Wrap after a comma when fewer than 12 columns
			 * remain, or unconditionally past the right edge. */
			if (after_comma && (width - 12) <= 0) {
				main_ycord++;
				main_xcord = tempxcord + prefixlen;
			} else if (main_xcord > text_win->_maxx) {
				main_ycord++;
				main_xcord = tempxcord + prefixlen;
			}

			if ((printed = mvwaddch(text_win,
						main_ycord,
						main_xcord,
						nodes[i])) < 0)
				return printed;
			main_xcord++;

			i++;
		}
		if ((params.display == BGPART) && db2_info_ptr
		    && (db2_info_ptr->ionode_str)) {
			mvwprintw(text_win,
				  main_ycord,
				  main_xcord, "[%s]",
				  db2_info_ptr->ionode_str);
		}

		main_xcord = 1;
		main_ycord++;
	} else {
		if (part_ptr->name) {
			printf("%9.9s ", part_ptr->name);

			if (params.display != BGPART) {
				if (part_ptr->state_up == PARTITION_INACTIVE)
					printf(" inact ");
				else if (part_ptr->state_up == PARTITION_UP)
					printf(" up ");
				else if (part_ptr->state_up == PARTITION_DOWN)
					printf(" down ");
				else if (part_ptr->state_up == PARTITION_DRAIN)
					printf(" drain ");
				else
					printf(" unk ");

				if (part_ptr->max_time == INFINITE)
					snprintf(time_buf, sizeof(time_buf),
						 "infinite");
				else {
					secs2time_str((part_ptr->max_time
						       * 60),
						      time_buf,
						      sizeof(time_buf));
				}

				printf("%9.9s ", time_buf);
			}
		}

		if (params.display == BGPART) {
			if (db2_info_ptr) {
				printf("%16.16s ",
				       db2_info_ptr->bg_block_name);
				printf("%5.5s ",
				       bg_block_state_string(
					       db2_info_ptr->state));

				if (db2_info_ptr->job_running > NO_JOB_RUNNING)
					snprintf(tmp_char, sizeof(tmp_char),
						 "%d",
						 db2_info_ptr->job_running);
				else
					snprintf(tmp_char, sizeof(tmp_char),
						 "-");

				printf("%8.8s ", tmp_char);
				printf("%8.8s ", db2_info_ptr->bg_user_name);

				printf("%5.5s ", conn_type_string(
					       db2_info_ptr->bg_conn_type));
				if (params.cluster_flags & CLUSTER_FLAG_BGL)
					printf("%9.9s ", node_use_string(
						       db2_info_ptr->
						       bg_node_use));
			}
		}

		printf("%5s ", tmp_cnt);

		if (params.display == BGPART)
			nodes = part_ptr->allow_groups;
		else
			nodes = part_ptr->nodes;

		/* NOTE(review): nodes may be NULL here (the curses branch
		 * guards for it); left as-is to keep the output unchanged. */
		if ((params.display == BGPART) && db2_info_ptr
		    && (db2_info_ptr->ionode_str)) {
			printf("%s[%s]\n", nodes, db2_info_ptr->ionode_str);
		} else
			printf("%s\n",nodes);
	}
	return printed;
}
/*
 * add_bg_record - build a block record from a block request and either
 *	append it to "records" (regular blocks) or split it into small
 *	blocks, one midplane at a time (conn_type >= SELECT_SMALL).
 *
 * block_state_mutex must be locked before calling this.
 *
 * IN/OUT records - list the new record is appended to; for small blocks it
 *	is handed to handle_small_record_request() instead.  A NULL list
 *	is fatal.
 * IN/OUT used_nodes - optional pointer to a list of midplanes.  Under
 *	HAVE_BGQ the list is taken over by the record and *used_nodes is
 *	set to NULL; otherwise it is copied with copy_node_path().
 * IN/OUT blockreq - the request.  save_name supplies the midplane string.
 *	When no small block sizes are given (and no_check is false) a
 *	default split is written back into the request.
 * IN no_check - if true, skip the small block sanity checks and defaults.
 * IN io_start - forwarded unchanged to handle_small_record_request().
 * RET SLURM_SUCCESS; configuration problems are reported through fatal().
 */
extern int add_bg_record(List records, List *used_nodes,
			 select_ba_request_t *blockreq,
			 bool no_check, bitoff_t io_start)
{
	bg_record_t *bg_record = NULL;
	ba_mp_t *ba_mp = NULL;
	ListIterator itr;
	uid_t pw_uid;
	const char *mp_name;
	int i;

	xassert(bg_conf->slurm_user_name);

	if (!records) {
		fatal("add_bg_record: no records list given");
	}

	bg_record = xmalloc(sizeof(*bg_record));

	bg_record->magic = BLOCK_MAGIC;

	bg_record->user_name = xstrdup(bg_conf->slurm_user_name);
	bg_record->target_name = xstrdup(bg_conf->slurm_user_name);

	/* NOTE(review): on lookup failure user_uid is left untouched, i.e.
	 * whatever the allocation gave it (presumably zero -- confirm). */
	if (uid_from_string (bg_record->user_name, &pw_uid) < 0)
		error("add_bg_record: No such user: %s",
		      bg_record->user_name);
	else
		bg_record->user_uid = pw_uid;

	if (used_nodes && *used_nodes) {
#ifdef HAVE_BGQ
		/* Take ownership of the caller's list. */
		bg_record->ba_mp_list = *used_nodes;
		*used_nodes = NULL;
#else
		bg_record->ba_mp_list = list_create(destroy_ba_mp);
		if (copy_node_path(*used_nodes, &bg_record->ba_mp_list)
		    == SLURM_ERROR)
			error("add_bg_record: "
			      "couldn't copy the path for the allocation");
#endif
	} else
		bg_record->ba_mp_list = list_create(destroy_ba_mp);

	/* bg_record->boot_state = 0;	Implicit */
	bg_record->state = BG_BLOCK_FREE;

	if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) {
#ifdef HAVE_BGL
		info("add_bg_record: asking for %s %d %d %s",
		     blockreq->save_name,
		     blockreq->small32, blockreq->small128,
		     conn_type_string(blockreq->conn_type[0]));
#else
		info("add_bg_record: asking for %s %d %d %d %d %d %s",
		     blockreq->save_name,
		     blockreq->small256, blockreq->small128,
		     blockreq->small64, blockreq->small32,
		     blockreq->small16,
		     conn_type_string(blockreq->conn_type[0]));
#endif
	}

	/* Start with blank bitmaps.  For a full midplane nothing should be
	 * set in the ionode bitmap, and bg_record->ionode_str should stay
	 * unset as well. */
	bg_record->ionode_bitmap = bit_alloc(bg_conf->ionodes_per_mp);
	bg_record->mp_used_bitmap = bit_alloc(node_record_count);

	/* Skip whatever precedes the coordinates in save_name: stop at the
	 * first '[', digit ('0'-'9') or upper case letter ('A'-'Z'). */
	mp_name = blockreq->save_name;
	while (*mp_name
	       && *mp_name != '['
	       && (*mp_name < '0' || *mp_name > 'Z'
		   || (*mp_name > '9' && *mp_name < 'A')))
		mp_name++;

	if (*mp_name) {
		/* Same construction as used for the per-midplane names in
		 * the small block loop below. */
		bg_record->mp_str = xstrdup_printf(
			"%s%s", bg_conf->slurm_node_prefix, mp_name);
	} else
		fatal("add_bg_record: MPs=%s is in a weird format",
		      blockreq->save_name);

	/* mp_count is read right after this call, so process_nodes() is
	 * presumably what fills it in from mp_str -- confirm. */
	process_nodes(bg_record, false);

#ifdef HAVE_BGL
	bg_record->node_use = SELECT_COPROCESSOR_MODE;
#endif
	memcpy(bg_record->conn_type, blockreq->conn_type,
	       sizeof(bg_record->conn_type));

	bg_record->cpu_cnt = bg_conf->cpus_per_mp * bg_record->mp_count;
	bg_record->cnode_cnt = bg_conf->mp_cnode_cnt * bg_record->mp_count;
	bg_record->job_running = NO_JOB_RUNNING;

	/* Images: use the one from the request, else the configured
	 * default. */
#ifdef HAVE_BGL
	if (blockreq->blrtsimage)
		bg_record->blrtsimage = xstrdup(blockreq->blrtsimage);
	else
		bg_record->blrtsimage = xstrdup(bg_conf->default_blrtsimage);
#endif

#ifdef HAVE_BG_L_P
	if (blockreq->linuximage)
		bg_record->linuximage = xstrdup(blockreq->linuximage);
	else
		bg_record->linuximage = xstrdup(bg_conf->default_linuximage);

	if (blockreq->ramdiskimage)
		bg_record->ramdiskimage = xstrdup(blockreq->ramdiskimage);
	else
		bg_record->ramdiskimage =
			xstrdup(bg_conf->default_ramdiskimage);
#endif
	if (blockreq->mloaderimage)
		bg_record->mloaderimage = xstrdup(blockreq->mloaderimage);
	else
		bg_record->mloaderimage =
			xstrdup(bg_conf->default_mloaderimage);

	if (bg_record->conn_type[0] < SELECT_SMALL) {
		/* this needs to be an append so we keep things in the
		   order we got them, they will be sorted later */
		list_append(records, bg_record);
		/* this isn't a correct list so we need to set it later
		   for now we just used it to be the mp number */
		if (!used_nodes) {
			debug4("add_bg_record: "
			       "we didn't get a request list so we are "
			       "destroying this mp list");
			list_destroy(bg_record->ba_mp_list);
			bg_record->ba_mp_list = NULL;
		}
	} else {
		List ba_mp_list = NULL;

		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK)
			info("add_bg_record: adding a small block");
		if (no_check)
			goto no_check;
		/* if the ionode cnt for small32 is 0 then don't
		   allow a sub quarter allocation
		*/
		if (bg_conf->nodecard_ionode_cnt < 2) {
			if (!bg_conf->nodecard_ionode_cnt && blockreq->small32)
				fatal("add_bg_record: "
				      "There is an error in your "
				      "bluegene.conf file.\n"
				      "Can't create a 32 node block with "
				      "IonodesPerMP=%u. (Try setting it "
				      "to at least 16)",
				      bg_conf->ionodes_per_mp);
#ifndef HAVE_BGL
			if (blockreq->small16)
				fatal("add_bg_record: "
				      "There is an error in your "
				      "bluegene.conf file.\n"
				      "Can't create a 16 node block with "
				      "IonodesPerMP=%u. (Try setting it to "
				      "at least 32)",
				      bg_conf->ionodes_per_mp);
			if ((bg_conf->io_ratio < 0.5) && blockreq->small64)
				fatal("add_bg_record: "
				      "There is an error in your "
				      "bluegene.conf file.\n"
				      "Can't create a 64 node block with "
				      "IonodesPerMP=%u. (Try setting it "
				      "to at least 8)",
				      bg_conf->ionodes_per_mp);
#endif
		}

#ifdef HAVE_BGL
		if (blockreq->small32==0 && blockreq->small128==0) {
			info("add_bg_record: "
			     "No specs given for this small block, "
			     "I am spliting this block into 4 128CnBlocks");
			blockreq->small128=4;
		}

		/* The requested pieces must add up to exactly one
		 * midplane worth of c-nodes. */
		i = (blockreq->small32*bg_conf->nodecard_cnode_cnt) +
			(blockreq->small128*bg_conf->quarter_cnode_cnt);
		if (i != bg_conf->mp_cnode_cnt)
			fatal("add_bg_record: "
			      "There is an error in your bluegene.conf file.\n"
			      "I am unable to request %d nodes consisting of "
			      "%u 32CnBlocks and\n%u 128CnBlocks in one "
			      "base partition with %u nodes.",
			      i, blockreq->small32, blockreq->small128,
			      bg_conf->mp_cnode_cnt);
#else
		if (!blockreq->small16 && !blockreq->small32
		    && !blockreq->small64 && !blockreq->small128
		    && !blockreq->small256) {
			info("add_bg_record: "
			     "No specs given for this small block, "
			     "I am spliting this block into 2 256CnBlocks");
			blockreq->small256=2;
		}

		/* The requested pieces must add up to exactly one
		 * midplane worth of c-nodes. */
		i = (blockreq->small16*16)
			+ (blockreq->small32*32)
			+ (blockreq->small64*64)
			+ (blockreq->small128*128)
			+ (blockreq->small256*256);
		if (i != bg_conf->mp_cnode_cnt)
			fatal("add_bg_record: "
			      "There is an error in your bluegene.conf file.\n"
			      "I am unable to request %d nodes consisting of "
			      "%u 16CNBlocks, %u 32CNBlocks,\n"
			      "%u 64CNBlocks, %u 128CNBlocks, "
			      "and %u 256CNBlocks\n"
			      "in one base partition with %u nodes.",
			      i, blockreq->small16, blockreq->small32,
			      blockreq->small64, blockreq->small128,
			      blockreq->small256, bg_conf->mp_cnode_cnt);
#endif
	no_check:
		/* Automatically create 2-way split if
		 * conn_type == SELECT_SMALL in bluegene.conf
		 * Here we go through each node listed and do the same thing
		 * for each node.
		 *
		 * bg_record only serves as a scratch template from here on:
		 * for every midplane it is given that midplane's name and a
		 * one element ba_mp_list before being passed to
		 * handle_small_record_request().  The scratch list is
		 * created without a delete function; the midplanes stay
		 * owned by the original list, destroyed at the end.
		 */
		ba_mp_list = bg_record->ba_mp_list;
		bg_record->ba_mp_list = list_create(NULL);
		itr = list_iterator_create(ba_mp_list);
		while ((ba_mp = list_next(itr)) != NULL) {
			xfree(bg_record->mp_str);
			bg_record->mp_str = xstrdup_printf(
				"%s%s",
				bg_conf->slurm_node_prefix,
				ba_mp->coord_str);
			list_append(bg_record->ba_mp_list, ba_mp);
			handle_small_record_request(records, blockreq,
						    bg_record, io_start);
			list_flush(bg_record->ba_mp_list);
		}
		list_iterator_destroy(itr);
		destroy_bg_record(bg_record);
		list_destroy(ba_mp_list);
	}

	return SLURM_SUCCESS;
}
/*
 * _layout_block_record - emit one name/value line per attribute of a block
 *	into the tree store that backs "treeview".
 *
 * IN treeview - view whose model (a GtkTreeStore) receives the lines
 * IN block_ptr - block whose fields are displayed
 * IN update - forwarded unchanged to add_display_treestore_line()
 *
 * Lines are emitted in a fixed order; the image lines and the "use" line
 * depend on whether CLUSTER_FLAG_BGL is set in cluster_flags.
 */
static void _layout_block_record(GtkTreeView *treeview,
				 sview_block_info_t *block_ptr,
				 int update)
{
	char job_text[18];
	char cnode_text[18];
	GtkTreeIter row;
	GtkTreeStore *store =
		GTK_TREE_STORE(gtk_tree_view_get_model(treeview));

	add_display_treestore_line(
		update, store, &row,
		find_col_name(display_data_block, SORTID_NODELIST),
		block_ptr->mp_str);

	add_display_treestore_line(
		update, store, &row,
		find_col_name(display_data_block, SORTID_CONN),
		conn_type_string(block_ptr->bg_conn_type));

	/* Image lines: BGL systems have an extra blrts image and list the
	 * remaining images in a different order. */
	if (cluster_flags & CLUSTER_FLAG_BGL) {
		add_display_treestore_line(
			update, store, &row,
			find_col_name(display_data_block,
				      SORTID_IMAGEBLRTS),
			block_ptr->imageblrts);
		add_display_treestore_line(
			update, store, &row,
			find_col_name(display_data_block,
				      SORTID_IMAGELINUX),
			block_ptr->imagelinux);
		add_display_treestore_line(
			update, store, &row,
			find_col_name(display_data_block,
				      SORTID_IMAGEMLOADER),
			block_ptr->imagemloader);
		add_display_treestore_line(
			update, store, &row,
			find_col_name(display_data_block,
				      SORTID_IMAGERAMDISK),
			block_ptr->imageramdisk);
	} else {
		add_display_treestore_line(
			update, store, &row,
			find_col_name(display_data_block,
				      SORTID_IMAGELINUX),
			block_ptr->imagelinux);
		add_display_treestore_line(
			update, store, &row,
			find_col_name(display_data_block,
				      SORTID_IMAGERAMDISK),
			block_ptr->imageramdisk);
		add_display_treestore_line(
			update, store, &row,
			find_col_name(display_data_block,
				      SORTID_IMAGEMLOADER),
			block_ptr->imagemloader);
	}

	/* Job line: a dash unless the value is above NO_JOB_RUNNING. */
	if (block_ptr->job_running <= NO_JOB_RUNNING)
		snprintf(job_text, sizeof(job_text), "-");
	else
		snprintf(job_text, sizeof(job_text),
			 "%d", block_ptr->job_running);
	add_display_treestore_line(
		update, store, &row,
		find_col_name(display_data_block, SORTID_JOB),
		job_text);

	if (cluster_flags & CLUSTER_FLAG_BGL) {
		add_display_treestore_line(
			update, store, &row,
			find_col_name(display_data_block, SORTID_USE),
			node_use_string(block_ptr->bg_node_use));
	}

	/* The SORTID_MP_STR line carries the formatted c-node count. */
	convert_num_unit((float)block_ptr->cnode_cnt,
			 cnode_text, sizeof(cnode_text), UNIT_NONE);
	add_display_treestore_line(
		update, store, &row,
		find_col_name(display_data_block, SORTID_MP_STR),
		cnode_text);

	add_display_treestore_line(
		update, store, &row,
		find_col_name(display_data_block, SORTID_PARTITION),
		block_ptr->slurm_part_name);

	add_display_treestore_line(
		update, store, &row,
		find_col_name(display_data_block, SORTID_STATE),
		bg_block_state_string(block_ptr->state));

	add_display_treestore_line(
		update, store, &row,
		find_col_name(display_data_block, SORTID_USER),
		block_ptr->bg_user_name);
}