/* * create_dynamic_block - create new block(s) to be used for a new * job allocation. * RET - a list of created block(s) or NULL on failure errno is set. */ extern List create_dynamic_block(List block_list, select_ba_request_t *request, List my_block_list, bool track_down_nodes) { int rc = SLURM_SUCCESS; ListIterator itr, itr2; bg_record_t *bg_record = NULL, *found_record = NULL; List results = NULL; List new_blocks = NULL; bitstr_t *my_bitmap = NULL; select_ba_request_t blockreq; int cnodes = request->procs / bg_conf->cpu_ratio; uint16_t start_geo[SYSTEM_DIMENSIONS]; if (cnodes < bg_conf->smallest_block) { error("Can't create this size %d " "on this system ionodes_per_mp is %d", request->procs, bg_conf->ionodes_per_mp); goto finished; } memset(&blockreq, 0, sizeof(select_ba_request_t)); memcpy(start_geo, request->geometry, sizeof(start_geo)); /* We need to lock this just incase a blocks_overlap is called which will in turn reset and set the system as it sees fit. */ slurm_mutex_lock(&block_state_mutex); if (my_block_list) { reset_ba_system(track_down_nodes); itr = list_iterator_create(my_block_list); while ((bg_record = list_next(itr))) { if (bg_record->magic != BLOCK_MAGIC) { /* This should never happen since we only call this on copies of blocks and we check on this during the copy. 
*/ error("create_dynamic_block: " "got a block with bad magic?"); continue; } if (bg_record->free_cnt) { if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) { int dim; char start_geo[SYSTEM_DIMENSIONS+1]; char geo[SYSTEM_DIMENSIONS+1]; for (dim=0; dim<SYSTEM_DIMENSIONS; dim++) { start_geo[dim] = alpha_num[ bg_record->start[dim]]; geo[dim] = alpha_num[ bg_record->geo[dim]]; } start_geo[dim] = '\0'; geo[dim] = '\0'; info("not adding %s(%s) %s %s %s %u " "(free_cnt)", bg_record->bg_block_id, bg_record->mp_str, bg_block_state_string( bg_record->state), start_geo, geo, bg_record->cnode_cnt); } continue; } if (!my_bitmap) { my_bitmap = bit_alloc(bit_size(bg_record->bitmap)); } if (!bit_super_set(bg_record->bitmap, my_bitmap)) { bit_or(my_bitmap, bg_record->bitmap); if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) { int dim; char start_geo[SYSTEM_DIMENSIONS+1]; char geo[SYSTEM_DIMENSIONS+1]; for (dim=0; dim<SYSTEM_DIMENSIONS; dim++) { start_geo[dim] = alpha_num[ bg_record->start[dim]]; geo[dim] = alpha_num[ bg_record->geo[dim]]; } start_geo[dim] = '\0'; geo[dim] = '\0'; info("adding %s(%s) %s %s %s %u", bg_record->bg_block_id, bg_record->mp_str, bg_block_state_string( bg_record->state), start_geo, geo, bg_record->cnode_cnt); } if (check_and_set_mp_list( bg_record->ba_mp_list) == SLURM_ERROR) { if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) info("something happened in " "the load of %s", bg_record->bg_block_id); list_iterator_destroy(itr); FREE_NULL_BITMAP(my_bitmap); rc = SLURM_ERROR; goto finished; } } else { if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) { int dim; char start_geo[SYSTEM_DIMENSIONS+1]; char geo[SYSTEM_DIMENSIONS+1]; for (dim=0; dim<SYSTEM_DIMENSIONS; dim++) { start_geo[dim] = alpha_num[ bg_record->start[dim]]; geo[dim] = alpha_num[ bg_record->geo[dim]]; } start_geo[dim] = '\0'; geo[dim] = '\0'; info("not adding %s(%s) %s %s %s %u ", bg_record->bg_block_id, bg_record->mp_str, bg_block_state_string( bg_record->state), start_geo, geo, 
bg_record->cnode_cnt); } /* just so we don't look at it later */ bg_record->free_cnt = -1; } } list_iterator_destroy(itr); FREE_NULL_BITMAP(my_bitmap); } else { reset_ba_system(false); if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) info("No list was given"); } if (request->avail_mp_bitmap) ba_set_removable_mps(request->avail_mp_bitmap, 1); if (request->size==1 && cnodes < bg_conf->mp_cnode_cnt) { switch(cnodes) { #ifdef HAVE_BGL case 32: blockreq.small32 = 4; blockreq.small128 = 3; break; case 128: blockreq.small128 = 4; break; #else case 16: blockreq.small16 = 2; blockreq.small32 = 1; blockreq.small64 = 1; blockreq.small128 = 1; blockreq.small256 = 1; break; case 32: blockreq.small32 = 2; blockreq.small64 = 1; blockreq.small128 = 1; blockreq.small256 = 1; break; case 64: blockreq.small64 = 2; blockreq.small128 = 1; blockreq.small256 = 1; break; case 128: blockreq.small128 = 2; blockreq.small256 = 1; break; case 256: blockreq.small256 = 2; break; #endif default: error("This size %d is unknown on this system", cnodes); goto finished; break; } /* Sort the list so the small blocks are in the order * of ionodes. */ list_sort(block_list, (ListCmpF)bg_record_cmpf_inc); request->conn_type[0] = SELECT_SMALL; new_blocks = list_create(destroy_bg_record); /* check only blocks that are free and small */ if (_breakup_blocks(block_list, new_blocks, request, my_block_list, true, true) == SLURM_SUCCESS) goto finished; /* check only blocks that are free and any size */ if (_breakup_blocks(block_list, new_blocks, request, my_block_list, true, false) == SLURM_SUCCESS) goto finished; /* check usable blocks that are small with any state */ if (_breakup_blocks(block_list, new_blocks, request, my_block_list, false, true) == SLURM_SUCCESS) goto finished; /* check all usable blocks */ if (_breakup_blocks(block_list, new_blocks, request, my_block_list, false, false) == SLURM_SUCCESS) goto finished; /* Re-sort the list back to the original order. 
*/ list_sort(block_list, (ListCmpF)bg_record_sort_aval_inc); list_destroy(new_blocks); new_blocks = NULL; if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) info("small block not able to be placed inside others"); } if (request->conn_type[0] == SELECT_NAV) request->conn_type[0] = SELECT_TORUS; //debug("going to create %d", request->size); if (!new_ba_request(request)) { if (request->geometry[0] != (uint16_t)NO_VAL) { char *geo = give_geo(request->geometry); error("Problems with request for size %d geo %s", request->size, geo); xfree(geo); } else { error("Problems with request for size %d. " "No geo given.", request->size); } rc = ESLURM_INTERCONNECT_FAILURE; goto finished; } /* try on free midplanes */ rc = SLURM_SUCCESS; if (results) list_flush(results); else { #ifdef HAVE_BGQ results = list_create(destroy_ba_mp); #else results = list_create(NULL); #endif } rc = allocate_block(request, results); /* This could be changed in allocate_block so set it back up */ memcpy(request->geometry, start_geo, sizeof(start_geo)); if (rc) { rc = SLURM_SUCCESS; goto setup_records; } if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) info("allocate failure for size %d base " "partitions of free midplanes", request->size); rc = SLURM_ERROR; if (!list_count(my_block_list) || !my_block_list) goto finished; /*Try to put block starting in the smallest of the exisiting blocks*/ itr = list_iterator_create(my_block_list); itr2 = list_iterator_create(my_block_list); while ((bg_record = (bg_record_t *) list_next(itr)) != NULL) { bool is_small = 0; /* never check a block with a job running */ if (bg_record->free_cnt || bg_record->job_running != NO_JOB_RUNNING) continue; /* Here we are only looking for the first block on the midplane. So either the count is greater or equal than bg_conf->mp_cnode_cnt or the first bit is set in the ionode_bitmap. 
*/ if (bg_record->cnode_cnt < bg_conf->mp_cnode_cnt) { bool found = 0; if (bit_ffs(bg_record->ionode_bitmap) != 0) continue; /* Check to see if we have other blocks in this midplane that have jobs running. */ while ((found_record = list_next(itr2))) { if (!found_record->free_cnt && (found_record->job_running != NO_JOB_RUNNING) && bit_overlap(bg_record->bitmap, found_record->bitmap)) { found = 1; break; } } list_iterator_reset(itr2); if (found) continue; is_small = 1; } if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) info("removing %s(%s) for request %d", bg_record->bg_block_id, bg_record->mp_str, request->size); remove_block(bg_record->ba_mp_list, is_small); rc = SLURM_SUCCESS; if (results) list_flush(results); else { #ifdef HAVE_BGQ results = list_create(destroy_ba_mp); #else results = list_create(NULL); #endif } rc = allocate_block(request, results); /* This could be changed in allocate_block so set it back up */ memcpy(request->geometry, start_geo, sizeof(start_geo)); if (rc) { rc = SLURM_SUCCESS; break; } if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) info("allocate failure for size %d base partitions", request->size); rc = SLURM_ERROR; } list_iterator_destroy(itr); list_iterator_destroy(itr2); setup_records: if (rc == SLURM_SUCCESS) { /*set up bg_record(s) here */ new_blocks = list_create(destroy_bg_record); blockreq.save_name = request->save_name; #ifdef HAVE_BGL blockreq.blrtsimage = request->blrtsimage; #endif blockreq.linuximage = request->linuximage; blockreq.mloaderimage = request->mloaderimage; blockreq.ramdiskimage = request->ramdiskimage; memcpy(blockreq.conn_type, request->conn_type, sizeof(blockreq.conn_type)); add_bg_record(new_blocks, &results, &blockreq, 0, 0); } finished: if (request->avail_mp_bitmap && (bit_ffc(request->avail_mp_bitmap) == -1)) ba_reset_all_removed_mps(); slurm_mutex_unlock(&block_state_mutex); /* reset the ones we mucked with */ itr = list_iterator_create(my_block_list); while ((bg_record = (bg_record_t *) 
list_next(itr))) { if (bg_record->free_cnt == -1) bg_record->free_cnt = 0; } list_iterator_destroy(itr); xfree(request->save_name); if (results) list_destroy(results); errno = rc; return new_blocks; }
/*
 * _jobinfo_printable - never hand a NULL pointer to a "%s" conversion.
 * IN str - string that may be NULL
 * RET str itself, or the literal "(null)" when str is NULL.  The
 *	placeholder spells out what glibc prints for a NULL "%s" argument,
 *	so existing output stays the same while the call becomes well defined.
 */
static const char *_jobinfo_printable(const char *str)
{
	return str ? str : "(null)";
}

/* write select job info to a string
 * IN jobinfo - a select job credential, may be NULL only when mode is
 *	SELECT_PRINT_HEAD
 * IN mode - print mode, see enum select_print_mode
 * RET - char * containing string of request, built with
 *	xstrcat()/xstrfmtcat() (presumably to be released with xfree() by
 *	the caller).  NULL is returned when the magic is bad, when jobinfo
 *	is NULL for any mode other than SELECT_PRINT_HEAD, or when mode is
 *	not recognized.
 *
 * NOTE: the magic check is skipped for SELECT_PRINT_DATA.
 * NOTE(review): the connection type string is only computed when a
 *	geometry is set and mode is not SELECT_PRINT_START_LOC; otherwise
 *	the connection field prints as "(null)" - confirm this is intended.
 */
extern char *xstrdup_select_jobinfo(select_jobinfo_t *jobinfo, int mode)
{
	char *geo = NULL;
	int i;
	char *tmp_image = "default";
	char *buf = NULL;
	char *header = "CONNECT REBOOT ROTATE GEOMETRY BLOCK_ID";
	bool print_x = 1;
	char *conn_type = NULL;
	const char *conn_str, *block_id, *mp_str;

	if ((mode != SELECT_PRINT_DATA)
	    && jobinfo && (jobinfo->magic != JOBINFO_MAGIC)) {
		error("xstrdup_jobinfo: jobinfo magic bad");
		return NULL;
	}

	if (jobinfo == NULL) {
		if (mode != SELECT_PRINT_HEAD) {
			error("xstrdup_jobinfo: jobinfo bad");
			return NULL;
		}
		xstrcat(buf, header);
		return buf;
	}

	/* SELECT_PRINT_GEOMETRY wants the dimensions without 'x' separators */
	if (mode == SELECT_PRINT_GEOMETRY)
		print_x = 0;

	if (jobinfo->geometry[0] == (uint16_t) NO_VAL) {
		/* no geometry set: print a zero for every dimension */
		for (i=0; i<SYSTEM_DIMENSIONS; i++) {
			if (geo && print_x)
				xstrcat(geo, "x0");
			else
				xstrcat(geo, "0");
		}
	} else if (mode != SELECT_PRINT_START_LOC) {
		geo = give_geo(jobinfo->geometry, jobinfo->dim_cnt, print_x);
		conn_type = conn_type_string_full(jobinfo->conn_type);
	}

	/* These may legitimately be NULL here; passing NULL to "%s" is
	 * undefined, so substitute an explicit placeholder. */
	conn_str = _jobinfo_printable(conn_type);
	block_id = _jobinfo_printable(jobinfo->bg_block_id);
	mp_str = _jobinfo_printable(jobinfo->mp_str);

	switch (mode) {
	case SELECT_PRINT_HEAD:
		xstrcat(buf, header);
		break;
	case SELECT_PRINT_DATA:
		xstrfmtcat(buf,
			   "%7.7s %6.6s %6.6s    %s %-16s",
			   conn_str,
			   _yes_no_string(jobinfo->reboot),
			   _yes_no_string(jobinfo->rotate),
			   geo,
			   block_id);
		break;
	case SELECT_PRINT_MIXED:
		xstrfmtcat(buf,
			   "Connection=%s Reboot=%s Rotate=%s "
			   "Geometry=%s Block_ID=%s",
			   conn_str,
			   _yes_no_string(jobinfo->reboot),
			   _yes_no_string(jobinfo->rotate),
			   geo,
			   block_id);
		break;
	case SELECT_PRINT_BG_ID:
		xstrfmtcat(buf, "%s", block_id);
		break;
	case SELECT_PRINT_NODES:
		if (jobinfo->ionode_str && jobinfo->ionode_str[0])
			xstrfmtcat(buf, "%s[%s]",
				   mp_str, jobinfo->ionode_str);
		else
			xstrfmtcat(buf, "%s", mp_str);
		break;
	case SELECT_PRINT_CONNECTION:
		xstrfmtcat(buf, "%s", conn_str);
		break;
	case SELECT_PRINT_REBOOT:
		xstrfmtcat(buf, "%s", _yes_no_string(jobinfo->reboot));
		break;
	case SELECT_PRINT_ROTATE:
		xstrfmtcat(buf, "%s", _yes_no_string(jobinfo->rotate));
		break;
	case SELECT_PRINT_GEOMETRY:
		xstrfmtcat(buf, "%s", geo);
		break;
	case SELECT_PRINT_BLRTS_IMAGE:
		if (jobinfo->blrtsimage)
			tmp_image = jobinfo->blrtsimage;
		xstrfmtcat(buf, "%s", tmp_image);
		break;
	case SELECT_PRINT_LINUX_IMAGE:
		if (jobinfo->linuximage)
			tmp_image = jobinfo->linuximage;
		xstrfmtcat(buf, "%s", tmp_image);
		break;
	case SELECT_PRINT_MLOADER_IMAGE:
		if (jobinfo->mloaderimage)
			tmp_image = jobinfo->mloaderimage;
		xstrfmtcat(buf, "%s", tmp_image);
		break;
	case SELECT_PRINT_RAMDISK_IMAGE:
		if (jobinfo->ramdiskimage)
			tmp_image = jobinfo->ramdiskimage;
		xstrfmtcat(buf, "%s", tmp_image);
		break;
	case SELECT_PRINT_START_LOC:
		/* drop any zero-filled geometry built above and print the
		 * start location instead */
		xfree(geo);
		geo = give_geo(jobinfo->start_loc, jobinfo->dim_cnt, 0);
		xstrfmtcat(buf, "%s", geo);
		break;
	default:
		/* buf is still NULL here, so the caller gets NULL back */
		error("xstrdup_jobinfo: bad mode %d", mode);
		break;
	}

	xfree(geo);
	xfree(conn_type);
	return buf;
}