/*
 * Finish the bookkeeping for a block after a free request on it completes.
 *
 * block_state_mutex must be held by the caller.  The mutex is released and
 * re-acquired around select_g_update_block() when the block has to be put
 * into an error state.
 *
 * bg_record - block that was being freed; free_cnt is decremented on every
 *             call that gets past the magic check.
 * restore   - when true and the block has a non-zero mp_count, the record
 *             is left in the main list rather than removed and destroyed.
 *             Overridden to false for a block marked destroy that has
 *             more than one midplane.
 *
 * Returns SLURM_ERROR if the record's magic is wrong, SLURM_SUCCESS in
 * every other case.
 */
static int _post_block_free(bg_record_t *bg_record, bool restore)
{
	int remove_rc = SLURM_SUCCESS;

	if (bg_record->magic != BLOCK_MAGIC) {
		error("block already destroyed %p", bg_record);
		xassert(0);
		return SLURM_ERROR;
	}

	bg_record->free_cnt--;

	/* free_cnt was already zero before this call: nothing to do. */
	if (bg_record->free_cnt == -1) {
		info("we got a negative 1 here for %s",
		     bg_record->bg_block_id);
		xassert(0);
		return SLURM_SUCCESS;
	}

	/* Leave the block alone while it is flagged as being modified. */
	if (bg_record->modifying) {
		info("others are modifing this block %s, don't clear it up",
		     bg_record->bg_block_id);
		return SLURM_SUCCESS;
	}

	/* Only the call that brings free_cnt to zero does the cleanup. */
	if (bg_record->free_cnt) {
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE) {
			info("%d others are trying to destroy this block %s",
			     bg_record->free_cnt, bg_record->bg_block_id);
		}
		return SLURM_SUCCESS;
	}

	/*
	 * The error flag is masked off before comparing, so a block that is
	 * already in an error state is still checked here.  This is needed
	 * to avoid overlapping blocks that may have been created because of
	 * bad hardware.
	 */
	if ((bg_record->state & (~BG_BLOCK_ERROR_FLAG)) != BG_BLOCK_FREE) {
		/* The block did not end up free; flag it as in error. */
		update_block_msg_t err_msg;

		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE) {
			info("_post_block_free: block %s is not in state "
			     "free (%s), putting it in error state.",
			     bg_record->bg_block_id,
			     bg_block_state_string(bg_record->state));
		}

		slurm_init_update_block_msg(&err_msg);
		err_msg.bg_block_id = bg_record->bg_block_id;
		err_msg.state = BG_BLOCK_ERROR_FLAG;
		err_msg.reason = "Block would not deallocate";

		/* select_g_update_block() is called without the lock held. */
		slurm_mutex_unlock(&block_state_mutex);
		select_g_update_block(&err_msg);
		slurm_mutex_lock(&block_state_mutex);

		/*
		 * Only touch the record again if it is still in the main
		 * list -- presumably it may have gone away while the lock
		 * was dropped (TODO confirm).
		 */
		if (block_ptr_exist_in_list(bg_lists->main, bg_record)) {
			bg_record->destroy = 0;
		}
		return SLURM_SUCCESS;
	}

	/*
	 * restore is applied to a whole list for the case where that list
	 * was a bunch of small blocks.  A record that is marked to be
	 * destroyed and is bigger than one midplane gets destroyed even
	 * when restore was requested.
	 */
	if (restore && bg_record->destroy && (bg_record->mp_count > 1)) {
		restore = false;
	}

	/* Getting this far means the destroy is finished; clear the flag. */
	bg_record->destroy = 0;

	/* Sanity check: make sure the block leaves every list it may be in. */
	remove_from_bg_list(bg_lists->booted, bg_record);
	if (remove_from_bg_list(bg_lists->job_running, bg_record)
	    == SLURM_SUCCESS) {
		debug2("_post_block_free: we are freeing block %s and "
		       "it was in the job_running list. This can happen if a "
		       "block is removed while waiting for mmcs to finish "
		       "removing the job from the block.",
		       bg_record->bg_block_id);
		num_unused_cpus += bg_record->cpu_cnt;
	}

	/* A block with a zero mp_count is removed even if restore is set. */
	if (restore && bg_record->mp_count) {
		return SLURM_SUCCESS;
	}

	if (remove_from_bg_list(bg_lists->main, bg_record) != SLURM_SUCCESS) {
		/*
		 * Expected only when called from bg_job_place.c with a block
		 * that was never added to the main list.
		 */
		debug("_post_block_free: It appears this block %s isn't "
		      "in the main list anymore.",
		      bg_record->bg_block_id);
	}

	if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE) {
		info("_post_block_free: removing %s from database",
		     bg_record->bg_block_id);
	}

	remove_rc = bridge_block_remove(bg_record);
	if (remove_rc == SLURM_SUCCESS) {
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE) {
			info("_post_block_free: done %s(%p)",
			     bg_record->bg_block_id, bg_record);
		}
	} else if (remove_rc == BG_ERROR_BLOCK_NOT_FOUND) {
		debug("_post_block_free: block %s is not found",
		      bg_record->bg_block_id);
	} else {
		error("_post_block_free: "
		      "bridge_block_remove(%s): %s",
		      bg_record->bg_block_id,
		      bg_err_str(remove_rc));
	}

	/* The record is destroyed regardless of the bridge result. */
	destroy_bg_record(bg_record);
	if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE) {
		info("_post_block_free: destroyed");
	}

	return SLURM_SUCCESS;
}
/*
 * Finish the bookkeeping for a block after a free request on it completes.
 *
 * block_state_mutex must be held by the caller.  The mutex is released and
 * re-acquired around select_g_update_block() when the block has to be put
 * into an error state.
 *
 * bg_record - block that was being freed; free_cnt is decremented on every
 *             call that gets past the magic check.
 * restore   - when true, the block is only taken off the booted and
 *             job_running lists; it is not removed from the main list,
 *             not passed to bridge_block_remove() and not destroyed.
 *
 * Returns SLURM_ERROR if the record's magic is wrong, SLURM_SUCCESS in
 * every other case.
 */
static int _post_block_free(bg_record_t *bg_record, bool restore)
{
	int remove_rc = SLURM_SUCCESS;

	if (bg_record->magic != BLOCK_MAGIC) {
		error("block already destroyed %p", bg_record);
		xassert(0);
		return SLURM_ERROR;
	}

	bg_record->free_cnt--;

	/* free_cnt was already zero before this call: nothing to do. */
	if (bg_record->free_cnt == -1) {
		info("we got a negative 1 here for %s",
		     bg_record->bg_block_id);
		xassert(0);
		return SLURM_SUCCESS;
	}

	/*
	 * Leave the block alone while it is flagged as being modified.
	 * NOTE(review): the number logged here is free_cnt, not a count of
	 * modifiers -- confirm whether the message is intended.
	 */
	if (bg_record->modifying) {
		info("%d others are modifing this block %s",
		     bg_record->free_cnt, bg_record->bg_block_id);
		return SLURM_SUCCESS;
	}

	/* Only the call that brings free_cnt to zero does the cleanup. */
	if (bg_record->free_cnt) {
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE) {
			info("%d others are trying to destroy this block %s",
			     bg_record->free_cnt, bg_record->bg_block_id);
		}
		return SLURM_SUCCESS;
	}

	/*
	 * A block that is neither free nor already carrying the error flag
	 * gets pushed into an error state instead of being cleaned up.
	 */
	if (!(bg_record->state & BG_BLOCK_ERROR_FLAG)
	    && (bg_record->state != BG_BLOCK_FREE)) {
		update_block_msg_t err_msg;

		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE) {
			info("_post_block_free: block %s is not in state "
			     "free (%s), putting it in error state.",
			     bg_record->bg_block_id,
			     bg_block_state_string(bg_record->state));
		}

		slurm_init_update_block_msg(&err_msg);
		err_msg.bg_block_id = bg_record->bg_block_id;
		err_msg.state = BG_BLOCK_ERROR_FLAG;
		err_msg.reason = "Block would not deallocate";

		/* select_g_update_block() is called without the lock held. */
		slurm_mutex_unlock(&block_state_mutex);
		select_g_update_block(&err_msg);
		slurm_mutex_lock(&block_state_mutex);

		return SLURM_SUCCESS;
	}

	/* Sanity check: make sure the block leaves every list it may be in. */
	remove_from_bg_list(bg_lists->booted, bg_record);
	if (remove_from_bg_list(bg_lists->job_running, bg_record)
	    == SLURM_SUCCESS) {
		num_unused_cpus += bg_record->cpu_cnt;
	}

	/* When restoring, the record stays in the main list untouched. */
	if (restore) {
		return SLURM_SUCCESS;
	}

	if (remove_from_bg_list(bg_lists->main, bg_record) != SLURM_SUCCESS) {
		/*
		 * Expected only when called from bg_job_place.c with a block
		 * that was never added to the main list.
		 */
		debug("_post_block_free: It appears this block %s isn't "
		      "in the main list anymore.",
		      bg_record->bg_block_id);
	}

	if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE) {
		info("_post_block_free: removing %s from database",
		     bg_record->bg_block_id);
	}

	remove_rc = bridge_block_remove(bg_record);
	if (remove_rc == SLURM_SUCCESS) {
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE) {
			info("_post_block_free: done %s(%p)",
			     bg_record->bg_block_id, bg_record);
		}
	} else if (remove_rc == BG_ERROR_BLOCK_NOT_FOUND) {
		debug("_post_block_free: block %s is not found",
		      bg_record->bg_block_id);
	} else {
		error("_post_block_free: "
		      "bridge_block_remove(%s): %s",
		      bg_record->bg_block_id,
		      bg_err_str(remove_rc));
	}

	/* The record is destroyed regardless of the bridge result. */
	destroy_bg_record(bg_record);
	if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE) {
		info("_post_block_free: destroyed");
	}

	return SLURM_SUCCESS;
}