/*
 * Return the burst buffer size requested by a job.
 *
 * The job's burst_buffer string is searched for the first "size=" token and
 * the text following it is converted by bb_get_size_num() using the
 * configured granularity.
 *
 * job_ptr IN - job record to inspect
 * RET the converted size, or 0 if the job has no burst_buffer string or the
 *     string holds no "size=" token
 */
static uint64_t _get_bb_size(struct job_record *job_ptr)
{
	char *size_spec;

	if (!job_ptr->burst_buffer)
		return 0;

	size_spec = strstr(job_ptr->burst_buffer, "size=");
	if (!size_spec)
		return 0;

	/* Step over the five characters of "size=" to reach the value */
	size_spec += 5;
	return bb_get_size_num(size_spec, bb_state.bb_config.granularity);
}
/* * Preliminary validation of a job submit request with respect to burst buffer * options. Performed after setting default account + qos, but prior to * establishing job ID or creating script file. * * Returns a SLURM errno. */ extern int bb_p_job_validate(struct job_descriptor *job_desc, uid_t submit_uid) { int64_t bb_size = 0; char *key; int i; xassert(job_desc); xassert(job_desc->tres_req_cnt); if (bb_state.bb_config.debug_flag) { info("%s: %s: job_user_id:%u, submit_uid:%d", plugin_type, __func__, job_desc->user_id, submit_uid); info("%s: burst_buffer:%s", __func__, job_desc->burst_buffer); info("%s: script:%s", __func__, job_desc->script); } if (job_desc->burst_buffer) { key = strstr(job_desc->burst_buffer, "size="); if (key) { bb_size = bb_get_size_num(key + 5, bb_state.bb_config.granularity); } } if (bb_size == 0) return SLURM_SUCCESS; if (bb_size < 0) return ESLURM_BURST_BUFFER_LIMIT; pthread_mutex_lock(&bb_state.bb_mutex); if (bb_state.bb_config.allow_users) { for (i = 0; bb_state.bb_config.allow_users[i]; i++) { if (job_desc->user_id == bb_state.bb_config.allow_users[i]) break; } if (bb_state.bb_config.allow_users[i] == 0) { pthread_mutex_unlock(&bb_state.bb_mutex); return ESLURM_BURST_BUFFER_PERMISSION; } } if (bb_state.bb_config.deny_users) { for (i = 0; bb_state.bb_config.deny_users[i]; i++) { if (job_desc->user_id == bb_state.bb_config.deny_users[i]) break; } if (bb_state.bb_config.deny_users[i] != 0) { pthread_mutex_unlock(&bb_state.bb_mutex); return ESLURM_BURST_BUFFER_PERMISSION; } } if (bb_size > bb_state.total_space) { info("Job from user %u requested burst buffer size of " "%"PRIu64", but total space is only %"PRIu64"", job_desc->user_id, bb_size, bb_state.total_space); } job_desc->tres_req_cnt[bb_state.tres_pos] = bb_size / (1024 * 1024); pthread_mutex_unlock(&bb_state.bb_mutex); return SLURM_SUCCESS; }
/* * Determine the current actual burst buffer state. * Run the program "get_sys_state" and parse stdout for details. * job_id IN - specific job to get information about, or 0 for all jobs */ static void _load_state(uint32_t job_id) { static uint64_t last_total_space = 0; char *save_ptr = NULL, *tok, *leftover = NULL, *resp, *tmp = NULL; char *script_args[4], job_id_str[32]; s_p_hashtbl_t *state_hashtbl = NULL; static s_p_options_t state_options[] = { {"ENOENT", S_P_STRING}, {"UserID", S_P_ARRAY, _parse_job_info, _destroy_job_info}, {"TotalSize", S_P_STRING}, {NULL} }; int status = 0; DEF_TIMERS; if (!bb_state.bb_config.get_sys_state) return; bb_state.last_load_time = time(NULL); tok = strrchr(bb_state.bb_config.get_sys_state, '/'); if (tok) script_args[0] = tok + 1; else script_args[0] = bb_state.bb_config.get_sys_state; if (job_id) { script_args[1] = "get_job"; snprintf(job_id_str, sizeof(job_id_str), "%u", job_id); script_args[3] = NULL; } else { script_args[1] = "get_sys"; script_args[2] = NULL; } START_TIMER; resp = bb_run_script("GetSysState", bb_state.bb_config.get_sys_state, script_args, 2000, &status); if (resp == NULL) return; END_TIMER; if (DELTA_TIMER > 200000) /* 0.2 secs */ info("%s: GetSysState ran for %s", __func__, TIME_STR); else if (bb_state.bb_config.debug_flag) debug("%s: GetSysState ran for %s", __func__, TIME_STR); state_hashtbl = s_p_hashtbl_create(state_options); tok = strtok_r(resp, "\n", &save_ptr); while (tok) { s_p_parse_line(state_hashtbl, tok, &leftover); tok = strtok_r(NULL, "\n", &save_ptr); } if (s_p_get_string(&tmp, "TotalSize", state_hashtbl)) { bb_state.total_space = bb_get_size_num(tmp, bb_state.bb_config.granularity); xfree(tmp); if (bb_state.bb_config.debug_flag && (bb_state.total_space != last_total_space)) { info("%s: total_space:%"PRIu64"", __func__, bb_state.total_space); } last_total_space = bb_state.total_space; } else if (job_id == 0) { error("%s: GetSysState failed to respond with TotalSize", plugin_type); } 
s_p_hashtbl_destroy(state_hashtbl); xfree(resp); }
static int _parse_job_info(void **dest, slurm_parser_enum_t type, const char *key, const char *value, const char *line, char **leftover) { s_p_hashtbl_t *job_tbl; char *name = NULL, *tmp = NULL, local_name[64] = ""; uint64_t size = 0; uint32_t job_id = 0, user_id = 0; uint16_t state = 0; bb_alloc_t *bb_ptr; struct job_record *job_ptr = NULL; bb_job_t *bb_spec; static s_p_options_t _job_options[] = { {"JobID",S_P_STRING}, {"Name", S_P_STRING}, {"Size", S_P_STRING}, {"State", S_P_STRING}, {NULL} }; *dest = NULL; user_id = strtol(value, NULL, 10); job_tbl = s_p_hashtbl_create(_job_options); s_p_parse_line(job_tbl, *leftover, leftover); if (s_p_get_string(&tmp, "JobID", job_tbl)) { job_id = strtol(tmp, NULL, 10); xfree(tmp); } if (s_p_get_string(&name, "Name", job_tbl)) { snprintf(local_name, sizeof(local_name), "%s", name); xfree(name); } if (s_p_get_string(&tmp, "Size", job_tbl)) { size = bb_get_size_num(tmp, bb_state.bb_config.granularity); xfree(tmp); } if (s_p_get_string(&tmp, "State", job_tbl)) { state = bb_state_num(tmp); xfree(tmp); } s_p_hashtbl_destroy(job_tbl); #if 0 info("%s: JobID:%u Name:%s Size:%"PRIu64" State:%u UserID:%u", __func__, job_id, local_name, size, state, user_id); #endif if (job_id) { job_ptr = find_job_record(job_id); if (!job_ptr && (state == BB_STATE_STAGED_OUT)) { struct job_record job_rec; job_rec.job_id = job_id; job_rec.user_id = user_id; bb_ptr = bb_find_alloc_rec(&bb_state, &job_rec); _stop_stage_out(job_id); /* Purge buffer */ if (bb_ptr) { bb_ptr->cancelled = true; bb_ptr->end_time = 0; } else { /* Slurm knows nothing about this job, * may be result of slurmctld cold start */ error("%s: Vestigial buffer for purged job %u", plugin_type, job_id); } return SLURM_SUCCESS; } else if (!job_ptr && ((state == BB_STATE_STAGING_IN) || (state == BB_STATE_STAGED_IN))) { struct job_record job_rec; job_rec.job_id = job_id; job_rec.user_id = user_id; bb_ptr = bb_find_alloc_rec(&bb_state, &job_rec); _stop_stage_in(job_id); /* Purge buffer */ if 
(bb_ptr) { bb_ptr->cancelled = true; bb_ptr->end_time = 0; } else { /* Slurm knows nothing about this job, * may be result of slurmctld cold start */ error("%s: Vestigial buffer for purged job %u", plugin_type, job_id); } return SLURM_SUCCESS; } else if (!job_ptr) { error("%s: Vestigial buffer for job ID %u. " "Clear manually", plugin_type, job_id); } snprintf(local_name, sizeof(local_name), "VestigialJob%u", job_id); } if (job_ptr) { bb_ptr = bb_find_alloc_rec(&bb_state, job_ptr); if (bb_ptr == NULL) { bb_spec = xmalloc(sizeof(bb_job_t)); bb_spec->total_size = _get_bb_size(job_ptr); bb_ptr = bb_alloc_job_rec(&bb_state, job_ptr, bb_spec); xfree(bb_spec); bb_ptr->state = state; /* bb_ptr->state_time set in bb_alloc_job_rec() */ } } else { if ((bb_ptr = _find_bb_name_rec(local_name, user_id)) == NULL) { bb_ptr = bb_alloc_name_rec(&bb_state, local_name, user_id); bb_ptr->size = size; bb_ptr->state = state; //FIXME: VESTIGIAL: Use bb_limit_add // bb_add_user_load(bb_ptr, &bb_state); return SLURM_SUCCESS; } } bb_ptr->seen_time = time(NULL); /* used to purge defunct recs */ /* UserID set to 0 on some failure modes */ if ((bb_ptr->user_id != user_id) && (user_id != 0)) { error("%s: User ID mismatch (%u != %u). " "BB UserID=%u JobID=%u Name=%s", plugin_type, bb_ptr->user_id, user_id, bb_ptr->user_id, bb_ptr->job_id, bb_ptr->name); } if ((bb_ptr->state == BB_STATE_RUNNING) && (state == BB_STATE_STAGED_IN)) state = BB_STATE_RUNNING; /* More precise state info */ if (bb_ptr->state != state) { /* State is subject to real-time changes */ debug("%s: State changed (%s to %s). 
" "BB UserID=%u JobID=%u Name=%s", plugin_type, bb_state_string(bb_ptr->state), bb_state_string(state), bb_ptr->user_id, bb_ptr->job_id, bb_ptr->name); bb_ptr->state = state; bb_ptr->state_time = time(NULL); if (bb_ptr->state == BB_STATE_STAGED_OUT) { if (bb_ptr->size != 0) { //FIXME: VESTIGIAL: Use bb_limit_rem // bb_remove_user_load(bb_ptr, &bb_state); bb_ptr->size = 0; } } if (bb_ptr->state == BB_STATE_STAGED_IN) queue_job_scheduler(); } if ((bb_ptr->state != BB_STATE_STAGED_OUT) && (bb_ptr->size != size)) { //FIXME: VESTIGIAL: Use bb_limit_rem // bb_remove_user_load(bb_ptr, &bb_state); if (size != 0) { error("%s: Size mismatch (%"PRIu64" != %"PRIu64"). " "BB UserID=%u JobID=%u Name=%s", plugin_type, bb_ptr->size, size, bb_ptr->user_id, bb_ptr->job_id, bb_ptr->name); } bb_ptr->size = MAX(bb_ptr->size, size); //FIXME: VESTIGIAL: Use bb_limit_add // bb_add_user_load(bb_ptr, &bb_state); } return SLURM_SUCCESS; }
/*
 * Load and process configuration parameters.
 *
 * Resets state_ptr->bb_config to defaults, then reads "burst_buffer.conf"
 * or, failing that, "burst_buffer_<name>.conf", where <name> is the text
 * after the '/' in plugin_type. If neither file can be opened the defaults
 * are left in place and the function returns. A file that opens but fails to
 * parse is fatal.
 *
 * state_ptr IN/OUT - plugin state whose name and bb_config are rewritten
 * plugin_type IN - plugin type string, e.g. "burst_buffer/cray"; may be NULL,
 *	in which case state_ptr->name is left NULL
 */
extern void bb_load_config(bb_state_t *state_ptr, char *plugin_type)
{
	s_p_hashtbl_t *bb_hashtbl = NULL;
	char *bb_conf, *tmp = NULL, *value;
#if _SUPPORT_ALT_POOL
	char *colon, *save_ptr = NULL, *tok;
	uint32_t pool_cnt;
#endif
	int fd, i;
	static s_p_options_t bb_options[] = {
		{"AllowUsers", S_P_STRING},
#if _SUPPORT_ALT_POOL
		{"AltPool", S_P_STRING},
#endif
		{"CreateBuffer", S_P_STRING},
		{"DefaultPool", S_P_STRING},
		{"DenyUsers", S_P_STRING},
		{"DestroyBuffer", S_P_STRING},
		{"Flags", S_P_STRING},
		{"GetSysState", S_P_STRING},
		{"Granularity", S_P_STRING},
		{"OtherTimeout", S_P_UINT32},
		{"StageInTimeout", S_P_UINT32},
		{"StageOutTimeout", S_P_UINT32},
		{"StartStageIn", S_P_STRING},
		{"StartStageOut", S_P_STRING},
		{"StopStageIn", S_P_STRING},
		{"StopStageOut", S_P_STRING},
		{"ValidateTimeout", S_P_UINT32},
		{NULL}
	};

	xfree(state_ptr->name);
	if (plugin_type) {
		tmp = strchr(plugin_type, '/');
		if (tmp)
			tmp++;
		else
			tmp = plugin_type;
		state_ptr->name = xstrdup(tmp);
	}

	/* Set default configuration */
	bb_clear_config(&state_ptr->bb_config, false);
	if (slurm_get_debug_flags() & DEBUG_FLAG_BURST_BUF)
		state_ptr->bb_config.debug_flag = true;
	state_ptr->bb_config.flags |= BB_FLAG_DISABLE_PERSISTENT;
	state_ptr->bb_config.other_timeout = DEFAULT_OTHER_TIMEOUT;
	state_ptr->bb_config.stage_in_timeout = DEFAULT_STATE_IN_TIMEOUT;
	state_ptr->bb_config.stage_out_timeout = DEFAULT_STATE_OUT_TIMEOUT;
	state_ptr->bb_config.validate_timeout = DEFAULT_VALIDATE_TIMEOUT;

	/* First look for "burst_buffer.conf" then with "type" field,
	 * for example "burst_buffer_cray.conf" */
	bb_conf = get_extra_conf_path("burst_buffer.conf");
	fd = open(bb_conf, 0);
	if (fd >= 0) {
		/* The open() is only a probe; parsing reopens by path */
		close(fd);
	} else {
		char *new_path = NULL;
		xfree(bb_conf);
		xstrfmtcat(new_path, "burst_buffer_%s.conf", state_ptr->name);
		bb_conf = get_extra_conf_path(new_path);
		fd = open(bb_conf, 0);
		if (fd < 0) {
			info("%s: Unable to find configuration file %s or "
			     "burst_buffer.conf", __func__, new_path);
			xfree(bb_conf);
			xfree(new_path);
			return;
		}
		close(fd);
		xfree(new_path);
	}

	bb_hashtbl = s_p_hashtbl_create(bb_options);
	if (s_p_parse_file(bb_hashtbl, NULL, bb_conf, false) == SLURM_ERROR) {
		fatal("%s: something wrong with opening/reading %s: %m",
		      __func__, bb_conf);
	}
	if (s_p_get_string(&state_ptr->bb_config.allow_users_str, "AllowUsers",
			   bb_hashtbl)) {
		state_ptr->bb_config.allow_users = _parse_users(
					state_ptr->bb_config.allow_users_str);
	}
	s_p_get_string(&state_ptr->bb_config.create_buffer, "CreateBuffer",
		       bb_hashtbl);
	s_p_get_string(&state_ptr->bb_config.default_pool, "DefaultPool",
		       bb_hashtbl);
	if (s_p_get_string(&state_ptr->bb_config.deny_users_str, "DenyUsers",
			   bb_hashtbl)) {
		state_ptr->bb_config.deny_users = _parse_users(
					state_ptr->bb_config.deny_users_str);
	}
	s_p_get_string(&state_ptr->bb_config.destroy_buffer, "DestroyBuffer",
		       bb_hashtbl);

	if (s_p_get_string(&tmp, "Flags", bb_hashtbl)) {
		state_ptr->bb_config.flags = slurm_bb_str2flags(tmp);
		xfree(tmp);
	}
	/*
	 * By default, disable persistent buffer creation by normal users.
	 * The Flags assignment above replaces the whole bitmask, so the
	 * default has to be re-applied here unless persistent buffers were
	 * explicitly enabled.
	 */
	if (state_ptr->bb_config.flags & BB_FLAG_ENABLE_PERSISTENT)
		state_ptr->bb_config.flags &= (~BB_FLAG_DISABLE_PERSISTENT);
	else
		state_ptr->bb_config.flags |= BB_FLAG_DISABLE_PERSISTENT;

	s_p_get_string(&state_ptr->bb_config.get_sys_state, "GetSysState",
		       bb_hashtbl);
	if (s_p_get_string(&tmp, "Granularity", bb_hashtbl)) {
		state_ptr->bb_config.granularity = bb_get_size_num(tmp, 1);
		xfree(tmp);
		if (state_ptr->bb_config.granularity == 0) {
			error("%s: Granularity=0 is invalid", __func__);
			state_ptr->bb_config.granularity = 1;
		}
	}
#if _SUPPORT_ALT_POOL
	/* AltPool is a comma separated list of "name[:count]" entries;
	 * count is 1 when omitted */
	if (s_p_get_string(&tmp, "AltPool", bb_hashtbl)) {
		tok = strtok_r(tmp, ",", &save_ptr);
		while (tok) {
			colon = strchr(tok, ':');
			if (colon) {
				colon[0] = '\0';
				pool_cnt = _atoi(colon + 1);
			} else
				pool_cnt = 1;
			state_ptr->bb_config.pool_ptr = xrealloc(
				state_ptr->bb_config.pool_ptr,
				sizeof(burst_buffer_pool_t) *
				(state_ptr->bb_config.pool_cnt + 1));
			state_ptr->bb_config.
				pool_ptr[state_ptr->bb_config.pool_cnt].name =
				xstrdup(tok);
			state_ptr->bb_config.
				pool_ptr[state_ptr->bb_config.pool_cnt].
				avail_space = pool_cnt;
			state_ptr->bb_config.pool_cnt++;
			tok = strtok_r(NULL, ",", &save_ptr);
		}
		xfree(tmp);
	}
#endif
	(void) s_p_get_uint32(&state_ptr->bb_config.other_timeout,
			      "OtherTimeout", bb_hashtbl);
	(void) s_p_get_uint32(&state_ptr->bb_config.stage_in_timeout,
			      "StageInTimeout", bb_hashtbl);
	(void) s_p_get_uint32(&state_ptr->bb_config.stage_out_timeout,
			      "StageOutTimeout", bb_hashtbl);
	s_p_get_string(&state_ptr->bb_config.start_stage_in, "StartStageIn",
		       bb_hashtbl);
	s_p_get_string(&state_ptr->bb_config.start_stage_out, "StartStageOut",
		       bb_hashtbl);
	s_p_get_string(&state_ptr->bb_config.stop_stage_in, "StopStageIn",
		       bb_hashtbl);
	s_p_get_string(&state_ptr->bb_config.stop_stage_out, "StopStageOut",
		       bb_hashtbl);
	(void) s_p_get_uint32(&state_ptr->bb_config.validate_timeout,
			      "ValidateTimeout", bb_hashtbl);

	s_p_hashtbl_destroy(bb_hashtbl);
	xfree(bb_conf);

	if (state_ptr->bb_config.debug_flag) {
		value = _print_users(state_ptr->bb_config.allow_users);
		info("%s: AllowUsers:%s",  __func__, value);
		xfree(value);
		info("%s: CreateBuffer:%s",  __func__,
		     state_ptr->bb_config.create_buffer);
		info("%s: DefaultPool:%s",  __func__,
		     state_ptr->bb_config.default_pool);
		value = _print_users(state_ptr->bb_config.deny_users);
		info("%s: DenyUsers:%s",  __func__, value);
		xfree(value);
		info("%s: DestroyBuffer:%s",  __func__,
		     state_ptr->bb_config.destroy_buffer);
		info("%s: GetSysState:%s",  __func__,
		     state_ptr->bb_config.get_sys_state);
		info("%s: Granularity:%"PRIu64"",  __func__,
		     state_ptr->bb_config.granularity);
		/* NOTE(review): AltPool parsing stores the count in
		 * avail_space, but total_space is what gets printed here --
		 * confirm which field is intended */
		for (i = 0; i < state_ptr->bb_config.pool_cnt; i++) {
			info("%s: AltPoolName[%d]:%s:%"PRIu64"", __func__,
			     i, state_ptr->bb_config.pool_ptr[i].name,
			     state_ptr->bb_config.pool_ptr[i].total_space);
		}
		info("%s: OtherTimeout:%u", __func__,
		     state_ptr->bb_config.other_timeout);
		info("%s: StageInTimeout:%u", __func__,
		     state_ptr->bb_config.stage_in_timeout);
		info("%s: StageOutTimeout:%u", __func__,
		     state_ptr->bb_config.stage_out_timeout);
		info("%s: StartStageIn:%s",  __func__,
		     state_ptr->bb_config.start_stage_in);
		info("%s: StartStageOut:%s",  __func__,
		     state_ptr->bb_config.start_stage_out);
		info("%s: StopStageIn:%s",  __func__,
		     state_ptr->bb_config.stop_stage_in);
		info("%s: StopStageOut:%s",  __func__,
		     state_ptr->bb_config.stop_stage_out);
		info("%s: ValidateTimeout:%u", __func__,
		     state_ptr->bb_config.validate_timeout);
	}
}