コード例 #1
0
ファイル: burst_buffer_generic.c プロジェクト: rohgarg/slurm
/* Return the burst buffer size requested by a job */
static uint64_t _get_bb_size(struct job_record *job_ptr)
{
	char *tok;
	uint64_t bb_size_u = 0;

	if (job_ptr->burst_buffer) {
		tok = strstr(job_ptr->burst_buffer, "size=");
		if (tok)
			bb_size_u = bb_get_size_num(tok + 5,
						bb_state.bb_config.granularity);
	}

	return bb_size_u;
}
コード例 #2
0
ファイル: burst_buffer_generic.c プロジェクト: rohgarg/slurm
/*
 * Preliminary validation of a job submit request with respect to burst buffer
 * options. Performed after setting default account + qos, but prior to
 * establishing job ID or creating script file.
 *
 * Returns a SLURM errno.
 */
extern int bb_p_job_validate(struct job_descriptor *job_desc,
			     uid_t submit_uid)
{
	int64_t bb_size = 0;
	char *key;
	int i;

	xassert(job_desc);
	xassert(job_desc->tres_req_cnt);

	if (bb_state.bb_config.debug_flag) {
		info("%s: %s: job_user_id:%u, submit_uid:%d",
		     plugin_type, __func__, job_desc->user_id, submit_uid);
		info("%s: burst_buffer:%s", __func__, job_desc->burst_buffer);
		info("%s: script:%s", __func__, job_desc->script);
	}

	if (job_desc->burst_buffer) {
		key = strstr(job_desc->burst_buffer, "size=");
		if (key) {
			bb_size = bb_get_size_num(key + 5,
					bb_state.bb_config.granularity);
		}
	}
	if (bb_size == 0)
		return SLURM_SUCCESS;
	if (bb_size < 0)
		return ESLURM_BURST_BUFFER_LIMIT;

	pthread_mutex_lock(&bb_state.bb_mutex);
	if (bb_state.bb_config.allow_users) {
		for (i = 0; bb_state.bb_config.allow_users[i]; i++) {
			if (job_desc->user_id ==
			    bb_state.bb_config.allow_users[i])
				break;
		}
		if (bb_state.bb_config.allow_users[i] == 0) {
			pthread_mutex_unlock(&bb_state.bb_mutex);
			return ESLURM_BURST_BUFFER_PERMISSION;
		}
	}

	if (bb_state.bb_config.deny_users) {
		for (i = 0; bb_state.bb_config.deny_users[i]; i++) {
			if (job_desc->user_id ==
			    bb_state.bb_config.deny_users[i])
				break;
		}
		if (bb_state.bb_config.deny_users[i] != 0) {
			pthread_mutex_unlock(&bb_state.bb_mutex);
			return ESLURM_BURST_BUFFER_PERMISSION;
		}
	}

	if (bb_size > bb_state.total_space) {
		info("Job from user %u requested burst buffer size of "
		     "%"PRIu64", but total space is only %"PRIu64"",
		     job_desc->user_id, bb_size, bb_state.total_space);
	}

	job_desc->tres_req_cnt[bb_state.tres_pos] = bb_size / (1024 * 1024);

	pthread_mutex_unlock(&bb_state.bb_mutex);

	return SLURM_SUCCESS;
}
コード例 #3
0
ファイル: burst_buffer_generic.c プロジェクト: rohgarg/slurm
/*
 * Determine the current actual burst buffer state.
 * Run the program "get_sys_state" and parse stdout for details.
 * job_id IN - specific job to get information about, or 0 for all jobs
 */
static void _load_state(uint32_t job_id)
{
	static uint64_t last_total_space = 0;
	char *save_ptr = NULL, *tok, *leftover = NULL, *resp, *tmp = NULL;
	char *script_args[4], job_id_str[32];
	s_p_hashtbl_t *state_hashtbl = NULL;
	static s_p_options_t state_options[] = {
		{"ENOENT", S_P_STRING},
		{"UserID", S_P_ARRAY, _parse_job_info, _destroy_job_info},
		{"TotalSize", S_P_STRING},
		{NULL}
	};
	int status = 0;
	DEF_TIMERS;

	if (!bb_state.bb_config.get_sys_state)
		return;

	bb_state.last_load_time = time(NULL);

	tok = strrchr(bb_state.bb_config.get_sys_state, '/');
	if (tok)
		script_args[0] = tok + 1;
	else
		script_args[0] = bb_state.bb_config.get_sys_state;
	if (job_id) {
		script_args[1] = "get_job";
		snprintf(job_id_str, sizeof(job_id_str), "%u", job_id);
		script_args[3] = NULL;
	} else {
		script_args[1] = "get_sys";
		script_args[2] = NULL;
	}
	START_TIMER;
	resp = bb_run_script("GetSysState", bb_state.bb_config.get_sys_state,
			     script_args, 2000, &status);
	if (resp == NULL)
		return;
	END_TIMER;
	if (DELTA_TIMER > 200000)	/* 0.2 secs */
		info("%s: GetSysState ran for %s", __func__, TIME_STR);
	else if (bb_state.bb_config.debug_flag)
		debug("%s: GetSysState ran for %s", __func__, TIME_STR);

	state_hashtbl = s_p_hashtbl_create(state_options);
	tok = strtok_r(resp, "\n", &save_ptr);
	while (tok) {
		s_p_parse_line(state_hashtbl, tok, &leftover);
		tok = strtok_r(NULL, "\n", &save_ptr);
	}
	if (s_p_get_string(&tmp, "TotalSize", state_hashtbl)) {
		bb_state.total_space = bb_get_size_num(tmp,
						bb_state.bb_config.granularity);
		xfree(tmp);
		if (bb_state.bb_config.debug_flag &&
		    (bb_state.total_space != last_total_space)) {
			info("%s: total_space:%"PRIu64"",  __func__,
			     bb_state.total_space);
		}
		last_total_space = bb_state.total_space;
	} else if (job_id == 0) {
		error("%s: GetSysState failed to respond with TotalSize",
		      plugin_type);
	}
	s_p_hashtbl_destroy(state_hashtbl);
	xfree(resp);
}
コード例 #4
0
ファイル: burst_buffer_generic.c プロジェクト: rohgarg/slurm
static int _parse_job_info(void **dest, slurm_parser_enum_t type,
			   const char *key, const char *value,
			   const char *line, char **leftover)
{
	s_p_hashtbl_t *job_tbl;
	char *name = NULL, *tmp = NULL, local_name[64] = "";
	uint64_t size = 0;
	uint32_t job_id = 0, user_id = 0;
	uint16_t state = 0;
	bb_alloc_t *bb_ptr;
	struct job_record *job_ptr = NULL;
	bb_job_t *bb_spec;
	static s_p_options_t _job_options[] = {
		{"JobID",S_P_STRING},
		{"Name", S_P_STRING},
		{"Size", S_P_STRING},
		{"State", S_P_STRING},
		{NULL}
	};

	*dest = NULL;
	user_id = strtol(value, NULL, 10);
	job_tbl = s_p_hashtbl_create(_job_options);
	s_p_parse_line(job_tbl, *leftover, leftover);
	if (s_p_get_string(&tmp, "JobID", job_tbl)) {
		job_id = strtol(tmp, NULL, 10);
		xfree(tmp);
	}
	if (s_p_get_string(&name, "Name", job_tbl)) {
		snprintf(local_name, sizeof(local_name), "%s", name);
		xfree(name);
	}
	if (s_p_get_string(&tmp, "Size", job_tbl)) {
		size =  bb_get_size_num(tmp, bb_state.bb_config.granularity);
		xfree(tmp);
	}
	if (s_p_get_string(&tmp, "State", job_tbl)) {
		state = bb_state_num(tmp);
		xfree(tmp);
	}
	s_p_hashtbl_destroy(job_tbl);

#if 0
	info("%s: JobID:%u Name:%s Size:%"PRIu64" State:%u UserID:%u",
	     __func__, job_id, local_name, size, state, user_id);
#endif
	if (job_id) {
		job_ptr = find_job_record(job_id);
		if (!job_ptr && (state == BB_STATE_STAGED_OUT)) {
			struct job_record job_rec;
			job_rec.job_id  = job_id;
			job_rec.user_id = user_id;
			bb_ptr = bb_find_alloc_rec(&bb_state, &job_rec);
			_stop_stage_out(job_id);	/* Purge buffer */
			if (bb_ptr) {
				bb_ptr->cancelled = true;
				bb_ptr->end_time = 0;
			} else {
				/* Slurm knows nothing about this job,
				 * may be result of slurmctld cold start */
				error("%s: Vestigial buffer for purged job %u",
				      plugin_type, job_id);
			}
			return SLURM_SUCCESS;
		} else if (!job_ptr &&
			   ((state == BB_STATE_STAGING_IN) ||
			    (state == BB_STATE_STAGED_IN))) {
			struct job_record job_rec;
			job_rec.job_id  = job_id;
			job_rec.user_id = user_id;
			bb_ptr = bb_find_alloc_rec(&bb_state, &job_rec);
			_stop_stage_in(job_id);		/* Purge buffer */
			if (bb_ptr) {
				bb_ptr->cancelled = true;
				bb_ptr->end_time = 0;
			} else {
				/* Slurm knows nothing about this job,
				 * may be result of slurmctld cold start */
				error("%s: Vestigial buffer for purged job %u",
				      plugin_type, job_id);
			}
			return SLURM_SUCCESS;
		} else if (!job_ptr) {
			error("%s: Vestigial buffer for job ID %u. "
			      "Clear manually",
			      plugin_type, job_id);
		}
		snprintf(local_name, sizeof(local_name), "VestigialJob%u",
			 job_id);
	}
	if (job_ptr) {
		bb_ptr = bb_find_alloc_rec(&bb_state, job_ptr);
		if (bb_ptr == NULL) {
			bb_spec = xmalloc(sizeof(bb_job_t));
			bb_spec->total_size = _get_bb_size(job_ptr);
			bb_ptr = bb_alloc_job_rec(&bb_state, job_ptr, bb_spec);
			xfree(bb_spec);
			bb_ptr->state = state;
			/* bb_ptr->state_time set in bb_alloc_job_rec() */
		}
	} else {
		if ((bb_ptr = _find_bb_name_rec(local_name, user_id)) == NULL) {
			bb_ptr = bb_alloc_name_rec(&bb_state, local_name,
						   user_id);
			bb_ptr->size = size;
			bb_ptr->state = state;
//FIXME: VESTIGIAL: Use bb_limit_add
//			bb_add_user_load(bb_ptr, &bb_state);
			return SLURM_SUCCESS;
		}
	}
	bb_ptr->seen_time = time(NULL); /* used to purge defunct recs */

	/* UserID set to 0 on some failure modes */
	if ((bb_ptr->user_id != user_id) && (user_id != 0)) {
		error("%s: User ID mismatch (%u != %u). "
		      "BB UserID=%u JobID=%u Name=%s",
		      plugin_type, bb_ptr->user_id, user_id,
		      bb_ptr->user_id, bb_ptr->job_id, bb_ptr->name);
	}
	if ((bb_ptr->state == BB_STATE_RUNNING) &&
	    (state == BB_STATE_STAGED_IN))
		state = BB_STATE_RUNNING;	/* More precise state info */
	if (bb_ptr->state != state) {
		/* State is subject to real-time changes */
		debug("%s: State changed (%s to %s). "
		      "BB UserID=%u JobID=%u Name=%s",
		      plugin_type, bb_state_string(bb_ptr->state),
		      bb_state_string(state),
		      bb_ptr->user_id, bb_ptr->job_id, bb_ptr->name);
		bb_ptr->state = state;
		bb_ptr->state_time = time(NULL);
		if (bb_ptr->state == BB_STATE_STAGED_OUT) {
			if (bb_ptr->size != 0) {
//FIXME: VESTIGIAL: Use bb_limit_rem
//				bb_remove_user_load(bb_ptr, &bb_state);
				bb_ptr->size = 0;
			}
		}
		if (bb_ptr->state == BB_STATE_STAGED_IN)
			queue_job_scheduler();
	}
	if ((bb_ptr->state != BB_STATE_STAGED_OUT) && (bb_ptr->size != size)) {
//FIXME: VESTIGIAL: Use bb_limit_rem
//		bb_remove_user_load(bb_ptr, &bb_state);
		if (size != 0) {
			error("%s: Size mismatch (%"PRIu64" != %"PRIu64"). "
			      "BB UserID=%u JobID=%u Name=%s",
			      plugin_type, bb_ptr->size, size,
			      bb_ptr->user_id, bb_ptr->job_id, bb_ptr->name);
		}
		bb_ptr->size = MAX(bb_ptr->size, size);
//FIXME: VESTIGIAL: Use bb_limit_add
//		bb_add_user_load(bb_ptr, &bb_state);
	}

	return SLURM_SUCCESS;
}
コード例 #5
0
/* Load and process configuration parameters */
extern void bb_load_config(bb_state_t *state_ptr, char *plugin_type)
{
	s_p_hashtbl_t *bb_hashtbl = NULL;
	char *bb_conf, *tmp = NULL, *value;
#if _SUPPORT_ALT_POOL
	char *colon, *save_ptr = NULL, *tok;
	uint32_t pool_cnt;
#endif
	int fd, i;
	static s_p_options_t bb_options[] = {
		{"AllowUsers", S_P_STRING},
#if _SUPPORT_ALT_POOL
		{"AltPool", S_P_STRING},
#endif
		{"CreateBuffer", S_P_STRING},
		{"DefaultPool", S_P_STRING},
		{"DenyUsers", S_P_STRING},
		{"DestroyBuffer", S_P_STRING},
		{"Flags", S_P_STRING},
		{"GetSysState", S_P_STRING},
		{"Granularity", S_P_STRING},
		{"OtherTimeout", S_P_UINT32},
		{"StageInTimeout", S_P_UINT32},
		{"StageOutTimeout", S_P_UINT32},
		{"StartStageIn", S_P_STRING},
		{"StartStageOut", S_P_STRING},
		{"StopStageIn", S_P_STRING},
		{"StopStageOut", S_P_STRING},
		{"ValidateTimeout", S_P_UINT32},
		{NULL}
	};

	xfree(state_ptr->name);
	if (plugin_type) {
		tmp = strchr(plugin_type, '/');
		if (tmp)
			tmp++;
		else
			tmp = plugin_type;
		state_ptr->name = xstrdup(tmp);
	}

	/* Set default configuration */
	bb_clear_config(&state_ptr->bb_config, false);
	if (slurm_get_debug_flags() & DEBUG_FLAG_BURST_BUF)
		state_ptr->bb_config.debug_flag = true;
	state_ptr->bb_config.flags |= BB_FLAG_DISABLE_PERSISTENT;
	state_ptr->bb_config.other_timeout = DEFAULT_OTHER_TIMEOUT;
	state_ptr->bb_config.stage_in_timeout = DEFAULT_STATE_IN_TIMEOUT;
	state_ptr->bb_config.stage_out_timeout = DEFAULT_STATE_OUT_TIMEOUT;
	state_ptr->bb_config.validate_timeout = DEFAULT_VALIDATE_TIMEOUT;

	/* First look for "burst_buffer.conf" then with "type" field,
	 * for example "burst_buffer_cray.conf" */
	bb_conf = get_extra_conf_path("burst_buffer.conf");
	fd = open(bb_conf, 0);
	if (fd >= 0) {
		close(fd);
	} else {
		char *new_path = NULL;
		xfree(bb_conf);
		xstrfmtcat(new_path, "burst_buffer_%s.conf", state_ptr->name);
		bb_conf = get_extra_conf_path(new_path);
		fd = open(bb_conf, 0);
		if (fd < 0) {
			info("%s: Unable to find configuration file %s or "
			     "burst_buffer.conf", __func__, new_path);
			xfree(bb_conf);
			xfree(new_path);
			return;
		}
		close(fd);
		xfree(new_path);
	}

	bb_hashtbl = s_p_hashtbl_create(bb_options);
	if (s_p_parse_file(bb_hashtbl, NULL, bb_conf, false) == SLURM_ERROR) {
		fatal("%s: something wrong with opening/reading %s: %m",
		      __func__, bb_conf);
	}
	if (s_p_get_string(&state_ptr->bb_config.allow_users_str, "AllowUsers",
			   bb_hashtbl)) {
		state_ptr->bb_config.allow_users = _parse_users(
					state_ptr->bb_config.allow_users_str);
	}
	s_p_get_string(&state_ptr->bb_config.create_buffer, "CreateBuffer",
		       bb_hashtbl);
	s_p_get_string(&state_ptr->bb_config.default_pool, "DefaultPool",
		       bb_hashtbl);
	if (s_p_get_string(&state_ptr->bb_config.deny_users_str, "DenyUsers",
			   bb_hashtbl)) {
		state_ptr->bb_config.deny_users = _parse_users(
					state_ptr->bb_config.deny_users_str);
	}
	s_p_get_string(&state_ptr->bb_config.destroy_buffer, "DestroyBuffer",
		       bb_hashtbl);

	if (s_p_get_string(&tmp, "Flags", bb_hashtbl)) {
		state_ptr->bb_config.flags = slurm_bb_str2flags(tmp);
		xfree(tmp);
	}
	/* By default, disable persistent buffer creation by normal users */
	if (state_ptr->bb_config.flags & BB_FLAG_ENABLE_PERSISTENT)
		state_ptr->bb_config.flags &= (~BB_FLAG_DISABLE_PERSISTENT);

	s_p_get_string(&state_ptr->bb_config.get_sys_state, "GetSysState",
		       bb_hashtbl);
	if (s_p_get_string(&tmp, "Granularity", bb_hashtbl)) {
		state_ptr->bb_config.granularity = bb_get_size_num(tmp, 1);
		xfree(tmp);
		if (state_ptr->bb_config.granularity == 0) {
			error("%s: Granularity=0 is invalid", __func__);
			state_ptr->bb_config.granularity = 1;
		}
	}
#if _SUPPORT_ALT_POOL
	if (s_p_get_string(&tmp, "AltPool", bb_hashtbl)) {
		tok = strtok_r(tmp, ",", &save_ptr);
		while (tok) {
			colon = strchr(tok, ':');
			if (colon) {
				colon[0] = '\0';
				pool_cnt = _atoi(colon + 1);
			} else
				pool_cnt = 1;
			state_ptr->bb_config.pool_ptr = xrealloc(
				state_ptr->bb_config.pool_ptr,
				sizeof(burst_buffer_pool_t) *
				(state_ptr->bb_config.pool_cnt + 1));
			state_ptr->bb_config.
				pool_ptr[state_ptr->bb_config.pool_cnt].name =
				xstrdup(tok);
			state_ptr->bb_config.
				pool_ptr[state_ptr->bb_config.pool_cnt].
				avail_space = pool_cnt;
			state_ptr->bb_config.pool_cnt++;
			tok = strtok_r(NULL, ",", &save_ptr);
		}
		xfree(tmp);
	}
#endif

	(void) s_p_get_uint32(&state_ptr->bb_config.other_timeout,
			     "OtherTimeout", bb_hashtbl);
	(void) s_p_get_uint32(&state_ptr->bb_config.stage_in_timeout,
			    "StageInTimeout", bb_hashtbl);
	(void) s_p_get_uint32(&state_ptr->bb_config.stage_out_timeout,
			    "StageOutTimeout", bb_hashtbl);
	s_p_get_string(&state_ptr->bb_config.start_stage_in, "StartStageIn",
		       bb_hashtbl);
	s_p_get_string(&state_ptr->bb_config.start_stage_out, "StartStageOut",
			    bb_hashtbl);
	s_p_get_string(&state_ptr->bb_config.stop_stage_in, "StopStageIn",
		       bb_hashtbl);
	s_p_get_string(&state_ptr->bb_config.stop_stage_out, "StopStageOut",
		       bb_hashtbl);
	(void) s_p_get_uint32(&state_ptr->bb_config.validate_timeout,
			     "ValidateTimeout", bb_hashtbl);

	s_p_hashtbl_destroy(bb_hashtbl);
	xfree(bb_conf);

	if (state_ptr->bb_config.debug_flag) {
		value = _print_users(state_ptr->bb_config.allow_users);
		info("%s: AllowUsers:%s",  __func__, value);
		xfree(value);
		info("%s: CreateBuffer:%s",  __func__,
		     state_ptr->bb_config.create_buffer);
		info("%s: DefaultPool:%s",  __func__,
		     state_ptr->bb_config.default_pool);
		value = _print_users(state_ptr->bb_config.deny_users);
		info("%s: DenyUsers:%s",  __func__, value);
		xfree(value);
		info("%s: DestroyBuffer:%s",  __func__,
		     state_ptr->bb_config.destroy_buffer);
		info("%s: GetSysState:%s",  __func__,
		     state_ptr->bb_config.get_sys_state);
		info("%s: Granularity:%"PRIu64"",  __func__,
		     state_ptr->bb_config.granularity);
		for (i = 0; i < state_ptr->bb_config.pool_cnt; i++) {
			info("%s: AltPoolName[%d]:%s:%"PRIu64"", __func__, i,
			     state_ptr->bb_config.pool_ptr[i].name,
			     state_ptr->bb_config.pool_ptr[i].total_space);
		}
		info("%s: OtherTimeout:%u", __func__,
		     state_ptr->bb_config.other_timeout);
		info("%s: StageInTimeout:%u", __func__,
		     state_ptr->bb_config.stage_in_timeout);
		info("%s: StageOutTimeout:%u", __func__,
		     state_ptr->bb_config.stage_out_timeout);
		info("%s: StartStageIn:%s",  __func__,
		     state_ptr->bb_config.start_stage_in);
		info("%s: StartStageOut:%s",  __func__,
		     state_ptr->bb_config.start_stage_out);
		info("%s: StopStageIn:%s",  __func__,
		     state_ptr->bb_config.stop_stage_in);
		info("%s: StopStageOut:%s",  __func__,
		     state_ptr->bb_config.stop_stage_out);
		info("%s: ValidateTimeout:%u", __func__,
		     state_ptr->bb_config.validate_timeout);
	}
}