示例#1
0
/*
 * Trigger a job's burst buffer stage-out to begin
 *
 * Returns a SLURM errno.
 */
extern int bb_p_job_start_stage_out(struct job_record *job_ptr)
{
//FIXME: How to handle various job terminate states (e.g. requeue, failure), user script controlled?
//FIXME: Test for memory leaks
	bb_alloc_t *bb_ptr;
	char **script_argv, *resp;
	int i, status = 0;
	char jobid_buf[32];

	if (bb_state.bb_config.debug_flag) {
		info("%s: %s: %s", plugin_type, __func__,
		     jobid2fmt(job_ptr, jobid_buf, sizeof(jobid_buf)));
	}

	if (!bb_state.bb_config.start_stage_out)
		return SLURM_ERROR;

	if ((job_ptr->burst_buffer == NULL) ||
	    (job_ptr->burst_buffer[0] == '\0') ||
	    (_get_bb_size(job_ptr) == 0))
		return SLURM_SUCCESS;

	pthread_mutex_lock(&bb_state.bb_mutex);
	bb_ptr = bb_find_alloc_rec(&bb_state, job_ptr);
	if (!bb_ptr) {
		/* No job buffers. Assuming use of persistent buffers only */
		debug("%s: %s bb_rec not found", __func__,
		      jobid2fmt(job_ptr, jobid_buf, sizeof(jobid_buf)));
	} else {
		script_argv = _build_stage_args(bb_state.bb_config.start_stage_out,
						"start_stage_out", job_ptr,
						bb_ptr->size);
		if (script_argv) {
			bb_ptr->state = BB_STATE_STAGING_OUT;
			bb_ptr->state_time = time(NULL);
			resp = bb_run_script("StartStageOut",
					     bb_state.bb_config.start_stage_out,
					     script_argv, -1, &status);
			if (resp) {
				error("%s: StartStageOut: %s", __func__, resp);
				xfree(resp);
			}
			for (i = 0; script_argv[i]; i++)
				xfree(script_argv[i]);
			xfree(script_argv);
		} else {
			bb_ptr->state = BB_STATE_STAGED_OUT;
			bb_ptr->state_time = time(NULL);
		}
	}
	pthread_mutex_unlock(&bb_state.bb_mutex);

	return SLURM_SUCCESS;
}
示例#2
0
/*
 * Terminate any file staging and completely release burst buffer resources
 *
 * Returns a SLURM errno.
 */
extern int bb_p_job_cancel(struct job_record *job_ptr)
{
	bb_alloc_t *bb_ptr;
	char **script_argv, *resp;
	int i, status = 0;
	char jobid_buf[32];

	if (bb_state.bb_config.debug_flag) {
		info("%s: %s: %s", plugin_type, __func__,
		     jobid2fmt(job_ptr, jobid_buf, sizeof(jobid_buf)));
	}

	if (!bb_state.bb_config.stop_stage_out)
		return SLURM_ERROR;

	if ((job_ptr->burst_buffer == NULL) ||
	    (job_ptr->burst_buffer[0] == '\0') ||
	    (_get_bb_size(job_ptr) == 0))
		return SLURM_SUCCESS;

	pthread_mutex_lock(&bb_state.bb_mutex);
	bb_ptr = bb_find_alloc_rec(&bb_state, job_ptr);
	if (!bb_ptr) {
		_stop_stage_out(job_ptr->job_id);
	} else {
		script_argv = _build_stage_args(bb_state.bb_config.stop_stage_out,
						"stop_stage_out", job_ptr, 0);
		if (script_argv) {
			bb_ptr->state = BB_STATE_STAGED_OUT;
			bb_ptr->state_time = time(NULL);
			resp = bb_run_script("StopStageOut",
					     bb_state.bb_config.stop_stage_out,
					     script_argv, -1, &status);
			if (resp) {
				error("%s: StopStageOut: %s", __func__, resp);
				xfree(resp);
			}
			for (i = 0; script_argv[i]; i++)
				xfree(script_argv[i]);
			xfree(script_argv);
		} else {
			_stop_stage_out(job_ptr->job_id);
			bb_ptr->cancelled = true;
			bb_ptr->end_time = 0;
			bb_ptr->state = BB_STATE_STAGED_OUT;
			bb_ptr->state_time = time(NULL);
		}
	}
	pthread_mutex_unlock(&bb_state.bb_mutex);

	return SLURM_SUCCESS;
}
示例#3
0
/*
 * Trigger a job's burst buffer stage-out to begin
 *
 * Returns a SLURM errno.
 */
extern int bb_p_job_start_stage_out(struct job_record *job_ptr)
{
	bb_alloc_t *bb_ptr;
	char **script_argv, *resp;
	int i;

	if (debug_flag) {
		info("%s: %s",  __func__, plugin_type);
		info("%s: job_id:%u", __func__, job_ptr->job_id);
	}
	if ((job_ptr->burst_buffer == NULL) ||
	    (job_ptr->burst_buffer[0] == '\0') ||
	    (_get_bb_size(job_ptr) == 0))
		return SLURM_SUCCESS;

	pthread_mutex_lock(&bb_mutex);
	bb_ptr = _find_bb_job_rec(job_ptr);
	if (!bb_ptr) {
		/* No job buffers. Assuming use of persistent buffers only */
		debug("%s: job_id:%u bb_rec not found",
		      __func__, job_ptr->job_id);
	} else {
		script_argv = _build_stage_args(start_stage_out, "stage_out",
						job_ptr);
		if (script_argv) {
			bb_ptr->state = BB_STATE_STAGING_OUT;
			resp = _run_script("StartStageOut", start_stage_out,
					   script_argv, -1);
			if (resp) {
				error("%s: StartStageOut: %s", __func__, resp);
				xfree(resp);
			}
			for (i = 0; script_argv[i]; i++)
				xfree(script_argv[i]);
			xfree(script_argv);
		} else {
			bb_ptr->state = BB_STATE_STAGED_OUT;
		}
	}
	pthread_mutex_unlock(&bb_mutex);

	return SLURM_SUCCESS;
}
示例#4
0
static void _alloc_job_bb(struct job_record *job_ptr, uint64_t bb_size)
{
	char **script_argv, *resp;
	bb_alloc_t *bb_ptr;
	int i, status = 0;
	bb_job_t *bb_spec;
	char jobid_buf[32];

	bb_spec = xmalloc(sizeof(bb_job_t));
	bb_spec->total_size = bb_size;
	bb_ptr = bb_alloc_job(&bb_state, job_ptr, bb_spec);
	xfree(bb_spec);

	if (bb_state.bb_config.debug_flag) {
		info("%s: start stage-in %s", __func__,
		     jobid2fmt(job_ptr, jobid_buf, sizeof(jobid_buf)));
	}
	script_argv = _build_stage_args(bb_state.bb_config.start_stage_in,
					"start_stage_in", job_ptr, bb_size);
	if (script_argv) {
		bb_ptr->state = BB_STATE_STAGING_IN;
		bb_ptr->state_time = time(NULL);
		resp = bb_run_script("StartStageIn",
				     bb_state.bb_config.start_stage_in,
				     script_argv, -1, &status);
		if (resp) {
			error("%s: StartStageIn: %s", __func__, resp);
			xfree(resp);
		}
		for (i = 0; script_argv[i]; i++)
			xfree(script_argv[i]);
		xfree(script_argv);
	} else {
		bb_ptr->state = BB_STATE_STAGED_IN;
		bb_ptr->state_time = time(NULL);
	}
}