Esempio n. 1
0
/* For each running job, return power allocation/use information in a List
 * containing elements of type power_by_job_t.
 * NOTE: Job data structure must be locked on function entry
 * NOTE: Call list_delete() to free return value
 * NOTE: This function is currently unused. */
extern List get_job_power(List job_list,
			  struct node_record *node_record_table_ptr)
{
	struct node_record *node_ptr;
	struct job_record *job_ptr;
	ListIterator job_iterator;
	power_by_job_t *power_ptr;
	char jobid_buf[64] = "";
	int i, i_first, i_last;
	uint64_t debug_flag = slurm_get_debug_flags();
	List job_power_list = list_create(_job_power_del);
	time_t now = time(NULL);

	job_iterator = list_iterator_create(job_list);
	while ((job_ptr = (struct job_record *) list_next(job_iterator))) {
		if (!IS_JOB_RUNNING(job_ptr))
			continue;
		power_ptr = xmalloc(sizeof(power_by_job_t));
		power_ptr->job_id = job_ptr->job_id;
		power_ptr->start_time = job_ptr->start_time;
		list_append(job_power_list, power_ptr);
		if (!job_ptr->node_bitmap) {
			error("%s: %s node_bitmap is NULL", __func__,
			      jobid2fmt(job_ptr, jobid_buf, sizeof(jobid_buf)));
			continue;
		}
		i_first = bit_ffs(job_ptr->node_bitmap);
		if (i_first < 0)
			continue;
		i_last = bit_fls(job_ptr->node_bitmap);
		for (i = i_first; i <= i_last; i++) {
			if (!bit_test(job_ptr->node_bitmap, i))
				continue;
			node_ptr = node_record_table_ptr + i;
			if (node_ptr->power) {
				power_ptr->alloc_watts +=
					node_ptr->power->cap_watts;
			}
			if (node_ptr->energy) {
				power_ptr->used_watts +=
					node_ptr->energy->current_watts;
			}
		}
		if (debug_flag & DEBUG_FLAG_POWER) {
			info("%s: %s Age=%ld(sec) AllocWatts=%u UsedWatts=%u",
			     __func__,
			     jobid2fmt(job_ptr, jobid_buf, sizeof(jobid_buf)),
			     (long int) difftime(now, power_ptr->start_time),
			     power_ptr->alloc_watts, power_ptr->used_watts);
		}
	}
	list_iterator_destroy(job_iterator);

	return job_power_list;
}
Esempio n. 2
0
/*
 * Trigger a job's burst buffer stage-out to begin
 *
 * Returns a SLURM errno.
 */
extern int bb_p_job_start_stage_out(struct job_record *job_ptr)
{
//FIXME: How to handle various job terminate states (e.g. requeue, failure), user script controlled?
//FIXME: Test for memory leaks
	bb_alloc_t *bb_ptr;
	char **script_argv, *resp;
	int i, status = 0;
	char jobid_buf[32];

	if (bb_state.bb_config.debug_flag) {
		info("%s: %s: %s", plugin_type, __func__,
		     jobid2fmt(job_ptr, jobid_buf, sizeof(jobid_buf)));
	}

	if (!bb_state.bb_config.start_stage_out)
		return SLURM_ERROR;

	if ((job_ptr->burst_buffer == NULL) ||
	    (job_ptr->burst_buffer[0] == '\0') ||
	    (_get_bb_size(job_ptr) == 0))
		return SLURM_SUCCESS;

	pthread_mutex_lock(&bb_state.bb_mutex);
	bb_ptr = bb_find_alloc_rec(&bb_state, job_ptr);
	if (!bb_ptr) {
		/* No job buffers. Assuming use of persistent buffers only */
		debug("%s: %s bb_rec not found", __func__,
		      jobid2fmt(job_ptr, jobid_buf, sizeof(jobid_buf)));
	} else {
		script_argv = _build_stage_args(bb_state.bb_config.start_stage_out,
						"start_stage_out", job_ptr,
						bb_ptr->size);
		if (script_argv) {
			bb_ptr->state = BB_STATE_STAGING_OUT;
			bb_ptr->state_time = time(NULL);
			resp = bb_run_script("StartStageOut",
					     bb_state.bb_config.start_stage_out,
					     script_argv, -1, &status);
			if (resp) {
				error("%s: StartStageOut: %s", __func__, resp);
				xfree(resp);
			}
			for (i = 0; script_argv[i]; i++)
				xfree(script_argv[i]);
			xfree(script_argv);
		} else {
			bb_ptr->state = BB_STATE_STAGED_OUT;
			bb_ptr->state_time = time(NULL);
		}
	}
	pthread_mutex_unlock(&bb_state.bb_mutex);

	return SLURM_SUCCESS;
}
/* Find a per-job burst buffer record for a specific job.
 * If not found, return NULL. */
extern bb_alloc_t *bb_find_alloc_rec(bb_state_t *state_ptr,
				     struct job_record *job_ptr)
{
	bb_alloc_t *bb_alloc = NULL;
	char jobid_buf[32];

	xassert(job_ptr);
	xassert(state_ptr);
	bb_alloc = state_ptr->bb_ahash[job_ptr->user_id % BB_HASH_SIZE];
	while (bb_alloc) {
		if (bb_alloc->job_id == job_ptr->job_id) {
			if (bb_alloc->user_id == job_ptr->user_id) {
				xassert(bb_alloc->magic == BB_ALLOC_MAGIC);
				return bb_alloc;
			}
			error("%s: Slurm state inconsistent with burst "
			      "buffer. %s has UserID mismatch (%u != %u)",
			      __func__,
			      jobid2fmt(job_ptr, jobid_buf, sizeof(jobid_buf)),
			      bb_alloc->user_id, job_ptr->user_id);
			/* This has been observed when slurmctld crashed and
			 * the job state recovered was missing some jobs
			 * which already had burst buffers configured. */
		}
		bb_alloc = bb_alloc->next;
	}
	return bb_alloc;
}
Esempio n. 4
0
/*
 * Determine if a job's burst buffer stage-out is complete
 *
 * RET: 0 - stage-out is underway
 *      1 - stage-out complete
 *     -1 - fatal error
 */
extern int bb_p_job_test_stage_out(struct job_record *job_ptr)
{
	bb_alloc_t *bb_ptr;
	int rc = -1;
	char jobid_buf[32];

	if (bb_state.bb_config.debug_flag) {
		info("%s: %s: %s", plugin_type, __func__,
		     jobid2fmt(job_ptr, jobid_buf, sizeof(jobid_buf)));
	}

	if ((job_ptr->burst_buffer == NULL) ||
	    (job_ptr->burst_buffer[0] == '\0') ||
	    (_get_bb_size(job_ptr) == 0))
		return 1;

	pthread_mutex_lock(&bb_state.bb_mutex);
	bb_ptr = bb_find_alloc_rec(&bb_state, job_ptr);
	if (!bb_ptr) {
		/* No job buffers. Assuming use of persistent buffers only */
		debug("%s: %s bb_rec not found", __func__,
		      jobid2fmt(job_ptr, jobid_buf, sizeof(jobid_buf)));
		rc =  1;
	} else {
		if (bb_ptr->state < BB_STATE_STAGED_OUT)
			_load_state(job_ptr->job_id);
		if (bb_ptr->state == BB_STATE_STAGING_OUT) {
			rc =  0;
		} else if (bb_ptr->state == BB_STATE_STAGED_OUT) {
			if (bb_ptr->size != 0) {
//FIXME: VESTIGIAL: Use bb_limit_rem
//				bb_remove_user_load(bb_ptr, &bb_state);
				bb_ptr->size = 0;
			}
			rc =  1;
		} else {
			error("%s: %s bb_state:%u", __func__,
			      jobid2fmt(job_ptr, jobid_buf, sizeof(jobid_buf)),
			      bb_ptr->state);
			rc = -1;
		}
	}
	pthread_mutex_unlock(&bb_state.bb_mutex);

	return rc;
}
Esempio n. 5
0
/*
 * Determine if a job's burst buffer stage-in is complete
 * job_ptr IN - Job to test
 * test_only IN - If false, then attempt to allocate burst buffer if possible
 *
 * RET: 0 - stage-in is underway
 *      1 - stage-in complete
 *     -1 - stage-in not started or burst buffer in some unexpected state
 */
extern int bb_p_job_test_stage_in(struct job_record *job_ptr, bool test_only)
{
	bb_alloc_t *bb_ptr;
	uint64_t bb_size = 0;
	int rc = 1;
	char jobid_buf[32];

	if (bb_state.bb_config.debug_flag) {
		info("%s: %s: %s", plugin_type, __func__,
		     jobid2fmt(job_ptr, jobid_buf, sizeof(jobid_buf)));
	}

	if ((job_ptr->burst_buffer == NULL) ||
	    (job_ptr->burst_buffer[0] == '\0') ||
	    ((bb_size = _get_bb_size(job_ptr)) == 0))
		return rc;

	pthread_mutex_lock(&bb_state.bb_mutex);
	bb_ptr = bb_find_alloc_rec(&bb_state, job_ptr);
	if (!bb_ptr) {
		debug("%s: %s bb_rec not found", __func__,
		      jobid2fmt(job_ptr, jobid_buf, sizeof(jobid_buf)));
		rc = -1;
		if ((test_only == false) &&
		    (_test_size_limit(job_ptr, bb_size) == 0))
			_alloc_job_bb(job_ptr, bb_size);
	} else {
		if (bb_ptr->state < BB_STATE_STAGED_IN)
			_load_state(job_ptr->job_id);
		if (bb_ptr->state < BB_STATE_STAGED_IN) {
			rc = 0;
		} else if (bb_ptr->state == BB_STATE_STAGED_IN) {
			rc = 1;
		} else {
			error("%s: %s bb_state:%u", __func__,
			      jobid2fmt(job_ptr, jobid_buf, sizeof(jobid_buf)),
			      bb_ptr->state);
			rc = -1;
		}
	}
	pthread_mutex_unlock(&bb_state.bb_mutex);
	return rc;
}
Esempio n. 6
0
/*
 * Terminate any file staging and completely release burst buffer resources
 *
 * Returns a SLURM errno.
 */
extern int bb_p_job_cancel(struct job_record *job_ptr)
{
	bb_alloc_t *bb_ptr;
	char **script_argv, *resp;
	int i, status = 0;
	char jobid_buf[32];

	if (bb_state.bb_config.debug_flag) {
		info("%s: %s: %s", plugin_type, __func__,
		     jobid2fmt(job_ptr, jobid_buf, sizeof(jobid_buf)));
	}

	if (!bb_state.bb_config.stop_stage_out)
		return SLURM_ERROR;

	if ((job_ptr->burst_buffer == NULL) ||
	    (job_ptr->burst_buffer[0] == '\0') ||
	    (_get_bb_size(job_ptr) == 0))
		return SLURM_SUCCESS;

	pthread_mutex_lock(&bb_state.bb_mutex);
	bb_ptr = bb_find_alloc_rec(&bb_state, job_ptr);
	if (!bb_ptr) {
		_stop_stage_out(job_ptr->job_id);
	} else {
		script_argv = _build_stage_args(bb_state.bb_config.stop_stage_out,
						"stop_stage_out", job_ptr, 0);
		if (script_argv) {
			bb_ptr->state = BB_STATE_STAGED_OUT;
			bb_ptr->state_time = time(NULL);
			resp = bb_run_script("StopStageOut",
					     bb_state.bb_config.stop_stage_out,
					     script_argv, -1, &status);
			if (resp) {
				error("%s: StopStageOut: %s", __func__, resp);
				xfree(resp);
			}
			for (i = 0; script_argv[i]; i++)
				xfree(script_argv[i]);
			xfree(script_argv);
		} else {
			_stop_stage_out(job_ptr->job_id);
			bb_ptr->cancelled = true;
			bb_ptr->end_time = 0;
			bb_ptr->state = BB_STATE_STAGED_OUT;
			bb_ptr->state_time = time(NULL);
		}
	}
	pthread_mutex_unlock(&bb_state.bb_mutex);

	return SLURM_SUCCESS;
}
Esempio n. 7
0
static void _alloc_job_bb(struct job_record *job_ptr, uint64_t bb_size)
{
	char **script_argv, *resp;
	bb_alloc_t *bb_ptr;
	int i, status = 0;
	bb_job_t *bb_spec;
	char jobid_buf[32];

	bb_spec = xmalloc(sizeof(bb_job_t));
	bb_spec->total_size = bb_size;
	bb_ptr = bb_alloc_job(&bb_state, job_ptr, bb_spec);
	xfree(bb_spec);

	if (bb_state.bb_config.debug_flag) {
		info("%s: start stage-in %s", __func__,
		     jobid2fmt(job_ptr, jobid_buf, sizeof(jobid_buf)));
	}
	script_argv = _build_stage_args(bb_state.bb_config.start_stage_in,
					"start_stage_in", job_ptr, bb_size);
	if (script_argv) {
		bb_ptr->state = BB_STATE_STAGING_IN;
		bb_ptr->state_time = time(NULL);
		resp = bb_run_script("StartStageIn",
				     bb_state.bb_config.start_stage_in,
				     script_argv, -1, &status);
		if (resp) {
			error("%s: StartStageIn: %s", __func__, resp);
			xfree(resp);
		}
		for (i = 0; script_argv[i]; i++)
			xfree(script_argv[i]);
		xfree(script_argv);
	} else {
		bb_ptr->state = BB_STATE_STAGED_IN;
		bb_ptr->state_time = time(NULL);
	}
}
Esempio n. 8
0
/*
 * For a given job, return our best guess if when it might be able to start
 */
extern time_t bb_p_job_get_est_start(struct job_record *job_ptr)
{
	bb_alloc_t *bb_ptr;
	time_t est_start = time(NULL);
	uint64_t bb_size;
	int rc;
	char jobid_buf[32];

	if (bb_state.bb_config.debug_flag) {
		info("%s: %s: %s", plugin_type, __func__,
		     jobid2fmt(job_ptr, jobid_buf, sizeof(jobid_buf)));
	}

	if ((job_ptr->burst_buffer == NULL) ||
	    (job_ptr->burst_buffer[0] == '\0') ||
	    ((bb_size = _get_bb_size(job_ptr)) == 0))
		return est_start;

	pthread_mutex_lock(&bb_state.bb_mutex);
	bb_ptr = bb_find_alloc_rec(&bb_state, job_ptr);
	if (!bb_ptr) {
		rc = _test_size_limit(job_ptr, bb_size);
		if (rc == 0) {		/* Could start now */
			;
		} else if (rc == 1) {	/* Exceeds configured limits */
			est_start += 365 * 24 * 60 * 60;
		} else {		/* No space currently available */
			est_start = MAX(est_start, bb_state.next_end_time);
		}
	} else if (bb_ptr->state < BB_STATE_STAGED_IN) {
		est_start++;
	}
	pthread_mutex_unlock(&bb_state.bb_mutex);

	return est_start;
}
Esempio n. 9
0
/* Test if a job can be allocated a burst buffer.
 * This may preempt currently active stage-in for higher priority jobs.
 *
 * RET 0: Job can be started now
 *     1: Job exceeds configured limits, continue testing with next job
 *     2: Job needs more resources than currently available can not start,
 *        skip all remaining jobs
 */
static int _test_size_limit(struct job_record *job_ptr, uint64_t add_space)
{
	burst_buffer_info_msg_t *resv_bb;
	struct preempt_bb_recs *preempt_ptr = NULL;
	List preempt_list;
	ListIterator preempt_iter;
	uint64_t resv_space = 0;
	int add_total_space_needed = 0, add_user_space_needed = 0;
	int add_total_space_avail  = 0, add_user_space_avail  = 0;
	time_t now = time(NULL), when;
	bb_alloc_t *bb_ptr = NULL;
	int i;
	char jobid_buf[32];

	if (job_ptr->start_time <= now)
		when = now;
	else
		when = job_ptr->start_time;
	resv_bb = job_test_bb_resv(job_ptr, when);
	if (resv_bb) {
		burst_buffer_info_t *resv_bb_ptr;
		for (i = 0, resv_bb_ptr = resv_bb->burst_buffer_array;
		     i < resv_bb->record_count; i++, resv_bb_ptr++) {
			if (resv_bb_ptr->name &&
			    strcmp(resv_bb_ptr->name, bb_state.name))
				continue;
			resv_bb_ptr->used_space =
				bb_granularity(resv_bb_ptr->used_space,
					       bb_state.bb_config.granularity);
			resv_space += resv_bb_ptr->used_space;
		}
		slurm_free_burst_buffer_info_msg(resv_bb);
	}

	add_total_space_needed = bb_state.used_space + add_space + resv_space -
				 bb_state.total_space;

	if ((add_total_space_needed <= 0) &&
	    (add_user_space_needed  <= 0))
		return 0;

	/* Identify candidate burst buffers to revoke for higher priority job */
	preempt_list = list_create(bb_job_queue_del);
	for (i = 0; i < BB_HASH_SIZE; i++) {
		bb_ptr = bb_state.bb_ahash[i];
		while (bb_ptr) {
			if (bb_ptr->job_id &&
			    (bb_ptr->use_time > now) &&
			    (bb_ptr->use_time > job_ptr->start_time)) {
				preempt_ptr = xmalloc(sizeof(
						struct preempt_bb_recs));
				preempt_ptr->bb_ptr = bb_ptr;
				preempt_ptr->job_id = bb_ptr->job_id;
				preempt_ptr->size = bb_ptr->size;
				preempt_ptr->use_time = bb_ptr->use_time;
				preempt_ptr->user_id = bb_ptr->user_id;
				list_push(preempt_list, preempt_ptr);

				add_total_space_avail += bb_ptr->size;
				if (bb_ptr->user_id == job_ptr->user_id)
					add_user_space_avail += bb_ptr->size;
			}
			bb_ptr = bb_ptr->next;
		}
	}

	if ((add_total_space_avail >= add_total_space_needed) &&
	    (add_user_space_avail  >= add_user_space_needed)) {
		list_sort(preempt_list, bb_preempt_queue_sort);
		preempt_iter = list_iterator_create(preempt_list);
		while ((preempt_ptr = list_next(preempt_iter)) &&
		       (add_total_space_needed || add_user_space_needed)) {
			if (add_user_space_needed &&
			    (preempt_ptr->user_id == job_ptr->user_id)) {
				_stop_stage_in(preempt_ptr->job_id);
				preempt_ptr->bb_ptr->cancelled = true;
				preempt_ptr->bb_ptr->end_time = 0;
				if (bb_state.bb_config.debug_flag) {
					info("%s: %s: Preempting stage-in of "
					     "job %u for %s", plugin_type,
					     __func__, preempt_ptr->job_id,
					     jobid2fmt(job_ptr, jobid_buf,
						       sizeof(jobid_buf)));
				}
				add_user_space_needed  -= preempt_ptr->size;
				add_total_space_needed -= preempt_ptr->size;
			}
			if ((add_total_space_needed > add_user_space_needed) &&
			    (preempt_ptr->user_id != job_ptr->user_id)) {
				_stop_stage_in(preempt_ptr->job_id);
				preempt_ptr->bb_ptr->cancelled = true;
				preempt_ptr->bb_ptr->end_time = 0;
				if (bb_state.bb_config.debug_flag) {
					info("%s: %s: Preempting stage-in of "
					     "job %u for %s", plugin_type,
					     __func__, preempt_ptr->job_id,
					     jobid2fmt(job_ptr, jobid_buf,
						       sizeof(jobid_buf)));
				}
				add_total_space_needed -= preempt_ptr->size;
			}
		}
		list_iterator_destroy(preempt_iter);
	}
	FREE_NULL_LIST(preempt_list);

	return 2;
}