/*
 * Unlink one bb_alloc_t from the allocation hash table and release it.
 * The hash bucket is selected from bb_alloc->user_id and the record is
 * matched by pointer identity. On a match, state_ptr->last_update_time is
 * set to the current time.
 * IN state_ptr - burst buffer state holding the bb_ahash table
 * IN bb_alloc - record to remove; passed to bb_free_alloc_buf() when found
 * RET true if the record was found and removed, false otherwise
 */
extern bool bb_free_alloc_rec(bb_state_t *state_ptr, bb_alloc_t *bb_alloc)
{
	bb_alloc_t **link_ptr;
	int bucket;

	xassert(state_ptr);
	xassert(state_ptr->bb_ahash);
	xassert(bb_alloc);

	bucket = bb_alloc->user_id % BB_HASH_SIZE;

	/* Walk the chain through the address of each link so the matching
	 * record can be spliced out without tracking its predecessor */
	for (link_ptr = &state_ptr->bb_ahash[bucket]; *link_ptr;
	     link_ptr = &(*link_ptr)->next) {
		if (*link_ptr != bb_alloc)
			continue;
		xassert(bb_alloc->magic == BB_ALLOC_MAGIC);
		*link_ptr = bb_alloc->next;
		bb_free_alloc_buf(bb_alloc);
		state_ptr->last_update_time = time(NULL);
		return true;
	}

	return false;
}
/*
 * Release every cached burst buffer record referenced by state_ptr.
 * For each of the three hash tables (bb_ahash, bb_jhash, bb_uhash) that is
 * present, every chain of all BB_HASH_SIZE buckets is walked and each record
 * is released, then the table itself is released. Finally the state's name
 * and its persist_resv_rec list are released.
 * IN state_ptr - burst buffer state whose cached records are discarded
 */
extern void bb_clear_cache(bb_state_t *state_ptr)
{
	bb_alloc_t *alloc_rec, *alloc_follow;
	bb_job_t *job_rec, *job_follow;
	bb_user_t *user_rec, *user_follow;
	int inx;

	/* Allocation records are released through bb_free_alloc_buf() */
	if (state_ptr->bb_ahash) {
		for (inx = 0; inx < BB_HASH_SIZE; inx++) {
			for (alloc_rec = state_ptr->bb_ahash[inx]; alloc_rec;
			     alloc_rec = alloc_follow) {
				xassert(alloc_rec->magic == BB_ALLOC_MAGIC);
				/* Save the link before the record goes away */
				alloc_follow = alloc_rec->next;
				bb_free_alloc_buf(alloc_rec);
			}
		}
		xfree(state_ptr->bb_ahash);
	}

	/* Job records are released through _bb_job_del2() */
	if (state_ptr->bb_jhash) {
		for (inx = 0; inx < BB_HASH_SIZE; inx++) {
			for (job_rec = state_ptr->bb_jhash[inx]; job_rec;
			     job_rec = job_follow) {
				xassert(job_rec->magic == BB_JOB_MAGIC);
				job_follow = job_rec->next;
				_bb_job_del2(job_rec);
			}
		}
		xfree(state_ptr->bb_jhash);
	}

	/* User records are released directly with xfree() */
	if (state_ptr->bb_uhash) {
		for (inx = 0; inx < BB_HASH_SIZE; inx++) {
			for (user_rec = state_ptr->bb_uhash[inx]; user_rec;
			     user_rec = user_follow) {
				xassert(user_rec->magic == BB_USER_MAGIC);
				user_follow = user_rec->next;
				xfree(user_rec);
			}
		}
		xfree(state_ptr->bb_uhash);
	}

	xfree(state_ptr->name);
	FREE_NULL_LIST(state_ptr->persist_resv_rec);
}
/* Example #3 */
/* 0 */
/* Handle timeout of burst buffer events:
 * 1. Purge records whose seen_time is older than bb_state.last_load_time
 * 2. Purge per-job burst buffer records when the stage-out has completed and
 *    the job has been purged from Slurm
 * 3. Test for StageInTimeout events
 * 4. Test for StageOutTimeout events
 *
 * Every record of every hash bucket is examined on each call. When a record
 * is purged the scan resumes with its successor, so the remaining records of
 * the same bucket are still tested during this pass.
 */
static void _timeout_bb_rec(void)
{
	struct job_record *job_ptr;
	bb_alloc_t **bb_pptr, *bb_ptr = NULL;
	double elapsed;
	uint32_t age;
	time_t now = time(NULL);
	int i;

	for (i = 0; i < BB_HASH_SIZE; i++) {
		/* bb_pptr always addresses the link that points at bb_ptr,
		 * which lets a record be unlinked in place */
		bb_pptr = &bb_state.bb_ahash[i];
		bb_ptr = bb_state.bb_ahash[i];
		while (bb_ptr) {
			if (bb_ptr->seen_time < bb_state.last_load_time) {
				if (bb_ptr->job_id == 0) {
					info("%s: Persistent burst buffer %s "
					     "purged",
					     __func__, bb_ptr->name);
				} else if (bb_state.bb_config.debug_flag) {
					info("%s: burst buffer for job %u "
					     "purged",
					     __func__, bb_ptr->job_id);
				}
				/* FIXME: VESTIGIAL: Use bb_limit_rem
				 * (previously:
				 *  bb_remove_user_load(bb_ptr, &bb_state);) */
				*bb_pptr = bb_ptr->next;
				bb_free_alloc_buf(bb_ptr);
				/* Resume with the successor; bb_pptr still
				 * addresses the link that now points at it */
				bb_ptr = *bb_pptr;
				continue;
			}
			if ((bb_ptr->job_id != 0) &&
			    (bb_ptr->state >= BB_STATE_STAGED_OUT) &&
			    !find_job_record(bb_ptr->job_id)) {
				_stop_stage_out(bb_ptr->job_id);
				bb_ptr->cancelled = true;
				bb_ptr->end_time = 0;
				*bb_pptr = bb_ptr->next;
				bb_free_alloc_buf(bb_ptr);
				bb_ptr = *bb_pptr;
				continue;
			}

			/* Converting a negative double to an unsigned type is
			 * undefined, so a state_time later than "now" (e.g.
			 * after a clock adjustment) is treated as age zero */
			elapsed = difftime(now, bb_ptr->state_time);
			age = (elapsed > 0) ? (uint32_t) elapsed : 0;

			if ((bb_ptr->job_id != 0) &&
			    bb_state.bb_config.stop_stage_in &&
			    (bb_ptr->state == BB_STATE_STAGING_IN) &&
			    (bb_state.bb_config.stage_in_timeout != 0) &&
			    (!bb_ptr->cancelled) &&
			    (age >= bb_state.bb_config.stage_in_timeout)) {
				_stop_stage_in(bb_ptr->job_id);
				bb_ptr->cancelled = true;
				bb_ptr->end_time = 0;
				job_ptr = find_job_record(bb_ptr->job_id);
				if (job_ptr) {
					/* Hold the job: zero priority with
					 * reason WAIT_HELD and an explanatory
					 * state description */
					error("%s: StageIn timed out, holding "
					      "job %u",
					      __func__, bb_ptr->job_id);
					job_ptr->priority = 0;
					job_ptr->direct_set_prio = 1;
					job_ptr->state_reason = WAIT_HELD;
					xfree(job_ptr->state_desc);
					job_ptr->state_desc = xstrdup(
						"Burst buffer stage-in timeout");
					last_job_update = now;
				} else {
					error("%s: StageIn timed out for "
					      "vestigial job %u ",
					      __func__, bb_ptr->job_id);
				}
			}
			if ((bb_ptr->job_id != 0) &&
			    bb_state.bb_config.stop_stage_out &&
			    (bb_ptr->state == BB_STATE_STAGING_OUT) &&
			    (bb_state.bb_config.stage_out_timeout != 0) &&
			    (!bb_ptr->cancelled) &&
			    (age >= bb_state.bb_config.stage_out_timeout)) {
				error("%s: StageOut for job %u timed out",
				      __func__, bb_ptr->job_id);
				_stop_stage_out(bb_ptr->job_id);
				bb_ptr->cancelled = true;
				bb_ptr->end_time = 0;
			}
			bb_pptr = &bb_ptr->next;
			bb_ptr = bb_ptr->next;
		}
	}
}