Beispiel #1
0
/*
 * Dispatch one block action and then dispose of it.
 *
 * args is interpreted as a bg_action_t.  START_OP is handed to
 * _start_agent(); TERM_OP calls bridge_block_post_job() with the action's
 * block id and job pointer; any other op value falls through without doing
 * anything.  In every case the action is passed to _destroy_bg_action()
 * before returning, so the caller must not use it afterwards.
 *
 * NOTE(review): the void *(void *) signature looks like a pthread start
 * routine -- the code that launches it is not visible here; confirm.
 *
 * Always returns NULL.
 */
static void *_block_agent(void *args)
{
	bg_action_t *action = args;

	if (action->op == START_OP) {
		_start_agent(action);
	} else if (action->op == TERM_OP) {
		bridge_block_post_job(action->bg_block_id, action->job_ptr);
	}

	_destroy_bg_action(action);

	return NULL;
}
Beispiel #2
0
/*
 * Re-associate a job with its block; update the block user and (re)start
 * the block as needed.
 *
 * Steps, all performed with block_state_mutex held unless noted:
 *  - Look the block up in bg_lists->main by bg_action_ptr->bg_block_id.
 *    If it is missing, log an error, requeue the job and return.
 *  - Copy the block's cpu_cnt into the job's total_cpus / min_cpus, record
 *    the job on the block (job_running, job_ptr) and store the block
 *    pointer in the job's select_jobinfo.
 *  - Make sure the block is on the job_running list (charging its CPUs
 *    against num_unused_cpus only when it is newly added) and on the
 *    booted list.
 *  - If the block state is BG_BLOCK_INITED: clear JOB_CONFIGURING on the
 *    job and, if the block's user_uid differs from the job's user_id,
 *    replace target_name and call set_block_user().  If that returns
 *    SLURM_ERROR the job is failed (after the mutex is released).
 *  - Otherwise: log the state, release the mutex and hand the action to
 *    _start_agent().
 *
 * The mutex is released on every path before returning.
 *
 * bg_action_ptr and bg_action_ptr->job_ptr (including ->details and
 * ->select_jobinfo) are dereferenced unconditionally and must be non-NULL.
 */
static void _sync_agent(bg_action_t *bg_action_ptr)
{
	bg_record_t *bg_record = NULL;
	int set_user_rc = SLURM_SUCCESS;

	slurm_mutex_lock(&block_state_mutex);
	bg_record = find_bg_record_in_list(bg_lists->main,
					   bg_action_ptr->bg_block_id);
	if (!bg_record) {
		slurm_mutex_unlock(&block_state_mutex);
		error("No block %s", bg_action_ptr->bg_block_id);
		bg_requeue_job(bg_action_ptr->job_ptr->job_id, 1);
		return;
	}

	last_bg_update = time(NULL);
	bg_action_ptr->job_ptr->total_cpus =
		bg_action_ptr->job_ptr->details->min_cpus = bg_record->cpu_cnt;
	bg_record->job_running = bg_action_ptr->job_ptr->job_id;
	bg_record->job_ptr = bg_action_ptr->job_ptr;
	set_select_jobinfo(bg_record->job_ptr->select_jobinfo->data,
			   SELECT_JOBDATA_BLOCK_PTR,
			   bg_record);

	/* Only charge the CPUs when the block was not already counted. */
	if (!block_ptr_exist_in_list(bg_lists->job_running, bg_record)) {
		list_push(bg_lists->job_running, bg_record);
		num_unused_cpus -= bg_record->cpu_cnt;
	}
	if (!block_ptr_exist_in_list(bg_lists->booted, bg_record))
		list_push(bg_lists->booted, bg_record);

	if (bg_record->state != BG_BLOCK_INITED) {
		if (bg_record->state != BG_BLOCK_BOOTING) {
			error("Block %s isn't ready and isn't "
			      "being configured! Starting job again.",
			      bg_action_ptr->bg_block_id);
		} else {
			debug("Block %s is booting, job ok",
			      bg_action_ptr->bg_block_id);
		}
		slurm_mutex_unlock(&block_state_mutex);
		_start_agent(bg_action_ptr);
		return;
	}

	/* Block is initialized: the job is no longer configuring. */
	if (bg_record->job_ptr) {
		bg_record->job_ptr->job_state &= (~JOB_CONFIGURING);
		last_job_update = time(NULL);
	}

	if (bg_record->user_uid != bg_action_ptr->job_ptr->user_id) {
		debug("User isn't correct for job %d on %s, "
		      "fixing...",
		      bg_action_ptr->job_ptr->job_id,
		      bg_action_ptr->bg_block_id);
		xfree(bg_record->target_name);
		bg_record->target_name =
			uid_to_string(bg_action_ptr->job_ptr->user_id);
		set_user_rc = set_block_user(bg_record);
	}
	slurm_mutex_unlock(&block_state_mutex);

	/*
	 * bg_record is no longer protected once the mutex is released, so
	 * identify the job through the action rather than re-reading
	 * bg_record->job_running (which was set from this same job_id above
	 * and could since have been changed by another thread).
	 */
	if (set_user_rc == SLURM_ERROR)
		(void) slurm_fail_job(bg_action_ptr->job_ptr->job_id);
}