示例#1
0
文件: bg_job_run.c 项目: HPCNow/slurm
/*
 * Synchronize BG block state to that of currently active jobs.
 * This can recover from slurmctld crashes when block ownership
 * changes were queued.
 *
 * Only the first call does any work; every later call returns
 * SLURM_SUCCESS immediately (even if the first call failed because
 * job_list was NULL).
 *
 * For each running or completing job the block it was using is looked up
 * in bg_lists->main and handed to _sync_agent().  Jobs whose block id or
 * node list is missing, or whose block no longer exists, are collected and
 * failed with JOB_BOOT_FAIL once block_state_mutex has been released.
 * Finally every block that has no job attached is passed to
 * term_jobs_on_block().
 *
 * IN job_list - list of struct job_record pointers to examine
 * RET SLURM_SUCCESS, or SLURM_ERROR if job_list is NULL or no block_list
 *     exists after the scan
 */
extern int sync_jobs(List job_list)
{
	ListIterator itr;
	struct job_record  *job_ptr = NULL;
	List block_list = NULL, kill_list = NULL;
	static bool run_already = false;
	bg_record_t *bg_record = NULL;

	/* Execute only on initial startup. We don't support bgblock
	 * creation on demand today, so there is no need to re-sync data. */
	if (run_already)
		return SLURM_SUCCESS;
	run_already = true;

	if (!job_list) {
		error("sync_jobs: no job_list");
		return SLURM_ERROR;
	}
	slurm_mutex_lock(&block_state_mutex);
	/* Ensure that all running jobs own the specified block */
	itr = list_iterator_create(job_list);
	while ((job_ptr = list_next(itr))) {
		bg_action_t *bg_action_ptr = NULL;
		if (!IS_JOB_RUNNING(job_ptr) && !IS_JOB_COMPLETING(job_ptr))
			continue;

		/* Start every job with no block selected.  Without this a
		 * job that fails one of the sanity checks below would
		 * silently inherit the block found for the previous job
		 * and be synced against it instead of being killed. */
		bg_record = NULL;

		bg_action_ptr = xmalloc(sizeof(bg_action_t));
		if (IS_JOB_COMPLETING(job_ptr))
			bg_action_ptr->op = TERM_OP;
		else
			bg_action_ptr->op = START_OP;
		bg_action_ptr->job_ptr = job_ptr;

		/* Pull the block id and the image/connection settings out
		 * of the job's select_jobinfo into the action record. */
		get_select_jobinfo(job_ptr->select_jobinfo->data,
				   SELECT_JOBDATA_BLOCK_ID,
				   &(bg_action_ptr->bg_block_id));
#ifdef HAVE_BG_L_P
# ifdef HAVE_BGL
		get_select_jobinfo(job_ptr->select_jobinfo->data,
				   SELECT_JOBDATA_BLRTS_IMAGE,
				   &(bg_action_ptr->blrtsimage));
# else
		get_select_jobinfo(job_ptr->select_jobinfo->data,
				   SELECT_JOBDATA_CONN_TYPE,
				   &(bg_action_ptr->conn_type));
# endif
		get_select_jobinfo(job_ptr->select_jobinfo->data,
				   SELECT_JOBDATA_LINUX_IMAGE,
				   &(bg_action_ptr->linuximage));
		get_select_jobinfo(job_ptr->select_jobinfo->data,
				   SELECT_JOBDATA_RAMDISK_IMAGE,
				   &(bg_action_ptr->ramdiskimage));
#endif
		get_select_jobinfo(job_ptr->select_jobinfo->data,
				   SELECT_JOBDATA_MLOADER_IMAGE,
				   &(bg_action_ptr->mloaderimage));

		/* Each failing check only logs; bg_record staying NULL is
		 * what routes the job to the kill list below. */
		if (bg_action_ptr->bg_block_id == NULL) {
			error("Running job %u has bgblock==NULL",
			      job_ptr->job_id);
		} else if (job_ptr->nodes == NULL) {
			error("Running job %u has nodes==NULL",
			      job_ptr->job_id);
		} else if (!(bg_record = find_bg_record_in_list(
				     bg_lists->main,
				     bg_action_ptr->bg_block_id))) {
			error("Kill job %u belongs to defunct "
			      "bgblock %s",
			      job_ptr->job_id,
			      bg_action_ptr->bg_block_id);
		}

		if (!bg_record) {
			/* Can't fail it just now, we have locks in
			   place. */
			bg_status_add_job_kill_list(job_ptr, &kill_list);
			_destroy_bg_action(bg_action_ptr);
			continue;
		}
		/* _sync_agent will destroy the bg_action_ptr */
		_sync_agent(bg_action_ptr, bg_record);
	}
	list_iterator_destroy(itr);

	/* Snapshot (id and mp_str only) every block that has no job, so
	 * they can be processed after the mutex is dropped. */
	block_list = list_create(destroy_bg_record);
	itr = list_iterator_create(bg_lists->main);
	while ((bg_record = list_next(itr))) {
		bg_record_t *rm_record;
		if (bg_record->job_ptr
		    || (bg_record->job_list
			&& list_count(bg_record->job_list)))
			continue;
		rm_record = xmalloc(sizeof(bg_record_t));
		rm_record->magic = BLOCK_MAGIC;
		rm_record->bg_block_id = xstrdup(bg_record->bg_block_id);
		rm_record->mp_str = xstrdup(bg_record->mp_str);
		list_append(block_list, rm_record);
	}
	list_iterator_destroy(itr);
	slurm_mutex_unlock(&block_state_mutex);

	if (kill_list) {
		/* slurmctld is already locked up, so handle this right after
		 * the unlock of block_state_mutex.
		 */
		bg_status_process_kill_job_list(kill_list, JOB_BOOT_FAIL, 1);
		FREE_NULL_LIST(kill_list);
	}

	/* Ensure that all other blocks are free of users */
	if (block_list) {
		itr = list_iterator_create(block_list);
		while ((bg_record = list_next(itr))) {
			info("Queue clearing of users of BG block %s",
			     bg_record->bg_block_id);
			term_jobs_on_block(bg_record->bg_block_id);
		}
		list_iterator_destroy(itr);
		FREE_NULL_LIST(block_list);
	} else {
		/* this should never happen,
		 * vestigial logic */
		error("sync_jobs: no block_list");
		return SLURM_ERROR;
	}
	return SLURM_SUCCESS;
}
示例#2
0
文件: bg_core.c 项目: fafik23/slurm
/*
 * Hand the blocks in track_list over to _track_freeing_blocks() and fail
 * any unfinished jobs still attached to them.
 *
 * block_state_mutex should be unlocked before calling this; it is taken
 * and released internally.
 *
 * IN job_id     - stored in the request passed to _track_freeing_blocks()
 * IN track_list - blocks to free; its entries are moved (list_transfer)
 *                 into a private list.  Nothing happens if it is NULL or
 *                 empty.
 * IN destroy    - if set, each block is also flagged for destruction and
 *                 resume_block() is called on blocks in an error state
 * IN wait       - if set, _track_freeing_blocks() runs in the calling
 *                 thread; otherwise it runs in a detached thread
 */
extern void free_block_list(uint32_t job_id, List track_list,
			    bool destroy, bool wait)
{
	bg_free_block_list_t *free_req;
	bg_record_t *block = NULL;
	ListIterator block_itr = NULL;
	List jobs_to_kill = NULL;
	kill_job_struct_t *kill_entry;
	pthread_attr_t thread_attr;
	pthread_t thread_id;
	int attempts;

	if (!track_list || !list_count(track_list))
		return;

	free_req = xmalloc(sizeof(bg_free_block_list_t));
	free_req->track_list = list_create(NULL);
	free_req->destroy = destroy;
	free_req->job_id = job_id;

	slurm_mutex_lock(&block_state_mutex);
	list_transfer(free_req->track_list, track_list);
	block_itr = list_iterator_create(free_req->track_list);
	while ((block = list_next(block_itr))) {
		if (block->magic != BLOCK_MAGIC) {
			error("block was already destroyed %p", block);
			continue;
		}
		block->free_cnt++;

		if (destroy) {
			/* Never clear a destroy request that another
			 * thread already placed on this block. */
			if (!block->destroy)
				block->destroy = destroy;

			if (block->state & BG_BLOCK_ERROR_FLAG)
				resume_block(block);
		}

		/* A block being modified is being freed so that a job can
		 * run on it, so that job is allowed to stay.  Anything
		 * that inspects the block's jobs belongs after this
		 * check. */
		if (block->modifying) {
			debug("free_block_list: Just FYI, we are "
			      "freeing a block (%s) that "
			      "has at least one pending job.",
			      block->bg_block_id);
			continue;
		}

		if (block->job_ptr && !IS_JOB_FINISHED(block->job_ptr)) {
			/* Single job attached directly to the block. */
			info("We are freeing a block (%s) that "
			     "has job %u(%u).",
			     block->bg_block_id,
			     block->job_ptr->job_id,
			     block->job_running);
			if (!jobs_to_kill)
				jobs_to_kill =
					bg_status_create_kill_job_list();
			kill_entry = xmalloc(sizeof(kill_job_struct_t));
			kill_entry->jobid = block->job_ptr->job_id;
			list_push(jobs_to_kill, kill_entry);
		} else if (block->job_list && list_count(block->job_list)) {
			/* Block carries a list of jobs: queue every one
			 * that is valid and not yet finished. */
			ListIterator job_itr;
			struct job_record *job_ptr;

			if (!jobs_to_kill)
				jobs_to_kill =
					bg_status_create_kill_job_list();
			info("We are freeing a block (%s) that has at "
			     "least 1 job.",
			     block->bg_block_id);
			job_itr = list_iterator_create(block->job_list);
			while ((job_ptr = list_next(job_itr))) {
				if ((job_ptr->magic == JOB_MAGIC)
				    && !IS_JOB_FINISHED(job_ptr)) {
					kill_entry = xmalloc(
						sizeof(kill_job_struct_t));
					kill_entry->jobid = job_ptr->job_id;
					list_push(jobs_to_kill, kill_entry);
				}
			}
			list_iterator_destroy(job_itr);
		}
	}
	list_iterator_destroy(block_itr);
	slurm_mutex_unlock(&block_state_mutex);

	/* The kill list is processed only after the mutex is released. */
	if (jobs_to_kill) {
		bg_status_process_kill_job_list(jobs_to_kill, JOB_FAILED, 0);
		FREE_NULL_LIST(jobs_to_kill);
	}

	if (wait) {
		/* Synchronous path: _track_freeing_blocks waits until the
		 * list is done and frees the memory of free_req. */
		_track_freeing_blocks(free_req);
		return;
	}

	/* Asynchronous path: a detached thread runs _track_freeing_blocks,
	 * which handles cleanup of free_req. */
	slurm_attr_init(&thread_attr);
	if (pthread_attr_setdetachstate(&thread_attr,
					PTHREAD_CREATE_DETACHED))
		error("pthread_attr_setdetachstate error %m");

	/* NOTE(review): pthread_create reports failure through its return
	 * value, so %m below may not describe the actual cause. */
	attempts = 0;
	while (pthread_create(&thread_id, &thread_attr,
			      _track_freeing_blocks, free_req) != 0) {
		error("pthread_create error %m");
		attempts++;
		if (attempts > MAX_PTHREAD_RETRIES)
			fatal("Can't create pthread");
		/* sleep and retry */
		usleep(1000);
	}
	slurm_attr_destroy(&thread_attr);
}
/*
 * Query the bridge for every block in bg_lists->main and fold any change
 * (node use on BGL, small-block connection type otherwise, and block
 * state) into the matching bg_record.
 *
 * All of the work is compiled in only when HAVE_BG_FILES is defined;
 * without it the function just returns 0.
 *
 * RET 1 if at least one record was changed,
 *     -1 if a bridge_get_data() call failed while nothing had been changed
 *        yet (a change found on a later block still turns this into 1),
 *     0 otherwise.
 */
static int _do_block_poll(void)
{
	int changed = 0;
#if defined HAVE_BG_FILES
	int bridge_rc;
	rm_partition_t *rm_block = NULL;
#ifdef HAVE_BGL
	rm_partition_mode_t cur_node_use;
#endif
	rm_partition_state_t cur_state;
	char *block_id = NULL;
	bg_record_t *bg_record = NULL;
	ListIterator block_itr = NULL;

	if (!bg_lists->main)
		return changed;

	/* Lock order: slurmctld job read lock first, then the block
	 * state mutex; released in reverse order below. */
	lock_slurmctld(job_read_lock);
	slurm_mutex_lock(&block_state_mutex);
	block_itr = list_iterator_create(bg_lists->main);
	while ((bg_record = (bg_record_t *) list_next(block_itr))) {
		if (bg_record->magic != BLOCK_MAGIC) {
			/* block is gone */
			list_remove(block_itr);
			continue;
		}
		if (!bg_record->bg_block_id)
			continue;

		block_id = bg_record->bg_block_id;
		bridge_rc = bridge_get_block_info(block_id, &rm_block);
		if (bridge_rc != SLURM_SUCCESS) {
			/* With dynamic layout two failures are expected and
			 * handled quietly. */
			if (bg_conf->layout_mode == LAYOUT_DYNAMIC) {
				if (bridge_rc == BG_ERROR_INCONSISTENT_DATA) {
					debug2("got inconsistent data when "
					       "querying block %s", block_id);
					continue;
				}
				if (bridge_rc == BG_ERROR_BLOCK_NOT_FOUND) {
					debug("block %s not found, removing "
					      "from slurm", block_id);
					list_remove(block_itr);
					destroy_bg_record(bg_record);
					continue;
				}
			}

			/* If the call was busy, stop polling for this
			 * pass.  It usually means something like
			 * rm_get_BG was called which can be a very
			 * long call */
			if (bridge_rc == EBUSY) {
				debug5("lock was busy, aborting");
				break;
			}

			error("bridge_get_block_info(%s): %s",
			      block_id,
			      bg_err_str(bridge_rc));
			continue;
		}

		/* From here on rm_block is held, so every exit from this
		 * iteration must go through next_block to release it. */
#ifdef HAVE_BGL
		bridge_rc = bridge_get_data(rm_block, RM_PartitionMode,
					    &cur_node_use);
		if (bridge_rc != SLURM_SUCCESS) {
			error("bridge_get_data(RM_PartitionMode): %s",
			      bg_err_str(bridge_rc));
			if (!changed)
				changed = -1;
			goto next_block;
		}
		if (bg_record->node_use != cur_node_use) {
			debug("node_use of Block %s was %d "
			      "and now is %d",
			      bg_record->bg_block_id,
			      bg_record->node_use,
			      cur_node_use);
			bg_record->node_use = cur_node_use;
			changed = 1;
		}
#else
		/* The connection type is only re-read for blocks smaller
		 * than a midplane, or when a midplane is one nodecard. */
		if ((bg_record->cnode_cnt < bg_conf->mp_cnode_cnt)
		    || (bg_conf->mp_cnode_cnt == bg_conf->nodecard_cnode_cnt)) {
			char *mode = NULL;
			uint16_t conn_type = SELECT_SMALL;

			bridge_rc = bridge_get_data(rm_block,
						    RM_PartitionOptions,
						    &mode);
			if (bridge_rc != SLURM_SUCCESS) {
				error("bridge_get_data(RM_PartitionOptions): "
				      "%s", bg_err_str(bridge_rc));
				if (!changed)
					changed = -1;
				goto next_block;
			}
			if (mode) {
				/* First character of the options string
				 * selects the HTC mode; anything else
				 * leaves SELECT_SMALL. */
				char opt = mode[0];

				if (opt == 's')
					conn_type = SELECT_HTC_S;
				else if (opt == 'd')
					conn_type = SELECT_HTC_D;
				else if (opt == 'v')
					conn_type = SELECT_HTC_V;
				else if (opt == 'l')
					conn_type = SELECT_HTC_L;
				free(mode);
			}

			if (bg_record->conn_type[0] != conn_type) {
				debug("mode of small Block %s was %u "
				      "and now is %u",
				      bg_record->bg_block_id,
				      bg_record->conn_type[0],
				      conn_type);
				bg_record->conn_type[0] = conn_type;
				changed = 1;
			}
		}
#endif
		bridge_rc = bridge_get_data(rm_block, RM_PartitionState,
					    &cur_state);
		if (bridge_rc != SLURM_SUCCESS) {
			error("bridge_get_data(RM_PartitionState): %s",
			      bg_err_str(bridge_rc));
			if (!changed)
				changed = -1;
			goto next_block;
		}
		if (bg_status_update_block_state(
			    bg_record, cur_state, kill_job_list) == 1)
			changed = 1;

	next_block:
		bridge_rc = bridge_free_block(rm_block);
		if (bridge_rc != SLURM_SUCCESS) {
			error("bridge_free_block(): %s",
			      bg_err_str(bridge_rc));
		}
	}
	list_iterator_destroy(block_itr);
	slurm_mutex_unlock(&block_state_mutex);
	unlock_slurmctld(job_read_lock);

	/* Jobs queued on kill_job_list during the scan are failed only
	 * after both locks are released. */
	bg_status_process_kill_job_list(kill_job_list, JOB_FAILED, 0);

#endif
	return changed;
}