Example #1
/* Notify the gang scheduler that a job has been started */
extern int gs_job_start(struct job_record *job_ptr)
{
	struct gs_part *p_ptr;
	uint16_t job_state;

	if (gs_debug_flags & DEBUG_FLAG_GANG)
		info("gang: entering gs_job_start for job %u", job_ptr->job_id);
	/* add job to partition */
	pthread_mutex_lock(&data_mutex);
	p_ptr = list_find_first(gs_part_list, _find_gs_part,
				job_ptr->partition);
	if (p_ptr) {
		job_state = _add_job_to_part(p_ptr, job_ptr);
		/* if this job is running then check for preemption */
		if (job_state == GS_RESUME)
			_update_all_active_rows();
	}
	pthread_mutex_unlock(&data_mutex);

	if (!p_ptr) {
		/* No partition was found for this job, so let it run
		 * uninterrupted (what else can we do?)
		 */
		error("gang: could not find partition %s for job %u",
		      job_ptr->partition, job_ptr->job_id);
	}

	_preempt_job_dequeue();	/* MUST BE OUTSIDE OF data_mutex lock */
	if (gs_debug_flags & DEBUG_FLAG_GANG)
		info("gang: leaving gs_job_start");

	return SLURM_SUCCESS;
}
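
list_find_first() walks gs_part_list and hands each element to the _find_gs_part callback together with the partition name used as the search key. The callback itself is not part of this excerpt; assuming SLURM's usual list-find convention (a non-zero return means "this element matches"), a minimal comparator could look like the sketch below.

static int _find_gs_part(void *x, void *key)
{
	struct gs_part *p_ptr = (struct gs_part *) x;
	char *name = (char *) key;

	/* A non-zero return tells list_find_first() this element matches */
	if (strcmp(name, p_ptr->part_name) == 0)
		return 1;
	return 0;
}

(strcmp() needs <string.h>; the real plugin may compare names differently, so treat this only as an illustration of the callback shape.)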
Example #2
/* Scan the master SLURM job list for any new jobs to add, or for any old jobs
 *	to remove */
extern int gs_job_scan(void)
{
	if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG)
		info("gang: entering gs_job_scan");
	pthread_mutex_lock(&data_mutex);
	_scan_slurm_job_list();
	pthread_mutex_unlock(&data_mutex);

	_preempt_job_dequeue();	/* MUST BE OUTSIDE OF data_mutex lock */
	if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG)
		info("gang: leaving gs_job_scan");

	return SLURM_SUCCESS;
}
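
Both entry points above follow the same discipline: mutate the gang data under data_mutex, release it, and only then call _preempt_job_dequeue(). The queueing side is not shown in this excerpt, but the general defer-work-outside-the-lock pattern it implies can be sketched in plain C as follows (pending[], queue_preempt() and preempt_dequeue() are hypothetical names, not SLURM functions):

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

static pthread_mutex_t data_mutex = PTHREAD_MUTEX_INITIALIZER;
static uint32_t pending[64];	/* job ids collected for later preemption */
static int npending;

/* Called with data_mutex held: only record the work to be done */
static void queue_preempt(uint32_t job_id)
{
	if (npending < 64)
		pending[npending++] = job_id;
}

/* Called after data_mutex has been released: take it only briefly to pop
 * each entry, and do the potentially lock-taking work while unlocked */
static void preempt_dequeue(void)
{
	for (;;) {
		uint32_t job_id;

		pthread_mutex_lock(&data_mutex);
		if (npending == 0) {
			pthread_mutex_unlock(&data_mutex);
			break;
		}
		job_id = pending[--npending];
		pthread_mutex_unlock(&data_mutex);

		printf("preempt job %u\n", job_id);	/* stand-in action */
	}
}

static void job_event(uint32_t job_id)
{
	pthread_mutex_lock(&data_mutex);
	queue_preempt(job_id);		/* update state under the lock */
	pthread_mutex_unlock(&data_mutex);
	preempt_dequeue();		/* act outside the lock */
}

The point of the pattern is that the deferred action is free to take other locks (job or node tables, for instance) without ever holding data_mutex at the same time.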
Example #3
/* The timeslicer thread */
static void *_timeslicer_thread(void *arg)
{
	/* Write lock on jobs and read lock on nodes */
	slurmctld_lock_t job_write_lock = {
		NO_LOCK, WRITE_LOCK, READ_LOCK, NO_LOCK };
	ListIterator part_iterator;
	struct gs_part *p_ptr;

	if (gs_debug_flags & DEBUG_FLAG_GANG)
		info("gang: starting timeslicer loop");
	while (!thread_shutdown) {
		_slice_sleep();
		if (thread_shutdown)
			break;

		lock_slurmctld(job_write_lock);
		pthread_mutex_lock(&data_mutex);
		list_sort(gs_part_list, _sort_partitions);

		/* scan each partition... */
		if (gs_debug_flags & DEBUG_FLAG_GANG)
			info("gang: _timeslicer_thread: scanning partitions");
		part_iterator = list_iterator_create(gs_part_list);
		if (part_iterator == NULL)
			fatal("memory allocation failure");
		while ((p_ptr = (struct gs_part *) list_next(part_iterator))) {
			if (gs_debug_flags & DEBUG_FLAG_GANG) {
				info("gang: _timeslicer_thread: part %s: "
				     "run %u total %u", p_ptr->part_name,
				     p_ptr->jobs_active, p_ptr->num_jobs);
			}
			if (p_ptr->jobs_active <
			    (p_ptr->num_jobs + p_ptr->num_shadows)) {
				_cycle_job_list(p_ptr);
			}
		}
		list_iterator_destroy(part_iterator);
		pthread_mutex_unlock(&data_mutex);

		/* Preempt jobs that were formerly only suspended */
		_preempt_job_dequeue();	/* MUST BE OUTSIDE data_mutex lock */
		unlock_slurmctld(job_write_lock);
	}

	timeslicer_thread_id = (pthread_t) 0;
	pthread_exit((void *) 0);
	return NULL;
}
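
The loop only terminates once thread_shutdown becomes true, so whoever owns the thread has to set that flag and then wait for the thread to finish. The launch and teardown code is outside this excerpt; a minimal sketch under that assumption, reusing timeslicer_thread_id and thread_shutdown from above, might be:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_t timeslicer_thread_id = (pthread_t) 0;
static volatile bool thread_shutdown = false;

static void *_timeslicer_thread(void *arg);	/* defined in the excerpt above */

static void _spawn_timeslicer_thread(void)
{
	if (pthread_create(&timeslicer_thread_id, NULL,
			   _timeslicer_thread, NULL))
		fprintf(stderr, "cannot create timeslicer thread\n");
}

static void _stop_timeslicer_thread(void)
{
	pthread_t tid = timeslicer_thread_id;

	/* The loop re-checks thread_shutdown each time _slice_sleep()
	 * returns, so it will exit on its next pass */
	thread_shutdown = true;
	if (tid)
		pthread_join(tid, NULL);
}

The real plugin may use a different handshake (for example, waking _slice_sleep() early via a condition variable), so treat this only as one plausible arrangement.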
Example #4
/* Notify the gang scheduler that a job has been resumed or started.
 * In either case, add the job to gang scheduling. */
extern void gs_job_start(struct job_record *job_ptr)
{
	struct gs_part *p_ptr;
	uint16_t job_sig_state;
	char *part_name;

	if (!(slurmctld_conf.preempt_mode & PREEMPT_MODE_GANG))
		return;

	if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG)
		info("gang: entering gs_job_start for job %u", job_ptr->job_id);
	/* add job to partition */
	if (job_ptr->part_ptr && job_ptr->part_ptr->name)
		part_name = job_ptr->part_ptr->name;
	else
		part_name = job_ptr->partition;
	slurm_mutex_lock(&data_mutex);
	p_ptr = list_find_first(gs_part_list, _find_gs_part, part_name);
	if (p_ptr) {
		job_sig_state = _add_job_to_part(p_ptr, job_ptr);
		/* if this job is running then check for preemption */
		if (job_sig_state == GS_RESUME)
			_update_all_active_rows();
	}
	slurm_mutex_unlock(&data_mutex);

	if (!p_ptr) {
		/* No partition was found for this job, so let it run
		 * uninterrupted (what else can we do?)
		 */
		error("gang: could not find partition %s for job %u",
		      part_name, job_ptr->job_id);
	}

	_preempt_job_dequeue();	/* MUST BE OUTSIDE OF data_mutex lock */
	if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG)
		info("gang: leaving gs_job_start");
}
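
Compared with the first version, this revision checks the configured preempt mode, falls back to job_ptr->partition only when job_ptr->part_ptr is unset, and takes slurm_mutex_lock()/slurm_mutex_unlock() instead of the bare pthread calls. The wrapper's definition is not shown here; it is assumed to be a thin error-checking layer over pthread_mutex_lock(), roughly in the spirit of the sketch below (not the actual SLURM macro):

#include <pthread.h>
#include <stdio.h>
#include <string.h>

/* Hypothetical checked-lock wrapper; SLURM's real slurm_mutex_lock()
 * lives in its common headers and may differ in detail. */
#define checked_mutex_lock(mutex)					\
	do {								\
		int _err = pthread_mutex_lock(mutex);			\
		if (_err)						\
			fprintf(stderr, "mutex_lock failed: %s\n",	\
				strerror(_err));			\
	} while (0)

The practical gain over a bare pthread_mutex_lock() call is simply that a failed lock attempt is reported instead of being silently ignored.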
Example #5
/* rebuild data structures from scratch
 *
 * A reconfigure can affect this plugin in these ways:
 * - partitions can be added or removed
 *   - this affects the gs_part_list
 * - nodes can be removed from a partition, or added to a partition
 *   - this affects the size of the active resmap
 *
 * Here's the plan:
 * 1. save a copy of the global structures, and then construct
 *    new ones.
 * 2. load the new partition structures with existing jobs,
 *    confirming the job exists and resizing their resmaps
 *    (if necessary).
 * 3. make sure all partitions are accounted for. If a partition
 *    was removed, make sure any jobs that were in the queue and
 *    that were suspended are resumed. Conversely, if a partition
 *    was added, check for existing jobs that may be contending
 *    for resources that we could begin timeslicing.
 * 4. delete the old global structures and return.
 */
extern int gs_reconfig(void)
{
	int i;
	ListIterator part_iterator;
	struct gs_part *p_ptr, *newp_ptr;
	List old_part_list;
	struct job_record *job_ptr;
	struct gs_job *j_ptr;

	if (!timeslicer_thread_id) {
		/* gs_init() will be called later from read_slurm_conf()
		 * if we are enabling gang scheduling via reconfiguration */
		return SLURM_SUCCESS;
	}

	if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG)
		info("gang: entering gs_reconfig");
	pthread_mutex_lock(&data_mutex);

	old_part_list = gs_part_list;
	gs_part_list = NULL;

	/* reset global data */
	gs_fast_schedule = slurm_get_fast_schedule();
	gr_type = _get_gr_type();
	_load_phys_res_cnt();
	_build_parts();

	/* scan the old part list and add existing jobs to the new list */
	part_iterator = list_iterator_create(old_part_list);
	while ((p_ptr = (struct gs_part *) list_next(part_iterator))) {
		newp_ptr = (struct gs_part *) list_find_first(gs_part_list,
							      _find_gs_part,
							      p_ptr->part_name);
		if (!newp_ptr) {
			/* this partition was removed, so resume
			 * any jobs suspended by gang and continue */
			for (i = 0; i < p_ptr->num_jobs; i++) {
				j_ptr = p_ptr->job_list[i];
				if ((j_ptr->sig_state == GS_SUSPEND) &&
				    (j_ptr->job_ptr->priority != 0)) {
					info("resuming job in missing part %s",
					     p_ptr->part_name);
					_resume_job(j_ptr->job_id);
					j_ptr->sig_state = GS_RESUME;
				}
			}
			continue;
		}
		if (p_ptr->num_jobs == 0)
			/* no jobs to transfer */
			continue;
		/* We need to transfer the jobs from p_ptr to newp_ptr and
		 * adjust their resmaps (if necessary). Then we need to create
		 * the active resmap and adjust the state of each job (if
		 * necessary). NOTE: there could be jobs that only overlap
		 * on nodes that are no longer in the partition, but we're
		 * not going to worry about those cases.
		 *
		 * Add the jobs from p_ptr into newp_ptr in their current order
		 * to preserve the state of timeslicing.
		 */
		for (i = 0; i < p_ptr->num_jobs; i++) {
			job_ptr = find_job_record(p_ptr->job_list[i]->job_id);
			if (job_ptr == NULL) {
				/* job no longer exists in SLURM, so drop it */
				continue;
			}
			/* resume any job that is suspended by us */
			if (IS_JOB_SUSPENDED(job_ptr) && job_ptr->priority) {
			if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG) {
					info("resuming job %u apparently "
					     "suspended by gang",
					     job_ptr->job_id);
				}
				_resume_job(job_ptr->job_id);
			}

			/* transfer the job as long as it is still active */
			if (IS_JOB_SUSPENDED(job_ptr) ||
			    IS_JOB_RUNNING(job_ptr)) {
				_add_job_to_part(newp_ptr, job_ptr);
			}
		}
	}
	list_iterator_destroy(part_iterator);

	/* confirm all jobs. Scan the master job_list and confirm that we
	 * are tracking all jobs */
	_scan_slurm_job_list();

	FREE_NULL_LIST(old_part_list);
	pthread_mutex_unlock(&data_mutex);

	_preempt_job_dequeue();	/* MUST BE OUTSIDE OF data_mutex lock */
	if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG)
		info("gang: leaving gs_reconfig");

	return SLURM_SUCCESS;
}
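
The four-step plan in the header comment boils down to a rebuild-and-swap idiom: detach the old global structure while holding the lock, build a replacement, migrate whatever is still valid, then discard the old copy. Stripped of the SLURM specifics, the idiom looks roughly like this (struct table, current and rebuild() are hypothetical, for illustration only; allocation failures are ignored for brevity):

#include <pthread.h>
#include <stdlib.h>

struct table { int *slots; int nslots; };

static pthread_mutex_t data_mutex = PTHREAD_MUTEX_INITIALIZER;
static struct table *current;

static void rebuild(int new_size)
{
	struct table *old, *fresh;

	pthread_mutex_lock(&data_mutex);
	old = current;				/* 1. save the old copy      */
	fresh = calloc(1, sizeof(*fresh));	/*    and build a new one    */
	fresh->slots = calloc(new_size, sizeof(int));
	fresh->nslots = new_size;
	if (old) {				/* 2./3. carry over whatever */
		int n = (old->nslots < new_size) ?	/* still fits        */
			old->nslots : new_size;
		for (int i = 0; i < n; i++)
			fresh->slots[i] = old->slots[i];
	}
	current = fresh;
	if (old) {				/* 4. drop the old copy      */
		free(old->slots);
		free(old);
	}
	pthread_mutex_unlock(&data_mutex);
}

gs_reconfig() additionally resumes jobs whose partition disappeared and re-scans the master job list afterwards; those steps have no analogue in this stripped-down form.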