Пример #1
0
/*
 * Test driver entry point.
 *
 * Expects exactly five operands:
 *   control_addr job_id1 job_id2 sched_port is_bluegene
 * They are stored in the file-scope variables of the same names, echoed,
 * and then a fixed sequence of scheduler requests is issued: list jobs and
 * nodes, modify and start job_id1, optionally suspend/resume it, cancel
 * job_id2, and list jobs again.
 *
 * Exits with status 1 after printing a usage line when the operand count
 * is wrong; otherwise prints "SUCCESS" and terminates with status 0.
 */
int main(int argc, char * argv[])
{
	/* Positions of the operands within argv[] */
	enum {
		ARG_CONTROL_ADDR = 1,
		ARG_JOB_ID1,
		ARG_JOB_ID2,
		ARG_SCHED_PORT,
		ARG_IS_BLUEGENE,
		ARG_TOTAL		/* required value of argc */
	};

	if (argc != ARG_TOTAL) {
		printf("Usage: %s, control_addr job_id1 job_id2 sched_port is_bluegene\n",
			argv[0]);
		exit(1);
	}

	/* Numeric operands are converted with atoi(); no validation is done */
	control_addr = argv[ARG_CONTROL_ADDR];
	job_id1      = atoi(argv[ARG_JOB_ID1]);
	job_id2      = atoi(argv[ARG_JOB_ID2]);
	sched_port   = atoi(argv[ARG_SCHED_PORT]);
	is_bluegene  = atoi(argv[ARG_IS_BLUEGENE]);
	printf("control_addr=%s job_id=%ld,%ld sched_port=%d is_bluegene=%d\n",
		control_addr, job_id1, job_id2, sched_port, is_bluegene);

	/* Query state, then alter and launch the first job */
	_get_jobs();
	_get_nodes();
	_modify_job(job_id1);
	_get_jobs();
	_start_job(job_id1);

	/* Suspend/resume is skipped when is_bluegene is set
	 * (presumably unsupported on that platform -- confirm) */
	if (!is_bluegene) {
		_suspend_job(job_id1);
		_resume_job(job_id1);
	}

	/* Cancel the second job, wait 5 seconds, then show the job list */
	_cancel_job(job_id2);
	sleep(5);
	_get_jobs();

	printf("SUCCESS\n");
	return 0;
}
Пример #2
0
/*
 * Test driver entry point.
 *
 * Expects six operands:
 *   auth_key control_addr e_port job_id sched_port is_bluegene
 * They are stored in the file-scope variables of the same names and echoed.
 *
 * When built with _DEBUG set, only _single_msg() is run. Otherwise a fixed
 * sequence of scheduler requests is issued against job_id (and a cancel
 * against job_id+1).
 *
 * Exits with status 1 after printing a usage line when too few operands
 * are given; otherwise prints "SUCCESS" and exits with status 0.
 */
int main(int argc, char * argv[])
{
	/* argv[1] through argv[6] are all read below, so argc must be at
	 * least 7. With fewer, argv[6] would be NULL (or out of bounds) and
	 * atoi() on it is undefined behavior. */
	if (argc < 7) {
		printf("Usage: %s, auth_key control_addr e_port "
			"job_id sched_port is_bluegene\n", argv[0]);
		exit(1);
	}

	/* Numeric operands are converted with atoi(); no validation is done */
	auth_key     = argv[1];
	control_addr = argv[2];
	e_port       = atoi(argv[3]);
	job_id       = atoi(argv[4]);
	sched_port   = atoi(argv[5]);
	is_bluegene  = atoi(argv[6]);
	printf("auth_key=%s control_addr=%s e_port=%d job_id=%d sched_port=%d "
		"is_bluegene=%d\n",
		auth_key, control_addr, e_port, job_id, sched_port, is_bluegene);

#if _DEBUG
	_single_msg();
#else
	_initialize();
	_get_jobs();
	_get_nodes();
	_job_will_run(job_id);
	_modify_job(job_id);
	_get_jobs();
	_start_job(job_id);
	_get_jobs();
	/* Suspend/resume is skipped when is_bluegene is set
	 * (presumably unsupported on that platform -- confirm) */
	if (!is_bluegene) {
		_suspend_job(job_id);
		_resume_job(job_id);
	}
	_notify_job(job_id);
	_signal_job(job_id);
	/* With a non-zero event port run the event manager; otherwise
	 * just announce readiness and pause for 3 seconds */
	if (e_port)
		_event_mgr();
	else {
		printf("READY\n");
		sleep(3);
	}
	_cancel_job(job_id+1);
	_job_requeue(job_id);	/* Put job back into HELD state */
	sleep(15);
	_start_job(job_id);
	_get_jobs();
#endif
	printf("SUCCESS\n");
	exit(0);
}
Пример #3
0
/*
 * gs_wake_jobs - gang scheduling has been switched off by a configuration
 *	change, so resume the jobs that were left suspended.
 *
 * Walks the global job_list and resumes every suspended job whose priority
 * is non-zero. Suspended jobs with priority zero are left alone
 * (presumably they were not suspended by gang -- confirm).
 * Does nothing when job_list does not exist.
 */
extern void gs_wake_jobs(void)
{
	ListIterator iter;
	struct job_record *job;

	if (job_list == NULL)	/* nothing to wake */
		return;

	iter = list_iterator_create(job_list);
	for (job = (struct job_record *) list_next(iter);
	     job != NULL;
	     job = (struct job_record *) list_next(iter)) {
		if (!IS_JOB_SUSPENDED(job))
			continue;
		if (job->priority == 0)
			continue;

		info("gang waking preempted job %u", job->job_id);
		_resume_job(job->job_id);
	}
	list_iterator_destroy(iter);
}
Пример #4
0
/*
 * _remove_job_from_part - stop tracking a job in a GS partition
 * IN job_id - id of the job to drop (0 is ignored)
 * IN p_ptr  - GS partition structure to drop it from (NULL is ignored)
 * IN fini   - true if the job is finishing and therefore must not be resumed
 *
 * The job's shadow is cleared, its slot in p_ptr->job_list is closed up,
 * and its gs_job record is freed. If the job was left suspended by gang,
 * still has a non-zero priority and is not finishing, it is resumed first.
 * Silently returns if the job is not in the partition.
 */
static void _remove_job_from_part(uint32_t job_id, struct gs_part *p_ptr,
				  bool fini)
{
	int slot;
	struct gs_job *victim;

	if ((job_id == 0) || (p_ptr == NULL))
		return;

	/* locate the job's slot in this partition's job_list */
	slot = _find_job_index(p_ptr, job_id);
	if (slot < 0)
		return;		/* not tracked here */

	if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG) {
		info("gang: _remove_job_from_part: removing job %u from %s",
		     job_id, p_ptr->part_name);
	}
	victim = p_ptr->job_list[slot];

	/* the shadow has to go before the job itself */
	_clear_shadow(victim);

	/* close the gap: slide every later entry down one position and
	 * clear the slot that is vacated at the end */
	p_ptr->num_jobs--;
	while (slot < p_ptr->num_jobs) {
		p_ptr->job_list[slot] = p_ptr->job_list[slot + 1];
		slot++;
	}
	p_ptr->job_list[slot] = NULL;

	/* do not leave a live job suspended by gang once we forget about it */
	if (!fini) {
		if ((victim->sig_state == GS_SUSPEND) &&
		    victim->job_ptr->priority) {
			if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG) {
				info("gang: _remove_job_from_part: resuming "
				     "suspended job %u", victim->job_id);
			}
			_resume_job(victim->job_id);
		}
	}

	/* release the tracking record */
	victim->job_ptr = NULL;
	xfree(victim);
}
Пример #5
0
/* Re-admit into the active row every tracked job whose row_state currently
 * equals the given state, walking p_ptr->job_list in order.
 *
 * A job that still fits is added to the active row and casts its shadow.
 * A job that no longer fits is treated as preempted: unless it is already
 * marked GS_SUSPEND it is either handed to _preempt_job_queue() (when the
 * partition has shadows and the job's preempt mode is not
 * PREEMPT_MODE_SUSPEND) or suspended, then marked GS_SUSPEND and its shadow
 * cleared. Either way its row_state becomes GS_NO_ACTIVE; its position in
 * job_list is left untouched.
 *
 * IN p_ptr     - GS partition structure being rebuilt
 * IN row_state - row state selecting which jobs to process
 *                (GS_ACTIVE or GS_FILLER)
 */
static void _readd_jobs_in_row_state(struct gs_part *p_ptr, int row_state)
{
	int i;
	struct gs_job *j_ptr;

	for (i = 0; i < p_ptr->num_jobs; i++) {
		j_ptr = p_ptr->job_list[i];
		if (j_ptr->row_state != row_state)
			continue;
		if (_job_fits_in_active_row(j_ptr->job_ptr, p_ptr)) {
			_add_job_to_active(j_ptr->job_ptr, p_ptr);
			_cast_shadow(j_ptr, p_ptr->priority);
			continue;
		}

		/* this job has been preempted by a shadow job.
		 * suspend it and preserve its job_list order */
		if (j_ptr->sig_state != GS_SUSPEND) {
			if (p_ptr->num_shadows &&
			    (slurm_job_preempt_mode(j_ptr->job_ptr) !=
			     PREEMPT_MODE_SUSPEND)) {
				_preempt_job_queue(j_ptr->job_id);
			} else {
				_suspend_job(j_ptr->job_id);
			}
			j_ptr->sig_state = GS_SUSPEND;
			_clear_shadow(j_ptr);
		}
		j_ptr->row_state = GS_NO_ACTIVE;
	}
}

/* Rebuild the active row BUT preserve the order of existing jobs.
 * This is called after one or more jobs have been removed from
 * the partition or if a higher priority "shadow" has been added
 * which could preempt running jobs.
 *
 * The row is refilled in this order: shadows, jobs that were GS_ACTIVE,
 * jobs that were GS_FILLER, and finally (only if add_new_jobs is set)
 * any GS_NO_ACTIVE job with non-zero priority that fits.
 *
 * IN p_ptr        - GS partition structure to rebuild
 * IN add_new_jobs - non-zero to also try to admit (and resume) jobs that
 *                   are not currently in the active row
 */
static void _update_active_row(struct gs_part *p_ptr, int add_new_jobs)
{
	int i;
	struct gs_job *j_ptr;

	if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG) {
		info("gang: update_active_row: rebuilding part %s...",
		     p_ptr->part_name);
	}
	/* rebuild the active row, starting with any shadows */
	p_ptr->jobs_active = 0;
	for (i = 0; p_ptr->shadow && p_ptr->shadow[i]; i++) {
		_add_job_to_active(p_ptr->shadow[i]->job_ptr, p_ptr);
	}

	/* attempt to add the existing 'active' jobs, then the existing
	 * 'filler' jobs; the order of these two passes is significant
	 * because earlier admissions consume room in the active row */
	_readd_jobs_in_row_state(p_ptr, GS_ACTIVE);
	_readd_jobs_in_row_state(p_ptr, GS_FILLER);

	if (!add_new_jobs)
		return;

	/* attempt to add any new jobs */
	for (i = 0; i < p_ptr->num_jobs; i++) {
		j_ptr = p_ptr->job_list[i];
		if ((j_ptr->row_state != GS_NO_ACTIVE) ||
		    (j_ptr->job_ptr->priority == 0))
			continue;
		if (_job_fits_in_active_row(j_ptr->job_ptr, p_ptr)) {
			_add_job_to_active(j_ptr->job_ptr, p_ptr);
			_cast_shadow(j_ptr, p_ptr->priority);
			/* note that this job is a "filler" for this row,
			 * blocked by a higher priority job */
			j_ptr->row_state = GS_FILLER;
			/* resume the job */
			if (j_ptr->sig_state == GS_SUSPEND) {
				_resume_job(j_ptr->job_id);
				j_ptr->sig_state = GS_RESUME;
			}
		}
	}
}
Пример #6
0
/* _cycle_job_list
 *
 * This is the heart of the timeslicer. The algorithm works as follows:
 *
 * 1. Each new job is added to the end of the job list, so the earliest job
 *    is at the front of the list.
 * 2. Any "shadow" jobs are first applied to the active_resmap. Then the
 *    active_resmap is filled out by starting with the first job in the list,
 *    and adding to it any job that doesn't conflict with the resources.
 * 3. When the timeslice has passed, all jobs that were added to the active
 *    resmap are moved to the back of the list (preserving their order among
 *    each other).
 * 4. Loop back to step 2, starting with the new "first job in the list".
 *
 * IN p_ptr - GS partition structure whose job_list is rotated and whose
 *            jobs are suspended or resumed to match the rebuilt active row
 */
static void _cycle_job_list(struct gs_part *p_ptr)
{
	int i, j;
	struct gs_job *j_ptr;

	if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG)
		info("gang: entering _cycle_job_list");
	/* re-prioritize the job_list and set all row_states to GS_NO_ACTIVE */
	for (i = 0; i < p_ptr->num_jobs; i++) {
		/* "while" rather than "if": once the job in slot i has been
		 * rotated to the tail, a different job slides into slot i and
		 * has to be examined as well. A rotated job is marked
		 * GS_NO_ACTIVE before it is moved, so it is never rotated
		 * a second time and the loop terminates. */
		while (p_ptr->job_list[i]->row_state == GS_ACTIVE) {
			/* move this job to the back row and "deactivate" it */
			j_ptr = p_ptr->job_list[i];
			j_ptr->row_state = GS_NO_ACTIVE;
			/* shift the remaining entries down by one; on exit
			 * j indexes the last slot of the list */
			for (j = i; j+1 < p_ptr->num_jobs; j++) {
				p_ptr->job_list[j] = p_ptr->job_list[j+1];
			}
			p_ptr->job_list[j] = j_ptr;
		}
		/* filler jobs keep their position but lose their row state */
		if (p_ptr->job_list[i]->row_state == GS_FILLER)
			p_ptr->job_list[i]->row_state = GS_NO_ACTIVE;

	}
	if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG)
		info("gang: _cycle_job_list reordered job list:");
	/* Rebuild the active row. */
	_build_active_row(p_ptr);
	if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG)
		info("gang: _cycle_job_list new active job list:");
	/* NOTE(review): called regardless of DEBUG_FLAG_GANG; presumably
	 * _print_jobs() tests the flag itself -- confirm */
	_print_jobs(p_ptr);

	/* Suspend running jobs that are GS_NO_ACTIVE */
	for (i = 0; i < p_ptr->num_jobs; i++) {
		j_ptr = p_ptr->job_list[i];
		if ((j_ptr->row_state == GS_NO_ACTIVE) &&
		    (j_ptr->sig_state == GS_RESUME)) {
			if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG) {
		    		info("gang: _cycle_job_list: suspending job %u",
				     j_ptr->job_id);
			}
			/* when the partition has shadows and the job's
			 * preempt mode is not "suspend", hand the job to
			 * _preempt_job_queue() instead of suspending it */
			if (p_ptr->num_shadows &&
			    (slurm_job_preempt_mode(j_ptr->job_ptr) !=
			     PREEMPT_MODE_SUSPEND)) {
				_preempt_job_queue(j_ptr->job_id);
			} else
				_suspend_job(j_ptr->job_id);
			/* either way the job is recorded as suspended and
			 * stops casting a shadow */
			j_ptr->sig_state = GS_SUSPEND;
			_clear_shadow(j_ptr);
		}
	}

	/* Resume suspended jobs that are GS_ACTIVE */
	for (i = 0; i < p_ptr->num_jobs; i++) {
		j_ptr = p_ptr->job_list[i];
		if ((j_ptr->row_state == GS_ACTIVE) &&
		    (j_ptr->sig_state == GS_SUSPEND) &&
		    (j_ptr->job_ptr->priority != 0)) {	/* Redundant check */
			if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG) {
		    		info("gang: _cycle_job_list: resuming job %u",
				     j_ptr->job_id);
			}
			_resume_job(j_ptr->job_id);
			j_ptr->sig_state = GS_RESUME;
			/* a resumed job casts its shadow again */
			_cast_shadow(j_ptr, p_ptr->priority);
		}
	}
	if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG)
		info("gang: leaving _cycle_job_list");
}
Пример #7
0
/* rebuild data structures from scratch
 *
 * A reconfigure can affect this plugin in these ways:
 * - partitions can be added or removed
 *   - this affects the gs_part_list
 * - nodes can be removed from a partition, or added to a partition
 *   - this affects the size of the active resmap
 *
 * Here's the plan:
 * 1. save a copy of the global structures, and then construct
 *    new ones.
 * 2. load the new partition structures with existing jobs,
 *    confirming the job exists and resizing their resmaps
 *    (if necessary).
 * 3. make sure all partitions are accounted for. If a partition
 *    was removed, make sure any jobs that were in the queue and
 *    that were suspended are resumed. Conversely, if a partition
 *    was added, check for existing jobs that may be contending
 *    for resources that we could begin timeslicing.
 * 4. delete the old global structures and return.
 *
 * data_mutex is held from the point the old list is saved until the old
 * list has been freed; _preempt_job_dequeue() runs after it is released.
 *
 * RET SLURM_SUCCESS in all cases
 */
extern int gs_reconfig(void)
{
	int i;
	ListIterator part_iterator;
	struct gs_part *p_ptr, *newp_ptr;
	List old_part_list;
	struct job_record *job_ptr;
	struct gs_job *j_ptr;

	if (!timeslicer_thread_id) {
		/* gs_init() will be called later from read_slurm_conf()
		 * if we are enabling gang scheduling via reconfiguration */
		return SLURM_SUCCESS;
	}

	if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG)
		info("gang: entering gs_reconfig");
	pthread_mutex_lock(&data_mutex);

	/* step 1: keep the current partition list aside and clear the
	 * global so a fresh one can be built */
	old_part_list = gs_part_list;
	gs_part_list = NULL;

	/* reset global data */
	gs_fast_schedule = slurm_get_fast_schedule();
	gr_type = _get_gr_type();
	_load_phys_res_cnt();
	/* presumably repopulates gs_part_list, which is searched
	 * below -- confirm */
	_build_parts();

	/* scan the old part list and add existing jobs to the new list */
	part_iterator = list_iterator_create(old_part_list);
	while ((p_ptr = (struct gs_part *) list_next(part_iterator))) {
		/* look for a partition of the same name in the new list */
		newp_ptr = (struct gs_part *) list_find_first(gs_part_list,
							      _find_gs_part,
							      p_ptr->part_name);
		if (!newp_ptr) {
			/* this partition was removed, so resume
			 * any jobs suspended by gang and continue */
			/* NOTE(review): j_ptr->job_ptr is dereferenced here
			 * without the find_job_record() check used in the
			 * transfer loop below -- confirm it cannot be stale */
			for (i = 0; i < p_ptr->num_jobs; i++) {
				j_ptr = p_ptr->job_list[i];
				if ((j_ptr->sig_state == GS_SUSPEND) &&
				    (j_ptr->job_ptr->priority != 0)) {
					info("resuming job in missing part %s",
					     p_ptr->part_name);
					_resume_job(j_ptr->job_id);
					j_ptr->sig_state = GS_RESUME;
				}
			}
			continue;
		}
		if (p_ptr->num_jobs == 0)
			/* no jobs to transfer */
			continue;
		/* we need to transfer the jobs from p_ptr to newp_ptr and
		 * adjust their resmaps (if necessary). then we need to create
		 * the active resmap and adjust the state of each job (if
		 * necessary). NOTE: there could be jobs that only overlap
		 * on nodes that are no longer in the partition, but we're
		 * not going to worry about those cases.
		 *
		 * add the jobs from p_ptr into newp_ptr in their current order
		 * to preserve the state of timeslicing.
		 */
		for (i = 0; i < p_ptr->num_jobs; i++) {
			/* re-validate the job by id rather than trusting
			 * the pointer stored in the old gs_job record */
			job_ptr = find_job_record(p_ptr->job_list[i]->job_id);
			if (job_ptr == NULL) {
				/* job no longer exists in SLURM, so drop it */
				continue;
			}
			/* resume any job that is suspended by us */
			if (IS_JOB_SUSPENDED(job_ptr) && job_ptr->priority) {
				if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG){
					info("resuming job %u apparently "
					     "suspended by gang",
					     job_ptr->job_id);
				}
				_resume_job(job_ptr->job_id);
			}

			/* transfer the job as long as it is still active */
			if (IS_JOB_SUSPENDED(job_ptr) ||
			    IS_JOB_RUNNING(job_ptr)) {
				_add_job_to_part(newp_ptr, job_ptr);
			}
		}
	}
	list_iterator_destroy(part_iterator);

	/* confirm all jobs. Scan the master job_list and confirm that we
	 * are tracking all jobs */
	_scan_slurm_job_list();

	/* step 4: the saved pre-reconfigure partition list is no longer
	 * needed */
	FREE_NULL_LIST(old_part_list);
	pthread_mutex_unlock(&data_mutex);

	_preempt_job_dequeue();	/* MUST BE OUTSIDE OF data_mutex lock */
	if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG)
		info("gang: leaving gs_reconfig");

	return SLURM_SUCCESS;
}
Пример #8
0
/*
 * _scan_slurm_job_list - reconcile gang's bookkeeping with the global
 *	job_list so that every running or suspended job is tracked and
 *	every finished job has been released.
 *
 * For each non-pending job (other than suspended jobs with priority zero,
 * which were not suspended by gang):
 *  - running/suspended and not yet tracked: resume it if it is suspended,
 *    then add it to its partition;
 *  - running/suspended and already tracked: nothing to do;
 *  - anything else (completing or completed): remove it from its partition.
 * Jobs whose partition is not in gs_part_list are skipped. Afterwards the
 * active row of every partition is updated.
 *
 * NOTE: the caller must already hold the lock protecting the gs data.
 */
static void _scan_slurm_job_list(void)
{
	ListIterator iter;
	struct job_record *job;
	struct gs_part *part;
	char *name;

	if (!job_list) {	/* no jobs */
		if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG)
			info("gang: _scan_slurm_job_list: job_list NULL");
		return;
	}
	if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG)
		info("gang: _scan_slurm_job_list: job_list exists...");

	iter = list_iterator_create(job_list);
	while ((job = (struct job_record *) list_next(iter))) {
		if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG) {
			info("gang: _scan_slurm_job_list: checking job %u",
			    job->job_id);
		}

		/* jobs gang has no business with */
		if (IS_JOB_PENDING(job))
			continue;
		if (IS_JOB_SUSPENDED(job) && (job->priority == 0))
			continue;	/* not suspended by us */

		/* prefer the name from the partition record, falling back
		 * to the name stored in the job itself */
		name = (job->part_ptr && job->part_ptr->name) ?
		       job->part_ptr->name : job->partition;

		/* every remaining case needs the job's GS partition */
		part = list_find_first(gs_part_list, _find_gs_part, name);
		if (!part)	/* no partition */
			continue;

		if (!IS_JOB_SUSPENDED(job) && !IS_JOB_RUNNING(job)) {
			/* not pending, suspended, or running, so the job is
			 * completing or completed. Make sure it has been
			 * released */
			_remove_job_from_part(job->job_id, part, false);
			continue;
		}

		/* the job is active: is it being tracked already? */
		if (_find_job_index(part, job->job_id) >= 0)
			continue;

		/* Untracked active job. The likely scenario is that the
		 * controller failed over (the original note is garbled --
		 * confirm) and this is a job that gang had previously
		 * suspended. The previous order of jobs cannot be determined
		 * without preserving gang state, which is not worth the
		 * extra infrastructure, so just resume the job if needed
		 * and append it to the partition's job list. */
		if (IS_JOB_SUSPENDED(job) && job->priority)
			_resume_job(job->job_id);
		_add_job_to_part(part, job);
	}
	list_iterator_destroy(iter);

	/* with all of the old jobs flushed out, refresh the active row
	 * of every partition */
	_update_all_active_rows();
}