Beispiel #1
0
/*
 * builtin_agent - thread routine that periodically works out when pending
 *	jobs can start.
 *
 * Loads the configuration once, then loops until stop_builtin is set. Each
 * pass sleeps via _my_sleep(builtin_interval), reloads the configuration if
 * config_flag was raised, and - provided at least builtin_interval seconds
 * have elapsed since the previous scheduling pass - takes the slurmctld
 * locks, calls _compute_start_times() and bb_g_job_try_stage_in(), and
 * releases the locks again.
 *
 * IN args - not used
 * RET always NULL
 */
extern void *builtin_agent(void *args)
{
	static time_t last_sched_time = 0;
	/* Read config, nodes and partitions; Write jobs */
	slurmctld_lock_t sched_locks = {
		READ_LOCK, WRITE_LOCK, READ_LOCK, READ_LOCK, READ_LOCK };
	double elapsed;

	_load_config();
	last_sched_time = time(NULL);

	for (;;) {
		if (stop_builtin)
			break;
		_my_sleep(builtin_interval);
		/* A stop request may have arrived while sleeping */
		if (stop_builtin)
			break;

		if (config_flag) {
			config_flag = false;
			_load_config();
		}

		/* Too early for another pass: go back to sleep */
		elapsed = difftime(time(NULL), last_sched_time);
		if (elapsed < builtin_interval)
			continue;

		lock_slurmctld(sched_locks);
		_compute_start_times();
		/* Stamp the pass while the locks are still held */
		last_sched_time = time(NULL);
		(void) bb_g_job_try_stage_in();
		unlock_slurmctld(sched_locks);
	}

	return NULL;
}
Beispiel #2
0
/*
 * backfill_agent - thread routine that periodically attempts to backfill
 *	jobs.
 *
 * Loads the configuration once, then loops until stop_backfill is set. Each
 * pass sleeps via _my_sleep(backfill_interval) and reloads the configuration
 * if config_flag was raised. A backfill attempt is skipped when fewer than
 * backfill_interval seconds have elapsed since the last one, or when any of
 * the checks below says the attempt should not run now. Otherwise the
 * slurmctld locks are taken and _attempt_backfill() is called repeatedly
 * until it returns zero.
 *
 * IN args - not used
 * RET always NULL
 */
extern void *backfill_agent(void *args)
{
	static time_t last_backfill_time = 0;
	/* Read config and partitions; Write jobs and nodes */
	slurmctld_lock_t bf_locks = {
		READ_LOCK, WRITE_LOCK, WRITE_LOCK, READ_LOCK };
	double elapsed;

	_load_config();
	last_backfill_time = time(NULL);

	for (;;) {
		if (stop_backfill)
			break;
		_my_sleep(backfill_interval);
		/* A stop request may have arrived while sleeping */
		if (stop_backfill)
			break;

		if (config_flag) {
			config_flag = false;
			_load_config();
		}

		/* The checks below run in this order and stop at the first
		 * one that vetoes the attempt. */
		elapsed = difftime(time(NULL), last_backfill_time);
		if (elapsed < backfill_interval)
			continue;
		if (_job_is_completing())
			continue;
		if (_many_pending_rpcs())
			continue;
		if (!avail_front_end(NULL))
			continue;
		if (!_more_work(last_backfill_time))
			continue;

		lock_slurmctld(bf_locks);
		for (;;) {
			/* Repeat for as long as the attempt returns non-zero */
			if (!_attempt_backfill())
				break;
		}
		last_backfill_time = time(NULL);
		unlock_slurmctld(bf_locks);
	}

	return NULL;
}
Beispiel #3
0
/*
 * _ckpt_agent_thr - checkpoint timeout processing thread.
 *
 * Wakes up after every _my_sleep(1) and, if ckpt_timeout_list exists, walks
 * it while holding ckpt_agent_mutex. Every record whose end_time is not
 * later than the current time is logged, passed to _ckpt_signal_step() and
 * removed from the list. The loop ends once ckpt_agent_stop is set.
 *
 * IN arg - not used
 * RET always NULL
 */
static void *_ckpt_agent_thr(void *arg)
{
	for (;;) {
		ListIterator timeout_iter;
		struct ckpt_timeout_info *entry;
		time_t cutoff;

		_my_sleep(1);
		if (ckpt_agent_stop)
			break;
		if (!ckpt_timeout_list)
			continue;	/* nothing to scan yet */

		cutoff = time(NULL);
		timeout_iter = list_iterator_create(ckpt_timeout_list);
		slurm_mutex_lock(&ckpt_agent_mutex);
		/* Signal and drop every record that has reached its deadline */
		while ((entry = list_next(timeout_iter))) {
			if (entry->end_time <= cutoff) {
				info("checkpoint timeout for %u.%u",
				     entry->job_id, entry->step_id);
				_ckpt_signal_step(entry);
				list_delete_item(timeout_iter);
			}
		}
		slurm_mutex_unlock(&ckpt_agent_mutex);
		list_iterator_destroy(timeout_iter);
	}

	return NULL;
}
Beispiel #4
0
/*
 * _sicp_agent - thread routine for periodic SICP job record maintenance.
 *
 * Polls sicp_stop after every _my_sleep(1). Once at least sicp_interval
 * seconds have elapsed since the previous pass it calls
 * _load_sicp_other_cluster(), removes the entries of sicp_job_list matched
 * by _list_find_sicp_old while holding sicp_lock (logging the records when
 * DEBUG_FLAG_SICP is set), and finally calls _dump_sicp_state().
 *
 * IN args - not used
 * RET always NULL
 */
extern void *_sicp_agent(void *args)
{
    static time_t last_sicp_time = 0;

    for (;;) {
        time_t current;

        if (sicp_stop)
            break;
        _my_sleep(1);
        /* A stop request may have arrived while sleeping */
        if (sicp_stop)
            break;

        current = time(NULL);
        if (difftime(current, last_sicp_time) < sicp_interval)
            continue;
        /* Stamp the pass before doing the work */
        last_sicp_time = current;

        _load_sicp_other_cluster();

        pthread_mutex_lock(&sicp_lock);
        list_delete_all(sicp_job_list, _list_find_sicp_old, "");
        if (slurm_get_debug_flags() & DEBUG_FLAG_SICP) {
            _log_sicp_recs();
        }
        pthread_mutex_unlock(&sicp_lock);

        /* Has own locking */
        _dump_sicp_state();
    }

    return NULL;
}
Beispiel #5
0
/*
 * backfill_agent - thread routine that periodically attempts to backfill
 *	jobs and reports how long each attempt took.
 *
 * Loads the configuration once, then loops until stop_backfill is set. Each
 * pass sleeps via _my_sleep(backfill_interval) and reloads the configuration
 * if config_flag was raised. A backfill attempt is skipped when fewer than
 * backfill_interval seconds have elapsed since the last one, or when any of
 * the checks below says the attempt should not run now. Otherwise the
 * slurmctld locks are taken and _attempt_backfill() is called repeatedly
 * until it returns zero. The time spanning lock acquisition through release
 * is formatted by _diff_tv_str() and logged when DEBUG_FLAG_BACKFILL is set.
 *
 * IN args - not used
 * RET always NULL
 */
extern void *backfill_agent(void *args)
{
	static time_t last_backfill_time = 0;
	/* Read config and partitions; Write jobs and nodes */
	slurmctld_lock_t bf_locks = {
		READ_LOCK, WRITE_LOCK, WRITE_LOCK, READ_LOCK };
	struct timeval started, finished;
	char elapsed_str[20];
	double elapsed;

	_load_config();
	last_backfill_time = time(NULL);

	for (;;) {
		if (stop_backfill)
			break;
		_my_sleep(backfill_interval);
		/* A stop request may have arrived while sleeping */
		if (stop_backfill)
			break;

		if (config_flag) {
			config_flag = false;
			_load_config();
		}

		/* The checks below run in this order and stop at the first
		 * one that vetoes the attempt. */
		elapsed = difftime(time(NULL), last_backfill_time);
		if (elapsed < backfill_interval)
			continue;
		if (_job_is_completing())
			continue;
		if (_many_pending_rpcs())
			continue;
		if (!avail_front_end())
			continue;
		if (!_more_work(last_backfill_time))
			continue;

		/* The measured span includes the wait for the locks */
		gettimeofday(&started, NULL);
		lock_slurmctld(bf_locks);
		for (;;) {
			/* Repeat for as long as the attempt returns non-zero */
			if (!_attempt_backfill())
				break;
		}
		last_backfill_time = time(NULL);
		unlock_slurmctld(bf_locks);
		gettimeofday(&finished, NULL);

		_diff_tv_str(&started, &finished, elapsed_str,
			     sizeof(elapsed_str));
		if (debug_flags & DEBUG_FLAG_BACKFILL) {
			info("backfill: completed, %s", elapsed_str);
		}
	}

	return NULL;
}
Beispiel #6
0
/*
 * backfill_agent - thread routine that periodically attempts to backfill
 *	jobs.
 *
 * When built with HAVE_SYS_PRCTL_H, first tries to name the thread through
 * prctl(PR_SET_NAME); a failure is logged and otherwise ignored. It then
 * loads the configuration and loops until stop_backfill is set. Each pass
 * sleeps via _my_sleep(), reloads the configuration if config_flag was
 * raised, and skips the attempt when fewer than backfill_interval seconds
 * have elapsed since the last one or when any of the other checks in the
 * skip condition fires. Otherwise it takes the slurmctld locks and calls
 * _attempt_backfill() once.
 *
 * IN args - not used
 * RET always NULL
 */
extern void *backfill_agent(void *args)
{
	time_t now;
	double wait_time;
	static time_t last_backfill_time = 0;
	/* Read config and partitions; Write jobs and nodes */
	slurmctld_lock_t all_locks = {
		READ_LOCK, WRITE_LOCK, WRITE_LOCK, READ_LOCK };

#if HAVE_SYS_PRCTL_H
	{
		/* One definition, so the name that is set and the name that
		 * is reported on failure cannot drift apart. */
		static const char thread_name[] = "slurmctld_bckfl";

		if (prctl(PR_SET_NAME, thread_name, NULL, NULL, NULL) < 0) {
			error("%s: cannot set my name to %s %m",
			      __func__, thread_name);
		}
	}
#endif
	_load_config();
	last_backfill_time = time(NULL);
	while (!stop_backfill) {
		/* NOTE(review): the argument looks like microseconds here; if
		 * backfill_interval is a plain int the product can overflow
		 * for large intervals - confirm against _my_sleep(). */
		_my_sleep(backfill_interval * 1000000);
		/* A stop request may have arrived while sleeping */
		if (stop_backfill)
			break;
		if (config_flag) {
			config_flag = false;
			_load_config();
		}
		now = time(NULL);
		wait_time = difftime(now, last_backfill_time);
		/* Skip this pass if it is too early or any check vetoes it */
		if ((wait_time < backfill_interval) ||
		    _job_is_completing() || _many_pending_rpcs() ||
		    !avail_front_end(NULL) || !_more_work(last_backfill_time))
			continue;

		lock_slurmctld(all_locks);
		(void) _attempt_backfill();
		last_backfill_time = time(NULL);
		unlock_slurmctld(all_locks);
	}
	return NULL;
}
Beispiel #7
0
/* Return non-zero to break the backfill loop if change in job, node or
 * partition state or the backfill scheduler needs to be stopped. */
static int _yield_locks(void)
{
	slurmctld_lock_t all_locks = {
		READ_LOCK, WRITE_LOCK, WRITE_LOCK, READ_LOCK };
	time_t job_update, node_update, part_update;

	job_update  = last_job_update;
	node_update = last_node_update;
	part_update = last_part_update;

	unlock_slurmctld(all_locks);
	_my_sleep(backfill_interval);
	lock_slurmctld(all_locks);

	if ((last_job_update  == job_update)  &&
	    (last_node_update == node_update) &&
	    (last_part_update == part_update) &&
	    (! stop_backfill) && (! config_flag))
		return 0;
	else
		return 1;
}