Example #1
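/*
 * _attempt_backfill - Scan the pending job queue and try to start jobs
 * which can run now without delaying the expected start of higher
 * priority jobs. A node-space map (time slices x available nodes) is
 * built as jobs are tested, and a reservation is added for each job
 * expected to start later.
 * RET 0 on a normal pass, 1 if the pass is aborted early because the
 * system state changed while locks were yielded.
 */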
static int _attempt_backfill(void)
{
    bool filter_root = false;
    List job_queue;
    job_queue_rec_t *job_queue_rec;
    slurmdb_qos_rec_t *qos_ptr = NULL;
    int i, j, node_space_recs;
    struct job_record *job_ptr;
    struct part_record *part_ptr;
    uint32_t end_time, end_reserve;
    uint32_t time_limit, comp_time_limit, orig_time_limit;
    uint32_t min_nodes, max_nodes, req_nodes;
    bitstr_t *avail_bitmap = NULL, *resv_bitmap = NULL;
    time_t now = time(NULL), sched_start, later_start, start_res;
    node_space_map_t *node_space;
    static int sched_timeout = 0;
    int this_sched_timeout = 0, rc = 0;

    sched_start = now;
    if (sched_timeout == 0) {
        sched_timeout = slurm_get_msg_timeout() / 2;
        sched_timeout = MAX(sched_timeout, 1);
        sched_timeout = MIN(sched_timeout, 10);
    }
    this_sched_timeout = sched_timeout;

#ifdef HAVE_CRAY
    /*
     * Run a Basil Inventory immediately before setting up the schedule
     * plan, to avoid race conditions caused by ALPS node state change.
     * Needs to be done with the node-state lock taken.
     */
    if (select_g_reconfigure()) {
        debug4("backfill: not scheduling due to ALPS");
        return SLURM_SUCCESS;
    }
#endif

    if (slurm_get_root_filter())
        filter_root = true;

    job_queue = build_job_queue(true);
    if (list_count(job_queue) <= 1) {
        debug("backfill: no jobs to backfill");
        list_destroy(job_queue);
        return 0;
    }

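    /* Initialize the node-space map with a single record spanning the
     * backfill window and containing all currently available nodes */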
    node_space = xmalloc(sizeof(node_space_map_t) *
                         (max_backfill_job_cnt + 3));
    node_space[0].begin_time = sched_start;
    node_space[0].end_time = sched_start + backfill_window;
    node_space[0].avail_bitmap = bit_copy(avail_node_bitmap);
    node_space[0].next = 0;
    node_space_recs = 1;
    if (debug_flags & DEBUG_FLAG_BACKFILL)
        _dump_node_space_table(node_space);

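    /* Main loop: pop jobs from the sorted queue in priority order and
     * test whether each can be started or backfill scheduled */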
    while ((job_queue_rec = (job_queue_rec_t *)
                            list_pop_bottom(job_queue, sort_job_queue2))) {
        job_ptr  = job_queue_rec->job_ptr;
        part_ptr = job_queue_rec->part_ptr;
        xfree(job_queue_rec);
        if (!IS_JOB_PENDING(job_ptr))
            continue;	/* started in other partition */
        job_ptr->part_ptr = part_ptr;

        if (debug_flags & DEBUG_FLAG_BACKFILL)
            info("backfill test for job %u", job_ptr->job_id);

        if ((job_ptr->state_reason == WAIT_ASSOC_JOB_LIMIT) ||
                (job_ptr->state_reason == WAIT_ASSOC_RESOURCE_LIMIT) ||
                (job_ptr->state_reason == WAIT_ASSOC_TIME_LIMIT) ||
                (job_ptr->state_reason == WAIT_QOS_JOB_LIMIT) ||
                (job_ptr->state_reason == WAIT_QOS_RESOURCE_LIMIT) ||
                (job_ptr->state_reason == WAIT_QOS_TIME_LIMIT) ||
                !acct_policy_job_runnable(job_ptr)) {
            debug2("backfill: job %u is not allowed to run now. "
                   "Skipping it. State=%s. Reason=%s. Priority=%u",
                   job_ptr->job_id,
                   job_state_string(job_ptr->job_state),
                   job_reason_string(job_ptr->state_reason),
                   job_ptr->priority);
            continue;
        }

        if (((part_ptr->state_up & PARTITION_SCHED) == 0) ||
                (part_ptr->node_bitmap == NULL))
            continue;
        if ((part_ptr->flags & PART_FLAG_ROOT_ONLY) && filter_root)
            continue;

        if ((!job_independent(job_ptr, 0)) ||
                (license_job_test(job_ptr, time(NULL)) != SLURM_SUCCESS))
            continue;

        /* Determine minimum and maximum node counts */
        min_nodes = MAX(job_ptr->details->min_nodes,
                        part_ptr->min_nodes);
        if (job_ptr->details->max_nodes == 0)
            max_nodes = part_ptr->max_nodes;
        else
            max_nodes = MIN(job_ptr->details->max_nodes,
                            part_ptr->max_nodes);
        max_nodes = MIN(max_nodes, 500000);     /* prevent overflows */
        if (job_ptr->details->max_nodes)
            req_nodes = max_nodes;
        else
            req_nodes = min_nodes;
        if (min_nodes > max_nodes) {
            /* job's min_nodes exceeds partition's max_nodes */
            continue;
        }

        /* Determine job's expected completion time */
        if (job_ptr->time_limit == NO_VAL) {
            if (part_ptr->max_time == INFINITE)
                time_limit = 365 * 24 * 60; /* one year */
            else
                time_limit = part_ptr->max_time;
        } else {
            if (part_ptr->max_time == INFINITE)
                time_limit = job_ptr->time_limit;
            else
                time_limit = MIN(job_ptr->time_limit,
                                 part_ptr->max_time);
        }
        comp_time_limit = time_limit;
        orig_time_limit = job_ptr->time_limit;
        qos_ptr = job_ptr->qos_ptr;	/* use this job's QOS, not a stale
					 * pointer from a prior iteration */
        if (qos_ptr && (qos_ptr->flags & QOS_FLAG_NO_RESERVE))
            time_limit = job_ptr->time_limit = 1;
        else if (job_ptr->time_min && (job_ptr->time_min < time_limit))
            time_limit = job_ptr->time_limit = job_ptr->time_min;

        /* Determine impact of any resource reservations */
        later_start = now;
TRY_LATER:
        FREE_NULL_BITMAP(avail_bitmap);
        start_res   = later_start;
        later_start = 0;
        j = job_test_resv(job_ptr, &start_res, true, &avail_bitmap);
        if (j != SLURM_SUCCESS) {
            job_ptr->time_limit = orig_time_limit;
            continue;
        }
        if (start_res > now)
            end_time = (time_limit * 60) + start_res;
        else
            end_time = (time_limit * 60) + now;

        /* Identify usable nodes for this job */
        bit_and(avail_bitmap, part_ptr->node_bitmap);
        bit_and(avail_bitmap, up_node_bitmap);
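        /* AND in availability from every node-space record overlapping
         * the job's expected run window; note the end of the first such
         * record as a candidate time for a later retry */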
        for (j=0; ; ) {
            if ((node_space[j].end_time > start_res) &&
                    node_space[j].next && (later_start == 0))
                later_start = node_space[j].end_time;
            if (node_space[j].end_time <= start_res)
                ;
            else if (node_space[j].begin_time <= end_time) {
                bit_and(avail_bitmap,
                        node_space[j].avail_bitmap);
            } else
                break;
            if ((j = node_space[j].next) == 0)
                break;
        }

        if (job_ptr->details->exc_node_bitmap) {
            bit_not(job_ptr->details->exc_node_bitmap);
            bit_and(avail_bitmap,
                    job_ptr->details->exc_node_bitmap);
            bit_not(job_ptr->details->exc_node_bitmap);
        }

        /* Test if insufficient nodes remain OR
         *	required nodes missing OR
         *	nodes lack features */
        if ((bit_set_count(avail_bitmap) < min_nodes) ||
                ((job_ptr->details->req_node_bitmap) &&
                 (!bit_super_set(job_ptr->details->req_node_bitmap,
                                 avail_bitmap))) ||
                (job_req_node_filter(job_ptr, avail_bitmap))) {
            if (later_start) {
                job_ptr->start_time = 0;
                goto TRY_LATER;
            }
            job_ptr->time_limit = orig_time_limit;
            continue;
        }

        /* Identify nodes which are definitely off limits */
        FREE_NULL_BITMAP(resv_bitmap);
        resv_bitmap = bit_copy(avail_bitmap);
        bit_not(resv_bitmap);

        if ((time(NULL) - sched_start) >= this_sched_timeout) {
            debug("backfill: loop taking too long, yielding locks");
            if (_yield_locks()) {
                debug("backfill: system state changed, "
                      "breaking out");
                rc = 1;
                break;
            } else {
                this_sched_timeout += sched_timeout;
            }
        }
        /* this is the time consuming operation */
        debug2("backfill: entering _try_sched for job %u.",
               job_ptr->job_id);
        j = _try_sched(job_ptr, &avail_bitmap,
                       min_nodes, max_nodes, req_nodes);
        debug2("backfill: finished _try_sched for job %u.",
               job_ptr->job_id);
        now = time(NULL);
        if (j != SLURM_SUCCESS) {
            job_ptr->time_limit = orig_time_limit;
            job_ptr->start_time = 0;
            continue;	/* not runnable */
        }

        if (start_res > job_ptr->start_time) {
            job_ptr->start_time = start_res;
            last_job_update = now;
        }
        if (job_ptr->start_time <= now) {
            int rc = _start_job(job_ptr, resv_bitmap);
            if (qos_ptr && (qos_ptr->flags & QOS_FLAG_NO_RESERVE))
                job_ptr->time_limit = orig_time_limit;
            else if ((rc == SLURM_SUCCESS) && job_ptr->time_min) {
                /* Set time limit as high as possible */
                job_ptr->time_limit = comp_time_limit;
                job_ptr->end_time = job_ptr->start_time +
                                    (comp_time_limit * 60);
                _reset_job_time_limit(job_ptr, now,
                                      node_space);
                time_limit = job_ptr->time_limit;
            } else {
                job_ptr->time_limit = orig_time_limit;
            }
            if (rc == ESLURM_ACCOUNTING_POLICY) {
                /* Unknown future start time, just skip job */
                job_ptr->start_time = 0;
                continue;
            } else if (rc != SLURM_SUCCESS) {
                /* Planned to start job, but something bad
                 * happened. */
                job_ptr->start_time = 0;
                break;
            } else {
                /* Started this job, move to next one */
                continue;
            }
        } else
            job_ptr->time_limit = orig_time_limit;

        if (later_start && (job_ptr->start_time > later_start)) {
            /* Try later when some nodes currently reserved for
             * pending jobs are free */
            job_ptr->start_time = 0;
            goto TRY_LATER;
        }

        if (job_ptr->start_time > (sched_start + backfill_window)) {
            /* Starts too far in the future to worry about */
            continue;
        }

        if (node_space_recs >= max_backfill_job_cnt) {
            /* Already have too many jobs to deal with */
            break;
        }

        end_reserve = job_ptr->start_time + (time_limit * 60);
        if (_test_resv_overlap(node_space, avail_bitmap,
                               job_ptr->start_time, end_reserve)) {
            /* This job overlaps with an existing reservation for
             * job to be backfill scheduled, which the sched
             * plugin does not know about. Try again later. */
            later_start = job_ptr->start_time;
            job_ptr->start_time = 0;
            goto TRY_LATER;
        }

        /*
         * Add reservation to scheduling table if appropriate
         */
        qos_ptr = job_ptr->qos_ptr;
        if (qos_ptr && (qos_ptr->flags & QOS_FLAG_NO_RESERVE))
            continue;
        bit_not(avail_bitmap);
        _add_reservation(job_ptr->start_time, end_reserve,
                         avail_bitmap, node_space, &node_space_recs);
        if (debug_flags & DEBUG_FLAG_BACKFILL)
            _dump_node_space_table(node_space);
    }
    FREE_NULL_BITMAP(avail_bitmap);
    FREE_NULL_BITMAP(resv_bitmap);

    for (i=0; ; ) {
        FREE_NULL_BITMAP(node_space[i].avail_bitmap);
        if ((i = node_space[i].next) == 0)
            break;
    }
    xfree(node_space);
    list_destroy(job_queue);
    return rc;
}
Example #2
File: backfill.c  Project: perryh/slurm
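/*
 * _attempt_backfill - Newer variant of the backfill scheduling pass.
 * In addition to the logic above, it records backfill diagnostics,
 * periodically yields locks so pending RPCs can be processed, limits
 * how many jobs are tested per user, skips elements of a job array
 * which has already been rejected, and excludes reserved cores from
 * node selection.
 * RET 0 on a normal pass, 1 if the pass is aborted early.
 */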
static int _attempt_backfill(void)
{
	DEF_TIMERS;
	bool filter_root = false;
	List job_queue;
	job_queue_rec_t *job_queue_rec;
	slurmdb_qos_rec_t *qos_ptr = NULL;
	int i, j, node_space_recs;
	struct job_record *job_ptr;
	struct part_record *part_ptr;
	uint32_t end_time, end_reserve;
	uint32_t time_limit, comp_time_limit, orig_time_limit;
	uint32_t min_nodes, max_nodes, req_nodes;
	bitstr_t *avail_bitmap = NULL, *resv_bitmap = NULL;
	bitstr_t *exc_core_bitmap = NULL;
	time_t now, sched_start, later_start, start_res, resv_end;
	node_space_map_t *node_space;
	struct timeval bf_time1, bf_time2;
	int sched_timeout = 2, yield_sleep = 1;
	int rc = 0;
	int job_test_count = 0;
	uint32_t *uid = NULL, nuser = 0;
	uint16_t *njobs = NULL;
	bool already_counted;
	uint32_t reject_array_job_id = 0;

#ifdef HAVE_CRAY
	/*
	 * Run a Basil Inventory immediately before setting up the schedule
	 * plan, to avoid race conditions caused by ALPS node state change.
	 * Needs to be done with the node-state lock taken.
	 */
	START_TIMER;
	if (select_g_reconfigure()) {
		debug4("backfill: not scheduling due to ALPS");
		return SLURM_SUCCESS;
	}
	END_TIMER;
	if (debug_flags & DEBUG_FLAG_BACKFILL)
		info("backfill: ALPS inventory completed, %s", TIME_STR);

	/* The Basil inventory can take a long time to complete. Process
	 * pending RPCs before starting the backfill scheduling logic */
	_yield_locks(1);
#endif

	START_TIMER;
	if (debug_flags & DEBUG_FLAG_BACKFILL)
		info("backfill: beginning");
	sched_start = now = time(NULL);

	if (slurm_get_root_filter())
		filter_root = true;

	job_queue = build_job_queue(true);
	if (list_count(job_queue) == 0) {
		debug("backfill: no jobs to backfill");
		list_destroy(job_queue);
		return 0;
	}

	gettimeofday(&bf_time1, NULL);

	slurmctld_diag_stats.bf_queue_len = list_count(job_queue);
	slurmctld_diag_stats.bf_queue_len_sum += slurmctld_diag_stats.
						 bf_queue_len;
	slurmctld_diag_stats.bf_last_depth = 0;
	slurmctld_diag_stats.bf_last_depth_try = 0;
	slurmctld_diag_stats.bf_when_last_cycle = now;
	bf_last_yields = 0;
	slurmctld_diag_stats.bf_active = 1;

	node_space = xmalloc(sizeof(node_space_map_t) *
			     (max_backfill_job_cnt + 3));
	node_space[0].begin_time = sched_start;
	node_space[0].end_time = sched_start + backfill_window;
	node_space[0].avail_bitmap = bit_copy(avail_node_bitmap);
	node_space[0].next = 0;
	node_space_recs = 1;
	if (debug_flags & DEBUG_FLAG_BACKFILL)
		_dump_node_space_table(node_space);

	if (max_backfill_job_per_user) {
		uid = xmalloc(BF_MAX_USERS * sizeof(uint32_t));
		njobs = xmalloc(BF_MAX_USERS * sizeof(uint16_t));
	}
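	/* Main loop: pop jobs from the sorted queue in priority order and
	 * test whether each can be started or backfill scheduled */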
	while ((job_queue_rec = (job_queue_rec_t *)
				list_pop_bottom(job_queue, sort_job_queue2))) {
		job_ptr  = job_queue_rec->job_ptr;
		orig_time_limit = job_ptr->time_limit;

		if ((time(NULL) - sched_start) >= sched_timeout) {
			uint32_t save_time_limit = job_ptr->time_limit;
			job_ptr->time_limit = orig_time_limit;
			if (debug_flags & DEBUG_FLAG_BACKFILL) {
				END_TIMER;
				info("backfill: completed yielding locks "
				     "after testing %d jobs, %s",
				     job_test_count, TIME_STR);
			}
			if (_yield_locks(yield_sleep) && !backfill_continue) {
				if (debug_flags & DEBUG_FLAG_BACKFILL) {
					info("backfill: system state changed, "
					     "breaking out after testing %d "
					     "jobs", job_test_count);
				}
				rc = 1;
				break;
			}
			job_ptr->time_limit = save_time_limit;
			/* Reset backfill scheduling timers, resume testing */
			sched_start = time(NULL);
			job_test_count = 0;
			START_TIMER;
		}

		part_ptr = job_queue_rec->part_ptr;
		job_test_count++;

		xfree(job_queue_rec);
		if (!IS_JOB_PENDING(job_ptr))
			continue;	/* started in other partition */
		if (!avail_front_end(job_ptr))
			continue;	/* No available frontend for this job */
		if (job_ptr->array_task_id != (uint16_t) NO_VAL) {
			if (reject_array_job_id == job_ptr->array_job_id)
				continue;  /* already rejected array element */
			/* assume reject whole array for now, clear if OK */
			reject_array_job_id = job_ptr->array_job_id;
		}
		job_ptr->part_ptr = part_ptr;

		if (debug_flags & DEBUG_FLAG_BACKFILL)
			info("backfill test for job %u", job_ptr->job_id);

		slurmctld_diag_stats.bf_last_depth++;
		already_counted = false;

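		/* Limit the number of jobs tested per user in this cycle */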
		if (max_backfill_job_per_user) {
			for (j = 0; j < nuser; j++) {
				if (job_ptr->user_id == uid[j]) {
					njobs[j]++;
					if (debug_flags & DEBUG_FLAG_BACKFILL)
						debug("backfill: user %u: "
						      "#jobs %u",
						      uid[j], njobs[j]);
					break;
				}
			}
			if (j == nuser) { /* user not found */
				if (nuser < BF_MAX_USERS) {
					uid[j] = job_ptr->user_id;
					njobs[j] = 1;
					nuser++;
				} else {
					error("backfill: too many users in "
					      "queue. Consider increasing "
					      "BF_MAX_USERS");
				}
				if (debug_flags & DEBUG_FLAG_BACKFILL)
					debug2("backfill: found new user %u. "
					       "Total #users now %u",
					       job_ptr->user_id, nuser);
			} else {
				if (njobs[j] > max_backfill_job_per_user) {
					/* skip job */
					if (debug_flags & DEBUG_FLAG_BACKFILL)
						debug("backfill: have already "
						      "checked %u jobs for "
						      "user %u; skipping "
						      "job %u",
						      max_backfill_job_per_user,
						      job_ptr->user_id,
						      job_ptr->job_id);
					continue;
				}
			}
		}

		if (((part_ptr->state_up & PARTITION_SCHED) == 0) ||
		    (part_ptr->node_bitmap == NULL))
			continue;
		if ((part_ptr->flags & PART_FLAG_ROOT_ONLY) && filter_root)
			continue;

		if ((!job_independent(job_ptr, 0)) ||
		    (license_job_test(job_ptr, time(NULL)) != SLURM_SUCCESS))
			continue;

		/* Determine minimum and maximum node counts */
		min_nodes = MAX(job_ptr->details->min_nodes,
				part_ptr->min_nodes);
		if (job_ptr->details->max_nodes == 0)
			max_nodes = part_ptr->max_nodes;
		else
			max_nodes = MIN(job_ptr->details->max_nodes,
					part_ptr->max_nodes);
		max_nodes = MIN(max_nodes, 500000);     /* prevent overflows */
		if (job_ptr->details->max_nodes)
			req_nodes = max_nodes;
		else
			req_nodes = min_nodes;
		if (min_nodes > max_nodes) {
			/* job's min_nodes exceeds partition's max_nodes */
			continue;
		}

		/* Determine job's expected completion time */
		if (job_ptr->time_limit == NO_VAL) {
			if (part_ptr->max_time == INFINITE)
				time_limit = 365 * 24 * 60; /* one year */
			else
				time_limit = part_ptr->max_time;
		} else {
			if (part_ptr->max_time == INFINITE)
				time_limit = job_ptr->time_limit;
			else
				time_limit = MIN(job_ptr->time_limit,
						 part_ptr->max_time);
		}
		comp_time_limit = time_limit;
		qos_ptr = job_ptr->qos_ptr;
		if (qos_ptr && (qos_ptr->flags & QOS_FLAG_NO_RESERVE) &&
		    slurm_get_preempt_mode())
			time_limit = job_ptr->time_limit = 1;
		else if (job_ptr->time_min && (job_ptr->time_min < time_limit))
			time_limit = job_ptr->time_limit = job_ptr->time_min;

		/* Determine impact of any resource reservations */
		later_start = now;
 TRY_LATER:
		if ((time(NULL) - sched_start) >= sched_timeout) {
			uint32_t save_time_limit = job_ptr->time_limit;
			job_ptr->time_limit = orig_time_limit;
			if (debug_flags & DEBUG_FLAG_BACKFILL) {
				END_TIMER;
				info("backfill: completed yielding locks 2"
				     "after testing %d jobs, %s",
				     job_test_count, TIME_STR);
			}
			if (_yield_locks(yield_sleep) && !backfill_continue) {
				if (debug_flags & DEBUG_FLAG_BACKFILL) {
					info("backfill: system state changed, "
					     "breaking out after testing %d "
					     "jobs", job_test_count);
				}
				rc = 1;
				break;
			}
			job_ptr->time_limit = save_time_limit;
			/* Reset backfill scheduling timers, resume testing */
			sched_start = time(NULL);
			job_test_count = 1;
			START_TIMER;
		}

		FREE_NULL_BITMAP(avail_bitmap);
		FREE_NULL_BITMAP(exc_core_bitmap);
		start_res   = later_start;
		later_start = 0;
		j = job_test_resv(job_ptr, &start_res, true, &avail_bitmap,
				  &exc_core_bitmap);
		if (j != SLURM_SUCCESS) {
			job_ptr->time_limit = orig_time_limit;
			continue;
		}
		if (start_res > now)
			end_time = (time_limit * 60) + start_res;
		else
			end_time = (time_limit * 60) + now;
		resv_end = find_resv_end(start_res);
		/* Identify usable nodes for this job */
		bit_and(avail_bitmap, part_ptr->node_bitmap);
		bit_and(avail_bitmap, up_node_bitmap);
		for (j=0; ; ) {
			if ((node_space[j].end_time > start_res) &&
			     node_space[j].next && (later_start == 0))
				later_start = node_space[j].end_time;
			if (node_space[j].end_time <= start_res)
				;
			else if (node_space[j].begin_time <= end_time) {
				bit_and(avail_bitmap,
					node_space[j].avail_bitmap);
			} else
				break;
			if ((j = node_space[j].next) == 0)
				break;
		}
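		/* If an advance reservation ends before the next candidate
		 * retry time, retry just after the reservation ends */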
		if ((resv_end++) &&
		    ((later_start == 0) || (resv_end < later_start))) {
			later_start = resv_end;
		}

		if (job_ptr->details->exc_node_bitmap) {
			bit_not(job_ptr->details->exc_node_bitmap);
			bit_and(avail_bitmap,
				job_ptr->details->exc_node_bitmap);
			bit_not(job_ptr->details->exc_node_bitmap);
		}

		/* Test if insufficient nodes remain OR
		 *	required nodes missing OR
		 *	nodes lack features */
		if ((bit_set_count(avail_bitmap) < min_nodes) ||
		    ((job_ptr->details->req_node_bitmap) &&
		     (!bit_super_set(job_ptr->details->req_node_bitmap,
				     avail_bitmap))) ||
		    (job_req_node_filter(job_ptr, avail_bitmap))) {
			if (later_start) {
				job_ptr->start_time = 0;
				goto TRY_LATER;
			}
			/* Job can not start until too far in the future */
			job_ptr->time_limit = orig_time_limit;
			job_ptr->start_time = sched_start + backfill_window;
			continue;
		}

		/* Identify nodes which are definitely off limits */
		FREE_NULL_BITMAP(resv_bitmap);
		resv_bitmap = bit_copy(avail_bitmap);
		bit_not(resv_bitmap);

		/* this is the time consuming operation */
		debug2("backfill: entering _try_sched for job %u.",
		       job_ptr->job_id);

		if (!already_counted) {
			slurmctld_diag_stats.bf_last_depth_try++;
			already_counted = true;
		}

		j = _try_sched(job_ptr, &avail_bitmap, min_nodes, max_nodes,
			       req_nodes, exc_core_bitmap);

		now = time(NULL);
		if (j != SLURM_SUCCESS) {
			job_ptr->time_limit = orig_time_limit;
			job_ptr->start_time = 0;
			continue;	/* not runnable */
		}

		if (start_res > job_ptr->start_time) {
			job_ptr->start_time = start_res;
			last_job_update = now;
		}
		if (job_ptr->start_time <= now) {
			uint32_t save_time_limit = job_ptr->time_limit;
			int rc = _start_job(job_ptr, resv_bitmap);
			if (qos_ptr && (qos_ptr->flags & QOS_FLAG_NO_RESERVE)) {
				if (orig_time_limit == NO_VAL)
					job_ptr->time_limit = comp_time_limit;
				else
					job_ptr->time_limit = orig_time_limit;
				job_ptr->end_time = job_ptr->start_time +
						    (job_ptr->time_limit * 60);
			} else if ((rc == SLURM_SUCCESS) && job_ptr->time_min) {
				/* Set time limit as high as possible */
				job_ptr->time_limit = comp_time_limit;
				job_ptr->end_time = job_ptr->start_time +
						    (comp_time_limit * 60);
				_reset_job_time_limit(job_ptr, now,
						      node_space);
				time_limit = job_ptr->time_limit;
			} else {
				job_ptr->time_limit = orig_time_limit;
			}
			if (rc == ESLURM_ACCOUNTING_POLICY) {
				/* Unknown future start time, just skip job */
				job_ptr->start_time = 0;
				continue;
			} else if (rc != SLURM_SUCCESS) {
				/* Planned to start job, but something bad
				 * happened. */
				job_ptr->start_time = 0;
				break;
			} else {
				/* Started this job, move to next one */
				reject_array_job_id = 0;

				/* Update the database if job time limit
				 * changed and move to next job */
				if (save_time_limit != job_ptr->time_limit)
					jobacct_storage_g_job_start(acct_db_conn,
								    job_ptr);
				continue;
			}
		} else
			job_ptr->time_limit = orig_time_limit;

		if (later_start && (job_ptr->start_time > later_start)) {
			/* Try later when some nodes currently reserved for
			 * pending jobs are free */
			job_ptr->start_time = 0;
			goto TRY_LATER;
		}

		if (job_ptr->start_time > (sched_start + backfill_window)) {
			/* Starts too far in the future to worry about */
			continue;
		}

		if (node_space_recs >= max_backfill_job_cnt) {
			/* Already have too many jobs to deal with */
			break;
		}

		end_reserve = job_ptr->start_time + (time_limit * 60);
		if (_test_resv_overlap(node_space, avail_bitmap,
				       job_ptr->start_time, end_reserve)) {
			/* This job overlaps with an existing reservation for
			 * job to be backfill scheduled, which the sched
			 * plugin does not know about. Try again later. */
			later_start = job_ptr->start_time;
			job_ptr->start_time = 0;
			goto TRY_LATER;
		}

		/*
		 * Add reservation to scheduling table if appropriate
		 */
		if (qos_ptr && (qos_ptr->flags & QOS_FLAG_NO_RESERVE))
			continue;
		reject_array_job_id = 0;
		bit_not(avail_bitmap);
		_add_reservation(job_ptr->start_time, end_reserve,
				 avail_bitmap, node_space, &node_space_recs);
		if (debug_flags & DEBUG_FLAG_BACKFILL)
			_dump_node_space_table(node_space);
	}
	xfree(uid);
	xfree(njobs);
	FREE_NULL_BITMAP(avail_bitmap);
	FREE_NULL_BITMAP(exc_core_bitmap);
	FREE_NULL_BITMAP(resv_bitmap);

	for (i=0; ; ) {
		FREE_NULL_BITMAP(node_space[i].avail_bitmap);
		if ((i = node_space[i].next) == 0)
			break;
	}
	xfree(node_space);
	list_destroy(job_queue);
	gettimeofday(&bf_time2, NULL);
	_do_diag_stats(&bf_time1, &bf_time2, yield_sleep);
	if (debug_flags & DEBUG_FLAG_BACKFILL) {
		END_TIMER;
		info("backfill: completed testing %d jobs, %s",
		     job_test_count, TIME_STR);
	}
	return rc;
}
Example #3
File: builtin.c  Project: VURM/slurm
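/*
 * _compute_start_times - Estimate the expected start time of each
 * pending job by running a WILL_RUN selection test per job in queue
 * order, pushing back jobs whose required nodes overlap with jobs
 * already placed earlier in the pass.
 */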
static void _compute_start_times(void)
{
	int j, rc = SLURM_SUCCESS, job_cnt = 0;
	List job_queue;
	job_queue_rec_t *job_queue_rec;
	List preemptee_candidates = NULL;
	struct job_record *job_ptr;
	struct part_record *part_ptr;
	bitstr_t *alloc_bitmap = NULL, *avail_bitmap = NULL;
	uint32_t max_nodes, min_nodes, req_nodes, time_limit;
	time_t now = time(NULL), sched_start, last_job_alloc;

	sched_start = now;
	last_job_alloc = now - 1;
	alloc_bitmap = bit_alloc(node_record_count);
	if (alloc_bitmap == NULL)
		fatal("bit_alloc: malloc failure");
	job_queue = build_job_queue(true);
	while ((job_queue_rec = (job_queue_rec_t *) 
				list_pop_bottom(job_queue, sort_job_queue2))) {
		job_ptr  = job_queue_rec->job_ptr;
		part_ptr = job_queue_rec->part_ptr;
		xfree(job_queue_rec);
		if (part_ptr != job_ptr->part_ptr)
			continue;	/* Only test one partition */

		if (job_cnt++ > max_backfill_job_cnt) {
			debug("backfill: loop taking to long, breaking out");
			break;
		}

		/* Determine minimum and maximum node counts */
		min_nodes = MAX(job_ptr->details->min_nodes,
				part_ptr->min_nodes);

		if (job_ptr->details->max_nodes == 0)
			max_nodes = part_ptr->max_nodes;
		else
			max_nodes = MIN(job_ptr->details->max_nodes,
					part_ptr->max_nodes);

		max_nodes = MIN(max_nodes, 500000);     /* prevent overflows */

		if (job_ptr->details->max_nodes)
			req_nodes = max_nodes;
		else
			req_nodes = min_nodes;

		if (min_nodes > max_nodes) {
			/* job's min_nodes exceeds partition's max_nodes */
			continue;
		}

		j = job_test_resv(job_ptr, &now, true, &avail_bitmap);
		if (j != SLURM_SUCCESS)
			continue;

		rc = select_g_job_test(job_ptr, avail_bitmap,
				       min_nodes, max_nodes, req_nodes,
				       SELECT_MODE_WILL_RUN,
				       preemptee_candidates, NULL);
		last_job_update = now;

		if (job_ptr->time_limit == INFINITE)
			time_limit = 365 * 24 * 60 * 60;
		else if (job_ptr->time_limit != NO_VAL)
			time_limit = job_ptr->time_limit * 60;
		else if (job_ptr->part_ptr &&
			 (job_ptr->part_ptr->max_time != INFINITE))
			time_limit = job_ptr->part_ptr->max_time * 60;
		else
			time_limit = 365 * 24 * 60 * 60;
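		/* If this job needs nodes expected to be busy with an earlier
		 * job, push its start time back to when that job completes */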
		if (bit_overlap(alloc_bitmap, avail_bitmap) &&
		    (job_ptr->start_time <= last_job_alloc)) {
			job_ptr->start_time = last_job_alloc;
		}
		bit_or(alloc_bitmap, avail_bitmap);
		last_job_alloc = job_ptr->start_time + time_limit;
		FREE_NULL_BITMAP(avail_bitmap);

		if ((time(NULL) - sched_start) >= sched_timeout) {
			debug("backfill: loop taking to long, breaking out");
			break;
		}
	}
	list_destroy(job_queue);
	FREE_NULL_BITMAP(alloc_bitmap);
}