/*
 * Attempt to schedule a specific job on specific available nodes
 * IN job_ptr - job to schedule
 * IN/OUT avail_bitmap - nodes available/selected to use
 * IN min_nodes - minimum node count handed to the select plugin; ignored
 *	when the job carries feature counts (the highest feature count is
 *	passed in its place)
 * IN max_nodes - maximum node count handed to the select plugin
 * IN req_nodes - requested node count handed to the select plugin
 * IN exc_core_bitmap - cores which can not be used
 * RET SLURM_SUCCESS on success, otherwise an error code
 */
static int _try_sched(struct job_record *job_ptr, bitstr_t **avail_bitmap,
		      uint32_t min_nodes, uint32_t max_nodes,
		      uint32_t req_nodes, bitstr_t *exc_core_bitmap)
{
	List preemptees = NULL;
	int rc = SLURM_SUCCESS;

	if (_num_feature_count(job_ptr) == 0) {
		/*
		 * No feature counts: test the job on dedicated nodes first
		 * (shared forced to 0), then retry with the job's original
		 * sharing setting if that setting is non-zero and the first
		 * test failed or would not start the job right away.
		 */
		bitstr_t *saved_bitmap;
		uint16_t saved_shared;
		time_t now = time(NULL);
		char core_str[100];

		preemptees = slurm_find_preemptable_jobs(job_ptr);
		saved_shared = job_ptr->details->shared;
		job_ptr->details->shared = 0;
		/* Keep a pristine copy; the first test rewrites the bitmap */
		saved_bitmap = bit_copy(*avail_bitmap);

		if (exc_core_bitmap) {
			bit_fmt(core_str, (sizeof(core_str) - 1),
				exc_core_bitmap);
			debug2(" _try_sched with exclude core bitmap: %s",
			       core_str);
		}

		rc = select_g_job_test(job_ptr, *avail_bitmap, min_nodes,
				       max_nodes, req_nodes,
				       SELECT_MODE_WILL_RUN, preemptees,
				       NULL, exc_core_bitmap);
		job_ptr->details->shared = saved_shared;

		if ((saved_shared == 0) ||
		    ((rc == SLURM_SUCCESS) && (job_ptr->start_time <= now))) {
			/* First answer stands; the saved copy is not needed */
			FREE_NULL_BITMAP(saved_bitmap);
		} else {
			/* Start over from the untouched node set */
			FREE_NULL_BITMAP(*avail_bitmap);
			*avail_bitmap = saved_bitmap;
			rc = select_g_job_test(job_ptr, *avail_bitmap,
					       min_nodes, max_nodes,
					       req_nodes,
					       SELECT_MODE_WILL_RUN,
					       preemptees, NULL,
					       exc_core_bitmap);
		}
	} else {
		/*
		 * Ideally the job would be scheduled feature by feature.
		 * To avoid that complexity the feature counts are zeroed
		 * for the duration of the test and put back afterwards.
		 * This works for a single feature count and should work
		 * fairly well when there are several.
		 */
		struct job_details *details = job_ptr->details;
		struct feature_record *feature;
		ListIterator iter;
		uint16_t *saved_counts = NULL, max_count = 0;
		int idx, n_features;

		/* Save and clear every feature count, tracking the largest */
		n_features = list_count(details->feature_list);
		saved_counts = xmalloc(sizeof(uint16_t) * n_features);
		iter = list_iterator_create(details->feature_list);
		for (idx = 0; (feature = list_next(iter)); idx++) {
			if (feature->count > max_count)
				max_count = feature->count;
			saved_counts[idx] = feature->count;
			feature->count = 0;
		}
		list_iterator_destroy(iter);

		if ((job_req_node_filter(job_ptr, *avail_bitmap) ==
		     SLURM_SUCCESS) &&
		    (bit_set_count(*avail_bitmap) >= max_count)) {
			preemptees = slurm_find_preemptable_jobs(job_ptr);
			/* Largest feature count stands in for min_nodes */
			rc = select_g_job_test(job_ptr, *avail_bitmap,
					       max_count, max_nodes,
					       req_nodes,
					       SELECT_MODE_WILL_RUN,
					       preemptees, NULL,
					       exc_core_bitmap);
		} else {
			rc = ESLURM_NODES_BUSY;
		}

		/* Put the original feature counts back, in list order */
		iter = list_iterator_create(details->feature_list);
		for (idx = 0; (feature = list_next(iter)); idx++)
			feature->count = saved_counts[idx];
		list_iterator_destroy(iter);
		xfree(saved_counts);
	}

	if (preemptees)
		list_destroy(preemptees);
	return rc;
}
static char * _will_run_test(uint32_t jobid, time_t start_time, char *node_list, int *err_code, char **err_msg) { struct job_record *job_ptr = NULL; struct part_record *part_ptr; bitstr_t *avail_bitmap = NULL, *resv_bitmap = NULL; bitstr_t *exc_core_bitmap = NULL; char *hostlist, *reply_msg = NULL; uint32_t min_nodes, max_nodes, req_nodes; int rc; time_t start_res, orig_start_time; List preemptee_candidates; bool resv_overlap = false; debug2("wiki2: will_run job_id=%u start_time=%u node_list=%s", jobid, (uint32_t)start_time, node_list); job_ptr = find_job_record(jobid); if (job_ptr == NULL) { *err_code = -700; *err_msg = "No such job"; error("wiki: Failed to find job %u", jobid); return NULL; } if ((job_ptr->details == NULL) || (!IS_JOB_PENDING(job_ptr))) { *err_code = -700; *err_msg = "WillRun not applicable to non-pending job"; error("wiki: WillRun on non-pending job %u", jobid); return NULL; } part_ptr = job_ptr->part_ptr; if (part_ptr == NULL) { *err_code = -700; *err_msg = "Job lacks a partition"; error("wiki: Job %u lacks a partition", jobid); return NULL; } if ((node_list == NULL) || (node_list[0] == '\0')) { /* assume all nodes available to job for testing */ avail_bitmap = bit_copy(avail_node_bitmap); } else if (node_name2bitmap(node_list, false, &avail_bitmap) != 0) { *err_code = -700; *err_msg = "Invalid available nodes value"; error("wiki: Attempt to set invalid available node " "list for job %u, %s", jobid, node_list); return NULL; } /* Enforce reservation: access control, time and nodes */ start_res = start_time; rc = job_test_resv(job_ptr, &start_res, true, &resv_bitmap, &exc_core_bitmap, &resv_overlap); if (rc != SLURM_SUCCESS) { *err_code = -730; *err_msg = "Job denied access to reservation"; error("wiki: reservation access denied for job %u", jobid); FREE_NULL_BITMAP(avail_bitmap); FREE_NULL_BITMAP(exc_core_bitmap); return NULL; } bit_and(avail_bitmap, resv_bitmap); FREE_NULL_BITMAP(resv_bitmap); /* Only consider nodes that are not DOWN or DRAINED 
*/ bit_and(avail_bitmap, avail_node_bitmap); /* Consider only nodes in this job's partition */ if (part_ptr->node_bitmap) bit_and(avail_bitmap, part_ptr->node_bitmap); else { *err_code = -730; *err_msg = "Job's partition has no nodes"; error("wiki: no nodes in partition %s for job %u", part_ptr->name, jobid); FREE_NULL_BITMAP(avail_bitmap); FREE_NULL_BITMAP(exc_core_bitmap); return NULL; } if (job_req_node_filter(job_ptr, avail_bitmap) != SLURM_SUCCESS) { /* Job probably has invalid feature list */ *err_code = -730; *err_msg = "Job's required features not available " "on selected nodes"; error("wiki: job %u not runnable on hosts=%s", jobid, node_list); FREE_NULL_BITMAP(avail_bitmap); FREE_NULL_BITMAP(exc_core_bitmap); return NULL; } if (job_ptr->details->exc_node_bitmap) { bit_not(job_ptr->details->exc_node_bitmap); bit_and(avail_bitmap, job_ptr->details->exc_node_bitmap); bit_not(job_ptr->details->exc_node_bitmap); } if ((job_ptr->details->req_node_bitmap) && (!bit_super_set(job_ptr->details->req_node_bitmap, avail_bitmap))) { *err_code = -730; *err_msg = "Job's required nodes not available"; error("wiki: job %u not runnable on hosts=%s", jobid, node_list); FREE_NULL_BITMAP(avail_bitmap); FREE_NULL_BITMAP(exc_core_bitmap); return NULL; } min_nodes = MAX(job_ptr->details->min_nodes, part_ptr->min_nodes); if (job_ptr->details->max_nodes == 0) max_nodes = part_ptr->max_nodes; else max_nodes = MIN(job_ptr->details->max_nodes, part_ptr->max_nodes); max_nodes = MIN(max_nodes, 500000); /* prevent overflows */ if (job_ptr->details->max_nodes) req_nodes = max_nodes; else req_nodes = min_nodes; if (min_nodes > max_nodes) { /* job's min_nodes exceeds partitions max_nodes */ *err_code = -730; *err_msg = "Job's min_nodes > max_nodes"; error("wiki: job %u not runnable on hosts=%s", jobid, node_list); FREE_NULL_BITMAP(avail_bitmap); FREE_NULL_BITMAP(exc_core_bitmap); return NULL; } preemptee_candidates = slurm_find_preemptable_jobs(job_ptr); orig_start_time = 
job_ptr->start_time; rc = select_g_job_test(job_ptr, avail_bitmap, min_nodes, max_nodes, req_nodes, SELECT_MODE_WILL_RUN, preemptee_candidates, NULL, exc_core_bitmap); FREE_NULL_LIST(preemptee_candidates); if (rc == SLURM_SUCCESS) { char tmp_str[128]; *err_code = 0; uint32_t proc_cnt = 0; xstrcat(reply_msg, "STARTINFO="); #ifdef HAVE_BG select_g_select_jobinfo_get(job_ptr->select_jobinfo, SELECT_JOBDATA_NODE_CNT, &proc_cnt); #else proc_cnt = job_ptr->total_cpus; #endif snprintf(tmp_str, sizeof(tmp_str), "%u:%u@%u,", jobid, proc_cnt, (uint32_t) job_ptr->start_time); xstrcat(reply_msg, tmp_str); hostlist = bitmap2node_name(avail_bitmap); xstrcat(reply_msg, hostlist); xfree(hostlist); } else { xstrcat(reply_msg, "Jobs not runable on selected nodes"); error("wiki: jobs not runnable on nodes"); } /* Restore pending job's expected start time */ job_ptr->start_time = orig_start_time; FREE_NULL_BITMAP(avail_bitmap); FREE_NULL_BITMAP(exc_core_bitmap); return reply_msg; }