/*
 * _can_job_run_on_node - Given the job requirements, determine which
 *                        resources from the given node (if any) can be
 *                        allocated to this job. Returns the number of
 *                        CPUs that can be used by this node and a bitmap
 *                        of available resources for allocation.
 * NOTE: This process does NOT support overcommitting resources
 *
 * IN job_ptr     - pointer to job requirements
 * IN/OUT core_map - core_bitmap of available cores
 * IN node_i      - index of node to be evaluated
 * IN node_usage  - per-node resource usage records (allocated memory,
 *                  gres, node state)
 * IN cr_type     - Consumable Resource setting
 * IN test_only   - ignore allocated memory check
 *
 * NOTE: The returned cpu_count may be less than the number of set bits in
 *       core_map for the given node. The cr_dist functions will determine
 *       which bits to deselect from the core_map to match the cpu_count.
 */
uint16_t _can_job_run_on_node(struct job_record *job_ptr, bitstr_t *core_map,
			      const uint32_t node_i,
			      struct node_use_record *node_usage,
			      uint16_t cr_type, bool test_only)
{
	uint16_t cpus;
	uint32_t avail_mem, req_mem, gres_cpus, gres_cores, cpus_per_core;
	int core_start_bit, core_end_bit;
	struct node_record *node_ptr = node_record_table_ptr + node_i;
	List gres_list;

	if (!test_only && IS_NODE_COMPLETING(node_ptr)) {
		/* Do not allocate more jobs to nodes with completing jobs */
		cpus = 0;
		return cpus;
	}

	cpus = _allocate_cores(job_ptr, core_map, node_i);

	core_start_bit = cr_get_coremap_offset(node_i);
	core_end_bit   = cr_get_coremap_offset(node_i + 1) - 1;
	node_ptr = select_node_record[node_i].node_ptr;
	cpus_per_core  = select_node_record[node_i].cpus /
			 (core_end_bit - core_start_bit + 1);
	if (node_usage[node_i].gres_list)
		gres_list = node_usage[node_i].gres_list;
	else
		gres_list = node_ptr->gres_list;

	gres_plugin_job_core_filter(job_ptr->gres_list, gres_list, test_only,
				    core_map, core_start_bit, core_end_bit,
				    node_ptr->name);

	if ((cr_type & CR_MEMORY) && cpus) {
		req_mem   = job_ptr->details->pn_min_memory & ~MEM_PER_CPU;
		avail_mem = select_node_record[node_i].real_memory;
		if (!test_only)
			avail_mem -= node_usage[node_i].alloc_memory;
		if (req_mem > avail_mem)
			cpus = 0;
	}

	gres_cores = gres_plugin_job_test(job_ptr->gres_list,
					  gres_list, test_only,
					  core_map, core_start_bit,
					  core_end_bit, job_ptr->job_id,
					  node_ptr->name);
	gres_cpus = gres_cores;
	if (gres_cpus != NO_VAL)
		gres_cpus *= cpus_per_core;
	if ((gres_cpus < job_ptr->details->ntasks_per_node) ||
	    ((job_ptr->details->cpus_per_task > 1) &&
	     (gres_cpus < job_ptr->details->cpus_per_task)))
		gres_cpus = 0;
	if (gres_cpus < cpus)
		cpus = gres_cpus;

	if (cpus == 0)
		bit_nclear(core_map, core_start_bit, core_end_bit);

	if (select_debug_flags & DEBUG_FLAG_SELECT_TYPE) {
		info("select/serial: _can_job_run_on_node: %u cpus on %s(%d), "
		     "mem %u/%u",
		     cpus, select_node_record[node_i].node_ptr->name,
		     node_usage[node_i].node_state,
		     node_usage[node_i].alloc_memory,
		     select_node_record[node_i].real_memory);
	}

	return cpus;
}
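/*
 * A minimal, self-contained sketch (not SLURM code; all names below are
 * hypothetical) isolating the GRES capping arithmetic used above: GRES
 * availability is reported in cores, converted to CPUs via cpus_per_core,
 * zeroed if it cannot satisfy even one task, then used to cap the
 * allocatable CPU count. NO_VAL mirrors SLURM's "no limit" sentinel.
 */
#include <stdint.h>
#include <stdio.h>

#define NO_VAL (0xfffffffe)	/* same sentinel value slurm.h uses */

static uint16_t cap_cpus_by_gres(uint16_t cpus, uint32_t gres_cores,
				 uint32_t cpus_per_core,
				 uint32_t ntasks_per_node,
				 uint32_t cpus_per_task)
{
	uint32_t gres_cpus = gres_cores;

	if (gres_cpus != NO_VAL)	/* NO_VAL: GRES imposes no limit */
		gres_cpus *= cpus_per_core;
	if ((gres_cpus < ntasks_per_node) ||
	    ((cpus_per_task > 1) && (gres_cpus < cpus_per_task)))
		gres_cpus = 0;		/* too little GRES for even one task */
	if (gres_cpus < cpus)
		cpus = (uint16_t) gres_cpus;
	return cpus;
}

int main(void)
{
	/* 16 CPUs on the node, 2 GRES-usable cores, 2 CPUs per core,
	 * 1 task per node, 1 CPU per task: capped to 4 CPUs */
	printf("%u\n", cap_cpus_by_gres(16, 2, 2, 1, 1));
	return 0;
}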
/*
 * main - load node and job GRES state, then report how many CPUs of a
 *	  test node could be allocated to a job with the given TRES request
 * Usage: test7.17.prog <TRES_PER_NODE> <CONFIG_DIR_HEAD> <CONFIG_SUB_DIR>
 *	  <CPU_COUNT>
 */
#include <stdio.h>
#include <stdlib.h>

#include "slurm/slurm.h"
#include "slurm/slurm_errno.h"

#include "src/common/bitstring.h"
#include "src/common/gres.h"
#include "src/common/log.h"
#include "src/common/pack.h"
#include "src/common/xstring.h"

int main(int argc, char *argv[])
{
	log_options_t opts = LOG_OPTS_STDERR_ONLY;
	int rc;
	uint32_t cpu_count, cpu_alloc, job_id = 12345;
	char *node_name, *reason_down = NULL;
	char *orig_config, *new_config = NULL, *tres_per_node = NULL;
	Buf buffer;
	List job_gres_list = NULL, node_gres_list = NULL;
	bitstr_t *cpu_bitmap;
	char config_dir[10000];
	char slurm_conf[10000];
	uint32_t num_tasks = 1;
	uint32_t min_nodes = 1;
	uint32_t max_nodes = 1;
	uint16_t ntasks_per_node = NO_VAL16;
	uint16_t ntasks_per_socket = NO_VAL16;
	uint16_t sockets_per_node = NO_VAL16;
	uint16_t cpus_per_task = NO_VAL16;
	int core_count, sock_count;

	if (argc < 5) {
		fprintf(stderr, "Usage: %s <TRES_PER_NODE> <CONFIG_DIR_HEAD> "
			"<CONFIG_SUB_DIR> <CPU_COUNT>\n", argv[0]);
		exit(1);
	}

	/* Set up slurm.conf and gres.conf test paths */
	snprintf(config_dir, sizeof(config_dir), "%s/test7.17_configs%s",
		 argv[2], argv[3]);
	snprintf(slurm_conf, sizeof(slurm_conf), "%s/slurm.conf", config_dir);

	/* Enable detailed logging for now */
	opts.stderr_level = LOG_LEVEL_DEBUG;
	log_init(argv[0], opts, SYSLOG_FACILITY_USER, NULL);

	/*
	 * Logic normally executed by slurmd daemon
	 */
	setenv("SLURM_CONF", slurm_conf, 1);
	rc = gres_plugin_init();
	if (rc != SLURM_SUCCESS) {
		slurm_perror("failure: gres_plugin_init");
		exit(1);
	}

	setenv("SLURM_CONFIG_DIR", config_dir, 1);
	cpu_count = strtol(argv[4], NULL, 10);
	node_name = "test_node";
	rc = gres_plugin_node_config_load(cpu_count, node_name, NULL, NULL,
					  NULL);
	if (rc != SLURM_SUCCESS) {
		slurm_perror("failure: gres_plugin_node_config_load");
		exit(1);
	}

	buffer = init_buf(1024);
	rc = gres_plugin_node_config_pack(buffer);
	if (rc != SLURM_SUCCESS) {
		slurm_perror("failure: gres_plugin_node_config_pack");
		exit(1);
	}

	/*
	 * Logic normally executed by slurmctld daemon
	 */
	orig_config = "gpu:8";
	rc = gres_plugin_init_node_config(node_name, orig_config,
					  &node_gres_list);
	if (rc != SLURM_SUCCESS) {
		slurm_perror("failure: gres_plugin_init_node_config");
		exit(1);
	}

	set_buf_offset(buffer, 0);
	rc = gres_plugin_node_config_unpack(buffer, node_name);
	if (rc != SLURM_SUCCESS) {
		slurm_perror("failure: gres_plugin_node_config_unpack");
		exit(1);
	}

	core_count = cpu_count;
	sock_count = 1;
	rc = gres_plugin_node_config_validate(node_name, orig_config,
					      &new_config, &node_gres_list,
					      cpu_count, core_count,
					      sock_count, 0, &reason_down);
	if (rc != SLURM_SUCCESS) {
		slurm_perror("failure: gres_plugin_node_config_validate");
		exit(1);
	}

	tres_per_node = xstrdup(argv[1]);

	rc = gres_plugin_job_state_validate(NULL,	/* cpus_per_tres */
					    NULL,	/* tres_freq */
					    NULL,	/* tres_per_job */
					    tres_per_node,
					    NULL,	/* tres_per_socket */
					    NULL,	/* tres_per_task */
					    NULL,	/* mem_per_tres */
					    &num_tasks,
					    &min_nodes,
					    &max_nodes,
					    &ntasks_per_node,
					    &ntasks_per_socket,
					    &sockets_per_node,
					    &cpus_per_task,
					    &job_gres_list);
	if (rc != SLURM_SUCCESS) {
		slurm_seterrno(rc);
		slurm_perror("failure: gres_plugin_job_state_validate");
		exit(1);
	}

	gres_plugin_node_state_log(node_gres_list, node_name);
	gres_plugin_job_state_log(job_gres_list, job_id);

	cpu_bitmap = bit_alloc(cpu_count);
	bit_nset(cpu_bitmap, 0, cpu_count - 1);
	cpu_alloc = gres_plugin_job_test(job_gres_list, node_gres_list, true,
					 cpu_bitmap, 0, cpu_count - 1,
					 job_id, node_name);
	if (cpu_alloc == NO_VAL)
		printf("cpu_alloc=ALL\n");
	else
		printf("cpu_alloc=%u\n", cpu_alloc);

	rc = gres_plugin_fini();
	if (rc != SLURM_SUCCESS) {
		slurm_perror("failure: gres_plugin_fini");
		exit(1);
	}

	printf("Test %s ran to completion\n\n", argv[3]);
	exit(0);
}
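/*
 * A toy, self-contained sketch (hypothetical toy_* names, not SLURM's Buf
 * API) of the pack -> rewind -> unpack round trip main() performs: the
 * "slurmd side" packs node config into a buffer, the cursor is rewound to
 * zero (the role set_buf_offset(buffer, 0) plays above), and the
 * "slurmctld side" unpacks it from the start.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

typedef struct {
	uint8_t data[1024];	/* backing storage */
	size_t offset;		/* read/write cursor */
} toy_buf_t;

static void toy_pack32(toy_buf_t *buf, uint32_t val)
{
	memcpy(buf->data + buf->offset, &val, sizeof(val));
	buf->offset += sizeof(val);
}

static uint32_t toy_unpack32(toy_buf_t *buf)
{
	uint32_t val;

	memcpy(&val, buf->data + buf->offset, sizeof(val));
	buf->offset += sizeof(val);
	return val;
}

int main(void)
{
	toy_buf_t buf = { .offset = 0 };

	toy_pack32(&buf, 8);	/* "slurmd": pack the gpu count */
	buf.offset = 0;		/* rewind, as set_buf_offset(buffer, 0) does */
	printf("unpacked gpu count: %u\n",
	       (unsigned) toy_unpack32(&buf));	/* "slurmctld": unpack */
	return 0;
}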
/*
 * Determine which of these nodes are usable by this job
 *
 * Remove nodes from the bitmap that don't have enough memory or gres to
 * support the job.
 *
 * Return SLURM_ERROR if a required node can't be used.
 *
 * if node_state = NODE_CR_RESERVED, clear the node's bit in the bitmap
 *	(if the node is required by the job, should we return NODE_BUSY
 *	instead?)
 *
 * if node_state = NODE_CR_ONE_ROW, then this node can only be used by
 *	another NODE_CR_ONE_ROW job
 *
 * if node_state = NODE_CR_AVAILABLE AND:
 *  - job_node_req = NODE_CR_RESERVED, then we need idle nodes
 *  - job_node_req = NODE_CR_ONE_ROW, then we need idle or non-sharing nodes
 */
static int _verify_node_state(struct part_res_record *cr_part_ptr,
			      struct job_record *job_ptr,
			      bitstr_t *bitmap, uint16_t cr_type,
			      struct node_use_record *node_usage,
			      enum node_cr_state job_node_req)
{
	struct node_record *node_ptr;
	uint32_t free_mem, gres_cpus, gres_cores, min_mem;
	int i, i_first, i_last;
	int core_start_bit, core_end_bit, cpus_per_core;
	List gres_list;

	if (job_ptr->details->pn_min_memory & MEM_PER_CPU)
		min_mem = job_ptr->details->pn_min_memory & (~MEM_PER_CPU);
	else
		min_mem = job_ptr->details->pn_min_memory;

	i_first = bit_ffs(bitmap);
	if (i_first >= 0)
		i_last = bit_fls(bitmap);
	else
		i_last = -2;	/* no bits set; loop body never runs */
	for (i = i_first; i <= i_last; i++) {
		if (!bit_test(bitmap, i))
			continue;
		node_ptr = select_node_record[i].node_ptr;
		core_start_bit = cr_get_coremap_offset(i);
		core_end_bit   = cr_get_coremap_offset(i + 1) - 1;
		cpus_per_core  = select_node_record[i].cpus /
				 (core_end_bit - core_start_bit + 1);

		/* node-level memory check */
		if ((job_ptr->details->pn_min_memory) &&
		    (cr_type & CR_MEMORY)) {
			free_mem  = select_node_record[i].real_memory;
			free_mem -= node_usage[i].alloc_memory;
			if (free_mem < min_mem) {
				debug3("select/serial: node %s no mem %u < %u",
				       select_node_record[i].node_ptr->name,
				       free_mem, min_mem);
				goto clear_bit;
			}
		}

		/* node-level gres check */
		if (node_usage[i].gres_list)
			gres_list = node_usage[i].gres_list;
		else
			gres_list = node_ptr->gres_list;
		gres_cores = gres_plugin_job_test(job_ptr->gres_list,
						  gres_list, true,
						  NULL, 0, 0, job_ptr->job_id,
						  node_ptr->name);
		gres_cpus = gres_cores;
		if (gres_cpus != NO_VAL)
			gres_cpus *= cpus_per_core;
		if (gres_cpus == 0) {
			debug3("select/serial: node %s lacks gres",
			       node_ptr->name);
			goto clear_bit;
		}

		/* exclusive node check */
		if (node_usage[i].node_state >= NODE_CR_RESERVED) {
			debug3("select/serial: node %s in exclusive use",
			       node_ptr->name);
			goto clear_bit;

		/* non-resource-sharing node check */
		} else if (node_usage[i].node_state >= NODE_CR_ONE_ROW) {
			if ((job_node_req == NODE_CR_RESERVED) ||
			    (job_node_req == NODE_CR_AVAILABLE)) {
				debug3("select/serial: node %s non-sharing",
				       node_ptr->name);
				goto clear_bit;
			}
			/* cannot use this node if it is running jobs
			 * in sharing partitions */
			if (_is_node_busy(cr_part_ptr, i, 1,
					  job_ptr->part_ptr)) {
				debug3("select/serial: node %s sharing?",
				       node_ptr->name);
				goto clear_bit;
			}

		/* node is NODE_CR_AVAILABLE - check job request */
		} else {
			if (job_node_req == NODE_CR_RESERVED) {
				if (_is_node_busy(cr_part_ptr, i, 0,
						  job_ptr->part_ptr)) {
					debug3("select/serial: node %s busy",
					       node_ptr->name);
					goto clear_bit;
				}
			} else if (job_node_req == NODE_CR_ONE_ROW) {
				/* cannot use this node if it is running jobs
				 * in sharing partitions */
				if (_is_node_busy(cr_part_ptr, i, 1,
						  job_ptr->part_ptr)) {
					debug3("select/serial: node %s vbusy",
					       node_ptr->name);
					goto clear_bit;
				}
			}
		}
		continue;	/* node is usable, test next node */

clear_bit:	/* This node is not usable by this job */
		bit_clear(bitmap, i);
		if (job_ptr->details->req_node_bitmap &&
		    bit_test(job_ptr->details->req_node_bitmap, i))
			return SLURM_ERROR;
	}

	return SLURM_SUCCESS;
}
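/*
 * A self-contained sketch (enum values mirror SLURM's node_cr_state, but
 * this is an illustration, not the plugin's code) of the exclusive /
 * one-row / available decision tree in _verify_node_state() above. The
 * _is_node_busy() partition checks that the real code layers on top are
 * deliberately omitted here.
 */
#include <stdbool.h>
#include <stdio.h>

enum node_cr_state {
	NODE_CR_AVAILABLE = 0,	/* node may be shared */
	NODE_CR_ONE_ROW,	/* node in non-sharing use */
	NODE_CR_RESERVED	/* node in exclusive use */
};

static bool node_state_ok(enum node_cr_state node_state,
			  enum node_cr_state job_node_req)
{
	if (node_state >= NODE_CR_RESERVED)
		return false;		/* exclusive: no other job fits */
	if (node_state >= NODE_CR_ONE_ROW)	/* non-sharing node: only */
		return job_node_req == NODE_CR_ONE_ROW; /* one-row jobs fit */
	return true;	/* NODE_CR_AVAILABLE; busy checks still apply */
}

int main(void)
{
	/* sharing job on a non-sharing node: rejected (prints 0) */
	printf("%d\n", node_state_ok(NODE_CR_ONE_ROW, NODE_CR_AVAILABLE));
	/* exclusive job on an available node: accepted here (prints 1),
	 * though the real code still runs _is_node_busy() */
	printf("%d\n", node_state_ok(NODE_CR_AVAILABLE, NODE_CR_RESERVED));
	return 0;
}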