Example #1
/*
 * _can_job_run_on_node - Given the job requirements, determine which
 *                        resources from the given node (if any) can be
 *                        allocated to this job. Returns the number of
 *                        CPUs that can be used on this node and updates
 *                        core_map with the resources available for
 *                        allocation.
 *       NOTE: This process does NOT support overcommitting resources
 *
 * IN job_ptr       - pointer to job requirements
 * IN/OUT core_map  - core_bitmap of available cores
 * IN node_i        - index of node to be evaluated
 * IN node_usage    - per-node record of allocated memory and gres
 * IN cr_type       - Consumable Resource setting
 * IN test_only     - ignore allocated memory check
 *
 * NOTE: The returned cpu_count may be less than the number of set bits in
 *       core_map for the given node. The cr_dist functions will determine
 *       which bits to deselect from the core_map to match the cpu_count.
 */
uint16_t _can_job_run_on_node(struct job_record *job_ptr, bitstr_t *core_map,
			      const uint32_t node_i,
			      struct node_use_record *node_usage,
			      uint16_t cr_type,
			      bool test_only)
{
	uint16_t cpus;
	uint32_t avail_mem, req_mem, gres_cpus, gres_cores, cpus_per_core;
	int core_start_bit, core_end_bit;
	struct node_record *node_ptr = node_record_table_ptr + node_i;
	List gres_list;

	if (!test_only && IS_NODE_COMPLETING(node_ptr)) {
		/* Do not allocate more jobs to nodes with completing jobs */
		cpus = 0;
		return cpus;
	}

	cpus = _allocate_cores(job_ptr, core_map, node_i);

	core_start_bit = cr_get_coremap_offset(node_i);
	core_end_bit   = cr_get_coremap_offset(node_i + 1) - 1;
	node_ptr = select_node_record[node_i].node_ptr;
	cpus_per_core  = select_node_record[node_i].cpus /
			 (core_end_bit - core_start_bit + 1);
	if (node_usage[node_i].gres_list)
		gres_list = node_usage[node_i].gres_list;
	else
		gres_list = node_ptr->gres_list;

	gres_plugin_job_core_filter(job_ptr->gres_list, gres_list, test_only,
				    core_map, core_start_bit, core_end_bit,
				    node_ptr->name);

	if ((cr_type & CR_MEMORY) && cpus) {
		req_mem   = job_ptr->details->pn_min_memory & ~MEM_PER_CPU;
		avail_mem = select_node_record[node_i].real_memory;
		if (!test_only)
			avail_mem -= node_usage[node_i].alloc_memory;
		if (req_mem > avail_mem)
			cpus = 0;
	}

	gres_cores = gres_plugin_job_test(job_ptr->gres_list,
					  gres_list, test_only,
					  core_map, core_start_bit,
					  core_end_bit, job_ptr->job_id,
					  node_ptr->name);
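	/* Convert the gres-compatible core count to CPUs; if the result
	 * cannot cover the per-node task count or the per-task CPU
	 * requirement, the node offers no usable CPUs for this job */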
	gres_cpus = gres_cores;
	if (gres_cpus != NO_VAL)
		gres_cpus *= cpus_per_core;
	if ((gres_cpus < job_ptr->details->ntasks_per_node) ||
	    ((job_ptr->details->cpus_per_task > 1) &&
	     (gres_cpus < job_ptr->details->cpus_per_task)))
		gres_cpus = 0;
	if (gres_cpus < cpus)
		cpus = gres_cpus;

	if (cpus == 0)
		bit_nclear(core_map, core_start_bit, core_end_bit);

	if (select_debug_flags & DEBUG_FLAG_SELECT_TYPE) {
		info("select/serial: _can_job_run_on_node: %u cpus on %s(%d), "
		     "mem %u/%u",
		     cpus, select_node_record[node_i].node_ptr->name,
		     node_usage[node_i].node_state,
		     node_usage[node_i].alloc_memory,
		     select_node_record[node_i].real_memory);
	}

	return cpus;
}
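The memory check above relies on Slurm overloading pn_min_memory: when the
high MEM_PER_CPU flag bit is set, the remaining bits encode a per-CPU limit
rather than a per-node one, so the flag is masked off before any comparison.
A minimal standalone sketch of that masking follows; the 0x80000000 value
matches a 32-bit pn_min_memory field as used here, but treat the snippet as
illustrative rather than the canonical definition.

#include <stdint.h>
#include <stdio.h>

/* Assumed flag value for a 32-bit pn_min_memory field */
#define MEM_PER_CPU 0x80000000

int main(void)
{
	uint32_t pn_min_memory = MEM_PER_CPU | 2048;	/* 2048 MB per CPU */
	uint32_t req_mem = pn_min_memory & ~MEM_PER_CPU;	/* strip flag */

	if (pn_min_memory & MEM_PER_CPU)
		printf("per-CPU memory request: %u MB\n", req_mem);
	else
		printf("per-node memory request: %u MB\n", req_mem);
	return 0;
}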
Example #2
/*
 * main - test7.17.prog entry point: exercise the gres plugin logic that
 *        is normally split between the slurmd and slurmctld daemons
 *
 * Usage: test7.17.prog <TRES_PER_NODE> <CONFIG_DIR_HEAD> <CONFIG_SUB_DIR>
 *        <CPU_COUNT>
 */
int main(int argc, char *argv[])
{
	log_options_t opts = LOG_OPTS_STDERR_ONLY;
	int rc;
	uint32_t cpu_count, cpu_alloc, job_id = 12345;
	char *node_name, *reason_down = NULL;
	char *orig_config, *new_config = NULL, *tres_per_node = NULL;
	Buf buffer;
	List job_gres_list = NULL, node_gres_list = NULL;
	bitstr_t *cpu_bitmap;
	char config_dir[10000], test[1000];
	char slurm_conf[1000];
	uint32_t num_tasks = 1;
	uint32_t min_nodes = 1;
	uint32_t max_nodes = 1;
	uint16_t ntasks_per_node = NO_VAL16;
	uint16_t ntasks_per_socket = NO_VAL16;
	uint16_t sockets_per_node = NO_VAL16;
	uint16_t cpus_per_task = NO_VAL16;
	int core_count, sock_count;

	/* Validate arguments per the usage line above */
	if (argc < 5) {
		fprintf(stderr, "Usage: %s <TRES_PER_NODE> <CONFIG_DIR_HEAD> "
			"<CONFIG_SUB_DIR> <CPU_COUNT>\n", argv[0]);
		exit(1);
	}

	/* Setup slurm.conf and gres.conf test paths; snprintf() bounds
	 * every copy and avoids the original overlapping strcpy/strcat
	 * chain, which is undefined behavior */
	snprintf(config_dir, sizeof(config_dir), "%s/test7.17_configs%s",
		 argv[2], argv[3]);
	snprintf(test, sizeof(test), "%s", config_dir);
	snprintf(slurm_conf, sizeof(slurm_conf), "%s/slurm.conf", test);

	/* Enable detailed logging for now */
	opts.stderr_level = LOG_LEVEL_DEBUG;
	log_init(argv[0], opts, SYSLOG_FACILITY_USER, NULL);

	/*
	 * Logic normally executed by slurmd daemon
	 */
	setenv("SLURM_CONF", slurm_conf, 1);
	rc = gres_plugin_init();
	if (rc != SLURM_SUCCESS) {
		slurm_perror("failure: gres_plugin_init");
		exit(1);
	}

	setenv("SLURM_CONFIG_DIR", config_dir, 1);

	cpu_count = strtol(argv[4], NULL, 10);
	node_name = "test_node";
	rc = gres_plugin_node_config_load(cpu_count, node_name, NULL, NULL,
					  NULL);
	if (rc != SLURM_SUCCESS) {
		slurm_perror("failure: gres_plugin_node_config_load");
		exit(1);
	}

	/* Pack the node's gres configuration as slurmd would before
	 * shipping it to slurmctld */
	buffer = init_buf(1024);
	rc = gres_plugin_node_config_pack(buffer);
	if (rc != SLURM_SUCCESS) {
		slurm_perror("failure: gres_plugin_node_config_pack");
		exit(1);
	}

	/*
	 * Logic normally executed by slurmctld daemon
	 */
	orig_config = "gpu:8";
	rc = gres_plugin_init_node_config(node_name, orig_config,
					  &node_gres_list);
	if (rc != SLURM_SUCCESS) {
		slurm_perror("failure: gres_plugin_init_node_config");
		exit(1);
	}

	/* Rewind the buffer and unpack it as slurmctld would on receipt */
	set_buf_offset(buffer, 0);
	rc = gres_plugin_node_config_unpack(buffer, node_name);
	if (rc != SLURM_SUCCESS) {
		slurm_perror("failure: gres_plugin_node_config_unpack");
		exit(1);
	}

	core_count = cpu_count;
	sock_count = 1;
	rc = gres_plugin_node_config_validate(node_name, orig_config,
					      &new_config, &node_gres_list,
					      cpu_count, core_count, sock_count,
					      0, &reason_down);
	if (rc != SLURM_SUCCESS) {
		slurm_perror("failure: gres_plugin_node_config_validate");
		exit(1);
	}

	tres_per_node = xstrdup(argv[1]);	/* argc validated above */

	rc = gres_plugin_job_state_validate(NULL,	/* cpus_per_tres */
					    NULL,	/* tres_freq */
					    NULL,	/* tres_per_job */
					    tres_per_node,
					    NULL,	/* tres_per_socket */
					    NULL,	/* tres_per_task */
					    NULL,	/* mem_per_tres */
					    &num_tasks,
					    &min_nodes,
					    &max_nodes,
					    &ntasks_per_node,
					    &ntasks_per_socket,
					    &sockets_per_node,
					    &cpus_per_task,
					    &job_gres_list);
	if (rc != SLURM_SUCCESS) {
		slurm_seterrno(rc);
		slurm_perror("failure: gres_plugin_job_state_validate");
		exit(1);
	}

	gres_plugin_node_state_log(node_gres_list, node_name);
	gres_plugin_job_state_log(job_gres_list, job_id);

	cpu_bitmap = bit_alloc(cpu_count);
	bit_nset(cpu_bitmap, 0, cpu_count - 1);
	cpu_alloc = gres_plugin_job_test(job_gres_list, node_gres_list, true,
					 cpu_bitmap, 0, cpu_count - 1,
					 job_id, node_name);
	if (cpu_alloc == NO_VAL)
		printf("cpu_alloc=ALL\n");
	else
		printf("cpu_alloc=%u\n", cpu_alloc);

	rc = gres_plugin_fini();
	if (rc != SLURM_SUCCESS) {
		slurm_perror("failure: gres_plugin_fini");
		exit(1);
	}

	printf("Test %s ran to completion\n\n", argv[3]);
	exit(0);
}
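The init_buf/set_buf_offset sequence above mirrors the normal daemon flow:
slurmd packs its gres configuration into a buffer, the offset is rewound to
zero, and the slurmctld side unpacks from the same buffer. A self-contained
sketch of that round-trip pattern, using a toy buffer in place of Slurm's
Buf API (the toy_* names are hypothetical, for illustration only):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Toy stand-in for Slurm's Buf: a byte array plus a read/write offset */
struct toy_buf {
	uint8_t data[64];
	size_t offset;
};

static void toy_pack32(struct toy_buf *b, uint32_t v)
{
	memcpy(b->data + b->offset, &v, sizeof(v));	/* append value */
	b->offset += sizeof(v);
}

static uint32_t toy_unpack32(struct toy_buf *b)
{
	uint32_t v;
	memcpy(&v, b->data + b->offset, sizeof(v));	/* read value */
	b->offset += sizeof(v);
	return v;
}

int main(void)
{
	struct toy_buf buf = { .offset = 0 };

	toy_pack32(&buf, 8);	/* "slurmd" packs a gres count */
	buf.offset = 0;		/* like set_buf_offset(buffer, 0) above */
	printf("unpacked gres count: %u\n", toy_unpack32(&buf));
	return 0;
}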
Example #3
/*
 * Determine which of these nodes are usable by this job
 *
 * Remove nodes from the bitmap that don't have enough memory or gres to
 * support the job.
 *
 * Return SLURM_ERROR if a required node can't be used.
 *
 * if node_state = NODE_CR_RESERVED, clear bitmap (if node is required
 *                                   then should we return NODE_BUSY!?!)
 *
 * if node_state = NODE_CR_ONE_ROW, then this node can only be used by
 *                                  another NODE_CR_ONE_ROW job
 *
 * if node_state = NODE_CR_AVAILABLE AND:
 *  - job_node_req = NODE_CR_RESERVED, then we need idle nodes
 *  - job_node_req = NODE_CR_ONE_ROW, then we need idle or non-sharing nodes
 */
static int _verify_node_state(struct part_res_record *cr_part_ptr,
			      struct job_record *job_ptr, bitstr_t * bitmap,
			      uint16_t cr_type,
			      struct node_use_record *node_usage,
			      enum node_cr_state job_node_req)
{
	struct node_record *node_ptr;
	uint32_t free_mem, gres_cpus, gres_cores, min_mem;
	int i, i_first, i_last;	/* signed: an empty bitmap yields the
				 * empty range -1..-2 below */
	int core_start_bit, core_end_bit, cpus_per_core;
	List gres_list;

	/* Strip the MEM_PER_CPU flag bit to recover the numeric value */
	if (job_ptr->details->pn_min_memory & MEM_PER_CPU)
		min_mem = job_ptr->details->pn_min_memory & (~MEM_PER_CPU);
	else
		min_mem = job_ptr->details->pn_min_memory;
	i_first = bit_ffs(bitmap);
	if (i_first >= 0)
		i_last  = bit_fls(bitmap);
	else
		i_last = -2;
	for (i = i_first; i <= i_last; i++) {
		if (!bit_test(bitmap, i))
			continue;
		node_ptr = select_node_record[i].node_ptr;
		core_start_bit = cr_get_coremap_offset(i);
		core_end_bit   = cr_get_coremap_offset(i+1) - 1;
		cpus_per_core  = select_node_record[i].cpus /
				 (core_end_bit - core_start_bit + 1);
		/* node-level memory check */
		if ((job_ptr->details->pn_min_memory) &&
		    (cr_type & CR_MEMORY)) {
			free_mem  = select_node_record[i].real_memory;
			free_mem -= node_usage[i].alloc_memory;
			if (free_mem < min_mem) {
				debug3("select/serial: node %s no mem %u < %u",
					select_node_record[i].node_ptr->name,
					free_mem, min_mem);
				goto clear_bit;
			}
		}

		/* node-level gres check */
		if (node_usage[i].gres_list)
			gres_list = node_usage[i].gres_list;
		else
			gres_list = node_ptr->gres_list;
		gres_cores = gres_plugin_job_test(job_ptr->gres_list,
						  gres_list, true,
						  NULL, 0, 0, job_ptr->job_id,
						  node_ptr->name);
		gres_cpus = gres_cores;
		if (gres_cpus != NO_VAL)
			gres_cpus *= cpus_per_core;
		if (gres_cpus == 0) {
			debug3("select/serial: node %s lacks gres",
			       node_ptr->name);
			goto clear_bit;
		}

		/* exclusive node check */
		if (node_usage[i].node_state >= NODE_CR_RESERVED) {
			debug3("select/serial: node %s in exclusive use",
			       node_ptr->name);
			goto clear_bit;

		/* non-resource-sharing node check */
		} else if (node_usage[i].node_state >= NODE_CR_ONE_ROW) {
			if ((job_node_req == NODE_CR_RESERVED) ||
			    (job_node_req == NODE_CR_AVAILABLE)) {
				debug3("select/serial: node %s non-sharing",
				       node_ptr->name);
				goto clear_bit;
			}
			/* cannot use this node if it is running jobs
			 * in sharing partitions */
			if (_is_node_busy(cr_part_ptr, i, 1,
					  job_ptr->part_ptr)) {
				debug3("select/serial: node %s sharing?",
				       node_ptr->name);
				goto clear_bit;
			}

		/* node is NODE_CR_AVAILABLE - check job request */
		} else {
			if (job_node_req == NODE_CR_RESERVED) {
				if (_is_node_busy(cr_part_ptr, i, 0,
						  job_ptr->part_ptr)) {
					debug3("select/serial: node %s busy",
					       node_ptr->name);
					goto clear_bit;
				}
			} else if (job_node_req == NODE_CR_ONE_ROW) {
				/* cannot use this node if it is running jobs
				 * in sharing partitions */
				if (_is_node_busy(cr_part_ptr, i, 1,
						  job_ptr->part_ptr)) {
					debug3("select/serial: node %s vbusy",
					       node_ptr->name);
					goto clear_bit;
				}
			}
		}
		continue;	/* node is usable, test next node */

clear_bit:	/* This node is not usable by this job */
		bit_clear(bitmap, i);
		if (job_ptr->details->req_node_bitmap &&
		    bit_test(job_ptr->details->req_node_bitmap, i)) {
			return SLURM_ERROR;
		}
	}

	return SLURM_SUCCESS;
}
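The bit_ffs/bit_fls pairing at the top of the scan is a common bitmap idiom:
bound the loop by the first and last set bits, and fall back to an empty
range (i_last = -2) when no bit is set, so sparse bitmaps are never walked
end to end. The same idiom over a plain 32-bit mask, sketched with the
GCC/Clang builtins (the ffs32/fls32 helpers are illustrative, not Slurm's
bitstring API):

#include <stdio.h>

/* First/last set bit of a 32-bit mask; -1 if the mask is empty */
static int ffs32(unsigned m) { return m ? __builtin_ctz(m) : -1; }
static int fls32(unsigned m) { return m ? 31 - __builtin_clz(m) : -1; }

int main(void)
{
	unsigned bitmap = 0x0140;	/* bits 6 and 8 set */
	int i, i_first = ffs32(bitmap);
	int i_last = (i_first >= 0) ? fls32(bitmap) : -2;

	for (i = i_first; i <= i_last; i++) {
		if (!(bitmap & (1u << i)))
			continue;	/* skip clear bits inside the range */
		printf("usable node index %d\n", i);
	}
	return 0;
}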