Пример #1
0
/*
 * _slurmd_init - run the daemon's start-up sequence.
 *
 * In order: process the command line, load the configuration and build the
 * node/front-end tables, initialize the select, gres, topology, proctrack,
 * task, auth and spank layers, raise resource limits, create the credential
 * verifier context, prepare the spool directory, pick a working directory
 * when daemonized, initialize the gid cache, open /dev/null and confirm
 * that the slurmstepd binary exists.
 *
 * The order of the calls matters; see the comments on the individual steps.
 *
 * RET SLURM_SUCCESS, or SLURM_FAILURE as soon as one of the checked steps
 *     fails. A missing or non-regular slurmstepd file is reported through
 *     fatal() instead (presumably terminating the process - confirm against
 *     the logging code).
 */
static int
_slurmd_init(void)
{
	struct rlimit rlim;
	slurm_ctl_conf_t *cf;
	struct stat stat_buf;
	uint32_t cpu_cnt;

	/*
	 * Process commandline arguments first, since one option may be
	 * an alternate location for the slurm config file.
	 */
	_process_cmdline(*conf->argc, *conf->argv);

	/*
	 * Build nodes table like in slurmctld
	 * This is required by the topology stack
	 * Node tables setup must precede _read_config() so that the
	 * proper hostname is set.
	 */
	slurm_conf_init(conf->conffile);
	init_node_conf();
	/* slurm_select_init() must be called before
	 * build_all_nodeline_info() to be called with proper argument. */
	if (slurm_select_init(1) != SLURM_SUCCESS )
		return SLURM_FAILURE;
	build_all_nodeline_info(true);
	build_all_frontend_info(true);

	/*
	 * Read global slurm config file, override necessary values from
	 * defaults and command line.
	 */
	_read_config();

	/* Hand the gres layer the larger of the two CPU counts held in conf */
	cpu_cnt = MAX(conf->conf_cpus, conf->block_map_size);

	if ((gres_plugin_init() != SLURM_SUCCESS) ||
	    (gres_plugin_node_config_load(cpu_cnt) != SLURM_SUCCESS))
		return SLURM_FAILURE;
	if (slurm_topo_init() != SLURM_SUCCESS)
		return SLURM_FAILURE;

	/*
	 * Get and set slurmd topology information
	 * Build node hash table first to speed up the topo build
	 */
	rehash_node();
	slurm_topo_build_config();
	_set_topo_info();

	/*
	 * Check for cpu frequency set capabilities on this node
	 */
	cpu_freq_init(conf);

	_print_conf();

	/* Remaining plugin stacks; any failure aborts initialization */
	if (slurm_proctrack_init() != SLURM_SUCCESS)
		return SLURM_FAILURE;
	if (slurmd_task_init() != SLURM_SUCCESS)
		return SLURM_FAILURE;
	if (slurm_auth_init(NULL) != SLURM_SUCCESS)
		return SLURM_FAILURE;
	if (spank_slurmd_init() < 0)
		return SLURM_FAILURE;

	/*
	 * Raise the soft CPU time limit to the hard limit. The setrlimit()
	 * result is not checked. A finite hard limit is logged as an error.
	 */
	if (getrlimit(RLIMIT_CPU, &rlim) == 0) {
		rlim.rlim_cur = rlim.rlim_max;
		setrlimit(RLIMIT_CPU, &rlim);
		if (rlim.rlim_max != RLIM_INFINITY) {
			error("Slurmd process CPU time limit is %d seconds",
			      (int) rlim.rlim_max);
		}
	}

	/* Same soft-to-hard raise for the open file descriptor limit */
	if (getrlimit(RLIMIT_NOFILE, &rlim) == 0) {
		rlim.rlim_cur = rlim.rlim_max;
		setrlimit(RLIMIT_NOFILE, &rlim);
	}
#ifndef NDEBUG
	/* Only when NDEBUG is not defined: raise the core file size limit */
	if (getrlimit(RLIMIT_CORE, &rlim) == 0) {
		rlim.rlim_cur = rlim.rlim_max;
		setrlimit(RLIMIT_CORE, &rlim);
	}
#endif /* !NDEBUG */

	/*
	 * Create a context for verifying slurm job credentials
	 */
	if (!(conf->vctx = slurm_cred_verifier_ctx_create(conf->pubkey)))
		return SLURM_FAILURE;
	if (!strcmp(conf->select_type, "select/serial")) {
		/* Only cache credential for 5 seconds with select/serial
		 * for shorter cache searches and higher throughput */
		slurm_cred_ctx_set(conf->vctx, SLURM_CRED_OPT_EXPIRY_WINDOW, 5);
	}

	/*
	 * Create slurmd spool directory if necessary.
	 */
	if (_set_slurmd_spooldir() < 0) {
		error("Unable to initialize slurmd spooldir");
		return SLURM_FAILURE;
	}

	if (conf->cleanstart) {
		/*
		 * Need to kill any running slurmd's here
		 */
		_kill_old_slurmd();

		stepd_cleanup_sockets(conf->spooldir, conf->node_name);
		_stepd_cleanup_batch_dirs(conf->spooldir, conf->node_name);
	}

	/*
	 * When daemonized, pick a writable working directory. Candidates are
	 * tried in order: the directory holding the log file, the spool
	 * directory, then /var/tmp. Only a failure of the last one is fatal
	 * to initialization.
	 */
	if (conf->daemonize) {
		bool success = false;

		/* Only an absolute log file path yields a usable directory */
		if (conf->logfile && (conf->logfile[0] == '/')) {
			char *slash_ptr, *work_dir;
			work_dir = xstrdup(conf->logfile);
			/*
			 * Cut the copy at its last '/' to get the directory
			 * part; a log file directly under "/" keeps the root
			 * slash itself.
			 */
			slash_ptr = strrchr(work_dir, '/');
			if (slash_ptr == work_dir)
				work_dir[1] = '\0';
			else
				slash_ptr[0] = '\0';
			if ((access(work_dir, W_OK) != 0) ||
			    (chdir(work_dir) < 0)) {
				error("Unable to chdir to %s", work_dir);
			} else
				success = true;
			xfree(work_dir);
		}

		/* First fallback: the spool directory */
		if (!success) {
			if ((access(conf->spooldir, W_OK) != 0) ||
			    (chdir(conf->spooldir) < 0)) {
				error("Unable to chdir to %s", conf->spooldir);
			} else
				success = true;
		}

		/* Last resort: /var/tmp, otherwise give up */
		if (!success) {
			if ((access("/var/tmp", W_OK) != 0) ||
			    (chdir("/var/tmp") < 0)) {
				error("chdir(/var/tmp): %m");
				return SLURM_FAILURE;
			} else
				info("chdir to /var/tmp");
		}
	}

	/*
	 * Cache the group access list
	 * init_gids_cache() gets 1 when GROUP_CACHE is set in group_info,
	 * 0 otherwise; the configuration stays locked across the call.
	 */
	cf = slurm_conf_lock();
	if (cf->group_info & GROUP_CACHE)
		init_gids_cache(1);
	else
		init_gids_cache(0);
	slurm_conf_unlock();

	/* devnull is not declared in this function (defined elsewhere) */
	if ((devnull = open_cloexec("/dev/null", O_RDWR)) < 0) {
		error("Unable to open /dev/null: %m");
		return SLURM_FAILURE;
	}

	/* make sure we have slurmstepd installed */
	if (stat(conf->stepd_loc, &stat_buf))
		fatal("Unable to find slurmstepd file at %s", conf->stepd_loc);
	if (!S_ISREG(stat_buf.st_mode))
		fatal("slurmstepd not a file at %s", conf->stepd_loc);

	return SLURM_SUCCESS;
}
Пример #2
0
/*
 * main - slurmctld main function, start various threads and process RPCs
 * test7.17.prog <TRES_PER_NODE> <CONFIG_DIR_HEAD> <CONFIG_SUB_DIR> <CPU_COUNT>
 * 
 */
int main(int argc, char *argv[])
{
	log_options_t opts = LOG_OPTS_STDERR_ONLY;
	int rc;
	uint32_t cpu_count, cpu_alloc, job_id = 12345;
	char *node_name, *reason_down = NULL;
	char *orig_config, *new_config = NULL, *tres_per_node = NULL;
	Buf buffer;
	List job_gres_list = NULL, node_gres_list = NULL;
	bitstr_t *cpu_bitmap;
	char config_dir[10000], test[1000];
	char slurm_conf[1000];
	uint32_t num_tasks = 1;
	uint32_t min_nodes = 1;
	uint32_t max_nodes = 1;
	uint16_t ntasks_per_node = NO_VAL16;
	uint16_t ntasks_per_socket = NO_VAL16;
	uint16_t sockets_per_node = NO_VAL16;
	uint16_t cpus_per_task = NO_VAL16;
	int core_count, sock_count;

	/* Setup slurm.conf and gres.conf test paths */
	strcpy(config_dir, argv[2]);
	strcpy(config_dir,strcat(config_dir, "/test7.17_configs"));
	strcpy(test, strcat(config_dir, argv[3]));
	strcpy(slurm_conf, strcat(test, "/slurm.conf"));

	/* Enable detailed logging for now */
	opts.stderr_level = LOG_LEVEL_DEBUG;
	log_init(argv[0], opts, SYSLOG_FACILITY_USER, NULL);

	/*
	 * Logic normally executed by slurmd daemon
	 */
	setenv("SLURM_CONF", slurm_conf, 1);
	rc = gres_plugin_init();
	if (rc != SLURM_SUCCESS) {
		slurm_perror("failure: gres_plugin_init");
		exit(1);
	}

	setenv("SLURM_CONFIG_DIR", config_dir, 1);

	cpu_count = strtol(argv[4], NULL, 10);
	node_name = "test_node";
	rc = gres_plugin_node_config_load(cpu_count, node_name, NULL, NULL,
					  NULL);
	if (rc != SLURM_SUCCESS) {
		slurm_perror("failure: gres_plugin_node_config_load");
		exit(1);
	}

	buffer = init_buf(1024);
	rc = gres_plugin_node_config_pack(buffer);
	if (rc != SLURM_SUCCESS) {
		slurm_perror("failure: gres_plugin_node_config_pack");
		exit(1);
	}

	/*
	 * Logic normally executed by slurmctld daemon
	 */
	orig_config = "gpu:8";
	rc = gres_plugin_init_node_config(node_name, orig_config,
					  &node_gres_list);
	if (rc != SLURM_SUCCESS) {
		slurm_perror("failure: gres_plugin_init_node_config");
		exit(1);
	}

	set_buf_offset(buffer, 0);
	rc = gres_plugin_node_config_unpack(buffer, node_name);
	if (rc != SLURM_SUCCESS) {
		slurm_perror("failure: gres_plugin_node_config_unpack");
		exit(1);
	}

	core_count = cpu_count;
	sock_count = 1;
	rc = gres_plugin_node_config_validate(node_name, orig_config,
					      &new_config, &node_gres_list,
					      cpu_count, core_count, sock_count,
					      0, &reason_down);
	if (rc != SLURM_SUCCESS) {
		slurm_perror("failure: gres_plugin_node_config_validate");
		exit(1);
	}

	if (argc > 2)
		tres_per_node = xstrdup(argv[1]);

	rc = gres_plugin_job_state_validate(NULL,	/* cpus_per_tres */
					    NULL,	/* tres_freq */
					    NULL,	/* tres_per_job */
					    tres_per_node,
					    NULL,	/* tres_per_socket */
					    NULL,	/* tres_per_task */
					    NULL,	/* mem_per_tres */
					    &num_tasks,
					    &min_nodes,
					    &max_nodes,
					    &ntasks_per_node,
					    &ntasks_per_socket,
					    &sockets_per_node,
					    &cpus_per_task,
					    &job_gres_list);
	if (rc != SLURM_SUCCESS) {
		slurm_seterrno(rc);
		slurm_perror("failure: gres_plugin_job_state_validate");
		exit(1);
	}

	gres_plugin_node_state_log(node_gres_list, node_name);
	gres_plugin_job_state_log(job_gres_list, job_id);

	cpu_bitmap = bit_alloc(cpu_count);
	bit_nset(cpu_bitmap, 0, cpu_count - 1);
	cpu_alloc = gres_plugin_job_test(job_gres_list, node_gres_list, true,
					 cpu_bitmap, 0, cpu_count - 1,
					 job_id, node_name);
	if (cpu_alloc == NO_VAL)
		printf("cpu_alloc=ALL\n");
	else
		printf("cpu_alloc=%u\n", cpu_alloc);

	rc = gres_plugin_fini();
	if (rc != SLURM_SUCCESS) {
		slurm_perror("failure: gres_plugin_fini");
		exit(1);
	}

	printf("Test %s ran to completion\n\n", argv[3]);
	exit(0);
}