Example #1
static int
_slurmd_init(void)
{
	struct rlimit rlim;
	slurm_ctl_conf_t *cf;
	struct stat stat_buf;
	uint32_t cpu_cnt;

	/*
	 * Process command-line arguments first, since one option may be
	 * an alternate location for the slurm config file.
	 */
	_process_cmdline(*conf->argc, *conf->argv);

	/*
	 * Build nodes table like in slurmctld
	 * This is required by the topology stack
	 * Node tables setup must precede _read_config() so that the
	 * proper hostname is set.
	 */
	slurm_conf_init(conf->conffile);
	init_node_conf();
	/* slurm_select_init() must be called before
	 * build_all_nodeline_info() so that the latter is invoked with
	 * the proper arguments. */
	if (slurm_select_init(1) != SLURM_SUCCESS)
		return SLURM_FAILURE;
	build_all_nodeline_info(true);
	build_all_frontend_info(true);

	/*
	 * Read global slurm config file, override necessary values from
	 * defaults and command line.
	 */
	_read_config();

	cpu_cnt = MAX(conf->conf_cpus, conf->block_map_size);

	if ((gres_plugin_init() != SLURM_SUCCESS) ||
	    (gres_plugin_node_config_load(cpu_cnt) != SLURM_SUCCESS))
		return SLURM_FAILURE;
	if (slurm_topo_init() != SLURM_SUCCESS)
		return SLURM_FAILURE;

	/*
	 * Get and set slurmd topology information
	 * Build node hash table first to speed up the topo build
	 */
	rehash_node();
	slurm_topo_build_config();
	_set_topo_info();

	/*
	 * Check for cpu frequency set capabilities on this node
	 */
	cpu_freq_init(conf);

	_print_conf();

	if (slurm_proctrack_init() != SLURM_SUCCESS)
		return SLURM_FAILURE;
	if (slurmd_task_init() != SLURM_SUCCESS)
		return SLURM_FAILURE;
	if (slurm_auth_init(NULL) != SLURM_SUCCESS)
		return SLURM_FAILURE;
	if (spank_slurmd_init() < 0)
		return SLURM_FAILURE;

	if (getrlimit(RLIMIT_CPU, &rlim) == 0) {
		rlim.rlim_cur = rlim.rlim_max;
		setrlimit(RLIMIT_CPU, &rlim);
		if (rlim.rlim_max != RLIM_INFINITY) {
			error("Slurmd process CPU time limit is %d seconds",
			      (int) rlim.rlim_max);
		}
	}

	if (getrlimit(RLIMIT_NOFILE, &rlim) == 0) {
		rlim.rlim_cur = rlim.rlim_max;
		setrlimit(RLIMIT_NOFILE, &rlim);
	}
#ifndef NDEBUG
	if (getrlimit(RLIMIT_CORE, &rlim) == 0) {
		rlim.rlim_cur = rlim.rlim_max;
		setrlimit(RLIMIT_CORE, &rlim);
	}
#endif /* !NDEBUG */

	/*
	 * Create a context for verifying slurm job credentials
	 */
	if (!(conf->vctx = slurm_cred_verifier_ctx_create(conf->pubkey)))
		return SLURM_FAILURE;
	if (!strcmp(conf->select_type, "select/serial")) {
		/* Only cache credential for 5 seconds with select/serial
		 * for shorter cache searches and higher throughput */
		slurm_cred_ctx_set(conf->vctx, SLURM_CRED_OPT_EXPIRY_WINDOW, 5);
	}

	/*
	 * Create slurmd spool directory if necessary.
	 */
	if (_set_slurmd_spooldir() < 0) {
		error("Unable to initialize slurmd spooldir");
		return SLURM_FAILURE;
	}

	if (conf->cleanstart) {
		/*
		 * Need to kill any previously running slurmd processes here
		 */
		_kill_old_slurmd();

		stepd_cleanup_sockets(conf->spooldir, conf->node_name);
		_stepd_cleanup_batch_dirs(conf->spooldir, conf->node_name);
	}

	if (conf->daemonize) {
		bool success = false;

		if (conf->logfile && (conf->logfile[0] == '/')) {
			char *slash_ptr, *work_dir;
			work_dir = xstrdup(conf->logfile);
			slash_ptr = strrchr(work_dir, '/');
			if (slash_ptr == work_dir)
				work_dir[1] = '\0';
			else
				slash_ptr[0] = '\0';
			if ((access(work_dir, W_OK) != 0) ||
			    (chdir(work_dir) < 0)) {
				error("Unable to chdir to %s", work_dir);
			} else
				success = true;
			xfree(work_dir);
		}

		if (!success) {
			if ((access(conf->spooldir, W_OK) != 0) ||
			    (chdir(conf->spooldir) < 0)) {
				error("Unable to chdir to %s", conf->spooldir);
			} else
				success = true;
		}

		if (!success) {
			if ((access("/var/tmp", W_OK) != 0) ||
			    (chdir("/var/tmp") < 0)) {
				error("chdir(/var/tmp): %m");
				return SLURM_FAILURE;
			} else
				info("chdir to /var/tmp");
		}
	}

	/*
	 * Cache the group access list
	 */
	cf = slurm_conf_lock();
	if (cf->group_info & GROUP_CACHE)
		init_gids_cache(1);
	else
		init_gids_cache(0);
	slurm_conf_unlock();

	if ((devnull = open_cloexec("/dev/null", O_RDWR)) < 0) {
		error("Unable to open /dev/null: %m");
		return SLURM_FAILURE;
	}

	/* make sure we have slurmstepd installed */
	if (stat(conf->stepd_loc, &stat_buf))
		fatal("Unable to find slurmstepd file at %s", conf->stepd_loc);
	if (!S_ISREG(stat_buf.st_mode))
		fatal("slurmstepd not a file at %s", conf->stepd_loc);

	return SLURM_SUCCESS;
}
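
For context, here is a hedged sketch of a caller consuming _slurmd_init()'s result. This main() is purely illustrative and is not slurmd's actual entry point, which also handles daemonization, logging setup, and signal handling; the conf setup step is assumed to have happened already.

/*
 * Illustrative only: a minimal, hypothetical caller of _slurmd_init().
 * Assumes the global conf structure has been allocated and populated
 * from argc/argv, as the real daemon does before this point.
 */
int
main(int argc, char **argv)
{
	/* ... allocate conf and record argc/argv in it ... */

	if (_slurmd_init() != SLURM_SUCCESS)
		fatal("slurmd initialization failed");

	/* ... register signal handlers and enter the service loop ... */
	return SLURM_SUCCESS;
}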
Example #2
/*
 * route_p_split_hostlist - logic to split an input hostlist into
 *                           a set of hostlists to forward to.
 *
 * IN: hl        - hostlist_t   - list of every node to send the message
 *                                to; will be empty on return.
 * OUT: sp_hl    - hostlist_t** - the array of hostlists that will be
 *                                allocated with xmalloc()
 * OUT: count    - int*         - the count of created hostlists
 * RET: SLURM_SUCCESS - int
 *
 * Note: each created hostlist must be freed independently by the
 *       caller using hostlist_destroy().
 * Note: the hostlist_t array itself must be released with xfree().
 */
extern int route_p_split_hostlist(hostlist_t hl,
				  hostlist_t** sp_hl,
				  int* count)
{
	int i, j, k, hl_ndx, msg_count, sw_count, lst_count;
	char  *buf;
	bitstr_t *nodes_bitmap = NULL;		/* nodes in message list */
	bitstr_t *fwd_bitmap = NULL;		/* nodes in forward list */

	msg_count = hostlist_count(hl);
	if (switch_record_cnt == 0) {
		/* configs have not been processed yet */
		slurm_conf_init(NULL);
		if (init_node_conf()) {
			fatal("ROUTE: Failed to init slurm config");
		}
		if (build_all_nodeline_info(false)) {
			fatal("ROUTE: Failed to build node config");
		}
		rehash_node();

		if (slurm_topo_build_config() != SLURM_SUCCESS) {
			fatal("ROUTE: Failed to build topology config");
		}
	}
	*sp_hl = (hostlist_t*) xmalloc(switch_record_cnt * sizeof(hostlist_t));
	/* create bitmap of nodes to send message to */
	if (hostlist2bitmap(hl, false, &nodes_bitmap) != SLURM_SUCCESS) {
		buf = hostlist_ranged_string_xmalloc(hl);
		fatal("ROUTE: Failed to make bitmap from hostlist=%s.", buf);
	}

	/* Find lowest level switch containing all the nodes in the list */
	j = 0;
	for (i = 0; i <= switch_levels; i++) {
		for (j = 0; j < switch_record_cnt; j++) {
			if (switch_record_table[j].level == i) {
				if (bit_super_set(nodes_bitmap,
						  switch_record_table[j].
						  node_bitmap)) {
					/* All nodes in message list are in
					 * this switch */
					break;
				}
			}
		}
		if (j < switch_record_cnt) {
			/* Got here via break after bit_super_set */
			break; /* 'j' is our switch */
		} /* else, no switches at this level reach all nodes */
	}
	if (i > switch_levels) {
		/* This can only happen if trying to schedule multiple physical
		 * clusters as a single logical cluster under the control of a
		 * single slurmctld daemon, and sending something like a
		 * node_registration request to all nodes.
		 * Revert to default behavior. */
		if (debug_flags & DEBUG_FLAG_ROUTE) {
			buf = hostlist_ranged_string_xmalloc(hl);
			debug("ROUTE: didn't find switch containing nodes=%s",
			      buf);
			xfree(buf);
		}
		FREE_NULL_BITMAP(nodes_bitmap);
		xfree(*sp_hl);
		return route_split_hostlist_treewidth(hl, sp_hl, count);
	}
	if (switch_record_table[j].level == 0) {
		/* This is a leaf switch. Construct list based on TreeWidth */
		FREE_NULL_BITMAP(nodes_bitmap);
		xfree(*sp_hl);
		return route_split_hostlist_treewidth(hl, sp_hl, count);
	}
	/* loop through children, constructing a hostlist for each child
	 * switch with nodes in the message list */
	hl_ndx = 0;
	lst_count = 0;
	for (i = 0; i < switch_record_table[j].num_switches; i++) {
		k = switch_record_table[j].switch_index[i];
		fwd_bitmap = bit_copy(switch_record_table[k].node_bitmap);
		bit_and(fwd_bitmap, nodes_bitmap);
		sw_count = bit_set_count(fwd_bitmap);
		if (sw_count == 0) {
			continue; /* no nodes on this switch in message list */
		}
		(*sp_hl)[hl_ndx] = bitmap2hostlist(fwd_bitmap);
		/* Now remove nodes from this switch from message list */
		bit_not(fwd_bitmap);
		bit_and(nodes_bitmap, fwd_bitmap);
		FREE_NULL_BITMAP(fwd_bitmap);
		if (debug_flags & DEBUG_FLAG_ROUTE) {
			buf = hostlist_ranged_string_xmalloc((*sp_hl)[hl_ndx]);
			debug("ROUTE: ... sublist[%d] switch=%s :: %s",
			      i, switch_record_table[k].name, buf);
			xfree(buf);
		}
		hl_ndx++;
		lst_count += sw_count;
		if (lst_count == msg_count)
			break; /* all nodes in message are in a child list */
	}
	FREE_NULL_BITMAP(nodes_bitmap);

	*count = hl_ndx;
	return SLURM_SUCCESS;
}
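
The memory contract in the header comment is easy to get wrong, so here is a hedged usage sketch. The _forward_example() wrapper and the forwarding step are hypothetical; the fragment assumes the slurm hostlist and xmalloc facilities are in scope, as they are in this file.

/*
 * Illustrative caller only: demonstrates the documented ownership
 * rules -- each sublist is destroyed with hostlist_destroy() and
 * the array itself is released with xfree().
 */
static void _forward_example(hostlist_t hl)
{
	hostlist_t *sp_hl = NULL;
	int count = 0, i;

	if (route_p_split_hostlist(hl, &sp_hl, &count) != SLURM_SUCCESS)
		return;
	for (i = 0; i < count; i++) {
		/* ... forward the message to the nodes in sp_hl[i] ... */
		hostlist_destroy(sp_hl[i]);
	}
	xfree(sp_hl);
}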