Example #1
/* Note change in configuration (e.g. "DebugFlag=JobContainer" set) */
extern void container_g_reconfig(void)
{
    int i;

    (void) job_container_init();

    slurm_mutex_lock(&g_container_context_lock);
    for (i = 0; i < g_container_context_num; i++) {
        (*(ops[i].container_p_reconfig))();
    }
    slurm_mutex_unlock(&g_container_context_lock);

    return;
}
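All of the container_g_* wrappers here dispatch through a per-plugin function-pointer table guarded by one mutex. As a point of reference, this is a minimal sketch of what that dispatch state could look like; the exact Slurm definitions differ, so treat the type and field names below as assumptions:

#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>

/* Sketch of the plugin dispatch table (assumed layout, not the exact
 * Slurm definition). Each loaded job_container plugin contributes one
 * entry; the container_g_* wrappers walk the array and invoke the
 * corresponding container_p_* callback of every plugin. */
typedef struct {
    void (*container_p_reconfig)(void);
    int  (*container_p_restore)(char *dir_name, bool recover);
    int  (*container_p_delete)(uint32_t job_id);
} job_container_ops_t;

static job_container_ops_t *ops;         /* one entry per loaded plugin */
static int g_container_context_num = -1; /* count, set by job_container_init() */
static pthread_mutex_t g_container_context_lock = PTHREAD_MUTEX_INITIALIZER;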
Example #2
/* Restore container information */
extern int container_g_restore(char * dir_name, bool recover)
{
    int i, rc = SLURM_SUCCESS;

    if (job_container_init())
        return SLURM_ERROR;

    slurm_mutex_lock(&g_container_context_lock);
    for (i = 0; ((i < g_container_context_num) && (rc == SLURM_SUCCESS));
            i++) {
        rc = (*(ops[i].container_p_restore))(dir_name, recover);
    }
    slurm_mutex_unlock(&g_container_context_lock);

    return rc;
}
Example #3
/* Delete the container for the specified job */
extern int container_g_delete(uint32_t job_id)
{
    int i, rc = SLURM_SUCCESS;

    if (job_container_init())
        return SLURM_ERROR;

    slurm_mutex_lock(&g_container_context_lock);
    for (i = 0; ((i < g_container_context_num) && (rc == SLURM_SUCCESS));
            i++) {
        rc = (*(ops[i].container_p_delete))(job_id);
    }
    slurm_mutex_unlock(&g_container_context_lock);

    return rc;
}
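Note that container_g_restore() and container_g_delete() stop at the first plugin whose callback returns something other than SLURM_SUCCESS and propagate that code to the caller. A hypothetical caller-side sketch (the cleanup function and its logging are illustrative assumptions, not Slurm code):

#include <stdint.h>
#include <stdio.h>

#define SLURM_SUCCESS 0                 /* matches the convention above */

extern int container_g_delete(uint32_t job_id);

/* Hypothetical job-cleanup path: tear down the job's container and
 * report a failure from any plugin in the stack. */
static void cleanup_job_container(uint32_t job_id)
{
    if (container_g_delete(job_id) != SLURM_SUCCESS)
        fprintf(stderr, "container_g_delete(%u) failed\n",
                (unsigned) job_id);
}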
Example #4
int
main (int argc, char *argv[])
{
	int i, pidfd;
	int blocked_signals[] = {SIGPIPE, 0};
	int cc;
	char *oom_value;
	uint32_t slurmd_uid = 0;
	uint32_t curr_uid = 0;
	char time_stamp[256];
	log_options_t lopts = LOG_OPTS_INITIALIZER;

	/* NOTE: logfile is NULL at this point */
	log_init(argv[0], lopts, LOG_DAEMON, NULL);

	/*
	 * Make sure we have no extra open files which
	 * would be propagated to spawned tasks.
	 */
	cc = sysconf(_SC_OPEN_MAX);
	for (i = 3; i < cc; i++)
		close(i);

	/*
	 * Drop supplementary groups.
	 */
	if (geteuid() == 0) {
		if (setgroups(0, NULL) != 0) {
			fatal("Failed to drop supplementary groups, "
			      "setgroups: %m");
		}
	} else {
		debug("Not running as root. Can't drop supplementary groups");
	}

	/*
	 * Create and set default values for the slurmd global
	 * config variable "conf"
	 */
	conf = xmalloc(sizeof(slurmd_conf_t));
	_init_conf();
	conf->argv = &argv;
	conf->argc = &argc;

	if (_slurmd_init() < 0) {
		error( "slurmd initialization failed" );
		fflush( NULL );
		exit(1);
	}

	slurmd_uid = slurm_get_slurmd_user_id();
	curr_uid = getuid();
	if (curr_uid != slurmd_uid) {
		struct passwd *pw = NULL;
		char *slurmd_user = NULL;
		char *curr_user = NULL;

		/* getpwuid() returns a pointer to static storage, so the
		 * result of the first call must be xstrdup()'d or the
		 * second call would overwrite it. The copy is never
		 * freed, but fatal() is called right after, so the leak
		 * does not matter.
		 */
		if ((pw=getpwuid(slurmd_uid)))
			slurmd_user = xstrdup(pw->pw_name);
		if ((pw=getpwuid(curr_uid)))
			curr_user = pw->pw_name;

		fatal("You are running slurmd as something "
		      "other than user %s(%d).  If you want to "
		      "run as this user add SlurmdUser=%s "
		      "to the slurm.conf file.",
		      slurmd_user, slurmd_uid, curr_user);
	}
	init_setproctitle(argc, argv);

	xsignal(SIGTERM, &_term_handler);
	xsignal(SIGINT,  &_term_handler);
	xsignal(SIGHUP,  &_hup_handler );
	xsignal_block(blocked_signals);

	debug3("slurmd initialization successful");

	/*
	 * Become a daemon if desired.
	 * Do not chdir("/") or close all fd's
	 */
	if (conf->daemonize) {
		if (daemon(1,1) == -1)
			error("Couldn't daemonize slurmd: %m");
	}
	test_core_limit();
	info("slurmd version %s started", SLURM_VERSION_STRING);
	debug3("finished daemonize");

	if ((oom_value = getenv("SLURMD_OOM_ADJ"))) {
		i = atoi(oom_value);
		debug("Setting slurmd oom_adj to %d", i);
		set_oom_adj(i);
	}

	_kill_old_slurmd();

	if (conf->mlock_pages) {
		/*
		 * Call mlockall() if available to ensure slurmd
		 *  doesn't get swapped out
		 */
#ifdef _POSIX_MEMLOCK
		if (mlockall (MCL_FUTURE | MCL_CURRENT) < 0)
			error ("failed to mlock() slurmd pages: %m");
#else
		error ("mlockall() system call does not appear to be available");
#endif /* _POSIX_MEMLOCK */
	}


	/*
	 * Restore any saved revoked credential information
	 */
	if (!conf->cleanstart && (_restore_cred_state(conf->vctx) < 0))
		return SLURM_FAILURE;

	if (jobacct_gather_init() != SLURM_SUCCESS)
		fatal("Unable to initialize jobacct_gather");
	if (job_container_init() < 0)
		fatal("Unable to initialize job_container plugin.");
	if (container_g_restore(conf->spooldir, !conf->cleanstart))
		error("Unable to restore job_container state.");
	if (switch_g_node_init() < 0)
		fatal("Unable to initialize interconnect.");
	if (conf->cleanstart && switch_g_clear_node_state())
		fatal("Unable to clear interconnect state.");
	switch_g_slurmd_init();

	_create_msg_socket();

	conf->pid = getpid();
	/* This has to happen after daemon(), which closes all fd's,
	   so we keep the write lock of the pidfile.
	*/
	pidfd = create_pidfile(conf->pidfile, 0);

	rfc2822_timestamp(time_stamp, sizeof(time_stamp));
	info("%s started on %s", slurm_prog_name, time_stamp);

	_install_fork_handlers();
	list_install_fork_handlers();
	slurm_conf_install_fork_handlers();

	/*
	 * Initialize any plugins
	 */
	if (slurmd_plugstack_init())
		fatal("failed to initialize slurmd_plugstack");

	_spawn_registration_engine();
	_msg_engine();

	/*
	 * Close fd here, otherwise we'll deadlock since create_pidfile()
	 * flocks the pidfile.
	 */
	if (pidfd >= 0)			/* valid pidfd, non-error */
		(void) close(pidfd);	/* Ignore errors */
	if (unlink(conf->pidfile) < 0)
		error("Unable to remove pidfile `%s': %m", conf->pidfile);

	_wait_for_all_threads(120);
	_slurmd_fini();
	_destroy_conf();
	slurm_crypto_fini();	/* must be after _destroy_conf() */

	info("Slurmd shutdown completing");
	log_fini();
	return 0;
}
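The two pidfile comments in main() refer to the usual flock()-based pidfile scheme: the daemon keeps the file descriptor open so the exclusive lock is held for its entire lifetime, and closes it only at shutdown. A minimal sketch of that scheme under POSIX, offered as an assumption about what create_pidfile() does rather than its actual implementation:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/file.h>
#include <unistd.h>

/* Sketch of a flock()-based pidfile (assumption, not Slurm's actual
 * create_pidfile()). The returned fd must stay open: closing it would
 * release the lock, which is why main() above closes it only during
 * shutdown, right before unlinking the pidfile. */
static int create_pidfile_sketch(const char *path)
{
	char buf[32];
	int len;
	int fd = open(path, O_CREAT | O_WRONLY, 0644);

	if (fd < 0)
		return -1;
	if (flock(fd, LOCK_EX | LOCK_NB) < 0) {	/* another instance running */
		close(fd);
		return -1;
	}
	len = snprintf(buf, sizeof(buf), "%d\n", (int) getpid());
	if ((ftruncate(fd, 0) < 0) || (write(fd, buf, len) != len)) {
		close(fd);
		return -1;
	}
	return fd;	/* keep open to hold the lock */
}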