Exemple #1
0
static int do_site(void)
{
	int fd;
	int rv = -1;

	if (!cl.debug) {
		if (daemon(0, 0) < 0) {
			perror("daemon error");
			exit(EXIT_FAILURE);
		}
	}

	setup_logging();
	fd = lockfile();
	if (fd < 0)
		return fd;

	log_info("BOOTH cluster site daemon started");
	set_scheduler();
	set_oom_adj(-16);

	rv = loop(SITE);
	if (rv < 0)
		goto fail;

	unlink_lockfile(fd);
	close_logging();

	return 0;

fail:
	return -1;
}
Exemple #2
0
int main(int argc, char *argv[])
{
    char *envp = NULL;

    set_oom_adj(-1000);
    if (initService(argc, argv) != 0)
        return -1;

    envp = ::getenv("SCI_DISABLE_IPV6");
    if (envp && (::strcasecmp(envp, "yes") == 0)) {
        Socket::setDisableIPv6(1);
    }

    ExtListener *listener = new ExtListener();
    listener->start();

    launcherList.clear();
    while (1) {
        Locker::getLocker()->freeze();

        log_debug("Delete unused launcher");
        Locker::getLocker()->lock();
        vector<ExtLauncher *>::iterator lc;
        for (lc = launcherList.begin(); lc != launcherList.end(); lc++) {
            while (!(*lc)->isLaunched()) {
                // before join, this thread should have been launched
                SysUtil::sleep(WAIT_INTERVAL);
            }
            (*lc)->join();
            delete (*lc);
        }
        launcherList.clear();
        Locker::getLocker()->unlock();

        log_info("Begin to cleanup the jobInfo");
        Locker::getLocker()->lock();
        JOB_INFO::iterator it;
        for (it = jobInfo.begin(); it != jobInfo.end(); ) {
            if ((SysUtil::microseconds() - it->second.timestamp) > FIVE_MINUTES) {
                log_crit("Erase jobInfo item %d", it->first);
                jobInfo.erase(it++);
            } else {
                ++it;
            }
        }
        log_info("Finish cleanup the jobInfo");
        Locker::getLocker()->unlock();
    }

    listener->join();
    delete listener;
    delete Log::getInstance();

    return 0;
}
Exemple #3
0
static int do_server(int type)
{
	int fd = -1;
	int rv = -1;

	rv = setup(type);
	if (rv < 0)
		goto out;

	if (!daemonize) {
		if (daemon(0, 0) < 0) {
			perror("daemon error");
			exit(EXIT_FAILURE);
		}
	}

	/*
	  The lock cannot be obtained before the call to daemon(), otherwise
	  the lockfile would contain the pid of the parent, not the daemon.
	*/
	fd = lockfile();
	if (fd < 0)
		return fd;

	if (type == ARBITRATOR)
		log_info("BOOTH arbitrator daemon started");
	else if (type == SITE)
		log_info("BOOTH cluster site daemon started");

	set_scheduler();
	set_oom_adj(-16);

	rv = loop();

out:
	if (fd >= 0)
		unlink_lockfile(fd);

	return rv;
}
Exemple #4
0
int
main (int argc, char *argv[])
{
	int i, pidfd;
	int blocked_signals[] = {SIGPIPE, 0};
	int cc;
	char *oom_value;
	uint32_t slurmd_uid = 0;
	uint32_t curr_uid = 0;
	char time_stamp[256];
	log_options_t lopts = LOG_OPTS_INITIALIZER;

	/* NOTE: logfile is NULL at this point */
	log_init(argv[0], lopts, LOG_DAEMON, NULL);

	/*
	 * Make sure we have no extra open files which
	 * would be propagated to spawned tasks.
	 */
	cc = sysconf(_SC_OPEN_MAX);
	for (i = 3; i < cc; i++)
		close(i);

	/*
	 * Drop supplementary groups.
	 */
	if (geteuid() == 0) {
		if (setgroups(0, NULL) != 0) {
			fatal("Failed to drop supplementary groups, "
			      "setgroups: %m");
		}
	} else {
		debug("Not running as root. Can't drop supplementary groups");
	}

	/*
	 * Create and set default values for the slurmd global
	 * config variable "conf"
	 */
	conf = xmalloc(sizeof(slurmd_conf_t));
	_init_conf();
	conf->argv = &argv;
	conf->argc = &argc;

	if (_slurmd_init() < 0) {
		error( "slurmd initialization failed" );
		fflush( NULL );
		exit(1);
	}

	slurmd_uid = slurm_get_slurmd_user_id();
	curr_uid = getuid();
	if (curr_uid != slurmd_uid) {
		struct passwd *pw = NULL;
		char *slurmd_user = NULL;
		char *curr_user = NULL;

		/* since when you do a getpwuid you get a pointer to a
		 * structure you have to do a xstrdup on the first
		 * call or your information will just get over
		 * written.  This is a memory leak, but a fatal is
		 * called right after so it isn't that big of a deal.
		 */
		if ((pw=getpwuid(slurmd_uid)))
			slurmd_user = xstrdup(pw->pw_name);
		if ((pw=getpwuid(curr_uid)))
			curr_user = pw->pw_name;

		fatal("You are running slurmd as something "
		      "other than user %s(%d).  If you want to "
		      "run as this user add SlurmdUser=%s "
		      "to the slurm.conf file.",
		      slurmd_user, slurmd_uid, curr_user);
	}
	init_setproctitle(argc, argv);

	xsignal(SIGTERM, &_term_handler);
	xsignal(SIGINT,  &_term_handler);
	xsignal(SIGHUP,  &_hup_handler );
	xsignal_block(blocked_signals);

	debug3("slurmd initialization successful");

	/*
	 * Become a daemon if desired.
	 * Do not chdir("/") or close all fd's
	 */
	if (conf->daemonize) {
		if (daemon(1,1) == -1)
			error("Couldn't daemonize slurmd: %m");
	}
	test_core_limit();
	info("slurmd version %s started", SLURM_VERSION_STRING);
	debug3("finished daemonize");

	if ((oom_value = getenv("SLURMD_OOM_ADJ"))) {
		i = atoi(oom_value);
		debug("Setting slurmd oom_adj to %d", i);
		set_oom_adj(i);
	}

	_kill_old_slurmd();

	if (conf->mlock_pages) {
		/*
		 * Call mlockall() if available to ensure slurmd
		 *  doesn't get swapped out
		 */
#ifdef _POSIX_MEMLOCK
		if (mlockall (MCL_FUTURE | MCL_CURRENT) < 0)
			error ("failed to mlock() slurmd pages: %m");
#else
		error ("mlockall() system call does not appear to be available");
#endif /* _POSIX_MEMLOCK */
	}


	/*
	 * Restore any saved revoked credential information
	 */
	if (!conf->cleanstart && (_restore_cred_state(conf->vctx) < 0))
		return SLURM_FAILURE;

	if (jobacct_gather_init() != SLURM_SUCCESS)
		fatal("Unable to initialize jobacct_gather");
	if (job_container_init() < 0)
		fatal("Unable to initialize job_container plugin.");
	if (container_g_restore(conf->spooldir, !conf->cleanstart))
		error("Unable to restore job_container state.");
	if (switch_g_node_init() < 0)
		fatal("Unable to initialize interconnect.");
	if (conf->cleanstart && switch_g_clear_node_state())
		fatal("Unable to clear interconnect state.");
	switch_g_slurmd_init();

	_create_msg_socket();

	conf->pid = getpid();
	/* This has to happen after daemon(), which closes all fd's,
	   so we keep the write lock of the pidfile.
	*/
	pidfd = create_pidfile(conf->pidfile, 0);

	rfc2822_timestamp(time_stamp, sizeof(time_stamp));
	info("%s started on %s", slurm_prog_name, time_stamp);

	_install_fork_handlers();
	list_install_fork_handlers();
	slurm_conf_install_fork_handlers();

	/*
	 * Initialize any plugins
	 */
	if (slurmd_plugstack_init())
		fatal("failed to initialize slurmd_plugstack");

	_spawn_registration_engine();
	_msg_engine();

	/*
	 * Close fd here, otherwise we'll deadlock since create_pidfile()
	 * flocks the pidfile.
	 */
	if (pidfd >= 0)			/* valid pidfd, non-error */
		(void) close(pidfd);	/* Ignore errors */
	if (unlink(conf->pidfile) < 0)
		error("Unable to remove pidfile `%s': %m", conf->pidfile);

	_wait_for_all_threads(120);
	_slurmd_fini();
	_destroy_conf();
	slurm_crypto_fini();	/* must be after _destroy_conf() */

	info("Slurmd shutdown completing");
	log_fini();
       	return 0;
}
Exemple #5
0
static int do_server(int type)
{
	int rv = -1;
	static char log_ent[128] = DAEMON_NAME "-";

	rv = setup_config(type);
	if (rv < 0)
		return rv;


	if (!local) {
		log_error("Cannot find myself in the configuration.");
		exit(EXIT_FAILURE);
	}

	if (!daemonize) {
		if (daemon(0, 0) < 0) {
			perror("daemon error");
			exit(EXIT_FAILURE);
		}
	}

	/* The lockfile must be written to _after_ the call to daemon(), so
	 * that the lockfile contains the pid of the daemon, not the parent. */
	lock_fd = create_lockfile();
	if (lock_fd < 0)
		return lock_fd;

	atexit(server_exit);

	strcat(log_ent, type_to_string(local->type));
	cl_log_set_entity(log_ent);
	cl_log_enable_stderr(enable_stderr ? TRUE : FALSE);
	cl_log_set_facility(HA_LOG_FACILITY);
	cl_inherit_logging_environment(0);

	log_info("BOOTH %s %s daemon is starting",
			type_to_string(local->type), RELEASE_STR);

	signal(SIGUSR1, (__sighandler_t)tickets_log_info);
	signal(SIGTERM, (__sighandler_t)sig_exit_handler);
	signal(SIGINT, (__sighandler_t)sig_exit_handler);

	set_scheduler();
	set_oom_adj(-16);
	set_proc_title("%s %s %s for [%s]:%d",
			DAEMON_NAME,
			cl.configfile,
			type_to_string(local->type),
			local->addr_string,
			booth_conf->port);

	rv = limit_this_process();
	if (rv)
		return rv;

	if (cl_enable_coredumps(TRUE) < 0){
		cl_log(LOG_ERR, "enabling core dump failed");
	}
	cl_cdtocoredir();
	prctl(PR_SET_DUMPABLE, (unsigned long)TRUE, 0UL, 0UL, 0UL);

	rv = loop(lock_fd);

	return rv;
}