/* _signal_handler - Process daemon-wide signals */ static void *_signal_handler(void *no_data) { int rc, sig; int sig_array[] = {SIGINT, SIGTERM, SIGHUP, SIGABRT, SIGUSR2, 0}; sigset_t set; (void) pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); (void) pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); /* Make sure no required signals are ignored (possibly inherited) */ _default_sigaction(SIGINT); _default_sigaction(SIGTERM); _default_sigaction(SIGHUP); _default_sigaction(SIGABRT); _default_sigaction(SIGUSR2); while (1) { xsignal_sigset_create(sig_array, &set); rc = sigwait(&set, &sig); if (rc == EINTR) continue; switch (sig) { case SIGHUP: /* kill -1 */ info("Reconfigure signal (SIGHUP) received"); reconfig(); break; case SIGINT: /* kill -2 or <CTRL-C> */ case SIGTERM: /* kill -15 */ info("Terminate signal (SIGINT or SIGTERM) received"); shutdown_threads(); return NULL; /* Normal termination */ case SIGABRT: /* abort */ info("SIGABRT received"); abort(); /* Should terminate here */ shutdown_threads(); return NULL; case SIGUSR2: info("Logrotate signal (SIGUSR2) received"); _update_logging(false); break; default: error("Invalid signal (%d) received", sig); } } }
/* run_dbd_backup - this is the backup controller, it should run in standby * mode, assuming control when the primary controller stops responding */ extern void run_dbd_backup(void) { slurm_persist_conn_t slurmdbd_conn; primary_resumed = false; memset(&slurmdbd_conn, 0, sizeof(slurm_persist_conn_t)); slurmdbd_conn.rem_host = slurmdbd_conf->dbd_addr; slurmdbd_conn.rem_port = slurmdbd_conf->dbd_port; slurmdbd_conn.cluster_name = "backup_slurmdbd"; slurmdbd_conn.fd = -1; slurmdbd_conn.shutdown = &shutdown_time; slurm_persist_conn_open_without_init(&slurmdbd_conn); /* repeatedly ping Primary */ while (!shutdown_time) { int writeable = slurm_persist_conn_writeable(&slurmdbd_conn); //info("%d %d", have_control, writeable); if (have_control && writeable == 1) { info("Primary has come back"); primary_resumed = true; shutdown_threads(); have_control = false; break; } else if (!have_control && writeable <= 0) { have_control = true; info("Taking Control"); break; } sleep(1); if (writeable <= 0) slurm_persist_conn_reopen(&slurmdbd_conn, false); } slurm_persist_conn_close(&slurmdbd_conn); return; }