Esempio n. 1
0
/*
 * Check the stonith-watchdog-timeout option against the local SBD setup.
 *
 * value: option text as configured, or NULL when the option is unset.
 *
 * Returns TRUE when the option is disabled (NULL, or crm_get_msec() yields a
 * non-positive result) or when SBD is located and the option is not below
 * the value reported by crm_get_sbd_timeout().  Otherwise an emergency
 * message is logged and crm_exit(DAEMON_RESPAWN_STOP) is called; FALSE is
 * returned should that call come back.
 */
gboolean
check_sbd_timeout(const char *value)
{
    long st_timeout = 0;
    long sbd_timeout = 0;

    /* An unset option is treated like a zero timeout */
    if (value) {
        st_timeout = crm_get_msec(value);
    }

    if (st_timeout <= 0) {
        crm_debug("Watchdog may be enabled but stonith-watchdog-timeout is disabled (%s)",
                  value? value : "default");
        return TRUE;
    }

    /* From here on value is non-NULL, since a NULL value leaves st_timeout at 0 */
    if (pcmk_locate_sbd() == 0) {
        do_crm_log_always(LOG_EMERG,
                          "Shutting down: stonith-watchdog-timeout configured (%s) but SBD not active",
                          value);
        crm_exit(DAEMON_RESPAWN_STOP);
        return FALSE;
    }

    sbd_timeout = crm_get_sbd_timeout();

    /* NOTE(review): equal timeouts are accepted here although the message
     * says the option must be greater -- confirm which one is intended
     */
    if (st_timeout < sbd_timeout) {
        do_crm_log_always(LOG_EMERG,
                          "Shutting down: stonith-watchdog-timeout (%s) too short (must be >%ldms)",
                          value, sbd_timeout);
        crm_exit(DAEMON_RESPAWN_STOP);
        return FALSE;
    }

    crm_info("Watchdog configured with stonith-watchdog-timeout %s and SBD timeout %ldms",
             value, sbd_timeout);
    return TRUE;
}
Esempio n. 2
0
/*
 * Panic the local node, either through sbd or directly.
 *
 * If the "panic-delay" trace callsite has any targets, the node is not
 * panicked: an emergency message is logged and
 * crm_exit(DAEMON_RESPAWN_STOP) is called instead.
 *
 * origin: text identifying the caller; used only in log messages.
 */
void
pcmk_panic(const char *origin)
{
    /* Looked up on the first call and reused afterwards */
    static struct qb_log_callsite *delay_cs = NULL;

    if (delay_cs == NULL) {
        delay_cs = qb_log_callsite_get(__func__, __FILE__, "panic-delay", LOG_TRACE, __LINE__, crm_trace_nonlog);
    }

    /* Result deliberately ignored: the call is made so that sbd_pid is set */
    (void)pcmk_locate_sbd();

    if (delay_cs != NULL && delay_cs->targets) {
        /* A parent pid of 1 means our original parent no longer exists */
        do_crm_log_always(LOG_EMERG,
                          "Shutting down instead of panicking the node: origin=%s, sbd=%d, parent=%d",
                          origin, sbd_pid, getppid());
        crm_exit(DAEMON_RESPAWN_STOP);
        return;
    }

    if (sbd_pid <= 1) {
        /* No usable sbd process to signal, so handle the panic ourselves */
        do_crm_log_always(LOG_EMERG, "Panicking the system directly: %s", origin);
        pcmk_panic_local();
        return;
    }

    do_crm_log_always(LOG_EMERG, "Signaling sbd(%d) to panic the system: %s", sbd_pid, origin);
    pcmk_panic_sbd();
}
Esempio n. 3
0
static void
pcmk_panic_local(void) 
{
    int rc = pcmk_ok;
    uid_t uid = geteuid();
    pid_t ppid = getppid();

    if(uid != 0 && ppid > 1) {
        /* We're a non-root pacemaker daemon (cib, crmd, pengine,
         * attrd, etc) with the original pacemakerd parent
         *
         * Of these, only crmd is likely to be initiating resets
         */
        do_crm_log_always(LOG_EMERG, "Signaling parent %d to panic", ppid);
        crm_exit(pcmk_err_panic);
        return;

    } else if (uid != 0) {
        /*
         * No permissions and no pacemakerd parent to escalate to
         * Track down the new pacakerd process and send a signal instead
         */
        union sigval signal_value;

        memset(&signal_value, 0, sizeof(signal_value));
        ppid = crm_procfs_pid_of("pacemakerd");
        do_crm_log_always(LOG_EMERG, "Signaling pacemakerd(%d) to panic", ppid);

        if(ppid > 1 && sigqueue(ppid, SIGQUIT, signal_value) < 0) {
            crm_perror(LOG_EMERG, "Cannot signal pacemakerd(%d) to panic", ppid);
        }
        /* The best we can do now is die */
        crm_exit(pcmk_err_panic);
        return;
    }

    /* We're either pacemakerd, or a pacemaker daemon running as root */

    if (safe_str_eq("crash", getenv("PCMK_panic_action"))) {
        sysrq_trigger('c');
    } else {
        sysrq_trigger('b');
    }
    /* reboot(RB_HALT_SYSTEM); rc = errno; */
    reboot(RB_AUTOBOOT);
    rc = errno;

    do_crm_log_always(LOG_EMERG, "Reboot failed, escalating to %d: %s (%d)", ppid, pcmk_strerror(rc), rc);

    if(ppid > 1) {
        /* child daemon */
        exit(pcmk_err_panic);
    } else {
        /* pacemakerd or orphan child */
        exit(DAEMON_RESPAWN_STOP);
    }
}
Esempio n. 4
0
/*
 * Check the stonith-watchdog-timeout option against the local SBD setup.
 *
 * value: option text as configured, or NULL when the option is unset.
 *
 * Returns TRUE when the option is disabled (NULL, or crm_get_msec() yields a
 * non-positive result) or when SBD is located and the option is not below
 * the value reported by crm_get_sbd_timeout().  Otherwise an emergency
 * message is logged and crm_exit(DAEMON_RESPAWN_STOP) is called; FALSE is
 * returned should that call come back.
 */
gboolean
check_sbd_timeout(const char *value)
{
    long sbd_timeout = crm_get_sbd_timeout();
    /* Never hand NULL to the parser; an unset option counts as disabled */
    long st_timeout = (value != NULL) ? crm_get_msec(value) : 0;

    if(st_timeout <= 0) {
        /* Substitute a placeholder so NULL is never passed for %s */
        crm_notice("Watchdog may be enabled but stonith-watchdog-timeout is disabled: %s",
                   (value != NULL) ? value : "default");

        /* Nothing was validated, so return before the "consistent" message */
        return TRUE;

    } else if(pcmk_locate_sbd() == 0) {
        do_crm_log_always(LOG_EMERG, "Shutting down: stonith-watchdog-timeout is configured (%ldms) but SBD is not active", st_timeout);
        crm_exit(DAEMON_RESPAWN_STOP);
        return FALSE;

    } else if(st_timeout < sbd_timeout) {
        do_crm_log_always(LOG_EMERG, "Shutting down: stonith-watchdog-timeout (%ldms) is too short (must be greater than %ldms)",
                          st_timeout, sbd_timeout);
        crm_exit(DAEMON_RESPAWN_STOP);
        return FALSE;
    }

    /* value is non-NULL here: a NULL value leaves st_timeout at 0 above */
    crm_info("Watchdog functionality is consistent: %s delay exceeds timeout of %ldms", value, sbd_timeout);
    return TRUE;
}
Esempio n. 5
0
/*
 * Handle the exit of a child process.
 *
 * Logs how the child ended and, for two exit codes, disables respawning
 * and escalates: DAEMON_RESPAWN_STOP starts a shutdown, pcmk_err_panic
 * panics the node and then starts a shutdown.  In every case the child is
 * finally passed to pcmk_process_exit().
 *
 * p:        mainloop child handle; its name and user data are read
 * pid:      process id of the child (used in log messages)
 * core:     core indicator, logged as core=%d when a signal ended the child
 * signo:    terminating signal, or 0 if the child exited by itself
 * exitcode: exit status, examined only when signo is 0
 */
static void
pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)
{
    pcmk_child_t *child = mainloop_child_userdata(p);
    const char *name = mainloop_child_name(p);

    if (signo != 0) {
        /* Same text for every signal; SIGKILL is only logged as a warning */
        if (signo == SIGKILL) {
            crm_warn("The %s process (%d) terminated with signal %d (core=%d)", name, pid, signo, core);
        } else {
            crm_err("The %s process (%d) terminated with signal %d (core=%d)", name, pid, signo, core);
        }

    } else if (exitcode == pcmk_ok) {
        crm_info("The %s process (%d) exited: %s (%d)", name, pid, pcmk_strerror(exitcode), exitcode);

    } else if (exitcode == DAEMON_RESPAWN_STOP) {
        crm_warn("The %s process (%d) can no longer be respawned, shutting the cluster down.", name, pid);
        child->respawn = FALSE;
        fatal_error = TRUE;
        pcmk_shutdown(SIGTERM);

    } else if (exitcode == pcmk_err_panic) {
        do_crm_log_always(LOG_EMERG, "The %s process (%d) instructed the machine to reset", name, pid);
        child->respawn = FALSE;
        fatal_error = TRUE;
        pcmk_panic(__FUNCTION__);
        /* Reached only if pcmk_panic() comes back */
        pcmk_shutdown(SIGTERM);

    } else {
        crm_err("The %s process (%d) exited: %s (%d)", name, pid, pcmk_strerror(exitcode), exitcode);
    }

    pcmk_process_exit(child);
}
Esempio n. 6
0
static void
pcmk_panic_sbd(void) 
{
    union sigval signal_value;
    pid_t ppid = getppid();

    do_crm_log_always(LOG_EMERG, "Signaling sbd(%d) to panic", sbd_pid);

    memset(&signal_value, 0, sizeof(signal_value));
    /* TODO: Arrange for a slightly less brutal option? */
    if(sigqueue(sbd_pid, SIGKILL, signal_value) < 0) {
        crm_perror(LOG_EMERG, "Cannot signal SBD(%d) to terminate", sbd_pid);
        pcmk_panic_local();
    }

    if(ppid > 1) {
        /* child daemon */
        exit(pcmk_err_panic);
    } else {
        /* pacemakerd or orphan child */
        exit(DAEMON_RESPAWN_STOP);
    }
}