Ejemplo n.º 1
0
/*
 * Handle the result notification of a fencing (stonith) operation.
 *
 * st        stonith connection the callback is registered on (not used here)
 * st_event  description of the fencing operation; when NULL an error is
 *           logged and nothing else happens
 *
 * Processing order:
 *  1. the event is handed to crmd_notify_fencing_op();
 *  2. "on" (unfencing) results are logged and the function returns;
 *  3. a successful operation whose target is this node logs a critical
 *     message, tries to halt the machine and exits the process;
 *  4. every other result is logged; on success the fail count is reset for
 *     T_STONITH_NOTIFY_FENCE operations, CMAN is notified (only when built
 *     with SUPPORT_CMAN), a stonith update is sent where appropriate and
 *     crmd_peer_down() is called for the target.
 */
static void
tengine_stonith_notify(stonith_t * st, stonith_event_t * st_event)
{
    /* Build the "<system name>.<pid>" identifier once; it is compared with
     * the event's client_origin below to recognise our own requests */
    if(te_client_id == NULL) {
        te_client_id = crm_strdup_printf("%s.%d", crm_system_name, getpid());
    }

    if (st_event == NULL) {
        crm_err("Notify data not found");
        return;
    }

    crmd_notify_fencing_op(st_event);

    if (st_event->result == pcmk_ok && safe_str_eq("on", st_event->action)) {
        crm_notice("%s was successfully unfenced by %s (at the request of %s)",
                   st_event->target, st_event->executioner ? st_event->executioner : "<anyone>", st_event->origin);
                /* TODO: Hook up st_event->device */
        return;

    } else if (safe_str_eq("on", st_event->action)) {
        crm_err("Unfencing of %s by %s failed: %s (%d)",
                st_event->target, st_event->executioner ? st_event->executioner : "<anyone>",
                pcmk_strerror(st_event->result), st_event->result);
        return;

    } else if (st_event->result == pcmk_ok && crm_str_eq(st_event->target, fsa_our_uname, TRUE)) {
        crm_crit("We were allegedly just fenced by %s for %s!",
                 st_event->executioner ? st_event->executioner : "<anyone>", st_event->origin); /* Dumps blackbox if enabled */

        qb_log_fini(); /* Try to get the above log message to disk - somehow */

        /* Get out ASAP and do not come back up.
         *
         * Triggering a reboot is also not the worst idea either since
         * the rest of the cluster thinks we're safely down
         */

#ifdef RB_HALT_SYSTEM
        reboot(RB_HALT_SYSTEM);
#endif

        /*
         * If reboot() fails or is not supported, coming back up will
         * probably lead to a situation where the other nodes set our
         * status to 'lost' because of the fencing callback and will
         * discard subsequent election votes with:
         *
         * Election 87 (current: 5171, owner: 103): Processed vote from east-03 (Peer is not part of our cluster)
         *
         * So just stay dead, something is seriously messed up anyway.
         *
         */
        exit(100); /* None of our wrappers since we already called qb_log_fini() */
    }

    if (st_event->result == pcmk_ok &&
        safe_str_eq(st_event->operation, T_STONITH_NOTIFY_FENCE)) {
        st_fail_count_reset(st_event->target);
    }

    crm_notice("Peer %s was%s terminated (%s) by %s for %s: %s (ref=%s) by client %s",
               st_event->target, st_event->result == pcmk_ok ? "" : " not",
               st_event->action,
               st_event->executioner ? st_event->executioner : "<anyone>",
               st_event->origin, pcmk_strerror(st_event->result), st_event->id,
               st_event->client_origin ? st_event->client_origin : "<unknown>");

#if SUPPORT_CMAN
    if (st_event->result == pcmk_ok && is_cman_cluster()) {
        int local_rc = 0;
        int confirm = 0;
        /* fenced_external() is given a private, writable copy of the name */
        char *target_copy = strdup(st_event->target);

        /* In case fenced hasn't noticed yet
         *
         * Any fencing that has been initiated will be completed by way of the fence_pcmk redirect
         */
        local_rc = fenced_external(target_copy);
        if (local_rc != 0) {
            crm_err("Could not notify CMAN that '%s' is now fenced: %d", st_event->target,
                    local_rc);
        } else {
            crm_notice("Notified CMAN that '%s' is now fenced", st_event->target);
        }

        /* In case fenced is already trying to shoot it */
        confirm = open("/var/run/cluster/fenced_override", O_NONBLOCK|O_WRONLY);
        /* open() reports failure with -1; descriptor 0 is a valid result and
         * must be written to and closed like any other */
        if (confirm >= 0) {
            int ignore = 0;
            int len = strlen(target_copy);

            errno = 0;
            local_rc = write(confirm, target_copy, len);
            ignore = write(confirm, "\n", 1);

            if(ignore < 0 && errno == EBADF) {
                crm_trace("CMAN not expecting %s to be fenced (yet)", st_event->target);

            } else if (local_rc < len) {
                crm_perror(LOG_ERR, "Confirmation of CMAN fencing event for '%s' failed: %d", st_event->target, local_rc);

            } else {
                fsync(confirm);
                crm_notice("Confirmed CMAN fencing event for '%s'", st_event->target);
            }
            close(confirm);
        }
        free(target_copy);
    }
#endif

    if (st_event->result == pcmk_ok) {
        crm_node_t *peer = crm_find_peer_full(0, st_event->target, CRM_GET_PEER_REMOTE | CRM_GET_PEER_CLUSTER);
        const char *uuid = NULL;
        gboolean we_are_executioner = safe_str_eq(st_event->executioner, fsa_our_uname);

        /* Nothing further can be recorded for a target we have no entry for */
        if (peer == NULL) {
            return;
        }

        uuid = crm_peer_uuid(peer);

        crm_trace("target=%s dc=%s", st_event->target, fsa_our_dc);
        if(AM_I_DC) {
            /* The DC always sends updates */
            send_stonith_update(NULL, st_event->target, uuid);

            if (st_event->client_origin && safe_str_neq(st_event->client_origin, te_client_id)) {

                /* Abort the current transition graph if it wasn't us
                 * that invoked stonith to fence someone
                 */
                crm_info("External fencing operation from %s fenced %s", st_event->client_origin, st_event->target);
                abort_transition(INFINITY, tg_restart, "External Fencing Operation", NULL);
            }

            /* Assume it was our leader if we don't currently have one */
        } else if (fsa_our_dc == NULL || safe_str_eq(fsa_our_dc, st_event->target)) {
            crm_notice("Target %s our leader %s (recorded: %s)",
                       fsa_our_dc ? "was" : "may have been", st_event->target,
                       fsa_our_dc ? fsa_our_dc : "<unset>");

            /* Given the CIB resyncing that occurs around elections,
             * have one node update the CIB now and, if the new DC is different,
             * have them do so too after the election
             */
            if (we_are_executioner) {
                send_stonith_update(NULL, st_event->target, uuid);
            }
            /* The list stores its own copy of the target name */
            stonith_cleanup_list = g_list_append(stonith_cleanup_list, strdup(st_event->target));

        }

        crmd_peer_down(peer, TRUE);
     }
}
Ejemplo n.º 2
0
/*
 * Handle the result notification of a fencing (stonith) operation.
 *
 * st        stonith connection the callback is registered on (not used here)
 * st_event  description of the fencing operation; when NULL an error is
 *           logged and nothing else happens
 *
 * A successful operation that targeted this node is reported and raised as
 * an internal FSA error.  Any other result is logged; on success the fail
 * count is reset for T_STONITH_NOTIFY_FENCE operations, CMAN is notified
 * (only when built with SUPPORT_CMAN) and a stonith update is sent either by
 * the DC or, when the target may have been the DC, by the executing node.
 */
static void
tengine_stonith_notify(stonith_t * st, stonith_event_t * st_event)
{
    /* "<system name>.<pid>", built on first use and kept for the lifetime of
     * the process; compared with the event's client_origin below */
    static char *client_id = NULL;
    if(client_id == NULL) {
        client_id = g_strdup_printf("%s.%d", crm_system_name, getpid());
    }

    if (st_event == NULL) {
        crm_err("Notify data not found");
        return;
    }

    if (st_event->result == pcmk_ok && crm_str_eq(st_event->target, fsa_our_uname, TRUE)) {
        /* executioner may be unset (the notice below guards it as well);
         * never hand a NULL pointer to %s */
        crm_err("We were alegedly just fenced by %s for %s!",
                st_event->executioner ? st_event->executioner : "<anyone>",
                st_event->origin);
        register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__);
        return;
    }

    if (st_event->result == pcmk_ok &&
        safe_str_eq(st_event->operation, T_STONITH_NOTIFY_FENCE)) {
        reset_st_fail_count(st_event->target);
    }

    crm_notice("Peer %s was%s terminated (%s) by %s for %s: %s (ref=%s) by client %s",
               st_event->target, st_event->result == pcmk_ok ? "" : " not",
               st_event->action,
               st_event->executioner ? st_event->executioner : "<anyone>",
               st_event->origin, pcmk_strerror(st_event->result), st_event->id,
               st_event->client_origin ? st_event->client_origin : "<unknown>");

#if SUPPORT_CMAN
    if (st_event->result == pcmk_ok && is_cman_cluster()) {
        int local_rc = 0;
        /* fenced_external() is given a private, writable copy of the name */
        char *target_copy = strdup(st_event->target);

        /* In case fenced hasn't noticed yet
         *
         * Any fencing that has been initiated will be completed by way of the fence_pcmk redirect
         */
        local_rc = fenced_external(target_copy);
        if (local_rc != 0) {
            crm_err("Could not notify CMAN that '%s' is now fenced: %d", st_event->target,
                    local_rc);
        } else {
            crm_notice("Notified CMAN that '%s' is now fenced", st_event->target);
        }
        free(target_copy);
    }
#endif

     if (st_event->result == pcmk_ok) {
         const char *uuid = get_uuid(st_event->target);
        gboolean we_are_executioner = safe_str_eq(st_event->executioner, fsa_our_uname);

        crm_trace("target=%s dc=%s", st_event->target, fsa_our_dc);
        if(AM_I_DC) {
            /* The DC always sends updates */
            send_stonith_update(NULL, st_event->target, uuid);

            if (st_event->client_origin && safe_str_neq(st_event->client_origin, client_id)) {

                /* Abort the current transition graph if it wasn't us
                 * that invoked stonith to fence someone
                 */
                crm_info("External fencing operation from %s fenced %s", st_event->client_origin, st_event->target);
                abort_transition(INFINITY, tg_restart, "External Fencing Operation", NULL);
            }

            /* Assume it was our leader if we don't currently have one */
        } else if (fsa_our_dc == NULL || safe_str_eq(fsa_our_dc, st_event->target)) {
            crm_notice("Target %s our leader %s (recorded: %s)",
                       fsa_our_dc ? "was" : "may have been", st_event->target,
                       fsa_our_dc ? fsa_our_dc : "<unset>");

            /* Given the CIB resyncing that occurs around elections,
             * have one node update the CIB now and, if the new DC is different,
             * have them do so too after the election
             */
            if (we_are_executioner) {
                send_stonith_update(NULL, st_event->target, uuid);
            }
            /* The list stores its own copy of the target name */
            stonith_cleanup_list = g_list_append(stonith_cleanup_list, strdup(st_event->target));

        }
     }
}
Ejemplo n.º 3
0
/*
 * Handle the result notification of a fencing (stonith) operation delivered
 * as an XML message.
 *
 * st     stonith connection the callback is registered on (not used here)
 * event  notification name (not used here)
 * msg    notification message; the operation details are read from its
 *        "//st-data" element and the return code from F_STONITH_RC on msg.
 *        If no such element is found the message is logged as an error and
 *        nothing else happens.
 *
 * A successful operation that targeted this node is reported and raised as
 * an internal FSA error.  Otherwise the outcome is logged; on success CMAN is
 * notified (only when built with SUPPORT_CMAN) and, when the request
 * originated from the target itself and the target is (or may be) our DC,
 * either a stonith update is sent or the target is queued on
 * stonith_cleanup_list.
 */
static void
tengine_stonith_notify(stonith_t * st, const char *event, xmlNode * msg)
{
    int rc = -99; /* stays at this value if the message carries no F_STONITH_RC */
    const char *origin = NULL;
    const char *target = NULL;
    const char *executioner = NULL;
    xmlNode *action = get_xpath_object("//st-data", msg, LOG_ERR);

    if (action == NULL) {
        crm_log_xml(LOG_ERR, "Notify data not found", msg);
        return;
    }

    crm_log_xml(LOG_DEBUG, "stonith_notify", msg);
    crm_element_value_int(msg, F_STONITH_RC, &rc);
    origin = crm_element_value(action, F_STONITH_ORIGIN);
    target = crm_element_value(action, F_STONITH_TARGET);
    executioner = crm_element_value(action, F_STONITH_DELEGATE);

    /* executioner may be absent (the failure branch already substituted
     * "<anyone>"); never hand a NULL pointer to %s */
    if (rc == stonith_ok && crm_str_eq(target, fsa_our_uname, TRUE)) {
        crm_err("We were alegedly just fenced by %s for %s!",
                executioner ? executioner : "<anyone>", origin);
        register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__);

    } else if (rc == stonith_ok) {
        crm_info("Peer %s was terminated (%s) by %s for %s (ref=%s): %s",
                 target,
                 crm_element_value(action, F_STONITH_OPERATION),
                 executioner ? executioner : "<anyone>", origin,
                 crm_element_value(action, F_STONITH_REMOTE), stonith_error2string(rc));
    } else {
        crm_err("Peer %s could not be terminated (%s) by %s for %s (ref=%s): %s",
                target,
                crm_element_value(action, F_STONITH_OPERATION),
                executioner ? executioner : "<anyone>", origin,
                crm_element_value(action, F_STONITH_REMOTE), stonith_error2string(rc));
    }

/* NOTE(review): this tests whether SUPPORT_CMAN is defined, not whether it is
 * non-zero - confirm that matches how the build system defines the macro */
#ifdef SUPPORT_CMAN
    if (rc == stonith_ok && is_cman_cluster()) {
        int local_rc = 0;
        int confirm = 0;
        /* fenced_external() is given a private, writable copy of the name */
        char *target_copy = crm_strdup(target);

        /* In case fenced hasn't noticed yet */
        local_rc = fenced_external(target_copy);
        if (local_rc != 0) {
            crm_err("Could not notify CMAN that '%s' is now fenced: %d", target, local_rc);
        } else {
            crm_notice("Notified CMAN that '%s' is now fenced", target);
        }

        /* In case fenced is already trying to shoot it */
        confirm = open("/var/run/cluster/fenced_override", O_NONBLOCK|O_WRONLY);
        /* open() reports failure with -1 (which is "true" in C), so the
         * descriptor has to be compared explicitly */
        if (confirm >= 0) {
            int len = strlen(target_copy);

            errno = 0;
            local_rc = write(confirm, target_copy, len);
            write(confirm, "\n", 1);

            if(errno == EBADF) {
                crm_trace("CMAN not expecting %s to be fenced (yet)", target);

            } else if (local_rc < len) {
                crm_perror(LOG_ERR, "Confirmation of CMAN fencing event for '%s' failed: %d", target, local_rc);

            } else {
                fsync(confirm);
                crm_notice("Confirmed CMAN fencing event for '%s'", target);
            }
            close(confirm);

        } else {
            /* Same trace the EBADF check above used to produce when write()
             * was attempted on the failed descriptor */
            crm_trace("CMAN not expecting %s to be fenced (yet)", target);
        }

        /* The copy is only needed for the calls above */
        free(target_copy);
    }
#endif

    /* Only acted upon when the operation's origin is the target itself */
    if (rc == stonith_ok && safe_str_eq(target, origin)) {
        if (fsa_our_dc == NULL || safe_str_eq(fsa_our_dc, target)) {
            crm_notice("Target was our leader %s (recorded: %s)",
                       target, fsa_our_dc ? fsa_our_dc : "<unset>");
            /* Given the CIB resyncing that occurs around elections,
             * have one node update the CIB now and, if the new DC is different,
             * have them do so too after the election
             */
            if (safe_str_eq(executioner, fsa_our_uname)) {
                const char *uuid = get_uuid(target);

                send_stonith_update(NULL, target, uuid);
            } else {
                /* The list stores its own copy of the target name */
                stonith_cleanup_list = g_list_append(stonith_cleanup_list, crm_strdup(target));
            }
        }
    }
}
Ejemplo n.º 4
0
/*
 * Handle the result notification of a fencing (stonith) operation.
 *
 * st        stonith connection the callback is registered on (not used here)
 * st_event  description of the fencing operation; when NULL an error is
 *           logged and nothing else happens
 *
 * A successful operation that targeted this node is reported and raised as
 * an internal FSA error.  Any other result is logged; on success CMAN is
 * notified (only when built with SUPPORT_CMAN) and then either
 *  - the target is (or may be) our DC: the executing node sends a stonith
 *    update and the target is queued on stonith_cleanup_list, or
 *  - we are the DC and the request came from a client whose name differs
 *    from crm_system_name: a stonith update is sent and the transition is
 *    aborted.
 */
static void
tengine_stonith_notify(stonith_t * st, stonith_event_t *st_event)
{
    if (st_event == NULL) {
        crm_err("Notify data not found");
        return;
    }

    if (st_event->result == pcmk_ok && crm_str_eq(st_event->target, fsa_our_uname, TRUE)) {
        /* executioner may be unset (the notice below guards it as well);
         * never hand a NULL pointer to %s */
        crm_err("We were alegedly just fenced by %s for %s!",
                st_event->executioner ? st_event->executioner : "<anyone>", st_event->origin);
        register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__);
        return;
    }

    crm_notice("Peer %s was%s terminated (%s) by %s for %s: %s (ref=%s) by client %s",
               st_event->target, st_event->result == pcmk_ok?"":" not",
               st_event->operation,
               st_event->executioner ? st_event->executioner : "<anyone>",
               st_event->origin, pcmk_strerror(st_event->result), st_event->id,
               st_event->client_origin ? st_event->client_origin : "<unknown>");

#if SUPPORT_CMAN
    if (st_event->result == pcmk_ok && is_cman_cluster()) {
        int local_rc = 0;
        int confirm = 0;
        /* fenced_external() is given a private, writable copy of the name */
        char *target_copy = strdup(st_event->target);

        /* In case fenced hasn't noticed yet */
        local_rc = fenced_external(target_copy);
        if (local_rc != 0) {
            crm_err("Could not notify CMAN that '%s' is now fenced: %d", st_event->target, local_rc);
        } else {
            crm_notice("Notified CMAN that '%s' is now fenced", st_event->target);
        }

        /* In case fenced is already trying to shoot it */
        confirm = open("/var/run/cluster/fenced_override", O_NONBLOCK|O_WRONLY);
        /* open() reports failure with -1 (which is "true" in C), so the
         * descriptor has to be compared explicitly */
        if (confirm >= 0) {
            int ignore = 0; /* only receives the result of the newline write */
            int len = strlen(target_copy);

            errno = 0;
            local_rc = write(confirm, target_copy, len);
            ignore = write(confirm, "\n", 1);

            if(errno == EBADF) {
                crm_trace("CMAN not expecting %s to be fenced (yet)", st_event->target);

            } else if (local_rc < len) {
                crm_perror(LOG_ERR, "Confirmation of CMAN fencing event for '%s' failed: %d", st_event->target, local_rc);

            } else {
                fsync(confirm);
                crm_notice("Confirmed CMAN fencing event for '%s'", st_event->target);
            }
            close(confirm);

        } else {
            /* Same trace the EBADF check above used to produce when write()
             * was attempted on the failed descriptor */
            crm_trace("CMAN not expecting %s to be fenced (yet)", st_event->target);
        }

        /* The copy is only needed for the calls above */
        free(target_copy);
    }
#endif

    if (st_event->result == pcmk_ok) {
        gboolean we_are_executioner = safe_str_eq(st_event->executioner, fsa_our_uname);

        crm_trace("target=%s dc=%s", st_event->target, fsa_our_dc);
        if (fsa_our_dc == NULL || safe_str_eq(fsa_our_dc, st_event->target)) {
            crm_notice("Target %s our leader %s (recorded: %s)",
                       fsa_our_dc?"was":"may have been", st_event->target, fsa_our_dc ? fsa_our_dc : "<unset>");

            /* Given the CIB resyncing that occurs around elections,
             * have one node update the CIB now and, if the new DC is different,
             * have them do so too after the election
             */
            if (we_are_executioner) {
                const char *uuid = get_uuid(st_event->target);
                send_stonith_update(NULL, st_event->target, uuid);
            }
            /* The list stores its own copy of the target name */
            stonith_cleanup_list = g_list_append(stonith_cleanup_list, strdup(st_event->target));
        } else if (AM_I_DC &&
                    st_event->client_origin &&
                    safe_str_neq(st_event->client_origin, crm_system_name)) {
            const char *uuid = get_uuid(st_event->target);
            /* If a remote process outside of pacemaker invoked stonith to
             * fence someone, report the fencing result to the cib
             * and abort the transition graph. */
            crm_info("External fencing operation from %s fenced %s", st_event->client_origin, st_event->target);
            send_stonith_update(NULL, st_event->target, uuid);
            abort_transition(INFINITY, tg_restart, "External Fencing Operation", NULL);
        }
    }
}