Esempio n. 1
0
/*	 A_DC_TAKEOVER	*/
void
do_dc_takeover(long long action,
               enum crmd_fsa_cause cause,
               enum crmd_fsa_state cur_state,
               enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    int rc = pcmk_ok;
    xmlNode *cib = NULL;
    GListPtr gIter = NULL;
    const char *cluster_type = name_for_cluster_type(get_cluster_type());

    crm_info("Taking over DC status for this partition");
    set_bit(fsa_input_register, R_THE_DC);

    for (gIter = stonith_cleanup_list; gIter != NULL; gIter = gIter->next) {
        char *target = gIter->data;
        crm_node_t *target_node = crm_get_peer(0, target);
        const char *uuid = crm_peer_uuid(target_node);

        crm_notice("Marking %s, target of a previous stonith action, as clean", target);
        send_stonith_update(NULL, target, uuid);
        free(target);
    }
    g_list_free(stonith_cleanup_list);
    stonith_cleanup_list = NULL;

#if SUPPORT_COROSYNC
    if (is_classic_ais_cluster()) {
        send_ais_text(crm_class_quorum, NULL, TRUE, NULL, crm_msg_ais);
    }
#endif

    if (voted != NULL) {
        crm_trace("Destroying voted hash");
        g_hash_table_destroy(voted);
        voted = NULL;
    }

    set_bit(fsa_input_register, R_JOIN_OK);
    set_bit(fsa_input_register, R_INVOKE_PE);

    fsa_cib_conn->cmds->set_master(fsa_cib_conn, cib_scope_local);

    cib = create_xml_node(NULL, XML_TAG_CIB);
    crm_xml_add(cib, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET);
    fsa_cib_update(XML_TAG_CIB, cib, cib_quorum_override, rc, NULL);
    fsa_register_cib_callback(rc, FALSE, NULL, feature_update_callback);

    update_attr_delegate(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL,
                         "dc-version", VERSION "-" BUILD_VERSION, FALSE, NULL);

    update_attr_delegate(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL,
                         "cluster-infrastructure", cluster_type, FALSE, NULL);

    mainloop_set_trigger(config_read);
    free_xml(cib);
}
Esempio n. 2
0
/*!
 * \internal
 * \brief Send stonith updates for all entries in cleanup list, then purge it
 */
void
execute_stonith_cleanup()
{
    GListPtr iter;

    for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
        char *target = iter->data;
        crm_node_t *target_node = crm_get_peer(0, target);
        const char *uuid = crm_peer_uuid(target_node);

        crm_notice("Marking %s, target of a previous stonith action, as clean", target);
        send_stonith_update(NULL, target, uuid);
        free(target);
    }
    g_list_free(stonith_cleanup_list);
    stonith_cleanup_list = NULL;
}
Esempio n. 3
0
static void
tengine_stonith_notify(stonith_t * st, stonith_event_t * st_event)
{
    if(te_client_id == NULL) {
        te_client_id = crm_strdup_printf("%s.%d", crm_system_name, getpid());
    }

    if (st_event == NULL) {
        crm_err("Notify data not found");
        return;
    }

    crmd_notify_fencing_op(st_event);

    if (st_event->result == pcmk_ok && safe_str_eq("on", st_event->action)) {
        crm_notice("%s was successfully unfenced by %s (at the request of %s)",
                   st_event->target, st_event->executioner ? st_event->executioner : "<anyone>", st_event->origin);
                /* TODO: Hook up st_event->device */
        return;

    } else if (safe_str_eq("on", st_event->action)) {
        crm_err("Unfencing of %s by %s failed: %s (%d)",
                st_event->target, st_event->executioner ? st_event->executioner : "<anyone>",
                pcmk_strerror(st_event->result), st_event->result);
        return;

    } else if (st_event->result == pcmk_ok && crm_str_eq(st_event->target, fsa_our_uname, TRUE)) {
        crm_crit("We were allegedly just fenced by %s for %s!",
                 st_event->executioner ? st_event->executioner : "<anyone>", st_event->origin); /* Dumps blackbox if enabled */

        qb_log_fini(); /* Try to get the above log message to disk - somehow */

        /* Get out ASAP and do not come back up.
         *
         * Triggering a reboot is also not the worst idea either since
         * the rest of the cluster thinks we're safely down
         */

#ifdef RB_HALT_SYSTEM
        reboot(RB_HALT_SYSTEM);
#endif

        /*
         * If reboot() fails or is not supported, coming back up will
         * probably lead to a situation where the other nodes set our
         * status to 'lost' because of the fencing callback and will
         * discard subsequent election votes with:
         *
         * Election 87 (current: 5171, owner: 103): Processed vote from east-03 (Peer is not part of our cluster)
         *
         * So just stay dead, something is seriously messed up anyway.
         *
         */
        exit(100); /* None of our wrappers since we already called qb_log_fini() */
        return;
    }

    if (st_event->result == pcmk_ok &&
        safe_str_eq(st_event->operation, T_STONITH_NOTIFY_FENCE)) {
        st_fail_count_reset(st_event->target);
    }

    crm_notice("Peer %s was%s terminated (%s) by %s for %s: %s (ref=%s) by client %s",
               st_event->target, st_event->result == pcmk_ok ? "" : " not",
               st_event->action,
               st_event->executioner ? st_event->executioner : "<anyone>",
               st_event->origin, pcmk_strerror(st_event->result), st_event->id,
               st_event->client_origin ? st_event->client_origin : "<unknown>");

#if SUPPORT_CMAN
    if (st_event->result == pcmk_ok && is_cman_cluster()) {
        int local_rc = 0;
        int confirm = 0;
        char *target_copy = strdup(st_event->target);

        /* In case fenced hasn't noticed yet
         *
         * Any fencing that has been inititated will be completed by way of the fence_pcmk redirect
         */
        local_rc = fenced_external(target_copy);
        if (local_rc != 0) {
            crm_err("Could not notify CMAN that '%s' is now fenced: %d", st_event->target,
                    local_rc);
        } else {
            crm_notice("Notified CMAN that '%s' is now fenced", st_event->target);
        }

        /* In case fenced is already trying to shoot it */
        confirm = open("/var/run/cluster/fenced_override", O_NONBLOCK|O_WRONLY);
        if (confirm > 0) {
            int ignore = 0;
            int len = strlen(target_copy);

            errno = 0;
            local_rc = write(confirm, target_copy, len);
            ignore = write(confirm, "\n", 1);

            if(ignore < 0 && errno == EBADF) {
                crm_trace("CMAN not expecting %s to be fenced (yet)", st_event->target);

            } else if (local_rc < len) {
                crm_perror(LOG_ERR, "Confirmation of CMAN fencing event for '%s' failed: %d", st_event->target, local_rc);

            } else {
                fsync(confirm);
                crm_notice("Confirmed CMAN fencing event for '%s'", st_event->target);
            }
            close(confirm);
        }
        free(target_copy);
    }
#endif

    if (st_event->result == pcmk_ok) {
        crm_node_t *peer = crm_find_peer_full(0, st_event->target, CRM_GET_PEER_REMOTE | CRM_GET_PEER_CLUSTER);
        const char *uuid = NULL;
        gboolean we_are_executioner = safe_str_eq(st_event->executioner, fsa_our_uname);

        if (peer == NULL) {
            return;
        }

        uuid = crm_peer_uuid(peer);

        crm_trace("target=%s dc=%s", st_event->target, fsa_our_dc);
        if(AM_I_DC) {
            /* The DC always sends updates */
            send_stonith_update(NULL, st_event->target, uuid);

            if (st_event->client_origin && safe_str_neq(st_event->client_origin, te_client_id)) {

                /* Abort the current transition graph if it wasn't us
                 * that invoked stonith to fence someone
                 */
                crm_info("External fencing operation from %s fenced %s", st_event->client_origin, st_event->target);
                abort_transition(INFINITY, tg_restart, "External Fencing Operation", NULL);
            }

            /* Assume it was our leader if we dont currently have one */
        } else if (fsa_our_dc == NULL || safe_str_eq(fsa_our_dc, st_event->target)) {
            crm_notice("Target %s our leader %s (recorded: %s)",
                       fsa_our_dc ? "was" : "may have been", st_event->target,
                       fsa_our_dc ? fsa_our_dc : "<unset>");

            /* Given the CIB resyncing that occurs around elections,
             * have one node update the CIB now and, if the new DC is different,
             * have them do so too after the election
             */
            if (we_are_executioner) {
                send_stonith_update(NULL, st_event->target, uuid);
            }
            stonith_cleanup_list = g_list_append(stonith_cleanup_list, strdup(st_event->target));

        }

        crmd_peer_down(peer, TRUE);
     }
}
Esempio n. 4
0
static void
tengine_stonith_notify(stonith_t * st, const char *event, xmlNode * msg)
{
    int rc = -99;
    const char *origin = NULL;
    const char *target = NULL;
    const char *executioner = NULL;
    xmlNode *action = get_xpath_object("//st-data", msg, LOG_ERR);

    if (action == NULL) {
        crm_log_xml(LOG_ERR, "Notify data not found", msg);
        return;
    }

    crm_log_xml(LOG_DEBUG, "stonith_notify", msg);
    crm_element_value_int(msg, F_STONITH_RC, &rc);
    origin = crm_element_value(action, F_STONITH_ORIGIN);
    target = crm_element_value(action, F_STONITH_TARGET);
    executioner = crm_element_value(action, F_STONITH_DELEGATE);

    if (rc == stonith_ok && crm_str_eq(target, fsa_our_uname, TRUE)) {
        crm_err("We were alegedly just fenced by %s for %s!", executioner, origin);
        register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__);

    } else if (rc == stonith_ok) {
        crm_info("Peer %s was terminated (%s) by %s for %s (ref=%s): %s",
                 target,
                 crm_element_value(action, F_STONITH_OPERATION),
                 executioner, origin,
                 crm_element_value(action, F_STONITH_REMOTE), stonith_error2string(rc));
    } else {
        crm_err("Peer %s could not be terminated (%s) by %s for %s (ref=%s): %s",
                target,
                crm_element_value(action, F_STONITH_OPERATION),
                executioner ? executioner : "<anyone>", origin,
                crm_element_value(action, F_STONITH_REMOTE), stonith_error2string(rc));
    }

#ifdef SUPPORT_CMAN
    if (rc == stonith_ok && is_cman_cluster()) {
        int local_rc = 0;
        int confirm = 0;
        char *target_copy = crm_strdup(target);

        /* In case fenced hasn't noticed yet */
        local_rc = fenced_external(target_copy);
        if (local_rc != 0) {
            crm_err("Could not notify CMAN that '%s' is now fenced: %d", target, local_rc);
        } else {
            crm_notice("Notified CMAN that '%s' is now fenced", target);
        }

        /* In case fenced is already trying to shoot it */
        confirm = open("/var/run/cluster/fenced_override", O_NONBLOCK|O_WRONLY);
        if (confirm) {
            int len = strlen(target_copy);

            errno = 0;
            local_rc = write(confirm, target_copy, len);
            write(confirm, "\n", 1);

            if(errno == EBADF) {
                crm_trace("CMAN not expecting %s to be fenced (yet)", target);

            } else if (local_rc < len) {
                crm_perror(LOG_ERR, "Confirmation of CMAN fencing event for '%s' failed: %d", target, local_rc);

            } else {
                fsync(confirm);
                crm_notice("Confirmed CMAN fencing event for '%s'", target);
            }
            close(confirm);
        }
    }
#endif

    if (rc == stonith_ok && safe_str_eq(target, origin)) {
        if (fsa_our_dc == NULL || safe_str_eq(fsa_our_dc, target)) {
            crm_notice("Target was our leader %s (recorded: %s)",
                       target, fsa_our_dc ? fsa_our_dc : "<unset>");
            /* Given the CIB resyncing that occurs around elections,
             * have one node update the CIB now and, if the new DC is different,
             * have them do so too after the election
             */
            if (safe_str_eq(executioner, fsa_our_uname)) {
                const char *uuid = get_uuid(target);

                send_stonith_update(NULL, target, uuid);
            } else {
                stonith_cleanup_list = g_list_append(stonith_cleanup_list, crm_strdup(target));
            }
        }
    }
}
Esempio n. 5
0
static void
tengine_stonith_notify(stonith_t * st, stonith_event_t * st_event)
{
    static char *client_id = NULL;
    if(client_id == NULL) {
        client_id = g_strdup_printf("%s.%d", crm_system_name, getpid());
    }

    if (st_event == NULL) {
        crm_err("Notify data not found");
        return;
    }

    if (st_event->result == pcmk_ok && crm_str_eq(st_event->target, fsa_our_uname, TRUE)) {
        crm_err("We were alegedly just fenced by %s for %s!", st_event->executioner,
                st_event->origin);
        register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__);
        return;
    }

    if (st_event->result == pcmk_ok &&
        safe_str_eq(st_event->operation, T_STONITH_NOTIFY_FENCE)) {
        reset_st_fail_count(st_event->target);
    }

    crm_notice("Peer %s was%s terminated (%s) by %s for %s: %s (ref=%s) by client %s",
               st_event->target, st_event->result == pcmk_ok ? "" : " not",
               st_event->action,
               st_event->executioner ? st_event->executioner : "<anyone>",
               st_event->origin, pcmk_strerror(st_event->result), st_event->id,
               st_event->client_origin ? st_event->client_origin : "<unknown>");

#if SUPPORT_CMAN
    if (st_event->result == pcmk_ok && is_cman_cluster()) {
        int local_rc = 0;
        char *target_copy = strdup(st_event->target);

        /* In case fenced hasn't noticed yet
         *
         * Any fencing that has been inititated will be completed by way of the fence_pcmk redirect
         */
        local_rc = fenced_external(target_copy);
        if (local_rc != 0) {
            crm_err("Could not notify CMAN that '%s' is now fenced: %d", st_event->target,
                    local_rc);
        } else {
            crm_notice("Notified CMAN that '%s' is now fenced", st_event->target);
        }
        free(target_copy);
    }
#endif

     if (st_event->result == pcmk_ok) {
         const char *uuid = get_uuid(st_event->target);
        gboolean we_are_executioner = safe_str_eq(st_event->executioner, fsa_our_uname);

        crm_trace("target=%s dc=%s", st_event->target, fsa_our_dc);
        if(AM_I_DC) {
            /* The DC always sends updates */
            send_stonith_update(NULL, st_event->target, uuid);

            if (st_event->client_origin && safe_str_neq(st_event->client_origin, client_id)) {

                /* Abort the current transition graph if it wasn't us
                 * that invoked stonith to fence someone
                 */
                crm_info("External fencing operation from %s fenced %s", st_event->client_origin, st_event->target);
                abort_transition(INFINITY, tg_restart, "External Fencing Operation", NULL);
            }

            /* Assume it was our leader if we dont currently have one */
        } else if (fsa_our_dc == NULL || safe_str_eq(fsa_our_dc, st_event->target)) {
            crm_notice("Target %s our leader %s (recorded: %s)",
                       fsa_our_dc ? "was" : "may have been", st_event->target,
                       fsa_our_dc ? fsa_our_dc : "<unset>");

            /* Given the CIB resyncing that occurs around elections,
             * have one node update the CIB now and, if the new DC is different,
             * have them do so too after the election
             */
            if (we_are_executioner) {
                send_stonith_update(NULL, st_event->target, uuid);
            }
            stonith_cleanup_list = g_list_append(stonith_cleanup_list, strdup(st_event->target));

        }
     }
}
Esempio n. 6
0
static void
tengine_stonith_notify(stonith_t * st, stonith_event_t * st_event)
{
    if(te_client_id == NULL) {
        te_client_id = crm_strdup_printf("%s.%lu", crm_system_name,
                                         (unsigned long) getpid());
    }

    if (st_event == NULL) {
        crm_err("Notify data not found");
        return;
    }

    crmd_alert_fencing_op(st_event);

    if (st_event->result == pcmk_ok && safe_str_eq("on", st_event->action)) {
        crm_notice("%s was successfully unfenced by %s (at the request of %s)",
                   st_event->target, st_event->executioner ? st_event->executioner : "<anyone>", st_event->origin);
                /* TODO: Hook up st_event->device */
        return;

    } else if (safe_str_eq("on", st_event->action)) {
        crm_err("Unfencing of %s by %s failed: %s (%d)",
                st_event->target, st_event->executioner ? st_event->executioner : "<anyone>",
                pcmk_strerror(st_event->result), st_event->result);
        return;

    } else if (st_event->result == pcmk_ok && crm_str_eq(st_event->target, fsa_our_uname, TRUE)) {
        crm_crit("We were allegedly just fenced by %s for %s!",
                 st_event->executioner ? st_event->executioner : "<anyone>", st_event->origin); /* Dumps blackbox if enabled */

        qb_log_fini(); /* Try to get the above log message to disk - somehow */

        /* Get out ASAP and do not come back up.
         *
         * Triggering a reboot is also not the worst idea either since
         * the rest of the cluster thinks we're safely down
         */

#ifdef RB_HALT_SYSTEM
        reboot(RB_HALT_SYSTEM);
#endif

        /*
         * If reboot() fails or is not supported, coming back up will
         * probably lead to a situation where the other nodes set our
         * status to 'lost' because of the fencing callback and will
         * discard subsequent election votes with:
         *
         * Election 87 (current: 5171, owner: 103): Processed vote from east-03 (Peer is not part of our cluster)
         *
         * So just stay dead, something is seriously messed up anyway.
         *
         */
        exit(CRM_EX_FATAL); // None of our wrappers since we already called qb_log_fini()
        return;
    }

    /* Update the count of stonith failures for this target, in case we become
     * DC later. The current DC has already updated its fail count in
     * tengine_stonith_callback().
     */
    if (!AM_I_DC && safe_str_eq(st_event->operation, T_STONITH_NOTIFY_FENCE)) {
        if (st_event->result == pcmk_ok) {
            st_fail_count_reset(st_event->target);
        } else {
            st_fail_count_increment(st_event->target);
        }
    }

    crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s: %s "
               CRM_XS " initiator=%s ref=%s",
               st_event->target, st_event->result == pcmk_ok ? "" : " not",
               st_event->action,
               st_event->executioner ? st_event->executioner : "<anyone>",
               (st_event->client_origin? st_event->client_origin : "<unknown>"),
               pcmk_strerror(st_event->result),
               st_event->origin, st_event->id);

    if (st_event->result == pcmk_ok) {
        crm_node_t *peer = crm_find_peer_full(0, st_event->target, CRM_GET_PEER_ANY);
        const char *uuid = NULL;
        gboolean we_are_executioner = safe_str_eq(st_event->executioner, fsa_our_uname);

        if (peer == NULL) {
            return;
        }

        uuid = crm_peer_uuid(peer);

        crm_trace("target=%s dc=%s", st_event->target, fsa_our_dc);
        if(AM_I_DC) {
            /* The DC always sends updates */
            send_stonith_update(NULL, st_event->target, uuid);

            /* @TODO Ideally, at this point, we'd check whether the fenced node
             * hosted any guest nodes, and call remote_node_down() for them.
             * Unfortunately, the controller doesn't have a simple, reliable way
             * to map hosts to guests. It might be possible to track this in the
             * peer cache via crm_remote_peer_cache_refresh(). For now, we rely
             * on the PE creating fence pseudo-events for the guests.
             */

            if (st_event->client_origin && safe_str_neq(st_event->client_origin, te_client_id)) {

                /* Abort the current transition graph if it wasn't us
                 * that invoked stonith to fence someone
                 */
                crm_info("External fencing operation from %s fenced %s", st_event->client_origin, st_event->target);
                abort_transition(INFINITY, tg_restart, "External Fencing Operation", NULL);
            }

            /* Assume it was our leader if we don't currently have one */
        } else if (((fsa_our_dc == NULL) || safe_str_eq(fsa_our_dc, st_event->target))
            && !is_set(peer->flags, crm_remote_node)) {

            crm_notice("Target %s our leader %s (recorded: %s)",
                       fsa_our_dc ? "was" : "may have been", st_event->target,
                       fsa_our_dc ? fsa_our_dc : "<unset>");

            /* Given the CIB resyncing that occurs around elections,
             * have one node update the CIB now and, if the new DC is different,
             * have them do so too after the election
             */
            if (we_are_executioner) {
                send_stonith_update(NULL, st_event->target, uuid);
            }
            add_stonith_cleanup(st_event->target);
        }

        /* If the target is a remote node, and we host its connection,
         * immediately fail all monitors so it can be recovered quickly.
         * The connection won't necessarily drop when a remote node is fenced,
         * so the failure might not otherwise be detected until the next poke.
         */
        if (is_set(peer->flags, crm_remote_node)) {
            remote_ra_fail(st_event->target);
        }

        crmd_peer_down(peer, TRUE);
     }
}
void
tengine_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data)
{
    char *uuid = NULL;
    int stonith_id = -1;
    int transition_id = -1;
    crm_action_t *action = NULL;
    int call_id = data->call_id;
    int rc = data->rc;
    char *userdata = data->userdata;

    CRM_CHECK(userdata != NULL, return);
    crm_notice("Stonith operation %d/%s: %s (%d)", call_id, (char *)userdata,
               pcmk_strerror(rc), rc);

    if (AM_I_DC == FALSE) {
        return;
    }

    /* crm_info("call=%d, optype=%d, node_name=%s, result=%d, node_list=%s, action=%s", */
    /*       op->call_id, op->optype, op->node_name, op->op_result, */
    /*       (char *)op->node_list, op->private_data); */

    /* filter out old STONITH actions */
    CRM_CHECK(decode_transition_key(userdata, &uuid, &transition_id, &stonith_id, NULL),
              goto bail);

    if (transition_graph->complete || stonith_id < 0 || safe_str_neq(uuid, te_uuid)
        || transition_graph->id != transition_id) {
        crm_info("Ignoring STONITH action initiated outside of the current transition");
        goto bail;
    }

    action = get_action(stonith_id, FALSE);
    if (action == NULL) {
        crm_err("Stonith action not matched");
        goto bail;
    }

    stop_te_timer(action->timer);
    if (rc == pcmk_ok) {
        const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
        const char *uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
        const char *op = crm_meta_value(action->params, "stonith_action"); 

        crm_info("Stonith operation %d for %s passed", call_id, target);
        if (action->confirmed == FALSE) {
            te_action_confirmed(action);
            if (safe_str_eq("on", op)) {
                const char *value = NULL;
                char *now = crm_itoa(time(NULL));

                update_attrd(target, CRM_ATTR_UNFENCED, now, NULL, FALSE);
                free(now);

                value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_ALL);
                update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL, FALSE);

                value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_SECURE);
                update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL, FALSE);

            } else if (action->sent_update == FALSE) {
                send_stonith_update(action, target, uuid);
                action->sent_update = TRUE;
            }
        }
        st_fail_count_reset(target);

    } else {
        const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
        enum transition_action abort_action = tg_restart;

        action->failed = TRUE;
        crm_notice("Stonith operation %d for %s failed (%s): aborting transition.",
                   call_id, target, pcmk_strerror(rc));

        /* If no fence devices were available, there's no use in immediately
         * checking again, so don't start a new transition in that case.
         */
        if (rc == -ENODEV) {
            crm_warn("No devices found in cluster to fence %s, giving up",
                     target);
            abort_action = tg_stop;
        }

        /* Increment the fail count now, so abort_for_stonith_failure() can
         * check it. Non-DC nodes will increment it in tengine_stonith_notify().
         */
        st_fail_count_increment(target);
        abort_for_stonith_failure(abort_action, target, NULL);
    }

    update_graph(transition_graph, action);
    trigger_graph();

  bail:
    free(userdata);
    free(uuid);
    return;
}
Esempio n. 8
0
static void
tengine_stonith_notify(stonith_t * st, stonith_event_t *st_event)
{
    if (st_event == NULL) {
        crm_err("Notify data not found");
        return;
    }

    if (st_event->result == pcmk_ok && crm_str_eq(st_event->target, fsa_our_uname, TRUE)) {
        crm_err("We were alegedly just fenced by %s for %s!", st_event->executioner, st_event->origin);
        register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__);
        return;
    }

    crm_notice("Peer %s was%s terminated (%s) by %s for %s: %s (ref=%s) by client %s",
               st_event->target, st_event->result == pcmk_ok?"":" not",
               st_event->operation,
               st_event->executioner ? st_event->executioner : "<anyone>",
               st_event->origin, pcmk_strerror(st_event->result), st_event->id,
               st_event->client_origin ? st_event->client_origin : "<unknown>");

#if SUPPORT_CMAN
    if (st_event->result == pcmk_ok && is_cman_cluster()) {
        int local_rc = 0;
        int confirm = 0;
        char *target_copy = strdup(st_event->target);

        /* In case fenced hasn't noticed yet */
        local_rc = fenced_external(target_copy);
        if (local_rc != 0) {
            crm_err("Could not notify CMAN that '%s' is now fenced: %d", st_event->target, local_rc);
        } else {
            crm_notice("Notified CMAN that '%s' is now fenced", st_event->target);
        }

        /* In case fenced is already trying to shoot it */
        confirm = open("/var/run/cluster/fenced_override", O_NONBLOCK|O_WRONLY);
        if (confirm) {
            int ignore = 0;
            int len = strlen(target_copy);

            errno = 0;
            local_rc = write(confirm, target_copy, len);
            ignore = write(confirm, "\n", 1);

            if(errno == EBADF) {
                crm_trace("CMAN not expecting %s to be fenced (yet)", st_event->target);
                
            } else if (local_rc < len) {
                crm_perror(LOG_ERR, "Confirmation of CMAN fencing event for '%s' failed: %d", st_event->target, local_rc);

            } else {
                fsync(confirm);
                crm_notice("Confirmed CMAN fencing event for '%s'", st_event->target);
            }
            close(confirm);
        }
    }
#endif

    if (st_event->result == pcmk_ok) {
        gboolean we_are_executioner = safe_str_eq(st_event->executioner, fsa_our_uname);

        crm_trace("target=%s dc=%s", st_event->target, fsa_our_dc);
        if (fsa_our_dc == NULL || safe_str_eq(fsa_our_dc, st_event->target)) {
            crm_notice("Target %s our leader %s (recorded: %s)",
                       fsa_our_dc?"was":"may have been", st_event->target, fsa_our_dc ? fsa_our_dc : "<unset>");

            /* Given the CIB resyncing that occurs around elections,
             * have one node update the CIB now and, if the new DC is different,
             * have them do so too after the election
             */
            if (we_are_executioner) {
                const char *uuid = get_uuid(st_event->target);
                send_stonith_update(NULL, st_event->target, uuid);
            }
            stonith_cleanup_list = g_list_append(stonith_cleanup_list, strdup(st_event->target));
        } else if (AM_I_DC &&
                    st_event->client_origin &&
                    safe_str_neq(st_event->client_origin, crm_system_name)) {
            const char *uuid = get_uuid(st_event->target);
            /* If a remote process outside of pacemaker invoked stonith to
             * fence someone, report the fencing result to the cib
             * and abort the transition graph. */
            crm_info("External fencing operation from %s fenced %s", st_event->client_origin, st_event->target);
            send_stonith_update(NULL, st_event->target, uuid);
            abort_transition(INFINITY, tg_restart, "External Fencing Operation", NULL);
        }
    }
}