Exemplo n.º 1
0
static void
process_remote_node_action(crm_action_t *action, xmlNode *event)
{
    xmlNode *child = NULL;

    /* The whole point of this function is to detect when a remote-node
     * is integrated into the cluster, and abort the transition if that remote-node
     * was fenced earlier in the transition. This allows a new transition to be
     * generated so resources can be placed on the new node.
     */

    if (crm_remote_peer_cache_size() == 0) {
        return;
    } else if (action->type != action_type_rsc) {
        return;
    } else if (action->failed || action->confirmed == FALSE) {
        return;
    } else if (safe_str_neq(crm_element_value(action->xml, XML_LRM_ATTR_TASK), "start")) {
        return;
    }

    for (child = __xml_first_child(action->xml); child != NULL; child = __xml_next(child)) {
        const char *provider;
        const char *type;
        const char *rsc;
        crm_node_t *remote_peer;

        if (safe_str_neq(crm_element_name(child), XML_CIB_TAG_RESOURCE)) {
            continue;
        }

        provider = crm_element_value(child, XML_AGENT_ATTR_PROVIDER);
        type = crm_element_value(child, XML_ATTR_TYPE);
        rsc = ID(child);

        if (safe_str_neq(provider, "pacemaker") || safe_str_neq(type, "remote") || rsc == NULL) {
            break;
        }

        remote_peer = crm_get_peer_full(0, rsc, CRM_GET_PEER_REMOTE);
        if (remote_peer == NULL) {
            break;
        }

        /* A remote node will be placed in the "lost" state after
         * it has been successfully fenced.  After successfully connecting
         * to a remote-node after being fenced, we need to abort the transition
         * so resources can be placed on the newly integrated remote-node */
        if (safe_str_eq(remote_peer->state, CRM_NODE_LOST)) {
            abort_transition(INFINITY, tg_restart, "Remote-node re-discovered.", event);
        }

        return;
    }
}
Exemplo n.º 2
0
void
send_stonith_update(crm_action_t * action, const char *target, const char *uuid)
{
    int rc = pcmk_ok;
    crm_node_t *peer = NULL;

    /* zero out the node-status & remove all LRM status info */
    xmlNode *node_state = NULL;

    CRM_CHECK(target != NULL, return);
    CRM_CHECK(uuid != NULL, return);

    /* Make sure the membership and join caches are accurate */
    peer = crm_get_peer_full(0, target, CRM_GET_PEER_CLUSTER | CRM_GET_PEER_REMOTE);

    CRM_CHECK(peer != NULL, return);

    if (peer->uuid == NULL) {
        crm_info("Recording uuid '%s' for node '%s'", uuid, target);
        peer->uuid = strdup(uuid);
    }
    crm_update_peer_proc(__FUNCTION__, peer, crm_proc_none, NULL);
    crm_update_peer_state(__FUNCTION__, peer, CRM_NODE_LOST, 0);
    crm_update_peer_expected(__FUNCTION__, peer, CRMD_JOINSTATE_DOWN);
    crm_update_peer_join(__FUNCTION__, peer, crm_join_none);

    node_state =
        do_update_node_cib(peer,
                           node_update_cluster | node_update_peer | node_update_join |
                           node_update_expected, NULL, __FUNCTION__);

    /* Force our known ID */
    crm_xml_add(node_state, XML_ATTR_UUID, uuid);

    rc = fsa_cib_conn->cmds->update(fsa_cib_conn, XML_CIB_TAG_STATUS, node_state,
                                    cib_quorum_override | cib_scope_local | cib_can_create);

    /* Delay processing the trigger until the update completes */
    crm_debug("Sending fencing update %d for %s", rc, target);
    fsa_register_cib_callback(rc, FALSE, strdup(target), cib_fencing_updated);

    /* Make sure it sticks */
    /* fsa_cib_conn->cmds->bump_epoch(fsa_cib_conn, cib_quorum_override|cib_scope_local);    */

    erase_status_tag(target, XML_CIB_TAG_LRM, cib_scope_local);
    erase_status_tag(target, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local);

    free_xml(node_state);
    return;
}
Exemplo n.º 3
0
void
write_attribute(attribute_t *a)
{
    int updates = 0;
    xmlNode *xml_top = NULL;
    attribute_value_t *v = NULL;
    GHashTableIter iter;
    enum cib_call_options flags = cib_quorum_override;

    if (a == NULL) {
        return;

    } else if (the_cib == NULL) {
        crm_info("Write out of %s delayed: cib not connected", a->id);
        return;

    } else if(a->update && a->update < last_cib_op_done) {
        crm_info("Write out of %s continuing: update %d considered lost", a->id, a->update);

    } else if(a->update) {
        crm_info("Write out of %s delayed: update %d in progress", a->id, a->update);
        return;

    } else if(mainloop_timer_running(a->timer)) {
        crm_info("Write out of %s delayed: timer is running", a->id);
        return;
    }

    a->changed = FALSE;
    a->unknown_peer_uuids = FALSE;
    xml_top = create_xml_node(NULL, XML_CIB_TAG_STATUS);

    g_hash_table_iter_init(&iter, a->values);
    while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & v)) {
        crm_node_t *peer = crm_get_peer_full(v->nodeid, v->nodename, CRM_GET_PEER_REMOTE|CRM_GET_PEER_CLUSTER);

        if(peer && peer->id && v->nodeid == 0) {
            crm_trace("Updating value's nodeid");
            v->nodeid = peer->id;
        }

        if (peer == NULL) {
            /* If the user is trying to set an attribute on an unknown peer, ignore it. */
            crm_notice("Update error (peer not found): %s[%s]=%s failed (host=%p)", v->nodename, a->id, v->current, peer);

        } else if (peer->uuid == NULL) {
            /* peer is found, but we don't know the uuid yet. Wait until we discover a new uuid before attempting to write */
            a->unknown_peer_uuids = FALSE;
            crm_notice("Update error (unknown peer uuid, retry will be attempted once uuid is discovered): %s[%s]=%s failed (host=%p)", v->nodename, a->id, v->current, peer);

        } else {
            crm_debug("Update: %s[%s]=%s (%s %u %u %s)", v->nodename, a->id, v->current, peer->uuid, peer->id, v->nodeid, peer->uname);
            build_update_element(xml_top, a, peer->uuid, v->current);
            updates++;

            free(v->requested);
            v->requested = NULL;

            if(v->current) {
                v->requested = strdup(v->current);

            } else {
                flags |= cib_mixed_update;
            }
        }
    }

    if(updates) {
        crm_log_xml_trace(xml_top, __FUNCTION__);

        a->update = cib_internal_op(the_cib, CIB_OP_MODIFY, NULL, XML_CIB_TAG_STATUS, xml_top, NULL,
                                    flags, a->user);

        crm_notice("Sent update %d with %d changes for %s, id=%s, set=%s",
                   a->update, updates, a->id, a->uuid ? a->uuid : "<n/a>", a->set);

        the_cib->cmds->register_callback(
            the_cib, a->update, 120, FALSE, strdup(a->id), "attrd_cib_callback", attrd_cib_callback);
    }
}
Exemplo n.º 4
0
void
write_attribute(attribute_t *a)
{
    int private_updates = 0, cib_updates = 0;
    xmlNode *xml_top = NULL;
    attribute_value_t *v = NULL;
    GHashTableIter iter;
    enum cib_call_options flags = cib_quorum_override;

    if (a == NULL) {
        return;
    }

    /* If this attribute will be written to the CIB ... */
    if (!a->is_private) {

        /* Defer the write if now's not a good time */
        if (the_cib == NULL) {
            crm_info("Write out of '%s' delayed: cib not connected", a->id);
            return;

        } else if (a->update && (a->update < last_cib_op_done)) {
            crm_info("Write out of '%s' continuing: update %d considered lost", a->id, a->update);

        } else if (a->update) {
            crm_info("Write out of '%s' delayed: update %d in progress", a->id, a->update);
            return;

        } else if (mainloop_timer_running(a->timer)) {
            crm_info("Write out of '%s' delayed: timer is running", a->id);
            return;
        }

        /* Initialize the status update XML */
        xml_top = create_xml_node(NULL, XML_CIB_TAG_STATUS);
    }

    /* Attribute will be written shortly, so clear changed flag */
    a->changed = FALSE;

    /* We will check all peers' uuids shortly, so initialize this to false */
    a->unknown_peer_uuids = FALSE;

    /* Iterate over each peer value of this attribute */
    g_hash_table_iter_init(&iter, a->values);
    while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & v)) {
        crm_node_t *peer = crm_get_peer_full(v->nodeid, v->nodename, CRM_GET_PEER_ANY);

        /* If the value's peer info does not correspond to a peer, ignore it */
        if (peer == NULL) {
            crm_notice("Update error (peer not found): %s[%s]=%s failed (host=%p)",
                       v->nodename, a->id, v->current, peer);
            continue;
        }

        /* If we're just learning the peer's node id, remember it */
        if (peer->id && (v->nodeid == 0)) {
            crm_trace("Updating value's nodeid");
            v->nodeid = peer->id;
        }

        /* If this is a private attribute, no update needs to be sent */
        if (a->is_private) {
            private_updates++;
            continue;
        }

        /* If the peer is found, but its uuid is unknown, defer write */
        if (peer->uuid == NULL) {
            a->unknown_peer_uuids = TRUE;
            crm_notice("Update error (unknown peer uuid, retry will be attempted once uuid is discovered): %s[%s]=%s failed (host=%p)",
                       v->nodename, a->id, v->current, peer);
            continue;
        }

        /* Add this value to status update XML */
        crm_debug("Update: %s[%s]=%s (%s %u %u %s)", v->nodename, a->id,
                  v->current, peer->uuid, peer->id, v->nodeid, peer->uname);
        build_update_element(xml_top, a, peer->uuid, v->current);
        cib_updates++;

        free(v->requested);
        v->requested = NULL;
        if (v->current) {
            v->requested = strdup(v->current);
        } else {
            /* Older attrd versions don't know about the cib_mixed_update
             * flag so make sure it goes to the local cib which does
             */
            flags |= cib_mixed_update|cib_scope_local;
        }
    }

    if (private_updates) {
        crm_info("Processed %d private change%s for %s, id=%s, set=%s",
                 private_updates, ((private_updates == 1)? "" : "s"),
                 a->id, (a->uuid? a->uuid : "<n/a>"), a->set);
    }
    if (cib_updates) {
        crm_log_xml_trace(xml_top, __FUNCTION__);

        a->update = cib_internal_op(the_cib, CIB_OP_MODIFY, NULL, XML_CIB_TAG_STATUS, xml_top, NULL,
                                    flags, a->user);

        crm_info("Sent update %d with %d changes for %s, id=%s, set=%s",
                 a->update, cib_updates, a->id, (a->uuid? a->uuid : "<n/a>"), a->set);

        the_cib->cmds->register_callback_full(the_cib, a->update, 120, FALSE,
                                              strdup(a->id),
                                              "attrd_cib_callback",
                                              attrd_cib_callback, free);
    }
    free_xml(xml_top);
}
Exemplo n.º 5
0
void
send_stonith_update(crm_action_t * action, const char *target, const char *uuid)
{
    int rc = pcmk_ok;
    crm_node_t *peer = NULL;

    /* We (usually) rely on the membership layer to do node_update_cluster,
     * and the peer status callback to do node_update_peer, because the node
     * might have already rejoined before we get the stonith result here.
     */
    int flags = node_update_join | node_update_expected;

    /* zero out the node-status & remove all LRM status info */
    xmlNode *node_state = NULL;

    CRM_CHECK(target != NULL, return);
    CRM_CHECK(uuid != NULL, return);

    /* Make sure the membership and join caches are accurate */
    peer = crm_get_peer_full(0, target, CRM_GET_PEER_ANY);

    CRM_CHECK(peer != NULL, return);

    if (peer->state == NULL) {
        /* Usually, we rely on the membership layer to update the cluster state
         * in the CIB. However, if the node has never been seen, do it here, so
         * the node is not considered unclean.
         */
        flags |= node_update_cluster;
    }

    if (peer->uuid == NULL) {
        crm_info("Recording uuid '%s' for node '%s'", uuid, target);
        peer->uuid = strdup(uuid);
    }

    crmd_peer_down(peer, TRUE);

    /* Generate a node state update for the CIB */
    node_state = do_update_node_cib(peer, flags, NULL, __FUNCTION__);

    /* we have to mark whether or not remote nodes have already been fenced */
    if (peer->flags & crm_remote_node) {
        time_t now = time(NULL);
        char *now_s = crm_itoa(now);
        crm_xml_add(node_state, XML_NODE_IS_FENCED, now_s);
        free(now_s);
    }

    /* Force our known ID */
    crm_xml_add(node_state, XML_ATTR_UUID, uuid);

    rc = fsa_cib_conn->cmds->update(fsa_cib_conn, XML_CIB_TAG_STATUS, node_state,
                                    cib_quorum_override | cib_scope_local | cib_can_create);

    /* Delay processing the trigger until the update completes */
    crm_debug("Sending fencing update %d for %s", rc, target);
    fsa_register_cib_callback(rc, FALSE, strdup(target), cib_fencing_updated);

    /* Make sure it sticks */
    /* fsa_cib_conn->cmds->bump_epoch(fsa_cib_conn, cib_quorum_override|cib_scope_local);    */

    erase_status_tag(peer->uname, XML_CIB_TAG_LRM, cib_scope_local);
    erase_status_tag(peer->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local);

    free_xml(node_state);
    return;
}
Exemplo n.º 6
0
static void
tengine_stonith_notify(stonith_t * st, stonith_event_t * st_event)
{
    if(te_client_id == NULL) {
        te_client_id = g_strdup_printf("%s.%d", crm_system_name, getpid());
    }

    if (st_event == NULL) {
        crm_err("Notify data not found");
        return;
    }

    if (st_event->result == pcmk_ok && crm_str_eq(st_event->target, fsa_our_uname, TRUE)) {
        crm_crit("We were alegedly just fenced by %s for %s with %s!", st_event->executioner,
                 st_event->origin, st_event->device); /* Dumps blackbox if enabled */

        qb_log_fini(); /* Try to get the above log message to disk - somehow */

        /* Get out ASAP and do not come back up.
         *
         * Triggering a reboot is also not the worst idea either since
         * the rest of the cluster thinks we're safely down
         */

#ifdef RB_HALT_SYSTEM
        reboot(RB_HALT_SYSTEM);
#endif

        /*
         * If reboot() fails or is not supported, coming back up will
         * probably lead to a situation where the other nodes set our
         * status to 'lost' because of the fencing callback and will
         * discard subsequent election votes with:
         *
         * Election 87 (current: 5171, owner: 103): Processed vote from east-03 (Peer is not part of our cluster)
         *
         * So just stay dead, something is seriously messed up anyway.
         *
         */
        exit(100); /* None of our wrappers since we already called qb_log_fini() */
        return;
    }

    if (st_event->result == pcmk_ok &&
        safe_str_eq(st_event->operation, T_STONITH_NOTIFY_FENCE)) {
        st_fail_count_reset(st_event->target);
    }

    crm_notice("Peer %s was%s terminated (%s) by %s for %s: %s (ref=%s) by client %s",
               st_event->target, st_event->result == pcmk_ok ? "" : " not",
               st_event->action,
               st_event->executioner ? st_event->executioner : "<anyone>",
               st_event->origin, pcmk_strerror(st_event->result), st_event->id,
               st_event->client_origin ? st_event->client_origin : "<unknown>");

#if SUPPORT_CMAN
    if (st_event->result == pcmk_ok && is_cman_cluster()) {
        int local_rc = 0;
        char *target_copy = strdup(st_event->target);

        /* In case fenced hasn't noticed yet
         *
         * Any fencing that has been inititated will be completed by way of the fence_pcmk redirect
         */
        local_rc = fenced_external(target_copy);
        if (local_rc != 0) {
            crm_err("Could not notify CMAN that '%s' is now fenced: %d", st_event->target,
                    local_rc);
        } else {
            crm_notice("Notified CMAN that '%s' is now fenced", st_event->target);
        }
        free(target_copy);
    }
#endif

     if (st_event->result == pcmk_ok) {
         crm_node_t *peer = crm_get_peer_full(0, st_event->target, CRM_GET_PEER_REMOTE | CRM_GET_PEER_CLUSTER);
         const char *uuid = crm_peer_uuid(peer);
         gboolean we_are_executioner = safe_str_eq(st_event->executioner, fsa_our_uname);

        crm_trace("target=%s dc=%s", st_event->target, fsa_our_dc);
        if(AM_I_DC) {
            /* The DC always sends updates */
            send_stonith_update(NULL, st_event->target, uuid);

            if (st_event->client_origin && safe_str_neq(st_event->client_origin, te_client_id)) {

                /* Abort the current transition graph if it wasn't us
                 * that invoked stonith to fence someone
                 */
                crm_info("External fencing operation from %s fenced %s", st_event->client_origin, st_event->target);
                abort_transition(INFINITY, tg_restart, "External Fencing Operation", NULL);
            }

            /* Assume it was our leader if we dont currently have one */
        } else if (fsa_our_dc == NULL || safe_str_eq(fsa_our_dc, st_event->target)) {
            crm_notice("Target %s our leader %s (recorded: %s)",
                       fsa_our_dc ? "was" : "may have been", st_event->target,
                       fsa_our_dc ? fsa_our_dc : "<unset>");

            /* Given the CIB resyncing that occurs around elections,
             * have one node update the CIB now and, if the new DC is different,
             * have them do so too after the election
             */
            if (we_are_executioner) {
                send_stonith_update(NULL, st_event->target, uuid);
            }
            stonith_cleanup_list = g_list_append(stonith_cleanup_list, strdup(st_event->target));

        }

        crm_update_peer_proc(__FUNCTION__, peer, crm_proc_none, NULL);
        crm_update_peer_state(__FUNCTION__, peer, CRM_NODE_LOST, 0);
        crm_update_peer_expected(__FUNCTION__, peer, CRMD_JOINSTATE_DOWN);
        crm_update_peer_join(__FUNCTION__, peer, crm_join_none);
     }
}