Esempio n. 1
0
/*!
 * \internal
 * \brief Respond to scheduler connection failure
 *
 * \param[in] user_data  Ignored
 */
static void
pe_ipc_destroy(gpointer user_data)
{
    if (is_set(fsa_input_register, R_PE_REQUIRED)) {
        int rc = pcmk_ok;
        char *uuid_str = crm_generate_uuid();

        crm_crit("Connection to the scheduler failed "
                 CRM_XS " uuid=%s", uuid_str);

        /*
         * The scheduler died...
         *
         * Save the current CIB so that we have a chance of
         * figuring out what killed it.
         *
         * Delay raising the I_ERROR until the query below completes or
         * 5s is up, whichever comes first.
         *
         */
        rc = fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local);
        fsa_register_cib_callback(rc, FALSE, uuid_str, save_cib_contents);

    } else {
        crm_info("Connection to the scheduler released");
    }

    clear_bit(fsa_input_register, R_PE_CONNECTED);
    pe_subsystem = NULL;
    mainloop_set_trigger(fsa_source);
    return;
}
Esempio n. 2
0
/*	A_DC_JOIN_FINALIZE	*/
void
do_dc_join_finalize(long long action,
                    enum crmd_fsa_cause cause,
                    enum crmd_fsa_state cur_state,
                    enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    char *sync_from = NULL;
    int rc = pcmk_ok;

    /* This we can do straight away and avoid clients timing us out
     *  while we compute the latest CIB
     */
    crm_debug("Finializing join-%d for %d clients",
              current_join_id, crmd_join_phase_count(crm_join_integrated));

    crmd_join_phase_log(LOG_INFO);
    if (crmd_join_phase_count(crm_join_welcomed) != 0) {
        crm_info("Waiting for %d more nodes", crmd_join_phase_count(crm_join_welcomed));
        /* crmd_fsa_stall(FALSE); Needed? */
        return;

    } else if (crmd_join_phase_count(crm_join_integrated) == 0) {
        /* Nothing to do */
        check_join_state(fsa_state, __FUNCTION__);
        return;
    }

    clear_bit(fsa_input_register, R_HAVE_CIB);
    if (max_generation_from == NULL || safe_str_eq(max_generation_from, fsa_our_uname)) {
        set_bit(fsa_input_register, R_HAVE_CIB);
    }

    if (is_set(fsa_input_register, R_IN_TRANSITION)) {
        crm_warn("Delaying response to cluster join offer while transition in progress "
                 CRM_XS " join-%d", current_join_id);
        crmd_fsa_stall(FALSE);
        return;
    }

    if (max_generation_from && is_set(fsa_input_register, R_HAVE_CIB) == FALSE) {
        /* ask for the agreed best CIB */
        sync_from = strdup(max_generation_from);
        set_bit(fsa_input_register, R_CIB_ASKED);
        crm_notice("Syncing the Cluster Information Base from %s to rest of cluster "
                   CRM_XS " join-%d", sync_from, current_join_id);
        crm_log_xml_notice(max_generation_xml, "Requested version");

    } else {
        /* Send _our_ CIB out to everyone */
        sync_from = strdup(fsa_our_uname);
        crm_info("join-%d: Syncing our CIB to the rest of the cluster",
                 current_join_id);
        crm_log_xml_debug(max_generation_xml, "Requested version");
    }


    rc = fsa_cib_conn->cmds->sync_from(fsa_cib_conn, sync_from, NULL, cib_quorum_override);
    fsa_register_cib_callback(rc, FALSE, sync_from, finalize_sync_callback);
}
Esempio n. 3
0
/*	 A_DC_TAKEOVER	*/
void
do_dc_takeover(long long action,
               enum crmd_fsa_cause cause,
               enum crmd_fsa_state cur_state,
               enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    int rc = pcmk_ok;
    xmlNode *cib = NULL;
    GListPtr gIter = NULL;
    const char *cluster_type = name_for_cluster_type(get_cluster_type());

    crm_info("Taking over DC status for this partition");
    set_bit(fsa_input_register, R_THE_DC);

    for (gIter = stonith_cleanup_list; gIter != NULL; gIter = gIter->next) {
        char *target = gIter->data;
        crm_node_t *target_node = crm_get_peer(0, target);
        const char *uuid = crm_peer_uuid(target_node);

        crm_notice("Marking %s, target of a previous stonith action, as clean", target);
        send_stonith_update(NULL, target, uuid);
        free(target);
    }
    g_list_free(stonith_cleanup_list);
    stonith_cleanup_list = NULL;

#if SUPPORT_COROSYNC
    if (is_classic_ais_cluster()) {
        send_ais_text(crm_class_quorum, NULL, TRUE, NULL, crm_msg_ais);
    }
#endif

    if (voted != NULL) {
        crm_trace("Destroying voted hash");
        g_hash_table_destroy(voted);
        voted = NULL;
    }

    set_bit(fsa_input_register, R_JOIN_OK);
    set_bit(fsa_input_register, R_INVOKE_PE);

    fsa_cib_conn->cmds->set_master(fsa_cib_conn, cib_scope_local);

    cib = create_xml_node(NULL, XML_TAG_CIB);
    crm_xml_add(cib, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET);
    fsa_cib_update(XML_TAG_CIB, cib, cib_quorum_override, rc, NULL);
    fsa_register_cib_callback(rc, FALSE, NULL, feature_update_callback);

    update_attr_delegate(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL,
                         "dc-version", VERSION "-" BUILD_VERSION, FALSE, NULL);

    update_attr_delegate(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL,
                         "cluster-infrastructure", cluster_type, FALSE, NULL);

    mainloop_set_trigger(config_read);
    free_xml(cib);
}
Esempio n. 4
0
void
send_stonith_update(crm_action_t * action, const char *target, const char *uuid)
{
    int rc = pcmk_ok;
    crm_node_t *peer = NULL;

    /* zero out the node-status & remove all LRM status info */
    xmlNode *node_state = NULL;

    CRM_CHECK(target != NULL, return);
    CRM_CHECK(uuid != NULL, return);

    if (get_node_uuid(0, target) == NULL) {
        set_node_uuid(target, uuid);
    }

    /* Make sure the membership and join caches are accurate */
    peer = crm_get_peer(0, target);
    if (peer->uuid == NULL) {
        crm_info("Recording uuid '%s' for node '%s'", uuid, target);
        peer->uuid = strdup(uuid);
    }
    crm_update_peer_proc(__FUNCTION__, peer, crm_proc_none, NULL);
    crm_update_peer_state(__FUNCTION__, peer, CRM_NODE_LOST, 0);
    crm_update_peer_expected(__FUNCTION__, peer, CRMD_JOINSTATE_DOWN);
    erase_node_from_join(target);

    node_state =
        do_update_node_cib(peer,
                           node_update_cluster | node_update_peer | node_update_join |
                           node_update_expected, NULL, __FUNCTION__);

    /* Force our known ID */
    crm_xml_add(node_state, XML_ATTR_UUID, uuid);

    rc = fsa_cib_conn->cmds->update(fsa_cib_conn, XML_CIB_TAG_STATUS, node_state,
                                    cib_quorum_override | cib_scope_local | cib_can_create);

    /* Delay processing the trigger until the update completes */
    crm_debug("Sending fencing update %d for %s", rc, target);
    fsa_register_cib_callback(rc, FALSE, strdup(target), cib_fencing_updated);

    /* Make sure it sticks */
    /* fsa_cib_conn->cmds->bump_epoch(fsa_cib_conn, cib_quorum_override|cib_scope_local);    */

    erase_status_tag(target, XML_CIB_TAG_LRM, cib_scope_local);
    erase_status_tag(target, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local);

    free_xml(node_state);
    return;
}
Esempio n. 5
0
/*	A_DC_JOIN_PROCESS_ACK	*/
void
do_dc_join_ack(long long action,
               enum crmd_fsa_cause cause,
               enum crmd_fsa_state cur_state,
               enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    int join_id = -1;
    int call_id = 0;
    ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);

    const char *op = crm_element_value(join_ack->msg, F_CRM_TASK);
    const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM);
    crm_node_t *peer = crm_get_peer(0, join_from);

    if (safe_str_neq(op, CRM_OP_JOIN_CONFIRM) || peer == NULL) {
        crm_debug("Ignoring op=%s message from %s", op, join_from);
        return;
    }

    crm_trace("Processing ack from %s", join_from);
    crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id);

    if (peer->join != crm_join_finalized) {
        crm_info("Join not in progress: ignoring join-%d from %s (phase = %d)",
                 join_id, join_from, peer->join);
        return;

    } else if (join_id != current_join_id) {
        crm_err("Invalid response from %s: join-%d vs. join-%d",
                join_from, join_id, current_join_id);
        crm_update_peer_join(__FUNCTION__, peer, crm_join_nack);
        return;
    }

    crm_update_peer_join(__FUNCTION__, peer, crm_join_confirmed);

    crm_info("join-%d: Updating node state to %s for %s",
             join_id, CRMD_JOINSTATE_MEMBER, join_from);

    /* update CIB with the current LRM status from the node
     * We dont need to notify the TE of these updates, a transition will
     *   be started in due time
     */
    erase_status_tag(join_from, XML_CIB_TAG_LRM, cib_scope_local);
    fsa_cib_update(XML_CIB_TAG_STATUS, join_ack->xml,
                   cib_scope_local | cib_quorum_override | cib_can_create, call_id, NULL);
    fsa_register_cib_callback(call_id, FALSE, NULL, join_update_complete_callback);
    crm_debug("join-%d: Registered callback for LRM update %d", join_id, call_id);
}
Esempio n. 6
0
/*	 A_PE_INVOKE	*/
void
do_pe_invoke(long long action,
             enum crmd_fsa_cause cause,
             enum crmd_fsa_state cur_state,
             enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    if (AM_I_DC == FALSE) {
        crm_err("Not invoking scheduler because not DC: %s",
                fsa_action2string(action));
        return;
    }

    if (is_set(fsa_input_register, R_PE_CONNECTED) == FALSE) {
        if (is_set(fsa_input_register, R_SHUTDOWN)) {
            crm_err("Cannot shut down gracefully without the scheduler");
            register_fsa_input_before(C_FSA_INTERNAL, I_TERMINATE, NULL);

        } else {
            crm_info("Waiting for the scheduler to connect");
            crmd_fsa_stall(FALSE);
            register_fsa_action(A_PE_START);
        }
        return;
    }

    if (cur_state != S_POLICY_ENGINE) {
        crm_notice("Not invoking scheduler because in state %s",
                   fsa_state2string(cur_state));
        return;
    }
    if (is_set(fsa_input_register, R_HAVE_CIB) == FALSE) {
        crm_err("Attempted to invoke scheduler without consistent Cluster Information Base!");

        /* start the join from scratch */
        register_fsa_input_before(C_FSA_INTERNAL, I_ELECTION, NULL);
        return;
    }

    fsa_pe_query = fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local);

    crm_debug("Query %d: Requesting the current CIB: %s", fsa_pe_query,
              fsa_state2string(fsa_state));

    /* Make sure any queued calculations are discarded */
    free(fsa_pe_ref);
    fsa_pe_ref = NULL;

    fsa_register_cib_callback(fsa_pe_query, FALSE, NULL, do_pe_invoke_callback);
}
Esempio n. 7
0
/* aka. accept the welcome offer */
void
do_cl_join_offer_respond(long long action,
                         enum crmd_fsa_cause cause,
                         enum crmd_fsa_state cur_state,
                         enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
    const char *welcome_from = crm_element_value(input->msg, F_CRM_HOST_FROM);
    const char *join_id = crm_element_value(input->msg, F_CRM_JOIN_ID);

#if 0
    if (we are sick) {
        log error;

        /* save the request for later? */
        return;
    }
#endif

    crm_trace("Accepting cluster join offer from node %s "CRM_XS" join-%s",
              welcome_from, crm_element_value(input->msg, F_CRM_JOIN_ID));

    /* we only ever want the last one */
    if (query_call_id > 0) {
        crm_trace("Cancelling previous join query: %d", query_call_id);
        remove_cib_op_callback(query_call_id, FALSE);
        query_call_id = 0;
    }

    if (update_dc(input->msg) == FALSE) {
        crm_warn("Discarding cluster join offer from node %s (expected %s)",
                 welcome_from, fsa_our_dc);
        return;
    }

    update_dc_expected(input->msg);

    CRM_LOG_ASSERT(input != NULL);
    query_call_id =
        fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local | cib_no_children);
    fsa_register_cib_callback(query_call_id, FALSE, strdup(join_id), join_query_callback);
    crm_trace("Registered join query callback: %d", query_call_id);

    register_fsa_action(A_DC_TIMER_STOP);
}
Esempio n. 8
0
void
crm_update_quorum(gboolean quorum, gboolean force_update)
{
    ever_had_quorum |= quorum;
    if (AM_I_DC && (force_update || fsa_has_quorum != quorum)) {
        int call_id = 0;
        xmlNode *update = NULL;
        int call_options = cib_scope_local | cib_quorum_override;

        update = create_xml_node(NULL, XML_TAG_CIB);
        crm_xml_add_int(update, XML_ATTR_HAVE_QUORUM, quorum);
        crm_xml_add(update, XML_ATTR_DC_UUID, fsa_our_uuid);

        fsa_cib_update(XML_TAG_CIB, update, call_options, call_id, NULL);
        crm_debug("Updating quorum status to %s (call=%d)", quorum ? "true" : "false", call_id);
        fsa_register_cib_callback(call_id, FALSE, NULL, cib_quorum_update_complete);
        free_xml(update);
    }
    fsa_has_quorum = quorum;
}
Esempio n. 9
0
gboolean
cib_action_update(crm_action_t * action, int status, int op_rc)
{
    lrmd_event_data_t *op = NULL;
    xmlNode *state = NULL;
    xmlNode *rsc = NULL;
    xmlNode *xml_op = NULL;
    xmlNode *action_rsc = NULL;

    int rc = pcmk_ok;

    const char *name = NULL;
    const char *value = NULL;
    const char *rsc_id = NULL;
    const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
    const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
    const char *task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
    const char *target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);

    int call_options = cib_quorum_override | cib_scope_local;
    int target_rc = get_target_rc(action);

    if (status == PCMK_LRM_OP_PENDING) {
        crm_debug("%s %d: Recording pending operation %s on %s",
                  crm_element_name(action->xml), action->id, task_uuid, target);
    } else {
        crm_warn("%s %d: %s on %s timed out",
                 crm_element_name(action->xml), action->id, task_uuid, target);
    }

    action_rsc = find_xml_node(action->xml, XML_CIB_TAG_RESOURCE, TRUE);
    if (action_rsc == NULL) {
        return FALSE;
    }

    rsc_id = ID(action_rsc);
    CRM_CHECK(rsc_id != NULL, crm_log_xml_err(action->xml, "Bad:action");
              return FALSE);

/*
  update the CIB

<node_state id="hadev">
      <lrm>
        <lrm_resources>
          <lrm_resource id="rsc2" last_op="start" op_code="0" target="hadev"/>
*/

    state = create_xml_node(NULL, XML_CIB_TAG_STATE);

    crm_xml_add(state, XML_ATTR_UUID, target_uuid);
    crm_xml_add(state, XML_ATTR_UNAME, target);

    rsc = create_xml_node(state, XML_CIB_TAG_LRM);
    crm_xml_add(rsc, XML_ATTR_ID, target_uuid);

    rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCES);
    rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCE);
    crm_xml_add(rsc, XML_ATTR_ID, rsc_id);

    name = XML_ATTR_TYPE;
    value = crm_element_value(action_rsc, name);
    crm_xml_add(rsc, name, value);
    name = XML_AGENT_ATTR_CLASS;
    value = crm_element_value(action_rsc, name);
    crm_xml_add(rsc, name, value);
    name = XML_AGENT_ATTR_PROVIDER;
    value = crm_element_value(action_rsc, name);
    crm_xml_add(rsc, name, value);

    op = convert_graph_action(NULL, action, status, op_rc);
    op->call_id = -1;
    op->user_data = generate_transition_key(transition_graph->id, action->id, target_rc, te_uuid);

    xml_op = create_operation_update(rsc, op, CRM_FEATURE_SET, target_rc, __FUNCTION__, LOG_INFO);
    lrmd_free_event(op);

    crm_trace("Updating CIB with \"%s\" (%s): %s %s on %s",
              status < 0 ? "new action" : XML_ATTR_TIMEOUT,
              crm_element_name(action->xml), crm_str(task), rsc_id, target);
    crm_log_xml_trace(xml_op, "Op");

    rc = fsa_cib_conn->cmds->update(fsa_cib_conn, XML_CIB_TAG_STATUS, state, call_options);

    crm_trace("Updating CIB with %s action %d: %s on %s (call_id=%d)",
              services_lrm_status_str(status), action->id, task_uuid, target, rc);

    fsa_register_cib_callback(rc, FALSE, NULL, cib_action_updated);
    free_xml(state);

    action->sent_update = TRUE;

    if (rc < pcmk_ok) {
        return FALSE;
    }

    return TRUE;
}
Esempio n. 10
0
void
send_stonith_update(crm_action_t * action, const char *target, const char *uuid)
{
    int rc = pcmk_ok;
    crm_node_t *peer = NULL;

    /* We (usually) rely on the membership layer to do node_update_cluster,
     * and the peer status callback to do node_update_peer, because the node
     * might have already rejoined before we get the stonith result here.
     */
    int flags = node_update_join | node_update_expected;

    /* zero out the node-status & remove all LRM status info */
    xmlNode *node_state = NULL;

    CRM_CHECK(target != NULL, return);
    CRM_CHECK(uuid != NULL, return);

    /* Make sure the membership and join caches are accurate */
    peer = crm_get_peer_full(0, target, CRM_GET_PEER_ANY);

    CRM_CHECK(peer != NULL, return);

    if (peer->state == NULL) {
        /* Usually, we rely on the membership layer to update the cluster state
         * in the CIB. However, if the node has never been seen, do it here, so
         * the node is not considered unclean.
         */
        flags |= node_update_cluster;
    }

    if (peer->uuid == NULL) {
        crm_info("Recording uuid '%s' for node '%s'", uuid, target);
        peer->uuid = strdup(uuid);
    }

    crmd_peer_down(peer, TRUE);

    /* Generate a node state update for the CIB */
    node_state = do_update_node_cib(peer, flags, NULL, __FUNCTION__);

    /* we have to mark whether or not remote nodes have already been fenced */
    if (peer->flags & crm_remote_node) {
        time_t now = time(NULL);
        char *now_s = crm_itoa(now);
        crm_xml_add(node_state, XML_NODE_IS_FENCED, now_s);
        free(now_s);
    }

    /* Force our known ID */
    crm_xml_add(node_state, XML_ATTR_UUID, uuid);

    rc = fsa_cib_conn->cmds->update(fsa_cib_conn, XML_CIB_TAG_STATUS, node_state,
                                    cib_quorum_override | cib_scope_local | cib_can_create);

    /* Delay processing the trigger until the update completes */
    crm_debug("Sending fencing update %d for %s", rc, target);
    fsa_register_cib_callback(rc, FALSE, strdup(target), cib_fencing_updated);

    /* Make sure it sticks */
    /* fsa_cib_conn->cmds->bump_epoch(fsa_cib_conn, cib_quorum_override|cib_scope_local);    */

    erase_status_tag(peer->uname, XML_CIB_TAG_LRM, cib_scope_local);
    erase_status_tag(peer->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local);

    free_xml(node_state);
    return;
}
Esempio n. 11
0
void
populate_cib_nodes(enum node_update_flags flags, const char *source)
{
    int call_id = 0;
    gboolean from_hashtable = TRUE;
    int call_options = cib_scope_local | cib_quorum_override;
    xmlNode *node_list = create_xml_node(NULL, XML_CIB_TAG_NODES);

#if SUPPORT_HEARTBEAT
    if (is_not_set(flags, node_update_quick) && is_heartbeat_cluster()) {
        from_hashtable = heartbeat_initialize_nodelist(fsa_cluster_conn, FALSE, node_list);
    }
#endif

#if SUPPORT_COROSYNC
#  if !SUPPORT_PLUGIN
    if (is_not_set(flags, node_update_quick) && is_corosync_cluster()) {
        from_hashtable = corosync_initialize_nodelist(NULL, FALSE, node_list);
    }
#  endif
#endif

    if (from_hashtable) {
        GHashTableIter iter;
        crm_node_t *node = NULL;

        g_hash_table_iter_init(&iter, crm_peer_cache);
        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
            xmlNode *new_node = NULL;

            crm_trace("Creating node entry for %s/%s", node->uname, node->uuid);
            if(node->uuid && node->uname) {
                /* We need both to be valid */
                new_node = create_xml_node(node_list, XML_CIB_TAG_NODE);
                crm_xml_add(new_node, XML_ATTR_ID, node->uuid);
                crm_xml_add(new_node, XML_ATTR_UNAME, node->uname);
            }
        }
    }

    crm_trace("Populating <nodes> section from %s", from_hashtable ? "hashtable" : "cluster");

    fsa_cib_update(XML_CIB_TAG_NODES, node_list, call_options, call_id, NULL);
    fsa_register_cib_callback(call_id, FALSE, NULL, node_list_update_callback);

    free_xml(node_list);

    if (call_id >= pcmk_ok && crm_peer_cache != NULL && AM_I_DC) {
        /*
         * There is no need to update the local CIB with our values if
         * we've not seen valid membership data
         */
        GHashTableIter iter;
        crm_node_t *node = NULL;

        node_list = create_xml_node(NULL, XML_CIB_TAG_STATUS);

        g_hash_table_iter_init(&iter, crm_peer_cache);
        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
            do_update_node_cib(node, flags, node_list, source);
        }

        if (crm_remote_peer_cache) {
            g_hash_table_iter_init(&iter, crm_remote_peer_cache);
            while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
                do_update_node_cib(node, flags, node_list, source);
            }
        }

        fsa_cib_update(XML_CIB_TAG_STATUS, node_list, call_options, call_id, NULL);
        fsa_register_cib_callback(call_id, FALSE, NULL, crmd_node_update_complete);
        last_peer_update = call_id;

        free_xml(node_list);
    }
}