Exemple #1
0
gboolean
check_join_state(enum crmd_fsa_state cur_state, const char *source)
{
    crm_debug("Invoked by %s in state: %s", source, fsa_state2string(cur_state));

    if (saved_ccm_membership_id != crm_peer_seq) {
        crm_debug("%s: Membership changed since join started: %llu -> %llu",
                 source, saved_ccm_membership_id, crm_peer_seq);
        register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);

    } else if (cur_state == S_INTEGRATION) {
        if (g_hash_table_size(welcomed_nodes) == 0) {
            crm_debug("join-%d: Integration of %d peers complete: %s",
                      current_join_id, g_hash_table_size(integrated_nodes), source);
            register_fsa_input_before(C_FSA_INTERNAL, I_INTEGRATED, NULL);
            return TRUE;
        }

    } else if (cur_state == S_FINALIZE_JOIN) {
        if (is_set(fsa_input_register, R_HAVE_CIB) == FALSE) {
            crm_debug("join-%d: Delaying I_FINALIZED until we have the CIB", current_join_id);
            return TRUE;

        } else if (g_hash_table_size(integrated_nodes) == 0
                   && g_hash_table_size(finalized_nodes) == 0) {
            crm_debug("join-%d complete: %s", current_join_id, source);
            register_fsa_input_later(C_FSA_INTERNAL, I_FINALIZED, NULL);

        } else if (g_hash_table_size(integrated_nodes) != 0
                   && g_hash_table_size(finalized_nodes) != 0) {
            char *msg = NULL;

            crm_err("join-%d: Waiting on %d integrated nodes"
                    " AND %d finalized nodes",
                    current_join_id,
                    g_hash_table_size(integrated_nodes), g_hash_table_size(finalized_nodes));
            msg = strdup("Integrated node");
            g_hash_table_foreach(integrated_nodes, ghash_print_node, msg);
            free(msg);

            msg = strdup("Finalized node");
            g_hash_table_foreach(finalized_nodes, ghash_print_node, msg);
            free(msg);

        } else if (g_hash_table_size(integrated_nodes) != 0) {
            crm_debug("join-%d: Still waiting on %d integrated nodes",
                      current_join_id, g_hash_table_size(integrated_nodes));

        } else if (g_hash_table_size(finalized_nodes) != 0) {
            crm_debug("join-%d: Still waiting on %d finalized nodes",
                      current_join_id, g_hash_table_size(finalized_nodes));
        }
    }

    return FALSE;
}
Exemple #2
0
gboolean
check_join_state(enum crmd_fsa_state cur_state, const char *source)
{
    static unsigned long long highest_seq = 0;

    crm_debug("Invoked by %s in state: %s", source, fsa_state2string(cur_state));

    if (saved_ccm_membership_id != crm_peer_seq) {
        crm_debug("%s: Membership changed since join started: %llu -> %llu (%llu)",
                  source, saved_ccm_membership_id, crm_peer_seq, highest_seq);
        if(highest_seq < crm_peer_seq) {
            /* Don't spam the FSA with duplicates */
            highest_seq = crm_peer_seq;
            register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
        }

    } else if (cur_state == S_INTEGRATION) {
        if (crmd_join_phase_count(crm_join_welcomed) == 0) {
            crm_debug("join-%d: Integration of %d peers complete: %s",
                      current_join_id, crmd_join_phase_count(crm_join_integrated), source);
            register_fsa_input_before(C_FSA_INTERNAL, I_INTEGRATED, NULL);
            return TRUE;
        }

    } else if (cur_state == S_FINALIZE_JOIN) {
        if (is_set(fsa_input_register, R_HAVE_CIB) == FALSE) {
            crm_debug("join-%d: Delaying I_FINALIZED until we have the CIB", current_join_id);
            return TRUE;

        } else if (crmd_join_phase_count(crm_join_welcomed) != 0) {
            crm_debug("join-%d: Still waiting on %d welcomed nodes",
                      current_join_id, crmd_join_phase_count(crm_join_welcomed));
            crmd_join_phase_log(LOG_DEBUG);

        } else if (crmd_join_phase_count(crm_join_integrated) != 0) {
            crm_debug("join-%d: Still waiting on %d integrated nodes",
                      current_join_id, crmd_join_phase_count(crm_join_integrated));
            crmd_join_phase_log(LOG_DEBUG);

        } else if (crmd_join_phase_count(crm_join_finalized) != 0) {
            crm_debug("join-%d: Still waiting on %d finalized nodes",
                      current_join_id, crmd_join_phase_count(crm_join_finalized));
            crmd_join_phase_log(LOG_DEBUG);

        } else {
            crm_debug("join-%d complete: %s", current_join_id, source);
            register_fsa_input_later(C_FSA_INTERNAL, I_FINALIZED, NULL);
            return TRUE;
        }
    }

    return FALSE;
}
/*	 A_PE_INVOKE	*/
void
do_pe_invoke(long long action,
             enum crmd_fsa_cause cause,
             enum crmd_fsa_state cur_state,
             enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    if (AM_I_DC == FALSE) {
        crm_err("Not invoking scheduler because not DC: %s",
                fsa_action2string(action));
        return;
    }

    if (is_set(fsa_input_register, R_PE_CONNECTED) == FALSE) {
        if (is_set(fsa_input_register, R_SHUTDOWN)) {
            crm_err("Cannot shut down gracefully without the scheduler");
            register_fsa_input_before(C_FSA_INTERNAL, I_TERMINATE, NULL);

        } else {
            crm_info("Waiting for the scheduler to connect");
            crmd_fsa_stall(FALSE);
            register_fsa_action(A_PE_START);
        }
        return;
    }

    if (cur_state != S_POLICY_ENGINE) {
        crm_notice("Not invoking scheduler because in state %s",
                   fsa_state2string(cur_state));
        return;
    }
    if (is_set(fsa_input_register, R_HAVE_CIB) == FALSE) {
        crm_err("Attempted to invoke scheduler without consistent Cluster Information Base!");

        /* start the join from scratch */
        register_fsa_input_before(C_FSA_INTERNAL, I_ELECTION, NULL);
        return;
    }

    fsa_pe_query = fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local);

    crm_debug("Query %d: Requesting the current CIB: %s", fsa_pe_query,
              fsa_state2string(fsa_state));

    /* Make sure any queued calculations are discarded */
    free(fsa_pe_ref);
    fsa_pe_ref = NULL;

    fsa_register_cib_callback(fsa_pe_query, FALSE, NULL, do_pe_invoke_callback);
}
Exemple #4
0
gboolean
crm_timer_popped(gpointer data)
{
    fsa_timer_t *timer = (fsa_timer_t *) data;

    if (timer == wait_timer
        || timer == recheck_timer
        || timer == transition_timer || timer == finalization_timer || timer == election_trigger) {
        crm_info("%s (%s) just popped (%dms)",
                 get_timer_desc(timer), fsa_input2string(timer->fsa_input), timer->period_ms);
        timer->counter++;

    } else {
        crm_err("%s (%s) just popped in state %s! (%dms)",
                get_timer_desc(timer), fsa_input2string(timer->fsa_input),
                fsa_state2string(fsa_state), timer->period_ms);
    }

    if (timer == election_trigger && election_trigger->counter > 5) {
        crm_notice("We appear to be in an election loop, something may be wrong");
        crm_write_blackbox(0, NULL);
        election_trigger->counter = 0;
    }

    if (timer->repeat == FALSE) {
        crm_timer_stop(timer);  /* make it _not_ go off again */
    }

    if (timer->fsa_input == I_INTEGRATED) {
        crm_info("Welcomed: %d, Integrated: %d",
                 crmd_join_phase_count(crm_join_welcomed),
                 crmd_join_phase_count(crm_join_integrated));
        if (crmd_join_phase_count(crm_join_welcomed) == 0) {
            /* If we don't even have ourself, start again */
            register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION, NULL, NULL, __FUNCTION__);

        } else {
            register_fsa_input_before(C_TIMER_POPPED, timer->fsa_input, NULL);
        }

    } else if (timer == recheck_timer && fsa_state != S_IDLE) {
        crm_debug("Discarding %s event in state: %s",
                  fsa_input2string(timer->fsa_input), fsa_state2string(fsa_state));

    } else if (timer == finalization_timer && fsa_state != S_FINALIZE_JOIN) {
        crm_debug("Discarding %s event in state: %s",
                  fsa_input2string(timer->fsa_input), fsa_state2string(fsa_state));

    } else if (timer->fsa_input != I_NULL) {
        register_fsa_input(C_TIMER_POPPED, timer->fsa_input, NULL);
    }

    crm_trace("Triggering FSA: %s", __FUNCTION__);
    mainloop_set_trigger(fsa_source);

    return TRUE;
}
Exemple #5
0
gboolean
crm_timer_popped(gpointer data)
{
    fsa_timer_t *timer = (fsa_timer_t *) data;

    if (timer == wait_timer
        || timer == recheck_timer
        || timer == transition_timer || timer == finalization_timer || timer == election_trigger) {
        crm_info("%s (%s) just popped (%dms)",
                 get_timer_desc(timer), fsa_input2string(timer->fsa_input), timer->period_ms);

    } else {
        crm_err("%s (%s) just popped in state %s! (%dms)",
                get_timer_desc(timer), fsa_input2string(timer->fsa_input),
                fsa_state2string(fsa_state), timer->period_ms);
    }

    if (timer->repeat == FALSE) {
        crm_timer_stop(timer);  /* make it _not_ go off again */
    }

    if (timer->fsa_input == I_INTEGRATED) {
        crm_info("Welcomed: %d, Integrated: %d",
                 g_hash_table_size(welcomed_nodes), g_hash_table_size(integrated_nodes));
        if (g_hash_table_size(welcomed_nodes) == 0) {
            /* If we don't even have ourself, start again */
            register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION, NULL, NULL, __FUNCTION__);

        } else {
            register_fsa_input_before(C_TIMER_POPPED, timer->fsa_input, NULL);
        }

    } else if (timer == recheck_timer && fsa_state != S_IDLE) {
        crm_debug("Discarding %s event in state: %s",
                  fsa_input2string(timer->fsa_input), fsa_state2string(fsa_state));

    } else if (timer == finalization_timer && fsa_state != S_FINALIZE_JOIN) {
        crm_debug("Discarding %s event in state: %s",
                  fsa_input2string(timer->fsa_input), fsa_state2string(fsa_state));

    } else if (timer->fsa_input != I_NULL) {
        register_fsa_input(C_TIMER_POPPED, timer->fsa_input, NULL);
    }

    crm_trace("Triggering FSA: %s", __FUNCTION__);
    mainloop_set_trigger(fsa_source);

    return TRUE;
}
Exemple #6
0
void
peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data)
{
    uint32_t old = 0;
    uint32_t changed = 0;
    bool appeared = FALSE;
    bool is_remote = is_set(node->flags, crm_remote_node);
    const char *status = NULL;

    /* Crmd waits to receive some information from the membership layer before
     * declaring itself operational. If this is being called for a cluster node,
     * indicate that we have it.
     */
    if (!is_remote) {
        set_bit(fsa_input_register, R_PEER_DATA);
    }

    if (node->uname == NULL) {
        return;
    }

    switch (type) {
        case crm_status_uname:
            /* If we've never seen the node, then it also won't be in the status section */
            crm_info("%s node %s is now %s",
                     (is_remote? "Remote" : "Cluster"),
                     node->uname, state_text(node->state));
            return;

        case crm_status_rstate:
        case crm_status_nstate:
            /* This callback should not be called unless the state actually
             * changed, but here's a failsafe just in case.
             */
            CRM_CHECK(safe_str_neq(data, node->state), return);

            crm_info("%s node %s is now %s (was %s)",
                     (is_remote? "Remote" : "Cluster"),
                     node->uname, state_text(node->state), state_text(data));

            if (safe_str_eq(CRM_NODE_MEMBER, node->state)) {
                appeared = TRUE;
                if (!is_remote) {
                    remove_stonith_cleanup(node->uname);
                }
            }

            crmd_alert_node_event(node);
            break;

        case crm_status_processes:
            if (data) {
                old = *(const uint32_t *)data;
                changed = node->processes ^ old;
            }

            status = (node->processes & proc_flags) ? ONLINESTATUS : OFFLINESTATUS;
            crm_info("Client %s/%s now has status [%s] (DC=%s, changed=%6x)",
                     node->uname, peer2text(proc_flags), status,
                     AM_I_DC ? "true" : crm_str(fsa_our_dc), changed);

            if ((changed & proc_flags) == 0) {
                /* Peer process did not change */
                crm_trace("No change %6x %6x %6x", old, node->processes, proc_flags);
                return;
            } else if (is_not_set(fsa_input_register, R_CIB_CONNECTED)) {
                crm_trace("Not connected");
                return;
            } else if (fsa_state == S_STOPPING) {
                crm_trace("Stopping");
                return;
            }

            appeared = (node->processes & proc_flags) != 0;
            if (safe_str_eq(node->uname, fsa_our_uname) && (node->processes & proc_flags) == 0) {
                /* Did we get evicted? */
                crm_notice("Our peer connection failed");
                register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ERROR, NULL);

            } else if (safe_str_eq(node->uname, fsa_our_dc) && crm_is_peer_active(node) == FALSE) {
                /* Did the DC leave us? */
                crm_notice("Our peer on the DC (%s) is dead", fsa_our_dc);
                register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ELECTION, NULL);

                /* @COMPAT DC < 1.1.13: If a DC shuts down normally, we don't
                 * want to fence it. Newer DCs will send their shutdown request
                 * to all peers, who will update the DC's expected state to
                 * down, thus avoiding fencing. We can safely erase the DC's
                 * transient attributes when it leaves in that case. However,
                 * the only way to avoid fencing older DCs is to leave the
                 * transient attributes intact until it rejoins.
                 */
                if (compare_version(fsa_our_dc_version, "3.0.9") > 0) {
                    erase_status_tag(node->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local);
                }

            } else if(AM_I_DC && appeared == FALSE) {
                crm_info("Peer %s left us", node->uname);
                erase_status_tag(node->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local);
            }
            break;
    }

    if (AM_I_DC) {
        xmlNode *update = NULL;
        int flags = node_update_peer;
        gboolean alive = is_remote? appeared : crm_is_peer_active(node);
        crm_action_t *down = match_down_event(node->uuid, appeared);

        crm_trace("Alive=%d, appeared=%d, down=%d",
                  alive, appeared, (down? down->id : -1));

        if (alive && type == crm_status_processes) {
            register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
        }

        if (down) {
            const char *task = crm_element_value(down->xml, XML_LRM_ATTR_TASK);

            if (safe_str_eq(task, CRM_OP_FENCE)) {

                /* tengine_stonith_callback() confirms fence actions */
                crm_trace("Updating CIB %s stonithd reported fencing of %s complete",
                          (down->confirmed? "after" : "before"), node->uname);

            } else if ((alive == FALSE) && safe_str_eq(task, CRM_OP_SHUTDOWN)) {
                crm_notice("%s of peer %s is complete "CRM_XS" op=%d",
                           task, node->uname, down->id);

                /* down->confirmed = TRUE; */
                stop_te_timer(down->timer);

                if (!is_remote) {
                    flags |= node_update_join | node_update_expected;
                    crmd_peer_down(node, FALSE);
                    check_join_state(fsa_state, __FUNCTION__);
                }

                update_graph(transition_graph, down);
                trigger_graph();

            } else {
                crm_trace("Node %s is %salive, was expected to %s (op %d)",
                          node->uname, (alive? "" : "not "), task, down->id);
            }

        } else if (appeared == FALSE) {
            crm_notice("Stonith/shutdown of %s not matched", node->uname);

            if (!is_remote) {
                crm_update_peer_join(__FUNCTION__, node, crm_join_none);
                check_join_state(fsa_state, __FUNCTION__);
            }

            abort_transition(INFINITY, tg_restart, "Node failure", NULL);
            fail_incompletable_actions(transition_graph, node->uuid);

        } else {
            crm_trace("Node %s came up, was not expected to be down",
                      node->uname);
        }

        if (is_remote) {
            /* A pacemaker_remote node won't have its cluster status updated
             * in the CIB by membership-layer callbacks, so do it here.
             */
            flags |= node_update_cluster;

            /* Trigger resource placement on newly integrated nodes */
            if (appeared) {
                abort_transition(INFINITY, tg_restart,
                                 "pacemaker_remote node integrated", NULL);
            }
        }

        /* Update the CIB node state */
        update = create_node_state_update(node, flags, NULL, __FUNCTION__);
        fsa_cib_anon_update(XML_CIB_TAG_STATUS, update,
                            cib_scope_local | cib_quorum_override | cib_can_create);
        free_xml(update);
    }

    trigger_fsa(fsa_source);
}
Exemple #7
0
void
peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data)
{
    uint32_t old = 0;
    uint32_t changed = 0;
    bool appeared = FALSE;
    const char *status = NULL;

    set_bit(fsa_input_register, R_PEER_DATA);
    if (node->uname == NULL) {
        return;
    }

    switch (type) {
        case crm_status_uname:
            /* If we've never seen the node, then it also wont be in the status section */
            crm_info("%s is now %s", node->uname, node->state);
            return;
        case crm_status_rstate:
            crm_info("Remote node %s is now %s (was %s)", node->uname, node->state, (const char *)data);
            /* Keep going */
        case crm_status_nstate:
            crm_info("%s is now %s (was %s)", node->uname, node->state, (const char *)data);
            if (safe_str_eq(data, node->state)) {
                /* State did not change */
                return;
            } else if(safe_str_eq(CRM_NODE_MEMBER, node->state)) {
                appeared = TRUE;
            }
            break;
        case crm_status_processes:
            if (data) {
                old = *(const uint32_t *)data;
                changed = node->processes ^ old;
            }

            /* crmd_proc_update(node, proc_flags); */
            status = (node->processes & proc_flags) ? ONLINESTATUS : OFFLINESTATUS;
            crm_info("Client %s/%s now has status [%s] (DC=%s, changed=%6x)",
                     node->uname, peer2text(proc_flags), status,
                     AM_I_DC ? "true" : crm_str(fsa_our_dc), changed);

            if ((changed & proc_flags) == 0) {
                /* Peer process did not change */
                crm_trace("No change %6x %6x %6x", old, node->processes, proc_flags);
                return;
            } else if (is_not_set(fsa_input_register, R_CIB_CONNECTED)) {
                crm_trace("Not connected");
                return;
            } else if (fsa_state == S_STOPPING) {
                crm_trace("Stopping");
                return;
            }

            appeared = (node->processes & proc_flags) != 0;
            if (safe_str_eq(node->uname, fsa_our_uname) && (node->processes & proc_flags) == 0) {
                /* Did we get evicted? */
                crm_notice("Our peer connection failed");
                register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ERROR, NULL);

            } else if (safe_str_eq(node->uname, fsa_our_dc) && crm_is_peer_active(node) == FALSE) {
                /* Did the DC leave us? */
                crm_notice("Our peer on the DC (%s) is dead", fsa_our_dc);
                register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ELECTION, NULL);

            } else if(AM_I_DC && appeared == FALSE) {
                crm_info("Peer %s left us", node->uname);
                /* crm_update_peer_join(__FUNCTION__, node, crm_join_none); */
            }
            break;
    }

    if (AM_I_DC) {
        xmlNode *update = NULL;
        int flags = node_update_peer;
        gboolean alive = crm_is_peer_active(node);
        crm_action_t *down = match_down_event(0, node->uuid, NULL, appeared);

        crm_trace("Alive=%d, appear=%d, down=%p", alive, appeared, down);

        if (alive && type == crm_status_processes) {
            register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
        }

        if (down) {
            const char *task = crm_element_value(down->xml, XML_LRM_ATTR_TASK);

            if (alive && safe_str_eq(task, CRM_OP_FENCE)) {
                crm_info("Node return implies stonith of %s (action %d) completed", node->uname,
                         down->id);
                erase_status_tag(node->uname, XML_CIB_TAG_LRM, cib_scope_local);
                erase_status_tag(node->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local);
                /* down->confirmed = TRUE; Only stonith-ng returning should imply completion */
                down->sent_update = TRUE;       /* Prevent tengine_stonith_callback() from calling send_stonith_update() */

            } else if (safe_str_eq(task, CRM_OP_FENCE)) {
                crm_trace("Waiting for stonithd to report the fencing of %s is complete", node->uname); /* via tengine_stonith_callback() */

            } else if (alive == FALSE) {
                crm_notice("%s of %s (op %d) is complete", task, node->uname, down->id);
                /* down->confirmed = TRUE; Only stonith-ng returning should imply completion */
                stop_te_timer(down->timer);

                flags |= node_update_join | node_update_expected;
                crmd_peer_down(node, FALSE);
                check_join_state(fsa_state, __FUNCTION__);

                update_graph(transition_graph, down);
                trigger_graph();

            } else {
                crm_trace("Other %p", down);
            }

        } else if (appeared == FALSE) {
            crm_notice("Stonith/shutdown of %s not matched", node->uname);

            crm_update_peer_join(__FUNCTION__, node, crm_join_none);
            check_join_state(fsa_state, __FUNCTION__);

            abort_transition(INFINITY, tg_restart, "Node failure", NULL);
            fail_incompletable_actions(transition_graph, node->uuid);

        } else {
            crm_trace("Other %p", down);
        }

        update = do_update_node_cib(node, flags, NULL, __FUNCTION__);
        fsa_cib_anon_update(XML_CIB_TAG_STATUS, update,
                            cib_scope_local | cib_quorum_override | cib_can_create);
        free_xml(update);
    }

    trigger_fsa(fsa_source);
}