gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source) { crm_debug("Invoked by %s in state: %s", source, fsa_state2string(cur_state)); if (saved_ccm_membership_id != crm_peer_seq) { crm_debug("%s: Membership changed since join started: %llu -> %llu", source, saved_ccm_membership_id, crm_peer_seq); register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL); } else if (cur_state == S_INTEGRATION) { if (g_hash_table_size(welcomed_nodes) == 0) { crm_debug("join-%d: Integration of %d peers complete: %s", current_join_id, g_hash_table_size(integrated_nodes), source); register_fsa_input_before(C_FSA_INTERNAL, I_INTEGRATED, NULL); return TRUE; } } else if (cur_state == S_FINALIZE_JOIN) { if (is_set(fsa_input_register, R_HAVE_CIB) == FALSE) { crm_debug("join-%d: Delaying I_FINALIZED until we have the CIB", current_join_id); return TRUE; } else if (g_hash_table_size(integrated_nodes) == 0 && g_hash_table_size(finalized_nodes) == 0) { crm_debug("join-%d complete: %s", current_join_id, source); register_fsa_input_later(C_FSA_INTERNAL, I_FINALIZED, NULL); } else if (g_hash_table_size(integrated_nodes) != 0 && g_hash_table_size(finalized_nodes) != 0) { char *msg = NULL; crm_err("join-%d: Waiting on %d integrated nodes" " AND %d finalized nodes", current_join_id, g_hash_table_size(integrated_nodes), g_hash_table_size(finalized_nodes)); msg = strdup("Integrated node"); g_hash_table_foreach(integrated_nodes, ghash_print_node, msg); free(msg); msg = strdup("Finalized node"); g_hash_table_foreach(finalized_nodes, ghash_print_node, msg); free(msg); } else if (g_hash_table_size(integrated_nodes) != 0) { crm_debug("join-%d: Still waiting on %d integrated nodes", current_join_id, g_hash_table_size(integrated_nodes)); } else if (g_hash_table_size(finalized_nodes) != 0) { crm_debug("join-%d: Still waiting on %d finalized nodes", current_join_id, g_hash_table_size(finalized_nodes)); } } return FALSE; }
gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source) { static unsigned long long highest_seq = 0; crm_debug("Invoked by %s in state: %s", source, fsa_state2string(cur_state)); if (saved_ccm_membership_id != crm_peer_seq) { crm_debug("%s: Membership changed since join started: %llu -> %llu (%llu)", source, saved_ccm_membership_id, crm_peer_seq, highest_seq); if(highest_seq < crm_peer_seq) { /* Don't spam the FSA with duplicates */ highest_seq = crm_peer_seq; register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL); } } else if (cur_state == S_INTEGRATION) { if (crmd_join_phase_count(crm_join_welcomed) == 0) { crm_debug("join-%d: Integration of %d peers complete: %s", current_join_id, crmd_join_phase_count(crm_join_integrated), source); register_fsa_input_before(C_FSA_INTERNAL, I_INTEGRATED, NULL); return TRUE; } } else if (cur_state == S_FINALIZE_JOIN) { if (is_set(fsa_input_register, R_HAVE_CIB) == FALSE) { crm_debug("join-%d: Delaying I_FINALIZED until we have the CIB", current_join_id); return TRUE; } else if (crmd_join_phase_count(crm_join_welcomed) != 0) { crm_debug("join-%d: Still waiting on %d welcomed nodes", current_join_id, crmd_join_phase_count(crm_join_welcomed)); crmd_join_phase_log(LOG_DEBUG); } else if (crmd_join_phase_count(crm_join_integrated) != 0) { crm_debug("join-%d: Still waiting on %d integrated nodes", current_join_id, crmd_join_phase_count(crm_join_integrated)); crmd_join_phase_log(LOG_DEBUG); } else if (crmd_join_phase_count(crm_join_finalized) != 0) { crm_debug("join-%d: Still waiting on %d finalized nodes", current_join_id, crmd_join_phase_count(crm_join_finalized)); crmd_join_phase_log(LOG_DEBUG); } else { crm_debug("join-%d complete: %s", current_join_id, source); register_fsa_input_later(C_FSA_INTERNAL, I_FINALIZED, NULL); return TRUE; } } return FALSE; }
/* A_PE_INVOKE */ void do_pe_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { if (AM_I_DC == FALSE) { crm_err("Not invoking scheduler because not DC: %s", fsa_action2string(action)); return; } if (is_set(fsa_input_register, R_PE_CONNECTED) == FALSE) { if (is_set(fsa_input_register, R_SHUTDOWN)) { crm_err("Cannot shut down gracefully without the scheduler"); register_fsa_input_before(C_FSA_INTERNAL, I_TERMINATE, NULL); } else { crm_info("Waiting for the scheduler to connect"); crmd_fsa_stall(FALSE); register_fsa_action(A_PE_START); } return; } if (cur_state != S_POLICY_ENGINE) { crm_notice("Not invoking scheduler because in state %s", fsa_state2string(cur_state)); return; } if (is_set(fsa_input_register, R_HAVE_CIB) == FALSE) { crm_err("Attempted to invoke scheduler without consistent Cluster Information Base!"); /* start the join from scratch */ register_fsa_input_before(C_FSA_INTERNAL, I_ELECTION, NULL); return; } fsa_pe_query = fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local); crm_debug("Query %d: Requesting the current CIB: %s", fsa_pe_query, fsa_state2string(fsa_state)); /* Make sure any queued calculations are discarded */ free(fsa_pe_ref); fsa_pe_ref = NULL; fsa_register_cib_callback(fsa_pe_query, FALSE, NULL, do_pe_invoke_callback); }
gboolean crm_timer_popped(gpointer data) { fsa_timer_t *timer = (fsa_timer_t *) data; if (timer == wait_timer || timer == recheck_timer || timer == transition_timer || timer == finalization_timer || timer == election_trigger) { crm_info("%s (%s) just popped (%dms)", get_timer_desc(timer), fsa_input2string(timer->fsa_input), timer->period_ms); timer->counter++; } else { crm_err("%s (%s) just popped in state %s! (%dms)", get_timer_desc(timer), fsa_input2string(timer->fsa_input), fsa_state2string(fsa_state), timer->period_ms); } if (timer == election_trigger && election_trigger->counter > 5) { crm_notice("We appear to be in an election loop, something may be wrong"); crm_write_blackbox(0, NULL); election_trigger->counter = 0; } if (timer->repeat == FALSE) { crm_timer_stop(timer); /* make it _not_ go off again */ } if (timer->fsa_input == I_INTEGRATED) { crm_info("Welcomed: %d, Integrated: %d", crmd_join_phase_count(crm_join_welcomed), crmd_join_phase_count(crm_join_integrated)); if (crmd_join_phase_count(crm_join_welcomed) == 0) { /* If we don't even have ourself, start again */ register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION, NULL, NULL, __FUNCTION__); } else { register_fsa_input_before(C_TIMER_POPPED, timer->fsa_input, NULL); } } else if (timer == recheck_timer && fsa_state != S_IDLE) { crm_debug("Discarding %s event in state: %s", fsa_input2string(timer->fsa_input), fsa_state2string(fsa_state)); } else if (timer == finalization_timer && fsa_state != S_FINALIZE_JOIN) { crm_debug("Discarding %s event in state: %s", fsa_input2string(timer->fsa_input), fsa_state2string(fsa_state)); } else if (timer->fsa_input != I_NULL) { register_fsa_input(C_TIMER_POPPED, timer->fsa_input, NULL); } crm_trace("Triggering FSA: %s", __FUNCTION__); mainloop_set_trigger(fsa_source); return TRUE; }
gboolean crm_timer_popped(gpointer data) { fsa_timer_t *timer = (fsa_timer_t *) data; if (timer == wait_timer || timer == recheck_timer || timer == transition_timer || timer == finalization_timer || timer == election_trigger) { crm_info("%s (%s) just popped (%dms)", get_timer_desc(timer), fsa_input2string(timer->fsa_input), timer->period_ms); } else { crm_err("%s (%s) just popped in state %s! (%dms)", get_timer_desc(timer), fsa_input2string(timer->fsa_input), fsa_state2string(fsa_state), timer->period_ms); } if (timer->repeat == FALSE) { crm_timer_stop(timer); /* make it _not_ go off again */ } if (timer->fsa_input == I_INTEGRATED) { crm_info("Welcomed: %d, Integrated: %d", g_hash_table_size(welcomed_nodes), g_hash_table_size(integrated_nodes)); if (g_hash_table_size(welcomed_nodes) == 0) { /* If we don't even have ourself, start again */ register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION, NULL, NULL, __FUNCTION__); } else { register_fsa_input_before(C_TIMER_POPPED, timer->fsa_input, NULL); } } else if (timer == recheck_timer && fsa_state != S_IDLE) { crm_debug("Discarding %s event in state: %s", fsa_input2string(timer->fsa_input), fsa_state2string(fsa_state)); } else if (timer == finalization_timer && fsa_state != S_FINALIZE_JOIN) { crm_debug("Discarding %s event in state: %s", fsa_input2string(timer->fsa_input), fsa_state2string(fsa_state)); } else if (timer->fsa_input != I_NULL) { register_fsa_input(C_TIMER_POPPED, timer->fsa_input, NULL); } crm_trace("Triggering FSA: %s", __FUNCTION__); mainloop_set_trigger(fsa_source); return TRUE; }
void peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data) { uint32_t old = 0; uint32_t changed = 0; bool appeared = FALSE; bool is_remote = is_set(node->flags, crm_remote_node); const char *status = NULL; /* Crmd waits to receive some information from the membership layer before * declaring itself operational. If this is being called for a cluster node, * indicate that we have it. */ if (!is_remote) { set_bit(fsa_input_register, R_PEER_DATA); } if (node->uname == NULL) { return; } switch (type) { case crm_status_uname: /* If we've never seen the node, then it also won't be in the status section */ crm_info("%s node %s is now %s", (is_remote? "Remote" : "Cluster"), node->uname, state_text(node->state)); return; case crm_status_rstate: case crm_status_nstate: /* This callback should not be called unless the state actually * changed, but here's a failsafe just in case. */ CRM_CHECK(safe_str_neq(data, node->state), return); crm_info("%s node %s is now %s (was %s)", (is_remote? "Remote" : "Cluster"), node->uname, state_text(node->state), state_text(data)); if (safe_str_eq(CRM_NODE_MEMBER, node->state)) { appeared = TRUE; if (!is_remote) { remove_stonith_cleanup(node->uname); } } crmd_alert_node_event(node); break; case crm_status_processes: if (data) { old = *(const uint32_t *)data; changed = node->processes ^ old; } status = (node->processes & proc_flags) ? ONLINESTATUS : OFFLINESTATUS; crm_info("Client %s/%s now has status [%s] (DC=%s, changed=%6x)", node->uname, peer2text(proc_flags), status, AM_I_DC ? "true" : crm_str(fsa_our_dc), changed); if ((changed & proc_flags) == 0) { /* Peer process did not change */ crm_trace("No change %6x %6x %6x", old, node->processes, proc_flags); return; } else if (is_not_set(fsa_input_register, R_CIB_CONNECTED)) { crm_trace("Not connected"); return; } else if (fsa_state == S_STOPPING) { crm_trace("Stopping"); return; } appeared = (node->processes & proc_flags) != 0; if (safe_str_eq(node->uname, fsa_our_uname) && (node->processes & proc_flags) == 0) { /* Did we get evicted? */ crm_notice("Our peer connection failed"); register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ERROR, NULL); } else if (safe_str_eq(node->uname, fsa_our_dc) && crm_is_peer_active(node) == FALSE) { /* Did the DC leave us? */ crm_notice("Our peer on the DC (%s) is dead", fsa_our_dc); register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ELECTION, NULL); /* @COMPAT DC < 1.1.13: If a DC shuts down normally, we don't * want to fence it. Newer DCs will send their shutdown request * to all peers, who will update the DC's expected state to * down, thus avoiding fencing. We can safely erase the DC's * transient attributes when it leaves in that case. However, * the only way to avoid fencing older DCs is to leave the * transient attributes intact until it rejoins. */ if (compare_version(fsa_our_dc_version, "3.0.9") > 0) { erase_status_tag(node->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local); } } else if(AM_I_DC && appeared == FALSE) { crm_info("Peer %s left us", node->uname); erase_status_tag(node->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local); } break; } if (AM_I_DC) { xmlNode *update = NULL; int flags = node_update_peer; gboolean alive = is_remote? appeared : crm_is_peer_active(node); crm_action_t *down = match_down_event(node->uuid, appeared); crm_trace("Alive=%d, appeared=%d, down=%d", alive, appeared, (down? down->id : -1)); if (alive && type == crm_status_processes) { register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL); } if (down) { const char *task = crm_element_value(down->xml, XML_LRM_ATTR_TASK); if (safe_str_eq(task, CRM_OP_FENCE)) { /* tengine_stonith_callback() confirms fence actions */ crm_trace("Updating CIB %s stonithd reported fencing of %s complete", (down->confirmed? "after" : "before"), node->uname); } else if ((alive == FALSE) && safe_str_eq(task, CRM_OP_SHUTDOWN)) { crm_notice("%s of peer %s is complete "CRM_XS" op=%d", task, node->uname, down->id); /* down->confirmed = TRUE; */ stop_te_timer(down->timer); if (!is_remote) { flags |= node_update_join | node_update_expected; crmd_peer_down(node, FALSE); check_join_state(fsa_state, __FUNCTION__); } update_graph(transition_graph, down); trigger_graph(); } else { crm_trace("Node %s is %salive, was expected to %s (op %d)", node->uname, (alive? "" : "not "), task, down->id); } } else if (appeared == FALSE) { crm_notice("Stonith/shutdown of %s not matched", node->uname); if (!is_remote) { crm_update_peer_join(__FUNCTION__, node, crm_join_none); check_join_state(fsa_state, __FUNCTION__); } abort_transition(INFINITY, tg_restart, "Node failure", NULL); fail_incompletable_actions(transition_graph, node->uuid); } else { crm_trace("Node %s came up, was not expected to be down", node->uname); } if (is_remote) { /* A pacemaker_remote node won't have its cluster status updated * in the CIB by membership-layer callbacks, so do it here. */ flags |= node_update_cluster; /* Trigger resource placement on newly integrated nodes */ if (appeared) { abort_transition(INFINITY, tg_restart, "pacemaker_remote node integrated", NULL); } } /* Update the CIB node state */ update = create_node_state_update(node, flags, NULL, __FUNCTION__); fsa_cib_anon_update(XML_CIB_TAG_STATUS, update, cib_scope_local | cib_quorum_override | cib_can_create); free_xml(update); } trigger_fsa(fsa_source); }
void peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data) { uint32_t old = 0; uint32_t changed = 0; bool appeared = FALSE; const char *status = NULL; set_bit(fsa_input_register, R_PEER_DATA); if (node->uname == NULL) { return; } switch (type) { case crm_status_uname: /* If we've never seen the node, then it also wont be in the status section */ crm_info("%s is now %s", node->uname, node->state); return; case crm_status_rstate: crm_info("Remote node %s is now %s (was %s)", node->uname, node->state, (const char *)data); /* Keep going */ case crm_status_nstate: crm_info("%s is now %s (was %s)", node->uname, node->state, (const char *)data); if (safe_str_eq(data, node->state)) { /* State did not change */ return; } else if(safe_str_eq(CRM_NODE_MEMBER, node->state)) { appeared = TRUE; } break; case crm_status_processes: if (data) { old = *(const uint32_t *)data; changed = node->processes ^ old; } /* crmd_proc_update(node, proc_flags); */ status = (node->processes & proc_flags) ? ONLINESTATUS : OFFLINESTATUS; crm_info("Client %s/%s now has status [%s] (DC=%s, changed=%6x)", node->uname, peer2text(proc_flags), status, AM_I_DC ? "true" : crm_str(fsa_our_dc), changed); if ((changed & proc_flags) == 0) { /* Peer process did not change */ crm_trace("No change %6x %6x %6x", old, node->processes, proc_flags); return; } else if (is_not_set(fsa_input_register, R_CIB_CONNECTED)) { crm_trace("Not connected"); return; } else if (fsa_state == S_STOPPING) { crm_trace("Stopping"); return; } appeared = (node->processes & proc_flags) != 0; if (safe_str_eq(node->uname, fsa_our_uname) && (node->processes & proc_flags) == 0) { /* Did we get evicted? */ crm_notice("Our peer connection failed"); register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ERROR, NULL); } else if (safe_str_eq(node->uname, fsa_our_dc) && crm_is_peer_active(node) == FALSE) { /* Did the DC leave us? */ crm_notice("Our peer on the DC (%s) is dead", fsa_our_dc); register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ELECTION, NULL); } else if(AM_I_DC && appeared == FALSE) { crm_info("Peer %s left us", node->uname); /* crm_update_peer_join(__FUNCTION__, node, crm_join_none); */ } break; } if (AM_I_DC) { xmlNode *update = NULL; int flags = node_update_peer; gboolean alive = crm_is_peer_active(node); crm_action_t *down = match_down_event(0, node->uuid, NULL, appeared); crm_trace("Alive=%d, appear=%d, down=%p", alive, appeared, down); if (alive && type == crm_status_processes) { register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL); } if (down) { const char *task = crm_element_value(down->xml, XML_LRM_ATTR_TASK); if (alive && safe_str_eq(task, CRM_OP_FENCE)) { crm_info("Node return implies stonith of %s (action %d) completed", node->uname, down->id); erase_status_tag(node->uname, XML_CIB_TAG_LRM, cib_scope_local); erase_status_tag(node->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local); /* down->confirmed = TRUE; Only stonith-ng returning should imply completion */ down->sent_update = TRUE; /* Prevent tengine_stonith_callback() from calling send_stonith_update() */ } else if (safe_str_eq(task, CRM_OP_FENCE)) { crm_trace("Waiting for stonithd to report the fencing of %s is complete", node->uname); /* via tengine_stonith_callback() */ } else if (alive == FALSE) { crm_notice("%s of %s (op %d) is complete", task, node->uname, down->id); /* down->confirmed = TRUE; Only stonith-ng returning should imply completion */ stop_te_timer(down->timer); flags |= node_update_join | node_update_expected; crmd_peer_down(node, FALSE); check_join_state(fsa_state, __FUNCTION__); update_graph(transition_graph, down); trigger_graph(); } else { crm_trace("Other %p", down); } } else if (appeared == FALSE) { crm_notice("Stonith/shutdown of %s not matched", node->uname); crm_update_peer_join(__FUNCTION__, node, crm_join_none); check_join_state(fsa_state, __FUNCTION__); abort_transition(INFINITY, tg_restart, "Node failure", NULL); fail_incompletable_actions(transition_graph, node->uuid); } else { crm_trace("Other %p", down); } update = do_update_node_cib(node, flags, NULL, __FUNCTION__); fsa_cib_anon_update(XML_CIB_TAG_STATUS, update, cib_scope_local | cib_quorum_override | cib_can_create); free_xml(update); } trigger_fsa(fsa_source); }