/* A_PE_START, A_PE_STOP, O_PE_RESTART */ void do_pe_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { if (action & A_PE_STOP) { clear_bit(fsa_input_register, R_PE_REQUIRED); pe_subsystem_free(); clear_bit(fsa_input_register, R_PE_CONNECTED); } if ((action & A_PE_START) && (is_set(fsa_input_register, R_PE_CONNECTED) == FALSE)) { if (cur_state != S_STOPPING) { set_bit(fsa_input_register, R_PE_REQUIRED); if (pe_subsystem_new()) { set_bit(fsa_input_register, R_PE_CONNECTED); } else { crm_warn("Could not connect to scheduler"); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } } else { crm_info("Ignoring request to connect to scheduler while shutting down"); } } }
static void revision_check_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { int cmp = -1; const char *revision = NULL; xmlNode *generation = NULL; if(rc != cib_ok) { fsa_data_t *msg_data = NULL; register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } generation = output; CRM_CHECK(safe_str_eq(crm_element_name(generation), XML_TAG_CIB), crm_log_xml_err(output, __FUNCTION__); return); crm_debug_3("Checking our feature revision is allowed: %s", CIB_FEATURE_SET); revision = crm_element_value(generation, XML_ATTR_CRM_VERSION); cmp = compare_version(revision, CRM_FEATURE_SET); if(cmp > 0) { crm_err("This build (%s) does not support the current" " resource configuration", VERSION); crm_err("We can support up to CRM feature set %s (current=%s)", revision, CRM_FEATURE_SET); crm_err("Shutting down the CRM"); /* go into a stall state */ register_fsa_error_adv( C_FSA_INTERNAL, I_SHUTDOWN, NULL, NULL, __FUNCTION__); return; } }
static void node_list_update_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { fsa_data_t *msg_data = NULL; if(call_id < pcmk_ok) { crm_err("Node list update failed: %s (%d)", pcmk_strerror(call_id), call_id); crm_log_xml_debug(msg, "update:failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } else if(rc < pcmk_ok) { crm_err("Node update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc); crm_log_xml_debug(msg, "update:failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } }
static void feature_update_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { if (rc != pcmk_ok) { fsa_data_t *msg_data = NULL; register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } }
static void revision_check_callback(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data) { int cmp = -1; const char *revision = NULL; crm_data_t *generation = NULL; #if CRM_DEPRECATED_SINCE_2_0_4 if(safe_str_eq(crm_element_name(output), XML_TAG_CIB)) { generation = output; } else { generation = find_xml_node(output, XML_TAG_CIB, TRUE); } #else generation = output; CRM_DEV_ASSERT(safe_str_eq(crm_element_name(generation), XML_TAG_CIB)); #endif if(rc != cib_ok) { fsa_data_t *msg_data = NULL; register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } crm_debug_3("Checking our feature revision is allowed: %s", CIB_FEATURE_SET); revision = crm_element_value(generation, XML_ATTR_CIB_REVISION); cmp = compare_version(revision, CIB_FEATURE_SET); if(cmp > 0) { crm_err("This build (%s) does not support the current" " resource configuration", VERSION); crm_err("We can support up to CIB feature set %s (current=%s)", CIB_FEATURE_SET, revision); crm_err("Shutting down the CRM"); /* go into a stall state */ register_fsa_error_adv( C_FSA_INTERNAL, I_SHUTDOWN, NULL, NULL, __FUNCTION__); return; } revision = crm_element_value(generation, XML_ATTR_CRM_VERSION); cmp = compare_version(revision, CRM_FEATURE_SET); if(cmp > 0) { crm_err("This build (%s) does not support the current" " resource configuration", VERSION); crm_err("We can support up to CRM feature set %s (current=%s)", revision, CRM_FEATURE_SET); crm_err("Shutting down the CRM"); /* go into a stall state */ register_fsa_error_adv( C_FSA_INTERNAL, I_SHUTDOWN, NULL, NULL, __FUNCTION__); return; } }
static void feature_update_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { if (rc != pcmk_ok) { fsa_data_t *msg_data = NULL; crm_notice("Update failed: %s (%d)", pcmk_strerror(rc), rc); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } }
static void crmd_node_update_complete(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { fsa_data_t *msg_data = NULL; last_peer_update = 0; if (rc == pcmk_ok) { crm_trace("Node update %d complete", call_id); } else if(call_id < pcmk_ok) { crm_err("Node update failed: %s (%d)", pcmk_strerror(call_id), call_id); crm_log_xml_debug(msg, "failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } else { crm_err("Node update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc); crm_log_xml_debug(msg, "failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } }
static void join_update_complete_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { fsa_data_t *msg_data = NULL; if (rc == pcmk_ok) { crm_debug("Join update %d complete", call_id); check_join_state(fsa_state, __FUNCTION__); } else { crm_err("Join update %d failed", call_id); crm_log_xml_debug(msg, "failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } }
/* A_TE_INVOKE, A_TE_CANCEL */ void do_te_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { if(AM_I_DC == FALSE) { crm_err("Not DC: No need to invoke the TE (anymore): %s", fsa_action2string(action)); return; } else if(fsa_state != S_TRANSITION_ENGINE && (action & A_TE_INVOKE)) { crm_err("No need to invoke the TE (%s) in state %s", fsa_action2string(action), fsa_state2string(fsa_state)); return; } if(action & A_TE_CANCEL) { crm_debug("Cancelling the transition: %s", transition_graph->complete?"inactive":"active"); abort_transition(INFINITY, tg_restart, "Peer Cancelled", NULL); if(transition_graph && transition_graph->complete == FALSE) { crmd_fsa_stall(NULL); } } else if(action & A_TE_HALT) { crm_debug("Halting the transition: %s", transition_graph->complete?"inactive":"active"); abort_transition(INFINITY, tg_stop, "Peer Halt", NULL); if(transition_graph && transition_graph->complete == FALSE) { crmd_fsa_stall(NULL); } } else if(action & A_TE_INVOKE) { const char *value = NULL; xmlNode *graph_data = NULL; ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); const char *ref = crm_element_value(input->msg, XML_ATTR_REFERENCE); const char *graph_file = crm_element_value(input->msg, F_CRM_TGRAPH); const char *graph_input = crm_element_value(input->msg, F_CRM_TGRAPH_INPUT); if(graph_file == NULL && input->xml == NULL) { crm_log_xml_err(input->msg, "Bad command"); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); return; } if(transition_graph && transition_graph->complete == FALSE) { crm_info("Another transition is already active"); abort_transition(INFINITY, tg_restart, "Transition Active", NULL); return; } if(fsa_pe_ref == NULL || safe_str_neq(fsa_pe_ref, ref)) { crm_info("Transition is redundant: %s vs. %s", crm_str(fsa_pe_ref), crm_str(ref)); abort_transition(INFINITY, tg_restart, "Transition Redundant", NULL); } graph_data = input->xml; if(graph_data == NULL && graph_file != NULL) { graph_data = filename2xml(graph_file); } if (is_timer_started(transition_timer)) { crm_debug("The transitioner wait for a transition timer"); return; } CRM_CHECK(graph_data != NULL, crm_err("Input raised by %s is invalid", msg_data->origin); crm_log_xml_err(input->msg, "Bad command"); return); destroy_graph(transition_graph); transition_graph = unpack_graph(graph_data, graph_input); CRM_CHECK(transition_graph != NULL, transition_graph = create_blank_graph(); return); crm_info("Processing graph %d (ref=%s) derived from %s", transition_graph->id, ref, graph_input); value = crm_element_value(graph_data, "failed-stop-offset"); if(value) { crm_free(failed_stop_offset); failed_stop_offset = crm_strdup(value); } value = crm_element_value(graph_data, "failed-start-offset"); if(value) { crm_free(failed_start_offset); failed_start_offset = crm_strdup(value); } trigger_graph(); print_graph(LOG_DEBUG_2, transition_graph); if(graph_data != input->xml) { free_xml(graph_data); } } }
/* A_CIB_STOP, A_CIB_START, A_CIB_RESTART, */ void do_cib_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { struct crm_subsystem_s *this_subsys = cib_subsystem; long long stop_actions = A_CIB_STOP; long long start_actions = A_CIB_START; if(action & stop_actions) { if (fsa_cib_conn->state != cib_disconnected && last_resource_update != 0) { crm_info("Waiting for resource update %d to complete", last_resource_update); crmd_fsa_stall(NULL); return; } crm_info("Disconnecting CIB"); clear_bit_inplace(fsa_input_register, R_CIB_CONNECTED); CRM_ASSERT(fsa_cib_conn != NULL); fsa_cib_conn->cmds->del_notify_callback( fsa_cib_conn, T_CIB_DIFF_NOTIFY, do_cib_updated); if(fsa_cib_conn->state != cib_disconnected) { fsa_cib_conn->cmds->set_slave( fsa_cib_conn, cib_scope_local); fsa_cib_conn->cmds->signoff(fsa_cib_conn); } } if(action & start_actions) { int rc = cib_ok; CRM_ASSERT(fsa_cib_conn != NULL); if(cur_state == S_STOPPING) { crm_err("Ignoring request to start %s after shutdown", this_subsys->name); return; } rc = fsa_cib_conn->cmds->signon( fsa_cib_conn, CRM_SYSTEM_CRMD, cib_command); if(rc != cib_ok) { /* a short wait that usually avoids stalling the FSA */ sleep(1); rc = fsa_cib_conn->cmds->signon( fsa_cib_conn, CRM_SYSTEM_CRMD, cib_command); } if(rc != cib_ok){ crm_info("Could not connect to the CIB service: %s", cib_error2string(rc)); } else if(cib_ok != fsa_cib_conn->cmds->set_connection_dnotify( fsa_cib_conn, crmd_cib_connection_destroy)) { crm_err("Could not set dnotify callback"); } else if(cib_ok != fsa_cib_conn->cmds->add_notify_callback( fsa_cib_conn, T_CIB_REPLACE_NOTIFY, do_cib_replaced)) { crm_err("Could not set CIB notification callback"); } else if(cib_ok != fsa_cib_conn->cmds->add_notify_callback( fsa_cib_conn, T_CIB_DIFF_NOTIFY, do_cib_updated)) { crm_err("Could not set CIB notification callback"); } else { set_bit_inplace( fsa_input_register, R_CIB_CONNECTED); } if(is_set(fsa_input_register, R_CIB_CONNECTED) == FALSE) { cib_retries++; crm_warn("Couldn't complete CIB registration %d" " times... pause and retry", cib_retries); if(cib_retries < 30) { crm_timer_start(wait_timer); crmd_fsa_stall(NULL); } else { crm_err("Could not complete CIB" " registration %d times..." " hard error", cib_retries); register_fsa_error( C_FSA_INTERNAL, I_ERROR, NULL); } } else { int call_id = 0; crm_info("CIB connection established"); call_id = fsa_cib_conn->cmds->query( fsa_cib_conn, NULL, NULL, cib_scope_local); add_cib_op_callback(fsa_cib_conn, call_id, FALSE, NULL, revision_check_callback); cib_retries = 0; } } }
/* aka. this is notification that we have (or have not) been accepted */ void do_cl_join_finalize_respond(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { xmlNode *tmp1 = NULL; gboolean was_nack = TRUE; static gboolean first_join = TRUE; ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); const char *start_state = daemon_option("node_start_state"); int join_id = -1; const char *op = crm_element_value(input->msg, F_CRM_TASK); const char *ack_nack = crm_element_value(input->msg, CRM_OP_JOIN_ACKNAK); const char *welcome_from = crm_element_value(input->msg, F_CRM_HOST_FROM); if (safe_str_neq(op, CRM_OP_JOIN_ACKNAK)) { crm_trace("Ignoring op=%s message", op); return; } /* calculate if it was an ack or a nack */ if (crm_is_true(ack_nack)) { was_nack = FALSE; } crm_element_value_int(input->msg, F_CRM_JOIN_ID, &join_id); if (was_nack) { crm_err("Shutting down because cluster join with leader %s failed " CRM_XS" join-%d NACK'd", welcome_from, join_id); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } if (AM_I_DC == FALSE && safe_str_eq(welcome_from, fsa_our_uname)) { crm_warn("Discarding our own welcome - we're no longer the DC"); return; } if (update_dc(input->msg) == FALSE) { crm_warn("Discarding %s from node %s (expected from %s)", op, welcome_from, fsa_our_dc); return; } update_dc_expected(input->msg); /* send our status section to the DC */ tmp1 = do_lrm_query(TRUE, fsa_our_uname); if (tmp1 != NULL) { xmlNode *reply = create_request(CRM_OP_JOIN_CONFIRM, tmp1, fsa_our_dc, CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); crm_xml_add_int(reply, F_CRM_JOIN_ID, join_id); crm_debug("Confirming join-%d: sending local operation history to %s", join_id, fsa_our_dc); /* * If this is the node's first join since the crmd started on it, clear * any previous transient node attributes, to handle the case where * the node restarted so quickly that the cluster layer didn't notice. * * Do not remove the resources though, they'll be cleaned up in * do_dc_join_ack(). Removing them here creates a race condition if the * crmd is being recovered. Instead of a list of active resources from * the lrmd, we may end up with a blank status section. If we are _NOT_ * lucky, we will probe for the "wrong" instance of anonymous clones and * end up with multiple active instances on the machine. */ if (first_join && is_not_set(fsa_input_register, R_SHUTDOWN)) { first_join = FALSE; erase_status_tag(fsa_our_uname, XML_TAG_TRANSIENT_NODEATTRS, 0); update_attrd(fsa_our_uname, "terminate", NULL, NULL, FALSE); update_attrd(fsa_our_uname, XML_CIB_ATTR_SHUTDOWN, "0", NULL, FALSE); if (start_state) { set_join_state(start_state); } } send_cluster_message(crm_get_peer(0, fsa_our_dc), crm_msg_crmd, reply, TRUE); free_xml(reply); if (AM_I_DC == FALSE) { register_fsa_input_adv(cause, I_NOT_DC, NULL, A_NOTHING, TRUE, __FUNCTION__); update_attrd(NULL, NULL, NULL, NULL, FALSE); } free_xml(tmp1); } else { crm_err("Could not confirm join-%d with %s: Local operation history failed", join_id, fsa_our_dc); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } }
/* A_CIB_INVOKE, A_CIB_BUMPGEN, A_UPDATE_NODESTATUS */ void do_cib_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { HA_Message *answer = NULL; ha_msg_input_t *cib_msg = fsa_typed_data(fsa_dt_ha_msg); const char *sys_from = cl_get_string(cib_msg->msg, F_CRM_SYS_FROM); if(fsa_cib_conn->state == cib_disconnected) { if(cur_state != S_STOPPING) { crm_err("CIB is disconnected"); crm_log_message_adv(LOG_WARNING, "CIB Input", cib_msg->msg); return; } crm_info("CIB is disconnected"); crm_log_message_adv(LOG_DEBUG, "CIB Input", cib_msg->msg); return; } if(action & A_CIB_INVOKE) { if(safe_str_eq(sys_from, CRM_SYSTEM_CRMD)) { action = A_CIB_INVOKE_LOCAL; } else if(safe_str_eq(sys_from, CRM_SYSTEM_DC)) { action = A_CIB_INVOKE_LOCAL; } } if(action & A_CIB_INVOKE || action & A_CIB_INVOKE_LOCAL) { int call_options = 0; enum cib_errors rc = cib_ok; crm_data_t *cib_frag = NULL; const char *section = NULL; const char *op = cl_get_string(cib_msg->msg, F_CRM_TASK); section = cl_get_string(cib_msg->msg, F_CIB_SECTION); ha_msg_value_int(cib_msg->msg, F_CIB_CALLOPTS, &call_options); crm_log_message(LOG_MSG, cib_msg->msg); crm_log_xml_debug_3(cib_msg->xml, "[CIB update]"); if(op == NULL) { crm_err("Invalid CIB Message"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } cib_frag = NULL; rc = fsa_cib_conn->cmds->variant_op( fsa_cib_conn, op, NULL, section, cib_msg->xml, &cib_frag, call_options); if(rc < cib_ok || (action & A_CIB_INVOKE)) { answer = create_reply(cib_msg->msg, cib_frag); ha_msg_add(answer,XML_ATTR_RESULT,cib_error2string(rc)); } if(action & A_CIB_INVOKE) { if(relay_message(answer, TRUE) == FALSE) { crm_err("Confused what to do with cib result"); crm_log_message(LOG_ERR, answer); crm_msg_del(answer); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); return; } } else if(rc < cib_ok) { ha_msg_input_t *input = NULL; crm_err("Internal CRM/CIB command from %s() failed: %s", msg_data->origin, cib_error2string(rc)); crm_log_message_adv(LOG_WARNING, "CIB Input", cib_msg->msg); crm_log_message_adv(LOG_WARNING, "CIB Reply", answer); input = new_ha_msg_input(answer); register_fsa_input(C_FSA_INTERNAL, I_ERROR, input); crm_msg_del(answer); delete_ha_msg_input(input); } } else { crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__); } }
/* aka. this is notification that we have (or have not) been accepted */ void do_cl_join_finalize_respond(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { xmlNode *tmp1 = NULL; gboolean was_nack = TRUE; static gboolean first_join = TRUE; ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); int join_id = -1; const char *op = crm_element_value(input->msg, F_CRM_TASK); const char *ack_nack = crm_element_value(input->msg, CRM_OP_JOIN_ACKNAK); const char *welcome_from = crm_element_value(input->msg, F_CRM_HOST_FROM); if (safe_str_neq(op, CRM_OP_JOIN_ACKNAK)) { crm_trace("Ignoring op=%s message", op); return; } /* calculate if it was an ack or a nack */ if (crm_is_true(ack_nack)) { was_nack = FALSE; } crm_element_value_int(input->msg, F_CRM_JOIN_ID, &join_id); if (was_nack) { crm_err("Join (join-%d) with leader %s failed (NACK'd): Shutting down", join_id, welcome_from); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } if (AM_I_DC == FALSE && safe_str_eq(welcome_from, fsa_our_uname)) { crm_warn("Discarding our own welcome - we're no longer the DC"); return; } if (update_dc(input->msg) == FALSE) { crm_warn("Discarding %s from %s (expected %s)", op, welcome_from, fsa_our_dc); return; } /* send our status section to the DC */ crm_debug("Confirming join join-%d: %s", join_id, crm_element_value(input->msg, F_CRM_TASK)); tmp1 = do_lrm_query(TRUE, fsa_our_uname); if (tmp1 != NULL) { xmlNode *reply = create_request(CRM_OP_JOIN_CONFIRM, tmp1, fsa_our_dc, CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); crm_xml_add_int(reply, F_CRM_JOIN_ID, join_id); crm_debug("join-%d: Join complete." " Sending local LRM status to %s", join_id, fsa_our_dc); if (first_join) { first_join = FALSE; /* * Clear any previous transient node attribute and lrm operations * * Corosync has a nasty habit of not being able to tell if a * node is returning or didn't leave in the first place. * This confuses Pacemaker because it never gets a "node up" * event which is normally used to clean up the status section. * * Do not remove the resources though, they'll be cleaned up in * do_dc_join_ack(). Removing them here creates a race * condition if the crmd is being recovered. * Instead of a list of active resources from the lrmd * we may end up with a blank status section. * If we are _NOT_ lucky, we will probe for the "wrong" instance * of anonymous clones and end up with multiple active * instances on the machine. */ erase_status_tag(fsa_our_uname, XML_TAG_TRANSIENT_NODEATTRS, 0); /* Just in case attrd was still around too */ if (is_not_set(fsa_input_register, R_SHUTDOWN)) { update_attrd(fsa_our_uname, "terminate", NULL, NULL, FALSE); update_attrd(fsa_our_uname, XML_CIB_ATTR_SHUTDOWN, "0", NULL, FALSE); } } send_cluster_message(crm_get_peer(0, fsa_our_dc), crm_msg_crmd, reply, TRUE); free_xml(reply); if (AM_I_DC == FALSE) { register_fsa_input_adv(cause, I_NOT_DC, NULL, A_NOTHING, TRUE, __FUNCTION__); update_attrd(NULL, NULL, NULL, NULL, FALSE); } free_xml(tmp1); } else { crm_err("Could not send our LRM state to the DC"); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } }