void cib_fencing_updated(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { if (rc < pcmk_ok) { crm_err("Fencing update %d for %s: failed - %s (%d)", call_id, (char *)user_data, pcmk_strerror(rc), rc); crm_log_xml_warn(msg, "Failed update"); abort_transition(INFINITY, tg_shutdown, "CIB update failed", NULL); } else { crm_info("Fencing update %d for %s: complete", call_id, (char *)user_data); } }
/*! * \brief Handle a timeout in node-to-node communication * * \param[in] data Pointer to action timer * * \return FALSE (indicating that source should be not be re-added) */ gboolean action_timer_callback(gpointer data) { crm_action_timer_t *timer = NULL; const char *task = NULL; const char *on_node = NULL; const char *via_node = NULL; CRM_CHECK(data != NULL, return FALSE); timer = (crm_action_timer_t *) data; stop_te_timer(timer); CRM_CHECK(timer->action != NULL, return FALSE); task = crm_element_value(timer->action->xml, XML_LRM_ATTR_TASK); on_node = crm_element_value(timer->action->xml, XML_LRM_ATTR_TARGET); via_node = crm_element_value(timer->action->xml, XML_LRM_ATTR_ROUTER_NODE); if (transition_graph->complete) { crm_notice("Node %s did not send %s result (via %s) within %dms " "(ignoring because transition not in progress)", (on_node? on_node : ""), (task? task : "unknown action"), (via_node? via_node : "controller"), timer->timeout); } else { /* fail the action */ crm_err("Node %s did not send %s result (via %s) within %dms " "(action timeout plus cluster-delay)", (on_node? on_node : ""), (task? task : "unknown action"), (via_node? via_node : "controller"), timer->timeout); print_action(LOG_ERR, "Aborting transition, action lost: ", timer->action); timer->action->failed = TRUE; te_action_confirmed(timer->action); abort_transition(INFINITY, tg_restart, "Action lost", NULL); update_graph(transition_graph, timer->action); trigger_graph(); // Record timeout in the CIB if appropriate if ((timer->action->type == action_type_rsc) && controld_action_is_recordable(task)) { controld_record_action_timeout(timer->action); } } return FALSE; }
static void process_cib_diff(xmlNode *cib, xmlNode *change, const char *op, const char *xpath) { xmlNode *status = first_named_child(cib, XML_CIB_TAG_STATUS); xmlNode *config = first_named_child(cib, XML_CIB_TAG_CONFIGURATION); if (status) { process_status_diff(status, change, op, xpath); } if (config) { abort_transition(INFINITY, tg_restart, "Non-status-only change", change); } }
void send_stonith_update(stonith_ops_t * op) { enum cib_errors rc = cib_ok; const char *target = op->node_name; const char *uuid = op->node_uuid; /* zero out the node-status & remove all LRM status info */ xmlNode *node_state = create_xml_node(NULL, XML_CIB_TAG_STATE); CRM_CHECK(op->node_name != NULL, return); CRM_CHECK(op->node_uuid != NULL, return); crm_xml_add(node_state, XML_ATTR_UUID, uuid); crm_xml_add(node_state, XML_ATTR_UNAME, target); crm_xml_add(node_state, XML_CIB_ATTR_HASTATE, DEADSTATUS); crm_xml_add(node_state, XML_CIB_ATTR_INCCM, XML_BOOLEAN_NO); crm_xml_add(node_state, XML_CIB_ATTR_CRMDSTATE, OFFLINESTATUS); crm_xml_add(node_state, XML_CIB_ATTR_JOINSTATE, CRMD_JOINSTATE_DOWN); crm_xml_add(node_state, XML_CIB_ATTR_EXPSTATE, CRMD_JOINSTATE_DOWN); crm_xml_add(node_state, XML_ATTR_ORIGIN, __FUNCTION__); rc = fsa_cib_conn->cmds->update( fsa_cib_conn, XML_CIB_TAG_STATUS, node_state, cib_quorum_override|cib_scope_local|cib_can_create); if(rc < cib_ok) { crm_err("CIB update failed: %s", cib_error2string(rc)); abort_transition( INFINITY, tg_shutdown, "CIB update failed", node_state); } else { /* delay processing the trigger until the update completes */ add_cib_op_callback(fsa_cib_conn, rc, FALSE, crm_strdup(target), cib_fencing_updated); } erase_status_tag(op->node_name, XML_CIB_TAG_LRM, cib_scope_local); erase_status_tag(op->node_name, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local); free_xml(node_state); #if 0 /* Make sure the membership cache is accurate */ crm_update_peer(0, 0, 0, -1, 0, uuid, target, NULL, CRM_NODE_LOST); #endif return; }
static void process_resource_updates( const char *node, xmlNode *xml, xmlNode *change, const char *op, const char *xpath) { xmlNode *cIter = NULL; xmlNode *rsc = NULL; xmlNode *rsc_op = NULL; int num_resources = 0; if(xml == NULL) { return; } else if(strcmp((const char*)xml->name, XML_CIB_TAG_LRM) == 0) { xml = first_named_child(xml, XML_LRM_TAG_RESOURCES); crm_trace("Got %p in %s", xml, XML_CIB_TAG_LRM); } CRM_ASSERT(strcmp((const char*)xml->name, XML_LRM_TAG_RESOURCES) == 0); for(cIter = xml->children; cIter; cIter = cIter->next) { num_resources++; } if(num_resources > 1) { /* * Check for and fast-track the processing of LRM refreshes * In large clusters this can result in _huge_ speedups * * Unfortunately we can only do so when there are no pending actions * Otherwise we could miss updates we're waiting for and stall * */ crm_debug("Detected LRM refresh - %d resources updated", num_resources); crm_log_xml_trace(change, "lrm-refresh"); abort_transition(INFINITY, tg_restart, "LRM Refresh", NULL); return; } for (rsc = __xml_first_child(xml); rsc != NULL; rsc = __xml_next(rsc)) { crm_trace("Processing %s", ID(rsc)); for (rsc_op = __xml_first_child(rsc); rsc_op != NULL; rsc_op = __xml_next(rsc_op)) { crm_trace("Processing %s", ID(rsc_op)); process_graph_event(rsc_op, node); } } }
static gboolean fail_incompletable_stonith(crm_graph_t * graph) { GListPtr lpc = NULL; const char *task = NULL; xmlNode *last_action = NULL; if (graph == NULL) { return FALSE; } for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) { GListPtr lpc2 = NULL; synapse_t *synapse = (synapse_t *) lpc->data; if (synapse->confirmed) { continue; } for (lpc2 = synapse->actions; lpc2 != NULL; lpc2 = lpc2->next) { crm_action_t *action = (crm_action_t *) lpc2->data; if (action->type != action_type_crm || action->confirmed) { continue; } task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); if (task && safe_str_eq(task, CRM_OP_FENCE)) { action->failed = TRUE; last_action = action->xml; update_graph(graph, action); crm_notice("Failing action %d (%s): STONITHd terminated", action->id, ID(action->xml)); } } } if (last_action != NULL) { crm_warn("STONITHd failure resulted in un-runnable actions"); abort_transition(INFINITY, tg_restart, "Stonith failure", last_action); return TRUE; } return FALSE; }
static void process_resource_updates(const char *node, xmlNode *xml, xmlNode *change, const char *op, const char *xpath) { xmlNode *rsc = NULL; if (xml == NULL) { return; } else if (strcmp((const char*)xml->name, XML_CIB_TAG_LRM) == 0) { xml = first_named_child(xml, XML_LRM_TAG_RESOURCES); crm_trace("Got %p in %s", xml, XML_CIB_TAG_LRM); } CRM_ASSERT(strcmp((const char*)xml->name, XML_LRM_TAG_RESOURCES) == 0); /* * Updates by, or in response to, TE actions will never contain updates * for more than one resource at a time, so such updates indicate an * LRM refresh. * * In that case, start a new transition rather than check each result * individually, which can result in _huge_ speedups in large clusters. * * Unfortunately, we can only do so when there are no pending actions. * Otherwise, we could mistakenly throw away those results here, and * the cluster will stall waiting for them and time out the operation. */ if ((transition_graph->pending == 0) && xml->children && xml->children->next) { crm_log_xml_trace(change, "lrm-refresh"); abort_transition(INFINITY, tg_restart, "History refresh", NULL); return; } for (rsc = __xml_first_child(xml); rsc != NULL; rsc = __xml_next(rsc)) { crm_trace("Processing %s", ID(rsc)); process_lrm_resource_diff(rsc, node); } }
void send_stonith_update(stonith_ops_t * op) { enum cib_errors rc = cib_ok; const char *target = op->node_name; const char *uuid = op->node_uuid; /* zero out the node-status & remove all LRM status info */ crm_data_t *node_state = create_xml_node(NULL, XML_CIB_TAG_STATE); CRM_CHECK(op->node_name != NULL, return); CRM_CHECK(op->node_uuid != NULL, return); crm_xml_add(node_state, XML_ATTR_UUID, uuid); crm_xml_add(node_state, XML_ATTR_UNAME, target); crm_xml_add(node_state, XML_CIB_ATTR_HASTATE, DEADSTATUS); crm_xml_add(node_state, XML_CIB_ATTR_INCCM, XML_BOOLEAN_NO); crm_xml_add(node_state, XML_CIB_ATTR_CRMDSTATE, OFFLINESTATUS); crm_xml_add(node_state, XML_CIB_ATTR_JOINSTATE, CRMD_JOINSTATE_DOWN); crm_xml_add(node_state, XML_CIB_ATTR_EXPSTATE, CRMD_JOINSTATE_DOWN); crm_xml_add(node_state, XML_CIB_ATTR_REPLACE, XML_CIB_TAG_LRM); crm_xml_add(node_state, XML_ATTR_ORIGIN, __FUNCTION__); rc = te_cib_conn->cmds->update( te_cib_conn, XML_CIB_TAG_STATUS, node_state, NULL, cib_quorum_override|cib_scope_local); if(rc < cib_ok) { crm_err("CIB update failed: %s", cib_error2string(rc)); abort_transition( INFINITY, tg_shutdown, "CIB update failed", node_state); } else { /* delay processing the trigger until the update completes */ add_cib_op_callback(rc, FALSE, NULL, cib_fencing_updated); } free_xml(node_state); return; }
static void process_op_deletion(const char *xpath, xmlNode *change) { char *mutable_key = strdup(xpath); char *key; char *node_uuid; crm_action_t *cancel = NULL; // Extract the part of xpath between last pair of single quotes key = strrchr(mutable_key, '\''); if (key != NULL) { *key = '\0'; key = strrchr(mutable_key, '\''); } if (key == NULL) { crm_warn("Ignoring malformed CIB update (resource deletion of %s)", xpath); free(mutable_key); return; } ++key; node_uuid = extract_node_uuid(xpath); cancel = get_cancel_action(key, node_uuid); if (cancel) { crm_info("Cancellation of %s on %s confirmed (%d)", key, node_uuid, cancel->id); stop_te_timer(cancel->timer); te_action_confirmed(cancel); update_graph(transition_graph, cancel); trigger_graph(); } else { abort_transition(INFINITY, tg_restart, "Resource operation removal", change); } free(mutable_key); free(node_uuid); }
/* A_TE_INVOKE, A_TE_CANCEL */ void do_te_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { if(AM_I_DC == FALSE) { crm_err("Not DC: No need to invoke the TE (anymore): %s", fsa_action2string(action)); return; } else if(fsa_state != S_TRANSITION_ENGINE && (action & A_TE_INVOKE)) { crm_err("No need to invoke the TE (%s) in state %s", fsa_action2string(action), fsa_state2string(fsa_state)); return; } if(action & A_TE_CANCEL) { crm_debug("Cancelling the transition: %s", transition_graph->complete?"inactive":"active"); abort_transition(INFINITY, tg_restart, "Peer Cancelled", NULL); if(transition_graph && transition_graph->complete == FALSE) { crmd_fsa_stall(NULL); } } else if(action & A_TE_HALT) { crm_debug("Halting the transition: %s", transition_graph->complete?"inactive":"active"); abort_transition(INFINITY, tg_stop, "Peer Halt", NULL); if(transition_graph && transition_graph->complete == FALSE) { crmd_fsa_stall(NULL); } } else if(action & A_TE_INVOKE) { const char *value = NULL; xmlNode *graph_data = NULL; ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); const char *ref = crm_element_value(input->msg, XML_ATTR_REFERENCE); const char *graph_file = crm_element_value(input->msg, F_CRM_TGRAPH); const char *graph_input = crm_element_value(input->msg, F_CRM_TGRAPH_INPUT); if(graph_file == NULL && input->xml == NULL) { crm_log_xml_err(input->msg, "Bad command"); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); return; } if(transition_graph && transition_graph->complete == FALSE) { crm_info("Another transition is already active"); abort_transition(INFINITY, tg_restart, "Transition Active", NULL); return; } if(fsa_pe_ref == NULL || safe_str_neq(fsa_pe_ref, ref)) { crm_info("Transition is redundant: %s vs. %s", crm_str(fsa_pe_ref), crm_str(ref)); abort_transition(INFINITY, tg_restart, "Transition Redundant", NULL); } graph_data = input->xml; if(graph_data == NULL && graph_file != NULL) { graph_data = filename2xml(graph_file); } if (is_timer_started(transition_timer)) { crm_debug("The transitioner wait for a transition timer"); return; } CRM_CHECK(graph_data != NULL, crm_err("Input raised by %s is invalid", msg_data->origin); crm_log_xml_err(input->msg, "Bad command"); return); destroy_graph(transition_graph); transition_graph = unpack_graph(graph_data, graph_input); CRM_CHECK(transition_graph != NULL, transition_graph = create_blank_graph(); return); crm_info("Processing graph %d (ref=%s) derived from %s", transition_graph->id, ref, graph_input); value = crm_element_value(graph_data, "failed-stop-offset"); if(value) { crm_free(failed_stop_offset); failed_stop_offset = crm_strdup(value); } value = crm_element_value(graph_data, "failed-start-offset"); if(value) { crm_free(failed_start_offset); failed_start_offset = crm_strdup(value); } trigger_graph(); print_graph(LOG_DEBUG_2, transition_graph); if(graph_data != input->xml) { free_xml(graph_data); } } }
void te_update_diff(const char *event, HA_Message *msg) { int rc = -1; const char *op = NULL; crm_data_t *diff = NULL; crm_data_t *aborted = NULL; const char *set_name = NULL; int diff_add_updates = 0; int diff_add_epoch = 0; int diff_add_admin_epoch = 0; int diff_del_updates = 0; int diff_del_epoch = 0; int diff_del_admin_epoch = 0; if(msg == NULL) { crm_err("NULL update"); return; } ha_msg_value_int(msg, F_CIB_RC, &rc); op = cl_get_string(msg, F_CIB_OPERATION); if(rc < cib_ok) { crm_debug_2("Ignoring failed %s operation: %s", op, cib_error2string(rc)); return; } diff = get_message_xml(msg, F_CIB_UPDATE_RESULT); cib_diff_version_details( diff, &diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates, &diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates); crm_debug("Processing diff (%s): %d.%d.%d -> %d.%d.%d", op, diff_del_admin_epoch,diff_del_epoch,diff_del_updates, diff_add_admin_epoch,diff_add_epoch,diff_add_updates); log_cib_diff(LOG_DEBUG_2, diff, op); set_name = "diff-added"; if(diff != NULL) { crm_data_t *section = NULL; crm_data_t *change_set = find_xml_node(diff, set_name, FALSE); change_set = find_xml_node(change_set, XML_TAG_CIB, FALSE); if(change_set != NULL) { crm_debug_2("Checking status changes"); section=get_object_root(XML_CIB_TAG_STATUS,change_set); } if(section != NULL) { extract_event(section); } crm_debug_2("Checking change set: %s", set_name); aborted = need_abort(change_set); } set_name = "diff-removed"; if(diff != NULL && aborted == NULL) { crm_data_t *attrs = NULL; crm_data_t *status = NULL; crm_data_t *change_set = find_xml_node(diff, set_name, FALSE); change_set = find_xml_node(change_set, XML_TAG_CIB, FALSE); crm_debug_2("Checking change set: %s", set_name); aborted = need_abort(change_set); if(aborted == NULL && change_set != NULL) { status = get_object_root(XML_CIB_TAG_STATUS, change_set); xml_child_iter_filter( status, node_state, XML_CIB_TAG_STATE, attrs = find_xml_node( node_state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); if(attrs != NULL) { crm_info("Aborting on "XML_TAG_TRANSIENT_NODEATTRS" deletions"); abort_transition(INFINITY, tg_restart, XML_TAG_TRANSIENT_NODEATTRS, attrs); } );
void peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data) { uint32_t old = 0; uint32_t changed = 0; bool appeared = FALSE; bool is_remote = is_set(node->flags, crm_remote_node); const char *status = NULL; /* Crmd waits to receive some information from the membership layer before * declaring itself operational. If this is being called for a cluster node, * indicate that we have it. */ if (!is_remote) { set_bit(fsa_input_register, R_PEER_DATA); } if (node->uname == NULL) { return; } switch (type) { case crm_status_uname: /* If we've never seen the node, then it also won't be in the status section */ crm_info("%s node %s is now %s", (is_remote? "Remote" : "Cluster"), node->uname, state_text(node->state)); return; case crm_status_rstate: case crm_status_nstate: /* This callback should not be called unless the state actually * changed, but here's a failsafe just in case. */ CRM_CHECK(safe_str_neq(data, node->state), return); crm_info("%s node %s is now %s (was %s)", (is_remote? "Remote" : "Cluster"), node->uname, state_text(node->state), state_text(data)); if (safe_str_eq(CRM_NODE_MEMBER, node->state)) { appeared = TRUE; if (!is_remote) { remove_stonith_cleanup(node->uname); } } crmd_alert_node_event(node); break; case crm_status_processes: if (data) { old = *(const uint32_t *)data; changed = node->processes ^ old; } status = (node->processes & proc_flags) ? ONLINESTATUS : OFFLINESTATUS; crm_info("Client %s/%s now has status [%s] (DC=%s, changed=%6x)", node->uname, peer2text(proc_flags), status, AM_I_DC ? "true" : crm_str(fsa_our_dc), changed); if ((changed & proc_flags) == 0) { /* Peer process did not change */ crm_trace("No change %6x %6x %6x", old, node->processes, proc_flags); return; } else if (is_not_set(fsa_input_register, R_CIB_CONNECTED)) { crm_trace("Not connected"); return; } else if (fsa_state == S_STOPPING) { crm_trace("Stopping"); return; } appeared = (node->processes & proc_flags) != 0; if (safe_str_eq(node->uname, fsa_our_uname) && (node->processes & proc_flags) == 0) { /* Did we get evicted? */ crm_notice("Our peer connection failed"); register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ERROR, NULL); } else if (safe_str_eq(node->uname, fsa_our_dc) && crm_is_peer_active(node) == FALSE) { /* Did the DC leave us? */ crm_notice("Our peer on the DC (%s) is dead", fsa_our_dc); register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ELECTION, NULL); /* @COMPAT DC < 1.1.13: If a DC shuts down normally, we don't * want to fence it. Newer DCs will send their shutdown request * to all peers, who will update the DC's expected state to * down, thus avoiding fencing. We can safely erase the DC's * transient attributes when it leaves in that case. However, * the only way to avoid fencing older DCs is to leave the * transient attributes intact until it rejoins. */ if (compare_version(fsa_our_dc_version, "3.0.9") > 0) { erase_status_tag(node->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local); } } else if(AM_I_DC && appeared == FALSE) { crm_info("Peer %s left us", node->uname); erase_status_tag(node->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local); } break; } if (AM_I_DC) { xmlNode *update = NULL; int flags = node_update_peer; gboolean alive = is_remote? appeared : crm_is_peer_active(node); crm_action_t *down = match_down_event(node->uuid, appeared); crm_trace("Alive=%d, appeared=%d, down=%d", alive, appeared, (down? down->id : -1)); if (alive && type == crm_status_processes) { register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL); } if (down) { const char *task = crm_element_value(down->xml, XML_LRM_ATTR_TASK); if (safe_str_eq(task, CRM_OP_FENCE)) { /* tengine_stonith_callback() confirms fence actions */ crm_trace("Updating CIB %s stonithd reported fencing of %s complete", (down->confirmed? "after" : "before"), node->uname); } else if ((alive == FALSE) && safe_str_eq(task, CRM_OP_SHUTDOWN)) { crm_notice("%s of peer %s is complete "CRM_XS" op=%d", task, node->uname, down->id); /* down->confirmed = TRUE; */ stop_te_timer(down->timer); if (!is_remote) { flags |= node_update_join | node_update_expected; crmd_peer_down(node, FALSE); check_join_state(fsa_state, __FUNCTION__); } update_graph(transition_graph, down); trigger_graph(); } else { crm_trace("Node %s is %salive, was expected to %s (op %d)", node->uname, (alive? "" : "not "), task, down->id); } } else if (appeared == FALSE) { crm_notice("Stonith/shutdown of %s not matched", node->uname); if (!is_remote) { crm_update_peer_join(__FUNCTION__, node, crm_join_none); check_join_state(fsa_state, __FUNCTION__); } abort_transition(INFINITY, tg_restart, "Node failure", NULL); fail_incompletable_actions(transition_graph, node->uuid); } else { crm_trace("Node %s came up, was not expected to be down", node->uname); } if (is_remote) { /* A pacemaker_remote node won't have its cluster status updated * in the CIB by membership-layer callbacks, so do it here. */ flags |= node_update_cluster; /* Trigger resource placement on newly integrated nodes */ if (appeared) { abort_transition(INFINITY, tg_restart, "pacemaker_remote node integrated", NULL); } } /* Update the CIB node state */ update = create_node_state_update(node, flags, NULL, __FUNCTION__); fsa_cib_anon_update(XML_CIB_TAG_STATUS, update, cib_scope_local | cib_quorum_override | cib_can_create); free_xml(update); } trigger_fsa(fsa_source); }
static void tengine_stonith_notify(stonith_t * st, stonith_event_t * st_event) { static char *client_id = NULL; if(client_id == NULL) { client_id = g_strdup_printf("%s.%d", crm_system_name, getpid()); } if (st_event == NULL) { crm_err("Notify data not found"); return; } if (st_event->result == pcmk_ok && crm_str_eq(st_event->target, fsa_our_uname, TRUE)) { crm_err("We were alegedly just fenced by %s for %s!", st_event->executioner, st_event->origin); register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__); return; } if (st_event->result == pcmk_ok && safe_str_eq(st_event->operation, T_STONITH_NOTIFY_FENCE)) { reset_st_fail_count(st_event->target); } crm_notice("Peer %s was%s terminated (%s) by %s for %s: %s (ref=%s) by client %s", st_event->target, st_event->result == pcmk_ok ? "" : " not", st_event->action, st_event->executioner ? st_event->executioner : "<anyone>", st_event->origin, pcmk_strerror(st_event->result), st_event->id, st_event->client_origin ? st_event->client_origin : "<unknown>"); #if SUPPORT_CMAN if (st_event->result == pcmk_ok && is_cman_cluster()) { int local_rc = 0; char *target_copy = strdup(st_event->target); /* In case fenced hasn't noticed yet * * Any fencing that has been inititated will be completed by way of the fence_pcmk redirect */ local_rc = fenced_external(target_copy); if (local_rc != 0) { crm_err("Could not notify CMAN that '%s' is now fenced: %d", st_event->target, local_rc); } else { crm_notice("Notified CMAN that '%s' is now fenced", st_event->target); } free(target_copy); } #endif if (st_event->result == pcmk_ok) { const char *uuid = get_uuid(st_event->target); gboolean we_are_executioner = safe_str_eq(st_event->executioner, fsa_our_uname); crm_trace("target=%s dc=%s", st_event->target, fsa_our_dc); if(AM_I_DC) { /* The DC always sends updates */ send_stonith_update(NULL, st_event->target, uuid); if (st_event->client_origin && safe_str_neq(st_event->client_origin, client_id)) { /* Abort the current transition graph if it wasn't us * that invoked stonith to fence someone */ crm_info("External fencing operation from %s fenced %s", st_event->client_origin, st_event->target); abort_transition(INFINITY, tg_restart, "External Fencing Operation", NULL); } /* Assume it was our leader if we dont currently have one */ } else if (fsa_our_dc == NULL || safe_str_eq(fsa_our_dc, st_event->target)) { crm_notice("Target %s our leader %s (recorded: %s)", fsa_our_dc ? "was" : "may have been", st_event->target, fsa_our_dc ? fsa_our_dc : "<unset>"); /* Given the CIB resyncing that occurs around elections, * have one node update the CIB now and, if the new DC is different, * have them do so too after the election */ if (we_are_executioner) { send_stonith_update(NULL, st_event->target, uuid); } stonith_cleanup_list = g_list_append(stonith_cleanup_list, strdup(st_event->target)); } } }
static void te_legacy_update_diff(const char *event, xmlNode * diff) { int lpc, max; xmlXPathObject *xpathObj = NULL; CRM_CHECK(diff != NULL, return); xml_log_patchset(LOG_TRACE, __FUNCTION__, diff); if (cib_config_changed(NULL, NULL, &diff)) { abort_transition(INFINITY, tg_restart, "Non-status change", diff); goto bail; /* configuration changed */ } /* Tickets Attributes - Added/Updated */ xpathObj = xpath_search(diff, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_TICKETS); if (numXpathResults(xpathObj) > 0) { xmlNode *aborted = getXpathResult(xpathObj, 0); abort_transition(INFINITY, tg_restart, "Ticket attribute: update", aborted); goto bail; } freeXpathObject(xpathObj); /* Tickets Attributes - Removed */ xpathObj = xpath_search(diff, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_TICKETS); if (numXpathResults(xpathObj) > 0) { xmlNode *aborted = getXpathResult(xpathObj, 0); abort_transition(INFINITY, tg_restart, "Ticket attribute: removal", aborted); goto bail; } freeXpathObject(xpathObj); /* Transient Attributes - Added/Updated */ xpathObj = xpath_search(diff, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_TAG_TRANSIENT_NODEATTRS "//" XML_CIB_TAG_NVPAIR); max = numXpathResults(xpathObj); for (lpc = 0; lpc < max; lpc++) { xmlNode *attr = getXpathResult(xpathObj, lpc); const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME); const char *value = NULL; if (safe_str_eq(CRM_OP_PROBED, name)) { value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE); } if (crm_is_true(value) == FALSE) { abort_transition(INFINITY, tg_restart, "Transient attribute: update", attr); crm_log_xml_trace(attr, "Abort"); goto bail; } } freeXpathObject(xpathObj); /* Transient Attributes - Removed */ xpathObj = xpath_search(diff, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_TAG_TRANSIENT_NODEATTRS); if (numXpathResults(xpathObj) > 0) { xmlNode *aborted = getXpathResult(xpathObj, 0); abort_transition(INFINITY, tg_restart, "Transient attribute: removal", aborted); goto bail; } freeXpathObject(xpathObj); /* * Check for and fast-track the processing of LRM refreshes * In large clusters this can result in _huge_ speedups * * Unfortunately we can only do so when there are no pending actions * Otherwise we could miss updates we're waiting for and stall * */ xpathObj = NULL; if (transition_graph->pending == 0) { xpathObj = xpath_search(diff, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_LRM_TAG_RESOURCE); } max = numXpathResults(xpathObj); if (max > 1) { /* Updates by, or in response to, TE actions will never contain updates * for more than one resource at a time */ crm_debug("Detected LRM refresh - %d resources updated: Skipping all resource events", max); crm_log_xml_trace(diff, "lrm-refresh"); abort_transition(INFINITY, tg_restart, "LRM Refresh", NULL); goto bail; } freeXpathObject(xpathObj); /* Process operation updates */ xpathObj = xpath_search(diff, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_LRM_TAG_RSC_OP); if (numXpathResults(xpathObj)) { /* <status> <node_state id="node1" state=CRMD_JOINSTATE_MEMBER exp_state="active"> <lrm> <lrm_resources> <rsc_state id="" rsc_id="rsc4" node_id="node1" rsc_state="stopped"/> */ int lpc = 0, max = numXpathResults(xpathObj); for (lpc = 0; lpc < max; lpc++) { xmlNode *rsc_op = getXpathResult(xpathObj, lpc); const char *node = get_node_id(rsc_op); process_graph_event(rsc_op, node); } } freeXpathObject(xpathObj); /* Detect deleted (as opposed to replaced or added) actions - eg. crm_resource -C */ xpathObj = xpath_search(diff, "//" XML_TAG_DIFF_REMOVED "//" XML_LRM_TAG_RSC_OP); max = numXpathResults(xpathObj); for (lpc = 0; lpc < max; lpc++) { int path_max = 0; const char *op_id = NULL; char *rsc_op_xpath = NULL; xmlXPathObject *op_match = NULL; xmlNode *match = getXpathResult(xpathObj, lpc); CRM_LOG_ASSERT(match != NULL); if(match == NULL) { continue; }; op_id = ID(match); path_max = strlen(rsc_op_template) + strlen(op_id) + 1; rsc_op_xpath = calloc(1, path_max); snprintf(rsc_op_xpath, path_max, rsc_op_template, op_id); op_match = xpath_search(diff, rsc_op_xpath); if (numXpathResults(op_match) == 0) { /* Prevent false positives by matching cancelations too */ const char *node = get_node_id(match); crm_action_t *cancelled = get_cancel_action(op_id, node); if (cancelled == NULL) { crm_debug("No match for deleted action %s (%s on %s)", rsc_op_xpath, op_id, node); abort_transition(INFINITY, tg_restart, "Resource op removal", match); freeXpathObject(op_match); free(rsc_op_xpath); goto bail; } else { crm_debug("Deleted lrm_rsc_op %s on %s was for graph event %d", op_id, node, cancelled->id); } } freeXpathObject(op_match); free(rsc_op_xpath); } bail: freeXpathObject(xpathObj); }
static void tengine_stonith_notify(stonith_t * st, stonith_event_t * st_event) { if(te_client_id == NULL) { te_client_id = crm_strdup_printf("%s.%d", crm_system_name, getpid()); } if (st_event == NULL) { crm_err("Notify data not found"); return; } crmd_notify_fencing_op(st_event); if (st_event->result == pcmk_ok && safe_str_eq("on", st_event->action)) { crm_notice("%s was successfully unfenced by %s (at the request of %s)", st_event->target, st_event->executioner ? st_event->executioner : "<anyone>", st_event->origin); /* TODO: Hook up st_event->device */ return; } else if (safe_str_eq("on", st_event->action)) { crm_err("Unfencing of %s by %s failed: %s (%d)", st_event->target, st_event->executioner ? st_event->executioner : "<anyone>", pcmk_strerror(st_event->result), st_event->result); return; } else if (st_event->result == pcmk_ok && crm_str_eq(st_event->target, fsa_our_uname, TRUE)) { crm_crit("We were allegedly just fenced by %s for %s!", st_event->executioner ? st_event->executioner : "<anyone>", st_event->origin); /* Dumps blackbox if enabled */ qb_log_fini(); /* Try to get the above log message to disk - somehow */ /* Get out ASAP and do not come back up. * * Triggering a reboot is also not the worst idea either since * the rest of the cluster thinks we're safely down */ #ifdef RB_HALT_SYSTEM reboot(RB_HALT_SYSTEM); #endif /* * If reboot() fails or is not supported, coming back up will * probably lead to a situation where the other nodes set our * status to 'lost' because of the fencing callback and will * discard subsequent election votes with: * * Election 87 (current: 5171, owner: 103): Processed vote from east-03 (Peer is not part of our cluster) * * So just stay dead, something is seriously messed up anyway. * */ exit(100); /* None of our wrappers since we already called qb_log_fini() */ return; } if (st_event->result == pcmk_ok && safe_str_eq(st_event->operation, T_STONITH_NOTIFY_FENCE)) { st_fail_count_reset(st_event->target); } crm_notice("Peer %s was%s terminated (%s) by %s for %s: %s (ref=%s) by client %s", st_event->target, st_event->result == pcmk_ok ? "" : " not", st_event->action, st_event->executioner ? st_event->executioner : "<anyone>", st_event->origin, pcmk_strerror(st_event->result), st_event->id, st_event->client_origin ? st_event->client_origin : "<unknown>"); #if SUPPORT_CMAN if (st_event->result == pcmk_ok && is_cman_cluster()) { int local_rc = 0; int confirm = 0; char *target_copy = strdup(st_event->target); /* In case fenced hasn't noticed yet * * Any fencing that has been inititated will be completed by way of the fence_pcmk redirect */ local_rc = fenced_external(target_copy); if (local_rc != 0) { crm_err("Could not notify CMAN that '%s' is now fenced: %d", st_event->target, local_rc); } else { crm_notice("Notified CMAN that '%s' is now fenced", st_event->target); } /* In case fenced is already trying to shoot it */ confirm = open("/var/run/cluster/fenced_override", O_NONBLOCK|O_WRONLY); if (confirm > 0) { int ignore = 0; int len = strlen(target_copy); errno = 0; local_rc = write(confirm, target_copy, len); ignore = write(confirm, "\n", 1); if(ignore < 0 && errno == EBADF) { crm_trace("CMAN not expecting %s to be fenced (yet)", st_event->target); } else if (local_rc < len) { crm_perror(LOG_ERR, "Confirmation of CMAN fencing event for '%s' failed: %d", st_event->target, local_rc); } else { fsync(confirm); crm_notice("Confirmed CMAN fencing event for '%s'", st_event->target); } close(confirm); } free(target_copy); } #endif if (st_event->result == pcmk_ok) { crm_node_t *peer = crm_find_peer_full(0, st_event->target, CRM_GET_PEER_REMOTE | CRM_GET_PEER_CLUSTER); const char *uuid = NULL; gboolean we_are_executioner = safe_str_eq(st_event->executioner, fsa_our_uname); if (peer == NULL) { return; } uuid = crm_peer_uuid(peer); crm_trace("target=%s dc=%s", st_event->target, fsa_our_dc); if(AM_I_DC) { /* The DC always sends updates */ send_stonith_update(NULL, st_event->target, uuid); if (st_event->client_origin && safe_str_neq(st_event->client_origin, te_client_id)) { /* Abort the current transition graph if it wasn't us * that invoked stonith to fence someone */ crm_info("External fencing operation from %s fenced %s", st_event->client_origin, st_event->target); abort_transition(INFINITY, tg_restart, "External Fencing Operation", NULL); } /* Assume it was our leader if we dont currently have one */ } else if (fsa_our_dc == NULL || safe_str_eq(fsa_our_dc, st_event->target)) { crm_notice("Target %s our leader %s (recorded: %s)", fsa_our_dc ? "was" : "may have been", st_event->target, fsa_our_dc ? fsa_our_dc : "<unset>"); /* Given the CIB resyncing that occurs around elections, * have one node update the CIB now and, if the new DC is different, * have them do so too after the election */ if (we_are_executioner) { send_stonith_update(NULL, st_event->target, uuid); } stonith_cleanup_list = g_list_append(stonith_cleanup_list, strdup(st_event->target)); } crmd_peer_down(peer, TRUE); } }
gboolean process_graph_event(xmlNode * event, const char *event_node) { int rc = -1; int status = -1; int callid = -1; int action = -1; int target_rc = -1; int transition_num = -1; char *update_te_uuid = NULL; gboolean stop_early = FALSE; gboolean passed = FALSE; const char *id = NULL; const char *desc = NULL; const char *magic = NULL; CRM_ASSERT(event != NULL); /* <lrm_rsc_op id="rsc_east-05_last_0" operation_key="rsc_east-05_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.6" transition-key="9:2:7:be2e97d9-05e2-439d-863e-48f7aecab2aa" transition-magic="0:7;9:2:7:be2e97d9-05e2-439d-863e-48f7aecab2aa" call-id="17" rc-code="7" op-status="0" interval="0" last-run="1355361636" last-rc-change="1355361636" exec-time="128" queue-time="0" op-digest="c81f5f40b1c9e859c992e800b1aa6972"/> */ id = crm_element_value(event, XML_LRM_ATTR_TASK_KEY); crm_element_value_int(event, XML_LRM_ATTR_RC, &rc); crm_element_value_int(event, XML_LRM_ATTR_OPSTATUS, &status); crm_element_value_int(event, XML_LRM_ATTR_CALLID, &callid); magic = crm_element_value(event, XML_ATTR_TRANSITION_KEY); if (magic == NULL) { /* non-change */ return FALSE; } if (decode_transition_key(magic, &update_te_uuid, &transition_num, &action, &target_rc) == FALSE) { crm_err("Invalid event %s.%d detected: %s", id, callid, magic); abort_transition(INFINITY, tg_restart, "Bad event", event); return FALSE; } if (status == PCMK_LRM_OP_PENDING) { goto bail; } if (transition_num == -1) { desc = "initiated outside of the cluster"; abort_transition(INFINITY, tg_restart, "Unexpected event", event); } else if (action < 0 || crm_str_eq(update_te_uuid, te_uuid, TRUE) == FALSE) { desc = "initiated by a different node"; abort_transition(INFINITY, tg_restart, "Foreign event", event); stop_early = TRUE; /* This could be an lrm status refresh */ } else if (transition_graph->id != transition_num) { desc = "arrived really late"; abort_transition(INFINITY, tg_restart, "Old event", event); stop_early = TRUE; /* This could be an lrm status refresh */ } else if (transition_graph->complete) { desc = "arrived late"; abort_transition(INFINITY, tg_restart, "Inactive graph", event); } else if (match_graph_event(action, event, event_node, status, rc, target_rc) < 0) { desc = "unknown"; abort_transition(INFINITY, tg_restart, "Unknown event", event); } else if (rc == target_rc) { passed = TRUE; crm_trace("Processed update to %s: %s", id, magic); } if (passed == FALSE) { if (update_failcount(event, event_node, rc, target_rc, transition_num == -1)) { /* Turns out this wasn't an lrm status refresh update aferall */ stop_early = FALSE; desc = "failed"; } crm_info("Detected action (%d.%d) %s.%d=%s: %s", transition_num, action, id, callid, services_ocf_exitcode_str(rc), desc); } bail: free(update_te_uuid); return stop_early; }
/* * returns the ID of the action if a match is found * returns -1 if a match was not found * returns -2 if a match was found but the action failed (and was * not allowed to) */ int match_graph_event(int action_id, xmlNode * event, const char *event_node, int op_status, int op_rc, int target_rc) { const char *target = NULL; const char *allow_fail = NULL; const char *this_event = NULL; crm_action_t *action = NULL; action = get_action(action_id, FALSE); if (action == NULL) { return -1; } op_status = status_from_rc(action, op_status, op_rc, target_rc); if (op_status != PCMK_LRM_OP_DONE) { update_failcount(event, event_node, op_rc, target_rc, FALSE); } /* Process OP status */ switch (op_status) { case PCMK_LRM_OP_PENDING: crm_debug("Ignoring pending operation"); return action->id; break; case PCMK_LRM_OP_DONE: break; case PCMK_LRM_OP_ERROR: case PCMK_LRM_OP_TIMEOUT: case PCMK_LRM_OP_NOTSUPPORTED: action->failed = TRUE; break; case PCMK_LRM_OP_CANCELLED: /* do nothing?? */ crm_err("Dont know what to do for cancelled ops yet"); break; default: action->failed = TRUE; crm_err("Unsupported action result: %d", op_status); } /* stop this event's timer if it had one */ stop_te_timer(action->timer); te_action_confirmed(action); update_graph(transition_graph, action); trigger_graph(); if (action->failed) { allow_fail = crm_meta_value(action->params, XML_ATTR_TE_ALLOWFAIL); if (crm_is_true(allow_fail)) { action->failed = FALSE; } } if (action->failed) { abort_transition(action->synapse->priority + 1, tg_restart, "Event failed", event); } this_event = crm_element_value(event, XML_LRM_ATTR_TASK_KEY); target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); crm_info("Action %s (%d) confirmed on %s (rc=%d)", crm_str(this_event), action->id, crm_str(target), op_status); /* determine if this action affects a remote-node's online/offline status */ process_remote_node_action(action, event); return action->id; }
static void te_update_diff_v1(const char *event, xmlNode *diff) { int lpc, max; xmlXPathObject *xpathObj = NULL; CRM_CHECK(diff != NULL, return); xml_log_patchset(LOG_TRACE, __FUNCTION__, diff); if (cib_config_changed(NULL, NULL, &diff)) { abort_transition(INFINITY, tg_restart, "Non-status change", diff); goto bail; /* configuration changed */ } /* Tickets Attributes - Added/Updated */ xpathObj = xpath_search(diff, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_TICKETS); if (numXpathResults(xpathObj) > 0) { xmlNode *aborted = getXpathResult(xpathObj, 0); abort_transition(INFINITY, tg_restart, "Ticket attribute: update", aborted); goto bail; } freeXpathObject(xpathObj); /* Tickets Attributes - Removed */ xpathObj = xpath_search(diff, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_TICKETS); if (numXpathResults(xpathObj) > 0) { xmlNode *aborted = getXpathResult(xpathObj, 0); abort_transition(INFINITY, tg_restart, "Ticket attribute: removal", aborted); goto bail; } freeXpathObject(xpathObj); /* Transient Attributes - Added/Updated */ xpathObj = xpath_search(diff, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_TAG_TRANSIENT_NODEATTRS "//" XML_CIB_TAG_NVPAIR); max = numXpathResults(xpathObj); for (lpc = 0; lpc < max; lpc++) { xmlNode *attr = getXpathResult(xpathObj, lpc); const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME); const char *value = NULL; if (safe_str_eq(CRM_OP_PROBED, name)) { value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE); } if (crm_is_true(value) == FALSE) { abort_transition(INFINITY, tg_restart, "Transient attribute: update", attr); crm_log_xml_trace(attr, "Abort"); goto bail; } } freeXpathObject(xpathObj); /* Transient Attributes - Removed */ xpathObj = xpath_search(diff, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_TAG_TRANSIENT_NODEATTRS); if (numXpathResults(xpathObj) > 0) { xmlNode *aborted = getXpathResult(xpathObj, 0); abort_transition(INFINITY, tg_restart, "Transient attribute: removal", aborted); goto bail; } freeXpathObject(xpathObj); /* * Updates by, or in response to, TE actions will never contain updates * for more than one resource at a time, so such updates indicate an * LRM refresh. * * In that case, start a new transition rather than check each result * individually, which can result in _huge_ speedups in large clusters. * * Unfortunately, we can only do so when there are no pending actions. * Otherwise, we could mistakenly throw away those results here, and * the cluster will stall waiting for them and time out the operation. */ if (transition_graph->pending == 0) { xpathObj = xpath_search(diff, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_LRM_TAG_RESOURCE); max = numXpathResults(xpathObj); if (max > 1) { crm_debug("Ignoring resource operation updates due to history refresh of %d resources", max); crm_log_xml_trace(diff, "lrm-refresh"); abort_transition(INFINITY, tg_restart, "History refresh", NULL); goto bail; } freeXpathObject(xpathObj); } /* Process operation updates */ xpathObj = xpath_search(diff, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_LRM_TAG_RSC_OP); max = numXpathResults(xpathObj); if (max > 0) { int lpc = 0; for (lpc = 0; lpc < max; lpc++) { xmlNode *rsc_op = getXpathResult(xpathObj, lpc); const char *node = get_node_id(rsc_op); process_graph_event(rsc_op, node); } } freeXpathObject(xpathObj); /* Detect deleted (as opposed to replaced or added) actions - eg. crm_resource -C */ xpathObj = xpath_search(diff, "//" XML_TAG_DIFF_REMOVED "//" XML_LRM_TAG_RSC_OP); max = numXpathResults(xpathObj); for (lpc = 0; lpc < max; lpc++) { int path_max = 0; const char *op_id = NULL; char *rsc_op_xpath = NULL; xmlXPathObject *op_match = NULL; xmlNode *match = getXpathResult(xpathObj, lpc); CRM_LOG_ASSERT(match != NULL); if(match == NULL) { continue; }; op_id = ID(match); path_max = strlen(RSC_OP_TEMPLATE) + strlen(op_id) + 1; rsc_op_xpath = calloc(1, path_max); snprintf(rsc_op_xpath, path_max, RSC_OP_TEMPLATE, op_id); op_match = xpath_search(diff, rsc_op_xpath); if (numXpathResults(op_match) == 0) { /* Prevent false positives by matching cancelations too */ const char *node = get_node_id(match); crm_action_t *cancelled = get_cancel_action(op_id, node); if (cancelled == NULL) { crm_debug("No match for deleted action %s (%s on %s)", rsc_op_xpath, op_id, node); abort_transition(INFINITY, tg_restart, "Resource op removal", match); freeXpathObject(op_match); free(rsc_op_xpath); goto bail; } else { crm_debug("Deleted lrm_rsc_op %s on %s was for graph event %d", op_id, node, cancelled->id); } } freeXpathObject(op_match); free(rsc_op_xpath); } bail: freeXpathObject(xpathObj); }
static void te_update_diff_v2(xmlNode *diff) { crm_log_xml_trace(diff, "Patch:Raw"); for (xmlNode *change = __xml_first_child(diff); change != NULL; change = __xml_next(change)) { xmlNode *match = NULL; const char *name = NULL; const char *xpath = crm_element_value(change, XML_DIFF_PATH); // Possible ops: create, modify, delete, move const char *op = crm_element_value(change, XML_DIFF_OP); // Ignore uninteresting updates if (op == NULL) { continue; } else if (xpath == NULL) { crm_trace("Ignoring %s change for version field", op); continue; } else if (strcmp(op, "move") == 0) { crm_trace("Ignoring move change at %s", xpath); continue; } // Find the result of create/modify ops if (strcmp(op, "create") == 0) { match = change->children; } else if (strcmp(op, "modify") == 0) { match = first_named_child(change, XML_DIFF_RESULT); if(match) { match = match->children; } } else if (strcmp(op, "delete") != 0) { crm_warn("Ignoring malformed CIB update (%s operation on %s is unrecognized)", op, xpath); continue; } if (match) { if (match->type == XML_COMMENT_NODE) { crm_trace("Ignoring %s operation for comment at %s", op, xpath); continue; } name = (const char *)match->name; } crm_trace("Handling %s operation for %s%s%s", op, (xpath? xpath : "CIB"), (name? " matched by " : ""), (name? name : "")); if (strstr(xpath, "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION)) { abort_transition(INFINITY, tg_restart, "Configuration change", change); break; // Won't be packaged with operation results we may be waiting for } else if (strstr(xpath, "/" XML_CIB_TAG_TICKETS) || safe_str_eq(name, XML_CIB_TAG_TICKETS)) { abort_transition(INFINITY, tg_restart, "Ticket attribute change", change); break; // Won't be packaged with operation results we may be waiting for } else if (strstr(xpath, "/" XML_TAG_TRANSIENT_NODEATTRS "[") || safe_str_eq(name, XML_TAG_TRANSIENT_NODEATTRS)) { abort_unless_down(xpath, op, change, "Transient attribute change"); break; // Won't be packaged with operation results we may be waiting for } else if (strcmp(op, "delete") == 0) { process_delete_diff(xpath, op, change); } else if (name == NULL) { crm_warn("Ignoring malformed CIB update (%s at %s has no result)", op, xpath); } else if (strcmp(name, XML_TAG_CIB) == 0) { process_cib_diff(match, change, op, xpath); } else if (strcmp(name, XML_CIB_TAG_STATUS) == 0) { process_status_diff(match, change, op, xpath); } else if (strcmp(name, XML_CIB_TAG_STATE) == 0) { process_node_state_diff(match, change, op, xpath); } else if (strcmp(name, XML_CIB_TAG_LRM) == 0) { process_resource_updates(ID(match), match, change, op, xpath); } else if (strcmp(name, XML_LRM_TAG_RESOURCES) == 0) { char *local_node = get_node_from_xpath(xpath); process_resource_updates(local_node, match, change, op, xpath); free(local_node); } else if (strcmp(name, XML_LRM_TAG_RESOURCE) == 0) { char *local_node = get_node_from_xpath(xpath); process_lrm_resource_diff(match, local_node); free(local_node); } else if (strcmp(name, XML_LRM_TAG_RSC_OP) == 0) { char *local_node = get_node_from_xpath(xpath); process_graph_event(match, local_node); free(local_node); } else { crm_warn("Ignoring malformed CIB update (%s at %s has unrecognized result %s)", op, xpath, name); } } }
static void tengine_stonith_notify(stonith_t * st, stonith_event_t *st_event) { if (st_event == NULL) { crm_err("Notify data not found"); return; } if (st_event->result == pcmk_ok && crm_str_eq(st_event->target, fsa_our_uname, TRUE)) { crm_err("We were alegedly just fenced by %s for %s!", st_event->executioner, st_event->origin); register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__); return; } crm_notice("Peer %s was%s terminated (%s) by %s for %s: %s (ref=%s) by client %s", st_event->target, st_event->result == pcmk_ok?"":" not", st_event->operation, st_event->executioner ? st_event->executioner : "<anyone>", st_event->origin, pcmk_strerror(st_event->result), st_event->id, st_event->client_origin ? st_event->client_origin : "<unknown>"); #if SUPPORT_CMAN if (st_event->result == pcmk_ok && is_cman_cluster()) { int local_rc = 0; int confirm = 0; char *target_copy = strdup(st_event->target); /* In case fenced hasn't noticed yet */ local_rc = fenced_external(target_copy); if (local_rc != 0) { crm_err("Could not notify CMAN that '%s' is now fenced: %d", st_event->target, local_rc); } else { crm_notice("Notified CMAN that '%s' is now fenced", st_event->target); } /* In case fenced is already trying to shoot it */ confirm = open("/var/run/cluster/fenced_override", O_NONBLOCK|O_WRONLY); if (confirm) { int ignore = 0; int len = strlen(target_copy); errno = 0; local_rc = write(confirm, target_copy, len); ignore = write(confirm, "\n", 1); if(errno == EBADF) { crm_trace("CMAN not expecting %s to be fenced (yet)", st_event->target); } else if (local_rc < len) { crm_perror(LOG_ERR, "Confirmation of CMAN fencing event for '%s' failed: %d", st_event->target, local_rc); } else { fsync(confirm); crm_notice("Confirmed CMAN fencing event for '%s'", st_event->target); } close(confirm); } } #endif if (st_event->result == pcmk_ok) { gboolean we_are_executioner = safe_str_eq(st_event->executioner, fsa_our_uname); crm_trace("target=%s dc=%s", st_event->target, fsa_our_dc); if (fsa_our_dc == NULL || safe_str_eq(fsa_our_dc, st_event->target)) { crm_notice("Target %s our leader %s (recorded: %s)", fsa_our_dc?"was":"may have been", st_event->target, fsa_our_dc ? fsa_our_dc : "<unset>"); /* Given the CIB resyncing that occurs around elections, * have one node update the CIB now and, if the new DC is different, * have them do so too after the election */ if (we_are_executioner) { const char *uuid = get_uuid(st_event->target); send_stonith_update(NULL, st_event->target, uuid); } stonith_cleanup_list = g_list_append(stonith_cleanup_list, strdup(st_event->target)); } else if (AM_I_DC && st_event->client_origin && safe_str_neq(st_event->client_origin, crm_system_name)) { const char *uuid = get_uuid(st_event->target); /* If a remote process outside of pacemaker invoked stonith to * fence someone, report the fencing result to the cib * and abort the transition graph. */ crm_info("External fencing operation from %s fenced %s", st_event->client_origin, st_event->target); send_stonith_update(NULL, st_event->target, uuid); abort_transition(INFINITY, tg_restart, "External Fencing Operation", NULL); } } }
gboolean tengine_shutdown(int nsig, gpointer unused) { shuttingdown = TRUE; abort_transition(INFINITY, tg_shutdown, "Shutdown", NULL); return TRUE; }
/* A_DC_JOIN_OFFER_ONE */ void do_dc_join_offer_one(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { crm_node_t *member; ha_msg_input_t *welcome = NULL; const char *op = NULL; const char *join_to = NULL; if (msg_data->data) { welcome = fsa_typed_data(fsa_dt_ha_msg); } else { crm_info("An unknown node joined - (re-)offer to any unconfirmed nodes"); g_hash_table_foreach(crm_peer_cache, join_make_offer, &member); check_join_state(cur_state, __FUNCTION__); return; } if (welcome == NULL) { crm_err("Attempt to send welcome message without a message to reply to!"); return; } join_to = crm_element_value(welcome->msg, F_CRM_HOST_FROM); if (join_to == NULL) { crm_err("Attempt to send welcome message without a host to reply to!"); return; } member = crm_get_peer(0, join_to); op = crm_element_value(welcome->msg, F_CRM_TASK); if (join_to != NULL && (cur_state == S_INTEGRATION || cur_state == S_FINALIZE_JOIN)) { /* note: it _is_ possible that a node will have been * sick or starting up when the original offer was made. * however, it will either re-announce itself in due course * _or_ we can re-store the original offer on the client. */ crm_trace("(Re-)offering membership to %s...", join_to); } crm_info("join-%d: Processing %s request from %s in state %s", current_join_id, op, join_to, fsa_state2string(cur_state)); crm_update_peer_join(__FUNCTION__, member, crm_join_none); join_make_offer(NULL, member, NULL); /* always offer to the DC (ourselves) * this ensures the correct value for max_generation_from */ member = crm_get_peer(0, fsa_our_uname); join_make_offer(NULL, member, NULL); /* this was a genuine join request, cancel any existing * transition and invoke the PE */ abort_transition(INFINITY, tg_restart, "Node join", NULL); /* don't waste time by invoking the PE yet; */ crm_debug("Waiting on %d outstanding join acks for join-%d", crmd_join_phase_count(crm_join_welcomed), current_join_id); }
gboolean process_graph_event(xmlNode * event, const char *event_node) { int rc = -1; int status = -1; int action = -1; int target_rc = -1; int transition_num = -1; char *update_te_uuid = NULL; gboolean stop_early = FALSE; gboolean passed = FALSE; const char *id = NULL; const char *magic = NULL; CRM_ASSERT(event != NULL); id = crm_element_value(event, XML_LRM_ATTR_TASK_KEY); magic = crm_element_value(event, XML_ATTR_TRANSITION_MAGIC); if (magic == NULL) { /* non-change */ return FALSE; } if(decode_transition_magic(magic, &update_te_uuid, &transition_num, &action, &status, &rc, &target_rc) == FALSE) { crm_err("Invalid event %s detected: %s", id, magic); abort_transition(INFINITY, tg_restart, "Bad event", event); return FALSE; } if (status == LRM_OP_PENDING) { goto bail; } if (transition_num == -1) { crm_err("Action %s (%s) initiated outside of a transition", id, magic); abort_transition(INFINITY, tg_restart, "Unexpected event", event); } else if (action < 0 || crm_str_eq(update_te_uuid, te_uuid, TRUE) == FALSE) { crm_info("Action %s/%d (%s) initiated by a different transitioner", id, action, magic); abort_transition(INFINITY, tg_restart, "Foreign event", event); stop_early = TRUE; /* This could be an lrm status refresh */ } else if (transition_graph->id != transition_num) { crm_info("Detected action %s from a different transition:" " %d vs. %d", id, transition_num, transition_graph->id); abort_transition(INFINITY, tg_restart, "Old event", event); stop_early = TRUE; /* This could be an lrm status refresh */ } else if (transition_graph->complete) { crm_info("Action %s arrived after a completed transition", id); abort_transition(INFINITY, tg_restart, "Inactive graph", event); } else if (match_graph_event(action, event, event_node, status, rc, target_rc) < 0) { crm_err("Unknown graph action %s", id); abort_transition(INFINITY, tg_restart, "Unknown event", event); } else { passed = TRUE; crm_debug_2("Processed update to %s: %s", id, magic); } if (passed == FALSE) { if (update_failcount(event, event_node, rc, target_rc, transition_num == -1)) { /* Turns out this wasn't an lrm status refresh update aferall */ stop_early = FALSE; } } bail: crm_free(update_te_uuid); return stop_early; }
void peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data) { uint32_t old = 0; uint32_t changed = 0; bool appeared = FALSE; const char *status = NULL; set_bit(fsa_input_register, R_PEER_DATA); if (node->uname == NULL) { return; } switch (type) { case crm_status_uname: /* If we've never seen the node, then it also wont be in the status section */ crm_info("%s is now %s", node->uname, node->state); return; case crm_status_rstate: crm_info("Remote node %s is now %s (was %s)", node->uname, node->state, (const char *)data); /* Keep going */ case crm_status_nstate: crm_info("%s is now %s (was %s)", node->uname, node->state, (const char *)data); if (safe_str_eq(data, node->state)) { /* State did not change */ return; } else if(safe_str_eq(CRM_NODE_MEMBER, node->state)) { appeared = TRUE; } break; case crm_status_processes: if (data) { old = *(const uint32_t *)data; changed = node->processes ^ old; } /* crmd_proc_update(node, proc_flags); */ status = (node->processes & proc_flags) ? ONLINESTATUS : OFFLINESTATUS; crm_info("Client %s/%s now has status [%s] (DC=%s, changed=%6x)", node->uname, peer2text(proc_flags), status, AM_I_DC ? "true" : crm_str(fsa_our_dc), changed); if ((changed & proc_flags) == 0) { /* Peer process did not change */ crm_trace("No change %6x %6x %6x", old, node->processes, proc_flags); return; } else if (is_not_set(fsa_input_register, R_CIB_CONNECTED)) { crm_trace("Not connected"); return; } else if (fsa_state == S_STOPPING) { crm_trace("Stopping"); return; } appeared = (node->processes & proc_flags) != 0; if (safe_str_eq(node->uname, fsa_our_uname) && (node->processes & proc_flags) == 0) { /* Did we get evicted? */ crm_notice("Our peer connection failed"); register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ERROR, NULL); } else if (safe_str_eq(node->uname, fsa_our_dc) && crm_is_peer_active(node) == FALSE) { /* Did the DC leave us? */ crm_notice("Our peer on the DC (%s) is dead", fsa_our_dc); register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ELECTION, NULL); } else if(AM_I_DC && appeared == FALSE) { crm_info("Peer %s left us", node->uname); /* crm_update_peer_join(__FUNCTION__, node, crm_join_none); */ } break; } if (AM_I_DC) { xmlNode *update = NULL; int flags = node_update_peer; gboolean alive = crm_is_peer_active(node); crm_action_t *down = match_down_event(0, node->uuid, NULL, appeared); crm_trace("Alive=%d, appear=%d, down=%p", alive, appeared, down); if (alive && type == crm_status_processes) { register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL); } if (down) { const char *task = crm_element_value(down->xml, XML_LRM_ATTR_TASK); if (alive && safe_str_eq(task, CRM_OP_FENCE)) { crm_info("Node return implies stonith of %s (action %d) completed", node->uname, down->id); erase_status_tag(node->uname, XML_CIB_TAG_LRM, cib_scope_local); erase_status_tag(node->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local); /* down->confirmed = TRUE; Only stonith-ng returning should imply completion */ down->sent_update = TRUE; /* Prevent tengine_stonith_callback() from calling send_stonith_update() */ } else if (safe_str_eq(task, CRM_OP_FENCE)) { crm_trace("Waiting for stonithd to report the fencing of %s is complete", node->uname); /* via tengine_stonith_callback() */ } else if (alive == FALSE) { crm_notice("%s of %s (op %d) is complete", task, node->uname, down->id); /* down->confirmed = TRUE; Only stonith-ng returning should imply completion */ stop_te_timer(down->timer); flags |= node_update_join | node_update_expected; crmd_peer_down(node, FALSE); check_join_state(fsa_state, __FUNCTION__); update_graph(transition_graph, down); trigger_graph(); } else { crm_trace("Other %p", down); } } else if (appeared == FALSE) { crm_notice("Stonith/shutdown of %s not matched", node->uname); crm_update_peer_join(__FUNCTION__, node, crm_join_none); check_join_state(fsa_state, __FUNCTION__); abort_transition(INFINITY, tg_restart, "Node failure", NULL); fail_incompletable_actions(transition_graph, node->uuid); } else { crm_trace("Other %p", down); } update = do_update_node_cib(node, flags, NULL, __FUNCTION__); fsa_cib_anon_update(XML_CIB_TAG_STATUS, update, cib_scope_local | cib_quorum_override | cib_can_create); free_xml(update); } trigger_fsa(fsa_source); }
void te_update_diff(const char *event, xmlNode * msg) { int rc = -EINVAL; int format = 1; xmlNode *change = NULL; const char *op = NULL; xmlNode *diff = NULL; int p_add[] = { 0, 0, 0 }; int p_del[] = { 0, 0, 0 }; CRM_CHECK(msg != NULL, return); crm_element_value_int(msg, F_CIB_RC, &rc); if (transition_graph == NULL) { crm_trace("No graph"); return; } else if (rc < pcmk_ok) { crm_trace("Filter rc=%d (%s)", rc, pcmk_strerror(rc)); return; } else if (transition_graph->complete == TRUE && fsa_state != S_IDLE && fsa_state != S_TRANSITION_ENGINE && fsa_state != S_POLICY_ENGINE) { crm_trace("Filter state=%s, complete=%d", fsa_state2string(fsa_state), transition_graph->complete); return; } op = crm_element_value(msg, F_CIB_OPERATION); diff = get_message_xml(msg, F_CIB_UPDATE_RESULT); xml_patch_versions(diff, p_add, p_del); crm_debug("Processing (%s) diff: %d.%d.%d -> %d.%d.%d (%s)", op, p_del[0], p_del[1], p_del[2], p_add[0], p_add[1], p_add[2], fsa_state2string(fsa_state)); crm_element_value_int(diff, "format", &format); switch(format) { case 1: te_legacy_update_diff(event, diff); return; case 2: /* Cool, we know what to do here */ crm_log_xml_trace(diff, "Patch:Raw"); break; default: crm_warn("Unknown patch format: %d", format); return; } for (change = __xml_first_child(diff); change != NULL; change = __xml_next(change)) { const char *name = NULL; const char *op = crm_element_value(change, XML_DIFF_OP); const char *xpath = crm_element_value(change, XML_DIFF_PATH); xmlNode *match = NULL; const char *node = NULL; if(op == NULL) { continue; } else if(strcmp(op, "create") == 0) { match = change->children; } else if(strcmp(op, "move") == 0) { continue; } else if(strcmp(op, "modify") == 0) { match = first_named_child(change, XML_DIFF_RESULT); if(match) { match = match->children; } } if(match) { name = (const char *)match->name; } crm_trace("Handling %s operation for %s %p, %s", op, xpath, match, name); if(xpath == NULL) { /* Version field, ignore */ } else if(strstr(xpath, "/cib/configuration")) { abort_transition(INFINITY, tg_restart, "Non-status change", change); break; /* Wont be packaged with any resource operations we may be waiting for */ } else if(strstr(xpath, "/"XML_CIB_TAG_TICKETS) || safe_str_eq(name, XML_CIB_TAG_TICKETS)) { abort_transition(INFINITY, tg_restart, "Ticket attribute change", change); break; /* Wont be packaged with any resource operations we may be waiting for */ } else if(strstr(xpath, "/"XML_TAG_TRANSIENT_NODEATTRS"[") || safe_str_eq(name, XML_TAG_TRANSIENT_NODEATTRS)) { abort_transition(INFINITY, tg_restart, "Transient attribute change", change); break; /* Wont be packaged with any resource operations we may be waiting for */ } else if(strstr(xpath, "/"XML_LRM_TAG_RSC_OP"[") && safe_str_eq(op, "delete")) { crm_action_t *cancel = NULL; char *mutable_key = strdup(xpath); char *mutable_node = strdup(xpath); char *search = NULL; const char *key = NULL; const char *node_uuid = NULL; search = strrchr(mutable_key, '\''); search[0] = 0; key = strrchr(mutable_key, '\'') + 1; node_uuid = strstr(mutable_node, "node_state[@id=\'") + strlen("node_state[@id=\'"); search = strchr(node_uuid, '\''); search[0] = 0; cancel = get_cancel_action(key, node_uuid); if (cancel == NULL) { abort_transition(INFINITY, tg_restart, "Resource operation removal", change); } else { crm_info("Cancellation of %s on %s confirmed (%d)", key, node_uuid, cancel->id); stop_te_timer(cancel->timer); te_action_confirmed(cancel); update_graph(transition_graph, cancel); trigger_graph(); } free(mutable_node); free(mutable_key); } else if(strstr(xpath, "/"XML_CIB_TAG_LRM"[") && safe_str_eq(op, "delete")) { abort_transition(INFINITY, tg_restart, "Resource state removal", change); } else if(strstr(xpath, "/"XML_CIB_TAG_STATE"[") && safe_str_eq(op, "delete")) { abort_transition(INFINITY, tg_restart, "Node state removal", change); } else if(name == NULL) { crm_debug("No result for %s operation to %s", op, xpath); CRM_ASSERT(strcmp(op, "delete") == 0 || strcmp(op, "move") == 0); } else if(strcmp(name, XML_TAG_CIB) == 0) { xmlNode *state = NULL; xmlNode *status = first_named_child(match, XML_CIB_TAG_STATUS); xmlNode *config = first_named_child(match, XML_CIB_TAG_CONFIGURATION); for (state = __xml_first_child(status); state != NULL; state = __xml_next(state)) { xmlNode *lrm = first_named_child(state, XML_CIB_TAG_LRM); node = ID(state); process_resource_updates(node, lrm, change, op, xpath); } if(config) { abort_transition(INFINITY, tg_restart, "Non-status change", change); } } else if(strcmp(name, XML_CIB_TAG_STATUS) == 0) { xmlNode *state = NULL; for (state = __xml_first_child(match); state != NULL; state = __xml_next(state)) { xmlNode *lrm = first_named_child(state, XML_CIB_TAG_LRM); node = ID(state); process_resource_updates(node, lrm, change, op, xpath); } } else if(strcmp(name, XML_CIB_TAG_STATE) == 0) { xmlNode *lrm = first_named_child(match, XML_CIB_TAG_LRM); node = ID(match); process_resource_updates(node, lrm, change, op, xpath); } else if(strcmp(name, XML_CIB_TAG_LRM) == 0) { node = ID(match); process_resource_updates(node, match, change, op, xpath); } else if(strcmp(name, XML_LRM_TAG_RESOURCES) == 0) { char *local_node = get_node_from_xpath(xpath); process_resource_updates(local_node, match, change, op, xpath); free(local_node); } else if(strcmp(name, XML_LRM_TAG_RESOURCE) == 0) { xmlNode *rsc_op; char *local_node = get_node_from_xpath(xpath); for (rsc_op = __xml_first_child(match); rsc_op != NULL; rsc_op = __xml_next(rsc_op)) { process_graph_event(rsc_op, local_node); } free(local_node); } else if(strcmp(name, XML_LRM_TAG_RSC_OP) == 0) { char *local_node = get_node_from_xpath(xpath); process_graph_event(match, local_node); free(local_node); } else { crm_err("Ignoring %s operation for %s %p, %s", op, xpath, match, name); } } }
gboolean fail_incompletable_actions(crm_graph_t * graph, const char *down_node) { const char *target = NULL; xmlNode *last_action = NULL; GListPtr gIter = NULL; GListPtr gIter2 = NULL; if (graph == NULL || graph->complete) { return FALSE; } gIter = graph->synapses; for (; gIter != NULL; gIter = gIter->next) { synapse_t *synapse = (synapse_t *) gIter->data; if (synapse->confirmed) { continue; } gIter2 = synapse->actions; for (; gIter2 != NULL; gIter2 = gIter2->next) { crm_action_t *action = (crm_action_t *) gIter2->data; if (action->type == action_type_pseudo || action->confirmed) { continue; } else if (action->type == action_type_crm) { const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); if (safe_str_eq(task, CRM_OP_FENCE)) { continue; } } target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); if (safe_str_eq(target, down_node)) { action->failed = TRUE; synapse->failed = TRUE; last_action = action->xml; stop_te_timer(action->timer); update_graph(graph, action); if (synapse->executed) { crm_notice("Action %d (%s) was pending on %s (offline)", action->id, ID(action->xml), down_node); } else { crm_notice("Action %d (%s) is scheduled for %s (offline)", action->id, ID(action->xml), down_node); } } } } if (last_action != NULL) { crm_warn("Node %s shutdown resulted in un-runnable actions", down_node); abort_transition(INFINITY, tg_restart, "Node failure", last_action); return TRUE; } return FALSE; }
void te_update_diff(const char *event, xmlNode *msg) { int rc = -1; const char *op = NULL; xmlNode *diff = NULL; xmlNode *cib_top = NULL; xmlXPathObject *xpathObj = NULL; int diff_add_updates = 0; int diff_add_epoch = 0; int diff_add_admin_epoch = 0; int diff_del_updates = 0; int diff_del_epoch = 0; int diff_del_admin_epoch = 0; CRM_CHECK(msg != NULL, return); crm_element_value_int(msg, F_CIB_RC, &rc); if(transition_graph == NULL) { crm_debug_3("No graph"); return; } else if(rc < cib_ok) { crm_debug_3("Filter rc=%d (%s)", rc, cib_error2string(rc)); return; } else if(transition_graph->complete == TRUE && fsa_state != S_IDLE && fsa_state != S_TRANSITION_ENGINE && fsa_state != S_POLICY_ENGINE) { crm_debug_2("Filter state=%s, complete=%d", fsa_state2string(fsa_state), transition_graph->complete); return; } op = crm_element_value(msg, F_CIB_OPERATION); diff = get_message_xml(msg, F_CIB_UPDATE_RESULT); cib_diff_version_details( diff, &diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates, &diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates); crm_debug("Processing diff (%s): %d.%d.%d -> %d.%d.%d (%s)", op, diff_del_admin_epoch,diff_del_epoch,diff_del_updates, diff_add_admin_epoch,diff_add_epoch,diff_add_updates, fsa_state2string(fsa_state)); log_cib_diff(LOG_DEBUG_2, diff, op); /* Process anything that was added */ cib_top = get_xpath_object("//"F_CIB_UPDATE_RESULT"//"XML_TAG_DIFF_ADDED"//"XML_TAG_CIB, diff, LOG_ERR); if(need_abort(cib_top)) { goto bail; /* configuration changed */ } /* Process anything that was removed */ cib_top = get_xpath_object("//"F_CIB_UPDATE_RESULT"//"XML_TAG_DIFF_REMOVED"//"XML_TAG_CIB, diff, LOG_ERR); if(need_abort(cib_top)) { goto bail; /* configuration changed */ } /* Transient Attributes - Added/Updated */ xpathObj = xpath_search(diff,"//"F_CIB_UPDATE_RESULT"//"XML_TAG_DIFF_ADDED"//"XML_TAG_TRANSIENT_NODEATTRS"//"XML_CIB_TAG_NVPAIR); if(xpathObj && xpathObj->nodesetval->nodeNr > 0) { int lpc; for(lpc = 0; lpc < xpathObj->nodesetval->nodeNr; lpc++) { xmlNode *attr = getXpathResult(xpathObj, lpc); const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME); const char *value = NULL; if(safe_str_eq(CRM_OP_PROBED, name)) { value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE); } if(crm_is_true(value) == FALSE) { abort_transition(INFINITY, tg_restart, "Transient attribute: update", attr); crm_log_xml_debug_2(attr, "Abort"); goto bail; } } } else if(xpathObj) { xmlXPathFreeObject(xpathObj); } /* Transient Attributes - Removed */ xpathObj = xpath_search(diff,"//"F_CIB_UPDATE_RESULT"//"XML_TAG_DIFF_REMOVED"//"XML_TAG_TRANSIENT_NODEATTRS); if(xpathObj && xpathObj->nodesetval->nodeNr > 0) { xmlNode *aborted = getXpathResult(xpathObj, 0); abort_transition(INFINITY, tg_restart, "Transient attribute: removal", aborted); goto bail; } else if(xpathObj) { xmlXPathFreeObject(xpathObj); } /* Check for node state updates... possibly from a shutdown we requested */ xpathObj = xpath_search(diff, "//"F_CIB_UPDATE_RESULT"//"XML_TAG_DIFF_ADDED"//"XML_CIB_TAG_STATE); if(xpathObj) { int lpc = 0, max = xpathObj->nodesetval->nodeNr; for(lpc = 0; lpc < max; lpc++) { xmlNode *node = getXpathResult(xpathObj, lpc); const char *event_node = crm_element_value(node, XML_ATTR_ID); const char *ccm_state = crm_element_value(node, XML_CIB_ATTR_INCCM); const char *ha_state = crm_element_value(node, XML_CIB_ATTR_HASTATE); const char *shutdown_s = crm_element_value(node, XML_CIB_ATTR_SHUTDOWN); const char *crmd_state = crm_element_value(node, XML_CIB_ATTR_CRMDSTATE); if(safe_str_eq(ccm_state, XML_BOOLEAN_FALSE) || safe_str_eq(ha_state, DEADSTATUS) || safe_str_eq(crmd_state, CRMD_JOINSTATE_DOWN)) { crm_action_t *shutdown = match_down_event(0, event_node, NULL); if(shutdown != NULL) { const char *task = crm_element_value(shutdown->xml, XML_LRM_ATTR_TASK); if(safe_str_neq(task, CRM_OP_FENCE)) { /* Wait for stonithd to tell us it is complete via tengine_stonith_callback() */ update_graph(transition_graph, shutdown); trigger_graph(); } } else { crm_info("Stonith/shutdown of %s not matched", event_node); abort_transition(INFINITY, tg_restart, "Node failure", node); } fail_incompletable_actions(transition_graph, event_node); } if(shutdown_s) { int shutdown = crm_parse_int(shutdown_s, NULL); if(shutdown > 0) { crm_info("Aborting on "XML_CIB_ATTR_SHUTDOWN" attribute for %s", event_node); abort_transition(INFINITY, tg_restart, "Shutdown request", node); } } } xmlXPathFreeObject(xpathObj); } /* * Check for and fast-track the processing of LRM refreshes * In large clusters this can result in _huge_ speedups * * Unfortunately we can only do so when there are no pending actions * Otherwise we could miss updates we're waiting for and stall * */ xpathObj = NULL; if(transition_graph->pending == 0) { xpathObj = xpath_search(diff, "//"F_CIB_UPDATE_RESULT"//"XML_TAG_DIFF_ADDED"//"XML_LRM_TAG_RESOURCE); } if(xpathObj) { int updates = xpathObj->nodesetval->nodeNr; if(updates > 1) { /* Updates by, or in response to, TE actions will never contain updates * for more than one resource at a time */ crm_info("Detected LRM refresh - %d resources updated: Skipping all resource events", updates); abort_transition(INFINITY, tg_restart, "LRM Refresh", diff); goto bail; } xmlXPathFreeObject(xpathObj); } /* Process operation updates */ xpathObj = xpath_search(diff, "//"F_CIB_UPDATE_RESULT"//"XML_TAG_DIFF_ADDED"//"XML_LRM_TAG_RSC_OP); if(xpathObj) { process_resource_updates(xpathObj); xmlXPathFreeObject(xpathObj); } /* Detect deleted (as opposed to replaced or added) actions - eg. crm_resource -C */ xpathObj = xpath_search(diff, "//"XML_TAG_DIFF_REMOVED"//"XML_LRM_TAG_RSC_OP); if(xpathObj) { int lpc = 0, max = xpathObj->nodesetval->nodeNr; for(lpc = 0; lpc < max; lpc++) { int max = 0; const char *op_id = NULL; char *rsc_op_xpath = NULL; xmlXPathObject *op_match = NULL; xmlNode *match = getXpathResult(xpathObj, lpc); CRM_CHECK(match != NULL, continue); op_id = ID(match); max = strlen(rsc_op_template) + strlen(op_id) + 1; crm_malloc0(rsc_op_xpath, max); snprintf(rsc_op_xpath, max, rsc_op_template, op_id); op_match = xpath_search(diff, rsc_op_xpath); if(op_match == NULL || op_match->nodesetval->nodeNr == 0) { /* Prevent false positives by matching cancelations too */ const char *node = get_node_id(match); crm_action_t *cancelled = get_cancel_action(op_id, node); if(cancelled == NULL) { crm_debug("No match for deleted action %s (%s on %s)", rsc_op_xpath, op_id, node); abort_transition(INFINITY, tg_restart, "Resource op removal", match); if(op_match) { xmlXPathFreeObject(op_match); } crm_free(rsc_op_xpath); goto bail; } else { crm_debug("Deleted lrm_rsc_op %s on %s was for graph event %d", op_id, node, cancelled->id); } } if(op_match) { xmlXPathFreeObject(op_match); } crm_free(rsc_op_xpath); } } bail: if(xpathObj) { xmlXPathFreeObject(xpathObj); } }
void te_update_diff(const char *event, xmlNode * msg) { int rc = -1; const char *op = NULL; xmlNode *diff = NULL; xmlXPathObject *xpathObj = NULL; int diff_add_updates = 0; int diff_add_epoch = 0; int diff_add_admin_epoch = 0; int diff_del_updates = 0; int diff_del_epoch = 0; int diff_del_admin_epoch = 0; CRM_CHECK(msg != NULL, return); crm_element_value_int(msg, F_CIB_RC, &rc); if (transition_graph == NULL) { crm_trace("No graph"); return; } else if (rc < pcmk_ok) { crm_trace("Filter rc=%d (%s)", rc, pcmk_strerror(rc)); return; } else if (transition_graph->complete == TRUE && fsa_state != S_IDLE && fsa_state != S_TRANSITION_ENGINE && fsa_state != S_POLICY_ENGINE) { crm_trace("Filter state=%s, complete=%d", fsa_state2string(fsa_state), transition_graph->complete); return; } op = crm_element_value(msg, F_CIB_OPERATION); diff = get_message_xml(msg, F_CIB_UPDATE_RESULT); cib_diff_version_details(diff, &diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates, &diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates); crm_debug("Processing diff (%s): %d.%d.%d -> %d.%d.%d (%s)", op, diff_del_admin_epoch, diff_del_epoch, diff_del_updates, diff_add_admin_epoch, diff_add_epoch, diff_add_updates, fsa_state2string(fsa_state)); log_cib_diff(LOG_DEBUG_2, diff, op); if (cib_config_changed(NULL, NULL, &diff)) { abort_transition(INFINITY, tg_restart, "Non-status change", diff); goto bail; /* configuration changed */ } /* Tickets Attributes - Added/Updated */ xpathObj = xpath_search(diff, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_TICKETS); if (xpathObj && xpathObj->nodesetval->nodeNr > 0) { xmlNode *aborted = getXpathResult(xpathObj, 0); abort_transition(INFINITY, tg_restart, "Ticket attribute: update", aborted); goto bail; } else if (xpathObj) { xmlXPathFreeObject(xpathObj); } /* Tickets Attributes - Removed */ xpathObj = xpath_search(diff, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_TICKETS); if (xpathObj && xpathObj->nodesetval->nodeNr > 0) { xmlNode *aborted = getXpathResult(xpathObj, 0); abort_transition(INFINITY, tg_restart, "Ticket attribute: removal", aborted); goto bail; } else if (xpathObj) { xmlXPathFreeObject(xpathObj); } /* Transient Attributes - Added/Updated */ xpathObj = xpath_search(diff, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_TAG_TRANSIENT_NODEATTRS "//" XML_CIB_TAG_NVPAIR); if (xpathObj && xpathObj->nodesetval->nodeNr > 0) { int lpc; for (lpc = 0; lpc < xpathObj->nodesetval->nodeNr; lpc++) { xmlNode *attr = getXpathResult(xpathObj, lpc); const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME); const char *value = NULL; if (safe_str_eq(CRM_OP_PROBED, name)) { value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE); } if (crm_is_true(value) == FALSE) { abort_transition(INFINITY, tg_restart, "Transient attribute: update", attr); crm_log_xml_trace(attr, "Abort"); goto bail; } } } else if (xpathObj) { xmlXPathFreeObject(xpathObj); } /* Transient Attributes - Removed */ xpathObj = xpath_search(diff, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_TAG_TRANSIENT_NODEATTRS); if (xpathObj && xpathObj->nodesetval->nodeNr > 0) { xmlNode *aborted = getXpathResult(xpathObj, 0); abort_transition(INFINITY, tg_restart, "Transient attribute: removal", aborted); goto bail; } else if (xpathObj) { xmlXPathFreeObject(xpathObj); } /* * Check for and fast-track the processing of LRM refreshes * In large clusters this can result in _huge_ speedups * * Unfortunately we can only do so when there are no pending actions * Otherwise we could miss updates we're waiting for and stall * */ xpathObj = NULL; if (transition_graph->pending == 0) { xpathObj = xpath_search(diff, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_LRM_TAG_RESOURCE); } if (xpathObj) { int updates = xpathObj->nodesetval->nodeNr; if (updates > 1) { /* Updates by, or in response to, TE actions will never contain updates * for more than one resource at a time */ crm_debug("Detected LRM refresh - %d resources updated: Skipping all resource events", updates); crm_log_xml_trace(diff, "lrm-refresh"); abort_transition(INFINITY, tg_restart, "LRM Refresh", NULL); goto bail; } xmlXPathFreeObject(xpathObj); } /* Process operation updates */ xpathObj = xpath_search(diff, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_LRM_TAG_RSC_OP); if (xpathObj) { process_resource_updates(xpathObj); xmlXPathFreeObject(xpathObj); } /* Detect deleted (as opposed to replaced or added) actions - eg. crm_resource -C */ xpathObj = xpath_search(diff, "//" XML_TAG_DIFF_REMOVED "//" XML_LRM_TAG_RSC_OP); if (xpathObj) { int lpc = 0, max = xpathObj->nodesetval->nodeNr; for (lpc = 0; lpc < max; lpc++) { int max = 0; const char *op_id = NULL; char *rsc_op_xpath = NULL; xmlXPathObject *op_match = NULL; xmlNode *match = getXpathResult(xpathObj, lpc); CRM_CHECK(match != NULL, continue); op_id = ID(match); max = strlen(rsc_op_template) + strlen(op_id) + 1; rsc_op_xpath = calloc(1, max); snprintf(rsc_op_xpath, max, rsc_op_template, op_id); op_match = xpath_search(diff, rsc_op_xpath); if (op_match == NULL || op_match->nodesetval->nodeNr == 0) { /* Prevent false positives by matching cancelations too */ const char *node = get_node_id(match); crm_action_t *cancelled = get_cancel_action(op_id, node); if (cancelled == NULL) { crm_debug("No match for deleted action %s (%s on %s)", rsc_op_xpath, op_id, node); abort_transition(INFINITY, tg_restart, "Resource op removal", match); if (op_match) { xmlXPathFreeObject(op_match); } free(rsc_op_xpath); goto bail; } else { crm_debug("Deleted lrm_rsc_op %s on %s was for graph event %d", op_id, node, cancelled->id); } } if (op_match) { xmlXPathFreeObject(op_match); } free(rsc_op_xpath); } } bail: if (xpathObj) { xmlXPathFreeObject(xpathObj); } }
static void tengine_stonith_notify(stonith_t * st, stonith_event_t * st_event) { if(te_client_id == NULL) { te_client_id = crm_strdup_printf("%s.%lu", crm_system_name, (unsigned long) getpid()); } if (st_event == NULL) { crm_err("Notify data not found"); return; } crmd_alert_fencing_op(st_event); if (st_event->result == pcmk_ok && safe_str_eq("on", st_event->action)) { crm_notice("%s was successfully unfenced by %s (at the request of %s)", st_event->target, st_event->executioner ? st_event->executioner : "<anyone>", st_event->origin); /* TODO: Hook up st_event->device */ return; } else if (safe_str_eq("on", st_event->action)) { crm_err("Unfencing of %s by %s failed: %s (%d)", st_event->target, st_event->executioner ? st_event->executioner : "<anyone>", pcmk_strerror(st_event->result), st_event->result); return; } else if (st_event->result == pcmk_ok && crm_str_eq(st_event->target, fsa_our_uname, TRUE)) { crm_crit("We were allegedly just fenced by %s for %s!", st_event->executioner ? st_event->executioner : "<anyone>", st_event->origin); /* Dumps blackbox if enabled */ qb_log_fini(); /* Try to get the above log message to disk - somehow */ /* Get out ASAP and do not come back up. * * Triggering a reboot is also not the worst idea either since * the rest of the cluster thinks we're safely down */ #ifdef RB_HALT_SYSTEM reboot(RB_HALT_SYSTEM); #endif /* * If reboot() fails or is not supported, coming back up will * probably lead to a situation where the other nodes set our * status to 'lost' because of the fencing callback and will * discard subsequent election votes with: * * Election 87 (current: 5171, owner: 103): Processed vote from east-03 (Peer is not part of our cluster) * * So just stay dead, something is seriously messed up anyway. * */ exit(CRM_EX_FATAL); // None of our wrappers since we already called qb_log_fini() return; } /* Update the count of stonith failures for this target, in case we become * DC later. The current DC has already updated its fail count in * tengine_stonith_callback(). */ if (!AM_I_DC && safe_str_eq(st_event->operation, T_STONITH_NOTIFY_FENCE)) { if (st_event->result == pcmk_ok) { st_fail_count_reset(st_event->target); } else { st_fail_count_increment(st_event->target); } } crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s: %s " CRM_XS " initiator=%s ref=%s", st_event->target, st_event->result == pcmk_ok ? "" : " not", st_event->action, st_event->executioner ? st_event->executioner : "<anyone>", (st_event->client_origin? st_event->client_origin : "<unknown>"), pcmk_strerror(st_event->result), st_event->origin, st_event->id); if (st_event->result == pcmk_ok) { crm_node_t *peer = crm_find_peer_full(0, st_event->target, CRM_GET_PEER_ANY); const char *uuid = NULL; gboolean we_are_executioner = safe_str_eq(st_event->executioner, fsa_our_uname); if (peer == NULL) { return; } uuid = crm_peer_uuid(peer); crm_trace("target=%s dc=%s", st_event->target, fsa_our_dc); if(AM_I_DC) { /* The DC always sends updates */ send_stonith_update(NULL, st_event->target, uuid); /* @TODO Ideally, at this point, we'd check whether the fenced node * hosted any guest nodes, and call remote_node_down() for them. * Unfortunately, the controller doesn't have a simple, reliable way * to map hosts to guests. It might be possible to track this in the * peer cache via crm_remote_peer_cache_refresh(). For now, we rely * on the PE creating fence pseudo-events for the guests. */ if (st_event->client_origin && safe_str_neq(st_event->client_origin, te_client_id)) { /* Abort the current transition graph if it wasn't us * that invoked stonith to fence someone */ crm_info("External fencing operation from %s fenced %s", st_event->client_origin, st_event->target); abort_transition(INFINITY, tg_restart, "External Fencing Operation", NULL); } /* Assume it was our leader if we don't currently have one */ } else if (((fsa_our_dc == NULL) || safe_str_eq(fsa_our_dc, st_event->target)) && !is_set(peer->flags, crm_remote_node)) { crm_notice("Target %s our leader %s (recorded: %s)", fsa_our_dc ? "was" : "may have been", st_event->target, fsa_our_dc ? fsa_our_dc : "<unset>"); /* Given the CIB resyncing that occurs around elections, * have one node update the CIB now and, if the new DC is different, * have them do so too after the election */ if (we_are_executioner) { send_stonith_update(NULL, st_event->target, uuid); } add_stonith_cleanup(st_event->target); } /* If the target is a remote node, and we host its connection, * immediately fail all monitors so it can be recovered quickly. * The connection won't necessarily drop when a remote node is fenced, * so the failure might not otherwise be detected until the next poke. */ if (is_set(peer->flags, crm_remote_node)) { remote_ra_fail(st_event->target); } crmd_peer_down(peer, TRUE); } }