/*!
 * \internal
 * \brief Abort the current transition graph and log what triggered the abort
 *
 * Nothing is done (beyond an informational log) while the FSA is in one of
 * the states listed in the switch below. Otherwise any queued calculation
 * reference is dropped, the abort is logged with as much detail about
 * \p reason as can be extracted, and either a new calculation is scheduled
 * (graph already complete) or the transition trigger is set (graph still
 * running).
 *
 * \param[in] abort_priority  Priority handed to update_abort_priority()
 * \param[in] abort_action    Action handed to update_abort_priority()
 * \param[in] abort_text      Human-readable cause, used in log messages
 * \param[in] reason          XML that caused the abort (may be NULL)
 * \param[in] fn              Name of the calling function (for logging)
 * \param[in] line            Source line of the call (for logging)
 */
void
abort_transition_graph(int abort_priority, enum transition_action abort_action,
                       const char *abort_text, xmlNode * reason, const char *fn, int line)
{
    int add[] = { 0, 0, 0 };
    int del[] = { 0, 0, 0 };
    int level = LOG_INFO;
    xmlNode *diff = NULL;
    xmlNode *change = NULL;

    CRM_CHECK(transition_graph != NULL, return);

    switch (fsa_state) {
        case S_STARTING:
        case S_PENDING:
        case S_NOT_DC:
        case S_HALT:
        case S_ILLEGAL:
        case S_STOPPING:
        case S_TERMINATE:
            crm_info("Abort %s suppressed: state=%s (complete=%d)",
                     abort_text, fsa_state2string(fsa_state), transition_graph->complete);
            return;
        default:
            break;
    }

    /* Make sure any queued calculations are discarded ASAP */
    free(fsa_pe_ref);
    fsa_pe_ref = NULL;

    /* Only a graph that is still running can have its abort priority raised;
     * when that happens the abort is logged at a higher level.
     */
    if (transition_graph->complete == FALSE) {
        if(update_abort_priority(transition_graph, abort_priority, abort_action, abort_text)) {
            level = LOG_NOTICE;
        }
    }

    if(reason) {
        xmlNode *search = NULL;

        /* Walk up from the reason to the enclosing diff element, if any */
        for(search = reason; search; search = search->parent) {
            if (safe_str_eq(XML_TAG_DIFF, TYPE(search))) {
                diff = search;
                break;
            }
        }

        if(diff) {
            xml_patch_versions(diff, add, del);

            /* Walk up again, this time to the enclosing change element */
            for(search = reason; search; search = search->parent) {
                if (safe_str_eq(XML_DIFF_CHANGE, TYPE(search))) {
                    change = search;
                    break;
                }
            }
        }
    }

    if(reason == NULL) {
        do_crm_log(level, "Transition aborted: %s (source=%s:%d, %d)",
                   abort_text, fn, line, transition_graph->complete);

    } else if(change == NULL) {
        char *local_path = xml_get_path(reason);

        do_crm_log(level, "Transition aborted by %s.%s: %s (cib=%d.%d.%d, source=%s:%d, path=%s, %d)",
                   TYPE(reason), ID(reason), abort_text,
                   add[0], add[1], add[2], fn, line, local_path, transition_graph->complete);
        free(local_path);

    } else {
        const char *kind = NULL;
        const char *op = crm_element_value(change, XML_DIFF_OP);
        const char *path = crm_element_value(change, XML_DIFF_PATH);

        /* The operation attribute may be absent: never hand NULL to strcmp()
         * and always have a printable label for the log messages below.
         */
        const char *op_text = op ? op : "change";

        if(change == reason) {
            /* The change element itself was given as the reason: describe
             * the XML it carries rather than the wrapper.
             */
            if(op && strcmp(op, "create") == 0) {
                reason = reason->children;

            } else if(op && strcmp(op, "modify") == 0) {
                reason = first_named_child(reason, XML_DIFF_RESULT);
                if(reason) {
                    reason = reason->children;
                }
            }
            /* NOTE(review): reason can be NULL from here on if the change has
             * no children - confirm TYPE()/ID()/crm_element_value() accept NULL.
             */
        }

        kind = TYPE(reason);
        if(op && strcmp(op, "delete") == 0) {
            /* Log only the last path component as the deleted object's name */
            const char *shortpath = path ? strrchr(path, '/') : NULL;

            do_crm_log(level, "Transition aborted by deletion of %s: %s (cib=%d.%d.%d, source=%s:%d, path=%s, %d)",
                       shortpath?shortpath+1:path, abort_text,
                       add[0], add[1], add[2], fn, line, path, transition_graph->complete);

        } else if (safe_str_eq(XML_CIB_TAG_NVPAIR, kind)) {
            do_crm_log(level, "Transition aborted by %s, %s=%s: %s (%s cib=%d.%d.%d, source=%s:%d, path=%s, %d)",
                       crm_element_value(reason, XML_ATTR_ID),
                       crm_element_value(reason, XML_NVPAIR_ATTR_NAME),
                       crm_element_value(reason, XML_NVPAIR_ATTR_VALUE),
                       abort_text, op_text,
                       add[0], add[1], add[2], fn, line, path, transition_graph->complete);

        } else if (safe_str_eq(XML_LRM_TAG_RSC_OP, kind)) {
            const char *magic = crm_element_value(reason, XML_ATTR_TRANSITION_MAGIC);

            do_crm_log(level, "Transition aborted by %s '%s' on %s: %s (magic=%s, cib=%d.%d.%d, source=%s:%d, %d)",
                       crm_element_value(reason, XML_LRM_ATTR_TASK_KEY), op_text,
                       crm_element_value(reason, XML_LRM_ATTR_TARGET), abort_text, magic,
                       add[0], add[1], add[2], fn, line, transition_graph->complete);

        } else if (safe_str_eq(XML_CIB_TAG_STATE, kind)
                   || safe_str_eq(XML_CIB_TAG_NODE, kind)) {
            const char *uname = crm_peer_uname(ID(reason));

            do_crm_log(level, "Transition aborted by %s '%s' on %s: %s (cib=%d.%d.%d, source=%s:%d, %d)",
                       kind, op_text, uname ? uname : ID(reason), abort_text,
                       add[0], add[1], add[2], fn, line, transition_graph->complete);

        } else {
            do_crm_log(level, "Transition aborted by %s.%s '%s': %s (cib=%d.%d.%d, source=%s:%d, path=%s, %d)",
                       TYPE(reason), ID(reason), op_text, abort_text,
                       add[0], add[1], add[2], fn, line, path, transition_graph->complete);
        }
    }

    if (transition_graph->complete) {
        /* Nothing left to interrupt: either restart the transition timer or
         * ask for a new calculation straight away.
         */
        if (transition_timer->period_ms > 0) {
            crm_timer_stop(transition_timer);
            crm_timer_start(transition_timer);
        } else {
            register_fsa_input(C_FSA_INTERNAL, I_PE_CALC, NULL);
        }
        return;
    }

    mainloop_set_trigger(transition_trigger);
}
/*!
 * \internal
 * \brief Record a failed operation in the node's failure attributes
 *
 * \param[in] event            XML describing the operation that (maybe) failed
 * \param[in] event_node_uuid  Node on which the event occurred
 * \param[in] rc               Return code the operation actually produced
 * \param[in] target_rc        Return code the operation was expected to produce
 * \param[in] do_update        If TRUE, update regardless of operation type
 * \param[in] ignore_failures  If TRUE, record the last failure time but leave
 *                             the fail count untouched
 *
 * \return FALSE for a direct nack, an expected result or an lrm status
 *         refresh; TRUE in every other case
 */
static gboolean
update_failcount(xmlNode * event, const char *event_node_uuid, int rc,
                 int target_rc, gboolean do_update, gboolean ignore_failures)
{
    int interval = 0;
    char *task = NULL;
    char *rsc_id = NULL;
    char *timestamp = NULL;
    char *attribute = NULL;
    const char *what = NULL;
    const char *new_count = NULL;
    gboolean on_remote = FALSE;

    const char *id = crm_element_value(event, XML_LRM_ATTR_TASK_KEY);
    const char *on_uname = crm_peer_uname(event_node_uuid);
    const char *event_origin = crm_element_value(event, XML_ATTR_ORIGIN);

    /* Expected result, or the internal "busy, try again" code: not a failure */
    if ((rc == target_rc) || (rc == CRM_DIRECT_NACK_RC)) {
        return FALSE;
    }

    /* Results replayed by an lrm status refresh are old news */
    if (safe_str_eq(event_origin, "build_active_RAs")) {
        crm_debug("No update for %s (rc=%d) on %s: Old failure from lrm status refresh",
                  id, rc, on_uname);
        return FALSE;
    }

    /* Sanity check */
    CRM_CHECK(on_uname != NULL, return TRUE);
    CRM_CHECK(parse_op_key(id, &rsc_id, &task, &interval),
              crm_err("Couldn't parse: %s", ID(event)); goto bail);
    CRM_CHECK(task != NULL, goto bail);
    CRM_CHECK(rsc_id != NULL, goto bail);

    /* Work out whether an update is due and which value it should carry.
     * Recurring operations and promote/demote are always recorded; start and
     * stop are recorded with their configured offsets (created on first use).
     */
    if (interval > 0) {
        do_update = TRUE;

    } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)
               || safe_str_eq(task, CRMD_ACTION_DEMOTE)) {
        do_update = TRUE;

    } else if (safe_str_eq(task, CRMD_ACTION_START)) {
        if (failed_start_offset == NULL) {
            failed_start_offset = strdup(INFINITY_S);
        }
        new_count = failed_start_offset;
        do_update = TRUE;

    } else if (safe_str_eq(task, CRMD_ACTION_STOP)) {
        if (failed_stop_offset == NULL) {
            failed_stop_offset = strdup(INFINITY_S);
        }
        new_count = failed_stop_offset;
        do_update = TRUE;
    }

    /* Anything other than an infinite offset means "increment by one" */
    if ((new_count == NULL) || safe_str_neq(new_count, INFINITY_S)) {
        new_count = XML_NVPAIR_ATTR_VALUE "++";
    }

    if (!do_update) {
        goto bail;
    }

    timestamp = crm_itoa(time(NULL));
    on_remote = g_hash_table_lookup(crm_remote_peer_cache, event_node_uuid) ? TRUE : FALSE;
    what = ignore_failures ? "last failure" : "failcount";

    crm_info("Updating %s for %s on %s after failed %s: rc=%d (update=%s, time=%s)",
             what, rsc_id, on_uname, task, rc, new_count, timestamp);

    /* The fail count is left alone when failures are being ignored */
    if (ignore_failures == FALSE) {
        attribute = crm_concat("fail-count", rsc_id, '-');
        update_attrd(on_uname, attribute, new_count, NULL, on_remote);
        free(attribute);
    }

    /* The time of the last failure is always recorded, so the failure stays
     * visible (e.g. to crm_mon) even while it is being ignored
     */
    attribute = crm_concat("last-failure", rsc_id, '-');
    update_attrd(on_uname, attribute, timestamp, NULL, on_remote);
    free(attribute);

    free(timestamp);

  bail:
    free(rsc_id);
    free(task);
    return TRUE;
}
/*!
 * \internal
 * \brief Update the fail-count and last-failure attributes after a failed operation
 *
 * \param[in] event            XML describing the operation result
 * \param[in] event_node_uuid  ID of the node the event occurred on
 * \param[in] rc               Return code the operation actually produced
 * \param[in] target_rc        Return code the operation was expected to produce
 * \param[in] do_update        If TRUE, update whatever the operation type is
 *
 * \return FALSE if nothing needed doing (busy code, expected result, or a
 *         result replayed by an lrm status refresh), otherwise TRUE
 */
static gboolean
update_failcount(xmlNode * event, const char *event_node_uuid, int rc, int target_rc,
                 gboolean do_update)
{
    int interval = 0;

    char *task = NULL;
    char *rsc_id = NULL;

    const char *value = NULL;
    const char *id = crm_element_value(event, XML_LRM_ATTR_TASK_KEY);
    const char *on_uname = crm_peer_uname(event_node_uuid);
    const char *origin = crm_element_value(event, XML_ATTR_ORIGIN);

    if (rc == 99) {
        /* this is an internal code for "we're busy, try again" */
        return FALSE;

    } else if (rc == target_rc) {
        /* The operation produced the expected result: not a failure */
        return FALSE;
    }

    if (safe_str_eq(origin, "build_active_RAs")) {
        crm_debug("No update for %s (rc=%d) on %s: Old failure from lrm status refresh",
                  id, rc, on_uname);
        return FALSE;
    }

    CRM_CHECK(on_uname != NULL, return TRUE);

    /* Fall back to an infinite offset when none has been set yet */
    if (failed_stop_offset == NULL) {
        failed_stop_offset = strdup(INFINITY_S);
    }

    if (failed_start_offset == NULL) {
        failed_start_offset = strdup(INFINITY_S);
    }

    CRM_CHECK(parse_op_key(id, &rsc_id, &task, &interval),
              crm_err("Couldn't parse: %s", ID(event)); goto bail);
    CRM_CHECK(task != NULL, goto bail);
    CRM_CHECK(rsc_id != NULL, goto bail);

    /* Decide whether to update and with which value. A forced update or a
     * recurring operation takes precedence, so those never use the start/stop
     * offsets and always end up incrementing the count.
     */
    if (do_update || interval > 0) {
        do_update = TRUE;

    } else if (safe_str_eq(task, CRMD_ACTION_START)) {
        do_update = TRUE;
        value = failed_start_offset;

    } else if (safe_str_eq(task, CRMD_ACTION_STOP)) {
        do_update = TRUE;
        value = failed_stop_offset;

    } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)
               || safe_str_eq(task, CRMD_ACTION_DEMOTE)) {
        do_update = TRUE;
    }

    /* Any offset other than infinity means "increment the current count" */
    if (value == NULL || safe_str_neq(value, INFINITY_S)) {
        value = XML_NVPAIR_ATTR_VALUE "++";
    }

    if (do_update) {
        char *now = crm_itoa(time(NULL));
        char *attr_name = NULL;
        gboolean is_remote_node = FALSE;

        if (g_hash_table_lookup(crm_remote_peer_cache, event_node_uuid)) {
            is_remote_node = TRUE;
        }

        crm_warn("Updating failcount for %s on %s after failed %s:"
                 " rc=%d (update=%s, time=%s)", rsc_id, on_uname, task, rc, value, now);

        attr_name = crm_concat("fail-count", rsc_id, '-');
        update_attrd(on_uname, attr_name, value, NULL, is_remote_node);
        free(attr_name);

        attr_name = crm_concat("last-failure", rsc_id, '-');
        update_attrd(on_uname, attr_name, now, NULL, is_remote_node);
        free(attr_name);

        free(now);
    }

  bail:
    free(rsc_id);
    free(task);
    return TRUE;
}
/*!
 * \internal
 * \brief Log the cause of a transition abort and, unless suppressed, act on it
 *
 * The cause is always logged first. The abort itself is then skipped while
 * the FSA is in one of the states listed in the switch below. Otherwise any
 * queued calculation reference is dropped, and either a new calculation is
 * scheduled (graph already complete) or the abort priority is updated and
 * the transition trigger set (graph still running).
 *
 * \param[in] abort_priority  Priority handed to update_abort_priority()
 * \param[in] abort_action    Action handed to update_abort_priority()
 * \param[in] abort_text      Human-readable cause, used in log messages
 * \param[in] reason          XML that caused the abort (may be NULL)
 * \param[in] fn              Name of the calling function (for logging)
 * \param[in] line            Source line of the call (for logging)
 */
void
abort_transition_graph(int abort_priority, enum transition_action abort_action,
                       const char *abort_text, xmlNode * reason, const char *fn, int line)
{
    const char *magic = NULL;

    CRM_CHECK(transition_graph != NULL, return);

    if (reason) {
        int diff_add_updates = 0;
        int diff_add_epoch = 0;
        int diff_add_admin_epoch = 0;

        int diff_del_updates = 0;
        int diff_del_epoch = 0;
        int diff_del_admin_epoch = 0;

        const char *uname = "";
        xmlNode *search = reason;
        xmlNode *diff = get_xpath_object("//" F_CIB_UPDATE_RESULT "//diff", reason, LOG_DEBUG_2);

        magic = crm_element_value(reason, XML_ATTR_TRANSITION_MAGIC);

        /* Walk up from the reason to the nearest node/state element so the
         * log can say which node was affected.
         */
        while(search) {
            const char *kind = TYPE(search);

            if (safe_str_eq(XML_CIB_TAG_STATE, kind)
                || safe_str_eq(XML_CIB_TAG_NODE, kind)) {
                const char *node_id = ID(search);
                const char *peer_name = crm_peer_uname(node_id);

                /* The peer lookup can come back empty: fall back to the
                 * element's ID, else keep the "" default, so that a NULL
                 * never reaches the %s conversions below.
                 */
                if (peer_name != NULL) {
                    uname = peer_name;
                } else if (node_id != NULL) {
                    uname = node_id;
                }
                break;
            }
            search = search->parent;
        }

        if (diff) {
            cib_diff_version_details(diff,
                                     &diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates,
                                     &diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates);

            if (crm_str_eq(TYPE(reason), XML_CIB_TAG_NVPAIR, TRUE)) {
                crm_info
                    ("%s:%d - Triggered transition abort (complete=%d, node=%s, tag=%s, id=%s, name=%s, value=%s, magic=%s, cib=%d.%d.%d) : %s",
                     fn, line, transition_graph->complete, uname, TYPE(reason), ID(reason),
                     NAME(reason), VALUE(reason), magic ? magic : "NA",
                     diff_add_admin_epoch, diff_add_epoch, diff_add_updates, abort_text);
            } else {
                crm_info
                    ("%s:%d - Triggered transition abort (complete=%d, node=%s, tag=%s, id=%s, magic=%s, cib=%d.%d.%d) : %s",
                     fn, line, transition_graph->complete, uname, TYPE(reason), ID(reason),
                     magic ? magic : "NA",
                     diff_add_admin_epoch, diff_add_epoch, diff_add_updates, abort_text);
            }

        } else {
            crm_info
                ("%s:%d - Triggered transition abort (complete=%d, node=%s, tag=%s, id=%s, magic=%s) : %s",
                 fn, line, transition_graph->complete, uname, TYPE(reason), ID(reason),
                 magic ? magic : "NA", abort_text);
        }

    } else {
        crm_info("%s:%d - Triggered transition abort (complete=%d) : %s",
                 fn, line, transition_graph->complete, abort_text);
    }

    switch (fsa_state) {
        case S_STARTING:
        case S_PENDING:
        case S_NOT_DC:
        case S_HALT:
        case S_ILLEGAL:
        case S_STOPPING:
        case S_TERMINATE:
            crm_info("Abort suppressed: state=%s (complete=%d)",
                     fsa_state2string(fsa_state), transition_graph->complete);
            return;
        default:
            break;
    }

    /* Without a transition magic the summary above says little about the
     * cause, so dump the triggering XML as well.
     */
    if (magic == NULL && reason != NULL) {
        crm_log_xml_debug(reason, "Cause");
    }

    /* Make sure any queued calculations are discarded ASAP */
    free(fsa_pe_ref);
    fsa_pe_ref = NULL;

    if (transition_graph->complete) {
        /* Nothing left to interrupt: either restart the transition timer or
         * ask for a new calculation straight away.
         */
        if (transition_timer->period_ms > 0) {
            crm_timer_stop(transition_timer);
            crm_timer_start(transition_timer);
        } else {
            register_fsa_input(C_FSA_INTERNAL, I_PE_CALC, NULL);
        }
        return;
    }

    update_abort_priority(transition_graph, abort_priority, abort_action, abort_text);

    mainloop_set_trigger(transition_trigger);
}