Example #1
0
static gboolean
update_failcount(xmlNode * event, const char *event_node_uuid, int rc, int target_rc, gboolean do_update)
{
    int interval = 0;

    char *task = NULL;
    char *rsc_id = NULL;
    char *attr_name = NULL;

    const char *value = NULL;
    const char *id = crm_element_value(event, XML_LRM_ATTR_TASK_KEY);
    const char *on_uname = get_uname_from_event(event);
    const char *origin = crm_element_value(event, XML_ATTR_ORIGIN);

    if (rc == 99) {
        /* this is an internal code for "we're busy, try again" */
        return FALSE;

    } else if (rc == target_rc) {
        return FALSE;
    }

    if (safe_str_eq(origin, "build_active_RAs")) {
        crm_debug("No update for %s (rc=%d) on %s: Old failure from lrm status refresh",
                  id, rc, on_uname);
        return FALSE;
    }

    CRM_LOG_ASSERT(on_uname != NULL);
    if (on_uname == NULL) {
        return TRUE;
    }

    if (failed_stop_offset == NULL) {
        failed_stop_offset = strdup(INFINITY_S);
    }

    if (failed_start_offset == NULL) {
        failed_start_offset = strdup(INFINITY_S);
    }

    CRM_CHECK(parse_op_key(id, &rsc_id, &task, &interval), crm_err("Couldn't parse: %s", ID(event));
              goto bail);
    CRM_CHECK(task != NULL, goto bail);
    CRM_CHECK(rsc_id != NULL, goto bail);

    if (do_update || interval > 0) {
        do_update = TRUE;

    } else if (safe_str_eq(task, CRMD_ACTION_START)) {
        do_update = TRUE;
        value = failed_start_offset;

    } else if (safe_str_eq(task, CRMD_ACTION_STOP)) {
        do_update = TRUE;
        value = failed_stop_offset;

    } else if (safe_str_eq(task, CRMD_ACTION_STOP)) {
        do_update = TRUE;
        value = failed_stop_offset;

    } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) {
        do_update = TRUE;

    } else if (safe_str_eq(task, CRMD_ACTION_DEMOTE)) {
        do_update = TRUE;
    }

    if (value == NULL || safe_str_neq(value, INFINITY_S)) {
        value = XML_NVPAIR_ATTR_VALUE "++";
    }

    if (do_update) {
        char *now = crm_itoa(time(NULL));
        gboolean is_remote_node = get_is_remote_from_event(event);

        crm_warn("Updating failcount for %s on %s after failed %s:"
                 " rc=%d (update=%s, time=%s)", rsc_id, on_uname, task, rc, value, now);

        attr_name = crm_concat("fail-count", rsc_id, '-');
        update_attrd(on_uname, attr_name, value, NULL, is_remote_node);
        free(attr_name);

        attr_name = crm_concat("last-failure", rsc_id, '-');
        update_attrd(on_uname, attr_name, now, NULL, is_remote_node);
        free(attr_name);

        free(now);
    }

  bail:
    free(rsc_id);
    free(task);
    return TRUE;
}
Example #2
0
/*!
 * \internal
 * \brief Update failure-related node attributes if warranted
 *
 * \param[in] event            XML describing operation that (maybe) failed
 * \param[in] event_node_uuid  Node that event occurred on
 * \param[in] rc               Actual operation return code
 * \param[in] target_rc        Expected operation return code
 * \param[in] do_update        If TRUE, do update regardless of operation type
 * \param[in] ignore_failures  If TRUE, update last failure but not fail count
 *
 * \return TRUE if this was not a direct nack, success or lrm status refresh
 */
static gboolean
update_failcount(xmlNode * event, const char *event_node_uuid, int rc,
                 int target_rc, gboolean do_update, gboolean ignore_failures)
{
    int interval = 0;

    char *task = NULL;
    char *rsc_id = NULL;

    const char *value = NULL;
    const char *id = crm_element_value(event, XML_LRM_ATTR_TASK_KEY);
    const char *on_uname = crm_peer_uname(event_node_uuid);
    const char *origin = crm_element_value(event, XML_ATTR_ORIGIN);

    /* Nothing needs to be done for success, lrm status refresh,
     * or direct nack (internal code for "busy, try again")
     */
    if ((rc == CRM_DIRECT_NACK_RC) || (rc == target_rc)) {
        return FALSE;
    } else if (safe_str_eq(origin, "build_active_RAs")) {
        crm_debug("No update for %s (rc=%d) on %s: Old failure from lrm status refresh",
                  id, rc, on_uname);
        return FALSE;
    }

    /* Sanity check */
    CRM_CHECK(on_uname != NULL, return TRUE);
    CRM_CHECK(parse_op_key(id, &rsc_id, &task, &interval),
              crm_err("Couldn't parse: %s", ID(event)); goto bail);
    CRM_CHECK(task != NULL, goto bail);
    CRM_CHECK(rsc_id != NULL, goto bail);

    /* Decide whether update is necessary and what value to use */
    if ((interval > 0) || safe_str_eq(task, CRMD_ACTION_PROMOTE)
        || safe_str_eq(task, CRMD_ACTION_DEMOTE)) {
        do_update = TRUE;

    } else if (safe_str_eq(task, CRMD_ACTION_START)) {
        do_update = TRUE;
        if (failed_start_offset == NULL) {
            failed_start_offset = strdup(INFINITY_S);
        }
        value = failed_start_offset;

    } else if (safe_str_eq(task, CRMD_ACTION_STOP)) {
        do_update = TRUE;
        if (failed_stop_offset == NULL) {
            failed_stop_offset = strdup(INFINITY_S);
        }
        value = failed_stop_offset;
    }

    /* Fail count will be either incremented or set to infinity */
    if (value == NULL || safe_str_neq(value, INFINITY_S)) {
        value = XML_NVPAIR_ATTR_VALUE "++";
    }

    if (do_update) {
        char *now = crm_itoa(time(NULL));
        char *attr_name = NULL;
        gboolean is_remote_node = FALSE;

        if (g_hash_table_lookup(crm_remote_peer_cache, event_node_uuid)) {
            is_remote_node = TRUE;
        }

        crm_info("Updating %s for %s on %s after failed %s: rc=%d (update=%s, time=%s)",
                 (ignore_failures? "last failure" : "failcount"),
                 rsc_id, on_uname, task, rc, value, now);

        /* Update the fail count, if we're not ignoring failures */
        if (!ignore_failures) {
            attr_name = crm_concat("fail-count", rsc_id, '-');
            update_attrd(on_uname, attr_name, value, NULL, is_remote_node);
            free(attr_name);
        }

        /* Update the last failure time (even if we're ignoring failures,
         * so that failure can still be detected and shown, e.g. by crm_mon)
         */
        attr_name = crm_concat("last-failure", rsc_id, '-');
        update_attrd(on_uname, attr_name, now, NULL, is_remote_node);
        free(attr_name);

        free(now);
    }

  bail:
    free(rsc_id);
    free(task);
    return TRUE;
}
Example #3
0
static char *
convert_non_atomic_uuid(char *old_uuid, resource_t * rsc, gboolean allow_notify,
                        gboolean free_original)
{
    int interval = 0;
    char *uuid = NULL;
    char *rid = NULL;
    char *raw_task = NULL;
    int task = no_action;

    CRM_ASSERT(rsc);
    pe_rsc_trace(rsc, "Processing %s", old_uuid);
    if (old_uuid == NULL) {
        return NULL;

    } else if (strstr(old_uuid, "notify") != NULL) {
        goto done;              /* no conversion */

    } else if (rsc->variant < pe_group) {
        goto done;              /* no conversion */
    }

    CRM_ASSERT(parse_op_key(old_uuid, &rid, &raw_task, &interval));
    if (interval > 0) {
        goto done;              /* no conversion */
    }

    task = text2task(raw_task);
    switch (task) {
        case stop_rsc:
        case start_rsc:
        case action_notify:
        case action_promote:
        case action_demote:
            break;
        case stopped_rsc:
        case started_rsc:
        case action_notified:
        case action_promoted:
        case action_demoted:
            task--;
            break;
        case monitor_rsc:
        case shutdown_crm:
        case stonith_node:
            task = no_action;
            break;
        default:
            crm_err("Unknown action: %s", raw_task);
            task = no_action;
            break;
    }

    if (task != no_action) {
        if (is_set(rsc->flags, pe_rsc_notify) && allow_notify) {
            uuid = generate_notify_key(rid, "confirmed-post", task2text(task + 1));

        } else {
            uuid = generate_op_key(rid, task2text(task + 1), 0);
        }
        pe_rsc_trace(rsc, "Converted %s -> %s", old_uuid, uuid);
    }

  done:
    if (uuid == NULL) {
        uuid = strdup(old_uuid);
    }

    if (free_original) {
        free(old_uuid);
    }

    free(raw_task);
    free(rid);
    return uuid;
}
Example #4
0
    if (failed_stop_offset == NULL) {
        failed_stop_offset = strdup(INFINITY_S);
    }

    if (failed_start_offset == NULL) {
        failed_start_offset = strdup(INFINITY_S);
    }

    if (on_uname == NULL) {
        /* uname not in event, check cache */
        on_uname = get_uname(event_node_uuid);
        CRM_CHECK(on_uname != NULL, return TRUE);
    }

    CRM_CHECK(parse_op_key(id, &rsc_id, &task, &interval), crm_err("Couldn't parse: %s", ID(event));
              goto bail);
    CRM_CHECK(task != NULL, goto bail);
    CRM_CHECK(rsc_id != NULL, goto bail);

    if (do_update || interval > 0) {
        do_update = TRUE;

    } else if (safe_str_eq(task, CRMD_ACTION_START)) {
        do_update = TRUE;
        value = failed_start_offset;

    } else if (safe_str_eq(task, CRMD_ACTION_STOP)) {
        do_update = TRUE;
        value = failed_stop_offset;