Esempio n. 1
0
/*!
 * \brief Find an action in the current transition graph by its ID
 *
 * Walks every synapse of transition_graph, and every action of each synapse,
 * and returns the first action whose id equals \p id.
 *
 * \param[in] id         Action ID to look for
 * \param[in] confirmed  If TRUE, stop the matched action's timer and call
 *                       te_action_confirmed() on it before returning
 *
 * \return Matching action, or NULL if no action in the graph has that ID
 */
crm_action_t *
get_action(int id, gboolean confirmed)
{
    GListPtr syn_iter = NULL;

    for (syn_iter = transition_graph->synapses; syn_iter != NULL;
         syn_iter = syn_iter->next) {

        synapse_t *synapse = (synapse_t *) syn_iter->data;
        GListPtr act_iter = NULL;

        for (act_iter = synapse->actions; act_iter != NULL;
             act_iter = act_iter->next) {

            crm_action_t *candidate = (crm_action_t *) act_iter->data;

            if (candidate->id != id) {
                continue;
            }

            /* First match wins; optionally confirm it on the way out */
            if (confirmed) {
                stop_te_timer(candidate->timer);
                te_action_confirmed(candidate);
            }
            return candidate;
        }
    }

    return NULL;
}
Esempio n. 2
0
/*!
 * \internal
 * \brief Process a pseudo-action: log it, confirm it, and advance the graph
 *
 * \param[in] graph   Transition graph that \p pseudo belongs to
 * \param[in] pseudo  Pseudo-action to confirm
 *
 * \return Always TRUE
 */
static gboolean
te_pseudo_action(crm_graph_t * graph, crm_action_t * pseudo)
{
    const int pseudo_id = pseudo->id;

    crm_debug("Pseudo action %d fired and confirmed", pseudo_id);

    /* Nothing is actually executed for a pseudo-action, so it is confirmed
     * immediately and the graph is asked to re-evaluate.
     */
    te_action_confirmed(pseudo);
    update_graph(graph, pseudo);
    trigger_graph();

    return TRUE;
}
Esempio n. 3
0
/*!
 * \internal
 * \brief Process a pseudo-action: log it, confirm it, and advance the graph
 *
 * \param[in] graph   Transition graph that \p pseudo belongs to
 * \param[in] pseudo  Pseudo-action to confirm
 *
 * \return Always TRUE
 */
static gboolean
te_pseudo_action(crm_graph_t * graph, crm_action_t * pseudo)
{
    /* The task key attribute may be absent; never hand NULL to a %s
     * conversion, which is undefined behaviour in ISO C.
     */
    const char *task_key = crm_element_value(pseudo->xml, XML_LRM_ATTR_TASK_KEY);

    crm_debug("Pseudo-action %d (%s) fired and confirmed", pseudo->id,
              (task_key ? task_key : "<null>"));
    te_action_confirmed(pseudo);
    update_graph(graph, pseudo);
    trigger_graph();
    return TRUE;
}
Esempio n. 4
0
/*!
 * \internal
 * \brief Confirm action and update transition graph, aborting transition on failures
 *
 * \param[in,out] action           CRM action instance of this operation
 * \param[in]     event            Event instance of this operation
 * \param[in]     op_status        Originally reported operation status (it is
 *                                 remapped via status_from_rc() before use)
 * \param[in]     op_rc            Actual operation return code
 * \param[in]     target_rc        Expected operation return code
 * \param[in]     ignore_failures  Whether to ignore operation failures
 *
 * \note This assumes that PCMK_LRM_OP_PENDING operations have already been
 *       filtered (otherwise they may be treated as failures).
 */
static void
match_graph_event(crm_action_t *action, xmlNode *event, int op_status,
                  int op_rc, int target_rc, gboolean ignore_failures)
{
    const char *failure_note = "";
    const char *event_key = NULL;
    const char *node_name = NULL;

    /* The return codes may override the reported status */
    op_status = status_from_rc(action, op_status, op_rc, target_rc);

    if (op_status == PCMK_LRM_OP_DONE) {
        /* Success: nothing to flag */

    } else if ((op_status == PCMK_LRM_OP_ERROR)
               || (op_status == PCMK_LRM_OP_TIMEOUT)
               || (op_status == PCMK_LRM_OP_NOTSUPPORTED)) {
        /* Ordinary failures are only fatal when the caller cares */
        if (ignore_failures) {
            failure_note = ", ignoring failure";
        } else {
            action->failed = TRUE;
        }

    } else if (op_status == PCMK_LRM_OP_CANCELLED) {
        /* Cancelled operations are logged, but not marked as failed */
        crm_err("Don't know what to do for cancelled ops yet");

    } else {
        /* Anything else (for example PCMK_LRM_OP_ERROR_HARD,
         * PCMK_LRM_OP_ERROR_FATAL or PCMK_LRM_OP_NOT_INSTALLED) is always
         * treated as a failure, regardless of ignore_failures.
         */
        action->failed = TRUE;
        crm_err("Unsupported action result: %d", op_status);
    }

    /* The result is in, so the action's timer (if any) is no longer needed */
    stop_te_timer(action->timer);
    te_action_confirmed(action);

    update_graph(transition_graph, action);
    trigger_graph();

    if (action->failed) {
        abort_transition(action->synapse->priority + 1, tg_restart, "Event failed", event);
    }

    event_key = crm_element_value(event, XML_LRM_ATTR_TASK_KEY);
    node_name = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
    crm_info("Action %s (%d) confirmed on %s (rc=%d%s)",
             crm_str(event_key), action->id, crm_str(node_name), op_rc, failure_note);
}
/*!
 * \brief Handle a timeout in node-to-node communication
 *
 * If the transition is already complete the timeout is only logged.
 * Otherwise the timed-out action is marked failed and confirmed, the
 * transition is aborted, and the timeout is recorded via
 * controld_record_action_timeout() for recordable resource actions.
 *
 * \param[in] data  Pointer to action timer
 *
 * \return FALSE (indicating that source should not be re-added)
 */
gboolean
action_timer_callback(gpointer data)
{
    crm_action_timer_t *timer = (crm_action_timer_t *) data;
    const char *task = NULL;
    const char *task_desc = NULL;
    const char *node_desc = NULL;
    const char *via_desc = NULL;

    CRM_CHECK(data != NULL, return FALSE);
    stop_te_timer(timer);
    CRM_CHECK(timer->action != NULL, return FALSE);

    task = crm_element_value(timer->action->xml, XML_LRM_ATTR_TASK);
    node_desc = crm_element_value(timer->action->xml, XML_LRM_ATTR_TARGET);
    via_desc = crm_element_value(timer->action->xml, XML_LRM_ATTR_ROUTER_NODE);

    /* Human-readable fallbacks for the log messages only; "task" itself is
     * left untouched because it is also passed to
     * controld_action_is_recordable() below.
     */
    task_desc = (task != NULL)? task : "unknown action";
    if (node_desc == NULL) {
        node_desc = "";
    }
    if (via_desc == NULL) {
        via_desc = "controller";
    }

    if (transition_graph->complete) {
        crm_notice("Node %s did not send %s result (via %s) within %dms "
                   "(ignoring because transition not in progress)",
                   node_desc, task_desc, via_desc, timer->timeout);
        return FALSE;
    }

    /* The transition is still running: fail the action */
    crm_err("Node %s did not send %s result (via %s) within %dms "
            "(action timeout plus cluster-delay)",
            node_desc, task_desc, via_desc, timer->timeout);
    print_action(LOG_ERR, "Aborting transition, action lost: ", timer->action);

    timer->action->failed = TRUE;
    te_action_confirmed(timer->action);
    abort_transition(INFINITY, tg_restart, "Action lost", NULL);

    update_graph(transition_graph, timer->action);
    trigger_graph();

    // Record timeout in the CIB if appropriate
    if ((timer->action->type == action_type_rsc)
        && controld_action_is_recordable(task)) {
        controld_record_action_timeout(timer->action);
    }

    return FALSE;
}
Esempio n. 6
0
/*!
 * \internal
 * \brief Process the deletion of a resource operation entry from the CIB
 *
 * The operation key is taken from between the last pair of single quotes in
 * \p xpath, and the node UUID is obtained with extract_node_uuid(). If a
 * matching cancel action is found via get_cancel_action(), it is confirmed;
 * otherwise the transition is aborted.
 *
 * \param[in] xpath   XPath of the deleted CIB element
 * \param[in] change  Patchset change element (passed to abort_transition())
 */
static void
process_op_deletion(const char *xpath, xmlNode *change)
{
    char *mutable_key = strdup(xpath);
    char *key = NULL;
    char *node_uuid = NULL;
    crm_action_t *cancel = NULL;

    if (mutable_key == NULL) {
        /* Out of memory: we cannot tell whether this deletion confirms a
         * cancellation we were waiting for, so handle it like any other
         * unmatched removal rather than dereferencing NULL below.
         */
        crm_warn("Could not parse CIB update (resource deletion of %s): "
                 "out of memory", xpath);
        abort_transition(INFINITY, tg_restart, "Resource operation removal",
                         change);
        return;
    }

    // Extract the part of xpath between last pair of single quotes
    key = strrchr(mutable_key, '\'');
    if (key != NULL) {
        *key = '\0';
        key = strrchr(mutable_key, '\'');
    }
    if (key == NULL) {
        crm_warn("Ignoring malformed CIB update (resource deletion of %s)",
                 xpath);
        free(mutable_key);
        return;
    }
    ++key;  // skip the opening quote itself

    node_uuid = extract_node_uuid(xpath);
    cancel = get_cancel_action(key, node_uuid);
    if (cancel) {
        crm_info("Cancellation of %s on %s confirmed (%d)",
                 key, node_uuid, cancel->id);
        stop_te_timer(cancel->timer);
        te_action_confirmed(cancel);
        update_graph(transition_graph, cancel);
        trigger_graph();
    } else {
        abort_transition(INFINITY, tg_restart, "Resource operation removal",
                         change);
    }

    /* key points into mutable_key, so it must not be used past this point */
    free(mutable_key);
    free(node_uuid);
}
Esempio n. 7
0
/*!
 * \internal
 * \brief Process a pseudo-action, including its side effects on peers and
 *        Pacemaker Remote handling, then confirm it and advance the graph
 *
 * For a CRM_OP_MAINTENANCE_NODES task, a request built from the action's XML
 * is sent to every node in crm_peer_cache other than the local one, and
 * remote_ra_process_maintenance_nodes() is called. For any other task,
 * remote_ra_process_pseudo() is called.
 *
 * \param[in] graph   Transition graph that \p pseudo belongs to
 * \param[in] pseudo  Pseudo-action to process
 *
 * \return Always TRUE
 */
static gboolean
te_pseudo_action(crm_graph_t * graph, crm_action_t * pseudo)
{
    const char *task = crm_element_value(pseudo->xml, XML_LRM_ATTR_TASK);
    const char *task_key = NULL;

    /* send to peers as well? */
    if (safe_str_eq(task, CRM_OP_MAINTENANCE_NODES)) {
        GHashTableIter iter;
        crm_node_t *node = NULL;

        g_hash_table_iter_init(&iter, crm_peer_cache);
        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
            xmlNode *cmd = NULL;

            /* The local node is handled directly below, not via a message */
            if (safe_str_eq(fsa_our_uname, node->uname)) {
                continue;
            }

            cmd = create_request(task, pseudo->xml, node->uname,
                                 CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL);
            send_cluster_message(node, crm_msg_crmd, cmd, FALSE);
            free_xml(cmd);
        }

        remote_ra_process_maintenance_nodes(pseudo->xml);
    } else {
        /* Check action for Pacemaker Remote node side effects */
        remote_ra_process_pseudo(pseudo->xml);
    }

    /* The task key attribute may be absent; never hand NULL to a %s
     * conversion, which is undefined behaviour in ISO C.
     */
    task_key = crm_element_value(pseudo->xml, XML_LRM_ATTR_TASK_KEY);
    crm_debug("Pseudo-action %d (%s) fired and confirmed", pseudo->id,
              (task_key ? task_key : "<null>"));
    te_action_confirmed(pseudo);
    update_graph(graph, pseudo);
    trigger_graph();
    return TRUE;
}
Esempio n. 8
0
/*
 * Match a reported operation result against an action in the current
 * transition graph, confirm that action, and abort the transition if the
 * action failed and was not allowed to.
 *
 * returns the ID of the action if a match is found (this includes a pending
 *         result, which is otherwise ignored, and a failed action)
 * returns -1 if a match was not found
 *
 * No other negative value is returned by this function.
 */
int
match_graph_event(int action_id, xmlNode * event, const char *event_node,
                  int op_status, int op_rc, int target_rc)
{
    const char *target = NULL;
    const char *allow_fail = NULL;
    const char *this_event = NULL;
    crm_action_t *action = NULL;

    action = get_action(action_id, FALSE);
    if (action == NULL) {
        return -1;
    }

    /* The return codes may override the reported status */
    op_status = status_from_rc(action, op_status, op_rc, target_rc);
    if (op_status != PCMK_LRM_OP_DONE) {
        update_failcount(event, event_node, op_rc, target_rc, FALSE);
    }

    /* Process OP status */
    switch (op_status) {
        case PCMK_LRM_OP_PENDING:
            /* Leave the action unconfirmed and its timer running */
            crm_debug("Ignoring pending operation");
            return action->id;
        case PCMK_LRM_OP_DONE:
            break;
        case PCMK_LRM_OP_ERROR:
        case PCMK_LRM_OP_TIMEOUT:
        case PCMK_LRM_OP_NOTSUPPORTED:
            action->failed = TRUE;
            break;
        case PCMK_LRM_OP_CANCELLED:
            /* do nothing?? */
            crm_err("Dont know what to do for cancelled ops yet");
            break;
        default:
            action->failed = TRUE;
            crm_err("Unsupported action result: %d", op_status);
    }

    /* stop this event's timer if it had one */
    stop_te_timer(action->timer);
    te_action_confirmed(action);

    update_graph(transition_graph, action);
    trigger_graph();

    /* An action whose meta-attributes allow failure does not abort the
     * transition. Note that the flag is only cleared here, after the graph
     * has already been updated above.
     */
    if (action->failed) {
        allow_fail = crm_meta_value(action->params, XML_ATTR_TE_ALLOWFAIL);
        if (crm_is_true(allow_fail)) {
            action->failed = FALSE;
        }
    }

    if (action->failed) {
        abort_transition(action->synapse->priority + 1, tg_restart, "Event failed", event);
    }

    /* Report the operation's return code, as the "rc=" label advertises
     * (not the remapped operation status).
     */
    this_event = crm_element_value(event, XML_LRM_ATTR_TASK_KEY);
    target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
    crm_info("Action %s (%d) confirmed on %s (rc=%d)",
             crm_str(this_event), action->id, crm_str(target), op_rc);

    /* determine if this action affects a remote-node's online/offline status */
    process_remote_node_action(action, event);
    return action->id;
}
Esempio n. 9
0
/*!
 * \internal
 * \brief Execute a cluster (CRM) command action from the transition graph
 *
 * The command is sent to the action's router node (or its target node if no
 * router node is set). A shutdown of the local node is not sent at all:
 * it is recorded as the graph's completion action and confirmed immediately.
 * Unless the action is marked "no wait", a timer is started for the reply.
 *
 * \param[in] graph   Transition graph that \p action belongs to
 * \param[in] action  Command action to execute
 *
 * \return TRUE if the action was handled, FALSE if it names no target node
 *         or the cluster message could not be sent
 */
static gboolean
te_crm_command(crm_graph_t * graph, crm_action_t * action)
{
    char *counter = NULL;
    xmlNode *cmd = NULL;
    gboolean is_local = FALSE;

    const char *id = NULL;
    const char *task = NULL;
    const char *value = NULL;
    const char *on_node = NULL;
    const char *router_node = NULL;

    gboolean rc = TRUE;
    gboolean no_wait = FALSE;

    id = ID(action->xml);
    task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
    on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
    router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);

    if (!router_node) {
        router_node = on_node;
    }

    CRM_CHECK(on_node != NULL && strlen(on_node) != 0,
              crm_err("Corrupted command (id=%s) %s: no node", crm_str(id), crm_str(task));
              return FALSE);

    /* Work out both flags before logging, so that the message below can
     * actually report them.
     */
    if (safe_str_eq(router_node, fsa_our_uname)) {
        is_local = TRUE;
    }

    value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT);
    if (crm_is_true(value)) {
        no_wait = TRUE;
    }

    crm_info("Executing crm-event (%s): %s on %s%s%s",
             crm_str(id), crm_str(task), on_node,
             is_local ? " (local)" : "", no_wait ? " - no waiting" : "");

    if (is_local && safe_str_eq(task, CRM_OP_SHUTDOWN)) {
        /* defer until everything else completes */
        crm_info("crm-event (%s) is a local shutdown", crm_str(id));
        graph->completion_action = tg_shutdown;
        graph->abort_reason = "local shutdown";
        te_action_confirmed(action);
        update_graph(graph, action);
        trigger_graph();
        return TRUE;

    } else if (safe_str_eq(task, CRM_OP_SHUTDOWN)) {
        crm_node_t *peer = crm_get_peer(0, router_node);
        crm_update_peer_expected(__FUNCTION__, peer, CRMD_JOINSTATE_DOWN);
    }

    cmd = create_request(task, action->xml, router_node, CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL);

    /* Tag the request with a transition key built from the graph ID, the
     * action ID, the expected return code and te_uuid.
     */
    counter =
        generate_transition_key(transition_graph->id, action->id, get_target_rc(action), te_uuid);
    crm_xml_add(cmd, XML_ATTR_TRANSITION_KEY, counter);

    rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_crmd, cmd, TRUE);
    free(counter);
    free_xml(cmd);

    if (rc == FALSE) {
        crm_err("Action %d failed: send", action->id);
        return FALSE;

    } else if (no_wait) {
        /* Nobody will reply, so confirm the action right away */
        te_action_confirmed(action);
        update_graph(graph, action);
        trigger_graph();

    } else {
        if (action->timeout <= 0) {
            crm_err("Action %d: %s on %s had an invalid timeout (%dms).  Using %dms instead",
                    action->id, crm_str(task), on_node, action->timeout, graph->network_delay);
            action->timeout = graph->network_delay;
        }
        te_start_action_timer(graph, action);
    }

    return TRUE;
}
/*!
 * \brief Handle the result of a fencing (stonith) operation
 *
 * The callback's user data is a transition key string. If it identifies a
 * fencing action in the current transition, that action is confirmed (on
 * success) or marked failed and the transition aborted (on failure), and
 * the graph is updated.
 *
 * \param[in] stonith  Fencer connection (unused here)
 * \param[in] data     Callback data: call ID, result code and user data
 *
 * \note data->userdata is freed by this function on every path past the
 *       initial NULL check.
 */
void
tengine_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data)
{
    char *uuid = NULL;
    int stonith_id = -1;
    int transition_id = -1;
    crm_action_t *action = NULL;
    int call_id = data->call_id;
    int rc = data->rc;
    char *userdata = data->userdata;

    CRM_CHECK(userdata != NULL, return);
    crm_notice("Stonith operation %d/%s: %s (%d)", call_id, userdata,
               pcmk_strerror(rc), rc);

    if (AM_I_DC == FALSE) {
        /* Nothing to do when we are not the DC, but the transition key must
         * still be released exactly as on every other exit path.
         */
        goto bail;
    }

    /* filter out old STONITH actions */
    CRM_CHECK(decode_transition_key(userdata, &uuid, &transition_id, &stonith_id, NULL),
              goto bail);

    if (transition_graph->complete || stonith_id < 0 || safe_str_neq(uuid, te_uuid)
        || transition_graph->id != transition_id) {
        crm_info("Ignoring STONITH action initiated outside of the current transition");
        goto bail;
    }

    action = get_action(stonith_id, FALSE);
    if (action == NULL) {
        crm_err("Stonith action not matched");
        goto bail;
    }

    stop_te_timer(action->timer);
    if (rc == pcmk_ok) {
        const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
        const char *target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
        const char *op = crm_meta_value(action->params, "stonith_action");

        crm_info("Stonith operation %d for %s passed", call_id, target);
        if (action->confirmed == FALSE) {
            te_action_confirmed(action);
            if (safe_str_eq("on", op)) {
                /* Unfencing: record when it happened, plus the digests taken
                 * from the action's parameters, as node attributes.
                 */
                const char *value = NULL;
                char *now = crm_itoa(time(NULL));

                update_attrd(target, CRM_ATTR_UNFENCED, now, NULL, FALSE);
                free(now);

                value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_ALL);
                update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL, FALSE);

                value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_SECURE);
                update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL, FALSE);

            } else if (action->sent_update == FALSE) {
                send_stonith_update(action, target, target_uuid);
                action->sent_update = TRUE;
            }
        }
        st_fail_count_reset(target);

    } else {
        const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
        enum transition_action abort_action = tg_restart;

        action->failed = TRUE;
        crm_notice("Stonith operation %d for %s failed (%s): aborting transition.",
                   call_id, target, pcmk_strerror(rc));

        /* If no fence devices were available, there's no use in immediately
         * checking again, so don't start a new transition in that case.
         */
        if (rc == -ENODEV) {
            crm_warn("No devices found in cluster to fence %s, giving up",
                     target);
            abort_action = tg_stop;
        }

        /* Increment the fail count now, so abort_for_stonith_failure() can
         * check it. Non-DC nodes will increment it in tengine_stonith_notify().
         */
        st_fail_count_increment(target);
        abort_for_stonith_failure(abort_action, target, NULL);
    }

    update_graph(transition_graph, action);
    trigger_graph();

  bail:
    free(userdata);
    free(uuid);
    return;
}
Esempio n. 11
0
/*!
 * \brief Process a CIB diff notification on behalf of the transition engine
 *
 * Notifications are ignored when there is no transition graph, when the CIB
 * operation failed, or when the graph is complete and the FSA is not in one
 * of the idle / transition-engine / policy-engine states. Format-1 diffs are
 * handed to te_legacy_update_diff(). For format-2 diffs, each change is
 * examined: configuration, ticket, transient attribute and state removals
 * abort the transition, deletions of resource operations are matched against
 * pending cancel actions, and resource history updates are passed on to
 * process_resource_updates() / process_graph_event().
 *
 * \param[in] event  Notification event name (only passed to the legacy handler)
 * \param[in] msg    CIB notification message containing the diff
 */
void
te_update_diff(const char *event, xmlNode * msg)
{
    int rc = -EINVAL;
    int format = 1;
    xmlNode *change = NULL;
    const char *cib_op = NULL;

    xmlNode *diff = NULL;

    int p_add[] = { 0, 0, 0 };
    int p_del[] = { 0, 0, 0 };

    CRM_CHECK(msg != NULL, return);
    crm_element_value_int(msg, F_CIB_RC, &rc);

    if (transition_graph == NULL) {
        crm_trace("No graph");
        return;

    } else if (rc < pcmk_ok) {
        crm_trace("Filter rc=%d (%s)", rc, pcmk_strerror(rc));
        return;

    } else if (transition_graph->complete == TRUE
               && fsa_state != S_IDLE
               && fsa_state != S_TRANSITION_ENGINE && fsa_state != S_POLICY_ENGINE) {
        crm_trace("Filter state=%s, complete=%d", fsa_state2string(fsa_state),
                  transition_graph->complete);
        return;
    }

    cib_op = crm_element_value(msg, F_CIB_OPERATION);
    diff = get_message_xml(msg, F_CIB_UPDATE_RESULT);

    xml_patch_versions(diff, p_add, p_del);
    crm_debug("Processing (%s) diff: %d.%d.%d -> %d.%d.%d (%s)",
              (cib_op ? cib_op : "<null>"),
              p_del[0], p_del[1], p_del[2], p_add[0], p_add[1], p_add[2],
              fsa_state2string(fsa_state));

    crm_element_value_int(diff, "format", &format);
    switch(format) {
        case 1:
            te_legacy_update_diff(event, diff);
            return;
        case 2:
            /* Cool, we know what to do here */
            crm_log_xml_trace(diff, "Patch:Raw");
            break;
        default:
            crm_warn("Unknown patch format: %d", format);
            return;
    }

    for (change = __xml_first_child(diff); change != NULL; change = __xml_next(change)) {
        const char *name = NULL;
        const char *op = crm_element_value(change, XML_DIFF_OP);
        const char *xpath = crm_element_value(change, XML_DIFF_PATH);
        xmlNode *match = NULL;
        const char *node = NULL;

        /* Locate the XML that results from this change, if any.  Deletions
         * (and unrecognized operations) leave match as NULL.
         */
        if(op == NULL) {
            continue;

        } else if(strcmp(op, "create") == 0) {
            match = change->children;

        } else if(strcmp(op, "move") == 0) {
            continue;

        } else if(strcmp(op, "modify") == 0) {
            match = first_named_child(change, XML_DIFF_RESULT);
            if(match) {
                match = match->children;
            }
        }

        if(match) {
            name = (const char *)match->name;
        }

        crm_trace("Handling %s operation for %s %p, %s", op,
                  (xpath ? xpath : "<null>"), match, (name ? name : "<null>"));
        if(xpath == NULL) {
            /* Version field, ignore */

        } else if(strstr(xpath, "/cib/configuration")) {
            abort_transition(INFINITY, tg_restart, "Non-status change", change);
            break; /* Wont be packaged with any resource operations we may be waiting for */

        } else if(strstr(xpath, "/"XML_CIB_TAG_TICKETS) || safe_str_eq(name, XML_CIB_TAG_TICKETS)) {
            abort_transition(INFINITY, tg_restart, "Ticket attribute change", change);
            break; /* Wont be packaged with any resource operations we may be waiting for */

        } else if(strstr(xpath, "/"XML_TAG_TRANSIENT_NODEATTRS"[") || safe_str_eq(name, XML_TAG_TRANSIENT_NODEATTRS)) {
            abort_transition(INFINITY, tg_restart, "Transient attribute change", change);
            break; /* Wont be packaged with any resource operations we may be waiting for */

        } else if(strstr(xpath, "/"XML_LRM_TAG_RSC_OP"[") && safe_str_eq(op, "delete")) {
            static const char node_prefix[] = "node_state[@id=\'";
            crm_action_t *cancel = NULL;
            char *mutable_key = strdup(xpath);
            char *mutable_node = strdup(xpath);
            char *search = NULL;

            const char *key = NULL;
            const char *node_uuid = NULL;

            /* Operation key: the text between the last pair of single quotes.
             * Every lookup is checked so that a malformed path (or a failed
             * allocation) cannot lead to a NULL dereference.
             */
            if (mutable_key != NULL) {
                search = strrchr(mutable_key, '\'');
                if (search != NULL) {
                    search[0] = 0;
                    search = strrchr(mutable_key, '\'');
                    if (search != NULL) {
                        key = search + 1;
                    }
                }
            }

            /* Node UUID: the quoted value that follows node_state[@id=' */
            if (mutable_node != NULL) {
                char *id_start = strstr(mutable_node, node_prefix);

                if (id_start != NULL) {
                    id_start += strlen(node_prefix);
                    search = strchr(id_start, '\'');
                    if (search != NULL) {
                        search[0] = 0;
                        node_uuid = id_start;
                    }
                }
            }

            if ((key != NULL) && (node_uuid != NULL)) {
                cancel = get_cancel_action(key, node_uuid);
            } else {
                /* Cannot identify the operation, so it cannot be matched to a
                 * cancel action: fall through to the unmatched-removal path.
                 */
                crm_warn("Could not parse resource operation deletion path %s", xpath);
            }

            if (cancel == NULL) {
                abort_transition(INFINITY, tg_restart, "Resource operation removal", change);

            } else {
                crm_info("Cancellation of %s on %s confirmed (%d)", key, node_uuid, cancel->id);
                stop_te_timer(cancel->timer);
                te_action_confirmed(cancel);

                update_graph(transition_graph, cancel);
                trigger_graph();

            }
            /* key and node_uuid point into these buffers */
            free(mutable_node);
            free(mutable_key);

        } else if(strstr(xpath, "/"XML_CIB_TAG_LRM"[") && safe_str_eq(op, "delete")) {
            abort_transition(INFINITY, tg_restart, "Resource state removal", change);

        } else if(strstr(xpath, "/"XML_CIB_TAG_STATE"[") && safe_str_eq(op, "delete")) {
            abort_transition(INFINITY, tg_restart, "Node state removal", change);

        } else if(name == NULL) {
            crm_debug("No result for %s operation to %s", op, xpath);
            CRM_ASSERT(strcmp(op, "delete") == 0 || strcmp(op, "move") == 0);

        } else if(strcmp(name, XML_TAG_CIB) == 0) {
            xmlNode *state = NULL;
            xmlNode *status = first_named_child(match, XML_CIB_TAG_STATUS);
            xmlNode *config = first_named_child(match, XML_CIB_TAG_CONFIGURATION);

            for (state = __xml_first_child(status); state != NULL; state = __xml_next(state)) {
                xmlNode *lrm = first_named_child(state, XML_CIB_TAG_LRM);

                node = ID(state);
                process_resource_updates(node, lrm, change, op, xpath);
            }

            if(config) {
                abort_transition(INFINITY, tg_restart, "Non-status change", change);
            }

        } else if(strcmp(name, XML_CIB_TAG_STATUS) == 0) {
            xmlNode *state = NULL;

            for (state = __xml_first_child(match); state != NULL; state = __xml_next(state)) {
                xmlNode *lrm = first_named_child(state, XML_CIB_TAG_LRM);

                node = ID(state);
                process_resource_updates(node, lrm, change, op, xpath);
            }

        } else if(strcmp(name, XML_CIB_TAG_STATE) == 0) {
            xmlNode *lrm = first_named_child(match, XML_CIB_TAG_LRM);

            node = ID(match);
            process_resource_updates(node, lrm, change, op, xpath);

        } else if(strcmp(name, XML_CIB_TAG_LRM) == 0) {
            node = ID(match);
            process_resource_updates(node, match, change, op, xpath);

        } else if(strcmp(name, XML_LRM_TAG_RESOURCES) == 0) {
            char *local_node = get_node_from_xpath(xpath);

            process_resource_updates(local_node, match, change, op, xpath);
            free(local_node);

        } else if(strcmp(name, XML_LRM_TAG_RESOURCE) == 0) {

            xmlNode *rsc_op;
            char *local_node = get_node_from_xpath(xpath);

            for (rsc_op = __xml_first_child(match); rsc_op != NULL; rsc_op = __xml_next(rsc_op)) {
                process_graph_event(rsc_op, local_node);
            }
            free(local_node);

        } else if(strcmp(name, XML_LRM_TAG_RSC_OP) == 0) {
            char *local_node = get_node_from_xpath(xpath);

            process_graph_event(match, local_node);
            free(local_node);

        } else {
            crm_err("Ignoring %s operation for %s %p, %s", op, xpath, match, name);
        }
    }
}