Пример #1
0
/*
 * Decide whether a graph action may be started now.
 *
 * Only resource actions are limited; any other action type is always
 * allowed.  For resource actions the decision is delegated to
 * te_should_perform_action_on() for the node(s) the work is counted against.
 *
 * graph   transition graph the action belongs to
 * action  action under consideration
 *
 * Returns TRUE if the action may be performed, FALSE otherwise.
 */
static gboolean
te_should_perform_action(crm_graph_t * graph, crm_action_t * action)
{
    const char *op_name = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
    const char *node = NULL;

    if (action->type != action_type_rsc) {
        /* Non-resource actions are never limited */
        return TRUE;
    }

    /* A router node means the action is performed on a remote node.  For
     * now such actions are counted against the job list of the cluster node
     * hosting the connection resource. */
    node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
    if (node != NULL) {
        return te_should_perform_action_on(graph, action, node);
    }

    if (safe_str_eq(op_name, CRMD_ACTION_MIGRATE)
        || safe_str_eq(op_name, CRMD_ACTION_MIGRATED)) {

        /* Migrations are checked against the source first, then the target */
        node = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE);
        if (te_should_perform_action_on(graph, action, node) == FALSE) {
            return FALSE;
        }
        node = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_TARGET);

    } else {
        node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
    }

    return te_should_perform_action_on(graph, action, node);
}
Пример #2
0
/*
 * Decide whether a graph action may be started now.
 *
 * Non-resource actions are always allowed.  Resource actions are checked
 * with te_should_perform_action_on() against their target node; migration
 * actions are checked against both the migration source and target.
 *
 * Returns TRUE if the action may be performed, FALSE otherwise.
 */
static gboolean
te_should_perform_action(crm_graph_t * graph, crm_action_t * action)
{
    const char *op_name = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
    const char *node = NULL;
    gboolean migrating = FALSE;

    if (action->type != action_type_rsc) {
        /* Non-resource actions are never limited */
        return TRUE;
    }

    migrating = safe_str_eq(op_name, CRMD_ACTION_MIGRATE)
        || safe_str_eq(op_name, CRMD_ACTION_MIGRATED);

    if (!migrating) {
        node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
        return te_should_perform_action_on(graph, action, node);
    }

    /* The source must accept the job before the target is even asked */
    node = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE);
    if (te_should_perform_action_on(graph, action, node) == FALSE) {
        return FALSE;
    }

    node = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_TARGET);
    return te_should_perform_action_on(graph, action, node);
}
Пример #3
0
/*
 * Adjust the job count of the node(s) an action is accounted to.
 *
 * Nothing is done for non-resource actions or actions without a target.
 *
 * action  action whose job accounting changes
 * offset  amount passed on to te_update_job_count_on()
 */
static void
te_update_job_count(crm_action_t * action, int offset)
{
    const char *op_name = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
    const char *node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);

    if (action->type != action_type_rsc) {
        /* Non-resource actions are never limited */
        return;
    }
    if (node == NULL) {
        return;
    }

    /* A router node means the action is performed on a remote node.  For
     * now such actions are counted against the job list of the cluster node
     * hosting the connection resource. */
    node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
    if (node != NULL) {
        te_update_job_count_on(node, offset, FALSE);
        return;
    }

    if (safe_str_eq(op_name, CRMD_ACTION_MIGRATE)
        || safe_str_eq(op_name, CRMD_ACTION_MIGRATED)) {

        /* Both ends of a migration are updated, with the last argument TRUE */
        const char *source = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE);
        const char *destination = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_TARGET);

        te_update_job_count_on(source, offset, TRUE);
        te_update_job_count_on(destination, offset, TRUE);
        return;
    }

    node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
    te_update_job_count_on(node, offset, FALSE);
}
/*
 * Read the XML_ATTR_TE_TARGET_RC meta value of an action.
 *
 * Returns the value parsed by crm_parse_int() (with "0" as its default
 * text), or 0 when the action carries no such meta value.
 */
static int get_target_rc(crm_action_t *action)
{
	const char *rc_text = crm_meta_value(action->params, XML_ATTR_TE_TARGET_RC);

	return (rc_text == NULL) ? 0 : crm_parse_int(rc_text, "0");
}
Пример #5
0
/*
 * Adjust the job count of the node(s) an action is accounted to.
 *
 * Nothing is done for non-resource actions or actions without a target.
 * Migration actions update both the migration source and target; every
 * other resource action updates only its target node.
 *
 * action  action whose job accounting changes
 * offset  amount passed on to te_update_job_count_on()
 */
static void
te_update_job_count(crm_action_t * action, int offset)
{
    const char *op_name = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
    const char *node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
    gboolean migrating = FALSE;

    if (action->type != action_type_rsc || node == NULL) {
        /* Nothing to account for */
        return;
    }

    migrating = safe_str_eq(op_name, CRMD_ACTION_MIGRATE)
        || safe_str_eq(op_name, CRMD_ACTION_MIGRATED);

    if (!migrating) {
        te_update_job_count_on(node, offset, FALSE);
        return;
    }

    {
        const char *source = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE);
        const char *destination = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_TARGET);

        te_update_job_count_on(source, offset, TRUE);
        te_update_job_count_on(destination, offset, TRUE);
    }
}
Пример #6
0
/*
 * Ask the fencing subsystem to fence the node named by a graph action.
 *
 * The action must carry an ID, a target, a target UUID and a
 * "stonith_action" meta value; otherwise its XML is logged as "BadAction"
 * and nothing is requested.
 *
 * Returns FALSE for a malformed action, TRUE once the fence request and its
 * callback have been submitted (the result of the request itself is handed
 * to register_callback(), not inspected here).
 */
static gboolean
te_fence_node(crm_graph_t * graph, crm_action_t * action)
{
    const char *id = ID(action->xml);
    const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
    const char *uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
    const char *type = crm_meta_value(action->params, "stonith_action");

    gboolean malformed = FALSE;
    enum stonith_call_options call_opts = st_opt_none;
    char *callback_key = NULL;
    int call_id = 0;

    /* Run every check so that each missing field is reported */
    CRM_CHECK(id != NULL, malformed = TRUE);
    CRM_CHECK(uuid != NULL, malformed = TRUE);
    CRM_CHECK(type != NULL, malformed = TRUE);
    CRM_CHECK(target != NULL, malformed = TRUE);

    if (malformed) {
        crm_log_xml_warn(action->xml, "BadAction");
        return FALSE;
    }

    crm_notice("Requesting fencing (%s) of node %s "
               CRM_XS " action=%s timeout=%d",
               type, target, id, transition_graph->stonith_timeout);

    /* Passing NULL means block until we can connect... */
    te_connect_stonith(NULL);

    if (crmd_join_phase_count(crm_join_confirmed) == 1) {
        call_opts |= st_opt_allow_suicide;
    }

    /* stonith_timeout is divided by 1000 for both fencing API calls */
    call_id = stonith_api->cmds->fence(stonith_api, call_opts, target, type,
                                       transition_graph->stonith_timeout / 1000, 0);

    /* The transition key is handed to the callback as its user data */
    callback_key = generate_transition_key(transition_graph->id, action->id, 0, te_uuid);
    stonith_api->cmds->register_callback(stonith_api, call_id,
                                         transition_graph->stonith_timeout / 1000,
                                         st_opt_timeout_updates, callback_key,
                                         "tengine_stonith_callback", tengine_stonith_callback);

    return TRUE;
}
Пример #7
0
/*
 * Ask the fencing subsystem to fence the node named by a graph action.
 *
 * The action must carry an ID, a target, a target UUID and a
 * "stonith_action" meta value; otherwise its XML is logged as "BadAction"
 * and nothing is requested.
 *
 * Returns FALSE for a malformed action, TRUE once the fence request and its
 * callback have been submitted (the result of the request itself is handed
 * to register_callback(), not inspected here).
 */
static gboolean
te_fence_node(crm_graph_t * graph, crm_action_t * action)
{
    const char *id = ID(action->xml);
    const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
    const char *uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
    const char *type = crm_meta_value(action->params, "stonith_action");

    gboolean malformed = FALSE;
    enum stonith_call_options call_opts = st_opt_none;
    char *callback_key = NULL;
    int call_id = 0;

    /* Run every check so that each missing field is reported */
    CRM_CHECK(id != NULL, malformed = TRUE);
    CRM_CHECK(uuid != NULL, malformed = TRUE);
    CRM_CHECK(type != NULL, malformed = TRUE);
    CRM_CHECK(target != NULL, malformed = TRUE);

    if (malformed) {
        crm_log_xml_warn(action->xml, "BadAction");
        return FALSE;
    }

    te_log_action(LOG_INFO,
                  "Executing %s fencing operation (%s) on %s (timeout=%d)",
                  type, id, target, transition_graph->stonith_timeout);

    /* Passing NULL means block until we can connect... */
    te_connect_stonith(NULL);

    if (finalized_nodes && g_hash_table_size(finalized_nodes) == 1) {
        call_opts |= st_opt_allow_suicide;
    }

    /* stonith_timeout is divided by 1000 for both fencing API calls */
    call_id = stonith_api->cmds->fence(stonith_api, call_opts, target, type,
                                       transition_graph->stonith_timeout / 1000);

    /* The transition key is handed to the callback as its user data */
    callback_key = generate_transition_key(transition_graph->id, action->id, 0, te_uuid);
    stonith_api->cmds->register_callback(stonith_api, call_id,
                                         transition_graph->stonith_timeout / 1000,
                                         FALSE, callback_key,
                                         "tengine_stonith_callback", tengine_stonith_callback);

    return TRUE;
}
Пример #8
0
/*
 * Send a resource operation from the transition graph to the LRM of the
 * node it targets.
 *
 * The operation is stamped with a transition key, then either handed
 * directly to do_lrm_invoke() when the target is this node or sent as a
 * cluster message otherwise.  Unless the action is marked "no wait", an
 * action timer is started; an invalid (<= 0) timeout is replaced by the
 * graph's network delay.
 *
 * graph   transition graph (supplies network_delay and the timer context)
 * action  action to execute; action->xml must not be NULL
 *
 * Returns FALSE if the action names no node or the cluster message could
 * not be sent, TRUE otherwise.
 */
static gboolean
te_rsc_command(crm_graph_t * graph, crm_action_t * action)
{
    /* never overwrite stop actions in the CIB with
     *   anything other than completed results
     *
     * Writing pending stops makes it look like the
     *   resource is running again
     */
    xmlNode *cmd = NULL;
    xmlNode *rsc_op = NULL;

    gboolean rc = TRUE;
    gboolean no_wait = FALSE;
    gboolean is_local = FALSE;

    char *counter = NULL;
    const char *task = NULL;
    const char *value = NULL;
    const char *on_node = NULL;
    const char *task_uuid = NULL;

    CRM_ASSERT(action != NULL);
    CRM_ASSERT(action->xml != NULL);

    action->executed = FALSE;
    rsc_op = action->xml;

    /* Look the task up before validating the node, so that the error
     * message below names the offending task instead of a NULL value */
    task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
    on_node = crm_element_value(rsc_op, XML_LRM_ATTR_TARGET);

    CRM_CHECK(on_node != NULL && strlen(on_node) != 0,
              te_log_action(LOG_ERR, "Corrupted command(id=%s) %s: no node",
                            ID(action->xml), crm_str(task));
              return FALSE);

    task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
    counter =
        generate_transition_key(transition_graph->id, action->id, get_target_rc(action), te_uuid);
    crm_xml_add(rsc_op, XML_ATTR_TRANSITION_KEY, counter);

    if (safe_str_eq(on_node, fsa_our_uname)) {
        is_local = TRUE;
    }

    value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT);
    if (crm_is_true(value)) {
        no_wait = TRUE;
    }

    crm_info("Initiating action %d: %s %s on %s%s%s",
             action->id, task, task_uuid, on_node,
             is_local ? " (local)" : "", no_wait ? " - no waiting" : "");

    cmd = create_request(CRM_OP_INVOKE_LRM, rsc_op, on_node,
                         CRM_SYSTEM_LRMD, CRM_SYSTEM_TENGINE, NULL);

    if (is_local) {
        /* shortcut local resource commands */
        ha_msg_input_t data = {
            .msg = cmd,
            .xml = rsc_op,
        };

        fsa_data_t msg = {
            .id = 0,
            .data = &data,
            .data_type = fsa_dt_ha_msg,
            .fsa_input = I_NULL,
            .fsa_cause = C_FSA_INTERNAL,
            .actions = A_LRM_INVOKE,
            .origin = __FUNCTION__,
        };

        do_lrm_invoke(A_LRM_INVOKE, C_FSA_INTERNAL, fsa_state, I_NULL, &msg);

    } else {
        rc = send_cluster_message(on_node, crm_msg_lrmd, cmd, TRUE);
    }

    crm_free(counter);
    free_xml(cmd);

    action->executed = TRUE;
    if (rc == FALSE) {
        crm_err("Action %d failed: send", action->id);
        return FALSE;

    } else if (no_wait) {
        /* Nobody will report back, so confirm the action immediately */
        action->confirmed = TRUE;
        update_graph(transition_graph, action);
        trigger_graph();

    } else {
        if (action->timeout <= 0) {
            crm_err("Action %d: %s %s on %s had an invalid timeout (%dms).  Using %dms instead",
                    action->id, task, task_uuid, on_node, action->timeout, graph->network_delay);
            action->timeout = graph->network_delay;
        }
        te_start_action_timer(graph, action);
    }

    value = crm_meta_value(action->params, XML_OP_ATTR_PENDING);
    if (crm_is_true(value)) {
        /* write a "pending" entry to the CIB, inhibit notification */
        crm_info("Recording pending op %s in the CIB", task_uuid);
        cib_action_update(action, LRM_OP_PENDING, EXECRA_STATUS_UNKNOWN);
    }

    return TRUE;
}
Пример #9
0
/*
 * Send a CRM-level operation from the transition graph to the crmd of the
 * node it targets.
 *
 * A shutdown of the local node is not sent at all: it is recorded as the
 * graph's completion action and the action is confirmed immediately.
 * Every other operation is sent as a cluster message stamped with a
 * transition key.  Unless the action is marked "no wait", an action timer
 * is started; an invalid (<= 0) timeout is replaced by the graph's network
 * delay.
 *
 * graph   transition graph the action belongs to
 * action  action to execute
 *
 * Returns FALSE if the action names no node or the cluster message could
 * not be sent, TRUE otherwise.
 */
static gboolean
te_crm_command(crm_graph_t * graph, crm_action_t * action)
{
    char *counter = NULL;
    xmlNode *cmd = NULL;
    gboolean is_local = FALSE;

    const char *id = NULL;
    const char *task = NULL;
    const char *value = NULL;
    const char *on_node = NULL;

    gboolean rc = TRUE;
    gboolean no_wait = FALSE;

    id = ID(action->xml);
    task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
    on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);

    CRM_CHECK(on_node != NULL && strlen(on_node) != 0,
              te_log_action(LOG_ERR, "Corrupted command (id=%s) %s: no node",
                            crm_str(id), crm_str(task));
              return FALSE);

    if (safe_str_eq(on_node, fsa_our_uname)) {
        is_local = TRUE;
    }

    value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT);
    if (crm_is_true(value)) {
        no_wait = TRUE;
    }

    /* Log only once both flags are known; logging earlier would always
     * print them as unset */
    te_log_action(LOG_INFO, "Executing crm-event (%s): %s on %s%s%s",
                  crm_str(id), crm_str(task), on_node,
                  is_local ? " (local)" : "", no_wait ? " - no waiting" : "");

    if (is_local && safe_str_eq(task, CRM_OP_SHUTDOWN)) {
        /* defer until everything else completes */
        te_log_action(LOG_INFO, "crm-event (%s) is a local shutdown", crm_str(id));
        graph->completion_action = tg_shutdown;
        graph->abort_reason = "local shutdown";
        action->confirmed = TRUE;
        update_graph(graph, action);
        trigger_graph();
        return TRUE;
    }

    cmd = create_request(task, action->xml, on_node, CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL);

    counter =
        generate_transition_key(transition_graph->id, action->id, get_target_rc(action), te_uuid);
    crm_xml_add(cmd, XML_ATTR_TRANSITION_KEY, counter);

    rc = send_cluster_message(on_node, crm_msg_crmd, cmd, TRUE);
    crm_free(counter);
    free_xml(cmd);

    if (rc == FALSE) {
        crm_err("Action %d failed: send", action->id);
        return FALSE;

    } else if (no_wait) {
        /* Nobody will report back, so confirm the action immediately */
        action->confirmed = TRUE;
        update_graph(graph, action);
        trigger_graph();

    } else {
        if (action->timeout <= 0) {
            crm_err("Action %d: %s on %s had an invalid timeout (%dms).  Using %dms instead",
                    action->id, task, on_node, action->timeout, graph->network_delay);
            action->timeout = graph->network_delay;
        }
        te_start_action_timer(graph, action);
    }

    return TRUE;
}
Пример #10
0
/*
 * Apply the result of an executed operation to its action in the
 * transition graph.
 *
 * returns -1 if no action with the given ID exists
 * returns the ID of the matched action otherwise (including when the
 *            operation is still pending or has failed)
 */
int
match_graph_event(int action_id, xmlNode * event, const char *event_node,
                  int op_status, int op_rc, int target_rc)
{
    const char *op_key = NULL;
    const char *node = NULL;
    crm_action_t *matched = get_action(action_id, FALSE);

    if (matched == NULL) {
        return -1;
    }

    op_status = status_from_rc(matched, op_status, op_rc, target_rc);
    if (op_status != PCMK_LRM_OP_DONE) {
        update_failcount(event, event_node, op_rc, target_rc, FALSE);
    }

    /* Translate the operation status into the action's failed flag */
    switch (op_status) {
        case PCMK_LRM_OP_PENDING:
            crm_debug("Ignoring pending operation");
            return matched->id;

        case PCMK_LRM_OP_DONE:
            break;

        case PCMK_LRM_OP_CANCELLED:
            /* do nothing?? */
            crm_err("Dont know what to do for cancelled ops yet");
            break;

        case PCMK_LRM_OP_ERROR:
        case PCMK_LRM_OP_TIMEOUT:
        case PCMK_LRM_OP_NOTSUPPORTED:
            matched->failed = TRUE;
            break;

        default:
            matched->failed = TRUE;
            crm_err("Unsupported action result: %d", op_status);
            break;
    }

    /* The event has arrived: stop its timer (if any) and confirm it */
    stop_te_timer(matched->timer);
    te_action_confirmed(matched);

    update_graph(transition_graph, matched);
    trigger_graph();

    if (matched->failed) {
        /* A failure that is allowed is cleared; any other aborts the transition */
        if (crm_is_true(crm_meta_value(matched->params, XML_ATTR_TE_ALLOWFAIL))) {
            matched->failed = FALSE;
        } else {
            abort_transition(matched->synapse->priority + 1, tg_restart, "Event failed", event);
        }
    }

    op_key = crm_element_value(event, XML_LRM_ATTR_TASK_KEY);
    node = crm_element_value(matched->xml, XML_LRM_ATTR_TARGET);
    crm_info("Action %s (%d) confirmed on %s (rc=%d)",
             crm_str(op_key), matched->id, crm_str(node), op_status);

    /* determine if this action affects a remote-node's online/offline status */
    process_remote_node_action(matched, event);
    return matched->id;
}
Пример #11
0
/*
 * Process an operation-result event and relate it to the current
 * transition graph.
 *
 * The event's transition key is decoded to find out which transition and
 * action produced it.  Events that do not belong to an action of the
 * active graph abort the transition; events that do are passed to
 * match_graph_event().
 *
 * event       operation XML (must not be NULL)
 * event_node  node name passed on to update_failcount()
 *
 * Returns TRUE only for events classified as "Foreign event" or "Old event"
 * for which update_failcount() did not report a failure; FALSE in every
 * other case, including events without a transition key, undecodable keys
 * and pending operations.
 */
gboolean
process_graph_event(xmlNode * event, const char *event_node)
{
    int rc = -1;
    int status = -1;
    int callid = -1;

    int action_num = -1;
    crm_action_t *action = NULL;

    int target_rc = -1;
    int transition_num = -1;
    char *update_te_uuid = NULL;

    gboolean stop_early = FALSE;
    gboolean ignore_failures = FALSE;
    const char *id = NULL;
    const char *desc = NULL;
    const char *magic = NULL;

    CRM_ASSERT(event != NULL);

/* Example event:
<lrm_rsc_op id="rsc_east-05_last_0" operation_key="rsc_east-05_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.6" transition-key="9:2:7:be2e97d9-05e2-439d-863e-48f7aecab2aa" transition-magic="0:7;9:2:7:be2e97d9-05e2-439d-863e-48f7aecab2aa" call-id="17" rc-code="7" op-status="0" interval="0" last-run="1355361636" last-rc-change="1355361636" exec-time="128" queue-time="0" op-digest="c81f5f40b1c9e859c992e800b1aa6972"/>
*/

    id = crm_element_value(event, XML_LRM_ATTR_TASK_KEY);
    crm_element_value_int(event, XML_LRM_ATTR_RC, &rc);
    crm_element_value_int(event, XML_LRM_ATTR_OPSTATUS, &status);
    crm_element_value_int(event, XML_LRM_ATTR_CALLID, &callid);

    magic = crm_element_value(event, XML_ATTR_TRANSITION_KEY);
    if (magic == NULL) {
        /* non-change */
        return FALSE;
    }

    if (decode_transition_key(magic, &update_te_uuid, &transition_num,
                              &action_num, &target_rc) == FALSE) {
        crm_err("Invalid event %s.%d detected: %s", id, callid, magic);
        abort_transition(INFINITY, tg_restart, "Bad event", event);
        return FALSE;
    }

    if (status == PCMK_LRM_OP_PENDING) {
        /* Nothing to evaluate yet; just release the decoded UUID */
        goto bail;
    }

    if (transition_num == -1) {
        desc = "initiated outside of the cluster";
        abort_transition(INFINITY, tg_restart, "Unexpected event", event);

    } else if ((action_num < 0) || (crm_str_eq(update_te_uuid, te_uuid, TRUE) == FALSE)) {
        desc = "initiated by a different node";
        abort_transition(INFINITY, tg_restart, "Foreign event", event);
        stop_early = TRUE;      /* This could be an lrm status refresh */

    } else if (transition_graph->id != transition_num) {
        desc = "arrived really late";
        abort_transition(INFINITY, tg_restart, "Old event", event);
        stop_early = TRUE;      /* This could be an lrm status refresh */

    } else if (transition_graph->complete) {
        desc = "arrived late";
        abort_transition(INFINITY, tg_restart, "Inactive graph", event);

    } else {
        action = get_action(action_num, FALSE);

        if (action == NULL) {
            desc = "unknown";
            abort_transition(INFINITY, tg_restart, "Unknown event", event);

        } else {
            ignore_failures = safe_str_eq(
                crm_meta_value(action->params, XML_OP_ATTR_ON_FAIL), "ignore");
            match_graph_event(action, event, status, rc, target_rc, ignore_failures);
        }
    }

    if (action && (rc == target_rc)) {
        crm_trace("Processed update to %s: %s", id, magic);
    } else {
        if (update_failcount(event, event_node, rc, target_rc,
                             (transition_num == -1), ignore_failures)) {
            /* Turns out this wasn't an lrm status refresh update after all */
            stop_early = FALSE;
            desc = "failed";
        }

        /* desc is still NULL when an action was matched, its rc differs from
         * target_rc and update_failcount() returned FALSE.  Passing NULL to
         * a %s conversion is undefined behaviour, so substitute a marker. */
        crm_info("Detected action (%d.%d) %s.%d=%s: %s", transition_num,
                 action_num, id, callid, services_ocf_exitcode_str(rc),
                 (desc != NULL) ? desc : "<null>");
    }

  bail:
    free(update_te_uuid);
    return stop_early;
}
/*
 * Ask stonithd to fence the node named by a graph action.
 *
 * The action must carry an ID, a target, a target UUID and a
 * "stonith_action" meta value; if any is missing the action XML is logged
 * as "BadAction" and FALSE is returned.
 *
 * A "poweroff" stonith_action becomes a POWEROFF request, anything else a
 * RESET.  The request structure and its strings are released before
 * returning.
 *
 * Returns TRUE once the request has been attempted, even when
 * stonithd_node_fence() reports a failure (which is only logged).
 */
static gboolean
te_fence_node(crm_graph_t *graph, crm_action_t *action)
{
	const char *id = ID(action->xml);
	const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
	const char *uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
	const char *type = crm_meta_value(action->params, "stonith_action");
	stonith_ops_t *request = NULL;

	/* Each check bails out on the first missing field */
	CRM_CHECK(id != NULL,
		  crm_log_xml_warn(action->xml, "BadAction");
		  return FALSE);
	CRM_CHECK(uuid != NULL,
		  crm_log_xml_warn(action->xml, "BadAction");
		  return FALSE);
	CRM_CHECK(type != NULL,
		  crm_log_xml_warn(action->xml, "BadAction");
		  return FALSE);
	CRM_CHECK(target != NULL,
		  crm_log_xml_warn(action->xml, "BadAction");
		  return FALSE);

	te_log_action(LOG_INFO,
		      "Executing %s fencing operation (%s) on %s (timeout=%d)",
		      type, id, target, transition_graph->stonith_timeout);

	/* Passing NULL means block until we can connect... */
	te_connect_stonith(NULL);

	/* Build the request */
	crm_malloc0(request, sizeof(stonith_ops_t));
	request->optype = safe_str_eq(type, "poweroff") ? POWEROFF : RESET;
	request->timeout = transition_graph->stonith_timeout;
	request->node_name = crm_strdup(target);
	request->node_uuid = crm_strdup(uuid);

	/* The transition key travels with the request as its private data */
	request->private_data = generate_transition_key(
	    transition_graph->id, action->id, 0, te_uuid);

	CRM_ASSERT(stonithd_input_IPC_channel() != NULL);

	if (stonithd_node_fence(request) != ST_OK) {
		crm_err("Cannot fence %s: stonithd_node_fence() call failed ",
			target);
	}

	/* The request and everything it owns are released here */
	crm_free(request->node_name);
	crm_free(request->node_uuid);
	crm_free(request->private_data);
	crm_free(request);
	return TRUE;
}
Пример #13
0
/*
 * Callback invoked with the result of a fencing request.
 *
 * data->userdata is the transition key string registered together with the
 * request.  This function frees it on every path that gets past the initial
 * NULL check.
 *
 * On success the matching graph action is confirmed and, depending on the
 * "stonith_action" meta value, either node attributes are updated ("on") or
 * a stonith update is sent.  On failure the action is marked failed, the
 * target's fail count is incremented and the transition is aborted.
 * Results that do not belong to an action of the current transition are
 * ignored.
 *
 * stonith  fencing API handle (not used here)
 * data     call ID, result code and user data of the finished request
 */
void
tengine_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data)
{
    char *uuid = NULL;
    int stonith_id = -1;
    int transition_id = -1;
    crm_action_t *action = NULL;
    const char *target = NULL;
    int call_id = data->call_id;
    int rc = data->rc;
    char *userdata = data->userdata;

    CRM_CHECK(userdata != NULL, return);
    crm_notice("Stonith operation %d/%s: %s (%d)", call_id, (char *)userdata,
               pcmk_strerror(rc), rc);

    if (AM_I_DC == FALSE) {
        /* Leave through the common exit so the key is freed here as well;
         * a plain return would leak it */
        goto bail;
    }

    /* filter out old STONITH actions */
    CRM_CHECK(decode_transition_key(userdata, &uuid, &transition_id, &stonith_id, NULL),
              goto bail);

    if (transition_graph->complete || stonith_id < 0 || safe_str_neq(uuid, te_uuid)
        || transition_graph->id != transition_id) {
        crm_info("Ignoring STONITH action initiated outside of the current transition");
        goto bail;
    }

    action = get_action(stonith_id, FALSE);
    if (action == NULL) {
        crm_err("Stonith action not matched");
        goto bail;
    }

    stop_te_timer(action->timer);
    target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);

    if (rc == pcmk_ok) {
        /* Named target_uuid so it does not shadow the decoded key's uuid,
         * which is heap-allocated and freed at bail */
        const char *target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
        const char *op = crm_meta_value(action->params, "stonith_action");

        crm_info("Stonith operation %d for %s passed", call_id, target);
        if (action->confirmed == FALSE) {
            te_action_confirmed(action);
            if (safe_str_eq("on", op)) {
                const char *value = NULL;
                char *now = crm_itoa(time(NULL));

                update_attrd(target, CRM_ATTR_UNFENCED, now, NULL, FALSE);
                free(now);

                value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_ALL);
                update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL, FALSE);

                value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_SECURE);
                update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL, FALSE);

            } else if (action->sent_update == FALSE) {
                send_stonith_update(action, target, target_uuid);
                action->sent_update = TRUE;
            }
        }
        st_fail_count_reset(target);

    } else {
        enum transition_action abort_action = tg_restart;

        action->failed = TRUE;
        crm_notice("Stonith operation %d for %s failed (%s): aborting transition.",
                   call_id, target, pcmk_strerror(rc));

        /* If no fence devices were available, there's no use in immediately
         * checking again, so don't start a new transition in that case.
         */
        if (rc == -ENODEV) {
            crm_warn("No devices found in cluster to fence %s, giving up",
                     target);
            abort_action = tg_stop;
        }

        /* Increment the fail count now, so abort_for_stonith_failure() can
         * check it. Non-DC nodes will increment it in tengine_stonith_notify().
         */
        st_fail_count_increment(target);
        abort_for_stonith_failure(abort_action, target, NULL);
    }

    update_graph(transition_graph, action);
    trigger_graph();

  bail:
    free(userdata);
    free(uuid);
    return;
}
Пример #14
0
/*
 * Send a resource operation from the transition graph to the LRM that
 * will execute it.
 *
 * The request is routed to the action's router node when it has one and to
 * its target node otherwise.  If that node is this node, the request is
 * handed directly to do_lrm_invoke(); otherwise it is sent as a cluster
 * message.  Unless the action is marked "no wait" or was already confirmed,
 * the job count is incremented and an action timer is started; an invalid
 * (<= 0) timeout is replaced by the graph's network delay.
 *
 * graph   transition graph (supplies network_delay and the timer context)
 * action  action to execute; action->xml must not be NULL
 *
 * Returns FALSE if the action names no node or the cluster message could
 * not be sent, TRUE otherwise.
 */
static gboolean
te_rsc_command(crm_graph_t * graph, crm_action_t * action)
{
    /* never overwrite stop actions in the CIB with
     *   anything other than completed results
     *
     * Writing pending stops makes it look like the
     *   resource is running again
     */
    xmlNode *cmd = NULL;
    xmlNode *rsc_op = NULL;

    gboolean rc = TRUE;
    gboolean no_wait = FALSE;
    gboolean is_local = FALSE;

    char *counter = NULL;
    const char *task = NULL;
    const char *value = NULL;
    const char *on_node = NULL;
    const char *router_node = NULL;
    const char *task_uuid = NULL;

    CRM_ASSERT(action != NULL);
    CRM_ASSERT(action->xml != NULL);

    action->executed = FALSE;
    rsc_op = action->xml;

    /* Look the task up before validating the node, so that the error
     * message below names the offending task instead of a NULL value */
    task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
    on_node = crm_element_value(rsc_op, XML_LRM_ATTR_TARGET);

    CRM_CHECK(on_node != NULL && strlen(on_node) != 0,
              crm_err("Corrupted command(id=%s) %s: no node", ID(action->xml), crm_str(task));
              return FALSE);

    task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
    router_node = crm_element_value(rsc_op, XML_LRM_ATTR_ROUTER_NODE);

    if (!router_node) {
        /* No router node: the request goes straight to the target */
        router_node = on_node;
    }

    counter =
        generate_transition_key(transition_graph->id, action->id, get_target_rc(action), te_uuid);
    crm_xml_add(rsc_op, XML_ATTR_TRANSITION_KEY, counter);

    if (safe_str_eq(router_node, fsa_our_uname)) {
        is_local = TRUE;
    }

    value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT);
    if (crm_is_true(value)) {
        no_wait = TRUE;
    }

    crm_notice("Initiating %s operation %s%s on %s%s "CRM_XS" action %d",
               task, task_uuid, (is_local? " locally" : ""), on_node,
               (no_wait? " without waiting" : ""), action->id);

    cmd = create_request(CRM_OP_INVOKE_LRM, rsc_op, router_node,
                         CRM_SYSTEM_LRMD, CRM_SYSTEM_TENGINE, NULL);

    if (is_local) {
        /* shortcut local resource commands */
        ha_msg_input_t data = {
            .msg = cmd,
            .xml = rsc_op,
        };

        fsa_data_t msg = {
            .id = 0,
            .data = &data,
            .data_type = fsa_dt_ha_msg,
            .fsa_input = I_NULL,
            .fsa_cause = C_FSA_INTERNAL,
            .actions = A_LRM_INVOKE,
            .origin = __FUNCTION__,
        };

        do_lrm_invoke(A_LRM_INVOKE, C_FSA_INTERNAL, fsa_state, I_NULL, &msg);

    } else {
        rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_lrmd, cmd, TRUE);
    }

    free(counter);
    free_xml(cmd);

    action->executed = TRUE;

    if (rc == FALSE) {
        crm_err("Action %d failed: send", action->id);
        return FALSE;

    } else if (no_wait) {
        crm_info("Action %d confirmed - no wait", action->id);
        action->confirmed = TRUE; /* Just mark confirmed.
                                   * Don't bump the job count only to immediately decrement it
                                   */
        update_graph(transition_graph, action);
        trigger_graph();

    } else if (action->confirmed == TRUE) {
        crm_debug("Action %d: %s %s on %s(timeout %dms) was already confirmed.",
                  action->id, task, task_uuid, on_node, action->timeout);
    } else {
        if (action->timeout <= 0) {
            crm_err("Action %d: %s %s on %s had an invalid timeout (%dms).  Using %dms instead",
                    action->id, task, task_uuid, on_node, action->timeout, graph->network_delay);
            action->timeout = graph->network_delay;
        }
        te_update_job_count(action, 1);
        te_start_action_timer(graph, action);
    }

    return TRUE;
}
Пример #15
0
/*
 * Convert a resource action of the transition graph into a pe_operation
 * and hand it to run_fn().
 *
 * A "probe_complete" operation is confirmed immediately without running
 * anything.  An action without a resource child element is logged as "Bad"
 * and rejected.
 *
 * graph   transition graph the action belongs to
 * action  action to execute
 *
 * Returns TRUE when the action was confirmed or handed to run_fn(), FALSE
 * when the action has no resource element or the operation could not be
 * allocated.
 */
static gboolean
exec_rsc_action(crm_graph_t *graph, crm_action_t *action)
{
	lrm_op_t *op = NULL;
	struct pe_operation *pe_op;
	const char *target_rc_s = crm_meta_value(action->params, XML_ATTR_TE_TARGET_RC);
	xmlNode *action_rsc = first_named_child(action->xml, XML_CIB_TAG_RESOURCE);
	char *node = crm_element_value_copy(action->xml, XML_LRM_ATTR_TARGET);
	char *uuid;
	const char *tmp_provider;
	xmlNode *params_all;

	qb_enter();

	if (safe_str_eq(crm_element_value(action->xml, "operation"), "probe_complete")) {
		/* Nothing to run; confirm right away */
		crm_free(node);
		action->confirmed = TRUE;
		update_graph(graph, action);
		graph_updated = TRUE;
		qb_leave();
		return TRUE;
	}

	if (action_rsc == NULL) {
		crm_log_xml_err(action->xml, "Bad");
		crm_free(node);
		qb_leave();
		return FALSE;
	}
	uuid = crm_element_value_copy(action->xml, XML_LRM_ATTR_TARGET_UUID);

	pe_op = calloc(1, sizeof *pe_op);
	if (pe_op == NULL) {
		/* Out of memory: release the copies made so far instead of
		 * dereferencing a NULL operation below */
		crm_log_xml_err(action->xml, "NoMemory");
		crm_free(node);
		crm_free(uuid);
		qb_leave();
		return FALSE;
	}

	/* From here on the operation holds the node and uuid copies */
	pe_op->refcount = 1;
	pe_op->hostname = node;
	pe_op->node_uuid = uuid;
	pe_op->user_data = run_user_data;
	pe_op->rname = strdup(ID(action_rsc));
	pe_op->rclass = strdup(crm_element_value(action_rsc, XML_AGENT_ATTR_CLASS));
	tmp_provider = crm_element_value(action_rsc, XML_AGENT_ATTR_PROVIDER);
	if (tmp_provider) {
		pe_op->rprovider = strdup(tmp_provider);
	}
	pe_op->rtype = strdup(crm_element_value(action_rsc, XML_ATTR_TYPE));

	/* target_outcome stays 0 (from calloc) when no target rc is given */
	if (target_rc_s != NULL) {
		pe_op->target_outcome = crm_parse_int(target_rc_s, "0");
	}
	op = convert_graph_action(NULL, action, 0, pe_op->target_outcome);

	params_all = create_xml_node(NULL, XML_TAG_PARAMS);
	if (op->params != NULL) {
		/* Same guard as for the parameter copy further down */
		g_hash_table_foreach(op->params, hash2field, params_all);
	}
/*
 * TODO at some point.
	g_hash_table_foreach(action->extra, hash2field, params_all);
	g_hash_table_foreach(rsc->parameters, hash2field, params_all);
	g_hash_table_foreach(action->meta, hash2metafield, params_all);
*/
	filter_action_parameters(params_all, PE_CRM_VERSION);
	pe_op->op_digest = calculate_operation_digest(params_all, PE_CRM_VERSION);

	pe_op->method = strdup(op->op_type);

	pe_op->params = qb_skiplist_create();
	if (op->params != NULL) {
		g_hash_table_foreach(op->params, dup_attr, pe_op->params);
	}

	pe_op->interval = op->interval;
	pe_op->timeout = op->timeout;
	if (pe_op->timeout == 0) {
		/* No timeout given: use the default for one-shot operations and
		 * half the interval for recurring ones */
		if (pe_op->interval == 0) {
			pe_op->timeout = PE_DEFAULT_TIMEOUT;
		} else {
			pe_op->timeout = pe_op->interval / 2;
		}
	}
	pe_op->action = action;
	pe_op->graph = graph;
	pe_op->action_id = action->id;
	pe_op->graph_id = graph->id;
	pe_op->time_execed = qb_util_stopwatch_create();

	free_lrm_op(op);
	free_xml(params_all);

	run_fn(pe_op);

	qb_leave();

	return TRUE;
}