Beispiel #1
0
void
register_fsa_error_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input,
                       fsa_data_t * cur_data, void *new_data, const char *raised_from)
{
    /* save the current actions if any */
    if (fsa_actions != A_NOTHING) {
        register_fsa_input_adv(cur_data ? cur_data->fsa_cause : C_FSA_INTERNAL,
                               I_NULL, cur_data ? cur_data->data : NULL,
                               fsa_actions, TRUE, __FUNCTION__);
    }

    /* reset the action list */
    fsa_actions = A_NOTHING;

    /* register the error */
    register_fsa_input_adv(cause, input, new_data, A_NOTHING, TRUE, raised_from);
}
Beispiel #2
0
enum crmd_fsa_input
handle_request(xmlNode * stored_msg)
{
    xmlNode *msg = NULL;
    const char *op = crm_element_value(stored_msg, F_CRM_TASK);

    /* Optimize this for the DC - it has the most to do */

    if (op == NULL) {
        crm_log_xml_err(stored_msg, "Bad message");
        return I_NULL;
    }

    /*========== DC-Only Actions ==========*/
    if (AM_I_DC) {
        if (strcmp(op, CRM_OP_JOIN_ANNOUNCE) == 0) {
            return I_NODE_JOIN;

        } else if (strcmp(op, CRM_OP_JOIN_REQUEST) == 0) {
            return I_JOIN_REQUEST;

        } else if (strcmp(op, CRM_OP_JOIN_CONFIRM) == 0) {
            return I_JOIN_RESULT;

        } else if (strcmp(op, CRM_OP_SHUTDOWN) == 0) {
            const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM);
            gboolean dc_match = safe_str_eq(host_from, fsa_our_dc);

            if (is_set(fsa_input_register, R_SHUTDOWN)) {
                crm_info("Shutting ourselves down (DC)");
                return I_STOP;

            } else if (dc_match) {
                crm_err("We didnt ask to be shut down, yet our"
                        " TE is telling us too." " Better get out now!");
                return I_TERMINATE;

            } else if (fsa_state != S_STOPPING) {
                crm_err("Another node is asking us to shutdown" " but we think we're ok.");
                return I_ELECTION;
            }

        } else if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) {
            /* a slave wants to shut down */
            /* create cib fragment and add to message */
            return handle_shutdown_request(stored_msg);
        }
    }

    /*========== common actions ==========*/
    if (strcmp(op, CRM_OP_NOVOTE) == 0) {
        ha_msg_input_t fsa_input;

        fsa_input.msg = stored_msg;
        register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input,
                               A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE, __FUNCTION__);

    } else if (strcmp(op, CRM_OP_CLEAR_FAILCOUNT) == 0) {
        return handle_failcount_op(stored_msg);

    } else if (strcmp(op, CRM_OP_VOTE) == 0) {
        /* count the vote and decide what to do after that */
        ha_msg_input_t fsa_input;

        fsa_input.msg = stored_msg;
        register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input,
                               A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE, __FUNCTION__);

        /* Sometimes we _must_ go into S_ELECTION */
        if (fsa_state == S_HALT) {
            crm_debug("Forcing an election from S_HALT");
            return I_ELECTION;
#if 0
        } else if (AM_I_DC) {
            /* This is the old way of doing things but what is gained? */
            return I_ELECTION;
#endif
        }

    } else if (strcmp(op, CRM_OP_JOIN_OFFER) == 0) {
        crm_debug("Raising I_JOIN_OFFER: join-%s", crm_element_value(stored_msg, F_CRM_JOIN_ID));
        return I_JOIN_OFFER;

    } else if (strcmp(op, CRM_OP_JOIN_ACKNAK) == 0) {
        crm_debug("Raising I_JOIN_RESULT: join-%s", crm_element_value(stored_msg, F_CRM_JOIN_ID));
        return I_JOIN_RESULT;

    } else if (strcmp(op, CRM_OP_LRM_DELETE) == 0
               || strcmp(op, CRM_OP_LRM_FAIL) == 0
               || strcmp(op, CRM_OP_LRM_REFRESH) == 0 || strcmp(op, CRM_OP_REPROBE) == 0) {

        crm_xml_add(stored_msg, F_CRM_SYS_TO, CRM_SYSTEM_LRMD);
        return I_ROUTER;

    } else if (strcmp(op, CRM_OP_NOOP) == 0) {
        return I_NULL;

    } else if (strcmp(op, CRM_OP_LOCAL_SHUTDOWN) == 0) {

        crm_shutdown(SIGTERM);
        /*return I_SHUTDOWN; */
        return I_NULL;

        /*========== (NOT_DC)-Only Actions ==========*/
    } else if (AM_I_DC == FALSE && strcmp(op, CRM_OP_SHUTDOWN) == 0) {

        const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM);
        gboolean dc_match = safe_str_eq(host_from, fsa_our_dc);

        if (dc_match || fsa_our_dc == NULL) {
            if (is_set(fsa_input_register, R_SHUTDOWN) == FALSE) {
                crm_err("We didn't ask to be shut down, yet our" " DC is telling us too.");
                set_bit(fsa_input_register, R_STAYDOWN);
                return I_STOP;
            }
            crm_info("Shutting down");
            return I_STOP;

        } else {
            crm_warn("Discarding %s op from %s", op, host_from);
        }

    } else if (strcmp(op, CRM_OP_PING) == 0) {
        /* eventually do some stuff to figure out
         * if we /are/ ok
         */
        const char *sys_to = crm_element_value(stored_msg, F_CRM_SYS_TO);
        xmlNode *ping = create_xml_node(NULL, XML_CRM_TAG_PING);

        crm_xml_add(ping, XML_PING_ATTR_STATUS, "ok");
        crm_xml_add(ping, XML_PING_ATTR_SYSFROM, sys_to);
        crm_xml_add(ping, "crmd_state", fsa_state2string(fsa_state));

        /* Ok, so technically not so interesting, but CTS needs to see this */
        crm_notice("Current ping state: %s", fsa_state2string(fsa_state));

        msg = create_reply(stored_msg, ping);
        relay_message(msg, TRUE);

        free_xml(ping);
        free_xml(msg);

    } else if (strcmp(op, CRM_OP_RM_NODE_CACHE) == 0) {
        int id = 0;
        const char *name = NULL;
        xmlNode *options = get_xpath_object("//" XML_TAG_OPTIONS, stored_msg, LOG_ERR);

        if (options) {
            crm_element_value_int(options, XML_ATTR_ID, &id);
            name = crm_element_value(options, XML_ATTR_UNAME);
        }

        reap_crm_member(id, name);

    } else {
        crm_err("Unexpected request (%s) sent to %s", op, AM_I_DC ? "the DC" : "non-DC node");
        crm_log_xml_err(stored_msg, "Unexpected");
    }

    return I_NULL;
}
Beispiel #3
0
enum crmd_fsa_input
handle_request(xmlNode * stored_msg, enum crmd_fsa_cause cause)
{
    xmlNode *msg = NULL;
    const char *op = crm_element_value(stored_msg, F_CRM_TASK);

    /* Optimize this for the DC - it has the most to do */

    if (op == NULL) {
        crm_log_xml_err(stored_msg, "Bad message");
        return I_NULL;
    }

    if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) {
        const char *from = crm_element_value(stored_msg, F_CRM_HOST_FROM);
        crm_node_t *node = crm_find_peer(0, from);

        crm_update_peer_expected(__FUNCTION__, node, CRMD_JOINSTATE_DOWN);
        if(AM_I_DC == FALSE) {
            return I_NULL; /* Done */
        }
    }

    /*========== DC-Only Actions ==========*/
    if (AM_I_DC) {
        if (strcmp(op, CRM_OP_JOIN_ANNOUNCE) == 0) {
            return I_NODE_JOIN;

        } else if (strcmp(op, CRM_OP_JOIN_REQUEST) == 0) {
            return I_JOIN_REQUEST;

        } else if (strcmp(op, CRM_OP_JOIN_CONFIRM) == 0) {
            return I_JOIN_RESULT;

        } else if (strcmp(op, CRM_OP_SHUTDOWN) == 0) {
            const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM);
            gboolean dc_match = safe_str_eq(host_from, fsa_our_dc);

            if (is_set(fsa_input_register, R_SHUTDOWN)) {
                crm_info("Shutting ourselves down (DC)");
                return I_STOP;

            } else if (dc_match) {
                crm_err("We didn't ask to be shut down, yet our"
                        " TE is telling us to. Better get out now!");
                return I_TERMINATE;

            } else if (fsa_state != S_STOPPING) {
                crm_err("Another node is asking us to shutdown" " but we think we're ok.");
                return I_ELECTION;
            }

        } else if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) {
            /* a slave wants to shut down */
            /* create cib fragment and add to message */
            return handle_shutdown_request(stored_msg);
        }
    }

    /*========== common actions ==========*/
    if (strcmp(op, CRM_OP_NOVOTE) == 0) {
        ha_msg_input_t fsa_input;

        fsa_input.msg = stored_msg;
        register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input,
                               A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE, __FUNCTION__);

    } else if (strcmp(op, CRM_OP_THROTTLE) == 0) {
        throttle_update(stored_msg);
        return I_NULL;

    } else if (strcmp(op, CRM_OP_CLEAR_FAILCOUNT) == 0) {
        return handle_failcount_op(stored_msg);

    } else if (strcmp(op, CRM_OP_VOTE) == 0) {
        /* count the vote and decide what to do after that */
        ha_msg_input_t fsa_input;

        fsa_input.msg = stored_msg;
        register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input,
                               A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE, __FUNCTION__);

        /* Sometimes we _must_ go into S_ELECTION */
        if (fsa_state == S_HALT) {
            crm_debug("Forcing an election from S_HALT");
            return I_ELECTION;
#if 0
        } else if (AM_I_DC) {
            /* This is the old way of doing things but what is gained? */
            return I_ELECTION;
#endif
        }

    } else if (strcmp(op, CRM_OP_JOIN_OFFER) == 0) {
        crm_debug("Raising I_JOIN_OFFER: join-%s", crm_element_value(stored_msg, F_CRM_JOIN_ID));
        return I_JOIN_OFFER;

    } else if (strcmp(op, CRM_OP_JOIN_ACKNAK) == 0) {
        crm_debug("Raising I_JOIN_RESULT: join-%s", crm_element_value(stored_msg, F_CRM_JOIN_ID));
        return I_JOIN_RESULT;

    } else if (strcmp(op, CRM_OP_LRM_DELETE) == 0
               || strcmp(op, CRM_OP_LRM_FAIL) == 0
               || strcmp(op, CRM_OP_LRM_REFRESH) == 0 || strcmp(op, CRM_OP_REPROBE) == 0) {

        crm_xml_add(stored_msg, F_CRM_SYS_TO, CRM_SYSTEM_LRMD);
        return I_ROUTER;

    } else if (strcmp(op, CRM_OP_NOOP) == 0) {
        return I_NULL;

    } else if (strcmp(op, CRM_OP_LOCAL_SHUTDOWN) == 0) {

        crm_shutdown(SIGTERM);
        /*return I_SHUTDOWN; */
        return I_NULL;

        /*========== (NOT_DC)-Only Actions ==========*/
    } else if (AM_I_DC == FALSE && strcmp(op, CRM_OP_SHUTDOWN) == 0) {

        const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM);
        gboolean dc_match = safe_str_eq(host_from, fsa_our_dc);

        if (dc_match || fsa_our_dc == NULL) {
            if (is_set(fsa_input_register, R_SHUTDOWN) == FALSE) {
                crm_err("We didn't ask to be shut down, yet our DC is telling us to.");
                set_bit(fsa_input_register, R_STAYDOWN);
                return I_STOP;
            }
            crm_info("Shutting down");
            return I_STOP;

        } else {
            crm_warn("Discarding %s op from %s", op, host_from);
        }

    } else if (strcmp(op, CRM_OP_PING) == 0) {
        /* eventually do some stuff to figure out
         * if we /are/ ok
         */
        const char *sys_to = crm_element_value(stored_msg, F_CRM_SYS_TO);
        xmlNode *ping = create_xml_node(NULL, XML_CRM_TAG_PING);

        crm_xml_add(ping, XML_PING_ATTR_STATUS, "ok");
        crm_xml_add(ping, XML_PING_ATTR_SYSFROM, sys_to);
        crm_xml_add(ping, "crmd_state", fsa_state2string(fsa_state));

        /* Ok, so technically not so interesting, but CTS needs to see this */
        crm_notice("Current ping state: %s", fsa_state2string(fsa_state));

        msg = create_reply(stored_msg, ping);
        if (msg) {
            (void)relay_message(msg, TRUE);
        }

        free_xml(ping);
        free_xml(msg);

    } else if (strcmp(op, CRM_OP_RM_NODE_CACHE) == 0) {
        int id = 0;
        const char *name = NULL;

        crm_element_value_int(stored_msg, XML_ATTR_ID, &id);
        name = crm_element_value(stored_msg, XML_ATTR_UNAME);

        if(cause == C_IPC_MESSAGE) {
            msg = create_request(CRM_OP_RM_NODE_CACHE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
            if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) {
                crm_err("Could not instruct peers to remove references to node %s/%u", name, id);
            } else {
                crm_notice("Instructing peers to remove references to node %s/%u", name, id);
            }
            free_xml(msg);

        } else {

            reap_crm_member(id, name);
            if(attrd_ipc) {
                int rc = 0;
                xmlNode *update = create_xml_node(NULL, __FUNCTION__);

                crm_xml_add(update, F_TYPE, T_ATTRD);
                crm_xml_add(update, F_ORIG, crm_system_name);

                crm_xml_add(update, F_ATTRD_TASK, "peer-remove");
                crm_xml_add(update, F_ATTRD_HOST, name);
                crm_xml_add_int(update, F_ATTRD_HOST_ID, id);

                rc = crm_ipc_send(attrd_ipc, update, crm_ipc_client_response, 0, NULL);
                if (rc > 0) {
                    rc = pcmk_ok;
                }

                crm_debug("Peer cache cleanup for %s (%d): %s (%d)", name, id, pcmk_strerror(rc), rc);
                free_xml(update);
            }
        }

    } else {
        crm_err("Unexpected request (%s) sent to %s", op, AM_I_DC ? "the DC" : "non-DC node");
        crm_log_xml_err(stored_msg, "Unexpected");
    }

    return I_NULL;
}
Beispiel #4
0
/* aka. this is notification that we have (or have not) been accepted */
void
do_cl_join_finalize_respond(long long action,
                            enum crmd_fsa_cause cause,
                            enum crmd_fsa_state cur_state,
                            enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    xmlNode *tmp1 = NULL;
    gboolean was_nack = TRUE;
    static gboolean first_join = TRUE;
    ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
    const char *start_state = daemon_option("node_start_state");

    int join_id = -1;
    const char *op = crm_element_value(input->msg, F_CRM_TASK);
    const char *ack_nack = crm_element_value(input->msg, CRM_OP_JOIN_ACKNAK);
    const char *welcome_from = crm_element_value(input->msg, F_CRM_HOST_FROM);

    if (safe_str_neq(op, CRM_OP_JOIN_ACKNAK)) {
        crm_trace("Ignoring op=%s message", op);
        return;
    }

    /* calculate if it was an ack or a nack */
    if (crm_is_true(ack_nack)) {
        was_nack = FALSE;
    }

    crm_element_value_int(input->msg, F_CRM_JOIN_ID, &join_id);

    if (was_nack) {
        crm_err("Shutting down because cluster join with leader %s failed "
                CRM_XS" join-%d NACK'd", welcome_from, join_id);
        register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
        return;
    }

    if (AM_I_DC == FALSE && safe_str_eq(welcome_from, fsa_our_uname)) {
        crm_warn("Discarding our own welcome - we're no longer the DC");
        return;
    }

    if (update_dc(input->msg) == FALSE) {
        crm_warn("Discarding %s from node %s (expected from %s)",
                 op, welcome_from, fsa_our_dc);
        return;
    }

    update_dc_expected(input->msg);

    /* send our status section to the DC */
    tmp1 = do_lrm_query(TRUE, fsa_our_uname);
    if (tmp1 != NULL) {
        xmlNode *reply = create_request(CRM_OP_JOIN_CONFIRM, tmp1, fsa_our_dc,
                                        CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL);

        crm_xml_add_int(reply, F_CRM_JOIN_ID, join_id);

        crm_debug("Confirming join-%d: sending local operation history to %s",
                  join_id, fsa_our_dc);

        /*
         * If this is the node's first join since the crmd started on it, clear
         * any previous transient node attributes, to handle the case where
         * the node restarted so quickly that the cluster layer didn't notice.
         *
         * Do not remove the resources though, they'll be cleaned up in
         * do_dc_join_ack(). Removing them here creates a race condition if the
         * crmd is being recovered. Instead of a list of active resources from
         * the lrmd, we may end up with a blank status section. If we are _NOT_
         * lucky, we will probe for the "wrong" instance of anonymous clones and
         * end up with multiple active instances on the machine.
         */
        if (first_join && is_not_set(fsa_input_register, R_SHUTDOWN)) {
            first_join = FALSE;
            erase_status_tag(fsa_our_uname, XML_TAG_TRANSIENT_NODEATTRS, 0);
            update_attrd(fsa_our_uname, "terminate", NULL, NULL, FALSE);
            update_attrd(fsa_our_uname, XML_CIB_ATTR_SHUTDOWN, "0", NULL, FALSE);

            if (start_state) {
                set_join_state(start_state);
            }
        }

        send_cluster_message(crm_get_peer(0, fsa_our_dc), crm_msg_crmd, reply, TRUE);
        free_xml(reply);

        if (AM_I_DC == FALSE) {
            register_fsa_input_adv(cause, I_NOT_DC, NULL, A_NOTHING, TRUE, __FUNCTION__);
            update_attrd(NULL, NULL, NULL, NULL, FALSE);
        }

        free_xml(tmp1);

    } else {
        crm_err("Could not confirm join-%d with %s: Local operation history failed",
                join_id, fsa_our_dc);
        register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
    }
}
Beispiel #5
0
/* aka. this is notification that we have (or have not) been accepted */
void
do_cl_join_finalize_respond(long long action,
                            enum crmd_fsa_cause cause,
                            enum crmd_fsa_state cur_state,
                            enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    xmlNode *tmp1 = NULL;
    gboolean was_nack = TRUE;
    static gboolean first_join = TRUE;
    ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);

    int join_id = -1;
    const char *op = crm_element_value(input->msg, F_CRM_TASK);
    const char *ack_nack = crm_element_value(input->msg, CRM_OP_JOIN_ACKNAK);
    const char *welcome_from = crm_element_value(input->msg, F_CRM_HOST_FROM);

    if (safe_str_neq(op, CRM_OP_JOIN_ACKNAK)) {
        crm_trace("Ignoring op=%s message", op);
        return;
    }

    /* calculate if it was an ack or a nack */
    if (crm_is_true(ack_nack)) {
        was_nack = FALSE;
    }

    crm_element_value_int(input->msg, F_CRM_JOIN_ID, &join_id);

    if (was_nack) {
        crm_err("Join (join-%d) with leader %s failed (NACK'd): Shutting down",
                join_id, welcome_from);
        register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
        return;
    }

    if (AM_I_DC == FALSE && safe_str_eq(welcome_from, fsa_our_uname)) {
        crm_warn("Discarding our own welcome - we're no longer the DC");
        return;
    }

    if (update_dc(input->msg) == FALSE) {
        crm_warn("Discarding %s from %s (expected %s)", op, welcome_from, fsa_our_dc);
        return;
    }

    /* send our status section to the DC */
    crm_debug("Confirming join join-%d: %s", join_id, crm_element_value(input->msg, F_CRM_TASK));
    tmp1 = do_lrm_query(TRUE, fsa_our_uname);
    if (tmp1 != NULL) {
        xmlNode *reply = create_request(CRM_OP_JOIN_CONFIRM, tmp1, fsa_our_dc,
                                        CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL);

        crm_xml_add_int(reply, F_CRM_JOIN_ID, join_id);

        crm_debug("join-%d: Join complete."
                  "  Sending local LRM status to %s", join_id, fsa_our_dc);

        if (first_join) {
            first_join = FALSE;

            /*
             * Clear any previous transient node attribute and lrm operations
             *
             * Corosync has a nasty habit of not being able to tell if a
             *   node is returning or didn't leave in the first place.
             * This confuses Pacemaker because it never gets a "node up"
             *   event which is normally used to clean up the status section.
             *
             * Do not remove the resources though, they'll be cleaned up in
             *   do_dc_join_ack().  Removing them here creates a race
             *   condition if the crmd is being recovered.
             * Instead of a list of active resources from the lrmd
             *   we may end up with a blank status section.
             * If we are _NOT_ lucky, we will probe for the "wrong" instance
             *   of anonymous clones and end up with multiple active
             *   instances on the machine.
             */
            erase_status_tag(fsa_our_uname, XML_TAG_TRANSIENT_NODEATTRS, 0);

            /* Just in case attrd was still around too */
            if (is_not_set(fsa_input_register, R_SHUTDOWN)) {
                update_attrd(fsa_our_uname, "terminate", NULL, NULL, FALSE);
                update_attrd(fsa_our_uname, XML_CIB_ATTR_SHUTDOWN, "0", NULL, FALSE);
            }
        }

        send_cluster_message(crm_get_peer(0, fsa_our_dc), crm_msg_crmd, reply, TRUE);
        free_xml(reply);

        if (AM_I_DC == FALSE) {
            register_fsa_input_adv(cause, I_NOT_DC, NULL, A_NOTHING, TRUE, __FUNCTION__);
            update_attrd(NULL, NULL, NULL, NULL, FALSE);
        }

        free_xml(tmp1);

    } else {
        crm_err("Could not send our LRM state to the DC");
        register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
    }
}