Exemplo n.º 1
0
/*	A_ELECTION_COUNT	*/
void
do_election_count_vote(long long action,
                       enum crmd_fsa_cause cause,
                       enum crmd_fsa_state cur_state,
                       enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    struct timeval your_age;
    int age;
    int election_id = -1;
    int log_level = LOG_INFO;
    gboolean use_born_on = FALSE;
    gboolean done = FALSE;
    gboolean we_loose = FALSE;
    const char *op = NULL;
    const char *vote_from = NULL;
    const char *your_version = NULL;
    const char *election_owner = NULL;
    const char *reason = "unknown";
    crm_node_t *our_node = NULL, *your_node = NULL;
    ha_msg_input_t *vote = fsa_typed_data(fsa_dt_ha_msg);

    static time_t last_election_loss = 0;

    /* if the membership copy is NULL we REALLY shouldnt be voting
     * the question is how we managed to get here.
     */

    CRM_CHECK(msg_data != NULL, return);
    CRM_CHECK(crm_peer_cache != NULL, return);
    CRM_CHECK(vote != NULL, crm_err("Bogus data from %s", msg_data->origin); return);
    CRM_CHECK(vote->msg != NULL, crm_err("Bogus data from %s", msg_data->origin); return);

    your_age.tv_sec = 0;
    your_age.tv_usec = 0;

    op = crm_element_value(vote->msg, F_CRM_TASK);
    vote_from = crm_element_value(vote->msg, F_CRM_HOST_FROM);
    your_version = crm_element_value(vote->msg, F_CRM_VERSION);
    election_owner = crm_element_value(vote->msg, F_CRM_ELECTION_OWNER);
    crm_element_value_int(vote->msg, F_CRM_ELECTION_ID, &election_id);
    crm_element_value_int(vote->msg, F_CRM_ELECTION_AGE_S, (int *)&(your_age.tv_sec));
    crm_element_value_int(vote->msg, F_CRM_ELECTION_AGE_US, (int *)&(your_age.tv_usec));

    CRM_CHECK(vote_from != NULL, vote_from = fsa_our_uname);

    your_node = crm_get_peer(0, vote_from);
    our_node = crm_get_peer(0, fsa_our_uname);

    if (voted == NULL) {
        crm_debug("Created voted hash");
        voted = g_hash_table_new_full(crm_str_hash, g_str_equal,
                                      g_hash_destroy_str, g_hash_destroy_str);
    }

    if (is_heartbeat_cluster()) {
        use_born_on = TRUE;
    } else if (is_classic_ais_cluster()) {
        use_born_on = TRUE;
    }

    age = crm_compare_age(your_age);

    if (cur_state == S_STARTING) {
        reason = "Still starting";
        we_loose = TRUE;

    } else if (our_node == NULL || crm_is_peer_active(our_node) == FALSE) {
        reason = "We are not part of the cluster";
        log_level = LOG_ERR;
        we_loose = TRUE;

    } else if (election_id != current_election_id && crm_str_eq(fsa_our_uuid, election_owner, TRUE)) {
        log_level = LOG_DEBUG_2;
        reason = "Superceeded";
        done = TRUE;

    } else if (your_node == NULL || crm_is_peer_active(your_node) == FALSE) {
        /* Possibly we cached the message in the FSA queue at a point that it wasn't */
        reason = "Peer is not part of our cluster";
        log_level = LOG_WARNING;
        done = TRUE;

    } else if (crm_str_eq(op, CRM_OP_NOVOTE, TRUE)) {
        char *op_copy = strdup(op);
        char *uname_copy = strdup(vote_from);

        CRM_ASSERT(crm_str_eq(fsa_our_uuid, election_owner, TRUE));

        /* update the list of nodes that have voted */
        g_hash_table_replace(voted, uname_copy, op_copy);
        reason = "Recorded";
        done = TRUE;

    } else if (crm_str_eq(vote_from, fsa_our_uname, TRUE)) {
        char *op_copy = strdup(op);
        char *uname_copy = strdup(vote_from);

        CRM_ASSERT(crm_str_eq(fsa_our_uuid, election_owner, TRUE));

        /* update ourselves in the list of nodes that have voted */
        g_hash_table_replace(voted, uname_copy, op_copy);
        reason = "Recorded";
        done = TRUE;

    } else if (compare_version(your_version, CRM_FEATURE_SET) < 0) {
        reason = "Version";
        we_loose = TRUE;

    } else if (compare_version(your_version, CRM_FEATURE_SET) > 0) {
        reason = "Version";

    } else if (age < 0) {
        reason = "Uptime";
        we_loose = TRUE;

    } else if (age > 0) {
        reason = "Uptime";

        /* TODO: Check for y(our) born < 0 */
    } else if (use_born_on && your_node->born < our_node->born) {
        reason = "Born";
        we_loose = TRUE;

    } else if (use_born_on && your_node->born > our_node->born) {
        reason = "Born";

    } else if (fsa_our_uname == NULL) {
        reason = "Unknown host name";
        we_loose = TRUE;

    } else if (strcasecmp(fsa_our_uname, vote_from) > 0) {
        reason = "Host name";
        we_loose = TRUE;

    } else {
        reason = "Host name";
        CRM_ASSERT(strcmp(fsa_our_uname, vote_from) != 0);
/* cant happen...
 *	} else if(strcasecmp(fsa_our_uname, vote_from) == 0) {
 *
 * default...
 *	} else { // strcasecmp(fsa_our_uname, vote_from) < 0
 *		we win
 */
    }

    if (done) {
        do_crm_log(log_level + 1, "Election %d (current: %d, owner: %s): Processed %s from %s (%s)",
                   election_id, current_election_id, election_owner, op, vote_from, reason);

    } else if (we_loose) {
        xmlNode *novote = create_request(CRM_OP_NOVOTE, NULL, vote_from,
                                         CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);

        do_crm_log(log_level, "Election %d (owner: %s) lost: %s from %s (%s)",
                   election_id, election_owner, op, vote_from, reason);
        update_dc(NULL);

        crm_timer_stop(election_timeout);
        if (fsa_input_register & R_THE_DC) {
            crm_trace("Give up the DC to %s", vote_from);
            register_fsa_input(C_FSA_INTERNAL, I_RELEASE_DC, NULL);

        } else if (cur_state != S_STARTING) {
            crm_trace("We werent the DC anyway");
            register_fsa_input(C_FSA_INTERNAL, I_PENDING, NULL);
        }

        crm_xml_add(novote, F_CRM_ELECTION_OWNER, election_owner);
        crm_xml_add_int(novote, F_CRM_ELECTION_ID, election_id);

        send_cluster_message(crm_get_peer(0, vote_from), crm_msg_crmd, novote, TRUE);
        free_xml(novote);

        fsa_cib_conn->cmds->set_slave(fsa_cib_conn, cib_scope_local);

        last_election_loss = time(NULL);

    } else {
        do_crm_log(log_level, "Election %d (owner: %s) pass: %s from %s (%s)",
                   election_id, election_owner, op, vote_from, reason);

        if (last_election_loss) {
            time_t tm_now = time(NULL);

            if (tm_now - last_election_loss < (time_t) loss_dampen) {
                crm_info("Election %d ignore: We already lost an election less than %ds ago (%s)",
                         election_id, loss_dampen, ctime(&last_election_loss));
                update_dc(NULL);
                return;
            }
            last_election_loss = 0;
        }

        register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
        g_hash_table_destroy(voted);
        voted = NULL;
    }
}
/*	A_ELECTION_COUNT	*/
enum election_result
election_count_vote(election_t *e, xmlNode *vote, bool can_win)
{
    int age = 0;
    int election_id = -1;
    int log_level = LOG_INFO;
    gboolean use_born_on = FALSE;
    gboolean done = FALSE;
    gboolean we_loose = FALSE;
    const char *op = NULL;
    const char *from = NULL;
    const char *reason = "unknown";
    const char *election_owner = NULL;
    crm_node_t *our_node = NULL, *your_node = NULL;

    static int election_wins = 0;

    xmlNode *novote = NULL;
    time_t tm_now = time(NULL);
    static time_t expires = 0;
    static time_t last_election_loss = 0;

    /* if the membership copy is NULL we REALLY shouldn't be voting
     * the question is how we managed to get here.
     */

    CRM_CHECK(vote != NULL, return election_error);

    if(e == NULL) {
        crm_info("Not voting in election: not initialized");
        return election_lost;

    } else if(crm_peer_cache == NULL) {
        crm_info("Not voting in election: no peer cache");
        return election_lost;
    }

    op = crm_element_value(vote, F_CRM_TASK);
    from = crm_element_value(vote, F_CRM_HOST_FROM);
    election_owner = crm_element_value(vote, F_CRM_ELECTION_OWNER);
    crm_element_value_int(vote, F_CRM_ELECTION_ID, &election_id);

    your_node = crm_get_peer(0, from);
    our_node = crm_get_peer(0, e->uname);

    if (e->voted == NULL) {
        crm_debug("Created voted hash");
        e->voted = g_hash_table_new_full(crm_str_hash, g_str_equal,
                                         g_hash_destroy_str, g_hash_destroy_str);
    }

    if (is_heartbeat_cluster()) {
        use_born_on = TRUE;
    } else if (is_classic_ais_cluster()) {
        use_born_on = TRUE;
    }

    if(can_win == FALSE) {
        reason = "Not eligible";
        we_loose = TRUE;

    } else if (our_node == NULL || crm_is_peer_active(our_node) == FALSE) {
        reason = "We are not part of the cluster";
        log_level = LOG_ERR;
        we_loose = TRUE;

    } else if (election_id != e->count && crm_str_eq(our_node->uuid, election_owner, TRUE)) {
        log_level = LOG_TRACE;
        reason = "Superseded";
        done = TRUE;

    } else if (your_node == NULL || crm_is_peer_active(your_node) == FALSE) {
        /* Possibly we cached the message in the FSA queue at a point that it wasn't */
        reason = "Peer is not part of our cluster";
        log_level = LOG_WARNING;
        done = TRUE;

    } else if (crm_str_eq(op, CRM_OP_NOVOTE, TRUE)) {
        char *op_copy = strdup(op);
        char *uname_copy = strdup(from);

        CRM_ASSERT(crm_str_eq(our_node->uuid, election_owner, TRUE));

        /* update the list of nodes that have voted */
        g_hash_table_replace(e->voted, uname_copy, op_copy);
        reason = "Recorded";
        done = TRUE;

    } else {
        struct timeval your_age;
        const char *your_version = crm_element_value(vote, F_CRM_VERSION);
        int tv_sec = 0;
        int tv_usec = 0;

        crm_element_value_int(vote, F_CRM_ELECTION_AGE_S, &tv_sec);
        crm_element_value_int(vote, F_CRM_ELECTION_AGE_US, &tv_usec);

        your_age.tv_sec = tv_sec;
        your_age.tv_usec = tv_usec;

        age = crm_compare_age(your_age);
        if (crm_str_eq(from, e->uname, TRUE)) {
            char *op_copy = strdup(op);
            char *uname_copy = strdup(from);

            CRM_ASSERT(crm_str_eq(our_node->uuid, election_owner, TRUE));

            /* update ourselves in the list of nodes that have voted */
            g_hash_table_replace(e->voted, uname_copy, op_copy);
            reason = "Recorded";
            done = TRUE;

        } else if (compare_version(your_version, CRM_FEATURE_SET) < 0) {
            reason = "Version";
            we_loose = TRUE;

        } else if (compare_version(your_version, CRM_FEATURE_SET) > 0) {
            reason = "Version";

        } else if (age < 0) {
            reason = "Uptime";
            we_loose = TRUE;

        } else if (age > 0) {
            reason = "Uptime";

            /* TODO: Check for y(our) born < 0 */
        } else if (use_born_on && your_node->born < our_node->born) {
            reason = "Born";
            we_loose = TRUE;

        } else if (use_born_on && your_node->born > our_node->born) {
            reason = "Born";

        } else if (e->uname == NULL) {
            reason = "Unknown host name";
            we_loose = TRUE;

        } else if (strcasecmp(e->uname, from) > 0) {
            reason = "Host name";
            we_loose = TRUE;

        } else {
            reason = "Host name";
            CRM_ASSERT(strcasecmp(e->uname, from) < 0);
/* can't happen...
 *	} else if(strcasecmp(e->uname, from) == 0) {
 *
 */
        }
    }

    if (expires < tm_now) {
        election_wins = 0;
        expires = tm_now + STORM_INTERVAL;

    } else if (done == FALSE && we_loose == FALSE) {
        int peers = 1 + g_hash_table_size(crm_peer_cache);

        /* If every node has to vote down every other node, thats N*(N-1) total elections
         * Allow some leway before _really_ complaining
         */
        election_wins++;
        if (election_wins > (peers * peers)) {
            crm_warn("Election storm detected: %d elections in %d seconds", election_wins,
                     STORM_INTERVAL);
            election_wins = 0;
            expires = tm_now + STORM_INTERVAL;
            crm_write_blackbox(0, NULL);
        }
    }

    if (done) {
        do_crm_log(log_level + 1, "Election %d (current: %d, owner: %s): Processed %s from %s (%s)",
                   election_id, e->count, election_owner, op, from, reason);
        return e->state;

    } else if(we_loose == FALSE) {
        do_crm_log(log_level, "Election %d (owner: %s) pass: %s from %s (%s)",
                   election_id, election_owner, op, from, reason);

        if (last_election_loss == 0
            || tm_now - last_election_loss > (time_t) loss_dampen) {

            last_election_loss = 0;
            election_timeout_stop(e);

            /* Start a new election by voting down this, and other, peers */
            e->state = election_start;
            return e->state;
        }

        crm_info("Election %d ignore: We already lost an election less than %ds ago (%s)",
                 election_id, loss_dampen, ctime(&last_election_loss));
    }

    novote = create_request(CRM_OP_NOVOTE, NULL, from,
                            CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);

    do_crm_log(log_level, "Election %d (owner: %s) lost: %s from %s (%s)",
               election_id, election_owner, op, from, reason);

    election_timeout_stop(e);

    crm_xml_add(novote, F_CRM_ELECTION_OWNER, election_owner);
    crm_xml_add_int(novote, F_CRM_ELECTION_ID, election_id);

    send_cluster_message(your_node, crm_msg_crmd, novote, TRUE);
    free_xml(novote);

    last_election_loss = tm_now;
    e->state = election_lost;
    return e->state;
}