コード例 #1
0
guint
reap_crm_member(uint32_t id)
{
    int matches = 0;
    crm_node_t *node = g_hash_table_lookup(crm_peer_id_cache, GUINT_TO_POINTER(id));

    if (node == NULL) {
        crm_info("Peer %u is unknown", id);

    } else if (crm_is_peer_active(node)) {
        crm_warn("Peer %u/%s is still active", id, node->uname);

    } else {
        if (g_hash_table_remove(crm_peer_id_cache, GUINT_TO_POINTER(id))) {
            crm_notice("Removed dead peer %u from the uuid cache", id);

        } else {
            crm_warn("Peer %u/%s was not removed", id, node->uname);
        }

        matches = g_hash_table_foreach_remove(crm_peer_cache, crm_reap_dead_member, node);

        crm_notice("Removed %d dead peers with id=%u from the membership list", matches, id);
    }

    return matches;
}
コード例 #2
0
ファイル: membership.c プロジェクト: HyunKwangYong/pacemaker
/*
 * GHashTable foreach callback: clean up local state for a peer that is no
 * longer active.
 *
 * key:       unused
 * value:     the crm_node_t entry being visited
 * user_data: unused
 *
 * For an inactive peer this resets its join state, removes it from the
 * election, registers I_ERROR if the peer is the local node or I_ELECTION if
 * it was our DC, re-checks the join state while integrating/finalizing, and
 * fails the actions in the transition graph that are tied to the peer's uuid.
 */
static void
reap_dead_nodes(gpointer key, gpointer value, gpointer user_data)
{
    crm_node_t *node = value;

    if (crm_is_peer_active(node)) {
        return;
    }

    crm_update_peer_join(__FUNCTION__, node, crm_join_none);

    if (node && node->uname) {
        election_remove(fsa_election, node->uname);

        if (safe_str_eq(fsa_our_uname, node->uname)) {
            crm_err("We're not part of the cluster anymore");
            register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL);

        } else if (AM_I_DC == FALSE && safe_str_eq(node->uname, fsa_our_dc)) {
            crm_warn("Our DC node (%s) left the cluster", node->uname);
            register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
        }
    }

    if (fsa_state == S_INTEGRATION || fsa_state == S_FINALIZE_JOIN) {
        check_join_state(fsa_state, __FUNCTION__);
    }

    /* The uname handling above allows for a NULL entry, so apply the same
     * guard before dereferencing the node for its uuid.
     */
    if (node != NULL) {
        fail_incompletable_actions(transition_graph, node->uuid);
    }
}
コード例 #3
0
ファイル: election.c プロジェクト: jnewland/pacemaker
/*
 * GHashTable foreach callback: log, at error level, each active member.
 *
 * key:       printed as the member's name
 * value:     the crm_node_t entry being visited
 * user_data: string used as the log line prefix
 */
static void
log_member_uname(gpointer key, gpointer value, gpointer user_data)
{
    const crm_node_t *peer = value;
    const char *prefix = user_data;
    const char *uname = key;

    if (!crm_is_peer_active(peer)) {
        return;
    }

    crm_err("%s: %s proc=%.32x", prefix, uname, peer->processes);
}
コード例 #4
0
ファイル: join_dc.c プロジェクト: kiranmurari/pacemaker
/*
 * GHashTable foreach callback: send a join offer to one active peer.
 *
 * key:       unused
 * value:     the crm_node_t entry to make an offer to (must not be NULL)
 * user_data: unused
 *
 * On success the peer's name is recorded in welcomed_nodes together with the
 * join id the offer was made for.
 */
static void
join_make_offer(gpointer key, gpointer value, gpointer user_data)
{
    const crm_node_t *member = value;
    const char *target = NULL;
    xmlNode *offer = NULL;
    char *offer_id = NULL;

    CRM_ASSERT(member != NULL);

    if (!crm_is_peer_active(member)) {
        crm_trace("Not making an offer to %s: not active", member->uname);
        return;
    }

    target = member->uname;
    if (target == NULL) {
        crm_err("No recipient for welcome message");
        return;
    }

    /* Forget any earlier join bookkeeping for this peer before re-offering */
    erase_node_from_join(target);

    if (saved_ccm_membership_id != crm_peer_seq) {
        saved_ccm_membership_id = crm_peer_seq;
        crm_info("Making join offers based on membership %llu", crm_peer_seq);
    }

    /* Activity is checked a second time after the bookkeeping above */
    if (!crm_is_peer_active(member)) {
        crm_info("Peer process on %s is not active (yet?): %.8lx %d",
                 target, (long)member->processes, g_hash_table_size(crm_peer_cache));
        return;
    }

    offer = create_request(CRM_OP_JOIN_OFFER, NULL, target,
                           CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL);
    offer_id = crm_itoa(current_join_id);

    crm_xml_add_int(offer, F_CRM_JOIN_ID, current_join_id);
    crm_debug("join-%d: Sending offer to %s", current_join_id, target);

    send_cluster_message(target, crm_msg_crmd, offer, TRUE);
    free_xml(offer);

    /* The table receives both the duplicated name and the join-id string */
    g_hash_table_insert(welcomed_nodes, strdup(target), offer_id);
}
コード例 #5
0
ファイル: election.c プロジェクト: ClusterLabs/pacemaker
/*!
 * \brief Check whether the local node has won an election
 *
 * Compare the number of recorded votes with the number of active peers. Once
 * at least as many votes as active peers have been recorded, stop the
 * election timer and complete the election.
 *
 * \param[in] e      Election object
 *
 * \return TRUE if local node has won, FALSE otherwise
 * \note If enough votes have arrived but the election owner never calls this
 *       function, the election is not won (and completion handling does not
 *       run) until the election times out.
 * \note This should be called when election_count_vote() returns
 *       \c election_in_progress.
 */
bool
election_check(election_t *e)
{
    int votes = 0;
    int members = 0;

    if (e == NULL) {
        crm_trace("Election check requested, but no election available");
        return FALSE;
    }

    if (e->voted == NULL) {
        crm_trace("%s check requested, but no votes received yet", e->name);
        return FALSE;
    }

    votes = g_hash_table_size(e->voted);
    members = crm_active_peers();

    if (votes < members) {
        crm_debug("%s still waiting on %d of %d votes",
                  e->name, members - votes, members);
        return FALSE;
    }

    /* Everyone has voted: the local node won */
    election_timeout_stop(e);

    if (votes > members) {
        /* More votes than active peers: log both sides for diagnosis, then
         * still declare the election won below.
         */
        GHashTableIter iter;
        gpointer entry = NULL;

        crm_warn("Received too many votes in %s", e->name);

        g_hash_table_iter_init(&iter, crm_peer_cache);
        while (g_hash_table_iter_next(&iter, NULL, &entry)) {
            const crm_node_t *peer = entry;

            if (crm_is_peer_active(peer)) {
                crm_warn("* expected vote: %s", peer->uname);
            }
        }

        g_hash_table_iter_init(&iter, e->voted);
        while (g_hash_table_iter_next(&iter, &entry, NULL)) {
            const char *voter = entry;

            crm_warn("* actual vote: %s", voter);
        }
    }

    crm_info("%s won by local node", e->name);
    election_complete(e);
    return TRUE;
}
コード例 #6
0
ファイル: membership.c プロジェクト: vishnumitraha/pacemaker
/*
 * GHashTable foreach callback: count active peers.
 *
 * key:       unused
 * value:     the crm_node_t entry being visited
 * user_data: pointer to the guint counter, incremented for each active peer
 */
static void
crm_count_peer(gpointer key, gpointer value, gpointer user_data)
{
    guint *active = user_data;
    crm_node_t *peer = value;

    if (!crm_is_peer_active(peer)) {
        return;
    }

    (*active)++;
}
コード例 #7
0
ファイル: join_dc.c プロジェクト: kiranmurari/pacemaker
/*
 * Hash-table callback: send the final join ACK or NACK to one node.
 *
 * key:       name of the node to reply to
 * value:     join-state string recorded for that node
 * user_data: unused
 *
 * Always returns TRUE.
 */
gboolean
finalize_join_for(gpointer key, gpointer value, gpointer user_data)
{
    const char *target = key;
    const char *state = value;
    crm_node_t *peer = NULL;
    xmlNode *reply = NULL;

    if ((target == NULL) || (state == NULL)) {
        return TRUE;
    }

    /* make sure the node exists in the config section */
    create_node_entry(target, target, NORMALNODE);

    peer = crm_get_peer(0, target);
    if (!crm_is_peer_active(peer)) {
        /* NACK'ing a node the membership layer does not know about yet only
         * creates more churn. Leave it waiting; the join restarts when the
         * new membership event arrives. Other NACKs (versions etc.) are still
         * processed below.
         */
        return TRUE;
    }

    /* Build the reply and tag it with the current join round */
    reply = create_request(CRM_OP_JOIN_ACKNAK, NULL, target,
                           CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL);
    crm_xml_add_int(reply, F_CRM_JOIN_ID, current_join_id);

    if (!safe_str_eq(state, CRMD_JOINSTATE_MEMBER)) {
        crm_warn("join-%d: NACK'ing join request from %s, state %s",
                 current_join_id, target, state);

        crm_xml_add(reply, CRM_OP_JOIN_ACKNAK, XML_BOOLEAN_FALSE);

    } else {
        crm_debug("join-%d: ACK'ing join request from %s, state %s",
                  current_join_id, target, state);
        crm_xml_add(reply, CRM_OP_JOIN_ACKNAK, XML_BOOLEAN_TRUE);
        g_hash_table_insert(finalized_nodes,
                            strdup(target), strdup(CRMD_JOINSTATE_MEMBER));
    }

    send_cluster_message(target, crm_msg_crmd, reply, TRUE);
    free_xml(reply);
    return TRUE;
}
コード例 #8
0
ファイル: join_dc.c プロジェクト: oalbrigt/pacemaker
/*
 * GHashTable foreach callback: send a join offer to one active peer.
 *
 * key:       unused
 * value:     the crm_node_t entry to make an offer to (must not be NULL)
 * user_data: when non-NULL, peers whose join phase is already beyond
 *            crm_join_none are skipped
 */
static void
join_make_offer(gpointer key, gpointer value, gpointer user_data)
{
    crm_node_t *peer = value;
    xmlNode *request = NULL;

    CRM_ASSERT(peer != NULL);

    if (!crm_is_peer_active(peer)) {
        crm_info("Not making an offer to %s: not active (%s)", peer->uname, peer->state);
        if ((peer->expected == NULL) && safe_str_eq(peer->state, CRM_NODE_LOST)) {
            /* Original author's note: this looks unsafe, but together with an
             * active resource it is what causes the node to be fenced. The
             * acknowledged downside is that a node that dies at the same time
             * as the old DC, and runs no resources, will not be fenced.
             */
            crm_update_peer_expected(__FUNCTION__, peer, CRMD_JOINSTATE_DOWN);
        }
        return;
    }

    if (peer->uname == NULL) {
        crm_err("No recipient for welcome message");
        return;
    }

    if (saved_ccm_membership_id != crm_peer_seq) {
        saved_ccm_membership_id = crm_peer_seq;
        crm_info("Making join offers based on membership %llu", crm_peer_seq);
    }

    if ((user_data != NULL) && (peer->join > crm_join_none)) {
        crm_info("Skipping %s: already known %d", peer->uname, peer->join);
        return;
    }

    /* Reset the join phase before the offer goes out */
    crm_update_peer_join(__FUNCTION__, peer, crm_join_none);

    request = create_request(CRM_OP_JOIN_OFFER, NULL, peer->uname,
                             CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL);
    crm_xml_add_int(request, F_CRM_JOIN_ID, current_join_id);

    crm_info("join-%d: Sending offer to %s", current_join_id, peer->uname);
    send_cluster_message(peer, crm_msg_crmd, request, TRUE);
    free_xml(request);

    /* Only the join phase is advanced here; the peer's expected state is not
     * changed on this path.
     */
    crm_update_peer_join(__FUNCTION__, peer, crm_join_welcomed);
}
コード例 #9
0
ファイル: join_dc.c プロジェクト: oalbrigt/pacemaker
/*
 * GHashTable foreach callback: finalize the join of one integrated peer.
 *
 * key:       unused
 * value:     the crm_node_t entry to finalize
 * user_data: unused
 *
 * Peers not in the crm_join_integrated phase are skipped. Otherwise a node
 * entry is written to the CIB nodes section and, if the peer is active, an
 * ACK is sent and the peer is marked finalized / expected-member.
 */
void
finalize_join_for(gpointer key, gpointer value, gpointer user_data)
{
    crm_node_t *peer = value;
    const char *target = peer->uname;
    xmlNode *node_entry = NULL;
    xmlNode *ack = NULL;

    if (peer->join != crm_join_integrated) {
        crm_trace("Skipping %s in state %d", target, peer->join);
        return;
    }

    /* make sure a node entry exists for the new node */
    crm_trace("Creating node entry for %s", target);

    node_entry = create_xml_node(NULL, XML_CIB_TAG_NODE);
    set_uuid(node_entry, XML_ATTR_UUID, peer);
    crm_xml_add(node_entry, XML_ATTR_UNAME, target);

    fsa_cib_anon_update(XML_CIB_TAG_NODES, node_entry,
                        cib_scope_local | cib_quorum_override | cib_can_create);
    free_xml(node_entry);

    /* Look the peer up again by name before testing whether it is active */
    peer = crm_get_peer(0, target);
    if (!crm_is_peer_active(peer)) {
        /* NACK'ing a node the membership layer does not know about yet only
         * creates more churn. Leave it waiting; the join restarts when the
         * new membership event arrives. Other NACKs (versions etc.) should
         * still be processed.
         */
        crm_update_peer_expected(__FUNCTION__, peer, CRMD_JOINSTATE_PENDING);
        return;
    }

    /* Build the ACK and tag it with the current join round */
    ack = create_request(CRM_OP_JOIN_ACKNAK, NULL, target,
                         CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL);
    crm_xml_add_int(ack, F_CRM_JOIN_ID, current_join_id);

    crm_debug("join-%d: ACK'ing join request from %s",
              current_join_id, target);
    crm_xml_add(ack, CRM_OP_JOIN_ACKNAK, XML_BOOLEAN_TRUE);
    crm_update_peer_join(__FUNCTION__, peer, crm_join_finalized);
    crm_update_peer_expected(__FUNCTION__, peer, CRMD_JOINSTATE_MEMBER);

    send_cluster_message(crm_get_peer(0, target), crm_msg_crmd, ack, TRUE);
    free_xml(ack);
}
コード例 #10
0
ファイル: election.c プロジェクト: chjohnst/pacemaker
/*
 * FSA action: check whether the running election can be declared won.
 *
 * Compares the size of the global `voted` table with the number of active
 * peers. While in S_ELECTION, once at least as many votes as active peers
 * have been recorded, the election timer is stopped, I_ELECTION_DC is
 * registered and the `voted` table is released.
 *
 * None of the parameters are read by this function.
 */
void
do_election_check(long long action,
                  enum crmd_fsa_cause cause,
                  enum crmd_fsa_state cur_state,
                  enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    int voted_size = 0;
    int num_members = crm_active_peers();

    if (voted) {
        voted_size = g_hash_table_size(voted);
    }

    if (fsa_state != S_ELECTION) {
        crm_debug("Ignore election check: we not in an election");

    } else if (voted_size >= num_members) {
        /* we won and everyone has voted */
        crm_timer_stop(election_timeout);
        register_fsa_input(C_FSA_INTERNAL, I_ELECTION_DC, NULL);
        if (voted_size > num_members) {
            /* More votes than active members: dump both sides for diagnosis.
             * `voted` cannot be NULL here because voted_size is non-zero.
             */
            GHashTableIter gIter;
            const crm_node_t *node;
            char *key = NULL;

            g_hash_table_iter_init(&gIter, crm_peer_cache);
            while (g_hash_table_iter_next(&gIter, NULL, (gpointer *) & node)) {
                if (crm_is_peer_active(node)) {
                    crm_err("member: %s proc=%.32x", node->uname, node->processes);
                }
            }

            g_hash_table_iter_init(&gIter, voted);
            while (g_hash_table_iter_next(&gIter, (gpointer *) & key, NULL)) {
                crm_err("voted: %s", key);
            }

        }

        /* `voted` is still NULL when no vote was recorded and there are no
         * active peers (0 >= 0); g_hash_table_destroy() must not be handed a
         * NULL table.
         */
        if (voted != NULL) {
            crm_debug("Destroying voted hash");
            g_hash_table_destroy(voted);
            voted = NULL;
        }

    } else {
        crm_debug("Still waiting on %d non-votes (%d total)",
                  num_members - voted_size, num_members);
    }

    return;
}
コード例 #11
0
/*
 * Check whether the local node has won the election.
 *
 * e: election object; a NULL election is reported as not won
 *
 * Returns TRUE (after stopping the election timer and running the election
 * completion handling) once at least as many votes as active peers have been
 * recorded, FALSE otherwise.
 */
bool
election_check(election_t *e)
{
    int active_peers = crm_active_peers();
    int votes = 0;

    if (e == NULL) {
        crm_trace("not initialized");
        return FALSE;
    }

    if (e->voted != NULL) {
        votes = g_hash_table_size(e->voted);
    }

    if (votes < active_peers) {
        crm_debug("Still waiting on %d non-votes (%d total)",
                  active_peers - votes, active_peers);
        return FALSE;
    }

    /* we won and everyone has voted */
    election_timeout_stop(e);

    if (votes > active_peers) {
        /* More votes than active peers: log both sides for diagnosis; the
         * election is still completed below.
         */
        GHashTableIter iter;
        gpointer entry = NULL;

        g_hash_table_iter_init(&iter, crm_peer_cache);
        while (g_hash_table_iter_next(&iter, NULL, &entry)) {
            const crm_node_t *peer = entry;

            if (crm_is_peer_active(peer)) {
                crm_err("member: %s proc=%.32x", peer->uname, peer->processes);
            }
        }

        g_hash_table_iter_init(&iter, e->voted);
        while (g_hash_table_iter_next(&iter, &entry, NULL)) {
            const char *voter = entry;

            crm_err("voted: %s", voter);
        }
    }

    /* Election completion handling */
    election_complete(e);
    return TRUE;
}
コード例 #12
0
/*
 * g_hash_table_foreach_remove() predicate: select inactive peers for removal.
 *
 * key:       unused
 * value:     the crm_node_t entry being visited
 * user_data: optional crm_node_t filter; when non-NULL only entries with the
 *            same id are considered
 *
 * Returns TRUE when the entry should be removed, FALSE to keep it.
 */
static gboolean
crm_reap_dead_member(gpointer key, gpointer value, gpointer user_data)
{
    crm_node_t *peer = value;
    crm_node_t *filter = user_data;

    if ((filter != NULL) && (peer->id != filter->id)) {
        return FALSE;
    }

    if (crm_is_peer_active(peer)) {
        return FALSE;
    }

    crm_notice("Removing %s/%u from the membership list", peer->uname, peer->id);
    return TRUE;
}
コード例 #13
0
ファイル: join_dc.c プロジェクト: krast/pacemaker
/*
 * GHashTable foreach callback: offer cluster membership to one active node.
 *
 * key:       unused
 * value:     the crm_node_t entry to make an offer to (must not be NULL)
 * user_data: when non-NULL, nodes whose join phase is already beyond
 *            crm_join_none are skipped
 */
static void
join_make_offer(gpointer key, gpointer value, gpointer user_data)
{
    crm_node_t *node = value;
    xmlNode *welcome = NULL;
    gboolean skip_known = (user_data != NULL);

    CRM_ASSERT(node != NULL);

    if (!crm_is_peer_active(node)) {
        crm_info("Not making an offer to %s: not active (%s)", node->uname, node->state);

        /* A lost node with no expected state yet is recorded as down */
        if ((node->expected == NULL) && safe_str_eq(node->state, CRM_NODE_LOST)) {
            crm_update_peer_expected(__FUNCTION__, node, CRMD_JOINSTATE_DOWN);
        }
        return;
    }

    if (node->uname == NULL) {
        crm_err("No recipient for welcome message");
        return;
    }

    if (saved_ccm_membership_id != crm_peer_seq) {
        saved_ccm_membership_id = crm_peer_seq;
        crm_info("Making join offers based on membership %llu", crm_peer_seq);
    }

    if (skip_known && (node->join > crm_join_none)) {
        crm_info("Skipping %s: already known %d", node->uname, node->join);
        return;
    }

    /* Start the node from a clean join phase, then send the offer */
    crm_update_peer_join(__FUNCTION__, node, crm_join_none);

    welcome = create_request(CRM_OP_JOIN_OFFER, NULL, node->uname,
                             CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL);
    crm_xml_add_int(welcome, F_CRM_JOIN_ID, current_join_id);

    crm_info("join-%d: Sending offer to %s", current_join_id, node->uname);

    send_cluster_message(node, crm_msg_crmd, welcome, TRUE);
    free_xml(welcome);

    crm_update_peer_join(__FUNCTION__, node, crm_join_welcomed);
}
コード例 #14
0
/*
 * Cast the local node's vote in an election.
 *
 * e: election object; nothing is done when it is NULL or when the local node
 *    is not yet an active peer
 *
 * Broadcasts a CRM_OP_VOTE message carrying the local node's uuid, the
 * incremented election counter and the node's uptime, discards any previously
 * recorded votes and starts the election timer.
 */
void
election_vote(election_t *e)
{
    struct timeval uptime;
    xmlNode *ballot = NULL;
    crm_node_t *local = NULL;

    if (e == NULL) {
        crm_trace("Not voting in election: not initialized");
        return;
    }

    local = crm_get_peer(0, e->uname);
    if ((local == NULL) || !crm_is_peer_active(local)) {
        crm_trace("Cannot vote yet: %p", local);
        return;
    }

    e->state = election_in_progress;

    /* Build the CRM_OP_VOTE message */
    ballot = create_request(CRM_OP_VOTE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);

    e->count++;
    crm_xml_add(ballot, F_CRM_ELECTION_OWNER, local->uuid);
    crm_xml_add_int(ballot, F_CRM_ELECTION_ID, e->count);

    crm_uptime(&uptime);
    crm_xml_add_int(ballot, F_CRM_ELECTION_AGE_S, uptime.tv_sec);
    crm_xml_add_int(ballot, F_CRM_ELECTION_AGE_US, uptime.tv_usec);

    /* Send the vote message to the cluster */
    send_cluster_message(NULL, crm_msg_crmd, ballot, TRUE);
    free_xml(ballot);

    crm_debug("Started election %d", e->count);

    /* Votes recorded for an earlier round no longer apply */
    if (e->voted != NULL) {
        g_hash_table_destroy(e->voted);
        e->voted = NULL;
    }

    /* Start the election timer */
    election_timeout_start(e);
}
コード例 #15
0
ファイル: membership.c プロジェクト: bcavanagh/pacemaker
/*
 * Purge an inactive peer from the peer caches.
 *
 * id:   numeric id of the peer; used for the id-cache lookup when the name
 *       lookup finds nothing and id is non-zero
 * name: peer name to look up in crm_peer_cache, or NULL to look up by id only
 *
 * Returns the number of entries removed from crm_peer_cache; 0 when the
 * caches are not initialized, the peer is unknown, or it is still active.
 */
guint
reap_crm_member(uint32_t id, const char *name)
{
    int matches = 0;
    crm_node_t *node = NULL;
    /* name may legitimately be NULL (lookup by id only); passing NULL to a
     * "%s" conversion is undefined, so log a placeholder instead.
     */
    const char *label = (name != NULL) ? name : "<none>";

    if (crm_peer_cache == NULL || crm_peer_id_cache == NULL) {
        crm_trace("Nothing to do, cache not initialized");
        return 0;
    }

    if (name) {
        node = g_hash_table_lookup(crm_peer_cache, name);
    }

    if (node == NULL && id > 0) {
        node = g_hash_table_lookup(crm_peer_id_cache, GUINT_TO_POINTER(id));
    }

    if (node == NULL) {
        crm_info("Peer %u/%s cannot be purged: does not exist", id, label);
        return 0;
    }

    if (crm_is_peer_active(node)) {
        crm_warn("Peer %u/%s cannot be purged: still active", id, label);

    } else {
        /* NOTE(review): the id cache is purged with the caller-supplied id,
         * not node->id; when the node was found by name these may differ
         * (e.g. id == 0) - confirm whether that is intended.
         */
        if (g_hash_table_remove(crm_peer_id_cache, GUINT_TO_POINTER(id))) {
            crm_notice("Purged dead peer %u/%s from the uuid cache", id, label);

        } else if (id) {
            crm_warn("Peer %u/%s was not found in the ID cache", id, label);
        }

        matches = g_hash_table_foreach_remove(crm_peer_cache, crm_reap_dead_member, node);
        crm_notice("Purged %d dead peers with id=%u from the membership cache", matches, id);
    }

    return matches;
}
コード例 #16
0
ファイル: election.c プロジェクト: ClusterLabs/pacemaker
/*!
 * \brief Start a new election round by offering the local node's candidacy
 *
 * Reset the election, broadcast a "vote" message carrying the local node's
 * uuid, the (incremented) election counter and the node's uptime, then start
 * the election timer.
 *
 * \param[in] e      Election object
 * \note Any nodes agreeing to the candidacy will send a "no-vote" reply, and if
 *       all active peers do so, or if the election times out, the local node
 *       wins the election. (If we lose to any peer vote, we will stop the
 *       timer, so a timeout means we did not lose -- either some peer did not
 *       vote, or we did not call election_check() in time.)
 */
void
election_vote(election_t *e)
{
    struct timeval uptime;
    xmlNode *ballot = NULL;
    crm_node_t *local = NULL;

    if (e == NULL) {
        crm_trace("Election vote requested, but no election available");
        return;
    }

    local = crm_get_peer(0, e->uname);
    if ((local == NULL) || !crm_is_peer_active(local)) {
        crm_trace("Cannot vote in %s yet: local node not connected to cluster",
                  e->name);
        return;
    }

    /* Reset the election before marking the new round as in progress */
    election_reset(e);
    e->state = election_in_progress;

    ballot = create_request(CRM_OP_VOTE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);

    e->count++;
    crm_xml_add(ballot, F_CRM_ELECTION_OWNER, local->uuid);
    crm_xml_add_int(ballot, F_CRM_ELECTION_ID, e->count);

    crm_uptime(&uptime);
    crm_xml_add_int(ballot, F_CRM_ELECTION_AGE_S, uptime.tv_sec);
    crm_xml_add_int(ballot, F_CRM_ELECTION_AGE_US, uptime.tv_usec);

    send_cluster_message(NULL, crm_msg_crmd, ballot, TRUE);
    free_xml(ballot);

    crm_debug("Started %s round %d", e->name, e->count);
    election_timeout_start(e);
}
コード例 #17
0
ファイル: join_dc.c プロジェクト: kiranmurari/pacemaker
/*	 A_DC_JOIN_OFFER_ONE	*/
/*
 * FSA action: answer a join request from a single node.
 *
 * The requesting host is read from the message attached to msg_data. If that
 * host is an active peer, a join offer is made to it and to the local node,
 * and a transition is started.
 *
 * cur_state: used for logging only
 * msg_data:  carries the request message being answered
 * (action, cause and current_input are not read.)
 */
void
do_dc_join_offer_one(long long action,
                     enum crmd_fsa_cause cause,
                     enum crmd_fsa_state cur_state,
                     enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    ha_msg_input_t *request = NULL;
    crm_node_t *peer = NULL;
    const char *task = NULL;
    const char *sender = NULL;

    if (msg_data->data == NULL) {
        crm_info("A new node joined - wait until it contacts us");
        return;
    }

    request = fsa_typed_data(fsa_dt_ha_msg);
    if (request == NULL) {
        crm_err("Attempt to send welcome message without a message to reply to!");
        return;
    }

    sender = crm_element_value(request->msg, F_CRM_HOST_FROM);
    if (sender == NULL) {
        crm_err("Attempt to send welcome message without a host to reply to!");
        return;
    }

    peer = crm_get_peer(0, sender);
    if (!crm_is_peer_active(peer)) {
        crm_err("%s is not a fully active member of our partition", sender);
        return;
    }

    task = crm_element_value(request->msg, F_CRM_TASK);

    if ((cur_state == S_INTEGRATION) || (cur_state == S_FINALIZE_JOIN)) {
        /* note: it _is_ possible that a node will have been
         *  sick or starting up when the original offer was made.
         *  however, it will either re-announce itself in due course
         *  _or_ we can re-store the original offer on the client.
         */
        crm_trace("(Re-)offering membership to %s...", sender);
    }

    crm_info("join-%d: Processing %s request from %s in state %s",
             current_join_id, task, sender, fsa_state2string(cur_state));

    join_make_offer(NULL, peer, NULL);

    /* always offer to the DC (ourselves)
     * this ensures the correct value for max_generation_from
     */
    peer = crm_get_peer(0, fsa_our_uname);
    join_make_offer(NULL, peer, NULL);

    /* this was a genuine join request, cancel any existing
     * transition and invoke the PE
     */
    start_transition(fsa_state);

    /* dont waste time by invoking the pe yet; */
    crm_debug("Waiting on %d outstanding join acks for join-%d",
              g_hash_table_size(welcomed_nodes), current_join_id);
}
コード例 #18
0
ファイル: cpg.c プロジェクト: oalbrigt/pacemaker
/*
 * CPG membership-change callback.
 *
 * handle:      CPG handle, used to obtain the local node id
 * groupName:   name of the group whose membership changed (logged)
 * member_list: current members (member_list_entries entries)
 * left_list:   nodes that left (left_list_entries entries)
 * joined_list: nodes that joined (joined_list_entries entries)
 *
 * Marks the CPG process offline for peers that left and online for every
 * current member, and sets cpg_evicted when the local node id is not in
 * member_list. A function-static counter numbers the events in the logs.
 */
void
pcmk_cpg_membership(cpg_handle_t handle,
                    const struct cpg_name *groupName,
                    const struct cpg_address *member_list, size_t member_list_entries,
                    const struct cpg_address *left_list, size_t left_list_entries,
                    const struct cpg_address *joined_list, size_t joined_list_entries)
{
    static int counter = 0;

    int idx;
    gboolean still_member = FALSE;
    uint32_t our_id = get_local_nodeid(handle);

    /* Departed nodes: only peers that are already known are updated */
    for (idx = 0; idx < left_list_entries; idx++) {
        const struct cpg_address *gone = &left_list[idx];
        crm_node_t *peer = crm_find_peer(gone->nodeid, NULL);
        const char *peer_name = "<none>";

        if (peer != NULL) {
            peer_name = peer->uname;
        }

        crm_info("Node %u left group %s (peer=%s, counter=%d.%d)",
                 gone->nodeid, groupName->value, peer_name, counter, idx);

        if (peer != NULL) {
            crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, OFFLINESTATUS);
        }
    }

    /* Newly joined nodes are only logged */
    for (idx = 0; idx < joined_list_entries; idx++) {
        crm_info("Node %u joined group %s (counter=%d.%d)",
                 joined_list[idx].nodeid, groupName->value, counter, idx);
    }

    for (idx = 0; idx < member_list_entries; idx++) {
        const struct cpg_address *current = &member_list[idx];
        crm_node_t *peer = crm_get_peer(current->nodeid, NULL);
        const char *peer_name = "<none>";

        if (peer != NULL) {
            peer_name = peer->uname;
        }

        crm_info("Node %u still member of group %s (peer=%s, counter=%d.%d)",
                 current->nodeid, groupName->value, peer_name, counter, idx);

        /* Anyone that is sending us CPG messages must also be a _CPG_ member.
         * But it's _not_ safe to assume it's in the quorum membership.
         * We may have just found out it's dead and are processing the last
         * couple of messages it sent.
         */
        peer = crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, ONLINESTATUS);

        if ((peer != NULL) && (peer->state != NULL) && !crm_is_peer_active(peer)) {
            time_t now = time(NULL);

            /* The votes field is co-opted to remember when the mismatch was
             * first seen.
             */
            if (peer->votes == 0) {
                peer->votes = now;

            } else if (now > (60 + peer->votes)) {
                /* Still receiving messages a minute later: accept that our
                 * internal cache is probably wrong.
                 */
                crm_err("Node %s[%u] appears to be online even though we think it is dead", peer->uname, peer->id);
                if (crm_update_peer_state(__FUNCTION__, peer, CRM_NODE_MEMBER, 0)) {
                    peer->votes = 0;
                }
            }
        }

        if (current->nodeid == our_id) {
            still_member = TRUE;
        }
    }

    if (still_member == FALSE) {
        crm_err("We're not part of CPG group '%s' anymore!", groupName->value);
        cpg_evicted = TRUE;
    }

    counter++;
}
コード例 #19
0
ファイル: callbacks.c プロジェクト: dangzhiqiang/pacemaker
/*
 * Handle a peer status change reported to the controller.
 *
 * type: which aspect of the peer changed (uname, remote/node state, processes)
 * node: the peer whose status changed
 * data: previous value; read as a string for state changes and as a
 *       uint32_t process mask for process changes
 *
 * Sets R_PEER_DATA in fsa_input_register, logs the change, may register
 * I_ERROR or I_ELECTION inputs, and, when AM_I_DC is set, pushes an update of
 * the node's entry to the CIB status section. Ends by triggering the FSA
 * unless one of the early returns below is taken.
 */
void
peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data)
{
    uint32_t old = 0;
    uint32_t changed = 0;
    bool appeared = FALSE;
    const char *status = NULL;

    set_bit(fsa_input_register, R_PEER_DATA);
    if (node->uname == NULL) {
        /* Nothing below can be done for a peer without a name */
        return;
    }

    switch (type) {
        case crm_status_uname:
            /* If we've never seen the node, then it also wont be in the status section */
            crm_info("%s is now %s", node->uname, node->state);
            return;
        case crm_status_rstate:
            crm_info("Remote node %s is now %s (was %s)", node->uname, node->state, (const char *)data);
            /* Keep going: deliberate fall through into the node-state handling */
        case crm_status_nstate:
            crm_info("%s is now %s (was %s)", node->uname, node->state, (const char *)data);
            if (safe_str_eq(data, node->state)) {
                /* State did not change */
                return;
            } else if(safe_str_eq(CRM_NODE_MEMBER, node->state)) {
                appeared = TRUE;
            }
            break;
        case crm_status_processes:
            if (data) {
                /* XOR of old and new masks yields the process bits that flipped */
                old = *(const uint32_t *)data;
                changed = node->processes ^ old;
            }

            /* crmd_proc_update(node, proc_flags); */
            status = (node->processes & proc_flags) ? ONLINESTATUS : OFFLINESTATUS;
            crm_info("Client %s/%s now has status [%s] (DC=%s, changed=%6x)",
                     node->uname, peer2text(proc_flags), status,
                     AM_I_DC ? "true" : crm_str(fsa_our_dc), changed);

            if ((changed & proc_flags) == 0) {
                /* Peer process did not change */
                crm_trace("No change %6x %6x %6x", old, node->processes, proc_flags);
                return;
            } else if (is_not_set(fsa_input_register, R_CIB_CONNECTED)) {
                crm_trace("Not connected");
                return;
            } else if (fsa_state == S_STOPPING) {
                crm_trace("Stopping");
                return;
            }

            appeared = (node->processes & proc_flags) != 0;
            if (safe_str_eq(node->uname, fsa_our_uname) && (node->processes & proc_flags) == 0) {
                /* Did we get evicted? */
                crm_notice("Our peer connection failed");
                register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ERROR, NULL);

            } else if (safe_str_eq(node->uname, fsa_our_dc) && crm_is_peer_active(node) == FALSE) {
                /* Did the DC leave us? */
                crm_notice("Our peer on the DC (%s) is dead", fsa_our_dc);
                register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ELECTION, NULL);

            } else if(AM_I_DC && appeared == FALSE) {
                crm_info("Peer %s left us", node->uname);
                /* crm_update_peer_join(__FUNCTION__, node, crm_join_none); */
            }
            break;
    }

    /* Everything in this branch runs only when AM_I_DC is set */
    if (AM_I_DC) {
        xmlNode *update = NULL;
        int flags = node_update_peer;
        gboolean alive = crm_is_peer_active(node);
        /* Look for a pending "down" action that matches this node */
        crm_action_t *down = match_down_event(0, node->uuid, NULL, appeared);

        crm_trace("Alive=%d, appear=%d, down=%p", alive, appeared, down);

        if (alive && type == crm_status_processes) {
            register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
        }

        if (down) {
            const char *task = crm_element_value(down->xml, XML_LRM_ATTR_TASK);

            if (alive && safe_str_eq(task, CRM_OP_FENCE)) {
                crm_info("Node return implies stonith of %s (action %d) completed", node->uname,
                         down->id);
                erase_status_tag(node->uname, XML_CIB_TAG_LRM, cib_scope_local);
                erase_status_tag(node->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local);
                /* down->confirmed = TRUE; Only stonith-ng returning should imply completion */
                down->sent_update = TRUE;       /* Prevent tengine_stonith_callback() from calling send_stonith_update() */

            } else if (safe_str_eq(task, CRM_OP_FENCE)) {
                crm_trace("Waiting for stonithd to report the fencing of %s is complete", node->uname); /* via tengine_stonith_callback() */

            } else if (alive == FALSE) {
                crm_notice("%s of %s (op %d) is complete", task, node->uname, down->id);
                /* down->confirmed = TRUE; Only stonith-ng returning should imply completion */
                stop_te_timer(down->timer);

                /* Also include the join and expected fields in the CIB update below */
                flags |= node_update_join | node_update_expected;
                crmd_peer_down(node, FALSE);
                check_join_state(fsa_state, __FUNCTION__);

                update_graph(transition_graph, down);
                trigger_graph();

            } else {
                crm_trace("Other %p", down);
            }

        } else if (appeared == FALSE) {
            /* The peer went away without a matching down action */
            crm_notice("Stonith/shutdown of %s not matched", node->uname);

            crm_update_peer_join(__FUNCTION__, node, crm_join_none);
            check_join_state(fsa_state, __FUNCTION__);

            abort_transition(INFINITY, tg_restart, "Node failure", NULL);
            fail_incompletable_actions(transition_graph, node->uuid);

        } else {
            crm_trace("Other %p", down);
        }

        /* Push the node's refreshed entry to the CIB status section */
        update = do_update_node_cib(node, flags, NULL, __FUNCTION__);
        fsa_cib_anon_update(XML_CIB_TAG_STATUS, update,
                            cib_scope_local | cib_quorum_override | cib_can_create);
        free_xml(update);
    }

    trigger_fsa(fsa_source);
}
コード例 #20
0
ファイル: callbacks.c プロジェクト: beekhof/pacemaker
/*
 * React to a change in what is known about a node.
 *
 * type - which aspect of the node changed (name, state, or process mask)
 * node - the node the change applies to; must not be NULL
 * data - the previous value: for state changes it is compared against and
 *        logged as the old state string; for process changes it is read as a
 *        pointer to the old uint32_t process mask and may be NULL
 *
 * Name changes are only logged. State and process changes are logged and,
 * when this node is the DC, matched against an expected "down" action
 * before the node's entry in the CIB status section is updated.
 */
void
peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data)
{
    uint32_t old = 0;           /* previous process mask (process changes only) */
    uint32_t changed = 0;       /* process bits that differ from the previous mask */
    bool appeared = FALSE;      /* TRUE when the change means the node is (now) up */
    bool is_remote = is_set(node->flags, crm_remote_node);
    const char *status = NULL;

    /* Crmd waits to receive some information from the membership layer before
     * declaring itself operational. If this is being called for a cluster node,
     * indicate that we have it.
     */
    if (!is_remote) {
        set_bit(fsa_input_register, R_PEER_DATA);
    }

    /* Everything below identifies the node by name */
    if (node->uname == NULL) {
        return;
    }

    switch (type) {
        case crm_status_uname:
            /* If we've never seen the node, then it also won't be in the status section */
            crm_info("%s node %s is now %s",
                     (is_remote? "Remote" : "Cluster"),
                     node->uname, state_text(node->state));
            return;

        case crm_status_rstate:
        case crm_status_nstate:
            /* This callback should not be called unless the state actually
             * changed, but here's a failsafe just in case.
             */
            CRM_CHECK(safe_str_neq(data, node->state), return);

            crm_info("%s node %s is now %s (was %s)",
                     (is_remote? "Remote" : "Cluster"),
                     node->uname, state_text(node->state), state_text(data));

            /* Reaching the member state counts as the node appearing */
            if (safe_str_eq(CRM_NODE_MEMBER, node->state)) {
                appeared = TRUE;
                if (!is_remote) {
                    remove_stonith_cleanup(node->uname);
                }
            }

            crmd_alert_node_event(node);
            break;

        case crm_status_processes:
            /* Without a previous mask, "changed" stays 0 and we return below */
            if (data) {
                old = *(const uint32_t *)data;
                changed = node->processes ^ old;
            }

            status = (node->processes & proc_flags) ? ONLINESTATUS : OFFLINESTATUS;
            crm_info("Client %s/%s now has status [%s] (DC=%s, changed=%6x)",
                     node->uname, peer2text(proc_flags), status,
                     AM_I_DC ? "true" : crm_str(fsa_our_dc), changed);

            /* Only continue when a proc_flags bit changed, the CIB connection
             * is up, and we are not stopping.
             */
            if ((changed & proc_flags) == 0) {
                /* Peer process did not change */
                crm_trace("No change %6x %6x %6x", old, node->processes, proc_flags);
                return;
            } else if (is_not_set(fsa_input_register, R_CIB_CONNECTED)) {
                crm_trace("Not connected");
                return;
            } else if (fsa_state == S_STOPPING) {
                crm_trace("Stopping");
                return;
            }

            /* The peer has appeared if any of the proc_flags bits is now set */
            appeared = (node->processes & proc_flags) != 0;
            if (safe_str_eq(node->uname, fsa_our_uname) && (node->processes & proc_flags) == 0) {
                /* Did we get evicted? */
                crm_notice("Our peer connection failed");
                register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ERROR, NULL);

            } else if (safe_str_eq(node->uname, fsa_our_dc) && crm_is_peer_active(node) == FALSE) {
                /* Did the DC leave us? */
                crm_notice("Our peer on the DC (%s) is dead", fsa_our_dc);
                register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ELECTION, NULL);

                /* @COMPAT DC < 1.1.13: If a DC shuts down normally, we don't
                 * want to fence it. Newer DCs will send their shutdown request
                 * to all peers, who will update the DC's expected state to
                 * down, thus avoiding fencing. We can safely erase the DC's
                 * transient attributes when it leaves in that case. However,
                 * the only way to avoid fencing older DCs is to leave the
                 * transient attributes intact until it rejoins.
                 */
                if (compare_version(fsa_our_dc_version, "3.0.9") > 0) {
                    erase_status_tag(node->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local);
                }

            } else if(AM_I_DC && appeared == FALSE) {
                crm_info("Peer %s left us", node->uname);
                erase_status_tag(node->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local);
            }
            break;
    }

    /* Only the DC matches the change against expected actions and writes the
     * node state to the CIB.
     */
    if (AM_I_DC) {
        xmlNode *update = NULL;
        int flags = node_update_peer;   /* which parts of the node state to write */
        /* For remote nodes liveness is taken from the change itself; for
         * cluster nodes crm_is_peer_active() decides.
         */
        gboolean alive = is_remote? appeared : crm_is_peer_active(node);
        /* An action that expected this node to go down, or NULL if none matched */
        crm_action_t *down = match_down_event(node->uuid, appeared);

        crm_trace("Alive=%d, appeared=%d, down=%d",
                  alive, appeared, (down? down->id : -1));

        /* A live peer whose process list changed is queued as a node join */
        if (alive && type == crm_status_processes) {
            register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
        }

        if (down) {
            const char *task = crm_element_value(down->xml, XML_LRM_ATTR_TASK);

            if (safe_str_eq(task, CRM_OP_FENCE)) {

                /* tengine_stonith_callback() confirms fence actions */
                crm_trace("Updating CIB %s stonithd reported fencing of %s complete",
                          (down->confirmed? "after" : "before"), node->uname);

            } else if ((alive == FALSE) && safe_str_eq(task, CRM_OP_SHUTDOWN)) {
                /* The node went down as the shutdown action expected */
                crm_notice("%s of peer %s is complete "CRM_XS" op=%d",
                           task, node->uname, down->id);

                /* down->confirmed = TRUE; */
                stop_te_timer(down->timer);

                /* Join and expected state are only updated for cluster nodes */
                if (!is_remote) {
                    flags |= node_update_join | node_update_expected;
                    crmd_peer_down(node, FALSE);
                    check_join_state(fsa_state, __FUNCTION__);
                }

                update_graph(transition_graph, down);
                trigger_graph();

            } else {
                crm_trace("Node %s is %salive, was expected to %s (op %d)",
                          node->uname, (alive? "" : "not "), task, down->id);
            }

        } else if (appeared == FALSE) {
            /* The node went away without a matching fence/shutdown action */
            crm_notice("Stonith/shutdown of %s not matched", node->uname);

            if (!is_remote) {
                crm_update_peer_join(__FUNCTION__, node, crm_join_none);
                check_join_state(fsa_state, __FUNCTION__);
            }

            abort_transition(INFINITY, tg_restart, "Node failure", NULL);
            fail_incompletable_actions(transition_graph, node->uuid);

        } else {
            crm_trace("Node %s came up, was not expected to be down",
                      node->uname);
        }

        if (is_remote) {
            /* A pacemaker_remote node won't have its cluster status updated
             * in the CIB by membership-layer callbacks, so do it here.
             */
            flags |= node_update_cluster;

            /* Trigger resource placement on newly integrated nodes */
            if (appeared) {
                abort_transition(INFINITY, tg_restart,
                                 "pacemaker_remote node integrated", NULL);
            }
        }

        /* Update the CIB node state */
        update = create_node_state_update(node, flags, NULL, __FUNCTION__);
        fsa_cib_anon_update(XML_CIB_TAG_STATUS, update,
                            cib_scope_local | cib_quorum_override | cib_can_create);
        free_xml(update);
    }

    /* Runs for every change that was not filtered out by an early return */
    trigger_fsa(fsa_source);
}
コード例 #21
0
ファイル: join_dc.c プロジェクト: oalbrigt/pacemaker
/*	 A_DC_JOIN_PROCESS_REQ	*/
/*
 * Handle a node's reply to a join offer: decide whether to accept (ack) or
 * reject (nack) the node for the current join round, and track which
 * accepted node has supplied the best CIB generation so far.
 *
 * action, cause, current_input - not used directly in this function
 * cur_state - current FSA state, handed on to check_join_state()
 * msg_data  - FSA data from which the join message is taken via
 *             fsa_typed_data()
 *
 * A request is ignored (with an error logged) when it carries no message or
 * no sender name. A request for a different join round is ignored after
 * re-checking the join state. Otherwise the sender's join phase and expected
 * state are updated to reflect the ack/nack decision.
 */
void
do_dc_join_filter_offer(long long action,
                        enum crmd_fsa_cause cause,
                        enum crmd_fsa_state cur_state,
                        enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    xmlNode *generation = NULL;

    int cmp = 0;
    int join_id = -1;
    gboolean ack_nack_bool = TRUE;
    const char *ack_nack = CRMD_JOINSTATE_MEMBER;
    const char *join_from = NULL;
    const char *ref = NULL;
    crm_node_t *join_node = NULL;
    ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);

    /* Nothing can be processed without the message itself */
    if (join_ack == NULL || join_ack->msg == NULL) {
        crm_err("Ignoring join request without message data");
        return;
    }

    join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM);
    ref = crm_element_value(join_ack->msg, F_CRM_REFERENCE);

    /* The sender name is looked up, duplicated and logged below, so it must
     * be present.
     */
    if (join_from == NULL) {
        crm_err("Ignoring join request without a sender");
        return;
    }

    join_node = crm_get_peer(0, join_from);

    crm_debug("Processing req from %s", join_from);

    generation = join_ack->xml;
    crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id);

    if (max_generation_xml != NULL && generation != NULL) {
        int lpc = 0;

        /* Compared in this order; the first field that differs decides */
        const char *attributes[] = {
            XML_ATTR_GENERATION_ADMIN,
            XML_ATTR_GENERATION,
            XML_ATTR_NUMUPDATES,
        };

        for (lpc = 0; cmp == 0 && lpc < DIMOF(attributes); lpc++) {
            cmp = compare_int_fields(max_generation_xml, generation, attributes[lpc]);
        }
    }

    if (join_id != current_join_id) {
        crm_debug("Invalid response from %s: join-%d vs. join-%d",
                  join_from, join_id, current_join_id);
        check_join_state(cur_state, __FUNCTION__);
        return;

    } else if (join_node == NULL || crm_is_peer_active(join_node) == FALSE) {
        crm_err("Node %s is not a member", join_from);
        ack_nack_bool = FALSE;

    } else if (generation == NULL) {
        crm_err("Generation was NULL");
        ack_nack_bool = FALSE;

    } else if (max_generation_xml == NULL) {
        /* First usable generation seen in this round */
        max_generation_xml = copy_xml(generation);
        max_generation_from = strdup(join_from);

    } else if (cmp < 0 || (cmp == 0 && safe_str_eq(join_from, fsa_our_uname))) {
        /* The sender's generation wins; on a tie our own copy is preferred.
         * max_generation_xml is known to be non-NULL here because the
         * previous branch handled the NULL case.
         */
        crm_debug("%s has a better generation number than"
                  " the current max %s", join_from, max_generation_from);
        crm_log_xml_debug(max_generation_xml, "Max generation");
        crm_log_xml_debug(generation, "Their generation");

        free(max_generation_from);
        free_xml(max_generation_xml);

        max_generation_from = strdup(join_from);
        max_generation_xml = copy_xml(join_ack->xml);
    }

    if (ack_nack_bool == FALSE) {
        /* NACK this client */
        ack_nack = CRMD_JOINSTATE_NACK;
        crm_update_peer_join(__FUNCTION__, join_node, crm_join_nack);
        crm_err("Rejecting cluster join request from %s " CRM_XS
                " NACK join-%d ref=%s", join_from, join_id, ref);

    } else {
        crm_debug("join-%d: Welcoming node %s (ref %s)", join_id, join_from, ref);
        crm_update_peer_join(__FUNCTION__, join_node, crm_join_integrated);
    }

    crm_update_peer_expected(__FUNCTION__, join_node, ack_nack);

    crm_debug("%u nodes have been integrated into join-%d",
              crmd_join_phase_count(crm_join_integrated), join_id);


    if (check_join_state(cur_state, __FUNCTION__) == FALSE) {
        /* don't waste time by invoking the PE yet; */
        crm_debug("join-%d: Still waiting on %d outstanding offers",
                  join_id, crmd_join_phase_count(crm_join_welcomed));
    }
}
コード例 #22
0
/*	A_ELECTION_COUNT	*/
/*
 * Process one election message and work out what it means for election e.
 *
 * e       - election the message belongs to; if NULL we concede immediately
 * vote    - the election message received (must not be NULL)
 * can_win - whether this node is eligible to win
 *
 * Returns election_error if vote is NULL, election_lost when we concede or
 * cannot take part, election_start when we out-rank the sender and a new
 * round should be started, and the unchanged e->state when the message was
 * merely recorded or discarded.
 */
enum election_result
election_count_vote(election_t *e, xmlNode *vote, bool can_win)
{
    /* Kept across calls */
    static int election_wins = 0;           /* wins inside the current storm window */
    static time_t expires = 0;              /* end of the current storm window */
    static time_t last_election_loss = 0;   /* 0 when no recent loss is remembered */

    int election_id = -1;
    int log_level = LOG_INFO;
    gboolean use_born_on = FALSE;
    gboolean done = FALSE;
    gboolean we_lose = FALSE;
    gboolean dampened = FALSE;
    const char *op = NULL;
    const char *from = NULL;
    const char *election_owner = NULL;
    const char *reason = "unknown";
    crm_node_t *our_node = NULL;
    crm_node_t *your_node = NULL;
    xmlNode *novote = NULL;
    time_t now = time(NULL);

    /* if the membership copy is NULL we REALLY shouldn't be voting
     * the question is how we managed to get here.
     */

    CRM_CHECK(vote != NULL, return election_error);

    if (e == NULL) {
        crm_info("Not voting in election: not initialized");
        return election_lost;
    }
    if (crm_peer_cache == NULL) {
        crm_info("Not voting in election: no peer cache");
        return election_lost;
    }

    /* Pull the fields we need out of the message */
    op = crm_element_value(vote, F_CRM_TASK);
    from = crm_element_value(vote, F_CRM_HOST_FROM);
    election_owner = crm_element_value(vote, F_CRM_ELECTION_OWNER);
    crm_element_value_int(vote, F_CRM_ELECTION_ID, &election_id);

    your_node = crm_get_peer(0, from);
    our_node = crm_get_peer(0, e->uname);

    if (e->voted == NULL) {
        crm_debug("Created voted hash");
        e->voted = g_hash_table_new_full(crm_str_hash, g_str_equal,
                                         g_hash_destroy_str, g_hash_destroy_str);
    }

    /* The "born" comparison is only applied on these two cluster types */
    use_born_on = (is_heartbeat_cluster() || is_classic_ais_cluster()) ? TRUE : FALSE;

    /* The order of the checks below matters: the first match decides */
    if (!can_win) {
        reason = "Not eligible";
        we_lose = TRUE;

    } else if (our_node == NULL || !crm_is_peer_active(our_node)) {
        reason = "We are not part of the cluster";
        log_level = LOG_ERR;
        we_lose = TRUE;

    } else if (election_id != e->count && crm_str_eq(our_node->uuid, election_owner, TRUE)) {
        log_level = LOG_TRACE;
        reason = "Superseded";
        done = TRUE;

    } else if (your_node == NULL || !crm_is_peer_active(your_node)) {
        /* Possibly we cached the message in the FSA queue at a point that it wasn't */
        reason = "Peer is not part of our cluster";
        log_level = LOG_WARNING;
        done = TRUE;

    } else if (crm_str_eq(op, CRM_OP_NOVOTE, TRUE)) {
        /* The table owns both strings once they are inserted */
        char *voted_op = strdup(op);
        char *voter = strdup(from);

        CRM_ASSERT(crm_str_eq(our_node->uuid, election_owner, TRUE));

        /* remember that this node has answered */
        g_hash_table_replace(e->voted, voter, voted_op);
        reason = "Recorded";
        done = TRUE;

    } else {
        struct timeval peer_uptime;
        const char *peer_version = crm_element_value(vote, F_CRM_VERSION);
        int uptime_s = 0;
        int uptime_us = 0;
        int age = 0;

        crm_element_value_int(vote, F_CRM_ELECTION_AGE_S, &uptime_s);
        crm_element_value_int(vote, F_CRM_ELECTION_AGE_US, &uptime_us);

        peer_uptime.tv_sec = uptime_s;
        peer_uptime.tv_usec = uptime_us;

        age = crm_compare_age(peer_uptime);

        if (crm_str_eq(from, e->uname, TRUE)) {
            /* Our own vote came back to us: just note that we voted */
            char *voted_op = strdup(op);
            char *voter = strdup(from);

            CRM_ASSERT(crm_str_eq(our_node->uuid, election_owner, TRUE));

            g_hash_table_replace(e->voted, voter, voted_op);
            reason = "Recorded";
            done = TRUE;

        } else if (compare_version(peer_version, CRM_FEATURE_SET) < 0) {
            reason = "Version";
            we_lose = TRUE;

        } else if (compare_version(peer_version, CRM_FEATURE_SET) > 0) {
            reason = "Version";

        } else if (age < 0) {
            reason = "Uptime";
            we_lose = TRUE;

        } else if (age > 0) {
            reason = "Uptime";

            /* TODO: Check for y(our) born < 0 */
        } else if (use_born_on && your_node->born < our_node->born) {
            reason = "Born";
            we_lose = TRUE;

        } else if (use_born_on && your_node->born > our_node->born) {
            reason = "Born";

        } else if (e->uname == NULL) {
            reason = "Unknown host name";
            we_lose = TRUE;

        } else if (strcasecmp(e->uname, from) > 0) {
            reason = "Host name";
            we_lose = TRUE;

        } else {
            /* Equal names were already handled as our own vote above, so
             * only the "less than" case can remain.
             */
            reason = "Host name";
            CRM_ASSERT(strcasecmp(e->uname, from) < 0);
        }
    }

    /* Election storm detection: count wins inside a STORM_INTERVAL window */
    if (expires < now) {
        election_wins = 0;
        expires = now + STORM_INTERVAL;

    } else if (!done && !we_lose) {
        int peers = 1 + g_hash_table_size(crm_peer_cache);

        /* If every node has to vote down every other node, thats N*(N-1) total elections
         * Allow some leway before _really_ complaining
         */
        election_wins++;
        if (election_wins > (peers * peers)) {
            crm_warn("Election storm detected: %d elections in %d seconds", election_wins,
                     STORM_INTERVAL);
            election_wins = 0;
            expires = now + STORM_INTERVAL;
            crm_write_blackbox(0, NULL);
        }
    }

    /* Message fully handled: nothing about the election changes */
    if (done) {
        do_crm_log(log_level + 1, "Election %d (current: %d, owner: %s): Processed %s from %s (%s)",
                   election_id, e->count, election_owner, op, from, reason);
        return e->state;
    }

    if (!we_lose) {
        do_crm_log(log_level, "Election %d (owner: %s) pass: %s from %s (%s)",
                   election_id, election_owner, op, from, reason);

        /* A loss within the last loss_dampen seconds stops us from starting
         * another round; in that case we fall through and concede instead.
         */
        dampened = (last_election_loss != 0)
            && (now - last_election_loss <= (time_t) loss_dampen);

        if (!dampened) {
            last_election_loss = 0;
            election_timeout_stop(e);

            /* Start a new election by voting down this, and other, peers */
            e->state = election_start;
            return e->state;
        }

        crm_info("Election %d ignore: We already lost an election less than %ds ago (%s)",
                 election_id, loss_dampen, ctime(&last_election_loss));
    }

    /* Concede: tell the sender we are not voting against it */
    novote = create_request(CRM_OP_NOVOTE, NULL, from,
                            CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);

    do_crm_log(log_level, "Election %d (owner: %s) lost: %s from %s (%s)",
               election_id, election_owner, op, from, reason);

    election_timeout_stop(e);

    crm_xml_add(novote, F_CRM_ELECTION_OWNER, election_owner);
    crm_xml_add_int(novote, F_CRM_ELECTION_ID, election_id);

    send_cluster_message(your_node, crm_msg_crmd, novote, TRUE);
    free_xml(novote);

    last_election_loss = now;
    e->state = election_lost;
    return e->state;
}
コード例 #23
0
ファイル: election.c プロジェクト: jnewland/pacemaker
/*	A_ELECTION_COUNT	*/
/*
 * FSA action: process an election message taken from msg_data and decide
 * whether it is merely recorded, whether we concede to the sender, or
 * whether we out-rank the sender and start a new election.
 *
 * action, cause, current_input - not used directly in this function
 * cur_state - current FSA state; while S_STARTING we always concede
 * msg_data  - FSA data carrying the election message
 *
 * Conceding sends a no-vote to the sender, gives up the DC role if we held
 * it, and remembers the time of the loss. Out-ranking the sender registers
 * I_ELECTION unless we lost an election less than loss_dampen seconds ago.
 */
void
do_election_count_vote(long long action,
                       enum crmd_fsa_cause cause,
                       enum crmd_fsa_state cur_state,
                       enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    struct timeval your_age;
    int age = 0;
    int age_sec = 0;
    int age_usec = 0;
    int election_id = -1;
    int log_level = LOG_INFO;
    gboolean use_born_on = FALSE;
    gboolean done = FALSE;
    gboolean we_loose = FALSE;
    const char *op = NULL;
    const char *vote_from = NULL;
    const char *your_version = NULL;
    const char *election_owner = NULL;
    const char *reason = "unknown";
    crm_node_t *our_node = NULL, *your_node = NULL;
    ha_msg_input_t *vote = fsa_typed_data(fsa_dt_ha_msg);

    /* Time of our most recent loss; 0 when none is being remembered */
    static time_t last_election_loss = 0;

    /* if the membership copy is NULL we REALLY shouldnt be voting
     * the question is how we managed to get here.
     */

    CRM_CHECK(msg_data != NULL, return);
    CRM_CHECK(crm_peer_cache != NULL, return);
    CRM_CHECK(vote != NULL, crm_err("Bogus data from %s", msg_data->origin); return);
    CRM_CHECK(vote->msg != NULL, crm_err("Bogus data from %s", msg_data->origin); return);

    op = crm_element_value(vote->msg, F_CRM_TASK);
    vote_from = crm_element_value(vote->msg, F_CRM_HOST_FROM);
    your_version = crm_element_value(vote->msg, F_CRM_VERSION);
    election_owner = crm_element_value(vote->msg, F_CRM_ELECTION_OWNER);
    crm_element_value_int(vote->msg, F_CRM_ELECTION_ID, &election_id);

    /* Read the sender's age into real ints and convert afterwards.
     * tv_sec and tv_usec are not necessarily int-sized, so writing to them
     * through an int pointer would only set part of the field.
     */
    crm_element_value_int(vote->msg, F_CRM_ELECTION_AGE_S, &age_sec);
    crm_element_value_int(vote->msg, F_CRM_ELECTION_AGE_US, &age_usec);

    your_age.tv_sec = age_sec;
    your_age.tv_usec = age_usec;

    /* A message without a sender is treated as if it came from ourselves */
    CRM_CHECK(vote_from != NULL, vote_from = fsa_our_uname);

    your_node = crm_get_peer(0, vote_from);
    our_node = crm_get_peer(0, fsa_our_uname);

    if (voted == NULL) {
        crm_debug("Created voted hash");
        voted = g_hash_table_new_full(crm_str_hash, g_str_equal,
                                      g_hash_destroy_str, g_hash_destroy_str);
    }

    /* The "born" comparison is only applied on these two cluster types */
    if (is_heartbeat_cluster()) {
        use_born_on = TRUE;
    } else if (is_classic_ais_cluster()) {
        use_born_on = TRUE;
    }

    age = crm_compare_age(your_age);

    /* The order of the checks below matters: the first match decides */
    if (cur_state == S_STARTING) {
        reason = "Still starting";
        we_loose = TRUE;

    } else if (our_node == NULL || crm_is_peer_active(our_node) == FALSE) {
        reason = "We are not part of the cluster";
        log_level = LOG_ERR;
        we_loose = TRUE;

    } else if (election_id != current_election_id && crm_str_eq(fsa_our_uuid, election_owner, TRUE)) {
        log_level = LOG_DEBUG_2;
        reason = "Superceeded";
        done = TRUE;

    } else if (your_node == NULL || crm_is_peer_active(your_node) == FALSE) {
        /* Possibly we cached the message in the FSA queue at a point that it wasn't */
        reason = "Peer is not part of our cluster";
        log_level = LOG_WARNING;
        done = TRUE;

    } else if (crm_str_eq(op, CRM_OP_NOVOTE, TRUE)) {
        /* The table owns both strings once they are inserted */
        char *op_copy = strdup(op);
        char *uname_copy = strdup(vote_from);

        CRM_ASSERT(crm_str_eq(fsa_our_uuid, election_owner, TRUE));

        /* update the list of nodes that have voted */
        g_hash_table_replace(voted, uname_copy, op_copy);
        reason = "Recorded";
        done = TRUE;

    } else if (crm_str_eq(vote_from, fsa_our_uname, TRUE)) {
        char *op_copy = strdup(op);
        char *uname_copy = strdup(vote_from);

        CRM_ASSERT(crm_str_eq(fsa_our_uuid, election_owner, TRUE));

        /* update ourselves in the list of nodes that have voted */
        g_hash_table_replace(voted, uname_copy, op_copy);
        reason = "Recorded";
        done = TRUE;

    } else if (compare_version(your_version, CRM_FEATURE_SET) < 0) {
        reason = "Version";
        we_loose = TRUE;

    } else if (compare_version(your_version, CRM_FEATURE_SET) > 0) {
        reason = "Version";

    } else if (age < 0) {
        reason = "Uptime";
        we_loose = TRUE;

    } else if (age > 0) {
        reason = "Uptime";

        /* TODO: Check for y(our) born < 0 */
    } else if (use_born_on && your_node->born < our_node->born) {
        reason = "Born";
        we_loose = TRUE;

    } else if (use_born_on && your_node->born > our_node->born) {
        reason = "Born";

    } else if (fsa_our_uname == NULL) {
        reason = "Unknown host name";
        we_loose = TRUE;

    } else if (strcasecmp(fsa_our_uname, vote_from) > 0) {
        reason = "Host name";
        we_loose = TRUE;

    } else {
        /* Our name does not compare greater than the sender's, and a vote
         * from ourselves was handled above, so the names must differ: we win.
         */
        reason = "Host name";
        CRM_ASSERT(strcmp(fsa_our_uname, vote_from) != 0);
    }

    if (done) {
        /* Message fully handled: nothing about the election changes */
        do_crm_log(log_level + 1, "Election %d (current: %d, owner: %s): Processed %s from %s (%s)",
                   election_id, current_election_id, election_owner, op, vote_from, reason);

    } else if (we_loose) {
        /* Concede: tell the sender we are not voting against it */
        xmlNode *novote = create_request(CRM_OP_NOVOTE, NULL, vote_from,
                                         CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);

        do_crm_log(log_level, "Election %d (owner: %s) lost: %s from %s (%s)",
                   election_id, election_owner, op, vote_from, reason);
        update_dc(NULL);

        crm_timer_stop(election_timeout);
        if (fsa_input_register & R_THE_DC) {
            crm_trace("Give up the DC to %s", vote_from);
            register_fsa_input(C_FSA_INTERNAL, I_RELEASE_DC, NULL);

        } else if (cur_state != S_STARTING) {
            crm_trace("We werent the DC anyway");
            register_fsa_input(C_FSA_INTERNAL, I_PENDING, NULL);
        }

        crm_xml_add(novote, F_CRM_ELECTION_OWNER, election_owner);
        crm_xml_add_int(novote, F_CRM_ELECTION_ID, election_id);

        send_cluster_message(crm_get_peer(0, vote_from), crm_msg_crmd, novote, TRUE);
        free_xml(novote);

        fsa_cib_conn->cmds->set_slave(fsa_cib_conn, cib_scope_local);

        last_election_loss = time(NULL);

    } else {
        do_crm_log(log_level, "Election %d (owner: %s) pass: %s from %s (%s)",
                   election_id, election_owner, op, vote_from, reason);

        /* Do not start another election too soon after losing one */
        if (last_election_loss) {
            time_t tm_now = time(NULL);

            if (tm_now - last_election_loss < (time_t) loss_dampen) {
                crm_info("Election %d ignore: We already lost an election less than %ds ago (%s)",
                         election_id, loss_dampen, ctime(&last_election_loss));
                update_dc(NULL);
                return;
            }
            last_election_loss = 0;
        }

        /* Start a new round with a fresh record of who has voted */
        register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
        g_hash_table_destroy(voted);
        voted = NULL;
    }
}