Exemplo n.º 1
0
/*
 * Hash-table iteration callback: send a join offer to a single cluster peer.
 *
 * key       - unused
 * value     - the peer (crm_node_t *); asserted to be non-NULL
 * user_data - when non-NULL, a peer whose join phase is already past
 *             crm_join_none is skipped instead of being offered again
 *
 * Inactive peers and peers without a name never receive an offer.
 */
static void
join_make_offer(gpointer key, gpointer value, gpointer user_data)
{
    crm_node_t *peer = value;
    xmlNode *request = NULL;

    CRM_ASSERT(peer != NULL);

    if (!crm_is_peer_active(peer)) {
        crm_info("Not making an offer to %s: not active (%s)", peer->uname, peer->state);

        /* A lost peer with no recorded expectation is marked as expected down */
        if ((peer->expected == NULL) && safe_str_eq(peer->state, CRM_NODE_LOST)) {
            crm_update_peer_expected(__FUNCTION__, peer, CRMD_JOINSTATE_DOWN);
        }
        return;
    }

    if (peer->uname == NULL) {
        crm_err("No recipient for welcome message");
        return;
    }

    /* Log once per membership change which membership the offers are based on */
    if (crm_peer_seq != saved_ccm_membership_id) {
        saved_ccm_membership_id = crm_peer_seq;
        crm_info("Making join offers based on membership %llu", crm_peer_seq);
    }

    if (user_data && (peer->join > crm_join_none)) {
        crm_info("Skipping %s: already known %d", peer->uname, peer->join);
        return;
    }

    /* Reset the join phase before the offer goes out */
    crm_update_peer_join(__FUNCTION__, peer, crm_join_none);

    request = create_request(CRM_OP_JOIN_OFFER, NULL, peer->uname,
                             CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL);
    crm_xml_add_int(request, F_CRM_JOIN_ID, current_join_id);

    crm_info("join-%d: Sending offer to %s", current_join_id, peer->uname);
    send_cluster_message(peer, crm_msg_crmd, request, TRUE);
    free_xml(request);

    /* Only the join phase is advanced here; the peer's expected state is
     * left untouched on this path.
     */
    crm_update_peer_join(__FUNCTION__, peer, crm_join_welcomed);
}
Exemplo n.º 2
0
/*
 * Broadcast the local throttle mode to the cluster, but only when it differs
 * from the mode announced by the previous call.
 *
 * mode - throttle state to announce
 */
static void
throttle_send_command(enum throttle_state_e mode)
{
    /* Mode sent by the most recent broadcast; -1 until the first one */
    static enum throttle_state_e last = -1;
    xmlNode *request = NULL;

    if (mode == last) {
        return;                 /* nothing changed, nothing to announce */
    }

    crm_info("New throttle mode: %.4x (was %.4x)", mode, last);
    last = mode;

    request = create_request(CRM_OP_THROTTLE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
    crm_xml_add_int(request, F_CRM_THROTTLE_MODE, mode);
    crm_xml_add_int(request, F_CRM_THROTTLE_MAX, throttle_job_max);

    /* A NULL destination is used for the send (no specific peer) */
    send_cluster_message(NULL, crm_msg_crmd, request, TRUE);
    free_xml(request);
}
Exemplo n.º 3
0
/*
 * Hash-table iteration callback: offer cluster membership to one peer and
 * record the offered join id in welcomed_nodes.
 *
 * key       - unused
 * value     - the peer (const crm_node_t *); asserted to be non-NULL
 * user_data - unused
 */
static void
join_make_offer(gpointer key, gpointer value, gpointer user_data)
{
    const crm_node_t *peer = value;
    const char *peer_name = NULL;
    xmlNode *request = NULL;
    char *offered_id = NULL;

    CRM_ASSERT(peer != NULL);

    if (!crm_is_peer_active(peer)) {
        crm_trace("Not making an offer to %s: not active", peer->uname);
        return;
    }

    peer_name = peer->uname;
    if (peer_name == NULL) {
        crm_err("No recipient for welcome message");
        return;
    }

    /* presumably clears earlier join bookkeeping for this peer -- see
     * erase_node_from_join()
     */
    erase_node_from_join(peer_name);

    /* Log once per membership change which membership the offers are based on */
    if (crm_peer_seq != saved_ccm_membership_id) {
        saved_ccm_membership_id = crm_peer_seq;
        crm_info("Making join offers based on membership %llu", crm_peer_seq);
    }

    /* Activity is checked a second time, after the bookkeeping above */
    if (!crm_is_peer_active(peer)) {
        crm_info("Peer process on %s is not active (yet?): %.8lx %d",
                 peer_name, (long)peer->processes, g_hash_table_size(crm_peer_cache));
        return;
    }

    request = create_request(CRM_OP_JOIN_OFFER, NULL, peer_name,
                             CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL);
    offered_id = crm_itoa(current_join_id);

    crm_xml_add_int(request, F_CRM_JOIN_ID, current_join_id);
    crm_debug("join-%d: Sending offer to %s", current_join_id, peer_name);

    send_cluster_message(peer_name, crm_msg_crmd, request, TRUE);
    free_xml(request);

    /* The table receives a private copy of the name plus the id string */
    g_hash_table_insert(welcomed_nodes, strdup(peer_name), offered_id);
}
/*
 * Election voting: broadcast this node's vote and start the election timer.
 *
 * e - election object; the call is a no-op when it is NULL or when the local
 *     node is not yet known and active
 */
void
election_vote(election_t *e)
{
    struct timeval uptime;
    xmlNode *ballot = NULL;
    crm_node_t *local_node = NULL;

    if (e == NULL) {
        crm_trace("Not voting in election: not initialized");
        return;
    }

    local_node = crm_get_peer(0, e->uname);
    if ((local_node == NULL) || !crm_is_peer_active(local_node)) {
        crm_trace("Cannot vote yet: %p", local_node);
        return;
    }

    /* Mark the election as running and build the CRM_OP_VOTE message */
    e->state = election_in_progress;
    ballot = create_request(CRM_OP_VOTE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);

    /* Fill in the local node's details: owner id and a new election id */
    e->count++;
    crm_xml_add(ballot, F_CRM_ELECTION_OWNER, local_node->uuid);
    crm_xml_add_int(ballot, F_CRM_ELECTION_ID, e->count);

    crm_uptime(&uptime);
    crm_xml_add_int(ballot, F_CRM_ELECTION_AGE_S, uptime.tv_sec);
    crm_xml_add_int(ballot, F_CRM_ELECTION_AGE_US, uptime.tv_usec);

    /* Send the vote message to the cluster */
    send_cluster_message(NULL, crm_msg_crmd, ballot, TRUE);
    free_xml(ballot);

    crm_debug("Started election %d", e->count);

    /* Discard the table of votes recorded so far, if there is one */
    if (e->voted) {
        g_hash_table_destroy(e->voted);
        e->voted = NULL;
    }

    /* Start the election timer */
    election_timeout_start(e);
}
Exemplo n.º 5
0
/*
 * Broadcast the result of a fencing operation to the cluster.
 *
 * op - the fencing operation whose result is being announced
 * rc - result code handed to create_op_done_notify()
 *
 * Each broadcast carries a "count" attribute that increases by one per call.
 */
static void
bcast_result_to_peers(remote_fencing_op_t * op, int rc)
{
    static int broadcasts = 0;  /* number of broadcasts sent so far */
    xmlNode *message = create_xml_node(NULL, T_STONITH_REPLY);
    xmlNode *payload = create_op_done_notify(op, rc);

    broadcasts++;
    crm_trace("Broadcasting result to peers");

    crm_xml_add(message, F_TYPE, T_STONITH_NOTIFY);
    crm_xml_add(message, F_SUBTYPE, "broadcast");
    crm_xml_add(message, F_STONITH_OPERATION, T_STONITH_NOTIFY);
    crm_xml_add_int(message, "count", broadcasts);
    add_message_xml(message, F_STONITH_CALLDATA, payload);

    send_cluster_message(NULL, crm_msg_stonith_ng, message, FALSE);

    /* Both nodes are freed separately after the send */
    free_xml(payload);
    free_xml(message);
}
Exemplo n.º 6
0
/*
 * Callback for the version query issued in response to a join offer: on
 * success, send a join request carrying the queried properties to the DC.
 *
 * msg       - unused
 * call_id   - id of the completed query; results for any query other than
 *             the one recorded in query_call_id are ignored
 * rc        - query result code
 * output    - query result; its properties are copied into the request
 * user_data - heap-allocated join id string; always freed before returning
 */
void
join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
    char *join_id = user_data;
    xmlNode *generation = create_xml_node(NULL, XML_CIB_TAG_GENERATION_TUPPLE);

    CRM_LOG_ASSERT(join_id != NULL);

    if (query_call_id != call_id) {
        /* A different query is the current one; this result is stale */
        crm_trace("Query %d superceeded", call_id);

    } else {
        query_call_id = 0;

        if ((rc != pcmk_ok) || (output == NULL)) {
            crm_err("Could not retrieve version details for join-%s: %s (%d)",
                    join_id, pcmk_strerror(rc), rc);
            register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__);

        } else if (fsa_our_dc == NULL) {
            crm_debug("Membership is in flux, not continuing join-%s", join_id);

        } else {
            xmlNode *request = NULL;

            crm_debug("Respond to join offer join-%s from %s", join_id, fsa_our_dc);
            copy_in_properties(generation, output);

            request = create_request(CRM_OP_JOIN_REQUEST, generation, fsa_our_dc,
                                     CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL);
            crm_xml_add(request, F_CRM_JOIN_ID, join_id);

            send_cluster_message(crm_get_peer(0, fsa_our_dc), crm_msg_crmd, request, TRUE);
            free_xml(request);
        }
    }

    /* Common cleanup for every path */
    free_xml(generation);
    free(join_id);
}
Exemplo n.º 7
0
/*
 * Create a remote fencing operation for a request and broadcast a query for
 * it to the cluster.
 *
 * client     - requesting client, or NULL to take the client id from the
 *              request's F_STONITH_CLIENTID attribute
 * request    - the fencing request
 * manual_ack - when FALSE, the operation and query timers are armed; when
 *              TRUE, no timers are armed and the query is tagged with the
 *              "manual_ack" device instead
 *
 * Returns the new operation, or NULL if the operation has no action (in
 * which case nothing is broadcast).
 */
remote_fencing_op_t *initiate_remote_stonith_op(stonith_client_t *client, xmlNode *request, gboolean manual_ack)
{
    xmlNode *query = NULL;
    const char *client_id = NULL;
    remote_fencing_op_t *op = NULL;

    if(client) {
        client_id = client->id;
    } else {
        client_id = crm_element_value(request, F_STONITH_CLIENTID);
    }

    CRM_LOG_ASSERT(client_id != NULL);
    op = create_remote_stonith_op(client_id, request, FALSE);
    query = stonith_create_op(0, op->id, STONITH_OP_QUERY, NULL, 0);

    if(!manual_ack) {
        /* g_timeout_add() intervals are in milliseconds */
        op->op_timer = g_timeout_add(1200*op->base_timeout, remote_op_timeout, op);
        op->query_timer = g_timeout_add(100*op->base_timeout, remote_op_query_timeout, op);

    } else {
        crm_xml_add(query, F_STONITH_DEVICE, "manual_ack");
    }

    crm_xml_add(query, F_STONITH_REMOTE, op->id);
    crm_xml_add(query, F_STONITH_TARGET, op->target);
    crm_xml_add(query, F_STONITH_ACTION, op->action);
    crm_xml_add(query, F_STONITH_OWNER,  op->originator);
    crm_xml_add(query, F_STONITH_CLIENTID, op->client_id);
    crm_xml_add(query, F_STONITH_CLIENTNAME, op->client_name);
    crm_xml_add_int(query, F_STONITH_TIMEOUT, op->base_timeout);

    crm_info("Initiating remote operation %s for %s: %s", op->action, op->target, op->id);

    /* The query was allocated above, so it must be released on the failure
     * path as well; otherwise it would leak when the action is missing.
     */
    CRM_CHECK(op->action, free_xml(query); return NULL);

    send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE);

    free_xml(query);
    return op;
}
Exemplo n.º 8
0
/*!
 * \brief Offer the local node's candidacy, starting a new election round
 *
 * Resets the election, broadcasts a "vote" message carrying the local node's
 * ID, the incremented election counter and the node's uptime, then starts
 * the election timer.
 *
 * \param[in] e      Election object (nothing is done if NULL)
 * \note Peers that agree to the candidacy answer with a "no-vote" reply. The
 *       local node wins if all active peers do so, or if the election times
 *       out. (Losing to any peer vote stops the timer, so a timeout means we
 *       did not lose -- either some peer did not vote, or election_check()
 *       was not called in time.)
 */
void
election_vote(election_t *e)
{
    struct timeval uptime;
    xmlNode *ballot = NULL;
    crm_node_t *local_node = NULL;

    if (e == NULL) {
        crm_trace("Election vote requested, but no election available");
        return;
    }

    local_node = crm_get_peer(0, e->uname);
    if ((local_node == NULL) || !crm_is_peer_active(local_node)) {
        crm_trace("Cannot vote in %s yet: local node not connected to cluster",
                  e->name);
        return;
    }

    /* Begin a fresh round */
    election_reset(e);
    e->state = election_in_progress;

    ballot = create_request(CRM_OP_VOTE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);

    /* Candidate identity and round number */
    e->count++;
    crm_xml_add(ballot, F_CRM_ELECTION_OWNER, local_node->uuid);
    crm_xml_add_int(ballot, F_CRM_ELECTION_ID, e->count);

    /* Uptime, split into seconds and microseconds */
    crm_uptime(&uptime);
    crm_xml_add_int(ballot, F_CRM_ELECTION_AGE_S, uptime.tv_sec);
    crm_xml_add_int(ballot, F_CRM_ELECTION_AGE_US, uptime.tv_usec);

    send_cluster_message(NULL, crm_msg_crmd, ballot, TRUE);
    free_xml(ballot);

    crm_debug("Started %s round %d", e->name, e->count);
    election_timeout_start(e);
}
Exemplo n.º 9
0
/*!
 * \internal
 * \brief Tell the DC that a remote node has come up or gone down
 *
 * \param[in] node_name  Name of the remote node
 * \param[in] node_up    TRUE if the node is up, FALSE if it is down
 *
 * \note This is best-effort. Without a DC, or if the message fails, the DC
 *       will eventually pick up the change via the CIB node state; the
 *       message only allows that to happen sooner.
 */
void
send_remote_state_message(const char *node_name, gboolean node_up)
{
    const char *direction = (node_up? "coming up" : "going down");
    xmlNode *notice = NULL;

    if (fsa_our_dc == NULL) {
        crm_debug("No DC to notify of pacemaker_remote node %s %s",
                  node_name, direction);
        return;
    }

    notice = create_request(CRM_OP_REMOTE_STATE, NULL, fsa_our_dc,
                            CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL);

    crm_info("Notifying DC %s of pacemaker_remote node %s %s",
             fsa_our_dc, node_name, direction);

    crm_xml_add(notice, XML_ATTR_ID, node_name);
    crm_xml_add_boolean(notice, XML_NODE_IN_CLUSTER, node_up);

    send_cluster_message(crm_get_peer(0, fsa_our_dc), crm_msg_crmd, notice,
                         TRUE);
    free_xml(notice);
}
Exemplo n.º 10
0
/*
 * Execute a pseudo-action from the transition graph.
 *
 * A CRM_OP_MAINTENANCE_NODES action is forwarded to every peer in
 * crm_peer_cache except the local node and then processed locally; any other
 * pseudo-action is only checked for Pacemaker Remote side effects.  In both
 * cases the action is confirmed and the graph is updated and triggered.
 *
 * graph  - graph the action belongs to
 * pseudo - the pseudo-action
 *
 * Always returns TRUE.
 */
static gboolean
te_pseudo_action(crm_graph_t * graph, crm_action_t * pseudo)
{
    const char *task = crm_element_value(pseudo->xml, XML_LRM_ATTR_TASK);

    if (!safe_str_eq(task, CRM_OP_MAINTENANCE_NODES)) {
        /* Check action for Pacemaker Remote node side effects */
        remote_ra_process_pseudo(pseudo->xml);

    } else {
        GHashTableIter peers;
        crm_node_t *peer = NULL;

        /* Relay the action to every peer other than ourselves */
        g_hash_table_iter_init(&peers, crm_peer_cache);
        while (g_hash_table_iter_next(&peers, NULL, (gpointer *) &peer)) {
            if (!safe_str_eq(fsa_our_uname, peer->uname)) {
                xmlNode *relay = create_request(task, pseudo->xml, peer->uname,
                                                CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL);

                send_cluster_message(peer, crm_msg_crmd, relay, FALSE);
                free_xml(relay);
            }
        }

        remote_ra_process_maintenance_nodes(pseudo->xml);
    }

    crm_debug("Pseudo-action %d (%s) fired and confirmed", pseudo->id,
              crm_element_value(pseudo->xml, XML_LRM_ATTR_TASK_KEY));

    te_action_confirmed(pseudo);
    update_graph(graph, pseudo);
    trigger_graph();

    return TRUE;
}
Exemplo n.º 11
0
/*!
 * \internal
 * \brief Broadcast a fence-history command to all nodes, used to trigger
 *        cleanup or history synchronisation
 *
 * \param[in] history   History to attach (may be NULL)
 * \param[in] callopts  Call options; cleanup is controlled via a flag in here
 * \param[in] target    If not NULL, limit cleanup to this fence target
 */
static void
stonith_send_broadcast_history(xmlNode *history,
                               int callopts,
                               const char *target)
{
    xmlNode *command = create_xml_node(NULL, "stonith_command");
    xmlNode *payload = create_xml_node(NULL, __FUNCTION__);

    /* Payload: optional target restriction, then an optional history copy */
    if (target != NULL) {
        crm_xml_add(payload, F_STONITH_TARGET, target);
    }
    if (history != NULL) {
        add_node_copy(payload, history);
    }

    /* Envelope describing the broadcast */
    crm_xml_add(command, F_TYPE, T_STONITH_NG);
    crm_xml_add(command, F_SUBTYPE, "broadcast");
    crm_xml_add(command, F_STONITH_OPERATION, STONITH_OP_FENCE_HISTORY);
    crm_xml_add_int(command, F_STONITH_CALLOPTS, callopts);
    add_message_xml(command, F_STONITH_CALLDATA, payload);

    send_cluster_message(NULL, crm_msg_stonith_ng, command, FALSE);

    free_xml(payload);
    free_xml(command);
}
Exemplo n.º 12
0
/*
 * Decide where a CRM message belongs and, unless the caller has to process
 * it, deliver it there (local IPC relay, cluster relay, or discard).
 *
 * msg                - message to route
 * originated_locally - consulted when choosing between local handling and a
 *                      cluster relay (see the is_local and DC branches below)
 *
 * Returns TRUE when nothing more is required of the caller (the message was
 * relayed, ignored, discarded or rejected as invalid), FALSE when the caller
 * is expected to process the message itself.
 */
gboolean
relay_message(xmlNode * msg, gboolean originated_locally)
{
    /* Cluster message type used for external relays; may be replaced by
     * text2msg_type(sys_to) in the SUPPORT_COROSYNC sections below */
    int dest = 1;
    int is_for_dc = 0;
    int is_for_dcib = 0;
    int is_for_te = 0;
    int is_for_crm = 0;
    int is_for_cib = 0;
    int is_local = 0;
    gboolean processing_complete = FALSE;
    /* NOTE(review): msg is handed to crm_element_value() here and in the
     * trace below before the NULL check further down -- confirm that helper
     * tolerates a NULL message */
    const char *host_to = crm_element_value(msg, F_CRM_HOST_TO);
    const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO);
    const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM);
    const char *type = crm_element_value(msg, F_TYPE);
    const char *msg_error = NULL;

    crm_trace("Routing message %s", crm_element_value(msg, XML_ATTR_REFERENCE));

    /* Validation: reject missing or malformed messages, silently drop hellos */
    if (msg == NULL) {
        msg_error = "Cannot route empty message";

    } else if (safe_str_eq(CRM_OP_HELLO, crm_element_value(msg, F_CRM_TASK))) {
        /* quietly ignore */
        processing_complete = TRUE;

    } else if (safe_str_neq(type, T_CRM)) {
        msg_error = "Bad message type";

    } else if (sys_to == NULL) {
        msg_error = "Bad message destination: no subsystem";
    }

    if (msg_error != NULL) {
        processing_complete = TRUE;
        crm_err("%s", msg_error);
        crm_log_xml_warn(msg, "bad msg");
    }

    if (processing_complete) {
        return TRUE;
    }

    /* From here on the default is "handled"; the branches that leave work for
     * the caller reset this to FALSE */
    processing_complete = TRUE;

    /* Classify the destination subsystem (case-insensitive match) */
    is_for_dc = (strcasecmp(CRM_SYSTEM_DC, sys_to) == 0);
    is_for_dcib = (strcasecmp(CRM_SYSTEM_DCIB, sys_to) == 0);
    is_for_te = (strcasecmp(CRM_SYSTEM_TENGINE, sys_to) == 0);
    is_for_cib = (strcasecmp(CRM_SYSTEM_CIB, sys_to) == 0);
    is_for_crm = (strcasecmp(CRM_SYSTEM_CRMD, sys_to) == 0);

    /* Work out whether the message is addressed to this node.  With no
     * destination host, DC/TE messages and locally originated CRMd messages
     * count as non-local; everything else counts as local. */
    is_local = 0;
    if (host_to == NULL || strlen(host_to) == 0) {
        if (is_for_dc || is_for_te) {
            is_local = 0;

        } else if (is_for_crm && originated_locally) {
            is_local = 0;

        } else {
            is_local = 1;
        }

    } else if (safe_str_eq(fsa_our_uname, host_to)) {
        is_local = 1;
    }

    if (is_for_dc || is_for_dcib || is_for_te) {
        /* Messages for the DC, the DC's CIB or the TE */
        if (AM_I_DC && is_for_te) {
            ROUTER_RESULT("Message result: Local relay");
            send_msg_via_ipc(msg, sys_to);

        } else if (AM_I_DC) {
            ROUTER_RESULT("Message result: DC/CRMd process");
            processing_complete = FALSE;        /* more to be done by caller */
        } else if (originated_locally && safe_str_neq(sys_from, CRM_SYSTEM_PENGINE)
                   && safe_str_neq(sys_from, CRM_SYSTEM_TENGINE)) {

            /* Neither the TE or PE should be sending messages
             *   to DC's on other nodes
             *
             * By definition, if we are no longer the DC, then
             *   the PE or TE's data should be discarded
             */

#if SUPPORT_COROSYNC
            if (is_openais_cluster()) {
                dest = text2msg_type(sys_to);
            }
#endif
            ROUTER_RESULT("Message result: External relay to DC");
            /* A NULL peer is passed when no destination host was given */
            send_cluster_message(host_to ? crm_get_peer(0, host_to) : NULL, dest, msg, TRUE);

        } else {
            /* discard */
            ROUTER_RESULT("Message result: Discard, not DC");
        }

    } else if (is_local && (is_for_crm || is_for_cib)) {
        ROUTER_RESULT("Message result: CRMd process");
        processing_complete = FALSE;    /* more to be done by caller */

    } else if (is_local) {
        /* Local message for some other subsystem: hand it over via IPC */
        ROUTER_RESULT("Message result: Local relay");
        send_msg_via_ipc(msg, sys_to);

    } else {
        /* Not for this node: relay it over the cluster */
#if SUPPORT_COROSYNC
        if (is_openais_cluster()) {
            dest = text2msg_type(sys_to);

            /* Fall back to the CRMd message type for unknown or
             * out-of-range destinations */
            if (dest == crm_msg_none || dest > crm_msg_stonith_ng) {
                dest = crm_msg_crmd;
            }
        }
#endif
        ROUTER_RESULT("Message result: External relay");
        send_cluster_message(host_to ? crm_get_peer(0, host_to) : NULL, dest, msg, TRUE);
    }

    return processing_complete;
}
Exemplo n.º 13
0
/*
 * Advance a remote fencing operation: ask a peer to execute it, or, if no
 * peer is available, fail the operation or wait for more query replies.
 *
 * op   - the fencing operation
 * peer - preferred peer, or NULL to choose one; when topology is in use and
 *        op->devices is set, any preference is ignored and a peer is chosen
 *        for the current device
 *
 * On the first call for an operation (op->op_timer_total unset) the overall
 * operation timer is armed.  Each request sent to a peer (re)arms the
 * per-request timer op->op_timer_one.
 */
void
call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer)
{
    const char *device = NULL;
    int timeout = op->base_timeout;

    crm_trace("State for %s.%.8s: %s %d", op->target, op->client_name, op->id, op->state);
    if (peer == NULL && !is_set(op->call_options, st_opt_topology)) {
        peer = stonith_choose_peer(op);
    }

    if (!op->op_timer_total) {
        int total_timeout = get_op_total_timeout(op, peer, op->base_timeout);

        op->total_timeout = TIMEOUT_MULTIPLY_FACTOR * total_timeout;
        /* g_timeout_add() intervals are in milliseconds */
        op->op_timer_total = g_timeout_add(1000 * op->total_timeout, remote_op_timeout, op);
        report_timeout_period(op, op->total_timeout);
        crm_info("Total remote op timeout set to %d for fencing of node %s for %s.%.8s",
                 total_timeout, op->target, op->client_name, op->id);
    }

    if (is_set(op->call_options, st_opt_topology) && op->devices) {
        /* Ignore any preference, they might not have the device we need */
        /* When using topology, the stonith_choose_peer function pops off
         * the peer from the op's query results.  Make sure to calculate
         * the op_timeout before calling this function when topology is in use */
        peer = stonith_choose_peer(op);
        device = op->devices->data;
        timeout = get_device_timeout(peer, device, op->base_timeout);
    }

    if (peer) {
        int timeout_one = 0;
        xmlNode *query = stonith_create_op(op->client_callid, op->id, STONITH_OP_FENCE, NULL, 0);

        crm_xml_add(query, F_STONITH_REMOTE_OP_ID, op->id);
        crm_xml_add(query, F_STONITH_TARGET, op->target);
        crm_xml_add(query, F_STONITH_ACTION, op->action);
        crm_xml_add(query, F_STONITH_ORIGIN, op->originator);
        crm_xml_add(query, F_STONITH_CLIENTID, op->client_id);
        crm_xml_add(query, F_STONITH_CLIENTNAME, op->client_name);
        crm_xml_add_int(query, F_STONITH_TIMEOUT, timeout);

        if (device) {
            /* Topology: the peer is told exactly which device to use */
            timeout_one =
                TIMEOUT_MULTIPLY_FACTOR * get_device_timeout(peer, device, op->base_timeout);
            crm_info("Requesting that %s perform op %s %s with %s for %s (%ds)", peer->host,
                     op->action, op->target, device, op->client_name, timeout_one);
            crm_xml_add(query, F_STONITH_DEVICE, device);
            crm_xml_add(query, F_STONITH_MODE, "slave");

        } else {
            /* No specific device: the request is sent in "smart" mode */
            timeout_one = TIMEOUT_MULTIPLY_FACTOR * get_peer_timeout(peer, op->base_timeout);
            crm_info("Requesting that %s perform op %s %s for %s (%ds)",
                     peer->host, op->action, op->target, op->client_name, timeout_one);
            crm_xml_add(query, F_STONITH_MODE, "smart");
        }

        op->state = st_exec;
        /* Replace any per-request timer left over from an earlier attempt */
        if (op->op_timer_one) {
            g_source_remove(op->op_timer_one);
        }
        op->op_timer_one = g_timeout_add((1000 * timeout_one), remote_op_timeout_one, op);

        send_cluster_message(crm_get_peer(0, peer->host), crm_msg_stonith_ng, query, FALSE);
        peer->tried = TRUE;
        free_xml(query);
        return;

    } else if (op->owner == FALSE) {
        crm_err("The termination of %s for %s is not ours to control", op->target, op->client_name);

    } else if (op->query_timer == 0) {
        /* We've exhausted all available peers */
        crm_info("No remaining peers capable of terminating %s for %s (%d)", op->target,
                 op->client_name, op->state);
        CRM_LOG_ASSERT(op->state < st_done);
        remote_op_timeout(op);

    } else if(op->replies >= op->replies_expected || op->replies >= fencing_active_peers()) {
        int rc = -EHOSTUNREACH;

        /* if the operation never left the query state,
         * but we have all the expected replies, then no devices
         * are available to execute the fencing operation. */
        if (op->state == st_query) {
            crm_info("None of the %d peers have devices capable of terminating %s for %s (%d)",
                     op->replies, op->target, op->client_name, op->state);

            rc = -ENODEV;
        } else {
            crm_info("None of the %d peers are capable of terminating %s for %s (%d)",
                     op->replies, op->target, op->client_name, op->state);
        }

        op->state = st_failed;
        remote_op_done(op, NULL, rc, FALSE);

    } else if (device) {
        crm_info("Waiting for additional peers capable of terminating %s with %s for %s.%.8s",
                 op->target, device, op->client_name, op->id);
    } else {
        /* Same "<client>.<op id prefix>" form as the other messages in this
         * function; the '.' separator was previously missing here */
        crm_info("Waiting for additional peers capable of terminating %s for %s.%.8s",
                 op->target, op->client_name, op->id);
    }
}
Exemplo n.º 14
0
/*
 * Create a remote fencing operation owned by this node and, unless it needs
 * manual confirmation, has failed, or is a duplicate, broadcast a query for
 * it and arm the query timer.
 *
 * client     - requesting client, or NULL to take the client id from the
 *              request's F_STONITH_CLIENTID attribute
 * request    - the fencing request
 * manual_ack - TRUE to return immediately after creating the operation
 *
 * Returns the operation, or NULL when the operation has no action.
 */
remote_fencing_op_t *
initiate_remote_stonith_op(crm_client_t * client, xmlNode * request, gboolean manual_ack)
{
    int query_timeout = 0;      /* kept as int: the product is truncated */
    xmlNode *query = NULL;
    remote_fencing_op_t *op = NULL;
    const char *client_id = client ? client->id
                                   : crm_element_value(request, F_STONITH_CLIENTID);

    CRM_LOG_ASSERT(client_id != NULL);

    op = create_remote_stonith_op(client_id, request, FALSE);
    op->owner = TRUE;

    if (manual_ack) {
        crm_notice("Initiating manual confirmation for %s: %s",
                   op->target, op->id);
        return op;
    }

    CRM_CHECK(op->action, return NULL);

    /* Without a usable first topology level the operation cannot proceed */
    if (stonith_topology_next(op) != pcmk_ok) {
        op->state = st_failed;
    }

    if (op->state == st_failed) {
        crm_warn("Initiation of remote operation %s for %s: failed (%s)", op->action,
                 op->target, op->id);
        remote_op_done(op, NULL, -EINVAL, FALSE);
        return op;
    }

    if (op->state == st_duplicate) {
        crm_info("Initiating remote operation %s for %s: %s (duplicate)", op->action,
                 op->target, op->id);
        return op;
    }

    crm_notice("Initiating remote operation %s for %s: %s (%d)", op->action, op->target,
               op->id, op->state);

    /* Build the query and broadcast it */
    query = stonith_create_op(op->client_callid, op->id, STONITH_OP_QUERY, NULL, 0);

    crm_xml_add(query, F_STONITH_REMOTE_OP_ID, op->id);
    crm_xml_add(query, F_STONITH_TARGET, op->target);
    crm_xml_add(query, F_STONITH_ACTION, op->action);
    crm_xml_add(query, F_STONITH_ORIGIN, op->originator);
    crm_xml_add(query, F_STONITH_CLIENTID, op->client_id);
    crm_xml_add(query, F_STONITH_CLIENTNAME, op->client_name);
    crm_xml_add_int(query, F_STONITH_TIMEOUT, op->base_timeout);

    send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE);
    free_xml(query);

    /* Arm the query timer once the broadcast has been sent */
    query_timeout = op->base_timeout * TIMEOUT_MULTIPLY_FACTOR;
    op->query_timer = g_timeout_add((1000 * query_timeout), remote_op_query_timeout, op);

    return op;
}
Exemplo n.º 15
0
/* aka. this is notification that we have (or have not) been accepted
 *
 * Handles a CRM_OP_JOIN_ACKNAK message: on a NACK an FSA error is
 * registered; on an ACK from the accepted DC the local LRM status is sent
 * back in a CRM_OP_JOIN_CONFIRM request.
 *
 * action, cur_state and current_input are not referenced directly in this
 * function; cause is passed on to register_fsa_input_adv().  The message
 * itself is obtained through fsa_typed_data(fsa_dt_ha_msg).
 */
void
do_cl_join_finalize_respond(long long action,
                            enum crmd_fsa_cause cause,
                            enum crmd_fsa_state cur_state,
                            enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    xmlNode *tmp1 = NULL;
    gboolean was_nack = TRUE;
    /* TRUE until the first join confirmation has been sent */
    static gboolean first_join = TRUE;
    ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);

    int join_id = -1;
    const char *op = crm_element_value(input->msg, F_CRM_TASK);
    const char *ack_nack = crm_element_value(input->msg, CRM_OP_JOIN_ACKNAK);
    const char *welcome_from = crm_element_value(input->msg, F_CRM_HOST_FROM);

    /* Only ACK/NACK messages are of interest here */
    if (safe_str_neq(op, CRM_OP_JOIN_ACKNAK)) {
        crm_trace("Ignoring op=%s message", op);
        return;
    }

    /* calculate if it was an ack or a nack */
    if (crm_is_true(ack_nack)) {
        was_nack = FALSE;
    }

    crm_element_value_int(input->msg, F_CRM_JOIN_ID, &join_id);

    if (was_nack) {
        crm_err("Join (join-%d) with leader %s failed (NACK'd): Shutting down",
                join_id, welcome_from);
        register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
        return;
    }

    /* An ACK that we sent to ourselves is stale once we are no longer DC */
    if (AM_I_DC == FALSE && safe_str_eq(welcome_from, fsa_our_uname)) {
        crm_warn("Discarding our own welcome - we're no longer the DC");
        return;
    }

    /* Ignore the message if update_dc() does not accept its sender */
    if (update_dc(input->msg) == FALSE) {
        crm_warn("Discarding %s from %s (expected %s)", op, welcome_from, fsa_our_dc);
        return;
    }

    /* send our status section to the DC */
    crm_debug("Confirming join join-%d: %s", join_id, crm_element_value(input->msg, F_CRM_TASK));
    tmp1 = do_lrm_query(TRUE, fsa_our_uname);
    if (tmp1 != NULL) {
        xmlNode *reply = create_request(CRM_OP_JOIN_CONFIRM, tmp1, fsa_our_dc,
                                        CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL);

        crm_xml_add_int(reply, F_CRM_JOIN_ID, join_id);

        crm_debug("join-%d: Join complete."
                  "  Sending local LRM status to %s", join_id, fsa_our_dc);

        if (first_join) {
            first_join = FALSE;

            /*
             * Clear any previous transient node attribute and lrm operations
             *
             * Corosync has a nasty habit of not being able to tell if a
             *   node is returning or didn't leave in the first place.
             * This confuses Pacemaker because it never gets a "node up"
             *   event which is normally used to clean up the status section.
             *
             * Do not remove the resources though, they'll be cleaned up in
             *   do_dc_join_ack().  Removing them here creates a race
             *   condition if the crmd is being recovered.
             * Instead of a list of active resources from the lrmd
             *   we may end up with a blank status section.
             * If we are _NOT_ lucky, we will probe for the "wrong" instance
             *   of anonymous clones and end up with multiple active
             *   instances on the machine.
             */
            erase_status_tag(fsa_our_uname, XML_TAG_TRANSIENT_NODEATTRS, 0);

            /* Just in case attrd was still around too */
            if (is_not_set(fsa_input_register, R_SHUTDOWN)) {
                update_attrd(fsa_our_uname, "terminate", NULL, NULL, FALSE);
                update_attrd(fsa_our_uname, XML_CIB_ATTR_SHUTDOWN, "0", NULL, FALSE);
            }
        }

        /* The confirmation goes out only after the first-join cleanup above */
        send_cluster_message(crm_get_peer(0, fsa_our_dc), crm_msg_crmd, reply, TRUE);
        free_xml(reply);

        if (AM_I_DC == FALSE) {
            register_fsa_input_adv(cause, I_NOT_DC, NULL, A_NOTHING, TRUE, __FUNCTION__);
            update_attrd(NULL, NULL, NULL, NULL, FALSE);
        }

        free_xml(tmp1);

    } else {
        /* No LRM status could be obtained, so the join cannot be confirmed */
        crm_err("Could not send our LRM state to the DC");
        register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
    }
}
Exemplo n.º 16
0
/*
 * Process a peer's reply reporting the result of executing a fencing
 * operation.
 *
 * msg - the reply; must contain the F_STONITH_REMOTE (operation id) and
 *       F_STONITH_RC (result code) attributes somewhere in its tree
 *
 * Returns the result code carried by the reply, -EPROTO if a required
 * attribute is missing, or -EOPNOTSUPP if the operation is unknown and the
 * reply did not report success.
 */
int process_remote_stonith_exec(xmlNode *msg) 
{
    int rc = 0;
    const char *id = NULL;
    remote_fencing_op_t *op = NULL;
    xmlNode *dev = get_xpath_object("//@"F_STONITH_REMOTE, msg, LOG_ERR);

    CRM_CHECK(dev != NULL, return -EPROTO);

    id = crm_element_value(dev, F_STONITH_REMOTE);
    CRM_CHECK(id != NULL, return -EPROTO);

    dev = get_xpath_object("//@"F_STONITH_RC, msg, LOG_ERR);
    CRM_CHECK(dev != NULL, return -EPROTO);

    crm_element_value_int(dev, F_STONITH_RC, &rc);

    /* Look the operation up by id among the known remote operations */
    if(remote_op_list) {
        op = g_hash_table_lookup(remote_op_list, id);
    }

    if(op == NULL && rc == pcmk_ok) {
        /* Record successful fencing operations */
        const char *client_id = crm_element_value(msg, F_STONITH_CLIENTID);

        op = create_remote_stonith_op(client_id, msg, TRUE);
    }

    if(op == NULL) {
        /* Could be for an event that began before we started */
        /* TODO: Record the op for later querying */
        crm_info("Unknown or expired remote op: %s", id);
        return -EOPNOTSUPP;
    }

    if(is_set(op->call_options, st_opt_topology)) {
        const char *device = crm_element_value(msg, F_STONITH_DEVICE);

        crm_notice("Call to %s for %s on behalf of %s: %s (%d)", device, op->target, op->originator, rc == pcmk_ok?"passed":"failed", rc);
        if(safe_str_eq(op->originator, stonith_our_uname)) {
            /* We originated this operation, so we drive the topology */

            if(op->state == st_done) {
                remote_op_done(op, msg, rc);
                return rc;

            } else if(rc == pcmk_ok && op->devices) {
                /* Success, are there any more? */
                op->devices = op->devices->next;
            }

            if(op->devices == NULL) {
                crm_trace("Broadcasting completion of complex fencing op for %s", op->target);
                send_cluster_message(NULL, crm_msg_stonith_ng, msg, FALSE);
                op->state = st_done;
                return rc;
            }

        } else {
            /* NOTE(review): this branch does not return, so control falls
             * through to call_remote_stonith() below and op is used again
             * after remote_op_done() -- confirm that this is intended and
             * that op is still valid at that point */
            op->state = st_done;
            remote_op_done(op, msg, rc);
        }

    } else if(rc == pcmk_ok && op->devices == NULL) {
        crm_trace("All done for %s", op->target);

        op->state = st_done;
        remote_op_done(op, msg, rc);
        return rc;
    }

    /* Retry on failure or execute the rest of the topology */
    crm_trace("Next for %s (rc was %d)", op->target, rc);
    call_remote_stonith(op, NULL);
    return rc;
}
Exemplo n.º 17
0
/*
 * Dispatch one resource operation from the transition graph.
 *
 * The operation described by action->xml is stamped with a transition key
 * and wrapped in a CRM_OP_INVOKE_LRM request.  If the target node is this
 * node the request is handed straight to do_lrm_invoke(); otherwise it is
 * sent over the cluster with send_cluster_message().
 *
 * Afterwards the action is either confirmed immediately (when the
 * XML_ATTR_TE_NOWAIT meta attribute is true) or its timer is started.  If
 * the XML_OP_ATTR_PENDING meta attribute is true, a pending entry is also
 * recorded through cib_action_update().
 *
 * \param graph   graph the action belongs to (supplies network_delay and
 *                is passed to te_start_action_timer())
 * \param action  action to execute; must be non-NULL with non-NULL xml
 *
 * \return FALSE if the action has no target node or the cluster send
 *         failed, TRUE otherwise
 */
static gboolean
te_rsc_command(crm_graph_t * graph, crm_action_t * action)
{
    /* never overwrite stop actions in the CIB with
     *   anything other than completed results
     *
     * Writing pending stops makes it look like the
     *   resource is running again
     */
    xmlNode *cmd = NULL;
    xmlNode *rsc_op = NULL;

    gboolean rc = TRUE;
    gboolean no_wait = FALSE;
    gboolean is_local = FALSE;

    char *counter = NULL;
    const char *task = NULL;
    const char *value = NULL;
    const char *on_node = NULL;
    const char *task_uuid = NULL;

    CRM_ASSERT(action != NULL);
    CRM_ASSERT(action->xml != NULL);

    action->executed = FALSE;
    rsc_op = action->xml;

    /* Read the task name before validating the target so that the
     * "Corrupted command" message below can actually name the task
     * instead of always printing a NULL placeholder.
     */
    task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
    on_node = crm_element_value(rsc_op, XML_LRM_ATTR_TARGET);

    CRM_CHECK(on_node != NULL && strlen(on_node) != 0,
              te_log_action(LOG_ERR, "Corrupted command(id=%s) %s: no node",
                            ID(action->xml), crm_str(task));
              return FALSE);

    task_uuid = crm_element_value(rsc_op, XML_LRM_ATTR_TASK_KEY);

    /* Tag the operation so its result can be matched to this transition */
    counter =
        generate_transition_key(transition_graph->id, action->id, get_target_rc(action), te_uuid);
    crm_xml_add(rsc_op, XML_ATTR_TRANSITION_KEY, counter);

    if (safe_str_eq(on_node, fsa_our_uname)) {
        is_local = TRUE;
    }

    value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT);
    if (crm_is_true(value)) {
        no_wait = TRUE;
    }

    crm_info("Initiating action %d: %s %s on %s%s%s",
             action->id, task, task_uuid, on_node,
             is_local ? " (local)" : "", no_wait ? " - no waiting" : "");

    cmd = create_request(CRM_OP_INVOKE_LRM, rsc_op, on_node,
                         CRM_SYSTEM_LRMD, CRM_SYSTEM_TENGINE, NULL);

    if (is_local) {
        /* shortcut local resource commands */
        ha_msg_input_t data = {
            .msg = cmd,
            .xml = rsc_op,
        };

        fsa_data_t msg = {
            .id = 0,
            .data = &data,
            .data_type = fsa_dt_ha_msg,
            .fsa_input = I_NULL,
            .fsa_cause = C_FSA_INTERNAL,
            .actions = A_LRM_INVOKE,
            .origin = __FUNCTION__,
        };

        do_lrm_invoke(A_LRM_INVOKE, C_FSA_INTERNAL, fsa_state, I_NULL, &msg);

    } else {
        rc = send_cluster_message(on_node, crm_msg_lrmd, cmd, TRUE);
    }

    crm_free(counter);
    free_xml(cmd);

    action->executed = TRUE;
    if (rc == FALSE) {
        crm_err("Action %d failed: send", action->id);
        return FALSE;

    } else if (no_wait) {
        /* Nobody will report back, so treat the action as done right away */
        action->confirmed = TRUE;
        update_graph(transition_graph, action);
        trigger_graph();

    } else {
        if (action->timeout <= 0) {
            crm_err("Action %d: %s %s on %s had an invalid timeout (%dms).  Using %dms instead",
                    action->id, task, task_uuid, on_node, action->timeout, graph->network_delay);
            action->timeout = graph->network_delay;
        }
        te_start_action_timer(graph, action);
    }

    value = crm_meta_value(action->params, XML_OP_ATTR_PENDING);
    if (crm_is_true(value)) {
        /* write a "pending" entry to the CIB, inhibit notification */
        crm_info("Recording pending op %s in the CIB", task_uuid);
        cib_action_update(action, LRM_OP_PENDING, EXECRA_STATUS_UNKNOWN);
    }

    return TRUE;
}
Exemplo n.º 18
0
/*
 * Apply an attribute update received from a peer.
 *
 * The attribute named by F_ATTRD_ATTRIBUTE is looked up in the global
 * 'attributes' table (and created from the message when missing), then
 * the value recorded for the node named by F_ATTRD_HOST is created or
 * updated.
 *
 * \param peer    node the update came from (used for logging and as the
 *                destination of any sync response)
 * \param xml     the update message
 * \param filter  when true, an update for the local node
 *                (attrd_cluster->uname) that disagrees with the locally
 *                held value is not applied; the local value is sent back
 *                to 'peer' as a "sync-response" instead
 *
 * Messages that lack an attribute name or a host name are ignored: both
 * are used as hash table keys and copied with strdup(), neither of which
 * accepts NULL.
 */
void
attrd_peer_update(crm_node_t *peer, xmlNode *xml, bool filter)
{
    bool changed = FALSE;
    attribute_t *a = NULL;
    attribute_value_t *v = NULL;

    const char *host = crm_element_value(xml, F_ATTRD_HOST);
    const char *attr = crm_element_value(xml, F_ATTRD_ATTRIBUTE);
    const char *value = crm_element_value(xml, F_ATTRD_VALUE);

    if(attr == NULL || host == NULL) {
        crm_notice("Ignoring update from %s: no %s specified",
                   peer->uname, attr == NULL ? "attribute" : "host");
        return;
    }

    a = g_hash_table_lookup(attributes, attr);
    if(a == NULL) {
        a = create_attribute(xml);
    }

    v = g_hash_table_lookup(a->values, host);

    if(v == NULL) {
        crm_trace("Setting %s[%s] to %s from %s", attr, host, value, peer->uname);
        v = calloc(1, sizeof(attribute_value_t));
        if(v != NULL) {
            v->nodename = strdup(host);
        }
        if(v == NULL || v->nodename == NULL) {
            /* The entry is keyed by nodename, so without it nothing can be stored */
            crm_notice("Could not record %s[%s] from %s: out of memory",
                       attr, host, peer->uname);
            free(v);
            return;
        }
        if(value) {
            v->current = strdup(value);
        }
        crm_element_value_int(xml, F_ATTRD_IS_REMOTE, &v->is_remote);
        g_hash_table_replace(a->values, v->nodename, v);

        if (v->is_remote == TRUE) {
            crm_remote_peer_cache_add(host);
        }

        changed = TRUE;

    } else if(filter
              && safe_str_neq(v->current, value)
              && safe_str_eq(host, attrd_cluster->uname)) {
        /* The peer disagrees about one of our own values: keep ours and
         * tell the peer what it should be.
         */
        xmlNode *sync = create_xml_node(NULL, __FUNCTION__);
        crm_notice("%s[%s]: local value '%s' takes priority over '%s' from %s",
                   a->id, host, v->current, value, peer->uname);

        crm_xml_add(sync, F_ATTRD_TASK, "sync-response");
        build_attribute_xml(sync, a->id, a->set, a->uuid, a->timeout_ms, a->user, v->nodename, v->nodeid, v->current);

        crm_xml_add_int(sync, F_ATTRD_WRITER, election_state(writer));
        send_cluster_message(peer, crm_msg_attrd, sync, TRUE);
        free_xml(sync);

    } else if(safe_str_neq(v->current, value)) {
        crm_info("Setting %s[%s]: %s -> %s from %s", attr, host, v->current, value, peer->uname);
        free(v->current);
        if(value) {
            v->current = strdup(value);
        } else {
            v->current = NULL;
        }
        changed = TRUE;

    } else {
        crm_trace("Unchanged %s[%s] from %s is %s", attr, host, peer->uname, value);
    }

    a->changed |= changed;

    /* this only involves cluster nodes. */
    if(v->nodeid == 0 && (v->is_remote == FALSE)) {
        if(crm_element_value_int(xml, F_ATTRD_HOST_ID, (int*)&v->nodeid) == 0) {
            /* Create the name/id association */
            crm_node_t *known_peer = crm_get_peer(v->nodeid, host);
            crm_trace("We know %s's node id now: %s", known_peer->uname, known_peer->uuid);
            if(election_state(writer) == election_won) {
                write_attributes(FALSE, TRUE);
                return;
            }
        }
    }

    if(changed) {
        if(a->timer) {
            /* Attribute has a timer configured: (re)start it rather than writing now */
            crm_trace("Delayed write out (%dms) for %s", a->timeout_ms, a->id);
            mainloop_timer_start(a->timer);
        } else {
            write_or_elect_attribute(a);
        }
    }
}
/*	A_ELECTION_COUNT	*/
/*
 * Process one election message (a vote or a no-vote) for election 'e'.
 *
 * The message is classified by a fixed sequence of checks.  Each check
 * either marks the message as merely recorded/ignored ('done'), decides
 * that the local node loses to the sender ('we_loose'), or leaves both
 * flags clear, meaning the local node beats the sender.
 *
 *  - done:      the message is logged and the current e->state returned.
 *  - we beat the sender: unless an election was lost within the last
 *               'loss_dampen' seconds, the election timeout is stopped and
 *               e->state becomes election_start.  Inside the dampening
 *               window the function falls through to the "lost" path.
 *  - lost:      a CRM_OP_NOVOTE request is sent to the sender, the election
 *               timeout is stopped and e->state becomes election_lost.
 *
 * \param e        election object; NULL yields election_lost
 * \param vote     received message; NULL yields election_error
 * \param can_win  when FALSE the local node always loses ("Not eligible")
 *
 * \return election_error or election_lost for the early exits above,
 *         otherwise the (possibly updated) e->state
 *
 * Note that election_wins, expires and last_election_loss are function
 * statics and are therefore shared by every election_t passed in.
 *
 * NOTE(review): 'from' and 'op' come straight from the message and are
 * passed to strdup()/strcasecmp() without a NULL check in some branches --
 * confirm callers guarantee both fields are present.
 */
enum election_result
election_count_vote(election_t *e, xmlNode *vote, bool can_win)
{
    int age = 0;
    int election_id = -1;
    int log_level = LOG_INFO;
    gboolean use_born_on = FALSE;
    gboolean done = FALSE;
    gboolean we_loose = FALSE;
    const char *op = NULL;
    const char *from = NULL;
    const char *reason = "unknown";
    const char *election_owner = NULL;
    crm_node_t *our_node = NULL, *your_node = NULL;

    /* Number of elections won since the current storm-detection window began */
    static int election_wins = 0;

    xmlNode *novote = NULL;
    time_t tm_now = time(NULL);
    /* End of the current storm-detection window */
    static time_t expires = 0;
    /* Time of the most recent loss; 0 when no loss is being dampened */
    static time_t last_election_loss = 0;

    /* if the membership copy is NULL we REALLY shouldn't be voting
     * the question is how we managed to get here.
     */

    CRM_CHECK(vote != NULL, return election_error);

    if(e == NULL) {
        crm_info("Not voting in election: not initialized");
        return election_lost;

    } else if(crm_peer_cache == NULL) {
        crm_info("Not voting in election: no peer cache");
        return election_lost;
    }

    op = crm_element_value(vote, F_CRM_TASK);
    from = crm_element_value(vote, F_CRM_HOST_FROM);
    election_owner = crm_element_value(vote, F_CRM_ELECTION_OWNER);
    crm_element_value_int(vote, F_CRM_ELECTION_ID, &election_id);

    your_node = crm_get_peer(0, from);
    our_node = crm_get_peer(0, e->uname);

    if (e->voted == NULL) {
        crm_debug("Created voted hash");
        e->voted = g_hash_table_new_full(crm_str_hash, g_str_equal,
                                         g_hash_destroy_str, g_hash_destroy_str);
    }

    /* The 'born' comparison further down is only used on these cluster types */
    if (is_heartbeat_cluster()) {
        use_born_on = TRUE;
    } else if (is_classic_ais_cluster()) {
        use_born_on = TRUE;
    }

    /* The order of the following checks decides the outcome: the first
     * matching branch wins.
     */
    if(can_win == FALSE) {
        reason = "Not eligible";
        we_loose = TRUE;

    } else if (our_node == NULL || crm_is_peer_active(our_node) == FALSE) {
        reason = "We are not part of the cluster";
        log_level = LOG_ERR;
        we_loose = TRUE;

    } else if (election_id != e->count && crm_str_eq(our_node->uuid, election_owner, TRUE)) {
        /* A message for one of our own elections, but not the current one */
        log_level = LOG_TRACE;
        reason = "Superseded";
        done = TRUE;

    } else if (your_node == NULL || crm_is_peer_active(your_node) == FALSE) {
        /* Possibly we cached the message in the FSA queue at a point that it wasn't */
        reason = "Peer is not part of our cluster";
        log_level = LOG_WARNING;
        done = TRUE;

    } else if (crm_str_eq(op, CRM_OP_NOVOTE, TRUE)) {
        /* The copies are handed to the hash table, which was created with
         * g_hash_destroy_str as both key and value destructor.
         */
        char *op_copy = strdup(op);
        char *uname_copy = strdup(from);

        CRM_ASSERT(crm_str_eq(our_node->uuid, election_owner, TRUE));

        /* update the list of nodes that have voted */
        g_hash_table_replace(e->voted, uname_copy, op_copy);
        reason = "Recorded";
        done = TRUE;

    } else {
        struct timeval your_age;
        const char *your_version = crm_element_value(vote, F_CRM_VERSION);
        int tv_sec = 0;
        int tv_usec = 0;

        crm_element_value_int(vote, F_CRM_ELECTION_AGE_S, &tv_sec);
        crm_element_value_int(vote, F_CRM_ELECTION_AGE_US, &tv_usec);

        your_age.tv_sec = tv_sec;
        your_age.tv_usec = tv_usec;

        /* A negative result makes us lose on "Uptime", a positive one makes us win */
        age = crm_compare_age(your_age);
        if (crm_str_eq(from, e->uname, TRUE)) {
            char *op_copy = strdup(op);
            char *uname_copy = strdup(from);

            CRM_ASSERT(crm_str_eq(our_node->uuid, election_owner, TRUE));

            /* update ourselves in the list of nodes that have voted */
            g_hash_table_replace(e->voted, uname_copy, op_copy);
            reason = "Recorded";
            done = TRUE;

        } else if (compare_version(your_version, CRM_FEATURE_SET) < 0) {
            /* The sender's version compares lower than ours: we lose */
            reason = "Version";
            we_loose = TRUE;

        } else if (compare_version(your_version, CRM_FEATURE_SET) > 0) {
            /* The sender's version compares higher than ours: we win */
            reason = "Version";

        } else if (age < 0) {
            reason = "Uptime";
            we_loose = TRUE;

        } else if (age > 0) {
            reason = "Uptime";

            /* TODO: Check for y(our) born < 0 */
        } else if (use_born_on && your_node->born < our_node->born) {
            reason = "Born";
            we_loose = TRUE;

        } else if (use_born_on && your_node->born > our_node->born) {
            reason = "Born";

        } else if (e->uname == NULL) {
            reason = "Unknown host name";
            we_loose = TRUE;

        } else if (strcasecmp(e->uname, from) > 0) {
            /* Final tie-breaker: case-insensitive comparison of node names */
            reason = "Host name";
            we_loose = TRUE;

        } else {
            reason = "Host name";
            CRM_ASSERT(strcasecmp(e->uname, from) < 0);
/* can't happen...
 *	} else if(strcasecmp(e->uname, from) == 0) {
 *
 */
        }
    }

    /* Election storm detection: count wins per STORM_INTERVAL window */
    if (expires < tm_now) {
        election_wins = 0;
        expires = tm_now + STORM_INTERVAL;

    } else if (done == FALSE && we_loose == FALSE) {
        int peers = 1 + g_hash_table_size(crm_peer_cache);

        /* If every node has to vote down every other node, that's N*(N-1) total elections
         * Allow some leeway before _really_ complaining
         */
        election_wins++;
        if (election_wins > (peers * peers)) {
            crm_warn("Election storm detected: %d elections in %d seconds", election_wins,
                     STORM_INTERVAL);
            election_wins = 0;
            expires = tm_now + STORM_INTERVAL;
            crm_write_blackbox(0, NULL);
        }
    }

    if (done) {
        do_crm_log(log_level + 1, "Election %d (current: %d, owner: %s): Processed %s from %s (%s)",
                   election_id, e->count, election_owner, op, from, reason);
        return e->state;

    } else if(we_loose == FALSE) {
        do_crm_log(log_level, "Election %d (owner: %s) pass: %s from %s (%s)",
                   election_id, election_owner, op, from, reason);

        if (last_election_loss == 0
            || tm_now - last_election_loss > (time_t) loss_dampen) {

            last_election_loss = 0;
            election_timeout_stop(e);

            /* Start a new election by voting down this, and other, peers */
            e->state = election_start;
            return e->state;
        }

        /* Still inside the dampening window: fall through and concede.
         * NOTE(review): ctime() output ends in a newline, which ends up
         * inside this log message -- confirm whether that is intended.
         */
        crm_info("Election %d ignore: We already lost an election less than %ds ago (%s)",
                 election_id, loss_dampen, ctime(&last_election_loss));
    }

    /* Lost (or dampened): tell the sender we are not voting against it */
    novote = create_request(CRM_OP_NOVOTE, NULL, from,
                            CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);

    do_crm_log(log_level, "Election %d (owner: %s) lost: %s from %s (%s)",
               election_id, election_owner, op, from, reason);

    election_timeout_stop(e);

    /* Echo the owner and id of the election being answered */
    crm_xml_add(novote, F_CRM_ELECTION_OWNER, election_owner);
    crm_xml_add_int(novote, F_CRM_ELECTION_ID, election_id);

    send_cluster_message(your_node, crm_msg_crmd, novote, TRUE);
    free_xml(novote);

    last_election_loss = tm_now;
    e->state = election_lost;
    return e->state;
}
Exemplo n.º 20
0
/*
 * Handle the reply to our join request, i.e. the notification that we
 * have (or have not) been accepted.
 *
 * Only CRM_OP_JOIN_ACKNAK messages are processed.  A rejection raises
 * I_ERROR.  An acceptance that passes the update_dc() check is answered
 * with a CRM_OP_JOIN_CONFIRM request carrying the result of
 * do_lrm_query(); if that query yields nothing, I_FAIL is raised instead.
 */
void
do_cl_join_finalize_respond(long long action,
                            enum crmd_fsa_cause cause,
                            enum crmd_fsa_state cur_state,
                            enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    /* Cleared the first time a confirmation is sent while not shutting down */
    static gboolean first_join = TRUE;

    int join_id = -1;
    gboolean accepted = FALSE;
    xmlNode *history = NULL;
    xmlNode *confirm = NULL;

    ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
    const char *start_state = daemon_option("node_start_state");

    const char *op = crm_element_value(input->msg, F_CRM_TASK);
    const char *ack_nack = crm_element_value(input->msg, CRM_OP_JOIN_ACKNAK);
    const char *welcome_from = crm_element_value(input->msg, F_CRM_HOST_FROM);

    if (safe_str_neq(op, CRM_OP_JOIN_ACKNAK)) {
        crm_trace("Ignoring op=%s message", op);
        return;
    }

    /* Anything that crm_is_true() does not accept counts as a NACK */
    accepted = crm_is_true(ack_nack);

    crm_element_value_int(input->msg, F_CRM_JOIN_ID, &join_id);

    if (accepted == FALSE) {
        crm_err("Shutting down because cluster join with leader %s failed "
                CRM_XS" join-%d NACK'd", welcome_from, join_id);
        register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
        return;
    }

    if (AM_I_DC == FALSE && safe_str_eq(welcome_from, fsa_our_uname)) {
        crm_warn("Discarding our own welcome - we're no longer the DC");
        return;
    }

    if (update_dc(input->msg) == FALSE) {
        crm_warn("Discarding %s from node %s (expected from %s)",
                 op, welcome_from, fsa_our_dc);
        return;
    }

    update_dc_expected(input->msg);

    /* send our status section to the DC */
    history = do_lrm_query(TRUE, fsa_our_uname);
    if (history == NULL) {
        crm_err("Could not confirm join-%d with %s: Local operation history failed",
                join_id, fsa_our_dc);
        register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
        return;
    }

    confirm = create_request(CRM_OP_JOIN_CONFIRM, history, fsa_our_dc,
                             CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL);
    crm_xml_add_int(confirm, F_CRM_JOIN_ID, join_id);

    crm_debug("Confirming join-%d: sending local operation history to %s",
              join_id, fsa_our_dc);

    /*
     * On the first join since this crmd started, wipe any transient node
     * attributes left over from a previous incarnation; this covers a
     * restart so quick that the cluster layer never noticed it.
     *
     * Resources are deliberately left alone here and are cleaned up in
     * do_dc_join_ack().  Removing them at this point would race with crmd
     * recovery: instead of the lrmd's list of active resources we could
     * end up with a blank status section, probe for the "wrong" instance
     * of an anonymous clone, and finish with multiple active instances on
     * the machine.
     */
    if (first_join && is_not_set(fsa_input_register, R_SHUTDOWN)) {
        first_join = FALSE;
        erase_status_tag(fsa_our_uname, XML_TAG_TRANSIENT_NODEATTRS, 0);
        update_attrd(fsa_our_uname, "terminate", NULL, NULL, FALSE);
        update_attrd(fsa_our_uname, XML_CIB_ATTR_SHUTDOWN, "0", NULL, FALSE);

        if (start_state) {
            set_join_state(start_state);
        }
    }

    send_cluster_message(crm_get_peer(0, fsa_our_dc), crm_msg_crmd, confirm, TRUE);
    free_xml(confirm);

    if (AM_I_DC == FALSE) {
        register_fsa_input_adv(cause, I_NOT_DC, NULL, A_NOTHING, TRUE, __FUNCTION__);
        update_attrd(NULL, NULL, NULL, NULL, FALSE);
    }

    free_xml(history);
}
Exemplo n.º 21
0
/*
 * Dispatch one resource operation from the transition graph.
 *
 * The operation described by action->xml is stamped with a transition key
 * and wrapped in a CRM_OP_INVOKE_LRM request addressed to the router node
 * (XML_LRM_ATTR_ROUTER_NODE when present, otherwise the target node).  If
 * the router node is this node the request is handed straight to
 * do_lrm_invoke(); otherwise it is sent over the cluster.
 *
 * Afterwards the action is confirmed immediately (XML_ATTR_TE_NOWAIT meta
 * attribute true), left alone if it is already confirmed, or counted as a
 * running job and given a timer.
 *
 * \param graph   graph the action belongs to (supplies network_delay and
 *                is passed to te_start_action_timer())
 * \param action  action to execute; must be non-NULL with non-NULL xml
 *
 * \return FALSE if the action has no target node or the cluster send
 *         failed, TRUE otherwise
 */
static gboolean
te_rsc_command(crm_graph_t * graph, crm_action_t * action)
{
    /* never overwrite stop actions in the CIB with
     *   anything other than completed results
     *
     * Writing pending stops makes it look like the
     *   resource is running again
     */
    xmlNode *cmd = NULL;
    xmlNode *rsc_op = NULL;

    gboolean rc = TRUE;
    gboolean no_wait = FALSE;
    gboolean is_local = FALSE;

    char *counter = NULL;
    const char *task = NULL;
    const char *value = NULL;
    const char *on_node = NULL;
    const char *router_node = NULL;
    const char *task_uuid = NULL;

    CRM_ASSERT(action != NULL);
    CRM_ASSERT(action->xml != NULL);

    action->executed = FALSE;
    rsc_op = action->xml;

    /* Read the task name before validating the target so that the
     * "Corrupted command" message below can actually name the task
     * instead of always printing a NULL placeholder.
     */
    task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
    on_node = crm_element_value(rsc_op, XML_LRM_ATTR_TARGET);

    CRM_CHECK(on_node != NULL && strlen(on_node) != 0,
              crm_err("Corrupted command(id=%s) %s: no node", ID(action->xml), crm_str(task));
              return FALSE);

    task_uuid = crm_element_value(rsc_op, XML_LRM_ATTR_TASK_KEY);
    router_node = crm_element_value(rsc_op, XML_LRM_ATTR_ROUTER_NODE);

    /* Without an explicit router node the request goes to the target itself */
    if (!router_node) {
        router_node = on_node;
    }

    /* Tag the operation so its result can be matched to this transition */
    counter =
        generate_transition_key(transition_graph->id, action->id, get_target_rc(action), te_uuid);
    crm_xml_add(rsc_op, XML_ATTR_TRANSITION_KEY, counter);

    if (safe_str_eq(router_node, fsa_our_uname)) {
        is_local = TRUE;
    }

    value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT);
    if (crm_is_true(value)) {
        no_wait = TRUE;
    }

    crm_notice("Initiating %s operation %s%s on %s%s "CRM_XS" action %d",
               task, task_uuid, (is_local? " locally" : ""), on_node,
               (no_wait? " without waiting" : ""), action->id);

    cmd = create_request(CRM_OP_INVOKE_LRM, rsc_op, router_node,
                         CRM_SYSTEM_LRMD, CRM_SYSTEM_TENGINE, NULL);

    if (is_local) {
        /* shortcut local resource commands */
        ha_msg_input_t data = {
            .msg = cmd,
            .xml = rsc_op,
        };

        fsa_data_t msg = {
            .id = 0,
            .data = &data,
            .data_type = fsa_dt_ha_msg,
            .fsa_input = I_NULL,
            .fsa_cause = C_FSA_INTERNAL,
            .actions = A_LRM_INVOKE,
            .origin = __FUNCTION__,
        };

        do_lrm_invoke(A_LRM_INVOKE, C_FSA_INTERNAL, fsa_state, I_NULL, &msg);

    } else {
        rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_lrmd, cmd, TRUE);
    }

    free(counter);
    free_xml(cmd);

    action->executed = TRUE;

    if (rc == FALSE) {
        crm_err("Action %d failed: send", action->id);
        return FALSE;

    } else if (no_wait) {
        crm_info("Action %d confirmed - no wait", action->id);
        action->confirmed = TRUE; /* Just mark confirmed.
                                   * Don't bump the job count only to immediately decrement it
                                   */
        update_graph(transition_graph, action);
        trigger_graph();

    } else if (action->confirmed == TRUE) {
        crm_debug("Action %d: %s %s on %s(timeout %dms) was already confirmed.",
                  action->id, task, task_uuid, on_node, action->timeout);
    } else {
        if (action->timeout <= 0) {
            crm_err("Action %d: %s %s on %s had an invalid timeout (%dms).  Using %dms instead",
                    action->id, task, task_uuid, on_node, action->timeout, graph->network_delay);
            action->timeout = graph->network_delay;
        }
        te_update_job_count(action, 1);
        te_start_action_timer(graph, action);
    }

    return TRUE;
}
/*
 * FSA action handler that processes one election message (CRM_OP_VOTE or
 * CRM_OP_NOVOTE) carried in msg_data.
 *
 * A fixed sequence of checks classifies the message as:
 *  - done:     only logged (and, for recorded votes, stored in 'voted');
 *  - we_loose: a CRM_OP_NOVOTE is sent back to the sender, the DC is
 *              cleared with update_dc(NULL), the election timer is stopped
 *              and I_RELEASE_DC or I_PENDING is registered;
 *  - neither:  I_ELECTION is registered and the 'voted' table destroyed,
 *              unless an election was lost less than loss_dampen seconds
 *              ago, in which case the message is ignored.
 *
 * Of the parameters, only cur_state and msg_data are used by the code
 * visible here.
 */
void
do_election_count_vote(long long action,
		       enum crmd_fsa_cause cause,
		       enum crmd_fsa_state cur_state,
		       enum crmd_fsa_input current_input,
		       fsa_data_t *msg_data)
{
	int election_id = -1;
	int log_level = LOG_INFO;
	gboolean done = FALSE;
	gboolean we_loose = FALSE;
	const char *op             = NULL;	
	const char *vote_from      = NULL;
	const char *your_version   = NULL;
	const char *election_owner = NULL;
	const char *reason	   = "unknown";
	crm_node_t *our_node = NULL, *your_node = NULL;
	ha_msg_input_t *vote = fsa_typed_data(fsa_dt_ha_msg);

	/* Only written outside the "#if 0" section below, never read there */
	static time_t last_election_win = 0;
	/* Time of the most recent loss; 0 when no loss is being dampened */
	static time_t last_election_loss = 0;
	
	/* if the membership copy is NULL we REALLY shouldn't be voting
	 * the question is how we managed to get here.
	 */
	
	CRM_CHECK(msg_data != NULL, return);
	CRM_CHECK(crm_peer_cache != NULL, return);
	CRM_CHECK(vote != NULL, crm_err("Bogus data from %s", msg_data->origin); return);
	CRM_CHECK(vote->msg != NULL, crm_err("Bogus data from %s", msg_data->origin); return);
	
	/* Extract the fields of the received message */
	op             = crm_element_value(vote->msg, F_CRM_TASK);
	vote_from      = crm_element_value(vote->msg, F_CRM_HOST_FROM);
	your_version   = crm_element_value(vote->msg, F_CRM_VERSION);
	election_owner = crm_element_value(vote->msg, F_CRM_ELECTION_OWNER);
	crm_element_value_int(vote->msg, F_CRM_ELECTION_ID, &election_id);

	/* A message without a sender is treated as coming from the local node */
	CRM_CHECK(vote_from != NULL, vote_from = fsa_our_uname);
	
	/* Look up the node entry for the sender of the message */
	your_node = crm_get_peer(0, vote_from);
	/* Look up the node entry for the local node */
	our_node = crm_get_peer(0, fsa_our_uname);
	
 	if(voted == NULL) {
		crm_debug("Created voted hash");
		/* Create the 'voted' hash table if it does not exist yet */
 		voted = g_hash_table_new_full(
			g_str_hash, g_str_equal,
			g_hash_destroy_str, g_hash_destroy_str);
 	}
	
	/* The order of the following checks decides the outcome: the first
	 * matching branch wins.
	 */
	if(cur_state == S_STARTING) {
		/* While the local node is still in S_STARTING it cannot become DC, so a CRM_OP_NOVOTE message is sent */
	    reason = "Still starting";
	    we_loose = TRUE;
	
	} else if(our_node == NULL || crm_is_member_active(our_node) == FALSE) {
		/* The local node is not yet known as part of the cluster, or is not active: */
		/* it cannot become DC, so a CRM_OP_NOVOTE message is sent */
	    reason = "We are not part of the cluster";
	    log_level = LOG_ERR;
	    we_loose = TRUE;

	} else if(your_node == NULL || crm_is_member_active(your_node) == FALSE) {
	    /* The sender of the message is not known as part of the cluster, or is not active: */
	    /* only log */
	    reason = "Peer is not part of our cluster";
	    log_level = LOG_WARNING;
	    done = TRUE;

	} else if(election_id != current_election_id
	    && crm_str_eq(fsa_our_uuid, election_owner, TRUE)) {
		/* The message is for one of our own elections but its election_id differs from the current one: only log */
	    log_level = LOG_DEBUG_2;
	    reason = "Superceeded";
	    done = TRUE;

	} else if(crm_str_eq(op, CRM_OP_NOVOTE, TRUE)) {
		/* A CRM_OP_NOVOTE message, i.e. the sender decided it cannot become DC */
	    char *op_copy = crm_strdup(op);
	    char *uname_copy = crm_strdup(vote_from);
	    CRM_ASSERT(crm_str_eq(fsa_our_uuid, election_owner, TRUE));
	    
	    /* update the list of nodes that have voted */
		/* Store the sender's entry in the 'voted' hash table */
	    g_hash_table_replace(voted, uname_copy, op_copy);
	    reason = "Recorded";
	    done = TRUE;

	} else if(crm_str_eq(vote_from, fsa_our_uname, TRUE)) {
		/* Processing a message that the local node itself sent */
	    char *op_copy = crm_strdup(op);
	    char *uname_copy = crm_strdup(vote_from);
	    CRM_ASSERT(crm_str_eq(fsa_our_uuid, election_owner, TRUE));

	    /* update ourselves in the list of nodes that have voted */
		/* Store the local node's entry in the 'voted' hash table */
	    g_hash_table_replace(voted, uname_copy, op_copy);
	    reason = "Recorded";
	    done = TRUE;
	    
	} else if(compare_version(your_version, CRM_FEATURE_SET) < 0) {
		/* The sender's version compares lower than CRM_FEATURE_SET: */
	    /* the local node cannot become DC */
	    reason = "Version";
	    we_loose = TRUE;
		
	} else if(compare_version(your_version, CRM_FEATURE_SET) > 0) {
		/* The sender's version compares higher than CRM_FEATURE_SET: neither flag is set, so the "pass" branch below runs */
	    reason = "Version";
	    
	} else if(your_node->born < our_node->born) {
		/* The sender's 'born' value is smaller than the local node's */
	    reason = "Age";
	    /* the local node cannot become DC */
	    we_loose = TRUE;
	    
	} else if(your_node->born > our_node->born) {
		/* The sender's 'born' value is larger than the local node's: */
	    /* the local node is a DC candidate */
	    reason = "Age";

	} else if(fsa_our_uname == NULL) {
		/* The local node name is not set: */
	    /* the local node cannot become DC */
	    reason = "Unknown host name";
	    we_loose = TRUE;
	    
	} else if(strcasecmp(fsa_our_uname, vote_from) > 0) {
		/* The local node name compares greater than the sender's (case-insensitive): */
	    /* the local node cannot become DC */
	    reason = "Host name";
	    we_loose = TRUE;
	    
	} else {
		/* All remaining cases */
	    reason = "Host name";
	    CRM_ASSERT(strcmp(fsa_our_uname, vote_from) != 0);
/* can't happen...
 *	} else if(strcasecmp(fsa_our_uname, vote_from) == 0) {
 *
 * default...
 *	} else { // strcasecmp(fsa_our_uname, vote_from) < 0
 *		we win
 */
	}

	if(done) {
	    do_crm_log(log_level+1, "Election %d (current: %d, owner: %s): Processed %s from %s (%s)",
		       election_id, current_election_id, election_owner, op, vote_from, reason);
	    
	} else if(we_loose) {
		/* Decided (from the 'born' comparison or one of the other checks) that we cannot become DC */
		
		/* Build a CRM_OP_NOVOTE message addressed to the sender's CRMD */
		xmlNode *novote = create_request(
			CRM_OP_NOVOTE, NULL, vote_from,
			CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);

		do_crm_log(log_level+1, "Election %d (owner: %s) lost: %s from %s (%s)",
			   election_id, election_owner, op, vote_from, reason);
		
		/* Call update_dc() with NULL */
		update_dc(NULL);
		
		/* Stop the election_timeout timer */
		crm_timer_stop(election_timeout);
		
		if(fsa_input_register & R_THE_DC) {
			crm_debug_3("Give up the DC to %s", vote_from);
			register_fsa_input(C_FSA_INTERNAL, I_RELEASE_DC, NULL);
			
		} else if(cur_state != S_STARTING) {
			crm_debug_3("We werent the DC anyway");
			register_fsa_input(C_FSA_INTERNAL, I_PENDING, NULL);
		}

		/* Copy the received election_owner into F_CRM_ELECTION_OWNER of the reply */
		crm_xml_add(novote, F_CRM_ELECTION_OWNER, election_owner);
		/* Copy the received election_id into F_CRM_ELECTION_ID of the reply */
		crm_xml_add_int(novote, F_CRM_ELECTION_ID, election_id);
		
		/* Send the CRM_OP_NOVOTE message to the sender of the vote */
		send_cluster_message(vote_from, crm_msg_crmd, novote, TRUE);
		
		/* Release the outgoing message */
		free_xml(novote);

		/* Invoke the CIB connection's set_slave operation */
		fsa_cib_conn->cmds->set_slave(fsa_cib_conn, cib_scope_local);

		last_election_loss = time(NULL);
		last_election_win = 0;

	} else {
	    do_crm_log(log_level, "Election %d (owner: %s) pass: %s from %s (%s)",
		     election_id, election_owner, op, vote_from, reason);

	    /* Do not start a new election too soon after losing one */
	    if(last_election_loss) {
			time_t tm_now = time(NULL);
			if(tm_now - last_election_loss < (time_t)loss_dampen) {
		    	crm_info("Election %d ignore: We already lost an election less than %ds ago",
			      election_id, loss_dampen);
		    	update_dc(NULL);
		    return;
			}
			last_election_loss = 0;
	    }

#if 0
	    /* Enabling this code can lead to multiple DCs during SimulStart.
	     * Specifically when a node comes up after our last 'win' vote.
	     *
	     * Fixing and enabling this functionality might become important when
	     * we start running really big clusters, but for now leave it disabled.
	     */
	    if(last_election_win) {
		time_t tm_now = time(NULL);
		if(tm_now - last_election_win < (time_t)win_dampen) {
		    crm_info("Election %d ignore: We already won an election less than %ds ago",
			      election_id, win_dampen);
		    return;
		}
	    }

	    last_election_win = time(NULL);
#endif
		/* Raise I_ELECTION and discard the recorded votes */
	    register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
	    g_hash_table_destroy(voted);
	    voted = NULL;
	}	
}
Exemplo n.º 23
0
enum crmd_fsa_input
handle_request(xmlNode * stored_msg, enum crmd_fsa_cause cause)
{
    xmlNode *msg = NULL;
    const char *op = crm_element_value(stored_msg, F_CRM_TASK);

    /* Optimize this for the DC - it has the most to do */

    if (op == NULL) {
        crm_log_xml_err(stored_msg, "Bad message");
        return I_NULL;
    }

    if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) {
        const char *from = crm_element_value(stored_msg, F_CRM_HOST_FROM);
        crm_node_t *node = crm_find_peer(0, from);

        crm_update_peer_expected(__FUNCTION__, node, CRMD_JOINSTATE_DOWN);
        if(AM_I_DC == FALSE) {
            return I_NULL; /* Done */
        }
    }

    /*========== DC-Only Actions ==========*/
    if (AM_I_DC) {
        if (strcmp(op, CRM_OP_JOIN_ANNOUNCE) == 0) {
            return I_NODE_JOIN;

        } else if (strcmp(op, CRM_OP_JOIN_REQUEST) == 0) {
            return I_JOIN_REQUEST;

        } else if (strcmp(op, CRM_OP_JOIN_CONFIRM) == 0) {
            return I_JOIN_RESULT;

        } else if (strcmp(op, CRM_OP_SHUTDOWN) == 0) {
            const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM);
            gboolean dc_match = safe_str_eq(host_from, fsa_our_dc);

            if (is_set(fsa_input_register, R_SHUTDOWN)) {
                crm_info("Shutting ourselves down (DC)");
                return I_STOP;

            } else if (dc_match) {
                crm_err("We didnt ask to be shut down, yet our"
                        " TE is telling us too." " Better get out now!");
                return I_TERMINATE;

            } else if (fsa_state != S_STOPPING) {
                crm_err("Another node is asking us to shutdown" " but we think we're ok.");
                return I_ELECTION;
            }

        } else if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) {
            /* a slave wants to shut down */
            /* create cib fragment and add to message */
            return handle_shutdown_request(stored_msg);
        }
    }

    /*========== common actions ==========*/
    if (strcmp(op, CRM_OP_NOVOTE) == 0) {
        ha_msg_input_t fsa_input;

        fsa_input.msg = stored_msg;
        register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input,
                               A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE, __FUNCTION__);

    } else if (strcmp(op, CRM_OP_THROTTLE) == 0) {
        throttle_update(stored_msg);
        return I_NULL;

    } else if (strcmp(op, CRM_OP_CLEAR_FAILCOUNT) == 0) {
        return handle_failcount_op(stored_msg);

    } else if (strcmp(op, CRM_OP_VOTE) == 0) {
        /* count the vote and decide what to do after that */
        ha_msg_input_t fsa_input;

        fsa_input.msg = stored_msg;
        register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input,
                               A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE, __FUNCTION__);

        /* Sometimes we _must_ go into S_ELECTION */
        if (fsa_state == S_HALT) {
            crm_debug("Forcing an election from S_HALT");
            return I_ELECTION;
#if 0
        } else if (AM_I_DC) {
            /* This is the old way of doing things but what is gained? */
            return I_ELECTION;
#endif
        }

    } else if (strcmp(op, CRM_OP_JOIN_OFFER) == 0) {
        crm_debug("Raising I_JOIN_OFFER: join-%s", crm_element_value(stored_msg, F_CRM_JOIN_ID));
        return I_JOIN_OFFER;

    } else if (strcmp(op, CRM_OP_JOIN_ACKNAK) == 0) {
        crm_debug("Raising I_JOIN_RESULT: join-%s", crm_element_value(stored_msg, F_CRM_JOIN_ID));
        return I_JOIN_RESULT;

    } else if (strcmp(op, CRM_OP_LRM_DELETE) == 0
               || strcmp(op, CRM_OP_LRM_FAIL) == 0
               || strcmp(op, CRM_OP_LRM_REFRESH) == 0 || strcmp(op, CRM_OP_REPROBE) == 0) {

        crm_xml_add(stored_msg, F_CRM_SYS_TO, CRM_SYSTEM_LRMD);
        return I_ROUTER;

    } else if (strcmp(op, CRM_OP_NOOP) == 0) {
        return I_NULL;

    } else if (strcmp(op, CRM_OP_LOCAL_SHUTDOWN) == 0) {

        crm_shutdown(SIGTERM);
        /*return I_SHUTDOWN; */
        return I_NULL;

        /*========== (NOT_DC)-Only Actions ==========*/
    } else if (AM_I_DC == FALSE && strcmp(op, CRM_OP_SHUTDOWN) == 0) {

        const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM);
        gboolean dc_match = safe_str_eq(host_from, fsa_our_dc);

        if (dc_match || fsa_our_dc == NULL) {
            if (is_set(fsa_input_register, R_SHUTDOWN) == FALSE) {
                crm_err("We didn't ask to be shut down, yet our" " DC is telling us too.");
                set_bit(fsa_input_register, R_STAYDOWN);
                return I_STOP;
            }
            crm_info("Shutting down");
            return I_STOP;

        } else {
            crm_warn("Discarding %s op from %s", op, host_from);
        }

    } else if (strcmp(op, CRM_OP_PING) == 0) {
        /* eventually do some stuff to figure out
         * if we /are/ ok
         */
        const char *sys_to = crm_element_value(stored_msg, F_CRM_SYS_TO);
        xmlNode *ping = create_xml_node(NULL, XML_CRM_TAG_PING);

        crm_xml_add(ping, XML_PING_ATTR_STATUS, "ok");
        crm_xml_add(ping, XML_PING_ATTR_SYSFROM, sys_to);
        crm_xml_add(ping, "crmd_state", fsa_state2string(fsa_state));

        /* Ok, so technically not so interesting, but CTS needs to see this */
        crm_notice("Current ping state: %s", fsa_state2string(fsa_state));

        msg = create_reply(stored_msg, ping);
        if (msg) {
            (void)relay_message(msg, TRUE);
        }

        free_xml(ping);
        free_xml(msg);

    } else if (strcmp(op, CRM_OP_RM_NODE_CACHE) == 0) {
        int id = 0;
        const char *name = NULL;

        crm_element_value_int(stored_msg, XML_ATTR_ID, &id);
        name = crm_element_value(stored_msg, XML_ATTR_UNAME);

        if(cause == C_IPC_MESSAGE) {
            msg = create_request(CRM_OP_RM_NODE_CACHE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
            if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) {
                crm_err("Could not instruct peers to remove references to node %s/%u", name, id);
            } else {
                crm_notice("Instructing peers to remove references to node %s/%u", name, id);
            }
            free_xml(msg);

        } else {
            reap_crm_member(id, name);
        }

    } else {
        crm_err("Unexpected request (%s) sent to %s", op, AM_I_DC ? "the DC" : "non-DC node");
        crm_log_xml_err(stored_msg, "Unexpected");
    }

    return I_NULL;
}
Exemplo n.º 24
0
/*
 * Make sure the joining node has a node entry in the CIB and send it a
 * CRM_OP_JOIN_ACKNAK message.
 *
 * Nodes that are ACK'ed are additionally recorded in the finalized_nodes
 * hash table.
 *
 * key:       name of the joining node (const char *)
 * value:     join state recorded for that node (const char *)
 * user_data: unused
 *
 * Always returns TRUE.
 */
gboolean
finalize_join_for(gpointer key, gpointer value, gpointer user_data)
{
	const char *target = (const char *)key;
	const char *state = (const char *)value;
	crm_node_t *peer = NULL;
	xmlNode *reply = NULL;
	gboolean ack = FALSE;

	if(target == NULL || state == NULL) {
		return TRUE;
	}

	/* make sure the node exists in the config section (CIB node entry) */
	create_node_entry(target, target, NORMALNODE);

	peer = crm_get_peer(0, target);
	if(crm_is_member_active(peer) == FALSE) {
		/*
		 * NACK'ing nodes that the membership layer doesn't know about yet
		 * simply creates more churn
		 *
		 * Better to leave them waiting and let the join restart when
		 * the new membership event comes in
		 *
		 * All other NACKs (due to versions etc) should still be processed
		 */
		return TRUE;
	}

	/* build the CRM_OP_JOIN_ACKNAK message addressed to the node */
	reply = create_request(CRM_OP_JOIN_ACKNAK, NULL, target,
			       CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL);
	crm_xml_add_int(reply, F_CRM_JOIN_ID, current_join_id);

	/* only nodes in the CRMD_JOINSTATE_MEMBER state are ACK'ed */
	ack = safe_str_eq(state, CRMD_JOINSTATE_MEMBER);
	if(ack) {
		crm_debug("join-%d: ACK'ing join request from %s, state %s",
			  current_join_id, target, state);
	} else {
		crm_warn("join-%d: NACK'ing join request from %s, state %s",
			 current_join_id, target, state);
	}

	/* set the ack/nack */
	crm_xml_add(reply, CRM_OP_JOIN_ACKNAK, ack ? XML_BOOLEAN_TRUE : XML_BOOLEAN_FALSE);

	if(ack) {
		/* remember every node we have sent a positive answer to */
		g_hash_table_insert(finalized_nodes,
				    crm_strdup(target), crm_strdup(CRMD_JOINSTATE_MEMBER));
	}

	/* send the message, then release our copy of it */
	send_cluster_message(target, crm_msg_crmd, reply, TRUE);
	free_xml(reply);

	return TRUE;
}
Exemplo n.º 25
0
/*
 * Handle a request received from a client.
 *
 * "peer-remove": the request is broadcast to the cluster when it names a host.
 * "update":      the host defaults to the local node when the request does not
 *                name one, an increment expression in the value (a '+' at
 *                offset plus_plus_len followed by '+' or '=') is expanded into
 *                an absolute integer, and the request is then broadcast.
 *
 * Any other operation is ignored.
 *
 * client: the client that sent the request (only its name is used, for logging)
 * xml:    the request; it is modified in place before being broadcast
 */
void
attrd_client_message(crm_client_t *client, xmlNode *xml)
{
    bool broadcast = FALSE;
    /* Offset of the first '+' in an increment expression
     * (presumably the length of the literal prefix "value" -- TODO confirm) */
    static int plus_plus_len = 5;
    const char *op = crm_element_value(xml, F_ATTRD_TASK);

    if(safe_str_eq(op, "peer-remove")) {
        const char *host = crm_element_value(xml, F_ATTRD_HOST);

        crm_info("Client %s is requesting all values for %s be removed", client->name, host);
        if(host) {
            broadcast = TRUE;
        }

    } else if(safe_str_eq(op, "update")) {
        attribute_t *a = NULL;
        attribute_value_t *v = NULL;
        char *host = crm_element_value_copy(xml, F_ATTRD_HOST);
        const char *attr = crm_element_value(xml, F_ATTRD_ATTRIBUTE);
        const char *value = crm_element_value(xml, F_ATTRD_VALUE);

        a = g_hash_table_lookup(attributes, attr);

        if(host == NULL) {
            /* No host given: the update applies to the local node */
            crm_trace("Inferring host");
            host = strdup(attrd_cluster->uname);
            crm_xml_add(xml, F_ATTRD_HOST, host);
            crm_xml_add_int(xml, F_ATTRD_HOST_ID, attrd_cluster->nodeid);
        }

        if (value) {
            int offset = 1;
            int int_value = 0;
            int value_len = strlen(value);

            /* Anything that is not an increment expression is sent unchanged */
            if (value_len < (plus_plus_len + 2)
                || value[plus_plus_len] != '+'
                || (value[plus_plus_len + 1] != '+' && value[plus_plus_len + 1] != '=')) {
                goto send;
            }

            /* Start from the host's current value, or 0 if there is none */
            if(a) {
                v = g_hash_table_lookup(a->values, host);
            }
            if(v) {
                int_value = char2score(v->current);
            }

            /* "++" adds one, "+=" adds the score that follows it */
            if (value[plus_plus_len + 1] != '+') {
                const char *offset_s = value + (plus_plus_len + 2);

                offset = char2score(offset_s);
            }
            int_value += offset;

            if (int_value > INFINITY) {
                int_value = INFINITY;
            }

            crm_info("Expanded %s=%s to %d", attr, value, int_value);
            crm_xml_add_int(xml, F_ATTRD_VALUE, int_value);

            /* The attribute was just replaced, so the string 'value' pointed
             * at may no longer be valid: re-query it before it is logged below
             */
            value = crm_element_value(xml, F_ATTRD_VALUE);
        }

      send:

        if(peer_writer == NULL && election_state(writer) != election_in_progress) {
            crm_info("Starting an election to determine the writer");
            election_vote(writer);
        }

        crm_info("Broadcasting %s[%s] = %s%s", attr, host, value, election_state(writer) == election_won?" (writer)":"");
        broadcast = TRUE;

        free(host);
    }

    if(broadcast) {
        crm_xml_add_int(xml, F_ATTRD_WRITER, election_state(writer));
        send_cluster_message(NULL, crm_msg_attrd, xml, TRUE);
    }
}
Exemplo n.º 26
0
/*	A_ELECTION_COUNT	*/
/*
 * Process an election message (CRM_OP_VOTE or CRM_OP_NOVOTE) taken from the
 * FSA data in msg_data and decide the outcome for this node:
 *
 *  - "done":     the message was only recorded or ignored; nothing else happens
 *  - "we_loose": we concede: a CRM_OP_NOVOTE is sent to the voter, the DC role
 *                is released (or I_PENDING raised) and the CIB connection is
 *                switched to slave mode
 *  - otherwise:  we pass the comparison and raise I_ELECTION, unless we lost
 *                an election less than loss_dampen seconds ago
 *
 * The comparison is decided, in order, by: our own state, cluster membership,
 * feature-set version, uptime, "born" value (only for heartbeat and classic
 * AIS clusters) and finally host name.
 */
void
do_election_count_vote(long long action,
                       enum crmd_fsa_cause cause,
                       enum crmd_fsa_state cur_state,
                       enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    struct timeval your_age;
    int age;
    int age_s = 0;
    int age_us = 0;
    int election_id = -1;
    int log_level = LOG_INFO;
    gboolean use_born_on = FALSE;
    gboolean done = FALSE;
    gboolean we_loose = FALSE;
    const char *op = NULL;
    const char *vote_from = NULL;
    const char *your_version = NULL;
    const char *election_owner = NULL;
    const char *reason = "unknown";
    crm_node_t *our_node = NULL, *your_node = NULL;
    ha_msg_input_t *vote = fsa_typed_data(fsa_dt_ha_msg);

    /* Time of the most recent election we lost (0 = none being dampened) */
    static time_t last_election_loss = 0;

    /* if the membership copy is NULL we REALLY shouldnt be voting
     * the question is how we managed to get here.
     */

    CRM_CHECK(msg_data != NULL, return);
    CRM_CHECK(crm_peer_cache != NULL, return);
    CRM_CHECK(vote != NULL, crm_err("Bogus data from %s", msg_data->origin); return);
    CRM_CHECK(vote->msg != NULL, crm_err("Bogus data from %s", msg_data->origin); return);

    your_age.tv_sec = 0;
    your_age.tv_usec = 0;

    op = crm_element_value(vote->msg, F_CRM_TASK);
    vote_from = crm_element_value(vote->msg, F_CRM_HOST_FROM);
    your_version = crm_element_value(vote->msg, F_CRM_VERSION);
    election_owner = crm_element_value(vote->msg, F_CRM_ELECTION_OWNER);
    crm_element_value_int(vote->msg, F_CRM_ELECTION_ID, &election_id);

    /* Read the age into real ints first: the timeval members are not
     * necessarily int-sized, so writing to them through an (int *) cast only
     * fills part of the member (the wrong part on big-endian systems)
     */
    crm_element_value_int(vote->msg, F_CRM_ELECTION_AGE_S, &age_s);
    crm_element_value_int(vote->msg, F_CRM_ELECTION_AGE_US, &age_us);
    your_age.tv_sec = age_s;
    your_age.tv_usec = age_us;

    CRM_CHECK(vote_from != NULL, vote_from = fsa_our_uname);

    your_node = crm_get_peer(0, vote_from);
    our_node = crm_get_peer(0, fsa_our_uname);

    if (voted == NULL) {
        crm_debug("Created voted hash");
        voted = g_hash_table_new_full(crm_str_hash, g_str_equal,
                                      g_hash_destroy_str, g_hash_destroy_str);
    }

    if (is_heartbeat_cluster()) {
        use_born_on = TRUE;
    } else if (is_classic_ais_cluster()) {
        use_born_on = TRUE;
    }

    age = crm_compare_age(your_age);

    if (cur_state == S_STARTING) {
        reason = "Still starting";
        we_loose = TRUE;

    } else if (our_node == NULL || crm_is_peer_active(our_node) == FALSE) {
        reason = "We are not part of the cluster";
        log_level = LOG_ERR;
        we_loose = TRUE;

    } else if (election_id != current_election_id && crm_str_eq(fsa_our_uuid, election_owner, TRUE)) {
        /* A reply to one of our own, older, elections */
        log_level = LOG_DEBUG_2;
        reason = "Superceeded";
        done = TRUE;

    } else if (your_node == NULL || crm_is_peer_active(your_node) == FALSE) {
        /* Possibly we cached the message in the FSA queue at a point that it wasn't */
        reason = "Peer is not part of our cluster";
        log_level = LOG_WARNING;
        done = TRUE;

    } else if (crm_str_eq(op, CRM_OP_NOVOTE, TRUE)) {
        char *op_copy = strdup(op);
        char *uname_copy = strdup(vote_from);

        CRM_ASSERT(crm_str_eq(fsa_our_uuid, election_owner, TRUE));

        /* update the list of nodes that have voted */
        g_hash_table_replace(voted, uname_copy, op_copy);
        reason = "Recorded";
        done = TRUE;

    } else if (crm_str_eq(vote_from, fsa_our_uname, TRUE)) {
        char *op_copy = strdup(op);
        char *uname_copy = strdup(vote_from);

        CRM_ASSERT(crm_str_eq(fsa_our_uuid, election_owner, TRUE));

        /* update ourselves in the list of nodes that have voted */
        g_hash_table_replace(voted, uname_copy, op_copy);
        reason = "Recorded";
        done = TRUE;

    } else if (compare_version(your_version, CRM_FEATURE_SET) < 0) {
        reason = "Version";
        we_loose = TRUE;

    } else if (compare_version(your_version, CRM_FEATURE_SET) > 0) {
        reason = "Version";

    } else if (age < 0) {
        reason = "Uptime";
        we_loose = TRUE;

    } else if (age > 0) {
        reason = "Uptime";

        /* TODO: Check for y(our) born < 0 */
    } else if (use_born_on && your_node->born < our_node->born) {
        reason = "Born";
        we_loose = TRUE;

    } else if (use_born_on && your_node->born > our_node->born) {
        reason = "Born";

    } else if (fsa_our_uname == NULL) {
        reason = "Unknown host name";
        we_loose = TRUE;

    } else if (strcasecmp(fsa_our_uname, vote_from) > 0) {
        reason = "Host name";
        we_loose = TRUE;

    } else {
        reason = "Host name";
        CRM_ASSERT(strcmp(fsa_our_uname, vote_from) != 0);
/* cant happen...
 *	} else if(strcasecmp(fsa_our_uname, vote_from) == 0) {
 *
 * default...
 *	} else { // strcasecmp(fsa_our_uname, vote_from) < 0
 *		we win
 */
    }

    if (done) {
        do_crm_log(log_level + 1, "Election %d (current: %d, owner: %s): Processed %s from %s (%s)",
                   election_id, current_election_id, election_owner, op, vote_from, reason);

    } else if (we_loose) {
        xmlNode *novote = create_request(CRM_OP_NOVOTE, NULL, vote_from,
                                         CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);

        do_crm_log(log_level, "Election %d (owner: %s) lost: %s from %s (%s)",
                   election_id, election_owner, op, vote_from, reason);
        update_dc(NULL);

        crm_timer_stop(election_timeout);
        if (fsa_input_register & R_THE_DC) {
            crm_trace("Give up the DC to %s", vote_from);
            register_fsa_input(C_FSA_INTERNAL, I_RELEASE_DC, NULL);

        } else if (cur_state != S_STARTING) {
            crm_trace("We werent the DC anyway");
            register_fsa_input(C_FSA_INTERNAL, I_PENDING, NULL);
        }

        /* Tell the voter that we concede this election */
        crm_xml_add(novote, F_CRM_ELECTION_OWNER, election_owner);
        crm_xml_add_int(novote, F_CRM_ELECTION_ID, election_id);

        send_cluster_message(crm_get_peer(0, vote_from), crm_msg_crmd, novote, TRUE);
        free_xml(novote);

        fsa_cib_conn->cmds->set_slave(fsa_cib_conn, cib_scope_local);

        /* Remembered so that a later "pass" can be dampened (see below) */
        last_election_loss = time(NULL);

    } else {
        do_crm_log(log_level, "Election %d (owner: %s) pass: %s from %s (%s)",
                   election_id, election_owner, op, vote_from, reason);

        if (last_election_loss) {
            time_t tm_now = time(NULL);

            /* Do not start a new election right after having lost one */
            if (tm_now - last_election_loss < (time_t) loss_dampen) {
                crm_info("Election %d ignore: We already lost an election less than %ds ago (%s)",
                         election_id, loss_dampen, ctime(&last_election_loss));
                update_dc(NULL);
                return;
            }
            last_election_loss = 0;
        }

        register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
        g_hash_table_destroy(voted);
        voted = NULL;
    }
}
Exemplo n.º 27
0
/*
 * Execute a crm-event action of a transition graph.
 *
 * A shutdown targeted at the local node is not sent anywhere: it is confirmed
 * immediately and recorded as the graph's completion action.  Every other
 * event is sent as a request to the crmd on the target node; unless the
 * action is flagged as "no wait" an action timer is then started for it.
 *
 * graph:  the graph the action belongs to
 * action: the action to execute
 *
 * Returns FALSE if the action names no target node or the request could not
 * be sent, TRUE otherwise.
 */
static gboolean
te_crm_command(crm_graph_t * graph, crm_action_t * action)
{
    char *counter = NULL;
    xmlNode *cmd = NULL;
    gboolean is_local = FALSE;

    const char *id = NULL;
    const char *task = NULL;
    const char *value = NULL;
    const char *on_node = NULL;

    gboolean rc = TRUE;
    gboolean no_wait = FALSE;

    id = ID(action->xml);
    task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
    on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);

    CRM_CHECK(on_node != NULL && strlen(on_node) != 0,
              te_log_action(LOG_ERR, "Corrupted command (id=%s) %s: no node",
                            crm_str(id), crm_str(task));
              return FALSE);

    if (safe_str_eq(on_node, fsa_our_uname)) {
        is_local = TRUE;
    }

    value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT);
    if (crm_is_true(value)) {
        no_wait = TRUE;
    }

    /* Logged only once is_local and no_wait are known, otherwise the
     * " (local)" and " - no waiting" suffixes could never be printed
     */
    te_log_action(LOG_INFO, "Executing crm-event (%s): %s on %s%s%s",
                  crm_str(id), crm_str(task), on_node,
                  is_local ? " (local)" : "", no_wait ? " - no waiting" : "");

    if (is_local && safe_str_eq(task, CRM_OP_SHUTDOWN)) {
        /* defer until everything else completes */
        te_log_action(LOG_INFO, "crm-event (%s) is a local shutdown", crm_str(id));
        graph->completion_action = tg_shutdown;
        graph->abort_reason = "local shutdown";
        action->confirmed = TRUE;
        update_graph(graph, action);
        trigger_graph();
        return TRUE;
    }

    cmd = create_request(task, action->xml, on_node, CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL);

    /* Tag the request with a transition key built from this action */
    counter =
        generate_transition_key(transition_graph->id, action->id, get_target_rc(action), te_uuid);
    crm_xml_add(cmd, XML_ATTR_TRANSITION_KEY, counter);

    rc = send_cluster_message(on_node, crm_msg_crmd, cmd, TRUE);
    crm_free(counter);
    free_xml(cmd);

    if (rc == FALSE) {
        crm_err("Action %d failed: send", action->id);
        return FALSE;

    } else if (no_wait) {
        /* Nobody waits for a result: treat the action as already confirmed */
        action->confirmed = TRUE;
        update_graph(graph, action);
        trigger_graph();

    } else {
        if (action->timeout <= 0) {
            crm_err("Action %d: %s on %s had an invalid timeout (%dms).  Using %dms instead",
                    action->id, task, on_node, action->timeout, graph->network_delay);
            action->timeout = graph->network_delay;
        }
        te_start_action_timer(graph, action);
    }

    return TRUE;
}
Exemplo n.º 28
0
/*	A_ELECTION_VOTE	*/
/*
 * Start a new election by broadcasting a CRM_OP_VOTE request that carries our
 * uuid as election owner, a freshly incremented election id and our uptime.
 *
 * No vote is cast while starting, recovering, stopping or terminating, or
 * while R_STARTING is set in the input register; in that case the DC role is
 * released (when we hold it) or I_PENDING is raised instead.
 */
void
do_election_vote(long long action,
                 enum crmd_fsa_cause cause,
                 enum crmd_fsa_state cur_state,
                 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    struct timeval uptime;
    xmlNode *ballot = NULL;
    gboolean abstain = FALSE;

    /* don't vote if we're in one of these states or wanting to shut down */
    if (cur_state == S_STARTING || cur_state == S_RECOVERY
        || cur_state == S_STOPPING || cur_state == S_TERMINATE) {
        crm_warn("Not voting in election, we're in state %s", fsa_state2string(cur_state));
        abstain = TRUE;

    } else if (is_set(fsa_input_register, R_STARTING)) {
        abstain = TRUE;
    }

    if (abstain) {
        if (AM_I_DC == FALSE) {
            register_fsa_input(C_FSA_INTERNAL, I_PENDING, NULL);

        } else {
            register_fsa_input(C_FSA_INTERNAL, I_RELEASE_DC, NULL);
        }
        return;
    }

    ballot = create_request(CRM_OP_VOTE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);

    /* every vote we cast opens a new election that we own */
    current_election_id++;
    crm_xml_add(ballot, F_CRM_ELECTION_OWNER, fsa_our_uuid);
    crm_xml_add_int(ballot, F_CRM_ELECTION_ID, current_election_id);

    /* include our uptime so that the receivers can compare it */
    crm_uptime(&uptime);
    crm_xml_add_int(ballot, F_CRM_ELECTION_AGE_S, uptime.tv_sec);
    crm_xml_add_int(ballot, F_CRM_ELECTION_AGE_US, uptime.tv_usec);

    send_cluster_message(NULL, crm_msg_crmd, ballot, TRUE);
    free_xml(ballot);

    crm_debug("Started election %d", current_election_id);

    /* forget the votes recorded for any previous election */
    if (voted != NULL) {
        g_hash_table_destroy(voted);
        voted = NULL;
    }

    switch (cur_state) {
        case S_ELECTION:
        case S_RELEASE_DC:
            crm_timer_start(election_timeout);
            break;

        case S_INTEGRATION:
            /* voting from here needs neither the timer nor a complaint */
            break;

        default:
            crm_err("Broken? Voting in state %s", fsa_state2string(cur_state));
            break;
    }
}
Exemplo n.º 29
0
/*
 * Dispatch one message received from an IPC client.
 *
 * CRM_OP_RM_NODE_CACHE requests are tagged with the client's details and
 * relayed to the cluster; everything else is tagged the same way and handed
 * to stonith_command().
 *
 * NOTE(review): every path returns 0; what the return value means to the
 * caller is not visible from here ("Exit code means?").
 */
static int32_t
st_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
{
    uint32_t msg_id = 0;
    uint32_t flags = 0;
    int options = 0;
    const char *task = NULL;
    xmlNode *msg = NULL;
    crm_client_t *c = crm_client_get(qbc);

    if (c == NULL) {
        crm_info("Invalid client: %p", qbc);
        return 0;
    }

    msg = crm_ipcs_recv(c, data, size, &msg_id, &flags);
    if (msg == NULL) {
        crm_ipcs_send_ack(c, msg_id, flags, "nack", __FUNCTION__, __LINE__);
        return 0;
    }

    task = crm_element_value(msg, F_CRM_TASK);
    if(safe_str_eq(task, CRM_OP_RM_NODE_CACHE)) {
        /* Not handled locally: relay it to the cluster on the client's behalf */
        crm_xml_add(msg, F_TYPE, T_STONITH_NG);
        crm_xml_add(msg, F_STONITH_OPERATION, task);
        crm_xml_add(msg, F_STONITH_CLIENTID, c->id);
        crm_xml_add(msg, F_STONITH_CLIENTNAME, crm_client_name(c));
        crm_xml_add(msg, F_STONITH_CLIENTNODE, stonith_our_uname);

        send_cluster_message(NULL, crm_msg_stonith_ng, msg, FALSE);
        free_xml(msg);
        return 0;
    }

    if (c->name == NULL) {
        /* First message from this client: name it "<clientname>.<pid>" */
        const char *value = crm_element_value(msg, F_STONITH_CLIENTNAME);

        c->name = g_strdup_printf("%s.%u", value ? value : "unknown", c->pid);
    }

    crm_element_value_int(msg, F_STONITH_CALLOPTS, &options);
    crm_trace("Flags %u/%u for command %u from %s", flags, options, msg_id, crm_client_name(c));

    if (is_set(options, st_opt_sync_call)) {
        CRM_ASSERT(flags & crm_ipc_client_response);
        CRM_LOG_ASSERT(c->request_id == 0);     /* This means the client has two synchronous events in-flight */
        c->request_id = msg_id;     /* Reply only to the last one */
    }

    /* Record who the request came from before processing it */
    crm_xml_add(msg, F_STONITH_CLIENTID, c->id);
    crm_xml_add(msg, F_STONITH_CLIENTNAME, crm_client_name(c));
    crm_xml_add(msg, F_STONITH_CLIENTNODE, stonith_our_uname);

    crm_log_xml_trace(msg, "Client[inbound]");
    stonith_command(c, msg_id, flags, msg, NULL);

    free_xml(msg);
    return 0;
}
Exemplo n.º 30
0
/*
 * Ask a peer to carry out the fencing operation 'op'.
 *
 * With st_opt_topology set, the peer passed in is ignored: a peer is chosen
 * with stonith_choose_peer() and asked to use the first device of
 * op->devices.  Otherwise the given peer is used, or one is chosen when
 * 'peer' is NULL.
 *
 * When more than one device is involved, op->base_timeout is divided equally
 * between them.  If no peer is available, the operation is timed out once the
 * query timer is no longer set; otherwise we simply wait for more peers.
 *
 * op:   the fencing operation
 * peer: preferred peer, may be NULL
 */
void call_remote_stonith(remote_fencing_op_t *op, st_query_result_t *peer) 
{
    const char *device = NULL;
    int timeout = op->base_timeout;
    int device_number = 0;

    if(is_set(op->call_options, st_opt_topology)) {
        if(op->topology_device_number) {
            device_number = op->topology_device_number;
        }

        /* Ignore any preference, they might not have the device we need */
        peer = stonith_choose_peer(op);

        /* The device list can be empty, do not dereference it blindly */
        if(op->devices) {
            device = op->devices->data;
        }
    } else if(peer == NULL) {
        if ((peer = stonith_choose_peer(op)) != NULL) {
            device_number = peer->devices;
        }
    } else {
        device_number = peer->devices;
    }

    if (device_number > 1) {
        timeout /= device_number;
        crm_trace("Dividing the timeout (%ds) equally between %d peer devices: %ds",
                  op->base_timeout, device_number, timeout);
    }

    if(peer) {
        xmlNode *query = stonith_create_op(0, op->id, STONITH_OP_FENCE, NULL, 0);
        crm_xml_add(query, F_STONITH_REMOTE, op->id);
        crm_xml_add(query, F_STONITH_TARGET, op->target);
        crm_xml_add(query, F_STONITH_ACTION, op->action);
        crm_xml_add(query, F_STONITH_OWNER,  op->originator);
        crm_xml_add(query, F_STONITH_CLIENTID, op->client_id);
        crm_xml_add(query, F_STONITH_CLIENTNAME, op->client_name);
        crm_xml_add_int(query, F_STONITH_TIMEOUT, timeout);

        if(device) {
            /* A specific device was selected: the peer must use exactly that one */
            crm_info("Requesting that %s perform op %s %s with %s", peer->host, op->action, op->target, device);
            crm_xml_add(query, F_STONITH_DEVICE, device);
            crm_xml_add(query, F_STONITH_MODE, "slave");

        } else {
            /* No device selected: the request names none, only the mode */
            crm_info("Requesting that %s perform op %s %s", peer->host, op->action, op->target);
            crm_xml_add(query, F_STONITH_MODE, "smart");
        }

        op->state = st_exec;
        send_cluster_message(peer->host, crm_msg_stonith_ng, query, FALSE);
        free_xml(query);
        return;

    } else if(op->query_timer == 0) {
        /* We've exhausted all available peers */
        crm_info("No remaining peers capable of terminating %s", op->target);
        remote_op_timeout(op);
    } else if(device) {
        crm_info("Waiting for additional peers capable of terminating %s with %s", op->target, device);
    } else {
        crm_info("Waiting for additional peers capable of terminating %s", op->target);
    }

    /* Only reached when no peer was found, so 'peer' is NULL here */
    free_remote_query(peer);
}