int
cib_init(void)
{
    if (is_corosync_cluster()) {
#if SUPPORT_COROSYNC
        crm_cluster.destroy = cib_cs_destroy;
        crm_cluster.cpg.cpg_deliver_fn = cib_cs_dispatch;
        crm_cluster.cpg.cpg_confchg_fn = pcmk_cpg_membership;
#endif
    }

    config_hash = crm_str_table_new();

    if (startCib("cib.xml") == FALSE) {
        crm_crit("Cannot start CIB... terminating");
        crm_exit(CRM_EX_NOINPUT);
    }

    if (stand_alone == FALSE) {
        if (is_corosync_cluster()) {
            crm_set_status_callback(&cib_peer_update_callback);
        }

        if (crm_cluster_connect(&crm_cluster) == FALSE) {
            crm_crit("Cannot sign in to the cluster... terminating");
            crm_exit(CRM_EX_FATAL);
        }
        cib_our_uname = crm_cluster.uname;

    } else {
        cib_our_uname = strdup("localhost");
    }

    cib_ipc_servers_init(&ipcs_ro,
                         &ipcs_rw,
                         &ipcs_shm,
                         &ipc_ro_callbacks,
                         &ipc_rw_callbacks);

    if (stand_alone) {
        cib_is_master = TRUE;
    }

    /* Create the mainloop and run it... */
    mainloop = g_main_loop_new(NULL, FALSE);
    crm_info("Starting %s mainloop", crm_system_name);
    g_main_loop_run(mainloop);

    /* If main loop returned, clean up and exit. We disconnect in case
     * terminate_cib() was called with fast=-1.
     */
    crm_cluster_disconnect(&crm_cluster);
    cib_ipc_servers_destroy(ipcs_ro, ipcs_rw, ipcs_shm);

    return crm_exit(CRM_EX_OK);
}
Exemple #2
0
gboolean
decode_transition_magic(
	const char *magic, char **uuid, int *transition_id, int *action_id,
	int *op_status, int *op_rc, int *target_rc)
{
    int res = 0;
    char *key = NULL;
    gboolean result = TRUE;

    CRM_CHECK(magic != NULL, return FALSE);
    CRM_CHECK(op_rc != NULL, return FALSE);
    CRM_CHECK(op_status != NULL, return FALSE);
    
    crm_malloc0(key, strlen(magic));
    res = sscanf(magic, "%d:%d;%s", op_status, op_rc, key);
    if(res != 3) {
	crm_crit("Only found %d items in: %s", res, magic);
	result = FALSE;
	goto bail;
    }
    
    CRM_CHECK(decode_transition_key(key, uuid, transition_id, action_id, target_rc),
	      result = FALSE;
	      goto bail;
	);
/*!
 * \internal
 * \brief Respond to scheduler connection failure
 *
 * \param[in] user_data  Ignored
 */
static void
pe_ipc_destroy(gpointer user_data)
{
    if (is_set(fsa_input_register, R_PE_REQUIRED)) {
        int rc = pcmk_ok;
        char *uuid_str = crm_generate_uuid();

        crm_crit("Connection to the scheduler failed "
                 CRM_XS " uuid=%s", uuid_str);

        /*
         * The scheduler died...
         *
         * Save the current CIB so that we have a chance of
         * figuring out what killed it.
         *
         * Delay raising the I_ERROR until the query below completes or
         * 5s is up, whichever comes first.
         *
         */
        rc = fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local);
        fsa_register_cib_callback(rc, FALSE, uuid_str, save_cib_contents);

    } else {
        crm_info("Connection to the scheduler released");
    }

    clear_bit(fsa_input_register, R_PE_CONNECTED);
    pe_subsystem = NULL;
    mainloop_set_trigger(fsa_source);
    return;
}
Exemple #4
0
static gboolean
child_timeout_callback(gpointer p)
{
    mainloop_child_t *child = p;

    child->timerid = 0;
    if (child->timeout) {
        crm_crit("%s process (PID %d) will not die!", child->desc, (int)child->pid);
        return FALSE;
    }

    child->timeout = TRUE;
    crm_warn("%s process (PID %d) timed out", child->desc, (int)child->pid);

    if (kill(child->pid, SIGKILL) < 0) {
        if (errno == ESRCH) {
            /* Nothing left to do */
            return FALSE;
        }
        crm_perror(LOG_ERR, "kill(%d, KILL) failed", child->pid);
    }

    child->timerid = g_timeout_add(5000, child_timeout_callback, child);
    return FALSE;
}
static void
cib_cs_destroy(gpointer user_data)
{
    if (cib_shutdown_flag) {
        crm_info("Corosync disconnection complete");
    } else {
        crm_crit("Lost connection to cluster layer, shutting down");
        terminate_cib(__FUNCTION__, CRM_EX_DISCONNECT);
    }
}
static void
attrd_cpg_destroy(gpointer unused)
{
    if (attrd_shutting_down()) {
        crm_info("Corosync disconnection complete");

    } else {
        crm_crit("Lost connection to cluster layer, shutting down");
        attrd_exit_status = CRM_EX_DISCONNECT;
        attrd_shutdown(0);
    }
}
Exemple #7
0
static void
attrd_cpg_destroy(gpointer unused)
{
    if (shutting_down) {
        crm_info("Corosync disconnection complete");

    } else {
        crm_crit("Lost connection to Corosync service!");
        attrd_error = ECONNRESET;
        attrd_shutdown(0);
    }
}
Exemple #8
0
void
crmd_ha_connection_destroy(gpointer user_data)
{
    crm_trace("Invoked");
    if (is_set(fsa_input_register, R_HA_DISCONNECTED)) {
        /* we signed out, so this is expected */
        crm_info("Heartbeat disconnection complete");
        return;
    }

    crm_crit("Lost connection to heartbeat service!");
    register_fsa_input(C_HA_DISCONNECT, I_ERROR, NULL);
    trigger_fsa(fsa_source);
}
Exemple #9
0
/*!
 * \internal
 * \brief Request cluster shutdown if appropriate, otherwise exit immediately
 *
 * \param[in] nsig  Signal that caused invocation (ignored)
 */
static void
lrmd_shutdown(int nsig)
{
#ifdef ENABLE_PCMK_REMOTE
    crm_client_t *ipc_proxy = ipc_proxy_get_provider();

    /* If there are active proxied IPC providers, then we may be running
     * resources, so notify the cluster that we wish to shut down.
     */
    if (ipc_proxy) {
        if (shutting_down) {
            crm_notice("Waiting for cluster to stop resources before exiting");
            return;
        }

        crm_info("Sending shutdown request to cluster");
        if (ipc_proxy_shutdown_req(ipc_proxy) < 0) {
            crm_crit("Shutdown request failed, exiting immediately");

        } else {
            /* We requested a shutdown. Now, we need to wait for an
             * acknowledgement from the proxy host (which ensures the proxy host
             * supports shutdown requests), then wait for all proxy hosts to
             * disconnect (which ensures that all resources have been stopped).
             */
            shutting_down = TRUE;

            /* Stop accepting new proxy connections */
            lrmd_tls_server_destroy();

            /* Older crmd versions will never acknowledge our request, so set a
             * fairly short timeout to exit quickly in that case. If we get the
             * ack, we'll defuse this timer.
             */
            shutdown_ack_timer = g_timeout_add_seconds(20, lrmd_exit, NULL);

            /* Currently, we let the OS kill us if the clients don't disconnect
             * in a reasonable time. We could instead set a long timer here
             * (shorter than what the OS is likely to use) and exit immediately
             * if it pops.
             */
            return;
        }
    }
#endif
    lrmd_exit(NULL);
}
Exemple #10
0
static void
tengine_stonith_connection_destroy(stonith_t * st, const char *event, xmlNode * msg)
{
    if (is_set(fsa_input_register, R_ST_REQUIRED)) {
        crm_crit("Fencing daemon connection failed");
        mainloop_set_trigger(stonith_reconnect);

    } else {
        crm_info("Fencing daemon disconnected");
    }

    /* cbchan will be garbage at this point, arrange for it to be reset */
    stonith_api->state = stonith_disconnected;

    if (AM_I_DC) {
        fail_incompletable_stonith(transition_graph);
        trigger_graph();
    }
}
static void
attrd_cib_destroy_cb(gpointer user_data)
{
    cib_t *conn = user_data;

    conn->cmds->signoff(conn);  /* Ensure IPC is cleaned up */

    if (attrd_shutting_down()) {
        crm_info("Connection disconnection complete");

    } else {
        /* eventually this should trigger a reconnect, not a shutdown */
        crm_crit("Lost connection to the CIB manager, shutting down");
        attrd_exit_status = CRM_EX_DISCONNECT;
        attrd_shutdown(0);
    }

    return;
}
crm_node_t *crm_get_peer(unsigned int id, const char *uname)
{
    crm_node_t *node = NULL;
    if(uname != NULL) {
	node = g_hash_table_lookup(crm_peer_cache, uname);
    }
    
    if(node == NULL && id > 0) {
	node = g_hash_table_lookup(crm_peer_id_cache, GUINT_TO_POINTER(id));
	if(node && node->uname && uname) {
	    crm_crit("Node %s and %s share the same cluster node id '%u'!",
		     node->uname, uname, id);
	    
	    /* NOTE: Calling crm_new_peer() means the entry in 
	     * crm_peer_id_cache will point to the new entity
	     */

	    /* TODO: Replace the old uname instead? */
	    node = crm_new_peer(id, uname);
	    CRM_ASSERT(node->uname != NULL);
	}
    }

    if(node && uname && node->uname == NULL) {
	node->uname = crm_strdup(uname);
	crm_info("Node %u is now known as %s", id, uname);	
	g_hash_table_insert(crm_peer_cache, node->uname, node);
	if(crm_status_callback) {
	    crm_status_callback(crm_status_uname, node, NULL);
	}
	
    }

    if(node && id > 0 && id != node->id) {
	g_hash_table_remove(crm_peer_id_cache, GUINT_TO_POINTER(node->id));
	g_hash_table_insert(crm_peer_id_cache, GUINT_TO_POINTER(id), node);
	node->id = id;
	crm_info("Node %s now has id: %u", crm_str(uname), id);	
    }
    
    return node;
}
Exemple #13
0
void *
fsa_typed_data_adv(fsa_data_t * fsa_data, enum fsa_data_type a_type, const char *caller)
{
    void *ret_val = NULL;

    if (fsa_data == NULL) {
        crm_err("%s: No FSA data available", caller);

    } else if (fsa_data->data == NULL) {
        crm_err("%s: No message data available. Origin: %s", caller, fsa_data->origin);

    } else if (fsa_data->data_type != a_type) {
        crm_crit("%s: Message data was the wrong type! %d vs. requested=%d.  Origin: %s",
                 caller, fsa_data->data_type, a_type, fsa_data->origin);
        CRM_ASSERT(fsa_data->data_type == a_type);
    } else {
        ret_val = fsa_data->data;
    }

    return ret_val;
}
Exemple #14
0
static gboolean
child_timeout_callback(gpointer p)
{
    mainloop_child_t *child = p;
    int rc = 0;

    child->timerid = 0;
    if (child->timeout) {
        crm_crit("%s process (PID %d) will not die!", child->desc, (int)child->pid);
        return FALSE;
    }

    rc = child_kill_helper(child);
    if (rc == ESRCH) {
        /* Nothing left to do. pid doesn't exist */
        return FALSE;
    }

    child->timeout = TRUE;
    crm_warn("%s process (PID %d) timed out", child->desc, (int)child->pid);

    child->timerid = g_timeout_add(5000, child_timeout_callback, child);
    return FALSE;
}
Exemple #15
0
static void
tengine_stonith_notify(stonith_t * st, stonith_event_t * st_event)
{
    if(te_client_id == NULL) {
        te_client_id = crm_strdup_printf("%s.%lu", crm_system_name,
                                         (unsigned long) getpid());
    }

    if (st_event == NULL) {
        crm_err("Notify data not found");
        return;
    }

    crmd_alert_fencing_op(st_event);

    if (st_event->result == pcmk_ok && safe_str_eq("on", st_event->action)) {
        crm_notice("%s was successfully unfenced by %s (at the request of %s)",
                   st_event->target, st_event->executioner ? st_event->executioner : "<anyone>", st_event->origin);
                /* TODO: Hook up st_event->device */
        return;

    } else if (safe_str_eq("on", st_event->action)) {
        crm_err("Unfencing of %s by %s failed: %s (%d)",
                st_event->target, st_event->executioner ? st_event->executioner : "<anyone>",
                pcmk_strerror(st_event->result), st_event->result);
        return;

    } else if (st_event->result == pcmk_ok && crm_str_eq(st_event->target, fsa_our_uname, TRUE)) {
        crm_crit("We were allegedly just fenced by %s for %s!",
                 st_event->executioner ? st_event->executioner : "<anyone>", st_event->origin); /* Dumps blackbox if enabled */

        qb_log_fini(); /* Try to get the above log message to disk - somehow */

        /* Get out ASAP and do not come back up.
         *
         * Triggering a reboot is also not the worst idea either since
         * the rest of the cluster thinks we're safely down
         */

#ifdef RB_HALT_SYSTEM
        reboot(RB_HALT_SYSTEM);
#endif

        /*
         * If reboot() fails or is not supported, coming back up will
         * probably lead to a situation where the other nodes set our
         * status to 'lost' because of the fencing callback and will
         * discard subsequent election votes with:
         *
         * Election 87 (current: 5171, owner: 103): Processed vote from east-03 (Peer is not part of our cluster)
         *
         * So just stay dead, something is seriously messed up anyway.
         *
         */
        exit(CRM_EX_FATAL); // None of our wrappers since we already called qb_log_fini()
        return;
    }

    /* Update the count of stonith failures for this target, in case we become
     * DC later. The current DC has already updated its fail count in
     * tengine_stonith_callback().
     */
    if (!AM_I_DC && safe_str_eq(st_event->operation, T_STONITH_NOTIFY_FENCE)) {
        if (st_event->result == pcmk_ok) {
            st_fail_count_reset(st_event->target);
        } else {
            st_fail_count_increment(st_event->target);
        }
    }

    crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s: %s "
               CRM_XS " initiator=%s ref=%s",
               st_event->target, st_event->result == pcmk_ok ? "" : " not",
               st_event->action,
               st_event->executioner ? st_event->executioner : "<anyone>",
               (st_event->client_origin? st_event->client_origin : "<unknown>"),
               pcmk_strerror(st_event->result),
               st_event->origin, st_event->id);

    if (st_event->result == pcmk_ok) {
        crm_node_t *peer = crm_find_peer_full(0, st_event->target, CRM_GET_PEER_ANY);
        const char *uuid = NULL;
        gboolean we_are_executioner = safe_str_eq(st_event->executioner, fsa_our_uname);

        if (peer == NULL) {
            return;
        }

        uuid = crm_peer_uuid(peer);

        crm_trace("target=%s dc=%s", st_event->target, fsa_our_dc);
        if(AM_I_DC) {
            /* The DC always sends updates */
            send_stonith_update(NULL, st_event->target, uuid);

            /* @TODO Ideally, at this point, we'd check whether the fenced node
             * hosted any guest nodes, and call remote_node_down() for them.
             * Unfortunately, the controller doesn't have a simple, reliable way
             * to map hosts to guests. It might be possible to track this in the
             * peer cache via crm_remote_peer_cache_refresh(). For now, we rely
             * on the PE creating fence pseudo-events for the guests.
             */

            if (st_event->client_origin && safe_str_neq(st_event->client_origin, te_client_id)) {

                /* Abort the current transition graph if it wasn't us
                 * that invoked stonith to fence someone
                 */
                crm_info("External fencing operation from %s fenced %s", st_event->client_origin, st_event->target);
                abort_transition(INFINITY, tg_restart, "External Fencing Operation", NULL);
            }

            /* Assume it was our leader if we don't currently have one */
        } else if (((fsa_our_dc == NULL) || safe_str_eq(fsa_our_dc, st_event->target))
            && !is_set(peer->flags, crm_remote_node)) {

            crm_notice("Target %s our leader %s (recorded: %s)",
                       fsa_our_dc ? "was" : "may have been", st_event->target,
                       fsa_our_dc ? fsa_our_dc : "<unset>");

            /* Given the CIB resyncing that occurs around elections,
             * have one node update the CIB now and, if the new DC is different,
             * have them do so too after the election
             */
            if (we_are_executioner) {
                send_stonith_update(NULL, st_event->target, uuid);
            }
            add_stonith_cleanup(st_event->target);
        }

        /* If the target is a remote node, and we host its connection,
         * immediately fail all monitors so it can be recovered quickly.
         * The connection won't necessarily drop when a remote node is fenced,
         * so the failure might not otherwise be detected until the next poke.
         */
        if (is_set(peer->flags, crm_remote_node)) {
            remote_ra_fail(st_event->target);
        }

        crmd_peer_down(peer, TRUE);
     }
}
Exemple #16
0
gboolean
corosync_initialize_nodelist(void *cluster, gboolean force_member, xmlNode * xml_parent)
{
    int lpc = 0;
    cs_error_t rc = CS_OK;
    int retries = 0;
    gboolean any = FALSE;
    cmap_handle_t cmap_handle;
    int fd = -1;
    uid_t found_uid = 0;
    gid_t found_gid = 0;
    pid_t found_pid = 0;
    int rv;

    do {
        rc = cmap_initialize(&cmap_handle);
        if (rc != CS_OK) {
            retries++;
            crm_debug("API connection setup failed: %s.  Retrying in %ds", cs_strerror(rc),
                      retries);
            sleep(retries);
        }

    } while (retries < 5 && rc != CS_OK);

    if (rc != CS_OK) {
        crm_warn("Could not connect to Cluster Configuration Database API, error %d", rc);
        return FALSE;
    }

    rc = cmap_fd_get(cmap_handle, &fd);
    if (rc != CS_OK) {
        crm_err("Could not obtain the CMAP API connection: %s (%d)",
                cs_strerror(rc), rc);
        goto bail;
    }

    /* CMAP provider run as root (in given user namespace, anyway)? */
    if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
                                            &found_uid, &found_gid))) {
        crm_err("CMAP provider is not authentic:"
                " process %lld (uid: %lld, gid: %lld)",
                (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
                (long long) found_uid, (long long) found_gid);
        goto bail;
    } else if (rv < 0) {
        crm_err("Could not verify authenticity of CMAP provider: %s (%d)",
                strerror(-rv), -rv);
        goto bail;
    }

    crm_peer_init();
    crm_trace("Initializing corosync nodelist");
    for (lpc = 0; TRUE; lpc++) {
        uint32_t nodeid = 0;
        char *name = NULL;
        char *key = NULL;

        key = crm_strdup_printf("nodelist.node.%d.nodeid", lpc);
        rc = cmap_get_uint32(cmap_handle, key, &nodeid);
        free(key);

        if (rc != CS_OK) {
            break;
        }

        name = corosync_node_name(cmap_handle, nodeid);
        if (name != NULL) {
            GHashTableIter iter;
            crm_node_t *node = NULL;

            g_hash_table_iter_init(&iter, crm_peer_cache);
            while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
                if(node && node->uname && strcasecmp(node->uname, name) == 0) {
                    if (node->id && node->id != nodeid) {
                        crm_crit("Nodes %u and %u share the same name '%s': shutting down", node->id,
                                 nodeid, name);
                        crm_exit(CRM_EX_FATAL);
                    }
                }
            }
        }

        if (nodeid > 0 || name != NULL) {
            crm_trace("Initializing node[%d] %u = %s", lpc, nodeid, name);
            crm_get_peer(nodeid, name);
        }

        if (nodeid > 0 && name != NULL) {
            any = TRUE;

            if (xml_parent) {
                xmlNode *node = create_xml_node(xml_parent, XML_CIB_TAG_NODE);

                crm_xml_set_id(node, "%u", nodeid);
                crm_xml_add(node, XML_ATTR_UNAME, name);
                if (force_member) {
                    crm_xml_add(node, XML_ATTR_TYPE, CRM_NODE_MEMBER);
                }
            }
        }

        free(name);
    }
bail:
    cmap_finalize(cmap_handle);
    return any;
}
Exemple #17
0
static void
tengine_stonith_notify(stonith_t * st, stonith_event_t * st_event)
{
    if(te_client_id == NULL) {
        te_client_id = crm_strdup_printf("%s.%d", crm_system_name, getpid());
    }

    if (st_event == NULL) {
        crm_err("Notify data not found");
        return;
    }

    crmd_notify_fencing_op(st_event);

    if (st_event->result == pcmk_ok && safe_str_eq("on", st_event->action)) {
        crm_notice("%s was successfully unfenced by %s (at the request of %s)",
                   st_event->target, st_event->executioner ? st_event->executioner : "<anyone>", st_event->origin);
                /* TODO: Hook up st_event->device */
        return;

    } else if (safe_str_eq("on", st_event->action)) {
        crm_err("Unfencing of %s by %s failed: %s (%d)",
                st_event->target, st_event->executioner ? st_event->executioner : "<anyone>",
                pcmk_strerror(st_event->result), st_event->result);
        return;

    } else if (st_event->result == pcmk_ok && crm_str_eq(st_event->target, fsa_our_uname, TRUE)) {
        crm_crit("We were allegedly just fenced by %s for %s!",
                 st_event->executioner ? st_event->executioner : "<anyone>", st_event->origin); /* Dumps blackbox if enabled */

        qb_log_fini(); /* Try to get the above log message to disk - somehow */

        /* Get out ASAP and do not come back up.
         *
         * Triggering a reboot is also not the worst idea either since
         * the rest of the cluster thinks we're safely down
         */

#ifdef RB_HALT_SYSTEM
        reboot(RB_HALT_SYSTEM);
#endif

        /*
         * If reboot() fails or is not supported, coming back up will
         * probably lead to a situation where the other nodes set our
         * status to 'lost' because of the fencing callback and will
         * discard subsequent election votes with:
         *
         * Election 87 (current: 5171, owner: 103): Processed vote from east-03 (Peer is not part of our cluster)
         *
         * So just stay dead, something is seriously messed up anyway.
         *
         */
        exit(100); /* None of our wrappers since we already called qb_log_fini() */
        return;
    }

    if (st_event->result == pcmk_ok &&
        safe_str_eq(st_event->operation, T_STONITH_NOTIFY_FENCE)) {
        st_fail_count_reset(st_event->target);
    }

    crm_notice("Peer %s was%s terminated (%s) by %s for %s: %s (ref=%s) by client %s",
               st_event->target, st_event->result == pcmk_ok ? "" : " not",
               st_event->action,
               st_event->executioner ? st_event->executioner : "<anyone>",
               st_event->origin, pcmk_strerror(st_event->result), st_event->id,
               st_event->client_origin ? st_event->client_origin : "<unknown>");

#if SUPPORT_CMAN
    if (st_event->result == pcmk_ok && is_cman_cluster()) {
        int local_rc = 0;
        int confirm = 0;
        char *target_copy = strdup(st_event->target);

        /* In case fenced hasn't noticed yet
         *
         * Any fencing that has been inititated will be completed by way of the fence_pcmk redirect
         */
        local_rc = fenced_external(target_copy);
        if (local_rc != 0) {
            crm_err("Could not notify CMAN that '%s' is now fenced: %d", st_event->target,
                    local_rc);
        } else {
            crm_notice("Notified CMAN that '%s' is now fenced", st_event->target);
        }

        /* In case fenced is already trying to shoot it */
        confirm = open("/var/run/cluster/fenced_override", O_NONBLOCK|O_WRONLY);
        if (confirm > 0) {
            int ignore = 0;
            int len = strlen(target_copy);

            errno = 0;
            local_rc = write(confirm, target_copy, len);
            ignore = write(confirm, "\n", 1);

            if(ignore < 0 && errno == EBADF) {
                crm_trace("CMAN not expecting %s to be fenced (yet)", st_event->target);

            } else if (local_rc < len) {
                crm_perror(LOG_ERR, "Confirmation of CMAN fencing event for '%s' failed: %d", st_event->target, local_rc);

            } else {
                fsync(confirm);
                crm_notice("Confirmed CMAN fencing event for '%s'", st_event->target);
            }
            close(confirm);
        }
        free(target_copy);
    }
#endif

    if (st_event->result == pcmk_ok) {
        crm_node_t *peer = crm_find_peer_full(0, st_event->target, CRM_GET_PEER_REMOTE | CRM_GET_PEER_CLUSTER);
        const char *uuid = NULL;
        gboolean we_are_executioner = safe_str_eq(st_event->executioner, fsa_our_uname);

        if (peer == NULL) {
            return;
        }

        uuid = crm_peer_uuid(peer);

        crm_trace("target=%s dc=%s", st_event->target, fsa_our_dc);
        if(AM_I_DC) {
            /* The DC always sends updates */
            send_stonith_update(NULL, st_event->target, uuid);

            if (st_event->client_origin && safe_str_neq(st_event->client_origin, te_client_id)) {

                /* Abort the current transition graph if it wasn't us
                 * that invoked stonith to fence someone
                 */
                crm_info("External fencing operation from %s fenced %s", st_event->client_origin, st_event->target);
                abort_transition(INFINITY, tg_restart, "External Fencing Operation", NULL);
            }

            /* Assume it was our leader if we dont currently have one */
        } else if (fsa_our_dc == NULL || safe_str_eq(fsa_our_dc, st_event->target)) {
            crm_notice("Target %s our leader %s (recorded: %s)",
                       fsa_our_dc ? "was" : "may have been", st_event->target,
                       fsa_our_dc ? fsa_our_dc : "<unset>");

            /* Given the CIB resyncing that occurs around elections,
             * have one node update the CIB now and, if the new DC is different,
             * have them do so too after the election
             */
            if (we_are_executioner) {
                send_stonith_update(NULL, st_event->target, uuid);
            }
            stonith_cleanup_list = g_list_append(stonith_cleanup_list, strdup(st_event->target));

        }

        crmd_peer_down(peer, TRUE);
     }
}
Exemple #18
0
/* coverity[+kill] */
void
crm_abort(const char *file, const char *function, int line,
          const char *assert_condition, gboolean do_core, gboolean do_fork)
{
    int rc = 0;
    int pid = 0;
    int status = 0;

    /* Implied by the parent's error logging below */
    /* crm_write_blackbox(0); */

    if(crm_is_daemon == FALSE) {
        /* This is a command line tool - do not fork */

        /* crm_add_logfile(NULL);   * Record it to a file? */
        crm_enable_stderr(TRUE); /* Make sure stderr is enabled so we can tell the caller */
        do_fork = FALSE;         /* Just crash if needed */
    }

    if (do_core == FALSE) {
        crm_err("%s: Triggered assert at %s:%d : %s", function, file, line, assert_condition);
        return;

    } else if (do_fork) {
        pid = fork();

    } else {
        crm_err("%s: Triggered fatal assert at %s:%d : %s", function, file, line, assert_condition);
    }

    if (pid == -1) {
        crm_crit("%s: Cannot create core for non-fatal assert at %s:%d : %s",
                 function, file, line, assert_condition);
        return;

    } else if(pid == 0) {
        /* Child process */
        abort();
        return;
    }

    /* Parent process */
    crm_err("%s: Forked child %d to record non-fatal assert at %s:%d : %s",
            function, pid, file, line, assert_condition);
    crm_write_blackbox(SIGTRAP, NULL);

    do {
        rc = waitpid(pid, &status, 0);
        if(rc == pid) {
            return; /* Job done */
        }

    } while(errno == EINTR);

    if (errno == ECHILD) {
        /* crm_mon does this */
        crm_trace("Cannot wait on forked child %d - SIGCHLD is probably set to SIG_IGN", pid);
        return;
    }
    crm_perror(LOG_ERR, "Cannot wait on forked child %d", pid);
}
Exemple #19
0
/* coverity[-alloc] Memory is referenced in one or both hashtables */
crm_node_t *
crm_get_peer(unsigned int id, const char *uname)
{
    GHashTableIter iter;
    crm_node_t *node = NULL;
    crm_node_t *by_id = NULL;
    crm_node_t *by_name = NULL;

    CRM_ASSERT(id > 0 || uname != NULL);

    crm_peer_init();

    if (uname != NULL) {
        g_hash_table_iter_init(&iter, crm_peer_cache);
        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
            if(node->uname && strcasecmp(node->uname, uname) == 0) {
                crm_trace("Name match: %s = %p", node->uname, node);
                by_name = node;
                break;
            }
        }
    }

    if (id > 0) {
        g_hash_table_iter_init(&iter, crm_peer_cache);
        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
            if(node->id == id) {
                crm_trace("ID match: %u = %p", node->id, node);
                by_id = node;
                break;
            }
        }
    }

    node = by_id; /* Good default */
    if(by_id == by_name) {
        /* Nothing to do if they match (both NULL counts) */
        crm_trace("Consistent: %p for %u/%s", by_id, id, uname);

    } else if(by_id == NULL && by_name) {
        crm_trace("Only one: %p for %u/%s", by_name, id, uname);

        if(id && by_name->id) {
            crm_dump_peer_hash(LOG_WARNING, __FUNCTION__);
            crm_crit("Node %u and %u share the same name '%s'",
                     id, by_name->id, uname);
            node = NULL; /* Create a new one */

        } else {
            node = by_name;
        }

    } else if(by_name == NULL && by_id) {
        crm_trace("Only one: %p for %u/%s", by_id, id, uname);

        if(uname && by_id->uname) {
            crm_dump_peer_hash(LOG_WARNING, __FUNCTION__);
            crm_crit("Node '%s' and '%s' share the same cluster nodeid %u: assuming '%s' is correct",
                     uname, by_id->uname, id, uname);
        }

    } else if(uname && by_id->uname) {
        crm_warn("Node '%s' and '%s' share the same cluster nodeid: %u", by_id->uname, by_name->uname, id);

    } else if(id && by_name->id) {
        crm_warn("Node %u and %u share the same name: '%s'", by_id->id, by_name->id, uname);

    } else {
        /* Simple merge */

        /* Only corosync based clusters use nodeid's
         *
         * The functions that call crm_update_peer_state() only know nodeid
         * so 'by_id' is authorative when merging
         *
         * Same for crm_update_peer_proc()
         */
        crm_dump_peer_hash(LOG_DEBUG, __FUNCTION__);

        crm_info("Merging %p into %p", by_name, by_id);
        g_hash_table_foreach_remove(crm_peer_cache, crm_hash_find_by_data, by_name);
    }

    if (node == NULL) {
        char *uniqueid = crm_generate_uuid();

        node = calloc(1, sizeof(crm_node_t));
        CRM_ASSERT(node);

        crm_info("Created entry %s/%p for node %s/%u (%d total)",
                 uniqueid, node, uname, id, 1 + g_hash_table_size(crm_peer_cache));
        g_hash_table_replace(crm_peer_cache, uniqueid, node);
    }

    if(id > 0 && uname && (node->id == 0 || node->uname == NULL)) {
        crm_info("Node %u is now known as %s", id, uname);
    }

    if(id > 0 && node->id == 0) {
        node->id = id;
    }

    if(uname && node->uname == NULL) {
        int lpc, len = strlen(uname);

        for (lpc = 0; lpc < len; lpc++) {
            if (uname[lpc] >= 'A' && uname[lpc] <= 'Z') {
                crm_warn("Node names with capitals are discouraged, consider changing '%s' to something else",
                         uname);
                break;
            }
        }

        node->uname = strdup(uname);
        if (crm_status_callback) {
            crm_status_callback(crm_status_uname, node, NULL);
        }
    }

    if(node->uuid == NULL) {
        const char *uuid = crm_peer_uuid(node);

        if (uuid) {
            crm_info("Node %u has uuid %s", id, uuid);

        } else {
            crm_info("Cannot obtain a UUID for node %d/%s", id, node->uname);
        }
    }

    return node;
}
Exemple #20
0
void
cib_notify_client(gpointer key, gpointer value, gpointer user_data)
{

	IPC_Channel *ipc_client = NULL;
	HA_Message *update_msg = user_data;
	cib_client_t *client = value;
	const char *type = NULL;
	gboolean is_pre = FALSE;
	gboolean is_post = FALSE;	
	gboolean is_confirm = FALSE;
	gboolean is_replace = FALSE;
	gboolean is_diff = FALSE;
	gboolean do_send = FALSE;

	int qlen = 0;
	int max_qlen = 0;
	
	CRM_DEV_ASSERT(client != NULL);
	CRM_DEV_ASSERT(update_msg != NULL);

	type = cl_get_string(update_msg, F_SUBTYPE);
	CRM_DEV_ASSERT(type != NULL);

	if(client == NULL) {
		crm_warn("Skipping NULL client");
		return;

	} else if(client->channel == NULL) {
		crm_warn("Skipping client with NULL channel");
		return;

	} else if(client->name == NULL) {
		crm_debug_2("Skipping unnammed client / comamnd channel");
		return;
	}
	
	if(safe_str_eq(type, T_CIB_PRE_NOTIFY)) {
		is_pre = TRUE;
		
	} else if(safe_str_eq(type, T_CIB_POST_NOTIFY)) {
		is_post = TRUE;

	} else if(safe_str_eq(type, T_CIB_UPDATE_CONFIRM)) {
		is_confirm = TRUE;

	} else if(safe_str_eq(type, T_CIB_DIFF_NOTIFY)) {
		is_diff = TRUE;

	} else if(safe_str_eq(type, T_CIB_REPLACE_NOTIFY)) {
		is_replace = TRUE;
	}

	ipc_client = client->channel;
	qlen = ipc_client->send_queue->current_qlen;
	max_qlen = ipc_client->send_queue->max_qlen;

#if 1
	/* get_chan_status() causes memory to be allocated that isnt free'd
	 *   until the message is read (which messes up the memory stats) 
	 */
	if(ipc_client->ops->get_chan_status(ipc_client) != IPC_CONNECT) {
		crm_debug_2("Skipping notification to disconnected"
			    " client %s/%s", client->name, client->id);
		
	} else if(client->pre_notify && is_pre) {
		if(qlen < (int)(0.4 * max_qlen)) {
			do_send = TRUE;
		} else {
			crm_warn("Throttling pre-notifications due to"
				 " high load: queue=%d (max=%d)",
				 qlen, max_qlen);
		}
		 
	} else if(client->post_notify && is_post) {
		if(qlen < (int)(0.7 * max_qlen)) {
			do_send = TRUE;
		} else {
			crm_warn("Throttling post-notifications due to"
				 " extreme load: queue=%d (max=%d)",
				 qlen, max_qlen);
		}

		/* these are critical */
	} else
#endif
		if(client->diffs && is_diff) {
		do_send = TRUE;

	} else if(client->confirmations && is_confirm) {
		do_send = TRUE;

	} else if(client->replace && is_replace) {
		do_send = TRUE;
	}

	if(do_send) {
		crm_debug_2("Notifying client %s/%s of %s update (queue=%d)",
			    client->name, client->channel_name, type, qlen);

		if(ipc_client->send_queue->current_qlen >= ipc_client->send_queue->max_qlen) {
			/* We never want the CIB to exit because our client is slow */
			crm_crit("%s-notification of client %s/%s failed - queue saturated",
				 is_confirm?"Confirmation":is_post?"Post":"Pre",
				 client->name, client->id);
			
		} else if(send_ipc_message(ipc_client, update_msg) == FALSE) {
			crm_warn("Notification of client %s/%s failed",
				 client->name, client->id);
		}
		
	} else {
		crm_debug_3("Client %s/%s not interested in %s notifications",
			    client->name, client->channel_name, type);	
	}
}
Exemple #21
0
gboolean
decode_transition_key(
	const char *key, char **uuid, int *transition_id, int *action_id, int *target_rc)
{
	int res = 0;
	gboolean done = FALSE;

	CRM_CHECK(uuid != NULL, return FALSE);
	CRM_CHECK(target_rc != NULL, return FALSE);
	CRM_CHECK(action_id != NULL, return FALSE);
	CRM_CHECK(transition_id != NULL, return FALSE);
	
	crm_malloc0(*uuid, strlen(key));
	res = sscanf(key, "%d:%d:%d:%s", action_id, transition_id, target_rc, *uuid);
	switch(res) {
	    case 4:
		/* Post Pacemaker 0.6 */
		done = TRUE;
		break;
	    case 3:
	    case 2:
		/* this can be tricky - the UUID might start with an integer */

		/* Until Pacemaker 0.6 */
		done = TRUE;
		*target_rc = -1;
		res = sscanf(key, "%d:%d:%s", action_id, transition_id, *uuid);
		if(res == 2) {
		    *action_id = -1;
		    res = sscanf(key, "%d:%s", transition_id, *uuid);
		    CRM_CHECK(res == 2, done = FALSE);

		} else if(res != 3) {
		    CRM_CHECK(res == 3, done = FALSE);
		}
		break;
		
	    case 1:
		/* Prior to Heartbeat 2.0.8 */
		done = TRUE;
		*action_id = -1;
		*target_rc = -1;
		res = sscanf(key, "%d:%s", transition_id, *uuid);
		CRM_CHECK(res == 2, done = FALSE);
		break;
	    default:
		crm_crit("Unhandled sscanf result (%d) for %s", res, key);
		
	}

	if(strlen(*uuid) != 36) {
	    crm_warn("Bad UUID (%s) in sscanf result (%d) for %s", *uuid, res, key);		    
	}
	
	if(done == FALSE) {
	    crm_err("Cannot decode '%s' rc=%d", key, res);
	    
	    crm_free(*uuid);
	    *uuid = NULL;
	    *target_rc = -1;
	    *action_id = -1;
	    *transition_id = -1;
	}
	
	return done;
}
Exemple #22
0
gboolean
cib_notify_client(gpointer key, gpointer value, gpointer user_data)
{
    int qlen = 0;
    int max_qlen = 500;
    const char *type = NULL;
    gboolean do_send = FALSE;
    gboolean do_remote = FALSE;
    IPC_Channel *ipc_client = NULL;

    cib_client_t *client = value;
    xmlNode *update_msg = user_data;

    CRM_CHECK(client != NULL, return TRUE);
    CRM_CHECK(update_msg != NULL, return TRUE);

    if (client == NULL) {
        crm_warn("Skipping NULL client");
        return TRUE;

    } else if (client->channel == NULL) {
        crm_warn("Skipping client with NULL channel");
        return FALSE;

    } else if (client->name == NULL) {
        crm_trace("Skipping unnammed client / comamnd channel");
        return FALSE;
    }

    type = crm_element_value(update_msg, F_SUBTYPE);

    ipc_client = client->channel;
    do_remote = crm_str_eq(client->channel_name, "remote", FALSE);

    if (do_remote == FALSE) {
        qlen = ipc_client->send_queue->current_qlen;
        max_qlen = ipc_client->send_queue->max_qlen;
    }

    CRM_LOG_ASSERT(type != NULL);
    if (client->diffs && safe_str_eq(type, T_CIB_DIFF_NOTIFY)) {
        do_send = TRUE;

    } else if (client->replace && safe_str_eq(type, T_CIB_REPLACE_NOTIFY)) {
        do_send = TRUE;

    } else if (client->confirmations && safe_str_eq(type, T_CIB_UPDATE_CONFIRM)) {
        do_send = TRUE;

    } else if (client->pre_notify && safe_str_eq(type, T_CIB_PRE_NOTIFY)) {
        if (qlen < (int)(0.4 * max_qlen)) {
            do_send = TRUE;
        } else {
            crm_warn("Throttling pre-notifications due to"
                     " high load: queue=%d (max=%d)", qlen, max_qlen);
        }

    } else if (client->post_notify && safe_str_eq(type, T_CIB_POST_NOTIFY)) {
        if (qlen < (int)(0.7 * max_qlen)) {
            do_send = TRUE;
        } else {
            crm_warn("Throttling post-notifications due to"
                     " extreme load: queue=%d (max=%d)", qlen, max_qlen);
        }
    }

    if (do_send) {
        if (do_remote) {
            crm_debug("Sent %s notification to client %s/%s", type, client->name, client->id);
            cib_send_remote_msg(client->channel, update_msg, client->encrypted);

        } else if (ipc_client->send_queue->current_qlen >= ipc_client->send_queue->max_qlen) {
            /* We never want the CIB to exit because our client is slow */
            crm_crit("%s-notification of client %s/%s failed - queue saturated",
                     type, client->name, client->id);

        } else if (send_ipc_message(ipc_client, update_msg) == FALSE) {
            crm_warn("Notification of client %s/%s failed", client->name, client->id);
            return FALSE;
        }
    }
    return FALSE;
}
Exemple #23
0
/* coverity[-alloc] Memory is referenced in one or both hashtables */
crm_node_t *
crm_get_peer(unsigned int id, const char *uname)
{
    crm_node_t *node = NULL;
    CRM_ASSERT(id > 0 || uname != NULL);

    crm_peer_init();

    if (node == NULL && uname != NULL) {
        node = g_hash_table_lookup(crm_peer_cache, uname);
    }

    if (node == NULL && id > 0) {
        node = g_hash_table_lookup(crm_peer_id_cache, GUINT_TO_POINTER(id));

        if (node && node->uname && uname) {
            crm_crit("Node %s and %s share the same cluster node id '%u'!", node->uname, uname, id);
            
            /* NOTE: Calling crm_new_peer() means the entry in 
             * crm_peer_id_cache will point to the new entity
             *
             * TO-DO: Replace the old uname instead?
             */
            node = NULL;
        }
    }

    if (node == NULL) {
        crm_debug("Creating entry for node %s/%u", uname, id);

        node = calloc(1, sizeof(crm_node_t));
        CRM_ASSERT(node);
    }

    if (id > 0 && node->id != id) {
        node->id = id;
        crm_info("Node %s now has id: %u", crm_str(uname), id);
        g_hash_table_replace(crm_peer_id_cache, GUINT_TO_POINTER(node->id), node);
    }

    if (uname && node->uname == NULL) {
        node->uname = strdup(uname);
        crm_info("Node %u is now known as %s", id, uname);
        g_hash_table_replace(crm_peer_cache, node->uname, node);
        if (crm_status_callback) {
            crm_status_callback(crm_status_uname, node, NULL);
        }
    }

    if (node && node->uname && node->uuid == NULL) {
        const char *uuid = get_node_uuid(id, node->uname);

        if(uuid) {
            node->uuid = strdup(uuid);
            crm_info("Node %u has uuid %s", id, node->uuid);
        } else {
            crm_warn("Cannot obtain a UUID for node %d/%s", id, node->uname);
        }
    }

    return node;
}
static void
stonith_peer_cs_destroy(gpointer user_data)
{
    crm_crit("Lost connection to cluster layer, shutting down");
    stonith_shutdown(0);
}
Exemple #25
0
gboolean
corosync_initialize_nodelist(void *cluster, gboolean force_member, xmlNode * xml_parent)
{
    int lpc = 0;
    int rc = CS_OK;
    int retries = 0;
    gboolean any = FALSE;
    cmap_handle_t cmap_handle;

    do {
        rc = cmap_initialize(&cmap_handle);
        if (rc != CS_OK) {
            retries++;
            crm_debug("API connection setup failed: %s.  Retrying in %ds", cs_strerror(rc),
                      retries);
            sleep(retries);
        }

    } while (retries < 5 && rc != CS_OK);

    if (rc != CS_OK) {
        crm_warn("Could not connect to Cluster Configuration Database API, error %d", rc);
        return FALSE;
    }

    crm_peer_init();
    crm_trace("Initializing corosync nodelist");
    for (lpc = 0;; lpc++) {
        uint32_t nodeid = 0;
        char *name = NULL;
        char *key = NULL;

        key = g_strdup_printf("nodelist.node.%d.nodeid", lpc);
        rc = cmap_get_uint32(cmap_handle, key, &nodeid);
        g_free(key);

        if (rc != CS_OK) {
            break;
        }

        name = corosync_node_name(cmap_handle, nodeid);
        if (name != NULL) {
            GHashTableIter iter;
            crm_node_t *node = NULL;

            g_hash_table_iter_init(&iter, crm_peer_cache);
            while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
                if(node && node->uname && strcasecmp(node->uname, name) == 0) {
                    if (node->id && node->id != nodeid) {
                        crm_crit("Nodes %u and %u share the same name '%s': shutting down", node->id,
                                 nodeid, name);
                        crm_exit(DAEMON_RESPAWN_STOP);
                    }
                }
            }
        }

        if (nodeid > 0 || name != NULL) {
            crm_trace("Initializing node[%d] %u = %s", lpc, nodeid, name);
            crm_get_peer(nodeid, name);
        }

        if (nodeid > 0 && name != NULL) {
            any = TRUE;

            if (xml_parent) {
                xmlNode *node = create_xml_node(xml_parent, XML_CIB_TAG_NODE);

                crm_xml_add_int(node, XML_ATTR_ID, nodeid);
                crm_xml_add(node, XML_ATTR_UNAME, name);
                if (force_member) {
                    crm_xml_add(node, XML_ATTR_TYPE, CRM_NODE_MEMBER);
                }
            }
        }

        free(name);
    }
    cmap_finalize(cmap_handle);
    return any;
}