Exemplo n.º 1
0
int
servant_cluster(const char *diskname, int mode, const void* argp)
{
    enum cluster_type_e cluster_stack = get_cluster_type();

    crm_system_name = strdup("sbd:cluster");
    cl_log(LOG_INFO, "Monitoring %s cluster health", name_for_cluster_type(cluster_stack));
    set_proc_title("sbd: watcher: Cluster");

    sbd_membership_connect();

    /* stonith_our_uname = cluster.uname; */
    /* stonith_our_uuid = cluster.uuid; */

    mainloop = g_main_new(FALSE);
    notify_timer = g_timeout_add(timeout_loop * 1000, notify_timer_cb, NULL);

    mainloop_add_signal(SIGTERM, cluster_shutdown);
    mainloop_add_signal(SIGINT, cluster_shutdown);
    
    g_main_run(mainloop);
    g_main_destroy(mainloop);
    
    clean_up(0);
    return 0;                   /* never reached */
}
Exemplo n.º 2
0
/*
 * Handler invoked when the cluster membership connection goes away.
 *
 * Logs the loss, marks this servant's health as unclean, tells the parent,
 * and then immediately tries to connect again. user_data is unused.
 */
static void
sbd_membership_destroy(gpointer user_data)
{
    const char *stack_name = name_for_cluster_type(get_cluster_type());

    cl_log(LOG_WARNING, "Lost connection to %s", stack_name);

    /* Report the failure upwards before attempting any recovery */
    set_servant_health(pcmk_health_unclean, LOG_ERR, "Cluster connection terminated");
    notify_parent();

    /*
     * Try to reconnect. If the outage is not transient, the watchdog is
     * expected to take the node down.
     */
    sbd_membership_connect();
}
Exemplo n.º 3
0
/*
 * Handler invoked when the cluster membership connection goes away.
 *
 * Logs the loss, optionally tears down the cmap state, marks this servant's
 * health as unclean, notifies the parent, and then tries to reconnect.
 * user_data is unused.
 */
static void
sbd_membership_destroy(gpointer user_data)
{
    cl_log(LOG_WARNING, "Lost connection to %s", name_for_cluster_type(get_cluster_type()));

    /*
     * cmap_destroy() is only compiled in when both SUPPORT_COROSYNC and
     * CHECK_TWO_NODE are enabled, and only runs when the cluster type is
     * not pcmk_cluster_unknown. Without those flags this branch is empty.
     */
    if (get_cluster_type() != pcmk_cluster_unknown) {
#if SUPPORT_COROSYNC && CHECK_TWO_NODE
        cmap_destroy();
#endif
    }

    /* Report the failure to the parent before attempting recovery */
    set_servant_health(pcmk_health_unclean, LOG_ERR, "Cluster connection terminated");
    notify_parent();

    /* Attempt to reconnect, the watchdog will take the node down if the problem isn't transient */
    sbd_membership_connect();
}
Exemplo n.º 4
0
/*
 * Check whether the Pacemaker Remote daemon identified by remoted_pid is
 * still alive and report the result to the parent.
 *
 * Decision order:
 *   - remoted_pid <= 0: health is set to transient ("no connection").
 *   - kill(pid, 0) fails with ESRCH: the process is gone.
 *   - /proc/<pid>/exe is not readable on this system (probed once, using
 *     our own pid): a live pid is accepted as-is.
 *   - otherwise the executable behind the pid must be
 *     SBINDIR/pacemaker_remoted, which guards against pid reuse.
 *
 * Whenever the daemon is not considered running, a reconnect is attempted
 * via sbd_membership_connect().
 *
 * user_data is unused. Always returns true; presumably this keeps a GLib
 * timeout source armed -- confirm against the registration site.
 */
static gboolean
sbd_remote_check(gpointer user_data)
{
    /* 0 = not probed yet, 1 = /proc/<pid>/exe usable, -1 = not usable */
    static int have_proc_pid = 0;

    int running = 0;

    cl_log(LOG_DEBUG, "Checking pacemaker remote connection: %d/%d", have_proc_pid, remoted_pid);

    if(have_proc_pid == 0) {
        char proc_path[PATH_MAX], exe_path[PATH_MAX];

        /* One-time probe: can we resolve /proc/<pid>/exe at all? Use our own pid, which certainly exists */
        snprintf(proc_path, sizeof(proc_path), "/proc/%lu/exe", (long unsigned int)getpid());

        have_proc_pid = 1;
        if(readlink(proc_path, exe_path, sizeof(exe_path) - 1) < 0) {
            have_proc_pid = -1;
        }
    }

    if (remoted_pid <= 0) {
        set_servant_health(pcmk_health_transient, LOG_WARNING, "No Pacemaker Remote connection");
        goto notify;

    } else if (kill(remoted_pid, 0) < 0 && errno == ESRCH) {
        /* Not running */

    } else if(have_proc_pid == -1) {
        /* No way to verify the executable, so a live pid has to be good enough */
        running = 1;
        cl_log(LOG_DEBUG, "Process %ld is active", (long)remoted_pid);

    } else {
        ssize_t link_len = 0;
        int expected_len = 0;
        char proc_path[PATH_MAX], exe_path[PATH_MAX], expected_path[PATH_MAX];

        /* check to make sure pid hasn't been reused by another process */
        snprintf(proc_path, sizeof(proc_path), "/proc/%lu/exe", (long unsigned int)remoted_pid);

        /* readlink() does not NUL-terminate; leave room for the terminator */
        link_len = readlink(proc_path, exe_path, sizeof(exe_path) - 1);
        if (link_len < 0) {
            crm_perror(LOG_ERR, "Could not read from %s", proc_path);
            goto done;
        }
        exe_path[link_len] = 0;

        /*
         * snprintf() already NUL-terminates within the given size. Its
         * return value is the length it wanted to write, so it must not be
         * used as an index without checking for error or truncation.
         */
        expected_len = snprintf(expected_path, sizeof(expected_path), "%s/pacemaker_remoted", SBINDIR);
        if (expected_len < 0 || (size_t)expected_len >= sizeof(expected_path)) {
            cl_log(LOG_ERR, "Could not build the expected pacemaker_remoted path");
            goto done;
        }

        if (strcmp(exe_path, expected_path) == 0) {
            cl_log(LOG_DEBUG, "Process %s (%ld) is active",
                   exe_path, (long)remoted_pid);
            running = 1;
        }
    }

  done:

    if(running) {
        set_servant_health(pcmk_health_online, LOG_INFO,
                           "Connected to Pacemaker Remote %lu", (long unsigned int)remoted_pid);
    } else {
        set_servant_health(pcmk_health_unclean, LOG_WARNING,
                           "Connection to Pacemaker Remote %lu lost", (long unsigned int)remoted_pid);
    }

  notify:
    notify_parent();

    /* Also reached with running == 0 from the "no connection" path above */
    if(running == 0) {
        sbd_membership_connect();
    }
    return true;
}