예제 #1
0
gboolean
check_sbd_timeout(const char *value)
{
    long st_timeout = value? crm_get_msec(value) : 0;

    if (st_timeout <= 0) {
        crm_debug("Watchdog may be enabled but stonith-watchdog-timeout is disabled (%s)",
                  value? value : "default");

    } else if (pcmk_locate_sbd() == 0) {
        do_crm_log_always(LOG_EMERG,
                          "Shutting down: stonith-watchdog-timeout configured (%s) but SBD not active",
                          value);
        crm_exit(DAEMON_RESPAWN_STOP);
        return FALSE;

    } else {
        long sbd_timeout = crm_get_sbd_timeout();

        if (st_timeout < sbd_timeout) {
            do_crm_log_always(LOG_EMERG,
                              "Shutting down: stonith-watchdog-timeout (%s) too short (must be >%ldms)",
                              value, sbd_timeout);
            crm_exit(DAEMON_RESPAWN_STOP);
            return FALSE;
        }
        crm_info("Watchdog configured with stonith-watchdog-timeout %s and SBD timeout %ldms",
                 value, sbd_timeout);
    }
    return TRUE;
}
예제 #2
0
void
pcmk_panic(const char *origin) 
{
    static struct qb_log_callsite *panic_cs = NULL;

    if (panic_cs == NULL) {
        panic_cs = qb_log_callsite_get(__func__, __FILE__, "panic-delay", LOG_TRACE, __LINE__, crm_trace_nonlog);
    }

    /* Ensure sbd_pid is set */
    (void)pcmk_locate_sbd();

    if (panic_cs && panic_cs->targets) {
        /* getppid() == 1 means our original parent no longer exists */
        do_crm_log_always(LOG_EMERG,
                          "Shutting down instead of panicking the node: origin=%s, sbd=%d, parent=%d",
                          origin, sbd_pid, getppid());
        crm_exit(DAEMON_RESPAWN_STOP);
        return;
    }

    if(sbd_pid > 1) {
        do_crm_log_always(LOG_EMERG, "Signaling sbd(%d) to panic the system: %s", sbd_pid, origin);
        pcmk_panic_sbd();

    } else {
        do_crm_log_always(LOG_EMERG, "Panicking the system directly: %s", origin);
        pcmk_panic_local();
    }
}
예제 #3
0
gboolean
check_sbd_timeout(const char *value)
{
    long sbd_timeout = crm_get_sbd_timeout();
    long st_timeout = crm_get_msec(value);

    if(value == NULL || st_timeout <= 0) {
        crm_notice("Watchdog may be enabled but stonith-watchdog-timeout is disabled: %s", value);

    } else if(pcmk_locate_sbd() == 0) {
        do_crm_log_always(LOG_EMERG, "Shutting down: stonith-watchdog-timeout is configured (%ldms) but SBD is not active", st_timeout);
        crm_exit(DAEMON_RESPAWN_STOP);
        return FALSE;

    } else if(st_timeout < sbd_timeout) {
        do_crm_log_always(LOG_EMERG, "Shutting down: stonith-watchdog-timeout (%ldms) is too short (must be greater than %ldms)",
                          st_timeout, sbd_timeout);
        crm_exit(DAEMON_RESPAWN_STOP);
        return FALSE;
    }

    crm_info("Watchdog functionality is consistent: %s delay exceeds timeout of %ldms", value, sbd_timeout);
    return TRUE;
}
예제 #4
0
int
main(int argc, char **argv)
{
    int rc;
    int flag;
    int argerr = 0;

    int option_index = 0;
    gboolean shutdown = FALSE;

    uid_t pcmk_uid = 0;
    gid_t pcmk_gid = 0;
    struct rlimit cores;
    crm_ipc_t *old_instance = NULL;
    qb_ipcs_service_t *ipcs = NULL;
    const char *facility = daemon_option("logfacility");
    static crm_cluster_t cluster;

    crm_log_preinit(NULL, argc, argv);
    crm_set_options(NULL, "mode [options]", long_options, "Start/Stop Pacemaker\n");
    mainloop_add_signal(SIGHUP, pcmk_ignore);
    mainloop_add_signal(SIGQUIT, pcmk_sigquit);

    while (1) {
        flag = crm_get_option(argc, argv, &option_index);
        if (flag == -1)
            break;

        switch (flag) {
            case 'V':
                crm_bump_log_level(argc, argv);
                break;
            case 'f':
                /* Legacy */
                break;
            case 'p':
                pid_file = optarg;
                break;
            case '$':
            case '?':
                crm_help(flag, EX_OK);
                break;
            case 'S':
                shutdown = TRUE;
                break;
            case 'F':
                printf("Pacemaker %s (Build: %s)\n Supporting v%s: %s\n", PACEMAKER_VERSION, BUILD_VERSION,
                       CRM_FEATURE_SET, CRM_FEATURES);
                crm_exit(pcmk_ok);
            default:
                printf("Argument code 0%o (%c) is not (?yet?) supported\n", flag, flag);
                ++argerr;
                break;
        }
    }

    if (optind < argc) {
        printf("non-option ARGV-elements: ");
        while (optind < argc)
            printf("%s ", argv[optind++]);
        printf("\n");
    }
    if (argerr) {
        crm_help('?', EX_USAGE);
    }


    setenv("LC_ALL", "C", 1);
    setenv("HA_LOGD", "no", 1);

    set_daemon_option("mcp", "true");
    set_daemon_option("use_logd", "off");

    crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE);

    /* Restore the original facility so that mcp_read_config() does the right thing */
    set_daemon_option("logfacility", facility);

    crm_debug("Checking for old instances of %s", CRM_SYSTEM_MCP);
    old_instance = crm_ipc_new(CRM_SYSTEM_MCP, 0);
    crm_ipc_connect(old_instance);

    if (shutdown) {
        crm_debug("Terminating previous instance");
        while (crm_ipc_connected(old_instance)) {
            xmlNode *cmd =
                create_request(CRM_OP_QUIT, NULL, NULL, CRM_SYSTEM_MCP, CRM_SYSTEM_MCP, NULL);

            crm_debug(".");
            crm_ipc_send(old_instance, cmd, 0, 0, NULL);
            free_xml(cmd);

            sleep(2);
        }
        crm_ipc_close(old_instance);
        crm_ipc_destroy(old_instance);
        crm_exit(pcmk_ok);

    } else if (crm_ipc_connected(old_instance)) {
        crm_ipc_close(old_instance);
        crm_ipc_destroy(old_instance);
        crm_err("Pacemaker is already active, aborting startup");
        crm_exit(DAEMON_RESPAWN_STOP);
    }

    crm_ipc_close(old_instance);
    crm_ipc_destroy(old_instance);

    if (mcp_read_config() == FALSE) {
        crm_notice("Could not obtain corosync config data, exiting");
        crm_exit(ENODATA);
    }

    crm_notice("Starting Pacemaker %s (Build: %s): %s", PACEMAKER_VERSION, BUILD_VERSION, CRM_FEATURES);
    mainloop = g_main_new(FALSE);
    sysrq_init();

    rc = getrlimit(RLIMIT_CORE, &cores);
    if (rc < 0) {
        crm_perror(LOG_ERR, "Cannot determine current maximum core size.");
    } else {
        if (cores.rlim_max == 0 && geteuid() == 0) {
            cores.rlim_max = RLIM_INFINITY;
        } else {
            crm_info("Maximum core file size is: %lu", (unsigned long)cores.rlim_max);
        }
        cores.rlim_cur = cores.rlim_max;

        rc = setrlimit(RLIMIT_CORE, &cores);
        if (rc < 0) {
            crm_perror(LOG_ERR,
                       "Core file generation will remain disabled."
                       " Core files are an important diagnositic tool,"
                       " please consider enabling them by default.");
        }
#if 0
        /* system() is not thread-safe, can't call from here
         * Actually, its a pretty hacky way to try and achieve this anyway
         */
        if (system("echo 1 > /proc/sys/kernel/core_uses_pid") != 0) {
            crm_perror(LOG_ERR, "Could not enable /proc/sys/kernel/core_uses_pid");
        }
#endif
    }
    rc = pcmk_ok;

    if (crm_user_lookup(CRM_DAEMON_USER, &pcmk_uid, &pcmk_gid) < 0) {
        crm_err("Cluster user %s does not exist, aborting Pacemaker startup", CRM_DAEMON_USER);
        crm_exit(ENOKEY);
    }

    mkdir(CRM_STATE_DIR, 0750);
    mcp_chown(CRM_STATE_DIR, pcmk_uid, pcmk_gid);

    /* Used to store core files in */
    crm_build_path(CRM_CORE_DIR, 0775);
    mcp_chown(CRM_CORE_DIR, pcmk_uid, pcmk_gid);

    /* Used to store blackbox dumps in */
    crm_build_path(CRM_BLACKBOX_DIR, 0755);
    mcp_chown(CRM_BLACKBOX_DIR, pcmk_uid, pcmk_gid);

    /* Used to store policy engine inputs in */
    crm_build_path(PE_STATE_DIR, 0755);
    mcp_chown(PE_STATE_DIR, pcmk_uid, pcmk_gid);

    /* Used to store the cluster configuration */
    crm_build_path(CRM_CONFIG_DIR, 0755);
    mcp_chown(CRM_CONFIG_DIR, pcmk_uid, pcmk_gid);

    /* Resource agent paths are constructed by the lrmd */

    ipcs = mainloop_add_ipc_server(CRM_SYSTEM_MCP, QB_IPC_NATIVE, &mcp_ipc_callbacks);
    if (ipcs == NULL) {
        crm_err("Couldn't start IPC server");
        crm_exit(EIO);
    }

    /* Allows us to block shutdown */
    if (cluster_connect_cfg(&local_nodeid) == FALSE) {
        crm_err("Couldn't connect to Corosync's CFG service");
        crm_exit(ENOPROTOOPT);
    }

    if(pcmk_locate_sbd() > 0) {
        setenv("PCMK_watchdog", "true", 1);
    } else {
        setenv("PCMK_watchdog", "false", 1);
    }

    find_and_track_existing_processes();

    cluster.destroy = mcp_cpg_destroy;
    cluster.cpg.cpg_deliver_fn = mcp_cpg_deliver;
    cluster.cpg.cpg_confchg_fn = mcp_cpg_membership;

    crm_set_autoreap(FALSE);

    if(cluster_connect_cpg(&cluster) == FALSE) {
        crm_err("Couldn't connect to Corosync's CPG service");
        rc = -ENOPROTOOPT;
    }

    if (rc == pcmk_ok && is_corosync_cluster()) {
        /* Keep the membership list up-to-date for crm_node to query */
        if(cluster_connect_quorum(mcp_quorum_callback, mcp_quorum_destroy) == FALSE) {
            rc = -ENOTCONN;
        }
    }

#if SUPPORT_CMAN
    if (rc == pcmk_ok && is_cman_cluster()) {
        init_cman_connection(mcp_cman_dispatch, mcp_cman_destroy);
    }
#endif

    if(rc == pcmk_ok) {
        local_name = get_local_node_name();
        update_node_processes(local_nodeid, local_name, get_process_list());

        mainloop_add_signal(SIGTERM, pcmk_shutdown);
        mainloop_add_signal(SIGINT, pcmk_shutdown);

        init_children_processes();

        crm_info("Starting mainloop");

        g_main_run(mainloop);
    }

    if (ipcs) {
        crm_trace("Closing IPC server");
        mainloop_del_ipc_server(ipcs);
        ipcs = NULL;
    }

    g_main_destroy(mainloop);

    cluster_disconnect_cpg(&cluster);
    cluster_disconnect_cfg();

    crm_info("Exiting %s", crm_system_name);

    return crm_exit(rc);
}
예제 #5
0
static void
do_pe_invoke_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
    xmlNode *cmd = NULL;
    pid_t watchdog = pcmk_locate_sbd();

    if (rc != pcmk_ok) {
        crm_err("Could not retrieve the Cluster Information Base: %s "
                CRM_XS " rc=%d call=%d", pcmk_strerror(rc), rc, call_id);
        register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__);
        return;

    } else if (call_id != fsa_pe_query) {
        crm_trace("Skipping superseded CIB query: %d (current=%d)", call_id, fsa_pe_query);
        return;

    } else if (AM_I_DC == FALSE || is_set(fsa_input_register, R_PE_CONNECTED) == FALSE) {
        crm_debug("No need to invoke the scheduler anymore");
        return;

    } else if (fsa_state != S_POLICY_ENGINE) {
        crm_debug("Discarding scheduler request in state: %s",
                  fsa_state2string(fsa_state));
        return;

    /* this callback counts as 1 */
    } else if (num_cib_op_callbacks() > 1) {
        crm_debug("Re-asking for the CIB: %d other peer updates still pending",
                  (num_cib_op_callbacks() - 1));
        sleep(1);
        register_fsa_action(A_PE_INVOKE);
        return;

    } else if (fsa_state != S_POLICY_ENGINE) {
        crm_err("Invoking scheduler in state: %s", fsa_state2string(fsa_state));
        return;
    }

    CRM_LOG_ASSERT(output != NULL);

    // Refresh the remote node cache when the scheduler is invoked
    crm_remote_peer_cache_refresh(output);

    crm_xml_add(output, XML_ATTR_DC_UUID, fsa_our_uuid);
    crm_xml_add_int(output, XML_ATTR_HAVE_QUORUM, fsa_has_quorum);

    force_local_option(output, XML_ATTR_HAVE_WATCHDOG, watchdog?"true":"false");

    if (ever_had_quorum && crm_have_quorum == FALSE) {
        crm_xml_add_int(output, XML_ATTR_QUORUM_PANIC, 1);
    }

    cmd = create_request(CRM_OP_PECALC, output, NULL, CRM_SYSTEM_PENGINE, CRM_SYSTEM_DC, NULL);

    free(fsa_pe_ref);
    fsa_pe_ref = crm_element_value_copy(cmd, XML_ATTR_REFERENCE);

    rc = pe_subsystem_send(cmd);
    if (rc < 0) {
        crm_err("Could not contact the scheduler: %s " CRM_XS " rc=%d",
                pcmk_strerror(rc), rc);
        register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__);
    }
    crm_debug("Invoking the scheduler: query=%d, ref=%s, seq=%llu, quorate=%d",
              fsa_pe_query, fsa_pe_ref, crm_peer_seq, fsa_has_quorum);
    free_xml(cmd);
}