Пример #1
0
static void
pcmk_process_exit(pcmk_child_t * child)
{
    child->pid = 0;
    child->active_before_startup = FALSE;

    /* Broadcast the fact that one of our processes died ASAP
     *
     * Try to get some logging of the cause out first though
     * because we're probably about to get fenced
     *
     * Potentially do this only if respawn_count > N
     * to allow for local recovery
     */
    update_node_processes(local_nodeid, NULL, get_process_list());

    child->respawn_count += 1;
    if (child->respawn_count > MAX_RESPAWN) {
        crm_err("Child respawn count exceeded by %s", child->name);
        child->respawn = FALSE;
    }

    if (shutdown_trigger) {
        mainloop_set_trigger(shutdown_trigger);
        update_node_processes(local_nodeid, NULL, get_process_list());

    } else if (child->respawn && crm_is_true(getenv("PCMK_fail_fast"))) {
        crm_err("Rebooting system because of %s", child->name);
        pcmk_panic(__FUNCTION__);

    } else if (child->respawn) {
        crm_notice("Respawning failed child process: %s", child->name);
        start_child(child);
    }
}
Пример #2
0
static void
mcp_cpg_deliver(cpg_handle_t handle,
                 const struct cpg_name *groupName,
                 uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
{
    xmlNode *xml = string2xml(msg);
    const char *task = crm_element_value(xml, F_CRM_TASK);

    crm_trace("Received %s %.200s", task, msg);
    if (task == NULL && nodeid != local_nodeid) {
        uint32_t procs = 0;
        const char *uname = crm_element_value(xml, "uname");

        crm_element_value_int(xml, "proclist", (int *)&procs);
        /* crm_debug("Got proclist %.32x from %s", procs, uname); */
        if (update_node_processes(nodeid, uname, procs)) {
            update_process_clients(NULL);
        }

    } else if (crm_str_eq(task, CRM_OP_RM_NODE_CACHE, TRUE)) {
        int id = 0;
        const char *name = NULL;

        crm_element_value_int(xml, XML_ATTR_ID, &id);
        name = crm_element_value(xml, XML_ATTR_UNAME);
        reap_crm_member(id, name);
    }
}
Пример #3
0
static void
pcmk_cpg_deliver(cpg_handle_t handle,
                 const struct cpg_name *groupName,
                 uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
{
    if (nodeid != local_nodeid) {
        uint32_t procs = 0;
        xmlNode *xml = string2xml(msg);
        const char *uname = crm_element_value(xml, "uname");

        crm_element_value_int(xml, "proclist", (int *)&procs);
        /* crm_debug("Got proclist %.32x from %s", procs, uname); */
        if (update_node_processes(nodeid, uname, procs)) {
            update_process_clients();
        }
    }
}
Пример #4
0
/*!
 * \internal
 * \brief Process a CPG message (process list or manual peer cache removal)
 *
 * \param[in] handle     CPG connection (ignored)
 * \param[in] groupName  CPG group name (ignored)
 * \param[in] nodeid     ID of affected node
 * \param[in] pid        Process ID (ignored)
 * \param[in] msg        CPG XML message
 * \param[in] msg_len    Length of msg in bytes (ignored)
 */
static void
mcp_cpg_deliver(cpg_handle_t handle,
                 const struct cpg_name *groupName,
                 uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
{
    xmlNode *xml = string2xml(msg);
    const char *task = crm_element_value(xml, F_CRM_TASK);

    crm_trace("Received CPG message (%s): %.200s",
              (task? task : "process list"), msg);

    if (task == NULL) {
        if (nodeid == local_nodeid) {
            crm_info("Ignoring process list sent by peer for local node");
        } else {
            uint32_t procs = 0;
            const char *uname = crm_element_value(xml, "uname");

            crm_element_value_int(xml, "proclist", (int *)&procs);
            if (update_node_processes(nodeid, uname, procs)) {
                update_process_clients(NULL);
            }
        }

    } else if (crm_str_eq(task, CRM_OP_RM_NODE_CACHE, TRUE)) {
        int id = 0;
        const char *name = NULL;

        crm_element_value_int(xml, XML_ATTR_ID, &id);
        name = crm_element_value(xml, XML_ATTR_UNAME);
        reap_crm_member(id, name);
    }

    if (xml != NULL) {
        free_xml(xml);
    }
}
Пример #5
0
int
main(int argc, char **argv)
{
    int rc;
    int flag;
    int argerr = 0;

    int option_index = 0;
    gboolean shutdown = FALSE;

    uid_t pcmk_uid = 0;
    gid_t pcmk_gid = 0;
    struct rlimit cores;
    crm_ipc_t *old_instance = NULL;
    qb_ipcs_service_t *ipcs = NULL;
    const char *facility = daemon_option("logfacility");
    static crm_cluster_t cluster;

    crm_log_preinit(NULL, argc, argv);
    crm_set_options(NULL, "mode [options]", long_options, "Start/Stop Pacemaker\n");
    mainloop_add_signal(SIGHUP, pcmk_ignore);
    mainloop_add_signal(SIGQUIT, pcmk_sigquit);

    while (1) {
        flag = crm_get_option(argc, argv, &option_index);
        if (flag == -1)
            break;

        switch (flag) {
            case 'V':
                crm_bump_log_level(argc, argv);
                break;
            case 'f':
                /* Legacy */
                break;
            case 'p':
                pid_file = optarg;
                break;
            case '$':
            case '?':
                crm_help(flag, EX_OK);
                break;
            case 'S':
                shutdown = TRUE;
                break;
            case 'F':
                printf("Pacemaker %s (Build: %s)\n Supporting v%s: %s\n", PACEMAKER_VERSION, BUILD_VERSION,
                       CRM_FEATURE_SET, CRM_FEATURES);
                crm_exit(pcmk_ok);
            default:
                printf("Argument code 0%o (%c) is not (?yet?) supported\n", flag, flag);
                ++argerr;
                break;
        }
    }

    if (optind < argc) {
        printf("non-option ARGV-elements: ");
        while (optind < argc)
            printf("%s ", argv[optind++]);
        printf("\n");
    }
    if (argerr) {
        crm_help('?', EX_USAGE);
    }


    setenv("LC_ALL", "C", 1);
    setenv("HA_LOGD", "no", 1);

    set_daemon_option("mcp", "true");
    set_daemon_option("use_logd", "off");

    crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE);

    /* Restore the original facility so that mcp_read_config() does the right thing */
    set_daemon_option("logfacility", facility);

    crm_debug("Checking for old instances of %s", CRM_SYSTEM_MCP);
    old_instance = crm_ipc_new(CRM_SYSTEM_MCP, 0);
    crm_ipc_connect(old_instance);

    if (shutdown) {
        crm_debug("Terminating previous instance");
        while (crm_ipc_connected(old_instance)) {
            xmlNode *cmd =
                create_request(CRM_OP_QUIT, NULL, NULL, CRM_SYSTEM_MCP, CRM_SYSTEM_MCP, NULL);

            crm_debug(".");
            crm_ipc_send(old_instance, cmd, 0, 0, NULL);
            free_xml(cmd);

            sleep(2);
        }
        crm_ipc_close(old_instance);
        crm_ipc_destroy(old_instance);
        crm_exit(pcmk_ok);

    } else if (crm_ipc_connected(old_instance)) {
        crm_ipc_close(old_instance);
        crm_ipc_destroy(old_instance);
        crm_err("Pacemaker is already active, aborting startup");
        crm_exit(DAEMON_RESPAWN_STOP);
    }

    crm_ipc_close(old_instance);
    crm_ipc_destroy(old_instance);

    if (mcp_read_config() == FALSE) {
        crm_notice("Could not obtain corosync config data, exiting");
        crm_exit(ENODATA);
    }

    crm_notice("Starting Pacemaker %s (Build: %s): %s", PACEMAKER_VERSION, BUILD_VERSION, CRM_FEATURES);
    mainloop = g_main_new(FALSE);
    sysrq_init();

    rc = getrlimit(RLIMIT_CORE, &cores);
    if (rc < 0) {
        crm_perror(LOG_ERR, "Cannot determine current maximum core size.");
    } else {
        if (cores.rlim_max == 0 && geteuid() == 0) {
            cores.rlim_max = RLIM_INFINITY;
        } else {
            crm_info("Maximum core file size is: %lu", (unsigned long)cores.rlim_max);
        }
        cores.rlim_cur = cores.rlim_max;

        rc = setrlimit(RLIMIT_CORE, &cores);
        if (rc < 0) {
            crm_perror(LOG_ERR,
                       "Core file generation will remain disabled."
                       " Core files are an important diagnositic tool,"
                       " please consider enabling them by default.");
        }
#if 0
        /* system() is not thread-safe, can't call from here
         * Actually, its a pretty hacky way to try and achieve this anyway
         */
        if (system("echo 1 > /proc/sys/kernel/core_uses_pid") != 0) {
            crm_perror(LOG_ERR, "Could not enable /proc/sys/kernel/core_uses_pid");
        }
#endif
    }
    rc = pcmk_ok;

    if (crm_user_lookup(CRM_DAEMON_USER, &pcmk_uid, &pcmk_gid) < 0) {
        crm_err("Cluster user %s does not exist, aborting Pacemaker startup", CRM_DAEMON_USER);
        crm_exit(ENOKEY);
    }

    mkdir(CRM_STATE_DIR, 0750);
    mcp_chown(CRM_STATE_DIR, pcmk_uid, pcmk_gid);

    /* Used to store core files in */
    crm_build_path(CRM_CORE_DIR, 0775);
    mcp_chown(CRM_CORE_DIR, pcmk_uid, pcmk_gid);

    /* Used to store blackbox dumps in */
    crm_build_path(CRM_BLACKBOX_DIR, 0755);
    mcp_chown(CRM_BLACKBOX_DIR, pcmk_uid, pcmk_gid);

    /* Used to store policy engine inputs in */
    crm_build_path(PE_STATE_DIR, 0755);
    mcp_chown(PE_STATE_DIR, pcmk_uid, pcmk_gid);

    /* Used to store the cluster configuration */
    crm_build_path(CRM_CONFIG_DIR, 0755);
    mcp_chown(CRM_CONFIG_DIR, pcmk_uid, pcmk_gid);

    /* Resource agent paths are constructed by the lrmd */

    ipcs = mainloop_add_ipc_server(CRM_SYSTEM_MCP, QB_IPC_NATIVE, &mcp_ipc_callbacks);
    if (ipcs == NULL) {
        crm_err("Couldn't start IPC server");
        crm_exit(EIO);
    }

    /* Allows us to block shutdown */
    if (cluster_connect_cfg(&local_nodeid) == FALSE) {
        crm_err("Couldn't connect to Corosync's CFG service");
        crm_exit(ENOPROTOOPT);
    }

    if(pcmk_locate_sbd() > 0) {
        setenv("PCMK_watchdog", "true", 1);
    } else {
        setenv("PCMK_watchdog", "false", 1);
    }

    find_and_track_existing_processes();

    cluster.destroy = mcp_cpg_destroy;
    cluster.cpg.cpg_deliver_fn = mcp_cpg_deliver;
    cluster.cpg.cpg_confchg_fn = mcp_cpg_membership;

    crm_set_autoreap(FALSE);

    if(cluster_connect_cpg(&cluster) == FALSE) {
        crm_err("Couldn't connect to Corosync's CPG service");
        rc = -ENOPROTOOPT;
    }

    if (rc == pcmk_ok && is_corosync_cluster()) {
        /* Keep the membership list up-to-date for crm_node to query */
        if(cluster_connect_quorum(mcp_quorum_callback, mcp_quorum_destroy) == FALSE) {
            rc = -ENOTCONN;
        }
    }

#if SUPPORT_CMAN
    if (rc == pcmk_ok && is_cman_cluster()) {
        init_cman_connection(mcp_cman_dispatch, mcp_cman_destroy);
    }
#endif

    if(rc == pcmk_ok) {
        local_name = get_local_node_name();
        update_node_processes(local_nodeid, local_name, get_process_list());

        mainloop_add_signal(SIGTERM, pcmk_shutdown);
        mainloop_add_signal(SIGINT, pcmk_shutdown);

        init_children_processes();

        crm_info("Starting mainloop");

        g_main_run(mainloop);
    }

    if (ipcs) {
        crm_trace("Closing IPC server");
        mainloop_del_ipc_server(ipcs);
        ipcs = NULL;
    }

    g_main_destroy(mainloop);

    cluster_disconnect_cpg(&cluster);
    cluster_disconnect_cfg();

    crm_info("Exiting %s", crm_system_name);

    return crm_exit(rc);
}
Пример #6
0
static gboolean
start_child(pcmk_child_t * child)
{
    int lpc = 0;
    uid_t uid = 0;
    gid_t gid = 0;
    struct rlimit oflimits;
    gboolean use_valgrind = FALSE;
    gboolean use_callgrind = FALSE;
    const char *devnull = "/dev/null";
    const char *env_valgrind = getenv("PCMK_valgrind_enabled");
    const char *env_callgrind = getenv("PCMK_callgrind_enabled");
    enum cluster_type_e stack = get_cluster_type();

    child->active_before_startup = FALSE;

    if (child->command == NULL) {
        crm_info("Nothing to do for child \"%s\"", child->name);
        return TRUE;
    }

    if (env_callgrind != NULL && crm_is_true(env_callgrind)) {
        use_callgrind = TRUE;
        use_valgrind = TRUE;

    } else if (env_callgrind != NULL && strstr(env_callgrind, child->name)) {
        use_callgrind = TRUE;
        use_valgrind = TRUE;

    } else if (env_valgrind != NULL && crm_is_true(env_valgrind)) {
        use_valgrind = TRUE;

    } else if (env_valgrind != NULL && strstr(env_valgrind, child->name)) {
        use_valgrind = TRUE;
    }

    if (use_valgrind && strlen(VALGRIND_BIN) == 0) {
        crm_warn("Cannot enable valgrind for %s:"
                 " The location of the valgrind binary is unknown", child->name);
        use_valgrind = FALSE;
    }

    if (child->uid) {
        if (crm_user_lookup(child->uid, &uid, &gid) < 0) {
            crm_err("Invalid user (%s) for %s: not found", child->uid, child->name);
            return FALSE;
        }
        crm_info("Using uid=%u and group=%u for process %s", uid, gid, child->name);
    }

    child->pid = fork();
    CRM_ASSERT(child->pid != -1);

    if (child->pid > 0) {
        /* parent */
        mainloop_child_add(child->pid, 0, child->name, child, pcmk_child_exit);

        crm_info("Forked child %d for process %s%s", child->pid, child->name,
                 use_valgrind ? " (valgrind enabled: " VALGRIND_BIN ")" : "");
        update_node_processes(local_nodeid, NULL, get_process_list());
        return TRUE;

    } else {
        /* Start a new session */
        (void)setsid();

        /* Setup the two alternate arg arrarys */
        opts_vgrind[0] = strdup(VALGRIND_BIN);
        if (use_callgrind) {
            opts_vgrind[1] = strdup("--tool=callgrind");
            opts_vgrind[2] = strdup("--callgrind-out-file=" CRM_STATE_DIR "/callgrind.out.%p");
            opts_vgrind[3] = strdup(child->command);
            opts_vgrind[4] = NULL;
        } else {
            opts_vgrind[1] = strdup(child->command);
            opts_vgrind[2] = NULL;
            opts_vgrind[3] = NULL;
            opts_vgrind[4] = NULL;
        }
        opts_default[0] = strdup(child->command);;

        if(gid) {
            if(stack == pcmk_cluster_corosync) {
                /* Drop root privileges completely
                 *
                 * We can do this because we set uidgid.gid.${gid}=1
                 * via CMAP which allows these processes to connect to
                 * corosync
                 */
                if (setgid(gid) < 0) {
                    crm_perror(LOG_ERR, "Could not set group to %d", gid);
                }

                /* Keep the root group (so we can access corosync), but add the haclient group (so we can access ipc) */
            } else if (initgroups(child->uid, gid) < 0) {
                crm_err("Cannot initialize groups for %s: %s (%d)", child->uid, pcmk_strerror(errno), errno);
            }
        }

        if (uid && setuid(uid) < 0) {
            crm_perror(LOG_ERR, "Could not set user to %d (%s)", uid, child->uid);
        }

        /* Close all open file descriptors */
        getrlimit(RLIMIT_NOFILE, &oflimits);
        for (lpc = 0; lpc < oflimits.rlim_cur; lpc++) {
            close(lpc);
        }

        (void)open(devnull, O_RDONLY);  /* Stdin:  fd 0 */
        (void)open(devnull, O_WRONLY);  /* Stdout: fd 1 */
        (void)open(devnull, O_WRONLY);  /* Stderr: fd 2 */

        if (use_valgrind) {
            (void)execvp(VALGRIND_BIN, opts_vgrind);
        } else {
            (void)execvp(child->command, opts_default);
        }
        crm_perror(LOG_ERR, "FATAL: Cannot exec %s", child->command);
        crm_exit(DAEMON_RESPAWN_STOP);
    }
    return TRUE;                /* never reached */
}