void ap_relieve_child_processes(void) { int i; extra_process_t *cur_extra; int max_daemons; ap_mpm_query(AP_MPMQ_MAX_DAEMON_USED, &max_daemons); /* now see who is done */ for (i = 0; i < max_daemons; ++i) { pid_t pid = MPM_CHILD_PID(i); if (pid == 0) { continue; /* not every scoreboard entry is in use */ } if (reclaim_one_pid(pid, DO_NOTHING)) { MPM_NOTE_CHILD_KILLED(i); } } cur_extra = extras; while (cur_extra) { extra_process_t *next = cur_extra->next; if (reclaim_one_pid(cur_extra->pid, DO_NOTHING)) { AP_DEBUG_ASSERT(1 == ap_unregister_extra_mpm_process(cur_extra->pid)); } cur_extra = next; } }
/*
 * Parent-process main routine: set up the accept lock, start the children,
 * supervise them until a shutdown or restart is flagged, then wind the
 * children down accordingly.
 *
 * Parameters:
 *   _pconf - pool used here for the lock file name and the accept mutex.
 *            Note that several calls below use the file-scope 'pconf'
 *            (assigned in prefork_open_logs()) rather than this parameter.
 *   plog   - not referenced in this function.
 *   s      - server record used for early log messages and for
 *            s->process->pool passed to ap_run_pre_mpm().
 *
 * Returns:
 *   1 - accept-lock setup or ap_run_pre_mpm() failed, a child exit was
 *       classified as APEXIT_CHILDFATAL, a (graceful or non-graceful)
 *       shutdown completed, or a restart was requested in one_process mode.
 *   0 - a restart was requested and the children have been signalled.
 *   NOTE(review): presumably the caller exits on 1 and re-runs
 *   configuration on 0 -- confirm against the caller.
 */
int ap_mpm_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s)
{
    int index;
    int remaining_children_to_start;
    apr_status_t rv;

    ap_log_pid(pconf, ap_pid_fname);

    first_server_limit = server_limit;
    if (changed_limit_at_restart) {
        ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
                     "WARNING: Attempt to change ServerLimit "
                     "ignored during restart");
        changed_limit_at_restart = 0;
    }

    /* Initialize cross-process accept lock; the lock file name gets this
     * process's pid appended.
     */
    ap_lock_fname = apr_psprintf(_pconf, "%s.%" APR_PID_T_FMT,
                                 ap_server_root_relative(_pconf, ap_lock_fname),
                                 ap_my_pid);

    rv = apr_proc_mutex_create(&accept_mutex, ap_lock_fname,
                               ap_accept_lock_mech, _pconf);
    if (rv != APR_SUCCESS) {
        ap_log_error(APLOG_MARK, APLOG_EMERG, rv, s,
                     "Couldn't create accept lock (%s) (%d)",
                     ap_lock_fname, ap_accept_lock_mech);
        mpm_state = AP_MPMQ_STOPPING;
        return 1;
    }

    /* Only the opening 'if' differs between the two preprocessor branches;
     * the body and closing brace below are shared by both.
     */
#if APR_USE_SYSVSEM_SERIALIZE
    if (ap_accept_lock_mech == APR_LOCK_DEFAULT ||
        ap_accept_lock_mech == APR_LOCK_SYSVSEM) {
#else
    if (ap_accept_lock_mech == APR_LOCK_SYSVSEM) {
#endif
        rv = unixd_set_proc_mutex_perms(accept_mutex);
        if (rv != APR_SUCCESS) {
            ap_log_error(APLOG_MARK, APLOG_EMERG, rv, s,
                         "Couldn't set permissions on cross-process lock; "
                         "check User and Group directives");
            mpm_state = AP_MPMQ_STOPPING;
            return 1;
        }
    }

    if (!is_graceful) {
        if (ap_run_pre_mpm(s->process->pool, SB_SHARED) != OK) {
            mpm_state = AP_MPMQ_STOPPING;
            return 1;
        }
        /* fix the generation number in the global score; we just got a new,
         * cleared scoreboard
         */
        ap_scoreboard_image->global->running_generation = ap_my_generation;
    }

    set_signals();

    if (one_process) {
        /* Single-process mode: make_child() is called directly for slot 0
         * and the supervision loop in the else-branch is skipped entirely.
         */
        AP_MONCONTROL(1);
        make_child(ap_server_conf, 0);
    }
    else {
        if (ap_daemons_max_free < ap_daemons_min_free + 1)  /* Don't thrash... */
            ap_daemons_max_free = ap_daemons_min_free + 1;

        /* If we're doing a graceful_restart then we're going to see a lot
         * of children exiting immediately when we get into the main loop
         * below (because we just sent them AP_SIG_GRACEFUL).  This happens pretty
         * rapidly... and for each one that exits we'll start a new one until
         * we reach at least daemons_min_free.  But we may be permitted to
         * start more than that, so we'll just keep track of how many we're
         * supposed to start up without the 1 second penalty between each fork.
         */
        remaining_children_to_start = ap_daemons_to_start;
        if (remaining_children_to_start > ap_daemons_limit) {
            remaining_children_to_start = ap_daemons_limit;
        }
        if (!is_graceful) {
            startup_children(remaining_children_to_start);
            remaining_children_to_start = 0;
        }
        else {
            /* give the system some time to recover before kicking into
             * exponential mode
             */
            hold_off_on_exponential_spawning = 10;
        }

        ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf,
                     "%s configured -- resuming normal operations",
                     ap_get_server_description());
        ap_log_error(APLOG_MARK, APLOG_INFO, 0, ap_server_conf,
                     "Server built: %s", ap_get_server_built());
#ifdef AP_MPM_WANT_SET_ACCEPT_LOCK_MECH
        ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
                     "AcceptMutex: %s (default: %s)",
                     apr_proc_mutex_name(accept_mutex),
                     apr_proc_mutex_defname());
#endif
        /* Both flags are cleared here and the loop below runs until one of
         * them becomes non-zero again.
         * NOTE(review): presumably they are set by the handlers installed
         * in set_signals() -- confirm.
         */
        restart_pending = shutdown_pending = 0;

        mpm_state = AP_MPMQ_RUNNING;

        while (!restart_pending && !shutdown_pending) {
            int child_slot;
            apr_exit_why_e exitwhy;
            int status, processed_status;
            /* this is a memory leak, but I'll fix it later. */
            apr_proc_t pid;

            ap_wait_or_timeout(&exitwhy, &status, &pid, pconf);

            /* XXX: if it takes longer than 1 second for all our children
             * to start up and get into IDLE state then we may spawn an
             * extra child
             */
            if (pid.pid != -1) {
                /* pid.pid != -1: a process was actually reaped this round. */
                processed_status = ap_process_child_status(&pid, exitwhy, status);
                if (processed_status == APEXIT_CHILDFATAL) {
                    mpm_state = AP_MPMQ_STOPPING;
                    return 1;
                }

                /* non-fatal death... note that it's gone in the scoreboard. */
                child_slot = find_child_by_pid(&pid);
                if (child_slot >= 0) {
                    (void) ap_update_child_status_from_indexes(child_slot, 0, SERVER_DEAD,
                                                               (request_rec *) NULL);
                    if (processed_status == APEXIT_CHILDSICK) {
                        /* child detected a resource shortage (E[NM]FILE, ENOBUFS, etc)
                         * cut the fork rate to the minimum
                         */
                        idle_spawn_rate = 1;
                    }
                    else if (remaining_children_to_start
                        && child_slot < ap_daemons_limit) {
                        /* we're still doing a 1-for-1 replacement of dead
                         * children with new children
                         */
                        make_child(ap_server_conf, child_slot);
                        --remaining_children_to_start;
                    }
                /* The 'else if' arm below exists only when APR_HAS_OTHER_CHILD
                 * is set; it is spliced into the surrounding if/else chain.
                 */
#if APR_HAS_OTHER_CHILD
                }
                else if (apr_proc_other_child_alert(&pid, APR_OC_REASON_DEATH, status) == APR_SUCCESS) {
                    /* handled */
#endif
                }
                else if (is_graceful) {
                    /* Great, we've probably just lost a slot in the
                     * scoreboard.  Somehow we don't know about this
                     * child.
                     */
                    ap_log_error(APLOG_MARK, APLOG_WARNING,
                                0, ap_server_conf,
                                "long lost child came home! (pid %ld)", (long)pid.pid);
                }
                /* Don't perform idle maintenance when a child dies,
                 * only do it when there's a timeout.  Remember only a
                 * finite number of children can die, and it's pretty
                 * pathological for a lot to die suddenly.
                 */
                continue;
            }
            else if (remaining_children_to_start) {
                /* we hit a 1 second timeout in which none of the previous
                 * generation of children needed to be reaped... so assume
                 * they're all done, and pick up the slack if any is left.
                 */
                startup_children(remaining_children_to_start);
                remaining_children_to_start = 0;
                /* In any event we really shouldn't do the code below because
                 * few of the servers we just started are in the IDLE state
                 * yet, so we'd mistakenly create an extra server.
                 */
                continue;
            }

            perform_idle_server_maintenance(pconf);
#ifdef TPF
            shutdown_pending = os_check_server(tpf_server_name);
            ap_check_signals();
            sleep(1);
#endif /*TPF */
        }
    } /* one_process */

    mpm_state = AP_MPMQ_STOPPING;

    if (shutdown_pending && !is_graceful) {
        /* Time to shut down:
         * Kill child processes, tell them to call child_exit, etc...
         */
        if (unixd_killpg(getpgrp(), SIGTERM) < 0) {
            ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf, "killpg SIGTERM");
        }
        ap_reclaim_child_processes(1);          /* Start with SIGTERM */

        /* cleanup pid file on normal shutdown; the removal is only logged
         * when unlink() succeeds, a failure is silently ignored
         */
        {
            const char *pidfile = NULL;
            pidfile = ap_server_root_relative (pconf, ap_pid_fname);
            if ( pidfile != NULL && unlink(pidfile) == 0)
                ap_log_error(APLOG_MARK, APLOG_INFO,
                                0, ap_server_conf,
                                "removed PID file %s (pid=%ld)",
                                pidfile, (long)getpid());
        }

        ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf,
                    "caught SIGTERM, shutting down");

        return 1;
    } else if (shutdown_pending) {
        /* Time to perform a graceful shut down:
         * Reap the inactive children, and ask the active ones
         * to close their listeners, then wait until they are
         * all done to exit.
         */
        int active_children;
        apr_time_t cutoff = 0;

        /* Stop listening */
        ap_close_listeners();

        /* kill off the idle ones */
        ap_mpm_pod_killpg(pod, ap_max_daemons_limit);

        /* Send AP_SIG_GRACEFUL to the active children */
        active_children = 0;
        for (index = 0; index < ap_daemons_limit; ++index) {
            if (ap_scoreboard_image->servers[index][0].status != SERVER_DEAD) {
                /* Ask each child to close its listeners. */
                ap_mpm_safe_kill(MPM_CHILD_PID(index), AP_SIG_GRACEFUL);
                active_children++;
            }
        }

        /* Allow each child which actually finished to exit */
        ap_relieve_child_processes();

        /* cleanup pid file; as above, only a successful unlink() is logged */
        {
            const char *pidfile = NULL;
            pidfile = ap_server_root_relative (pconf, ap_pid_fname);
            if ( pidfile != NULL && unlink(pidfile) == 0)
                ap_log_error(APLOG_MARK, APLOG_INFO,
                                0, ap_server_conf,
                                "removed PID file %s (pid=%ld)",
                                pidfile, (long)getpid());
        }

        ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf,
           "caught " AP_SIG_GRACEFUL_STOP_STRING ", shutting down gracefully");

        /* cutoff stays 0 when no timeout is configured; the loop condition
         * below then never consults it.
         */
        if (ap_graceful_shutdown_timeout) {
            cutoff = apr_time_now() +
                     apr_time_from_sec(ap_graceful_shutdown_timeout);
        }

        /* Don't really exit until each child has finished */
        shutdown_pending = 0;
        do {
            /* Pause for a second */
            sleep(1);

            /* Relieve any children which have now exited */
            ap_relieve_child_processes();

            /* Probe every slot with signal 0 to see whether anything is
             * still alive.
             */
            active_children = 0;
            for (index = 0; index < ap_daemons_limit; ++index) {
                if (ap_mpm_safe_kill(MPM_CHILD_PID(index), 0) == APR_SUCCESS) {
                    active_children = 1;
                    /* Having just one child is enough to stay around */
                    break;
                }
            }
        } while (!shutdown_pending && active_children &&
                 (!ap_graceful_shutdown_timeout || apr_time_now() < cutoff));

        /* We might be here because we received SIGTERM, either
         * way, try and make sure that all of our processes are
         * really dead.
         */
        unixd_killpg(getpgrp(), SIGTERM);

        return 1;
    }

    /* we've been told to restart */
    apr_signal(SIGHUP, SIG_IGN);
    apr_signal(AP_SIG_GRACEFUL, SIG_IGN);
    if (one_process) {
        /* not worth thinking about */
        return 1;
    }

    /* advance to the next generation */
    /* XXX: we really need to make sure this new generation number isn't in
     * use by any of the children.
     */
    ++ap_my_generation;
    ap_scoreboard_image->global->running_generation = ap_my_generation;

    if (is_graceful) {
        ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf,
                    "Graceful restart requested, doing restart");

        /* kill off the idle ones */
        ap_mpm_pod_killpg(pod, ap_max_daemons_limit);

        /* This is mostly for debugging... so that we know what is still
         * gracefully dealing with existing request.  This will break
         * in a very nasty way if we ever have the scoreboard totally
         * file-based (no shared memory)
         */
        for (index = 0; index < ap_daemons_limit; ++index) {
            if (ap_scoreboard_image->servers[index][0].status != SERVER_DEAD) {
                ap_scoreboard_image->servers[index][0].status = SERVER_GRACEFUL;
                /* Ask each child to close its listeners.
                 *
                 * NOTE: we use the scoreboard, because if we send SIGUSR1
                 * to every process in the group, this may include CGI's,
                 * piped loggers, etc. They almost certainly won't handle
                 * it gracefully.
                 */
                ap_mpm_safe_kill(ap_scoreboard_image->parent[index].pid, AP_SIG_GRACEFUL);
            }
        }
    }
    else {
        /* Kill 'em off */
        if (unixd_killpg(getpgrp(), SIGHUP) < 0) {
            ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf, "killpg SIGHUP");
        }
        ap_reclaim_child_processes(0);          /* Not when just starting up */
        ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf,
                    "SIGHUP received. Attempting to restart");
    }

    return 0;
}

/* This really should be a post_config hook, but the error log is already
 * redirected by that point, so we need to do this in the open_logs phase.
 *
 * Records 'p' and 's' in the file-scope pconf / ap_server_conf, sets up the
 * listening sockets and opens the pipe-of-death.
 *
 * Parameters:
 *   p     - stored in pconf and used to open the pipe-of-death.
 *   plog  - not referenced in this function.
 *   ptemp - not referenced in this function.
 *   s     - stored in ap_server_conf and passed to ap_setup_listeners().
 *
 * Returns DONE when no listening socket could be set up or the
 * pipe-of-death could not be opened (each case is logged), OK otherwise.
 */
static int prefork_open_logs(apr_pool_t *p, apr_pool_t *plog, apr_pool_t *ptemp, server_rec *s)
{
    apr_status_t rv;

    pconf = p;
    ap_server_conf = s;

    /* ap_setup_listeners() result is kept in num_listensocks; anything
     * below 1 is treated as "nothing to listen on".
     */
    if ((num_listensocks = ap_setup_listeners(ap_server_conf)) < 1) {
        ap_log_error(APLOG_MARK, APLOG_ALERT|APLOG_STARTUP, 0,
                     NULL, "no listening sockets available, shutting down");
        return DONE;
    }

    /* Any non-zero status from ap_mpm_pod_open() is treated as failure. */
    if ((rv = ap_mpm_pod_open(pconf, &pod))) {
        ap_log_error(APLOG_MARK, APLOG_CRIT|APLOG_STARTUP, rv, NULL,
                "Could not open pipe-of-death.");
        return DONE;
    }
    return OK;
}
void ap_reclaim_child_processes(int terminate) { apr_time_t waittime = 1024 * 16; int i; extra_process_t *cur_extra; int not_dead_yet; int max_daemons; apr_time_t starttime = apr_time_now(); /* this table of actions and elapsed times tells what action is taken * at which elapsed time from starting the reclaim */ struct { action_t action; apr_time_t action_time; } action_table[] = { {DO_NOTHING, 0}, /* dummy entry for iterations where we reap * children but take no action against * stragglers */ {SEND_SIGTERM, apr_time_from_sec(3)}, {SEND_SIGTERM, apr_time_from_sec(5)}, {SEND_SIGTERM, apr_time_from_sec(7)}, {SEND_SIGKILL, apr_time_from_sec(9)}, {GIVEUP, apr_time_from_sec(10)} }; int cur_action; /* index of action we decided to take this * iteration */ int next_action = 1; /* index of first real action */ ap_mpm_query(AP_MPMQ_MAX_DAEMON_USED, &max_daemons); do { apr_sleep(waittime); /* don't let waittime get longer than 1 second; otherwise, we don't * react quickly to the last child exiting, and taking action can * be delayed */ waittime = waittime * 4; if (waittime > apr_time_from_sec(1)) { waittime = apr_time_from_sec(1); } /* see what action to take, if any */ if (action_table[next_action].action_time <= apr_time_now() - starttime) { cur_action = next_action; ++next_action; } else { cur_action = 0; /* nothing to do */ } /* now see who is done */ not_dead_yet = 0; for (i = 0; i < max_daemons; ++i) { pid_t pid = MPM_CHILD_PID(i); if (pid == 0) { continue; /* not every scoreboard entry is in use */ } if (reclaim_one_pid(pid, action_table[cur_action].action)) { MPM_NOTE_CHILD_KILLED(i); } else { ++not_dead_yet; } } cur_extra = extras; while (cur_extra) { extra_process_t *next = cur_extra->next; if (reclaim_one_pid(cur_extra->pid, action_table[cur_action].action)) { AP_DEBUG_ASSERT(1 == ap_unregister_extra_mpm_process(cur_extra->pid)); } else { ++not_dead_yet; } cur_extra = next; } #if APR_HAS_OTHER_CHILD apr_proc_other_child_refresh_all(APR_OC_REASON_RESTART); #endif 
} while (not_dead_yet > 0 && action_table[cur_action].action != GIVEUP); }