/* Generate children*/ static void fork_children(void) { while (shm->running_childs < max_children) { int childno; int pid = 0; if (shm->spawn_no_more == TRUE) return; /* a new child means a new seed, or the new child * will do the same syscalls as the one in the child it's replacing. * (special case startup, or we reseed unnecessarily) */ if (shm->ready == TRUE) reseed(); /* Find a space for it in the pid map */ childno = find_childno(EMPTY_PIDSLOT); if (childno == CHILD_NOT_FOUND) { outputerr("## Pid map was full!\n"); dump_childnos(); exit_main_fail(); } fflush(stdout); pid = fork(); if (pid == 0) { /* Child process. */ init_child(childno); child_process(); debugf("child %d %d exiting.\n", childno, getpid()); close_logfile(&this_child->logfile); _exit(EXIT_SUCCESS); } else { if (pid == -1) { /* We failed, wait for a child to exit before retrying. */ if (shm->running_childs > 0) return; output(0, "couldn't create child! (%s)\n", strerror(errno)); panic(EXIT_FORK_FAILURE); exit_main_fail(); } } shm->children[childno]->pid = pid; shm->running_childs++; debugf("Created child %d (pid:%d) [total:%d/%d]\n", childno, pid, shm->running_childs, max_children); if (shm->exit_reason != STILL_RUNNING) return; } shm->ready = TRUE; debugf("created enough children\n"); }
static unsigned int reap_dead_kids(void) { unsigned int i; unsigned int alive = 0; unsigned int reaped = 0; for_each_child(i) { struct childdata *child; pid_t pid; int ret; child = shm->children[i]; pid = child->pid; if (pid == EMPTY_PIDSLOT) continue; if (pid_is_valid(pid) == FALSE) { static bool once = FALSE; if (once != FALSE) return 0; output(0, "Sanity check failed! Found pid %u at pidslot %u!\n", pid, i); dump_childnos(); if (shm->exit_reason == STILL_RUNNING) panic(EXIT_PID_OUT_OF_RANGE); dump_childdata(child); once = TRUE; return 0; } ret = kill(pid, 0); /* If it disappeared, reap it. */ if (ret == -1) { if (errno == ESRCH) { output(0, "pid %u has disappeared. Reaping.\n", pid); reap_child(pid); reaped++; } else { output(0, "problem checking on pid %u (%d:%s)\n", pid, errno, strerror(errno)); } } else { alive++; } if (shm->running_childs == 0) return 0; } if (reaped != 0) output(0, "Reaped %d dead children\n", reaped); return alive; }
/* * Make sure various entries in the shm look sensible. * We use this to make sure that random syscalls haven't corrupted it. * * also check the pids for sanity. */ static int shm_is_corrupt(void) { unsigned int i; // FIXME: The '500000' is magic, and should be dynamically calculated. // On startup, we should figure out how many getpid()'s per second we can do, // and use that. if (shm->stats.total_syscalls_done - shm->stats.previous_op_count > 500000) { output(0, "Execcount increased dramatically! (old:%ld new:%ld):\n", shm->stats.previous_op_count, shm->stats.total_syscalls_done); panic(EXIT_SHM_CORRUPTION); return TRUE; } shm->stats.previous_op_count = shm->stats.total_syscalls_done; for_each_child(i) { struct childdata *child; pid_t pid; child = shm->children[i]; pid = child->pid; if (pid == EMPTY_PIDSLOT) continue; if (pid_is_valid(pid) == FALSE) { static bool once = FALSE; if (once != FALSE) return TRUE; output(0, "Sanity check failed! Found pid %u at pidslot %u!\n", pid, i); dump_childnos(); if (shm->exit_reason == STILL_RUNNING) panic(EXIT_PID_OUT_OF_RANGE); dump_childdata(child); once = TRUE; return TRUE; } } return FALSE; }
static FILE * find_child_logfile_handle(pid_t pid) { int i; unsigned int j; FILE *log = NULL; i = find_childno(pid); if (i != CHILD_NOT_FOUND) { log = shm->children[i]->logfile; } else { /* This is pretty ugly, and should never happen, * but try again a second later, in case we're racing setup/teardown. * FIXME: We may not even need this now that we have proper locking; test it. */ sleep(1); i = find_childno(pid); if (i == CHILD_NOT_FOUND) { outputerr("Couldn't find child for pid %d\n", pid); return mainlogfile; } log = shm->children[i]->logfile; } if (log != NULL) return log; /* if the logfile hadn't been set, log to main. */ shm->children[i]->logfile = mainlogfile; outputerr("## child %d logfile handle was null logging to main!\n", i); outputerr("## Couldn't find logfile for pid %d\n", pid); dump_childnos(); outputerr("## Logfiles for pids: "); for_each_child(j) outputerr("%p ", shm->children[j]->logfile); outputerr("\n"); (void)fflush(stdout); sleep(5); return mainlogfile; }
/* * Make sure various entries in the shm look sensible. * We use this to make sure that random syscalls haven't corrupted it. * * also check the pids for sanity. */ static int shm_is_corrupt(void) { unsigned int i; if (shm->stats.op_count < shm->stats.previous_op_count) { output(0, "Execcount went backwards! (old:%ld new:%ld):\n", shm->stats.previous_op_count, shm->stats.op_count); panic(EXIT_SHM_CORRUPTION); return TRUE; } shm->stats.previous_op_count = shm->stats.op_count; for_each_child(i) { struct childdata *child; pid_t pid; child = shm->children[i]; pid = pids[i]; if (pid == EMPTY_PIDSLOT) continue; if (pid_is_valid(pid) == FALSE) { static bool once = FALSE; if (once != FALSE) return TRUE; output(0, "Sanity check failed! Found pid %u at pidslot %u!\n", pid, i); dump_childnos(); if (shm->exit_reason == STILL_RUNNING) panic(EXIT_PID_OUT_OF_RANGE); dump_childdata(child); once = TRUE; return TRUE; } } return FALSE; }
static FILE * find_logfile_handle(void) { pid_t pid; int i; pid = getpid(); if (pid == initpid) return mainlogfile; if (pid == shm->mainpid) return mainlogfile; if (pid == watchdog_pid) return mainlogfile; i = find_childno(pid); if (i != CHILD_NOT_FOUND) return shm->children[i]->logfile; else { /* try one more time. FIXME: This is awful. */ unsigned int j; sleep(1); i = find_childno(pid); if (i != CHILD_NOT_FOUND) return shm->children[i]->logfile; outputerr("## Couldn't find logfile for pid %d\n", pid); dump_childnos(); outputerr("## Logfiles for pids: "); for_each_child(j) outputerr("%p ", shm->children[j]->logfile); outputerr("\n"); } return NULL; }
static void handle_child(pid_t childpid, int childstatus) { switch (childpid) { case 0: //debugf("Nothing changed. children:%d\n", shm->running_childs); break; case -1: if (shm->exit_reason != STILL_RUNNING) return; if (errno == ECHILD) { unsigned int i; bool seen = FALSE; debugf("All children exited!\n"); for_each_child(i) { struct childdata *child; child = shm->children[i]; if (child->pid != EMPTY_PIDSLOT) { if (pid_alive(child->pid) == -1) { debugf("Removing %d from pidmap\n", child->pid); child->pid = EMPTY_PIDSLOT; shm->running_childs--; } else { debugf("%d looks still alive! ignoring.\n", child->pid); } seen = TRUE; } } if (seen == FALSE) shm->running_childs = 0; break; } output(0, "error! (%s)\n", strerror(errno)); break; default: debugf("Something happened to pid %d\n", childpid); if (WIFEXITED(childstatus)) { int childno; childno = find_childno(childpid); if (childno == CHILD_NOT_FOUND) { /* If we reaped it, it wouldn't show up, so check that. */ if (shm->last_reaped != childpid) { outputerr("## Couldn't find %d in list of pids.\n", childpid); panic(EXIT_LOST_CHILD); dump_childnos(); } } else { debugf("Child %d exited after %ld operations.\n", childpid, shm->children[childno]->syscall.op_nr); reap_child(childpid); } break; } else if (WIFSIGNALED(childstatus)) { handle_childsig(childpid, childstatus, FALSE); } else if (WIFSTOPPED(childstatus)) { handle_childsig(childpid, childstatus, TRUE); } else if (WIFCONTINUED(childstatus)) { break; } else { output(0, "erk, wtf\n"); } }