/*
 * Handler installed for signals that should abort the run.
 *
 * Every signal delivered here ends the program: the signal_caught flag is
 * set, reap_workers() is invoked on child_tab (only when the table exists
 * and total_children is positive), an abort notice is written to stderr and
 * the process exits with status 0.
 *
 * NOTE(review): stdio and exit() are not on the POSIX async-signal-safe
 * list; behavior is intentionally left unchanged here.
 */
void sigcatcher(int sig)
{
	(void)sig;	/* the signal number itself is never consulted */

	signal_caught = 1;

	if (child_tab != NULL && total_children > 0)
		reap_workers(child_tab, total_children, 1);

	fputs("** Operation aborted **\n", stderr);
	exit(0);
}
static bool wait_for_accept(void) { // accept the connection // wait up to 0.5s struct timespec timeout = {0, 500000000}; for(;;) { int res = waiter_wait(&pfd[ACCEPTING],1,&timeout); if(res < 0) { if(errno == EINTR) { reap_workers(); continue; } if(errno == EAGAIN) { // uh... how? continue; } perror("get_worker wait"); abort(); } return res == 1; // don't waste time hanging in this mini-poll waiting for accept } }
int main(int argc, char *argv[]) { unsigned int i; struct timeval start, stop, diff; int readyfds[2], wakefds[2]; char dummy; process_options (argc, argv); printf("Running in %s mode with %d groups using %d file descriptors each (== %d tasks)\n", (process_mode == 0 ? "threaded" : "process"), num_groups, 2*num_fds, num_groups*(num_fds*2)); printf("Each sender will pass %d messages of %d bytes\n", loops, datasize); fflush(NULL); child_tab = calloc(num_fds * 2 * num_groups, sizeof(childinfo_t)); if (!child_tab) barf("main:malloc()"); fdpair(readyfds); fdpair(wakefds); /* Catch some signals */ signal(SIGINT, sigcatcher); signal(SIGTERM, sigcatcher); signal(SIGHUP, SIG_IGN); total_children = 0; for (i = 0; i < num_groups; i++) { int c = group(child_tab, total_children, num_fds, readyfds[1], wakefds[0]); if( c != (num_fds*2) ) { fprintf(stderr, "%i children started. Expected %i\n", c, num_fds*2); reap_workers(child_tab, total_children + c, 1); barf("Creating workers"); } total_children += c; } /* Wait for everyone to be ready */ for (i = 0; i < total_children; i++) if (read(readyfds[0], &dummy, 1) != 1) { reap_workers(child_tab, total_children, 1); barf("Reading for readyfds"); } gettimeofday(&start, NULL); /* Kick them off */ if (write(wakefds[1], &dummy, 1) != 1) { reap_workers(child_tab, total_children, 1); barf("Writing to start senders"); } /* Reap them all */ reap_workers(child_tab, total_children, 0); gettimeofday(&stop, NULL); /* Print time... */ timersub(&stop, &start, &diff); printf("Time: %lu.%03lu\n", diff.tv_sec, diff.tv_usec/1000); free(child_tab); exit(0); }
size_t get_worker(void) { // get a worker // off, so we don't check worker 0 a million times static size_t off = -1; static int tries = 0; ++off; int which; for(which=0;which<numworkers;++which) { size_t derp = (which+off)%numworkers; switch(workers[derp].status) { case IDLE: workers[derp].status = BUSY; PFD(derp).events = POLLIN; tries = 0; return derp; }; } if(tries < 3) { ++tries; // if we timeout 3 times, stop waiting for idle workers. return -1; } /* no idle found, try starting some workers */ if(numworkers < MAXWORKERS) { // add a worker to the end if(start_worker()) { tries = 0; return numworkers-1; } } else { reap_workers(); for(which=0;which<numworkers;++which) { if(workers[which].status == DOOMED) { /* if 995 ns left (expiration - now) and doom delay is 1000ns 1000 - 995 < 50, so wait a teensy bit longer please */ Time diff = timediff(DOOM_DELAY, timediff(workers[which].expiration, getnow())); if(diff.tv_nsec > 50) { // waited too long, kill the thing. kill_worker(which); if(start_worker()) { tries = 0; return numworkers-1; } } } } } if(wait_for_accept()) { if(accept_workers()) { return get_worker(); } } // have to wait until the new worker connects errno = EAGAIN; // eh return -1; }
/*
 * Ask worker `which` to terminate.
 *
 * Marks the slot DOOMED, records an expiration of now + DOOM_DELAY, sends
 * SIGTERM to the worker's pid and then calls reap_workers().
 * The result of kill() is not checked.
 */
void stop_worker(int which)
{
	const pid_t target = workers[which].pid;

	workers[which].status = DOOMED;
	workers[which].expiration = timeadd(getnow(), DOOM_DELAY);

	kill(target, SIGTERM);
	reap_workers();
}
/*
 * Forcibly terminate worker `which`.
 *
 * Sends SIGKILL to the worker's pid, removes the worker via remove_worker()
 * and then calls reap_workers().  The result of kill() is not checked.
 */
void kill_worker(int which)
{
	/* Read the pid before the slot is removed. */
	const pid_t victim = workers[which].pid;

	kill(victim, SIGKILL);
	remove_worker(which);
	reap_workers();
}