/*
 * Write a one-line, '|'-separated summary of a job to stdout.
 *
 * job_ptr - job record to report on
 * user    - if non-NULL, the job is printed only when this string equals
 *           the name uid_to_string() yields for the job's user_id;
 *           NULL disables the filter.
 *
 * Field order: job id, compact job state, user name, partition,
 * requested core range, batch host ("N/A" when unset), submit time,
 * start time.  The range and time text is produced by _sprint_range()
 * and make_time_str(), which are defined elsewhere.
 */
void print_job (job_info_t * job_ptr, char * user)
{
	char *owner = uid_to_string((uid_t)job_ptr->user_id);
	uint32_t id;
	char *state;
	char *part;
	char *host;
	char cores[128];
	char submitted[32];
	char started[32];

	/* Filter: skip jobs that belong to somebody other than `user`. */
	if (user != NULL && strcmp(user, owner) != 0)
		return;

	id = job_ptr->job_id;
	state = job_state_string_compact(job_ptr->job_state);
	part = job_ptr->partition;

	/* A job without a batch host is reported with a placeholder. */
	host = job_ptr->batch_host;
	if (host == NULL)
		host = "N/A";

	_sprint_range(cores, sizeof(cores),
		      job_ptr->num_cpus, job_ptr->max_cpus);
	make_time_str((time_t *)&job_ptr->submit_time,
		      submitted, sizeof(submitted));
	make_time_str((time_t *)&job_ptr->start_time,
		      started, sizeof(started));

	printf("%u|%s|%s|%s|%s|%s|%s|%s\n",
	       id, state, owner, part, cores, host, submitted, started);
}
void dsk_daemon_maybe_fork (void) { int fork_pipe_fds[2] = {-1,-1}; if (dsk_daemon_do_fork) { int pid; retry_pipe: if (pipe (fork_pipe_fds) < 0) { if (errno == EINTR) goto retry_pipe; dsk_fd_creation_failed (errno); dsk_die ("error creating pipe: %s", strerror (errno)); } retry_daemon_fork: pid = fork (); if (pid < 0) { if (errno == EINTR) goto retry_daemon_fork; dsk_die ("error forking daemon: %s", strerror (errno)); } else if (pid > 0) { /* wait for EOF on pipe */ close (fork_pipe_fds[1]); char buf[1]; for (;;) { int nread = read (fork_pipe_fds[0], buf, 1); if (nread < 0) { if (errno == EINTR) continue; dsk_die ("error reading from semaphore pipe: %s", strerror (errno)); } else if (nread > 0) dsk_die ("somehow got data on semaphore pipe: %s:%u", __FILE__, __LINE__); else break; } _exit (0); } else { /* child process: continue as the non-forking case. */ close (fork_pipe_fds[0]); setsid (); } } int pid_file_fd = -1; if (dsk_daemon_pid_filename) { dsk_boolean must_truncate = DSK_FALSE; dsk_boolean made_dir = DSK_FALSE; retry_outer_pid_file_open: if ((pid_file_fd=open (dsk_daemon_pid_filename, O_CREAT|O_EXCL|O_WRONLY, 0666)) < 0) { if (errno == EINTR) goto retry_outer_pid_file_open; else if (errno == EEXIST) { /* open / lock-nonblocking / rewrite we get lock */ retry_inner_pid_file_open: if ((pid_file_fd=open (dsk_daemon_pid_filename, O_WRONLY, 0666)) < 0) { if (errno == EINTR) goto retry_inner_pid_file_open; dsk_die ("daemonize: error opening lock file %s: %s", dsk_daemon_pid_filename, strerror (errno)); } must_truncate = DSK_TRUE; } else if (errno == ENOENT && !made_dir) { /* make directories, retry */ char *slash = strrchr (dsk_daemon_pid_filename, '/'); if (slash == NULL) dsk_die ("daemonize: error creating %s: no such file or dir (cwd does not exist?)", dsk_daemon_pid_filename); char *dir = dsk_strdup_slice (dsk_daemon_pid_filename, slash); DskError *error = NULL; if (!dsk_mkdir_recursive (dir, 0777, &error)) dsk_die ("error making directory %s: %s", dir, 
error->message); dsk_free (dir); made_dir = DSK_TRUE; goto retry_outer_pid_file_open; } else { dsk_fd_creation_failed (errno); dsk_die ("daemonize: error creating PID file %s: %s", dsk_daemon_pid_filename, strerror (errno)); } } retry_flock: if (flock (pid_file_fd, LOCK_EX|LOCK_NB) < 0) { if (errno == EINTR) goto retry_flock; if (errno == EWOULDBLOCK) { /* TODO: print PID */ dsk_die ("daemonize: process already running"); } dsk_die ("daemonize: error locking: %s", strerror (errno)); } if (must_truncate) { ftruncate (pid_file_fd, 0); } char buf[32]; snprintf (buf, sizeof (buf), "%u\n", (unsigned)getpid ()); unsigned len = strlen (buf); unsigned written = 0; while (written < len) { int write_rv = write (pid_file_fd, buf + written, len - written); if (write_rv < 0) { if (errno == EINTR) continue; dsk_die ("error writing pid file %s", dsk_daemon_pid_filename); } written += write_rv; } } if (fork_pipe_fds[1] != -1) { close (fork_pipe_fds[1]); } if (dsk_daemon_watchdog) { int alert_pid = 0; unsigned last_alert_time = 0; for (;;) { /* NOTE: must never die, i guess */ int pid; int status; retry_watchdog_fork: pid = fork (); if (pid < 0) { if (errno == EINTR) goto retry_watchdog_fork; dsk_die ("error forking watchdogged process: %s", strerror (errno)); } else if (pid == 0) { if (pid_file_fd >= 0) close (pid_file_fd); maybe_redirect_stdouterr (); add_maybe_redirect_timer (); return; } maybe_redirect_stdouterr (); char time_str[TIME_STR_LENGTH]; make_time_str (time_str); fprintf (stderr, "%s: watchdog: forked process %u\n", time_str, (unsigned) pid); retry_waitpid: if (waitpid (pid, &status, 0) < 0) { if (errno == EINTR) goto retry_waitpid; dsk_die ("error running waitpid %u: %s", pid, strerror (errno)); } maybe_redirect_stdouterr (); make_time_str (time_str); if (WIFEXITED (status)) fprintf (stderr, "%s: watchdog: process %u exited with status %u\n", time_str, pid, WEXITSTATUS (status)); else if (WIFSIGNALED (status)) fprintf (stderr, "%s: watchdog: process %u killed by 
signal %u%s\n", time_str, pid, WTERMSIG (status), WCOREDUMP (status) ? " [core dumped]" : ""); else fprintf (stderr, "%s: watchdog: process %u died in some creative way\n", time_str, pid); /* configurable? */ sleep (1); /* send alert */ if (dsk_daemon_alert_script) { int time_delta = time (NULL) - last_alert_time; unsigned clamped_delta = time_delta < 0 ? 0 : time_delta; if (alert_pid > 0) { int rv = waitpid (alert_pid, &status, WNOHANG); if (rv < 0) { if (errno == EINTR) goto retry_waitpid; else dsk_die ("error waiting for alert process"); } else if (rv == 0) { /* process has not terminated */ } else { /* process terminated (ignore status?) */ alert_pid = 0; } } if (alert_pid == 0 && clamped_delta > dsk_daemon_alert_interval) { retry_alert_fork: alert_pid = fork (); if (alert_pid < 0) { if (errno == EINTR) goto retry_alert_fork; dsk_warning ("error forking alert process: %s", strerror (errno)); alert_pid = 0; } else if (alert_pid == 0) { execl ("/bin/sh", "/bin/sh", "-c", dsk_daemon_alert_script, NULL); _exit (127); } else dsk_daemon_n_alerts_suppressed = 0; } else ++dsk_daemon_n_alerts_suppressed; } } } }