Пример #1
0
/*
 * Write a single pipe-delimited summary line for one job to stdout.
 *
 * job_ptr: job record to describe.
 * user:    optional filter. When non-NULL the line is printed only if it
 *          compares equal (strcmp) to the name uid_to_string() yields for
 *          the job's user_id; when NULL the job is always printed.
 *
 * Fields, in output order: job id, compact job state, user name, partition,
 * CPU range (formatted by _sprint_range from num_cpus/max_cpus), batch host
 * ("N/A" when the record has none), submit time, start time.
 */
void print_job (job_info_t * job_ptr, char * user) {
    char * owner = uid_to_string((uid_t)job_ptr->user_id);
    char cpu_range[128];
    char submitted_at[32];
    char started_at[32];
    char * state;
    char * host;

    /* Filter: a user was requested and this job belongs to someone else. */
    if (user != NULL && strcmp(user, owner) != 0)
        return;

    state = job_state_string_compact(job_ptr->job_state);

    /* Substitute a placeholder when no batch host is recorded. */
    host = job_ptr->batch_host;
    if (host == NULL)
        host = "N/A";

    _sprint_range(cpu_range, sizeof(cpu_range), job_ptr->num_cpus, job_ptr->max_cpus);
    make_time_str((time_t *)&job_ptr->submit_time, submitted_at, sizeof(submitted_at));
    make_time_str((time_t *)&job_ptr->start_time, started_at, sizeof(started_at));

    printf("%u|%s|%s|%s|%s|%s|%s|%s\n",
           job_ptr->job_id, state, owner, job_ptr->partition,
           cpu_range, host, submitted_at, started_at);
}
Пример #2
0
void dsk_daemon_maybe_fork (void)
{
  int fork_pipe_fds[2] = {-1,-1};
  if (dsk_daemon_do_fork)
    {
      int pid;
    retry_pipe:
      if (pipe (fork_pipe_fds) < 0)
        {
	  if (errno == EINTR)
	    goto retry_pipe;
          dsk_fd_creation_failed (errno);
	  dsk_die ("error creating pipe: %s", strerror (errno));
	}
    retry_daemon_fork:
      pid = fork ();
      if (pid < 0)
        {
	  if (errno == EINTR)
	    goto retry_daemon_fork;
	  dsk_die ("error forking daemon: %s", strerror (errno));
	}
      else if (pid > 0)
        {
	  /* wait for EOF on pipe */
	  close (fork_pipe_fds[1]);
	  char buf[1];
	  for (;;)
	    {
	      int nread = read (fork_pipe_fds[0], buf, 1);
	      if (nread < 0)
	        {
		  if (errno == EINTR)
		    continue;
		  dsk_die ("error reading from semaphore pipe: %s", strerror (errno));
		}
              else if (nread > 0)
		dsk_die ("somehow got data on semaphore pipe: %s:%u", __FILE__, __LINE__);
	      else
	        break;
	    }
	  _exit (0);
        }
      else
        {
	  /* child process: continue as the non-forking case. */
	  close (fork_pipe_fds[0]);
          setsid ();
	}
    }
  int pid_file_fd = -1;
  if (dsk_daemon_pid_filename)
    {
      dsk_boolean must_truncate = DSK_FALSE;
      dsk_boolean made_dir = DSK_FALSE;

retry_outer_pid_file_open:
      if ((pid_file_fd=open (dsk_daemon_pid_filename, O_CREAT|O_EXCL|O_WRONLY, 0666)) < 0)
        {
          if (errno == EINTR)
            goto retry_outer_pid_file_open;
          else if (errno == EEXIST)
            {
              /* open / lock-nonblocking / rewrite we get lock */
retry_inner_pid_file_open:
              if ((pid_file_fd=open (dsk_daemon_pid_filename, O_WRONLY, 0666)) < 0)
                {
                  if (errno == EINTR)
                    goto retry_inner_pid_file_open;
                  dsk_die ("daemonize: error opening lock file %s: %s", dsk_daemon_pid_filename,
                           strerror (errno));
                }
              must_truncate = DSK_TRUE;
            }
          else if (errno == ENOENT && !made_dir)
            {
              /* make directories, retry */
              char *slash = strrchr (dsk_daemon_pid_filename, '/');
              if (slash == NULL)
                dsk_die ("daemonize: error creating %s: no such file or dir (cwd does not exist?)", dsk_daemon_pid_filename);
              char *dir = dsk_strdup_slice (dsk_daemon_pid_filename, slash);
              DskError *error = NULL;
              if (!dsk_mkdir_recursive (dir, 0777, &error))
                dsk_die ("error making directory %s: %s", dir, error->message);
              dsk_free (dir);
              made_dir = DSK_TRUE;
              goto retry_outer_pid_file_open;
            }
          else
            {
              dsk_fd_creation_failed (errno);
              dsk_die ("daemonize: error creating PID file %s: %s",
                       dsk_daemon_pid_filename, strerror (errno));
            }
        }
retry_flock:
      if (flock (pid_file_fd, LOCK_EX|LOCK_NB) < 0)
        {
          if (errno == EINTR)
            goto retry_flock;
          if (errno == EWOULDBLOCK)
            {
              /* TODO: print PID */
              dsk_die ("daemonize: process already running");
            }
          dsk_die ("daemonize: error locking: %s", strerror (errno));
        }
      if (must_truncate)
        {
          ftruncate (pid_file_fd, 0);
        }
      char buf[32];
      snprintf (buf, sizeof (buf), "%u\n", (unsigned)getpid ());
      unsigned len = strlen (buf);
      unsigned written = 0;
      while (written < len)
        {
          int write_rv = write (pid_file_fd, buf + written, len - written);
          if (write_rv < 0)
            {
              if (errno == EINTR)
                continue;
              dsk_die ("error writing pid file %s", dsk_daemon_pid_filename);
            }
          written += write_rv;
        }
    }
  if (fork_pipe_fds[1] != -1)
    {
      close (fork_pipe_fds[1]);
    }

  if (dsk_daemon_watchdog)
    {
      int alert_pid = 0;
      unsigned last_alert_time = 0;
      for (;;)
        {
	  /* NOTE: must never die, i guess */
          int pid;
          int status;

retry_watchdog_fork:
	  pid = fork ();
	  if (pid < 0)
	    {
	      if (errno == EINTR)
	        goto retry_watchdog_fork;
              dsk_die ("error forking watchdogged process: %s", strerror (errno));
	    }
	  else if (pid == 0)
	    {
              if (pid_file_fd >= 0)
                close (pid_file_fd);
              maybe_redirect_stdouterr ();
              add_maybe_redirect_timer ();
	      return;
	    }
	  maybe_redirect_stdouterr ();
          char time_str[TIME_STR_LENGTH];
	  make_time_str (time_str);
	  fprintf (stderr, "%s: watchdog: forked process %u\n",
	           time_str, (unsigned) pid);
retry_waitpid:
          if (waitpid (pid, &status, 0) < 0)
	    {
	      if (errno == EINTR)
		goto retry_waitpid;
	      dsk_die ("error running waitpid %u: %s", pid, strerror (errno));
	    }
	  maybe_redirect_stdouterr ();
	  make_time_str (time_str);
	  if (WIFEXITED (status))
	    fprintf (stderr, "%s: watchdog: process %u exited with status %u\n",
		     time_str, pid, WEXITSTATUS (status));
	  else if (WIFSIGNALED (status))
	    fprintf (stderr, "%s: watchdog: process %u killed by signal %u%s\n",
		     time_str, pid, WTERMSIG (status),
		     WCOREDUMP (status) ? " [core dumped]" : "");
          else
	    fprintf (stderr, "%s: watchdog: process %u died in some creative way\n",
		     time_str, pid);

          /* configurable? */
          sleep (1);

	  /* send alert */
	  if (dsk_daemon_alert_script)
	    {
	      int time_delta = time (NULL) - last_alert_time;
              unsigned clamped_delta = time_delta < 0 ? 0 : time_delta;
              if (alert_pid > 0)
                {
                  int rv = waitpid (alert_pid, &status, WNOHANG);
                  if (rv < 0)
                    {
                      if (errno == EINTR)
                        goto retry_waitpid;
                      else
                        dsk_die ("error waiting for alert process");
                    }
                  else if (rv == 0)
                    {
                      /* process has not terminated */
                    }
                  else
                    {
                      /* process terminated (ignore status?) */
                      alert_pid = 0;
                    }
                }
	      if (alert_pid == 0 && clamped_delta > dsk_daemon_alert_interval)
	        {
                  retry_alert_fork:
                  alert_pid = fork ();
                  if (alert_pid < 0)
                    {
                      if (errno == EINTR)
                        goto retry_alert_fork;
                      dsk_warning ("error forking alert process: %s", strerror (errno));
                      alert_pid = 0;
                    }
                  else if (alert_pid == 0)
                    {
                      execl ("/bin/sh", "/bin/sh", "-c", dsk_daemon_alert_script, NULL);
                      _exit (127);
                    }
                  else
                    dsk_daemon_n_alerts_suppressed = 0;
		}
              else
	        ++dsk_daemon_n_alerts_suppressed;
	    }
	}
    }
}