/*
 * Fill one jag_prec_t accounting record from an AIX procsinfo entry and
 * append it to prec_list.  Factored out of _get_precs(), which previously
 * duplicated this code verbatim in both branches.
 *
 * prec_list IN/OUT - list the new record is appended to (list owns it)
 * proc IN          - procsinfo entry as returned by getprocs()
 */
static void _fill_prec(List prec_list, struct procsinfo *proc)
{
	/* was xmalloc(sizeof(prec_t)): wrong type name; sizeof(*prec)
	 * always matches the pointer's type */
	jag_prec_t *prec = xmalloc(sizeof(*prec));

	list_append(prec_list, prec);
	prec->pid = proc->pi_pid;
	prec->ppid = proc->pi_ppid;
	/* ru_utime/ru_stime are struct timeval: fold usec into seconds */
	prec->usec = proc->pi_ru.ru_utime.tv_sec +
		proc->pi_ru.ru_utime.tv_usec * 1e-6;
	prec->ssec = proc->pi_ru.ru_stime.tv_sec +
		proc->pi_ru.ru_stime.tv_usec * 1e-6;
	prec->pages = proc->pi_majflt;
	/* text + data resident set sizes are counted in pages */
	prec->rss = (proc->pi_trss + proc->pi_drss) * pagesize;
	/* pi_tsize is in bytes, pi_dvm in pages -- TODO confirm units
	 * against the AIX procinfo.h definitions */
	prec->vsize = (proc->pi_tsize / 1024);
	prec->vsize += (proc->pi_dvm * pagesize);
	/* debug("vsize = %f = (%d/1024)+(%d*%d)", */
	/*       prec->vsize, proc->pi_tsize, proc->pi_dvm, pagesize); */
}

/*
 * Build the list of per-process accounting records (jag_prec_t) for this
 * job step using the AIX getprocs() interface.
 *
 * task_list IN   - unused here (kept for interface compatibility)
 * pgid_plugin IN - true when proctrack/pgid is in use, in which case
 *                  proctrack_g_get_pids() is unavailable and we must scan
 *                  every process on the node
 * cont_id IN     - proctrack container id
 * callbacks IN   - unused here (kept for interface compatibility)
 *
 * Returns a List of jag_prec_t (possibly empty); caller destroys it.
 */
static List _get_precs(List task_list, bool pgid_plugin, uint64_t cont_id,
		       jag_callbacks_t *callbacks)
{
	List prec_list = list_create(destroy_jag_prec);
	struct procsinfo proc;
	pid_t pid = 0;
	int i;

	if (!pgid_plugin) {
		pid_t *pids = NULL;
		int npids = 0;
		/* get only the processes in the proctrack container */
		proctrack_g_get_pids(cont_id, &pids, &npids);
		if (!npids) {
			debug4("no pids in this container %"PRIu64"", cont_id);
			goto finished;
		}
		for (i = 0; i < npids; i++) {
			pid = pids[i];
			if (!getprocs(&proc, sizeof(proc), 0, 0, &pid, 1))
				continue; /* Assume the process went away */
			_fill_prec(prec_list, &proc);
		}
		xfree(pids);	/* was leaked before */
	} else {
		/* pgid plugin: walk the whole process table */
		while (getprocs(&proc, sizeof(proc), 0, 0, &pid, 1) == 1)
			_fill_prec(prec_list, &proc);
	}
finished:
	/* label/return were missing: function fell off the end */
	return prec_list;
}
/* * Signal all processes within a container * cont_id IN - container ID as returned by proctrack_g_create() * signal IN - signal to send, if zero then perform error checking * but do not send signal * * Returns a SLURM errno. */ extern int proctrack_g_signal(uint64_t cont_id, int signal) { if (slurm_proctrack_init() < 0) return SLURM_ERROR; if (signal == SIGKILL) { pid_t *pids = NULL; int i, j, npids = 0, hung_pids = 0; char *stat_fname = NULL; if (proctrack_g_get_pids(cont_id, &pids, &npids) == SLURM_SUCCESS) { /* NOTE: proctrack_g_get_pids() is not supported * by the proctrack/pgid plugin */ for (j = 0; j < 2; j++) { if (j) sleep(2); hung_pids = 0; for (i = 0; i < npids; i++) { if (!pids[i]) continue; xstrfmtcat(stat_fname, "/proc/%d/stat", (int) pids[i]); if (_test_core_dumping(stat_fname)) { debug("Process %d continuing " "core dump", (int) pids[i]); hung_pids++; } else { /* Don't test this PID again */ pids[i] = 0; } xfree(stat_fname); } if (hung_pids == 0) break; } xfree(pids); if (hung_pids) { info("Defering sending signal, processes in " "job are currently core dumping"); _spawn_signal_thread(cont_id, signal); return SLURM_SUCCESS; } } } return (*(ops.signal)) (cont_id, signal); }
/* We don't check the uid in this function, anyone may list the task info. */
/*
 * Write the step's PID list to fd as a uint32_t count followed by that
 * many uint32_t PIDs.  Returns SLURM_SUCCESS, or SLURM_FAILURE if any
 * write fails (safe_write() jumps to rwfail on error).
 */
static int _handle_list_pids(int fd, stepd_step_rec_t *job)
{
	int i;
	pid_t *pids = NULL;
	int npids = 0;
	uint32_t npids32;
	uint32_t pid;

	debug("_handle_list_pids for job %u.%u", job->jobid, job->stepid);
	proctrack_g_get_pids(job->cont_id, &pids, &npids);
	/* fix: the count was written through &npids (an int) with a
	 * sizeof(uint32_t) length; use an explicit uint32_t so the wire
	 * format does not depend on sizeof(int) */
	npids32 = (uint32_t) npids;
	safe_write(fd, &npids32, sizeof(uint32_t));
	for (i = 0; i < npids; i++) {
		pid = (uint32_t) pids[i];
		safe_write(fd, &pid, sizeof(uint32_t));
	}
	xfree(pids);	/* xfree(NULL) is a no-op; npids guard was redundant */
	return SLURM_SUCCESS;
rwfail:
	xfree(pids);
	return SLURM_FAILURE;
}
static void *_sig_agent(void *args) { agent_arg_t *agent_arg_ptr = args; while (1) { pid_t *pids = NULL; int i, npids = 0, hung_pids = 0; char *stat_fname = NULL; if (proctrack_g_get_pids(agent_arg_ptr->cont_id, &pids, &npids) == SLURM_SUCCESS) { hung_pids = 0; for (i = 0; i < npids; i++) { xstrfmtcat(stat_fname, "/proc/%d/stat", (int) pids[i]); if (_test_core_dumping(stat_fname)) { debug("Process %d continuing " "core dump", (int) pids[i]); hung_pids++; } else { /* Kill processes that we can now */ kill(pids[i], agent_arg_ptr->signal); } xfree(stat_fname); } } if (hung_pids == 0) break; sleep(5); } (void) (*(ops.signal)) (agent_arg_ptr->cont_id, agent_arg_ptr->signal); xfree(args); return NULL; }
/*
 * Build the list of per-process accounting records (jag_prec_t) for this
 * step by reading /proc/<pid>/stat and /proc/<pid>/io for each process.
 *
 * task_list IN   - step's task accounting list; only consulted to refresh
 *                  consumed-energy data when the container has no pids left
 * pgid_plugin IN - true when proctrack/pgid is in use; that plugin cannot
 *                  enumerate container pids, so we scan all of /proc instead
 * cont_id IN     - proctrack container id
 * callbacks IN   - passed through to _handle_stats()
 *
 * Returns a List of jag_prec_t (possibly empty); caller destroys it.
 */
static List _get_precs(List task_list, bool pgid_plugin, uint64_t cont_id,
		       jag_callbacks_t *callbacks)
{
	List prec_list = list_create(destroy_jag_prec);
	char proc_stat_file[256];	/* Allow ~20x extra length */
	char proc_io_file[256];		/* Allow ~20x extra length */
	static int slash_proc_open = 0;	/* keep /proc dir handle across calls */
	int i;

	if (!pgid_plugin) {
		pid_t *pids = NULL;
		int npids = 0;
		/* get only the processes in the proctrack container */
		proctrack_g_get_pids(cont_id, &pids, &npids);
		if (!npids) {
			/* update consumed energy even if pids do not exist */
			ListIterator itr = list_iterator_create(task_list);
			struct jobacctinfo *jobacct = NULL;
			if ((jobacct = list_next(itr))) {
				acct_gather_energy_g_get_data(
					energy_profile,
					&jobacct->energy);
				debug2("getjoules_task energy = %u",
				       jobacct->energy.consumed_energy);
			}
			list_iterator_destroy(itr);

			debug4("no pids in this container %"PRIu64"", cont_id);
			goto finished;
		}
		for (i = 0; i < npids; i++) {
			snprintf(proc_stat_file, 256, "/proc/%d/stat",
				 pids[i]);
			snprintf(proc_io_file, 256, "/proc/%d/io", pids[i]);
			_handle_stats(prec_list, proc_stat_file, proc_io_file,
				      callbacks);
		}
		xfree(pids);
	} else {
		struct dirent *slash_proc_entry;
		char  *iptr = NULL, *optr = NULL, *optr2 = NULL;

		if (slash_proc_open) {
			rewinddir(slash_proc);
		} else {
			slash_proc=opendir("/proc");
			if (slash_proc == NULL) {
				perror("opening /proc");
				goto finished;
			}
			slash_proc_open=1;
		}
		/* Both buffers keep a constant "/proc/" prefix; only the
		 * pid portion is rewritten per directory entry below. */
		strcpy(proc_stat_file, "/proc/");
		strcpy(proc_io_file, "/proc/");

		while ((slash_proc_entry = readdir(slash_proc))) {
			/* Save a few cyles by simulating
			 * strcat(statFileName, slash_proc_entry->d_name);
			 * strcat(statFileName, "/stat");
			 * while checking for a numeric filename (which really
			 * should be a pid). Then do the same for the
			 * /proc/<pid>/io file name.
			 */
			/* sizeof("/proc") == 6 (includes the NUL), so this
			 * lands exactly on the byte after the trailing '/'
			 * of the "/proc/" prefix. */
			optr = proc_stat_file + sizeof("/proc");
			iptr = slash_proc_entry->d_name;
			i = 0;
			do {
				/* Copy one byte and bail out (i = -1) as
				 * soon as a non-digit appears: non-numeric
				 * entries are not pids. */
				if ((*iptr < '0') ||
				    ((*optr++ = *iptr++) > '9')) {
					i = -1;
					break;
				}
			} while (*iptr);
			if (i == -1)
				continue;
			iptr = (char*)"/stat";
			do {
				*optr++ = *iptr++;
			} while (*iptr);
			*optr = 0;

			/* Repeat the same copy for the /proc/<pid>/io path */
			optr2 = proc_io_file + sizeof("/proc");
			iptr = slash_proc_entry->d_name;
			i = 0;
			do {
				if ((*iptr < '0') ||
				    ((*optr2++ = *iptr++) > '9')) {
					i = -1;
					break;
				}
			} while (*iptr);
			if (i == -1)
				continue;
			iptr = (char*)"/io";
			do {
				*optr2++ = *iptr++;
			} while (*iptr);
			*optr2 = 0;

			_handle_stats(prec_list, proc_stat_file, proc_io_file,
				      callbacks);
		}
	}
finished:
	return prec_list;
}
/* Wait for the pid given and when it ends get and children it might * of left behind and wait on them instead. */ static void *_wait_extern_pid(void *args) { extern_pid_t *extern_pid = (extern_pid_t *)args; stepd_step_rec_t *job = extern_pid->job; pid_t pid = extern_pid->pid; jobacctinfo_t *jobacct = NULL; pid_t *pids = NULL; int npids = 0, i; char proc_stat_file[256]; /* Allow ~20x extra length */ FILE *stat_fp = NULL; int fd; char sbuf[256], *tmp, state[1]; int num_read, ppid; xfree(extern_pid); //info("waiting on pid %d", pid); _block_on_pid(pid); //info("done with pid %d %d: %m", pid, rc); jobacct = jobacct_gather_remove_task(pid); if (jobacct) { job->jobacct->energy.consumed_energy = 0; jobacctinfo_aggregate(job->jobacct, jobacct); jobacctinfo_destroy(jobacct); } acct_gather_profile_g_task_end(pid); /* See if we have any children of init left and add them to track. */ proctrack_g_get_pids(job->cont_id, &pids, &npids); for (i = 0; i < npids; i++) { snprintf(proc_stat_file, 256, "/proc/%d/stat", pids[i]); if (!(stat_fp = fopen(proc_stat_file, "r"))) continue; /* Assume the process went away */ fd = fileno(stat_fp); fcntl(fd, F_SETFD, FD_CLOEXEC); num_read = read(fd, sbuf, (sizeof(sbuf) - 1)); if (num_read <= 0) goto next_pid; sbuf[num_read] = '\0'; /* get to the end of cmd name */ tmp = strrchr(sbuf, ')'); *tmp = '\0'; /* replace trailing ')' with NULL */ /* skip space after ')' too */ sscanf(tmp + 2, "%c %d ", state, &ppid); if (ppid == 1) { debug2("adding tracking of orphaned process %d", pids[i]); _handle_add_extern_pid_internal(job, pids[i]); } next_pid: fclose(stat_fp); } return NULL; }
static void *_sig_agent(void *args) { bool hung_pids = false; agent_arg_t *agent_arg_ptr = args; while (1) { pid_t *pids = NULL; int i, npids = 0; char *stat_fname = NULL; if (hung_pids) sleep(5); hung_pids = false; if (proctrack_g_get_pids(agent_arg_ptr->cont_id, &pids, &npids) == SLURM_SUCCESS) { /* * Check if any processes are core dumping. * If so, do not signal any of them, instead * jump back to the sleep and wait for the core * dump to finish. * * This works around an issue with OpenMP * applications failing to write a full core * file out - only one of the processes will * be marked are core dumping, but killing any * of them will terminate the application. */ for (i = 0; i < npids; i++) { xstrfmtcat(stat_fname, "/proc/%d/stat", (int) pids[i]); if (_test_core_dumping(stat_fname)) { debug("Process %d continuing core dump", (int) pids[i]); hung_pids = true; xfree(stat_fname); break; } xfree(stat_fname); } if (hung_pids) { xfree(pids); continue; } for (i = 0; i < npids; i++) { /* Kill processes */ kill(pids[i], agent_arg_ptr->signal); } xfree(pids); } break; } (void) (*(ops.signal)) (agent_arg_ptr->cont_id, agent_arg_ptr->signal); xfree(args); return NULL; }