extern void jobacct_gather_handle_mem_limit(uint64_t total_job_mem,
					    uint64_t total_job_vsize)
{
	if (!plugin_polling)
		return;

	if (jobacct_mem_limit) {
		if (jobacct_step_id == NO_VAL) {
			debug("Job %u memory used:%"PRIu64" limit:%"PRIu64" KB",
			      jobacct_job_id, total_job_mem,
			      jobacct_mem_limit);
		} else {
			debug("Step %u.%u memory used:%"PRIu64" "
			      "limit:%"PRIu64" KB",
			      jobacct_job_id, jobacct_step_id,
			      total_job_mem, jobacct_mem_limit);
		}
	}

	if (jobacct_job_id && jobacct_mem_limit &&
	    (total_job_mem > jobacct_mem_limit)) {
		if (jobacct_step_id == NO_VAL) {
			error("Job %u exceeded memory limit "
			      "(%"PRIu64" > %"PRIu64"), being killed",
			      jobacct_job_id, total_job_mem,
			      jobacct_mem_limit);
		} else {
			error("Step %u.%u exceeded memory limit "
			      "(%"PRIu64" > %"PRIu64"), being killed",
			      jobacct_job_id, jobacct_step_id,
			      total_job_mem, jobacct_mem_limit);
		}
		_acct_kill_step();
	} else if (jobacct_job_id && jobacct_vmem_limit &&
		   (total_job_vsize > jobacct_vmem_limit)) {
		if (jobacct_step_id == NO_VAL) {
			error("Job %u exceeded virtual memory limit "
			      "(%"PRIu64" > %"PRIu64"), being killed",
			      jobacct_job_id, total_job_vsize,
			      jobacct_vmem_limit);
		} else {
			error("Step %u.%u exceeded virtual memory limit "
			      "(%"PRIu64" > %"PRIu64"), being killed",
			      jobacct_job_id, jobacct_step_id,
			      total_job_vsize, jobacct_vmem_limit);
		}
		_acct_kill_step();
	}
}
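/*
 * Illustrative sketch only (not part of the plugin): one way a polling
 * cycle might total the per-task figures it has just gathered and hand
 * them to jobacct_gather_handle_mem_limit() above.  The function name
 * _poll_cycle is hypothetical; task_list, jobacct_lock, and the
 * tot_rss/tot_vsize fields are the ones used by the gatherers below.
 */
static void _poll_cycle(void)
{
	uint64_t total_job_mem = 0, total_job_vsize = 0;
	struct jobacctinfo *jobacct = NULL;
	ListIterator itr;

	slurm_mutex_lock(&jobacct_lock);
	itr = list_iterator_create(task_list);
	while ((jobacct = list_next(itr))) {
		/* sum the latest samples across every task on this node */
		total_job_mem   += jobacct->tot_rss;
		total_job_vsize += jobacct->tot_vsize;
	}
	list_iterator_destroy(itr);
	slurm_mutex_unlock(&jobacct_lock);

	/* enforce the configured memory and vmem limits, if any */
	jobacct_gather_handle_mem_limit(total_job_mem, total_job_vsize);
}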
/*
 * _get_process_data() - Build a table of all current processes
 *
 * IN:	none
 * OUT:	none
 *
 * THREADSAFE! Only one thread ever gets here.
 *
 * Assumption:
 *    Each record returned by getprocs() is an AIX-style procsinfo
 *    entry.  We disregard the data if they look wrong.
 */
static void _get_process_data(void)
{
	struct procsinfo proc;
	pid_t *pids = NULL;
	int npids = 0;
	int i;
	uint32_t total_job_mem = 0, total_job_vsize = 0;
	int pid = 0;
	static int processing = 0;
	prec_t *prec = NULL;
	struct jobacctinfo *jobacct = NULL;
	List prec_list = NULL;
	ListIterator itr;
	ListIterator itr2;

	if (!pgid_plugin && (cont_id == (uint64_t)NO_VAL)) {
		debug("cont_id hasn't been set yet, not running poll");
		return;
	}

	if (processing) {
		debug("already running, returning");
		return;
	}
	processing = 1;
	prec_list = list_create(_destroy_prec);

	if (!pgid_plugin) {
		/* get only the processes in the proctrack container */
		slurm_container_get_pids(cont_id, &pids, &npids);
		if (!npids) {
			debug4("no pids in this container %"PRIu64"", cont_id);
			goto finished;
		}
		for (i = 0; i < npids; i++) {
			pid = pids[i];
			if (!getprocs(&proc, sizeof(proc), 0, 0, &pid, 1))
				continue; /* Assume the process went away */
			prec = xmalloc(sizeof(prec_t));
			list_append(prec_list, prec);
			prec->pid   = proc.pi_pid;
			prec->ppid  = proc.pi_ppid;
			prec->usec  = proc.pi_ru.ru_utime.tv_sec +
				proc.pi_ru.ru_utime.tv_usec * 1e-6;
			prec->ssec  = proc.pi_ru.ru_stime.tv_sec +
				proc.pi_ru.ru_stime.tv_usec * 1e-6;
			prec->pages = proc.pi_majflt;
			prec->rss   = (proc.pi_trss + proc.pi_drss) * pagesize;
			/* prec->rss *= 1024; */
			prec->vsize  = (proc.pi_tsize / 1024);
			prec->vsize += (proc.pi_dvm * pagesize);
			/* prec->vsize *= 1024; */
			/* debug("vsize = %f = (%d/1024)+(%d*%d)",
			 *       prec->vsize, proc.pi_tsize,
			 *       proc.pi_dvm, pagesize); */
		}
	} else {
		while (getprocs(&proc, sizeof(proc), 0, 0, &pid, 1) == 1) {
			prec = xmalloc(sizeof(prec_t));
			list_append(prec_list, prec);
			prec->pid   = proc.pi_pid;
			prec->ppid  = proc.pi_ppid;
			prec->usec  = proc.pi_ru.ru_utime.tv_sec +
				proc.pi_ru.ru_utime.tv_usec * 1e-6;
			prec->ssec  = proc.pi_ru.ru_stime.tv_sec +
				proc.pi_ru.ru_stime.tv_usec * 1e-6;
			prec->pages = proc.pi_majflt;
			prec->rss   = (proc.pi_trss + proc.pi_drss) * pagesize;
			/* prec->rss *= 1024; */
			prec->vsize  = (proc.pi_tsize / 1024);
			prec->vsize += (proc.pi_dvm * pagesize);
			/* prec->vsize *= 1024; */
			/* debug("vsize = %f = (%d/1024)+(%d*%d)",
			 *       prec->vsize, proc.pi_tsize,
			 *       proc.pi_dvm, pagesize); */
		}
	}

	if (!list_count(prec_list))
		goto finished;

	slurm_mutex_lock(&jobacct_lock);
	if (!task_list || !list_count(task_list)) {
		slurm_mutex_unlock(&jobacct_lock);
		goto finished;
	}

	itr = list_iterator_create(task_list);
	while ((jobacct = list_next(itr))) {
		itr2 = list_iterator_create(prec_list);
		while ((prec = list_next(itr2))) {
			/* debug2("pid %d ? %d", prec->ppid, jobacct->pid); */
			if (prec->pid == jobacct->pid) {
				/* find all my descendents */
				_get_offspring_data(prec_list, prec,
						    prec->pid);
				/* tally their usage */
				jobacct->max_rss = jobacct->tot_rss =
					MAX(jobacct->max_rss, (int)prec->rss);
				total_job_mem += jobacct->max_rss;
				jobacct->max_vsize = jobacct->tot_vsize =
					MAX(jobacct->max_vsize,
					    (int)prec->vsize);
				total_job_vsize += prec->vsize;
				jobacct->max_pages = jobacct->tot_pages =
					MAX(jobacct->max_pages, prec->pages);
				jobacct->min_cpu = jobacct->tot_cpu =
					MAX(jobacct->min_cpu,
					    (prec->usec + prec->ssec));
				debug2("%d size now %d %d time %d",
				       jobacct->pid, jobacct->max_rss,
				       jobacct->max_vsize, jobacct->tot_cpu);
				break;
			}
		}
		list_iterator_destroy(itr2);
	}
	list_iterator_destroy(itr);
	slurm_mutex_unlock(&jobacct_lock);

	if (jobacct_mem_limit) {
		if (jobacct_step_id == NO_VAL) {
			debug("Job %u memory used:%u limit:%u KB",
			      jobacct_job_id, total_job_mem,
			      jobacct_mem_limit);
		} else {
			debug("Step %u.%u memory used:%u limit:%u KB",
			      jobacct_job_id, jobacct_step_id,
			      total_job_mem, jobacct_mem_limit);
		}
	}
	if (jobacct_job_id && jobacct_mem_limit &&
	    (total_job_mem > jobacct_mem_limit)) {
		if (jobacct_step_id == NO_VAL) {
			error("Job %u exceeded %u KB memory limit, being "
			      "killed", jobacct_job_id, jobacct_mem_limit);
		} else {
			error("Step %u.%u exceeded %u KB memory limit, being "
			      "killed", jobacct_job_id, jobacct_step_id,
			      jobacct_mem_limit);
		}
		_acct_kill_step();
	} else if (jobacct_job_id && jobacct_vmem_limit &&
		   (total_job_vsize > jobacct_vmem_limit)) {
		if (jobacct_step_id == NO_VAL) {
			error("Job %u exceeded %u KB virtual memory limit, "
			      "being killed", jobacct_job_id,
			      jobacct_vmem_limit);
		} else {
			error("Step %u.%u exceeded %u KB virtual memory "
			      "limit, being killed", jobacct_job_id,
			      jobacct_step_id, jobacct_vmem_limit);
		}
		_acct_kill_step();
	}

finished:
	list_destroy(prec_list);
	processing = 0;
	return;
}
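/*
 * Illustrative sketch only: _get_offspring_data(), called by both
 * variants of _get_process_data(), is not shown in this excerpt.  Per
 * the "find all my descendents" comments above, it would fold each
 * descendent's usage into the ancestor's prec_t record, recursing so
 * that grandchildren are counted too; the real implementation may
 * differ in detail.
 */
static void _get_offspring_data(List prec_list, prec_t *ancestor, pid_t pid)
{
	ListIterator itr;
	prec_t *prec = NULL;

	itr = list_iterator_create(prec_list);
	while ((prec = list_next(itr))) {
		if (prec->ppid == pid) {
			/* pick up this child's own descendents first */
			_get_offspring_data(prec_list, prec, prec->pid);
			/* then add the child's usage to the ancestor */
			ancestor->usec  += prec->usec;
			ancestor->ssec  += prec->ssec;
			ancestor->pages += prec->pages;
			ancestor->rss   += prec->rss;
			ancestor->vsize += prec->vsize;
		}
	}
	list_iterator_destroy(itr);
}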
/*
 * _get_process_data() - Build a table of all current processes
 *
 * IN:	none
 * OUT:	none
 *
 * THREADSAFE! Only one thread ever gets here.
 *
 * Assumption:
 *    Any file with a name of the form "/proc/[0-9]+/stat"
 *    is a Linux-style stat entry.  We disregard the data if they look
 *    wrong.
 */
static void _get_process_data(void)
{
	static int slash_proc_open = 0;
	struct dirent *slash_proc_entry;
	char *iptr = NULL, *optr = NULL;
	FILE *stat_fp = NULL;
	char proc_stat_file[256];	/* Allow ~20x extra length */
	List prec_list = NULL;
	pid_t *pids = NULL;
	int npids = 0;
	uint32_t total_job_mem = 0, total_job_vsize = 0;
	int i, fd;
	ListIterator itr;
	ListIterator itr2;
	prec_t *prec = NULL;
	struct jobacctinfo *jobacct = NULL;
	static int processing = 0;
	long hertz;

	if (!pgid_plugin && (cont_id == (uint64_t)NO_VAL)) {
		debug("cont_id hasn't been set yet, not running poll");
		return;
	}

	if (processing) {
		debug("already running, returning");
		return;
	}
	processing = 1;
	prec_list = list_create(_destroy_prec);

	hertz = sysconf(_SC_CLK_TCK);
	if (hertz < 1) {
		error("_get_process_data: unable to get clock rate");
		hertz = 100;	/* default on many systems */
	}

	if (!pgid_plugin) {
		/* get only the processes in the proctrack container */
		slurm_container_get_pids(cont_id, &pids, &npids);
		if (!npids) {
			debug4("no pids in this container %"PRIu64"", cont_id);
			goto finished;
		}
		for (i = 0; i < npids; i++) {
			snprintf(proc_stat_file, 256, "/proc/%d/stat",
				 pids[i]);
			if ((stat_fp = fopen(proc_stat_file, "r")) == NULL)
				continue; /* Assume the process went away */
			/*
			 * Close the file on exec() of user tasks.
			 *
			 * NOTE: If we fork() slurmstepd after the fopen()
			 * above and before the fcntl() below, then the user
			 * task may have this extra file open, which can
			 * cause problems for checkpoint/restart, but this
			 * should be a very rare problem in practice.
			 */
			fd = fileno(stat_fp);
			fcntl(fd, F_SETFD, FD_CLOEXEC);

			prec = xmalloc(sizeof(prec_t));
			if (_get_process_data_line(fd, prec))
				list_append(prec_list, prec);
			else
				xfree(prec);
			fclose(stat_fp);
		}
	} else {
		slurm_mutex_lock(&reading_mutex);

		if (slash_proc_open) {
			rewinddir(slash_proc);
		} else {
			slash_proc = opendir("/proc");
			if (slash_proc == NULL) {
				perror("opening /proc");
				slurm_mutex_unlock(&reading_mutex);
				goto finished;
			}
			slash_proc_open = 1;
		}
		strcpy(proc_stat_file, "/proc/");

		while ((slash_proc_entry = readdir(slash_proc))) {
			/* Save a few cycles by simulating
			 *    strcat(statFileName, slash_proc_entry->d_name);
			 *    strcat(statFileName, "/stat");
			 * while checking for a numeric filename (which
			 * really should be a pid).
			 */
			optr = proc_stat_file + sizeof("/proc");
			iptr = slash_proc_entry->d_name;
			i = 0;
			do {
				if ((*iptr < '0') ||
				    ((*optr++ = *iptr++) > '9')) {
					i = -1;
					break;
				}
			} while (*iptr);
			if (i == -1)
				continue;
			iptr = (char *)"/stat";

			do {
				*optr++ = *iptr++;
			} while (*iptr);
			*optr = 0;

			if ((stat_fp = fopen(proc_stat_file, "r")) == NULL)
				continue; /* Assume the process went away */
			/*
			 * Close the file on exec() of user tasks.
			 *
			 * NOTE: If we fork() slurmstepd after the fopen()
			 * above and before the fcntl() below, then the user
			 * task may have this extra file open, which can
			 * cause problems for checkpoint/restart, but this
			 * should be a very rare problem in practice.
			 */
			fd = fileno(stat_fp);
			fcntl(fd, F_SETFD, FD_CLOEXEC);

			prec = xmalloc(sizeof(prec_t));
			if (_get_process_data_line(fd, prec))
				list_append(prec_list, prec);
			else
				xfree(prec);
			fclose(stat_fp);
		}
		slurm_mutex_unlock(&reading_mutex);
	}

	if (!list_count(prec_list))
		goto finished;	/* We have no business being here! */

	slurm_mutex_lock(&jobacct_lock);
	if (!task_list || !list_count(task_list)) {
		slurm_mutex_unlock(&jobacct_lock);
		goto finished;
	}

	itr = list_iterator_create(task_list);
	while ((jobacct = list_next(itr))) {
		itr2 = list_iterator_create(prec_list);
		while ((prec = list_next(itr2))) {
			if (prec->pid == jobacct->pid) {
#if _DEBUG
				info("pid:%u ppid:%u rss:%d KB",
				     prec->pid, prec->ppid, prec->rss);
#endif
				/* find all my descendents */
				_get_offspring_data(prec_list, prec,
						    prec->pid);
				/* tally their usage */
				jobacct->max_rss = jobacct->tot_rss =
					MAX(jobacct->max_rss, prec->rss);
				total_job_mem += prec->rss;
				jobacct->max_vsize = jobacct->tot_vsize =
					MAX(jobacct->max_vsize, prec->vsize);
				total_job_vsize += prec->vsize;
				jobacct->max_pages = jobacct->tot_pages =
					MAX(jobacct->max_pages, prec->pages);
				jobacct->min_cpu = jobacct->tot_cpu =
					MAX(jobacct->min_cpu,
					    (prec->ssec / hertz +
					     prec->usec / hertz));
				debug2("%d mem size %u %u time %u(%u+%u)",
				       jobacct->pid, jobacct->max_rss,
				       jobacct->max_vsize, jobacct->tot_cpu,
				       prec->usec, prec->ssec);
				break;
			}
		}
		list_iterator_destroy(itr2);
	}
	list_iterator_destroy(itr);
	slurm_mutex_unlock(&jobacct_lock);

	if (jobacct_mem_limit) {
		if (jobacct_step_id == NO_VAL) {
			debug("Job %u memory used:%u limit:%u KB",
			      jobacct_job_id, total_job_mem,
			      jobacct_mem_limit);
		} else {
			debug("Step %u.%u memory used:%u limit:%u KB",
			      jobacct_job_id, jobacct_step_id,
			      total_job_mem, jobacct_mem_limit);
		}
	}
	if (jobacct_job_id && jobacct_mem_limit &&
	    (total_job_mem > jobacct_mem_limit)) {
		if (jobacct_step_id == NO_VAL) {
			error("Job %u exceeded %u KB memory limit, being "
			      "killed", jobacct_job_id, jobacct_mem_limit);
		} else {
			error("Step %u.%u exceeded %u KB memory limit, being "
			      "killed", jobacct_job_id, jobacct_step_id,
			      jobacct_mem_limit);
		}
		_acct_kill_step();
	} else if (jobacct_job_id && jobacct_vmem_limit &&
		   (total_job_vsize > jobacct_vmem_limit)) {
		if (jobacct_step_id == NO_VAL) {
			error("Job %u exceeded %u KB virtual memory limit, "
			      "being killed", jobacct_job_id,
			      jobacct_vmem_limit);
		} else {
			error("Step %u.%u exceeded %u KB virtual memory "
			      "limit, being killed", jobacct_job_id,
			      jobacct_step_id, jobacct_vmem_limit);
		}
		_acct_kill_step();
	}

finished:
	list_destroy(prec_list);
	processing = 0;
	return;
}
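/*
 * Illustrative sketch only: _get_process_data_line(), called above, is
 * not shown in this excerpt.  Assuming the prec_t fields used by the
 * callers (and that <stdio.h>/<unistd.h> are already included in this
 * file), it would read one /proc/<pid>/stat record, with the field
 * layout documented in proc(5), roughly like this; the real parser may
 * differ and be more defensive.
 */
static int _get_process_data_line(int in, prec_t *prec)
{
	char sbuf[512], cmd[64], state;
	int ppid;
	unsigned long majflt, utime, stime, vsize;
	long rss;
	ssize_t n;

	if ((n = read(in, sbuf, sizeof(sbuf) - 1)) <= 0)
		return 0;
	sbuf[n] = '\0';

	/* fields 1-24 of /proc/<pid>/stat; the "%*" conversions skip
	 * the values we do not account for */
	if (sscanf(sbuf,
		   "%d (%63[^)]) %c %d %*d %*d %*d %*d %*u "
		   "%*u %*u %lu %*u %lu %lu %*d %*d %*d %*d "
		   "%*d %*d %*u %lu %ld",
		   &prec->pid, cmd, &state, &ppid,
		   &majflt, &utime, &stime, &vsize, &rss) != 9)
		return 0;	/* malformed entry; caller frees prec */

	prec->ppid  = ppid;
	prec->pages = majflt;
	prec->usec  = utime;	/* clock ticks; callers divide by hertz */
	prec->ssec  = stime;
	prec->vsize = vsize / 1024;			  /* bytes -> KB */
	prec->rss   = rss * sysconf(_SC_PAGESIZE) / 1024; /* pages -> KB */
	return 1;
}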