Example #1
File: req.c Project: VURM/slurm
/* We don't check the uid in this function, anyone may list the task info. */
static int
_handle_list_pids(int fd, slurmd_job_t *job)
{
	int i;
	pid_t *pids = NULL;
	int npids = 0;
	uint32_t pid;

	debug("_handle_list_pids for job %u.%u", job->jobid, job->stepid);
	slurm_container_get_pids(job->cont_id, &pids, &npids);
	safe_write(fd, &npids, sizeof(uint32_t));
	for (i = 0; i < npids; i++) {
		pid = (uint32_t)pids[i];
		safe_write(fd, &pid, sizeof(uint32_t));
	}
	if (npids > 0)
		xfree(pids);

	return SLURM_SUCCESS;
rwfail:
	if (npids > 0)
		xfree(pids);
	return SLURM_FAILURE;
}
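
The reply format above is simply a uint32_t count followed by that many uint32_t pids. As a usage illustration, a peer could read it back along these lines; the helpers below are hypothetical sketches, not SLURM API, and a production reader would also retry on EINTR:

#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>

/* Hypothetical: read exactly `len` bytes or fail. */
static int _read_full(int fd, void *buf, size_t len)
{
	char *p = buf;
	while (len > 0) {
		ssize_t n = read(fd, p, len);
		if (n <= 0)
			return -1;	/* EOF or error */
		p += n;
		len -= (size_t)n;
	}
	return 0;
}

/* Hypothetical: read the reply written by _handle_list_pids().
 * On success the caller owns *pids and must free() it. */
static int _read_pid_list(int fd, uint32_t **pids, uint32_t *npids)
{
	*pids = NULL;
	if (_read_full(fd, npids, sizeof(uint32_t)))
		return -1;
	if (*npids == 0)
		return 0;
	*pids = malloc(*npids * sizeof(uint32_t));
	if (!*pids)
		return -1;
	if (_read_full(fd, *pids, *npids * sizeof(uint32_t))) {
		free(*pids);
		*pids = NULL;
		return -1;
	}
	return 0;
}
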
/*
 * _get_process_data() - Build a table of all current processes
 *
 * IN:	none
 *
 * OUT:	none
 *
 * THREADSAFE! Only one thread ever gets here.
 *
 * Assumption:
 *    Any file with a name of the form "/proc/[0-9]+/stat"
 *    is a Linux-style stat entry. We disregard the data if they look
 *    wrong.
 */
static void _get_process_data(void)
{
	struct procsinfo proc;
	pid_t *pids = NULL;
	int npids = 0;
	int i;
	uint32_t total_job_mem = 0, total_job_vsize = 0;
	int pid = 0;
	static int processing = 0;
	prec_t *prec = NULL;
	struct jobacctinfo *jobacct = NULL;
	List prec_list = NULL;
	ListIterator itr;
	ListIterator itr2;

	if (!pgid_plugin && (cont_id == (uint64_t)NO_VAL)) {
		debug("cont_id hasn't been set yet not running poll");
		return;
	}

	if(processing) {
		debug("already running, returning");
		return;
	}

	processing = 1;
	prec_list = list_create(_destroy_prec);

	if(!pgid_plugin) {
		/* get only the processes in the proctrack container */
		slurm_container_get_pids(cont_id, &pids, &npids);
		if (!npids) {
			debug4("no pids in this container %"PRIu64"", cont_id);
			goto finished;
		}
		for (i = 0; i < npids; i++) {
			pid = pids[i];
			if(!getprocs(&proc, sizeof(proc), 0, 0, &pid, 1))
				continue; /* Assume the process went away */
			prec = xmalloc(sizeof(prec_t));
			list_append(prec_list, prec);
			prec->pid = proc.pi_pid;
			prec->ppid = proc.pi_ppid;
			prec->usec = proc.pi_ru.ru_utime.tv_sec +
				proc.pi_ru.ru_utime.tv_usec * 1e-6;
			prec->ssec = proc.pi_ru.ru_stime.tv_sec +
				proc.pi_ru.ru_stime.tv_usec * 1e-6;
			prec->pages = proc.pi_majflt;
			prec->rss = (proc.pi_trss + proc.pi_drss) * pagesize;
			//prec->rss *= 1024;
			prec->vsize = (proc.pi_tsize / 1024);
			prec->vsize += (proc.pi_dvm * pagesize);
			//prec->vsize *= 1024;
			/*  debug("vsize = %f = (%d/1024)+(%d*%d)",   */
/*    		      prec->vsize, proc.pi_tsize, proc.pi_dvm, pagesize);  */
		}
	} else {
		while(getprocs(&proc, sizeof(proc), 0, 0, &pid, 1) == 1) {
			prec = xmalloc(sizeof(prec_t));
			list_append(prec_list, prec);
			prec->pid = proc.pi_pid;
			prec->ppid = proc.pi_ppid;
			prec->usec = proc.pi_ru.ru_utime.tv_sec +
				proc.pi_ru.ru_utime.tv_usec * 1e-6;
			prec->ssec = proc.pi_ru.ru_stime.tv_sec +
				proc.pi_ru.ru_stime.tv_usec * 1e-6;
			prec->pages = proc.pi_majflt;
			prec->rss = (proc.pi_trss + proc.pi_drss) * pagesize;
			//prec->rss *= 1024;
			prec->vsize = (proc.pi_tsize / 1024);
			prec->vsize += (proc.pi_dvm * pagesize);
			//prec->vsize *= 1024;
			/*  debug("vsize = %f = (%d/1024)+(%d*%d)",   */
/*    		      prec->vsize, proc.pi_tsize, proc.pi_dvm, pagesize);  */
		}
	}
	if(!list_count(prec_list))
		goto finished;

	slurm_mutex_lock(&jobacct_lock);
	if(!task_list || !list_count(task_list)) {
		slurm_mutex_unlock(&jobacct_lock);
		goto finished;
	}
	itr = list_iterator_create(task_list);
	while((jobacct = list_next(itr))) {
		itr2 = list_iterator_create(prec_list);
		while((prec = list_next(itr2))) {
			//debug2("pid %d ? %d", prec->ppid, jobacct->pid);
			if (prec->pid == jobacct->pid) {
				/* find all my descendents */
				_get_offspring_data(prec_list, prec,
						    prec->pid);

				/* tally their usage */
				jobacct->max_rss = jobacct->tot_rss =
					MAX(jobacct->max_rss, (int)prec->rss);
				total_job_mem += jobacct->max_rss;
				jobacct->max_vsize = jobacct->tot_vsize =
					MAX(jobacct->max_vsize,
					    (int)prec->vsize);
				total_job_vsize += prec->vsize;
				jobacct->max_pages = jobacct->tot_pages =
					MAX(jobacct->max_pages, prec->pages);
				jobacct->min_cpu = jobacct->tot_cpu =
					MAX(jobacct->min_cpu,
					    (prec->usec + prec->ssec));
				debug2("%d size now %d %d time %d",
				      jobacct->pid, jobacct->max_rss,
				      jobacct->max_vsize, jobacct->tot_cpu);

				break;
			}
		}
		list_iterator_destroy(itr2);
	}
	list_iterator_destroy(itr);
	slurm_mutex_unlock(&jobacct_lock);

	if (jobacct_mem_limit) {
		debug("Step %u.%u memory used:%u limit:%u KB",
		      jobacct_job_id, jobacct_step_id, 
		      total_job_mem, jobacct_mem_limit);
	}
	if (jobacct_job_id && jobacct_mem_limit &&
	    (total_job_mem > jobacct_mem_limit)) {
		if (jobacct_step_id == NO_VAL) {
			error("Job %u exceeded %u KB memory limit, being "
 			      "killed", jobacct_job_id, jobacct_mem_limit);
		} else {
			error("Step %u.%u exceeded %u KB memory limit, being "
			      "killed", jobacct_job_id, jobacct_step_id, 
			      jobacct_mem_limit);
		}
		_acct_kill_step();
	} else if (jobacct_job_id && jobacct_vmem_limit &&
	    (total_job_vsize > jobacct_vmem_limit)) {
		if (jobacct_step_id == NO_VAL) {
			error("Job %u exceeded %u KB virtual memory limit, "
			      "being killed", jobacct_job_id, 
			      jobacct_vmem_limit);
		} else {
			error("Step %u.%u exceeded %u KB virtual memory "
			      "limit, being killed", jobacct_job_id, 
			      jobacct_step_id, jobacct_vmem_limit);
		}
		_acct_kill_step();
	}

finished:
	list_destroy(prec_list);
	processing = 0;

	return;
}
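
Neither prec_t nor _get_offspring_data() appears in this listing. From the fields referenced above, the record plausibly looks like the struct below, and the descendant walk can be sketched as a recursive pass over the list; both are reconstructions under those assumptions, not the plugin's actual code (the Linux variant later on this page stores CPU time in clock ticks rather than seconds, so its real field types differ). List, ListIterator, and the list_* calls are SLURM's internal list API, as used above.

/* Reconstructed record shape; the plugin's real definition may differ.
 * Here CPU times are fractional seconds, as in the AIX variant above. */
typedef struct prec {
	pid_t	pid;		/* process ID */
	pid_t	ppid;		/* parent process ID */
	double	usec;		/* user CPU time */
	double	ssec;		/* system CPU time */
	int	pages;		/* major page faults */
	int	rss;		/* resident set size (KB) */
	int	vsize;		/* virtual memory size (KB) */
} prec_t;

/* Hypothetical sketch of the descendant walk: fold the usage of every
 * process whose parent chain leads back to `ancestor` into
 * ancestor_prec before the caller tallies it. */
static void _get_offspring_data(List prec_list, prec_t *ancestor_prec,
				pid_t ancestor)
{
	ListIterator itr = list_iterator_create(prec_list);
	prec_t *prec;

	while ((prec = list_next(itr))) {
		if (prec->ppid == ancestor) {
			/* recurse first so grandchildren are included */
			_get_offspring_data(prec_list, ancestor_prec,
					    prec->pid);
			ancestor_prec->usec  += prec->usec;
			ancestor_prec->ssec  += prec->ssec;
			ancestor_prec->pages += prec->pages;
			ancestor_prec->rss   += prec->rss;
			ancestor_prec->vsize += prec->vsize;
		}
	}
	list_iterator_destroy(itr);
}
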
/*
 * _get_process_data() - Build a table of all current processes
 *
 * IN:	none
 *
 * OUT:	none
 *
 * THREADSAFE! Only one thread ever gets here.
 *
 * Assumption:
 *    Any file with a name of the form "/proc/[0-9]+/stat"
 *    is a Linux-style stat entry. We disregard the data if they look
 *    wrong.
 */
static void _get_process_data(void)
{
	static	int	slash_proc_open = 0;

	struct	dirent *slash_proc_entry;
	char		*iptr = NULL, *optr = NULL;
	FILE		*stat_fp = NULL;
	char		proc_stat_file[256];	/* Allow ~20x extra length */
	List prec_list = NULL;
	pid_t *pids = NULL;
	int npids = 0;
	uint32_t total_job_mem = 0, total_job_vsize = 0;
	int		i, fd;
	ListIterator itr;
	ListIterator itr2;
	prec_t *prec = NULL;
	struct jobacctinfo *jobacct = NULL;
	static int processing = 0;
	long		hertz;

	if (!pgid_plugin && (cont_id == (uint64_t)NO_VAL)) {
		debug("cont_id hasn't been set yet not running poll");
		return;
	}

	if(processing) {
		debug("already running, returning");
		return;
	}
	processing = 1;
	prec_list = list_create(_destroy_prec);

	hertz = sysconf(_SC_CLK_TCK);
	if (hertz < 1) {
		error ("_get_process_data: unable to get clock rate");
		hertz = 100;	/* default on many systems */
	}

	if(!pgid_plugin) {
		/* get only the processes in the proctrack container */
		slurm_container_get_pids(cont_id, &pids, &npids);
		if(!npids) {
			debug4("no pids in this container %"PRIu64"", cont_id);
			goto finished;
		}
		for (i = 0; i < npids; i++) {
			snprintf(proc_stat_file, 256,
				 "/proc/%d/stat", pids[i]);
			if ((stat_fp = fopen(proc_stat_file, "r"))==NULL)
				continue;  /* Assume the process went away */
			/*
			 * Close the file on exec() of user tasks.
			 *
			 * NOTE: If we fork() slurmstepd after the
			 * fopen() above and before the fcntl() below,
			 * then the user task may have this extra file
			 * open, which can cause problems for
			 * checkpoint/restart, but this should be a very rare
			 * problem in practice.
			 */
			fd = fileno(stat_fp);
			fcntl(fd, F_SETFD, FD_CLOEXEC);

			prec = xmalloc(sizeof(prec_t));
			if (_get_process_data_line(fd, prec))
				list_append(prec_list, prec);
			else
				xfree(prec);
			fclose(stat_fp);
		}
	} else {
		slurm_mutex_lock(&reading_mutex);

		if (slash_proc_open) {
			rewinddir(slash_proc);
		} else {
			slash_proc=opendir("/proc");
			if (slash_proc == NULL) {
				perror("opening /proc");
				slurm_mutex_unlock(&reading_mutex);
				goto finished;
			}
			slash_proc_open=1;
		}
		strcpy(proc_stat_file, "/proc/");

		while ((slash_proc_entry = readdir(slash_proc))) {

			/* Save a few cycles by simulating
			   strcat(proc_stat_file, slash_proc_entry->d_name);
			   strcat(proc_stat_file, "/stat");
			   while checking for a numeric filename (which really
			   should be a pid).
			*/
			optr = proc_stat_file + sizeof("/proc");
			iptr = slash_proc_entry->d_name;
			i = 0;
			do {
				if((*iptr < '0')
				   || ((*optr++ = *iptr++) > '9')) {
					i = -1;
					break;
				}
			} while (*iptr);

			if(i == -1)
				continue;
			iptr = (char*)"/stat";

			do {
				*optr++ = *iptr++;
			} while (*iptr);
			*optr = 0;

			if ((stat_fp = fopen(proc_stat_file,"r"))==NULL)
				continue;  /* Assume the process went away */
			/*
			 * Close the file on exec() of user tasks.
			 *
			 * NOTE: If we fork() slurmstepd after the
			 * fopen() above and before the fcntl() below,
			 * then the user task may have this extra file
			 * open, which can cause problems for
			 * checkpoint/restart, but this should be a very rare
			 * problem in practice.
			 */
			fd = fileno(stat_fp);
			fcntl(fd, F_SETFD, FD_CLOEXEC);

			prec = xmalloc(sizeof(prec_t));
			if (_get_process_data_line(fd, prec))
				list_append(prec_list, prec);
			else
				xfree(prec);
			fclose(stat_fp);
		}
		slurm_mutex_unlock(&reading_mutex);

	}

	if (!list_count(prec_list)) {
		goto finished;	/* We have no business being here! */
	}

	slurm_mutex_lock(&jobacct_lock);
	if(!task_list || !list_count(task_list)) {
		slurm_mutex_unlock(&jobacct_lock);
		goto finished;
	}

	itr = list_iterator_create(task_list);
	while((jobacct = list_next(itr))) {
		itr2 = list_iterator_create(prec_list);
		while((prec = list_next(itr2))) {
			if (prec->pid == jobacct->pid) {
#if _DEBUG
				info("pid:%u ppid:%u rss:%d KB",
				     prec->pid, prec->ppid, prec->rss);
#endif
				/* find all my descendents */
				_get_offspring_data(prec_list,
						    prec, prec->pid);
				/* tally their usage */
				jobacct->max_rss = jobacct->tot_rss =
					MAX(jobacct->max_rss, prec->rss);
				total_job_mem += prec->rss;
				jobacct->max_vsize = jobacct->tot_vsize =
					MAX(jobacct->max_vsize, prec->vsize);
				total_job_vsize += prec->vsize;
				jobacct->max_pages = jobacct->tot_pages =
					MAX(jobacct->max_pages, prec->pages);
				jobacct->min_cpu = jobacct->tot_cpu =
					MAX(jobacct->min_cpu,
					    (prec->ssec / hertz +
					     prec->usec / hertz));
				debug2("%d mem size %u %u time %u(%u+%u)",
				       jobacct->pid, jobacct->max_rss,
				       jobacct->max_vsize, jobacct->tot_cpu,
				       prec->usec, prec->ssec);
				break;
			}
		}
		list_iterator_destroy(itr2);
	}
	list_iterator_destroy(itr);
	slurm_mutex_unlock(&jobacct_lock);

	if (jobacct_mem_limit) {
		if (jobacct_step_id == NO_VAL) {
			debug("Job %u memory used:%u limit:%u KB",
			      jobacct_job_id, total_job_mem, jobacct_mem_limit);
		} else {
			debug("Step %u.%u memory used:%u limit:%u KB",
			      jobacct_job_id, jobacct_step_id,
			      total_job_mem, jobacct_mem_limit);
		}
	}
	if (jobacct_job_id && jobacct_mem_limit &&
	    (total_job_mem > jobacct_mem_limit)) {
		if (jobacct_step_id == NO_VAL) {
			error("Job %u exceeded %u KB memory limit, being "
			      "killed", jobacct_job_id, jobacct_mem_limit);
		} else {
			error("Step %u.%u exceeded %u KB memory limit, being "
			      "killed", jobacct_job_id, jobacct_step_id,
			      jobacct_mem_limit);
		}
		_acct_kill_step();
	} else if (jobacct_job_id && jobacct_vmem_limit &&
	           (total_job_vsize > jobacct_vmem_limit)) {
		if (jobacct_step_id == NO_VAL) {
			error("Job %u exceeded %u KB virtual memory limit, "
			      "being killed", jobacct_job_id,
			      jobacct_vmem_limit);
		} else {
			error("Step %u.%u exceeded %u KB virtual memory "
			      "limit, being killed", jobacct_job_id,
			      jobacct_step_id, jobacct_vmem_limit);
		}
		_acct_kill_step();
	}

finished:
	list_destroy(prec_list);
	processing = 0;
	return;
}
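
The Linux variant delegates the actual parsing to _get_process_data_line(), which this listing doesn't include. Assuming the prec_t shape sketched after the first variant, with the integer tick counts that the hertz division above implies, a minimal stand-in could look like this; field positions follow proc(5), and everything here is illustrative:

#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Hypothetical parser for a /proc/<pid>/stat entry; returns 1 on
 * success, 0 if the data doesn't look like a valid stat line.
 * Field positions follow proc(5): pid(1) comm(2) state(3) ppid(4)
 * ... majflt(12) utime(14) stime(15) ... vsize(23) rss(24). */
static int _get_process_data_line(int fd, prec_t *prec)
{
	char sbuf[512], *tmp;
	unsigned long majflt, utime, stime, vsize;
	long rss;
	ssize_t n;

	n = read(fd, sbuf, sizeof(sbuf) - 1);
	if (n <= 0)
		return 0;
	sbuf[n] = '\0';

	if (sscanf(sbuf, "%d", &prec->pid) != 1)
		return 0;

	/* comm may itself contain spaces, so resume after the last ')' */
	tmp = strrchr(sbuf, ')');
	if (!tmp)
		return 0;

	if (sscanf(tmp + 1,
		   " %*c %d %*d %*d %*d %*d %*u %*u %*u %lu %*u %lu %lu "
		   "%*d %*d %*d %*d %*d %*d %*u %lu %ld",
		   &prec->ppid, &majflt, &utime, &stime, &vsize, &rss) != 6)
		return 0;

	prec->pages = majflt;			/* major page faults */
	prec->usec  = utime;			/* user time, clock ticks */
	prec->ssec  = stime;			/* system time, clock ticks */
	prec->vsize = vsize / 1024;		/* bytes -> KB */
	prec->rss   = rss * getpagesize() / 1024; /* pages -> KB */
	return 1;
}
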