Пример #1
0
/*
 * _handle_stats - sample one process and append its record to prec_list.
 *
 * IN prec_list      - list that receives the newly allocated jag_prec_t
 * IN proc_stat_file - path of the per-process stat file to parse
 * IN proc_io_file   - path of the per-process io file (optional: silently
 *                     skipped when it cannot be opened)
 * IN callbacks      - plugin hooks; prec_extra, when set, is invoked with
 *                     the new record and my_pagesize
 *
 * Nothing is appended when the stat file cannot be opened or when
 * _get_process_data_line() reports failure.
 */
static void _handle_stats(List prec_list, char *proc_stat_file,
			  char *proc_io_file, jag_callbacks_t *callbacks)
{
	static int no_share_data = -1;
	jag_prec_t *rec = NULL;
	FILE *stat_stream = NULL;
	FILE *io_stream = NULL;
	int stat_fd, io_fd;

	/* Look up the "NoShare" gather parameter on the first call only */
	if (no_share_data == -1) {
		char *params = slurm_get_jobacct_gather_params();

		no_share_data = (params && strstr(params, "NoShare")) ? 1 : 0;
		xfree(params);
	}

	stat_stream = fopen(proc_stat_file, "r");
	if (stat_stream == NULL)
		return;	/* Assume the process went away */

	/*
	 * Mark the descriptor close-on-exec so user tasks do not inherit it.
	 *
	 * NOTE: A fork() of slurmstepd that lands between the fopen() above
	 * and the fcntl() below can still leak this descriptor into the user
	 * task, which may upset checkpoint/restart. This is expected to be
	 * very rare in practice.
	 */
	stat_fd = fileno(stat_stream);
	fcntl(stat_fd, F_SETFD, FD_CLOEXEC);

	rec = xmalloc(sizeof(jag_prec_t));
	if (!_get_process_data_line(stat_fd, rec)) {
		/* Nothing usable was read: drop the record */
		xfree(rec);
		fclose(stat_stream);
		return;
	}

	if (no_share_data)
		_remove_share_data(proc_stat_file, rec);
	list_append(prec_list, rec);

	/* I/O counters are filled in after the record is already listed */
	io_stream = fopen(proc_io_file, "r");
	if (io_stream != NULL) {
		io_fd = fileno(io_stream);
		fcntl(io_fd, F_SETFD, FD_CLOEXEC);
		_get_process_io_data_line(io_fd, rec);
		fclose(io_stream);
	}

	if (callbacks->prec_extra)
		callbacks->prec_extra(rec, my_pagesize);

	fclose(stat_stream);
}
Пример #2
0
/*
 * _handle_stats - sample one process and append its record to prec_list.
 *
 * IN prec_list       - list that receives the newly allocated jag_prec_t
 * IN proc_stat_file  - path of the per-process stat file to parse
 * IN proc_io_file    - path of the per-process io file (optional: silently
 *                      skipped when it cannot be opened)
 * IN proc_smaps_file - path handed to _get_pss() when "UsePss" is set
 * IN callbacks       - plugin hooks; prec_extra, when set, is invoked with
 *                      the new record
 *
 * Nothing is appended when the stat file cannot be opened, the record
 * cannot be allocated, _get_process_data_line() reports failure, or
 * _get_pss() returns -1.
 */
static void _handle_stats(List prec_list, char *proc_stat_file, char *proc_io_file,
			  char *proc_smaps_file, jag_callbacks_t *callbacks)
{
	static int no_share_data = -1;
	static int use_pss = -1;
	jag_prec_t *rec = NULL;
	FILE *stat_stream = NULL;
	FILE *io_stream = NULL;
	int stat_fd, io_fd;

	/* Both flags are derived from the gather parameters on the first call */
	if (no_share_data == -1) {
		char *params = slurm_get_jobacct_gather_params();

		no_share_data =
			(params && xstrcasestr(params, "NoShare")) ? 1 : 0;
		use_pss = (params && xstrcasestr(params, "UsePss")) ? 1 : 0;
		xfree(params);
	}

	stat_stream = fopen(proc_stat_file, "r");
	if (stat_stream == NULL)
		return;	/* Assume the process went away */

	/*
	 * Mark the descriptor close-on-exec so user tasks do not inherit it.
	 *
	 * NOTE: A fork() of slurmstepd that lands between the fopen() above
	 * and the fcntl() below can still leak this descriptor into the user
	 * task, which may upset checkpoint/restart. This is expected to be
	 * very rare in practice.
	 */
	stat_fd = fileno(stat_stream);
	if (fcntl(stat_fd, F_SETFD, FD_CLOEXEC) == -1)
		error("%s: fcntl(%s): %m", __func__, proc_stat_file);

	/* Avoid killing slurmstepd on malloc failure */
	rec = try_xmalloc(sizeof(jag_prec_t));
	if (rec == NULL) {
		fclose(stat_stream);
		return;
	}

	if (!_get_process_data_line(stat_fd, rec)) {
		/* Nothing usable was read: drop the record */
		xfree(rec);
		fclose(stat_stream);
		return;
	}
	fclose(stat_stream);

	/* Remove shared data from rss */
	if (no_share_data)
		_remove_share_data(proc_stat_file, rec);

	/* Use PSS instead of RSS; a failed lookup discards the sample */
	if (use_pss && (_get_pss(proc_smaps_file, rec) == -1)) {
		xfree(rec);
		return;
	}

	list_append(prec_list, rec);

	/* I/O counters are filled in after the record is already listed */
	io_stream = fopen(proc_io_file, "r");
	if (io_stream != NULL) {
		io_fd = fileno(io_stream);
		if (fcntl(io_fd, F_SETFD, FD_CLOEXEC) == -1)
			error("%s: fcntl: %m", __func__);
		_get_process_io_data_line(io_fd, rec);
		fclose(io_stream);
	}

	if (callbacks->prec_extra)
		callbacks->prec_extra(rec);
}
Пример #3
0
/*
 * _handle_stats - sample one process and append its record to prec_list.
 *
 * IN prec_list       - list that receives the newly allocated jag_prec_t
 * IN proc_stat_file  - path of the per-process stat file to parse
 * IN proc_io_file    - path of the per-process io file (optional: silently
 *                      skipped when it cannot be opened)
 * IN proc_smaps_file - path handed to _get_pss() when "UsePss" is set
 * IN callbacks       - not referenced by this function
 * IN tres_count      - number of tres_data slots to allocate; when zero,
 *                      g_tres_count is read under an assoc_mgr lock instead
 *
 * Nothing is appended when the stat file cannot be opened,
 * _get_process_data_line() reports failure, or _get_pss() returns -1.
 */
static void _handle_stats(List prec_list, char *proc_stat_file,
			  char *proc_io_file, char *proc_smaps_file,
			  jag_callbacks_t *callbacks,
			  int tres_count)
{
	static int no_share_data = -1;
	static int use_pss = -1;
	jag_prec_t *rec = NULL;
	FILE *stat_stream = NULL;
	FILE *io_stream = NULL;
	int stat_fd, io_fd, idx;

	/* Both flags are derived from the gather parameters on the first call */
	if (no_share_data == -1) {
		char *params = slurm_get_jobacct_gather_params();

		no_share_data =
			(params && xstrcasestr(params, "NoShare")) ? 1 : 0;
		use_pss = (params && xstrcasestr(params, "UsePss")) ? 1 : 0;
		xfree(params);
	}

	stat_stream = fopen(proc_stat_file, "r");
	if (stat_stream == NULL)
		return;	/* Assume the process went away */

	/*
	 * Mark the descriptor close-on-exec so user tasks do not inherit it.
	 *
	 * NOTE: A fork() of slurmstepd that lands between the fopen() above
	 * and the fcntl() below can still leak this descriptor into the user
	 * task, which may upset checkpoint/restart. This is expected to be
	 * very rare in practice.
	 */
	stat_fd = fileno(stat_stream);
	if (fcntl(stat_fd, F_SETFD, FD_CLOEXEC) == -1)
		error("%s: fcntl(%s): %m", __func__, proc_stat_file);

	rec = xmalloc(sizeof(jag_prec_t));

	/* Caller did not supply a count: fall back to the global one */
	if (tres_count == 0) {
		/*
		 * Only the fifth lock slot is taken (read mode).
		 * NOTE(review): presumably the TRES lock -- confirm against
		 * the assoc_mgr_lock_t definition.
		 */
		assoc_mgr_lock_t locks = {
			NO_LOCK, NO_LOCK, NO_LOCK, NO_LOCK,
			READ_LOCK, NO_LOCK, NO_LOCK };

		assoc_mgr_lock(&locks);
		tres_count = g_tres_count;
		assoc_mgr_unlock(&locks);
	}

	rec->tres_count = tres_count;
	rec->tres_data = xmalloc(rec->tres_count *
				 sizeof(acct_gather_data_t));

	/* Preset every read/write counter to INFINITE64 before sampling */
	for (idx = 0; idx < rec->tres_count; idx++) {
		rec->tres_data[idx].num_reads = INFINITE64;
		rec->tres_data[idx].num_writes = INFINITE64;
		rec->tres_data[idx].size_read = INFINITE64;
		rec->tres_data[idx].size_write = INFINITE64;
	}

	if (!_get_process_data_line(stat_fd, rec)) {
		/* Nothing usable was read: drop the record and its slots */
		xfree(rec->tres_data);
		xfree(rec);
		fclose(stat_stream);
		return;
	}
	fclose(stat_stream);

	/* Failures of the gather plugins are only logged, never fatal */
	if (acct_gather_filesystem_g_get_data(rec->tres_data) < 0)
		debug2("problem retrieving filesystem data");

	if (acct_gather_interconnect_g_get_data(rec->tres_data) < 0)
		debug2("problem retrieving interconnect data");

	/* Remove shared data from rss */
	if (no_share_data)
		_remove_share_data(proc_stat_file, rec);

	/* Use PSS instead of RSS; a failed lookup discards the sample */
	if (use_pss && (_get_pss(proc_smaps_file, rec) == -1)) {
		xfree(rec->tres_data);
		xfree(rec);
		return;
	}

	list_append(prec_list, rec);

	/* I/O counters are filled in after the record is already listed */
	io_stream = fopen(proc_io_file, "r");
	if (io_stream != NULL) {
		io_fd = fileno(io_stream);
		if (fcntl(io_fd, F_SETFD, FD_CLOEXEC) == -1)
			error("%s: fcntl: %m", __func__);
		_get_process_io_data_line(io_fd, rec);
		fclose(io_stream);
	}
}