예제 #1
0
파일: common_jag.c 프로젝트: npe9/slurm
extern void jag_common_poll_data(
	List task_list, bool pgid_plugin, uint64_t cont_id,
	jag_callbacks_t *callbacks, bool profile)
{
	/* Update the data */
	List prec_list = NULL;
	uint64_t total_job_mem = 0, total_job_vsize = 0;
	ListIterator itr;
	jag_prec_t *prec = NULL;
	struct jobacctinfo *jobacct = NULL;
	static int processing = 0;
	char sbuf[72];
	int energy_counted = 0;
	time_t ct;
	static int no_over_memory_kill = -1;

	xassert(callbacks);

	if (!pgid_plugin && (cont_id == (uint64_t)NO_VAL)) {
		debug("cont_id hasn't been set yet not running poll");
		return;
	}

	if (processing) {
		debug("already running, returning");
		return;
	}
	processing = 1;

	if (no_over_memory_kill == -1) {
		char *acct_params = slurm_get_jobacct_gather_params();
		if (acct_params && strstr(acct_params, "NoOverMemoryKill"))
			no_over_memory_kill = 1;
		else
			no_over_memory_kill = 0;
		xfree(acct_params);
	}

	if (!callbacks->get_precs)
		callbacks->get_precs = _get_precs;

	ct = time(NULL);
	prec_list = (*(callbacks->get_precs))(task_list, pgid_plugin, cont_id,
					      callbacks);

	if (!list_count(prec_list) || !task_list || !list_count(task_list))
		goto finished;	/* We have no business being here! */

	itr = list_iterator_create(task_list);
	while ((jobacct = list_next(itr))) {
		uint32_t cpu_calc;
		uint32_t last_total_cputime;
		if (!(prec = list_find_first(prec_list, _find_prec, jobacct)))
			continue;

#if _DEBUG
		info("pid:%u ppid:%u rss:%d KB",
		     prec->pid, prec->ppid, prec->rss);
#endif
		/* find all my descendents */
		if (callbacks->get_offspring_data)
			(*(callbacks->get_offspring_data))
				(prec_list, prec, prec->pid);

		last_total_cputime = jobacct->tot_cpu;

		cpu_calc = (prec->ssec + prec->usec)/hertz;
		/* tally their usage */
		jobacct->max_rss =
			MAX(jobacct->max_rss, prec->rss);
		jobacct->tot_rss = prec->rss;
		total_job_mem += prec->rss;
		jobacct->max_vsize =
			MAX(jobacct->max_vsize, prec->vsize);
		jobacct->tot_vsize = prec->vsize;
		total_job_vsize += prec->vsize;
		jobacct->max_pages =
			MAX(jobacct->max_pages, prec->pages);
		jobacct->tot_pages = prec->pages;
		jobacct->max_disk_read = MAX(
			jobacct->max_disk_read,
			prec->disk_read);
		jobacct->tot_disk_read = prec->disk_read;
		jobacct->max_disk_write = MAX(
			jobacct->max_disk_write,
			prec->disk_write);

		jobacct->tot_disk_write = prec->disk_write;
		jobacct->min_cpu =
			MAX(jobacct->min_cpu, cpu_calc);

		/* Update the cpu times
		 */
		jobacct->tot_cpu = cpu_calc;
		jobacct->user_cpu_sec = prec->usec/hertz;
		jobacct->sys_cpu_sec = prec->ssec/hertz;
		debug2("%s: %d mem size %"PRIu64" %"PRIu64" "
		       "time %u(%u+%u)", __func__,
		       jobacct->pid, jobacct->max_rss,
		       jobacct->max_vsize, jobacct->tot_cpu,
		       jobacct->user_cpu_sec,
		       jobacct->sys_cpu_sec);
		/* compute frequency */
		jobacct->this_sampled_cputime =
			cpu_calc - last_total_cputime;
		_get_sys_interface_freq_line(
			prec->last_cpu,
			"cpuinfo_cur_freq", sbuf);
		jobacct->act_cpufreq =
			_update_weighted_freq(jobacct, sbuf);
		debug("%s: Task average frequency = %u "
		       "pid %d mem size %"PRIu64" %"PRIu64" "
		       "time %u(%u+%u)", __func__,
		       jobacct->act_cpufreq,
		       jobacct->pid, jobacct->max_rss,
		       jobacct->max_vsize, jobacct->tot_cpu,
		       jobacct->user_cpu_sec,
		       jobacct->sys_cpu_sec);
		/* get energy consumption
		 * only once is enough since we
		 * report per node energy consumption */
		debug2("energycounted = %d", energy_counted);
		if (energy_counted == 0) {
			acct_gather_energy_g_get_data(
				energy_profile,
				&jobacct->energy);
			debug2("getjoules_task energy = %"PRIu64,
			       jobacct->energy.consumed_energy);
			energy_counted = 1;
		}
		if (profile &&
		    acct_gather_profile_g_is_active(ACCT_GATHER_PROFILE_TASK)) {
			jobacct->cur_time = ct;

			_record_profile(jobacct);

			jobacct->last_tot_disk_read = jobacct->tot_disk_read;
			jobacct->last_tot_disk_write = jobacct->tot_disk_write;
			jobacct->last_total_cputime = jobacct->tot_cpu;
			jobacct->last_time = jobacct->cur_time;
		}
	}
	list_iterator_destroy(itr);

	if (!no_over_memory_kill)
		jobacct_gather_handle_mem_limit(total_job_mem, total_job_vsize);

finished:
	FREE_NULL_LIST(prec_list);
	processing = 0;
}
예제 #2
0
파일: common_jag.c 프로젝트: SchedMD/slurm
extern void jag_common_poll_data(
	List task_list, bool pgid_plugin, uint64_t cont_id,
	jag_callbacks_t *callbacks, bool profile)
{
	/* Update the data */
	List prec_list = NULL;
	uint64_t total_job_mem = 0, total_job_vsize = 0;
	ListIterator itr;
	jag_prec_t *prec = NULL;
	struct jobacctinfo *jobacct = NULL;
	static int processing = 0;
	char sbuf[72];
	int energy_counted = 0;
	time_t ct;
	static int over_memory_kill = -1;
	int i = 0;

	xassert(callbacks);

	if (!pgid_plugin && (cont_id == NO_VAL64)) {
		debug("cont_id hasn't been set yet not running poll");
		return;
	}

	if (processing) {
		debug("already running, returning");
		return;
	}
	processing = 1;

	if (!callbacks->get_precs)
		callbacks->get_precs = _get_precs;

	ct = time(NULL);
	prec_list = (*(callbacks->get_precs))(task_list, pgid_plugin, cont_id,
					      callbacks);

	if (!list_count(prec_list) || !task_list || !list_count(task_list))
		goto finished;	/* We have no business being here! */

	itr = list_iterator_create(task_list);
	while ((jobacct = list_next(itr))) {
		double cpu_calc;
		double last_total_cputime;
		if (!(prec = list_find_first(prec_list, _find_prec, jobacct)))
			continue;

		/*
		 * Only jobacct_gather/cgroup uses prec_extra, and we want to
		 * make sure we call it once per task, so call it here as we
		 * iterate through the tasks instead of in get_precs.
		 */
		if (callbacks->prec_extra)
			(*(callbacks->prec_extra))(prec, jobacct->id.taskid);

#if _DEBUG
		info("pid:%u ppid:%u rss:%"PRIu64" B",
		     prec->pid, prec->ppid,
		     prec->tres_data[TRES_ARRAY_MEM].size_read);
#endif
		/* find all my descendents */
		if (callbacks->get_offspring_data)
			(*(callbacks->get_offspring_data))
				(prec_list, prec, prec->pid);

		last_total_cputime =
			(double)jobacct->tres_usage_in_tot[TRES_ARRAY_CPU];

		cpu_calc = (prec->ssec + prec->usec) / (double)hertz;

		/*
		 * Since we are not storing things as a double anymore make it
		 * bigger so we don't loose precision.
		 */
		cpu_calc *= CPU_TIME_ADJ;

		prec->tres_data[TRES_ARRAY_CPU].size_read = (uint64_t)cpu_calc;

		/* get energy consumption
		 * only once is enough since we
		 * report per node energy consumption.
		 * Energy is stored in read fields, while power is stored
		 * in write fields.*/
		debug2("energycounted = %d", energy_counted);
		if (energy_counted == 0) {
			acct_gather_energy_g_get_data(
				energy_profile,
				&jobacct->energy);
			prec->tres_data[TRES_ARRAY_ENERGY].size_read =
				jobacct->energy.consumed_energy;
			prec->tres_data[TRES_ARRAY_ENERGY].size_write =
				jobacct->energy.current_watts;
			debug2("%s: energy = %"PRIu64" watts = %"PRIu64" ave_watts = %u",
			       __func__,
			       prec->tres_data[TRES_ARRAY_ENERGY].size_read,
			       prec->tres_data[TRES_ARRAY_ENERGY].size_write,
			       jobacct->energy.ave_watts);
			energy_counted = 1;
		}

		/* tally their usage */
		for (i = 0; i < jobacct->tres_count; i++) {
			if (prec->tres_data[i].size_read == INFINITE64)
				continue;
			if (jobacct->tres_usage_in_max[i] == INFINITE64)
				jobacct->tres_usage_in_max[i] =
					prec->tres_data[i].size_read;
			else
				jobacct->tres_usage_in_max[i] =
					MAX(jobacct->tres_usage_in_max[i],
					    prec->tres_data[i].size_read);
			/*
			 * Even with min we want to get the max as we are
			 * looking at a specific task aso we are always looking
			 * at the max that task had, not the min (or lots of
			 * things will be zero).  The min is from compairing
			 * ranks later when combining.  So here it will be the
			 * same as the max value set above.
			 * (same thing goes for the out)
			 */
			jobacct->tres_usage_in_min[i] =
				jobacct->tres_usage_in_max[i];
			jobacct->tres_usage_in_tot[i] =
				prec->tres_data[i].size_read;

			if (jobacct->tres_usage_out_max[i] == INFINITE64)
				jobacct->tres_usage_out_max[i] =
					prec->tres_data[i].size_write;
			else
				jobacct->tres_usage_out_max[i] =
					MAX(jobacct->tres_usage_out_max[i],
					    prec->tres_data[i].size_write);
			jobacct->tres_usage_out_min[i] =
				jobacct->tres_usage_out_max[i];
			jobacct->tres_usage_out_tot[i] =
				prec->tres_data[i].size_write;
		}

		total_job_mem += jobacct->tres_usage_in_tot[TRES_ARRAY_MEM];
		total_job_vsize += jobacct->tres_usage_in_tot[TRES_ARRAY_VMEM];

		/* Update the cpu times */
		jobacct->user_cpu_sec = (uint32_t)(prec->usec / (double)hertz);
		jobacct->sys_cpu_sec = (uint32_t)(prec->ssec / (double)hertz);

		/* compute frequency */
		jobacct->this_sampled_cputime =
			cpu_calc - last_total_cputime;
		_get_sys_interface_freq_line(
			prec->last_cpu,
			"cpuinfo_cur_freq", sbuf);
		jobacct->act_cpufreq =
			_update_weighted_freq(jobacct, sbuf);

		debug("%s: Task %u pid %d ave_freq = %u mem size/max %"PRIu64"/%"PRIu64" vmem size/max %"PRIu64"/%"PRIu64", disk read size/max (%"PRIu64"/%"PRIu64"), disk write size/max (%"PRIu64"/%"PRIu64"), time %f(%u+%u) Energy tot/max %"PRIu64"/%"PRIu64" TotPower %"PRIu64" MaxPower %"PRIu64" MinPower %"PRIu64,
		      __func__,
		      jobacct->id.taskid,
		      jobacct->pid,
		      jobacct->act_cpufreq,
		      jobacct->tres_usage_in_tot[TRES_ARRAY_MEM],
		      jobacct->tres_usage_in_max[TRES_ARRAY_MEM],
		      jobacct->tres_usage_in_tot[TRES_ARRAY_VMEM],
		      jobacct->tres_usage_in_max[TRES_ARRAY_VMEM],
		      jobacct->tres_usage_in_tot[TRES_ARRAY_FS_DISK],
		      jobacct->tres_usage_in_max[TRES_ARRAY_FS_DISK],
		      jobacct->tres_usage_out_tot[TRES_ARRAY_FS_DISK],
		      jobacct->tres_usage_out_max[TRES_ARRAY_FS_DISK],
		      (double)(jobacct->tres_usage_in_tot[TRES_ARRAY_CPU] /
			       CPU_TIME_ADJ),
		      jobacct->user_cpu_sec,
		      jobacct->sys_cpu_sec,
		      jobacct->tres_usage_in_tot[TRES_ARRAY_ENERGY],
		      jobacct->tres_usage_in_max[TRES_ARRAY_ENERGY],
		      jobacct->tres_usage_out_tot[TRES_ARRAY_ENERGY],
		      jobacct->tres_usage_out_max[TRES_ARRAY_ENERGY],
		      jobacct->tres_usage_out_min[TRES_ARRAY_ENERGY]);

		if (profile &&
		    acct_gather_profile_g_is_active(ACCT_GATHER_PROFILE_TASK)) {
			jobacct->cur_time = ct;

			_record_profile(jobacct);

			jobacct->last_tres_usage_in_tot =
				jobacct->tres_usage_in_tot[TRES_ARRAY_FS_DISK];
			jobacct->last_tres_usage_out_tot =
				jobacct->tres_usage_out_tot[TRES_ARRAY_FS_DISK];
			jobacct->last_total_cputime =
				jobacct->tres_usage_in_tot[TRES_ARRAY_CPU];

			jobacct->last_time = jobacct->cur_time;
		}
	}
	list_iterator_destroy(itr);

	if (over_memory_kill == -1)
		over_memory_kill = slurm_get_job_acct_oom_kill();

	if (over_memory_kill)
		jobacct_gather_handle_mem_limit(total_job_mem,
						total_job_vsize);

finished:
	FREE_NULL_LIST(prec_list);
	processing = 0;
}