/*
 * _update_node_filesystem - read the Lustre counters and record how much they
 * changed since the previous call as one sample of the "Filesystem" profile
 * dataset.
 *
 * Everything is done with lustre_lock held. The dataset is created on the
 * first call that reaches that point; the same call seeds the baseline from
 * the counters just read, so the sample it records is all zeros. After each
 * recorded sample the counters become the new baseline and lustre_se is
 * zeroed.
 *
 * Returns SLURM_FAILURE if the counters cannot be read, SLURM_ERROR if the
 * dataset could not be created (or its id is negative), and SLURM_SUCCESS
 * after the sample has been submitted. The result of the submission itself
 * is not checked.
 */
static int _update_node_filesystem(void)
{
	enum {
		FIELD_READ,
		FIELD_READMB,
		FIELD_WRITE,
		FIELD_WRITEMB,
		FIELD_CNT
	};

	acct_gather_profile_dataset_t dataset[] = {
		{ "Reads", PROFILE_FIELD_UINT64 },
		{ "ReadMB", PROFILE_FIELD_DOUBLE },
		{ "Writes", PROFILE_FIELD_UINT64 },
		{ "WriteMB", PROFILE_FIELD_DOUBLE },
		{ NULL, PROFILE_FIELD_NOT_SET }
	};

	/* State kept across calls. */
	static acct_gather_data_t baseline;
	static int fs_dataset = -1;
	static bool need_setup = true;

	acct_gather_data_t now;
	union {
		double d;
		uint64_t u64;
	} sample[FIELD_CNT];
	int rc = SLURM_ERROR;

	slurm_mutex_lock(&lustre_lock);

	if (_read_lustre_counters() != SLURM_SUCCESS) {
		error("%s: Cannot read lustre counters", __func__);
		rc = SLURM_FAILURE;
		goto out;
	}

	if (need_setup) {
		fs_dataset = acct_gather_profile_g_create_dataset(
			"Filesystem", NO_PARENT, dataset);
		if (fs_dataset == SLURM_ERROR) {
			error("FileSystem: Failed to create the dataset "
			      "for Lustre");
			goto out;
		}

		/* Seed the baseline so the first delta comes out as zero. */
		baseline.num_reads = lustre_se.all_lustre_nb_reads;
		baseline.num_writes = lustre_se.all_lustre_nb_writes;
		baseline.size_read = lustre_se.all_lustre_read_bytes;
		baseline.size_write = lustre_se.all_lustre_write_bytes;

		need_setup = false;
	}

	if (fs_dataset < 0)
		goto out;

	/* Totals accumulated over all lustre-xxxx directories. */
	now.num_reads = lustre_se.all_lustre_nb_reads;
	now.num_writes = lustre_se.all_lustre_nb_writes;
	now.size_read = lustre_se.all_lustre_read_bytes;
	now.size_write = lustre_se.all_lustre_write_bytes;

	/* The sample holds deltas; byte counts are scaled by 2^20. */
	sample[FIELD_READ].u64 = now.num_reads - baseline.num_reads;
	sample[FIELD_WRITE].u64 = now.num_writes - baseline.num_writes;
	sample[FIELD_READMB].d =
		(double)(now.size_read - baseline.size_read) / (1 << 20);
	sample[FIELD_WRITEMB].d =
		(double)(now.size_write - baseline.size_write) / (1 << 20);

	if (debug_flags & DEBUG_FLAG_PROFILE) {
		char str[256];
		info("PROFILE-Lustre: %s", acct_gather_profile_dataset_str(
			     dataset, sample, str, sizeof(str)));
	}
	acct_gather_profile_g_add_sample_data(fs_dataset, (void *)sample,
					      lustre_se.update_time);

	/* Current totals become the next baseline; reset the working data. */
	baseline = now;
	memset(&lustre_se, 0, sizeof(lustre_sens_t));

	rc = SLURM_SUCCESS;

out:
	slurm_mutex_unlock(&lustre_lock);
	return rc;
}
/*
 * _update_node_infiniband - read the OFED counters and record them as one
 * sample of the "Network" profile dataset.
 *
 * The dataset is created the first time this runs with a negative dataset_id
 * (dataset_id is not local to this function). The counters are then read and
 * the sample is submitted with ofed_lock held throughout, so that every field
 * taken from ofed_sens, including update_time, is read under the lock.
 *
 * Returns SLURM_ERROR if the dataset cannot be created, the return code of
 * _read_ofed_values() if that fails, and otherwise whatever
 * acct_gather_profile_g_add_sample_data() returns.
 */
static int _update_node_infiniband(void)
{
	int rc;

	enum {
		FIELD_PACKIN,
		FIELD_PACKOUT,
		FIELD_MBIN,
		FIELD_MBOUT,
		FIELD_CNT
	};

	acct_gather_profile_dataset_t dataset[] = {
		{ "PacketsIn", PROFILE_FIELD_UINT64 },
		{ "PacketsOut", PROFILE_FIELD_UINT64 },
		{ "InMB", PROFILE_FIELD_DOUBLE },
		{ "OutMB", PROFILE_FIELD_DOUBLE },
		{ NULL, PROFILE_FIELD_NOT_SET }
	};

	union {
		double d;
		uint64_t u64;
	} data[FIELD_CNT];

	if (dataset_id < 0) {
		dataset_id = acct_gather_profile_g_create_dataset("Network",
			NO_PARENT, dataset);
		if (dataset_id == SLURM_ERROR) {
			error("IB: Failed to create the dataset for ofed");
			return SLURM_ERROR;
		}
		/* Only report the dataset as created once we know it was. */
		if (debug_flags & DEBUG_FLAG_INFINIBAND)
			debug("IB: dataset created (id = %d)", dataset_id);
	}

	slurm_mutex_lock(&ofed_lock);
	rc = _read_ofed_values();
	if (rc != SLURM_SUCCESS)
		goto unlock;

	/* Packet counts are stored as-is; byte counts are scaled by 2^20. */
	data[FIELD_PACKIN].u64 = ofed_sens.rcvpkts;
	data[FIELD_PACKOUT].u64 = ofed_sens.xmtpkts;
	data[FIELD_MBIN].d = (double) ofed_sens.rcvdata / (1 << 20);
	data[FIELD_MBOUT].d = (double) ofed_sens.xmtdata / (1 << 20);

	if (debug_flags & DEBUG_FLAG_INFINIBAND) {
		info("ofed-thread = %d sec, transmitted %"PRIu64" bytes, "
		     "received %"PRIu64" bytes",
		     (int) (ofed_sens.update_time - ofed_sens.last_update_time),
		     ofed_sens.xmtdata, ofed_sens.rcvdata);
	}

	if (debug_flags & DEBUG_FLAG_PROFILE) {
		char str[256];
		info("PROFILE-Network: %s", acct_gather_profile_dataset_str(
			     dataset, data, str, sizeof(str)));
	}

	/*
	 * Submit while still holding ofed_lock: update_time belongs to
	 * ofed_sens and must not be read after the lock is released.
	 */
	rc = acct_gather_profile_g_add_sample_data(dataset_id, (void *)data,
						   ofed_sens.update_time);

unlock:
	slurm_mutex_unlock(&ofed_lock);
	return rc;
}
/*
 * Example #3
 * 0
 */
/*
 * _record_profile - submit one profile sample for the task described by
 * jobacct.
 *
 * The "Tasks" group is created on the first call (and again on later calls
 * for as long as the stored group id is -1). A per-task dataset, named after
 * the task id, is created whenever jobacct->dataset_id is negative. Nothing
 * is recorded if the dataset id is still negative afterwards.
 *
 * Frequency, RSS, VM size and page values are taken straight from jobacct.
 * CPU time, CPU utilization and the read/write values are differences against
 * the previous snapshot, and are all zero when jobacct->last_time is zero.
 */
static void _record_profile(struct jobacctinfo *jobacct)
{
	enum {
		FIELD_CPUFREQ,
		FIELD_CPUTIME,
		FIELD_CPUUTIL,
		FIELD_RSS,
		FIELD_VMSIZE,
		FIELD_PAGES,
		FIELD_READ,
		FIELD_WRITE,
		FIELD_CNT
	};

	acct_gather_profile_dataset_t dataset[] = {
		{ "CPUFrequency", PROFILE_FIELD_UINT64 },
		{ "CPUTime", PROFILE_FIELD_UINT64 },
		{ "CPUUtilization", PROFILE_FIELD_DOUBLE },
		{ "RSS", PROFILE_FIELD_UINT64 },
		{ "VMSize", PROFILE_FIELD_UINT64 },
		{ "Pages", PROFILE_FIELD_UINT64 },
		{ "ReadMB", PROFILE_FIELD_DOUBLE },
		{ "WriteMB", PROFILE_FIELD_DOUBLE },
		{ NULL, PROFILE_FIELD_NOT_SET }
	};

	static int tasks_group = -1;
	union {
		double d;
		uint64_t u64;
	} sample[FIELD_CNT];

	if (tasks_group == -1)
		tasks_group = acct_gather_profile_g_create_group("Tasks");

	/* The dataset has to exist before anything can be recorded. */
	if (jobacct->dataset_id < 0) {
		char ds_name[32];

		snprintf(ds_name, sizeof(ds_name), "%u", jobacct->id.taskid);
		jobacct->dataset_id = acct_gather_profile_g_create_dataset(
			ds_name, tasks_group, dataset);

		if (jobacct->dataset_id == SLURM_ERROR) {
			error("JobAcct: Failed to create the dataset for "
			      "task %d",
			      jobacct->pid);
			return;
		}
		/* Any other negative id: give up without logging. */
		if (jobacct->dataset_id < 0)
			return;
	}

	/* Absolute values. */
	sample[FIELD_CPUFREQ].u64 = jobacct->act_cpufreq;
	sample[FIELD_RSS].u64 = jobacct->tot_rss;
	sample[FIELD_VMSIZE].u64 = jobacct->tot_vsize;
	sample[FIELD_PAGES].u64 = jobacct->tot_pages;

	/* Delta values: zero unless a previous snapshot exists. */
	sample[FIELD_CPUTIME].u64 = 0;
	sample[FIELD_CPUUTIL].d = 0.0;
	sample[FIELD_READ].d = 0.0;
	sample[FIELD_WRITE].d = 0.0;

	if (jobacct->last_time) {
		double elapsed = (jobacct->cur_time - jobacct->last_time);

		sample[FIELD_CPUTIME].u64 =
			jobacct->tot_cpu - jobacct->last_total_cputime;

		/* Utilization stays 0.0 when no time has elapsed. */
		if (elapsed != 0)
			sample[FIELD_CPUUTIL].d =
				(100.0 * (double)sample[FIELD_CPUTIME].u64) /
				elapsed;

		sample[FIELD_READ].d = jobacct->tot_disk_read -
			jobacct->last_tot_disk_read;
		sample[FIELD_WRITE].d = jobacct->tot_disk_write -
			jobacct->last_tot_disk_write;
	}

	if (debug_flags & DEBUG_FLAG_PROFILE) {
		char str[256];
		info("PROFILE-Task: %s", acct_gather_profile_dataset_str(
			     dataset, sample, str, sizeof(str)));
	}
	acct_gather_profile_g_add_sample_data(jobacct->dataset_id,
	                                      (void *)sample,
	                                      jobacct->cur_time);
}