/*
 * acct_gather_filesystem_p_get_data - copy the latest Lustre totals into
 * the caller's TRES array.
 *
 * IN/OUT data - array indexed by TRES position; only the entry at
 *               tres_pos is written.
 *
 * Nothing is read or written when tres_pos is -1 or data is NULL; that
 * case is reported at debug2 level and treated as success.
 *
 * RET SLURM_SUCCESS, or SLURM_FAILURE if the Lustre counters could not
 *     be read.
 */
extern int acct_gather_filesystem_p_get_data(acct_gather_data_t *data)
{
	acct_gather_data_t *slot;
	int rc = SLURM_SUCCESS;

	if (!data || (tres_pos == -1)) {
		debug2("%s: We are not tracking TRES fs/lustre", __func__);
		return SLURM_SUCCESS;
	}

	/* lustre_se is refreshed and then read under the same lock. */
	slurm_mutex_lock(&lustre_lock);

	if (_read_lustre_counters() != SLURM_SUCCESS) {
		error("%s: Cannot read lustre counters", __func__);
		rc = SLURM_FAILURE;
		goto unlock;
	}

	/* Totals accumulated over all lustre-xxxx directories. */
	slot = &data[tres_pos];
	slot->num_reads = lustre_se.all_lustre_nb_reads;
	slot->num_writes = lustre_se.all_lustre_nb_writes;
	slot->size_read = lustre_se.all_lustre_read_bytes;
	slot->size_write = lustre_se.all_lustre_write_bytes;

unlock:
	slurm_mutex_unlock(&lustre_lock);
	return rc;
}
Example no. 2
0
/*
 * _thread_update_node_energy calls _read_ipmi_values and updates all values
 * for node consumption
 */
static int _update_node_filesystem(void)
{
	acct_filesystem_data_t *fls;
	int rc = SLURM_SUCCESS;

	slurm_mutex_lock(&lustre_lock);
	rc = _read_lustre_counters();

	fls = xmalloc(sizeof(acct_filesystem_data_t));

	fls->reads = lustre_se.all_lustre_nb_reads;
	fls->writes = lustre_se.all_lustre_nb_writes;
	fls->read_size = (double) lustre_se.all_lustre_read_bytes / 1048576;
	fls->write_size = (double) lustre_se.all_lustre_write_bytes / 1048576;
	acct_gather_profile_g_add_sample_data(ACCT_GATHER_PROFILE_LUSTRE, fls);

	debug3("Collection of Lustre counters Finished");
	xfree(fls);


	if (debug_flags & DEBUG_FLAG_FILESYSTEM) {
		info("lustre-thread = %d sec, transmitted %"PRIu64" bytes, "
		    "received %"PRIu64" bytes",
		    (int) (lustre_se.update_time - lustre_se.last_update_time),
		    lustre_se.all_lustre_read_bytes,
		    lustre_se.all_lustre_write_bytes);
	}
	slurm_mutex_unlock(&lustre_lock);

	return rc;
}
/*
 * _update_node_filesystem - sample the Lustre counters and record the
 * change since the previous sample in the "Filesystem" profile dataset.
 *
 * On the first successful counter read the dataset is created and the
 * current totals become the baseline, so the first recorded sample is
 * all zeroes. Each later call records the difference between the current
 * totals and the totals seen on the previous call; byte differences are
 * expressed in MB. After a sample is recorded lustre_se is zeroed.
 *
 * All work is done while holding lustre_lock.
 *
 * RET SLURM_SUCCESS on success, SLURM_FAILURE if the counters could not
 *     be read, SLURM_ERROR if the dataset could not be created or its id
 *     is invalid.
 */
static int _update_node_filesystem(void)
{
	/* Column order of one sample; must match dataset[] below. */
	enum {
		FIELD_READ,
		FIELD_READMB,
		FIELD_WRITE,
		FIELD_WRITEMB,
		FIELD_CNT
	};

	static acct_gather_data_t prev;
	static int dataset_id = -1;
	static bool initialized = false;

	const double bytes_per_mb = 1048576.0;

	acct_gather_profile_dataset_t dataset[] = {
		{ "Reads", PROFILE_FIELD_UINT64 },
		{ "ReadMB", PROFILE_FIELD_DOUBLE },
		{ "Writes", PROFILE_FIELD_UINT64 },
		{ "WriteMB", PROFILE_FIELD_DOUBLE },
		{ NULL, PROFILE_FIELD_NOT_SET }
	};

	union {
		double d;
		uint64_t u64;
	} sample[FIELD_CNT];

	acct_gather_data_t now;
	int rc = SLURM_SUCCESS;

	slurm_mutex_lock(&lustre_lock);

	if (_read_lustre_counters() != SLURM_SUCCESS) {
		error("%s: Cannot read lustre counters", __func__);
		rc = SLURM_FAILURE;
		goto unlock;
	}

	if (!initialized) {
		dataset_id = acct_gather_profile_g_create_dataset(
			"Filesystem", NO_PARENT, dataset);
		if (dataset_id == SLURM_ERROR) {
			error("FileSystem: Failed to create the dataset for Lustre");
			rc = SLURM_ERROR;
			goto unlock;
		}

		/* Baseline for the first delta. */
		prev.num_reads = lustre_se.all_lustre_nb_reads;
		prev.num_writes = lustre_se.all_lustre_nb_writes;
		prev.size_read = lustre_se.all_lustre_read_bytes;
		prev.size_write = lustre_se.all_lustre_write_bytes;

		initialized = true;
	}

	if (dataset_id < 0) {
		rc = SLURM_ERROR;
		goto unlock;
	}

	/* Totals currently accumulated over all lustre-xxxx directories. */
	now.num_reads = lustre_se.all_lustre_nb_reads;
	now.num_writes = lustre_se.all_lustre_nb_writes;
	now.size_read = lustre_se.all_lustre_read_bytes;
	now.size_write = lustre_se.all_lustre_write_bytes;

	/* Build the sample from the change since the previous call. */
	sample[FIELD_READ].u64 = now.num_reads - prev.num_reads;
	sample[FIELD_WRITE].u64 = now.num_writes - prev.num_writes;
	sample[FIELD_READMB].d =
		(double)(now.size_read - prev.size_read) / bytes_per_mb;
	sample[FIELD_WRITEMB].d =
		(double)(now.size_write - prev.size_write) / bytes_per_mb;

	if (debug_flags & DEBUG_FLAG_PROFILE) {
		char str[256];
		info("PROFILE-Lustre: %s",
		     acct_gather_profile_dataset_str(dataset, sample, str,
						     sizeof(str)));
	}
	acct_gather_profile_g_add_sample_data(dataset_id, (void *)sample,
					      lustre_se.update_time);

	/* This reading becomes the baseline; clear the working totals. */
	prev = now;
	memset(&lustre_se, 0, sizeof(lustre_sens_t));

unlock:
	slurm_mutex_unlock(&lustre_lock);
	return rc;
}
/*
 * _update_node_filesystem - sample the Lustre counters and send the
 * running totals to the profile plugin.
 *
 * The first successful call only stores the current counters as the
 * baseline and records nothing. Every later call adds the difference
 * between the current and the previous reading to a running total kept
 * across calls, passes that total to
 * acct_gather_profile_g_add_sample_data() as ACCT_GATHER_PROFILE_LUSTRE
 * and logs it. Sizes are kept in MB. lustre_se is zeroed after every
 * successful call.
 *
 * All work is done while holding lustre_lock.
 *
 * RET SLURM_SUCCESS, or SLURM_FAILURE if the counters could not be read.
 */
static int _update_node_filesystem(void)
{
	static acct_filesystem_data_t total;
	static acct_filesystem_data_t now;
	static acct_filesystem_data_t prev;
	static bool primed = false;
	int rc = SLURM_SUCCESS;

	slurm_mutex_lock(&lustre_lock);

	if (_read_lustre_counters() != SLURM_SUCCESS) {
		error("%s: Cannot read lustre counters", __func__);
		rc = SLURM_FAILURE;
		goto unlock;
	}

	if (!primed) {
		/* First reading: remember it as the baseline, record nothing. */
		prev.reads = lustre_se.all_lustre_nb_reads;
		prev.writes = lustre_se.all_lustre_nb_writes;
		prev.read_size =
			(double)lustre_se.all_lustre_read_bytes / 1048576.0;
		prev.write_size =
			(double)lustre_se.all_lustre_write_bytes / 1048576.0;

		primed = true;
		goto reset;
	}

	/* Totals currently accumulated over all lustre-xxxx directories. */
	now.reads = lustre_se.all_lustre_nb_reads;
	now.writes = lustre_se.all_lustre_nb_writes;
	now.read_size = (double)lustre_se.all_lustre_read_bytes / 1048576.0;
	now.write_size = (double)lustre_se.all_lustre_write_bytes / 1048576.0;

	/* Fold the change since the previous reading into the running total. */
	total.reads += now.reads - prev.reads;
	total.writes += now.writes - prev.writes;
	total.read_size += now.read_size - prev.read_size;
	total.write_size += now.write_size - prev.write_size;

	acct_gather_profile_g_add_sample_data(ACCT_GATHER_PROFILE_LUSTRE,
					      &total);

	/* This reading becomes the baseline for the next call. */
	prev = now;

	info("%s: num reads %" PRIu64 " nums write %" PRIu64
	     " read %f MB wrote %f MB",
	     __func__, total.reads, total.writes,
	     total.read_size, total.write_size);

reset:
	/* Clear the working totals before the next reading. */
	memset(&lustre_se, 0, sizeof(lustre_sens_t));

unlock:
	slurm_mutex_unlock(&lustre_lock);
	return rc;
}