/*
 * acct_gather_filesystem_p_get_data - copy the Lustre totals held in
 * lustre_se into the caller's accounting array.
 *
 * IN/OUT data - array indexed by TRES position; only data[tres_pos] is
 *               written.
 *
 * RET SLURM_SUCCESS when the slot was filled, and also when nothing is done
 *     because tres_pos is -1 or data is NULL; SLURM_FAILURE when
 *     _read_lustre_counters() does not succeed.
 */
extern int acct_gather_filesystem_p_get_data(acct_gather_data_t *data)
{
	acct_gather_data_t *slot;
	int rc;

	/* Nothing to fill in: not an error for the caller. */
	if (!data || (tres_pos == -1)) {
		debug2("%s: We are not tracking TRES fs/lustre", __func__);
		return SLURM_SUCCESS;
	}

	/* lustre_se is only touched while lustre_lock is held. */
	slurm_mutex_lock(&lustre_lock);

	if (_read_lustre_counters() == SLURM_SUCCESS) {
		/* Publish the totals read from all lustre-xxxx directories. */
		slot = &data[tres_pos];
		slot->num_reads = lustre_se.all_lustre_nb_reads;
		slot->num_writes = lustre_se.all_lustre_nb_writes;
		slot->size_read = lustre_se.all_lustre_read_bytes;
		slot->size_write = lustre_se.all_lustre_write_bytes;
		rc = SLURM_SUCCESS;
	} else {
		error("%s: Cannot read lustre counters", __func__);
		rc = SLURM_FAILURE;
	}

	slurm_mutex_unlock(&lustre_lock);

	return rc;
}
/* * _thread_update_node_energy calls _read_ipmi_values and updates all values * for node consumption */ static int _update_node_filesystem(void) { acct_filesystem_data_t *fls; int rc = SLURM_SUCCESS; slurm_mutex_lock(&lustre_lock); rc = _read_lustre_counters(); fls = xmalloc(sizeof(acct_filesystem_data_t)); fls->reads = lustre_se.all_lustre_nb_reads; fls->writes = lustre_se.all_lustre_nb_writes; fls->read_size = (double) lustre_se.all_lustre_read_bytes / 1048576; fls->write_size = (double) lustre_se.all_lustre_write_bytes / 1048576; acct_gather_profile_g_add_sample_data(ACCT_GATHER_PROFILE_LUSTRE, fls); debug3("Collection of Lustre counters Finished"); xfree(fls); if (debug_flags & DEBUG_FLAG_FILESYSTEM) { info("lustre-thread = %d sec, transmitted %"PRIu64" bytes, " "received %"PRIu64" bytes", (int) (lustre_se.update_time - lustre_se.last_update_time), lustre_se.all_lustre_read_bytes, lustre_se.all_lustre_write_bytes); } slurm_mutex_unlock(&lustre_lock); return rc; }
/* * _thread_update_node_energy calls _read_ipmi_values and updates all values * for node consumption */ static int _update_node_filesystem(void) { static acct_gather_data_t previous; static int dataset_id = -1; static bool first = true; acct_gather_data_t current; enum { FIELD_READ, FIELD_READMB, FIELD_WRITE, FIELD_WRITEMB, FIELD_CNT }; acct_gather_profile_dataset_t dataset[] = { { "Reads", PROFILE_FIELD_UINT64 }, { "ReadMB", PROFILE_FIELD_DOUBLE }, { "Writes", PROFILE_FIELD_UINT64 }, { "WriteMB", PROFILE_FIELD_DOUBLE }, { NULL, PROFILE_FIELD_NOT_SET } }; union { double d; uint64_t u64; } data[FIELD_CNT]; slurm_mutex_lock(&lustre_lock); if (_read_lustre_counters() != SLURM_SUCCESS) { error("%s: Cannot read lustre counters", __func__); slurm_mutex_unlock(&lustre_lock); return SLURM_FAILURE; } if (first) { dataset_id = acct_gather_profile_g_create_dataset( "Filesystem", NO_PARENT, dataset); if (dataset_id == SLURM_ERROR) { error("FileSystem: Failed to create the dataset " "for Lustre"); slurm_mutex_unlock(&lustre_lock); return SLURM_ERROR; } previous.num_reads = lustre_se.all_lustre_nb_reads; previous.num_writes = lustre_se.all_lustre_nb_writes; previous.size_read = lustre_se.all_lustre_read_bytes; previous.size_write = lustre_se.all_lustre_write_bytes; first = false; } if (dataset_id < 0) { slurm_mutex_unlock(&lustre_lock); return SLURM_ERROR; } /* Compute the current values read from all lustre-xxxx directories */ current.num_reads = lustre_se.all_lustre_nb_reads; current.num_writes = lustre_se.all_lustre_nb_writes; current.size_read = lustre_se.all_lustre_read_bytes; current.size_write = lustre_se.all_lustre_write_bytes; /* record sample */ data[FIELD_READ].u64 = current.num_reads - previous.num_reads; data[FIELD_READMB].d = (double)(current.size_read - previous.size_read) / (1 << 20); data[FIELD_WRITE].u64 = current.num_writes - previous.num_writes; data[FIELD_WRITEMB].d = (double)(current.size_write - previous.size_write) / (1 << 20); if (debug_flags & 
DEBUG_FLAG_PROFILE) { char str[256]; info("PROFILE-Lustre: %s", acct_gather_profile_dataset_str( dataset, data, str, sizeof(str))); } acct_gather_profile_g_add_sample_data(dataset_id, (void *)data, lustre_se.update_time); /* Save current as previous and clean up the working * data structure. */ memcpy(&previous, ¤t, sizeof(acct_gather_data_t)); memset(&lustre_se, 0, sizeof(lustre_sens_t)); slurm_mutex_unlock(&lustre_lock); return SLURM_SUCCESS; }
/* * _thread_update_node_energy calls _read_ipmi_values and updates all values * for node consumption */ static int _update_node_filesystem(void) { static acct_filesystem_data_t fls; static acct_filesystem_data_t current; static acct_filesystem_data_t previous; static bool first = true; int cc; slurm_mutex_lock(&lustre_lock); cc = _read_lustre_counters(); if (cc != SLURM_SUCCESS) { error("%s: Cannot read lustre counters", __func__); slurm_mutex_unlock(&lustre_lock); return SLURM_FAILURE; } if (first) { /* First time initialize the counters and return. */ previous.reads = lustre_se.all_lustre_nb_reads; previous.writes = lustre_se.all_lustre_nb_writes; previous.read_size = (double)lustre_se.all_lustre_read_bytes/1048576.0; previous.write_size = (double)lustre_se.all_lustre_write_bytes/1048576.0; first = false; memset(&lustre_se, 0, sizeof(lustre_sens_t)); slurm_mutex_unlock(&lustre_lock); return SLURM_SUCCESS; } /* Compute the current values read from all lustre-xxxx * directories */ current.reads = lustre_se.all_lustre_nb_reads; current.writes = lustre_se.all_lustre_nb_writes; current.read_size = (double)lustre_se.all_lustre_read_bytes/1048576.0; current.write_size = (double)lustre_se.all_lustre_write_bytes/1048576.0; /* Now compute the difference between the two snapshots * and send it to hdf5 log. */ fls.reads = fls.reads + (current.reads - previous.reads); fls.writes = fls.writes + (current.writes - previous.writes); fls.read_size = fls.read_size + (current.read_size - previous.read_size); fls.write_size = fls.write_size + (current.write_size - previous.write_size); acct_gather_profile_g_add_sample_data(ACCT_GATHER_PROFILE_LUSTRE, &fls); /* Save current as previous and clean up the working * data structure. 
*/ memcpy(&previous, ¤t, sizeof(acct_filesystem_data_t)); memset(&lustre_se, 0, sizeof(lustre_sens_t)); info("%s: num reads %"PRIu64" nums write %"PRIu64" " "read %f MB wrote %f MB", __func__, fls.reads, fls.writes, fls.read_size, fls.write_size); slurm_mutex_unlock(&lustre_lock); return SLURM_SUCCESS; }