/* * _thread_update_node_energy calls _read_ipmi_values and updates all values * for node consumption */ static int _update_node_filesystem(void) { static acct_gather_data_t previous; static int dataset_id = -1; static bool first = true; acct_gather_data_t current; enum { FIELD_READ, FIELD_READMB, FIELD_WRITE, FIELD_WRITEMB, FIELD_CNT }; acct_gather_profile_dataset_t dataset[] = { { "Reads", PROFILE_FIELD_UINT64 }, { "ReadMB", PROFILE_FIELD_DOUBLE }, { "Writes", PROFILE_FIELD_UINT64 }, { "WriteMB", PROFILE_FIELD_DOUBLE }, { NULL, PROFILE_FIELD_NOT_SET } }; union { double d; uint64_t u64; } data[FIELD_CNT]; slurm_mutex_lock(&lustre_lock); if (_read_lustre_counters() != SLURM_SUCCESS) { error("%s: Cannot read lustre counters", __func__); slurm_mutex_unlock(&lustre_lock); return SLURM_FAILURE; } if (first) { dataset_id = acct_gather_profile_g_create_dataset( "Filesystem", NO_PARENT, dataset); if (dataset_id == SLURM_ERROR) { error("FileSystem: Failed to create the dataset " "for Lustre"); slurm_mutex_unlock(&lustre_lock); return SLURM_ERROR; } previous.num_reads = lustre_se.all_lustre_nb_reads; previous.num_writes = lustre_se.all_lustre_nb_writes; previous.size_read = lustre_se.all_lustre_read_bytes; previous.size_write = lustre_se.all_lustre_write_bytes; first = false; } if (dataset_id < 0) { slurm_mutex_unlock(&lustre_lock); return SLURM_ERROR; } /* Compute the current values read from all lustre-xxxx directories */ current.num_reads = lustre_se.all_lustre_nb_reads; current.num_writes = lustre_se.all_lustre_nb_writes; current.size_read = lustre_se.all_lustre_read_bytes; current.size_write = lustre_se.all_lustre_write_bytes; /* record sample */ data[FIELD_READ].u64 = current.num_reads - previous.num_reads; data[FIELD_READMB].d = (double)(current.size_read - previous.size_read) / (1 << 20); data[FIELD_WRITE].u64 = current.num_writes - previous.num_writes; data[FIELD_WRITEMB].d = (double)(current.size_write - previous.size_write) / (1 << 20); if (debug_flags & DEBUG_FLAG_PROFILE) { char str[256]; info("PROFILE-Lustre: %s", acct_gather_profile_dataset_str( dataset, data, str, sizeof(str))); } acct_gather_profile_g_add_sample_data(dataset_id, (void *)data, lustre_se.update_time); /* Save current as previous and clean up the working * data structure. */ memcpy(&previous, ¤t, sizeof(acct_gather_data_t)); memset(&lustre_se, 0, sizeof(lustre_sens_t)); slurm_mutex_unlock(&lustre_lock); return SLURM_SUCCESS; }
/* * _thread_update_node_energy calls _read_ipmi_values and updates all values * for node consumption */ static int _update_node_infiniband(void) { int rc; enum { FIELD_PACKIN, FIELD_PACKOUT, FIELD_MBIN, FIELD_MBOUT, FIELD_CNT }; acct_gather_profile_dataset_t dataset[] = { { "PacketsIn", PROFILE_FIELD_UINT64 }, { "PacketsOut", PROFILE_FIELD_UINT64 }, { "InMB", PROFILE_FIELD_DOUBLE }, { "OutMB", PROFILE_FIELD_DOUBLE }, { NULL, PROFILE_FIELD_NOT_SET } }; union { double d; uint64_t u64; } data[FIELD_CNT]; if (dataset_id < 0) { dataset_id = acct_gather_profile_g_create_dataset("Network", NO_PARENT, dataset); if (debug_flags & DEBUG_FLAG_INFINIBAND) debug("IB: dataset created (id = %d)", dataset_id); if (dataset_id == SLURM_ERROR) { error("IB: Failed to create the dataset for ofed"); return SLURM_ERROR; } } slurm_mutex_lock(&ofed_lock); if ((rc = _read_ofed_values()) != SLURM_SUCCESS) { slurm_mutex_unlock(&ofed_lock); return rc; } data[FIELD_PACKIN].u64 = ofed_sens.rcvpkts; data[FIELD_PACKOUT].u64 = ofed_sens.xmtpkts; data[FIELD_MBIN].d = (double) ofed_sens.rcvdata / (1 << 20); data[FIELD_MBOUT].d = (double) ofed_sens.xmtdata / (1 << 20); if (debug_flags & DEBUG_FLAG_INFINIBAND) { info("ofed-thread = %d sec, transmitted %"PRIu64" bytes, " "received %"PRIu64" bytes", (int) (ofed_sens.update_time - ofed_sens.last_update_time), ofed_sens.xmtdata, ofed_sens.rcvdata); } slurm_mutex_unlock(&ofed_lock); if (debug_flags & DEBUG_FLAG_PROFILE) { char str[256]; info("PROFILE-Network: %s", acct_gather_profile_dataset_str( dataset, data, str, sizeof(str))); } return acct_gather_profile_g_add_sample_data(dataset_id, (void *)data, ofed_sens.update_time); }
static void _record_profile(struct jobacctinfo *jobacct) { enum { FIELD_CPUFREQ, FIELD_CPUTIME, FIELD_CPUUTIL, FIELD_RSS, FIELD_VMSIZE, FIELD_PAGES, FIELD_READ, FIELD_WRITE, FIELD_CNT }; acct_gather_profile_dataset_t dataset[] = { { "CPUFrequency", PROFILE_FIELD_UINT64 }, { "CPUTime", PROFILE_FIELD_UINT64 }, { "CPUUtilization", PROFILE_FIELD_DOUBLE }, { "RSS", PROFILE_FIELD_UINT64 }, { "VMSize", PROFILE_FIELD_UINT64 }, { "Pages", PROFILE_FIELD_UINT64 }, { "ReadMB", PROFILE_FIELD_DOUBLE }, { "WriteMB", PROFILE_FIELD_DOUBLE }, { NULL, PROFILE_FIELD_NOT_SET } }; static int profile_gid = -1; double et; union { double d; uint64_t u64; } data[FIELD_CNT]; if (profile_gid == -1) profile_gid = acct_gather_profile_g_create_group("Tasks"); /* Create the dataset first */ if (jobacct->dataset_id < 0) { char ds_name[32]; snprintf(ds_name, sizeof(ds_name), "%u", jobacct->id.taskid); jobacct->dataset_id = acct_gather_profile_g_create_dataset( ds_name, profile_gid, dataset); if (jobacct->dataset_id == SLURM_ERROR) { error("JobAcct: Failed to create the dataset for " "task %d", jobacct->pid); return; } } if (jobacct->dataset_id < 0) return; data[FIELD_CPUFREQ].u64 = jobacct->act_cpufreq; data[FIELD_RSS].u64 = jobacct->tot_rss; data[FIELD_VMSIZE].u64 = jobacct->tot_vsize; data[FIELD_PAGES].u64 = jobacct->tot_pages; /* delta from last snapshot */ if (!jobacct->last_time) { data[FIELD_CPUTIME].u64 = 0; data[FIELD_CPUUTIL].d = 0.0; data[FIELD_READ].d = 0.0; data[FIELD_WRITE].d = 0.0; } else { data[FIELD_CPUTIME].u64 = jobacct->tot_cpu - jobacct->last_total_cputime; et = (jobacct->cur_time - jobacct->last_time); if (!et) data[FIELD_CPUUTIL].d = 0.0; else data[FIELD_CPUUTIL].d = (100.0 * (double)data[FIELD_CPUTIME].u64) / ((double) et); data[FIELD_READ].d = jobacct->tot_disk_read - jobacct->last_tot_disk_read; data[FIELD_WRITE].d = jobacct->tot_disk_write - jobacct->last_tot_disk_write; } if (debug_flags & DEBUG_FLAG_PROFILE) { char str[256]; info("PROFILE-Task: %s", acct_gather_profile_dataset_str( dataset, data, str, sizeof(str))); } acct_gather_profile_g_add_sample_data(jobacct->dataset_id, (void *)data, jobacct->cur_time); }