예제 #1
0
/*
 * Apply a "set" request to the energy plugin.
 *
 * data_type selects the operation:
 *   ENERGY_DATA_RECONFIG - reload the debug flags; data is not read.
 *   ENERGY_DATA_PROFILE  - data is read as an int and handed to
 *                          _get_joules_task(), then a profile sample is
 *                          sent; both happen under ipmi_mutex.
 *
 * Returns SLURM_SUCCESS, or SLURM_ERROR (after logging) when data_type is
 * not one of the values above.
 */
extern int acct_gather_energy_p_set_data(enum acct_energy_type data_type,
					 void *data)
{
	xassert(_run_in_daemon());

	if (data_type == ENERGY_DATA_RECONFIG) {
		debug_flags = slurm_get_debug_flags();
		return SLURM_SUCCESS;
	}

	if (data_type == ENERGY_DATA_PROFILE) {
		int *delta = (int *)data;

		slurm_mutex_lock(&ipmi_mutex);
		_get_joules_task(*delta);
		_ipmi_send_profile();
		slurm_mutex_unlock(&ipmi_mutex);
		return SLURM_SUCCESS;
	}

	error("acct_gather_energy_p_set_data: unknown enum %d",
	      data_type);
	return SLURM_ERROR;
}
예제 #2
0
/*
 * Node energy update hook.  No work is done here: the call only asserts
 * that it runs inside a daemon and returns SLURM_SUCCESS.
 */
extern int acct_gather_energy_p_update_node_energy(void)
{
	xassert(_run_in_daemon());

	return SLURM_SUCCESS;
}
예제 #3
0
/*
 * Refresh the node filesystem counters.
 *
 * The update is performed only when running in a daemon and
 * _check_lustre_fs() succeeds.  Always returns SLURM_SUCCESS.
 */
extern int acct_gather_filesystem_p_node_update(void)
{
	if (!_run_in_daemon())
		return SLURM_SUCCESS;

	if (_check_lustre_fs() != SLURM_SUCCESS)
		return SLURM_SUCCESS;

	_update_node_filesystem();
	return SLURM_SUCCESS;
}
예제 #4
0
/*
 * Configuration hook.  tbl is not consulted; when running in a daemon the
 * plugin only logs that it has been loaded.
 */
extern void acct_gather_filesystem_p_conf_set(s_p_hashtbl_t *tbl)
{
	if (_run_in_daemon()) {
		debug("%s loaded", plugin_name);
	}
}
예제 #5
0
extern int acct_gather_energy_p_get_data(enum acct_energy_type data_type,
					 void *data)
{
	int rc = SLURM_SUCCESS;
	acct_gather_energy_t *energy = (acct_gather_energy_t *)data;
	time_t *last_poll = (time_t *)data;
	uint16_t *sensor_cnt = (uint16_t *)data;

	xassert(_run_in_daemon());

	switch (data_type) {
	case ENERGY_DATA_JOULES_TASK:
		if (local_energy->current_watts == NO_VAL)
			energy->consumed_energy = NO_VAL;
		else
			_get_joules_task(energy);
		break;
	case ENERGY_DATA_NODE_ENERGY:
	case ENERGY_DATA_STRUCT:
		memcpy(energy, local_energy, sizeof(acct_gather_energy_t));
		break;
	case ENERGY_DATA_LAST_POLL:
		*last_poll = local_energy->poll_time;
		break;
	case ENERGY_DATA_SENSOR_CNT:
		*sensor_cnt = 1;
		break;
	default:
		error("acct_gather_energy_p_get_data: unknown enum %d",
		      data_type);
		rc = SLURM_ERROR;
		break;
	}
	return rc;
}
예제 #6
0
/*
 * Plugin unload hook.  When running in a daemon, destroys local_energy
 * and clears the pointer.  Always returns SLURM_SUCCESS.
 */
extern int fini(void)
{
	if (_run_in_daemon()) {
		acct_gather_energy_destroy(local_energy);
		local_energy = NULL;
	}

	return SLURM_SUCCESS;
}
예제 #7
0
/*
 * Plugin load hook: invoked once when the plugin is loaded, ahead of every
 * other entry point.  Global set-up belongs here.
 *
 * When running in a daemon the debug flags are cached; otherwise nothing
 * is done.  Always returns SLURM_SUCCESS.
 */
extern int init(void)
{
	if (_run_in_daemon())
		debug_flags = slurm_get_debug_flags();

	return SLURM_SUCCESS;
}
예제 #8
0
/*
 * Plugin unload hook.  When running in a daemon, also shuts down the
 * energy gathering layer.  Always returns SLURM_SUCCESS.
 */
extern int fini ( void )
{
	if (!_run_in_daemon())
		return SLURM_SUCCESS;

	/* Called from here to make sure the energy side is closed up too. */
	acct_gather_energy_fini();

	return SLURM_SUCCESS;
}
예제 #9
0
/*
 * Plugin load hook: invoked once when the plugin is loaded, ahead of every
 * other entry point.  Global set-up belongs here.
 *
 * In a daemon this initialises the common jobacct_gather code and caches
 * the system page size divided by 1024.  The "loaded" message is logged in
 * every case.  Always returns SLURM_SUCCESS.
 */
extern int init ( void )
{
	if (_run_in_daemon()) {
		int page_bytes;

		jag_common_init(1);
		page_bytes = getpagesize();
		pagesize = page_bytes / 1024;
	}

	debug("%s loaded", plugin_name);

	return SLURM_SUCCESS;
}
예제 #10
0
/*
 * Plugin unload hook.  Logs "lustre: ended" when running in a daemon with
 * the filesystem debug flag set.  Always returns SLURM_SUCCESS.
 */
extern int fini(void)
{
	if (_run_in_daemon() && (debug_flags & DEBUG_FLAG_FILESYSTEM))
		info("lustre: ended");

	return SLURM_SUCCESS;
}
예제 #11
0
/*
 * Plugin load hook: invoked once when the plugin is loaded, ahead of every
 * other entry point.  Global set-up belongs here.
 *
 * In a daemon this caches the debug flags and points the HDF5 error
 * printer at the log file.  Always returns SLURM_SUCCESS.
 */
extern int init(void)
{
	if (_run_in_daemon()) {
		debug_flags = slurm_get_debug_flags();

		/* Send HDF5 trace output to the log file, not to stderr. */
		H5Eset_auto(H5E_DEFAULT,
			    (herr_t (*)(hid_t, void *))H5Eprint,
			    log_fp());
	}

	return SLURM_SUCCESS;
}
예제 #12
0
/*
 * Refresh the node energy reading.
 *
 * The refresh is skipped when local_energy is NULL or its current_watts
 * is NO_VAL.  Always returns SLURM_SUCCESS.
 */
extern int acct_gather_energy_p_update_node_energy(void)
{
	xassert(_run_in_daemon());

	if (local_energy && (local_energy->current_watts != NO_VAL))
		_get_joules_task(local_energy);

	return SLURM_SUCCESS;
}
예제 #13
0
/*
 * Plugin unload hook.  In a daemon, shuts down the cpuacct and memory
 * cgroup subsystems and the energy gathering layer, then releases the
 * cgroup configuration.  Always returns SLURM_SUCCESS.
 */
extern int fini (void)
{
	if (!_run_in_daemon())
		return SLURM_SUCCESS;

	/* The blkio subsystem has no teardown call here. */
	jobacct_gather_cgroup_cpuacct_fini(&slurm_cgroup_conf);
	jobacct_gather_cgroup_memory_fini(&slurm_cgroup_conf);
	acct_gather_energy_fini();

	/* Unload the configuration last. */
	free_slurm_cgroup_conf(&slurm_cgroup_conf);

	return SLURM_SUCCESS;
}
예제 #14
0
/*
 * Configuration hook for the InfiniBand/OFED plugin.
 *
 * When tbl is given, "InfinibandOFEDPort" is read into ofed_conf.port,
 * falling back to INFINIBAND_DEFAULT_PORT if the key is absent.  In a
 * daemon the plugin then logs that it is loaded and stamps
 * ofed_sens.update_time with the current time.
 */
extern void acct_gather_infiniband_p_conf_set(s_p_hashtbl_t *tbl)
{
	if (tbl &&
	    !s_p_get_uint32(&ofed_conf.port, "InfinibandOFEDPort", tbl))
		ofed_conf.port = INFINIBAND_DEFAULT_PORT;

	if (_run_in_daemon()) {
		debug("%s loaded", plugin_name);
		ofed_sens.update_time = time(NULL);
	}
}
예제 #15
0
/*
 * Plugin load hook: invoked once when the plugin is loaded, ahead of every
 * other entry point.  Global set-up belongs here.
 *
 * In a daemon this initialises the common jobacct_gather code, reads the
 * cgroup configuration, initialises cpuinfo and enables the cpuacct and
 * memory cgroup subsystems.  Whatever was already set up is undone when a
 * later step fails.
 *
 * Returns SLURM_SUCCESS, or SLURM_ERROR when any set-up step fails.
 */
extern int init (void)
{
	/* None of this is needed on the slurmctld, so it is skipped
	 * unless running in a daemon. */
	if (_run_in_daemon()) {
		jag_common_init(0);

		/* Load the cgroup configuration. */
		if (read_slurm_cgroup_conf(&slurm_cgroup_conf))
			return SLURM_ERROR;

		/* Set up the cpuinfo internal data. */
		if (xcpuinfo_init() != XCPUINFO_SUCCESS)
			goto fail_conf;

		/* Turn on the cpuacct cgroup subsystem. */
		if (jobacct_gather_cgroup_cpuacct_init(&slurm_cgroup_conf)
		    != SLURM_SUCCESS)
			goto fail_cpuinfo;

		/* Turn on the memory cgroup subsystem. */
		if (jobacct_gather_cgroup_memory_init(&slurm_cgroup_conf)
		    != SLURM_SUCCESS)
			goto fail_cpuinfo;

		/* FIXME: enable the blkio subsystem through
		 * jobacct_gather_cgroup_blkio_init() once kernel support
		 * is ready, with the same failure handling as above. */
	}

	verbose("%s loaded", plugin_name);
	return SLURM_SUCCESS;

fail_cpuinfo:
	xcpuinfo_fini();
fail_conf:
	free_slurm_cgroup_conf(&slurm_cgroup_conf);
	return SLURM_ERROR;
}
예제 #16
0
/*
 * Plugin unload hook.  In a daemon: if a source port is open, take a last
 * InfiniBand reading and close the port; then log "ofed: ended" when the
 * InfiniBand debug flag is set.  Always returns SLURM_SUCCESS.
 */
extern int fini(void)
{
	if (_run_in_daemon()) {
		if (srcport) {
			_update_node_infiniband();
			mad_rpc_close_port(srcport);
		}

		if (debug_flags & DEBUG_FLAG_INFINIBAND)
			info("ofed: ended");
	}

	return SLURM_SUCCESS;
}
예제 #17
0
/*
 * Plugin load hook: invoked once when the plugin is loaded, ahead of every
 * other entry point.  Global set-up belongs here.
 *
 * In a daemon this caches the debug flags and stores in tres_pos the
 * result of looking up the TRES with type "fs" and name "lustre".
 * Always returns SLURM_SUCCESS.
 */
extern int init(void)
{
	slurmdb_tres_rec_t lustre_tres;

	if (!_run_in_daemon())
		return SLURM_SUCCESS;

	debug_flags = slurm_get_debug_flags();

	/* Only type and name are set in the lookup key; the rest is zero. */
	memset(&lustre_tres, 0, sizeof(lustre_tres));
	lustre_tres.type = "fs";
	lustre_tres.name = "lustre";
	tres_pos = assoc_mgr_find_tres_pos(&lustre_tres, false);

	return SLURM_SUCCESS;
}
예제 #18
0
/*
 * Record end-of-task attributes for the task with pid taskpid in the open
 * HDF5 profile.
 *
 * Nothing is written, and SLURM_SUCCESS is returned, when the step id is
 * NO_VAL or when _do_profile() reports that nothing should be profiled.
 * Otherwise the per-task group is looked up under the tasks group, created
 * (and tagged with the task id) if it does not exist yet, and the
 * CPUs-per-task attribute is written to it.
 *
 * Returns SLURM_SUCCESS on success; SLURM_FAILURE when the pid cannot be
 * mapped to a task id, the HDF5 file is not open, or a group cannot be
 * created.
 */
extern int acct_gather_profile_p_task_end(pid_t taskpid)
{
	hid_t   gid_task;
	char 	group_task[MAX_GROUP_NAME+1];
	uint32_t task_id;
	int rc = SLURM_SUCCESS;

	xassert(_run_in_daemon());
	xassert(g_job);

	if (g_job->stepid == NO_VAL)
		return rc;

	xassert(g_profile_running != ACCT_GATHER_PROFILE_NOT_SET);

	if (!_do_profile(ACCT_GATHER_PROFILE_NOT_SET, g_profile_running))
		return rc;

	if (_get_taskid_from_pid(taskpid, &task_id) != SLURM_SUCCESS)
		return SLURM_FAILURE;
	if (file_id == -1) {
		info("PROFILE: add_task_data, HDF5 file is not open");
		return SLURM_FAILURE;
	}
	/* The tasks group is created on first use. */
	if (gid_tasks < 0) {
		gid_tasks = make_group(gid_node, GRP_TASKS);
		if (gid_tasks < 1) {
			info("PROFILE: Failed to create Tasks group");
			return SLURM_FAILURE;
		}
	}
	/* task_id is unsigned, so format it with %u, and bound the write
	 * to the buffer size. */
	snprintf(group_task, sizeof(group_task), "%s_%u", GRP_TASK, task_id);
	gid_task = get_group(gid_tasks, group_task);
	if (gid_task == -1) {
		gid_task = make_group(gid_tasks, group_task);
		if (gid_task < 0) {
			info("Failed to open tasks %s", group_task);
			return SLURM_FAILURE;
		}
		put_int_attribute(gid_task, ATTR_TASKID, task_id);
	}
	put_int_attribute(gid_task, ATTR_CPUPERTASK, g_job->cpus_per_task);

	/* The task group handle is only held in this call; release it so
	 * it does not stay open. */
	H5Gclose(gid_task);

	if (debug_flags & DEBUG_FLAG_PROFILE)
		info("PROFILE: task_end");
	return rc;
}
예제 #19
0
/*
 * Task start hook for the profile plugin.  taskid is not used; the only
 * effect is a debug log line when profiling is running and the profile
 * debug flag is set.  Always returns SLURM_SUCCESS.
 */
extern int acct_gather_profile_p_task_start(uint32_t taskid)
{
	xassert(_run_in_daemon());
	xassert(g_job);

	xassert(g_profile_running != ACCT_GATHER_PROFILE_NOT_SET);

	if ((g_profile_running > ACCT_GATHER_PROFILE_NONE) &&
	    (debug_flags & DEBUG_FLAG_PROFILE))
		info("PROFILE: task_start");

	return SLURM_SUCCESS;
}
예제 #20
0
extern int acct_gather_energy_p_get_data(enum acct_energy_type data_type,
					 void *data)
{
	int rc = SLURM_SUCCESS;
	acct_gather_energy_t *energy = (acct_gather_energy_t *)data;
	time_t *last_poll = (time_t *)data;

	xassert(_run_in_daemon());

	switch (data_type) {
	case ENERGY_DATA_JOULES_TASK:
		slurm_mutex_lock(&ipmi_mutex);
		if (_is_thread_launcher()) {
			_thread_init();
			_thread_update_node_energy();
		} else
			_get_joules_task(10); /* Since we don't have
						 access to the
						 frequency here just
						 send in something.
					      */
		memcpy(energy, local_energy, sizeof(acct_gather_energy_t));
		slurm_mutex_unlock(&ipmi_mutex);
		break;
	case ENERGY_DATA_STRUCT:
		slurm_mutex_lock(&ipmi_mutex);
		memcpy(energy, local_energy, sizeof(acct_gather_energy_t));
		slurm_mutex_unlock(&ipmi_mutex);
		if (debug_flags & DEBUG_FLAG_ENERGY) {
			info("_get_joules_node_ipmi = consumed %d Joules",
			     energy->consumed_energy);
		}
		break;
	case ENERGY_DATA_LAST_POLL:
		slurm_mutex_lock(&ipmi_mutex);
		*last_poll = local_energy->poll_time;
		slurm_mutex_unlock(&ipmi_mutex);
		break;
	default:
		error("acct_gather_energy_p_get_data: unknown enum %d",
		      data_type);
		rc = SLURM_ERROR;
		break;
	}
	return rc;
}
예제 #21
0
/*
 * Plugin unload hook for the IPMI energy plugin.
 *
 * In a daemon: raises the shutdown flag, then, under ipmi_mutex, cancels
 * the IPMI run thread and joins the cleanup handler thread where those
 * exist, and finally destroys local_energy.  Always returns SLURM_SUCCESS.
 */
extern int fini(void)
{
	if (_run_in_daemon()) {
		flag_energy_accounting_shutdown = true;

		slurm_mutex_lock(&ipmi_mutex);
		if (thread_ipmi_id_run)
			pthread_cancel(thread_ipmi_id_run);
		if (cleanup_handler_thread)
			pthread_join(cleanup_handler_thread, NULL);
		slurm_mutex_unlock(&ipmi_mutex);

		acct_gather_energy_destroy(local_energy);
		local_energy = NULL;
	}

	return SLURM_SUCCESS;
}
예제 #22
0
/*
 * Append one sample record to the profile table table_id.
 *
 * The stored record is a two-word uint64_t header (the time elapsed since
 * step_start_time, then sample_time) followed by the caller's payload
 * copied from data.  The payload length is the table's type_size minus the
 * header size.
 *
 * Returns SLURM_SUCCESS when the record was appended, or when the profile
 * file is already closed (the sample is dropped).  Returns SLURM_ERROR for
 * an invalid table_id, when profiling is not running, when the table's
 * record is too small to hold the header, or when the append fails.
 */
extern int acct_gather_profile_p_add_sample_data(int table_id, void *data,
						 time_t sample_time)
{
	uint64_t header[2];
	table_t *ds;

	debug("acct_gather_profile_p_add_sample_data %d", table_id);

	if (file_id < 0) {
		debug("PROFILE: Trying to add data but profiling is over");
		return SLURM_SUCCESS;
	}

	if (table_id < 0 || table_id >= tables_cur_len) {
		error("PROFILE: trying to add samples to an invalid table %d",
		      table_id);
		return SLURM_ERROR;
	}

	/* ensure that we have to record something */
	xassert(_run_in_daemon());
	xassert(g_job);
	xassert(g_profile_running != ACCT_GATHER_PROFILE_NOT_SET);

	if (g_profile_running <= ACCT_GATHER_PROFILE_NONE)
		return SLURM_ERROR;

	/* tables[] is indexed only now that table_id has been validated. */
	ds = &tables[table_id];

	/* Without this check the payload length below would underflow. */
	if (ds->type_size < sizeof(header)) {
		error("PROFILE: record size of table %d is too small for "
		      "the sample header", table_id);
		return SLURM_ERROR;
	}

	uint8_t send_data[ds->type_size];

	/* Prepend the relative time and the timestamp.  The header is
	 * copied in with memcpy so the byte buffer is never accessed
	 * through a uint64_t pointer. */
	header[0] = difftime(sample_time, step_start_time);
	header[1] = sample_time;
	memcpy(send_data, header, sizeof(header));

	memcpy(send_data + sizeof(header), data,
	       ds->type_size - sizeof(header));

	/* append the record to the table */
	if (H5PTappend(ds->table_id, 1, send_data) < 0) {
		error("PROFILE: Impossible to add data to the table %d; "
		      "maybe the table has not been created?", table_id);
		return SLURM_ERROR;
	}

	return SLURM_SUCCESS;
}
예제 #23
0
/*
 * Close the node's HDF5 profile at the end of a step.
 *
 * Does nothing when profiling is not running.  Otherwise closes every
 * table and group in tables[] and groups[], then the totals, samples,
 * tasks and node groups and the file itself where their ids are positive,
 * calls profile_fini() and marks the file as closed (file_id = -1).
 *
 * Always returns SLURM_SUCCESS.
 */
extern int acct_gather_profile_p_node_step_end(void)
{
	size_t idx;

	xassert(_run_in_daemon());

	xassert(g_profile_running != ACCT_GATHER_PROFILE_NOT_SET);

	/* The --profile option itself is not consulted: an HDF5 file that
	 * has been opened always has to be closed. */
	if (g_profile_running <= ACCT_GATHER_PROFILE_NONE)
		return SLURM_SUCCESS;

	if (debug_flags & DEBUG_FLAG_PROFILE)
		info("PROFILE: node_step_end (shutdown)");

	/* Tables first ... */
	idx = 0;
	while (idx < tables_cur_len) {
		H5PTclose(tables[idx].table_id);
		idx++;
	}
	/* ... then the groups. */
	idx = 0;
	while (idx < groups_len) {
		H5Gclose(groups[idx]);
		idx++;
	}

	if (gid_totals > 0)
		H5Gclose(gid_totals);
	if (gid_samples > 0)
		H5Gclose(gid_samples);
	if (gid_tasks > 0)
		H5Gclose(gid_tasks);
	if (gid_node > 0)
		H5Gclose(gid_node);
	if (file_id > 0)
		H5Fclose(file_id);

	profile_fini();
	file_id = -1;

	return SLURM_SUCCESS;
}
예제 #24
0
/*
 * Configuration hook for the energy plugin.  tbl is not consulted.
 *
 * Outside a daemon nothing happens.  On the first call in a daemon,
 * local_energy is allocated and seeded: current_watts becomes NO_VAL when
 * _get_latest_stats() fails, otherwise _get_joules_task() fills it in.
 * Every daemon call logs that the plugin is loaded.
 */
extern void acct_gather_energy_p_conf_set(s_p_hashtbl_t *tbl)
{
	/* Keeps the one-time set-up from running again on later calls. */
	static bool initialized = false;

	if (!_run_in_daemon())
		return;

	if (!initialized) {
		initialized = true;
		local_energy = acct_gather_energy_alloc(1);
		if (_get_latest_stats(GET_ENERGY))
			_get_joules_task(local_energy);
		else
			local_energy->current_watts = NO_VAL;
	}

	debug("%s loaded", plugin_name);
}
예제 #25
0
/*
 * Close the node's HDF5 profile at the end of a step.
 *
 * Does nothing when the step id is NO_VAL or profiling is not running.
 * Otherwise closes the totals, samples, tasks and node groups and the file
 * itself where their ids are positive, calls profile_fini() and marks the
 * file as closed (file_id = -1).
 *
 * Always returns SLURM_SUCCESS.
 */
extern int acct_gather_profile_p_node_step_end()
{
	xassert(_run_in_daemon());

	if (g_job->stepid == NO_VAL)
		return SLURM_SUCCESS;

	xassert(g_profile_running != ACCT_GATHER_PROFILE_NOT_SET);

	/* The --profile option itself is not consulted: an HDF5 file that
	 * has been opened always has to be closed. */
	if (g_profile_running <= ACCT_GATHER_PROFILE_NONE)
		return SLURM_SUCCESS;

	if (debug_flags & DEBUG_FLAG_PROFILE)
		info("PROFILE: node_step_end (shutdown)");

	/* Groups are closed before the file that holds them. */
	if (gid_totals > 0)
		H5Gclose(gid_totals);
	if (gid_samples > 0)
		H5Gclose(gid_samples);
	if (gid_tasks > 0)
		H5Gclose(gid_tasks);
	if (gid_node > 0)
		H5Gclose(gid_node);
	if (file_id > 0)
		H5Fclose(file_id);

	profile_fini();
	file_id = -1;

	return SLURM_SUCCESS;
}
예제 #26
0
/*
 * Start profiling a job step on this node.
 *
 * Remembers job in g_job.  A step id of NO_VAL turns profiling off for the
 * step.  Otherwise the profile level is determined if it is not set yet
 * and, when profiling is enabled, the per-node HDF5 file
 * "<dir>/<user>/<jobid>_<stepid>_<node>.h5" is created, handed to the
 * job's user with mode 0600, and a node group carrying the node name,
 * task count and start time attributes is created in it.
 *
 * Returns SLURM_SUCCESS when profiling is off or the file is ready;
 * SLURM_FAILURE when the file or the node group cannot be created.
 */
extern int acct_gather_profile_p_node_step_start(stepd_step_rec_t* job)
{
	char *h5_path;
	char *opt_name;
	time_t now;

	xassert(_run_in_daemon());

	g_job = job;

	if (g_job->stepid == NO_VAL) {
		g_profile_running = ACCT_GATHER_PROFILE_NONE;
		return SLURM_SUCCESS;
	}

	xassert(hdf5_conf.dir);

	if (debug_flags & DEBUG_FLAG_PROFILE) {
		opt_name = acct_gather_profile_to_string(g_job->profile);
		info("PROFILE: option --profile=%s", opt_name);
	}

	if (g_profile_running == ACCT_GATHER_PROFILE_NOT_SET)
		g_profile_running = _determine_profile();

	if (g_profile_running <= ACCT_GATHER_PROFILE_NONE)
		return SLURM_SUCCESS;

	_create_directories();

	h5_path = xstrdup_printf("%s/%s/%u_%u_%s.h5",
				 hdf5_conf.dir, g_job->pwd->pw_name,
				 g_job->jobid, g_job->stepid,
				 g_job->node_name);

	if (debug_flags & DEBUG_FLAG_PROFILE) {
		opt_name = acct_gather_profile_to_string(g_profile_running);
		info("PROFILE: node_step_start, opt=%s file=%s",
		     opt_name, h5_path);
	}

	/* Create the file (truncating any old one), default properties. */
	profile_init();
	file_id = H5Fcreate(h5_path, H5F_ACC_TRUNC, H5P_DEFAULT,
			    H5P_DEFAULT);

	/* Give the file to the job's user and restrict it to mode 0600. */
	if (chown(h5_path, (uid_t)g_job->pwd->pw_uid,
		  (gid_t)g_job->pwd->pw_gid) < 0)
		error("chown(%s): %m", h5_path);
	chmod(h5_path,  0600);
	xfree(h5_path);

	if (file_id < 1) {
		info("PROFILE: Failed to create Node group");
		return SLURM_FAILURE;
	}

	sprintf(group_node, "/%s_%s", GRP_NODE, g_job->node_name);
	gid_node = H5Gcreate(file_id, group_node, H5P_DEFAULT,
			     H5P_DEFAULT, H5P_DEFAULT);
	if (gid_node < 1) {
		/* No node group: close the file again and mark it closed. */
		H5Fclose(file_id);
		file_id = -1;
		info("PROFILE: Failed to create Node group");
		return SLURM_FAILURE;
	}

	put_string_attribute(gid_node, ATTR_NODENAME, g_job->node_name);
	put_int_attribute(gid_node, ATTR_NTASKS, g_job->node_tasks);
	now = time(NULL);
	put_string_attribute(gid_node, ATTR_STARTTIME, ctime(&now));

	return SLURM_SUCCESS;
}
예제 #27
0
/*
 * Start profiling a job step on this node.
 *
 * Remembers job in g_job, determines the profile level if it is not set
 * yet and, when profiling is enabled, creates the per-node HDF5 file
 * "<dir>/<user>/<jobid>_<step>_<node>.h5" (with <step> spelled "batch"
 * when the step id is NO_VAL).  The file is handed to the job's uid/gid
 * with mode 0600 and a node group carrying the node index, node name, task
 * count, CPUs per task and start time attributes is created in it.
 * step_start_time is set to the current time.
 *
 * Returns SLURM_SUCCESS when profiling is off or the file is ready;
 * SLURM_FAILURE when the file or the node group cannot be created.
 */
extern int acct_gather_profile_p_node_step_start(stepd_step_rec_t* job)
{
	char *h5_path;
	char *opt_name;

	xassert(_run_in_daemon());

	g_job = job;

	xassert(hdf5_conf.dir);

	if (debug_flags & DEBUG_FLAG_PROFILE) {
		opt_name = acct_gather_profile_to_string(g_job->profile);
		info("PROFILE: option --profile=%s", opt_name);
	}

	if (g_profile_running == ACCT_GATHER_PROFILE_NOT_SET)
		g_profile_running = _determine_profile();

	if (g_profile_running <= ACCT_GATHER_PROFILE_NONE)
		return SLURM_SUCCESS;

	_create_directories();

	if (g_job->stepid != NO_VAL) {
		h5_path = xstrdup_printf("%s/%s/%u_%u_%s.h5",
					 hdf5_conf.dir, g_job->user_name,
					 g_job->jobid, g_job->stepid,
					 g_job->node_name);
	} else {
		/* "batch" reads better in a file name than the numeric
		 * value 4294967294. */
		h5_path = xstrdup_printf("%s/%s/%u_%s_%s.h5",
					 hdf5_conf.dir, g_job->user_name,
					 g_job->jobid, "batch",
					 g_job->node_name);
	}

	if (debug_flags & DEBUG_FLAG_PROFILE) {
		opt_name = acct_gather_profile_to_string(g_profile_running);
		info("PROFILE: node_step_start, opt=%s file=%s",
		     opt_name, h5_path);
	}

	/* Create the file (truncating any old one), default properties. */
	file_id = H5Fcreate(h5_path, H5F_ACC_TRUNC, H5P_DEFAULT,
			    H5P_DEFAULT);

	/* Give the file to the job's uid/gid and restrict it to 0600. */
	if (chown(h5_path, (uid_t)g_job->uid, (gid_t)g_job->gid) < 0)
		error("chown(%s): %m", h5_path);
	chmod(h5_path,  0600);
	xfree(h5_path);

	if (file_id < 1) {
		info("PROFILE: Failed to create Node group");
		return SLURM_FAILURE;
	}

	/* fd_set_close_on_exec(file_id) is not supported for HDF5. */
	sprintf(group_node, "/%s", g_job->node_name);
	gid_node = make_group(file_id, group_node);
	if (gid_node < 0) {
		/* No node group: close the file again and mark it closed. */
		H5Fclose(file_id);
		file_id = -1;
		info("PROFILE: Failed to create Node group");
		return SLURM_FAILURE;
	}

	put_int_attribute(gid_node, ATTR_NODEINX, g_job->nodeid);
	put_string_attribute(gid_node, ATTR_NODENAME, g_job->node_name);
	put_int_attribute(gid_node, ATTR_NTASKS, g_job->node_tasks);
	put_int_attribute(gid_node, ATTR_CPUPERTASK, g_job->cpus_per_task);

	step_start_time = time(NULL);
	put_string_attribute(gid_node, ATTR_STARTTIME,
			     slurm_ctime2(&step_start_time));

	return SLURM_SUCCESS;
}
예제 #28
0
/*
 * Store one sample of the given profile type in the open HDF5 file.
 *
 * data is interpreted according to type:
 *   ACCT_GATHER_PROFILE_ENERGY  - acct_energy_data_t *
 *   ACCT_GATHER_PROFILE_TASK    - struct jobacctinfo *
 *   ACCT_GATHER_PROFILE_LUSTRE  - struct lustre_data *
 *   ACCT_GATHER_PROFILE_NETWORK - acct_network_data_t *
 * The relevant fields are copied into the matching profile_*_t record,
 * which is written as a new, sequentially numbered dataset under the
 * time-series group for that type (both groups are created on demand).
 *
 * Returns SLURM_SUCCESS when the sample was written, or when nothing has
 * to be recorded (step id is NO_VAL, or _do_profile() says no).
 * Returns SLURM_ERROR for an unknown type or when a task pid cannot be
 * mapped to a task id, and SLURM_FAILURE when the HDF5 file is not open
 * or a group cannot be created.
 */
extern int acct_gather_profile_p_add_sample_data(uint32_t type, void *data)
{
	hid_t   g_sample_grp;
	char    group[MAX_GROUP_NAME+1];
	char 	group_sample[MAX_GROUP_NAME+1];
	static uint32_t sample_no = 0;
	uint32_t task_id = 0;
	void *send_profile = NULL;
	char *type_name = NULL;

	profile_task_t  profile_task;
	profile_network_t  profile_network;
	profile_energy_t  profile_energy;
	profile_io_t  profile_io;

	/* The same pointer viewed as each possible input type; only the
	 * view matching 'type' is dereferenced below. */
	struct jobacctinfo *jobacct = (struct jobacctinfo *)data;
	acct_network_data_t *net = (acct_network_data_t *)data;
	acct_energy_data_t *ener = (acct_energy_data_t *)data;
	struct lustre_data *lus = (struct lustre_data *)data;

	xassert(_run_in_daemon());
	xassert(g_job);

	if (g_job->stepid == NO_VAL)
		return SLURM_SUCCESS;

	xassert(g_profile_running != ACCT_GATHER_PROFILE_NOT_SET);

	if (!_do_profile(type, g_profile_running))
		return SLURM_SUCCESS;

	switch (type) {
	case ACCT_GATHER_PROFILE_ENERGY:
		snprintf(group, sizeof(group), "%s", GRP_ENERGY);

		memset(&profile_energy, 0, sizeof(profile_energy_t));
		profile_energy.time = ener->time;
		profile_energy.cpu_freq = ener->cpu_freq;
		profile_energy.power = ener->power;

		send_profile = &profile_energy;
		break;
	case ACCT_GATHER_PROFILE_TASK:
		if (_get_taskid_from_pid(jobacct->pid, &task_id)
		    != SLURM_SUCCESS)
			return SLURM_ERROR;

		snprintf(group, sizeof(group), "%s_%u", GRP_TASK, task_id);

		memset(&profile_task, 0, sizeof(profile_task_t));
		profile_task.time = time(NULL);
		profile_task.cpu_freq = jobacct->act_cpufreq;
		profile_task.cpu_time = jobacct->tot_cpu;
		profile_task.cpu_utilization = jobacct->tot_cpu;
		profile_task.pages = jobacct->tot_pages;
		profile_task.read_size = jobacct->tot_disk_read;
		profile_task.rss = jobacct->tot_rss;
		profile_task.vm_size = jobacct->tot_vsize;
		profile_task.write_size = jobacct->tot_disk_write;

		send_profile = &profile_task;
		break;
	case ACCT_GATHER_PROFILE_LUSTRE:
		snprintf(group, sizeof(group), "%s", GRP_LUSTRE);

		memset(&profile_io, 0, sizeof(profile_io_t));
		profile_io.time = time(NULL);
		profile_io.reads = lus->reads;
		profile_io.read_size = lus->read_size;
		profile_io.writes = lus->writes;
		profile_io.write_size = lus->write_size;

		send_profile = &profile_io;

		break;
	case ACCT_GATHER_PROFILE_NETWORK:

		snprintf(group, sizeof(group), "%s", GRP_NETWORK);

		memset(&profile_network, 0, sizeof(profile_network_t));
		profile_network.time = time(NULL);
		profile_network.packets_in = net->packets_in;
		profile_network.size_in = net->size_in;
		profile_network.packets_out = net->packets_out;
		profile_network.size_out = net->size_out;

		send_profile = &profile_network;

		break;
	default:
		/* type is unsigned, so it is printed with %u. */
		error("acct_gather_profile_p_add_sample_data: "
		      "Unknown type %u sent", type);
		return SLURM_ERROR;
	}

	type_name = acct_gather_profile_type_to_string(type);

	if (debug_flags & DEBUG_FLAG_PROFILE)
		info("PROFILE: add_sample_data Group-%s Type=%s",
		     group, type_name);

	if (file_id == -1) {
		if (debug_flags & DEBUG_FLAG_PROFILE) {
			// This can happen from samples from the gather threads
			// before the step actually starts.
			info("PROFILE: add_sample_data, HDF5 file not open");
		}
		return SLURM_FAILURE;
	}
	/* The time-series group is created on first use. */
	if (gid_samples < 0) {
		gid_samples = make_group(gid_node, GRP_SAMPLES);
		if (gid_samples < 1) {
			info("PROFILE: failed to create TimeSeries group");
			return SLURM_FAILURE;
		}
	}
	g_sample_grp = get_group(gid_samples, group);
	if (g_sample_grp < 0) {
		g_sample_grp = make_group(gid_samples, group);
		if (g_sample_grp < 0) {
			info("PROFILE: failed to open TimeSeries %s", group);
			return SLURM_FAILURE;
		}
		put_string_attribute(g_sample_grp, ATTR_DATATYPE, type_name);
	}
	/* group_sample is no larger than group, yet the name is group plus
	 * an 11-character suffix: bound the write so a long group name is
	 * truncated instead of overrunning the buffer.  sample_no is
	 * unsigned, so it is formatted with %u. */
	snprintf(group_sample, sizeof(group_sample), "%s_%10.10u",
		 group, ++sample_no);
	put_hdf5_data(g_sample_grp, type, SUBDATA_SAMPLE,
		      group_sample, send_profile, 1);
	H5Gclose(g_sample_grp);

	return SLURM_SUCCESS;
}
예제 #29
0
/*
 * Configuration hook for the IPMI energy plugin.
 *
 * slurm_ipmi_conf is first reset to its initial values.  When tbl is
 * given, every "EnergyIPMI*" key found in it overrides the matching field;
 * boolean keys that are absent are set to false, the username falls back
 * to DEFAULT_IPMI_USER and the password to a placeholder string.
 * "EnergyIPMIFrequency" must end up positive, otherwise fatal() is called.
 *
 * Outside a daemon nothing more happens.  On the first call in a daemon,
 * local_energy is allocated and zeroed; then either the launcher thread
 * is started (when _is_thread_launcher() says so) or an initial reading
 * is taken with _get_joules_task(0).
 */
extern void acct_gather_energy_p_conf_set(s_p_hashtbl_t *tbl)
{
	char *tmp_char = NULL;

	/* Set initial values */
	reset_slurm_ipmi_conf(&slurm_ipmi_conf);

	if (tbl) {
		/* ipmi initialisation parameters */
		s_p_get_uint32(&slurm_ipmi_conf.driver_type,
			       "EnergyIPMIDriverType", tbl);
		s_p_get_uint32(&slurm_ipmi_conf.disable_auto_probe,
			       "EnergyIPMIDisableAutoProbe", tbl);
		s_p_get_uint32(&slurm_ipmi_conf.driver_address,
			       "EnergyIPMIDriverAddress", tbl);
		s_p_get_uint32(&slurm_ipmi_conf.register_spacing,
			       "EnergyIPMIRegisterSpacing", tbl);

		s_p_get_string(&slurm_ipmi_conf.driver_device,
			       "EnergyIPMIDriverDevice", tbl);

		s_p_get_uint32(&slurm_ipmi_conf.protocol_version,
			       "EnergyIPMIProtocolVersion", tbl);

		if (!s_p_get_string(&slurm_ipmi_conf.username,
				    "EnergyIPMIUsername", tbl))
			slurm_ipmi_conf.username = xstrdup(DEFAULT_IPMI_USER);

		s_p_get_string(&slurm_ipmi_conf.password,
			       "EnergyIPMIPassword", tbl);
		/* Placeholder used when no password is configured. */
		if (!slurm_ipmi_conf.password)
			slurm_ipmi_conf.password = xstrdup("foopassword");

		s_p_get_uint32(&slurm_ipmi_conf.privilege_level,
			       "EnergyIPMIPrivilegeLevel", tbl);
		s_p_get_uint32(&slurm_ipmi_conf.authentication_type,
			       "EnergyIPMIAuthenticationType", tbl);
		s_p_get_uint32(&slurm_ipmi_conf.cipher_suite_id,
			       "EnergyIPMICipherSuiteId", tbl);
		s_p_get_uint32(&slurm_ipmi_conf.session_timeout,
			       "EnergyIPMISessionTimeout", tbl);
		s_p_get_uint32(&slurm_ipmi_conf.retransmission_timeout,
			       "EnergyIPMIRetransmissionTimeout", tbl);
		s_p_get_uint32(&slurm_ipmi_conf.workaround_flags,
			       "EnergyIPMIWorkaroundFlags", tbl);

		/* Boolean options default to false when not configured. */
		if (!s_p_get_boolean(&slurm_ipmi_conf.reread_sdr_cache,
				     "EnergyIPMIRereadSdrCache", tbl))
			slurm_ipmi_conf.reread_sdr_cache = false;
		if (!s_p_get_boolean(&slurm_ipmi_conf.
				     ignore_non_interpretable_sensors,
				     "EnergyIPMIIgnoreNonInterpretableSensors",
				     tbl))
			slurm_ipmi_conf.ignore_non_interpretable_sensors =
				false;
		if (!s_p_get_boolean(&slurm_ipmi_conf.bridge_sensors,
				     "EnergyIPMIBridgeSensors", tbl))
			slurm_ipmi_conf.bridge_sensors = false;
		if (!s_p_get_boolean(&slurm_ipmi_conf.interpret_oem_data,
				     "EnergyIPMIInterpretOemData", tbl))
			slurm_ipmi_conf.interpret_oem_data = false;
		if (!s_p_get_boolean(&slurm_ipmi_conf.shared_sensors,
				     "EnergyIPMISharedSensors", tbl))
			slurm_ipmi_conf.shared_sensors = false;
		if (!s_p_get_boolean(&slurm_ipmi_conf.discrete_reading,
				     "EnergyIPMIDiscreteReading", tbl))
			slurm_ipmi_conf.discrete_reading = false;
		if (!s_p_get_boolean(&slurm_ipmi_conf.ignore_scanning_disabled,
				     "EnergyIPMIIgnoreScanningDisabled", tbl))
			slurm_ipmi_conf.ignore_scanning_disabled = false;
		if (!s_p_get_boolean(&slurm_ipmi_conf.assume_bmc_owner,
				     "EnergyIPMIAssumeBmcOwner", tbl))
			slurm_ipmi_conf.assume_bmc_owner = false;
		if (!s_p_get_boolean(&slurm_ipmi_conf.entity_sensor_names,
				     "EnergyIPMIEntitySensorNames", tbl))
			slurm_ipmi_conf.entity_sensor_names = false;

		s_p_get_uint32(&slurm_ipmi_conf.freq,
			       "EnergyIPMIFrequency", tbl);

		/* The cast also rejects values that are negative when
		 * read as a signed int. */
		if ((int)slurm_ipmi_conf.freq <= 0)
			fatal("EnergyIPMIFrequency must be a positive integer "
			      "in acct_gather.conf.");

		if (!s_p_get_boolean(&(slurm_ipmi_conf.adjustment),
				     "EnergyIPMICalcAdjustment", tbl))
			slurm_ipmi_conf.adjustment = false;

		s_p_get_uint32(&slurm_ipmi_conf.power_sensor_num,
			       "EnergyIPMIPowerSensor", tbl);

		s_p_get_uint32(&slurm_ipmi_conf.timeout,
			       "EnergyIPMITimeout", tbl);

		/* Only the value "Temp" changes the monitored variable. */
		if (s_p_get_string(&tmp_char, "EnergyIPMIVariable", tbl)) {
			if (!strcmp(tmp_char, "Temp"))
				slurm_ipmi_conf.variable =
					IPMI_MONITORING_SENSOR_TYPE_TEMPERATURE;
			xfree(tmp_char);
		}
	}

	if (!_run_in_daemon())
		return;

	if (!flag_init) {
		local_energy = acct_gather_energy_alloc();
		local_energy->consumed_energy=0;
		local_energy->base_consumed_energy=0;
		local_energy->base_watts=0;
		flag_init = true;
		if (_is_thread_launcher()) {
			pthread_attr_t attr;
			slurm_attr_init(&attr);
			if (pthread_create(&thread_ipmi_id_launcher, &attr,
					   &_thread_launcher, NULL)) {
				debug("energy accounting failed to create "
				      "_thread_launcher thread: %m");
			} else if (debug_flags & DEBUG_FLAG_ENERGY) {
				/* Announce the launch only when the thread
				 * was actually created. */
				info("%s thread launched", plugin_name);
			}
			slurm_attr_destroy(&attr);
		} else
			_get_joules_task(0);
	}

	verbose("%s loaded", plugin_name);
}