static void *_watch_tasks(void *arg)
{
	/* Give chance for processes to spawn before starting
	 * the polling. This should largely eliminate the
	 * the chance of having /proc open when the tasks are
	 * spawned, which would prevent a valid checkpoint/restart
	 * with some systems */
	_task_sleep(1);

	while(!jobacct_shutdown) {  /* Do this until shutdown is requested */
		if(!jobacct_suspended) {
			_get_process_data();	/* Update the data */
		}
		_task_sleep(freq);
	}
	return NULL;
}
Exemple #2
0
static void *_thread_launcher(void *no_data)
{
	//what arg would countain? frequency, socket?
	pthread_attr_t attr_run;
	time_t begin_time;
	int rc = SLURM_SUCCESS;

	slurm_attr_init(&attr_run);
	if (pthread_create(&thread_ipmi_id_run, &attr_run,
			   &_thread_ipmi_run, NULL)) {
		//if (pthread_create(... (void *)arg)) {
		debug("energy accounting failed to create _thread_ipmi_run "
		      "thread: %m");
	}
	slurm_attr_destroy(&attr_run);

	begin_time = time(NULL);
	while (rc == SLURM_SUCCESS) {
		if (time(NULL) - begin_time > slurm_ipmi_conf.timeout) {
			error("ipmi thread init timeout");
			rc = SLURM_ERROR;
			break;
		}
		if (flag_thread_started)
			break;
		_task_sleep(1);
	}

	if (rc != SLURM_SUCCESS) {
		error("%s threads failed to start in a timely manner",
		     plugin_name);

		if (thread_ipmi_id_run) {
			pthread_cancel(thread_ipmi_id_run);
			pthread_join(thread_ipmi_id_run, NULL);
		}

		flag_energy_accounting_shutdown = true;
	} else {
		/* This is here to join the decay thread so we don't core
		 * dump if in the sleep, since there is no other place to join
		 * we have to create another thread to do it. */
		slurm_attr_init(&attr_run);
		if (pthread_create(&cleanup_handler_thread, &attr_run,
				   _cleanup_thread, NULL))
			fatal("pthread_create error %m");

		slurm_attr_destroy(&attr_run);
	}

	return NULL;
}
Exemple #3
0
extern jobacctinfo_t *jobacct_gather_stat_task(pid_t pid)
{
	if (!plugin_polling || _jobacct_shutdown_test())
		return NULL;
	else if (pid) {
		struct jobacctinfo *jobacct = NULL;
		struct jobacctinfo *ret_jobacct = NULL;
		ListIterator itr = NULL;

		_poll_data(0);

		slurm_mutex_lock(&task_list_lock);
		if (!task_list) {
			error("no task list created!");
			goto error;
		}

		itr = list_iterator_create(task_list);
		while ((jobacct = list_next(itr))) {
			if (jobacct->pid == pid)
				break;
		}
		list_iterator_destroy(itr);
		if (jobacct == NULL)
			goto error;
		ret_jobacct = xmalloc(sizeof(struct jobacctinfo));
		memcpy(ret_jobacct, jobacct, sizeof(struct jobacctinfo));
	error:
		slurm_mutex_unlock(&task_list_lock);
		return ret_jobacct;
	} else {
		/* In this situation, we are just trying to get a
		 * basis of information since we are not pollng.  So
		 * we will give a chance for processes to spawn before we
		 * gather information. This should largely eliminate the
		 * the chance of having /proc open when the tasks are
		 * spawned, which would prevent a valid checkpoint/restart
		 * with some systems */
		_task_sleep(1);
		_poll_data(0);
		return NULL;
	}
}
extern struct jobacctinfo *jobacct_gather_p_stat_task(pid_t pid)
{
	if (jobacct_shutdown)
		return NULL;
	else if(pid) {
		_get_process_data();
		return jobacct_common_stat_task(pid, task_list);
	} else {
		/* In this situation, we are just trying to get a
		 * basis of information since we are not pollng.  So
		 * we will give a chance for processes to spawn before we
		 * gather information. This should largely eliminate the
		 * the chance of having /proc open when the tasks are
		 * spawned, which would prevent a valid checkpoint/restart
		 * with some systems */
		_task_sleep(1);
		_get_process_data();
		return NULL;
	}
}
Exemple #5
0
static void *_watch_tasks(void *arg)
{
	int type = PROFILE_TASK;
	/* Give chance for processes to spawn before starting
	 * the polling. This should largely eliminate the
	 * the chance of having /proc open when the tasks are
	 * spawned, which would prevent a valid checkpoint/restart
	 * with some systems */
	_task_sleep(1);
	while (!jobacct_shutdown && acct_gather_profile_running) {
		/* Do this until shutdown is requested */
		_poll_data();
		slurm_mutex_lock(&acct_gather_profile_timer[type].notify_mutex);
		pthread_cond_wait(
			&acct_gather_profile_timer[type].notify,
			&acct_gather_profile_timer[type].notify_mutex);
		slurm_mutex_unlock(&acct_gather_profile_timer[type].
				   notify_mutex);
	}
	return NULL;
}
Exemple #6
0
/*
 * _thread_ipmi_run is the thread calling ipmi and launching _thread_ipmi_write
 */
static void *_thread_ipmi_run(void *no_data)
{
// need input (attr)
	int time_lost;

	(void) pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
	(void) pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);

	flag_energy_accounting_shutdown = false;
	if (debug_flags & DEBUG_FLAG_ENERGY)
		info("ipmi-thread: launched");

	slurm_mutex_lock(&ipmi_mutex);
	if (_thread_init() != SLURM_SUCCESS) {
		if (debug_flags & DEBUG_FLAG_ENERGY)
			info("ipmi-thread: aborted");
		slurm_mutex_unlock(&ipmi_mutex);
		return NULL;
	}
	slurm_mutex_unlock(&ipmi_mutex);

	flag_thread_started = true;

	//loop until slurm stop
	while (!flag_energy_accounting_shutdown) {
		time_lost = (int)(time(NULL) - last_update_time);
		_task_sleep(slurm_ipmi_conf.freq - time_lost);
		slurm_mutex_lock(&ipmi_mutex);
		_thread_update_node_energy();
		slurm_mutex_unlock(&ipmi_mutex);
	}

	if (debug_flags & DEBUG_FLAG_ENERGY)
		info("ipmi-thread: ended");

	return NULL;
}
Exemple #7
0
static void *_watch_tasks(void *arg)
{
	int type = PROFILE_TASK;

#if HAVE_SYS_PRCTL_H
	if (prctl(PR_SET_NAME, "acctg", NULL, NULL, NULL) < 0) {
		error("%s: cannot set my name to %s %m", __func__, "acctg");
	}
#endif

	(void) pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
	(void) pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);

	/* Give chance for processes to spawn before starting
	 * the polling. This should largely eliminate the
	 * the chance of having /proc open when the tasks are
	 * spawned, which would prevent a valid checkpoint/restart
	 * with some systems */
	_task_sleep(1);
	while (_init_run_test() && !_jobacct_shutdown_test() &&
	       acct_gather_profile_test()) {
		/* Do this until shutdown is requested */
		slurm_mutex_lock(&acct_gather_profile_timer[type].notify_mutex);
		slurm_cond_wait(
			&acct_gather_profile_timer[type].notify,
			&acct_gather_profile_timer[type].notify_mutex);
		slurm_mutex_unlock(&acct_gather_profile_timer[type].
				   notify_mutex);
		slurm_mutex_lock(&g_context_lock);
		/* The initial poll is done after the last task is added */
		_poll_data(1);
		slurm_mutex_unlock(&g_context_lock);

	}
	return NULL;
}