コード例 #1
0
ファイル: cookies.c プロジェクト: HPCNow/slurm
/*
 * Launch the detached background thread that extends cookie leases.
 *
 * Nothing is started (and success is reported) when the caller is not
 * running inside slurmctld. Thread creation is attempted at most twice,
 * 1000 usec apart.
 *
 * RET SLURM_SUCCESS if the thread was created or is not needed here,
 *     SLURM_ERROR if it could not be created.
 */
extern int start_lease_extender(void)
{
	pthread_attr_t extender_attr;
	pthread_t extender_tid;
	int failures;

	/* The lease extender only lives in the slurmctld */
	if (!_in_slurmctld())
		return SLURM_SUCCESS;

	/* The thread is never joined, so ask for a detached one */
	slurm_attr_init(&extender_attr);
	if (pthread_attr_setdetachstate(&extender_attr,
					PTHREAD_CREATE_DETACHED) != 0) {
		CRAY_ERR("pthread_attr_setdetachstate error %m");
	}

	for (failures = 0;
	     pthread_create(&extender_tid, &extender_attr,
			    &_lease_extender, NULL) != 0; ) {
		error("pthread_create error %m");
		failures++;
		if (failures > 1) {
			CRAY_ERR("Can't create pthread");
			slurm_attr_destroy(&extender_attr);
			return SLURM_ERROR;
		}

		/* brief pause before the next attempt */
		usleep(1000);
	}

	slurm_attr_destroy(&extender_attr);
	return SLURM_SUCCESS;
}
コード例 #2
0
ファイル: agent.c プロジェクト: HPCNow/slurm
/*
 * Start the detached PMI2 agent thread and wait until it reports running.
 *
 * Creation is retried once per second; after more than MAX_RETRIES failures
 * the error is logged and SLURM_ERROR is returned. On success the caller is
 * held (yielding the CPU) until _agent_running_test() turns true.
 *
 * RET SLURM_SUCCESS or SLURM_ERROR
 */
extern int
pmi2_start_agent(void)
{
	pthread_t agent_tid = 0;
	pthread_attr_t agent_attr;
	int failures = 0;

	pthread_attr_init(&agent_attr);
	pthread_attr_setdetachstate(&agent_attr, PTHREAD_CREATE_DETACHED);

	for (;;) {
		/* keep the result in errno so that %m below reports it */
		errno = pthread_create(&agent_tid, &agent_attr, &_agent, NULL);
		if (errno == 0)
			break;

		failures++;
		if (failures > MAX_RETRIES) {
			error ("mpi/pmi2: pthread_create error %m");
			slurm_attr_destroy(&agent_attr);
			return SLURM_ERROR;
		}
		sleep(1);
	}
	slurm_attr_destroy(&agent_attr);

	debug("mpi/pmi2: started agent thread (%lu)",
	      (unsigned long) agent_tid);

	/* do not return before the agent has flagged itself as running */
	while (!_agent_running_test())
		sched_yield();

	return SLURM_SUCCESS;
}
コード例 #3
0
/*
 * Thread body that starts the IPMI polling thread and supervises its start-up.
 *
 * After spawning _thread_ipmi_run it polls flag_thread_started once per
 * _task_sleep(1) until the flag is set or slurm_ipmi_conf.timeout seconds
 * have elapsed. On timeout the polling thread is cancelled and joined and
 * energy accounting is flagged for shutdown; otherwise the cleanup handler
 * thread is created.
 *
 * IN no_data - unused
 * RET always NULL
 */
static void *_thread_launcher(void *no_data)
{
	pthread_attr_t attr;
	time_t started_at;
	int rc = SLURM_SUCCESS;

	slurm_attr_init(&attr);
	if (pthread_create(&thread_ipmi_id_run, &attr,
			   &_thread_ipmi_run, NULL) != 0) {
		debug("energy accounting failed to create _thread_ipmi_run thread: %m");
	}
	slurm_attr_destroy(&attr);

	/* Give the polling thread a bounded amount of time to come up */
	started_at = time(NULL);
	for (;;) {
		if ((time(NULL) - started_at) > slurm_ipmi_conf.timeout) {
			error("ipmi thread init timeout");
			rc = SLURM_ERROR;
			break;
		}
		if (flag_thread_started)
			break;
		_task_sleep(1);
	}

	if (rc == SLURM_SUCCESS) {
		/* Start-up succeeded: hand over to the cleanup thread */
		slurm_attr_init(&attr);
		if (pthread_create(&cleanup_handler_thread, &attr,
				   _cleanup_thread, NULL) != 0)
			fatal("pthread_create error %m");
		slurm_attr_destroy(&attr);
		return NULL;
	}

	/* Start-up timed out: tear the polling thread down again */
	error("%s threads failed to start in a timely manner",
	      plugin_name);

	if (thread_ipmi_id_run) {
		pthread_cancel(thread_ipmi_id_run);
		pthread_join(thread_ipmi_id_run, NULL);
	}

	flag_energy_accounting_shutdown = true;

	return NULL;
}
コード例 #4
0
static void _create_ping_thread()
{
	pthread_attr_t attr;
	slurm_attr_init(&attr);

	stop_pinging = false;
	if (!ping_thread &&
	    (pthread_create(&ping_thread, &attr, _ping_thread, NULL) != 0)) {
		error("pthread_create of message thread: %m");
		slurm_attr_destroy(&attr);
		ping_thread = 0;
		return;
	}
	slurm_attr_destroy(&attr);
}
コード例 #5
0
/*
 * Initialise bridge status tracking and start the two state agent threads.
 *
 * Creates the kill-job list if it does not exist yet, then (under
 * thread_flag_mutex) starts _block_state_agent and _mp_state_agent as
 * joinable threads. A failure to create either thread is logged but does
 * not change the return value.
 *
 * RET SLURM_ERROR if already initialised or the block thread already
 *     exists, SLURM_SUCCESS otherwise
 */
extern int bridge_status_init(void)
{
	pthread_attr_t attr;

	if (bridge_status_inited)
		return SLURM_ERROR;

	bridge_status_inited = true;
	if (!kill_job_list)
		kill_job_list = bg_status_create_kill_job_list();

	pthread_mutex_lock(&thread_flag_mutex);
	if (block_thread) {
		debug2("Bluegene threads already running, not starting "
		       "another");
		pthread_mutex_unlock(&thread_flag_mutex);
		return SLURM_ERROR;
	}

	/*
	 * Both threads share one attribute object. It is initialised exactly
	 * once: re-initialising an already initialised pthread_attr_t without
	 * destroying it first is not permitted.
	 * Since we do a join on these threads later we don't make them
	 * detached.
	 */
	slurm_attr_init(&attr);
	if (pthread_create(&block_thread, &attr, _block_state_agent, NULL))
		error("Failed to create block_agent thread");
	if (pthread_create(&state_thread, &attr, _mp_state_agent, NULL))
		error("Failed to create state_agent thread");
	pthread_mutex_unlock(&thread_flag_mutex);
	slurm_attr_destroy(&attr);

	return SLURM_SUCCESS;
}
コード例 #6
0
/*
 * Register an externally created pid with the extern step and spawn a
 * detached thread that waits on it.
 *
 * The pid is added to the proctrack, task and jobacct_gather plugins before
 * the waiter thread is created.
 *
 * IN job - step record; its stepid must be SLURM_EXTERN_CONT
 * IN pid - process to track
 * RET SLURM_SUCCESS, or SLURM_FAILURE if the step is not the extern step,
 *     one of the plugins rejects the pid, or the thread cannot be created
 */
static int _handle_add_extern_pid_internal(stepd_step_rec_t *job, pid_t pid)
{
	pthread_t thread_id;
	pthread_attr_t attr;
	extern_pid_t *extern_pid;
	jobacct_id_t jobacct_id;
	int retries = 0, rc = SLURM_SUCCESS;

	if (job->stepid != SLURM_EXTERN_CONT) {
		error("%s: non-extern step (%u) given for job %u.",
		      __func__, job->stepid, job->jobid);
		return SLURM_FAILURE;
	}

	debug("%s: for job %u.%u, pid %d",
	      __func__, job->jobid, job->stepid, pid);

	/* track pid: add outside of the below thread so that the pam module
	 * waits until the parent pid is added, before letting the parent spawn
	 * any children. */
	jobacct_id.taskid = job->nodeid;
	jobacct_id.nodeid = job->nodeid;
	jobacct_id.job = job;

	if (proctrack_g_add(job, pid) != SLURM_SUCCESS) {
		error("%s: Job %u can't add pid %d to proctrack plugin in the extern_step.", __func__, job->jobid, pid);
		return SLURM_FAILURE;
	}

	if (task_g_add_pid(pid) != SLURM_SUCCESS) {
		error("%s: Job %u can't add pid %d to task plugin in the extern_step.", __func__, job->jobid, pid);
		return SLURM_FAILURE;
	}

	if (jobacct_gather_add_task(pid, &jobacct_id, 1) != SLURM_SUCCESS) {
		error("%s: Job %u can't add pid %d to jobacct_gather plugin in the extern_step.", __func__, job->jobid, pid);
		return SLURM_FAILURE;
	}

	/*
	 * Build the thread argument only once every registration step has
	 * succeeded, so none of the early returns above can leak it.
	 */
	extern_pid = xmalloc(sizeof(extern_pid_t));
	extern_pid->job = job;
	extern_pid->pid = pid;

	/* spawn a thread that will wait on the pid given */
	slurm_attr_init(&attr);
	pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
	while (pthread_create(&thread_id, &attr,
			      &_wait_extern_pid, (void *) extern_pid)) {
		error("%s: pthread_create: %m", __func__);
		if (++retries > MAX_RETRIES) {
			error("%s: Can't create pthread", __func__);
			/* NOTE(review): extern_pid is not released on this
			 * path; presumably the thread would have freed it -
			 * confirm and free it here if so. */
			rc = SLURM_FAILURE;
			break;
		}
		usleep(10);	/* sleep and again */
	}
	slurm_attr_destroy(&attr);

	return rc;
}
コード例 #7
0
ファイル: slurmd.c プロジェクト: mrhaoji/slurm
/*
 * Start the detached registration engine thread.
 *
 * Thread creation is retried up to three more times (10 usec apart); fatal()
 * is called if it keeps failing or if the detach state cannot be set.
 */
static void
_spawn_registration_engine(void)
{
	int            rc;
	pthread_attr_t attr;
	pthread_t      id;
	int            retries = 0;

	slurm_attr_init(&attr);
	rc = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
	if (rc != 0) {
		errno = rc;
		fatal("Unable to set detachstate on attr: %m");
		slurm_attr_destroy(&attr);
		return;
	}

	while ((rc = pthread_create(&id, &attr, &_registration_engine, NULL))) {
		/* pthread_create() returns the error number rather than
		 * setting errno, so publish it for the %m below */
		errno = rc;
		error("msg_engine: pthread_create: %m");
		if (++retries > 3) {
			errno = rc;
			fatal("msg_engine: pthread_create: %m");
		}
		usleep(10);	/* sleep and again */
	}

	/* the attribute object is no longer needed once the thread exists */
	slurm_attr_destroy(&attr);

	return;
}
コード例 #8
0
ファイル: jobcomp_script.c プロジェクト: diorsman/slurm
/*
 * init() is called when the plugin is loaded, before any other functions
 * are called.  Put global initialization here.
 *
 * Creates the completion list and starts the detached script agent thread,
 * all under thread_flag_mutex.
 *
 * RET SLURM_SUCCESS, or SLURM_ERROR if the list cannot be created, a script
 *     thread is already recorded, or the agent thread cannot be created
 */
extern int init (void)
{
    pthread_attr_t attr;
    int rc = SLURM_SUCCESS;

    verbose("jobcomp/script plugin loaded init");

    slurm_mutex_lock(&thread_flag_mutex);

    if (comp_list)
        error("Creating duplicate comp_list, possible memory leak");
    if (!(comp_list = list_create((ListDelF) _jobcomp_info_destroy))) {
        slurm_mutex_unlock(&thread_flag_mutex);
        return SLURM_ERROR;
    }

    if (script_thread) {
        debug2( "Script thread already running, not starting another");
        slurm_mutex_unlock(&thread_flag_mutex);
        return SLURM_ERROR;
    }

    slurm_attr_init(&attr);
    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
    if (pthread_create(&script_thread, &attr, _script_agent, NULL)) {
        /* Without the agent nothing would consume comp_list, so report
         * the failure instead of claiming a successful load. */
        error("jobcomp/script: unable to create script agent thread");
        /* the thread id is undefined after a failed create; reset it so
         * the "already running" test above stays meaningful */
        script_thread = 0;
        rc = SLURM_ERROR;
    }

    slurm_mutex_unlock(&thread_flag_mutex);
    slurm_attr_destroy(&attr);

    return rc;
}
コード例 #9
0
ファイル: step_terminate_monitor.c プロジェクト: VURM/slurm
/*
 * Start the step termination monitor thread, once.
 *
 * Does nothing if the monitor is already flagged as running or if no
 * unkillable_program is configured. Otherwise the timeout and program name
 * are copied from the configuration, the monitor thread is created, and the
 * job and step ids are recorded.
 *
 * IN jobid  - job id to record
 * IN stepid - step id to record
 */
void step_terminate_monitor_start(uint32_t jobid, uint32_t stepid)
{
	slurm_ctl_conf_t *conf;
	pthread_attr_t attr;

	pthread_mutex_lock(&lock);

	if (running_flag) {
		pthread_mutex_unlock(&lock);
		return;
	}

	conf = slurm_conf_lock();
	if (conf->unkillable_program == NULL) {
		/* do nothing */
		slurm_conf_unlock();
		pthread_mutex_unlock(&lock);
		return;
	}
	timeout = conf->unkillable_timeout;
	program_name = xstrdup(conf->unkillable_program);
	slurm_conf_unlock();

	slurm_attr_init(&attr);
	/*
	 * Only advertise the monitor as running when the thread really
	 * exists; otherwise tid is not a valid thread id for later use.
	 * NOTE(review): program_name stays allocated if creation fails -
	 * confirm whether it should be released here.
	 */
	if (pthread_create(&tid, &attr, monitor, NULL) == 0) {
		running_flag = 1;
		recorded_jobid = jobid;
		recorded_stepid = stepid;
	}
	slurm_attr_destroy(&attr);

	pthread_mutex_unlock(&lock);

	return;
}
コード例 #10
0
ファイル: gang.c プロジェクト: HPCNow/slurm
static void _spawn_timeslicer_thread(void)
{
	pthread_attr_t thread_attr_msg;
	int retries = 0;

	slurm_mutex_lock( &thread_flag_mutex );
	if (thread_running) {
		error("timeslicer thread already running, not starting "
		      "another");
		slurm_mutex_unlock(&thread_flag_mutex);
		return;
	}

	slurm_attr_init(&thread_attr_msg);
	while (pthread_create(&timeslicer_thread_id, &thread_attr_msg,
			      _timeslicer_thread, NULL)) {
		error("pthread_create error %m");
		if (++retries > 3)
			fatal("Can't create pthread");
		usleep(10000);	/* sleep and retry */
	}
	slurm_attr_destroy(&thread_attr_msg);
	thread_running = true;
	slurm_mutex_unlock(&thread_flag_mutex);
}
コード例 #11
0
ファイル: backfill_wrapper.c プロジェクト: artpol84/slurm
/*
 * Plugin load hook: start the backfill agent thread.
 *
 * Returns SLURM_SUCCESS right away when scheduling is disabled. Otherwise
 * the joinable backfill thread is created under thread_flag_mutex; a
 * creation failure is logged but still reported as success.
 *
 * RET SLURM_ERROR if a backfill thread is already recorded,
 *     SLURM_SUCCESS otherwise
 */
int init( void )
{
	pthread_attr_t agent_attr;

	if (slurmctld_config.scheduling_disabled)
		return SLURM_SUCCESS;

	verbose( "sched: Backfill scheduler plugin loaded" );

	slurm_mutex_lock( &thread_flag_mutex );

	if ( !backfill_thread ) {
		/* the thread is joined later on, so it must stay joinable */
		slurm_attr_init( &agent_attr );
		if (pthread_create( &backfill_thread, &agent_attr,
				    backfill_agent, NULL) != 0)
			error("Unable to start backfill thread: %m");
		slurm_mutex_unlock( &thread_flag_mutex );
		slurm_attr_destroy( &agent_attr );
		return SLURM_SUCCESS;
	}

	debug2( "Backfill thread already running, not starting another" );
	slurm_mutex_unlock( &thread_flag_mutex );
	return SLURM_ERROR;
}
コード例 #12
0
/*
 * Change the task polling frequency (only compiled in with HAVE_AIX).
 *
 * When polling was off (freq == 0) and a non-zero frequency is requested, a
 * detached _watch_tasks thread is created first; if that fails the requested
 * frequency is forced to 0. The new value is then stored in freq, and
 * jobacct_shutdown is set when it ends up being 0.
 *
 * IN frequency - new polling frequency, 0 disables polling
 */
extern void jobacct_gather_p_change_poll(uint16_t frequency)
{
#ifdef HAVE_AIX
	if ((freq == 0) && (frequency != 0)) {
		pthread_attr_t poll_attr;
		pthread_t poll_tid;

		/* polling is being switched on: start the watcher */
		slurm_attr_init(&poll_attr);
		if (pthread_attr_setdetachstate(&poll_attr,
						PTHREAD_CREATE_DETACHED) != 0) {
			error("pthread_attr_setdetachstate error %m");
		}

		if (pthread_create(&poll_tid, &poll_attr,
				   &_watch_tasks, NULL) == 0) {
			debug3("jobacct-gather AIX dynamic logging enabled");
		} else {
			debug("jobacct-gather failed to create _watch_tasks thread: %m");
			frequency = 0;
		}
		slurm_attr_destroy(&poll_attr);
		jobacct_shutdown = false;
	}

	freq = frequency;
	debug("jobacct-gather: frequency changed = %d", frequency);
	if (freq == 0) {
		jobacct_shutdown = true;
	}
#endif
	return;
}
コード例 #13
0
extern int acct_gather_infiniband_startpoll(uint32_t frequency)
{
	int retval = SLURM_SUCCESS;
	pthread_attr_t attr;

	if (acct_gather_infiniband_init() < 0)
		return SLURM_ERROR;

	if (!acct_shutdown) {
		error("acct_gather_infiniband_startpoll: "
		      "poll already started!");
		return retval;
	}

	acct_shutdown = false;

	freq = frequency;

	if (frequency == 0) {   /* don't want dynamic monitoring? */
		debug2("acct_gather_infiniband dynamic logging disabled");
		return retval;
	}

	/* create polling thread */
	slurm_attr_init(&attr);
	if (pthread_create(&watch_node_thread_id, &attr, &_watch_node, NULL)) {
		debug("acct_gather_infiniband failed to create _watch_node "
		      "thread: %m");
	} else
		debug3("acct_gather_infiniband dynamic logging enabled");
	slurm_attr_destroy(&attr);

	return retval;
}
コード例 #14
0
/*
 * Start the step termination monitor thread, once.
 *
 * Does nothing if the monitor is already flagged as running. Otherwise the
 * unkillable timeout and program name are copied from the configuration,
 * the _monitor thread is created with the job record as its argument, and
 * the job and step ids are recorded.
 *
 * IN job - step record handed to the monitor thread
 */
void step_terminate_monitor_start(stepd_step_rec_t *job)
{
	slurm_ctl_conf_t *conf;
	pthread_attr_t attr;

	slurm_mutex_lock(&lock);

	if (running_flag) {
		slurm_mutex_unlock(&lock);
		return;
	}

	conf = slurm_conf_lock();
	timeout = conf->unkillable_timeout;
	program_name = xstrdup(conf->unkillable_program);
	slurm_conf_unlock();

	slurm_attr_init(&attr);
	/*
	 * Only advertise the monitor as running when the thread really
	 * exists; otherwise tid is not a valid thread id for later use.
	 * NOTE(review): program_name stays allocated if creation fails -
	 * confirm whether it should be released here.
	 */
	if (pthread_create(&tid, &attr, _monitor, job) == 0) {
		running_flag = 1;
		recorded_jobid = job->jobid;
		recorded_stepid = job->stepid;
	}
	slurm_attr_destroy(&attr);

	slurm_mutex_unlock(&lock);

	return;
}
コード例 #15
0
/**
 * queue_basil_signal  -  queue job signal on to any APIDs
 * IN job_ptr - job to be signalled
 * IN signal  - signal(7) number
 * IN delay   - how long to delay the signal, in seconds
 * Only signal job if an ALPS reservation exists (non-0 reservation ID).
 */
extern void queue_basil_signal(struct job_record *job_ptr, int signal,
                               uint16_t delay)
{
    args_sig_basil_t *args_sig_basil;
    pthread_attr_t attr_sig_basil;
    pthread_t thread_sig_basil;
    uint32_t resv_id;

    if (_get_select_jobinfo(job_ptr->select_jobinfo->data,
                            SELECT_JOBDATA_RESV_ID, &resv_id) != SLURM_SUCCESS) {
        error("can not read resId for JobId=%u", job_ptr->job_id);
        return;
    }
    if (resv_id == 0)
        return;
    if ((delay == 0) || (delay == (uint16_t) NO_VAL)) {
        /* Send the signal now */
        int rc = basil_signal_apids(resv_id, signal, NULL);

        if (rc)
            error("could not signal APIDs of resId %u: %s", resv_id,
                  basil_strerror(rc));
        return;
    }

    /* Create a thread to send the signal later */
    slurm_attr_init(&attr_sig_basil);
    if (pthread_attr_setdetachstate(&attr_sig_basil,
                                    PTHREAD_CREATE_DETACHED)) {
        error("pthread_attr_setdetachstate error %m");
        slurm_attr_destroy(&attr_sig_basil);
        return;
    }
    args_sig_basil = xmalloc(sizeof(args_sig_basil_t));
    args_sig_basil->resv_id = resv_id;
    args_sig_basil->signal  = signal;
    args_sig_basil->delay   = delay;
    if (pthread_create(&thread_sig_basil, &attr_sig_basil,
                       _sig_basil, (void *) args_sig_basil)) {
        error("pthread_create error %m");
        slurm_attr_destroy(&attr_sig_basil);
        xfree(args_sig_basil);
        return;
    }
    slurm_attr_destroy(&attr_sig_basil);
}
コード例 #16
0
ファイル: capmc_suspend.c プロジェクト: elodina/slurm
/*
 * Entry point: run _node_update for every node named in argv[1].
 *
 * argv[1] is parsed as a hostlist. Each node is handed to a detached worker
 * thread (or processed inline when the thread cannot be created), with
 * thread_cnt / thread_cnt_cond used to throttle the number of concurrent
 * workers and to wait for all of them before exiting.
 *
 * Exit status: 0 on completion, 2 if the hostlist cannot be parsed.
 */
int main(int argc, char *argv[])
{
	log_options_t log_opts = LOG_OPTS_INITIALIZER;
	hostlist_t hl = NULL;
	char *node_name;
	pthread_attr_t attr_work;
	pthread_t thread_work = 0;

	xstrfmtcat(prog_name, "%s[%u]", argv[0], (uint32_t) getpid());
	_read_config();
	log_opts.stderr_level = LOG_LEVEL_QUIET;
	log_opts.syslog_level = LOG_LEVEL_QUIET;
	/* Test the flag with a bitwise AND; a logical AND would raise the
	 * log level whenever any debug flag at all is set. */
	if (slurm_get_debug_flags() & DEBUG_FLAG_NODE_FEATURES)
		log_opts.logfile_level += 3;
	(void) log_init(argv[0], log_opts, LOG_DAEMON, log_file);

	if ((hl = hostlist_create(argv[1])) == NULL) {
		error("%s: Invalid hostlist (%s)", prog_name, argv[1]);
		exit(2);
	}
	while ((node_name = hostlist_pop(hl))) {
		/* Reserve a worker slot, waiting for one if necessary */
		slurm_mutex_lock(&thread_cnt_mutex);
		while (1) {
			if (thread_cnt <= MAX_THREADS) {
				thread_cnt++;
				break;
			} else {	/* wait for state change and retry */
				pthread_cond_wait(&thread_cnt_cond,
						  &thread_cnt_mutex);
			}
		}
		slurm_mutex_unlock(&thread_cnt_mutex);

		slurm_attr_init(&attr_work);
		(void) pthread_attr_setdetachstate
			(&attr_work, PTHREAD_CREATE_DETACHED);
		if (pthread_create(&thread_work, &attr_work, _node_update,
				   (void *) node_name)) {
			/* no thread available: do the work inline */
			_node_update((void *) node_name);
		}
		slurm_attr_destroy(&attr_work);
	}

	/* Wait for work threads to complete */
	slurm_mutex_lock(&thread_cnt_mutex);
	while (1) {
		if (thread_cnt == 0)
			break;
		else	/* wait for state change and retry */
			pthread_cond_wait(&thread_cnt_cond, &thread_cnt_mutex);
	}
	slurm_mutex_unlock(&thread_cnt_mutex);
	hostlist_destroy(hl);

	exit(0);
}
コード例 #17
0
/*
 * Start the _state_save_thread thread, passing it the given directory.
 * A creation failure is only logged.
 *
 * IN dir - handed unchanged to the thread as its argument
 */
static void _spawn_state_save_thread(char *dir)
{
	pthread_attr_t save_attr;
	pthread_t save_tid;
	int create_rc;

	slurm_attr_init(&save_attr);

	create_rc = pthread_create(&save_tid, &save_attr,
				   &_state_save_thread, (void *) dir);
	if (create_rc != 0) {
		error("Could not start switch/nrt state saving pthread");
	}

	slurm_attr_destroy(&save_attr);
}
コード例 #18
0
ファイル: scancel.c プロジェクト: adammoody/slurm
static int _signal_job_by_str(void)
{
	job_cancel_info_t *cancel_info;
	int err, i, rc = 0;
	pthread_t dummy;

	slurm_attr_init(&attr);
	if (pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED))
		error("pthread_attr_setdetachstate error %m");
	slurm_mutex_init(&num_active_threads_lock);
	slurm_cond_init(&num_active_threads_cond, NULL);

	for (i = 0; opt.job_list[i]; i++) {
		cancel_info = (job_cancel_info_t *)
			xmalloc(sizeof(job_cancel_info_t));
		cancel_info->job_id_str = xstrdup(opt.job_list[i]);
		cancel_info->rc      = &rc;
		cancel_info->sig     = opt.signal;
		cancel_info->num_active_threads = &num_active_threads;
		cancel_info->num_active_threads_lock =
			&num_active_threads_lock;
		cancel_info->num_active_threads_cond =
			&num_active_threads_cond;

		slurm_mutex_lock(&num_active_threads_lock);
		num_active_threads++;
		while (num_active_threads > MAX_THREADS) {
			slurm_cond_wait(&num_active_threads_cond,
					&num_active_threads_lock);
		}
		slurm_mutex_unlock(&num_active_threads_lock);

		err = pthread_create(&dummy, &attr, _cancel_job_id,cancel_info);
		if (err)	/* Run in-line if thread create fails */
			_cancel_job_id(cancel_info);
	}

	/* Wait all spawned threads to finish */
	slurm_mutex_lock( &num_active_threads_lock );
	while (num_active_threads > 0) {
		slurm_cond_wait(&num_active_threads_cond,
				&num_active_threads_lock);
	}
	slurm_mutex_unlock(&num_active_threads_lock);

	slurm_attr_destroy(&attr);

	return rc;
}
コード例 #19
0
ファイル: setup.c プロジェクト: Cray/slurm
/*
 * Start the detached task launch detection thread.
 *
 * Creation is retried once per second; after more than five failures the
 * error is logged and SLURM_ERROR is returned.
 *
 * RET SLURM_SUCCESS or SLURM_ERROR
 */
static int
_setup_srun_task_launch_detection(void)
{
	pthread_attr_t detect_attr;
	pthread_t detect_tid;
	int failures = 0;

	pthread_attr_init(&detect_attr);
	pthread_attr_setdetachstate(&detect_attr, PTHREAD_CREATE_DETACHED);

	for (;;) {
		/* keep the result in errno so that %m below reports it */
		errno = pthread_create(&detect_tid, &detect_attr,
				       &_task_launch_detection, NULL);
		if (errno == 0)
			break;

		failures++;
		if (failures > 5) {
			error ("mpi/pmi2: pthread_create error %m");
			slurm_attr_destroy(&detect_attr);
			return SLURM_ERROR;
		}
		sleep(1);
	}
	slurm_attr_destroy(&detect_attr);

	debug("mpi/pmi2: task launch detection thread (%lu) started",
	      (unsigned long) detect_tid);

	return SLURM_SUCCESS;
}
コード例 #20
0
ファイル: checkpoint_poe.c プロジェクト: Cray/slurm
/*
 * init() is called when the plugin is loaded, before any other functions
 * are called.  Put global initialization here.
 *
 * Starts the detached checkpoint agent thread.
 *
 * RET SLURM_SUCCESS, or SLURM_ERROR if the agent thread cannot be created
 */
extern int init ( void )
{
	pthread_attr_t attr;
	int rc = SLURM_SUCCESS;

	slurm_attr_init(&attr);
	if (pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED))
		error("pthread_attr_setdetachstate: %m");
	if (pthread_create(&ckpt_agent_tid, &attr, _ckpt_agent_thr, NULL)) {
		error("pthread_create: %m");
		rc = SLURM_ERROR;
	}
	/* release the attribute object on the failure path as well */
	slurm_attr_destroy(&attr);

	return rc;
}
コード例 #21
0
ファイル: proctrack.c プロジェクト: jsollom/slurm
static void _spawn_signal_thread(uint64_t cont_id, int signal)
{
	agent_arg_t *agent_arg_ptr;
	pthread_attr_t attr_agent;
	pthread_t thread_agent;

	slurm_attr_init(&attr_agent);
	if (pthread_attr_setdetachstate(&attr_agent, PTHREAD_CREATE_DETACHED))
		error("pthread_attr_setdetachstate error %m");
	agent_arg_ptr = xmalloc(sizeof(agent_arg_t));
	agent_arg_ptr->cont_id = cont_id;
	agent_arg_ptr->signal  = signal;
	(void) pthread_create(&thread_agent, &attr_agent,
			     _sig_agent, (void *) agent_arg_ptr);
	slurm_attr_destroy(&attr_agent);
}
コード例 #22
0
ファイル: launch_aprun.c プロジェクト: HPCNow/slurm
/*
 * Start the message handling thread for the job's slurmctld socket.
 *
 * The socket descriptor is taken over from the job's launch state (which is
 * reset to -1) and passed to _msg_thr_internal by address.
 *
 * IN job - srun job whose launch state holds the slurmctld socket
 * RET id of the new thread, or (pthread_t) 0 if there is no socket or the
 *     thread could not be created
 */
static pthread_t _spawn_msg_handler(srun_job_t *job)
{
	pthread_attr_t attr;
	pthread_t msg_thread = (pthread_t) 0;
	/* static: the thread receives a pointer to this variable, so it has
	 * to outlive this function call */
	static int slurmctld_fd;

	slurmctld_fd = job->step_ctx->launch_state->slurmctld_socket_fd;
	if (slurmctld_fd < 0)
		return (pthread_t) 0;
	job->step_ctx->launch_state->slurmctld_socket_fd = -1;

	slurm_attr_init(&attr);
	if (pthread_create(&msg_thread, &attr, _msg_thr_internal,
			   (void *) &slurmctld_fd)) {
		error("pthread_create of message thread: %m");
		/* the id is undefined after a failed create; hand back the
		 * same "no thread" value as the early return above */
		msg_thread = (pthread_t) 0;
	}
	slurm_attr_destroy(&attr);
	return msg_thread;
}
コード例 #23
0
ファイル: pmi_server.c プロジェクト: FredHutch/slurm
/* Send the KVS keypairs to every task waiting at the barrier.
 * The transmission is slow, so it runs in a detached agent thread that is
 * given its own copy of the keypairs plus the current barrier data. The
 * global barrier pointers and counters are cleared here so that any later
 * barrier request is handled independently of this one. */
static void _kvs_xmit_tasks(void)
{
	struct agent_arg *xmit_args;
	pthread_attr_t xmit_attr;
	pthread_t xmit_tid;

#if _DEBUG
	info("All tasks at barrier, transmit KVS keypairs now");
#endif

	/* Report the kvs_put timing statistics, then start them afresh */
	debug("kvs_put processing time min=%d, max=%d ave=%d (usec)",
		min_time_kvs_put, max_time_kvs_put,
		(tot_time_kvs_put / barrier_cnt));
	min_time_kvs_put = 1000000;
	max_time_kvs_put = 0;
	tot_time_kvs_put = 0;

	/* Move the barrier state into the agent's arguments and reset it */
	xmit_args = xmalloc(sizeof(struct agent_arg));
	xmit_args->barrier_xmit_ptr = barrier_ptr;
	xmit_args->barrier_xmit_cnt = barrier_cnt;
	barrier_ptr = NULL;
	barrier_resp_cnt = 0;
	barrier_cnt = 0;

	if (!kvs_updated) {
		/* nothing new to send: pass an empty set */
		xmit_args->kvs_xmit_ptr = xmalloc(0);
		xmit_args->kvs_xmit_cnt = 0;
	} else {
		/* send a duplicate of the updated kvs data */
		xmit_args->kvs_xmit_ptr = _kvs_comm_dup();
		xmit_args->kvs_xmit_cnt = kvs_comm_cnt;
		kvs_updated = 0;
	}

	/* The detached agent thread performs the transmission */
	slurm_attr_init(&xmit_attr);
	pthread_attr_setdetachstate(&xmit_attr, PTHREAD_CREATE_DETACHED);
	if (pthread_create(&xmit_tid, &xmit_attr, _agent,
			   (void *) xmit_args) != 0) {
		fatal("pthread_create");
	}
	slurm_attr_destroy(&xmit_attr);
}
コード例 #24
0
ファイル: slurm_jobacct_gather.c プロジェクト: jsollom/slurm
/*
 * Begin job accounting gathering and, for a non-zero frequency, start the
 * detached _watch_tasks polling thread.
 *
 * IN frequency - polling frequency stored in freq; 0 means no polling thread
 * RET SLURM_ERROR if jobacct_gather_init() fails, SLURM_SUCCESS in every
 *     other case (plugin polling disabled, poll already started, or a
 *     failed thread creation, which is only logged)
 */
extern int jobacct_gather_startpoll(uint16_t frequency)
{
	pthread_attr_t poll_attr;
	pthread_t poll_tid;

	if (!plugin_polling)
		return SLURM_SUCCESS;

	if (jobacct_gather_init() < 0)
		return SLURM_ERROR;

	if (!jobacct_shutdown) {
		error("jobacct_gather_startpoll: poll already started!");
		return SLURM_SUCCESS;
	}

	jobacct_shutdown = false;
	freq = frequency;
	task_list = list_create(jobacctinfo_destroy);

	if (frequency == 0) {
		/* caller does not want dynamic monitoring */
		debug2("jobacct_gather dynamic logging disabled");
		return SLURM_SUCCESS;
	}

	/* dynamic monitoring requested: create the polling thread */
	slurm_attr_init(&poll_attr);
	if (pthread_attr_setdetachstate(&poll_attr,
					PTHREAD_CREATE_DETACHED) != 0) {
		error("pthread_attr_setdetachstate error %m");
	}

	if (pthread_create(&poll_tid, &poll_attr, &_watch_tasks, NULL) == 0) {
		debug3("jobacct_gather dynamic logging enabled");
	} else {
		debug("jobacct_gather failed to create _watch_tasks thread: %m");
		frequency = 0;
	}
	slurm_attr_destroy(&poll_attr);

	return SLURM_SUCCESS;
}
コード例 #25
0
ファイル: power_save.c プロジェクト: edsw/slurm
/* start_power_mgr - Start the power management thread if it has not been
 *	started yet. Thread creation is retried once per second until it
 *	succeeds.
 * IN thread_id - where the ID of the started pthread is stored.
 */
extern void start_power_mgr(pthread_t *thread_id)
{
	pthread_attr_t mgr_attr;
	int first_start = 0;

	/* Claim the "started" flag under the lock; only one caller wins */
	slurm_mutex_lock(&power_mutex);
	if (!power_save_started) {
		power_save_started = true;
		first_start = 1;
	}
	slurm_mutex_unlock(&power_mutex);

	if (!first_start)
		return;		/* Already running */

	slurm_attr_init(&mgr_attr);
	for (;;) {
		if (pthread_create(thread_id, &mgr_attr,
				   _init_power_save, NULL) == 0)
			break;
		error("pthread_create %m");
		sleep(1);
	}
	slurm_attr_destroy(&mgr_attr);
}
コード例 #26
0
ファイル: scancel.c プロジェクト: rohgarg/slurm
/* _cancel_jobs - filter then cancel jobs or job steps per request
 *
 * Two passes are made: first JOB_PENDING, then JOB_END. After each pass the
 * function waits until num_active_threads has dropped to zero. Finishing the
 * pending pass before the second one starts avoids a race in which pending
 * jobs get scheduled while running jobs are still being cancelled.
 *
 * IN filter_cnt - passed through to _cancel_jobs_by_state()
 * RET the value left in rc by the two _cancel_jobs_by_state() calls
 */
static int _cancel_jobs(int filter_cnt)
{
	int rc = 0;
	int pass;

	slurm_attr_init(&attr);
	if (pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED) != 0) {
		error("pthread_attr_setdetachstate error %m");
	}

	slurm_mutex_init(&num_active_threads_lock);

	if (pthread_cond_init(&num_active_threads_cond, NULL) != 0) {
		error("pthread_cond_init error %m");
	}

	for (pass = 0; pass < 2; pass++) {
		if (pass == 0)
			_cancel_jobs_by_state(JOB_PENDING, filter_cnt, &rc);
		else
			_cancel_jobs_by_state(JOB_END, filter_cnt, &rc);

		/* Wait for any spawned threads that have not finished */
		pthread_mutex_lock(&num_active_threads_lock);
		while (num_active_threads > 0) {
			pthread_cond_wait(&num_active_threads_cond,
					  &num_active_threads_lock);
		}
		pthread_mutex_unlock(&num_active_threads_lock);
	}

	slurm_attr_destroy(&attr);
	slurm_mutex_destroy(&num_active_threads_lock);
	if (pthread_cond_destroy(&num_active_threads_cond) != 0) {
		error("pthread_cond_destroy error %m");
	}

	return rc;
}
コード例 #27
0
ファイル: slurmd.c プロジェクト: mrhaoji/slurm
/*
 * Service a new connection in a detached thread.
 *
 * The descriptor and client address are packed into a conn_t that is handed
 * to _service_connection. If the thread cannot be created after several
 * attempts (10 usec apart), the connection is serviced inline in the
 * calling thread instead.
 *
 * IN fd  - connection descriptor; marked close-on-exec here
 * IN cli - client address, stored by pointer in the thread argument
 */
static void
_handle_connection(slurm_fd_t fd, slurm_addr_t *cli)
{
	int            rc;
	pthread_attr_t attr;
	pthread_t      id;
	conn_t         *arg = xmalloc(sizeof(conn_t));
	int            retries = 0;

	arg->fd       = fd;
	arg->cli_addr = cli;

	slurm_attr_init(&attr);
	rc = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
	if (rc != 0) {
		errno = rc;
		xfree(arg);
		error("Unable to set detachstate on attr: %m");
		slurm_attr_destroy(&attr);
		return;
	}

	fd_set_close_on_exec(fd);

	_increment_thd_count();
	while ((rc = pthread_create(&id, &attr, &_service_connection,
				    (void *)arg))) {
		/* pthread_create() returns the error number rather than
		 * setting errno, so publish it for the %m below */
		errno = rc;
		error("msg_engine: pthread_create: %m");
		if (++retries > 3) {
			error("running service_connection without starting "
			      "a new thread slurmd will be "
			      "unresponsive until done");

			_service_connection((void *) arg);
			info("slurmd should be responsive now");
			break;
		}
		usleep(10);	/* sleep and again */
	}

	/* the attribute object is not needed once the spawn attempt is over */
	slurm_attr_destroy(&attr);

	return;
}
コード例 #28
0
extern int jobacct_gather_p_startpoll(uint16_t frequency)
{
	int rc = SLURM_SUCCESS;

#ifdef HAVE_AIX
	pthread_attr_t attr;
	pthread_t _watch_tasks_thread_id;

	debug("%s loaded", plugin_name);

	debug("jobacct-gather: frequency = %d", frequency);

	jobacct_shutdown = false;

	freq = frequency;
	pagesize = getpagesize()/1024;
	task_list = list_create(jobacct_common_free_jobacct);
	if (frequency == 0) {	/* don't want dynamic monitoring? */
		debug2("jobacct AIX dynamic logging disabled");
		return rc;
	}

	/* create polling thread */
	slurm_attr_init(&attr);
	if (pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED))
		error("pthread_attr_setdetachstate error %m");

	if  (pthread_create(&_watch_tasks_thread_id, &attr,
			    &_watch_tasks, NULL)) {
		debug("jobacct-gather failed to create _watch_tasks "
		      "thread: %m");
		frequency = 0;
	} else
		debug3("jobacct-gather AIX dynamic logging enabled");
	slurm_attr_destroy(&attr);
#else
	error("jobacct-gather AIX not loaded, not an aix system, "
	      "check slurm.conf");
#endif
	return rc;
}
コード例 #29
0
ファイル: gang.c プロジェクト: corburn/slurm
static void _spawn_timeslicer_thread(void)
{
	pthread_attr_t thread_attr_msg;

	pthread_mutex_lock( &thread_flag_mutex );
	if (thread_running) {
		error("timeslicer thread already running, not starting "
		      "another");
		pthread_mutex_unlock(&thread_flag_mutex);
		return;
	}

	slurm_attr_init(&thread_attr_msg);
	if (pthread_create(&timeslicer_thread_id, &thread_attr_msg,
			_timeslicer_thread, NULL))
		fatal("pthread_create %m");

	slurm_attr_destroy(&thread_attr_msg);
	thread_running = true;
	pthread_mutex_unlock(&thread_flag_mutex);
}
コード例 #30
0
ファイル: bg_job_run.c プロジェクト: HPCNow/slurm
/* Run an operation on a BG block, for starting or terminating a job, in a
 * detached _block_agent thread.
 *
 * Thread creation is retried 1000 usec apart; fatal() is called once the
 * number of failures exceeds MAX_PTHREAD_RETRIES.
 *
 * IN bg_action_ptr - passed unchanged to _block_agent as its argument
 */
static void _block_op(bg_action_t *bg_action_ptr)
{
	pthread_attr_t agent_attr;
	pthread_t agent_tid;
	int failures = 0;

	slurm_attr_init(&agent_attr);
	if (pthread_attr_setdetachstate(&agent_attr,
					PTHREAD_CREATE_DETACHED) != 0) {
		error("pthread_attr_setdetachstate error %m");
	}

	for (;;) {
		if (pthread_create(&agent_tid, &agent_attr,
				   _block_agent, bg_action_ptr) == 0)
			break;
		error("pthread_create error %m");
		failures++;
		if (failures > MAX_PTHREAD_RETRIES)
			fatal("Can't create pthread");
		/* brief pause before the next attempt */
		usleep(1000);
	}

	slurm_attr_destroy(&agent_attr);
}