コード例 #1
0
ファイル: sched_plugin.c プロジェクト: SchedMD/slurm
/*
 * The scheduler plugin can not be changed via reconfiguration
 * due to background threads, job priorities, etc.
 * slurmctld must be restarted and job priority changes may be
 * required to change the scheduler type.
 */
extern int slurm_sched_init(void)
{
	int retval = SLURM_SUCCESS;
	char *plugin_type = "sched";
	char *type = NULL;

	if ( init_run && g_context )
		return retval;

	slurm_mutex_lock( &g_context_lock );

	if ( g_context )
		goto done;

	type = slurm_get_sched_type();
	g_context = plugin_context_create(
		plugin_type, type, (void **)&ops, syms, sizeof(syms));

	if (!g_context) {
		error("cannot create %s context for %s", plugin_type, type);
		retval = SLURM_ERROR;
		goto done;
	}
	init_run = true;

done:
	slurm_mutex_unlock( &g_context_lock );
	xfree(type);
	return retval;
}
コード例 #2
0
ファイル: sched_plugin.c プロジェクト: alepharchives/slurm
/* *********************************************************************** */
extern int
slurm_sched_init( void )
{
	int retval = SLURM_SUCCESS;
	char *sched_type = NULL;

	slurm_mutex_lock( &g_sched_context_lock );

	if ( g_sched_context )
		goto done;

	sched_type = slurm_get_sched_type();
	g_sched_context = slurm_sched_context_create( sched_type );
	if ( g_sched_context == NULL ) {
		error( "cannot create scheduler context for %s",
			 sched_type );
		retval = SLURM_ERROR;
		goto done;
	}

	if ( slurm_sched_get_ops( g_sched_context ) == NULL ) {
		error( "cannot resolve scheduler plugin operations" );
		slurm_sched_context_destroy( g_sched_context );
		g_sched_context = NULL;
		retval = SLURM_ERROR;
		goto done;
	}

	if ( (slurm_get_preempt_mode() & PREEMPT_MODE_GANG) &&
	     (gs_init() != SLURM_SUCCESS))
		error( "cannot start gang scheduler ");

 done:
	slurm_mutex_unlock( &g_sched_context_lock );
	xfree(sched_type);
	return retval;
}
コード例 #3
0
ファイル: agent.c プロジェクト: alepharchives/slurm
/*
 * agent - party responsible for transmitting an common RPC in parallel
 *	across a set of nodes. Use agent_queue_request() if immediate
 *	execution is not essential.
 * IN pointer to agent_arg_t, which is xfree'd (including hostlist,
 *	and msg_args) upon completion
 * RET always NULL (function format just for use as pthread)
 */
void *agent(void *args)
{
	int i, delay, rc, retries = 0;
	pthread_attr_t attr_wdog;
	pthread_t thread_wdog;
	agent_arg_t *agent_arg_ptr = args;
	agent_info_t *agent_info_ptr = NULL;
	thd_t *thread_ptr;
	task_info_t *task_specific_ptr;
	time_t begin_time;

#if 0
	info("Agent_cnt is %d of %d with msg_type %d",
	     agent_cnt, MAX_AGENT_CNT, agent_arg_ptr->msg_type);
#endif
	slurm_mutex_lock(&agent_cnt_mutex);
	if (!wiki2_sched_test) {
		char *sched_type = slurm_get_sched_type();
		if (strcmp(sched_type, "sched/wiki2") == 0)
			wiki2_sched = true;
		xfree(sched_type);
		wiki2_sched_test = true;
	}

	while (1) {
		if (slurmctld_config.shutdown_time ||
		    (agent_cnt < MAX_AGENT_CNT)) {
			agent_cnt++;
			break;
		} else {	/* wait for state change and retry */
			pthread_cond_wait(&agent_cnt_cond, &agent_cnt_mutex);
		}
	}
	slurm_mutex_unlock(&agent_cnt_mutex);
	if (slurmctld_config.shutdown_time)
		goto cleanup;

	/* basic argument value tests */
	begin_time = time(NULL);
	if (_valid_agent_arg(agent_arg_ptr))
		goto cleanup;

	/* initialize the agent data structures */
	agent_info_ptr = _make_agent_info(agent_arg_ptr);
	thread_ptr = agent_info_ptr->thread_struct;

	/* start the watchdog thread */
	slurm_attr_init(&attr_wdog);
	if (pthread_attr_setdetachstate
	    (&attr_wdog, PTHREAD_CREATE_JOINABLE))
		error("pthread_attr_setdetachstate error %m");
	while (pthread_create(&thread_wdog, &attr_wdog, _wdog,
				(void *) agent_info_ptr)) {
		error("pthread_create error %m");
		if (++retries > MAX_RETRIES)
			fatal("Can't create pthread");
		usleep(10000);	/* sleep and retry */
	}
	slurm_attr_destroy(&attr_wdog);
#if 	AGENT_THREAD_COUNT < 1
	fatal("AGENT_THREAD_COUNT value is invalid");
#endif
	debug2("got %d threads to send out",agent_info_ptr->thread_count);
	/* start all the other threads (up to AGENT_THREAD_COUNT active) */
	for (i = 0; i < agent_info_ptr->thread_count; i++) {

		/* wait until "room" for another thread */
		slurm_mutex_lock(&agent_info_ptr->thread_mutex);
		while (agent_info_ptr->threads_active >=
		       AGENT_THREAD_COUNT) {
			pthread_cond_wait(&agent_info_ptr->thread_cond,
					  &agent_info_ptr->thread_mutex);
		}

		/* create thread specific data, NOTE: freed from
		 *      _thread_per_group_rpc() */
		task_specific_ptr = _make_task_data(agent_info_ptr, i);

		slurm_attr_init(&thread_ptr[i].attr);
		if (pthread_attr_setdetachstate(&thread_ptr[i].attr,
						PTHREAD_CREATE_DETACHED))
			error("pthread_attr_setdetachstate error %m");
		while ((rc = pthread_create(&thread_ptr[i].thread,
					    &thread_ptr[i].attr,
					    _thread_per_group_rpc,
					    (void *) task_specific_ptr))) {
			error("pthread_create error %m");
			if (agent_info_ptr->threads_active)
				pthread_cond_wait(&agent_info_ptr->
						  thread_cond,
						  &agent_info_ptr->
						  thread_mutex);
			else {
				slurm_mutex_unlock(&agent_info_ptr->
						     thread_mutex);
				usleep(10000);	/* sleep and retry */
				slurm_mutex_lock(&agent_info_ptr->
						   thread_mutex);
			}
		}
		slurm_attr_destroy(&thread_ptr[i].attr);
		agent_info_ptr->threads_active++;
		slurm_mutex_unlock(&agent_info_ptr->thread_mutex);
	}

	/* wait for termination of remaining threads */
	pthread_join(thread_wdog, NULL);
	delay = (int) difftime(time(NULL), begin_time);
	if (delay > (slurm_get_msg_timeout() * 2)) {
		info("agent msg_type=%u ran for %d seconds",
			agent_arg_ptr->msg_type,  delay);
	}
	slurm_mutex_lock(&agent_info_ptr->thread_mutex);
	while (agent_info_ptr->threads_active != 0) {
		pthread_cond_wait(&agent_info_ptr->thread_cond,
				&agent_info_ptr->thread_mutex);
	}
	slurm_mutex_unlock(&agent_info_ptr->thread_mutex);

      cleanup:
	_purge_agent_args(agent_arg_ptr);

	if (agent_info_ptr) {
		xfree(agent_info_ptr->thread_struct);
		xfree(agent_info_ptr);
	}
	slurm_mutex_lock(&agent_cnt_mutex);

	if (agent_cnt > 0)
		agent_cnt--;
	else {
		error("agent_cnt underflow");
		agent_cnt = 0;
	}

	if (agent_cnt && agent_cnt < MAX_AGENT_CNT)
		agent_retry(RPC_RETRY_INTERVAL, true);

	pthread_cond_broadcast(&agent_cnt_cond);
	slurm_mutex_unlock(&agent_cnt_mutex);

	return NULL;
}