/* * The scheduler plugin can not be changed via reconfiguration * due to background threads, job priorities, etc. * slurmctld must be restarted and job priority changes may be * required to change the scheduler type. */ extern int slurm_sched_init(void) { int retval = SLURM_SUCCESS; char *plugin_type = "sched"; char *type = NULL; if ( init_run && g_context ) return retval; slurm_mutex_lock( &g_context_lock ); if ( g_context ) goto done; type = slurm_get_sched_type(); g_context = plugin_context_create( plugin_type, type, (void **)&ops, syms, sizeof(syms)); if (!g_context) { error("cannot create %s context for %s", plugin_type, type); retval = SLURM_ERROR; goto done; } init_run = true; done: slurm_mutex_unlock( &g_context_lock ); xfree(type); return retval; }
/* *********************************************************************** */ extern int slurm_sched_init( void ) { int retval = SLURM_SUCCESS; char *sched_type = NULL; slurm_mutex_lock( &g_sched_context_lock ); if ( g_sched_context ) goto done; sched_type = slurm_get_sched_type(); g_sched_context = slurm_sched_context_create( sched_type ); if ( g_sched_context == NULL ) { error( "cannot create scheduler context for %s", sched_type ); retval = SLURM_ERROR; goto done; } if ( slurm_sched_get_ops( g_sched_context ) == NULL ) { error( "cannot resolve scheduler plugin operations" ); slurm_sched_context_destroy( g_sched_context ); g_sched_context = NULL; retval = SLURM_ERROR; goto done; } if ( (slurm_get_preempt_mode() & PREEMPT_MODE_GANG) && (gs_init() != SLURM_SUCCESS)) error( "cannot start gang scheduler "); done: slurm_mutex_unlock( &g_sched_context_lock ); xfree(sched_type); return retval; }
/* * agent - party responsible for transmitting an common RPC in parallel * across a set of nodes. Use agent_queue_request() if immediate * execution is not essential. * IN pointer to agent_arg_t, which is xfree'd (including hostlist, * and msg_args) upon completion * RET always NULL (function format just for use as pthread) */ void *agent(void *args) { int i, delay, rc, retries = 0; pthread_attr_t attr_wdog; pthread_t thread_wdog; agent_arg_t *agent_arg_ptr = args; agent_info_t *agent_info_ptr = NULL; thd_t *thread_ptr; task_info_t *task_specific_ptr; time_t begin_time; #if 0 info("Agent_cnt is %d of %d with msg_type %d", agent_cnt, MAX_AGENT_CNT, agent_arg_ptr->msg_type); #endif slurm_mutex_lock(&agent_cnt_mutex); if (!wiki2_sched_test) { char *sched_type = slurm_get_sched_type(); if (strcmp(sched_type, "sched/wiki2") == 0) wiki2_sched = true; xfree(sched_type); wiki2_sched_test = true; } while (1) { if (slurmctld_config.shutdown_time || (agent_cnt < MAX_AGENT_CNT)) { agent_cnt++; break; } else { /* wait for state change and retry */ pthread_cond_wait(&agent_cnt_cond, &agent_cnt_mutex); } } slurm_mutex_unlock(&agent_cnt_mutex); if (slurmctld_config.shutdown_time) goto cleanup; /* basic argument value tests */ begin_time = time(NULL); if (_valid_agent_arg(agent_arg_ptr)) goto cleanup; /* initialize the agent data structures */ agent_info_ptr = _make_agent_info(agent_arg_ptr); thread_ptr = agent_info_ptr->thread_struct; /* start the watchdog thread */ slurm_attr_init(&attr_wdog); if (pthread_attr_setdetachstate (&attr_wdog, PTHREAD_CREATE_JOINABLE)) error("pthread_attr_setdetachstate error %m"); while (pthread_create(&thread_wdog, &attr_wdog, _wdog, (void *) agent_info_ptr)) { error("pthread_create error %m"); if (++retries > MAX_RETRIES) fatal("Can't create pthread"); usleep(10000); /* sleep and retry */ } slurm_attr_destroy(&attr_wdog); #if AGENT_THREAD_COUNT < 1 fatal("AGENT_THREAD_COUNT value is invalid"); #endif debug2("got %d threads to send out",agent_info_ptr->thread_count); /* start all the other threads (up to AGENT_THREAD_COUNT active) */ for (i = 0; i < agent_info_ptr->thread_count; i++) { /* wait until "room" for another thread */ slurm_mutex_lock(&agent_info_ptr->thread_mutex); while (agent_info_ptr->threads_active >= AGENT_THREAD_COUNT) { pthread_cond_wait(&agent_info_ptr->thread_cond, &agent_info_ptr->thread_mutex); } /* create thread specific data, NOTE: freed from * _thread_per_group_rpc() */ task_specific_ptr = _make_task_data(agent_info_ptr, i); slurm_attr_init(&thread_ptr[i].attr); if (pthread_attr_setdetachstate(&thread_ptr[i].attr, PTHREAD_CREATE_DETACHED)) error("pthread_attr_setdetachstate error %m"); while ((rc = pthread_create(&thread_ptr[i].thread, &thread_ptr[i].attr, _thread_per_group_rpc, (void *) task_specific_ptr))) { error("pthread_create error %m"); if (agent_info_ptr->threads_active) pthread_cond_wait(&agent_info_ptr-> thread_cond, &agent_info_ptr-> thread_mutex); else { slurm_mutex_unlock(&agent_info_ptr-> thread_mutex); usleep(10000); /* sleep and retry */ slurm_mutex_lock(&agent_info_ptr-> thread_mutex); } } slurm_attr_destroy(&thread_ptr[i].attr); agent_info_ptr->threads_active++; slurm_mutex_unlock(&agent_info_ptr->thread_mutex); } /* wait for termination of remaining threads */ pthread_join(thread_wdog, NULL); delay = (int) difftime(time(NULL), begin_time); if (delay > (slurm_get_msg_timeout() * 2)) { info("agent msg_type=%u ran for %d seconds", agent_arg_ptr->msg_type, delay); } slurm_mutex_lock(&agent_info_ptr->thread_mutex); while (agent_info_ptr->threads_active != 0) { pthread_cond_wait(&agent_info_ptr->thread_cond, &agent_info_ptr->thread_mutex); } slurm_mutex_unlock(&agent_info_ptr->thread_mutex); cleanup: _purge_agent_args(agent_arg_ptr); if (agent_info_ptr) { xfree(agent_info_ptr->thread_struct); xfree(agent_info_ptr); } slurm_mutex_lock(&agent_cnt_mutex); if (agent_cnt > 0) agent_cnt--; else { error("agent_cnt underflow"); agent_cnt = 0; } if (agent_cnt && agent_cnt < MAX_AGENT_CNT) agent_retry(RPC_RETRY_INTERVAL, true); pthread_cond_broadcast(&agent_cnt_cond); slurm_mutex_unlock(&agent_cnt_mutex); return NULL; }