extern int acct_gather_interconnect_startpoll(uint32_t frequency)
{
        int retval = SLURM_SUCCESS;

        if (acct_gather_interconnect_init() < 0)
                return SLURM_ERROR;

        if (!acct_shutdown) {
                error("%s: poll already started!", __func__);
                return retval;
        }

        acct_shutdown = false;
        freq = frequency;

        if (frequency == 0) {   /* don't want dynamic monitoring? */
                debug2("%s: dynamic logging disabled", __func__);
                return retval;
        }

        /* create polling thread */
        slurm_thread_create(&watch_node_thread_id, &_watch_node, NULL);

        debug3("%s: dynamic logging enabled", __func__);

        return retval;
}
extern void msg_aggr_sender_init(char *host, uint16_t port, uint64_t window,
                                 uint64_t max_msg_cnt)
{
        if (msg_collection.running || (max_msg_cnt <= 1))
                return;

        memset(&msg_collection, 0, sizeof(msg_collection_type_t));

        slurm_mutex_init(&msg_collection.aggr_mutex);
        slurm_mutex_init(&msg_collection.mutex);

        slurm_mutex_lock(&msg_collection.mutex);
        slurm_mutex_lock(&msg_collection.aggr_mutex);
        slurm_cond_init(&msg_collection.cond, NULL);
        slurm_set_addr(&msg_collection.node_addr, port, host);
        msg_collection.window = window;
        msg_collection.max_msg_cnt = max_msg_cnt;
        msg_collection.msg_aggr_list = list_create(_msg_aggr_free);
        msg_collection.msg_list = list_create(slurm_free_comp_msg_list);
        msg_collection.max_msgs = false;
        msg_collection.debug_flags = slurm_get_debug_flags();
        slurm_mutex_unlock(&msg_collection.aggr_mutex);
        slurm_mutex_unlock(&msg_collection.mutex);

        slurm_thread_create(&msg_collection.thread_id,
                            &_msg_aggregation_sender, NULL);
}
extern void slurm_persist_conn_recv_thread_init(
        slurm_persist_conn_t *persist_conn, int thread_loc, void *arg)
{
        persist_service_conn_t *service_conn;

        if (thread_loc < 0)
                thread_loc = slurm_persist_conn_wait_for_thread_loc();
        if (thread_loc < 0)
                return;

        service_conn = xmalloc(sizeof(persist_service_conn_t));

        slurm_mutex_lock(&thread_count_lock);
        persist_service_conn[thread_loc] = service_conn;
        slurm_mutex_unlock(&thread_count_lock);

        service_conn->arg = arg;
        service_conn->conn = persist_conn;
        service_conn->thread_loc = thread_loc;

        persist_conn->timeout = 0; /* If this isn't zero we won't wait
                                      forever like we want to. */

        //_service_connection(service_conn);
        slurm_thread_create(&persist_service_conn[thread_loc]->thread_id,
                            _service_connection, service_conn);
}
/*
 * init() is called when the plugin is loaded, before any other functions
 * are called. Put global initialization here.
 */
extern int init(void)
{
        jobslist = list_create(_jobslist_del);
        slurm_thread_create(&job_handler_thread, _process_jobs, NULL);
        slurm_mutex_lock(&pend_jobs_lock);
        (void) _load_pending_jobs();
        slurm_mutex_unlock(&pend_jobs_lock);

        return SLURM_SUCCESS;
}
extern int proctrack_p_create(stepd_step_rec_t *job)
{
        DEF_TIMERS;
        START_TIMER;

        if (!libjob_handle)
                init();

        if (!job->cont_id) {
                /* Since the cray job lib will create the container
                   off the process calling job_create we don't want to
                   call it from the main process since it will include
                   all the threads the main process spawns and there is
                   no way to safely track which pids need to be removed
                   when removing the parent.

                   It turns out spawning a thread will make the
                   job_create create the container off that process
                   instead of the main process.  Once we have added a
                   process we can end the thread which will remove the
                   pid from the container automatically.  Empty
                   containers are not valid.
                */
                slurm_mutex_lock(&thread_mutex);
                if (threadid) {
                        debug("Had a thread already 0x%08lx", threadid);
                        slurm_mutex_lock(&notify_mutex);
                        slurm_cond_wait(&notify, &notify_mutex);
                        slurm_mutex_unlock(&notify_mutex);
                        debug("Last thread done 0x%08lx", threadid);
                }

                /* We have to lock the notify_mutex here since the
                   thread could possibly signal things before we
                   started waiting for it.
                */
                slurm_mutex_lock(&notify_mutex);
                slurm_thread_create(&threadid, _create_container_thread, job);
                slurm_cond_wait(&notify, &notify_mutex);
                slurm_mutex_unlock(&notify_mutex);
                slurm_mutex_unlock(&thread_mutex);

                if (job->cont_id != (jid_t)-1)
                        debug("proctrack_p_create: created jid "
                              "0x%08lx thread 0x%08lx",
                              job->cont_id, threadid);
        } else
                error("proctrack_p_create: already have a cont_id");

        END_TIMER;
        if (debug_flags & DEBUG_FLAG_TIME_CRAY)
                INFO_LINE("call took: %s", TIME_STR);

        return SLURM_SUCCESS;
}
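/*
 * A minimal sketch (plain pthreads, not SLURM's wrappers) of the hand-off
 * used above: the parent locks the notify mutex *before* creating the
 * worker so the worker's signal cannot fire before the parent is waiting,
 * then blocks until the worker reports that its setup is done.  The names
 * worker_main/start_worker and the "done" predicate guarding against
 * spurious wakeups are illustrative additions; SLURM's slurm_mutex and
 * slurm_cond macros wrap the same pthread calls with error checking.
 */
#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t notify_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t notify = PTHREAD_COND_INITIALIZER;
static bool done = false;

static void *worker_main(void *arg)
{
        /* ... perform the setup that must run on its own thread ... */
        pthread_mutex_lock(&notify_mutex);
        done = true;
        pthread_cond_signal(&notify);   /* wake the waiting parent */
        pthread_mutex_unlock(&notify_mutex);
        return NULL;
}

static void start_worker(void)
{
        pthread_t tid;

        /* Lock first so the worker's signal cannot be missed. */
        pthread_mutex_lock(&notify_mutex);
        pthread_create(&tid, NULL, worker_main, NULL);
        while (!done)
                pthread_cond_wait(&notify, &notify_mutex);
        pthread_mutex_unlock(&notify_mutex);
        pthread_join(tid, NULL);
}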
static void _spawn_timeslicer_thread(void)
{
        slurm_mutex_lock(&thread_flag_mutex);
        if (thread_running) {
                error("timeslicer thread already running, not starting another");
                slurm_mutex_unlock(&thread_flag_mutex);
                return;
        }

        slurm_thread_create(&timeslicer_thread_id, _timeslicer_thread, NULL);
        thread_running = true;
        slurm_mutex_unlock(&thread_flag_mutex);
}
static void _create_agent(void)
{
        /* this needs to be set because the agent thread will do
           nothing if the connection was closed and then opened again */
        slurmdbd_shutdown = 0;

        if (agent_list == NULL) {
                agent_list = list_create(slurmdbd_free_buffer);
                _load_dbd_state();
        }

        if (agent_tid == 0) {
                slurm_thread_create(&agent_tid, _agent, NULL);
        }
}
int init(void)
{
        if (slurmctld_config.scheduling_disabled)
                return SLURM_SUCCESS;

        verbose("sched: Backfill scheduler plugin loaded");

        slurm_mutex_lock(&thread_flag_mutex);
        if (backfill_thread) {
                debug2("Backfill thread already running, not starting another");
                slurm_mutex_unlock(&thread_flag_mutex);
                return SLURM_ERROR;
        }

        /* since we do a join on this later we don't make it detached */
        slurm_thread_create(&backfill_thread, backfill_agent, NULL);
        slurm_mutex_unlock(&thread_flag_mutex);

        return SLURM_SUCCESS;
}
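/*
 * _spawn_timeslicer_thread(), _create_agent(), and the backfill init()
 * above all guard thread creation with a mutex plus an "already running"
 * check so the background thread is only ever started once.  A minimal
 * sketch of that guard with plain pthreads; start_agent_once() and
 * agent_main() are illustrative names, not SLURM symbols.
 */
#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t flag_mutex = PTHREAD_MUTEX_INITIALIZER;
static bool agent_running = false;
static pthread_t agent_tid;

static void *agent_main(void *arg)
{
        /* ... background work loop ... */
        return NULL;
}

static void start_agent_once(void)
{
        pthread_mutex_lock(&flag_mutex);
        if (agent_running) {
                /* Another caller already started the agent; nothing to do. */
                pthread_mutex_unlock(&flag_mutex);
                return;
        }
        pthread_create(&agent_tid, NULL, agent_main, NULL);
        agent_running = true;
        pthread_mutex_unlock(&flag_mutex);
}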
static void *_thread_launcher(void *no_data)
{
        //what would arg contain? frequency, socket?
        struct timeval tvnow;
        struct timespec abs;

        slurm_thread_create(&thread_ipmi_id_run, _thread_ipmi_run, NULL);

        /* setup timer */
        gettimeofday(&tvnow, NULL);
        abs.tv_sec = tvnow.tv_sec + slurm_ipmi_conf.timeout;
        abs.tv_nsec = tvnow.tv_usec * 1000;

        slurm_mutex_lock(&launch_mutex);
        slurm_cond_timedwait(&launch_cond, &launch_mutex, &abs);
        slurm_mutex_unlock(&launch_mutex);

        if (!flag_thread_started) {
                error("%s threads failed to start in a timely manner",
                      plugin_name);
                flag_energy_accounting_shutdown = true;

                /*
                 * It is known that we can hang on IPMI calls, so cancel
                 * the thread if we must.
                 */
                pthread_cancel(thread_ipmi_id_run);

                /*
                 * Unlock just to make sure, since we could have canceled
                 * the thread while it held the lock.
                 */
                slurm_mutex_unlock(&ipmi_mutex);
        }

        return NULL;
}
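/*
 * A minimal sketch of the timed start-up wait used above, with plain
 * pthreads: build an absolute deadline from the current time, then block
 * on pthread_cond_timedwait() until the worker signals readiness or the
 * deadline passes.  The timeout value and the "started" flag are
 * illustrative stand-ins for slurm_ipmi_conf.timeout and
 * flag_thread_started.
 */
#include <errno.h>
#include <pthread.h>
#include <stdbool.h>
#include <sys/time.h>
#include <time.h>

static pthread_mutex_t launch_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t launch_cond = PTHREAD_COND_INITIALIZER;
static bool started = false;

/* Wait up to timeout_sec for the worker to set "started"; return it. */
static bool wait_for_start(int timeout_sec)
{
        struct timeval now;
        struct timespec deadline;

        /* Default condition variables use CLOCK_REALTIME, matching
         * gettimeofday(), so the deadline is simply now + timeout. */
        gettimeofday(&now, NULL);
        deadline.tv_sec = now.tv_sec + timeout_sec;
        deadline.tv_nsec = now.tv_usec * 1000;

        pthread_mutex_lock(&launch_mutex);
        while (!started) {
                if (pthread_cond_timedwait(&launch_cond, &launch_mutex,
                                           &deadline) == ETIMEDOUT)
                        break;
        }
        pthread_mutex_unlock(&launch_mutex);
        return started;
}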
static int _fed_job_will_run(job_desc_msg_t *req,
                             will_run_response_msg_t **will_run_resp,
                             slurmdb_federation_rec_t *fed)
{
        List resp_msg_list;
        int pthread_count = 0, i;
        pthread_t *load_thread = 0;
        load_willrun_req_struct_t *load_args;
        ListIterator iter;
        will_run_response_msg_t *earliest_resp = NULL;
        load_willrun_resp_struct_t *tmp_resp;
        slurmdb_cluster_rec_t *cluster;
        List req_clusters = NULL;

        xassert(req);
        xassert(will_run_resp);

        *will_run_resp = NULL;

        /*
         * If a subset of clusters was specified then only do a will_run to
         * those clusters, otherwise check all clusters in the federation.
         */
        if (req->clusters && xstrcasecmp(req->clusters, "all")) {
                req_clusters = list_create(slurm_destroy_char);
                slurm_addto_char_list(req_clusters, req->clusters);
        }

        /* Spawn one pthread per cluster to collect job information */
        resp_msg_list = list_create(NULL);
        load_thread = xmalloc(sizeof(pthread_t) *
                              list_count(fed->cluster_list));
        iter = list_iterator_create(fed->cluster_list);
        while ((cluster = (slurmdb_cluster_rec_t *)list_next(iter))) {
                if ((cluster->control_host == NULL) ||
                    (cluster->control_host[0] == '\0'))
                        continue;       /* Cluster down */

                if (req_clusters &&
                    !list_find_first(req_clusters, slurm_find_char_in_list,
                                     cluster->name))
                        continue;

                load_args = xmalloc(sizeof(load_willrun_req_struct_t));
                load_args->cluster = cluster;
                load_args->req = req;
                load_args->resp_msg_list = resp_msg_list;
                slurm_thread_create(&load_thread[pthread_count],
                                    _load_willrun_thread, load_args);
                pthread_count++;
        }
        list_iterator_destroy(iter);
        FREE_NULL_LIST(req_clusters);

        /* Wait for all pthreads to complete */
        for (i = 0; i < pthread_count; i++)
                pthread_join(load_thread[i], NULL);
        xfree(load_thread);

        iter = list_iterator_create(resp_msg_list);
        while ((tmp_resp = (load_willrun_resp_struct_t *)list_next(iter))) {
                if (!tmp_resp->willrun_resp_msg)
                        slurm_seterrno(tmp_resp->rc);
                else if ((!earliest_resp) ||
                         (tmp_resp->willrun_resp_msg->start_time <
                          earliest_resp->start_time)) {
                        slurm_free_will_run_response_msg(earliest_resp);
                        earliest_resp = tmp_resp->willrun_resp_msg;
                        tmp_resp->willrun_resp_msg = NULL;
                }

                slurm_free_will_run_response_msg(tmp_resp->willrun_resp_msg);
                xfree(tmp_resp);
        }
        list_iterator_destroy(iter);
        FREE_NULL_LIST(resp_msg_list);

        *will_run_resp = earliest_resp;

        if (!earliest_resp)
                return SLURM_ERROR;

        return SLURM_SUCCESS;
}
static int _load_fed_nodes(slurm_msg_t *req_msg,
                           node_info_msg_t **node_info_msg_pptr,
                           uint16_t show_flags, char *cluster_name,
                           slurmdb_federation_rec_t *fed)
{
        int cluster_inx = 0, i;
        load_node_resp_struct_t *node_resp;
        node_info_msg_t *orig_msg = NULL, *new_msg = NULL;
        uint32_t new_rec_cnt;
        slurmdb_cluster_rec_t *cluster;
        ListIterator iter;
        int pthread_count = 0;
        pthread_t *load_thread = 0;
        load_node_req_struct_t *load_args;
        List resp_msg_list;

        *node_info_msg_pptr = NULL;

        /* Spawn one pthread per cluster to collect node information */
        resp_msg_list = list_create(NULL);
        load_thread = xmalloc(sizeof(pthread_t) *
                              list_count(fed->cluster_list));
        iter = list_iterator_create(fed->cluster_list);
        while ((cluster = (slurmdb_cluster_rec_t *) list_next(iter))) {
                if ((cluster->control_host == NULL) ||
                    (cluster->control_host[0] == '\0'))
                        continue;       /* Cluster down */

                load_args = xmalloc(sizeof(load_node_req_struct_t));
                load_args->cluster = cluster;
                load_args->cluster_inx = cluster_inx++;
                load_args->req_msg = req_msg;
                load_args->resp_msg_list = resp_msg_list;
                load_args->show_flags = show_flags;
                slurm_thread_create(&load_thread[pthread_count],
                                    _load_node_thread, load_args);
                pthread_count++;
        }
        list_iterator_destroy(iter);

        /* Wait for all pthreads to complete */
        for (i = 0; i < pthread_count; i++)
                pthread_join(load_thread[i], NULL);
        xfree(load_thread);

        /* Maintain a consistent cluster/node ordering */
        list_sort(resp_msg_list, _sort_by_cluster_inx);

        /* Merge the responses into a single response message */
        iter = list_iterator_create(resp_msg_list);
        while ((node_resp = (load_node_resp_struct_t *) list_next(iter))) {
                new_msg = node_resp->new_msg;
                if (!orig_msg) {
                        orig_msg = new_msg;
                        *node_info_msg_pptr = orig_msg;
                } else {
                        /* Merge the node records */
                        orig_msg->last_update = MIN(orig_msg->last_update,
                                                    new_msg->last_update);
                        new_rec_cnt = orig_msg->record_count +
                                      new_msg->record_count;
                        if (new_msg->record_count) {
                                orig_msg->node_array =
                                        xrealloc(orig_msg->node_array,
                                                 sizeof(node_info_t) *
                                                 new_rec_cnt);
                                (void) memcpy(orig_msg->node_array +
                                              orig_msg->record_count,
                                              new_msg->node_array,
                                              sizeof(node_info_t) *
                                              new_msg->record_count);
                                orig_msg->record_count = new_rec_cnt;
                        }
                        xfree(new_msg->node_array);
                        xfree(new_msg);
                }
                xfree(node_resp);
        }
        list_iterator_destroy(iter);
        FREE_NULL_LIST(resp_msg_list);

        if (!orig_msg)
                slurm_seterrno_ret(SLURM_ERROR);

        return SLURM_SUCCESS;
}
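/*
 * Both federated loaders above follow the same fan-out pattern: spawn one
 * thread per cluster, let each thread deposit its result in a shared
 * structure, then join every thread before merging.  A minimal sketch with
 * plain pthreads and a fixed array in place of SLURM's List; the names
 * query_cluster, cluster_arg_t, and fan_out are illustrative only, and the
 * "result" computation is a placeholder for the real per-cluster RPC.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct {
        int cluster_inx;        /* which cluster this worker queries */
        int result;             /* filled in by the worker */
} cluster_arg_t;

static void *query_cluster(void *x)
{
        cluster_arg_t *arg = x;

        /* ... contact cluster arg->cluster_inx and record its response ... */
        arg->result = arg->cluster_inx * 10;    /* placeholder result */
        return NULL;
}

static void fan_out(int cluster_cnt)
{
        pthread_t *tids = calloc(cluster_cnt, sizeof(pthread_t));
        cluster_arg_t *args = calloc(cluster_cnt, sizeof(cluster_arg_t));
        int i;

        for (i = 0; i < cluster_cnt; i++) {
                args[i].cluster_inx = i;
                pthread_create(&tids[i], NULL, query_cluster, &args[i]);
        }
        /* Wait for every worker before touching the merged results. */
        for (i = 0; i < cluster_cnt; i++)
                pthread_join(tids[i], NULL);
        for (i = 0; i < cluster_cnt; i++)
                printf("cluster %d -> %d\n", i, args[i].result);
        free(tids);
        free(args);
}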