/*
 * slurm_persist_conn_open_without_init - open the socket of a persistent
 *	connection and configure the resulting descriptor.
 *
 * IN/OUT persist_conn - connection record. rem_host, rem_port and
 *	cluster_name must already be set. On return fd holds the new
 *	descriptor, or a negative value if the open failed.
 * RET SLURM_SUCCESS if the socket was opened, SLURM_ERROR otherwise.
 */
extern int slurm_persist_conn_open_without_init(
	slurm_persist_conn_t *persist_conn)
{
	slurm_addr_t remote_addr;

	xassert(persist_conn);
	xassert(persist_conn->rem_host);
	xassert(persist_conn->rem_port);
	xassert(persist_conn->cluster_name);

	/* Release a descriptor left from an earlier open, if there is one */
	if (persist_conn->fd <= 0)
		persist_conn->fd = -1;
	else
		_close_fd(&persist_conn->fd);

	if (!persist_conn->inited)
		persist_conn->inited = true;

	if (!persist_conn->version) {
		/*
		 * Default to MIN_PROTOCOL so that a higher version
		 * controller can talk to a lower protocol version
		 * controller. Callers talking to the DBD are expected to
		 * set the current protocol version before calling this.
		 */
		persist_conn->version = SLURM_MIN_PROTOCOL_VERSION;
	}

	/* A negative timeout means "use the configured message timeout" */
	if (persist_conn->timeout < 0)
		persist_conn->timeout = slurm_get_msg_timeout() * 1000;

	slurm_set_addr_char(&remote_addr, persist_conn->rem_port,
			    persist_conn->rem_host);

	persist_conn->fd = slurm_open_msg_conn(&remote_addr);
	if (persist_conn->fd >= 0) {
		fd_set_nonblocking(persist_conn->fd);
		fd_set_close_on_exec(persist_conn->fd);
		return SLURM_SUCCESS;
	}

	/* Open failed: report it if _comm_fail_log() says we should */
	if (_comm_fail_log(persist_conn)) {
		char *msg = xstrdup_printf("%s: failed to open persistent connection to %s:%d: %m",
					   __func__,
					   persist_conn->rem_host,
					   persist_conn->rem_port);

		if (!(persist_conn->flags & PERSIST_FLAG_SUPPRESS_ERR))
			error("%s", msg);
		else
			debug2("%s", msg);
		xfree(msg);
	}

	return SLURM_ERROR;
}
/*
 * _start_msg_tree_internal - start one detached _fwd_tree_thread per branch.
 *
 * IN hl - hostlist to consume one host at a time (one branch per host), or
 *	NULL when sp_hl is given.
 * IN/OUT sp_hl - array of hl_count pre-split hostlists, or NULL when hl is
 *	given. Each entry is handed to its branch and set to NULL here.
 * IN/OUT fwd_tree_in - template copied into every branch; its timeout is
 *	filled in from the configured message timeout when it is <= 0.
 * IN hl_count - number of branches to start.
 *
 * Exactly one of hl and sp_hl must be non-NULL.
 */
static void _start_msg_tree_internal(hostlist_t hl, hostlist_t* sp_hl,
				     fwd_tree_t *fwd_tree_in,
				     int hl_count)
{
	int idx;

	xassert((hl || sp_hl) && !(hl && sp_hl));
	xassert(fwd_tree_in);
	xassert(fwd_tree_in->p_thr_count);
	xassert(fwd_tree_in->tree_mutex);
	xassert(fwd_tree_in->notify);
	xassert(fwd_tree_in->ret_list);

	if (hl)
		xassert(hl_count == hostlist_count(hl));

	if (fwd_tree_in->timeout <= 0) {
		/* convert secs to msec */
		fwd_tree_in->timeout = slurm_get_msg_timeout() * 1000;
	}

	for (idx = 0; idx < hl_count; idx++) {
		fwd_tree_t *branch = xmalloc(sizeof(fwd_tree_t));

		memcpy(branch, fwd_tree_in, sizeof(fwd_tree_t));

		if (sp_hl) {
			/* the branch takes over the pre-split hostlist */
			branch->tree_hl = sp_hl[idx];
			sp_hl[idx] = NULL;
		} else if (hl) {
			char *host = hostlist_shift(hl);

			branch->tree_hl = hostlist_create(host);
			free(host);
		}

		/*
		 * Bump the thread counter under the tree mutex. The
		 * start_msg_tree waiting loop was originally built around a
		 * "while ((count < host_count))" test, which deadlocked
		 * whenever a fwd thread could not collect every return code
		 * from its children; the counter protects against that.
		 */
		slurm_mutex_lock(branch->tree_mutex);
		(*branch->p_thr_count)++;
		slurm_mutex_unlock(branch->tree_mutex);

		slurm_thread_create_detached(NULL, _fwd_tree_thread, branch);
	}
}
static void _load_slurm_config(void) { acct_storage_backup_host = slurm_get_accounting_storage_backup_host(); acct_storage_host = slurm_get_accounting_storage_host(); acct_storage_loc = slurm_get_accounting_storage_loc(); acct_storage_pass = slurm_get_accounting_storage_pass(); acct_storage_port = slurm_get_accounting_storage_port(); acct_storage_type = slurm_get_accounting_storage_type(); acct_storage_user = slurm_get_accounting_storage_user(); auth_type = slurm_get_auth_type(); msg_timeout = slurm_get_msg_timeout(); plugin_dir = slurm_get_plugin_dir(); private_data = slurm_get_private_data(); slurm_user_id = slurm_get_slurm_user_id(); track_wckey = slurm_get_track_wckey(); }
/*
 * _forward_msg_internal - start one detached _forward_thread per branch.
 *
 * IN hl - hostlist consumed one host per branch; used when sp_hl is NULL.
 * IN sp_hl - array of hl_count pre-split hostlists, or NULL. Each entry is
 *	turned into a ranged string and then destroyed here.
 * IN fwd_struct - shared forwarding state handed to every thread.
 * IN header - header whose fields are copied into each forwarded message.
 * IN timeout - timeout stored in each message; a value <= 0 selects the
 *	configured message timeout.
 * IN hl_count - number of branches to start.
 */
static void _forward_msg_internal(hostlist_t hl, hostlist_t* sp_hl,
				  forward_struct_t *fwd_struct,
				  header_t *header, int timeout,
				  int hl_count)
{
	int idx;

	if (timeout <= 0) {
		/* convert secs to msec */
		timeout = slurm_get_msg_timeout() * 1000;
	}

	for (idx = 0; idx < hl_count; idx++) {
		char *nodelist = NULL;
		forward_msg_t *fwd_msg = xmalloc(sizeof(forward_msg_t));

		fwd_msg->fwd_struct = fwd_struct;
		fwd_msg->timeout = timeout;

		/* Copy the header fields; return data starts out empty */
		memcpy(&fwd_msg->header.orig_addr, &header->orig_addr,
		       sizeof(slurm_addr_t));
		fwd_msg->header.version     = header->version;
		fwd_msg->header.flags       = header->flags;
		fwd_msg->header.msg_type    = header->msg_type;
		fwd_msg->header.body_length = header->body_length;
		fwd_msg->header.ret_list    = NULL;
		fwd_msg->header.ret_cnt     = 0;

		if (!sp_hl) {
			/* one host per branch, taken from the front of hl */
			char *host = hostlist_shift(hl);

			nodelist = xstrdup(host);
			free(host);
		} else {
			nodelist = hostlist_ranged_string_xmalloc(sp_hl[idx]);
			hostlist_destroy(sp_hl[idx]);
		}

		forward_init(&fwd_msg->header.forward, NULL);
		fwd_msg->header.forward.nodelist = nodelist;

		slurm_thread_create_detached(NULL, _forward_thread, fwd_msg);
	}
}
static void _load_config(void) { char *sched_params, *select_type, *tmp_ptr; sched_timeout = slurm_get_msg_timeout() / 2; sched_timeout = MAX(sched_timeout, 1); sched_timeout = MIN(sched_timeout, 10); sched_params = slurm_get_sched_params(); if (sched_params && (tmp_ptr=strstr(sched_params, "interval="))) builtin_interval = atoi(tmp_ptr + 9); if (builtin_interval < 1) { error("Invalid SchedulerParameters interval: %d", builtin_interval); builtin_interval = BACKFILL_INTERVAL; } if (sched_params && (tmp_ptr=strstr(sched_params, "max_job_bf="))) max_sched_job_cnt = atoi(tmp_ptr + 11); if (sched_params && (tmp_ptr=strstr(sched_params, "bf_max_job_test="))) max_sched_job_cnt = atoi(tmp_ptr + 16); if (max_sched_job_cnt < 1) { error("Invalid SchedulerParameters bf_max_job_test: %d", max_sched_job_cnt); max_sched_job_cnt = 50; } xfree(sched_params); select_type = slurm_get_select_type(); if (!xstrcmp(select_type, "select/serial")) { /* Do not spend time computing expected start time for * pending jobs */ max_sched_job_cnt = 0; stop_builtin_agent(); } xfree(select_type); }
static void *_msg_thread(void *x) { struct msg_arg *msg_arg_ptr = (struct msg_arg *) x; int rc, timeout; slurm_msg_t msg_send; slurm_msg_t_init(&msg_send); debug2("KVS_Barrier msg to %s:%hu", msg_arg_ptr->bar_ptr->hostname, msg_arg_ptr->bar_ptr->port); msg_send.msg_type = PMI_KVS_GET_RESP; msg_send.data = (void *) msg_arg_ptr->kvs_ptr; slurm_set_addr(&msg_send.address, msg_arg_ptr->bar_ptr->port, msg_arg_ptr->bar_ptr->hostname); timeout = slurm_get_msg_timeout() * 10000; if (slurm_send_recv_rc_msg_only_one(&msg_send, &rc, timeout) < 0) { error("slurm_send_recv_rc_msg_only_one to %s:%hu : %m", msg_arg_ptr->bar_ptr->hostname, msg_arg_ptr->bar_ptr->port); } else if (rc != SLURM_SUCCESS) { error("KVS_Barrier confirm from %s, rc=%d", msg_arg_ptr->bar_ptr->hostname, rc); } else { /* successfully transmitted KVS keypairs */ } slurm_mutex_lock(&agent_mutex); agent_cnt--; pthread_cond_signal(&agent_cond); slurm_mutex_unlock(&agent_mutex); xfree(x); return NULL; }
/*
 * _layout_conf_dbd - fill a tree store with the accounting configuration.
 *
 * First adds the accounting-related slurm.conf values, then one line per
 * key/value pair returned by slurmdb_config_get(). If slurmdb_config_get()
 * returns NULL only the slurm.conf part is displayed.
 *
 * IN/OUT treestore - tree store receiving the display lines.
 */
static void _layout_conf_dbd(GtkTreeStore *treestore)
{
	ListIterator itr = NULL;
	GtkTreeIter iter;
	config_key_pair_t *key_pair;
	int update = 0;
	time_t now = time(NULL);
	char tmp_str[128], *user_name = NULL;
	List dbd_config_list = NULL;

	/* first load accounting parms from slurm.conf */
	char *acct_storage_backup_host =
		slurm_get_accounting_storage_backup_host();
	char *acct_storage_host = slurm_get_accounting_storage_host();
	char *acct_storage_loc  = slurm_get_accounting_storage_loc();
	char *acct_storage_pass = slurm_get_accounting_storage_pass();
	uint32_t acct_storage_port = slurm_get_accounting_storage_port();
	char *acct_storage_type = slurm_get_accounting_storage_type();
	char *acct_storage_user = slurm_get_accounting_storage_user();
	char *auth_type = slurm_get_auth_type();
	uint16_t msg_timeout = slurm_get_msg_timeout();
	char *plugin_dir = slurm_get_plugin_dir();
	uint16_t private_data = slurm_get_private_data();
	uint32_t slurm_user_id = slurm_get_slurm_user_id();
	uint16_t track_wckey = slurm_get_track_wckey();

	slurm_make_time_str(&now, tmp_str, sizeof(tmp_str));
	add_display_treestore_line_with_font(
		update, treestore, &iter,
		"SLURM Configuration data as of", tmp_str, "bold");

	add_display_treestore_line(update, treestore, &iter,
				   "AccountingStorageBackupHost",
				   acct_storage_backup_host);
	add_display_treestore_line(update, treestore, &iter,
				   "AccountingStorageHost",
				   acct_storage_host);
	add_display_treestore_line(update, treestore, &iter,
				   "AccountingStorageLoc",
				   acct_storage_loc);
	add_display_treestore_line(update, treestore, &iter,
				   "AccountingStoragePass",
				   acct_storage_pass);
	snprintf(tmp_str, sizeof(tmp_str), "%u", acct_storage_port);
	add_display_treestore_line(update, treestore, &iter,
				   "AccountingStoragePort", tmp_str);
	add_display_treestore_line(update, treestore, &iter,
				   "AccountingStorageType",
				   acct_storage_type);
	add_display_treestore_line(update, treestore, &iter,
				   "AccountingStorageUser",
				   acct_storage_user);
	add_display_treestore_line(update, treestore, &iter,
				   "AuthType", auth_type);
	snprintf(tmp_str, sizeof(tmp_str), "%u sec", msg_timeout);
	add_display_treestore_line(update, treestore, &iter,
				   "MessageTimeout", tmp_str);
	add_display_treestore_line(update, treestore, &iter,
				   "PluginDir", plugin_dir);
	private_data_string(private_data, tmp_str, sizeof(tmp_str));
	add_display_treestore_line(update, treestore, &iter,
				   "PrivateData", tmp_str);

	/*
	 * The user name has no fixed upper length, so the write into the
	 * fixed-size tmp_str must be bounded.
	 */
	user_name = uid_to_string(slurm_user_id);
	snprintf(tmp_str, sizeof(tmp_str), "%s(%u)",
		 user_name, slurm_user_id);
	xfree(user_name);
	add_display_treestore_line(update, treestore, &iter,
				   "SlurmUserId", tmp_str);
	add_display_treestore_line(update, treestore, &iter,
				   "SLURM_CONF", default_slurm_config_file);
	add_display_treestore_line(update, treestore, &iter,
				   "SLURM_VERSION", SLURM_VERSION_STRING);
	snprintf(tmp_str, sizeof(tmp_str), "%u", track_wckey);
	add_display_treestore_line(update, treestore, &iter,
				   "TrackWCKey", tmp_str);

	xfree(acct_storage_backup_host);
	xfree(acct_storage_host);
	xfree(acct_storage_loc);
	xfree(acct_storage_pass);
	xfree(acct_storage_type);
	xfree(acct_storage_user);
	xfree(auth_type);
	xfree(plugin_dir);

	/* second, load the parameters from slurmdbd.conf */
	if (!(dbd_config_list = slurmdb_config_get(NULL)))
		return;

	add_display_treestore_line_with_font(
		update, treestore, &iter,
		"\nSlurmDBD Configuration:", NULL, "bold");

	itr = list_iterator_create(dbd_config_list);
	while ((key_pair = list_next(itr))) {
		add_display_treestore_line(update, treestore, &iter,
					   key_pair->name,
					   key_pair->value);
	}
	list_iterator_destroy(itr);

	/*
	 * The list is owned by this function and was previously leaked on
	 * every call. The strings shown above are released or overwritten
	 * right after being added throughout this function, so the display
	 * lines do not depend on the list staying alive.
	 */
	list_destroy(dbd_config_list);
}
/* Open a connection to the Slurm DBD and set slurmdbd_conn */ static void _open_slurmdbd_conn(bool need_db) { char *backup_host = NULL; int rc; if (slurmdbd_conn && slurmdbd_conn->fd >= 0) { debug("Attempt to re-open slurmdbd socket"); /* clear errno (checked after this for errors) */ errno = 0; return; } slurm_persist_conn_close(slurmdbd_conn); if (!slurmdbd_conn) { slurmdbd_conn = xmalloc(sizeof(slurm_persist_conn_t)); slurmdbd_conn->flags = PERSIST_FLAG_DBD | PERSIST_FLAG_RECONNECT; slurmdbd_conn->persist_type = PERSIST_TYPE_DBD; if (!slurmdbd_cluster) slurmdbd_cluster = slurm_get_cluster_name(); slurmdbd_conn->cluster_name = xstrdup(slurmdbd_cluster); slurmdbd_conn->timeout = (slurm_get_msg_timeout() + 35) * 1000; slurmdbd_conn->rem_port = slurm_get_accounting_storage_port(); if (!slurmdbd_conn->rem_port) { slurmdbd_conn->rem_port = SLURMDBD_PORT; slurm_set_accounting_storage_port( slurmdbd_conn->rem_port); } } slurmdbd_shutdown = 0; slurmdbd_conn->shutdown = &slurmdbd_shutdown; slurmdbd_conn->version = SLURM_PROTOCOL_VERSION; xfree(slurmdbd_conn->rem_host); slurmdbd_conn->rem_host = slurm_get_accounting_storage_host(); if (!slurmdbd_conn->rem_host) { slurmdbd_conn->rem_host = xstrdup(DEFAULT_STORAGE_HOST); slurm_set_accounting_storage_host( slurmdbd_conn->rem_host); } // See if a backup slurmdbd is configured backup_host = slurm_get_accounting_storage_backup_host(); again: // A connection failure is only an error if backup dne or also fails if (backup_host) slurmdbd_conn->flags |= PERSIST_FLAG_SUPPRESS_ERR; else slurmdbd_conn->flags &= (~PERSIST_FLAG_SUPPRESS_ERR); if (((rc = slurm_persist_conn_open(slurmdbd_conn)) != SLURM_SUCCESS) && backup_host) { xfree(slurmdbd_conn->rem_host); // Force the next error to display slurmdbd_conn->comm_fail_time = 0; slurmdbd_conn->rem_host = backup_host; backup_host = NULL; goto again; } xfree(backup_host); if (rc == SLURM_SUCCESS) { /* set the timeout to the timeout to be used for all other * messages */ 
slurmdbd_conn->timeout = SLURMDBD_TIMEOUT * 1000; if (slurmdbd_conn->trigger_callbacks.dbd_resumed) (slurmdbd_conn->trigger_callbacks.dbd_resumed)(); if (slurmdbd_conn->trigger_callbacks.db_resumed) (slurmdbd_conn->trigger_callbacks.db_resumed)(); } if ((!need_db && (rc == ESLURM_DB_CONNECTION)) || (rc == SLURM_SUCCESS)) { debug("slurmdbd: Sent PersistInit msg"); /* clear errno (checked after this for errors) */ errno = 0; } else { if ((rc == ESLURM_DB_CONNECTION) && slurmdbd_conn->trigger_callbacks.db_fail) (slurmdbd_conn->trigger_callbacks.db_fail)(); error("slurmdbd: Sending PersistInit msg: %m"); slurm_persist_conn_close(slurmdbd_conn); } }
/*
 * _slurm_msg_sendto - send a message buffer using the configured message
 *	timeout.
 *
 * Forwards to _slurm_msg_sendto_timeout() with a timeout of
 * slurm_get_msg_timeout() * 1000 and returns its result unchanged.
 */
ssize_t _slurm_msg_sendto(slurm_fd_t fd, char *buffer, size_t size,
			  uint32_t flags)
{
	int timeout_msec = slurm_get_msg_timeout() * 1000;

	return _slurm_msg_sendto_timeout(fd, buffer, size, flags,
					 timeout_msec);
}
/*
 * _slurm_msg_recvfrom - receive a message using the configured message
 *	timeout.
 *
 * Forwards to _slurm_msg_recvfrom_timeout() with a timeout of
 * slurm_get_msg_timeout() * 1000 and returns its result unchanged.
 */
ssize_t _slurm_msg_recvfrom(slurm_fd_t fd, char **pbuf, size_t *lenp,
			    uint32_t flags)
{
	int timeout_msec = slurm_get_msg_timeout() * 1000;

	return _slurm_msg_recvfrom_timeout(fd, pbuf, lenp, flags,
					   timeout_msec);
}
/*
 * _attempt_backfill - run one backfill scheduling pass over the job queue.
 *
 * Pops jobs from the queue built by build_job_queue(), works out for each
 * pending job which nodes it could use and when, starts it through
 * _start_job() if it can begin now, and otherwise records its planned
 * start in the node_space table via _add_reservation() so later jobs are
 * tested against it.
 *
 * RET 0 (SLURM_SUCCESS on the HAVE_CRAY early exit) normally; 1 when
 *	_yield_locks() returned non-zero and the pass was abandoned.
 */
static int _attempt_backfill(void)
{
	DEF_TIMERS;
	bool filter_root = false;
	List job_queue;
	job_queue_rec_t *job_queue_rec;
	slurmdb_qos_rec_t *qos_ptr = NULL;
	int i, j, node_space_recs;
	struct job_record *job_ptr;
	struct part_record *part_ptr;
	uint32_t end_time, end_reserve;
	uint32_t time_limit, comp_time_limit, orig_time_limit;
	uint32_t min_nodes, max_nodes, req_nodes;
	bitstr_t *avail_bitmap = NULL, *resv_bitmap = NULL;
	bitstr_t *exc_core_bitmap = NULL;
	time_t now, sched_start, later_start, start_res, resv_end;
	node_space_map_t *node_space;
	struct timeval bf_time1, bf_time2;
	/* computed on the first call only, then reused by later calls */
	static int sched_timeout = 0;
	int this_sched_timeout = 0, rc = 0;
	int job_test_count = 0;
	/* per-user job counters, only allocated when
	 * max_backfill_job_per_user is set */
	uint32_t *uid = NULL, nuser = 0;
	uint16_t *njobs = NULL;
	bool already_counted;

#ifdef HAVE_CRAY
	/*
	 * Run a Basil Inventory immediately before setting up the schedule
	 * plan, to avoid race conditions caused by ALPS node state change.
	 * Needs to be done with the node-state lock taken.
	 */
	START_TIMER;
	if (select_g_reconfigure()) {
		debug4("backfill: not scheduling due to ALPS");
		return SLURM_SUCCESS;
	}
	END_TIMER;
	if (debug_flags & DEBUG_FLAG_BACKFILL)
		info("backfill: ALPS inventory completed, %s", TIME_STR);

	/* The Basil inventory can take a long time to complete. Process
	 * pending RPCs before starting the backfill scheduling logic */
	_yield_locks(1);
#endif

	START_TIMER;
	if (debug_flags & DEBUG_FLAG_BACKFILL)
		info("backfill: beginning");
	sched_start = now = time(NULL);

	/* Half of the message timeout, clamped to the range 1..10 */
	if (sched_timeout == 0) {
		sched_timeout = slurm_get_msg_timeout() / 2;
		sched_timeout = MAX(sched_timeout, 1);
		sched_timeout = MIN(sched_timeout, 10);
	}
	this_sched_timeout = sched_timeout;

	if (slurm_get_root_filter())
		filter_root = true;

	job_queue = build_job_queue(true);
	if (list_count(job_queue) == 0) {
		debug("backfill: no jobs to backfill");
		list_destroy(job_queue);
		return 0;
	}

	gettimeofday(&bf_time1, NULL);

	/* Reset the per-cycle diagnostic counters */
	slurmctld_diag_stats.bf_queue_len = list_count(job_queue);
	slurmctld_diag_stats.bf_queue_len_sum += slurmctld_diag_stats.
						 bf_queue_len;
	slurmctld_diag_stats.bf_last_depth = 0;
	slurmctld_diag_stats.bf_last_depth_try = 0;
	slurmctld_diag_stats.bf_when_last_cycle = now;
	bf_last_ints = 0;
	slurmctld_diag_stats.bf_active = 1;

	/*
	 * node_space is a list threaded through an array by the "next"
	 * index; a "next" of 0 ends the list. Record 0 starts out covering
	 * the whole backfill window with a copy of avail_node_bitmap.
	 */
	node_space = xmalloc(sizeof(node_space_map_t) *
			     (max_backfill_job_cnt + 3));
	node_space[0].begin_time = sched_start;
	node_space[0].end_time = sched_start + backfill_window;
	node_space[0].avail_bitmap = bit_copy(avail_node_bitmap);
	node_space[0].next = 0;
	node_space_recs = 1;
	if (debug_flags & DEBUG_FLAG_BACKFILL)
		_dump_node_space_table(node_space);

	if (max_backfill_job_per_user) {
		uid = xmalloc(BF_MAX_USERS * sizeof(uint32_t));
		njobs = xmalloc(BF_MAX_USERS * sizeof(uint16_t));
	}
	while ((job_queue_rec = (job_queue_rec_t *)
				list_pop_bottom(job_queue, sort_job_queue2))) {
		job_test_count++;
		job_ptr  = job_queue_rec->job_ptr;
		part_ptr = job_queue_rec->part_ptr;
		xfree(job_queue_rec);
		if (!IS_JOB_PENDING(job_ptr))
			continue;	/* started in other partition */
		job_ptr->part_ptr = part_ptr;

		if (debug_flags & DEBUG_FLAG_BACKFILL)
			info("backfill test for job %u", job_ptr->job_id);

		slurmctld_diag_stats.bf_last_depth++;
		/* Cleared once per job; the TRY_LATER re-entry below does
		 * not pass through here, so bf_last_depth_try is bumped at
		 * most once per job */
		already_counted = false;

		if (max_backfill_job_per_user) {
			/* Look this user up in the table, counting the job */
			for (j = 0; j < nuser; j++) {
				if (job_ptr->user_id == uid[j]) {
					njobs[j]++;
					debug2("backfill: user %u: #jobs %u",
					       uid[j], njobs[j]);
					break;
				}
			}
			if (j == nuser) { /* user not found */
				if (nuser < BF_MAX_USERS) {
					uid[j] = job_ptr->user_id;
					njobs[j] = 1;
					nuser++;
				} else {
					error("backfill: too many users in "
					      "queue. Consider increasing "
					      "BF_MAX_USERS");
				}
				/* logged on both branches above, including
				 * when the table was already full */
				debug2("backfill: found new user %u. "
				       "Total #users now %u",
				       job_ptr->user_id, nuser);
			} else {
				if (njobs[j] > max_backfill_job_per_user) {
					/* skip job */
					debug("backfill: have already checked "
					      "%u jobs for user %u; skipping "
					      "job %u",
					      max_backfill_job_per_user,
					      job_ptr->user_id,
					      job_ptr->job_id);
					continue;
				}
			}
		}

		/* Skip partitions that cannot schedule or have no nodes */
		if (((part_ptr->state_up & PARTITION_SCHED) == 0) ||
		    (part_ptr->node_bitmap == NULL))
			continue;
		if ((part_ptr->flags & PART_FLAG_ROOT_ONLY) && filter_root)
			continue;

		if ((!job_independent(job_ptr, 0)) ||
		    (license_job_test(job_ptr, time(NULL)) != SLURM_SUCCESS))
			continue;

		/* Determine minimum and maximum node counts */
		min_nodes = MAX(job_ptr->details->min_nodes,
				part_ptr->min_nodes);
		if (job_ptr->details->max_nodes == 0)
			max_nodes = part_ptr->max_nodes;
		else
			max_nodes = MIN(job_ptr->details->max_nodes,
					part_ptr->max_nodes);
		max_nodes = MIN(max_nodes, 500000);     /* prevent overflows */
		if (job_ptr->details->max_nodes)
			req_nodes = max_nodes;
		else
			req_nodes = min_nodes;
		if (min_nodes > max_nodes) {
			/* job's min_nodes exceeds partition's max_nodes */
			continue;
		}

		/* Determine job's expected completion time */
		if (job_ptr->time_limit == NO_VAL) {
			if (part_ptr->max_time == INFINITE)
				time_limit = 365 * 24 * 60; /* one year */
			else
				time_limit = part_ptr->max_time;
		} else {
			if (part_ptr->max_time == INFINITE)
				time_limit = job_ptr->time_limit;
			else
				time_limit = MIN(job_ptr->time_limit,
						 part_ptr->max_time);
		}
		/*
		 * job_ptr->time_limit may be overwritten below for the
		 * duration of the test. orig_time_limit is what gets put
		 * back on every path that gives up on the job.
		 */
		comp_time_limit = time_limit;
		orig_time_limit = job_ptr->time_limit;
		qos_ptr = job_ptr->qos_ptr;
		if (qos_ptr && (qos_ptr->flags & QOS_FLAG_NO_RESERVE))
			time_limit = job_ptr->time_limit = 1;
		else if (job_ptr->time_min && (job_ptr->time_min < time_limit))
			time_limit = job_ptr->time_limit = job_ptr->time_min;

		/* Determine impact of any resource reservations */
		later_start = now;
 TRY_LATER:
		/* Re-entered with later_start set to the next time to try */
		FREE_NULL_BITMAP(avail_bitmap);
		start_res   = later_start;
		later_start = 0;
		exc_core_bitmap = NULL;
		j = job_test_resv(job_ptr, &start_res, true, &avail_bitmap,
				  &exc_core_bitmap);
		if (j != SLURM_SUCCESS) {
			job_ptr->time_limit = orig_time_limit;
			continue;
		}
		if (start_res > now)
			end_time = (time_limit * 60) + start_res;
		else
			end_time = (time_limit * 60) + now;
		resv_end = find_resv_end(start_res);
		/* Identify usable nodes for this job */
		bit_and(avail_bitmap, part_ptr->node_bitmap);
		bit_and(avail_bitmap, up_node_bitmap);
		/*
		 * Walk the node_space list: records ending at or before
		 * start_res are ignored, records beginning by end_time
		 * restrict avail_bitmap, and the first record beginning
		 * after end_time stops the walk. later_start picks up the
		 * end of the first record that ends after start_res and has
		 * a successor.
		 */
		for (j=0; ; ) {
			if ((node_space[j].end_time > start_res) &&
			     node_space[j].next && (later_start == 0))
				later_start = node_space[j].end_time;
			if (node_space[j].end_time <= start_res)
				;
			else if (node_space[j].begin_time <= end_time) {
				bit_and(avail_bitmap,
					node_space[j].avail_bitmap);
			} else
				break;
			if ((j = node_space[j].next) == 0)
				break;
		}
		/* resv_end is tested for non-zero, then incremented before
		 * the comparison with later_start */
		if ((resv_end++) &&
		    ((later_start == 0) || (resv_end < later_start))) {
			later_start = resv_end;
		}

		if (job_ptr->details->exc_node_bitmap) {
			/* invert, mask, then invert back so the job's own
			 * bitmap is left as it was */
			bit_not(job_ptr->details->exc_node_bitmap);
			bit_and(avail_bitmap,
				job_ptr->details->exc_node_bitmap);
			bit_not(job_ptr->details->exc_node_bitmap);
		}

		/* Test if insufficient nodes remain OR
		 *	required nodes missing OR
		 *	nodes lack features */
		if ((bit_set_count(avail_bitmap) < min_nodes) ||
		    ((job_ptr->details->req_node_bitmap) &&
		     (!bit_super_set(job_ptr->details->req_node_bitmap,
				     avail_bitmap))) ||
		    (job_req_node_filter(job_ptr, avail_bitmap))) {
			if (later_start) {
				job_ptr->start_time = 0;
				goto TRY_LATER;
			}
			/* Job can not start until too far in the future */
			job_ptr->time_limit = orig_time_limit;
			job_ptr->start_time = sched_start + backfill_window;
			continue;
		}

		/* Identify nodes which are definitely off limits */
		FREE_NULL_BITMAP(resv_bitmap);
		resv_bitmap = bit_copy(avail_bitmap);
		bit_not(resv_bitmap);

		if ((time(NULL) - sched_start) >= this_sched_timeout) {
			/* Put the real time limit back while
			 * _yield_locks() runs, then restore the test value */
			uint32_t save_time_limit = job_ptr->time_limit;
			job_ptr->time_limit = orig_time_limit;
			if (debug_flags & DEBUG_FLAG_BACKFILL) {
				END_TIMER;
				info("backfill: yielding locks after testing "
				     "%d jobs, %s",
				     job_test_count, TIME_STR);
			}
			if (_yield_locks(backfill_interval)) {
				if (debug_flags & DEBUG_FLAG_BACKFILL) {
					info("backfill: system state changed, "
					     "breaking out after testing %d "
					     "jobs", job_test_count);
				}
				rc = 1;
				break;
			}
			job_ptr->time_limit = save_time_limit;
			/* Reset backfill scheduling timers, resume testing */
			sched_start = time(NULL);
			job_test_count = 0;
			START_TIMER;
		}
		/* this is the time consuming operation */
		debug2("backfill: entering _try_sched for job %u.",
		       job_ptr->job_id);

		if (!already_counted) {
			slurmctld_diag_stats.bf_last_depth_try++;
			already_counted = true;
		}

		j = _try_sched(job_ptr, &avail_bitmap, min_nodes, max_nodes,
			       req_nodes, exc_core_bitmap);

		now = time(NULL);
		if (j != SLURM_SUCCESS) {
			job_ptr->time_limit = orig_time_limit;
			job_ptr->start_time = 0;
			continue;	/* not runnable */
		}

		if (start_res > job_ptr->start_time) {
			job_ptr->start_time = start_res;
			last_job_update = now;
		}
		if (job_ptr->start_time <= now) {
			/* NOTE: this rc shadows the function-level rc, so
			 * the "break" below leaves the return value alone */
			int rc = _start_job(job_ptr, resv_bitmap);
			if (qos_ptr && (qos_ptr->flags & QOS_FLAG_NO_RESERVE)){
				job_ptr->time_limit = orig_time_limit;
				job_ptr->end_time = job_ptr->start_time +
						    (orig_time_limit * 60);
			} else if ((rc == SLURM_SUCCESS) && job_ptr->time_min) {
				/* Set time limit as high as possible */
				job_ptr->time_limit = comp_time_limit;
				job_ptr->end_time = job_ptr->start_time +
						    (comp_time_limit * 60);
				_reset_job_time_limit(job_ptr, now,
						      node_space);
				time_limit = job_ptr->time_limit;
			} else {
				job_ptr->time_limit = orig_time_limit;
			}
			if (rc == ESLURM_ACCOUNTING_POLICY) {
				/* Unknown future start time, just skip job */
				job_ptr->start_time = 0;
				continue;
			} else if (rc != SLURM_SUCCESS) {
				/* Planned to start job, but something bad
				 * happened. */
				job_ptr->start_time = 0;
				break;
			} else {
				/* Started this job, move to next one */
				continue;
			}
		} else
			job_ptr->time_limit = orig_time_limit;

		if (later_start && (job_ptr->start_time > later_start)) {
			/* Try later when some nodes currently reserved for
			 * pending jobs are free */
			job_ptr->start_time = 0;
			goto TRY_LATER;
		}

		if (job_ptr->start_time > (sched_start + backfill_window)) {
			/* Starts too far in the future to worry about */
			continue;
		}

		if (node_space_recs >= max_backfill_job_cnt) {
			/* Already have too many jobs to deal with */
			break;
		}

		end_reserve = job_ptr->start_time + (time_limit * 60);
		if (_test_resv_overlap(node_space, avail_bitmap,
				       job_ptr->start_time, end_reserve)) {
			/* This job overlaps with an existing reservation for
			 * job to be backfill scheduled, which the sched
			 * plugin does not know about. Try again later. */
			later_start = job_ptr->start_time;
			job_ptr->start_time = 0;
			goto TRY_LATER;
		}

		/*
		 * Add reservation to scheduling table if appropriate
		 */
		if (qos_ptr && (qos_ptr->flags & QOS_FLAG_NO_RESERVE))
			continue;
		/* avail_bitmap is inverted before being handed to
		 * _add_reservation() */
		bit_not(avail_bitmap);
		_add_reservation(job_ptr->start_time, end_reserve,
				 avail_bitmap, node_space, &node_space_recs);
		if (debug_flags & DEBUG_FLAG_BACKFILL)
			_dump_node_space_table(node_space);
	}
	xfree(uid);
	xfree(njobs);
	FREE_NULL_BITMAP(avail_bitmap);
	FREE_NULL_BITMAP(resv_bitmap);

	/* Free the bitmap of every record on the node_space list */
	for (i=0; ; ) {
		FREE_NULL_BITMAP(node_space[i].avail_bitmap);
		if ((i = node_space[i].next) == 0)
			break;
	}
	xfree(node_space);
	list_destroy(job_queue);
	gettimeofday(&bf_time2, NULL);
	_do_diag_stats(&bf_time1, &bf_time2);
	if (debug_flags & DEBUG_FLAG_BACKFILL) {
		END_TIMER;
		info("backfill: completed testing %d jobs, %s",
		     job_test_count, TIME_STR);
	}
	return rc;
}
/* Wait for barrier and get full PMI Keyval space data */ int slurm_get_kvs_comm_set(struct kvs_comm_set **kvs_set_ptr, int pmi_rank, int pmi_size) { int rc, srun_fd, retries = 0, timeout = 0; slurm_msg_t msg_send, msg_rcv; slurm_addr_t slurm_addr, srun_reply_addr; char hostname[64]; uint16_t port; kvs_get_msg_t data; char *env_pmi_ifhn; if (kvs_set_ptr == NULL) return EINVAL; *kvs_set_ptr = NULL; /* initialization */ if ((rc = _get_addr()) != SLURM_SUCCESS) { error("_get_addr: %m"); return rc; } _set_pmi_time(); if (pmi_fd < 0) { if ((pmi_fd = slurm_init_msg_engine_port(0)) < 0) { error("slurm_init_msg_engine_port: %m"); return SLURM_ERROR; } fd_set_blocking(pmi_fd); } if (slurm_get_stream_addr(pmi_fd, &slurm_addr) < 0) { error("slurm_get_stream_addr: %m"); return SLURM_ERROR; } /* hostname is not set here, so slurm_get_addr fails slurm_get_addr(&slurm_addr, &port, hostname, sizeof(hostname)); */ port = ntohs(slurm_addr.sin_port); if ((env_pmi_ifhn = getenv("SLURM_PMI_RESP_IFHN"))) { strncpy(hostname, env_pmi_ifhn, sizeof(hostname)); hostname[sizeof(hostname)-1] = 0; } else gethostname_short(hostname, sizeof(hostname)); data.task_id = pmi_rank; data.size = pmi_size; data.port = port; data.hostname = hostname; slurm_msg_t_init(&msg_send); slurm_msg_t_init(&msg_rcv); msg_send.address = srun_addr; msg_send.msg_type = PMI_KVS_GET_REQ; msg_send.data = &data; /* Send the RPC to the local srun communcation manager. * Since the srun can be sent thousands of messages at * the same time and refuse some connections, retry as * needed. Wait until all key-pairs have been sent by * all tasks then spread out messages by task's rank. * Also increase the message timeout if many tasks * since the srun command can get very overloaded (the * default timeout is 10 secs). 
*/ _delay_rpc(pmi_rank, pmi_size); if (pmi_size > 4000) /* 240 secs */ timeout = slurm_get_msg_timeout() * 24000; else if (pmi_size > 1000) /* 120 secs */ timeout = slurm_get_msg_timeout() * 12000; else if (pmi_size > 100) /* 60 secs */ timeout = slurm_get_msg_timeout() * 6000; else if (pmi_size > 10) /* 20 secs */ timeout = slurm_get_msg_timeout() * 2000; while (slurm_send_recv_rc_msg_only_one(&msg_send, &rc, timeout) < 0) { if (retries++ > MAX_RETRIES) { error("slurm_get_kvs_comm_set: %m"); return SLURM_ERROR; } else debug("get kvs retry %d", retries); _delay_rpc(pmi_rank, pmi_size); } if (rc != SLURM_SUCCESS) { error("slurm_get_kvs_comm_set error_code=%d", rc); return rc; } /* get the message after all tasks reach the barrier */ srun_fd = slurm_accept_msg_conn(pmi_fd, &srun_reply_addr); if (srun_fd < 0) { error("slurm_accept_msg_conn: %m"); return errno; } while ((rc = slurm_receive_msg(srun_fd, &msg_rcv, timeout)) != 0) { if (errno == EINTR) continue; error("slurm_receive_msg: %m"); slurm_close(srun_fd); return errno; } if (msg_rcv.auth_cred) (void)g_slurm_auth_destroy(msg_rcv.auth_cred); if (msg_rcv.msg_type != PMI_KVS_GET_RESP) { error("slurm_get_kvs_comm_set msg_type=%d", msg_rcv.msg_type); slurm_close(srun_fd); return SLURM_UNEXPECTED_MSG_ERROR; } if (slurm_send_rc_msg(&msg_rcv, SLURM_SUCCESS) < 0) error("slurm_send_rc_msg: %m"); slurm_close(srun_fd); *kvs_set_ptr = msg_rcv.data; rc = _forward_comm_set(*kvs_set_ptr); return rc; }
/*
 * start_msg_tree - logic to begin the forward tree and
 *	accumulate the return codes from processes getting
 *	the forwarded message
 *
 * IN: hl          - hostlist_t   - list of every node to send message to
 * IN: msg         - slurm_msg_t  - message to send.
 * IN: timeout     - int          - how long to wait in milliseconds.
 * RET List 	   - List containing the responses of the children
 *		     (if any) we forwarded the message to. List
 *		     containing type (ret_data_info_t).
 */
extern List start_msg_tree(hostlist_t hl, slurm_msg_t *msg, int timeout)
{
	int *span = NULL;
	fwd_tree_t *fwd_tree = NULL;
	pthread_mutex_t tree_mutex;
	pthread_cond_t notify;
	int j = 0, count = 0;
	List ret_list = NULL;
	char *name = NULL;
	int thr_count = 0;
	int host_count = 0;

	xassert(hl);
	xassert(msg);

	hostlist_uniq(hl);
	host_count = hostlist_count(hl);

	span = set_span(host_count, 0);

	slurm_mutex_init(&tree_mutex);
	pthread_cond_init(&notify, NULL);

	ret_list = list_create(destroy_data_info);

	/*
	 * Each pass starts one detached thread. It is handed the host at
	 * the front of hl plus up to span[thr_count] further hosts.
	 */
	while ((name = hostlist_shift(hl))) {
		pthread_attr_t attr_agent;
		pthread_t thread_agent;
		int retries = 0;
		int err;

		slurm_attr_init(&attr_agent);
		/*
		 * pthread functions report failure through their return
		 * value and leave errno alone, so copy the code into errno
		 * for the %m in the log message.
		 */
		err = pthread_attr_setdetachstate(&attr_agent,
						  PTHREAD_CREATE_DETACHED);
		if (err) {
			errno = err;
			error("pthread_attr_setdetachstate error %m");
		}

		fwd_tree = xmalloc(sizeof(fwd_tree_t));
		fwd_tree->orig_msg = msg;
		fwd_tree->ret_list = ret_list;
		fwd_tree->timeout = timeout;
		fwd_tree->notify = &notify;
		fwd_tree->tree_mutex = &tree_mutex;

		if (fwd_tree->timeout <= 0) {
			/* convert secs to msec */
			fwd_tree->timeout = slurm_get_msg_timeout() * 1000;
		}

		fwd_tree->tree_hl = hostlist_create(name);
		free(name);
		for (j = 0; j < span[thr_count]; j++) {
			name = hostlist_shift(hl);
			if (!name)
				break;
			hostlist_push(fwd_tree->tree_hl, name);
			free(name);
		}

		while ((err = pthread_create(&thread_agent, &attr_agent,
					     _fwd_tree_thread,
					     (void *)fwd_tree))) {
			errno = err;
			error("pthread_create error %m");
			if (++retries > MAX_RETRIES)
				fatal("Can't create pthread");
			sleep(1);	/* sleep and try again */
		}
		slurm_attr_destroy(&attr_agent);
		thr_count++;
	}
	xfree(span);

	/* Wait until ret_list holds one entry per host */
	slurm_mutex_lock(&tree_mutex);

	count = list_count(ret_list);
	debug2("Tree head got back %d looking for %d", count, host_count);
	while ((count < host_count)) {
		pthread_cond_wait(&notify, &tree_mutex);
		count = list_count(ret_list);
		debug2("Tree head got back %d", count);
	}
	debug2("Tree head got them all");
	slurm_mutex_unlock(&tree_mutex);

	slurm_mutex_destroy(&tree_mutex);
	pthread_cond_destroy(&notify);

	return ret_list;
}
/*
 * _forward_thread - thread body that forwards one message down one branch.
 *
 * Walks the branch's hostlist until the message has been sent to a host
 * (together with the list of hosts still behind it) and that host's
 * responses have been collected. Hosts that cannot be reached are recorded
 * as failed and the next host is tried. Results are appended to
 * fwd_struct->ret_list.
 *
 * IN arg - forward_msg_t * describing the message; freed before returning.
 * RET always NULL.
 *
 * Locking: every path that reaches "cleanup" does so with
 * fwd_struct->forward_mutex held; cleanup signals fwd_struct->notify and
 * then unlocks it.
 */
void *_forward_thread(void *arg)
{
	forward_msg_t *fwd_msg = (forward_msg_t *)arg;
	forward_struct_t *fwd_struct = fwd_msg->fwd_struct;
	Buf buffer = init_buf(BUF_SIZE);	/* probably enough for header */
	List ret_list = NULL;
	int fd = -1;
	ret_data_info_t *ret_data_info = NULL;
	char *name = NULL;
	hostlist_t hl = hostlist_create(fwd_msg->header.forward.nodelist);
	slurm_addr_t addr;
	char *buf = NULL;
	int steps = 0;
	/* remember the caller's timeout; fwd_msg->timeout is recomputed
	 * below once the size of the subtree is known */
	int start_timeout = fwd_msg->timeout;

	/* repeat until we are sure the message was sent */
	while ((name = hostlist_shift(hl))) {
		if (slurm_conf_get_addr(name, &addr) == SLURM_ERROR) {
			error("forward_thread: can't find address for host "
			      "%s, check slurm.conf", name);
			slurm_mutex_lock(&fwd_struct->forward_mutex);
			mark_as_failed_forward(&fwd_struct->ret_list, name,
					       SLURM_UNKNOWN_FORWARD_ADDR);
			free(name);
			if (hostlist_count(hl) > 0) {
				slurm_mutex_unlock(&fwd_struct->forward_mutex);
				continue;
			}
			/* no hosts left: leave with the mutex still held */
			goto cleanup;
		}
		if ((fd = slurm_open_msg_conn(&addr)) < 0) {
			error("forward_thread to %s: %m", name);

			slurm_mutex_lock(&fwd_struct->forward_mutex);
			mark_as_failed_forward(
				&fwd_struct->ret_list, name,
				SLURM_COMMUNICATIONS_CONNECTION_ERROR);
			free(name);
			if (hostlist_count(hl) > 0) {
				slurm_mutex_unlock(&fwd_struct->forward_mutex);
				/* Abandon tree. This way if all the
				 * nodes in the branch are down we
				 * don't have to time out for each
				 * node serially.
				 */
				_forward_msg_internal(hl, NULL, fwd_struct,
						      &fwd_msg->header, 0,
						      hostlist_count(hl));
				continue;
			}
			goto cleanup;
		}
		/* The hosts still in hl become the forward list that the
		 * host just contacted is asked to pass the message on to */
		buf = hostlist_ranged_string_xmalloc(hl);

		xfree(fwd_msg->header.forward.nodelist);
		fwd_msg->header.forward.nodelist = buf;
		fwd_msg->header.forward.cnt = hostlist_count(hl);
#if 0
		info("sending %d forwards (%s) to %s",
		     fwd_msg->header.forward.cnt,
		     fwd_msg->header.forward.nodelist, name);
#endif
		if (fwd_msg->header.forward.nodelist[0]) {
			debug3("forward: send to %s along with %s",
			       name, fwd_msg->header.forward.nodelist);
		} else
			debug3("forward: send to %s ", name);

		pack_header(&fwd_msg->header, buffer);

		/* add forward data to buffer */
		if (remaining_buf(buffer) < fwd_struct->buf_len) {
			int new_size = buffer->processed + fwd_struct->buf_len;
			new_size += 1024; /* padded for paranoia */
			xrealloc_nz(buffer->head, new_size);
			buffer->size = new_size;
		}
		if (fwd_struct->buf_len) {
			memcpy(&buffer->head[buffer->processed],
			       fwd_struct->buf, fwd_struct->buf_len);
			buffer->processed += fwd_struct->buf_len;
		}

		/*
		 * forward message
		 */
		if (slurm_msg_sendto(fd,
				     get_buf_data(buffer),
				     get_buf_offset(buffer),
				     SLURM_PROTOCOL_NO_SEND_RECV_FLAGS ) < 0) {
			error("forward_thread: slurm_msg_sendto: %m");

			slurm_mutex_lock(&fwd_struct->forward_mutex);
			/* NOTE(review): errno is read after error() and the
			 * mutex lock; confirm neither can overwrite it */
			mark_as_failed_forward(&fwd_struct->ret_list, name,
					       errno);
			free(name);
			if (hostlist_count(hl) > 0) {
				/* start over with a fresh buffer and socket */
				free_buf(buffer);
				buffer = init_buf(fwd_struct->buf_len);
				slurm_mutex_unlock(&fwd_struct->forward_mutex);
				slurm_close(fd);
				fd = -1;
				/* Abandon tree. This way if all the
				 * nodes in the branch are down we
				 * don't have to time out for each
				 * node serially.
				 */
				_forward_msg_internal(hl, NULL, fwd_struct,
						      &fwd_msg->header, 0,
						      hostlist_count(hl));
				continue;
			}
			goto cleanup;
		}

		/* These messages don't have a return message, but if
		 * we got here things worked out so make note of the
		 * list of nodes as success.
		 */
		if ((fwd_msg->header.msg_type == REQUEST_SHUTDOWN) ||
		    (fwd_msg->header.msg_type == REQUEST_RECONFIGURE) ||
		    (fwd_msg->header.msg_type == REQUEST_REBOOT_NODES)) {
			slurm_mutex_lock(&fwd_struct->forward_mutex);
			ret_data_info = xmalloc(sizeof(ret_data_info_t));
			list_push(fwd_struct->ret_list, ret_data_info);
			ret_data_info->node_name = xstrdup(name);
			free(name);
			while ((name = hostlist_shift(hl))) {
				ret_data_info =
					xmalloc(sizeof(ret_data_info_t));
				list_push(fwd_struct->ret_list, ret_data_info);
				ret_data_info->node_name = xstrdup(name);
				free(name);
			}
			goto cleanup;
		}

		if (fwd_msg->header.forward.cnt > 0) {
			/* looked up once, then shared by all later calls */
			static int message_timeout = -1;
			if (message_timeout < 0)
				message_timeout =
					slurm_get_msg_timeout() * 1000;
			if (!fwd_msg->header.forward.tree_width)
				fwd_msg->header.forward.tree_width =
					slurm_get_tree_width();
			/* timeout = message_timeout * steps, plus
			 * start_timeout * (steps + 1) */
			steps = (fwd_msg->header.forward.cnt+1) /
					fwd_msg->header.forward.tree_width;
			fwd_msg->timeout = (message_timeout*steps);
			/* info("got %d * %d = %d", message_timeout, */
			/*      steps, fwd_msg->timeout); */
			steps++;
			fwd_msg->timeout += (start_timeout*steps);
			/* info("now  + %d*%d = %d", start_timeout, */
			/*      steps, fwd_msg->timeout); */
		}

		ret_list = slurm_receive_msgs(fd, steps, fwd_msg->timeout);
		/* info("sent %d forwards got %d back", */
		/*      fwd_msg->header.forward.cnt, list_count(ret_list)); */

		if (!ret_list || (fwd_msg->header.forward.cnt != 0
				  && list_count(ret_list) <= 1)) {
			/* Nothing usable came back: mark this host failed
			 * and move on to the next one, if any */
			slurm_mutex_lock(&fwd_struct->forward_mutex);
			mark_as_failed_forward(&fwd_struct->ret_list, name,
					       errno);
			free(name);
			FREE_NULL_LIST(ret_list);
			if (hostlist_count(hl) > 0) {
				free_buf(buffer);
				buffer = init_buf(fwd_struct->buf_len);
				slurm_mutex_unlock(&fwd_struct->forward_mutex);
				slurm_close(fd);
				fd = -1;
				continue;
			}
			goto cleanup;
		} else if ((fwd_msg->header.forward.cnt+1)
			   != list_count(ret_list)) {
			/* this should never be called since the above
			   should catch the failed forwards and pipe
			   them back down, but this is here so we
			   never have to worry about a locked
			   mutex */
			/* NOTE(review): fwd_struct->ret_list is modified in
			 * this branch before forward_mutex is taken below;
			 * confirm that is intended */
			ListIterator itr = NULL;
			char *tmp = NULL;
			int first_node_found = 0;
			hostlist_iterator_t host_itr
				= hostlist_iterator_create(hl);
			error("We shouldn't be here. We forwarded to %d "
			      "but only got %d back",
			      (fwd_msg->header.forward.cnt+1),
			      list_count(ret_list));
			/* Mark every forwarded host with no entry in
			 * ret_list as failed. An entry without a node name
			 * is given the name of the host contacted here */
			while ((tmp = hostlist_next(host_itr))) {
				int node_found = 0;
				itr = list_iterator_create(ret_list);
				while ((ret_data_info = list_next(itr))) {
					if (!ret_data_info->node_name) {
						first_node_found = 1;
						ret_data_info->node_name =
							xstrdup(name);
					}
					if (!xstrcmp(tmp,
						   ret_data_info->node_name)) {
						node_found = 1;
						break;
					}
				}
				list_iterator_destroy(itr);
				if (!node_found) {
					mark_as_failed_forward(
						&fwd_struct->ret_list,
						tmp,
						SLURM_COMMUNICATIONS_CONNECTION_ERROR);
				}
				free(tmp);
			}
			hostlist_iterator_destroy(host_itr);
			if (!first_node_found) {
				mark_as_failed_forward(
					&fwd_struct->ret_list,
					name,
					SLURM_COMMUNICATIONS_CONNECTION_ERROR);
			}
		}
		break;
	}
	/* Move everything received into the shared result list */
	slurm_mutex_lock(&fwd_struct->forward_mutex);
	if (ret_list) {
		while ((ret_data_info = list_pop(ret_list)) != NULL) {
			if (!ret_data_info->node_name) {
				ret_data_info->node_name = xstrdup(name);
			}
			list_push(fwd_struct->ret_list, ret_data_info);
			debug3("got response from %s",
			       ret_data_info->node_name);
		}
		FREE_NULL_LIST(ret_list);
	}
	free(name);
cleanup:
	/* forward_mutex is held on every path that gets here */
	if ((fd >= 0) && slurm_close(fd) < 0)
		error ("close(%d): %m", fd);
	hostlist_destroy(hl);
	destroy_forward(&fwd_msg->header.forward);
	free_buf(buffer);
	slurm_cond_signal(&fwd_struct->notify);
	slurm_mutex_unlock(&fwd_struct->forward_mutex);
	xfree(fwd_msg);

	return (NULL);
}
/*
 * _start_msg_tree_internal - start one detached _fwd_tree_thread per branch
 *
 * Exactly one of hl / sp_hl must be supplied.
 *
 * IN: hl          - hostlist_t   - flat host list; one host is shifted off
 *                                  for each thread started
 * IN: sp_hl       - hostlist_t * - array of per-branch hostlists; entry j is
 *                                  handed to thread j and the array slot is
 *                                  reset to NULL
 * IN: fwd_tree_in - fwd_tree_t * - template copied into each thread's own
 *                                  fwd_tree_t (not freed here); its timeout
 *                                  is defaulted in place when <= 0
 * IN: hl_count    - int          - number of threads to start
 */
static void _start_msg_tree_internal(hostlist_t hl, hostlist_t* sp_hl,
				     fwd_tree_t *fwd_tree_in,
				     int hl_count)
{
	int branch_idx;

	xassert((hl || sp_hl) && !(hl && sp_hl));
	xassert(fwd_tree_in);
	xassert(fwd_tree_in->p_thr_count);
	xassert(fwd_tree_in->tree_mutex);
	xassert(fwd_tree_in->notify);
	xassert(fwd_tree_in->ret_list);

	if (hl)
		xassert(hl_count == hostlist_count(hl));

	/* convert secs to msec */
	if (fwd_tree_in->timeout <= 0)
		fwd_tree_in->timeout = slurm_get_msg_timeout() * 1000;

	for (branch_idx = 0; branch_idx < hl_count; branch_idx++) {
		pthread_attr_t attr;
		pthread_t tid;
		fwd_tree_t *branch;
		int attempts = 0;

		slurm_attr_init(&attr);
		if (pthread_attr_setdetachstate(&attr,
						PTHREAD_CREATE_DETACHED))
			error("pthread_attr_setdetachstate error %m");

		/* every thread works on its own copy of the template */
		branch = xmalloc(sizeof(fwd_tree_t));
		*branch = *fwd_tree_in;

		if (sp_hl) {
			branch->tree_hl = sp_hl[branch_idx];
			sp_hl[branch_idx] = NULL;
		} else if (hl) {
			char *host = hostlist_shift(hl);

			branch->tree_hl = hostlist_create(host);
			free(host);
		}

		/*
		 * Bump the shared thread counter under the tree mutex before
		 * the thread exists.  The start_msg_tree waiting loop was
		 * originally built around "while ((count < host_count))" and
		 * deadlocked when a fwd thread could not collect every return
		 * code from its children; it now waits on this counter.
		 */
		slurm_mutex_lock(branch->tree_mutex);
		(*branch->p_thr_count)++;
		slurm_mutex_unlock(branch->tree_mutex);

		while (pthread_create(&tid, &attr, _fwd_tree_thread,
				      (void *)branch)) {
			error("pthread_create error %m");
			if (++attempts > MAX_RETRIES)
				fatal("Can't create pthread");
			usleep(100000);	/* sleep and try again */
		}
		slurm_attr_destroy(&attr);
	}
}
/*
 * start_msg_tree  - logic to begin the forward tree and
 *                   accumulate the return codes from processes getting
 *                   the forwarded message
 *
 * IN: hl          - hostlist_t   - list of every node to send message to
 *                                  (made unique and emptied by this call)
 * IN: msg         - slurm_msg_t  - message to send.
 * IN: timeout     - int          - how long to wait in milliseconds; a value
 *                                  <= 0 selects the configured message
 *                                  timeout.
 * RET List        - List containing the responses of the children
 *                   (if any) we forwarded the message to. List
 *                   containing type (ret_data_info_t).
 */
extern List start_msg_tree(hostlist_t hl, slurm_msg_t *msg, int timeout)
{
	int *span = NULL;
	fwd_tree_t *fwd_tree = NULL;
	pthread_mutex_t tree_mutex;
	pthread_cond_t notify;
	int j = 0, count = 0;
	List ret_list = NULL;
	char *name = NULL;
	int thr_count = 0;
	int span_idx = 0;
	int host_count = 0;

	xassert(hl);
	xassert(msg);

	hostlist_uniq(hl);
	host_count = hostlist_count(hl);

	span = set_span(host_count, 0);

	slurm_mutex_init(&tree_mutex);
	pthread_cond_init(&notify, NULL);

	ret_list = list_create(destroy_data_info);

	while ((name = hostlist_shift(hl))) {
		pthread_attr_t attr_agent;
		pthread_t thread_agent;
		int retries = 0;

		slurm_attr_init(&attr_agent);
		if (pthread_attr_setdetachstate
		    (&attr_agent, PTHREAD_CREATE_DETACHED))
			error("pthread_attr_setdetachstate error %m");

		fwd_tree = xmalloc(sizeof(fwd_tree_t));
		fwd_tree->orig_msg = msg;
		fwd_tree->ret_list = ret_list;
		fwd_tree->timeout = timeout;
		fwd_tree->notify = &notify;
		fwd_tree->p_thr_count = &thr_count;
		fwd_tree->tree_mutex = &tree_mutex;

		if (fwd_tree->timeout <= 0) {
			/* convert secs to msec */
			fwd_tree->timeout = slurm_get_msg_timeout() * 1000;
		}

		fwd_tree->tree_hl = hostlist_create(name);
		free(name);

		/*
		 * Index span[] with a private launch counter.  thr_count is
		 * handed to the forwarding threads through p_thr_count and
		 * the wait loop below relies on it falling back to zero, so
		 * it can change between launches and must not select the
		 * branch width.
		 */
		for (j = 0; j < span[span_idx]; j++) {
			name = hostlist_shift(hl);
			if (!name)
				break;
			hostlist_push(fwd_tree->tree_hl, name);
			free(name);
		}
		span_idx++;

		/*
		 * Lock and increase thread counter, we need that to protect
		 * the start_msg_tree waiting loop that was originally designed
		 * around a "while ((count < host_count))" loop. In case where a
		 * fwd thread was not able to get all the return codes from
		 * children, the waiting loop was deadlocked.
		 */
		slurm_mutex_lock(&tree_mutex);
		thr_count++;
		slurm_mutex_unlock(&tree_mutex);

		while (pthread_create(&thread_agent, &attr_agent,
				      _fwd_tree_thread, (void *)fwd_tree)) {
			error("pthread_create error %m");
			if (++retries > MAX_RETRIES)
				fatal("Can't create pthread");
			sleep(1);	/* sleep and try again */
		}
		slurm_attr_destroy(&attr_agent);
	}
	xfree(span);

	slurm_mutex_lock(&tree_mutex);

	count = list_count(ret_list);
	debug2("Tree head got back %d looking for %d", count, host_count);

	/* wait until every forwarding thread has reported completion */
	while (thr_count > 0) {
		pthread_cond_wait(&notify, &tree_mutex);
		count = list_count(ret_list);
		debug2("Tree head got back %d", count);
	}
	xassert(count >= host_count);	/* Tree head did not get all responses,
					 * but no more active fwd threads! */
	slurm_mutex_unlock(&tree_mutex);

	slurm_mutex_destroy(&tree_mutex);
	pthread_cond_destroy(&notify);

	return ret_list;
}
/*
 * forward_msg        - logic to forward a message which has been received and
 *                      accumulate the return codes from processes getting
 *                      the forwarded message
 *
 * IN: forward_struct - forward_struct_t * - holds information about message
 *                                           that needs to be forwarded to
 *                                           children processes
 * IN: header         - header_t           - header from message that came in
 *                                           needing to be forwarded.
 * RET: SLURM_SUCCESS, or SLURM_ERROR when forward_struct has no ret_list
 */
extern int forward_msg(forward_struct_t *forward_struct, header_t *header)
{
	int j = 0;
	forward_msg_t *forward_msg = NULL;
	int thr_count = 0;
	int *span = set_span(header->forward.cnt, 0);
	hostlist_t hl = NULL;
	hostlist_t forward_hl = NULL;
	char *name = NULL;

	if (!forward_struct->ret_list) {
		error("didn't get a ret_list from forward_struct");
		xfree(span);
		return SLURM_ERROR;
	}
	hl = hostlist_create(header->forward.nodelist);
	hostlist_uniq(hl);

	while ((name = hostlist_shift(hl))) {
		pthread_attr_t attr_agent;
		pthread_t thread_agent;
		char *buf = NULL;
		/*
		 * Per-thread retry budget: a few failed attempts while
		 * starting earlier threads must not count against this one.
		 */
		int retries = 0;

		slurm_attr_init(&attr_agent);
		if (pthread_attr_setdetachstate
		    (&attr_agent, PTHREAD_CREATE_DETACHED))
			error("pthread_attr_setdetachstate error %m");

		/* each thread uses its own slot of the forward_msg array */
		forward_msg = &forward_struct->forward_msg[thr_count];
		forward_msg->ret_list = forward_struct->ret_list;

		forward_msg->timeout = forward_struct->timeout;

		if (forward_msg->timeout <= 0) {
			/* convert secs to msec */
			forward_msg->timeout = slurm_get_msg_timeout() * 1000;
		}

		forward_msg->notify = &forward_struct->notify;
		forward_msg->forward_mutex = &forward_struct->forward_mutex;
		forward_msg->buf_len = forward_struct->buf_len;
		forward_msg->buf = forward_struct->buf;

		memcpy(&forward_msg->header.orig_addr,
		       &header->orig_addr,
		       sizeof(slurm_addr_t));

		forward_msg->header.version = header->version;
		forward_msg->header.flags = header->flags;
		forward_msg->header.msg_type = header->msg_type;
		forward_msg->header.body_length = header->body_length;
		forward_msg->header.ret_list = NULL;
		forward_msg->header.ret_cnt = 0;

		/* first host of the branch plus span[thr_count] more */
		forward_hl = hostlist_create(name);
		free(name);
		for (j = 0; j < span[thr_count]; j++) {
			name = hostlist_shift(hl);
			if (!name)
				break;
			hostlist_push(forward_hl, name);
			free(name);
		}

		buf = hostlist_ranged_string_xmalloc(forward_hl);
		hostlist_destroy(forward_hl);
		forward_init(&forward_msg->header.forward, NULL);
		forward_msg->header.forward.nodelist = buf;
		while (pthread_create(&thread_agent, &attr_agent,
				      _forward_thread,
				      (void *)forward_msg)) {
			error("pthread_create error %m");
			if (++retries > MAX_RETRIES)
				fatal("Can't create pthread");
			sleep(1);	/* sleep and try again */
		}
		slurm_attr_destroy(&attr_agent);
		thr_count++;
	}
	hostlist_destroy(hl);
	xfree(span);
	return SLURM_SUCCESS;
}
/* Open a connection to the Slurm DBD and set slurmdbd_conn */ static void _open_slurmdbd_conn(bool need_db) { bool try_backup = true; int rc; if (slurmdbd_conn && slurmdbd_conn->fd >= 0) { debug("Attempt to re-open slurmdbd socket"); /* clear errno (checked after this for errors) */ errno = 0; return; } slurm_persist_conn_close(slurmdbd_conn); if (!slurmdbd_conn) { slurmdbd_conn = xmalloc(sizeof(slurm_persist_conn_t)); slurmdbd_conn->flags = PERSIST_FLAG_DBD | PERSIST_FLAG_RECONNECT; slurmdbd_conn->persist_type = PERSIST_TYPE_DBD; if (!slurmdbd_cluster) slurmdbd_cluster = slurm_get_cluster_name(); slurmdbd_conn->cluster_name = xstrdup(slurmdbd_cluster); slurmdbd_conn->timeout = (slurm_get_msg_timeout() + 35) * 1000; slurmdbd_conn->rem_port = slurm_get_accounting_storage_port(); if (!slurmdbd_conn->rem_port) { slurmdbd_conn->rem_port = SLURMDBD_PORT; slurm_set_accounting_storage_port( slurmdbd_conn->rem_port); } } slurmdbd_shutdown = 0; slurmdbd_conn->shutdown = &slurmdbd_shutdown; slurmdbd_conn->version = SLURM_PROTOCOL_VERSION; xfree(slurmdbd_conn->rem_host); slurmdbd_conn->rem_host = slurm_get_accounting_storage_host(); if (!slurmdbd_conn->rem_host) { slurmdbd_conn->rem_host = xstrdup(DEFAULT_STORAGE_HOST); slurm_set_accounting_storage_host( slurmdbd_conn->rem_host); } again: if (((rc = slurm_persist_conn_open(slurmdbd_conn)) != SLURM_SUCCESS) && try_backup) { xfree(slurmdbd_conn->rem_host); try_backup = false; if ((slurmdbd_conn->rem_host = slurm_get_accounting_storage_backup_host())) goto again; } if (rc == SLURM_SUCCESS) { /* set the timeout to the timeout to be used for all other * messages */ slurmdbd_conn->timeout = SLURMDBD_TIMEOUT * 1000; if (slurmdbd_conn->trigger_callbacks.dbd_resumed) (slurmdbd_conn->trigger_callbacks.dbd_resumed)(); if (slurmdbd_conn->trigger_callbacks.db_resumed) (slurmdbd_conn->trigger_callbacks.db_resumed)(); } if ((!need_db && (rc == ESLURM_DB_CONNECTION)) || (rc == SLURM_SUCCESS)) { debug("slurmdbd: Sent PersistInit msg"); /* 
clear errno (checked after this for errors) */ errno = 0; } else { if ((rc == ESLURM_DB_CONNECTION) && slurmdbd_conn->trigger_callbacks.db_fail) (slurmdbd_conn->trigger_callbacks.db_fail)(); error("slurmdbd: Sending PersistInit msg: %m"); slurm_persist_conn_close(slurmdbd_conn); } }
/*
 * Open (or re-open) the persistent "send" connection to a sibling cluster's
 * controller, creating cluster->fed.send on first use.
 *
 * IN cluster - sibling cluster record; must not be fed_mgr_cluster_rec
 * IN locked  - true when the caller already holds cluster->lock; otherwise
 *              the lock is taken and released here
 * RET result of slurm_persist_conn_open(), or SLURM_ERROR when the cluster
 *     is ourselves or has no control host/port set
 */
static int _open_controller_conn(slurmdb_cluster_rec_t *cluster, bool locked)
{
	static int timeout = -1;
	slurm_persist_conn_t *persist_conn = NULL;
	int rc = SLURM_ERROR;

	if (timeout < 0)
		timeout = slurm_get_msg_timeout() * 1000;

	if (cluster == fed_mgr_cluster_rec) {
		info("%s: hey! how did we get here with ourselves?",
		     __func__);
		return SLURM_ERROR;
	}

	if (!locked)
		slurm_mutex_lock(&cluster->lock);

	if (!cluster->control_host || !cluster->control_host[0] ||
	    !cluster->control_port) {
		if (slurmctld_conf.debug_flags & DEBUG_FLAG_FEDR)
			info("%s: Sibling cluster %s doesn't appear up yet, skipping",
			     __func__, cluster->name);
		goto done;
	}

	if (slurmctld_conf.debug_flags & DEBUG_FLAG_FEDR)
		info("opening sibling conn to %s", cluster->name);

	if (cluster->fed.send) {
		persist_conn = cluster->fed.send;

		/*
		 * Perhaps a backup came up, so don't assume it was the same
		 * host or port we had before.
		 */
		xfree(persist_conn->rem_host);
		persist_conn->rem_host = xstrdup(cluster->control_host);
		persist_conn->rem_port = cluster->control_port;
	} else {
		persist_conn = xmalloc(sizeof(slurm_persist_conn_t));
		cluster->fed.send = persist_conn;

		/* Since this connection is coming from us, make it so ;) */
		persist_conn->cluster_name = xstrdup(slurmctld_cluster_name);
		persist_conn->my_port = slurmctld_conf.slurmctld_port;
		persist_conn->rem_host = xstrdup(cluster->control_host);
		persist_conn->rem_port = cluster->control_port;
		persist_conn->shutdown = &slurmctld_config.shutdown_time;
		/* don't put this as 0 it could cause deadlock */
		persist_conn->timeout = timeout;
	}

	rc = slurm_persist_conn_open(persist_conn);
	if (rc != SLURM_SUCCESS) {
		error("fed_mgr: Unable to open connection to cluster %s using host %s(%u)",
		      cluster->name,
		      persist_conn->rem_host, persist_conn->rem_port);
	} else if (slurmctld_conf.debug_flags & DEBUG_FLAG_FEDR) {
		info("opened sibling conn to %s:%d",
		     cluster->name, persist_conn->fd);
	}

done:
	if (!locked)
		slurm_mutex_unlock(&cluster->lock);

	return rc;
}
/*
 * _forward_msg_internal - start one detached _forward_thread per branch
 *
 * IN: hl         - hostlist_t         - flat host list, used when sp_hl is
 *                                       NULL; one host is shifted off for
 *                                       each thread
 * IN: sp_hl      - hostlist_t *       - array of per-branch hostlists; entry
 *                                       j is turned into thread j's nodelist
 *                                       string and then destroyed
 * IN: fwd_struct - forward_struct_t * - shared forwarding state stored in
 *                                       every forward_msg_t
 * IN: header     - header_t *         - header whose address, version,
 *                                       flags, type and body length are
 *                                       copied into each forwarded header
 * IN: timeout    - int                - timeout in msec; <= 0 selects the
 *                                       configured message timeout
 * IN: hl_count   - int                - number of threads to start
 */
static void _forward_msg_internal(hostlist_t hl, hostlist_t* sp_hl,
				  forward_struct_t *fwd_struct,
				  header_t *header, int timeout,
				  int hl_count)
{
	int branch_idx;

	/* convert secs to msec */
	if (timeout <= 0)
		timeout = slurm_get_msg_timeout() * 1000;

	for (branch_idx = 0; branch_idx < hl_count; branch_idx++) {
		pthread_attr_t attr;
		pthread_t tid;
		forward_msg_t *child;
		char *nodelist = NULL;
		int attempts = 0;

		slurm_attr_init(&attr);
		if (pthread_attr_setdetachstate(&attr,
						PTHREAD_CREATE_DETACHED))
			error("pthread_attr_setdetachstate error %m");

		child = xmalloc(sizeof(forward_msg_t));
		child->fwd_struct = fwd_struct;
		child->timeout = timeout;

		memcpy(&child->header.orig_addr,
		       &header->orig_addr,
		       sizeof(slurm_addr_t));

		child->header.version = header->version;
		child->header.flags = header->flags;
		child->header.msg_type = header->msg_type;
		child->header.body_length = header->body_length;
		child->header.ret_list = NULL;
		child->header.ret_cnt = 0;

		if (sp_hl) {
			nodelist = hostlist_ranged_string_xmalloc(
				sp_hl[branch_idx]);
			hostlist_destroy(sp_hl[branch_idx]);
		} else {
			char *host = hostlist_shift(hl);

			nodelist = xstrdup(host);
			free(host);
		}

		forward_init(&child->header.forward, NULL);
		child->header.forward.nodelist = nodelist;

		while (pthread_create(&tid, &attr, _forward_thread,
				      (void *)child)) {
			error("pthread_create error %m");
			if (++attempts > MAX_RETRIES)
				fatal("Can't create pthread");
			usleep(100000);	/* sleep and try again */
		}
		slurm_attr_destroy(&attr);
	}
}
/* * agent - party responsible for transmitting an common RPC in parallel * across a set of nodes. Use agent_queue_request() if immediate * execution is not essential. * IN pointer to agent_arg_t, which is xfree'd (including hostlist, * and msg_args) upon completion * RET always NULL (function format just for use as pthread) */ void *agent(void *args) { int i, delay, rc, retries = 0; pthread_attr_t attr_wdog; pthread_t thread_wdog; agent_arg_t *agent_arg_ptr = args; agent_info_t *agent_info_ptr = NULL; thd_t *thread_ptr; task_info_t *task_specific_ptr; time_t begin_time; #if 0 info("Agent_cnt is %d of %d with msg_type %d", agent_cnt, MAX_AGENT_CNT, agent_arg_ptr->msg_type); #endif slurm_mutex_lock(&agent_cnt_mutex); if (!wiki2_sched_test) { char *sched_type = slurm_get_sched_type(); if (strcmp(sched_type, "sched/wiki2") == 0) wiki2_sched = true; xfree(sched_type); wiki2_sched_test = true; } while (1) { if (slurmctld_config.shutdown_time || (agent_cnt < MAX_AGENT_CNT)) { agent_cnt++; break; } else { /* wait for state change and retry */ pthread_cond_wait(&agent_cnt_cond, &agent_cnt_mutex); } } slurm_mutex_unlock(&agent_cnt_mutex); if (slurmctld_config.shutdown_time) goto cleanup; /* basic argument value tests */ begin_time = time(NULL); if (_valid_agent_arg(agent_arg_ptr)) goto cleanup; /* initialize the agent data structures */ agent_info_ptr = _make_agent_info(agent_arg_ptr); thread_ptr = agent_info_ptr->thread_struct; /* start the watchdog thread */ slurm_attr_init(&attr_wdog); if (pthread_attr_setdetachstate (&attr_wdog, PTHREAD_CREATE_JOINABLE)) error("pthread_attr_setdetachstate error %m"); while (pthread_create(&thread_wdog, &attr_wdog, _wdog, (void *) agent_info_ptr)) { error("pthread_create error %m"); if (++retries > MAX_RETRIES) fatal("Can't create pthread"); usleep(10000); /* sleep and retry */ } slurm_attr_destroy(&attr_wdog); #if AGENT_THREAD_COUNT < 1 fatal("AGENT_THREAD_COUNT value is invalid"); #endif debug2("got %d threads to send 
out",agent_info_ptr->thread_count); /* start all the other threads (up to AGENT_THREAD_COUNT active) */ for (i = 0; i < agent_info_ptr->thread_count; i++) { /* wait until "room" for another thread */ slurm_mutex_lock(&agent_info_ptr->thread_mutex); while (agent_info_ptr->threads_active >= AGENT_THREAD_COUNT) { pthread_cond_wait(&agent_info_ptr->thread_cond, &agent_info_ptr->thread_mutex); } /* create thread specific data, NOTE: freed from * _thread_per_group_rpc() */ task_specific_ptr = _make_task_data(agent_info_ptr, i); slurm_attr_init(&thread_ptr[i].attr); if (pthread_attr_setdetachstate(&thread_ptr[i].attr, PTHREAD_CREATE_DETACHED)) error("pthread_attr_setdetachstate error %m"); while ((rc = pthread_create(&thread_ptr[i].thread, &thread_ptr[i].attr, _thread_per_group_rpc, (void *) task_specific_ptr))) { error("pthread_create error %m"); if (agent_info_ptr->threads_active) pthread_cond_wait(&agent_info_ptr-> thread_cond, &agent_info_ptr-> thread_mutex); else { slurm_mutex_unlock(&agent_info_ptr-> thread_mutex); usleep(10000); /* sleep and retry */ slurm_mutex_lock(&agent_info_ptr-> thread_mutex); } } slurm_attr_destroy(&thread_ptr[i].attr); agent_info_ptr->threads_active++; slurm_mutex_unlock(&agent_info_ptr->thread_mutex); } /* wait for termination of remaining threads */ pthread_join(thread_wdog, NULL); delay = (int) difftime(time(NULL), begin_time); if (delay > (slurm_get_msg_timeout() * 2)) { info("agent msg_type=%u ran for %d seconds", agent_arg_ptr->msg_type, delay); } slurm_mutex_lock(&agent_info_ptr->thread_mutex); while (agent_info_ptr->threads_active != 0) { pthread_cond_wait(&agent_info_ptr->thread_cond, &agent_info_ptr->thread_mutex); } slurm_mutex_unlock(&agent_info_ptr->thread_mutex); cleanup: _purge_agent_args(agent_arg_ptr); if (agent_info_ptr) { xfree(agent_info_ptr->thread_struct); xfree(agent_info_ptr); } slurm_mutex_lock(&agent_cnt_mutex); if (agent_cnt > 0) agent_cnt--; else { error("agent_cnt underflow"); agent_cnt = 0; } if (agent_cnt && 
agent_cnt < MAX_AGENT_CNT) agent_retry(RPC_RETRY_INTERVAL, true); pthread_cond_broadcast(&agent_cnt_cond); slurm_mutex_unlock(&agent_cnt_mutex); return NULL; }
/*
 * msg_aggr_add_msg - append a message to the aggregation collection and
 *	optionally block until its response is handled or the wait times out.
 *
 * Does nothing when the message collection is not running.
 *
 * IN msg           - message to queue; its msg_index is assigned here
 * IN wait          - when true, wait on a per-message condition for up to
 *                    the configured message timeout (seconds)
 * IN resp_callback - stored in the wait record for this message
 */
extern void msg_aggr_add_msg(slurm_msg_t *msg, bool wait,
			     void (*resp_callback) (slurm_msg_t *msg))
{
	static uint16_t msg_index = 1;
	static uint32_t wait_count = 0;
	int queued;
	msg_aggr_t *waiter;
	uint16_t msg_timeout;
	struct timeval now;
	struct timespec deadline;

	if (!msg_collection.running)
		return;

	slurm_mutex_lock(&msg_collection.mutex);
	if (msg_collection.max_msgs == true)
		slurm_cond_wait(&msg_collection.cond, &msg_collection.mutex);

	msg->msg_index = msg_index++;

	/* Add msg to message collection */
	list_append(msg_collection.msg_list, msg);
	queued = list_count(msg_collection.msg_list);

	/* First msg in collection; initiate new window */
	if (queued == 1)
		slurm_cond_signal(&msg_collection.cond);

	/* Max msgs reached; terminate window */
	if (queued >= msg_collection.max_msg_cnt) {
		msg_collection.max_msgs = true;
		slurm_cond_signal(&msg_collection.cond);
	}
	slurm_mutex_unlock(&msg_collection.mutex);

	if (!wait)
		return;

	/* register a wait record keyed by this message's index */
	waiter = xmalloc(sizeof(msg_aggr_t));
	waiter->msg_index = msg->msg_index;
	waiter->resp_callback = resp_callback;
	slurm_cond_init(&waiter->wait_cond, NULL);

	slurm_mutex_lock(&msg_collection.aggr_mutex);
	list_append(msg_collection.msg_aggr_list, waiter);

	/* absolute deadline: now + message timeout */
	msg_timeout = slurm_get_msg_timeout();
	gettimeofday(&now, NULL);
	deadline.tv_sec = now.tv_sec + msg_timeout;
	deadline.tv_nsec = now.tv_usec * 1000;

	wait_count++;
	if (pthread_cond_timedwait(&waiter->wait_cond,
				   &msg_collection.aggr_mutex,
				   &deadline) == ETIMEDOUT)
		_handle_msg_aggr_ret(waiter->msg_index, 1);
	wait_count--;
	slurm_mutex_unlock(&msg_collection.aggr_mutex);

	if (!msg_collection.running && !wait_count)
		slurm_mutex_destroy(&msg_collection.aggr_mutex);

	_msg_aggr_free(waiter);
}
static int _attempt_backfill(void) { bool filter_root = false; List job_queue; job_queue_rec_t *job_queue_rec; slurmdb_qos_rec_t *qos_ptr = NULL; int i, j, node_space_recs; struct job_record *job_ptr; struct part_record *part_ptr; uint32_t end_time, end_reserve; uint32_t time_limit, comp_time_limit, orig_time_limit; uint32_t min_nodes, max_nodes, req_nodes; bitstr_t *avail_bitmap = NULL, *resv_bitmap = NULL; time_t now = time(NULL), sched_start, later_start, start_res; node_space_map_t *node_space; static int sched_timeout = 0; int this_sched_timeout = 0, rc = 0; sched_start = now; if (sched_timeout == 0) { sched_timeout = slurm_get_msg_timeout() / 2; sched_timeout = MAX(sched_timeout, 1); sched_timeout = MIN(sched_timeout, 10); } this_sched_timeout = sched_timeout; #ifdef HAVE_CRAY /* * Run a Basil Inventory immediately before setting up the schedule * plan, to avoid race conditions caused by ALPS node state change. * Needs to be done with the node-state lock taken. */ if (select_g_reconfigure()) { debug4("backfill: not scheduling due to ALPS"); return SLURM_SUCCESS; } #endif if (slurm_get_root_filter()) filter_root = true; job_queue = build_job_queue(true); if (list_count(job_queue) <= 1) { debug("backfill: no jobs to backfill"); list_destroy(job_queue); return 0; } node_space = xmalloc(sizeof(node_space_map_t) * (max_backfill_job_cnt + 3)); node_space[0].begin_time = sched_start; node_space[0].end_time = sched_start + backfill_window; node_space[0].avail_bitmap = bit_copy(avail_node_bitmap); node_space[0].next = 0; node_space_recs = 1; if (debug_flags & DEBUG_FLAG_BACKFILL) _dump_node_space_table(node_space); while ((job_queue_rec = (job_queue_rec_t *) list_pop_bottom(job_queue, sort_job_queue2))) { job_ptr = job_queue_rec->job_ptr; part_ptr = job_queue_rec->part_ptr; xfree(job_queue_rec); if (!IS_JOB_PENDING(job_ptr)) continue; /* started in other partition */ job_ptr->part_ptr = part_ptr; if (debug_flags & DEBUG_FLAG_BACKFILL) info("backfill test for job %u", 
job_ptr->job_id); if ((job_ptr->state_reason == WAIT_ASSOC_JOB_LIMIT) || (job_ptr->state_reason == WAIT_ASSOC_RESOURCE_LIMIT) || (job_ptr->state_reason == WAIT_ASSOC_TIME_LIMIT) || (job_ptr->state_reason == WAIT_QOS_JOB_LIMIT) || (job_ptr->state_reason == WAIT_QOS_RESOURCE_LIMIT) || (job_ptr->state_reason == WAIT_QOS_TIME_LIMIT) || !acct_policy_job_runnable(job_ptr)) { debug2("backfill: job %u is not allowed to run now. " "Skipping it. State=%s. Reason=%s. Priority=%u", job_ptr->job_id, job_state_string(job_ptr->job_state), job_reason_string(job_ptr->state_reason), job_ptr->priority); continue; } if (((part_ptr->state_up & PARTITION_SCHED) == 0) || (part_ptr->node_bitmap == NULL)) continue; if ((part_ptr->flags & PART_FLAG_ROOT_ONLY) && filter_root) continue; if ((!job_independent(job_ptr, 0)) || (license_job_test(job_ptr, time(NULL)) != SLURM_SUCCESS)) continue; /* Determine minimum and maximum node counts */ min_nodes = MAX(job_ptr->details->min_nodes, part_ptr->min_nodes); if (job_ptr->details->max_nodes == 0) max_nodes = part_ptr->max_nodes; else max_nodes = MIN(job_ptr->details->max_nodes, part_ptr->max_nodes); max_nodes = MIN(max_nodes, 500000); /* prevent overflows */ if (job_ptr->details->max_nodes) req_nodes = max_nodes; else req_nodes = min_nodes; if (min_nodes > max_nodes) { /* job's min_nodes exceeds partition's max_nodes */ continue; } /* Determine job's expected completion time */ if (job_ptr->time_limit == NO_VAL) { if (part_ptr->max_time == INFINITE) time_limit = 365 * 24 * 60; /* one year */ else time_limit = part_ptr->max_time; } else { if (part_ptr->max_time == INFINITE) time_limit = job_ptr->time_limit; else time_limit = MIN(job_ptr->time_limit, part_ptr->max_time); } comp_time_limit = time_limit; orig_time_limit = job_ptr->time_limit; if (qos_ptr && (qos_ptr->flags & QOS_FLAG_NO_RESERVE)) time_limit = job_ptr->time_limit = 1; else if (job_ptr->time_min && (job_ptr->time_min < time_limit)) time_limit = job_ptr->time_limit = job_ptr->time_min; /* 
Determine impact of any resource reservations */ later_start = now; TRY_LATER: FREE_NULL_BITMAP(avail_bitmap); start_res = later_start; later_start = 0; j = job_test_resv(job_ptr, &start_res, true, &avail_bitmap); if (j != SLURM_SUCCESS) { job_ptr->time_limit = orig_time_limit; continue; } if (start_res > now) end_time = (time_limit * 60) + start_res; else end_time = (time_limit * 60) + now; /* Identify usable nodes for this job */ bit_and(avail_bitmap, part_ptr->node_bitmap); bit_and(avail_bitmap, up_node_bitmap); for (j=0; ; ) { if ((node_space[j].end_time > start_res) && node_space[j].next && (later_start == 0)) later_start = node_space[j].end_time; if (node_space[j].end_time <= start_res) ; else if (node_space[j].begin_time <= end_time) { bit_and(avail_bitmap, node_space[j].avail_bitmap); } else break; if ((j = node_space[j].next) == 0) break; } if (job_ptr->details->exc_node_bitmap) { bit_not(job_ptr->details->exc_node_bitmap); bit_and(avail_bitmap, job_ptr->details->exc_node_bitmap); bit_not(job_ptr->details->exc_node_bitmap); } /* Test if insufficient nodes remain OR * required nodes missing OR * nodes lack features */ if ((bit_set_count(avail_bitmap) < min_nodes) || ((job_ptr->details->req_node_bitmap) && (!bit_super_set(job_ptr->details->req_node_bitmap, avail_bitmap))) || (job_req_node_filter(job_ptr, avail_bitmap))) { if (later_start) { job_ptr->start_time = 0; goto TRY_LATER; } job_ptr->time_limit = orig_time_limit; continue; } /* Identify nodes which are definitely off limits */ FREE_NULL_BITMAP(resv_bitmap); resv_bitmap = bit_copy(avail_bitmap); bit_not(resv_bitmap); if ((time(NULL) - sched_start) >= this_sched_timeout) { debug("backfill: loop taking too long, yielding locks"); if (_yield_locks()) { debug("backfill: system state changed, " "breaking out"); rc = 1; break; } else { this_sched_timeout += sched_timeout; } } /* this is the time consuming operation */ debug2("backfill: entering _try_sched for job %u.", job_ptr->job_id); j = 
_try_sched(job_ptr, &avail_bitmap, min_nodes, max_nodes, req_nodes); debug2("backfill: finished _try_sched for job %u.", job_ptr->job_id); now = time(NULL); if (j != SLURM_SUCCESS) { job_ptr->time_limit = orig_time_limit; job_ptr->start_time = 0; continue; /* not runable */ } if (start_res > job_ptr->start_time) { job_ptr->start_time = start_res; last_job_update = now; } if (job_ptr->start_time <= now) { int rc = _start_job(job_ptr, resv_bitmap); if (qos_ptr && (qos_ptr->flags & QOS_FLAG_NO_RESERVE)) job_ptr->time_limit = orig_time_limit; else if ((rc == SLURM_SUCCESS) && job_ptr->time_min) { /* Set time limit as high as possible */ job_ptr->time_limit = comp_time_limit; job_ptr->end_time = job_ptr->start_time + (comp_time_limit * 60); _reset_job_time_limit(job_ptr, now, node_space); time_limit = job_ptr->time_limit; } else { job_ptr->time_limit = orig_time_limit; } if (rc == ESLURM_ACCOUNTING_POLICY) { /* Unknown future start time, just skip job */ job_ptr->start_time = 0; continue; } else if (rc != SLURM_SUCCESS) { /* Planned to start job, but something bad * happended. */ job_ptr->start_time = 0; break; } else { /* Started this job, move to next one */ continue; } } else job_ptr->time_limit = orig_time_limit; if (later_start && (job_ptr->start_time > later_start)) { /* Try later when some nodes currently reserved for * pending jobs are free */ job_ptr->start_time = 0; goto TRY_LATER; } if (job_ptr->start_time > (sched_start + backfill_window)) { /* Starts too far in the future to worry about */ continue; } if (node_space_recs >= max_backfill_job_cnt) { /* Already have too many jobs to deal with */ break; } end_reserve = job_ptr->start_time + (time_limit * 60); if (_test_resv_overlap(node_space, avail_bitmap, job_ptr->start_time, end_reserve)) { /* This job overlaps with an existing reservation for * job to be backfill scheduled, which the sched * plugin does not know about. Try again later. 
*/ later_start = job_ptr->start_time; job_ptr->start_time = 0; goto TRY_LATER; } /* * Add reservation to scheduling table if appropriate */ qos_ptr = job_ptr->qos_ptr; if (qos_ptr && (qos_ptr->flags & QOS_FLAG_NO_RESERVE)) continue; bit_not(avail_bitmap); _add_reservation(job_ptr->start_time, end_reserve, avail_bitmap, node_space, &node_space_recs); if (debug_flags & DEBUG_FLAG_BACKFILL) _dump_node_space_table(node_space); } FREE_NULL_BITMAP(avail_bitmap); FREE_NULL_BITMAP(resv_bitmap); for (i=0; ; ) { FREE_NULL_BITMAP(node_space[i].avail_bitmap); if ((i = node_space[i].next) == 0) break; } xfree(node_space); list_destroy(job_queue); return rc; }