/*
 * bitmap2node_name_sortable - given a bitmap, build a list of comma
 *	separated node names. names may include ranges (e.g. "lx[01-10]")
 * IN bitmap - bitmap pointer
 * IN sort - whether to sort the returned list
 * RET pointer to node list or NULL on error
 * globals: node_record_table_ptr - pointer to node table
 * NOTE: the caller must xfree the returned string when no longer required
 */
char *bitmap2node_name_sortable(bitstr_t *bitmap, bool sort)
{
    int i, first, last;
    hostlist_t hl;
    char *buf;

    if (bitmap == NULL)
        return xstrdup("");

    first = bit_ffs(bitmap);
    if (first == -1)
        return xstrdup("");

    last = bit_fls(bitmap);
    hl = hostlist_create("");
    for (i = first; i <= last; i++) {
        if (bit_test(bitmap, i) == 0)
            continue;
        hostlist_push(hl, node_record_table_ptr[i].name);
    }
    if (sort)
        hostlist_sort(hl);
    buf = hostlist_ranged_string_xmalloc(hl);
    hostlist_destroy(hl);

    return buf;
}
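/*
 * Illustrative usage sketch (not from the original source): the helpers in
 * this section all follow the same hostlist pattern - collect names,
 * optionally uniq/sort, compress to a ranged expression, destroy the list.
 * Assumes Slurm's "src/common/hostlist.h" and "src/common/xmalloc.h".
 */
static char *_ranged_demo(void)
{
    hostlist_t hl = hostlist_create(NULL);
    char *buf;

    hostlist_push_host(hl, "lx03");
    hostlist_push_host(hl, "lx01");
    hostlist_push_host(hl, "lx02");
    hostlist_sort(hl);
    buf = hostlist_ranged_string_xmalloc(hl);    /* "lx[01-03]" */
    hostlist_destroy(hl);
    return buf;    /* caller must xfree(), as noted above */
}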
extern void select_admin_front_end(GtkTreeModel *model, GtkTreeIter *iter,
                                   display_data_t *display_data,
                                   GtkTreeView *treeview)
{
    if (treeview) {
        char *node_list;
        hostlist_t hl = NULL;
        front_end_user_data_t user_data;

        memset(&user_data, 0, sizeof(front_end_user_data_t));
        gtk_tree_selection_selected_foreach(
            gtk_tree_view_get_selection(treeview),
            _process_each_front_end, &user_data);

        hl = hostlist_create(user_data.node_list);
        hostlist_uniq(hl);
        hostlist_sort(hl);
        xfree(user_data.node_list);

        node_list = hostlist_ranged_string_xmalloc(hl);
        hostlist_destroy(hl);

        _admin_front_end(model, iter, display_data->name, node_list);
        xfree(node_list);
    }
}
extern void scontrol_print_completing_job(job_info_t *job_ptr,
                                          node_info_msg_t *node_info_msg)
{
    int i, c_offset = 0;
    node_info_t *node_info;
    hostlist_t comp_nodes, down_nodes;
    char *node_buf;

    comp_nodes = hostlist_create(NULL);
    down_nodes = hostlist_create(NULL);

    if (job_ptr->cluster && federation_flag && !local_flag)
        c_offset = get_cluster_node_offset(job_ptr->cluster, node_info_msg);

    for (i = 0; job_ptr->node_inx[i] != -1; i += 2) {
        int j = job_ptr->node_inx[i];
        for (; j <= job_ptr->node_inx[i+1]; j++) {
            int node_inx = j + c_offset;
            if (node_inx >= node_info_msg->record_count)
                break;
            node_info = &(node_info_msg->node_array[node_inx]);
            if (IS_NODE_COMPLETING(node_info))
                hostlist_push_host(comp_nodes, node_info->name);
            else if (IS_NODE_DOWN(node_info))
                hostlist_push_host(down_nodes, node_info->name);
        }
    }

    fprintf(stdout, "JobId=%u ", job_ptr->job_id);

    node_buf = hostlist_ranged_string_xmalloc(comp_nodes);
    if (node_buf && node_buf[0])
        fprintf(stdout, "Nodes(COMPLETING)=%s ", node_buf);
    xfree(node_buf);

    node_buf = hostlist_ranged_string_xmalloc(down_nodes);
    if (node_buf && node_buf[0])
        fprintf(stdout, "Nodes(DOWN)=%s ", node_buf);
    xfree(node_buf);
    fprintf(stdout, "\n");

    hostlist_destroy(comp_nodes);
    hostlist_destroy(down_nodes);
}
/* Update acct_gather data for every node that is not DOWN */
extern void update_nodes_acct_gather_data(void)
{
#ifdef HAVE_FRONT_END
    front_end_record_t *front_end_ptr;
#else
    struct node_record *node_ptr;
#endif
    int i;
    char *host_str = NULL;
    agent_arg_t *agent_args = NULL;

    agent_args = xmalloc(sizeof(agent_arg_t));
    agent_args->msg_type = REQUEST_ACCT_GATHER_UPDATE;
    agent_args->retry = 0;
    agent_args->protocol_version = SLURM_PROTOCOL_VERSION;
    agent_args->hostlist = hostlist_create(NULL);
#ifdef HAVE_FRONT_END
    for (i = 0, front_end_ptr = front_end_nodes;
         i < front_end_node_cnt; i++, front_end_ptr++) {
        if (IS_NODE_NO_RESPOND(front_end_ptr))
            continue;
        if (agent_args->protocol_version >
            front_end_ptr->protocol_version)
            agent_args->protocol_version =
                front_end_ptr->protocol_version;
        hostlist_push_host(agent_args->hostlist, front_end_ptr->name);
        agent_args->node_count++;
    }
#else
    for (i = 0, node_ptr = node_record_table_ptr;
         i < node_record_count; i++, node_ptr++) {
        if (IS_NODE_NO_RESPOND(node_ptr) || IS_NODE_FUTURE(node_ptr) ||
            IS_NODE_POWER_SAVE(node_ptr))
            continue;
        if (agent_args->protocol_version > node_ptr->protocol_version)
            agent_args->protocol_version = node_ptr->protocol_version;
        hostlist_push_host(agent_args->hostlist, node_ptr->name);
        agent_args->node_count++;
    }
#endif

    if (agent_args->node_count == 0) {
        hostlist_destroy(agent_args->hostlist);
        xfree(agent_args);
    } else {
        hostlist_uniq(agent_args->hostlist);
        host_str = hostlist_ranged_string_xmalloc(agent_args->hostlist);
        if (slurmctld_conf.debug_flags & DEBUG_FLAG_ENERGY)
            info("Updating acct_gather data for %s", host_str);
        xfree(host_str);
        ping_begin();
        agent_queue_request(agent_args);
    }
}
/*
 * slurm_step_layout_create - determine how many tasks of a job will be
 *	run on each node. Distribution is influenced by number of cpus
 *	on each host.
 * IN tlist - hostlist corresponding to task layout
 * IN cpus_per_node - cpus per node
 * IN cpu_count_reps - how many nodes have same cpu count
 * IN num_hosts - number of hosts we have
 * IN num_tasks - number of tasks to distribute across these cpus
 * IN cpus_per_task - number of cpus per task
 * IN task_dist - type of distribution we are using
 * IN plane_size - plane size (only needed for the plane distribution)
 * RET a pointer to an slurm_step_layout_t structure
 * NOTE: allocates memory that should be xfreed by caller
 */
slurm_step_layout_t *slurm_step_layout_create(
    const char *tlist,
    uint16_t *cpus_per_node, uint32_t *cpu_count_reps,
    uint32_t num_hosts,
    uint32_t num_tasks,
    uint16_t cpus_per_task,
    uint16_t task_dist,
    uint16_t plane_size)
{
    char *arbitrary_nodes = NULL;
    slurm_step_layout_t *step_layout = xmalloc(sizeof(slurm_step_layout_t));
    uint32_t cluster_flags = slurmdb_setup_cluster_flags();

    step_layout->task_dist = task_dist;
    if (task_dist == SLURM_DIST_ARBITRARY) {
        hostlist_t hl = NULL;
        char *buf = NULL;
        /* set the node list for the task layout later; the
         * user-supplied list could differ from the job allocation */
        arbitrary_nodes = xstrdup(tlist);
        hl = hostlist_create(tlist);
        hostlist_uniq(hl);
        buf = hostlist_ranged_string_xmalloc(hl);
        num_hosts = hostlist_count(hl);
        hostlist_destroy(hl);
        step_layout->node_list = buf;
    } else {
        step_layout->node_list = xstrdup(tlist);
    }

    step_layout->task_cnt = num_tasks;
    if (cluster_flags & CLUSTER_FLAG_FE) {
        /* Limited job step support on front-end systems.
         * All jobs execute through front-end on Blue Gene.
         * Normally we would not permit execution of job steps,
         * but can fake it by just allocating all tasks to
         * one of the allocated nodes. */
        if ((cluster_flags & CLUSTER_FLAG_BG) ||
            (cluster_flags & CLUSTER_FLAG_CRAY_A))
            step_layout->node_cnt = num_hosts;
        else
            step_layout->node_cnt = 1;
    } else
        step_layout->node_cnt = num_hosts;

    if (_init_task_layout(step_layout, arbitrary_nodes,
                          cpus_per_node, cpu_count_reps, cpus_per_task,
                          task_dist, plane_size) != SLURM_SUCCESS) {
        slurm_step_layout_destroy(step_layout);
        step_layout = NULL;
    }
    xfree(arbitrary_nodes);
    return step_layout;
}
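/*
 * Hypothetical call sketch (values are illustrative, not from the original
 * source): lay out 8 tasks over the two nodes "tux[0-1]", where both nodes
 * report 4 CPUs, using a block distribution and one CPU per task.
 */
static slurm_step_layout_t *_layout_demo(void)
{
    uint16_t cpus_per_node[]  = { 4 };    /* one distinct CPU count... */
    uint32_t cpu_count_reps[] = { 2 };    /* ...shared by both nodes */

    return slurm_step_layout_create("tux[0-1]", cpus_per_node,
                                    cpu_count_reps, 2, 8, 1,
                                    SLURM_DIST_BLOCK, 1);
}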
extern int basil_node_ranking(struct node_record *node_array, int node_cnt)
{
    enum basil_version version = get_basil_version();
    struct basil_inventory *inv;
    struct basil_node *node;
    int rank_count = 0, i;
    hostlist_t hl = hostlist_create(NULL);
    bool bad_node = 0;

    inv = get_full_inventory(version);
    if (inv == NULL)
        /* FIXME: should retry here if the condition is transient */
        fatal("failed to get BASIL %s ranking", bv_names_long[version]);
    else if (!inv->batch_total)
        fatal("system has no usable batch compute nodes");

    debug("BASIL %s RANKING INVENTORY: %d/%d batch nodes",
          bv_names_long[version], inv->batch_avail, inv->batch_total);

    /*
     * Node ranking is based on a subset of the inventory: only nodes in
     * batch allocation mode which are up and not allocated. Assign a
     * 'NO_VAL' rank to all other nodes, which will translate as a very
     * high value, (unsigned)-2, to put those nodes last in the ranking.
     * The rest of the code must ensure that those nodes are never chosen.
     */
    for (i = 0; i < node_cnt; i++)
        node_array[i].node_rank = NO_VAL;

    for (node = inv->f->node_head; node; node = node->next) {
        struct node_record *node_ptr;
        char tmp[50];

        node_ptr = _find_node_by_basil_id(node->node_id);
        if (node_ptr == NULL) {
            error("nid%05u (%s node in state %s) not in slurm.conf",
                  node->node_id, nam_noderole[node->role],
                  nam_nodestate[node->state]);
            bad_node = 1;
        } else
            node_ptr->node_rank = inv->nodes_total - rank_count++;
        sprintf(tmp, "nid%05u", node->node_id);
        hostlist_push(hl, tmp);
    }
    free_inv(inv);
    if (bad_node) {
        char *name;

        hostlist_sort(hl);
        name = hostlist_ranged_string_xmalloc(hl);
        info("It appears your slurm.conf nodelist doesn't "
             "match the alps system. Here are the nodes alps knows "
             "about\n%s", name);
        xfree(name);
    }
    hostlist_destroy(hl);

    return SLURM_SUCCESS;
}
int _print_nodes(char *nodes, int width, bool right, bool cut)
{
    hostlist_t hl = hostlist_create(nodes);
    char *buf = NULL;
    int retval;

    buf = hostlist_ranged_string_xmalloc(hl);
    retval = _print_str(buf, width, right, false);
    xfree(buf);
    hostlist_destroy(hl);
    return retval;
}
extern void scontrol_print_completing_job(job_info_t *job_ptr,
                                          node_info_msg_t *node_info_msg)
{
    int i;
    node_info_t *node_info;
    hostlist_t all_nodes, comp_nodes, down_nodes;
    char *node_buf;

    all_nodes  = hostlist_create(job_ptr->nodes);
    comp_nodes = hostlist_create("");
    down_nodes = hostlist_create("");

    for (i = 0; i < node_info_msg->record_count; i++) {
        node_info = &(node_info_msg->node_array[i]);
        if (IS_NODE_COMPLETING(node_info) &&
            (_in_node_bit_list(i, job_ptr->node_inx)))
            hostlist_push_host(comp_nodes, node_info->name);
        else if (IS_NODE_DOWN(node_info) &&
                 (hostlist_find(all_nodes, node_info->name) != -1))
            hostlist_push_host(down_nodes, node_info->name);
    }

    fprintf(stdout, "JobId=%u ", job_ptr->job_id);

    node_buf = hostlist_ranged_string_xmalloc(comp_nodes);
    if (node_buf && node_buf[0])
        fprintf(stdout, "Nodes(COMPLETING)=%s ", node_buf);
    xfree(node_buf);

    node_buf = hostlist_ranged_string_xmalloc(down_nodes);
    if (node_buf && node_buf[0])
        fprintf(stdout, "Nodes(DOWN)=%s ", node_buf);
    xfree(node_buf);
    fprintf(stdout, "\n");

    hostlist_destroy(all_nodes);
    hostlist_destroy(comp_nodes);
    hostlist_destroy(down_nodes);
}
static char *_normalize_hostlist(const char *hostlist)
{
    char *buf = NULL;
    hostlist_t hl = hostlist_create(hostlist);

    if (hl) {
        buf = hostlist_ranged_string_xmalloc(hl);
        hostlist_destroy(hl);
    }
    if (!buf)
        return xstrdup(hostlist);

    return buf;
}
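/*
 * Illustrative usage (hypothetical host names, not from the original
 * source): a parsable list is compacted; an unparsable input falls back
 * to a fresh copy. Either way the caller must xfree() the result.
 */
static void _normalize_demo(void)
{
    char *buf = _normalize_hostlist("tux1,tux2,tux3");    /* "tux[1-3]" */

    xfree(buf);
}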
/* Spawn health check function for every node that is not DOWN */
extern void run_health_check(void)
{
#ifdef HAVE_FRONT_END
    front_end_record_t *front_end_ptr;
#else
    struct node_record *node_ptr;
#endif
    int i;
    char *host_str = NULL;
    agent_arg_t *check_agent_args = NULL;

    check_agent_args = xmalloc(sizeof(agent_arg_t));
    check_agent_args->msg_type = REQUEST_HEALTH_CHECK;
    check_agent_args->retry = 0;
    check_agent_args->hostlist = hostlist_create("");
    if (check_agent_args->hostlist == NULL)
        fatal("hostlist_create: malloc failure");
#ifdef HAVE_FRONT_END
    for (i = 0, front_end_ptr = front_end_nodes;
         i < front_end_node_cnt; i++, front_end_ptr++) {
        if (IS_NODE_NO_RESPOND(front_end_ptr))
            continue;
        hostlist_push(check_agent_args->hostlist, front_end_ptr->name);
        check_agent_args->node_count++;
    }
#else
    for (i = 0, node_ptr = node_record_table_ptr;
         i < node_record_count; i++, node_ptr++) {
        if (IS_NODE_NO_RESPOND(node_ptr) || IS_NODE_FUTURE(node_ptr) ||
            IS_NODE_POWER_SAVE(node_ptr))
            continue;
        hostlist_push(check_agent_args->hostlist, node_ptr->name);
        check_agent_args->node_count++;
    }
#endif

    if (check_agent_args->node_count == 0) {
        hostlist_destroy(check_agent_args->hostlist);
        xfree(check_agent_args);
    } else {
        hostlist_uniq(check_agent_args->hostlist);
        host_str = hostlist_ranged_string_xmalloc(
                       check_agent_args->hostlist);
        debug("Spawning health check agent for %s", host_str);
        xfree(host_str);
        ping_begin();
        agent_queue_request(check_agent_args);
    }
}
/*
 * bitmap2node_name_sortable - given a bitmap, build a list of comma
 *	separated node names. names may include ranges (e.g. "lx[01-10]")
 * IN bitmap - bitmap pointer
 * IN sort - whether to sort the returned list
 * RET pointer to node list or NULL on error
 * globals: node_record_table_ptr - pointer to node table
 * NOTE: the caller must xfree the returned string when no longer required
 */
char *bitmap2node_name_sortable(bitstr_t *bitmap, bool sort)
{
    hostlist_t hl;
    char *buf;

    hl = bitmap2hostlist(bitmap);
    if (hl == NULL)
        return xstrdup("");
    if (sort)
        hostlist_sort(hl);
    buf = hostlist_ranged_string_xmalloc(hl);
    hostlist_destroy(hl);
    return buf;
}
/*
 * scontrol_encode_hostlist - given a list of hostnames or the pathname
 *	of a file containing hostnames, translate them into a hostlist
 *	expression
 */
extern int scontrol_encode_hostlist(char *hostlist, bool sorted)
{
    char *io_buf = NULL, *tmp_list, *ranged_string;
    int buf_size = 1024 * 1024;
    hostlist_t hl;

    if (!hostlist) {
        fprintf(stderr, "Hostlist is NULL\n");
        return SLURM_ERROR;
    }

    if (hostlist[0] == '/') {
        ssize_t buf_read;
        int fd = open(hostlist, O_RDONLY);
        if (fd < 0) {
            fprintf(stderr, "Can not open %s\n", hostlist);
            return SLURM_ERROR;
        }
        io_buf = xmalloc(buf_size);
        buf_read = read(fd, io_buf, buf_size);
        close(fd);
        if (buf_read >= buf_size) {
            /* If over 1MB, the file is almost certainly invalid */
            fprintf(stderr, "File %s is too large\n", hostlist);
            xfree(io_buf);
            return SLURM_ERROR;
        }
        io_buf[buf_read] = '\0';
        _reformat_hostlist(io_buf);
        tmp_list = io_buf;
    } else
        tmp_list = hostlist;

    hl = hostlist_create(tmp_list);
    if (hl == NULL) {
        fprintf(stderr, "Invalid hostlist: %s\n", tmp_list);
        xfree(io_buf);
        return SLURM_ERROR;
    }
    if (sorted)
        hostlist_sort(hl);
    ranged_string = hostlist_ranged_string_xmalloc(hl);
    printf("%s\n", ranged_string);
    hostlist_destroy(hl);
    xfree(ranged_string);
    xfree(io_buf);
    return SLURM_SUCCESS;
}
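/*
 * Usage sketch (hypothetical host names): encode an explicit list into
 * its sorted ranged form; this prints "rack1n[1-3]" followed by a newline.
 */
static void _encode_demo(void)
{
    (void) scontrol_encode_hostlist("rack1n1,rack1n2,rack1n3", true);
}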
static void _set_node_field_size(List sinfo_list)
{
    char *tmp = NULL;
    ListIterator i = list_iterator_create(sinfo_list);
    sinfo_data_t *current;
    int max_width = MIN_NODE_FIELD_SIZE, this_width = 0;

    while ((current = (sinfo_data_t *) list_next(i)) != NULL) {
        tmp = hostlist_ranged_string_xmalloc(current->nodes);
        this_width = strlen(tmp);
        xfree(tmp);
        max_width = MAX(max_width, this_width);
    }
    list_iterator_destroy(i);
    params.node_field_size = max_width;
}
/* Append to buf a compact tasklist expression (e.g. "tux[0-1]*2")
 * Prepend ":" to expression as needed */
static void _append_hl_buf(char **buf, hostlist_t *hl_tmp, int *reps)
{
    char *host_str;
    char *tok, *sep;
    int i, in_bracket = 0, fini = 0;

    hostlist_uniq(*hl_tmp);
    host_str = hostlist_ranged_string_xmalloc(*hl_tmp);

    /* Note that host_str may be of this form "alpha,beta". We want
     * to record this as "alpha*#:beta*#" and NOT "alpha,beta*#".
     * NOTE: Do not break up ranges within brackets (e.g. "tux[1,2-4]") */
    if (*buf)
        sep = ":";
    else
        sep = "";
    tok = host_str;
    for (i = 0; fini == 0; i++) {
        switch (tok[i]) {
        case '[':
            in_bracket = 1;
            break;
        case ']':
            in_bracket = 0;
            break;
        case '\0':
            fini = 1;
            if (in_bracket)
                error("badly formed hostlist %s", tok);
            /* fall through to emit the final token */
        case ',':
            if (in_bracket)
                break;
            tok[i] = '\0';
            xstrfmtcat(*buf, "%s%s*%d", sep, tok, *reps);
            sep = ":";
            tok += (i + 1);
            i = -1;
            break;
        }
    }
    xfree(host_str);
    hostlist_destroy(*hl_tmp);
    *hl_tmp = (hostlist_t) NULL;
    *reps = 0;
}
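/*
 * Illustrative call (hypothetical values): starting from an empty buf, a
 * two-host list repeated twice per host is rendered per-host, not as one
 * comma-joined token.
 */
static void _append_demo(void)
{
    char *buf = NULL;
    hostlist_t hl = hostlist_create("alpha,beta");
    int reps = 2;

    _append_hl_buf(&buf, &hl, &reps);
    /* buf == "alpha*2:beta*2"; hl was destroyed and reps reset to 0 */
    xfree(buf);
}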
static void _forward_msg_internal(hostlist_t hl, hostlist_t *sp_hl,
                                  forward_struct_t *fwd_struct,
                                  header_t *header, int timeout,
                                  int hl_count)
{
    int j;
    forward_msg_t *fwd_msg = NULL;
    char *buf = NULL, *tmp_char = NULL;

    if (timeout <= 0)
        /* convert secs to msec */
        timeout = slurm_get_msg_timeout() * 1000;

    for (j = 0; j < hl_count; j++) {
        fwd_msg = xmalloc(sizeof(forward_msg_t));

        fwd_msg->fwd_struct = fwd_struct;
        fwd_msg->timeout = timeout;

        memcpy(&fwd_msg->header.orig_addr, &header->orig_addr,
               sizeof(slurm_addr_t));

        fwd_msg->header.version = header->version;
        fwd_msg->header.flags = header->flags;
        fwd_msg->header.msg_type = header->msg_type;
        fwd_msg->header.body_length = header->body_length;
        fwd_msg->header.ret_list = NULL;
        fwd_msg->header.ret_cnt = 0;

        if (sp_hl) {
            buf = hostlist_ranged_string_xmalloc(sp_hl[j]);
            hostlist_destroy(sp_hl[j]);
        } else {
            tmp_char = hostlist_shift(hl);
            buf = xstrdup(tmp_char);
            free(tmp_char);
        }

        forward_init(&fwd_msg->header.forward, NULL);
        fwd_msg->header.forward.nodelist = buf;

        slurm_thread_create_detached(NULL, _forward_thread, fwd_msg);
    }
}
int _print_node_address(sinfo_data_t *sinfo_data, int width,
                        bool right_justify, char *suffix)
{
    if (sinfo_data) {
        char *tmp = NULL;
        tmp = hostlist_ranged_string_xmalloc(sinfo_data->node_addr);
        _print_str(tmp, width, right_justify, true);
        xfree(tmp);
    } else {
        char *title = "NODE_ADDR";
        _print_str(title, width, right_justify, false);
    }

    if (suffix)
        printf("%s", suffix);
    return SLURM_SUCCESS;
}
static int _send_to_stepds(hostlist_t hl, const char *addr, uint32_t len,
                           char *data)
{
    List ret_list = NULL;
    int temp_rc = 0, rc = 0;
    ret_data_info_t *ret_data_info = NULL;
    slurm_msg_t *msg = xmalloc(sizeof(slurm_msg_t));
    forward_data_msg_t req;
    char *nodelist = NULL;

    slurm_msg_t_init(msg);

    req.address = xstrdup(addr);
    req.len = len;
    req.data = data;

    msg->msg_type = REQUEST_FORWARD_DATA;
    msg->data = &req;

    nodelist = hostlist_ranged_string_xmalloc(hl);

    if ((ret_list = slurm_send_recv_msgs(nodelist, msg, 0, false))) {
        while ((ret_data_info = list_pop(ret_list))) {
            temp_rc = slurm_get_return_code(ret_data_info->type,
                                            ret_data_info->data);
            if (temp_rc) {
                rc = temp_rc;
            } else {
                hostlist_delete_host(hl, ret_data_info->node_name);
            }
        }
    } else {
        error("tree_msg_to_stepds: no list was returned");
        rc = SLURM_ERROR;
    }

    slurm_free_msg(msg);
    xfree(nodelist);
    xfree(req.address);
    return rc;
}
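/*
 * Usage sketch (hypothetical socket path and payload, not from the
 * original source): on return, hl has been reduced to the hosts whose
 * stepds did NOT acknowledge, since successful responders are deleted
 * from the list above.
 */
static int _send_demo(void)
{
    hostlist_t hl = hostlist_create("tux[0-3]");
    char payload[] = "ping";
    int rc = _send_to_stepds(hl, "/tmp/demo_sock", sizeof(payload), payload);

    hostlist_destroy(hl);
    return rc;
}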
int _print_node_hostnames(sinfo_data_t *sinfo_data, int width,
                          bool right_justify, char *suffix)
{
    if (params.node_field_flag)
        width = params.node_field_size;

    if (sinfo_data) {
        char *tmp = NULL;
        tmp = hostlist_ranged_string_xmalloc(sinfo_data->hostnames);
        _print_str(tmp, width, right_justify, true);
        xfree(tmp);
    } else {
        char *title = "HOSTNAMES";
        _print_str(title, width, right_justify, false);
    }

    if (suffix)
        printf("%s", suffix);
    return SLURM_SUCCESS;
}
/*
 * route_split_hostlist_treewidth - logic to split an input hostlist into
 *	a set of hostlists to forward to.
 *
 * This is the default behavior. It is implemented here as there are cases
 * where the topology version also needs to split the message list based
 * on TreeWidth.
 *
 * IN: hl - hostlist_t - list of every node to send message to
 *	will be empty on return which is same behavior as similar code
 *	replaced in forward.c
 * OUT: sp_hl - hostlist_t** - the array of hostlists that will be malloced
 * OUT: count - int* - the count of created hostlists
 * RET: SLURM_SUCCESS - int
 *
 * Note: the created hostlists must each be freed independently by the
 *	caller using hostlist_destroy.
 * Note: the hostlist_t array must be xfreed by the caller.
 */
extern int route_split_hostlist_treewidth(hostlist_t hl, hostlist_t **sp_hl,
                                          int *count)
{
    int host_count;
    int *span = NULL;
    char *name = NULL;
    char *buf;
    int nhl = 0;
    int j;

    host_count = hostlist_count(hl);
    span = set_span(host_count, tree_width);
    *sp_hl = (hostlist_t *) xmalloc(tree_width * sizeof(hostlist_t));

    while ((name = hostlist_shift(hl))) {
        (*sp_hl)[nhl] = hostlist_create(name);
        free(name);
        for (j = 0; j < span[nhl]; j++) {
            name = hostlist_shift(hl);
            if (!name) {
                break;
            }
            hostlist_push_host((*sp_hl)[nhl], name);
            free(name);
        }
        if (debug_flags & DEBUG_FLAG_ROUTE) {
            buf = hostlist_ranged_string_xmalloc((*sp_hl)[nhl]);
            debug("ROUTE: ... sublist[%d] %s", nhl, buf);
            xfree(buf);
        }
        nhl++;
    }
    xfree(span);
    *count = nhl;

    return SLURM_SUCCESS;
}
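/*
 * Caller-side sketch per the notes above (not from the original source):
 * every sublist must be destroyed with hostlist_destroy() and the array
 * itself released with xfree().
 */
static void _split_and_free_demo(hostlist_t hl)
{
    hostlist_t *sp_hl = NULL;
    int count = 0, i;

    if (route_split_hostlist_treewidth(hl, &sp_hl, &count) ==
        SLURM_SUCCESS) {
        for (i = 0; i < count; i++)
            hostlist_destroy(sp_hl[i]);
        xfree(sp_hl);
    }
}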
/*
 * route_g_split_hostlist - logic to split an input hostlist into
 *	a set of hostlists to forward to.
 *
 * IN: hl - hostlist_t - list of every node to send message to
 *	will be empty on return which is same behavior as similar code
 *	replaced in forward.c
 * OUT: sp_hl - hostlist_t** - the array of hostlists that will be malloced
 * OUT: count - int* - the count of created hostlists
 * RET: SLURM_SUCCESS - int
 *
 * Note: the created hostlists must each be freed independently by the
 *	caller using hostlist_destroy.
 * Note: the hostlist_t array must be xfreed by the caller.
 */
extern int route_g_split_hostlist(hostlist_t hl, hostlist_t **sp_hl,
                                  int *count)
{
    int rc;
    int j, nnodes, nnodex;
    char *buf;

    nnodes = nnodex = 0;
    if (route_init(NULL) != SLURM_SUCCESS)
        return SLURM_ERROR;

    if (debug_flags & DEBUG_FLAG_ROUTE) {
        /* nnodes has to be set here as the hl is empty after the
         * split_hostlist call. */
        nnodes = hostlist_count(hl);
        buf = hostlist_ranged_string_xmalloc(hl);
        info("ROUTE: split_hostlist: hl=%s", buf);
        xfree(buf);
    }

    rc = (*(ops.split_hostlist))(hl, sp_hl, count);
    if (debug_flags & DEBUG_FLAG_ROUTE) {
        /* Sanity check to make sure all nodes in msg list are in
         * a child list */
        nnodex = 0;
        for (j = 0; j < *count; j++) {
            nnodex += hostlist_count((*sp_hl)[j]);
        }
        if (nnodex != nnodes) {    /* CLANG false positive */
            info("ROUTE: number of nodes in split lists (%d)"
                 " is not equal to number in input list (%d)",
                 nnodex, nnodes);
        }
    }
    return rc;
}
int _print_node_list(sinfo_data_t *sinfo_data, int width,
                     bool right_justify, char *suffix)
{
    if (params.node_field_flag)
        width = params.node_field_size;

    if (sinfo_data) {
        char *tmp = NULL;
        tmp = hostlist_ranged_string_xmalloc(sinfo_data->nodes);
        _print_str(tmp, width, right_justify, true);
        xfree(tmp);
    } else {
        char *title = "NODELIST";
        if (params.cluster_flags & CLUSTER_FLAG_BG)
            title = "MIDPLANELIST";
        _print_str(title, width, right_justify, false);
    }

    if (suffix)
        printf("%s", suffix);
    return SLURM_SUCCESS;
}
void *_forward_thread(void *arg)
{
    forward_msg_t *fwd_msg = (forward_msg_t *)arg;
    forward_struct_t *fwd_struct = fwd_msg->fwd_struct;
    Buf buffer = init_buf(BUF_SIZE);    /* probably enough for header */
    List ret_list = NULL;
    int fd = -1;
    ret_data_info_t *ret_data_info = NULL;
    char *name = NULL;
    hostlist_t hl = hostlist_create(fwd_msg->header.forward.nodelist);
    slurm_addr_t addr;
    char *buf = NULL;
    int steps = 0;
    int start_timeout = fwd_msg->timeout;

    /* repeat until we are sure the message was sent */
    while ((name = hostlist_shift(hl))) {
        if (slurm_conf_get_addr(name, &addr) == SLURM_ERROR) {
            error("forward_thread: can't find address for host "
                  "%s, check slurm.conf", name);
            slurm_mutex_lock(&fwd_struct->forward_mutex);
            mark_as_failed_forward(&fwd_struct->ret_list, name,
                                   SLURM_UNKNOWN_FORWARD_ADDR);
            free(name);
            if (hostlist_count(hl) > 0) {
                slurm_mutex_unlock(&fwd_struct->forward_mutex);
                continue;
            }
            goto cleanup;
        }
        if ((fd = slurm_open_msg_conn(&addr)) < 0) {
            error("forward_thread to %s: %m", name);

            slurm_mutex_lock(&fwd_struct->forward_mutex);
            mark_as_failed_forward(
                &fwd_struct->ret_list, name,
                SLURM_COMMUNICATIONS_CONNECTION_ERROR);
            free(name);
            if (hostlist_count(hl) > 0) {
                slurm_mutex_unlock(&fwd_struct->forward_mutex);
                /* Abandon tree. This way if all the nodes in the
                 * branch are down we don't have to time out for
                 * each node serially. */
                _forward_msg_internal(hl, NULL, fwd_struct,
                                      &fwd_msg->header, 0,
                                      hostlist_count(hl));
                continue;
            }
            goto cleanup;
        }
        buf = hostlist_ranged_string_xmalloc(hl);

        xfree(fwd_msg->header.forward.nodelist);
        fwd_msg->header.forward.nodelist = buf;
        fwd_msg->header.forward.cnt = hostlist_count(hl);
#if 0
        info("sending %d forwards (%s) to %s",
             fwd_msg->header.forward.cnt,
             fwd_msg->header.forward.nodelist, name);
#endif
        if (fwd_msg->header.forward.nodelist[0]) {
            debug3("forward: send to %s along with %s",
                   name, fwd_msg->header.forward.nodelist);
        } else
            debug3("forward: send to %s ", name);

        pack_header(&fwd_msg->header, buffer);

        /* add forward data to buffer */
        if (remaining_buf(buffer) < fwd_struct->buf_len) {
            int new_size = buffer->processed + fwd_struct->buf_len;
            new_size += 1024;    /* padded for paranoia */
            xrealloc_nz(buffer->head, new_size);
            buffer->size = new_size;
        }
        if (fwd_struct->buf_len) {
            memcpy(&buffer->head[buffer->processed],
                   fwd_struct->buf, fwd_struct->buf_len);
            buffer->processed += fwd_struct->buf_len;
        }

        /*
         * forward message
         */
        if (slurm_msg_sendto(fd,
                             get_buf_data(buffer),
                             get_buf_offset(buffer),
                             SLURM_PROTOCOL_NO_SEND_RECV_FLAGS) < 0) {
            error("forward_thread: slurm_msg_sendto: %m");

            slurm_mutex_lock(&fwd_struct->forward_mutex);
            mark_as_failed_forward(&fwd_struct->ret_list, name, errno);
            free(name);
            if (hostlist_count(hl) > 0) {
                free_buf(buffer);
                buffer = init_buf(fwd_struct->buf_len);
                slurm_mutex_unlock(&fwd_struct->forward_mutex);
                slurm_close(fd);
                fd = -1;
                /* Abandon tree. This way if all the nodes in the
                 * branch are down we don't have to time out for
                 * each node serially. */
                _forward_msg_internal(hl, NULL, fwd_struct,
                                      &fwd_msg->header, 0,
                                      hostlist_count(hl));
                continue;
            }
            goto cleanup;
        }

        /* These messages don't have a return message, but if
         * we got here things worked out so make note of the
         * list of nodes as success. */
        if ((fwd_msg->header.msg_type == REQUEST_SHUTDOWN) ||
            (fwd_msg->header.msg_type == REQUEST_RECONFIGURE) ||
            (fwd_msg->header.msg_type == REQUEST_REBOOT_NODES)) {
            slurm_mutex_lock(&fwd_struct->forward_mutex);
            ret_data_info = xmalloc(sizeof(ret_data_info_t));
            list_push(fwd_struct->ret_list, ret_data_info);
            ret_data_info->node_name = xstrdup(name);
            free(name);
            while ((name = hostlist_shift(hl))) {
                ret_data_info = xmalloc(sizeof(ret_data_info_t));
                list_push(fwd_struct->ret_list, ret_data_info);
                ret_data_info->node_name = xstrdup(name);
                free(name);
            }
            goto cleanup;
        }

        if (fwd_msg->header.forward.cnt > 0) {
            static int message_timeout = -1;
            if (message_timeout < 0)
                message_timeout = slurm_get_msg_timeout() * 1000;
            if (!fwd_msg->header.forward.tree_width)
                fwd_msg->header.forward.tree_width =
                    slurm_get_tree_width();
            steps = (fwd_msg->header.forward.cnt+1) /
                    fwd_msg->header.forward.tree_width;
            fwd_msg->timeout = (message_timeout*steps);
            /* info("got %d * %d = %d", message_timeout, */
            /*      steps, fwd_msg->timeout); */
            steps++;
            fwd_msg->timeout += (start_timeout*steps);
            /* info("now + %d*%d = %d", start_timeout, */
            /*      steps, fwd_msg->timeout); */
        }

        ret_list = slurm_receive_msgs(fd, steps, fwd_msg->timeout);
        /* info("sent %d forwards got %d back", */
        /*      fwd_msg->header.forward.cnt, list_count(ret_list)); */

        if (!ret_list || (fwd_msg->header.forward.cnt != 0 &&
                          list_count(ret_list) <= 1)) {
            slurm_mutex_lock(&fwd_struct->forward_mutex);
            mark_as_failed_forward(&fwd_struct->ret_list, name, errno);
            free(name);
            FREE_NULL_LIST(ret_list);
            if (hostlist_count(hl) > 0) {
                free_buf(buffer);
                buffer = init_buf(fwd_struct->buf_len);
                slurm_mutex_unlock(&fwd_struct->forward_mutex);
                slurm_close(fd);
                fd = -1;
                continue;
            }
            goto cleanup;
        } else if ((fwd_msg->header.forward.cnt+1) !=
                   list_count(ret_list)) {
            /* This should never be called since the above should
             * catch the failed forwards and pipe them back down,
             * but this is here so we never have to worry about a
             * locked mutex */
            ListIterator itr = NULL;
            char *tmp = NULL;
            int first_node_found = 0;
            hostlist_iterator_t host_itr = hostlist_iterator_create(hl);

            error("We shouldn't be here. We forwarded to %d "
                  "but only got %d back",
                  (fwd_msg->header.forward.cnt+1), list_count(ret_list));
            while ((tmp = hostlist_next(host_itr))) {
                int node_found = 0;
                itr = list_iterator_create(ret_list);
                while ((ret_data_info = list_next(itr))) {
                    if (!ret_data_info->node_name) {
                        first_node_found = 1;
                        ret_data_info->node_name = xstrdup(name);
                    }
                    if (!xstrcmp(tmp, ret_data_info->node_name)) {
                        node_found = 1;
                        break;
                    }
                }
                list_iterator_destroy(itr);
                if (!node_found) {
                    mark_as_failed_forward(
                        &fwd_struct->ret_list, tmp,
                        SLURM_COMMUNICATIONS_CONNECTION_ERROR);
                }
                free(tmp);
            }
            hostlist_iterator_destroy(host_itr);
            if (!first_node_found) {
                mark_as_failed_forward(
                    &fwd_struct->ret_list, name,
                    SLURM_COMMUNICATIONS_CONNECTION_ERROR);
            }
        }
        break;
    }
    slurm_mutex_lock(&fwd_struct->forward_mutex);
    if (ret_list) {
        while ((ret_data_info = list_pop(ret_list)) != NULL) {
            if (!ret_data_info->node_name) {
                ret_data_info->node_name = xstrdup(name);
            }
            list_push(fwd_struct->ret_list, ret_data_info);
            debug3("got response from %s", ret_data_info->node_name);
        }
        FREE_NULL_LIST(ret_list);
    }
    free(name);
cleanup:
    if ((fd >= 0) && slurm_close(fd) < 0)
        error("close(%d): %m", fd);
    hostlist_destroy(hl);
    destroy_forward(&fwd_msg->header.forward);
    free_buf(buffer);
    slurm_cond_signal(&fwd_struct->notify);
    slurm_mutex_unlock(&fwd_struct->forward_mutex);
    xfree(fwd_msg);

    return (NULL);
}
static void _forward_msg_internal(hostlist_t hl, hostlist_t *sp_hl,
                                  forward_struct_t *fwd_struct,
                                  header_t *header, int timeout,
                                  int hl_count)
{
    int j;
    forward_msg_t *fwd_msg = NULL;
    char *buf = NULL, *tmp_char = NULL;
    pthread_attr_t attr_agent;
    pthread_t thread_agent;

    if (timeout <= 0)
        /* convert secs to msec */
        timeout = slurm_get_msg_timeout() * 1000;

    for (j = 0; j < hl_count; j++) {
        int retries = 0;

        slurm_attr_init(&attr_agent);
        if (pthread_attr_setdetachstate(&attr_agent,
                                        PTHREAD_CREATE_DETACHED))
            error("pthread_attr_setdetachstate error %m");

        fwd_msg = xmalloc(sizeof(forward_msg_t));

        fwd_msg->fwd_struct = fwd_struct;
        fwd_msg->timeout = timeout;

        memcpy(&fwd_msg->header.orig_addr, &header->orig_addr,
               sizeof(slurm_addr_t));

        fwd_msg->header.version = header->version;
        fwd_msg->header.flags = header->flags;
        fwd_msg->header.msg_type = header->msg_type;
        fwd_msg->header.body_length = header->body_length;
        fwd_msg->header.ret_list = NULL;
        fwd_msg->header.ret_cnt = 0;

        if (sp_hl) {
            buf = hostlist_ranged_string_xmalloc(sp_hl[j]);
            hostlist_destroy(sp_hl[j]);
        } else {
            tmp_char = hostlist_shift(hl);
            buf = xstrdup(tmp_char);
            free(tmp_char);
        }

        forward_init(&fwd_msg->header.forward, NULL);
        fwd_msg->header.forward.nodelist = buf;
        while (pthread_create(&thread_agent, &attr_agent,
                              _forward_thread, (void *)fwd_msg)) {
            error("pthread_create error %m");
            if (++retries > MAX_RETRIES)
                fatal("Can't create pthread");
            usleep(100000);    /* sleep and try again */
        }
        slurm_attr_destroy(&attr_agent);
    }
}
void *_fwd_tree_thread(void *arg)
{
    fwd_tree_t *fwd_tree = (fwd_tree_t *)arg;
    List ret_list = NULL;
    char *name = NULL;
    char *buf = NULL;
    slurm_msg_t send_msg;

    slurm_msg_t_init(&send_msg);
    send_msg.msg_type = fwd_tree->orig_msg->msg_type;
    send_msg.data = fwd_tree->orig_msg->data;
    send_msg.protocol_version = fwd_tree->orig_msg->protocol_version;

    /* repeat until we are sure the message was sent */
    while ((name = hostlist_shift(fwd_tree->tree_hl))) {
        if (slurm_conf_get_addr(name, &send_msg.address) == SLURM_ERROR) {
            error("fwd_tree_thread: can't find address for host "
                  "%s, check slurm.conf", name);
            slurm_mutex_lock(fwd_tree->tree_mutex);
            mark_as_failed_forward(&fwd_tree->ret_list, name,
                                   SLURM_UNKNOWN_FORWARD_ADDR);
            slurm_cond_signal(fwd_tree->notify);
            slurm_mutex_unlock(fwd_tree->tree_mutex);
            free(name);

            continue;
        }

        send_msg.forward.timeout = fwd_tree->timeout;
        if ((send_msg.forward.cnt = hostlist_count(fwd_tree->tree_hl))) {
            buf = hostlist_ranged_string_xmalloc(fwd_tree->tree_hl);
            send_msg.forward.nodelist = buf;
        } else
            send_msg.forward.nodelist = NULL;

        if (send_msg.forward.nodelist && send_msg.forward.nodelist[0]) {
            debug3("Tree sending to %s along with %s",
                   name, send_msg.forward.nodelist);
        } else
            debug3("Tree sending to %s", name);

        ret_list = slurm_send_addr_recv_msgs(&send_msg, name,
                                             fwd_tree->timeout);

        xfree(send_msg.forward.nodelist);

        if (ret_list) {
            int ret_cnt = list_count(ret_list);
            /* This is most common if a slurmd is running an older
             * version of Slurm than the originator of the message. */
            if ((ret_cnt <= send_msg.forward.cnt) &&
                (errno != SLURM_COMMUNICATIONS_CONNECTION_ERROR)) {
                error("fwd_tree_thread: %s failed to forward "
                      "the message, expecting %d ret got only "
                      "%d", name, send_msg.forward.cnt + 1, ret_cnt);
                if (ret_cnt > 1) {    /* not likely */
                    ret_data_info_t *ret_data_info = NULL;
                    ListIterator itr = list_iterator_create(ret_list);
                    while ((ret_data_info = list_next(itr))) {
                        if (xstrcmp(ret_data_info->node_name, name))
                            hostlist_delete_host(
                                fwd_tree->tree_hl,
                                ret_data_info->node_name);
                    }
                    list_iterator_destroy(itr);
                }
            }

            slurm_mutex_lock(fwd_tree->tree_mutex);
            list_transfer(fwd_tree->ret_list, ret_list);
            slurm_cond_signal(fwd_tree->notify);
            slurm_mutex_unlock(fwd_tree->tree_mutex);
            FREE_NULL_LIST(ret_list);

            /* try next node */
            if (ret_cnt <= send_msg.forward.cnt) {
                free(name);
                /* Abandon tree. This way if all the nodes in the
                 * branch are down we don't have to time out for
                 * each node serially. */
                _start_msg_tree_internal(
                    fwd_tree->tree_hl, NULL, fwd_tree,
                    hostlist_count(fwd_tree->tree_hl));
                continue;
            }
        } else {
            /* This should never happen (when this was written
             * slurm_send_addr_recv_msgs always returned a list) */
            error("fwd_tree_thread: no return list given from "
                  "slurm_send_addr_recv_msgs spawned for %s", name);
            slurm_mutex_lock(fwd_tree->tree_mutex);
            mark_as_failed_forward(
                &fwd_tree->ret_list, name,
                SLURM_COMMUNICATIONS_CONNECTION_ERROR);
            slurm_cond_signal(fwd_tree->notify);
            slurm_mutex_unlock(fwd_tree->tree_mutex);
            free(name);

            continue;
        }

        free(name);

        /* check for error and try again */
        if (errno == SLURM_COMMUNICATIONS_CONNECTION_ERROR)
            continue;

        break;
    }

    _destroy_tree_fwd(fwd_tree);

    return NULL;
}
/* Start a job:
 *	CMD=STARTJOB ARG=<jobid> TASKLIST=<node_list> [COMMENT=<whatever>]
 * RET 0 on success, -1 on failure */
extern int start_job(char *cmd_ptr, int *err_code, char **err_msg)
{
    char *arg_ptr, *comment_ptr, *task_ptr, *tasklist, *tmp_char;
    int i, rc, task_cnt;
    uint32_t jobid;
    hostlist_t hl = (hostlist_t) NULL;
    char *host_string;
    static char reply_msg[128];

    arg_ptr = strstr(cmd_ptr, "ARG=");
    if (arg_ptr == NULL) {
        *err_code = -300;
        *err_msg = "STARTJOB lacks ARG";
        error("wiki: STARTJOB lacks ARG");
        return -1;
    }
    jobid = strtoul(arg_ptr+4, &tmp_char, 10);
    if (!isspace(tmp_char[0])) {
        *err_code = -300;
        *err_msg = "Invalid ARG value";
        error("wiki: STARTJOB has invalid jobid");
        return -1;
    }

    comment_ptr = strstr(cmd_ptr, "COMMENT=");
    task_ptr = strstr(cmd_ptr, "TASKLIST=");

    if (comment_ptr) {
        comment_ptr[7] = ':';
        comment_ptr += 8;
        if (comment_ptr[0] == '\"') {
            comment_ptr++;
            for (i = 0; i < MAX_COMMENT_LEN; i++) {
                if (comment_ptr[i] == '\0')
                    break;
                if (comment_ptr[i] == '\"') {
                    comment_ptr[i] = '\0';
                    break;
                }
            }
            if (i == MAX_COMMENT_LEN)
                comment_ptr[i-1] = '\0';
        } else if (comment_ptr[0] == '\'') {
            comment_ptr++;
            for (i = 0; i < MAX_COMMENT_LEN; i++) {
                if (comment_ptr[i] == '\0')
                    break;
                if (comment_ptr[i] == '\'') {
                    comment_ptr[i] = '\0';
                    break;
                }
            }
            if (i == MAX_COMMENT_LEN)
                comment_ptr[i-1] = '\0';
        } else
            null_term(comment_ptr);
    }

    if (task_ptr == NULL) {
        *err_code = -300;
        *err_msg = "STARTJOB lacks TASKLIST";
        error("wiki: STARTJOB lacks TASKLIST");
        return -1;
    }
    task_ptr += 9;    /* skip over "TASKLIST=" */
    if ((task_ptr[0] == '\0') || isspace(task_ptr[0])) {
        /* No TASKLIST specification, useful for testing */
        host_string = xstrdup("");
        task_cnt = 0;
        tasklist = NULL;
    } else {
        null_term(task_ptr);
        tasklist = moab2slurm_task_list(task_ptr, &task_cnt);
        if (tasklist)
            hl = hostlist_create(tasklist);
        if ((tasklist == NULL) || (hl == NULL)) {
            *err_code = -300;
            *err_msg = "STARTJOB TASKLIST is invalid";
            error("wiki: STARTJOB TASKLIST is invalid: %s", task_ptr);
            xfree(tasklist);
            return -1;
        }
        hostlist_uniq(hl);
        hostlist_sort(hl);
        host_string = hostlist_ranged_string_xmalloc(hl);
        hostlist_destroy(hl);
        if (host_string == NULL) {
            *err_code = -300;
            *err_msg = "STARTJOB has invalid TASKLIST";
            error("wiki: STARTJOB has invalid TASKLIST: %s", tasklist);
            xfree(tasklist);
            return -1;
        }
    }

    rc = _start_job(jobid, task_cnt, host_string, tasklist,
                    comment_ptr, err_code, err_msg);
    xfree(host_string);
    xfree(tasklist);
    if (rc == 0) {
        snprintf(reply_msg, sizeof(reply_msg),
                 "job %u started successfully", jobid);
        *err_msg = reply_msg;
    }
    return rc;
}
/* Start a job:
 *	CMD=STARTJOB ARG=<jobid> TASKLIST=<node_list>
 * RET 0 on success, -1 on failure */
extern int start_job(char *cmd_ptr, int *err_code, char **err_msg)
{
    char *arg_ptr, *task_ptr, *tasklist, *tmp_char;
    int rc, task_cnt;
    uint32_t jobid;
    hostlist_t hl = (hostlist_t) NULL;
    char *host_string;
    static char reply_msg[128];

    arg_ptr = strstr(cmd_ptr, "ARG=");
    if (arg_ptr == NULL) {
        *err_code = -300;
        *err_msg = "STARTJOB lacks ARG";
        error("wiki: STARTJOB lacks ARG");
        return -1;
    }
    jobid = strtoul(arg_ptr+4, &tmp_char, 10);
    if (!isspace(tmp_char[0])) {
        *err_code = -300;
        *err_msg = "Invalid ARG value";
        error("wiki: STARTJOB has invalid jobid");
        return -1;
    }

    task_ptr = strstr(cmd_ptr, "TASKLIST=");
    if (task_ptr == NULL) {
        *err_code = -300;
        *err_msg = "STARTJOB lacks TASKLIST";
        error("wiki: STARTJOB lacks TASKLIST");
        return -1;
    }
    task_ptr += 9;    /* skip over "TASKLIST=" */
    null_term(task_ptr);
    tasklist = moab2slurm_task_list(task_ptr, &task_cnt);
    if (tasklist)
        hl = hostlist_create(tasklist);
    if ((tasklist == NULL) || (hl == NULL)) {
        *err_code = -300;
        *err_msg = "STARTJOB TASKLIST is invalid";
        error("wiki: STARTJOB TASKLIST is invalid: %s", task_ptr);
        xfree(tasklist);
        return -1;
    }
    hostlist_uniq(hl);
    hostlist_sort(hl);
    host_string = hostlist_ranged_string_xmalloc(hl);
    hostlist_destroy(hl);
    if (host_string == NULL) {
        *err_code = -300;
        *err_msg = "STARTJOB has invalid TASKLIST";
        error("wiki: STARTJOB has invalid TASKLIST: %s", tasklist);
        xfree(tasklist);
        return -1;
    }

    rc = _start_job(jobid, task_cnt, host_string, tasklist,
                    err_code, err_msg);
    xfree(host_string);
    xfree(tasklist);
    if (rc == 0) {
        snprintf(reply_msg, sizeof(reply_msg),
                 "job %u started successfully", jobid);
        *err_msg = reply_msg;
    }
    return rc;
}
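/*
 * Illustrative sketch of the dedup/sort step above (hypothetical task
 * list, not from the original source): a task list naming the same node
 * several times collapses to one compact allocation string.
 */
static char *_tasklist_to_hosts_demo(void)
{
    hostlist_t hl = hostlist_create("tux0,tux0,tux1");
    char *host_string;

    hostlist_uniq(hl);
    hostlist_sort(hl);
    host_string = hostlist_ranged_string_xmalloc(hl);    /* "tux[0-1]" */
    hostlist_destroy(hl);
    return host_string;    /* caller must xfree() */
}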
static int _job_modify(uint32_t jobid, char *bank_ptr, char *depend_ptr,
                       char *new_hostlist, uint32_t new_node_cnt,
                       char *part_name_ptr, uint32_t new_time_limit,
                       char *name_ptr, char *start_ptr, char *feature_ptr,
                       char *env_ptr, char *comment_ptr, char *gres_ptr,
                       char *wckey_ptr)
{
    struct job_record *job_ptr;
    time_t now = time(NULL);
    bool update_accounting = false;

    job_ptr = find_job_record(jobid);
    if (job_ptr == NULL) {
        error("wiki: MODIFYJOB has invalid jobid %u", jobid);
        return ESLURM_INVALID_JOB_ID;
    }
    if (IS_JOB_FINISHED(job_ptr) || (job_ptr->details == NULL)) {
        info("wiki: MODIFYJOB jobid %u is finished", jobid);
        return ESLURM_DISABLED;
    }

    if (comment_ptr) {
        info("wiki: change job %u comment %s", jobid, comment_ptr);
        xfree(job_ptr->comment);
        job_ptr->comment = xstrdup(comment_ptr);
        last_job_update = now;
    }

    if (depend_ptr) {
        int rc = update_job_dependency(job_ptr, depend_ptr);
        if (rc == SLURM_SUCCESS) {
            info("wiki: changed job %u dependency to %s",
                 jobid, depend_ptr);
        } else {
            error("wiki: changing job %u dependency to %s",
                  jobid, depend_ptr);
            return EINVAL;
        }
    }

    if (env_ptr) {
        bool have_equal = false;
        char old_sep[1];
        int begin = 0, i;

        if (job_ptr->batch_flag == 0) {
            error("wiki: attempt to set environment variables "
                  "for non-batch job %u", jobid);
            return ESLURM_DISABLED;
        }
        for (i = 0; ; i++) {
            if (env_ptr[i] == '=') {
                if (have_equal) {
                    error("wiki: setting job %u invalid "
                          "environment variables: %s",
                          jobid, env_ptr);
                    return EINVAL;
                }
                have_equal = true;
                if (env_ptr[i+1] == '\"') {
                    for (i += 2; ; i++) {
                        if (env_ptr[i] == '\0') {
                            error("wiki: setting job %u "
                                  "invalid environment "
                                  "variables: %s",
                                  jobid, env_ptr);
                            return EINVAL;
                        }
                        if (env_ptr[i] == '\"') {
                            i++;
                            break;
                        }
                        if (env_ptr[i] == '\\') {
                            i++;
                        }
                    }
                } else if (env_ptr[i+1] == '\'') {
                    for (i += 2; ; i++) {
                        if (env_ptr[i] == '\0') {
                            error("wiki: setting job %u "
                                  "invalid environment "
                                  "variables: %s",
                                  jobid, env_ptr);
                            return EINVAL;
                        }
                        if (env_ptr[i] == '\'') {
                            i++;
                            break;
                        }
                        if (env_ptr[i] == '\\') {
                            i++;
                        }
                    }
                }
            }
            if (isspace(env_ptr[i]) || (env_ptr[i] == ',')) {
                if (!have_equal) {
                    error("wiki: setting job %u invalid "
                          "environment variables: %s",
                          jobid, env_ptr);
                    return EINVAL;
                }
                old_sep[0] = env_ptr[i];
                env_ptr[i] = '\0';
                xrealloc(job_ptr->details->env_sup,
                         sizeof(char *) *
                         (job_ptr->details->env_cnt+1));
                job_ptr->details->env_sup
                    [job_ptr->details->env_cnt++] =
                    xstrdup(&env_ptr[begin]);
                info("wiki: for job %u add env: %s",
                     jobid, &env_ptr[begin]);
                env_ptr[i] = old_sep[0];
                if (isspace(old_sep[0]))
                    break;
                begin = i + 1;
                have_equal = false;
            }
        }
    }

    if (new_time_limit) {
        time_t old_time = job_ptr->time_limit;
        job_ptr->time_limit = new_time_limit;
        info("wiki: change job %u time_limit to %u",
             jobid, new_time_limit);
        /* Update end_time based upon change
         * to preserve suspend time info */
        job_ptr->end_time = job_ptr->end_time +
                            ((job_ptr->time_limit - old_time) * 60);
        last_job_update = now;
    }

    if (bank_ptr &&
        (update_job_account("wiki", job_ptr, bank_ptr) != SLURM_SUCCESS)) {
        return EINVAL;
    }

    if (feature_ptr) {
        if (IS_JOB_PENDING(job_ptr) && (job_ptr->details)) {
            info("wiki: change job %u features to %s",
                 jobid, feature_ptr);
            job_ptr->details->features = xstrdup(feature_ptr);
            last_job_update = now;
        } else {
            error("wiki: MODIFYJOB features of non-pending job %u",
                  jobid);
            return ESLURM_DISABLED;
        }
    }

    if (start_ptr) {
        char *end_ptr;
        uint32_t begin_time = strtol(start_ptr, &end_ptr, 10);
        if (IS_JOB_PENDING(job_ptr) && (job_ptr->details)) {
            info("wiki: change job %u begin time to %u",
                 jobid, begin_time);
            job_ptr->details->begin_time = begin_time;
            last_job_update = now;
            update_accounting = true;
        } else {
            error("wiki: MODIFYJOB begin_time of non-pending job %u",
                  jobid);
            return ESLURM_DISABLED;
        }
    }

    if (name_ptr) {
        if (IS_JOB_PENDING(job_ptr)) {
            info("wiki: change job %u name %s", jobid, name_ptr);
            xfree(job_ptr->name);
            job_ptr->name = xstrdup(name_ptr);
            last_job_update = now;
            update_accounting = true;
        } else {
            error("wiki: MODIFYJOB name of non-pending job %u", jobid);
            return ESLURM_DISABLED;
        }
    }

    if (new_hostlist) {
        int rc = 0, task_cnt;
        hostlist_t hl;
        char *tasklist;

        if (!IS_JOB_PENDING(job_ptr) || !job_ptr->details) {
            /* Job is done, nothing to reset */
            if (new_hostlist[0] == '\0')
                goto host_fini;
            error("wiki: MODIFYJOB hostlist of non-pending job %u",
                  jobid);
            return ESLURM_DISABLED;
        }

        xfree(job_ptr->details->req_nodes);
        FREE_NULL_BITMAP(job_ptr->details->req_node_bitmap);
        if (new_hostlist[0] == '\0')
            goto host_fini;

        tasklist = moab2slurm_task_list(new_hostlist, &task_cnt);
        if (tasklist == NULL) {
            rc = 1;
            goto host_fini;
        }
        hl = hostlist_create(tasklist);
        if (hl == NULL) {
            rc = 1;
            goto host_fini;
        }
        hostlist_uniq(hl);
        hostlist_sort(hl);
        job_ptr->details->req_nodes =
            hostlist_ranged_string_xmalloc(hl);
        hostlist_destroy(hl);
        if (job_ptr->details->req_nodes == NULL) {
            rc = 1;
            goto host_fini;
        }
        if (node_name2bitmap(job_ptr->details->req_nodes, false,
                             &job_ptr->details->req_node_bitmap)) {
            rc = 1;
            goto host_fini;
        }

host_fini:
        if (rc) {
            info("wiki: change job %u invalid hostlist %s",
                 jobid, new_hostlist);
            xfree(job_ptr->details->req_nodes);
            return EINVAL;
        } else {
            info("wiki: change job %u hostlist %s",
                 jobid, new_hostlist);
            update_accounting = true;
        }
    }

    if (part_name_ptr) {
        struct part_record *part_ptr;
        if (!IS_JOB_PENDING(job_ptr)) {
            error("wiki: MODIFYJOB partition of non-pending job %u",
                  jobid);
            return ESLURM_DISABLED;
        }
        part_ptr = find_part_record(part_name_ptr);
        if (part_ptr == NULL) {
            error("wiki: MODIFYJOB has invalid partition %s",
                  part_name_ptr);
            return ESLURM_INVALID_PARTITION_NAME;
        }
        info("wiki: change job %u partition %s", jobid, part_name_ptr);
        xfree(job_ptr->partition);
        job_ptr->partition = xstrdup(part_name_ptr);
        job_ptr->part_ptr = part_ptr;
        last_job_update = now;
        update_accounting = true;
    }

    if (new_node_cnt) {
        job_desc_msg_t job_desc;
#ifdef HAVE_BG
        uint16_t geometry[SYSTEM_DIMENSIONS] = {(uint16_t) NO_VAL};
        static uint16_t cpus_per_node = 0;
        if (!cpus_per_node) {
            select_g_alter_node_cnt(SELECT_GET_NODE_CPU_CNT,
                                    &cpus_per_node);
        }
#endif
        if (!IS_JOB_PENDING(job_ptr) || !job_ptr->details) {
            error("wiki: MODIFYJOB node count of non-pending job %u",
                  jobid);
            return ESLURM_DISABLED;
        }
        memset(&job_desc, 0, sizeof(job_desc_msg_t));

        job_desc.min_nodes = new_node_cnt;
        job_desc.max_nodes = NO_VAL;
        job_desc.select_jobinfo = select_g_select_jobinfo_alloc();

        select_g_alter_node_cnt(SELECT_SET_NODE_CNT, &job_desc);

        select_g_select_jobinfo_free(job_desc.select_jobinfo);

        job_ptr->details->min_nodes = job_desc.min_nodes;
        if (job_ptr->details->max_nodes &&
            (job_ptr->details->max_nodes < job_desc.min_nodes))
            job_ptr->details->max_nodes = job_desc.min_nodes;
        info("wiki: change job %u min_nodes to %u",
             jobid, new_node_cnt);
#ifdef HAVE_BG
        job_ptr->details->min_cpus = job_desc.min_cpus;
        job_ptr->details->max_cpus = job_desc.max_cpus;
        job_ptr->details->pn_min_cpus = job_desc.pn_min_cpus;

        new_node_cnt = job_ptr->details->min_cpus;
        if (cpus_per_node)
            new_node_cnt /= cpus_per_node;

        /* This is only set up so accounting is set up correctly */
        select_g_select_jobinfo_set(job_ptr->select_jobinfo,
                                    SELECT_JOBDATA_NODE_CNT,
                                    &new_node_cnt);
        /* reset geo since changing this makes any geo
         * potentially invalid */
        select_g_select_jobinfo_set(job_ptr->select_jobinfo,
                                    SELECT_JOBDATA_GEOMETRY,
                                    geometry);
#endif
        last_job_update = now;
        update_accounting = true;
    }

    if (gres_ptr) {
        char *orig_gres;

        if (!IS_JOB_PENDING(job_ptr)) {
            error("wiki: MODIFYJOB GRES of non-pending job %u", jobid);
            return ESLURM_DISABLED;
        }

        orig_gres = job_ptr->gres;
        job_ptr->gres = NULL;
        if (gres_ptr[0])
            job_ptr->gres = xstrdup(gres_ptr);
        if (gres_plugin_job_state_validate(job_ptr->gres,
                                           &job_ptr->gres_list)) {
            error("wiki: MODIFYJOB Invalid GRES=%s", gres_ptr);
            xfree(job_ptr->gres);
            job_ptr->gres = orig_gres;
            return ESLURM_INVALID_GRES;
        }
        xfree(orig_gres);
    }

    if (wckey_ptr) {
        int rc = update_job_wckey("update_job", job_ptr, wckey_ptr);
        if (rc != SLURM_SUCCESS) {
            error("wiki: MODIFYJOB Invalid WCKEY=%s", wckey_ptr);
            return rc;
        }
    }

    if (update_accounting) {
        if (job_ptr->details && job_ptr->details->begin_time) {
            /* Update job record in accounting to reflect
             * the changes */
            jobacct_storage_g_job_start(acct_db_conn, job_ptr);
        }
    }

    return SLURM_SUCCESS;
}
extern int parse_blockreq(void **dest, slurm_parser_enum_t type,
                          const char *key, const char *value,
                          const char *line, char **leftover)
{
    s_p_options_t block_options[] = {
        {"Type", S_P_STRING},
        {"32CNBlocks", S_P_UINT16},
        {"128CNBlocks", S_P_UINT16},
#ifdef HAVE_BGL
        {"Nodecards", S_P_UINT16},
        {"Quarters", S_P_UINT16},
        {"BlrtsImage", S_P_STRING},
        {"LinuxImage", S_P_STRING},
        {"RamDiskImage", S_P_STRING},
#else
        {"16CNBlocks", S_P_UINT16},
        {"64CNBlocks", S_P_UINT16},
        {"256CNBlocks", S_P_UINT16},
        {"CnloadImage", S_P_STRING},
        {"IoloadImage", S_P_STRING},
#endif
        {"MloaderImage", S_P_STRING},
        {NULL}
    };
    s_p_hashtbl_t *tbl;
    char *tmp = NULL;
    select_ba_request_t *n = NULL;
    hostlist_t hl = NULL;

    tbl = s_p_hashtbl_create(block_options);
    s_p_parse_line(tbl, *leftover, leftover);
    if (!value) {
        s_p_hashtbl_destroy(tbl);
        return 0;
    }
    n = xmalloc(sizeof(select_ba_request_t));
    hl = hostlist_create(value);
    n->save_name = hostlist_ranged_string_xmalloc(hl);
    hostlist_destroy(hl);
#ifdef HAVE_BGL
    s_p_get_string(&n->blrtsimage, "BlrtsImage", tbl);
    s_p_get_string(&n->linuximage, "LinuxImage", tbl);
    s_p_get_string(&n->ramdiskimage, "RamDiskImage", tbl);
#else
    s_p_get_string(&n->linuximage, "CnloadImage", tbl);
    s_p_get_string(&n->ramdiskimage, "IoloadImage", tbl);
#endif
    s_p_get_string(&n->mloaderimage, "MloaderImage", tbl);

    s_p_get_string(&tmp, "Type", tbl);
    if (!tmp || !strcasecmp(tmp, "TORUS"))
        n->conn_type[0] = SELECT_TORUS;
    else if (!strcasecmp(tmp, "MESH"))
        n->conn_type[0] = SELECT_MESH;
    else
        n->conn_type[0] = SELECT_SMALL;
    xfree(tmp);

    if (!s_p_get_uint16(&n->small32, "32CNBlocks", tbl)) {
#ifdef HAVE_BGL
        s_p_get_uint16(&n->small32, "Nodecards", tbl);
#else
        ;
#endif
    }
    if (!s_p_get_uint16(&n->small128, "128CNBlocks", tbl)) {
#ifdef HAVE_BGL
        s_p_get_uint16(&n->small128, "Quarters", tbl);
#else
        ;
#endif
    }

#ifndef HAVE_BGL
    s_p_get_uint16(&n->small16, "16CNBlocks", tbl);
    s_p_get_uint16(&n->small64, "64CNBlocks", tbl);
    s_p_get_uint16(&n->small256, "256CNBlocks", tbl);
#endif

    s_p_hashtbl_destroy(tbl);

    *dest = (void *)n;
    return 1;
}
/*
 * Create job description structure based off srun options
 * (see opt.h)
 */
job_desc_msg_t *job_desc_msg_create_from_opts(void)
{
    job_desc_msg_t *j = xmalloc(sizeof(*j));
    hostlist_t hl = NULL;

    slurm_init_job_desc_msg(j);

    j->contiguous = opt.contiguous;
    j->features = opt.constraints;
    j->gres = opt.gres;
    if (opt.immediate == 1)
        j->immediate = opt.immediate;
    if (opt.job_name)
        j->name = xstrdup(opt.job_name);
    else
        j->name = xstrdup(opt.cmd_name);
    if (opt.argc > 0) {
        j->argc = 1;
        j->argv = (char **) xmalloc(sizeof(char *) * 2);
        j->argv[0] = xstrdup(opt.argv[0]);
    }
    if (opt.acctg_freq >= 0)
        j->acctg_freq = opt.acctg_freq;
    j->reservation = xstrdup(opt.reservation);
    j->wckey = xstrdup(opt.wckey);

    j->req_nodes = xstrdup(opt.nodelist);

    /* simplify the job allocation nodelist,
     * not laying out tasks until step */
    if (j->req_nodes) {
        hl = hostlist_create(j->req_nodes);
        xfree(opt.nodelist);
        opt.nodelist = hostlist_ranged_string_xmalloc(hl);
        hostlist_uniq(hl);
        xfree(j->req_nodes);
        j->req_nodes = hostlist_ranged_string_xmalloc(hl);
        hostlist_destroy(hl);
    }

    if (opt.distribution == SLURM_DIST_ARBITRARY && !j->req_nodes) {
        error("With Arbitrary distribution you need to "
              "specify a nodelist or hostfile with the -w option");
        return NULL;
    }
    j->exc_nodes = opt.exc_nodes;
    j->partition = opt.partition;
    j->min_nodes = opt.min_nodes;
    if (opt.sockets_per_node != NO_VAL)
        j->sockets_per_node = opt.sockets_per_node;
    if (opt.cores_per_socket != NO_VAL)
        j->cores_per_socket = opt.cores_per_socket;
    if (opt.threads_per_core != NO_VAL)
        j->threads_per_core = opt.threads_per_core;
    j->user_id = opt.uid;
    j->dependency = opt.dependency;
    if (opt.nice)
        j->nice = NICE_OFFSET + opt.nice;

    if (opt.cpu_bind)
        j->cpu_bind = opt.cpu_bind;
    if (opt.cpu_bind_type)
        j->cpu_bind_type = opt.cpu_bind_type;
    if (opt.mem_bind)
        j->mem_bind = opt.mem_bind;
    if (opt.mem_bind_type)
        j->mem_bind_type = opt.mem_bind_type;
    if (opt.plane_size != NO_VAL)
        j->plane_size = opt.plane_size;
    j->task_dist = opt.distribution;

    j->group_id = opt.gid;
    j->mail_type = opt.mail_type;

    if (opt.ntasks_per_node != NO_VAL)
        j->ntasks_per_node = opt.ntasks_per_node;
    if (opt.ntasks_per_socket != NO_VAL)
        j->ntasks_per_socket = opt.ntasks_per_socket;
    if (opt.ntasks_per_core != NO_VAL)
        j->ntasks_per_core = opt.ntasks_per_core;

    if (opt.mail_user)
        j->mail_user = xstrdup(opt.mail_user);
    if (opt.begin)
        j->begin_time = opt.begin;
    if (opt.licenses)
        j->licenses = xstrdup(opt.licenses);
    if (opt.network)
        j->network = xstrdup(opt.network);
    if (opt.account)
        j->account = xstrdup(opt.account);
    if (opt.comment)
        j->comment = xstrdup(opt.comment);
    if (opt.qos)
        j->qos = xstrdup(opt.qos);
    if (opt.cwd)
        j->work_dir = xstrdup(opt.cwd);

    if (opt.hold)
        j->priority = 0;
    if (opt.jobid != NO_VAL)
        j->job_id = opt.jobid;
#ifdef HAVE_BG
    if (opt.geometry[0] > 0) {
        int i;
        for (i = 0; i < SYSTEM_DIMENSIONS; i++)
            j->geometry[i] = opt.geometry[i];
    }
#endif

    if (opt.conn_type != (uint16_t) NO_VAL)
        j->conn_type[0] = opt.conn_type;

    if (opt.reboot)
        j->reboot = 1;
    if (opt.no_rotate)
        j->rotate = 0;

    if (opt.blrtsimage)
        j->blrtsimage = xstrdup(opt.blrtsimage);
    if (opt.linuximage)
        j->linuximage = xstrdup(opt.linuximage);
    if (opt.mloaderimage)
        j->mloaderimage = xstrdup(opt.mloaderimage);
    if (opt.ramdiskimage)
        j->ramdiskimage = xstrdup(opt.ramdiskimage);

    if (opt.max_nodes)
        j->max_nodes = opt.max_nodes;
    else if (opt.nodes_set) {
        /* On an allocation if the max nodes isn't set, set it
         * to match the behavior of salloc or sbatch. */
        j->max_nodes = opt.min_nodes;
    }

    if (opt.pn_min_cpus != NO_VAL)
        j->pn_min_cpus = opt.pn_min_cpus;
    if (opt.pn_min_memory != NO_VAL)
        j->pn_min_memory = opt.pn_min_memory;
    else if (opt.mem_per_cpu != NO_VAL)
        j->pn_min_memory = opt.mem_per_cpu | MEM_PER_CPU;
    if (opt.pn_min_tmp_disk != NO_VAL)
        j->pn_min_tmp_disk = opt.pn_min_tmp_disk;
    if (opt.overcommit) {
        j->min_cpus = opt.min_nodes;
        j->overcommit = opt.overcommit;
    } else
        j->min_cpus = opt.ntasks * opt.cpus_per_task;
    if (opt.ntasks_set)
        j->num_tasks = opt.ntasks;

    if (opt.cpus_set)
        j->cpus_per_task = opt.cpus_per_task;

    if (opt.no_kill)
        j->kill_on_node_fail = 0;
    if (opt.time_limit != NO_VAL)
        j->time_limit = opt.time_limit;
    if (opt.time_min != NO_VAL)
        j->time_min = opt.time_min;
    j->shared = opt.shared;

    if (opt.warn_signal)
        j->warn_signal = opt.warn_signal;
    if (opt.warn_time)
        j->warn_time = opt.warn_time;

    /* srun uses the same listening port for the allocation response
     * message as all other messages */
    j->alloc_resp_port = slurmctld_comm_addr.port;
    j->other_port = slurmctld_comm_addr.port;

    if (opt.spank_job_env_size) {
        j->spank_job_env = opt.spank_job_env;
        j->spank_job_env_size = opt.spank_job_env_size;
    }

    return (j);
}
/*
 * Create job description structure based off srun options
 * (see opt.h)
 */
job_desc_msg_t *job_desc_msg_create_from_opts(void)
{
    job_desc_msg_t *j = xmalloc(sizeof(*j));
    hostlist_t hl = NULL;

    slurm_init_job_desc_msg(j);
#if defined HAVE_ALPS_CRAY && defined HAVE_REAL_CRAY
    uint64_t pagg_id = job_getjid(getpid());
    /*
     * Interactive sessions require pam_job.so in /etc/pam.d/common-session
     * since creating sgi_job containers requires root permissions. This is
     * the only exception where we allow the fallback of using the SID to
     * confirm the reservation (caught later, in do_basil_confirm).
     */
    if (pagg_id == (uint64_t)-1) {
        error("No SGI job container ID detected - please enable the "
              "Cray job service via /etc/init.d/job");
    } else {
        if (!j->select_jobinfo)
            j->select_jobinfo = select_g_select_jobinfo_alloc();

        select_g_select_jobinfo_set(j->select_jobinfo,
                                    SELECT_JOBDATA_PAGG_ID, &pagg_id);
    }
#endif

    j->contiguous = opt.contiguous;
    if (opt.core_spec)
        j->core_spec = opt.core_spec;
    j->features = opt.constraints;
    j->gres = opt.gres;
    if (opt.immediate == 1)
        j->immediate = opt.immediate;
    if (opt.job_name)
        j->name = opt.job_name;
    else
        j->name = opt.cmd_name;
    if (opt.argc > 0) {
        j->argc = 1;
        j->argv = (char **) xmalloc(sizeof(char *) * 2);
        j->argv[0] = xstrdup(opt.argv[0]);
    }
    if (opt.acctg_freq)
        j->acctg_freq = xstrdup(opt.acctg_freq);
    j->reservation = opt.reservation;
    j->wckey = opt.wckey;

    j->req_nodes = xstrdup(opt.nodelist);

    /* simplify the job allocation nodelist,
     * not laying out tasks until step */
    if (j->req_nodes) {
        hl = hostlist_create(j->req_nodes);
        xfree(opt.nodelist);
        opt.nodelist = hostlist_ranged_string_xmalloc(hl);
        hostlist_uniq(hl);
        xfree(j->req_nodes);
        j->req_nodes = hostlist_ranged_string_xmalloc(hl);
        hostlist_destroy(hl);
    }

    if (opt.distribution == SLURM_DIST_ARBITRARY && !j->req_nodes) {
        error("With Arbitrary distribution you need to "
              "specify a nodelist or hostfile with the -w option");
        return NULL;
    }
    j->exc_nodes = opt.exc_nodes;
    j->partition = opt.partition;
    j->min_nodes = opt.min_nodes;
    if (opt.sockets_per_node != NO_VAL)
        j->sockets_per_node = opt.sockets_per_node;
    if (opt.cores_per_socket != NO_VAL)
        j->cores_per_socket = opt.cores_per_socket;
    if (opt.threads_per_core != NO_VAL)
        j->threads_per_core = opt.threads_per_core;
    j->user_id = opt.uid;
    j->dependency = opt.dependency;
    if (opt.nice)
        j->nice = NICE_OFFSET + opt.nice;
    if (opt.priority)
        j->priority = opt.priority;

    if (opt.cpu_bind)
        j->cpu_bind = opt.cpu_bind;
    if (opt.cpu_bind_type)
        j->cpu_bind_type = opt.cpu_bind_type;
    if (opt.mem_bind)
        j->mem_bind = opt.mem_bind;
    if (opt.mem_bind_type)
        j->mem_bind_type = opt.mem_bind_type;
    if (opt.plane_size != NO_VAL)
        j->plane_size = opt.plane_size;
    j->task_dist = opt.distribution;

    j->group_id = opt.gid;
    j->mail_type = opt.mail_type;

    if (opt.ntasks_per_node != NO_VAL)
        j->ntasks_per_node = opt.ntasks_per_node;
    if (opt.ntasks_per_socket != NO_VAL)
        j->ntasks_per_socket = opt.ntasks_per_socket;
    if (opt.ntasks_per_core != NO_VAL)
        j->ntasks_per_core = opt.ntasks_per_core;

    if (opt.mail_user)
        j->mail_user = opt.mail_user;
    if (opt.begin)
        j->begin_time = opt.begin;
    if (opt.licenses)
        j->licenses = opt.licenses;
    if (opt.network)
        j->network = opt.network;
    if (opt.profile)
        j->profile = opt.profile;
    if (opt.account)
        j->account = opt.account;
    if (opt.comment)
        j->comment = opt.comment;
    if (opt.qos)
        j->qos = opt.qos;
    if (opt.cwd)
        j->work_dir = opt.cwd;

    if (opt.hold)
        j->priority = 0;
    if (opt.jobid != NO_VAL)
        j->job_id = opt.jobid;
#ifdef HAVE_BG
    if (opt.geometry[0] > 0) {
        int i;
        for (i = 0; i < SYSTEM_DIMENSIONS; i++)
            j->geometry[i] = opt.geometry[i];
    }
#endif

    memcpy(j->conn_type, opt.conn_type, sizeof(j->conn_type));

    if (opt.reboot)
        j->reboot = 1;
    if (opt.no_rotate)
        j->rotate = 0;

    if (opt.blrtsimage)
        j->blrtsimage = opt.blrtsimage;
    if (opt.linuximage)
        j->linuximage = opt.linuximage;
    if (opt.mloaderimage)
        j->mloaderimage = opt.mloaderimage;
    if (opt.ramdiskimage)
        j->ramdiskimage = opt.ramdiskimage;

    if (opt.max_nodes)
        j->max_nodes = opt.max_nodes;
    else if (opt.nodes_set) {
        /* On an allocation if the max nodes isn't set, set it
         * to match the behavior of salloc or sbatch. */
        j->max_nodes = opt.min_nodes;
    }

    if (opt.pn_min_cpus != NO_VAL)
        j->pn_min_cpus = opt.pn_min_cpus;
    if (opt.pn_min_memory != NO_VAL)
        j->pn_min_memory = opt.pn_min_memory;
    else if (opt.mem_per_cpu != NO_VAL)
        j->pn_min_memory = opt.mem_per_cpu | MEM_PER_CPU;
    if (opt.pn_min_tmp_disk != NO_VAL)
        j->pn_min_tmp_disk = opt.pn_min_tmp_disk;
    if (opt.overcommit) {
        j->min_cpus = opt.min_nodes;
        j->overcommit = opt.overcommit;
    } else if (opt.cpus_set)
        j->min_cpus = opt.ntasks * opt.cpus_per_task;
    else
        j->min_cpus = opt.ntasks;
    if (opt.ntasks_set)
        j->num_tasks = opt.ntasks;

    if (opt.cpus_set)
        j->cpus_per_task = opt.cpus_per_task;

    if (opt.no_kill)
        j->kill_on_node_fail = 0;
    if (opt.time_limit != NO_VAL)
        j->time_limit = opt.time_limit;
    if (opt.time_min != NO_VAL)
        j->time_min = opt.time_min;
    j->shared = opt.shared;

    if (opt.warn_signal)
        j->warn_signal = opt.warn_signal;
    if (opt.warn_time)
        j->warn_time = opt.warn_time;

    if (opt.req_switch >= 0)
        j->req_switch = opt.req_switch;
    if (opt.wait4switch >= 0)
        j->wait4switch = opt.wait4switch;

    /* srun uses the same listening port for the allocation response
     * message as all other messages */
    j->alloc_resp_port = slurmctld_comm_addr.port;
    j->other_port = slurmctld_comm_addr.port;

    if (opt.spank_job_env_size) {
        j->spank_job_env = opt.spank_job_env;
        j->spank_job_env_size = opt.spank_job_env_size;
    }

    return (j);
}