/* DO NOT RUN AGAINST PRODUCTION NODES, IT CAN MESS UP STATE */ int main (int argc, char *argv[]) { int error_code; update_part_msg_t part_update1 ; update_part_msg_t part_update2 ; update_node_msg_t node_update1 ; update_node_msg_t node_update2 ; char node_name[NAME_LEN]; slurm_init_part_desc_msg ( &part_update1 ); slurm_init_part_desc_msg ( &part_update2 ); part_update1 . name = "batch" ; part_update2 . name = "batch" ; part_update1 . state_up = false ; part_update2 . state_up = true ; _getnodename(node_name, NAME_LEN); node_update1 . node_names = node_name ; node_update2 . node_names = node_name ; node_update1 . node_state = NODE_STATE_DRAIN ; node_update2 . node_state = NODE_RESUME ; error_code = slurm_update_partition ( &part_update1); if (error_code) slurm_perror ("slurm_update_partition #1"); error_code = slurm_update_partition ( &part_update2); if (error_code) slurm_perror ("slurm_update_partition #2"); error_code = slurm_update_node ( &node_update1); if (error_code) slurm_perror ("slurm_update_node #1"); error_code = slurm_update_node ( &node_update2); if (error_code) slurm_perror ("slurm_update_node #2"); return (errno); }
/* * scontrol_update_node - update the slurm node configuration per the supplied * arguments * IN argc - count of arguments * IN argv - list of arguments * RET 0 if no slurm error, errno otherwise. parsing error prints * error message and returns 0 */ extern int scontrol_update_node (int argc, char **argv) { int i, j, rc = 0, update_cnt = 0; uint16_t state_val; update_node_msg_t node_msg; char *reason_str = NULL; char *tag, *val; int tag_len, val_len; slurm_init_update_node_msg(&node_msg); for (i = 0; i < argc; i++) { tag = argv[i]; val = strchr(argv[i], '='); if (val) { tag_len = val - argv[i]; val++; val_len = strlen(val); } else { exit_code = 1; error("Invalid input: %s Request aborted", argv[i]); return -1; } if (xstrncasecmp(tag, "NodeAddr", MAX(tag_len, 5)) == 0) { node_msg.node_addr = val; update_cnt++; } else if (xstrncasecmp(tag, "NodeHostName", MAX(tag_len, 5)) == 0) { node_msg.node_hostname = val; update_cnt++; } else if (xstrncasecmp(tag, "NodeName", MAX(tag_len, 1)) == 0) { node_msg.node_names = val; } else if (!xstrncasecmp(tag, "ActiveFeatures", MAX(tag_len,3))) { node_msg.features_act = val; update_cnt++; } else if (xstrncasecmp(tag, "CpuBind", MAX(tag_len, 7)) == 0) { if (xlate_cpu_bind_str(val, &node_msg.cpu_bind) != SLURM_SUCCESS) { exit_code = 1; error("Invalid input %s", argv[i]); return -1; } update_cnt++; } else if (!xstrncasecmp(tag, "Features", MAX(tag_len, 1)) || !xstrncasecmp(tag, "AvailableFeatures", MAX(tag_len,3))) { node_msg.features = val; update_cnt++; } else if (xstrncasecmp(tag, "Gres", MAX(tag_len, 1)) == 0) { node_msg.gres = val; update_cnt++; } else if (xstrncasecmp(tag, "Weight", MAX(tag_len,1)) == 0) { /* Logic borrowed from function _handle_uint32 */ char *endptr; unsigned long num; errno = 0; num = strtoul(val, &endptr, 0); if ((endptr[0] == 'k') || (endptr[0] == 'K')) { num *= 1024; endptr++; } if ((num == 0 && errno == EINVAL) || (*endptr != '\0')) { if ((xstrcasecmp(val, "UNLIMITED") == 0) || (xstrcasecmp(val, "INFINITE") == 0)) { num = INFINITE; } else { error("Weight value (%s) is not a " "valid number", val); break; } } else if (errno == ERANGE) { error("Weight value (%s) is out of range", val); break; } else if (val[0] == '-') { error("Weight value (%s) is less than zero", val); break; } else if (num > 0xfffffff0) { error("Weight value (%s) is greater than %u", val, 0xfffffff0); break; } node_msg.weight = num; update_cnt++; } else if (xstrncasecmp(tag, "Reason", MAX(tag_len, 1)) == 0) { int len = strlen(val); reason_str = xmalloc(len+1); if (*val == '"') strcpy(reason_str, val+1); else strcpy(reason_str, val); len = strlen(reason_str) - 1; if ((len >= 0) && (reason_str[len] == '"')) reason_str[len] = '\0'; node_msg.reason = reason_str; if ((getlogin() == NULL) || (uid_from_string(getlogin(), &node_msg.reason_uid) < 0)) { node_msg.reason_uid = getuid(); } update_cnt++; } else if (xstrncasecmp(tag, "State", MAX(tag_len, 1)) == 0) { if (cluster_flags & CLUSTER_FLAG_CRAY_A) { fprintf (stderr, "%s can not be changed through" " SLURM. Use native Cray tools such as" " xtprocadmin(8)\n", argv[i]); fprintf (stderr, "Request aborted\n"); exit_code = 1; goto done; } if (xstrncasecmp(val, "NoResp", MAX(val_len, 3)) == 0) { node_msg.node_state = NODE_STATE_NO_RESPOND; update_cnt++; } else if (xstrncasecmp(val, "DRAIN", MAX(val_len, 3)) == 0) { node_msg.node_state = NODE_STATE_DRAIN; update_cnt++; } else if (xstrncasecmp(val, "FAIL", MAX(val_len, 3)) == 0) { node_msg.node_state = NODE_STATE_FAIL; update_cnt++; } else if (xstrncasecmp(val, "FUTURE", MAX(val_len, 3)) == 0) { node_msg.node_state = NODE_STATE_FUTURE; update_cnt++; } else if (xstrncasecmp(val, "RESUME", MAX(val_len, 3)) == 0) { node_msg.node_state = NODE_RESUME; update_cnt++; } else if (xstrncasecmp(val, "POWER_DOWN", MAX(val_len, 7)) == 0) { node_msg.node_state = NODE_STATE_POWER_SAVE; update_cnt++; } else if (xstrncasecmp(val, "POWER_UP", MAX(val_len, 7)) == 0) { node_msg.node_state = NODE_STATE_POWER_UP; update_cnt++; } else if (xstrncasecmp(val, "UNDRAIN", MAX(val_len, 3)) == 0) { node_msg.node_state = NODE_STATE_UNDRAIN; update_cnt++; } else { state_val = NO_VAL16; for (j = 0; j < NODE_STATE_END; j++) { if (xstrncasecmp(node_state_string(j), val, MAX(val_len, 3)) == 0){ state_val = (uint16_t) j; break; } } if (j == NODE_STATE_END) { exit_code = 1; fprintf(stderr, "Invalid input: %s\n", argv[i]); fprintf (stderr, "Request aborted\n"); fprintf (stderr, "Valid states are: "); fprintf (stderr, "NoResp DRAIN FAIL FUTURE RESUME " "POWER_DOWN POWER_UP UNDRAIN"); fprintf (stderr, "\n"); fprintf (stderr, "Not all states are valid " "given a node's prior " "state\n"); goto done; } node_msg.node_state = state_val; update_cnt++; } } else { exit_code = 1; fprintf (stderr, "Update of this parameter is not " "supported: %s\n", argv[i]); fprintf (stderr, "Request aborted\n"); goto done; } } if (((node_msg.node_state == NODE_STATE_DOWN) || (node_msg.node_state == NODE_STATE_DRAIN) || (node_msg.node_state == NODE_STATE_FAIL)) && ((node_msg.reason == NULL) || (strlen(node_msg.reason) == 0))) { fprintf(stderr, "You must specify a reason when DOWNING or " "DRAINING a node. Request denied\n"); goto done; } if (update_cnt == 0) { exit_code = 1; fprintf (stderr, "No changes specified\n"); return 0; } rc = slurm_update_node(&node_msg); done: xfree(reason_str); if (rc) { exit_code = 1; return slurm_get_errno (); } else return 0; }
int main(int argc, char *argv[]) { log_options_t log_opts = LOG_OPTS_INITIALIZER; char *features, *save_ptr = NULL, *tok; update_node_msg_t node_msg; int rc = SLURM_SUCCESS; hostlist_t hl = NULL; char *node_name; pthread_attr_t attr_work; pthread_t thread_work = 0; prog_name = argv[0]; _read_config(); log_opts.stderr_level = LOG_LEVEL_QUIET; log_opts.syslog_level = LOG_LEVEL_QUIET; if (slurm_get_debug_flags() && DEBUG_FLAG_NODE_FEATURES) log_opts.logfile_level += 3; (void) log_init(argv[0], log_opts, LOG_DAEMON, log_file); /* Parse the MCDRAM and NUMA boot options */ if (argc == 3) { features = xstrdup(argv[2]); tok = strtok_r(features, ",", &save_ptr); while (tok) { printf("%s\n", tok); if (!strcasecmp(tok, "a2a") || !strcasecmp(tok, "hemi") || !strcasecmp(tok, "quad") || !strcasecmp(tok, "snc2") || !strcasecmp(tok, "snc4")) { xfree(mcdram_mode); mcdram_mode = xstrdup(tok); } else if (!strcasecmp(tok, "cache") || !strcasecmp(tok, "equal") || !strcasecmp(tok, "flat")) { xfree(numa_mode); numa_mode = xstrdup(tok); } tok = strtok_r(NULL, ",", &save_ptr); } xfree(features); } /* Spawn threads to change MCDRAM and NUMA states and start node * reboot process */ if ((hl = hostlist_create(argv[1])) == NULL) { error("%s: Invalid hostlist (%s)", prog_name, argv[1]); exit(2); } node_bitmap = bit_alloc(100000); while ((node_name = hostlist_pop(hl))) { slurm_mutex_lock(&thread_cnt_mutex); while (1) { if (thread_cnt <= MAX_THREADS) { thread_cnt++; break; } else { /* wait for state change and retry */ pthread_cond_wait(&thread_cnt_cond, &thread_cnt_mutex); } } slurm_mutex_unlock(&thread_cnt_mutex); slurm_attr_init(&attr_work); (void) pthread_attr_setdetachstate (&attr_work, PTHREAD_CREATE_DETACHED); if (pthread_create(&thread_work, &attr_work, _node_update, (void *) node_name)) { _node_update((void *) node_name); } slurm_attr_destroy(&attr_work); } /* Wait for work threads to complete */ slurm_mutex_lock(&thread_cnt_mutex); while (1) { if (thread_cnt == 0) break; else /* wait for state change and retry */ pthread_cond_wait(&thread_cnt_cond, &thread_cnt_mutex); } slurm_mutex_unlock(&thread_cnt_mutex); hostlist_destroy(hl); xfree(mcdram_mode); xfree(numa_mode); /* Wait for all nodes to change state to "on" */ _wait_all_nodes_on(); if ((argc == 3) && !syscfg_path) { slurm_init_update_node_msg(&node_msg); node_msg.node_names = argv[1]; node_msg.features_act = argv[2]; rc = slurm_update_node(&node_msg); } if (rc == SLURM_SUCCESS) { exit(0); } else { error("%s: slurm_update_node(\'%s\', \'%s\'): %s\n", prog_name, argv[1], argv[2], slurm_strerror(slurm_get_errno())); exit(1); } }