Exemple #1
0
/* DO NOT RUN AGAINST PRODUCTION NODES, IT CAN MESS UP STATE */
/*
 * main - exercise the slurm partition and node update calls.
 *
 * Sends two updates for partition "batch" (state_up false, then true) and
 * two updates for the node name filled in by _getnodename() (DRAIN, then
 * RESUME). Each failure is reported with slurm_perror().
 *
 * IN argc - count of arguments (unused)
 * IN argv - list of arguments (unused)
 * RET 0 if every update succeeded, otherwise the errno value captured
 *	right after the most recent failing call
 */
int
main (int argc, char *argv[]) {
	int error_code;
	int rc = 0;
	update_part_msg_t	part_update1 ;
	update_part_msg_t	part_update2 ;
	update_node_msg_t	node_update1 ;
	update_node_msg_t	node_update2 ;
	char node_name[NAME_LEN];

	slurm_init_part_desc_msg ( &part_update1 );
	slurm_init_part_desc_msg ( &part_update2 );
	part_update1 . name = "batch" ;
	part_update2 . name = "batch" ;
	part_update1 . state_up = false ;
	part_update2 . state_up = true ;

	/*
	 * The node messages live on the stack; initialize them before
	 * setting individual fields so that no uninitialized field is sent
	 * with the request.
	 */
	slurm_init_update_node_msg ( &node_update1 );
	slurm_init_update_node_msg ( &node_update2 );
	_getnodename(node_name, NAME_LEN);
	node_update1 . node_names = node_name ;
	node_update2 . node_names = node_name ;
	node_update1 . node_state = NODE_STATE_DRAIN ;
	node_update2 . node_state = NODE_RESUME ;

	/*
	 * errno is saved immediately after each failing call, before
	 * slurm_perror() or any later call has a chance to modify it.
	 */
	error_code = slurm_update_partition ( &part_update1);
	if (error_code) {
		rc = errno;
		slurm_perror ("slurm_update_partition #1");
	}

	error_code = slurm_update_partition ( &part_update2);
	if (error_code) {
		rc = errno;
		slurm_perror ("slurm_update_partition #2");
	}

	error_code = slurm_update_node ( &node_update1);
	if (error_code) {
		rc = errno;
		slurm_perror ("slurm_update_node #1");
	}

	error_code = slurm_update_node ( &node_update2);
	if (error_code) {
		rc = errno;
		slurm_perror ("slurm_update_node #2");
	}

	return (rc);
}
Exemple #2
0
/*
 * scontrol_update_node - update the slurm node configuration per the supplied
 *	arguments
 *
 * Each argument must have the form "Tag=Value". Tags are matched
 * case-insensitively and may be abbreviated (the minimum length for each tag
 * is the second argument of the MAX() in its comparison). The parsed values
 * are collected in one update_node_msg_t and sent with slurm_update_node().
 *
 * IN argc - count of arguments
 * IN argv - list of arguments
 * RET 0 if no slurm error, errno otherwise. Most parsing errors print an
 *	error message, set exit_code to 1 and return 0; an argument
 *	without '=' or an invalid CpuBind value returns -1 instead.
 */
extern int
scontrol_update_node (int argc, char **argv)
{
	int i, j, rc = 0, update_cnt = 0;
	uint16_t state_val;
	update_node_msg_t node_msg;
	char *reason_str = NULL;
	char *tag, *val;
	int tag_len, val_len;

	slurm_init_update_node_msg(&node_msg);
	for (i = 0; i < argc; i++) {
		tag = argv[i];
		val = strchr(argv[i], '=');
		if (val) {
			tag_len = val - argv[i];
			val++;
			val_len = strlen(val);
		} else {
			exit_code = 1;
			error("Invalid input: %s  Request aborted", argv[i]);
			/* a Reason parsed from an earlier argument must
			 * not be leaked on this early return */
			xfree(reason_str);
			return -1;
		}

		if (xstrncasecmp(tag, "NodeAddr", MAX(tag_len, 5)) == 0) {
			node_msg.node_addr = val;
			update_cnt++;
		} else if (xstrncasecmp(tag, "NodeHostName", MAX(tag_len, 5))
			   == 0) {
			node_msg.node_hostname = val;
			update_cnt++;
		} else if (xstrncasecmp(tag, "NodeName", MAX(tag_len, 1)) == 0) {
			/* selects the target; not counted as a change */
			node_msg.node_names = val;
		} else if (!xstrncasecmp(tag, "ActiveFeatures",
					 MAX(tag_len,3))) {
			node_msg.features_act = val;
			update_cnt++;
		} else if (xstrncasecmp(tag, "CpuBind", MAX(tag_len, 7)) == 0) {
			if (xlate_cpu_bind_str(val, &node_msg.cpu_bind) !=
			    SLURM_SUCCESS) {
				exit_code = 1;
				error("Invalid input %s", argv[i]);
				xfree(reason_str);
				return -1;
			}
			update_cnt++;
		} else if (!xstrncasecmp(tag, "Features", MAX(tag_len, 1)) ||
			   !xstrncasecmp(tag, "AvailableFeatures",
					 MAX(tag_len,3))) {
			node_msg.features = val;
			update_cnt++;
		} else if (xstrncasecmp(tag, "Gres", MAX(tag_len, 1)) == 0) {
			node_msg.gres = val;
			update_cnt++;
		} else if (xstrncasecmp(tag, "Weight", MAX(tag_len,1)) == 0) {
			/* Logic borrowed from function _handle_uint32 */
			char *endptr;
			unsigned long num;
			int no_digits;

			errno = 0;
			num = strtoul(val, &endptr, 0);
			/* strtoul() leaves endptr at val when it consumed
			 * nothing; remember that before the suffix check
			 * below moves endptr */
			no_digits = (endptr == val);
			if ((endptr[0] == 'k') || (endptr[0] == 'K')) {
				num *= 1024;
				endptr++;
			}
			/*
			 * Any Weight error aborts the whole request: leaving
			 * the argument loop and still sending the partially
			 * parsed update would silently drop this and every
			 * later argument.
			 */
			if (no_digits || (num == 0 && errno == EINVAL)
			    || (*endptr != '\0')) {
				if ((xstrcasecmp(val, "UNLIMITED") == 0) ||
				    (xstrcasecmp(val, "INFINITE")  == 0)) {
					num = INFINITE;
				} else {
					error("Weight value (%s) is not a "
					      "valid number", val);
					exit_code = 1;
					goto done;
				}
			} else if (errno == ERANGE) {
				error("Weight value (%s) is out of range",
				      val);
				exit_code = 1;
				goto done;
			} else if (val[0] == '-') {
				error("Weight value (%s) is less than zero",
				      val);
				exit_code = 1;
				goto done;
			} else if (num > 0xfffffff0) {
				error("Weight value (%s) is greater than %u",
					val, 0xfffffff0);
				exit_code = 1;
				goto done;
			}
			node_msg.weight = num;
			update_cnt++;
		} else if (xstrncasecmp(tag, "Reason", MAX(tag_len, 1)) == 0) {
			int len = strlen(val);

			/* release the copy made for an earlier Reason=
			 * argument, if any; the last one wins */
			xfree(reason_str);
			reason_str = xmalloc(len+1);
			/* strip one leading and one trailing double quote */
			if (*val == '"')
				strcpy(reason_str, val+1);
			else
				strcpy(reason_str, val);

			len = strlen(reason_str) - 1;
			if ((len >= 0) && (reason_str[len] == '"'))
				reason_str[len] = '\0';

			node_msg.reason = reason_str;
			/* prefer the uid of the login name, fall back to
			 * the real uid of this process */
			if ((getlogin() == NULL) ||
			    (uid_from_string(getlogin(),
					     &node_msg.reason_uid) < 0)) {
				node_msg.reason_uid = getuid();
			}
			update_cnt++;
		}
		else if (xstrncasecmp(tag, "State", MAX(tag_len, 1)) == 0) {
			if (cluster_flags & CLUSTER_FLAG_CRAY_A) {
				fprintf (stderr, "%s can not be changed through"
					 " SLURM. Use native Cray tools such as"
					 " xtprocadmin(8)\n", argv[i]);
				fprintf (stderr, "Request aborted\n");
				exit_code = 1;
				goto done;
			}
			if (xstrncasecmp(val, "NoResp",
				        MAX(val_len, 3)) == 0) {
				node_msg.node_state = NODE_STATE_NO_RESPOND;
				update_cnt++;
			} else if (xstrncasecmp(val, "DRAIN",
				   MAX(val_len, 3)) == 0) {
				node_msg.node_state = NODE_STATE_DRAIN;
				update_cnt++;
			} else if (xstrncasecmp(val, "FAIL",
				   MAX(val_len, 3)) == 0) {
				node_msg.node_state = NODE_STATE_FAIL;
				update_cnt++;
			} else if (xstrncasecmp(val, "FUTURE",
				   MAX(val_len, 3)) == 0) {
				node_msg.node_state = NODE_STATE_FUTURE;
				update_cnt++;
			} else if (xstrncasecmp(val, "RESUME",
				   MAX(val_len, 3)) == 0) {
				node_msg.node_state = NODE_RESUME;
				update_cnt++;
			} else if (xstrncasecmp(val, "POWER_DOWN",
				   MAX(val_len, 7)) == 0) {
				node_msg.node_state = NODE_STATE_POWER_SAVE;
				update_cnt++;
			} else if (xstrncasecmp(val, "POWER_UP",
				   MAX(val_len, 7)) == 0) {
				node_msg.node_state = NODE_STATE_POWER_UP;
				update_cnt++;
			} else if (xstrncasecmp(val, "UNDRAIN",
				   MAX(val_len, 3)) == 0) {
				node_msg.node_state = NODE_STATE_UNDRAIN;
				update_cnt++;
			} else {
				/* not one of the keywords above: try the
				 * names of the base node states */
				state_val = NO_VAL16;
				for (j = 0; j < NODE_STATE_END; j++) {
					if (xstrncasecmp(node_state_string(j),
							 val,
							 MAX(val_len, 3)) == 0){
						state_val = (uint16_t) j;
						break;
					}
				}
				if (j == NODE_STATE_END) {
					exit_code = 1;
					fprintf(stderr, "Invalid input: %s\n",
						argv[i]);
					fprintf (stderr, "Request aborted\n");
					fprintf (stderr, "Valid states are: ");
					fprintf (stderr,
						 "NoResp DRAIN FAIL FUTURE RESUME "
						 "POWER_DOWN POWER_UP UNDRAIN");
					fprintf (stderr, "\n");
					fprintf (stderr,
						 "Not all states are valid "
						 "given a node's prior "
						 "state\n");
					goto done;
				}
				node_msg.node_state = state_val;
				update_cnt++;
			}
		} else {
			exit_code = 1;
			fprintf (stderr, "Update of this parameter is not "
				 "supported: %s\n", argv[i]);
			fprintf (stderr, "Request aborted\n");
			goto done;
		}
	}

	if (((node_msg.node_state == NODE_STATE_DOWN)  ||
	     (node_msg.node_state == NODE_STATE_DRAIN) ||
	     (node_msg.node_state == NODE_STATE_FAIL)) &&
	    ((node_msg.reason == NULL) || (strlen(node_msg.reason) == 0))) {
		/* the request is refused, so report failure to the shell
		 * like the other aborted requests do */
		exit_code = 1;
		fprintf(stderr, "You must specify a reason when DOWNING or "
			"DRAINING a node. Request denied\n");
		goto done;
	}

	if (update_cnt == 0) {
		exit_code = 1;
		fprintf (stderr, "No changes specified\n");
		goto done;	/* rc is still 0, so this returns 0 */
	}

	rc = slurm_update_node(&node_msg);

done:	xfree(reason_str);
	if (rc) {
		exit_code = 1;
		return slurm_get_errno ();
	} else
		return 0;
}
Exemple #3
0
/*
 * main - change the boot-time features of a set of nodes and, when run with
 *	a feature list, record the new active features in slurm.
 *
 * IN argv[1] - hostlist expression naming the nodes to process
 * IN argv[2] - optional comma separated list of feature names
 *
 * One detached thread running _node_update() is started per node; after all
 * threads have finished and _wait_all_nodes_on() has returned, the active
 * features are sent with slurm_update_node() (only when argc == 3 and
 * syscfg_path is not set).
 *
 * Exit status: 0 on success, 1 if slurm_update_node() failed, 2 if the
 * hostlist could not be created.
 *
 * NOTE(review): argv[1] is used without checking that argc >= 2.
 */
int main(int argc, char *argv[])
{
	log_options_t log_opts = LOG_OPTS_INITIALIZER;
	char *features, *save_ptr = NULL, *tok;
	update_node_msg_t node_msg;
	int rc =  SLURM_SUCCESS;
	hostlist_t hl = NULL;
	char *node_name;
	pthread_attr_t attr_work;
	pthread_t thread_work = 0;

	prog_name = argv[0];
	_read_config();
	log_opts.stderr_level = LOG_LEVEL_QUIET;
	log_opts.syslog_level = LOG_LEVEL_QUIET;
	/* Bitwise test of one debug flag; a logical AND would be true
	 * whenever any debug flag at all is set */
	if (slurm_get_debug_flags() & DEBUG_FLAG_NODE_FEATURES)
		log_opts.logfile_level += 3;
	(void) log_init(argv[0], log_opts, LOG_DAEMON, log_file);

	/* Parse the MCDRAM and NUMA boot options */
	if (argc == 3) {
		/* strtok_r() modifies its input, so work on a copy */
		features = xstrdup(argv[2]);
		tok = strtok_r(features, ",", &save_ptr);
		while (tok) {
			printf("%s\n", tok);
			/*
			 * NOTE(review): a2a/hemi/quad/snc2/snc4 are stored
			 * in mcdram_mode and cache/equal/flat in numa_mode.
			 * On KNL the first group is normally the NUMA
			 * (cluster) mode and the second the MCDRAM mode, so
			 * the two assignments look swapped. Confirm against
			 * the code that consumes these globals before
			 * changing anything.
			 */
			if (!strcasecmp(tok, "a2a")  ||
			    !strcasecmp(tok, "hemi") ||
			    !strcasecmp(tok, "quad") ||
			    !strcasecmp(tok, "snc2") ||
			    !strcasecmp(tok, "snc4")) {
				xfree(mcdram_mode);
				mcdram_mode = xstrdup(tok);
			} else if (!strcasecmp(tok, "cache")  ||
				   !strcasecmp(tok, "equal") ||
				   !strcasecmp(tok, "flat")) {
				xfree(numa_mode);
				numa_mode = xstrdup(tok);
			}
			tok = strtok_r(NULL, ",", &save_ptr);
		}
		xfree(features);
	}

	/* Spawn threads to change MCDRAM and NUMA states and start node
	 * reboot process */
	if ((hl = hostlist_create(argv[1])) == NULL) {
		error("%s: Invalid hostlist (%s)", prog_name, argv[1]);
		exit(2);
	}
	node_bitmap = bit_alloc(100000);
	while ((node_name = hostlist_pop(hl))) {
		/* Throttle: block while more than MAX_THREADS workers are
		 * counted, then account for the one about to start */
		slurm_mutex_lock(&thread_cnt_mutex);
		while (thread_cnt > MAX_THREADS) {
			/* wait for state change and retry */
			pthread_cond_wait(&thread_cnt_cond,
					  &thread_cnt_mutex);
		}
		thread_cnt++;
		slurm_mutex_unlock(&thread_cnt_mutex);

		slurm_attr_init(&attr_work);
		(void) pthread_attr_setdetachstate
			(&attr_work, PTHREAD_CREATE_DETACHED);
		if (pthread_create(&thread_work, &attr_work, _node_update,
				   (void *) node_name)) {
			/* thread creation failed: do the work inline */
			_node_update((void *) node_name);
		}
		slurm_attr_destroy(&attr_work);
	}

	/* Wait for work threads to complete */
	slurm_mutex_lock(&thread_cnt_mutex);
	while (thread_cnt != 0) {
		/* wait for state change and retry */
		pthread_cond_wait(&thread_cnt_cond, &thread_cnt_mutex);
	}
	slurm_mutex_unlock(&thread_cnt_mutex);
	hostlist_destroy(hl);
	xfree(mcdram_mode);
	xfree(numa_mode);

	/* Wait for all nodes to change state to "on" */
	_wait_all_nodes_on();

	if ((argc == 3) && !syscfg_path) {
		slurm_init_update_node_msg(&node_msg);
		node_msg.node_names = argv[1];
		node_msg.features_act = argv[2];
		rc = slurm_update_node(&node_msg);
	}

	/* rc can only differ from SLURM_SUCCESS when argc == 3, so argv[2]
	 * is valid in the error message below */
	if (rc == SLURM_SUCCESS) {
		exit(0);
	} else {
		error("%s: slurm_update_node(\'%s\', \'%s\'): %s\n",
		      prog_name, argv[1], argv[2],
		      slurm_strerror(slurm_get_errno()));
		exit(1);
	}
}