Example #1
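This helper resolves the local host's NodeName: it tries slurm_conf_get_nodename() on the short hostname, falls back to slurm_conf_get_aliased_nodename(), and finally tries "localhost".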
/* Resolve this host's NodeName from slurm.conf: try the short hostname,
 * then any configured alias, then "localhost". The caller must xfree()
 * the returned string. */
static char *
_guess_nodename(void)
{
	char host[256];
	char *nodename = NULL;

	if (gethostname_short(host, sizeof(host)) != 0)
		return NULL;

	nodename = slurm_conf_get_nodename(host);
	if (nodename == NULL)
		nodename = slurm_conf_get_aliased_nodename();
	if (nodename == NULL) /* if no match, try localhost */
		nodename = slurm_conf_get_nodename("localhost");

	return nodename;
}
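For context, a minimal caller sketch (hypothetical code, assuming Slurm's xmalloc/xfree allocator conventions and its log.h debug() call): the NodeName returned above is an allocated string that must be released by the caller.

/* Hypothetical caller of _guess_nodename(): the returned string (or
 * NULL) ultimately comes from slurm_conf_get_nodename() and must be
 * xfree()d by the caller. */
char *nodename = _guess_nodename();
if (nodename != NULL) {
	debug("resolved NodeName: %s", nodename);
	xfree(nodename);
}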
Example #2
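This lookup routine first translates the given hostname to its NodeName alias with slurm_conf_get_nodename(), then searches the global node table, via the hash table when present, otherwise sequentially.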
/*
 * _find_alias_node_record - find the node record whose NodeName is the
 *	configured alias of the specified hostname
 * input: name - hostname to be resolved to an aliased node
 * output: return pointer to node record or NULL if not found
 * global: node_record_table_ptr - pointer to global node table
 *         node_hash_table - table of hash indexes
 */
static struct node_record *_find_alias_node_record (char *name)
{
	int i;
	char *alias = NULL;

	if ((name == NULL) || (name[0] == '\0')) {
		info("_find_alias_node_record: passed NULL or empty name");
		return NULL;
	}
	/* Resolve the alias first to make sure the user isn't trying
	 * to use the real hostname to run on something that has been
	 * aliased.
	 */
	alias = slurm_conf_get_nodename(name);

	if (!alias)
		return NULL;

	/* try to find via hash table, if it exists */
	if (node_hash_table) {
		struct node_record *node_ptr;

		i = _hash_index (alias);
		node_ptr = node_hash_table[i];
		while (node_ptr) {
			xassert(node_ptr->magic == NODE_MAGIC);
			if (!strcmp(node_ptr->name, alias)) {
				xfree(alias);
				return node_ptr;
			}
			node_ptr = node_ptr->node_next;
		}
		error ("_find_alias_node_record: lookup failure for %s", name);
	}

	/* revert to sequential search */
	else {
		for (i = 0; i < node_record_count; i++) {
			if (!strcmp (alias, node_record_table_ptr[i].name)) {
				xfree(alias);
				return (&node_record_table_ptr[i]);
			}
		}
	}

	xfree(alias);
	return NULL;
}
Example #3
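This GTK callback builds a popup listing which Slurm daemons run on the current host; slurm_conf_get_nodename() and its fallbacks determine whether a slurmd is configured for this machine.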
extern void create_daemon_popup(GtkAction *action, gpointer user_data)
{
	GtkWidget *popup = gtk_dialog_new_with_buttons(
		"SLURM Daemons running",
		GTK_WINDOW(user_data),
		GTK_DIALOG_DESTROY_WITH_PARENT,
		GTK_STOCK_CLOSE,
		GTK_RESPONSE_OK,
		NULL);

	int update = 0;
	slurm_ctl_conf_info_msg_t *conf;
	char me[MAX_SLURM_NAME], *b, *c, *n;
	int actld = 0, ctld = 0, d = 0;
	GtkTreeStore *treestore =
		_local_create_treestore_2cols(popup, 300, 100);
	GtkTreeIter iter;
	g_signal_connect(G_OBJECT(popup), "delete_event",
			 G_CALLBACK(_delete_popup), NULL);
	g_signal_connect(G_OBJECT(popup), "response",
			 G_CALLBACK(_delete_popup), NULL);

	slurm_conf_init(NULL);
	conf = slurm_conf_lock();

	gethostname_short(me, MAX_SLURM_NAME);
	if ((b = conf->backup_controller)) {
		if ((strcmp(b, me) == 0) ||
		    (strcasecmp(b, "localhost") == 0))
			ctld = 1;
	}
	if ((c = conf->control_machine)) {
		actld = 1;
		if ((strcmp(c, me) == 0) ||
		    (strcasecmp(c, "localhost") == 0))
			ctld = 1;
	}
	slurm_conf_unlock();

	if ((n = slurm_conf_get_nodename(me))) {
		d = 1;
		xfree(n);
	} else if ((n = slurm_conf_get_aliased_nodename())) {
		d = 1;
		xfree(n);
	} else if ((n = slurm_conf_get_nodename("localhost"))) {
		d = 1;
		xfree(n);
	}
	if (actld && ctld)
		add_display_treestore_line(update, treestore, &iter,
					   "Slurmctld", "1");
	if (actld && d)
		add_display_treestore_line(update, treestore, &iter,
					   "Slurmd", "1");


	gtk_widget_show_all(popup);

	return;
}
Example #4
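slurmd's configuration reader: among other things it derives conf->node_name via slurm_conf_get_nodename(), the aliased name, or "localhost", and reconciles the configured node resources with the actual hardware.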
/*
 * Read the Slurm configuration file (slurm.conf) and substitute its
 * values into the slurmd configuration in preference to the defaults.
 */
static void
_read_config(void)
{
	char *path_pubkey = NULL;
	slurm_ctl_conf_t *cf = NULL;
	int tmp = 0;

#ifndef HAVE_FRONT_END
	bool cr_flag = false, gang_flag = false;
	uint16_t fast_schedule = 0;
#endif

	cf = slurm_conf_lock();

	slurm_mutex_lock(&conf->config_mutex);

	if (conf->conffile == NULL)
		conf->conffile = xstrdup(cf->slurm_conf);

	conf->slurm_user_id = cf->slurm_user_id;

	conf->cr_type = cf->select_type_param;

	path_pubkey = xstrdup(cf->job_credential_public_certificate);

	if (!conf->logfile)
		conf->logfile = xstrdup(cf->slurmd_logfile);

#ifndef HAVE_FRONT_END
	if (!strcmp(cf->select_type, "select/cons_res"))
		cr_flag = true;
	if (cf->preempt_mode & PREEMPT_MODE_GANG)
		gang_flag = true;
	/* Copy this while the config is still locked; cf must not be
	 * dereferenced after slurm_conf_unlock() below. */
	fast_schedule = cf->fast_schedule;
#endif

	slurm_conf_unlock();
	/* node_name may already be set from a command line parameter */
	if (conf->node_name == NULL)
		conf->node_name = slurm_conf_get_nodename(conf->hostname);
	/* if we didn't match the form of the hostname already
	 * stored in conf->hostname, check to see if we match any
	 * valid aliases */
	if (conf->node_name == NULL)
		conf->node_name = slurm_conf_get_aliased_nodename();

	if (conf->node_name == NULL)
		conf->node_name = slurm_conf_get_nodename("localhost");

	if (conf->node_name == NULL)
		fatal("Unable to determine this slurmd's NodeName");

	_massage_pathname(&conf->logfile);

	/* set node_addr if relevant */
	if ((conf->node_addr == NULL) &&
	    (conf->node_addr = slurm_conf_get_nodeaddr(conf->hostname)) &&
	    (strcmp(conf->node_addr, conf->hostname) == 0)) {
		xfree(conf->node_addr);	/* Sets to NULL */
	}

	conf->port = slurm_conf_get_port(conf->node_name);
	slurm_conf_get_cpus_bsct(conf->node_name,
				 &conf->conf_cpus, &conf->conf_boards,
				 &conf->conf_sockets, &conf->conf_cores,
				 &conf->conf_threads);

	/* store hardware properties in slurmd_config */
	xfree(conf->block_map);
	xfree(conf->block_map_inv);

	_update_logging();
	_update_nice();

	get_cpuinfo(&conf->actual_cpus,
		    &conf->actual_boards,
		    &conf->actual_sockets,
		    &conf->actual_cores,
		    &conf->actual_threads,
		    &conf->block_map_size,
		    &conf->block_map, &conf->block_map_inv);
#ifdef HAVE_FRONT_END
	/*
	 * When running with multiple frontends, the slurmd S:C:T values are not
	 * relevant, hence ignored by both _register_front_ends (sets all to 1)
	 * and validate_nodes_via_front_end (uses slurm.conf values).
	 * Report actual hardware configuration, irrespective of FastSchedule.
	 */
	conf->cpus    = conf->actual_cpus;
	conf->boards  = conf->actual_boards;
	conf->sockets = conf->actual_sockets;
	conf->cores   = conf->actual_cores;
	conf->threads = conf->actual_threads;
#else
	/* If the actual resources on a node differ from what is in
	 * the configuration file and we are using cons_res or gang
	 * scheduling, we have to use what is in the configuration
	 * file because the slurmctld creates bitmaps for scheduling
	 * before these nodes check in.
	 */
	if (((fast_schedule == 0) && !cr_flag && !gang_flag) ||
	    ((fast_schedule == 1) &&
	     (conf->actual_cpus < conf->conf_cpus))) {
		conf->cpus    = conf->actual_cpus;
		conf->boards  = conf->actual_boards;
		conf->sockets = conf->actual_sockets;
		conf->cores   = conf->actual_cores;
		conf->threads = conf->actual_threads;
	} else {
		conf->cpus    = conf->conf_cpus;
		conf->boards  = conf->conf_boards;
		conf->sockets = conf->conf_sockets;
		conf->cores   = conf->conf_cores;
		conf->threads = conf->conf_threads;
	}

	if ((conf->cpus    != conf->actual_cpus)    ||
	    (conf->sockets != conf->actual_sockets) ||
	    (conf->cores   != conf->actual_cores)   ||
	    (conf->threads != conf->actual_threads)) {
		if (fast_schedule) {
			info("Node configuration differs from hardware: "
			     "CPUs=%u:%u(hw) Boards=%u:%u(hw) "
			     "SocketsPerBoard=%u:%u(hw) CoresPerSocket=%u:%u(hw) "
			     "ThreadsPerCore=%u:%u(hw)",
			     conf->cpus,    conf->actual_cpus,
			     conf->boards,  conf->actual_boards,
			     conf->sockets, conf->actual_sockets,
			     conf->cores,   conf->actual_cores,
			     conf->threads, conf->actual_threads);
		} else if ((fast_schedule == 0) && (cr_flag || gang_flag)) {
			error("You are using cons_res or gang scheduling with "
			      "Fastschedule=0 and node configuration differs "
			      "from hardware.  The node configuration used "
			      "will be what is in the slurm.conf because of "
			      "the bitmaps the slurmctld must create before "
			      "the slurmd registers.\n"
			      "   CPUs=%u:%u(hw) Boards=%u:%u(hw) "
			      "SocketsPerBoard=%u:%u(hw) CoresPerSocket=%u:%u(hw) "
			      "ThreadsPerCore=%u:%u(hw)",
			      conf->cpus,    conf->actual_cpus,
			      conf->boards,  conf->actual_boards,
			      conf->sockets, conf->actual_sockets,
			      conf->cores,   conf->actual_cores,
			      conf->threads, conf->actual_threads);
		}
	}
#endif

	get_memory(&conf->real_memory_size);
	get_up_time(&conf->up_time);

	cf = slurm_conf_lock();
	get_tmp_disk(&conf->tmp_disk_space, cf->tmp_fs);
	_free_and_set(&conf->epilog,   xstrdup(cf->epilog));
	_free_and_set(&conf->prolog,   xstrdup(cf->prolog));
	_free_and_set(&conf->tmpfs,    xstrdup(cf->tmp_fs));
	_free_and_set(&conf->health_check_program,
		      xstrdup(cf->health_check_program));
	_free_and_set(&conf->spooldir, xstrdup(cf->slurmd_spooldir));
	_massage_pathname(&conf->spooldir);
	_free_and_set(&conf->pidfile,  xstrdup(cf->slurmd_pidfile));
	_massage_pathname(&conf->pidfile);
	_free_and_set(&conf->select_type, xstrdup(cf->select_type));
	_free_and_set(&conf->task_prolog, xstrdup(cf->task_prolog));
	_free_and_set(&conf->task_epilog, xstrdup(cf->task_epilog));
	_free_and_set(&conf->pubkey,   path_pubkey);

	conf->debug_flags = cf->debug_flags;
	conf->propagate_prio = cf->propagate_prio_process;

	_free_and_set(&conf->job_acct_gather_freq,
		      xstrdup(cf->job_acct_gather_freq));

	conf->acct_freq_task = (uint16_t)NO_VAL;
	tmp = acct_gather_parse_freq(PROFILE_TASK,
				     conf->job_acct_gather_freq);
	if (tmp != -1)	/* -1 means no task sampling frequency set */
		conf->acct_freq_task = (uint16_t) tmp;

	_free_and_set(&conf->acct_gather_energy_type,
		      xstrdup(cf->acct_gather_energy_type));
	_free_and_set(&conf->acct_gather_filesystem_type,
		      xstrdup(cf->acct_gather_filesystem_type));
	_free_and_set(&conf->acct_gather_infiniband_type,
		      xstrdup(cf->acct_gather_infiniband_type));
	_free_and_set(&conf->acct_gather_profile_type,
		      xstrdup(cf->acct_gather_profile_type));
	_free_and_set(&conf->job_acct_gather_type,
		      xstrdup(cf->job_acct_gather_type));

	if ( (conf->node_name == NULL) ||
	     (conf->node_name[0] == '\0') )
		fatal("Node name lookup failure");

	if (cf->control_addr == NULL)
		fatal("Unable to establish controller machine");
	if (cf->slurmctld_port == 0)
		fatal("Unable to establish controller port");
	conf->slurmd_timeout = cf->slurmd_timeout;
	conf->use_pam = cf->use_pam;
	conf->task_plugin_param = cf->task_plugin_param;

	slurm_mutex_unlock(&conf->config_mutex);
	slurm_conf_unlock();
}
Example #5
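squeue's command-line parser; when --nodes is given, each name is translated from NodeHostName to NodeName with slurm_conf_get_nodename() before being used as a filter.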
/*
 * parse_command_line - process the squeue command line arguments
 */
extern void
parse_command_line( int argc, char* argv[] )
{
	char *env_val = NULL;
	bool override_format_env = false;
	int opt_char;
	int option_index;
	static struct option long_options[] = {
		{"accounts",   required_argument, 0, 'A'},
		{"all",        no_argument,       0, 'a'},
		{"format",     required_argument, 0, 'o'},
		{"help",       no_argument,       0, OPT_LONG_HELP},
		{"hide",       no_argument,       0, OPT_LONG_HIDE},
		{"iterate",    required_argument, 0, 'i'},
		{"jobs",       optional_argument, 0, 'j'},
		{"long",       no_argument,       0, 'l'},
		{"cluster",    required_argument, 0, 'M'},
		{"clusters",   required_argument, 0, 'M'},
		{"node",       required_argument, 0, 'n'},
		{"nodes",      required_argument, 0, 'n'},
		{"noheader",   no_argument,       0, 'h'},
		{"partitions", required_argument, 0, 'p'},
		{"qos",        required_argument, 0, 'q'},
		{"reservation",required_argument, 0, 'R'},
		{"sort",       required_argument, 0, 'S'},
		{"start",      no_argument,       0, OPT_LONG_START},
		{"steps",      optional_argument, 0, 's'},
		{"states",     required_argument, 0, 't'},
		{"usage",      no_argument,       0, OPT_LONG_USAGE},
		{"user",       required_argument, 0, 'u'},
		{"users",      required_argument, 0, 'u'},
		{"verbose",    no_argument,       0, 'v'},
		{"version",    no_argument,       0, 'V'},
		{NULL,         0,                 0, 0}
	};

	if (getenv("SQUEUE_ALL"))
		params.all_flag = true;
	if ( ( env_val = getenv("SQUEUE_SORT") ) )
		params.sort = xstrdup(env_val);
	if ( ( env_val = getenv("SLURM_CLUSTERS") ) ) {
		if (!(params.clusters = slurmdb_get_info_cluster(env_val))) {
			error("'%s' can't be reached now, "
			      "or it is an invalid entry for "
			      "SLURM_CLUSTERS.  Use 'sacctmgr --list "
			      "cluster' to see avaliable clusters.",
			      env_val);
			exit(1);
		}
		working_cluster_rec = list_peek(params.clusters);
	}

	while ((opt_char = getopt_long(argc, argv,
				       "A:ahi:j::ln:M:o:p:q:R:s::S:t:u:U:vV",
				       long_options, &option_index)) != -1) {
		switch (opt_char) {
		case (int) '?':
			fprintf(stderr, "Try \"squeue --help\" "
				"for more information\n");
			exit(1);
		case (int) 'A':
		case (int) 'U':	/* backwards compatibility */
			xfree(params.accounts);
			params.accounts = xstrdup(optarg);
			params.account_list =
				_build_str_list( params.accounts );
			break;
		case (int) 'a':
			params.all_flag = true;
			break;
		case (int) 'h':
			params.no_header = true;
			break;
		case (int) 'i':
			params.iterate = atoi(optarg);
			if (params.iterate <= 0) {
				error("Invalid iteration interval: "
				      "--iterate=%s", optarg);
				exit(1);
			}
			break;
		case (int) 'j':
			if (optarg) {
				params.jobs = xstrdup(optarg);
				params.job_list =
					_build_job_list(params.jobs);
			}
			params.job_flag = true;
			break;
		case (int) 'l':
			params.long_list = true;
			override_format_env = true;
			break;
		case (int) 'M':
			if (params.clusters)
				list_destroy(params.clusters);
			if (!(params.clusters =
			    slurmdb_get_info_cluster(optarg))) {
				error("'%s' can't be reached now, "
				      "or it is an invalid entry for "
				      "--cluster.  Use 'sacctmgr --list "
				      "cluster' to see avaliable clusters.",
				      optarg);
				exit(1);
			}
			working_cluster_rec = list_peek(params.clusters);
			break;
		case (int) 'n':
			if (params.nodes)
				hostset_destroy(params.nodes);

			params.nodes = hostset_create(optarg);
			if (params.nodes == NULL) {
				error("'%s' invalid entry for --nodes",
				      optarg);
				exit(1);
			}
			break;
		case (int) 'o':
			xfree(params.format);
			params.format = xstrdup(optarg);
			override_format_env = true;
			break;
		case (int) 'p':
			xfree(params.partitions);
			params.partitions = xstrdup(optarg);
			params.part_list =
				_build_str_list( params.partitions );
			params.all_flag = true;
			break;
		case (int) 'q':
			xfree(params.qoss);
			params.qoss = xstrdup(optarg);
			params.qos_list =
				_build_str_list( params.qoss );
			break;
		case (int) 'R':
			xfree(params.reservation);
			params.reservation = xstrdup(optarg);
			break;
		case (int) 's':
			if (optarg) {
				params.steps = xstrdup(optarg);
				params.step_list =
					_build_step_list(params.steps);
			}
			params.step_flag = true;
			override_format_env = true;
			break;
		case (int) 'S':
			xfree(params.sort);
			params.sort = xstrdup(optarg);
			break;
		case (int) 't':
			xfree(params.states);
			params.states = xstrdup(optarg);
			params.state_list =
				_build_state_list( params.states );
			break;
		case (int) 'u':
			xfree(params.users);
			params.users = xstrdup(optarg);
			params.user_list =
				_build_user_list( params.users );
			break;
		case (int) 'v':
			params.verbose++;
			break;
		case (int) 'V':
			print_slurm_version();
			exit(0);
		case OPT_LONG_HELP:
			_help();
			exit(0);
		case OPT_LONG_HIDE:
			params.all_flag = false;
			break;
		case OPT_LONG_START:
			params.start_flag = true;
			break;
		case OPT_LONG_USAGE:
			_usage();
			exit(0);
		}
	}

	if ( override_format_env == false ) {
		if ( ( env_val = getenv("SQUEUE_FORMAT") ) )
			params.format = xstrdup(env_val);
	}

	params.cluster_flags = slurmdb_setup_cluster_flags();
	if (optind < argc) {
		if (params.job_flag) {
			params.jobs = xstrdup(argv[optind++]);
			params.job_list = _build_job_list(params.jobs);
		} else if (params.step_flag) {
			params.steps = xstrdup(argv[optind++]);
			params.step_list = _build_step_list(params.steps);
		}
		if (optind < argc) {
			error("Unrecognized option: %s",argv[optind]);
			_usage();
			exit(1);
		}
	}

	if ( params.job_flag && params.step_flag ) {
		if (params.job_list) {
			verbose("Printing job steps with job filter");
			params.job_flag = false;
		} else {
			error("Incompatible options --jobs and --steps");
			exit(1);
		}
	}

	if ( params.nodes ) {
		char *name1 = NULL;
		char *name2 = NULL;
		hostset_t nodenames = hostset_create(NULL);
		if (nodenames == NULL)
			fatal("malloc failure");

		while ( hostset_count(params.nodes) > 0 ) {
			name1 = hostset_pop(params.nodes);

			/* localhost = use current host name */
			if ( strcasecmp("localhost", name1) == 0 ) {
				name2 = xmalloc(128);
				gethostname_short(name2, 128);
			} else {
				/* translate NodeHostName to NodeName */
				name2 = slurm_conf_get_nodename(name1);

				/* use NodeName if translation failed */
				if ( name2 == NULL )
					name2 = xstrdup(name1);
			}
			hostset_insert(nodenames, name2);
			free(name1);
			xfree(name2);
		}

		/* Replace params.nodename with the new one */
		hostset_destroy(params.nodes);
		params.nodes = nodenames;
	}

	if ( ( params.accounts == NULL ) &&
	     ( env_val = getenv("SQUEUE_ACCOUNT") ) ) {
		params.accounts = xstrdup(env_val);
		params.account_list = _build_str_list( params.accounts );
	}

	if ( ( params.partitions == NULL ) &&
	     ( env_val = getenv("SQUEUE_PARTITION") ) ) {
		params.partitions = xstrdup(env_val);
		params.part_list = _build_str_list( params.partitions );
		params.all_flag = true;
	}

	if ( ( params.qoss == NULL ) &&
	     ( env_val = getenv("SQUEUE_QOS") ) ) {
		params.qoss = xstrdup(env_val);
		params.qos_list = _build_str_list( params.qoss );
	}

	if ( ( params.states == NULL ) &&
	     ( env_val = getenv("SQUEUE_STATES") ) ) {
		params.states = xstrdup(env_val);
		params.state_list = _build_state_list( params.states );
	}

	if ( ( params.users == NULL ) &&
	     ( env_val = getenv("SQUEUE_USERS") ) ) {
		params.users = xstrdup(env_val);
		params.user_list = _build_user_list( params.users );
	}

	if ( params.start_flag && !params.step_flag ) {
		/* Set more defaults */
		if (params.format == NULL)
			params.format = xstrdup("%.7i %.9P %.8j %.8u  %.2t  %.19S %.6D %R");
		if (params.sort == NULL)
			params.sort = xstrdup("S");
		if (params.states == NULL) {
			params.states = xstrdup("PD");
			params.state_list = _build_state_list( params.states );
		}
	}

	params.max_cpus = _max_cpus_per_node();

	if ( params.verbose )
		_print_options();
}