Example #1
0
/* Convert an array of task IDs into a list of host names
 * RET: the string, caller must xfree() this value */
static char *_task_ids_to_host_list(int ntasks, uint32_t taskids[])
{
	int i;
	hostset_t hs;
	char *hosts;
	slurm_step_layout_t *sl;

	if ((sl = launch_common_get_slurm_step_layout(local_srun_job)) == NULL)
		return (xstrdup("Unknown"));

	hs = hostset_create(NULL);
	for (i = 0; i < ntasks; i++) {
		char *host = slurm_step_layout_host_name(sl, taskids[i]);
		if (host) {
			hostset_insert(hs, host);
			free(host);
		} else {
			error("Could not identify host name for task %u",
			      taskids[i]);
		}
	}

	hosts = _hostset_to_string(hs);
	hostset_destroy(hs);

	return (hosts);
}
/* Convert an array of task IDs into a list of host names
 * RET: the string, caller must xfree() this value */
static char *_task_ids_to_host_list(int ntasks, uint32_t *taskids)
{
	int i, task_cnt = 0;
	hostset_t hs;
	char *hosts;
	slurm_step_layout_t *sl;

	if ((sl = launch_common_get_slurm_step_layout(local_srun_job)) == NULL)
		return (xstrdup("Unknown"));

	/* If overhead of determining the hostlist is too high then srun
	 * communications will timeout and fail, so return "Unknown" instead.
	 *
	 * See slurm_step_layout_host_id() in src/common/slurm_step_layout.c
	 * for details. */
	for (i = 0; i < sl->node_cnt; i++) {
		task_cnt += sl->tasks[i];
	}
	if (task_cnt > 100000)
		return (xstrdup("Unknown"));

	hs = hostset_create(NULL);
	for (i = 0; i < ntasks; i++) {
		char *host = slurm_step_layout_host_name(sl, taskids[i]);
		if (host) {
			hostset_insert(hs, host);
			free(host);
		} else {
			error("Could not identify host name for task %u",
			      taskids[i]);
		}
	}

	hosts = _hostset_to_string(hs);
	hostset_destroy(hs);

	return (hosts);
}
Example #3
0
/*
 * slurm_pack_job_will_run - determine if a heterogenous job would execute
 *	immediately if submitted now
 * IN job_req_list - List of job_desc_msg_t structures describing the resource
 *		allocation request
 * RET SLURM_SUCCESS on success, otherwise return SLURM_ERROR with errno set
 */
extern int slurm_pack_job_will_run(List job_req_list)
{
	job_desc_msg_t *req;
	will_run_response_msg_t *will_run_resp;
	char buf[64], *sep = "";
	int rc = SLURM_SUCCESS, inx = 0;
	ListIterator iter, itr;
	time_t first_start = (time_t) 0;
	uint32_t first_job_id = 0, tot_proc_count = 0, *job_id_ptr;
	hostset_t hs = NULL;
	char *job_list = NULL;

	if (!job_req_list || (list_count(job_req_list) == 0)) {
		error("No job descriptors input");
		return SLURM_ERROR;
	}

	iter = list_iterator_create(job_req_list);
	while ((req = (job_desc_msg_t *) list_next(iter))) {
		will_run_resp = NULL;
		rc = slurm_job_will_run2(req, &will_run_resp);

		if (will_run_resp)
			print_multi_line_string(
				will_run_resp->job_submit_user_msg,
				inx, LOG_LEVEL_INFO);

		if ((rc == SLURM_SUCCESS) && will_run_resp) {
			if (first_job_id == 0)
				first_job_id = will_run_resp->job_id;
			if ((first_start == 0) ||
			    (first_start < will_run_resp->start_time))
				first_start = will_run_resp->start_time;
			tot_proc_count += will_run_resp->proc_cnt;
			if (hs)
				hostset_insert(hs, will_run_resp->node_list);
			else
				hs = hostset_create(will_run_resp->node_list);

			if (will_run_resp->preemptee_job_id) {
				itr = list_iterator_create(will_run_resp->
							   preemptee_job_id);
				while ((job_id_ptr = list_next(itr))) {
					if (job_list)
						sep = ",";
					xstrfmtcat(job_list, "%s%u", sep,
						   *job_id_ptr);
				}
				list_iterator_destroy(itr);
			}

			slurm_free_will_run_response_msg(will_run_resp);
		}
		if (rc != SLURM_SUCCESS)
			break;
		inx++;
	}
	list_iterator_destroy(iter);


	if (rc == SLURM_SUCCESS) {
		char node_list[1028] = "";

		if (hs)
			hostset_ranged_string(hs, sizeof(node_list), node_list);
		slurm_make_time_str(&first_start, buf, sizeof(buf));
		info("Job %u to start at %s using %u processors on %s",
		     first_job_id, buf, tot_proc_count, node_list);
		if (job_list)
			info("  Preempts: %s", job_list);
	}

	if (hs)
		hostset_destroy(hs);
	xfree(job_list);

	return rc;
}
Example #4
0
/*
 * slurm_pack_job_will_run - determine if a heterogenous job would execute
 *	immediately if submitted now
 * IN job_req_list - List of job_desc_msg_t structures describing the resource
 *		allocation request
 * RET 0 on success, otherwise return -1 and set errno to indicate the error
 */
extern int slurm_pack_job_will_run(List job_req_list)
{
	job_desc_msg_t *req;
	will_run_response_msg_t *will_run_resp;
	char buf[64], local_hostname[64] = "", *sep = "";
	int rc = SLURM_SUCCESS;
	char *type = "processors";
	ListIterator iter, itr;
	time_t first_start = (time_t) 0;
	uint32_t first_job_id = 0, tot_proc_count = 0, *job_id_ptr;
	hostset_t hs = NULL;
	char *job_list = NULL;

	if (!job_req_list || (list_count(job_req_list) == 0)) {
		error("No job descriptors input");
		return SLURM_ERROR;
	}

	(void) gethostname_short(local_hostname, sizeof(local_hostname));
	iter = list_iterator_create(job_req_list);
	while ((req = (job_desc_msg_t *) list_next(iter))) {
		if ((req->alloc_node == NULL) && local_hostname[0])
			req->alloc_node = local_hostname;

		will_run_resp = NULL;
		rc = slurm_job_will_run2(req, &will_run_resp);
		if ((rc == SLURM_SUCCESS) && will_run_resp) {
			if (first_job_id == 0)
				first_job_id = will_run_resp->job_id;
			if ((first_start == 0) ||
			    (first_start < will_run_resp->start_time))
				first_start = will_run_resp->start_time;
			tot_proc_count += will_run_resp->proc_cnt;
			if (hs)
				hostset_insert(hs, will_run_resp->node_list);
			else
				hs = hostset_create(will_run_resp->node_list);

			if (will_run_resp->preemptee_job_id) {
				itr = list_iterator_create(will_run_resp->
							   preemptee_job_id);
				while ((job_id_ptr = list_next(itr))) {
					if (job_list)
						sep = ",";
					xstrfmtcat(job_list, "%s%u", sep,
						   *job_id_ptr);
				}
				list_iterator_destroy(itr);
			}

			slurm_free_will_run_response_msg(will_run_resp);
		}
		if (req->alloc_node == local_hostname)
			req->alloc_node = NULL;
		if (rc != SLURM_SUCCESS)
			break;
	}
	list_iterator_destroy(iter);


	if (rc == SLURM_SUCCESS) {
		uint32_t cluster_flags = slurmdb_setup_cluster_flags();
		char node_list[1028] = "";

		if (cluster_flags & CLUSTER_FLAG_BG)
			type = "cnodes";
		if (hs)
			hostset_ranged_string(hs, sizeof(node_list), node_list);
		slurm_make_time_str(&first_start, buf, sizeof(buf));
		info("Job %u to start at %s using %u %s on %s",
		     first_job_id, buf, tot_proc_count, type, node_list);
		if (job_list)
			info("  Preempts: %s", job_list);
	}

	if (hs)
		hostset_destroy(hs);
	xfree(job_list);

	return rc;
}
Example #5
0
/*
 * parse_command_line
 */
extern void
parse_command_line( int argc, char* argv[] )
{
	char *env_val = NULL;
	bool override_format_env = false;
	int opt_char;
	int option_index;
	static struct option long_options[] = {
		{"accounts",   required_argument, 0, 'A'},
		{"all",        no_argument,       0, 'a'},
		{"format",     required_argument, 0, 'o'},
		{"help",       no_argument,       0, OPT_LONG_HELP},
		{"hide",       no_argument,       0, OPT_LONG_HIDE},
		{"iterate",    required_argument, 0, 'i'},
		{"jobs",       optional_argument, 0, 'j'},
		{"long",       no_argument,       0, 'l'},
		{"cluster",    required_argument, 0, 'M'},
		{"clusters",   required_argument, 0, 'M'},
		{"node",       required_argument, 0, 'n'},
		{"nodes",      required_argument, 0, 'n'},
		{"noheader",   no_argument,       0, 'h'},
		{"partitions", required_argument, 0, 'p'},
		{"qos",        required_argument, 0, 'q'},
		{"reservation",required_argument, 0, 'R'},
		{"sort",       required_argument, 0, 'S'},
		{"start",      no_argument,       0, OPT_LONG_START},
		{"steps",      optional_argument, 0, 's'},
		{"states",     required_argument, 0, 't'},
		{"usage",      no_argument,       0, OPT_LONG_USAGE},
		{"user",       required_argument, 0, 'u'},
		{"users",      required_argument, 0, 'u'},
		{"verbose",    no_argument,       0, 'v'},
		{"version",    no_argument,       0, 'V'},
		{NULL,         0,                 0, 0}
	};

	if (getenv("SQUEUE_ALL"))
		params.all_flag = true;
	if ( ( env_val = getenv("SQUEUE_SORT") ) )
		params.sort = xstrdup(env_val);
	if ( ( env_val = getenv("SLURM_CLUSTERS") ) ) {
		if (!(params.clusters = slurmdb_get_info_cluster(env_val))) {
			error("'%s' can't be reached now, "
			      "or it is an invalid entry for "
			      "SLURM_CLUSTERS.  Use 'sacctmgr --list "
			      "cluster' to see avaliable clusters.",
			      env_val);
			exit(1);
		}
		working_cluster_rec = list_peek(params.clusters);
	}

	while ((opt_char = getopt_long(argc, argv,
				       "A:ahi:j::ln:M:o:p:q:R:s::S:t:u:U:vV",
				       long_options, &option_index)) != -1) {
		switch (opt_char) {
		case (int)'?':
			fprintf(stderr, "Try \"squeue --help\" "
				"for more information\n");
			exit(1);
		case (int) 'A':
		case (int) 'U':	/* backwards compatibility */
			xfree(params.accounts);
		        params.accounts = xstrdup(optarg);
			params.account_list =
				_build_str_list( params.accounts );
		break;
		case (int)'a':
			params.all_flag = true;
			break;
		case (int)'h':
			params.no_header = true;
			break;
		case (int) 'i':
			params.iterate= atoi(optarg);
			if (params.iterate <= 0) {
				error ("--iterate=%s\n", optarg);
				exit(1);
			}
			break;
		case (int) 'j':
			if (optarg) {
				params.jobs = xstrdup(optarg);
				params.job_list =
					_build_job_list(params.jobs);
			}
			params.job_flag = true;
			break;
		case (int) 'l':
			params.long_list = true;
			override_format_env = true;
			break;
		case (int) 'M':
			if (params.clusters)
				list_destroy(params.clusters);
			if (!(params.clusters =
			    slurmdb_get_info_cluster(optarg))) {
				error("'%s' can't be reached now, "
				      "or it is an invalid entry for "
				      "--cluster.  Use 'sacctmgr --list "
				      "cluster' to see avaliable clusters.",
				      optarg);
				exit(1);
			}
			working_cluster_rec = list_peek(params.clusters);
			break;
		case (int) 'n':
			if (params.nodes)
				hostset_destroy(params.nodes);

			params.nodes = hostset_create(optarg);
			if (params.nodes == NULL) {
				error("'%s' invalid entry for --nodes",
				      optarg);
				exit(1);
			}
			break;
		case (int) 'o':
			xfree(params.format);
			params.format = xstrdup(optarg);
			override_format_env = true;

			break;
		case (int) 'p':
			xfree(params.partitions);
			params.partitions = xstrdup(optarg);
			params.part_list =
				_build_str_list( params.partitions );
			params.all_flag = true;
			break;
		case (int) 'q':
			xfree(params.qoss);
			params.qoss = xstrdup(optarg);
			params.qos_list =
				_build_str_list( params.qoss );
			break;
		case (int) 'R':
			xfree(params.reservation);
			params.reservation = xstrdup(optarg);
			break;
		case (int) 's':
			if (optarg) {
				params.steps = xstrdup(optarg);
				params.step_list =
					_build_step_list(params.steps);
			}
			params.step_flag = true;
			override_format_env = true;
			break;
		case (int) 'S':
			xfree(params.sort);
			params.sort = xstrdup(optarg);
			break;
		case (int) 't':
			xfree(params.states);
			params.states = xstrdup(optarg);
			params.state_list =
				_build_state_list( params.states );
			break;
		case (int) 'u':
			xfree(params.users);
			params.users = xstrdup(optarg);
			params.user_list =
				_build_user_list( params.users );
			break;
		case (int) 'v':
			params.verbose++;
			break;
		case (int) 'V':
			print_slurm_version();
			exit(0);
		case OPT_LONG_HELP:
			_help();
			exit(0);
		case OPT_LONG_HIDE:
			params.all_flag = false;
			break;
		case OPT_LONG_START:
			params.start_flag = true;
			break;
		case OPT_LONG_USAGE:
			_usage();
			exit(0);
		}
	}

	if ( override_format_env == false ) {
		if ( ( env_val = getenv("SQUEUE_FORMAT") ) )
			params.format = xstrdup(env_val);
	}

	params.cluster_flags = slurmdb_setup_cluster_flags();
	if (optind < argc) {
		if (params.job_flag) {
			params.jobs = xstrdup(argv[optind++]);
			params.job_list = _build_job_list(params.jobs);
		} else if (params.step_flag) {
			params.steps = xstrdup(argv[optind++]);
			params.step_list = _build_step_list(params.steps);
		}
		if (optind < argc) {
			error("Unrecognized option: %s",argv[optind]);
			_usage();
			exit(1);
		}
	}

	if ( params.job_flag && params.step_flag) {
		if (params.job_list) {
			verbose("Printing job steps with job filter");
			params.job_flag = false;
		} else {
			error("Incompatible options --jobs and --steps");
			exit(1);
		}
	}

	if ( params.nodes ) {
		char *name1 = NULL;
		char *name2 = NULL;
		hostset_t nodenames = hostset_create(NULL);
		if (nodenames == NULL)
			fatal("malloc failure");

		while ( hostset_count(params.nodes) > 0 ) {
			name1 = hostset_pop(params.nodes);

			/* localhost = use current host name */
			if ( strcasecmp("localhost", name1) == 0 ) {
				name2 = xmalloc(128);
				gethostname_short(name2, 128);
			} else {
				/* translate NodeHostName to NodeName */
				name2 = slurm_conf_get_nodename(name1);

				/* use NodeName if translation failed */
				if ( name2 == NULL )
					name2 = xstrdup(name1);
			}
			hostset_insert(nodenames, name2);
			free(name1);
			xfree(name2);
		}

		/* Replace params.nodename with the new one */
		hostset_destroy(params.nodes);
		params.nodes = nodenames;
	}

	if ( ( params.accounts == NULL ) &&
	     ( env_val = getenv("SQUEUE_ACCOUNT") ) ) {
		params.accounts = xstrdup(env_val);
		params.account_list = _build_str_list( params.accounts );
	}

	if ( ( params.partitions == NULL ) &&
	     ( env_val = getenv("SQUEUE_PARTITION") ) ) {
		params.partitions = xstrdup(env_val);
		params.part_list = _build_str_list( params.partitions );
		params.all_flag = true;
	}

	if ( ( params.qoss == NULL ) &&
	     ( env_val = getenv("SQUEUE_QOS") ) ) {
		params.qoss = xstrdup(env_val);
		params.qos_list = _build_str_list( params.qoss );
	}

	if ( ( params.states == NULL ) &&
	     ( env_val = getenv("SQUEUE_STATES") ) ) {
		params.states = xstrdup(env_val);
		params.state_list = _build_state_list( params.states );
	}

	if ( ( params.users == NULL ) &&
	     ( env_val = getenv("SQUEUE_USERS") ) ) {
		params.users = xstrdup(env_val);
		params.user_list = _build_user_list( params.users );
	}

	if ( params.start_flag && !params.step_flag ) {
		/* Set more defaults */
		if (params.format == NULL)
			params.format = xstrdup("%.7i %.9P %.8j %.8u  %.2t  %.19S %.6D %R");
		if (params.sort == NULL)
			params.sort = xstrdup("S");
		if (params.states == NULL) {
			params.states = xstrdup("PD");
			params.state_list = _build_state_list( params.states );
		}
	}

	params.max_cpus = _max_cpus_per_node();

	if ( params.verbose )
		_print_options();
}