Exemplo n.º 1
0
Arquivo: opts.c Projeto: HPCNow/slurm
/*
 * parse_command_line, fill in params data structure with data
 */
extern void parse_command_line(int argc, char **argv)
{
	char *env_val = NULL;
	int opt_char;
	int option_index;
	hostlist_t host_list;
	bool long_form = false;
	bool opt_a_set = false, opt_p_set = false;
	bool env_a_set = false, env_p_set = false;
	static struct option long_options[] = {
		{"all",       no_argument,       0, 'a'},
		{"bg",        no_argument,       0, 'b'},
		{"dead",      no_argument,       0, 'd'},
		{"exact",     no_argument,       0, 'e'},
		{"federation",no_argument,       0, OPT_LONG_FEDR},
		{"help",      no_argument,       0, OPT_LONG_HELP},
		{"hide",      no_argument,       0, OPT_LONG_HIDE},
		{"iterate",   required_argument, 0, 'i'},
		{"local",     no_argument,       0, OPT_LONG_LOCAL},
		{"long",      no_argument,       0, 'l'},
		{"cluster",   required_argument, 0, 'M'},
		{"clusters",  required_argument, 0, 'M'},
		{"nodes",     required_argument, 0, 'n'},
		{"noconvert", no_argument,       0, OPT_LONG_NOCONVERT},
		{"noheader",  no_argument,       0, 'h'},
		{"Node",      no_argument,       0, 'N'},
		{"format",    required_argument, 0, 'o'},
		{"Format",    required_argument, 0, 'O'},
		{"partition", required_argument, 0, 'p'},
		{"responding",no_argument,       0, 'r'},
		{"list-reasons", no_argument,    0, 'R'},
		{"summarize", no_argument,       0, 's'},
		{"sort",      required_argument, 0, 'S'},
		{"states",    required_argument, 0, 't'},
		{"reservation",no_argument,      0, 'T'},
		{"usage",     no_argument,       0, OPT_LONG_USAGE},
		{"verbose",   no_argument,       0, 'v'},
		{"version",   no_argument,       0, 'V'},
		{NULL,        0,                 0, 0}
	};

	params.convert_flags = CONVERT_NUM_UNIT_EXACT;

	if (slurmctld_conf.fed_params &&
	    strstr(slurmctld_conf.fed_params, "fed_display"))
		params.federation_flag = true;

	if (getenv("SINFO_ALL")) {
		env_a_set = true;
		params.all_flag = true;
	}
	if (getenv("SINFO_FEDERATION"))
		params.federation_flag = true;
	if (getenv("SINFO_LOCAL"))
		params.local = true;
	if ( ( env_val = getenv("SINFO_PARTITION") ) ) {
		env_p_set = true;
		params.partition = xstrdup(env_val);
		params.part_list = _build_part_list(env_val);
		params.all_flag = true;
	}
	if (env_a_set && env_p_set) {
		error("Conflicting options, SINFO_ALL and SINFO_PARTITION, specified. "
		      "Please choose one or the other.");
		exit(1);
	}
	if ( ( env_val = getenv("SINFO_SORT") ) )
		params.sort = xstrdup(env_val);
	if ( ( env_val = getenv("SLURM_CLUSTERS") ) ) {
		if (!(params.clusters = slurmdb_get_info_cluster(env_val))) {
			print_db_notok(env_val, 1);
			exit(1);
		}
		working_cluster_rec = list_peek(params.clusters);
		params.local = true;
	}

	while ((opt_char = getopt_long(argc, argv,
				       "abdehi:lM:n:No:O:p:rRsS:t:TvV",
				       long_options, &option_index)) != -1) {
		switch (opt_char) {
		case (int)'?':
			fprintf(stderr,
				"Try \"sinfo --help\" for more information\n");
			exit(1);
			break;
		case (int)'a':
			opt_a_set = true;
			xfree(params.partition);
			FREE_NULL_LIST(params.part_list);
			params.all_flag = true;
			break;
		case (int)'b':
			params.cluster_flags = slurmdb_setup_cluster_flags();
			if (params.cluster_flags & CLUSTER_FLAG_BG)
				params.bg_flag = true;
			else {
				error("Must be on a BG system to use --bg "
				      "option, if using --cluster option "
				      "put the --bg option "
				      "after the --cluster option.");
				exit(1);
			}
			break;
		case (int)'d':
			params.dead_nodes = true;
			break;
		case (int)'e':
			params.exact_match = true;
			break;
		case (int)'h':
			params.no_header = true;
			break;
		case (int) 'i':
			params.iterate= atoi(optarg);
			if (params.iterate <= 0) {
				error ("Error: invalid entry for "
				       "--iterate=%s", optarg);
				exit(1);
			}
			break;
		case (int) 'l':
			params.long_output = true;
			break;
		case (int) 'M':
			FREE_NULL_LIST(params.clusters);
			if (!(params.clusters =
			      slurmdb_get_info_cluster(optarg))) {
				print_db_notok(optarg, 0);
				exit(1);
			}
			working_cluster_rec = list_peek(params.clusters);
			params.local = true;
			break;
		case OPT_LONG_NOCONVERT:
			params.convert_flags |= CONVERT_NUM_UNIT_NO;
			break;
		case (int) 'n':
			xfree(params.nodes);
			params.nodes = xstrdup(optarg);
			/*
			 * confirm valid nodelist entry
			 */
			host_list = hostlist_create(params.nodes);
			if (!host_list) {
				error("'%s' invalid entry for --nodes",
				      optarg);
				exit(1);
			}
			if (hostlist_count(host_list) == 1) {
				params.node_name_single = true;
				xfree(params.nodes);
				params.nodes =
				    hostlist_deranged_string_xmalloc(host_list);
			} else
				params.node_name_single = false;
			hostlist_destroy(host_list);
			break;
		case (int) 'N':
			params.node_flag = true;
			break;
		case (int) 'o':
			xfree(params.format);
			params.format = xstrdup(optarg);
			break;
		case (int) 'O':
			long_form = true;
			xfree(params.format);
			params.format = xstrdup(optarg);
			break;
		case (int) 'p':
			opt_p_set = true;
			xfree(params.partition);
			FREE_NULL_LIST(params.part_list);
			params.partition = xstrdup(optarg);
			params.part_list = _build_part_list(optarg);
			params.all_flag = true;
			break;
		case (int) 'r':
			params.responding_nodes = true;
			break;
		case (int) 'R':
			params.list_reasons = true;
			break;
		case (int) 's':
			params.summarize = true;
			break;
		case (int) 'S':
			xfree(params.sort);
			params.sort = xstrdup(optarg);
			break;
		case (int) 't':
			xfree(params.states);
			params.states = xstrdup(optarg);
			if (!(params.state_list = _build_state_list(optarg))) {
				error ("valid states: %s", _node_state_list ());
				exit (1);
			}
			break;
		case (int) 'T':
			params.reservation_flag = true;
			break;
		case (int) 'v':
			params.verbose++;
			break;
		case (int) 'V':
			print_slurm_version ();
			exit(0);
		case (int) OPT_LONG_FEDR:
			params.federation_flag = true;
			break;
		case (int) OPT_LONG_HELP:
			_help();
			exit(0);
		case (int) OPT_LONG_USAGE:
			_usage();
			exit(0);
		case OPT_LONG_HIDE:
			params.all_flag = false;
			break;
		case OPT_LONG_LOCAL:
			params.local = true;
			break;
		}
	}

	if (opt_a_set && opt_p_set) {
		error("Conflicting options, -a and -p, specified. "
		      "Please choose one or the other.");
		exit(1);
	}

	params.cluster_flags = slurmdb_setup_cluster_flags();

	if (params.federation_flag && !params.clusters && !params.local) {
		void *ptr = NULL;
		char *cluster_name = slurm_get_cluster_name();
		if (slurm_load_federation(&ptr) ||
		    !cluster_in_federation(ptr, cluster_name)) {
			/* Not in federation */
			params.local = true;
			slurm_destroy_federation_rec(ptr);
		} else {
			params.fed = (slurmdb_federation_rec_t *) ptr;
		}
		xfree(cluster_name);
	}

	if ( params.format == NULL ) {
		if ( params.summarize ) {
			params.part_field_flag = true;	/* compute size later */
			if (params.cluster_flags & CLUSTER_FLAG_BG)
				params.format = "%9P %.5a %.10l %.32F  %N";
			else
				params.format = "%9P %.5a %.10l %.16F  %N";
		} else if ( params.node_flag ) {
			params.node_field_flag = true;	/* compute size later */
			params.part_field_flag = true;	/* compute size later */
			params.format = params.long_output ?
			  "%N %.6D %.9P %.11T %.4c %.8z %.6m %.8d %.6w %.8f %20E" :
			  "%N %.6D %.9P %6t";

		} else if (params.list_reasons) {
			params.format = params.long_output ?
			  "%20E %12U %19H %6t %N" :
			  "%20E %9u %19H %N";

		} else if ((env_val = getenv ("SINFO_FORMAT"))) {
			params.format = xstrdup(env_val);


		} else if (params.fed) {
			params.part_field_flag = true;	/* compute size later */
			params.format = params.long_output ?
			  "%9P %8V %.5a %.10l %.10s %.4r %.8h %.10g %.6D %.11T %N" :
			  "%9P %8V %.5a %.10l %.6D %.6t %N";
		} else {
			params.part_field_flag = true;	/* compute size later */
			params.format = params.long_output ?
			  "%9P %.5a %.10l %.10s %.4r %.8h %.10g %.6D %.11T %N" :
			  "%9P %.5a %.10l %.6D %.6t %N";
		}
	}

	if (long_form)
		_parse_long_format(params.format);
	else
		_parse_format(params.format);

	if (params.list_reasons && (params.state_list == NULL)) {
		params.states = xstrdup ("down,drain,error");
		if (!(params.state_list = _build_state_list (params.states)))
			fatal ("Unable to build state list for -R!");
	}

	if (params.dead_nodes || params.nodes || params.partition ||
			params.responding_nodes ||params.state_list)
		params.filtering = true;

	if (params.verbose)
		_print_options();
}
Exemplo n.º 2
0
/* Test if a batch launch request should be defered
 * RET -1: abort the request, pending job cancelled
 *      0: execute the request now
 *      1: defer the request
 */
static int _batch_launch_defer(queued_request_t *queued_req_ptr)
{
	agent_arg_t *agent_arg_ptr;
	batch_job_launch_msg_t *launch_msg_ptr;
	time_t now = time(NULL);
	struct job_record  *job_ptr;
	int delay_time, nodes_ready = 0;

	agent_arg_ptr = queued_req_ptr->agent_arg_ptr;
	if (agent_arg_ptr->msg_type != REQUEST_BATCH_JOB_LAUNCH)
		return 0;

	if (difftime(now, queued_req_ptr->last_attempt) < 10) {
		/* Reduce overhead by only testing once every 10 secs */
		return 1;
	}

	launch_msg_ptr = (batch_job_launch_msg_t *)agent_arg_ptr->msg_args;
	job_ptr = find_job_record(launch_msg_ptr->job_id);
	if ((job_ptr == NULL) ||
	    (!IS_JOB_RUNNING(job_ptr) && !IS_JOB_SUSPENDED(job_ptr))) {
		info("agent(batch_launch): removed pending request for "
		     "cancelled job %u",
		     launch_msg_ptr->job_id);
		return -1;	/* job cancelled while waiting */
	}

	if (job_ptr->wait_all_nodes) {
		(void) job_node_ready(launch_msg_ptr->job_id, &nodes_ready);
	} else {
#ifdef HAVE_FRONT_END
		nodes_ready = 1;
#else
		struct node_record *node_ptr;
		char *hostname;

		hostname = hostlist_deranged_string_xmalloc(
					agent_arg_ptr->hostlist);
		node_ptr = find_node_record(hostname);
		if (node_ptr == NULL) {
			error("agent(batch_launch) removed pending request for "
			      "job %u, missing node %s",
			      launch_msg_ptr->job_id, hostname);
			xfree(hostname);
			return -1;	/* invalid request?? */
		}
		xfree(hostname);
		if (!IS_NODE_POWER_SAVE(node_ptr) &&
		    !IS_NODE_NO_RESPOND(node_ptr)) {
			nodes_ready = 1;
		}
#endif
	}

	delay_time = difftime(now, job_ptr->start_time);
	if (nodes_ready) {
		/* ready to launch, adjust time limit for boot time */
		if (delay_time && (job_ptr->time_limit != INFINITE) &&
		    (!wiki2_sched)) {
			info("Job %u launch delayed by %d secs, "
			     "updating end_time",
			     launch_msg_ptr->job_id, delay_time);
			job_ptr->end_time += delay_time;
		}
		queued_req_ptr->last_attempt = (time_t) 0;
		return 0;
	}

	if (queued_req_ptr->last_attempt == 0) {
		queued_req_ptr->first_attempt = now;
		queued_req_ptr->last_attempt  = now;
	} else if (difftime(now, queued_req_ptr->first_attempt) >=
				 slurm_get_resume_timeout()) {
		error("agent waited too long for nodes to respond, "
		      "sending batch request anyway...");
		if (delay_time && (job_ptr->time_limit != INFINITE) &&
		    (!wiki2_sched)) {
			info("Job %u launch delayed by %d secs, "
			     "updating end_time",
			     launch_msg_ptr->job_id, delay_time);
			job_ptr->end_time += delay_time;
		}
		queued_req_ptr->last_attempt = (time_t) 0;
		return 0;
	}

	queued_req_ptr->last_attempt  = now;
	return 1;
}