Example #1
/*
 * scontrol_hold - perform some job hold/release operation
 * IN op - hold/release operation
 * IN job_id_str - a job id
 * RET 0 if no slurm error, errno otherwise. parsing error prints
 *		error message and returns 0
 */
extern int
scontrol_hold(char *op, char *job_id_str)
{
	int rc = SLURM_SUCCESS;
	char *next_str;
	job_desc_msg_t job_msg;
	uint16_t job_state;

	slurm_init_job_desc_msg (&job_msg);

	/* set current user, needed e.g., for AllowGroups checks */
	job_msg.user_id = getuid();

	if (job_id_str) {
		job_msg.job_id = (uint32_t) strtol(job_id_str, &next_str, 10);
		if ((job_msg.job_id == 0) || (next_str[0] != '\0')) {
			fprintf(stderr, "Invalid job id specified\n");
			exit_code = 1;
			return 0;
		}
	} else {
		fprintf(stderr, "Invalid job id specified\n");
		exit_code = 1;
		return 0;
	}

	job_state = scontrol_get_job_state(job_msg.job_id);
	if (job_state == (uint16_t) NO_VAL)
		return SLURM_ERROR;
	if ((job_state & JOB_STATE_BASE) != JOB_PENDING) {
		slurm_seterrno(ESLURM_JOB_NOT_PENDING);
		return ESLURM_JOB_NOT_PENDING;
	}

	if ((strncasecmp(op, "holdu", 5) == 0) ||
	    (strncasecmp(op, "uhold", 5) == 0)) {
		job_msg.priority = 0;
		job_msg.alloc_sid = ALLOC_SID_USER_HOLD;
	} else if (strncasecmp(op, "hold", 4) == 0) {
		job_msg.priority = 0;
		job_msg.alloc_sid = 0;
	} else
		job_msg.priority = INFINITE;

	if (slurm_update_job(&job_msg))
		return slurm_get_errno();

	return rc;
}
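
The hold/release pattern above reduces to three steps: slurm_init_job_desc_msg() to mark every field "unchanged", priority = 0 to hold (or INFINITE to release), and slurm_update_job() to send the update. A minimal standalone sketch of that core, assuming libslurm's public headers; the job ID is a placeholder and error handling is trimmed to essentials:

#include <stdio.h>
#include <slurm/slurm.h>
#include <slurm/slurm_errno.h>

/* Hold (hold != 0) or release (hold == 0) one job by numeric ID. */
static int hold_job(uint32_t job_id, int hold)
{
	job_desc_msg_t msg;

	slurm_init_job_desc_msg(&msg);
	msg.job_id   = job_id;
	msg.priority = hold ? 0 : INFINITE;	/* 0 = held, INFINITE = schedulable */

	if (slurm_update_job(&msg) != SLURM_SUCCESS) {
		fprintf(stderr, "slurm_update_job: %s\n",
			slurm_strerror(slurm_get_errno()));
		return -1;
	}
	return 0;
}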
Example #2
File: alloc.c Project: BYUHPC/slurm
/*
 * convert perl HV to job_desc_msg_t
 * return 0 on success, -1 on failure
 */
int
hv_to_job_desc_msg(HV *hv, job_desc_msg_t *job_desc)
{
	SV **svp;
	HV *environ_hv;
	AV *argv_av;
	SV *val;
	char *env_key, *env_val;
	I32 klen;
	STRLEN vlen;
	int num_keys, i;

	slurm_init_job_desc_msg(job_desc);

	FETCH_FIELD(hv, job_desc, account, charp, FALSE);
	FETCH_FIELD(hv, job_desc, acctg_freq, charp, FALSE);
	FETCH_FIELD(hv, job_desc, alloc_node, charp, FALSE);
	FETCH_FIELD(hv, job_desc, alloc_resp_port, uint16_t, FALSE);
	FETCH_FIELD(hv, job_desc, alloc_sid, uint32_t, FALSE);
	/* argv, argc */
	if((svp = hv_fetch(hv, "argv", 4, FALSE))) {
		if(SvROK(*svp) && SvTYPE(SvRV(*svp)) == SVt_PVAV) {
			argv_av = (AV*)SvRV(*svp);
			job_desc->argc = av_len(argv_av) + 1;
			if (job_desc->argc > 0) {
				Newz(0, job_desc->argv, (int32_t)(job_desc->argc + 1), char*);
				for(i = 0; i < job_desc->argc; i ++) {
					if((svp = av_fetch(argv_av, i, FALSE)))
						*(job_desc->argv + i) = (char*) SvPV_nolen(*svp);
					else {
						Perl_warn(aTHX_ "error fetching `argv' of job descriptor");
						free_job_desc_msg_memory(job_desc);
						return -1;
					}
				}
			}
		} else {
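
Stripped of the Perl types, the argv handling above is the usual pattern for attaching an argument vector to a job_desc_msg_t: set argc, allocate argc + 1 pointers so the array stays NULL-terminated, and copy each string. A plain-C sketch of the same idea, using malloc/strdup in place of the Perl and Newz() helpers:

#include <stdlib.h>
#include <string.h>
#include <slurm/slurm.h>

/* Copy an argument vector into job_desc->argc/argv.
 * Returns 0 on success, -1 on allocation failure. */
static int set_job_argv(job_desc_msg_t *job_desc, int argc, char **argv)
{
	int i;

	job_desc->argc = argc;
	/* one extra slot keeps the array NULL-terminated */
	job_desc->argv = calloc(argc + 1, sizeof(char *));
	if (!job_desc->argv)
		return -1;
	for (i = 0; i < argc; i++) {
		job_desc->argv[i] = strdup(argv[i]);
		if (!job_desc->argv[i])
			return -1;
	}
	return 0;
}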
Example #3
extern void create_create_popup(GtkAction *action, gpointer user_data)
{
	GtkWidget *popup = gtk_dialog_new_with_buttons(
		"Create",
		GTK_WINDOW(user_data),
		GTK_DIALOG_MODAL | GTK_DIALOG_DESTROY_WITH_PARENT,
		NULL);
	int i, response = 0;
	GtkWidget *label = NULL;
	GtkWidget *entry = NULL;
	GtkTreeModel *model = NULL;
	GtkTreeIter iter;
	const gchar *name = gtk_action_get_name(action);
	sview_search_info_t sview_search_info;
	job_desc_msg_t *job_msg = NULL;
	submit_response_msg_t *slurm_alloc_msg = NULL;
	update_part_msg_t *part_msg = NULL;
	resv_desc_msg_t *resv_msg = NULL;
	char *res_name, *temp;

	sview_search_info.gchar_data = NULL;
	sview_search_info.int_data = NO_VAL;
	sview_search_info.int_data2 = NO_VAL;

	label = gtk_dialog_add_button(GTK_DIALOG(popup),
				      GTK_STOCK_OK, GTK_RESPONSE_OK);
	gtk_window_set_default(GTK_WINDOW(popup), label);
	gtk_dialog_add_button(GTK_DIALOG(popup),
			      GTK_STOCK_CANCEL, GTK_RESPONSE_CANCEL);
	gtk_window_set_default_size(GTK_WINDOW(popup), 400, 600);

	if (!strcmp(name, "batch_job")) {
		sview_search_info.search_type = CREATE_BATCH_JOB;
		entry = create_entry();
		label = gtk_label_new(
			"Batch job submission specifications\n\n"
			"Specify size (task and/or node count) plus the\n"
			"script. All other fields are optional.\n\n"
			"More fields will be made available later.");
		job_msg = xmalloc(sizeof(job_desc_msg_t));
		slurm_init_job_desc_msg(job_msg);
		job_msg->group_id = getgid();
		job_msg->user_id  = getuid();
		job_msg->work_dir = xmalloc(1024);
		if (!getcwd(job_msg->work_dir, 1024))
			goto end_it;
		entry = create_job_entry(job_msg, model, &iter);
	} else if (!strcmp(name, "partition")) {
		sview_search_info.search_type = CREATE_PARTITION;
		entry = create_entry();
		label = gtk_label_new(
			"Partition creation specifications\n\n"
			"Specify Name. All other fields are optional.");
		part_msg = xmalloc(sizeof(update_part_msg_t));
		slurm_init_part_desc_msg(part_msg);
		entry = create_part_entry(part_msg, model, &iter);
	} else if (!strcmp(name, "reservation")) {
		sview_search_info.search_type = CREATE_RESERVATION;
		label = gtk_label_new(
			"Reservation creation specifications\n\n"
			"Specify Time_Start and either Duration or Time_End.\n"
#ifdef HAVE_BG
			"Specify either Node_Count or Midplane_List.\n"
#else
			"Specify either Node_Count or Node_List.\n"
#endif
			"Specify either Accounts or Users.\n\n"
			"Supported Flags include: Maintenance, Overlap,\n"
			"Ignore_Jobs, Daily and Weekly, License_Only\n"
			"and Static_Alloc.\n"
			"All other fields are optional.");
		resv_msg = xmalloc(sizeof(resv_desc_msg_t));
		slurm_init_resv_desc_msg(resv_msg);
		entry = create_resv_entry(resv_msg, model, &iter);
	} else {
		sview_search_info.search_type = 0;
		goto end_it;
	}

	gtk_box_pack_start(GTK_BOX(GTK_DIALOG(popup)->vbox),
			   label, FALSE, FALSE, 0);
	gtk_box_pack_start(GTK_BOX(GTK_DIALOG(popup)->vbox),
			   entry, TRUE, TRUE, 0);

	gtk_widget_show_all(popup);
	response = gtk_dialog_run (GTK_DIALOG(popup));

	if (response == GTK_RESPONSE_OK) {
		if (!sview_search_info.search_type)
			goto end_it;

		switch(sview_search_info.search_type) {
		case CREATE_BATCH_JOB:
			response = slurm_submit_batch_job(job_msg,
							  &slurm_alloc_msg);
			if (response == SLURM_SUCCESS) {
				temp = g_strdup_printf(
					"Job %u submitted",
					slurm_alloc_msg->job_id);
			} else {
				temp = g_strdup_printf(
					"Problem submitting job: %s",
					slurm_strerror(slurm_get_errno()));
			}
			display_edit_note(temp);
			g_free(temp);
			break;
		case CREATE_PARTITION:
			response = slurm_create_partition(part_msg);
			if (response == SLURM_SUCCESS) {
				temp = g_strdup_printf("Partition %s created",
						       part_msg->name);
			} else {
				temp = g_strdup_printf(
					"Problem creating partition: %s",
					slurm_strerror(slurm_get_errno()));
			}
			display_edit_note(temp);
			g_free(temp);
			break;
		case CREATE_RESERVATION:
			res_name = slurm_create_reservation(resv_msg);
			if (res_name) {
				temp = g_strdup_printf(
					"Reservation %s created",
					res_name);
				free(res_name);
			} else {
				temp = g_strdup_printf(
					"Problem creating reservation: %s",
					slurm_strerror(slurm_get_errno()));
			}
			display_edit_note(temp);
			g_free(temp);
			break;
		default:
			break;
		}
	}

end_it:
	gtk_widget_destroy(popup);
	if (slurm_alloc_msg)
		slurm_free_submit_response_response_msg(slurm_alloc_msg);
	if (job_msg) {
		for (i = 0; i < job_msg->argc; i++)
			xfree(job_msg->argv[i]);
		xfree(job_msg->argv);
		xfree(job_msg->name);
		xfree(job_msg->script);
		xfree(job_msg->work_dir);
		xfree(job_msg);
	}
	xfree(part_msg);
	if (resv_msg)
		slurm_free_resv_desc_msg(resv_msg);
	return;
}
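
Underneath the GTK plumbing, the batch-job branch reduces to slurm_submit_batch_job(): fill a job_desc_msg_t (including the script text), submit, read job_id from the response, and free the response. A minimal sketch of that core; the script contents and working directory are placeholders:

#include <stdio.h>
#include <unistd.h>
#include <slurm/slurm.h>
#include <slurm/slurm_errno.h>

/* Submit a one-node batch job running the given script text.
 * Returns the new job ID, or 0 on error. */
static uint32_t submit_script(char *script)
{
	job_desc_msg_t job_msg;
	submit_response_msg_t *resp = NULL;
	uint32_t job_id;

	slurm_init_job_desc_msg(&job_msg);
	job_msg.user_id   = getuid();
	job_msg.group_id  = getgid();
	job_msg.min_nodes = 1;
	job_msg.script    = script;	/* e.g. "#!/bin/sh\nhostname\n" */
	job_msg.work_dir  = "/tmp";	/* placeholder working directory */

	if (slurm_submit_batch_job(&job_msg, &resp) != SLURM_SUCCESS) {
		fprintf(stderr, "submit failed: %s\n",
			slurm_strerror(slurm_get_errno()));
		return 0;
	}
	job_id = resp->job_id;
	slurm_free_submit_response_response_msg(resp);
	return job_id;
}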
Example #4
/*
 * scontrol_update_job - update the slurm job configuration per the supplied
 *	arguments
 * IN argc - count of arguments
 * IN argv - list of arguments
 * RET 0 if no slurm error, errno otherwise. parsing error prints
 *			error message and returns 0
 */
extern int
scontrol_update_job (int argc, char *argv[])
{
	bool update_size = false;
	int i, update_cnt = 0;
	char *tag, *val;
	int taglen, vallen;
	job_desc_msg_t job_msg;

	slurm_init_job_desc_msg (&job_msg);

	/* set current user, needed e.g., for AllowGroups checks */
	job_msg.user_id = getuid();

	for (i=0; i<argc; i++) {
		tag = argv[i];
		val = strchr(argv[i], '=');
		if (val) {
			taglen = val - argv[i];
			val++;
			vallen = strlen(val);
		} else if (strncasecmp(tag, "Nice", MAX(strlen(tag), 2)) == 0){
			/* "Nice" is the only tag that might not have an
			   equal sign, so it is handled specially. */
			job_msg.nice = NICE_OFFSET + 100;
			update_cnt++;
			continue;
		} else {
			exit_code = 1;
			fprintf (stderr, "Invalid input: %s\n", argv[i]);
			fprintf (stderr, "Request aborted\n");
			return -1;
		}

		if (strncasecmp(tag, "JobId", MAX(taglen, 3)) == 0) {
			job_msg.job_id =
				(uint32_t) strtol(val, (char **) NULL, 10);
		}
		else if (strncasecmp(tag, "Comment", MAX(taglen, 3)) == 0) {
			job_msg.comment = val;
			update_cnt++;
		}
		else if (strncasecmp(tag, "TimeLimit", MAX(taglen, 5)) == 0) {
			bool incr, decr;
			uint32_t job_current_time, time_limit;

			incr = (val[0] == '+');
			decr = (val[0] == '-');
			if (incr || decr)
				val++;
			time_limit = time_str2mins(val);
			/* time_limit is unsigned, so test for the NO_VAL
			 * error return rather than a negative value */
			if (time_limit == NO_VAL) {
				error("Invalid TimeLimit value");
				exit_code = 1;
				return 0;
			}
			if (incr || decr) {
				job_current_time = _get_job_time(job_msg.
								 job_id);
				if (job_current_time == NO_VAL) {
					exit_code = 1;
					return 0;
				}
				if (incr) {
					time_limit += job_current_time;
				} else if (time_limit > job_current_time) {
					error("TimeLimit decrement larger than"
					      " current time limit (%u > %u)",
					      time_limit, job_current_time);
					exit_code = 1;
					return 0;
				} else {
					time_limit = job_current_time -
						     time_limit;
				}
			}
			job_msg.time_limit = time_limit;
			update_cnt++;
		}
		else if (strncasecmp(tag, "TimeMin", MAX(taglen, 5)) == 0) {
			int time_min = time_str2mins(val);
			if ((time_min < 0) && (time_min != INFINITE)) {
				error("Invalid TimeMin value");
				exit_code = 1;
				return 0;
			}
			job_msg.time_min = time_min;
			update_cnt++;
		}
		else if (strncasecmp(tag, "Priority", MAX(taglen, 2)) == 0) {
			job_msg.priority =
				(uint32_t) strtoll(val, (char **) NULL, 10);
			update_cnt++;
		}
		else if (strncasecmp(tag, "Nice", MAX(taglen, 2)) == 0) {
			int nice;
			nice = strtoll(val, (char **) NULL, 10);
			if (abs(nice) > NICE_OFFSET) {
				error("Invalid nice value, must be between "
					"-%d and %d", NICE_OFFSET,
					NICE_OFFSET);
				exit_code = 1;
				return 0;
			}
			job_msg.nice = NICE_OFFSET + nice;
			update_cnt++;
		}
		else if (strncasecmp(tag, "NumCPUs", MAX(taglen, 6)) == 0) {
			int min_cpus, max_cpus=0;
			if (!get_resource_arg_range(val, "NumCPUs", &min_cpus,
						   &max_cpus, false) ||
			    (min_cpus <= 0) ||
			    (max_cpus && (max_cpus < min_cpus))) {
				error("Invalid NumCPUs value: %s", val);
				exit_code = 1;
				return 0;
			}
			job_msg.min_cpus = min_cpus;
			if (max_cpus)
				job_msg.max_cpus = max_cpus;
			update_cnt++;
		}
		/* ReqProcs was removed in SLURM version 2.1 */
		else if (strncasecmp(tag, "ReqProcs", MAX(taglen, 8)) == 0) {
			job_msg.num_tasks =
				(uint32_t) strtol(val, (char **) NULL, 10);
			update_cnt++;
		}
		else if (strncasecmp(tag, "Requeue", MAX(taglen, 4)) == 0) {
			job_msg.requeue =
				(uint16_t) strtol(val, (char **) NULL, 10);
			update_cnt++;
		}
		/* ReqNodes was replaced by NumNodes in SLURM version 2.1 */
		else if ((strncasecmp(tag, "ReqNodes", MAX(taglen, 8)) == 0) ||
		         (strncasecmp(tag, "NumNodes", MAX(taglen, 8)) == 0)) {
			int min_nodes, max_nodes, rc;
			if (strcmp(val, "0") == 0) {
				job_msg.min_nodes = 0;
			} else if (strcasecmp(val, "ALL") == 0) {
				job_msg.min_nodes = INFINITE;
			} else {
				min_nodes = (int) job_msg.min_nodes;
				max_nodes = (int) job_msg.max_nodes;
				rc = get_resource_arg_range(
						val, "requested node count",
						&min_nodes, &max_nodes, false);
				if (!rc)
					return rc;
				job_msg.min_nodes = (uint32_t) min_nodes;
				job_msg.max_nodes = (uint32_t) max_nodes;
			}
			update_size = true;
			update_cnt++;
		}
		else if (strncasecmp(tag, "ReqSockets", MAX(taglen, 4)) == 0) {
			job_msg.sockets_per_node =
				(uint16_t) strtol(val, (char **) NULL, 10);
			update_cnt++;
		}
		else if (strncasecmp(tag, "ReqCores", MAX(taglen, 4)) == 0) {
			job_msg.cores_per_socket =
				(uint16_t) strtol(val, (char **) NULL, 10);
			update_cnt++;
		}
		else if (strncasecmp(tag, "TasksPerNode", MAX(taglen, 2))==0) {
			job_msg.ntasks_per_node =
				(uint16_t) strtol(val, (char **) NULL, 10);
			update_cnt++;
		}
		else if (strncasecmp(tag, "ReqThreads", MAX(taglen, 4)) == 0) {
			job_msg.threads_per_core =
				(uint16_t) strtol(val, (char **) NULL, 10);
			update_cnt++;
		}
		else if (strncasecmp(tag, "MinCPUsNode", MAX(taglen, 4)) == 0) {
			job_msg.pn_min_cpus =
				(uint32_t) strtol(val, (char **) NULL, 10);
			update_cnt++;
		}
		else if (strncasecmp(tag, "MinMemoryNode",
				     MAX(taglen, 10)) == 0) {
			job_msg.pn_min_memory =
				(uint32_t) strtol(val, (char **) NULL, 10);
			update_cnt++;
		}
		else if (strncasecmp(tag, "MinMemoryCPU",
				     MAX(taglen, 10)) == 0) {
			job_msg.pn_min_memory =
				(uint32_t) strtol(val, (char **) NULL, 10);
			job_msg.pn_min_memory |= MEM_PER_CPU;
			update_cnt++;
		}
		else if (strncasecmp(tag, "MinTmpDiskNode",
				     MAX(taglen, 5)) == 0) {
			job_msg.pn_min_tmp_disk =
				(uint32_t) strtol(val, (char **) NULL, 10);
			update_cnt++;
		}
		else if (strncasecmp(tag, "Partition", MAX(taglen, 2)) == 0) {
			job_msg.partition = val;
			update_cnt++;
		}
		else if (strncasecmp(tag, "QOS", MAX(taglen, 2)) == 0) {
			job_msg.qos = val;
			update_cnt++;
		}
		else if (strncasecmp(tag, "ReservationName",
				     MAX(taglen, 3)) == 0) {
			job_msg.reservation = val;
			update_cnt++;
		}
		else if (strncasecmp(tag, "Name", MAX(taglen, 2)) == 0) {
			job_msg.name = val;
			update_cnt++;
		}
		else if (strncasecmp(tag, "WCKey", MAX(taglen, 1)) == 0) {
			job_msg.wckey = val;
			update_cnt++;
		}
		else if (strncasecmp(tag, "Switches", MAX(taglen, 5)) == 0) {
			char *sep_char;
			job_msg.req_switch =
				(uint32_t) strtol(val, &sep_char, 10);
			update_cnt++;
			if (sep_char && sep_char[0] == '@') {
				job_msg.wait4switch = time_str2mins(sep_char+1)
						      * 60;
			}
		}
		else if (strncasecmp(tag, "wait-for-switch", MAX(taglen, 5))
			 == 0) {
			job_msg.wait4switch =
				(uint32_t) strtol(val, (char **) NULL, 10);
			update_cnt++;
		}
		else if (strncasecmp(tag, "Shared", MAX(taglen, 2)) == 0) {
			if (strncasecmp(val, "YES", MAX(vallen, 1)) == 0)
				job_msg.shared = 1;
			else if (strncasecmp(val, "NO", MAX(vallen, 1)) == 0)
				job_msg.shared = 0;
			else
				job_msg.shared =
					(uint16_t) strtol(val,
							(char **) NULL, 10);
			update_cnt++;
		}
		else if (strncasecmp(tag, "Contiguous", MAX(taglen, 3)) == 0) {
			if (strncasecmp(val, "YES", MAX(vallen, 1)) == 0)
				job_msg.contiguous = 1;
			else if (strncasecmp(val, "NO", MAX(vallen, 1)) == 0)
				job_msg.contiguous = 0;
			else
				job_msg.contiguous =
					(uint16_t) strtol(val,
							(char **) NULL, 10);
			update_cnt++;
		}
		else if (strncasecmp(tag, "ExcNodeList", MAX(taglen, 3)) == 0){
			job_msg.exc_nodes = val;
			update_cnt++;
		}
		else if (!strncasecmp(tag, "NodeList",    MAX(taglen, 8)) ||
			 !strncasecmp(tag, "ReqNodeList", MAX(taglen, 8))) {
			job_msg.req_nodes = val;
			update_size = true;
			update_cnt++;
		}
		else if (strncasecmp(tag, "Features", MAX(taglen, 1)) == 0) {
			job_msg.features = val;
			update_cnt++;
		}
		else if (strncasecmp(tag, "Gres", MAX(taglen, 2)) == 0) {
			if (!strcasecmp(val, "help") ||
			    !strcasecmp(val, "list")) {
				print_gres_help();
			} else {
				job_msg.gres = val;
				update_cnt++;
			}
		}
		else if (strncasecmp(tag, "Account", MAX(taglen, 1)) == 0) {
			job_msg.account = val;
			update_cnt++;
		}
		else if (strncasecmp(tag, "Dependency", MAX(taglen, 1)) == 0) {
			job_msg.dependency = val;
			update_cnt++;
		}
		else if (strncasecmp(tag, "Geometry", MAX(taglen, 2)) == 0) {
			char* token, *delimiter = ",x", *next_ptr;
			int j, rc = 0;
			int dims = slurmdb_setup_cluster_dims();
			uint16_t geo[dims];
			char* geometry_tmp = xstrdup(val);
			char* original_ptr = geometry_tmp;
			token = strtok_r(geometry_tmp, delimiter, &next_ptr);
			for (j=0; j<dims; j++) {
				if (token == NULL) {
					error("insufficient dimensions in "
						"Geometry");
					rc = -1;
					break;
				}
				geo[j] = (uint16_t) atoi(token);
				if (geo[j] <= 0) {
					error("invalid --geometry argument");
					rc = -1;
					break;
				}
				geometry_tmp = next_ptr;
				token = strtok_r(geometry_tmp, delimiter,
					&next_ptr);
			}
			if (token != NULL) {
				error("too many dimensions in Geometry");
				rc = -1;
			}

			if (original_ptr)
				xfree(original_ptr);
			if (rc != 0)
				exit_code = 1;
			else {
				for (j=0; j<dims; j++)
					job_msg.geometry[j] = geo[j];
				update_cnt++;
			}
		}

		else if (strncasecmp(tag, "Rotate", MAX(taglen, 2)) == 0) {
			uint16_t rotate;
			if (strncasecmp(val, "YES", MAX(vallen, 1)) == 0)
				rotate = 1;
			else if (strncasecmp(val, "NO", MAX(vallen, 1)) == 0)
				rotate = 0;
			else
				rotate = (uint16_t) strtol(val,
							   (char **) NULL, 10);
			job_msg.rotate = rotate;
			update_cnt++;
		}
		else if (strncasecmp(tag, "Conn-Type", MAX(taglen, 2)) == 0) {
			verify_conn_type(val, job_msg.conn_type);
			if(job_msg.conn_type[0] != (uint16_t)NO_VAL)
				update_cnt++;
		}
		else if (strncasecmp(tag, "Licenses", MAX(taglen, 1)) == 0) {
			job_msg.licenses = val;
			update_cnt++;
		}
		else if (!strncasecmp(tag, "EligibleTime", MAX(taglen, 2)) ||
			 !strncasecmp(tag, "StartTime",    MAX(taglen, 2))) {
			if ((job_msg.begin_time = parse_time(val, 0))) {
				if (job_msg.begin_time < time(NULL))
					job_msg.begin_time = time(NULL);
				update_cnt++;
			}
		}
		else if (!strncasecmp(tag, "EndTime", MAX(taglen, 2))) {
			job_msg.end_time = parse_time(val, 0);
			update_cnt++;
		}
		else {
			exit_code = 1;
			fprintf (stderr, "Update of this parameter is not "
				 "supported: %s\n", argv[i]);
			fprintf (stderr, "Request aborted\n");
			return 0;
		}
	}

	if (update_cnt == 0) {
		exit_code = 1;
		fprintf (stderr, "No changes specified\n");
		return 0;
	}

	if (slurm_update_job(&job_msg))
		return slurm_get_errno ();

	if (update_size)
		_update_job_size(job_msg.job_id);

	return SLURM_SUCCESS;
}
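
Every tag handled above follows the same contract: slurm_init_job_desc_msg() leaves each field at a NO_VAL/NULL sentinel meaning "no change", so only job_id plus the fields actually being updated get set before the single slurm_update_job() call. Reduced to one field, the whole function is essentially this sketch:

#include <stdio.h>
#include <slurm/slurm.h>
#include <slurm/slurm_errno.h>

/* Change one job's time limit (in minutes); 0 on success, -1 on error. */
static int set_time_limit(uint32_t job_id, uint32_t minutes)
{
	job_desc_msg_t job_msg;

	slurm_init_job_desc_msg(&job_msg);	/* all fields = "no change" */
	job_msg.job_id     = job_id;
	job_msg.time_limit = minutes;

	if (slurm_update_job(&job_msg) != SLURM_SUCCESS) {
		fprintf(stderr, "update failed: %s\n",
			slurm_strerror(slurm_get_errno()));
		return -1;
	}
	return 0;
}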
Example #5
File: allocate.c Project: VURM/slurm
/*
 * Create job description structure based off srun options
 * (see opt.h)
 */
job_desc_msg_t *
job_desc_msg_create_from_opts (void)
{
	job_desc_msg_t *j = xmalloc(sizeof(*j));
	hostlist_t hl = NULL;

	slurm_init_job_desc_msg(j);

	j->contiguous     = opt.contiguous;
	j->features       = opt.constraints;
	j->gres           = opt.gres;
	if (opt.immediate == 1)
		j->immediate = opt.immediate;
	if (opt.job_name)
		j->name   = xstrdup(opt.job_name);
	else
		j->name   = xstrdup(opt.cmd_name);
	if (opt.argc > 0) {
		j->argc    = 1;
		j->argv    = (char **) xmalloc(sizeof(char *) * 2);
		j->argv[0] = xstrdup(opt.argv[0]);
	}
	if (opt.acctg_freq >= 0)
		j->acctg_freq     = opt.acctg_freq;
	j->reservation    = xstrdup(opt.reservation);
	j->wckey          = xstrdup(opt.wckey);

	j->req_nodes      = xstrdup(opt.nodelist);

	/* simplify the job allocation nodelist,
	 * not laying out tasks until step */
	if(j->req_nodes) {
		hl = hostlist_create(j->req_nodes);
		xfree(opt.nodelist);
		opt.nodelist = hostlist_ranged_string_xmalloc(hl);
		hostlist_uniq(hl);
		xfree(j->req_nodes);
		j->req_nodes = hostlist_ranged_string_xmalloc(hl);
		hostlist_destroy(hl);

	}

	if(opt.distribution == SLURM_DIST_ARBITRARY
	   && !j->req_nodes) {
		error("With Arbitrary distribution you need to "
		      "specify a nodelist or hostfile with the -w option");
		return NULL;
	}
	j->exc_nodes      = opt.exc_nodes;
	j->partition      = opt.partition;
	j->min_nodes      = opt.min_nodes;
	if (opt.sockets_per_node != NO_VAL)
		j->sockets_per_node    = opt.sockets_per_node;
	if (opt.cores_per_socket != NO_VAL)
		j->cores_per_socket      = opt.cores_per_socket;
	if (opt.threads_per_core != NO_VAL)
		j->threads_per_core    = opt.threads_per_core;
	j->user_id        = opt.uid;
	j->dependency     = opt.dependency;
	if (opt.nice)
		j->nice   = NICE_OFFSET + opt.nice;

	if (opt.cpu_bind)
		j->cpu_bind       = opt.cpu_bind;
	if (opt.cpu_bind_type)
		j->cpu_bind_type  = opt.cpu_bind_type;
	if (opt.mem_bind)
		j->mem_bind       = opt.mem_bind;
	if (opt.mem_bind_type)
		j->mem_bind_type  = opt.mem_bind_type;
	if (opt.plane_size != NO_VAL)
		j->plane_size     = opt.plane_size;
	j->task_dist      = opt.distribution;

	j->group_id       = opt.gid;
	j->mail_type      = opt.mail_type;

	if (opt.ntasks_per_node != NO_VAL)
		j->ntasks_per_node   = opt.ntasks_per_node;
	if (opt.ntasks_per_socket != NO_VAL)
		j->ntasks_per_socket = opt.ntasks_per_socket;
	if (opt.ntasks_per_core != NO_VAL)
		j->ntasks_per_core   = opt.ntasks_per_core;

	if (opt.mail_user)
		j->mail_user = xstrdup(opt.mail_user);
	if (opt.begin)
		j->begin_time = opt.begin;
	if (opt.licenses)
		j->licenses = xstrdup(opt.licenses);
	if (opt.network)
		j->network = xstrdup(opt.network);
	if (opt.account)
		j->account = xstrdup(opt.account);
	if (opt.comment)
		j->comment = xstrdup(opt.comment);
	if (opt.qos)
		j->qos = xstrdup(opt.qos);
	if (opt.cwd)
		j->work_dir = xstrdup(opt.cwd);

	if (opt.hold)
		j->priority     = 0;
	if (opt.jobid != NO_VAL)
		j->job_id	= opt.jobid;
#ifdef HAVE_BG
	if (opt.geometry[0] > 0) {
		int i;
		for (i=0; i<SYSTEM_DIMENSIONS; i++)
			j->geometry[i] = opt.geometry[i];
	}
#endif

	if (opt.conn_type != (uint16_t) NO_VAL)
		j->conn_type[0] = opt.conn_type;

	if (opt.reboot)
		j->reboot = 1;
	if (opt.no_rotate)
		j->rotate = 0;

	if (opt.blrtsimage)
		j->blrtsimage = xstrdup(opt.blrtsimage);
	if (opt.linuximage)
		j->linuximage = xstrdup(opt.linuximage);
	if (opt.mloaderimage)
		j->mloaderimage = xstrdup(opt.mloaderimage);
	if (opt.ramdiskimage)
		j->ramdiskimage = xstrdup(opt.ramdiskimage);

	if (opt.max_nodes)
		j->max_nodes    = opt.max_nodes;
	else if (opt.nodes_set) {
		/* If max_nodes isn't set for the allocation, default it
		 * to min_nodes to match salloc/sbatch behavior.
		 */
		j->max_nodes    = opt.min_nodes;
	}
	if (opt.pn_min_cpus != NO_VAL)
		j->pn_min_cpus    = opt.pn_min_cpus;
	if (opt.pn_min_memory != NO_VAL)
		j->pn_min_memory = opt.pn_min_memory;
	else if (opt.mem_per_cpu != NO_VAL)
		j->pn_min_memory = opt.mem_per_cpu | MEM_PER_CPU;
	if (opt.pn_min_tmp_disk != NO_VAL)
		j->pn_min_tmp_disk = opt.pn_min_tmp_disk;
	if (opt.overcommit) {
		j->min_cpus    = opt.min_nodes;
		j->overcommit  = opt.overcommit;
	} else
		j->min_cpus    = opt.ntasks * opt.cpus_per_task;
	if (opt.ntasks_set)
		j->num_tasks   = opt.ntasks;

	if (opt.cpus_set)
		j->cpus_per_task = opt.cpus_per_task;

	if (opt.no_kill)
		j->kill_on_node_fail   = 0;
	if (opt.time_limit != NO_VAL)
		j->time_limit          = opt.time_limit;
	if (opt.time_min != NO_VAL)
		j->time_min            = opt.time_min;
	j->shared = opt.shared;

	if (opt.warn_signal)
		j->warn_signal = opt.warn_signal;
	if (opt.warn_time)
		j->warn_time = opt.warn_time;

	/* srun uses the same listening port for the allocation response
	 * message as all other messages */
	j->alloc_resp_port = slurmctld_comm_addr.port;
	j->other_port = slurmctld_comm_addr.port;

	if (opt.spank_job_env_size) {
		j->spank_job_env      = opt.spank_job_env;
		j->spank_job_env_size = opt.spank_job_env_size;
	}

	return (j);
}
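
The nodelist simplification above leans on Slurm's hostlist helpers: expand the user's list, deduplicate it, then re-compress it to ranged form such as "n[1-3]". Isolated, the round trip looks roughly like this; the unprefixed names and include paths are the in-tree variants used by srun above (external code would go through the slurm_hostlist_* wrappers exported by slurm.h):

#include "src/common/hostlist.h"	/* in-tree build assumed */
#include "src/common/xmalloc.h"

/* "n1,n2,n2,n3" -> "n[1-3]": deduplicated, ranged nodelist.
 * The returned string is xmalloc'd; release it with xfree(). */
static char *dedup_nodelist(char *nodes)
{
	hostlist_t hl = hostlist_create(nodes);
	char *ranged = NULL;

	if (hl) {
		hostlist_uniq(hl);
		ranged = hostlist_ranged_string_xmalloc(hl);
		hostlist_destroy(hl);
	}
	return ranged;
}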
Example #6
File: allocate.c Project: RPI-HPC/slurm
/*
 * Create job description structure based off srun options
 * (see opt.h)
 */
job_desc_msg_t *
job_desc_msg_create_from_opts (void)
{
	job_desc_msg_t *j = xmalloc(sizeof(*j));
	hostlist_t hl = NULL;

	slurm_init_job_desc_msg(j);
#if defined HAVE_ALPS_CRAY && defined HAVE_REAL_CRAY
	uint64_t pagg_id = job_getjid(getpid());
	/*
	 * Interactive sessions require pam_job.so in /etc/pam.d/common-session
	 * since creating sgi_job containers requires root permissions. This is
	 * the only exception where we allow the fallback of using the SID to
	 * confirm the reservation (caught later, in do_basil_confirm).
	 */
	if (pagg_id == (uint64_t)-1) {
		error("No SGI job container ID detected - please enable the "
		      "Cray job service via /etc/init.d/job");
	} else {
		if (!j->select_jobinfo)
			j->select_jobinfo = select_g_select_jobinfo_alloc();

		select_g_select_jobinfo_set(j->select_jobinfo,
					    SELECT_JOBDATA_PAGG_ID, &pagg_id);
	}
#endif

	j->contiguous     = opt.contiguous;
	if (opt.core_spec)
		j->core_spec = opt.core_spec;
	j->features       = opt.constraints;
	j->gres           = opt.gres;
	if (opt.immediate == 1)
		j->immediate = opt.immediate;
	if (opt.job_name)
		j->name   = opt.job_name;
	else
		j->name   = opt.cmd_name;
	if (opt.argc > 0) {
		j->argc    = 1;
		j->argv    = (char **) xmalloc(sizeof(char *) * 2);
		j->argv[0] = xstrdup(opt.argv[0]);
	}
	if (opt.acctg_freq)
		j->acctg_freq     = xstrdup(opt.acctg_freq);
	j->reservation    = opt.reservation;
	j->wckey          = opt.wckey;

	j->req_nodes      = xstrdup(opt.nodelist);

	/* simplify the job allocation nodelist,
	 * not laying out tasks until step */
	if (j->req_nodes) {
		hl = hostlist_create(j->req_nodes);
		xfree(opt.nodelist);
		opt.nodelist = hostlist_ranged_string_xmalloc(hl);
		hostlist_uniq(hl);
		xfree(j->req_nodes);
		j->req_nodes = hostlist_ranged_string_xmalloc(hl);
		hostlist_destroy(hl);

	}

	if (opt.distribution == SLURM_DIST_ARBITRARY
	   && !j->req_nodes) {
		error("With Arbitrary distribution you need to "
		      "specify a nodelist or hostfile with the -w option");
		return NULL;
	}
	j->exc_nodes      = opt.exc_nodes;
	j->partition      = opt.partition;
	j->min_nodes      = opt.min_nodes;
	if (opt.sockets_per_node != NO_VAL)
		j->sockets_per_node    = opt.sockets_per_node;
	if (opt.cores_per_socket != NO_VAL)
		j->cores_per_socket      = opt.cores_per_socket;
	if (opt.threads_per_core != NO_VAL)
		j->threads_per_core    = opt.threads_per_core;
	j->user_id        = opt.uid;
	j->dependency     = opt.dependency;
	if (opt.nice)
		j->nice   = NICE_OFFSET + opt.nice;
	if (opt.priority)
		j->priority = opt.priority;

	if (opt.cpu_bind)
		j->cpu_bind       = opt.cpu_bind;
	if (opt.cpu_bind_type)
		j->cpu_bind_type  = opt.cpu_bind_type;
	if (opt.mem_bind)
		j->mem_bind       = opt.mem_bind;
	if (opt.mem_bind_type)
		j->mem_bind_type  = opt.mem_bind_type;
	if (opt.plane_size != NO_VAL)
		j->plane_size     = opt.plane_size;
	j->task_dist      = opt.distribution;

	j->group_id       = opt.gid;
	j->mail_type      = opt.mail_type;

	if (opt.ntasks_per_node != NO_VAL)
		j->ntasks_per_node   = opt.ntasks_per_node;
	if (opt.ntasks_per_socket != NO_VAL)
		j->ntasks_per_socket = opt.ntasks_per_socket;
	if (opt.ntasks_per_core != NO_VAL)
		j->ntasks_per_core   = opt.ntasks_per_core;

	if (opt.mail_user)
		j->mail_user = opt.mail_user;
	if (opt.begin)
		j->begin_time = opt.begin;
	if (opt.licenses)
		j->licenses = opt.licenses;
	if (opt.network)
		j->network = opt.network;
	if (opt.profile)
		j->profile = opt.profile;
	if (opt.account)
		j->account = opt.account;
	if (opt.comment)
		j->comment = opt.comment;
	if (opt.qos)
		j->qos = opt.qos;
	if (opt.cwd)
		j->work_dir = opt.cwd;

	if (opt.hold)
		j->priority     = 0;
	if (opt.jobid != NO_VAL)
		j->job_id	= opt.jobid;
#ifdef HAVE_BG
	if (opt.geometry[0] > 0) {
		int i;
		for (i = 0; i < SYSTEM_DIMENSIONS; i++)
			j->geometry[i] = opt.geometry[i];
	}
#endif

	memcpy(j->conn_type, opt.conn_type, sizeof(j->conn_type));

	if (opt.reboot)
		j->reboot = 1;
	if (opt.no_rotate)
		j->rotate = 0;

	if (opt.blrtsimage)
		j->blrtsimage = opt.blrtsimage;
	if (opt.linuximage)
		j->linuximage = opt.linuximage;
	if (opt.mloaderimage)
		j->mloaderimage = opt.mloaderimage;
	if (opt.ramdiskimage)
		j->ramdiskimage = opt.ramdiskimage;

	if (opt.max_nodes)
		j->max_nodes    = opt.max_nodes;
	else if (opt.nodes_set) {
		/* If max_nodes isn't set for the allocation, default it
		 * to min_nodes to match salloc/sbatch behavior.
		 */
		j->max_nodes    = opt.min_nodes;
	}
	if (opt.pn_min_cpus != NO_VAL)
		j->pn_min_cpus    = opt.pn_min_cpus;
	if (opt.pn_min_memory != NO_VAL)
		j->pn_min_memory = opt.pn_min_memory;
	else if (opt.mem_per_cpu != NO_VAL)
		j->pn_min_memory = opt.mem_per_cpu | MEM_PER_CPU;
	if (opt.pn_min_tmp_disk != NO_VAL)
		j->pn_min_tmp_disk = opt.pn_min_tmp_disk;
	if (opt.overcommit) {
		j->min_cpus    = opt.min_nodes;
		j->overcommit  = opt.overcommit;
	} else if (opt.cpus_set)
		j->min_cpus    = opt.ntasks * opt.cpus_per_task;
	else
		j->min_cpus    = opt.ntasks;
	if (opt.ntasks_set)
		j->num_tasks   = opt.ntasks;

	if (opt.cpus_set)
		j->cpus_per_task = opt.cpus_per_task;

	if (opt.no_kill)
		j->kill_on_node_fail   = 0;
	if (opt.time_limit != NO_VAL)
		j->time_limit          = opt.time_limit;
	if (opt.time_min != NO_VAL)
		j->time_min            = opt.time_min;
	j->shared = opt.shared;

	if (opt.warn_signal)
		j->warn_signal = opt.warn_signal;
	if (opt.warn_time)
		j->warn_time = opt.warn_time;

	if (opt.req_switch >= 0)
		j->req_switch = opt.req_switch;
	if (opt.wait4switch >= 0)
		j->wait4switch = opt.wait4switch;

	/* srun uses the same listening port for the allocation response
	 * message as all other messages */
	j->alloc_resp_port = slurmctld_comm_addr.port;
	j->other_port = slurmctld_comm_addr.port;

	if (opt.spank_job_env_size) {
		j->spank_job_env      = opt.spank_job_env;
		j->spank_job_env_size = opt.spank_job_env_size;
	}

	return (j);
}
Example #7
File: acct_policy.c Project: VURM/slurm
/*
 * acct_policy_update_pending_job - Make sure the limits imposed on a
 *	job on submission are correct after an update to a qos or
 *	association.  If the association/qos limits prevent
 *	the job from ever running (lowered limits since job submission),
 *	then cancel the job.
 */
extern int acct_policy_update_pending_job(struct job_record *job_ptr)
{
	job_desc_msg_t job_desc;
	uint16_t limit_set_max_cpus = 0;
	uint16_t limit_set_max_nodes = 0;
	uint16_t limit_set_time = 0;
	bool update_accounting = false;
	struct job_details *details_ptr;
	int rc = SLURM_SUCCESS;

	/* check to see if we are enforcing associations and the job
	 * is pending or if we are even enforcing limits. */
	if (!accounting_enforce || !IS_JOB_PENDING(job_ptr)
	    || !(accounting_enforce & ACCOUNTING_ENFORCE_LIMITS))
		return SLURM_SUCCESS;

	details_ptr = job_ptr->details;

	if (!details_ptr) {
		error("acct_policy_update_pending_job: no details");
		return SLURM_ERROR;
	}

	/* set up the job desc to make sure things are the way we
	 * need.
	 */
	slurm_init_job_desc_msg(&job_desc);

	job_desc.min_cpus = details_ptr->min_cpus;
	/* Only set this value if not set from a limit */
	if (job_ptr->limit_set_max_cpus == ADMIN_SET_LIMIT)
		limit_set_max_cpus = job_ptr->limit_set_max_cpus;
	else if ((details_ptr->max_cpus != NO_VAL)
		 && !job_ptr->limit_set_max_cpus)
		job_desc.max_cpus = details_ptr->max_cpus;

	job_desc.min_nodes = details_ptr->min_nodes;
	/* Only set this value if not set from a limit */
	if (job_ptr->limit_set_max_nodes == ADMIN_SET_LIMIT)
		limit_set_max_nodes = job_ptr->limit_set_max_nodes;
	else if ((details_ptr->max_nodes != NO_VAL)
		 && !job_ptr->limit_set_max_nodes)
		job_desc.max_nodes = details_ptr->max_nodes;
	else
		job_desc.max_nodes = 0;

	/* Only set this value if not set from a limit */
	if (job_ptr->limit_set_time == ADMIN_SET_LIMIT)
		limit_set_time = job_ptr->limit_set_time;
	else if ((job_ptr->time_limit != NO_VAL) && !job_ptr->limit_set_time)
		job_desc.time_limit = job_ptr->time_limit;

	if (!acct_policy_validate(&job_desc, job_ptr->part_ptr,
				  job_ptr->assoc_ptr, job_ptr->qos_ptr,
				  &limit_set_max_cpus,
				  &limit_set_max_nodes,
				  &limit_set_time, 0)) {
		info("acct_policy_update_pending_job: exceeded "
		     "association/qos's cpu, node or "
		     "time limit for job %d", job_ptr->job_id);
		_cancel_job(job_ptr);
		return SLURM_ERROR;
	}

	/* If it isn't an admin set limit replace it. */
	if (!limit_set_max_cpus && (job_ptr->limit_set_max_cpus == 1)) {
		details_ptr->max_cpus = NO_VAL;
		job_ptr->limit_set_max_cpus = 0;
		update_accounting = true;
	} else if (limit_set_max_cpus != ADMIN_SET_LIMIT) {
		if (details_ptr->max_cpus != job_desc.max_cpus) {
			details_ptr->max_cpus = job_desc.max_cpus;
			update_accounting = true;
		}
		job_ptr->limit_set_max_cpus = limit_set_max_cpus;
	}

	if (!limit_set_max_nodes && (job_ptr->limit_set_max_nodes == 1)) {
		details_ptr->max_nodes = 0;
		job_ptr->limit_set_max_nodes = 0;
		update_accounting = true;
	} else if (limit_set_max_nodes != ADMIN_SET_LIMIT) {
		if (details_ptr->max_nodes != job_desc.max_nodes) {
			details_ptr->max_nodes = job_desc.max_nodes;
			update_accounting = true;
		}
		job_ptr->limit_set_max_nodes = limit_set_max_nodes;
	}

	if (!limit_set_time && (job_ptr->limit_set_time == 1)) {
		job_ptr->time_limit = NO_VAL;
		job_ptr->limit_set_time = 0;
		update_accounting = true;
	} else if (limit_set_time != ADMIN_SET_LIMIT) {
		if (job_ptr->time_limit != job_desc.time_limit) {
			job_ptr->time_limit = job_desc.time_limit;
			update_accounting = true;
		}
		job_ptr->limit_set_time = limit_set_time;
	}

	if (update_accounting) {
		last_job_update = time(NULL);
		debug("limits changed for job %u: updating accounting",
		      job_ptr->job_id);
		if (details_ptr->begin_time) {
			/* Update job record in accounting to reflect changes */
			jobacct_storage_g_job_start(acct_db_conn, job_ptr);
		}
	}

	return rc;
}
Example #8
File: job.c Project: eliv/slurm-drmaa-1
static void
slurmdrmaa_job_control( fsd_job_t *self, int action )
{
	slurmdrmaa_job_t *slurm_self = (slurmdrmaa_job_t*)self;
	job_desc_msg_t job_desc;

	fsd_log_enter(( "({job_id=%s}, action=%d)", self->job_id, action ));

	fsd_mutex_lock( &self->session->drm_connection_mutex );
	TRY
	 {
		switch( action )
		 {
			case DRMAA_CONTROL_SUSPEND:
				if(slurm_suspend(fsd_atoi(self->job_id)) == -1) {
					fsd_exc_raise_fmt(	FSD_ERRNO_INTERNAL_ERROR,"slurm_suspend error: %s,job_id: %s",slurm_strerror(slurm_get_errno()),self->job_id);
				}
				slurm_self->user_suspended = true;
				break;
			case DRMAA_CONTROL_HOLD:
				/* change priority to 0*/
				slurm_init_job_desc_msg(&job_desc);
				slurm_self->old_priority = job_desc.priority;
				job_desc.job_id = atoi(self->job_id);
				job_desc.priority = 0;
				job_desc.alloc_sid = 0;
				if(slurm_update_job(&job_desc) == -1) {
					fsd_exc_raise_fmt(	FSD_ERRNO_INTERNAL_ERROR,"slurm_update_job error: %s,job_id: %s",slurm_strerror(slurm_get_errno()),self->job_id);
				}
				break;
			case DRMAA_CONTROL_RESUME:
				if(slurm_resume(fsd_atoi(self->job_id)) == -1) {
					fsd_exc_raise_fmt(	FSD_ERRNO_INTERNAL_ERROR,"slurm_resume error: %s,job_id: %s",slurm_strerror(slurm_get_errno()),self->job_id);
				}
				slurm_self->user_suspended = false;
				break;
			case DRMAA_CONTROL_RELEASE:
			  /* change priority back*/
			  	slurm_init_job_desc_msg(&job_desc);
				job_desc.priority = INFINITE;
				job_desc.job_id = atoi(self->job_id);
				if(slurm_update_job(&job_desc) == -1) {
					fsd_exc_raise_fmt(	FSD_ERRNO_INTERNAL_ERROR,"slurm_update_job error: %s,job_id: %s",slurm_strerror(slurm_get_errno()),self->job_id);
				}
				break;
			case DRMAA_CONTROL_TERMINATE:
				if(slurm_kill_job(fsd_atoi(self->job_id),SIGKILL,0) == -1) {
					fsd_exc_raise_fmt(	FSD_ERRNO_INTERNAL_ERROR,"slurm_terminate_job error: %s,job_id: %s",slurm_strerror(slurm_get_errno()),self->job_id);
				}
				break;
			default:
				fsd_exc_raise_fmt(
						FSD_ERRNO_INVALID_ARGUMENT,
						"job::control: unknown action %d", action );
		 }
					
		fsd_log_debug(("job::control: successful"));
	 }
	FINALLY
	 {
		fsd_mutex_unlock( &self->session->drm_connection_mutex );
	 }
	END_TRY

	fsd_log_return(( "" ));
}
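
The DRMAA control mapping above pairs each verb with one libslurm call: slurm_suspend()/slurm_resume() for suspension, slurm_update_job() with priority 0 or INFINITE for hold/release (as in Example #1), and slurm_kill_job() for termination. A minimal sketch of the suspend/resume/terminate half, without the fsd_* exception machinery:

#include <stdio.h>
#include <signal.h>
#include <slurm/slurm.h>
#include <slurm/slurm_errno.h>

static void report(const char *call)
{
	fprintf(stderr, "%s: %s\n", call, slurm_strerror(slurm_get_errno()));
}

/* action: 's' = suspend, 'r' = resume, 'k' = kill.  0 on success. */
static int control_job(uint32_t job_id, char action)
{
	switch (action) {
	case 's':
		if (slurm_suspend(job_id)) { report("slurm_suspend"); return -1; }
		return 0;
	case 'r':
		if (slurm_resume(job_id)) { report("slurm_resume"); return -1; }
		return 0;
	case 'k':	/* SIGKILL to all steps; batch_flag 0, as above */
		if (slurm_kill_job(job_id, SIGKILL, 0)) { report("slurm_kill_job"); return -1; }
		return 0;
	default:
		return -1;
	}
}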
Example #9
File: test7.3.prog.c Project: VURM/slurm
int main (int argc, char *argv[])
{
	int i, min_nodes = 1, max_nodes = 1, nodes, tasks = 0, rc = 0;
	job_desc_msg_t job_req;
	resource_allocation_response_msg_t *job_resp;
	slurm_step_ctx_params_t step_params[1];
	slurm_step_ctx_t *ctx = NULL;
	slurm_step_launch_params_t launch[1];
	char *task_argv[3];
	int *fd_array = NULL;
	int num_fd;

	if (argc > 1) {
		i = atoi(argv[1]);
		if (i > 0)
			min_nodes = i;
	}
	if (argc > 2) {
		i = atoi(argv[2]);
		if (i > 0)
			max_nodes = i;
	}
	if (max_nodes < min_nodes)
		max_nodes = min_nodes;

	/* Create a job allocation */
	slurm_init_job_desc_msg( &job_req );
	job_req.min_nodes  = min_nodes;
	job_req.max_nodes  = max_nodes;
	job_req.user_id    = getuid();
	job_req.group_id   = getgid();
	job_req.time_limit = 1;
	if (slurm_allocate_resources(&job_req, &job_resp)) {
		slurm_perror ("slurm_allocate_resources");
		printf("INFO: min_nodes=%u max_nodes=%u user_id=%u group_id=%u",
		       job_req.min_nodes, job_req.max_nodes,
		       job_req.user_id, job_req.group_id);
		exit(0);
	}
	printf("job_id %u\n", job_resp->job_id);
	fflush(stdout);

	/* Wait for allocation request to be satisfied */
	if ((job_resp->node_list == NULL) ||
	    (strlen(job_resp->node_list) == 0)) {
		printf("Waiting for resource allocation\n");
		fflush(stdout);
		while ((job_resp->node_list == NULL) ||
		       (strlen(job_resp->node_list) == 0)) {
			sleep(5);
			if (slurm_allocation_lookup_lite(job_resp->job_id,
							 &job_resp) &&
			    (slurm_get_errno() != ESLURM_JOB_PENDING)) {
				slurm_perror("slurm_confirm_allocation");
				exit(0);
			}
		}
	}
	nodes = job_resp->node_cnt;
	if (argc > 3)
		tasks = atoi(argv[3]);
	if (tasks < 1)
		tasks = nodes * TASKS_PER_NODE;
	if (tasks < nodes) {
		fprintf(stderr, "Invalid task count argument\n");
		exit(1);
	}
	printf("Starting %d tasks on %d nodes\n", tasks, nodes);
	fflush(stdout);

	/*
	 * Create a job step context.
	 */
	slurm_step_ctx_params_t_init(step_params);
	step_params->job_id = job_resp->job_id;
	step_params->min_nodes = nodes;
	step_params->task_count = tasks;

	ctx = slurm_step_ctx_create(step_params);
	if ((ctx == NULL) &&
	    (slurm_get_errno() == ESLURM_PROLOG_RUNNING)) {
		printf("SlurmctldProlog is still running, "
		       "sleep and try again\n");
		sleep(10);
		ctx = slurm_step_ctx_create(step_params);
	}
	if (ctx == NULL) {
		slurm_perror("slurm_step_ctx_create");
		rc = 1;
		goto done;
	}

	/*
	 * Hack to run one task per node, regardless of what we set up
	 * when we created the job step context.
	 */
	if (slurm_step_ctx_daemon_per_node_hack(ctx) != SLURM_SUCCESS) {
		slurm_perror("slurm_step_ctx_daemon_per_node_hack");
		rc = 1;
		goto done;
	}

	/*
	 * Launch the tasks using "user managed" IO.
	 * "user managed" IO means a TCP stream for each task, directly
         * connected to the stdin, stdout, and stderr the task.
	 */
	slurm_step_launch_params_t_init(launch);
	task_argv[0] = "./test7.3.io";
	launch->argv = task_argv;
	launch->argc = 1;
	launch->user_managed_io = true; /* This is the key to using
					  "user managed" IO */

	if (slurm_step_launch(ctx, launch, NULL) != SLURM_SUCCESS) {
		slurm_perror("slurm_step_launch");
		rc = 1;
		goto done;
	}

	if (slurm_step_launch_wait_start(ctx) != SLURM_SUCCESS) {
		slurm_perror("slurm_step_launch_wait_start");
		rc = 1;
		goto done;
	}

	slurm_step_ctx_get(ctx, SLURM_STEP_CTX_USER_MANAGED_SOCKETS,
			   &num_fd, &fd_array);

	/* Interact with launched tasks as desired */
	_do_task_work(fd_array, tasks);

	for (i = 0; i < tasks; i++) {
		close(fd_array[i]);
	}

	slurm_step_launch_wait_finish(ctx);

	/* Terminate the job killing all tasks */
done:	slurm_kill_job(job_resp->job_id, SIGKILL, 0);

	/* clean up storage */
	slurm_free_resource_allocation_response_msg(job_resp);
	if (ctx)
		slurm_step_ctx_destroy(ctx);
	exit(0);
}
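
One piece the test program elides is _do_task_work(): with launch->user_managed_io set, SLURM_STEP_CTX_USER_MANAGED_SOCKETS yields one connected TCP socket per task, wired to that task's stdin/stdout/stderr. A hypothetical minimal version of the helper, just to show the shape of the interaction (the real test exchanges I/O with ./test7.3.io):

#include <stdio.h>
#include <unistd.h>

/* Hypothetical stand-in for _do_task_work(): writes reach each task's
 * stdin, reads return its stdout/stderr. */
static void _do_task_work(int *fd_array, int tasks)
{
	char buf[256];
	ssize_t n;
	int i;

	for (i = 0; i < tasks; i++) {
		if (write(fd_array[i], "hello\n", 6) < 0)
			continue;
		if ((n = read(fd_array[i], buf, sizeof(buf) - 1)) > 0) {
			buf[n] = '\0';
			printf("task %d: %s", i, buf);
		}
	}
}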
Example #10
/*
 * scontrol_update_job - update the slurm job configuration per the supplied
 *	arguments
 * IN argc - count of arguments
 * IN argv - list of arguments
 * RET 0 if no slurm error, errno otherwise. parsing error prints
 *			error message and returns 0
 */
extern int scontrol_update_job(int argc, char **argv)
{
	bool update_size = false;
	int i, update_cnt = 0, rc = SLURM_SUCCESS, rc2;
	char *tag, *val;
	int taglen, vallen;
	job_desc_msg_t job_msg;
	job_array_resp_msg_t *resp = NULL;
	uint32_t job_uid = NO_VAL;

	slurm_init_job_desc_msg (&job_msg);
	for (i = 0; i < argc; i++) {
		char *add_info = NULL;
		tag = argv[i];
		val = strchr(argv[i], '=');
		if (val) {
			taglen = val - argv[i];
			if ((taglen > 0) && ((val[-1] == '+') ||
					     (val[-1] == '-'))) {
				add_info = val - 1;
				taglen--;
			}
			val++;
			vallen = strlen(val);
		} else if (xstrncasecmp(tag, "Nice", MAX(strlen(tag), 2)) == 0){
			/* "Nice" is the only tag that might not have an
			   equal sign, so it is handled specially. */
			job_msg.nice = NICE_OFFSET + 100;
			update_cnt++;
			continue;
		} else if (!val && argv[i + 1]) {
			tag = argv[i];
			taglen = strlen(tag);
			val = argv[++i];
			vallen = strlen(val);
		} else {
			exit_code = 1;
			fprintf (stderr, "Invalid input: %s\n", argv[i]);
			fprintf (stderr, "Request aborted\n");
			return -1;
		}

		if (xstrncasecmp(tag, "JobId", MAX(taglen, 3)) == 0) {
			job_msg.job_id_str = val;
		}
		else if (xstrncasecmp(tag, "AdminComment",
				      MAX(taglen, 3)) == 0) {
			if (add_info) {
				if (add_info[0] == '-') {
					error("Invalid syntax, AdminComment can not be subtracted from.");
					exit_code = 1;
					return 0;
				}
				job_msg.admin_comment = add_info;
				/*
				 * Mark as unset so we know we handled this
				 * correctly as there is a check later to make
				 * sure we know we got a +-.
				 */
				add_info = NULL;
			} else
				job_msg.admin_comment = val;
			update_cnt++;
		}
		else if (xstrncasecmp(tag, "ArrayTaskThrottle",
				      MAX(taglen, 10)) == 0) {
			int throttle;
			throttle = strtoll(val, (char **) NULL, 10);
			if (throttle < 0) {
				error("Invalid ArrayTaskThrottle value");
				exit_code = 1;
				return 0;
			}
			job_msg.array_inx = val;
			update_cnt++;
		}
		else if (xstrncasecmp(tag, "Comment", MAX(taglen, 3)) == 0) {
			job_msg.comment = val;
			update_cnt++;
		}
		else if (xstrncasecmp(tag, "Clusters", MAX(taglen, 8)) == 0) {
			job_msg.clusters = val;
			update_cnt++;
		}
		else if (xstrncasecmp(tag, "ClusterFeatures",
				      MAX(taglen, 8)) == 0) {
			job_msg.cluster_features = val;
			update_cnt++;
		}
		else if (xstrncasecmp(tag, "DelayBoot", MAX(taglen, 5)) == 0) {
			int time_sec = time_str2secs(val);
			if (time_sec == NO_VAL) {
				error("Invalid DelayBoot value");
				exit_code = 1;
				return 0;
			}
			job_msg.delay_boot = time_sec;
			update_cnt++;
		}
		else if (xstrncasecmp(tag, "TimeLimit", MAX(taglen, 5)) == 0) {
			uint32_t job_current_time, time_limit;

			if (val && ((val[0] == '+') || (val[0] == '-'))) {
				if (add_info) {
					error("Invalid syntax, variations of +=- are not accepted.");
					exit_code = 1;
					return 0;
				}
				add_info = val;
				val++;
			}

			time_limit = time_str2mins(val);
			if (time_limit == NO_VAL) {
				error("Invalid TimeLimit value");
				exit_code = 1;
				return 0;
			}
			if (add_info) {
				if (!job_msg.job_id_str) {
					error("JobId must precede TimeLimit "
					      "increment or decrement");
					exit_code = 1;
					return 0;
				}

				job_current_time = _get_job_time(job_msg.
								 job_id_str);
				if (job_current_time == NO_VAL) {
					exit_code = 1;
					return 0;
				}
				if (add_info[0] == '+') {
					time_limit += job_current_time;
				} else if (time_limit > job_current_time) {
					error("TimeLimit decrement larger than"
					      " current time limit (%u > %u)",
					      time_limit, job_current_time);
					exit_code = 1;
					return 0;
				} else {
					time_limit = job_current_time -
						     time_limit;
				}
				/*
				 * Mark as unset so we know we handled this
				 * correctly as there is a check later to make
				 * sure we know we got a +-.
				 */
				add_info = NULL;
			}
			job_msg.time_limit = time_limit;
			update_cnt++;
		}
		else if (xstrncasecmp(tag, "TimeMin", MAX(taglen, 5)) == 0) {
			int time_min = time_str2mins(val);
			if ((time_min < 0) && (time_min != INFINITE)) {
				error("Invalid TimeMin value");
				exit_code = 1;
				return 0;
			}
			job_msg.time_min = time_min;
			update_cnt++;
		}
		else if (xstrncasecmp(tag, "Priority", MAX(taglen, 2)) == 0) {
			if (parse_uint32(val, &job_msg.priority)) {
				error ("Invalid Priority value: %s", val);
				exit_code = 1;
				return 0;
			}
			update_cnt++;
		}
		else if (xstrncasecmp(tag, "Nice", MAX(taglen, 2)) == 0) {
			long long tmp_nice;
			tmp_nice = strtoll(val, (char **)NULL, 10);
			if (llabs(tmp_nice) > (NICE_OFFSET - 3)) {
				error("Nice value out of range (+/- %u). Value "
				      "ignored", NICE_OFFSET - 3);
				exit_code = 1;
				return 0;
			}
			job_msg.nice = NICE_OFFSET + tmp_nice;
			update_cnt++;
		}
		else if (!xstrncasecmp(tag, "CPUsPerTask", MAX(taglen, 9))) {
			if (parse_uint16(val, &job_msg.cpus_per_task)) {
				error("Invalid CPUsPerTask value: %s", val);
				exit_code = 1;
				return 0;
			}
			update_cnt++;
		}
		else if (!xstrncasecmp(tag, "CpusPerTres", MAX(taglen, 9))) {
			job_msg.cpus_per_tres = val;
			update_cnt++;
		}
		else if (xstrncasecmp(tag, "NumCPUs", MAX(taglen, 6)) == 0) {
			int min_cpus, max_cpus=0;
			if (!get_resource_arg_range(val, "NumCPUs", &min_cpus,
						   &max_cpus, false) ||
			    (min_cpus <= 0) ||
			    (max_cpus && (max_cpus < min_cpus))) {
				error ("Invalid NumCPUs value: %s", val);
				exit_code = 1;
				return 0;
			}
			job_msg.min_cpus = min_cpus;
			if (max_cpus)
				job_msg.max_cpus = max_cpus;
			update_cnt++;
		}
		/* ReqProcs was removed in Slurm version 2.1 */
		else if ((xstrncasecmp(tag, "NumTasks", MAX(taglen, 8)) == 0) ||
			 (xstrncasecmp(tag, "ReqProcs", MAX(taglen, 8)) == 0)) {
			if (parse_uint32(val, &job_msg.num_tasks)) {
				error ("Invalid NumTasks value: %s", val);
				exit_code = 1;
				return 0;
			}
			update_cnt++;
		}
		else if (xstrncasecmp(tag, "Requeue", MAX(taglen, 4)) == 0) {
			if (parse_uint16(val, &job_msg.requeue)) {
				error ("Invalid Requeue value: %s", val);
				exit_code = 1;
				return 0;
			}
			update_cnt++;
		}
		/* ReqNodes was replaced by NumNodes in Slurm version 2.1 */
		else if ((xstrncasecmp(tag, "ReqNodes", MAX(taglen, 8)) == 0) ||
		         (xstrncasecmp(tag, "NumNodes", MAX(taglen, 8)) == 0)) {
			int min_nodes, max_nodes, rc;
			if (xstrcmp(val, "0") == 0) {
				job_msg.min_nodes = 0;
			} else if (xstrcasecmp(val, "ALL") == 0) {
				job_msg.min_nodes = INFINITE;
			} else {
				min_nodes = (int) job_msg.min_nodes;
				max_nodes = (int) job_msg.max_nodes;
				rc = get_resource_arg_range(
						val, "requested node count",
						&min_nodes, &max_nodes, false);
				if (!rc)
					return rc;
				job_msg.min_nodes = (uint32_t) min_nodes;
				job_msg.max_nodes = (uint32_t) max_nodes;
			}
			update_size = true;
			update_cnt++;
		}
		else if (xstrncasecmp(tag, "ReqSockets", MAX(taglen, 4)) == 0) {
			if (parse_uint16(val, &job_msg.sockets_per_node)) {
				error ("Invalid ReqSockets value: %s", val);
				exit_code = 1;
				return 0;
			}
			update_cnt++;
		}
		else if (xstrncasecmp(tag, "ReqCores", MAX(taglen, 4)) == 0) {
			if (parse_uint16(val, &job_msg.cores_per_socket)) {
				error ("Invalid ReqCores value: %s", val);
				exit_code = 1;
				return 0;
			}
			update_cnt++;
		}
		else if (xstrncasecmp(tag, "TasksPerNode", MAX(taglen, 2))==0) {
			if (parse_uint16(val, &job_msg.ntasks_per_node)) {
				error ("Invalid TasksPerNode value: %s", val);
				exit_code = 1;
				return 0;
			}
			update_cnt++;
		}
		else if (xstrncasecmp(tag, "ReqThreads", MAX(taglen, 4)) == 0) {
			if (parse_uint16(val, &job_msg.threads_per_core)) {
				error ("Invalid ReqThreads value: %s", val);
				exit_code = 1;
				return 0;
			}
			update_cnt++;
		}
		else if (xstrncasecmp(tag, "MinCPUsNode", MAX(taglen, 4)) == 0) {
			if (parse_uint16(val, &job_msg.pn_min_cpus)) {
				error ("Invalid MinCPUsNode value: %s", val);
				exit_code = 1;
				return 0;
			}
			update_cnt++;
		}
		else if (xstrncasecmp(tag, "MinMemoryNode",
				     MAX(taglen, 10)) == 0) {
			if (parse_uint64(val, &job_msg.pn_min_memory)) {
				error ("Invalid MinMemoryNode value: %s", val);
				exit_code = 1;
				return 0;
			}
			update_cnt++;
		}
		else if (xstrncasecmp(tag, "MinMemoryCPU",
				     MAX(taglen, 10)) == 0) {
			if (parse_uint64(val, &job_msg.pn_min_memory)) {
				error ("Invalid MinMemoryCPU value: %s", val);
				exit_code = 1;
				return 0;
			}
			job_msg.pn_min_memory |= MEM_PER_CPU;
			update_cnt++;
		}
		else if (xstrncasecmp(tag, "MinTmpDiskNode",
				     MAX(taglen, 5)) == 0) {
			if (parse_uint32(val, &job_msg.pn_min_tmp_disk)) {
				error ("Invalid MinTmpDiskNode value: %s", val);
				exit_code = 1;
				return 0;
			}
			update_cnt++;
		}
		else if (xstrncasecmp(tag, "Partition", MAX(taglen, 2)) == 0) {
			job_msg.partition = val;
			update_cnt++;
		}
		else if (xstrncasecmp(tag, "QOS", MAX(taglen, 2)) == 0) {
			job_msg.qos = val;
			update_cnt++;
		}
		else if (xstrncasecmp(tag, "ReservationName",
				     MAX(taglen, 3)) == 0) {
			job_msg.reservation = val;
			update_cnt++;
		}
		else if (!xstrncasecmp(tag, "Name", MAX(taglen, 2)) ||
			 !xstrncasecmp(tag, "JobName", MAX(taglen, 4))) {
			job_msg.name = val;
			update_cnt++;
		}
		else if (xstrncasecmp(tag, "WCKey", MAX(taglen, 1)) == 0) {
			job_msg.wckey = val;
			update_cnt++;
		}
		else if (xstrncasecmp(tag, "StdOut", MAX(taglen, 6)) == 0) {
			job_msg.std_out = val;
			update_cnt++;
		}
		else if (xstrncasecmp(tag, "Switches", MAX(taglen, 5)) == 0) {
			char *sep_char;
			job_msg.req_switch =
				(uint32_t) strtol(val, &sep_char, 10);
			update_cnt++;
			if (sep_char && sep_char[0] == '@') {
				job_msg.wait4switch = time_str2mins(sep_char+1)
						      * 60;
			}
		}
		else if (xstrncasecmp(tag, "wait-for-switch", MAX(taglen, 5))
			 == 0) {
			if (parse_uint32(val, &job_msg.wait4switch)) {
				error ("Invalid wait-for-switch value: %s", val);
				exit_code = 1;
				return 0;
			}
			update_cnt++;
		}
		else if (!xstrncasecmp(tag, "OverSubscribe", MAX(taglen, 2)) ||
			 !xstrncasecmp(tag, "Shared", MAX(taglen, 2))) {
			if (xstrncasecmp(val, "YES", MAX(vallen, 1)) == 0)
				job_msg.shared = 1;
			else if (xstrncasecmp(val, "NO", MAX(vallen, 1)) == 0)
				job_msg.shared = 0;
			else if (parse_uint16(val, &job_msg.shared)) {
				error("Invalid OverSubscribe value: %s", val);
				exit_code = 1;
				return 0;
			}
			update_cnt++;
		}
		else if (xstrncasecmp(tag, "Contiguous", MAX(taglen, 3)) == 0) {
			if (xstrncasecmp(val, "YES", MAX(vallen, 1)) == 0)
				job_msg.contiguous = 1;
			else if (xstrncasecmp(val, "NO", MAX(vallen, 1)) == 0)
				job_msg.contiguous = 0;
			else if (parse_uint16(val, &job_msg.contiguous)) {
				error ("Invalid Contiguous value: %s", val);
				exit_code = 1;
				return 0;
			}
			update_cnt++;
		}
		else if (xstrncasecmp(tag, "CoreSpec", MAX(taglen, 4)) == 0) {
			if (!xstrcmp(val, "-1") || !xstrcmp(val, "*"))
				job_msg.core_spec = INFINITE16;
			else if (parse_uint16(val, &job_msg.core_spec)) {
				error ("Invalid CoreSpec value: %s", val);
				exit_code = 1;
				return 0;
			}
			update_cnt++;
		}
		else if (!xstrncasecmp(tag, "MemPerTres", MAX(taglen, 5))) {
			job_msg.mem_per_tres = val;
			update_cnt++;
		}
		else if (xstrncasecmp(tag, "ThreadSpec", MAX(taglen, 4)) == 0) {
			if (!xstrcmp(val, "-1") || !xstrcmp(val, "*"))
				job_msg.core_spec = INFINITE16;
			else if (parse_uint16(val, &job_msg.core_spec)) {
				error ("Invalid ThreadSpec value: %s", val);
				exit_code = 1;
				return 0;
			} else
				job_msg.core_spec |= CORE_SPEC_THREAD;
			update_cnt++;
		}
		else if (!xstrncasecmp(tag, "TresBind", MAX(taglen, 5))) {
			job_msg.tres_bind = val;
			update_cnt++;
		}
		else if (!xstrncasecmp(tag, "TresFreq", MAX(taglen, 5))) {
			job_msg.tres_freq = val;
			update_cnt++;
		}
		else if (!xstrncasecmp(tag, "TresPerJob", MAX(taglen, 8))) {
			job_msg.tres_per_job = val;
			update_cnt++;
		}
		else if (!xstrncasecmp(tag, "TresPerNode", MAX(taglen, 8))) {
			/* "gres" replaced by "tres_per_node" in v18.08 */
			if (job_msg.tres_per_node)
				xstrfmtcat(job_msg.tres_per_node, ",%s", val);
			else
				job_msg.tres_per_node = xstrdup(val);
			update_cnt++;
		}
		else if (!xstrncasecmp(tag, "TresPerSocket", MAX(taglen, 8))) {
			job_msg.tres_per_socket = val;
			update_cnt++;
		}
		else if (!xstrncasecmp(tag, "TresPerTask", MAX(taglen, 8))) {
			job_msg.tres_per_task = val;
			update_cnt++;
		}
		else if (xstrncasecmp(tag, "ExcNodeList", MAX(taglen, 3)) == 0){
			job_msg.exc_nodes = val;
			update_cnt++;
		}
		else if (!xstrncasecmp(tag, "NodeList",    MAX(taglen, 8)) ||
			 !xstrncasecmp(tag, "ReqNodeList", MAX(taglen, 8))) {
			job_msg.req_nodes = val;
			update_size = true;
			update_cnt++;
		}
		else if (xstrncasecmp(tag, "Features", MAX(taglen, 1)) == 0) {
			job_msg.features = val;
			update_cnt++;
		}
		else if (xstrncasecmp(tag, "Gres", MAX(taglen, 2)) == 0) {
			/* "gres" replaced by "tres_per_node" in v18.08 */
			if (!xstrcasecmp(val, "help") ||
			    !xstrcasecmp(val, "list")) {
				print_gres_help();
			} else if (job_msg.tres_per_node) {
				xstrfmtcat(job_msg.tres_per_node, ",%s", val);
			} else {
				job_msg.tres_per_node = xstrdup(val);
				update_cnt++;
			}
		}
		else if (xstrncasecmp(tag, "Account", MAX(taglen, 1)) == 0) {
			job_msg.account = val;
			update_cnt++;
		}
		else if (xstrncasecmp(tag, "BurstBuffer", MAX(taglen, 1)) == 0) {
			job_msg.burst_buffer = val;
			update_cnt++;
		}
		else if (xstrncasecmp(tag, "Dependency", MAX(taglen, 1)) == 0) {
			job_msg.dependency = val;
			update_cnt++;
		}
		else if (xstrncasecmp(tag, "Licenses", MAX(taglen, 1)) == 0) {
			job_msg.licenses = val;
			update_cnt++;
		}
		else if (!xstrncasecmp(tag, "EligibleTime", MAX(taglen, 2)) ||
			 !xstrncasecmp(tag, "StartTime",    MAX(taglen, 2))) {
			if ((job_msg.begin_time = parse_time(val, 0))) {
				if (job_msg.begin_time < time(NULL))
					job_msg.begin_time = time(NULL);
				update_cnt++;
			}
		}
		else if (!xstrncasecmp(tag, "EndTime", MAX(taglen, 2))) {
			job_msg.end_time = parse_time(val, 0);
			update_cnt++;
		}
		else if (!xstrncasecmp(tag, "Reboot", MAX(taglen, 3))) {
			if (xstrncasecmp(val, "YES", MAX(vallen, 1)) == 0)
				job_msg.reboot = 1;
			else if (xstrncasecmp(val, "NO", MAX(vallen, 1)) == 0)
				job_msg.reboot = 0;
			else if (parse_uint16(val, &job_msg.reboot)) {
				error ("Invalid reboot value: %s", val);
				exit_code = 1;
				return 0;
			}
			update_cnt++;
		}
		else if (!xstrncasecmp(tag, "UserID", MAX(taglen, 3))) {
			uid_t user_id = 0;
			if (uid_from_string(val, &user_id) < 0) {
				exit_code = 1;
				fprintf (stderr, "Invalid UserID: %s\n", val);
				fprintf (stderr, "Request aborted\n");
				return 0;
			}
			job_uid = (uint32_t) user_id;
		}
		else if (!xstrncasecmp(tag, "Deadline", MAX(taglen, 3))) {
			if ((job_msg.deadline = parse_time(val, 0))) {
				update_cnt++;
			}
		}
		else {
			exit_code = 1;
			fprintf (stderr, "Update of this parameter is not "
				 "supported: %s\n", argv[i]);
			fprintf (stderr, "Request aborted\n");
			return 0;
		}

		if (add_info) {
			error("Option %s does not accept [+|-]= syntax", tag);
			exit_code = 1;
			return 0;
		}
	}

	if (update_cnt == 0) {
		exit_code = 1;
		fprintf (stderr, "No changes specified\n");
		return 0;
	}

	/* If specified, override uid with effective uid provided by
	 * -u <uid> or --uid=<uid> */
	if (euid != NO_VAL)
		job_msg.user_id = euid;

	if (!job_msg.job_id_str && job_msg.name) {
		/* Translate name to job ID string */
		job_msg.job_id_str = _job_name2id(job_msg.name, job_uid);
		if (!job_msg.job_id_str) {
			exit_code = 1;
			return 0;
		}
	}

	if (!job_msg.job_id_str) {
		error("No job ID specified");
		exit_code = 1;
		return 0;
	}

	if (update_size && !_is_single_job(job_msg.job_id_str)) {
		exit_code = 1;
		return 0;
	}

	if (_is_job_id(job_msg.job_id_str)) {
		job_msg.job_id_str = _next_job_id();
		while (job_msg.job_id_str) {
			rc2 = slurm_update_job2(&job_msg, &resp);
			if (update_size && (rc2 == SLURM_SUCCESS)) {
				/* See check above for one job ID */
				job_msg.job_id = slurm_atoul(job_msg.job_id_str);
				_update_job_size(job_msg.job_id);
			}
			if (rc2 != SLURM_SUCCESS) {
				rc2 = slurm_get_errno();
				rc = MAX(rc, rc2);
				exit_code = 1;
				if (quiet_flag != 1) {
					fprintf(stderr, "%s for job %s\n",
						slurm_strerror(slurm_get_errno()),
						job_msg.job_id_str);
				}
			} else if (resp) {
				for (i = 0; i < resp->job_array_count; i++) {
					if ((resp->error_code[i] == SLURM_SUCCESS)
					    && (resp->job_array_count == 1))
						continue;
					exit_code = 1;
					if (quiet_flag == 1)
						continue;
					fprintf(stderr, "%s: %s\n",
						resp->job_array_id[i],
						slurm_strerror(resp->
							       error_code[i]));
				}
				slurm_free_job_array_resp(resp);
				resp = NULL;
			}
			job_msg.job_id_str = _next_job_id();
		}
	} else if (job_msg.job_id_str) {
		exit_code = 1;
		rc = ESLURM_INVALID_JOB_ID;
		slurm_seterrno(rc);
		if (quiet_flag != 1) {
			fprintf(stderr, "%s for job %s\n",
				slurm_strerror(rc), job_msg.job_id_str);
		}
	}

	return rc;
}
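
The loop above only fills in the fields that actually changed; everything else stays at the defaults from slurm_init_job_desc_msg(), which the controller ignores. A minimal sketch of the same update path (the job ID and the chosen field are hypothetical, and error handling is reduced to one message):

#include <stdio.h>
#include <slurm/slurm.h>
#include <slurm/slurm_errno.h>

int main(void)
{
	job_desc_msg_t job_msg;

	slurm_init_job_desc_msg(&job_msg);
	job_msg.job_id  = 1234;		/* hypothetical job ID */
	job_msg.comment = "updated";	/* any single changed field */

	/* Only the non-default fields are applied by the controller. */
	if (slurm_update_job(&job_msg) != SLURM_SUCCESS) {
		fprintf(stderr, "slurm_update_job: %s\n",
			slurm_strerror(slurm_get_errno()));
		return 1;
	}
	return 0;
}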
Example #11
/*
 * scontrol_hold - perform some job hold/release operation
 * IN op	- hold/release operation
 * IN job_str	- a job ID or job name
 * RET 0 if no slurm error, errno otherwise. An invalid job string
 *		prints an error message and returns ESLURM_INVALID_JOB_ID
 */
extern int
scontrol_hold(char *op, char *job_str)
{
	static uint32_t last_job_id = NO_VAL;
	static job_info_msg_t *jobs = NULL;
	job_array_resp_msg_t *resp = NULL;
	int i, rc = SLURM_SUCCESS, rc2;
	int j;
	job_desc_msg_t job_msg;
	uint32_t job_id = 0;
	char *job_name = NULL;
	char *job_id_str = NULL;
	slurm_job_info_t *job_ptr;

	if (job_str && !xstrncasecmp(job_str, "JobID=", 6))
		job_str += 6;
	if (job_str && !xstrncasecmp(job_str, "Job=", 4))
		job_str += 4;

	slurm_init_job_desc_msg (&job_msg);
	if ((xstrncasecmp(op, "holdu", 5) == 0) ||
	    (xstrncasecmp(op, "uhold", 5) == 0)) {
		job_msg.priority = 0;
		job_msg.alloc_sid = ALLOC_SID_USER_HOLD;
	} else if (xstrncasecmp(op, "hold", 4) == 0) {
		job_msg.priority = 0;
		job_msg.alloc_sid = 0;
	} else
		job_msg.priority = INFINITE;

	if (_is_job_id(job_str)) {
		while ((job_msg.job_id_str = _next_job_id())) {
			rc2 = slurm_update_job2(&job_msg, &resp);
			if (rc2 != SLURM_SUCCESS) {
				rc2 = slurm_get_errno();
				rc = MAX(rc, rc2);
				exit_code = 1;
				if (quiet_flag != 1) {
					fprintf(stderr, "%s for job %s\n",
						slurm_strerror(rc2),
						job_msg.job_id_str);
				}
			} else if (resp) {
				for (i = 0; i < resp->job_array_count; i++) {
					if ((resp->error_code[i] == SLURM_SUCCESS)
					    && (resp->job_array_count == 1))
						continue;
					exit_code = 1;
					if (quiet_flag == 1)
						continue;
					fprintf(stderr, "%s: %s\n",
						resp->job_array_id[i],
						slurm_strerror(resp->
							       error_code[i]));
				}
				slurm_free_job_array_resp(resp);
				resp = NULL;
			}
		}
		return rc;
	} else if (job_str) {
		if (!xstrncasecmp(job_str, "Name=", 5)) {
			job_str += 5;
			job_id = 0;
			job_name = job_str;
			last_job_id = NO_VAL;
		} else {
			exit_code = 1;
			rc = ESLURM_INVALID_JOB_ID;
			slurm_seterrno(rc);
			if (quiet_flag != 1) {
				fprintf(stderr, "%s for job %s\n",
					slurm_strerror(rc), job_str);
			}
			return rc;
		}
	} else {
		last_job_id = NO_VAL;	/* Refresh cache on next call */
		return 0;
	}

	if (last_job_id != job_id) {
		if (scontrol_load_job(&jobs, job_id)) {
			if (quiet_flag == -1)
				slurm_perror ("slurm_load_job error");
			return 1;
		}
		last_job_id = job_id;
	}

	/* Hold/release every job that matches the requested name */
	for (i = 0, job_ptr = jobs->job_array; i < jobs->record_count;
	     i++, job_ptr++) {
		if (xstrcmp(job_name, job_ptr->name))
			continue;

		if (!IS_JOB_PENDING(job_ptr)) {
			if (job_ptr->array_task_id != NO_VAL)
				continue;
			slurm_seterrno(ESLURM_JOB_NOT_PENDING);
			rc = MAX(rc, ESLURM_JOB_NOT_PENDING);
		}

		if (job_ptr->array_task_str) {
			xstrfmtcat(job_id_str, "%u_%s",
				   job_ptr->array_job_id,
				   job_ptr->array_task_str);
		} else if (job_ptr->array_task_id != NO_VAL) {
			xstrfmtcat(job_id_str, "%u_%u",
				   job_ptr->array_job_id,
				   job_ptr->array_task_id);
		} else {
			xstrfmtcat(job_id_str, "%u", job_ptr->job_id);
		}
		job_msg.job_id_str = job_id_str;
		rc2 = slurm_update_job2(&job_msg, &resp);
		if (rc2 != SLURM_SUCCESS) {
			rc2 = slurm_get_errno();
			rc = MAX(rc, rc2);
			exit_code = 1;
			if (quiet_flag != 1) {
				fprintf(stderr, "%s for job %s\n",
					slurm_strerror(rc2),
					job_msg.job_id_str);
			}
		} else if (resp) {
			for (j = 0; j < resp->job_array_count; j++) {
				if ((resp->error_code[j] == SLURM_SUCCESS) &&
				    (resp->job_array_count == 1))
					continue;
				exit_code = 1;
				if (quiet_flag == 1)
					continue;
				fprintf(stderr, "%s: %s\n",
					resp->job_array_id[j],
					slurm_strerror(resp->error_code[j]));
			}
			slurm_free_job_array_resp(resp);
			resp = NULL;
		}
		xfree(job_id_str);
	}

	return rc;
}
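
For job arrays the interesting call is slurm_update_job2(): it accepts an ID string covering many tasks and reports a per-task status vector. A hedged sketch of holding an entire (hypothetical) array with it:

#include <stdio.h>
#include <slurm/slurm.h>
#include <slurm/slurm_errno.h>

int main(void)
{
	job_desc_msg_t job_msg;
	job_array_resp_msg_t *resp = NULL;
	int i;

	slurm_init_job_desc_msg(&job_msg);
	job_msg.job_id_str = "4567_[0-3]";	/* hypothetical array tasks */
	job_msg.priority   = 0;			/* 0 = hold, INFINITE = release */

	if (slurm_update_job2(&job_msg, &resp) != SLURM_SUCCESS) {
		fprintf(stderr, "%s\n", slurm_strerror(slurm_get_errno()));
		return 1;
	}
	/* resp carries one entry per affected array task. */
	for (i = 0; resp && (i < resp->job_array_count); i++) {
		if (resp->error_code[i] != SLURM_SUCCESS)
			fprintf(stderr, "%s: %s\n", resp->job_array_id[i],
				slurm_strerror(resp->error_code[i]));
	}
	if (resp)
		slurm_free_job_array_resp(resp);
	return 0;
}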
Example #12
int main(int argc, char *argv[])
{
	log_options_t logopt = LOG_OPTS_STDERR_ONLY;
	job_desc_msg_t desc;
	resource_allocation_response_msg_t *alloc;
	time_t before, after;
	allocation_msg_thread_t *msg_thr;
	char **env = NULL;
	int status = 0;
	int retries = 0;
	pid_t pid  = getpid();
	pid_t tpgid = 0;
	pid_t rc_pid = 0;
	int i, rc = 0;
	static char *msg = "Slurm job queue full, sleeping and retrying.";
	slurm_allocation_callbacks_t callbacks;

	log_init(xbasename(argv[0]), logopt, 0, NULL);
	_set_exit_code();

	if (spank_init_allocator() < 0) {
		error("Failed to initialize plugin stack");
		exit(error_exit);
	}

	/* Be sure to call spank_fini when salloc exits
	 */
	if (atexit((void (*) (void)) spank_fini) < 0)
		error("Failed to register atexit handler for plugins: %m");


	if (initialize_and_process_args(argc, argv) < 0) {
		error("salloc parameter parsing");
		exit(error_exit);
	}
	/* reinit log with new verbosity (if changed by command line) */
	if (opt.verbose || opt.quiet) {
		logopt.stderr_level += opt.verbose;
		logopt.stderr_level -= opt.quiet;
		logopt.prefix_level = 1;
		log_alter(logopt, 0, NULL);
	}

	if (spank_init_post_opt() < 0) {
		error("Plugin stack post-option processing failed");
		exit(error_exit);
	}

	_set_spank_env();
	_set_submit_dir_env();
	if (opt.cwd && chdir(opt.cwd)) {
		error("chdir(%s): %m", opt.cwd);
		exit(error_exit);
	}

	if (opt.get_user_env_time >= 0) {
		char *user = uid_to_string(opt.uid);
		if (strcmp(user, "nobody") == 0) {
			error("Invalid user id %u: %m", (uint32_t)opt.uid);
			exit(error_exit);
		}
		env = env_array_user_default(user,
					     opt.get_user_env_time,
					     opt.get_user_env_mode);
		xfree(user);
		if (env == NULL)
			exit(error_exit);    /* error already logged */
		_set_rlimits(env);
	}

	/*
	 * Job control for interactive salloc sessions: only if ...
	 *
	 * a) input is from a terminal (stdin has valid termios attributes),
	 * b) controlling terminal exists (non-negative tpgid),
	 * c) salloc is not run in allocation-only (--no-shell) mode,
	 * d) salloc runs in its own process group (true in interactive
	 *    shells that support job control),
	 * e) salloc has been configured at compile-time to support background
	 *    execution and is not currently in the background process group.
	 */
	if (tcgetattr(STDIN_FILENO, &saved_tty_attributes) < 0) {
		/*
		 * Test existence of controlling terminal (tpgid > 0)
		 * after first making sure stdin is not redirected.
		 */
	} else if ((tpgid = tcgetpgrp(STDIN_FILENO)) < 0) {
		if (!opt.no_shell) {
			error("no controlling terminal: please set --no-shell");
			exit(error_exit);
		}
	} else if ((!opt.no_shell) && (pid == getpgrp())) {
		if (tpgid == pid)
			is_interactive = true;
#ifdef SALLOC_RUN_FOREGROUND
		while (tcgetpgrp(STDIN_FILENO) != pid) {
			if (!is_interactive) {
				error("Waiting for program to be placed in "
				      "the foreground");
				is_interactive = true;
			}
			killpg(pid, SIGTTIN);
		}
#endif
	}
	/*
	 * Reset saved tty attributes at exit, in case a child
	 * process died before properly resetting terminal.
	 */
	if (is_interactive)
		atexit (_reset_input_mode);

	/*
	 * Request a job allocation
	 */
	slurm_init_job_desc_msg(&desc);
	if (_fill_job_desc_from_opts(&desc) == -1) {
		exit(error_exit);
	}
	if (opt.gid != (gid_t) -1) {
		if (setgid(opt.gid) < 0) {
			error("setgid: %m");
			exit(error_exit);
		}
	}

	callbacks.ping = _ping_handler;
	callbacks.timeout = _timeout_handler;
	callbacks.job_complete = _job_complete_handler;
	callbacks.user_msg = _user_msg_handler;
	callbacks.node_fail = _node_fail_handler;
	/* create message thread to handle pings and such from slurmctld */
	msg_thr = slurm_allocation_msg_thr_create(&desc.other_port,
						  &callbacks);

	/* NOTE: Do not process signals in separate pthread. The signal will
	 * cause slurm_allocate_resources_blocking() to exit immediately. */
	for (i = 0; sig_array[i]; i++)
		xsignal(sig_array[i], _signal_while_allocating);

	before = time(NULL);
	while ((alloc = slurm_allocate_resources_blocking(&desc, opt.immediate,
					_pending_callback)) == NULL) {
		if ((errno != ESLURM_ERROR_ON_DESC_TO_RECORD_COPY) ||
		    (retries >= MAX_RETRIES))
			break;
		if (retries == 0)
			error("%s", msg);
		else
			debug("%s", msg);
		sleep (++retries);
	}

	/* become the user after the allocation has been requested. */
	if (opt.uid != (uid_t) -1) {
		if (setuid(opt.uid) < 0) {
			error("setuid: %m");
			exit(error_exit);
		}
	}
	if (alloc == NULL) {
		if (allocation_interrupted) {
			/* cancelled by signal */
			info("Job aborted due to signal");
		} else if (errno == EINTR) {
			error("Interrupted by signal."
			      "  Allocation request rescinded.");
		} else if (opt.immediate &&
			   ((errno == ETIMEDOUT) ||
			    (errno == ESLURM_NOT_TOP_PRIORITY) ||
			    (errno == ESLURM_NODES_BUSY))) {
			error("Unable to allocate resources: %m");
			error_exit = immediate_exit;
		} else {
			error("Failed to allocate resources: %m");
		}
		slurm_allocation_msg_thr_destroy(msg_thr);
		exit(error_exit);
	} else if (!allocation_interrupted) {
		/*
		 * Allocation granted!
		 */
		info("Granted job allocation %u", alloc->job_id);
		pending_job_id = alloc->job_id;
#ifdef HAVE_BG
		if (!_wait_bluegene_block_ready(alloc)) {
			if (!allocation_interrupted)
				error("Something is wrong with the "
				      "boot of the block.");
			goto relinquish;
		}
#else
		if (!_wait_nodes_ready(alloc)) {
			if (!allocation_interrupted)
				error("Something is wrong with the "
				      "boot of the nodes.");
			goto relinquish;
		}
#endif
	}

	after = time(NULL);
	if (opt.bell == BELL_ALWAYS
	    || (opt.bell == BELL_AFTER_DELAY
		&& ((after - before) > DEFAULT_BELL_DELAY))) {
		_ring_terminal_bell();
	}
	if (opt.no_shell)
		exit(0);
	if (allocation_interrupted) {
		/* salloc process received a signal after
		 * slurm_allocate_resources_blocking returned with the
		 * allocation, but before the new signal handlers were
		 * registered.
		 */
		goto relinquish;
	}

	/*
	 * Run the user's command.
	 */
	if (env_array_for_job(&env, alloc, &desc) != SLURM_SUCCESS)
		goto relinquish;

	/* Add default task count for srun, if not already set */
	if (opt.ntasks_set) {
		env_array_append_fmt(&env, "SLURM_NTASKS", "%d", opt.ntasks);
		/* keep around for old scripts */
		env_array_append_fmt(&env, "SLURM_NPROCS", "%d", opt.ntasks);
	}
	if (opt.cpus_per_task > 1) {
		env_array_append_fmt(&env, "SLURM_CPUS_PER_TASK", "%d",
				     opt.cpus_per_task);
	}
	if (opt.overcommit) {
		env_array_append_fmt(&env, "SLURM_OVERCOMMIT", "%d",
			opt.overcommit);
	}
	if (opt.acctg_freq >= 0) {
		env_array_append_fmt(&env, "SLURM_ACCTG_FREQ", "%d",
			opt.acctg_freq);
	}
	if (opt.network)
		env_array_append_fmt(&env, "SLURM_NETWORK", "%s", opt.network);

	env_array_set_environment(env);
	env_array_free(env);
	pthread_mutex_lock(&allocation_state_lock);
	if (allocation_state == REVOKED) {
		error("Allocation was revoked for job %u before command could "
		      "be run", alloc->job_id);
		pthread_mutex_unlock(&allocation_state_lock);
		if (slurm_complete_job(alloc->job_id, status) != 0) {
			error("Unable to clean up allocation for job %u: %m",
			      alloc->job_id);
		}
		return 1;
	}
	allocation_state = GRANTED;
	pthread_mutex_unlock(&allocation_state_lock);

	/*  Ensure that salloc has initial terminal foreground control.  */
	if (is_interactive) {
		/*
		 * Ignore remaining job-control signals (other than those in
		 * sig_array, which at this state act like SIG_IGN).
		 */
		xsignal(SIGTSTP, SIG_IGN);
		xsignal(SIGTTIN, SIG_IGN);
		xsignal(SIGTTOU, SIG_IGN);

		pid = getpid();
		setpgid(pid, pid);

		tcsetpgrp(STDIN_FILENO, pid);
	}

	command_pid = _fork_command(command_argv);
	/*
	 * Wait for command to exit, OR for waitpid to be interrupted by a
	 * signal.  Either way, we are going to release the allocation next.
	 */
	if (command_pid > 0) {
		setpgid(command_pid, command_pid);
		if (is_interactive)
			tcsetpgrp(STDIN_FILENO, command_pid);

		/* NOTE: Do not process signals in separate pthread.
		 * The signal will cause waitpid() to exit immediately. */
		xsignal(SIGHUP,  _exit_on_signal);

		/* Use WUNTRACED to treat stopped children like terminated ones */
		do {
			rc_pid = waitpid(command_pid, &status, WUNTRACED);
		} while ((rc_pid == -1) && (!exit_flag));

		if ((rc_pid == -1) && (errno != EINTR))
			error("waitpid for %s failed: %m", command_argv[0]);
	}

	if (is_interactive)
		tcsetpgrp(STDIN_FILENO, pid);

	/*
	 * Relinquish the job allocation (if not already revoked).
	 */
relinquish:
	pthread_mutex_lock(&allocation_state_lock);
	if (allocation_state != REVOKED) {
		pthread_mutex_unlock(&allocation_state_lock);

		info("Relinquishing job allocation %d", alloc->job_id);
		if ((slurm_complete_job(alloc->job_id, status) != 0) &&
		    (slurm_get_errno() != ESLURM_ALREADY_DONE))
			error("Unable to clean up job allocation %d: %m",
			      alloc->job_id);
		pthread_mutex_lock(&allocation_state_lock);
		allocation_state = REVOKED;
	}
	pthread_mutex_unlock(&allocation_state_lock);

	slurm_free_resource_allocation_response_msg(alloc);
	slurm_allocation_msg_thr_destroy(msg_thr);

	/*
	 * Figure out what return code we should use.  If the user's command
	 * exited normally, return the user's return code.
	 */
	rc = 1;
	if (rc_pid != -1) {

		if (WIFEXITED(status)) {
			rc = WEXITSTATUS(status);
		} else if (WIFSIGNALED(status)) {
			verbose("Command \"%s\" was terminated by signal %d",
				command_argv[0], WTERMSIG(status));
			/* if we get these signals we return a normal
			 * exit since this was most likely sent from the
			 * user */
			switch(WTERMSIG(status)) {
			case SIGHUP:
			case SIGINT:
			case SIGQUIT:
			case SIGKILL:
				rc = 0;
				break;
			default:
				break;
			}
		}
	}
	return rc;
}
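
Stripped of the signal, terminal, and callback plumbing, the allocation lifecycle in main() comes down to four calls. A minimal sketch under those simplifications (job name and size are placeholders; passing NULL for the pending callback is an assumption here):

#include <stdio.h>
#include <unistd.h>
#include <slurm/slurm.h>

int main(void)
{
	job_desc_msg_t desc;
	resource_allocation_response_msg_t *alloc;

	slurm_init_job_desc_msg(&desc);
	desc.name      = "sketch";	/* placeholder job name */
	desc.min_nodes = 1;
	desc.user_id   = getuid();
	desc.group_id  = getgid();

	/* Blocks until the allocation is granted or fails; the third
	 * argument is the "pending" callback, assumed NULL-able. */
	alloc = slurm_allocate_resources_blocking(&desc, 0, NULL);
	if (!alloc) {
		slurm_perror("slurm_allocate_resources_blocking");
		return 1;
	}
	printf("Granted job allocation %u\n", alloc->job_id);

	/* ... run work inside the allocation here ... */

	slurm_complete_job(alloc->job_id, 0);
	slurm_free_resource_allocation_response_msg(alloc);
	return 0;
}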
Example #13
File: update_job.c  Project: Cray/slurm
/*
 * scontrol_hold - perform a job hold/release operation
 * IN op - hold/release operation
 * IN job_id_str - a job id
 * RET 0 if no slurm error, errno otherwise. A parsing error prints an
 *		error message and returns 1
 */
extern int
scontrol_hold(char *op, char *job_id_str)
{
	int i, rc = SLURM_SUCCESS;
	char *next_str;
	job_desc_msg_t job_msg;
	uint32_t job_id;
	uint32_t array_id;
	job_info_msg_t *resp;
	slurm_job_info_t *job_ptr;

	if (job_id_str) {
		job_id = (uint32_t) strtol(job_id_str, &next_str, 10);
		if (next_str[0] == '_')
			array_id = strtol(next_str+1, &next_str, 10);
		else
			array_id = NO_VAL;
		if ((job_id == 0) || (next_str[0] != '\0')) {
			fprintf(stderr, "Invalid job id specified\n");
			return 1;
		}
	} else {
		fprintf(stderr, "Invalid job id specified\n");
		return 1;
	}

	if (scontrol_load_job(&resp, job_id)) {
		if (quiet_flag == -1)
			slurm_perror ("slurm_load_job error");
		return 1;
	}

	slurm_init_job_desc_msg (&job_msg);
	job_msg.job_id = job_id;
	/* set current user, needed e.g., for AllowGroups checks */
	job_msg.user_id = getuid();
	if ((strncasecmp(op, "holdu", 5) == 0) ||
	    (strncasecmp(op, "uhold", 5) == 0)) {
		job_msg.priority = 0;
		job_msg.alloc_sid = ALLOC_SID_USER_HOLD;
	} else if (strncasecmp(op, "hold", 4) == 0) {
		job_msg.priority = 0;
		job_msg.alloc_sid = 0;
	} else
		job_msg.priority = INFINITE;
	for (i = 0, job_ptr = resp->job_array; i < resp->record_count;
	     i++, job_ptr++) {
		if ((array_id != NO_VAL) &&
		    (job_ptr->array_task_id != array_id))
			continue;

		if (!IS_JOB_PENDING(job_ptr)) {
			if ((array_id == NO_VAL) &&
			    (job_ptr->array_task_id != NO_VAL))
				continue;
			slurm_seterrno(ESLURM_JOB_NOT_PENDING);
			return ESLURM_JOB_NOT_PENDING;
		}

		job_msg.job_id = job_ptr->job_id;
		if (slurm_update_job(&job_msg))
			rc = slurm_get_errno();
	}

	return rc;
}
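
This variant's "jobid" / "jobid_taskid" parsing is worth isolating. A small sketch of the same strtol() dance, assuming (as above) that NO_VAL marks "no array task requested":

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <slurm/slurm.h>	/* NO_VAL */

/* Parse "jobid" or "jobid_taskid"; 0 on success, -1 on a malformed
 * string (mirrors the checks in scontrol_hold above). */
static int parse_job_id(const char *str, uint32_t *job_id,
			uint32_t *array_id)
{
	char *next_str;

	*job_id = (uint32_t) strtol(str, &next_str, 10);
	if (next_str[0] == '_')
		*array_id = (uint32_t) strtol(next_str + 1, &next_str, 10);
	else
		*array_id = NO_VAL;
	if ((*job_id == 0) || (next_str[0] != '\0'))
		return -1;
	return 0;
}

int main(void)
{
	uint32_t jid, aid;

	if (parse_job_id("1234_7", &jid, &aid) == 0)
		printf("job %u, array task %u\n", jid, aid);
	return 0;
}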
Example #14
/*
 * Create job description structure based off srun options
 * (see opt.h)
 */
static job_desc_msg_t *_job_desc_msg_create_from_opts(slurm_opt_t *opt_local)
{
	srun_opt_t *srun_opt = opt_local->srun_opt;
	job_desc_msg_t *j = xmalloc(sizeof(*j));
	hostlist_t hl = NULL;
	xassert(srun_opt);

	slurm_init_job_desc_msg(j);
#if defined HAVE_ALPS_CRAY && defined HAVE_REAL_CRAY
	static bool sgi_err_logged = false;
	uint64_t pagg_id = job_getjid(getpid());
	/*
	 * Interactive sessions require pam_job.so in /etc/pam.d/common-session
	 * since creating sgi_job containers requires root permissions. This is
	 * the only exception where we allow the fallback of using the SID to
	 * confirm the reservation (caught later, in do_basil_confirm).
	 */
	if (pagg_id != (uint64_t) -1) {
		if (!j->select_jobinfo)
			j->select_jobinfo = select_g_select_jobinfo_alloc();

		select_g_select_jobinfo_set(j->select_jobinfo,
					    SELECT_JOBDATA_PAGG_ID, &pagg_id);
	} else if (!sgi_err_logged) {
		error("No SGI job container ID detected - please enable the "
		      "Cray job service via /etc/init.d/job");
		sgi_err_logged = true;
	}
#endif

	j->contiguous     = opt_local->contiguous;
	if (opt_local->core_spec != NO_VAL16)
		j->core_spec      = opt_local->core_spec;
	j->features       = opt_local->constraints;
	j->cluster_features = opt_local->c_constraints;
	if (opt_local->gres && xstrcasecmp(opt_local->gres, "NONE"))
		j->gres   = opt_local->gres;
	if (opt_local->immediate == 1)
		j->immediate = opt_local->immediate;
	if (opt_local->job_name)
		j->name   = opt_local->job_name;
	else
		j->name = srun_opt->cmd_name;
	if (srun_opt->argc > 0) {
		j->argc    = 1;
		j->argv    = (char **) xmalloc(sizeof(char *) * 2);
		j->argv[0] = xstrdup(srun_opt->argv[0]);
	}
	if (opt_local->acctg_freq)
		j->acctg_freq     = xstrdup(opt_local->acctg_freq);
	j->reservation    = opt_local->reservation;
	j->wckey          = opt_local->wckey;
	j->x11 = opt.x11;
	if (j->x11) {
		j->x11_magic_cookie = xstrdup(opt.x11_magic_cookie);
		j->x11_target_port = opt.x11_target_port;
	}

	j->req_nodes      = xstrdup(opt_local->nodelist);

	/* simplify the job allocation nodelist,
	 * not laying out tasks until step */
	if (j->req_nodes) {
		hl = hostlist_create(j->req_nodes);
		xfree(opt_local->nodelist);
		opt_local->nodelist = hostlist_ranged_string_xmalloc(hl);
		hostlist_uniq(hl);
		xfree(j->req_nodes);
		j->req_nodes = hostlist_ranged_string_xmalloc(hl);
		hostlist_destroy(hl);
	}

	if (((opt_local->distribution & SLURM_DIST_STATE_BASE) ==
	     SLURM_DIST_ARBITRARY) && !j->req_nodes) {
		error("With Arbitrary distribution you need to "
		      "specify a nodelist or hostfile with the -w option");
		return NULL;
	}
	j->extra = opt_local->extra;
	j->exc_nodes      = opt_local->exc_nodes;
	j->partition      = opt_local->partition;
	j->min_nodes      = opt_local->min_nodes;
	if (opt_local->sockets_per_node != NO_VAL)
		j->sockets_per_node    = opt_local->sockets_per_node;
	if (opt_local->cores_per_socket != NO_VAL)
		j->cores_per_socket      = opt_local->cores_per_socket;
	if (opt_local->threads_per_core != NO_VAL) {
		j->threads_per_core    = opt_local->threads_per_core;
		/* if 1 always make sure affinity knows about it */
		if (j->threads_per_core == 1)
			srun_opt->cpu_bind_type |= CPU_BIND_ONE_THREAD_PER_CORE;
	}
	j->user_id        = opt_local->uid;
	j->dependency     = opt_local->dependency;
	if (opt_local->nice != NO_VAL)
		j->nice   = NICE_OFFSET + opt_local->nice;
	if (opt_local->priority)
		j->priority = opt_local->priority;
	if (srun_opt->cpu_bind)
		j->cpu_bind = srun_opt->cpu_bind;
	if (srun_opt->cpu_bind_type)
		j->cpu_bind_type = srun_opt->cpu_bind_type;
	if (opt_local->delay_boot != NO_VAL)
		j->delay_boot = opt_local->delay_boot;
	if (opt_local->mem_bind)
		j->mem_bind       = opt_local->mem_bind;
	if (opt_local->mem_bind_type)
		j->mem_bind_type  = opt_local->mem_bind_type;
	if (opt_local->plane_size != NO_VAL)
		j->plane_size     = opt_local->plane_size;
	j->task_dist      = opt_local->distribution;

	j->group_id       = opt_local->gid;
	j->mail_type      = opt_local->mail_type;

	if (opt_local->ntasks_per_node != NO_VAL)
		j->ntasks_per_node   = opt_local->ntasks_per_node;
	if (opt_local->ntasks_per_socket != NO_VAL)
		j->ntasks_per_socket = opt_local->ntasks_per_socket;
	if (opt_local->ntasks_per_core != NO_VAL)
		j->ntasks_per_core   = opt_local->ntasks_per_core;

	if (opt_local->mail_user)
		j->mail_user = opt_local->mail_user;
	if (opt_local->burst_buffer)
		j->burst_buffer = opt_local->burst_buffer;
	if (opt_local->begin)
		j->begin_time = opt_local->begin;
	if (opt_local->deadline)
		j->deadline = opt_local->deadline;
	if (opt_local->licenses)
		j->licenses = opt_local->licenses;
	if (opt_local->network)
		j->network = opt_local->network;
	if (opt_local->profile)
		j->profile = opt_local->profile;
	if (opt_local->account)
		j->account = opt_local->account;
	if (opt_local->comment)
		j->comment = opt_local->comment;
	if (opt_local->qos)
		j->qos = opt_local->qos;
	if (opt_local->cwd)
		j->work_dir = opt_local->cwd;

	if (opt_local->hold)
		j->priority     = 0;
	if (opt_local->jobid != NO_VAL)
		j->job_id	= opt_local->jobid;
#ifdef HAVE_BG
	if (opt_local->geometry[0] > 0) {
		int i;
		for (i = 0; i < SYSTEM_DIMENSIONS; i++)
			j->geometry[i] = opt_local->geometry[i];
	}
#endif

	memcpy(j->conn_type, opt_local->conn_type, sizeof(j->conn_type));

	if (opt_local->reboot)
		j->reboot = 1;
	if (opt_local->no_rotate)
		j->rotate = 0;

	if (opt_local->blrtsimage)
		j->blrtsimage = opt_local->blrtsimage;
	if (opt_local->linuximage)
		j->linuximage = opt_local->linuximage;
	if (opt_local->mloaderimage)
		j->mloaderimage = opt_local->mloaderimage;
	if (opt_local->ramdiskimage)
		j->ramdiskimage = opt_local->ramdiskimage;

	if (opt_local->max_nodes)
		j->max_nodes    = opt_local->max_nodes;
	else if (opt_local->nodes_set) {
		/* If max_nodes isn't set for the allocation, default it
		 * to min_nodes to match the behavior of salloc and sbatch.
		 */
		j->max_nodes    = opt_local->min_nodes;
	}
	if (opt_local->pn_min_cpus != NO_VAL)
		j->pn_min_cpus = opt_local->pn_min_cpus;
	if (opt_local->pn_min_memory != NO_VAL64)
		j->pn_min_memory = opt_local->pn_min_memory;
	else if (opt_local->mem_per_cpu != NO_VAL64)
		j->pn_min_memory = opt_local->mem_per_cpu | MEM_PER_CPU;
	if (opt_local->pn_min_tmp_disk != NO_VAL)
		j->pn_min_tmp_disk = opt_local->pn_min_tmp_disk;
	if (opt_local->overcommit) {
		j->min_cpus    = opt_local->min_nodes;
		j->overcommit  = opt_local->overcommit;
	} else if (opt_local->cpus_set)
		j->min_cpus    = opt_local->ntasks * opt_local->cpus_per_task;
	else
		j->min_cpus    = opt_local->ntasks;
	if (opt_local->ntasks_set)
		j->num_tasks   = opt_local->ntasks;

	if (opt_local->cpus_set)
		j->cpus_per_task = opt_local->cpus_per_task;

	if (opt_local->no_kill)
		j->kill_on_node_fail   = 0;
	if (opt_local->time_limit != NO_VAL)
		j->time_limit          = opt_local->time_limit;
	if (opt_local->time_min != NO_VAL)
		j->time_min            = opt_local->time_min;
	if (opt_local->shared != NO_VAL16)
		j->shared = opt_local->shared;

	if (opt_local->warn_signal)
		j->warn_signal = opt_local->warn_signal;
	if (opt_local->warn_time)
		j->warn_time = opt_local->warn_time;
	if (opt_local->job_flags)
		j->bitflags = opt_local->job_flags;

	if (opt_local->cpu_freq_min != NO_VAL)
		j->cpu_freq_min = opt_local->cpu_freq_min;
	if (opt_local->cpu_freq_max != NO_VAL)
		j->cpu_freq_max = opt_local->cpu_freq_max;
	if (opt_local->cpu_freq_gov != NO_VAL)
		j->cpu_freq_gov = opt_local->cpu_freq_gov;

	if (opt_local->req_switch >= 0)
		j->req_switch = opt_local->req_switch;
	if (opt_local->wait4switch >= 0)
		j->wait4switch = opt_local->wait4switch;

	/* srun uses the same listening port for the allocation response
	 * message as all other messages */
	j->alloc_resp_port = slurmctld_comm_addr.port;
	j->other_port = slurmctld_comm_addr.port;

	if (opt_local->spank_job_env_size) {
		j->spank_job_env      = opt_local->spank_job_env;
		j->spank_job_env_size = opt_local->spank_job_env_size;
	}

	if (opt_local->power_flags)
		j->power_flags = opt_local->power_flags;
	if (opt_local->mcs_label)
		j->mcs_label = opt_local->mcs_label;
	j->wait_all_nodes = 1;

	/* If the job can run on multiple clusters, find the earliest
	 * start time and run it there */
	j->clusters = xstrdup(opt_local->clusters);

	return j;
}
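
One detail above that rewards a worked example is the pn_min_memory assignment: per-CPU and per-node memory share a single 64-bit field, distinguished by the MEM_PER_CPU flag bit. A sketch of that encoding (the 2048 MB value is illustrative):

#include <stdio.h>
#include <inttypes.h>
#include <slurm/slurm.h>	/* MEM_PER_CPU */

int main(void)
{
	uint64_t mem_per_cpu = 2048;	/* MB, hypothetical */
	uint64_t encoded = mem_per_cpu | MEM_PER_CPU;

	/* Consumers test the flag bit, then mask it off to read the value. */
	if (encoded & MEM_PER_CPU)
		printf("per-CPU memory: %"PRIu64" MB\n",
		       encoded & (~MEM_PER_CPU));
	else
		printf("per-node memory: %"PRIu64" MB\n", encoded);
	return 0;
}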