Ejemplo n.º 1
0
/*
 * scontrol_hold - perform some job hold/release operation
 * IN op - suspend/resume operation
 * IN job_id_str - a job id
 * RET 0 if no slurm error, errno otherwise. parsing error prints
 *		error message and returns 0
 */
extern int
scontrol_hold(char *op, char *job_id_str)
{
	int rc = SLURM_SUCCESS;
	char *next_str;
	job_desc_msg_t job_msg;
	uint16_t job_state;

	slurm_init_job_desc_msg (&job_msg);

	/* set current user, needed e.g., for AllowGroups checks */
	job_msg.user_id = getuid();

	if (job_id_str) {
		job_msg.job_id = (uint32_t) strtol(job_id_str, &next_str, 10);
		if ((job_msg.job_id == 0) || (next_str[0] != '\0')) {
			fprintf(stderr, "Invalid job id specified\n");
			exit_code = 1;
			return 0;
		}
	} else {
		fprintf(stderr, "Invalid job id specified\n");
		exit_code = 1;
		return 0;
	}

	job_state = scontrol_get_job_state(job_msg.job_id);
	if (job_state == (uint16_t) NO_VAL)
		return SLURM_ERROR;
	if ((job_state & JOB_STATE_BASE) != JOB_PENDING) {
		slurm_seterrno(ESLURM_JOB_NOT_PENDING);
		return ESLURM_JOB_NOT_PENDING;
	}

	if ((strncasecmp(op, "holdu", 5) == 0) ||
	    (strncasecmp(op, "uhold", 5) == 0)) {
		job_msg.priority = 0;
		job_msg.alloc_sid = ALLOC_SID_USER_HOLD;
	} else if (strncasecmp(op, "hold", 4) == 0) {
		job_msg.priority = 0;
		job_msg.alloc_sid = 0;
	} else
		job_msg.priority = INFINITE;

	if (slurm_update_job(&job_msg))
		return slurm_get_errno();

	return rc;
}
Ejemplo n.º 2
0
/*
 * scontrol_update_job - update the slurm job configuration per the supplied
 *	arguments
 * IN argc - count of arguments
 * IN argv - list of arguments
 * RET 0 if no slurm error, errno otherwise. parsing error prints
 *			error message and returns 0
 */
extern int
scontrol_update_job (int argc, char *argv[])
{
	bool update_size = false;
	int i, update_cnt = 0;
	char *tag, *val;
	int taglen, vallen;
	job_desc_msg_t job_msg;

	slurm_init_job_desc_msg (&job_msg);

	/* set current user, needed e.g., for AllowGroups checks */
	job_msg.user_id = getuid();

	for (i=0; i<argc; i++) {
		tag = argv[i];
		val = strchr(argv[i], '=');
		if (val) {
			taglen = val - argv[i];
			val++;
			vallen = strlen(val);
		} else if (strncasecmp(tag, "Nice", MAX(strlen(tag), 2)) == 0){
			/* "Nice" is the only tag that might not have an
			   equal sign, so it is handled specially. */
			job_msg.nice = NICE_OFFSET + 100;
			update_cnt++;
			continue;
		} else {
			exit_code = 1;
			fprintf (stderr, "Invalid input: %s\n", argv[i]);
			fprintf (stderr, "Request aborted\n");
			return -1;
		}

		if (strncasecmp(tag, "JobId", MAX(taglen, 3)) == 0) {
			job_msg.job_id =
				(uint32_t) strtol(val, (char **) NULL, 10);
		}
		else if (strncasecmp(tag, "Comment", MAX(taglen, 3)) == 0) {
			job_msg.comment = val;
			update_cnt++;
		}
		else if (strncasecmp(tag, "TimeLimit", MAX(taglen, 5)) == 0) {
			bool incr, decr;
			uint32_t job_current_time, time_limit;

			incr = (val[0] == '+');
			decr = (val[0] == '-');
			if (incr || decr)
				val++;
			time_limit = time_str2mins(val);
			if ((time_limit < 0) && (time_limit != INFINITE)) {
				error("Invalid TimeLimit value");
				exit_code = 1;
				return 0;
			}
			if (incr || decr) {
				job_current_time = _get_job_time(job_msg.
								 job_id);
				if (job_current_time == NO_VAL) {
					exit_code = 1;
					return 0;
				}
				if (incr) {
					time_limit += job_current_time;
				} else if (time_limit > job_current_time) {
					error("TimeLimit decrement larger than"
					      " current time limit (%u > %u)",
					      time_limit, job_current_time);
					exit_code = 1;
					return 0;
				} else {
					time_limit = job_current_time -
						     time_limit;
				}
			}
			job_msg.time_limit = time_limit;
			update_cnt++;
		}
		else if (strncasecmp(tag, "TimeMin", MAX(taglen, 5)) == 0) {
			int time_min = time_str2mins(val);
			if ((time_min < 0) && (time_min != INFINITE)) {
				error("Invalid TimeMin value");
				exit_code = 1;
				return 0;
			}
			job_msg.time_min = time_min;
			update_cnt++;
		}
		else if (strncasecmp(tag, "Priority", MAX(taglen, 2)) == 0) {
			job_msg.priority =
				(uint32_t) strtoll(val, (char **) NULL, 10);
			update_cnt++;
		}
		else if (strncasecmp(tag, "Nice", MAX(taglen, 2)) == 0) {
			int nice;
			nice = strtoll(val, (char **) NULL, 10);
			if (abs(nice) > NICE_OFFSET) {
				error("Invalid nice value, must be between "
					"-%d and %d", NICE_OFFSET,
					NICE_OFFSET);
				exit_code = 1;
				return 0;
			}
			job_msg.nice = NICE_OFFSET + nice;
			update_cnt++;
		}
		else if (strncasecmp(tag, "NumCPUs", MAX(taglen, 6)) == 0) {
			int min_cpus, max_cpus=0;
			if (!get_resource_arg_range(val, "NumCPUs", &min_cpus,
						   &max_cpus, false) ||
			    (min_cpus <= 0) ||
			    (max_cpus && (max_cpus < min_cpus))) {
				error("Invalid NumCPUs value: %s", val);
				exit_code = 1;
				return 0;
			}
			job_msg.min_cpus = min_cpus;
			if (max_cpus)
				job_msg.max_cpus = max_cpus;
			update_cnt++;
		}
		/* ReqProcs was removed in SLURM version 2.1 */
		else if (strncasecmp(tag, "ReqProcs", MAX(taglen, 8)) == 0) {
			job_msg.num_tasks =
				(uint32_t) strtol(val, (char **) NULL, 10);
			update_cnt++;
		}
		else if (strncasecmp(tag, "Requeue", MAX(taglen, 4)) == 0) {
			job_msg.requeue =
				(uint16_t) strtol(val, (char **) NULL, 10);
			update_cnt++;
		}
		/* ReqNodes was replaced by NumNodes in SLURM version 2.1 */
		else if ((strncasecmp(tag, "ReqNodes", MAX(taglen, 8)) == 0) ||
		         (strncasecmp(tag, "NumNodes", MAX(taglen, 8)) == 0)) {
			int min_nodes, max_nodes, rc;
			if (strcmp(val, "0") == 0) {
				job_msg.min_nodes = 0;
			} else if (strcasecmp(val, "ALL") == 0) {
				job_msg.min_nodes = INFINITE;
			} else {
				min_nodes = (int) job_msg.min_nodes;
				max_nodes = (int) job_msg.max_nodes;
				rc = get_resource_arg_range(
						val, "requested node count",
						&min_nodes, &max_nodes, false);
				if (!rc)
					return rc;
				job_msg.min_nodes = (uint32_t) min_nodes;
				job_msg.max_nodes = (uint32_t) max_nodes;
			}
			update_size = true;
			update_cnt++;
		}
		else if (strncasecmp(tag, "ReqSockets", MAX(taglen, 4)) == 0) {
			job_msg.sockets_per_node =
				(uint16_t) strtol(val, (char **) NULL, 10);
			update_cnt++;
		}
		else if (strncasecmp(tag, "ReqCores", MAX(taglen, 4)) == 0) {
			job_msg.cores_per_socket =
				(uint16_t) strtol(val, (char **) NULL, 10);
			update_cnt++;
		}
                else if (strncasecmp(tag, "TasksPerNode", MAX(taglen, 2))==0) {
                        job_msg.ntasks_per_node =
                                (uint16_t) strtol(val, (char **) NULL, 10);
                        update_cnt++;
                }
		else if (strncasecmp(tag, "ReqThreads", MAX(taglen, 4)) == 0) {
			job_msg.threads_per_core =
				(uint16_t) strtol(val, (char **) NULL, 10);
			update_cnt++;
		}
		else if (strncasecmp(tag, "MinCPUsNode", MAX(taglen, 4)) == 0) {
			job_msg.pn_min_cpus =
				(uint32_t) strtol(val, (char **) NULL, 10);
			update_cnt++;
		}
		else if (strncasecmp(tag, "MinMemoryNode",
				     MAX(taglen, 10)) == 0) {
			job_msg.pn_min_memory =
				(uint32_t) strtol(val, (char **) NULL, 10);
			update_cnt++;
		}
		else if (strncasecmp(tag, "MinMemoryCPU",
				     MAX(taglen, 10)) == 0) {
			job_msg.pn_min_memory =
				(uint32_t) strtol(val, (char **) NULL, 10);
			job_msg.pn_min_memory |= MEM_PER_CPU;
			update_cnt++;
		}
		else if (strncasecmp(tag, "MinTmpDiskNode",
				     MAX(taglen, 5)) == 0) {
			job_msg.pn_min_tmp_disk =
				(uint32_t) strtol(val, (char **) NULL, 10);
			update_cnt++;
		}
		else if (strncasecmp(tag, "Partition", MAX(taglen, 2)) == 0) {
			job_msg.partition = val;
			update_cnt++;
		}
		else if (strncasecmp(tag, "QOS", MAX(taglen, 2)) == 0) {
			job_msg.qos = val;
			update_cnt++;
		}
		else if (strncasecmp(tag, "ReservationName",
				     MAX(taglen, 3)) == 0) {
			job_msg.reservation = val;
			update_cnt++;
		}
		else if (strncasecmp(tag, "Name", MAX(taglen, 2)) == 0) {
			job_msg.name = val;
			update_cnt++;
		}
		else if (strncasecmp(tag, "WCKey", MAX(taglen, 1)) == 0) {
			job_msg.wckey = val;
			update_cnt++;
		}
		else if (strncasecmp(tag, "Switches", MAX(taglen, 5)) == 0) {
			char *sep_char;
			job_msg.req_switch =
				(uint32_t) strtol(val, &sep_char, 10);
			update_cnt++;
			if (sep_char && sep_char[0] == '@') {
				job_msg.wait4switch = time_str2mins(sep_char+1)
						      * 60;
			}
		}
		else if (strncasecmp(tag, "wait-for-switch", MAX(taglen, 5))
			 == 0) {
			job_msg.wait4switch =
				(uint32_t) strtol(val, (char **) NULL, 10);
			update_cnt++;
		}
		else if (strncasecmp(tag, "Shared", MAX(taglen, 2)) == 0) {
			if (strncasecmp(val, "YES", MAX(vallen, 1)) == 0)
				job_msg.shared = 1;
			else if (strncasecmp(val, "NO", MAX(vallen, 1)) == 0)
				job_msg.shared = 0;
			else
				job_msg.shared =
					(uint16_t) strtol(val,
							(char **) NULL, 10);
			update_cnt++;
		}
		else if (strncasecmp(tag, "Contiguous", MAX(taglen, 3)) == 0) {
			if (strncasecmp(val, "YES", MAX(vallen, 1)) == 0)
				job_msg.contiguous = 1;
			else if (strncasecmp(val, "NO", MAX(vallen, 1)) == 0)
				job_msg.contiguous = 0;
			else
				job_msg.contiguous =
					(uint16_t) strtol(val,
							(char **) NULL, 10);
			update_cnt++;
		}
		else if (strncasecmp(tag, "ExcNodeList", MAX(taglen, 3)) == 0){
			job_msg.exc_nodes = val;
			update_cnt++;
		}
		else if (!strncasecmp(tag, "NodeList",    MAX(taglen, 8)) ||
			 !strncasecmp(tag, "ReqNodeList", MAX(taglen, 8))) {
			job_msg.req_nodes = val;
			update_size = true;
			update_cnt++;
		}
		else if (strncasecmp(tag, "Features", MAX(taglen, 1)) == 0) {
			job_msg.features = val;
			update_cnt++;
		}
		else if (strncasecmp(tag, "Gres", MAX(taglen, 2)) == 0) {
			if (!strcasecmp(val, "help") ||
			    !strcasecmp(val, "list")) {
				print_gres_help();
			} else {
				job_msg.gres = val;
				update_cnt++;
			}
		}
		else if (strncasecmp(tag, "Account", MAX(taglen, 1)) == 0) {
			job_msg.account = val;
			update_cnt++;
		}
		else if (strncasecmp(tag, "Dependency", MAX(taglen, 1)) == 0) {
			job_msg.dependency = val;
			update_cnt++;
		}
		else if (strncasecmp(tag, "Geometry", MAX(taglen, 2)) == 0) {
			char* token, *delimiter = ",x", *next_ptr;
			int j, rc = 0;
			int dims = slurmdb_setup_cluster_dims();
			uint16_t geo[dims];
			char* geometry_tmp = xstrdup(val);
			char* original_ptr = geometry_tmp;
			token = strtok_r(geometry_tmp, delimiter, &next_ptr);
			for (j=0; j<dims; j++) {
				if (token == NULL) {
					error("insufficient dimensions in "
						"Geometry");
					rc = -1;
					break;
				}
				geo[j] = (uint16_t) atoi(token);
				if (geo[j] <= 0) {
					error("invalid --geometry argument");
					rc = -1;
					break;
				}
				geometry_tmp = next_ptr;
				token = strtok_r(geometry_tmp, delimiter,
					&next_ptr);
			}
			if (token != NULL) {
				error("too many dimensions in Geometry");
				rc = -1;
			}

			if (original_ptr)
				xfree(original_ptr);
			if (rc != 0)
				exit_code = 1;
			else {
				for (j=0; j<dims; j++)
					job_msg.geometry[j] = geo[j];
				update_cnt++;
			}
		}

		else if (strncasecmp(tag, "Rotate", MAX(taglen, 2)) == 0) {
			uint16_t rotate;
			if (strncasecmp(val, "YES", MAX(vallen, 1)) == 0)
				rotate = 1;
			else if (strncasecmp(val, "NO", MAX(vallen, 1)) == 0)
				rotate = 0;
			else
				rotate = (uint16_t) strtol(val,
							   (char **) NULL, 10);
			job_msg.rotate = rotate;
			update_cnt++;
		}
		else if (strncasecmp(tag, "Conn-Type", MAX(taglen, 2)) == 0) {
			verify_conn_type(val, job_msg.conn_type);
			if(job_msg.conn_type[0] != (uint16_t)NO_VAL)
				update_cnt++;
		}
		else if (strncasecmp(tag, "Licenses", MAX(taglen, 1)) == 0) {
			job_msg.licenses = val;
			update_cnt++;
		}
		else if (!strncasecmp(tag, "EligibleTime", MAX(taglen, 2)) ||
			 !strncasecmp(tag, "StartTime",    MAX(taglen, 2))) {
			if ((job_msg.begin_time = parse_time(val, 0))) {
				if (job_msg.begin_time < time(NULL))
					job_msg.begin_time = time(NULL);
				update_cnt++;
			}
		}
		else if (!strncasecmp(tag, "EndTime", MAX(taglen, 2))) {
			job_msg.end_time = parse_time(val, 0);
			update_cnt++;
		}
		else {
			exit_code = 1;
			fprintf (stderr, "Update of this parameter is not "
				 "supported: %s\n", argv[i]);
			fprintf (stderr, "Request aborted\n");
			return 0;
		}
	}

	if (update_cnt == 0) {
		exit_code = 1;
		fprintf (stderr, "No changes specified\n");
		return 0;
	}

	if (slurm_update_job(&job_msg))
		return slurm_get_errno ();

	if (update_size)
		_update_job_size(job_msg.job_id);

	return SLURM_SUCCESS;
}
Ejemplo n.º 3
0
static void
slurmdrmaa_job_control( fsd_job_t *self, int action )
{
	slurmdrmaa_job_t *slurm_self = (slurmdrmaa_job_t*)self;
	job_desc_msg_t job_desc;

	fsd_log_enter(( "({job_id=%s}, action=%d)", self->job_id, action ));

	fsd_mutex_lock( &self->session->drm_connection_mutex );
	TRY
	 {
		switch( action )
		 {
			case DRMAA_CONTROL_SUSPEND:
				if(slurm_suspend(fsd_atoi(self->job_id)) == -1) {
					fsd_exc_raise_fmt(	FSD_ERRNO_INTERNAL_ERROR,"slurm_suspend error: %s,job_id: %s",slurm_strerror(slurm_get_errno()),self->job_id);
				}
				slurm_self->user_suspended = true;
				break;
			case DRMAA_CONTROL_HOLD:
				/* change priority to 0*/
				slurm_init_job_desc_msg(&job_desc);
				slurm_self->old_priority = job_desc.priority;
				job_desc.job_id = atoi(self->job_id);
				job_desc.priority = 0;
				job_desc.alloc_sid = 0;
				if(slurm_update_job(&job_desc) == -1) {
					fsd_exc_raise_fmt(	FSD_ERRNO_INTERNAL_ERROR,"slurm_update_job error: %s,job_id: %s",slurm_strerror(slurm_get_errno()),self->job_id);
				}
				break;
			case DRMAA_CONTROL_RESUME:
				if(slurm_resume(fsd_atoi(self->job_id)) == -1) {
					fsd_exc_raise_fmt(	FSD_ERRNO_INTERNAL_ERROR,"slurm_resume error: %s,job_id: %s",slurm_strerror(slurm_get_errno()),self->job_id);
				}
				slurm_self->user_suspended = false;
				break;
			case DRMAA_CONTROL_RELEASE:
			  /* change priority back*/
			  	slurm_init_job_desc_msg(&job_desc);
				job_desc.priority = INFINITE;
				job_desc.job_id = atoi(self->job_id);
				if(slurm_update_job(&job_desc) == -1) {
					fsd_exc_raise_fmt(	FSD_ERRNO_INTERNAL_ERROR,"slurm_update_job error: %s,job_id: %s",slurm_strerror(slurm_get_errno()),self->job_id);
				}
				break;
			case DRMAA_CONTROL_TERMINATE:
				if(slurm_kill_job(fsd_atoi(self->job_id),SIGKILL,0) == -1) {
					fsd_exc_raise_fmt(	FSD_ERRNO_INTERNAL_ERROR,"slurm_terminate_job error: %s,job_id: %s",slurm_strerror(slurm_get_errno()),self->job_id);
				}
				break;
			default:
				fsd_exc_raise_fmt(
						FSD_ERRNO_INVALID_ARGUMENT,
						"job::control: unknown action %d", action );
		 }
					
		fsd_log_debug(("job::control: successful"));
	 }
	FINALLY
	 {
		fsd_mutex_unlock( &self->session->drm_connection_mutex );
	 }
	END_TRY

	fsd_log_return(( "" ));
}
Ejemplo n.º 4
0
/*
 * scontrol_hold - perform some job hold/release operation
 * IN op - suspend/resume operation
 * IN job_id_str - a job id
 * RET 0 if no slurm error, errno otherwise. parsing error prints
 *		error message and returns 0
 */
extern int
scontrol_hold(char *op, char *job_id_str)
{
	int i, rc = SLURM_SUCCESS;
	char *next_str;
	job_desc_msg_t job_msg;
	uint32_t job_id;
	uint32_t array_id;
	job_info_msg_t *resp;
	slurm_job_info_t *job_ptr;

	if (job_id_str) {
		job_id = (uint32_t) strtol(job_id_str, &next_str, 10);
		if (next_str[0] == '_')
			array_id = strtol(next_str+1, &next_str, 10);
		else
			array_id = NO_VAL;
		if ((job_id == 0) || (next_str[0] != '\0')) {
			fprintf(stderr, "Invalid job id specified\n");
			return 1;
		}
	} else {
		fprintf(stderr, "Invalid job id specified\n");
		return 1;
	}

	if (scontrol_load_job(&resp, job_id)) {
		if (quiet_flag == -1)
			slurm_perror ("slurm_load_job error");
		return 1;
	}

	slurm_init_job_desc_msg (&job_msg);
	job_msg.job_id = job_id;
	/* set current user, needed e.g., for AllowGroups checks */
	job_msg.user_id = getuid();
	if ((strncasecmp(op, "holdu", 5) == 0) ||
	    (strncasecmp(op, "uhold", 5) == 0)) {
		job_msg.priority = 0;
		job_msg.alloc_sid = ALLOC_SID_USER_HOLD;
	} else if (strncasecmp(op, "hold", 4) == 0) {
		job_msg.priority = 0;
		job_msg.alloc_sid = 0;
	} else
		job_msg.priority = INFINITE;
	for (i = 0, job_ptr = resp->job_array; i < resp->record_count;
	     i++, job_ptr++) {
		if ((array_id != NO_VAL) &&
		    (job_ptr->array_task_id != array_id))
			continue;

		if (!IS_JOB_PENDING(job_ptr)) {
			if ((array_id == NO_VAL) &&
			    (job_ptr->array_task_id != NO_VAL))
				continue;
			slurm_seterrno(ESLURM_JOB_NOT_PENDING);
			return ESLURM_JOB_NOT_PENDING;
		}

		job_msg.job_id = job_ptr->job_id;
		if (slurm_update_job(&job_msg))
			rc = slurm_get_errno();
	}

	return rc;
}