/* * scontrol_hold - perform some job hold/release operation * IN op - suspend/resume operation * IN job_id_str - a job id * RET 0 if no slurm error, errno otherwise. parsing error prints * error message and returns 0 */ extern int scontrol_hold(char *op, char *job_id_str) { int rc = SLURM_SUCCESS; char *next_str; job_desc_msg_t job_msg; uint16_t job_state; slurm_init_job_desc_msg (&job_msg); /* set current user, needed e.g., for AllowGroups checks */ job_msg.user_id = getuid(); if (job_id_str) { job_msg.job_id = (uint32_t) strtol(job_id_str, &next_str, 10); if ((job_msg.job_id == 0) || (next_str[0] != '\0')) { fprintf(stderr, "Invalid job id specified\n"); exit_code = 1; return 0; } } else { fprintf(stderr, "Invalid job id specified\n"); exit_code = 1; return 0; } job_state = scontrol_get_job_state(job_msg.job_id); if (job_state == (uint16_t) NO_VAL) return SLURM_ERROR; if ((job_state & JOB_STATE_BASE) != JOB_PENDING) { slurm_seterrno(ESLURM_JOB_NOT_PENDING); return ESLURM_JOB_NOT_PENDING; } if ((strncasecmp(op, "holdu", 5) == 0) || (strncasecmp(op, "uhold", 5) == 0)) { job_msg.priority = 0; job_msg.alloc_sid = ALLOC_SID_USER_HOLD; } else if (strncasecmp(op, "hold", 4) == 0) { job_msg.priority = 0; job_msg.alloc_sid = 0; } else job_msg.priority = INFINITE; if (slurm_update_job(&job_msg)) return slurm_get_errno(); return rc; }
/* * scontrol_update_job - update the slurm job configuration per the supplied * arguments * IN argc - count of arguments * IN argv - list of arguments * RET 0 if no slurm error, errno otherwise. parsing error prints * error message and returns 0 */ extern int scontrol_update_job (int argc, char *argv[]) { bool update_size = false; int i, update_cnt = 0; char *tag, *val; int taglen, vallen; job_desc_msg_t job_msg; slurm_init_job_desc_msg (&job_msg); /* set current user, needed e.g., for AllowGroups checks */ job_msg.user_id = getuid(); for (i=0; i<argc; i++) { tag = argv[i]; val = strchr(argv[i], '='); if (val) { taglen = val - argv[i]; val++; vallen = strlen(val); } else if (strncasecmp(tag, "Nice", MAX(strlen(tag), 2)) == 0){ /* "Nice" is the only tag that might not have an equal sign, so it is handled specially. */ job_msg.nice = NICE_OFFSET + 100; update_cnt++; continue; } else { exit_code = 1; fprintf (stderr, "Invalid input: %s\n", argv[i]); fprintf (stderr, "Request aborted\n"); return -1; } if (strncasecmp(tag, "JobId", MAX(taglen, 3)) == 0) { job_msg.job_id = (uint32_t) strtol(val, (char **) NULL, 10); } else if (strncasecmp(tag, "Comment", MAX(taglen, 3)) == 0) { job_msg.comment = val; update_cnt++; } else if (strncasecmp(tag, "TimeLimit", MAX(taglen, 5)) == 0) { bool incr, decr; uint32_t job_current_time, time_limit; incr = (val[0] == '+'); decr = (val[0] == '-'); if (incr || decr) val++; time_limit = time_str2mins(val); if ((time_limit < 0) && (time_limit != INFINITE)) { error("Invalid TimeLimit value"); exit_code = 1; return 0; } if (incr || decr) { job_current_time = _get_job_time(job_msg. 
job_id); if (job_current_time == NO_VAL) { exit_code = 1; return 0; } if (incr) { time_limit += job_current_time; } else if (time_limit > job_current_time) { error("TimeLimit decrement larger than" " current time limit (%u > %u)", time_limit, job_current_time); exit_code = 1; return 0; } else { time_limit = job_current_time - time_limit; } } job_msg.time_limit = time_limit; update_cnt++; } else if (strncasecmp(tag, "TimeMin", MAX(taglen, 5)) == 0) { int time_min = time_str2mins(val); if ((time_min < 0) && (time_min != INFINITE)) { error("Invalid TimeMin value"); exit_code = 1; return 0; } job_msg.time_min = time_min; update_cnt++; } else if (strncasecmp(tag, "Priority", MAX(taglen, 2)) == 0) { job_msg.priority = (uint32_t) strtoll(val, (char **) NULL, 10); update_cnt++; } else if (strncasecmp(tag, "Nice", MAX(taglen, 2)) == 0) { int nice; nice = strtoll(val, (char **) NULL, 10); if (abs(nice) > NICE_OFFSET) { error("Invalid nice value, must be between " "-%d and %d", NICE_OFFSET, NICE_OFFSET); exit_code = 1; return 0; } job_msg.nice = NICE_OFFSET + nice; update_cnt++; } else if (strncasecmp(tag, "NumCPUs", MAX(taglen, 6)) == 0) { int min_cpus, max_cpus=0; if (!get_resource_arg_range(val, "NumCPUs", &min_cpus, &max_cpus, false) || (min_cpus <= 0) || (max_cpus && (max_cpus < min_cpus))) { error("Invalid NumCPUs value: %s", val); exit_code = 1; return 0; } job_msg.min_cpus = min_cpus; if (max_cpus) job_msg.max_cpus = max_cpus; update_cnt++; } /* ReqProcs was removed in SLURM version 2.1 */ else if (strncasecmp(tag, "ReqProcs", MAX(taglen, 8)) == 0) { job_msg.num_tasks = (uint32_t) strtol(val, (char **) NULL, 10); update_cnt++; } else if (strncasecmp(tag, "Requeue", MAX(taglen, 4)) == 0) { job_msg.requeue = (uint16_t) strtol(val, (char **) NULL, 10); update_cnt++; } /* ReqNodes was replaced by NumNodes in SLURM version 2.1 */ else if ((strncasecmp(tag, "ReqNodes", MAX(taglen, 8)) == 0) || (strncasecmp(tag, "NumNodes", MAX(taglen, 8)) == 0)) { int min_nodes, max_nodes, 
rc; if (strcmp(val, "0") == 0) { job_msg.min_nodes = 0; } else if (strcasecmp(val, "ALL") == 0) { job_msg.min_nodes = INFINITE; } else { min_nodes = (int) job_msg.min_nodes; max_nodes = (int) job_msg.max_nodes; rc = get_resource_arg_range( val, "requested node count", &min_nodes, &max_nodes, false); if (!rc) return rc; job_msg.min_nodes = (uint32_t) min_nodes; job_msg.max_nodes = (uint32_t) max_nodes; } update_size = true; update_cnt++; } else if (strncasecmp(tag, "ReqSockets", MAX(taglen, 4)) == 0) { job_msg.sockets_per_node = (uint16_t) strtol(val, (char **) NULL, 10); update_cnt++; } else if (strncasecmp(tag, "ReqCores", MAX(taglen, 4)) == 0) { job_msg.cores_per_socket = (uint16_t) strtol(val, (char **) NULL, 10); update_cnt++; } else if (strncasecmp(tag, "TasksPerNode", MAX(taglen, 2))==0) { job_msg.ntasks_per_node = (uint16_t) strtol(val, (char **) NULL, 10); update_cnt++; } else if (strncasecmp(tag, "ReqThreads", MAX(taglen, 4)) == 0) { job_msg.threads_per_core = (uint16_t) strtol(val, (char **) NULL, 10); update_cnt++; } else if (strncasecmp(tag, "MinCPUsNode", MAX(taglen, 4)) == 0) { job_msg.pn_min_cpus = (uint32_t) strtol(val, (char **) NULL, 10); update_cnt++; } else if (strncasecmp(tag, "MinMemoryNode", MAX(taglen, 10)) == 0) { job_msg.pn_min_memory = (uint32_t) strtol(val, (char **) NULL, 10); update_cnt++; } else if (strncasecmp(tag, "MinMemoryCPU", MAX(taglen, 10)) == 0) { job_msg.pn_min_memory = (uint32_t) strtol(val, (char **) NULL, 10); job_msg.pn_min_memory |= MEM_PER_CPU; update_cnt++; } else if (strncasecmp(tag, "MinTmpDiskNode", MAX(taglen, 5)) == 0) { job_msg.pn_min_tmp_disk = (uint32_t) strtol(val, (char **) NULL, 10); update_cnt++; } else if (strncasecmp(tag, "Partition", MAX(taglen, 2)) == 0) { job_msg.partition = val; update_cnt++; } else if (strncasecmp(tag, "QOS", MAX(taglen, 2)) == 0) { job_msg.qos = val; update_cnt++; } else if (strncasecmp(tag, "ReservationName", MAX(taglen, 3)) == 0) { job_msg.reservation = val; update_cnt++; } else 
if (strncasecmp(tag, "Name", MAX(taglen, 2)) == 0) { job_msg.name = val; update_cnt++; } else if (strncasecmp(tag, "WCKey", MAX(taglen, 1)) == 0) { job_msg.wckey = val; update_cnt++; } else if (strncasecmp(tag, "Switches", MAX(taglen, 5)) == 0) { char *sep_char; job_msg.req_switch = (uint32_t) strtol(val, &sep_char, 10); update_cnt++; if (sep_char && sep_char[0] == '@') { job_msg.wait4switch = time_str2mins(sep_char+1) * 60; } } else if (strncasecmp(tag, "wait-for-switch", MAX(taglen, 5)) == 0) { job_msg.wait4switch = (uint32_t) strtol(val, (char **) NULL, 10); update_cnt++; } else if (strncasecmp(tag, "Shared", MAX(taglen, 2)) == 0) { if (strncasecmp(val, "YES", MAX(vallen, 1)) == 0) job_msg.shared = 1; else if (strncasecmp(val, "NO", MAX(vallen, 1)) == 0) job_msg.shared = 0; else job_msg.shared = (uint16_t) strtol(val, (char **) NULL, 10); update_cnt++; } else if (strncasecmp(tag, "Contiguous", MAX(taglen, 3)) == 0) { if (strncasecmp(val, "YES", MAX(vallen, 1)) == 0) job_msg.contiguous = 1; else if (strncasecmp(val, "NO", MAX(vallen, 1)) == 0) job_msg.contiguous = 0; else job_msg.contiguous = (uint16_t) strtol(val, (char **) NULL, 10); update_cnt++; } else if (strncasecmp(tag, "ExcNodeList", MAX(taglen, 3)) == 0){ job_msg.exc_nodes = val; update_cnt++; } else if (!strncasecmp(tag, "NodeList", MAX(taglen, 8)) || !strncasecmp(tag, "ReqNodeList", MAX(taglen, 8))) { job_msg.req_nodes = val; update_size = true; update_cnt++; } else if (strncasecmp(tag, "Features", MAX(taglen, 1)) == 0) { job_msg.features = val; update_cnt++; } else if (strncasecmp(tag, "Gres", MAX(taglen, 2)) == 0) { if (!strcasecmp(val, "help") || !strcasecmp(val, "list")) { print_gres_help(); } else { job_msg.gres = val; update_cnt++; } } else if (strncasecmp(tag, "Account", MAX(taglen, 1)) == 0) { job_msg.account = val; update_cnt++; } else if (strncasecmp(tag, "Dependency", MAX(taglen, 1)) == 0) { job_msg.dependency = val; update_cnt++; } else if (strncasecmp(tag, "Geometry", MAX(taglen, 2)) == 0) 
{ char* token, *delimiter = ",x", *next_ptr; int j, rc = 0; int dims = slurmdb_setup_cluster_dims(); uint16_t geo[dims]; char* geometry_tmp = xstrdup(val); char* original_ptr = geometry_tmp; token = strtok_r(geometry_tmp, delimiter, &next_ptr); for (j=0; j<dims; j++) { if (token == NULL) { error("insufficient dimensions in " "Geometry"); rc = -1; break; } geo[j] = (uint16_t) atoi(token); if (geo[j] <= 0) { error("invalid --geometry argument"); rc = -1; break; } geometry_tmp = next_ptr; token = strtok_r(geometry_tmp, delimiter, &next_ptr); } if (token != NULL) { error("too many dimensions in Geometry"); rc = -1; } if (original_ptr) xfree(original_ptr); if (rc != 0) exit_code = 1; else { for (j=0; j<dims; j++) job_msg.geometry[j] = geo[j]; update_cnt++; } } else if (strncasecmp(tag, "Rotate", MAX(taglen, 2)) == 0) { uint16_t rotate; if (strncasecmp(val, "YES", MAX(vallen, 1)) == 0) rotate = 1; else if (strncasecmp(val, "NO", MAX(vallen, 1)) == 0) rotate = 0; else rotate = (uint16_t) strtol(val, (char **) NULL, 10); job_msg.rotate = rotate; update_cnt++; } else if (strncasecmp(tag, "Conn-Type", MAX(taglen, 2)) == 0) { verify_conn_type(val, job_msg.conn_type); if(job_msg.conn_type[0] != (uint16_t)NO_VAL) update_cnt++; } else if (strncasecmp(tag, "Licenses", MAX(taglen, 1)) == 0) { job_msg.licenses = val; update_cnt++; } else if (!strncasecmp(tag, "EligibleTime", MAX(taglen, 2)) || !strncasecmp(tag, "StartTime", MAX(taglen, 2))) { if ((job_msg.begin_time = parse_time(val, 0))) { if (job_msg.begin_time < time(NULL)) job_msg.begin_time = time(NULL); update_cnt++; } } else if (!strncasecmp(tag, "EndTime", MAX(taglen, 2))) { job_msg.end_time = parse_time(val, 0); update_cnt++; } else { exit_code = 1; fprintf (stderr, "Update of this parameter is not " "supported: %s\n", argv[i]); fprintf (stderr, "Request aborted\n"); return 0; } } if (update_cnt == 0) { exit_code = 1; fprintf (stderr, "No changes specified\n"); return 0; } if (slurm_update_job(&job_msg)) return 
slurm_get_errno (); if (update_size) _update_job_size(job_msg.job_id); return SLURM_SUCCESS; }
/*
 * slurmdrmaa_job_control - apply a DRMAA control action to a job.
 *
 * self   - job to act on; self->job_id is the job id as a string
 * action - one of DRMAA_CONTROL_SUSPEND, DRMAA_CONTROL_HOLD,
 *          DRMAA_CONTROL_RESUME, DRMAA_CONTROL_RELEASE or
 *          DRMAA_CONTROL_TERMINATE
 *
 * The whole operation runs with the session's drm_connection_mutex held;
 * the FINALLY clause releases it on both the normal and the error path.
 * Raises FSD_ERRNO_INTERNAL_ERROR when the underlying slurm call returns -1
 * and FSD_ERRNO_INVALID_ARGUMENT for an unknown action.
 */
static void
slurmdrmaa_job_control( fsd_job_t *self, int action )
{
	slurmdrmaa_job_t *slurm_self = (slurmdrmaa_job_t*)self;
	job_desc_msg_t job_desc;

	fsd_log_enter(( "({job_id=%s}, action=%d)", self->job_id, action ));

	fsd_mutex_lock( &self->session->drm_connection_mutex );
	TRY
	 {
		switch( action )
		 {
			case DRMAA_CONTROL_SUSPEND:
				if(slurm_suspend(fsd_atoi(self->job_id)) == -1) {
					fsd_exc_raise_fmt(	FSD_ERRNO_INTERNAL_ERROR,"slurm_suspend error: %s,job_id: %s",slurm_strerror(slurm_get_errno()),self->job_id);
				}
				slurm_self->user_suspended = true;
				break;
			case DRMAA_CONTROL_HOLD:
				/* change priority to 0*/
				slurm_init_job_desc_msg(&job_desc);
				/* NOTE(review): this records the priority value left by
				 * slurm_init_job_desc_msg(), not one queried for the
				 * job - confirm that is what old_priority should hold */
				slurm_self->old_priority = job_desc.priority;
				/* Use the same id conversion as the other actions
				 * rather than bare atoi(), which yields 0 for
				 * unparseable text with no error indication */
				job_desc.job_id = fsd_atoi(self->job_id);
				job_desc.priority = 0;
				job_desc.alloc_sid = 0;
				if(slurm_update_job(&job_desc) == -1) {
					fsd_exc_raise_fmt(	FSD_ERRNO_INTERNAL_ERROR,"slurm_update_job error: %s,job_id: %s",slurm_strerror(slurm_get_errno()),self->job_id);
				}
				break;
			case DRMAA_CONTROL_RESUME:
				if(slurm_resume(fsd_atoi(self->job_id)) == -1) {
					fsd_exc_raise_fmt(	FSD_ERRNO_INTERNAL_ERROR,"slurm_resume error: %s,job_id: %s",slurm_strerror(slurm_get_errno()),self->job_id);
				}
				slurm_self->user_suspended = false;
				break;
			case DRMAA_CONTROL_RELEASE:
				/* change priority back*/
				slurm_init_job_desc_msg(&job_desc);
				job_desc.priority = INFINITE;
				/* Same conversion as the other actions (see HOLD) */
				job_desc.job_id = fsd_atoi(self->job_id);
				if(slurm_update_job(&job_desc) == -1) {
					fsd_exc_raise_fmt(	FSD_ERRNO_INTERNAL_ERROR,"slurm_update_job error: %s,job_id: %s",slurm_strerror(slurm_get_errno()),self->job_id);
				}
				break;
			case DRMAA_CONTROL_TERMINATE:
				if(slurm_kill_job(fsd_atoi(self->job_id),SIGKILL,0) == -1) {
					fsd_exc_raise_fmt(	FSD_ERRNO_INTERNAL_ERROR,"slurm_terminate_job error: %s,job_id: %s",slurm_strerror(slurm_get_errno()),self->job_id);
				}
				break;
			default:
				fsd_exc_raise_fmt(
						FSD_ERRNO_INVALID_ARGUMENT,
						"job::control: unknown action %d",
						action );
		 }

		fsd_log_debug(("job::control: successful"));
	 }
	FINALLY
	 {
		fsd_mutex_unlock( &self->session->drm_connection_mutex );
	 }
	END_TRY

	fsd_log_return(( "" ));
}
/* * scontrol_hold - perform some job hold/release operation * IN op - suspend/resume operation * IN job_id_str - a job id * RET 0 if no slurm error, errno otherwise. parsing error prints * error message and returns 0 */ extern int scontrol_hold(char *op, char *job_id_str) { int i, rc = SLURM_SUCCESS; char *next_str; job_desc_msg_t job_msg; uint32_t job_id; uint32_t array_id; job_info_msg_t *resp; slurm_job_info_t *job_ptr; if (job_id_str) { job_id = (uint32_t) strtol(job_id_str, &next_str, 10); if (next_str[0] == '_') array_id = strtol(next_str+1, &next_str, 10); else array_id = NO_VAL; if ((job_id == 0) || (next_str[0] != '\0')) { fprintf(stderr, "Invalid job id specified\n"); return 1; } } else { fprintf(stderr, "Invalid job id specified\n"); return 1; } if (scontrol_load_job(&resp, job_id)) { if (quiet_flag == -1) slurm_perror ("slurm_load_job error"); return 1; } slurm_init_job_desc_msg (&job_msg); job_msg.job_id = job_id; /* set current user, needed e.g., for AllowGroups checks */ job_msg.user_id = getuid(); if ((strncasecmp(op, "holdu", 5) == 0) || (strncasecmp(op, "uhold", 5) == 0)) { job_msg.priority = 0; job_msg.alloc_sid = ALLOC_SID_USER_HOLD; } else if (strncasecmp(op, "hold", 4) == 0) { job_msg.priority = 0; job_msg.alloc_sid = 0; } else job_msg.priority = INFINITE; for (i = 0, job_ptr = resp->job_array; i < resp->record_count; i++, job_ptr++) { if ((array_id != NO_VAL) && (job_ptr->array_task_id != array_id)) continue; if (!IS_JOB_PENDING(job_ptr)) { if ((array_id == NO_VAL) && (job_ptr->array_task_id != NO_VAL)) continue; slurm_seterrno(ESLURM_JOB_NOT_PENDING); return ESLURM_JOB_NOT_PENDING; } job_msg.job_id = job_ptr->job_id; if (slurm_update_job(&job_msg)) rc = slurm_get_errno(); } return rc; }