static int _handle_terminate(int fd, slurmd_job_t *job, uid_t uid) { int rc = SLURM_SUCCESS; int errnum = 0; debug("_handle_terminate for job %u.%u", job->jobid, job->stepid); step_terminate_monitor_start(job->jobid, job->stepid); debug3(" uid = %d", uid); if (uid != job->uid && !_slurm_authorized_user(uid)) { debug("terminate req from uid %ld for job %u.%u " "owned by uid %ld", (long)uid, job->jobid, job->stepid, (long)job->uid); rc = -1; errnum = EPERM; goto done; } /* * Sanity checks */ if (job->cont_id == 0) { debug ("step %u.%u invalid container [cont_id:%"PRIu64"]", job->jobid, job->stepid, job->cont_id); rc = -1; errnum = ESLURMD_JOB_NOTRUNNING; goto done; } /* * Signal the container with SIGKILL */ pthread_mutex_lock(&suspend_mutex); if (suspended) { debug("Terminating suspended job step %u.%u", job->jobid, job->stepid); } if (slurm_container_signal(job->cont_id, SIGKILL) < 0) { rc = -1; errnum = errno; verbose("Error sending SIGKILL signal to %u.%u: %m", job->jobid, job->stepid); } else { verbose("Sent SIGKILL signal to %u.%u", job->jobid, job->stepid); } pthread_mutex_unlock(&suspend_mutex); done: /* Send the return code and errnum */ safe_write(fd, &rc, sizeof(int)); safe_write(fd, &errnum, sizeof(int)); return SLURM_SUCCESS; rwfail: return SLURM_FAILURE; }
static int _handle_terminate(int fd, stepd_step_rec_t *job, uid_t uid) { int rc = SLURM_SUCCESS; int errnum = 0; stepd_step_task_info_t *task; uint32_t i; debug("_handle_terminate for step=%u.%u uid=%d", job->jobid, job->stepid, uid); step_terminate_monitor_start(job->jobid, job->stepid); if (uid != job->uid && !_slurm_authorized_user(uid)) { debug("terminate req from uid %ld for job %u.%u " "owned by uid %ld", (long)uid, job->jobid, job->stepid, (long)job->uid); rc = -1; errnum = EPERM; goto done; } /* * Sanity checks */ if (job->cont_id == 0) { debug ("step %u.%u invalid container [cont_id:%"PRIu64"]", job->jobid, job->stepid, job->cont_id); rc = -1; errnum = ESLURMD_JOB_NOTRUNNING; goto done; } /* cycle thru the tasks and mark those that have not * called abort and/or terminated as killed_by_cmd */ for (i = 0; i < job->node_tasks; i++) { if (NULL == (task = job->task[i])) { continue; } if (task->aborted || task->exited) { continue; } /* mark that this task is going to be killed by * cmd so we ignore its exit status - otherwise, * we will probably report the final exit status * as SIGKILL */ task->killed_by_cmd = true; } /* * Signal the container with SIGKILL */ pthread_mutex_lock(&suspend_mutex); if (suspended) { debug("Terminating suspended job step %u.%u", job->jobid, job->stepid); } if (proctrack_g_signal(job->cont_id, SIGKILL) < 0) { rc = -1; errnum = errno; verbose("Error sending SIGKILL signal to %u.%u: %m", job->jobid, job->stepid); } else { verbose("Sent SIGKILL signal to %u.%u", job->jobid, job->stepid); } pthread_mutex_unlock(&suspend_mutex); done: /* Send the return code and errnum */ safe_write(fd, &rc, sizeof(int)); safe_write(fd, &errnum, sizeof(int)); return SLURM_SUCCESS; rwfail: return SLURM_FAILURE; }