/*
 * _handle_resume - resume a previously suspended job step.
 *
 * fd  - descriptor on which the two-int reply (return code, then error
 *       number) is written
 * job - step record; its cont_id identifies the container to signal
 * uid - uid of the requester; must pass _slurm_authorized_user()
 *
 * The reply carries rc == -1 with errnum set to EPERM,
 * ESLURMD_JOB_NOTRUNNING or ESLURMD_STEP_NOTSUSPENDED when the request
 * is refused.  A failure to deliver SIGCONT is only logged; the step is
 * still marked as no longer suspended and the reply stays successful.
 *
 * Returns SLURM_SUCCESS once the reply has been written, SLURM_FAILURE
 * via the rwfail label (presumably the failure target of the safe_write
 * macro, which is defined elsewhere).
 */
static int
_handle_resume(int fd, stepd_step_rec_t *job, uid_t uid)
{
	int status = SLURM_SUCCESS;
	int err_code = 0;

	debug("_handle_resume for job %u.%u", job->jobid, job->stepid);
	debug3(" uid = %d", uid);

	/* Only authorized users may resume a step */
	if (!_slurm_authorized_user(uid)) {
		debug("job step resume request from uid %ld for job %u.%u ",
		      (long)uid, job->jobid, job->stepid);
		status = -1;
		err_code = EPERM;
		goto done;
	}

	/* A zero container id means there is nothing to signal */
	if (job->cont_id == 0) {
		debug ("step %u.%u invalid container [cont_id:%"PRIu64"]",
		       job->jobid, job->stepid, job->cont_id);
		status = -1;
		err_code = ESLURMD_JOB_NOTRUNNING;
		goto done;
	}

	acct_gather_resume_poll();

	/* The suspended flag is read and updated under suspend_mutex */
	pthread_mutex_lock(&suspend_mutex);
	if (!suspended) {
		pthread_mutex_unlock(&suspend_mutex);
		status = -1;
		err_code = ESLURMD_STEP_NOTSUSPENDED;
		goto done;
	}

	/* Wake the whole container */
	if (proctrack_g_signal(job->cont_id, SIGCONT) >= 0) {
		verbose("Resumed %u.%u", job->jobid, job->stepid);
	} else {
		verbose("Error resuming %u.%u: %m",
			job->jobid, job->stepid);
	}
	suspended = false;

	/* Re-apply the cpu frequencies if the cpu_freq option was used */
	if (job->cpu_freq != NO_VAL)
		cpu_freq_set(job);

	pthread_mutex_unlock(&suspend_mutex);

done:
	/* Reply: return code followed by error number */
	safe_write(fd, &status, sizeof(int));
	safe_write(fd, &err_code, sizeof(int));
	return SLURM_SUCCESS;
rwfail:
	return SLURM_FAILURE;
}
/*
 * _handle_terminate - kill every process in a job step's container.
 *
 * fd  - descriptor on which the two-int reply (return code, then error
 *       number) is written
 * job - step record; its cont_id identifies the container to signal
 * uid - uid of the requester; must be the step owner or pass
 *       _slurm_authorized_user()
 *
 * The reply carries rc == -1 with errnum set to EPERM (requester not
 * allowed), ESLURMD_JOB_NOTRUNNING (no container), or the errno left by
 * a failed proctrack_g_signal() call.
 *
 * Returns SLURM_SUCCESS once the reply has been written, SLURM_FAILURE
 * via the rwfail label (presumably the failure target of the safe_write
 * macro, which is defined elsewhere).
 */
static int
_handle_terminate(int fd, stepd_step_rec_t *job, uid_t uid)
{
	int rc = SLURM_SUCCESS;
	int errnum = 0;
	stepd_step_task_info_t *task;
	uint32_t i;

	debug("_handle_terminate for step=%u.%u uid=%d",
	      job->jobid, job->stepid, uid);

	/*
	 * Authorize the requester before causing any side effect, so that
	 * a request from an unrelated uid cannot start the terminate
	 * monitor for a step it does not own.
	 */
	if (uid != job->uid && !_slurm_authorized_user(uid)) {
		debug("terminate req from uid %ld for job %u.%u "
		      "owned by uid %ld",
		      (long)uid, job->jobid, job->stepid, (long)job->uid);
		rc = -1;
		errnum = EPERM;
		goto done;
	}

	/*
	 * NOTE(review): presumably arms a watchdog for steps that do not
	 * die after SIGKILL -- confirm against the step terminate monitor
	 * implementation.
	 */
	step_terminate_monitor_start(job->jobid, job->stepid);

	/*
	 * Sanity checks
	 */
	if (job->cont_id == 0) {
		debug ("step %u.%u invalid container [cont_id:%"PRIu64"]",
		       job->jobid, job->stepid, job->cont_id);
		rc = -1;
		errnum = ESLURMD_JOB_NOTRUNNING;
		goto done;
	}

	/*
	 * Cycle through the tasks and mark those that have not called
	 * abort and/or terminated as killed_by_cmd.
	 */
	for (i = 0; i < job->node_tasks; i++) {
		if (NULL == (task = job->task[i])) {
			continue;
		}
		if (task->aborted || task->exited) {
			continue;
		}
		/*
		 * Mark that this task is going to be killed by command so
		 * its exit status is ignored; otherwise the final exit
		 * status would probably be reported as SIGKILL.
		 */
		task->killed_by_cmd = true;
	}

	/*
	 * Signal the container with SIGKILL.  The suspended flag is read
	 * under suspend_mutex; a suspended step is killed all the same.
	 */
	pthread_mutex_lock(&suspend_mutex);
	if (suspended) {
		debug("Terminating suspended job step %u.%u",
		      job->jobid, job->stepid);
	}

	if (proctrack_g_signal(job->cont_id, SIGKILL) < 0) {
		rc = -1;
		errnum = errno;	/* capture before logging can change it */
		verbose("Error sending SIGKILL signal to %u.%u: %m",
			job->jobid, job->stepid);
	} else {
		verbose("Sent SIGKILL signal to %u.%u",
			job->jobid, job->stepid);
	}
	pthread_mutex_unlock(&suspend_mutex);

done:
	/* Send the return code and errnum */
	safe_write(fd, &rc, sizeof(int));
	safe_write(fd, &errnum, sizeof(int));
	return SLURM_SUCCESS;
rwfail:
	return SLURM_FAILURE;
}
/*
 * _handle_signal_container - deliver a signal to a job step's container.
 *
 * fd  - descriptor from which the signal number (one int) is read and on
 *       which the two-int reply (return code, then error number) is
 *       written
 * job - step record; its cont_id identifies the container to signal
 * uid - uid of the requester; must be the step owner or pass
 *       _slurm_authorized_user()
 *
 * The reply carries rc == -1 with errnum set to EPERM (requester not
 * allowed), ESLURMD_JOB_NOTRUNNING (no container),
 * ESLURMD_STEP_SUSPENDED (step suspended and signal is not SIGKILL), or
 * the errno left by a failed proctrack_g_signal() call.
 *
 * Returns SLURM_SUCCESS once the reply has been written, SLURM_FAILURE
 * via the rwfail label (presumably the failure target of the
 * safe_read/safe_write macros, which are defined elsewhere).
 */
static int
_handle_signal_container(int fd, stepd_step_rec_t *job, uid_t uid)
{
	int rc = SLURM_SUCCESS;
	int errnum = 0;
	int sig;
	/* Function-static, so the cancellation banner is logged only once */
	static int msg_sent = 0;
	char *ptr = NULL;
	int target_node_id = 0;
	stepd_step_task_info_t *task;
	uint32_t i;

	safe_read(fd, &sig, sizeof(int));

	debug("_handle_signal_container for step=%u.%u uid=%d signal=%d",
	      job->jobid, job->stepid, (int) uid, sig);
	if ((uid != job->uid) && !_slurm_authorized_user(uid)) {
		error("signal container req from uid %ld for step=%u.%u "
		      "owned by uid %ld",
		      (long)uid, job->jobid, job->stepid, (long)job->uid);
		rc = -1;
		errnum = EPERM;
		goto done;
	}

	/*
	 * Sanity checks
	 */
	if (job->cont_id == 0) {
		debug ("step %u.%u invalid container [cont_id:%"PRIu64"]",
		       job->jobid, job->stepid, job->cont_id);
		rc = -1;
		errnum = ESLURMD_JOB_NOTRUNNING;
		goto done;
	}

	if ((sig == SIGTERM) || (sig == SIGKILL)) {
		/*
		 * Cycle through the tasks and mark those that have not
		 * called abort and/or terminated as killed_by_cmd.
		 */
		for (i = 0; i < job->node_tasks; i++) {
			if (NULL == (task = job->task[i])) {
				continue;
			}
			if (task->aborted || task->exited) {
				continue;
			}
			/*
			 * Mark that this task is going to be killed by
			 * command so its exit status is ignored; otherwise
			 * the final exit status would probably be reported
			 * as SIGKILL.
			 */
			task->killed_by_cmd = true;
		}
	}

	/*
	 * Only the node named by SLURM_STEP_KILLED_MSG_NODE_ID (node 0 when
	 * the variable is unset) logs the cancellation banner.
	 */
	ptr = getenvp(job->env, "SLURM_STEP_KILLED_MSG_NODE_ID");
	if (ptr)
		target_node_id = atoi(ptr);
	if ((job->nodeid == target_node_id) && (msg_sent == 0) &&
	    (job->state < SLURMSTEPD_STEP_ENDING)) {
		time_t now = time(NULL);
		char entity[24], time_str[24];

		if (job->stepid == SLURM_BATCH_SCRIPT) {
			snprintf(entity, sizeof(entity), "JOB %u", job->jobid);
		} else {
			snprintf(entity, sizeof(entity), "STEP %u.%u",
				 job->jobid, job->stepid);
		}
		slurm_make_time_str(&now, time_str, sizeof(time_str));

		/*
		 * Not really errors, but we want the messages displayed
		 * by default.
		 */
		if (sig == SIG_TIME_LIMIT) {
			error("*** %s CANCELLED AT %s DUE TO TIME LIMIT ***",
			      entity, time_str);
			msg_sent = 1;
		} else if (sig == SIG_PREEMPTED) {
			error("*** %s CANCELLED AT %s DUE TO PREEMPTION ***",
			      entity, time_str);
			msg_sent = 1;
		} else if (sig == SIG_NODE_FAIL) {
			error("*** %s CANCELLED AT %s DUE TO NODE FAILURE ***",
			      entity, time_str);
			msg_sent = 1;
		} else if (sig == SIG_FAILURE) {
			error("*** %s FAILED (non-zero exit code or other "
			      "failure mode) ***", entity);
			msg_sent = 1;
		} else if ((sig == SIGTERM) || (sig == SIGKILL)) {
			error("*** %s CANCELLED AT %s ***", entity, time_str);
			msg_sent = 1;
		}
	}

	/*
	 * These pseudo-signals only drive the log message above; nothing
	 * is delivered to the container for them.
	 */
	if ((sig == SIG_TIME_LIMIT) || (sig == SIG_NODE_FAIL) ||
	    (sig == SIG_PREEMPTED)  || (sig == SIG_FAILURE))
		goto done;

	/* An abort request is delivered as SIGKILL and recorded on the job */
	if (sig == SIG_ABORT) {
		sig = SIGKILL;
		job->aborted = true;
	}

	pthread_mutex_lock(&suspend_mutex);
	/* A suspended step accepts SIGKILL only */
	if (suspended && (sig != SIGKILL)) {
		rc = -1;
		errnum = ESLURMD_STEP_SUSPENDED;
		pthread_mutex_unlock(&suspend_mutex);
		goto done;
	}

	if (sig == SIG_DEBUG_WAKE) {
		/*
		 * Skip empty task slots, as the loop above does, instead
		 * of dereferencing a NULL task pointer.
		 */
		for (i = 0; i < job->node_tasks; i++) {
			task = job->task[i];
			if (task == NULL)
				continue;
			pdebug_wake_process(job, task->pid);
		}
		pthread_mutex_unlock(&suspend_mutex);
		goto done;
	}

	/*
	 * Signal the container
	 */
	if (proctrack_g_signal(job->cont_id, sig) < 0) {
		rc = -1;
		errnum = errno;	/* capture before logging can change it */
		verbose("Error sending signal %d to %u.%u: %m",
			sig, job->jobid, job->stepid);
	} else {
		verbose("Sent signal %d to %u.%u",
			sig, job->jobid, job->stepid);
	}
	pthread_mutex_unlock(&suspend_mutex);

done:
	/* Send the return code and errnum */
	safe_write(fd, &rc, sizeof(int));
	safe_write(fd, &errnum, sizeof(int));
	return SLURM_SUCCESS;
rwfail:
	return SLURM_FAILURE;
}
/*
 * _handle_suspend - stop every process in a job step's container.
 *
 * fd  - descriptor from which the job's core specialization count (one
 *       uint16_t) is read and on which the two-int reply (return code,
 *       then error number) is written
 * job - step record; its cont_id identifies the container to signal
 * uid - uid of the requester; must pass _slurm_authorized_user()
 *
 * The reply carries rc == -1 with errnum set to EPERM,
 * ESLURMD_JOB_NOTRUNNING or ESLURMD_STEP_SUSPENDED when the request is
 * refused.  Failures to deliver SIGTSTP/SIGSTOP are only logged; the
 * step is still marked suspended and the reply stays successful.
 *
 * Returns SLURM_SUCCESS once the reply has been written, SLURM_FAILURE
 * via the rwfail label (presumably the failure target of the
 * safe_read/safe_write macros, which are defined elsewhere).
 */
static int
_handle_suspend(int fd, stepd_step_rec_t *job, uid_t uid)
{
	/* -1: launch type not looked up yet; 1: launch/poe; 0: other */
	static int poe_launch = -1;
	int status = SLURM_SUCCESS;
	int err_code = 0;
	uint16_t job_core_spec = (uint16_t) NO_VAL;

	safe_read(fd, &job_core_spec, sizeof(uint16_t));

	debug("_handle_suspend for step:%u.%u uid:%ld core_spec:%u",
	      job->jobid, job->stepid, (long)uid, job_core_spec);

	/* Only authorized users may suspend a step */
	if (!_slurm_authorized_user(uid)) {
		debug("job step suspend request from uid %ld for job %u.%u ",
		      (long)uid, job->jobid, job->stepid);
		status = -1;
		err_code = EPERM;
		goto done;
	}

	/* A zero container id means there is nothing to signal */
	if (job->cont_id == 0) {
		debug ("step %u.%u invalid container [cont_id:%"PRIu64"]",
		       job->jobid, job->stepid, job->cont_id);
		status = -1;
		err_code = ESLURMD_JOB_NOTRUNNING;
		goto done;
	}

	acct_gather_suspend_poll();

	/* Look the launch type up once and cache the answer */
	if (poe_launch == -1) {
		char *launch_type = slurm_get_launch_type();

		poe_launch = (strcmp(launch_type, "launch/poe") == 0) ? 1 : 0;
		xfree(launch_type);
	}

	/* The suspended flag is read and updated under suspend_mutex */
	pthread_mutex_lock(&suspend_mutex);
	if (suspended) {
		pthread_mutex_unlock(&suspend_mutex);
		status = -1;
		err_code = ESLURMD_STEP_SUSPENDED;
		goto done;
	}

	if (!job->batch && switch_g_job_step_pre_suspend(job))
		error("switch_g_job_step_pre_suspend: %m");

	/*
	 * SIGTSTP is sent first to let MPI daemons stop their tasks; we
	 * then wait 2 seconds and send SIGSTOP to the spawned process's
	 * container to stop everything else.
	 *
	 * In some cases 1 second has proven insufficient.  Longer delays
	 * may help ensure that all MPI tasks have been stopped (that
	 * depends upon the MPI implementation used), but they also extend
	 * the period during which more than one job can be running on each
	 * resource (not good).
	 *
	 * SIGTSTP is skipped under launch/poe: IBM MPI seems to
	 * periodically hang upon receipt of SIGTSTP.
	 */
	if (poe_launch == 0) {
		if (proctrack_g_signal(job->cont_id, SIGTSTP) < 0) {
			verbose("Error suspending %u.%u (SIGTSTP): %m",
				job->jobid, job->stepid);
		} else {
			sleep(2);
		}
	}

	if (proctrack_g_signal(job->cont_id, SIGSTOP) >= 0) {
		verbose("Suspended %u.%u", job->jobid, job->stepid);
	} else {
		verbose("Error suspending %u.%u (SIGSTOP): %m",
			job->jobid, job->stepid);
	}
	suspended = true;

	/* Post-suspend hooks apply to non-batch steps only */
	if (!job->batch && switch_g_job_step_post_suspend(job))
		error("switch_g_job_step_post_suspend: %m");
	if (!job->batch && core_spec_g_suspend(job->cont_id, job_core_spec))
		error("core_spec_g_suspend: %m");

	pthread_mutex_unlock(&suspend_mutex);

done:
	/* Reply: return code followed by error number */
	safe_write(fd, &status, sizeof(int));
	safe_write(fd, &err_code, sizeof(int));
	return SLURM_SUCCESS;
rwfail:
	return SLURM_FAILURE;
}