/*
 * select_p_job_suspend - suspend a job on this select plugin layer.
 *
 * IN job_ptr   - job to suspend; a NULL pointer is accepted and treated
 *                as a no-op.
 * IN indf_susp - passed through unchanged to other_job_suspend().
 * RET SLURM_SUCCESS if job_ptr is NULL, SLURM_ERROR if the BASIL switch
 *     fails, otherwise whatever other_job_suspend() returns.
 *
 * NOTE(review): unlike select_p_job_resume(), this path does not test
 * slurmctld_primary before calling do_basil_switch() - confirm whether
 * that asymmetry is intended.
 */
extern int select_p_job_suspend(struct job_record *job_ptr, bool indf_susp)
{
	if (!job_ptr)
		return SLURM_SUCCESS;

	/* Zero-size jobs skip the BASIL switch call entirely. */
	if (!_zero_size_job(job_ptr)) {
		/* Second argument 1 on suspend (resume passes 0). */
		if (do_basil_switch(job_ptr, 1) != SLURM_SUCCESS)
			return SLURM_ERROR;
	}

	return other_job_suspend(job_ptr, indf_susp);
}
/*
 * select_p_job_resume - resume a previously suspended job.
 *
 * IN job_ptr   - job to resume; a NULL pointer is accepted and treated
 *                as a no-op.
 * IN indf_susp - passed through unchanged to other_job_resume().
 * RET SLURM_SUCCESS if job_ptr is NULL, SLURM_ERROR if the BASIL switch
 *     fails, otherwise whatever other_job_resume() returns.
 */
extern int select_p_job_resume(struct job_record *job_ptr, bool indf_susp)
{
	if (!job_ptr)
		return SLURM_SUCCESS;

	/*
	 * The BASIL switch is only attempted when slurmctld_primary is set
	 * and the job is not a zero-size job.
	 */
	if (slurmctld_primary && !_zero_size_job(job_ptr)) {
		/* Second argument 0 on resume (suspend passes 1). */
		if (do_basil_switch(job_ptr, 0) != SLURM_SUCCESS)
			return SLURM_ERROR;
	}

	return other_job_resume(job_ptr, indf_susp);
}
/*
 * select_p_job_begin - start a job on this select plugin layer.
 *
 * IN/OUT job_ptr - job being started (must not be NULL; asserted).
 *                  If do_basil_reserve() fails, state_reason is set to
 *                  WAIT_RESOURCES and state_desc is released.
 * RET SLURM_ERROR if the reservation attempt fails, otherwise whatever
 *     other_job_begin() returns.
 */
extern int select_p_job_begin(struct job_record *job_ptr)
{
	xassert(job_ptr);

	/*
	 * No reservation is attempted unless slurmctld_primary is set and
	 * the job is not a zero-size job.
	 */
	if (!slurmctld_primary || _zero_size_job(job_ptr))
		return other_job_begin(job_ptr);

	if (do_basil_reserve(job_ptr) == SLURM_SUCCESS)
		return other_job_begin(job_ptr);

	/* Reservation failed: record why the job is waiting. */
	job_ptr->state_reason = WAIT_RESOURCES;
	xfree(job_ptr->state_desc);
	return SLURM_ERROR;
}
extern int select_p_job_signal(struct job_record *job_ptr, int signal) { xassert(job_ptr); /* * Release the ALPS reservation already here for those signals that are * likely to terminate the job. Otherwise there is a race condition if a * script has more than one aprun line: while the apkill of the current * aprun line is underway, the job script proceeds to run and executes * the next following aprun line, until reaching the end of the script. * This not only creates large delays, it can also mess up cleaning up * after the job. Releasing the reservation will stop any new aprun * lines from being executed. */ if (slurmctld_primary) { switch (signal) { case SIGCHLD: case SIGCONT: case SIGSTOP: case SIGTSTP: case SIGTTIN: case SIGTTOU: case SIGURG: case SIGWINCH: break; case SIGTERM: case SIGKILL: if (cray_conf->no_apid_signal_on_kill && job_ptr->batch_flag) return other_job_signal( job_ptr, signal); default: if (signal < SIGRTMIN) do_basil_release(job_ptr); } } if (slurmctld_primary && !_zero_size_job(job_ptr)) { if (signal != SIGKILL) { if (do_basil_signal(job_ptr, signal) != SLURM_SUCCESS) return SLURM_ERROR; } else { uint16_t kill_wait = slurm_get_kill_wait(); if (do_basil_signal(job_ptr, SIGCONT) != SLURM_SUCCESS) return SLURM_ERROR; if (do_basil_signal(job_ptr, SIGTERM) != SLURM_SUCCESS) return SLURM_ERROR; queue_basil_signal(job_ptr, SIGKILL, kill_wait); } } return other_job_signal(job_ptr, signal); }
/*
 * select_p_job_fini - finish a job and release its reservation.
 *
 * IN job_ptr - job being finished; a NULL pointer is accepted and
 *              treated as a no-op.
 * RET SLURM_SUCCESS if job_ptr is NULL or job_state is NO_VAL after the
 *     release step, SLURM_ERROR if do_basil_release() fails, otherwise
 *     whatever other_job_fini() returns.
 */
extern int select_p_job_fini(struct job_record *job_ptr)
{
	if (!job_ptr)
		return SLURM_SUCCESS;

	/* Zero-size jobs skip the release call. */
	if (!_zero_size_job(job_ptr)) {
		if (do_basil_release(job_ptr) != SLURM_SUCCESS)
			return SLURM_ERROR;
	}

	/*
	 * Convention (shared with select_p_job_ready): this function can
	 * also be invoked from stepdmgr, which marks its calls by setting
	 * job_state to NO_VAL. Only calls without that marker are passed on
	 * to other_job_fini().
	 */
	if (job_ptr->job_state != (uint16_t)NO_VAL)
		return other_job_fini(job_ptr);

	return SLURM_SUCCESS;
}
extern int select_p_job_ready(struct job_record *job_ptr) { int rc = SLURM_SUCCESS; xassert(job_ptr); /* * Convention: this function may be called also from stepdmgr, to * confirm the ALPS reservation of a batch job. In this * case, job_ptr only has minimal information and sets * job_state == NO_VAL to distinguish this call from one * done by slurmctld. It also sets batch_flag == 0, which * means that we need to confirm only if batch_flag is 0, * and execute the other_job_ready() only in slurmctld. */ if (!job_ptr->batch_flag && !_zero_size_job(job_ptr)) rc = do_basil_confirm(job_ptr); if (rc != SLURM_SUCCESS || (job_ptr->job_state == (uint16_t)NO_VAL)) return rc; return other_job_ready(job_ptr); }
/*
 * select_p_job_fini - finish a job and release its reservation.
 *
 * IN job_ptr - job being finished; a NULL pointer is accepted and
 *              treated as a no-op.
 * RET SLURM_SUCCESS if job_ptr is NULL or job_state is NO_VAL after the
 *     release step, SLURM_ERROR if do_basil_release() fails, otherwise
 *     whatever other_job_fini() returns.
 */
extern int select_p_job_fini(struct job_record *job_ptr)
{
	if (!job_ptr)
		return SLURM_SUCCESS;

	/*
	 * The controller does not run the release for batch jobs; that is
	 * handled on the stepd end. So the release is attempted here only
	 * for non-batch jobs when slurmctld_primary is set, or when
	 * job_state == NO_VAL marks the call as coming from stepdmgr.
	 */
	if ((slurmctld_primary && !job_ptr->batch_flag) ||
	    (job_ptr->job_state == NO_VAL)) {
		/* Zero-size jobs skip the release call. */
		if (!_zero_size_job(job_ptr) &&
		    (do_basil_release(job_ptr) != SLURM_SUCCESS))
			return SLURM_ERROR;
	}

	/*
	 * Convention (shared with select_p_job_ready): calls from stepdmgr
	 * set job_state to NO_VAL and are not passed on to
	 * other_job_fini().
	 */
	if (job_ptr->job_state == NO_VAL)
		return SLURM_SUCCESS;

	return other_job_fini(job_ptr);
}