void *_sig_basil(void *args) { args_sig_basil_t *args_sig_basil = (args_sig_basil_t *) args; int rc; sleep(args_sig_basil->delay); rc = basil_signal_apids(args_sig_basil->resv_id, args_sig_basil->signal, NULL); if (rc) { error("could not signal APIDs of resId %u: %s", args_sig_basil->resv_id, basil_strerror(rc)); } xfree(args); return NULL; }
/** * do_basil_signal - pass job signal on to any APIDs * IN job_ptr - job to be signalled * IN signal - signal(7) number * Only signal job if an ALPS reservation exists (non-0 reservation ID). */ extern int do_basil_signal(struct job_record *job_ptr, int signal) { uint32_t resv_id; if (_get_select_jobinfo(job_ptr->select_jobinfo->data, SELECT_JOBDATA_RESV_ID, &resv_id) != SLURM_SUCCESS) { error("can not read resId for JobId=%u", job_ptr->job_id); } else if (resv_id != 0) { int rc = basil_signal_apids(resv_id, signal, NULL); if (rc) error("could not signal APIDs of resId %u: %s", resv_id, basil_strerror(rc)); } return SLURM_SUCCESS; }
/** * queue_basil_signal - queue job signal on to any APIDs * IN job_ptr - job to be signalled * IN signal - signal(7) number * IN delay - how long to delay the signal, in seconds * Only signal job if an ALPS reservation exists (non-0 reservation ID). */ extern void queue_basil_signal(struct job_record *job_ptr, int signal, uint16_t delay) { args_sig_basil_t *args_sig_basil; pthread_attr_t attr_sig_basil; pthread_t thread_sig_basil; uint32_t resv_id; if (_get_select_jobinfo(job_ptr->select_jobinfo->data, SELECT_JOBDATA_RESV_ID, &resv_id) != SLURM_SUCCESS) { error("can not read resId for JobId=%u", job_ptr->job_id); return; } if (resv_id == 0) return; if ((delay == 0) || (delay == (uint16_t) NO_VAL)) { /* Send the signal now */ int rc = basil_signal_apids(resv_id, signal, NULL); if (rc) error("could not signal APIDs of resId %u: %s", resv_id, basil_strerror(rc)); return; } /* Create a thread to send the signal later */ slurm_attr_init(&attr_sig_basil); if (pthread_attr_setdetachstate(&attr_sig_basil, PTHREAD_CREATE_DETACHED)) { error("pthread_attr_setdetachstate error %m"); slurm_attr_destroy(&attr_sig_basil); return; } args_sig_basil = xmalloc(sizeof(args_sig_basil_t)); args_sig_basil->resv_id = resv_id; args_sig_basil->signal = signal; args_sig_basil->delay = delay; if (pthread_create(&thread_sig_basil, &attr_sig_basil, _sig_basil, (void *) args_sig_basil)) { error("pthread_create error %m"); slurm_attr_destroy(&attr_sig_basil); xfree(args_sig_basil); return; } slurm_attr_destroy(&attr_sig_basil); }
/**
 * basil_safe_release - release a reservation, then kill its job steps
 * @rsvn_id: reservation to release
 * @inv:     recent Basil Inventory, or NULL to generate internally
 *
 * Returns the result of the RELEASE request: 0 if ok, a negative
 * %basil_error otherwise. The outcome of the follow-up SIGKILL is not
 * reported to the caller.
 */
int basil_safe_release(int32_t rsvn_id, struct basil_inventory *inv)
{
	/*
	 * RELEASE has no effect while application IDs (APIDs) of @rsvn_id
	 * are still alive: ALPS keeps a reservation until every one of its
	 * APIDs has disappeared. ALPS normally cleans the APIDs up by itself
	 * on regular termination; to get rid of orphaned reservations we
	 * additionally try to terminate the APIDs by hand, as apkill(1)
	 * would. Should that attempt fail, the plain RELEASE is all that
	 * was done and ALPS is left to resolve the situation.
	 *
	 * The RELEASE is issued first so that no further aprun lines can be
	 * started while the kill of the current one is still in progress.
	 */
	const int release_rc = basil_release(rsvn_id);

	/* Best effort only: the result is deliberately discarded. */
	(void) basil_signal_apids(rsvn_id, SIGKILL, inv);

	return release_rc;
}