Example #1
0
/*
 * _sig_basil  -  thread body that delivers a delayed signal to the APIDs
 *                of an ALPS reservation
 * IN args - pointer to an args_sig_basil_t (resv_id, signal, delay);
 *           it is released with xfree() here before returning
 * RET always NULL
 */
void *_sig_basil(void *args)
{
    args_sig_basil_t *req = args;
    int err;

    /* Wait out the requested delay before signalling. */
    sleep(req->delay);

    err = basil_signal_apids(req->resv_id, req->signal, NULL);
    if (err != 0)
        error("could not signal APIDs of resId %u: %s",
              req->resv_id, basil_strerror(err));

    xfree(args);
    return NULL;
}
Example #2
0
/**
 * do_basil_signal  -  forward a job signal to the job's APIDs
 * IN job_ptr - job whose APIDs are to be signalled
 * IN signal  - signal(7) number to deliver
 * RET SLURM_SUCCESS in every case; failures are only logged
 * Nothing is sent unless the job holds an ALPS reservation (resId != 0).
 */
extern int do_basil_signal(struct job_record *job_ptr, int signal)
{
	uint32_t resv_id;
	int rc;

	if (_get_select_jobinfo(job_ptr->select_jobinfo->data,
				SELECT_JOBDATA_RESV_ID,
				&resv_id) != SLURM_SUCCESS) {
		error("can not read resId for JobId=%u", job_ptr->job_id);
		return SLURM_SUCCESS;
	}

	/* No ALPS reservation, hence no APIDs to signal. */
	if (resv_id == 0)
		return SLURM_SUCCESS;

	rc = basil_signal_apids(resv_id, signal, NULL);
	if (rc != 0) {
		error("could not signal APIDs of resId %u: %s", resv_id,
		      basil_strerror(rc));
	}
	return SLURM_SUCCESS;
}
Example #3
0
/**
 * queue_basil_signal  -  queue job signal on to any APIDs
 * IN job_ptr - job to be signalled
 * IN signal  - signal(7) number
 * IN delay   - how long to delay the signal, in seconds
 * Only signal job if an ALPS reservation exists (non-0 reservation ID).
 */
extern void queue_basil_signal(struct job_record *job_ptr, int signal,
                               uint16_t delay)
{
    args_sig_basil_t *args_sig_basil;
    pthread_attr_t attr_sig_basil;
    pthread_t thread_sig_basil;
    uint32_t resv_id;

    if (_get_select_jobinfo(job_ptr->select_jobinfo->data,
                            SELECT_JOBDATA_RESV_ID, &resv_id) != SLURM_SUCCESS) {
        error("can not read resId for JobId=%u", job_ptr->job_id);
        return;
    }
    if (resv_id == 0)
        return;
    if ((delay == 0) || (delay == (uint16_t) NO_VAL)) {
        /* Send the signal now */
        int rc = basil_signal_apids(resv_id, signal, NULL);

        if (rc)
            error("could not signal APIDs of resId %u: %s", resv_id,
                  basil_strerror(rc));
        return;
    }

    /* Create a thread to send the signal later */
    slurm_attr_init(&attr_sig_basil);
    if (pthread_attr_setdetachstate(&attr_sig_basil,
                                    PTHREAD_CREATE_DETACHED)) {
        error("pthread_attr_setdetachstate error %m");
        slurm_attr_destroy(&attr_sig_basil);
        return;
    }
    args_sig_basil = xmalloc(sizeof(args_sig_basil_t));
    args_sig_basil->resv_id = resv_id;
    args_sig_basil->signal  = signal;
    args_sig_basil->delay   = delay;
    if (pthread_create(&thread_sig_basil, &attr_sig_basil,
                       _sig_basil, (void *) args_sig_basil)) {
        error("pthread_create error %m");
        slurm_attr_destroy(&attr_sig_basil);
        xfree(args_sig_basil);
        return;
    }
    slurm_attr_destroy(&attr_sig_basil);
}
Example #4
0
/**
 * basil_safe_release  -  release a reservation, then SIGKILL its job steps
 * @rsvn_id:	reservation to release
 * @inv:	recent Basil Inventory, or NULL to generate internally
 * Returns the status of the RELEASE (0 if ok, a negative %basil_error
 * otherwise); the outcome of the signalling step is not reported.
 */
int basil_safe_release(int32_t rsvn_id, struct basil_inventory *inv)
{
	int release_rc;

	/*
	 * ALPS keeps a reservation alive until every application ID (APID)
	 * attached to it has disappeared, so a RELEASE has no effect while
	 * APIDs of @rsvn_id are still live.
	 * ALPS normally cleans up the APIDs by itself on job termination.
	 * To get rid of orphaned reservations, the APIDs are additionally
	 * terminated by hand (cf. apkill(1)); should that fail, the ordinary
	 * RELEASE has still been issued and ALPS is left to sort it out.
	 * The RELEASE deliberately comes first, so that no subsequent aprun
	 * line can get started while the current one is still being killed.
	 */
	release_rc = basil_release(rsvn_id);
	(void) basil_signal_apids(rsvn_id, SIGKILL, inv);

	return release_rc;
}