/*
 * _handle_msg - authenticate and dispatch one received message.
 * msg IN - received message; msg->auth_cred identifies the sender and
 *	msg->data carries the type-specific payload.
 *
 * The message is processed only if the sender's uid is the Slurm user id,
 * uid 0, or the uid this process runs as; anything else is logged as a
 * security violation and dropped.  For every recognized message type the
 * payload is released with the matching slurm_free_*() routine once it has
 * been handled.  Unrecognized types are only logged.
 */
static void _handle_msg(slurm_msg_t *msg)
{
	/* Slurm user id, resolved on the first call and reused afterwards. */
	static uint32_t cached_slurm_uid = NO_VAL;
	uid_t sender_uid = g_slurm_auth_get_uid(msg->auth_cred, NULL);
	uid_t my_uid = getuid();

	if (cached_slurm_uid == NO_VAL)
		cached_slurm_uid = slurm_get_slurm_user_id();

	/* Accept only the Slurm user, uid 0, or ourselves as the sender. */
	if (!((sender_uid == cached_slurm_uid) || (sender_uid == 0) ||
	      (sender_uid == my_uid))) {
		error ("Security violation, slurm message from uid %u",
		       (unsigned int) sender_uid);
		return;
	}

	switch (msg->msg_type) {
	case SRUN_PING:
		debug3("slurmctld ping received");
		/* A ping is the only message type answered with a reply. */
		slurm_send_rc_msg(msg, SLURM_SUCCESS);
		slurm_free_srun_ping_msg(msg->data);
		break;

	case SRUN_JOB_COMPLETE:
		debug("received job step complete message");
		_handle_step_complete(msg->data);
		slurm_free_srun_job_complete_msg(msg->data);
		break;

	case SRUN_USER_MSG: {
		srun_user_msg_t *user_msg = msg->data;

		/* Relay the text through the info-level log. */
		info("%s", user_msg->msg);
		slurm_free_srun_user_msg(msg->data);
		break;
	}

	case SRUN_TIMEOUT:
		debug2("received job step timeout message");
		_handle_timeout(msg->data);
		slurm_free_srun_timeout_msg(msg->data);
		break;

	case SRUN_STEP_SIGNAL: {
		job_step_kill_msg_t *kill_msg = msg->data;

		debug("received step signal %u RPC", kill_msg->signal);
		/* A signal number of zero is not forwarded. */
		if (kill_msg->signal)
			launch_p_fwd_signal(kill_msg->signal);
		slurm_free_job_step_kill_msg(msg->data);
		break;
	}

	default:
		debug("received spurious message type: %u", msg->msg_type);
		break;
	}
}
/*
 * _handle_timeout - react to a step timeout message.
 * timeout_msg IN - message carrying the job id, step id and the time at
 *	which the step times out.
 *
 * If the timeout lies in the future it is only reported at debug level.
 * Otherwise the cancellation is logged as an error, stamped with the
 * current time, and SIGKILL is forwarded through launch_p_fwd_signal().
 */
static void _handle_timeout(srun_timeout_msg_t *timeout_msg)
{
	char stamp[24];
	time_t current = time(NULL);

	if (timeout_msg->timeout > current) {
		/* Deadline not reached yet: just announce when it will hit. */
		slurm_make_time_str(&timeout_msg->timeout, stamp,
				    sizeof(stamp));
		debug("step %u.%u will timeout at %s",
		      timeout_msg->job_id, timeout_msg->step_id, stamp);
	} else {
		/* Deadline reached or passed: report it and kill the step. */
		slurm_make_time_str(&current, stamp, sizeof(stamp));
		error("*** STEP %u.%u CANCELLED AT %s DUE TO TIME LIMIT ***",
		      timeout_msg->job_id, timeout_msg->step_id, stamp);
		launch_p_fwd_signal(SIGKILL);
	}
}
/*
 * _handle_timeout - react to a step timeout message.
 * timeout_msg IN - message carrying the job id and step id of the step.
 *
 * Logs the cancellation as an error, stamped with the current time, and
 * forwards SIGKILL through launch_p_fwd_signal().
 *
 * The timeout value in the message is deliberately not compared against
 * the current time.  An earlier version returned early while the deadline
 * was still in the future; per the original author's note, waiting for
 * that to happen never completes when srun is the caller without being in
 * an allocation, so the step is terminated right away instead.
 */
static void _handle_timeout(srun_timeout_msg_t *timeout_msg)
{
	char stamp[24];
	time_t current = time(NULL);

	slurm_make_time_str(&current, stamp, sizeof(stamp));
	error("*** STEP %u.%u CANCELLED AT %s DUE TO TIME LIMIT ***",
	      timeout_msg->job_id, timeout_msg->step_id, stamp);

	launch_p_fwd_signal(SIGKILL);
}
/*
 * _handle_step_complete - respond to a job step completion message.
 * comp_msg IN - completion message; its contents are not examined.
 *
 * Forwards SIGKILL through launch_p_fwd_signal().
 */
static void _handle_step_complete(srun_job_complete_msg_t *comp_msg)
{
	(void) comp_msg;	/* intentionally unused */

	launch_p_fwd_signal(SIGKILL);
}