/*
 * Register a pid with the extern step of a job and start a detached thread
 * (_wait_extern_pid) that waits on it.
 *
 * IN job - step record; must describe the extern step (SLURM_EXTERN_CONT)
 * IN pid - process id to add
 * RET SLURM_SUCCESS once the pid has been handed to the proctrack, task and
 *     jobacct_gather plugins and the waiter thread was created, otherwise
 *     SLURM_FAILURE.
 */
static int _handle_add_extern_pid_internal(stepd_step_rec_t *job, pid_t pid)
{
	pthread_t thread_id;
	pthread_attr_t attr;
	extern_pid_t *extern_pid;
	jobacct_id_t jobacct_id;
	int retries = 0, rc = SLURM_SUCCESS;

	if (job->stepid != SLURM_EXTERN_CONT) {
		error("%s: non-extern step (%u) given for job %u.",
		      __func__, job->stepid, job->jobid);
		return SLURM_FAILURE;
	}

	debug("%s: for job %u.%u, pid %d",
	      __func__, job->jobid, job->stepid, pid);

	/* track pid: add outside of the below thread so that the pam module
	 * waits until the parent pid is added, before letting the parent spawn
	 * any children. */
	jobacct_id.taskid = job->nodeid;
	jobacct_id.nodeid = job->nodeid;
	jobacct_id.job = job;

	if (proctrack_g_add(job, pid) != SLURM_SUCCESS) {
		error("%s: Job %u can't add pid %d to proctrack plugin in the extern_step.",
		      __func__, job->jobid, pid);
		return SLURM_FAILURE;
	}

	if (task_g_add_pid(pid) != SLURM_SUCCESS) {
		error("%s: Job %u can't add pid %d to task plugin in the extern_step.",
		      __func__, job->jobid, pid);
		return SLURM_FAILURE;
	}

	if (jobacct_gather_add_task(pid, &jobacct_id, 1) != SLURM_SUCCESS) {
		error("%s: Job %u can't add pid %d to jobacct_gather plugin in the extern_step.",
		      __func__, job->jobid, pid);
		return SLURM_FAILURE;
	}

	/*
	 * Allocate the thread argument only after all tracking calls have
	 * succeeded, so none of the early returns above can leak it.
	 */
	extern_pid = xmalloc(sizeof(*extern_pid));
	extern_pid->job = job;
	extern_pid->pid = pid;

	/* spawn a thread that will wait on the pid given */
	slurm_attr_init(&attr);
	pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
	/*
	 * NOTE(review): pthread_create() reports failure through its return
	 * value, not errno, so the %m below may print an unrelated error.
	 */
	while (pthread_create(&thread_id, &attr, &_wait_extern_pid,
			      (void *) extern_pid)) {
		error("%s: pthread_create: %m", __func__);
		if (++retries > MAX_RETRIES) {
			error("%s: Can't create pthread", __func__);
			rc = SLURM_FAILURE;
			break;
		}
		usleep(10);	/* sleep and again */
	}
	slurm_attr_destroy(&attr);

	/*
	 * On success the argument now belongs to the waiter thread
	 * (presumably released there -- confirm in _wait_extern_pid).
	 * If no thread could be started nobody else holds it, so free it here.
	 */
	if (rc != SLURM_SUCCESS)
		xfree(extern_pid);

	return rc;
}
/* Add a process to a job container, create the proctrack container to add */ extern int container_p_add_pid(uint32_t job_id, pid_t pid, uid_t uid) { stepd_step_rec_t job; int rc; DEF_TIMERS; START_TIMER; if (debug_flags & DEBUG_FLAG_JOB_CONT) { info("%s: adding pid(%u.%u)", plugin_type, job_id, (uint32_t) pid); } memset(&job, 0, sizeof(stepd_step_rec_t)); job.jmgr_pid = pid; job.uid = uid; if (proctrack_g_create(&job) != SLURM_SUCCESS) { error("%s: proctrack_g_create job(%u)", plugin_type,job_id); return SLURM_ERROR; } proctrack_g_add(&job, pid); rc = container_p_add_cont(job_id, job.cont_id); if (debug_flags & DEBUG_FLAG_TIME_CRAY) { END_TIMER; INFO_LINE("call took: %s", TIME_STR); } return rc; }
/*
 * Handle an "add extern pid" request: read a pid from fd, add it to the
 * extern step's proctrack container and to jobacct_gather, then write the
 * resulting return code back to fd.
 *
 * IN fd  - descriptor the pid is read from and the return code is written to
 * IN job - step record; must describe the extern step (SLURM_EXTERN_CONT)
 * RET SLURM_SUCCESS if the reply was written (the outcome of the request
 *     itself is sent over fd), SLURM_FAILURE via the rwfail label
 *     (presumably reached from safe_read()/safe_write() on I/O errors).
 */
static int _handle_add_extern_pid(int fd, stepd_step_rec_t *job)
{
	int rc = SLURM_SUCCESS;
	pid_t pid;
	jobacct_id_t jobacct_id;

	safe_read(fd, &pid, sizeof(pid_t));

	if (job->stepid != SLURM_EXTERN_CONT) {
		error("_handle_add_extern_pid: non-extern step (%u) given for job %u.",
		      job->stepid, job->jobid);
		rc = SLURM_FAILURE;
		goto send_it;
	}

	debug("_handle_add_extern_pid for job %u.%u, pid %d",
	      job->jobid, job->stepid, pid);

	jobacct_id.taskid = job->nodeid;
	jobacct_id.nodeid = job->nodeid;
	jobacct_id.job = job;

	/*
	 * Report plugin failures to the requester instead of always
	 * answering SLURM_SUCCESS.
	 */
	if (proctrack_g_add(job, pid) != SLURM_SUCCESS) {
		error("_handle_add_extern_pid: Job %u can't add pid %d to proctrack plugin in the extern_step.",
		      job->jobid, pid);
		rc = SLURM_FAILURE;
		goto send_it;
	}

	if (jobacct_gather_add_task(pid, &jobacct_id, 1) != SLURM_SUCCESS) {
		error("_handle_add_extern_pid: Job %u can't add pid %d to jobacct_gather plugin in the extern_step.",
		      job->jobid, pid);
		rc = SLURM_FAILURE;
	}

send_it:
	/* Send the return code */
	safe_write(fd, &rc, sizeof(int));

	debug("Leaving _handle_add_extern_pid");
	return SLURM_SUCCESS;
rwfail:
	return SLURM_FAILURE;
}