Beispiel #1
0
/*
 * Register an externally launched pid with the extern step and start a
 * detached thread that waits on it.
 *
 * The pid is handed to the proctrack, task and jobacct_gather plugins
 * synchronously, before the waiter thread is created, so the caller only
 * gets an answer once the pid is being tracked.
 *
 * IN job - step record; its stepid must be SLURM_EXTERN_CONT
 * IN pid - process id to track
 * RET SLURM_SUCCESS, or SLURM_FAILURE if the step is not the extern step,
 *     a plugin refuses the pid, or the waiter thread cannot be created.
 */
static int _handle_add_extern_pid_internal(stepd_step_rec_t *job, pid_t pid)
{
	pthread_t thread_id;
	pthread_attr_t attr;
	extern_pid_t *extern_pid;
	jobacct_id_t jobacct_id;
	int retries = 0, rc = SLURM_SUCCESS;

	if (job->stepid != SLURM_EXTERN_CONT) {
		error("%s: non-extern step (%u) given for job %u.",
		      __func__, job->stepid, job->jobid);
		return SLURM_FAILURE;
	}

	debug("%s: for job %u.%u, pid %d",
	      __func__, job->jobid, job->stepid, pid);

	/* track pid: add outside of the below thread so that the pam module
	 * waits until the parent pid is added, before letting the parent spawn
	 * any children. */
	jobacct_id.taskid = job->nodeid;
	jobacct_id.nodeid = job->nodeid;
	jobacct_id.job = job;

	if (proctrack_g_add(job, pid) != SLURM_SUCCESS) {
		error("%s: Job %u can't add pid %d to proctrack plugin in the extern_step.", __func__, job->jobid, pid);
		return SLURM_FAILURE;
	}

	if (task_g_add_pid(pid) != SLURM_SUCCESS) {
		error("%s: Job %u can't add pid %d to task plugin in the extern_step.", __func__, job->jobid, pid);
		return SLURM_FAILURE;
	}

	if (jobacct_gather_add_task(pid, &jobacct_id, 1) != SLURM_SUCCESS) {
		error("%s: Job %u can't add pid %d to jobacct_gather plugin in the extern_step.", __func__, job->jobid, pid);
		return SLURM_FAILURE;
	}

	/* Allocate the thread argument only once every registration has
	 * succeeded, so none of the early returns above can leak it. */
	extern_pid = xmalloc(sizeof(*extern_pid));
	extern_pid->job = job;
	extern_pid->pid = pid;

	/* spawn a thread that will wait on the pid given */
	slurm_attr_init(&attr);
	pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
	while (pthread_create(&thread_id, &attr,
			      &_wait_extern_pid, (void *) extern_pid)) {
		error("%s: pthread_create: %m", __func__);
		if (++retries > MAX_RETRIES) {
			error("%s: Can't create pthread", __func__);
			rc = SLURM_FAILURE;
			break;
		}
		usleep(10);	/* sleep and again */
	}
	slurm_attr_destroy(&attr);

	/* No thread was started, so _wait_extern_pid never received the
	 * argument; release it here instead of leaking it. */
	if (rc != SLURM_SUCCESS)
		xfree(extern_pid);

	return rc;
}
Beispiel #2
0
/*
 * Handle an "add extern pid" request arriving on fd.
 *
 * Reads a pid_t from fd, registers that pid with the proctrack and
 * jobacct_gather plugins for the extern step, then writes an int result
 * code back on fd.
 *
 * IN fd  - descriptor the request is read from and the reply written to
 * IN job - step record; its stepid must be SLURM_EXTERN_CONT
 * RET SLURM_SUCCESS once the reply has been written (the reply itself
 *     carries the outcome of the registration), SLURM_FAILURE via the
 *     rwfail label (presumably the jump target of safe_read/safe_write
 *     on I/O error -- confirm against the macro definitions).
 */
static int
_handle_add_extern_pid(int fd, stepd_step_rec_t *job)
{
	int rc = SLURM_SUCCESS;
	pid_t pid;
	jobacct_id_t jobacct_id;

	safe_read(fd, &pid, sizeof(pid_t));

	if (job->stepid != SLURM_EXTERN_CONT) {
		error("_handle_add_extern_pid: non-extern step (%u) given for job %u.",
		      job->stepid, job->jobid);
		rc = SLURM_FAILURE;
		goto send_it;
	}

	debug("_handle_add_extern_pid for job %u.%u, pid %d",
	      job->jobid, job->stepid, pid);

	jobacct_id.taskid = job->nodeid;
	jobacct_id.nodeid = job->nodeid;
	jobacct_id.job = job;

	/* Report plugin failures to the requester instead of always
	 * answering SLURM_SUCCESS for a pid that is not actually tracked. */
	if (proctrack_g_add(job, pid) != SLURM_SUCCESS) {
		error("%s: Job %u can't add pid %d to proctrack plugin in the extern_step.",
		      __func__, job->jobid, pid);
		rc = SLURM_FAILURE;
		goto send_it;
	}

	if (jobacct_gather_add_task(pid, &jobacct_id, 1) != SLURM_SUCCESS) {
		error("%s: Job %u can't add pid %d to jobacct_gather plugin in the extern_step.",
		      __func__, job->jobid, pid);
		rc = SLURM_FAILURE;
	}

send_it:
	/* Send the return code */
	safe_write(fd, &rc, sizeof(int));

	debug("Leaving _handle_add_extern_pid");
	return SLURM_SUCCESS;
rwfail:
	return SLURM_FAILURE;
}