コード例 #1
0
ファイル: proctrack_cray.c プロジェクト: mrhaoji/slurm
/* NOTE: This function is called after slurmstepd spawns all user tasks.
 * Since the slurmstepd was placed in the job container when the container
 * was created and all of it's spawned tasks are placed into the container
 * when forked, all we need to do is remove the slurmstepd from the container
 * (once) at this time. */
int proctrack_p_plugin_add(stepd_step_rec_t *job, pid_t pid)
{
	if (job_attachpid(pid, job->cont_id) == (jid_t) -1)
		error("Failed to attach pid %d to job container: %m", pid);

	_end_container_thread();

	return SLURM_SUCCESS;
}
コード例 #2
0
extern int fini(void)
{
	_end_container_thread();

	/* free up some memory */
	slurm_mutex_destroy(&notify_mutex);
	pthread_cond_destroy(&notify);
	slurm_mutex_destroy(&thread_mutex);

	return SLURM_SUCCESS;
}
コード例 #3
0
int proctrack_p_signal(uint64_t id, int sig)
{
	if (!threadid) {
		if ((job_killjid((jid_t) id, sig) < 0)
		    && (errno != ENODATA) && (errno != EBADF) )
			return (SLURM_ERROR);
	} else if (sig == SIGKILL) {
		/* job ended before it started */
		_end_container_thread();
	} else
		error("Trying to send signal %d a container 0x%08lx "
		      "that hasn't had anything added to it yet", sig, id);
	return (SLURM_SUCCESS);
}
コード例 #4
0
ファイル: proctrack_cray.c プロジェクト: A1ve5/slurm
/* NOTE: This function is called after slurmstepd spawns all user tasks.
 * Since the slurmstepd was placed in the job container when the container
 * was created and all of it's spawned tasks are placed into the container
 * when forked, all we need to do is remove the slurmstepd from the container
 * (once) at this time. */
int proctrack_p_add(stepd_step_rec_t *job, pid_t pid)
{
#ifdef HAVE_NATIVE_CRAY
	char fname[64];
	int fd;
#endif
	DEF_TIMERS;
	START_TIMER;

	// Attach to the job container
	if (job_attachpid(pid, job->cont_id) == (jid_t) -1) {
		error("Failed to attach pid %d to job container: %m", pid);
		return SLURM_ERROR;
	}

	_end_container_thread();

#ifdef HAVE_NATIVE_CRAY
	// Set apid for this pid
	if (job_setapid(pid, SLURM_ID_HASH(job->jobid, job->stepid)) == -1) {
		error("Failed to set pid %d apid: %m", pid);
		return SLURM_ERROR;
	}

	// Explicitly mark pid as an application (/proc/<pid>/task_is_app)
	snprintf(fname, sizeof(fname), "/proc/%d/task_is_app", pid);
	fd = open(fname, O_WRONLY);
	if (fd == -1) {
		error("Failed to open %s: %m", fname);
		return SLURM_ERROR;
	}
	if (write(fd, "1", 1) < 1) {
		error("Failed to write to %s: %m", fname);
		TEMP_FAILURE_RETRY(close(fd));
		return SLURM_ERROR;
	}
	TEMP_FAILURE_RETRY(close(fd));
#endif
	END_TIMER;
	if (debug_flags & DEBUG_FLAG_TIME_CRAY)
		INFO_LINE("call took: %s", TIME_STR);

	return SLURM_SUCCESS;
}
コード例 #5
0
ファイル: proctrack_cray.c プロジェクト: A1ve5/slurm
int proctrack_p_signal(uint64_t id, int sig)
{
	DEF_TIMERS;
	START_TIMER;
	if (!threadid) {
		if ((job_killjid((jid_t) id, sig) < 0)
		    && (errno != ENODATA) && (errno != EBADF) )
			return (SLURM_ERROR);
	} else if (sig == SIGKILL) {
		/* job ended before it started */
		_end_container_thread();
	} else
		error("Trying to send signal %d a container 0x%08lx "
		      "that hasn't had anything added to it yet", sig, id);
	END_TIMER;
	if (debug_flags & DEBUG_FLAG_TIME_CRAY)
		INFO_LINE("call took: %s", TIME_STR);
	return (SLURM_SUCCESS);
}
コード例 #6
0
ファイル: proctrack_cray.c プロジェクト: supermanue/slurm
/* NOTE: This function is called after slurmstepd spawns all user tasks.
 * Since the slurmstepd was placed in the job container when the container
 * was created and all of it's spawned tasks are placed into the container
 * when forked, all we need to do is remove the slurmstepd from the container
 * (once) at this time. */
int proctrack_p_add(stepd_step_rec_t *job, pid_t pid)
{
#ifdef HAVE_NATIVE_CRAY
	char fname[64];
	int fd;
#endif
	int count = 0;

	DEF_TIMERS;
	START_TIMER;

try_again:
	// Attach to the job container
	if (job_attachpid(pid, job->cont_id) == (jid_t) -1) {
		if (errno == EINVAL && (count < 1)) {
			jid_t jid;
			if (proctrack_p_has_pid(job->cont_id, pid)) {
				debug("%s: Trying to add pid (%d) again to the same container, ignoring.",
				      __func__, pid);
				return SLURM_SUCCESS;
			}

			if ((jid = job_detachpid(pid)) != (jid_t) -1) {
				error("%s: Pid %d was attached to container %"PRIu64" incorrectly.  Moving to correct (%"PRIu64").",
				      __func__, pid, jid, job->cont_id);
				count++;
				goto try_again;
			} else {
				error("%s: Couldn't detach pid %d from container: %m",
				      __func__, pid);
				return SLURM_ERROR;
			}
		} else {
			error("Failed to attach pid %d to job container: %m",
			      pid);
			return SLURM_ERROR;
		}
	}
	_end_container_thread();

#ifdef HAVE_NATIVE_CRAY
	// Set apid for this pid
	if (job_setapid(pid, SLURM_ID_HASH(job->jobid, job->stepid)) == -1) {
		error("Failed to set pid %d apid: %m", pid);
		return SLURM_ERROR;
	}

	// Explicitly mark pid as an application (/proc/<pid>/task_is_app)
	snprintf(fname, sizeof(fname), "/proc/%d/task_is_app", pid);
	fd = open(fname, O_WRONLY);
	if (fd == -1) {
		error("Failed to open %s: %m", fname);
		return SLURM_ERROR;
	}
	if (write(fd, "1", 1) < 1) {
		error("Failed to write to %s: %m", fname);
		TEMP_FAILURE_RETRY(close(fd));
		return SLURM_ERROR;
	}
	TEMP_FAILURE_RETRY(close(fd));
#endif
	END_TIMER;
	if (debug_flags & DEBUG_FLAG_TIME_CRAY)
		INFO_LINE("call took: %s", TIME_STR);

	return SLURM_SUCCESS;
}