Example #1
/*
 * Set the count of specialized cores at job start
 *
 * Return SLURM_SUCCESS on success
 */
extern int core_spec_p_set(uint64_t cont_id, uint16_t core_count)
{
#if _DEBUG
	info("core_spec_p_set(%"PRIu64") to %u", cont_id, core_count);
#endif

#ifdef HAVE_NATIVE_CRAY
	int rc;
	struct job_set_affinity_info affinity_info;
	pid_t pid;
	int i;

	// Skip core spec setup when no cores are specialized
	if ((core_count == (uint16_t) NO_VAL) || (core_count < 1)) {
		return SLURM_SUCCESS;
	}

	// Set the core spec information
	// Retry because there's a small timing window during preemption
	// when two core spec jobs can be running at once.
	for (i = 0; i < CORE_SPEC_RETRIES; i++) {
		if (i) {
			sleep(1);
		}

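		// Clear errno so a stale EINVAL from an earlier call can't
		// trigger a spurious retry below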
		errno = 0;
		rc = job_set_corespec(cont_id, core_count, NULL);
		if (rc == 0 || errno != EINVAL) {
			break;
		}
	}
	if (rc != 0) {
		error("job_set_corespec(%"PRIu64", %"PRIu16") failed: %m",
		      cont_id, core_count);
		return SLURM_ERROR;
	}

	pid = getpid();

	// Slurm detaches the slurmstepd from the job, so temporarily
	// reattach it here; otherwise job_set_affinity could clobber
	// one of the tasks' affinity settings
	if (job_attachpid(pid, cont_id) == (jid_t)-1) {
		error("job_attachpid(%zu, %"PRIu64") failed: %m",
		      (size_t)pid, cont_id);
		return SLURM_ERROR;
	}

	// Apply the core specialization with job_set_affinity
	// Use NONE for the cpu list because Slurm handles its
	// own task->cpu binding
	memset(&affinity_info, 0, sizeof(struct job_set_affinity_info));
	affinity_info.cpu_list = JOB_AFFINITY_NONE;
	rc = job_set_affinity(cont_id, pid, &affinity_info);
	if (rc != 0) {
		if (affinity_info.message != NULL) {
			error("job_set_affinity(%"PRIu64", %zu) failed %s: %m",
			      cont_id, (size_t)pid, affinity_info.message);
			free(affinity_info.message);
		} else {
			error("job_set_affinity(%"PRIu64", %zu) failed: %m",
			      cont_id, (size_t)pid);
		}
		job_detachpid(pid);
		return SLURM_ERROR;
	} else if (affinity_info.message != NULL) {
		info("job_set_affinity(%"PRIu64", %zu): %s",
		     cont_id, (size_t)pid, affinity_info.message);
		free(affinity_info.message);
	}
	job_detachpid(pid);
#endif
	// The code that was here is now performed by
	// switch_p_job_step_{pre,post}_suspend()
	return SLURM_SUCCESS;
}
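
The temporary attach/detach sequence above generalizes to any container-scoped operation. Below is a minimal sketch of that pattern, assuming the Cray job container API from <job.h> (jid_t, job_attachpid, job_detachpid); the with_container helper and its name are illustrative, not part of Slurm.

#include <stdint.h>
#include <sys/types.h>
#include <unistd.h>

#include <job.h>	// Cray job container API (assumed): jid_t, job_attachpid, ...

// Hypothetical helper: attach the calling process to a job container,
// run one container-scoped operation, and always detach afterwards --
// the same ordering core_spec_p_set() uses around job_set_affinity().
static int with_container(uint64_t cont_id, int (*op)(pid_t pid))
{
	pid_t pid = getpid();
	int rc;

	if (job_attachpid(pid, cont_id) == (jid_t) -1)
		return -1;	// could not join the container

	rc = op(pid);		// container-scoped work

	job_detachpid(pid);	// detach even if op() failed
	return rc;
}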
Example #2
/* NOTE: This function is called after slurmstepd spawns all user tasks.
 * Since the slurmstepd was placed in the job container when the container
 * was created and all of its spawned tasks are placed into the container
 * when forked, all we need to do is remove the slurmstepd from the container
 * (once) at this time. */
int proctrack_p_add(stepd_step_rec_t *job, pid_t pid)
{
#ifdef HAVE_NATIVE_CRAY
	char fname[64];
	int fd;
#endif
	int count = 0;

	DEF_TIMERS;
	START_TIMER;

try_again:
	// Attach to the job container
	if (job_attachpid(pid, job->cont_id) == (jid_t) -1) {
		if (errno == EINVAL && (count < 1)) {
			jid_t jid;
			if (proctrack_p_has_pid(job->cont_id, pid)) {
				debug("%s: Trying to add pid (%d) again to the same container, ignoring.",
				      __func__, pid);
				return SLURM_SUCCESS;
			}

			if ((jid = job_detachpid(pid)) != (jid_t) -1) {
				error("%s: Pid %d was attached to container %"PRIu64" incorrectly.  Moving to correct (%"PRIu64").",
				      __func__, pid, jid, job->cont_id);
				count++;
				goto try_again;
			} else {
				error("%s: Couldn't detach pid %d from container: %m",
				      __func__, pid);
				return SLURM_ERROR;
			}
		} else {
			error("Failed to attach pid %d to job container: %m",
			      pid);
			return SLURM_ERROR;
		}
	}
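	// This performs the one-time removal of slurmstepd from the
	// container described in the NOTE above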
	_end_container_thread();

#ifdef HAVE_NATIVE_CRAY
	// Set apid for this pid
	if (job_setapid(pid, SLURM_ID_HASH(job->jobid, job->stepid)) == -1) {
		error("Failed to set pid %d apid: %m", pid);
		return SLURM_ERROR;
	}

	// Explicitly mark pid as an application (/proc/<pid>/task_is_app)
	snprintf(fname, sizeof(fname), "/proc/%d/task_is_app", pid);
	fd = open(fname, O_WRONLY);
	if (fd == -1) {
		error("Failed to open %s: %m", fname);
		return SLURM_ERROR;
	}
	if (write(fd, "1", 1) < 1) {
		error("Failed to write to %s: %m", fname);
		TEMP_FAILURE_RETRY(close(fd));
		return SLURM_ERROR;
	}
	TEMP_FAILURE_RETRY(close(fd));
#endif
	END_TIMER;
	if (debug_flags & DEBUG_FLAG_TIME_CRAY)
		INFO_LINE("call took: %s", TIME_STR);

	return SLURM_SUCCESS;
}
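
The task_is_app write is the fiddly part of this function: a short write must be treated as failure, and close() needs the EINTR retry that TEMP_FAILURE_RETRY provides. A self-contained sketch of the same write follows; mark_task_as_app is an illustrative name, and /proc/<pid>/task_is_app exists only on Cray kernels, as in the code above.

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>

// Hypothetical helper mirroring the write above: open the Cray-specific
// procfs file, write a single "1", and close with the same EINTR-retry
// behavior as TEMP_FAILURE_RETRY(close(fd)).
static int mark_task_as_app(pid_t pid)
{
	char fname[64];
	int fd, rc = 0;

	snprintf(fname, sizeof(fname), "/proc/%d/task_is_app", (int) pid);
	fd = open(fname, O_WRONLY);
	if (fd == -1)
		return -1;	// no such file on non-Cray kernels

	if (write(fd, "1", 1) < 1)
		rc = -1;	// treat a short write as failure, as above

	while ((close(fd) == -1) && (errno == EINTR))
		;		// retry close on EINTR

	return rc;
}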