Code Example #1
File: job_container_cncu.c  Project: cread/slurm
/* Add proctrack container (PAGG) to a job container */
extern int container_p_add_cont(uint32_t job_id, uint64_t cont_id)
{
#ifdef HAVE_NATIVE_CRAY
	jid_t cjob_id = cont_id;
	rid_t resv_id = job_id;
	int rc;
	DEF_TIMERS;
#endif

	if (debug_flags & DEBUG_FLAG_JOB_CONT) {
		info("%s: adding cont(%u.%"PRIu64")",
		     plugin_type, job_id, cont_id);
	}

#ifdef HAVE_NATIVE_CRAY
	START_TIMER;
	rc = job_attach_reservation(cjob_id, resv_id, ADD_FLAGS);
	if (debug_flags & DEBUG_FLAG_TIME_CRAY) {
		END_TIMER;
		INFO_LINE("call took: %s", TIME_STR);
	} else
		END_TIMER3("container_p_add_cont: job_attach_reservation took",
			   3000000);
	if ((rc != 0) && (errno == ENOENT)) {	/* Log and retry */
		if (debug_flags & DEBUG_FLAG_JOB_CONT)
			info("%s: add(%u.%"PRIu64"): No reservation found, "
			     "no big deal, this is probably the first time "
			     "this was called.  We will just create a new one.",
			     plugin_type, job_id, cont_id);
		START_TIMER;
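		/* The create result is not checked here; the attach
		 * below determines success or failure. */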
		rc = job_create_reservation(resv_id, CREATE_FLAGS);
		rc = job_attach_reservation(cjob_id, resv_id, ADD_FLAGS);
		if (debug_flags & DEBUG_FLAG_TIME_CRAY) {
			END_TIMER;
			INFO_LINE("call took: %s", TIME_STR);
		} else
			END_TIMER3("container_p_add_cont: "
				   "job_(create&attach)_reservation took",
				   3000000);
	}

	if ((rc == 0) || (errno == EBUSY)) {
		if (rc) {
			/* EBUSY - job ID already attached to a reservation
			 * Duplicate adds can be generated by prolog/epilog */
			debug2("%s: add(%u.%"PRIu64"): %m",
			       plugin_type, job_id, cont_id);
		} else if (debug_flags & DEBUG_FLAG_JOB_CONT)
			_stat_reservation("add", resv_id);
		return SLURM_SUCCESS;
	}
	error("%s: add(%u.%"PRIu64"): %m", plugin_type, job_id, cont_id);
	return SLURM_ERROR;
#else
	return SLURM_SUCCESS;
#endif
}
Code Example #2
File: job_container_cncu.c  Project: cread/slurm
/* Add a process to a job container, create the proctrack container to add */
extern int container_p_add_pid(uint32_t job_id, pid_t pid, uid_t uid)
{
	stepd_step_rec_t job;
	int rc;
	DEF_TIMERS;

	START_TIMER;

	if (debug_flags & DEBUG_FLAG_JOB_CONT) {
		info("%s: adding pid(%u.%u)",
		     plugin_type, job_id, (uint32_t) pid);
	}
	memset(&job, 0, sizeof(stepd_step_rec_t));
	job.jmgr_pid = pid;
	job.uid = uid;
	if (proctrack_g_create(&job) != SLURM_SUCCESS) {
		error("%s: proctrack_g_create job(%u)", plugin_type,job_id);
		return SLURM_ERROR;
	}

	proctrack_g_add(&job, pid);

	rc = container_p_add_cont(job_id, job.cont_id);

	if (debug_flags & DEBUG_FLAG_TIME_CRAY) {
		END_TIMER;
		INFO_LINE("call took: %s", TIME_STR);
	}

	return rc;
}
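Taken together, these job_container entry points follow a simple lifecycle: the container is created at job start, pids are attached as steps launch, and the container is deleted at job end. A hypothetical call sequence, for illustration only (container_p_create appears in Code Example #18 and container_p_delete in Code Example #12; error handling and the surrounding slurmd context are elided; assumes <unistd.h>):

	/* Illustration only: the expected call order. */
	uint32_t job_id = 1234;

	container_p_create(job_id);			  /* job start   */
	container_p_add_pid(job_id, getpid(), getuid());  /* step launch */
	container_p_delete(job_id);			  /* job end     */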
Code Example #3
/*
 * task_p_pre_launch() is called prior to exec of the application task.
 *	It is followed by the TaskProlog program (from slurm.conf) and
 *	by --task-prolog (from the srun command line).
 */
extern int task_p_pre_launch (stepd_step_rec_t *job)
{
#ifdef HAVE_NATIVE_CRAY
	int rc;
	uint64_t apid;
	DEF_TIMERS;

	START_TIMER;
	apid = SLURM_ID_HASH(job->jobid, job->stepid);
	debug2("task_p_pre_launch: %u.%u, apid %"PRIu64", task %d",
	       job->jobid, job->stepid, apid, job->envtp->procid);

	/*
	 * Send the rank to the application's PMI layer via an environment
	 * variable.
	 */
	rc = env_array_overwrite_fmt(&job->env, ALPS_APP_PE_ENV,
				     "%d", job->envtp->procid);
	if (rc == 0) {
		CRAY_ERR("Failed to set env variable %s", ALPS_APP_PE_ENV);
		return SLURM_ERROR;
	}

	/*
	 * Set the PMI_NO_FORK environment variable.
	 */
	rc = env_array_overwrite(&job->env, PMI_NO_FORK_ENV, "1");
	if (rc == 0) {
		CRAY_ERR("Failed to set env variable %s", PMI_NO_FORK_ENV);
		return SLURM_ERROR;
	}

	/*
	 *  Notify the task which offset to use
	 */
	rc = env_array_overwrite_fmt(&job->env, LLI_STATUS_OFFS_ENV,
				     "%d", job->envtp->localid + 1);
	if (rc == 0) {
		CRAY_ERR("Failed to set env variable %s",
			 LLI_STATUS_OFFS_ENV);
		return SLURM_ERROR;
	}

	/*
	 * Set the ALPS_APP_ID environment variable for use by
	 * Cray tools.
	 */
	rc = env_array_overwrite_fmt(&job->env, ALPS_APP_ID_ENV, "%"PRIu64,
				     apid);
	if (rc == 0) {
		CRAY_ERR("Failed to set env variable %s",
			 ALPS_APP_ID_ENV);
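		/* Note: unlike the earlier variables, a failure here is
		 * not treated as fatal. */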
	}
	END_TIMER;
	if (debug_flags & DEBUG_FLAG_TIME_CRAY)
		INFO_LINE("call took: %s", TIME_STR);
#endif
	return SLURM_SUCCESS;
}
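On the task side, the values set above arrive as ordinary environment variables. A hypothetical check inside an application; the literal name "ALPS_APP_PE" is an assumption about what ALPS_APP_PE_ENV expands to, not something this excerpt confirms:

	/* Illustration only; assumes ALPS_APP_PE_ENV == "ALPS_APP_PE".
	 * Needs <stdlib.h>. */
	const char *pe = getenv("ALPS_APP_PE");
	int rank = pe ? atoi(pe) : -1;	/* rank exported to the PMI layer */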
Code Example #4
File: proctrack_cray.c  Project: A1ve5/slurm
extern int proctrack_p_create(stepd_step_rec_t *job)
{
	pthread_attr_t attr;
	DEF_TIMERS;
	START_TIMER;

	if (!libjob_handle)
		init();

	if (!job->cont_id) {
		/* The Cray job library creates the container off the
		   process that calls job_create().  We don't want to
		   call it from the main process, because the container
		   would then include every thread the main process
		   spawns, and there is no safe way to track which pids
		   must be removed when removing the parent.  Spawning a
		   dedicated thread makes job_create() build the
		   container off that thread instead.  Once a real
		   process has been added, the thread can exit, which
		   removes its pid from the container automatically.
		   (Empty containers are not valid.)
		*/
		slurm_mutex_lock(&thread_mutex);
		if (threadid) {
			debug("Had a thread already 0x%08lx", threadid);
			slurm_mutex_lock(&notify_mutex);
			slurm_cond_wait(&notify, &notify_mutex);
			slurm_mutex_unlock(&notify_mutex);
			debug("Last thread done 0x%08lx", threadid);
		}
		/* Lock notify_mutex before starting the thread, since
		   the thread could signal before we begin waiting on
		   it. */
		slurm_mutex_lock(&notify_mutex);
		pthread_attr_init(&attr);
		pthread_create(&threadid, &attr, _create_container_thread, job);
		slurm_cond_wait(&notify, &notify_mutex);
		slurm_mutex_unlock(&notify_mutex);
		slurm_mutex_unlock(&thread_mutex);
		if (job->cont_id != (jid_t)-1)
			debug("proctrack_p_create: created jid "
			      "0x%08lx thread 0x%08lx",
			      job->cont_id, threadid);
	} else
		error("proctrack_p_create: already have a cont_id");

	END_TIMER;
	if (debug_flags & DEBUG_FLAG_TIME_CRAY)
		INFO_LINE("call took: %s", TIME_STR);

	return SLURM_SUCCESS;
}
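The helper _create_container_thread() is not included in this excerpt. A minimal sketch of what the comment describes, reusing the notify/notify_mutex pair shown above; the job_create() signature is an assumption about the Cray job library, so treat this as an illustration rather than the project's code:

static void *_create_container_thread(void *args)
{
	stepd_step_rec_t *job = (stepd_step_rec_t *) args;

	/* Create the container off this short-lived thread rather than
	 * the multi-threaded main process. */
	if ((job->cont_id = job_create(0, job->uid, 0)) == (jid_t) -1)
		error("Failed to create job container: %m");

	/* Wake the waiter in proctrack_p_create(), then stay alive until
	 * signalled again (e.g. by _end_container_thread()), since an
	 * empty container is not valid. */
	slurm_mutex_lock(&notify_mutex);
	slurm_cond_signal(&notify);
	slurm_cond_wait(&notify, &notify_mutex);
	slurm_mutex_unlock(&notify_mutex);

	return NULL;
}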
Code Example #5
File: proctrack_cray.c  Project: A1ve5/slurm
uint64_t proctrack_p_find(pid_t pid)
{
	jid_t jid;
	DEF_TIMERS;
	START_TIMER;

	jid = job_getjid(pid);
	END_TIMER;
	if (debug_flags & DEBUG_FLAG_TIME_CRAY)
		INFO_LINE("call took: %s", TIME_STR);

	/* Run END_TIMER before the failure check so the lookup is
	 * timed on both paths. */
	if (jid == (jid_t) -1)
		return ((uint64_t) 0);
	return ((uint64_t) jid);
}
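A return value of 0 means the pid is not attached to any container. Hypothetical caller, for illustration:

	/* Illustration only. */
	uint64_t cont_id = proctrack_p_find(pid);
	if (!cont_id)
		debug("pid %d is not in a job container", (int) pid);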
Code Example #6
/*
 * task_p_slurmd_resume_job()
 */
extern int task_p_slurmd_resume_job (uint32_t job_id)
{
	DEF_TIMERS;
	START_TIMER;
	debug("task_p_slurmd_resume_job: %u", job_id);

#ifdef HAVE_NATIVE_CRAY
	_step_prologue();
#endif
	END_TIMER;
	if (debug_flags & DEBUG_FLAG_TIME_CRAY)
		INFO_LINE("call took: %s", TIME_STR);

	return SLURM_SUCCESS;
}
Code Example #7
File: proctrack_cray.c  Project: A1ve5/slurm
int proctrack_p_get_pids(uint64_t cont_id, pid_t **pids, int *npids)
{
	int pidcnt, bufsize;
	pid_t *p;
	DEF_TIMERS;
	START_TIMER;

	pidcnt = job_getpidcnt((jid_t)cont_id);
	if (pidcnt > 0) {
		/*
		 * FIXME - The "+ 128" is a rough attempt to allow for
		 * the fact that job_getpidcnt() followed by
		 * job_getpidlist() is not atomic.
		 */
		bufsize = sizeof(pid_t) * (pidcnt + 128);
		p = (pid_t *)xmalloc(bufsize);
		pidcnt = job_getpidlist((jid_t)cont_id, p, bufsize);
		if (pidcnt == -1) {
			int rc = SLURM_SUCCESS;
			/* There is a possibility of a race in which the
			   last task in the job exits between
			   job_getpidcnt() and job_getpidlist().  That
			   is OK, so just return SLURM_SUCCESS.
			*/
			if (errno != ENODATA) {
				rc = SLURM_ERROR;
				error("job_getpidlist() failed: %m");
			}

			*pids = NULL;
			*npids = 0;
			xfree(p);

			return rc;
		}
		*pids = p;
		*npids = pidcnt;
	} else {
		*pids = NULL;
		*npids = 0;
	}
	END_TIMER;
	if (debug_flags & DEBUG_FLAG_TIME_CRAY)
		INFO_LINE("call took: %s", TIME_STR);

	return SLURM_SUCCESS;
}
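The pid array handed back is allocated with xmalloc() and owned by the caller. A hypothetical caller, for illustration:

	/* Illustration only. */
	pid_t *pids = NULL;
	int npids = 0;

	if (proctrack_p_get_pids(cont_id, &pids, &npids) == SLURM_SUCCESS) {
		for (int i = 0; i < npids; i++)
			debug("container %"PRIu64" holds pid %d",
			      cont_id, (int) pids[i]);
		xfree(pids);	/* caller frees the list */
	}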
Code Example #8
/*
 * task_p_pre_setuid() is called before setting the UID for the
 * user to launch their jobs. Use this to create the CPUSET directory
 * and set the owner appropriately.
 */
extern int task_p_pre_setuid (stepd_step_rec_t *job)
{
	DEF_TIMERS;
	START_TIMER;
	debug("task_p_pre_setuid: %u.%u",
	      job->jobid, job->stepid);

#ifdef HAVE_NATIVE_CRAY
	if (!job->batch)
		_step_prologue();
#endif
	END_TIMER;
	if (debug_flags & DEBUG_FLAG_TIME_CRAY)
		INFO_LINE("call took: %s", TIME_STR);

	return SLURM_SUCCESS;
}
Code Example #9
File: proctrack_cray.c  Project: A1ve5/slurm
/* NOTE: This function is called after slurmstepd spawns all user tasks.
 * Since the slurmstepd was placed in the job container when the container
 * was created and all of its spawned tasks are placed into the container
 * when forked, all we need to do is remove the slurmstepd from the container
 * (once) at this time. */
int proctrack_p_add(stepd_step_rec_t *job, pid_t pid)
{
#ifdef HAVE_NATIVE_CRAY
	char fname[64];
	int fd;
#endif
	DEF_TIMERS;
	START_TIMER;

	// Attach to the job container
	if (job_attachpid(pid, job->cont_id) == (jid_t) -1) {
		error("Failed to attach pid %d to job container: %m", pid);
		return SLURM_ERROR;
	}

	_end_container_thread();

#ifdef HAVE_NATIVE_CRAY
	// Set apid for this pid
	if (job_setapid(pid, SLURM_ID_HASH(job->jobid, job->stepid)) == -1) {
		error("Failed to set pid %d apid: %m", pid);
		return SLURM_ERROR;
	}

	// Explicitly mark pid as an application (/proc/<pid>/task_is_app)
	snprintf(fname, sizeof(fname), "/proc/%d/task_is_app", pid);
	fd = open(fname, O_WRONLY);
	if (fd == -1) {
		error("Failed to open %s: %m", fname);
		return SLURM_ERROR;
	}
	if (write(fd, "1", 1) < 1) {
		error("Failed to write to %s: %m", fname);
		TEMP_FAILURE_RETRY(close(fd));
		return SLURM_ERROR;
	}
	TEMP_FAILURE_RETRY(close(fd));
#endif
	END_TIMER;
	if (debug_flags & DEBUG_FLAG_TIME_CRAY)
		INFO_LINE("call took: %s", TIME_STR);

	return SLURM_SUCCESS;
}
Code Example #10
File: proctrack_cray.c  Project: A1ve5/slurm
int proctrack_p_destroy(uint64_t id)
{
	int status;
	DEF_TIMERS;
	START_TIMER;

	debug("destroying 0x%08lx 0x%08lx", id, threadid);

	if (!threadid)
		job_waitjid((jid_t) id, &status, 0);

	/*  Assume any error means job doesn't exist. Therefore,
	 *   return SUCCESS to slurmd so it doesn't retry continuously
	 */
	END_TIMER;
	if (debug_flags & DEBUG_FLAG_TIME_CRAY)
		INFO_LINE("call took: %s", TIME_STR);
	return SLURM_SUCCESS;
}
Code Example #11
File: proctrack_cray.c  Project: A1ve5/slurm
int proctrack_p_signal(uint64_t id, int sig)
{
	DEF_TIMERS;
	START_TIMER;
	if (!threadid) {
		if ((job_killjid((jid_t) id, sig) < 0)
		    && (errno != ENODATA) && (errno != EBADF) )
			return (SLURM_ERROR);
	} else if (sig == SIGKILL) {
		/* job ended before it started */
		_end_container_thread();
	} else
		error("Trying to send signal %d a container 0x%08lx "
		      "that hasn't had anything added to it yet", sig, id);
	END_TIMER;
	if (debug_flags & DEBUG_FLAG_TIME_CRAY)
		INFO_LINE("call took: %s", TIME_STR);
	return (SLURM_SUCCESS);
}
Code Example #12
File: job_container_cncu.c  Project: cread/slurm
extern int container_p_delete(uint32_t job_id)
{
#ifdef HAVE_NATIVE_CRAY
	rid_t resv_id = job_id;
	DEF_TIMERS;
#endif
	int rc = 0;
	int i, found = -1;
	bool job_id_change = false;

	if (debug_flags & DEBUG_FLAG_JOB_CONT)
		info("%s: deleting(%u)", plugin_type, job_id);
	slurm_mutex_lock(&context_lock);
	for (i = 0; i < job_id_count; i++) {
		if (job_id_array[i] == job_id) {
			job_id_array[i] = 0;
			job_id_change = true;
			found = i;
		}
	}
	if (found == -1)
		info("%s: no job for delete(%u)", plugin_type, job_id);
	if (job_id_change)
		_save_state(state_dir);
	slurm_mutex_unlock(&context_lock);
#ifdef HAVE_NATIVE_CRAY
	START_TIMER;
	rc = job_end_reservation(resv_id, DELETE_FLAGS);
	if (debug_flags & DEBUG_FLAG_TIME_CRAY) {
		END_TIMER;
		INFO_LINE("call took: %s", TIME_STR);
	} else
		END_TIMER3("container_p_delete: job_end_reservation took",
			   3000000);
#endif
	if (rc == 0)
		return SLURM_SUCCESS;

	if ((errno == ENOENT) || (errno == EINPROGRESS) || (errno == EALREADY))
		return SLURM_SUCCESS;	/* Not fatal error */
	error("%s: delete(%u): %m", plugin_type, job_id);
	return SLURM_ERROR;
}
Code Example #13
File: job_container_cncu.c  Project: cread/slurm
static void _stat_reservation(char *type, rid_t resv_id)
{
	struct job_resv_stat buf;
	DEF_TIMERS;

	START_TIMER;

	if (job_stat_reservation(resv_id, &buf)) {
		error("%s: stat(%"PRIu64"): %m", plugin_type, resv_id);
	} else {
		info("%s: %s/stat(%"PRIu64"): flags=%d "
		     "num_jobs=%d num_files=%d num_ipc_objs=%d",
		     plugin_type, type, resv_id, buf.flags, buf.num_jobs,
		     buf.num_files, buf.num_ipc_objs);
	}
	END_TIMER;
	if (debug_flags & DEBUG_FLAG_TIME_CRAY)
		INFO_LINE("call took: %s", TIME_STR);
}
Code Example #14
/*
 * task_p_pre_launch_priv() is called prior to exec of the application
 * task, in privileged mode, just after slurm_spank_task_init_privileged().
 */
extern int task_p_pre_launch_priv (stepd_step_rec_t *job)
{
	int rc = SLURM_SUCCESS;
	DEF_TIMERS;

	START_TIMER;

#ifdef HAVE_NATIVE_CRAY
	debug("task_p_pre_launch_priv: %u.%u",
	      job->jobid, job->stepid);

	if (track_status) {
		rc = _make_status_file(job);
	}
#endif
	END_TIMER;
	if (debug_flags & DEBUG_FLAG_TIME_CRAY)
		INFO_LINE("call took: %s", TIME_STR);
	return rc;
}
Code Example #15
/*
 * task_p_post_term() is called after termination of the application task.
 *	It is preceded by --task-epilog (from the srun command line)
 *	and followed by the TaskEpilog program (from slurm.conf).
 */
extern int task_p_post_term (stepd_step_rec_t *job,
			     stepd_step_task_info_t *task)
{
	int rc = SLURM_SUCCESS;
	DEF_TIMERS;

	START_TIMER;

#ifdef HAVE_NATIVE_CRAY
	debug("task_p_post_term: %u.%u, task %d",
	      job->jobid, job->stepid, job->envtp->procid);

	if (track_status) {
		rc = _check_status_file(job, task);
	}
#endif
	END_TIMER;
	if (debug_flags & DEBUG_FLAG_TIME_CRAY)
		INFO_LINE("call took: %s", TIME_STR);
	return rc;
}
Code Example #16
File: proctrack_cray.c  Project: supermanue/slurm
/* NOTE: This function is called after slurmstepd spawns all user tasks.
 * Since the slurmstepd was placed in the job container when the container
 * was created and all of its spawned tasks are placed into the container
 * when forked, all we need to do is remove the slurmstepd from the container
 * (once) at this time. */
int proctrack_p_add(stepd_step_rec_t *job, pid_t pid)
{
#ifdef HAVE_NATIVE_CRAY
	char fname[64];
	int fd;
#endif
	int count = 0;

	DEF_TIMERS;
	START_TIMER;

try_again:
	// Attach to the job container
	if (job_attachpid(pid, job->cont_id) == (jid_t) -1) {
		if (errno == EINVAL && (count < 1)) {
			jid_t jid;
			if (proctrack_p_has_pid(job->cont_id, pid)) {
				debug("%s: Trying to add pid (%d) again to the same container, ignoring.",
				      __func__, pid);
				return SLURM_SUCCESS;
			}

			if ((jid = job_detachpid(pid)) != (jid_t) -1) {
				error("%s: Pid %d was attached to container %"PRIu64" incorrectly.  Moving to correct (%"PRIu64").",
				      __func__, pid, jid, job->cont_id);
				count++;
				goto try_again;
			} else {
				error("%s: Couldn't detach pid %d from container: %m",
				      __func__, pid);
				return SLURM_ERROR;
			}
		} else {
			error("Failed to attach pid %d to job container: %m",
			      pid);
			return SLURM_ERROR;
		}
	}
	_end_container_thread();

#ifdef HAVE_NATIVE_CRAY
	// Set apid for this pid
	if (job_setapid(pid, SLURM_ID_HASH(job->jobid, job->stepid)) == -1) {
		error("Failed to set pid %d apid: %m", pid);
		return SLURM_ERROR;
	}

	// Explicitly mark pid as an application (/proc/<pid>/task_is_app)
	snprintf(fname, sizeof(fname), "/proc/%d/task_is_app", pid);
	fd = open(fname, O_WRONLY);
	if (fd == -1) {
		error("Failed to open %s: %m", fname);
		return SLURM_ERROR;
	}
	if (write(fd, "1", 1) < 1) {
		error("Failed to write to %s: %m", fname);
		TEMP_FAILURE_RETRY(close(fd));
		return SLURM_ERROR;
	}
	TEMP_FAILURE_RETRY(close(fd));
#endif
	END_TIMER;
	if (debug_flags & DEBUG_FLAG_TIME_CRAY)
		INFO_LINE("call took: %s", TIME_STR);

	return SLURM_SUCCESS;
}
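proctrack_p_has_pid() is referenced above but not part of this excerpt. A plausible sketch in terms of job_getjid(), the same libjob call used by proctrack_p_find() in Code Example #5 (assumes <stdbool.h>; not the project's exact code):

extern bool proctrack_p_has_pid(uint64_t cont_id, pid_t pid)
{
	jid_t jid;

	/* The pid belongs to the container iff its jid matches. */
	if ((jid = job_getjid(pid)) == (jid_t) -1)
		return false;

	return ((uint64_t) jid == cont_id);
}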
Code Example #17
File: core_spec_cray.c  Project: chrisdukey/slurm
/*
 * Set the count of specialized cores at job start
 *
 * Return SLURM_SUCCESS on success
 */
extern int core_spec_p_set(uint64_t cont_id, uint16_t core_count)
{
	DEF_TIMERS;
	START_TIMER;
#if _DEBUG
	char *spec_type;
	int spec_count;
	if (core_count == NO_VAL16) {
		spec_type  = "Cores";
		spec_count = 0;
	} else if (core_count & CORE_SPEC_THREAD) {
		spec_type  = "Threads";
		spec_count = core_count & (~CORE_SPEC_THREAD);
	} else {
		spec_type  = "Cores";
		spec_count = core_count;
	}
	info("core_spec_p_set(%"PRIu64") to %d %s",
	     cont_id, spec_count, spec_type);
#endif

#ifdef HAVE_NATIVE_CRAY
	int rc;
	struct job_set_affinity_info affinity_info;
	pid_t pid;
	int i;

	// Skip core spec setup for no specialized cores
	if ((core_count == NO_VAL16) ||
	    (core_count == CORE_SPEC_THREAD)) {
		return SLURM_SUCCESS;
	}
	core_count &= (~CORE_SPEC_THREAD);

	// Set the core spec information
	// Retry because there's a small timing window during preemption
	// when two core spec jobs can be running at once.
	for (i = 0; i < CORE_SPEC_RETRIES; i++) {
		if (i) {
			sleep(1);
		}

		errno = 0;
		rc = job_set_corespec(cont_id, core_count, NULL);
		if (rc == 0 || errno != EINVAL) {
			break;
		}
	}
	if (rc != 0) {
		debug("job_set_corespec(%"PRIu64", %"PRIu16") failed: %m",
		      cont_id, core_count);
		return SLURM_ERROR;
	}

	// Get a pid in the job to use with job_set_affinity
	pid = job_getprimepid(cont_id);
	if (pid < 0) {
		error("job_getprimepid(%"PRIu64") returned %d: %m",
		      cont_id, (int)pid);
		return SLURM_ERROR;
	}

	// Apply the core specialization with job_set_affinity
	// JOB_AFFINITY_NONE tells the kernel to not alter the process'
	// affinity unless required (the process is only allowed to run
	// on cores that will be specialized).
	memset(&affinity_info, 0, sizeof(struct job_set_affinity_info));
	affinity_info.cpu_list = JOB_AFFINITY_NONE;
	rc = job_set_affinity(cont_id, pid, &affinity_info);
	if (rc != 0) {
		if (affinity_info.message != NULL) {
			error("job_set_affinity(%"PRIu64", %zu) failed %s: %m",
			      cont_id, (size_t)pid, affinity_info.message);
			free(affinity_info.message);
		} else {
			error("job_set_affinity(%"PRIu64", %zu) failed: %m",
			      cont_id, (size_t)pid);
		}
		return SLURM_ERROR;
	} else if (affinity_info.message != NULL) {
		info("job_set_affinity(%"PRIu64", %zu): %s",
		     cont_id, (size_t)pid, affinity_info.message);
		free(affinity_info.message);
	}
#endif
	END_TIMER;
	if (debug_flags & DEBUG_FLAG_TIME_CRAY)
		INFO_LINE("call took: %s", TIME_STR);

	// The code that was here is now performed by
	// switch_p_job_step_{pre,post}_suspend()
	return SLURM_SUCCESS;
}
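The core_count argument packs a threads-versus-cores choice into a flag bit, which the #if _DEBUG block above decodes. A small worked illustration of the same decoding; it assumes only that CORE_SPEC_THREAD is a single flag bit, as the code above uses it:

	/* Illustration only: decode a specialization request. */
	uint16_t req = 2 | CORE_SPEC_THREAD;	/* "2 specialized threads" */
	bool is_threads = (req & CORE_SPEC_THREAD) != 0;	/* true */
	uint16_t count  = req & ~CORE_SPEC_THREAD;		/* 2    */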
Code Example #18
File: job_container_cncu.c  Project: cread/slurm
extern int container_p_create(uint32_t job_id)
{
#ifdef HAVE_NATIVE_CRAY
	rid_t resv_id = job_id;
	int rc;
#endif
	int i, empty = -1, found = -1;
	DEF_TIMERS;

	START_TIMER;
	if (debug_flags & DEBUG_FLAG_JOB_CONT)
		info("%s: creating(%u)", plugin_type, job_id);
	slurm_mutex_lock(&context_lock);
	for (i = 0; i < job_id_count; i++) {
		if (job_id_array[i] == 0) {
			empty = i;
		} else if (job_id_array[i] == job_id) {
			found = i;
			break;
		}
	}
	if (found == -1) {
		if (empty == -1) {
			empty = job_id_count;
			job_id_count += 4;
			job_id_array = xrealloc(job_id_array,
						sizeof(uint32_t)*job_id_count);
		}
		job_id_array[empty] = job_id;
		_save_state(state_dir);
	}
	slurm_mutex_unlock(&context_lock);

	if (debug_flags & DEBUG_FLAG_TIME_CRAY) {
		END_TIMER;
		INFO_LINE("call took: %s", TIME_STR);
	} else {
		END_TIMER3("container_p_create: saving state took", 3000000);
	}
#ifdef HAVE_NATIVE_CRAY
	START_TIMER;
	rc = job_create_reservation(resv_id, CREATE_FLAGS);
	if (debug_flags & DEBUG_FLAG_TIME_CRAY) {
		END_TIMER;
		INFO_LINE("call took: %s", TIME_STR);
	} else
		END_TIMER3("container_p_create: job_create_reservation took",
			   3000000);
	if ((rc == 0) || (errno == EEXIST)) {
		if ((found == -1) && (rc != 0) && (errno == EEXIST)) {
			error("%s: create(%u): Reservation already exists",
			      plugin_type, job_id);
		}
		if (debug_flags & DEBUG_FLAG_JOB_CONT)
			_stat_reservation("create", resv_id);
		return SLURM_SUCCESS;
	}
	error("%s: create(%u): %m", plugin_type, job_id);
	return SLURM_ERROR;
#else
	return SLURM_SUCCESS;
#endif
}
Code Example #19
/*
 * task_p_post_step() is called after termination of the step
 * (all the tasks)
 */
extern int task_p_post_step (stepd_step_rec_t *job)
{
#ifdef HAVE_NATIVE_CRAY
	char llifile[LLI_STATUS_FILE_BUF_SIZE];
	int rc, cnt;
	char *err_msg = NULL, path[PATH_MAX];
	int32_t *numa_nodes;
	cpu_set_t *cpuMasks;
	DEF_TIMERS;

	START_TIMER;

	if (track_status) {
		// Get the lli file name
		snprintf(llifile, sizeof(llifile), LLI_STATUS_FILE,
			 SLURM_ID_HASH(job->jobid, job->stepid));

		// Unlink the file
		errno = 0;
		rc = unlink(llifile);
		if (rc == -1 && errno != ENOENT) {
			CRAY_ERR("unlink(%s) failed: %m", llifile);
		} else if (rc == 0) {
			info("Unlinked %s", llifile);
		}
	}

	/*
	 * Compact Memory
	 *
	 * Determine which NUMA nodes and CPUS an application is using.  It will
	 * be used to compact the memory.
	 *
	 * You'll find the information in the following location.
	 * For a normal job step:
	 * /dev/cpuset/slurm/uid_<uid>/job_<jobID>/step_<stepID>/
	 *
	 * For a batch job step (only on the head node and only for batch jobs):
	 * /dev/cpuset/slurm/uid_<uid>/job_<jobID>/step_batch/
	 *
	 * NUMA node: mems
	 * CPU Masks: cpus
	 */
	if (job->stepid == SLURM_BATCH_SCRIPT) {
		// Batch Job Step
		rc = snprintf(path, sizeof(path),
			      "/dev/cpuset/slurm/uid_%d/job_%"
			      PRIu32 "/step_batch", job->uid, job->jobid);
		if (rc < 0) {
			CRAY_ERR("snprintf failed. Return code: %d", rc);
			return SLURM_ERROR;
		}
	} else if (job->stepid == SLURM_EXTERN_CONT) {
		// Container for PAM to use for externally launched processes
		rc = snprintf(path, sizeof(path),
			      "/dev/cpuset/slurm/uid_%d/job_%"
			      PRIu32 "/step_extern", job->uid, job->jobid);
		if (rc < 0) {
			CRAY_ERR("snprintf failed. Return code: %d", rc);
			return SLURM_ERROR;
		}
	} else {
		// Normal Job Step

		/* Only run epilogue on non-batch steps */
		_step_epilogue();

		rc = snprintf(path, sizeof(path),
			      "/dev/cpuset/slurm/uid_%d/job_%"
			      PRIu32 "/step_%" PRIu32,
			      job->uid, job->jobid, job->stepid);
		if (rc < 0) {
			CRAY_ERR("snprintf failed. Return code: %d", rc);
			return SLURM_ERROR;
		}
	}

	rc = _get_numa_nodes(path, &cnt, &numa_nodes);
	if (rc < 0) {
		CRAY_ERR("get_numa_nodes failed. Return code: %d", rc);
		return SLURM_ERROR;
	}

	rc = _get_cpu_masks(cnt, numa_nodes, &cpuMasks);
	if (rc < 0) {
		CRAY_ERR("get_cpu_masks failed. Return code: %d", rc);
		return SLURM_ERROR;
	}

	/*
	 * Compact Memory
	 * The last argument which is a path to the cpuset directory has to be
	 * NULL because the CPUSET directory has already been cleaned up.
	 */
	rc = alpsc_compact_mem(&err_msg, cnt, numa_nodes, cpuMasks, NULL);
	_ALPSC_DEBUG("alpsc_compact_mem");

	xfree(numa_nodes);
	xfree(cpuMasks);

	if (rc != 1) {
		return SLURM_ERROR;
	}
	END_TIMER;
	if (debug_flags & DEBUG_FLAG_TIME_CRAY)
		INFO_LINE("call took: %s", TIME_STR);
#endif
	return SLURM_SUCCESS;
}
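The helpers _get_numa_nodes() and _get_cpu_masks() are not shown. As a rough sketch of the parsing involved, assuming the cpuset layout described in the comment above (illustrative only; needs <stdio.h> and <limits.h>):

/* Illustration only: fetch the raw NUMA node list ("mems") for a step. */
static int _read_cpuset_mems(const char *path, char *buf, size_t buflen)
{
	char fname[PATH_MAX];
	FILE *fp;

	snprintf(fname, sizeof(fname), "%s/mems", path);
	if (!(fp = fopen(fname, "r")))
		return -1;
	if (!fgets(buf, buflen, fp)) {	/* e.g. "0-1\n" */
		fclose(fp);
		return -1;
	}
	fclose(fp);
	return 0;
}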