Example #1
File: req.c Project: VURM/slurm
static int
_handle_reconfig(int fd, slurmd_job_t *job, uid_t uid)
{
	int rc = SLURM_SUCCESS;
	int errnum = 0;

	if (!_slurm_authorized_user(uid)) {
		debug("job step reconfigure request from uid %ld "
		      "for job %u.%u ",
		      (long)uid, job->jobid, job->stepid);
		rc = -1;
		errnum = EPERM;
		goto done;
	}

	/* We just want to make sure the file handle is correct on a
	   reconfigure since the file could have rolled, thus making
	   the current fd incorrect. */
	log_alter(conf->log_opts, SYSLOG_FACILITY_DAEMON, conf->logfile);
	debug("_handle_reconfigure for job %u.%u successful",
	      job->jobid, job->stepid);

done:
	/* Send the return code and errno */
	safe_write(fd, &rc, sizeof(int));
	safe_write(fd, &errnum, sizeof(int));
	return SLURM_SUCCESS;
rwfail:
	return SLURM_FAILURE;
}
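Handlers like this one finish with a two-int reply (rc, then errnum) over the stepd's Unix-domain socket. For context, here is a minimal sketch of the peer's side of that exchange, using the same safe_read() macro the handlers use; the helper name read_rc_errnum() is hypothetical, not from the Slurm source:

static int
read_rc_errnum(int fd)
{
	int rc, errnum;

	/* The handler always writes rc first, then errnum */
	safe_read(fd, &rc, sizeof(int));
	safe_read(fd, &errnum, sizeof(int));
	if (rc < 0)
		errno = errnum;	/* surface the handler's errno to the caller */
	return rc;

rwfail:
	return SLURM_FAILURE;
}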
Example #2
File: req.c Project: VURM/slurm
static int
_handle_notify_job(int fd, slurmd_job_t *job, uid_t uid)
{
	int rc = SLURM_SUCCESS;
	int len;
	char *message = NULL;

	debug3("_handle_notify_job for job %u.%u",
	       job->jobid, job->stepid);

	safe_read(fd, &len, sizeof(int));
	if (len) {
		message = xmalloc (len);
		safe_read(fd, message, len); /* '\0' terminated */
	}

	debug3("  uid = %d", uid);
	if ((uid != job->uid) && !_slurm_authorized_user(uid)) {
		debug("notify req from uid %ld for job %u.%u "
		      "owned by uid %ld",
		      (long)uid, job->jobid, job->stepid, (long)job->uid);
		rc = EPERM;
		goto done;
	}
	error("%s", message);
	xfree(message);

done:
	/* Send the return code */
	safe_write(fd, &rc, sizeof(int));
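	/* Note: Slurm's xfree() zeroes the freed pointer, so this second
	 * xfree(message) is a harmless no-op when the success path above
	 * already freed it. */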
	xfree(message);
	return SLURM_SUCCESS;
rwfail:
	return SLURM_FAILURE;
}
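The request side of this exchange is the mirror image: write the length, then the '\0'-terminated message, then read back the single return code. A hedged sketch follows (send_notify() is a hypothetical helper, not the actual stepd_api.c code):

static int
send_notify(int fd, const char *message)
{
	int rc, len = 0;

	if (message)
		len = strlen(message) + 1;	/* count the trailing '\0' */
	safe_write(fd, &len, sizeof(int));
	if (len)
		safe_write(fd, message, len);

	safe_read(fd, &rc, sizeof(int));
	return rc;

rwfail:
	return SLURM_FAILURE;
}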
Example #3
File: req.c Project: VURM/slurm
static int
_handle_terminate(int fd, slurmd_job_t *job, uid_t uid)
{
	int rc = SLURM_SUCCESS;
	int errnum = 0;

	debug("_handle_terminate for job %u.%u",
	      job->jobid, job->stepid);
	step_terminate_monitor_start(job->jobid, job->stepid);

	debug3("  uid = %d", uid);
	if (uid != job->uid && !_slurm_authorized_user(uid)) {
		debug("terminate req from uid %ld for job %u.%u "
		      "owned by uid %ld",
		      (long)uid, job->jobid, job->stepid, (long)job->uid);
		rc = -1;
		errnum = EPERM;
		goto done;
	}

	/*
	 * Sanity checks
	 */
	if (job->cont_id == 0) {
		debug ("step %u.%u invalid container [cont_id:%"PRIu64"]",
			job->jobid, job->stepid, job->cont_id);
		rc = -1;
		errnum = ESLURMD_JOB_NOTRUNNING;
		goto done;
	}

	/*
	 * Signal the container with SIGKILL
	 */
	pthread_mutex_lock(&suspend_mutex);
	if (suspended) {
		debug("Terminating suspended job step %u.%u",
		      job->jobid, job->stepid);
	}

	if (slurm_container_signal(job->cont_id, SIGKILL) < 0) {
		rc = -1;
		errnum = errno;
		verbose("Error sending SIGKILL signal to %u.%u: %m",
			job->jobid, job->stepid);
	} else {
		verbose("Sent SIGKILL signal to %u.%u",
			job->jobid, job->stepid);
	}
	pthread_mutex_unlock(&suspend_mutex);

done:
	/* Send the return code and errnum */
	safe_write(fd, &rc, sizeof(int));
	safe_write(fd, &errnum, sizeof(int));
	return SLURM_SUCCESS;
rwfail:
	return SLURM_FAILURE;
}
Example #4
File: req.c Project: gyliu513/slurm
static int
_handle_resume(int fd, stepd_step_rec_t *job, uid_t uid)
{
	int rc = SLURM_SUCCESS;
	int errnum = 0;

	debug("_handle_resume for job %u.%u",
	      job->jobid, job->stepid);

	debug3("  uid = %d", uid);
	if (!_slurm_authorized_user(uid)) {
		debug("job step resume request from uid %ld for job %u.%u ",
		      (long)uid, job->jobid, job->stepid);
		rc = -1;
		errnum = EPERM;
		goto done;
	}

	if (job->cont_id == 0) {
		debug ("step %u.%u invalid container [cont_id:%"PRIu64"]",
			job->jobid, job->stepid, job->cont_id);
		rc = -1;
		errnum = ESLURMD_JOB_NOTRUNNING;
		goto done;
	}

	acct_gather_resume_poll();
	/*
	 * Signal the container
	 */
	pthread_mutex_lock(&suspend_mutex);
	if (!suspended) {
		rc = -1;
		errnum = ESLURMD_STEP_NOTSUSPENDED;
		pthread_mutex_unlock(&suspend_mutex);
		goto done;
	} else {
		if (proctrack_g_signal(job->cont_id, SIGCONT) < 0) {
			verbose("Error resuming %u.%u: %m",
				job->jobid, job->stepid);
		} else {
			verbose("Resumed %u.%u", job->jobid, job->stepid);
		}
		suspended = false;
	}
	/* set the cpu frequencies if cpu_freq option used */
	if (job->cpu_freq != NO_VAL)
		cpu_freq_set(job);

	pthread_mutex_unlock(&suspend_mutex);

done:
	/* Send the return code and errno */
	safe_write(fd, &rc, sizeof(int));
	safe_write(fd, &errnum, sizeof(int));
	return SLURM_SUCCESS;
rwfail:
	return SLURM_FAILURE;
}
Example #5
static int
_handle_stat_jobacct(int fd, stepd_step_rec_t *job, uid_t uid)
{
	jobacctinfo_t *jobacct = NULL;
	jobacctinfo_t *temp_jobacct = NULL;
	int i = 0;
	int num_tasks = 0;
	debug("_handle_stat_jobacct for job %u.%u",
	      job->jobid, job->stepid);

	debug3("  uid = %d", uid);
	if (uid != job->uid && !_slurm_authorized_user(uid)) {
		debug("stat jobacct from uid %ld for job %u.%u "
		      "owned by uid %ld",
		      (long)uid, job->jobid, job->stepid, (long)job->uid);
		/* Send NULL */
		jobacctinfo_setinfo(jobacct, JOBACCT_DATA_PIPE, &fd,
				    SLURM_PROTOCOL_VERSION);
		return SLURM_ERROR;
	}

	jobacct = jobacctinfo_create(NULL);
	debug3("num tasks = %d", job->node_tasks);

	for (i = 0; i < job->node_tasks; i++) {
		temp_jobacct = jobacct_gather_stat_task(job->task[i]->pid);
		if (temp_jobacct) {
			jobacctinfo_aggregate(jobacct, temp_jobacct);
			jobacctinfo_destroy(temp_jobacct);
			num_tasks++;
		}
	}

	jobacctinfo_setinfo(jobacct, JOBACCT_DATA_PIPE, &fd,
			    SLURM_PROTOCOL_VERSION);
	safe_write(fd, &num_tasks, sizeof(int));

	jobacctinfo_destroy(jobacct);

	return SLURM_SUCCESS;

rwfail:
	jobacctinfo_destroy(jobacct);
	return SLURM_ERROR;
}
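The peer of this handler receives the aggregated record through the same pipe, then the task count. A minimal sketch, under the assumption that jobacctinfo_getinfo() with JOBACCT_DATA_PIPE reads what jobacctinfo_setinfo() wrote; recv_stat_jobacct() is a hypothetical name:

static int
recv_stat_jobacct(int fd, jobacctinfo_t *jobacct, int *num_tasks)
{
	/* Counterpart of the setinfo(JOBACCT_DATA_PIPE) call above */
	jobacctinfo_getinfo(jobacct, JOBACCT_DATA_PIPE, &fd,
			    SLURM_PROTOCOL_VERSION);
	safe_read(fd, num_tasks, sizeof(int));
	return SLURM_SUCCESS;

rwfail:
	return SLURM_ERROR;
}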
Example #6
File: stepd_api.c Project: IFCA/slurm
/*
 * Should be called when a connect() to a socket returns ECONNREFUSED.
 * Presumably the ECONNREFUSED means that nothing is attached to the listening
 * side of the unix domain socket.
 * If the socket is at least five minutes old, go ahead and unlink it.
 */
static void
_handle_stray_socket(const char *socket_name)
{
	struct stat buf;
	uid_t uid;
	time_t now;

	/* Only attempt to remove the stale socket if process is running
	   as root or the SlurmUser. */
	if (!_slurm_authorized_user())
		return;

	if (stat(socket_name, &buf) == -1) {
		debug3("_handle_stray_socket: unable to stat %s: %m",
			socket_name);
		return;
	}

	if ((uid = getuid()) != buf.st_uid) {
		debug3("_handle_stray_socket: socket %s is not owned by uid %d",
		       socket_name, (int)uid);
		return;
	}

	now = time(NULL);
	if ((now-buf.st_mtime) > 300) {
		/* remove the socket */
		if (unlink(socket_name) == -1) {
			if (errno != ENOENT) {
				error("_handle_stray_socket: unable to clean up"
				      " stray socket %s: %m", socket_name);
			}
		} else {
			debug("Cleaned up stray socket %s", socket_name);
		}
	}
}
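As the header comment says, this function is meant to run after a failed connect(). A sketch of a plausible caller follows (an assumed shape; the real caller in stepd_api.c may differ):

static int
_connect_to_stepd_socket(const char *socket_name)
{
	struct sockaddr_un addr;
	int fd;

	if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0)
		return -1;

	memset(&addr, 0, sizeof(addr));
	addr.sun_family = AF_UNIX;
	strncpy(addr.sun_path, socket_name, sizeof(addr.sun_path) - 1);

	if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
		if (errno == ECONNREFUSED)
			_handle_stray_socket(socket_name);
		close(fd);
		return -1;
	}
	return fd;
}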
Example #7
File: req.c Project: VURM/slurm
static int
_handle_suspend(int fd, slurmd_job_t *job, uid_t uid)
{
	int rc = SLURM_SUCCESS;
	int errnum = 0;

	debug("_handle_suspend for job %u.%u",
	      job->jobid, job->stepid);

	debug3("  uid = %d", uid);
	if (!_slurm_authorized_user(uid)) {
		debug("job step suspend request from uid %ld for job %u.%u ",
		      (long)uid, job->jobid, job->stepid);
		rc = -1;
		errnum = EPERM;
		goto done;
	}

	if (job->cont_id == 0) {
		debug ("step %u.%u invalid container [cont_id:%"PRIu64"]",
			job->jobid, job->stepid, job->cont_id);
		rc = -1;
		errnum = ESLURMD_JOB_NOTRUNNING;
		goto done;
	}

	jobacct_gather_g_suspend_poll();

	/*
	 * Signal the container
	 */
	pthread_mutex_lock(&suspend_mutex);
	if (suspended) {
		rc = -1;
		errnum = ESLURMD_STEP_SUSPENDED;
		pthread_mutex_unlock(&suspend_mutex);
		goto done;
	} else {
		/* SIGTSTP is sent first to let MPI daemons stop their
		 * tasks, then we send SIGSTOP to stop everything else */
		if (slurm_container_signal(job->cont_id, SIGTSTP) < 0) {
			verbose("Error suspending %u.%u (SIGTSTP): %m",
				job->jobid, job->stepid);
		} else
			sleep(1);

		if (slurm_container_signal(job->cont_id, SIGSTOP) < 0) {
			verbose("Error suspending %u.%u (SIGSTOP): %m",
				job->jobid, job->stepid);
		} else {
			verbose("Suspended %u.%u", job->jobid, job->stepid);
		}
		suspended = true;
	}
	pthread_mutex_unlock(&suspend_mutex);

done:
	/* Send the return code and errno */
	safe_write(fd, &rc, sizeof(int));
	safe_write(fd, &errnum, sizeof(int));
	return SLURM_SUCCESS;
rwfail:
	return SLURM_FAILURE;
}
Example #8
File: req.c Project: lipari/slurm
static int
_handle_suspend(int fd, slurmd_job_t *job, uid_t uid)
{
	int rc = SLURM_SUCCESS;
	int errnum = 0;

	debug("_handle_suspend for job %u.%u",
	      job->jobid, job->stepid);

	debug3("  uid = %d", uid);
	if (!_slurm_authorized_user(uid)) {
		debug("job step suspend request from uid %ld for job %u.%u ",
		      (long)uid, job->jobid, job->stepid);
		rc = -1;
		errnum = EPERM;
		goto done;
	}

	if (job->cont_id == 0) {
		debug ("step %u.%u invalid container [cont_id:%"PRIu64"]",
			job->jobid, job->stepid, job->cont_id);
		rc = -1;
		errnum = ESLURMD_JOB_NOTRUNNING;
		goto done;
	}

	jobacct_gather_g_suspend_poll();

	/*
	 * Signal the container
	 */
	pthread_mutex_lock(&suspend_mutex);
	if (suspended) {
		rc = -1;
		errnum = ESLURMD_STEP_SUSPENDED;
		pthread_mutex_unlock(&suspend_mutex);
		goto done;
	} else {
		/* SIGTSTP is sent first to let MPI daemons stop their tasks,
		 * then wait 2 seconds, then send SIGSTOP to the spawned
		 * process's container to stop everything else.
		 *
		 * In some cases, 1 second has proven insufficient. Longer
	 * delays may help ensure that all MPI tasks have been stopped
	 * (that depends upon the MPI implementation used), but will
		 * also permit longer time periods when more than one job can
		 * be running on each resource (not good). */
		if (slurm_container_signal(job->cont_id, SIGTSTP) < 0) {
			verbose("Error suspending %u.%u (SIGTSTP): %m",
				job->jobid, job->stepid);
		} else
			sleep(2);

		if (slurm_container_signal(job->cont_id, SIGSTOP) < 0) {
			verbose("Error suspending %u.%u (SIGSTOP): %m",
				job->jobid, job->stepid);
		} else {
			verbose("Suspended %u.%u", job->jobid, job->stepid);
		}
		suspended = true;
	}
	pthread_mutex_unlock(&suspend_mutex);

done:
	/* Send the return code and errno */
	safe_write(fd, &rc, sizeof(int));
	safe_write(fd, &errnum, sizeof(int));
	return SLURM_SUCCESS;
rwfail:
	return SLURM_FAILURE;
}
Example #9
File: req.c Project: VURM/slurm
static int
_handle_checkpoint_tasks(int fd, slurmd_job_t *job, uid_t uid)
{
	int rc = SLURM_SUCCESS;
	time_t timestamp;
	int len;
	char *image_dir = NULL;

	debug3("_handle_checkpoint_tasks for job %u.%u",
	       job->jobid, job->stepid);

	safe_read(fd, &timestamp, sizeof(time_t));
	safe_read(fd, &len, sizeof(int));
	if (len) {
		image_dir = xmalloc (len);
		safe_read(fd, image_dir, len); /* '\0' terminated */
	}

	debug3("  uid = %d", uid);
	if (uid != job->uid && !_slurm_authorized_user(uid)) {
		debug("checkpoint req from uid %ld for job %u.%u "
		      "owned by uid %ld",
		      (long)uid, job->jobid, job->stepid, (long)job->uid);
		rc = EPERM;
		goto done;
	}

	if (job->ckpt_timestamp &&
	    timestamp == job->ckpt_timestamp) {
		debug("duplicate checkpoint req for job %u.%u, "
		      "timestamp %ld. discarded.",
		      job->jobid, job->stepid, (long)timestamp);
		rc = ESLURM_ALREADY_DONE; /* EINPROGRESS? */
		goto done;
	}

	/*
	 * Sanity checks
	 */
	if (job->pgid <= (pid_t)1) {
		debug ("step %u.%u invalid [jmgr_pid:%d pgid:%u]",
		       job->jobid, job->stepid, job->jmgr_pid, job->pgid);
		rc = ESLURMD_JOB_NOTRUNNING;
		goto done;
	}

	/*
	 * Signal the process group
	 */
	pthread_mutex_lock(&suspend_mutex);
	if (suspended) {
		rc = ESLURMD_STEP_SUSPENDED;
		pthread_mutex_unlock(&suspend_mutex);
		goto done;
	}

	/* set timestamp in case another request comes */
	job->ckpt_timestamp = timestamp;

	/* TODO: do we need job->ckpt_dir any more,
	 *	except for checkpoint/xlch? */
/*	if (! image_dir) { */
/*		image_dir = xstrdup(job->ckpt_dir); */
/*	} */

	/* call the plugin to send the request */
	if (checkpoint_signal_tasks(job, image_dir) != SLURM_SUCCESS) {
		rc = -1;
		verbose("Error sending checkpoint request to %u.%u: %s",
			job->jobid, job->stepid, slurm_strerror(rc));
	} else {
		verbose("Sent checkpoint request to %u.%u",
			job->jobid, job->stepid);
	}

	pthread_mutex_unlock(&suspend_mutex);

done:
	/* Send the return code */
	safe_write(fd, &rc, sizeof(int));
	xfree(image_dir);
	return SLURM_SUCCESS;
rwfail:
	return SLURM_FAILURE;
}
Example #10
File: req.c Project: VURM/slurm
static int
_handle_signal_container(int fd, slurmd_job_t *job, uid_t uid)
{
	int rc = SLURM_SUCCESS;
	int errnum = 0;
	int sig;
	static int msg_sent = 0;

	debug("_handle_signal_container for job %u.%u",
	      job->jobid, job->stepid);

	safe_read(fd, &sig, sizeof(int));

	debug3("  uid = %d", uid);
	if (uid != job->uid && !_slurm_authorized_user(uid)) {
		debug("kill container req from uid %ld for job %u.%u "
		      "owned by uid %ld",
		      (long)uid, job->jobid, job->stepid, (long)job->uid);
		rc = -1;
		errnum = EPERM;
		goto done;
	}

	/*
	 * Sanity checks
	 */
	if (job->cont_id == 0) {
		debug ("step %u.%u invalid container [cont_id:%"PRIu64"]",
			job->jobid, job->stepid, job->cont_id);
		rc = -1;
		errnum = ESLURMD_JOB_NOTRUNNING;
		goto done;
	}

	if ((job->nodeid == 0) && (msg_sent == 0) &&
	    (job->state < SLURMSTEPD_STEP_ENDING)) {
		time_t now = time(NULL);
		char entity[24], time_str[24];
		if (job->stepid == SLURM_BATCH_SCRIPT) {
			snprintf(entity, sizeof(entity), "JOB %u", job->jobid);
		} else {
			snprintf(entity, sizeof(entity), "STEP %u.%u",
				 job->jobid, job->stepid);
		}
		slurm_make_time_str(&now, time_str, sizeof(time_str));

		/* Not really errors,
		 * but we want messages displayed by default */
		if (sig == SIG_TIME_LIMIT) {
			error("*** %s CANCELLED AT %s DUE TO TIME LIMIT ***",
			      entity, time_str);
			msg_sent = 1;
		} else if (sig == SIG_PREEMPTED) {
			error("*** %s CANCELLED AT %s DUE TO PREEMPTION ***",
			      entity, time_str);
			msg_sent = 1;
		} else if (sig == SIG_NODE_FAIL) {
			error("*** %s CANCELLED AT %s DUE TO NODE FAILURE ***",
			      entity, time_str);
			msg_sent = 1;
		} else if (sig == SIG_FAILURE) {
			error("*** %s FAILED (non-zero exit code or other "
			      "failure mode) ***", entity);
			msg_sent = 1;
		} else if ((sig == SIGTERM) || (sig == SIGKILL)) {
			error("*** %s CANCELLED AT %s ***", entity, time_str);
			msg_sent = 1;
		}
	}
	if ((sig == SIG_TIME_LIMIT) || (sig == SIG_NODE_FAIL) ||
	    (sig == SIG_PREEMPTED)  || (sig == SIG_FAILURE))
		goto done;
	if (sig == SIG_DEBUG_WAKE) {
		int i;
		for (i = 0; i < job->node_tasks; i++)
			pdebug_wake_process(job, job->task[i]->pid);
		goto done;
	}
	if (sig == SIG_ABORT) {
		sig = SIGKILL;
		job->aborted = true;
	}

	pthread_mutex_lock(&suspend_mutex);
	if (suspended && (sig != SIGKILL)) {
		rc = -1;
		errnum = ESLURMD_STEP_SUSPENDED;
		pthread_mutex_unlock(&suspend_mutex);
		goto done;
	}

	/*
	 * Signal the container
	 */
	if (slurm_container_signal(job->cont_id, sig) < 0) {
		rc = -1;
		errnum = errno;
		verbose("Error sending signal %d to %u.%u: %m",
			sig, job->jobid, job->stepid);
	} else {
		verbose("Sent signal %d to %u.%u",
			sig, job->jobid, job->stepid);
	}
	pthread_mutex_unlock(&suspend_mutex);

done:
	/* Send the return code and errnum */
	safe_write(fd, &rc, sizeof(int));
	safe_write(fd, &errnum, sizeof(int));
	return SLURM_SUCCESS;
rwfail:
	return SLURM_FAILURE;
}
Example #11
File: req.c Project: VURM/slurm
static int
_handle_signal_task_local(int fd, slurmd_job_t *job, uid_t uid)
{
	int rc = SLURM_SUCCESS;
	int signal;
	int ltaskid; /* local task index */

	debug("_handle_signal_task_local for job %u.%u",
	      job->jobid, job->stepid);

	safe_read(fd, &signal, sizeof(int));
	safe_read(fd, &ltaskid, sizeof(int));

	debug3("  uid = %d", uid);
	if (uid != job->uid && !_slurm_authorized_user(uid)) {
		debug("kill req from uid %ld for job %u.%u owned by uid %ld",
		      (long)uid, job->jobid, job->stepid, (long)job->uid);
		rc = EPERM;
		goto done;
	}

	/*
	 * Sanity checks
	 */
	if (ltaskid < 0 || ltaskid >= job->node_tasks) {
		debug("step %u.%u invalid local task id %d",
		      job->jobid, job->stepid, ltaskid);
		rc = SLURM_ERROR;
		goto done;
	}
	if (!job->task
	    || !job->task[ltaskid]) {
		debug("step %u.%u no task info for task id %d",
		      job->jobid, job->stepid, ltaskid);
		rc = SLURM_ERROR;
		goto done;
	}
	if (job->task[ltaskid]->pid <= 1) {
		debug("step %u.%u invalid pid %d for task %d",
		      job->jobid, job->stepid,
		      job->task[ltaskid]->pid, ltaskid);
		rc = SLURM_ERROR;
		goto done;
	}

	/*
	 * Signal the task
	 */
	pthread_mutex_lock(&suspend_mutex);
	if (suspended) {
		rc = ESLURMD_STEP_SUSPENDED;
		pthread_mutex_unlock(&suspend_mutex);
		goto done;
	}

	if (kill(job->task[ltaskid]->pid, signal) == -1) {
		rc = -1;
		verbose("Error sending signal %d to %u.%u, pid %d: %m",
			signal, job->jobid, job->stepid,
			job->task[ltaskid]->pid);
	} else {
		verbose("Sent signal %d to %u.%u, pid %d",
			signal, job->jobid, job->stepid,
			job->task[ltaskid]->pid);
	}
	pthread_mutex_unlock(&suspend_mutex);

done:
	/* Send the return code */
	safe_write(fd, &rc, sizeof(int));
	return SLURM_SUCCESS;
rwfail:
	return SLURM_FAILURE;
}
Example #12
File: req.c Project: VURM/slurm
static int
_handle_signal_process_group(int fd, slurmd_job_t *job, uid_t uid)
{
	int rc = SLURM_SUCCESS;
	int signal;

	debug3("_handle_signal_process_group for job %u.%u",
	      job->jobid, job->stepid);

	safe_read(fd, &signal, sizeof(int));

	debug3("  uid = %d", uid);
	if (uid != job->uid && !_slurm_authorized_user(uid)) {
		debug("kill req from uid %ld for job %u.%u owned by uid %ld",
		      (long)uid, job->jobid, job->stepid, (long)job->uid);
		rc = EPERM;
		goto done;
	}

	/*
	 * Sanity checks
	 */
	if (job->pgid <= (pid_t)1) {
		debug ("step %u.%u invalid [jmgr_pid:%d pgid:%u]",
		       job->jobid, job->stepid, job->jmgr_pid, job->pgid);
		rc = ESLURMD_JOB_NOTRUNNING;
		goto done;
	}

	/*
	 * Signal the process group
	 */
	pthread_mutex_lock(&suspend_mutex);
	if (suspended && (signal != SIGKILL)) {
		rc = ESLURMD_STEP_SUSPENDED;
		pthread_mutex_unlock(&suspend_mutex);
		goto done;
	}

	/*
	 * Print a message in the step output before killing when
	 * SIGTERM or SIGKILL are sent
	 */
	if ((signal == SIGTERM) || (signal == SIGKILL)) {
		time_t now = time(NULL);
		char entity[24], time_str[24];
		if (job->stepid == SLURM_BATCH_SCRIPT) {
			snprintf(entity, sizeof(entity), "JOB %u", job->jobid);
		} else {
			snprintf(entity, sizeof(entity), "STEP %u.%u",
				 job->jobid, job->stepid);
		}
		slurm_make_time_str(&now, time_str, sizeof(time_str));

		error("*** %s KILLED AT %s WITH SIGNAL %u ***",
		      entity, time_str, signal);
	}

	if (killpg(job->pgid, signal) == -1) {
		rc = -1;
		verbose("Error sending signal %d to %u.%u, pgid %d: %m",
			signal, job->jobid, job->stepid, job->pgid);
	} else {
		verbose("Sent signal %d to %u.%u, pgid %d",
			signal, job->jobid, job->stepid, job->pgid);
	}
	pthread_mutex_unlock(&suspend_mutex);

done:
	/* Send the return code */
	safe_write(fd, &rc, sizeof(int));
	return SLURM_SUCCESS;
rwfail:
	return SLURM_FAILURE;
}
Example #13
File: req.c Project: VURM/slurm
static int
_handle_completion(int fd, slurmd_job_t *job, uid_t uid)
{
	int rc = SLURM_SUCCESS;
	int errnum = 0;
	int first;
	int last;
	jobacctinfo_t *jobacct = NULL;
	int step_rc;

	debug("_handle_completion for job %u.%u",
	      job->jobid, job->stepid);

	debug3("  uid = %d", uid);
	if (!_slurm_authorized_user(uid)) {
		debug("step completion message from uid %ld for job %u.%u ",
		      (long)uid, job->jobid, job->stepid);
		rc = -1;
		errnum = EPERM;
		/* Send the return code and errno */
		safe_write(fd, &rc, sizeof(int));
		safe_write(fd, &errnum, sizeof(int));
		return SLURM_SUCCESS;
	}

	safe_read(fd, &first, sizeof(int));
	safe_read(fd, &last, sizeof(int));
	safe_read(fd, &step_rc, sizeof(int));
	jobacct = jobacct_gather_g_create(NULL);
	jobacct_gather_g_getinfo(jobacct, JOBACCT_DATA_PIPE, &fd);

	/*
	 * Record the completed nodes
	 */
	pthread_mutex_lock(&step_complete.lock);
	if (! step_complete.wait_children) {
		rc = -1;
		errnum = ETIMEDOUT; /* not used anyway */
		goto timeout;
	}

	/* SlurmUser or root can craft a launch without a valid credential
	 * ("srun --no-alloc ...") and no tree information can be built
	 *  without the hostlist from the credential. */
	if (step_complete.rank >= 0) {
#if 0
		char bits_string[128];
		debug2("Setting range %d (bit %d) through %d(bit %d)",
		       first, first-(step_complete.rank+1),
		       last, last-(step_complete.rank+1));
		bit_fmt(bits_string, sizeof(bits_string), step_complete.bits);
		debug2("  before bits: %s", bits_string);
#endif
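		/* Bit 0 of step_complete.bits corresponds to node rank
		 * step_complete.rank + 1, this node's first child in the
		 * fan-in tree, hence the (rank+1) offset below. */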
		bit_nset(step_complete.bits,
			 first - (step_complete.rank+1),
			 last - (step_complete.rank+1));
#if 0
		bit_fmt(bits_string, sizeof(bits_string), step_complete.bits);
		debug2("  after bits: %s", bits_string);
#endif
	}
	step_complete.step_rc = MAX(step_complete.step_rc, step_rc);

	/************* acct stuff ********************/
	jobacct_gather_g_aggregate(step_complete.jobacct, jobacct);
timeout:
	jobacct_gather_g_destroy(jobacct);
	/*********************************************/

	/* Send the return code and errno, we do this within the locked
	 * region to ensure that the stepd doesn't exit before we can
	 * perform this send. */
	safe_write(fd, &rc, sizeof(int));
	safe_write(fd, &errnum, sizeof(int));
	pthread_cond_signal(&step_complete.cond);
	pthread_mutex_unlock(&step_complete.lock);

	return SLURM_SUCCESS;
rwfail:
	return SLURM_FAILURE;
}
Example #14
File: req.c Project: VURM/slurm
static int
_handle_attach(int fd, slurmd_job_t *job, uid_t uid)
{
	srun_info_t *srun;
	int rc = SLURM_SUCCESS;

	debug("_handle_attach for job %u.%u", job->jobid, job->stepid);

	srun       = xmalloc(sizeof(srun_info_t));
	srun->key  = (srun_key_t *)xmalloc(SLURM_IO_KEY_SIZE);

	debug("sizeof(srun_info_t) = %d, sizeof(slurm_addr_t) = %d",
	      (int) sizeof(srun_info_t), (int) sizeof(slurm_addr_t));
	safe_read(fd, &srun->ioaddr, sizeof(slurm_addr_t));
	safe_read(fd, &srun->resp_addr, sizeof(slurm_addr_t));
	safe_read(fd, srun->key, SLURM_IO_KEY_SIZE);

	/*
	 * Check if jobstep is actually running.
	 */
	if (job->state != SLURMSTEPD_STEP_RUNNING) {
		rc = ESLURMD_JOB_NOTRUNNING;
		goto done;
	}

	/*
	 * At the moment, it only makes sense for the slurmd to make this
	 * call, so only _slurm_authorized_user is allowed.
	 */
	if (!_slurm_authorized_user(uid)) {
		error("uid %ld attempt to attach to job %u.%u owned by %ld",
		      (long) uid, job->jobid, job->stepid, (long)job->uid);
		rc = EPERM;
		goto done;
	}

	list_prepend(job->sruns, (void *) srun);
	rc = io_client_connect(srun, job);
	debug("  back from io_client_connect, rc = %d", rc);
done:
	/* Send the return code */
	safe_write(fd, &rc, sizeof(int));

	debug("  in _handle_attach rc = %d", rc);
	if (rc == SLURM_SUCCESS) {
		/* Send response info */
		uint32_t *pids, *gtids;
		int len, i;

		debug("  in _handle_attach sending response info");
		len = job->node_tasks * sizeof(uint32_t);
		pids = xmalloc(len);
		gtids = xmalloc(len);

		if (job->task != NULL) {
			for (i = 0; i < job->node_tasks; i++) {
				if (job->task[i] == NULL)
					continue;
				pids[i] = (uint32_t)job->task[i]->pid;
				gtids[i] = job->task[i]->gtid;
			}
		}

		safe_write(fd, &job->node_tasks, sizeof(uint32_t));
		safe_write(fd, pids, len);
		safe_write(fd, gtids, len);
		xfree(pids);
		xfree(gtids);

		for (i = 0; i < job->node_tasks; i++) {
			len = strlen(job->task[i]->argv[0]) + 1;
			safe_write(fd, &len, sizeof(int));
			safe_write(fd, job->task[i]->argv[0], len);
		}
	}

	return SLURM_SUCCESS;
rwfail:
	return SLURM_FAILURE;
}
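On success the handler streams back the task count, the pid and gtid arrays, and each task's executable name, length-prefixed. A sketch of a reader for that response (read_attach_response() is hypothetical; error handling is trimmed):

static int
read_attach_response(int fd)
{
	uint32_t i, ntasks;
	uint32_t *pids = NULL, *gtids = NULL;
	char *name = NULL;
	int rc, len;

	safe_read(fd, &rc, sizeof(int));
	if (rc != SLURM_SUCCESS)
		return rc;

	safe_read(fd, &ntasks, sizeof(uint32_t));
	pids  = xmalloc(ntasks * sizeof(uint32_t));
	gtids = xmalloc(ntasks * sizeof(uint32_t));
	safe_read(fd, pids, ntasks * sizeof(uint32_t));
	safe_read(fd, gtids, ntasks * sizeof(uint32_t));

	for (i = 0; i < ntasks; i++) {
		safe_read(fd, &len, sizeof(int));
		name = xmalloc(len);
		safe_read(fd, name, len);	/* '\0' terminated */
		debug("task %u: pid=%u gtid=%u cmd=%s",
		      i, pids[i], gtids[i], name);
		xfree(name);
	}
	xfree(pids);
	xfree(gtids);
	return SLURM_SUCCESS;

rwfail:
	xfree(name);
	xfree(pids);
	xfree(gtids);
	return SLURM_FAILURE;
}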
Example #15
File: req.c Project: birc-aeh/slurm
static int
_handle_terminate(int fd, stepd_step_rec_t *job, uid_t uid)
{
	int rc = SLURM_SUCCESS;
	int errnum = 0;
	stepd_step_task_info_t *task;
	uint32_t i;

	debug("_handle_terminate for step=%u.%u uid=%d",
	      job->jobid, job->stepid, uid);
	step_terminate_monitor_start(job->jobid, job->stepid);

	if (uid != job->uid && !_slurm_authorized_user(uid)) {
		debug("terminate req from uid %ld for job %u.%u "
		      "owned by uid %ld",
		      (long)uid, job->jobid, job->stepid, (long)job->uid);
		rc = -1;
		errnum = EPERM;
		goto done;
	}

	/*
	 * Sanity checks
	 */
	if (job->cont_id == 0) {
		debug ("step %u.%u invalid container [cont_id:%"PRIu64"]",
			job->jobid, job->stepid, job->cont_id);
		rc = -1;
		errnum = ESLURMD_JOB_NOTRUNNING;
		goto done;
	}

	/* cycle thru the tasks and mark those that have not
	 * called abort and/or terminated as killed_by_cmd
	 */
	for (i = 0; i < job->node_tasks; i++) {
		if (NULL == (task = job->task[i])) {
			continue;
		}
		if (task->aborted || task->exited) {
			continue;
		}
		/* mark that this task is going to be killed by
		 * cmd so we ignore its exit status - otherwise,
		 * we will probably report the final exit status
		 * as SIGKILL
		 */
		task->killed_by_cmd = true;
	}

	/*
	 * Signal the container with SIGKILL
	 */
	pthread_mutex_lock(&suspend_mutex);
	if (suspended) {
		debug("Terminating suspended job step %u.%u",
		      job->jobid, job->stepid);
	}

	if (proctrack_g_signal(job->cont_id, SIGKILL) < 0) {
		rc = -1;
		errnum = errno;
		verbose("Error sending SIGKILL signal to %u.%u: %m",
			job->jobid, job->stepid);
	} else {
		verbose("Sent SIGKILL signal to %u.%u",
			job->jobid, job->stepid);
	}
	pthread_mutex_unlock(&suspend_mutex);

done:
	/* Send the return code and errnum */
	safe_write(fd, &rc, sizeof(int));
	safe_write(fd, &errnum, sizeof(int));
	return SLURM_SUCCESS;
rwfail:
	return SLURM_FAILURE;
}
Example #16
File: req.c Project: birc-aeh/slurm
static int
_handle_signal_container(int fd, stepd_step_rec_t *job, uid_t uid)
{
	int rc = SLURM_SUCCESS;
	int errnum = 0;
	int sig;
	static int msg_sent = 0;
	char *ptr = NULL;
	int target_node_id = 0;
	stepd_step_task_info_t *task;
	uint32_t i;

	safe_read(fd, &sig, sizeof(int));
	debug("_handle_signal_container for step=%u.%u uid=%d signal=%d",
	      job->jobid, job->stepid, (int) uid, sig);
	if ((uid != job->uid) && !_slurm_authorized_user(uid)) {
		error("signal container req from uid %ld for step=%u.%u "
		      "owned by uid %ld",
		      (long)uid, job->jobid, job->stepid, (long)job->uid);
		rc = -1;
		errnum = EPERM;
		goto done;
	}

	/*
	 * Sanity checks
	 */
	if (job->cont_id == 0) {
		debug ("step %u.%u invalid container [cont_id:%"PRIu64"]",
			job->jobid, job->stepid, job->cont_id);
		rc = -1;
		errnum = ESLURMD_JOB_NOTRUNNING;
		goto done;
	}

	if ((sig == SIGTERM) || (sig == SIGKILL)) {
		/* cycle thru the tasks and mark those that have not
		 * called abort and/or terminated as killed_by_cmd
		 */
		for (i = 0; i < job->node_tasks; i++) {
			if (NULL == (task = job->task[i])) {
				continue;
			}
			if (task->aborted || task->exited) {
				continue;
			}
			/* mark that this task is going to be killed by
			 * cmd so we ignore its exit status - otherwise,
			 * we will probably report the final exit status
			 * as SIGKILL
			 */
			task->killed_by_cmd = true;
		}
	}

	ptr = getenvp(job->env, "SLURM_STEP_KILLED_MSG_NODE_ID");
	if (ptr)
		target_node_id = atoi(ptr);
	if ((job->nodeid == target_node_id) && (msg_sent == 0) &&
	    (job->state < SLURMSTEPD_STEP_ENDING)) {
		time_t now = time(NULL);
		char entity[24], time_str[24];
		if (job->stepid == SLURM_BATCH_SCRIPT) {
			snprintf(entity, sizeof(entity), "JOB %u", job->jobid);
		} else {
			snprintf(entity, sizeof(entity), "STEP %u.%u",
				 job->jobid, job->stepid);
		}
		slurm_make_time_str(&now, time_str, sizeof(time_str));

		/* Not really errors,
		 * but we want messages displayed by default */
		if (sig == SIG_TIME_LIMIT) {
			error("*** %s CANCELLED AT %s DUE TO TIME LIMIT ***",
			      entity, time_str);
			msg_sent = 1;
		} else if (sig == SIG_PREEMPTED) {
			error("*** %s CANCELLED AT %s DUE TO PREEMPTION ***",
			      entity, time_str);
			msg_sent = 1;
		} else if (sig == SIG_NODE_FAIL) {
			error("*** %s CANCELLED AT %s DUE TO NODE FAILURE ***",
			      entity, time_str);
			msg_sent = 1;
		} else if (sig == SIG_FAILURE) {
			error("*** %s FAILED (non-zero exit code or other "
			      "failure mode) ***", entity);
			msg_sent = 1;
		} else if ((sig == SIGTERM) || (sig == SIGKILL)) {
			error("*** %s CANCELLED AT %s ***", entity, time_str);
			msg_sent = 1;
		}
	}
	if ((sig == SIG_TIME_LIMIT) || (sig == SIG_NODE_FAIL) ||
	    (sig == SIG_PREEMPTED)  || (sig == SIG_FAILURE))
		goto done;

	if (sig == SIG_ABORT) {
		sig = SIGKILL;
		job->aborted = true;
	}

	pthread_mutex_lock(&suspend_mutex);
	if (suspended && (sig != SIGKILL)) {
		rc = -1;
		errnum = ESLURMD_STEP_SUSPENDED;
		pthread_mutex_unlock(&suspend_mutex);
		goto done;
	}

	if (sig == SIG_DEBUG_WAKE) {
		int i;
		for (i = 0; i < job->node_tasks; i++)
			pdebug_wake_process(job, job->task[i]->pid);
		pthread_mutex_unlock(&suspend_mutex);
		goto done;
	}

	/*
	 * Signal the container
	 */
	if (proctrack_g_signal(job->cont_id, sig) < 0) {
		rc = -1;
		errnum = errno;
		verbose("Error sending signal %d to %u.%u: %m",
			sig, job->jobid, job->stepid);
	} else {
		verbose("Sent signal %d to %u.%u",
			sig, job->jobid, job->stepid);
	}
	pthread_mutex_unlock(&suspend_mutex);

done:
	/* Send the return code and errnum */
	safe_write(fd, &rc, sizeof(int));
	safe_write(fd, &errnum, sizeof(int));
	return SLURM_SUCCESS;
rwfail:
	return SLURM_FAILURE;
}
Example #17
File: req.c Project: birc-aeh/slurm
static int
_handle_completion(int fd, stepd_step_rec_t *job, uid_t uid)
{
	int rc = SLURM_SUCCESS;
	int errnum = 0;
	int first;
	int last;
	jobacctinfo_t *jobacct = NULL;
	int step_rc;
	char* buf;
	int len;
	Buf buffer;
	int version;	/* For future use */
	bool lock_set = false;

	debug("_handle_completion for job %u.%u",
	      job->jobid, job->stepid);

	debug3("  uid = %d", uid);
	if (!_slurm_authorized_user(uid)) {
		debug("step completion message from uid %ld for job %u.%u ",
		      (long)uid, job->jobid, job->stepid);
		rc = -1;
		errnum = EPERM;
		/* Send the return code and errno */
		safe_write(fd, &rc, sizeof(int));
		safe_write(fd, &errnum, sizeof(int));
		return SLURM_SUCCESS;
	}

	safe_read(fd, &version, sizeof(int));
	safe_read(fd, &first, sizeof(int));
	safe_read(fd, &last, sizeof(int));
	safe_read(fd, &step_rc, sizeof(int));

	/*
	 * We must not use getinfo over a pipe with slurmd here.
	 * slurmstepd makes heavy use of setinfo over a pipe with slurmd,
	 * and doing the reverse can result in a deadlock with slurmd:
	 * slurmd(lock for read, write) / slurmstepd(write, lock for read).
	 * Do pack/unpack instead to keep slurmd and slurmstepd
	 * independent of each other.
	 */
	safe_read(fd, &len, sizeof(int));
	buf = xmalloc(len);
	safe_read(fd, buf, len);
	buffer = create_buf(buf, len);
	jobacctinfo_unpack(&jobacct, SLURM_PROTOCOL_VERSION,
			   PROTOCOL_TYPE_SLURM, buffer, 1);
	free_buf(buffer);

	/*
	 * Record the completed nodes
	 */
	pthread_mutex_lock(&step_complete.lock);
	lock_set = true;
	if (! step_complete.wait_children) {
		rc = -1;
		errnum = ETIMEDOUT; /* not used anyway */
		goto timeout;
	}

	/* SlurmUser or root can craft a launch without a valid credential
	 * ("srun --no-alloc ...") and no tree information can be built
	 *  without the hostlist from the credential. */
	if (step_complete.rank >= 0) {
#if 0
		char bits_string[128];
		debug2("Setting range %d (bit %d) through %d(bit %d)",
		       first, first-(step_complete.rank+1),
		       last, last-(step_complete.rank+1));
		bit_fmt(bits_string, sizeof(bits_string), step_complete.bits);
		debug2("  before bits: %s", bits_string);
#endif
		bit_nset(step_complete.bits,
			 first - (step_complete.rank+1),
			 last - (step_complete.rank+1));
#if 0
		bit_fmt(bits_string, sizeof(bits_string), step_complete.bits);
		debug2("  after bits: %s", bits_string);
#endif
	}
	step_complete.step_rc = MAX(step_complete.step_rc, step_rc);

	/************* acct stuff ********************/
	jobacctinfo_aggregate(step_complete.jobacct, jobacct);
timeout:
	jobacctinfo_destroy(jobacct);
	/*********************************************/

	/* Send the return code and errno, we do this within the locked
	 * region to ensure that the stepd doesn't exit before we can
	 * perform this send. */
	safe_write(fd, &rc, sizeof(int));
	safe_write(fd, &errnum, sizeof(int));
	pthread_cond_signal(&step_complete.cond);
	pthread_mutex_unlock(&step_complete.lock);

	return SLURM_SUCCESS;


rwfail:	if (lock_set) {
		pthread_cond_signal(&step_complete.cond);
		pthread_mutex_unlock(&step_complete.lock);
	}
	return SLURM_FAILURE;
}
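The comment in the handler above implies a sending side that packs the record and ships it length-prefixed. A sketch of that counterpart, assuming the jobacctinfo_pack()/init_buf() API of the same vintage as the unpack call above (send_jobacct() is a hypothetical name):

static int
send_jobacct(int fd, jobacctinfo_t *jobacct)
{
	int len;
	Buf buffer = init_buf(0);

	/* Pack into a private buffer; no getinfo over the pipe */
	jobacctinfo_pack(jobacct, SLURM_PROTOCOL_VERSION,
			 PROTOCOL_TYPE_SLURM, buffer);
	len = get_buf_offset(buffer);
	safe_write(fd, &len, sizeof(int));
	safe_write(fd, get_buf_data(buffer), len);
	free_buf(buffer);
	return SLURM_SUCCESS;

rwfail:
	free_buf(buffer);
	return SLURM_FAILURE;
}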
Example #18
File: req.c Project: birc-aeh/slurm
static int
_handle_suspend(int fd, stepd_step_rec_t *job, uid_t uid)
{
	static int launch_poe = -1;
	int rc = SLURM_SUCCESS;
	int errnum = 0;
	uint16_t job_core_spec = (uint16_t) NO_VAL;

	safe_read(fd, &job_core_spec, sizeof(uint16_t));

	debug("_handle_suspend for step:%u.%u uid:%ld core_spec:%u",
	      job->jobid, job->stepid, (long)uid, job_core_spec);

	if (!_slurm_authorized_user(uid)) {
		debug("job step suspend request from uid %ld for job %u.%u ",
		      (long)uid, job->jobid, job->stepid);
		rc = -1;
		errnum = EPERM;
		goto done;
	}

	if (job->cont_id == 0) {
		debug ("step %u.%u invalid container [cont_id:%"PRIu64"]",
			job->jobid, job->stepid, job->cont_id);
		rc = -1;
		errnum = ESLURMD_JOB_NOTRUNNING;
		goto done;
	}

	acct_gather_suspend_poll();
	if (launch_poe == -1) {
		char *launch_type = slurm_get_launch_type();
		if (!strcmp(launch_type, "launch/poe"))
			launch_poe = 1;
		else
			launch_poe = 0;
		xfree(launch_type);
	}

	/*
	 * Signal the container
	 */
	pthread_mutex_lock(&suspend_mutex);
	if (suspended) {
		rc = -1;
		errnum = ESLURMD_STEP_SUSPENDED;
		pthread_mutex_unlock(&suspend_mutex);
		goto done;
	} else {
		if (!job->batch && switch_g_job_step_pre_suspend(job))
			error("switch_g_job_step_pre_suspend: %m");

		/* SIGTSTP is sent first to let MPI daemons stop their tasks,
		 * then wait 2 seconds, then send SIGSTOP to the spawned
		 * process's container to stop everything else.
		 *
		 * In some cases, 1 second has proven insufficient. Longer
		 * delays may help ensure that all MPI tasks have been stopped
		 * (that depends upon the MPI implementation used), but will
		 * also permit longer time periods when more than one job can
		 * be running on each resource (not good). */
		if (launch_poe == 0) {
			/* IBM MPI seems to periodically hang upon receipt
			 * of SIGTSTP. */
			if (proctrack_g_signal(job->cont_id, SIGTSTP) < 0) {
				verbose("Error suspending %u.%u (SIGTSTP): %m",
					job->jobid, job->stepid);
			} else
				sleep(2);
		}

		if (proctrack_g_signal(job->cont_id, SIGSTOP) < 0) {
			verbose("Error suspending %u.%u (SIGSTOP): %m",
				job->jobid, job->stepid);
		} else {
			verbose("Suspended %u.%u", job->jobid, job->stepid);
		}
		suspended = true;
	}
	if (!job->batch && switch_g_job_step_post_suspend(job))
		error("switch_g_job_step_post_suspend: %m");
	if (!job->batch && core_spec_g_suspend(job->cont_id, job_core_spec))
		error("core_spec_g_suspend: %m");

	pthread_mutex_unlock(&suspend_mutex);

done:
	/* Send the return code and errno */
	safe_write(fd, &rc, sizeof(int));
	safe_write(fd, &errnum, sizeof(int));
	return SLURM_SUCCESS;
rwfail:
	return SLURM_FAILURE;
}