示例#1
0
文件: allocate.c 项目: HPCNow/slurm
/*
 * Handle one RPC received from slurmctld.
 * IN msg: message received
 * IN msg_type: message type that counts as the awaited response
 * OUT resp: set to the message payload (a resource allocation response
 *           message or List of them) when the awaited type arrives
 * RET 1 if resp is filled in, 0 otherwise
 */
static int
_handle_msg(slurm_msg_t *msg, uint16_t msg_type, void **resp)
{
	char *auth_opts  = slurm_get_auth_info();
	uid_t my_uid     = getuid();
	uid_t ctld_uid   = (uid_t) slurm_get_slurm_user_id();
	uid_t sender_uid = g_slurm_auth_get_uid(msg->auth_cred, auth_opts);

	/* The auth info string is only needed for the uid lookup above */
	xfree(auth_opts);

	/*
	 * Accept the message only from the uid reported by
	 * slurm_get_slurm_user_id(), from uid 0, or from our own uid.
	 */
	if ((sender_uid != ctld_uid) && (sender_uid != 0) &&
	    (sender_uid != my_uid)) {
		error ("Security violation, slurm message from uid %u",
			(unsigned int) sender_uid);
		return 0;
	}

	if (msg->msg_type == msg_type) {
		debug2("resource allocation response received");
		slurm_send_rc_msg(msg, SLURM_SUCCESS);
		/* Hand the payload to the caller and detach it from msg */
		*resp = msg->data;
		msg->data = NULL;
		return 1;
	}

	if (msg->msg_type == SRUN_JOB_COMPLETE)
		info("Job has been cancelled");
	else
		error("%s: received spurious message type: %u",
		      __func__, msg->msg_type);

	return 0;
}
示例#2
0
/*
 * Derive a socket path from the AuthInfo string. Two layouts are accepted:
 * 1) <path>		(Old format: the whole string is the path)
 * 2) socket=<path>[,]	(New format: key=value, comma separated)
 * Returns NULL when AuthInfo is unset, or when it is in the new format
 * but carries no "socket=" entry.
 * NOTE: Caller must xfree return value
 */
static char *_auth_opts_to_socket(void)
{
	char *auth_info = slurm_get_auth_info();
	char *key, *path, *comma;

	if (!auth_info)
		return NULL;

	key = strstr(auth_info, "socket=");
	if (key) {
		/* New format: copy what follows the key, cut at first comma */
		path = xstrdup(key + 7);
		comma = strchr(path, ',');
		if (comma)
			*comma = '\0';
		xfree(auth_info);
		return path;
	}

	if (!strchr(auth_info, '=')) {
		/* Old format: the string itself is handed to the caller */
		return auth_info;
	}

	/* New format, but socket not specified */
	xfree(auth_info);
	return NULL;
}
示例#3
0
文件: allocate.c 项目: jabl/slurm
/*
 * Process an RPC from slurmctld.
 * IN msg: message received
 * OUT resp: resource allocation response message; set to msg->data when a
 *           RESPONSE_RESOURCE_ALLOCATION message arrives
 * RET 1 if resp is filled in, 0 otherwise
 *
 * NOTE(review): msg->data is left pointing at the payload that was handed
 * out through *resp; confirm the caller does not also free it through msg.
 */
static int
_handle_msg(slurm_msg_t *msg, resource_allocation_response_msg_t **resp)
{
	char *auth_info = slurm_get_auth_info();
	uid_t req_uid   = g_slurm_auth_get_uid(msg->auth_cred, auth_info);
	uid_t uid       = getuid();
	uid_t slurm_uid = (uid_t) slurm_get_slurm_user_id();
	int rc = 0;

	/*
	 * slurm_get_auth_info() hands back an allocated string; release it
	 * as soon as the uid lookup is done so it is not leaked on every call.
	 */
	xfree(auth_info);

	/* Accept only the slurm user id, uid 0, or our own uid as sender */
	if ((req_uid != slurm_uid) && (req_uid != 0) && (req_uid != uid)) {
		error ("Security violation, slurm message from uid %u",
			(unsigned int) req_uid);
		return 0;
	}

	switch (msg->msg_type) {
		case RESPONSE_RESOURCE_ALLOCATION:
			debug2("resource allocation response received");
			slurm_send_rc_msg(msg, SLURM_SUCCESS);
			*resp = msg->data;
			rc = 1;
			break;
		case SRUN_JOB_COMPLETE:
			info("Job has been cancelled");
			break;
		default:
			error("received spurious message type: %d",
			      msg->msg_type);
			break;
	}
	return rc;
}
示例#4
0
/*
 * Dispatch one srun-directed message.
 * IN msg: message received; its payload (msg->data) is freed here for every
 *         message type that is handled, and left untouched otherwise.
 *
 * Messages are accepted only from the slurm user id (looked up once and
 * cached), from uid 0, or from our own uid.
 */
static void
_handle_msg(slurm_msg_t *msg)
{
	static uint32_t slurm_uid = NO_VAL;
	char *auth_info = slurm_get_auth_info();
	uid_t req_uid = g_slurm_auth_get_uid(msg->auth_cred, auth_info);
	uid_t uid = getuid();
	job_step_kill_msg_t *ss;
	srun_user_msg_t *um;

	/*
	 * slurm_get_auth_info() hands back an allocated string; release it
	 * once the uid lookup is done so it is not leaked on every call.
	 */
	xfree(auth_info);

	if (slurm_uid == NO_VAL)
		slurm_uid = slurm_get_slurm_user_id();
	if ((req_uid != slurm_uid) && (req_uid != 0) && (req_uid != uid)) {
		error ("Security violation, slurm message from uid %u",
		       (unsigned int) req_uid);
		return;
	}

	switch (msg->msg_type) {
	case SRUN_PING:
		debug3("slurmctld ping received");
		slurm_send_rc_msg(msg, SLURM_SUCCESS);
		slurm_free_srun_ping_msg(msg->data);
		break;
	case SRUN_JOB_COMPLETE:
		debug("received job step complete message");
		runjob_signal(SIGKILL);
		slurm_free_srun_job_complete_msg(msg->data);
		break;
	case SRUN_USER_MSG:
		um = msg->data;
		info("%s", um->msg);
		slurm_free_srun_user_msg(msg->data);
		break;
	case SRUN_TIMEOUT:
		debug("received job step timeout message");
		_handle_timeout(msg->data);
		slurm_free_srun_timeout_msg(msg->data);
		break;
	case SRUN_STEP_SIGNAL:
		ss = msg->data;
		debug("received step signal %u RPC", ss->signal);
		/* A signal number of zero is logged but not forwarded */
		if (ss->signal)
			runjob_signal(ss->signal);
		slurm_free_job_step_kill_msg(msg->data);
		break;
	default:
		debug("received spurious message type: %u",
		      msg->msg_type);
		break;
	}
	return;
}
/*
 * Extract the socket path from the AuthInfo string, which is expected to
 * contain "socket=<path>[,]". Returns NULL when AuthInfo is unset or has
 * no "socket=" entry.
 * NOTE: Caller must xfree return value
 */
static char *_auth_opts_to_socket(void)
{
	char *auth_info = slurm_get_auth_info();
	char *path = NULL;
	char *key, *comma;

	if (!auth_info)
		return NULL;

	key = strstr(auth_info, "socket=");
	if (key) {
		/* Copy what follows the key and cut it at the first comma */
		path = xstrdup(key + 7);
		comma = strchr(path, ',');
		if (comma)
			*comma = '\0';
	}

	xfree(auth_info);
	return path;
}
示例#6
0
/*
 * _background_process_msg - process an RPC to the backup_controller
 * IN msg: message received
 * RET the code that was (or, for REQUEST_SHUTDOWN_IMMEDIATE, would have
 *     been) sent back to the requester
 *
 * REQUEST_PING is answered with SLURM_SUCCESS without looking at the
 * sender. Every other RPC is honoured only when it comes from uid 0 or
 * from the uid this process runs as; anything else is rejected with
 * ESLURM_IN_STANDBY_MODE. No reply is sent for REQUEST_SHUTDOWN_IMMEDIATE.
 */
static int _background_process_msg(slurm_msg_t * msg)
{
	int rc = SLURM_SUCCESS;

	if (msg->msg_type != REQUEST_PING) {
		char *auth_opts = slurm_get_auth_info();
		uid_t sender = g_slurm_auth_get_uid(msg->auth_cred, auth_opts);
		bool accepted = false;

		xfree(auth_opts);

		if ((sender == 0) || (sender == getuid())) {
			accepted = true;
			switch (msg->msg_type) {
			case REQUEST_SHUTDOWN_IMMEDIATE:
				info("Performing RPC: REQUEST_SHUTDOWN_IMMEDIATE");
				break;
			case REQUEST_SHUTDOWN:
				info("Performing RPC: REQUEST_SHUTDOWN");
				pthread_kill(slurmctld_config.thread_id_sig,
					     SIGTERM);
				break;
			case REQUEST_TAKEOVER:
				info("Performing RPC: REQUEST_TAKEOVER");
				_shutdown_primary_controller(SHUTDOWN_WAIT);
				takeover = true;
				rc = SLURM_SUCCESS;
				break;
			case REQUEST_CONTROL:
				debug3("Ignoring RPC: REQUEST_CONTROL");
				rc = ESLURM_DISABLED;
				last_controller_response = time(NULL);
				break;
			default:
				/* Privileged sender, but not an RPC we serve */
				accepted = false;
				break;
			}
		}

		if (!accepted) {
			error("Invalid RPC received %d while in standby mode",
			      msg->msg_type);
			rc = ESLURM_IN_STANDBY_MODE;
		}
	}

	if (msg->msg_type != REQUEST_SHUTDOWN_IMMEDIATE)
		slurm_send_rc_msg(msg, rc);
	return rc;
}
示例#7
0
文件: spawn.c 项目: corburn/slurm
/*
 * Serialize a spawn request into buf.
 * IN req: request to pack
 * IN/OUT buf: buffer the packed data is appended to
 *
 * An authentication credential is created and packed first, followed by:
 * seq, from_node, subcmd_cnt, preput_cnt, the preput key/value pairs, and
 * then for each subcommand its cmd, max_procs, argc, argv strings,
 * info_cnt and info key/value pairs.
 *
 * If the credential cannot be created an error is logged and nothing is
 * packed; the function has no return value to report this.
 */
extern void
spawn_req_pack(spawn_req_t *req, Buf buf)
{
	int i, j;
	spawn_subcmd_t *subcmd;
	void *auth_cred;
	char *auth_info;

	/*
	 * slurm_get_auth_info() hands back an allocated string; keep the
	 * pointer so it can be released instead of leaking on every call.
	 */
	auth_info = slurm_get_auth_info();
	auth_cred = g_slurm_auth_create(NULL, 2, auth_info);
	xfree(auth_info);
	if (auth_cred == NULL) {
		error("authentication: %s",
		      g_slurm_auth_errstr(g_slurm_auth_errno(NULL)) );
		return;
	}
	(void) g_slurm_auth_pack(auth_cred, buf);
	(void) g_slurm_auth_destroy(auth_cred);

	pack32(req->seq, buf);
	packstr(req->from_node, buf);
	pack32(req->subcmd_cnt, buf);
	pack32(req->preput_cnt, buf);
	for (i = 0; i < req->preput_cnt; i ++) {
		packstr(req->pp_keys[i], buf);
		packstr(req->pp_vals[i], buf);
	}
	for (i = 0; i < req->subcmd_cnt; i ++) {
		subcmd = req->subcmds[i];

		packstr(subcmd->cmd, buf);
		pack32(subcmd->max_procs, buf);
		pack32(subcmd->argc, buf);
		for (j = 0; j < subcmd->argc; j ++) {
			packstr(subcmd->argv[j], buf);
		}
		pack32(subcmd->info_cnt, buf);
		for (j = 0; j < subcmd->info_cnt; j ++) {
			packstr(subcmd->info_keys[j], buf);
			packstr(subcmd->info_vals[j], buf);
		}
	}
}
示例#8
0
文件: req.c 项目: A1ve5/slurm
/*
 * Thread body serving one accepted connection.
 * IN arg: a struct request_params carrying the connection fd and the job
 *         record; it is xfree'd here once both fields have been copied out.
 * RET always NULL
 *
 * Protocol as implemented below: the peer sends REQUEST_CONNECT, then a
 * length followed by that many bytes holding a packed auth credential.
 * After the credential is verified, SLURM_PROTOCOL_VERSION is written
 * back and _handle_request() is called in a loop until it returns
 * something other than SLURM_SUCCESS. On a protocol or authentication
 * failure SLURM_FAILURE is written back instead.
 *
 * safe_read()/safe_write() transfer control to the rwfail label when the
 * I/O fails, so every resource still held at that point must be released
 * there.
 *
 * NOTE(review): the error exits do not decrement message_connections or
 * signal message_cond, unlike the normal exit - confirm whether the
 * accepting side counts the connection before this thread starts.
 */
static void *
_handle_accept(void *arg)
{
	/*struct request_params *param = (struct request_params *)arg;*/
	int fd = ((struct request_params *)arg)->fd;
	stepd_step_rec_t *job = ((struct request_params *)arg)->job;
	int req;
	int len;
	Buf buffer = NULL;	/* NULL whenever no buffer is owned */
	void *auth_cred;
	int rc;
	uid_t uid;
	gid_t gid;
	char *auth_info;

	debug3("Entering _handle_accept (new thread)");
	xfree(arg);

	safe_read(fd, &req, sizeof(int));
	if (req != REQUEST_CONNECT) {
		error("First message must be REQUEST_CONNECT");
		goto fail;
	}

	safe_read(fd, &len, sizeof(int));
	buffer = init_buf(len);
	/* If this read fails, buffer is released at rwfail */
	safe_read(fd, get_buf_data(buffer), len);

	/* Unpack and verify the auth credential */
	auth_cred = g_slurm_auth_unpack(buffer);
	if (auth_cred == NULL) {
		error("Unpacking authentication credential: %s",
		      g_slurm_auth_errstr(g_slurm_auth_errno(NULL)));
		goto fail;
	}
	auth_info = slurm_get_auth_info();
	rc = g_slurm_auth_verify(auth_cred, NULL, 2, auth_info);
	if (rc != SLURM_SUCCESS) {
		error("Verifying authentication credential: %s",
		      g_slurm_auth_errstr(g_slurm_auth_errno(auth_cred)));
		xfree(auth_info);
		(void) g_slurm_auth_destroy(auth_cred);
		goto fail;
	}

	/* Get the uid & gid from the credential, then destroy it. */
	uid = g_slurm_auth_get_uid(auth_cred, auth_info);
	gid = g_slurm_auth_get_gid(auth_cred, auth_info);
	xfree(auth_info);
	debug3("  Identity: uid=%d, gid=%d", uid, gid);
	g_slurm_auth_destroy(auth_cred);
	free_buf(buffer);
	buffer = NULL;	/* later jumps to rwfail must not free it again */

	rc = SLURM_PROTOCOL_VERSION;
	safe_write(fd, &rc, sizeof(int));

	while (1) {
		rc = _handle_request(fd, job, uid, gid);
		if (rc != SLURM_SUCCESS)
			break;
	}

	if (close(fd) == -1)
		error("Closing accepted fd: %m");

	slurm_mutex_lock(&message_lock);
	message_connections--;
	slurm_cond_signal(&message_cond);
	slurm_mutex_unlock(&message_lock);

	debug3("Leaving  _handle_accept");
	return NULL;

fail:
	rc = SLURM_FAILURE;
	safe_write(fd, &rc, sizeof(int));
rwfail:
	/* Single cleanup point for the credential buffer on all error exits */
	if (buffer)
		free_buf(buffer);
	if (close(fd) == -1)
		error("Closing accepted fd after error: %m");
	debug("Leaving  _handle_accept on an error");
	return NULL;
}
示例#9
0
/*
 * Connect to a slurmstepd process by way of its unix domain socket.
 *
 * Both "directory" and "nodename" may be null, in which case stepd_connect
 * will attempt to determine them on its own.  If you are using multiple
 * slurmd on one node (unusual outside of development environments), you
 * will get one of the local NodeNames more-or-less at random.
 *
 * IN directory: directory passed on to _step_connect(), or NULL
 * IN nodename: node name passed on to _step_connect(), or NULL
 * IN jobid, stepid: identify the step to connect to
 * OUT protocol_version: set to 0 on entry and to the version reported by
 *                       the slurmstepd on success; must not be NULL
 *
 * Returns a socket descriptor for the opened socket on success,
 * and -1 on error.
 *
 * NOTE(review): when "directory" is NULL the value obtained from
 * slurm_conf_expand_slurmd_path() is never released here - confirm
 * whether that function returns allocated memory.
 */
extern int
stepd_connect(const char *directory, const char *nodename,
	      uint32_t jobid, uint32_t stepid, uint16_t *protocol_version)
{
	int req = REQUEST_CONNECT;
	int fd = -1;
	int rc;
	void *auth_cred;
	char *auth_info;
	char *local_nodename = NULL;
	Buf buffer;
	int len;

	*protocol_version = 0;

	if (nodename == NULL) {
		if (!(local_nodename = _guess_nodename()))
			return -1;
		nodename = local_nodename;
	}
	if (directory == NULL) {
		slurm_ctl_conf_t *cf;

		cf = slurm_conf_lock();
		directory = slurm_conf_expand_slurmd_path(
			cf->slurmd_spooldir, nodename);
		slurm_conf_unlock();
	}

	buffer = init_buf(0);
	/* Create an auth credential */
	auth_info = slurm_get_auth_info();
	auth_cred = g_slurm_auth_create(auth_info);
	xfree(auth_info);
	if (auth_cred == NULL) {
		error("Creating authentication credential: %s",
		      g_slurm_auth_errstr(g_slurm_auth_errno(NULL)));
		slurm_seterrno(SLURM_PROTOCOL_AUTHENTICATION_ERROR);
		goto fail1;
	}

	/* Pack the auth credential */
	rc = g_slurm_auth_pack(auth_cred, buffer);
	if (rc) {
		/*
		 * Query the credential's error state before destroying it;
		 * reading it after the destroy would touch freed memory.
		 */
		error("Packing authentication credential: %s",
		      g_slurm_auth_errstr(g_slurm_auth_errno(auth_cred)));
		(void) g_slurm_auth_destroy(auth_cred);
		slurm_seterrno(SLURM_PROTOCOL_AUTHENTICATION_ERROR);
		goto fail1;
	}
	(void) g_slurm_auth_destroy(auth_cred);

	/* Connect to the step */
	fd = _step_connect(directory, nodename, jobid, stepid);
	if (fd == -1)
		goto fail1;

	/* Send REQUEST_CONNECT, then the packed credential with its length */
	safe_write(fd, &req, sizeof(int));
	len = size_buf(buffer);
	safe_write(fd, &len, sizeof(int));
	safe_write(fd, get_buf_data(buffer), len);

	safe_read(fd, &rc, sizeof(int));
	if (rc < 0) {
		error("slurmstepd refused authentication: %m");
		slurm_seterrno(SLURM_PROTOCOL_AUTHENTICATION_ERROR);
		goto rwfail;
	} else if (rc) {
		*protocol_version = rc;
	} else {
		/* On older versions of Slurm < 14.11 SLURM_SUCCESS
		 * was returned here instead of the protocol version.
		 * This can be removed when we are 2 versions past
		 * 14.11.
		 */
		slurmstepd_info_t *stepd_info = stepd_get_info(fd);
		*protocol_version = stepd_info->protocol_version;
		xfree(stepd_info);
	}

	free_buf(buffer);
	xfree(local_nodename);
	return fd;

rwfail:
	close(fd);
fail1:
	free_buf(buffer);
	xfree(local_nodename);
	return -1;
}