Exemplo n.º 1
0
/*
 * Connect to a slurmstepd proccess by way of its unix domain socket.
 *
 * Both "directory" and "nodename" may be null, in which case stepd_connect
 * will attempt to determine them on its own.  If you are using multiple
 * slurmd on one node (unusual outside of development environments), you
 * will get one of the local NodeNames more-or-less at random.
 *
 * Returns a socket descriptor for the opened socket on success,
 * and -1 on error.
 */
int
stepd_connect(const char *directory, const char *nodename,
	      uint32_t jobid, uint32_t stepid)
{
	int req = REQUEST_CONNECT;
	int fd = -1;
	int rc;
	void *auth_cred;
	Buf buffer;
	int len;

	if (nodename == NULL) {
		if (!(nodename = _guess_nodename()))
			return -1;
	}
	if (directory == NULL) {
		slurm_ctl_conf_t *cf;

		cf = slurm_conf_lock();
		directory = slurm_conf_expand_slurmd_path(
			cf->slurmd_spooldir, nodename);
		slurm_conf_unlock();
	}

	buffer = init_buf(0);
	/* Create an auth credential */
	auth_cred = g_slurm_auth_create(NULL, 2, NULL);
	if (auth_cred == NULL) {
		error("Creating authentication credential: %s",
		      g_slurm_auth_errstr(g_slurm_auth_errno(NULL)));
		slurm_seterrno(SLURM_PROTOCOL_AUTHENTICATION_ERROR);
		goto fail1;
	}

	/* Pack the auth credential */
	rc = g_slurm_auth_pack(auth_cred, buffer);
	(void) g_slurm_auth_destroy(auth_cred);
	if (rc) {
		error("Packing authentication credential: %s",
		      g_slurm_auth_errstr(g_slurm_auth_errno(auth_cred)));
		slurm_seterrno(SLURM_PROTOCOL_AUTHENTICATION_ERROR);
		goto fail1;
	}

	/* Connect to the step */
	fd = _step_connect(directory, nodename, jobid, stepid);
	if (fd == -1)
		goto fail1;

	safe_write(fd, &req, sizeof(int));
	len = size_buf(buffer);
	safe_write(fd, &len, sizeof(int));
	safe_write(fd, get_buf_data(buffer), len);

	safe_read(fd, &rc, sizeof(int));
	if (rc < 0) {
		error("slurmstepd refused authentication: %m");
		slurm_seterrno(SLURM_PROTOCOL_AUTHENTICATION_ERROR);
		goto rwfail;
	}

	free_buf(buffer);
	return fd;

rwfail:
	close(fd);
fail1:
	free_buf(buffer);
	return -1;
}
Exemplo n.º 2
0
/*
 * Scan for available running slurm step daemons by checking
 * "directory" for unix domain sockets with names beginning in "nodename".
 *
 * Both "directory" and "nodename" may be null, in which case stepd_available
 * will attempt to determine them on its own.  If you are using multiple
 * slurmd on one node (unusual outside of development environments), you
 * will get one of the local NodeNames more-or-less at random.
 *
 * Returns a List of pointers to step_loc_t structures.
 */
List
stepd_available(const char *directory, const char *nodename)
{
	List l;
	DIR *dp;
	struct dirent *ent;
	regex_t re;
	struct stat stat_buf;

	if (nodename == NULL) {
		if (!(nodename = _guess_nodename()))
			return NULL;
	}
	if (directory == NULL) {
		slurm_ctl_conf_t *cf;

		cf = slurm_conf_lock();
		directory = slurm_conf_expand_slurmd_path(
			cf->slurmd_spooldir, nodename);
		slurm_conf_unlock();
	}

	l = list_create((ListDelF) _free_step_loc_t);
	if (_sockname_regex_init(&re, nodename) == -1)
		goto done;

	/*
	 * Make sure that "directory" exists and is a directory.
	 */
	if (stat(directory, &stat_buf) < 0) {
		error("Domain socket directory %s: %m", directory);
		goto done;
	} else if (!S_ISDIR(stat_buf.st_mode)) {
		error("%s is not a directory", directory);
		goto done;
	}

	if ((dp = opendir(directory)) == NULL) {
		error("Unable to open directory: %m");
		goto done;
	}

	while ((ent = readdir(dp)) != NULL) {
		step_loc_t *loc;
		uint32_t jobid, stepid;

		if (_sockname_regex(&re, ent->d_name, &jobid, &stepid) == 0) {
			debug4("found jobid = %u, stepid = %u", jobid, stepid);
			loc = xmalloc(sizeof(step_loc_t));
			loc->directory = xstrdup(directory);
			loc->nodename = xstrdup(nodename);
			loc->jobid = jobid;
			loc->stepid = stepid;
			list_append(l, (void *)loc);
		}
	}

	closedir(dp);
done:
	regfree(&re);
	return l;
}
Exemplo n.º 3
0
/*
 * Connect to a slurmstepd proccess by way of its unix domain socket.
 *
 * Both "directory" and "nodename" may be null, in which case stepd_connect
 * will attempt to determine them on its own.  If you are using multiple
 * slurmd on one node (unusual outside of development environments), you
 * will get one of the local NodeNames more-or-less at random.
 *
 * Returns a socket descriptor for the opened socket on success,
 * and -1 on error.
 */
int
stepd_connect(const char *directory, const char *nodename,
	      uint32_t jobid, uint32_t stepid, uint16_t *protocol_version)
{
	int req = REQUEST_CONNECT;
	int fd = -1;
	int rc;
	void *auth_cred;
	Buf buffer;
	int len;

	*protocol_version = 0;

	if (nodename == NULL) {
		if (!(nodename = _guess_nodename()))
			return -1;
	}
	if (directory == NULL) {
		slurm_ctl_conf_t *cf;

		cf = slurm_conf_lock();
		directory = slurm_conf_expand_slurmd_path(
			cf->slurmd_spooldir, nodename);
		slurm_conf_unlock();
	}

	buffer = init_buf(0);
	/* Create an auth credential */
	auth_cred = g_slurm_auth_create(NULL, 2, NULL);
	if (auth_cred == NULL) {
		error("Creating authentication credential: %s",
		      g_slurm_auth_errstr(g_slurm_auth_errno(NULL)));
		slurm_seterrno(SLURM_PROTOCOL_AUTHENTICATION_ERROR);
		goto fail1;
	}

	/* Pack the auth credential */
	rc = g_slurm_auth_pack(auth_cred, buffer);
	(void) g_slurm_auth_destroy(auth_cred);
	if (rc) {
		error("Packing authentication credential: %s",
		      g_slurm_auth_errstr(g_slurm_auth_errno(auth_cred)));
		slurm_seterrno(SLURM_PROTOCOL_AUTHENTICATION_ERROR);
		goto fail1;
	}

	/* Connect to the step */
	fd = _step_connect(directory, nodename, jobid, stepid);
	if (fd == -1)
		goto fail1;

	safe_write(fd, &req, sizeof(int));
	len = size_buf(buffer);
	safe_write(fd, &len, sizeof(int));
	safe_write(fd, get_buf_data(buffer), len);

	safe_read(fd, &rc, sizeof(int));
	if (rc < 0) {
		error("slurmstepd refused authentication: %m");
		slurm_seterrno(SLURM_PROTOCOL_AUTHENTICATION_ERROR);
		goto rwfail;
	} else if (rc)
		*protocol_version = rc;
	else {
		/* 0n older versions of Slurm < 14.11 SLURM_SUCCESS
		 * was returned here instead of the protocol version.
		 * This can be removed when we are 2 versions past
		 * 14.11.
		 */
		slurmstepd_info_t *stepd_info = stepd_get_info(fd);
		*protocol_version = stepd_info->protocol_version;
		xfree(stepd_info);
	}

	free_buf(buffer);
	return fd;

rwfail:
	close(fd);
fail1:
	free_buf(buffer);
	return -1;
}