int pmixp_libpmix_init(void) { int rc; mode_t rights = (S_IRUSR | S_IWUSR | S_IXUSR) | (S_IRGRP | S_IWGRP | S_IXGRP); pmix_info_t *kvp; /* NOTE: we need user who owns the job to access PMIx usock * file. According to 'man 7 unix': * "... In the Linux implementation, sockets which are visible in the file system * honor the permissions of the directory they are in... " * Our case is the following: slurmstepd is usually running as root, user application will * be "sudo'ed". To provide both of them with acces to the unix socket we do the following: * 1. Owner ID is set to the job owner. * 2. Group ID corresponds to slurmstepd. * 3. Set 0770 access mode */ if (0 != mkdir(pmixp_info_tmpdir_lib(), rights) ) { PMIXP_ERROR_STD("Cannot create directory \"%s\"", pmixp_info_tmpdir_lib()); return errno; } /* There might be umask that will drop essential rights. Fix it explicitly. * TODO: is there more elegant solution? */ if (chmod(pmixp_info_tmpdir_lib(), rights) < 0) { error("chown(%s): %m", pmixp_info_tmpdir_lib()); return errno; } if (chown(pmixp_info_tmpdir_lib(), (uid_t) pmixp_info_jobuid(), (gid_t) -1) < 0) { error("chown(%s): %m", pmixp_info_tmpdir_lib()); return errno; } setenv(PMIXP_PMIXLIB_TMPDIR, pmixp_info_tmpdir_lib(), 1); PMIXP_ALLOC_KEY(kvp, PMIX_USERID); PMIX_VAL_SET(&kvp->value, uint32_t, pmixp_info_jobuid()); /* setup the server library */ if (PMIX_SUCCESS != (rc = PMIx_server_init(&_slurm_pmix_cb, kvp, 1))) { PMIXP_ERROR_STD("PMIx_server_init failed with error %d\n", rc); return SLURM_ERROR; } PMIXP_FREE_KEY(kvp); /* if( pmixp_fixrights(pmixp_info_tmpdir_lib(), (uid_t) pmixp_info_jobuid(), rights) ){ } */ /* register the errhandler */ PMIx_Register_errhandler(NULL, 0, errhandler, errhandler_reg_callbk, NULL); return 0; }
/*
 * Apply _file_fix_rights(entry, uid, mode) to every entry directly inside
 * the directory "path" (the special entries "." and ".." are skipped).
 *
 * Returns 0 on success, -1 (or the negative _is_dir() result) on failure.
 * The directory handle is released on every exit path.
 */
int pmixp_fixrights(char *path, uid_t uid, mode_t mode)
{
	char nested_path[PATH_MAX];
	DIR *dp;
	struct dirent *ent;
	int rc;
	int nested_is_dir;

	/*
	 * Make sure that "directory" exists and is a directory.
	 */
	if (1 != (rc = _is_dir(path))) {
		PMIXP_ERROR("path=\"%s\" is not a directory", path);
		return (rc == 0) ? -1 : rc;
	}

	if ((dp = opendir(path)) == NULL) {
		PMIXP_ERROR_STD("cannot open path=\"%s\"", path);
		return -1;
	}

	while ((ent = readdir(dp)) != NULL) {
		if (0 == xstrcmp(ent->d_name, ".") ||
		    0 == xstrcmp(ent->d_name, "..")) {
			/* skip special dir's */
			continue;
		}
		snprintf(nested_path, sizeof(nested_path), "%s/%s", path,
			 ent->d_name);

		/* any non-zero result (including a stat() failure) selects
		 * the directory handling below */
		nested_is_dir = _is_dir(nested_path);

		if ((rc = _file_fix_rights(nested_path, uid, mode))) {
			/* log first so errno still describes the failure */
			PMIXP_ERROR_STD("cannot fix permissions for "
					"\"%s\"", nested_path);
			/* release the handle before bailing out */
			closedir(dp);
			return -1;
		}

		if (nested_is_dir) {
			/* NOTE(review): nested directories are removed right
			 * after their rights are fixed. This looks like a
			 * copy/paste from pmixp_rmdir_recursively(); behavior
			 * is kept as-is - confirm whether a recursive
			 * pmixp_fixrights() call was intended instead. */
			pmixp_rmdir_recursively(nested_path);
		}
	}

	closedir(dp);
	return 0;
}
/*
 * Create directory "path" with access mode "rights" and make the job user
 * its owner.
 *
 * Returns 0 on success, or the errno value of the failed
 * mkdir()/chmod()/chown() call.
 */
int pmixp_mkdir(char *path, mode_t rights)
{
	int saved_errno;

	/* NOTE: we need user who owns the job to access PMIx usock
	 * file. According to 'man 7 unix':
	 * "... In the Linux implementation, sockets which are visible in the
	 * file system honor the permissions of the directory they are in... "
	 * Our case is the following: slurmstepd is usually running as root,
	 * user application will be "sudo'ed". To provide both of them with
	 * access to the unix socket we do the following:
	 * 1. Owner ID is set to the job owner.
	 * 2. Group ID corresponds to slurmstepd.
	 * 3. Set 0770 access mode
	 */
	if (0 != mkdir(path, rights)) {
		/* capture errno before logging can overwrite it */
		saved_errno = errno;
		PMIXP_ERROR_STD("Cannot create directory \"%s\"", path);
		return saved_errno;
	}

	/* There might be umask that will drop essential rights.
	 * Fix it explicitly.
	 * TODO: is there more elegant solution?
	 */
	if (chmod(path, rights) < 0) {
		saved_errno = errno;
		error("%s: chmod(%s): %m", __func__, path);
		return saved_errno;
	}

	/* gid of -1 leaves the group unchanged */
	if (chown(path, (uid_t) pmixp_info_jobuid(), (gid_t) -1) < 0) {
		saved_errno = errno;
		error("%s: chown(%s): %m", __func__, path);
		return saved_errno;
	}

	return 0;
}
int pmixp_lib_init(void) { pmix_info_t *kvp = NULL; pmix_status_t rc; PMIXP_INFO_ADD(kvp, PMIX_USERID, uint32_t, pmixp_info_jobuid()); #ifdef PMIX_SERVER_TMPDIR PMIXP_INFO_ADD(kvp, PMIX_SERVER_TMPDIR, string, pmixp_info_tmpdir_lib()); #endif /* setup the server library */ if (PMIX_SUCCESS != (rc = PMIx_server_init(&slurm_pmix_cb, kvp, PMIXP_INFO_SIZE(kvp)))) { PMIXP_ERROR_STD("PMIx_server_init failed with error %d\n", rc); return SLURM_ERROR; } PMIXP_FREE_KEY(kvp); /* register the errhandler */ PMIx_Register_event_handler(NULL, 0, NULL, 0, _errhandler, _errhandler_reg_callbk, NULL); return SLURM_SUCCESS; }
/*
 * Turn on the TCP_NODELAY option for socket "fd".
 *
 * Returns SLURM_SUCCESS, or SLURM_ERROR (after logging) when setsockopt()
 * reports a failure.
 */
int pmixp_fd_set_nodelay(int fd)
{
	int enable = 1;
	int rc;

	rc = setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *) &enable,
			sizeof(enable));
	if (rc >= 0) {
		return SLURM_SUCCESS;
	}

	PMIXP_ERROR_STD("Cannot set TCP_NODELAY on fd = %d\n", fd);
	return SLURM_ERROR;
}
/*
 * PMIx error handler: logs the reported status together with the number
 * of entries in "proc", then sends SIGKILL to the current job step.
 * The "proc", "info" and "ninfo" arguments are not inspected.
 */
static void errhandler(pmix_status_t status, pmix_proc_t proc[],
		       size_t nproc, pmix_info_t info[], size_t ninfo)
{
	/* FIXME: use proper specificator for nranges */
	int nranges = (int) nproc;

	/* TODO: do something more sophisticated here */
	PMIXP_ERROR_STD("Error handler invoked: status = %d, nranges = %d",
			status, nranges);

	slurm_kill_job_step(pmixp_info_jobid(), pmixp_info_stepid(),
			    SIGKILL);
}
/*
 * Remove directory "path" together with everything inside it.
 *
 * Entries that _is_dir() reports as directories are removed recursively;
 * every other entry is unlinked. Failures on individual entries are logged
 * and the walk continues; the outcome is decided by the final rmdir().
 *
 * Returns 0 on success, the non-zero rmdir() result if the directory could
 * not be removed, -1 if "path" is not a directory or cannot be opened, or
 * the negative _is_dir() result if "path" cannot be examined.
 *
 * NOTE(review): _is_dir() uses stat(), so a symlink pointing to a directory
 * is descended into - confirm this is acceptable for the callers.
 */
int pmixp_rmdir_recursively(char *path)
{
	char nested_path[PATH_MAX];
	DIR *dp;
	struct dirent *ent;
	int rc;
	int len;

	/*
	 * Make sure that "directory" exists and is a directory.
	 */
	if (1 != (rc = _is_dir(path))) {
		PMIXP_ERROR("path=\"%s\" is not a directory", path);
		return (rc == 0) ? -1 : rc;
	}

	if ((dp = opendir(path)) == NULL) {
		PMIXP_ERROR_STD("cannot open path=\"%s\"", path);
		return -1;
	}

	while ((ent = readdir(dp)) != NULL) {
		if (0 == strcmp(ent->d_name, ".") ||
		    0 == strcmp(ent->d_name, "..")) {
			/* skip special dir's */
			continue;
		}

		len = snprintf(nested_path, sizeof(nested_path), "%s/%s",
			       path, ent->d_name);
		if (len < 0 || (size_t) len >= sizeof(nested_path)) {
			/* a truncated name would refer to a different file */
			PMIXP_ERROR("path \"%s/%s\" is too long, skipping",
				    path, ent->d_name);
			continue;
		}

		if (1 == _is_dir(nested_path)) {
			pmixp_rmdir_recursively(nested_path);
		} else if (unlink(nested_path)) {
			/* Entries that cannot be stat()'ed (e.g. dangling
			 * symlinks) also land here so that they do not block
			 * the final rmdir(). */
			PMIXP_ERROR_STD("Cannot unlink path=\"%s\"",
					nested_path);
		}
	}
	closedir(dp);

	if ((rc = rmdir(path))) {
		PMIXP_ERROR_STD("Cannot remove path=\"%s\"", path);
	}
	return rc;
}
/*
 * Tell whether "path" names a directory, as seen by stat().
 *
 * Returns 1 for a directory, 0 for any other file type, and the negative
 * stat() result (after logging) when the path cannot be examined.
 */
static int _is_dir(char *path)
{
	struct stat info;
	int rc = stat(path, &info);

	if (rc < 0) {
		PMIXP_ERROR_STD("Cannot stat() path=\"%s\"", path);
		return rc;
	}

	return S_ISDIR(info.st_mode) ? 1 : 0;
}
/*
 * Create a listening UNIX stream socket bound to "path".
 *
 * Returns the listening descriptor on success. Returns SLURM_ERROR when the
 * path does not fit into sun_path or the socket cannot be created, and the
 * non-zero bind()/listen() result when either of those calls fails (the
 * descriptor is closed, and the socket file is unlinked if it was already
 * bound).
 */
int pmixp_usock_create_srv(char *path)
{
	static struct sockaddr_un sa;
	size_t path_len = strlen(path);
	int fd;
	int rc;

	if (path_len >= sizeof(sa.sun_path)) {
		PMIXP_ERROR_STD("UNIX socket path is too long: %lu, max %lu",
				(unsigned long) path_len,
				(unsigned long) sizeof(sa.sun_path) - 1);
		return SLURM_ERROR;
	}

	fd = socket(AF_UNIX, SOCK_STREAM, 0);
	if (fd < 0) {
		PMIXP_ERROR_STD("Cannot create UNIX socket");
		return SLURM_ERROR;
	}

	memset(&sa, 0, sizeof(sa));
	sa.sun_family = AF_UNIX;
	/* length was validated above; copy includes the terminating NUL */
	memcpy(sa.sun_path, path, path_len + 1);

	rc = bind(fd, (struct sockaddr *) &sa, SUN_LEN(&sa));
	if (rc) {
		PMIXP_ERROR_STD("Cannot bind() UNIX socket %s", path);
		close(fd);
		return rc;
	}

	rc = listen(fd, 64);
	if (rc) {
		PMIXP_ERROR_STD("Cannot listen(%d, 64) UNIX socket %s", fd,
				path);
		/* bind() already created the socket file - remove it */
		unlink(path);
		close(fd);
		return rc;
	}

	return fd;
}
/*
 * PMIx event handler: logs the reported status and sends SIGKILL to the
 * current job step. Only "status" is used; all other arguments are ignored.
 *
 * NOTE(review): "cbfunc" is never invoked here - confirm that the PMIx
 * event notification contract does not require calling it.
 */
static void _errhandler(size_t evhdlr_registration_id,
			pmix_status_t status,
			const pmix_proc_t *source,
			pmix_info_t info[], size_t ninfo,
			pmix_info_t *results, size_t nresults,
			pmix_event_notification_cbfunc_fn_t cbfunc,
			void *cbdata)
{
	/* TODO: do something more sophisticated here */
	PMIXP_ERROR_STD("Error handler invoked: status = %d", status);

	slurm_kill_job_step(pmixp_info_jobid(), pmixp_info_stepid(),
			    SIGKILL);
}
/*
 * Read up to "count" bytes from descriptor "sd" into "buf".
 *
 * sd       - descriptor to read from
 * buf      - destination buffer of at least "count" bytes
 * count    - number of bytes requested
 * shutdown - out: 0 if the connection is still usable, 1 if the peer closed
 *            it, or a negative errno value on a read error
 * blocking - if true, the descriptor is switched to blocking mode for the
 *            duration of the call and switched back to non-blocking mode on
 *            every exit path; if false, nothing is read unless
 *            pmixp_fd_read_ready() reports the descriptor as ready
 *
 * Returns the number of bytes actually stored in "buf" (may be less than
 * "count").
 */
size_t pmixp_read_buf(int sd, void *buf, size_t count, int *shutdown,
		      bool blocking)
{
	ssize_t ret;
	size_t offs = 0;
	int err;

	*shutdown = 0;

	if (!blocking && !pmixp_fd_read_ready(sd, shutdown)) {
		return 0;
	}

	if (blocking) {
		fd_set_blocking(sd);
	}

	while (offs < count) {
		ret = read(sd, (char *) buf + offs, count - offs);
		if (ret > 0) {
			offs += (size_t) ret;
			continue;
		}
		if (ret == 0) {
			/* connection closed. */
			*shutdown = 1;
			break;
		}

		/* remember the failure before any other call can alter it */
		err = errno;
		if (err == EINTR) {
			continue;
		}
		if (err == EWOULDBLOCK || err == EAGAIN) {
			/* no more data available right now */
			break;
		}

		PMIXP_ERROR_STD("blocking=%d", blocking);
		*shutdown = -err;
		break;
	}

	/* restore the original mode on all exit paths, not only on success */
	if (blocking) {
		fd_set_nonblocking(sd);
	}

	return offs;
}