/* * Connect to a slurmstepd proccess by way of its unix domain socket. * * Both "directory" and "nodename" may be null, in which case stepd_connect * will attempt to determine them on its own. If you are using multiple * slurmd on one node (unusual outside of development environments), you * will get one of the local NodeNames more-or-less at random. * * Returns a socket descriptor for the opened socket on success, * and -1 on error. */ int stepd_connect(const char *directory, const char *nodename, uint32_t jobid, uint32_t stepid) { int req = REQUEST_CONNECT; int fd = -1; int rc; void *auth_cred; Buf buffer; int len; if (nodename == NULL) { if (!(nodename = _guess_nodename())) return -1; } if (directory == NULL) { slurm_ctl_conf_t *cf; cf = slurm_conf_lock(); directory = slurm_conf_expand_slurmd_path( cf->slurmd_spooldir, nodename); slurm_conf_unlock(); } buffer = init_buf(0); /* Create an auth credential */ auth_cred = g_slurm_auth_create(NULL, 2, NULL); if (auth_cred == NULL) { error("Creating authentication credential: %s", g_slurm_auth_errstr(g_slurm_auth_errno(NULL))); slurm_seterrno(SLURM_PROTOCOL_AUTHENTICATION_ERROR); goto fail1; } /* Pack the auth credential */ rc = g_slurm_auth_pack(auth_cred, buffer); (void) g_slurm_auth_destroy(auth_cred); if (rc) { error("Packing authentication credential: %s", g_slurm_auth_errstr(g_slurm_auth_errno(auth_cred))); slurm_seterrno(SLURM_PROTOCOL_AUTHENTICATION_ERROR); goto fail1; } /* Connect to the step */ fd = _step_connect(directory, nodename, jobid, stepid); if (fd == -1) goto fail1; safe_write(fd, &req, sizeof(int)); len = size_buf(buffer); safe_write(fd, &len, sizeof(int)); safe_write(fd, get_buf_data(buffer), len); safe_read(fd, &rc, sizeof(int)); if (rc < 0) { error("slurmstepd refused authentication: %m"); slurm_seterrno(SLURM_PROTOCOL_AUTHENTICATION_ERROR); goto rwfail; } free_buf(buffer); return fd; rwfail: close(fd); fail1: free_buf(buffer); return -1; }
/* * Scan for available running slurm step daemons by checking * "directory" for unix domain sockets with names beginning in "nodename". * * Both "directory" and "nodename" may be null, in which case stepd_available * will attempt to determine them on its own. If you are using multiple * slurmd on one node (unusual outside of development environments), you * will get one of the local NodeNames more-or-less at random. * * Returns a List of pointers to step_loc_t structures. */ List stepd_available(const char *directory, const char *nodename) { List l; DIR *dp; struct dirent *ent; regex_t re; struct stat stat_buf; if (nodename == NULL) { if (!(nodename = _guess_nodename())) return NULL; } if (directory == NULL) { slurm_ctl_conf_t *cf; cf = slurm_conf_lock(); directory = slurm_conf_expand_slurmd_path( cf->slurmd_spooldir, nodename); slurm_conf_unlock(); } l = list_create((ListDelF) _free_step_loc_t); if (_sockname_regex_init(&re, nodename) == -1) goto done; /* * Make sure that "directory" exists and is a directory. */ if (stat(directory, &stat_buf) < 0) { error("Domain socket directory %s: %m", directory); goto done; } else if (!S_ISDIR(stat_buf.st_mode)) { error("%s is not a directory", directory); goto done; } if ((dp = opendir(directory)) == NULL) { error("Unable to open directory: %m"); goto done; } while ((ent = readdir(dp)) != NULL) { step_loc_t *loc; uint32_t jobid, stepid; if (_sockname_regex(&re, ent->d_name, &jobid, &stepid) == 0) { debug4("found jobid = %u, stepid = %u", jobid, stepid); loc = xmalloc(sizeof(step_loc_t)); loc->directory = xstrdup(directory); loc->nodename = xstrdup(nodename); loc->jobid = jobid; loc->stepid = stepid; list_append(l, (void *)loc); } } closedir(dp); done: regfree(&re); return l; }
/* * Connect to a slurmstepd proccess by way of its unix domain socket. * * Both "directory" and "nodename" may be null, in which case stepd_connect * will attempt to determine them on its own. If you are using multiple * slurmd on one node (unusual outside of development environments), you * will get one of the local NodeNames more-or-less at random. * * Returns a socket descriptor for the opened socket on success, * and -1 on error. */ int stepd_connect(const char *directory, const char *nodename, uint32_t jobid, uint32_t stepid, uint16_t *protocol_version) { int req = REQUEST_CONNECT; int fd = -1; int rc; void *auth_cred; Buf buffer; int len; *protocol_version = 0; if (nodename == NULL) { if (!(nodename = _guess_nodename())) return -1; } if (directory == NULL) { slurm_ctl_conf_t *cf; cf = slurm_conf_lock(); directory = slurm_conf_expand_slurmd_path( cf->slurmd_spooldir, nodename); slurm_conf_unlock(); } buffer = init_buf(0); /* Create an auth credential */ auth_cred = g_slurm_auth_create(NULL, 2, NULL); if (auth_cred == NULL) { error("Creating authentication credential: %s", g_slurm_auth_errstr(g_slurm_auth_errno(NULL))); slurm_seterrno(SLURM_PROTOCOL_AUTHENTICATION_ERROR); goto fail1; } /* Pack the auth credential */ rc = g_slurm_auth_pack(auth_cred, buffer); (void) g_slurm_auth_destroy(auth_cred); if (rc) { error("Packing authentication credential: %s", g_slurm_auth_errstr(g_slurm_auth_errno(auth_cred))); slurm_seterrno(SLURM_PROTOCOL_AUTHENTICATION_ERROR); goto fail1; } /* Connect to the step */ fd = _step_connect(directory, nodename, jobid, stepid); if (fd == -1) goto fail1; safe_write(fd, &req, sizeof(int)); len = size_buf(buffer); safe_write(fd, &len, sizeof(int)); safe_write(fd, get_buf_data(buffer), len); safe_read(fd, &rc, sizeof(int)); if (rc < 0) { error("slurmstepd refused authentication: %m"); slurm_seterrno(SLURM_PROTOCOL_AUTHENTICATION_ERROR); goto rwfail; } else if (rc) *protocol_version = rc; else { /* 0n older versions of Slurm < 14.11 SLURM_SUCCESS * was returned here instead of the protocol version. * This can be removed when we are 2 versions past * 14.11. */ slurmstepd_info_t *stepd_info = stepd_get_info(fd); *protocol_version = stepd_info->protocol_version; xfree(stepd_info); } free_buf(buffer); return fd; rwfail: close(fd); fail1: free_buf(buffer); return -1; }