/*
 * Handle a single RPC sent by slurmctld.
 * IN msg: the received message
 * IN msg_type: the message type the caller is waiting for
 * OUT resp: set to the message payload when msg->msg_type == msg_type
 *	(resource allocation response message or List of them)
 * RET 1 if resp is filled in, 0 otherwise
 */
static int _handle_msg(slurm_msg_t *msg, uint16_t msg_type, void **resp)
{
	char *auth_opts = slurm_get_auth_info();
	uid_t my_uid = getuid();
	uid_t ctld_uid = (uid_t) slurm_get_slurm_user_id();
	uid_t sender_uid = g_slurm_auth_get_uid(msg->auth_cred, auth_opts);

	xfree(auth_opts);

	/* Only the Slurm user, root, or this process's own uid may talk to us */
	if (!((sender_uid == ctld_uid) || (sender_uid == 0) ||
	      (sender_uid == my_uid))) {
		error ("Security violation, slurm message from uid %u",
		       (unsigned int) sender_uid);
		return 0;
	}

	if (msg->msg_type != msg_type) {
		/* Not the awaited response: report it and leave resp alone */
		if (msg->msg_type == SRUN_JOB_COMPLETE)
			info("Job has been cancelled");
		else
			error("%s: received spurious message type: %u",
			      __func__, msg->msg_type);
		return 0;
	}

	debug2("resource allocation response received");
	slurm_send_rc_msg(msg, SLURM_SUCCESS);

	/* Move the payload to the caller; msg no longer references it */
	*resp = msg->data;
	msg->data = NULL;
	return 1;
}
/*
 * Derive a socket path from the AuthInfo string. Two layouts are understood:
 *	1) <path>			(Old format)
 *	2) socket=<path>[,]		(New format)
 * RET the socket path, or NULL when AuthInfo is unset or is in the new
 *	format without a "socket=" entry
 * NOTE: Caller must xfree return value
 */
static char *_auth_opts_to_socket(void)
{
	char *auth_opts = slurm_get_auth_info();
	char *path = NULL, *key, *comma;

	if (auth_opts == NULL)
		return NULL;

	key = strstr(auth_opts, "socket=");
	if (key != NULL) {
		/* New format: copy what follows "socket=", stop at ',' */
		path = xstrdup(key + (sizeof("socket=") - 1));
		comma = strchr(path, ',');
		if (comma != NULL)
			*comma = '\0';
	} else if (strchr(auth_opts, '=') == NULL) {
		/* Old format: the whole string is the path, hand it over */
		return auth_opts;
	}
	/* else: new format, but socket not specified; path stays NULL */

	xfree(auth_opts);
	return path;
}
/*
 * process RPC from slurmctld
 * IN msg: message received
 * OUT resp: resource allocation response message
 * RET 1 if resp is filled in, 0 otherwise
 *
 * The sender is accepted only if its uid is the Slurm user, root (0) or the
 * uid of this process; anything else is logged as a security violation.
 */
static int _handle_msg(slurm_msg_t *msg,
		       resource_allocation_response_msg_t **resp)
{
	/* The AuthInfo string must be released with xfree() once used */
	char *auth_info = slurm_get_auth_info();
	uid_t req_uid;
	uid_t uid = getuid();
	uid_t slurm_uid = (uid_t) slurm_get_slurm_user_id();
	int rc = 0;

	req_uid = g_slurm_auth_get_uid(msg->auth_cred, auth_info);
	xfree(auth_info);

	if ((req_uid != slurm_uid) && (req_uid != 0) && (req_uid != uid)) {
		error ("Security violation, slurm message from uid %u",
		       (unsigned int) req_uid);
		return 0;
	}

	switch (msg->msg_type) {
	case RESPONSE_RESOURCE_ALLOCATION:
		debug2("resource allocation response received");
		slurm_send_rc_msg(msg, SLURM_SUCCESS);
		/*
		 * msg->data is handed to the caller but left set in msg.
		 * NOTE(review): confirm the caller does not also free
		 * msg->data when it disposes of msg.
		 */
		*resp = msg->data;
		rc = 1;
		break;
	case SRUN_JOB_COMPLETE:
		info("Job has been cancelled");
		break;
	default:
		error("received spurious message type: %d", msg->msg_type);
		break;
	}
	return rc;
}
/*
 * Process one incoming RPC and release its payload.
 * IN msg: message received
 *
 * The sender is accepted only if its uid is the Slurm user, root (0) or the
 * uid of this process. Handled message types are SRUN_PING,
 * SRUN_JOB_COMPLETE, SRUN_USER_MSG, SRUN_TIMEOUT and SRUN_STEP_SIGNAL; each
 * of those frees msg->data with the matching slurm_free_*() routine. Any
 * other type is only logged.
 */
static void _handle_msg(slurm_msg_t *msg)
{
	/* Slurm user id, looked up once and cached across calls */
	static uint32_t slurm_uid = NO_VAL;
	/* The AuthInfo string must be released with xfree() once used */
	char *auth_info = slurm_get_auth_info();
	uid_t req_uid;
	uid_t uid = getuid();
	job_step_kill_msg_t *ss;
	srun_user_msg_t *um;

	req_uid = g_slurm_auth_get_uid(msg->auth_cred, auth_info);
	xfree(auth_info);

	if (slurm_uid == NO_VAL)
		slurm_uid = slurm_get_slurm_user_id();
	if ((req_uid != slurm_uid) && (req_uid != 0) && (req_uid != uid)) {
		error ("Security violation, slurm message from uid %u",
		       (unsigned int) req_uid);
		return;
	}

	switch (msg->msg_type) {
	case SRUN_PING:
		debug3("slurmctld ping received");
		slurm_send_rc_msg(msg, SLURM_SUCCESS);
		slurm_free_srun_ping_msg(msg->data);
		break;
	case SRUN_JOB_COMPLETE:
		debug("received job step complete message");
		runjob_signal(SIGKILL);
		slurm_free_srun_job_complete_msg(msg->data);
		break;
	case SRUN_USER_MSG:
		um = msg->data;
		info("%s", um->msg);
		slurm_free_srun_user_msg(msg->data);
		break;
	case SRUN_TIMEOUT:
		debug("received job step timeout message");
		_handle_timeout(msg->data);
		slurm_free_srun_timeout_msg(msg->data);
		break;
	case SRUN_STEP_SIGNAL:
		ss = msg->data;
		debug("received step signal %u RPC", ss->signal);
		/* A signal number of 0 is not forwarded */
		if (ss->signal)
			runjob_signal(ss->signal);
		slurm_free_job_step_kill_msg(msg->data);
		break;
	default:
		debug("received spurious message type: %u",
		      msg->msg_type);
		break;
	}
	return;
}
/*
 * Extract the socket path from AuthInfo, which is expected to look like
 * "socket=<path>[,]".
 * RET the path, or NULL when AuthInfo is unset or has no "socket=" entry
 * NOTE: Caller must xfree return value
 */
static char *_auth_opts_to_socket(void)
{
	char *auth_opts = slurm_get_auth_info();
	char *path = NULL, *key, *comma;

	if (auth_opts == NULL)
		return NULL;

	key = strstr(auth_opts, "socket=");
	if (key != NULL) {
		/* Copy what follows "socket=" and cut it at the first ',' */
		path = xstrdup(key + (sizeof("socket=") - 1));
		comma = strchr(path, ',');
		if (comma != NULL)
			*comma = '\0';
	}

	xfree(auth_opts);
	return path;
}
/*
 * _background_process_msg - handle one RPC received by the backup controller
 * IN msg: the received message
 * RET the error code that was (or would have been) sent back
 *
 * REQUEST_PING is acknowledged without any credential check. Shutdown,
 * takeover and control requests are honoured only when the sender is root or
 * the uid this daemon runs as; every other request gets
 * ESLURM_IN_STANDBY_MODE. No reply is sent for REQUEST_SHUTDOWN_IMMEDIATE.
 */
static int _background_process_msg(slurm_msg_t * msg)
{
	int error_code = SLURM_SUCCESS;

	if (msg->msg_type != REQUEST_PING) {
		bool accepted = false;
		char *auth_opts = slurm_get_auth_info();
		uid_t sender = g_slurm_auth_get_uid(msg->auth_cred,
						    auth_opts);

		xfree(auth_opts);
		if ((sender == 0) || (sender == getuid()))
			accepted = true;

		if (accepted) {
			switch (msg->msg_type) {
			case REQUEST_SHUTDOWN_IMMEDIATE:
				info("Performing RPC: REQUEST_SHUTDOWN_IMMEDIATE");
				break;
			case REQUEST_SHUTDOWN:
				info("Performing RPC: REQUEST_SHUTDOWN");
				pthread_kill(slurmctld_config.thread_id_sig,
					     SIGTERM);
				break;
			case REQUEST_TAKEOVER:
				info("Performing RPC: REQUEST_TAKEOVER");
				_shutdown_primary_controller(SHUTDOWN_WAIT);
				takeover = true;
				error_code = SLURM_SUCCESS;
				break;
			case REQUEST_CONTROL:
				debug3("Ignoring RPC: REQUEST_CONTROL");
				error_code = ESLURM_DISABLED;
				last_controller_response = time(NULL);
				break;
			default:
				/* Privileged sender, unsupported request */
				accepted = false;
				break;
			}
		}

		if (!accepted) {
			error("Invalid RPC received %d while in standby mode",
			      msg->msg_type);
			error_code = ESLURM_IN_STANDBY_MODE;
		}
	}

	if (msg->msg_type != REQUEST_SHUTDOWN_IMMEDIATE)
		slurm_send_rc_msg(msg, error_code);
	return error_code;
}
/*
 * Serialize a spawn request into buf.
 * IN req: the spawn request to pack
 * IN/OUT buf: buffer the packed data is appended to
 *
 * Layout: a freshly created auth credential, then seq, from_node,
 * subcmd_cnt, preput_cnt, the preput key/value pairs, and finally for each
 * sub-command its cmd, max_procs, argc, argv strings, info_cnt and info
 * key/value pairs.
 *
 * If the credential cannot be created an error is logged and the function
 * returns with nothing packed.
 */
extern void spawn_req_pack(spawn_req_t *req, Buf buf)
{
	int i, j;
	spawn_subcmd_t *subcmd;
	void *auth_cred;
	char *auth_info;

	/* The AuthInfo string must be released with xfree() once used */
	auth_info = slurm_get_auth_info();
	auth_cred = g_slurm_auth_create(NULL, 2, auth_info);
	xfree(auth_info);
	if (auth_cred == NULL) {
		error("authentication: %s",
		      g_slurm_auth_errstr(g_slurm_auth_errno(NULL)) );
		return;
	}
	(void) g_slurm_auth_pack(auth_cred, buf);
	(void) g_slurm_auth_destroy(auth_cred);

	pack32(req->seq, buf);
	packstr(req->from_node, buf);
	pack32(req->subcmd_cnt, buf);
	pack32(req->preput_cnt, buf);
	for (i = 0; i < req->preput_cnt; i ++) {
		packstr(req->pp_keys[i], buf);
		packstr(req->pp_vals[i], buf);
	}
	for (i = 0; i < req->subcmd_cnt; i ++) {
		subcmd = req->subcmds[i];

		packstr(subcmd->cmd, buf);
		pack32(subcmd->max_procs, buf);
		pack32(subcmd->argc, buf);
		for (j = 0; j < subcmd->argc; j ++) {
			packstr(subcmd->argv[j], buf);
		}
		pack32(subcmd->info_cnt, buf);
		for (j = 0; j < subcmd->info_cnt; j ++) {
			packstr(subcmd->info_keys[j], buf);
			packstr(subcmd->info_vals[j], buf);
		}
	}
}
static void * _handle_accept(void *arg) { /*struct request_params *param = (struct request_params *)arg;*/ int fd = ((struct request_params *)arg)->fd; stepd_step_rec_t *job = ((struct request_params *)arg)->job; int req; int len; Buf buffer; void *auth_cred; int rc; uid_t uid; gid_t gid; char *auth_info; debug3("Entering _handle_accept (new thread)"); xfree(arg); safe_read(fd, &req, sizeof(int)); if (req != REQUEST_CONNECT) { error("First message must be REQUEST_CONNECT"); goto fail; } safe_read(fd, &len, sizeof(int)); buffer = init_buf(len); safe_read(fd, get_buf_data(buffer), len); /* Unpack and verify the auth credential */ auth_cred = g_slurm_auth_unpack(buffer); if (auth_cred == NULL) { error("Unpacking authentication credential: %s", g_slurm_auth_errstr(g_slurm_auth_errno(NULL))); free_buf(buffer); goto fail; } auth_info = slurm_get_auth_info(); rc = g_slurm_auth_verify(auth_cred, NULL, 2, auth_info); if (rc != SLURM_SUCCESS) { error("Verifying authentication credential: %s", g_slurm_auth_errstr(g_slurm_auth_errno(auth_cred))); xfree(auth_info); (void) g_slurm_auth_destroy(auth_cred); free_buf(buffer); goto fail; } /* Get the uid & gid from the credential, then destroy it. */ uid = g_slurm_auth_get_uid(auth_cred, auth_info); gid = g_slurm_auth_get_gid(auth_cred, auth_info); xfree(auth_info); debug3(" Identity: uid=%d, gid=%d", uid, gid); g_slurm_auth_destroy(auth_cred); free_buf(buffer); rc = SLURM_PROTOCOL_VERSION; safe_write(fd, &rc, sizeof(int)); while (1) { rc = _handle_request(fd, job, uid, gid); if (rc != SLURM_SUCCESS) break; } if (close(fd) == -1) error("Closing accepted fd: %m"); slurm_mutex_lock(&message_lock); message_connections--; slurm_cond_signal(&message_cond); slurm_mutex_unlock(&message_lock); debug3("Leaving _handle_accept"); return NULL; fail: rc = SLURM_FAILURE; safe_write(fd, &rc, sizeof(int)); rwfail: if (close(fd) == -1) error("Closing accepted fd after error: %m"); debug("Leaving _handle_accept on an error"); return NULL; }
/* * Connect to a slurmstepd proccess by way of its unix domain socket. * * Both "directory" and "nodename" may be null, in which case stepd_connect * will attempt to determine them on its own. If you are using multiple * slurmd on one node (unusual outside of development environments), you * will get one of the local NodeNames more-or-less at random. * * Returns a socket descriptor for the opened socket on success, * and -1 on error. */ extern int stepd_connect(const char *directory, const char *nodename, uint32_t jobid, uint32_t stepid, uint16_t *protocol_version) { int req = REQUEST_CONNECT; int fd = -1; int rc; void *auth_cred; char *auth_info; char *local_nodename = NULL; Buf buffer; int len; *protocol_version = 0; if (nodename == NULL) { if (!(local_nodename = _guess_nodename())) return -1; nodename = local_nodename; } if (directory == NULL) { slurm_ctl_conf_t *cf; cf = slurm_conf_lock(); directory = slurm_conf_expand_slurmd_path( cf->slurmd_spooldir, nodename); slurm_conf_unlock(); } buffer = init_buf(0); /* Create an auth credential */ auth_info = slurm_get_auth_info(); auth_cred = g_slurm_auth_create(auth_info); xfree(auth_info); if (auth_cred == NULL) { error("Creating authentication credential: %s", g_slurm_auth_errstr(g_slurm_auth_errno(NULL))); slurm_seterrno(SLURM_PROTOCOL_AUTHENTICATION_ERROR); goto fail1; } /* Pack the auth credential */ rc = g_slurm_auth_pack(auth_cred, buffer); (void) g_slurm_auth_destroy(auth_cred); if (rc) { error("Packing authentication credential: %s", g_slurm_auth_errstr(g_slurm_auth_errno(auth_cred))); slurm_seterrno(SLURM_PROTOCOL_AUTHENTICATION_ERROR); goto fail1; } /* Connect to the step */ fd = _step_connect(directory, nodename, jobid, stepid); if (fd == -1) goto fail1; safe_write(fd, &req, sizeof(int)); len = size_buf(buffer); safe_write(fd, &len, sizeof(int)); safe_write(fd, get_buf_data(buffer), len); safe_read(fd, &rc, sizeof(int)); if (rc < 0) { error("slurmstepd refused authentication: %m"); 
slurm_seterrno(SLURM_PROTOCOL_AUTHENTICATION_ERROR); goto rwfail; } else if (rc) { *protocol_version = rc; } else { /* 0n older versions of Slurm < 14.11 SLURM_SUCCESS * was returned here instead of the protocol version. * This can be removed when we are 2 versions past * 14.11. */ slurmstepd_info_t *stepd_info = stepd_get_info(fd); *protocol_version = stepd_info->protocol_version; xfree(stepd_info); } free_buf(buffer); xfree(local_nodename); return fd; rwfail: close(fd); fail1: free_buf(buffer); xfree(local_nodename); return -1; }