/*
 * Unpack a reply received from the SlurmDBD and extract its return code.
 *
 * rpc_version IN - kept for interface compatibility; the reply is unpacked
 *	using slurmdbd_conn->version, not this value
 * buffer IN - packed reply message
 * RET the unpack error if the message could not be unpacked, otherwise the
 *	return code carried by a DBD_ID_RC or PERSIST_RC reply. For any other
 *	message type an error is logged and the (successful) unpack result is
 *	returned unchanged.
 */
static int _unpack_return_code(uint16_t rpc_version, Buf buffer)
{
	persist_rc_msg_t *msg;
	dbd_id_rc_msg_t *id_msg;
	slurmdbd_msg_t resp;
	int rc = SLURM_ERROR;

	memset(&resp, 0, sizeof(slurmdbd_msg_t));
	if ((rc = unpack_slurmdbd_msg(&resp, slurmdbd_conn->version, buffer))
	    != SLURM_SUCCESS) {
		error("%s: unpack message error", __func__);
		return rc;
	}

	switch (resp.msg_type) {
	case DBD_ID_RC:
		id_msg = resp.data;
		/* Copy the code out before the message is released */
		rc = id_msg->return_code;
		slurmdbd_free_id_rc_msg(id_msg);
		if (rc != SLURM_SUCCESS)
			error("slurmdbd: DBD_ID_RC is %d", rc);
		break;
	case PERSIST_RC:
		msg = resp.data;
		rc = msg->rc;
		if (rc != SLURM_SUCCESS) {
			if (msg->ret_info == DBD_REGISTER_CTLD &&
			    slurm_get_accounting_storage_enforce()) {
				/*
				 * Registration of this controller failed
				 * while enforcement is configured: log the
				 * details, then escalate through fatal().
				 */
				error("slurmdbd: PERSIST_RC is %d from "
				      "%s(%u): %s",
				      rc,
				      slurmdbd_msg_type_2_str(
					      msg->ret_info, 1),
				      msg->ret_info,
				      msg->comment);
				fatal("You need to add this cluster "
				      "to accounting if you want to "
				      "enforce associations, or no "
				      "jobs will ever run.");
			} else {
				debug("slurmdbd: PERSIST_RC is %d from "
				      "%s(%u): %s",
				      rc,
				      slurmdbd_msg_type_2_str(
					      msg->ret_info, 1),
				      msg->ret_info,
				      msg->comment);
			}
		}
		slurm_persist_free_rc_msg(msg);
		break;
	default:
		/*
		 * Report the type that was actually received. The previous
		 * code printed a local that was never assigned, so the log
		 * always showed 65535.
		 * NOTE(review): resp.data is not released here because the
		 * matching free routine for an unexpected type is unknown.
		 */
		error("slurmdbd: bad message type %d != PERSIST_RC",
		      resp.msg_type);
		break;
	}

	return rc;
}
/*
 * Unpack a message received on a persistent connection.
 *
 * persist_conn IN/OUT - connection the message arrived on; its auth_cred is
 *	replaced when a REQUEST_PERSIST_INIT message is unpacked
 * resp_msg OUT - filled in with the message type and unpacked data
 * buffer IN - packed message
 * RET result of the underlying unpack routine, or SLURM_ERROR when the
 *	unpack_error label is reached (presumably via safe_unpack16() when the
 *	message type cannot be read).
 */
extern int slurm_persist_msg_unpack(slurm_persist_conn_t *persist_conn,
				    persist_msg_t *resp_msg, Buf buffer)
{
	int rc;

	xassert(persist_conn);
	xassert(resp_msg);

	if (persist_conn->flags & PERSIST_FLAG_DBD) {
		rc = unpack_slurmdbd_msg((slurmdbd_msg_t *)resp_msg,
					 persist_conn->version,
					 buffer);
	} else {
		slurm_msg_t msg;

		slurm_msg_t_init(&msg);

		msg.protocol_version = persist_conn->version;

		safe_unpack16(&msg.msg_type, buffer);

		rc = unpack_msg(&msg, buffer);

		resp_msg->msg_type = msg.msg_type;
		resp_msg->data = msg.data;
	}

	/*
	 * Here we transfer the auth_cred to the persist_conn just in case in
	 * the future we need to use it in some way to verify things for
	 * messages that don't have one that will follow on the connection.
	 */
	if (resp_msg->msg_type == REQUEST_PERSIST_INIT) {
		slurm_msg_t *msg = resp_msg->data;

		/*
		 * The type can be set while the payload is missing (the
		 * type word unpacked but the body did not), so never
		 * dereference a NULL payload.
		 */
		if (msg) {
			if (persist_conn->auth_cred)
				g_slurm_auth_destroy(persist_conn->auth_cred);

			/* Ownership moves from the message to the connection */
			persist_conn->auth_cred = msg->auth_cred;
			msg->auth_cred = NULL;
		}
	}

	return rc;

unpack_error:
	return SLURM_ERROR;
}
/*
 * send_recv_slurmdbd_msg - send one RPC to the SlurmDBD and wait for its
 *	reply, whatever type that reply has.
 *
 * The RPC will not be queued if an error occurs.
 * The "resp" message must be freed by the caller.
 *
 * rpc_version IN - protocol version used to pack "req" and unpack "resp"
 * req IN - request to send
 * resp OUT - unpacked reply
 * RET SLURM_SUCCESS or an error code. When the reply is a DBD_ID_RC
 *	message its return_code is what gets returned.
 */
extern int send_recv_slurmdbd_msg(uint16_t rpc_version,
				  slurmdbd_msg_t *req,
				  slurmdbd_msg_t *resp)
{
	Buf out_buf, in_buf;
	int rc = SLURM_SUCCESS;

	xassert(req);
	xassert(resp);

	/*
	 * Raise halt_agent before taking the lock so this request gets to
	 * send instead of the agent, which may be sending at any time; drop
	 * the flag again as soon as the mutex is ours.
	 */
	halt_agent = 1;
	slurm_mutex_lock(&slurmdbd_lock);
	halt_agent = 0;

	if (!slurmdbd_conn || (slurmdbd_conn->fd < 0)) {
		/*
		 * Either slurm_open_slurmdbd_conn() was never run or the
		 * connection to the SlurmDBD has been closed. Every request
		 * except DBD_GET_CONFIG opens it with the argument set to 1.
		 */
		_open_slurmdbd_conn(req->msg_type != DBD_GET_CONFIG);

		if (!slurmdbd_conn || (slurmdbd_conn->fd < 0)) {
			rc = SLURM_ERROR;
			goto end_it;
		}
	}

	out_buf = pack_slurmdbd_msg(req, rpc_version);
	if (!out_buf) {
		rc = SLURM_ERROR;
		goto end_it;
	}

	rc = slurm_persist_send_msg(slurmdbd_conn, out_buf);
	free_buf(out_buf);
	if (rc != SLURM_SUCCESS) {
		error("slurmdbd: Sending message type %s: %d: %m",
		      rpc_num2string(req->msg_type), rc);
		goto end_it;
	}

	in_buf = slurm_persist_recv_msg(slurmdbd_conn);
	if (!in_buf) {
		error("slurmdbd: Getting response to message type %u",
		      req->msg_type);
		rc = SLURM_ERROR;
		goto end_it;
	}

	rc = unpack_slurmdbd_msg(resp, rpc_version, in_buf);

	/* A DBD_ID_RC reply carries its own status; surface it to the caller */
	if ((rc == SLURM_SUCCESS) && (resp->msg_type == DBD_ID_RC)) {
		dbd_id_rc_msg_t *id_rc = resp->data;

		rc = id_rc->return_code;
	}

	free_buf(in_buf);

end_it:
	/* Signal the condition variable, then release the lock on every path */
	slurm_cond_signal(&slurmdbd_cond);
	slurm_mutex_unlock(&slurmdbd_lock);

	return rc;
}
static void _load_dbd_state(void) { char *dbd_fname; Buf buffer; int fd, recovered = 0; uint16_t rpc_version = 0; dbd_fname = slurm_get_state_save_location(); xstrcat(dbd_fname, "/dbd.messages"); fd = open(dbd_fname, O_RDONLY); if (fd < 0) { /* don't print an error message if there is no file */ if (errno == ENOENT) debug4("slurmdbd: There is no state save file to " "open by name %s", dbd_fname); else error("slurmdbd: Opening state save file %s: %m", dbd_fname); } else { char *ver_str = NULL; uint32_t ver_str_len; buffer = _load_dbd_rec(fd); if (buffer == NULL) goto end_it; /* This is set to the end of the buffer for send so we need to set it back to 0 */ set_buf_offset(buffer, 0); safe_unpackstr_xmalloc(&ver_str, &ver_str_len, buffer); debug3("Version string in dbd_state header is %s", ver_str); unpack_error: free_buf(buffer); buffer = NULL; if (ver_str) { /* get the version after VER */ rpc_version = slurm_atoul(ver_str + 3); xfree(ver_str); } while (1) { /* If the buffer was not the VER%d string it was an actual message so we don't want to skip it. */ if (!buffer) buffer = _load_dbd_rec(fd); if (buffer == NULL) break; if (rpc_version != SLURM_PROTOCOL_VERSION) { /* unpack and repack with new * PROTOCOL_VERSION just so we keep * things up to date. */ slurmdbd_msg_t msg; int rc; set_buf_offset(buffer, 0); rc = unpack_slurmdbd_msg( &msg, rpc_version, buffer); free_buf(buffer); if (rc == SLURM_SUCCESS) buffer = pack_slurmdbd_msg( &msg, SLURM_PROTOCOL_VERSION); else buffer = NULL; } if (!buffer) { error("no buffer given"); continue; } if (!list_enqueue(agent_list, buffer)) fatal("slurmdbd: list_enqueue, no memory"); recovered++; buffer = NULL; } end_it: verbose("slurmdbd: recovered %d pending RPCs", recovered); (void) close(fd); } xfree(dbd_fname); }