/*
 * _send_slurmctld_register_req - ask a cluster's slurmctld to register
 *
 * Opens a message connection to the controller named in cluster_rec and
 * sends it an ACCOUNTING_REGISTER_CTLD message. No reply is read and the
 * result of the send itself is not examined.
 *
 * IN cluster_rec: cluster record; control_host, control_port and
 *	rpc_version are read from it
 * RET: SLURM_SUCCESS if the connection could be opened, SLURM_ERROR if not
 */
static int _send_slurmctld_register_req(slurmdb_cluster_rec_t *cluster_rec)
{
	slurm_addr_t ctld_address;
	slurm_msg_t out_msg;
	slurm_fd_t fd;

	slurm_set_addr_char(&ctld_address, cluster_rec->control_port,
			    cluster_rec->control_host);

	fd = slurm_open_msg_conn(&ctld_address);
	if (fd < 0)
		return SLURM_ERROR;

	slurm_msg_t_init(&out_msg);
	out_msg.msg_type = ACCOUNTING_REGISTER_CTLD;
	out_msg.flags = SLURM_GLOBAL_AUTH_KEY;
	out_msg.protocol_version =
		slurmdbd_translate_rpc(cluster_rec->rpc_version);

	/*
	 * Fire and forget: there is no matching receive for this fd, so
	 * the connection is closed right after the send.
	 */
	slurm_send_node_msg(fd, &out_msg);
	slurm_close_stream(fd);

	return SLURM_SUCCESS;
}
/* * cluster_first_reg - ask for controller to send nodes in a down state * and jobs pending or running on first registration. * * IN host: controller host * IN port: controller port * IN rpc_version: controller rpc version * RET: error code */ extern int cluster_first_reg(char *host, uint16_t port, uint16_t rpc_version) { slurm_addr_t ctld_address; int fd; int rc = SLURM_SUCCESS; info("First time to register cluster requesting " "running jobs and system information."); slurm_set_addr_char(&ctld_address, port, host); fd = slurm_open_msg_conn(&ctld_address); if (fd < 0) { error("can not open socket back to slurmctld " "%s(%u): %m", host, port); rc = SLURM_ERROR; } else { slurm_msg_t out_msg; accounting_update_msg_t update; /* We have to put this update message here so we can tell the sender to send the correct RPC version. */ memset(&update, 0, sizeof(accounting_update_msg_t)); update.rpc_version = rpc_version; slurm_msg_t_init(&out_msg); out_msg.msg_type = ACCOUNTING_FIRST_REG; out_msg.flags = SLURM_GLOBAL_AUTH_KEY; out_msg.data = &update; slurm_send_node_msg(fd, &out_msg); /* We probably need to add matching recv_msg function * for an arbitray fd or should these be fire * and forget? For this, that we can probably * forget about it */ close(fd); } return rc; }
/*
 * _send_message_controller - send a request to one controller and wait for
 *	its return-code response
 *
 * IN dest: which controller to contact (primary or backup); passed to
 *	slurm_open_controller_conn_spec()
 * IN req: request message to send
 * RET: 0 when the controller answered RESPONSE_SLURM_RC with a zero return
 *	code. On connect or send failure, and on a non-zero final code, the
 *	function leaves through slurm_seterrno_ret() with the matching
 *	error code. On receive failure it returns
 *	SLURMCTLD_COMMUNICATIONS_RECEIVE_ERROR directly.
 */
int _send_message_controller (enum controller_id dest, slurm_msg_t *req)
{
	int rc = SLURM_PROTOCOL_SUCCESS;
	slurm_fd_t fd = -1;
	slurm_msg_t *resp_msg = NULL;

	/* always going to one node (primary or backup per value of "dest") */
	if ((fd = slurm_open_controller_conn_spec(dest)) < 0)
		slurm_seterrno_ret(SLURMCTLD_COMMUNICATIONS_CONNECTION_ERROR);

	if (slurm_send_node_msg(fd, req) < 0) {
		slurm_shutdown_msg_conn(fd);
		slurm_seterrno_ret(SLURMCTLD_COMMUNICATIONS_SEND_ERROR);
	}

	resp_msg = xmalloc(sizeof(slurm_msg_t));
	slurm_msg_t_init(resp_msg);

	if ((rc = slurm_receive_msg(fd, resp_msg, 0)) != 0) {
		slurm_shutdown_msg_conn(fd);
		/* Release the response holder; this path used to leak it. */
		slurm_free_msg(resp_msg);
		return SLURMCTLD_COMMUNICATIONS_RECEIVE_ERROR;
	}

	/*
	 * A shutdown failure takes precedence over whatever the response
	 * carried; otherwise only a RESPONSE_SLURM_RC message is accepted.
	 */
	if (slurm_shutdown_msg_conn(fd) != SLURM_SUCCESS)
		rc = SLURMCTLD_COMMUNICATIONS_SHUTDOWN_ERROR;
	else if (resp_msg->msg_type != RESPONSE_SLURM_RC)
		rc = SLURM_UNEXPECTED_MSG_ERROR;
	else
		rc = slurm_get_return_code(resp_msg->msg_type,
					   resp_msg->data);
	slurm_free_msg(resp_msg);

	if (rc)
		slurm_seterrno_ret(rc);
	return rc;
}
/*
 * Open a persistent socket connection and perform the init handshake.
 *
 * IN/OUT persist_conn - persistent connection needing rem_host and rem_port
 *	filled in. On a successful handshake its version is replaced by the
 *	ret_info value from the response and the response flags are OR'ed
 *	into its flags. After any failure past the initial open,
 *	_close_fd() is called on its fd.
 * RET: SLURM_SUCCESS on success. SLURM_ERROR if the connection could not be
 *	opened, the init request could not be sent, or no response arrived;
 *	otherwise the unpack result or the rc carried in the response.
 */
extern int slurm_persist_conn_open(slurm_persist_conn_t *persist_conn)
{
	int rc = SLURM_ERROR;
	slurm_msg_t req_msg;
	persist_init_req_msg_t req;
	persist_msg_t msg;
	slurm_persist_conn_t unpack_conn;
	persist_rc_msg_t *resp = NULL;
	Buf buffer;

	if (slurm_persist_conn_open_without_init(persist_conn) !=
	    SLURM_SUCCESS)
		return rc;

	/*
	 * The init request is packed at persist_conn->version, since the
	 * version run by the other side is not known yet. Our own
	 * SLURM_PROTOCOL_VERSION is advertised in the payload.
	 */
	slurm_msg_t_init(&req_msg);
	req_msg.protocol_version = persist_conn->version;
	req_msg.msg_type = REQUEST_PERSIST_INIT;
	req_msg.flags |= SLURM_GLOBAL_AUTH_KEY;
	if (persist_conn->flags & PERSIST_FLAG_DBD)
		req_msg.flags |= SLURMDBD_CONNECTION;

	memset(&req, 0, sizeof(req));
	req.cluster_name = persist_conn->cluster_name;
	req.persist_type = persist_conn->persist_type;
	req.port = persist_conn->my_port;
	req.version = SLURM_PROTOCOL_VERSION;
	req_msg.data = &req;

	if (slurm_send_node_msg(persist_conn->fd, &req_msg) < 0) {
		error("%s: failed to send persistent connection init message to %s:%d",
		      __func__, persist_conn->rem_host,
		      persist_conn->rem_port);
		_close_fd(&persist_conn->fd);
		goto end_it;
	}

	buffer = slurm_persist_recv_msg(persist_conn);
	if (!buffer) {
		if (_comm_fail_log(persist_conn)) {
			error("%s: No response to persist_init", __func__);
		}
		_close_fd(&persist_conn->fd);
		goto end_it;
	}

	/*
	 * The first unpack is done the same way for dbd or normal
	 * communication, so unpack through a copy of the connection with
	 * the DBD flag cleared.
	 */
	memset(&msg, 0, sizeof(msg));
	unpack_conn = *persist_conn;
	unpack_conn.flags &= (~PERSIST_FLAG_DBD);

	rc = slurm_persist_msg_unpack(&unpack_conn, &msg, buffer);
	free_buf(buffer);

	resp = (persist_rc_msg_t *)msg.data;
	if ((rc == SLURM_SUCCESS) && resp) {
		rc = resp->rc;
		persist_conn->version = resp->ret_info;
		persist_conn->flags |= resp->flags;
	}

	if (rc == SLURM_SUCCESS)
		goto end_it;

	if (!resp) {
		error("%s: Failed to unpack persistent connection init resp message from %s:%d",
		      __func__, persist_conn->rem_host,
		      persist_conn->rem_port);
	} else {
		error("%s: Something happened with the receiving/processing of the persistent connection init message to %s:%d: %s",
		      __func__, persist_conn->rem_host,
		      persist_conn->rem_port, resp->comment);
	}
	_close_fd(&persist_conn->fd);

end_it:
	slurm_persist_free_rc_msg(resp);

	return rc;
}