/*
 * slurm_persist_send_msg - write the contents of a packed buffer to a
 *	persistent connection, framed by a 32-bit length header in network
 *	byte order.
 *
 * IN persist_conn - connection to write to (must be non-NULL)
 * IN buffer - buffer whose first get_buf_offset() bytes are sent
 *
 * RET SLURM_SUCCESS once the whole payload has been written,
 *	EAGAIN if the descriptor is not open, the connection is not ready,
 *	a write comes up short, or too many reopen attempts were made,
 *	ESLURM_ACCESS_DENIED if errno holds that value when a reopen would
 *	otherwise be attempted,
 *	SLURM_ERROR if buffer is NULL or a reopen is needed but the
 *	connection does not carry PERSIST_FLAG_RECONNECT.
 *
 * NOTE: whenever the connection has to be reopened the transmission starts
 * over from the length header, since the new connection has seen none of it.
 */
extern int slurm_persist_send_msg(
	slurm_persist_conn_t *persist_conn, Buf buffer)
{
	uint32_t bytes_left, wire_len;
	char *cursor;
	ssize_t n_written;
	int ready, attempts = 0;
	int need_reopen;

	xassert(persist_conn);

	if (persist_conn->fd < 0)
		return EAGAIN;
	if (!buffer)
		return SLURM_ERROR;

	ready = slurm_persist_conn_writeable(persist_conn);
	/* -1 from the writeable probe is handled by reopening the connection */
	need_reopen = (ready == -1);

	for (;;) {
		if (need_reopen) {
			if (attempts++ > 3)
				return EAGAIN;

			/*
			 * An access-denied errno is reported as is; no reopen
			 * is attempted in that case.
			 */
			if (errno == ESLURM_ACCESS_DENIED)
				return ESLURM_ACCESS_DENIED;

			if (!(persist_conn->flags & PERSIST_FLAG_RECONNECT))
				return SLURM_ERROR;

			slurm_persist_conn_reopen(persist_conn, true);
			ready = slurm_persist_conn_writeable(persist_conn);
			need_reopen = 0;
		}

		if (ready < 1)
			return EAGAIN;

		/* Length header first, then the payload from its beginning */
		bytes_left = get_buf_offset(buffer);
		wire_len = htonl(bytes_left);
		n_written = write(persist_conn->fd, &wire_len,
				  sizeof(wire_len));
		if (n_written != sizeof(wire_len))
			return EAGAIN;

		cursor = get_buf_data(buffer);
		while (bytes_left > 0) {
			ready = slurm_persist_conn_writeable(persist_conn);
			if (ready == -1) {
				/* go around again via the reopen path */
				need_reopen = 1;
				break;
			}
			if (ready < 1)
				return EAGAIN;

			n_written = write(persist_conn->fd, cursor,
					  bytes_left);
			if (n_written <= 0)
				return EAGAIN;

			cursor += n_written;
			bytes_left -= n_written;
		}

		if (!need_reopen)
			return SLURM_SUCCESS;
	}
}
/* run_dbd_backup - this is the backup controller, it should run in standby * mode, assuming control when the primary controller stops responding */ extern void run_dbd_backup(void) { slurm_persist_conn_t slurmdbd_conn; primary_resumed = false; memset(&slurmdbd_conn, 0, sizeof(slurm_persist_conn_t)); slurmdbd_conn.rem_host = slurmdbd_conf->dbd_addr; slurmdbd_conn.rem_port = slurmdbd_conf->dbd_port; slurmdbd_conn.cluster_name = "backup_slurmdbd"; slurmdbd_conn.fd = -1; slurmdbd_conn.shutdown = &shutdown_time; slurm_persist_conn_open_without_init(&slurmdbd_conn); /* repeatedly ping Primary */ while (!shutdown_time) { int writeable = slurm_persist_conn_writeable(&slurmdbd_conn); //info("%d %d", have_control, writeable); if (have_control && writeable == 1) { info("Primary has come back"); primary_resumed = true; shutdown_threads(); have_control = false; break; } else if (!have_control && writeable <= 0) { have_control = true; info("Taking Control"); break; } sleep(1); if (writeable <= 0) slurm_persist_conn_reopen(&slurmdbd_conn, false); } slurm_persist_conn_close(&slurmdbd_conn); return; }
static int _send_fini_msg(void) { int rc; Buf buffer; dbd_fini_msg_t req; /* If the connection is already gone, we don't need to send a fini. */ if (slurm_persist_conn_writeable(slurmdbd_conn) == -1) return SLURM_SUCCESS; buffer = init_buf(1024); pack16((uint16_t) DBD_FINI, buffer); req.commit = 0; req.close_conn = 1; slurmdbd_pack_fini_msg(&req, SLURM_PROTOCOL_VERSION, buffer); rc = slurm_persist_send_msg(slurmdbd_conn, buffer); free_buf(buffer); return rc; }