/* Process incoming RPCs. Meant to execute as a pthread */ extern void *rpc_mgr(void *no_data) { int sockfd, newsockfd; int i; uint16_t port; slurm_addr_t cli_addr; slurmdbd_conn_t *conn_arg = NULL; master_thread_id = pthread_self(); /* initialize port for RPCs */ if ((sockfd = slurm_init_msg_engine_port(get_dbd_port())) == SLURM_SOCKET_ERROR) fatal("slurm_init_msg_engine_port error %m"); slurm_persist_conn_recv_server_init(); /* * Process incoming RPCs until told to shutdown */ while (!shutdown_time && (i = slurm_persist_conn_wait_for_thread_loc()) >= 0) { /* * accept needed for stream implementation is a no-op in * message implementation that just passes sockfd to newsockfd */ if ((newsockfd = slurm_accept_msg_conn(sockfd, &cli_addr)) == SLURM_SOCKET_ERROR) { slurm_persist_conn_free_thread_loc(i); if (errno != EINTR) error("slurm_accept_msg_conn: %m"); continue; } fd_set_nonblocking(newsockfd); conn_arg = xmalloc(sizeof(slurmdbd_conn_t)); conn_arg->conn = xmalloc(sizeof(slurm_persist_conn_t)); conn_arg->conn->fd = newsockfd; conn_arg->conn->flags = PERSIST_FLAG_DBD; conn_arg->conn->callback_proc = proc_req; conn_arg->conn->callback_fini = _connection_fini_callback; conn_arg->conn->shutdown = &shutdown_time; conn_arg->conn->version = SLURM_MIN_PROTOCOL_VERSION; conn_arg->conn->rem_host = xmalloc_nz(sizeof(char) * 16); /* Don't fill in the rem_port here. It will be filled in * later if it is a slurmctld connection. */ slurm_get_ip_str(&cli_addr, &port, conn_arg->conn->rem_host, sizeof(char) * 16); slurm_persist_conn_recv_thread_init( conn_arg->conn, i, conn_arg); } debug("rpc_mgr shutting down"); (void) slurm_shutdown_msg_engine(sockfd); pthread_exit((void *) 0); return NULL; }
/*
 * fed_mgr_add_sibling_conn - register an incoming persistent connection
 *	from a sibling cluster and start its receive thread.
 *
 * IN/OUT persist_conn - connection opened by the sibling. Its cluster_name,
 *	rem_host and rem_port are read; its callback_fini is set and
 *	PERSIST_FLAG_ALREADY_INITED is added to its flags.
 * OUT out_buffer - on failure, set to a message built with
 *	xstrdup_printf() describing the problem (presumably the caller's to
 *	xfree - confirm against callers). Not written on success.
 * RET SLURM_SUCCESS on success; SLURM_ERROR if no federation record or
 *	local cluster record exists, or the sender is not a known sibling;
 *	otherwise the non-success value returned by
 *	slurm_persist_conn_recv_thread_init().
 */
extern int fed_mgr_add_sibling_conn(slurm_persist_conn_t *persist_conn,
				    char **out_buffer)
{
	slurmdb_cluster_rec_t *cluster = NULL;
	/*
	 * READ_LOCK in the last slot only (presumably the federation lock,
	 * going by the variable name).
	 */
	slurmctld_lock_t fed_read_lock = {
		NO_LOCK, NO_LOCK, NO_LOCK, NO_LOCK, READ_LOCK };
	int rc;

	lock_slurmctld(fed_read_lock);

	if (!fed_mgr_fed_rec) {
		unlock_slurmctld(fed_read_lock);
		*out_buffer = xstrdup_printf(
			"no fed_mgr_fed_rec on cluster %s yet.",
			slurmctld_cluster_name);
		/*
		 * This really isn't an error. If the cluster doesn't know it
		 * is in a federation this could happen on the initial
		 * connection from a sibling that found out about the addition
		 * before I did.
		 */
		debug("%s: %s", __func__, *out_buffer);
		/*
		 * The other side needs to see this as an error though or the
		 * connection won't be completely established.
		 */
		return SLURM_ERROR;
	}

	if (!fed_mgr_cluster_rec) {
		unlock_slurmctld(fed_read_lock);
		*out_buffer = xstrdup_printf(
			"no fed_mgr_cluster_rec on cluster %s? "
			"This should never happen",
			slurmctld_cluster_name);
		error("%s: %s", __func__, *out_buffer);
		return SLURM_ERROR;
	}

	cluster = list_find_first(fed_mgr_fed_rec->cluster_list,
				  slurmdb_find_cluster_in_list,
				  persist_conn->cluster_name);
	if (!cluster) {
		/*
		 * Build the message before dropping the lock: it reads
		 * fed_mgr_fed_rec->name, which must not be touched once the
		 * lock has been released.
		 */
		*out_buffer = xstrdup_printf(
			"%s isn't a known sibling of ours, but tried to connect to cluster %s federation %s",
			persist_conn->cluster_name, slurmctld_cluster_name,
			fed_mgr_fed_rec->name);
		unlock_slurmctld(fed_read_lock);
		error("%s: %s", __func__, *out_buffer);
		return SLURM_ERROR;
	}

	persist_conn->callback_fini = _persist_callback_fini;
	persist_conn->flags |= PERSIST_FLAG_ALREADY_INITED;

	slurm_mutex_lock(&cluster->lock);
	cluster->control_port = persist_conn->rem_port;
	xfree(cluster->control_host);
	cluster->control_host = xstrdup(persist_conn->rem_host);

	/*
	 * If a previous cluster->fed.recv pointer exists it will be handled
	 * by the persist_conn code, so it is deliberately not destroyed here
	 * before being replaced.
	 */
	cluster->fed.recv = persist_conn;

	slurm_mutex_unlock(&cluster->lock);

	unlock_slurmctld(fed_read_lock);

	/*
	 * Assign first, compare second, so that rc carries the callee's real
	 * return code rather than the 0/1 result of the comparison.
	 */
	rc = slurm_persist_conn_recv_thread_init(persist_conn, -1,
						 persist_conn);
	if (rc != SLURM_SUCCESS) {
		*out_buffer = xstrdup_printf(
			"Couldn't connect back to %s for some reason",
			persist_conn->cluster_name);
		error("%s: %s", __func__, *out_buffer);
	}

	return rc;
}