示例#1
0
/* Process incoming RPCs. Meant to execute as a pthread */
extern void *rpc_mgr(void *no_data)
{
	int sockfd, newsockfd;
	int i;
	uint16_t port;
	slurm_addr_t cli_addr;
	slurmdbd_conn_t *conn_arg = NULL;

	master_thread_id = pthread_self();

	/* initialize port for RPCs */
	if ((sockfd = slurm_init_msg_engine_port(get_dbd_port()))
	    == SLURM_SOCKET_ERROR)
		fatal("slurm_init_msg_engine_port error %m");

	slurm_persist_conn_recv_server_init();

	/*
	 * Process incoming RPCs until told to shutdown
	 */
	while (!shutdown_time &&
	       (i = slurm_persist_conn_wait_for_thread_loc()) >= 0) {
		/*
		 * accept needed for stream implementation is a no-op in
		 * message implementation that just passes sockfd to newsockfd
		 */
		if ((newsockfd = slurm_accept_msg_conn(sockfd,
						       &cli_addr)) ==
		    SLURM_SOCKET_ERROR) {
			slurm_persist_conn_free_thread_loc(i);
			if (errno != EINTR)
				error("slurm_accept_msg_conn: %m");
			continue;
		}
		fd_set_nonblocking(newsockfd);

		conn_arg = xmalloc(sizeof(slurmdbd_conn_t));
		conn_arg->conn = xmalloc(sizeof(slurm_persist_conn_t));
		conn_arg->conn->fd = newsockfd;
		conn_arg->conn->flags = PERSIST_FLAG_DBD;
		conn_arg->conn->callback_proc = proc_req;
		conn_arg->conn->callback_fini = _connection_fini_callback;
		conn_arg->conn->shutdown = &shutdown_time;
		conn_arg->conn->version = SLURM_MIN_PROTOCOL_VERSION;
		conn_arg->conn->rem_host = xmalloc_nz(sizeof(char) * 16);
		/* Don't fill in the rem_port here.  It will be filled in
		 * later if it is a slurmctld connection. */
		slurm_get_ip_str(&cli_addr, &port,
				 conn_arg->conn->rem_host, sizeof(char) * 16);

		slurm_persist_conn_recv_thread_init(
			conn_arg->conn, i, conn_arg);
	}

	debug("rpc_mgr shutting down");
	(void) slurm_shutdown_msg_engine(sockfd);
	pthread_exit((void *) 0);
	return NULL;
}
示例#2
0
extern int fed_mgr_add_sibling_conn(slurm_persist_conn_t *persist_conn,
				    char **out_buffer)
{
	slurmdb_cluster_rec_t *cluster = NULL;
	slurmctld_lock_t fed_read_lock = {
		NO_LOCK, NO_LOCK, NO_LOCK, NO_LOCK, READ_LOCK };
	int rc = SLURM_SUCCESS;

	lock_slurmctld(fed_read_lock);

	if (!fed_mgr_fed_rec) {
		unlock_slurmctld(fed_read_lock);
		*out_buffer = xstrdup_printf(
			"no fed_mgr_fed_rec on cluster %s yet.",
			slurmctld_cluster_name);
		/* This really isn't an error.  If the cluster doesn't know it
		 * is in a federation this could happen on the initial
		 * connection from a sibling that found out about the addition
		 * before I did.
		 */
		debug("%s: %s", __func__, *out_buffer);
		/* The other side needs to see this as an error though or the
		 * connection won't be completely established.
		 */
		return SLURM_ERROR;
	}

	if (!fed_mgr_cluster_rec) {
		unlock_slurmctld(fed_read_lock);
		*out_buffer = xstrdup_printf(
			"no fed_mgr_cluster_rec on cluster %s?  "
			"This should never happen",
			slurmctld_cluster_name);
		error("%s: %s", __func__, *out_buffer);
		return SLURM_ERROR;
	}

	if (!(cluster = list_find_first(fed_mgr_fed_rec->cluster_list,
					slurmdb_find_cluster_in_list,
					persist_conn->cluster_name))) {
		unlock_slurmctld(fed_read_lock);
		*out_buffer = xstrdup_printf(
			"%s isn't a known sibling of ours, but tried to connect to cluster %s federation %s",
			persist_conn->cluster_name, slurmctld_cluster_name,
			fed_mgr_fed_rec->name);
		error("%s: %s", __func__, *out_buffer);
		return SLURM_ERROR;
	}

	persist_conn->callback_fini = _persist_callback_fini;
	persist_conn->flags |= PERSIST_FLAG_ALREADY_INITED;

	slurm_mutex_lock(&cluster->lock);
	cluster->control_port = persist_conn->rem_port;
	xfree(cluster->control_host);
	cluster->control_host = xstrdup(persist_conn->rem_host);

	/* If this pointer exists it will be handled by the persist_conn code,
	 * don't free
	 */
	//slurm_persist_conn_destroy(cluster->fed.recv);

	cluster->fed.recv = persist_conn;

	slurm_mutex_unlock(&cluster->lock);

	unlock_slurmctld(fed_read_lock);

	if (rc == SLURM_SUCCESS &&
	    (rc = slurm_persist_conn_recv_thread_init(
		    persist_conn, -1, persist_conn)
	     != SLURM_SUCCESS)) {
		*out_buffer = xstrdup_printf(
			"Couldn't connect back to %s for some reason",
			persist_conn->cluster_name);
		error("%s: %s", __func__, *out_buffer);
	}

	return rc;
}