示例#1
0
/*
 * _ping_controller - send a REQUEST_PING message to a cluster and check
 *	the reply
 * IN cluster - cluster record to ping; cluster->lock is held from before
 *	the message is sent until after the result has been logged
 * RET the value returned by _send_recv_msg() when it is non-zero, otherwise
 *	the value slurm_get_return_code() extracts from the response
 */
static int _ping_controller(slurmdb_cluster_rec_t *cluster)
{
	slurm_msg_t ping_req;
	slurm_msg_t ping_resp;
	int rc = SLURM_SUCCESS;

	slurm_msg_t_init(&ping_req);
	slurm_msg_t_init(&ping_resp);
	ping_req.msg_type = REQUEST_PING;

	slurm_mutex_lock(&cluster->lock);

	/* Start/finish tracing is only emitted when DEBUG_FLAG_FEDR is set */
	if (slurmctld_conf.debug_flags & DEBUG_FLAG_FEDR) {
		info("pinging %s(%s:%d)", cluster->name, cluster->control_host,
		     cluster->control_port);
	}

	rc = _send_recv_msg(cluster, &ping_req, &ping_resp, true);
	if (rc) {
		/* The exchange itself failed */
		error("failed to ping %s(%s:%d)",
		      cluster->name, cluster->control_host,
		      cluster->control_port);
	} else {
		/* A reply arrived; see whether it carries an error code */
		rc = slurm_get_return_code(ping_resp.msg_type,
					   ping_resp.data);
		if (rc) {
			error("ping returned error from %s(%s:%d)",
			      cluster->name, cluster->control_host,
			      cluster->control_port);
		}
	}

	if (slurmctld_conf.debug_flags & DEBUG_FLAG_FEDR) {
		info("finished pinging %s(%s:%d)", cluster->name,
		     cluster->control_host, cluster->control_port);
	}

	slurm_mutex_unlock(&cluster->lock);

	/* Both messages are released whether or not the ping succeeded */
	slurm_free_msg_members(&ping_req);
	slurm_free_msg_members(&ping_resp);

	return rc;
}
示例#2
0
/*
 * _msg_thr_internal - thread body that accepts connections on a listening
 *	socket and passes each received message to _handle_msg()
 * IN arg - pointer to an int holding the listening socket descriptor
 * RET NULL once srun_shutdown is seen to be non-zero
 */
static void *_msg_thr_internal(void *arg)
{
	int *listen_fd_ptr = (int *) arg;
	slurm_addr_t peer_addr;
	slurm_msg_t rpc;
	int conn_fd;

	(void) pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
	(void) pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);

	while (!srun_shutdown) {
		conn_fd = slurm_accept_msg_conn(*listen_fd_ptr, &peer_addr);
		if (conn_fd == SLURM_SOCKET_ERROR) {
			/* EINTR is not reported; any other failure is logged.
			 * Either way, go back and re-test srun_shutdown. */
			if (errno != EINTR)
				error("slurm_accept_msg_conn: %m");
			continue;
		}

		slurm_msg_t_init(&rpc);
		if (slurm_receive_msg(conn_fd, &rpc, 0) == 0) {
			_handle_msg(&rpc);
			slurm_free_msg_members(&rpc);
		} else {
			error("slurm_receive_msg: %m");
		}

		/* The per-connection socket is closed on both paths */
		close(conn_fd);
	}

	return NULL;
}
示例#3
0
/*
 * _background_rpc_mgr - Read and process incoming RPCs to the background
 *	controller (that's us)
 * IN no_data - unused
 * RET does not return normally: once slurmctld_config.shutdown_time is
 *	non-zero the listening socket is closed and the thread ends through
 *	pthread_exit()
 */
static void *_background_rpc_mgr(void *no_data)
{
	int newsockfd;
	int sockfd;
	slurm_addr_t cli_addr;
	slurm_msg_t msg;
	int error_code;
	char *node_addr = NULL;

	/* Read configuration only */
	slurmctld_lock_t config_read_lock = {
		READ_LOCK, NO_LOCK, NO_LOCK, NO_LOCK, NO_LOCK };
	int sigarray[] = {SIGUSR1, 0};

	(void) pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
	(void) pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
	debug3("_background_rpc_mgr pid = %lu", (unsigned long) getpid());

	/* initialize port for RPCs */
	lock_slurmctld(config_read_lock);

	/* set node_addr to bind to (NULL means any); backup_addr is only
	 * used when it differs from backup_controller */
	if (xstrcmp(slurmctld_conf.backup_controller,
		    slurmctld_conf.backup_addr) != 0) {
		node_addr = slurmctld_conf.backup_addr;
	}

	sockfd = slurm_init_msg_engine_addrname_port(node_addr,
						     slurmctld_conf.
						     slurmctld_port);
	if (sockfd == SLURM_SOCKET_ERROR)
		fatal("slurm_init_msg_engine_addrname_port error %m");
	unlock_slurmctld(config_read_lock);

	/* Prepare to catch SIGUSR1 to interrupt accept().
	 * This signal is generated by the slurmctld signal
	 * handler thread upon receipt of SIGABRT, SIGINT,
	 * or SIGTERM. That thread does all processing of
	 * all signals. */
	xsignal(SIGUSR1, _sig_handler);
	xsignal_unblock(sigarray);

	/*
	 * Process incoming RPCs until a shutdown time has been recorded
	 */
	while (slurmctld_config.shutdown_time == 0) {
		/* accept needed for stream implementation
		 * is a no-op in message implementation that just passes
		 * sockfd to newsockfd */
		newsockfd = slurm_accept_msg_conn(sockfd, &cli_addr);
		if (newsockfd == SLURM_SOCKET_ERROR) {
			/* An interrupted accept (EINTR) is not logged; the
			 * loop condition re-checks shutdown_time. */
			if (errno != EINTR)
				error("slurm_accept_msg_conn: %m");
			continue;
		}

		slurm_msg_t_init(&msg);
		if (slurm_receive_msg(newsockfd, &msg, 0) != 0) {
			/* Nothing was received: msg holds only what
			 * slurm_msg_t_init() set, so it must not be
			 * dispatched or inspected as a real request. */
			error("slurm_receive_msg: %m");
		} else {
			error_code = _background_process_msg(&msg);
			/* Record the shutdown time only once, and only for
			 * a successfully processed immediate shutdown */
			if ((error_code == SLURM_SUCCESS)		&&
			    (msg.msg_type == REQUEST_SHUTDOWN_IMMEDIATE) &&
			    (slurmctld_config.shutdown_time == 0))
				slurmctld_config.shutdown_time = time(NULL);
		}

		/* Per-connection cleanup happens on both paths */
		slurm_free_msg_members(&msg);

		slurm_close(newsockfd);	/* close new socket */
	}

	debug3("_background_rpc_mgr shutting down");
	slurm_close(sockfd);	/* close the main socket */
	pthread_exit((void *) 0);
	return NULL;	/* not reached; keeps the non-void signature satisfied */
}