/*
 * _ping_controller - send a REQUEST_PING message to a cluster's controller
 *	and report the outcome
 * IN cluster - cluster record to ping; its lock is held for the duration
 *	of the exchange
 * RET 0 when both the exchange and the returned code indicate success,
 *	otherwise the non-zero value from whichever step failed
 */
static int _ping_controller(slurmdb_cluster_rec_t *cluster)
{
	int rc = SLURM_SUCCESS;
	slurm_msg_t ping_req;
	slurm_msg_t ping_resp;

	slurm_msg_t_init(&ping_req);
	slurm_msg_t_init(&ping_resp);
	ping_req.msg_type = REQUEST_PING;

	slurm_mutex_lock(&cluster->lock);

	if (slurmctld_conf.debug_flags & DEBUG_FLAG_FEDR) {
		info("pinging %s(%s:%d)", cluster->name,
		     cluster->control_host, cluster->control_port);
	}

	rc = _send_recv_msg(cluster, &ping_req, &ping_resp, true);
	if (rc) {
		/* The request/response exchange itself failed */
		error("failed to ping %s(%s:%d)", cluster->name,
		      cluster->control_host, cluster->control_port);
	} else {
		/* Exchange worked; check the code carried in the response */
		rc = slurm_get_return_code(ping_resp.msg_type,
					   ping_resp.data);
		if (rc) {
			error("ping returned error from %s(%s:%d)",
			      cluster->name, cluster->control_host,
			      cluster->control_port);
		}
	}

	if (slurmctld_conf.debug_flags & DEBUG_FLAG_FEDR) {
		info("finished pinging %s(%s:%d)", cluster->name,
		     cluster->control_host, cluster->control_port);
	}

	slurm_mutex_unlock(&cluster->lock);

	/* Message members are released only after the lock is dropped */
	slurm_free_msg_members(&ping_req);
	slurm_free_msg_members(&ping_resp);

	return rc;
}
static void *_msg_thr_internal(void *arg) { slurm_addr_t cli_addr; int newsockfd; slurm_msg_t msg; int *slurmctld_fd_ptr = (int *)arg; (void) pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); (void) pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); while (!srun_shutdown) { newsockfd = slurm_accept_msg_conn(*slurmctld_fd_ptr, &cli_addr); if (newsockfd == SLURM_SOCKET_ERROR) { if (errno != EINTR) error("slurm_accept_msg_conn: %m"); continue; } slurm_msg_t_init(&msg); if (slurm_receive_msg(newsockfd, &msg, 0) != 0) { error("slurm_receive_msg: %m"); /* close the new socket */ close(newsockfd); continue; } _handle_msg(&msg); slurm_free_msg_members(&msg); close(newsockfd); } return NULL; }
/* _background_rpc_mgr - Read and process incoming RPCs to the background * controller (that's us) */ static void *_background_rpc_mgr(void *no_data) { int newsockfd; int sockfd; slurm_addr_t cli_addr; slurm_msg_t msg; int error_code; char* node_addr = NULL; /* Read configuration only */ slurmctld_lock_t config_read_lock = { READ_LOCK, NO_LOCK, NO_LOCK, NO_LOCK, NO_LOCK }; int sigarray[] = {SIGUSR1, 0}; (void) pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); (void) pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); debug3("_background_rpc_mgr pid = %lu", (unsigned long) getpid()); /* initialize port for RPCs */ lock_slurmctld(config_read_lock); /* set node_addr to bind to (NULL means any) */ if ((xstrcmp(slurmctld_conf.backup_controller, slurmctld_conf.backup_addr) != 0)) { node_addr = slurmctld_conf.backup_addr ; } if ((sockfd = slurm_init_msg_engine_addrname_port(node_addr, slurmctld_conf. slurmctld_port)) == SLURM_SOCKET_ERROR) fatal("slurm_init_msg_engine_addrname_port error %m"); unlock_slurmctld(config_read_lock); /* Prepare to catch SIGUSR1 to interrupt accept(). * This signal is generated by the slurmctld signal * handler thread upon receipt of SIGABRT, SIGINT, * or SIGTERM. That thread does all processing of * all signals. 
*/ xsignal(SIGUSR1, _sig_handler); xsignal_unblock(sigarray); /* * Process incoming RPCs indefinitely */ while (slurmctld_config.shutdown_time == 0) { /* accept needed for stream implementation * is a no-op in message implementation that just passes * sockfd to newsockfd */ if ((newsockfd = slurm_accept_msg_conn(sockfd, &cli_addr)) == SLURM_SOCKET_ERROR) { if (errno != EINTR) error("slurm_accept_msg_conn: %m"); continue; } slurm_msg_t_init(&msg); if (slurm_receive_msg(newsockfd, &msg, 0) != 0) error("slurm_receive_msg: %m"); error_code = _background_process_msg(&msg); if ((error_code == SLURM_SUCCESS) && (msg.msg_type == REQUEST_SHUTDOWN_IMMEDIATE) && (slurmctld_config.shutdown_time == 0)) slurmctld_config.shutdown_time = time(NULL); slurm_free_msg_members(&msg); slurm_close(newsockfd); /* close new socket */ } debug3("_background_rpc_mgr shutting down"); slurm_close(sockfd); /* close the main socket */ pthread_exit((void *) 0); return NULL; }