/* Thread that accepts the pty connection for the job and forwards
 * terminal window size (SIGWINCH) changes over it. */
static void *_pty_thread(void *arg)
{
	int fd = -1;
	srun_job_t *job = (srun_job_t *) arg;
	slurm_addr_t client_addr;

	xsignal_unblock(pty_sigarray);
	xsignal(SIGWINCH, _handle_sigwinch);

	if ((fd = slurm_accept_msg_conn(job->pty_fd, &client_addr)) < 0) {
		error("pty: accept failure: %m");
		return NULL;
	}

	while (job->state <= SRUN_JOB_RUNNING) {
		debug2("waiting for SIGWINCH");
		poll(NULL, 0, -1);
		if (winch) {
			set_winsize(job);
			_notify_winsize_change(fd, job);
		}
		winch = 0;
	}
	return NULL;
}
/* Accept and process incoming slurmctld messages until srun shuts down */
static void *_msg_thr_internal(void *arg)
{
	slurm_addr_t cli_addr;
	int newsockfd;
	slurm_msg_t msg;
	int *slurmctld_fd_ptr = (int *) arg;

	(void) pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
	(void) pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);

	while (!srun_shutdown) {
		newsockfd = slurm_accept_msg_conn(*slurmctld_fd_ptr,
						  &cli_addr);
		if (newsockfd == SLURM_SOCKET_ERROR) {
			if (errno != EINTR)
				error("slurm_accept_msg_conn: %m");
			continue;
		}
		slurm_msg_t_init(&msg);
		if (slurm_receive_msg(newsockfd, &msg, 0) != 0) {
			error("slurm_receive_msg: %m");
			/* close the new socket */
			close(newsockfd);
			continue;
		}
		_handle_msg(&msg);
		slurm_free_msg_members(&msg);
		close(newsockfd);
	}
	return NULL;
}
/* slurmd message engine: accept incoming connections and hand each
 * off to a connection handler until shutdown is requested. */
static void _msg_engine(void)
{
	slurm_addr_t *cli;
	slurm_fd_t sock;

	msg_pthread = pthread_self();
	slurmd_req(NULL);	/* initialize timer */
	while (!_shutdown) {
		if (_reconfig) {
			verbose("got reconfigure request");
			_wait_for_all_threads(5); /* Wait for RPCs to finish */
			_reconfigure();
		}

		cli = xmalloc(sizeof(slurm_addr_t));
		if ((sock = slurm_accept_msg_conn(conf->lfd, cli)) >= 0) {
			_handle_connection(sock, cli);
			continue;
		}
		/*
		 * Otherwise, accept() failed.
		 */
		xfree(cli);
		if (errno == EINTR)
			continue;
		error("accept: %m");
	}
	verbose("got shutdown request");
	slurm_shutdown_msg_engine(conf->lfd);
	return;
}
/* Listen on the controller address/port and spawn a thread to
 * service each incoming connection. */
void *_recv_msg_proc(void *no_data)
{
	slurm_fd_t sock_fd, new_sock_fd;
	slurm_addr_t client_addr;
	connection_arg_t *conn_arg =
		(connection_arg_t *) malloc(sizeof(connection_arg_t));

	sock_fd = slurm_init_msg_engine_addrname_port(
			slurmctld_conf.control_addr,
			slurmctld_conf.slurmctld_port);
	if (sock_fd == SLURM_SOCKET_ERROR) {
		fatal("slurm_init_msg_engine_addrname_port error %m");
	}

	while (1) {
		if ((new_sock_fd = slurm_accept_msg_conn(sock_fd,
				&client_addr)) == SLURM_SOCKET_ERROR) {
			error("slurm_accept_msg_conn: %m");
			continue;
		}
		conn_arg->newsockfd = new_sock_fd;
		pthread_t *serv_thread =
			(pthread_t *) malloc(sizeof(pthread_t));
		while (pthread_create(serv_thread, NULL, _service_connection,
				      (void *) conn_arg)) {
			error("pthread_create error:%m");
			sleep(1);
		}
	}
	return NULL;
}
/* Process incoming RPCs. Meant to execute as a pthread */
extern void *rpc_mgr(void *no_data)
{
	int sockfd, newsockfd;
	int i;
	uint16_t port;
	slurm_addr_t cli_addr;
	slurmdbd_conn_t *conn_arg = NULL;

	master_thread_id = pthread_self();

	/* initialize port for RPCs */
	if ((sockfd = slurm_init_msg_engine_port(get_dbd_port()))
	    == SLURM_SOCKET_ERROR)
		fatal("slurm_init_msg_engine_port error %m");

	slurm_persist_conn_recv_server_init();

	/*
	 * Process incoming RPCs until told to shutdown
	 */
	while (!shutdown_time &&
	       (i = slurm_persist_conn_wait_for_thread_loc()) >= 0) {
		/*
		 * accept needed for stream implementation is a no-op in
		 * message implementation that just passes sockfd to newsockfd
		 */
		if ((newsockfd = slurm_accept_msg_conn(sockfd, &cli_addr))
		    == SLURM_SOCKET_ERROR) {
			slurm_persist_conn_free_thread_loc(i);
			if (errno != EINTR)
				error("slurm_accept_msg_conn: %m");
			continue;
		}
		fd_set_nonblocking(newsockfd);

		conn_arg = xmalloc(sizeof(slurmdbd_conn_t));
		conn_arg->conn = xmalloc(sizeof(slurm_persist_conn_t));
		conn_arg->conn->fd = newsockfd;
		conn_arg->conn->flags = PERSIST_FLAG_DBD;
		conn_arg->conn->callback_proc = proc_req;
		conn_arg->conn->callback_fini = _connection_fini_callback;
		conn_arg->conn->shutdown = &shutdown_time;
		conn_arg->conn->version = SLURM_MIN_PROTOCOL_VERSION;
		conn_arg->conn->rem_host = xmalloc_nz(sizeof(char) * 16);
		/* Don't fill in the rem_port here. It will be filled in
		 * later if it is a slurmctld connection. */
		slurm_get_ip_str(&cli_addr, &port,
				 conn_arg->conn->rem_host, sizeof(char) * 16);

		slurm_persist_conn_recv_thread_init(
			conn_arg->conn, i, conn_arg);
	}

	debug("rpc_mgr shutting down");
	(void) slurm_shutdown_msg_engine(sockfd);
	pthread_exit((void *) 0);
	return NULL;
}
/*****************************************************************************\
 *  message handler thread
\*****************************************************************************/
static void *_msg_thread(void *no_data)
{
	slurm_fd_t sock_fd = -1, new_fd;
	slurm_addr_t cli_addr;
	char *msg;
	int i;

	/* If JobSubmitDynAllocPort is already taken, keep trying to open it
	 * once per minute. Slurmctld will continue to function
	 * during this interval even if nothing can be scheduled. */
	for (i = 0; (!thread_shutdown); i++) {
		if (i > 0)
			sleep(60);
		sock_fd = slurm_init_msg_engine_port(sched_port);
		if (sock_fd != SLURM_SOCKET_ERROR)
			break;
		error("dynalloc: slurm_init_msg_engine_port %u %m",
		      sched_port);
		error("dynalloc: Unable to communicate with ORTE RAS");
	}

	/* Process incoming RPCs until told to shutdown */
	while (!thread_shutdown) {
		if ((new_fd = slurm_accept_msg_conn(sock_fd, &cli_addr))
		    == SLURM_SOCKET_ERROR) {
			if (errno != EINTR)
				error("dynalloc: slurm_accept_msg_conn %m");
			continue;
		}
		if (thread_shutdown) {
			close(new_fd);
			break;
		}

		err_code = 0;
		err_msg = "";
		msg = _recv_msg(new_fd);
		if (msg) {
			_proc_msg(new_fd, msg);
			xfree(msg);
		}
		slurm_close_accepted_conn(new_fd);
	}

	verbose("dynalloc: message engine shutdown");
	if (sock_fd > 0)
		(void) slurm_shutdown_msg_engine(sock_fd);
	pthread_exit((void *) 0);
	return NULL;
}
static void *_msg_thread(void *no_data)
{
	int sock_fd = -1, new_fd;
	slurm_addr_t cli_addr;
	char *msg;
	int i;

	/* If the port is already taken, keep trying to open it
	 * every 10 secs */
	for (i = 0; (!thread_shutdown); i++) {
		if (i > 0)
			sleep(10);
		sock_fd = slurm_init_msg_engine_port(nonstop_comm_port);
		if (sock_fd != SLURM_SOCKET_ERROR)
			break;
		error("slurmctld/nonstop: can not open port: %hu %m",
		      nonstop_comm_port);
	}

	/* Process incoming RPCs until told to shutdown */
	while (!thread_shutdown) {
		new_fd = slurm_accept_msg_conn(sock_fd, &cli_addr);
		if (new_fd == SLURM_SOCKET_ERROR) {
			if (errno != EINTR) {
				info("slurmctld/nonstop: "
				     "slurm_accept_msg_conn %m");
			}
			continue;
		}
		if (thread_shutdown) {
			close(new_fd);
			break;
		}

		/* It would be nice to create a pthread for each new
		 * RPC, but that leaks memory on some systems when
		 * done from a plugin. Alternately, we could maintain
		 * a pool of pthreads and reuse them. */
		msg = _recv_msg(new_fd);
		if (msg) {
			_proc_msg(new_fd, msg, cli_addr);
			xfree(msg);
		}
		slurm_close(new_fd);
	}

	debug("slurmctld/nonstop: message engine shutdown");
	if (sock_fd > 0)
		(void) slurm_shutdown_msg_engine(sock_fd);
	pthread_exit((void *) 0);
	return NULL;
}
/* Accept RPC from slurmctld and process it.
 * IN listen_fd: file descriptor for slurmctld communications
 * OUT resp: resource allocation response message
 * RET 1 if resp is filled in, 0 otherwise */
static int _accept_msg_connection(int listen_fd,
				  resource_allocation_response_msg_t **resp)
{
	int conn_fd;
	slurm_msg_t *msg = NULL;
	slurm_addr_t cli_addr;
	char host[256];
	uint16_t port;
	int rc = 0;

	conn_fd = slurm_accept_msg_conn(listen_fd, &cli_addr);
	if (conn_fd < 0) {
		error("Unable to accept connection: %m");
		return rc;
	}

	slurm_get_addr(&cli_addr, &port, host, sizeof(host));
	debug2("got message connection from %s:%hu", host, port);

	msg = xmalloc(sizeof(slurm_msg_t));
	slurm_msg_t_init(msg);

	if ((rc = slurm_receive_msg(conn_fd, msg, 0)) != 0) {
		slurm_free_msg(msg);
		if (errno == EINTR) {
			slurm_close_accepted_conn(conn_fd);
			*resp = NULL;
			return 0;
		}
		error("_accept_msg_connection[%s]: %m", host);
		slurm_close_accepted_conn(conn_fd);
		return SLURM_ERROR;
	}

	rc = _handle_msg(msg, resp); /* handle_msg frees msg */
	slurm_free_msg(msg);

	slurm_close_accepted_conn(conn_fd);
	return rc;
}
/* _background_rpc_mgr - Read and process incoming RPCs to the background
 *	controller (that's us) */
static void *_background_rpc_mgr(void *no_data)
{
	slurm_fd_t newsockfd;
	slurm_fd_t sockfd;
	slurm_addr_t cli_addr;
	slurm_msg_t *msg = NULL;
	int error_code;
	char *node_addr = NULL;
	/* Read configuration only */
	slurmctld_lock_t config_read_lock = {
		READ_LOCK, NO_LOCK, NO_LOCK, NO_LOCK };
	int sigarray[] = {SIGUSR1, 0};

	(void) pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
	(void) pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
	debug3("_background_rpc_mgr pid = %lu", (unsigned long) getpid());

	/* initialize port for RPCs */
	lock_slurmctld(config_read_lock);

	/* set node_addr to bind to (NULL means any) */
	if ((strcmp(slurmctld_conf.backup_controller,
		    slurmctld_conf.backup_addr) != 0)) {
		node_addr = slurmctld_conf.backup_addr;
	}

	if ((sockfd = slurm_init_msg_engine_addrname_port(
				node_addr,
				slurmctld_conf.slurmctld_port))
	    == SLURM_SOCKET_ERROR)
		fatal("slurm_init_msg_engine_addrname_port error %m");
	unlock_slurmctld(config_read_lock);

	/* Prepare to catch SIGUSR1 to interrupt accept().
	 * This signal is generated by the slurmctld signal
	 * handler thread upon receipt of SIGABRT, SIGINT,
	 * or SIGTERM. That thread does all processing of
	 * all signals. */
	xsignal(SIGUSR1, _sig_handler);
	xsignal_unblock(sigarray);

	/*
	 * Process incoming RPCs indefinitely
	 */
	while (slurmctld_config.shutdown_time == 0) {
		/* accept needed for stream implementation
		 * is a no-op in message implementation that just passes
		 * sockfd to newsockfd */
		if ((newsockfd = slurm_accept_msg_conn(sockfd, &cli_addr))
		    == SLURM_SOCKET_ERROR) {
			if (errno != EINTR)
				error("slurm_accept_msg_conn: %m");
			continue;
		}

		msg = xmalloc(sizeof(slurm_msg_t));
		slurm_msg_t_init(msg);
		if (slurm_receive_msg(newsockfd, msg, 0) != 0)
			error("slurm_receive_msg: %m");

		error_code = _background_process_msg(msg);
		if ((error_code == SLURM_SUCCESS) &&
		    (msg->msg_type == REQUEST_SHUTDOWN_IMMEDIATE) &&
		    (slurmctld_config.shutdown_time == 0))
			slurmctld_config.shutdown_time = time(NULL);

		slurm_free_msg_data(msg->msg_type, msg->data);
		slurm_free_msg(msg);

		slurm_close(newsockfd);	/* close new socket */
	}

	debug3("_background_rpc_mgr shutting down");
	slurm_close(sockfd);	/* close the main socket */
	pthread_exit((void *) 0);
	return NULL;
}
/*****************************************************************************\
 *  message handler thread
\*****************************************************************************/
static void *_msg_thread(void *no_data)
{
	slurm_fd_t sock_fd = -1, new_fd;
	slurm_addr_t cli_addr;
	char *msg;
	slurm_ctl_conf_t *conf;
	int i;
	/* Locks: Write configuration, job, node, and partition */
	slurmctld_lock_t config_write_lock = {
		WRITE_LOCK, WRITE_LOCK, WRITE_LOCK, WRITE_LOCK };

	conf = slurm_conf_lock();
	sched_port = conf->schedport;
	slurm_conf_unlock();

	/* Wait until configuration is completely loaded */
	lock_slurmctld(config_write_lock);
	unlock_slurmctld(config_write_lock);

	/* If SchedulerPort is already taken, keep trying to open it
	 * once per minute. Slurmctld will continue to function
	 * during this interval even if nothing can be scheduled. */
	for (i = 0; (!thread_shutdown); i++) {
		if (i > 0)
			sleep(60);
		sock_fd = slurm_init_msg_engine_port(sched_port);
		if (sock_fd != SLURM_SOCKET_ERROR)
			break;
		error("wiki: slurm_init_msg_engine_port %u %m", sched_port);
		error("wiki: Unable to communicate with Moab");
	}

	/* Process incoming RPCs until told to shutdown */
	while (!thread_shutdown) {
		if ((new_fd = slurm_accept_msg_conn(sock_fd, &cli_addr))
		    == SLURM_SOCKET_ERROR) {
			if (errno != EINTR)
				error("wiki: slurm_accept_msg_conn %m");
			continue;
		}
		if (thread_shutdown) {
			close(new_fd);
			break;
		}

		/* It would be nice to create a pthread for each new
		 * RPC, but that leaks memory on some systems when
		 * done from a plugin.
		 * FIXME: Maintain a pool of pthreads and reuse them. */
		err_code = 0;
		err_msg = "";
		msg = _recv_msg(new_fd);
		if (msg) {
			_proc_msg(new_fd, msg);
			xfree(msg);
		}
		slurm_close_accepted_conn(new_fd);
	}

	if (sock_fd > 0)
		(void) slurm_shutdown_msg_engine(sock_fd);
	pthread_exit((void *) 0);
	return NULL;
}
/* Process incoming RPCs. Meant to execute as a pthread */
extern void *rpc_mgr(void *no_data)
{
	pthread_attr_t thread_attr_rpc_req;
	slurm_fd_t sockfd, newsockfd;
	int i, retry_cnt, sigarray[] = {SIGUSR1, 0};
	slurm_addr_t cli_addr;
	slurmdbd_conn_t *conn_arg = NULL;

	slurm_mutex_lock(&thread_count_lock);
	master_thread_id = pthread_self();
	slurm_mutex_unlock(&thread_count_lock);

	(void) pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
	(void) pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);

	/* threads to process individual RPCs are detached */
	slurm_attr_init(&thread_attr_rpc_req);
	if (pthread_attr_setdetachstate
	    (&thread_attr_rpc_req, PTHREAD_CREATE_DETACHED))
		fatal("pthread_attr_setdetachstate %m");

	/* initialize port for RPCs */
	if ((sockfd = slurm_init_msg_engine_port(get_dbd_port()))
	    == SLURM_SOCKET_ERROR)
		fatal("slurm_init_msg_engine_port error %m");

	/* Prepare to catch SIGUSR1 to interrupt accept().
	 * This signal is generated by the slurmdbd signal
	 * handler thread upon receipt of SIGABRT, SIGINT,
	 * or SIGTERM. That thread does all processing of
	 * all signals. */
	xsignal(SIGUSR1, _sig_handler);
	xsignal_unblock(sigarray);

	/*
	 * Process incoming RPCs until told to shutdown
	 */
	while ((i = _wait_for_server_thread()) >= 0) {
		/*
		 * accept needed for stream implementation is a no-op in
		 * message implementation that just passes sockfd to newsockfd
		 */
		if ((newsockfd = slurm_accept_msg_conn(sockfd, &cli_addr))
		    == SLURM_SOCKET_ERROR) {
			_free_server_thread((pthread_t) 0);
			if (errno != EINTR)
				error("slurm_accept_msg_conn: %m");
			continue;
		}
		fd_set_nonblocking(newsockfd);

		conn_arg = xmalloc(sizeof(slurmdbd_conn_t));
		conn_arg->newsockfd = newsockfd;
		slurm_get_ip_str(&cli_addr, &conn_arg->orig_port,
				 conn_arg->ip, sizeof(conn_arg->ip));

		retry_cnt = 0;
		while (pthread_create(&slave_thread_id[i],
				      &thread_attr_rpc_req,
				      _service_connection,
				      (void *) conn_arg)) {
			if (retry_cnt > 0) {
				error("pthread_create failure, "
				      "aborting RPC: %m");
				close(newsockfd);
				break;
			}
			error("pthread_create failure: %m");
			retry_cnt++;
			usleep(1000);	/* retry in 1 msec */
		}
	}

	debug3("rpc_mgr shutting down");
	slurm_attr_destroy(&thread_attr_rpc_req);
	(void) slurm_shutdown_msg_engine(sockfd);
	_wait_for_thread_fini();
	pthread_exit((void *) 0);
	return NULL;
}
/* Wait for barrier and get full PMI Keyval space data */
int slurm_get_kvs_comm_set(struct kvs_comm_set **kvs_set_ptr,
			   int pmi_rank, int pmi_size)
{
	int rc, srun_fd, retries = 0, timeout = 0;
	slurm_msg_t msg_send, msg_rcv;
	slurm_addr_t slurm_addr, srun_reply_addr;
	char hostname[64];
	uint16_t port;
	kvs_get_msg_t data;
	char *env_pmi_ifhn;

	if (kvs_set_ptr == NULL)
		return EINVAL;
	*kvs_set_ptr = NULL;	/* initialization */

	if ((rc = _get_addr()) != SLURM_SUCCESS) {
		error("_get_addr: %m");
		return rc;
	}

	_set_pmi_time();

	if (pmi_fd < 0) {
		if ((pmi_fd = slurm_init_msg_engine_port(0)) < 0) {
			error("slurm_init_msg_engine_port: %m");
			return SLURM_ERROR;
		}
		fd_set_blocking(pmi_fd);
	}
	if (slurm_get_stream_addr(pmi_fd, &slurm_addr) < 0) {
		error("slurm_get_stream_addr: %m");
		return SLURM_ERROR;
	}
	/* hostname is not set here, so slurm_get_addr fails
	slurm_get_addr(&slurm_addr, &port, hostname, sizeof(hostname)); */
	port = ntohs(slurm_addr.sin_port);
	if ((env_pmi_ifhn = getenv("SLURM_PMI_RESP_IFHN"))) {
		strncpy(hostname, env_pmi_ifhn, sizeof(hostname));
		hostname[sizeof(hostname)-1] = 0;
	} else
		gethostname_short(hostname, sizeof(hostname));

	data.task_id = pmi_rank;
	data.size = pmi_size;
	data.port = port;
	data.hostname = hostname;
	slurm_msg_t_init(&msg_send);
	slurm_msg_t_init(&msg_rcv);
	msg_send.address = srun_addr;
	msg_send.msg_type = PMI_KVS_GET_REQ;
	msg_send.data = &data;

	/* Send the RPC to the local srun communication manager.
	 * Since the srun can be sent thousands of messages at
	 * the same time and refuse some connections, retry as
	 * needed. Wait until all key-pairs have been sent by
	 * all tasks, then spread out messages by task rank.
	 * Also increase the message timeout if many tasks
	 * since the srun command can get very overloaded (the
	 * default timeout is 10 secs). */
	_delay_rpc(pmi_rank, pmi_size);
	if      (pmi_size > 4000)	/* 240 secs */
		timeout = slurm_get_msg_timeout() * 24000;
	else if (pmi_size > 1000)	/* 120 secs */
		timeout = slurm_get_msg_timeout() * 12000;
	else if (pmi_size > 100)	/* 60 secs */
		timeout = slurm_get_msg_timeout() * 6000;
	else if (pmi_size > 10)		/* 20 secs */
		timeout = slurm_get_msg_timeout() * 2000;

	while (slurm_send_recv_rc_msg_only_one(&msg_send, &rc, timeout) < 0) {
		if (retries++ > MAX_RETRIES) {
			error("slurm_get_kvs_comm_set: %m");
			return SLURM_ERROR;
		} else
			debug("get kvs retry %d", retries);
		_delay_rpc(pmi_rank, pmi_size);
	}
	if (rc != SLURM_SUCCESS) {
		error("slurm_get_kvs_comm_set error_code=%d", rc);
		return rc;
	}

	/* get the message after all tasks reach the barrier */
	srun_fd = slurm_accept_msg_conn(pmi_fd, &srun_reply_addr);
	if (srun_fd < 0) {
		error("slurm_accept_msg_conn: %m");
		return errno;
	}

	while ((rc = slurm_receive_msg(srun_fd, &msg_rcv, timeout)) != 0) {
		if (errno == EINTR)
			continue;
		error("slurm_receive_msg: %m");
		slurm_close(srun_fd);
		return errno;
	}
	if (msg_rcv.auth_cred)
		(void) g_slurm_auth_destroy(msg_rcv.auth_cred);

	if (msg_rcv.msg_type != PMI_KVS_GET_RESP) {
		error("slurm_get_kvs_comm_set msg_type=%d", msg_rcv.msg_type);
		slurm_close(srun_fd);
		return SLURM_UNEXPECTED_MSG_ERROR;
	}
	if (slurm_send_rc_msg(&msg_rcv, SLURM_SUCCESS) < 0)
		error("slurm_send_rc_msg: %m");
	slurm_close(srun_fd);
	*kvs_set_ptr = msg_rcv.data;

	rc = _forward_comm_set(*kvs_set_ptr);
	return rc;
}