static void *_msg_thr_internal(void *arg)
{
	slurm_addr_t cli_addr;
	slurm_fd_t newsockfd;
	slurm_msg_t *msg;
	int *slurmctld_fd_ptr = (int *)arg;

	(void) pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
	(void) pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
	while (!srun_shutdown) {
		newsockfd = slurm_accept_msg_conn(*slurmctld_fd_ptr,
						  &cli_addr);
		if (newsockfd == SLURM_SOCKET_ERROR) {
			if (errno != EINTR)
				error("slurm_accept_msg_conn: %m");
			continue;
		}
		msg = xmalloc(sizeof(slurm_msg_t));
		slurm_msg_t_init(msg);
		if (slurm_receive_msg(newsockfd, msg, 0) != 0) {
			error("slurm_receive_msg: %m");
			/* free the message and close the new socket
			 * so we do not leak it on a receive error */
			slurm_free_msg(msg);
			slurm_close(newsockfd);
			continue;
		}
		_handle_msg(msg);
		slurm_free_msg(msg);
		slurm_close(newsockfd);
	}
	return NULL;
}
extern int slurm_open_stream(slurm_addr_t *addr, bool retry)
{
	int retry_cnt;
	int fd;
	uint16_t port;
	char ip[32];

	if ((addr->sin_family == 0) || (addr->sin_port == 0)) {
		error("Error connecting, bad data: family = %u, port = %u",
		      addr->sin_family, addr->sin_port);
		return SLURM_SOCKET_ERROR;
	}

	for (retry_cnt = 0; ; retry_cnt++) {
		int rc;
		if ((fd = _slurm_create_socket(SLURM_STREAM)) < 0) {
			error("Error creating slurm stream socket: %m");
			slurm_seterrno(errno);
			return SLURM_SOCKET_ERROR;
		}

		if (retry_cnt) {
			if (retry_cnt == 1) {
				debug3("Error connecting, "
				       "picking new stream port");
			}
			_sock_bind_wild(fd);
		}

		rc = _slurm_connect(fd, (struct sockaddr const *)addr,
				    sizeof(*addr));
		if (rc >= 0)		/* success */
			break;
		if (((errno != ECONNREFUSED) && (errno != ETIMEDOUT)) ||
		    (!retry) || (retry_cnt >= PORT_RETRIES)) {
			slurm_seterrno(errno);
			goto error;
		}

		if ((slurm_close(fd) < 0) && (errno == EINTR))
			slurm_close(fd);	/* try again */
	}

	return fd;

error:
	slurm_get_ip_str(addr, &port, ip, sizeof(ip));
	debug2("Error connecting slurm stream socket at %s:%d: %m",
	       ip, ntohs(port));
	if ((slurm_close(fd) < 0) && (errno == EINTR))
		slurm_close(fd);	/* try again */
	return SLURM_SOCKET_ERROR;
}
/*
 * _send_slurmctld_register_req - request register from slurmctld
 * IN cluster_rec: cluster to register with; supplies the control host,
 *	control port and rpc version
 * RET: error code
 */
static int _send_slurmctld_register_req(slurmdb_cluster_rec_t *cluster_rec)
{
	slurm_addr_t ctld_address;
	int fd;
	int rc = SLURM_SUCCESS;

	slurm_set_addr_char(&ctld_address, cluster_rec->control_port,
			    cluster_rec->control_host);
	fd = slurm_open_msg_conn(&ctld_address);
	if (fd < 0) {
		rc = SLURM_ERROR;
	} else {
		slurm_msg_t out_msg;
		slurm_msg_t_init(&out_msg);
		out_msg.msg_type = ACCOUNTING_REGISTER_CTLD;
		out_msg.flags = SLURM_GLOBAL_AUTH_KEY;
		out_msg.protocol_version = cluster_rec->rpc_version;
		slurm_send_node_msg(fd, &out_msg);
		/* We probably need to add a matching recv_msg function
		 * for an arbitrary fd, or should these be fire
		 * and forget?  For now we can probably
		 * forget about it. */
		slurm_close(fd);
	}
	return rc;
}
/*****************************************************************************\
 * terminate message handler thread
\*****************************************************************************/
extern void term_msg_thread(void)
{
	pthread_mutex_lock(&thread_flag_mutex);
	if (thread_running) {
		int fd;
		slurm_addr_t addr;

		thread_shutdown = true;

		/* Open and close a connection to the wiki listening port.
		 * Allows slurm_accept_msg_conn() to return in
		 * _msg_thread() so that it can check the thread_shutdown
		 * flag. */
		slurm_set_addr(&addr, sched_port, "localhost");
		fd = slurm_open_stream(&addr, true);
		if (fd != -1) {
			/* we don't care if the open failed */
			slurm_close(fd);
		}

		debug2("waiting for sched/wiki thread to exit");
		pthread_join(msg_thread_id, NULL);
		msg_thread_id = 0;
		thread_shutdown = false;
		thread_running = false;
		debug2("join of sched/wiki thread was successful");
	}
	pthread_mutex_unlock(&thread_flag_mutex);
}
/* Accept RPC from slurmctld and process it.
 * IN listen_fd: file descriptor for slurmctld communications
 * OUT resp: resource allocation response message
 * RET 1 if resp is filled in, 0 otherwise */
static int _accept_msg_connection(int listen_fd,
				  resource_allocation_response_msg_t **resp)
{
	int conn_fd;
	slurm_msg_t *msg = NULL;
	slurm_addr_t cli_addr;
	char host[256];
	uint16_t port;
	int rc = 0;

	conn_fd = slurm_accept_msg_conn(listen_fd, &cli_addr);
	if (conn_fd < 0) {
		error("Unable to accept connection: %m");
		return rc;
	}

	slurm_get_addr(&cli_addr, &port, host, sizeof(host));
	debug2("got message connection from %s:%hu", host, port);

	msg = xmalloc(sizeof(slurm_msg_t));
	slurm_msg_t_init(msg);

	if ((rc = slurm_receive_msg(conn_fd, msg, 0)) != 0) {
		slurm_free_msg(msg);
		if (errno == EINTR) {
			slurm_close(conn_fd);
			*resp = NULL;
			return 0;
		}
		error("_accept_msg_connection[%s]: %m", host);
		slurm_close(conn_fd);
		return SLURM_ERROR;
	}

	rc = _handle_msg(msg, resp);	/* handle_msg frees the msg data */
	slurm_free_msg(msg);
	slurm_close(conn_fd);
	return rc;
}
extern int slurm_init_msg_engine(slurm_addr_t *addr)
{
	int rc;
	int fd;
	const int one = 1;
	const size_t sz1 = sizeof(one);

	if ((fd = _slurm_create_socket(SLURM_STREAM)) < 0) {
		error("Error creating slurm stream socket: %m");
		return fd;
	}

	rc = _slurm_setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sz1);
	if (rc < 0) {
		error("setsockopt SO_REUSEADDR failed: %m");
		goto error;
	}

	rc = bind(fd, (struct sockaddr const *) addr, sizeof(*addr));
	if (rc < 0) {
		error("Error binding slurm stream socket: %m");
		goto error;
	}

	if (listen(fd, SLURM_PROTOCOL_DEFAULT_LISTEN_BACKLOG) < 0) {
		error("Error listening on slurm stream socket: %m");
		rc = SLURM_ERROR;
		goto error;
	}

	return fd;

error:
	if ((slurm_close(fd) < 0) && (errno == EINTR))
		slurm_close(fd);	/* try again */
	return rc;
}
static void *_msg_thread(void *no_data)
{
	int sock_fd = -1, new_fd;
	slurm_addr_t cli_addr;
	char *msg;
	int i;

	/* If the port is already taken, keep trying to open it
	 * every 10 secs */
	for (i = 0; (!thread_shutdown); i++) {
		if (i > 0)
			sleep(10);
		sock_fd = slurm_init_msg_engine_port(nonstop_comm_port);
		if (sock_fd != SLURM_SOCKET_ERROR)
			break;
		error("slurmctld/nonstop: can not open port: %hu %m",
		      nonstop_comm_port);
	}

	/* Process incoming RPCs until told to shutdown */
	while (!thread_shutdown) {
		new_fd = slurm_accept_msg_conn(sock_fd, &cli_addr);
		if (new_fd == SLURM_SOCKET_ERROR) {
			if (errno != EINTR) {
				info("slurmctld/nonstop: "
				     "slurm_accept_msg_conn %m");
			}
			continue;
		}
		if (thread_shutdown) {
			close(new_fd);
			break;
		}
		/* It would be nice to create a pthread for each new
		 * RPC, but that leaks memory on some systems when
		 * done from a plugin.  Alternately, we could maintain
		 * a pool of pthreads and reuse them. */
		msg = _recv_msg(new_fd);
		if (msg) {
			_proc_msg(new_fd, msg, cli_addr);
			xfree(msg);
		}
		slurm_close(new_fd);
	}
	debug("slurmctld/nonstop: message engine shutdown");
	if (sock_fd > 0)
		(void) slurm_shutdown_msg_engine(sock_fd);
	pthread_exit((void *) 0);
	return NULL;
}
/*
 * cluster_first_reg - ask the controller to send nodes in a down state
 *	and jobs pending or running on first registration.
 *
 * IN host: controller host
 * IN port: controller port
 * IN rpc_version: controller rpc version
 * RET: error code
 */
extern int cluster_first_reg(char *host, uint16_t port, uint16_t rpc_version)
{
	slurm_addr_t ctld_address;
	slurm_fd_t fd;
	int rc = SLURM_SUCCESS;

	info("First time to register cluster requesting "
	     "running jobs and system information.");

	slurm_set_addr_char(&ctld_address, port, host);
	fd = slurm_open_msg_conn(&ctld_address);
	if (fd < 0) {
		error("can not open socket back to slurmctld "
		      "%s(%u): %m", host, port);
		rc = SLURM_ERROR;
	} else {
		slurm_msg_t out_msg;
		accounting_update_msg_t update;
		/* We have to put this update message here so
		   we can tell the sender to send the correct
		   RPC version. */
		memset(&update, 0, sizeof(accounting_update_msg_t));
		update.rpc_version = rpc_version;

		slurm_msg_t_init(&out_msg);
		out_msg.msg_type = ACCOUNTING_FIRST_REG;
		out_msg.flags = SLURM_GLOBAL_AUTH_KEY;
		out_msg.data = &update;
		slurm_send_node_msg(fd, &out_msg);
		/* We probably need to add a matching recv_msg function
		 * for an arbitrary fd, or should these be fire
		 * and forget?  For now we can probably
		 * forget about it. */
		slurm_close(fd);
	}
	return rc;
}
void *_forward_thread(void *arg)
{
	forward_msg_t *fwd_msg = (forward_msg_t *)arg;
	forward_struct_t *fwd_struct = fwd_msg->fwd_struct;
	Buf buffer = init_buf(BUF_SIZE);	/* probably enough for header */
	List ret_list = NULL;
	int fd = -1;
	ret_data_info_t *ret_data_info = NULL;
	char *name = NULL;
	hostlist_t hl = hostlist_create(fwd_msg->header.forward.nodelist);
	slurm_addr_t addr;
	char *buf = NULL;
	int steps = 0;
	int start_timeout = fwd_msg->timeout;

	/* repeat until we are sure the message was sent */
	while ((name = hostlist_shift(hl))) {
		if (slurm_conf_get_addr(name, &addr) == SLURM_ERROR) {
			error("forward_thread: can't find address for host "
			      "%s, check slurm.conf", name);
			slurm_mutex_lock(&fwd_struct->forward_mutex);
			mark_as_failed_forward(&fwd_struct->ret_list, name,
					       SLURM_UNKNOWN_FORWARD_ADDR);
			free(name);
			if (hostlist_count(hl) > 0) {
				slurm_mutex_unlock(&fwd_struct->forward_mutex);
				continue;
			}
			goto cleanup;
		}
		if ((fd = slurm_open_msg_conn(&addr)) < 0) {
			error("forward_thread to %s: %m", name);

			slurm_mutex_lock(&fwd_struct->forward_mutex);
			mark_as_failed_forward(
				&fwd_struct->ret_list, name,
				SLURM_COMMUNICATIONS_CONNECTION_ERROR);
			free(name);
			if (hostlist_count(hl) > 0) {
				slurm_mutex_unlock(&fwd_struct->forward_mutex);
				/* Abandon tree. This way if all the
				 * nodes in the branch are down we
				 * don't have to time out for each
				 * node serially.
				 */
				_forward_msg_internal(hl, NULL, fwd_struct,
						      &fwd_msg->header, 0,
						      hostlist_count(hl));
				continue;
			}
			goto cleanup;
		}
		buf = hostlist_ranged_string_xmalloc(hl);

		xfree(fwd_msg->header.forward.nodelist);
		fwd_msg->header.forward.nodelist = buf;
		fwd_msg->header.forward.cnt = hostlist_count(hl);
#if 0
		info("sending %d forwards (%s) to %s",
		     fwd_msg->header.forward.cnt,
		     fwd_msg->header.forward.nodelist, name);
#endif
		if (fwd_msg->header.forward.nodelist[0]) {
			debug3("forward: send to %s along with %s",
			       name, fwd_msg->header.forward.nodelist);
		} else
			debug3("forward: send to %s ", name);

		pack_header(&fwd_msg->header, buffer);

		/* add forward data to buffer */
		if (remaining_buf(buffer) < fwd_struct->buf_len) {
			int new_size = buffer->processed + fwd_struct->buf_len;
			new_size += 1024; /* padded for paranoia */
			xrealloc_nz(buffer->head, new_size);
			buffer->size = new_size;
		}
		if (fwd_struct->buf_len) {
			memcpy(&buffer->head[buffer->processed],
			       fwd_struct->buf, fwd_struct->buf_len);
			buffer->processed += fwd_struct->buf_len;
		}

		/*
		 * forward message
		 */
		if (slurm_msg_sendto(fd,
				     get_buf_data(buffer),
				     get_buf_offset(buffer),
				     SLURM_PROTOCOL_NO_SEND_RECV_FLAGS) < 0) {
			error("forward_thread: slurm_msg_sendto: %m");

			slurm_mutex_lock(&fwd_struct->forward_mutex);
			mark_as_failed_forward(&fwd_struct->ret_list, name,
					       errno);
			free(name);
			if (hostlist_count(hl) > 0) {
				free_buf(buffer);
				buffer = init_buf(fwd_struct->buf_len);
				slurm_mutex_unlock(&fwd_struct->forward_mutex);
				slurm_close(fd);
				fd = -1;
				/* Abandon tree. This way if all the
				 * nodes in the branch are down we
				 * don't have to time out for each
				 * node serially.
				 */
				_forward_msg_internal(hl, NULL, fwd_struct,
						      &fwd_msg->header, 0,
						      hostlist_count(hl));
				continue;
			}
			goto cleanup;
		}

		/* These messages don't have a return message, but if
		 * we got here things worked out so make note of the
		 * list of nodes as success.
		 */
		if ((fwd_msg->header.msg_type == REQUEST_SHUTDOWN) ||
		    (fwd_msg->header.msg_type == REQUEST_RECONFIGURE) ||
		    (fwd_msg->header.msg_type == REQUEST_REBOOT_NODES)) {
			slurm_mutex_lock(&fwd_struct->forward_mutex);
			ret_data_info = xmalloc(sizeof(ret_data_info_t));
			list_push(fwd_struct->ret_list, ret_data_info);
			ret_data_info->node_name = xstrdup(name);
			free(name);
			while ((name = hostlist_shift(hl))) {
				ret_data_info =
					xmalloc(sizeof(ret_data_info_t));
				list_push(fwd_struct->ret_list, ret_data_info);
				ret_data_info->node_name = xstrdup(name);
				free(name);
			}
			goto cleanup;
		}

		if (fwd_msg->header.forward.cnt > 0) {
			static int message_timeout = -1;
			if (message_timeout < 0)
				message_timeout =
					slurm_get_msg_timeout() * 1000;
			if (!fwd_msg->header.forward.tree_width)
				fwd_msg->header.forward.tree_width =
					slurm_get_tree_width();
			steps = (fwd_msg->header.forward.cnt+1) /
				fwd_msg->header.forward.tree_width;
			fwd_msg->timeout = (message_timeout*steps);
			/* info("got %d * %d = %d", message_timeout, */
			/*      steps, fwd_msg->timeout); */
			steps++;
			fwd_msg->timeout += (start_timeout*steps);
			/* info("now + %d*%d = %d", start_timeout, */
			/*      steps, fwd_msg->timeout); */
		}

		ret_list = slurm_receive_msgs(fd, steps, fwd_msg->timeout);
		/* info("sent %d forwards got %d back", */
		/*      fwd_msg->header.forward.cnt, list_count(ret_list)); */

		if (!ret_list || (fwd_msg->header.forward.cnt != 0 &&
				  list_count(ret_list) <= 1)) {
			slurm_mutex_lock(&fwd_struct->forward_mutex);
			mark_as_failed_forward(&fwd_struct->ret_list, name,
					       errno);
			free(name);
			FREE_NULL_LIST(ret_list);
			if (hostlist_count(hl) > 0) {
				free_buf(buffer);
				buffer = init_buf(fwd_struct->buf_len);
				slurm_mutex_unlock(&fwd_struct->forward_mutex);
				slurm_close(fd);
				fd = -1;
				continue;
			}
			goto cleanup;
		} else if ((fwd_msg->header.forward.cnt+1)
			   != list_count(ret_list)) {
			/* this should never be called since the above
			   should catch the failed forwards and pipe
			   them back down, but this is here so we
			   never have to worry about a locked
			   mutex */
			ListIterator itr = NULL;
			char *tmp = NULL;
			int first_node_found = 0;
			hostlist_iterator_t host_itr
				= hostlist_iterator_create(hl);
			error("We shouldn't be here.  We forwarded to %d "
			      "but only got %d back",
			      (fwd_msg->header.forward.cnt+1),
			      list_count(ret_list));
			while ((tmp = hostlist_next(host_itr))) {
				int node_found = 0;
				itr = list_iterator_create(ret_list);
				while ((ret_data_info = list_next(itr))) {
					if (!ret_data_info->node_name) {
						first_node_found = 1;
						ret_data_info->node_name =
							xstrdup(name);
					}
					if (!xstrcmp(tmp,
						     ret_data_info->node_name)) {
						node_found = 1;
						break;
					}
				}
				list_iterator_destroy(itr);
				if (!node_found) {
					mark_as_failed_forward(
						&fwd_struct->ret_list,
						tmp,
						SLURM_COMMUNICATIONS_CONNECTION_ERROR);
				}
				free(tmp);
			}
			hostlist_iterator_destroy(host_itr);
			if (!first_node_found) {
				mark_as_failed_forward(
					&fwd_struct->ret_list,
					name,
					SLURM_COMMUNICATIONS_CONNECTION_ERROR);
			}
		}
		break;
	}
	slurm_mutex_lock(&fwd_struct->forward_mutex);
	if (ret_list) {
		while ((ret_data_info = list_pop(ret_list)) != NULL) {
			if (!ret_data_info->node_name) {
				ret_data_info->node_name = xstrdup(name);
			}
			list_push(fwd_struct->ret_list, ret_data_info);
			debug3("got response from %s",
			       ret_data_info->node_name);
		}
		FREE_NULL_LIST(ret_list);
	}
	free(name);
cleanup:
	if ((fd >= 0) && slurm_close(fd) < 0)
		error("close(%d): %m", fd);
	hostlist_destroy(hl);
	destroy_forward(&fwd_msg->header.forward);
	free_buf(buffer);
	slurm_cond_signal(&fwd_struct->notify);
	slurm_mutex_unlock(&fwd_struct->forward_mutex);
	xfree(fwd_msg);
	return (NULL);
}
/* _background_rpc_mgr - Read and process incoming RPCs to the background
 *	controller (that's us) */
static void *_background_rpc_mgr(void *no_data)
{
	slurm_fd_t newsockfd;
	slurm_fd_t sockfd;
	slurm_addr_t cli_addr;
	slurm_msg_t *msg = NULL;
	int error_code;
	char *node_addr = NULL;

	/* Read configuration only */
	slurmctld_lock_t config_read_lock = {
		READ_LOCK, NO_LOCK, NO_LOCK, NO_LOCK };
	int sigarray[] = {SIGUSR1, 0};

	(void) pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
	(void) pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
	debug3("_background_rpc_mgr pid = %lu", (unsigned long) getpid());

	/* initialize port for RPCs */
	lock_slurmctld(config_read_lock);

	/* set node_addr to bind to (NULL means any) */
	if ((strcmp(slurmctld_conf.backup_controller,
		    slurmctld_conf.backup_addr) != 0)) {
		node_addr = slurmctld_conf.backup_addr;
	}

	if ((sockfd = slurm_init_msg_engine_addrname_port(
				node_addr,
				slurmctld_conf.slurmctld_port))
	    == SLURM_SOCKET_ERROR)
		fatal("slurm_init_msg_engine_addrname_port error %m");
	unlock_slurmctld(config_read_lock);

	/* Prepare to catch SIGUSR1 to interrupt accept().
	 * This signal is generated by the slurmctld signal
	 * handler thread upon receipt of SIGABRT, SIGINT,
	 * or SIGTERM. That thread does all processing of
	 * all signals. */
	xsignal(SIGUSR1, _sig_handler);
	xsignal_unblock(sigarray);

	/*
	 * Process incoming RPCs indefinitely
	 */
	while (slurmctld_config.shutdown_time == 0) {
		/* accept needed for stream implementation
		 * is a no-op in message implementation that just passes
		 * sockfd to newsockfd */
		if ((newsockfd = slurm_accept_msg_conn(sockfd, &cli_addr))
		    == SLURM_SOCKET_ERROR) {
			if (errno != EINTR)
				error("slurm_accept_msg_conn: %m");
			continue;
		}

		msg = xmalloc(sizeof(slurm_msg_t));
		slurm_msg_t_init(msg);
		if (slurm_receive_msg(newsockfd, msg, 0) != 0)
			error("slurm_receive_msg: %m");

		error_code = _background_process_msg(msg);
		if ((error_code == SLURM_SUCCESS) &&
		    (msg->msg_type == REQUEST_SHUTDOWN_IMMEDIATE) &&
		    (slurmctld_config.shutdown_time == 0))
			slurmctld_config.shutdown_time = time(NULL);

		slurm_free_msg_data(msg->msg_type, msg->data);
		slurm_free_msg(msg);

		slurm_close(newsockfd);	/* close new socket */
	}

	debug3("_background_rpc_mgr shutting down");
	slurm_close(sockfd);	/* close the main socket */
	pthread_exit((void *) 0);
	return NULL;
}
static void *_service_connection(void *arg)
{
	slurmdbd_conn_t *conn = (slurmdbd_conn_t *) arg;
	uint32_t nw_size = 0, msg_size = 0, uid = NO_VAL;
	char *msg = NULL;
	ssize_t msg_read = 0, offset = 0;
	bool fini = false, first = true;
	Buf buffer = NULL;
	int rc = SLURM_SUCCESS;

	debug2("Opened connection %d from %s", conn->newsockfd, conn->ip);

	while (!fini) {
		if (!_fd_readable(conn->newsockfd))
			break;		/* problem with this socket */
		msg_read = read(conn->newsockfd, &nw_size, sizeof(nw_size));
		if (msg_read == 0)	/* EOF */
			break;
		if (msg_read != sizeof(nw_size)) {
			error("Could not read msg_size from "
			      "connection %d(%s) uid(%d)",
			      conn->newsockfd, conn->ip, uid);
			break;
		}
		msg_size = ntohl(nw_size);
		if ((msg_size < 2) || (msg_size > MAX_MSG_SIZE)) {
			error("Invalid msg_size (%u) from "
			      "connection %d(%s) uid(%d)",
			      msg_size, conn->newsockfd, conn->ip, uid);
			break;
		}

		msg = xmalloc(msg_size);
		offset = 0;
		while (msg_size > offset) {
			if (!_fd_readable(conn->newsockfd))
				break;	/* problem with this socket */
			msg_read = read(conn->newsockfd, (msg + offset),
					(msg_size - offset));
			if (msg_read <= 0) {
				error("read(%d): %m", conn->newsockfd);
				break;
			}
			offset += msg_read;
		}
		if (msg_size == offset) {
			rc = proc_req(
				conn, msg, msg_size, first, &buffer, &uid);
			first = false;
			if (rc != SLURM_SUCCESS &&
			    rc != ACCOUNTING_FIRST_REG) {
				error("Processing last message from "
				      "connection %d(%s) uid(%d)",
				      conn->newsockfd, conn->ip, uid);
				if (rc == ESLURM_ACCESS_DENIED ||
				    rc == SLURM_PROTOCOL_VERSION_ERROR)
					fini = true;
			}
		} else {
			buffer = make_dbd_rc_msg(conn->rpc_version,
						 SLURM_ERROR, "Bad offset", 0);
			fini = true;
		}

		(void) _send_resp(conn->newsockfd, buffer);
		xfree(msg);
	}

	if (conn->ctld_port) {
		if (!shutdown_time) {
			slurmdb_cluster_rec_t cluster_rec;
			ListIterator itr;
			slurmdbd_conn_t *slurmdbd_conn;
			memset(&cluster_rec, 0,
			       sizeof(slurmdb_cluster_rec_t));
			cluster_rec.name = conn->cluster_name;
			cluster_rec.control_host = conn->ip;
			cluster_rec.control_port = conn->ctld_port;
			cluster_rec.cpu_count = conn->cluster_cpus;
			debug("cluster %s has disconnected",
			      conn->cluster_name);

			clusteracct_storage_g_fini_ctld(
				conn->db_conn, &cluster_rec);

			slurm_mutex_lock(&registered_lock);
			itr = list_iterator_create(registered_clusters);
			while ((slurmdbd_conn = list_next(itr))) {
				if (conn == slurmdbd_conn) {
					list_delete_item(itr);
					break;
				}
			}
			list_iterator_destroy(itr);
			slurm_mutex_unlock(&registered_lock);
		}
		/* needs to be the last thing done */
		acct_storage_g_commit(conn->db_conn, 1);
	}

	acct_storage_g_close_connection(&conn->db_conn);
	if (slurm_close(conn->newsockfd) < 0)
		error("close(%d): %m(%s)", conn->newsockfd, conn->ip);
	else
		debug2("Closed connection %d uid(%d)", conn->newsockfd, uid);

	xfree(conn->cluster_name);
	xfree(conn);
	_free_server_thread(pthread_self());
	return NULL;
}
/*****************************************************************************\
 * message handler thread
\*****************************************************************************/
static void *_msg_thread(void *no_data)
{
	slurm_fd_t sock_fd = -1, new_fd;
	slurm_addr_t cli_addr;
	char *msg;
	slurm_ctl_conf_t *conf;
	int i;
	/* Locks: Write configuration, job, node, and partition */
	slurmctld_lock_t config_write_lock = {
		WRITE_LOCK, WRITE_LOCK, WRITE_LOCK, WRITE_LOCK };

	conf = slurm_conf_lock();
	sched_port = conf->schedport;
	slurm_conf_unlock();

	/* Wait until configuration is completely loaded */
	lock_slurmctld(config_write_lock);
	unlock_slurmctld(config_write_lock);

	/* If SchedulerPort is already taken, keep trying to open it
	 * once per minute. Slurmctld will continue to function
	 * during this interval even if nothing can be scheduled. */
	for (i = 0; (!thread_shutdown); i++) {
		if (i > 0)
			sleep(60);
		sock_fd = slurm_init_msg_engine_port(sched_port);
		if (sock_fd != SLURM_SOCKET_ERROR)
			break;
		error("wiki: slurm_init_msg_engine_port %u %m",
		      sched_port);
		error("wiki: Unable to communicate with Moab");
	}

	/* Process incoming RPCs until told to shutdown */
	while (!thread_shutdown) {
		if ((new_fd = slurm_accept_msg_conn(sock_fd, &cli_addr))
		    == SLURM_SOCKET_ERROR) {
			if (errno != EINTR)
				error("wiki: slurm_accept_msg_conn %m");
			continue;
		}
		if (thread_shutdown) {
			close(new_fd);
			break;
		}
		/* It would be nice to create a pthread for each new
		 * RPC, but that leaks memory on some systems when
		 * done from a plugin.
		 * FIXME: Maintain a pool of pthreads and reuse them. */
		err_code = 0;
		err_msg = "";
		msg = _recv_msg(new_fd);
		if (msg) {
			_proc_msg(new_fd, msg);
			xfree(msg);
		}
		slurm_close(new_fd);
	}
	if (sock_fd > 0)
		(void) slurm_shutdown_msg_engine(sock_fd);
	pthread_exit((void *) 0);
	return NULL;
}
/* Wait for barrier and get full PMI Keyval space data */
int slurm_get_kvs_comm_set(struct kvs_comm_set **kvs_set_ptr,
			   int pmi_rank, int pmi_size)
{
	int rc, srun_fd, retries = 0, timeout = 0;
	slurm_msg_t msg_send, msg_rcv;
	slurm_addr_t slurm_addr, srun_reply_addr;
	char hostname[64];
	uint16_t port;
	kvs_get_msg_t data;
	char *env_pmi_ifhn;

	if (kvs_set_ptr == NULL)
		return EINVAL;
	*kvs_set_ptr = NULL;	/* initialization */

	if ((rc = _get_addr()) != SLURM_SUCCESS) {
		error("_get_addr: %m");
		return rc;
	}

	_set_pmi_time();

	if (pmi_fd < 0) {
		if ((pmi_fd = slurm_init_msg_engine_port(0)) < 0) {
			error("slurm_init_msg_engine_port: %m");
			return SLURM_ERROR;
		}
		fd_set_blocking(pmi_fd);
	}
	if (slurm_get_stream_addr(pmi_fd, &slurm_addr) < 0) {
		error("slurm_get_stream_addr: %m");
		return SLURM_ERROR;
	}
	/* hostname is not set here, so slurm_get_addr fails
	slurm_get_addr(&slurm_addr, &port, hostname, sizeof(hostname)); */
	port = ntohs(slurm_addr.sin_port);
	if ((env_pmi_ifhn = getenv("SLURM_PMI_RESP_IFHN"))) {
		strncpy(hostname, env_pmi_ifhn, sizeof(hostname));
		hostname[sizeof(hostname)-1] = 0;
	} else
		gethostname_short(hostname, sizeof(hostname));

	data.task_id = pmi_rank;
	data.size = pmi_size;
	data.port = port;
	data.hostname = hostname;
	slurm_msg_t_init(&msg_send);
	slurm_msg_t_init(&msg_rcv);
	msg_send.address = srun_addr;
	msg_send.msg_type = PMI_KVS_GET_REQ;
	msg_send.data = &data;

	/* Send the RPC to the local srun communication manager.
	 * Since the srun can be sent thousands of messages at
	 * the same time and refuse some connections, retry as
	 * needed. Wait until all key-pairs have been sent by
	 * all tasks then spread out messages by task's rank.
	 * Also increase the message timeout if many tasks
	 * since the srun command can get very overloaded (the
	 * default timeout is 10 secs). */
	_delay_rpc(pmi_rank, pmi_size);
	if      (pmi_size > 4000)	/* 240 secs */
		timeout = slurm_get_msg_timeout() * 24000;
	else if (pmi_size > 1000)	/* 120 secs */
		timeout = slurm_get_msg_timeout() * 12000;
	else if (pmi_size > 100)	/* 60 secs */
		timeout = slurm_get_msg_timeout() * 6000;
	else if (pmi_size > 10)		/* 20 secs */
		timeout = slurm_get_msg_timeout() * 2000;

	while (slurm_send_recv_rc_msg_only_one(&msg_send, &rc, timeout) < 0) {
		if (retries++ > MAX_RETRIES) {
			error("slurm_get_kvs_comm_set: %m");
			return SLURM_ERROR;
		} else
			debug("get kvs retry %d", retries);
		_delay_rpc(pmi_rank, pmi_size);
	}
	if (rc != SLURM_SUCCESS) {
		error("slurm_get_kvs_comm_set error_code=%d", rc);
		return rc;
	}

	/* get the message after all tasks reach the barrier */
	srun_fd = slurm_accept_msg_conn(pmi_fd, &srun_reply_addr);
	if (srun_fd < 0) {
		error("slurm_accept_msg_conn: %m");
		return errno;
	}

	while ((rc = slurm_receive_msg(srun_fd, &msg_rcv, timeout)) != 0) {
		if (errno == EINTR)
			continue;
		error("slurm_receive_msg: %m");
		slurm_close(srun_fd);
		return errno;
	}
	if (msg_rcv.auth_cred)
		(void) g_slurm_auth_destroy(msg_rcv.auth_cred);

	if (msg_rcv.msg_type != PMI_KVS_GET_RESP) {
		error("slurm_get_kvs_comm_set msg_type=%d", msg_rcv.msg_type);
		slurm_close(srun_fd);
		return SLURM_UNEXPECTED_MSG_ERROR;
	}
	if (slurm_send_rc_msg(&msg_rcv, SLURM_SUCCESS) < 0)
		error("slurm_send_rc_msg: %m");
	slurm_close(srun_fd);
	*kvs_set_ptr = msg_rcv.data;

	rc = _forward_comm_set(*kvs_set_ptr);
	return rc;
}
int eio_message_socket_accept(eio_obj_t *obj, List objs)
{
	int fd;
	unsigned char *uc;
	unsigned short port;
	struct sockaddr_in addr;
	slurm_msg_t *msg = NULL;
	int len = sizeof(addr);

	debug3("Called eio_msg_socket_accept");

	xassert(obj);
	xassert(obj->ops->handle_msg);

	while ((fd = accept(obj->fd, (struct sockaddr *)&addr,
			    (socklen_t *)&len)) < 0) {
		if (errno == EINTR)
			continue;
		if ((errno == EAGAIN) ||
		    (errno == ECONNABORTED) ||
		    (errno == EWOULDBLOCK)) {
			return SLURM_SUCCESS;
		}
		error("Error on msg accept socket: %m");
		if ((errno == EMFILE)  ||
		    (errno == ENFILE)  ||
		    (errno == ENOBUFS) ||
		    (errno == ENOMEM)) {
			return SLURM_SUCCESS;
		}
		obj->shutdown = true;
		return SLURM_SUCCESS;
	}

	net_set_keep_alive(fd);
	fd_set_close_on_exec(fd);
	fd_set_blocking(fd);

	/* Should not call slurm_get_addr() because the IP may not be
	 * in /etc/hosts. */
	uc = (unsigned char *)&addr.sin_addr.s_addr;
	port = addr.sin_port;
	debug2("got message connection from %u.%u.%u.%u:%hu %d",
	       uc[0], uc[1], uc[2], uc[3], ntohs(port), fd);
	fflush(stdout);

	msg = xmalloc(sizeof(slurm_msg_t));
	slurm_msg_t_init(msg);
again:
	if (slurm_receive_msg(fd, msg, obj->ops->timeout) != 0) {
		if (errno == EINTR)
			goto again;
		error("slurm_receive_msg[%u.%u.%u.%u]: %m",
		      uc[0], uc[1], uc[2], uc[3]);
		goto cleanup;
	}

	(*obj->ops->handle_msg)(obj->arg, msg);

cleanup:
	if ((msg->conn_fd >= 0) && (slurm_close(msg->conn_fd) < 0))
		error("close(%d): %m", msg->conn_fd);
	slurm_free_msg(msg);
	return SLURM_SUCCESS;
}