/*
 * _local_send_recv_rc_msgs - send one message of the given type to the nodes
 *	in nodelist and fold the per-node return codes into a single value.
 * IN nodelist - node list handed unchanged to slurm_send_recv_msgs()
 * IN type     - message type stored in the outgoing message
 * IN data     - payload pointer stored in the outgoing message
 * RET 0 if every reply carried a zero return code, otherwise the non-zero
 *	code of the last failing reply popped from the list; SLURM_ERROR if
 *	no reply list was returned at all
 *
 * NOTE(review): neither the popped replies nor the reply list are released
 * in this function -- confirm who owns them.
 */
static int _local_send_recv_rc_msgs(const char *nodelist,
				    slurm_msg_type_t type, void *data)
{
	int rc = 0;
	List replies = NULL;
	ret_data_info_t *reply = NULL;
	slurm_msg_t *req = xmalloc(sizeof(slurm_msg_t));

	slurm_msg_t_init(req);
	req->msg_type = type;
	req->data = data;

	replies = slurm_send_recv_msgs(nodelist, req, 0, false);
	if (!replies) {
		error("slurm_signal_job: no list was returned");
		rc = SLURM_ERROR;
	} else {
		/* Drain the list; the most recent failure wins. */
		for (reply = list_pop(replies); reply;
		     reply = list_pop(replies)) {
			int node_rc = slurm_get_return_code(reply->type,
							    reply->data);
			if (node_rc != 0)
				rc = node_rc;
		}
	}

	slurm_free_msg(req);
	return rc;
}
static void * _service_connection(void *arg) { conn_t *con = (conn_t *) arg; slurm_msg_t *msg = xmalloc(sizeof(slurm_msg_t)); int rc = SLURM_SUCCESS; debug3("in the service_connection"); slurm_msg_t_init(msg); if ((rc = slurm_receive_msg_and_forward(con->fd, con->cli_addr, msg, 0)) != SLURM_SUCCESS) { error("service_connection: slurm_receive_msg: %m"); /* if this fails we need to make sure the nodes we forward to are taken care of and sent back. This way the control also has a better idea what happened to us */ slurm_send_rc_msg(msg, rc); goto cleanup; } debug2("got this type of message %d", msg->msg_type); slurmd_req(msg); cleanup: if ((msg->conn_fd >= 0) && slurm_close_accepted_conn(msg->conn_fd) < 0) error ("close(%d): %m", con->fd); xfree(con->cli_addr); xfree(con); slurm_free_msg(msg); _decrement_thd_count(); return NULL; }
/*
 * eio_message_socket_accept - accept one connection on obj->fd, receive a
 *	single message from it and pass that message to obj->ops->handle_msg.
 * IN obj  - eio object supplying the listening fd, the receive timeout, the
 *	handle_msg callback and its argument; obj->shutdown is set to true
 *	when accept() fails with an unexpected error
 * IN objs - not used by this function
 * RET always SLURM_SUCCESS
 */
int eio_message_socket_accept(eio_obj_t *obj, List objs)
{
	int fd;
	unsigned char *uc;
	unsigned short port;
	struct sockaddr_in addr;
	slurm_msg_t *msg = NULL;
	/* accept() takes a socklen_t *, so use that type instead of casting
	 * the address of an int. */
	socklen_t len = sizeof(addr);

	debug3("Called eio_msg_socket_accept");

	xassert(obj);
	xassert(obj->ops->handle_msg);

	while ((fd = accept(obj->fd, (struct sockaddr *)&addr, &len)) < 0) {
		if (errno == EINTR)
			continue;
		if (errno == EAGAIN ||
		    errno == ECONNABORTED ||
		    errno == EWOULDBLOCK) {
			/* Nothing to accept right now; not an error. */
			return SLURM_SUCCESS;
		}
		error("Error on msg accept socket: %m");
		obj->shutdown = true;
		return SLURM_SUCCESS;
	}

	fd_set_close_on_exec(fd);
	fd_set_blocking(fd);

	/* Should not call slurm_get_addr() because the IP may not be
	 * in /etc/hosts. */
	uc = (unsigned char *)&addr.sin_addr.s_addr;
	port = addr.sin_port;
	debug2("got message connection from %u.%u.%u.%u:%hu %d",
	       uc[0], uc[1], uc[2], uc[3], ntohs(port), fd);
	fflush(stdout);

	msg = xmalloc(sizeof(slurm_msg_t));
	slurm_msg_t_init(msg);

	/* Retry the receive for as long as it is merely interrupted. */
	while (slurm_receive_msg(fd, msg, obj->ops->timeout) != 0) {
		if (errno == EINTR)
			continue;
		error("slurm_receive_msg[%u.%u.%u.%u]: %m",
		      uc[0], uc[1], uc[2], uc[3]);
		goto cleanup;
	}

	(*obj->ops->handle_msg)(obj->arg, msg);	/* handle_msg should free
						 * msg->data */
cleanup:
	if ((msg->conn_fd >= 0) &&
	    (slurm_close_accepted_conn(msg->conn_fd) < 0))
		error("close(%d): %m", msg->conn_fd);
	slurm_free_msg(msg);

	return SLURM_SUCCESS;
}
static void *_msg_thr_internal(void *arg) { slurm_addr_t cli_addr; slurm_fd_t newsockfd; slurm_msg_t *msg; int *slurmctld_fd_ptr = (int *)arg; (void) pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); (void) pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); while (!srun_shutdown) { newsockfd = slurm_accept_msg_conn(*slurmctld_fd_ptr, &cli_addr); if (newsockfd == SLURM_SOCKET_ERROR) { if (errno != EINTR) error("slurm_accept_msg_conn: %m"); continue; } msg = xmalloc(sizeof(slurm_msg_t)); if (slurm_receive_msg(newsockfd, msg, 0) != 0) { error("slurm_receive_msg: %m"); /* close the new socket */ slurm_close_accepted_conn(newsockfd); continue; } _handle_msg(msg); slurm_free_msg(msg); slurm_close_accepted_conn(newsockfd); } return NULL; }
/* Accept RPC from slurmctld and process it.
 * IN listen_fd: file descriptor on which the connection is accepted
 * OUT resp: resource allocation response message; set to NULL when the
 *	receive is interrupted (EINTR), left untouched on other failures
 *	detected here
 * RET value returned by _handle_msg() for a received message (1 if resp is
 *	filled in, 0 otherwise); 0 if the accept fails or the receive is
 *	interrupted; SLURM_ERROR on any other receive failure
 */
static int _accept_msg_connection(int listen_fd,
				  resource_allocation_response_msg_t **resp)
{
	int conn_fd;
	slurm_msg_t *msg = NULL;
	slurm_addr_t cli_addr;
	char host[256];
	uint16_t port;
	int rc = 0;

	conn_fd = slurm_accept_msg_conn(listen_fd, &cli_addr);
	if (conn_fd < 0) {
		error("Unable to accept connection: %m");
		return rc;
	}

	slurm_get_addr(&cli_addr, &port, host, sizeof(host));
	debug2("got message connection from %s:%hu", host, port);

	msg = xmalloc(sizeof(slurm_msg_t));
	slurm_msg_t_init(msg);

	if ((rc = slurm_receive_msg(conn_fd, msg, 0)) != 0) {
		/* Capture errno before any cleanup call gets a chance to
		 * overwrite it; both the EINTR test and %m depend on it. */
		int recv_errno = errno;

		slurm_free_msg(msg);

		if (recv_errno == EINTR) {
			slurm_close_accepted_conn(conn_fd);
			*resp = NULL;
			return 0;
		}

		errno = recv_errno;	/* make %m describe the receive failure */
		error("_accept_msg_connection[%s]: %m", host);
		slurm_close_accepted_conn(conn_fd);
		return SLURM_ERROR;
	}

	rc = _handle_msg(msg, resp);
	/* The message has been handled; release it here. */
	slurm_free_msg(msg);

	slurm_close_accepted_conn(conn_fd);
	return rc;
}
/*
 * _send_to_stepds - send a REQUEST_FORWARD_DATA message carrying (addr, len,
 *	data) to every host in hl and collect the per-host return codes.
 * IN/OUT hl - target hosts; each host whose reply carries a zero return
 *	code is deleted from the list
 * IN addr   - address string, duplicated for the request and freed again
 *	before returning
 * IN len    - value stored in the request's len field
 * IN data   - pointer stored in the request's data field
 * RET 0 if every reply carried a zero return code, otherwise the non-zero
 *	code of the last failing reply popped; SLURM_ERROR if no reply list
 *	was returned at all
 *
 * NOTE(review): neither the popped replies nor the reply list are released
 * in this function -- confirm who owns them.
 */
static int _send_to_stepds(hostlist_t hl, const char *addr, uint32_t len,
			   char *data)
{
	int rc = 0;
	List replies = NULL;
	ret_data_info_t *reply = NULL;
	slurm_msg_t *out_msg = xmalloc(sizeof(slurm_msg_t));
	forward_data_msg_t req;
	char *targets = NULL;

	slurm_msg_t_init(out_msg);

	req.address = xstrdup(addr);
	req.len = len;
	req.data = data;

	out_msg->msg_type = REQUEST_FORWARD_DATA;
	out_msg->data = &req;

	targets = hostlist_ranged_string_xmalloc(hl);

	replies = slurm_send_recv_msgs(targets, out_msg, 0, false);
	if (!replies) {
		error("tree_msg_to_stepds: no list was returned");
		rc = SLURM_ERROR;
	} else {
		for (reply = list_pop(replies); reply;
		     reply = list_pop(replies)) {
			int node_rc = slurm_get_return_code(reply->type,
							    reply->data);
			if (node_rc == 0) {
				/* Delivered: drop the host from hl. */
				hostlist_delete_host(hl, reply->node_name);
			} else {
				rc = node_rc;
			}
		}
	}

	slurm_free_msg(out_msg);
	xfree(targets);
	xfree(req.address);
	return rc;
}
/*
 * _send_message_controller - send req to one controller and interpret the
 *	return-code reply.
 * IN dest - which controller to contact (primary or backup)
 * IN req  - message to send
 * RET 0 when the reply is a RESPONSE_SLURM_RC whose return code is zero.
 *	Connection and send failures, a failed connection shutdown, an
 *	unexpected reply type and a non-zero return code are all reported
 *	through slurm_seterrno_ret().  A receive failure returns
 *	SLURMCTLD_COMMUNICATIONS_RECEIVE_ERROR directly.
 */
int _send_message_controller(enum controller_id dest, slurm_msg_t *req)
{
	int rc = SLURM_PROTOCOL_SUCCESS;
	slurm_fd_t fd = -1;
	slurm_msg_t *resp_msg = NULL;

	/* always going to one node (primary or backup per value of "dest") */
	if ((fd = slurm_open_controller_conn_spec(dest)) < 0)
		slurm_seterrno_ret(SLURMCTLD_COMMUNICATIONS_CONNECTION_ERROR);

	if (slurm_send_node_msg(fd, req) < 0) {
		slurm_shutdown_msg_conn(fd);
		slurm_seterrno_ret(SLURMCTLD_COMMUNICATIONS_SEND_ERROR);
	}

	resp_msg = xmalloc(sizeof(slurm_msg_t));
	slurm_msg_t_init(resp_msg);

	if ((rc = slurm_receive_msg(fd, resp_msg, 0)) != 0) {
		slurm_shutdown_msg_conn(fd);
		/* The response container was allocated above; release it on
		 * this early exit as well so it does not leak. */
		slurm_free_msg(resp_msg);
		return SLURMCTLD_COMMUNICATIONS_RECEIVE_ERROR;
	}

	if (slurm_shutdown_msg_conn(fd) != SLURM_SUCCESS)
		rc = SLURMCTLD_COMMUNICATIONS_SHUTDOWN_ERROR;
	else if (resp_msg->msg_type != RESPONSE_SLURM_RC)
		rc = SLURM_UNEXPECTED_MSG_ERROR;
	else
		rc = slurm_get_return_code(resp_msg->msg_type,
					   resp_msg->data);
	slurm_free_msg(resp_msg);

	if (rc)
		slurm_seterrno_ret(rc);
	return rc;
}
/* _background_rpc_mgr - Read and process incoming RPCs to the background * controller (that's us) */ static void *_background_rpc_mgr(void *no_data) { slurm_fd_t newsockfd; slurm_fd_t sockfd; slurm_addr_t cli_addr; slurm_msg_t *msg = NULL; int error_code; char* node_addr = NULL; /* Read configuration only */ slurmctld_lock_t config_read_lock = { READ_LOCK, NO_LOCK, NO_LOCK, NO_LOCK }; int sigarray[] = {SIGUSR1, 0}; (void) pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); (void) pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); debug3("_background_rpc_mgr pid = %lu", (unsigned long) getpid()); /* initialize port for RPCs */ lock_slurmctld(config_read_lock); /* set node_addr to bind to (NULL means any) */ if ((strcmp(slurmctld_conf.backup_controller, slurmctld_conf.backup_addr) != 0)) { node_addr = slurmctld_conf.backup_addr ; } if ((sockfd = slurm_init_msg_engine_addrname_port(node_addr, slurmctld_conf. slurmctld_port)) == SLURM_SOCKET_ERROR) fatal("slurm_init_msg_engine_addrname_port error %m"); unlock_slurmctld(config_read_lock); /* Prepare to catch SIGUSR1 to interrupt accept(). * This signal is generated by the slurmctld signal * handler thread upon receipt of SIGABRT, SIGINT, * or SIGTERM. That thread does all processing of * all signals. 
*/ xsignal(SIGUSR1, _sig_handler); xsignal_unblock(sigarray); /* * Process incoming RPCs indefinitely */ while (slurmctld_config.shutdown_time == 0) { /* accept needed for stream implementation * is a no-op in message implementation that just passes * sockfd to newsockfd */ if ((newsockfd = slurm_accept_msg_conn(sockfd, &cli_addr)) == SLURM_SOCKET_ERROR) { if (errno != EINTR) error("slurm_accept_msg_conn: %m"); continue; } msg = xmalloc(sizeof(slurm_msg_t)); slurm_msg_t_init(msg); if (slurm_receive_msg(newsockfd, msg, 0) != 0) error("slurm_receive_msg: %m"); error_code = _background_process_msg(msg); if ((error_code == SLURM_SUCCESS) && (msg->msg_type == REQUEST_SHUTDOWN_IMMEDIATE) && (slurmctld_config.shutdown_time == 0)) slurmctld_config.shutdown_time = time(NULL); slurm_free_msg_data(msg->msg_type, msg->data); slurm_free_msg(msg); slurm_close(newsockfd); /* close new socket */ } debug3("_background_rpc_mgr shutting down"); slurm_close(sockfd); /* close the main socket */ pthread_exit((void *) 0); return NULL; }