Exemple #1
0
static int _local_send_recv_rc_msgs(const char *nodelist,
				    slurm_msg_type_t type, void *data)
{
	List ret_list = NULL;
	int temp_rc = 0, rc = 0;
	ret_data_info_t *ret_data_info = NULL;
	slurm_msg_t *msg = xmalloc(sizeof(slurm_msg_t));

	slurm_msg_t_init(msg);
	msg->msg_type = type;
	msg->data = data;

	if ((ret_list = slurm_send_recv_msgs(nodelist, msg, 0, false))) {
		while ((ret_data_info = list_pop(ret_list))) {
			temp_rc = slurm_get_return_code(ret_data_info->type,
							ret_data_info->data);
			if (temp_rc)
				rc = temp_rc;
		}
	} else {
		error("slurm_signal_job: no list was returned");
		rc = SLURM_ERROR;
	}

	slurm_free_msg(msg);
	return rc;
}
Exemple #2
0
static void *
_service_connection(void *arg)
{
	conn_t *con = (conn_t *) arg;
	slurm_msg_t *msg = xmalloc(sizeof(slurm_msg_t));
	int rc = SLURM_SUCCESS;

	debug3("in the service_connection");
	slurm_msg_t_init(msg);
	if ((rc = slurm_receive_msg_and_forward(con->fd, con->cli_addr, msg, 0))
	   != SLURM_SUCCESS) {
		error("service_connection: slurm_receive_msg: %m");
		/* if this fails we need to make sure the nodes we forward
		   to are taken care of and sent back. This way the control
		   also has a better idea what happened to us */
		slurm_send_rc_msg(msg, rc);
		goto cleanup;
	}
	debug2("got this type of message %d", msg->msg_type);
	slurmd_req(msg);

cleanup:
	if ((msg->conn_fd >= 0) && slurm_close_accepted_conn(msg->conn_fd) < 0)
		error ("close(%d): %m", con->fd);

	xfree(con->cli_addr);
	xfree(con);
	slurm_free_msg(msg);
	_decrement_thd_count();
	return NULL;
}
Exemple #3
0
int eio_message_socket_accept(eio_obj_t *obj, List objs)
{
	int fd;
	unsigned char *uc;
	unsigned short port;
	struct sockaddr_in addr;
	slurm_msg_t *msg = NULL;
	int len = sizeof(addr);

	debug3("Called eio_msg_socket_accept");

	xassert(obj);
	xassert(obj->ops->handle_msg);

	while ((fd = accept(obj->fd, (struct sockaddr *)&addr,
			    (socklen_t *)&len)) < 0) {
		if (errno == EINTR)
			continue;
		if (errno == EAGAIN       ||
		    errno == ECONNABORTED ||
		    errno == EWOULDBLOCK) {
			return SLURM_SUCCESS;
		}
		error("Error on msg accept socket: %m");
		obj->shutdown = true;
		return SLURM_SUCCESS;
	}

	fd_set_close_on_exec(fd);
	fd_set_blocking(fd);

	/* Should not call slurm_get_addr() because the IP may not be
	   in /etc/hosts. */
	uc = (unsigned char *)&addr.sin_addr.s_addr;
	port = addr.sin_port;
	debug2("got message connection from %u.%u.%u.%u:%hu %d",
	       uc[0], uc[1], uc[2], uc[3], ntohs(port), fd);
	fflush(stdout);

	msg = xmalloc(sizeof(slurm_msg_t));
	slurm_msg_t_init(msg);
again:
	if(slurm_receive_msg(fd, msg, obj->ops->timeout) != 0) {
		if (errno == EINTR) {
			goto again;
		}
		error("slurm_receive_msg[%u.%u.%u.%u]: %m",
		      uc[0],uc[1],uc[2],uc[3]);
		goto cleanup;
	}

	(*obj->ops->handle_msg)(obj->arg, msg); /* handle_msg should free
					      * msg->data */
cleanup:
	if ((msg->conn_fd >= 0) && slurm_close_accepted_conn(msg->conn_fd) < 0)
		error ("close(%d): %m", msg->conn_fd);
	slurm_free_msg(msg);

	return SLURM_SUCCESS;
}
Exemple #4
0
static void *_msg_thr_internal(void *arg)
{
	slurm_addr_t cli_addr;
	slurm_fd_t newsockfd;
	slurm_msg_t *msg;
	int *slurmctld_fd_ptr = (int *)arg;

	(void) pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
	(void) pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);

	while (!srun_shutdown) {
		newsockfd = slurm_accept_msg_conn(*slurmctld_fd_ptr, &cli_addr);
		if (newsockfd == SLURM_SOCKET_ERROR) {
			if (errno != EINTR)
				error("slurm_accept_msg_conn: %m");
			continue;
		}
		msg = xmalloc(sizeof(slurm_msg_t));
		if (slurm_receive_msg(newsockfd, msg, 0) != 0) {
			error("slurm_receive_msg: %m");
			/* close the new socket */
			slurm_close_accepted_conn(newsockfd);
			continue;
		}
		_handle_msg(msg);
		slurm_free_msg(msg);
		slurm_close_accepted_conn(newsockfd);
	}
	return NULL;
}
Exemple #5
0
/* Accept RPC from slurmctld and process it.
 * IN slurmctld_fd: file descriptor for slurmctld communications
 * OUT resp: resource allocation response message
 * RET 1 if resp is filled in, 0 otherwise */
static int
_accept_msg_connection(int listen_fd,
		       resource_allocation_response_msg_t **resp)
{
	int	     conn_fd;
	slurm_msg_t  *msg = NULL;
	slurm_addr_t   cli_addr;
	char         host[256];
	uint16_t     port;
	int          rc = 0;

	conn_fd = slurm_accept_msg_conn(listen_fd, &cli_addr);
	if (conn_fd < 0) {
		error("Unable to accept connection: %m");
		return rc;
	}

	slurm_get_addr(&cli_addr, &port, host, sizeof(host));
	debug2("got message connection from %s:%hu", host, port);

	msg = xmalloc(sizeof(slurm_msg_t));
	slurm_msg_t_init(msg);

	if((rc = slurm_receive_msg(conn_fd, msg, 0)) != 0) {
		slurm_free_msg(msg);

		if (errno == EINTR) {
			slurm_close_accepted_conn(conn_fd);
			*resp = NULL;
			return 0;
		}

		error("_accept_msg_connection[%s]: %m", host);
		slurm_close_accepted_conn(conn_fd);
		return SLURM_ERROR;
	}

	rc = _handle_msg(msg, resp); /* handle_msg frees msg */
	slurm_free_msg(msg);

	slurm_close_accepted_conn(conn_fd);
	return rc;
}
Exemple #6
0
static int _send_to_stepds(hostlist_t hl, const char *addr, uint32_t len,
		char *data)
{
	List ret_list = NULL;
	int temp_rc = 0, rc = 0;
	ret_data_info_t *ret_data_info = NULL;
	slurm_msg_t *msg = xmalloc(sizeof(slurm_msg_t));
	forward_data_msg_t req;
	char *nodelist = NULL;

	slurm_msg_t_init(msg);
	req.address = xstrdup(addr);
	req.len = len;
	req.data = data;

	msg->msg_type = REQUEST_FORWARD_DATA;
	msg->data = &req;

	nodelist = hostlist_ranged_string_xmalloc(hl);

	if ((ret_list = slurm_send_recv_msgs(nodelist, msg, 0, false))) {
		while ((ret_data_info = list_pop(ret_list))) {
			temp_rc = slurm_get_return_code(ret_data_info->type,
					ret_data_info->data);
			if (temp_rc) {
				rc = temp_rc;
			} else {
				hostlist_delete_host(hl,
						ret_data_info->node_name);
			}
		}
	} else {
		error("tree_msg_to_stepds: no list was returned");
		rc = SLURM_ERROR;
	}

	slurm_free_msg(msg);
	xfree(nodelist);
	xfree(req.address);
	return rc;
}
Exemple #7
0
int
_send_message_controller (enum controller_id dest, slurm_msg_t *req)
{
	int rc = SLURM_PROTOCOL_SUCCESS;
	slurm_fd_t fd = -1;
	slurm_msg_t *resp_msg = NULL;

	/* always going to one node (primary or backup per value of "dest") */
	if ((fd = slurm_open_controller_conn_spec(dest)) < 0)
		slurm_seterrno_ret(SLURMCTLD_COMMUNICATIONS_CONNECTION_ERROR);

	if (slurm_send_node_msg(fd, req) < 0) {
		slurm_shutdown_msg_conn(fd);
		slurm_seterrno_ret(SLURMCTLD_COMMUNICATIONS_SEND_ERROR);
	}
	resp_msg = xmalloc(sizeof(slurm_msg_t));
	slurm_msg_t_init(resp_msg);

	if((rc = slurm_receive_msg(fd, resp_msg, 0)) != 0) {
		slurm_shutdown_msg_conn(fd);
		return SLURMCTLD_COMMUNICATIONS_RECEIVE_ERROR;
	}

	if (slurm_shutdown_msg_conn(fd) != SLURM_SUCCESS)
		rc = SLURMCTLD_COMMUNICATIONS_SHUTDOWN_ERROR;
	else if (resp_msg->msg_type != RESPONSE_SLURM_RC)
		rc = SLURM_UNEXPECTED_MSG_ERROR;
	else
		rc = slurm_get_return_code(resp_msg->msg_type,
					   resp_msg->data);
	slurm_free_msg(resp_msg);

	if (rc)
		slurm_seterrno_ret(rc);
        return rc;
}
Exemple #8
0
/* _background_rpc_mgr - Read and process incoming RPCs to the background
 *	controller (that's us) */
static void *_background_rpc_mgr(void *no_data)
{
	slurm_fd_t newsockfd;
	slurm_fd_t sockfd;
	slurm_addr_t cli_addr;
	slurm_msg_t *msg = NULL;
	int error_code;
	char* node_addr = NULL;

	/* Read configuration only */
	slurmctld_lock_t config_read_lock = {
		READ_LOCK, NO_LOCK, NO_LOCK, NO_LOCK };
	int sigarray[] = {SIGUSR1, 0};

	(void) pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
	(void) pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
	debug3("_background_rpc_mgr pid = %lu", (unsigned long) getpid());

	/* initialize port for RPCs */
	lock_slurmctld(config_read_lock);

	/* set node_addr to bind to (NULL means any) */
	if ((strcmp(slurmctld_conf.backup_controller,
		    slurmctld_conf.backup_addr) != 0)) {
		node_addr = slurmctld_conf.backup_addr ;
	}

	if ((sockfd =
	     slurm_init_msg_engine_addrname_port(node_addr,
						 slurmctld_conf.
						 slurmctld_port))
	    == SLURM_SOCKET_ERROR)
		fatal("slurm_init_msg_engine_addrname_port error %m");
	unlock_slurmctld(config_read_lock);

	/* Prepare to catch SIGUSR1 to interrupt accept().
	 * This signal is generated by the slurmctld signal
	 * handler thread upon receipt of SIGABRT, SIGINT,
	 * or SIGTERM. That thread does all processing of
	 * all signals. */
	xsignal(SIGUSR1, _sig_handler);
	xsignal_unblock(sigarray);

	/*
	 * Process incoming RPCs indefinitely
	 */
	while (slurmctld_config.shutdown_time == 0) {
		/* accept needed for stream implementation
		 * is a no-op in message implementation that just passes
		 * sockfd to newsockfd */
		if ((newsockfd = slurm_accept_msg_conn(sockfd, &cli_addr))
		    == SLURM_SOCKET_ERROR) {
			if (errno != EINTR)
				error("slurm_accept_msg_conn: %m");
			continue;
		}

		msg = xmalloc(sizeof(slurm_msg_t));
		slurm_msg_t_init(msg);
		if (slurm_receive_msg(newsockfd, msg, 0) != 0)
			error("slurm_receive_msg: %m");

		error_code = _background_process_msg(msg);
		if ((error_code == SLURM_SUCCESS)			&&
		    (msg->msg_type == REQUEST_SHUTDOWN_IMMEDIATE)	&&
		    (slurmctld_config.shutdown_time == 0))
			slurmctld_config.shutdown_time = time(NULL);

		slurm_free_msg_data(msg->msg_type, msg->data);
		slurm_free_msg(msg);

		slurm_close(newsockfd);	/* close new socket */
	}

	debug3("_background_rpc_mgr shutting down");
	slurm_close(sockfd);	/* close the main socket */
	pthread_exit((void *) 0);
	return NULL;
}