Example #1
0
/*
 * send_slurmctld_register_req - request register from slurmctld
 * IN host: control host of cluster
 * IN port: control port of cluster
 * IN rpc_version: rpc version of cluster
 * RET:  error code
 */
static int _send_slurmctld_register_req(slurmdb_cluster_rec_t *cluster_rec)
{
	slurm_addr_t ctld_address;
	slurm_fd_t fd;
	int rc = SLURM_SUCCESS;

	slurm_set_addr_char(&ctld_address, cluster_rec->control_port,
			    cluster_rec->control_host);
	fd = slurm_open_msg_conn(&ctld_address);
	if (fd < 0) {
		rc = SLURM_ERROR;
	} else {
		slurm_msg_t out_msg;
		slurm_msg_t_init(&out_msg);
		out_msg.msg_type = ACCOUNTING_REGISTER_CTLD;
		out_msg.flags = SLURM_GLOBAL_AUTH_KEY;
		out_msg.protocol_version
			= slurmdbd_translate_rpc(cluster_rec->rpc_version);
		slurm_send_node_msg(fd, &out_msg);
		/* We probably need to add matching recv_msg function
		 * for an arbitray fd or should these be fire
		 * and forget?  For this, that we can probably
		 * forget about it */
		slurm_close_stream(fd);
	}
	return rc;
}
Example #2
0
/*
 * cluster_first_reg - ask for controller to send nodes in a down state
 *    and jobs pending or running on first registration.
 *
 * IN host: controller host
 * IN port: controller port
 * IN rpc_version: controller rpc version
 * RET: error code
 */
extern int cluster_first_reg(char *host, uint16_t port, uint16_t rpc_version)
{
	slurm_addr_t ctld_address;
	int fd;
	int rc = SLURM_SUCCESS;

	info("First time to register cluster requesting "
	     "running jobs and system information.");

	slurm_set_addr_char(&ctld_address, port, host);
	fd = slurm_open_msg_conn(&ctld_address);
	if (fd < 0) {
		error("can not open socket back to slurmctld "
		      "%s(%u): %m", host, port);
		rc = SLURM_ERROR;
	} else {
		slurm_msg_t out_msg;
		accounting_update_msg_t update;
		/* We have to put this update message here so
		   we can tell the sender to send the correct
		   RPC version.
		*/
		memset(&update, 0, sizeof(accounting_update_msg_t));
		update.rpc_version = rpc_version;
		slurm_msg_t_init(&out_msg);
		out_msg.msg_type = ACCOUNTING_FIRST_REG;
		out_msg.flags = SLURM_GLOBAL_AUTH_KEY;
		out_msg.data = &update;
		slurm_send_node_msg(fd, &out_msg);
		/* We probably need to add matching recv_msg function
		 * for an arbitray fd or should these be fire
		 * and forget?  For this, that we can probably
		 * forget about it */
		close(fd);
	}
	return rc;
}
Example #3
0
int
_send_message_controller (enum controller_id dest, slurm_msg_t *req)
{
	int rc = SLURM_PROTOCOL_SUCCESS;
	slurm_fd_t fd = -1;
	slurm_msg_t *resp_msg = NULL;

	/* always going to one node (primary or backup per value of "dest") */
	if ((fd = slurm_open_controller_conn_spec(dest)) < 0)
		slurm_seterrno_ret(SLURMCTLD_COMMUNICATIONS_CONNECTION_ERROR);

	if (slurm_send_node_msg(fd, req) < 0) {
		slurm_shutdown_msg_conn(fd);
		slurm_seterrno_ret(SLURMCTLD_COMMUNICATIONS_SEND_ERROR);
	}
	resp_msg = xmalloc(sizeof(slurm_msg_t));
	slurm_msg_t_init(resp_msg);

	if((rc = slurm_receive_msg(fd, resp_msg, 0)) != 0) {
		slurm_shutdown_msg_conn(fd);
		return SLURMCTLD_COMMUNICATIONS_RECEIVE_ERROR;
	}

	if (slurm_shutdown_msg_conn(fd) != SLURM_SUCCESS)
		rc = SLURMCTLD_COMMUNICATIONS_SHUTDOWN_ERROR;
	else if (resp_msg->msg_type != RESPONSE_SLURM_RC)
		rc = SLURM_UNEXPECTED_MSG_ERROR;
	else
		rc = slurm_get_return_code(resp_msg->msg_type,
					   resp_msg->data);
	slurm_free_msg(resp_msg);

	if (rc)
		slurm_seterrno_ret(rc);
        return rc;
}
Example #4
0
/* Open a persistent socket connection
 * IN/OUT - persistent connection needing rem_host and rem_port filled in.
 * Returned completely filled in.
 * Returns SLURM_SUCCESS on success or SLURM_ERROR on failure */
extern int slurm_persist_conn_open(slurm_persist_conn_t *persist_conn)
{
	int rc = SLURM_ERROR;
	slurm_msg_t req_msg;
	persist_init_req_msg_t req;
	persist_rc_msg_t *resp = NULL;

	if (slurm_persist_conn_open_without_init(persist_conn) != SLURM_SUCCESS)
		return rc;

	slurm_msg_t_init(&req_msg);

	/* Always send the lowest protocol since we don't know what version the
	 * other side is running yet.
	 */
	req_msg.protocol_version = persist_conn->version;
	req_msg.msg_type = REQUEST_PERSIST_INIT;

	req_msg.flags |= SLURM_GLOBAL_AUTH_KEY;
	if (persist_conn->flags & PERSIST_FLAG_DBD)
		req_msg.flags |= SLURMDBD_CONNECTION;

	memset(&req, 0, sizeof(persist_init_req_msg_t));
	req.cluster_name = persist_conn->cluster_name;
	req.persist_type = persist_conn->persist_type;
	req.port = persist_conn->my_port;
	req.version = SLURM_PROTOCOL_VERSION;

	req_msg.data = &req;

	if (slurm_send_node_msg(persist_conn->fd, &req_msg) < 0) {
		error("%s: failed to send persistent connection init message to %s:%d",
		      __func__, persist_conn->rem_host, persist_conn->rem_port);
		_close_fd(&persist_conn->fd);
	} else {
		Buf buffer = slurm_persist_recv_msg(persist_conn);
		persist_msg_t msg;
		slurm_persist_conn_t persist_conn_tmp;

		if (!buffer) {
			if (_comm_fail_log(persist_conn)) {
				error("%s: No response to persist_init",
				      __func__);
			}
			_close_fd(&persist_conn->fd);
			goto end_it;
		}
		memset(&msg, 0, sizeof(persist_msg_t));
		memcpy(&persist_conn_tmp, persist_conn,
		       sizeof(slurm_persist_conn_t));
		/* The first unpack is done the same way for dbd or normal
		 * communication . */
		persist_conn_tmp.flags &= (~PERSIST_FLAG_DBD);
		rc = slurm_persist_msg_unpack(&persist_conn_tmp, &msg, buffer);
		free_buf(buffer);

		resp = (persist_rc_msg_t *)msg.data;
		if (resp && (rc == SLURM_SUCCESS)) {
			rc = resp->rc;
			persist_conn->version = resp->ret_info;
			persist_conn->flags |= resp->flags;
		}

		if (rc != SLURM_SUCCESS) {
			if (resp) {
				error("%s: Something happened with the receiving/processing of the persistent connection init message to %s:%d: %s",
				      __func__, persist_conn->rem_host,
				      persist_conn->rem_port, resp->comment);
			} else {
				error("%s: Failed to unpack persistent connection init resp message from %s:%d",
				      __func__,
				      persist_conn->rem_host,
				      persist_conn->rem_port);
			}
			_close_fd(&persist_conn->fd);
		}
	}

end_it:

	slurm_persist_free_rc_msg(resp);

	return rc;
}