Example #1
/* Read the next message from the persistent SlurmDBD connection and return
 * the return code it carries, or SLURM_ERROR if no reply could be read. */
static int _get_return_code(void)
{
	int rc = SLURM_ERROR;
	Buf buffer = slurm_persist_recv_msg(slurmdbd_conn);
	if (buffer == NULL)
		return rc;

	rc = _unpack_return_code(slurmdbd_conn->version, buffer);

	free_buf(buffer);
	return rc;
}
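
Below is a minimal caller sketch, assuming this helper is used right after pushing a request whose only reply is a return code. The wrapper name _send_rc_request and its arguments are hypothetical; pack_slurmdbd_msg, slurm_persist_send_msg, free_buf, and the slurmdbd_conn global are the same names used in the examples here.

/* Hypothetical wrapper: pack a request, push it over the persistent
 * SlurmDBD connection, then read back the bare return code. */
static int _send_rc_request(slurmdbd_msg_t *req, uint16_t rpc_version)
{
	int rc = SLURM_ERROR;
	Buf buffer = pack_slurmdbd_msg(req, rpc_version);

	if (!buffer)
		return rc;

	rc = slurm_persist_send_msg(slurmdbd_conn, buffer);
	free_buf(buffer);
	if (rc != SLURM_SUCCESS)
		return rc;

	/* The reply to this kind of request carries only a return code. */
	return _get_return_code();
}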
Example #2
/* Open a persistent socket connection
 * IN/OUT - persistent connection needing rem_host and rem_port filled in.
 * Returned completely filled in.
 * Returns SLURM_SUCCESS on success or SLURM_ERROR on failure */
extern int slurm_persist_conn_open(slurm_persist_conn_t *persist_conn)
{
	int rc = SLURM_ERROR;
	slurm_msg_t req_msg;
	persist_init_req_msg_t req;
	persist_rc_msg_t *resp = NULL;

	if (slurm_persist_conn_open_without_init(persist_conn) != SLURM_SUCCESS)
		return rc;

	slurm_msg_t_init(&req_msg);

	/* Always send the lowest protocol version since we don't yet know
	 * what version the other side is running; persist_conn->version is
	 * initialized to the minimum supported version before this point.
	 */
	req_msg.protocol_version = persist_conn->version;
	req_msg.msg_type = REQUEST_PERSIST_INIT;

	req_msg.flags |= SLURM_GLOBAL_AUTH_KEY;
	if (persist_conn->flags & PERSIST_FLAG_DBD)
		req_msg.flags |= SLURMDBD_CONNECTION;

	memset(&req, 0, sizeof(persist_init_req_msg_t));
	req.cluster_name = persist_conn->cluster_name;
	req.persist_type = persist_conn->persist_type;
	req.port = persist_conn->my_port;
	req.version = SLURM_PROTOCOL_VERSION;

	req_msg.data = &req;

	if (slurm_send_node_msg(persist_conn->fd, &req_msg) < 0) {
		error("%s: failed to send persistent connection init message to %s:%d",
		      __func__, persist_conn->rem_host, persist_conn->rem_port);
		_close_fd(&persist_conn->fd);
	} else {
		Buf buffer = slurm_persist_recv_msg(persist_conn);
		persist_msg_t msg;
		slurm_persist_conn_t persist_conn_tmp;

		if (!buffer) {
			if (_comm_fail_log(persist_conn)) {
				error("%s: No response to persist_init",
				      __func__);
			}
			_close_fd(&persist_conn->fd);
			goto end_it;
		}
		memset(&msg, 0, sizeof(persist_msg_t));
		memcpy(&persist_conn_tmp, persist_conn,
		       sizeof(slurm_persist_conn_t));
		/* The first unpack is done the same way for DBD and normal
		 * communication, so strip the DBD flag for it. */
		persist_conn_tmp.flags &= (~PERSIST_FLAG_DBD);
		rc = slurm_persist_msg_unpack(&persist_conn_tmp, &msg, buffer);
		free_buf(buffer);

		resp = (persist_rc_msg_t *)msg.data;
		if (resp && (rc == SLURM_SUCCESS)) {
			rc = resp->rc;
			persist_conn->version = resp->ret_info;
			persist_conn->flags |= resp->flags;
		}

		if (rc != SLURM_SUCCESS) {
			if (resp) {
				error("%s: Something happened with the receiving/processing of the persistent connection init message to %s:%d: %s",
				      __func__, persist_conn->rem_host,
				      persist_conn->rem_port, resp->comment);
			} else {
				error("%s: Failed to unpack persistent connection init resp message from %s:%d",
				      __func__,
				      persist_conn->rem_host,
				      persist_conn->rem_port);
			}
			_close_fd(&persist_conn->fd);
		}
	}

end_it:

	slurm_persist_free_rc_msg(resp);

	return rc;
}
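
A minimal caller sketch under the assumptions of the header comment above: the fields filled in (rem_host, rem_port, cluster_name, my_port, flags, persist_type) are the ones slurm_persist_conn_open() reads in this example, the host/port/cluster values are placeholders, and PERSIST_TYPE_DBD plus the shutdown pointer are assumptions about the surrounding slurm_persist_conn API. The function name is hypothetical.

/* Hypothetical caller: fill in the connection description, then open and
 * handshake the persistent connection.  Values are placeholders. */
static int _open_dbd_conn_example(slurm_persist_conn_t *persist_conn)
{
	static time_t shutdown_time = 0;	/* assumed: polled to abort blocking I/O */

	memset(persist_conn, 0, sizeof(slurm_persist_conn_t));
	persist_conn->rem_host = xstrdup("dbd.example.org");	/* placeholder */
	persist_conn->rem_port = 6819;				/* placeholder */
	persist_conn->cluster_name = xstrdup("cluster1");	/* placeholder */
	persist_conn->my_port = 0;
	persist_conn->flags = PERSIST_FLAG_DBD;
	persist_conn->persist_type = PERSIST_TYPE_DBD;		/* assumed enum value */
	persist_conn->shutdown = &shutdown_time;

	return slurm_persist_conn_open(persist_conn);
}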
Example #3
/* Send an RPC to the SlurmDBD and wait for an arbitrary reply message.
 * The RPC will not be queued if an error occurs.
 * The "resp" message must be freed by the caller.
 * Returns SLURM_SUCCESS or an error code */
extern int send_recv_slurmdbd_msg(uint16_t rpc_version,
				  slurmdbd_msg_t *req,
				  slurmdbd_msg_t *resp)
{
	int rc = SLURM_SUCCESS;
	Buf buffer;

	xassert(req);
	xassert(resp);

	/* Set halt_agent so this message goes out ahead of anything the
	 * agent might send at any time; once we hold the mutex, clear it
	 * again. */
	halt_agent = 1;
	slurm_mutex_lock(&slurmdbd_lock);
	halt_agent = 0;
	if (!slurmdbd_conn || (slurmdbd_conn->fd < 0)) {
		/* Either slurm_open_slurmdbd_conn() was not executed or
		 * the connection to Slurm DBD has been closed */
		if (req->msg_type == DBD_GET_CONFIG)
			_open_slurmdbd_conn(0);
		else
			_open_slurmdbd_conn(1);
		if (!slurmdbd_conn || (slurmdbd_conn->fd < 0)) {
			rc = SLURM_ERROR;
			goto end_it;
		}
	}

	if (!(buffer = pack_slurmdbd_msg(req, rpc_version))) {
		rc = SLURM_ERROR;
		goto end_it;
	}

	rc = slurm_persist_send_msg(slurmdbd_conn, buffer);
	free_buf(buffer);
	if (rc != SLURM_SUCCESS) {
		error("slurmdbd: Sending message type %s: %d: %m",
		      rpc_num2string(req->msg_type), rc);
		goto end_it;
	}

	buffer = slurm_persist_recv_msg(slurmdbd_conn);
	if (buffer == NULL) {
		error("slurmdbd: Getting response to message type %u",
		      req->msg_type);
		rc = SLURM_ERROR;
		goto end_it;
	}

	rc = unpack_slurmdbd_msg(resp, rpc_version, buffer);
	/* check for the rc of the start job message */
	if (rc == SLURM_SUCCESS && resp->msg_type == DBD_ID_RC)
		rc = ((dbd_id_rc_msg_t *)resp->data)->return_code;

	free_buf(buffer);
end_it:
	slurm_cond_signal(&slurmdbd_cond);
	slurm_mutex_unlock(&slurmdbd_lock);

	return rc;
}
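
A minimal caller sketch of the request/response round trip, assuming a DBD_GET_CONFIG request whose reply arrives as DBD_GOT_CONFIG carrying a dbd_list_msg_t (the reply type and payload are assumptions; the function name is hypothetical). DBD_GET_CONFIG, send_recv_slurmdbd_msg, and slurmdbd_free_list_msg are the names already used in these examples.

/* Hypothetical caller: ask the SlurmDBD for its configuration and free
 * whatever reply comes back. */
static int _get_dbd_config_example(uint16_t rpc_version)
{
	slurmdbd_msg_t req, resp;
	int rc;

	memset(&req, 0, sizeof(slurmdbd_msg_t));
	memset(&resp, 0, sizeof(slurmdbd_msg_t));
	req.msg_type = DBD_GET_CONFIG;

	rc = send_recv_slurmdbd_msg(rpc_version, &req, &resp);
	if (rc != SLURM_SUCCESS)
		return rc;

	if (resp.msg_type == DBD_GOT_CONFIG)	/* assumed reply type */
		slurmdbd_free_list_msg(resp.data);

	return rc;
}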
Example #4
/* Read the reply to a batched message send: either a DBD_GOT_MULT_MSG list
 * holding one return code per queued message, or a single PERSIST_RC
 * message.  Acknowledged messages are dequeued from agent_list.
 * Returns SLURM_SUCCESS or an error code. */
static int _handle_mult_rc_ret(void)
{
	Buf buffer;
	uint16_t msg_type;
	persist_rc_msg_t *msg = NULL;
	dbd_list_msg_t *list_msg = NULL;
	int rc = SLURM_ERROR;
	Buf out_buf = NULL;

	buffer = slurm_persist_recv_msg(slurmdbd_conn);
	if (buffer == NULL)
		return rc;

	safe_unpack16(&msg_type, buffer);
	switch (msg_type) {
	case DBD_GOT_MULT_MSG:
		if (slurmdbd_unpack_list_msg(
			    &list_msg, slurmdbd_conn->version,
			    DBD_GOT_MULT_MSG, buffer)
		    != SLURM_SUCCESS) {
			error("slurmdbd: unpack message error");
			break;
		}

		slurm_mutex_lock(&agent_lock);
		if (agent_list) {
			ListIterator itr =
				list_iterator_create(list_msg->my_list);
			while ((out_buf = list_next(itr))) {
				Buf b;
				if ((rc = _unpack_return_code(
					     slurmdbd_conn->version, out_buf))
				    != SLURM_SUCCESS)
					break;

				if ((b = list_dequeue(agent_list))) {
					free_buf(b);
				} else {
					error("slurmdbd: DBD_GOT_MULT_MSG "
					      "unpack message error");
				}
			}
			list_iterator_destroy(itr);
		}
		slurm_mutex_unlock(&agent_lock);
		slurmdbd_free_list_msg(list_msg);
		break;
	case PERSIST_RC:
		if (slurm_persist_unpack_rc_msg(
			    &msg, buffer, slurmdbd_conn->version)
		    == SLURM_SUCCESS) {
			rc = msg->rc;
			if (rc != SLURM_SUCCESS) {
				if (msg->ret_info == DBD_REGISTER_CTLD &&
				    slurm_get_accounting_storage_enforce()) {
					error("slurmdbd: PERSIST_RC is %d from "
					      "%s(%u): %s",
					      rc,
					      slurmdbd_msg_type_2_str(
						      msg->ret_info, 1),
					      msg->ret_info,
					      msg->comment);
					fatal("You need to add this cluster "
					      "to accounting if you want to "
					      "enforce associations, or no "
					      "jobs will ever run.");
				} else
					debug("slurmdbd: PERSIST_RC is %d from "
					      "%s(%u): %s",
					      rc,
					      slurmdbd_msg_type_2_str(
						      msg->ret_info, 1),
					      msg->ret_info,
					      msg->comment);
			}
			slurm_persist_free_rc_msg(msg);
		} else
			error("slurmdbd: unpack message error");
		break;
	default:
		error("slurmdbd: bad message type %d != PERSIST_RC", msg_type);
	}

unpack_error:
	free_buf(buffer);
	return rc;
}
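
A minimal sketch of the sending side this handler pairs with, assuming the agent batches already-packed Bufs into a dbd_list_msg_t and sends them as one DBD_SEND_MULT_MSG request (the request type is an assumption; the function name is hypothetical). The pack/send calls and the slurmdbd_conn global are the same names used in the earlier examples.

/* Hypothetical sender: wrap a list of already-packed messages in a single
 * multi-message request, send it, then collect the per-message return
 * codes with _handle_mult_rc_ret(). */
static int _send_mult_msg_example(List packed_msgs, uint16_t rpc_version)
{
	slurmdbd_msg_t req;
	dbd_list_msg_t list_req;
	Buf buffer;
	int rc;

	memset(&list_req, 0, sizeof(dbd_list_msg_t));
	list_req.my_list = packed_msgs;

	memset(&req, 0, sizeof(slurmdbd_msg_t));
	req.msg_type = DBD_SEND_MULT_MSG;	/* assumed request type */
	req.data = &list_req;

	if (!(buffer = pack_slurmdbd_msg(&req, rpc_version)))
		return SLURM_ERROR;

	rc = slurm_persist_send_msg(slurmdbd_conn, buffer);
	free_buf(buffer);
	if (rc != SLURM_SUCCESS)
		return rc;

	/* The reply carries one return code per queued message. */
	return _handle_mult_rc_ret();
}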