Exemple #1
0
/*
 * Set up the ibw-specific part of a ctdb node.
 *
 * A zeroed struct ctdb_ibw_node is allocated as a talloc child of the node,
 * a new ibw connection is created for it, and the structure is stored in
 * node->private_data (this happens even when connection creation fails).
 *
 * Returns 0 if ibw_conn_new() produced a connection, -1 otherwise.
 */
static int ctdb_ibw_add_node(struct ctdb_node *node)
{
	struct ibw_ctx *ibw_context;
	struct ctdb_ibw_node *ibw_node;

	ibw_context = talloc_get_type(node->ctdb->private_data, struct ibw_ctx);
	ibw_node = talloc_zero(node, struct ctdb_ibw_node);
	assert(ibw_node != NULL);

	ibw_node->conn = ibw_conn_new(ibw_context, node);
	node->private_data = (void *)ibw_node;

	if (ibw_node->conn == NULL) {
		return -1;
	}

	return 0;
}
Exemple #2
0
/*
 * Post one receive work request for the connection.
 *
 * The request covers the receive-buffer slot selected by pconn->recv_index
 * (slots are opts.recv_bufsize bytes apart, starting at pconn->buf_recv) and
 * uses that index as its wr_id. recv_index is then advanced modulo
 * opts.max_recv_wr, before the post is attempted.
 *
 * Returns 0 on success; on ibv_post_recv() failure ibw_lasterr is filled in,
 * logged, and -2 is returned.
 */
static int ibw_refill_cq_recv(struct ibw_conn *conn)
{
	struct ibw_ctx_priv *pctx = talloc_get_type(conn->ctx->internal, struct ibw_ctx_priv);
	struct ibw_conn_priv *pconn = talloc_get_type(conn->internal, struct ibw_conn_priv);
	int	rc;
	struct ibv_sge list = {
		.addr 	= (uintptr_t) NULL, /* filled below */
		.length = pctx->opts.recv_bufsize,
		.lkey 	= pconn->mr_recv->lkey /* always the same */
	};
	struct ibv_recv_wr wr = {
		.wr_id 	    = 0, /* filled below */
		.sg_list    = &list,
		.num_sge    = 1,
	};
	struct ibv_recv_wr *bad_wr;

	DEBUG(DEBUG_DEBUG, ("ibw_refill_cq_recv(cmid: %p)\n", pconn->cm_id));

	list.addr = (uintptr_t) pconn->buf_recv + pctx->opts.recv_bufsize * pconn->recv_index;
	wr.wr_id = pconn->recv_index;
	pconn->recv_index = (pconn->recv_index + 1) % pctx->opts.max_recv_wr;

	rc = ibv_post_recv(pconn->cm_id->qp, &wr, &bad_wr);
	if (rc) {
		sprintf(ibw_lasterr, "refill/ibv_post_recv failed with %d\n", rc);
		/* never use a runtime buffer as the format string itself */
		DEBUG(DEBUG_ERR, ("%s", ibw_lasterr));
		return -2;
	}

	return 0;
}

/*
 * Post opts.max_recv_wr receive work requests for the connection.
 *
 * Each iteration posts the receive-buffer slot selected by pconn->recv_index
 * (slots are opts.recv_bufsize bytes apart, starting at pconn->buf_recv),
 * uses that index as the wr_id, and advances recv_index modulo
 * opts.max_recv_wr.
 *
 * Returns 0 when every request was posted; on the first ibv_post_recv()
 * failure ibw_lasterr is filled in, logged, and -2 is returned (requests
 * posted before the failure are not withdrawn).
 */
static int ibw_fill_cq(struct ibw_conn *conn)
{
	struct ibw_ctx_priv *pctx = talloc_get_type(conn->ctx->internal, struct ibw_ctx_priv);
	struct ibw_conn_priv *pconn = talloc_get_type(conn->internal, struct ibw_conn_priv);
	int	i, rc;
	struct ibv_sge list = {
		.addr 	= (uintptr_t) NULL, /* filled below */
		.length = pctx->opts.recv_bufsize,
		.lkey 	= pconn->mr_recv->lkey /* always the same */
	};
	struct ibv_recv_wr wr = {
		.wr_id 	    = 0, /* filled below */
		.sg_list    = &list,
		.num_sge    = 1,
	};
	struct ibv_recv_wr *bad_wr;

	DEBUG(DEBUG_DEBUG, ("ibw_fill_cq(cmid: %p)\n", pconn->cm_id));

	for(i = pctx->opts.max_recv_wr; i!=0; i--) {
		list.addr = (uintptr_t) pconn->buf_recv + pctx->opts.recv_bufsize * pconn->recv_index;
		wr.wr_id = pconn->recv_index;
		pconn->recv_index = (pconn->recv_index + 1) % pctx->opts.max_recv_wr;

		rc = ibv_post_recv(pconn->cm_id->qp, &wr, &bad_wr);
		if (rc) {
			sprintf(ibw_lasterr, "fill/ibv_post_recv failed with %d\n", rc);
			/* never use a runtime buffer as the format string itself */
			DEBUG(DEBUG_ERR, ("%s", ibw_lasterr));
			return -2;
		}
	}

	return 0;
}

static int ibw_manage_connect(struct ibw_conn *conn)
{
	struct rdma_conn_param conn_param;
	struct ibw_conn_priv *pconn = talloc_get_type(conn->internal, struct ibw_conn_priv);
	int	rc;

	DEBUG(DEBUG_DEBUG, ("ibw_manage_connect(cmid: %p)\n", pconn->cm_id));

	if (ibw_setup_cq_qp(conn))
		return -1;

	/* cm connect */
	memset(&conn_param, 0, sizeof conn_param);
	conn_param.responder_resources = 1;
	conn_param.initiator_depth = 1;
	conn_param.retry_count = 10;

	rc = rdma_connect(pconn->cm_id, &conn_param);
	if (rc)
		sprintf(ibw_lasterr, "rdma_connect error %d\n", rc);

	return rc;
}

/*
 * tevent fd handler for the RDMA connection-manager event channel.
 *
 * private_data is the struct ibw_ctx. One event is fetched from
 * pctx->cm_channel and dispatched on its type; state changes are reported
 * to the upper layer through pctx->connstate_func(ctx, conn).
 *
 * Every fetched event is acknowledged exactly once: either inside the case
 * that handles it (REJECTED with a conn, DISCONNECTED), on the normal exit
 * path, or in the error path. 'event' is set to NULL once it has been
 * acknowledged so that later code does not ack it again.
 *
 * On error the message in ibw_lasterr is logged and either the affected
 * connection (IBWC_ERROR) or the whole context (IBWS_ERROR) is flagged and
 * reported via connstate_func.
 */
static void ibw_event_handler_cm(struct tevent_context *ev,
	struct tevent_fd *fde, uint16_t flags, void *private_data)
{
	int	rc;
	struct ibw_ctx	*ctx = talloc_get_type(private_data, struct ibw_ctx);
	struct ibw_ctx_priv *pctx = NULL;
	struct ibw_conn *conn = NULL;
	struct ibw_conn_priv *pconn = NULL;
	struct rdma_cm_id *cma_id = NULL;
	struct rdma_cm_event *event = NULL;

	/* check ctx before it is dereferenced for the private part */
	assert(ctx!=NULL);
	pctx = talloc_get_type(ctx->internal, struct ibw_ctx_priv);

	rc = rdma_get_cm_event(pctx->cm_channel, &event);
	if (rc) {
		ctx->state = IBWS_ERROR;
		event = NULL;
		sprintf(ibw_lasterr, "rdma_get_cm_event error %d\n", rc);
		goto error;
	}
	cma_id = event->id;

	DEBUG(DEBUG_DEBUG, ("cma_event type %d cma_id %p (%s)\n", event->event, cma_id,
		  (cma_id == pctx->cm_id) ? "parent" : "child"));

	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
		DEBUG(DEBUG_DEBUG, ("RDMA_CM_EVENT_ADDR_RESOLVED\n"));
		/* continuing from ibw_connect ... */
		rc = rdma_resolve_route(cma_id, 2000);
		if (rc) {
			sprintf(ibw_lasterr, "rdma_resolve_route error %d\n", rc);
			goto error;
		}
		/* continued at RDMA_CM_EVENT_ROUTE_RESOLVED */
		break;

	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		DEBUG(DEBUG_DEBUG, ("RDMA_CM_EVENT_ROUTE_RESOLVED\n"));
		/* after RDMA_CM_EVENT_ADDR_RESOLVED: */
		assert(cma_id->context!=NULL);
		conn = talloc_get_type(cma_id->context, struct ibw_conn);

		rc = ibw_manage_connect(conn);
		if (rc)
			goto error;

		break;

	case RDMA_CM_EVENT_CONNECT_REQUEST:
		DEBUG(DEBUG_DEBUG, ("RDMA_CM_EVENT_CONNECT_REQUEST\n"));
		ctx->state = IBWS_CONNECT_REQUEST;
		conn = ibw_conn_new(ctx, ctx);
		pconn = talloc_get_type(conn->internal, struct ibw_conn_priv);
		pconn->cm_id = cma_id; /* !!! event will be freed but id not */
		cma_id->context = (void *)conn;
		DEBUG(DEBUG_DEBUG, ("pconn->cm_id %p\n", pconn->cm_id));

		if (ibw_setup_cq_qp(conn))
			goto error;

		conn->state = IBWC_INIT;
		pctx->connstate_func(ctx, conn);

		/* continued at ibw_accept when invoked by the func above */
		if (!pconn->is_accepted) {
			rc = rdma_reject(cma_id, NULL, 0);
			if (rc)
				DEBUG(DEBUG_ERR, ("rdma_reject failed with rc=%d\n", rc));
			/* log before freeing: pconn must not be read once conn is gone */
			DEBUG(DEBUG_DEBUG, ("pconn->cm_id %p wasn't accepted\n", pconn->cm_id));
			talloc_free(conn);
		}

		/* TODO: clarify whether if it's needed by upper layer: */
		ctx->state = IBWS_READY;
		pctx->connstate_func(ctx, NULL);

		/* NOTE: more requests can arrive until RDMA_CM_EVENT_ESTABLISHED ! */
		break;

	case RDMA_CM_EVENT_ESTABLISHED:
		/* expected after ibw_accept and ibw_connect[not directly] */
		DEBUG(DEBUG_INFO, ("ESTABLISHED (conn: %p)\n", cma_id->context));
		conn = talloc_get_type(cma_id->context, struct ibw_conn);
		assert(conn!=NULL); /* important assumption */

		DEBUG(DEBUG_DEBUG, ("ibw_setup_cq_qp succeeded (cmid=%p)\n", cma_id));

		/* client conn is up */
		conn->state = IBWC_CONNECTED;

		/* both ctx and conn have changed */
		pctx->connstate_func(ctx, conn);
		break;

	/*
	 * Each error event records its own message and jumps to the common
	 * error path; falling through would overwrite the message with the
	 * one of a later case.
	 */
	case RDMA_CM_EVENT_ADDR_ERROR:
		sprintf(ibw_lasterr, "RDMA_CM_EVENT_ADDR_ERROR, error %d\n", event->status);
		goto error;
	case RDMA_CM_EVENT_ROUTE_ERROR:
		sprintf(ibw_lasterr, "RDMA_CM_EVENT_ROUTE_ERROR, error %d\n", event->status);
		goto error;
	case RDMA_CM_EVENT_CONNECT_ERROR:
		sprintf(ibw_lasterr, "RDMA_CM_EVENT_CONNECT_ERROR, error %d\n", event->status);
		goto error;
	case RDMA_CM_EVENT_UNREACHABLE:
		sprintf(ibw_lasterr, "RDMA_CM_EVENT_UNREACHABLE, error %d\n", event->status);
		goto error;
	case RDMA_CM_EVENT_REJECTED:
		sprintf(ibw_lasterr, "RDMA_CM_EVENT_REJECTED, error %d\n", event->status);
		DEBUG(DEBUG_INFO, ("cm event handler: %s", ibw_lasterr));
		conn = talloc_get_type(cma_id->context, struct ibw_conn);
		if (conn) {
			/* must be done BEFORE connstate */
			if ((rc=rdma_ack_cm_event(event)))
				DEBUG(DEBUG_ERR, ("reject/rdma_ack_cm_event failed with %d\n", rc));
			event = NULL; /* not to touch cma_id or conn */
			conn->state = IBWC_ERROR;
			/* it should free the conn */
			pctx->connstate_func(NULL, conn);
		}
		break; /* this is not strictly an error */

	case RDMA_CM_EVENT_DISCONNECTED:
		DEBUG(DEBUG_DEBUG, ("RDMA_CM_EVENT_DISCONNECTED\n"));
		if ((rc=rdma_ack_cm_event(event)))
			DEBUG(DEBUG_ERR, ("disc/rdma_ack_cm_event failed with %d\n", rc));
		event = NULL; /* don't ack more */

		if (cma_id!=pctx->cm_id) {
			DEBUG(DEBUG_ERR, ("client DISCONNECT event cm_id=%p\n", cma_id));
			conn = talloc_get_type(cma_id->context, struct ibw_conn);
			conn->state = IBWC_DISCONNECTED;
			pctx->connstate_func(NULL, conn);
		}
		break;

	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		sprintf(ibw_lasterr, "cma detected device removal!\n");
		goto error;

	default:
		sprintf(ibw_lasterr, "unknown event %d\n", event->event);
		goto error;
	}

	/* normal exit: ack the event unless a case above already did */
	if (event!=NULL && (rc=rdma_ack_cm_event(event))) {
		sprintf(ibw_lasterr, "rdma_ack_cm_event failed with %d\n", rc);
		goto error;
	}

	return;
error:
	DEBUG(DEBUG_ERR, ("cm event handler: %s", ibw_lasterr));

	if (event!=NULL) {
		if (cma_id!=NULL && cma_id!=pctx->cm_id) {
			/* error on a child id: flag the connection, if any */
			conn = talloc_get_type(cma_id->context, struct ibw_conn);
			if (conn) {
				conn->state = IBWC_ERROR;
				pctx->connstate_func(NULL, conn);
			}
		} else {
			/* error on the parent id: flag the whole context */
			ctx->state = IBWS_ERROR;
			pctx->connstate_func(ctx, NULL);
		}

		if ((rc=rdma_ack_cm_event(event))!=0) {
			DEBUG(DEBUG_ERR, ("rdma_ack_cm_event failed with %d\n", rc));
		}
	}

	return;
}
Exemple #3
0
int ctdb_ibw_connstate_handler(struct ibw_ctx *ctx, struct ibw_conn *conn)
{
	if (ctx!=NULL) {
		/* ctx->state changed */
		switch(ctx->state) {
		case IBWS_INIT: /* ctx start - after ibw_init */
			break;
		case IBWS_READY: /* after ibw_bind & ibw_listen */
			break;
		case IBWS_CONNECT_REQUEST: /* after [IBWS_READY + incoming request] */
				/* => [(ibw_accept)IBWS_READY | (ibw_disconnect)STOPPED | ERROR] */
			if (ibw_accept(ctx, conn, NULL)) {
				DEBUG(DEBUG_ERR, ("connstate_handler/ibw_accept failed\n"));
				return -1;
			} /* else continue in IBWC_CONNECTED */
			break;
		case IBWS_STOPPED: /* normal stop <= ibw_disconnect+(IBWS_READY | IBWS_CONNECT_REQUEST) */
			/* TODO: have a CTDB upcall for which CTDB should wait in a (final) loop */
			break;
		case IBWS_ERROR: /* abnormal state; ibw_stop must be called after this */
			break;
		default:
			assert(0);
			break;
		}
	}

	if (conn!=NULL) {
		/* conn->state changed */
		switch(conn->state) {
		case IBWC_INIT: /* conn start - internal state */
			break;
		case IBWC_CONNECTED: { /* after ibw_accept or ibw_connect */
			struct ctdb_node *node = talloc_get_type(conn->conn_userdata, struct ctdb_node);
			if (node!=NULL) { /* after ibw_connect */
				struct ctdb_ibw_node *cn = talloc_get_type(node->private_data, struct ctdb_ibw_node);

				node->ctdb->upcalls->node_connected(node);
				ctdb_flush_cn_queue(cn);
			} else { /* after ibw_accept */
				/* NOP in CTDB case */
			}
		} break;
		case IBWC_DISCONNECTED: { /* after ibw_disconnect */
			struct ctdb_node *node = talloc_get_type(conn->conn_userdata, struct ctdb_node);
			if (node!=NULL)
				node->ctdb->upcalls->node_dead(node);
			talloc_free(conn);
			/* normal + intended disconnect => not reconnecting in this layer */
		} break;
		case IBWC_ERROR: {
			struct ctdb_node *node = talloc_get_type(conn->conn_userdata, struct ctdb_node);
			if (node!=NULL) {
				struct ctdb_ibw_node *cn = talloc_get_type(node->private_data, struct ctdb_ibw_node);
				struct ibw_ctx *ictx = cn->conn->ctx;

				DEBUG(DEBUG_DEBUG, ("IBWC_ERROR, reconnecting...\n"));
				talloc_free(cn->conn); /* internal queue content is destroyed */
				cn->conn = (void *)ibw_conn_new(ictx, node);
				tevent_add_timer(node->ctdb->ev, node,
						 timeval_current_ofs(1, 0),
						 ctdb_ibw_node_connect_event, node);
			}
		} break;
		default:
			assert(0);
			break;
		}
	}