Example #1
0
static int fi_ibv_get_device_attrs(struct ibv_context *ctx, struct fi_info *info)
{
	struct ibv_device_attr device_attr;
	struct ibv_port_attr port_attr;
	int ret = 0;

	ret = ibv_query_device(ctx, &device_attr);
	if (ret) {
		VERBS_INFO_ERRNO(FI_LOG_FABRIC, "ibv_query_device", errno);
		return -errno;
	}

	info->domain_attr->cq_cnt 		= device_attr.max_cq;
	info->domain_attr->ep_cnt 		= device_attr.max_qp;
	info->domain_attr->tx_ctx_cnt 		= MIN(info->domain_attr->tx_ctx_cnt, device_attr.max_qp);
	info->domain_attr->rx_ctx_cnt 		= MIN(info->domain_attr->rx_ctx_cnt, device_attr.max_qp);
	info->domain_attr->max_ep_tx_ctx 	= device_attr.max_qp;
	info->domain_attr->max_ep_rx_ctx 	= device_attr.max_qp;

	ret = fi_ibv_get_qp_cap(ctx, &device_attr, info);
	if (ret)
		return ret;

	ret = ibv_query_port(ctx, 1, &port_attr);
	if (ret) {
		VERBS_INFO_ERRNO(FI_LOG_FABRIC, "ibv_query_port", errno);
		return -errno;
	}

	info->ep_attr->max_msg_size 		= port_attr.max_msg_sz;
	info->ep_attr->max_order_raw_size 	= port_attr.max_msg_sz;
	info->ep_attr->max_order_waw_size	= port_attr.max_msg_sz;

	return 0;
}
Example #2
0
static int fi_ibv_get_device_attrs(struct ibv_context *ctx, struct fi_info *info)
{
	struct ibv_device_attr device_attr;
	struct ibv_port_attr port_attr;
	int ret = 0;

	ret = ibv_query_device(ctx, &device_attr);
	if (ret) {
		VERBS_INFO_ERRNO(FI_LOG_FABRIC, "ibv_query_device", errno);
		return -errno;
	}

	info->domain_attr->cq_cnt 		= device_attr.max_cq;
	info->domain_attr->ep_cnt 		= device_attr.max_qp;
	info->domain_attr->tx_ctx_cnt 		= MIN(info->domain_attr->tx_ctx_cnt, device_attr.max_qp);
	info->domain_attr->rx_ctx_cnt 		= MIN(info->domain_attr->rx_ctx_cnt, device_attr.max_qp);
	info->domain_attr->max_ep_tx_ctx 	= MIN(info->domain_attr->tx_ctx_cnt, device_attr.max_qp);
	info->domain_attr->max_ep_rx_ctx 	= MIN(info->domain_attr->rx_ctx_cnt, device_attr.max_qp);
	info->domain_attr->max_ep_srx_ctx	= device_attr.max_qp;
	info->domain_attr->mr_cnt		= device_attr.max_mr;

	if (info->ep_attr->type == FI_EP_RDM)
		info->domain_attr->cntr_cnt	= device_attr.max_qp * 4;

	info->tx_attr->size 			= device_attr.max_qp_wr;
	info->tx_attr->iov_limit 		= device_attr.max_sge;
	info->tx_attr->rma_iov_limit		= device_attr.max_sge;

	info->rx_attr->size 			= device_attr.max_srq_wr ?
						  MIN(device_attr.max_qp_wr,
						      device_attr.max_srq_wr) :
						      device_attr.max_qp_wr;
	info->rx_attr->iov_limit 		= MIN(device_attr.max_sge,
						      device_attr.max_srq_sge);

	ret = fi_ibv_get_qp_cap(ctx, info);
	if (ret)
		return ret;

	ret = ibv_query_port(ctx, 1, &port_attr);
	if (ret) {
		VERBS_INFO_ERRNO(FI_LOG_FABRIC, "ibv_query_port", errno);
		return -errno;
	}

	info->ep_attr->max_msg_size 		= port_attr.max_msg_sz;
	info->ep_attr->max_order_raw_size 	= port_attr.max_msg_sz;
	info->ep_attr->max_order_waw_size	= port_attr.max_msg_sz;

	return 0;
}
Example #3
0
static ssize_t
fi_ibv_rdm_process_event_rejected(struct fi_ibv_rdm_ep *ep,
				  struct rdma_cm_event *event)
{
	struct fi_ibv_rdm_conn *conn = event->id->context;
	ssize_t ret = FI_SUCCESS;
	const int *pdata = event->param.conn.private_data;

	if ((pdata && *pdata == 0xdeadbeef) ||
	    /* 
	     * TODO: this is a workaround of the case when private_data is not
	     * arriving from rdma_reject call on iWarp devices
	     */
	    (conn->cm_role == FI_VERBS_CM_PASSIVE &&
	     event->status == -ECONNREFUSED))
	{
		errno = 0;
		rdma_destroy_qp(event->id);
		if (errno) {
			VERBS_INFO_ERRNO(FI_LOG_AV, "rdma_destroy_qp failed\n",
					 errno);
			ret = -errno;
		}
		if (rdma_destroy_id(event->id)) {
			VERBS_INFO_ERRNO(FI_LOG_AV, "rdma_destroy_id failed\n",
					 errno);
			if (ret == FI_SUCCESS)
				ret = -errno;
		}
		VERBS_INFO(FI_LOG_AV,
			"Rejected from conn %p, addr %s:%u, cm_role %d, status %d\n",
			conn, inet_ntoa(conn->addr.sin_addr),
			ntohs(conn->addr.sin_port),
			conn->cm_role,
			event->status);
	} else {
		VERBS_INFO(FI_LOG_AV,
			"Unexpected REJECT from conn %p, addr %s:%u, cm_role %d, "
			"msg len %d, msg %x, status %d, err %d\n",
			conn, inet_ntoa(conn->addr.sin_addr),
			ntohs(conn->addr.sin_port),
			conn->cm_role,
			event->param.conn.private_data_len,
			event->param.conn.private_data ?
			*(int *)event->param.conn.private_data : 0,
			event->status, errno);
		conn->state = FI_VERBS_CONN_REJECTED;

	}
	return ret;
}
Example #4
0
static inline int fi_ibv_get_qp_cap(struct ibv_context *ctx,
				    struct fi_info *info)
{
	struct ibv_pd *pd;
	struct ibv_cq *cq;
	struct ibv_qp *qp;
	struct ibv_qp_init_attr init_attr;
	int ret = 0;

	pd = ibv_alloc_pd(ctx);
	if (!pd) {
		VERBS_INFO_ERRNO(FI_LOG_FABRIC, "ibv_alloc_pd", errno);
		return -errno;
	}

	cq = ibv_create_cq(ctx, 1, NULL, NULL, 0);
	if (!cq) {
		VERBS_INFO_ERRNO(FI_LOG_FABRIC, "ibv_create_cq", errno);
		ret = -errno;
		goto err1;
	}

	memset(&init_attr, 0, sizeof init_attr);
	init_attr.send_cq = cq;
	init_attr.recv_cq = cq;
	init_attr.cap.max_send_wr = verbs_default_tx_size;
	init_attr.cap.max_recv_wr = verbs_default_rx_size;
	init_attr.cap.max_send_sge = verbs_default_tx_iov_limit;
	init_attr.cap.max_recv_sge = verbs_default_rx_iov_limit;
	init_attr.cap.max_inline_data = verbs_default_inline_size;

	init_attr.qp_type = IBV_QPT_RC;

	qp = ibv_create_qp(pd, &init_attr);
	if (!qp) {
		VERBS_INFO_ERRNO(FI_LOG_FABRIC, "ibv_create_qp", errno);
		ret = -errno;
		goto err2;
	}

	info->tx_attr->inject_size = init_attr.cap.max_inline_data;

	ibv_destroy_qp(qp);
err2:
	ibv_destroy_cq(cq);
err1:
	ibv_dealloc_pd(pd);

	return ret;
}
Example #5
0
static ssize_t
fi_ibv_rdm_process_route_resolved(struct rdma_cm_event *event,
				  struct fi_ibv_rdm_ep *ep)
{
	struct fi_ibv_rdm_tagged_conn *conn = event->id->context;
	ssize_t ret = FI_SUCCESS;

	struct rdma_conn_param cm_params;
	fi_ibv_rdm_pack_cm_params(&cm_params, conn, ep);

	VERBS_INFO(FI_LOG_AV,
		"ROUTE RESOLVED, conn %p, addr %s:%u\n", conn,
		inet_ntoa(conn->addr.sin_addr), ntohs(conn->addr.sin_port));

	if (rdma_connect(event->id, &cm_params)) {
		VERBS_INFO_ERRNO(FI_LOG_AV,
				 "rdma_connect failed\n", errno);
		ret = -errno;

		free((void *)cm_params.private_data);
		assert(0);
	}

	return ret;
}
Example #6
0
int fi_ibv_create_ep(const char *node, const char *service,
		     uint64_t flags, const struct fi_info *hints,
		     struct rdma_addrinfo **rai, struct rdma_cm_id **id)
{
	struct rdma_addrinfo *_rai = NULL;
	int ret;

	ret = fi_ibv_get_rdma_rai(node, service, flags, hints, &_rai);
	if (ret) {
		return ret;
	}

	ret = rdma_create_ep(id, _rai, NULL, NULL);
	if (ret) {
		VERBS_INFO_ERRNO(FI_LOG_FABRIC, "rdma_create_ep", errno);
		ret = -errno;
		goto err1;
	}

	if (rai) {
		*rai = _rai;
	} else {
		rdma_freeaddrinfo(_rai);
	}

	return ret;
err1:
	rdma_freeaddrinfo(_rai);

	return ret;
}
Example #7
0
int fi_ibv_accept_xrc(struct fi_ibv_xrc_ep *ep, int reciprocal,
		      void *param, size_t paramlen)
{
	struct sockaddr *addr;
	struct fi_ibv_connreq *connreq;
	struct rdma_conn_param conn_param = { 0 };
	struct fi_ibv_xrc_cm_data *cm_data = param;
	int ret;

	addr = rdma_get_local_addr(ep->tgt_id);
	if (addr)
		ofi_straddr_dbg(&fi_ibv_prov, FI_LOG_CORE, "src_addr", addr);

	addr = rdma_get_peer_addr(ep->tgt_id);
	if (addr)
		ofi_straddr_dbg(&fi_ibv_prov, FI_LOG_CORE, "dest_addr", addr);

	connreq = container_of(ep->base_ep.info->handle,
			       struct fi_ibv_connreq, handle);
	ret = fi_ibv_ep_create_tgt_qp(ep, connreq->xrc.conn_data);
	if (ret)
		return ret;

	fi_ibv_set_xrc_cm_data(cm_data, connreq->xrc.is_reciprocal,
			       connreq->xrc.conn_tag, connreq->xrc.port,
			       ep->srqn);
	conn_param.private_data = cm_data;
	conn_param.private_data_len = paramlen;
	conn_param.responder_resources = RDMA_MAX_RESP_RES;
	conn_param.initiator_depth = RDMA_MAX_INIT_DEPTH;
	conn_param.flow_control = 1;
	conn_param.rnr_retry_count = 7;
	if (ep->base_ep.srq_ep)
		conn_param.srq = 1;

	/* Shared INI/TGT QP connection use a temporarily reserved QP number
	 * avoid the appearance of being a stale/duplicate IB CM message */
	if (!ep->tgt_id->qp)
		conn_param.qp_num = ep->conn_setup->rsvd_tgt_qpn->qp_num;

	if (connreq->xrc.is_reciprocal)
		fi_ibv_eq_clear_xrc_conn_tag(ep);
	else
		ep->conn_setup->conn_tag = connreq->xrc.conn_tag;

	assert(ep->conn_state == FI_IBV_XRC_UNCONNECTED ||
	       ep->conn_state == FI_IBV_XRC_ORIG_CONNECTED);
	fi_ibv_next_xrc_conn_state(ep);

	ret = rdma_accept(ep->tgt_id, &conn_param);
	if (ret) {
		ret = -errno;
		VERBS_INFO_ERRNO(FI_LOG_EP_CTRL,
				 "XRC TGT, ibv_open_qp", errno);
		fi_ibv_prev_xrc_conn_state(ep);
	}
	free(connreq);
	return ret;
}
Example #8
0
int fi_ibv_get_rdma_rai(const char *node, const char *service, uint64_t flags,
		   const struct fi_info *hints, struct rdma_addrinfo **rai)
{
	struct rdma_addrinfo rai_hints, *_rai;
	struct rdma_addrinfo **rai_current;
	int ret = fi_ibv_fi_to_rai(hints, flags, &rai_hints);

	if (ret)
		goto out;

	if (!node && !rai_hints.ai_dst_addr) {
		if ((!rai_hints.ai_src_addr && !service) ||
		    (!rai_hints.ai_src_addr && FI_IBV_EP_TYPE_IS_RDM(hints)))
		{
			node = local_node;
		}
		rai_hints.ai_flags |= RAI_PASSIVE;
	}

	ret = rdma_getaddrinfo((char *) node, (char *) service,
				&rai_hints, &_rai);
	if (ret) {
		VERBS_INFO_ERRNO(FI_LOG_FABRIC, "rdma_getaddrinfo", errno);
		if (errno) {
			ret = -errno;
		}
		goto out;
	}

	/*
	 * If caller requested rai, remove ib_rai entries added by IBACM to
	 * prevent wrong ib_connect_hdr from being sent in connect request.
	 */
	if (rai && hints && (hints->addr_format != FI_SOCKADDR_IB)) {
		for (rai_current = &_rai; *rai_current;) {
			struct rdma_addrinfo *rai_next;
			if ((*rai_current)->ai_family == AF_IB) {
				rai_next = (*rai_current)->ai_next;
				(*rai_current)->ai_next = NULL;
				rdma_freeaddrinfo(*rai_current);
				*rai_current = rai_next;
				continue;
			}
			rai_current = &(*rai_current)->ai_next;
		}
	}

	if (rai)
		*rai = _rai;

out:
	if (rai_hints.ai_src_addr)
		free(rai_hints.ai_src_addr);
	if (rai_hints.ai_dst_addr)
		free(rai_hints.ai_dst_addr);
	return ret;
}
Example #9
0
static ssize_t
fi_ibv_rdm_process_addr_resolved(struct rdma_cm_id *id,
				 struct fi_ibv_rdm_ep *ep)
{
	ssize_t ret = FI_SUCCESS;
	struct ibv_qp_init_attr qp_attr;
	struct fi_ibv_rdm_tagged_conn *conn = id->context;

	VERBS_INFO(FI_LOG_AV, "ADDR_RESOLVED conn %p, addr %s:%u\n",
		   conn, inet_ntoa(conn->addr.sin_addr),
		   ntohs(conn->addr.sin_port));

	assert(id->verbs == ep->domain->verbs);

	do {
		fi_ibv_rdm_tagged_init_qp_attributes(&qp_attr, ep);
		if (rdma_create_qp(id, ep->domain->pd, &qp_attr)) {
			VERBS_INFO_ERRNO(FI_LOG_AV,
					 "rdma_create_qp failed\n", errno);
			return -errno;
		}

		if (conn->cm_role == FI_VERBS_CM_PASSIVE) {
			break;
		}

		conn->qp[0] = id->qp;
		assert(conn->id[0] == id);
		if (conn->cm_role == FI_VERBS_CM_SELF) {
			break;
		}

		ret = fi_ibv_rdm_prepare_conn_memory(ep, conn);
		if (ret != FI_SUCCESS) {
			goto err;
		}

		ret = fi_ibv_rdm_repost_receives(conn, ep, ep->rq_wr_depth);
		if (ret < 0) {
			VERBS_INFO(FI_LOG_AV, "repost receives failed\n");
			goto err;
		} else {
			ret = FI_SUCCESS;
		}
	} while (0);

	if (rdma_resolve_route(id, FI_IBV_RDM_CM_RESOLVEADDR_TIMEOUT)) {
		VERBS_INFO(FI_LOG_AV, "rdma_resolve_route failed\n");
		ret = -FI_EHOSTUNREACH;
		goto err;
	}

	return ret;
err:
	rdma_destroy_qp(id);
	return ret;
}
Example #10
0
static ssize_t
fi_ibv_rdm_process_event_rejected(struct fi_ibv_rdm_ep *ep,
				  struct rdma_cm_event *event)
{
	struct fi_ibv_rdm_tagged_conn *conn = event->id->context;
	ssize_t ret = FI_SUCCESS;

	if (NULL != event->param.conn.private_data &&
	    *((int *)event->param.conn.private_data) == 0xdeadbeef ) {
		assert(conn->cm_role == FI_VERBS_CM_PASSIVE);
		errno = 0;
		rdma_destroy_qp(event->id);
		if (errno) {
			VERBS_INFO_ERRNO(FI_LOG_AV, "rdma_destroy_qp failed\n",
					 errno);
			ret = -errno;
		}
		if (rdma_destroy_id(event->id)) {
			VERBS_INFO_ERRNO(FI_LOG_AV, "rdma_destroy_id failed\n",
					 errno);
			if (ret == FI_SUCCESS)
				ret = -errno;
		}
		VERBS_INFO(FI_LOG_AV,
			"Rejected from conn %p, addr %s:%u, cm_role %d, status %d\n",
			conn, inet_ntoa(conn->addr.sin_addr),
			ntohs(conn->addr.sin_port),
			conn->cm_role,
			event->status);
	} else {
		VERBS_INFO(FI_LOG_AV,
			"Unexpected REJECT from conn %p, addr %s:%u, cm_role %d, msg len %d, msg %x, status %d\n",
			conn, inet_ntoa(conn->addr.sin_addr),
			ntohs(conn->addr.sin_port),
			conn->cm_role,
			event->param.conn.private_data_len,
			event->param.conn.private_data ?
			*(int *)event->param.conn.private_data : 0,
			event->status);
		conn->state = FI_VERBS_CONN_REJECTED;

	}
	return ret;
}
Example #11
0
static int fi_ibv_rdm_cm_init(struct fi_ibv_rdm_cm* cm,
			      const struct rdma_addrinfo* rai)
{
	struct sockaddr_in* src_addr = (struct sockaddr_in*)rai->ai_src_addr;
	cm->ec = rdma_create_event_channel();

	if (!cm->ec) {
		VERBS_INFO(FI_LOG_EP_CTRL,
			"Failed to create listener event channel: %s\n",
			strerror(errno));
		return -FI_EOTHER;
	}

	if (fi_fd_nonblock(cm->ec->fd) != 0) {
		VERBS_INFO_ERRNO(FI_LOG_EP_CTRL, "fcntl", errno);
		return -FI_EOTHER;
	}

	if (rdma_create_id(cm->ec, &cm->listener, NULL, RDMA_PS_TCP)) {
		VERBS_INFO(FI_LOG_EP_CTRL, "Failed to create cm listener: %s\n",
			     strerror(errno));
		return -FI_EOTHER;
	}

	if (fi_ibv_rdm_find_ipoib_addr(src_addr, cm)) {
		VERBS_INFO(FI_LOG_EP_CTRL, 
			   "Failed to find correct IPoIB address\n");
		return -FI_ENODEV;
	}

	cm->my_addr.sin_port = src_addr->sin_port;

	char my_ipoib_addr_str[INET6_ADDRSTRLEN];
	inet_ntop(cm->my_addr.sin_family,
		  &cm->my_addr.sin_addr.s_addr,
		  my_ipoib_addr_str, INET_ADDRSTRLEN);

	VERBS_INFO(FI_LOG_EP_CTRL, "My IPoIB: %s\n", my_ipoib_addr_str);

	if (rdma_bind_addr(cm->listener, (struct sockaddr *)&cm->my_addr)) {
		VERBS_INFO(FI_LOG_EP_CTRL,
			"Failed to bind cm listener to my IPoIB addr %s: %s\n",
			my_ipoib_addr_str, strerror(errno));
		return -FI_EOTHER;
	}

	if (!cm->my_addr.sin_port) {
		cm->my_addr.sin_port = rdma_get_src_port(cm->listener);
	}
	assert(cm->my_addr.sin_family == AF_INET);

	VERBS_INFO(FI_LOG_EP_CTRL, "My ep_addr: %s:%u\n",
		inet_ntoa(cm->my_addr.sin_addr), ntohs(cm->my_addr.sin_port));

	return FI_SUCCESS;
}
Example #12
0
int fi_ibv_init_info(void)
{
	struct ibv_context **ctx_list;
	struct fi_info *fi = NULL, *tail = NULL;
	int ret = 0, i, num_devices;

	if (verbs_info)
		return 0;

	pthread_mutex_lock(&verbs_info_lock);
	if (verbs_info)
		goto unlock;

	if (!fi_ibv_have_device()) {
		VERBS_INFO(FI_LOG_FABRIC, "No RDMA devices found\n");
		ret = -FI_ENODATA;
		goto unlock;
	}

	ctx_list = rdma_get_devices(&num_devices);
	if (!num_devices) {
		VERBS_INFO_ERRNO(FI_LOG_FABRIC, "rdma_get_devices", errno);
		ret = -errno;
		goto unlock;
	}

	for (i = 0; i < num_devices; i++) {
		ret = fi_ibv_alloc_info(ctx_list[i], &fi, &verbs_msg_domain);
		if (!ret) {
			if (!verbs_info)
				verbs_info = fi;
			else
				tail->next = fi;
			tail = fi;

			ret = fi_ibv_alloc_info(ctx_list[i], &fi,
						&verbs_rdm_domain);
			if (!ret) {
				tail->next = fi;
				tail = fi;
			}
		}
	}

	ret = verbs_info ? 0 : ret;

	rdma_free_devices(ctx_list);
unlock:
	pthread_mutex_unlock(&verbs_info_lock);
	return ret;
}
Example #13
0
static ssize_t
fi_ibv_rdm_dereg_and_free(struct ibv_mr **mr, char **buff)
{
	ssize_t ret = FI_SUCCESS;
	if (ibv_dereg_mr(*mr)) {
		VERBS_INFO_ERRNO(FI_LOG_AV, "ibv_dereg_mr failed\n", errno);
		ret = -errno;
	}

	*mr = NULL;
	free(*buff);
	*buff = NULL;

	return ret;
}
Example #14
0
int fi_ibv_create_ep(const char *node, const char *service,
		     uint64_t flags, const struct fi_info *hints,
		     struct rdma_addrinfo **rai, struct rdma_cm_id **id)
{
	struct rdma_addrinfo *_rai;
	struct sockaddr *local_addr;
	int ret;

	ret = fi_ibv_get_rdma_rai(node, service, flags, hints, &_rai);
	if (ret) {
		return ret;
	}

	ret = rdma_create_ep(id, _rai, NULL, NULL);
	if (ret) {
		VERBS_INFO_ERRNO(FI_LOG_FABRIC, "rdma_create_ep", errno);
		ret = -errno;
		goto err1;
	}
	if (rai && !_rai->ai_src_addr) {
		local_addr = rdma_get_local_addr(*id);
		_rai->ai_src_len = fi_ibv_sockaddr_len(local_addr);
		if (!(_rai->ai_src_addr = malloc(_rai->ai_src_len))) {
			ret = -FI_ENOMEM;
			goto err2;
		}
		memcpy(_rai->ai_src_addr, local_addr, _rai->ai_src_len);
	}

	if (rai) {
		*rai = _rai;
	} else {
		rdma_freeaddrinfo(_rai);
	}

	return ret;
err2:
	rdma_destroy_ep(*id);
err1:
	rdma_freeaddrinfo(_rai);

	return ret;
}
Example #15
0
static int fi_ibv_alloc_info(struct ibv_context *ctx, struct fi_info **info,
			     const struct verbs_ep_domain *ep_dom)
{
	struct fi_info *fi;
	union ibv_gid gid;
	size_t name_len;
	int ret;
	int param;

	if (!(fi = fi_allocinfo()))
		return -FI_ENOMEM;

	fi->caps		= ep_dom->caps;
	fi->handle		= NULL;
	if (ep_dom->type == FI_EP_RDM) {
		fi->mode	= VERBS_RDM_MODE;
		*(fi->tx_attr)	= verbs_rdm_tx_attr;
	} else {
		fi->mode	= VERBS_MODE;
		*(fi->tx_attr)	= verbs_tx_attr;
	}

	*(fi->rx_attr)		= (ep_dom->type == FI_EP_RDM)
				? verbs_rdm_rx_attr : verbs_rx_attr;
	*(fi->ep_attr)		= verbs_ep_attr;
	*(fi->domain_attr)	= verbs_domain_attr;
	*(fi->fabric_attr)	= verbs_fabric_attr;

	fi->ep_attr->type	= ep_dom->type;
	fi->tx_attr->caps	= ep_dom->caps;
	fi->rx_attr->caps	= ep_dom->caps;

	ret = fi_ibv_get_device_attrs(ctx, fi);
	if (ret)
		goto err;

	if (ep_dom->type == FI_EP_RDM) {
		fi->tx_attr->inject_size = FI_IBV_RDM_DFLT_BUFFERED_SSIZE;
		fi->tx_attr->iov_limit = 1;
		fi->tx_attr->rma_iov_limit = 1;
		if (!fi_param_get_int(&fi_ibv_prov, "rdm_buffer_size", &param)) {
			if (param > sizeof (struct fi_ibv_rdm_tagged_rndv_header)) {
				fi->tx_attr->inject_size = param;
			} else {
				FI_INFO(&fi_ibv_prov, FI_LOG_CORE,
					"rdm_buffer_size too small, should be greater then %d\n",
					sizeof (struct fi_ibv_rdm_tagged_rndv_header));
				ret = -FI_EINVAL;
				goto err;
			}
		}
	}

	switch (ctx->device->transport_type) {
	case IBV_TRANSPORT_IB:
		if(ibv_query_gid(ctx, 1, 0, &gid)) {
			VERBS_INFO_ERRNO(FI_LOG_FABRIC, "ibv_query_gid", errno);
			ret = -errno;
			goto err;
		}

		name_len =  strlen(VERBS_IB_PREFIX) + INET6_ADDRSTRLEN;

		if (!(fi->fabric_attr->name = calloc(1, name_len + 1))) {
			ret = -FI_ENOMEM;
			goto err;
		}

		snprintf(fi->fabric_attr->name, name_len, VERBS_IB_PREFIX "%lx",
			 gid.global.subnet_prefix);

		fi->ep_attr->protocol = (ep_dom == &verbs_msg_domain) ?
					FI_PROTO_RDMA_CM_IB_RC : FI_PROTO_IB_RDM;
		break;
	case IBV_TRANSPORT_IWARP:
		fi->fabric_attr->name = strdup(VERBS_IWARP_FABRIC);
		if (!fi->fabric_attr->name) {
			ret = -FI_ENOMEM;
			goto err;
		}

		if (ep_dom == &verbs_msg_domain) {
			fi->ep_attr->protocol = FI_PROTO_IWARP;
			fi->tx_attr->op_flags = VERBS_TX_OP_FLAGS_IWARP;
		} else {
			fi->ep_attr->protocol = FI_PROTO_IWARP_RDM;
			fi->tx_attr->op_flags = VERBS_TX_OP_FLAGS_IWARP_RDM;
		}
		break;
	default:
		FI_INFO(&fi_ibv_prov, FI_LOG_CORE, "Unknown transport type\n");
		ret = -FI_ENODATA;
		goto err;
	}

	name_len = strlen(ctx->device->name) + strlen(ep_dom->suffix);
	fi->domain_attr->name = malloc(name_len + 1);
	if (!fi->domain_attr->name) {
		ret = -FI_ENOMEM;
		goto err;
	}

	snprintf(fi->domain_attr->name, name_len + 1, "%s%s",
		 ctx->device->name, ep_dom->suffix);
	fi->domain_attr->name[name_len] = '\0';

	*info = fi;
	return 0;
err:
	fi_freeinfo(fi);
	return ret;
}
Example #16
0
int fi_ibv_init_info(void)
{
	struct ibv_context **ctx_list;
	struct fi_info *fi = NULL, *tail = NULL;
	int ret = 0, i, num_devices, fork_unsafe = 0;

	if (verbs_info)
		return 0;

	pthread_mutex_lock(&verbs_info_lock);
	if (verbs_info)
		goto unlock;

	if (!fi_ibv_have_device()) {
		VERBS_INFO(FI_LOG_FABRIC, "No RDMA devices found\n");
		ret = -FI_ENODATA;
		goto unlock;
	}

	fi_param_get_bool(NULL, "fork_unsafe", &fork_unsafe);

	if (!fork_unsafe) {
		FI_INFO(&fi_ibv_prov, FI_LOG_CORE, "Enabling IB fork support\n");
		ret = ibv_fork_init();
		if (ret) {
			FI_WARN(&fi_ibv_prov, FI_LOG_CORE,
					"Enabling IB fork support failed: %s (%d)\n",
					strerror(ret), ret);
			goto unlock;
		}
	} else {
		FI_INFO(&fi_ibv_prov, FI_LOG_CORE, "Not enabling IB fork support\n");
	}

	ctx_list = rdma_get_devices(&num_devices);
	if (!num_devices) {
		VERBS_INFO_ERRNO(FI_LOG_FABRIC, "rdma_get_devices", errno);
		ret = -errno;
		goto unlock;
	}

	for (i = 0; i < num_devices; i++) {
		ret = fi_ibv_alloc_info(ctx_list[i], &fi, &verbs_msg_domain);
		if (!ret) {
			if (!verbs_info)
				verbs_info = fi;
			else
				tail->next = fi;
			tail = fi;

			ret = fi_ibv_alloc_info(ctx_list[i], &fi,
						&verbs_rdm_domain);
			if (!ret) {
				tail->next = fi;
				tail = fi;
			}
		}
	}

	ret = verbs_info ? 0 : ret;

	rdma_free_devices(ctx_list);
unlock:
	pthread_mutex_unlock(&verbs_info_lock);
	return ret;
}
Example #17
0
static ssize_t
fi_ibv_rdm_process_connect_request(struct rdma_cm_event *event,
					  struct fi_ibv_rdm_ep *ep)
{
	struct ibv_qp_init_attr qp_attr;
	struct rdma_conn_param cm_params;
	struct fi_ibv_rdm_tagged_conn *conn = NULL;
	struct rdma_cm_id *id = event->id;
	ssize_t ret = FI_SUCCESS;

	char *p = (char *) event->param.conn.private_data;

	if (ep->is_closing) {
		int rej_message = 0xdeadbeef;
		if (rdma_reject(id, &rej_message, sizeof(int))) {
			VERBS_INFO_ERRNO(FI_LOG_AV, "rdma_reject\n", errno);
			ret = -errno;
			if (rdma_destroy_id(id)) {
				VERBS_INFO_ERRNO(FI_LOG_AV, "rdma_destroy_id\n",
						 errno);
				ret = (ret == FI_SUCCESS) ? -errno : ret;
			}
		}
		assert(ret == FI_SUCCESS);
		return ret;
	}

	HASH_FIND(hh, fi_ibv_rdm_tagged_conn_hash, p, FI_IBV_RDM_DFLT_ADDRLEN,
		  conn);

	if (!conn) {
		conn = memalign(FI_IBV_RDM_MEM_ALIGNMENT, sizeof(*conn));
		if (!conn)
			return -FI_ENOMEM;

		memset(conn, 0, sizeof(struct fi_ibv_rdm_tagged_conn));

		conn->state = FI_VERBS_CONN_ALLOCATED;
		dlist_init(&conn->postponed_requests_head);
		fi_ibv_rdm_unpack_cm_params(&event->param.conn, conn, ep);
		fi_ibv_rdm_conn_init_cm_role(conn, ep);

		FI_INFO(&fi_ibv_prov, FI_LOG_AV,
			"CONN REQUEST, NOT found in hash, new conn %p %d, addr %s:%u, HASH ADD\n",
			conn, conn->cm_role, inet_ntoa(conn->addr.sin_addr),
			ntohs(conn->addr.sin_port));

		HASH_ADD(hh, fi_ibv_rdm_tagged_conn_hash, addr,
			FI_IBV_RDM_DFLT_ADDRLEN, conn);
	} else {
		if (conn->cm_role != FI_VERBS_CM_ACTIVE) {
			/*
			 * Do it before rdma_create_qp since that call would
			 * modify event->param.conn.private_data buffer
			 */
			fi_ibv_rdm_unpack_cm_params(&event->param.conn, conn,
						    ep);
		}

		FI_INFO(&fi_ibv_prov, FI_LOG_AV,
			"CONN REQUEST,  FOUND in hash, conn %p %d, addr %s:%u\n",
			conn, conn->cm_role, inet_ntoa(conn->addr.sin_addr),
			ntohs(conn->addr.sin_port));
	}

	if (conn->cm_role == FI_VERBS_CM_ACTIVE) {
		int rej_message = 0xdeadbeef;
		if (rdma_reject(id, &rej_message, sizeof(rej_message))) {
			VERBS_INFO_ERRNO(FI_LOG_AV, "rdma_reject\n", errno);
			ret = -errno;
			if (rdma_destroy_id(id)) {
				VERBS_INFO_ERRNO(FI_LOG_AV, "rdma_destroy_id\n",
						 errno);
				ret = (ret == FI_SUCCESS) ? -errno : ret;
			}
		}
		if (conn->state == FI_VERBS_CONN_ALLOCATED) {
			ret = fi_ibv_rdm_start_connection(ep, conn);
			if (ret != FI_SUCCESS)
				goto err;
		}
	} else {
		assert(conn->state == FI_VERBS_CONN_ALLOCATED ||
		       conn->state == FI_VERBS_CONN_STARTED);

		const size_t idx = 
			(conn->cm_role == FI_VERBS_CM_PASSIVE) ? 0 : 1;

		conn->state = FI_VERBS_CONN_STARTED;

		assert (conn->id[idx] == NULL);
		conn->id[idx] = id;

		ret = fi_ibv_rdm_prepare_conn_memory(ep, conn);
		if (ret != FI_SUCCESS)
			goto err;

		fi_ibv_rdm_tagged_init_qp_attributes(&qp_attr, ep);
		if (rdma_create_qp(id, ep->domain->pd, &qp_attr)) {
			ret = -errno;
			goto err;
		}
		conn->qp[idx] = id->qp;

		ret = fi_ibv_rdm_repost_receives(conn, ep, ep->rq_wr_depth);
		if (ret < 0) {
			VERBS_INFO(FI_LOG_AV, "repost receives failed\n");
			goto err;
		} else {
			ret = FI_SUCCESS;
		}

		id->context = conn;

		fi_ibv_rdm_pack_cm_params(&cm_params, conn, ep);

		if (rdma_accept(id, &cm_params)) {
			VERBS_INFO_ERRNO(FI_LOG_AV, "rdma_accept\n", errno);
			ret = -errno;
			goto err;
		}
		if (cm_params.private_data) {
			free((void *) cm_params.private_data);
		}
	}

	return ret;
err:
	/* ret err code is already set here, just cleanup resources */
	fi_ibv_rdm_conn_cleanup(conn);
	return ret;
}
Example #18
0
static inline int fi_ibv_get_qp_cap(struct ibv_context *ctx,
				    struct ibv_device_attr *device_attr,
				    struct fi_info *info)
{
	struct ibv_pd *pd;
	struct ibv_cq *cq;
	struct ibv_qp *qp;
	struct ibv_qp_init_attr init_attr;
	int ret = 0;

	pd = ibv_alloc_pd(ctx);
	if (!pd) {
		VERBS_INFO_ERRNO(FI_LOG_FABRIC, "ibv_alloc_pd", errno);
		return -errno;
	}

	cq = ibv_create_cq(ctx, 1, NULL, NULL, 0);
	if (!cq) {
		VERBS_INFO_ERRNO(FI_LOG_FABRIC, "ibv_create_cq", errno);
		ret = -errno;
		goto err1;
	}

	/* TODO: serialize access to string buffers */
	fi_read_file(FI_CONF_DIR, "def_tx_ctx_size",
			def_tx_ctx_size, sizeof def_tx_ctx_size);
	fi_read_file(FI_CONF_DIR, "def_rx_ctx_size",
			def_rx_ctx_size, sizeof def_rx_ctx_size);
	fi_read_file(FI_CONF_DIR, "def_tx_iov_limit",
			def_tx_iov_limit, sizeof def_tx_iov_limit);
	fi_read_file(FI_CONF_DIR, "def_rx_iov_limit",
			def_rx_iov_limit, sizeof def_rx_iov_limit);
	fi_read_file(FI_CONF_DIR, "def_inject_size",
			def_inject_size, sizeof def_inject_size);

	memset(&init_attr, 0, sizeof init_attr);
	init_attr.send_cq = cq;
	init_attr.recv_cq = cq;
	init_attr.cap.max_send_wr = MIN(atoi(def_tx_ctx_size), device_attr->max_qp_wr);
	init_attr.cap.max_recv_wr = MIN(atoi(def_rx_ctx_size), device_attr->max_qp_wr);
	init_attr.cap.max_send_sge = MIN(atoi(def_tx_iov_limit), device_attr->max_sge);
	init_attr.cap.max_recv_sge = MIN(atoi(def_rx_iov_limit), device_attr->max_sge);
	init_attr.cap.max_inline_data = atoi(def_inject_size);
	init_attr.qp_type = IBV_QPT_RC;

	qp = ibv_create_qp(pd, &init_attr);
	if (!qp) {
		VERBS_INFO_ERRNO(FI_LOG_FABRIC, "ibv_create_qp", errno);
		ret = -errno;
		goto err2;
	}

	info->tx_attr->inject_size	= init_attr.cap.max_inline_data;
	info->tx_attr->iov_limit 	= init_attr.cap.max_send_sge;
	info->tx_attr->size	 	= init_attr.cap.max_send_wr;

	info->rx_attr->iov_limit 	= init_attr.cap.max_recv_sge;
	/*
	 * On some HW ibv_create_qp can increase max_recv_wr value more than
	 * it really supports. So, alignment with device capability is needed.
	 */
	info->rx_attr->size	 	= MIN(init_attr.cap.max_recv_wr,
						device_attr->max_qp_wr);

	ibv_destroy_qp(qp);
err2:
	ibv_destroy_cq(cq);
err1:
	ibv_dealloc_pd(pd);

	return ret;
}
Example #19
0
ssize_t fi_ibv_rdm_conn_cleanup(struct fi_ibv_rdm_tagged_conn *conn)
{
	ssize_t ret = FI_SUCCESS;
	ssize_t err = FI_SUCCESS;

	VERBS_DBG(FI_LOG_AV, "conn %p, exp = %lld unexp = %lld\n", conn,
		     conn->exp_counter, conn->unexp_counter);

	errno = 0;
	if (conn->id[0]) {
		rdma_destroy_qp(conn->id[0]);
		if (errno) {
			VERBS_INFO_ERRNO(FI_LOG_AV, "rdma_destroy_qp\n", errno);
			ret = -errno;
		}

		if (rdma_destroy_id(conn->id[0])) {
			VERBS_INFO_ERRNO(FI_LOG_AV, "rdma_destroy_id\n", errno);
			if (ret == FI_SUCCESS)
				ret = -errno;
		}
	}

	if (conn->id[1]) {
		assert(conn->cm_role == FI_VERBS_CM_SELF);
		rdma_destroy_qp(conn->id[1]);
		if (errno) {
			VERBS_INFO_ERRNO(FI_LOG_AV, "rdma_destroy_qp\n", errno);
			if (ret == FI_SUCCESS)
				ret = -errno;
		}
		if (rdma_destroy_id(conn->id[1])) {
			VERBS_INFO_ERRNO(FI_LOG_AV, "rdma_destroy_id\n", errno);
			if (ret == FI_SUCCESS)
				ret = -errno;
		}
	}

	if (conn->s_mr) {
		err = fi_ibv_rdm_dereg_and_free(&conn->s_mr, &conn->sbuf_mem_reg);
		if ((err != FI_SUCCESS) && (ret == FI_SUCCESS)) {
			ret = err;
		}
	}
	if (conn->r_mr) {
		err = fi_ibv_rdm_dereg_and_free(&conn->r_mr, &conn->rbuf_mem_reg);
		if ((err != FI_SUCCESS) && (ret == FI_SUCCESS)) {
			ret = err;
		}
	}
	if (conn->ack_mr) {
		if (ibv_dereg_mr(conn->ack_mr)) {
			VERBS_INFO_ERRNO(FI_LOG_AV, "ibv_dereg_mr failed\n",
					 errno);
			if (ret == FI_SUCCESS)
				ret = -errno;
		}
	}

	if (conn->rma_mr) {
		err = fi_ibv_rdm_dereg_and_free(&conn->rma_mr,
						&conn->rmabuf_mem_reg);
		if ((err != FI_SUCCESS) && (ret == FI_SUCCESS)) {
			ret = err;
		}
	}

	free(conn);
	return ret;
}
Example #20
0
static int
fi_ibv_mr_reg(struct fid *fid, const void *buf, size_t len,
	   uint64_t access, uint64_t offset, uint64_t requested_key,
	   uint64_t flags, struct fid_mr **mr, void *context)
{
	struct fi_ibv_mem_desc *md;
	int fi_ibv_access = 0;
	struct fid_domain *domain;

	if (flags)
		return -FI_EBADFLAGS;

	if (fid->fclass != FI_CLASS_DOMAIN) {
		return -FI_EINVAL;
	}
	domain = container_of(fid, struct fid_domain, fid);

	md = calloc(1, sizeof *md);
	if (!md)
		return -FI_ENOMEM;

	md->domain = container_of(domain, struct fi_ibv_domain, domain_fid);
	md->mr_fid.fid.fclass = FI_CLASS_MR;
	md->mr_fid.fid.context = context;
	md->mr_fid.fid.ops = &fi_ibv_mr_ops;

	/* Enable local write access by default for FI_EP_RDM which hides local
	 * registration requirements. This allows to avoid buffering or double
	 * registration */
	if (!(md->domain->info->caps & FI_LOCAL_MR))
		fi_ibv_access |= IBV_ACCESS_LOCAL_WRITE;

	/* Local read access to an MR is enabled by default in verbs */

	if (access & FI_RECV)
		fi_ibv_access |= IBV_ACCESS_LOCAL_WRITE;

	/* iWARP spec requires Remote Write access for an MR that is used
	 * as a data sink for a Remote Read */
	if (access & FI_READ) {
		fi_ibv_access |= IBV_ACCESS_LOCAL_WRITE;
		if (md->domain->verbs->device->transport_type == IBV_TRANSPORT_IWARP)
			fi_ibv_access |= IBV_ACCESS_REMOTE_WRITE;
	}

	if (access & FI_WRITE)
		fi_ibv_access |= IBV_ACCESS_LOCAL_WRITE;

	if (access & FI_REMOTE_READ)
		fi_ibv_access |= IBV_ACCESS_REMOTE_READ;

	/* Verbs requires Local Write access too for Remote Write access */
	if (access & FI_REMOTE_WRITE)
		fi_ibv_access |= IBV_ACCESS_LOCAL_WRITE |
			IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_ATOMIC;

	md->mr = ibv_reg_mr(md->domain->pd, (void *) buf, len, fi_ibv_access);
	if (!md->mr)
		goto err;

	md->mr_fid.mem_desc = (void *) (uintptr_t) md->mr->lkey;
	md->mr_fid.key = md->mr->rkey;
	*mr = &md->mr_fid;
	if(md->domain->eq && (md->domain->eq_flags & FI_REG_MR)) {
		struct fi_eq_entry entry = {
			.fid = &md->mr_fid.fid,
			.context = context
		};
		fi_ibv_eq_write_event(md->domain->eq, FI_MR_COMPLETE,
			 	      &entry, sizeof(entry));
	}
	return 0;

err:
	free(md);
	return -errno;
}

static int fi_ibv_mr_regv(struct fid *fid, const struct iovec * iov,
		size_t count, uint64_t access, uint64_t offset, uint64_t requested_key,
		uint64_t flags, struct fid_mr **mr, void *context)
{
	if (count > VERBS_MR_IOV_LIMIT) {
		VERBS_WARN(FI_LOG_FABRIC,
			   "iov count > %d not supported\n",
			   VERBS_MR_IOV_LIMIT);
		return -FI_EINVAL;
	}
	return fi_ibv_mr_reg(fid, iov->iov_base, iov->iov_len, access, offset,
			requested_key, flags, mr, context);
}

static int fi_ibv_mr_regattr(struct fid *fid, const struct fi_mr_attr *attr,
		uint64_t flags, struct fid_mr **mr)
{
	return fi_ibv_mr_regv(fid, attr->mr_iov, attr->iov_count, attr->access,
			0, attr->requested_key, flags, mr, attr->context);
}

static int fi_ibv_domain_bind(struct fid *fid, struct fid *bfid, uint64_t flags)
{
	struct fi_ibv_domain *domain;
	struct fi_ibv_eq *eq;

	domain = container_of(fid, struct fi_ibv_domain, domain_fid.fid);

	switch (bfid->fclass) {
	case FI_CLASS_EQ:
		eq = container_of(bfid, struct fi_ibv_eq, eq_fid);
		domain->eq = eq;
		domain->eq_flags = flags;
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

static int fi_ibv_domain_close(fid_t fid)
{
	struct fi_ibv_domain *domain;
	int ret;

	domain = container_of(fid, struct fi_ibv_domain, domain_fid.fid);

	if (domain->rdm) {
		rdma_destroy_ep(domain->rdm_cm->listener);
		free(domain->rdm_cm);
	}

	if (domain->pd) {
		ret = ibv_dealloc_pd(domain->pd);
		if (ret)
			return -ret;
		domain->pd = NULL;
	}

	fi_freeinfo(domain->info);
	free(domain);
	return 0;
}

static int fi_ibv_open_device_by_name(struct fi_ibv_domain *domain, const char *name)
{
	struct ibv_context **dev_list;
	int i, ret = -FI_ENODEV;

	if (!name)
		return -FI_EINVAL;

	dev_list = rdma_get_devices(NULL);
	if (!dev_list)
		return -errno;

	for (i = 0; dev_list[i] && ret; i++) {
		if (domain->rdm) {
			ret = strncmp(name, ibv_get_device_name(dev_list[i]->device),
				      strlen(name) - strlen(verbs_rdm_domain.suffix));

		} else {
			ret = strcmp(name, ibv_get_device_name(dev_list[i]->device));
		}

		if (!ret)
			domain->verbs = dev_list[i];
	}
	rdma_free_devices(dev_list);
	return ret;
}

static struct fi_ops fi_ibv_fid_ops = {
	.size = sizeof(struct fi_ops),
	.close = fi_ibv_domain_close,
	.bind = fi_ibv_domain_bind,
	.control = fi_no_control,
	.ops_open = fi_no_ops_open,
};

static struct fi_ops_mr fi_ibv_domain_mr_ops = {
	.size = sizeof(struct fi_ops_mr),
	.reg = fi_ibv_mr_reg,
	.regv = fi_ibv_mr_regv,
	.regattr = fi_ibv_mr_regattr,
};

static struct fi_ops_domain fi_ibv_domain_ops = {
	.size = sizeof(struct fi_ops_domain),
	.av_open = fi_no_av_open,
	.cq_open = fi_ibv_cq_open,
	.endpoint = fi_ibv_open_ep,
	.scalable_ep = fi_no_scalable_ep,
	.cntr_open = fi_no_cntr_open,
	.poll_open = fi_no_poll_open,
	.stx_ctx = fi_no_stx_context,
	.srx_ctx = fi_ibv_srq_context,
};

static struct fi_ops_domain fi_ibv_rdm_domain_ops = {
	.size = sizeof(struct fi_ops_domain),
	.av_open = fi_ibv_rdm_av_open,
	.cq_open = fi_ibv_rdm_cq_open,
	.endpoint = fi_ibv_rdm_open_ep,
	.scalable_ep = fi_no_scalable_ep,
	.cntr_open = fi_rbv_rdm_cntr_open,
	.poll_open = fi_no_poll_open,
	.stx_ctx = fi_no_stx_context,
	.srx_ctx = fi_no_srx_context,
};

static int
fi_ibv_domain(struct fid_fabric *fabric, struct fi_info *info,
	   struct fid_domain **domain, void *context)
{
	struct fi_ibv_domain *_domain;
	struct fi_ibv_fabric *fab;
	struct fi_info *fi;
	int ret;

	fi = fi_ibv_get_verbs_info(info->domain_attr->name);
	if (!fi)
		return -FI_EINVAL;

	fab = container_of(fabric, struct fi_ibv_fabric, util_fabric.fabric_fid);
	ret = ofi_check_domain_attr(&fi_ibv_prov, fabric->api_version,
				    fi->domain_attr, info->domain_attr);
	if (ret)
		return ret;

	_domain = calloc(1, sizeof *_domain);
	if (!_domain)
		return -FI_ENOMEM;

	_domain->info = fi_dupinfo(info);
	if (!_domain->info)
		goto err1;

	_domain->rdm = FI_IBV_EP_TYPE_IS_RDM(info);
	if (_domain->rdm) {
		_domain->rdm_cm = calloc(1, sizeof(*_domain->rdm_cm));
		if (!_domain->rdm_cm) {
			ret = -FI_ENOMEM;
			goto err2;
		}
	}
	ret = fi_ibv_open_device_by_name(_domain, info->domain_attr->name);
	if (ret)
		goto err2;

	_domain->pd = ibv_alloc_pd(_domain->verbs);
	if (!_domain->pd) {
		ret = -errno;
		goto err2;
	}

	_domain->domain_fid.fid.fclass = FI_CLASS_DOMAIN;
	_domain->domain_fid.fid.context = context;
	_domain->domain_fid.fid.ops = &fi_ibv_fid_ops;
	_domain->domain_fid.mr = &fi_ibv_domain_mr_ops;
	if (_domain->rdm) {
		_domain->domain_fid.ops = &fi_ibv_rdm_domain_ops;

		_domain->rdm_cm->ec = rdma_create_event_channel();

		if (!_domain->rdm_cm->ec) {
			VERBS_INFO(FI_LOG_EP_CTRL,
				"Failed to create listener event channel: %s\n",
				strerror(errno));
			ret = -FI_EOTHER;
			goto err2;
		}

		if (fi_fd_nonblock(_domain->rdm_cm->ec->fd) != 0) {
			VERBS_INFO_ERRNO(FI_LOG_EP_CTRL, "fcntl", errno);
			ret = -FI_EOTHER;
			goto err3;
		}

		if (rdma_create_id(_domain->rdm_cm->ec,
				   &_domain->rdm_cm->listener, NULL, RDMA_PS_TCP))
		{
			VERBS_INFO(FI_LOG_EP_CTRL, "Failed to create cm listener: %s\n",
				   strerror(errno));
			ret = -FI_EOTHER;
			goto err3;
		}
		_domain->rdm_cm->is_bound = 0;
	} else {
		_domain->domain_fid.ops = &fi_ibv_domain_ops;
	}
	_domain->fab = fab;

	*domain = &_domain->domain_fid;
	return 0;
err3:
	if (_domain->rdm)
		rdma_destroy_event_channel(_domain->rdm_cm->ec);
err2:
	if (_domain->rdm)
		free(_domain->rdm_cm);
	fi_freeinfo(_domain->info);
err1:
	free(_domain);
	return ret;
}

static int fi_ibv_trywait(struct fid_fabric *fabric, struct fid **fids, int count)
{
	struct fi_ibv_cq *cq;
	int ret, i;

	for (i = 0; i < count; i++) {
		switch (fids[i]->fclass) {
		case FI_CLASS_CQ:
			cq = container_of(fids[i], struct fi_ibv_cq, cq_fid.fid);
			ret = cq->trywait(fids[i]);
			if (ret)
				return ret;
			break;
		case FI_CLASS_EQ:
			/* We are always ready to wait on an EQ since
			 * rdmacm EQ is based on an fd */
			continue;
		case FI_CLASS_CNTR:
		case FI_CLASS_WAIT:
			return -FI_ENOSYS;
		default:
			return -FI_EINVAL;
		}

	}
	return FI_SUCCESS;
}

static int fi_ibv_fabric_close(fid_t fid)
{
	struct fi_ibv_fabric *fab;
	int ret;

	fab = container_of(fid, struct fi_ibv_fabric, util_fabric.fabric_fid.fid);
	ret = ofi_fabric_close(&fab->util_fabric);
	if (ret)
		return ret;
	free(fab);

	return 0;
}

static struct fi_ops fi_ibv_fi_ops = {
	.size = sizeof(struct fi_ops),
	.close = fi_ibv_fabric_close,
	.bind = fi_no_bind,
	.control = fi_no_control,
	.ops_open = fi_no_ops_open,
};

static struct fi_ops_fabric fi_ibv_ops_fabric = {
	.size = sizeof(struct fi_ops_fabric),
	.domain = fi_ibv_domain,
	.passive_ep = fi_ibv_passive_ep,
	.eq_open = fi_ibv_eq_open,
	.wait_open = fi_no_wait_open,
	.trywait = fi_ibv_trywait
};

int fi_ibv_fabric(struct fi_fabric_attr *attr, struct fid_fabric **fabric,
		  void *context)
{
	struct fi_ibv_fabric *fab;
	struct fi_info *info;
	int ret;

	ret = fi_ibv_init_info();
	if (ret)
		return ret;

	fab = calloc(1, sizeof(*fab));
	if (!fab)
		return -FI_ENOMEM;

	for (info = verbs_info; info; info = info->next) {
		ret = ofi_fabric_init(&fi_ibv_prov, info->fabric_attr, attr,
				      &fab->util_fabric, context);
		if (ret != -FI_ENODATA)
			break;
	}
	if (ret) {
		free(fab);
		return ret;
	}

	*fabric = &fab->util_fabric.fabric_fid;
	(*fabric)->fid.ops = &fi_ibv_fi_ops;
	(*fabric)->ops = &fi_ibv_ops_fabric;

	return 0;
}
Example #21
0
ssize_t fi_ibv_rdm_cm_progress(struct fi_ibv_rdm_ep *ep)
{
	struct rdma_cm_event *event = NULL;
	void *data = NULL;
	ssize_t ret = FI_SUCCESS;

	if (rdma_get_cm_event(ep->cm.ec, &event)) {
		if(errno == EAGAIN) {
			errno = 0;
			usleep(FI_IBV_RDM_CM_THREAD_TIMEOUT);
			return FI_SUCCESS;
		} else {
			VERBS_INFO_ERRNO(FI_LOG_AV,
					 "rdma_get_cm_event failed\n", errno);
			ret = -errno;
		}
	}

	while (ret == FI_SUCCESS && event) {
		pthread_mutex_lock(&ep->cm_lock);

		struct rdma_cm_event event_copy;
		memcpy(&event_copy, event, sizeof(*event));
		if (event->param.conn.private_data_len) {
			data = malloc(event->param.conn.private_data_len);
			if (!data) {
				pthread_mutex_unlock(&ep->cm_lock);
				ret = -FI_ENOMEM;
				break;
			}
			memcpy(data, event->param.conn.private_data,
				      event->param.conn.private_data_len);
			event_copy.param.conn.private_data = data;
			event_copy.param.conn.private_data_len =
			    event->param.conn.private_data_len;
		}
		if (rdma_ack_cm_event(event)) {
			VERBS_INFO_ERRNO(FI_LOG_AV,
					 "rdma_get_cm_event failed\n", errno);
			ret = -errno;
		}

		if (ret == FI_SUCCESS){
			ret = fi_ibv_rdm_process_event(&event_copy, ep);
		}

		free(data);
		data = NULL;

		event = NULL;

		pthread_mutex_unlock(&ep->cm_lock);

		if (ret != FI_SUCCESS) {
			break;
		}

		if(rdma_get_cm_event(ep->cm.ec, &event)) {
			if(errno == EAGAIN) {
				errno = 0;
				usleep(FI_IBV_RDM_CM_THREAD_TIMEOUT);
				break;
			} else {
				VERBS_INFO_ERRNO(FI_LOG_AV,
						 "rdma_get_cm_event failed\n",
						 errno);
				ret = -errno;
			}
		}
	}

	return ret;
}