Example #1
0
static void handle_accept_conn(struct poll_fd_mgr *poll_mgr,
			       struct poll_fd_info *poll_info)
{
	struct fi_eq_cm_entry cm_entry;
	struct fi_eq_err_entry err_entry;
	struct tcpx_ep *ep;
	int ret;

	assert(poll_info->fid->fclass == FI_CLASS_EP);
	ep = container_of(poll_info->fid, struct tcpx_ep, util_ep.ep_fid.fid);

	ret = tx_cm_data(ep->conn_fd, ofi_ctrl_connresp, poll_info);
	if (ret)
		goto err;

	cm_entry.fid =  poll_info->fid;

	ret = (int) fi_eq_write(&ep->util_ep.eq->eq_fid, FI_CONNECTED,
				&cm_entry, sizeof(cm_entry), 0);
	if (ret < 0) {
		FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL, "Error writing to EQ\n");
	}

	ret = fi_fd_nonblock(ep->conn_fd);
	return;
err:
	memset(&err_entry, 0, sizeof err_entry);
	err_entry.fid = poll_info->fid;
	err_entry.context = poll_info->fid->context;
	err_entry.err = ret;

	fi_eq_write(&ep->util_ep.eq->eq_fid, FI_SHUTDOWN,
		    &err_entry, sizeof(err_entry), UTIL_FLAG_ERROR);
}
Example #2
0
static int proc_conn_resp(struct poll_fd_mgr *poll_mgr,
			  struct poll_fd_info *poll_info,
			  struct tcpx_ep *ep,
			  int index)
{
	struct ofi_ctrl_hdr conn_resp;
	struct fi_eq_cm_entry *cm_entry;
	int ret = FI_SUCCESS;

	assert(poll_mgr->poll_fds[index].revents == POLLIN);
	ret = rx_cm_data(ep->conn_fd, &conn_resp, ofi_ctrl_connresp, poll_info);
	if (ret)
		return ret;

	cm_entry = calloc(1, sizeof(*cm_entry) + poll_info->cm_data_sz);
	if (!cm_entry) {
		FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL, "mem alloc failed\n");
		return -FI_ENOMEM;
	}

	cm_entry->fid = poll_info->fid;
	memcpy(cm_entry->data, poll_info->cm_data, poll_info->cm_data_sz);

	ret = (int) fi_eq_write(&ep->util_ep.eq->eq_fid, FI_CONNECTED, cm_entry,
				sizeof(*cm_entry) + poll_info->cm_data_sz, 0);
	if (ret < 0) {
		FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL, "Error writing to EQ\n");
		goto err;
	}
	ret = fi_fd_nonblock(ep->conn_fd);
err:
	free(cm_entry);
	return ret;
}
Example #3
0
static int fi_ibv_rdm_cm_init(struct fi_ibv_rdm_cm* cm,
			      const struct rdma_addrinfo* rai)
{
	struct sockaddr_in* src_addr = (struct sockaddr_in*)rai->ai_src_addr;
	cm->ec = rdma_create_event_channel();

	if (!cm->ec) {
		VERBS_INFO(FI_LOG_EP_CTRL,
			"Failed to create listener event channel: %s\n",
			strerror(errno));
		return -FI_EOTHER;
	}

	if (fi_fd_nonblock(cm->ec->fd) != 0) {
		VERBS_INFO_ERRNO(FI_LOG_EP_CTRL, "fcntl", errno);
		return -FI_EOTHER;
	}

	if (rdma_create_id(cm->ec, &cm->listener, NULL, RDMA_PS_TCP)) {
		VERBS_INFO(FI_LOG_EP_CTRL, "Failed to create cm listener: %s\n",
			     strerror(errno));
		return -FI_EOTHER;
	}

	if (fi_ibv_rdm_find_ipoib_addr(src_addr, cm)) {
		VERBS_INFO(FI_LOG_EP_CTRL, 
			   "Failed to find correct IPoIB address\n");
		return -FI_ENODEV;
	}

	cm->my_addr.sin_port = src_addr->sin_port;

	char my_ipoib_addr_str[INET6_ADDRSTRLEN];
	inet_ntop(cm->my_addr.sin_family,
		  &cm->my_addr.sin_addr.s_addr,
		  my_ipoib_addr_str, INET_ADDRSTRLEN);

	VERBS_INFO(FI_LOG_EP_CTRL, "My IPoIB: %s\n", my_ipoib_addr_str);

	if (rdma_bind_addr(cm->listener, (struct sockaddr *)&cm->my_addr)) {
		VERBS_INFO(FI_LOG_EP_CTRL,
			"Failed to bind cm listener to my IPoIB addr %s: %s\n",
			my_ipoib_addr_str, strerror(errno));
		return -FI_EOTHER;
	}

	if (!cm->my_addr.sin_port) {
		cm->my_addr.sin_port = rdma_get_src_port(cm->listener);
	}
	assert(cm->my_addr.sin_family == AF_INET);

	VERBS_INFO(FI_LOG_EP_CTRL, "My ep_addr: %s:%u\n",
		inet_ntoa(cm->my_addr.sin_addr), ntohs(cm->my_addr.sin_port));

	return FI_SUCCESS;
}
Example #4
0
int fd_set_nonblock(int fd)
{
	int ret;

	ret = fi_fd_nonblock(fd);
	if (ret) {
		SOCK_LOG_ERROR("fi_fd_nonblock failed\n");
	}

	return ret;
}
Example #5
0
static int tcpx_ep_msg_xfer_enable(struct tcpx_ep *ep)
{
	int ret;

	fastlock_acquire(&ep->lock);
	if (ep->cm_state != TCPX_EP_CONNECTING) {
		fastlock_release(&ep->lock);
		return -FI_EINVAL;
	}
	ep->progress_func = tcpx_ep_progress;
	ret = fi_fd_nonblock(ep->conn_fd);
	if (ret)
		goto err;

	ret = tcpx_cq_wait_ep_add(ep);
	if (ret)
		goto err;

	ep->cm_state = TCPX_EP_CONNECTED;
err:
	fastlock_release(&ep->lock);
	return ret;
}
int fi_ibv_eq_open(struct fid_fabric *fabric, struct fi_eq_attr *attr,
		   struct fid_eq **eq, void *context)
{
	struct fi_ibv_eq *_eq;
	struct epoll_event event;
	int ret;

	_eq = calloc(1, sizeof *_eq);
	if (!_eq)
		return -ENOMEM;

	_eq->fab = container_of(fabric, struct fi_ibv_fabric, fabric_fid);

	fastlock_init(&_eq->lock);
	ret = dlistfd_head_init(&_eq->list_head);
	if (ret) {
		FI_INFO(&fi_ibv_prov, FI_LOG_EQ, "Unable to initialize dlistfd\n");
		goto err1;
	}

	_eq->epfd = epoll_create1(0);
	if (_eq->epfd < 0) {
		ret = -errno;
		goto err2;
	}

	memset(&event, 0, sizeof(event));
	event.events = EPOLLIN;

	if (epoll_ctl(_eq->epfd, EPOLL_CTL_ADD,
		      _eq->list_head.signal.fd[FI_READ_FD], &event)) {
		ret = -errno;
		goto err3;
	}

	switch (attr->wait_obj) {
	case FI_WAIT_NONE:
	case FI_WAIT_UNSPEC:
	case FI_WAIT_FD:
		_eq->channel = rdma_create_event_channel();
		if (!_eq->channel) {
			ret = -errno;
			goto err3;
		}

		ret = fi_fd_nonblock(_eq->channel->fd);
		if (ret)
			goto err4;

		if (epoll_ctl(_eq->epfd, EPOLL_CTL_ADD, _eq->channel->fd, &event)) {
			ret = -errno;
			goto err4;
		}

		break;
	default:
		ret = -FI_ENOSYS;
		goto err1;
	}

	_eq->flags = attr->flags;
	_eq->eq_fid.fid.fclass = FI_CLASS_EQ;
	_eq->eq_fid.fid.context = context;
	_eq->eq_fid.fid.ops = &fi_ibv_eq_fi_ops;
	_eq->eq_fid.ops = &fi_ibv_eq_ops;

	*eq = &_eq->eq_fid;
	return 0;
err4:
	if (_eq->channel)
		rdma_destroy_event_channel(_eq->channel);
err3:
	close(_eq->epfd);
err2:
	dlistfd_head_free(&_eq->list_head);
err1:
	fastlock_destroy(&_eq->lock);
	free(_eq);
	return ret;
}
DIRECT_FN STATIC int gnix_connect(struct fid_ep *ep, const void *addr,
				  const void *param, size_t paramlen)
{
	int ret;
	struct gnix_fid_ep *ep_priv;
	struct sockaddr_in saddr;
	struct gnix_pep_sock_connreq req;
	struct fi_eq_cm_entry *eqe_ptr;
	struct gnix_vc *vc;
	struct gnix_mbox *mbox = NULL;
	struct gnix_av_addr_entry av_entry;

	if (!ep || !addr || (paramlen && !param) ||
	    paramlen > GNIX_CM_DATA_MAX_SIZE)
		return -FI_EINVAL;

	ep_priv = container_of(ep, struct gnix_fid_ep, ep_fid.fid);

	COND_ACQUIRE(ep_priv->requires_lock, &ep_priv->vc_lock);

	if (ep_priv->conn_state != GNIX_EP_UNCONNECTED) {
		ret = -FI_EINVAL;
		goto err_unlock;
	}

	ret = _gnix_pe_to_ip(addr, &saddr);
	if (ret != FI_SUCCESS) {
		GNIX_INFO(FI_LOG_EP_CTRL,
			  "Failed to translate gnix_ep_name to IP\n");
		goto err_unlock;
	}

	/* Create new VC without CM data. */
	av_entry.gnix_addr = ep_priv->dest_addr.gnix_addr;
	av_entry.cm_nic_cdm_id = ep_priv->dest_addr.cm_nic_cdm_id;
	ret = _gnix_vc_alloc(ep_priv, &av_entry, &vc);
	if (ret != FI_SUCCESS) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "Failed to create VC:: %d\n",
			  ret);
		goto err_unlock;
	}
	ep_priv->vc = vc;

	ret = _gnix_mbox_alloc(vc->ep->nic->mbox_hndl, &mbox);
	if (ret != FI_SUCCESS) {
		GNIX_WARN(FI_LOG_EP_DATA,
			  "_gnix_mbox_alloc returned %s\n",
			  fi_strerror(-ret));
		goto err_mbox_alloc;
	}
	vc->smsg_mbox = mbox;

	ep_priv->conn_fd = socket(AF_INET, SOCK_STREAM, 0);
	if (ep_priv->conn_fd < 0) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "Failed to create connect socket, errno: %d\n",
			  errno);
		ret = -FI_ENOSPC;
		goto err_socket;
	}

	/* Currently blocks until connected. */
	ret = connect(ep_priv->conn_fd, (struct sockaddr *)&saddr,
		      sizeof(saddr));
	if (ret) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "Failed to connect, errno: %d\n",
			  errno);
		ret = -FI_EIO;
		goto err_connect;
	}

	req.info = *ep_priv->info;

	/* Note addrs are swapped. */
	memcpy(&req.dest_addr, (void *)&ep_priv->src_addr,
	       sizeof(req.dest_addr));
	memcpy(&ep_priv->dest_addr, addr, sizeof(ep_priv->dest_addr));
	memcpy(&req.src_addr, addr, sizeof(req.src_addr));

	if (ep_priv->info->tx_attr)
		req.tx_attr = *ep_priv->info->tx_attr;
	if (ep_priv->info->rx_attr)
		req.rx_attr = *ep_priv->info->rx_attr;
	if (ep_priv->info->ep_attr)
		req.ep_attr = *ep_priv->info->ep_attr;
	if (ep_priv->info->domain_attr)
		req.domain_attr = *ep_priv->info->domain_attr;
	if (ep_priv->info->fabric_attr)
		req.fabric_attr = *ep_priv->info->fabric_attr;

	req.fabric_attr.fabric = NULL;
	req.domain_attr.domain = NULL;

	req.vc_id = vc->vc_id;
	req.vc_mbox_attr.msg_type = GNI_SMSG_TYPE_MBOX_AUTO_RETRANSMIT;
	req.vc_mbox_attr.msg_buffer = mbox->base;
	req.vc_mbox_attr.buff_size =  vc->ep->nic->mem_per_mbox;
	req.vc_mbox_attr.mem_hndl = *mbox->memory_handle;
	req.vc_mbox_attr.mbox_offset = (uint64_t)mbox->offset;
	req.vc_mbox_attr.mbox_maxcredit =
			ep_priv->domain->params.mbox_maxcredit;
	req.vc_mbox_attr.msg_maxsize = ep_priv->domain->params.mbox_msg_maxsize;
	req.cq_irq_mdh = ep_priv->nic->irq_mem_hndl;
	req.peer_caps = ep_priv->caps;

	req.cm_data_len = paramlen;
	if (paramlen) {
		eqe_ptr = (struct fi_eq_cm_entry *)req.eqe_buf;
		memcpy(eqe_ptr->data, param, paramlen);
	}

	ret = write(ep_priv->conn_fd, &req, sizeof(req));
	if (ret != sizeof(req)) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "Failed to send req, errno: %d\n",
			  errno);
		ret = -FI_EIO;
		goto err_write;
	}
	/* set fd to non-blocking now since we can't block within the eq
	 * progress system
	 */
	fi_fd_nonblock(ep_priv->conn_fd);

	ep_priv->conn_state = GNIX_EP_CONNECTING;

	COND_RELEASE(ep_priv->requires_lock, &ep_priv->vc_lock);

	GNIX_DEBUG(FI_LOG_EP_CTRL, "Sent conn req: %p, %s\n",
		   ep_priv, inet_ntoa(saddr.sin_addr));

	return FI_SUCCESS;

err_write:
err_connect:
	close(ep_priv->conn_fd);
	ep_priv->conn_fd = -1;
err_socket:
	_gnix_mbox_free(ep_priv->vc->smsg_mbox);
	ep_priv->vc->smsg_mbox = NULL;
err_mbox_alloc:
	_gnix_vc_destroy(ep_priv->vc);
	ep_priv->vc = NULL;
err_unlock:
	COND_RELEASE(ep_priv->requires_lock, &ep_priv->vc_lock);

	return ret;
}
Example #8
0
int
usdf_pep_open(struct fid_fabric *fabric, struct fi_info *info,
              struct fid_pep **pep_o, void *context)
{
    struct usdf_pep *pep;
    struct usdf_fabric *fp;
    struct sockaddr_in *sin;
    int ret;
    int optval;

    USDF_TRACE_SYS(EP_CTRL, "\n");

    if (!info) {
        USDF_DBG_SYS(EP_CTRL, "null fi_info struct is invalid\n");
        return -FI_EINVAL;
    }

    if (info->ep_attr->type != FI_EP_MSG) {
        return -FI_ENODEV;
    }

    if ((info->caps & ~USDF_MSG_CAPS) != 0) {
        return -FI_EBADF;
    }

    switch (info->addr_format) {
    case FI_SOCKADDR:
        if (((struct sockaddr *)info->src_addr)->sa_family != AF_INET) {
            USDF_WARN_SYS(EP_CTRL, "non-AF_INET src_addr specified\n");
            return -FI_EINVAL;
        }
        break;
    case FI_SOCKADDR_IN:
        break;
    default:
        USDF_WARN_SYS(EP_CTRL, "unknown/unsupported addr_format\n");
        return -FI_EINVAL;
    }

    if (info->src_addrlen &&
            info->src_addrlen != sizeof(struct sockaddr_in)) {
        USDF_WARN_SYS(EP_CTRL, "unexpected src_addrlen\n");
        return -FI_EINVAL;
    }

    fp = fab_ftou(fabric);

    pep = calloc(1, sizeof(*pep));
    if (pep == NULL) {
        return -FI_ENOMEM;
    }

    pep->pep_fid.fid.fclass = FI_CLASS_PEP;
    pep->pep_fid.fid.context = context;
    pep->pep_fid.fid.ops = &usdf_pep_ops;
    pep->pep_fid.ops = &usdf_pep_base_ops;
    pep->pep_fid.cm = &usdf_pep_cm_ops;
    pep->pep_fabric = fp;

    pep->pep_state = USDF_PEP_UNBOUND;
    pep->pep_sock = socket(AF_INET, SOCK_STREAM, 0);
    if (pep->pep_sock == -1) {
        ret = -errno;
        goto fail;
    }
    ret = fi_fd_nonblock(pep->pep_sock);
    if (ret) {
        ret = -errno;
        goto fail;
    }

    /* set SO_REUSEADDR to prevent annoying "Address already in use" errors
     * on successive runs of programs listening on a well known port */
    optval = 1;
    ret = setsockopt(pep->pep_sock, SOL_SOCKET, SO_REUSEADDR, &optval,
                     sizeof(optval));
    if (ret == -1) {
        ret = -errno;
        goto fail;
    }

    pep->pep_info = fi_dupinfo(info);
    if (!pep->pep_info) {
        ret = -FI_ENOMEM;
        goto fail;
    }

    if (info->src_addrlen == 0) {
        /* Copy the source address information from the device
         * attributes.
         */
        pep->pep_info->src_addrlen = sizeof(struct sockaddr_in);
        sin = calloc(1, pep->pep_info->src_addrlen);
        if (!sin) {
            USDF_WARN_SYS(EP_CTRL,
                          "calloc for src address failed\n");
            goto fail;
        }

        sin->sin_family = AF_INET;
        sin->sin_addr.s_addr = fp->fab_dev_attrs->uda_ipaddr_be;
        pep->pep_info->src_addr = sin;
    }

    memcpy(&pep->pep_src_addr, pep->pep_info->src_addr,
           pep->pep_info->src_addrlen);

    /* initialize connreq freelist */
    ret = pthread_spin_init(&pep->pep_cr_lock, PTHREAD_PROCESS_PRIVATE);
    if (ret != 0) {
        ret = -ret;
        goto fail;
    }
    TAILQ_INIT(&pep->pep_cr_free);
    TAILQ_INIT(&pep->pep_cr_pending);
    pep->pep_backlog = 10;
    pep->pep_cr_max_data = USDF_MAX_CONN_DATA;

    ret = usdf_pep_grow_backlog(pep);
    if (ret != 0) {
        goto fail;
    }

    atomic_initialize(&pep->pep_refcnt, 0);
    atomic_inc(&fp->fab_refcnt);

    *pep_o = pep_utof(pep);
    return 0;

fail:
    if (pep != NULL) {
        usdf_pep_free_cr_lists(pep);
        if (pep->pep_sock != -1) {
            close(pep->pep_sock);
        }
        fi_freeinfo(pep->pep_info);
        free(pep);
    }
    return ret;
}
Example #9
0
static int
fi_ibv_mr_reg(struct fid *fid, const void *buf, size_t len,
	   uint64_t access, uint64_t offset, uint64_t requested_key,
	   uint64_t flags, struct fid_mr **mr, void *context)
{
	struct fi_ibv_mem_desc *md;
	int fi_ibv_access = 0;
	struct fid_domain *domain;

	if (flags)
		return -FI_EBADFLAGS;

	if (fid->fclass != FI_CLASS_DOMAIN) {
		return -FI_EINVAL;
	}
	domain = container_of(fid, struct fid_domain, fid);

	md = calloc(1, sizeof *md);
	if (!md)
		return -FI_ENOMEM;

	md->domain = container_of(domain, struct fi_ibv_domain, domain_fid);
	md->mr_fid.fid.fclass = FI_CLASS_MR;
	md->mr_fid.fid.context = context;
	md->mr_fid.fid.ops = &fi_ibv_mr_ops;

	/* Enable local write access by default for FI_EP_RDM which hides local
	 * registration requirements. This allows to avoid buffering or double
	 * registration */
	if (!(md->domain->info->caps & FI_LOCAL_MR))
		fi_ibv_access |= IBV_ACCESS_LOCAL_WRITE;

	/* Local read access to an MR is enabled by default in verbs */

	if (access & FI_RECV)
		fi_ibv_access |= IBV_ACCESS_LOCAL_WRITE;

	/* iWARP spec requires Remote Write access for an MR that is used
	 * as a data sink for a Remote Read */
	if (access & FI_READ) {
		fi_ibv_access |= IBV_ACCESS_LOCAL_WRITE;
		if (md->domain->verbs->device->transport_type == IBV_TRANSPORT_IWARP)
			fi_ibv_access |= IBV_ACCESS_REMOTE_WRITE;
	}

	if (access & FI_WRITE)
		fi_ibv_access |= IBV_ACCESS_LOCAL_WRITE;

	if (access & FI_REMOTE_READ)
		fi_ibv_access |= IBV_ACCESS_REMOTE_READ;

	/* Verbs requires Local Write access too for Remote Write access */
	if (access & FI_REMOTE_WRITE)
		fi_ibv_access |= IBV_ACCESS_LOCAL_WRITE |
			IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_ATOMIC;

	md->mr = ibv_reg_mr(md->domain->pd, (void *) buf, len, fi_ibv_access);
	if (!md->mr)
		goto err;

	md->mr_fid.mem_desc = (void *) (uintptr_t) md->mr->lkey;
	md->mr_fid.key = md->mr->rkey;
	*mr = &md->mr_fid;
	if(md->domain->eq && (md->domain->eq_flags & FI_REG_MR)) {
		struct fi_eq_entry entry = {
			.fid = &md->mr_fid.fid,
			.context = context
		};
		fi_ibv_eq_write_event(md->domain->eq, FI_MR_COMPLETE,
			 	      &entry, sizeof(entry));
	}
	return 0;

err:
	free(md);
	return -errno;
}

static int fi_ibv_mr_regv(struct fid *fid, const struct iovec * iov,
		size_t count, uint64_t access, uint64_t offset, uint64_t requested_key,
		uint64_t flags, struct fid_mr **mr, void *context)
{
	if (count > VERBS_MR_IOV_LIMIT) {
		VERBS_WARN(FI_LOG_FABRIC,
			   "iov count > %d not supported\n",
			   VERBS_MR_IOV_LIMIT);
		return -FI_EINVAL;
	}
	return fi_ibv_mr_reg(fid, iov->iov_base, iov->iov_len, access, offset,
			requested_key, flags, mr, context);
}

static int fi_ibv_mr_regattr(struct fid *fid, const struct fi_mr_attr *attr,
		uint64_t flags, struct fid_mr **mr)
{
	return fi_ibv_mr_regv(fid, attr->mr_iov, attr->iov_count, attr->access,
			0, attr->requested_key, flags, mr, attr->context);
}

static int fi_ibv_domain_bind(struct fid *fid, struct fid *bfid, uint64_t flags)
{
	struct fi_ibv_domain *domain;
	struct fi_ibv_eq *eq;

	domain = container_of(fid, struct fi_ibv_domain, domain_fid.fid);

	switch (bfid->fclass) {
	case FI_CLASS_EQ:
		eq = container_of(bfid, struct fi_ibv_eq, eq_fid);
		domain->eq = eq;
		domain->eq_flags = flags;
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

static int fi_ibv_domain_close(fid_t fid)
{
	struct fi_ibv_domain *domain;
	int ret;

	domain = container_of(fid, struct fi_ibv_domain, domain_fid.fid);

	if (domain->rdm) {
		rdma_destroy_ep(domain->rdm_cm->listener);
		free(domain->rdm_cm);
	}

	if (domain->pd) {
		ret = ibv_dealloc_pd(domain->pd);
		if (ret)
			return -ret;
		domain->pd = NULL;
	}

	fi_freeinfo(domain->info);
	free(domain);
	return 0;
}

static int fi_ibv_open_device_by_name(struct fi_ibv_domain *domain, const char *name)
{
	struct ibv_context **dev_list;
	int i, ret = -FI_ENODEV;

	if (!name)
		return -FI_EINVAL;

	dev_list = rdma_get_devices(NULL);
	if (!dev_list)
		return -errno;

	for (i = 0; dev_list[i] && ret; i++) {
		if (domain->rdm) {
			ret = strncmp(name, ibv_get_device_name(dev_list[i]->device),
				      strlen(name) - strlen(verbs_rdm_domain.suffix));

		} else {
			ret = strcmp(name, ibv_get_device_name(dev_list[i]->device));
		}

		if (!ret)
			domain->verbs = dev_list[i];
	}
	rdma_free_devices(dev_list);
	return ret;
}

static struct fi_ops fi_ibv_fid_ops = {
	.size = sizeof(struct fi_ops),
	.close = fi_ibv_domain_close,
	.bind = fi_ibv_domain_bind,
	.control = fi_no_control,
	.ops_open = fi_no_ops_open,
};

static struct fi_ops_mr fi_ibv_domain_mr_ops = {
	.size = sizeof(struct fi_ops_mr),
	.reg = fi_ibv_mr_reg,
	.regv = fi_ibv_mr_regv,
	.regattr = fi_ibv_mr_regattr,
};

static struct fi_ops_domain fi_ibv_domain_ops = {
	.size = sizeof(struct fi_ops_domain),
	.av_open = fi_no_av_open,
	.cq_open = fi_ibv_cq_open,
	.endpoint = fi_ibv_open_ep,
	.scalable_ep = fi_no_scalable_ep,
	.cntr_open = fi_no_cntr_open,
	.poll_open = fi_no_poll_open,
	.stx_ctx = fi_no_stx_context,
	.srx_ctx = fi_ibv_srq_context,
};

static struct fi_ops_domain fi_ibv_rdm_domain_ops = {
	.size = sizeof(struct fi_ops_domain),
	.av_open = fi_ibv_rdm_av_open,
	.cq_open = fi_ibv_rdm_cq_open,
	.endpoint = fi_ibv_rdm_open_ep,
	.scalable_ep = fi_no_scalable_ep,
	.cntr_open = fi_rbv_rdm_cntr_open,
	.poll_open = fi_no_poll_open,
	.stx_ctx = fi_no_stx_context,
	.srx_ctx = fi_no_srx_context,
};

static int
fi_ibv_domain(struct fid_fabric *fabric, struct fi_info *info,
	   struct fid_domain **domain, void *context)
{
	struct fi_ibv_domain *_domain;
	struct fi_ibv_fabric *fab;
	struct fi_info *fi;
	int ret;

	fi = fi_ibv_get_verbs_info(info->domain_attr->name);
	if (!fi)
		return -FI_EINVAL;

	fab = container_of(fabric, struct fi_ibv_fabric, util_fabric.fabric_fid);
	ret = ofi_check_domain_attr(&fi_ibv_prov, fabric->api_version,
				    fi->domain_attr, info->domain_attr);
	if (ret)
		return ret;

	_domain = calloc(1, sizeof *_domain);
	if (!_domain)
		return -FI_ENOMEM;

	_domain->info = fi_dupinfo(info);
	if (!_domain->info)
		goto err1;

	_domain->rdm = FI_IBV_EP_TYPE_IS_RDM(info);
	if (_domain->rdm) {
		_domain->rdm_cm = calloc(1, sizeof(*_domain->rdm_cm));
		if (!_domain->rdm_cm) {
			ret = -FI_ENOMEM;
			goto err2;
		}
	}
	ret = fi_ibv_open_device_by_name(_domain, info->domain_attr->name);
	if (ret)
		goto err2;

	_domain->pd = ibv_alloc_pd(_domain->verbs);
	if (!_domain->pd) {
		ret = -errno;
		goto err2;
	}

	_domain->domain_fid.fid.fclass = FI_CLASS_DOMAIN;
	_domain->domain_fid.fid.context = context;
	_domain->domain_fid.fid.ops = &fi_ibv_fid_ops;
	_domain->domain_fid.mr = &fi_ibv_domain_mr_ops;
	if (_domain->rdm) {
		_domain->domain_fid.ops = &fi_ibv_rdm_domain_ops;

		_domain->rdm_cm->ec = rdma_create_event_channel();

		if (!_domain->rdm_cm->ec) {
			VERBS_INFO(FI_LOG_EP_CTRL,
				"Failed to create listener event channel: %s\n",
				strerror(errno));
			ret = -FI_EOTHER;
			goto err2;
		}

		if (fi_fd_nonblock(_domain->rdm_cm->ec->fd) != 0) {
			VERBS_INFO_ERRNO(FI_LOG_EP_CTRL, "fcntl", errno);
			ret = -FI_EOTHER;
			goto err3;
		}

		if (rdma_create_id(_domain->rdm_cm->ec,
				   &_domain->rdm_cm->listener, NULL, RDMA_PS_TCP))
		{
			VERBS_INFO(FI_LOG_EP_CTRL, "Failed to create cm listener: %s\n",
				   strerror(errno));
			ret = -FI_EOTHER;
			goto err3;
		}
		_domain->rdm_cm->is_bound = 0;
	} else {
		_domain->domain_fid.ops = &fi_ibv_domain_ops;
	}
	_domain->fab = fab;

	*domain = &_domain->domain_fid;
	return 0;
err3:
	if (_domain->rdm)
		rdma_destroy_event_channel(_domain->rdm_cm->ec);
err2:
	if (_domain->rdm)
		free(_domain->rdm_cm);
	fi_freeinfo(_domain->info);
err1:
	free(_domain);
	return ret;
}

static int fi_ibv_trywait(struct fid_fabric *fabric, struct fid **fids, int count)
{
	struct fi_ibv_cq *cq;
	int ret, i;

	for (i = 0; i < count; i++) {
		switch (fids[i]->fclass) {
		case FI_CLASS_CQ:
			cq = container_of(fids[i], struct fi_ibv_cq, cq_fid.fid);
			ret = cq->trywait(fids[i]);
			if (ret)
				return ret;
			break;
		case FI_CLASS_EQ:
			/* We are always ready to wait on an EQ since
			 * rdmacm EQ is based on an fd */
			continue;
		case FI_CLASS_CNTR:
		case FI_CLASS_WAIT:
			return -FI_ENOSYS;
		default:
			return -FI_EINVAL;
		}

	}
	return FI_SUCCESS;
}

static int fi_ibv_fabric_close(fid_t fid)
{
	struct fi_ibv_fabric *fab;
	int ret;

	fab = container_of(fid, struct fi_ibv_fabric, util_fabric.fabric_fid.fid);
	ret = ofi_fabric_close(&fab->util_fabric);
	if (ret)
		return ret;
	free(fab);

	return 0;
}

static struct fi_ops fi_ibv_fi_ops = {
	.size = sizeof(struct fi_ops),
	.close = fi_ibv_fabric_close,
	.bind = fi_no_bind,
	.control = fi_no_control,
	.ops_open = fi_no_ops_open,
};

static struct fi_ops_fabric fi_ibv_ops_fabric = {
	.size = sizeof(struct fi_ops_fabric),
	.domain = fi_ibv_domain,
	.passive_ep = fi_ibv_passive_ep,
	.eq_open = fi_ibv_eq_open,
	.wait_open = fi_no_wait_open,
	.trywait = fi_ibv_trywait
};

int fi_ibv_fabric(struct fi_fabric_attr *attr, struct fid_fabric **fabric,
		  void *context)
{
	struct fi_ibv_fabric *fab;
	struct fi_info *info;
	int ret;

	ret = fi_ibv_init_info();
	if (ret)
		return ret;

	fab = calloc(1, sizeof(*fab));
	if (!fab)
		return -FI_ENOMEM;

	for (info = verbs_info; info; info = info->next) {
		ret = ofi_fabric_init(&fi_ibv_prov, info->fabric_attr, attr,
				      &fab->util_fabric, context);
		if (ret != -FI_ENODATA)
			break;
	}
	if (ret) {
		free(fab);
		return ret;
	}

	*fabric = &fab->util_fabric.fabric_fid;
	(*fabric)->fid.ops = &fi_ibv_fi_ops;
	(*fabric)->ops = &fi_ibv_ops_fabric;

	return 0;
}
Example #10
0
int
usdf_cm_msg_connect(struct fid_ep *fep, const void *addr,
		const void *param, size_t paramlen)
{
	struct usdf_connreq *crp;
	struct usdf_ep *ep;
	struct usdf_rx *rx;
	struct usdf_domain *udp;
	const struct sockaddr_in *sin;
	struct epoll_event ev;
	struct usdf_fabric *fp;
	struct usdf_connreq_msg *reqp;
	struct usd_qp_impl *qp;
	size_t request_size;
	int ret;

	USDF_TRACE_SYS(EP_CTRL, "\n");

	if (paramlen > USDF_MAX_CONN_DATA)
		return -FI_EINVAL;

	ep = ep_ftou(fep);
	udp = ep->ep_domain;
	fp = udp->dom_fabric;
	sin = addr;

	/* Although paramlen may be less than USDF_MAX_CONN_DATA, the same crp
	 * struct is used for receiving the accept and reject payload. The
	 * structure has to be prepared to receive the maximum allowable amount
	 * of data per transfer. The maximum size includes the connection
	 * request structure, the connection request message, and the maximum
	 * amount of data per connection request message.
	 */
	request_size = sizeof(*crp) + sizeof(*reqp) + USDF_MAX_CONN_DATA;
	crp = calloc(1, request_size);
	if (crp == NULL) {
		ret = -errno;
		goto fail;
	}
	ep->e.msg.ep_connreq = crp;

	crp->handle.fclass = FI_CLASS_CONNREQ;

	if (ep->e.msg.ep_cm_sock == -1) {
		crp->cr_sockfd = socket(AF_INET, SOCK_STREAM, 0);
		if (crp->cr_sockfd == -1) {
			ret = -errno;
			goto fail;
		}
	} else {
		crp->cr_sockfd = ep->e.msg.ep_cm_sock;
		ep->e.msg.ep_cm_sock = -1;
	}

	ret = fi_fd_nonblock(crp->cr_sockfd);
	if (ret) {
		ret = -errno;
		goto fail;
	}

	ret = usdf_ep_msg_get_queues(ep);
	if (ret != 0) {
		goto fail;
	}
	rx = ep->ep_rx;
	qp = to_qpi(rx->rx_qp);

	ret = connect(crp->cr_sockfd, (struct sockaddr *)sin, sizeof(*sin));
	if (ret != 0 && errno != EINPROGRESS) {
		ret = -errno;
		goto fail;
	}

	/* If cr_sockfd was previously unbound, connect(2) will do a a bind(2)
	 * for us.  Update our snapshot of the locally bound address. */
	ret = usdf_msg_upd_lcl_addr(ep);
	if (ret)
		goto fail;

	/* allocate remote peer ID */
	ep->e.msg.ep_rem_peer_id = udp->dom_next_peer;
	udp->dom_peer_tab[udp->dom_next_peer] = ep;
	++udp->dom_next_peer;

	crp->cr_ep = ep;
	reqp = (struct usdf_connreq_msg *)crp->cr_data;
	crp->cr_ptr = crp->cr_data;
	crp->cr_resid =  sizeof(*reqp) + paramlen;

	reqp->creq_peer_id = htons(ep->e.msg.ep_rem_peer_id);
	reqp->creq_ipaddr = fp->fab_dev_attrs->uda_ipaddr_be;
	reqp->creq_port =
		qp->uq_attrs.uqa_local_addr.ul_addr.ul_udp.u_addr.sin_port;
	reqp->creq_datalen = htonl(paramlen);
	memcpy(reqp->creq_data, param, paramlen);

	/* register for notification when connect completes */
	crp->cr_pollitem.pi_rtn = usdf_cm_msg_connect_cb_wr;
	crp->cr_pollitem.pi_context = crp;
	ev.events = EPOLLOUT;
	ev.data.ptr = &crp->cr_pollitem;
	ret = epoll_ctl(fp->fab_epollfd, EPOLL_CTL_ADD, crp->cr_sockfd, &ev);
	if (ret != 0) {
		crp->cr_pollitem.pi_rtn = NULL;
		ret = -errno;
		goto fail;
	}

	return 0;

fail:
	if (crp != NULL) {
		if (crp->cr_sockfd != -1) {
			close(crp->cr_sockfd);
		}
		free(crp);
		ep->e.msg.ep_connreq = NULL;
	}
	usdf_ep_msg_release_queues(ep);
	return ret;
}