static void handle_accept_conn(struct poll_fd_mgr *poll_mgr, struct poll_fd_info *poll_info) { struct fi_eq_cm_entry cm_entry; struct fi_eq_err_entry err_entry; struct tcpx_ep *ep; int ret; assert(poll_info->fid->fclass == FI_CLASS_EP); ep = container_of(poll_info->fid, struct tcpx_ep, util_ep.ep_fid.fid); ret = tx_cm_data(ep->conn_fd, ofi_ctrl_connresp, poll_info); if (ret) goto err; cm_entry.fid = poll_info->fid; ret = (int) fi_eq_write(&ep->util_ep.eq->eq_fid, FI_CONNECTED, &cm_entry, sizeof(cm_entry), 0); if (ret < 0) { FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL, "Error writing to EQ\n"); } ret = fi_fd_nonblock(ep->conn_fd); return; err: memset(&err_entry, 0, sizeof err_entry); err_entry.fid = poll_info->fid; err_entry.context = poll_info->fid->context; err_entry.err = ret; fi_eq_write(&ep->util_ep.eq->eq_fid, FI_SHUTDOWN, &err_entry, sizeof(err_entry), UTIL_FLAG_ERROR); }
static int proc_conn_resp(struct poll_fd_mgr *poll_mgr, struct poll_fd_info *poll_info, struct tcpx_ep *ep, int index) { struct ofi_ctrl_hdr conn_resp; struct fi_eq_cm_entry *cm_entry; int ret = FI_SUCCESS; assert(poll_mgr->poll_fds[index].revents == POLLIN); ret = rx_cm_data(ep->conn_fd, &conn_resp, ofi_ctrl_connresp, poll_info); if (ret) return ret; cm_entry = calloc(1, sizeof(*cm_entry) + poll_info->cm_data_sz); if (!cm_entry) { FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL, "mem alloc failed\n"); return -FI_ENOMEM; } cm_entry->fid = poll_info->fid; memcpy(cm_entry->data, poll_info->cm_data, poll_info->cm_data_sz); ret = (int) fi_eq_write(&ep->util_ep.eq->eq_fid, FI_CONNECTED, cm_entry, sizeof(*cm_entry) + poll_info->cm_data_sz, 0); if (ret < 0) { FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL, "Error writing to EQ\n"); goto err; } ret = fi_fd_nonblock(ep->conn_fd); err: free(cm_entry); return ret; }
static int fi_ibv_rdm_cm_init(struct fi_ibv_rdm_cm* cm, const struct rdma_addrinfo* rai) { struct sockaddr_in* src_addr = (struct sockaddr_in*)rai->ai_src_addr; cm->ec = rdma_create_event_channel(); if (!cm->ec) { VERBS_INFO(FI_LOG_EP_CTRL, "Failed to create listener event channel: %s\n", strerror(errno)); return -FI_EOTHER; } if (fi_fd_nonblock(cm->ec->fd) != 0) { VERBS_INFO_ERRNO(FI_LOG_EP_CTRL, "fcntl", errno); return -FI_EOTHER; } if (rdma_create_id(cm->ec, &cm->listener, NULL, RDMA_PS_TCP)) { VERBS_INFO(FI_LOG_EP_CTRL, "Failed to create cm listener: %s\n", strerror(errno)); return -FI_EOTHER; } if (fi_ibv_rdm_find_ipoib_addr(src_addr, cm)) { VERBS_INFO(FI_LOG_EP_CTRL, "Failed to find correct IPoIB address\n"); return -FI_ENODEV; } cm->my_addr.sin_port = src_addr->sin_port; char my_ipoib_addr_str[INET6_ADDRSTRLEN]; inet_ntop(cm->my_addr.sin_family, &cm->my_addr.sin_addr.s_addr, my_ipoib_addr_str, INET_ADDRSTRLEN); VERBS_INFO(FI_LOG_EP_CTRL, "My IPoIB: %s\n", my_ipoib_addr_str); if (rdma_bind_addr(cm->listener, (struct sockaddr *)&cm->my_addr)) { VERBS_INFO(FI_LOG_EP_CTRL, "Failed to bind cm listener to my IPoIB addr %s: %s\n", my_ipoib_addr_str, strerror(errno)); return -FI_EOTHER; } if (!cm->my_addr.sin_port) { cm->my_addr.sin_port = rdma_get_src_port(cm->listener); } assert(cm->my_addr.sin_family == AF_INET); VERBS_INFO(FI_LOG_EP_CTRL, "My ep_addr: %s:%u\n", inet_ntoa(cm->my_addr.sin_addr), ntohs(cm->my_addr.sin_port)); return FI_SUCCESS; }
/* Switch a file descriptor to non-blocking mode, logging on failure.
 * Returns the fi_fd_nonblock() result unchanged (0 on success).
 */
int fd_set_nonblock(int fd)
{
	int err = fi_fd_nonblock(fd);

	if (err)
		SOCK_LOG_ERROR("fi_fd_nonblock failed\n");

	return err;
}
static int tcpx_ep_msg_xfer_enable(struct tcpx_ep *ep) { int ret; fastlock_acquire(&ep->lock); if (ep->cm_state != TCPX_EP_CONNECTING) { fastlock_release(&ep->lock); return -FI_EINVAL; } ep->progress_func = tcpx_ep_progress; ret = fi_fd_nonblock(ep->conn_fd); if (ret) goto err; ret = tcpx_cq_wait_ep_add(ep); if (ret) goto err; ep->cm_state = TCPX_EP_CONNECTED; err: fastlock_release(&ep->lock); return ret; }
int fi_ibv_eq_open(struct fid_fabric *fabric, struct fi_eq_attr *attr, struct fid_eq **eq, void *context) { struct fi_ibv_eq *_eq; struct epoll_event event; int ret; _eq = calloc(1, sizeof *_eq); if (!_eq) return -ENOMEM; _eq->fab = container_of(fabric, struct fi_ibv_fabric, fabric_fid); fastlock_init(&_eq->lock); ret = dlistfd_head_init(&_eq->list_head); if (ret) { FI_INFO(&fi_ibv_prov, FI_LOG_EQ, "Unable to initialize dlistfd\n"); goto err1; } _eq->epfd = epoll_create1(0); if (_eq->epfd < 0) { ret = -errno; goto err2; } memset(&event, 0, sizeof(event)); event.events = EPOLLIN; if (epoll_ctl(_eq->epfd, EPOLL_CTL_ADD, _eq->list_head.signal.fd[FI_READ_FD], &event)) { ret = -errno; goto err3; } switch (attr->wait_obj) { case FI_WAIT_NONE: case FI_WAIT_UNSPEC: case FI_WAIT_FD: _eq->channel = rdma_create_event_channel(); if (!_eq->channel) { ret = -errno; goto err3; } ret = fi_fd_nonblock(_eq->channel->fd); if (ret) goto err4; if (epoll_ctl(_eq->epfd, EPOLL_CTL_ADD, _eq->channel->fd, &event)) { ret = -errno; goto err4; } break; default: ret = -FI_ENOSYS; goto err1; } _eq->flags = attr->flags; _eq->eq_fid.fid.fclass = FI_CLASS_EQ; _eq->eq_fid.fid.context = context; _eq->eq_fid.fid.ops = &fi_ibv_eq_fi_ops; _eq->eq_fid.ops = &fi_ibv_eq_ops; *eq = &_eq->eq_fid; return 0; err4: if (_eq->channel) rdma_destroy_event_channel(_eq->channel); err3: close(_eq->epfd); err2: dlistfd_head_free(&_eq->list_head); err1: fastlock_destroy(&_eq->lock); free(_eq); return ret; }
/* Active-side connect for a GNI MSG endpoint.  Allocates the VC and SMSG
 * mailbox, opens a TCP socket to the peer (translated from the gnix_ep_name),
 * and writes a single connection-request struct describing this endpoint's
 * attributes and mailbox so the passive side can complete the SMSG handshake.
 * Holds the VC lock (when required) for the whole exchange.
 * Returns FI_SUCCESS or a negative fi_errno value; on failure all resources
 * acquired so far are unwound via the cascading error labels.
 */
DIRECT_FN STATIC int gnix_connect(struct fid_ep *ep, const void *addr,
				  const void *param, size_t paramlen)
{
	int ret;
	struct gnix_fid_ep *ep_priv;
	struct sockaddr_in saddr;
	struct gnix_pep_sock_connreq req;
	struct fi_eq_cm_entry *eqe_ptr;
	struct gnix_vc *vc;
	struct gnix_mbox *mbox = NULL;
	struct gnix_av_addr_entry av_entry;

	/* CM data is optional, but if present must fit the request buffer. */
	if (!ep || !addr || (paramlen && !param) ||
	    paramlen > GNIX_CM_DATA_MAX_SIZE)
		return -FI_EINVAL;

	ep_priv = container_of(ep, struct gnix_fid_ep, ep_fid.fid);

	COND_ACQUIRE(ep_priv->requires_lock, &ep_priv->vc_lock);

	/* Only a fresh endpoint may initiate a connection. */
	if (ep_priv->conn_state != GNIX_EP_UNCONNECTED) {
		ret = -FI_EINVAL;
		goto err_unlock;
	}

	/* Map the fabric-level peer name to the IP/port used for the
	 * out-of-band socket handshake. */
	ret = _gnix_pe_to_ip(addr, &saddr);
	if (ret != FI_SUCCESS) {
		GNIX_INFO(FI_LOG_EP_CTRL,
			  "Failed to translate gnix_ep_name to IP\n");
		goto err_unlock;
	}

	/* Create new VC without CM data. */
	av_entry.gnix_addr = ep_priv->dest_addr.gnix_addr;
	av_entry.cm_nic_cdm_id = ep_priv->dest_addr.cm_nic_cdm_id;
	ret = _gnix_vc_alloc(ep_priv, &av_entry, &vc);
	if (ret != FI_SUCCESS) {
		GNIX_WARN(FI_LOG_EP_CTRL, "Failed to create VC:: %d\n", ret);
		goto err_unlock;
	}
	ep_priv->vc = vc;

	/* The SMSG mailbox backs the in-band messaging once connected. */
	ret = _gnix_mbox_alloc(vc->ep->nic->mbox_hndl, &mbox);
	if (ret != FI_SUCCESS) {
		GNIX_WARN(FI_LOG_EP_DATA,
			  "_gnix_mbox_alloc returned %s\n", fi_strerror(-ret));
		goto err_mbox_alloc;
	}
	vc->smsg_mbox = mbox;

	ep_priv->conn_fd = socket(AF_INET, SOCK_STREAM, 0);
	if (ep_priv->conn_fd < 0) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "Failed to create connect socket, errno: %d\n",
			  errno);
		ret = -FI_ENOSPC;
		goto err_socket;
	}

	/* Currently blocks until connected. */
	ret = connect(ep_priv->conn_fd, (struct sockaddr *)&saddr,
		      sizeof(saddr));
	if (ret) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "Failed to connect, errno: %d\n", errno);
		ret = -FI_EIO;
		goto err_connect;
	}

	req.info = *ep_priv->info;

	/* Note addrs are swapped: the request carries addresses from the
	 * receiver's point of view. */
	memcpy(&req.dest_addr, (void *)&ep_priv->src_addr,
	       sizeof(req.dest_addr));
	memcpy(&ep_priv->dest_addr, addr, sizeof(ep_priv->dest_addr));
	memcpy(&req.src_addr, addr, sizeof(req.src_addr));

	/* Copy by value any attribute structs the info actually carries. */
	if (ep_priv->info->tx_attr)
		req.tx_attr = *ep_priv->info->tx_attr;
	if (ep_priv->info->rx_attr)
		req.rx_attr = *ep_priv->info->rx_attr;
	if (ep_priv->info->ep_attr)
		req.ep_attr = *ep_priv->info->ep_attr;
	if (ep_priv->info->domain_attr)
		req.domain_attr = *ep_priv->info->domain_attr;
	if (ep_priv->info->fabric_attr)
		req.fabric_attr = *ep_priv->info->fabric_attr;
	/* Pointers are meaningless across processes — strip them. */
	req.fabric_attr.fabric = NULL;
	req.domain_attr.domain = NULL;

	/* Describe our mailbox so the peer can set up its SMSG side. */
	req.vc_id = vc->vc_id;
	req.vc_mbox_attr.msg_type = GNI_SMSG_TYPE_MBOX_AUTO_RETRANSMIT;
	req.vc_mbox_attr.msg_buffer = mbox->base;
	req.vc_mbox_attr.buff_size = vc->ep->nic->mem_per_mbox;
	req.vc_mbox_attr.mem_hndl = *mbox->memory_handle;
	req.vc_mbox_attr.mbox_offset = (uint64_t)mbox->offset;
	req.vc_mbox_attr.mbox_maxcredit =
		ep_priv->domain->params.mbox_maxcredit;
	req.vc_mbox_attr.msg_maxsize = ep_priv->domain->params.mbox_msg_maxsize;
	req.cq_irq_mdh = ep_priv->nic->irq_mem_hndl;
	req.peer_caps = ep_priv->caps;

	/* CM data rides inside an embedded EQ entry buffer. */
	req.cm_data_len = paramlen;
	if (paramlen) {
		eqe_ptr = (struct fi_eq_cm_entry *)req.eqe_buf;
		memcpy(eqe_ptr->data, param, paramlen);
	}

	/* NOTE(review): a short write (< sizeof(req)) is treated as failure
	 * but not retried — presumably acceptable on a blocking socket for
	 * this message size; confirm. */
	ret = write(ep_priv->conn_fd, &req, sizeof(req));
	if (ret != sizeof(req)) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "Failed to send req, errno: %d\n", errno);
		ret = -FI_EIO;
		goto err_write;
	}
	/* set fd to non-blocking now since we can't block within the eq
	 * progress system */
	fi_fd_nonblock(ep_priv->conn_fd);

	ep_priv->conn_state = GNIX_EP_CONNECTING;

	COND_RELEASE(ep_priv->requires_lock, &ep_priv->vc_lock);

	GNIX_DEBUG(FI_LOG_EP_CTRL, "Sent conn req: %p, %s\n",
		   ep_priv, inet_ntoa(saddr.sin_addr));

	return FI_SUCCESS;

	/* Unwind in strict reverse order of acquisition. */
err_write:
err_connect:
	close(ep_priv->conn_fd);
	ep_priv->conn_fd = -1;
err_socket:
	_gnix_mbox_free(ep_priv->vc->smsg_mbox);
	ep_priv->vc->smsg_mbox = NULL;
err_mbox_alloc:
	_gnix_vc_destroy(ep_priv->vc);
	ep_priv->vc = NULL;
err_unlock:
	COND_RELEASE(ep_priv->requires_lock, &ep_priv->vc_lock);

	return ret;
}
int usdf_pep_open(struct fid_fabric *fabric, struct fi_info *info, struct fid_pep **pep_o, void *context) { struct usdf_pep *pep; struct usdf_fabric *fp; struct sockaddr_in *sin; int ret; int optval; USDF_TRACE_SYS(EP_CTRL, "\n"); if (!info) { USDF_DBG_SYS(EP_CTRL, "null fi_info struct is invalid\n"); return -FI_EINVAL; } if (info->ep_attr->type != FI_EP_MSG) { return -FI_ENODEV; } if ((info->caps & ~USDF_MSG_CAPS) != 0) { return -FI_EBADF; } switch (info->addr_format) { case FI_SOCKADDR: if (((struct sockaddr *)info->src_addr)->sa_family != AF_INET) { USDF_WARN_SYS(EP_CTRL, "non-AF_INET src_addr specified\n"); return -FI_EINVAL; } break; case FI_SOCKADDR_IN: break; default: USDF_WARN_SYS(EP_CTRL, "unknown/unsupported addr_format\n"); return -FI_EINVAL; } if (info->src_addrlen && info->src_addrlen != sizeof(struct sockaddr_in)) { USDF_WARN_SYS(EP_CTRL, "unexpected src_addrlen\n"); return -FI_EINVAL; } fp = fab_ftou(fabric); pep = calloc(1, sizeof(*pep)); if (pep == NULL) { return -FI_ENOMEM; } pep->pep_fid.fid.fclass = FI_CLASS_PEP; pep->pep_fid.fid.context = context; pep->pep_fid.fid.ops = &usdf_pep_ops; pep->pep_fid.ops = &usdf_pep_base_ops; pep->pep_fid.cm = &usdf_pep_cm_ops; pep->pep_fabric = fp; pep->pep_state = USDF_PEP_UNBOUND; pep->pep_sock = socket(AF_INET, SOCK_STREAM, 0); if (pep->pep_sock == -1) { ret = -errno; goto fail; } ret = fi_fd_nonblock(pep->pep_sock); if (ret) { ret = -errno; goto fail; } /* set SO_REUSEADDR to prevent annoying "Address already in use" errors * on successive runs of programs listening on a well known port */ optval = 1; ret = setsockopt(pep->pep_sock, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval)); if (ret == -1) { ret = -errno; goto fail; } pep->pep_info = fi_dupinfo(info); if (!pep->pep_info) { ret = -FI_ENOMEM; goto fail; } if (info->src_addrlen == 0) { /* Copy the source address information from the device * attributes. 
*/ pep->pep_info->src_addrlen = sizeof(struct sockaddr_in); sin = calloc(1, pep->pep_info->src_addrlen); if (!sin) { USDF_WARN_SYS(EP_CTRL, "calloc for src address failed\n"); goto fail; } sin->sin_family = AF_INET; sin->sin_addr.s_addr = fp->fab_dev_attrs->uda_ipaddr_be; pep->pep_info->src_addr = sin; } memcpy(&pep->pep_src_addr, pep->pep_info->src_addr, pep->pep_info->src_addrlen); /* initialize connreq freelist */ ret = pthread_spin_init(&pep->pep_cr_lock, PTHREAD_PROCESS_PRIVATE); if (ret != 0) { ret = -ret; goto fail; } TAILQ_INIT(&pep->pep_cr_free); TAILQ_INIT(&pep->pep_cr_pending); pep->pep_backlog = 10; pep->pep_cr_max_data = USDF_MAX_CONN_DATA; ret = usdf_pep_grow_backlog(pep); if (ret != 0) { goto fail; } atomic_initialize(&pep->pep_refcnt, 0); atomic_inc(&fp->fab_refcnt); *pep_o = pep_utof(pep); return 0; fail: if (pep != NULL) { usdf_pep_free_cr_lists(pep); if (pep->pep_sock != -1) { close(pep->pep_sock); } fi_freeinfo(pep->pep_info); free(pep); } return ret; }
static int fi_ibv_mr_reg(struct fid *fid, const void *buf, size_t len, uint64_t access, uint64_t offset, uint64_t requested_key, uint64_t flags, struct fid_mr **mr, void *context) { struct fi_ibv_mem_desc *md; int fi_ibv_access = 0; struct fid_domain *domain; if (flags) return -FI_EBADFLAGS; if (fid->fclass != FI_CLASS_DOMAIN) { return -FI_EINVAL; } domain = container_of(fid, struct fid_domain, fid); md = calloc(1, sizeof *md); if (!md) return -FI_ENOMEM; md->domain = container_of(domain, struct fi_ibv_domain, domain_fid); md->mr_fid.fid.fclass = FI_CLASS_MR; md->mr_fid.fid.context = context; md->mr_fid.fid.ops = &fi_ibv_mr_ops; /* Enable local write access by default for FI_EP_RDM which hides local * registration requirements. This allows to avoid buffering or double * registration */ if (!(md->domain->info->caps & FI_LOCAL_MR)) fi_ibv_access |= IBV_ACCESS_LOCAL_WRITE; /* Local read access to an MR is enabled by default in verbs */ if (access & FI_RECV) fi_ibv_access |= IBV_ACCESS_LOCAL_WRITE; /* iWARP spec requires Remote Write access for an MR that is used * as a data sink for a Remote Read */ if (access & FI_READ) { fi_ibv_access |= IBV_ACCESS_LOCAL_WRITE; if (md->domain->verbs->device->transport_type == IBV_TRANSPORT_IWARP) fi_ibv_access |= IBV_ACCESS_REMOTE_WRITE; } if (access & FI_WRITE) fi_ibv_access |= IBV_ACCESS_LOCAL_WRITE; if (access & FI_REMOTE_READ) fi_ibv_access |= IBV_ACCESS_REMOTE_READ; /* Verbs requires Local Write access too for Remote Write access */ if (access & FI_REMOTE_WRITE) fi_ibv_access |= IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_ATOMIC; md->mr = ibv_reg_mr(md->domain->pd, (void *) buf, len, fi_ibv_access); if (!md->mr) goto err; md->mr_fid.mem_desc = (void *) (uintptr_t) md->mr->lkey; md->mr_fid.key = md->mr->rkey; *mr = &md->mr_fid; if(md->domain->eq && (md->domain->eq_flags & FI_REG_MR)) { struct fi_eq_entry entry = { .fid = &md->mr_fid.fid, .context = context }; fi_ibv_eq_write_event(md->domain->eq, 
FI_MR_COMPLETE, &entry, sizeof(entry)); } return 0; err: free(md); return -errno; } static int fi_ibv_mr_regv(struct fid *fid, const struct iovec * iov, size_t count, uint64_t access, uint64_t offset, uint64_t requested_key, uint64_t flags, struct fid_mr **mr, void *context) { if (count > VERBS_MR_IOV_LIMIT) { VERBS_WARN(FI_LOG_FABRIC, "iov count > %d not supported\n", VERBS_MR_IOV_LIMIT); return -FI_EINVAL; } return fi_ibv_mr_reg(fid, iov->iov_base, iov->iov_len, access, offset, requested_key, flags, mr, context); } static int fi_ibv_mr_regattr(struct fid *fid, const struct fi_mr_attr *attr, uint64_t flags, struct fid_mr **mr) { return fi_ibv_mr_regv(fid, attr->mr_iov, attr->iov_count, attr->access, 0, attr->requested_key, flags, mr, attr->context); } static int fi_ibv_domain_bind(struct fid *fid, struct fid *bfid, uint64_t flags) { struct fi_ibv_domain *domain; struct fi_ibv_eq *eq; domain = container_of(fid, struct fi_ibv_domain, domain_fid.fid); switch (bfid->fclass) { case FI_CLASS_EQ: eq = container_of(bfid, struct fi_ibv_eq, eq_fid); domain->eq = eq; domain->eq_flags = flags; break; default: return -EINVAL; } return 0; } static int fi_ibv_domain_close(fid_t fid) { struct fi_ibv_domain *domain; int ret; domain = container_of(fid, struct fi_ibv_domain, domain_fid.fid); if (domain->rdm) { rdma_destroy_ep(domain->rdm_cm->listener); free(domain->rdm_cm); } if (domain->pd) { ret = ibv_dealloc_pd(domain->pd); if (ret) return -ret; domain->pd = NULL; } fi_freeinfo(domain->info); free(domain); return 0; } static int fi_ibv_open_device_by_name(struct fi_ibv_domain *domain, const char *name) { struct ibv_context **dev_list; int i, ret = -FI_ENODEV; if (!name) return -FI_EINVAL; dev_list = rdma_get_devices(NULL); if (!dev_list) return -errno; for (i = 0; dev_list[i] && ret; i++) { if (domain->rdm) { ret = strncmp(name, ibv_get_device_name(dev_list[i]->device), strlen(name) - strlen(verbs_rdm_domain.suffix)); } else { ret = strcmp(name, 
ibv_get_device_name(dev_list[i]->device)); } if (!ret) domain->verbs = dev_list[i]; } rdma_free_devices(dev_list); return ret; } static struct fi_ops fi_ibv_fid_ops = { .size = sizeof(struct fi_ops), .close = fi_ibv_domain_close, .bind = fi_ibv_domain_bind, .control = fi_no_control, .ops_open = fi_no_ops_open, }; static struct fi_ops_mr fi_ibv_domain_mr_ops = { .size = sizeof(struct fi_ops_mr), .reg = fi_ibv_mr_reg, .regv = fi_ibv_mr_regv, .regattr = fi_ibv_mr_regattr, }; static struct fi_ops_domain fi_ibv_domain_ops = { .size = sizeof(struct fi_ops_domain), .av_open = fi_no_av_open, .cq_open = fi_ibv_cq_open, .endpoint = fi_ibv_open_ep, .scalable_ep = fi_no_scalable_ep, .cntr_open = fi_no_cntr_open, .poll_open = fi_no_poll_open, .stx_ctx = fi_no_stx_context, .srx_ctx = fi_ibv_srq_context, }; static struct fi_ops_domain fi_ibv_rdm_domain_ops = { .size = sizeof(struct fi_ops_domain), .av_open = fi_ibv_rdm_av_open, .cq_open = fi_ibv_rdm_cq_open, .endpoint = fi_ibv_rdm_open_ep, .scalable_ep = fi_no_scalable_ep, .cntr_open = fi_rbv_rdm_cntr_open, .poll_open = fi_no_poll_open, .stx_ctx = fi_no_stx_context, .srx_ctx = fi_no_srx_context, }; static int fi_ibv_domain(struct fid_fabric *fabric, struct fi_info *info, struct fid_domain **domain, void *context) { struct fi_ibv_domain *_domain; struct fi_ibv_fabric *fab; struct fi_info *fi; int ret; fi = fi_ibv_get_verbs_info(info->domain_attr->name); if (!fi) return -FI_EINVAL; fab = container_of(fabric, struct fi_ibv_fabric, util_fabric.fabric_fid); ret = ofi_check_domain_attr(&fi_ibv_prov, fabric->api_version, fi->domain_attr, info->domain_attr); if (ret) return ret; _domain = calloc(1, sizeof *_domain); if (!_domain) return -FI_ENOMEM; _domain->info = fi_dupinfo(info); if (!_domain->info) goto err1; _domain->rdm = FI_IBV_EP_TYPE_IS_RDM(info); if (_domain->rdm) { _domain->rdm_cm = calloc(1, sizeof(*_domain->rdm_cm)); if (!_domain->rdm_cm) { ret = -FI_ENOMEM; goto err2; } } ret = fi_ibv_open_device_by_name(_domain, 
info->domain_attr->name); if (ret) goto err2; _domain->pd = ibv_alloc_pd(_domain->verbs); if (!_domain->pd) { ret = -errno; goto err2; } _domain->domain_fid.fid.fclass = FI_CLASS_DOMAIN; _domain->domain_fid.fid.context = context; _domain->domain_fid.fid.ops = &fi_ibv_fid_ops; _domain->domain_fid.mr = &fi_ibv_domain_mr_ops; if (_domain->rdm) { _domain->domain_fid.ops = &fi_ibv_rdm_domain_ops; _domain->rdm_cm->ec = rdma_create_event_channel(); if (!_domain->rdm_cm->ec) { VERBS_INFO(FI_LOG_EP_CTRL, "Failed to create listener event channel: %s\n", strerror(errno)); ret = -FI_EOTHER; goto err2; } if (fi_fd_nonblock(_domain->rdm_cm->ec->fd) != 0) { VERBS_INFO_ERRNO(FI_LOG_EP_CTRL, "fcntl", errno); ret = -FI_EOTHER; goto err3; } if (rdma_create_id(_domain->rdm_cm->ec, &_domain->rdm_cm->listener, NULL, RDMA_PS_TCP)) { VERBS_INFO(FI_LOG_EP_CTRL, "Failed to create cm listener: %s\n", strerror(errno)); ret = -FI_EOTHER; goto err3; } _domain->rdm_cm->is_bound = 0; } else { _domain->domain_fid.ops = &fi_ibv_domain_ops; } _domain->fab = fab; *domain = &_domain->domain_fid; return 0; err3: if (_domain->rdm) rdma_destroy_event_channel(_domain->rdm_cm->ec); err2: if (_domain->rdm) free(_domain->rdm_cm); fi_freeinfo(_domain->info); err1: free(_domain); return ret; } static int fi_ibv_trywait(struct fid_fabric *fabric, struct fid **fids, int count) { struct fi_ibv_cq *cq; int ret, i; for (i = 0; i < count; i++) { switch (fids[i]->fclass) { case FI_CLASS_CQ: cq = container_of(fids[i], struct fi_ibv_cq, cq_fid.fid); ret = cq->trywait(fids[i]); if (ret) return ret; break; case FI_CLASS_EQ: /* We are always ready to wait on an EQ since * rdmacm EQ is based on an fd */ continue; case FI_CLASS_CNTR: case FI_CLASS_WAIT: return -FI_ENOSYS; default: return -FI_EINVAL; } } return FI_SUCCESS; } static int fi_ibv_fabric_close(fid_t fid) { struct fi_ibv_fabric *fab; int ret; fab = container_of(fid, struct fi_ibv_fabric, util_fabric.fabric_fid.fid); ret = ofi_fabric_close(&fab->util_fabric); if 
(ret) return ret; free(fab); return 0; } static struct fi_ops fi_ibv_fi_ops = { .size = sizeof(struct fi_ops), .close = fi_ibv_fabric_close, .bind = fi_no_bind, .control = fi_no_control, .ops_open = fi_no_ops_open, }; static struct fi_ops_fabric fi_ibv_ops_fabric = { .size = sizeof(struct fi_ops_fabric), .domain = fi_ibv_domain, .passive_ep = fi_ibv_passive_ep, .eq_open = fi_ibv_eq_open, .wait_open = fi_no_wait_open, .trywait = fi_ibv_trywait }; int fi_ibv_fabric(struct fi_fabric_attr *attr, struct fid_fabric **fabric, void *context) { struct fi_ibv_fabric *fab; struct fi_info *info; int ret; ret = fi_ibv_init_info(); if (ret) return ret; fab = calloc(1, sizeof(*fab)); if (!fab) return -FI_ENOMEM; for (info = verbs_info; info; info = info->next) { ret = ofi_fabric_init(&fi_ibv_prov, info->fabric_attr, attr, &fab->util_fabric, context); if (ret != -FI_ENODATA) break; } if (ret) { free(fab); return ret; } *fabric = &fab->util_fabric.fabric_fid; (*fabric)->fid.ops = &fi_ibv_fi_ops; (*fabric)->ops = &fi_ibv_ops_fabric; return 0; }
/* Initiate a non-blocking MSG-endpoint connection: allocate the connection
 * request, open (or adopt) the CM socket, start connect(2), fill in the
 * request message, and register an EPOLLOUT callback that will send it once
 * the socket is writable.  Returns 0 or a negative fi_errno value; on
 * failure everything acquired so far is released.
 */
int usdf_cm_msg_connect(struct fid_ep *fep, const void *addr,
			const void *param, size_t paramlen)
{
	struct usdf_connreq *crp;
	struct usdf_ep *ep;
	struct usdf_rx *rx;
	struct usdf_domain *udp;
	const struct sockaddr_in *sin;
	struct epoll_event ev;
	struct usdf_fabric *fp;
	struct usdf_connreq_msg *reqp;
	struct usd_qp_impl *qp;
	size_t request_size;
	int ret;

	USDF_TRACE_SYS(EP_CTRL, "\n");

	if (paramlen > USDF_MAX_CONN_DATA)
		return -FI_EINVAL;

	ep = ep_ftou(fep);
	udp = ep->ep_domain;
	fp = udp->dom_fabric;
	/* assumes addr is a sockaddr_in (FI_EP_MSG/usnic addressing). */
	sin = addr;

	/* Although paramlen may be less than USDF_MAX_CONN_DATA, the same crp
	 * struct is used for receiving the accept and reject payload. The
	 * structure has to be prepared to receive the maximum allowable amount
	 * of data per transfer. The maximum size includes the connection
	 * request structure, the connection request message, and the maximum
	 * amount of data per connection request message. */
	request_size = sizeof(*crp) + sizeof(*reqp) + USDF_MAX_CONN_DATA;
	crp = calloc(1, request_size);
	if (crp == NULL) {
		ret = -errno;
		goto fail;
	}
	ep->e.msg.ep_connreq = crp;

	crp->handle.fclass = FI_CLASS_CONNREQ;

	/* Reuse a socket previously stashed on the endpoint, if any;
	 * otherwise open a fresh one.  Ownership moves to crp either way. */
	if (ep->e.msg.ep_cm_sock == -1) {
		crp->cr_sockfd = socket(AF_INET, SOCK_STREAM, 0);
		if (crp->cr_sockfd == -1) {
			ret = -errno;
			goto fail;
		}
	} else {
		crp->cr_sockfd = ep->e.msg.ep_cm_sock;
		ep->e.msg.ep_cm_sock = -1;
	}

	/* NOTE(review): fi_fd_nonblock's own (negative) return is
	 * overwritten with -errno here — equivalent only if errno is still
	 * set by the failing fcntl; confirm. */
	ret = fi_fd_nonblock(crp->cr_sockfd);
	if (ret) {
		ret = -errno;
		goto fail;
	}

	ret = usdf_ep_msg_get_queues(ep);
	if (ret != 0) {
		goto fail;
	}
	rx = ep->ep_rx;
	qp = to_qpi(rx->rx_qp);

	/* Non-blocking connect: EINPROGRESS is the expected "in flight"
	 * result and is not an error. */
	ret = connect(crp->cr_sockfd, (struct sockaddr *)sin, sizeof(*sin));
	if (ret != 0 && errno != EINPROGRESS) {
		ret = -errno;
		goto fail;
	}

	/* If cr_sockfd was previously unbound, connect(2) will do a bind(2)
	 * for us. Update our snapshot of the locally bound address. */
	ret = usdf_msg_upd_lcl_addr(ep);
	if (ret)
		goto fail;

	/* allocate remote peer ID */
	ep->e.msg.ep_rem_peer_id = udp->dom_next_peer;
	udp->dom_peer_tab[udp->dom_next_peer] = ep;
	++udp->dom_next_peer;

	crp->cr_ep = ep;
	reqp = (struct usdf_connreq_msg *)crp->cr_data;
	crp->cr_ptr = crp->cr_data;
	/* Bytes remaining to send: message header plus caller's CM data. */
	crp->cr_resid = sizeof(*reqp) + paramlen;

	/* Wire format: peer id and data length in network byte order; the
	 * IP address and UDP port are already big-endian. */
	reqp->creq_peer_id = htons(ep->e.msg.ep_rem_peer_id);
	reqp->creq_ipaddr = fp->fab_dev_attrs->uda_ipaddr_be;
	reqp->creq_port =
		qp->uq_attrs.uqa_local_addr.ul_addr.ul_udp.u_addr.sin_port;
	reqp->creq_datalen = htonl(paramlen);
	memcpy(reqp->creq_data, param, paramlen);

	/* register for notification when connect completes */
	crp->cr_pollitem.pi_rtn = usdf_cm_msg_connect_cb_wr;
	crp->cr_pollitem.pi_context = crp;
	ev.events = EPOLLOUT;
	ev.data.ptr = &crp->cr_pollitem;
	ret = epoll_ctl(fp->fab_epollfd, EPOLL_CTL_ADD, crp->cr_sockfd, &ev);
	if (ret != 0) {
		crp->cr_pollitem.pi_rtn = NULL;
		ret = -errno;
		goto fail;
	}

	return 0;

fail:
	/* NOTE(review): release_queues runs even when get_queues was never
	 * reached, and the peer-table slot is not rolled back after the
	 * epoll_ctl failure — presumably both are tolerated; confirm. */
	if (crp != NULL) {
		if (crp->cr_sockfd != -1) {
			close(crp->cr_sockfd);
		}
		free(crp);
		ep->e.msg.ep_connreq = NULL;
	}
	usdf_ep_msg_release_queues(ep);
	return ret;
}