int ofi_ep_bind(struct util_ep *util_ep, struct fid *fid, uint64_t flags) { int ret; struct util_av *av; struct util_cq *cq; struct util_eq *eq; struct util_cntr *cntr; ret = ofi_ep_bind_valid(util_ep->domain->prov, fid, flags); if (ret) return ret; switch (fid->fclass) { case FI_CLASS_CQ: cq = container_of(fid, struct util_cq, cq_fid.fid); return ofi_ep_bind_cq(util_ep, cq, flags); case FI_CLASS_EQ: eq = container_of(fid, struct util_eq, eq_fid.fid); return ofi_ep_bind_eq(util_ep, eq); case FI_CLASS_AV: av = container_of(fid, struct util_av, av_fid.fid); return ofi_ep_bind_av(util_ep, av); case FI_CLASS_CNTR: cntr = container_of(fid, struct util_cntr, cntr_fid.fid); return ofi_ep_bind_cntr(util_ep, cntr, flags); } return -FI_EINVAL; }
static int smr_ep_bind(struct fid *ep_fid, struct fid *bfid, uint64_t flags) { struct smr_ep *ep; struct util_av *av; int ret = 0; ep = container_of(ep_fid, struct smr_ep, util_ep.ep_fid.fid); switch (bfid->fclass) { case FI_CLASS_AV: av = container_of(bfid, struct util_av, av_fid.fid); ret = ofi_ep_bind_av(&ep->util_ep, av); if (ret) { FI_WARN(&smr_prov, FI_LOG_EP_CTRL, "duplicate AV binding\n"); return -FI_EINVAL; } break; case FI_CLASS_CQ: ret = smr_ep_bind_cq(ep, container_of(bfid, struct util_cq, cq_fid.fid), flags); break; case FI_CLASS_EQ: break; default: FI_WARN(&smr_prov, FI_LOG_EP_CTRL, "invalid fid class\n"); ret = -FI_EINVAL; break; } return ret; }
static int rxm_ep_bind(struct fid *ep_fid, struct fid *bfid, uint64_t flags) { struct rxm_ep *rxm_ep; struct util_av *util_av; int ret = 0; rxm_ep = container_of(ep_fid, struct rxm_ep, util_ep.ep_fid.fid); switch (bfid->fclass) { case FI_CLASS_AV: util_av = container_of(bfid, struct util_av, av_fid.fid); ret = ofi_ep_bind_av(&rxm_ep->util_ep, util_av); if (ret) return ret; rxm_ep->util_ep.cmap = ofi_cmap_alloc(util_av, rxm_conn_close); if (!rxm_ep->util_ep.cmap) return -FI_ENOMEM; break; case FI_CLASS_CQ: ret = rxm_ep_bind_cq(rxm_ep, container_of(bfid, struct util_cq, cq_fid.fid), flags); break; case FI_CLASS_EQ: break; default: FI_WARN(&rxm_prov, FI_LOG_EP_CTRL, "invalid fid class\n"); ret = -FI_EINVAL; break; } return ret; }
static int rxd_ep_bind(struct fid *ep_fid, struct fid *bfid, uint64_t flags) { struct rxd_ep *ep; struct rxd_av *av; struct util_cq *cq; struct util_cntr *cntr; int ret = 0; ep = container_of(ep_fid, struct rxd_ep, util_ep.ep_fid.fid); switch (bfid->fclass) { case FI_CLASS_AV: av = container_of(bfid, struct rxd_av, util_av.av_fid.fid); ret = ofi_ep_bind_av(&ep->util_ep, &av->util_av); if (ret) return ret; ret = fi_ep_bind(ep->dg_ep, &av->dg_av->fid, flags); if (ret) return ret; break; case FI_CLASS_CQ: cq = container_of(bfid, struct util_cq, cq_fid.fid); ret = ofi_ep_bind_cq(&ep->util_ep, cq, flags); if (ret) return ret; if (!ep->dg_cq) { ret = rxd_dg_cq_open(ep, cq->wait ? FI_WAIT_FD : FI_WAIT_NONE); if (ret) return ret; } if (cq->wait) ret = rxd_ep_wait_fd_add(ep, cq->wait); break; case FI_CLASS_EQ: break; case FI_CLASS_CNTR: cntr = container_of(bfid, struct util_cntr, cntr_fid.fid); ret = ofi_ep_bind_cntr(&ep->util_ep, cntr, flags); if (ret) return ret; if (!ep->dg_cq) { ret = rxd_dg_cq_open(ep, cntr->wait ? FI_WAIT_FD : FI_WAIT_NONE); } else if (!ep->dg_cq_fd && cntr->wait) { /* Reopen CQ with WAIT fd set */ ret = fi_close(&ep->dg_cq->fid); if (ret) { FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "Unable to close dg CQ: %s\n", fi_strerror(-ret)); return ret; } ep->dg_cq = NULL; ret = rxd_dg_cq_open(ep, FI_WAIT_FD); } if (ret) return ret; if (cntr->wait) ret = rxd_ep_wait_fd_add(ep, cntr->wait); break; default: FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "invalid fid class\n"); ret = -FI_EINVAL; break; } return ret; }
static ssize_t mrail_send(struct fid_ep *ep_fid, const void *buf, size_t len, void *desc, fi_addr_t dest_addr, void *context) { struct iovec iov = { .iov_base = (void *)buf, .iov_len = len, }; return mrail_send_common(ep_fid, &iov, &desc, 1, len, dest_addr, 0, context, mrail_comp_flag(ep_fid)); } static ssize_t mrail_inject(struct fid_ep *ep_fid, const void *buf, size_t len, fi_addr_t dest_addr) { struct iovec iov = { .iov_base = (void *)buf, .iov_len = len, }; return mrail_send_common(ep_fid, &iov, NULL, 1, len, dest_addr, 0, NULL, mrail_inject_flags(ep_fid)); } static ssize_t mrail_injectdata(struct fid_ep *ep_fid, const void *buf, size_t len, uint64_t data, fi_addr_t dest_addr) { struct iovec iov = { .iov_base = (void *)buf, .iov_len = len, }; return mrail_send_common(ep_fid, &iov, NULL, 1, len, dest_addr, data, NULL, (mrail_inject_flags(ep_fid) | FI_REMOTE_CQ_DATA)); } static ssize_t mrail_tsendmsg(struct fid_ep *ep_fid, const struct fi_msg_tagged *msg, uint64_t flags) { return mrail_tsend_common(ep_fid, msg->msg_iov, msg->desc, msg->iov_count, ofi_total_iov_len(msg->msg_iov, msg->iov_count), msg->addr, msg->tag, msg->data, msg->context, flags | mrail_comp_flag(ep_fid)); } static ssize_t mrail_tsend(struct fid_ep *ep_fid, const void *buf, size_t len, void *desc, fi_addr_t dest_addr, uint64_t tag, void *context) { struct iovec iov = { .iov_base = (void *)buf, .iov_len = len, }; return mrail_tsend_common(ep_fid, &iov, &desc, 1, len, dest_addr, tag, 0, context, mrail_comp_flag(ep_fid)); } static ssize_t mrail_tsenddata(struct fid_ep *ep_fid, const void *buf, size_t len, void *desc, uint64_t data, fi_addr_t dest_addr, uint64_t tag, void *context) { struct iovec iov = { .iov_base = (void *)buf, .iov_len = len, }; return mrail_tsend_common(ep_fid, &iov, &desc, 1, len, dest_addr, tag, data, context, (mrail_comp_flag(ep_fid) | FI_REMOTE_CQ_DATA)); } static ssize_t mrail_tinject(struct fid_ep *ep_fid, const void *buf, size_t len, fi_addr_t dest_addr, uint64_t tag) { struct iovec iov = { .iov_base = (void *)buf, .iov_len = len, }; return mrail_tsend_common(ep_fid, &iov, NULL, 1, len, dest_addr, tag, 0, NULL, mrail_inject_flags(ep_fid)); } static ssize_t mrail_tinjectdata(struct fid_ep *ep_fid, const void *buf, size_t len, uint64_t data, fi_addr_t dest_addr, uint64_t tag) { struct iovec iov = { .iov_base = (void *)buf, .iov_len = len, }; return mrail_tsend_common(ep_fid, &iov, NULL, 1, len, dest_addr, tag, data, NULL, (mrail_inject_flags(ep_fid) | FI_REMOTE_CQ_DATA)); } static struct mrail_unexp_msg_entry * mrail_get_unexp_msg_entry(struct mrail_recv_queue *recv_queue, void *context) { // TODO use buf pool // context would be mrail_ep from which u can get the buf pool struct mrail_unexp_msg_entry *unexp_msg_entry = malloc(sizeof(*unexp_msg_entry) + sizeof(struct fi_cq_tagged_entry)); return unexp_msg_entry; } static int mrail_getname(fid_t fid, void *addr, size_t *addrlen) { struct mrail_ep *mrail_ep = container_of(fid, struct mrail_ep, util_ep.ep_fid.fid); struct mrail_domain *mrail_domain = container_of(mrail_ep->util_ep.domain, struct mrail_domain, util_domain); size_t i, offset = 0, rail_addrlen; int ret; if (*addrlen < mrail_domain->addrlen) return -FI_ETOOSMALL; for (i = 0; i < mrail_ep->num_eps; i++) { rail_addrlen = *addrlen - offset; ret = fi_getname(&mrail_ep->rails[i].ep->fid, (char *)addr + offset, &rail_addrlen); if (ret) { FI_WARN(&mrail_prov, FI_LOG_EP_CTRL, "Unable to get name for rail: %zd\n", i); return ret; } offset += rail_addrlen; } return 0; } static void mrail_tx_buf_init(void *pool_ctx, void *buf) { struct mrail_ep *mrail_ep = pool_ctx; struct mrail_tx_buf *tx_buf = buf; tx_buf->ep = mrail_ep; tx_buf->hdr.version = MRAIL_HDR_VERSION; } static void mrail_ep_free_bufs(struct mrail_ep *mrail_ep) { if (mrail_ep->req_pool) util_buf_pool_destroy(mrail_ep->req_pool); if (mrail_ep->ooo_recv_pool) util_buf_pool_destroy(mrail_ep->ooo_recv_pool); if (mrail_ep->tx_buf_pool) util_buf_pool_destroy(mrail_ep->tx_buf_pool); if (mrail_ep->recv_fs) mrail_recv_fs_free(mrail_ep->recv_fs); } static int mrail_ep_alloc_bufs(struct mrail_ep *mrail_ep) { struct util_buf_attr attr = { .size = sizeof(struct mrail_tx_buf), .alignment = sizeof(void *), .max_cnt = 0, .chunk_cnt = 64, .alloc_hndlr = NULL, .free_hndlr = NULL, .init = mrail_tx_buf_init, .ctx = mrail_ep, }; size_t buf_size, rxq_total_size = 0; struct fi_info *fi; int ret; for (fi = mrail_ep->info->next; fi; fi = fi->next) rxq_total_size += fi->rx_attr->size; mrail_ep->recv_fs = mrail_recv_fs_create(rxq_total_size, mrail_init_recv, mrail_ep); if (!mrail_ep->recv_fs) return -FI_ENOMEM; ret = util_buf_pool_create(&mrail_ep->ooo_recv_pool, sizeof(struct mrail_ooo_recv), sizeof(void *), 0, 64); if (!mrail_ep->ooo_recv_pool) goto err; ret = util_buf_pool_create_attr(&attr, &mrail_ep->tx_buf_pool); if (!mrail_ep->tx_buf_pool) goto err; buf_size = (sizeof(struct mrail_req) + (mrail_ep->num_eps * sizeof(struct mrail_subreq))); ret = util_buf_pool_create(&mrail_ep->req_pool, buf_size, sizeof(void *), 0, 64); if (ret) goto err; return 0; err: mrail_ep_free_bufs(mrail_ep); return ret; } static int mrail_ep_close(fid_t fid) { struct mrail_ep *mrail_ep = container_of(fid, struct mrail_ep, util_ep.ep_fid.fid); int ret, retv = 0; size_t i; mrail_ep_free_bufs(mrail_ep); for (i = 0; i < mrail_ep->num_eps; i++) { ret = fi_close(&mrail_ep->rails[i].ep->fid); if (ret) retv = ret; } free(mrail_ep->rails); ret = ofi_endpoint_close(&mrail_ep->util_ep); if (ret) retv = ret; free(mrail_ep); return retv; } static int mrail_ep_bind(struct fid *ep_fid, struct fid *bfid, uint64_t flags) { struct mrail_ep *mrail_ep = container_of(ep_fid, struct mrail_ep, util_ep.ep_fid.fid); struct mrail_cq *mrail_cq; struct mrail_av *mrail_av; struct util_cntr *cntr; int ret = 0; size_t i; switch (bfid->fclass) { case FI_CLASS_AV: mrail_av = container_of(bfid, struct mrail_av, util_av.av_fid.fid); ret = ofi_ep_bind_av(&mrail_ep->util_ep, &mrail_av->util_av); if (ret) return ret; for (i = 0; i < mrail_ep->num_eps; i++) { ret = fi_ep_bind(mrail_ep->rails[i].ep, &mrail_av->avs[i]->fid, flags); if (ret) return ret; } break; case FI_CLASS_CQ: mrail_cq = container_of(bfid, struct mrail_cq, util_cq.cq_fid.fid); ret = ofi_ep_bind_cq(&mrail_ep->util_ep, &mrail_cq->util_cq, flags); if (ret) return ret; for (i = 0; i < mrail_ep->num_eps; i++) { ret = fi_ep_bind(mrail_ep->rails[i].ep, &mrail_cq->cqs[i]->fid, flags); if (ret) return ret; } break; case FI_CLASS_CNTR: cntr = container_of(bfid, struct util_cntr, cntr_fid.fid); ret = ofi_ep_bind_cntr(&mrail_ep->util_ep, cntr, flags); if (ret) return ret; break; case FI_CLASS_EQ: ret = -FI_ENOSYS; break; default: FI_WARN(&mrail_prov, FI_LOG_EP_CTRL, "invalid fid class\n"); ret = -FI_EINVAL; break; } return ret; } static int mrail_ep_ctrl(struct fid *fid, int command, void *arg) { struct mrail_ep *mrail_ep; size_t i, buf_recv_min = sizeof(struct mrail_hdr); int ret; mrail_ep = container_of(fid, struct mrail_ep, util_ep.ep_fid.fid); switch (command) { case FI_ENABLE: if (!mrail_ep->util_ep.rx_cq || !mrail_ep->util_ep.tx_cq) return -FI_ENOCQ; if (!mrail_ep->util_ep.av) return -FI_ENOAV; for (i = 0; i < mrail_ep->num_eps; i++) { ret = fi_setopt(&mrail_ep->rails[i].ep->fid, FI_OPT_ENDPOINT, FI_OPT_BUFFERED_MIN, &buf_recv_min, sizeof(buf_recv_min)); if (ret) return ret; ret = fi_enable(mrail_ep->rails[i].ep); if (ret) return ret; } break; default: return -FI_ENOSYS; } return 0; } static struct fi_ops mrail_ep_fi_ops = { .size = sizeof(struct fi_ops), .close = mrail_ep_close, .bind = mrail_ep_bind, .control = mrail_ep_ctrl, .ops_open = fi_no_ops_open, }; static int mrail_ep_setopt(fid_t fid, int level, int optname, const void *optval, size_t optlen) { struct mrail_ep *mrail_ep; size_t i; int ret = 0; mrail_ep = container_of(fid, struct mrail_ep, util_ep.ep_fid.fid); for (i = 0; i < mrail_ep->num_eps; i++) { ret = fi_setopt(&mrail_ep->rails[i].ep->fid, level, optname, optval, optlen); if (ret) return ret; } return ret; } static struct fi_ops_ep mrail_ops_ep = { .size = sizeof(struct fi_ops_ep), .cancel = fi_no_cancel, .getopt = fi_no_getopt, .setopt = mrail_ep_setopt, .tx_ctx = fi_no_tx_ctx, .rx_ctx = fi_no_rx_ctx, .rx_size_left = fi_no_rx_size_left, .tx_size_left = fi_no_tx_size_left, }; static struct fi_ops_cm mrail_ops_cm = { .size = sizeof(struct fi_ops_cm), .setname = fi_no_setname, .getname = mrail_getname, .getpeer = fi_no_getpeer, .connect = fi_no_connect, .listen = fi_no_listen, .accept = fi_no_accept, .reject = fi_no_reject, .shutdown = fi_no_shutdown, .join = fi_no_join, }; static struct fi_ops_msg mrail_ops_msg = { .size = sizeof(struct fi_ops_msg), .recv = mrail_recv, .recvv = fi_no_msg_recvv, .recvmsg = mrail_recvmsg, .send = mrail_send, .sendv = fi_no_msg_sendv, .sendmsg = mrail_sendmsg, .inject = mrail_inject, .senddata = fi_no_msg_senddata, .injectdata = mrail_injectdata, }; struct fi_ops_tagged mrail_ops_tagged = { .size = sizeof(struct fi_ops_tagged), .recv = mrail_trecv, .recvv = fi_no_tagged_recvv, .recvmsg = mrail_trecvmsg, .send = mrail_tsend, .sendv = fi_no_tagged_sendv, .sendmsg = mrail_tsendmsg, .inject = mrail_tinject, .senddata = mrail_tsenddata, .injectdata = mrail_tinjectdata, }; void mrail_ep_progress(struct util_ep *ep) { struct mrail_ep *mrail_ep; mrail_ep = container_of(ep, struct mrail_ep, util_ep); mrail_progress_deferred_reqs(mrail_ep); } int mrail_ep_open(struct fid_domain *domain_fid, struct fi_info *info, struct fid_ep **ep_fid, void *context) { struct mrail_domain *mrail_domain = container_of(domain_fid, struct mrail_domain, util_domain.domain_fid); struct mrail_ep *mrail_ep; struct fi_info *fi; size_t i; int ret; if (strcmp(mrail_domain->info->domain_attr->name, info->domain_attr->name)) { FI_WARN(&mrail_prov, FI_LOG_EP_CTRL, "info domain name: %s " "doesn't match fid_domain name: %s!\n", info->domain_attr->name, mrail_domain->info->domain_attr->name); return -FI_EINVAL; } mrail_ep = calloc(1, sizeof(*mrail_ep)); if (!mrail_ep) return -FI_ENOMEM; // TODO detect changes b/w mrail_domain->info and info arg // this may be difficult and we may not support such changes mrail_ep->info = mrail_domain->info; mrail_ep->num_eps = mrail_domain->num_domains; ret = ofi_endpoint_init(domain_fid, &mrail_util_prov, info, &mrail_ep->util_ep, context, &mrail_ep_progress); if (ret) { goto free_ep; } mrail_ep->rails = calloc(mrail_ep->num_eps, sizeof(*mrail_ep->rails)); if (!mrail_ep->rails) { ret = -FI_ENOMEM; goto err; } for (i = 0, fi = mrail_ep->info->next; fi; fi = fi->next, i++) { fi->tx_attr->op_flags &= ~FI_COMPLETION; ret = fi_endpoint(mrail_domain->domains[i], fi, &mrail_ep->rails[i].ep, mrail_ep); if (ret) { FI_WARN(&mrail_prov, FI_LOG_EP_CTRL, "Unable to open EP\n"); goto err; } mrail_ep->rails[i].info = fi; } ret = mrail_ep_alloc_bufs(mrail_ep); if (ret) goto err; slist_init(&mrail_ep->deferred_reqs); if (mrail_ep->info->caps & FI_DIRECTED_RECV) { mrail_recv_queue_init(&mrail_prov, &mrail_ep->recv_queue, mrail_match_recv_addr, mrail_match_unexp_addr, mrail_get_unexp_msg_entry); mrail_recv_queue_init(&mrail_prov, &mrail_ep->trecv_queue, mrail_match_recv_addr_tag, mrail_match_unexp_addr_tag, mrail_get_unexp_msg_entry); } else { mrail_recv_queue_init(&mrail_prov, &mrail_ep->recv_queue, mrail_match_recv_any, mrail_match_unexp_any, mrail_get_unexp_msg_entry); mrail_recv_queue_init(&mrail_prov, &mrail_ep->trecv_queue, mrail_match_recv_tag, mrail_match_unexp_tag, mrail_get_unexp_msg_entry); } ofi_atomic_initialize32(&mrail_ep->tx_rail, 0); ofi_atomic_initialize32(&mrail_ep->rx_rail, 0); *ep_fid = &mrail_ep->util_ep.ep_fid; (*ep_fid)->fid.ops = &mrail_ep_fi_ops; (*ep_fid)->ops = &mrail_ops_ep; (*ep_fid)->cm = &mrail_ops_cm; (*ep_fid)->msg = &mrail_ops_msg; (*ep_fid)->tagged = &mrail_ops_tagged; (*ep_fid)->rma = &mrail_ops_rma; return 0; err: mrail_ep_close(&mrail_ep->util_ep.ep_fid.fid); free_ep: free(mrail_ep); return ret; }