static ssize_t fi_ibv_rdm_tagged_recvfrom(struct fid_ep *ep_fid, void *buf,
					  size_t len, void *desc,
					  fi_addr_t src_addr, uint64_t tag,
					  uint64_t ignore, void *context)
{
	int ret = 0;
	struct fi_ibv_rdm_tagged_request *request =
		util_buf_alloc(fi_ibv_rdm_tagged_request_pool);
	fi_ibv_rdm_tagged_zero_request(request);

	FI_IBV_RDM_TAGGED_DBG_REQUEST("get_from_pool: ", request, FI_LOG_DEBUG);

	struct fi_ibv_rdm_tagged_conn *conn = (src_addr == FI_ADDR_UNSPEC) ?
		NULL : (struct fi_ibv_rdm_tagged_conn *) src_addr;
	struct fi_ibv_rdm_ep *ep =
		container_of(ep_fid, struct fi_ibv_rdm_ep, ep_fid);

	{
		struct fi_ibv_rdm_tagged_recv_start_data recv_data = {
			.peek_data = {
				.minfo = {
					.conn = conn,
					.tag = tag,
					.tagmask = ~ignore
				},
				.context = context,
				.flags = 0
			},
			.dest_addr = buf,
			.data_len = len,
			.ep = ep
		};
static ssize_t fi_ibv_cq_read(struct fid_cq *cq_fid, void *buf, size_t count)
{
	struct fi_ibv_cq *cq;
	struct fi_ibv_wce *wce;
	struct slist_entry *entry;
	struct ibv_wc wc;
	ssize_t ret = 0, i;

	cq = container_of(cq_fid, struct fi_ibv_cq, util_cq.cq_fid);

	cq->util_cq.cq_fastlock_acquire(&cq->util_cq.cq_lock);

	for (i = 0; i < count; i++) {
		if (!slist_empty(&cq->wcq)) {
			wce = container_of(cq->wcq.head, struct fi_ibv_wce, entry);
			if (wce->wc.status) {
				ret = -FI_EAVAIL;
				break;
			}
			entry = slist_remove_head(&cq->wcq);
			wce = container_of(entry, struct fi_ibv_wce, entry);
			cq->read_entry(&wce->wc, (char *)buf + i * cq->entry_size);
			util_buf_release(cq->wce_pool, wce);
			continue;
		}

		ret = fi_ibv_poll_cq(cq, &wc);
		if (ret <= 0)
			break;

		/* Insert error entry into wcq */
		if (OFI_UNLIKELY(wc.status)) {
			if (wc.status == IBV_WC_WR_FLUSH_ERR) {
				/* Handle case when remote side destroys
				 * the connection, but local side isn't aware
				 * about that yet */
				VERBS_DBG(FI_LOG_CQ,
					  "Ignoring WC with status "
					  "IBV_WC_WR_FLUSH_ERR(%d)\n",
					  wc.status);
				i--;
				continue;
			}
			wce = util_buf_alloc(cq->wce_pool);
			if (!wce) {
				cq->util_cq.cq_fastlock_release(&cq->util_cq.cq_lock);
				return -FI_ENOMEM;
			}
			memset(wce, 0, sizeof(*wce));
			memcpy(&wce->wc, &wc, sizeof wc);
			slist_insert_tail(&wce->entry, &cq->wcq);
			ret = -FI_EAVAIL;
			break;
		}

		cq->read_entry(&wc, (char *)buf + i * cq->entry_size);
	}
static int util_mr_cache_create(struct ofi_mr_cache *cache,
				const struct iovec *iov,
				uint64_t access, struct ofi_mr_entry **entry)
{
	int ret;

	FI_DBG(cache->domain->prov, FI_LOG_MR, "create %p (len: %" PRIu64 ")\n",
	       iov->iov_base, iov->iov_len);

	util_mr_cache_process_events(cache);

	*entry = util_buf_alloc(cache->entry_pool);
	if (OFI_UNLIKELY(!*entry))
		return -FI_ENOMEM;

	(*entry)->iov = *iov;
	(*entry)->use_cnt = 1;

	ret = cache->add_region(cache, *entry);
	if (ret) {
		while (ret && ofi_mr_cache_flush(cache)) {
			ret = cache->add_region(cache, *entry);
		}
		if (ret) {
			assert(!ofi_mr_cache_flush(cache));
			util_buf_release(cache->entry_pool, *entry);
			return ret;
		}
	}

	cache->cached_size += iov->iov_len;
	if ((++cache->cached_cnt > cache->max_cached_cnt) ||
	    (cache->cached_size > cache->max_cached_size)) {
		(*entry)->cached = 0;
	} else {
		if (cache->mr_storage.insert(&cache->mr_storage,
					     &(*entry)->iov, *entry)) {
			ret = -FI_ENOMEM;
			goto err;
		}
		(*entry)->cached = 1;

		ret = ofi_monitor_subscribe(&cache->nq, iov->iov_base,
					    iov->iov_len,
					    &(*entry)->subscription);
		if (ret)
			goto err;
		(*entry)->subscribed = 1;
	}

	return 0;

err:
	util_mr_free_entry(cache, *entry);
	return ret;
}
struct tcpx_pe_entry *pe_entry_alloc(struct tcpx_progress *progress)
{
	struct tcpx_pe_entry *pe_entry;

	pe_entry = util_buf_alloc(progress->pe_entry_pool);
	if (!pe_entry) {
		FI_WARN(&tcpx_prov, FI_LOG_DOMAIN, "failed to get buffer\n");
		return NULL;
	}
	memset(pe_entry, 0, sizeof(*pe_entry));
	return pe_entry;
}
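For context, a minimal release-side sketch is shown next; the helper name pe_entry_release is an assumption used only for illustration, and only util_buf_release() mirrors the allocation call in the listing above.

/* Hypothetical counterpart sketch: hand a processing-engine entry back to the
 * util_buf pool it was drawn from in pe_entry_alloc() above. */
static void pe_entry_release(struct tcpx_progress *progress,
			     struct tcpx_pe_entry *pe_entry)
{
	util_buf_release(progress->pe_entry_pool, pe_entry);
}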
static ssize_t fi_ibv_cq_read(struct fid_cq *cq_fid, void *buf, size_t count)
{
	struct fi_ibv_cq *cq;
	struct fi_ibv_wce *wce;
	struct slist_entry *entry;
	struct ibv_wc wc;
	ssize_t ret = 0, i;

	cq = container_of(cq_fid, struct fi_ibv_cq, cq_fid);

	fastlock_acquire(&cq->lock);

	for (i = 0; i < count; i++) {
		if (!slist_empty(&cq->wcq)) {
			wce = container_of(cq->wcq.head, struct fi_ibv_wce, entry);
			if (wce->wc.status) {
				ret = -FI_EAVAIL;
				break;
			}
			entry = slist_remove_head(&cq->wcq);
			wce = container_of(entry, struct fi_ibv_wce, entry);
			cq->read_entry(&wce->wc, i, buf);
			util_buf_release(cq->domain->fab->wce_pool, wce);
			continue;
		}

		ret = fi_ibv_poll_cq(cq, &wc);
		if (ret <= 0)
			break;

		/* Insert error entry into wcq */
		if (wc.status) {
			wce = util_buf_alloc(cq->domain->fab->wce_pool);
			if (!wce) {
				fastlock_release(&cq->lock);
				return -FI_ENOMEM;
			}
			memset(wce, 0, sizeof(*wce));
			memcpy(&wce->wc, &wc, sizeof wc);
			slist_insert_tail(&wce->entry, &cq->wcq);
			ret = -FI_EAVAIL;
			break;
		}

		cq->read_entry(&wc, i, buf);
	}
static struct mrail_tx_buf *mrail_get_tx_buf(struct mrail_ep *mrail_ep,
					     void *context, uint32_t seq,
					     uint8_t op, uint64_t flags)
{
	struct mrail_tx_buf *tx_buf = util_buf_alloc(mrail_ep->tx_buf_pool);
	if (OFI_UNLIKELY(!tx_buf))
		return NULL;

	assert(tx_buf->ep == mrail_ep);
	assert(tx_buf->hdr.version == MRAIL_HDR_VERSION);

	tx_buf->context = context;
	tx_buf->flags = flags;
	tx_buf->hdr.op = op;
	tx_buf->hdr.seq = htonl(seq);
	return tx_buf;
}
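The two asserts imply that tx_buf->ep and tx_buf->hdr.version are already set when a buffer is handed out, presumably by a pool-entry initializer. A minimal sketch of such a hook follows; the name mrail_tx_buf_init and the pool-context wiring are assumptions, not taken from the listing above.

/* Illustrative pool-entry initializer (assumed name and wiring): fields that
 * never change per send are set once per buffer, so mrail_get_tx_buf() only
 * asserts them on the fast path. */
static void mrail_tx_buf_init(void *pool_ctx, void *buf)
{
	struct mrail_ep *mrail_ep = pool_ctx;
	struct mrail_tx_buf *tx_buf = buf;

	tx_buf->ep = mrail_ep;
	tx_buf->hdr.version = MRAIL_HDR_VERSION;
}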
static inline void rxd_ep_enqueue_pkt(struct rxd_ep *ep, struct ofi_ctrl_hdr *ctrl,
				      struct fi_cq_msg_entry *comp)
{
	struct rxd_unexp_cq_entry *unexp;

	if (comp->flags & RXD_UNEXP_ENTRY ||
	    ep->num_unexp_pkt > RXD_EP_MAX_UNEXP_PKT)
		return;

	unexp = util_buf_alloc(ep->rx_cq->unexp_pool);
	assert(unexp);
	unexp->cq_entry = *comp;
	unexp->cq_entry.flags |= RXD_UNEXP_ENTRY;
	dlist_init(&unexp->entry);
	dlist_insert_tail(&ep->rx_cq->unexp_list, &unexp->entry);
	FI_INFO(&rxd_prov, FI_LOG_EP_CTRL,
		"enqueuing unordered pkt: %p, seg_no: %d\n",
		ctrl->msg_id, ctrl->seg_no);
	ep->num_unexp_pkt++;
}
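A hedged sketch of the complementary dequeue path is below, assuming dlist_remove() from ofi_list.h; the helper name rxd_ep_dequeue_pkt is illustrative and does not appear in the listing above.

/* Illustrative counterpart (assumed name): drop a queued unexpected entry once
 * it has been matched, undoing the bookkeeping done in rxd_ep_enqueue_pkt(). */
static inline void rxd_ep_dequeue_pkt(struct rxd_ep *ep,
				      struct rxd_unexp_cq_entry *unexp)
{
	dlist_remove(&unexp->entry);
	util_buf_release(ep->rx_cq->unexp_pool, unexp);
	ep->num_unexp_pkt--;
}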
static int fi_ibv_signal_send(struct fi_ibv_msg_ep *ep, struct ibv_send_wr *wr)
{
	struct fi_ibv_msg_epe *epe;

	fastlock_acquire(&ep->scq->lock);
	if (VERBS_SIGNAL_SEND(ep)) {
		epe = util_buf_alloc(ep->scq->epe_pool);
		if (!epe) {
			fastlock_release(&ep->scq->lock);
			return -FI_ENOMEM;
		}
		memset(epe, 0, sizeof(*epe));
		wr->send_flags |= IBV_SEND_SIGNALED;
		wr->wr_id = ep->ep_id;
		epe->ep = ep;
		slist_insert_tail(&epe->entry, &ep->scq->ep_list);
		ofi_atomic_inc32(&ep->comp_pending);
	}
	fastlock_release(&ep->scq->lock);

	return 0;
}
static int fi_ibv_reap_comp(struct fi_ibv_msg_ep *ep)
{
	struct fi_ibv_wce *wce = NULL;
	int got_wc = 0;
	int ret = 0;

	fastlock_acquire(&ep->scq->lock);
	while (ofi_atomic_get32(&ep->comp_pending) > 0) {
		if (!wce) {
			wce = util_buf_alloc(ep->scq->wce_pool);
			if (!wce) {
				fastlock_release(&ep->scq->lock);
				return -FI_ENOMEM;
			}
			memset(wce, 0, sizeof(*wce));
		}
		ret = fi_ibv_poll_cq(ep->scq, &wce->wc);
		if (ret < 0) {
			VERBS_WARN(FI_LOG_EP_DATA,
				   "Failed to read completion for signaled send\n");
			util_buf_release(ep->scq->wce_pool, wce);
			fastlock_release(&ep->scq->lock);
			return ret;
		} else if (ret > 0) {
			slist_insert_tail(&wce->entry, &ep->scq->wcq);
			got_wc = 1;
			wce = NULL;
		}
	}
	if (wce)
		util_buf_release(ep->scq->wce_pool, wce);

	if (got_wc && ep->scq->channel)
		ret = fi_ibv_cq_signal(&ep->scq->cq_fid);

	fastlock_release(&ep->scq->lock);
	return ret;
}
static ssize_t
fi_ibv_rdm_ep_rma_readmsg(struct fid_ep *ep_fid, const struct fi_msg_rma *msg,
			  uint64_t flags)
{
	struct fi_ibv_rdm_ep *ep =
		container_of(ep_fid, struct fi_ibv_rdm_ep, ep_fid);
	struct fi_ibv_rdm_conn *conn = ep->av->addr_to_conn(ep, msg->addr);
	struct fi_ibv_rdm_rma_start_data start_data = {
		.ep_rdm = ep,
		.conn = conn,
		.context = msg->context,
		.flags = FI_RMA | FI_READ | (ep->tx_selective_completion ?
			(flags & FI_COMPLETION) : FI_COMPLETION),
		.data_len = (uint64_t)msg->msg_iov[0].iov_len,
		.rbuf = (uintptr_t)msg->rma_iov[0].addr,
		.lbuf = (uintptr_t)msg->msg_iov[0].iov_base,
		.rkey = (uint64_t)(uintptr_t)(msg->rma_iov[0].key),
		.lkey = (uint64_t)(uintptr_t)(msg->desc ? msg->desc[0] : NULL),
		.op_code = IBV_WR_RDMA_READ
	};
	struct fi_ibv_rma_post_ready_data post_ready_data = { .ep_rdm = ep };
	struct fi_ibv_rdm_buf *rdm_buf = NULL;
	ssize_t ret = FI_SUCCESS;

	if (msg->iov_count != 1 || msg->rma_iov_count != 1) {
		assert(0);
		return -FI_EMSGSIZE;
	}

	ret = fi_ibv_rdm_ep_rma_preinit((void **)&start_data.lkey, &rdm_buf,
					msg->msg_iov[0].iov_len, conn, ep);
	if (ret)
		return ret;

	struct fi_ibv_rdm_request *request =
		util_buf_alloc(fi_ibv_rdm_request_pool);
	FI_IBV_RDM_DBG_REQUEST("get_from_pool: ", request, FI_LOG_DEBUG);

	/* Initial state */
	request->state.eager = FI_IBV_STATE_EAGER_BEGIN;
	request->state.rndv = FI_IBV_STATE_RNDV_NOT_USED;
	request->state.err = FI_SUCCESS;
	request->minfo.is_tagged = 0;
	request->rmabuf = rdm_buf;

	fi_ibv_rdm_req_hndl(request, FI_IBV_EVENT_RMA_START, &start_data);

	return fi_ibv_rdm_req_hndl(request, FI_IBV_EVENT_POST_READY,
				   &post_ready_data);
}

static ssize_t
fi_ibv_rdm_ep_rma_readv(struct fid_ep *ep, const struct iovec *iov, void **desc,
			size_t count, fi_addr_t src_addr, uint64_t addr,
			uint64_t key, void *context)
{
	struct fi_ibv_rdm_ep *ep_rdm =
		container_of(ep, struct fi_ibv_rdm_ep, ep_fid);
	struct fi_rma_iov rma_iov = {
		.addr = addr,
		.len = 0,
		.key = key
	};
	struct fi_msg_rma msg = {
		.msg_iov = iov,
		.desc = desc,
		.iov_count = count,
		.addr = src_addr,
		.rma_iov = &rma_iov,
		.rma_iov_count = 1,
		.context = context,
		.data = 0
	};
	size_t i;

	for (i = 0; i < count; i++) {
		rma_iov.len += iov[i].iov_len;
	}

	return fi_ibv_rdm_ep_rma_readmsg(ep, &msg,
		(ep_rdm->tx_selective_completion ? 0ULL : FI_COMPLETION));
}

static ssize_t
fi_ibv_rdm_ep_rma_read(struct fid_ep *ep_fid, void *buf, size_t len,
		       void *desc, fi_addr_t src_addr, uint64_t addr,
		       uint64_t key, void *context)
{
	const struct iovec iov = {
		.iov_base = buf,
		.iov_len = len
	};

	return fi_ibv_rdm_ep_rma_readv(ep_fid, &iov, &desc, 1, src_addr, addr,
				       key, context);
}

static ssize_t
fi_ibv_rdm_ep_rma_writemsg(struct fid_ep *ep_fid, const struct fi_msg_rma *msg,
			   uint64_t flags)
{
	struct fi_ibv_rdm_ep *ep =
		container_of(ep_fid, struct fi_ibv_rdm_ep, ep_fid);
	struct fi_ibv_rdm_conn *conn = ep->av->addr_to_conn(ep, msg->addr);
	struct fi_ibv_rdm_request *request = NULL;
	struct fi_ibv_rdm_buf *rdm_buf = NULL;
	ssize_t ret = FI_SUCCESS;
	struct fi_ibv_rdm_rma_start_data start_data = {
		.conn = conn,
		.ep_rdm = ep,
		.context = msg->context,
		.flags = FI_RMA | FI_WRITE | (ep->tx_selective_completion ?
			(flags & FI_COMPLETION) : FI_COMPLETION),
		.data_len = (uint64_t)msg->msg_iov[0].iov_len,
		.rbuf = msg->rma_iov[0].addr,
		.lbuf = (uintptr_t)msg->msg_iov[0].iov_base,
		.rkey = msg->rma_iov[0].key,
		.lkey = (uint64_t)(uintptr_t)(msg->desc ? msg->desc[0] : NULL),
		.op_code = IBV_WR_RDMA_WRITE
	};

	/* Reject anything other than a single iov/rma_iov pair, matching the
	 * readmsg path above. */
	if (msg->iov_count != 1 || msg->rma_iov_count != 1) {
		assert(0);
		return -FI_EMSGSIZE;
	}

	ret = fi_ibv_rdm_ep_rma_preinit((void **)&start_data.lkey, &rdm_buf,
					msg->msg_iov[0].iov_len, conn, ep);
	if (ret)
		return ret;

	request = util_buf_alloc(fi_ibv_rdm_request_pool);
	FI_IBV_RDM_DBG_REQUEST("get_from_pool: ", request, FI_LOG_DEBUG);

	/* Initial state */
	request->state.eager = FI_IBV_STATE_EAGER_BEGIN;
	request->state.rndv = FI_IBV_STATE_RNDV_NOT_USED;
	request->state.err = FI_SUCCESS;
	request->minfo.is_tagged = 0;
	request->rmabuf = rdm_buf;

	fi_ibv_rdm_req_hndl(request, FI_IBV_EVENT_RMA_START, &start_data);

	struct fi_ibv_rma_post_ready_data post_ready_data = { .ep_rdm = ep };

	return fi_ibv_rdm_req_hndl(request, FI_IBV_EVENT_POST_READY,
				   &post_ready_data);
}

static ssize_t
fi_ibv_rdm_ep_rma_writev(struct fid_ep *ep_fid, const struct iovec *iov,
			 void **desc, size_t count, fi_addr_t dest_addr,
			 uint64_t addr, uint64_t key, void *context)
{
	struct fi_rma_iov rma_iov = {
		.addr = addr,
		.len = 0,
		.key = key
	};
	struct fi_msg_rma msg = {
		.msg_iov = iov,
		.desc = desc,
		.iov_count = count,
		.addr = dest_addr,
		.rma_iov = &rma_iov,
		.rma_iov_count = 1,
		.context = context,
		.data = 0
	};
	size_t i;

	for (i = 0; i < count; i++) {
		rma_iov.len += iov[i].iov_len;
	}

	struct fi_ibv_rdm_ep *ep_rdm =
		container_of(ep_fid, struct fi_ibv_rdm_ep, ep_fid);

	return fi_ibv_rdm_ep_rma_writemsg(ep_fid, &msg,
		(ep_rdm->tx_selective_completion ? 0ULL : FI_COMPLETION));
}

static ssize_t
fi_ibv_rdm_ep_rma_write(struct fid_ep *ep_fid, const void *buf, size_t len,
			void *desc, fi_addr_t dest_addr, uint64_t addr,
			uint64_t key, void *context)
{
	const struct iovec iov = {
		.iov_base = (void *)buf,
		.iov_len = len
	};

	return fi_ibv_rdm_ep_rma_writev(ep_fid, &iov, &desc, 1, dest_addr,
					addr, key, context);
}

static ssize_t fi_ibv_rdm_ep_rma_inject_write(struct fid_ep *ep,
					      const void *buf, size_t len,
					      fi_addr_t dest_addr,
					      uint64_t addr, uint64_t key)
{
	struct fi_ibv_rdm_ep *ep_rdm =
		container_of(ep, struct fi_ibv_rdm_ep, ep_fid);
	struct fi_ibv_rdm_conn *conn =
		ep_rdm->av->addr_to_conn(ep_rdm, dest_addr);
	struct fi_ibv_rdm_rma_start_data start_data = {
		.conn = conn,
		.ep_rdm = ep_rdm,
		.flags = 0, /* inject does not generate completion */
		.data_len = (uint64_t)len,
		.rbuf = addr,
		.lbuf = (uintptr_t)buf,
		.rkey = (uint64_t)key,
		.lkey = 0
	};
	struct fi_ibv_rdm_request *request =
		util_buf_alloc(fi_ibv_rdm_request_pool);
	ssize_t ret;

	FI_IBV_RDM_DBG_REQUEST("get_from_pool: ", request, FI_LOG_DEBUG);

	/* Initial state */
	request->state.eager = FI_IBV_STATE_EAGER_RMA_INJECT;
	request->state.rndv = FI_IBV_STATE_RNDV_NOT_USED;
	request->state.err = FI_SUCCESS;
	request->minfo.is_tagged = 0;

	ret = fi_ibv_rdm_req_hndl(request, FI_IBV_EVENT_RMA_START, &start_data);
	switch (ret) {
	case FI_SUCCESS:
		return ret;
	case -FI_EAGAIN:
		break;
	default:
		ret = -errno;
		break;
	}

	FI_IBV_RDM_DBG_REQUEST("to_pool: ", request, FI_LOG_DEBUG);
	util_buf_release(fi_ibv_rdm_request_pool, request);

	fi_ibv_rdm_tagged_poll(ep_rdm);

	return ret;
}

static struct fi_ops_rma fi_ibv_rdm_ep_rma_ops = {
	.size		= sizeof(struct fi_ops_rma),
	.read		= fi_ibv_rdm_ep_rma_read,
	.readv		= fi_ibv_rdm_ep_rma_readv,
	.readmsg	= fi_ibv_rdm_ep_rma_readmsg,
	.write		= fi_ibv_rdm_ep_rma_write,
	.writev		= fi_ibv_rdm_ep_rma_writev,
	.writemsg	= fi_ibv_rdm_ep_rma_writemsg,
	.inject		= fi_ibv_rdm_ep_rma_inject_write,
	.writedata	= fi_no_rma_writedata,
	.injectdata	= fi_no_rma_injectdata,
};

struct fi_ops_rma *fi_ibv_rdm_ep_ops_rma()
{
	return &fi_ibv_rdm_ep_rma_ops;
}
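The ops table is exported through fi_ibv_rdm_ep_ops_rma(); a minimal wiring sketch follows. The helper name fi_ibv_rdm_ep_init_rma_example is an assumption; only the struct fid_ep rma pointer and the accessor above come from the listing and standard libfabric conventions.

/* Minimal wiring sketch (assumed caller name): during endpoint setup the
 * provider points the generic fid_ep RMA ops at the table defined above. */
static void fi_ibv_rdm_ep_init_rma_example(struct fi_ibv_rdm_ep *ep)
{
	ep->ep_fid.rma = fi_ibv_rdm_ep_ops_rma();
}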
static ssize_t
fi_ibv_rdm_ep_rma_read(struct fid_ep *ep_fid, void *buf, size_t len,
		       void *desc, fi_addr_t src_addr, uint64_t addr,
		       uint64_t key, void *context)
{
	ssize_t ret = FI_SUCCESS;
	struct fi_ibv_rdm_ep *ep =
		container_of(ep_fid, struct fi_ibv_rdm_ep, ep_fid);

	if (desc == NULL && len >= ep->rndv_threshold) {
		goto out_errinput;
	}

	struct fi_ibv_rdm_tagged_conn *conn =
		(struct fi_ibv_rdm_tagged_conn *) src_addr;
	void *raw_buf = NULL;

	if (desc == NULL) {
		int again = 1;

		if (!conn->postponed_entry) {
			raw_buf = fi_ibv_rdm_rma_prepare_resources(conn, ep);
			if (raw_buf) {
				desc = (void *)(uintptr_t)conn->rma_mr->lkey;
				again = 0;
			}
		}

		if (again) {
			goto out_again;
		}
	} else if (!fi_ibv_rdm_check_connection(conn, ep) ||
		   RMA_RESOURCES_IS_BUSY(conn, ep)) {
		/*
		 * TODO: Should the postponed queue flow be implemented
		 * for RMA?
		 */
		goto out_again;
	}

	struct fi_ibv_rdm_tagged_request *request =
		util_buf_alloc(fi_ibv_rdm_tagged_request_pool);
	FI_IBV_RDM_TAGGED_DBG_REQUEST("get_from_pool: ", request, FI_LOG_DEBUG);

	/* Initial state */
	request->state.eager = FI_IBV_STATE_EAGER_BEGIN;
	request->state.rndv = FI_IBV_STATE_RNDV_NOT_USED;
	request->rmabuf = raw_buf;

	struct fi_ibv_rdm_rma_start_data start_data = {
		.ep_rdm = container_of(ep_fid, struct fi_ibv_rdm_ep, ep_fid),
		.conn = (struct fi_ibv_rdm_tagged_conn *) src_addr,
		.context = context,
		.data_len = (uint32_t)len,
		.rbuf = addr,
		.lbuf = (uintptr_t)buf,
		.rkey = (uint32_t)key,
		.lkey = (uint32_t)(uintptr_t)desc,
		.op_code = IBV_WR_RDMA_READ
	};

	fi_ibv_rdm_tagged_req_hndl(request, FI_IBV_EVENT_RMA_START, &start_data);

	struct fi_ibv_rma_post_ready_data post_ready_data = { .ep_rdm = ep };

	ret = fi_ibv_rdm_tagged_req_hndl(request, FI_IBV_EVENT_SEND_READY,
					 &post_ready_data);

out:
	return ret;
out_again:
	fi_ibv_rdm_tagged_poll(ep);
	ret = -FI_EAGAIN;
	goto out;
out_errinput:
	ret = -FI_EINVAL;
	goto out;
}

static ssize_t
fi_ibv_rdm_ep_rma_readmsg(struct fid_ep *ep, const struct fi_msg_rma *msg,
			  uint64_t flags)
{
	if (msg->iov_count == 1 && msg->rma_iov_count == 1) {
		return fi_ibv_rdm_ep_rma_read(ep, msg->msg_iov[0].iov_base,
					      msg->msg_iov[0].iov_len,
					      msg->desc[0], msg->addr,
					      msg->rma_iov[0].addr,
					      msg->rma_iov[0].key,
					      msg->context);
	}

	assert(0);
	return -FI_EMSGSIZE;
}

static ssize_t
fi_ibv_rdm_ep_rma_readv(struct fid_ep *ep, const struct iovec *iov, void **desc,
			size_t count, fi_addr_t src_addr, uint64_t addr,
			uint64_t key, void *context)
{
	/* rma_iov carries the remote memory address; the fi_addr_t goes into
	 * msg.addr (the original had the two swapped). */
	struct fi_rma_iov rma_iov = {
		.addr = addr,
		.len = 0,
		.key = key
	};
	size_t i;

	for (i = 0; i < count; i++) {
		rma_iov.len += iov[i].iov_len;
	}

	struct fi_msg_rma msg = {
		.msg_iov = iov,
		.desc = desc,
		.iov_count = count,
		.addr = src_addr,
		.rma_iov = &rma_iov,
		.rma_iov_count = 1,
		.context = context,
		.data = 0
	};

	return fi_ibv_rdm_ep_rma_readmsg(ep, &msg, 0);
}

static ssize_t
fi_ibv_rdm_ep_rma_write(struct fid_ep *ep_fid, const void *buf, size_t len,
			void *desc, fi_addr_t dest_addr, uint64_t addr,
			uint64_t key, void *context)
{
	ssize_t ret = FI_SUCCESS;
	struct fi_ibv_rdm_ep *ep =
		container_of(ep_fid, struct fi_ibv_rdm_ep, ep_fid);

	if (desc == NULL && len >= ep->rndv_threshold) {
		goto out_errinput;
	}

	struct fi_ibv_rdm_tagged_conn *conn =
		(struct fi_ibv_rdm_tagged_conn *) dest_addr;
	void *raw_buf = NULL;

	if (desc == NULL) {
		int again = 1;

		if (!conn->postponed_entry) {
			raw_buf = fi_ibv_rdm_rma_prepare_resources(conn, ep);
			if (raw_buf) {
				memcpy(raw_buf, buf, len);
				desc = (void *)(uintptr_t)conn->rma_mr->lkey;
				again = 0;
			}
		}

		if (again) {
			goto out_again;
		}
	} else if (!fi_ibv_rdm_check_connection(conn, ep) ||
		   SEND_RESOURCES_IS_BUSY(conn, ep)) {
		/*
		 * TODO: Should the postponed queue flow be implemented
		 * for RMA?
		 */
		goto out_again;
	}

	struct fi_ibv_rdm_tagged_request *request =
		util_buf_alloc(fi_ibv_rdm_tagged_request_pool);
	FI_IBV_RDM_TAGGED_DBG_REQUEST("get_from_pool: ", request, FI_LOG_DEBUG);

	/* Initial state */
	request->state.eager = FI_IBV_STATE_EAGER_BEGIN;
	request->state.rndv = FI_IBV_STATE_RNDV_NOT_USED;
	request->rmabuf = raw_buf;

	struct fi_ibv_rdm_rma_start_data start_data = {
		.conn = conn,
		.ep_rdm = ep,
		.context = context,
		.data_len = (uint32_t)len,
		.rbuf = addr,
		.lbuf = (uintptr_t)buf,
		.rkey = (uint32_t)key,
		.lkey = (uint32_t)(uintptr_t)desc,
		.op_code = IBV_WR_RDMA_WRITE
	};

	fi_ibv_rdm_tagged_req_hndl(request, FI_IBV_EVENT_RMA_START, &start_data);

	struct fi_ibv_rma_post_ready_data post_ready_data = { .ep_rdm = ep };

	ret = fi_ibv_rdm_tagged_req_hndl(request, FI_IBV_EVENT_SEND_READY,
					 &post_ready_data);
	ret = (ret == FI_EP_RDM_HNDL_SUCCESS) ? FI_SUCCESS : -FI_EOTHER;

out:
	return ret;
out_again:
	fi_ibv_rdm_tagged_poll(ep);
	ret = -FI_EAGAIN;
	goto out;
out_errinput:
	ret = -FI_EINVAL;
	goto out;
}

static ssize_t
fi_ibv_rdm_ep_rma_writemsg(struct fid_ep *ep, const struct fi_msg_rma *msg,
			   uint64_t flags)
{
	if (msg->iov_count == 1 && msg->rma_iov_count == 1) {
		return fi_ibv_rdm_ep_rma_write(ep, msg->msg_iov[0].iov_base,
					       msg->msg_iov[0].iov_len,
					       msg->desc[0], msg->addr,
					       msg->rma_iov[0].addr,
					       msg->rma_iov[0].key,
					       msg->context);
	}

	assert(0);
	return -FI_EMSGSIZE;
}

static ssize_t
fi_ibv_rdm_ep_rma_writev(struct fid_ep *ep, const struct iovec *iov,
			 void **desc, size_t count, fi_addr_t dest_addr,
			 uint64_t addr, uint64_t key, void *context)
{
	/* As in readv above: remote address in rma_iov, fi_addr_t in msg.addr. */
	struct fi_rma_iov rma_iov = {
		.addr = addr,
		.len = 0,
		.key = key
	};
	size_t i;

	for (i = 0; i < count; i++) {
		rma_iov.len += iov[i].iov_len;
	}

	struct fi_msg_rma msg = {
		.msg_iov = iov,
		.desc = desc,
		.iov_count = count,
		.addr = dest_addr,
		.rma_iov = &rma_iov,
		.rma_iov_count = 1,
		.context = context,
		.data = 0
	};

	return fi_ibv_rdm_ep_rma_writemsg(ep, &msg, 0);
}

static ssize_t fi_ibv_rdm_ep_rma_inject_write(struct fid_ep *ep,
					      const void *buf, size_t len,
					      fi_addr_t dest_addr,
					      uint64_t addr, uint64_t key)
{
	struct fi_ibv_rdm_ep *ep_rdm =
		container_of(ep, struct fi_ibv_rdm_ep, ep_fid);
	struct fi_ibv_rdm_tagged_conn *conn =
		(struct fi_ibv_rdm_tagged_conn *) dest_addr;
	struct fi_ibv_rdm_tagged_request *request = NULL;
	int ret = FI_EP_RDM_HNDL_AGAIN;

	if (len >= ep_rdm->rndv_threshold) {
		return -FI_EMSGSIZE;
	}

	if (fi_ibv_rdm_check_connection(conn, ep_rdm) &&
	    !RMA_RESOURCES_IS_BUSY(conn, ep_rdm) &&
	    !conn->postponed_entry) {
		request = util_buf_alloc(fi_ibv_rdm_tagged_request_pool);
		FI_IBV_RDM_TAGGED_DBG_REQUEST("get_from_pool: ", request,
					      FI_LOG_DEBUG);

		/* Initial state */
		request->state.eager = FI_IBV_STATE_EAGER_RMA_INJECT;
		request->state.rndv = FI_IBV_STATE_RNDV_NOT_USED;

		struct fi_ibv_rdm_rma_start_data start_data = {
			.conn = conn,
			.ep_rdm = ep_rdm,
			.data_len = (uint32_t)len,
			.rbuf = addr,
			.lbuf = (uintptr_t)buf,
			.rkey = (uint32_t)key,
			.lkey = 0
		};

		ret = fi_ibv_rdm_tagged_req_hndl(request, FI_IBV_EVENT_RMA_START,
						 &start_data);
	}

	switch (ret) {
	case FI_EP_RDM_HNDL_SUCCESS:
		return ret;
	case FI_EP_RDM_HNDL_AGAIN:
		ret = -FI_EAGAIN;
		break;
	default:
		ret = -errno;
		break;
	}

	if (request) {
		FI_IBV_RDM_TAGGED_DBG_REQUEST("to_pool: ", request, FI_LOG_DEBUG);
		util_buf_release(fi_ibv_rdm_tagged_request_pool, request);
	}

	fi_ibv_rdm_tagged_poll(ep_rdm);

	return ret;
}

static struct fi_ops_rma fi_ibv_rdm_ep_rma_ops = {
	.size		= sizeof(struct fi_ops_rma),
	.read		= fi_ibv_rdm_ep_rma_read,
	.readv		= fi_ibv_rdm_ep_rma_readv,
	.readmsg	= fi_ibv_rdm_ep_rma_readmsg,
	.write		= fi_ibv_rdm_ep_rma_write,
	.writev		= fi_ibv_rdm_ep_rma_writev,
	.writemsg	= fi_ibv_rdm_ep_rma_writemsg,
	.inject		= fi_ibv_rdm_ep_rma_inject_write,
	.writedata	= fi_no_rma_writedata,
	.injectdata	= fi_no_rma_injectdata,
};

struct fi_ops_rma *fi_ibv_rdm_ep_ops_rma(struct fi_ibv_rdm_ep *ep)
{
	return &fi_ibv_rdm_ep_rma_ops;
}
				.tagmask = ~(msg->ignore),
				.is_tagged = 1
			},
			.context = msg->context,
			.flags = ep_rdm->rx_op_flags |
				(ep_rdm->rx_selective_completion ? flags :
					(flags | FI_COMPLETION))
		},
		.dest_addr = (msg->iov_count) ? msg->msg_iov[0].iov_base : NULL,
		.data_len = (msg->iov_count) ? msg->msg_iov[0].iov_len : 0,
		.ep = ep_rdm
	};

	struct fi_ibv_rdm_request *request =
		util_buf_alloc(fi_ibv_rdm_request_pool);
	fi_ibv_rdm_zero_request(request);

	FI_IBV_RDM_DBG_REQUEST("get_from_pool: ", request, FI_LOG_DEBUG);

	if (flags & FI_PEEK) {
		recv_data.peek_data.flags |= FI_COMPLETION;
		ret = fi_ibv_rdm_req_hndl(request, FI_IBV_EVENT_RECV_PEEK,
					  &recv_data);
		if (ret == -FI_ENOMSG) {
			fi_ibv_rdm_tagged_poll(ep_rdm);
		}
	} else if (flags & FI_CLAIM) {
		recv_data.peek_data.flags |= FI_COMPLETION;
		ret = fi_ibv_rdm_req_hndl(request, FI_IBV_EVENT_RECV_START,
					  &recv_data);