static ssize_t mrail_send_common(struct fid_ep *ep_fid, const struct iovec *iov,
				 void **desc, size_t count, size_t len,
				 fi_addr_t dest_addr, uint64_t data,
				 void *context, uint64_t flags)
{
	struct mrail_ep *mrail_ep = container_of(ep_fid, struct mrail_ep,
						 util_ep.ep_fid.fid);
	struct mrail_peer_info *peer_info;
	struct iovec *iov_dest = alloca(sizeof(*iov_dest) * (count + 1));
	struct mrail_tx_buf *tx_buf;
	uint32_t i = mrail_get_tx_rail(mrail_ep);
	struct fi_msg msg;
	ssize_t ret;

	peer_info = ofi_av_get_addr(mrail_ep->util_ep.av, (int) dest_addr);

	ofi_ep_lock_acquire(&mrail_ep->util_ep);
	tx_buf = mrail_get_tx_buf(mrail_ep, context, peer_info->seq_no++,
				  ofi_op_msg, flags | FI_MSG);
	if (OFI_UNLIKELY(!tx_buf)) {
		ret = -FI_ENOMEM;
		goto err1;
	}

	mrail_copy_iov_hdr(&tx_buf->hdr, iov_dest, iov, count);

	msg.msg_iov	= iov_dest;
	msg.desc	= desc;
	msg.iov_count	= count + 1;
	msg.addr	= dest_addr;
	msg.context	= tx_buf;
	msg.data	= data;

	if (len < mrail_ep->rails[i].info->tx_attr->inject_size)
		flags |= FI_INJECT;

	FI_DBG(&mrail_prov, FI_LOG_EP_DATA,
	       "Posting send of length: %" PRIu64 " dest_addr: 0x%" PRIx64
	       " seq: %d on rail: %d\n", len, dest_addr,
	       peer_info->seq_no - 1, i);

	ret = fi_sendmsg(mrail_ep->rails[i].ep, &msg, flags);
	if (ret) {
		FI_WARN(&mrail_prov, FI_LOG_EP_DATA,
			"Unable to fi_sendmsg on rail: %" PRIu32 "\n", i);
		goto err2;
	} else if (!(flags & FI_COMPLETION)) {
		ofi_ep_tx_cntr_inc(&mrail_ep->util_ep);
	}
	ofi_ep_lock_release(&mrail_ep->util_ep);
	return ret;
err2:
	util_buf_release(mrail_ep->tx_buf_pool, tx_buf);
err1:
	peer_info->seq_no--;
	ofi_ep_lock_release(&mrail_ep->util_ep);
	return ret;
}
static int util_mr_cache_create(struct ofi_mr_cache *cache,
				const struct iovec *iov, uint64_t access,
				struct ofi_mr_entry **entry)
{
	int ret;

	FI_DBG(cache->domain->prov, FI_LOG_MR, "create %p (len: %" PRIu64 ")\n",
	       iov->iov_base, iov->iov_len);

	util_mr_cache_process_events(cache);

	*entry = util_buf_alloc(cache->entry_pool);
	if (OFI_UNLIKELY(!*entry))
		return -FI_ENOMEM;

	(*entry)->iov = *iov;
	(*entry)->use_cnt = 1;

	ret = cache->add_region(cache, *entry);
	if (ret) {
		while (ret && ofi_mr_cache_flush(cache)) {
			ret = cache->add_region(cache, *entry);
		}
		if (ret) {
			assert(!ofi_mr_cache_flush(cache));
			util_buf_release(cache->entry_pool, *entry);
			return ret;
		}
	}

	cache->cached_size += iov->iov_len;
	if ((++cache->cached_cnt > cache->max_cached_cnt) ||
	    (cache->cached_size > cache->max_cached_size)) {
		(*entry)->cached = 0;
	} else {
		if (cache->mr_storage.insert(&cache->mr_storage,
					     &(*entry)->iov, *entry)) {
			ret = -FI_ENOMEM;
			goto err;
		}
		(*entry)->cached = 1;

		ret = ofi_monitor_subscribe(&cache->nq, iov->iov_base,
					    iov->iov_len,
					    &(*entry)->subscription);
		if (ret)
			goto err;
		(*entry)->subscribed = 1;
	}

	return 0;
err:
	util_mr_free_entry(cache, *entry);
	return ret;
}
static inline void rxd_handle_send_comp(struct fi_cq_msg_entry *comp)
{
	struct rxd_pkt_meta *pkt_meta;

	pkt_meta = container_of(comp->op_context, struct rxd_pkt_meta, context);
	FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "Send completion for: %p\n", pkt_meta);

	rxd_ep_lock_if_required(pkt_meta->ep);
	RXD_PKT_MARK_LOCAL_ACK(pkt_meta);
	rxd_tx_pkt_release(pkt_meta);
	rxd_ep_unlock_if_required(pkt_meta->ep);
}
struct mrail_recv *
mrail_match_recv_handle_unexp(struct mrail_recv_queue *recv_queue, uint64_t tag,
			      uint64_t addr, char *data, size_t len,
			      void *context)
{
	struct dlist_entry *entry;
	struct mrail_unexp_msg_entry *unexp_msg_entry;
	struct mrail_match_attr match_attr = {
		.tag	= tag,
		.addr	= addr,
	};

	entry = dlist_remove_first_match(&recv_queue->recv_list,
					 recv_queue->match_recv, &match_attr);
	if (OFI_UNLIKELY(!entry)) {
		unexp_msg_entry = recv_queue->get_unexp_msg_entry(recv_queue,
								  context);
		if (!unexp_msg_entry) {
			FI_WARN(recv_queue->prov, FI_LOG_CQ,
				"Unable to get unexp_msg_entry!");
			assert(0);
			return NULL;
		}

		unexp_msg_entry->addr		= addr;
		unexp_msg_entry->tag		= tag;
		unexp_msg_entry->context	= context;
		memcpy(unexp_msg_entry->data, data, len);

		FI_DBG(recv_queue->prov, FI_LOG_CQ,
		       "No matching recv found for incoming msg with "
		       "addr: 0x%" PRIx64 " tag: 0x%" PRIx64 "\n",
		       unexp_msg_entry->addr, unexp_msg_entry->tag);
		FI_DBG(recv_queue->prov, FI_LOG_CQ,
		       "Enqueueing unexp_msg_entry to unexpected msg list\n");

		dlist_insert_tail(&unexp_msg_entry->entry,
				  &recv_queue->unexp_msg_list);
		return NULL;
	}
	return container_of(entry, struct mrail_recv, entry);
}
int rxd_av_dg_reverse_lookup(struct rxd_av *av, uint64_t start_idx,
			     const void *addr, fi_addr_t *dg_fiaddr)
{
	uint8_t curr_addr[RXD_MAX_DGRAM_ADDR];
	size_t i, len;
	int ret;

	for (i = 0; i < (size_t) av->dg_av_used; i++) {
		len = sizeof curr_addr;
		ret = fi_av_lookup(av->dg_av, (i + start_idx) % av->dg_av_used,
				   curr_addr, &len);
		/* Only report a match when the looked-up address actually
		 * equals the address we are searching for. */
		if (!ret && !memcmp(curr_addr, addr, len)) {
			*dg_fiaddr = (i + start_idx) % av->dg_av_used;
			FI_DBG(&rxd_prov, FI_LOG_AV, "found: %" PRIu64 "\n",
			       *dg_fiaddr);
			return 0;
		}
	}
	FI_DBG(&rxd_prov, FI_LOG_AV, "addr not found\n");
	return -FI_ENODATA;
}
int mrail_cq_write_recv_comp(struct mrail_ep *mrail_ep, struct mrail_hdr *hdr,
			     struct fi_cq_tagged_entry *comp,
			     struct mrail_recv *recv)
{
	FI_DBG(&mrail_prov, FI_LOG_CQ, "writing recv completion: length: %zu "
	       "tag: 0x%" PRIx64 "\n", comp->len - sizeof(struct mrail_pkt),
	       hdr->tag);
	return ofi_cq_write(mrail_ep->util_ep.rx_cq, recv->context,
			    recv->comp_flags |
			    (comp->flags & FI_REMOTE_CQ_DATA),
			    comp->len - sizeof(struct mrail_pkt), NULL,
			    comp->data, hdr->tag);
}
struct rxd_trecv_entry *rxd_get_trecv_entry(struct rxd_ep *ep,
					    struct rxd_rx_entry *rx_entry)
{
	struct dlist_entry *match;
	struct rxd_trecv_entry *trecv_entry;

	match = dlist_find_first_match(&ep->trecv_list, &rxd_match_trecv_entry,
				       (void *) rx_entry);
	if (!match) {
		/* todo: queue the pkt */
		FI_DBG(&rxd_prov, FI_LOG_EP_CTRL,
		       "no matching trecv entry, tag: 0x%" PRIx64 "\n",
		       rx_entry->op_hdr.tag);
		return NULL;
	}
	FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "matched - tag: 0x%" PRIx64 "\n",
	       rx_entry->op_hdr.tag);

	dlist_remove(match);
	trecv_entry = container_of(match, struct rxd_trecv_entry, entry);
	trecv_entry->rx_entry = rx_entry;
	return trecv_entry;
}
static int util_mr_cache_merge(struct ofi_mr_cache *cache,
			       const struct fi_mr_attr *attr,
			       struct ofi_mr_entry *old_entry,
			       struct ofi_mr_entry **entry)
{
	struct iovec iov, *old_iov;

	iov = *attr->mr_iov;
	do {
		FI_DBG(cache->domain->prov, FI_LOG_MR,
		       "merging %p (len: %" PRIu64 ") with %p (len: %" PRIu64 ")\n",
		       iov.iov_base, iov.iov_len,
		       old_entry->iov.iov_base, old_entry->iov.iov_len);
		old_iov = &old_entry->iov;

		iov.iov_len = ((uintptr_t)
			       MAX(ofi_iov_end(&iov), ofi_iov_end(old_iov))) -
			      ((uintptr_t) MIN(iov.iov_base, old_iov->iov_base));
		iov.iov_base = MIN(iov.iov_base, old_iov->iov_base);
		FI_DBG(cache->domain->prov, FI_LOG_MR,
		       "merged %p (len: %" PRIu64 ")\n",
		       iov.iov_base, iov.iov_len);

		if (old_entry->subscribed) {
			/* old entry will be removed as soon as `use_cnt == 0`.
			 * unsubscribe from the entry */
			ofi_monitor_unsubscribe(&old_entry->subscription);
			old_entry->subscribed = 0;
		}

		cache->mr_storage.erase(&cache->mr_storage, old_entry);
		old_entry->cached = 0;
		if (old_entry->use_cnt == 0) {
			dlist_remove_init(&old_entry->lru_entry);
			util_mr_free_entry(cache, old_entry);
		}
	} while ((old_entry = cache->mr_storage.find(&cache->mr_storage, &iov)));

	return util_mr_cache_create(cache, &iov, attr->access, entry);
}
static void server_sock_accept(struct util_wait *wait,
			       struct tcpx_cm_context *cm_ctx)
{
	struct tcpx_conn_handle *handle;
	struct tcpx_pep *pep;
	SOCKET sock;
	int ret;

	FI_DBG(&tcpx_prov, FI_LOG_EP_CTRL, "Received Connreq\n");
	assert(cm_ctx->fid->fclass == FI_CLASS_PEP);
	pep = container_of(cm_ctx->fid, struct tcpx_pep, util_pep.pep_fid.fid);

	sock = accept(pep->sock, NULL, 0);
	if (sock < 0) {
		FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL, "accept error: %d\n",
			ofi_sockerr());
		return;
	}

	handle = calloc(1, sizeof(*handle));
	if (!handle) {
		FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL,
			"cannot allocate memory\n");
		goto err1;
	}

	cm_ctx = calloc(1, sizeof(*cm_ctx));
	if (!cm_ctx)
		goto err2;

	handle->conn_fd = sock;
	handle->handle.fclass = FI_CLASS_CONNREQ;
	handle->pep = pep;
	cm_ctx->fid = &handle->handle;
	cm_ctx->type = SERVER_RECV_CONNREQ;

	ret = ofi_wait_fd_add(wait, sock, FI_EPOLL_IN,
			      tcpx_eq_wait_try_func, NULL, (void *) cm_ctx);
	if (ret)
		goto err3;

	wait->signal(wait);
	return;
err3:
	free(cm_ctx);
err2:
	free(handle);
err1:
	ofi_close_socket(sock);
}
void ofi_monitor_unsubscribe(struct ofi_subscription *subscription)
{
	FI_DBG(&core_prov, FI_LOG_MR,
	       "unsubscribing addr=%p len=%zu subscription=%p\n",
	       subscription->addr, subscription->len, subscription);

	subscription->nq->monitor->unsubscribe(subscription->nq->monitor,
					       subscription->addr,
					       subscription->len,
					       subscription);

	fastlock_acquire(&subscription->nq->lock);
	dlist_init(&subscription->entry);
	subscription->nq->refcnt--;
	fastlock_release(&subscription->nq->lock);
}
static int rxm_ep_txrx_res_open(struct rxm_ep *rxm_ep)
{
	struct rxm_domain *rxm_domain;
	uint8_t local_mr;
	int ret;

	rxm_domain = container_of(rxm_ep->util_ep.domain, struct rxm_domain,
				  util_domain);
	local_mr = rxm_ep->msg_info->mode & FI_LOCAL_MR ? 1 : 0;

	FI_DBG(&rxm_prov, FI_LOG_EP_CTRL,
	       "MSG provider mode & FI_LOCAL_MR: %d\n", local_mr);

	ret = rxm_buf_pool_create(local_mr, rxm_ep->msg_info->tx_attr->size,
				  sizeof(struct rxm_pkt), &rxm_ep->tx_pool,
				  rxm_domain->msg_domain);
	if (ret)
		return ret;

	ret = rxm_buf_pool_create(local_mr, rxm_ep->msg_info->rx_attr->size,
				  sizeof(struct rxm_rx_buf), &rxm_ep->rx_pool,
				  rxm_domain->msg_domain);
	if (ret)
		goto err1;

	rxm_ep->txe_fs = rxm_txe_fs_create(rxm_ep->rxm_info->tx_attr->size);
	if (!rxm_ep->txe_fs) {
		ret = -FI_ENOMEM;
		goto err2;
	}

	ofi_key_idx_init(&rxm_ep->tx_key_idx,
			 fi_size_bits(rxm_ep->rxm_info->tx_attr->size));

	ret = rxm_recv_queue_init(&rxm_ep->recv_queue,
				  rxm_ep->rxm_info->rx_attr->size);
	if (ret)
		goto err3;

	ret = rxm_recv_queue_init(&rxm_ep->trecv_queue,
				  rxm_ep->rxm_info->rx_attr->size);
	if (ret)
		goto err4;

	return 0;
	/* unwind in the reverse order of creation */
err4:
	rxm_recv_queue_close(&rxm_ep->recv_queue);
err3:
	rxm_txe_fs_free(rxm_ep->txe_fs);
err2:
	util_buf_pool_destroy(rxm_ep->rx_pool);
err1:
	util_buf_pool_destroy(rxm_ep->tx_pool);
	return ret;
}
static void rxd_handle_ack(struct rxd_ep *ep, struct ofi_ctrl_hdr *ctrl,
			   struct rxd_rx_buf *rx_buf)
{
	struct rxd_tx_entry *tx_entry;
	uint64_t idx;

	FI_DBG(&rxd_prov, FI_LOG_EP_CTRL,
	       "ack- msg_id: %" PRIu64 ", segno: %d, segsz: %d, buf: %p\n",
	       ctrl->msg_id, ctrl->seg_no, ctrl->seg_size, rx_buf);

	idx = ctrl->msg_id & RXD_TX_IDX_BITS;
	tx_entry = &ep->tx_entry_fs->buf[idx];
	if (tx_entry->msg_id != ctrl->msg_id)
		goto out;

	rxd_ep_free_acked_pkts(ep, tx_entry, ctrl->seg_no);
	if ((tx_entry->bytes_sent == tx_entry->op_hdr.size) &&
	    dlist_empty(&tx_entry->pkt_list)) {
		FI_DBG(&rxd_prov, FI_LOG_EP_CTRL,
		       "reporting TX completion : %p\n", tx_entry);
		if (tx_entry->op_type != RXD_TX_READ_REQ) {
			rxd_cq_report_tx_comp(rxd_ep_tx_cq(ep), tx_entry);
			rxd_cntr_report_tx_comp(ep, tx_entry);
			rxd_tx_entry_free(ep, tx_entry);
		}
	} else {
		tx_entry->rx_key = ctrl->rx_key;
		/* do not allow the window size to shrink (on duplicate acks) */
		tx_entry->window = MAX(tx_entry->window,
				       ctrl->seg_no + ctrl->seg_size);
		FI_DBG(&rxd_prov, FI_LOG_EP_CTRL,
		       "ack- msg_id: %" PRIu64 ", window: %d\n",
		       ctrl->msg_id, tx_entry->window);
	}
out:
	rxd_ep_repost_buff(rx_buf);
}
static int verify_addr(struct ofi_util_mr *in_mr, struct fi_mr_attr *item,
		       uint64_t in_access, uint64_t in_addr, ssize_t in_len)
{
	uint64_t start, end;
	size_t i;

	if (!in_addr) {
		FI_DBG(in_mr->prov, FI_LOG_MR,
		       "verify_addr: input address is zero\n");
		return -FI_EINVAL;
	}

	if ((in_access & item->access) != in_access) {
		FI_DBG(in_mr->prov, FI_LOG_MR,
		       "verify_addr: requested access is not valid\n");
		return -FI_EACCES;
	}

	/* Check whether [in_addr, in_addr + in_len) falls entirely within
	 * any one of the registered iovs. */
	for (i = 0; i < item->iov_count; i++) {
		start = (uintptr_t) item->mr_iov[i].iov_base;
		end = start + item->mr_iov[i].iov_len;
		if (start <= in_addr && end >= (in_addr + in_len))
			return 0;
	}
	return -FI_EACCES;
}
static void util_monitor_read_events(struct ofi_mem_monitor *monitor)
{
	struct ofi_subscription *subscription;

	do {
		subscription = monitor->get_event(monitor);
		if (!subscription) {
			FI_DBG(&core_prov, FI_LOG_MR,
			       "no more events to be read\n");
			break;
		}

		FI_DBG(&core_prov, FI_LOG_MR,
		       "found event, context=%p, addr=%p, len=%zu nq=%p\n",
		       subscription, subscription->addr,
		       subscription->len, subscription->nq);

		fastlock_acquire(&subscription->nq->lock);
		if (dlist_empty(&subscription->entry))
			dlist_insert_tail(&subscription->entry,
					  &subscription->nq->list);
		fastlock_release(&subscription->nq->lock);
	} while (1);
}
static int rxd_cq_write_tagged(struct rxd_cq *cq,
			       struct fi_cq_tagged_entry *cq_entry)
{
	struct fi_cq_tagged_entry *comp;

	if (ofi_cirque_isfull(cq->util_cq.cirq))
		return -FI_ENOSPC;

	FI_DBG(&rxd_prov, FI_LOG_EP_CTRL,
	       "report completion: tag: 0x%" PRIx64 "\n", cq_entry->tag);

	comp = ofi_cirque_tail(cq->util_cq.cirq);
	*comp = *cq_entry;
	ofi_cirque_commit(cq->util_cq.cirq);
	return 0;
}
void rxd_ep_check_unexp_msg_list(struct rxd_ep *ep,
				 struct rxd_recv_entry *recv_entry)
{
	struct dlist_entry *match;
	struct rxd_rx_entry *rx_entry;
	struct rxd_pkt_data_start *pkt_start;

	FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "ep->num_unexp_msg: %d\n",
	       ep->num_unexp_msg);

	match = dlist_remove_first_match(&ep->unexp_msg_list,
					 &rxd_match_unexp_msg,
					 (void *) recv_entry);
	if (match) {
		FI_DBG(&rxd_prov, FI_LOG_EP_CTRL,
		       "progressing unexp msg entry\n");

		dlist_remove(&recv_entry->entry);
		ep->num_unexp_msg--;

		rx_entry = container_of(match, struct rxd_rx_entry,
					unexp_entry);
		rx_entry->recv = recv_entry;
		pkt_start = (struct rxd_pkt_data_start *) rx_entry->unexp_buf->buf;

		rxd_ep_handle_data_msg(ep, rx_entry->peer_info, rx_entry,
				       rx_entry->recv->iov,
				       rx_entry->recv->msg.iov_count,
				       &pkt_start->ctrl, pkt_start->data,
				       rx_entry->unexp_buf);
		rxd_ep_repost_buff(rx_entry->unexp_buf);
	}
}
int ofi_monitor_subscribe(struct ofi_mem_monitor *monitor,
			  const void *addr, size_t len)
{
	int ret;

	FI_DBG(&core_prov, FI_LOG_MR,
	       "subscribing addr=%p len=%zu\n", addr, len);

	ret = monitor->subscribe(monitor, addr, len);
	if (OFI_UNLIKELY(ret)) {
		FI_WARN(&core_prov, FI_LOG_MR,
			"Failed (ret = %d) to monitor addr=%p len=%zu\n",
			ret, addr, len);
	}
	return ret;
}
void ofi_mr_cache_delete(struct ofi_mr_cache *cache, struct ofi_mr_entry *entry)
{
	FI_DBG(cache->domain->prov, FI_LOG_MR, "delete %p (len: %" PRIu64 ")\n",
	       entry->iov.iov_base, entry->iov.iov_len);

	cache->delete_cnt++;
	util_mr_cache_process_events(cache);

	if (--entry->use_cnt == 0) {
		if (entry->cached) {
			dlist_insert_tail(&entry->lru_entry, &cache->lru_list);
		} else {
			util_mr_free_entry(cache, entry);
		}
	}
}
int udpx_setname(fid_t fid, void *addr, size_t addrlen)
{
	struct udpx_ep *ep;
	int ret;

	ep = container_of(fid, struct udpx_ep, util_ep.ep_fid.fid);
	FI_DBG(&udpx_prov, FI_LOG_EP_CTRL, "%s\n", ofi_hex_str(addr, addrlen));

	ret = bind(ep->sock, addr, addrlen);
	if (ret) {
		FI_WARN(&udpx_prov, FI_LOG_EP_CTRL, "bind %d (%s)\n",
			errno, strerror(errno));
		return -errno;
	}
	ep->is_bound = 1;
	return 0;
}
static void client_send_connreq(struct util_wait *wait,
				struct tcpx_cm_context *cm_ctx)
{
	struct tcpx_ep *ep;
	struct fi_eq_err_entry err_entry;
	socklen_t len;
	int status, ret = FI_SUCCESS;

	FI_DBG(&tcpx_prov, FI_LOG_EP_CTRL, "client send connreq\n");
	assert(cm_ctx->fid->fclass == FI_CLASS_EP);

	ep = container_of(cm_ctx->fid, struct tcpx_ep, util_ep.ep_fid.fid);

	len = sizeof(status);
	ret = getsockopt(ep->conn_fd, SOL_SOCKET, SO_ERROR,
			 (char *) &status, &len);
	if (ret < 0 || status) {
		FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL, "connection failure\n");
		ret = (ret < 0) ? -ofi_sockerr() : status;
		goto err;
	}

	ret = tx_cm_data(ep->conn_fd, ofi_ctrl_connreq, cm_ctx);
	if (ret)
		goto err;

	ret = ofi_wait_fd_del(wait, ep->conn_fd);
	if (ret)
		goto err;

	cm_ctx->type = CLIENT_RECV_CONNRESP;
	ret = ofi_wait_fd_add(wait, ep->conn_fd, FI_EPOLL_IN,
			      tcpx_eq_wait_try_func, NULL, cm_ctx);
	if (ret)
		goto err;

	wait->signal(wait);
	return;
err:
	memset(&err_entry, 0, sizeof err_entry);
	err_entry.fid = cm_ctx->fid;
	err_entry.context = cm_ctx->fid->context;
	err_entry.err = -ret;

	free(cm_ctx);
	fi_eq_write(&ep->util_ep.eq->eq_fid, FI_NOTIFY,
		    &err_entry, sizeof(err_entry), UTIL_FLAG_ERROR);
}
void rxd_handle_recv_comp(struct rxd_ep *ep, struct fi_cq_msg_entry *comp)
{
	struct ofi_ctrl_hdr *ctrl;
	struct rxd_rx_buf *rx_buf;
	struct rxd_peer *peer;

	FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "got recv completion\n");

	assert(rxd_reposted_bufs);
	rxd_reposted_bufs--;

	rx_buf = container_of(comp->op_context, struct rxd_rx_buf, context);
	ctrl = (struct ofi_ctrl_hdr *) rx_buf->buf;
	peer = rxd_ep_getpeer_info(ep, ctrl->conn_id);

	if (ctrl->version != OFI_CTRL_VERSION) {
		FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "ctrl version mismatch\n");
		return;
	}

	switch (ctrl->type) {
	case ofi_ctrl_connreq:
		rxd_handle_conn_req(ep, ctrl, comp, rx_buf);
		break;
	case ofi_ctrl_ack:
		rxd_handle_ack(ep, ctrl, rx_buf);
		break;
	case ofi_ctrl_discard:
		rxd_handle_discard(ep, ctrl, rx_buf);
		break;
	case ofi_ctrl_connresp:
		rxd_handle_connect_ack(ep, ctrl, rx_buf);
		break;
	case ofi_ctrl_start_data:
		rxd_handle_start_data(ep, peer, ctrl, comp, rx_buf);
		break;
	case ofi_ctrl_data:
		rxd_handle_data(ep, peer, ctrl, comp, rx_buf);
		break;
	default:
		rxd_ep_repost_buff(rx_buf);
		FI_WARN(&rxd_prov, FI_LOG_EP_CTRL,
			"invalid ctrl type: %d\n", ctrl->type);
	}
	rxd_check_waiting_rx(ep);
}
bool ofi_mr_cache_flush(struct ofi_mr_cache *cache)
{
	struct ofi_mr_entry *entry;

	if (dlist_empty(&cache->lru_list))
		return false;

	dlist_pop_front(&cache->lru_list, struct ofi_mr_entry, entry, lru_entry);
	dlist_init(&entry->lru_entry);
	FI_DBG(cache->domain->prov, FI_LOG_MR, "flush %p (len: %" PRIu64 ")\n",
	       entry->iov.iov_base, entry->iov.iov_len);

	util_mr_uncache_entry(cache, entry);
	util_mr_free_entry(cache, entry);
	return true;
}
struct rxd_recv_entry *rxd_get_recv_entry(struct rxd_ep *ep,
					  struct rxd_rx_entry *rx_entry)
{
	struct dlist_entry *match;
	struct rxd_recv_entry *recv_entry;

	match = dlist_find_first_match(&ep->recv_list, &rxd_match_recv_entry,
				       (void *) rx_entry);
	if (!match) {
		/* todo: queue the pkt */
		FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "no matching recv entry\n");
		return NULL;
	}

	dlist_remove(match);
	recv_entry = container_of(match, struct rxd_recv_entry, entry);
	return recv_entry;
}
static void server_send_cm_accept(struct util_wait *wait,
				  struct tcpx_cm_context *cm_ctx)
{
	struct fi_eq_cm_entry cm_entry = {0};
	struct fi_eq_err_entry err_entry;
	struct tcpx_ep *ep;
	int ret;

	assert(cm_ctx->fid->fclass == FI_CLASS_EP);
	ep = container_of(cm_ctx->fid, struct tcpx_ep, util_ep.ep_fid.fid);

	ret = tx_cm_data(ep->conn_fd, ofi_ctrl_connresp, cm_ctx);
	if (ret)
		goto err;

	cm_entry.fid = cm_ctx->fid;
	ret = (int) fi_eq_write(&ep->util_ep.eq->eq_fid, FI_CONNECTED,
				&cm_entry, sizeof(cm_entry), 0);
	if (ret < 0) {
		FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL, "Error writing to EQ\n");
	}

	ret = ofi_wait_fd_del(wait, ep->conn_fd);
	if (ret) {
		FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL,
			"Could not remove fd from wait\n");
		goto err;
	}

	ret = tcpx_ep_msg_xfer_enable(ep);
	if (ret)
		goto err;

	FI_DBG(&tcpx_prov, FI_LOG_EP_CTRL, "Connection Accept Successful\n");
	free(cm_ctx);
	return;
err:
	memset(&err_entry, 0, sizeof err_entry);
	err_entry.fid = cm_ctx->fid;
	err_entry.context = cm_ctx->fid->context;
	err_entry.err = -ret;

	free(cm_ctx);
	fi_eq_write(&ep->util_ep.eq->eq_fid, FI_NOTIFY,
		    &err_entry, sizeof(err_entry), UTIL_FLAG_ERROR);
}
static void ofi_ini_dir(const char *dir)
{
	int n = 0;
	char *lib;
	void *dlhandle;
	struct dirent **liblist = NULL;
	struct fi_provider* (*inif)(void);

	n = scandir(dir, &liblist, lib_filter, NULL);
	if (n < 0)
		goto libdl_done;

	while (n--) {
		if (asprintf(&lib, "%s/%s", dir, liblist[n]->d_name) < 0) {
			FI_WARN(&core_prov, FI_LOG_CORE,
				"asprintf failed to allocate memory\n");
			goto libdl_done;
		}
		FI_DBG(&core_prov, FI_LOG_CORE, "opening provider lib %s\n", lib);

		dlhandle = dlopen(lib, RTLD_NOW);
		free(liblist[n]);
		if (dlhandle == NULL) {
			FI_WARN(&core_prov, FI_LOG_CORE, "dlopen(%s): %s\n",
				lib, dlerror());
			free(lib);
			continue;
		}
		free(lib);

		inif = dlsym(dlhandle, "fi_prov_ini");
		if (inif == NULL) {
			FI_WARN(&core_prov, FI_LOG_CORE, "dlsym: %s\n",
				dlerror());
			dlclose(dlhandle);
		} else {
			ofi_register_provider((inif)(), dlhandle);
		}
	}

libdl_done:
	while (n-- > 0)
		free(liblist[n]);
	free(liblist);
}
static void util_mr_free_entry(struct ofi_mr_cache *cache,
			       struct ofi_mr_entry *entry)
{
	FI_DBG(cache->domain->prov, FI_LOG_MR, "free %p (len: %" PRIu64 ")\n",
	       entry->iov.iov_base, entry->iov.iov_len);

	assert(!entry->cached);

	if (entry->subscribed) {
		ofi_monitor_unsubscribe(&entry->subscription);
		entry->subscribed = 0;
	}
	cache->delete_region(cache, entry);

	assert((cache->cached_cnt != 0) &&
	       (((ssize_t) cache->cached_size -
		 (ssize_t) entry->iov.iov_len) >= 0));
	cache->cached_cnt--;
	cache->cached_size -= entry->iov.iov_len;

	util_buf_release(cache->entry_pool, entry);
}
int rxm_info_to_core(uint32_t version, const struct fi_info *hints,
		     struct fi_info *core_info)
{
	int use_srx = 0;

	rxm_info_to_core_mr_modes(version, hints, core_info);
	core_info->mode |= FI_RX_CQ_DATA | FI_CONTEXT;

	if (hints) {
		core_info->caps = hints->caps & RXM_PASSTHRU_CAPS;
		if (hints->caps & (FI_ATOMIC | FI_TAGGED))
			core_info->caps |= FI_MSG | FI_SEND | FI_RECV;

		/* FI_RMA cap is needed for large message transfer protocol */
		if (core_info->caps & FI_MSG)
			core_info->caps |= FI_RMA | FI_READ | FI_REMOTE_READ;

		if (hints->domain_attr) {
			core_info->domain_attr->caps |= hints->domain_attr->caps;
			core_info->domain_attr->threading =
				hints->domain_attr->threading;
		}
		if (hints->tx_attr) {
			core_info->tx_attr->msg_order = hints->tx_attr->msg_order;
			core_info->tx_attr->comp_order = hints->tx_attr->comp_order;
		}
		if (hints->rx_attr) {
			core_info->rx_attr->msg_order = hints->rx_attr->msg_order;
			core_info->rx_attr->comp_order = hints->rx_attr->comp_order;
		}
	}

	core_info->ep_attr->type = FI_EP_MSG;

	if (!fi_param_get_bool(&rxm_prov, "use_srx", &use_srx) && use_srx) {
		FI_DBG(&rxm_prov, FI_LOG_FABRIC,
		       "Requesting shared receive context from core provider\n");
		core_info->ep_attr->rx_ctx_cnt = FI_SHARED_CONTEXT;
	}

	core_info->tx_attr->size = rxm_msg_tx_size;
	core_info->rx_attr->size = rxm_msg_rx_size;

	return 0;
}
int ofi_wait_fd_add(struct util_wait *wait, int fd, uint32_t events,
		    ofi_wait_fd_try_func wait_try, void *arg, void *context)
{
	struct ofi_wait_fd_entry *fd_entry;
	struct dlist_entry *entry;
	struct util_wait_fd *wait_fd = container_of(wait, struct util_wait_fd,
						    util_wait);
	int ret = 0;

	fastlock_acquire(&wait_fd->lock);
	entry = dlist_find_first_match(&wait_fd->fd_list, ofi_wait_fd_match, &fd);
	if (entry) {
		FI_DBG(wait->prov, FI_LOG_EP_CTRL,
		       "Given fd (%d) already added to wait list - %p \n",
		       fd, wait_fd);
		fd_entry = container_of(entry, struct ofi_wait_fd_entry, entry);
		ofi_atomic_inc32(&fd_entry->ref);
		goto out;
	}

	ret = fi_epoll_add(wait_fd->epoll_fd, fd, events, context);
	if (ret) {
		FI_WARN(wait->prov, FI_LOG_FABRIC, "Unable to add fd to epoll\n");
		goto out;
	}

	fd_entry = calloc(1, sizeof *fd_entry);
	if (!fd_entry) {
		ret = -FI_ENOMEM;
		fi_epoll_del(wait_fd->epoll_fd, fd);
		goto out;
	}
	fd_entry->fd = fd;
	fd_entry->wait_try = wait_try;
	fd_entry->arg = arg;
	ofi_atomic_initialize32(&fd_entry->ref, 1);

	dlist_insert_tail(&fd_entry->entry, &wait_fd->fd_list);
out:
	fastlock_release(&wait_fd->lock);
	return ret;
}
void rxd_handle_discard(struct rxd_ep *ep, struct ofi_ctrl_hdr *ctrl,
			struct rxd_rx_buf *rx_buf)
{
	uint64_t idx;
	struct rxd_tx_entry *tx_entry;

	rxd_ep_lock_if_required(ep);
	FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "got Reject: msg: %" PRIu64 " - %d\n",
	       ctrl->msg_id, ctrl->seg_no);

	idx = ctrl->msg_id & RXD_TX_IDX_BITS;
	tx_entry = &ep->tx_entry_fs->buf[idx];
	if (tx_entry->msg_id != ctrl->msg_id)
		goto out;

	rxd_tx_entry_discard(ep, tx_entry);
out:
	rxd_ep_repost_buff(rx_buf);
	rxd_ep_unlock_if_required(ep);
}
/*
 * A discard is issued by the receiving side, so we abort transferring the
 * rest of the data. However, the completion is still reported to the sender
 * as successful. This ensures that short and long messages are treated the
 * same, since short messages would be entirely buffered at the receiver,
 * with no notification that the application later discarded the message.
 */
static void rxd_handle_discard(struct rxd_ep *ep, struct ofi_ctrl_hdr *ctrl,
			       struct rxd_rx_buf *rx_buf)
{
	struct rxd_tx_entry *tx_entry;
	uint64_t idx;

	FI_DBG(&rxd_prov, FI_LOG_EP_CTRL,
	       "discard- msg_id: %" PRIu64 ", segno: %d\n",
	       ctrl->msg_id, ctrl->seg_no);

	idx = ctrl->msg_id & RXD_TX_IDX_BITS;
	tx_entry = &ep->tx_entry_fs->buf[idx];
	if (tx_entry->msg_id == ctrl->msg_id) {
		rxd_cq_report_tx_comp(rxd_ep_tx_cq(ep), tx_entry);
		rxd_cntr_report_tx_comp(ep, tx_entry);
		rxd_tx_entry_done(ep, tx_entry);
	}

	rxd_ep_repost_buff(rx_buf);
}