static void __gnix_msg_send_fr_complete(struct gnix_fab_req *req,
					struct gnix_tx_descriptor *txd)
{
	atomic_dec(&req->vc->outstanding_tx_reqs);
	_gnix_nic_tx_free(req->gnix_ep->nic, txd);

	/* Schedule VC TX queue in case the VC is 'fenced'. */
	_gnix_vc_tx_schedule(req->vc);

	_gnix_fr_free(req->gnix_ep, req);
}
static int __gnix_rma_fab_req_complete(void *arg)
{
	struct gnix_fab_req *req = (struct gnix_fab_req *)arg;
	struct gnix_fid_ep *ep = req->gnix_ep;
	int rc;
	struct gnix_fid_cntr *cntr = NULL;

	/* more transactions needed for this request? */

	if (req->flags & FI_COMPLETION) {
		rc = _gnix_cq_add_event(ep->send_cq, req->user_context,
					req->flags, req->len,
					(void *)req->loc_addr,
					req->imm, req->msg.tag);
		if (rc) {
			GNIX_WARN(FI_LOG_CQ,
				  "_gnix_cq_add_event() failed: %d\n", rc);
		}
	}

	if ((req->type == GNIX_FAB_RQ_RDMA_WRITE) && ep->write_cntr)
		cntr = ep->write_cntr;

	if ((req->type == GNIX_FAB_RQ_RDMA_READ) && ep->read_cntr)
		cntr = ep->read_cntr;

	if (cntr) {
		rc = _gnix_cntr_inc(cntr);
		if (rc)
			GNIX_WARN(FI_LOG_CQ,
				  "_gnix_cntr_inc() failed: %d\n", rc);
	}

	atomic_dec(&req->vc->outstanding_tx_reqs);

	/* We could have requests waiting for TXDs or FI_FENCE operations.
	 * Schedule this VC to push any such TXs. */
	_gnix_vc_schedule_tx(req->vc);

	_gnix_fr_free(ep, req);

	return FI_SUCCESS;
}
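/*
 * Context for the handler above (a sketch, not code from this file): the
 * request's final GNI transaction lands in the NIC's TX CQ handler, which
 * invokes a per-descriptor completer that forwards to
 * __gnix_rma_fab_req_complete().  The txd->req wiring shown here is an
 * assumption made for illustration, not confirmed provider API.
 */
#if 0	/* illustrative only */
static int __gnix_rma_txd_complete_sketch(void *arg, gni_return_t tx_status)
{
	struct gnix_tx_descriptor *txd = (struct gnix_tx_descriptor *)arg;

	/* ... retire the txd, translate tx_status ... */

	/* deliver CQ/cntr events and release the fabric request */
	return __gnix_rma_fab_req_complete((void *)txd->req);
}
#endif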
static void __gnix_amo_fr_complete(struct gnix_fab_req *req,
				   struct gnix_tx_descriptor *txd)
{
	if (req->flags & FI_LOCAL_MR) {
		GNIX_INFO(FI_LOG_EP_DATA, "freeing auto-reg MR: %p\n",
			  req->amo.loc_md);
		fi_close(&req->amo.loc_md->mr_fid.fid);
	}

	atomic_dec(&req->vc->outstanding_tx_reqs);
	_gnix_nic_tx_free(req->vc->ep->nic, txd);

	/* Schedule VC TX queue in case the VC is 'fenced'. */
	_gnix_vc_tx_schedule(req->vc);

	_gnix_fr_free(req->vc->ep, req);
}
static void __gnix_amo_fr_complete(struct gnix_fab_req *req)
{
	int rc;

	if (req->flags & FI_LOCAL_MR) {
		GNIX_INFO(FI_LOG_EP_DATA, "freeing auto-reg MR: %p\n",
			  req->amo.loc_md);
		rc = fi_close(&req->amo.loc_md->mr_fid.fid);
		if (rc != FI_SUCCESS) {
			GNIX_ERR(FI_LOG_DOMAIN,
				 "failed to deregister auto-registered region, "
				 "rc=%d\n", rc);
		}
		req->flags &= ~FI_LOCAL_MR;
	}

	ofi_atomic_dec32(&req->vc->outstanding_tx_reqs);

	/* Schedule VC TX queue in case the VC is 'fenced'. */
	_gnix_vc_tx_schedule(req->vc);

	_gnix_fr_free(req->vc->ep, req);
}
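/*
 * Note on FI_LOCAL_MR (derived from _gnix_rma() below): when a caller
 * supplies no memory descriptor, the provider auto-registers the local
 * buffer via gnix_mr_reg() and tags the request with FI_LOCAL_MR; the
 * completion handlers above then undo that registration with fi_close().
 * Clearing FI_LOCAL_MR after the close, as the second variant does,
 * prevents a re-run of the completion path from double-freeing the MR.
 */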
ssize_t _gnix_rma(struct gnix_fid_ep *ep, enum gnix_fab_req_type fr_type,
		  uint64_t loc_addr, size_t len, void *mdesc,
		  uint64_t dest_addr, uint64_t rem_addr, uint64_t mkey,
		  void *context, uint64_t flags, uint64_t data)
{
	struct gnix_vc *vc;
	struct gnix_fab_req *req;
	struct gnix_fid_mem_desc *md = NULL;
	int rc;
	int rdma;
	struct fid_mr *auto_mr = NULL;

	if (!ep) {
		return -FI_EINVAL;
	}

	if ((flags & FI_INJECT) && (len > GNIX_INJECT_SIZE)) {
		GNIX_INFO(FI_LOG_EP_DATA,
			  "RMA length %zu exceeds inject max size: %d\n",
			  len, GNIX_INJECT_SIZE);
		return -FI_EINVAL;
	}

	/* find VC for target */
	rc = _gnix_ep_get_vc(ep, dest_addr, &vc);
	if (rc) {
		GNIX_INFO(FI_LOG_EP_DATA,
			  "_gnix_ep_get_vc() failed, addr: %lx, rc: %d\n",
			  dest_addr, rc);
		return rc;
	}

	/* setup fabric request */
	req = _gnix_fr_alloc(ep);
	if (!req) {
		GNIX_INFO(FI_LOG_EP_DATA, "_gnix_fr_alloc() failed\n");
		return -FI_ENOSPC;
	}

	rdma = len >= ep->domain->params.rma_rdma_thresh;

	req->type = fr_type;
	req->gnix_ep = ep;
	req->vc = vc;
	req->user_context = context;
	req->work_fn = _gnix_rma_post_req;
	atomic_initialize(&req->rma.outstanding_txds, 0);

	if (fr_type == GNIX_FAB_RQ_RDMA_READ &&
	    (rem_addr & GNI_READ_ALIGN_MASK || len & GNI_READ_ALIGN_MASK)) {
		if (len >= GNIX_RMA_UREAD_CHAINED_THRESH) {
			GNIX_INFO(FI_LOG_EP_DATA,
				  "Using CT for unaligned GET, req: %p\n",
				  req);
			flags |= GNIX_RMA_CHAINED;
		} else {
			GNIX_INFO(FI_LOG_EP_DATA,
				  "Using tmp buf for unaligned GET, req: %p\n",
				  req);
			flags |= GNIX_RMA_INDIRECT;
		}

		if (rdma)
			req->work_fn = _gnix_rma_post_rdma_chain_req;
	}

	if (!(flags & GNIX_RMA_INDIRECT) && !mdesc &&
	    (rdma || fr_type == GNIX_FAB_RQ_RDMA_READ)) {
		/* We need to auto-register the source buffer. */
		rc = gnix_mr_reg(&ep->domain->domain_fid.fid,
				 (void *)loc_addr, len, FI_READ | FI_WRITE,
				 0, 0, 0, &auto_mr, NULL);
		if (rc != FI_SUCCESS) {
			GNIX_INFO(FI_LOG_EP_DATA,
				  "Failed to auto-register local buffer: %d\n",
				  rc);
			goto err_auto_reg;
		}
		flags |= FI_LOCAL_MR;
		mdesc = (void *)auto_mr;
		GNIX_INFO(FI_LOG_EP_DATA, "auto-reg MR: %p\n", auto_mr);
	}

	if (mdesc)
		md = container_of(mdesc, struct gnix_fid_mem_desc, mr_fid);
	req->rma.loc_md = (void *)md;

	req->rma.rem_addr = rem_addr;
	req->rma.rem_mr_key = mkey;
	req->rma.len = len;
	req->rma.imm = data;
	req->flags = flags;

	if (req->flags & FI_INJECT) {
		memcpy(req->inject_buf, (void *)loc_addr, len);
		req->rma.loc_addr = (uint64_t)req->inject_buf;
	} else {
		req->rma.loc_addr = loc_addr;
	}

	/* Inject interfaces always suppress completions.  If
	 * SELECTIVE_COMPLETION is set, honor any setting.  Otherwise, always
	 * deliver a completion. */
	if ((flags & GNIX_SUPPRESS_COMPLETION) ||
	    (ep->send_selective_completion && !(flags & FI_COMPLETION))) {
		req->flags &= ~FI_COMPLETION;
	} else {
		req->flags |= FI_COMPLETION;
	}

	if (rdma) {
		req->flags |= GNIX_RMA_RDMA;
	}

	GNIX_INFO(FI_LOG_EP_DATA, "Queuing (%p %p %zu)\n",
		  (void *)loc_addr, (void *)rem_addr, len);

	return _gnix_vc_queue_tx_req(req);

err_auto_reg:
	_gnix_fr_free(req->vc->ep, req);
	return rc;
}
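/*
 * Caller sketch (illustrative, not part of this file): how a fi_write()
 * entry point might funnel into _gnix_rma().  Only identifiers already
 * used above are assumed; the provider's default RMA flag OR-ins are
 * omitted for brevity.
 */
static ssize_t gnix_ep_write_sketch(struct fid_ep *ep, const void *buf,
				    size_t len, void *desc,
				    fi_addr_t dest_addr, uint64_t addr,
				    uint64_t key, void *context)
{
	struct gnix_fid_ep *gnix_ep;

	if (!ep)
		return -FI_EINVAL;

	gnix_ep = container_of(ep, struct gnix_fid_ep, ep_fid);

	/* a write moves len bytes from buf to (addr, key) at dest_addr */
	return _gnix_rma(gnix_ep, GNIX_FAB_RQ_RDMA_WRITE,
			 (uint64_t)buf, len, desc,
			 dest_addr, addr, key,
			 context, gnix_ep->op_flags, 0);
}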
int gnix_ep_open(struct fid_domain *domain, struct fi_info *info,
		 struct fid_ep **ep, void *context)
{
	int ret = FI_SUCCESS;
	int tsret = FI_SUCCESS;
	uint32_t cdm_id;
	struct gnix_fid_domain *domain_priv;
	struct gnix_fid_ep *ep_priv;
	gnix_hashtable_attr_t gnix_ht_attr;
	gnix_ht_key_t *key_ptr;
	struct gnix_tag_storage_attr untagged_attr = {
		.type = GNIX_TAG_LIST,
		.use_src_addr_matching = 1,
	};
	bool free_list_inited = false;

	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");

	if ((domain == NULL) || (info == NULL) || (ep == NULL) ||
	    (info->ep_attr == NULL))
		return -FI_EINVAL;

	/*
	 * TODO: need to implement other endpoint types
	 */
	if (info->ep_attr->type != FI_EP_RDM)
		return -FI_ENOSYS;

	domain_priv = container_of(domain, struct gnix_fid_domain, domain_fid);

	ep_priv = calloc(1, sizeof(*ep_priv));
	if (!ep_priv)
		return -FI_ENOMEM;

	/* init untagged storages; route failures through the common error
	 * path so ep_priv is not leaked */
	tsret = _gnix_posted_tag_storage_init(
			&ep_priv->posted_recv_queue, &untagged_attr);
	if (tsret) {
		ret = tsret;
		goto err;
	}

	tsret = _gnix_unexpected_tag_storage_init(
			&ep_priv->unexp_recv_queue, &untagged_attr);
	if (tsret) {
		ret = tsret;
		goto err;
	}

	/* init tagged storages */
	tsret = _gnix_posted_tag_storage_init(
			&ep_priv->tagged_posted_recv_queue, NULL);
	if (tsret) {
		ret = tsret;
		goto err;
	}

	tsret = _gnix_unexpected_tag_storage_init(
			&ep_priv->tagged_unexp_recv_queue, NULL);
	if (tsret) {
		ret = tsret;
		goto err;
	}

	ep_priv->ep_fid.fid.fclass = FI_CLASS_EP;
	ep_priv->ep_fid.fid.context = context;
	ep_priv->ep_fid.fid.ops = &gnix_ep_fi_ops;
	ep_priv->ep_fid.ops = &gnix_ep_ops;
	ep_priv->domain = domain_priv;
	ep_priv->type = info->ep_attr->type;

	_gnix_ref_init(&ep_priv->ref_cnt, 1, __ep_destruct);

	fastlock_init(&ep_priv->recv_comp_lock);
	fastlock_init(&ep_priv->recv_queue_lock);
	fastlock_init(&ep_priv->tagged_queue_lock);
	slist_init(&ep_priv->pending_recv_comp_queue);

	ep_priv->caps = info->caps & GNIX_EP_RDM_CAPS;

	if (info->tx_attr)
		ep_priv->op_flags = info->tx_attr->op_flags;
	if (info->rx_attr)
		ep_priv->op_flags |= info->rx_attr->op_flags;
	ep_priv->op_flags &= GNIX_EP_OP_FLAGS;

	ep_priv->min_multi_recv = GNIX_OPT_MIN_MULTI_RECV_DEFAULT;

	ret = __fr_freelist_init(ep_priv);
	if (ret != FI_SUCCESS) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "Error allocating gnix_fab_req freelist (%s)\n",
			  fi_strerror(-ret));
		goto err;
	}

	free_list_inited = true;

	ep_priv->ep_fid.msg = &gnix_ep_msg_ops;
	ep_priv->ep_fid.rma = &gnix_ep_rma_ops;
	ep_priv->ep_fid.tagged = &gnix_ep_tagged_ops;
	ep_priv->ep_fid.atomic = &gnix_ep_atomic_ops;

	ep_priv->ep_fid.cm = &gnix_cm_ops;

	if (ep_priv->type == FI_EP_RDM) {
		if (info->src_addr != NULL) {
			ret = __gnix_ep_bound_prep(domain_priv,
						   info,
						   ep_priv);
			if (ret != FI_SUCCESS) {
				GNIX_WARN(FI_LOG_EP_CTRL,
					  "__gnix_ep_bound_prep returned error (%s)\n",
					  fi_strerror(-ret));
				goto err;
			}
		} else {
			fastlock_acquire(&domain_priv->cm_nic_lock);

			/*
			 * if a cm_nic has not yet been allocated for this
			 * domain, do it now.  Reuse the embedded gnix_nic
			 * in the cm_nic as the nic for this endpoint
			 * to reduce demand on Aries hw resources.
			 */
			if (domain_priv->cm_nic == NULL) {
				ret = _gnix_cm_nic_alloc(domain_priv, info,
							 &domain_priv->cm_nic);
				if (ret != FI_SUCCESS) {
					GNIX_WARN(FI_LOG_EP_CTRL,
						  "_gnix_cm_nic_alloc returned %s\n",
						  fi_strerror(-ret));
					fastlock_release(
						&domain_priv->cm_nic_lock);
					goto err;
				}
				ep_priv->cm_nic = domain_priv->cm_nic;
				ep_priv->nic = ep_priv->cm_nic->nic;
				_gnix_ref_get(ep_priv->nic);
			} else {
				ep_priv->cm_nic = domain_priv->cm_nic;
				_gnix_ref_get(ep_priv->cm_nic);
			}

			fastlock_release(&domain_priv->cm_nic_lock);

			ep_priv->my_name.gnix_addr.device_addr =
				ep_priv->cm_nic->my_name.gnix_addr.device_addr;
			ep_priv->my_name.cm_nic_cdm_id =
				ep_priv->cm_nic->my_name.gnix_addr.cdm_id;

			ret = _gnix_get_new_cdm_id(domain_priv, &cdm_id);
			if (ret != FI_SUCCESS) {
				GNIX_WARN(FI_LOG_EP_CTRL,
					  "_gnix_get_new_cdm_id call returned %s\n",
					  fi_strerror(-ret));
				goto err;
			}

			ep_priv->my_name.gnix_addr.cdm_id = cdm_id;
		}

		key_ptr = (gnix_ht_key_t *)&ep_priv->my_name.gnix_addr;

		ret = _gnix_ht_insert(ep_priv->cm_nic->addr_to_ep_ht,
				      *key_ptr, ep_priv);
		if ((ret != FI_SUCCESS) && (ret != -FI_ENOSPC)) {
			GNIX_WARN(FI_LOG_EP_CTRL,
				  "_gnix_ht_insert returned %d\n", ret);
			goto err;
		}

		gnix_ht_attr.ht_initial_size = domain_priv->params.ct_init_size;
		gnix_ht_attr.ht_maximum_size = domain_priv->params.ct_max_size;
		gnix_ht_attr.ht_increase_step = domain_priv->params.ct_step;
		gnix_ht_attr.ht_increase_type = GNIX_HT_INCREASE_MULT;
		gnix_ht_attr.ht_collision_thresh = 500;
		gnix_ht_attr.ht_hash_seed = 0xdeadbeefbeefdead;
		gnix_ht_attr.ht_internal_locking = 0;
		gnix_ht_attr.destructor = __gnix_vc_destroy_ht_entry;

		ep_priv->vc_ht = calloc(1, sizeof(struct gnix_hashtable));
		if (ep_priv->vc_ht == NULL) {
			ret = -FI_ENOMEM;
			goto err;
		}

		ret = _gnix_ht_init(ep_priv->vc_ht, &gnix_ht_attr);
		if (ret != FI_SUCCESS) {
			GNIX_WARN(FI_LOG_EP_CTRL,
				  "_gnix_ht_init call returned %d\n", ret);
			goto err;
		}
		fastlock_init(&ep_priv->vc_ht_lock);

	} else {
		ep_priv->cm_nic = NULL;
		ep_priv->vc = NULL;
	}

	ep_priv->progress_fn = NULL;
	ep_priv->rx_progress_fn = NULL;

	if (ep_priv->nic == NULL) {
		ret = gnix_nic_alloc(domain_priv, NULL, &ep_priv->nic);
		if (ret != FI_SUCCESS) {
			GNIX_WARN(FI_LOG_EP_CTRL,
				  "gnix_nic_alloc call returned %d\n", ret);
			goto err;
		}
	}

	/*
	 * if smsg callbacks not present hook them up now
	 */
	if (ep_priv->nic->smsg_callbacks == NULL)
		ep_priv->nic->smsg_callbacks = gnix_ep_smsg_callbacks;

	_gnix_ref_get(ep_priv->domain);
	*ep = &ep_priv->ep_fid;
	return ret;

err:
	if (free_list_inited == true)
		__fr_freelist_destroy(ep_priv);

	if (ep_priv->vc_ht != NULL) {
		_gnix_ht_destroy(ep_priv->vc_ht); /* may not be initialized but okay */
		free(ep_priv->vc_ht);
		ep_priv->vc_ht = NULL;
	}

	/* preserve ret so the original failure code is what we return */
	if (ep_priv->cm_nic != NULL)
		_gnix_cm_nic_free(ep_priv->cm_nic);

	if (ep_priv->nic != NULL)
		_gnix_nic_free(ep_priv->nic);

	free(ep_priv);
	return ret;
}

static int __match_context(struct slist_entry *item, const void *arg)
{
	struct gnix_fab_req *req;

	req = container_of(item, struct gnix_fab_req, slist);

	return req->user_context == arg;
}

static inline struct gnix_fab_req *__find_tx_req(struct gnix_fid_ep *ep,
						 void *context)
{
	struct gnix_fab_req *req = NULL;
	struct slist_entry *entry;
	struct gnix_vc *vc;

	GNIX_HASHTABLE_ITERATOR(ep->vc_ht, iter);

	GNIX_DEBUG(FI_LOG_EP_CTRL,
		   "searching VCs for the correct context to cancel, context=%p\n",
		   context);

	fastlock_acquire(&ep->vc_ht_lock);
	while ((vc = _gnix_ht_iterator_next(&iter))) {
		fastlock_acquire(&vc->tx_queue_lock);
		entry = slist_remove_first_match(&vc->tx_queue,
						 __match_context, context);
		fastlock_release(&vc->tx_queue_lock);
		if (entry) {
			req = container_of(entry, struct gnix_fab_req, slist);
			break;
		}
	}
	fastlock_release(&ep->vc_ht_lock);

	return req;
}

static inline struct gnix_fab_req *__find_rx_req(struct gnix_fid_ep *ep,
						 void *context)
{
	struct gnix_fab_req *req = NULL;

	fastlock_acquire(&ep->recv_queue_lock);
	req = _gnix_remove_req_by_context(&ep->posted_recv_queue, context);
	fastlock_release(&ep->recv_queue_lock);

	if (req)
		return req;

	fastlock_acquire(&ep->tagged_queue_lock);
	req = _gnix_remove_req_by_context(&ep->tagged_posted_recv_queue,
					  context);
	fastlock_release(&ep->tagged_queue_lock);

	return req;
}

static ssize_t gnix_ep_cancel(fid_t fid, void *context)
{
	int ret = FI_SUCCESS;
	struct gnix_fid_ep *ep;
	struct gnix_fab_req *req;
	struct gnix_fid_cq *err_cq = NULL;
	struct gnix_fid_cntr *err_cntr = NULL;
	void *addr;
	uint64_t tag, flags;
	size_t len;
	int is_send = 0;

	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");

	ep = container_of(fid, struct gnix_fid_ep, ep_fid.fid);

	if (!ep->domain)
		return -FI_EDOMAIN;

	/* only the user context is provided, so we must search for a
	 * matching send or recv request.  Try the send requests first. */
	GNIX_INFO(FI_LOG_EP_CTRL, "looking for event to cancel\n");

	req = __find_tx_req(ep, context);
	if (!req) {
		req = __find_rx_req(ep, context);
		if (req) {
			err_cq = ep->recv_cq;
			err_cntr = ep->recv_cntr;
		}
	} else {
		is_send = 1;
		err_cq = ep->send_cq;
		err_cntr = ep->send_cntr;
	}
	GNIX_INFO(FI_LOG_EP_CTRL, "finished searching\n");

	if (!req)
		return -FI_ENOENT;

	if (err_cq) {
		/* add canceled event */
		if (!(req->type == GNIX_FAB_RQ_RDMA_READ ||
		      req->type == GNIX_FAB_RQ_RDMA_WRITE)) {
			if (!is_send) {
				addr = (void *)req->msg.recv_addr;
				len = req->msg.recv_len;
			} else {
				addr = (void *)req->msg.send_addr;
				len = req->msg.send_len;
			}
			tag = req->msg.tag;
		} else {
			/* rma information */
			addr = (void *)req->rma.loc_addr;
			len = req->rma.len;
			tag = 0;
		}
		flags = req->flags;

		_gnix_cq_add_error(err_cq, context, flags, len, addr,
				   0 /* data */, tag, len, FI_ECANCELED,
				   FI_ECANCELED, 0);
	}

	if (err_cntr) {
		/* signal increase in cntr errs */
		_gnix_cntr_inc_err(err_cntr);
	}

	_gnix_fr_free(ep, req);

	return ret;
}
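/*
 * Application-side view of cancellation (a sketch using only standard
 * libfabric calls; assumes <rdma/fi_endpoint.h>, <rdma/fi_eq.h> and
 * <rdma/fi_errno.h> are in scope): fi_cancel() dispatches to
 * gnix_ep_cancel() above, and the canceled request is then reaped as an
 * FI_ECANCELED entry on the CQ's error queue.
 */
static int cancel_recv_sketch(struct fid_ep *ep, struct fid_cq *rx_cq,
			      void *context)
{
	struct fi_cq_err_entry err_entry = { 0 };
	ssize_t ret;

	ret = fi_cancel(&ep->fid, context);
	if (ret)
		return (int)ret;	/* e.g. -FI_ENOENT: no matching request */

	/* the canceled request surfaces as a CQ error entry */
	ret = fi_cq_readerr(rx_cq, &err_entry, 0);
	if (ret == 1 && err_entry.err == FI_ECANCELED &&
	    err_entry.op_context == context)
		return 0;

	return -FI_EOTHER;
}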