/* * define methods needed for the GNI fabric provider */ static int gnix_fabric_open(struct fi_fabric_attr *attr, struct fid_fabric **fabric, void *context) { struct gnix_fid_fabric *fab; if (strcmp(attr->name, gnix_fab_name)) { return -FI_ENODATA; } fab = calloc(1, sizeof(*fab)); if (!fab) { return -FI_ENOMEM; } /* * set defaults related to use of GNI datagrams */ fab->n_bnd_dgrams = gnix_def_gni_n_dgrams; fab->n_wc_dgrams = gnix_def_gni_n_wc_dgrams; fab->datagram_timeout = gnix_def_gni_datagram_timeouts; fab->fab_fid.fid.fclass = FI_CLASS_FABRIC; fab->fab_fid.fid.context = context; fab->fab_fid.fid.ops = &gnix_fab_fi_ops; fab->fab_fid.ops = &gnix_fab_ops; _gnix_ref_init(&fab->ref_cnt, 1, __fabric_destruct); dlist_init(&fab->domain_list); *fabric = &fab->fab_fid; return FI_SUCCESS; }
/*
 * Walk a reference counter through init -> get -> put -> put and check,
 * after every step, both the value returned by the helper and the raw
 * counter, and that test_destruct has only marked the object as
 * destructed once the last reference is dropped.
 */
Test(utils, references)
{
	struct gnix_reference_tester test;
	int count;

	/* one initial reference, nothing destructed yet */
	_gnix_ref_init(&test.ref_cnt, 1, test_destruct);
	test.destructed = 0;

	cr_assert(atomic_get(&test.ref_cnt.references) == 1);
	cr_assert(!test.destructed);

	/* taking a reference: 1 -> 2 */
	count = _gnix_ref_get(&test);
	cr_assert(count == 2);
	cr_assert(atomic_get(&test.ref_cnt.references) == 2);
	cr_assert(!test.destructed);

	/* dropping a reference: 2 -> 1, object must stay alive */
	count = _gnix_ref_put(&test);
	cr_assert(count == 1);
	cr_assert(atomic_get(&test.ref_cnt.references) == 1);
	cr_assert(!test.destructed);

	/* dropping the last reference: 1 -> 0, destructor must have run */
	count = _gnix_ref_put(&test);
	cr_assert(count == 0);
	cr_assert(atomic_get(&test.ref_cnt.references) == 0);
	cr_assert(test.destructed == 1);
}
/** * Creates a shared transmit context. * * @param[in] val value to be sign extended * @param[in] len length to sign extend the value * @return FI_SUCCESS if shared tx context successfully created * @return -FI_EINVAL if invalid arg(s) supplied * @return -FI_ENOMEM insufficient memory */ DIRECT_FN STATIC int gnix_stx_open(struct fid_domain *dom, struct fi_tx_attr *tx_attr, struct fid_stx **stx, void *context) { int ret = FI_SUCCESS; struct gnix_fid_domain *domain; struct gnix_nic *nic; struct gnix_fid_stx *stx_priv; struct gnix_nic_attr nic_attr = {0}; GNIX_TRACE(FI_LOG_DOMAIN, "\n"); domain = container_of(dom, struct gnix_fid_domain, domain_fid.fid); if (domain->domain_fid.fid.fclass != FI_CLASS_DOMAIN) { ret = -FI_EINVAL; goto err; } stx_priv = calloc(1, sizeof(*stx_priv)); if (!stx_priv) { ret = -FI_ENOMEM; goto err; } stx_priv->domain = domain; /* * we force allocation of a nic to make semantics * match the intent fi_endpoint man page, provide * a TX context (aka gnix nic) that can be shared * explicitly amongst endpoints */ nic_attr.must_alloc = true; ret = gnix_nic_alloc(domain, &nic_attr, &nic); if (ret != FI_SUCCESS) { GNIX_WARN(FI_LOG_EP_CTRL, "_gnix_nic_alloc call returned %d\n", ret); goto err; } stx_priv->nic = nic; _gnix_ref_init(&stx_priv->ref_cnt, 1, __stx_destruct); _gnix_ref_get(stx_priv->domain); stx_priv->stx_fid.fid.fclass = FI_CLASS_STX_CTX; stx_priv->stx_fid.fid.context = context; stx_priv->stx_fid.fid.ops = &gnix_stx_ops; stx_priv->stx_fid.ops = NULL; *stx = &stx_priv->stx_fid; err: return ret; }
DIRECT_FN int gnix_passive_ep_open(struct fid_fabric *fabric, struct fi_info *info, struct fid_pep **pep, void *context) { struct gnix_fid_fabric *fabric_priv; struct gnix_fid_pep *pep_priv; if (!fabric || !info || !pep) return -FI_EINVAL; fabric_priv = container_of(fabric, struct gnix_fid_fabric, fab_fid); pep_priv = calloc(1, sizeof(*pep_priv)); if (!pep_priv) return -FI_ENOMEM; pep_priv->pep_fid.fid.fclass = FI_CLASS_PEP; pep_priv->pep_fid.fid.context = context; pep_priv->pep_fid.fid.ops = &gnix_pep_fi_ops; pep_priv->pep_fid.ops = &gnix_pep_ops_ep; pep_priv->pep_fid.cm = &gnix_pep_ops_cm; pep_priv->listen_fd = -1; pep_priv->backlog = 5; /* TODO set via fi_control parameter. */ pep_priv->fabric = fabric_priv; fastlock_init(&pep_priv->lock); if (info->src_addr) { pep_priv->bound = 1; memcpy(&pep_priv->src_addr, info->src_addr, sizeof(struct sockaddr_in)); } else pep_priv->bound = 0; _gnix_ref_init(&pep_priv->ref_cnt, 1, __pep_destruct); *pep = &pep_priv->pep_fid; GNIX_DEBUG(FI_LOG_EP_CTRL, "Opened PEP: %p\n", pep_priv); return FI_SUCCESS; }
static int gnix_cq_set_wait(struct gnix_fid_cq *cq) { int ret = FI_SUCCESS; GNIX_TRACE(FI_LOG_CQ, "\n"); struct fi_wait_attr requested = { .wait_obj = cq->attr.wait_obj, .flags = 0 }; switch (cq->attr.wait_obj) { case FI_WAIT_UNSPEC: case FI_WAIT_FD: case FI_WAIT_MUTEX_COND: ret = gnix_wait_open(&cq->domain->fabric->fab_fid, &requested, &cq->wait); break; case FI_WAIT_SET: ret = _gnix_wait_set_add(cq->attr.wait_set, &cq->cq_fid.fid); if (!ret) cq->wait = cq->attr.wait_set; break; default: break; } return ret; } static void free_cq_entry(struct slist_entry *item) { struct gnix_cq_entry *entry; entry = container_of(item, struct gnix_cq_entry, item); free(entry->the_entry); free(entry); } static struct slist_entry *alloc_cq_entry(size_t size) { struct gnix_cq_entry *entry = malloc(sizeof(*entry)); if (!entry) { GNIX_DEBUG(FI_LOG_CQ, "out of memory\n"); goto err; } entry->the_entry = malloc(size); if (!entry->the_entry) { GNIX_DEBUG(FI_LOG_CQ, "out of memory\n"); goto cleanup; } return &entry->item; cleanup: free(entry); err: return NULL; } static int __gnix_cq_progress(struct gnix_fid_cq *cq) { return _gnix_prog_progress(&cq->pset); } /******************************************************************************* * Exposed helper functions ******************************************************************************/ ssize_t _gnix_cq_add_event(struct gnix_fid_cq *cq, struct gnix_fid_ep *ep, void *op_context, uint64_t flags, size_t len, void *buf, uint64_t data, uint64_t tag, fi_addr_t src_addr) { struct gnix_cq_entry *event; struct slist_entry *item; uint64_t mask; ssize_t ret = FI_SUCCESS; if (ep) { if (ep->info && ep->info->mode & FI_NOTIFY_FLAGS_ONLY) { mask = (FI_REMOTE_CQ_DATA | FI_MULTI_RECV); if (flags & FI_RMA_EVENT) { mask |= (FI_REMOTE_READ | FI_REMOTE_WRITE | FI_RMA); } flags &= mask; } } COND_ACQUIRE(cq->requires_lock, &cq->lock); item = _gnix_queue_get_free(cq->events); if (!item) { GNIX_DEBUG(FI_LOG_CQ, "error creating cq_entry\n"); ret = -FI_ENOMEM; 
goto err; } event = container_of(item, struct gnix_cq_entry, item); assert(event->the_entry); fill_function[cq->attr.format](event->the_entry, op_context, flags, len, buf, data, tag); event->src_addr = src_addr; _gnix_queue_enqueue(cq->events, &event->item); GNIX_DEBUG(FI_LOG_CQ, "Added event: %lx\n", op_context); if (cq->wait) _gnix_signal_wait_obj(cq->wait); err: COND_RELEASE(cq->requires_lock, &cq->lock); return ret; } ssize_t _gnix_cq_add_error(struct gnix_fid_cq *cq, void *op_context, uint64_t flags, size_t len, void *buf, uint64_t data, uint64_t tag, size_t olen, int err, int prov_errno, void *err_data, size_t err_data_size) { struct fi_cq_err_entry *error; struct gnix_cq_entry *event; struct slist_entry *item; ssize_t ret = FI_SUCCESS; GNIX_INFO(FI_LOG_CQ, "creating error event entry\n"); COND_ACQUIRE(cq->requires_lock, &cq->lock); item = _gnix_queue_get_free(cq->errors); if (!item) { GNIX_WARN(FI_LOG_CQ, "error creating error entry\n"); ret = -FI_ENOMEM; goto err; } event = container_of(item, struct gnix_cq_entry, item); error = event->the_entry; error->op_context = op_context; error->flags = flags; error->len = len; error->buf = buf; error->data = data; error->tag = tag; error->olen = olen; error->err = err; error->prov_errno = prov_errno; error->err_data = err_data; error->err_data_size = err_data_size; _gnix_queue_enqueue(cq->errors, &event->item); if (cq->wait) _gnix_signal_wait_obj(cq->wait); err: COND_RELEASE(cq->requires_lock, &cq->lock); return ret; } int _gnix_cq_poll_obj_add(struct gnix_fid_cq *cq, void *obj, int (*prog_fn)(void *data)) { return _gnix_prog_obj_add(&cq->pset, obj, prog_fn); } int _gnix_cq_poll_obj_rem(struct gnix_fid_cq *cq, void *obj, int (*prog_fn)(void *data)) { return _gnix_prog_obj_rem(&cq->pset, obj, prog_fn); } static void __cq_destruct(void *obj) { struct gnix_fid_cq *cq = (struct gnix_fid_cq *) obj; _gnix_ref_put(cq->domain); switch (cq->attr.wait_obj) { case FI_WAIT_NONE: break; case FI_WAIT_SET: 
_gnix_wait_set_remove(cq->wait, &cq->cq_fid.fid); break; case FI_WAIT_UNSPEC: case FI_WAIT_FD: case FI_WAIT_MUTEX_COND: assert(cq->wait); gnix_wait_close(&cq->wait->fid); break; default: GNIX_WARN(FI_LOG_CQ, "format: %d unsupported.\n", cq->attr.wait_obj); break; } _gnix_prog_fini(&cq->pset); _gnix_queue_destroy(cq->events); _gnix_queue_destroy(cq->errors); fastlock_destroy(&cq->lock); free(cq->cq_fid.ops); free(cq->cq_fid.fid.ops); free(cq); } /******************************************************************************* * API functions. ******************************************************************************/ static int gnix_cq_close(fid_t fid) { struct gnix_fid_cq *cq; int references_held; GNIX_TRACE(FI_LOG_CQ, "\n"); cq = container_of(fid, struct gnix_fid_cq, cq_fid); references_held = _gnix_ref_put(cq); if (references_held) { GNIX_INFO(FI_LOG_CQ, "failed to fully close cq due to lingering " "references. references=%i cq=%p\n", references_held, cq); } return FI_SUCCESS; } static ssize_t __gnix_cq_readfrom(struct fid_cq *cq, void *buf, size_t count, fi_addr_t *src_addr) { struct gnix_fid_cq *cq_priv; struct gnix_cq_entry *event; struct slist_entry *temp; ssize_t read_count = 0; if (!cq || !buf || !count) return -FI_EINVAL; cq_priv = container_of(cq, struct gnix_fid_cq, cq_fid); __gnix_cq_progress(cq_priv); if (_gnix_queue_peek(cq_priv->errors)) return -FI_EAVAIL; COND_ACQUIRE(cq_priv->requires_lock, &cq_priv->lock); while (_gnix_queue_peek(cq_priv->events) && count--) { temp = _gnix_queue_dequeue(cq_priv->events); event = container_of(temp, struct gnix_cq_entry, item); assert(event->the_entry); memcpy(buf, event->the_entry, cq_priv->entry_size); if (src_addr) memcpy(&src_addr[read_count], &event->src_addr, sizeof(fi_addr_t)); _gnix_queue_enqueue_free(cq_priv->events, &event->item); buf = (void *) ((uint8_t *) buf + cq_priv->entry_size); read_count++; } COND_RELEASE(cq_priv->requires_lock, &cq_priv->lock); return read_count ?: -FI_EAGAIN; } static ssize_t 
__gnix_cq_sreadfrom(int blocking, struct fid_cq *cq, void *buf, size_t count, fi_addr_t *src_addr, const void *cond, int timeout) { struct gnix_fid_cq *cq_priv; cq_priv = container_of(cq, struct gnix_fid_cq, cq_fid); if ((blocking && !cq_priv->wait) || (blocking && cq_priv->attr.wait_obj == FI_WAIT_SET)) return -FI_EINVAL; if (_gnix_queue_peek(cq_priv->errors)) return -FI_EAVAIL; if (cq_priv->wait) gnix_wait_wait((struct fid_wait *)cq_priv->wait, timeout); return __gnix_cq_readfrom(cq, buf, count, src_addr); } DIRECT_FN STATIC ssize_t gnix_cq_sreadfrom(struct fid_cq *cq, void *buf, size_t count, fi_addr_t *src_addr, const void *cond, int timeout) { return __gnix_cq_sreadfrom(1, cq, buf, count, src_addr, cond, timeout); } DIRECT_FN STATIC ssize_t gnix_cq_read(struct fid_cq *cq, void *buf, size_t count) { return __gnix_cq_sreadfrom(0, cq, buf, count, NULL, NULL, 0); } DIRECT_FN STATIC ssize_t gnix_cq_sread(struct fid_cq *cq, void *buf, size_t count, const void *cond, int timeout) { return __gnix_cq_sreadfrom(1, cq, buf, count, NULL, cond, timeout); } DIRECT_FN STATIC ssize_t gnix_cq_readfrom(struct fid_cq *cq, void *buf, size_t count, fi_addr_t *src_addr) { return __gnix_cq_sreadfrom(0, cq, buf, count, src_addr, NULL, 0); } DIRECT_FN STATIC ssize_t gnix_cq_readerr(struct fid_cq *cq, struct fi_cq_err_entry *buf, uint64_t flags) { struct gnix_fid_cq *cq_priv; struct gnix_cq_entry *event; struct slist_entry *entry; size_t err_data_cpylen; struct fi_cq_err_entry *gnix_cq_err; ssize_t read_count = 0; if (!cq || !buf) return -FI_EINVAL; cq_priv = container_of(cq, struct gnix_fid_cq, cq_fid); /* * we need to progress cq. some apps may be only using * cq to check for errors. 
*/ _gnix_prog_progress(&cq_priv->pset); COND_ACQUIRE(cq_priv->requires_lock, &cq_priv->lock); entry = _gnix_queue_dequeue(cq_priv->errors); if (!entry) { read_count = -FI_EAGAIN; goto err; } event = container_of(entry, struct gnix_cq_entry, item); gnix_cq_err = event->the_entry; buf->op_context = gnix_cq_err->op_context; buf->flags = gnix_cq_err->flags; buf->len = gnix_cq_err->len; buf->buf = gnix_cq_err->buf; buf->data = gnix_cq_err->data; buf->tag = gnix_cq_err->tag; buf->olen = gnix_cq_err->olen; buf->err = gnix_cq_err->err; buf->prov_errno = gnix_cq_err->prov_errno; if (gnix_cq_err->err_data != NULL) { /* * Note: If the api version is >= 1.5 then copy err_data into * buf->err_data and copy at most buf->err_data_size. * If buf->err_data_size is zero or the api version is < 1.5, * use the old method of allocating space in provider. */ if (FI_VERSION_LT(cq_priv->domain->fabric->fab_fid.api_version, FI_VERSION(1, 5)) || buf->err_data_size == 0) { err_data_cpylen = sizeof(cq_priv->err_data); memcpy(cq_priv->err_data, gnix_cq_err->err_data, err_data_cpylen); buf->err_data = cq_priv->err_data; } else { if (buf->err_data == NULL) return -FI_EINVAL; err_data_cpylen = MIN(buf->err_data_size, gnix_cq_err->err_data_size); memcpy(buf->err_data, gnix_cq_err->err_data, err_data_cpylen); buf->err_data_size = err_data_cpylen; } free(gnix_cq_err->err_data); gnix_cq_err->err_data = NULL; } else { if (FI_VERSION_LT(cq_priv->domain->fabric->fab_fid.api_version, FI_VERSION(1, 5))) { buf->err_data = NULL; } else { buf->err_data_size = 0; } } _gnix_queue_enqueue_free(cq_priv->errors, &event->item); read_count++; err: COND_RELEASE(cq_priv->requires_lock, &cq_priv->lock); return read_count; } DIRECT_FN STATIC const char *gnix_cq_strerror(struct fid_cq *cq, int prov_errno, const void *prov_data, char *buf, size_t len) { return NULL; } DIRECT_FN STATIC int gnix_cq_signal(struct fid_cq *cq) { struct gnix_fid_cq *cq_priv; cq_priv = container_of(cq, struct gnix_fid_cq, cq_fid); if 
(cq_priv->wait) _gnix_signal_wait_obj(cq_priv->wait); return FI_SUCCESS; } static int gnix_cq_control(struct fid *cq, int command, void *arg) { switch (command) { case FI_GETWAIT: return -FI_ENOSYS; default: return -FI_EINVAL; } } DIRECT_FN int gnix_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, struct fid_cq **cq, void *context) { struct gnix_fid_domain *domain_priv; struct gnix_fid_cq *cq_priv; struct fi_ops_cq *cq_ops; struct fi_ops *fi_cq_ops; int ret = FI_SUCCESS; GNIX_TRACE(FI_LOG_CQ, "\n"); cq_ops = calloc(1, sizeof(*cq_ops)); if (!cq_ops) { return -FI_ENOMEM; } fi_cq_ops = calloc(1, sizeof(*fi_cq_ops)); if (!fi_cq_ops) { ret = -FI_ENOMEM; goto free_cq_ops; } *cq_ops = gnix_cq_ops; *fi_cq_ops = gnix_cq_fi_ops; ret = verify_cq_attr(attr, cq_ops, fi_cq_ops); if (ret) goto free_fi_cq_ops; domain_priv = container_of(domain, struct gnix_fid_domain, domain_fid); if (!domain_priv) { ret = -FI_EINVAL; goto free_fi_cq_ops; } cq_priv = calloc(1, sizeof(*cq_priv)); if (!cq_priv) { ret = -FI_ENOMEM; goto free_fi_cq_ops; } cq_priv->requires_lock = (domain_priv->thread_model != FI_THREAD_COMPLETION); cq_priv->domain = domain_priv; cq_priv->attr = *attr; _gnix_ref_init(&cq_priv->ref_cnt, 1, __cq_destruct); _gnix_ref_get(cq_priv->domain); _gnix_prog_init(&cq_priv->pset); cq_priv->cq_fid.fid.fclass = FI_CLASS_CQ; cq_priv->cq_fid.fid.context = context; cq_priv->cq_fid.fid.ops = fi_cq_ops; cq_priv->cq_fid.ops = cq_ops; /* * Although we don't need to store entry_size since we're already * storing the format, this might provide a performance benefit * when allocating storage. 
*/ cq_priv->entry_size = format_sizes[cq_priv->attr.format]; fastlock_init(&cq_priv->lock); ret = gnix_cq_set_wait(cq_priv); if (ret) goto free_cq_priv; ret = _gnix_queue_create(&cq_priv->events, alloc_cq_entry, free_cq_entry, cq_priv->entry_size, cq_priv->attr.size); if (ret) goto free_cq_priv; ret = _gnix_queue_create(&cq_priv->errors, alloc_cq_entry, free_cq_entry, sizeof(struct fi_cq_err_entry), 0); if (ret) goto free_gnix_queue; *cq = &cq_priv->cq_fid; return ret; free_gnix_queue: _gnix_queue_destroy(cq_priv->events); free_cq_priv: _gnix_ref_put(cq_priv->domain); fastlock_destroy(&cq_priv->lock); free(cq_priv); free_fi_cq_ops: free(fi_cq_ops); free_cq_ops: free(cq_ops); return ret; } /******************************************************************************* * FI_OPS_* data structures. ******************************************************************************/ static const struct fi_ops gnix_cq_fi_ops = { .size = sizeof(struct fi_ops), .close = gnix_cq_close, .bind = fi_no_bind, .control = gnix_cq_control, .ops_open = fi_no_ops_open }; static const struct fi_ops_cq gnix_cq_ops = { .size = sizeof(struct fi_ops_cq), .read = gnix_cq_read, .readfrom = gnix_cq_readfrom, .readerr = gnix_cq_readerr, .sread = gnix_cq_sread, .sreadfrom = gnix_cq_sreadfrom, .signal = gnix_cq_signal, .strerror = gnix_cq_strerror };
/* * TODO: Support shared named AVs. */ DIRECT_FN int gnix_av_open(struct fid_domain *domain, struct fi_av_attr *attr, struct fid_av **av, void *context) { struct gnix_fid_domain *int_dom = NULL; struct gnix_fid_av *int_av = NULL; struct gnix_hashtable_attr ht_attr; enum fi_av_type type = FI_AV_TABLE; size_t count = 128; int ret = FI_SUCCESS; GNIX_TRACE(FI_LOG_AV, "\n"); if (!domain) { ret = -FI_EINVAL; goto err; } int_dom = container_of(domain, struct gnix_fid_domain, domain_fid); if (!int_dom) { ret = -FI_EINVAL; goto err; } int_av = calloc(1, sizeof(*int_av)); if (!int_av) { ret = -FI_ENOMEM; goto err; } if (attr) { if (gnix_verify_av_attr(attr)) { ret = -FI_EINVAL; goto cleanup; } type = attr->type; count = attr->count; } int_av->domain = int_dom; int_av->type = type; int_av->addrlen = sizeof(struct gnix_address); int_av->capacity = count; if (type == FI_AV_TABLE) { int_av->table = calloc(count, sizeof(struct gnix_av_addr_entry)); if (!int_av->table) { ret = -FI_ENOMEM; goto cleanup; } } int_av->valid_entry_vec = calloc(count, sizeof(int)); if (!int_av->valid_entry_vec) { ret = -FI_ENOMEM; goto cleanup; } int_av->av_fid.fid.fclass = FI_CLASS_AV; int_av->av_fid.fid.context = context; int_av->av_fid.fid.ops = &gnix_fi_av_ops; int_av->av_fid.ops = &gnix_av_ops; if (type == FI_AV_MAP) { int_av->map_ht = calloc(1, sizeof(struct gnix_hashtable)); if (int_av->map_ht == NULL) goto cleanup; /* * use same parameters as used for ep vc hash */ ht_attr.ht_initial_size = int_dom->params.ct_init_size; ht_attr.ht_maximum_size = int_dom->params.ct_max_size; ht_attr.ht_increase_step = int_dom->params.ct_step; ht_attr.ht_increase_type = GNIX_HT_INCREASE_MULT; ht_attr.ht_collision_thresh = 500; ht_attr.ht_hash_seed = 0xdeadbeefbeefdead; ht_attr.ht_internal_locking = 1; ht_attr.destructor = NULL; ret = _gnix_ht_init(int_av->map_ht, &ht_attr); slist_init(&int_av->block_list); } _gnix_ref_init(&int_av->ref_cnt, 1, __av_destruct); *av = &int_av->av_fid; return ret; cleanup: if 
(int_av->table != NULL) free(int_av->table); if (int_av->valid_entry_vec != NULL) free(int_av->valid_entry_vec); free(int_av); err: return ret; }
DIRECT_FN int gnix_domain_open(struct fid_fabric *fabric, struct fi_info *info, struct fid_domain **dom, void *context) { struct gnix_fid_domain *domain = NULL; int ret = FI_SUCCESS; uint8_t ptag; uint32_t cookie; struct gnix_fid_fabric *fabric_priv; GNIX_TRACE(FI_LOG_DOMAIN, "\n"); fabric_priv = container_of(fabric, struct gnix_fid_fabric, fab_fid); /* * check cookie/ptag credentials - for FI_EP_MSG we may be creating a * domain * using a cookie supplied being used by the server. Otherwise, we use * use the cookie/ptag supplied by the job launch system. */ if (info->dest_addr) { ret = gnixu_get_rdma_credentials(info->dest_addr, &ptag, &cookie); if (ret) { GNIX_WARN(FI_LOG_DOMAIN, "gnixu_get_rdma_credentials returned ptag %u cookie 0x%x\n", ptag, cookie); goto err; } } else { ret = gnixu_get_rdma_credentials(NULL, &ptag, &cookie); } GNIX_INFO(FI_LOG_DOMAIN, "gnix rdma credentials returned ptag %u cookie 0x%x\n", ptag, cookie); domain = calloc(1, sizeof *domain); if (domain == NULL) { ret = -FI_ENOMEM; goto err; } domain->mr_cache_attr = _gnix_default_mr_cache_attr; domain->mr_cache_attr.reg_context = (void *) domain; domain->mr_cache_attr.dereg_context = NULL; domain->mr_cache_attr.destruct_context = NULL; ret = _gnix_notifier_open(&domain->mr_cache_attr.notifier); if (ret != FI_SUCCESS) goto err; domain->mr_cache_ro = NULL; domain->mr_cache_rw = NULL; fastlock_init(&domain->mr_cache_lock); domain->udreg_reg_limit = 4096; dlist_init(&domain->nic_list); dlist_init(&domain->list); dlist_insert_after(&domain->list, &fabric_priv->domain_list); domain->fabric = fabric_priv; _gnix_ref_get(domain->fabric); domain->ptag = ptag; domain->cookie = cookie; domain->cdm_id_seed = getpid(); /* TODO: direct syscall better */ /* user tunables */ domain->params.msg_rendezvous_thresh = default_msg_rendezvous_thresh; domain->params.rma_rdma_thresh = default_rma_rdma_thresh; domain->params.ct_init_size = default_ct_init_size; domain->params.ct_max_size = default_ct_max_size; 
domain->params.ct_step = default_ct_step; domain->params.vc_id_table_capacity = default_vc_id_table_capacity; domain->params.mbox_page_size = default_mbox_page_size; domain->params.mbox_num_per_slab = default_mbox_num_per_slab; domain->params.mbox_maxcredit = default_mbox_maxcredit; domain->params.mbox_msg_maxsize = default_mbox_msg_maxsize; domain->params.max_retransmits = default_max_retransmits; domain->params.err_inject_count = default_err_inject_count; #if HAVE_XPMEM domain->params.xpmem_enabled = true; #else domain->params.xpmem_enabled = false; #endif domain->gni_tx_cq_size = default_tx_cq_size; domain->gni_rx_cq_size = default_rx_cq_size; domain->gni_cq_modes = gnix_def_gni_cq_modes; _gnix_ref_init(&domain->ref_cnt, 1, __domain_destruct); domain->domain_fid.fid.fclass = FI_CLASS_DOMAIN; domain->domain_fid.fid.context = context; domain->domain_fid.fid.ops = &gnix_domain_fi_ops; domain->domain_fid.ops = &gnix_domain_ops; domain->domain_fid.mr = &gnix_domain_mr_ops; domain->control_progress = info->domain_attr->control_progress; domain->data_progress = info->domain_attr->data_progress; domain->thread_model = info->domain_attr->threading; domain->mr_is_init = 0; domain->mr_iov_limit = info->domain_attr->mr_iov_limit; fastlock_init(&domain->cm_nic_lock); _gnix_open_cache(domain, GNIX_DEFAULT_CACHE_TYPE); *dom = &domain->domain_fid; return FI_SUCCESS; err: if (domain != NULL) { free(domain); } return ret; }
int _gnix_cm_nic_alloc(struct gnix_fid_domain *domain, struct fi_info *info, uint32_t cdm_id, struct gnix_auth_key *auth_key, struct gnix_cm_nic **cm_nic_ptr) { int ret = FI_SUCCESS; struct gnix_cm_nic *cm_nic = NULL; gnix_hashtable_attr_t gnix_ht_attr = {0}; uint32_t name_type = GNIX_EPN_TYPE_UNBOUND; struct gnix_nic_attr nic_attr = {0}; struct gnix_ep_name ep_name; struct gnix_dgram_hndl_attr dgram_hndl_attr = {0}; struct gnix_dgram_hndl_attr *dgram_hndl_attr_ptr = NULL; GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); *cm_nic_ptr = NULL; /* * if app has specified a src_addr in the info * argument and length matches that for gnix_ep_name * we must allocate a cm_nic, otherwise we first * check to see if there is a cm_nic already for this domain * and just use it. */ if (info->src_addr) { /*TODO (optimization): strchr to name_type and strtol */ _gnix_get_ep_name(info->src_addr, 0, &ep_name, domain); name_type = ep_name.name_type; } GNIX_INFO(FI_LOG_EP_CTRL, "creating cm_nic for %u/0x%x/%u\n", auth_key->ptag, auth_key->cookie, cdm_id); cm_nic = (struct gnix_cm_nic *)calloc(1, sizeof(*cm_nic)); if (cm_nic == NULL) { ret = -FI_ENOMEM; goto err; } /* * we have to force allocation of a new nic since we want * an a particulard cdm id */ nic_attr.must_alloc = true; nic_attr.use_cdm_id = true; nic_attr.cdm_id = cdm_id; nic_attr.auth_key = auth_key; ret = gnix_nic_alloc(domain, &nic_attr, &cm_nic->nic); if (ret != FI_SUCCESS) { GNIX_WARN(FI_LOG_EP_CTRL, "gnix_nic_alloc returned %s\n", fi_strerror(-ret)); goto err; } cm_nic->my_name.gnix_addr.cdm_id = cdm_id; cm_nic->ptag = auth_key->ptag; cm_nic->my_name.cookie = auth_key->cookie; cm_nic->my_name.gnix_addr.device_addr = cm_nic->nic->device_addr; cm_nic->domain = domain; cm_nic->ctrl_progress = domain->control_progress; cm_nic->my_name.name_type = name_type; cm_nic->poll_cnt = 0; fastlock_init(&cm_nic->wq_lock); dlist_init(&cm_nic->cm_nic_wq); /* * prep the cm nic's dgram component */ if (domain->control_progress == FI_PROGRESS_AUTO) { 
dgram_hndl_attr.timeout_needed = __gnix_cm_nic_timeout_needed; dgram_hndl_attr.timeout_progress = __gnix_cm_nic_timeout_progress; dgram_hndl_attr.timeout_data = (void *)cm_nic; dgram_hndl_attr.timeout = domain->params.dgram_progress_timeout; dgram_hndl_attr_ptr = &dgram_hndl_attr; }; ret = _gnix_dgram_hndl_alloc(cm_nic, domain->control_progress, dgram_hndl_attr_ptr, &cm_nic->dgram_hndl); if (ret != FI_SUCCESS) goto err; /* * allocate hash table for translating ep addresses * to ep's. * This table will not be large - how many FI_EP_RDM ep's * will an app create using one domain?, nor in the critical path * so just use defaults. */ cm_nic->addr_to_ep_ht = calloc(1, sizeof(struct gnix_hashtable)); if (cm_nic->addr_to_ep_ht == NULL) goto err; gnix_ht_attr.ht_initial_size = 64; gnix_ht_attr.ht_maximum_size = 1024; gnix_ht_attr.ht_increase_step = 2; gnix_ht_attr.ht_increase_type = GNIX_HT_INCREASE_MULT; gnix_ht_attr.ht_collision_thresh = 500; gnix_ht_attr.ht_hash_seed = 0xdeadbeefbeefdead; gnix_ht_attr.ht_internal_locking = 1; gnix_ht_attr.destructor = NULL; ret = _gnix_ht_init(cm_nic->addr_to_ep_ht, &gnix_ht_attr); if (ret != FI_SUCCESS) { GNIX_WARN(FI_LOG_EP_CTRL, "gnix_ht_init returned %s\n", fi_strerror(-ret)); goto err; } _gnix_ref_init(&cm_nic->ref_cnt, 1, __cm_nic_destruct); *cm_nic_ptr = cm_nic; pthread_mutex_lock(&gnix_cm_nic_list_lock); dlist_insert_tail(&cm_nic->cm_nic_list, &gnix_cm_nic_list); pthread_mutex_unlock(&gnix_cm_nic_list_lock); return ret; err: if (cm_nic->dgram_hndl) _gnix_dgram_hndl_free(cm_nic->dgram_hndl); if (cm_nic->nic) _gnix_nic_free(cm_nic->nic); if (cm_nic->addr_to_ep_ht) { _gnix_ht_destroy(cm_nic->addr_to_ep_ht); free(cm_nic->addr_to_ep_ht); } if (cm_nic != NULL) free(cm_nic); return ret; }
int gnix_ep_open(struct fid_domain *domain, struct fi_info *info, struct fid_ep **ep, void *context) { int ret = FI_SUCCESS; int tsret = FI_SUCCESS; uint32_t cdm_id; struct gnix_fid_domain *domain_priv; struct gnix_fid_ep *ep_priv; gnix_hashtable_attr_t gnix_ht_attr; gnix_ht_key_t *key_ptr; struct gnix_tag_storage_attr untagged_attr = { .type = GNIX_TAG_LIST, .use_src_addr_matching = 1, }; bool free_list_inited = false; GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); if ((domain == NULL) || (info == NULL) || (ep == NULL) || (info->ep_attr == NULL)) return -FI_EINVAL; /* * TODO: need to implement other endpoint types */ if (info->ep_attr->type != FI_EP_RDM) return -FI_ENOSYS; domain_priv = container_of(domain, struct gnix_fid_domain, domain_fid); ep_priv = calloc(1, sizeof *ep_priv); if (!ep_priv) return -FI_ENOMEM; /* init untagged storages */ tsret = _gnix_posted_tag_storage_init( &ep_priv->posted_recv_queue, &untagged_attr); if (tsret) return tsret; tsret = _gnix_unexpected_tag_storage_init( &ep_priv->unexp_recv_queue, &untagged_attr); if (tsret) return tsret; /* init tagged storages */ tsret = _gnix_posted_tag_storage_init( &ep_priv->tagged_posted_recv_queue, NULL); if (tsret) return tsret; tsret = _gnix_unexpected_tag_storage_init( &ep_priv->tagged_unexp_recv_queue, NULL); if (tsret) return tsret; ep_priv->ep_fid.fid.fclass = FI_CLASS_EP; ep_priv->ep_fid.fid.context = context; ep_priv->ep_fid.fid.ops = &gnix_ep_fi_ops; ep_priv->ep_fid.ops = &gnix_ep_ops; ep_priv->domain = domain_priv; ep_priv->type = info->ep_attr->type; _gnix_ref_init(&ep_priv->ref_cnt, 1, __ep_destruct); fastlock_init(&ep_priv->recv_comp_lock); fastlock_init(&ep_priv->recv_queue_lock); fastlock_init(&ep_priv->tagged_queue_lock); slist_init(&ep_priv->pending_recv_comp_queue); ep_priv->caps = info->caps & GNIX_EP_RDM_CAPS; if (info->tx_attr) ep_priv->op_flags = info->tx_attr->op_flags; if (info->rx_attr) ep_priv->op_flags |= info->rx_attr->op_flags; ep_priv->op_flags &= GNIX_EP_OP_FLAGS; 
ep_priv->min_multi_recv = GNIX_OPT_MIN_MULTI_RECV_DEFAULT; ret = __fr_freelist_init(ep_priv); if (ret != FI_SUCCESS) { GNIX_WARN(FI_LOG_EP_CTRL, "Error allocating gnix_fab_req freelist (%s)", fi_strerror(-ret)); goto err; } else free_list_inited = true; ep_priv->ep_fid.msg = &gnix_ep_msg_ops; ep_priv->ep_fid.rma = &gnix_ep_rma_ops; ep_priv->ep_fid.tagged = &gnix_ep_tagged_ops; ep_priv->ep_fid.atomic = &gnix_ep_atomic_ops; ep_priv->ep_fid.cm = &gnix_cm_ops; if (ep_priv->type == FI_EP_RDM) { if (info->src_addr != NULL) { ret = __gnix_ep_bound_prep(domain_priv, info, ep_priv); if (ret != FI_SUCCESS) { GNIX_WARN(FI_LOG_EP_CTRL, "__gnix_ep_bound_prep returned error (%s)", fi_strerror(-ret)); goto err; } } else { fastlock_acquire(&domain_priv->cm_nic_lock); /* * if a cm_nic has not yet been allocated for this * domain, do it now. Reuse the embedded gnix_nic * in the cm_nic as the nic for this endpoint * to reduce demand on Aries hw resources. */ if (domain_priv->cm_nic == NULL) { ret = _gnix_cm_nic_alloc(domain_priv, info, &domain_priv->cm_nic); if (ret != FI_SUCCESS) { GNIX_WARN(FI_LOG_EP_CTRL, "_gnix_cm_nic_alloc returned %s\n", fi_strerror(-ret)); fastlock_release( &domain_priv->cm_nic_lock); goto err; } ep_priv->cm_nic = domain_priv->cm_nic; ep_priv->nic = ep_priv->cm_nic->nic; _gnix_ref_get(ep_priv->nic); } else { ep_priv->cm_nic = domain_priv->cm_nic; _gnix_ref_get(ep_priv->cm_nic); } fastlock_release(&domain_priv->cm_nic_lock); ep_priv->my_name.gnix_addr.device_addr = ep_priv->cm_nic->my_name.gnix_addr.device_addr; ep_priv->my_name.cm_nic_cdm_id = ep_priv->cm_nic->my_name.gnix_addr.cdm_id; ret = _gnix_get_new_cdm_id(domain_priv, &cdm_id); if (ret != FI_SUCCESS) { GNIX_WARN(FI_LOG_EP_CTRL, "gnix_get_new_cdm_id call returned %s\n", fi_strerror(-ret)); goto err; } ep_priv->my_name.gnix_addr.cdm_id = cdm_id; } key_ptr = (gnix_ht_key_t *)&ep_priv->my_name.gnix_addr; ret = _gnix_ht_insert(ep_priv->cm_nic->addr_to_ep_ht, *key_ptr, ep_priv); if ((ret != FI_SUCCESS) && 
(ret != -FI_ENOSPC)) { GNIX_WARN(FI_LOG_EP_CTRL, "__gnix_ht_insert returned %d\n", ret); goto err; } gnix_ht_attr.ht_initial_size = domain_priv->params.ct_init_size; gnix_ht_attr.ht_maximum_size = domain_priv->params.ct_max_size; gnix_ht_attr.ht_increase_step = domain_priv->params.ct_step; gnix_ht_attr.ht_increase_type = GNIX_HT_INCREASE_MULT; gnix_ht_attr.ht_collision_thresh = 500; gnix_ht_attr.ht_hash_seed = 0xdeadbeefbeefdead; gnix_ht_attr.ht_internal_locking = 0; gnix_ht_attr.destructor = __gnix_vc_destroy_ht_entry; ep_priv->vc_ht = calloc(1, sizeof(struct gnix_hashtable)); if (ep_priv->vc_ht == NULL) goto err; ret = _gnix_ht_init(ep_priv->vc_ht, &gnix_ht_attr); if (ret != FI_SUCCESS) { GNIX_WARN(FI_LOG_EP_CTRL, "gnix_ht_init call returned %d\n", ret); goto err; } fastlock_init(&ep_priv->vc_ht_lock); } else { ep_priv->cm_nic = NULL; ep_priv->vc = NULL; } ep_priv->progress_fn = NULL; ep_priv->rx_progress_fn = NULL; if (ep_priv->nic == NULL) { ret = gnix_nic_alloc(domain_priv, NULL, &ep_priv->nic); if (ret != FI_SUCCESS) { GNIX_WARN(FI_LOG_EP_CTRL, "_gnix_nic_alloc call returned %d\n", ret); goto err; } } /* * if smsg callbacks not present hook them up now */ if (ep_priv->nic->smsg_callbacks == NULL) ep_priv->nic->smsg_callbacks = gnix_ep_smsg_callbacks; _gnix_ref_get(ep_priv->domain); *ep = &ep_priv->ep_fid; return ret; err: if (free_list_inited == true) __fr_freelist_destroy(ep_priv); if (ep_priv->vc_ht != NULL) { _gnix_ht_destroy(ep_priv->vc_ht); /* may not be initialized but okay */ free(ep_priv->vc_ht); ep_priv->vc_ht = NULL; } if (ep_priv->cm_nic != NULL) ret = _gnix_cm_nic_free(ep_priv->cm_nic); if (ep_priv->nic != NULL) ret = _gnix_nic_free(ep_priv->nic); free(ep_priv); return ret; } static int __match_context(struct slist_entry *item, const void *arg) { struct gnix_fab_req *req; req = container_of(item, struct gnix_fab_req, slist); return req->user_context == arg; } static inline struct gnix_fab_req *__find_tx_req( struct gnix_fid_ep *ep, void *context) 
{ struct gnix_fab_req *req = NULL; struct slist_entry *entry; struct gnix_vc *vc; GNIX_HASHTABLE_ITERATOR(ep->vc_ht, iter); GNIX_DEBUG(FI_LOG_EP_CTRL, "searching VCs for the correct context to" " cancel, context=%p", context); fastlock_acquire(&ep->vc_ht_lock); while ((vc = _gnix_ht_iterator_next(&iter))) { fastlock_acquire(&vc->tx_queue_lock); entry = slist_remove_first_match(&vc->tx_queue, __match_context, context); fastlock_release(&vc->tx_queue_lock); if (entry) { req = container_of(entry, struct gnix_fab_req, slist); break; } } fastlock_release(&ep->vc_ht_lock); return req; } static inline struct gnix_fab_req *__find_rx_req( struct gnix_fid_ep *ep, void *context) { struct gnix_fab_req *req = NULL; fastlock_acquire(&ep->recv_queue_lock); req = _gnix_remove_req_by_context(&ep->posted_recv_queue, context); fastlock_release(&ep->recv_queue_lock); if (req) return req; fastlock_acquire(&ep->tagged_queue_lock); req = _gnix_remove_req_by_context(&ep->tagged_posted_recv_queue, context); fastlock_release(&ep->tagged_queue_lock); return req; } static ssize_t gnix_ep_cancel(fid_t fid, void *context) { int ret = FI_SUCCESS; struct gnix_fid_ep *ep; struct gnix_fab_req *req; struct gnix_fid_cq *err_cq = NULL; struct gnix_fid_cntr *err_cntr = NULL; void *addr; uint64_t tag, flags; size_t len; int is_send = 0; GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); ep = container_of(fid, struct gnix_fid_ep, ep_fid.fid); if (!ep->domain) return -FI_EDOMAIN; /* without context, we will have to find a request that matches * a recv or send request. Try the send requests first. 
*/ GNIX_INFO(FI_LOG_EP_CTRL, "looking for event to cancel\n"); req = __find_tx_req(ep, context); if (!req) { req = __find_rx_req(ep, context); if (req) { err_cq = ep->recv_cq; err_cntr = ep->recv_cntr; } } else { is_send = 1; err_cq = ep->send_cq; err_cntr = ep->send_cntr; } GNIX_INFO(FI_LOG_EP_CTRL, "finished searching\n"); if (!req) return -FI_ENOENT; if (err_cq) { /* add canceled event */ if (!(req->type == GNIX_FAB_RQ_RDMA_READ || req->type == GNIX_FAB_RQ_RDMA_WRITE)) { if (!is_send) { addr = (void *) req->msg.recv_addr; len = req->msg.recv_len; } else { addr = (void *) req->msg.send_addr; len = req->msg.send_len; } tag = req->msg.tag; } else { /* rma information */ addr = (void *) req->rma.loc_addr; len = req->rma.len; tag = 0; } flags = req->flags; _gnix_cq_add_error(err_cq, context, flags, len, addr, 0 /* data */, tag, len, FI_ECANCELED, FI_ECANCELED, 0); } if (err_cntr) { /* signal increase in cntr errs */ _gnix_cntr_inc_err(err_cntr); } _gnix_fr_free(ep, req); return ret; }