/* * Initialize all available memory monitors */ void ofi_monitor_init(void) { fastlock_init(&uffd_monitor->lock); dlist_init(&uffd_monitor->list); fi_param_define(NULL, "mr_cache_max_size", FI_PARAM_SIZE_T, "Defines the total number of bytes for all memory" " regions that may be tracked by the MR cache." " Setting this will reduce the amount of memory" " not actively in use that may be registered." " (default: 0 no limit is enforced)"); fi_param_define(NULL, "mr_cache_max_count", FI_PARAM_SIZE_T, "Defines the total number of memory regions that" " may be store in the cache. Setting this will" " reduce the number of registered regions, regardless" " of their size, stored in the cache. Setting this" " to zero will disable MR caching. (default: 1024)"); fi_param_define(NULL, "mr_cache_merge_regions", FI_PARAM_BOOL, "If set to true, overlapping or adjacent memory" " regions will be combined into a single, larger" " region. Merging regions can reduce the cache" " memory footprint, but can negatively impact" " performance in some situations. (default: false)"); fi_param_get_size_t(NULL, "mr_cache_max_size", &cache_params.max_size); fi_param_get_size_t(NULL, "mr_cache_max_count", &cache_params.max_cnt); fi_param_get_bool(NULL, "mr_cache_merge_regions", &cache_params.merge_regions); if (!cache_params.max_size) cache_params.max_size = SIZE_MAX; }
/*
 * Put a util_fabric into its initial state: reference count of zero,
 * empty domain list, initialized lock.  The name pointer is stored as
 * given; no copy is made.
 */
static void util_fabric_init(struct util_fabric *fabric, const char *name)
{
	fabric->name = name;

	dlist_init(&fabric->domain_list);
	fastlock_init(&fabric->lock);
	atomic_initialize(&fabric->ref, 0);
}
/*
 * Initialize the common part of a utility endpoint.
 *
 * Returns -FI_EINVAL when info or any of its ep/rx/tx attribute blocks is
 * missing, the result of ofi_prov_check_info() when that check fails, and
 * 0 on success.  On success the owning domain's reference count has been
 * incremented and, if the domain has an EQ, the endpoint is bound to it.
 */
int ofi_endpoint_init(struct fid_domain *domain,
		      const struct util_prov *util_prov,
		      struct fi_info *info, struct util_ep *ep,
		      void *context, ofi_ep_progress_func progress)
{
	struct util_domain *udom;
	int rc;

	if (!info)
		return -FI_EINVAL;
	if (!info->ep_attr || !info->rx_attr || !info->tx_attr)
		return -FI_EINVAL;

	udom = container_of(domain, struct util_domain, domain_fid);

	rc = ofi_prov_check_info(util_prov,
				 udom->fabric->fabric_fid.api_version, info);
	if (rc != 0)
		return rc;

	/* fid header */
	ep->ep_fid.fid.fclass = FI_CLASS_EP;
	ep->ep_fid.fid.context = context;

	/* settings taken from the caller and from info */
	ep->domain = udom;
	ep->progress = progress;
	ep->caps = info->caps;
	ep->tx_op_flags = info->tx_attr->op_flags;
	ep->rx_op_flags = info->rx_attr->op_flags;

	/* The endpoint now holds a reference on its domain. */
	ofi_atomic_inc32(&udom->ref);
	if (udom->eq)
		ofi_ep_bind_eq(ep, udom->eq);

	fastlock_init(&ep->lock);
	return 0;
}
/*
 * Create a mailbox allocator.
 *
 * nic, mbox_size, mpmmap and alloc_handle must all be non-zero/non-NULL,
 * otherwise -FI_EINVAL is returned.  On success the new handle is stored
 * in *alloc_handle; on any failure *alloc_handle is left NULL and the
 * error from the failing step (or -FI_ENOMEM) is returned.
 */
int _gnix_mbox_allocator_create(struct gnix_nic *nic,
				gni_cq_handle_t cq_handle,
				enum gnix_page_size page_size,
				size_t mbox_size,
				size_t mpmmap,
				struct gnix_mbox_alloc_handle **alloc_handle)
{
	struct gnix_mbox_alloc_handle *new_handle;
	char msg_buf[256];
	char *msg;
	int rc;

	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");

	if (!nic || !mbox_size || !mpmmap || !alloc_handle) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "Invalid parameter to allocator_create.\n");
		return -FI_EINVAL;
	}

	*alloc_handle = NULL;

	new_handle = calloc(1, sizeof(*new_handle));
	if (new_handle == NULL) {
		msg = strerror_r(errno, msg_buf, sizeof(msg_buf));
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "Error allocating alloc handle: %s\n",
			  msg);
		return -FI_ENOMEM;
	}

	new_handle->nic_handle = nic;
	new_handle->cq_handle = cq_handle;
	new_handle->mbox_size = mbox_size;
	new_handle->mpmmap = mpmmap;
	/* page_size is scaled by 1024 * 1024 (presumably MiB -> bytes; confirm). */
	new_handle->page_size = page_size * 1024 * 1024;
	fastlock_init(&new_handle->lock);

	rc = __open_huge_page(new_handle);
	if (rc) {
		GNIX_WARN(FI_LOG_EP_CTRL, "Error opening huge page.\n");
		goto free_handle;
	}

	rc = __create_slab(new_handle);
	if (rc) {
		GNIX_WARN(FI_LOG_EP_CTRL, "Slab creation failed.\n");
		goto free_filename;
	}

	*alloc_handle = new_handle;
	return rc;

free_filename:
	free(new_handle->filename);
free_handle:
	free(new_handle);
	return rc;
}
int tcpx_conn_mgr_init(struct tcpx_fabric *tcpx_fabric) { int ret; dlist_init(&tcpx_fabric->poll_mgr.list); fastlock_init(&tcpx_fabric->poll_mgr.lock); ret = fd_signal_init(&tcpx_fabric->poll_mgr.signal); if (ret) { FI_WARN(&tcpx_prov, FI_LOG_FABRIC,"signal init failed\n"); goto err; } tcpx_fabric->poll_mgr.run = 1; ret = pthread_create(&tcpx_fabric->conn_mgr_thread, 0, tcpx_conn_mgr_thread, (void *) tcpx_fabric); if (ret) { FI_WARN(&tcpx_prov, FI_LOG_FABRIC, "Failed creating tcpx connection manager thread"); goto err1; } return 0; err1: fd_signal_free(&tcpx_fabric->poll_mgr.signal); err: fastlock_destroy(&tcpx_fabric->poll_mgr.lock); return ret; }
int rxd_av_create(struct fid_domain *domain_fid, struct fi_av_attr *attr, struct fid_av **av_fid, void *context) { int ret; struct rxd_av *av; struct rxd_domain *domain; struct util_av_attr util_attr; struct fi_av_attr av_attr; if (!attr) return -FI_EINVAL; if (attr->name) return -FI_ENOSYS; domain = container_of(domain_fid, struct rxd_domain, util_domain.domain_fid); av = calloc(1, sizeof(*av)); if (!av) return -FI_ENOMEM; util_attr.addrlen = sizeof(fi_addr_t); util_attr.overhead = attr->count; util_attr.flags = FI_SOURCE; av->size = attr->count ? attr->count : RXD_AV_DEF_COUNT; if (attr->type == FI_AV_UNSPEC) attr->type = FI_AV_TABLE; ret = ofi_av_init(&domain->util_domain, attr, &util_attr, &av->util_av, context); if (ret) goto err1; av->size = av->util_av.count; av_attr = *attr; av_attr.type = FI_AV_TABLE; av_attr.count = 0; av_attr.flags = 0; ret = fi_av_open(domain->dg_domain, &av_attr, &av->dg_av, context); if (ret) goto err2; fastlock_init(&av->lock); av->addrlen = domain->addrlen; *av_fid = &av->util_av.av_fid; (*av_fid)->fid.fclass = FI_CLASS_AV; (*av_fid)->fid.ops = &rxd_av_fi_ops; (*av_fid)->ops = &rxd_av_ops; return 0; err2: ofi_av_close(&av->util_av); err1: free(av); return ret; }
/*
 * Reset the global MR notifier: no file descriptor (-1), no counter,
 * zero references, and a freshly initialized lock.
 * Always returns FI_SUCCESS.
 */
int _gnix_notifier_init(void)
{
	global_mr_not.ref_cnt = 0;
	global_mr_not.cntr = NULL;
	global_mr_not.fd = -1;

	fastlock_init(&global_mr_not.lock);

	return FI_SUCCESS;
}
int ofi_wait_fd_open(struct fid_fabric *fabric_fid, struct fi_wait_attr *attr, struct fid_wait **waitset) { struct util_fabric *fabric; struct util_wait_fd *wait; int ret; fabric = container_of(fabric_fid, struct util_fabric, fabric_fid); ret = util_verify_wait_fd_attr(fabric->prov, attr); if (ret) return ret; wait = calloc(1, sizeof(*wait)); if (!wait) return -FI_ENOMEM; ret = fi_wait_init(fabric, attr, &wait->util_wait); if (ret) goto err1; wait->util_wait.signal = util_wait_fd_signal; wait->util_wait.wait_try = util_wait_fd_try; ret = fd_signal_init(&wait->signal); if (ret) goto err2; ret = fi_epoll_create(&wait->epoll_fd); if (ret) goto err3; ret = fi_epoll_add(wait->epoll_fd, wait->signal.fd[FI_READ_FD], FI_EPOLL_IN, &wait->util_wait.wait_fid.fid); if (ret) goto err4; wait->util_wait.wait_fid.fid.ops = &util_wait_fd_fi_ops; wait->util_wait.wait_fid.ops = &util_wait_fd_ops; dlist_init(&wait->fd_list); fastlock_init(&wait->lock); *waitset = &wait->util_wait.wait_fid; return 0; err4: fi_epoll_close(wait->epoll_fd); err3: fd_signal_free(&wait->signal); err2: fi_wait_cleanup(&wait->util_wait); err1: free(wait); return ret; }
static int util_domain_init(struct util_domain *domain, const struct fi_info *info) { atomic_initialize(&domain->ref, 0); fastlock_init(&domain->lock); domain->caps = info->caps; domain->mode = info->mode; domain->addr_format = info->addr_format; domain->av_type = info->domain_attr->av_type; domain->name = strdup(info->domain_attr->name); return domain->name ? 0 : -FI_ENOMEM; }
/*
 * Attach a notification queue to a memory monitor.
 *
 * The queue's list and lock are initialized, its reference count is
 * zeroed under the lock, and the monitor's own reference count is
 * incremented.
 */
void ofi_monitor_add_queue(struct ofi_mem_monitor *monitor,
			   struct ofi_notification_queue *nq)
{
	dlist_init(&nq->list);
	fastlock_init(&nq->lock);

	fastlock_acquire(&nq->lock);
	nq->refcnt = 0;
	fastlock_release(&nq->lock);

	nq->monitor = monitor;
	ofi_atomic_inc32(&monitor->refcnt);
}
/*
 * Create a buddy allocator over the region [base, base + len).
 *
 * base:         start of the region to manage
 * len:          size of the region
 * max:          largest block size; must be a power of two, must not
 *               exceed len, and must divide len
 * alloc_handle: receives the new handle on success
 *
 * Returns -FI_EINVAL for invalid parameters, -FI_ENOMEM when the handle
 * or its free lists cannot be allocated, otherwise the result of
 * _gnix_alloc_bitmap().  On any failure nothing stays allocated and
 * *alloc_handle (when alloc_handle itself is valid) is NULL.
 */
int _gnix_buddy_allocator_create(void *base, uint32_t len, uint32_t max,
				 gnix_buddy_alloc_handle_t **alloc_handle)
{
	char err_buf[256] = {0}, *error = NULL;
	gnix_buddy_alloc_handle_t *handle;
	int fi_errno;

	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");

	/* Ensure parameters are valid */
	if (unlikely(!base || !len || !max || max > len || !alloc_handle ||
		     IS_NOT_POW_TWO(max) || (len % max) ||
		     !(len / MIN_BLOCK_SIZE * 2))) {

		GNIX_WARN(FI_LOG_EP_CTRL,
			  "Invalid parameter to _gnix_buddy_allocator_create."
			  "\n");
		return -FI_EINVAL;
	}

	*alloc_handle = NULL;

	/*
	 * Test the allocation result itself; alloc_handle (the out
	 * parameter) was already validated above and is never NULL here.
	 */
	handle = calloc(1, sizeof(*handle));
	if (unlikely(!handle)) {
		error = strerror_r(errno, err_buf, sizeof(err_buf));
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "Could not create buddy allocator handle: %s\n",
			  error);
		return -FI_ENOMEM;
	}

	fastlock_init(&handle->lock);
	handle->base = base;
	handle->len = len;
	handle->max = max;

	if (__gnix_buddy_create_lists(handle)) {
		free(handle);
		return -FI_ENOMEM;
	}

	/* The bitmap needs len / MIN_BLOCK_SIZE * 2 bits to flag every possible
	 * block of size: min, min * 2, min * 4, ... , max that fits in the
	 * base. block.  The maximum number of bits used would be if max = len.
	 */
	fi_errno = _gnix_alloc_bitmap(&handle->bitmap,
				      len / MIN_BLOCK_SIZE * 2);
	if (fi_errno) {
		/*
		 * Release the list storage itself, not the address of the
		 * member inside the handle.
		 * NOTE(review): assumes __gnix_buddy_create_lists() heap-
		 * allocates handle->lists -- confirm.
		 */
		free(handle->lists);
		free(handle);
		return fi_errno;
	}

	*alloc_handle = handle;
	return fi_errno;
}
/*
 * Initialize the common part of a utility endpoint.
 *
 * Returns -FI_EINVAL when info or any of its ep/rx/tx attribute blocks is
 * missing, the result of ofi_prov_check_info() when that check fails, and
 * 0 on success.  On success the owning domain's reference count has been
 * incremented, all counter hooks point at ofi_cntr_inc_noop, and the lock
 * hooks are chosen from the domain's threading model.
 */
int ofi_endpoint_init(struct fid_domain *domain,
		      const struct util_prov *util_prov,
		      struct fi_info *info, struct util_ep *ep,
		      void *context, ofi_ep_progress_func progress)
{
	struct util_domain *udom;
	int rc;

	if (!info)
		return -FI_EINVAL;
	if (!info->ep_attr || !info->rx_attr || !info->tx_attr)
		return -FI_EINVAL;

	udom = container_of(domain, struct util_domain, domain_fid);

	rc = ofi_prov_check_info(util_prov,
				 udom->fabric->fabric_fid.api_version, info);
	if (rc != 0)
		return rc;

	/* fid header */
	ep->ep_fid.fid.fclass = FI_CLASS_EP;
	ep->ep_fid.fid.context = context;

	/* basic endpoint state */
	ep->domain = udom;
	ep->caps = info->caps;
	ep->flags = 0;
	ep->progress = progress;
	ep->type = info->ep_attr->type;

	/* operation and message flags */
	ep->tx_op_flags = info->tx_attr->op_flags;
	ep->rx_op_flags = info->rx_attr->op_flags;
	ep->tx_msg_flags = 0;
	ep->rx_msg_flags = 0;
	/* Inject ops: tx flags minus every completion flag, plus FI_INJECT. */
	ep->inject_op_flags =
		((info->tx_attr->op_flags &
		  ~(FI_COMPLETION | FI_INJECT_COMPLETE |
		    FI_TRANSMIT_COMPLETE | FI_DELIVERY_COMPLETE)) | FI_INJECT);

	/* All counter increment hooks start out as no-ops. */
	ep->tx_cntr_inc = ofi_cntr_inc_noop;
	ep->rx_cntr_inc = ofi_cntr_inc_noop;
	ep->rd_cntr_inc = ofi_cntr_inc_noop;
	ep->wr_cntr_inc = ofi_cntr_inc_noop;
	ep->rem_rd_cntr_inc = ofi_cntr_inc_noop;
	ep->rem_wr_cntr_inc = ofi_cntr_inc_noop;

	/* The endpoint now holds a reference on its domain. */
	ofi_atomic_inc32(&udom->ref);
	if (udom->eq)
		ofi_ep_bind_eq(ep, udom->eq);

	fastlock_init(&ep->lock);

	/* Real locking is only wired up for FI_THREAD_SAFE domains. */
	if (ep->domain->threading == FI_THREAD_SAFE) {
		ep->lock_acquire = ofi_fastlock_acquire;
		ep->lock_release = ofi_fastlock_release;
	} else {
		ep->lock_acquire = ofi_fastlock_acquire_noop;
		ep->lock_release = ofi_fastlock_release_noop;
	}

	return 0;
}
static int psmx2_domain_init(struct psmx2_fid_domain *domain, struct psmx2_ep_name *src_addr) { int err; err = fastlock_init(&domain->mr_lock); if (err) { FI_WARN(&psmx2_prov, FI_LOG_CORE, "fastlock_init(mr_lock) returns %d\n", err); goto err_out; } domain->mr_map = rbtNew(&psmx2_key_compare); if (!domain->mr_map) { FI_WARN(&psmx2_prov, FI_LOG_CORE, "rbtNew failed\n"); goto err_out_destroy_mr_lock; } domain->mr_reserved_key = 1; domain->max_atomic_size = INT_MAX; ofi_atomic_initialize32(&domain->sep_cnt, 0); fastlock_init(&domain->sep_lock); dlist_init(&domain->sep_list); dlist_init(&domain->trx_ctxt_list); fastlock_init(&domain->trx_ctxt_lock); if (domain->progress_thread_enabled) psmx2_domain_start_progress(domain); return 0; err_out_destroy_mr_lock: fastlock_destroy(&domain->mr_lock); err_out: return err; }
/*
 * Reset an MR notifier: fd set to 0, no counter, lock initialized.
 *
 * Returns -FI_EINVAL when mrn is NULL, FI_SUCCESS otherwise.
 */
int _gnix_notifier_init(struct gnix_mr_notifier *mrn)
{
	if (!mrn) {
		GNIX_INFO(FI_LOG_MR, "mr notifier NULL\n");
		return -FI_EINVAL;
	}

	mrn->cntr = NULL;
	mrn->fd = 0;
	fastlock_init(&mrn->lock);

	return FI_SUCCESS;
}
/*
 * Initialize the connection map embedded in an endpoint's attributes.
 *
 * Allocates a zeroed table of init_size entries and an epoll set.
 * Returns 0 on success, or -FI_ENOMEM if either the table or the epoll
 * set cannot be created (the table is freed in the latter case).
 */
int sock_conn_map_init(struct sock_ep *ep, int init_size)
{
	struct sock_conn_map *cmap = &ep->attr->cmap;

	cmap->table = calloc(init_size, sizeof(*cmap->table));
	if (cmap->table == NULL)
		return -FI_ENOMEM;

	if (fi_epoll_create(&cmap->epoll_set) < 0) {
		SOCK_LOG_ERROR("failed to create epoll set\n");
		free(cmap->table);
		return -FI_ENOMEM;
	}

	cmap->size = init_size;
	cmap->used = 0;
	fastlock_init(&cmap->lock);

	return 0;
}
static int util_av_init(struct util_av *av, const struct fi_av_attr *attr, const struct util_av_attr *util_attr) { int *entry, i, ret = 0; atomic_initialize(&av->ref, 0); fastlock_init(&av->lock); av->count = attr->count ? attr->count : UTIL_DEFAULT_AV_SIZE; av->count = roundup_power_of_two(av->count); av->addrlen = util_attr->addrlen; av->flags = util_attr->flags | attr->flags; FI_INFO(av->prov, FI_LOG_AV, "AV size %zu\n", av->count); /* TODO: Handle FI_READ */ /* TODO: Handle mmap - shared AV */ if (util_attr->flags & FI_SOURCE) { av->hash.slots = av->count; av->hash.total_count = av->count + util_attr->overhead; FI_INFO(av->prov, FI_LOG_AV, "FI_SOURCE requested, hash size %zu\n", av->hash.total_count); } av->data = malloc((av->count * util_attr->addrlen) + (av->hash.total_count * sizeof(*av->hash.table))); if (!av->data) return -FI_ENOMEM; for (i = 0; i < av->count - 1; i++) { entry = util_av_get_data(av, i); *entry = i + 1; } entry = util_av_get_data(av, av->count - 1); *entry = UTIL_NO_ENTRY; if (util_attr->flags & FI_SOURCE) { av->hash.table = util_av_get_data(av, av->count); util_av_hash_init(&av->hash); } return ret; }
DIRECT_FN int gnix_passive_ep_open(struct fid_fabric *fabric, struct fi_info *info, struct fid_pep **pep, void *context) { struct gnix_fid_fabric *fabric_priv; struct gnix_fid_pep *pep_priv; if (!fabric || !info || !pep) return -FI_EINVAL; fabric_priv = container_of(fabric, struct gnix_fid_fabric, fab_fid); pep_priv = calloc(1, sizeof(*pep_priv)); if (!pep_priv) return -FI_ENOMEM; pep_priv->pep_fid.fid.fclass = FI_CLASS_PEP; pep_priv->pep_fid.fid.context = context; pep_priv->pep_fid.fid.ops = &gnix_pep_fi_ops; pep_priv->pep_fid.ops = &gnix_pep_ops_ep; pep_priv->pep_fid.cm = &gnix_pep_ops_cm; pep_priv->listen_fd = -1; pep_priv->backlog = 5; /* TODO set via fi_control parameter. */ pep_priv->fabric = fabric_priv; fastlock_init(&pep_priv->lock); if (info->src_addr) { pep_priv->bound = 1; memcpy(&pep_priv->src_addr, info->src_addr, sizeof(struct sockaddr_in)); } else pep_priv->bound = 0; _gnix_ref_init(&pep_priv->ref_cnt, 1, __pep_destruct); *pep = &pep_priv->pep_fid; GNIX_DEBUG(FI_LOG_EP_CTRL, "Opened PEP: %p\n", pep_priv); return FI_SUCCESS; }
/*
 * Initialize a freelist via _gnix_sfl_init() and, when that succeeds,
 * additionally set its ts flag and initialize its lock.
 *
 * Returns whatever _gnix_sfl_init() returned.
 */
int _gnix_sfl_init_ts(int elem_size, int offset, int init_size,
		      int refill_size, int growth_factor,
		      int max_refill_size, struct gnix_s_freelist *fl)
{
	int rc = _gnix_sfl_init(elem_size, offset, init_size, refill_size,
				growth_factor, max_refill_size, fl);

	if (rc != FI_SUCCESS)
		return rc;

	fl->ts = 1;
	fastlock_init(&fl->lock);
	return rc;
}
static int util_eq_init(struct fid_fabric *fabric, struct util_eq *eq, const struct fi_eq_attr *attr) { struct fi_wait_attr wait_attr; struct fid_wait *wait; int ret; ofi_atomic_initialize32(&eq->ref, 0); slist_init(&eq->list); fastlock_init(&eq->lock); switch (attr->wait_obj) { case FI_WAIT_NONE: break; case FI_WAIT_UNSPEC: case FI_WAIT_FD: case FI_WAIT_MUTEX_COND: memset(&wait_attr, 0, sizeof wait_attr); wait_attr.wait_obj = attr->wait_obj; eq->internal_wait = 1; ret = fi_wait_open(fabric, &wait_attr, &wait); if (ret) return ret; eq->wait = container_of(wait, struct util_wait, wait_fid); break; case FI_WAIT_SET: eq->wait = container_of(attr->wait_set, struct util_wait, wait_fid); break; default: assert(0); return -FI_EINVAL; } return 0; }
int gnix_ep_open(struct fid_domain *domain, struct fi_info *info, struct fid_ep **ep, void *context) { int ret = FI_SUCCESS; struct gnix_fid_domain *domain_priv; struct gnix_fid_ep *ep_priv; gnix_hashtable_attr_t gnix_ht_attr; GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); if ((domain == NULL) || (info == NULL) || (ep == NULL)) return -FI_EINVAL; if (info->ep_attr->type != FI_EP_RDM) return -FI_ENOSYS; domain_priv = container_of(domain, struct gnix_fid_domain, domain_fid); ep_priv = calloc(1, sizeof *ep_priv); if (!ep_priv) return -FI_ENOMEM; ep_priv->ep_fid.fid.fclass = FI_CLASS_EP; ep_priv->ep_fid.fid.context = context; ep_priv->ep_fid.fid.ops = &gnix_ep_fi_ops; ep_priv->ep_fid.ops = &gnix_ep_ops; ep_priv->domain = domain_priv; ep_priv->type = info->ep_attr->type; fastlock_init(&ep_priv->vc_list_lock); dlist_init(&ep_priv->wc_vc_list); atomic_initialize(&ep_priv->active_fab_reqs, 0); atomic_initialize(&ep_priv->ref_cnt, 0); fastlock_init(&ep_priv->recv_queue_lock); slist_init(&ep_priv->unexp_recv_queue); slist_init(&ep_priv->posted_recv_queue); slist_init(&ep_priv->pending_recv_comp_queue); if (info->tx_attr) ep_priv->op_flags = info->tx_attr->op_flags; if (info->rx_attr) ep_priv->op_flags |= info->rx_attr->op_flags; ret = __fr_freelist_init(ep_priv); if (ret != FI_SUCCESS) { GNIX_ERR(FI_LOG_EP_CTRL, "Error allocating gnix_fab_req freelist (%s)", fi_strerror(-ret)); goto err1; } ep_priv->ep_fid.msg = &gnix_ep_msg_ops; ep_priv->ep_fid.rma = &gnix_ep_rma_ops; ep_priv->ep_fid.tagged = &gnix_ep_tagged_ops; ep_priv->ep_fid.atomic = NULL; ep_priv->ep_fid.cm = &gnix_cm_ops; /* * TODO, initialize vc hash table */ if (ep_priv->type == FI_EP_RDM) { ret = _gnix_cm_nic_alloc(domain_priv, &ep_priv->cm_nic); if (ret != FI_SUCCESS) goto err; gnix_ht_attr.ht_initial_size = domain_priv->params.ct_init_size; gnix_ht_attr.ht_maximum_size = domain_priv->params.ct_max_size; gnix_ht_attr.ht_increase_step = domain_priv->params.ct_step; gnix_ht_attr.ht_increase_type = GNIX_HT_INCREASE_MULT; 
gnix_ht_attr.ht_collision_thresh = 500; gnix_ht_attr.ht_hash_seed = 0xdeadbeefbeefdead; gnix_ht_attr.ht_internal_locking = 1; ep_priv->vc_ht = calloc(1, sizeof(struct gnix_hashtable)); if (ep_priv->vc_ht == NULL) goto err; ret = _gnix_ht_init(ep_priv->vc_ht, &gnix_ht_attr); if (ret != FI_SUCCESS) { GNIX_WARN(FI_LOG_EP_CTRL, "gnix_ht_init call returned %d\n", ret); goto err; } } else { ep_priv->cm_nic = NULL; ep_priv->vc = NULL; } ep_priv->progress_fn = NULL; ep_priv->rx_progress_fn = NULL; ret = gnix_nic_alloc(domain_priv, &ep_priv->nic); if (ret != FI_SUCCESS) { GNIX_WARN(FI_LOG_EP_CTRL, "_gnix_nic_alloc call returned %d\n", ret); goto err; } /* * if smsg callbacks not present hook them up now */ if (ep_priv->nic->smsg_callbacks == NULL) ep_priv->nic->smsg_callbacks = gnix_ep_smsg_callbacks; atomic_inc(&domain_priv->ref_cnt); *ep = &ep_priv->ep_fid; return ret; err1: __fr_freelist_destroy(ep_priv); err: if (ep_priv->vc_ht != NULL) { _gnix_ht_destroy(ep_priv->vc_ht); /* may not be initialized but okay */ free(ep_priv->vc_ht); ep_priv->vc_ht = NULL; } if (ep_priv->cm_nic != NULL) ret = _gnix_cm_nic_free(ep_priv->cm_nic); free(ep_priv); return ret; }
static int gnix_cq_set_wait(struct gnix_fid_cq *cq) { int ret = FI_SUCCESS; GNIX_TRACE(FI_LOG_CQ, "\n"); struct fi_wait_attr requested = { .wait_obj = cq->attr.wait_obj, .flags = 0 }; switch (cq->attr.wait_obj) { case FI_WAIT_UNSPEC: case FI_WAIT_FD: case FI_WAIT_MUTEX_COND: ret = gnix_wait_open(&cq->domain->fabric->fab_fid, &requested, &cq->wait); break; case FI_WAIT_SET: ret = _gnix_wait_set_add(cq->attr.wait_set, &cq->cq_fid.fid); if (!ret) cq->wait = cq->attr.wait_set; break; default: break; } return ret; } static void free_cq_entry(struct slist_entry *item) { struct gnix_cq_entry *entry; entry = container_of(item, struct gnix_cq_entry, item); free(entry->the_entry); free(entry); } static struct slist_entry *alloc_cq_entry(size_t size) { struct gnix_cq_entry *entry = malloc(sizeof(*entry)); if (!entry) { GNIX_DEBUG(FI_LOG_CQ, "out of memory\n"); goto err; } entry->the_entry = malloc(size); if (!entry->the_entry) { GNIX_DEBUG(FI_LOG_CQ, "out of memory\n"); goto cleanup; } return &entry->item; cleanup: free(entry); err: return NULL; } static int __gnix_cq_progress(struct gnix_fid_cq *cq) { return _gnix_prog_progress(&cq->pset); } /******************************************************************************* * Exposed helper functions ******************************************************************************/ ssize_t _gnix_cq_add_event(struct gnix_fid_cq *cq, struct gnix_fid_ep *ep, void *op_context, uint64_t flags, size_t len, void *buf, uint64_t data, uint64_t tag, fi_addr_t src_addr) { struct gnix_cq_entry *event; struct slist_entry *item; uint64_t mask; ssize_t ret = FI_SUCCESS; if (ep) { if (ep->info && ep->info->mode & FI_NOTIFY_FLAGS_ONLY) { mask = (FI_REMOTE_CQ_DATA | FI_MULTI_RECV); if (flags & FI_RMA_EVENT) { mask |= (FI_REMOTE_READ | FI_REMOTE_WRITE | FI_RMA); } flags &= mask; } } COND_ACQUIRE(cq->requires_lock, &cq->lock); item = _gnix_queue_get_free(cq->events); if (!item) { GNIX_DEBUG(FI_LOG_CQ, "error creating cq_entry\n"); ret = -FI_ENOMEM; 
goto err; } event = container_of(item, struct gnix_cq_entry, item); assert(event->the_entry); fill_function[cq->attr.format](event->the_entry, op_context, flags, len, buf, data, tag); event->src_addr = src_addr; _gnix_queue_enqueue(cq->events, &event->item); GNIX_DEBUG(FI_LOG_CQ, "Added event: %lx\n", op_context); if (cq->wait) _gnix_signal_wait_obj(cq->wait); err: COND_RELEASE(cq->requires_lock, &cq->lock); return ret; } ssize_t _gnix_cq_add_error(struct gnix_fid_cq *cq, void *op_context, uint64_t flags, size_t len, void *buf, uint64_t data, uint64_t tag, size_t olen, int err, int prov_errno, void *err_data, size_t err_data_size) { struct fi_cq_err_entry *error; struct gnix_cq_entry *event; struct slist_entry *item; ssize_t ret = FI_SUCCESS; GNIX_INFO(FI_LOG_CQ, "creating error event entry\n"); COND_ACQUIRE(cq->requires_lock, &cq->lock); item = _gnix_queue_get_free(cq->errors); if (!item) { GNIX_WARN(FI_LOG_CQ, "error creating error entry\n"); ret = -FI_ENOMEM; goto err; } event = container_of(item, struct gnix_cq_entry, item); error = event->the_entry; error->op_context = op_context; error->flags = flags; error->len = len; error->buf = buf; error->data = data; error->tag = tag; error->olen = olen; error->err = err; error->prov_errno = prov_errno; error->err_data = err_data; error->err_data_size = err_data_size; _gnix_queue_enqueue(cq->errors, &event->item); if (cq->wait) _gnix_signal_wait_obj(cq->wait); err: COND_RELEASE(cq->requires_lock, &cq->lock); return ret; } int _gnix_cq_poll_obj_add(struct gnix_fid_cq *cq, void *obj, int (*prog_fn)(void *data)) { return _gnix_prog_obj_add(&cq->pset, obj, prog_fn); } int _gnix_cq_poll_obj_rem(struct gnix_fid_cq *cq, void *obj, int (*prog_fn)(void *data)) { return _gnix_prog_obj_rem(&cq->pset, obj, prog_fn); } static void __cq_destruct(void *obj) { struct gnix_fid_cq *cq = (struct gnix_fid_cq *) obj; _gnix_ref_put(cq->domain); switch (cq->attr.wait_obj) { case FI_WAIT_NONE: break; case FI_WAIT_SET: 
_gnix_wait_set_remove(cq->wait, &cq->cq_fid.fid); break; case FI_WAIT_UNSPEC: case FI_WAIT_FD: case FI_WAIT_MUTEX_COND: assert(cq->wait); gnix_wait_close(&cq->wait->fid); break; default: GNIX_WARN(FI_LOG_CQ, "format: %d unsupported.\n", cq->attr.wait_obj); break; } _gnix_prog_fini(&cq->pset); _gnix_queue_destroy(cq->events); _gnix_queue_destroy(cq->errors); fastlock_destroy(&cq->lock); free(cq->cq_fid.ops); free(cq->cq_fid.fid.ops); free(cq); } /******************************************************************************* * API functions. ******************************************************************************/ static int gnix_cq_close(fid_t fid) { struct gnix_fid_cq *cq; int references_held; GNIX_TRACE(FI_LOG_CQ, "\n"); cq = container_of(fid, struct gnix_fid_cq, cq_fid); references_held = _gnix_ref_put(cq); if (references_held) { GNIX_INFO(FI_LOG_CQ, "failed to fully close cq due to lingering " "references. references=%i cq=%p\n", references_held, cq); } return FI_SUCCESS; } static ssize_t __gnix_cq_readfrom(struct fid_cq *cq, void *buf, size_t count, fi_addr_t *src_addr) { struct gnix_fid_cq *cq_priv; struct gnix_cq_entry *event; struct slist_entry *temp; ssize_t read_count = 0; if (!cq || !buf || !count) return -FI_EINVAL; cq_priv = container_of(cq, struct gnix_fid_cq, cq_fid); __gnix_cq_progress(cq_priv); if (_gnix_queue_peek(cq_priv->errors)) return -FI_EAVAIL; COND_ACQUIRE(cq_priv->requires_lock, &cq_priv->lock); while (_gnix_queue_peek(cq_priv->events) && count--) { temp = _gnix_queue_dequeue(cq_priv->events); event = container_of(temp, struct gnix_cq_entry, item); assert(event->the_entry); memcpy(buf, event->the_entry, cq_priv->entry_size); if (src_addr) memcpy(&src_addr[read_count], &event->src_addr, sizeof(fi_addr_t)); _gnix_queue_enqueue_free(cq_priv->events, &event->item); buf = (void *) ((uint8_t *) buf + cq_priv->entry_size); read_count++; } COND_RELEASE(cq_priv->requires_lock, &cq_priv->lock); return read_count ?: -FI_EAGAIN; } static ssize_t 
__gnix_cq_sreadfrom(int blocking, struct fid_cq *cq, void *buf, size_t count, fi_addr_t *src_addr, const void *cond, int timeout) { struct gnix_fid_cq *cq_priv; cq_priv = container_of(cq, struct gnix_fid_cq, cq_fid); if ((blocking && !cq_priv->wait) || (blocking && cq_priv->attr.wait_obj == FI_WAIT_SET)) return -FI_EINVAL; if (_gnix_queue_peek(cq_priv->errors)) return -FI_EAVAIL; if (cq_priv->wait) gnix_wait_wait((struct fid_wait *)cq_priv->wait, timeout); return __gnix_cq_readfrom(cq, buf, count, src_addr); } DIRECT_FN STATIC ssize_t gnix_cq_sreadfrom(struct fid_cq *cq, void *buf, size_t count, fi_addr_t *src_addr, const void *cond, int timeout) { return __gnix_cq_sreadfrom(1, cq, buf, count, src_addr, cond, timeout); } DIRECT_FN STATIC ssize_t gnix_cq_read(struct fid_cq *cq, void *buf, size_t count) { return __gnix_cq_sreadfrom(0, cq, buf, count, NULL, NULL, 0); } DIRECT_FN STATIC ssize_t gnix_cq_sread(struct fid_cq *cq, void *buf, size_t count, const void *cond, int timeout) { return __gnix_cq_sreadfrom(1, cq, buf, count, NULL, cond, timeout); } DIRECT_FN STATIC ssize_t gnix_cq_readfrom(struct fid_cq *cq, void *buf, size_t count, fi_addr_t *src_addr) { return __gnix_cq_sreadfrom(0, cq, buf, count, src_addr, NULL, 0); } DIRECT_FN STATIC ssize_t gnix_cq_readerr(struct fid_cq *cq, struct fi_cq_err_entry *buf, uint64_t flags) { struct gnix_fid_cq *cq_priv; struct gnix_cq_entry *event; struct slist_entry *entry; size_t err_data_cpylen; struct fi_cq_err_entry *gnix_cq_err; ssize_t read_count = 0; if (!cq || !buf) return -FI_EINVAL; cq_priv = container_of(cq, struct gnix_fid_cq, cq_fid); /* * we need to progress cq. some apps may be only using * cq to check for errors. 
*/ _gnix_prog_progress(&cq_priv->pset); COND_ACQUIRE(cq_priv->requires_lock, &cq_priv->lock); entry = _gnix_queue_dequeue(cq_priv->errors); if (!entry) { read_count = -FI_EAGAIN; goto err; } event = container_of(entry, struct gnix_cq_entry, item); gnix_cq_err = event->the_entry; buf->op_context = gnix_cq_err->op_context; buf->flags = gnix_cq_err->flags; buf->len = gnix_cq_err->len; buf->buf = gnix_cq_err->buf; buf->data = gnix_cq_err->data; buf->tag = gnix_cq_err->tag; buf->olen = gnix_cq_err->olen; buf->err = gnix_cq_err->err; buf->prov_errno = gnix_cq_err->prov_errno; if (gnix_cq_err->err_data != NULL) { /* * Note: If the api version is >= 1.5 then copy err_data into * buf->err_data and copy at most buf->err_data_size. * If buf->err_data_size is zero or the api version is < 1.5, * use the old method of allocating space in provider. */ if (FI_VERSION_LT(cq_priv->domain->fabric->fab_fid.api_version, FI_VERSION(1, 5)) || buf->err_data_size == 0) { err_data_cpylen = sizeof(cq_priv->err_data); memcpy(cq_priv->err_data, gnix_cq_err->err_data, err_data_cpylen); buf->err_data = cq_priv->err_data; } else { if (buf->err_data == NULL) return -FI_EINVAL; err_data_cpylen = MIN(buf->err_data_size, gnix_cq_err->err_data_size); memcpy(buf->err_data, gnix_cq_err->err_data, err_data_cpylen); buf->err_data_size = err_data_cpylen; } free(gnix_cq_err->err_data); gnix_cq_err->err_data = NULL; } else { if (FI_VERSION_LT(cq_priv->domain->fabric->fab_fid.api_version, FI_VERSION(1, 5))) { buf->err_data = NULL; } else { buf->err_data_size = 0; } } _gnix_queue_enqueue_free(cq_priv->errors, &event->item); read_count++; err: COND_RELEASE(cq_priv->requires_lock, &cq_priv->lock); return read_count; } DIRECT_FN STATIC const char *gnix_cq_strerror(struct fid_cq *cq, int prov_errno, const void *prov_data, char *buf, size_t len) { return NULL; } DIRECT_FN STATIC int gnix_cq_signal(struct fid_cq *cq) { struct gnix_fid_cq *cq_priv; cq_priv = container_of(cq, struct gnix_fid_cq, cq_fid); if 
(cq_priv->wait) _gnix_signal_wait_obj(cq_priv->wait); return FI_SUCCESS; } static int gnix_cq_control(struct fid *cq, int command, void *arg) { switch (command) { case FI_GETWAIT: return -FI_ENOSYS; default: return -FI_EINVAL; } } DIRECT_FN int gnix_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, struct fid_cq **cq, void *context) { struct gnix_fid_domain *domain_priv; struct gnix_fid_cq *cq_priv; struct fi_ops_cq *cq_ops; struct fi_ops *fi_cq_ops; int ret = FI_SUCCESS; GNIX_TRACE(FI_LOG_CQ, "\n"); cq_ops = calloc(1, sizeof(*cq_ops)); if (!cq_ops) { return -FI_ENOMEM; } fi_cq_ops = calloc(1, sizeof(*fi_cq_ops)); if (!fi_cq_ops) { ret = -FI_ENOMEM; goto free_cq_ops; } *cq_ops = gnix_cq_ops; *fi_cq_ops = gnix_cq_fi_ops; ret = verify_cq_attr(attr, cq_ops, fi_cq_ops); if (ret) goto free_fi_cq_ops; domain_priv = container_of(domain, struct gnix_fid_domain, domain_fid); if (!domain_priv) { ret = -FI_EINVAL; goto free_fi_cq_ops; } cq_priv = calloc(1, sizeof(*cq_priv)); if (!cq_priv) { ret = -FI_ENOMEM; goto free_fi_cq_ops; } cq_priv->requires_lock = (domain_priv->thread_model != FI_THREAD_COMPLETION); cq_priv->domain = domain_priv; cq_priv->attr = *attr; _gnix_ref_init(&cq_priv->ref_cnt, 1, __cq_destruct); _gnix_ref_get(cq_priv->domain); _gnix_prog_init(&cq_priv->pset); cq_priv->cq_fid.fid.fclass = FI_CLASS_CQ; cq_priv->cq_fid.fid.context = context; cq_priv->cq_fid.fid.ops = fi_cq_ops; cq_priv->cq_fid.ops = cq_ops; /* * Although we don't need to store entry_size since we're already * storing the format, this might provide a performance benefit * when allocating storage. 
*/ cq_priv->entry_size = format_sizes[cq_priv->attr.format]; fastlock_init(&cq_priv->lock); ret = gnix_cq_set_wait(cq_priv); if (ret) goto free_cq_priv; ret = _gnix_queue_create(&cq_priv->events, alloc_cq_entry, free_cq_entry, cq_priv->entry_size, cq_priv->attr.size); if (ret) goto free_cq_priv; ret = _gnix_queue_create(&cq_priv->errors, alloc_cq_entry, free_cq_entry, sizeof(struct fi_cq_err_entry), 0); if (ret) goto free_gnix_queue; *cq = &cq_priv->cq_fid; return ret; free_gnix_queue: _gnix_queue_destroy(cq_priv->events); free_cq_priv: _gnix_ref_put(cq_priv->domain); fastlock_destroy(&cq_priv->lock); free(cq_priv); free_fi_cq_ops: free(fi_cq_ops); free_cq_ops: free(cq_ops); return ret; } /******************************************************************************* * FI_OPS_* data structures. ******************************************************************************/ static const struct fi_ops gnix_cq_fi_ops = { .size = sizeof(struct fi_ops), .close = gnix_cq_close, .bind = fi_no_bind, .control = gnix_cq_control, .ops_open = fi_no_ops_open }; static const struct fi_ops_cq gnix_cq_ops = { .size = sizeof(struct fi_ops_cq), .read = gnix_cq_read, .readfrom = gnix_cq_readfrom, .readerr = gnix_cq_readerr, .sread = gnix_cq_sread, .sreadfrom = gnix_cq_sreadfrom, .signal = gnix_cq_signal, .strerror = gnix_cq_strerror };
int fi_ibv_eq_open(struct fid_fabric *fabric, struct fi_eq_attr *attr, struct fid_eq **eq, void *context) { struct fi_ibv_eq *_eq; struct epoll_event event; int ret; _eq = calloc(1, sizeof *_eq); if (!_eq) return -ENOMEM; _eq->fab = container_of(fabric, struct fi_ibv_fabric, fabric_fid); fastlock_init(&_eq->lock); ret = dlistfd_head_init(&_eq->list_head); if (ret) { FI_INFO(&fi_ibv_prov, FI_LOG_EQ, "Unable to initialize dlistfd\n"); goto err1; } _eq->epfd = epoll_create1(0); if (_eq->epfd < 0) { ret = -errno; goto err2; } memset(&event, 0, sizeof(event)); event.events = EPOLLIN; if (epoll_ctl(_eq->epfd, EPOLL_CTL_ADD, _eq->list_head.signal.fd[FI_READ_FD], &event)) { ret = -errno; goto err3; } switch (attr->wait_obj) { case FI_WAIT_NONE: case FI_WAIT_UNSPEC: case FI_WAIT_FD: _eq->channel = rdma_create_event_channel(); if (!_eq->channel) { ret = -errno; goto err3; } ret = fi_fd_nonblock(_eq->channel->fd); if (ret) goto err4; if (epoll_ctl(_eq->epfd, EPOLL_CTL_ADD, _eq->channel->fd, &event)) { ret = -errno; goto err4; } break; default: ret = -FI_ENOSYS; goto err1; } _eq->flags = attr->flags; _eq->eq_fid.fid.fclass = FI_CLASS_EQ; _eq->eq_fid.fid.context = context; _eq->eq_fid.fid.ops = &fi_ibv_eq_fi_ops; _eq->eq_fid.ops = &fi_ibv_eq_ops; *eq = &_eq->eq_fid; return 0; err4: if (_eq->channel) rdma_destroy_event_channel(_eq->channel); err3: close(_eq->epfd); err2: dlistfd_head_free(&_eq->list_head); err1: fastlock_destroy(&_eq->lock); free(_eq); return ret; }
int sock_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr, struct fid_cntr **cntr, void *context) { int ret; struct sock_domain *dom; struct sock_cntr *_cntr; struct fi_wait_attr wait_attr; struct sock_fid_list *list_entry; struct sock_wait *wait; dom = container_of(domain, struct sock_domain, dom_fid); if (attr && sock_cntr_verify_attr(attr)) return -FI_ENOSYS; _cntr = calloc(1, sizeof(*_cntr)); if (!_cntr) return -FI_ENOMEM; ret = pthread_cond_init(&_cntr->cond, NULL); if (ret) goto err; if (attr == NULL) memcpy(&_cntr->attr, &sock_cntr_add, sizeof(sock_cntr_attr)); else memcpy(&_cntr->attr, attr, sizeof(sock_cntr_attr)); switch (_cntr->attr.wait_obj) { case FI_WAIT_NONE: case FI_WAIT_UNSPEC: case FI_WAIT_MUTEX_COND: _cntr->signal = 0; break; case FI_WAIT_FD: wait_attr.flags = 0; wait_attr.wait_obj = FI_WAIT_FD; ret = sock_wait_open(&dom->fab->fab_fid, &wait_attr, &_cntr->waitset); if (ret) { ret = FI_EINVAL; goto err; } _cntr->signal = 1; break; case FI_WAIT_SET: if (!attr) { ret = FI_EINVAL; goto err; } _cntr->waitset = attr->wait_set; _cntr->signal = 1; wait = container_of(attr->wait_set, struct sock_wait, wait_fid); list_entry = calloc(1, sizeof(*list_entry)); dlist_init(&list_entry->entry); list_entry->fid = &_cntr->cntr_fid.fid; dlist_insert_after(&list_entry->entry, &wait->fid_list); break; default: break; } pthread_mutex_init(&_cntr->mut, NULL); fastlock_init(&_cntr->list_lock); atomic_initialize(&_cntr->ref, 0); atomic_initialize(&_cntr->err_cnt, 0); atomic_initialize(&_cntr->value, 0); atomic_initialize(&_cntr->threshold, ~0); dlist_init(&_cntr->tx_list); dlist_init(&_cntr->rx_list); dlist_init(&_cntr->trigger_list); fastlock_init(&_cntr->trigger_lock); _cntr->cntr_fid.fid.fclass = FI_CLASS_CNTR; _cntr->cntr_fid.fid.context = context; _cntr->cntr_fid.fid.ops = &sock_cntr_fi_ops; _cntr->cntr_fid.ops = &sock_cntr_ops; atomic_inc(&dom->ref); _cntr->domain = dom; *cntr = &_cntr->cntr_fid; return 0; err: free(_cntr); return -ret; }
int sock_domain(struct fid_fabric *fabric, struct fi_info *info, struct fid_domain **dom, void *context) { int ret, flags; struct sock_domain *sock_domain; if(info && info->domain_attr){ ret = sock_verify_domain_attr(info->domain_attr); if(ret) return ret; } sock_domain = calloc(1, sizeof *sock_domain); if (!sock_domain) return -FI_ENOMEM; fastlock_init(&sock_domain->lock); atomic_init(&sock_domain->ref, 0); if(info && info->src_addr) { if (getnameinfo(info->src_addr, info->src_addrlen, NULL, 0, sock_domain->service, sizeof(sock_domain->service), NI_NUMERICSERV)) { SOCK_LOG_ERROR("could not resolve src_addr\n"); goto err; } sock_domain->info = *info; memcpy(&sock_domain->src_addr, info->src_addr, sizeof(struct sockaddr_in)); } else { SOCK_LOG_ERROR("invalid fi_info\n"); goto err; } sock_domain->dom_fid.fid.fclass = FI_CLASS_DOMAIN; sock_domain->dom_fid.fid.context = context; sock_domain->dom_fid.fid.ops = &sock_dom_fi_ops; sock_domain->dom_fid.ops = &sock_dom_ops; sock_domain->dom_fid.mr = &sock_dom_mr_ops; if (!info || !info->domain_attr || info->domain_attr->data_progress == FI_PROGRESS_UNSPEC) sock_domain->progress_mode = FI_PROGRESS_AUTO; else sock_domain->progress_mode = info->domain_attr->data_progress; sock_domain->pe = sock_pe_init(sock_domain); if(!sock_domain->pe){ SOCK_LOG_ERROR("Failed to init PE\n"); goto err; } sock_domain->ep_count = AF_INET; sock_domain->r_cmap.domain = sock_domain; fastlock_init(&sock_domain->r_cmap.lock); if(socketpair(AF_UNIX, SOCK_STREAM, 0, sock_domain->signal_fds) < 0) goto err; flags = fcntl(sock_domain->signal_fds[1], F_GETFL, 0); fcntl(sock_domain->signal_fds[1], F_SETFL, flags | O_NONBLOCK); sock_conn_listen(sock_domain); while(!(volatile int)sock_domain->listening) pthread_yield(); *dom = &sock_domain->dom_fid; return 0; err: free(sock_domain); return -FI_EINVAL; }
static int psmx2_domain_init(struct psmx2_fid_domain *domain, struct psmx2_src_name *src_addr) { int err; psmx2_am_global_init(); psmx2_atomic_global_init(); domain->base_trx_ctxt = psmx2_trx_ctxt_alloc(domain, src_addr, -1); if (!domain->base_trx_ctxt) return -FI_ENODEV; err = fastlock_init(&domain->mr_lock); if (err) { FI_WARN(&psmx2_prov, FI_LOG_CORE, "fastlock_init(mr_lock) returns %d\n", err); goto err_out_free_trx_ctxt; } domain->mr_map = rbtNew(&psmx2_key_compare); if (!domain->mr_map) { FI_WARN(&psmx2_prov, FI_LOG_CORE, "rbtNew failed\n"); goto err_out_destroy_mr_lock; } domain->mr_reserved_key = 1; err = fastlock_init(&domain->vl_lock); if (err) { FI_WARN(&psmx2_prov, FI_LOG_CORE, "fastlock_init(vl_lock) returns %d\n", err); goto err_out_delete_mr_map; } memset(domain->vl_map, 0, sizeof(domain->vl_map)); domain->vl_alloc = 0; ofi_atomic_initialize32(&domain->sep_cnt, 0); fastlock_init(&domain->sep_lock); dlist_init(&domain->sep_list); dlist_init(&domain->trx_ctxt_list); fastlock_init(&domain->trx_ctxt_lock); dlist_insert_before(&domain->base_trx_ctxt->entry, &domain->trx_ctxt_list); /* Set active domain before psmx2_domain_enable_ep() installs the * AM handlers to ensure that psmx2_active_fabric->active_domain * is always non-NULL inside the handlers. Notice that the vlaue * active_domain becomes NULL again only when the domain is closed. * At that time the AM handlers are gone with the PSM endpoint. */ domain->fabric->active_domain = domain; if (psmx2_domain_enable_ep(domain, NULL) < 0) goto err_out_reset_active_domain; if (domain->progress_thread_enabled) psmx2_domain_start_progress(domain); psmx2_am_init(domain->base_trx_ctxt); return 0; err_out_reset_active_domain: domain->fabric->active_domain = NULL; fastlock_destroy(&domain->vl_lock); err_out_delete_mr_map: rbtDelete(domain->mr_map); err_out_destroy_mr_lock: fastlock_destroy(&domain->mr_lock); err_out_free_trx_ctxt: psmx2_trx_ctxt_free(domain->base_trx_ctxt); return err; }
int psmx2_ep_open(struct fid_domain *domain, struct fi_info *info, struct fid_ep **ep, void *context) { struct psmx2_fid_domain *domain_priv; struct psmx2_fid_ep *ep_priv; struct psmx2_context *item; uint8_t vlane; uint64_t ep_cap; int err = -FI_EINVAL; int i; if (info) ep_cap = info->caps; else ep_cap = FI_TAGGED; domain_priv = container_of(domain, struct psmx2_fid_domain, domain.fid); if (!domain_priv) goto errout; err = psmx2_domain_check_features(domain_priv, ep_cap); if (err) goto errout; err = psmx2_alloc_vlane(domain_priv, &vlane); if (err) goto errout; ep_priv = (struct psmx2_fid_ep *) calloc(1, sizeof *ep_priv); if (!ep_priv) { err = -FI_ENOMEM; goto errout_free_vlane; } ep_priv->ep.fid.fclass = FI_CLASS_EP; ep_priv->ep.fid.context = context; ep_priv->ep.fid.ops = &psmx2_fi_ops; ep_priv->ep.ops = &psmx2_ep_ops; ep_priv->ep.cm = &psmx2_cm_ops; ep_priv->domain = domain_priv; ep_priv->vlane = vlane; PSMX2_CTXT_TYPE(&ep_priv->nocomp_send_context) = PSMX2_NOCOMP_SEND_CONTEXT; PSMX2_CTXT_EP(&ep_priv->nocomp_send_context) = ep_priv; PSMX2_CTXT_TYPE(&ep_priv->nocomp_recv_context) = PSMX2_NOCOMP_RECV_CONTEXT; PSMX2_CTXT_EP(&ep_priv->nocomp_recv_context) = ep_priv; if (ep_cap & FI_TAGGED) ep_priv->ep.tagged = &psmx2_tagged_ops; if (ep_cap & FI_MSG) ep_priv->ep.msg = &psmx2_msg_ops; if (ep_cap & FI_RMA) ep_priv->ep.rma = &psmx2_rma_ops; if (ep_cap & FI_ATOMICS) ep_priv->ep.atomic = &psmx2_atomic_ops; ep_priv->caps = ep_cap; err = psmx2_domain_enable_ep(domain_priv, ep_priv); if (err) goto errout_free_ep; psmx2_domain_acquire(domain_priv); domain_priv->eps[ep_priv->vlane] = ep_priv; if (info) { if (info->tx_attr) ep_priv->flags = info->tx_attr->op_flags; if (info->rx_attr) ep_priv->flags |= info->rx_attr->op_flags; } psmx2_ep_optimize_ops(ep_priv); slist_init(&ep_priv->free_context_list); fastlock_init(&ep_priv->context_lock); #define PSMX2_FREE_CONTEXT_LIST_SIZE 64 for (i=0; i<PSMX2_FREE_CONTEXT_LIST_SIZE; i++) { item = calloc(1, sizeof(*item)); if (!item) { 
FI_WARN(&psmx2_prov, FI_LOG_EP_CTRL, "out of memory.\n"); exit(-1); } slist_insert_tail(&item->list_entry, &ep_priv->free_context_list); } *ep = &ep_priv->ep; return 0; errout_free_ep: free(ep_priv); errout_free_vlane: psmx2_free_vlane(domain_priv, vlane); errout: return err; }
int sock_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, struct fid_cq **cq, void *context) { struct sock_domain *sock_dom; struct sock_cq *sock_cq; struct fi_wait_attr wait_attr; struct sock_fid_list *list_entry; struct sock_wait *wait; int ret; sock_dom = container_of(domain, struct sock_domain, dom_fid); ret = sock_cq_verify_attr(attr); if (ret) return ret; sock_cq = calloc(1, sizeof(*sock_cq)); if (!sock_cq) return -FI_ENOMEM; atomic_initialize(&sock_cq->ref, 0); sock_cq->cq_fid.fid.fclass = FI_CLASS_CQ; sock_cq->cq_fid.fid.context = context; sock_cq->cq_fid.fid.ops = &sock_cq_fi_ops; sock_cq->cq_fid.ops = &sock_cq_ops; if (attr == NULL) sock_cq->attr = _sock_cq_def_attr; else { sock_cq->attr = *attr; if (attr->size == 0) sock_cq->attr.size = _sock_cq_def_attr.size; } sock_cq->domain = sock_dom; sock_cq->cq_entry_size = sock_cq_entry_size(sock_cq); sock_cq_set_report_fn(sock_cq); dlist_init(&sock_cq->tx_list); dlist_init(&sock_cq->rx_list); dlist_init(&sock_cq->ep_list); dlist_init(&sock_cq->overflow_list); if ((ret = rbfdinit(&sock_cq->cq_rbfd, sock_cq->attr.size * sock_cq->cq_entry_size))) goto err1; if ((ret = rbinit(&sock_cq->addr_rb, sock_cq->attr.size * sizeof(fi_addr_t)))) goto err2; if ((ret = rbinit(&sock_cq->cqerr_rb, sock_cq->attr.size * sizeof(struct fi_cq_err_entry)))) goto err3; fastlock_init(&sock_cq->lock); switch (sock_cq->attr.wait_obj) { case FI_WAIT_NONE: case FI_WAIT_UNSPEC: case FI_WAIT_FD: break; case FI_WAIT_MUTEX_COND: wait_attr.flags = 0; wait_attr.wait_obj = FI_WAIT_MUTEX_COND; ret = sock_wait_open(&sock_dom->fab->fab_fid, &wait_attr, &sock_cq->waitset); if (ret) { ret = -FI_EINVAL; goto err4; } sock_cq->signal = 1; break; case FI_WAIT_SET: if (!attr) { ret = -FI_EINVAL; goto err4; } sock_cq->waitset = attr->wait_set; sock_cq->signal = 1; wait = container_of(attr->wait_set, struct sock_wait, wait_fid); list_entry = calloc(1, sizeof(*list_entry)); dlist_init(&list_entry->entry); list_entry->fid = &sock_cq->cq_fid.fid; 
dlist_insert_after(&list_entry->entry, &wait->fid_list); break; default: break; } *cq = &sock_cq->cq_fid; atomic_inc(&sock_dom->ref); fastlock_init(&sock_cq->list_lock); return 0; err4: rbfree(&sock_cq->cqerr_rb); err3: rbfree(&sock_cq->addr_rb); err2: rbfdfree(&sock_cq->cq_rbfd); err1: free(sock_cq); return ret; }
/*
 * Initialize psmx_atomic_lock, a lock object declared outside this function.
 * Takes no arguments and returns nothing; the result of fastlock_init() is
 * not checked.
 * NOTE(review): presumably this lock serializes the provider's atomic
 * operations -- confirm against the code that acquires it.
 */
void psmx_atomic_init(void) { fastlock_init(&psmx_atomic_lock); }
struct psmx2_trx_ctxt *psmx2_trx_ctxt_alloc(struct psmx2_fid_domain *domain, struct psmx2_src_name *src_addr, int sep_ctxt_idx) { struct psmx2_trx_ctxt *trx_ctxt; struct psm2_ep_open_opts opts; int should_retry = 0; int err; trx_ctxt = calloc(1, sizeof(*trx_ctxt)); if (!trx_ctxt) { FI_WARN(&psmx2_prov, FI_LOG_CORE, "failed to allocate trx_ctxt.\n"); return NULL; } psm2_ep_open_opts_get_defaults(&opts); FI_INFO(&psmx2_prov, FI_LOG_CORE, "uuid: %s\n", psmx2_uuid_to_string(domain->fabric->uuid)); if (src_addr) { opts.unit = src_addr->unit; opts.port = src_addr->port; FI_INFO(&psmx2_prov, FI_LOG_CORE, "ep_open_opts: unit=%d port=%u\n", opts.unit, opts.port); } if (opts.unit < 0 && sep_ctxt_idx >= 0) { should_retry = 1; opts.unit = sep_ctxt_idx % psmx2_env.num_devunits; FI_INFO(&psmx2_prov, FI_LOG_CORE, "sep %d: ep_open_opts: unit=%d\n", sep_ctxt_idx, opts.unit); } err = psm2_ep_open(domain->fabric->uuid, &opts, &trx_ctxt->psm2_ep, &trx_ctxt->psm2_epid); if (err != PSM2_OK) { FI_WARN(&psmx2_prov, FI_LOG_CORE, "psm2_ep_open returns %d, errno=%d\n", err, errno); if (!should_retry) { err = psmx2_errno(err); goto err_out; } /* When round-robin fails, retry w/o explicit assignment */ opts.unit = -1; err = psm2_ep_open(domain->fabric->uuid, &opts, &trx_ctxt->psm2_ep, &trx_ctxt->psm2_epid); if (err != PSM2_OK) { FI_WARN(&psmx2_prov, FI_LOG_CORE, "psm2_ep_open returns %d, errno=%d\n", err, errno); err = psmx2_errno(err); goto err_out; } } FI_INFO(&psmx2_prov, FI_LOG_CORE, "epid: 0x%016lx\n", trx_ctxt->psm2_epid); err = psm2_mq_init(trx_ctxt->psm2_ep, PSM2_MQ_ORDERMASK_ALL, NULL, 0, &trx_ctxt->psm2_mq); if (err != PSM2_OK) { FI_WARN(&psmx2_prov, FI_LOG_CORE, "psm2_mq_init returns %d, errno=%d\n", err, errno); err = psmx2_errno(err); goto err_out_close_ep; } fastlock_init(&trx_ctxt->poll_lock); fastlock_init(&trx_ctxt->rma_queue.lock); fastlock_init(&trx_ctxt->trigger_queue.lock); slist_init(&trx_ctxt->rma_queue.list); slist_init(&trx_ctxt->trigger_queue.list); return trx_ctxt; 
err_out_close_ep: if (psm2_ep_close(trx_ctxt->psm2_ep, PSM2_EP_CLOSE_GRACEFUL, (int64_t) psmx2_env.timeout * 1000000000LL) != PSM2_OK) psm2_ep_close(trx_ctxt->psm2_ep, PSM2_EP_CLOSE_FORCE, 0); err_out: free(trx_ctxt); return NULL; }
/*
 * Create a mailbox allocator.
 *
 * @nic:          NIC the allocator is bound to (required)
 * @cq_handle:    CQ handle stored in the allocator
 * @page_size:    page size selector; multiplied by 1024 * 1024 to obtain
 *                the stored page size
 * @mbox_size:    size of one mailbox (must be non-zero)
 * @mpmmap:       stored in the handle as 'mpmmap' (must be non-zero)
 * @alloc_handle: receives the new handle; set to NULL on failure
 *
 * A huge-page backed slab is tried first.  If that fails and
 * gnix_mbox_alloc_allow_fallback is true, the huge-page file state is
 * dropped and slab creation is retried with fd == -1.
 *
 * Returns FI_SUCCESS, -FI_EINVAL for bad arguments, -FI_ENOMEM if the
 * handle cannot be allocated, or the error of the last failing step.
 */
int _gnix_mbox_allocator_create(struct gnix_nic *nic,
				gni_cq_handle_t cq_handle,
				enum gnix_page_size page_size,
				size_t mbox_size,
				size_t mpmmap,
				struct gnix_mbox_alloc_handle **alloc_handle)
{
	struct gnix_mbox_alloc_handle *handle;
	char error_buf[256];
	char *error;
	int ret;

	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");

	if (!nic || !mbox_size || !mpmmap || !alloc_handle) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "Invalid parameter to allocator_create.\n");
		return -FI_EINVAL;
	}

	*alloc_handle = NULL;

	handle = calloc(1, sizeof(*handle));
	if (!handle) {
		error = strerror_r(errno, error_buf, sizeof(error_buf));
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "Error allocating alloc handle: %s\n",
			  error);
		return -FI_ENOMEM;
	}

	handle->page_size = page_size * 1024 * 1024;
	handle->mbox_size = mbox_size;
	handle->mpmmap = mpmmap;
	handle->nic_handle = nic;
	handle->cq_handle = cq_handle;
	/*
	 * calloc() leaves fd at 0, which is a valid descriptor.  Mark it as
	 * "no file" so that the checks below can never close descriptor 0
	 * when the huge page file was not opened.
	 */
	handle->fd = -1;
	fastlock_init(&handle->lock);

	ret = __open_huge_page(handle);
	if (ret == FI_SUCCESS) {
		ret = __create_slab(handle);
		if (ret != FI_SUCCESS) {
			GNIX_WARN(FI_LOG_EP_CTRL, "Slab creation failed.\n");
		}
	} else {
		GNIX_WARN(FI_LOG_EP_CTRL, "Error opening huge page.\n");
	}

	/*
	 * try plan B - try to use anonymous mapping (base page size).
	 * If a file was successfully opened, close fd and free filename
	 * field in the handle.
	 */
	if ((ret != FI_SUCCESS) &&
	    (gnix_mbox_alloc_allow_fallback == true)) {
		if (handle->filename != NULL) {
			free(handle->filename);
			handle->filename = NULL;
		}

		if (handle->fd != -1) {
			ret = close(handle->fd);
			handle->fd = -1;
			if (ret) {
				GNIX_WARN(FI_LOG_EP_CTRL,
					  "Error closing huge page - %d\n",
					  ret);
			}
		}

		ret = __create_slab(handle);
		if (ret != FI_SUCCESS) {
			GNIX_WARN(FI_LOG_EP_CTRL,
				  "Slab(anon) creation failed.\n");
		}
	}

	if (ret == FI_SUCCESS) {
		*alloc_handle = handle;
		return ret;
	}

	/*
	 * Final failure: release whatever the huge-page attempt left behind
	 * before dropping the handle.
	 * NOTE(review): handle->lock is not destroyed on this path.
	 */
	free(handle->filename);
	if (handle->fd != -1)
		close(handle->fd);
	free(handle);

	return ret;
}