/* Create the tree */
int opal_rb_tree_init(opal_rb_tree_t * tree,
                      opal_rb_tree_comp_fn_t comp)
{
    opal_free_list_item_t * node;

    /* we need to get memory for the root pointer from the free list */
    node = opal_free_list_get (&(tree->free_list));
    tree->root_ptr = (opal_rb_tree_node_t *) node;
    if (NULL == node) {
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    node = opal_free_list_get (&(tree->free_list));
    if (NULL == node) {
        opal_free_list_return (&tree->free_list, (opal_free_list_item_t*)tree->root_ptr);
        return OPAL_ERR_OUT_OF_RESOURCE;
    }
    tree->nill = (opal_rb_tree_node_t *) node;

    /* initialize tree->nill */
    tree->nill->color = BLACK;
    tree->nill->left = tree->nill;
    tree->nill->right = tree->nill;
    tree->nill->parent = tree->nill;

    /* initialize the 'root' pointer */
    tree->root_ptr->left = tree->nill;
    tree->root_ptr->right = tree->nill;
    tree->root_ptr->parent = tree->nill;
    tree->root_ptr->color = BLACK;

    tree->comp = comp;

    /* set the tree size to zero */
    tree->tree_size = 0;

    return OPAL_SUCCESS;
}
static inline int mca_btl_ugni_ep_smsg_get_mbox (mca_btl_base_endpoint_t *ep)
{
    mca_btl_ugni_module_t *ugni_module = ep->btl;
    opal_free_list_item_t *mbox;

    mbox = opal_free_list_get (&ugni_module->smsg_mboxes);
    if (OPAL_UNLIKELY(NULL == mbox)) {
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    ep->mailbox = (mca_btl_ugni_smsg_mbox_t *) mbox;
    ep->mailbox->attr.index = ep->index;

    /* per ugni spec we need to zero mailbox data before connecting */
    memset ((char *)ep->mailbox->attr.smsg_attr.msg_buffer +
            ep->mailbox->attr.smsg_attr.mbox_offset, 0,
            ep->mailbox->attr.smsg_attr.buff_size);

    return OPAL_SUCCESS;
}
static void* getBuffer(JNIEnv *env, ompi_java_buffer_t **item, int size)
{
    if(size > ompi_mpi_java_eager)
    {
        *item = NULL;
        return malloc(size);
    }
    else
    {
        opal_free_list_item_t *freeListItem;
        freeListItem = opal_free_list_get (&ompi_java_buffers);

        ompi_java_exceptionCheck(env, NULL == freeListItem ?
                                 MPI_ERR_NO_MEM : MPI_SUCCESS);
        if (NULL == freeListItem) {
            return NULL;
        }

        *item = (ompi_java_buffer_t*)freeListItem;
        return (*item)->buffer;
    }
}
static int
read_msg(void *start, ptl_size_t length, ptl_process_t target,
         ptl_match_bits_t match_bits, ptl_size_t remote_offset,
         ompi_mtl_portals4_recv_request_t *request)
{
    int ret, i;
    ptl_size_t rest = length, asked = 0;
    int32_t frag_count;

#if OMPI_MTL_PORTALS4_FLOW_CONTROL
    while (OPAL_UNLIKELY(OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, -1) < 0)) {
        OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
        ompi_mtl_portals4_progress();
    }
#endif

    frag_count = (length + ompi_mtl_portals4.max_msg_size_mtl - 1) /
        ompi_mtl_portals4.max_msg_size_mtl;
    ret = OPAL_THREAD_ADD32(&(request->pending_reply), frag_count);

    for (i = 0 ; i < frag_count ; i++) {
        opal_free_list_item_t *tmp;
        ompi_mtl_portals4_rndv_get_frag_t* frag;

        tmp = opal_free_list_get (&ompi_mtl_portals4.fl_rndv_get_frag);
        if (NULL == tmp) return OMPI_ERR_OUT_OF_RESOURCE;

        frag = (ompi_mtl_portals4_rndv_get_frag_t*) tmp;

        frag->request = request;
#if OPAL_ENABLE_DEBUG
        frag->frag_num = i;
#endif
        frag->frag_start = (char*)start + i * ompi_mtl_portals4.max_msg_size_mtl;
        frag->frag_length = (OPAL_UNLIKELY(rest > ompi_mtl_portals4.max_msg_size_mtl)) ?
            ompi_mtl_portals4.max_msg_size_mtl : rest;
        frag->frag_target = target;
        frag->frag_match_bits = match_bits;
        frag->frag_remote_offset = remote_offset + i * ompi_mtl_portals4.max_msg_size_mtl;

        frag->event_callback = ompi_mtl_portals4_rndv_get_frag_progress;
        frag->frag_abs_timeout_usec = 0;

        OPAL_OUTPUT_VERBOSE((90, ompi_mtl_base_framework.framework_output,
                             "GET (fragment %d/%d, size %ld) send",
                             i + 1, frag_count, frag->frag_length));

        ret = PtlGet(ompi_mtl_portals4.send_md_h,
                     (ptl_size_t) frag->frag_start,
                     frag->frag_length,
                     frag->frag_target,
                     ompi_mtl_portals4.read_idx,
                     frag->frag_match_bits,
                     frag->frag_remote_offset,
                     frag);
        if (OPAL_UNLIKELY(PTL_OK != ret)) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "%s:%d: PtlGet failed: %d",
                                __FILE__, __LINE__, ret);
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
        rest -= frag->frag_length;
        asked += frag->frag_length;
    }

    return OMPI_SUCCESS;
}
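/* Illustrative sketch (not part of the MTL source): the frag_count computation
 * in read_msg() above is the standard integer ceiling-division idiom, and every
 * fragment except possibly the last carries max_msg_size bytes. The values below
 * are hypothetical, chosen only to show how the arithmetic covers the message. */
#include <stdio.h>

int main(void)
{
    unsigned long length       = 10UL * 1024 * 1024 + 17;  /* hypothetical message size */
    unsigned long max_msg_size =  4UL * 1024 * 1024;       /* hypothetical per-GET limit */

    /* ceiling division: number of GET fragments needed to cover length */
    unsigned long frag_count = (length + max_msg_size - 1) / max_msg_size;

    unsigned long rest = length;
    for (unsigned long i = 0; i < frag_count; ++i) {
        unsigned long frag_length = (rest > max_msg_size) ? max_msg_size : rest;
        printf("fragment %lu/%lu: offset=%lu length=%lu\n",
               i + 1, frag_count, i * max_msg_size, frag_length);
        rest -= frag_length;
    }
    return 0;
}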
/*
 * get a tree item from the free list
 */
mca_mpool_base_tree_item_t* mca_mpool_base_tree_item_get(void)
{
    return (mca_mpool_base_tree_item_t *)
        opal_free_list_get (&mca_mpool_base_tree_item_free_list);
}
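/* Illustrative sketch (not from the Open MPI tree): this distills the pattern
 * the snippets in this section share -- take an item from an already-initialized
 * opal_free_list_t, bail out with OPAL_ERR_OUT_OF_RESOURCE when the list is
 * exhausted, cast to the payload type, and hand the item back with
 * opal_free_list_return() when done. The type my_item_t and the function name
 * are hypothetical; in the real code the payload types are OPAL classes derived
 * from opal_free_list_item_t, which a plain struct with that member first only
 * approximates. */
#include "opal/class/opal_free_list.h"

typedef struct {
    opal_free_list_item_t super;   /* free-list item must come first for the cast to work */
    int payload;
} my_item_t;

static int my_use_one_item(opal_free_list_t *fl)
{
    opal_free_list_item_t *item = opal_free_list_get(fl);
    if (NULL == item) {
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    my_item_t *mine = (my_item_t *) item;
    mine->payload = 42;            /* ... use the item ... */

    /* give the item back to the list instead of freeing it */
    opal_free_list_return(fl, item);
    return OPAL_SUCCESS;
}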
/*
 * This function opens a memory handle that was received from the
 * remote side. It uses the addr and size of the remote memory for
 * caching the registration.
 */
int mca_rcache_rgpusm_register (mca_rcache_base_module_t *rcache, void *addr,
                                size_t size, uint32_t flags, int32_t access_flags,
                                mca_rcache_base_registration_t **reg)
{
    mca_rcache_rgpusm_module_t *rcache_rgpusm = (mca_rcache_rgpusm_module_t*)rcache;
    mca_rcache_common_cuda_reg_t *rgpusm_reg;
    mca_rcache_common_cuda_reg_t *rget_reg;
    opal_free_list_item_t *item;
    int rc;
    int mypeer;  /* just for debugging */

    /* In order to preserve the signature of the mca_rcache_rgpusm_register
     * function, we are using the **reg variable to not only get back the
     * registration information, but to hand in the memory handle received
     * from the remote side. */
    rget_reg = (mca_rcache_common_cuda_reg_t *)*reg;

    mypeer = flags;
    flags = 0;
    /* No need to support MCA_RCACHE_FLAGS_CACHE_BYPASS in here. It is not used. */
    assert(0 == (flags & MCA_RCACHE_FLAGS_CACHE_BYPASS));

    /* This chunk of code handles the case where leave pinned is not
     * set and we do not use the cache. This is not typically how we
     * will be running. This means that one can have an unlimited
     * number of registrations occurring at the same time. Since we
     * are not leaving the registrations pinned, the number of
     * registrations is unlimited and there is no need for a cache. */
    if(!mca_rcache_rgpusm_component.leave_pinned &&
       0 == mca_rcache_rgpusm_component.rcache_size_limit) {
        item = opal_free_list_get (&rcache_rgpusm->reg_list);
        if(NULL == item) {
            return OPAL_ERR_OUT_OF_RESOURCE;
        }
        rgpusm_reg = (mca_rcache_common_cuda_reg_t*)item;
        rgpusm_reg->base.rcache = rcache;
        rgpusm_reg->base.base = addr;
        rgpusm_reg->base.bound = (unsigned char *)addr + size - 1;
        rgpusm_reg->base.flags = flags;

        /* Copy the memory handle received into the registration */
        memcpy(rgpusm_reg->data.memHandle, rget_reg->data.memHandle,
               sizeof(rget_reg->data.memHandle));

        /* The rget_reg registration is holding the memory handle needed
         * to register the remote memory. This was received from the remote
         * process. A pointer to the memory is returned in the alloc_base field. */
        rc = cuda_openmemhandle (addr, size,
                                 (mca_rcache_base_registration_t *)rgpusm_reg,
                                 (mca_rcache_base_registration_t *)rget_reg);

        /* This error should not happen with no cache in use. */
        assert(OPAL_ERR_WOULD_BLOCK != rc);

        if(rc != OPAL_SUCCESS) {
            opal_free_list_return (&rcache_rgpusm->reg_list, item);
            return rc;
        }
        rgpusm_reg->base.ref_count++;
        *reg = (mca_rcache_base_registration_t *)rgpusm_reg;
        return OPAL_SUCCESS;
    }

    /* Check to see if memory is registered and stored in the cache. */
    OPAL_THREAD_LOCK(&rcache->lock);
    mca_rcache_base_vma_find (rcache_rgpusm->vma_module, addr, size, reg);

    /* If *reg is not NULL, we have a registration. Let us see if the
     * memory handle matches the one we were looking for. If not, the
     * registration is invalid and needs to be removed. This happens
     * if memory was allocated, freed, and allocated again and ends up
     * with the same virtual address and within the limits of the
     * previous registration. The memory handle check will catch that
     * scenario as the handles have unique serial numbers. */
    if (*reg != NULL) {
        rcache_rgpusm->stat_cache_hit++;
        opal_output_verbose(10, mca_rcache_rgpusm_component.output,
                            "RGPUSM: Found addr=%p,size=%d (base=%p,size=%d) in cache",
                            addr, (int)size, (*reg)->base,
                            (int)((*reg)->bound - (*reg)->base));

        if (mca_common_cuda_memhandle_matches((mca_rcache_common_cuda_reg_t *)*reg, rget_reg)) {
            /* Registration matches what was requested. All is good. */
            rcache_rgpusm->stat_cache_valid++;
        } else {
            /* This is an old registration. Need to boot it. */
            opal_output_verbose(10, mca_rcache_rgpusm_component.output,
                                "RGPUSM: Mismatched Handle: Evicting/unregistering "
                                "addr=%p,size=%d (base=%p,size=%d) from cache",
                                addr, (int)size, (*reg)->base,
                                (int)((*reg)->bound - (*reg)->base));

            /* The ref_count has to be zero as this memory cannot possibly
             * be in use. Assert on that just to make sure. */
            assert(0 == (*reg)->ref_count);
            if (mca_rcache_rgpusm_component.leave_pinned) {
                opal_list_remove_item(&rcache_rgpusm->lru_list,
                                      (opal_list_item_t*)(*reg));
            }

            /* Bump the reference count to keep things copacetic in deregister */
            (*reg)->ref_count++;
            /* Invalidate the registration so it will get booted out. */
            (*reg)->flags |= MCA_RCACHE_FLAGS_INVALID;
            mca_rcache_rgpusm_deregister_no_lock(rcache, *reg);
            *reg = NULL;
            rcache_rgpusm->stat_cache_invalid++;
        }
    } else {
        /* Nothing was found in the cache. */
        rcache_rgpusm->stat_cache_miss++;
    }

    /* If we have a registration here, then we know it is valid. */
    if (*reg != NULL) {
        opal_output_verbose(10, mca_rcache_rgpusm_component.output,
                            "RGPUSM: CACHE HIT is good: ep=%d, addr=%p, size=%d in cache",
                            mypeer, addr, (int)size);

        /* When using leave pinned, we keep an LRU list. */
        if ((0 == (*reg)->ref_count) && mca_rcache_rgpusm_component.leave_pinned) {
            opal_output_verbose(20, mca_rcache_rgpusm_component.output,
                                "RGPUSM: POP OFF LRU: ep=%d, addr=%p, size=%d in cache",
                                mypeer, addr, (int)size);
            opal_list_remove_item(&rcache_rgpusm->lru_list,
                                  (opal_list_item_t*)(*reg));
        }
        (*reg)->ref_count++;
        OPAL_THREAD_UNLOCK(&rcache->lock);
        opal_output(-1, "reg->ref_count=%d", (int)(*reg)->ref_count);
        opal_output_verbose(80, mca_rcache_rgpusm_component.output,
                            "RGPUSM: Found entry in cache addr=%p, size=%d",
                            addr, (int)size);
        return OPAL_SUCCESS;
    }

    /* If we are here, then we did not find a registration, or it was invalid,
     * so this is a new one, and we are going to use the cache. */
    assert(NULL == *reg);
    opal_output_verbose(10, mca_rcache_rgpusm_component.output,
                        "RGPUSM: New registration ep=%d, addr=%p, size=%d. "
                        "Need to register and insert in cache",
                        mypeer, addr, (int)size);

    item = opal_free_list_get (&rcache_rgpusm->reg_list);
    if(NULL == item) {
        OPAL_THREAD_UNLOCK(&rcache->lock);
        return OPAL_ERR_OUT_OF_RESOURCE;
    }
    rgpusm_reg = (mca_rcache_common_cuda_reg_t*)item;

    rgpusm_reg->base.rcache = rcache;
    rgpusm_reg->base.base = addr;
    rgpusm_reg->base.bound = (unsigned char *)addr + size - 1;
    rgpusm_reg->base.flags = flags;

    /* Need the memory handle saved in the registration */
    memcpy(rgpusm_reg->data.memHandle, rget_reg->data.memHandle,
           sizeof(rget_reg->data.memHandle));

    /* Actually register the memory, which opens the memory handle.
     * Need to do this prior to putting in the cache as the base and
     * bound values may be changed by the registration. The memory
     * associated with the handle comes back in the alloc_base
     * value. */
    rc = cuda_openmemhandle (addr, size,
                             (mca_rcache_base_registration_t *)rgpusm_reg,
                             (mca_rcache_base_registration_t *)rget_reg);

    /* There is a chance we can get the OPAL_ERR_WOULD_BLOCK from the
     * CUDA code's attempt to register the memory. The case that this
     * can happen is as follows. A block of memory is registered.
     * Then the sending side frees the memory. The sending side then
     * cuMemAllocs memory again and gets the same base
     * address. However, it cuMemAllocs a block that is larger than
     * the one in the cache. The cache will return that memory is not
     * registered and call into CUDA to register it. However, that
     * will fail with CUDA_ERROR_ALREADY_MAPPED. Therefore we need to
     * boot that previous allocation out and deregister it first. */
    if (OPAL_ERR_WOULD_BLOCK == rc) {
        mca_rcache_base_registration_t *oldreg;

        /* Need to make sure it is at least 4 bytes in size. This will
         * ensure we get the hit in the cache. */
        mca_rcache_base_vma_find (rcache_rgpusm->vma_module, addr, 4, &oldreg);

        /* For most cases, we will find a registration that overlaps.
         * Removal of it should allow the registration we are
         * attempting to succeed. */
        if (NULL != oldreg) {
            /* The ref_count has to be zero as this memory cannot
             * possibly be in use. Assert on that just to make sure. */
            assert(0 == oldreg->ref_count);
            if (mca_rcache_rgpusm_component.leave_pinned) {
                opal_list_remove_item(&rcache_rgpusm->lru_list,
                                      (opal_list_item_t*)oldreg);
            }

            /* Bump the reference count to keep things copacetic in deregister */
            oldreg->ref_count++;
            /* Invalidate the registration so it will get booted out. */
            oldreg->flags |= MCA_RCACHE_FLAGS_INVALID;
            mca_rcache_rgpusm_deregister_no_lock(rcache, oldreg);
            rcache_rgpusm->stat_evicted++;

            /* And try again. This one usually works. */
            rc = cuda_openmemhandle (addr, size,
                                     (mca_rcache_base_registration_t *)rgpusm_reg,
                                     (mca_rcache_base_registration_t *)rget_reg);
        }

        /* There is a chance that another registration is blocking our
         * ability to register. Check the rc to see if we still need
         * to try and clear out registrations. */
        while (OPAL_SUCCESS != rc) {
            if (true != mca_rcache_rgpusm_deregister_lru(rcache)) {
                rc = OPAL_ERROR;
                break;
            }
            /* Clear out one registration. */
            rc = cuda_openmemhandle (addr, size,
                                     (mca_rcache_base_registration_t *)rgpusm_reg,
                                     (mca_rcache_base_registration_t *)rget_reg);
        }
    }

    if(rc != OPAL_SUCCESS) {
        OPAL_THREAD_UNLOCK(&rcache->lock);
        opal_free_list_return (&rcache_rgpusm->reg_list, item);
        return rc;
    }

    opal_output_verbose(80, mca_rcache_rgpusm_component.output,
                        "RGPUSM: About to insert in rgpusm cache addr=%p, size=%d",
                        addr, (int)size);
    rc = mca_rcache_base_vma_insert (rcache_rgpusm->vma_module,
                                     (mca_rcache_base_registration_t *)rgpusm_reg,
                                     mca_rcache_rgpusm_component.rcache_size_limit);
    if (OPAL_ERR_TEMP_OUT_OF_RESOURCE == rc) {
        opal_output_verbose(40, mca_rcache_rgpusm_component.output,
                            "RGPUSM: No room in the cache - boot the first one out");
        (void)mca_rcache_rgpusm_deregister_lru(rcache);
        if (mca_rcache_rgpusm_component.empty_cache) {
            int remNum = 1;
            /* Empty out every registration from LRU until it is empty */
            opal_output_verbose(40, mca_rcache_rgpusm_component.output,
                                "RGPUSM: About to delete all the unused entries in the cache");
            while (mca_rcache_rgpusm_deregister_lru(rcache)) {
                remNum++;
            }
            opal_output_verbose(40, mca_rcache_rgpusm_component.output,
                                "RGPUSM: Deleted and deregistered %d entries", remNum);
            rc = mca_rcache_base_vma_insert (rcache_rgpusm->vma_module,
                                             (mca_rcache_base_registration_t *)rgpusm_reg,
                                             mca_rcache_rgpusm_component.rcache_size_limit);
        } else {
            /* Check for room after one removal. If not, remove another one until there is space */
            while ((rc = mca_rcache_base_vma_insert (rcache_rgpusm->vma_module,
                                                     (mca_rcache_base_registration_t *)rgpusm_reg,
                                                     mca_rcache_rgpusm_component.rcache_size_limit)) ==
                   OPAL_ERR_TEMP_OUT_OF_RESOURCE) {
                opal_output_verbose(40, mca_rcache_rgpusm_component.output,
                                    "RGPUSM: No room in the cache - boot one out");
                if (!mca_rcache_rgpusm_deregister_lru(rcache)) {
                    break;
                }
            }
        }
    }

    if(rc != OPAL_SUCCESS) {
        OPAL_THREAD_UNLOCK(&rcache->lock);
        opal_free_list_return (&rcache_rgpusm->reg_list, item);
        /* We cannot recover from this. We can be here if the size of
         * the cache is smaller than the amount of memory we are
         * trying to register in a single transfer. In that case, rc
         * is MPI_ERR_OUT_OF_RESOURCES, but everything is stuck at
         * that point. Therefore, just error out completely. */
        opal_output_verbose(10, mca_rcache_rgpusm_component.output,
                            "RGPUSM: Failed to register addr=%p, size=%d",
                            addr, (int)size);
        return OPAL_ERROR;
    }

    rgpusm_reg->base.ref_count++;
    *reg = (mca_rcache_base_registration_t *)rgpusm_reg;
    OPAL_THREAD_UNLOCK(&rcache->lock);

    return OPAL_SUCCESS;
}
/* This inserts a node into the tree based on the passed values. */
int opal_rb_tree_insert(opal_rb_tree_t *tree, void * key, void * value)
{
    opal_rb_tree_node_t * y;
    opal_rb_tree_node_t * node;
    opal_free_list_item_t * item;

    /* get the memory for a node */
    item = opal_free_list_get (&tree->free_list);
    if (NULL == item) {
        return OPAL_ERR_OUT_OF_RESOURCE;
    }
    node = (opal_rb_tree_node_t *) item;

    /* insert the data into the node */
    node->key = key;
    node->value = value;

    /* insert the node into the tree */
    btree_insert(tree, node);

    /* do the rotations */
    /* usually one would have to check for NULL, but because of the sentinel,
     * we don't have to */
    while (node->parent->color == RED) {
        if (node->parent == node->parent->parent->left) {
            y = node->parent->parent->right;
            if (y->color == RED) {
                node->parent->color = BLACK;
                y->color = BLACK;
                node->parent->parent->color = RED;
                node = node->parent->parent;
            } else {
                if (node == node->parent->right) {
                    node = node->parent;
                    left_rotate(tree, node);
                }
                node->parent->color = BLACK;
                node->parent->parent->color = RED;
                right_rotate(tree, node->parent->parent);
            }
        } else {
            y = node->parent->parent->left;
            if (y->color == RED) {
                node->parent->color = BLACK;
                y->color = BLACK;
                node->parent->parent->color = RED;
                node = node->parent->parent;
            } else {
                if (node == node->parent->left) {
                    node = node->parent;
                    right_rotate(tree, node);
                }
                node->parent->color = BLACK;
                node->parent->parent->color = RED;
                left_rotate(tree, node->parent->parent);
            }
        }
    }
    /* after the rotations the root is black */
    tree->root_ptr->left->color = BLACK;
    return OPAL_SUCCESS;
}
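/* Illustrative sketch (not from the Open MPI tree): minimal use of the
 * red-black tree API shown above. It assumes the tree object is created
 * through the OPAL object system (OBJ_NEW/OBJ_RELEASE) and that the
 * comparison callback follows the usual strcmp-style contract (<0, 0, >0);
 * the header path and exact opal_rb_tree_comp_fn_t signature should be
 * checked against opal/class/opal_rb_tree.h. The helper names
 * my_int_key_compare and my_build_tree are hypothetical. */
#include "opal/constants.h"
#include "opal/class/opal_rb_tree.h"

static int my_int_key_compare(void *key1, void *key2)
{
    int a = *(int *) key1, b = *(int *) key2;
    return (a < b) ? -1 : (a > b) ? 1 : 0;
}

static int my_build_tree(void)
{
    /* keys and values are stored by pointer, so they must outlive the tree */
    static int key = 17, value = 42;

    opal_rb_tree_t *tree = OBJ_NEW(opal_rb_tree_t);
    if (NULL == tree) {
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    int rc = opal_rb_tree_init(tree, my_int_key_compare);
    if (OPAL_SUCCESS == rc) {
        rc = opal_rb_tree_insert(tree, &key, &value);
    }

    OBJ_RELEASE(tree);   /* release the tree object when done */
    return rc;
}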
static inline int
ompi_mtl_portals4_send_start(struct mca_mtl_base_module_t* mtl,
                             struct ompi_communicator_t* comm,
                             int dest,
                             int tag,
                             struct opal_convertor_t *convertor,
                             mca_pml_base_send_mode_t mode,
                             ompi_mtl_portals4_isend_request_t* ptl_request)
{
    int ret = OMPI_SUCCESS;
    void *start;
    size_t length;
    bool free_after;
    ptl_process_t ptl_proc;
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
    opal_free_list_item_t *item;
    ompi_mtl_portals4_pending_request_t *pending;
#endif

    if ((ompi_mtl_portals4.use_logical) && (MPI_COMM_WORLD == comm)) {
        ptl_proc.rank = dest;
    } else {
        ompi_proc_t *ompi_proc = ompi_comm_peer_lookup(comm, dest);
        ptl_proc = *((ptl_process_t*) ompi_mtl_portals4_get_endpoint (mtl, ompi_proc));
    }

    ret = ompi_mtl_datatype_pack(convertor, &start, &length, &free_after);
    if (OMPI_SUCCESS != ret) return ret;

    ptl_request->opcount = OPAL_THREAD_ADD64((int64_t*)&ompi_mtl_portals4.opcount, 1);
    ptl_request->buffer_ptr = (free_after) ? start : NULL;
    ptl_request->event_count = 0;

    OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                         "Send %lu to %x,%x of length %ld\n",
                         ptl_request->opcount,
                         ptl_proc.phys.nid,
                         ptl_proc.phys.pid,
                         (int64_t)length));

#if OMPI_MTL_PORTALS4_FLOW_CONTROL
    item = opal_free_list_get (&ompi_mtl_portals4.flowctl.pending_fl);
    if (NULL == item) return OMPI_ERR_OUT_OF_RESOURCE;

    pending = (ompi_mtl_portals4_pending_request_t*) item;
    ptl_request->pending = pending;
    pending->mode = mode;
    pending->start = start;
    pending->length = length;
    pending->contextid = comm->c_contextid;
    pending->tag = tag;
    pending->my_rank = comm->c_my_rank;
    pending->fc_notified = 0;
    pending->ptl_proc = ptl_proc;
    pending->ptl_request = ptl_request;

    if (OPAL_UNLIKELY(OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, -1) < 0)) {
        OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
        opal_list_append(&ompi_mtl_portals4.flowctl.pending_sends,
                         &pending->super.super);
        return OMPI_SUCCESS;
    }

    if (OPAL_UNLIKELY(0 != opal_list_get_size(&ompi_mtl_portals4.flowctl.pending_sends))) {
        OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
        opal_list_append(&ompi_mtl_portals4.flowctl.pending_sends,
                         &pending->super.super);
        ompi_mtl_portals4_pending_list_progress();
        return OMPI_SUCCESS;
    }

    if (OPAL_UNLIKELY(ompi_mtl_portals4.flowctl.flowctl_active)) {
        OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
        opal_list_append(&ompi_mtl_portals4.flowctl.pending_sends,
                         &pending->super.super);
        return OMPI_SUCCESS;
    }
#endif

    if (length <= ompi_mtl_portals4.eager_limit) {
        ret = ompi_mtl_portals4_short_isend(mode, start, length,
                                            comm->c_contextid,
                                            tag, comm->c_my_rank,
                                            ptl_proc, ptl_request);
    } else {
        ret = ompi_mtl_portals4_long_isend(start, length, comm->c_contextid,
                                           tag, comm->c_my_rank,
                                           ptl_proc, ptl_request);
    }

    return ret;
}
/*
 * register memory
 */
int mca_mpool_grdma_register(mca_mpool_base_module_t *mpool, void *addr,
                             size_t size, uint32_t flags,
                             mca_mpool_base_registration_t **reg)
{
    mca_mpool_grdma_module_t *mpool_grdma = (mca_mpool_grdma_module_t*)mpool;
    const bool bypass_cache = !!(flags & MCA_MPOOL_FLAGS_CACHE_BYPASS);
    const bool persist = !!(flags & MCA_MPOOL_FLAGS_PERSIST);
    mca_mpool_base_registration_t *grdma_reg;
    opal_free_list_item_t *item;
    unsigned char *base, *bound;
    int rc;

    OPAL_THREAD_LOCK(&mpool->rcache->lock);

    /* if cache bypass is requested don't use the cache */
    base = (unsigned char *) down_align_addr(addr, mca_mpool_base_page_size_log);
    bound = (unsigned char *) up_align_addr((void*)((char*) addr + size - 1),
                                            mca_mpool_base_page_size_log);
    if (!opal_list_is_empty (&mpool_grdma->pool->gc_list))
        do_unregistration_gc(mpool);

#if OPAL_CUDA_GDR_SUPPORT
    if (flags & MCA_MPOOL_FLAGS_CUDA_GPU_MEM) {
        size_t psize;
        mca_common_cuda_get_address_range(&base, &psize, addr);
        bound = base + psize - 1;
        /* Check to see if this memory is in the cache and if it has been freed. If so,
         * this call will boot it out of the cache. */
        check_for_cuda_freed_memory(mpool, base, psize);
    }
#endif /* OPAL_CUDA_GDR_SUPPORT */

    /* look through existing regs if no persistent registration was requested.
     * Persistent registrations are always registered and placed in the cache */
    if(!(bypass_cache || persist)) {
        /* check to see if memory is registered */
        mpool->rcache->rcache_find(mpool->rcache, base, bound - base + 1, reg);
        if (*reg && !(flags & MCA_MPOOL_FLAGS_INVALID)) {
            if (0 == (*reg)->ref_count) {
                /* Leave pinned must be set for this to still be in the rcache. */
                opal_list_remove_item(&mpool_grdma->pool->lru_list,
                                      (opal_list_item_t *)(*reg));
            }

            /* This segment fits fully within an existing segment. */
            mpool_grdma->stat_cache_hit++;
            (*reg)->ref_count++;
            OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
            return OPAL_SUCCESS;
        }

        mpool_grdma->stat_cache_miss++;
        *reg = NULL; /* in case previous find found something */

        /* Unless explicitly requested by the caller always store the
         * registration in the rcache. This will speed up the case where
         * no leave pinned protocol is in use but the same segment is in
         * use in multiple simultaneous transactions. We used to set bypass_cache
         * here if !mca_mpool_grdma_component.leave_pinned. */
    }

    item = opal_free_list_get (&mpool_grdma->reg_list);
    if(NULL == item) {
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        return OPAL_ERR_OUT_OF_RESOURCE;
    }
    grdma_reg = (mca_mpool_base_registration_t*)item;

    grdma_reg->mpool = mpool;
    grdma_reg->base = base;
    grdma_reg->bound = bound;
    grdma_reg->flags = flags;
#if OPAL_CUDA_GDR_SUPPORT
    if (flags & MCA_MPOOL_FLAGS_CUDA_GPU_MEM) {
        mca_common_cuda_get_buffer_id(grdma_reg);
    }
#endif /* OPAL_CUDA_GDR_SUPPORT */

    if (false == bypass_cache) {
        rc = mpool->rcache->rcache_insert(mpool->rcache, grdma_reg, 0);

        if (OPAL_UNLIKELY(rc != OPAL_SUCCESS)) {
            OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
            opal_free_list_return (&mpool_grdma->reg_list, item);
            return rc;
        }
    }

    while (OPAL_ERR_OUT_OF_RESOURCE ==
           (rc = mpool_grdma->resources.register_mem(mpool_grdma->resources.reg_data,
                                                     base, bound - base + 1, grdma_reg))) {
        /* try to remove one unused reg and retry */
        if (!mca_mpool_grdma_evict (mpool)) {
            break;
        }
    }

    if (OPAL_UNLIKELY(rc != OPAL_SUCCESS)) {
        if (false == bypass_cache) {
            mpool->rcache->rcache_delete(mpool->rcache, grdma_reg);
        }
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        opal_free_list_return (&mpool_grdma->reg_list, item);
        return rc;
    }

    *reg = grdma_reg;
    (*reg)->ref_count++;
    OPAL_THREAD_UNLOCK(&mpool->rcache->lock);

    /* Cleanup any vmas that we have deferred deletion on */
    mpool->rcache->rcache_clean(mpool->rcache);
    return OPAL_SUCCESS;
}