Example #1
int mca_pml_ob1_component_fini(void)
{
    int rc;

    /* Shutdown BML */
    if(OMPI_SUCCESS != (rc = mca_bml.bml_finalize()))
        return rc;

    if(!mca_pml_ob1.enabled)
        return OMPI_SUCCESS; /* never selected; return success */
    mca_pml_ob1.enabled = false;  /* not anymore */

    /* return the static receive/send requests to the respective free list and
     * let the free list handle destruction. */
    if( NULL != mca_pml_ob1_recvreq ) {
        opal_free_list_return (&mca_pml_base_recv_requests, (opal_free_list_item_t *) mca_pml_ob1_recvreq);
        mca_pml_ob1_recvreq = NULL;
    }

    if( NULL != mca_pml_ob1_sendreq ) {
        opal_free_list_return (&mca_pml_base_send_requests, (opal_free_list_item_t *) mca_pml_ob1_sendreq);
        mca_pml_ob1_sendreq = NULL;
    }

    OBJ_DESTRUCT(&mca_pml_ob1.rdma_pending);
    OBJ_DESTRUCT(&mca_pml_ob1.pckt_pending);
    OBJ_DESTRUCT(&mca_pml_ob1.recv_pending);
    OBJ_DESTRUCT(&mca_pml_ob1.send_pending);
    OBJ_DESTRUCT(&mca_pml_ob1.non_existing_communicator_pending);
    OBJ_DESTRUCT(&mca_pml_ob1.buffers);
    OBJ_DESTRUCT(&mca_pml_ob1.pending_pckts);
    OBJ_DESTRUCT(&mca_pml_ob1.recv_frags);
    OBJ_DESTRUCT(&mca_pml_ob1.rdma_frags);
    OBJ_DESTRUCT(&mca_pml_ob1.lock);
    OBJ_DESTRUCT(&mca_pml_ob1.send_ranges);

    if( NULL != mca_pml_ob1.allocator ) {
        (void)mca_pml_ob1.allocator->alc_finalize(mca_pml_ob1.allocator);
        mca_pml_ob1.allocator = NULL;
    }

#if 0
    if (mca_pml_base_send_requests.fl_num_allocated !=
        mca_pml_base_send_requests.super.opal_list_length) {
        opal_output(0, "ob1 send requests: %d allocated %d returned\n",
                    mca_pml_base_send_requests.fl_num_allocated,
                    mca_pml_base_send_requests.super.opal_list_length);
    }
    if (mca_pml_base_recv_requests.fl_num_allocated !=
        mca_pml_base_recv_requests.super.opal_list_length) {
        opal_output(0, "ob1 recv requests: %d allocated %d returned\n",
                    mca_pml_base_recv_requests.fl_num_allocated,
                    mca_pml_base_recv_requests.super.opal_list_length);
    }
#endif

    return OMPI_SUCCESS;
}
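Every example in this listing pairs opal_free_list_return with an earlier opal_free_list_get (visible directly in Examples #20, #26, and #28): items are recycled through a free list rather than malloc'd and free'd. A minimal sketch of the pairing, assuming an already-initialized opal_free_list_t; the function name and placeholder comments are hypothetical:

static int use_one_item (opal_free_list_t *list)
{
    opal_free_list_item_t *item = opal_free_list_get (list);
    if (NULL == item) {
        /* list is exhausted and was not allowed to grow */
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    /* ... cast the item to the component-specific request/registration
     * type and use it ... */

    /* hand the item back for reuse instead of freeing it */
    opal_free_list_return (list, item);
    return OPAL_SUCCESS;
}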
Example #2
int mca_rcache_rgpusm_deregister_no_lock(struct mca_rcache_base_module_t *rcache,
                            mca_rcache_base_registration_t *reg)
{
    mca_rcache_rgpusm_module_t *rcache_rgpusm = (mca_rcache_rgpusm_module_t*)rcache;
    int rc = OPAL_SUCCESS;
    assert(reg->ref_count > 0);

    reg->ref_count--;
    opal_output(-1, "Deregister: reg->ref_count=%d", (int)reg->ref_count);
    if(reg->ref_count > 0) {
        return OPAL_SUCCESS;
    }
    if(mca_rcache_rgpusm_component.leave_pinned && registration_is_cachebale(reg))
    {
        /* if leave_pinned is set don't deregister memory, but put it
         * on LRU list for future use */
        opal_list_prepend(&rcache_rgpusm->lru_list, (opal_list_item_t*)reg);
    } else {
        /* Remove from rcache first */
        if(!(reg->flags & MCA_RCACHE_FLAGS_CACHE_BYPASS))
            mca_rcache_base_vma_delete (rcache_rgpusm->vma_module, reg);

        assert(reg->ref_count == 0);
        rc = cuda_closememhandle (NULL, reg);

        if(OPAL_SUCCESS == rc) {
            opal_free_list_return (&rcache_rgpusm->reg_list,
                                   (opal_free_list_item_t*)reg);
        }
    }

    return rc;
}
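The top of this function is a reference-count gate that recurs in Examples #8, #9, #15, and #19: every caller drops one reference, but only the last one does the real teardown. Reduced to a skeleton with hypothetical names (my_obj_t, teardown), and assuming the caller holds whatever lock protects ref_count:

static int obj_release (my_obj_t *obj)   /* my_obj_t is hypothetical */
{
    assert(obj->ref_count > 0);
    obj->ref_count--;                /* drop this caller's reference */
    if (obj->ref_count > 0) {
        return OPAL_SUCCESS;         /* other holders remain; nothing to do */
    }
    teardown(obj);                   /* hypothetical: runs only for the last reference */
    return OPAL_SUCCESS;
}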
Example #3
/* Destroy the tree */
int opal_rb_tree_destroy(opal_rb_tree_t *tree)
{
    opal_free_list_item_t * item;
    /* Recursive inorder traversal for delete    */

    inorder_destroy(tree, tree->root_ptr);
    /* Now free the root -- root does not get free'd in the above
     * inorder destroy    */
    item = (opal_free_list_item_t *) tree->root_ptr;
    opal_free_list_return(&(tree->free_list), item);

    /* free the tree->nill node */
    item = (opal_free_list_item_t *) tree->nill;
    opal_free_list_return (&(tree->free_list), item);
    return OPAL_SUCCESS;
}
Example #4
static inline bool mca_mpool_rgpusm_deregister_lru (mca_mpool_base_module_t *mpool) {
    mca_mpool_rgpusm_module_t *mpool_rgpusm = (mca_mpool_rgpusm_module_t *) mpool;
    mca_mpool_base_registration_t *old_reg;
    int rc;

    /* Remove the registration from the cache and list before
       deregistering the memory */
    old_reg = (mca_mpool_base_registration_t*)
        opal_list_remove_first (&mpool_rgpusm->lru_list);
    if (NULL == old_reg) {
        return false;
    }

    mpool->rcache->rcache_delete(mpool->rcache, old_reg);

    /* Drop the rcache lock while we deregister the memory */
    OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
    assert(old_reg->ref_count == 0);
    rc = mpool_rgpusm->resources.deregister_mem(mpool_rgpusm->resources.reg_data,
                                                old_reg);
    OPAL_THREAD_LOCK(&mpool->rcache->lock);

    /* This introduces a potential leak of registrations if
       the deregistration fails to occur as we no longer have
       a reference to it. Is this possible? */
    if (OPAL_SUCCESS != rc) {
        return false;
    }

    opal_free_list_return (&mpool_rgpusm->reg_list,
                           (opal_free_list_item_t*)old_reg);
    mpool_rgpusm->stat_evicted++;

    return true;
}
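The boolean result matters to callers: false means nothing was left to evict. Example #28 drives the rcache variant of this function in exactly such a retry loop; a condensed, hypothetical sketch of how a register path would consume this mpool variant (try_register_again is a stand-in, not a real function):

    /* retry registration, evicting one LRU entry per failure, until
     * either it succeeds or the LRU list runs dry */
    while (OPAL_SUCCESS != rc) {
        if (!mca_mpool_rgpusm_deregister_lru (mpool)) {
            rc = OPAL_ERROR;          /* nothing left to evict; give up */
            break;
        }
        rc = try_register_again ();   /* hypothetical stand-in */
    }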
Example #5
void mca_rcache_rgpusm_finalize(struct mca_rcache_base_module_t *rcache)
{
    mca_rcache_rgpusm_module_t *rcache_rgpusm = (mca_rcache_rgpusm_module_t*)rcache;
    mca_rcache_base_registration_t *reg;
    mca_rcache_base_registration_t *regs[RGPUSM_RCACHE_NREGS];
    int reg_cnt, i;
    int rc;

    /* Statistic */
    if(true == mca_rcache_rgpusm_component.print_stats) {
        opal_output(0, "%s rgpusm: stats "
                "(hit/valid/invalid/miss/evicted): %d/%d/%d/%d/%d\n",
                OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
                rcache_rgpusm->stat_cache_hit, rcache_rgpusm->stat_cache_valid,
                rcache_rgpusm->stat_cache_invalid, rcache_rgpusm->stat_cache_miss,
                rcache_rgpusm->stat_evicted);
    }

    OPAL_THREAD_LOCK(&rcache->lock);
    do {
        reg_cnt = mca_rcache_base_vma_find_all (rcache_rgpusm->vma_module, 0, (size_t)-1,
                regs, RGPUSM_RCACHE_NREGS);
        opal_output(-1, "Registration size at finalize = %d", reg_cnt);

        for(i = 0; i < reg_cnt; i++) {
            reg = regs[i];

            if(reg->ref_count) {
                reg->ref_count = 0; /* otherwise dereg will fail on the assert */
            } else if (mca_rcache_rgpusm_component.leave_pinned) {
                opal_list_remove_item(&rcache_rgpusm->lru_list,
                        (opal_list_item_t*)reg);
            }

            /* Remove from rcache first */
            mca_rcache_base_vma_delete (rcache_rgpusm->vma_module, reg);

            /* Drop lock before deregistering memory */
            OPAL_THREAD_UNLOCK(&rcache->lock);
            assert(reg->ref_count == 0);
            rc = cuda_closememhandle (NULL, reg);
            OPAL_THREAD_LOCK(&rcache->lock);

            if(rc != OPAL_SUCCESS) {
                /* Potentially lose track of registrations
                   do we have to put it back? */
                continue;
            }

            opal_free_list_return (&rcache_rgpusm->reg_list,
                                   (opal_free_list_item_t *) reg);
        }
    } while(reg_cnt == RGPUSM_RCACHE_NREGS);

    OBJ_DESTRUCT(&rcache_rgpusm->lru_list);
    OBJ_DESTRUCT(&rcache_rgpusm->reg_list);
    OPAL_THREAD_UNLOCK(&rcache->lock);
}
Example #6
int mca_btl_ugni_ep_disconnect (mca_btl_base_endpoint_t *ep, bool send_disconnect)
{
    mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (ep);
    mca_btl_ugni_device_t *device;
    int rc;

    if (MCA_BTL_UGNI_EP_STATE_INIT == ep->state) {
        /* nothing to do */
        return OPAL_SUCCESS;
    }

    device = ep->smsg_ep_handle.device;

    while (device->dev_smsg_local_cq.active_operations) {
        /* ensure all sends are complete before removing any procs */
        rc = mca_btl_ugni_progress_local_smsg (ugni_module, device);
        if (OPAL_SUCCESS != rc) {
            break;
        }
    }

    if (MCA_BTL_UGNI_EP_STATE_CONNECTED == ep->state && send_disconnect) {
        rc = mca_btl_ugni_ep_send_disconnect (ep);
        if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
            BTL_VERBOSE(("could not send disconnect message to peer"));
        }

        /* wait for the disconnect message to go out */
        do {
            /* ensure all sends are complete before removing any procs */
            rc = mca_btl_ugni_progress_local_smsg (ugni_module, device);
            if (OPAL_SUCCESS != rc) {
                break;
            }
        } while (device->dev_smsg_local_cq.active_operations);

        (void) opal_atomic_add_fetch_32 (&ep->smsg_ep_handle.device->smsg_connections, -1);
    }

    mca_btl_ugni_device_lock (device);

    /* NTH: this call may not need the device lock. seems to work without it but
     * the lock is here to be safe. */
    (void) mca_btl_ugni_ep_handle_cleanup (&ep->smsg_ep_handle);

    mca_btl_ugni_device_unlock (device);

    if (ep->mailbox) {
        opal_free_list_return (&ugni_module->smsg_mboxes, ((opal_free_list_item_t *) ep->mailbox));
        ep->mailbox = NULL;
    }

    ep->state = MCA_BTL_UGNI_EP_STATE_INIT;

    return OPAL_SUCCESS;
}
Example #7
static void releaseBuffer(void *ptr, ompi_java_buffer_t *item)
{
    if(item == NULL)
    {
        free(ptr);
    }
    else
    {
        assert(item->buffer == ptr);
        opal_free_list_return (&ompi_java_buffers, (opal_free_list_item_t*)item);
    }
}
Example #8
int mca_mpool_rgpusm_deregister(struct mca_mpool_base_module_t *mpool,
                            mca_mpool_base_registration_t *reg)
{
    mca_mpool_rgpusm_module_t *mpool_rgpusm = (mca_mpool_rgpusm_module_t*)mpool;
    int rc = OPAL_SUCCESS;
    assert(reg->ref_count > 0);

    OPAL_THREAD_LOCK(&mpool->rcache->lock);
    reg->ref_count--;
    opal_output(-1, "Deregister: reg->ref_count=%d", (int)reg->ref_count);
    if(reg->ref_count > 0) {
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        return OPAL_SUCCESS;
    }
    if(mca_mpool_rgpusm_component.leave_pinned && registration_is_cachebale(reg))
    {
        /* if leave_pinned is set don't deregister memory, but put it
         * on LRU list for future use */
        opal_list_prepend(&mpool_rgpusm->lru_list, (opal_list_item_t*)reg);
    } else {
        /* Remove from rcache first */
        if(!(reg->flags & MCA_MPOOL_FLAGS_CACHE_BYPASS))
            mpool->rcache->rcache_delete(mpool->rcache, reg);

        /* Drop the rcache lock before deregistering the memory */
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);

        {
             mca_mpool_rgpusm_module_t *mpool_rgpusm = (mca_mpool_rgpusm_module_t *)mpool;

             assert(reg->ref_count == 0);
             rc = mpool_rgpusm->resources.deregister_mem(mpool_rgpusm->resources.reg_data,
                                                         reg);
         }

        OPAL_THREAD_LOCK(&mpool->rcache->lock);

        if(OPAL_SUCCESS == rc) {
            opal_free_list_return (&mpool_rgpusm->reg_list,
                                   (opal_free_list_item_t*)reg);
        }
    }
    OPAL_THREAD_UNLOCK(&mpool->rcache->lock);

    /* Cleanup any vmas that we have deferred deletion on */
    mpool->rcache->rcache_clean(mpool->rcache);

    return rc;
}
Example #9
int mca_mpool_rgpusm_deregister(struct mca_mpool_base_module_t *mpool,
                            mca_mpool_base_registration_t *reg)
{
    mca_mpool_rgpusm_module_t *mpool_rgpusm = (mca_mpool_rgpusm_module_t*)mpool;
    int rc = OPAL_SUCCESS;
    assert(reg->ref_count > 0);

    opal_mutex_lock (&mpool->rcache->lock);
    reg->ref_count--;
    opal_output(-1, "Deregister: reg->ref_count=%d", (int)reg->ref_count);
    if(reg->ref_count > 0) {
        opal_mutex_unlock (&mpool->rcache->lock);
        return OPAL_SUCCESS;
    }
    if(mca_mpool_rgpusm_component.leave_pinned && registration_is_cacheable(reg))
    {
        /* if leave_pinned is set don't deregister memory, but put it
         * on LRU list for future use */
        opal_output_verbose(20, mca_mpool_rgpusm_component.output,
                            "RGPUSM: Deregister: addr=%p, size=%d: cacheable and pinned, leave in cache, PUSH IN LRU",
                            reg->base, (int)(reg->bound - reg->base + 1));
        opal_list_prepend(&mpool_rgpusm->lru_list, (opal_list_item_t*)reg);
    } else {
        /* Remove from rcache first */
        if(!(reg->flags & MCA_MPOOL_FLAGS_CACHE_BYPASS))
            mpool->rcache->rcache_delete(mpool->rcache, reg);

        /* Drop the rcache lock before deregistering the memory */
        opal_mutex_unlock (&mpool->rcache->lock);

        {
             mca_mpool_rgpusm_module_t *mpool_rgpusm = (mca_mpool_rgpusm_module_t *)mpool;

             assert(reg->ref_count == 0);
             rc = mpool_rgpusm->resources.deregister_mem(mpool_rgpusm->resources.reg_data,
                                                         reg);
         }

        opal_mutex_lock (&mpool->rcache->lock);

        if(OPAL_SUCCESS == rc) {
            opal_free_list_return (&mpool_rgpusm->reg_list,
                                   (opal_free_list_item_t*)reg);
        }
    }
    opal_mutex_unlock (&mpool->rcache->lock);

    return rc;
}
Example #10
static void
inorder_destroy(opal_rb_tree_t *tree, opal_rb_tree_node_t * node)
{
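    /* Each call returns its *children* to the free list after recursing
     * into them, so every node is released by its parent; the root is
     * handed back by the caller (see opal_rb_tree_destroy, Example #3). */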
    opal_free_list_item_t * item;

    if (node == tree->nill) {
        return;
    }

    inorder_destroy(tree, node->left);

    if (node->left != tree->nill) {
        item = (opal_free_list_item_t *) node->left;
        --tree->tree_size;
        opal_free_list_return (&tree->free_list, item);
    }

    inorder_destroy(tree, node->right);
    if (node->right != tree->nill) {
        item = (opal_free_list_item_t *) node->right;
        --tree->tree_size;
        opal_free_list_return (&tree->free_list, item);
    }
}
Example #11
/* Delete a node from the tree based on the key */
int opal_rb_tree_delete(opal_rb_tree_t *tree, void *key)
{
    opal_rb_tree_node_t * p;
    opal_rb_tree_node_t * todelete;
    opal_rb_tree_node_t * y;
    opal_free_list_item_t * item;

    p = opal_rb_tree_find_node(tree, key);
    if (NULL == p) {
        return OPAL_ERR_NOT_FOUND;
    }
    if ((p->left == tree->nill) || (p->right == tree->nill)) {
        todelete = p;
    } else {
        todelete = btree_successor(tree, p);
    }

    if (todelete->left == tree->nill) {
        y = todelete->right;
    } else {
        y = todelete->left;
    }

    y->parent = todelete->parent;

    if (y->parent == tree->root_ptr) {
        tree->root_ptr->left = y;
    } else {
        if (todelete == todelete->parent->left) {
            todelete->parent->left = y;
        } else {
            todelete->parent->right = y;
        }
    }

    if (todelete != p) {
        p->key = todelete->key;
        p->value = todelete->value;
    }

    if (todelete->color == BLACK) {
        btree_delete_fixup(tree, y);
    }
    item = (opal_free_list_item_t *) todelete;
    opal_free_list_return (&(tree->free_list), item);
    --tree->tree_size;
    return OPAL_SUCCESS;
}
Example #12
static int mca_spml_ikrit_put_request_free(struct oshmem_request_t** request)
{
    mca_spml_ikrit_put_request_t *put_req =
            *(mca_spml_ikrit_put_request_t **) request;

    assert(false == put_req->req_put.req_base.req_free_called);
    OPAL_THREAD_LOCK(&oshmem_request_lock);
    put_req->req_put.req_base.req_free_called = true;
    opal_free_list_return (&mca_spml_base_put_requests,
                           (opal_free_list_item_t*)put_req);
    OPAL_THREAD_UNLOCK(&oshmem_request_lock);

    *request = SHMEM_REQUEST_NULL; /*MPI_REQUEST_NULL;*/

    return OSHMEM_SUCCESS;
}
Example #13
static int frag_send_cb (ompi_request_t *request)
{
    ompi_osc_pt2pt_frag_t *frag =
        (ompi_osc_pt2pt_frag_t*) request->req_complete_cb_data;
    ompi_osc_pt2pt_module_t *module = frag->module;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "osc pt2pt: frag_send complete to %d, frag = %p, request = %p",
                         frag->target, (void *) frag, (void *) request));

    mark_outgoing_completion(module);
    opal_free_list_return (&mca_osc_pt2pt_component.frags, &frag->super);

    ompi_request_free (&request);

    return 1;
}
Example #14
static int mca_spml_ikrit_get_request_free(struct oshmem_request_t** request)
{
    mca_spml_ikrit_get_request_t *get_req =
            *(mca_spml_ikrit_get_request_t **) request;

    OPAL_THREAD_LOCK(&oshmem_request_lock);
    assert(false == get_req->req_get.req_base.req_free_called);
    get_req->req_get.req_base.req_free_called = true;
    opal_free_list_return (&mca_spml_base_get_requests,
                           (opal_free_list_item_t*)get_req);
    opal_memchecker_base_mem_noaccess(get_req, sizeof(*get_req));
    OPAL_THREAD_UNLOCK(&oshmem_request_lock);

    *request = SHMEM_REQUEST_NULL; /*MPI_REQUEST_NULL;*/

    return OSHMEM_SUCCESS;
}
Example #15
int mca_rcache_rgpusm_deregister(struct mca_rcache_base_module_t *rcache,
                            mca_rcache_base_registration_t *reg)
{
    mca_rcache_rgpusm_module_t *rcache_rgpusm = (mca_rcache_rgpusm_module_t*)rcache;
    int rc = OPAL_SUCCESS;
    assert(reg->ref_count > 0);

    OPAL_THREAD_LOCK(&rcache->lock);
    reg->ref_count--;
    opal_output(-1, "Deregister: reg->ref_count=%d", (int)reg->ref_count);
    if(reg->ref_count > 0) {
        OPAL_THREAD_UNLOCK(&rcache->lock);
        return OPAL_SUCCESS;
    }
    if(mca_rcache_rgpusm_component.leave_pinned && registration_is_cachebale(reg))
    {
        /* if leave_pinned is set don't deregister memory, but put it
         * on LRU list for future use */
        opal_output_verbose(20, mca_rcache_rgpusm_component.output,
                            "RGPUSM: Deregister: addr=%p, size=%d: cacheable and pinned, leave in cache, PUSH IN LRU",
                            reg->base, (int)(reg->bound - reg->base + 1));
        opal_list_prepend(&rcache_rgpusm->lru_list, (opal_list_item_t*)reg);
    } else {
        /* Remove from rcache first */
        if(!(reg->flags & MCA_RCACHE_FLAGS_CACHE_BYPASS))
            mca_rcache_base_vma_delete (rcache_rgpusm->vma_module, reg);

        /* Drop the rcache lock before deregistering the memory */
        OPAL_THREAD_UNLOCK(&rcache->lock);

        {
             assert(reg->ref_count == 0);
             rc = cuda_closememhandle (NULL, reg);
         }

        OPAL_THREAD_LOCK(&rcache->lock);

        if(OPAL_SUCCESS == rc) {
            opal_free_list_return (&rcache_rgpusm->reg_list,
                                   (opal_free_list_item_t*)reg);
        }
    }
    OPAL_THREAD_UNLOCK(&rcache->lock);

    return rc;
}
Example #16
static inline int dereg_mem(mca_mpool_base_registration_t *reg)
{
    mca_mpool_grdma_module_t *mpool_grdma = (mca_mpool_grdma_module_t *) reg->mpool;
    int rc;

    if(!(reg->flags & MCA_MPOOL_FLAGS_CACHE_BYPASS))
        reg->mpool->rcache->rcache_delete(reg->mpool->rcache, reg);

    /* Drop the rcache lock before deregistering the memory */
    OPAL_THREAD_UNLOCK(&reg->mpool->rcache->lock);
    rc = mpool_grdma->resources.deregister_mem(mpool_grdma->resources.reg_data,
                                               reg);
    OPAL_THREAD_LOCK(&reg->mpool->rcache->lock);

    if (OPAL_LIKELY(OPAL_SUCCESS == rc)) {
        opal_free_list_return (&mpool_grdma->reg_list,
                               (opal_free_list_item_t *) reg);
    }

    return rc;
}
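The unlock/deregister/relock sequence above is the idiom shared by Examples #4, #5, #8, #9, #15, and #17: the rcache lock is never held across the potentially slow or blocking driver deregistration call. Its general shape, with a hypothetical stand-in for the driver call:

static int dereg_with_lock_drop (opal_mutex_t *lock)
{
    int rc;

    OPAL_THREAD_UNLOCK(lock);        /* drop before the blocking call */
    rc = slow_driver_deregister ();  /* hypothetical stand-in */
    OPAL_THREAD_LOCK(lock);          /* reacquire before touching shared state */

    return rc;
}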
Example #17
static inline bool mca_mpool_rgpusm_deregister_lru (mca_mpool_base_module_t *mpool) {
    mca_mpool_rgpusm_module_t *mpool_rgpusm = (mca_mpool_rgpusm_module_t *) mpool;
    mca_mpool_base_registration_t *old_reg;
    int rc;

    /* Remove the registration from the cache and list before
       deregistering the memory */
    old_reg = (mca_mpool_base_registration_t*)
        opal_list_remove_first (&mpool_rgpusm->lru_list);
    if (NULL == old_reg) {
        opal_output_verbose(10, mca_mpool_rgpusm_component.output,
                            "RGPUSM: The LRU list is empty. There is nothing to deregister");
        return false;
    }

    mpool->rcache->rcache_delete(mpool->rcache, old_reg);

    /* Drop the rcache lock while we deregister the memory */
    opal_mutex_unlock (&mpool->rcache->lock);
    assert(old_reg->ref_count == 0);
    rc = mpool_rgpusm->resources.deregister_mem(mpool_rgpusm->resources.reg_data,
                                                old_reg);
    opal_mutex_lock (&mpool->rcache->lock);

    /* This introduces a potential leak of registrations if
       the deregistration fails to occur as we no longer have
       a reference to it. Is this possible? */
    if (OPAL_SUCCESS != rc) {
        opal_output_verbose(10, mca_mpool_rgpusm_component.output,
                            "RGPUSM: Failed to deregister the memory addr=%p, size=%d",
                            old_reg->base, (int)(old_reg->bound - old_reg->base + 1));
        return false;
    }

    opal_free_list_return (&mpool_rgpusm->reg_list,
                           (opal_free_list_item_t*)old_reg);
    mpool_rgpusm->stat_evicted++;

    return true;
}
Example #18
int mca_btl_ugni_ep_disconnect (mca_btl_base_endpoint_t *ep, bool send_disconnect) {
    gni_return_t rc;

    if (MCA_BTL_UGNI_EP_STATE_INIT == ep->state) {
        /* nothing to do */
        return OPAL_SUCCESS;
    }

    if (MCA_BTL_UGNI_EP_STATE_CONNECTED == ep->state && send_disconnect) {
        OPAL_THREAD_LOCK(&ep->common->dev->dev_lock);
        rc = GNI_SmsgSendWTag (ep->smsg_ep_handle, NULL, 0, NULL, 0, -1,
                               MCA_BTL_UGNI_TAG_DISCONNECT);
        OPAL_THREAD_UNLOCK(&ep->common->dev->dev_lock);
        if (GNI_RC_SUCCESS != rc) {
            BTL_VERBOSE(("btl/ugni could not send close message"));
        }

        /* we might want to wait for local completion here (do we even care? yes, we do) */
        /* TODO: FIX FIX FIX */

    }

    /* TODO: FIX GROSS */
    OPAL_THREAD_LOCK(&ep->common->dev->dev_lock);
    (void) opal_common_ugni_ep_destroy (&ep->smsg_ep_handle);
    (void) opal_common_ugni_ep_destroy (&ep->rdma_ep_handle);
    OPAL_THREAD_UNLOCK(&ep->common->dev->dev_lock);

    if (ep->mailbox) {
        opal_free_list_return (&ep->btl->smsg_mboxes, ((opal_free_list_item_t *) ep->mailbox));
        ep->mailbox = NULL;
    }

    ep->state = MCA_BTL_UGNI_EP_STATE_INIT;
    (void) opal_atomic_add_64 (&ep->btl->connected_peer_count, -1);

    return OPAL_SUCCESS;
}
Example #19
int mca_mpool_rgpusm_deregister_no_lock(struct mca_mpool_base_module_t *mpool,
                            mca_mpool_base_registration_t *reg)
{
    mca_mpool_rgpusm_module_t *mpool_rgpusm = (mca_mpool_rgpusm_module_t*)mpool;
    int rc = OPAL_SUCCESS;
    assert(reg->ref_count > 0);

    reg->ref_count--;
    opal_output(-1, "Deregister: reg->ref_count=%d", (int)reg->ref_count);
    if(reg->ref_count > 0) {
        return OPAL_SUCCESS;
    }
    if(mca_mpool_rgpusm_component.leave_pinned && registration_is_cachebale(reg))
    {
        /* if leave_pinned is set don't deregister memory, but put it
         * on LRU list for future use */
        opal_list_prepend(&mpool_rgpusm->lru_list, (opal_list_item_t*)reg);
    } else {
        /* Remove from rcache first */
        if(!(reg->flags & MCA_MPOOL_FLAGS_CACHE_BYPASS))
            mpool->rcache->rcache_delete(mpool->rcache, reg);

        {
             mca_mpool_rgpusm_module_t *mpool_rgpusm = (mca_mpool_rgpusm_module_t *)mpool;

             assert(reg->ref_count == 0);
             rc = mpool_rgpusm->resources.deregister_mem(mpool_rgpusm->resources.reg_data,
                                                         reg);
         }

        if(OPAL_SUCCESS == rc) {
            opal_free_list_return (&mpool_rgpusm->reg_list,
                                   (opal_free_list_item_t*)reg);
        }
    }

    return rc;
}
Example #20
/* Create the tree */
int opal_rb_tree_init(opal_rb_tree_t * tree,
                      opal_rb_tree_comp_fn_t comp)
{
    opal_free_list_item_t * node;
    /* we need to get memory for the root pointer from the free list */
    node = opal_free_list_get (&(tree->free_list));
    tree->root_ptr = (opal_rb_tree_node_t *) node;
    if (NULL == node) {
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    node = opal_free_list_get (&(tree->free_list));
    if (NULL == node) {
        opal_free_list_return (&tree->free_list, (opal_free_list_item_t*)tree->root_ptr);
        return OPAL_ERR_OUT_OF_RESOURCE;
    }
    tree->nill = (opal_rb_tree_node_t *) node;
    /* initialize tree->nill */
    tree->nill->color = BLACK;
    tree->nill->left = tree->nill;
    tree->nill->right = tree->nill;
    tree->nill->parent = tree->nill;

    /* initialize the 'root' pointer */
    tree->root_ptr->left = tree->nill;
    tree->root_ptr->right = tree->nill;
    tree->root_ptr->parent = tree->nill;
    tree->root_ptr->color = BLACK;

    tree->comp = comp;

    /* set the tree size to zero */
    tree->tree_size = 0;

    return OPAL_SUCCESS;
}
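A hypothetical usage sketch of the init/destroy pair (this example together with Example #3), assuming the usual OBJ_CONSTRUCT lifecycle and a simple pointer-ordering comparator; none of the names below come from the examples above:

static int ptr_comp (void *key1, void *key2)
{
    return (key1 < key2) ? -1 : ((key1 > key2) ? 1 : 0);
}

static int build_and_tear_down (void)
{
    opal_rb_tree_t tree;
    int rc;

    OBJ_CONSTRUCT(&tree, opal_rb_tree_t);
    rc = opal_rb_tree_init (&tree, ptr_comp);
    if (OPAL_SUCCESS != rc) {
        return rc;   /* the free list could not supply the bookkeeping nodes */
    }

    /* ... insert/find/delete key-value pairs here ... */

    /* hands every node back to the tree's internal free list */
    return opal_rb_tree_destroy (&tree);
}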
Example #21
static inline void free_put_req(mca_spml_ikrit_put_request_t *put_req)
{
    opal_free_list_return (&mca_spml_base_put_requests,
                           (opal_free_list_item_t*)put_req);
    opal_memchecker_base_mem_noaccess(put_req, sizeof(*put_req));
}
Example #22
static inline int
ompi_mtl_portals4_callback(ptl_event_t *ev,
                           ompi_mtl_portals4_base_request_t* ptl_base_request,
                           bool *complete)
{
    int retval = OMPI_SUCCESS, ret, val, add = 1;
    ompi_mtl_portals4_isend_request_t* ptl_request =
        (ompi_mtl_portals4_isend_request_t*) ptl_base_request;

#if OMPI_MTL_PORTALS4_FLOW_CONTROL
    if (OPAL_UNLIKELY(ev->ni_fail_type == PTL_NI_PT_DISABLED)) {
        ompi_mtl_portals4_pending_request_t *pending =
            ptl_request->pending;

        OPAL_OUTPUT_VERBOSE((10, ompi_mtl_base_framework.framework_output,
                             "send %lu hit flow control (%d)",
                             ptl_request->opcount, ev->type));

        /* BWB: FIX ME: this is a hack.. */
        if (pending->fc_notified) {
            return OMPI_SUCCESS;
        }
        pending->fc_notified = 1;

        if (!PtlHandleIsEqual(ptl_request->me_h, PTL_INVALID_HANDLE)) {
            ret = PtlMEUnlink(ptl_request->me_h);
            if (PTL_OK != ret) {
                opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                    "%s:%d: send callback PtlMEUnlink returned %d",
                                    __FILE__, __LINE__, ret);
            }
        }

        opal_list_append(&ompi_mtl_portals4.flowctl.pending_sends,
                         &pending->super.super);
        OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
        ompi_mtl_portals4_flowctl_trigger();

        return OMPI_SUCCESS;
    }
#endif

    if (OPAL_UNLIKELY(ev->ni_fail_type != PTL_NI_OK)) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: send callback ni_fail_type: %d",
                            __FILE__, __LINE__, ev->ni_fail_type);
        *complete = true;
        return OMPI_ERROR;
    }

    OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                         "send %lu got event of type %d",
                         ptl_request->opcount, ev->type));

    if ((PTL_EVENT_ACK == ev->type) &&
        (PTL_PRIORITY_LIST == ev->ptl_list) &&
        (eager == ompi_mtl_portals4.protocol) &&
        (!PtlHandleIsEqual(ptl_request->me_h, PTL_INVALID_HANDLE))) {
        /* long expected messages with the eager protocol won't see a
           get event to complete the message.  Give them an extra
           count to cause the message to complete with just the SEND
           and ACK events and remove the ME. (we wait for the counter
           to reach 3 events, but short messages start the counter at
           1, so they don't need to enter this path) */
        ret = PtlMEUnlink(ptl_request->me_h);
        if (PTL_OK != ret) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "%s:%d: send callback PtlMEUnlink returned %d",
                                __FILE__, __LINE__, ret);
        }
        add++;
    }
    val = OPAL_THREAD_ADD32((int32_t*)&ptl_request->event_count, add);

    assert(val <= 3);

    if (val == 3) {
        if (NULL != ptl_request->buffer_ptr) {
            free(ptl_request->buffer_ptr);
        }

        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, "send %lu completed",
                             ptl_request->opcount));

        *complete = true;
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
        OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
        opal_free_list_return (&ompi_mtl_portals4.flowctl.pending_fl,
                               &ptl_request->pending->super);

        if (OPAL_UNLIKELY(0 != opal_list_get_size(&ompi_mtl_portals4.flowctl.pending_sends))) {
            ompi_mtl_portals4_pending_list_progress();
        }
#endif
    }

    return retval;
}
Example #23
int mca_btl_ofi_context_progress(mca_btl_ofi_context_t *context) {

    int ret = 0;
    int events_read;
    int events = 0;
    struct fi_cq_entry cq_entry[MCA_BTL_OFI_DEFAULT_MAX_CQE];
    struct fi_cq_err_entry cqerr = {0};

    mca_btl_ofi_completion_context_t *c_ctx;
    mca_btl_ofi_base_completion_t *comp;
    mca_btl_ofi_rdma_completion_t *rdma_comp;
    mca_btl_ofi_frag_completion_t *frag_comp;

    ret = fi_cq_read(context->cq, &cq_entry, mca_btl_ofi_component.num_cqe_read);

    if (0 < ret) {
        events_read = ret;
        for (int i = 0; i < events_read; i++) {
            if (NULL != cq_entry[i].op_context) {
                ++events;

                c_ctx = (mca_btl_ofi_completion_context_t*) cq_entry[i].op_context;

                /* We are casting to every type here just for simplicity. */
                comp = (mca_btl_ofi_base_completion_t*) c_ctx->comp;
                frag_comp = (mca_btl_ofi_frag_completion_t*) c_ctx->comp;
                rdma_comp = (mca_btl_ofi_rdma_completion_t*) c_ctx->comp;

                switch (comp->type) {
                case MCA_BTL_OFI_TYPE_GET:
                case MCA_BTL_OFI_TYPE_PUT:
                case MCA_BTL_OFI_TYPE_AOP:
                case MCA_BTL_OFI_TYPE_AFOP:
                case MCA_BTL_OFI_TYPE_CSWAP:
                    /* call the callback */
                    if (rdma_comp->cbfunc) {
                        rdma_comp->cbfunc (comp->btl, comp->endpoint,
                                           rdma_comp->local_address, rdma_comp->local_handle,
                                           rdma_comp->cbcontext, rdma_comp->cbdata, OPAL_SUCCESS);
                    }

                    MCA_BTL_OFI_NUM_RDMA_DEC((mca_btl_ofi_module_t*) comp->btl);
                    break;

                case MCA_BTL_OFI_TYPE_RECV:
                    mca_btl_ofi_recv_frag((mca_btl_ofi_module_t*)  comp->btl,
                                          (mca_btl_ofi_endpoint_t*) comp->endpoint,
                                          context, frag_comp->frag);
                    break;

                case MCA_BTL_OFI_TYPE_SEND:
                    MCA_BTL_OFI_NUM_SEND_DEC((mca_btl_ofi_module_t*) comp->btl);
                    mca_btl_ofi_frag_complete(frag_comp->frag, OPAL_SUCCESS);
                    break;

                default:
                    /* catastrophic */
                    BTL_ERROR(("unknown completion type"));
                    MCA_BTL_OFI_ABORT();
                }

                /* return the completion handler */
                opal_free_list_return(comp->my_list, (opal_free_list_item_t*) comp);
            }
        }
    } else if (OPAL_UNLIKELY(ret == -FI_EAVAIL)) {
        ret = fi_cq_readerr(context->cq, &cqerr, 0);

        /* cq readerr failed!? */
        if (0 > ret) {
            BTL_ERROR(("%s:%d: Error returned from fi_cq_readerr: %s(%d)",
                       __FILE__, __LINE__, fi_strerror(-ret), ret));
        } else {
            BTL_ERROR(("fi_cq_readerr: (provider err_code = %d)\n",
                       cqerr.prov_errno));
        }
        MCA_BTL_OFI_ABORT();
    }
#ifdef FI_EINTR
    /* sometimes the sockets provider complains about an interrupt. We do nothing. */
    else if (OPAL_UNLIKELY(ret == -FI_EINTR)) {

    }
#endif
    /* If the error is not FI_EAGAIN, report the error and abort. */
    else if (OPAL_UNLIKELY(ret != -FI_EAGAIN)) {
        BTL_ERROR(("fi_cq_read returned error %d:%s", ret, fi_strerror(-ret)));
        MCA_BTL_OFI_ABORT();
    }

    return events;
}
Example #24
/*
 * put an item back into the free list
 */
void mca_mpool_base_tree_item_put(mca_mpool_base_tree_item_t* item) {
    opal_free_list_return (&mca_mpool_base_tree_item_free_list,
                           &item->super);
}
Example #25
static int
ompi_mtl_portals4_rndv_get_frag_progress(ptl_event_t *ev,
                                         ompi_mtl_portals4_rndv_get_frag_t* rndv_get_frag)
{
    int ret;
    ompi_mtl_portals4_recv_request_t* ptl_request =
        (ompi_mtl_portals4_recv_request_t*) rndv_get_frag->request;

    assert(PTL_EVENT_REPLY == ev->type);

    OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
        "Recv %lu (0x%lx) got reply event",
        ptl_request->opcount, ptl_request->hdr_data));


    if (OPAL_UNLIKELY(ev->ni_fail_type != PTL_NI_OK)) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PTL_EVENT_REPLY with ni_fail_type: %d",
                            __FILE__, __LINE__, ev->ni_fail_type);

        if (OPAL_UNLIKELY(ev->ni_fail_type != PTL_NI_DROPPED)) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "PTL_EVENT_REPLY with ni_fail_type: %u => cannot retry",
                                (uint32_t)ev->ni_fail_type);
            ret = PTL_FAIL;
            goto callback_error;
        }

        if (0 == rndv_get_frag->frag_abs_timeout_usec) {
            /* this is the first retry of the frag.  start the timer. */
            /* instead of recording the start time, record the end time
             * and avoid addition on each retry. */
            rndv_get_frag->frag_abs_timeout_usec = opal_timer_base_get_usec() + ompi_mtl_portals4.get_retransmit_timeout;
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "setting frag timeout at %lu",
                                rndv_get_frag->frag_abs_timeout_usec);
        } else if (opal_timer_base_get_usec() >= rndv_get_frag->frag_abs_timeout_usec) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "timeout retrying GET");
            ret = PTL_FAIL;
            goto callback_error;
        }

        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
            "Rendezvous Get Failed: Reissuing frag #%u", rndv_get_frag->frag_num));

        ret = PtlGet(ompi_mtl_portals4.send_md_h,
                     (ptl_size_t) rndv_get_frag->frag_start,
                     rndv_get_frag->frag_length,
                     rndv_get_frag->frag_target,
                     ompi_mtl_portals4.read_idx,
                     rndv_get_frag->frag_match_bits,
                     rndv_get_frag->frag_remote_offset,
                     rndv_get_frag);
        if (OPAL_UNLIKELY(PTL_OK != ret)) {
            if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
            goto callback_error;
        }
        return OMPI_SUCCESS;
    }

    /* set the received length in the status, now that we know
     * exactly how much data was sent. */
    ptl_request->super.super.ompi_req->req_status._ucount += ev->mlength;

    /* this frag is complete.  return to freelist. */
    opal_free_list_return (&ompi_mtl_portals4.fl_rndv_get_frag,
                           &rndv_get_frag->super);

    ret = OPAL_THREAD_ADD32(&(ptl_request->pending_reply), -1);
    if (ret > 0) {
        return OMPI_SUCCESS;
    }
    assert(ptl_request->pending_reply == 0);

#if OMPI_MTL_PORTALS4_FLOW_CONTROL
    OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
#endif

    /* make sure the data is in the right place.  Use _ucount for
     * the total length because it will be set correctly for all
     * three protocols. mlength is only correct for eager, and
     * delivery_len is the length of the buffer, not the length of
     * the send. */
    ret = ompi_mtl_datatype_unpack(ptl_request->convertor,
                                   ptl_request->delivery_ptr,
                                   ptl_request->super.super.ompi_req->req_status._ucount);
    if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: ompi_mtl_datatype_unpack failed: %d",
                            __FILE__, __LINE__, ret);
        ptl_request->super.super.ompi_req->req_status.MPI_ERROR = ret;
    }

    OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
        "Recv %lu (0x%lx) completed , reply (pending_reply: %d)",
        ptl_request->opcount, ptl_request->hdr_data, ptl_request->pending_reply));
    ptl_request->super.super.completion_callback(&ptl_request->super.super);

    return OMPI_SUCCESS;

 callback_error:
    ptl_request->super.super.ompi_req->req_status.MPI_ERROR =
        ompi_mtl_portals4_get_error(ret);
    ptl_request->super.super.completion_callback(&ptl_request->super.super);
    return OMPI_SUCCESS;
}
Example #26
/*
 * register memory
 */
int mca_mpool_grdma_register(mca_mpool_base_module_t *mpool, void *addr,
                              size_t size, uint32_t flags,
                              mca_mpool_base_registration_t **reg)
{
    mca_mpool_grdma_module_t *mpool_grdma = (mca_mpool_grdma_module_t*)mpool;
    const bool bypass_cache = !!(flags & MCA_MPOOL_FLAGS_CACHE_BYPASS);
    const bool persist = !!(flags & MCA_MPOOL_FLAGS_PERSIST);
    mca_mpool_base_registration_t *grdma_reg;
    opal_free_list_item_t *item;
    unsigned char *base, *bound;
    int rc;

    OPAL_THREAD_LOCK(&mpool->rcache->lock);

    /* if cache bypass is requested don't use the cache */
    base = (unsigned char *) down_align_addr(addr, mca_mpool_base_page_size_log);
    bound = (unsigned char *) up_align_addr((void*)((char*) addr + size - 1),
                                            mca_mpool_base_page_size_log);
    if (!opal_list_is_empty (&mpool_grdma->pool->gc_list))
        do_unregistration_gc(mpool);

#if OPAL_CUDA_GDR_SUPPORT
    if (flags & MCA_MPOOL_FLAGS_CUDA_GPU_MEM) {
        size_t psize;
        mca_common_cuda_get_address_range(&base, &psize, addr);
        bound = base + psize - 1;
        /* Check to see if this memory is in the cache and if it has been freed. If so,
         * this call will boot it out of the cache. */
        check_for_cuda_freed_memory(mpool, base, psize);
    }
#endif /* OPAL_CUDA_GDR_SUPPORT */

    /* look through existing regs only if a persistent registration was not
     * requested. Persistent registrations are always registered and placed
     * in the cache */
    if(!(bypass_cache || persist)) {
        /* check to see if memory is registered */
        mpool->rcache->rcache_find(mpool->rcache, base, bound - base + 1, reg);
        if (*reg && !(flags & MCA_MPOOL_FLAGS_INVALID)) {
            if (0 == (*reg)->ref_count) {
                /* Leave pinned must be set for this to still be in the rcache. */
                opal_list_remove_item(&mpool_grdma->pool->lru_list,
                                      (opal_list_item_t *)(*reg));
            }

            /* This segment fits fully within an existing segment. */
            mpool_grdma->stat_cache_hit++;
            (*reg)->ref_count++;
            OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
            return OPAL_SUCCESS;
        }

        mpool_grdma->stat_cache_miss++;
        *reg = NULL; /* in case previous find found something */

        /* Unless explicitly requested by the caller always store the
         * registration in the rcache. This will speed up the case where
         * no leave pinned protocol is in use but the same segment is in
         * use in multiple simultaneous transactions. We used to set bypass_cache
         * here to !mca_mpool_grdma_component.leave_pinned. */
    }

    item = opal_free_list_get (&mpool_grdma->reg_list);
    if(NULL == item) {
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        return OPAL_ERR_OUT_OF_RESOURCE;
    }
    grdma_reg = (mca_mpool_base_registration_t*)item;

    grdma_reg->mpool = mpool;
    grdma_reg->base = base;
    grdma_reg->bound = bound;
    grdma_reg->flags = flags;
#if OPAL_CUDA_GDR_SUPPORT
    if (flags & MCA_MPOOL_FLAGS_CUDA_GPU_MEM) {
        mca_common_cuda_get_buffer_id(grdma_reg);
    }
#endif /* OPAL_CUDA_GDR_SUPPORT */

    if (false == bypass_cache) {
        rc = mpool->rcache->rcache_insert(mpool->rcache, grdma_reg, 0);

        if (OPAL_UNLIKELY(rc != OPAL_SUCCESS)) {
            OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
            opal_free_list_return (&mpool_grdma->reg_list, item);
            return rc;
        }
    }

    while (OPAL_ERR_OUT_OF_RESOURCE ==
           (rc = mpool_grdma->resources.register_mem(mpool_grdma->resources.reg_data,
                                                     base, bound - base + 1, grdma_reg))) {
        /* try to remove one unused reg and retry */
        if (!mca_mpool_grdma_evict (mpool)) {
            break;
        }
    }

    if (OPAL_UNLIKELY(rc != OPAL_SUCCESS)) {
        if (false == bypass_cache) {
            mpool->rcache->rcache_delete(mpool->rcache, grdma_reg);
        }
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        opal_free_list_return (&mpool_grdma->reg_list, item);
        return rc;
    }

    *reg = grdma_reg;
    (*reg)->ref_count++;
    OPAL_THREAD_UNLOCK(&mpool->rcache->lock);

    /* Cleanup any vmas that we have deferred deletion on */
    mpool->rcache->rcache_clean(mpool->rcache);
    return OPAL_SUCCESS;
}
Example #27
static void coll_handle_free(void *handle) {
    ompi_request_t *ompi_req = (ompi_request_t *)handle;
    opal_free_list_return (&mca_coll_hcoll_component.requests,
                           (opal_free_list_item_t *)ompi_req);
}
Example #28
/*
 * This function opens a handle using the handle that was received
 * from the remote memory.  It uses the addr and size of the remote
 * memory for caching the registration.
 */
int mca_rcache_rgpusm_register (mca_rcache_base_module_t *rcache, void *addr,
                               size_t size, uint32_t flags, int32_t access_flags,
                               mca_rcache_base_registration_t **reg)
{
    mca_rcache_rgpusm_module_t *rcache_rgpusm = (mca_rcache_rgpusm_module_t*)rcache;
    mca_rcache_common_cuda_reg_t *rgpusm_reg;
    mca_rcache_common_cuda_reg_t *rget_reg;
    opal_free_list_item_t *item;
    int rc;
    int mypeer;  /* just for debugging */

    /* In order to preserve the signature of the mca_rcache_rgpusm_register
     * function, we are using the **reg variable to not only get back the
     * registration information, but to hand in the memory handle received
     * from the remote side. */
    rget_reg = (mca_rcache_common_cuda_reg_t *)*reg;

    mypeer = flags;
    flags = 0;
    /* No need to support MCA_RCACHE_FLAGS_CACHE_BYPASS in here. It is not used. */
    assert(0 == (flags & MCA_RCACHE_FLAGS_CACHE_BYPASS));

    /* This chunk of code handles the case where leave pinned is not
     * set and we do not use the cache.  This is not typically how we
     * will be running.  This means that one can have an unlimited
     * number of registrations occurring at the same time.  Since we
     * are not leaving the registrations pinned, the number of
     * registrations is unlimited and there is no need for a cache. */
    if(!mca_rcache_rgpusm_component.leave_pinned && 0 == mca_rcache_rgpusm_component.rcache_size_limit) {
        item = opal_free_list_get (&rcache_rgpusm->reg_list);
        if(NULL == item) {
            return OPAL_ERR_OUT_OF_RESOURCE;
        }
        rgpusm_reg = (mca_rcache_common_cuda_reg_t*)item;
        rgpusm_reg->base.rcache = rcache;
        rgpusm_reg->base.base = addr;
        rgpusm_reg->base.bound = (unsigned char *)addr + size - 1;
        rgpusm_reg->base.flags = flags;

        /* Copy the memory handle received into the registration */
        memcpy(rgpusm_reg->data.memHandle, rget_reg->data.memHandle, sizeof(rget_reg->data.memHandle));

        /* The rget_reg registration is holding the memory handle needed
         * to register the remote memory.  This was received from the remote
         * process.  A pointer to the memory is returned in the alloc_base field. */
        rc = cuda_openmemhandle (addr, size, (mca_rcache_base_registration_t *)rgpusm_reg,
                                 (mca_rcache_base_registration_t *)rget_reg);

        /* This error should not happen with no cache in use. */
        assert(OPAL_ERR_WOULD_BLOCK != rc);

        if(rc != OPAL_SUCCESS) {
            opal_free_list_return (&rcache_rgpusm->reg_list, item);
            return rc;
        }
        rgpusm_reg->base.ref_count++;
        *reg = (mca_rcache_base_registration_t *)rgpusm_reg;
        return OPAL_SUCCESS;
    }

    /* Check to see if memory is registered and stored in the cache. */
    OPAL_THREAD_LOCK(&rcache->lock);
    mca_rcache_base_vma_find (rcache_rgpusm->vma_module, addr, size, reg);

    /* If *reg is not NULL, we have a registration.  Let us see if the
     * memory handle matches the one we were looking for.  If not, the
     * registration is invalid and needs to be removed. This happens
     * if memory was allocated, freed, and allocated again and ends up
     * with the same virtual address and within the limits of the
     * previous registration.  The memory handle check will catch that
     * scenario as the handles have unique serial numbers.  */
    if (*reg != NULL) {
        rcache_rgpusm->stat_cache_hit++;
        opal_output_verbose(10, mca_rcache_rgpusm_component.output,
                            "RGPUSM: Found addr=%p,size=%d (base=%p,size=%d) in cache",
                            addr, (int)size, (*reg)->base,
                            (int)((*reg)->bound - (*reg)->base));

        if (mca_common_cuda_memhandle_matches((mca_rcache_common_cuda_reg_t *)*reg, rget_reg)) {
            /* Registration matches what was requested.  All is good. */
            rcache_rgpusm->stat_cache_valid++;
        } else {
            /* This is an old registration.  Need to boot it. */
            opal_output_verbose(10, mca_rcache_rgpusm_component.output,
                                "RGPUSM: Mismatched Handle: Evicting/unregistering "
                                "addr=%p,size=%d (base=%p,size=%d) from cache",
                                addr, (int)size, (*reg)->base,
                                (int)((*reg)->bound - (*reg)->base));

            /* The ref_count has to be zero as this memory cannot possibly
             * be in use.  Assert on that just to make sure. */
            assert(0 == (*reg)->ref_count);
            if (mca_rcache_rgpusm_component.leave_pinned) {
                opal_list_remove_item(&rcache_rgpusm->lru_list,
                                      (opal_list_item_t*)(*reg));
            }

            /* Bump the reference count to keep things copacetic in deregister */
            (*reg)->ref_count++;
            /* Invalidate the registration so it will get booted out. */
            (*reg)->flags |= MCA_RCACHE_FLAGS_INVALID;
            mca_rcache_rgpusm_deregister_no_lock(rcache, *reg);
            *reg = NULL;
            rcache_rgpusm->stat_cache_invalid++;
        }
    } else {
        /* Nothing was found in the cache. */
        rcache_rgpusm->stat_cache_miss++;
    }

    /* If we have a registration here, then we know it is valid. */
    if (*reg != NULL) {
        opal_output_verbose(10, mca_rcache_rgpusm_component.output,
                            "RGPUSM: CACHE HIT is good: ep=%d, addr=%p, size=%d in cache",
                            mypeer, addr, (int)size);

        /* When using leave pinned, we keep an LRU list. */
        if ((0 == (*reg)->ref_count) && mca_rcache_rgpusm_component.leave_pinned) {
            opal_output_verbose(20, mca_rcache_rgpusm_component.output,
                                "RGPUSM: POP OFF LRU: ep=%d, addr=%p, size=%d in cache",
                                mypeer, addr, (int)size);
            opal_list_remove_item(&rcache_rgpusm->lru_list,
                                  (opal_list_item_t*)(*reg));
        }
        (*reg)->ref_count++;
        OPAL_THREAD_UNLOCK(&rcache->lock);
        opal_output(-1, "reg->ref_count=%d", (int)(*reg)->ref_count);
        opal_output_verbose(80, mca_rcache_rgpusm_component.output,
                           "RGPUSM: Found entry in cache addr=%p, size=%d", addr, (int)size);
        return OPAL_SUCCESS;
    }

    /* If we are here, then we did not find a registration, or it was invalid,
     * so this is a new one, and we are going to use the cache. */
    assert(NULL == *reg);
    opal_output_verbose(10, mca_rcache_rgpusm_component.output,
                        "RGPUSM: New registration ep=%d, addr=%p, size=%d. Need to register and insert in cache",
                         mypeer, addr, (int)size);

    item = opal_free_list_get (&rcache_rgpusm->reg_list);
    if(NULL == item) {
        OPAL_THREAD_UNLOCK(&rcache->lock);
        return OPAL_ERR_OUT_OF_RESOURCE;
    }
    rgpusm_reg = (mca_rcache_common_cuda_reg_t*)item;

    rgpusm_reg->base.rcache = rcache;
    rgpusm_reg->base.base = addr;
    rgpusm_reg->base.bound = (unsigned char *)addr + size - 1;
    rgpusm_reg->base.flags = flags;

    /* Need the memory handle saved in the registration */
    memcpy(rgpusm_reg->data.memHandle, rget_reg->data.memHandle, sizeof(rget_reg->data.memHandle));

    /* Actually register the memory, which opens the memory handle.
     * Need to do this prior to putting in the cache as the base and
     * bound values may be changed by the registration.  The memory
     * associated with the handle comes back in the alloc_base
     * value. */
    rc = cuda_openmemhandle (addr, size, (mca_rcache_base_registration_t *)rgpusm_reg,
                             (mca_rcache_base_registration_t *)rget_reg);
    /* There is a chance we can get the OPAL_ERR_WOULD_BLOCK from the
     * CUDA codes attempt to register the memory.  The case that this
     * can happen is as follows.  A block of memory is registered.
     * Then the sending side frees the memory.  The sending side then
     * cuMemAllocs memory again and gets the same base
     * address. However, it cuMemAllocs a block that is larger than
     * the one in the cache.  The cache will return that memory is not
     * registered and call into CUDA to register it.  However, that
     * will fail with CUDA_ERROR_ALREADY_MAPPED.  Therefore we need to
     * boot that previous allocation out and deregister it first.
     */
    if (OPAL_ERR_WOULD_BLOCK == rc) {
        mca_rcache_base_registration_t *oldreg;

        /* Need to make sure it is at least 4 bytes in size.  This will
         * ensure we get the hit in the cache. */
        mca_rcache_base_vma_find (rcache_rgpusm->vma_module, addr, 4, &oldreg);

        /* For most cases, we will find a registration that overlaps.
         * Removal of it should allow the registration we are
         * attempting to succeed. */
        if (NULL != oldreg) {
            /* The ref_count has to be zero as this memory cannot
             * possibly be in use.  Assert on that just to make sure. */
            assert(0 == oldreg->ref_count);
            if (mca_rcache_rgpusm_component.leave_pinned) {
                opal_list_remove_item(&rcache_rgpusm->lru_list,
                                      (opal_list_item_t*)oldreg);
            }

            /* Bump the reference count to keep things copacetic in deregister */
            oldreg->ref_count++;
            /* Invalidate the registration so it will get booted out. */
            oldreg->flags |= MCA_RCACHE_FLAGS_INVALID;
            mca_rcache_rgpusm_deregister_no_lock(rcache, oldreg);
            rcache_rgpusm->stat_evicted++;

            /* And try again.  This one usually works. */
            rc = cuda_openmemhandle (addr, size, (mca_rcache_base_registration_t *)rgpusm_reg,
                                     (mca_rcache_base_registration_t *)rget_reg);
        }

        /* There is a chance that another registration is blocking our
         * ability to register.  Check the rc to see if we still need
         * to try and clear out registrations. */
        while (OPAL_SUCCESS != rc) {
            if (true != mca_rcache_rgpusm_deregister_lru(rcache)) {
                rc = OPAL_ERROR;
                break;
            }
            /* Clear out one registration. */
            rc = cuda_openmemhandle (addr, size, (mca_rcache_base_registration_t *)rgpusm_reg,
                                     (mca_rcache_base_registration_t *)rget_reg);
        }
    }

    if(rc != OPAL_SUCCESS) {
        OPAL_THREAD_UNLOCK(&rcache->lock);
        opal_free_list_return (&rcache_rgpusm->reg_list, item);
        return rc;
    }

    opal_output_verbose(80, mca_rcache_rgpusm_component.output,
                        "RGPUSM: About to insert in rgpusm cache addr=%p, size=%d", addr, (int)size);
    rc = mca_rcache_base_vma_insert (rcache_rgpusm->vma_module, (mca_rcache_base_registration_t *)rgpusm_reg,
                                      mca_rcache_rgpusm_component.rcache_size_limit);
    if (OPAL_ERR_TEMP_OUT_OF_RESOURCE == rc) {
        opal_output_verbose(40, mca_rcache_rgpusm_component.output,
                            "RGPUSM: No room in the cache - boot the first one out");
        (void)mca_rcache_rgpusm_deregister_lru(rcache);
        if (mca_rcache_rgpusm_component.empty_cache) {
            int remNum = 1;
            /* Empty out every registration from LRU until it is empty */
            opal_output_verbose(40, mca_rcache_rgpusm_component.output,
                                "RGPUSM: About to delete all the unused entries in the cache");
            while (mca_rcache_rgpusm_deregister_lru(rcache)) {
                remNum++;
            }
            opal_output_verbose(40, mca_rcache_rgpusm_component.output,
                                "RGPUSM: Deleted and deregistered %d entries", remNum);
            rc = mca_rcache_base_vma_insert (rcache_rgpusm->vma_module, (mca_rcache_base_registration_t *)rgpusm_reg,
                                             mca_rcache_rgpusm_component.rcache_size_limit);
        } else {
            /* Check for room after one removal. If not, remove another one until there is space */
            while((rc = mca_rcache_base_vma_insert (rcache_rgpusm->vma_module, (mca_rcache_base_registration_t *)rgpusm_reg,
                                                    mca_rcache_rgpusm_component.rcache_size_limit)) ==
                  OPAL_ERR_TEMP_OUT_OF_RESOURCE) {
                opal_output_verbose(40, mca_rcache_rgpusm_component.output,
                                    "RGPUSM: No room in the cache - boot one out");
                if (!mca_rcache_rgpusm_deregister_lru(rcache)) {
                    break;
                }
            }
        }
    }

    if(rc != OPAL_SUCCESS) {
        OPAL_THREAD_UNLOCK(&rcache->lock);
        opal_free_list_return (&rcache_rgpusm->reg_list, item);
        /* We cannot recover from this.  We can be here if the size of
         * the cache is smaller than the amount of memory we are
         * trying to register in a single transfer.  In that case, rc
         * is MPI_ERR_OUT_OF_RESOURCES, but everything is stuck at
         * that point.  Therefore, just error out completely.
         */
        opal_output_verbose(10, mca_rcache_rgpusm_component.output,
                            "RGPUSM: Failed to register addr=%p, size=%d", addr, (int)size);
        return OPAL_ERROR;
    }

    rgpusm_reg->base.ref_count++;
    *reg = (mca_rcache_base_registration_t *)rgpusm_reg;
    OPAL_THREAD_UNLOCK(&rcache->lock);

    return OPAL_SUCCESS;
}